author    Dimitry Andric <dim@FreeBSD.org>    2021-11-19 20:06:13 +0000
committer Dimitry Andric <dim@FreeBSD.org>    2021-11-19 20:06:13 +0000
commit    c0981da47d5696fe36474fcf86b4ce03ae3ff818 (patch)
tree      f42add1021b9f2ac6a69ac7cf6c4499962739a45
parent    344a3780b2e33f6ca763666c380202b18aab72a3 (diff)
Vendor import of llvm-project main llvmorg-14-init-10186-gff7f2cfa959b.
vendor/llvm-project/llvmorg-14-init-10186-gff7f2cfa959b
-rw-r--r--clang/include/clang-c/Index.h13
-rw-r--r--clang/include/clang/AST/ASTConcept.h7
-rw-r--r--clang/include/clang/AST/ASTContext.h93
-rw-r--r--clang/include/clang/AST/ASTFwd.h3
-rw-r--r--clang/include/clang/AST/ASTImporter.h3
-rw-r--r--clang/include/clang/AST/ASTImporterSharedState.h7
-rw-r--r--clang/include/clang/AST/ASTStructuralEquivalence.h2
-rw-r--r--clang/include/clang/AST/ASTTypeTraits.h63
-rw-r--r--clang/include/clang/AST/Attr.h5
-rw-r--r--clang/include/clang/AST/BuiltinTypes.def3
-rw-r--r--clang/include/clang/AST/Comment.h11
-rw-r--r--clang/include/clang/AST/CommentCommands.td103
-rw-r--r--clang/include/clang/AST/CommentHTMLTags.td6
-rw-r--r--clang/include/clang/AST/CommentSema.h9
-rw-r--r--clang/include/clang/AST/ComparisonCategories.h5
-rw-r--r--clang/include/clang/AST/CurrentSourceLocExprScope.h7
-rw-r--r--clang/include/clang/AST/Decl.h44
-rw-r--r--clang/include/clang/AST/DeclBase.h6
-rw-r--r--clang/include/clang/AST/DeclCXX.h63
-rw-r--r--clang/include/clang/AST/DeclContextInternals.h3
-rw-r--r--clang/include/clang/AST/DeclObjC.h10
-rw-r--r--clang/include/clang/AST/DeclTemplate.h9
-rw-r--r--clang/include/clang/AST/Expr.h6
-rw-r--r--clang/include/clang/AST/JSONNodeDumper.h7
-rw-r--r--clang/include/clang/AST/LambdaCapture.h2
-rw-r--r--clang/include/clang/AST/NestedNameSpecifier.h2
-rw-r--r--clang/include/clang/AST/OpenMPClause.h181
-rw-r--r--clang/include/clang/AST/PrettyPrinter.h11
-rw-r--r--clang/include/clang/AST/RecursiveASTVisitor.h27
-rw-r--r--clang/include/clang/AST/Stmt.h56
-rw-r--r--clang/include/clang/AST/StmtObjC.h41
-rw-r--r--clang/include/clang/AST/StmtOpenMP.h252
-rw-r--r--clang/include/clang/AST/TemplateName.h9
-rw-r--r--clang/include/clang/AST/Type.h63
-rw-r--r--clang/include/clang/AST/TypeLoc.h7
-rw-r--r--clang/include/clang/AST/TypeOrdering.h1
-rw-r--r--clang/include/clang/ASTMatchers/ASTMatchFinder.h2
-rw-r--r--clang/include/clang/ASTMatchers/ASTMatchers.h340
-rw-r--r--clang/include/clang/ASTMatchers/ASTMatchersInternal.h71
-rw-r--r--clang/include/clang/Analysis/Analyses/Dominators.h2
-rw-r--r--clang/include/clang/Analysis/CFG.h5
-rw-r--r--clang/include/clang/Analysis/CloneDetection.h4
-rw-r--r--clang/include/clang/Analysis/PathDiagnostic.h23
-rw-r--r--clang/include/clang/Basic/Attr.td43
-rw-r--r--clang/include/clang/Basic/AttrDocs.td92
-rw-r--r--clang/include/clang/Basic/Builtins.def11
-rw-r--r--clang/include/clang/Basic/BuiltinsAArch64.def3
-rw-r--r--clang/include/clang/Basic/BuiltinsAMDGPU.def13
-rw-r--r--clang/include/clang/Basic/BuiltinsNVPTX.def17
-rw-r--r--clang/include/clang/Basic/BuiltinsPPC.def31
-rw-r--r--clang/include/clang/Basic/BuiltinsRISCV.def2
-rw-r--r--clang/include/clang/Basic/BuiltinsRISCVVector.def21
-rw-r--r--clang/include/clang/Basic/BuiltinsWebAssembly.def35
-rw-r--r--clang/include/clang/Basic/BuiltinsX86.def219
-rw-r--r--clang/include/clang/Basic/BuiltinsX86_64.def8
-rw-r--r--clang/include/clang/Basic/CLWarnings.h26
-rw-r--r--clang/include/clang/Basic/CharInfo.h21
-rw-r--r--clang/include/clang/Basic/CodeGenOptions.def24
-rw-r--r--clang/include/clang/Basic/CodeGenOptions.h15
-rw-r--r--clang/include/clang/Basic/Cuda.h9
-rw-r--r--clang/include/clang/Basic/DebugInfoOptions.h6
-rw-r--r--clang/include/clang/Basic/Diagnostic.h31
-rw-r--r--clang/include/clang/Basic/DiagnosticASTKinds.td4
-rw-r--r--clang/include/clang/Basic/DiagnosticCategories.h7
-rw-r--r--clang/include/clang/Basic/DiagnosticCommonKinds.td8
-rw-r--r--clang/include/clang/Basic/DiagnosticDriverKinds.td168
-rw-r--r--clang/include/clang/Basic/DiagnosticFrontendKinds.td17
-rw-r--r--clang/include/clang/Basic/DiagnosticGroups.td36
-rw-r--r--clang/include/clang/Basic/DiagnosticIDs.h6
-rw-r--r--clang/include/clang/Basic/DiagnosticLexKinds.td69
-rw-r--r--clang/include/clang/Basic/DiagnosticParseKinds.td47
-rw-r--r--clang/include/clang/Basic/DiagnosticSemaKinds.td188
-rw-r--r--clang/include/clang/Basic/DiagnosticSerializationKinds.td2
-rw-r--r--clang/include/clang/Basic/IdentifierTable.h67
-rw-r--r--clang/include/clang/Basic/JsonSupport.h28
-rw-r--r--clang/include/clang/Basic/LangOptions.def15
-rw-r--r--clang/include/clang/Basic/LangOptions.h13
-rw-r--r--clang/include/clang/Basic/LangStandards.def15
-rw-r--r--clang/include/clang/Basic/MSP430Target.def3
-rw-r--r--clang/include/clang/Basic/ObjCRuntime.h7
-rw-r--r--clang/include/clang/Basic/OpenCLOptions.h9
-rw-r--r--clang/include/clang/Basic/OpenMPKinds.def19
-rw-r--r--clang/include/clang/Basic/OpenMPKinds.h24
-rw-r--r--clang/include/clang/Basic/Sanitizers.h7
-rw-r--r--clang/include/clang/Basic/SourceLocation.h19
-rw-r--r--clang/include/clang/Basic/Specifiers.h10
-rw-r--r--clang/include/clang/Basic/Stack.h2
-rw-r--r--clang/include/clang/Basic/StmtNodes.td7
-rw-r--r--clang/include/clang/Basic/TargetBuiltins.h11
-rw-r--r--clang/include/clang/Basic/TargetInfo.h70
-rw-r--r--clang/include/clang/Basic/TokenKinds.def4
-rw-r--r--clang/include/clang/Basic/X86Target.def110
-rw-r--r--clang/include/clang/Basic/riscv_vector.td257
-rw-r--r--clang/include/clang/CodeGen/ModuleBuilder.h4
-rw-r--r--clang/include/clang/DirectoryWatcher/DirectoryWatcher.h2
-rw-r--r--clang/include/clang/Driver/Distro.h6
-rw-r--r--clang/include/clang/Driver/Driver.h16
-rw-r--r--clang/include/clang/Driver/Options.td289
-rw-r--r--clang/include/clang/Driver/Phases.h3
-rw-r--r--clang/include/clang/Driver/SanitizerArgs.h3
-rw-r--r--clang/include/clang/Driver/ToolChain.h22
-rw-r--r--clang/include/clang/Driver/Types.h2
-rw-r--r--clang/include/clang/Format/Format.h433
-rw-r--r--clang/include/clang/Frontend/CompilerInstance.h3
-rw-r--r--clang/include/clang/Frontend/CompilerInvocation.h5
-rw-r--r--clang/include/clang/Frontend/FrontendAction.h17
-rw-r--r--clang/include/clang/Frontend/FrontendActions.h9
-rw-r--r--clang/include/clang/Frontend/FrontendOptions.h11
-rw-r--r--clang/include/clang/Interpreter/Interpreter.h17
-rw-r--r--clang/include/clang/Lex/DependencyDirectivesSourceMinimizer.h3
-rw-r--r--clang/include/clang/Lex/HeaderMap.h7
-rw-r--r--clang/include/clang/Lex/HeaderSearch.h82
-rw-r--r--clang/include/clang/Lex/HeaderSearchOptions.h15
-rw-r--r--clang/include/clang/Lex/Lexer.h29
-rw-r--r--clang/include/clang/Lex/LiteralSupport.h2
-rw-r--r--clang/include/clang/Lex/PPCallbacks.h24
-rw-r--r--clang/include/clang/Lex/Preprocessor.h94
-rw-r--r--clang/include/clang/Lex/PreprocessorLexer.h20
-rw-r--r--clang/include/clang/Lex/PreprocessorOptions.h3
-rw-r--r--clang/include/clang/Parse/Parser.h16
-rw-r--r--clang/include/clang/Sema/CodeCompleteConsumer.h3
-rw-r--r--clang/include/clang/Sema/DeclSpec.h1
-rw-r--r--clang/include/clang/Sema/Initialization.h29
-rw-r--r--clang/include/clang/Sema/Overload.h62
-rw-r--r--clang/include/clang/Sema/ParsedAttr.h25
-rw-r--r--clang/include/clang/Sema/Scope.h2
-rw-r--r--clang/include/clang/Sema/ScopeInfo.h19
-rw-r--r--clang/include/clang/Sema/Sema.h231
-rw-r--r--clang/include/clang/Sema/SemaConcept.h7
-rw-r--r--clang/include/clang/Serialization/ASTBitCodes.h8
-rw-r--r--clang/include/clang/Serialization/ASTReader.h21
-rw-r--r--clang/include/clang/Serialization/ASTRecordReader.h2
-rw-r--r--clang/include/clang/Serialization/ASTWriter.h5
-rw-r--r--clang/include/clang/Serialization/ModuleFile.h4
-rw-r--r--clang/include/clang/Serialization/ModuleFileExtension.h26
-rw-r--r--clang/include/clang/StaticAnalyzer/Checkers/Checkers.td25
-rw-r--r--clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def24
-rw-r--r--clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h6
-rw-r--r--clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h113
-rw-r--r--clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h173
-rw-r--r--clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h107
-rw-r--r--clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h38
-rw-r--r--clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h11
-rw-r--r--clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h7
-rw-r--r--clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h15
-rw-r--r--clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h26
-rw-r--r--clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h35
-rw-r--r--clang/include/clang/Tooling/Inclusions/HeaderIncludes.h3
-rw-r--r--clang/include/clang/Tooling/Inclusions/IncludeStyle.h4
-rw-r--r--clang/include/clang/Tooling/Tooling.h13
-rw-r--r--clang/include/clang/Tooling/Transformer/RangeSelector.h2
-rw-r--r--clang/include/clang/Tooling/Transformer/Stencil.h32
-rw-r--r--clang/include/clang/module.modulemap2
-rw-r--r--clang/lib/ARCMigrate/ARCMT.cpp2
-rw-r--r--clang/lib/ARCMigrate/ObjCMT.cpp33
-rw-r--r--clang/lib/ARCMigrate/TransUnbridgedCasts.cpp10
-rw-r--r--clang/lib/ARCMigrate/Transforms.cpp6
-rw-r--r--clang/lib/AST/APValue.cpp4
-rw-r--r--clang/lib/AST/ASTConcept.cpp7
-rw-r--r--clang/lib/AST/ASTContext.cpp569
-rw-r--r--clang/lib/AST/ASTDiagnostic.cpp3
-rw-r--r--clang/lib/AST/ASTDumper.cpp10
-rw-r--r--clang/lib/AST/ASTImporter.cpp616
-rw-r--r--clang/lib/AST/ASTImporterLookupTable.cpp16
-rw-r--r--clang/lib/AST/ASTStructuralEquivalence.cpp20
-rw-r--r--clang/lib/AST/ASTTypeTraits.cpp35
-rw-r--r--clang/lib/AST/AttrDocTable.cpp27
-rw-r--r--clang/lib/AST/AttrImpl.cpp34
-rw-r--r--clang/lib/AST/CXXInheritance.cpp6
-rw-r--r--clang/lib/AST/Comment.cpp80
-rw-r--r--clang/lib/AST/CommentBriefParser.cpp2
-rw-r--r--clang/lib/AST/CommentLexer.cpp5
-rw-r--r--clang/lib/AST/CommentSema.cpp71
-rw-r--r--clang/lib/AST/ComparisonCategories.cpp2
-rw-r--r--clang/lib/AST/Decl.cpp92
-rw-r--r--clang/lib/AST/DeclBase.cpp10
-rw-r--r--clang/lib/AST/DeclCXX.cpp102
-rw-r--r--clang/lib/AST/DeclObjC.cpp12
-rw-r--r--clang/lib/AST/DeclPrinter.cpp55
-rw-r--r--clang/lib/AST/DeclTemplate.cpp83
-rw-r--r--clang/lib/AST/Expr.cpp55
-rw-r--r--clang/lib/AST/ExprConstant.cpp166
-rw-r--r--clang/lib/AST/ExprObjC.cpp15
-rw-r--r--clang/lib/AST/ExternalASTMerger.cpp18
-rw-r--r--clang/lib/AST/Interp/ByteCodeEmitter.cpp44
-rw-r--r--clang/lib/AST/Interp/ByteCodeStmtGen.cpp6
-rw-r--r--clang/lib/AST/Interp/Context.h2
-rw-r--r--clang/lib/AST/Interp/Descriptor.h2
-rw-r--r--clang/lib/AST/Interp/Disasm.cpp13
-rw-r--r--clang/lib/AST/Interp/Function.h2
-rw-r--r--clang/lib/AST/Interp/Interp.h26
-rw-r--r--clang/lib/AST/Interp/InterpStack.h2
-rw-r--r--clang/lib/AST/Interp/InterpState.h2
-rw-r--r--clang/lib/AST/Interp/Opcodes.td2
-rw-r--r--clang/lib/AST/Interp/Program.cpp17
-rw-r--r--clang/lib/AST/Interp/Program.h11
-rw-r--r--clang/lib/AST/Interp/Source.h21
-rw-r--r--clang/lib/AST/ItaniumCXXABI.cpp34
-rw-r--r--clang/lib/AST/ItaniumMangle.cpp36
-rw-r--r--clang/lib/AST/JSONNodeDumper.cpp2
-rw-r--r--clang/lib/AST/MicrosoftCXXABI.cpp21
-rw-r--r--clang/lib/AST/MicrosoftMangle.cpp9
-rw-r--r--clang/lib/AST/NSAPI.cpp1
-rw-r--r--clang/lib/AST/NestedNameSpecifier.cpp3
-rw-r--r--clang/lib/AST/OpenMPClause.cpp46
-rw-r--r--clang/lib/AST/ParentMapContext.cpp10
-rw-r--r--clang/lib/AST/PrintfFormatString.cpp1
-rw-r--r--clang/lib/AST/QualTypeNames.cpp2
-rw-r--r--clang/lib/AST/RecordLayoutBuilder.cpp58
-rw-r--r--clang/lib/AST/Stmt.cpp12
-rw-r--r--clang/lib/AST/StmtObjC.cpp8
-rw-r--r--clang/lib/AST/StmtOpenMP.cpp144
-rw-r--r--clang/lib/AST/StmtPrinter.cpp44
-rw-r--r--clang/lib/AST/StmtProfile.cpp51
-rw-r--r--clang/lib/AST/TemplateBase.cpp2
-rw-r--r--clang/lib/AST/TemplateName.cpp23
-rw-r--r--clang/lib/AST/TextNodeDumper.cpp8
-rw-r--r--clang/lib/AST/Type.cpp30
-rw-r--r--clang/lib/AST/TypeLoc.cpp10
-rw-r--r--clang/lib/AST/TypePrinter.cpp66
-rw-r--r--clang/lib/AST/VTableBuilder.cpp12
-rw-r--r--clang/lib/ASTMatchers/ASTMatchFinder.cpp34
-rw-r--r--clang/lib/ASTMatchers/ASTMatchersInternal.cpp27
-rw-r--r--clang/lib/ASTMatchers/Dynamic/Registry.cpp20
-rw-r--r--clang/lib/Analysis/BodyFarm.cpp9
-rw-r--r--clang/lib/Analysis/CFG.cpp260
-rw-r--r--clang/lib/Analysis/CloneDetection.cpp5
-rw-r--r--clang/lib/Analysis/ObjCNoReturn.cpp9
-rw-r--r--clang/lib/Analysis/ReachableCode.cpp25
-rw-r--r--clang/lib/Analysis/RetainSummaryManager.cpp3
-rw-r--r--clang/lib/Analysis/ThreadSafety.cpp96
-rw-r--r--clang/lib/Basic/Builtins.cpp2
-rw-r--r--clang/lib/Basic/CLWarnings.cpp28
-rw-r--r--clang/lib/Basic/Cuda.cpp23
-rw-r--r--clang/lib/Basic/Diagnostic.cpp12
-rw-r--r--clang/lib/Basic/DiagnosticIDs.cpp12
-rw-r--r--clang/lib/Basic/FileManager.cpp30
-rw-r--r--clang/lib/Basic/LangOptions.cpp30
-rw-r--r--clang/lib/Basic/Module.cpp6
-rw-r--r--clang/lib/Basic/OpenCLOptions.cpp7
-rw-r--r--clang/lib/Basic/OpenMPKinds.cpp55
-rw-r--r--clang/lib/Basic/ProfileList.cpp2
-rw-r--r--clang/lib/Basic/SourceManager.cpp34
-rw-r--r--clang/lib/Basic/TargetInfo.cpp39
-rw-r--r--clang/lib/Basic/Targets.cpp14
-rw-r--r--clang/lib/Basic/Targets/AArch64.cpp81
-rw-r--r--clang/lib/Basic/Targets/AArch64.h10
-rw-r--r--clang/lib/Basic/Targets/AMDGPU.cpp2
-rw-r--r--clang/lib/Basic/Targets/AMDGPU.h31
-rw-r--r--clang/lib/Basic/Targets/ARC.h2
-rw-r--r--clang/lib/Basic/Targets/ARM.cpp15
-rw-r--r--clang/lib/Basic/Targets/ARM.h1
-rw-r--r--clang/lib/Basic/Targets/AVR.cpp38
-rw-r--r--clang/lib/Basic/Targets/BPF.cpp2
-rw-r--r--clang/lib/Basic/Targets/M68k.cpp4
-rw-r--r--clang/lib/Basic/Targets/Mips.cpp2
-rw-r--r--clang/lib/Basic/Targets/NVPTX.cpp5
-rw-r--r--clang/lib/Basic/Targets/NVPTX.h4
-rw-r--r--clang/lib/Basic/Targets/OSTargets.h14
-rw-r--r--clang/lib/Basic/Targets/PPC.cpp79
-rw-r--r--clang/lib/Basic/Targets/PPC.h3
-rw-r--r--clang/lib/Basic/Targets/RISCV.cpp184
-rw-r--r--clang/lib/Basic/Targets/RISCV.h33
-rw-r--r--clang/lib/Basic/Targets/SPIR.cpp21
-rw-r--r--clang/lib/Basic/Targets/SPIR.h94
-rw-r--r--clang/lib/Basic/Targets/Sparc.h4
-rw-r--r--clang/lib/Basic/Targets/SystemZ.h14
-rw-r--r--clang/lib/Basic/Targets/WebAssembly.cpp21
-rw-r--r--clang/lib/Basic/Targets/WebAssembly.h13
-rw-r--r--clang/lib/Basic/Targets/X86.cpp67
-rw-r--r--clang/lib/Basic/Targets/X86.h9
-rw-r--r--clang/lib/CodeGen/BackendUtil.cpp258
-rw-r--r--clang/lib/CodeGen/CGAtomic.cpp4
-rw-r--r--clang/lib/CodeGen/CGBlocks.cpp12
-rw-r--r--clang/lib/CodeGen/CGBuiltin.cpp566
-rw-r--r--clang/lib/CodeGen/CGCUDANV.cpp28
-rw-r--r--clang/lib/CodeGen/CGCall.cpp146
-rw-r--r--clang/lib/CodeGen/CGClass.cpp18
-rw-r--r--clang/lib/CodeGen/CGDebugInfo.cpp665
-rw-r--r--clang/lib/CodeGen/CGDebugInfo.h46
-rw-r--r--clang/lib/CodeGen/CGDecl.cpp21
-rw-r--r--clang/lib/CodeGen/CGDeclCXX.cpp28
-rw-r--r--clang/lib/CodeGen/CGException.cpp24
-rw-r--r--clang/lib/CodeGen/CGExpr.cpp109
-rw-r--r--clang/lib/CodeGen/CGExprAgg.cpp2
-rw-r--r--clang/lib/CodeGen/CGExprCXX.cpp22
-rw-r--r--clang/lib/CodeGen/CGExprConstant.cpp4
-rw-r--r--clang/lib/CodeGen/CGExprScalar.cpp92
-rw-r--r--clang/lib/CodeGen/CGGPUBuiltin.cpp135
-rw-r--r--clang/lib/CodeGen/CGObjC.cpp100
-rw-r--r--clang/lib/CodeGen/CGObjCGNU.cpp165
-rw-r--r--clang/lib/CodeGen/CGObjCMac.cpp137
-rw-r--r--clang/lib/CodeGen/CGObjCRuntime.cpp80
-rw-r--r--clang/lib/CodeGen/CGObjCRuntime.h17
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntime.cpp304
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntime.h12
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp60
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.h43
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp74
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntimeGPU.h7
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp56
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h43
-rw-r--r--clang/lib/CodeGen/CGStmt.cpp90
-rw-r--r--clang/lib/CodeGen/CGStmtOpenMP.cpp363
-rw-r--r--clang/lib/CodeGen/CGVTables.cpp2
-rw-r--r--clang/lib/CodeGen/CodeGenAction.cpp111
-rw-r--r--clang/lib/CodeGen/CodeGenFunction.cpp129
-rw-r--r--clang/lib/CodeGen/CodeGenFunction.h112
-rw-r--r--clang/lib/CodeGen/CodeGenModule.cpp338
-rw-r--r--clang/lib/CodeGen/CodeGenModule.h4
-rw-r--r--clang/lib/CodeGen/CodeGenPGO.cpp8
-rw-r--r--clang/lib/CodeGen/CodeGenTypeCache.h6
-rw-r--r--clang/lib/CodeGen/CodeGenTypes.cpp1
-rw-r--r--clang/lib/CodeGen/CoverageMappingGen.cpp14
-rw-r--r--clang/lib/CodeGen/ItaniumCXXABI.cpp25
-rw-r--r--clang/lib/CodeGen/MicrosoftCXXABI.cpp9
-rw-r--r--clang/lib/CodeGen/ModuleBuilder.cpp8
-rw-r--r--clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp59
-rw-r--r--clang/lib/CodeGen/TargetInfo.cpp225
-rw-r--r--clang/lib/CodeGen/TargetInfo.h7
-rw-r--r--clang/lib/CodeGen/VarBypassDetector.h2
-rw-r--r--clang/lib/DirectoryWatcher/windows/DirectoryWatcher-windows.cpp9
-rw-r--r--clang/lib/Driver/Compilation.cpp2
-rw-r--r--clang/lib/Driver/Distro.cpp10
-rw-r--r--clang/lib/Driver/Driver.cpp185
-rw-r--r--clang/lib/Driver/Multilib.cpp2
-rw-r--r--clang/lib/Driver/SanitizerArgs.cpp250
-rw-r--r--clang/lib/Driver/ToolChain.cpp32
-rw-r--r--clang/lib/Driver/ToolChains/AIX.cpp53
-rw-r--r--clang/lib/Driver/ToolChains/AIX.h8
-rw-r--r--clang/lib/Driver/ToolChains/AMDGPU.cpp43
-rw-r--r--clang/lib/Driver/ToolChains/AMDGPU.h13
-rw-r--r--clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp54
-rw-r--r--clang/lib/Driver/ToolChains/AMDGPUOpenMP.h14
-rw-r--r--clang/lib/Driver/ToolChains/AVR.cpp43
-rw-r--r--clang/lib/Driver/ToolChains/AVR.h7
-rw-r--r--clang/lib/Driver/ToolChains/Arch/AArch64.cpp60
-rw-r--r--clang/lib/Driver/ToolChains/Arch/ARM.cpp36
-rw-r--r--clang/lib/Driver/ToolChains/Arch/ARM.h5
-rw-r--r--clang/lib/Driver/ToolChains/Arch/Mips.cpp5
-rw-r--r--clang/lib/Driver/ToolChains/Arch/Mips.h3
-rw-r--r--clang/lib/Driver/ToolChains/Arch/RISCV.cpp488
-rw-r--r--clang/lib/Driver/ToolChains/Arch/X86.cpp49
-rw-r--r--clang/lib/Driver/ToolChains/Arch/X86.h2
-rw-r--r--clang/lib/Driver/ToolChains/BareMetal.cpp17
-rw-r--r--clang/lib/Driver/ToolChains/BareMetal.h4
-rw-r--r--clang/lib/Driver/ToolChains/Clang.cpp436
-rw-r--r--clang/lib/Driver/ToolChains/CloudABI.cpp4
-rw-r--r--clang/lib/Driver/ToolChains/CloudABI.h2
-rw-r--r--clang/lib/Driver/ToolChains/CommonArgs.cpp360
-rw-r--r--clang/lib/Driver/ToolChains/CommonArgs.h37
-rw-r--r--clang/lib/Driver/ToolChains/CrossWindows.cpp4
-rw-r--r--clang/lib/Driver/ToolChains/CrossWindows.h2
-rw-r--r--clang/lib/Driver/ToolChains/Cuda.cpp121
-rw-r--r--clang/lib/Driver/ToolChains/Cuda.h11
-rw-r--r--clang/lib/Driver/ToolChains/Darwin.cpp179
-rw-r--r--clang/lib/Driver/ToolChains/Darwin.h17
-rw-r--r--clang/lib/Driver/ToolChains/Flang.cpp5
-rw-r--r--clang/lib/Driver/ToolChains/FreeBSD.cpp10
-rw-r--r--clang/lib/Driver/ToolChains/FreeBSD.h2
-rw-r--r--clang/lib/Driver/ToolChains/Fuchsia.cpp23
-rw-r--r--clang/lib/Driver/ToolChains/Fuchsia.h7
-rw-r--r--clang/lib/Driver/ToolChains/Gnu.cpp105
-rw-r--r--clang/lib/Driver/ToolChains/Gnu.h2
-rw-r--r--clang/lib/Driver/ToolChains/HIP.cpp129
-rw-r--r--clang/lib/Driver/ToolChains/HIP.h4
-rw-r--r--clang/lib/Driver/ToolChains/Haiku.h2
-rw-r--r--clang/lib/Driver/ToolChains/Hexagon.cpp55
-rw-r--r--clang/lib/Driver/ToolChains/Hexagon.h6
-rw-r--r--clang/lib/Driver/ToolChains/Linux.cpp34
-rw-r--r--clang/lib/Driver/ToolChains/Linux.h3
-rw-r--r--clang/lib/Driver/ToolChains/MSP430.h4
-rw-r--r--clang/lib/Driver/ToolChains/MSVC.cpp151
-rw-r--r--clang/lib/Driver/ToolChains/MSVC.h2
-rw-r--r--clang/lib/Driver/ToolChains/MinGW.cpp142
-rw-r--r--clang/lib/Driver/ToolChains/MinGW.h9
-rw-r--r--clang/lib/Driver/ToolChains/NetBSD.cpp53
-rw-r--r--clang/lib/Driver/ToolChains/OpenBSD.cpp43
-rw-r--r--clang/lib/Driver/ToolChains/OpenBSD.h4
-rw-r--r--clang/lib/Driver/ToolChains/PS4CPU.cpp11
-rw-r--r--clang/lib/Driver/ToolChains/PS4CPU.h3
-rw-r--r--clang/lib/Driver/ToolChains/SPIRV.cpp49
-rw-r--r--clang/lib/Driver/ToolChains/SPIRV.h46
-rw-r--r--clang/lib/Driver/ToolChains/TCE.cpp4
-rw-r--r--clang/lib/Driver/ToolChains/TCE.h2
-rw-r--r--clang/lib/Driver/ToolChains/VEToolchain.cpp4
-rw-r--r--clang/lib/Driver/ToolChains/VEToolchain.h2
-rw-r--r--clang/lib/Driver/ToolChains/WebAssembly.cpp49
-rw-r--r--clang/lib/Driver/ToolChains/WebAssembly.h2
-rw-r--r--clang/lib/Driver/ToolChains/XCore.cpp4
-rw-r--r--clang/lib/Driver/ToolChains/XCore.h2
-rw-r--r--clang/lib/Driver/ToolChains/ZOS.h4
-rw-r--r--clang/lib/Driver/Types.cpp41
-rw-r--r--clang/lib/Edit/EditedSource.cpp6
-rw-r--r--clang/lib/Format/BreakableToken.cpp3
-rw-r--r--clang/lib/Format/ContinuationIndenter.cpp29
-rw-r--r--clang/lib/Format/Format.cpp235
-rw-r--r--clang/lib/Format/FormatToken.cpp1
-rw-r--r--clang/lib/Format/FormatToken.h6
-rw-r--r--clang/lib/Format/FormatTokenLexer.cpp6
-rw-r--r--clang/lib/Format/MacroExpander.cpp9
-rw-r--r--clang/lib/Format/Macros.h7
-rw-r--r--clang/lib/Format/QualifierAlignmentFixer.cpp468
-rw-r--r--clang/lib/Format/QualifierAlignmentFixer.h98
-rw-r--r--clang/lib/Format/SortJavaScriptImports.cpp5
-rw-r--r--clang/lib/Format/TokenAnalyzer.cpp49
-rw-r--r--clang/lib/Format/TokenAnalyzer.h12
-rw-r--r--clang/lib/Format/TokenAnnotator.cpp210
-rw-r--r--clang/lib/Format/TokenAnnotator.h4
-rw-r--r--clang/lib/Format/UnwrappedLineFormatter.cpp11
-rw-r--r--clang/lib/Format/UnwrappedLineParser.cpp129
-rw-r--r--clang/lib/Format/UnwrappedLineParser.h7
-rw-r--r--clang/lib/Format/WhitespaceManager.cpp18
-rw-r--r--clang/lib/Format/WhitespaceManager.h2
-rw-r--r--clang/lib/Frontend/ASTConsumers.cpp7
-rw-r--r--clang/lib/Frontend/ASTUnit.cpp5
-rw-r--r--clang/lib/Frontend/CompilerInstance.cpp438
-rw-r--r--clang/lib/Frontend/CompilerInvocation.cpp274
-rw-r--r--clang/lib/Frontend/CreateInvocationFromCommandLine.cpp15
-rw-r--r--clang/lib/Frontend/FrontendAction.cpp92
-rw-r--r--clang/lib/Frontend/FrontendActions.cpp14
-rw-r--r--clang/lib/Frontend/HeaderIncludeGen.cpp2
-rw-r--r--clang/lib/Frontend/InitHeaderSearch.cpp83
-rw-r--r--clang/lib/Frontend/InitPreprocessor.cpp27
-rw-r--r--clang/lib/Frontend/LayoutOverrideSource.cpp6
-rw-r--r--clang/lib/Frontend/PrintPreprocessedOutput.cpp61
-rw-r--r--clang/lib/Frontend/Rewrite/FrontendActions.cpp2
-rw-r--r--clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp6
-rw-r--r--clang/lib/Frontend/TestModuleFileExtension.cpp14
-rw-r--r--clang/lib/Frontend/TestModuleFileExtension.h2
-rw-r--r--clang/lib/Frontend/VerifyDiagnosticConsumer.cpp4
-rw-r--r--clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp21
-rw-r--r--clang/lib/Headers/__clang_cuda_complex_builtins.h6
-rw-r--r--clang/lib/Headers/__clang_cuda_device_functions.h276
-rw-r--r--clang/lib/Headers/__clang_cuda_intrinsics.h32
-rw-r--r--clang/lib/Headers/__clang_cuda_libdevice_declares.h6
-rw-r--r--clang/lib/Headers/__clang_cuda_runtime_wrapper.h69
-rw-r--r--clang/lib/Headers/__clang_cuda_texture_intrinsics.h740
-rw-r--r--clang/lib/Headers/__clang_hip_cmath.h188
-rw-r--r--clang/lib/Headers/__clang_hip_math.h50
-rw-r--r--clang/lib/Headers/__wmmintrin_aes.h2
-rw-r--r--clang/lib/Headers/altivec.h234
-rw-r--r--clang/lib/Headers/ammintrin.h4
-rw-r--r--clang/lib/Headers/amxintrin.h33
-rw-r--r--clang/lib/Headers/avx2intrin.h332
-rw-r--r--clang/lib/Headers/avx512bf16intrin.h6
-rw-r--r--clang/lib/Headers/avx512bwintrin.h122
-rw-r--r--clang/lib/Headers/avx512dqintrin.h730
-rw-r--r--clang/lib/Headers/avx512erintrin.h204
-rw-r--r--clang/lib/Headers/avx512fintrin.h3070
-rw-r--r--clang/lib/Headers/avx512fp16intrin.h3349
-rw-r--r--clang/lib/Headers/avx512vbmi2intrin.h96
-rw-r--r--clang/lib/Headers/avx512vlbf16intrin.h54
-rw-r--r--clang/lib/Headers/avx512vlbwintrin.h188
-rw-r--r--clang/lib/Headers/avx512vldqintrin.h268
-rw-r--r--clang/lib/Headers/avx512vlfp16intrin.h2068
-rw-r--r--clang/lib/Headers/avx512vlintrin.h1106
-rw-r--r--clang/lib/Headers/avx512vlvbmi2intrin.h192
-rw-r--r--clang/lib/Headers/avx512vlvnniintrin.h16
-rw-r--r--clang/lib/Headers/avxintrin.h381
-rw-r--r--clang/lib/Headers/cpuid.h1
-rw-r--r--clang/lib/Headers/crc32intrin.h100
-rw-r--r--clang/lib/Headers/emmintrin.h30
-rw-r--r--clang/lib/Headers/f16cintrin.h8
-rw-r--r--clang/lib/Headers/gfniintrin.h95
-rw-r--r--clang/lib/Headers/ia32intrin.h12
-rw-r--r--clang/lib/Headers/immintrin.h34
-rw-r--r--clang/lib/Headers/intrin.h43
-rw-r--r--clang/lib/Headers/keylockerintrin.h10
-rw-r--r--clang/lib/Headers/mmintrin.h4
-rw-r--r--clang/lib/Headers/nmmintrin.h4
-rw-r--r--clang/lib/Headers/opencl-c-base.h67
-rw-r--r--clang/lib/Headers/opencl-c.h1302
-rw-r--r--clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h32
-rw-r--r--clang/lib/Headers/openmp_wrappers/cmath54
-rw-r--r--clang/lib/Headers/openmp_wrappers/complex13
-rw-r--r--clang/lib/Headers/openmp_wrappers/complex.h9
-rw-r--r--clang/lib/Headers/openmp_wrappers/math.h10
-rw-r--r--clang/lib/Headers/pmmintrin.h4
-rw-r--r--clang/lib/Headers/ppc_wrappers/smmintrin.h2
-rw-r--r--clang/lib/Headers/prfchwintrin.h7
-rw-r--r--clang/lib/Headers/smmintrin.h223
-rw-r--r--clang/lib/Headers/tmmintrin.h10
-rw-r--r--clang/lib/Headers/vpclmulqdqintrin.h12
-rw-r--r--clang/lib/Headers/wasm_simd128.h191
-rw-r--r--clang/lib/Headers/wmmintrin.h4
-rw-r--r--clang/lib/Headers/x86gprintrin.h12
-rw-r--r--clang/lib/Headers/xmmintrin.h14
-rw-r--r--clang/lib/Headers/xopintrin.h62
-rw-r--r--clang/lib/Index/FileIndexRecord.cpp7
-rw-r--r--clang/lib/Index/IndexDecl.cpp10
-rw-r--r--clang/lib/Index/USRGeneration.cpp1
-rw-r--r--clang/lib/Interpreter/IncrementalExecutor.cpp11
-rw-r--r--clang/lib/Interpreter/IncrementalExecutor.h4
-rw-r--r--clang/lib/Interpreter/IncrementalParser.cpp9
-rw-r--r--clang/lib/Interpreter/IncrementalParser.h6
-rw-r--r--clang/lib/Interpreter/Interpreter.cpp58
-rw-r--r--clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp63
-rw-r--r--clang/lib/Lex/HeaderMap.cpp13
-rw-r--r--clang/lib/Lex/HeaderSearch.cpp99
-rw-r--r--clang/lib/Lex/Lexer.cpp467
-rw-r--r--clang/lib/Lex/LiteralSupport.cpp242
-rw-r--r--clang/lib/Lex/ModuleMap.cpp31
-rw-r--r--clang/lib/Lex/PPDirectives.cpp67
-rw-r--r--clang/lib/Lex/PPExpressions.cpp6
-rw-r--r--clang/lib/Lex/PPLexerChange.cpp57
-rw-r--r--clang/lib/Lex/PPMacroExpansion.cpp70
-rw-r--r--clang/lib/Lex/Pragma.cpp267
-rw-r--r--clang/lib/Lex/Preprocessor.cpp50
-rw-r--r--clang/lib/Lex/UnicodeCharSets.h470
-rw-r--r--clang/lib/Parse/ParseDecl.cpp86
-rw-r--r--clang/lib/Parse/ParseDeclCXX.cpp112
-rw-r--r--clang/lib/Parse/ParseExpr.cpp3
-rw-r--r--clang/lib/Parse/ParseExprCXX.cpp40
-rw-r--r--clang/lib/Parse/ParseOpenMP.cpp483
-rw-r--r--clang/lib/Parse/ParsePragma.cpp78
-rw-r--r--clang/lib/Parse/ParseStmt.cpp163
-rw-r--r--clang/lib/Parse/ParseStmtAsm.cpp4
-rw-r--r--clang/lib/Parse/ParseTemplate.cpp29
-rw-r--r--clang/lib/Parse/ParseTentative.cpp8
-rw-r--r--clang/lib/Parse/Parser.cpp1
-rw-r--r--clang/lib/Rewrite/HTMLRewrite.cpp3
-rw-r--r--clang/lib/Rewrite/Rewriter.cpp2
-rw-r--r--clang/lib/Sema/AnalysisBasedWarnings.cpp31
-rw-r--r--clang/lib/Sema/CodeCompleteConsumer.cpp6
-rw-r--r--clang/lib/Sema/DeclSpec.cpp32
-rw-r--r--clang/lib/Sema/JumpDiagnostics.cpp17
-rw-r--r--clang/lib/Sema/OpenCLBuiltins.td435
-rw-r--r--clang/lib/Sema/ParsedAttr.cpp9
-rw-r--r--clang/lib/Sema/ScopeInfo.cpp2
-rw-r--r--clang/lib/Sema/Sema.cpp129
-rw-r--r--clang/lib/Sema/SemaAccess.cpp2
-rw-r--r--clang/lib/Sema/SemaAttr.cpp7
-rw-r--r--clang/lib/Sema/SemaAvailability.cpp7
-rw-r--r--clang/lib/Sema/SemaCUDA.cpp7
-rw-r--r--clang/lib/Sema/SemaCXXScopeSpec.cpp4
-rw-r--r--clang/lib/Sema/SemaCast.cpp29
-rw-r--r--clang/lib/Sema/SemaChecking.cpp852
-rw-r--r--clang/lib/Sema/SemaCodeComplete.cpp188
-rw-r--r--clang/lib/Sema/SemaConcept.cpp35
-rw-r--r--clang/lib/Sema/SemaCoroutine.cpp73
-rw-r--r--clang/lib/Sema/SemaDecl.cpp826
-rw-r--r--clang/lib/Sema/SemaDeclAttr.cpp192
-rw-r--r--clang/lib/Sema/SemaDeclCXX.cpp384
-rw-r--r--clang/lib/Sema/SemaDeclObjC.cpp17
-rw-r--r--clang/lib/Sema/SemaExceptionSpec.cpp22
-rw-r--r--clang/lib/Sema/SemaExpr.cpp610
-rw-r--r--clang/lib/Sema/SemaExprCXX.cpp165
-rw-r--r--clang/lib/Sema/SemaExprMember.cpp14
-rw-r--r--clang/lib/Sema/SemaExprObjC.cpp18
-rw-r--r--clang/lib/Sema/SemaInit.cpp35
-rw-r--r--clang/lib/Sema/SemaLambda.cpp13
-rw-r--r--clang/lib/Sema/SemaLookup.cpp17
-rw-r--r--clang/lib/Sema/SemaObjCProperty.cpp3
-rw-r--r--clang/lib/Sema/SemaOpenMP.cpp580
-rw-r--r--clang/lib/Sema/SemaOverload.cpp398
-rw-r--r--clang/lib/Sema/SemaSYCL.cpp34
-rw-r--r--clang/lib/Sema/SemaStmt.cpp111
-rw-r--r--clang/lib/Sema/SemaStmtAsm.cpp39
-rw-r--r--clang/lib/Sema/SemaTemplate.cpp74
-rw-r--r--clang/lib/Sema/SemaTemplateDeduction.cpp1149
-rw-r--r--clang/lib/Sema/SemaTemplateInstantiate.cpp86
-rw-r--r--clang/lib/Sema/SemaTemplateInstantiateDecl.cpp173
-rw-r--r--clang/lib/Sema/SemaTemplateVariadic.cpp8
-rw-r--r--clang/lib/Sema/SemaType.cpp203
-rw-r--r--clang/lib/Sema/TreeTransform.h145
-rw-r--r--clang/lib/Sema/UsedDeclVisitor.h3
-rw-r--r--clang/lib/Serialization/ASTCommon.cpp5
-rw-r--r--clang/lib/Serialization/ASTReader.cpp676
-rw-r--r--clang/lib/Serialization/ASTReaderDecl.cpp74
-rw-r--r--clang/lib/Serialization/ASTReaderStmt.cpp47
-rw-r--r--clang/lib/Serialization/ASTWriter.cpp164
-rw-r--r--clang/lib/Serialization/ASTWriterDecl.cpp2
-rw-r--r--clang/lib/Serialization/ASTWriterStmt.cpp30
-rw-r--r--clang/lib/Serialization/GeneratePCH.cpp3
-rw-r--r--clang/lib/Serialization/ModuleFileExtension.cpp10
-rw-r--r--clang/lib/Serialization/ModuleManager.cpp11
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/AnalyzerStatsChecker.cpp5
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp16
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/BlockInCriticalSectionChecker.cpp28
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp6
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/CastValueChecker.cpp1
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/CheckObjCInstMethSignature.cpp2
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/CheckSecuritySyntaxOnly.cpp5
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/ChrootChecker.cpp7
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/ContainerModeling.cpp50
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/ConversionChecker.cpp12
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/DebugContainerModeling.cpp9
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/DebugIteratorModeling.cpp13
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/DirectIvarAssignment.cpp4
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp2
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/EnumCastOutOfRangeChecker.cpp8
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp19
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/InnerPointerChecker.cpp19
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/IteratorModeling.cpp3
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/IteratorRangeChecker.cpp2
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/LocalizationChecker.cpp7
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/MIGChecker.cpp5
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/MacOSKeychainAPIChecker.cpp5
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp203
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/MallocOverflowSecurityChecker.cpp39
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/MmapWriteExecChecker.cpp3
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/MoveChecker.cpp13
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/NSErrorChecker.cpp2
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/PthreadLockChecker.cpp5
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountChecker.cpp8
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.cpp7
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/ReturnPointerRangeChecker.cpp45
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/ReturnValueChecker.cpp1
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/STLAlgorithmModeling.cpp1
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/SimpleStreamChecker.cpp5
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/SmartPtrModeling.cpp14
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/StackAddrEscapeChecker.cpp86
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp24
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp3
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/StringChecker.cpp105
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/UnixAPIChecker.cpp4
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/ValistChecker.cpp9
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/VforkChecker.cpp24
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.h2
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp3
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/cert/InvalidPtrChecker.cpp280
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/cert/PutenvWithAutoChecker.cpp3
-rw-r--r--clang/lib/StaticAnalyzer/Core/BugReporter.cpp42
-rw-r--r--clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp406
-rw-r--r--clang/lib/StaticAnalyzer/Core/CallDescription.cpp146
-rw-r--r--clang/lib/StaticAnalyzer/Core/CallEvent.cpp91
-rw-r--r--clang/lib/StaticAnalyzer/Core/CheckerContext.cpp9
-rw-r--r--clang/lib/StaticAnalyzer/Core/CheckerManager.cpp27
-rw-r--r--clang/lib/StaticAnalyzer/Core/Environment.cpp2
-rw-r--r--clang/lib/StaticAnalyzer/Core/ExplodedGraph.cpp3
-rw-r--r--clang/lib/StaticAnalyzer/Core/ExprEngine.cpp16
-rw-r--r--clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp10
-rw-r--r--clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp2
-rw-r--r--clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp637
-rw-r--r--clang/lib/StaticAnalyzer/Core/LoopUnrolling.cpp2
-rw-r--r--clang/lib/StaticAnalyzer/Core/LoopWidening.cpp3
-rw-r--r--clang/lib/StaticAnalyzer/Core/MemRegion.cpp57
-rw-r--r--clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp389
-rw-r--r--clang/lib/StaticAnalyzer/Core/RangedConstraintManager.cpp15
-rw-r--r--clang/lib/StaticAnalyzer/Core/RegionStore.cpp349
-rw-r--r--clang/lib/StaticAnalyzer/Core/SMTConstraintManager.cpp7
-rw-r--r--clang/lib/StaticAnalyzer/Core/SValBuilder.cpp27
-rw-r--r--clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp14
-rw-r--r--clang/lib/StaticAnalyzer/Core/Store.cpp46
-rw-r--r--clang/lib/StaticAnalyzer/Core/SymbolManager.cpp14
-rw-r--r--clang/lib/Tooling/CommonOptionsParser.cpp2
-rw-r--r--clang/lib/Tooling/DependencyScanning/DependencyScanningService.cpp4
-rw-r--r--clang/lib/Tooling/DependencyScanning/DependencyScanningTool.cpp41
-rw-r--r--clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp169
-rw-r--r--clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp113
-rw-r--r--clang/lib/Tooling/DumpTool/ClangSrcLocDump.cpp8
-rw-r--r--clang/lib/Tooling/JSONCompilationDatabase.cpp13
-rw-r--r--clang/lib/Tooling/Syntax/BuildTree.cpp5
-rw-r--r--clang/lib/Tooling/Tooling.cpp34
-rw-r--r--clang/lib/Tooling/Transformer/Parsing.cpp2
-rw-r--r--clang/lib/Tooling/Transformer/Stencil.cpp79
-rw-r--r--clang/tools/clang-format/ClangFormat.cpp69
-rw-r--r--clang/tools/clang-nvlink-wrapper/ClangNvlinkWrapper.cpp196
-rw-r--r--clang/tools/clang-repl/ClangRepl.cpp5
-rw-r--r--clang/tools/driver/cc1_main.cpp4
-rw-r--r--clang/tools/driver/cc1as_main.cpp6
-rw-r--r--clang/tools/driver/driver.cpp40
-rw-r--r--clang/utils/TableGen/ClangASTPropertiesEmitter.cpp2
-rw-r--r--clang/utils/TableGen/ClangAttrEmitter.cpp68
-rw-r--r--clang/utils/TableGen/ClangCommentCommandInfoEmitter.cpp6
-rw-r--r--clang/utils/TableGen/ClangDiagnosticsEmitter.cpp24
-rw-r--r--clang/utils/TableGen/ClangOpcodesEmitter.cpp8
-rw-r--r--clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp160
-rw-r--r--clang/utils/TableGen/MveEmitter.cpp8
-rw-r--r--clang/utils/TableGen/NeonEmitter.cpp17
-rw-r--r--clang/utils/TableGen/RISCVVEmitter.cpp187
-rw-r--r--clang/utils/TableGen/TableGen.cpp6
-rw-r--r--clang/utils/TableGen/TableGenBackends.h1
-rw-r--r--compiler-rt/include/profile/InstrProfData.inc11
-rw-r--r--compiler-rt/include/sanitizer/asan_interface.h2
-rw-r--r--compiler-rt/include/sanitizer/common_interface_defs.h2
-rw-r--r--compiler-rt/include/sanitizer/dfsan_interface.h3
-rw-r--r--compiler-rt/include/sanitizer/linux_syscall_hooks.h2120
-rw-r--r--compiler-rt/include/sanitizer/tsan_interface.h3
-rw-r--r--compiler-rt/lib/asan/asan_allocator.cpp34
-rw-r--r--compiler-rt/lib/asan/asan_allocator.h2
-rw-r--r--compiler-rt/lib/asan/asan_debugging.cpp5
-rw-r--r--compiler-rt/lib/asan/asan_descriptions.cpp9
-rw-r--r--compiler-rt/lib/asan/asan_errors.cpp43
-rw-r--r--compiler-rt/lib/asan/asan_fake_stack.cpp7
-rw-r--r--compiler-rt/lib/asan/asan_fuchsia.cpp46
-rw-r--r--compiler-rt/lib/asan/asan_globals.cpp25
-rw-r--r--compiler-rt/lib/asan/asan_interceptors.cpp18
-rw-r--r--compiler-rt/lib/asan/asan_interceptors.h45
-rw-r--r--compiler-rt/lib/asan/asan_linux.cpp4
-rw-r--r--compiler-rt/lib/asan/asan_malloc_linux.cpp115
-rw-r--r--compiler-rt/lib/asan/asan_poisoning.cpp2
-rw-r--r--compiler-rt/lib/asan/asan_report.cpp25
-rw-r--r--compiler-rt/lib/asan/asan_rtl.cpp3
-rw-r--r--compiler-rt/lib/asan/asan_shadow_setup.cpp4
-rw-r--r--compiler-rt/lib/asan/asan_stats.cpp14
-rw-r--r--compiler-rt/lib/asan/asan_thread.cpp11
-rw-r--r--compiler-rt/lib/builtins/README.txt8
-rw-r--r--compiler-rt/lib/builtins/arm/truncdfsf2vfp.S4
-rw-r--r--compiler-rt/lib/builtins/atomic.c15
-rw-r--r--compiler-rt/lib/builtins/clear_cache.c9
-rw-r--r--compiler-rt/lib/builtins/cpu_model.c16
-rw-r--r--compiler-rt/lib/builtins/emutls.c17
-rw-r--r--compiler-rt/lib/builtins/fixdfdi.c4
-rw-r--r--compiler-rt/lib/builtins/fixsfdi.c4
-rw-r--r--compiler-rt/lib/builtins/fixunsdfdi.c4
-rw-r--r--compiler-rt/lib/builtins/fixunssfdi.c4
-rw-r--r--compiler-rt/lib/builtins/fixunsxfdi.c2
-rw-r--r--compiler-rt/lib/builtins/fixunsxfsi.c2
-rw-r--r--compiler-rt/lib/builtins/fixxfdi.c2
-rw-r--r--compiler-rt/lib/builtins/floatdidf.c4
-rw-r--r--compiler-rt/lib/builtins/floatdisf.c4
-rw-r--r--compiler-rt/lib/builtins/floatundidf.c4
-rw-r--r--compiler-rt/lib/builtins/floatundisf.c4
-rw-r--r--compiler-rt/lib/builtins/mingw_fixfloat.c34
-rw-r--r--compiler-rt/lib/builtins/riscv/restore.S10
-rw-r--r--compiler-rt/lib/builtins/riscv/save.S2
-rw-r--r--compiler-rt/lib/cfi/cfi.cpp8
-rw-r--r--compiler-rt/lib/dfsan/dfsan.cpp104
-rw-r--r--compiler-rt/lib/dfsan/dfsan.h2
-rw-r--r--compiler-rt/lib/dfsan/dfsan_custom.cpp41
-rw-r--r--compiler-rt/lib/dfsan/dfsan_interceptors.cpp65
-rw-r--r--compiler-rt/lib/dfsan/dfsan_thread.cpp2
-rw-r--r--compiler-rt/lib/dfsan/dfsan_thread.h5
-rw-r--r--compiler-rt/lib/dfsan/done_abilist.txt3
-rw-r--r--compiler-rt/lib/dfsan/libc_ubuntu1404_abilist.txt1
-rw-r--r--compiler-rt/lib/fuzzer/FuzzerBuiltinsMsvc.h3
-rw-r--r--compiler-rt/lib/fuzzer/FuzzerCommand.h12
-rw-r--r--compiler-rt/lib/fuzzer/FuzzerCorpus.h25
-rw-r--r--compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.cpp32
-rw-r--r--compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.h16
-rw-r--r--compiler-rt/lib/fuzzer/FuzzerDefs.h24
-rw-r--r--compiler-rt/lib/fuzzer/FuzzerDictionary.h11
-rw-r--r--compiler-rt/lib/fuzzer/FuzzerDriver.cpp74
-rw-r--r--compiler-rt/lib/fuzzer/FuzzerExtraCounters.cpp8
-rw-r--r--compiler-rt/lib/fuzzer/FuzzerExtraCountersDarwin.cpp22
-rw-r--r--compiler-rt/lib/fuzzer/FuzzerExtraCountersWindows.cpp80
-rw-r--r--compiler-rt/lib/fuzzer/FuzzerFlags.def9
-rw-r--r--compiler-rt/lib/fuzzer/FuzzerFork.cpp124
-rw-r--r--compiler-rt/lib/fuzzer/FuzzerFork.h4
-rw-r--r--compiler-rt/lib/fuzzer/FuzzerIO.cpp18
-rw-r--r--compiler-rt/lib/fuzzer/FuzzerIO.h12
-rw-r--r--compiler-rt/lib/fuzzer/FuzzerIOPosix.cpp3
-rw-r--r--compiler-rt/lib/fuzzer/FuzzerIOWindows.cpp8
-rw-r--r--compiler-rt/lib/fuzzer/FuzzerInternal.h11
-rw-r--r--compiler-rt/lib/fuzzer/FuzzerLoop.cpp14
-rw-r--r--compiler-rt/lib/fuzzer/FuzzerMerge.cpp211
-rw-r--r--compiler-rt/lib/fuzzer/FuzzerMerge.h38
-rw-r--r--compiler-rt/lib/fuzzer/FuzzerMutate.cpp6
-rw-r--r--compiler-rt/lib/fuzzer/FuzzerMutate.h18
-rw-r--r--compiler-rt/lib/fuzzer/FuzzerOptions.h1
-rw-r--r--compiler-rt/lib/fuzzer/FuzzerTracePC.cpp10
-rw-r--r--compiler-rt/lib/fuzzer/FuzzerTracePC.h2
-rw-r--r--compiler-rt/lib/fuzzer/FuzzerUtil.cpp4
-rw-r--r--compiler-rt/lib/fuzzer/FuzzerUtil.h4
-rw-r--r--compiler-rt/lib/fuzzer/FuzzerUtilFuchsia.cpp156
-rw-r--r--compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp2
-rw-r--r--compiler-rt/lib/gwp_asan/common.h57
-rw-r--r--compiler-rt/lib/gwp_asan/guarded_pool_allocator.cpp7
-rw-r--r--compiler-rt/lib/hwasan/hwasan.cpp21
-rw-r--r--compiler-rt/lib/hwasan/hwasan.h25
-rw-r--r--compiler-rt/lib/hwasan/hwasan_allocation_functions.cpp81
-rw-r--r--compiler-rt/lib/hwasan/hwasan_allocator.cpp58
-rw-r--r--compiler-rt/lib/hwasan/hwasan_exceptions.cpp4
-rw-r--r--compiler-rt/lib/hwasan/hwasan_fuchsia.cpp4
-rw-r--r--compiler-rt/lib/hwasan/hwasan_interceptors.cpp70
-rw-r--r--compiler-rt/lib/hwasan/hwasan_interface_internal.h48
-rw-r--r--compiler-rt/lib/hwasan/hwasan_linux.cpp149
-rw-r--r--compiler-rt/lib/hwasan/hwasan_report.cpp115
-rw-r--r--compiler-rt/lib/hwasan/hwasan_setjmp_aarch64.S (renamed from compiler-rt/lib/hwasan/hwasan_setjmp.S)21
-rw-r--r--compiler-rt/lib/hwasan/hwasan_setjmp_x86_64.S82
-rw-r--r--compiler-rt/lib/hwasan/hwasan_thread.cpp31
-rw-r--r--compiler-rt/lib/hwasan/hwasan_thread.h10
-rw-r--r--compiler-rt/lib/hwasan/hwasan_type_test.cpp2
-rw-r--r--compiler-rt/lib/interception/interception_win.cpp48
-rw-r--r--compiler-rt/lib/lsan/lsan_allocator.h2
-rw-r--r--compiler-rt/lib/lsan/lsan_common.cpp145
-rw-r--r--compiler-rt/lib/lsan/lsan_common.h10
-rw-r--r--compiler-rt/lib/lsan/lsan_common_mac.cpp2
-rw-r--r--compiler-rt/lib/lsan/lsan_fuchsia.cpp5
-rw-r--r--compiler-rt/lib/lsan/lsan_interceptors.cpp67
-rw-r--r--compiler-rt/lib/lsan/lsan_mac.cpp2
-rw-r--r--compiler-rt/lib/lsan/lsan_posix.cpp2
-rw-r--r--compiler-rt/lib/lsan/lsan_thread.cpp26
-rw-r--r--compiler-rt/lib/lsan/lsan_thread.h5
-rw-r--r--compiler-rt/lib/memprof/memprof_allocator.cpp328
-rw-r--r--compiler-rt/lib/memprof/memprof_flags.inc14
-rw-r--r--compiler-rt/lib/memprof/memprof_interceptors.cpp12
-rw-r--r--compiler-rt/lib/memprof/memprof_interceptors.h4
-rw-r--r--compiler-rt/lib/memprof/memprof_malloc_linux.cpp103
-rw-r--r--compiler-rt/lib/memprof/memprof_meminfoblock.h116
-rw-r--r--compiler-rt/lib/memprof/memprof_mibmap.cpp35
-rw-r--r--compiler-rt/lib/memprof/memprof_mibmap.h24
-rw-r--r--compiler-rt/lib/memprof/memprof_rawprofile.cpp250
-rw-r--r--compiler-rt/lib/memprof/memprof_rawprofile.h21
-rw-r--r--compiler-rt/lib/memprof/memprof_rtl.cpp11
-rw-r--r--compiler-rt/lib/memprof/memprof_stats.cpp14
-rw-r--r--compiler-rt/lib/memprof/memprof_thread.cpp11
-rw-r--r--compiler-rt/lib/memprof/tests/driver.cpp14
-rw-r--r--compiler-rt/lib/memprof/tests/rawprofile.cpp188
-rw-r--r--compiler-rt/lib/msan/msan.cpp5
-rw-r--r--compiler-rt/lib/msan/msan.h4
-rw-r--r--compiler-rt/lib/msan/msan_chained_origin_depot.cpp2
-rw-r--r--compiler-rt/lib/msan/msan_chained_origin_depot.h2
-rw-r--r--compiler-rt/lib/msan/msan_interceptors.cpp187
-rw-r--r--compiler-rt/lib/msan/msan_interface_internal.h2
-rw-r--r--compiler-rt/lib/msan/msan_linux.cpp12
-rw-r--r--compiler-rt/lib/msan/msan_poisoning.cpp4
-rw-r--r--compiler-rt/lib/msan/msan_report.cpp21
-rw-r--r--compiler-rt/lib/msan/msan_thread.cpp2
-rw-r--r--compiler-rt/lib/msan/msan_thread.h3
-rw-r--r--compiler-rt/lib/orc/c_api.h32
-rw-r--r--compiler-rt/lib/orc/elfnix_platform.cpp584
-rw-r--r--compiler-rt/lib/orc/elfnix_platform.h131
-rw-r--r--compiler-rt/lib/orc/elfnix_tls.x86-64.S64
-rw-r--r--compiler-rt/lib/orc/executor_address.h131
-rw-r--r--compiler-rt/lib/orc/macho_ehframe_registration.cpp68
-rw-r--r--compiler-rt/lib/orc/macho_platform.cpp197
-rw-r--r--compiler-rt/lib/orc/macho_platform.h48
-rw-r--r--compiler-rt/lib/orc/macho_tlv.arm64.S92
-rw-r--r--compiler-rt/lib/orc/macho_tlv.x86-64.S5
-rw-r--r--compiler-rt/lib/orc/simple_packed_serialization.h27
-rw-r--r--compiler-rt/lib/orc/wrapper_function_utils.h178
-rw-r--r--compiler-rt/lib/profile/InstrProfiling.h30
-rw-r--r--compiler-rt/lib/profile/InstrProfilingBuffer.c2
-rw-r--r--compiler-rt/lib/profile/InstrProfilingFile.c480
-rw-r--r--compiler-rt/lib/profile/InstrProfilingInternal.h4
-rw-r--r--compiler-rt/lib/profile/InstrProfilingMerge.c43
-rw-r--r--compiler-rt/lib/profile/InstrProfilingPlatformFuchsia.c8
-rw-r--r--compiler-rt/lib/profile/InstrProfilingPlatformLinux.c42
-rw-r--r--compiler-rt/lib/profile/InstrProfilingPlatformOther.c10
-rw-r--r--compiler-rt/lib/profile/InstrProfilingUtil.c20
-rw-r--r--compiler-rt/lib/profile/InstrProfilingValue.c2
-rw-r--r--compiler-rt/lib/profile/InstrProfilingVersionVar.c11
-rw-r--r--compiler-rt/lib/profile/InstrProfilingWriter.c8
-rw-r--r--compiler-rt/lib/sanitizer_common/sancov_flags.inc2
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_addrhashmap.h40
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_allocator.h8
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_allocator_bytemap.h107
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_allocator_combined.h6
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_allocator_dlsym.h79
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary32.h17
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary64.h21
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_allocator_secondary.h8
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_allocator_size_class_map.h8
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_asm.h11
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_atomic_clang_mips.h2
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_chained_origin_depot.cpp82
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_chained_origin_depot.h45
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_common.h48
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc734
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_format.inc10
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_netbsd_compat.inc4
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_i386.inc.S1
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_x86_64.inc.S1
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_common_libcdep.cpp20
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_common_nolibc.cpp1
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_common_syscalls.inc1559
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_coverage_fuchsia.cpp8
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_coverage_libcdep_new.cpp65
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_deadlock_detector.h2
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_dense_map.h678
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_dense_map_info.h260
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_file.cpp15
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_file.h2
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_flag_parser.h2
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_flags.inc4
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_flat_map.h173
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_fuchsia.cpp43
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_hash.h24
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_interceptors_ioctl_netbsd.inc2
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_interface_internal.h7
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_internal_defs.h47
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_libc.cpp12
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_libc.h5
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_libignore.cpp8
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_libignore.h2
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp133
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_linux.h12
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_linux_s390.cpp14
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_local_address_space_view.h2
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp34
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_malloc_mac.inc20
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_mutex.cpp4
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_mutex.h228
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_persistent_allocator.cpp18
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_persistent_allocator.h71
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_platform.h25
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h45
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_freebsd.cpp25
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_freebsd.h180
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_linux.cpp56
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.cpp1
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.h1
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp25
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h30
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_solaris.cpp1
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_solaris.h1
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_posix.h7
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_posix_libcdep.cpp2
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_printf.cpp37
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_procmaps.h18
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_procmaps_common.cpp26
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_procmaps_solaris.cpp10
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_signal_interceptors.inc12
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_solaris.cpp22
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_stack_store.cpp91
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_stack_store.h50
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_stackdepot.cpp138
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_stackdepot.h38
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_stackdepotbase.h173
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.cpp7
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.h4
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_libcdep.cpp2
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_printer.cpp12
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_sparc.cpp2
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp2
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_netbsd_libcdep.cpp2
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_symbolizer.h6
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_internal.h2
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp12
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_report.cpp10
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_syscall_linux_hexagon.inc131
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_thread_registry.cpp60
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_thread_registry.h5
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.cpp28
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_type_traits.h79
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_win.cpp26
-rwxr-xr-xcompiler-rt/lib/sanitizer_common/symbolizer/scripts/ar_to_bc.sh2
-rwxr-xr-xcompiler-rt/lib/sanitizer_common/symbolizer/scripts/build_symbolizer.sh11
-rw-r--r--compiler-rt/lib/scudo/scudo_utils.cpp2
-rw-r--r--compiler-rt/lib/scudo/scudo_utils.h2
-rw-r--r--compiler-rt/lib/scudo/standalone/combined.h12
-rw-r--r--compiler-rt/lib/scudo/standalone/internal_defs.h20
-rw-r--r--compiler-rt/lib/scudo/standalone/memtag.h11
-rw-r--r--compiler-rt/lib/scudo/standalone/primary64.h4
-rw-r--r--compiler-rt/lib/scudo/standalone/secondary.h13
-rw-r--r--compiler-rt/lib/scudo/standalone/size_class_map.h4
-rw-r--r--compiler-rt/lib/scudo/standalone/string_utils.cpp2
-rw-r--r--compiler-rt/lib/scudo/standalone/string_utils.h7
-rw-r--r--compiler-rt/lib/scudo/standalone/vector.h15
-rw-r--r--compiler-rt/lib/scudo/standalone/wrappers_c.h13
-rw-r--r--compiler-rt/lib/scudo/standalone/wrappers_c.inc19
-rw-r--r--compiler-rt/lib/scudo/standalone/wrappers_c_checks.h4
-rw-r--r--compiler-rt/lib/tsan/dd/dd_interceptors.cpp3
-rw-r--r--compiler-rt/lib/tsan/dd/dd_rtl.cpp6
-rw-r--r--compiler-rt/lib/tsan/dd/dd_rtl.h6
-rw-r--r--compiler-rt/lib/tsan/go/tsan_go.cpp39
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_clock.cpp10
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_clock.h2
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_debugging.cpp10
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_defs.h54
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_dense_alloc.h34
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_external.cpp19
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_fd.cpp24
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_fd.h2
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_flags.cpp1
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_flags.inc1
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_ignoreset.cpp12
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_ignoreset.h13
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_ilist.h189
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_interceptors.h61
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_interceptors_mac.cpp6
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp462
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_interface.cpp101
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_interface.h10
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_interface.inc182
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_interface_ann.cpp169
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_interface_atomic.cpp321
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_interface_inl.h133
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_interface_java.cpp290
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_mman.cpp14
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_mman.h49
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_mutexset.cpp52
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_mutexset.h49
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_platform.h1089
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp104
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_platform_mac.cpp84
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_platform_posix.cpp37
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_platform_windows.cpp3
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_report.cpp77
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_report.h36
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_rtl.cpp722
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_rtl.h544
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_rtl_access.cpp604
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_rtl_amd64.S80
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_rtl_mutex.cpp450
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp330
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp402
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_shadow.h233
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_stack_trace.cpp12
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_symbolize.cpp3
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_sync.cpp58
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_sync.h35
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_trace.h182
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_update_shadow_word.inc (renamed from compiler-rt/lib/tsan/rtl/tsan_update_shadow_word_inl.h)2
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_vector_clock.cpp126
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_vector_clock.h51
-rw-r--r--compiler-rt/lib/ubsan/ubsan_diag.cpp8
-rw-r--r--compiler-rt/lib/xray/xray_basic_flags.h2
-rw-r--r--compiler-rt/lib/xray/xray_buffer_queue.cpp2
-rw-r--r--compiler-rt/lib/xray/xray_flags.h2
-rw-r--r--compiler-rt/lib/xray/xray_interface.cpp2
-rw-r--r--compiler-rt/lib/xray/xray_profiling.cpp2
-rw-r--r--compiler-rt/lib/xray/xray_x86_64.cpp9
-rw-r--r--libcxx/CREDITS.TXT32
-rw-r--r--libcxx/include/__algorithm/adjacent_find.h5
-rw-r--r--libcxx/include/__algorithm/all_of.h5
-rw-r--r--libcxx/include/__algorithm/any_of.h5
-rw-r--r--libcxx/include/__algorithm/binary_search.h6
-rw-r--r--libcxx/include/__algorithm/clamp.h6
-rw-r--r--libcxx/include/__algorithm/comp.h5
-rw-r--r--libcxx/include/__algorithm/comp_ref_type.h14
-rw-r--r--libcxx/include/__algorithm/copy.h5
-rw-r--r--libcxx/include/__algorithm/copy_backward.h5
-rw-r--r--libcxx/include/__algorithm/copy_if.h5
-rw-r--r--libcxx/include/__algorithm/copy_n.h8
-rw-r--r--libcxx/include/__algorithm/count.h5
-rw-r--r--libcxx/include/__algorithm/count_if.h5
-rw-r--r--libcxx/include/__algorithm/equal.h11
-rw-r--r--libcxx/include/__algorithm/equal_range.h5
-rw-r--r--libcxx/include/__algorithm/fill.h5
-rw-r--r--libcxx/include/__algorithm/fill_n.h5
-rw-r--r--libcxx/include/__algorithm/find.h5
-rw-r--r--libcxx/include/__algorithm/find_end.h16
-rw-r--r--libcxx/include/__algorithm/find_first_of.h5
-rw-r--r--libcxx/include/__algorithm/find_if.h5
-rw-r--r--libcxx/include/__algorithm/find_if_not.h5
-rw-r--r--libcxx/include/__algorithm/for_each.h5
-rw-r--r--libcxx/include/__algorithm/for_each_n.h5
-rw-r--r--libcxx/include/__algorithm/generate.h5
-rw-r--r--libcxx/include/__algorithm/generate_n.h5
-rw-r--r--libcxx/include/__algorithm/half_positive.h5
-rw-r--r--libcxx/include/__algorithm/includes.h5
-rw-r--r--libcxx/include/__algorithm/is_heap.h5
-rw-r--r--libcxx/include/__algorithm/is_heap_until.h5
-rw-r--r--libcxx/include/__algorithm/is_partitioned.h5
-rw-r--r--libcxx/include/__algorithm/is_permutation.h12
-rw-r--r--libcxx/include/__algorithm/is_sorted.h5
-rw-r--r--libcxx/include/__algorithm/is_sorted_until.h5
-rw-r--r--libcxx/include/__algorithm/iter_swap.h5
-rw-r--r--libcxx/include/__algorithm/lexicographical_compare.h5
-rw-r--r--libcxx/include/__algorithm/lower_bound.h8
-rw-r--r--libcxx/include/__algorithm/make_heap.h5
-rw-r--r--libcxx/include/__algorithm/max_element.h5
-rw-r--r--libcxx/include/__algorithm/merge.h5
-rw-r--r--libcxx/include/__algorithm/min_element.h5
-rw-r--r--libcxx/include/__algorithm/minmax.h5
-rw-r--r--libcxx/include/__algorithm/minmax_element.h5
-rw-r--r--libcxx/include/__algorithm/mismatch.h5
-rw-r--r--libcxx/include/__algorithm/move.h5
-rw-r--r--libcxx/include/__algorithm/move_backward.h5
-rw-r--r--libcxx/include/__algorithm/next_permutation.h5
-rw-r--r--libcxx/include/__algorithm/none_of.h5
-rw-r--r--libcxx/include/__algorithm/nth_element.h18
-rw-r--r--libcxx/include/__algorithm/partial_sort.h15
-rw-r--r--libcxx/include/__algorithm/partial_sort_copy.h5
-rw-r--r--libcxx/include/__algorithm/partition.h10
-rw-r--r--libcxx/include/__algorithm/partition_copy.h5
-rw-r--r--libcxx/include/__algorithm/partition_point.h5
-rw-r--r--libcxx/include/__algorithm/pop_heap.h5
-rw-r--r--libcxx/include/__algorithm/prev_permutation.h5
-rw-r--r--libcxx/include/__algorithm/push_heap.h5
-rw-r--r--libcxx/include/__algorithm/remove.h5
-rw-r--r--libcxx/include/__algorithm/remove_copy.h5
-rw-r--r--libcxx/include/__algorithm/remove_copy_if.h5
-rw-r--r--libcxx/include/__algorithm/remove_if.h9
-rw-r--r--libcxx/include/__algorithm/replace.h5
-rw-r--r--libcxx/include/__algorithm/replace_copy.h5
-rw-r--r--libcxx/include/__algorithm/replace_copy_if.h5
-rw-r--r--libcxx/include/__algorithm/replace_if.h5
-rw-r--r--libcxx/include/__algorithm/reverse.h5
-rw-r--r--libcxx/include/__algorithm/reverse_copy.h5
-rw-r--r--libcxx/include/__algorithm/rotate.h5
-rw-r--r--libcxx/include/__algorithm/rotate_copy.h5
-rw-r--r--libcxx/include/__algorithm/sample.h3
-rw-r--r--libcxx/include/__algorithm/search.h12
-rw-r--r--libcxx/include/__algorithm/search_n.h12
-rw-r--r--libcxx/include/__algorithm/set_difference.h5
-rw-r--r--libcxx/include/__algorithm/set_intersection.h5
-rw-r--r--libcxx/include/__algorithm/set_symmetric_difference.h5
-rw-r--r--libcxx/include/__algorithm/set_union.h5
-rw-r--r--libcxx/include/__algorithm/shift_left.h5
-rw-r--r--libcxx/include/__algorithm/shift_right.h5
-rw-r--r--libcxx/include/__algorithm/shuffle.h33
-rw-r--r--libcxx/include/__algorithm/sift_down.h9
-rw-r--r--libcxx/include/__algorithm/sort.h96
-rw-r--r--libcxx/include/__algorithm/sort_heap.h5
-rw-r--r--libcxx/include/__algorithm/stable_partition.h24
-rw-r--r--libcxx/include/__algorithm/stable_sort.h9
-rw-r--r--libcxx/include/__algorithm/swap_ranges.h5
-rw-r--r--libcxx/include/__algorithm/transform.h5
-rw-r--r--libcxx/include/__algorithm/unique.h9
-rw-r--r--libcxx/include/__algorithm/unique_copy.h9
-rw-r--r--libcxx/include/__algorithm/unwrap_iter.h7
-rw-r--r--libcxx/include/__algorithm/upper_bound.h8
-rw-r--r--libcxx/include/__availability12
-rw-r--r--libcxx/include/__bit/bit_cast.h38
-rw-r--r--libcxx/include/__bsd_locale_defaults.h2
-rw-r--r--libcxx/include/__bsd_locale_fallbacks.h6
-rw-r--r--libcxx/include/__charconv/chars_format.h77
-rw-r--r--libcxx/include/__charconv/from_chars_result.h37
-rw-r--r--libcxx/include/__charconv/to_chars_result.h37
-rw-r--r--libcxx/include/__compare/common_comparison_category.h94
-rw-r--r--libcxx/include/__compare/compare_three_way.h41
-rw-r--r--libcxx/include/__compare/compare_three_way_result.h43
-rw-r--r--libcxx/include/__compare/is_eq.h34
-rw-r--r--libcxx/include/__compare/ordering.h319
-rw-r--r--libcxx/include/__compare/synth_three_way.h51
-rw-r--r--libcxx/include/__compare/three_way_comparable.h58
-rw-r--r--libcxx/include/__concepts/arithmetic.h48
-rw-r--r--libcxx/include/__concepts/assignable.h40
-rw-r--r--libcxx/include/__concepts/boolean_testable.h38
-rw-r--r--libcxx/include/__concepts/class_or_enum.h32
-rw-r--r--libcxx/include/__concepts/common_reference_with.h37
-rw-r--r--libcxx/include/__concepts/common_with.h47
-rw-r--r--libcxx/include/__concepts/constructible.h56
-rw-r--r--libcxx/include/__concepts/convertible_to.h36
-rw-r--r--libcxx/include/__concepts/copyable.h39
-rw-r--r--libcxx/include/__concepts/derived_from.h34
-rw-r--r--libcxx/include/__concepts/destructible.h32
-rw-r--r--libcxx/include/__concepts/different_from.h31
-rw-r--r--libcxx/include/__concepts/equality_comparable.h53
-rw-r--r--libcxx/include/__concepts/invocable.h41
-rw-r--r--libcxx/include/__concepts/movable.h39
-rw-r--r--libcxx/include/__concepts/predicate.h35
-rw-r--r--libcxx/include/__concepts/regular.h33
-rw-r--r--libcxx/include/__concepts/relation.h44
-rw-r--r--libcxx/include/__concepts/same_as.h35
-rw-r--r--libcxx/include/__concepts/semiregular.h33
-rw-r--r--libcxx/include/__concepts/swappable.h115
-rw-r--r--libcxx/include/__concepts/totally_ordered.h57
-rw-r--r--libcxx/include/__config264
-rw-r--r--libcxx/include/__coroutine/coroutine_handle.h202
-rw-r--r--libcxx/include/__coroutine/coroutine_traits.h53
-rw-r--r--libcxx/include/__coroutine/noop_coroutine_handle.h86
-rw-r--r--libcxx/include/__coroutine/trivial_awaitables.h46
-rw-r--r--libcxx/include/__debug2
-rw-r--r--libcxx/include/__errc2
-rw-r--r--libcxx/include/__format/format_arg.h256
-rw-r--r--libcxx/include/__format/format_args.h71
-rw-r--r--libcxx/include/__format/format_context.h168
-rw-r--r--libcxx/include/__format/format_error.h5
-rw-r--r--libcxx/include/__format/format_fwd.h56
-rw-r--r--libcxx/include/__format/format_parse_context.h12
-rw-r--r--libcxx/include/__format/format_string.h169
-rw-r--r--libcxx/include/__format/format_to_n_result.h41
-rw-r--r--libcxx/include/__format/formatter.h268
-rw-r--r--libcxx/include/__format/formatter_bool.h147
-rw-r--r--libcxx/include/__format/formatter_char.h104
-rw-r--r--libcxx/include/__format/formatter_integer.h170
-rw-r--r--libcxx/include/__format/formatter_integral.h463
-rw-r--r--libcxx/include/__format/formatter_string.h163
-rw-r--r--libcxx/include/__format/parser_std_format_spec.h1184
-rw-r--r--libcxx/include/__function_like.h5
-rw-r--r--libcxx/include/__functional/bind.h26
-rw-r--r--libcxx/include/__functional/bind_back.h65
-rw-r--r--libcxx/include/__functional/bind_front.h40
-rw-r--r--libcxx/include/__functional/compose.h52
-rw-r--r--libcxx/include/__functional/function.h20
-rw-r--r--libcxx/include/__functional/hash.h17
-rw-r--r--libcxx/include/__functional/not_fn.h32
-rw-r--r--libcxx/include/__functional/operations.h114
-rw-r--r--libcxx/include/__functional/perfect_forward.h111
-rw-r--r--libcxx/include/__functional/reference_wrapper.h4
-rw-r--r--libcxx/include/__functional/unary_function.h5
-rw-r--r--libcxx/include/__functional/unwrap_ref.h9
-rw-r--r--libcxx/include/__functional/weak_result_type.h8
-rw-r--r--libcxx/include/__hash_table8
-rw-r--r--libcxx/include/__iterator/access.h5
-rw-r--r--libcxx/include/__iterator/advance.h7
-rw-r--r--libcxx/include/__iterator/back_insert_iterator.h19
-rw-r--r--libcxx/include/__iterator/common_iterator.h5
-rw-r--r--libcxx/include/__iterator/concepts.h8
-rw-r--r--libcxx/include/__iterator/counted_iterator.h7
-rw-r--r--libcxx/include/__iterator/data.h5
-rw-r--r--libcxx/include/__iterator/default_sentinel.h5
-rw-r--r--libcxx/include/__iterator/distance.h5
-rw-r--r--libcxx/include/__iterator/empty.h5
-rw-r--r--libcxx/include/__iterator/erase_if_container.h5
-rw-r--r--libcxx/include/__iterator/front_insert_iterator.h5
-rw-r--r--libcxx/include/__iterator/incrementable_traits.h5
-rw-r--r--libcxx/include/__iterator/insert_iterator.h20
-rw-r--r--libcxx/include/__iterator/istream_iterator.h11
-rw-r--r--libcxx/include/__iterator/istreambuf_iterator.h5
-rw-r--r--libcxx/include/__iterator/iter_move.h5
-rw-r--r--libcxx/include/__iterator/iter_swap.h7
-rw-r--r--libcxx/include/__iterator/iterator.h5
-rw-r--r--libcxx/include/__iterator/iterator_traits.h11
-rw-r--r--libcxx/include/__iterator/move_iterator.h9
-rw-r--r--libcxx/include/__iterator/next.h5
-rw-r--r--libcxx/include/__iterator/ostream_iterator.h5
-rw-r--r--libcxx/include/__iterator/ostreambuf_iterator.h5
-rw-r--r--libcxx/include/__iterator/prev.h5
-rw-r--r--libcxx/include/__iterator/projected.h5
-rw-r--r--libcxx/include/__iterator/readable_traits.h5
-rw-r--r--libcxx/include/__iterator/reverse_access.h5
-rw-r--r--libcxx/include/__iterator/reverse_iterator.h17
-rw-r--r--libcxx/include/__iterator/size.h5
-rw-r--r--libcxx/include/__iterator/unreachable_sentinel.h38
-rw-r--r--libcxx/include/__iterator/wrap_iter.h38
-rw-r--r--libcxx/include/__libcpp_version2
-rw-r--r--libcxx/include/__locale18
-rw-r--r--libcxx/include/__mbstate_t.h44
-rw-r--r--libcxx/include/__memory/addressof.h20
-rw-r--r--libcxx/include/__memory/allocation_guard.h6
-rw-r--r--libcxx/include/__memory/allocator.h13
-rw-r--r--libcxx/include/__memory/allocator_arg_t.h10
-rw-r--r--libcxx/include/__memory/allocator_traits.h74
-rw-r--r--libcxx/include/__memory/auto_ptr.h5
-rw-r--r--libcxx/include/__memory/compressed_pair.h9
-rw-r--r--libcxx/include/__memory/construct_at.h40
-rw-r--r--libcxx/include/__memory/pointer_safety.h57
-rw-r--r--libcxx/include/__memory/pointer_traits.h37
-rw-r--r--libcxx/include/__memory/raw_storage_iterator.h5
-rw-r--r--libcxx/include/__memory/shared_ptr.h1046
-rw-r--r--libcxx/include/__memory/temporary_buffer.h5
-rw-r--r--libcxx/include/__memory/uninitialized_algorithms.h28
-rw-r--r--libcxx/include/__memory/unique_ptr.h41
-rw-r--r--libcxx/include/__memory/uses_allocator.h2
-rw-r--r--libcxx/include/__mutex_base10
-rw-r--r--libcxx/include/__node_handle52
-rw-r--r--libcxx/include/__nullptr2
-rw-r--r--libcxx/include/__ranges/access.h10
-rw-r--r--libcxx/include/__ranges/all.h20
-rw-r--r--libcxx/include/__ranges/common_view.h32
-rw-r--r--libcxx/include/__ranges/concepts.h7
-rw-r--r--libcxx/include/__ranges/copyable_box.h13
-rw-r--r--libcxx/include/__ranges/counted.h94
-rw-r--r--libcxx/include/__ranges/dangling.h5
-rw-r--r--libcxx/include/__ranges/data.h9
-rw-r--r--libcxx/include/__ranges/drop_view.h8
-rw-r--r--libcxx/include/__ranges/empty.h5
-rw-r--r--libcxx/include/__ranges/empty_view.h5
-rw-r--r--libcxx/include/__ranges/enable_borrowed_range.h7
-rw-r--r--libcxx/include/__ranges/enable_view.h5
-rw-r--r--libcxx/include/__ranges/iota_view.h408
-rw-r--r--libcxx/include/__ranges/join_view.h350
-rw-r--r--libcxx/include/__ranges/non_propagating_cache.h39
-rw-r--r--libcxx/include/__ranges/range_adaptor.h73
-rw-r--r--libcxx/include/__ranges/ref_view.h6
-rw-r--r--libcxx/include/__ranges/reverse_view.h190
-rw-r--r--libcxx/include/__ranges/single_view.h81
-rw-r--r--libcxx/include/__ranges/size.h9
-rw-r--r--libcxx/include/__ranges/subrange.h160
-rw-r--r--libcxx/include/__ranges/take_view.h185
-rw-r--r--libcxx/include/__ranges/transform_view.h57
-rw-r--r--libcxx/include/__ranges/view_interface.h8
-rw-r--r--libcxx/include/__split_buffer13
-rw-r--r--libcxx/include/__string58
-rw-r--r--libcxx/include/__thread/poll_with_backoff.h68
-rw-r--r--libcxx/include/__threading_support95
-rw-r--r--libcxx/include/__tree14
-rw-r--r--libcxx/include/__tuple42
-rw-r--r--libcxx/include/__undef_macros2
-rw-r--r--libcxx/include/__utility/as_const.h5
-rw-r--r--libcxx/include/__utility/cmp.h11
-rw-r--r--libcxx/include/__utility/decay_copy.h (renamed from libcxx/include/__utility/__decay_copy.h)16
-rw-r--r--libcxx/include/__utility/declval.h5
-rw-r--r--libcxx/include/__utility/exchange.h9
-rw-r--r--libcxx/include/__utility/forward.h5
-rw-r--r--libcxx/include/__utility/in_place.h11
-rw-r--r--libcxx/include/__utility/integer_sequence.h13
-rw-r--r--libcxx/include/__utility/move.h7
-rw-r--r--libcxx/include/__utility/pair.h110
-rw-r--r--libcxx/include/__utility/piecewise_construct.h7
-rw-r--r--libcxx/include/__utility/rel_ops.h5
-rw-r--r--libcxx/include/__utility/swap.h5
-rw-r--r--libcxx/include/__utility/to_underlying.h7
-rw-r--r--libcxx/include/__variant/monostate.h5
-rw-r--r--libcxx/include/algorithm7
-rw-r--r--libcxx/include/any2
-rw-r--r--libcxx/include/array6
-rw-r--r--libcxx/include/atomic513
-rw-r--r--libcxx/include/barrier2
-rw-r--r--libcxx/include/bit30
-rw-r--r--libcxx/include/bitset4
-rw-r--r--libcxx/include/cassert2
-rw-r--r--libcxx/include/ccomplex2
-rw-r--r--libcxx/include/cctype2
-rw-r--r--libcxx/include/cerrno2
-rw-r--r--libcxx/include/cfenv2
-rw-r--r--libcxx/include/cfloat2
-rw-r--r--libcxx/include/charconv102
-rw-r--r--libcxx/include/chrono189
-rw-r--r--libcxx/include/cinttypes2
-rw-r--r--libcxx/include/ciso6462
-rw-r--r--libcxx/include/climits2
-rw-r--r--libcxx/include/clocale4
-rw-r--r--libcxx/include/cmath4
-rw-r--r--libcxx/include/codecvt8
-rw-r--r--libcxx/include/compare377
-rw-r--r--libcxx/include/complex11
-rw-r--r--libcxx/include/complex.h2
-rw-r--r--libcxx/include/concepts340
-rw-r--r--libcxx/include/condition_variable2
-rw-r--r--libcxx/include/coroutine52
-rw-r--r--libcxx/include/csetjmp2
-rw-r--r--libcxx/include/csignal2
-rw-r--r--libcxx/include/cstdarg2
-rw-r--r--libcxx/include/cstdbool2
-rw-r--r--libcxx/include/cstddef6
-rw-r--r--libcxx/include/cstdint2
-rw-r--r--libcxx/include/cstdio8
-rw-r--r--libcxx/include/cstdlib14
-rw-r--r--libcxx/include/cstring4
-rw-r--r--libcxx/include/ctgmath2
-rw-r--r--libcxx/include/ctime6
-rw-r--r--libcxx/include/ctype.h2
-rw-r--r--libcxx/include/cwchar10
-rw-r--r--libcxx/include/cwctype4
-rw-r--r--libcxx/include/deque71
-rw-r--r--libcxx/include/errno.h2
-rw-r--r--libcxx/include/exception2
-rw-r--r--libcxx/include/execution2
-rw-r--r--libcxx/include/experimental/__config6
-rw-r--r--libcxx/include/experimental/algorithm8
-rw-r--r--libcxx/include/experimental/coroutine8
-rw-r--r--libcxx/include/experimental/deque2
-rw-r--r--libcxx/include/experimental/filesystem2
-rw-r--r--libcxx/include/experimental/forward_list2
-rw-r--r--libcxx/include/experimental/functional2
-rw-r--r--libcxx/include/experimental/iterator2
-rw-r--r--libcxx/include/experimental/list2
-rw-r--r--libcxx/include/experimental/map2
-rw-r--r--libcxx/include/experimental/memory_resource16
-rw-r--r--libcxx/include/experimental/propagate_const2
-rw-r--r--libcxx/include/experimental/regex6
-rw-r--r--libcxx/include/experimental/set2
-rw-r--r--libcxx/include/experimental/simd24
-rw-r--r--libcxx/include/experimental/string4
-rw-r--r--libcxx/include/experimental/type_traits2
-rw-r--r--libcxx/include/experimental/unordered_map2
-rw-r--r--libcxx/include/experimental/unordered_set2
-rw-r--r--libcxx/include/experimental/utility2
-rw-r--r--libcxx/include/experimental/vector2
-rw-r--r--libcxx/include/ext/__hash2
-rw-r--r--libcxx/include/ext/hash_map2
-rw-r--r--libcxx/include/ext/hash_set2
-rw-r--r--libcxx/include/fenv.h2
-rw-r--r--libcxx/include/filesystem47
-rw-r--r--libcxx/include/float.h2
-rw-r--r--libcxx/include/format627
-rw-r--r--libcxx/include/forward_list16
-rw-r--r--libcxx/include/fstream28
-rw-r--r--libcxx/include/functional6
-rw-r--r--libcxx/include/future4
-rw-r--r--libcxx/include/initializer_list2
-rw-r--r--libcxx/include/inttypes.h2
-rw-r--r--libcxx/include/iomanip2
-rw-r--r--libcxx/include/ios9
-rw-r--r--libcxx/include/iosfwd42
-rw-r--r--libcxx/include/iostream15
-rw-r--r--libcxx/include/istream4
-rw-r--r--libcxx/include/iterator7
-rw-r--r--libcxx/include/latch2
-rw-r--r--libcxx/include/limits2
-rw-r--r--libcxx/include/limits.h2
-rw-r--r--libcxx/include/list120
-rw-r--r--libcxx/include/locale288
-rw-r--r--libcxx/include/locale.h4
-rw-r--r--libcxx/include/map110
-rw-r--r--libcxx/include/math.h28
-rw-r--r--libcxx/include/memory23
-rw-r--r--libcxx/include/module.modulemap112
-rw-r--r--libcxx/include/mutex4
-rw-r--r--libcxx/include/new23
-rw-r--r--libcxx/include/numbers7
-rw-r--r--libcxx/include/numeric2
-rw-r--r--libcxx/include/optional101
-rw-r--r--libcxx/include/ostream13
-rw-r--r--libcxx/include/queue230
-rw-r--r--libcxx/include/random2
-rw-r--r--libcxx/include/ranges60
-rw-r--r--libcxx/include/ratio22
-rw-r--r--libcxx/include/regex60
-rw-r--r--libcxx/include/scoped_allocator12
-rw-r--r--libcxx/include/semaphore92
-rw-r--r--libcxx/include/set79
-rw-r--r--libcxx/include/setjmp.h2
-rw-r--r--libcxx/include/shared_mutex2
-rw-r--r--libcxx/include/span196
-rw-r--r--libcxx/include/sstream2
-rw-r--r--libcxx/include/stack22
-rw-r--r--libcxx/include/stdbool.h2
-rw-r--r--libcxx/include/stddef.h2
-rw-r--r--libcxx/include/stdexcept2
-rw-r--r--libcxx/include/stdint.h2
-rw-r--r--libcxx/include/stdio.h2
-rw-r--r--libcxx/include/stdlib.h32
-rw-r--r--libcxx/include/streambuf2
-rw-r--r--libcxx/include/string236
-rw-r--r--libcxx/include/string.h2
-rw-r--r--libcxx/include/string_view61
-rw-r--r--libcxx/include/strstream2
-rw-r--r--libcxx/include/system_error10
-rw-r--r--libcxx/include/tgmath.h2
-rw-r--r--libcxx/include/thread5
-rw-r--r--libcxx/include/tuple148
-rw-r--r--libcxx/include/type_traits914
-rw-r--r--libcxx/include/typeindex2
-rw-r--r--libcxx/include/typeinfo2
-rw-r--r--libcxx/include/unordered_map356
-rw-r--r--libcxx/include/unordered_set283
-rw-r--r--libcxx/include/utility23
-rw-r--r--libcxx/include/valarray11
-rw-r--r--libcxx/include/variant30
-rw-r--r--libcxx/include/vector391
-rw-r--r--libcxx/include/version52
-rw-r--r--libcxx/include/wchar.h12
-rw-r--r--libcxx/include/wctype.h16
-rw-r--r--libcxx/src/algorithm.cpp6
-rw-r--r--libcxx/src/any.cpp2
-rw-r--r--libcxx/src/atomic.cpp2
-rw-r--r--libcxx/src/barrier.cpp2
-rw-r--r--libcxx/src/bind.cpp2
-rw-r--r--libcxx/src/charconv.cpp2
-rw-r--r--libcxx/src/chrono.cpp33
-rw-r--r--libcxx/src/condition_variable.cpp2
-rw-r--r--libcxx/src/condition_variable_destructor.cpp2
-rw-r--r--libcxx/src/debug.cpp2
-rw-r--r--libcxx/src/exception.cpp2
-rw-r--r--libcxx/src/experimental/memory_resource.cpp4
-rw-r--r--libcxx/src/filesystem/directory_iterator.cpp2
-rw-r--r--libcxx/src/filesystem/filesystem_common.h3
-rw-r--r--libcxx/src/filesystem/operations.cpp2
-rw-r--r--libcxx/src/format.cpp2
-rw-r--r--libcxx/src/functional.cpp4
-rw-r--r--libcxx/src/future.cpp2
-rw-r--r--libcxx/src/hash.cpp2
-rw-r--r--libcxx/src/include/apple_availability.h2
-rw-r--r--libcxx/src/include/atomic_support.h4
-rw-r--r--libcxx/src/include/config_elast.h2
-rw-r--r--libcxx/src/include/refstring.h2
-rw-r--r--libcxx/src/ios.cpp2
-rw-r--r--libcxx/src/ios.instantiations.cpp11
-rw-r--r--libcxx/src/iostream.cpp54
-rw-r--r--libcxx/src/legacy_pointer_safety.cpp23
-rw-r--r--libcxx/src/locale.cpp427
-rw-r--r--libcxx/src/memory.cpp37
-rw-r--r--libcxx/src/mutex.cpp2
-rw-r--r--libcxx/src/mutex_destructor.cpp2
-rw-r--r--libcxx/src/new.cpp2
-rw-r--r--libcxx/src/optional.cpp2
-rw-r--r--libcxx/src/random.cpp2
-rw-r--r--libcxx/src/random_shuffle.cpp2
-rw-r--r--libcxx/src/regex.cpp2
-rw-r--r--libcxx/src/shared_mutex.cpp2
-rw-r--r--libcxx/src/stdexcept.cpp2
-rw-r--r--libcxx/src/string.cpp54
-rw-r--r--libcxx/src/strstream.cpp2
-rw-r--r--libcxx/src/support/ibm/mbsnrtowcs.cpp95
-rw-r--r--libcxx/src/support/ibm/wcsnrtombs.cpp93
-rw-r--r--libcxx/src/support/runtime/stdexcept_default.ipp2
-rw-r--r--libcxx/src/support/runtime/stdexcept_vcruntime.ipp2
-rw-r--r--libcxx/src/system_error.cpp2
-rw-r--r--libcxx/src/typeinfo.cpp2
-rw-r--r--libcxx/src/utility.cpp2
-rw-r--r--libcxx/src/valarray.cpp2
-rw-r--r--libcxx/src/variant.cpp2
-rw-r--r--libcxx/src/vector.cpp10
-rw-r--r--libunwind/include/__libunwind_config.h2
-rw-r--r--libunwind/include/libunwind.h215
-rw-r--r--libunwind/include/mach-o/compact_unwind_encoding.h2
-rw-r--r--libunwind/include/unwind.h198
-rw-r--r--libunwind/include/unwind_arm_ehabi.h169
-rw-r--r--libunwind/include/unwind_itanium.h76
-rw-r--r--libunwind/src/AddressSpace.hpp2
-rw-r--r--libunwind/src/CompactUnwinder.hpp76
-rw-r--r--libunwind/src/DwarfInstructions.hpp13
-rw-r--r--libunwind/src/DwarfParser.hpp28
-rw-r--r--libunwind/src/EHHeaderParser.hpp2
-rw-r--r--libunwind/src/RWMutex.hpp2
-rw-r--r--libunwind/src/Registers.hpp184
-rw-r--r--libunwind/src/Unwind-EHABI.cpp150
-rw-r--r--libunwind/src/Unwind-EHABI.h2
-rw-r--r--libunwind/src/Unwind-seh.cpp8
-rw-r--r--libunwind/src/Unwind-sjlj.c2
-rw-r--r--libunwind/src/UnwindCursor.hpp100
-rw-r--r--libunwind/src/UnwindLevel1-gcc-ext.c31
-rw-r--r--libunwind/src/UnwindLevel1.c56
-rw-r--r--libunwind/src/UnwindRegistersRestore.S16
-rw-r--r--libunwind/src/UnwindRegistersSave.S5
-rw-r--r--libunwind/src/Unwind_AppleExtras.cpp2
-rw-r--r--libunwind/src/assembly.h7
-rw-r--r--libunwind/src/cet_unwind.h41
-rw-r--r--libunwind/src/config.h2
-rw-r--r--libunwind/src/dwarf2.h2
-rw-r--r--libunwind/src/libunwind.cpp33
-rw-r--r--libunwind/src/libunwind_ext.h5
-rw-r--r--lld/COFF/COFFLinkerContext.cpp40
-rw-r--r--lld/COFF/COFFLinkerContext.h85
-rw-r--r--lld/COFF/CallGraphSort.cpp12
-rw-r--r--lld/COFF/CallGraphSort.h4
-rw-r--r--lld/COFF/Chunks.cpp13
-rw-r--r--lld/COFF/Chunks.h4
-rw-r--r--lld/COFF/Config.h11
-rw-r--r--lld/COFF/DLL.cpp7
-rw-r--r--lld/COFF/DLL.h2
-rw-r--r--lld/COFF/DebugTypes.cpp210
-rw-r--r--lld/COFF/DebugTypes.h39
-rw-r--r--lld/COFF/Driver.cpp222
-rw-r--r--lld/COFF/Driver.h8
-rw-r--r--lld/COFF/DriverUtils.cpp32
-rw-r--r--lld/COFF/ICF.cpp21
-rw-r--r--lld/COFF/ICF.h3
-rw-r--r--lld/COFF/InputFiles.cpp144
-rw-r--r--lld/COFF/InputFiles.h47
-rw-r--r--lld/COFF/LLDMapFile.cpp11
-rw-r--r--lld/COFF/LLDMapFile.h6
-rw-r--r--lld/COFF/LTO.cpp20
-rw-r--r--lld/COFF/LTO.h3
-rw-r--r--lld/COFF/MapFile.cpp38
-rw-r--r--lld/COFF/MapFile.h6
-rw-r--r--lld/COFF/MarkLive.cpp10
-rw-r--r--lld/COFF/MarkLive.h5
-rw-r--r--lld/COFF/MinGW.cpp27
-rw-r--r--lld/COFF/MinGW.h8
-rw-r--r--lld/COFF/Options.td33
-rw-r--r--lld/COFF/PDB.cpp117
-rw-r--r--lld/COFF/PDB.h10
-rw-r--r--lld/COFF/SymbolTable.cpp59
-rw-r--r--lld/COFF/SymbolTable.h15
-rw-r--r--lld/COFF/TypeMerger.h20
-rw-r--r--lld/COFF/Writer.cpp148
-rw-r--r--lld/COFF/Writer.h6
-rw-r--r--lld/Common/ErrorHandler.cpp35
-rw-r--r--lld/Common/Timer.cpp14
-rw-r--r--lld/Common/Version.cpp8
-rw-r--r--lld/ELF/Arch/AArch64.cpp23
-rw-r--r--lld/ELF/Arch/AMDGPU.cpp1
-rw-r--r--lld/ELF/Arch/ARM.cpp40
-rw-r--r--lld/ELF/Arch/AVR.cpp3
-rw-r--r--lld/ELF/Arch/Hexagon.cpp55
-rw-r--r--lld/ELF/Arch/Mips.cpp5
-rw-r--r--lld/ELF/Arch/PPC.cpp2
-rw-r--r--lld/ELF/Arch/PPC64.cpp15
-rw-r--r--lld/ELF/Arch/RISCV.cpp2
-rw-r--r--lld/ELF/Arch/SPARCV9.cpp1
-rw-r--r--lld/ELF/Arch/X86.cpp111
-rw-r--r--lld/ELF/Arch/X86_64.cpp16
-rw-r--r--lld/ELF/CallGraphSort.cpp2
-rw-r--r--lld/ELF/Config.h14
-rw-r--r--lld/ELF/DWARF.cpp9
-rw-r--r--lld/ELF/Driver.cpp217
-rw-r--r--lld/ELF/DriverUtils.cpp13
-rw-r--r--lld/ELF/ICF.cpp29
-rw-r--r--lld/ELF/InputFiles.cpp87
-rw-r--r--lld/ELF/InputFiles.h17
-rw-r--r--lld/ELF/InputSection.cpp152
-rw-r--r--lld/ELF/InputSection.h32
-rw-r--r--lld/ELF/LTO.cpp18
-rw-r--r--lld/ELF/LinkerScript.cpp92
-rw-r--r--lld/ELF/LinkerScript.h7
-rw-r--r--lld/ELF/MapFile.cpp19
-rw-r--r--lld/ELF/MapFile.h1
-rw-r--r--lld/ELF/MarkLive.cpp29
-rw-r--r--lld/ELF/Options.td50
-rw-r--r--lld/ELF/OutputSections.cpp2
-rw-r--r--lld/ELF/Relocations.cpp634
-rw-r--r--lld/ELF/Relocations.h2
-rw-r--r--lld/ELF/ScriptParser.cpp16
-rw-r--r--lld/ELF/SymbolTable.cpp121
-rw-r--r--lld/ELF/SymbolTable.h10
-rw-r--r--lld/ELF/Symbols.cpp24
-rw-r--r--lld/ELF/Symbols.h11
-rw-r--r--lld/ELF/SyntheticSections.cpp48
-rw-r--r--lld/ELF/Target.cpp12
-rw-r--r--lld/ELF/Target.h9
-rw-r--r--lld/ELF/Writer.cpp119
-rw-r--r--lld/MachO/Arch/ARM64.cpp8
-rw-r--r--lld/MachO/ConcatOutputSection.cpp99
-rw-r--r--lld/MachO/Config.h12
-rw-r--r--lld/MachO/Driver.cpp385
-rw-r--r--lld/MachO/Driver.h8
-rw-r--r--lld/MachO/DriverUtils.cpp41
-rw-r--r--lld/MachO/ICF.cpp93
-rw-r--r--lld/MachO/InputFiles.cpp430
-rw-r--r--lld/MachO/InputFiles.h39
-rw-r--r--lld/MachO/InputSection.cpp42
-rw-r--r--lld/MachO/InputSection.h37
-rw-r--r--lld/MachO/LTO.cpp17
-rw-r--r--lld/MachO/MarkLive.cpp37
-rw-r--r--lld/MachO/ObjC.cpp35
-rw-r--r--lld/MachO/Options.td28
-rw-r--r--lld/MachO/OutputSegment.cpp5
-rw-r--r--lld/MachO/OutputSegment.h1
-rw-r--r--lld/MachO/Relocations.cpp3
-rw-r--r--lld/MachO/Relocations.h2
-rw-r--r--lld/MachO/SymbolTable.cpp46
-rw-r--r--lld/MachO/SymbolTable.h3
-rw-r--r--lld/MachO/Symbols.cpp68
-rw-r--r--lld/MachO/Symbols.h48
-rw-r--r--lld/MachO/SyntheticSections.cpp50
-rw-r--r--lld/MachO/SyntheticSections.h9
-rw-r--r--lld/MachO/Target.h3
-rw-r--r--lld/MachO/UnwindInfoSection.cpp406
-rw-r--r--lld/MachO/UnwindInfoSection.h25
-rw-r--r--lld/MachO/Writer.cpp51
-rw-r--r--lld/MachO/Writer.h1
-rw-r--r--lld/MachO/ld64-vs-lld.rst15
-rw-r--r--lld/docs/AtomLLD.rst62
-rw-r--r--lld/docs/Driver.rst82
-rw-r--r--lld/docs/Readers.rst174
-rw-r--r--lld/docs/ReleaseNotes.rst30
-rw-r--r--lld/docs/WebAssembly.rst6
-rw-r--r--lld/docs/conf.py4
-rw-r--r--lld/docs/design.rst421
-rw-r--r--lld/docs/development.rst45
-rw-r--r--lld/docs/getting_started.rst87
-rw-r--r--lld/docs/index.rst5
-rw-r--r--lld/docs/ld.lld.120
-rw-r--r--lld/docs/open_projects.rst9
-rw-r--r--lld/docs/sphinx_intro.rst127
-rw-r--r--lld/include/lld/Common/ErrorHandler.h8
-rw-r--r--lld/include/lld/Common/LLVM.h2
-rw-r--r--lld/include/lld/Common/Timer.h4
-rw-r--r--lld/lib/Core/Resolver.cpp23
-rw-r--r--lld/lib/ReaderWriter/MachO/GOTPass.cpp2
-rw-r--r--lld/lib/ReaderWriter/MachO/ShimPass.cpp2
-rw-r--r--lld/tools/lld/lld.cpp2
-rw-r--r--lldb/bindings/interface/SBDebugger.i2
-rw-r--r--lldb/bindings/interface/SBMemoryRegionInfo.i3
-rw-r--r--lldb/bindings/interface/SBMemoryRegionInfoList.i3
-rw-r--r--lldb/bindings/interface/SBSymbolContextList.i2
-rw-r--r--lldb/bindings/interface/SBType.i15
-rw-r--r--lldb/bindings/lua/lua-typemaps.swig215
-rw-r--r--lldb/bindings/lua/lua-wrapper.swig28
-rw-r--r--lldb/bindings/lua/lua.swig4
-rwxr-xr-xlldb/bindings/python/lldb-python17
-rw-r--r--lldb/bindings/python/python-swigsafecast.swig167
-rw-r--r--lldb/bindings/python/python-wrapper.swig146
-rw-r--r--lldb/docs/design/reproducers.rst4
-rw-r--r--lldb/docs/design/sbapi.rst2
-rw-r--r--lldb/docs/man/lldb.rst6
-rw-r--r--lldb/include/lldb/API/SBDebugger.h2
-rw-r--r--lldb/include/lldb/API/SBExecutionContext.h2
-rw-r--r--lldb/include/lldb/API/SBMemoryRegionInfo.h6
-rw-r--r--lldb/include/lldb/API/SBMemoryRegionInfoList.h3
-rw-r--r--lldb/include/lldb/Breakpoint/Breakpoint.h9
-rw-r--r--lldb/include/lldb/Core/Address.h4
-rw-r--r--lldb/include/lldb/Core/AddressRange.h2
-rw-r--r--lldb/include/lldb/Core/Communication.h16
-rw-r--r--lldb/include/lldb/Core/Disassembler.h8
-rw-r--r--lldb/include/lldb/Core/IOHandler.h2
-rw-r--r--lldb/include/lldb/Core/Mangled.h3
-rw-r--r--lldb/include/lldb/Core/Module.h40
-rw-r--r--lldb/include/lldb/Core/ModuleList.h10
-rw-r--r--lldb/include/lldb/Core/PluginInterface.h6
-rw-r--r--lldb/include/lldb/Core/PluginManager.h85
-rw-r--r--lldb/include/lldb/Core/RichManglingContext.h3
-rw-r--r--lldb/include/lldb/Core/Section.h13
-rw-r--r--lldb/include/lldb/Core/StructuredDataImpl.h2
-rw-r--r--lldb/include/lldb/Core/ValueObjectConstResultImpl.h1
-rw-r--r--lldb/include/lldb/Core/ValueObjectDynamicValue.h11
-rw-r--r--lldb/include/lldb/Core/ValueObjectRegister.h9
-rw-r--r--lldb/include/lldb/DataFormatters/FormattersHelpers.h2
-rw-r--r--lldb/include/lldb/DataFormatters/StringPrinter.h20
-rw-r--r--lldb/include/lldb/Expression/IRExecutionUnit.h17
-rw-r--r--lldb/include/lldb/Host/Config.h.cmake5
-rw-r--r--lldb/include/lldb/Host/File.h75
-rw-r--r--lldb/include/lldb/Host/MainLoop.h2
-rw-r--r--lldb/include/lldb/Host/Socket.h32
-rw-r--r--lldb/include/lldb/Host/StringConvert.h38
-rw-r--r--lldb/include/lldb/Host/Terminal.h139
-rw-r--r--lldb/include/lldb/Host/common/NativeProcessProtocol.h16
-rw-r--r--lldb/include/lldb/Host/freebsd/HostInfoFreeBSD.h3
-rw-r--r--lldb/include/lldb/Host/netbsd/HostInfoNetBSD.h3
-rw-r--r--lldb/include/lldb/Host/openbsd/HostInfoOpenBSD.h3
-rw-r--r--lldb/include/lldb/Host/posix/ConnectionFileDescriptorPosix.h88
-rw-r--r--lldb/include/lldb/Host/posix/HostInfoPosix.h1
-rw-r--r--lldb/include/lldb/Interpreter/CommandCompletions.h10
-rw-r--r--lldb/include/lldb/Interpreter/CommandInterpreter.h69
-rw-r--r--lldb/include/lldb/Interpreter/CommandObject.h16
-rw-r--r--lldb/include/lldb/Interpreter/CommandObjectMultiword.h8
-rw-r--r--lldb/include/lldb/Interpreter/Property.h13
-rw-r--r--lldb/include/lldb/Interpreter/ScriptInterpreter.h6
-rw-r--r--lldb/include/lldb/Interpreter/ScriptedInterface.h74
-rw-r--r--lldb/include/lldb/Interpreter/ScriptedProcessInterface.h62
-rw-r--r--lldb/include/lldb/Symbol/Block.h2
-rw-r--r--lldb/include/lldb/Symbol/CompileUnit.h2
-rw-r--r--lldb/include/lldb/Symbol/SymbolFile.h33
-rw-r--r--lldb/include/lldb/Symbol/SymbolVendor.h4
-rw-r--r--lldb/include/lldb/Symbol/TypeSystem.h7
-rw-r--r--lldb/include/lldb/Target/ABI.h13
-rw-r--r--lldb/include/lldb/Target/AppleArm64ExceptionClass.def50
-rw-r--r--lldb/include/lldb/Target/AppleArm64ExceptionClass.h50
-rw-r--r--lldb/include/lldb/Target/DynamicLoader.h5
-rw-r--r--lldb/include/lldb/Target/DynamicRegisterInfo.h (renamed from lldb/source/Plugins/Process/Utility/DynamicRegisterInfo.h)66
-rw-r--r--lldb/include/lldb/Target/Language.h21
-rw-r--r--lldb/include/lldb/Target/MemoryRegionInfo.h12
-rw-r--r--lldb/include/lldb/Target/PathMappingList.h12
-rw-r--r--lldb/include/lldb/Target/Platform.h72
-rw-r--r--lldb/include/lldb/Target/Process.h183
-rw-r--r--lldb/include/lldb/Target/ProcessTrace.h13
-rw-r--r--lldb/include/lldb/Target/RegisterContext.h4
-rw-r--r--lldb/include/lldb/Target/RemoteAwarePlatform.h4
-rw-r--r--lldb/include/lldb/Target/Statistics.h142
-rw-r--r--lldb/include/lldb/Target/StopInfo.h10
-rw-r--r--lldb/include/lldb/Target/Target.h78
-rw-r--r--lldb/include/lldb/Target/Thread.h7
-rw-r--r--lldb/include/lldb/Target/ThreadPlan.h53
-rw-r--r--lldb/include/lldb/Target/ThreadPlanCallFunction.h4
-rw-r--r--lldb/include/lldb/Target/ThreadPlanCallUserExpression.h2
-rw-r--r--lldb/include/lldb/Target/ThreadPlanStack.h2
-rw-r--r--lldb/include/lldb/Target/ThreadPlanStepOverBreakpoint.h2
-rw-r--r--lldb/include/lldb/Target/Trace.h23
-rw-r--r--lldb/include/lldb/Target/TraceExporter.h2
-rw-r--r--lldb/include/lldb/Target/UnixSignals.h14
-rw-r--r--lldb/include/lldb/Utility/ConstString.h2
-rw-r--r--lldb/include/lldb/Utility/DataExtractor.h16
-rw-r--r--lldb/include/lldb/Utility/Environment.h2
-rw-r--r--lldb/include/lldb/Utility/FileSpec.h2
-rw-r--r--lldb/include/lldb/Utility/ReproducerInstrumentation.h10
-rw-r--r--lldb/include/lldb/Utility/Status.h10
-rw-r--r--lldb/include/lldb/Utility/StringExtractorGDBRemote.h4
-rw-r--r--lldb/include/lldb/Utility/StructuredData.h11
-rw-r--r--lldb/include/lldb/Utility/Timer.h26
-rw-r--r--lldb/include/lldb/Utility/UriParser.h32
-rw-r--r--lldb/include/lldb/lldb-enumerations.h1
-rw-r--r--lldb/include/lldb/lldb-forward.h39
-rw-r--r--lldb/include/lldb/lldb-private-enumerations.h6
-rw-r--r--lldb/include/lldb/lldb-private-types.h12
-rw-r--r--lldb/include/lldb/module.modulemap2
-rw-r--r--lldb/source/API/SBCommandInterpreter.cpp15
-rw-r--r--lldb/source/API/SBDebugger.cpp31
-rw-r--r--lldb/source/API/SBFrame.cpp32
-rw-r--r--lldb/source/API/SBLaunchInfo.cpp14
-rw-r--r--lldb/source/API/SBMemoryRegionInfo.cpp30
-rw-r--r--lldb/source/API/SBMemoryRegionInfoList.cpp23
-rw-r--r--lldb/source/API/SBModule.cpp8
-rw-r--r--lldb/source/API/SBPlatform.cpp24
-rw-r--r--lldb/source/API/SBProcess.cpp6
-rw-r--r--lldb/source/API/SBStream.cpp2
-rw-r--r--lldb/source/API/SBTarget.cpp61
-rw-r--r--lldb/source/API/SBThread.cpp6
-rw-r--r--lldb/source/API/liblldb-private.exports1
-rw-r--r--lldb/source/API/liblldb.exports1
-rw-r--r--lldb/source/API/liblldb.xcode.exports3
-rw-r--r--lldb/source/Breakpoint/Breakpoint.cpp39
-rw-r--r--lldb/source/Breakpoint/BreakpointID.cpp7
-rw-r--r--lldb/source/Breakpoint/BreakpointList.cpp6
-rw-r--r--lldb/source/Breakpoint/BreakpointResolverFileLine.cpp13
-rw-r--r--lldb/source/Breakpoint/BreakpointResolverName.cpp14
-rw-r--r--lldb/source/Commands/CommandCompletions.cpp67
-rw-r--r--lldb/source/Commands/CommandObjectApropos.cpp4
-rw-r--r--lldb/source/Commands/CommandObjectBreakpoint.cpp14
-rw-r--r--lldb/source/Commands/CommandObjectCommands.cpp540
-rw-r--r--lldb/source/Commands/CommandObjectDisassemble.cpp10
-rw-r--r--lldb/source/Commands/CommandObjectExpression.cpp10
-rw-r--r--lldb/source/Commands/CommandObjectFrame.cpp26
-rw-r--r--lldb/source/Commands/CommandObjectHelp.cpp9
-rw-r--r--lldb/source/Commands/CommandObjectMemory.cpp2
-rw-r--r--lldb/source/Commands/CommandObjectMemoryTag.cpp182
-rw-r--r--lldb/source/Commands/CommandObjectMultiword.cpp129
-rw-r--r--lldb/source/Commands/CommandObjectPlatform.cpp218
-rw-r--r--lldb/source/Commands/CommandObjectProcess.cpp166
-rw-r--r--lldb/source/Commands/CommandObjectSettings.cpp2
-rw-r--r--lldb/source/Commands/CommandObjectSource.cpp23
-rw-r--r--lldb/source/Commands/CommandObjectStats.cpp74
-rw-r--r--lldb/source/Commands/CommandObjectTarget.cpp388
-rw-r--r--lldb/source/Commands/CommandObjectThread.cpp49
-rw-r--r--lldb/source/Commands/CommandObjectTrace.cpp4
-rw-r--r--lldb/source/Commands/CommandObjectType.cpp4
-rw-r--r--lldb/source/Commands/CommandObjectWatchpoint.cpp2
-rw-r--r--lldb/source/Commands/Options.td54
-rw-r--r--lldb/source/Core/Address.cpp13
-rw-r--r--lldb/source/Core/AddressRange.cpp17
-rw-r--r--lldb/source/Core/Communication.cpp14
-rw-r--r--lldb/source/Core/Debugger.cpp15
-rw-r--r--lldb/source/Core/Disassembler.cpp9
-rw-r--r--lldb/source/Core/DynamicLoader.cpp8
-rw-r--r--lldb/source/Core/EmulateInstruction.cpp3
-rw-r--r--lldb/source/Core/IOHandler.cpp5
-rw-r--r--lldb/source/Core/IOHandlerCursesGUI.cpp1896
-rw-r--r--lldb/source/Core/Mangled.cpp74
-rw-r--r--lldb/source/Core/Module.cpp37
-rw-r--r--lldb/source/Core/ModuleList.cpp115
-rw-r--r--lldb/source/Core/PluginManager.cpp246
-rw-r--r--lldb/source/Core/RichManglingContext.cpp13
-rw-r--r--lldb/source/Core/Section.cpp82
-rw-r--r--lldb/source/Core/SourceManager.cpp11
-rw-r--r--lldb/source/Core/StreamFile.cpp4
-rw-r--r--lldb/source/Core/ValueObject.cpp11
-rw-r--r--lldb/source/Core/ValueObjectConstResultImpl.cpp2
-rw-r--r--lldb/source/Core/ValueObjectDynamicValue.cpp4
-rw-r--r--lldb/source/Core/ValueObjectRegister.cpp23
-rw-r--r--lldb/source/DataFormatters/FormatManager.cpp11
-rw-r--r--lldb/source/DataFormatters/FormattersHelpers.cpp11
-rw-r--r--lldb/source/DataFormatters/StringPrinter.cpp20
-rw-r--r--lldb/source/Expression/FunctionCaller.cpp2
-rw-r--r--lldb/source/Expression/IRExecutionUnit.cpp359
-rw-r--r--lldb/source/Expression/IRInterpreter.cpp2
-rw-r--r--lldb/source/Expression/REPL.cpp2
-rw-r--r--lldb/source/Expression/UserExpression.cpp4
-rw-r--r--lldb/source/Expression/UtilityFunction.cpp5
-rw-r--r--lldb/source/Host/common/Editline.cpp24
-rw-r--r--lldb/source/Host/common/File.cpp159
-rw-r--r--lldb/source/Host/common/FileSystem.cpp14
-rw-r--r--lldb/source/Host/common/LockFileBase.cpp7
-rw-r--r--lldb/source/Host/common/NativeRegisterContext.cpp12
-rw-r--r--lldb/source/Host/common/Socket.cpp154
-rw-r--r--lldb/source/Host/common/StringConvert.cpp95
-rw-r--r--lldb/source/Host/common/TCPSocket.cpp46
-rw-r--r--lldb/source/Host/common/Terminal.cpp536
-rw-r--r--lldb/source/Host/common/UDPSocket.cpp29
-rw-r--r--lldb/source/Host/common/XML.cpp50
-rw-r--r--lldb/source/Host/freebsd/HostInfoFreeBSD.cpp29
-rw-r--r--lldb/source/Host/netbsd/HostInfoNetBSD.cpp26
-rw-r--r--lldb/source/Host/openbsd/HostInfoOpenBSD.cpp26
-rw-r--r--lldb/source/Host/posix/ConnectionFileDescriptorPosix.cpp544
-rw-r--r--lldb/source/Host/posix/DomainSocket.cpp57
-rw-r--r--lldb/source/Host/posix/HostInfoPosix.cpp9
-rw-r--r--lldb/source/Host/posix/HostProcessPosix.cpp4
-rw-r--r--lldb/source/Host/posix/LockFilePosix.cpp8
-rw-r--r--lldb/source/Host/posix/PipePosix.cpp9
-rw-r--r--lldb/source/Host/posix/ProcessLauncherPosixFork.cpp38
-rw-r--r--lldb/source/Interpreter/CommandInterpreter.cpp253
-rw-r--r--lldb/source/Interpreter/CommandObject.cpp2
-rw-r--r--lldb/source/Interpreter/OptionValueArray.cpp24
-rw-r--r--lldb/source/Interpreter/OptionValueFileSpecList.cpp24
-rw-r--r--lldb/source/Interpreter/OptionValuePathMappings.cpp45
-rw-r--r--lldb/source/Interpreter/OptionValueProperties.cpp3
-rw-r--r--lldb/source/Interpreter/OptionValueSInt64.cpp8
-rw-r--r--lldb/source/Interpreter/OptionValueUInt64.cpp10
-rw-r--r--lldb/source/Interpreter/Property.cpp37
-rw-r--r--lldb/source/Interpreter/ScriptInterpreter.cpp16
-rw-r--r--lldb/source/Plugins/ABI/AArch64/ABIAArch64.cpp82
-rw-r--r--lldb/source/Plugins/ABI/AArch64/ABIAArch64.h5
-rw-r--r--lldb/source/Plugins/ABI/AArch64/ABIMacOSX_arm64.cpp11
-rw-r--r--lldb/source/Plugins/ABI/AArch64/ABIMacOSX_arm64.h8
-rw-r--r--lldb/source/Plugins/ABI/AArch64/ABISysV_arm64.cpp11
-rw-r--r--lldb/source/Plugins/ABI/AArch64/ABISysV_arm64.h6
-rw-r--r--lldb/source/Plugins/ABI/ARC/ABISysV_arc.cpp42
-rw-r--r--lldb/source/Plugins/ABI/ARC/ABISysV_arc.h8
-rw-r--r--lldb/source/Plugins/ABI/ARM/ABIMacOSX_arm.cpp333
-rw-r--r--lldb/source/Plugins/ABI/ARM/ABIMacOSX_arm.h6
-rw-r--r--lldb/source/Plugins/ABI/ARM/ABISysV_arm.cpp333
-rw-r--r--lldb/source/Plugins/ABI/ARM/ABISysV_arm.h6
-rw-r--r--lldb/source/Plugins/ABI/Hexagon/ABISysV_hexagon.cpp265
-rw-r--r--lldb/source/Plugins/ABI/Hexagon/ABISysV_hexagon.h6
-rw-r--r--lldb/source/Plugins/ABI/Mips/ABISysV_mips.cpp141
-rw-r--r--lldb/source/Plugins/ABI/Mips/ABISysV_mips.h6
-rw-r--r--lldb/source/Plugins/ABI/Mips/ABISysV_mips64.cpp149
-rw-r--r--lldb/source/Plugins/ABI/Mips/ABISysV_mips64.h6
-rw-r--r--lldb/source/Plugins/ABI/PowerPC/ABISysV_ppc.cpp46
-rw-r--r--lldb/source/Plugins/ABI/PowerPC/ABISysV_ppc.h6
-rw-r--r--lldb/source/Plugins/ABI/PowerPC/ABISysV_ppc64.cpp13
-rw-r--r--lldb/source/Plugins/ABI/PowerPC/ABISysV_ppc64.h6
-rw-r--r--lldb/source/Plugins/ABI/SystemZ/ABISysV_s390x.cpp31
-rw-r--r--lldb/source/Plugins/ABI/SystemZ/ABISysV_s390x.h6
-rw-r--r--lldb/source/Plugins/ABI/X86/ABIMacOSX_i386.cpp13
-rw-r--r--lldb/source/Plugins/ABI/X86/ABIMacOSX_i386.h12
-rw-r--r--lldb/source/Plugins/ABI/X86/ABISysV_i386.cpp11
-rw-r--r--lldb/source/Plugins/ABI/X86/ABISysV_i386.h12
-rw-r--r--lldb/source/Plugins/ABI/X86/ABISysV_x86_64.cpp15
-rw-r--r--lldb/source/Plugins/ABI/X86/ABISysV_x86_64.h7
-rw-r--r--lldb/source/Plugins/ABI/X86/ABIWindows_x86_64.cpp17
-rw-r--r--lldb/source/Plugins/ABI/X86/ABIWindows_x86_64.h6
-rw-r--r--lldb/source/Plugins/ABI/X86/ABIX86.cpp240
-rw-r--r--lldb/source/Plugins/ABI/X86/ABIX86.h8
-rw-r--r--lldb/source/Plugins/ABI/X86/ABIX86_64.h7
-rw-r--r--lldb/source/Plugins/ABI/X86/ABIX86_i386.cpp22
-rw-r--r--lldb/source/Plugins/ABI/X86/ABIX86_i386.h22
-rw-r--r--lldb/source/Plugins/Architecture/AArch64/ArchitectureAArch64.cpp9
-rw-r--r--lldb/source/Plugins/Architecture/AArch64/ArchitectureAArch64.h5
-rw-r--r--lldb/source/Plugins/Architecture/Arm/ArchitectureArm.cpp7
-rw-r--r--lldb/source/Plugins/Architecture/Arm/ArchitectureArm.h5
-rw-r--r--lldb/source/Plugins/Architecture/Mips/ArchitectureMips.cpp7
-rw-r--r--lldb/source/Plugins/Architecture/Mips/ArchitectureMips.h5
-rw-r--r--lldb/source/Plugins/Architecture/PPC64/ArchitecturePPC64.cpp7
-rw-r--r--lldb/source/Plugins/Architecture/PPC64/ArchitecturePPC64.h5
-rw-r--r--lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp100
-rw-r--r--lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.h13
-rw-r--r--lldb/source/Plugins/DynamicLoader/Hexagon-DYLD/DynamicLoaderHexagonDYLD.cpp13
-rw-r--r--lldb/source/Plugins/DynamicLoader/Hexagon-DYLD/DynamicLoaderHexagonDYLD.h8
-rw-r--r--lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DYLDRendezvous.cpp74
-rw-r--r--lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DYLDRendezvous.h11
-rw-r--r--lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.cpp77
-rw-r--r--lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.h8
-rw-r--r--lldb/source/Plugins/DynamicLoader/Static/DynamicLoaderStatic.cpp14
-rw-r--r--lldb/source/Plugins/DynamicLoader/Static/DynamicLoaderStatic.h8
-rw-r--r--lldb/source/Plugins/DynamicLoader/Windows-DYLD/DynamicLoaderWindowsDYLD.cpp13
-rw-r--r--lldb/source/Plugins/DynamicLoader/Windows-DYLD/DynamicLoaderWindowsDYLD.h7
-rw-r--r--lldb/source/Plugins/DynamicLoader/wasm-DYLD/DynamicLoaderWasmDYLD.cpp7
-rw-r--r--lldb/source/Plugins/DynamicLoader/wasm-DYLD/DynamicLoaderWasmDYLD.h7
-rw-r--r--lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.cpp51
-rw-r--r--lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.h10
-rw-r--r--lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp42
-rw-r--r--lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp19
-rw-r--r--lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp4
-rw-r--r--lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionSourceCode.cpp3
-rw-r--r--lldb/source/Plugins/ExpressionParser/Clang/ClangUserExpression.cpp23
-rw-r--r--lldb/source/Plugins/ExpressionParser/Clang/ClangUtilityFunction.cpp6
-rw-r--r--lldb/source/Plugins/ExpressionParser/Clang/CppModuleConfiguration.h6
-rw-r--r--lldb/source/Plugins/ExpressionParser/Clang/IRForTarget.cpp2
-rw-r--r--lldb/source/Plugins/ExpressionParser/Clang/NameSearchContext.cpp3
-rw-r--r--lldb/source/Plugins/Instruction/ARM/EmulateInstructionARM.cpp7
-rw-r--r--lldb/source/Plugins/Instruction/ARM/EmulateInstructionARM.h10
-rw-r--r--lldb/source/Plugins/Instruction/ARM64/EmulateInstructionARM64.cpp14
-rw-r--r--lldb/source/Plugins/Instruction/ARM64/EmulateInstructionARM64.h8
-rw-r--r--lldb/source/Plugins/Instruction/MIPS/EmulateInstructionMIPS.cpp18
-rw-r--r--lldb/source/Plugins/Instruction/MIPS/EmulateInstructionMIPS.h8
-rw-r--r--lldb/source/Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.cpp18
-rw-r--r--lldb/source/Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.h8
-rw-r--r--lldb/source/Plugins/Instruction/PPC64/EmulateInstructionPPC64.cpp12
-rw-r--r--lldb/source/Plugins/Instruction/PPC64/EmulateInstructionPPC64.h8
-rw-r--r--lldb/source/Plugins/InstrumentationRuntime/ASan/InstrumentationRuntimeASan.cpp4
-rw-r--r--lldb/source/Plugins/InstrumentationRuntime/ASan/InstrumentationRuntimeASan.h8
-rw-r--r--lldb/source/Plugins/InstrumentationRuntime/MainThreadChecker/InstrumentationRuntimeMainThreadChecker.cpp5
-rw-r--r--lldb/source/Plugins/InstrumentationRuntime/MainThreadChecker/InstrumentationRuntimeMainThreadChecker.h8
-rw-r--r--lldb/source/Plugins/InstrumentationRuntime/TSan/InstrumentationRuntimeTSan.cpp4
-rw-r--r--lldb/source/Plugins/InstrumentationRuntime/TSan/InstrumentationRuntimeTSan.h8
-rw-r--r--lldb/source/Plugins/InstrumentationRuntime/UBSan/InstrumentationRuntimeUBSan.cpp4
-rw-r--r--lldb/source/Plugins/InstrumentationRuntime/UBSan/InstrumentationRuntimeUBSan.h10
-rw-r--r--lldb/source/Plugins/JITLoader/GDB/JITLoaderGDB.cpp27
-rw-r--r--lldb/source/Plugins/JITLoader/GDB/JITLoaderGDB.h8
-rw-r--r--lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp227
-rw-r--r--lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h19
-rw-r--r--lldb/source/Plugins/Language/CPlusPlus/CxxStringTypes.cpp209
-rw-r--r--lldb/source/Plugins/Language/CPlusPlus/GenericBitset.cpp (renamed from lldb/source/Plugins/Language/CPlusPlus/LibCxxBitset.cpp)51
-rw-r--r--lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp14
-rw-r--r--lldb/source/Plugins/Language/CPlusPlus/LibStdcpp.cpp4
-rw-r--r--lldb/source/Plugins/Language/CPlusPlus/LibStdcpp.h4
-rw-r--r--lldb/source/Plugins/Language/CPlusPlus/MSVCUndecoratedNameParser.cpp2
-rw-r--r--lldb/source/Plugins/Language/ObjC/Cocoa.cpp72
-rw-r--r--lldb/source/Plugins/Language/ObjC/NSArray.cpp26
-rw-r--r--lldb/source/Plugins/Language/ObjC/NSDictionary.cpp176
-rw-r--r--lldb/source/Plugins/Language/ObjC/NSString.cpp14
-rw-r--r--lldb/source/Plugins/Language/ObjC/ObjCConstants.h44
-rw-r--r--lldb/source/Plugins/Language/ObjC/ObjCLanguage.cpp44
-rw-r--r--lldb/source/Plugins/Language/ObjC/ObjCLanguage.h6
-rw-r--r--lldb/source/Plugins/Language/ObjCPlusPlus/ObjCPlusPlusLanguage.cpp12
-rw-r--r--lldb/source/Plugins/Language/ObjCPlusPlus/ObjCPlusPlusLanguage.h6
-rw-r--r--lldb/source/Plugins/LanguageRuntime/CPlusPlus/ItaniumABI/ItaniumABILanguageRuntime.cpp12
-rw-r--r--lldb/source/Plugins/LanguageRuntime/CPlusPlus/ItaniumABI/ItaniumABILanguageRuntime.h6
-rw-r--r--lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.cpp11
-rw-r--r--lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV1.cpp12
-rw-r--r--lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV1.h6
-rw-r--r--lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp164
-rw-r--r--lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.h6
-rw-r--r--lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTypeEncodingParser.cpp70
-rw-r--r--lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTypeEncodingParser.h6
-rw-r--r--lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleThreadPlanStepThroughObjCTrampoline.cpp5
-rw-r--r--lldb/source/Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.cpp8
-rw-r--r--lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptExpressionOpts.cpp8
-rw-r--r--lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptExpressionOpts.h2
-rw-r--r--lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.cpp38
-rw-r--r--lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.h6
-rw-r--r--lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptx86ABIFixups.cpp31
-rw-r--r--lldb/source/Plugins/MemoryHistory/asan/MemoryHistoryASan.cpp5
-rw-r--r--lldb/source/Plugins/MemoryHistory/asan/MemoryHistoryASan.h8
-rw-r--r--lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.cpp16
-rw-r--r--lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.h10
-rw-r--r--lldb/source/Plugins/ObjectFile/Breakpad/BreakpadRecords.cpp91
-rw-r--r--lldb/source/Plugins/ObjectFile/Breakpad/BreakpadRecords.h50
-rw-r--r--lldb/source/Plugins/ObjectFile/Breakpad/ObjectFileBreakpad.cpp11
-rw-r--r--lldb/source/Plugins/ObjectFile/Breakpad/ObjectFileBreakpad.h6
-rw-r--r--lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp69
-rw-r--r--lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h10
-rw-r--r--lldb/source/Plugins/ObjectFile/JIT/ObjectFileJIT.cpp17
-rw-r--r--lldb/source/Plugins/ObjectFile/JIT/ObjectFileJIT.h10
-rw-r--r--lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp772
-rw-r--r--lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.h92
-rw-r--r--lldb/source/Plugins/ObjectFile/Minidump/ObjectFileMinidump.cpp114
-rw-r--r--lldb/source/Plugins/ObjectFile/Minidump/ObjectFileMinidump.h66
-rw-r--r--lldb/source/Plugins/ObjectFile/PDB/ObjectFilePDB.cpp5
-rw-r--r--lldb/source/Plugins/ObjectFile/PDB/ObjectFilePDB.h6
-rw-r--r--lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp5
-rw-r--r--lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.h5
-rw-r--r--lldb/source/Plugins/OperatingSystem/Python/OperatingSystemPython.cpp15
-rw-r--r--lldb/source/Plugins/OperatingSystem/Python/OperatingSystemPython.h17
-rw-r--r--lldb/source/Plugins/Platform/FreeBSD/PlatformFreeBSD.cpp98
-rw-r--r--lldb/source/Plugins/Platform/FreeBSD/PlatformFreeBSD.h18
-rw-r--r--lldb/source/Plugins/Platform/NetBSD/PlatformNetBSD.cpp77
-rw-r--r--lldb/source/Plugins/Platform/NetBSD/PlatformNetBSD.h18
-rw-r--r--lldb/source/Plugins/Platform/OpenBSD/PlatformOpenBSD.cpp77
-rw-r--r--lldb/source/Plugins/Platform/OpenBSD/PlatformOpenBSD.h18
-rw-r--r--lldb/source/Plugins/Platform/POSIX/PlatformPOSIX.cpp51
-rw-r--r--lldb/source/Plugins/Platform/POSIX/PlatformPOSIX.h6
-rw-r--r--lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp169
-rw-r--r--lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.h29
-rw-r--r--lldb/source/Plugins/Process/FreeBSD/NativeProcessFreeBSD.cpp41
-rw-r--r--lldb/source/Plugins/Process/FreeBSD/NativeProcessFreeBSD.h2
-rw-r--r--lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_mips64.cpp58
-rw-r--r--lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_mips64.h5
-rw-r--r--lldb/source/Plugins/Process/NetBSD/NativeProcessNetBSD.cpp27
-rw-r--r--lldb/source/Plugins/Process/NetBSD/NativeProcessNetBSD.h2
-rw-r--r--lldb/source/Plugins/Process/Utility/GDBRemoteSignals.cpp165
-rw-r--r--lldb/source/Plugins/Process/Utility/GDBRemoteSignals.h3
-rw-r--r--lldb/source/Plugins/Process/Utility/InferiorCallPOSIX.cpp21
-rw-r--r--lldb/source/Plugins/Process/Utility/RegisterContextDarwin_arm.cpp161
-rw-r--r--lldb/source/Plugins/Process/Utility/RegisterContextDarwin_arm64.cpp2
-rw-r--r--lldb/source/Plugins/Process/Utility/RegisterContextDarwin_i386.cpp89
-rw-r--r--lldb/source/Plugins/Process/Utility/RegisterContextDarwin_x86_64.cpp104
-rw-r--r--lldb/source/Plugins/Process/Utility/RegisterContextFreeBSD_mips64.cpp102
-rw-r--r--lldb/source/Plugins/Process/Utility/RegisterContextLinux_i386.cpp25
-rw-r--r--lldb/source/Plugins/Process/Utility/RegisterContextLinux_x86_64.cpp25
-rw-r--r--lldb/source/Plugins/Process/Utility/RegisterContextMemory.cpp1
-rw-r--r--lldb/source/Plugins/Process/Utility/RegisterContextMemory.h7
-rw-r--r--lldb/source/Plugins/Process/Utility/RegisterContextWindows_i386.cpp1
-rw-r--r--lldb/source/Plugins/Process/Utility/RegisterContextWindows_x86_64.cpp23
-rw-r--r--lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm.cpp2
-rw-r--r--lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp2
-rw-r--r--lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h1
-rw-r--r--lldb/source/Plugins/Process/Utility/RegisterInfos_arm.h1678
-rw-r--r--lldb/source/Plugins/Process/Utility/RegisterInfos_arm64.h12
-rw-r--r--lldb/source/Plugins/Process/Utility/RegisterInfos_arm64_sve.h6
-rw-r--r--lldb/source/Plugins/Process/Utility/RegisterInfos_i386.h24
-rw-r--r--lldb/source/Plugins/Process/Utility/RegisterInfos_mips64.h96
-rw-r--r--lldb/source/Plugins/Process/Utility/RegisterInfos_powerpc.h43
-rw-r--r--lldb/source/Plugins/Process/Utility/RegisterInfos_ppc64.h45
-rw-r--r--lldb/source/Plugins/Process/Utility/RegisterInfos_ppc64le.h51
-rw-r--r--lldb/source/Plugins/Process/Utility/RegisterInfos_s390x.h26
-rw-r--r--lldb/source/Plugins/Process/Utility/RegisterInfos_x86_64.h48
-rw-r--r--lldb/source/Plugins/Process/Utility/StopInfoMachException.cpp187
-rw-r--r--lldb/source/Plugins/Process/Utility/StopInfoMachException.h5
-rw-r--r--lldb/source/Plugins/Process/Utility/lldb-mips-freebsd-register-enums.h42
-rw-r--r--lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp24
-rw-r--r--lldb/source/Plugins/Process/elf-core/ProcessElfCore.h23
-rw-r--r--lldb/source/Plugins/Process/elf-core/RegisterUtilities.cpp2
-rw-r--r--lldb/source/Plugins/Process/gdb-remote/GDBRemoteClientBase.cpp26
-rw-r--r--lldb/source/Plugins/Process/gdb-remote/GDBRemoteClientBase.h4
-rw-r--r--lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.cpp28
-rw-r--r--lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.h29
-rw-r--r--lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp623
-rw-r--r--lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h65
-rw-r--r--lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerCommon.cpp153
-rw-r--r--lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerCommon.h2
-rw-r--r--lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp161
-rw-r--r--lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.h6
-rw-r--r--lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerPlatform.cpp11
-rw-r--r--lldb/source/Plugins/Process/gdb-remote/GDBRemoteErrno.def39
-rw-r--r--lldb/source/Plugins/Process/gdb-remote/GDBRemoteRegisterContext.cpp393
-rw-r--r--lldb/source/Plugins/Process/gdb-remote/GDBRemoteRegisterContext.h5
-rw-r--r--lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp1324
-rw-r--r--lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h48
-rw-r--r--lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp25
-rw-r--r--lldb/source/Plugins/Process/minidump/ProcessMinidump.h19
-rw-r--r--lldb/source/Plugins/Process/minidump/RegisterContextMinidump_ARM.cpp36
-rw-r--r--lldb/source/Plugins/Process/minidump/RegisterContextMinidump_ARM64.cpp35
-rw-r--r--lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp97
-rw-r--r--lldb/source/Plugins/Process/scripted/ScriptedProcess.h28
-rw-r--r--lldb/source/Plugins/Process/scripted/ScriptedThread.cpp211
-rw-r--r--lldb/source/Plugins/Process/scripted/ScriptedThread.h68
-rw-r--r--lldb/source/Plugins/REPL/Clang/ClangREPL.cpp102
-rw-r--r--lldb/source/Plugins/REPL/Clang/ClangREPL.h63
-rw-r--r--lldb/source/Plugins/ScriptInterpreter/Lua/ScriptInterpreterLua.cpp19
-rw-r--r--lldb/source/Plugins/ScriptInterpreter/Lua/ScriptInterpreterLua.h10
-rw-r--r--lldb/source/Plugins/ScriptInterpreter/None/ScriptInterpreterNone.cpp13
-rw-r--r--lldb/source/Plugins/ScriptInterpreter/None/ScriptInterpreterNone.h8
-rw-r--r--lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp29
-rw-r--r--lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h12
-rw-r--r--lldb/source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h6
-rw-r--r--lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp44
-rw-r--r--lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.h7
-rw-r--r--lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h18
-rw-r--r--lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.cpp285
-rw-r--r--lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.h25
-rw-r--r--lldb/source/Plugins/ScriptInterpreter/Python/ScriptedPythonInterface.cpp92
-rw-r--r--lldb/source/Plugins/ScriptInterpreter/Python/ScriptedPythonInterface.h151
-rw-r--r--lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.cpp140
-rw-r--r--lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.h48
-rw-r--r--lldb/source/Plugins/StructuredData/DarwinLog/StructuredDataDarwinLog.cpp49
-rw-r--r--lldb/source/Plugins/StructuredData/DarwinLog/StructuredDataDarwinLog.h6
-rw-r--r--lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp173
-rw-r--r--lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.h15
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.cpp7
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.h2
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h3
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp1081
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h47
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp4
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.h9
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.cpp9
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.h2
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp2
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.h8
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp285
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h29
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp11
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.h2
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp4
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h2
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.cpp13
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.h3
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp640
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h62
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp69
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h10
-rw-r--r--lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp6
-rw-r--r--lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp20
-rw-r--r--lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h10
-rw-r--r--lldb/source/Plugins/SymbolFile/PDB/PDBASTParser.cpp2
-rw-r--r--lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp34
-rw-r--r--lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.h8
-rw-r--r--lldb/source/Plugins/SymbolFile/Symtab/SymbolFileSymtab.cpp14
-rw-r--r--lldb/source/Plugins/SymbolFile/Symtab/SymbolFileSymtab.h8
-rw-r--r--lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.cpp12
-rw-r--r--lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.h8
-rw-r--r--lldb/source/Plugins/SymbolVendor/wasm/SymbolVendorWasm.cpp12
-rw-r--r--lldb/source/Plugins/SymbolVendor/wasm/SymbolVendorWasm.h7
-rw-r--r--lldb/source/Plugins/Trace/common/TraceJSONStructs.cpp106
-rw-r--r--lldb/source/Plugins/Trace/common/TraceJSONStructs.h98
-rw-r--r--lldb/source/Plugins/Trace/common/TraceSessionFileParser.cpp52
-rw-r--r--lldb/source/Plugins/Trace/common/TraceSessionFileParser.h88
-rw-r--r--lldb/source/Plugins/Trace/common/TraceSessionSaver.cpp149
-rw-r--r--lldb/source/Plugins/Trace/common/TraceSessionSaver.h112
-rw-r--r--lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp39
-rw-r--r--lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.h22
-rw-r--r--lldb/source/Plugins/Trace/intel-pt/TraceIntelPTJSONStructs.cpp59
-rw-r--r--lldb/source/Plugins/Trace/intel-pt/TraceIntelPTJSONStructs.h75
-rw-r--r--lldb/source/Plugins/Trace/intel-pt/TraceIntelPTOptions.td10
-rw-r--r--lldb/source/Plugins/Trace/intel-pt/TraceIntelPTSessionFileParser.cpp42
-rw-r--r--lldb/source/Plugins/Trace/intel-pt/TraceIntelPTSessionFileParser.h34
-rw-r--r--lldb/source/Plugins/Trace/intel-pt/TraceIntelPTSessionSaver.cpp79
-rw-r--r--lldb/source/Plugins/Trace/intel-pt/TraceIntelPTSessionSaver.h57
-rw-r--r--lldb/source/Plugins/TraceExporter/common/TraceHTR.cpp485
-rw-r--r--lldb/source/Plugins/TraceExporter/common/TraceHTR.h409
-rw-r--r--lldb/source/Plugins/TraceExporter/ctf/CommandObjectThreadTraceExportCTF.cpp40
-rw-r--r--lldb/source/Plugins/TraceExporter/ctf/CommandObjectThreadTraceExportCTF.h4
-rw-r--r--lldb/source/Plugins/TraceExporter/ctf/TraceExporterCTF.cpp9
-rw-r--r--lldb/source/Plugins/TraceExporter/ctf/TraceExporterCTF.h8
-rw-r--r--lldb/source/Plugins/TraceExporter/ctf/TraceExporterCTFOptions.td4
-rw-r--r--lldb/source/Plugins/TraceExporter/docs/htr.rst48
-rw-r--r--lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp186
-rw-r--r--lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h32
-rw-r--r--lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.cpp14
-rw-r--r--lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.h8
-rw-r--r--lldb/source/Plugins/UnwindAssembly/x86/UnwindAssembly-x86.cpp15
-rw-r--r--lldb/source/Plugins/UnwindAssembly/x86/UnwindAssembly-x86.h8
-rw-r--r--lldb/source/Symbol/Block.cpp10
-rw-r--r--lldb/source/Symbol/CompileUnit.cpp23
-rw-r--r--lldb/source/Symbol/DeclVendor.cpp2
-rw-r--r--lldb/source/Symbol/LocateSymbolFileMacOSX.cpp13
-rw-r--r--lldb/source/Symbol/Symbol.cpp3
-rw-r--r--lldb/source/Symbol/SymbolContext.cpp7
-rw-r--r--lldb/source/Symbol/SymbolFile.cpp12
-rw-r--r--lldb/source/Symbol/SymbolVendor.cpp8
-rw-r--r--lldb/source/Symbol/Symtab.cpp6
-rw-r--r--lldb/source/Symbol/TypeSystem.cpp90
-rw-r--r--lldb/source/Symbol/UnwindPlan.cpp1
-rw-r--r--lldb/source/Target/ABI.cpp58
-rw-r--r--lldb/source/Target/DynamicRegisterInfo.cpp (renamed from lldb/source/Plugins/Process/Utility/DynamicRegisterInfo.cpp)448
-rw-r--r--lldb/source/Target/Language.cpp19
-rw-r--r--lldb/source/Target/ModuleCache.cpp2
-rw-r--r--lldb/source/Target/OperatingSystem.cpp3
-rw-r--r--lldb/source/Target/PathMappingList.cpp40
-rw-r--r--lldb/source/Target/Platform.cpp314
-rw-r--r--lldb/source/Target/Process.cpp368
-rw-r--r--lldb/source/Target/ProcessTrace.cpp11
-rw-r--r--lldb/source/Target/RegisterContext.cpp50
-rw-r--r--lldb/source/Target/RegisterContextUnwind.cpp19
-rw-r--r--lldb/source/Target/RemoteAwarePlatform.cpp25
-rw-r--r--lldb/source/Target/Statistics.cpp196
-rw-r--r--lldb/source/Target/StopInfo.cpp143
-rw-r--r--lldb/source/Target/Target.cpp198
-rw-r--r--lldb/source/Target/TargetProperties.td7
-rw-r--r--lldb/source/Target/Thread.cpp29
-rw-r--r--lldb/source/Target/ThreadPlan.cpp8
-rw-r--r--lldb/source/Target/ThreadPlanBase.cpp6
-rw-r--r--lldb/source/Target/ThreadPlanCallFunction.cpp4
-rw-r--r--lldb/source/Target/ThreadPlanCallOnFunctionExit.cpp2
-rw-r--r--lldb/source/Target/ThreadPlanCallUserExpression.cpp6
-rw-r--r--lldb/source/Target/ThreadPlanPython.cpp2
-rw-r--r--lldb/source/Target/ThreadPlanStack.cpp40
-rw-r--r--lldb/source/Target/ThreadPlanStepOverBreakpoint.cpp4
-rw-r--r--lldb/source/Target/Trace.cpp28
-rw-r--r--lldb/source/Target/TraceExporter.cpp5
-rw-r--r--lldb/source/Target/UnixSignals.cpp24
-rw-r--r--lldb/source/Utility/Environment.cpp2
-rw-r--r--lldb/source/Utility/FileSpec.cpp4
-rw-r--r--lldb/source/Utility/ReproducerInstrumentation.cpp11
-rw-r--r--lldb/source/Utility/Scalar.cpp2
-rw-r--r--lldb/source/Utility/Status.cpp4
-rw-r--r--lldb/source/Utility/StringExtractorGDBRemote.cpp6
-rw-r--r--lldb/source/Utility/Timer.cpp5
-rw-r--r--lldb/source/Utility/UriParser.cpp43
-rw-r--r--lldb/source/Utility/VMRange.cpp10
-rw-r--r--lldb/tools/argdumper/argdumper.exports0
-rw-r--r--lldb/tools/compact-unwind/compact-unwind-dumper.c43
-rw-r--r--lldb/tools/driver/Driver.cpp21
-rw-r--r--lldb/tools/driver/Driver.h1
-rw-r--r--lldb/tools/driver/Options.td6
-rw-r--r--lldb/tools/driver/Platform.h7
-rw-r--r--lldb/tools/lldb-server/Acceptor.cpp15
-rw-r--r--lldb/tools/lldb-server/lldb-gdbserver.cpp170
-rw-r--r--lldb/tools/lldb-server/lldb-server.exports0
-rw-r--r--llvm/include/llvm-c/Comdat.h11
-rw-r--r--llvm/include/llvm-c/Core.h16
-rw-r--r--llvm/include/llvm-c/DebugInfo.h82
-rw-r--r--llvm/include/llvm-c/DisassemblerTypes.h10
-rw-r--r--llvm/include/llvm-c/Error.h11
-rw-r--r--llvm/include/llvm-c/ErrorHandling.h10
-rw-r--r--llvm/include/llvm-c/IRReader.h11
-rw-r--r--llvm/include/llvm-c/LLJIT.h11
-rw-r--r--llvm/include/llvm-c/Linker.h11
-rw-r--r--llvm/include/llvm-c/Orc.h54
-rw-r--r--llvm/include/llvm-c/OrcEE.h11
-rw-r--r--llvm/include/llvm-c/Support.h10
-rw-r--r--llvm/include/llvm-c/TargetMachine.h10
-rw-r--r--llvm/include/llvm-c/Transforms/PassBuilder.h13
-rw-r--r--llvm/include/llvm-c/lto.h12
-rw-r--r--llvm/include/llvm/ADT/APFloat.h4
-rw-r--r--llvm/include/llvm/ADT/APInt.h883
-rw-r--r--llvm/include/llvm/ADT/APSInt.h10
-rw-r--r--llvm/include/llvm/ADT/ArrayRef.h4
-rw-r--r--llvm/include/llvm/ADT/BitVector.h24
-rw-r--r--llvm/include/llvm/ADT/CombinationGenerator.h148
-rw-r--r--llvm/include/llvm/ADT/DenseMapInfo.h16
-rw-r--r--llvm/include/llvm/ADT/EquivalenceClasses.h33
-rw-r--r--llvm/include/llvm/ADT/FunctionExtras.h16
-rw-r--r--llvm/include/llvm/ADT/Hashing.h8
-rw-r--r--llvm/include/llvm/ADT/ImmutableList.h3
-rw-r--r--llvm/include/llvm/ADT/IntervalMap.h2
-rw-r--r--llvm/include/llvm/ADT/MapVector.h1
-rw-r--r--llvm/include/llvm/ADT/PointerIntPair.h4
-rw-r--r--llvm/include/llvm/ADT/PointerUnion.h31
-rw-r--r--llvm/include/llvm/ADT/STLExtras.h171
-rw-r--r--llvm/include/llvm/ADT/Sequence.h164
-rw-r--r--llvm/include/llvm/ADT/SetOperations.h9
-rw-r--r--llvm/include/llvm/ADT/SmallBitVector.h51
-rw-r--r--llvm/include/llvm/ADT/SmallVector.h15
-rw-r--r--llvm/include/llvm/ADT/StringExtras.h137
-rw-r--r--llvm/include/llvm/ADT/StringMap.h27
-rw-r--r--llvm/include/llvm/ADT/StringRef.h3
-rw-r--r--llvm/include/llvm/ADT/Triple.h158
-rw-r--r--llvm/include/llvm/ADT/TypeSwitch.h7
-rw-r--r--llvm/include/llvm/ADT/iterator.h49
-rw-r--r--llvm/include/llvm/Analysis/AliasAnalysis.h69
-rw-r--r--llvm/include/llvm/Analysis/AssumeBundleQueries.h9
-rw-r--r--llvm/include/llvm/Analysis/AssumptionCache.h10
-rw-r--r--llvm/include/llvm/Analysis/BasicAliasAnalysis.h82
-rw-r--r--llvm/include/llvm/Analysis/CGSCCPassManager.h101
-rw-r--r--llvm/include/llvm/Analysis/CaptureTracking.h25
-rw-r--r--llvm/include/llvm/Analysis/ConstantFolding.h30
-rw-r--r--llvm/include/llvm/Analysis/CostModel.h26
-rw-r--r--llvm/include/llvm/Analysis/Delinearization.h105
-rw-r--r--llvm/include/llvm/Analysis/HeatUtils.h7
-rw-r--r--llvm/include/llvm/Analysis/IRSimilarityIdentifier.h250
-rw-r--r--llvm/include/llvm/Analysis/IVDescriptors.h88
-rw-r--r--llvm/include/llvm/Analysis/IVUsers.h3
-rw-r--r--llvm/include/llvm/Analysis/InlineAdvisor.h38
-rw-r--r--llvm/include/llvm/Analysis/InlineCost.h3
-rw-r--r--llvm/include/llvm/Analysis/InlineOrder.h172
-rw-r--r--llvm/include/llvm/Analysis/InstructionSimplify.h2
-rw-r--r--llvm/include/llvm/Analysis/LazyCallGraph.h65
-rw-r--r--llvm/include/llvm/Analysis/LoopAccessAnalysis.h26
-rw-r--r--llvm/include/llvm/Analysis/LoopAnalysisManager.h1
-rw-r--r--llvm/include/llvm/Analysis/LoopInfo.h17
-rw-r--r--llvm/include/llvm/Analysis/LoopInfoImpl.h5
-rw-r--r--llvm/include/llvm/Analysis/LoopNestAnalysis.h22
-rw-r--r--llvm/include/llvm/Analysis/MLInlineAdvisor.h2
-rw-r--r--llvm/include/llvm/Analysis/MemorySSA.h30
-rw-r--r--llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h28
-rw-r--r--llvm/include/llvm/Analysis/ObjCARCUtil.h45
-rw-r--r--llvm/include/llvm/Analysis/ProfileSummaryInfo.h12
-rw-r--r--llvm/include/llvm/Analysis/ReplayInlineAdvisor.h53
-rw-r--r--llvm/include/llvm/Analysis/ScalarEvolution.h282
-rw-r--r--llvm/include/llvm/Analysis/StackLifetime.h2
-rw-r--r--llvm/include/llvm/Analysis/StackSafetyAnalysis.h8
-rw-r--r--llvm/include/llvm/Analysis/TargetLibraryInfo.h6
-rw-r--r--llvm/include/llvm/Analysis/TargetTransformInfo.h83
-rw-r--r--llvm/include/llvm/Analysis/TargetTransformInfoImpl.h131
-rw-r--r--llvm/include/llvm/Analysis/TypeMetadataUtils.h28
-rw-r--r--llvm/include/llvm/Analysis/Utils/TFUtils.h7
-rw-r--r--llvm/include/llvm/Analysis/ValueTracking.h36
-rw-r--r--llvm/include/llvm/Analysis/VectorUtils.h12
-rw-r--r--llvm/include/llvm/AsmParser/LLLexer.h4
-rw-r--r--llvm/include/llvm/AsmParser/LLParser.h27
-rw-r--r--llvm/include/llvm/AsmParser/LLToken.h8
-rw-r--r--llvm/include/llvm/BinaryFormat/Dwarf.def3
-rw-r--r--llvm/include/llvm/BinaryFormat/DynamicTags.def12
-rw-r--r--llvm/include/llvm/BinaryFormat/ELF.h23
-rw-r--r--llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def4
-rw-r--r--llvm/include/llvm/BinaryFormat/MachO.def2
-rw-r--r--llvm/include/llvm/BinaryFormat/Wasm.h41
-rw-r--r--llvm/include/llvm/BinaryFormat/WasmTraits.h18
-rw-r--r--llvm/include/llvm/BinaryFormat/XCOFF.h16
-rw-r--r--llvm/include/llvm/Bitcode/BitcodeAnalyzer.h2
-rw-r--r--llvm/include/llvm/Bitcode/BitcodeCommon.h8
-rw-r--r--llvm/include/llvm/Bitcode/LLVMBitCodes.h1
-rw-r--r--llvm/include/llvm/CodeGen/Analysis.h5
-rw-r--r--llvm/include/llvm/CodeGen/AsmPrinter.h7
-rw-r--r--llvm/include/llvm/CodeGen/BasicTTIImpl.h264
-rw-r--r--llvm/include/llvm/CodeGen/CodeGenCommonISel.h219
-rw-r--r--llvm/include/llvm/CodeGen/CommandFlags.h7
-rw-r--r--llvm/include/llvm/CodeGen/FunctionLoweringInfo.h1
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h14
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h131
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h37
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h38
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/LegacyLegalizerInfo.h2
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h184
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h17
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h29
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h165
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h49
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h8
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/Utils.h100
-rw-r--r--llvm/include/llvm/CodeGen/ISDOpcodes.h11
-rw-r--r--llvm/include/llvm/CodeGen/IndirectThunks.h2
-rw-r--r--llvm/include/llvm/CodeGen/LinkAllAsmWriterComponents.h3
-rw-r--r--llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h3
-rw-r--r--llvm/include/llvm/CodeGen/LiveInterval.h10
-rw-r--r--llvm/include/llvm/CodeGen/LiveIntervalUnion.h29
-rw-r--r--llvm/include/llvm/CodeGen/LiveVariables.h6
-rw-r--r--llvm/include/llvm/CodeGen/LowLevelType.h3
-rw-r--r--llvm/include/llvm/CodeGen/MIRFSDiscriminator.h4
-rw-r--r--llvm/include/llvm/CodeGen/MIRFormatter.h7
-rw-r--r--llvm/include/llvm/CodeGen/MIRSampleProfile.h76
-rw-r--r--llvm/include/llvm/CodeGen/MIRYamlMapping.h2
-rw-r--r--llvm/include/llvm/CodeGen/MachineCombinerPattern.h13
-rw-r--r--llvm/include/llvm/CodeGen/MachineDominators.h16
-rw-r--r--llvm/include/llvm/CodeGen/MachineFrameInfo.h2
-rw-r--r--llvm/include/llvm/CodeGen/MachineFunction.h16
-rw-r--r--llvm/include/llvm/CodeGen/MachineInstr.h14
-rw-r--r--llvm/include/llvm/CodeGen/MachineMemOperand.h12
-rw-r--r--llvm/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h6
-rw-r--r--llvm/include/llvm/CodeGen/MachineRegisterInfo.h41
-rw-r--r--llvm/include/llvm/CodeGen/MacroFusion.h14
-rw-r--r--llvm/include/llvm/CodeGen/Passes.h12
-rw-r--r--llvm/include/llvm/CodeGen/RegAllocCommon.h7
-rw-r--r--llvm/include/llvm/CodeGen/RegisterScavenging.h3
-rw-r--r--llvm/include/llvm/CodeGen/SelectionDAG.h91
-rw-r--r--llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h1
-rw-r--r--llvm/include/llvm/CodeGen/SelectionDAGNodes.h238
-rw-r--r--llvm/include/llvm/CodeGen/SwitchLoweringUtils.h8
-rw-r--r--llvm/include/llvm/CodeGen/TargetCallingConv.h8
-rw-r--r--llvm/include/llvm/CodeGen/TargetInstrInfo.h33
-rw-r--r--llvm/include/llvm/CodeGen/TargetLowering.h84
-rw-r--r--llvm/include/llvm/CodeGen/TargetPassConfig.h14
-rw-r--r--llvm/include/llvm/CodeGen/TargetRegisterInfo.h6
-rw-r--r--llvm/include/llvm/CodeGen/TargetSchedule.h1
-rw-r--r--llvm/include/llvm/CodeGen/ValueTypes.td3
-rw-r--r--llvm/include/llvm/DWARFLinker/DWARFLinker.h24
-rw-r--r--llvm/include/llvm/DebugInfo/CodeView/CVRecord.h1
-rw-r--r--llvm/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def2
-rw-r--r--llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h1
-rw-r--r--llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h21
-rw-r--r--llvm/include/llvm/DebugInfo/DWARF/DWARFAddressRange.h6
-rw-r--r--llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h28
-rw-r--r--llvm/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h32
-rw-r--r--llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h4
-rw-r--r--llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h7
-rw-r--r--llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h14
-rw-r--r--llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h40
-rw-r--r--llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h17
-rw-r--r--llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h30
-rw-r--r--llvm/include/llvm/DebugInfo/GSYM/StringTable.h1
-rw-r--r--llvm/include/llvm/DebugInfo/MSF/MSFCommon.h3
-rw-r--r--llvm/include/llvm/DebugInfo/MSF/MappedBlockStream.h20
-rw-r--r--llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleList.h4
-rw-r--r--llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h5
-rw-r--r--llvm/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h1
-rw-r--r--llvm/include/llvm/DebugInfo/PDB/Native/NativeLineNumber.h1
-rw-r--r--llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h1
-rw-r--r--llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h1
-rw-r--r--llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h1
-rw-r--r--llvm/include/llvm/Demangle/Demangle.h14
-rw-r--r--llvm/include/llvm/Demangle/ItaniumDemangle.h1315
-rw-r--r--llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h93
-rw-r--r--llvm/include/llvm/Demangle/Utility.h51
-rw-r--r--llvm/include/llvm/ExecutionEngine/ExecutionEngine.h1
-rw-r--r--llvm/include/llvm/ExecutionEngine/JITLink/ELF_aarch64.h39
-rw-r--r--llvm/include/llvm/ExecutionEngine/JITLink/ELF_riscv.h2
-rw-r--r--llvm/include/llvm/ExecutionEngine/JITLink/ELF_x86_64.h20
-rw-r--r--llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h98
-rw-r--r--llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h420
-rw-r--r--llvm/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h2
-rw-r--r--llvm/include/llvm/ExecutionEngine/JITLink/MemoryFlags.h225
-rw-r--r--llvm/include/llvm/ExecutionEngine/JITLink/TableManager.h63
-rw-r--r--llvm/include/llvm/ExecutionEngine/JITLink/aarch64.h38
-rw-r--r--llvm/include/llvm/ExecutionEngine/JITLink/riscv.h14
-rw-r--r--llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h275
-rw-r--r--llvm/include/llvm/ExecutionEngine/MCJIT.h3
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/Core.h100
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/DebuggerSupportPlugin.h64
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/ELFNixPlatform.h330
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h9
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h9
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/EPCGenericDylibManager.h67
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.h97
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/EPCGenericMemoryAccess.h85
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.h133
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h6
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/ExecutorProcessControl.h272
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h34
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/LLVMSPSSerializers.h69
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/LookupAndRecordAddrs.h70
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h88
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h6
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/OrcRPCExecutorProcessControl.h436
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h925
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h386
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h464
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h138
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/Shared/FDRawByteChannel.h79
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h68
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/Shared/RPCUtils.h1659
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/Shared/RawByteChannel.h183
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/Shared/Serialization.h769
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h124
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.h235
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h286
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h124
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/SimpleRemoteEPC.h140
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/ExecutorBootstrapService.h36
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h2
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/OrcRPCTPCServer.h660
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.h20
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorDylibManager.h64
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.h70
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.h182
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/TaskDispatch.h131
-rw-r--r--llvm/include/llvm/ExecutionEngine/OrcMCJITReplacement.h37
-rw-r--r--llvm/include/llvm/ExecutionEngine/RuntimeDyld.h14
-rw-r--r--llvm/include/llvm/Frontend/OpenMP/OMP.td69
-rw-r--r--llvm/include/llvm/Frontend/OpenMP/OMPConstants.h8
-rw-r--r--llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h117
-rw-r--r--llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h430
-rw-r--r--llvm/include/llvm/Frontend/OpenMP/OMPKinds.def63
-rw-r--r--llvm/include/llvm/IR/AbstractCallSite.h2
-rw-r--r--llvm/include/llvm/IR/Argument.h2
-rw-r--r--llvm/include/llvm/IR/Assumptions.h22
-rw-r--r--llvm/include/llvm/IR/Attributes.h371
-rw-r--r--llvm/include/llvm/IR/Attributes.td3
-rw-r--r--llvm/include/llvm/IR/BasicBlock.h12
-rw-r--r--llvm/include/llvm/IR/Constant.h6
-rw-r--r--llvm/include/llvm/IR/ConstantRange.h40
-rw-r--r--llvm/include/llvm/IR/Constants.h15
-rw-r--r--llvm/include/llvm/IR/DIBuilder.h80
-rw-r--r--llvm/include/llvm/IR/DataLayout.h23
-rw-r--r--llvm/include/llvm/IR/DebugInfo.h2
-rw-r--r--llvm/include/llvm/IR/DebugInfoMetadata.h437
-rw-r--r--llvm/include/llvm/IR/DerivedTypes.h7
-rw-r--r--llvm/include/llvm/IR/DiagnosticInfo.h36
-rw-r--r--llvm/include/llvm/IR/DiagnosticPrinter.h2
-rw-r--r--llvm/include/llvm/IR/Dominators.h6
-rw-r--r--llvm/include/llvm/IR/FPEnv.h14
-rw-r--r--llvm/include/llvm/IR/Function.h252
-rw-r--r--llvm/include/llvm/IR/GCStrategy.h3
-rw-r--r--llvm/include/llvm/IR/GlobalAlias.h33
-rw-r--r--llvm/include/llvm/IR/GlobalIFunc.h44
-rw-r--r--llvm/include/llvm/IR/GlobalIndirectSymbol.h93
-rw-r--r--llvm/include/llvm/IR/GlobalObject.h7
-rw-r--r--llvm/include/llvm/IR/GlobalValue.h14
-rw-r--r--llvm/include/llvm/IR/IRBuilder.h32
-rw-r--r--llvm/include/llvm/IR/InstrTypes.h292
-rw-r--r--llvm/include/llvm/IR/Instruction.h18
-rw-r--r--llvm/include/llvm/IR/Instructions.h105
-rw-r--r--llvm/include/llvm/IR/IntrinsicInst.h22
-rw-r--r--llvm/include/llvm/IR/Intrinsics.h3
-rw-r--r--llvm/include/llvm/IR/Intrinsics.td158
-rw-r--r--llvm/include/llvm/IR/IntrinsicsAArch64.td85
-rw-r--r--llvm/include/llvm/IR/IntrinsicsAMDGPU.td52
-rw-r--r--llvm/include/llvm/IR/IntrinsicsBPF.td3
-rw-r--r--llvm/include/llvm/IR/IntrinsicsNVVM.td778
-rw-r--r--llvm/include/llvm/IR/IntrinsicsPowerPC.td60
-rw-r--r--llvm/include/llvm/IR/IntrinsicsRISCV.td177
-rw-r--r--llvm/include/llvm/IR/IntrinsicsSystemZ.td8
-rw-r--r--llvm/include/llvm/IR/IntrinsicsWebAssembly.td70
-rw-r--r--llvm/include/llvm/IR/IntrinsicsX86.td762
-rw-r--r--llvm/include/llvm/IR/LLVMContext.h4
-rw-r--r--llvm/include/llvm/IR/MatrixBuilder.h32
-rw-r--r--llvm/include/llvm/IR/Metadata.h35
-rw-r--r--llvm/include/llvm/IR/Module.h9
-rw-r--r--llvm/include/llvm/IR/ModuleSummaryIndex.h50
-rw-r--r--llvm/include/llvm/IR/Operator.h7
-rw-r--r--llvm/include/llvm/IR/OptBisect.h26
-rw-r--r--llvm/include/llvm/IR/PassManager.h81
-rw-r--r--llvm/include/llvm/IR/PassManagerInternal.h9
-rw-r--r--llvm/include/llvm/IR/PatternMatch.h131
-rw-r--r--llvm/include/llvm/IR/ProfileSummary.h38
-rw-r--r--llvm/include/llvm/IR/PseudoProbe.h4
-rw-r--r--llvm/include/llvm/IR/ReplaceConstant.h4
-rw-r--r--llvm/include/llvm/IR/RuntimeLibcalls.def4
-rw-r--r--llvm/include/llvm/IR/Type.h38
-rw-r--r--llvm/include/llvm/IR/VPIntrinsics.def141
-rw-r--r--llvm/include/llvm/IR/Value.h35
-rw-r--r--llvm/include/llvm/InitializePasses.h5
-rw-r--r--llvm/include/llvm/InterfaceStub/IFSHandler.h3
-rw-r--r--llvm/include/llvm/LTO/Caching.h38
-rw-r--r--llvm/include/llvm/LTO/Config.h3
-rw-r--r--llvm/include/llvm/LTO/LTO.h47
-rw-r--r--llvm/include/llvm/LTO/SummaryBasedOptimizations.h2
-rw-r--r--llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h2
-rw-r--r--llvm/include/llvm/LTO/legacy/LTOModule.h4
-rw-r--r--llvm/include/llvm/LinkAllIR.h3
-rw-r--r--llvm/include/llvm/LinkAllPasses.h3
-rw-r--r--llvm/include/llvm/MC/MCAsmBackend.h10
-rw-r--r--llvm/include/llvm/MC/MCAsmInfoGOFF.h29
-rw-r--r--llvm/include/llvm/MC/MCContext.h2
-rw-r--r--llvm/include/llvm/MC/MCDwarf.h38
-rw-r--r--llvm/include/llvm/MC/MCELFObjectWriter.h2
-rw-r--r--llvm/include/llvm/MC/MCELFStreamer.h2
-rw-r--r--llvm/include/llvm/MC/MCExpr.h2
-rw-r--r--llvm/include/llvm/MC/MCFragment.h31
-rw-r--r--llvm/include/llvm/MC/MCInstrAnalysis.h11
-rw-r--r--llvm/include/llvm/MC/MCInstrDesc.h4
-rw-r--r--llvm/include/llvm/MC/MCObjectFileInfo.h4
-rw-r--r--llvm/include/llvm/MC/MCObjectStreamer.h6
-rw-r--r--llvm/include/llvm/MC/MCPseudoProbe.h292
-rw-r--r--llvm/include/llvm/MC/MCRegister.h1
-rw-r--r--llvm/include/llvm/MC/MCSchedule.h1
-rw-r--r--llvm/include/llvm/MC/MCStreamer.h10
-rw-r--r--llvm/include/llvm/MC/MCSymbolWasm.h12
-rw-r--r--llvm/include/llvm/MC/MCWasmStreamer.h5
-rw-r--r--llvm/include/llvm/MC/MCWinCOFFStreamer.h2
-rw-r--r--llvm/include/llvm/MC/TargetRegistry.h (renamed from llvm/include/llvm/Support/TargetRegistry.h)84
-rw-r--r--llvm/include/llvm/MCA/CustomBehaviour.h51
-rw-r--r--llvm/include/llvm/MCA/Instruction.h4
-rw-r--r--llvm/include/llvm/MCA/Stages/InOrderIssueStage.h5
-rw-r--r--llvm/include/llvm/MCA/View.h (renamed from llvm/tools/llvm-mca/Views/View.h)4
-rw-r--r--llvm/include/llvm/Object/ELF.h7
-rw-r--r--llvm/include/llvm/Object/ELFObjectFile.h17
-rw-r--r--llvm/include/llvm/Object/ELFTypes.h8
-rw-r--r--llvm/include/llvm/Object/Error.h4
-rw-r--r--llvm/include/llvm/Object/MachO.h3
-rw-r--r--llvm/include/llvm/Object/Wasm.h9
-rw-r--r--llvm/include/llvm/Object/XCOFFObjectFile.h153
-rw-r--r--llvm/include/llvm/ObjectYAML/MachOYAML.h1
-rw-r--r--llvm/include/llvm/ObjectYAML/WasmYAML.h38
-rw-r--r--llvm/include/llvm/ObjectYAML/XCOFFYAML.h54
-rw-r--r--llvm/include/llvm/Option/Arg.h5
-rw-r--r--llvm/include/llvm/Option/OptParser.td2
-rw-r--r--llvm/include/llvm/Option/OptTable.h13
-rw-r--r--llvm/include/llvm/Option/Option.h14
-rw-r--r--llvm/include/llvm/Passes/OptimizationLevel.h127
-rw-r--r--llvm/include/llvm/Passes/PassBuilder.h178
-rw-r--r--llvm/include/llvm/Passes/StandardInstrumentations.h217
-rw-r--r--llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h7
-rw-r--r--llvm/include/llvm/ProfileData/InstrProf.h18
-rw-r--r--llvm/include/llvm/ProfileData/InstrProfData.inc11
-rw-r--r--llvm/include/llvm/ProfileData/InstrProfReader.h18
-rw-r--r--llvm/include/llvm/ProfileData/ProfileCommon.h10
-rw-r--r--llvm/include/llvm/ProfileData/SampleProf.h376
-rw-r--r--llvm/include/llvm/ProfileData/SampleProfReader.h49
-rw-r--r--llvm/include/llvm/ProfileData/SampleProfWriter.h74
-rw-r--r--llvm/include/llvm/Support/AArch64TargetParser.def36
-rw-r--r--llvm/include/llvm/Support/ARMTargetParser.def18
-rw-r--r--llvm/include/llvm/Support/Allocator.h2
-rw-r--r--llvm/include/llvm/Support/AtomicOrdering.h10
-rw-r--r--llvm/include/llvm/Support/BinaryByteStream.h34
-rw-r--r--llvm/include/llvm/Support/BinaryItemStream.h14
-rw-r--r--llvm/include/llvm/Support/BinaryStream.h12
-rw-r--r--llvm/include/llvm/Support/BinaryStreamArray.h7
-rw-r--r--llvm/include/llvm/Support/BinaryStreamReader.h14
-rw-r--r--llvm/include/llvm/Support/BinaryStreamRef.h71
-rw-r--r--llvm/include/llvm/Support/BinaryStreamWriter.h14
-rw-r--r--llvm/include/llvm/Support/Caching.h71
-rw-r--r--llvm/include/llvm/Support/CommandLine.h64
-rw-r--r--llvm/include/llvm/Support/Compiler.h60
-rw-r--r--llvm/include/llvm/Support/CrashRecoveryContext.h3
-rw-r--r--llvm/include/llvm/Support/DOTGraphTraits.h5
-rw-r--r--llvm/include/llvm/Support/DataExtractor.h3
-rw-r--r--llvm/include/llvm/Support/Debug.h21
-rw-r--r--llvm/include/llvm/Support/DivisionByConstantInfo.h38
-rw-r--r--llvm/include/llvm/Support/Error.h37
-rw-r--r--llvm/include/llvm/Support/ErrorHandling.h26
-rw-r--r--llvm/include/llvm/Support/ExtensibleRTTI.h7
-rw-r--r--llvm/include/llvm/Support/FileSystem.h8
-rw-r--r--llvm/include/llvm/Support/FileSystem/UniqueID.h27
-rw-r--r--llvm/include/llvm/Support/FormatVariadic.h2
-rw-r--r--llvm/include/llvm/Support/GenericDomTreeConstruction.h4
-rw-r--r--llvm/include/llvm/Support/GraphWriter.h91
-rw-r--r--llvm/include/llvm/Support/HashBuilder.h438
-rw-r--r--llvm/include/llvm/Support/JSON.h46
-rw-r--r--llvm/include/llvm/Support/KnownBits.h21
-rw-r--r--llvm/include/llvm/Support/MD5.h37
-rw-r--r--llvm/include/llvm/Support/MSP430AttributeParser.h44
-rw-r--r--llvm/include/llvm/Support/MSP430Attributes.h44
-rw-r--r--llvm/include/llvm/Support/MachineValueType.h50
-rw-r--r--llvm/include/llvm/Support/Memory.h13
-rw-r--r--llvm/include/llvm/Support/PGOOptions.h65
-rw-r--r--llvm/include/llvm/Support/Parallel.h5
-rw-r--r--llvm/include/llvm/Support/Path.h67
-rw-r--r--llvm/include/llvm/Support/Process.h6
-rw-r--r--llvm/include/llvm/Support/RISCVISAInfo.h89
-rw-r--r--llvm/include/llvm/Support/RISCVTargetParser.def10
-rw-r--r--llvm/include/llvm/Support/Signposts.h43
-rw-r--r--llvm/include/llvm/Support/TargetOpcodes.def3
-rw-r--r--llvm/include/llvm/Support/TargetSelect.h12
-rw-r--r--llvm/include/llvm/Support/TypeSize.h8
-rw-r--r--llvm/include/llvm/Support/VersionTuple.h7
-rw-r--r--llvm/include/llvm/Support/VirtualFileSystem.h35
-rw-r--r--llvm/include/llvm/Support/Windows/WindowsSupport.h4
-rw-r--r--llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h8
-rw-r--r--llvm/include/llvm/Support/X86TargetParser.def135
-rw-r--r--llvm/include/llvm/Support/X86TargetParser.h4
-rw-r--r--llvm/include/llvm/Support/YAMLTraits.h2
-rw-r--r--llvm/include/llvm/Support/raw_ostream.h8
-rw-r--r--llvm/include/llvm/TableGen/DirectiveEmitter.h2
-rw-r--r--llvm/include/llvm/TableGen/Error.h22
-rw-r--r--llvm/include/llvm/TableGen/Record.h51
-rw-r--r--llvm/include/llvm/Target/GenericOpcodes.td12
-rw-r--r--llvm/include/llvm/Target/GlobalISel/Combine.td101
-rw-r--r--llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td2
-rw-r--r--llvm/include/llvm/Target/Target.td22
-rw-r--r--llvm/include/llvm/Target/TargetLoweringObjectFile.h9
-rw-r--r--llvm/include/llvm/Target/TargetMachine.h25
-rw-r--r--llvm/include/llvm/Target/TargetOptions.h34
-rw-r--r--llvm/include/llvm/Target/TargetSelectionDAG.td28
-rw-r--r--llvm/include/llvm/TextAPI/Architecture.h6
-rw-r--r--llvm/include/llvm/TextAPI/ArchitectureSet.h6
-rw-r--r--llvm/include/llvm/TextAPI/InterfaceFile.h8
-rw-r--r--llvm/include/llvm/TextAPI/PackedVersion.h6
-rw-r--r--llvm/include/llvm/TextAPI/Platform.h6
-rw-r--r--llvm/include/llvm/TextAPI/Symbol.h6
-rw-r--r--llvm/include/llvm/TextAPI/Target.h6
-rw-r--r--llvm/include/llvm/TextAPI/TextAPIReader.h6
-rw-r--r--llvm/include/llvm/TextAPI/TextAPIWriter.h6
-rw-r--r--llvm/include/llvm/Transforms/IPO/Attributor.h214
-rw-r--r--llvm/include/llvm/Transforms/IPO/FunctionAttrs.h8
-rw-r--r--llvm/include/llvm/Transforms/IPO/FunctionImport.h29
-rw-r--r--llvm/include/llvm/Transforms/IPO/IROutliner.h36
-rw-r--r--llvm/include/llvm/Transforms/IPO/Inliner.h16
-rw-r--r--llvm/include/llvm/Transforms/IPO/LoopExtractor.h2
-rw-r--r--llvm/include/llvm/Transforms/IPO/ModuleInliner.h51
-rw-r--r--llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h1
-rw-r--r--llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h13
-rw-r--r--llvm/include/llvm/Transforms/IPO/SampleContextTracker.h55
-rw-r--r--llvm/include/llvm/Transforms/InstCombine/InstCombine.h10
-rw-r--r--llvm/include/llvm/Transforms/InstCombine/InstCombiner.h50
-rw-r--r--llvm/include/llvm/Transforms/Instrumentation.h6
-rw-r--r--llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h48
-rw-r--r--llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h79
-rw-r--r--llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerOptions.h7
-rw-r--r--llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h30
-rw-r--r--llvm/include/llvm/Transforms/Instrumentation/InstrOrderFile.h7
-rw-r--r--llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h7
-rw-r--r--llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h17
-rw-r--r--llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h8
-rw-r--r--llvm/include/llvm/Transforms/Scalar/EarlyCSE.h2
-rw-r--r--llvm/include/llvm/Transforms/Scalar/GVN.h15
-rw-r--r--llvm/include/llvm/Transforms/Scalar/JumpThreading.h8
-rw-r--r--llvm/include/llvm/Transforms/Scalar/LoopPassManager.h94
-rw-r--r--llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h2
-rw-r--r--llvm/include/llvm/Transforms/Scalar/LowerMatrixIntrinsics.h2
-rw-r--r--llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h9
-rw-r--r--llvm/include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h2
-rw-r--r--llvm/include/llvm/Transforms/Scalar/SROA.h4
-rw-r--r--llvm/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h3
-rw-r--r--llvm/include/llvm/Transforms/Scalar/SimplifyCFG.h3
-rw-r--r--llvm/include/llvm/Transforms/Utils/ASanStackFrameLayout.h12
-rw-r--r--llvm/include/llvm/Transforms/Utils/AddDiscriminators.h1
-rw-r--r--llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h30
-rw-r--r--llvm/include/llvm/Transforms/Utils/BuildLibCalls.h10
-rw-r--r--llvm/include/llvm/Transforms/Utils/Cloning.h4
-rw-r--r--llvm/include/llvm/Transforms/Utils/CodeExtractor.h18
-rw-r--r--llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h16
-rw-r--r--llvm/include/llvm/Transforms/Utils/EntryExitInstrumenter.h3
-rw-r--r--llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h3
-rw-r--r--llvm/include/llvm/Transforms/Utils/GlobalStatus.h15
-rw-r--r--llvm/include/llvm/Transforms/Utils/InstructionWorklist.h (renamed from llvm/include/llvm/Transforms/InstCombine/InstCombineWorklist.h)29
-rw-r--r--llvm/include/llvm/Transforms/Utils/Local.h40
-rw-r--r--llvm/include/llvm/Transforms/Utils/LoopPeel.h4
-rw-r--r--llvm/include/llvm/Transforms/Utils/LoopUtils.h56
-rw-r--r--llvm/include/llvm/Transforms/Utils/MemoryOpRemark.h7
-rw-r--r--llvm/include/llvm/Transforms/Utils/PredicateInfo.h6
-rw-r--r--llvm/include/llvm/Transforms/Utils/SSAUpdaterBulk.h4
-rw-r--r--llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h70
-rw-r--r--llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h9
-rw-r--r--llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h2
-rw-r--r--llvm/include/llvm/Transforms/Utils/UnrollLoop.h3
-rw-r--r--llvm/include/llvm/Transforms/Utils/ValueMapper.h11
-rw-r--r--llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h2
-rw-r--r--llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h2
-rw-r--r--llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h4
-rw-r--r--llvm/include/llvm/Transforms/Vectorize/VectorCombine.h10
-rw-r--r--llvm/include/llvm/WindowsManifest/WindowsManifestMerger.h3
-rw-r--r--llvm/include/llvm/module.modulemap18
-rw-r--r--llvm/lib/Analysis/AliasAnalysis.cpp56
-rw-r--r--llvm/lib/Analysis/AssumeBundleQueries.cpp4
-rw-r--r--llvm/lib/Analysis/AssumptionCache.cpp30
-rw-r--r--llvm/lib/Analysis/BasicAliasAnalysis.cpp622
-rw-r--r--llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp2
-rw-r--r--llvm/lib/Analysis/BranchProbabilityInfo.cpp2
-rw-r--r--llvm/lib/Analysis/CGSCCPassManager.cpp18
-rw-r--r--llvm/lib/Analysis/CaptureTracking.cpp93
-rw-r--r--llvm/lib/Analysis/CmpInstAnalysis.cpp8
-rw-r--r--llvm/lib/Analysis/CodeMetrics.cpp5
-rw-r--r--llvm/lib/Analysis/ConstantFolding.cpp377
-rw-r--r--llvm/lib/Analysis/CostModel.cpp22
-rw-r--r--llvm/lib/Analysis/Delinearization.cpp489
-rw-r--r--llvm/lib/Analysis/DemandedBits.cpp15
-rw-r--r--llvm/lib/Analysis/DependenceAnalysis.cpp37
-rw-r--r--llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp11
-rw-r--r--llvm/lib/Analysis/HeatUtils.cpp7
-rw-r--r--llvm/lib/Analysis/IRSimilarityIdentifier.cpp262
-rw-r--r--llvm/lib/Analysis/IVDescriptors.cpp205
-rw-r--r--llvm/lib/Analysis/IVUsers.cpp62
-rw-r--r--llvm/lib/Analysis/InlineAdvisor.cpp124
-rw-r--r--llvm/lib/Analysis/InlineCost.cpp168
-rw-r--r--llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp7
-rw-r--r--llvm/lib/Analysis/InstructionPrecedenceTracking.cpp14
-rw-r--r--llvm/lib/Analysis/InstructionSimplify.cpp299
-rw-r--r--llvm/lib/Analysis/LazyCallGraph.cpp26
-rw-r--r--llvm/lib/Analysis/LazyValueInfo.cpp113
-rw-r--r--llvm/lib/Analysis/Lint.cpp6
-rw-r--r--llvm/lib/Analysis/Loads.cpp13
-rw-r--r--llvm/lib/Analysis/LoopAccessAnalysis.cpp95
-rw-r--r--llvm/lib/Analysis/LoopCacheAnalysis.cpp18
-rw-r--r--llvm/lib/Analysis/LoopInfo.cpp12
-rw-r--r--llvm/lib/Analysis/LoopNestAnalysis.cpp173
-rw-r--r--llvm/lib/Analysis/MLInlineAdvisor.cpp3
-rw-r--r--llvm/lib/Analysis/MemoryBuiltins.cpp9
-rw-r--r--llvm/lib/Analysis/MemoryLocation.cpp35
-rw-r--r--llvm/lib/Analysis/MemorySSA.cpp207
-rw-r--r--llvm/lib/Analysis/MemorySSAUpdater.cpp53
-rw-r--r--llvm/lib/Analysis/ModuleSummaryAnalysis.cpp46
-rw-r--r--llvm/lib/Analysis/ObjCARCInstKind.cpp5
-rw-r--r--llvm/lib/Analysis/OverflowInstAnalysis.cpp2
-rw-r--r--llvm/lib/Analysis/PHITransAddr.cpp4
-rw-r--r--llvm/lib/Analysis/ProfileSummaryInfo.cpp16
-rw-r--r--llvm/lib/Analysis/ReplayInlineAdvisor.cpp106
-rw-r--r--llvm/lib/Analysis/ScalarEvolution.cpp1718
-rw-r--r--llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp12
-rw-r--r--llvm/lib/Analysis/StackLifetime.cpp18
-rw-r--r--llvm/lib/Analysis/StackSafetyAnalysis.cpp118
-rw-r--r--llvm/lib/Analysis/TFUtils.cpp83
-rw-r--r--llvm/lib/Analysis/TargetLibraryInfo.cpp300
-rw-r--r--llvm/lib/Analysis/TargetTransformInfo.cpp38
-rw-r--r--llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp30
-rw-r--r--llvm/lib/Analysis/TypeMetadataUtils.cpp63
-rw-r--r--llvm/lib/Analysis/ValueTracking.cpp248
-rw-r--r--llvm/lib/Analysis/VectorUtils.cpp115
-rw-r--r--llvm/lib/AsmParser/LLLexer.cpp14
-rw-r--r--llvm/lib/AsmParser/LLParser.cpp315
-rw-r--r--llvm/lib/BinaryFormat/MsgPackDocumentYAML.cpp7
-rw-r--r--llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp62
-rw-r--r--llvm/lib/Bitcode/Reader/BitcodeReader.cpp277
-rw-r--r--llvm/lib/Bitcode/Reader/MetadataLoader.cpp215
-rw-r--r--llvm/lib/Bitcode/Writer/BitcodeWriter.cpp77
-rw-r--r--llvm/lib/Bitcode/Writer/ValueEnumerator.cpp7
-rw-r--r--llvm/lib/CodeGen/Analysis.cpp34
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/ARMException.cpp1
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp129
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp172
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp179
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h13
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp10
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp4
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp27
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h2
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp77
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h8
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp80
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h32
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp46
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h3
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp3
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp7
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h3
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/WasmException.cpp29
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/WinException.cpp26
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/WinException.h4
-rw-r--r--llvm/lib/CodeGen/AtomicExpandPass.cpp27
-rw-r--r--llvm/lib/CodeGen/BasicBlockSections.cpp18
-rw-r--r--llvm/lib/CodeGen/BranchFolding.cpp19
-rw-r--r--llvm/lib/CodeGen/BranchRelaxation.cpp42
-rw-r--r--llvm/lib/CodeGen/BreakFalseDeps.cpp2
-rw-r--r--llvm/lib/CodeGen/CodeGenCommonISel.cpp169
-rw-r--r--llvm/lib/CodeGen/CodeGenPrepare.cpp92
-rw-r--r--llvm/lib/CodeGen/CommandFlags.cpp34
-rw-r--r--llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp25
-rw-r--r--llvm/lib/CodeGen/DeadMachineInstructionElim.cpp20
-rw-r--r--llvm/lib/CodeGen/DwarfEHPrepare.cpp74
-rw-r--r--llvm/lib/CodeGen/ExpandMemCmp.cpp4
-rw-r--r--llvm/lib/CodeGen/ExpandPostRAPseudos.cpp7
-rw-r--r--llvm/lib/CodeGen/ExpandVectorPredication.cpp138
-rw-r--r--llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp7
-rw-r--r--llvm/lib/CodeGen/GCMetadata.cpp25
-rw-r--r--llvm/lib/CodeGen/GCRootLowering.cpp13
-rw-r--r--llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp26
-rw-r--r--llvm/lib/CodeGen/GlobalISel/CallLowering.cpp54
-rw-r--r--llvm/lib/CodeGen/GlobalISel/Combiner.cpp13
-rw-r--r--llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp1792
-rw-r--r--llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp27
-rw-r--r--llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp1
-rw-r--r--llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp366
-rw-r--r--llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp9
-rw-r--r--llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp16
-rw-r--r--llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp2
-rw-r--r--llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp8
-rw-r--r--llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp10
-rw-r--r--llvm/lib/CodeGen/GlobalISel/Legalizer.cpp18
-rw-r--r--llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp851
-rw-r--r--llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp3
-rw-r--r--llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp669
-rw-r--r--llvm/lib/CodeGen/GlobalISel/Localizer.cpp5
-rw-r--r--llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp3
-rw-r--r--llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp22
-rw-r--r--llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp2
-rw-r--r--llvm/lib/CodeGen/GlobalISel/Utils.cpp403
-rw-r--r--llvm/lib/CodeGen/HardwareLoops.cpp33
-rw-r--r--llvm/lib/CodeGen/InlineSpiller.cpp98
-rw-r--r--llvm/lib/CodeGen/InterleavedAccessPass.cpp2
-rw-r--r--llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp14
-rw-r--r--llvm/lib/CodeGen/IntrinsicLowering.cpp3
-rw-r--r--llvm/lib/CodeGen/LLVMTargetMachine.cpp2
-rw-r--r--llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp2913
-rw-r--r--llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h1051
-rw-r--r--llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp58
-rw-r--r--llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h5
-rw-r--r--llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp247
-rw-r--r--llvm/lib/CodeGen/LiveDebugVariables.cpp19
-rw-r--r--llvm/lib/CodeGen/LiveInterval.cpp43
-rw-r--r--llvm/lib/CodeGen/LiveIntervalUnion.cpp23
-rw-r--r--llvm/lib/CodeGen/LiveIntervals.cpp60
-rw-r--r--llvm/lib/CodeGen/LivePhysRegs.cpp22
-rw-r--r--llvm/lib/CodeGen/LiveRangeEdit.cpp32
-rw-r--r--llvm/lib/CodeGen/LiveVariables.cpp86
-rw-r--r--llvm/lib/CodeGen/LoopTraversal.cpp3
-rw-r--r--llvm/lib/CodeGen/LowLevelType.cpp10
-rw-r--r--llvm/lib/CodeGen/MIRCanonicalizerPass.cpp8
-rw-r--r--llvm/lib/CodeGen/MIRParser/MILexer.cpp2
-rw-r--r--llvm/lib/CodeGen/MIRParser/MILexer.h1
-rw-r--r--llvm/lib/CodeGen/MIRParser/MIParser.cpp22
-rw-r--r--llvm/lib/CodeGen/MIRParser/MIRParser.cpp3
-rw-r--r--llvm/lib/CodeGen/MIRPrinter.cpp2
-rw-r--r--llvm/lib/CodeGen/MIRSampleProfile.cpp343
-rw-r--r--llvm/lib/CodeGen/MachineBasicBlock.cpp34
-rw-r--r--llvm/lib/CodeGen/MachineBlockPlacement.cpp4
-rw-r--r--llvm/lib/CodeGen/MachineCSE.cpp90
-rw-r--r--llvm/lib/CodeGen/MachineCopyPropagation.cpp76
-rw-r--r--llvm/lib/CodeGen/MachineDominators.cpp2
-rw-r--r--llvm/lib/CodeGen/MachineFunction.cpp47
-rw-r--r--llvm/lib/CodeGen/MachineInstr.cpp11
-rw-r--r--llvm/lib/CodeGen/MachineLICM.cpp38
-rw-r--r--llvm/lib/CodeGen/MachineLoopInfo.cpp8
-rw-r--r--llvm/lib/CodeGen/MachineOperand.cpp5
-rw-r--r--llvm/lib/CodeGen/MachineOutliner.cpp5
-rw-r--r--llvm/lib/CodeGen/MachinePipeliner.cpp5
-rw-r--r--llvm/lib/CodeGen/MachineRegisterInfo.cpp4
-rw-r--r--llvm/lib/CodeGen/MachineScheduler.cpp2
-rw-r--r--llvm/lib/CodeGen/MachineSink.cpp53
-rw-r--r--llvm/lib/CodeGen/MachineSizeOpts.cpp6
-rw-r--r--llvm/lib/CodeGen/MachineStripDebug.cpp21
-rw-r--r--llvm/lib/CodeGen/MachineVerifier.cpp167
-rw-r--r--llvm/lib/CodeGen/MacroFusion.cpp6
-rw-r--r--llvm/lib/CodeGen/ModuloSchedule.cpp101
-rw-r--r--llvm/lib/CodeGen/PHIElimination.cpp47
-rw-r--r--llvm/lib/CodeGen/PeepholeOptimizer.cpp2
-rw-r--r--llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp24
-rw-r--r--llvm/lib/CodeGen/PrologEpilogInserter.cpp23
-rw-r--r--llvm/lib/CodeGen/PseudoProbeInserter.cpp9
-rw-r--r--llvm/lib/CodeGen/RDFLiveness.cpp2
-rw-r--r--llvm/lib/CodeGen/ReachingDefAnalysis.cpp47
-rw-r--r--llvm/lib/CodeGen/RegAllocBasic.cpp4
-rw-r--r--llvm/lib/CodeGen/RegAllocEvictionAdvisor.h90
-rw-r--r--llvm/lib/CodeGen/RegAllocFast.cpp3
-rw-r--r--llvm/lib/CodeGen/RegAllocGreedy.cpp132
-rw-r--r--llvm/lib/CodeGen/RegisterCoalescer.cpp15
-rw-r--r--llvm/lib/CodeGen/RegisterScavenging.cpp15
-rw-r--r--llvm/lib/CodeGen/ReplaceWithVeclib.cpp4
-rw-r--r--llvm/lib/CodeGen/SafeStack.cpp28
-rw-r--r--llvm/lib/CodeGen/SafeStackLayout.cpp4
-rw-r--r--llvm/lib/CodeGen/SafeStackLayout.h15
-rw-r--r--llvm/lib/CodeGen/ScheduleDAG.cpp3
-rw-r--r--llvm/lib/CodeGen/ScheduleDAGInstrs.cpp9
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp959
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/FastISel.cpp28
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp23
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp52
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp17
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp505
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp3
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h32
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp10
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp80
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp422
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h2
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp4
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp9
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp924
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp29
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp426
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h203
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp35
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp182
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp2
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp481
-rw-r--r--llvm/lib/CodeGen/SplitKit.cpp48
-rw-r--r--llvm/lib/CodeGen/SplitKit.h10
-rw-r--r--llvm/lib/CodeGen/StackColoring.cpp2
-rw-r--r--llvm/lib/CodeGen/StackProtector.cpp30
-rw-r--r--llvm/lib/CodeGen/StackSlotColoring.cpp2
-rw-r--r--llvm/lib/CodeGen/SwitchLoweringUtils.cpp2
-rw-r--r--llvm/lib/CodeGen/TailDuplicator.cpp148
-rw-r--r--llvm/lib/CodeGen/TargetInstrInfo.cpp20
-rw-r--r--llvm/lib/CodeGen/TargetLoweringBase.cpp72
-rw-r--r--llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp47
-rw-r--r--llvm/lib/CodeGen/TargetPassConfig.cpp130
-rw-r--r--llvm/lib/CodeGen/TwoAddressInstructionPass.cpp256
-rw-r--r--llvm/lib/CodeGen/TypePromotion.cpp152
-rw-r--r--llvm/lib/CodeGen/ValueTypes.cpp2
-rw-r--r--llvm/lib/CodeGen/VirtRegMap.cpp25
-rw-r--r--llvm/lib/CodeGen/WasmEHPrepare.cpp4
-rw-r--r--llvm/lib/DWARFLinker/DWARFLinker.cpp1
-rw-r--r--llvm/lib/DWARFLinker/DWARFStreamer.cpp2
-rw-r--r--llvm/lib/DebugInfo/CodeView/ContinuationRecordBuilder.cpp2
-rw-r--r--llvm/lib/DebugInfo/CodeView/SimpleTypeSerializer.cpp2
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp42
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp4
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFContext.cpp45
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp28
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp11
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp11
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp33
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFDie.cpp660
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp24
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp18
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFListTable.cpp10
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp216
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp105
-rw-r--r--llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp30
-rw-r--r--llvm/lib/DebugInfo/GSYM/FileWriter.cpp7
-rw-r--r--llvm/lib/DebugInfo/GSYM/Range.cpp7
-rw-r--r--llvm/lib/DebugInfo/MSF/MappedBlockStream.cpp90
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp2
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp4
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp2
-rw-r--r--llvm/lib/DebugInfo/Symbolize/Symbolize.cpp24
-rw-r--r--llvm/lib/Demangle/DLangDemangle.cpp45
-rw-r--r--llvm/lib/Demangle/Demangle.cpp51
-rw-r--r--llvm/lib/Demangle/ItaniumDemangle.cpp70
-rw-r--r--llvm/lib/Demangle/MicrosoftDemangle.cpp100
-rw-r--r--llvm/lib/Demangle/MicrosoftDemangleNodes.cpp408
-rw-r--r--llvm/lib/Demangle/RustDemangle.cpp182
-rw-r--r--llvm/lib/ExecutionEngine/ExecutionEngine.cpp2
-rw-r--r--llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp3
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp13
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/ELF.cpp13
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h104
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp185
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp255
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp575
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/JITLink.cpp14
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp290
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.h65
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp585
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/MachO.cpp7
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp180
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h47
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp68
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp214
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/MemoryFlags.cpp33
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/aarch64.cpp30
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/x86_64.cpp137
-rw-r--r--llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp3
-rw-r--r--llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp2
-rw-r--r--llvm/lib/ExecutionEngine/Orc/Core.cpp44
-rw-r--r--llvm/lib/ExecutionEngine/Orc/DebugObjectManagerPlugin.cpp135
-rw-r--r--llvm/lib/ExecutionEngine/Orc/DebuggerSupportPlugin.cpp450
-rw-r--r--llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp818
-rw-r--r--llvm/lib/ExecutionEngine/Orc/EPCDebugObjectRegistrar.cpp10
-rw-r--r--llvm/lib/ExecutionEngine/Orc/EPCEHFrameRegistrar.cpp13
-rw-r--r--llvm/lib/ExecutionEngine/Orc/EPCGenericDylibManager.cpp107
-rw-r--r--llvm/lib/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.cpp184
-rw-r--r--llvm/lib/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.cpp317
-rw-r--r--llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp146
-rw-r--r--llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp2
-rw-r--r--llvm/lib/ExecutionEngine/Orc/ExecutorProcessControl.cpp72
-rw-r--r--llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp77
-rw-r--r--llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp2
-rw-r--r--llvm/lib/ExecutionEngine/Orc/LLJIT.cpp105
-rw-r--r--llvm/lib/ExecutionEngine/Orc/LookupAndRecordAddrs.cpp82
-rw-r--r--llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp398
-rw-r--r--llvm/lib/ExecutionEngine/Orc/Mangling.cpp173
-rw-r--r--llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp58
-rw-r--r--llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp55
-rw-r--r--llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp47
-rw-r--r--llvm/lib/ExecutionEngine/Orc/Shared/RPCError.cpp58
-rw-r--r--llvm/lib/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.cpp250
-rw-r--r--llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp406
-rw-r--r--llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp36
-rw-r--r--llvm/lib/ExecutionEngine/Orc/TargetProcess/OrcRTBootstrap.cpp84
-rw-r--r--llvm/lib/ExecutionEngine/Orc/TargetProcess/OrcRTBootstrap.h36
-rw-r--r--llvm/lib/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.cpp71
-rw-r--r--llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorDylibManager.cpp129
-rw-r--r--llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp261
-rw-r--r--llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.cpp293
-rw-r--r--llvm/lib/ExecutionEngine/Orc/TaskDispatch.cpp48
-rw-r--r--llvm/lib/ExecutionEngine/RuntimeDyld/JITSymbol.cpp2
-rw-r--r--llvm/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp6
-rw-r--r--llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp43
-rw-r--r--llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp32
-rw-r--r--llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp372
-rw-r--r--llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h12
-rw-r--r--llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h3
-rw-r--r--llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.h1
-rw-r--r--llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h3
-rw-r--r--llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h1
-rw-r--r--llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h1
-rw-r--r--llvm/lib/ExecutionEngine/TargetSelect.cpp2
-rw-r--r--llvm/lib/FileCheck/FileCheck.cpp12
-rw-r--r--llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp714
-rw-r--r--llvm/lib/IR/AbstractCallSite.cpp2
-rw-r--r--llvm/lib/IR/AsmWriter.cpp644
-rw-r--r--llvm/lib/IR/Assumptions.cpp85
-rw-r--r--llvm/lib/IR/Attributes.cpp350
-rw-r--r--llvm/lib/IR/AutoUpgrade.cpp106
-rw-r--r--llvm/lib/IR/BasicBlock.cpp6
-rw-r--r--llvm/lib/IR/ConstantFold.cpp281
-rw-r--r--llvm/lib/IR/ConstantRange.cpp208
-rw-r--r--llvm/lib/IR/Constants.cpp137
-rw-r--r--llvm/lib/IR/Core.cpp38
-rw-r--r--llvm/lib/IR/DIBuilder.cpp200
-rw-r--r--llvm/lib/IR/DataLayout.cpp111
-rw-r--r--llvm/lib/IR/DebugInfo.cpp76
-rw-r--r--llvm/lib/IR/DebugInfoMetadata.cpp229
-rw-r--r--llvm/lib/IR/DiagnosticHandler.cpp2
-rw-r--r--llvm/lib/IR/DiagnosticInfo.cpp40
-rw-r--r--llvm/lib/IR/DiagnosticPrinter.cpp2
-rw-r--r--llvm/lib/IR/FPEnv.cpp10
-rw-r--r--llvm/lib/IR/Function.cpp229
-rw-r--r--llvm/lib/IR/GCStrategy.cpp18
-rw-r--r--llvm/lib/IR/Globals.cpp115
-rw-r--r--llvm/lib/IR/IRBuilder.cpp19
-rw-r--r--llvm/lib/IR/Instruction.cpp20
-rw-r--r--llvm/lib/IR/Instructions.cpp248
-rw-r--r--llvm/lib/IR/IntrinsicInst.cpp63
-rw-r--r--llvm/lib/IR/LLVMContext.cpp8
-rw-r--r--llvm/lib/IR/LLVMContextImpl.cpp25
-rw-r--r--llvm/lib/IR/LLVMContextImpl.h205
-rw-r--r--llvm/lib/IR/LegacyPassManager.cpp2
-rw-r--r--llvm/lib/IR/Mangler.cpp7
-rw-r--r--llvm/lib/IR/Metadata.cpp11
-rw-r--r--llvm/lib/IR/Module.cpp4
-rw-r--r--llvm/lib/IR/ModuleSummaryIndex.cpp21
-rw-r--r--llvm/lib/IR/Operator.cpp35
-rw-r--r--llvm/lib/IR/OptBisect.cpp18
-rw-r--r--llvm/lib/IR/PassManager.cpp15
-rw-r--r--llvm/lib/IR/ProfileSummary.cpp4
-rw-r--r--llvm/lib/IR/PseudoProbe.cpp8
-rw-r--r--llvm/lib/IR/ReplaceConstant.cpp52
-rw-r--r--llvm/lib/IR/Statepoint.cpp6
-rw-r--r--llvm/lib/IR/Type.cpp48
-rw-r--r--llvm/lib/IR/TypeFinder.cpp8
-rw-r--r--llvm/lib/IR/User.cpp2
-rw-r--r--llvm/lib/IR/Value.cpp27
-rw-r--r--llvm/lib/IR/Verifier.cpp328
-rw-r--r--llvm/lib/InterfaceStub/ELFObjHandler.cpp2
-rw-r--r--llvm/lib/InterfaceStub/IFSHandler.cpp12
-rw-r--r--llvm/lib/InterfaceStub/IFSStub.cpp4
-rw-r--r--llvm/lib/LTO/LTO.cpp56
-rw-r--r--llvm/lib/LTO/LTOBackend.cpp47
-rw-r--r--llvm/lib/LTO/LTOCodeGenerator.cpp9
-rw-r--r--llvm/lib/LTO/LTOModule.cpp15
-rw-r--r--llvm/lib/LTO/ThinLTOCodeGenerator.cpp51
-rw-r--r--llvm/lib/Linker/IRMover.cpp125
-rw-r--r--llvm/lib/Linker/LinkModules.cpp100
-rw-r--r--llvm/lib/MC/ConstantPools.cpp2
-rw-r--r--llvm/lib/MC/ELFObjectWriter.cpp3
-rw-r--r--llvm/lib/MC/MCAsmInfoGOFF.cpp27
-rw-r--r--llvm/lib/MC/MCAsmStreamer.cpp5
-rw-r--r--llvm/lib/MC/MCAssembler.cpp17
-rw-r--r--llvm/lib/MC/MCDisassembler/Disassembler.cpp2
-rw-r--r--llvm/lib/MC/MCDisassembler/MCRelocationInfo.cpp2
-rw-r--r--llvm/lib/MC/MCDwarf.cpp94
-rw-r--r--llvm/lib/MC/MCELFStreamer.cpp11
-rw-r--r--llvm/lib/MC/MCExpr.cpp4
-rw-r--r--llvm/lib/MC/MCFragment.cpp8
-rw-r--r--llvm/lib/MC/MCInstrAnalysis.cpp12
-rw-r--r--llvm/lib/MC/MCMachOStreamer.cpp2
-rw-r--r--llvm/lib/MC/MCObjectFileInfo.cpp15
-rw-r--r--llvm/lib/MC/MCObjectStreamer.cpp10
-rw-r--r--llvm/lib/MC/MCParser/AsmLexer.cpp3
-rw-r--r--llvm/lib/MC/MCParser/AsmParser.cpp18
-rw-r--r--llvm/lib/MC/MCParser/ELFAsmParser.cpp25
-rw-r--r--llvm/lib/MC/MCParser/GOFFAsmParser.cpp48
-rw-r--r--llvm/lib/MC/MCParser/MasmParser.cpp7
-rw-r--r--llvm/lib/MC/MCPseudoProbe.cpp392
-rw-r--r--llvm/lib/MC/MCSectionXCOFF.cpp6
-rw-r--r--llvm/lib/MC/MCStreamer.cpp7
-rw-r--r--llvm/lib/MC/MCWasmStreamer.cpp87
-rw-r--r--llvm/lib/MC/MCWin64EH.cpp6
-rw-r--r--llvm/lib/MC/MCWinCOFFStreamer.cpp9
-rw-r--r--llvm/lib/MC/MCXCOFFStreamer.cpp2
-rw-r--r--llvm/lib/MC/MachObjectWriter.cpp2
-rw-r--r--llvm/lib/MC/TargetRegistry.cpp (renamed from llvm/lib/Support/TargetRegistry.cpp)2
-rw-r--r--llvm/lib/MC/WasmObjectWriter.cpp79
-rw-r--r--llvm/lib/MC/XCOFFObjectWriter.cpp339
-rw-r--r--llvm/lib/MCA/Context.cpp5
-rw-r--r--llvm/lib/MCA/CustomBehaviour.cpp18
-rw-r--r--llvm/lib/MCA/HardwareUnits/RegisterFile.cpp13
-rw-r--r--llvm/lib/MCA/InstrBuilder.cpp2
-rw-r--r--llvm/lib/MCA/Stages/InOrderIssueStage.cpp28
-rw-r--r--llvm/lib/MCA/Stages/InstructionTables.cpp2
-rw-r--r--llvm/lib/MCA/View.cpp (renamed from llvm/tools/llvm-mca/Views/View.cpp)2
-rw-r--r--llvm/lib/Object/Archive.cpp2
-rw-r--r--llvm/lib/Object/COFFModuleDefinition.cpp5
-rw-r--r--llvm/lib/Object/ELF.cpp71
-rw-r--r--llvm/lib/Object/ELFObjectFile.cpp82
-rw-r--r--llvm/lib/Object/IRObjectFile.cpp2
-rw-r--r--llvm/lib/Object/IRSymtab.cpp29
-rw-r--r--llvm/lib/Object/MachOObjectFile.cpp46
-rw-r--r--llvm/lib/Object/ModuleSymbolTable.cpp7
-rw-r--r--llvm/lib/Object/Object.cpp9
-rw-r--r--llvm/lib/Object/ObjectFile.cpp15
-rw-r--r--llvm/lib/Object/RelocationResolver.cpp2
-rw-r--r--llvm/lib/Object/WasmObjectFile.cpp119
-rw-r--r--llvm/lib/Object/XCOFFObjectFile.cpp502
-rw-r--r--llvm/lib/ObjectYAML/COFFEmitter.cpp22
-rw-r--r--llvm/lib/ObjectYAML/COFFYAML.cpp36
-rw-r--r--llvm/lib/ObjectYAML/ELFEmitter.cpp9
-rw-r--r--llvm/lib/ObjectYAML/ELFYAML.cpp19
-rw-r--r--llvm/lib/ObjectYAML/MachOEmitter.cpp35
-rw-r--r--llvm/lib/ObjectYAML/MachOYAML.cpp27
-rw-r--r--llvm/lib/ObjectYAML/WasmEmitter.cpp43
-rw-r--r--llvm/lib/ObjectYAML/WasmYAML.cpp28
-rw-r--r--llvm/lib/ObjectYAML/XCOFFEmitter.cpp411
-rw-r--r--llvm/lib/ObjectYAML/XCOFFYAML.cpp43
-rw-r--r--llvm/lib/Option/OptTable.cpp64
-rw-r--r--llvm/lib/Option/Option.cpp47
-rw-r--r--llvm/lib/Passes/OptimizationLevel.cpp30
-rw-r--r--llvm/lib/Passes/PassBuilder.cpp1848
-rw-r--r--llvm/lib/Passes/PassBuilderPipelines.cpp1798
-rw-r--r--llvm/lib/Passes/PassRegistry.def113
-rw-r--r--llvm/lib/Passes/StandardInstrumentations.cpp1218
-rw-r--r--llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp3
-rw-r--r--llvm/lib/ProfileData/InstrProf.cpp109
-rw-r--r--llvm/lib/ProfileData/InstrProfReader.cpp107
-rw-r--r--llvm/lib/ProfileData/InstrProfWriter.cpp5
-rw-r--r--llvm/lib/ProfileData/ProfileSummaryBuilder.cpp18
-rw-r--r--llvm/lib/ProfileData/SampleProf.cpp73
-rw-r--r--llvm/lib/ProfileData/SampleProfReader.cpp262
-rw-r--r--llvm/lib/ProfileData/SampleProfWriter.cpp235
-rw-r--r--llvm/lib/Support/AArch64TargetParser.cpp10
-rw-r--r--llvm/lib/Support/APFixedPoint.cpp4
-rw-r--r--llvm/lib/Support/APFloat.cpp38
-rw-r--r--llvm/lib/Support/APInt.cpp455
-rw-r--r--llvm/lib/Support/ARMTargetParser.cpp14
-rw-r--r--llvm/lib/Support/BinaryStreamReader.cpp16
-rw-r--r--llvm/lib/Support/BinaryStreamRef.cpp31
-rw-r--r--llvm/lib/Support/BinaryStreamWriter.cpp6
-rw-r--r--llvm/lib/Support/Caching.cpp (renamed from llvm/lib/LTO/Caching.cpp)56
-rw-r--r--llvm/lib/Support/CommandLine.cpp13
-rw-r--r--llvm/lib/Support/CrashRecoveryContext.cpp3
-rw-r--r--llvm/lib/Support/DebugOptions.h2
-rw-r--r--llvm/lib/Support/DivisionByConstantInfo.cpp107
-rw-r--r--llvm/lib/Support/Error.cpp11
-rw-r--r--llvm/lib/Support/ErrorHandling.cpp17
-rw-r--r--llvm/lib/Support/ExtensibleRTTI.cpp7
-rw-r--r--llvm/lib/Support/FileUtilities.cpp6
-rw-r--r--llvm/lib/Support/GraphWriter.cpp10
-rw-r--r--llvm/lib/Support/Host.cpp32
-rw-r--r--llvm/lib/Support/JSON.cpp5
-rw-r--r--llvm/lib/Support/KnownBits.cpp15
-rw-r--r--llvm/lib/Support/LockFileManager.cpp2
-rw-r--r--llvm/lib/Support/MD5.cpp85
-rw-r--r--llvm/lib/Support/MSP430AttributeParser.cpp53
-rw-r--r--llvm/lib/Support/MSP430Attributes.cpp22
-rw-r--r--llvm/lib/Support/Parallel.cpp7
-rw-r--r--llvm/lib/Support/Path.cpp103
-rw-r--r--llvm/lib/Support/Process.cpp3
-rw-r--r--llvm/lib/Support/RISCVISAInfo.cpp718
-rw-r--r--llvm/lib/Support/Signposts.cpp32
-rw-r--r--llvm/lib/Support/SmallVector.cpp19
-rw-r--r--llvm/lib/Support/SpecialCaseList.cpp6
-rw-r--r--llvm/lib/Support/TimeProfiler.cpp8
-rw-r--r--llvm/lib/Support/Timer.cpp5
-rw-r--r--llvm/lib/Support/Triple.cpp92
-rw-r--r--llvm/lib/Support/Unix/Memory.inc8
-rw-r--r--llvm/lib/Support/Unix/Path.inc61
-rw-r--r--llvm/lib/Support/Unix/Process.inc3
-rw-r--r--llvm/lib/Support/Unix/Program.inc3
-rw-r--r--llvm/lib/Support/Unix/Unix.h5
-rw-r--r--llvm/lib/Support/VirtualFileSystem.cpp175
-rw-r--r--llvm/lib/Support/Windows/Path.inc38
-rw-r--r--llvm/lib/Support/Windows/Process.inc4
-rw-r--r--llvm/lib/Support/Windows/Program.inc1
-rw-r--r--llvm/lib/Support/X86TargetParser.cpp82
-rw-r--r--llvm/lib/Support/raw_ostream.cpp5
-rw-r--r--llvm/lib/TableGen/Main.cpp6
-rw-r--r--llvm/lib/TableGen/Record.cpp299
-rw-r--r--llvm/lib/TableGen/TGParser.cpp13
-rw-r--r--llvm/lib/TableGen/TGParser.h9
-rw-r--r--llvm/lib/Target/AArch64/AArch64.h2
-rw-r--r--llvm/lib/Target/AArch64/AArch64.td918
-rw-r--r--llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp2
-rw-r--r--llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp3
-rw-r--r--llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp11
-rw-r--r--llvm/lib/Target/AArch64/AArch64CallingConvention.td102
-rw-r--r--llvm/lib/Target/AArch64/AArch64Combine.td12
-rw-r--r--llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp5
-rw-r--r--llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp17
-rw-r--r--llvm/lib/Target/AArch64/AArch64FastISel.cpp10
-rw-r--r--llvm/lib/Target/AArch64/AArch64FrameLowering.cpp86
-rw-r--r--llvm/lib/Target/AArch64/AArch64FrameLowering.h2
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp104
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelLowering.cpp1254
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelLowering.h37
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrFormats.td349
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrInfo.cpp392
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrInfo.h6
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrInfo.td227
-rw-r--r--llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp4
-rw-r--r--llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp2
-rw-r--r--llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp293
-rw-r--r--llvm/lib/Target/AArch64/AArch64RegisterInfo.td12
-rw-r--r--llvm/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp7
-rw-r--r--llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td2
-rw-r--r--llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td616
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedA53.td4
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedA55.td7
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedA57.td4
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedA64FX.td10
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedCyclone.td66
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedExynosM3.td4
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedExynosM4.td4
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedExynosM5.td4
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedFalkor.td4
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedKryo.td4
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedTSV110.td4
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedThunderX.td4
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td8
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td8
-rw-r--r--llvm/lib/Target/AArch64/AArch64Schedule.td12
-rw-r--r--llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp6
-rw-r--r--llvm/lib/Target/AArch64/AArch64StackTagging.cpp10
-rw-r--r--llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp33
-rw-r--r--llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp2
-rw-r--r--llvm/lib/Target/AArch64/AArch64Subtarget.cpp36
-rw-r--r--llvm/lib/Target/AArch64/AArch64Subtarget.h66
-rw-r--r--llvm/lib/Target/AArch64/AArch64SystemOperands.td75
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetMachine.cpp48
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp479
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h36
-rw-r--r--llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp304
-rw-r--r--llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp373
-rw-r--r--llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp2
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp67
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp11
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp638
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp381
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h2
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp31
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp8
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp121
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp57
-rw-r--r--llvm/lib/Target/AArch64/GISel/select-saddo.mir158
-rw-r--r--llvm/lib/Target/AArch64/GISel/select-ssubo.mir158
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h1
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp13
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp46
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h2
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp13
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h3
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp6
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h1
-rw-r--r--llvm/lib/Target/AArch64/SMEInstrFormats.td302
-rw-r--r--llvm/lib/Target/AArch64/SVEInstrFormats.td178
-rw-r--r--llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp168
-rw-r--r--llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp2
-rw-r--r--llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h74
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPU.h53
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPU.td5
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp7
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h6
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp9
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp301
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp90
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h4
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp402
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp72
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td39
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp93
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUCombine.td16
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp382
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h26
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp95
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUFeatures.td1
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUGISel.td1
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp28
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h6
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp468
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h256
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp393
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h22
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp11
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h4
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td35
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp76
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h4
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstructions.td40
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp6
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp261
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h6
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp45
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp24
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp5
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp9
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp75
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp111
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.h69
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h6
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp95
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp12
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp16
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp18
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp35
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp195
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp24
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp21
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp351
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h3
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp24
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp39
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.h5
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp8
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp70
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h15
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp313
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h72
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp132
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h70
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp40
-rw-r--r--llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp48
-rw-r--r--llvm/lib/Target/AMDGPU/BUFInstructions.td51
-rw-r--r--llvm/lib/Target/AMDGPU/DSInstructions.td23
-rw-r--r--llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp93
-rw-r--r--llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h5
-rw-r--r--llvm/lib/Target/AMDGPU/EvergreenInstructions.td68
-rw-r--r--llvm/lib/Target/AMDGPU/FLATInstructions.td46
-rw-r--r--llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp3
-rw-r--r--llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp16
-rw-r--r--llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp12
-rw-r--r--llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp112
-rw-r--r--llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp69
-rw-r--r--llvm/lib/Target/AMDGPU/GCNSchedStrategy.h2
-rw-r--r--llvm/lib/Target/AMDGPU/GCNSubtarget.h26
-rw-r--r--llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp361
-rw-r--r--llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.h103
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp8
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h3
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp207
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h30
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h2
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp6
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h21
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h5
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/R600InstPrinter.cpp224
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/R600InstPrinter.h48
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.h44
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp16
-rw-r--r--llvm/lib/Target/AMDGPU/MIMGInstructions.td40
-rw-r--r--llvm/lib/Target/AMDGPU/R600.h50
-rw-r--r--llvm/lib/Target/AMDGPU/R600.td1
-rw-r--r--llvm/lib/Target/AMDGPU/R600AsmPrinter.cpp3
-rw-r--r--llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp4
-rw-r--r--llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp4
-rw-r--r--llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp4
-rw-r--r--llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp4
-rw-r--r--llvm/lib/Target/AMDGPU/R600ISelDAGToDAG.cpp184
-rw-r--r--llvm/lib/Target/AMDGPU/R600ISelLowering.cpp13
-rw-r--r--llvm/lib/Target/AMDGPU/R600ISelLowering.h3
-rw-r--r--llvm/lib/Target/AMDGPU/R600InstrInfo.cpp3
-rw-r--r--llvm/lib/Target/AMDGPU/R600InstrInfo.h6
-rw-r--r--llvm/lib/Target/AMDGPU/R600InstrInfo.td23
-rw-r--r--llvm/lib/Target/AMDGPU/R600Instructions.td22
-rw-r--r--llvm/lib/Target/AMDGPU/R600MCInstLower.cpp73
-rw-r--r--llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp34
-rw-r--r--llvm/lib/Target/AMDGPU/R600MachineScheduler.h2
-rw-r--r--llvm/lib/Target/AMDGPU/R600OpenCLImageTypeLoweringPass.cpp4
-rw-r--r--llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp4
-rw-r--r--llvm/lib/Target/AMDGPU/R600Packetizer.cpp4
-rw-r--r--llvm/lib/Target/AMDGPU/R600Processors.td4
-rw-r--r--llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/R600Subtarget.cpp46
-rw-r--r--llvm/lib/Target/AMDGPU/R600Subtarget.h1
-rw-r--r--llvm/lib/Target/AMDGPU/R600TargetMachine.cpp143
-rw-r--r--llvm/lib/Target/AMDGPU/R600TargetMachine.h48
-rw-r--r--llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp142
-rw-r--r--llvm/lib/Target/AMDGPU/R600TargetTransformInfo.h69
-rw-r--r--llvm/lib/Target/AMDGPU/SIDefines.h130
-rw-r--r--llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp35
-rw-r--r--llvm/lib/Target/AMDGPU/SIFoldOperands.cpp24
-rw-r--r--llvm/lib/Target/AMDGPU/SIFrameLowering.cpp102
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp306
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.h3
-rw-r--r--llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp37
-rw-r--r--llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp17
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.cpp716
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.h40
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.td61
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstructions.td154
-rw-r--r--llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp6
-rw-r--r--llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp81
-rw-r--r--llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp77
-rw-r--r--llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp16
-rw-r--r--llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp152
-rw-r--r--llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h7
-rw-r--r--llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/SIMachineScheduler.h11
-rw-r--r--llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp38
-rw-r--r--llvm/lib/Target/AMDGPU/SIModeRegister.cpp10
-rw-r--r--llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/SIPostRABundler.cpp5
-rw-r--r--llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp8
-rw-r--r--llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp264
-rw-r--r--llvm/lib/Target/AMDGPU/SIRegisterInfo.h42
-rw-r--r--llvm/lib/Target/AMDGPU/SIRegisterInfo.td102
-rw-r--r--llvm/lib/Target/AMDGPU/SISchedule.td10
-rw-r--r--llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp6
-rw-r--r--llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp9
-rw-r--r--llvm/lib/Target/AMDGPU/SOPInstructions.td41
-rw-r--r--llvm/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp24
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h10
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp6
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h3
-rw-r--r--llvm/lib/Target/AMDGPU/VOP1Instructions.td19
-rw-r--r--llvm/lib/Target/AMDGPU/VOP2Instructions.td60
-rw-r--r--llvm/lib/Target/AMDGPU/VOP3Instructions.td66
-rw-r--r--llvm/lib/Target/AMDGPU/VOP3PInstructions.td14
-rw-r--r--llvm/lib/Target/AMDGPU/VOPInstructions.td12
-rw-r--r--llvm/lib/Target/ARC/ARCAsmPrinter.cpp2
-rw-r--r--llvm/lib/Target/ARC/ARCExpandPseudos.cpp79
-rw-r--r--llvm/lib/Target/ARC/ARCISelLowering.cpp48
-rw-r--r--llvm/lib/Target/ARC/ARCISelLowering.h3
-rw-r--r--llvm/lib/Target/ARC/ARCInstrFormats.td96
-rw-r--r--llvm/lib/Target/ARC/ARCInstrInfo.cpp38
-rw-r--r--llvm/lib/Target/ARC/ARCInstrInfo.h8
-rw-r--r--llvm/lib/Target/ARC/ARCInstrInfo.td86
-rw-r--r--llvm/lib/Target/ARC/ARCOptAddrMode.cpp81
-rw-r--r--llvm/lib/Target/ARC/ARCRegisterInfo.cpp20
-rw-r--r--llvm/lib/Target/ARC/ARCRegisterInfo.h5
-rw-r--r--llvm/lib/Target/ARC/ARCRegisterInfo.td80
-rw-r--r--llvm/lib/Target/ARC/ARCSubtarget.cpp6
-rw-r--r--llvm/lib/Target/ARC/ARCSubtarget.h5
-rw-r--r--llvm/lib/Target/ARC/ARCTargetMachine.cpp2
-rw-r--r--llvm/lib/Target/ARC/Disassembler/ARCDisassembler.cpp33
-rw-r--r--llvm/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.cpp2
-rw-r--r--llvm/lib/Target/ARC/TargetInfo/ARCTargetInfo.cpp2
-rw-r--r--llvm/lib/Target/ARM/A15SDOptimizer.cpp8
-rw-r--r--llvm/lib/Target/ARM/ARM.td73
-rw-r--r--llvm/lib/Target/ARM/ARMAsmPrinter.cpp13
-rw-r--r--llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp208
-rw-r--r--llvm/lib/Target/ARM/ARMBaseInstrInfo.h11
-rw-r--r--llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp9
-rw-r--r--llvm/lib/Target/ARM/ARMBlockPlacement.cpp108
-rw-r--r--llvm/lib/Target/ARM/ARMCallLowering.cpp19
-rw-r--r--llvm/lib/Target/ARM/ARMConstantIslandPass.cpp25
-rw-r--r--llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp124
-rw-r--r--llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp41
-rw-r--r--llvm/lib/Target/ARM/ARMISelLowering.cpp1268
-rw-r--r--llvm/lib/Target/ARM/ARMISelLowering.h5
-rw-r--r--llvm/lib/Target/ARM/ARMInstrCDE.td12
-rw-r--r--llvm/lib/Target/ARM/ARMInstrFormats.td10
-rw-r--r--llvm/lib/Target/ARM/ARMInstrInfo.cpp14
-rw-r--r--llvm/lib/Target/ARM/ARMInstrInfo.td27
-rw-r--r--llvm/lib/Target/ARM/ARMInstrMVE.td965
-rw-r--r--llvm/lib/Target/ARM/ARMInstrNEON.td58
-rw-r--r--llvm/lib/Target/ARM/ARMInstrThumb.td3
-rw-r--r--llvm/lib/Target/ARM/ARMInstrThumb2.td14
-rw-r--r--llvm/lib/Target/ARM/ARMInstrVFP.td22
-rw-r--r--llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp8
-rw-r--r--llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp133
-rw-r--r--llvm/lib/Target/ARM/ARMMCInstLower.cpp2
-rw-r--r--llvm/lib/Target/ARM/ARMRegisterInfo.td8
-rw-r--r--llvm/lib/Target/ARM/ARMSubtarget.cpp9
-rw-r--r--llvm/lib/Target/ARM/ARMSubtarget.h24
-rw-r--r--llvm/lib/Target/ARM/ARMTargetMachine.cpp4
-rw-r--r--llvm/lib/Target/ARM/ARMTargetObjectFile.cpp10
-rw-r--r--llvm/lib/Target/ARM/ARMTargetObjectFile.h5
-rw-r--r--llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp104
-rw-r--r--llvm/lib/Target/ARM/ARMTargetTransformInfo.h14
-rw-r--r--llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp29
-rw-r--r--llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp33
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp19
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h21
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h4
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h6
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h4
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h8
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp5
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp2
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp1
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp195
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp4
-rw-r--r--llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp83
-rw-r--r--llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp22
-rw-r--r--llvm/lib/Target/ARM/MVETailPredication.cpp20
-rw-r--r--llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp2
-rw-r--r--llvm/lib/Target/ARM/Thumb1FrameLowering.cpp10
-rw-r--r--llvm/lib/Target/ARM/Thumb1InstrInfo.cpp4
-rw-r--r--llvm/lib/Target/ARM/Thumb2InstrInfo.cpp22
-rw-r--r--llvm/lib/Target/AVR/AVR.h4
-rw-r--r--llvm/lib/Target/AVR/AVR.td11
-rw-r--r--llvm/lib/Target/AVR/AVRAsmPrinter.cpp58
-rw-r--r--llvm/lib/Target/AVR/AVRCallingConv.td10
-rw-r--r--llvm/lib/Target/AVR/AVRDevices.td794
-rw-r--r--llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp574
-rw-r--r--llvm/lib/Target/AVR/AVRFrameLowering.cpp32
-rw-r--r--llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp57
-rw-r--r--llvm/lib/Target/AVR/AVRISelLowering.cpp90
-rw-r--r--llvm/lib/Target/AVR/AVRISelLowering.h7
-rw-r--r--llvm/lib/Target/AVR/AVRInstrFormats.td301
-rw-r--r--llvm/lib/Target/AVR/AVRInstrInfo.cpp64
-rw-r--r--llvm/lib/Target/AVR/AVRInstrInfo.h10
-rw-r--r--llvm/lib/Target/AVR/AVRInstrInfo.td2923
-rw-r--r--llvm/lib/Target/AVR/AVRMCInstLower.cpp8
-rw-r--r--llvm/lib/Target/AVR/AVRMCInstLower.h1
-rw-r--r--llvm/lib/Target/AVR/AVRMachineFunctionInfo.h12
-rw-r--r--llvm/lib/Target/AVR/AVRRegisterInfo.cpp31
-rw-r--r--llvm/lib/Target/AVR/AVRRegisterInfo.h9
-rw-r--r--llvm/lib/Target/AVR/AVRRegisterInfo.td233
-rw-r--r--llvm/lib/Target/AVR/AVRRelaxMemOperations.cpp30
-rw-r--r--llvm/lib/Target/AVR/AVRSubtarget.cpp2
-rw-r--r--llvm/lib/Target/AVR/AVRSubtarget.h17
-rw-r--r--llvm/lib/Target/AVR/AVRTargetMachine.cpp5
-rw-r--r--llvm/lib/Target/AVR/AVRTargetMachine.h7
-rw-r--r--llvm/lib/Target/AVR/AVRTargetObjectFile.cpp7
-rw-r--r--llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp46
-rw-r--r--llvm/lib/Target/AVR/Disassembler/AVRDisassembler.cpp137
-rw-r--r--llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp34
-rw-r--r--llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h4
-rw-r--r--llvm/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp12
-rw-r--r--llvm/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp3
-rw-r--r--llvm/lib/Target/AVR/MCTargetDesc/AVRFixupKinds.h4
-rw-r--r--llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.cpp4
-rw-r--r--llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.h1
-rw-r--r--llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp26
-rw-r--r--llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h3
-rw-r--r--llvm/lib/Target/AVR/MCTargetDesc/AVRMCELFStreamer.cpp2
-rw-r--r--llvm/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp11
-rw-r--r--llvm/lib/Target/AVR/MCTargetDesc/AVRMCExpr.h2
-rw-r--r--llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp10
-rw-r--r--llvm/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.cpp19
-rw-r--r--llvm/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.h2
-rw-r--r--llvm/lib/Target/AVR/TargetInfo/AVRTargetInfo.cpp5
-rw-r--r--llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp2
-rw-r--r--llvm/lib/Target/BPF/BPF.h9
-rw-r--r--llvm/lib/Target/BPF/BPFAdjustOpt.cpp62
-rw-r--r--llvm/lib/Target/BPF/BPFAsmPrinter.cpp2
-rw-r--r--llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp45
-rw-r--r--llvm/lib/Target/BPF/BPFIRPeephole.cpp118
-rw-r--r--llvm/lib/Target/BPF/BPFISelLowering.cpp24
-rw-r--r--llvm/lib/Target/BPF/BPFISelLowering.h4
-rw-r--r--llvm/lib/Target/BPF/BPFMIChecking.cpp2
-rw-r--r--llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp12
-rw-r--r--llvm/lib/Target/BPF/BPFRegisterInfo.td2
-rw-r--r--llvm/lib/Target/BPF/BPFSubtarget.cpp2
-rw-r--r--llvm/lib/Target/BPF/BPFTargetMachine.cpp11
-rw-r--r--llvm/lib/Target/BPF/BPFTargetTransformInfo.h17
-rw-r--r--llvm/lib/Target/BPF/BTF.def2
-rw-r--r--llvm/lib/Target/BPF/BTF.h2
-rw-r--r--llvm/lib/Target/BPF/BTFDebug.cpp169
-rw-r--r--llvm/lib/Target/BPF/BTFDebug.h28
-rw-r--r--llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp2
-rw-r--r--llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp6
-rw-r--r--llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp2
-rw-r--r--llvm/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp2
-rw-r--r--llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp873
-rw-r--r--llvm/lib/Target/CSKY/CSKY.h (renamed from llvm/include/llvm/ExecutionEngine/OrcV1Deprecation.h)17
-rw-r--r--llvm/lib/Target/CSKY/CSKY.td87
-rw-r--r--llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp58
-rw-r--r--llvm/lib/Target/CSKY/CSKYAsmPrinter.h40
-rw-r--r--llvm/lib/Target/CSKY/CSKYCallingConv.h63
-rw-r--r--llvm/lib/Target/CSKY/CSKYCallingConv.td82
-rw-r--r--llvm/lib/Target/CSKY/CSKYFrameLowering.cpp57
-rw-r--r--llvm/lib/Target/CSKY/CSKYFrameLowering.h38
-rw-r--r--llvm/lib/Target/CSKY/CSKYISelDAGToDAG.cpp75
-rw-r--r--llvm/lib/Target/CSKY/CSKYISelLowering.cpp346
-rw-r--r--llvm/lib/Target/CSKY/CSKYISelLowering.h69
-rw-r--r--llvm/lib/Target/CSKY/CSKYInstrFormats.td221
-rw-r--r--llvm/lib/Target/CSKY/CSKYInstrFormats16Instr.td219
-rw-r--r--llvm/lib/Target/CSKY/CSKYInstrInfo.cpp25
-rw-r--r--llvm/lib/Target/CSKY/CSKYInstrInfo.h36
-rw-r--r--llvm/lib/Target/CSKY/CSKYInstrInfo.td644
-rw-r--r--llvm/lib/Target/CSKY/CSKYInstrInfo16Instr.td452
-rw-r--r--llvm/lib/Target/CSKY/CSKYMCInstLower.cpp117
-rw-r--r--llvm/lib/Target/CSKY/CSKYMCInstLower.h35
-rw-r--r--llvm/lib/Target/CSKY/CSKYMachineFunctionInfo.h62
-rw-r--r--llvm/lib/Target/CSKY/CSKYRegisterInfo.cpp95
-rw-r--r--llvm/lib/Target/CSKY/CSKYRegisterInfo.h45
-rw-r--r--llvm/lib/Target/CSKY/CSKYRegisterInfo.td15
-rw-r--r--llvm/lib/Target/CSKY/CSKYSubtarget.cpp74
-rw-r--r--llvm/lib/Target/CSKY/CSKYSubtarget.h120
-rw-r--r--llvm/lib/Target/CSKY/CSKYTargetMachine.cpp41
-rw-r--r--llvm/lib/Target/CSKY/CSKYTargetMachine.h8
-rw-r--r--llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp49
-rw-r--r--llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h3
-rw-r--r--llvm/lib/Target/CSKY/MCTargetDesc/CSKYBaseInfo.h70
-rw-r--r--llvm/lib/Target/CSKY/MCTargetDesc/CSKYFixupKinds.h27
-rw-r--r--llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.cpp102
-rw-r--r--llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.h17
-rw-r--r--llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.cpp98
-rw-r--r--llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.h47
-rw-r--r--llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCExpr.cpp35
-rw-r--r--llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCExpr.h9
-rw-r--r--llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp2
-rw-r--r--llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.h3
-rw-r--r--llvm/lib/Target/CSKY/TargetInfo/CSKYTargetInfo.cpp2
-rw-r--r--llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp14
-rw-r--r--llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp81
-rw-r--r--llvm/lib/Target/Hexagon/HexagonArch.h6
-rw-r--r--llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp14
-rw-r--r--llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp35
-rw-r--r--llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp5
-rw-r--r--llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp30
-rw-r--r--llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td24
-rw-r--r--llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td38
-rw-r--r--llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp14
-rw-r--r--llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp14
-rw-r--r--llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp18
-rw-r--r--llvm/lib/Target/Hexagon/HexagonGenInsert.cpp5
-rw-r--r--llvm/lib/Target/Hexagon/HexagonGenMux.cpp28
-rw-r--r--llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp20
-rw-r--r--llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp6
-rw-r--r--llvm/lib/Target/Hexagon/HexagonISelLowering.cpp4
-rw-r--r--llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp59
-rw-r--r--llvm/lib/Target/Hexagon/HexagonInstrFormats.td3
-rw-r--r--llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp16
-rw-r--r--llvm/lib/Target/Hexagon/HexagonInstrInfo.h3
-rw-r--r--llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp13
-rw-r--r--llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp14
-rw-r--r--llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp8
-rw-r--r--llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp13
-rw-r--r--llvm/lib/Target/Hexagon/HexagonPseudo.td8
-rw-r--r--llvm/lib/Target/Hexagon/HexagonRegisterInfo.td187
-rw-r--r--llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp4
-rw-r--r--llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp2
-rw-r--r--llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp16
-rw-r--r--llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp9
-rw-r--r--llvm/lib/Target/Hexagon/HexagonTargetStreamer.h1
-rw-r--r--llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp3
-rw-r--r--llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h16
-rw-r--r--llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp24
-rw-r--r--llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp13
-rw-r--r--llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp7
-rw-r--r--llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp11
-rw-r--r--llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp5
-rw-r--r--llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp4
-rw-r--r--llvm/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp2
-rw-r--r--llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp2
-rw-r--r--llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp6
-rw-r--r--llvm/lib/Target/Lanai/LanaiAluCode.h2
-rw-r--r--llvm/lib/Target/Lanai/LanaiAsmPrinter.cpp4
-rw-r--r--llvm/lib/Target/Lanai/LanaiISelDAGToDAG.cpp4
-rw-r--r--llvm/lib/Target/Lanai/LanaiISelLowering.cpp16
-rw-r--r--llvm/lib/Target/Lanai/LanaiISelLowering.h5
-rw-r--r--llvm/lib/Target/Lanai/LanaiInstrInfo.cpp19
-rw-r--r--llvm/lib/Target/Lanai/LanaiInstrInfo.h6
-rw-r--r--llvm/lib/Target/Lanai/LanaiInstrInfo.td6
-rw-r--r--llvm/lib/Target/Lanai/LanaiTargetMachine.cpp2
-rw-r--r--llvm/lib/Target/Lanai/LanaiTargetTransformInfo.h13
-rw-r--r--llvm/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp6
-rw-r--r--llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp5
-rw-r--r--llvm/lib/Target/Lanai/TargetInfo/LanaiTargetInfo.cpp2
-rw-r--r--llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp232
-rw-r--r--llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp5
-rw-r--r--llvm/lib/Target/M68k/GISel/M68kCallLowering.cpp (renamed from llvm/lib/Target/M68k/GlSel/M68kCallLowering.cpp)4
-rw-r--r--llvm/lib/Target/M68k/GISel/M68kCallLowering.h (renamed from llvm/lib/Target/M68k/GlSel/M68kCallLowering.h)2
-rw-r--r--llvm/lib/Target/M68k/GISel/M68kInstructionSelector.cpp (renamed from llvm/lib/Target/M68k/GlSel/M68kInstructionSelector.cpp)0
-rw-r--r--llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.cpp (renamed from llvm/lib/Target/M68k/GlSel/M68kLegalizerInfo.cpp)0
-rw-r--r--llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.h (renamed from llvm/lib/Target/M68k/GlSel/M68kLegalizerInfo.h)0
-rw-r--r--llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.cpp105
-rw-r--r--llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.h (renamed from llvm/lib/Target/M68k/GlSel/M68kRegisterBankInfo.h)6
-rw-r--r--llvm/lib/Target/M68k/GISel/M68kRegisterBanks.td (renamed from llvm/lib/Target/M68k/GlSel/M68kRegisterBanks.td)2
-rw-r--r--llvm/lib/Target/M68k/GlSel/M68kRegisterBankInfo.cpp27
-rw-r--r--llvm/lib/Target/M68k/M68k.td2
-rw-r--r--llvm/lib/Target/M68k/M68kAsmPrinter.cpp2
-rw-r--r--llvm/lib/Target/M68k/M68kCallingConv.h22
-rw-r--r--llvm/lib/Target/M68k/M68kFrameLowering.cpp8
-rw-r--r--llvm/lib/Target/M68k/M68kISelLowering.cpp17
-rw-r--r--llvm/lib/Target/M68k/M68kInstrArithmetic.td95
-rw-r--r--llvm/lib/Target/M68k/M68kInstrCompiler.td10
-rw-r--r--llvm/lib/Target/M68k/M68kInstrFormats.td2
-rw-r--r--llvm/lib/Target/M68k/M68kInstrInfo.cpp2
-rw-r--r--llvm/lib/Target/M68k/M68kInstrInfo.h2
-rw-r--r--llvm/lib/Target/M68k/M68kInstrInfo.td74
-rw-r--r--llvm/lib/Target/M68k/M68kSubtarget.cpp8
-rw-r--r--llvm/lib/Target/M68k/M68kTargetMachine.cpp12
-rw-r--r--llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp8
-rw-r--r--llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.cpp4
-rw-r--r--llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.cpp2
-rw-r--r--llvm/lib/Target/M68k/TargetInfo/M68kTargetInfo.cpp2
-rw-r--r--llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp2
-rw-r--r--llvm/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp4
-rw-r--r--llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp6
-rw-r--r--llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFStreamer.cpp19
-rw-r--r--llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp2
-rw-r--r--llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp2
-rw-r--r--llvm/lib/Target/MSP430/MSP430FrameLowering.cpp5
-rw-r--r--llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp2
-rw-r--r--llvm/lib/Target/MSP430/MSP430ISelLowering.cpp4
-rw-r--r--llvm/lib/Target/MSP430/MSP430InstrInfo.cpp5
-rw-r--r--llvm/lib/Target/MSP430/MSP430Subtarget.cpp2
-rw-r--r--llvm/lib/Target/MSP430/MSP430TargetMachine.cpp4
-rw-r--r--llvm/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp2
-rw-r--r--llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp2
-rw-r--r--llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp10
-rw-r--r--llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp3
-rw-r--r--llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h3
-rw-r--r--llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp2
-rw-r--r--llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp2
-rw-r--r--llvm/lib/Target/Mips/MicroMips32r6InstrFormats.td2
-rw-r--r--llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td30
-rw-r--r--llvm/lib/Target/Mips/MicroMipsDSPInstrInfo.td73
-rw-r--r--llvm/lib/Target/Mips/MicroMipsInstrInfo.td61
-rw-r--r--llvm/lib/Target/Mips/Mips16HardFloat.cpp13
-rw-r--r--llvm/lib/Target/Mips/Mips16InstrInfo.td12
-rw-r--r--llvm/lib/Target/Mips/Mips32r6InstrInfo.td7
-rw-r--r--llvm/lib/Target/Mips/MipsAsmPrinter.cpp4
-rw-r--r--llvm/lib/Target/Mips/MipsCallLowering.cpp31
-rw-r--r--llvm/lib/Target/Mips/MipsDSPInstrInfo.td45
-rw-r--r--llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp12
-rw-r--r--llvm/lib/Target/Mips/MipsEVAInstrInfo.td9
-rw-r--r--llvm/lib/Target/Mips/MipsFastISel.cpp4
-rw-r--r--llvm/lib/Target/Mips/MipsISelLowering.cpp7
-rw-r--r--llvm/lib/Target/Mips/MipsInstructionSelector.cpp4
-rw-r--r--llvm/lib/Target/Mips/MipsMSAInstrInfo.td50
-rw-r--r--llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp9
-rw-r--r--llvm/lib/Target/Mips/MipsSEISelLowering.cpp4
-rw-r--r--llvm/lib/Target/Mips/MipsSEInstrInfo.cpp2
-rw-r--r--llvm/lib/Target/Mips/MipsSubtarget.cpp10
-rw-r--r--llvm/lib/Target/Mips/MipsTargetMachine.cpp2
-rw-r--r--llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp2
-rw-r--r--llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp4
-rw-r--r--llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp2
-rw-r--r--llvm/lib/Target/NVPTX/NVPTX.td6
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp168
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h2
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp11
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp23
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp666
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp25
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp5
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp5
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXInstrInfo.td10
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXIntrinsics.td6101
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp5
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXPeephole.cpp25
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp1
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp22
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXRegisterInfo.h1
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td12
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp1677
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp27
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXTargetMachine.h4
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp7
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h11
-rw-r--r--llvm/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp2
-rw-r--r--llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp2
-rw-r--r--llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp2
-rw-r--r--llvm/lib/Target/PowerPC/GISel/PPCCallLowering.cpp2
-rw-r--r--llvm/lib/Target/PowerPC/GISel/PPCCallLowering.h2
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp5
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp9
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h7
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp7
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp2
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.cpp11
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.h7
-rw-r--r--llvm/lib/Target/PowerPC/P10InstrResources.td2075
-rw-r--r--llvm/lib/Target/PowerPC/P9InstrResources.td13
-rw-r--r--llvm/lib/Target/PowerPC/PPC.td30
-rw-r--r--llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp13
-rw-r--r--llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp5
-rw-r--r--llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp30
-rw-r--r--llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp10
-rw-r--r--llvm/lib/Target/PowerPC/PPCFastISel.cpp19
-rw-r--r--llvm/lib/Target/PowerPC/PPCFrameLowering.cpp44
-rw-r--r--llvm/lib/Target/PowerPC/PPCFrameLowering.h22
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp157
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelLowering.cpp610
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelLowering.h26
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstr64Bit.td123
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrAltivec.td4
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrFormats.td8
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrHTM.td8
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrInfo.cpp33
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrInfo.h5
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrInfo.td95
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrPrefix.td446
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrVSX.td147
-rw-r--r--llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp1066
-rw-r--r--llvm/lib/Target/PowerPC/PPCMIPeephole.cpp80
-rw-r--r--llvm/lib/Target/PowerPC/PPCMacroFusion.cpp19
-rw-r--r--llvm/lib/Target/PowerPC/PPCMacroFusion.def37
-rw-r--r--llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp25
-rw-r--r--llvm/lib/Target/PowerPC/PPCRegisterInfo.h2
-rw-r--r--llvm/lib/Target/PowerPC/PPCSchedPredicates.td294
-rw-r--r--llvm/lib/Target/PowerPC/PPCSchedule.td3
-rw-r--r--llvm/lib/Target/PowerPC/PPCScheduleP10.td416
-rw-r--r--llvm/lib/Target/PowerPC/PPCScheduleP9.td2
-rw-r--r--llvm/lib/Target/PowerPC/PPCSubtarget.cpp7
-rw-r--r--llvm/lib/Target/PowerPC/PPCSubtarget.h10
-rw-r--r--llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp4
-rw-r--r--llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp6
-rw-r--r--llvm/lib/Target/PowerPC/PPCTargetMachine.cpp2
-rw-r--r--llvm/lib/Target/PowerPC/PPCTargetMachine.h2
-rw-r--r--llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp8
-rw-r--r--llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h9
-rw-r--r--llvm/lib/Target/PowerPC/PPCVSXCopy.cpp7
-rw-r--r--llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp6
-rw-r--r--llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp2
-rw-r--r--llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp309
-rw-r--r--llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp15
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp5
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h3
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp14
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h37
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp2
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCObjectFileInfo.cpp22
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCObjectFileInfo.h27
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp13
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp149
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp66
-rw-r--r--llvm/lib/Target/RISCV/RISCV.h3
-rw-r--r--llvm/lib/Target/RISCV/RISCV.td132
-rw-r--r--llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp2
-rw-r--r--llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp8
-rw-r--r--llvm/lib/Target/RISCV/RISCVFrameLowering.cpp41
-rw-r--r--llvm/lib/Target/RISCV/RISCVFrameLowering.h2
-rw-r--r--llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp475
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp488
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h31
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelLowering.cpp1906
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelLowering.h58
-rw-r--r--llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp424
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrFormats.td142
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfo.cpp389
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfo.h35
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfo.td170
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoC.td7
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoD.td12
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoF.td26
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoM.td26
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoV.td1199
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td714
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td108
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td220
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoZb.td (renamed from llvm/lib/Target/RISCV/RISCVInstrInfoB.td)145
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td40
-rw-r--r--llvm/lib/Target/RISCV/RISCVMCInstLower.cpp23
-rw-r--r--llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp19
-rw-r--r--llvm/lib/Target/RISCV/RISCVRegisterInfo.td110
-rw-r--r--llvm/lib/Target/RISCV/RISCVSchedRocket.td3
-rw-r--r--llvm/lib/Target/RISCV/RISCVSchedSiFive7.td5
-rw-r--r--llvm/lib/Target/RISCV/RISCVSchedule.td1
-rw-r--r--llvm/lib/Target/RISCV/RISCVScheduleV.td820
-rw-r--r--llvm/lib/Target/RISCV/RISCVSubtarget.cpp33
-rw-r--r--llvm/lib/Target/RISCV/RISCVSubtarget.h18
-rw-r--r--llvm/lib/Target/RISCV/RISCVSystemOperands.td1
-rw-r--r--llvm/lib/Target/RISCV/RISCVTargetMachine.cpp6
-rw-r--r--llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp11
-rw-r--r--llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h53
-rw-r--r--llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.cpp2
-rw-r--r--llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp2
-rw-r--r--llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp2
-rw-r--r--llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp5
-rw-r--r--llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp2
-rw-r--r--llvm/lib/Target/Sparc/SparcAsmPrinter.cpp2
-rw-r--r--llvm/lib/Target/Sparc/SparcISelLowering.cpp14
-rw-r--r--llvm/lib/Target/Sparc/SparcInstrInfo.cpp2
-rw-r--r--llvm/lib/Target/Sparc/SparcInstrInfo.td34
-rw-r--r--llvm/lib/Target/Sparc/SparcSubtarget.cpp2
-rw-r--r--llvm/lib/Target/Sparc/SparcTargetMachine.cpp2
-rw-r--r--llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp2
-rw-r--r--llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp51
-rw-r--r--llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp2
-rw-r--r--llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp24
-rw-r--r--llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h5
-rw-r--r--llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp35
-rw-r--r--llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp52
-rw-r--r--llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h10
-rw-r--r--llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp41
-rw-r--r--llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h2
-rw-r--r--llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp2
-rw-r--r--llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp64
-rw-r--r--llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp33
-rw-r--r--llvm/lib/Target/SystemZ/SystemZAsmPrinter.h33
-rw-r--r--llvm/lib/Target/SystemZ/SystemZCallingConv.cpp4
-rw-r--r--llvm/lib/Target/SystemZ/SystemZCallingConv.h71
-rw-r--r--llvm/lib/Target/SystemZ/SystemZCallingConv.td45
-rw-r--r--llvm/lib/Target/SystemZ/SystemZElimCompare.cpp7
-rw-r--r--llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp341
-rw-r--r--llvm/lib/Target/SystemZ/SystemZFrameLowering.h56
-rw-r--r--llvm/lib/Target/SystemZ/SystemZISelLowering.cpp263
-rw-r--r--llvm/lib/Target/SystemZ/SystemZISelLowering.h13
-rw-r--r--llvm/lib/Target/SystemZ/SystemZInstrFP.td7
-rw-r--r--llvm/lib/Target/SystemZ/SystemZInstrFormats.td49
-rw-r--r--llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp39
-rw-r--r--llvm/lib/Target/SystemZ/SystemZInstrInfo.h11
-rw-r--r--llvm/lib/Target/SystemZ/SystemZInstrInfo.td36
-rw-r--r--llvm/lib/Target/SystemZ/SystemZLongBranch.cpp18
-rw-r--r--llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp19
-rw-r--r--llvm/lib/Target/SystemZ/SystemZOperators.td21
-rw-r--r--llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp8
-rw-r--r--llvm/lib/Target/SystemZ/SystemZRegisterInfo.h21
-rw-r--r--llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp110
-rw-r--r--llvm/lib/Target/SystemZ/SystemZShortenInst.cpp3
-rw-r--r--llvm/lib/Target/SystemZ/SystemZSubtarget.cpp4
-rw-r--r--llvm/lib/Target/SystemZ/SystemZSubtarget.h19
-rw-r--r--llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp11
-rw-r--r--llvm/lib/Target/SystemZ/SystemZTargetStreamer.h55
-rw-r--r--llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp7
-rw-r--r--llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h9
-rw-r--r--llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp2
-rw-r--r--llvm/lib/Target/TargetMachine.cpp64
-rw-r--r--llvm/lib/Target/TargetMachineC.cpp2
-rw-r--r--llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp2
-rw-r--r--llvm/lib/Target/VE/Disassembler/VEDisassembler.cpp2
-rw-r--r--llvm/lib/Target/VE/MCTargetDesc/VEAsmBackend.cpp5
-rw-r--r--llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.cpp1
-rw-r--r--llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp2
-rw-r--r--llvm/lib/Target/VE/TargetInfo/VETargetInfo.cpp2
-rw-r--r--llvm/lib/Target/VE/VEAsmPrinter.cpp2
-rw-r--r--llvm/lib/Target/VE/VEISelLowering.cpp12
-rw-r--r--llvm/lib/Target/VE/VEInstrInfo.cpp6
-rw-r--r--llvm/lib/Target/VE/VEInstrInfo.td52
-rw-r--r--llvm/lib/Target/VE/VESubtarget.cpp2
-rw-r--r--llvm/lib/Target/VE/VETargetMachine.cpp2
-rw-r--r--llvm/lib/Target/VE/VVPInstrPatternsVec.td7
-rw-r--r--llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp14
-rw-r--r--llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp52
-rw-r--r--llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.h18
-rw-r--r--llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp2
-rw-r--r--llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp7
-rw-r--r--llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp2
-rw-r--r--llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h3
-rw-r--r--llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp4
-rw-r--r--llvm/lib/Target/WebAssembly/README.txt8
-rw-r--r--llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp2
-rw-r--r--llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h23
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssembly.h7
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssembly.td3
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp50
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp55
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp14
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp14
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp50
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyFixBrTableDefaults.cpp8
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp54
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyISD.def9
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp85
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp562
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h45
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td38
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td15
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td57
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td8
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td226
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyInstrTable.td39
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp15
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp905
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp2
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyLowerRefTypesIntPtrConv.cpp84
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp48
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyMCLowerPrePass.cpp51
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp5
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp13
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp2
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp3
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp4
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp6
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp2
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h2
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp100
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp3
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h6
-rw-r--r--llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp231
-rw-r--r--llvm/lib/Target/X86/AsmParser/X86Operand.h3
-rw-r--r--llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp38
-rw-r--r--llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h13
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp48
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp36
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h13
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp3
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp18
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp48
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp17
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp33
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.cpp1
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp2
-rw-r--r--llvm/lib/Target/X86/TargetInfo/X86TargetInfo.cpp2
-rw-r--r--llvm/lib/Target/X86/X86.h4
-rw-r--r--llvm/lib/Target/X86/X86.td743
-rw-r--r--llvm/lib/Target/X86/X86AsmPrinter.cpp5
-rw-r--r--llvm/lib/Target/X86/X86AsmPrinter.h19
-rw-r--r--llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp20
-rw-r--r--llvm/lib/Target/X86/X86CallLowering.cpp4
-rw-r--r--llvm/lib/Target/X86/X86CallingConv.td88
-rw-r--r--llvm/lib/Target/X86/X86CmovConversion.cpp8
-rw-r--r--llvm/lib/Target/X86/X86DynAllocaExpander.cpp (renamed from llvm/lib/Target/X86/X86WinAllocaExpander.cpp)58
-rw-r--r--llvm/lib/Target/X86/X86ExpandPseudo.cpp50
-rw-r--r--llvm/lib/Target/X86/X86FastISel.cpp32
-rw-r--r--llvm/lib/Target/X86/X86FastTileConfig.cpp4
-rw-r--r--llvm/lib/Target/X86/X86FixupLEAs.cpp3
-rw-r--r--llvm/lib/Target/X86/X86FlagsCopyLowering.cpp6
-rw-r--r--llvm/lib/Target/X86/X86FloatingPoint.cpp41
-rw-r--r--llvm/lib/Target/X86/X86FrameLowering.cpp131
-rw-r--r--llvm/lib/Target/X86/X86ISelDAGToDAG.cpp271
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp3561
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.h83
-rw-r--r--llvm/lib/Target/X86/X86IndirectBranchTracking.cpp2
-rw-r--r--llvm/lib/Target/X86/X86IndirectThunks.cpp2
-rw-r--r--llvm/lib/Target/X86/X86InsertWait.cpp21
-rw-r--r--llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp8
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX512.td2246
-rw-r--r--llvm/lib/Target/X86/X86InstrArithmetic.td49
-rw-r--r--llvm/lib/Target/X86/X86InstrCompiler.td48
-rw-r--r--llvm/lib/Target/X86/X86InstrControl.td36
-rw-r--r--llvm/lib/Target/X86/X86InstrFMA.td46
-rw-r--r--llvm/lib/Target/X86/X86InstrFMA3Info.cpp46
-rw-r--r--llvm/lib/Target/X86/X86InstrFPStack.td2
-rw-r--r--llvm/lib/Target/X86/X86InstrFoldTables.cpp475
-rw-r--r--llvm/lib/Target/X86/X86InstrFormats.td74
-rw-r--r--llvm/lib/Target/X86/X86InstrFragmentsSIMD.td103
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.cpp754
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.h40
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.td16
-rw-r--r--llvm/lib/Target/X86/X86InstrKL.td7
-rw-r--r--llvm/lib/Target/X86/X86InstrMPX.td77
-rw-r--r--llvm/lib/Target/X86/X86InstrSSE.td239
-rw-r--r--llvm/lib/Target/X86/X86InstrSystem.td15
-rw-r--r--llvm/lib/Target/X86/X86InstrVecCompiler.td96
-rw-r--r--llvm/lib/Target/X86/X86InstructionSelector.cpp4
-rw-r--r--llvm/lib/Target/X86/X86IntrinsicsInfo.h231
-rw-r--r--llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp3
-rw-r--r--llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp2
-rw-r--r--llvm/lib/Target/X86/X86LowerAMXIntrinsics.cpp15
-rw-r--r--llvm/lib/Target/X86/X86LowerAMXType.cpp503
-rw-r--r--llvm/lib/Target/X86/X86LowerTileCopy.cpp4
-rw-r--r--llvm/lib/Target/X86/X86MCInstLower.cpp265
-rw-r--r--llvm/lib/Target/X86/X86MachineFunctionInfo.h15
-rw-r--r--llvm/lib/Target/X86/X86OptimizeLEAs.cpp9
-rw-r--r--llvm/lib/Target/X86/X86PadShortFunction.cpp9
-rw-r--r--llvm/lib/Target/X86/X86PfmCounters.td20
-rw-r--r--llvm/lib/Target/X86/X86PreTileConfig.cpp5
-rw-r--r--llvm/lib/Target/X86/X86RegisterInfo.cpp8
-rw-r--r--llvm/lib/Target/X86/X86RegisterInfo.td27
-rw-r--r--llvm/lib/Target/X86/X86SchedBroadwell.td266
-rw-r--r--llvm/lib/Target/X86/X86SchedHaswell.td64
-rw-r--r--llvm/lib/Target/X86/X86SchedIceLake.td2636
-rw-r--r--llvm/lib/Target/X86/X86SchedSandyBridge.td9
-rw-r--r--llvm/lib/Target/X86/X86SchedSkylakeClient.td9
-rw-r--r--llvm/lib/Target/X86/X86SchedSkylakeServer.td9
-rw-r--r--llvm/lib/Target/X86/X86Schedule.td25
-rw-r--r--llvm/lib/Target/X86/X86ScheduleAtom.td112
-rw-r--r--llvm/lib/Target/X86/X86ScheduleBdVer2.td7
-rw-r--r--llvm/lib/Target/X86/X86ScheduleBtVer2.td5
-rw-r--r--llvm/lib/Target/X86/X86ScheduleSLM.td153
-rw-r--r--llvm/lib/Target/X86/X86ScheduleZnver1.td41
-rw-r--r--llvm/lib/Target/X86/X86ScheduleZnver2.td41
-rw-r--r--llvm/lib/Target/X86/X86ScheduleZnver3.td35
-rw-r--r--llvm/lib/Target/X86/X86SelectionDAGInfo.cpp7
-rw-r--r--llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp2
-rw-r--r--llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp8
-rw-r--r--llvm/lib/Target/X86/X86Subtarget.cpp20
-rw-r--r--llvm/lib/Target/X86/X86Subtarget.h47
-rw-r--r--llvm/lib/Target/X86/X86TargetMachine.cpp7
-rw-r--r--llvm/lib/Target/X86/X86TargetTransformInfo.cpp961
-rw-r--r--llvm/lib/Target/X86/X86TargetTransformInfo.h115
-rw-r--r--llvm/lib/Target/X86/X86VZeroUpper.cpp6
-rw-r--r--llvm/lib/Target/X86/X86WinEHState.cpp2
-rw-r--r--llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp2
-rw-r--r--llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp2
-rw-r--r--llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp2
-rw-r--r--llvm/lib/Target/XCore/XCoreAsmPrinter.cpp2
-rw-r--r--llvm/lib/Target/XCore/XCoreISelLowering.cpp12
-rw-r--r--llvm/lib/Target/XCore/XCoreInstrInfo.cpp2
-rw-r--r--llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp7
-rw-r--r--llvm/lib/Target/XCore/XCoreSubtarget.cpp2
-rw-r--r--llvm/lib/Target/XCore/XCoreTargetMachine.cpp4
-rw-r--r--llvm/lib/TextAPI/TextStub.cpp4
-rw-r--r--llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp1
-rw-r--r--llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp22
-rw-r--r--llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombineInternal.h38
-rw-r--r--llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp92
-rw-r--r--llvm/lib/Transforms/Coroutines/CoroCleanup.cpp20
-rw-r--r--llvm/lib/Transforms/Coroutines/CoroEarly.cpp3
-rw-r--r--llvm/lib/Transforms/Coroutines/CoroFrame.cpp85
-rw-r--r--llvm/lib/Transforms/Coroutines/CoroInstr.h2
-rw-r--r--llvm/lib/Transforms/Coroutines/CoroSplit.cpp61
-rw-r--r--llvm/lib/Transforms/Coroutines/Coroutines.cpp14
-rw-r--r--llvm/lib/Transforms/IPO/AlwaysInliner.cpp12
-rw-r--r--llvm/lib/Transforms/IPO/ArgumentPromotion.cpp44
-rw-r--r--llvm/lib/Transforms/IPO/Attributor.cpp194
-rw-r--r--llvm/lib/Transforms/IPO/AttributorAttributes.cpp683
-rw-r--r--llvm/lib/Transforms/IPO/ConstantMerge.cpp44
-rw-r--r--llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp43
-rw-r--r--llvm/lib/Transforms/IPO/ExtractGV.cpp29
-rw-r--r--llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp4
-rw-r--r--llvm/lib/Transforms/IPO/FunctionAttrs.cpp440
-rw-r--r--llvm/lib/Transforms/IPO/FunctionImport.cpp149
-rw-r--r--llvm/lib/Transforms/IPO/FunctionSpecialization.cpp332
-rw-r--r--llvm/lib/Transforms/IPO/GlobalDCE.cpp14
-rw-r--r--llvm/lib/Transforms/IPO/GlobalOpt.cpp368
-rw-r--r--llvm/lib/Transforms/IPO/GlobalSplit.cpp5
-rw-r--r--llvm/lib/Transforms/IPO/IROutliner.cpp977
-rw-r--r--llvm/lib/Transforms/IPO/Inliner.cpp262
-rw-r--r--llvm/lib/Transforms/IPO/Internalize.cpp30
-rw-r--r--llvm/lib/Transforms/IPO/LoopExtractor.cpp10
-rw-r--r--llvm/lib/Transforms/IPO/LowerTypeTests.cpp79
-rw-r--r--llvm/lib/Transforms/IPO/MergeFunctions.cpp10
-rw-r--r--llvm/lib/Transforms/IPO/ModuleInliner.cpp354
-rw-r--r--llvm/lib/Transforms/IPO/OpenMPOpt.cpp719
-rw-r--r--llvm/lib/Transforms/IPO/PartialInlining.cpp10
-rw-r--r--llvm/lib/Transforms/IPO/PassManagerBuilder.cpp7
-rw-r--r--llvm/lib/Transforms/IPO/SCCP.cpp2
-rw-r--r--llvm/lib/Transforms/IPO/SampleContextTracker.cpp164
-rw-r--r--llvm/lib/Transforms/IPO/SampleProfile.cpp226
-rw-r--r--llvm/lib/Transforms/IPO/SampleProfileProbe.cpp4
-rw-r--r--llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp15
-rw-r--r--llvm/lib/Transforms/IPO/StripSymbols.cpp4
-rw-r--r--llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp38
-rw-r--r--llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp21
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp88
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp1103
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp538
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp165
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp616
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineInternal.h24
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp48
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp44
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp14
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp26
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp226
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp549
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp76
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp175
-rw-r--r--llvm/lib/Transforms/InstCombine/InstructionCombining.cpp384
-rw-r--r--llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp187
-rw-r--r--llvm/lib/Transforms/Instrumentation/CGProfile.cpp2
-rw-r--r--llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp8
-rw-r--r--llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp483
-rw-r--r--llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp10
-rw-r--r--llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp384
-rw-r--r--llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp7
-rw-r--r--llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp136
-rw-r--r--llvm/lib/Transforms/Instrumentation/MemProfiler.cpp18
-rw-r--r--llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp121
-rw-r--r--llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp19
-rw-r--r--llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp104
-rw-r--r--llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp16
-rw-r--r--llvm/lib/Transforms/ObjCARC/ObjCARC.cpp35
-rw-r--r--llvm/lib/Transforms/ObjCARC/ObjCARC.h8
-rw-r--r--llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp15
-rw-r--r--llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp41
-rw-r--r--llvm/lib/Transforms/ObjCARC/ObjCARCExpand.cpp14
-rw-r--r--llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp13
-rw-r--r--llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.h3
-rw-r--r--llvm/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp8
-rw-r--r--llvm/lib/Transforms/Scalar/ADCE.cpp6
-rw-r--r--llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp4
-rw-r--r--llvm/lib/Transforms/Scalar/BDCE.cpp14
-rw-r--r--llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp5
-rw-r--r--llvm/lib/Transforms/Scalar/ConstantHoisting.cpp7
-rw-r--r--llvm/lib/Transforms/Scalar/ConstraintElimination.cpp35
-rw-r--r--llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp93
-rw-r--r--llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp36
-rw-r--r--llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp1063
-rw-r--r--llvm/lib/Transforms/Scalar/DivRemPairs.cpp7
-rw-r--r--llvm/lib/Transforms/Scalar/EarlyCSE.cpp20
-rw-r--r--llvm/lib/Transforms/Scalar/Float2Int.cpp4
-rw-r--r--llvm/lib/Transforms/Scalar/GVN.cpp234
-rw-r--r--llvm/lib/Transforms/Scalar/GVNHoist.cpp14
-rw-r--r--llvm/lib/Transforms/Scalar/GuardWidening.cpp86
-rw-r--r--llvm/lib/Transforms/Scalar/IndVarSimplify.cpp211
-rw-r--r--llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp190
-rw-r--r--llvm/lib/Transforms/Scalar/JumpThreading.cpp64
-rw-r--r--llvm/lib/Transforms/Scalar/LICM.cpp369
-rw-r--r--llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp126
-rw-r--r--llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp4
-rw-r--r--llvm/lib/Transforms/Scalar/LoopDeletion.cpp55
-rw-r--r--llvm/lib/Transforms/Scalar/LoopDistribute.cpp4
-rw-r--r--llvm/lib/Transforms/Scalar/LoopFlatten.cpp270
-rw-r--r--llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp419
-rw-r--r--llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp21
-rw-r--r--llvm/lib/Transforms/Scalar/LoopInterchange.cpp18
-rw-r--r--llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp12
-rw-r--r--llvm/lib/Transforms/Scalar/LoopPassManager.cpp42
-rw-r--r--llvm/lib/Transforms/Scalar/LoopPredication.cpp160
-rw-r--r--llvm/lib/Transforms/Scalar/LoopRotation.cpp15
-rw-r--r--llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp16
-rw-r--r--llvm/lib/Transforms/Scalar/LoopSink.cpp9
-rw-r--r--llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp180
-rw-r--r--llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp4
-rw-r--r--llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp340
-rw-r--r--llvm/lib/Transforms/Scalar/LoopUnswitch.cpp22
-rw-r--r--llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp14
-rw-r--r--llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp7
-rw-r--r--llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp26
-rw-r--r--llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp622
-rw-r--r--llvm/lib/Transforms/Scalar/MergeICmps.cpp204
-rw-r--r--llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp9
-rw-r--r--llvm/lib/Transforms/Scalar/NaryReassociate.cpp114
-rw-r--r--llvm/lib/Transforms/Scalar/NewGVN.cpp11
-rw-r--r--llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp2
-rw-r--r--llvm/lib/Transforms/Scalar/Reassociate.cpp19
-rw-r--r--llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp48
-rw-r--r--llvm/lib/Transforms/Scalar/SCCP.cpp17
-rw-r--r--llvm/lib/Transforms/Scalar/SROA.cpp206
-rw-r--r--llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp8
-rw-r--r--llvm/lib/Transforms/Scalar/Scalarizer.cpp18
-rw-r--r--llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp13
-rw-r--r--llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp120
-rw-r--r--llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp19
-rw-r--r--llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp2
-rw-r--r--llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp4
-rw-r--r--llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp14
-rw-r--r--llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp3
-rw-r--r--llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp34
-rw-r--r--llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp29
-rw-r--r--llvm/lib/Transforms/Utils/BasicBlockUtils.cpp23
-rw-r--r--llvm/lib/Transforms/Utils/BuildLibCalls.cpp33
-rw-r--r--llvm/lib/Transforms/Utils/CallPromotionUtils.cpp26
-rw-r--r--llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp1
-rw-r--r--llvm/lib/Transforms/Utils/CloneFunction.cpp10
-rw-r--r--llvm/lib/Transforms/Utils/CodeExtractor.cpp126
-rw-r--r--llvm/lib/Transforms/Utils/CodeMoverUtils.cpp71
-rw-r--r--llvm/lib/Transforms/Utils/Debugify.cpp10
-rw-r--r--llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp14
-rw-r--r--llvm/lib/Transforms/Utils/Evaluator.cpp12
-rw-r--r--llvm/lib/Transforms/Utils/FixIrreducible.cpp2
-rw-r--r--llvm/lib/Transforms/Utils/FlattenCFG.cpp4
-rw-r--r--llvm/lib/Transforms/Utils/FunctionComparator.cpp2
-rw-r--r--llvm/lib/Transforms/Utils/GlobalStatus.cpp10
-rw-r--r--llvm/lib/Transforms/Utils/InjectTLIMappings.cpp6
-rw-r--r--llvm/lib/Transforms/Utils/InlineFunction.cpp202
-rw-r--r--llvm/lib/Transforms/Utils/LCSSA.cpp2
-rw-r--r--llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp2
-rw-r--r--llvm/lib/Transforms/Utils/Local.cpp150
-rw-r--r--llvm/lib/Transforms/Utils/LoopPeel.cpp198
-rw-r--r--llvm/lib/Transforms/Utils/LoopRotationUtils.cpp24
-rw-r--r--llvm/lib/Transforms/Utils/LoopSimplify.cpp13
-rw-r--r--llvm/lib/Transforms/Utils/LoopUnroll.cpp19
-rw-r--r--llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp440
-rw-r--r--llvm/lib/Transforms/Utils/LoopUtils.cpp313
-rw-r--r--llvm/lib/Transforms/Utils/LoopVersioning.cpp16
-rw-r--r--llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp2
-rw-r--r--llvm/lib/Transforms/Utils/LowerSwitch.cpp10
-rw-r--r--llvm/lib/Transforms/Utils/ModuleUtils.cpp7
-rw-r--r--llvm/lib/Transforms/Utils/PredicateInfo.cpp46
-rw-r--r--llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp3
-rw-r--r--llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp4
-rw-r--r--llvm/lib/Transforms/Utils/SCCPSolver.cpp30
-rw-r--r--llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp6
-rw-r--r--llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp188
-rw-r--r--llvm/lib/Transforms/Utils/SimplifyCFG.cpp275
-rw-r--r--llvm/lib/Transforms/Utils/SimplifyIndVar.cpp3
-rw-r--r--llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp144
-rw-r--r--llvm/lib/Transforms/Utils/SplitModule.cpp29
-rw-r--r--llvm/lib/Transforms/Utils/SymbolRewriter.cpp8
-rw-r--r--llvm/lib/Transforms/Utils/VNCoercion.cpp27
-rw-r--r--llvm/lib/Transforms/Utils/ValueMapper.cpp67
-rw-r--r--llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp108
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp8
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h39
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorize.cpp947
-rw-r--r--llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp2406
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlan.cpp50
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlan.h76
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp104
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp30
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanVerifier.h7
-rw-r--r--llvm/lib/Transforms/Vectorize/VectorCombine.cpp290
-rw-r--r--llvm/lib/WindowsManifest/WindowsManifestMerger.cpp8
-rw-r--r--llvm/lib/XRay/InstrumentationMap.cpp8
-rw-r--r--llvm/tools/bugpoint/CrashDebugger.cpp24
-rw-r--r--llvm/tools/bugpoint/OptimizerDriver.cpp4
-rw-r--r--llvm/tools/bugpoint/ToolRunner.cpp12
-rw-r--r--llvm/tools/llc/llc.cpp39
-rw-r--r--llvm/tools/lli/ChildTarget/ChildTarget.cpp95
-rw-r--r--llvm/tools/lli/ForwardingMemoryManager.h (renamed from llvm/tools/lli/RemoteJITUtils.h)62
-rw-r--r--llvm/tools/lli/lli.cpp68
-rw-r--r--llvm/tools/llvm-ar/llvm-ar.cpp8
-rw-r--r--llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp11
-rw-r--r--llvm/tools/llvm-cov/CodeCoverage.cpp12
-rw-r--r--llvm/tools/llvm-cov/CoverageExporterLcov.cpp2
-rw-r--r--llvm/tools/llvm-cov/CoverageFilters.cpp2
-rw-r--r--llvm/tools/llvm-cxxdump/Error.cpp1
-rw-r--r--llvm/tools/llvm-cxxdump/llvm-cxxdump.cpp4
-rw-r--r--llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp29
-rw-r--r--llvm/tools/llvm-diff/lib/DiffConsumer.cpp (renamed from llvm/tools/llvm-diff/DiffConsumer.cpp)6
-rw-r--r--llvm/tools/llvm-diff/lib/DiffConsumer.h (renamed from llvm/tools/llvm-diff/DiffConsumer.h)1
-rw-r--r--llvm/tools/llvm-diff/lib/DiffLog.cpp (renamed from llvm/tools/llvm-diff/DiffLog.cpp)0
-rw-r--r--llvm/tools/llvm-diff/lib/DiffLog.h (renamed from llvm/tools/llvm-diff/DiffLog.h)0
-rw-r--r--llvm/tools/llvm-diff/lib/DifferenceEngine.cpp (renamed from llvm/tools/llvm-diff/DifferenceEngine.cpp)0
-rw-r--r--llvm/tools/llvm-diff/lib/DifferenceEngine.h (renamed from llvm/tools/llvm-diff/DifferenceEngine.h)0
-rw-r--r--llvm/tools/llvm-diff/llvm-diff.cpp4
-rw-r--r--llvm/tools/llvm-dwarfdump/Statistics.cpp331
-rw-r--r--llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp23
-rw-r--r--llvm/tools/llvm-dwp/llvm-dwp.cpp2
-rw-r--r--llvm/tools/llvm-lto/llvm-lto.cpp44
-rw-r--r--llvm/tools/llvm-lto2/llvm-lto2.cpp12
-rw-r--r--llvm/tools/llvm-mc/Disassembler.cpp4
-rw-r--r--llvm/tools/llvm-mc/llvm-mc.cpp4
-rw-r--r--llvm/tools/llvm-mca/CodeRegionGenerator.cpp2
-rw-r--r--llvm/tools/llvm-mca/CodeRegionGenerator.h6
-rw-r--r--llvm/tools/llvm-mca/PipelinePrinter.cpp1
-rw-r--r--llvm/tools/llvm-mca/PipelinePrinter.h2
-rw-r--r--llvm/tools/llvm-mca/Views/DispatchStatistics.h2
-rw-r--r--llvm/tools/llvm-mca/Views/InstructionView.h2
-rw-r--r--llvm/tools/llvm-mca/Views/RegisterFileStatistics.h2
-rw-r--r--llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.h2
-rw-r--r--llvm/tools/llvm-mca/Views/SchedulerStatistics.h2
-rw-r--r--llvm/tools/llvm-mca/Views/SummaryView.h8
-rw-r--r--llvm/tools/llvm-mca/Views/TimelineView.cpp19
-rw-r--r--llvm/tools/llvm-mca/lib/AMDGPU/AMDGPUCustomBehaviour.cpp33
-rw-r--r--llvm/tools/llvm-mca/lib/AMDGPU/AMDGPUCustomBehaviour.h57
-rw-r--r--llvm/tools/llvm-mca/llvm-mca.cpp98
-rw-r--r--llvm/tools/llvm-nm/llvm-nm.cpp115
-rw-r--r--llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp19
-rw-r--r--llvm/tools/llvm-objcopy/COFF/Object.cpp2
-rw-r--r--llvm/tools/llvm-objcopy/COFF/Writer.cpp18
-rw-r--r--llvm/tools/llvm-objcopy/CommonConfig.h22
-rw-r--r--llvm/tools/llvm-objcopy/ConfigManager.cpp124
-rw-r--r--llvm/tools/llvm-objcopy/ELF/ELFConfig.h10
-rw-r--r--llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp133
-rw-r--r--llvm/tools/llvm-objcopy/ELF/Object.cpp164
-rw-r--r--llvm/tools/llvm-objcopy/ELF/Object.h34
-rw-r--r--llvm/tools/llvm-objcopy/MachO/MachOConfig.h24
-rw-r--r--llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp45
-rw-r--r--llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.h47
-rw-r--r--llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp63
-rw-r--r--llvm/tools/llvm-objcopy/MachO/MachOObjcopy.h3
-rw-r--r--llvm/tools/llvm-objcopy/MachO/MachOReader.cpp38
-rw-r--r--llvm/tools/llvm-objcopy/MachO/MachOReader.h2
-rw-r--r--llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp195
-rw-r--r--llvm/tools/llvm-objcopy/MachO/MachOWriter.h9
-rw-r--r--llvm/tools/llvm-objcopy/MachO/Object.cpp20
-rw-r--r--llvm/tools/llvm-objcopy/MachO/Object.h18
-rw-r--r--llvm/tools/llvm-objcopy/ObjcopyOpts.td11
-rw-r--r--llvm/tools/llvm-objdump/COFFDump.cpp205
-rw-r--r--llvm/tools/llvm-objdump/COFFDump.h2
-rw-r--r--llvm/tools/llvm-objdump/ELFDump.cpp2
-rw-r--r--llvm/tools/llvm-objdump/MachODump.cpp10
-rw-r--r--llvm/tools/llvm-objdump/ObjdumpOpts.td58
-rw-r--r--llvm/tools/llvm-objdump/XCOFFDump.cpp18
-rw-r--r--llvm/tools/llvm-objdump/XCOFFDump.h4
-rw-r--r--llvm/tools/llvm-objdump/llvm-objdump.cpp142
-rw-r--r--llvm/tools/llvm-objdump/llvm-objdump.h16
-rw-r--r--llvm/tools/llvm-pdbutil/ExplainOutputStyle.cpp2
-rw-r--r--llvm/tools/llvm-pdbutil/LinePrinter.cpp30
-rw-r--r--llvm/tools/llvm-pdbutil/LinePrinter.h10
-rw-r--r--llvm/tools/llvm-profdata/llvm-profdata.cpp157
-rw-r--r--llvm/tools/llvm-readobj/ARMEHABIPrinter.h2
-rw-r--r--llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp42
-rw-r--r--llvm/tools/llvm-readobj/ARMWinEHPrinter.h3
-rw-r--r--llvm/tools/llvm-readobj/COFFDumper.cpp29
-rw-r--r--llvm/tools/llvm-readobj/DwarfCFIEHPrinter.h3
-rw-r--r--llvm/tools/llvm-readobj/ELFDumper.cpp241
-rw-r--r--llvm/tools/llvm-readobj/MachODumper.cpp30
-rw-r--r--llvm/tools/llvm-readobj/ObjDumper.cpp18
-rw-r--r--llvm/tools/llvm-readobj/ObjDumper.h5
-rw-r--r--llvm/tools/llvm-readobj/Opts.td20
-rw-r--r--llvm/tools/llvm-readobj/WasmDumper.cpp6
-rw-r--r--llvm/tools/llvm-readobj/Win64EHDumper.cpp47
-rw-r--r--llvm/tools/llvm-readobj/XCOFFDumper.cpp309
-rw-r--r--llvm/tools/llvm-readobj/llvm-readobj.cpp19
-rw-r--r--llvm/tools/llvm-readobj/llvm-readobj.h2
-rw-r--r--llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp74
-rw-r--r--llvm/tools/llvm-stress/llvm-stress.cpp6
-rw-r--r--llvm/tools/llvm-strings/llvm-strings.cpp4
-rw-r--r--llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp2
-rw-r--r--llvm/tools/llvm-tli-checker/Opts.td16
-rw-r--r--llvm/tools/llvm-tli-checker/llvm-tli-checker.cpp357
-rw-r--r--llvm/tools/llvm-xray/xray-color-helper.cpp8
-rw-r--r--llvm/tools/llvm-xray/xray-converter.cpp14
-rw-r--r--llvm/tools/llvm-xray/xray-extract.cpp13
-rw-r--r--llvm/tools/opt/NewPMDriver.cpp64
-rw-r--r--llvm/tools/opt/opt.cpp79
-rw-r--r--llvm/utils/TableGen/AsmMatcherEmitter.cpp16
-rw-r--r--llvm/utils/TableGen/AsmWriterEmitter.cpp5
-rw-r--r--llvm/utils/TableGen/CodeEmitterGen.cpp4
-rw-r--r--llvm/utils/TableGen/CodeGenDAGPatterns.cpp31
-rw-r--r--llvm/utils/TableGen/CodeGenDAGPatterns.h7
-rw-r--r--llvm/utils/TableGen/CodeGenMapTable.cpp15
-rw-r--r--llvm/utils/TableGen/CodeGenRegisters.cpp23
-rw-r--r--llvm/utils/TableGen/CodeGenRegisters.h1
-rw-r--r--llvm/utils/TableGen/CodeGenTarget.cpp1
-rw-r--r--llvm/utils/TableGen/CompressInstEmitter.cpp (renamed from llvm/utils/TableGen/RISCVCompressInstEmitter.cpp)172
-rw-r--r--llvm/utils/TableGen/GlobalISelEmitter.cpp10
-rw-r--r--llvm/utils/TableGen/IntrinsicEmitter.cpp4
-rw-r--r--llvm/utils/TableGen/PredicateExpander.cpp2
-rw-r--r--llvm/utils/TableGen/RegisterInfoEmitter.cpp1
-rw-r--r--llvm/utils/TableGen/X86DisassemblerTables.cpp2
-rw-r--r--llvm/utils/TableGen/X86DisassemblerTables.h4
-rw-r--r--llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp4
-rw-r--r--llvm/utils/TableGen/X86FoldTablesEmitter.cpp6
-rw-r--r--llvm/utils/TableGen/X86RecognizableInstr.cpp27
-rw-r--r--llvm/utils/TableGen/X86RecognizableInstr.h3
-rw-r--r--openmp/runtime/src/dllexports28
-rw-r--r--openmp/runtime/src/exports_so.txt2
-rw-r--r--openmp/runtime/src/i18n/en_US.txt4
-rw-r--r--openmp/runtime/src/include/omp.h.var11
-rw-r--r--openmp/runtime/src/include/omp_lib.f90.var49
-rw-r--r--openmp/runtime/src/include/omp_lib.h.var63
-rw-r--r--openmp/runtime/src/kmp.h96
-rw-r--r--openmp/runtime/src/kmp_affinity.cpp335
-rw-r--r--openmp/runtime/src/kmp_affinity.h61
-rw-r--r--openmp/runtime/src/kmp_alloc.cpp115
-rw-r--r--openmp/runtime/src/kmp_atomic.cpp179
-rw-r--r--openmp/runtime/src/kmp_atomic.h82
-rw-r--r--openmp/runtime/src/kmp_barrier.cpp561
-rw-r--r--openmp/runtime/src/kmp_barrier.h141
-rw-r--r--openmp/runtime/src/kmp_config.h.cmake15
-rw-r--r--openmp/runtime/src/kmp_csupport.cpp80
-rw-r--r--openmp/runtime/src/kmp_dispatch.cpp9
-rw-r--r--openmp/runtime/src/kmp_ftn_os.h3
-rw-r--r--openmp/runtime/src/kmp_global.cpp5
-rw-r--r--openmp/runtime/src/kmp_gsupport.cpp107
-rw-r--r--openmp/runtime/src/kmp_itt.cpp9
-rw-r--r--openmp/runtime/src/kmp_itt.h22
-rw-r--r--openmp/runtime/src/kmp_itt.inl364
-rw-r--r--openmp/runtime/src/kmp_lock.cpp114
-rw-r--r--openmp/runtime/src/kmp_lock.h36
-rw-r--r--openmp/runtime/src/kmp_os.h24
-rw-r--r--openmp/runtime/src/kmp_runtime.cpp406
-rw-r--r--openmp/runtime/src/kmp_settings.cpp174
-rw-r--r--openmp/runtime/src/kmp_stats.h4
-rw-r--r--openmp/runtime/src/kmp_str.cpp25
-rw-r--r--openmp/runtime/src/kmp_str.h1
-rw-r--r--openmp/runtime/src/kmp_stub.cpp80
-rw-r--r--openmp/runtime/src/kmp_taskdeps.cpp115
-rw-r--r--openmp/runtime/src/kmp_taskdeps.h12
-rw-r--r--openmp/runtime/src/kmp_tasking.cpp189
-rw-r--r--openmp/runtime/src/kmp_utility.cpp20
-rw-r--r--openmp/runtime/src/kmp_wait_release.cpp8
-rw-r--r--openmp/runtime/src/kmp_wait_release.h737
-rw-r--r--openmp/runtime/src/ompt-general.cpp9
-rw-r--r--openmp/runtime/src/ompt-specific.cpp35
-rw-r--r--openmp/runtime/src/thirdparty/ittnotify/LICENSE.txt8
-rw-r--r--openmp/runtime/src/thirdparty/ittnotify/disable_warnings.h26
-rw-r--r--openmp/runtime/src/thirdparty/ittnotify/ittnotify.h437
-rw-r--r--openmp/runtime/src/thirdparty/ittnotify/ittnotify_config.h103
-rw-r--r--openmp/runtime/src/thirdparty/ittnotify/ittnotify_static.cpp439
-rw-r--r--openmp/runtime/src/thirdparty/ittnotify/ittnotify_static.h52
-rw-r--r--openmp/runtime/src/thirdparty/ittnotify/ittnotify_types.h2
-rw-r--r--openmp/runtime/src/thirdparty/ittnotify/legacy/ittnotify.h8
-rw-r--r--openmp/runtime/src/z_Linux_util.cpp102
-rw-r--r--openmp/runtime/src/z_Windows_NT_util.cpp78
4447 files changed, 219225 insertions, 110197 deletions
diff --git a/clang/include/clang-c/Index.h b/clang/include/clang-c/Index.h
index 26844d1c74f3..b0d7ef509c26 100644
--- a/clang/include/clang-c/Index.h
+++ b/clang/include/clang-c/Index.h
@@ -2592,7 +2592,15 @@ enum CXCursorKind {
*/
CXCursor_OMPUnrollDirective = 293,
- CXCursor_LastStmt = CXCursor_OMPUnrollDirective,
+ /** OpenMP metadirective directive.
+ */
+ CXCursor_OMPMetaDirective = 294,
+
+ /** OpenMP loop directive.
+ */
+ CXCursor_OMPGenericLoopDirective = 295,
+
+ CXCursor_LastStmt = CXCursor_OMPGenericLoopDirective,
/**
* Cursor that represents the translation unit itself.
@@ -3298,8 +3306,9 @@ enum CXTypeKind {
CXType_UAccum = 37,
CXType_ULongAccum = 38,
CXType_BFloat16 = 39,
+ CXType_Ibm128 = 40,
CXType_FirstBuiltin = CXType_Void,
- CXType_LastBuiltin = CXType_BFloat16,
+ CXType_LastBuiltin = CXType_Ibm128,
CXType_Complex = 100,
CXType_Pointer = 101,
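[Editor's note, not part of the import: a libclang client that wants to react to the two new OpenMP cursor kinds can match them in an ordinary cursor visitor. A minimal sketch follows; the translation unit TU is assumed to have been parsed already, and only the enumerators added above are relied on.]

  #include <clang-c/Index.h>
  #include <cstdio>

  // Visitor that reports the newly added OpenMP directive cursors.
  static CXChildVisitResult visitNode(CXCursor C, CXCursor /*Parent*/,
                                      CXClientData /*Data*/) {
    CXCursorKind K = clang_getCursorKind(C);
    if (K == CXCursor_OMPMetaDirective || K == CXCursor_OMPGenericLoopDirective) {
      CXString S = clang_getCursorKindSpelling(K);
      std::printf("found %s\n", clang_getCString(S));
      clang_disposeString(S);
    }
    return CXChildVisit_Recurse;
  }
  // Used as: clang_visitChildren(clang_getTranslationUnitCursor(TU), visitNode, nullptr);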
diff --git a/clang/include/clang/AST/ASTConcept.h b/clang/include/clang/AST/ASTConcept.h
index d0526f4fa5c5..aba18b060b02 100644
--- a/clang/include/clang/AST/ASTConcept.h
+++ b/clang/include/clang/AST/ASTConcept.h
@@ -1,9 +1,8 @@
//===--- ASTConcept.h - Concepts Related AST Data Structures ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h
index 34299581d89d..d336342e4cda 100644
--- a/clang/include/clang/AST/ASTContext.h
+++ b/clang/include/clang/AST/ASTContext.h
@@ -102,6 +102,7 @@ class ParentMapContext;
class DynTypedNode;
class DynTypedNodeList;
class Expr;
+enum class FloatModeKind;
class GlobalDecl;
class ItaniumMangleContext;
class MangleContext;
@@ -164,24 +165,46 @@ namespace serialization {
template <class> class AbstractTypeReader;
} // namespace serialization
+enum class AlignRequirementKind {
+ /// The alignment was not explicit in code.
+ None,
+
+ /// The alignment comes from an alignment attribute on a typedef.
+ RequiredByTypedef,
+
+ /// The alignment comes from an alignment attribute on a record type.
+ RequiredByRecord,
+
+ /// The alignment comes from an alignment attribute on a enum type.
+ RequiredByEnum,
+};
+
struct TypeInfo {
uint64_t Width = 0;
unsigned Align = 0;
- bool AlignIsRequired : 1;
+ AlignRequirementKind AlignRequirement;
- TypeInfo() : AlignIsRequired(false) {}
- TypeInfo(uint64_t Width, unsigned Align, bool AlignIsRequired)
- : Width(Width), Align(Align), AlignIsRequired(AlignIsRequired) {}
+ TypeInfo() : AlignRequirement(AlignRequirementKind::None) {}
+ TypeInfo(uint64_t Width, unsigned Align,
+ AlignRequirementKind AlignRequirement)
+ : Width(Width), Align(Align), AlignRequirement(AlignRequirement) {}
+ bool isAlignRequired() {
+ return AlignRequirement != AlignRequirementKind::None;
+ }
};
struct TypeInfoChars {
CharUnits Width;
CharUnits Align;
- bool AlignIsRequired : 1;
+ AlignRequirementKind AlignRequirement;
- TypeInfoChars() : AlignIsRequired(false) {}
- TypeInfoChars(CharUnits Width, CharUnits Align, bool AlignIsRequired)
- : Width(Width), Align(Align), AlignIsRequired(AlignIsRequired) {}
+ TypeInfoChars() : AlignRequirement(AlignRequirementKind::None) {}
+ TypeInfoChars(CharUnits Width, CharUnits Align,
+ AlignRequirementKind AlignRequirement)
+ : Width(Width), Align(Align), AlignRequirement(AlignRequirement) {}
+ bool isAlignRequired() {
+ return AlignRequirement != AlignRequirementKind::None;
+ }
};
/// Holds long-lived AST nodes (such as types and decls) that can be
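[Editor's note, not part of the import: callers that used to test the removed AlignIsRequired bit now either call isAlignRequired() or inspect AlignRequirement to learn where the alignment requirement came from. A minimal sketch, assuming a valid ASTContext and QualType are in scope:]

  #include "clang/AST/ASTContext.h"

  // Replaces the old `if (TI.AlignIsRequired)` pattern.
  static bool hasTypedefAlignment(clang::ASTContext &Ctx, clang::QualType T) {
    clang::TypeInfo TI = Ctx.getTypeInfo(T);
    if (!TI.isAlignRequired())
      return false;                         // alignment was not explicit in code
    return TI.AlignRequirement ==
           clang::AlignRequirementKind::RequiredByTypedef;
  }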
@@ -672,6 +695,12 @@ public:
SourceManager& getSourceManager() { return SourceMgr; }
const SourceManager& getSourceManager() const { return SourceMgr; }
+ // Cleans up some of the data structures. This allows us to do cleanup
+ // normally done in the destructor earlier. Renders much of the ASTContext
+ // unusable, mostly the actual AST nodes, so should be called when we no
+ // longer need access to the AST.
+ void cleanup();
+
llvm::BumpPtrAllocator &getAllocator() const {
return BumpAlloc;
}
@@ -728,7 +757,8 @@ public:
/// getRealTypeForBitwidth -
/// sets floating point QualTy according to specified bitwidth.
/// Returns empty type if there is no appropriate target types.
- QualType getRealTypeForBitwidth(unsigned DestWidth, bool ExplicitIEEE) const;
+ QualType getRealTypeForBitwidth(unsigned DestWidth,
+ FloatModeKind ExplicitType) const;
bool AtomicUsesUnsupportedLibcall(const AtomicExpr *E) const;
@@ -1054,7 +1084,7 @@ public:
CanQualType SignedCharTy, ShortTy, IntTy, LongTy, LongLongTy, Int128Ty;
CanQualType UnsignedCharTy, UnsignedShortTy, UnsignedIntTy, UnsignedLongTy;
CanQualType UnsignedLongLongTy, UnsignedInt128Ty;
- CanQualType FloatTy, DoubleTy, LongDoubleTy, Float128Ty;
+ CanQualType FloatTy, DoubleTy, LongDoubleTy, Float128Ty, Ibm128Ty;
CanQualType ShortAccumTy, AccumTy,
LongAccumTy; // ISO/IEC JTC1 SC22 WG14 N1169 Extension
CanQualType UnsignedShortAccumTy, UnsignedAccumTy, UnsignedLongAccumTy;
@@ -1069,8 +1099,6 @@ public:
CanQualType HalfTy; // [OpenCL 6.1.1.1], ARM NEON
CanQualType BFloat16Ty;
CanQualType Float16Ty; // C11 extension ISO/IEC TS 18661-3
- CanQualType FloatComplexTy, DoubleComplexTy, LongDoubleComplexTy;
- CanQualType Float128ComplexTy;
CanQualType VoidPtrTy, NullPtrTy;
CanQualType DependentTy, OverloadTy, BoundMemberTy, UnknownAnyTy;
CanQualType BuiltinFnTy;
@@ -1340,6 +1368,12 @@ public:
/// Get address space for OpenCL type.
LangAS getOpenCLTypeAddrSpace(const Type *T) const;
+ /// Returns default address space based on OpenCL version and enabled features
+ inline LangAS getDefaultOpenCLPointeeAddrSpace() {
+ return LangOpts.OpenCLGenericAddressSpace ? LangAS::opencl_generic
+ : LangAS::opencl_private;
+ }
+
void setcudaConfigureCallDecl(FunctionDecl *FD) {
cudaConfigureCallDecl = FD;
}
@@ -1497,6 +1531,12 @@ private:
QualType getFunctionTypeInternal(QualType ResultTy, ArrayRef<QualType> Args,
const FunctionProtoType::ExtProtoInfo &EPI,
bool OnlyWantCanonical) const;
+ QualType
+ getAutoTypeInternal(QualType DeducedType, AutoTypeKeyword Keyword,
+ bool IsDependent, bool IsPack = false,
+ ConceptDecl *TypeConstraintConcept = nullptr,
+ ArrayRef<TemplateArgument> TypeConstraintArgs = {},
+ bool IsCanon = false) const;
public:
/// Return the unique reference to the type for the specified type
@@ -1631,6 +1671,8 @@ public:
QualType getTypeOfExprType(Expr *e) const;
QualType getTypeOfType(QualType t) const;
+ QualType getReferenceQualifiedType(const Expr *e) const;
+
/// C++11 decltype.
QualType getDecltypeType(Expr *e, QualType UnderlyingType) const;
@@ -2505,8 +2547,10 @@ public:
bool ObjCMethodsAreEqual(const ObjCMethodDecl *MethodDecl,
const ObjCMethodDecl *MethodImp);
- bool UnwrapSimilarTypes(QualType &T1, QualType &T2);
- void UnwrapSimilarArrayTypes(QualType &T1, QualType &T2);
+ bool UnwrapSimilarTypes(QualType &T1, QualType &T2,
+ bool AllowPiMismatch = true);
+ void UnwrapSimilarArrayTypes(QualType &T1, QualType &T2,
+ bool AllowPiMismatch = true);
/// Determine if two types are similar, according to the C++ rules. That is,
/// determine if they are the same other than qualifiers on the initial
@@ -3209,31 +3253,10 @@ public:
StringRef getCUIDHash() const;
- void AddSYCLKernelNamingDecl(const CXXRecordDecl *RD);
- bool IsSYCLKernelNamingDecl(const NamedDecl *RD) const;
- unsigned GetSYCLKernelNamingIndex(const NamedDecl *RD);
- /// A SourceLocation to store whether we have evaluated a kernel name already,
- /// and where it happened. If so, we need to diagnose an illegal use of the
- /// builtin.
- llvm::MapVector<const SYCLUniqueStableNameExpr *, std::string>
- SYCLUniqueStableNameEvaluatedValues;
-
private:
/// All OMPTraitInfo objects live in this collection, one per
/// `pragma omp [begin] declare variant` directive.
SmallVector<std::unique_ptr<OMPTraitInfo>, 4> OMPTraitInfoVector;
-
- /// A list of the (right now just lambda decls) declarations required to
- /// name all the SYCL kernels in the translation unit, so that we can get the
- /// correct kernel name, as well as implement
- /// __builtin_sycl_unique_stable_name.
- llvm::DenseMap<const DeclContext *,
- llvm::SmallPtrSet<const CXXRecordDecl *, 4>>
- SYCLKernelNamingTypes;
- std::unique_ptr<ItaniumMangleContext> SYCLKernelFilterContext;
- void FilterSYCLKernelNamingDecls(
- const CXXRecordDecl *RD,
- llvm::SmallVectorImpl<const CXXRecordDecl *> &Decls);
};
/// Insertion operator for diagnostics.
diff --git a/clang/include/clang/AST/ASTFwd.h b/clang/include/clang/AST/ASTFwd.h
index 649b57113424..fdbd603ce5d0 100644
--- a/clang/include/clang/AST/ASTFwd.h
+++ b/clang/include/clang/AST/ASTFwd.h
@@ -30,6 +30,9 @@ class OMPClause;
#define GEN_CLANG_CLAUSE_CLASS
#define CLAUSE_CLASS(Enum, Str, Class) class Class;
#include "llvm/Frontend/OpenMP/OMP.inc"
+class Attr;
+#define ATTR(A) class A##Attr;
+#include "clang/Basic/AttrList.inc"
} // end namespace clang
diff --git a/clang/include/clang/AST/ASTImporter.h b/clang/include/clang/AST/ASTImporter.h
index 17e673a8471a..c8bdae10a6e6 100644
--- a/clang/include/clang/AST/ASTImporter.h
+++ b/clang/include/clang/AST/ASTImporter.h
@@ -379,6 +379,9 @@ class TypeSourceInfo;
return Import(const_cast<Decl *>(FromD));
}
+ llvm::Expected<InheritedConstructor>
+ Import(const InheritedConstructor &From);
+
/// Return the copy of the given declaration in the "to" context if
/// it has already been imported from the "from" context. Otherwise return
/// nullptr.
diff --git a/clang/include/clang/AST/ASTImporterSharedState.h b/clang/include/clang/AST/ASTImporterSharedState.h
index 829eb1c611c3..686a8e22b2fa 100644
--- a/clang/include/clang/AST/ASTImporterSharedState.h
+++ b/clang/include/clang/AST/ASTImporterSharedState.h
@@ -1,9 +1,8 @@
//===- ASTImporterSharedState.h - ASTImporter specific state --*- C++ -*---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/clang/include/clang/AST/ASTStructuralEquivalence.h b/clang/include/clang/AST/ASTStructuralEquivalence.h
index c958a16aba21..0b9e9e4e605f 100644
--- a/clang/include/clang/AST/ASTStructuralEquivalence.h
+++ b/clang/include/clang/AST/ASTStructuralEquivalence.h
@@ -117,7 +117,7 @@ struct StructuralEquivalenceContext {
static llvm::Optional<unsigned>
findUntaggedStructOrUnionIndex(RecordDecl *Anon);
- // If ErrorOnTagTypeMismatch is set, return the the error, otherwise get the
+ // If ErrorOnTagTypeMismatch is set, return the error, otherwise get the
// relevant warning for the input error diagnostic.
unsigned getApplicableDiagnostic(unsigned ErrorDiagnostic);
diff --git a/clang/include/clang/AST/ASTTypeTraits.h b/clang/include/clang/AST/ASTTypeTraits.h
index 57195a9d6066..6d96146a4d45 100644
--- a/clang/include/clang/AST/ASTTypeTraits.h
+++ b/clang/include/clang/AST/ASTTypeTraits.h
@@ -17,6 +17,7 @@
#include "clang/AST/ASTFwd.h"
#include "clang/AST/DeclCXX.h"
+#include "clang/AST/LambdaCapture.h"
#include "clang/AST/NestedNameSpecifier.h"
#include "clang/AST/TemplateBase.h"
#include "clang/AST/TypeLoc.h"
@@ -25,10 +26,8 @@
#include "llvm/Support/AlignOf.h"
namespace llvm {
-
class raw_ostream;
-
-}
+} // namespace llvm
namespace clang {
@@ -65,7 +64,10 @@ public:
static ASTNodeKind getFromNode(const Decl &D);
static ASTNodeKind getFromNode(const Stmt &S);
static ASTNodeKind getFromNode(const Type &T);
+ static ASTNodeKind getFromNode(const TypeLoc &T);
+ static ASTNodeKind getFromNode(const LambdaCapture &L);
static ASTNodeKind getFromNode(const OMPClause &C);
+ static ASTNodeKind getFromNode(const Attr &A);
/// \}
/// Returns \c true if \c this and \c Other represent the same kind.
@@ -131,9 +133,12 @@ private:
NKI_None,
NKI_TemplateArgument,
NKI_TemplateArgumentLoc,
+ NKI_LambdaCapture,
NKI_TemplateName,
NKI_NestedNameSpecifierLoc,
NKI_QualType,
+#define TYPELOC(CLASS, PARENT) NKI_##CLASS##TypeLoc,
+#include "clang/AST/TypeLocNodes.def"
NKI_TypeLoc,
NKI_LastKindWithoutPointerIdentity = NKI_TypeLoc,
NKI_CXXBaseSpecifier,
@@ -152,6 +157,9 @@ private:
#define GEN_CLANG_CLAUSE_CLASS
#define CLAUSE_CLASS(Enum, Str, Class) NKI_##Class,
#include "llvm/Frontend/OpenMP/OMP.inc"
+ NKI_Attr,
+#define ATTR(A) NKI_##A##Attr,
+#include "clang/Basic/AttrList.inc"
NKI_NumberOfKinds
};
@@ -192,15 +200,19 @@ private:
KIND_TO_KIND_ID(CXXCtorInitializer)
KIND_TO_KIND_ID(TemplateArgument)
KIND_TO_KIND_ID(TemplateArgumentLoc)
+KIND_TO_KIND_ID(LambdaCapture)
KIND_TO_KIND_ID(TemplateName)
KIND_TO_KIND_ID(NestedNameSpecifier)
KIND_TO_KIND_ID(NestedNameSpecifierLoc)
KIND_TO_KIND_ID(QualType)
+#define TYPELOC(CLASS, PARENT) KIND_TO_KIND_ID(CLASS##TypeLoc)
+#include "clang/AST/TypeLocNodes.def"
KIND_TO_KIND_ID(TypeLoc)
KIND_TO_KIND_ID(Decl)
KIND_TO_KIND_ID(Stmt)
KIND_TO_KIND_ID(Type)
KIND_TO_KIND_ID(OMPClause)
+KIND_TO_KIND_ID(Attr)
KIND_TO_KIND_ID(CXXBaseSpecifier)
#define DECL(DERIVED, BASE) KIND_TO_KIND_ID(DERIVED##Decl)
#include "clang/AST/DeclNodes.inc"
@@ -211,6 +223,8 @@ KIND_TO_KIND_ID(CXXBaseSpecifier)
#define GEN_CLANG_CLAUSE_CLASS
#define CLAUSE_CLASS(Enum, Str, Class) KIND_TO_KIND_ID(Class)
#include "llvm/Frontend/OpenMP/OMP.inc"
+#define ATTR(A) KIND_TO_KIND_ID(A##Attr)
+#include "clang/Basic/AttrList.inc"
#undef KIND_TO_KIND_ID
inline raw_ostream &operator<<(raw_ostream &OS, ASTNodeKind K) {
@@ -299,7 +313,7 @@ public:
return getUnchecked<QualType>().getAsOpaquePtr() <
Other.getUnchecked<QualType>().getAsOpaquePtr();
- if (ASTNodeKind::getFromNodeKind<TypeLoc>().isSame(NodeKind)) {
+ if (ASTNodeKind::getFromNodeKind<TypeLoc>().isBaseOf(NodeKind)) {
auto TLA = getUnchecked<TypeLoc>();
auto TLB = Other.getUnchecked<TypeLoc>();
return std::make_pair(TLA.getType().getAsOpaquePtr(),
@@ -331,7 +345,7 @@ public:
if (ASTNodeKind::getFromNodeKind<QualType>().isSame(NodeKind))
return getUnchecked<QualType>() == Other.getUnchecked<QualType>();
- if (ASTNodeKind::getFromNodeKind<TypeLoc>().isSame(NodeKind))
+ if (ASTNodeKind::getFromNodeKind<TypeLoc>().isBaseOf(NodeKind))
return getUnchecked<TypeLoc>() == Other.getUnchecked<TypeLoc>();
if (ASTNodeKind::getFromNodeKind<NestedNameSpecifierLoc>().isSame(NodeKind))
@@ -360,7 +374,7 @@ public:
}
static unsigned getHashValue(const DynTypedNode &Val) {
// FIXME: Add hashing support for the remaining types.
- if (ASTNodeKind::getFromNodeKind<TypeLoc>().isSame(Val.NodeKind)) {
+ if (ASTNodeKind::getFromNodeKind<TypeLoc>().isBaseOf(Val.NodeKind)) {
auto TL = Val.getUnchecked<TypeLoc>();
return llvm::hash_combine(TL.getType().getAsOpaquePtr(),
TL.getOpaqueData());
@@ -450,6 +464,29 @@ private:
}
};
+ /// Converter that stores nodes by value. It must be possible to dynamically
+ /// cast the stored node within a type hierarchy without breaking (especially
+ /// through slicing).
+ template <typename T, typename BaseT,
+ typename = std::enable_if_t<(sizeof(T) == sizeof(BaseT))>>
+ struct DynCastValueConverter {
+ static const T *get(ASTNodeKind NodeKind, const void *Storage) {
+ if (ASTNodeKind::getFromNodeKind<T>().isBaseOf(NodeKind))
+ return &getUnchecked(NodeKind, Storage);
+ return nullptr;
+ }
+ static const T &getUnchecked(ASTNodeKind NodeKind, const void *Storage) {
+ assert(ASTNodeKind::getFromNodeKind<T>().isBaseOf(NodeKind));
+ return *static_cast<const T *>(reinterpret_cast<const BaseT *>(Storage));
+ }
+ static DynTypedNode create(const T &Node) {
+ DynTypedNode Result;
+ Result.NodeKind = ASTNodeKind::getFromNode(Node);
+ new (&Result.Storage) T(Node);
+ return Result;
+ }
+ };
+
ASTNodeKind NodeKind;
/// Stores the data of the node.
@@ -486,6 +523,11 @@ struct DynTypedNode::BaseConverter<
T, std::enable_if_t<std::is_base_of<OMPClause, T>::value>>
: public DynCastPtrConverter<T, OMPClause> {};
+template <typename T>
+struct DynTypedNode::BaseConverter<
+ T, std::enable_if_t<std::is_base_of<Attr, T>::value>>
+ : public DynCastPtrConverter<T, Attr> {};
+
template <>
struct DynTypedNode::BaseConverter<
NestedNameSpecifier, void> : public PtrConverter<NestedNameSpecifier> {};
@@ -503,6 +545,10 @@ struct DynTypedNode::BaseConverter<TemplateArgumentLoc, void>
: public ValueConverter<TemplateArgumentLoc> {};
template <>
+struct DynTypedNode::BaseConverter<LambdaCapture, void>
+ : public ValueConverter<LambdaCapture> {};
+
+template <>
struct DynTypedNode::BaseConverter<
TemplateName, void> : public ValueConverter<TemplateName> {};
@@ -515,9 +561,10 @@ template <>
struct DynTypedNode::BaseConverter<QualType,
void> : public ValueConverter<QualType> {};
-template <>
+template <typename T>
struct DynTypedNode::BaseConverter<
- TypeLoc, void> : public ValueConverter<TypeLoc> {};
+ T, std::enable_if_t<std::is_base_of<TypeLoc, T>::value>>
+ : public DynCastValueConverter<T, TypeLoc> {};
template <>
struct DynTypedNode::BaseConverter<CXXBaseSpecifier, void>
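[Editor's note, not part of the import: with DynCastValueConverter, a DynTypedNode can hold any TypeLoc subclass by value and hand it back through a checked cast, something previously only available for pointer-backed hierarchies such as Decl and Stmt. A minimal sketch:]

  #include "clang/AST/ASTTypeTraits.h"
  #include "clang/AST/TypeLoc.h"

  // Store a TypeLoc (its dynamic kind is preserved) and recover the derived class.
  static bool isPointerLoc(clang::TypeLoc TL) {
    clang::DynTypedNode N = clang::DynTypedNode::create(TL);
    return N.get<clang::PointerTypeLoc>() != nullptr;  // null if not a pointer loc
  }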
diff --git a/clang/include/clang/AST/Attr.h b/clang/include/clang/AST/Attr.h
index dbfecc125049..6366d6e8837e 100644
--- a/clang/include/clang/AST/Attr.h
+++ b/clang/include/clang/AST/Attr.h
@@ -109,6 +109,8 @@ public:
// Pretty print this attribute.
void printPretty(raw_ostream &OS, const PrintingPolicy &Policy) const;
+
+ static StringRef getDocumentation(attr::Kind);
};
class TypeAttr : public Attr {
@@ -372,8 +374,7 @@ struct ParsedTargetAttr {
inline const StreamingDiagnostic &operator<<(const StreamingDiagnostic &DB,
const Attr *At) {
- DB.AddTaggedVal(reinterpret_cast<intptr_t>(At),
- DiagnosticsEngine::ak_attr);
+ DB.AddTaggedVal(reinterpret_cast<uint64_t>(At), DiagnosticsEngine::ak_attr);
return DB;
}
} // end namespace clang
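[Editor's note, not part of the import: the new static hook exposes the TableGen-generated documentation text for an attribute kind. A minimal sketch, assuming A is a valid const clang::Attr*:]

  llvm::StringRef Doc = clang::Attr::getDocumentation(A->getKind());
  if (!Doc.empty()) {
    // e.g. surface the text in a hover tooltip or a diagnostic note.
  }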
diff --git a/clang/include/clang/AST/BuiltinTypes.def b/clang/include/clang/AST/BuiltinTypes.def
index 039765dfdfea..c04f6f6f1271 100644
--- a/clang/include/clang/AST/BuiltinTypes.def
+++ b/clang/include/clang/AST/BuiltinTypes.def
@@ -218,6 +218,9 @@ FLOATING_TYPE(BFloat16, BFloat16Ty)
// '__float128'
FLOATING_TYPE(Float128, Float128Ty)
+// '__ibm128'
+FLOATING_TYPE(Ibm128, Ibm128Ty)
+
//===- Language-specific types --------------------------------------------===//
// This is the type of C++0x 'nullptr'.
diff --git a/clang/include/clang/AST/Comment.h b/clang/include/clang/AST/Comment.h
index 54a4b0a9cfe6..4184e103206d 100644
--- a/clang/include/clang/AST/Comment.h
+++ b/clang/include/clang/AST/Comment.h
@@ -1019,8 +1019,6 @@ struct DeclInfo {
/// \li member function template,
/// \li member function template specialization,
/// \li ObjC method,
- /// \li a typedef for a function pointer, member function pointer,
- /// ObjC block.
FunctionKind,
/// Something that we consider a "class":
@@ -1030,8 +1028,8 @@ struct DeclInfo {
ClassKind,
/// Something that we consider a "variable":
- /// \li namespace scope variables;
- /// \li static and non-static class data members;
+ /// \li namespace scope variables and variable templates;
+ /// \li static and non-static class data members and member templates;
/// \li enumerators.
VariableKind,
@@ -1076,6 +1074,9 @@ struct DeclInfo {
/// Can be true only if \c IsFunctionDecl is true.
unsigned IsClassMethod : 1;
+ /// Is \c CommentDecl something we consider a "function" that's variadic.
+ unsigned IsVariadic : 1;
+
void fill();
DeclKind getKind() const LLVM_READONLY {
@@ -1085,6 +1086,8 @@ struct DeclInfo {
TemplateDeclKind getTemplateKind() const LLVM_READONLY {
return static_cast<TemplateDeclKind>(TemplateKind);
}
+
+ bool involvesFunctionType() const { return !ReturnType.isNull(); }
};
/// A full comment attached to a declaration, contains block content.
diff --git a/clang/include/clang/AST/CommentCommands.td b/clang/include/clang/AST/CommentCommands.td
index fbbfc9f7e0b7..7e962a4b4171 100644
--- a/clang/include/clang/AST/CommentCommands.td
+++ b/clang/include/clang/AST/CommentCommands.td
@@ -87,8 +87,21 @@ def P : InlineCommand<"p">;
def A : InlineCommand<"a">;
def E : InlineCommand<"e">;
def Em : InlineCommand<"em">;
-def Ref : InlineCommand<"ref">;
-def Anchor : InlineCommand<"anchor">;
+def Emoji : InlineCommand<"emoji">;
+
+def Anchor : InlineCommand<"anchor">;
+def Ref : InlineCommand<"ref">;
+def RefItem : InlineCommand<"refitem">;
+def Cite : InlineCommand<"cite">;
+
+def CopyBrief : InlineCommand<"copybrief">;
+def CopyDetails : InlineCommand<"copydetails">;
+def CopyDoc : InlineCommand<"copydoc">;
+
+// Typically not used inline, but they take a single word.
+def Extends : InlineCommand<"extends">;
+def Implements : InlineCommand<"implements">;
+def MemberOf : InlineCommand<"memberof">;
//===----------------------------------------------------------------------===//
// BlockCommand
@@ -145,9 +158,11 @@ def Retval : BlockCommand<"retval">;
def Sa : BlockCommand<"sa">;
def See : BlockCommand<"see">;
def Since : BlockCommand<"since">;
+def Test : BlockCommand<"test">;
def Todo : BlockCommand<"todo">;
def Version : BlockCommand<"version">;
def Warning : BlockCommand<"warning">;
+def XRefItem : BlockCommand<"xrefitem">;
// HeaderDoc commands
def Abstract : BlockCommand<"abstract"> { let IsBriefCommand = 1; }
def ClassDesign : RecordLikeDetailCommand<"classdesign">;
@@ -170,6 +185,8 @@ def SuperClass : RecordLikeDetailCommand<"superclass">;
defm Code : VerbatimBlockCommand<"code", "endcode">;
defm Verbatim : VerbatimBlockCommand<"verbatim", "endverbatim">;
+
+defm DocbookOnly : VerbatimBlockCommand<"docbookonly", "enddocbookonly">;
defm Htmlonly : VerbatimBlockCommand<"htmlonly", "endhtmlonly">;
defm Latexonly : VerbatimBlockCommand<"latexonly", "endlatexonly">;
defm Xmlonly : VerbatimBlockCommand<"xmlonly", "endxmlonly">;
@@ -178,10 +195,19 @@ defm Rtfonly : VerbatimBlockCommand<"rtfonly", "endrtfonly">;
defm Dot : VerbatimBlockCommand<"dot", "enddot">;
defm Msc : VerbatimBlockCommand<"msc", "endmsc">;
+defm Uml : VerbatimBlockCommand<"startuml", "enduml">;
+
+// Actually not verbatim blocks, we should also parse commands within them.
+defm Internal : VerbatimBlockCommand<"internal", "endinternal">;
+// TODO: conflicts with HeaderDoc link, /link.
+//defm Link : VerbatimBlockCommand<"link", "endlink">;
+defm ParBlock : VerbatimBlockCommand<"parblock", "endparblock">;
+defm SecRefList : VerbatimBlockCommand<"secreflist", "endsecreflist">;
// These three commands have special support in CommentLexer to recognize their
// names.
def FDollar : VerbatimBlockCommand<"f$">; // Inline LaTeX formula
+defm FParen : VerbatimBlockCommand<"f(", "f)">; // Inline LaTeX text
defm FBracket : VerbatimBlockCommand<"f[", "f]">; // Displayed LaTeX formula
defm FBrace : VerbatimBlockCommand<"f{", "f}">; // LaTeX environment
@@ -199,11 +225,18 @@ def Addtogroup : VerbatimLineCommand<"addtogroup">;
def Weakgroup : VerbatimLineCommand<"weakgroup">;
def Name : VerbatimLineCommand<"name">;
+// These actually take a single word, but it's optional.
+// And they're used on a separate line typically, not inline.
+def Dir : VerbatimLineCommand<"dir">;
+def File : VerbatimLineCommand<"file">;
+
def Section : VerbatimLineCommand<"section">;
def Subsection : VerbatimLineCommand<"subsection">;
def Subsubsection : VerbatimLineCommand<"subsubsection">;
def Paragraph : VerbatimLineCommand<"paragraph">;
+def TableOfContents : VerbatimLineCommand<"tableofcontents">;
+def Page : VerbatimLineCommand<"page">;
def Mainpage : VerbatimLineCommand<"mainpage">;
def Subpage : VerbatimLineCommand<"subpage">;
@@ -212,13 +245,79 @@ def Related : VerbatimLineCommand<"related">;
def RelatesAlso : VerbatimLineCommand<"relatesalso">;
def RelatedAlso : VerbatimLineCommand<"relatedalso">;
+def AddIndex : VerbatimLineCommand<"addindex">;
+
+// These take a single argument mostly, but since they include a file they'll
+// typically be on their own line.
+def DocbookInclude : VerbatimLineCommand<"docbookinclude">;
+def DontInclude : VerbatimLineCommand<"dontinclude">;
+def Example : VerbatimLineCommand<"example">;
+def HtmlInclude : VerbatimLineCommand<"htmlinclude">;
+def Include : VerbatimLineCommand<"include">;
+def ManInclude : VerbatimLineCommand<"maninclude">;
+def LatexInclude : VerbatimLineCommand<"latexinclude">;
+def RtfInclude : VerbatimLineCommand<"rtfinclude">;
+def Snippet : VerbatimLineCommand<"snippet">;
+def VerbInclude : VerbatimLineCommand<"verbinclude">;
+def XmlInclude : VerbatimLineCommand<"xmlinclude">;
+
+def Image : VerbatimLineCommand<"image">;
+def DotFile : VerbatimLineCommand<"dotfile">;
+def MscFile : VerbatimLineCommand<"mscfile">;
+def DiaFile : VerbatimLineCommand<"diafile">;
+
+def Line : VerbatimLineCommand<"line">;
+def Skip : VerbatimLineCommand<"skip">;
+def SkipLine : VerbatimLineCommand<"skipline">;
+def Until : VerbatimLineCommand<"until">;
+
+def NoOp : VerbatimLineCommand<"noop">;
+
+// These have actually no arguments, but we can treat them as line commands.
+def CallGraph : VerbatimLineCommand<"callgraph">;
+def HideCallGraph : VerbatimLineCommand<"hidecallgraph">;
+def CallerGraph : VerbatimLineCommand<"callergraph">;
+def HideCallerGraph : VerbatimLineCommand<"hidecallergraph">;
+def ShowInitializer : VerbatimLineCommand<"showinitializer">;
+def HideInitializer : VerbatimLineCommand<"hideinitializer">;
+def ShowRefBy : VerbatimLineCommand<"showrefby">;
+def HideRefBy : VerbatimLineCommand<"hiderefby">;
+def ShowRefs : VerbatimLineCommand<"showrefs">;
+def HideRefs : VerbatimLineCommand<"hiderefs">;
+
+// These also have no argument.
+def Private : VerbatimLineCommand<"private">;
+def Protected : VerbatimLineCommand<"protected">;
+def Public : VerbatimLineCommand<"public">;
+def Pure : VerbatimLineCommand<"pure">;
+def Static : VerbatimLineCommand<"static">;
+
+// These also have no argument.
+def NoSubgrouping : VerbatimLineCommand<"nosubgrouping">;
+def PrivateSection : VerbatimLineCommand<"privatesection">;
+def ProtectedSection : VerbatimLineCommand<"protectedsection">;
+def PublicSection : VerbatimLineCommand<"publicsection">;
+
+// We might also build proper support for if/ifnot/else/elseif/endif.
+def If : VerbatimLineCommand<"if">;
+def IfNot : VerbatimLineCommand<"ifnot">;
+def Else : VerbatimLineCommand<"else">;
+def ElseIf : VerbatimLineCommand<"elseif">;
+def Endif : VerbatimLineCommand<"endif">;
+
+// Not treated as VerbatimBlockCommand because it spans multiple comments.
+def Cond : VerbatimLineCommand<"cond">;
+def EndCond : VerbatimLineCommand<"endcond">;
+
//===----------------------------------------------------------------------===//
// DeclarationVerbatimLineCommand
//===----------------------------------------------------------------------===//
// Doxygen commands.
+def Concept : DeclarationVerbatimLineCommand<"concept">;
def Def : DeclarationVerbatimLineCommand<"def">;
def Fn : DeclarationVerbatimLineCommand<"fn">;
+def IDLExcept : DeclarationVerbatimLineCommand<"idlexcept">;
def Namespace : DeclarationVerbatimLineCommand<"namespace">;
def Overload : DeclarationVerbatimLineCommand<"overload">;
def Property : DeclarationVerbatimLineCommand<"property">;
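[Editor's note, not part of the import: the commands registered above are now recognized by the comment parser instead of being diagnosed as unknown. A small C++ doc comment using a few of them; the citation key, file name, and snippet id are made-up placeholders:]

  /// Computes the widget count. \emoji warning
  /// \copydoc WidgetBase::compute
  /// \cite widget-paper-2021
  /// \snippet widget_examples.cpp basic-usage
  int computeWidgets();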
diff --git a/clang/include/clang/AST/CommentHTMLTags.td b/clang/include/clang/AST/CommentHTMLTags.td
index 251490094940..a1ce8c6da96c 100644
--- a/clang/include/clang/AST/CommentHTMLTags.td
+++ b/clang/include/clang/AST/CommentHTMLTags.td
@@ -52,11 +52,11 @@ def Tr : Tag<"tr"> { let EndTagOptional = 1; }
def Th : Tag<"th"> { let EndTagOptional = 1; }
def Td : Tag<"td"> { let EndTagOptional = 1; }
-// Define a blacklist of attributes that are not safe to pass through to HTML
+// Define a list of attributes that are not safe to pass through to HTML
// output if the input is untrusted.
//
-// FIXME: this should be a whitelist. When changing this to a whitelist, don't
-// forget to change the default in the TableGen backend.
+// FIXME: This should be a list of attributes that _are_ safe. When changing
+// this change, don't forget to change the default in the TableGen backend.
class Attribute<string spelling> {
string Spelling = spelling;
bit IsSafeToPassThrough = 1;
diff --git a/clang/include/clang/AST/CommentSema.h b/clang/include/clang/AST/CommentSema.h
index 6dfe0f4920d0..015ce8f8652a 100644
--- a/clang/include/clang/AST/CommentSema.h
+++ b/clang/include/clang/AST/CommentSema.h
@@ -181,6 +181,7 @@ public:
FullComment *actOnFullComment(ArrayRef<BlockContentComment *> Blocks);
+private:
void checkBlockCommandEmptyParagraph(BlockCommandComment *Command);
void checkReturnsCommand(const BlockCommandComment *Command);
@@ -201,16 +202,16 @@ public:
/// Emit diagnostics about unknown parametrs.
void resolveParamCommandIndexes(const FullComment *FC);
+ /// \returns \c true if the declaration that this comment is attached to
+ /// is a pointer to function/method/block type or has such a type.
+ bool involvesFunctionType();
+
bool isFunctionDecl();
bool isAnyFunctionDecl();
/// \returns \c true if declaration that this comment is attached to declares
/// a function pointer.
bool isFunctionPointerVarDecl();
- /// \returns \c true if the declaration that this comment is attached to
- /// declares a variable or a field whose type is a function or a block
- /// pointer.
- bool isFunctionOrBlockPointerVarLikeDecl();
bool isFunctionOrMethodVariadic();
bool isObjCMethodDecl();
bool isObjCPropertyDecl();
diff --git a/clang/include/clang/AST/ComparisonCategories.h b/clang/include/clang/AST/ComparisonCategories.h
index b41e934142ee..7b73b582fe2a 100644
--- a/clang/include/clang/AST/ComparisonCategories.h
+++ b/clang/include/clang/AST/ComparisonCategories.h
@@ -115,8 +115,7 @@ private:
public:
/// The declaration for the comparison category type from the
/// standard library.
- // FIXME: Make this const
- CXXRecordDecl *Record = nullptr;
+ const CXXRecordDecl *Record = nullptr;
/// The Kind of the comparison category type
ComparisonCategoryType Kind;
@@ -146,7 +145,7 @@ public:
return Kind == CCK::PartialOrdering;
}
- /// Converts the specified result kind into the the correct result kind
+ /// Converts the specified result kind into the correct result kind
/// for this category. Specifically it lowers strong equality results to
/// weak equivalence if needed.
ComparisonCategoryResult makeWeakResult(ComparisonCategoryResult Res) const {
diff --git a/clang/include/clang/AST/CurrentSourceLocExprScope.h b/clang/include/clang/AST/CurrentSourceLocExprScope.h
index 4ebbdf63abb5..34df8ce1309e 100644
--- a/clang/include/clang/AST/CurrentSourceLocExprScope.h
+++ b/clang/include/clang/AST/CurrentSourceLocExprScope.h
@@ -1,9 +1,8 @@
//===--- CurrentSourceLocExprScope.h ----------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h
index 510bf8978985..85a3a8ab6970 100644
--- a/clang/include/clang/AST/Decl.h
+++ b/clang/include/clang/AST/Decl.h
@@ -614,7 +614,9 @@ public:
if (!isInline())
return false;
auto X = lookup(Name);
- auto Y = getParent()->lookup(Name);
+ // We should not perform a lookup within a transparent context, so find a
+ // non-transparent parent context.
+ auto Y = getParent()->getNonTransparentContext()->lookup(Name);
return std::distance(X.begin(), X.end()) ==
std::distance(Y.begin(), Y.end());
}
@@ -1990,8 +1992,8 @@ private:
protected:
FunctionDecl(Kind DK, ASTContext &C, DeclContext *DC, SourceLocation StartLoc,
const DeclarationNameInfo &NameInfo, QualType T,
- TypeSourceInfo *TInfo, StorageClass S, bool isInlineSpecified,
- ConstexprSpecKind ConstexprKind,
+ TypeSourceInfo *TInfo, StorageClass S, bool UsesFPIntrin,
+ bool isInlineSpecified, ConstexprSpecKind ConstexprKind,
Expr *TrailingRequiresClause = nullptr);
using redeclarable_base = Redeclarable<FunctionDecl>;
@@ -2025,23 +2027,23 @@ public:
static FunctionDecl *
Create(ASTContext &C, DeclContext *DC, SourceLocation StartLoc,
SourceLocation NLoc, DeclarationName N, QualType T,
- TypeSourceInfo *TInfo, StorageClass SC, bool isInlineSpecified = false,
- bool hasWrittenPrototype = true,
+ TypeSourceInfo *TInfo, StorageClass SC, bool UsesFPIntrin = false,
+ bool isInlineSpecified = false, bool hasWrittenPrototype = true,
ConstexprSpecKind ConstexprKind = ConstexprSpecKind::Unspecified,
Expr *TrailingRequiresClause = nullptr) {
DeclarationNameInfo NameInfo(N, NLoc);
return FunctionDecl::Create(C, DC, StartLoc, NameInfo, T, TInfo, SC,
- isInlineSpecified, hasWrittenPrototype,
- ConstexprKind, TrailingRequiresClause);
+ UsesFPIntrin, isInlineSpecified,
+ hasWrittenPrototype, ConstexprKind,
+ TrailingRequiresClause);
}
- static FunctionDecl *Create(ASTContext &C, DeclContext *DC,
- SourceLocation StartLoc,
- const DeclarationNameInfo &NameInfo, QualType T,
- TypeSourceInfo *TInfo, StorageClass SC,
- bool isInlineSpecified, bool hasWrittenPrototype,
- ConstexprSpecKind ConstexprKind,
- Expr *TrailingRequiresClause);
+ static FunctionDecl *
+ Create(ASTContext &C, DeclContext *DC, SourceLocation StartLoc,
+ const DeclarationNameInfo &NameInfo, QualType T, TypeSourceInfo *TInfo,
+ StorageClass SC, bool UsesFPIntrin, bool isInlineSpecified,
+ bool hasWrittenPrototype, ConstexprSpecKind ConstexprKind,
+ Expr *TrailingRequiresClause);
static FunctionDecl *CreateDeserialized(ASTContext &C, unsigned ID);
@@ -2594,6 +2596,14 @@ public:
FunctionDeclBits.IsInline = I;
}
+ /// Determine whether the function was declared in source context
+ /// that requires constrained FP intrinsics
+ bool UsesFPIntrin() const { return FunctionDeclBits.UsesFPIntrin; }
+
+ /// Set whether the function was declared in source context
+ /// that requires constrained FP intrinsics
+ void setUsesFPIntrin(bool I) { FunctionDeclBits.UsesFPIntrin = I; }
+
/// Flag that this function is implicitly inline.
void setImplicitlyInline(bool I = true) { FunctionDeclBits.IsInline = I; }
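[Editor's note, not part of the import: the new bit records that the function was written in a source region requiring constrained floating-point intrinsics (for example under a strict floating-point pragma), and FunctionDecl::Create now threads it through as the UsesFPIntrin argument. A minimal consumer sketch, assuming FD is a valid const FunctionDecl*:]

  if (FD->UsesFPIntrin()) {
    // Emit this body with constrained FP intrinsics / strict floating-point
    // semantics rather than the default environment.
  }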
@@ -3691,6 +3701,10 @@ public:
bool IsFixed);
static EnumDecl *CreateDeserialized(ASTContext &C, unsigned ID);
+ /// Overrides to provide correct range when there's an enum-base specifier
+ /// with forward declarations.
+ SourceRange getSourceRange() const override LLVM_READONLY;
+
/// When created, the EnumDecl corresponds to a
/// forward-declared enum. This method is used to mark the
/// declaration as being defined; its enumerators have already been
@@ -4579,7 +4593,7 @@ public:
/// into a diagnostic with <<.
inline const StreamingDiagnostic &operator<<(const StreamingDiagnostic &PD,
const NamedDecl *ND) {
- PD.AddTaggedVal(reinterpret_cast<intptr_t>(ND),
+ PD.AddTaggedVal(reinterpret_cast<uint64_t>(ND),
DiagnosticsEngine::ak_nameddecl);
return PD;
}
diff --git a/clang/include/clang/AST/DeclBase.h b/clang/include/clang/AST/DeclBase.h
index 482d2889a25a..18468c8ca1c4 100644
--- a/clang/include/clang/AST/DeclBase.h
+++ b/clang/include/clang/AST/DeclBase.h
@@ -1997,6 +1997,12 @@ public:
return const_cast<DeclContext*>(this)->getNonClosureAncestor();
}
+ // Retrieve the nearest context that is not a transparent context.
+ DeclContext *getNonTransparentContext();
+ const DeclContext *getNonTransparentContext() const {
+ return const_cast<DeclContext *>(this)->getNonTransparentContext();
+ }
+
/// getPrimaryContext - There may be many different
/// declarations of the same entity (including forward declarations
/// of classes, multiple definitions of namespaces, etc.), each with
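[Editor's note, not part of the import: getNonTransparentContext() skips transparent contexts (such as linkage specifications or unscoped enums) so that a lookup is performed in a context that actually owns the names. A minimal sketch, assuming D is a valid Decl* and Name a DeclarationName:]

  const clang::DeclContext *DC =
      D->getDeclContext()->getNonTransparentContext();
  for (const clang::NamedDecl *ND : DC->lookup(Name)) {
    (void)ND;  // inspect each lookup result
  }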
diff --git a/clang/include/clang/AST/DeclCXX.h b/clang/include/clang/AST/DeclCXX.h
index 0d5ad40fc19e..cc7bfc86a521 100644
--- a/clang/include/clang/AST/DeclCXX.h
+++ b/clang/include/clang/AST/DeclCXX.h
@@ -1857,7 +1857,7 @@ private:
TypeSourceInfo *TInfo, SourceLocation EndLocation,
CXXConstructorDecl *Ctor)
: FunctionDecl(CXXDeductionGuide, C, DC, StartLoc, NameInfo, T, TInfo,
- SC_None, false, ConstexprSpecKind::Unspecified),
+ SC_None, false, false, ConstexprSpecKind::Unspecified),
Ctor(Ctor), ExplicitSpec(ES) {
if (EndLocation.isValid())
setRangeEnd(EndLocation);
@@ -1952,23 +1952,22 @@ protected:
CXXMethodDecl(Kind DK, ASTContext &C, CXXRecordDecl *RD,
SourceLocation StartLoc, const DeclarationNameInfo &NameInfo,
QualType T, TypeSourceInfo *TInfo, StorageClass SC,
- bool isInline, ConstexprSpecKind ConstexprKind,
- SourceLocation EndLocation,
+ bool UsesFPIntrin, bool isInline,
+ ConstexprSpecKind ConstexprKind, SourceLocation EndLocation,
Expr *TrailingRequiresClause = nullptr)
- : FunctionDecl(DK, C, RD, StartLoc, NameInfo, T, TInfo, SC, isInline,
- ConstexprKind, TrailingRequiresClause) {
+ : FunctionDecl(DK, C, RD, StartLoc, NameInfo, T, TInfo, SC, UsesFPIntrin,
+ isInline, ConstexprKind, TrailingRequiresClause) {
if (EndLocation.isValid())
setRangeEnd(EndLocation);
}
public:
- static CXXMethodDecl *Create(ASTContext &C, CXXRecordDecl *RD,
- SourceLocation StartLoc,
- const DeclarationNameInfo &NameInfo, QualType T,
- TypeSourceInfo *TInfo, StorageClass SC,
- bool isInline, ConstexprSpecKind ConstexprKind,
- SourceLocation EndLocation,
- Expr *TrailingRequiresClause = nullptr);
+ static CXXMethodDecl *
+ Create(ASTContext &C, CXXRecordDecl *RD, SourceLocation StartLoc,
+ const DeclarationNameInfo &NameInfo, QualType T, TypeSourceInfo *TInfo,
+ StorageClass SC, bool UsesFPIntrin, bool isInline,
+ ConstexprSpecKind ConstexprKind, SourceLocation EndLocation,
+ Expr *TrailingRequiresClause = nullptr);
static CXXMethodDecl *CreateDeserialized(ASTContext &C, unsigned ID);
@@ -2413,7 +2412,8 @@ class CXXConstructorDecl final
CXXConstructorDecl(ASTContext &C, CXXRecordDecl *RD, SourceLocation StartLoc,
const DeclarationNameInfo &NameInfo, QualType T,
- TypeSourceInfo *TInfo, ExplicitSpecifier ES, bool isInline,
+ TypeSourceInfo *TInfo, ExplicitSpecifier ES,
+ bool UsesFPIntrin, bool isInline,
bool isImplicitlyDeclared, ConstexprSpecKind ConstexprKind,
InheritedConstructor Inherited,
Expr *TrailingRequiresClause);
@@ -2456,8 +2456,8 @@ public:
static CXXConstructorDecl *
Create(ASTContext &C, CXXRecordDecl *RD, SourceLocation StartLoc,
const DeclarationNameInfo &NameInfo, QualType T, TypeSourceInfo *TInfo,
- ExplicitSpecifier ES, bool isInline, bool isImplicitlyDeclared,
- ConstexprSpecKind ConstexprKind,
+ ExplicitSpecifier ES, bool UsesFPIntrin, bool isInline,
+ bool isImplicitlyDeclared, ConstexprSpecKind ConstexprKind,
InheritedConstructor Inherited = InheritedConstructor(),
Expr *TrailingRequiresClause = nullptr);
@@ -2676,25 +2676,24 @@ class CXXDestructorDecl : public CXXMethodDecl {
CXXDestructorDecl(ASTContext &C, CXXRecordDecl *RD, SourceLocation StartLoc,
const DeclarationNameInfo &NameInfo, QualType T,
- TypeSourceInfo *TInfo, bool isInline,
+ TypeSourceInfo *TInfo, bool UsesFPIntrin, bool isInline,
bool isImplicitlyDeclared, ConstexprSpecKind ConstexprKind,
Expr *TrailingRequiresClause = nullptr)
: CXXMethodDecl(CXXDestructor, C, RD, StartLoc, NameInfo, T, TInfo,
- SC_None, isInline, ConstexprKind, SourceLocation(),
- TrailingRequiresClause) {
+ SC_None, UsesFPIntrin, isInline, ConstexprKind,
+ SourceLocation(), TrailingRequiresClause) {
setImplicit(isImplicitlyDeclared);
}
void anchor() override;
public:
- static CXXDestructorDecl *Create(ASTContext &C, CXXRecordDecl *RD,
- SourceLocation StartLoc,
- const DeclarationNameInfo &NameInfo,
- QualType T, TypeSourceInfo *TInfo,
- bool isInline, bool isImplicitlyDeclared,
- ConstexprSpecKind ConstexprKind,
- Expr *TrailingRequiresClause = nullptr);
+ static CXXDestructorDecl *
+ Create(ASTContext &C, CXXRecordDecl *RD, SourceLocation StartLoc,
+ const DeclarationNameInfo &NameInfo, QualType T, TypeSourceInfo *TInfo,
+ bool UsesFPIntrin, bool isInline, bool isImplicitlyDeclared,
+ ConstexprSpecKind ConstexprKind,
+ Expr *TrailingRequiresClause = nullptr);
static CXXDestructorDecl *CreateDeserialized(ASTContext & C, unsigned ID);
void setOperatorDelete(FunctionDecl *OD, Expr *ThisArg);
@@ -2732,12 +2731,13 @@ public:
class CXXConversionDecl : public CXXMethodDecl {
CXXConversionDecl(ASTContext &C, CXXRecordDecl *RD, SourceLocation StartLoc,
const DeclarationNameInfo &NameInfo, QualType T,
- TypeSourceInfo *TInfo, bool isInline, ExplicitSpecifier ES,
- ConstexprSpecKind ConstexprKind, SourceLocation EndLocation,
+ TypeSourceInfo *TInfo, bool UsesFPIntrin, bool isInline,
+ ExplicitSpecifier ES, ConstexprSpecKind ConstexprKind,
+ SourceLocation EndLocation,
Expr *TrailingRequiresClause = nullptr)
: CXXMethodDecl(CXXConversion, C, RD, StartLoc, NameInfo, T, TInfo,
- SC_None, isInline, ConstexprKind, EndLocation,
- TrailingRequiresClause),
+ SC_None, UsesFPIntrin, isInline, ConstexprKind,
+ EndLocation, TrailingRequiresClause),
ExplicitSpec(ES) {}
void anchor() override;
@@ -2750,8 +2750,9 @@ public:
static CXXConversionDecl *
Create(ASTContext &C, CXXRecordDecl *RD, SourceLocation StartLoc,
const DeclarationNameInfo &NameInfo, QualType T, TypeSourceInfo *TInfo,
- bool isInline, ExplicitSpecifier ES, ConstexprSpecKind ConstexprKind,
- SourceLocation EndLocation, Expr *TrailingRequiresClause = nullptr);
+ bool UsesFPIntrin, bool isInline, ExplicitSpecifier ES,
+ ConstexprSpecKind ConstexprKind, SourceLocation EndLocation,
+ Expr *TrailingRequiresClause = nullptr);
static CXXConversionDecl *CreateDeserialized(ASTContext &C, unsigned ID);
ExplicitSpecifier getExplicitSpecifier() {
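Every constructor and Create() overload in this hunk gains a UsesFPIntrin flag immediately before isInline. A hedged sketch of what a caller written against the old CXXMethodDecl::Create signature needs, with Ctx, RD, and the location/type arguments assumed to come from the surrounding code:

  // Sketch only; all variables are assumed from the calling context.
  CXXMethodDecl *MD = CXXMethodDecl::Create(
      Ctx, RD, StartLoc, NameInfo, T, TInfo, SC_None,
      /*UsesFPIntrin=*/false, // parameter added by this change
      /*isInline=*/true, ConstexprSpecKind::Unspecified, EndLoc);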
diff --git a/clang/include/clang/AST/DeclContextInternals.h b/clang/include/clang/AST/DeclContextInternals.h
index 2eef2343b750..9899fc29b82d 100644
--- a/clang/include/clang/AST/DeclContextInternals.h
+++ b/clang/include/clang/AST/DeclContextInternals.h
@@ -78,8 +78,7 @@ class StoredDeclsList {
}
Data.setPointer(NewHead);
- assert(llvm::find_if(getLookupResult(), ShouldErase) ==
- getLookupResult().end() && "Still exists!");
+ assert(llvm::none_of(getLookupResult(), ShouldErase) && "Still exists!");
}
void erase(NamedDecl *ND) {
diff --git a/clang/include/clang/AST/DeclObjC.h b/clang/include/clang/AST/DeclObjC.h
index 6bb9cdf67034..79ec1d6e5c3c 100644
--- a/clang/include/clang/AST/DeclObjC.h
+++ b/clang/include/clang/AST/DeclObjC.h
@@ -487,6 +487,9 @@ public:
/// True if the method is tagged as objc_direct
bool isDirectMethod() const;
+ /// True if the method has a parameter that's destroyed in the callee.
+ bool hasParamDestroyedInCallee() const;
+
/// Returns the property associated with this method's selector.
///
/// Note that even if this particular method is not marked as a property
@@ -1955,6 +1958,13 @@ public:
const ObjCIvarDecl *getNextIvar() const { return NextIvar; }
void setNextIvar(ObjCIvarDecl *ivar) { NextIvar = ivar; }
+ ObjCIvarDecl *getCanonicalDecl() override {
+ return cast<ObjCIvarDecl>(FieldDecl::getCanonicalDecl());
+ }
+ const ObjCIvarDecl *getCanonicalDecl() const {
+ return const_cast<ObjCIvarDecl *>(this)->getCanonicalDecl();
+ }
+
void setAccessControl(AccessControl ac) { DeclAccess = ac; }
AccessControl getAccessControl() const { return AccessControl(DeclAccess); }
diff --git a/clang/include/clang/AST/DeclTemplate.h b/clang/include/clang/AST/DeclTemplate.h
index cbaa287f225a..d33babef958e 100644
--- a/clang/include/clang/AST/DeclTemplate.h
+++ b/clang/include/clang/AST/DeclTemplate.h
@@ -203,7 +203,8 @@ public:
void print(raw_ostream &Out, const ASTContext &Context,
const PrintingPolicy &Policy, bool OmitTemplateKW = false) const;
- static bool shouldIncludeTypeForArgument(const TemplateParameterList *TPL,
+ static bool shouldIncludeTypeForArgument(const PrintingPolicy &Policy,
+ const TemplateParameterList *TPL,
unsigned Idx);
};
@@ -729,6 +730,10 @@ public:
/// Returns the number of explicit template arguments that were given.
unsigned getNumTemplateArgs() const { return NumArgs; }
+ llvm::ArrayRef<TemplateArgumentLoc> arguments() const {
+ return llvm::makeArrayRef(getTemplateArgs(), getNumTemplateArgs());
+ }
+
/// Returns the nth template argument.
const TemplateArgumentLoc &getTemplateArg(unsigned I) const {
assert(I < getNumTemplateArgs() && "template arg index out of range");
@@ -1189,7 +1194,7 @@ class TemplateTypeParmDecl final : public TypeDecl,
/// Whether the type constraint has been initialized. This can be false if the
/// constraint was not initialized yet or if there was an error forming the
- /// type constriant.
+ /// type constraint.
bool TypeConstraintInitialized : 1;
/// Whether this non-type template parameter is an "expanded"
diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h
index 06164411cc2d..991abef73363 100644
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@@ -740,6 +740,12 @@ public:
bool tryEvaluateObjectSize(uint64_t &Result, ASTContext &Ctx,
unsigned Type) const;
+ /// If the current Expr is a pointer, this will try to statically
+ /// determine the strlen of the string pointed to.
+ /// Returns true if the expression is such a pointer and its strlen could be
+ /// determined statically, false otherwise.
+ bool tryEvaluateStrLen(uint64_t &Result, ASTContext &Ctx) const;
+
/// Enumeration used to describe the kind of Null pointer constant
/// returned from \c isNullPointerConstant().
enum NullPointerConstantKind {
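A short sketch of how the new Expr::tryEvaluateStrLen helper is meant to be called, assuming an Expr *E of pointer type and an ASTContext &Ctx from the caller:

  // Sketch only; E and Ctx are assumed from the calling context.
  uint64_t Len = 0;
  if (E->tryEvaluateStrLen(Len, Ctx))
    llvm::errs() << "statically known strlen: " << Len << "\n";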
diff --git a/clang/include/clang/AST/JSONNodeDumper.h b/clang/include/clang/AST/JSONNodeDumper.h
index a96e21993e20..5638df42a1c5 100644
--- a/clang/include/clang/AST/JSONNodeDumper.h
+++ b/clang/include/clang/AST/JSONNodeDumper.h
@@ -1,9 +1,8 @@
//===--- JSONNodeDumper.h - Printing of AST nodes to JSON -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/clang/include/clang/AST/LambdaCapture.h b/clang/include/clang/AST/LambdaCapture.h
index 8e2806545dd6..7ad1e2361e42 100644
--- a/clang/include/clang/AST/LambdaCapture.h
+++ b/clang/include/clang/AST/LambdaCapture.h
@@ -86,7 +86,7 @@ public:
/// Determine whether this capture handles a variable.
bool capturesVariable() const {
- return dyn_cast_or_null<VarDecl>(DeclAndBits.getPointer());
+ return isa_and_nonnull<VarDecl>(DeclAndBits.getPointer());
}
/// Determine whether this captures a variable length array bound
diff --git a/clang/include/clang/AST/NestedNameSpecifier.h b/clang/include/clang/AST/NestedNameSpecifier.h
index 8bc3e25c0f4b..eb01780598a7 100644
--- a/clang/include/clang/AST/NestedNameSpecifier.h
+++ b/clang/include/clang/AST/NestedNameSpecifier.h
@@ -521,7 +521,7 @@ public:
/// NestedNameSpecifiers into a diagnostic with <<.
inline const StreamingDiagnostic &operator<<(const StreamingDiagnostic &DB,
NestedNameSpecifier *NNS) {
- DB.AddTaggedVal(reinterpret_cast<intptr_t>(NNS),
+ DB.AddTaggedVal(reinterpret_cast<uint64_t>(NNS),
DiagnosticsEngine::ak_nestednamespec);
return DB;
}
diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h
index aaddcfa307da..565eb0c9cf99 100644
--- a/clang/include/clang/AST/OpenMPClause.h
+++ b/clang/include/clang/AST/OpenMPClause.h
@@ -322,6 +322,81 @@ public:
}
};
+/// This represents the 'align' clause in the '#pragma omp allocate'
+/// directive.
+///
+/// \code
+/// #pragma omp allocate(a) allocator(omp_default_mem_alloc) align(8)
+/// \endcode
+/// In this example directive '#pragma omp allocate' has a simple 'allocator'
+/// clause with the allocator 'omp_default_mem_alloc' and an 'align' clause
+/// with the value 8.
+class OMPAlignClause final : public OMPClause {
+ friend class OMPClauseReader;
+
+ /// Location of '('.
+ SourceLocation LParenLoc;
+
+ /// Alignment specified with align clause.
+ Stmt *Alignment = nullptr;
+
+ /// Set alignment value.
+ void setAlignment(Expr *A) { Alignment = A; }
+
+ /// Sets the location of '('.
+ void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; }
+
+ /// Build 'align' clause with the given alignment
+ ///
+ /// \param A Alignment value.
+ /// \param StartLoc Starting location of the clause.
+ /// \param LParenLoc Location of '('.
+ /// \param EndLoc Ending location of the clause.
+ OMPAlignClause(Expr *A, SourceLocation StartLoc, SourceLocation LParenLoc,
+ SourceLocation EndLoc)
+ : OMPClause(llvm::omp::OMPC_align, StartLoc, EndLoc),
+ LParenLoc(LParenLoc), Alignment(A) {}
+
+ /// Build an empty clause.
+ OMPAlignClause()
+ : OMPClause(llvm::omp::OMPC_align, SourceLocation(), SourceLocation()) {}
+
+public:
+ /// Build 'align' clause with the given alignment
+ ///
+ /// \param A Alignment value.
+ /// \param StartLoc Starting location of the clause.
+ /// \param LParenLoc Location of '('.
+ /// \param EndLoc Ending location of the clause.
+ static OMPAlignClause *Create(const ASTContext &C, Expr *A,
+ SourceLocation StartLoc,
+ SourceLocation LParenLoc,
+ SourceLocation EndLoc);
+
+ /// Returns the location of '('.
+ SourceLocation getLParenLoc() const { return LParenLoc; }
+
+ /// Returns alignment
+ Expr *getAlignment() const { return cast_or_null<Expr>(Alignment); }
+
+ child_range children() { return child_range(&Alignment, &Alignment + 1); }
+
+ const_child_range children() const {
+ return const_child_range(&Alignment, &Alignment + 1);
+ }
+
+ child_range used_children() {
+ return child_range(child_iterator(), child_iterator());
+ }
+ const_child_range used_children() const {
+ return const_child_range(const_child_iterator(), const_child_iterator());
+ }
+
+ static bool classof(const OMPClause *T) {
+ return T->getClauseKind() == llvm::omp::OMPC_align;
+ }
+};
+
/// This represents clause 'allocate' in the '#pragma omp ...' directives.
///
/// \code
@@ -2005,13 +2080,13 @@ class OMPUpdateClause final
return IsExtended ? 2 : 0;
}
- /// Sets the the location of '(' in clause for 'depobj' directive.
+ /// Sets the location of '(' in clause for 'depobj' directive.
void setLParenLoc(SourceLocation Loc) {
assert(IsExtended && "Expected extended clause.");
*getTrailingObjects<SourceLocation>() = Loc;
}
- /// Sets the the location of '(' in clause for 'depobj' directive.
+ /// Sets the location of '(' in clause for 'depobj' directive.
void setArgumentLoc(SourceLocation Loc) {
assert(IsExtended && "Expected extended clause.");
*std::next(getTrailingObjects<SourceLocation>(), 1) = Loc;
@@ -2085,13 +2160,13 @@ public:
return const_child_range(const_child_iterator(), const_child_iterator());
}
- /// Gets the the location of '(' in clause for 'depobj' directive.
+ /// Gets the location of '(' in clause for 'depobj' directive.
SourceLocation getLParenLoc() const {
assert(IsExtended && "Expected extended clause.");
return *getTrailingObjects<SourceLocation>();
}
- /// Gets the the location of argument in clause for 'depobj' directive.
+ /// Gets the location of argument in clause for 'depobj' directive.
SourceLocation getArgumentLoc() const {
assert(IsExtended && "Expected extended clause.");
return *std::next(getTrailingObjects<SourceLocation>(), 1);
@@ -5606,7 +5681,8 @@ private:
/// Map-type-modifiers for the 'map' clause.
OpenMPMapModifierKind MapTypeModifiers[NumberOfOMPMapClauseModifiers] = {
OMPC_MAP_MODIFIER_unknown, OMPC_MAP_MODIFIER_unknown,
- OMPC_MAP_MODIFIER_unknown, OMPC_MAP_MODIFIER_unknown};
+ OMPC_MAP_MODIFIER_unknown, OMPC_MAP_MODIFIER_unknown,
+ OMPC_MAP_MODIFIER_unknown};
/// Location of map-type-modifiers for the 'map' clause.
SourceLocation MapTypeModifiersLoc[NumberOfOMPMapClauseModifiers];
@@ -8404,6 +8480,96 @@ public:
}
};
+/// This represents 'bind' clause in the '#pragma omp ...' directives.
+///
+/// \code
+/// #pragma omp loop bind(parallel)
+/// \endcode
+class OMPBindClause final : public OMPClause {
+ friend class OMPClauseReader;
+
+ /// Location of '('.
+ SourceLocation LParenLoc;
+
+ /// The binding kind of 'bind' clause.
+ OpenMPBindClauseKind Kind = OMPC_BIND_unknown;
+
+ /// Start location of the kind in source code.
+ SourceLocation KindLoc;
+
+ /// Sets the location of '('.
+ void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; }
+
+ /// Set the binding kind.
+ void setBindKind(OpenMPBindClauseKind K) { Kind = K; }
+
+ /// Set the binding kind location.
+ void setBindKindLoc(SourceLocation KLoc) { KindLoc = KLoc; }
+
+ /// Build 'bind' clause with kind \a K ('teams', 'parallel', or 'thread').
+ ///
+ /// \param K Binding kind of the clause ('teams', 'parallel' or 'thread').
+ /// \param KLoc Starting location of the binding kind.
+ /// \param StartLoc Starting location of the clause.
+ /// \param LParenLoc Location of '('.
+ /// \param EndLoc Ending location of the clause.
+ OMPBindClause(OpenMPBindClauseKind K, SourceLocation KLoc,
+ SourceLocation StartLoc, SourceLocation LParenLoc,
+ SourceLocation EndLoc)
+ : OMPClause(llvm::omp::OMPC_bind, StartLoc, EndLoc), LParenLoc(LParenLoc),
+ Kind(K), KindLoc(KLoc) {}
+
+ /// Build an empty clause.
+ OMPBindClause()
+ : OMPClause(llvm::omp::OMPC_bind, SourceLocation(), SourceLocation()) {}
+
+public:
+ /// Build 'bind' clause with kind \a K ('teams', 'parallel', or 'thread').
+ ///
+ /// \param C AST context
+ /// \param K Binding kind of the clause ('teams', 'parallel' or 'thread').
+ /// \param KLoc Starting location of the binding kind.
+ /// \param StartLoc Starting location of the clause.
+ /// \param LParenLoc Location of '('.
+ /// \param EndLoc Ending location of the clause.
+ static OMPBindClause *Create(const ASTContext &C, OpenMPBindClauseKind K,
+ SourceLocation KLoc, SourceLocation StartLoc,
+ SourceLocation LParenLoc, SourceLocation EndLoc);
+
+ /// Build an empty 'bind' clause.
+ ///
+ /// \param C AST context
+ static OMPBindClause *CreateEmpty(const ASTContext &C);
+
+ /// Returns the location of '('.
+ SourceLocation getLParenLoc() const { return LParenLoc; }
+
+ /// Returns kind of the clause.
+ OpenMPBindClauseKind getBindKind() const { return Kind; }
+
+ /// Returns location of clause kind.
+ SourceLocation getBindKindLoc() const { return KindLoc; }
+
+ child_range children() {
+ return child_range(child_iterator(), child_iterator());
+ }
+
+ const_child_range children() const {
+ return const_child_range(const_child_iterator(), const_child_iterator());
+ }
+
+ child_range used_children() {
+ return child_range(child_iterator(), child_iterator());
+ }
+ const_child_range used_children() const {
+ return const_child_range(const_child_iterator(), const_child_iterator());
+ }
+
+ static bool classof(const OMPClause *T) {
+ return T->getClauseKind() == llvm::omp::OMPC_bind;
+ }
+};
+
/// This class implements a simple visitor for OMPClause
/// subclasses.
template<class ImplClass, template <typename> class Ptr, typename RetTy>
@@ -8546,10 +8712,11 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const OMPTraitInfo *TI);
/// Clang specific specialization of the OMPContext to lookup target features.
struct TargetOMPContext final : public llvm::omp::OMPContext {
-
TargetOMPContext(ASTContext &ASTCtx,
std::function<void(StringRef)> &&DiagUnknownTrait,
- const FunctionDecl *CurrentFunctionDecl);
+ const FunctionDecl *CurrentFunctionDecl,
+ ArrayRef<llvm::omp::TraitProperty> ConstructTraits);
+
virtual ~TargetOMPContext() = default;
/// See llvm::omp::OMPContext::matchesISATrait
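Both new clause classes follow the usual OMPClause pattern (private constructors, a static Create, and classof keyed on the clause kind). A minimal sketch of reading them back off a directive, assuming an OMPExecutableDirective *Dir obtained from a traversal:

  // Sketch only; Dir is an assumed directive pointer.
  for (OMPClause *C : Dir->clauses()) {
    if (auto *AC = dyn_cast<OMPAlignClause>(C)) {
      if (Expr *A = AC->getAlignment())
        A->dumpColor();                       // expression from align(...)
    } else if (auto *BC = dyn_cast<OMPBindClause>(C)) {
      llvm::errs() << static_cast<unsigned>(BC->getBindKind()) << "\n";
    }
  }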
diff --git a/clang/include/clang/AST/PrettyPrinter.h b/clang/include/clang/AST/PrettyPrinter.h
index 3baf2b2ba94d..f6816e938f2a 100644
--- a/clang/include/clang/AST/PrettyPrinter.h
+++ b/clang/include/clang/AST/PrettyPrinter.h
@@ -74,7 +74,8 @@ struct PrintingPolicy {
MSWChar(LO.MicrosoftExt && !LO.WChar), IncludeNewlines(true),
MSVCFormatting(false), ConstantsAsWritten(false),
SuppressImplicitBase(false), FullyQualifiedName(false),
- PrintCanonicalTypes(false), PrintInjectedClassNameWithArguments(true) {}
+ PrintCanonicalTypes(false), PrintInjectedClassNameWithArguments(true),
+ UsePreferredNames(true), AlwaysIncludeTypeForTemplateArgument(false) {}
/// Adjust this printing policy for cases where it's known that we're
/// printing C++ code (for instance, if AST dumping reaches a C++-only
@@ -273,6 +274,14 @@ struct PrintingPolicy {
/// invalid C++ code.
unsigned PrintInjectedClassNameWithArguments : 1;
+ /// Whether to use C++ template preferred_name attributes when printing
+ /// templates.
+ unsigned UsePreferredNames : 1;
+
+ /// Whether to use type suffixes (e.g. 1U) on integral non-type template
+ /// parameters.
+ unsigned AlwaysIncludeTypeForTemplateArgument : 1;
+
/// Callbacks to use to allow the behavior of printing to be customized.
const PrintingCallbacks *Callbacks = nullptr;
};
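A hedged sketch of switching the two new policy bits, assuming a LangOptions LO and a QualType QT are available in the caller:

  // Sketch only; LO and QT are assumed from the calling context.
  PrintingPolicy Policy(LO);
  Policy.UsePreferredNames = false;                   // ignore preferred_name sugar
  Policy.AlwaysIncludeTypeForTemplateArgument = true; // print '1U' rather than '1'
  QT.print(llvm::outs(), Policy);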
diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h
index 9bfa5b9c2326..9797eac53dde 100644
--- a/clang/include/clang/AST/RecursiveASTVisitor.h
+++ b/clang/include/clang/AST/RecursiveASTVisitor.h
@@ -1681,10 +1681,7 @@ bool RecursiveASTVisitor<Derived>::TraverseTemplateInstantiations(
ClassTemplateDecl *D) {
for (auto *SD : D->specializations()) {
for (auto *RD : SD->redecls()) {
- // We don't want to visit injected-class-names in this traversal.
- if (cast<CXXRecordDecl>(RD)->isInjectedClassName())
- continue;
-
+ assert(!cast<CXXRecordDecl>(RD)->isInjectedClassName());
switch (
cast<ClassTemplateSpecializationDecl>(RD)->getSpecializationKind()) {
// Visit the implicit instantiations with the requested pattern.
@@ -1863,10 +1860,9 @@ DEF_TRAVERSE_DECL(UnresolvedUsingIfExistsDecl, {})
DEF_TRAVERSE_DECL(EnumDecl, {
TRY_TO(TraverseDeclTemplateParameterLists(D));
- if (D->getTypeForDecl())
- TRY_TO(TraverseType(QualType(D->getTypeForDecl(), 0)));
-
TRY_TO(TraverseNestedNameSpecifierLoc(D->getQualifierLoc()));
+ if (auto *TSI = D->getIntegerTypeSourceInfo())
+ TRY_TO(TraverseTypeLoc(TSI->getTypeLoc()));
// The enumerators are already traversed by
// decls_begin()/decls_end().
})
@@ -2842,6 +2838,9 @@ RecursiveASTVisitor<Derived>::TraverseOMPLoopDirective(OMPLoopDirective *S) {
return TraverseOMPExecutableDirective(S);
}
+DEF_TRAVERSE_STMT(OMPMetaDirective,
+ { TRY_TO(TraverseOMPExecutableDirective(S)); })
+
DEF_TRAVERSE_STMT(OMPParallelDirective,
{ TRY_TO(TraverseOMPExecutableDirective(S)); })
@@ -3021,6 +3020,9 @@ DEF_TRAVERSE_STMT(OMPDispatchDirective,
DEF_TRAVERSE_STMT(OMPMaskedDirective,
{ TRY_TO(TraverseOMPExecutableDirective(S)); })
+DEF_TRAVERSE_STMT(OMPGenericLoopDirective,
+ { TRY_TO(TraverseOMPExecutableDirective(S)); })
+
// OpenMP clauses.
template <typename Derived>
bool RecursiveASTVisitor<Derived>::TraverseOMPClause(OMPClause *C) {
@@ -3092,6 +3094,12 @@ RecursiveASTVisitor<Derived>::VisitOMPNumThreadsClause(OMPNumThreadsClause *C) {
}
template <typename Derived>
+bool RecursiveASTVisitor<Derived>::VisitOMPAlignClause(OMPAlignClause *C) {
+ TRY_TO(TraverseStmt(C->getAlignment()));
+ return true;
+}
+
+template <typename Derived>
bool RecursiveASTVisitor<Derived>::VisitOMPSafelenClause(OMPSafelenClause *C) {
TRY_TO(TraverseStmt(C->getSafelen()));
return true;
@@ -3674,6 +3682,11 @@ bool RecursiveASTVisitor<Derived>::VisitOMPFilterClause(OMPFilterClause *C) {
return true;
}
+template <typename Derived>
+bool RecursiveASTVisitor<Derived>::VisitOMPBindClause(OMPBindClause *C) {
+ return true;
+}
+
// FIXME: look at the following tricky-seeming exprs to see if we
// need to recurse on anything. These are ones that have methods
// returning decls or qualtypes or nestednamespecifier -- though I'm
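The visitor gains VisitOMPAlignClause/VisitOMPBindClause hooks along with traversal of the new directives. A minimal, hypothetical subclass that collects 'align' expressions (class name and container are illustrative only):

  // Sketch only; a hypothetical visitor, not part of the patch.
  class AlignClauseCollector
      : public RecursiveASTVisitor<AlignClauseCollector> {
  public:
    bool VisitOMPAlignClause(OMPAlignClause *C) {
      if (Expr *A = C->getAlignment())
        Aligns.push_back(A);
      return true;
    }
    llvm::SmallVector<Expr *, 4> Aligns;
  };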
diff --git a/clang/include/clang/AST/Stmt.h b/clang/include/clang/AST/Stmt.h
index 8e1d7df97096..a32126d23d31 100644
--- a/clang/include/clang/AST/Stmt.h
+++ b/clang/include/clang/AST/Stmt.h
@@ -20,6 +20,7 @@
#include "clang/Basic/IdentifierTable.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/Specifiers.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ADT/PointerIntPair.h"
@@ -160,8 +161,8 @@ protected:
unsigned : NumStmtBits;
- /// True if this if statement is a constexpr if.
- unsigned IsConstexpr : 1;
+ /// Whether this is a constexpr if, or a consteval if, or neither.
+ unsigned Kind : 3;
/// True if this if statement has storage for an else statement.
unsigned HasElse : 1;
@@ -1215,6 +1216,11 @@ public:
const PrintingPolicy &Policy, unsigned Indentation = 0,
StringRef NewlineSymbol = "\n",
const ASTContext *Context = nullptr) const;
+ void printPrettyControlled(raw_ostream &OS, PrinterHelper *Helper,
+ const PrintingPolicy &Policy,
+ unsigned Indentation = 0,
+ StringRef NewlineSymbol = "\n",
+ const ASTContext *Context = nullptr) const;
/// Pretty-prints in JSON format.
void printJson(raw_ostream &Out, PrinterHelper *Helper,
@@ -1950,8 +1956,8 @@ class IfStmt final
unsigned elseOffset() const { return condOffset() + ElseOffsetFromCond; }
/// Build an if/then/else statement.
- IfStmt(const ASTContext &Ctx, SourceLocation IL, bool IsConstexpr, Stmt *Init,
- VarDecl *Var, Expr *Cond, SourceLocation LParenLoc,
+ IfStmt(const ASTContext &Ctx, SourceLocation IL, IfStatementKind Kind,
+ Stmt *Init, VarDecl *Var, Expr *Cond, SourceLocation LParenLoc,
SourceLocation RParenLoc, Stmt *Then, SourceLocation EL, Stmt *Else);
/// Build an empty if/then/else statement.
@@ -1960,9 +1966,9 @@ class IfStmt final
public:
/// Create an IfStmt.
static IfStmt *Create(const ASTContext &Ctx, SourceLocation IL,
- bool IsConstexpr, Stmt *Init, VarDecl *Var, Expr *Cond,
- SourceLocation LPL, SourceLocation RPL, Stmt *Then,
- SourceLocation EL = SourceLocation(),
+ IfStatementKind Kind, Stmt *Init, VarDecl *Var,
+ Expr *Cond, SourceLocation LPL, SourceLocation RPL,
+ Stmt *Then, SourceLocation EL = SourceLocation(),
Stmt *Else = nullptr);
/// Create an empty IfStmt optionally with storage for an else statement,
@@ -2077,8 +2083,30 @@ public:
*getTrailingObjects<SourceLocation>() = ElseLoc;
}
- bool isConstexpr() const { return IfStmtBits.IsConstexpr; }
- void setConstexpr(bool C) { IfStmtBits.IsConstexpr = C; }
+ bool isConsteval() const {
+ return getStatementKind() == IfStatementKind::ConstevalNonNegated ||
+ getStatementKind() == IfStatementKind::ConstevalNegated;
+ }
+
+ bool isNonNegatedConsteval() const {
+ return getStatementKind() == IfStatementKind::ConstevalNonNegated;
+ }
+
+ bool isNegatedConsteval() const {
+ return getStatementKind() == IfStatementKind::ConstevalNegated;
+ }
+
+ bool isConstexpr() const {
+ return getStatementKind() == IfStatementKind::Constexpr;
+ }
+
+ void setStatementKind(IfStatementKind Kind) {
+ IfStmtBits.Kind = static_cast<unsigned>(Kind);
+ }
+
+ IfStatementKind getStatementKind() const {
+ return static_cast<IfStatementKind>(IfStmtBits.Kind);
+ }
/// If this is an 'if constexpr', determine which substatement will be taken.
/// Otherwise, or if the condition is value-dependent, returns None.
@@ -2101,13 +2129,19 @@ public:
// Iterators over subexpressions. The iterators will include iterating
// over the initialization expression referenced by the condition variable.
child_range children() {
- return child_range(getTrailingObjects<Stmt *>(),
+ // We always store a condition, but there is none for consteval if
+ // statements, so skip it.
+ return child_range(getTrailingObjects<Stmt *>() +
+ (isConsteval() ? thenOffset() : 0),
getTrailingObjects<Stmt *>() +
numTrailingObjects(OverloadToken<Stmt *>()));
}
const_child_range children() const {
- return const_child_range(getTrailingObjects<Stmt *>(),
+ // We always store a condition, but there is none for consteval if
+ // statements, so skip it.
+ return const_child_range(getTrailingObjects<Stmt *>() +
+ (isConsteval() ? thenOffset() : 0),
getTrailingObjects<Stmt *>() +
numTrailingObjects(OverloadToken<Stmt *>()));
}
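The widened Kind bit-field lets IfStmt distinguish the C++2b 'if consteval' forms from 'if constexpr'. A sketch of the source forms the two new IfStatementKind values (declared in Basic/Specifiers.h, per the added include) are meant to model:

  // Sketch only: source-level forms, not compiler code.
  constexpr int f(int x) {
    if consteval {            // IfStatementKind::ConstevalNonNegated
      return x + 1;
    } else {
      return x;
    }
  }
  constexpr int g(int x) {
    if !consteval {           // IfStatementKind::ConstevalNegated
      return x - 1;
    }
    return x;
  }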
diff --git a/clang/include/clang/AST/StmtObjC.h b/clang/include/clang/AST/StmtObjC.h
index 948ef2421cb9..c46ff4634c82 100644
--- a/clang/include/clang/AST/StmtObjC.h
+++ b/clang/include/clang/AST/StmtObjC.h
@@ -162,8 +162,14 @@ public:
};
/// Represents Objective-C's \@try ... \@catch ... \@finally statement.
-class ObjCAtTryStmt : public Stmt {
-private:
+class ObjCAtTryStmt final
+ : public Stmt,
+ private llvm::TrailingObjects<ObjCAtTryStmt, Stmt *> {
+ friend TrailingObjects;
+ size_t numTrailingObjects(OverloadToken<Stmt *>) const {
+ return 1 + NumCatchStmts + HasFinally;
+ }
+
// The location of the @ in the \@try.
SourceLocation AtTryLoc;
@@ -178,10 +184,8 @@ private:
/// The order of the statements in memory follows the order in the source,
/// with the \@try body first, followed by the \@catch statements (if any)
/// and, finally, the \@finally (if it exists).
- Stmt **getStmts() { return reinterpret_cast<Stmt **> (this + 1); }
- const Stmt* const *getStmts() const {
- return reinterpret_cast<const Stmt * const*> (this + 1);
- }
+ Stmt **getStmts() { return getTrailingObjects<Stmt *>(); }
+ Stmt *const *getStmts() const { return getTrailingObjects<Stmt *>(); }
ObjCAtTryStmt(SourceLocation atTryLoc, Stmt *atTryStmt,
Stmt **CatchStmts, unsigned NumCatchStmts,
@@ -257,13 +261,34 @@ public:
}
child_range children() {
- return child_range(getStmts(),
- getStmts() + 1 + NumCatchStmts + HasFinally);
+ return child_range(
+ getStmts(), getStmts() + numTrailingObjects(OverloadToken<Stmt *>()));
}
const_child_range children() const {
return const_child_range(const_cast<ObjCAtTryStmt *>(this)->children());
}
+
+ using catch_stmt_iterator = CastIterator<ObjCAtCatchStmt>;
+ using const_catch_stmt_iterator = ConstCastIterator<ObjCAtCatchStmt>;
+ using catch_range = llvm::iterator_range<catch_stmt_iterator>;
+ using catch_const_range = llvm::iterator_range<const_catch_stmt_iterator>;
+
+ catch_stmt_iterator catch_stmts_begin() { return getStmts() + 1; }
+ catch_stmt_iterator catch_stmts_end() {
+ return catch_stmts_begin() + NumCatchStmts;
+ }
+ catch_range catch_stmts() {
+ return catch_range(catch_stmts_begin(), catch_stmts_end());
+ }
+
+ const_catch_stmt_iterator catch_stmts_begin() const { return getStmts() + 1; }
+ const_catch_stmt_iterator catch_stmts_end() const {
+ return catch_stmts_begin() + NumCatchStmts;
+ }
+ catch_const_range catch_stmts() const {
+ return catch_const_range(catch_stmts_begin(), catch_stmts_end());
+ }
};
/// Represents Objective-C's \@synchronized statement.
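With the TrailingObjects rewrite above, ObjCAtTryStmt also grows ranged accessors over its \@catch blocks. A short sketch, assuming an ObjCAtTryStmt *Try from an AST walk:

  // Sketch only; Try is an assumed statement pointer.
  unsigned TypedCatches = 0;
  for (const ObjCAtCatchStmt *Catch : Try->catch_stmts())
    if (Catch->getCatchParamDecl())   // @catch (Type *t) vs. @catch (...)
      ++TypedCatches;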
diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h
index 9c85df741f48..d5b5c9580da9 100644
--- a/clang/include/clang/AST/StmtOpenMP.h
+++ b/clang/include/clang/AST/StmtOpenMP.h
@@ -889,22 +889,23 @@ public:
/// Calls the specified callback function for all the loops in \p CurStmt,
/// from the outermost to the innermost.
- static bool doForAllLoops(Stmt *CurStmt, bool TryImperfectlyNestedLoops,
- unsigned NumLoops,
- llvm::function_ref<bool(unsigned, Stmt *)> Callback,
- llvm::function_ref<void(OMPLoopBasedDirective *)>
- OnTransformationCallback);
+ static bool
+ doForAllLoops(Stmt *CurStmt, bool TryImperfectlyNestedLoops,
+ unsigned NumLoops,
+ llvm::function_ref<bool(unsigned, Stmt *)> Callback,
+ llvm::function_ref<void(OMPLoopTransformationDirective *)>
+ OnTransformationCallback);
static bool
doForAllLoops(const Stmt *CurStmt, bool TryImperfectlyNestedLoops,
unsigned NumLoops,
llvm::function_ref<bool(unsigned, const Stmt *)> Callback,
- llvm::function_ref<void(const OMPLoopBasedDirective *)>
+ llvm::function_ref<void(const OMPLoopTransformationDirective *)>
OnTransformationCallback) {
auto &&NewCallback = [Callback](unsigned Cnt, Stmt *CurStmt) {
return Callback(Cnt, CurStmt);
};
auto &&NewTransformCb =
- [OnTransformationCallback](OMPLoopBasedDirective *A) {
+ [OnTransformationCallback](OMPLoopTransformationDirective *A) {
OnTransformationCallback(A);
};
return doForAllLoops(const_cast<Stmt *>(CurStmt), TryImperfectlyNestedLoops,
@@ -917,7 +918,7 @@ public:
doForAllLoops(Stmt *CurStmt, bool TryImperfectlyNestedLoops,
unsigned NumLoops,
llvm::function_ref<bool(unsigned, Stmt *)> Callback) {
- auto &&TransformCb = [](OMPLoopBasedDirective *) {};
+ auto &&TransformCb = [](OMPLoopTransformationDirective *) {};
return doForAllLoops(CurStmt, TryImperfectlyNestedLoops, NumLoops, Callback,
TransformCb);
}
@@ -954,6 +955,47 @@ public:
}
};
+/// The base class for all loop transformation directives.
+class OMPLoopTransformationDirective : public OMPLoopBasedDirective {
+ friend class ASTStmtReader;
+
+ /// Number of loops generated by this loop transformation.
+ unsigned NumGeneratedLoops = 0;
+
+protected:
+ explicit OMPLoopTransformationDirective(StmtClass SC,
+ OpenMPDirectiveKind Kind,
+ SourceLocation StartLoc,
+ SourceLocation EndLoc,
+ unsigned NumAssociatedLoops)
+ : OMPLoopBasedDirective(SC, Kind, StartLoc, EndLoc, NumAssociatedLoops) {}
+
+ /// Set the number of loops generated by this loop transformation.
+ void setNumGeneratedLoops(unsigned Num) { NumGeneratedLoops = Num; }
+
+public:
+ /// Return the number of associated (consumed) loops.
+ unsigned getNumAssociatedLoops() const { return getLoopsNumber(); }
+
+ /// Return the number of loops generated by this loop transformation.
+ unsigned getNumGeneratedLoops() { return NumGeneratedLoops; }
+
+ /// Get the de-sugared statements after the loop transformation.
+ ///
+ /// Might be nullptr if either the directive generates no loops and is handled
+ /// directly in CodeGen, or resolving a template-dependence context is
+ /// required.
+ Stmt *getTransformedStmt() const;
+
+ /// Return preinits statement.
+ Stmt *getPreInits() const;
+
+ static bool classof(const Stmt *T) {
+ return T->getStmtClass() == OMPTileDirectiveClass ||
+ T->getStmtClass() == OMPUnrollDirectiveClass;
+ }
+};
+
/// This is a common base class for loop directives ('omp simd', 'omp
/// for', 'omp for simd' etc.). It is responsible for the loop code generation.
///
@@ -1102,7 +1144,7 @@ protected:
if (isOpenMPLoopBoundSharingDirective(Kind))
return CombinedDistributeEnd;
if (isOpenMPWorksharingDirective(Kind) || isOpenMPTaskLoopDirective(Kind) ||
- isOpenMPDistributeDirective(Kind))
+ isOpenMPGenericLoopDirective(Kind) || isOpenMPDistributeDirective(Kind))
return WorksharingEnd;
return DefaultEnd;
}
@@ -1134,6 +1176,7 @@ protected:
}
void setIsLastIterVariable(Expr *IL) {
assert((isOpenMPWorksharingDirective(getDirectiveKind()) ||
+ isOpenMPGenericLoopDirective(getDirectiveKind()) ||
isOpenMPTaskLoopDirective(getDirectiveKind()) ||
isOpenMPDistributeDirective(getDirectiveKind())) &&
"expected worksharing loop directive");
@@ -1141,6 +1184,7 @@ protected:
}
void setLowerBoundVariable(Expr *LB) {
assert((isOpenMPWorksharingDirective(getDirectiveKind()) ||
+ isOpenMPGenericLoopDirective(getDirectiveKind()) ||
isOpenMPTaskLoopDirective(getDirectiveKind()) ||
isOpenMPDistributeDirective(getDirectiveKind())) &&
"expected worksharing loop directive");
@@ -1148,6 +1192,7 @@ protected:
}
void setUpperBoundVariable(Expr *UB) {
assert((isOpenMPWorksharingDirective(getDirectiveKind()) ||
+ isOpenMPGenericLoopDirective(getDirectiveKind()) ||
isOpenMPTaskLoopDirective(getDirectiveKind()) ||
isOpenMPDistributeDirective(getDirectiveKind())) &&
"expected worksharing loop directive");
@@ -1155,6 +1200,7 @@ protected:
}
void setStrideVariable(Expr *ST) {
assert((isOpenMPWorksharingDirective(getDirectiveKind()) ||
+ isOpenMPGenericLoopDirective(getDirectiveKind()) ||
isOpenMPTaskLoopDirective(getDirectiveKind()) ||
isOpenMPDistributeDirective(getDirectiveKind())) &&
"expected worksharing loop directive");
@@ -1162,6 +1208,7 @@ protected:
}
void setEnsureUpperBound(Expr *EUB) {
assert((isOpenMPWorksharingDirective(getDirectiveKind()) ||
+ isOpenMPGenericLoopDirective(getDirectiveKind()) ||
isOpenMPTaskLoopDirective(getDirectiveKind()) ||
isOpenMPDistributeDirective(getDirectiveKind())) &&
"expected worksharing loop directive");
@@ -1169,6 +1216,7 @@ protected:
}
void setNextLowerBound(Expr *NLB) {
assert((isOpenMPWorksharingDirective(getDirectiveKind()) ||
+ isOpenMPGenericLoopDirective(getDirectiveKind()) ||
isOpenMPTaskLoopDirective(getDirectiveKind()) ||
isOpenMPDistributeDirective(getDirectiveKind())) &&
"expected worksharing loop directive");
@@ -1176,6 +1224,7 @@ protected:
}
void setNextUpperBound(Expr *NUB) {
assert((isOpenMPWorksharingDirective(getDirectiveKind()) ||
+ isOpenMPGenericLoopDirective(getDirectiveKind()) ||
isOpenMPTaskLoopDirective(getDirectiveKind()) ||
isOpenMPDistributeDirective(getDirectiveKind())) &&
"expected worksharing loop directive");
@@ -1183,6 +1232,7 @@ protected:
}
void setNumIterations(Expr *NI) {
assert((isOpenMPWorksharingDirective(getDirectiveKind()) ||
+ isOpenMPGenericLoopDirective(getDirectiveKind()) ||
isOpenMPTaskLoopDirective(getDirectiveKind()) ||
isOpenMPDistributeDirective(getDirectiveKind())) &&
"expected worksharing loop directive");
@@ -1285,6 +1335,7 @@ public:
Stmt *getPreInits() { return Data->getChildren()[PreInitsOffset]; }
Expr *getIsLastIterVariable() const {
assert((isOpenMPWorksharingDirective(getDirectiveKind()) ||
+ isOpenMPGenericLoopDirective(getDirectiveKind()) ||
isOpenMPTaskLoopDirective(getDirectiveKind()) ||
isOpenMPDistributeDirective(getDirectiveKind())) &&
"expected worksharing loop directive");
@@ -1292,6 +1343,7 @@ public:
}
Expr *getLowerBoundVariable() const {
assert((isOpenMPWorksharingDirective(getDirectiveKind()) ||
+ isOpenMPGenericLoopDirective(getDirectiveKind()) ||
isOpenMPTaskLoopDirective(getDirectiveKind()) ||
isOpenMPDistributeDirective(getDirectiveKind())) &&
"expected worksharing loop directive");
@@ -1299,6 +1351,7 @@ public:
}
Expr *getUpperBoundVariable() const {
assert((isOpenMPWorksharingDirective(getDirectiveKind()) ||
+ isOpenMPGenericLoopDirective(getDirectiveKind()) ||
isOpenMPTaskLoopDirective(getDirectiveKind()) ||
isOpenMPDistributeDirective(getDirectiveKind())) &&
"expected worksharing loop directive");
@@ -1306,6 +1359,7 @@ public:
}
Expr *getStrideVariable() const {
assert((isOpenMPWorksharingDirective(getDirectiveKind()) ||
+ isOpenMPGenericLoopDirective(getDirectiveKind()) ||
isOpenMPTaskLoopDirective(getDirectiveKind()) ||
isOpenMPDistributeDirective(getDirectiveKind())) &&
"expected worksharing loop directive");
@@ -1313,6 +1367,7 @@ public:
}
Expr *getEnsureUpperBound() const {
assert((isOpenMPWorksharingDirective(getDirectiveKind()) ||
+ isOpenMPGenericLoopDirective(getDirectiveKind()) ||
isOpenMPTaskLoopDirective(getDirectiveKind()) ||
isOpenMPDistributeDirective(getDirectiveKind())) &&
"expected worksharing loop directive");
@@ -1320,6 +1375,7 @@ public:
}
Expr *getNextLowerBound() const {
assert((isOpenMPWorksharingDirective(getDirectiveKind()) ||
+ isOpenMPGenericLoopDirective(getDirectiveKind()) ||
isOpenMPTaskLoopDirective(getDirectiveKind()) ||
isOpenMPDistributeDirective(getDirectiveKind())) &&
"expected worksharing loop directive");
@@ -1327,6 +1383,7 @@ public:
}
Expr *getNextUpperBound() const {
assert((isOpenMPWorksharingDirective(getDirectiveKind()) ||
+ isOpenMPGenericLoopDirective(getDirectiveKind()) ||
isOpenMPTaskLoopDirective(getDirectiveKind()) ||
isOpenMPDistributeDirective(getDirectiveKind())) &&
"expected worksharing loop directive");
@@ -1334,6 +1391,7 @@ public:
}
Expr *getNumIterations() const {
assert((isOpenMPWorksharingDirective(getDirectiveKind()) ||
+ isOpenMPGenericLoopDirective(getDirectiveKind()) ||
isOpenMPTaskLoopDirective(getDirectiveKind()) ||
isOpenMPDistributeDirective(getDirectiveKind())) &&
"expected worksharing loop directive");
@@ -1467,6 +1525,7 @@ public:
T->getStmtClass() == OMPTaskLoopSimdDirectiveClass ||
T->getStmtClass() == OMPMasterTaskLoopDirectiveClass ||
T->getStmtClass() == OMPMasterTaskLoopSimdDirectiveClass ||
+ T->getStmtClass() == OMPGenericLoopDirectiveClass ||
T->getStmtClass() == OMPParallelMasterTaskLoopDirectiveClass ||
T->getStmtClass() == OMPParallelMasterTaskLoopSimdDirectiveClass ||
T->getStmtClass() == OMPDistributeDirectiveClass ||
@@ -2510,15 +2569,20 @@ public:
/// \param C AST context.
/// \param StartLoc Starting location of the directive kind.
/// \param EndLoc Ending Location of the directive.
+ /// \param Clauses List of clauses.
///
- static OMPTaskwaitDirective *
- Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc);
+ static OMPTaskwaitDirective *Create(const ASTContext &C,
+ SourceLocation StartLoc,
+ SourceLocation EndLoc,
+ ArrayRef<OMPClause *> Clauses);
/// Creates an empty directive.
///
/// \param C AST context.
+ /// \param NumClauses Number of clauses.
///
- static OMPTaskwaitDirective *CreateEmpty(const ASTContext &C, EmptyShell);
+ static OMPTaskwaitDirective *CreateEmpty(const ASTContext &C,
+ unsigned NumClauses, EmptyShell);
static bool classof(const Stmt *T) {
return T->getStmtClass() == OMPTaskwaitDirectiveClass;
@@ -2794,16 +2858,25 @@ class OMPAtomicDirective : public OMPExecutableDirective {
: OMPExecutableDirective(OMPAtomicDirectiveClass, llvm::omp::OMPD_atomic,
SourceLocation(), SourceLocation()) {}
+ enum DataPositionTy : size_t {
+ POS_X = 0,
+ POS_V,
+ POS_E,
+ POS_UpdateExpr,
+ };
+
/// Set 'x' part of the associated expression/statement.
- void setX(Expr *X) { Data->getChildren()[0] = X; }
+ void setX(Expr *X) { Data->getChildren()[DataPositionTy::POS_X] = X; }
/// Set helper expression of the form
/// 'OpaqueValueExpr(x) binop OpaqueValueExpr(expr)' or
/// 'OpaqueValueExpr(expr) binop OpaqueValueExpr(x)'.
- void setUpdateExpr(Expr *UE) { Data->getChildren()[1] = UE; }
+ void setUpdateExpr(Expr *UE) {
+ Data->getChildren()[DataPositionTy::POS_UpdateExpr] = UE;
+ }
/// Set 'v' part of the associated expression/statement.
- void setV(Expr *V) { Data->getChildren()[2] = V; }
+ void setV(Expr *V) { Data->getChildren()[DataPositionTy::POS_V] = V; }
/// Set 'expr' part of the associated expression/statement.
- void setExpr(Expr *E) { Data->getChildren()[3] = E; }
+ void setExpr(Expr *E) { Data->getChildren()[DataPositionTy::POS_E] = E; }
public:
/// Creates directive with a list of \a Clauses and 'x', 'v' and 'expr'
@@ -2840,16 +2913,22 @@ public:
unsigned NumClauses, EmptyShell);
/// Get 'x' part of the associated expression/statement.
- Expr *getX() { return cast_or_null<Expr>(Data->getChildren()[0]); }
+ Expr *getX() {
+ return cast_or_null<Expr>(Data->getChildren()[DataPositionTy::POS_X]);
+ }
const Expr *getX() const {
- return cast_or_null<Expr>(Data->getChildren()[0]);
+ return cast_or_null<Expr>(Data->getChildren()[DataPositionTy::POS_X]);
}
/// Get helper expression of the form
/// 'OpaqueValueExpr(x) binop OpaqueValueExpr(expr)' or
/// 'OpaqueValueExpr(expr) binop OpaqueValueExpr(x)'.
- Expr *getUpdateExpr() { return cast_or_null<Expr>(Data->getChildren()[1]); }
+ Expr *getUpdateExpr() {
+ return cast_or_null<Expr>(
+ Data->getChildren()[DataPositionTy::POS_UpdateExpr]);
+ }
const Expr *getUpdateExpr() const {
- return cast_or_null<Expr>(Data->getChildren()[1]);
+ return cast_or_null<Expr>(
+ Data->getChildren()[DataPositionTy::POS_UpdateExpr]);
}
/// Return true if helper update expression has form
/// 'OpaqueValueExpr(x) binop OpaqueValueExpr(expr)' and false if it has form
@@ -2859,14 +2938,18 @@ public:
/// 'x', false if 'v' must be updated to the new value of 'x'.
bool isPostfixUpdate() const { return IsPostfixUpdate; }
/// Get 'v' part of the associated expression/statement.
- Expr *getV() { return cast_or_null<Expr>(Data->getChildren()[2]); }
+ Expr *getV() {
+ return cast_or_null<Expr>(Data->getChildren()[DataPositionTy::POS_V]);
+ }
const Expr *getV() const {
- return cast_or_null<Expr>(Data->getChildren()[2]);
+ return cast_or_null<Expr>(Data->getChildren()[DataPositionTy::POS_V]);
}
/// Get 'expr' part of the associated expression/statement.
- Expr *getExpr() { return cast_or_null<Expr>(Data->getChildren()[3]); }
+ Expr *getExpr() {
+ return cast_or_null<Expr>(Data->getChildren()[DataPositionTy::POS_E]);
+ }
const Expr *getExpr() const {
- return cast_or_null<Expr>(Data->getChildren()[3]);
+ return cast_or_null<Expr>(Data->getChildren()[DataPositionTy::POS_E]);
}
static bool classof(const Stmt *T) {
@@ -4992,7 +5075,7 @@ public:
};
/// This represents the '#pragma omp tile' loop transformation directive.
-class OMPTileDirective final : public OMPLoopBasedDirective {
+class OMPTileDirective final : public OMPLoopTransformationDirective {
friend class ASTStmtReader;
friend class OMPExecutableDirective;
@@ -5004,8 +5087,11 @@ class OMPTileDirective final : public OMPLoopBasedDirective {
explicit OMPTileDirective(SourceLocation StartLoc, SourceLocation EndLoc,
unsigned NumLoops)
- : OMPLoopBasedDirective(OMPTileDirectiveClass, llvm::omp::OMPD_tile,
- StartLoc, EndLoc, NumLoops) {}
+ : OMPLoopTransformationDirective(OMPTileDirectiveClass,
+ llvm::omp::OMPD_tile, StartLoc, EndLoc,
+ NumLoops) {
+ setNumGeneratedLoops(3 * NumLoops);
+ }
void setPreInits(Stmt *PreInits) {
Data->getChildren()[PreInitsOffset] = PreInits;
@@ -5042,8 +5128,6 @@ public:
static OMPTileDirective *CreateEmpty(const ASTContext &C, unsigned NumClauses,
unsigned NumLoops);
- unsigned getNumAssociatedLoops() const { return getLoopsNumber(); }
-
/// Gets/sets the associated loops after tiling.
///
/// This is in de-sugared format stored as a CompoundStmt.
@@ -5073,7 +5157,7 @@ public:
/// #pragma omp unroll
/// for (int i = 0; i < 64; ++i)
/// \endcode
-class OMPUnrollDirective final : public OMPLoopBasedDirective {
+class OMPUnrollDirective final : public OMPLoopTransformationDirective {
friend class ASTStmtReader;
friend class OMPExecutableDirective;
@@ -5084,8 +5168,9 @@ class OMPUnrollDirective final : public OMPLoopBasedDirective {
};
explicit OMPUnrollDirective(SourceLocation StartLoc, SourceLocation EndLoc)
- : OMPLoopBasedDirective(OMPUnrollDirectiveClass, llvm::omp::OMPD_unroll,
- StartLoc, EndLoc, 1) {}
+ : OMPLoopTransformationDirective(OMPUnrollDirectiveClass,
+ llvm::omp::OMPD_unroll, StartLoc, EndLoc,
+ 1) {}
/// Set the pre-init statements.
void setPreInits(Stmt *PreInits) {
@@ -5111,7 +5196,7 @@ public:
static OMPUnrollDirective *
Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
- Stmt *TransformedStmt, Stmt *PreInits);
+ unsigned NumGeneratedLoops, Stmt *TransformedStmt, Stmt *PreInits);
/// Build an empty '#pragma omp unroll' AST node for deserialization.
///
@@ -5360,6 +5445,107 @@ public:
}
};
+/// This represents '#pragma omp metadirective' directive.
+///
+/// \code
+/// #pragma omp metadirective when(user={condition(N>10)}: parallel for)
+/// \endcode
+/// In this example directive '#pragma omp metadirective' has a 'when' clause
+/// with a dynamic user condition that checks whether 'N > 10' holds.
+///
+class OMPMetaDirective final : public OMPExecutableDirective {
+ friend class ASTStmtReader;
+ friend class OMPExecutableDirective;
+ Stmt *IfStmt;
+
+ OMPMetaDirective(SourceLocation StartLoc, SourceLocation EndLoc)
+ : OMPExecutableDirective(OMPMetaDirectiveClass,
+ llvm::omp::OMPD_metadirective, StartLoc,
+ EndLoc) {}
+ explicit OMPMetaDirective()
+ : OMPExecutableDirective(OMPMetaDirectiveClass,
+ llvm::omp::OMPD_metadirective, SourceLocation(),
+ SourceLocation()) {}
+
+ void setIfStmt(Stmt *S) { IfStmt = S; }
+
+public:
+ static OMPMetaDirective *Create(const ASTContext &C, SourceLocation StartLoc,
+ SourceLocation EndLoc,
+ ArrayRef<OMPClause *> Clauses,
+ Stmt *AssociatedStmt, Stmt *IfStmt);
+ static OMPMetaDirective *CreateEmpty(const ASTContext &C, unsigned NumClauses,
+ EmptyShell);
+ Stmt *getIfStmt() const { return IfStmt; }
+
+ static bool classof(const Stmt *T) {
+ return T->getStmtClass() == OMPMetaDirectiveClass;
+ }
+};
+
+/// This represents '#pragma omp loop' directive.
+///
+/// \code
+/// #pragma omp loop private(a,b) binding(parallel) order(concurrent)
+/// \endcode
+/// In this example directive '#pragma omp loop' has
+/// clauses 'private' with the variables 'a' and 'b', 'binding' with
+/// modifier 'parallel', and 'order(concurrent)'.
+///
+class OMPGenericLoopDirective final : public OMPLoopDirective {
+ friend class ASTStmtReader;
+ friend class OMPExecutableDirective;
+ /// Build directive with the given start and end location.
+ ///
+ /// \param StartLoc Starting location of the directive kind.
+ /// \param EndLoc Ending location of the directive.
+ /// \param CollapsedNum Number of collapsed nested loops.
+ ///
+ OMPGenericLoopDirective(SourceLocation StartLoc, SourceLocation EndLoc,
+ unsigned CollapsedNum)
+ : OMPLoopDirective(OMPGenericLoopDirectiveClass, llvm::omp::OMPD_loop,
+ StartLoc, EndLoc, CollapsedNum) {}
+
+ /// Build an empty directive.
+ ///
+ /// \param CollapsedNum Number of collapsed nested loops.
+ ///
+ explicit OMPGenericLoopDirective(unsigned CollapsedNum)
+ : OMPLoopDirective(OMPGenericLoopDirectiveClass, llvm::omp::OMPD_loop,
+ SourceLocation(), SourceLocation(), CollapsedNum) {}
+
+public:
+ /// Creates directive with a list of \p Clauses.
+ ///
+ /// \param C AST context.
+ /// \param StartLoc Starting location of the directive kind.
+ /// \param EndLoc Ending Location of the directive.
+ /// \param CollapsedNum Number of collapsed loops.
+ /// \param Clauses List of clauses.
+ /// \param AssociatedStmt Statement, associated with the directive.
+ /// \param Exprs Helper expressions for CodeGen.
+ ///
+ static OMPGenericLoopDirective *
+ Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
+ unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses,
+ Stmt *AssociatedStmt, const HelperExprs &Exprs);
+
+ /// Creates an empty directive with a place for \a NumClauses clauses.
+ ///
+ /// \param C AST context.
+ /// \param NumClauses Number of clauses.
+ /// \param CollapsedNum Number of collapsed nested loops.
+ ///
+ static OMPGenericLoopDirective *CreateEmpty(const ASTContext &C,
+ unsigned NumClauses,
+ unsigned CollapsedNum,
+ EmptyShell);
+
+ static bool classof(const Stmt *T) {
+ return T->getStmtClass() == OMPGenericLoopDirectiveClass;
+ }
+};
+
} // end namespace clang
#endif
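The transformation callback of doForAllLoops now receives the new OMPLoopTransformationDirective base, which covers both '#pragma omp tile' and '#pragma omp unroll'. A hedged sketch of a caller, where Body and NumLoops are assumed to come from the enclosing directive:

  // Sketch only; Body and NumLoops are assumed from the calling context.
  OMPLoopBasedDirective::doForAllLoops(
      Body, /*TryImperfectlyNestedLoops=*/true, NumLoops,
      [](unsigned, Stmt *Loop) {
        Loop->dumpColor();   // per-loop callback
        return false;
      },
      [](OMPLoopTransformationDirective *Tr) {
        llvm::errs() << "transformation generates "
                     << Tr->getNumGeneratedLoops() << " loop(s)\n";
      });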
diff --git a/clang/include/clang/AST/TemplateName.h b/clang/include/clang/AST/TemplateName.h
index 010b813dc525..2befb5c1b45e 100644
--- a/clang/include/clang/AST/TemplateName.h
+++ b/clang/include/clang/AST/TemplateName.h
@@ -309,16 +309,17 @@ public:
/// unexpanded parameter pack (for C++0x variadic templates).
bool containsUnexpandedParameterPack() const;
+ enum class Qualified { None, AsWritten, Fully };
/// Print the template name.
///
/// \param OS the output stream to which the template name will be
/// printed.
///
- /// \param SuppressNNS if true, don't print the
- /// nested-name-specifier that precedes the template name (if it has
- /// one).
+ /// \param Qual print the (Qualified::None) simple name,
+ /// (Qualified::AsWritten) any written (possibly partial) qualifier, or
+ /// (Qualified::Fully) the fully qualified name.
void print(raw_ostream &OS, const PrintingPolicy &Policy,
- bool SuppressNNS = false) const;
+ Qualified Qual = Qualified::AsWritten) const;
/// Debugging aid that dumps the template name.
void dump(raw_ostream &OS) const;
diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h
index 9f46d5337897..fd25ec25d4f2 100644
--- a/clang/include/clang/AST/Type.h
+++ b/clang/include/clang/AST/Type.h
@@ -495,7 +495,12 @@ public:
(A == LangAS::Default &&
(B == LangAS::sycl_private || B == LangAS::sycl_local ||
B == LangAS::sycl_global || B == LangAS::sycl_global_device ||
- B == LangAS::sycl_global_host));
+ B == LangAS::sycl_global_host)) ||
+ // In HIP device compilation, any cuda address space is allowed
+ // to implicitly cast into the default address space.
+ (A == LangAS::Default &&
+ (B == LangAS::cuda_constant || B == LangAS::cuda_device ||
+ B == LangAS::cuda_shared));
}
/// Returns true if the address space in these qualifiers is equal to or
@@ -1998,6 +2003,7 @@ public:
bool isFloat16Type() const; // C11 extension ISO/IEC TS 18661
bool isBFloat16Type() const;
bool isFloat128Type() const;
+ bool isIbm128Type() const;
bool isRealType() const; // C99 6.2.5p17 (real floating + integer)
bool isArithmeticType() const; // C99 6.2.5p18 (integer + floating)
bool isVoidType() const; // C99 6.2.5p19
@@ -2545,7 +2551,7 @@ public:
}
bool isFloatingPoint() const {
- return getKind() >= Half && getKind() <= Float128;
+ return getKind() >= Half && getKind() <= Ibm128;
}
/// Determines whether the given kind corresponds to a placeholder type.
@@ -3450,10 +3456,6 @@ class ConstantMatrixType final : public MatrixType {
protected:
friend class ASTContext;
- /// The element type of the matrix.
- // FIXME: Appears to be unused? There is also MatrixType::ElementType...
- QualType ElementType;
-
/// Number of rows and columns.
unsigned NumRows;
unsigned NumColumns;
@@ -3523,14 +3525,10 @@ class DependentSizedMatrixType final : public MatrixType {
Expr *ColumnExpr, SourceLocation loc);
public:
- QualType getElementType() const { return ElementType; }
Expr *getRowExpr() const { return RowExpr; }
Expr *getColumnExpr() const { return ColumnExpr; }
SourceLocation getAttributeLoc() const { return loc; }
- bool isSugared() const { return false; }
- QualType desugar() const { return QualType(this, 0); }
-
static bool classof(const Type *T) {
return T->getTypeClass() == DependentSizedMatrix;
}
@@ -4946,29 +4944,29 @@ public:
/// type-dependent, there is no deduced type and the type is canonical. In
/// the latter case, it is also a dependent type.
class DeducedType : public Type {
+ QualType DeducedAsType;
+
protected:
DeducedType(TypeClass TC, QualType DeducedAsType,
- TypeDependence ExtraDependence)
- : Type(TC,
- // FIXME: Retain the sugared deduced type?
- DeducedAsType.isNull() ? QualType(this, 0)
- : DeducedAsType.getCanonicalType(),
+ TypeDependence ExtraDependence, QualType Canon)
+ : Type(TC, Canon,
ExtraDependence | (DeducedAsType.isNull()
? TypeDependence::None
: DeducedAsType->getDependence() &
- ~TypeDependence::VariablyModified)) {}
+ ~TypeDependence::VariablyModified)),
+ DeducedAsType(DeducedAsType) {}
public:
- bool isSugared() const { return !isCanonicalUnqualified(); }
- QualType desugar() const { return getCanonicalTypeInternal(); }
-
- /// Get the type deduced for this placeholder type, or null if it's
- /// either not been deduced or was deduced to a dependent type.
- QualType getDeducedType() const {
- return !isCanonicalUnqualified() ? getCanonicalTypeInternal() : QualType();
+ bool isSugared() const { return !DeducedAsType.isNull(); }
+ QualType desugar() const {
+ return isSugared() ? DeducedAsType : QualType(this, 0);
}
+
+ /// Get the type deduced for this placeholder type, or null if it
+ /// has not been deduced.
+ QualType getDeducedType() const { return DeducedAsType; }
bool isDeduced() const {
- return !isCanonicalUnqualified() || isDependentType();
+ return !DeducedAsType.isNull() || isDependentType();
}
static bool classof(const Type *T) {
@@ -4985,7 +4983,7 @@ class alignas(8) AutoType : public DeducedType, public llvm::FoldingSetNode {
ConceptDecl *TypeConstraintConcept;
AutoType(QualType DeducedAsType, AutoTypeKeyword Keyword,
- TypeDependence ExtraDependence, ConceptDecl *CD,
+ TypeDependence ExtraDependence, QualType Canon, ConceptDecl *CD,
ArrayRef<TemplateArgument> TypeConstraintArgs);
const TemplateArgument *getArgBuffer() const {
@@ -5059,7 +5057,9 @@ class DeducedTemplateSpecializationType : public DeducedType,
toTypeDependence(Template.getDependence()) |
(IsDeducedAsDependent
? TypeDependence::DependentInstantiation
- : TypeDependence::None)),
+ : TypeDependence::None),
+ DeducedAsType.isNull() ? QualType(this, 0)
+ : DeducedAsType.getCanonicalType()),
Template(Template) {}
public:
@@ -6018,10 +6018,9 @@ inline ObjCProtocolDecl **ObjCTypeParamType::getProtocolStorageImpl() {
class ObjCInterfaceType : public ObjCObjectType {
friend class ASTContext; // ASTContext creates these.
friend class ASTReader;
- friend class ObjCInterfaceDecl;
template <class T> friend class serialization::AbstractTypeReader;
- mutable ObjCInterfaceDecl *Decl;
+ ObjCInterfaceDecl *Decl;
ObjCInterfaceType(const ObjCInterfaceDecl *D)
: ObjCObjectType(Nonce_ObjCInterface),
@@ -6029,7 +6028,7 @@ class ObjCInterfaceType : public ObjCObjectType {
public:
/// Get the declaration of this interface.
- ObjCInterfaceDecl *getDecl() const { return Decl; }
+ ObjCInterfaceDecl *getDecl() const;
bool isSugared() const { return false; }
QualType desugar() const { return QualType(this, 0); }
@@ -6976,6 +6975,10 @@ inline bool Type::isFloat128Type() const {
return isSpecificBuiltinType(BuiltinType::Float128);
}
+inline bool Type::isIbm128Type() const {
+ return isSpecificBuiltinType(BuiltinType::Ibm128);
+}
+
inline bool Type::isNullPtrType() const {
return isSpecificBuiltinType(BuiltinType::NullPtr);
}
@@ -7144,7 +7147,7 @@ inline const StreamingDiagnostic &operator<<(const StreamingDiagnostic &PD,
/// into a diagnostic with <<.
inline const StreamingDiagnostic &operator<<(const StreamingDiagnostic &PD,
QualType T) {
- PD.AddTaggedVal(reinterpret_cast<intptr_t>(T.getAsOpaquePtr()),
+ PD.AddTaggedVal(reinterpret_cast<uint64_t>(T.getAsOpaquePtr()),
DiagnosticsEngine::ak_qualtype);
return PD;
}
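DeducedType now carries the deduced type as sugar, so desugar() yields the possibly sugared deduced type rather than the canonical one. A small sketch, assuming a QualType QT from the caller:

  // Sketch only; QT is an assumed QualType.
  if (const auto *AT = QT->getAs<AutoType>())
    if (!AT->getDeducedType().isNull()) {
      QualType Deduced = AT->getDeducedType(); // may now keep typedef sugar
      Deduced.dump();
    }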
diff --git a/clang/include/clang/AST/TypeLoc.h b/clang/include/clang/AST/TypeLoc.h
index 65e95d52c303..bb668c1980fe 100644
--- a/clang/include/clang/AST/TypeLoc.h
+++ b/clang/include/clang/AST/TypeLoc.h
@@ -581,10 +581,9 @@ public:
bool needsExtraLocalData() const {
BuiltinType::Kind bk = getTypePtr()->getKind();
- return (bk >= BuiltinType::UShort && bk <= BuiltinType::UInt128)
- || (bk >= BuiltinType::Short && bk <= BuiltinType::Float128)
- || bk == BuiltinType::UChar
- || bk == BuiltinType::SChar;
+ return (bk >= BuiltinType::UShort && bk <= BuiltinType::UInt128) ||
+ (bk >= BuiltinType::Short && bk <= BuiltinType::Ibm128) ||
+ bk == BuiltinType::UChar || bk == BuiltinType::SChar;
}
unsigned getExtraLocalDataSize() const {
diff --git a/clang/include/clang/AST/TypeOrdering.h b/clang/include/clang/AST/TypeOrdering.h
index 6630105136f5..8037f98cc965 100644
--- a/clang/include/clang/AST/TypeOrdering.h
+++ b/clang/include/clang/AST/TypeOrdering.h
@@ -34,7 +34,6 @@ struct QualTypeOrdering {
}
namespace llvm {
- template<class> struct DenseMapInfo;
template<> struct DenseMapInfo<clang::QualType> {
static inline clang::QualType getEmptyKey() { return clang::QualType(); }
diff --git a/clang/include/clang/ASTMatchers/ASTMatchFinder.h b/clang/include/clang/ASTMatchers/ASTMatchFinder.h
index 91024f9425e0..dafafa151a60 100644
--- a/clang/include/clang/ASTMatchers/ASTMatchFinder.h
+++ b/clang/include/clang/ASTMatchers/ASTMatchFinder.h
@@ -167,6 +167,7 @@ public:
MatchCallback *Action);
void addMatcher(const TemplateArgumentLocMatcher &NodeMatch,
MatchCallback *Action);
+ void addMatcher(const AttrMatcher &NodeMatch, MatchCallback *Action);
/// @}
/// Adds a matcher to execute when running over the AST.
@@ -219,6 +220,7 @@ public:
std::vector<std::pair<CXXCtorInitializerMatcher, MatchCallback *>> CtorInit;
std::vector<std::pair<TemplateArgumentLocMatcher, MatchCallback *>>
TemplateArgumentLoc;
+ std::vector<std::pair<AttrMatcher, MatchCallback *>> Attr;
/// All the callbacks in one container to simplify iteration.
llvm::SmallPtrSet<MatchCallback *, 16> AllCallbacks;
};
diff --git a/clang/include/clang/ASTMatchers/ASTMatchers.h b/clang/include/clang/ASTMatchers/ASTMatchers.h
index 8e3ee6cb9e7e..d6e5b215462b 100644
--- a/clang/include/clang/ASTMatchers/ASTMatchers.h
+++ b/clang/include/clang/ASTMatchers/ASTMatchers.h
@@ -148,6 +148,8 @@ using CXXBaseSpecifierMatcher = internal::Matcher<CXXBaseSpecifier>;
using CXXCtorInitializerMatcher = internal::Matcher<CXXCtorInitializer>;
using TemplateArgumentMatcher = internal::Matcher<TemplateArgument>;
using TemplateArgumentLocMatcher = internal::Matcher<TemplateArgumentLoc>;
+using LambdaCaptureMatcher = internal::Matcher<LambdaCapture>;
+using AttrMatcher = internal::Matcher<Attr>;
/// @}
/// Matches any node.
@@ -307,7 +309,7 @@ AST_POLYMORPHIC_MATCHER_REGEX(isExpansionInFileMatching,
/// Matches statements that are (transitively) expanded from the named macro.
/// Does not match if only part of the statement is expanded from that macro or
-/// if different parts of the the statement are expanded from different
+/// if different parts of the statement are expanded from different
/// appearances of the macro.
AST_POLYMORPHIC_MATCHER_P(isExpandedFromMacro,
AST_POLYMORPHIC_SUPPORTED_TYPES(Decl, Stmt, TypeLoc),
@@ -752,9 +754,11 @@ AST_MATCHER_P(ClassTemplateSpecializationDecl, hasSpecializedTemplate,
InnerMatcher.matches(*Decl, Finder, Builder));
}
-/// Matches a declaration that has been implicitly added
-/// by the compiler (eg. implicit default/copy constructors).
-AST_MATCHER(Decl, isImplicit) {
+/// Matches an entity that has been implicitly added by the compiler (e.g.
+/// implicit default/copy constructors).
+AST_POLYMORPHIC_MATCHER(isImplicit,
+ AST_POLYMORPHIC_SUPPORTED_TYPES(Decl, Attr,
+ LambdaCapture)) {
return Node.isImplicit();
}
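A minimal sketch of how the broadened matcher composes, assuming the usual clang::ast_matchers using-directives; lambdaCapture() and the LambdaCapture form of hasAnyCapture() are the ones introduced further down in this header, and the variable names are illustrative.

    // Narrowing declarations works as before.
    auto ImplicitCopyCtor = cxxConstructorDecl(isCopyConstructor(), isImplicit());
    // Narrowing lambda captures: matches captures synthesized for [=] / [&].
    auto ImplicitCapture = lambdaExpr(hasAnyCapture(lambdaCapture(isImplicit())));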
@@ -3489,8 +3493,8 @@ internal::Matcher<T> findAll(const internal::Matcher<T> &Matcher) {
/// Usable as: Any Matcher
extern const internal::ArgumentAdaptingMatcherFunc<
internal::HasParentMatcher,
- internal::TypeList<Decl, NestedNameSpecifierLoc, Stmt, TypeLoc>,
- internal::TypeList<Decl, NestedNameSpecifierLoc, Stmt, TypeLoc>>
+ internal::TypeList<Decl, NestedNameSpecifierLoc, Stmt, TypeLoc, Attr>,
+ internal::TypeList<Decl, NestedNameSpecifierLoc, Stmt, TypeLoc, Attr>>
hasParent;
/// Matches AST nodes that have an ancestor that matches the provided
@@ -3506,8 +3510,8 @@ extern const internal::ArgumentAdaptingMatcherFunc<
/// Usable as: Any Matcher
extern const internal::ArgumentAdaptingMatcherFunc<
internal::HasAncestorMatcher,
- internal::TypeList<Decl, NestedNameSpecifierLoc, Stmt, TypeLoc>,
- internal::TypeList<Decl, NestedNameSpecifierLoc, Stmt, TypeLoc>>
+ internal::TypeList<Decl, NestedNameSpecifierLoc, Stmt, TypeLoc, Attr>,
+ internal::TypeList<Decl, NestedNameSpecifierLoc, Stmt, TypeLoc, Attr>>
hasAncestor;
/// Matches if the provided matcher does not match.
@@ -4201,6 +4205,45 @@ AST_MATCHER_P(
InnerMatcher.matches(*Initializer, Finder, Builder));
}
+/// Matches a variable serving as the implicit variable for a lambda init-
+/// capture.
+///
+/// Example matches x (matcher = varDecl(isInitCapture()))
+/// \code
+/// auto f = [x=3]() { return x; };
+/// \endcode
+AST_MATCHER(VarDecl, isInitCapture) { return Node.isInitCapture(); }
+
+/// Matches each lambda capture in a lambda expression.
+///
+/// Given
+/// \code
+/// int main() {
+/// int x, y;
+/// float z;
+/// auto f = [=]() { return x + y + z; };
+/// }
+/// \endcode
+/// lambdaExpr(forEachLambdaCapture(
+/// lambdaCapture(capturesVar(varDecl(hasType(isInteger()))))))
+/// will trigger two matches, binding for 'x' and 'y' respectively.
+AST_MATCHER_P(LambdaExpr, forEachLambdaCapture, LambdaCaptureMatcher,
+ InnerMatcher) {
+ BoundNodesTreeBuilder Result;
+ bool Matched = false;
+ for (const auto &Capture : Node.captures()) {
+ if (Finder->isTraversalIgnoringImplicitNodes() && Capture.isImplicit())
+ continue;
+ BoundNodesTreeBuilder CaptureBuilder(*Builder);
+ if (InnerMatcher.matches(Capture, Finder, &CaptureBuilder)) {
+ Matched = true;
+ Result.addMatch(CaptureBuilder);
+ }
+ }
+ *Builder = std::move(Result);
+ return Matched;
+}
+
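A sketch of wiring this matcher into a MatchFinder callback, assuming the ASTMatchers, ASTMatchFinder, and raw_ostream headers are available; the binding name "captured" and the reporter type are illustrative.

    #include "clang/ASTMatchers/ASTMatchFinder.h"
    #include "clang/ASTMatchers/ASTMatchers.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace clang;
    using namespace clang::ast_matchers;

    // Prints the name of every integer variable captured by a lambda.
    struct CaptureReporter : MatchFinder::MatchCallback {
      void run(const MatchFinder::MatchResult &Result) override {
        if (const auto *Var = Result.Nodes.getNodeAs<VarDecl>("captured"))
          llvm::outs() << "captured: " << Var->getName() << "\n";
      }
    };

    void addCaptureMatcher(MatchFinder &Finder, CaptureReporter &Reporter) {
      Finder.addMatcher(
          lambdaExpr(forEachLambdaCapture(lambdaCapture(
              capturesVar(varDecl(hasType(isInteger())).bind("captured"))))),
          &Reporter);
    }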
/// \brief Matches a static variable with local scope.
///
/// Example matches y (matcher = varDecl(isStaticLocal()))
@@ -4586,50 +4629,80 @@ AST_POLYMORPHIC_MATCHER_P(hasAnyArgument,
return false;
}
-/// Matches any capture of a lambda expression.
+/// Matches lambda captures.
///
/// Given
/// \code
-/// void foo() {
+/// int main() {
/// int x;
/// auto f = [x](){};
+/// auto g = [x = 1](){};
/// }
/// \endcode
-/// lambdaExpr(hasAnyCapture(anything()))
-/// matches [x](){};
-AST_MATCHER_P_OVERLOAD(LambdaExpr, hasAnyCapture, internal::Matcher<VarDecl>,
- InnerMatcher, 0) {
+/// In the matcher `lambdaExpr(hasAnyCapture(lambdaCapture()))`,
+/// `lambdaCapture()` matches `x` and `x=1`.
+extern const internal::VariadicAllOfMatcher<LambdaCapture> lambdaCapture;
+
+/// Matches any capture in a lambda expression.
+///
+/// Given
+/// \code
+/// void foo() {
+/// int t = 5;
+/// auto f = [=](){ return t; };
+/// }
+/// \endcode
+/// lambdaExpr(hasAnyCapture(lambdaCapture())) and
+/// lambdaExpr(hasAnyCapture(lambdaCapture(refersToVarDecl(hasName("t")))))
+/// both match `[=](){ return t; }`.
+AST_MATCHER_P(LambdaExpr, hasAnyCapture, LambdaCaptureMatcher, InnerMatcher) {
for (const LambdaCapture &Capture : Node.captures()) {
- if (Capture.capturesVariable()) {
- BoundNodesTreeBuilder Result(*Builder);
- if (InnerMatcher.matches(*Capture.getCapturedVar(), Finder, &Result)) {
- *Builder = std::move(Result);
- return true;
- }
+ clang::ast_matchers::internal::BoundNodesTreeBuilder Result(*Builder);
+ if (InnerMatcher.matches(Capture, Finder, &Result)) {
+ *Builder = std::move(Result);
+ return true;
}
}
return false;
}
-/// Matches any capture of 'this' in a lambda expression.
+/// Matches a `LambdaCapture` that refers to the specified `VarDecl`. The
+/// `VarDecl` can be a separate variable that is captured by value or
+/// reference, or a synthesized variable if the capture has an initializer.
///
/// Given
/// \code
-/// struct foo {
-/// void bar() {
-/// auto f = [this](){};
-/// }
+/// void foo() {
+/// int x;
+/// auto f = [x](){};
+/// auto g = [x = 1](){};
/// }
/// \endcode
-/// lambdaExpr(hasAnyCapture(cxxThisExpr()))
-/// matches [this](){};
-AST_MATCHER_P_OVERLOAD(LambdaExpr, hasAnyCapture,
- internal::Matcher<CXXThisExpr>, InnerMatcher, 1) {
- return llvm::any_of(Node.captures(), [](const LambdaCapture &LC) {
- return LC.capturesThis();
- });
+/// In the matcher
+/// lambdaExpr(hasAnyCapture(lambdaCapture(capturesVar(hasName("x"))))),
+/// capturesVar(hasName("x")) matches `x` and `x = 1`.
+AST_MATCHER_P(LambdaCapture, capturesVar, internal::Matcher<VarDecl>,
+ InnerMatcher) {
+ auto *capturedVar = Node.getCapturedVar();
+ return capturedVar && InnerMatcher.matches(*capturedVar, Finder, Builder);
}
+/// Matches a `LambdaCapture` that refers to 'this'.
+///
+/// Given
+/// \code
+/// class C {
+/// int cc;
+/// int f() {
+/// auto l = [this]() { return cc; };
+/// return l();
+/// }
+/// };
+/// \endcode
+/// lambdaExpr(hasAnyCapture(lambdaCapture(capturesThis())))
+/// matches `[this]() { return cc; }`.
+AST_MATCHER(LambdaCapture, capturesThis) { return Node.capturesThis(); }
+
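A short sketch combining the reworked capture matchers, assuming the clang::ast_matchers namespace is in scope; the matcher variables are illustrative.

    // Lambdas that capture the enclosing object.
    auto CapturesThis =
        lambdaExpr(hasAnyCapture(lambdaCapture(capturesThis())));
    // Lambdas that capture a variable named "x", whether by value, by
    // reference, or through the synthesized variable of an init-capture.
    auto CapturesX = lambdaExpr(
        hasAnyCapture(lambdaCapture(capturesVar(varDecl(hasName("x"))))));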
/// Matches a constructor call expression which uses list initialization.
AST_MATCHER(CXXConstructExpr, isListInitialization) {
return Node.isListInitialization();
@@ -5875,6 +5948,10 @@ AST_MATCHER(CXXMethodDecl, isVirtualAsWritten) {
return Node.isVirtualAsWritten();
}
+AST_MATCHER(CXXConstructorDecl, isInheritingConstructor) {
+ return Node.isInheritingConstructor();
+}
+
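A brief sketch of what this narrowing matcher selects; the Base/Derived names are illustrative.

    // struct Base { Base(int); };
    // struct Derived : Base { using Base::Base; };
    //
    // Matches the constructor Derived inherits through the using-declaration,
    // but not Base(int) itself.
    auto InheritedCtor = cxxConstructorDecl(isInheritingConstructor()).bind("ctor");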
/// Matches if the given method or class declaration is final.
///
/// Given:
@@ -6333,6 +6410,187 @@ AST_MATCHER_FUNCTION_P_OVERLOAD(internal::BindableMatcher<TypeLoc>, loc,
new internal::TypeLocTypeMatcher(InnerMatcher));
}
+/// Matches `QualifiedTypeLoc`s in the clang AST.
+///
+/// Given
+/// \code
+/// const int x = 0;
+/// \endcode
+/// qualifiedTypeLoc()
+/// matches `const int`.
+extern const internal::VariadicDynCastAllOfMatcher<TypeLoc, QualifiedTypeLoc>
+ qualifiedTypeLoc;
+
+/// Matches `QualifiedTypeLoc`s that have an unqualified `TypeLoc` matching
+/// `InnerMatcher`.
+///
+/// Given
+/// \code
+/// int* const x;
+/// const int y;
+/// \endcode
+/// qualifiedTypeLoc(hasUnqualifiedLoc(pointerTypeLoc()))
+/// matches the `TypeLoc` of the variable declaration of `x`, but not `y`.
+AST_MATCHER_P(QualifiedTypeLoc, hasUnqualifiedLoc, internal::Matcher<TypeLoc>,
+ InnerMatcher) {
+ return InnerMatcher.matches(Node.getUnqualifiedLoc(), Finder, Builder);
+}
+
+/// Matches a function declared with the specified return `TypeLoc`.
+///
+/// Given
+/// \code
+/// int f() { return 5; }
+/// void g() {}
+/// \endcode
+/// functionDecl(hasReturnTypeLoc(loc(asString("int"))))
+/// matches the declaration of `f`, but not `g`.
+AST_MATCHER_P(FunctionDecl, hasReturnTypeLoc, internal::Matcher<TypeLoc>,
+ ReturnMatcher) {
+ auto Loc = Node.getFunctionTypeLoc();
+ return Loc && ReturnMatcher.matches(Loc.getReturnLoc(), Finder, Builder);
+}
+
+/// Matches pointer `TypeLoc`s.
+///
+/// Given
+/// \code
+/// int* x;
+/// \endcode
+/// pointerTypeLoc()
+/// matches `int*`.
+extern const internal::VariadicDynCastAllOfMatcher<TypeLoc, PointerTypeLoc>
+ pointerTypeLoc;
+
+/// Matches pointer `TypeLoc`s that have a pointee `TypeLoc` matching
+/// `PointeeMatcher`.
+///
+/// Given
+/// \code
+/// int* x;
+/// \endcode
+/// pointerTypeLoc(hasPointeeLoc(loc(asString("int"))))
+/// matches `int*`.
+AST_MATCHER_P(PointerTypeLoc, hasPointeeLoc, internal::Matcher<TypeLoc>,
+ PointeeMatcher) {
+ return PointeeMatcher.matches(Node.getPointeeLoc(), Finder, Builder);
+}
+
+/// Matches reference `TypeLoc`s.
+///
+/// Given
+/// \code
+/// int x = 3;
+/// int& l = x;
+/// int&& r = 3;
+/// \endcode
+/// referenceTypeLoc()
+/// matches `int&` and `int&&`.
+extern const internal::VariadicDynCastAllOfMatcher<TypeLoc, ReferenceTypeLoc>
+ referenceTypeLoc;
+
+/// Matches reference `TypeLoc`s that have a referent `TypeLoc` matching
+/// `ReferentMatcher`.
+///
+/// Given
+/// \code
+/// int x = 3;
+/// int& xx = x;
+/// \endcode
+/// referenceTypeLoc(hasReferentLoc(loc(asString("int"))))
+/// matches `int&`.
+AST_MATCHER_P(ReferenceTypeLoc, hasReferentLoc, internal::Matcher<TypeLoc>,
+ ReferentMatcher) {
+ return ReferentMatcher.matches(Node.getPointeeLoc(), Finder, Builder);
+}
+
+/// Matches template specialization `TypeLoc`s.
+///
+/// Given
+/// \code
+/// template <typename T> class C {};
+/// C<char> var;
+/// \endcode
+/// varDecl(hasTypeLoc(templateSpecializationTypeLoc(typeLoc())))
+/// matches `C<char> var`.
+extern const internal::VariadicDynCastAllOfMatcher<
+ TypeLoc, TemplateSpecializationTypeLoc>
+ templateSpecializationTypeLoc;
+
+/// Matches template specialization `TypeLoc`s that have at least one
+/// `TemplateArgumentLoc` matching the given `InnerMatcher`.
+///
+/// Given
+/// \code
+/// template<typename T> class A {};
+/// A<int> a;
+/// \endcode
+/// varDecl(hasTypeLoc(templateSpecializationTypeLoc(hasAnyTemplateArgumentLoc(
+/// hasTypeLoc(loc(asString("int")))))))
+/// matches `A<int> a`.
+AST_MATCHER_P(TemplateSpecializationTypeLoc, hasAnyTemplateArgumentLoc,
+ internal::Matcher<TemplateArgumentLoc>, InnerMatcher) {
+ for (unsigned Index = 0, N = Node.getNumArgs(); Index < N; ++Index) {
+ clang::ast_matchers::internal::BoundNodesTreeBuilder Result(*Builder);
+ if (InnerMatcher.matches(Node.getArgLoc(Index), Finder, &Result)) {
+ *Builder = std::move(Result);
+ return true;
+ }
+ }
+ return false;
+}
+
+/// Matches template specialization `TypeLoc`s where the n'th
+/// `TemplateArgumentLoc` matches the given `InnerMatcher`.
+///
+/// Given
+/// \code
+/// template<typename T, typename U> class A {};
+/// A<double, int> b;
+/// A<int, double> c;
+/// \endcode
+/// varDecl(hasTypeLoc(templateSpecializationTypeLoc(hasTemplateArgumentLoc(0,
+/// hasTypeLoc(loc(asString("double")))))))
+/// matches `A<double, int> b`, but not `A<int, double> c`.
+AST_POLYMORPHIC_MATCHER_P2(
+ hasTemplateArgumentLoc,
+ AST_POLYMORPHIC_SUPPORTED_TYPES(DeclRefExpr, TemplateSpecializationTypeLoc),
+ unsigned, Index, internal::Matcher<TemplateArgumentLoc>, InnerMatcher) {
+ return internal::MatchTemplateArgLocAt(Node, Index, InnerMatcher, Finder,
+ Builder);
+}
+
+/// Matches C or C++ elaborated `TypeLoc`s.
+///
+/// Given
+/// \code
+/// struct s {};
+/// struct s ss;
+/// \endcode
+/// elaboratedTypeLoc()
+/// matches the `TypeLoc` of the variable declaration of `ss`.
+extern const internal::VariadicDynCastAllOfMatcher<TypeLoc, ElaboratedTypeLoc>
+ elaboratedTypeLoc;
+
+/// Matches elaborated `TypeLoc`s that have a named `TypeLoc` matching
+/// `InnerMatcher`.
+///
+/// Given
+/// \code
+/// template <typename T>
+/// class C {};
+/// class C<int> c;
+///
+/// class D {};
+/// class D d;
+/// \endcode
+/// elaboratedTypeLoc(hasNamedTypeLoc(templateSpecializationTypeLoc()));
+/// matches the `TypeLoc` of the variable declaration of `c`, but not `d`.
+AST_MATCHER_P(ElaboratedTypeLoc, hasNamedTypeLoc, internal::Matcher<TypeLoc>,
+ InnerMatcher) {
+ return InnerMatcher.matches(Node.getNamedTypeLoc(), Finder, Builder);
+}
+
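A sketch composing the new TypeLoc matchers with the pre-existing hasTypeLoc() and loc() adapters, assuming the clang::ast_matchers namespace; the matcher names are illustrative.

    // Variables whose written type is a pointer to int, e.g. `int *p;`.
    auto PointerToIntVar = varDecl(
        hasTypeLoc(pointerTypeLoc(hasPointeeLoc(loc(asString("int"))))));
    // Functions whose written return type is `int`, e.g. `int f();`.
    auto ReturnsInt = functionDecl(hasReturnTypeLoc(loc(asString("int"))));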
/// Matches type \c bool.
///
/// Given
@@ -7133,6 +7391,24 @@ AST_MATCHER_P(NestedNameSpecifier, specifiesNamespace,
return InnerMatcher.matches(*Node.getAsNamespace(), Finder, Builder);
}
+/// Matches attributes.
+/// Attributes may be attached with a variety of different syntaxes (including
+/// keywords, C++11 attributes, GNU ``__attribute__`` and MSVC ``__declspec``,
+/// and ``#pragma``s). They may also be implicit.
+///
+/// Given
+/// \code
+/// struct [[nodiscard]] Foo{};
+/// void bar(int * __attribute__((nonnull)) );
+/// __declspec(noinline) void baz();
+///
+/// #pragma omp declare simd
+/// int min();
+/// \endcode
+/// attr()
+/// matches "nodiscard", "nonnull", "noinline", and the whole "#pragma" line.
+extern const internal::VariadicAllOfMatcher<Attr> attr;
+
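A sketch pairing this node matcher with the MatchFinder::addMatcher(AttrMatcher, ...) overload added earlier in this patch; it assumes the ASTMatchers, ASTMatchFinder, and raw_ostream headers, and the reporter type is illustrative.

    using clang::ast_matchers::MatchFinder;

    // Prints the spelling of every attribute encountered in the AST.
    struct AttrReporter : MatchFinder::MatchCallback {
      void run(const MatchFinder::MatchResult &Result) override {
        if (const auto *A = Result.Nodes.getNodeAs<clang::Attr>("attr"))
          llvm::outs() << A->getSpelling() << "\n";
      }
    };

    void addAttrMatcher(MatchFinder &Finder, AttrReporter &Reporter) {
      Finder.addMatcher(clang::ast_matchers::attr().bind("attr"), &Reporter);
    }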
/// Overloads for the \c equalsNode matcher.
/// FIXME: Implement for other node types.
/// @{
diff --git a/clang/include/clang/ASTMatchers/ASTMatchersInternal.h b/clang/include/clang/ASTMatchers/ASTMatchersInternal.h
index 71f4f2d17ae3..a77611001fb1 100644
--- a/clang/include/clang/ASTMatchers/ASTMatchersInternal.h
+++ b/clang/include/clang/ASTMatchers/ASTMatchersInternal.h
@@ -312,8 +312,7 @@ public:
template <typename ExcludePredicate>
bool removeBindings(const ExcludePredicate &Predicate) {
- Bindings.erase(std::remove_if(Bindings.begin(), Bindings.end(), Predicate),
- Bindings.end());
+ llvm::erase_if(Bindings, Predicate);
return !Bindings.empty();
}
@@ -757,7 +756,8 @@ public:
std::is_base_of<NestedNameSpecifier, T>::value ||
std::is_base_of<NestedNameSpecifierLoc, T>::value ||
std::is_base_of<TypeLoc, T>::value ||
- std::is_base_of<QualType, T>::value,
+ std::is_base_of<QualType, T>::value ||
+ std::is_base_of<Attr, T>::value,
"unsupported type for recursive matching");
return matchesChildOf(DynTypedNode::create(Node), getASTContext(), Matcher,
Builder, Bind);
@@ -771,7 +771,8 @@ public:
std::is_base_of<NestedNameSpecifier, T>::value ||
std::is_base_of<NestedNameSpecifierLoc, T>::value ||
std::is_base_of<TypeLoc, T>::value ||
- std::is_base_of<QualType, T>::value,
+ std::is_base_of<QualType, T>::value ||
+ std::is_base_of<Attr, T>::value,
"unsupported type for recursive matching");
return matchesDescendantOf(DynTypedNode::create(Node), getASTContext(),
Matcher, Builder, Bind);
@@ -785,7 +786,8 @@ public:
static_assert(std::is_base_of<Decl, T>::value ||
std::is_base_of<NestedNameSpecifierLoc, T>::value ||
std::is_base_of<Stmt, T>::value ||
- std::is_base_of<TypeLoc, T>::value,
+ std::is_base_of<TypeLoc, T>::value ||
+ std::is_base_of<Attr, T>::value,
"type not allowed for recursive matching");
return matchesAncestorOf(DynTypedNode::create(Node), getASTContext(),
Matcher, Builder, MatchMode);
@@ -954,7 +956,7 @@ class HasNameMatcher : public SingleNodeMatcherInterface<NamedDecl> {
bool matchesNode(const NamedDecl &Node) const override;
- private:
+private:
/// Unqualified match routine.
///
/// It is much faster than the full match, but it only works for unqualified
@@ -1025,31 +1027,29 @@ private:
BoundNodesTreeBuilder *Builder) const {
// DeducedType does not have declarations of its own, so
// match the deduced type instead.
- const Type *EffectiveType = &Node;
if (const auto *S = dyn_cast<DeducedType>(&Node)) {
- EffectiveType = S->getDeducedType().getTypePtrOrNull();
- if (!EffectiveType)
- return false;
+ QualType DT = S->getDeducedType();
+ return !DT.isNull() ? matchesSpecialized(*DT, Finder, Builder) : false;
}
// First, for any types that have a declaration, extract the declaration and
// match on it.
- if (const auto *S = dyn_cast<TagType>(EffectiveType)) {
+ if (const auto *S = dyn_cast<TagType>(&Node)) {
return matchesDecl(S->getDecl(), Finder, Builder);
}
- if (const auto *S = dyn_cast<InjectedClassNameType>(EffectiveType)) {
+ if (const auto *S = dyn_cast<InjectedClassNameType>(&Node)) {
return matchesDecl(S->getDecl(), Finder, Builder);
}
- if (const auto *S = dyn_cast<TemplateTypeParmType>(EffectiveType)) {
+ if (const auto *S = dyn_cast<TemplateTypeParmType>(&Node)) {
return matchesDecl(S->getDecl(), Finder, Builder);
}
- if (const auto *S = dyn_cast<TypedefType>(EffectiveType)) {
+ if (const auto *S = dyn_cast<TypedefType>(&Node)) {
return matchesDecl(S->getDecl(), Finder, Builder);
}
- if (const auto *S = dyn_cast<UnresolvedUsingType>(EffectiveType)) {
+ if (const auto *S = dyn_cast<UnresolvedUsingType>(&Node)) {
return matchesDecl(S->getDecl(), Finder, Builder);
}
- if (const auto *S = dyn_cast<ObjCObjectType>(EffectiveType)) {
+ if (const auto *S = dyn_cast<ObjCObjectType>(&Node)) {
return matchesDecl(S->getInterface(), Finder, Builder);
}
@@ -1061,14 +1061,14 @@ private:
// template<typename T> struct X { T t; } class A {}; X<A> a;
// The following matcher will match, which otherwise would not:
// fieldDecl(hasType(pointerType())).
- if (const auto *S = dyn_cast<SubstTemplateTypeParmType>(EffectiveType)) {
+ if (const auto *S = dyn_cast<SubstTemplateTypeParmType>(&Node)) {
return matchesSpecialized(S->getReplacementType(), Finder, Builder);
}
// For template specialization types, we want to match the template
// declaration, as long as the type is still dependent, and otherwise the
// declaration of the instantiated tag type.
- if (const auto *S = dyn_cast<TemplateSpecializationType>(EffectiveType)) {
+ if (const auto *S = dyn_cast<TemplateSpecializationType>(&Node)) {
if (!S->isTypeAlias() && S->isSugared()) {
// If the template is non-dependent, we want to match the instantiated
// tag type.
@@ -1087,7 +1087,7 @@ private:
// FIXME: We desugar elaborated types. This makes the assumption that users
// do never want to match on whether a type is elaborated - there are
// arguments for both sides; for now, continue desugaring.
- if (const auto *S = dyn_cast<ElaboratedType>(EffectiveType)) {
+ if (const auto *S = dyn_cast<ElaboratedType>(&Node)) {
return matchesSpecialized(S->desugar(), Finder, Builder);
}
return false;
@@ -1175,7 +1175,8 @@ struct IsBaseType {
std::is_same<T, NestedNameSpecifier>::value ||
std::is_same<T, NestedNameSpecifierLoc>::value ||
std::is_same<T, CXXCtorInitializer>::value ||
- std::is_same<T, TemplateArgumentLoc>::value;
+ std::is_same<T, TemplateArgumentLoc>::value ||
+ std::is_same<T, Attr>::value;
};
template <typename T>
const bool IsBaseType<T>::value;
@@ -1185,7 +1186,7 @@ const bool IsBaseType<T>::value;
/// Useful for matchers like \c anything and \c unless.
using AllNodeBaseTypes =
TypeList<Decl, Stmt, NestedNameSpecifier, NestedNameSpecifierLoc, QualType,
- Type, TypeLoc, CXXCtorInitializer>;
+ Type, TypeLoc, CXXCtorInitializer, Attr>;
/// Helper meta-function to extract the argument out of a function of
/// type void(Arg).
@@ -1212,7 +1213,7 @@ template <class T, class Tuple> constexpr T *new_from_tuple(Tuple &&t) {
using AdaptativeDefaultFromTypes = AllNodeBaseTypes;
using AdaptativeDefaultToTypes =
TypeList<Decl, Stmt, NestedNameSpecifier, NestedNameSpecifierLoc, TypeLoc,
- QualType>;
+ QualType, Attr>;
/// All types that are supported by HasDeclarationMatcher above.
using HasDeclarationSupportedTypes =
@@ -2245,11 +2246,7 @@ public:
bool matchesNode(const T &Node) const override {
Optional<StringRef> OptOpName = getOpName(Node);
- if (!OptOpName)
- return false;
- return llvm::any_of(Names, [OpName = *OptOpName](const std::string &Name) {
- return Name == OpName;
- });
+ return OptOpName && llvm::is_contained(Names, *OptOpName);
}
private:
@@ -2304,6 +2301,26 @@ std::shared_ptr<llvm::Regex> createAndVerifyRegex(StringRef Regex,
llvm::Regex::RegexFlags Flags,
StringRef MatcherID);
+inline bool
+MatchTemplateArgLocAt(const DeclRefExpr &Node, unsigned int Index,
+ internal::Matcher<TemplateArgumentLoc> InnerMatcher,
+ internal::ASTMatchFinder *Finder,
+ internal::BoundNodesTreeBuilder *Builder) {
+ llvm::ArrayRef<TemplateArgumentLoc> ArgLocs = Node.template_arguments();
+ return Index < ArgLocs.size() &&
+ InnerMatcher.matches(ArgLocs[Index], Finder, Builder);
+}
+
+inline bool
+MatchTemplateArgLocAt(const TemplateSpecializationTypeLoc &Node,
+ unsigned int Index,
+ internal::Matcher<TemplateArgumentLoc> InnerMatcher,
+ internal::ASTMatchFinder *Finder,
+ internal::BoundNodesTreeBuilder *Builder) {
+ return !Node.isNull() && Index < Node.getNumArgs() &&
+ InnerMatcher.matches(Node.getArgLoc(Index), Finder, Builder);
+}
+
} // namespace internal
} // namespace ast_matchers
diff --git a/clang/include/clang/Analysis/Analyses/Dominators.h b/clang/include/clang/Analysis/Analyses/Dominators.h
index 25a5ba9d83fe..f588a5c7d1d7 100644
--- a/clang/include/clang/Analysis/Analyses/Dominators.h
+++ b/clang/include/clang/Analysis/Analyses/Dominators.h
@@ -202,7 +202,7 @@ struct ChildrenGetterTy<clang::CFGBlock, IsPostDom> {
auto Children = children<OrderedNodeTy>(N);
ChildrenTy Ret{Children.begin(), Children.end()};
- Ret.erase(std::remove(Ret.begin(), Ret.end(), nullptr), Ret.end());
+ llvm::erase_value(Ret, nullptr);
return Ret;
}
};
diff --git a/clang/include/clang/Analysis/CFG.h b/clang/include/clang/Analysis/CFG.h
index 9e32eb8e066a..f9223fe58a27 100644
--- a/clang/include/clang/Analysis/CFG.h
+++ b/clang/include/clang/Analysis/CFG.h
@@ -1337,6 +1337,7 @@ public:
const CFGBlock * getIndirectGotoBlock() const { return IndirectGotoBlock; }
using try_block_iterator = std::vector<const CFGBlock *>::const_iterator;
+ using try_block_range = llvm::iterator_range<try_block_iterator>;
try_block_iterator try_blocks_begin() const {
return TryDispatchBlocks.begin();
@@ -1346,6 +1347,10 @@ public:
return TryDispatchBlocks.end();
}
+ try_block_range try_blocks() const {
+ return try_block_range(try_blocks_begin(), try_blocks_end());
+ }
+
void addTryDispatchBlock(const CFGBlock *block) {
TryDispatchBlocks.push_back(block);
}
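A sketch of the range-based iteration this accessor enables, assuming "clang/Analysis/CFG.h" is included; the function name is illustrative.

    // Walks the C++ 'try' dispatch blocks of a CFG without spelling out the
    // begin/end iterator pair.
    void visitTryDispatchBlocks(const clang::CFG &Cfg) {
      for (const clang::CFGBlock *Block : Cfg.try_blocks())
        (void)Block; // inspect each dispatch block here
    }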
diff --git a/clang/include/clang/Analysis/CloneDetection.h b/clang/include/clang/Analysis/CloneDetection.h
index db827c3a6d6f..0b86c7fd86dd 100644
--- a/clang/include/clang/Analysis/CloneDetection.h
+++ b/clang/include/clang/Analysis/CloneDetection.h
@@ -235,9 +235,7 @@ public:
static void filterGroups(
std::vector<CloneDetector::CloneGroup> &CloneGroups,
llvm::function_ref<bool(const CloneDetector::CloneGroup &)> Filter) {
- CloneGroups.erase(
- std::remove_if(CloneGroups.begin(), CloneGroups.end(), Filter),
- CloneGroups.end());
+ llvm::erase_if(CloneGroups, Filter);
}
/// Splits the given CloneGroups until the given Compare function returns true
diff --git a/clang/include/clang/Analysis/PathDiagnostic.h b/clang/include/clang/Analysis/PathDiagnostic.h
index 539aa20b8168..235d26083191 100644
--- a/clang/include/clang/Analysis/PathDiagnostic.h
+++ b/clang/include/clang/Analysis/PathDiagnostic.h
@@ -75,14 +75,8 @@ struct PathDiagnosticConsumerOptions {
bool ShouldSerializeStats = false;
/// If the consumer intends to produce multiple output files, should it
- /// use randomly generated file names for these files (with the tiny risk of
- /// having random collisions) or deterministic human-readable file names
- /// (with a larger risk of deterministic collisions or invalid characters
- /// in the file name). We should not really give this choice to the users
- /// because deterministic mode is always superior when done right, but
- /// for some consumers this mode is experimental and needs to be
- /// off by default.
- bool ShouldWriteStableReportFilename = false;
+ /// use a pseudo-random file name or a human-readable file name.
+ bool ShouldWriteVerboseReportFilename = false;
/// Whether the consumer should treat consumed diagnostics as hard errors.
/// Useful for breaking your build when issues are found.
@@ -151,11 +145,14 @@ public:
/// Only runs visitors, no output generated.
None,
- /// Used for HTML, SARIF, and text output.
+ /// Used for SARIF and text output.
Minimal,
/// Used for plist output, used for "arrows" generation.
Extensive,
+
+ /// Used for HTML, shows both "arrows" and control notes.
+ Everything
};
virtual PathGenerationScheme getGenerationScheme() const { return Minimal; }
@@ -164,7 +161,11 @@ public:
return getGenerationScheme() != None;
}
- bool shouldAddPathEdges() const { return getGenerationScheme() == Extensive; }
+ bool shouldAddPathEdges() const { return getGenerationScheme() >= Extensive; }
+ bool shouldAddControlNotes() const {
+ return getGenerationScheme() == Minimal ||
+ getGenerationScheme() == Everything;
+ }
virtual bool supportsLogicalOpControlFlow() const { return false; }
@@ -552,7 +553,7 @@ public:
/// Return true if the diagnostic piece is prunable.
bool isPrunable() const {
- return IsPrunable.hasValue() ? IsPrunable.getValue() : false;
+ return IsPrunable.getValueOr(false);
}
void dump() const override;
diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 12d09181a2ea..d8f0fcd56550 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -848,6 +848,7 @@ def Availability : InheritableAttr {
[{static llvm::StringRef getPrettyPlatformName(llvm::StringRef Platform) {
return llvm::StringSwitch<llvm::StringRef>(Platform)
.Case("android", "Android")
+ .Case("fuchsia", "Fuchsia")
.Case("ios", "iOS")
.Case("macos", "macOS")
.Case("tvos", "tvOS")
@@ -1835,6 +1836,22 @@ def BPFPreserveAccessIndex : InheritableAttr,
let LangOpts = [COnly];
}
+def BTFDeclTag : InheritableAttr {
+ let Spellings = [Clang<"btf_decl_tag">];
+ let Args = [StringArgument<"BTFDeclTag">];
+ let Subjects = SubjectList<[Var, Function, Record, Field, TypedefName],
+ ErrorDiag>;
+ let Documentation = [BTFDeclTagDocs];
+ let LangOpts = [COnly];
+}
+
+def BTFTypeTag : TypeAttr {
+ let Spellings = [Clang<"btf_type_tag">];
+ let Args = [StringArgument<"BTFTypeTag">];
+ let Documentation = [BTFTypeTagDocs];
+ let LangOpts = [COnly];
+}
+
def WebAssemblyExportName : InheritableAttr,
TargetSpecificAttr<TargetWebAssembly> {
let Spellings = [Clang<"export_name">];
@@ -1971,7 +1988,7 @@ def NoReturn : InheritableAttr {
def NoInstrumentFunction : InheritableAttr {
let Spellings = [GCC<"no_instrument_function">];
- let Subjects = SubjectList<[Function]>;
+ let Subjects = SubjectList<[Function, ObjCMethod]>;
let Documentation = [Undocumented];
let SimpleHandler = 1;
}
@@ -2940,6 +2957,13 @@ def NoSanitizeSpecific : InheritableAttr {
let ASTNode = 0;
}
+def DisableSanitizerInstrumentation : InheritableAttr {
+ let Spellings = [Clang<"disable_sanitizer_instrumentation">];
+ let Subjects = SubjectList<[Function, ObjCMethod, GlobalVar]>;
+ let Documentation = [DisableSanitizerInstrumentationDocs];
+ let SimpleHandler = 1;
+}
+
def CFICanonicalJumpTable : InheritableAttr {
let Spellings = [Clang<"cfi_canonical_jump_table">];
let Subjects = SubjectList<[Function], ErrorDiag>;
@@ -3659,7 +3683,8 @@ def OMPAllocateDecl : InheritableAttr {
"OMPCGroupMemAlloc", "OMPPTeamMemAlloc", "OMPThreadMemAlloc",
"OMPUserDefinedMemAlloc"
]>,
- ExprArgument<"Allocator">
+ ExprArgument<"Allocator">,
+ ExprArgument<"Alignment">
];
let Documentation = [Undocumented];
}
@@ -3674,6 +3699,11 @@ def OMPDeclareVariant : InheritableAttr {
let Args = [
ExprArgument<"VariantFuncRef">,
OMPTraitInfoArgument<"TraitInfos">,
+ VariadicExprArgument<"AdjustArgsNothing">,
+ VariadicExprArgument<"AdjustArgsNeedDevicePtr">,
+ VariadicEnumArgument<"AppendArgs", "InteropType",
+ ["target", "targetsync", "target,targetsync"],
+ ["Target", "TargetSync", "Target_TargetSync"]>
];
let AdditionalMembers = [{
OMPTraitInfo &getTraitInfo() { return *traitInfos; }
@@ -3823,3 +3853,12 @@ def EnforceTCBLeaf : InheritableAttr {
let Documentation = [EnforceTCBLeafDocs];
bit InheritEvenIfAlreadyPresent = 1;
}
+
+def Error : InheritableAttr {
+ let Spellings = [GCC<"error">, GCC<"warning">];
+ let Accessors = [Accessor<"isError", [GCC<"error">]>,
+ Accessor<"isWarning", [GCC<"warning">]>];
+ let Args = [StringArgument<"UserDiagnostic">];
+ let Subjects = SubjectList<[Function], ErrorDiag>;
+ let Documentation = [ErrorAttrDocs];
+}
diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index c265a877e3b1..e7afb3699eb1 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -22,7 +22,7 @@
// Windows (from within the clang\docs directory):
// make.bat html
// Non-Windows (from within the clang\docs directory):
-// make -f Makefile.sphinx html
+// sphinx-build -b html . _build/html
def GlobalDocumentation {
code Intro =[{..
@@ -2011,6 +2011,34 @@ preserving struct or union member access debuginfo indices of this
struct or union, similar to clang ``__builtin_preserve_access_index()``.
}];
}
+def BTFDeclTagDocs : Documentation {
+ let Category = DocCatFunction;
+ let Content = [{
+Clang supports the ``__attribute__((btf_decl_tag("ARGUMENT")))`` attribute for
+all targets. This attribute may be attached to a struct/union, struct/union
+field, function, function parameter, variable or typedef declaration. If -g is
+specified, the ``ARGUMENT`` info will be preserved in IR and be emitted to
+dwarf. For BPF targets, the ``ARGUMENT`` info will be emitted to .BTF ELF
+section too.
+ }];
+}
+
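A minimal sketch of the attachment sites described above, compiled as C per the COnly language restriction; the tag strings and identifiers are illustrative.

    struct user_info {
      int uid __attribute__((btf_decl_tag("user.uid"))); /* field */
    };

    __attribute__((btf_decl_tag("checker")))              /* function */
    int check_user(struct user_info *info __attribute__((btf_decl_tag("ctx"))));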
+def BTFTypeTagDocs : Documentation {
+ let Category = DocCatType;
+ let Content = [{
+Clang supports the ``__attribute__((btf_type_tag("ARGUMENT")))`` attribute for
+all targets. It only has effect when ``-g`` is specified on the command line and
+is currently silently ignored when not applied to a pointer type (note: this
+scenario may be diagnosed in the future).
+
+The ``ARGUMENT`` string will be preserved in IR and emitted to DWARF for the
+types used in variable declarations, function declarations, or typedef
+declarations.
+
+For BPF targets, the ``ARGUMENT`` string will also be emitted to .BTF ELF
+section.
+ }];
+}
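A minimal sketch under the same assumptions (C, compiled with ``-g``); the "user" tag and the function name are illustrative.

    #define __user __attribute__((btf_type_tag("user")))

    /* The "user" tag on the pointee type of 'src' is preserved in DWARF,
       and additionally in the .BTF section on BPF targets. */
    long copy_from_user_example(void *dst, const void __user *src, unsigned long n);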
def MipsInterruptDocs : Documentation {
let Category = DocCatFunction;
@@ -2592,6 +2620,18 @@ full list of supported sanitizer flags.
}];
}
+def DisableSanitizerInstrumentationDocs : Documentation {
+ let Category = DocCatFunction;
+ let Content = [{
+Use the ``disable_sanitizer_instrumentation`` attribute on a function,
+Objective-C method, or global variable, to specify that no sanitizer
+instrumentation should be applied.
+
+This is not the same as ``__attribute__((no_sanitize(...)))``, which depending
+on the tool may still insert instrumentation to prevent false positive reports.
+ }];
+}
+
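A short sketch of the intended use; the function name is illustrative.

    /* No sanitizer instrumentation is emitted for this function, unlike
       no_sanitize(...), which may still insert checks to avoid false
       positives. */
    __attribute__((disable_sanitizer_instrumentation))
    void early_boot_hook(void) {
      /* runtime-internal work that must not be instrumented */
    }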
def NoSanitizeAddressDocs : Documentation {
let Category = DocCatFunction;
// This function has multiple distinct spellings, and so it requires a custom
@@ -5859,19 +5899,21 @@ def AcquireHandleDocs : Documentation {
If this annotation is on a function or a function type it is assumed to return
a new handle. In case this annotation is on an output parameter,
the function is assumed to fill the corresponding argument with a new
-handle.
+handle. The attribute requires a string literal argument which is used to
+identify the handle with later uses of ``use_handle`` or
+``release_handle``.
.. code-block:: c++
// Output arguments from Zircon.
zx_status_t zx_socket_create(uint32_t options,
- zx_handle_t __attribute__((acquire_handle)) * out0,
- zx_handle_t* out1 [[clang::acquire_handle]]);
+ zx_handle_t __attribute__((acquire_handle("zircon"))) * out0,
+ zx_handle_t* out1 [[clang::acquire_handle("zircon")]]);
// Returned handle.
- [[clang::acquire_handle]] int open(const char *path, int oflag, ... );
- int open(const char *path, int oflag, ... ) __attribute__((acquire_handle));
+ [[clang::acquire_handle("tag")]] int open(const char *path, int oflag, ... );
+ int open(const char *path, int oflag, ... ) __attribute__((acquire_handle("tag")));
}];
}
@@ -5879,12 +5921,13 @@ def UseHandleDocs : Documentation {
let Category = HandleDocs;
let Content = [{
A function taking a handle by value might close the handle. If a function
-parameter is annotated with ``use_handle`` it is assumed to not to change
+parameter is annotated with ``use_handle(tag)`` it is assumed not to change
the state of the handle. It is also assumed to require an open handle to work with.
+The attribute requires a string literal argument to identify the handle being used.
.. code-block:: c++
- zx_status_t zx_port_wait(zx_handle_t handle [[clang::use_handle]],
+ zx_status_t zx_port_wait(zx_handle_t handle [[clang::use_handle("zircon")]],
zx_time_t deadline,
zx_port_packet_t* packet);
}];
@@ -5893,12 +5936,13 @@ the state of the handle. It is also assumed to require an open handle to work wi
def ReleaseHandleDocs : Documentation {
let Category = HandleDocs;
let Content = [{
-If a function parameter is annotated with ``release_handle`` it is assumed to
-close the handle. It is also assumed to require an open handle to work with.
+If a function parameter is annotated with ``release_handle(tag)`` it is assumed to
+close the handle. It is also assumed to require an open handle to work with. The
+attribute requires a string literal argument to identify the handle being released.
.. code-block:: c++
- zx_status_t zx_handle_close(zx_handle_t handle [[clang::release_handle]]);
+ zx_status_t zx_handle_close(zx_handle_t handle [[clang::release_handle("tag")]]);
}];
}
@@ -6045,3 +6089,29 @@ def EnforceTCBLeafDocs : Documentation {
- ``enforce_tcb_leaf(Name)`` indicates that this function is a part of the TCB named ``Name``
}];
}
+
+def ErrorAttrDocs : Documentation {
+ let Category = DocCatFunction;
+ let Heading = "error, warning";
+ let Content = [{
+The ``error`` and ``warning`` function attributes can be used to specify a
+custom diagnostic to be emitted when a call to such a function is not
+eliminated via optimizations. This can be used to create compile time
+assertions that depend on optimizations, while providing diagnostics
+pointing to precise locations of the call site in the source.
+
+.. code-block:: c++
+
+ __attribute__((warning("oh no"))) void dontcall();
+ void foo() {
+ if (someCompileTimeAssertionThatsTrue)
+ dontcall(); // Warning
+
+ dontcall(); // Warning
+
+ if (someCompileTimeAssertionThatsFalse)
+ dontcall(); // No Warning
+ sizeof(dontcall()); // No Warning
+ }
+ }];
+}
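A companion sketch for the ``error`` spelling, with illustrative names; the diagnostic becomes a hard error whenever optimization fails to remove the call.

    __attribute__((error("size must be a multiple of 8"))) void bad_size(void);

    static inline void copy_block(unsigned long n) {
      if (n % 8 != 0)
        bad_size(); /* error unless the branch folds away at compile time */
    }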
diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def
index 0e3898537bcf..b05777889e79 100644
--- a/clang/include/clang/Basic/Builtins.def
+++ b/clang/include/clang/Basic/Builtins.def
@@ -80,9 +80,7 @@
// builtin even if type doesn't match signature, and don't warn if we
// can't be sure the type is right
// F -> this is a libc/libm function with a '__builtin_' prefix added.
-// f -> this is a libc/libm function without the '__builtin_' prefix. It can
-// be followed by ':headername:' to state which header this function
-// comes from.
+// f -> this is a libc/libm function without the '__builtin_' prefix.
// h -> this function requires a specific header or an explicit declaration.
// i -> this is a runtime library implemented function without the
// '__builtin_' prefix. It will be implemented in compiler-rt or libgcc.
@@ -645,6 +643,12 @@ BUILTIN(__builtin_alloca, "v*z" , "Fn")
BUILTIN(__builtin_alloca_with_align, "v*zIz", "Fn")
BUILTIN(__builtin_call_with_static_chain, "v.", "nt")
+BUILTIN(__builtin_elementwise_abs, "v.", "nct")
+BUILTIN(__builtin_elementwise_max, "v.", "nct")
+BUILTIN(__builtin_elementwise_min, "v.", "nct")
+BUILTIN(__builtin_reduce_max, "v.", "nct")
+BUILTIN(__builtin_reduce_min, "v.", "nct")
+
BUILTIN(__builtin_matrix_transpose, "v.", "nFt")
BUILTIN(__builtin_matrix_column_major_load, "v.", "nFt")
BUILTIN(__builtin_matrix_column_major_store, "v.", "nFt")
@@ -794,6 +798,7 @@ ATOMIC_BUILTIN(__c11_atomic_fetch_sub, "v.", "t")
ATOMIC_BUILTIN(__c11_atomic_fetch_and, "v.", "t")
ATOMIC_BUILTIN(__c11_atomic_fetch_or, "v.", "t")
ATOMIC_BUILTIN(__c11_atomic_fetch_xor, "v.", "t")
+ATOMIC_BUILTIN(__c11_atomic_fetch_nand, "v.", "t")
ATOMIC_BUILTIN(__c11_atomic_fetch_max, "v.", "t")
ATOMIC_BUILTIN(__c11_atomic_fetch_min, "v.", "t")
BUILTIN(__c11_atomic_thread_fence, "vi", "n")
diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def
index 1dac5d2371d4..634bcaed20a6 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.def
+++ b/clang/include/clang/Basic/BuiltinsAArch64.def
@@ -243,6 +243,9 @@ TARGET_HEADER_BUILTIN(_ReadStatusReg, "LLii", "nh", "intrin.h", ALL_MS_LANGUAG
TARGET_HEADER_BUILTIN(_WriteStatusReg, "viLLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
TARGET_HEADER_BUILTIN(_AddressOfReturnAddress, "v*", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
+TARGET_HEADER_BUILTIN(__mulh, "SLLiSLLiSLLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
+TARGET_HEADER_BUILTIN(__umulh, "ULLiULLiULLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
+
#undef BUILTIN
#undef LANGBUILTIN
#undef TARGET_HEADER_BUILTIN
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 3570431d952c..2e1d3c7ccbff 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -196,6 +196,19 @@ TARGET_BUILTIN(__builtin_amdgcn_perm, "UiUiUiUi", "nc", "gfx8-insts")
TARGET_BUILTIN(__builtin_amdgcn_fmed3h, "hhhh", "nc", "gfx9-insts")
+TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_f64, "dd*1d", "t", "gfx90a-insts")
+TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_f32, "ff*1f", "t", "gfx90a-insts")
+TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_v2f16, "V2hV2h*1V2h", "t", "gfx90a-insts")
+TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fmin_f64, "dd*1d", "t", "gfx90a-insts")
+TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fmax_f64, "dd*1d", "t", "gfx90a-insts")
+
+TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fadd_f64, "dd*0d", "t", "gfx90a-insts")
+TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fmin_f64, "dd*0d", "t", "gfx90a-insts")
+TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fmax_f64, "dd*0d", "t", "gfx90a-insts")
+
+TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_f64, "dd*3d", "t", "gfx90a-insts")
+TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_f32, "ff*3f", "t", "gfx8-insts")
+
//===----------------------------------------------------------------------===//
// Deep learning builtins.
//===----------------------------------------------------------------------===//
diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.def b/clang/include/clang/Basic/BuiltinsNVPTX.def
index 3c96900136a4..025fef05c8e0 100644
--- a/clang/include/clang/Basic/BuiltinsNVPTX.def
+++ b/clang/include/clang/Basic/BuiltinsNVPTX.def
@@ -39,7 +39,13 @@
#pragma push_macro("PTX70")
#pragma push_macro("PTX71")
#pragma push_macro("PTX72")
-#define PTX72 "ptx72"
+#pragma push_macro("PTX73")
+#pragma push_macro("PTX74")
+#pragma push_macro("PTX75")
+#define PTX75 "ptx75"
+#define PTX74 "ptx74|" PTX75
+#define PTX73 "ptx73|" PTX74
+#define PTX72 "ptx72|" PTX73
#define PTX71 "ptx71|" PTX72
#define PTX70 "ptx70|" PTX71
#define PTX65 "ptx65|" PTX70
@@ -683,6 +689,12 @@ BUILTIN(__nvvm_ldg_f2, "E2fE2fC*", "")
BUILTIN(__nvvm_ldg_f4, "E4fE4fC*", "")
BUILTIN(__nvvm_ldg_d2, "E2dE2dC*", "")
+// Address space predicates.
+BUILTIN(__nvvm_isspacep_const, "bvC*", "nc")
+BUILTIN(__nvvm_isspacep_global, "bvC*", "nc")
+BUILTIN(__nvvm_isspacep_local, "bvC*", "nc")
+BUILTIN(__nvvm_isspacep_shared, "bvC*", "nc")
+
// Builtins to support WMMA instructions on sm_70
TARGET_BUILTIN(__hmma_m16n16k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX60))
TARGET_BUILTIN(__hmma_m16n16k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX60))
@@ -815,3 +827,6 @@ TARGET_BUILTIN(__nvvm_cp_async_wait_all, "v", "", AND(SM_80,PTX70))
#pragma pop_macro("PTX70")
#pragma pop_macro("PTX71")
#pragma pop_macro("PTX72")
+#pragma pop_macro("PTX73")
+#pragma pop_macro("PTX74")
+#pragma pop_macro("PTX75")
diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
index dfe97af300f4..cd6b2df10e52 100644
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -74,8 +74,8 @@ BUILTIN(__builtin_ppc_fetch_and_swap, "UiUiD*Ui", "")
BUILTIN(__builtin_ppc_fetch_and_swaplp, "ULiULiD*ULi", "")
BUILTIN(__builtin_ppc_ldarx, "LiLiD*", "")
BUILTIN(__builtin_ppc_lwarx, "iiD*", "")
-BUILTIN(__builtin_ppc_lharx, "isD*", "")
-BUILTIN(__builtin_ppc_lbarx, "UiUcD*", "")
+BUILTIN(__builtin_ppc_lharx, "ssD*", "")
+BUILTIN(__builtin_ppc_lbarx, "ccD*", "")
BUILTIN(__builtin_ppc_stdcx, "iLiD*Li", "")
BUILTIN(__builtin_ppc_stwcx, "iiD*i", "")
BUILTIN(__builtin_ppc_sthcx, "isD*s", "")
@@ -96,6 +96,13 @@ BUILTIN(__builtin_ppc_swdiv_nochk, "ddd", "")
BUILTIN(__builtin_ppc_swdivs_nochk, "fff", "")
BUILTIN(__builtin_ppc_alignx, "vIivC*", "nc")
BUILTIN(__builtin_ppc_rdlam, "UWiUWiUWiUWIi", "nc")
+BUILTIN(__builtin_ppc_compare_exp_uo, "idd", "")
+BUILTIN(__builtin_ppc_compare_exp_lt, "idd", "")
+BUILTIN(__builtin_ppc_compare_exp_gt, "idd", "")
+BUILTIN(__builtin_ppc_compare_exp_eq, "idd", "")
+BUILTIN(__builtin_ppc_test_data_class, "idIi", "t")
+BUILTIN(__builtin_ppc_swdiv, "ddd", "")
+BUILTIN(__builtin_ppc_swdivs, "fff", "")
// Compare
BUILTIN(__builtin_ppc_cmpeqb, "LLiLLiLLi", "")
BUILTIN(__builtin_ppc_cmprb, "iCIiii", "")
@@ -110,11 +117,11 @@ BUILTIN(__builtin_ppc_maddhd, "LLiLLiLLiLLi", "")
BUILTIN(__builtin_ppc_maddhdu, "ULLiULLiULLiULLi", "")
BUILTIN(__builtin_ppc_maddld, "LLiLLiLLiLLi", "")
// Rotate
-BUILTIN(__builtin_ppc_rlwnm, "UiUiIUiIUi", "")
+BUILTIN(__builtin_ppc_rlwnm, "UiUiUiIUi", "")
BUILTIN(__builtin_ppc_rlwimi, "UiUiUiIUiIUi", "")
BUILTIN(__builtin_ppc_rldimi, "ULLiULLiULLiIUiIULLi", "")
// load
-BUILTIN(__builtin_ppc_load2r, "UiUs*", "")
+BUILTIN(__builtin_ppc_load2r, "UsUs*", "")
BUILTIN(__builtin_ppc_load4r, "UiUi*", "")
BUILTIN(__builtin_ppc_load8r, "ULLiULLi*", "")
// store
@@ -144,6 +151,7 @@ BUILTIN(__builtin_ppc_mfspr, "ULiIi", "")
BUILTIN(__builtin_ppc_mtmsr, "vUi", "")
BUILTIN(__builtin_ppc_mtspr, "vIiULi", "")
BUILTIN(__builtin_ppc_stfiw, "viC*d", "")
+BUILTIN(__builtin_ppc_addex, "LLiLLiLLiCIi", "")
BUILTIN(__builtin_ppc_get_timebase, "ULLi", "n")
@@ -391,6 +399,7 @@ BUILTIN(__builtin_altivec_vcmpgtfp_p, "iiV4fV4f", "")
BUILTIN(__builtin_altivec_vgbbd, "V16UcV16Uc", "")
BUILTIN(__builtin_altivec_vbpermq, "V2ULLiV16UcV16Uc", "")
+BUILTIN(__builtin_altivec_vbpermd, "V2ULLiV2ULLiV16Uc", "")
// P8 Crypto built-ins.
BUILTIN(__builtin_altivec_crypto_vsbox, "V2ULLiV2ULLi", "")
@@ -771,6 +780,10 @@ BUILTIN(__builtin_cfuged, "ULLiULLiULLi", "")
BUILTIN(__builtin_cntlzdm, "ULLiULLiULLi", "")
BUILTIN(__builtin_cnttzdm, "ULLiULLiULLi", "")
+// Double-double (un)pack
+BUILTIN(__builtin_unpack_longdouble, "dLdIi", "")
+BUILTIN(__builtin_pack_longdouble, "Lddd", "")
+
// Generate random number
BUILTIN(__builtin_darn, "LLi", "")
BUILTIN(__builtin_darn_raw, "LLi", "")
@@ -812,18 +825,20 @@ BUILTIN(__builtin_dcbf, "vvC*", "")
// its given accumulator.
// Provided builtins with _mma_ prefix for compatibility.
-CUSTOM_BUILTIN(mma_lxvp, vsx_lxvp, "W256SLLiW256C*", false)
-CUSTOM_BUILTIN(mma_stxvp, vsx_stxvp, "vW256SLLiW256C*", false)
+CUSTOM_BUILTIN(mma_lxvp, vsx_lxvp, "W256SLiW256C*", false)
+CUSTOM_BUILTIN(mma_stxvp, vsx_stxvp, "vW256SLiW256C*", false)
CUSTOM_BUILTIN(mma_assemble_pair, vsx_assemble_pair, "vW256*VV", false)
CUSTOM_BUILTIN(mma_disassemble_pair, vsx_disassemble_pair, "vv*W256*", false)
+CUSTOM_BUILTIN(vsx_build_pair, vsx_assemble_pair, "vW256*VV", false)
+CUSTOM_BUILTIN(mma_build_acc, mma_assemble_acc, "vW512*VVVV", false)
// UNALIASED_CUSTOM_BUILTIN macro is used for built-ins that have
// the same name as that of the intrinsic they generate, i.e. the
// ID and INTR are the same.
// This avoids repeating the ID and INTR in the macro expression.
-UNALIASED_CUSTOM_BUILTIN(vsx_lxvp, "W256SLLiW256C*", false)
-UNALIASED_CUSTOM_BUILTIN(vsx_stxvp, "vW256SLLiW256C*", false)
+UNALIASED_CUSTOM_BUILTIN(vsx_lxvp, "W256SLiW256C*", false)
+UNALIASED_CUSTOM_BUILTIN(vsx_stxvp, "vW256SLiW256C*", false)
UNALIASED_CUSTOM_BUILTIN(vsx_assemble_pair, "vW256*VV", false)
UNALIASED_CUSTOM_BUILTIN(vsx_disassemble_pair, "vv*W256*", false)
diff --git a/clang/include/clang/Basic/BuiltinsRISCV.def b/clang/include/clang/Basic/BuiltinsRISCV.def
index b2b4950f92bd..06560415e686 100644
--- a/clang/include/clang/Basic/BuiltinsRISCV.def
+++ b/clang/include/clang/Basic/BuiltinsRISCV.def
@@ -15,8 +15,6 @@
# define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS)
#endif
-#include "clang/Basic/riscv_vector_builtins.inc"
-
// Zbb extension
TARGET_BUILTIN(__builtin_riscv_orc_b_32, "ZiZi", "nc", "experimental-zbb")
TARGET_BUILTIN(__builtin_riscv_orc_b_64, "WiWi", "nc", "experimental-zbb,64bit")
diff --git a/clang/include/clang/Basic/BuiltinsRISCVVector.def b/clang/include/clang/Basic/BuiltinsRISCVVector.def
new file mode 100644
index 000000000000..008cb939a30b
--- /dev/null
+++ b/clang/include/clang/Basic/BuiltinsRISCVVector.def
@@ -0,0 +1,21 @@
+//==- BuiltinsRISCVVector.def - RISC-V Vector Builtin Database ---*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the RISC-V-specific builtin function database. Users of
+// this file must define the BUILTIN macro to make use of this information.
+//
+//===----------------------------------------------------------------------===//
+
+#if defined(BUILTIN) && !defined(TARGET_BUILTIN)
+# define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS)
+#endif
+
+#include "clang/Basic/riscv_vector_builtins.inc"
+
+#undef BUILTIN
+#undef TARGET_BUILTIN
diff --git a/clang/include/clang/Basic/BuiltinsWebAssembly.def b/clang/include/clang/Basic/BuiltinsWebAssembly.def
index 04ec45aa3b74..057d968e9bca 100644
--- a/clang/include/clang/Basic/BuiltinsWebAssembly.def
+++ b/clang/include/clang/Basic/BuiltinsWebAssembly.def
@@ -119,18 +119,22 @@ TARGET_BUILTIN(__builtin_wasm_all_true_i16x8, "iV8s", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_all_true_i32x4, "iV4i", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_all_true_i64x2, "iV2LLi", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_bitmask_i8x16, "iV16Sc", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_bitmask_i16x8, "iV8s", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_bitmask_i32x4, "iV4i", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_bitmask_i64x2, "iV2LLi", "nc", "simd128")
+TARGET_BUILTIN(__builtin_wasm_bitmask_i8x16, "UiV16Sc", "nc", "simd128")
+TARGET_BUILTIN(__builtin_wasm_bitmask_i16x8, "UiV8s", "nc", "simd128")
+TARGET_BUILTIN(__builtin_wasm_bitmask_i32x4, "UiV4i", "nc", "simd128")
+TARGET_BUILTIN(__builtin_wasm_bitmask_i64x2, "UiV2LLi", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_abs_f32x4, "V4fV4f", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_abs_f64x2, "V2dV2d", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_min_f32x4, "V4fV4fV4f", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_max_f32x4, "V4fV4fV4f", "nc", "simd128")
+TARGET_BUILTIN(__builtin_wasm_pmin_f32x4, "V4fV4fV4f", "nc", "simd128")
+TARGET_BUILTIN(__builtin_wasm_pmax_f32x4, "V4fV4fV4f", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_min_f64x2, "V2dV2dV2d", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_max_f64x2, "V2dV2dV2d", "nc", "simd128")
+TARGET_BUILTIN(__builtin_wasm_pmin_f64x2, "V2dV2dV2d", "nc", "simd128")
+TARGET_BUILTIN(__builtin_wasm_pmax_f64x2, "V2dV2dV2d", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_ceil_f32x4, "V4fV4f", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_floor_f32x4, "V4fV4f", "nc", "simd128")
@@ -157,5 +161,28 @@ TARGET_BUILTIN(__builtin_wasm_narrow_u_i16x8_i32x4, "V8UsV4iV4i", "nc", "simd128
TARGET_BUILTIN(__builtin_wasm_trunc_sat_zero_s_f64x2_i32x4, "V4iV2d", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_trunc_sat_zero_u_f64x2_i32x4, "V4UiV2d", "nc", "simd128")
+// Relaxed SIMD builtins (experimental)
+TARGET_BUILTIN(__builtin_wasm_fma_f32x4, "V4fV4fV4fV4f", "nc", "relaxed-simd")
+TARGET_BUILTIN(__builtin_wasm_fms_f32x4, "V4fV4fV4fV4f", "nc", "relaxed-simd")
+TARGET_BUILTIN(__builtin_wasm_fma_f64x2, "V2dV2dV2dV2d", "nc", "relaxed-simd")
+TARGET_BUILTIN(__builtin_wasm_fms_f64x2, "V2dV2dV2dV2d", "nc", "relaxed-simd")
+
+TARGET_BUILTIN(__builtin_wasm_laneselect_i8x16, "V16ScV16ScV16ScV16Sc", "nc", "relaxed-simd")
+TARGET_BUILTIN(__builtin_wasm_laneselect_i16x8, "V8sV8sV8sV8s", "nc", "relaxed-simd")
+TARGET_BUILTIN(__builtin_wasm_laneselect_i32x4, "V4iV4iV4iV4i", "nc", "relaxed-simd")
+TARGET_BUILTIN(__builtin_wasm_laneselect_i64x2, "V2LLiV2LLiV2LLiV2LLi", "nc", "relaxed-simd")
+
+TARGET_BUILTIN(__builtin_wasm_relaxed_swizzle_i8x16, "V16ScV16ScV16Sc", "nc", "relaxed-simd")
+
+TARGET_BUILTIN(__builtin_wasm_relaxed_min_f32x4, "V4fV4fV4f", "nc", "relaxed-simd")
+TARGET_BUILTIN(__builtin_wasm_relaxed_max_f32x4, "V4fV4fV4f", "nc", "relaxed-simd")
+TARGET_BUILTIN(__builtin_wasm_relaxed_min_f64x2, "V2dV2dV2d", "nc", "relaxed-simd")
+TARGET_BUILTIN(__builtin_wasm_relaxed_max_f64x2, "V2dV2dV2d", "nc", "relaxed-simd")
+
+TARGET_BUILTIN(__builtin_wasm_relaxed_trunc_s_i32x4_f32x4, "V4iV4f", "nc", "relaxed-simd")
+TARGET_BUILTIN(__builtin_wasm_relaxed_trunc_u_i32x4_f32x4, "V4UiV4f", "nc", "relaxed-simd")
+TARGET_BUILTIN(__builtin_wasm_relaxed_trunc_zero_s_i32x4_f64x2, "V4iV2d", "nc", "relaxed-simd")
+TARGET_BUILTIN(__builtin_wasm_relaxed_trunc_zero_u_i32x4_f64x2, "V4UiV2d", "nc", "relaxed-simd")
+
#undef BUILTIN
#undef TARGET_BUILTIN
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index 18e541fe9cb5..bc6208be4560 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -421,9 +421,9 @@ TARGET_BUILTIN(__builtin_ia32_pcmpestrio128, "iV16ciV16ciIc","ncV:128:", "sse4.2
TARGET_BUILTIN(__builtin_ia32_pcmpestris128, "iV16ciV16ciIc","ncV:128:", "sse4.2")
TARGET_BUILTIN(__builtin_ia32_pcmpestriz128, "iV16ciV16ciIc","ncV:128:", "sse4.2")
-TARGET_BUILTIN(__builtin_ia32_crc32qi, "UiUiUc", "nc", "sse4.2")
-TARGET_BUILTIN(__builtin_ia32_crc32hi, "UiUiUs", "nc", "sse4.2")
-TARGET_BUILTIN(__builtin_ia32_crc32si, "UiUiUi", "nc", "sse4.2")
+TARGET_BUILTIN(__builtin_ia32_crc32qi, "UiUiUc", "nc", "crc32")
+TARGET_BUILTIN(__builtin_ia32_crc32hi, "UiUiUs", "nc", "crc32")
+TARGET_BUILTIN(__builtin_ia32_crc32si, "UiUiUi", "nc", "crc32")
// SSE4a
TARGET_BUILTIN(__builtin_ia32_extrqi, "V2OiV2OiIcIc", "ncV:128:", "sse4a")
@@ -1849,6 +1849,203 @@ TARGET_BUILTIN(__builtin_ia32_vp2intersect_d_512, "vV16iV16iUs*Us*", "nV:512:",
TARGET_BUILTIN(__builtin_ia32_vp2intersect_d_256, "vV8iV8iUc*Uc*", "nV:256:", "avx512vp2intersect,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vp2intersect_d_128, "vV4iV4iUc*Uc*", "nV:128:", "avx512vp2intersect,avx512vl")
+// AVX512 fp16 intrinsics
+TARGET_BUILTIN(__builtin_ia32_vcomish, "iV8xV8xIiIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_addph512, "V32xV32xV32xIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_subph512, "V32xV32xV32xIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_mulph512, "V32xV32xV32xIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_divph512, "V32xV32xV32xIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_maxph512, "V32xV32xV32xIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_minph512, "V32xV32xV32xIi", "ncV:512:", "avx512fp16")
+
+TARGET_BUILTIN(__builtin_ia32_minph256, "V16xV16xV16x", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_minph128, "V8xV8xV8x", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_maxph256, "V16xV16xV16x", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_maxph128, "V8xV8xV8x", "ncV:128:", "avx512fp16,avx512vl")
+
+TARGET_BUILTIN(__builtin_ia32_addsh_round_mask, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_divsh_round_mask, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_mulsh_round_mask, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_subsh_round_mask, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_maxsh_round_mask, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_minsh_round_mask, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_cmpph512_mask, "UiV32xV32xIiUiIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_cmpph256_mask, "UsV16xV16xIiUs", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cmpph128_mask, "UcV8xV8xIiUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cmpsh_mask, "UcV8xV8xIiUcIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_loadsh128_mask, "V8xV8x*V8xUc", "nV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_storesh128_mask, "vV8x*V8xUc", "nV:128:", "avx512fp16")
+
+TARGET_BUILTIN(__builtin_ia32_rcpph128_mask, "V8xV8xV8xUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_rcpph256_mask, "V16xV16xV16xUs", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_rcpph512_mask, "V32xV32xV32xUi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_rsqrtph128_mask, "V8xV8xV8xUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_rsqrtph256_mask, "V16xV16xV16xUs", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_rsqrtph512_mask, "V32xV32xV32xUi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_getmantph128_mask, "V8xV8xIiV8xUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_getmantph256_mask, "V16xV16xIiV16xUs", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_getmantph512_mask, "V32xV32xIiV32xUiIi", "ncV:512:", "avx512fp16")
+
+TARGET_BUILTIN(__builtin_ia32_getexpph128_mask, "V8xV8xV8xUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_getexpph256_mask, "V16xV16xV16xUs", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_getexpph512_mask, "V32xV32xV32xUiIi", "ncV:512:", "avx512fp16")
+
+TARGET_BUILTIN(__builtin_ia32_scalefph128_mask, "V8xV8xV8xV8xUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_scalefph256_mask, "V16xV16xV16xV16xUs", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_scalefph512_mask, "V32xV32xV32xV32xUiIi", "ncV:512:", "avx512fp16")
+
+TARGET_BUILTIN(__builtin_ia32_rndscaleph_128_mask, "V8xV8xIiV8xUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_rndscaleph_256_mask, "V16xV16xIiV16xUs", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_rndscaleph_mask, "V32xV32xIiV32xUiIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_reduceph128_mask, "V8xV8xIiV8xUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_reduceph256_mask, "V16xV16xIiV16xUs", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_reduceph512_mask, "V32xV32xIiV32xUiIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_rcpsh_mask, "V8xV8xV8xV8xUc", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_rsqrtsh_mask, "V8xV8xV8xV8xUc", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_getmantsh_round_mask, "V8xV8xV8xIiV8xUcIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_getexpsh128_round_mask, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_scalefsh_round_mask, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_rndscalesh_round_mask, "V8xV8xV8xV8xUcIiIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_reducesh_mask, "V8xV8xV8xV8xUcIiIi", "ncV:128:", "avx512fp16")
+
+TARGET_BUILTIN(__builtin_ia32_sqrtph, "V8xV8x", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_sqrtph256, "V16xV16x", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_sqrtph512, "V32xV32xIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_sqrtsh_round_mask, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_fpclassph128_mask, "UcV8xIiUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_fpclassph256_mask, "UsV16xIiUs", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_fpclassph512_mask, "UiV32xIiUi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_fpclasssh_mask, "UcV8xIiUc", "ncV:128:", "avx512fp16")
+
+TARGET_BUILTIN(__builtin_ia32_vcvtpd2ph128_mask, "V8xV2dV8xUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtpd2ph256_mask, "V8xV4dV8xUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtpd2ph512_mask, "V8xV8dV8xUcIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2pd128_mask, "V2dV8xV2dUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2pd256_mask, "V4dV8xV4dUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2pd512_mask, "V8dV8xV8dUcIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtsh2ss_round_mask, "V4fV4fV8xV4fUcIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtss2sh_round_mask, "V8xV8xV4fV8xUcIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtsd2sh_round_mask, "V8xV8xV2dV8xUcIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtsh2sd_round_mask, "V2dV2dV8xV2dUcIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2w128_mask, "V8sV8xV8sUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2w256_mask, "V16sV16xV16sUs", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2w512_mask, "V32sV32xV32sUiIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2w128_mask, "V8sV8xV8sUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2w256_mask, "V16sV16xV16sUs", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2w512_mask, "V32sV32xV32sUiIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtw2ph128_mask, "V8xV8sV8xUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtw2ph256_mask, "V16xV16sV16xUs", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtw2ph512_mask, "V32xV32sV32xUiIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2uw128_mask, "V8UsV8xV8UsUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2uw256_mask, "V16UsV16xV16UsUs", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2uw512_mask, "V32UsV32xV32UsUiIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2uw128_mask, "V8UsV8xV8UsUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2uw256_mask, "V16UsV16xV16UsUs", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2uw512_mask, "V32UsV32xV32UsUiIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtuw2ph128_mask, "V8xV8UsV8xUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtuw2ph256_mask, "V16xV16UsV16xUs", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtuw2ph512_mask, "V32xV32UsV32xUiIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2dq128_mask, "V4iV8xV4iUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2dq256_mask, "V8iV8xV8iUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2dq512_mask, "V16iV16xV16iUsIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2udq128_mask, "V4UiV8xV4UiUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2udq256_mask, "V8UiV8xV8UiUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2udq512_mask, "V16UiV16xV16UiUsIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtdq2ph128_mask, "V8xV4iV8xUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtdq2ph256_mask, "V8xV8iV8xUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtdq2ph512_mask, "V16xV16iV16xUsIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtudq2ph128_mask, "V8xV4UiV8xUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtudq2ph256_mask, "V8xV8UiV8xUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtudq2ph512_mask, "V16xV16UiV16xUsIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2dq128_mask, "V4iV8xV4iUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2dq256_mask, "V8iV8xV8iUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2dq512_mask, "V16iV16xV16iUsIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2udq128_mask, "V4UiV8xV4UiUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2udq256_mask, "V8UiV8xV8UiUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2udq512_mask, "V16UiV16xV16UiUsIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtqq2ph128_mask, "V8xV2OiV8xUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtqq2ph256_mask, "V8xV4OiV8xUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtqq2ph512_mask, "V8xV8OiV8xUcIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2qq128_mask, "V2OiV8xV2OiUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2qq256_mask, "V4OiV8xV4OiUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2qq512_mask, "V8OiV8xV8OiUcIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtuqq2ph128_mask, "V8xV2UOiV8xUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtuqq2ph256_mask, "V8xV4UOiV8xUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtuqq2ph512_mask, "V8xV8UOiV8xUcIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2uqq128_mask, "V2UOiV8xV2UOiUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2uqq256_mask, "V4UOiV8xV4UOiUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2uqq512_mask, "V8UOiV8xV8UOiUcIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2qq128_mask, "V2OiV8xV2OiUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2qq256_mask, "V4OiV8xV4OiUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2qq512_mask, "V8OiV8xV8OiUcIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2uqq128_mask, "V2UOiV8xV2UOiUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2uqq256_mask, "V4UOiV8xV4UOiUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2uqq512_mask, "V8UOiV8xV8UOiUcIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtsh2si32, "iV8xIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtsh2usi32, "UiV8xIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtusi2sh, "V8xV8xUiIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtsi2sh, "V8xV8xiIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvttsh2si32, "iV8xIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvttsh2usi32, "UiV8xIi", "ncV:128:", "avx512fp16")
+
+TARGET_BUILTIN(__builtin_ia32_vcvtph2psx128_mask, "V4fV8xV4fUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2psx256_mask, "V8fV8xV8fUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2psx512_mask, "V16fV16xV16fUsIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2phx128_mask, "V8xV4fV8xUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2phx256_mask, "V8xV8fV8xUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2phx512_mask, "V16xV16fV16xUsIi", "ncV:512:", "avx512fp16")
+
+TARGET_BUILTIN(__builtin_ia32_vfmaddph, "V8xV8xV8xV8x", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vfmaddph256, "V16xV16xV16xV16x", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vfmaddph512_mask, "V32xV32xV32xV32xUiIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vfmaddph512_mask3, "V32xV32xV32xV32xUiIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vfmaddph512_maskz, "V32xV32xV32xV32xUiIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubph, "V8xV8xV8xV8x", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubph256, "V16xV16xV16xV16x", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubph512_mask, "V32xV32xV32xV32xUiIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubph512_maskz, "V32xV32xV32xV32xUiIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubph512_mask3, "V32xV32xV32xV32xUiIi", "ncV:512:", "avx512fp16")
+
+TARGET_BUILTIN(__builtin_ia32_vfmsubaddph512_mask3, "V32xV32xV32xV32xUiIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vfmsubph512_mask3, "V32xV32xV32xV32xUiIi", "ncV:512:", "avx512fp16")
+
+TARGET_BUILTIN(__builtin_ia32_vfmaddsh3_mask, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsh3_maskz, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsh3_mask3, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vfmsubsh3_mask3, "V8xV8xV8xV8xUcIi", "ncV:128:", "avx512fp16")
+
+TARGET_BUILTIN(__builtin_ia32_vfmaddcph128_mask, "V4fV4fV4fV4fUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vfmaddcph128_maskz, "V4fV4fV4fV4fUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vfmaddcph256_mask, "V8fV8fV8fV8fUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vfmaddcph256_maskz, "V8fV8fV8fV8fUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vfmaddcph512_mask, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vfmaddcph512_maskz, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vfmaddcph512_mask3, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vfcmaddcph128_mask, "V4fV4fV4fV4fUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vfcmaddcph128_maskz, "V4fV4fV4fV4fUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vfcmaddcph256_mask, "V8fV8fV8fV8fUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vfcmaddcph256_maskz, "V8fV8fV8fV8fUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vfcmaddcph512_mask, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vfcmaddcph512_maskz, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vfcmaddcph512_mask3, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vfmaddcsh_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vfmaddcsh_maskz, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vfcmaddcsh_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vfcmaddcsh_maskz, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vfmaddcsh_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vfmaddcsh_round_mask3, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vfcmaddcsh_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vfcmaddcsh_round_mask3, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512fp16")
+
+TARGET_BUILTIN(__builtin_ia32_vfmulcsh_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vfcmulcsh_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vfmulcph128_mask, "V4fV4fV4fV4fUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vfmulcph256_mask, "V8fV8fV8fV8fUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vfmulcph512_mask, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vfcmulcph128_mask, "V4fV4fV4fV4fUc", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vfcmulcph256_mask, "V8fV8fV8fV8fUc", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vfcmulcph512_mask, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512fp16")
+
// generic select intrinsics
TARGET_BUILTIN(__builtin_ia32_selectb_128, "V16cUsV16cV16c", "ncV:128:", "avx512bw,avx512vl")
TARGET_BUILTIN(__builtin_ia32_selectb_256, "V32cUiV32cV32c", "ncV:256:", "avx512bw,avx512vl")
@@ -1859,6 +2056,9 @@ TARGET_BUILTIN(__builtin_ia32_selectw_512, "V32sUiV32sV32s", "ncV:512:", "avx512
TARGET_BUILTIN(__builtin_ia32_selectd_128, "V4iUcV4iV4i", "ncV:128:", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_selectd_256, "V8iUcV8iV8i", "ncV:256:", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_selectd_512, "V16iUsV16iV16i", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_selectph_128, "V8xUcV8xV8x", "ncV:128:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_selectph_256, "V16xUsV16xV16x", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_selectph_512, "V32xUiV32xV32x", "ncV:512:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_selectq_128, "V2OiUcV2OiV2Oi", "ncV:128:", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_selectq_256, "V4OiUcV4OiV4Oi", "ncV:256:", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_selectq_512, "V8OiUcV8OiV8Oi", "ncV:512:", "avx512f")
@@ -1868,6 +2068,7 @@ TARGET_BUILTIN(__builtin_ia32_selectps_512, "V16fUsV16fV16f", "ncV:512:", "avx51
TARGET_BUILTIN(__builtin_ia32_selectpd_128, "V2dUcV2dV2d", "ncV:128:", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_selectpd_256, "V4dUcV4dV4d", "ncV:256:", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_selectpd_512, "V8dUcV8dV8d", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_selectsh_128, "V8xUcV8xV8x", "ncV:128:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_selectss_128, "V4fUcV4fV4f", "ncV:128:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_selectsd_128, "V2dUcV2dV2d", "ncV:128:", "avx512f")
@@ -1878,12 +2079,24 @@ TARGET_BUILTIN(__builtin_ia32_reduce_and_d512, "iV16i", "ncV:512:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_reduce_and_q512, "OiV8Oi", "ncV:512:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_reduce_fadd_pd512, "ddV8d", "ncV:512:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_reduce_fadd_ps512, "ffV16f", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_reduce_fadd_ph512, "xxV32x", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_reduce_fadd_ph256, "xxV16x", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_reduce_fadd_ph128, "xxV8x", "ncV:128:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_reduce_fmax_pd512, "dV8d", "ncV:512:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_reduce_fmax_ps512, "fV16f", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_reduce_fmax_ph512, "xV32x", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_reduce_fmax_ph256, "xV16x", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_reduce_fmax_ph128, "xV8x", "ncV:128:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_reduce_fmin_pd512, "dV8d", "ncV:512:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_reduce_fmin_ps512, "fV16f", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_reduce_fmin_ph512, "xV32x", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_reduce_fmin_ph256, "xV16x", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_reduce_fmin_ph128, "xV8x", "ncV:128:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_reduce_fmul_pd512, "ddV8d", "ncV:512:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_reduce_fmul_ps512, "ffV16f", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_reduce_fmul_ph512, "xxV32x", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_reduce_fmul_ph256, "xxV16x", "ncV:256:", "avx512fp16,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_reduce_fmul_ph128, "xxV8x", "ncV:128:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_reduce_mul_d512, "iV16i", "ncV:512:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_reduce_mul_q512, "OiV8Oi", "ncV:512:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_reduce_or_d512, "iV16i", "ncV:512:", "avx512f")
diff --git a/clang/include/clang/Basic/BuiltinsX86_64.def b/clang/include/clang/Basic/BuiltinsX86_64.def
index ce2b1decdf6c..c3b9703a9cc6 100644
--- a/clang/include/clang/Basic/BuiltinsX86_64.def
+++ b/clang/include/clang/Basic/BuiltinsX86_64.def
@@ -44,7 +44,7 @@ TARGET_BUILTIN(__builtin_ia32_cvttsd2si64, "OiV2d", "ncV:128:", "sse2")
TARGET_BUILTIN(__builtin_ia32_movnti64, "vOi*Oi", "n", "sse2")
TARGET_BUILTIN(__builtin_ia32_vec_ext_v2di, "OiV2OiIi", "ncV:128:", "sse2")
TARGET_BUILTIN(__builtin_ia32_vec_set_v2di, "V2OiV2OiOiIi", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_crc32di, "UOiUOiUOi", "nc", "sse4.2")
+TARGET_BUILTIN(__builtin_ia32_crc32di, "UOiUOiUOi", "nc", "crc32")
TARGET_BUILTIN(__builtin_ia32_vec_ext_v4di, "OiV4OiIi", "ncV:256:", "avx")
TARGET_BUILTIN(__builtin_ia32_vec_set_v4di, "V4OiV4OiOiIi", "ncV:256:", "avx")
TARGET_BUILTIN(__builtin_ia32_rdfsbase32, "Ui", "n", "fsgsbase")
@@ -92,6 +92,12 @@ TARGET_BUILTIN(__builtin_ia32_cvtsi2sd64, "V2dV2dOiIi", "ncV:128:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_cvtsi2ss64, "V4fV4fOiIi", "ncV:128:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_cvtusi2sd64, "V2dV2dUOiIi", "ncV:128:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_cvtusi2ss64, "V4fV4fUOiIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vcvtsh2si64, "OiV8xIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtsh2usi64, "UOiV8xIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtusi642sh, "V8xV8xUOiIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvtsi642sh, "V8xV8xOiIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvttsh2si64, "OiV8xIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vcvttsh2usi64, "UOiV8xIi", "ncV:128:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_directstore_u64, "vULi*ULi", "n", "movdiri")
// UINTR
diff --git a/clang/include/clang/Basic/CLWarnings.h b/clang/include/clang/Basic/CLWarnings.h
new file mode 100644
index 000000000000..e3351f430c43
--- /dev/null
+++ b/clang/include/clang/Basic/CLWarnings.h
@@ -0,0 +1,26 @@
+//===--- CLWarnings.h - Maps some cl.exe warning ids -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_BASIC_CLWARNINGS_H
+#define LLVM_CLANG_BASIC_CLWARNINGS_H
+
+#include "llvm/ADT/Optional.h"
+
+namespace clang {
+
+namespace diag {
+enum class Group;
+}
+
+/// For cl.exe warning IDs that cleanly map to clang diagnostic groups,
+/// returns the corresponding group. Else, returns an empty Optional.
+llvm::Optional<diag::Group> diagGroupFromCLWarningID(unsigned);
+
+} // end namespace clang
+
+#endif // LLVM_CLANG_BASIC_CLWARNINGS_H
diff --git a/clang/include/clang/Basic/CharInfo.h b/clang/include/clang/Basic/CharInfo.h
index 8577475fab06..c751b6a005e2 100644
--- a/clang/include/clang/Basic/CharInfo.h
+++ b/clang/include/clang/Basic/CharInfo.h
@@ -43,10 +43,15 @@ LLVM_READNONE inline bool isASCII(char c) {
return static_cast<unsigned char>(c) <= 127;
}
+LLVM_READNONE inline bool isASCII(unsigned char c) { return c <= 127; }
+
+/// Returns true if this is an ASCII character.
+LLVM_READNONE inline bool isASCII(uint32_t c) { return c <= 127; }
+
/// Returns true if this is a valid first character of a C identifier,
/// which is [a-zA-Z_].
-LLVM_READONLY inline bool isIdentifierHead(unsigned char c,
- bool AllowDollar = false) {
+LLVM_READONLY inline bool isAsciiIdentifierStart(unsigned char c,
+ bool AllowDollar = false) {
using namespace charinfo;
if (InfoTable[c] & (CHAR_UPPER|CHAR_LOWER|CHAR_UNDER))
return true;
@@ -55,8 +60,8 @@ LLVM_READONLY inline bool isIdentifierHead(unsigned char c,
/// Returns true if this is a body character of a C identifier,
/// which is [a-zA-Z0-9_].
-LLVM_READONLY inline bool isIdentifierBody(unsigned char c,
- bool AllowDollar = false) {
+LLVM_READONLY inline bool isAsciiIdentifierContinue(unsigned char c,
+ bool AllowDollar = false) {
using namespace charinfo;
if (InfoTable[c] & (CHAR_UPPER|CHAR_LOWER|CHAR_DIGIT|CHAR_UNDER))
return true;
@@ -181,13 +186,13 @@ LLVM_READONLY inline char toUppercase(char c) {
///
/// Note that this is a very simple check; it does not accept UCNs as valid
/// identifier characters.
-LLVM_READONLY inline bool isValidIdentifier(StringRef S,
- bool AllowDollar = false) {
- if (S.empty() || !isIdentifierHead(S[0], AllowDollar))
+LLVM_READONLY inline bool isValidAsciiIdentifier(StringRef S,
+ bool AllowDollar = false) {
+ if (S.empty() || !isAsciiIdentifierStart(S[0], AllowDollar))
return false;
for (StringRef::iterator I = S.begin(), E = S.end(); I != E; ++I)
- if (!isIdentifierBody(*I, AllowDollar))
+ if (!isAsciiIdentifierContinue(*I, AllowDollar))
return false;
return true;
diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def
index e3202cf88756..94b3003a9c33 100644
--- a/clang/include/clang/Basic/CodeGenOptions.def
+++ b/clang/include/clang/Basic/CodeGenOptions.def
@@ -54,6 +54,7 @@ CODEGENOPT(UniqueBasicBlockSectionNames, 1, 1) ///< Set for -funique-basic-block
CODEGENOPT(EnableAIXExtendedAltivecABI, 1, 0) ///< Set for -mabi=vec-extabi. Enables the extended Altivec ABI on AIX.
ENUM_CODEGENOPT(FramePointer, FramePointerKind, 2, FramePointerKind::None) /// frame-pointer: all,non-leaf,none
+CODEGENOPT(ClearASTBeforeBackend , 1, 0) ///< Free the AST before running backend code generation. Only works with -disable-free.
CODEGENOPT(DisableFree , 1, 0) ///< Don't free memory.
CODEGENOPT(DiscardValueNames , 1, 0) ///< Discard Value Names from the IR (LLVMContext flag)
CODEGENOPT(DisableLLVMPasses , 1, 0) ///< Don't run any LLVM IR passes to get
@@ -91,6 +92,8 @@ CODEGENOPT(EmulatedTLS , 1, 0) ///< Set by default or -f[no-]emulated-tls.
CODEGENOPT(ExplicitEmulatedTLS , 1, 0) ///< Set if -f[no-]emulated-tls is used.
/// Embed Bitcode mode (off/all/bitcode/marker).
ENUM_CODEGENOPT(EmbedBitcode, EmbedBitcodeKind, 2, Embed_Off)
+/// Inline asm dialect, -masm=(att|intel)
+ENUM_CODEGENOPT(InlineAsmDialect, InlineAsmDialectKind, 1, IAD_ATT)
CODEGENOPT(ForbidGuardVariables , 1, 0) ///< Issue errors if C++ guard variables
///< are required.
CODEGENOPT(FunctionSections , 1, 0) ///< Set when -ffunction-sections is enabled.
@@ -188,6 +191,7 @@ CODEGENOPT(NoZeroInitializedInBSS , 1, 0) ///< -fno-zero-initialized-in-bss.
ENUM_CODEGENOPT(ObjCDispatchMethod, ObjCDispatchMethodKind, 2, Legacy)
/// Replace certain message sends with calls to ObjC runtime entrypoints
CODEGENOPT(ObjCConvertMessagesToRuntimeCalls , 1, 1)
+CODEGENOPT(ObjCAvoidHeapifyLocalBlocks, 1, 0)
VALUE_CODEGENOPT(OptimizationLevel, 2, 0) ///< The -O[0-3] option specified.
VALUE_CODEGENOPT(OptimizeSize, 2, 0) ///< If -Os (==1) or -Oz (==2) is specified.
@@ -259,6 +263,8 @@ CODEGENOPT(SanitizeCoverageInlineBoolFlag, 1, 0) ///< Use inline bool flag.
CODEGENOPT(SanitizeCoveragePCTable, 1, 0) ///< Create a PC Table.
CODEGENOPT(SanitizeCoverageNoPrune, 1, 0) ///< Disable coverage pruning.
CODEGENOPT(SanitizeCoverageStackDepth, 1, 0) ///< Enable max stack depth tracing
+CODEGENOPT(SanitizeCoverageTraceLoads, 1, 0) ///< Enable tracing of loads.
+CODEGENOPT(SanitizeCoverageTraceStores, 1, 0) ///< Enable tracing of stores.
CODEGENOPT(SanitizeStats , 1, 0) ///< Collect statistics for sanitizers.
CODEGENOPT(SimplifyLibCalls , 1, 1) ///< Set when -fbuiltin is enabled.
CODEGENOPT(SoftFloat , 1, 0) ///< -soft-float.
@@ -274,7 +280,7 @@ VALUE_CODEGENOPT(TimeTraceGranularity, 32, 500) ///< Minimum time granularity (i
CODEGENOPT(UnrollLoops , 1, 0) ///< Control whether loops are unrolled.
CODEGENOPT(RerollLoops , 1, 0) ///< Control whether loops are rerolled.
CODEGENOPT(NoUseJumpTables , 1, 0) ///< Set when -fno-jump-tables is enabled.
-CODEGENOPT(UnwindTables , 1, 0) ///< Emit unwind tables.
+VALUE_CODEGENOPT(UnwindTables, 2, 0) ///< Unwind tables (1) or asynchronous unwind tables (2)
CODEGENOPT(VectorizeLoop , 1, 0) ///< Run loop vectorizer.
CODEGENOPT(VectorizeSLP , 1, 0) ///< Run SLP vectorizer.
CODEGENOPT(ProfileSampleAccurate, 1, 0) ///< Sample profile is accurate.
@@ -293,6 +299,8 @@ CODEGENOPT(StackRealignment , 1, 0) ///< Control whether to force stack
///< realignment.
CODEGENOPT(UseInitArray , 1, 0) ///< Control whether to use .init_array or
///< .ctors.
+VALUE_CODEGENOPT(LoopAlignment , 32, 0) ///< Overrides default loop
+ ///< alignment, if not 0.
VALUE_CODEGENOPT(StackAlignment , 32, 0) ///< Overrides default stack
///< alignment, if not 0.
VALUE_CODEGENOPT(StackProbeSize , 32, 4096) ///< Overrides default stack
@@ -317,6 +325,12 @@ CODEGENOPT(DebugFwdTemplateParams, 1, 0) ///< Whether to emit complete
///< template parameter descriptions in
///< forward declarations (versus just
///< including them in the name).
+ENUM_CODEGENOPT(DebugSimpleTemplateNames, codegenoptions::DebugTemplateNamesKind, 2, codegenoptions::DebugTemplateNamesKind::Full) ///< Whether to emit template parameters
+ ///< in the textual names of template
+ ///< specializations.
+ ///< Implies DebugFwdTemplateNames to
+ ///< allow decorated names to be
+ ///< reconstructed when needed.
CODEGENOPT(EmitLLVMUseLists, 1, 0) ///< Control whether to serialize use-lists.
CODEGENOPT(WholeProgramVTables, 1, 0) ///< Whether to apply whole-program
@@ -437,6 +451,14 @@ CODEGENOPT(AAPCSBitfieldWidth, 1, 1)
/// propagate signaling NaN inputs per IEEE 754-2008 (AMDGPU Only)
CODEGENOPT(EmitIEEENaNCompliantInsts, 1, 1)
+// Whether to emit Swift Async function extended frame information: auto,
+// never, always.
+ENUM_CODEGENOPT(SwiftAsyncFramePointer, SwiftAsyncFramePointerKind, 2,
+ SwiftAsyncFramePointerKind::Always)
+
+/// Whether to skip RAX setup when passing variable arguments (x86 only).
+CODEGENOPT(SkipRaxSetup, 1, 0)
+
#undef CODEGENOPT
#undef ENUM_CODEGENOPT
#undef VALUE_CODEGENOPT
diff --git a/clang/include/clang/Basic/CodeGenOptions.h b/clang/include/clang/Basic/CodeGenOptions.h
index 617c255641ef..664e4998b8de 100644
--- a/clang/include/clang/Basic/CodeGenOptions.h
+++ b/clang/include/clang/Basic/CodeGenOptions.h
@@ -97,6 +97,11 @@ public:
Embed_Marker // Embed a marker as a placeholder for bitcode.
};
+ enum InlineAsmDialectKind {
+ IAD_ATT,
+ IAD_Intel,
+ };
+
// This field stores one of the allowed values for the option
// -fbasic-block-sections=. The allowed values with this option are:
// {"labels", "all", "list=<file>", "none"}.
@@ -125,6 +130,13 @@ public:
All, // Keep all frame pointers.
};
+ enum class SwiftAsyncFramePointerKind {
+ Auto, // Choose Swift async extended frame info based on deployment target.
+ Always, // Unconditionally emit Swift async extended frame info.
+ Never, // Don't emit Swift async extended frame info.
+ Default = Always,
+ };
+
enum FiniteLoopsKind {
Language, // Not specified, use language standard.
Always, // All loops are assumed to be finite.
@@ -456,7 +468,8 @@ public:
// Check if any one of SanitizeCoverage* is enabled.
bool hasSanitizeCoverage() const {
return SanitizeCoverageType || SanitizeCoverageIndirectCalls ||
- SanitizeCoverageTraceCmp;
+ SanitizeCoverageTraceCmp || SanitizeCoverageTraceLoads ||
+ SanitizeCoverageTraceStores;
}
};
diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h
index aa12724cbf0c..8c08ab3f5d74 100644
--- a/clang/include/clang/Basic/Cuda.h
+++ b/clang/include/clang/Basic/Cuda.h
@@ -31,8 +31,13 @@ enum class CudaVersion {
CUDA_110,
CUDA_111,
CUDA_112,
- LATEST = CUDA_112,
- LATEST_SUPPORTED = CUDA_101,
+ CUDA_113,
+ CUDA_114,
+ CUDA_115,
+ FULLY_SUPPORTED = CUDA_115,
+ PARTIALLY_SUPPORTED =
+ CUDA_115, // Partially supported. Proceed with a warning.
+ NEW = 10000, // Too new. Issue a warning, but allow using it.
};
const char *CudaVersionToString(CudaVersion V);
// Input is "Major.Minor"
diff --git a/clang/include/clang/Basic/DebugInfoOptions.h b/clang/include/clang/Basic/DebugInfoOptions.h
index c1259d7797db..a99a2b5903d7 100644
--- a/clang/include/clang/Basic/DebugInfoOptions.h
+++ b/clang/include/clang/Basic/DebugInfoOptions.h
@@ -54,6 +54,12 @@ enum DebugInfoKind {
UnusedTypeInfo,
};
+enum class DebugTemplateNamesKind {
+ Full,
+ Simple,
+ Mangled
+};
+
} // end namespace codegenoptions
} // end namespace clang
diff --git a/clang/include/clang/Basic/Diagnostic.h b/clang/include/clang/Basic/Diagnostic.h
index 3b915fb15a89..e5577e74fa63 100644
--- a/clang/include/clang/Basic/Diagnostic.h
+++ b/clang/include/clang/Basic/Diagnostic.h
@@ -164,9 +164,9 @@ struct DiagnosticStorage {
/// The values for the various substitution positions.
///
/// This is used when the argument is not an std::string. The specific value
- /// is mangled into an intptr_t and the interpretation depends on exactly
+ /// is mangled into an uint64_t and the interpretation depends on exactly
/// what sort of argument kind it is.
- intptr_t DiagArgumentsVal[MaxArguments];
+ uint64_t DiagArgumentsVal[MaxArguments];
/// The values for the various substitution positions that have
/// string arguments.
@@ -807,6 +807,9 @@ public:
bool setSeverityForGroup(diag::Flavor Flavor, StringRef Group,
diag::Severity Map,
SourceLocation Loc = SourceLocation());
+ bool setSeverityForGroup(diag::Flavor Flavor, diag::Group Group,
+ diag::Severity Map,
+ SourceLocation Loc = SourceLocation());
/// Set the warning-as-error flag for the given diagnostic group.
///
@@ -1176,7 +1179,7 @@ public:
DiagStorage = nullptr;
}
- void AddTaggedVal(intptr_t V, DiagnosticsEngine::ArgumentKind Kind) const {
+ void AddTaggedVal(uint64_t V, DiagnosticsEngine::ArgumentKind Kind) const {
if (!DiagStorage)
DiagStorage = getStorage();
@@ -1399,6 +1402,12 @@ inline const StreamingDiagnostic &operator<<(const StreamingDiagnostic &DB,
return DB;
}
+inline const StreamingDiagnostic &operator<<(const StreamingDiagnostic &DB,
+ int64_t I) {
+ DB.AddTaggedVal(I, DiagnosticsEngine::ak_sint);
+ return DB;
+}
+
// We use enable_if here to prevent that this overload is selected for
// pointers or other arguments that are implicitly convertible to bool.
template <typename T>
@@ -1416,6 +1425,12 @@ inline const StreamingDiagnostic &operator<<(const StreamingDiagnostic &DB,
}
inline const StreamingDiagnostic &operator<<(const StreamingDiagnostic &DB,
+ uint64_t I) {
+ DB.AddTaggedVal(I, DiagnosticsEngine::ak_uint);
+ return DB;
+}
+
+inline const StreamingDiagnostic &operator<<(const StreamingDiagnostic &DB,
tok::TokenKind I) {
DB.AddTaggedVal(static_cast<unsigned>(I), DiagnosticsEngine::ak_tokenkind);
return DB;
@@ -1577,18 +1592,18 @@ public:
/// Return the specified signed integer argument.
/// \pre getArgKind(Idx) == DiagnosticsEngine::ak_sint
- int getArgSInt(unsigned Idx) const {
+ int64_t getArgSInt(unsigned Idx) const {
assert(getArgKind(Idx) == DiagnosticsEngine::ak_sint &&
"invalid argument accessor!");
- return (int)DiagObj->DiagStorage.DiagArgumentsVal[Idx];
+ return (int64_t)DiagObj->DiagStorage.DiagArgumentsVal[Idx];
}
/// Return the specified unsigned integer argument.
/// \pre getArgKind(Idx) == DiagnosticsEngine::ak_uint
- unsigned getArgUInt(unsigned Idx) const {
+ uint64_t getArgUInt(unsigned Idx) const {
assert(getArgKind(Idx) == DiagnosticsEngine::ak_uint &&
"invalid argument accessor!");
- return (unsigned)DiagObj->DiagStorage.DiagArgumentsVal[Idx];
+ return DiagObj->DiagStorage.DiagArgumentsVal[Idx];
}
/// Return the specified IdentifierInfo argument.
@@ -1602,7 +1617,7 @@ public:
/// Return the specified non-string argument in an opaque form.
/// \pre getArgKind(Idx) != DiagnosticsEngine::ak_std_string
- intptr_t getRawArg(unsigned Idx) const {
+ uint64_t getRawArg(unsigned Idx) const {
assert(getArgKind(Idx) != DiagnosticsEngine::ak_std_string &&
"invalid argument accessor!");
return DiagObj->DiagStorage.DiagArgumentsVal[Idx];
diff --git a/clang/include/clang/Basic/DiagnosticASTKinds.td b/clang/include/clang/Basic/DiagnosticASTKinds.td
index 496d86ee2fe7..d788c8517914 100644
--- a/clang/include/clang/Basic/DiagnosticASTKinds.td
+++ b/clang/include/clang/Basic/DiagnosticASTKinds.td
@@ -567,8 +567,8 @@ def remark_sanitize_address_insert_extra_padding_accepted : Remark<
def remark_sanitize_address_insert_extra_padding_rejected : Remark<
"-fsanitize-address-field-padding ignored for %0 because it "
"%select{is not C++|is packed|is a union|is trivially copyable|"
- "has trivial destructor|is standard layout|is in a blacklisted file|"
- "is blacklisted}1">, ShowInSystemHeader,
+ "has trivial destructor|is standard layout|is in a ignorelisted file|"
+ "is ignorelisted}1">, ShowInSystemHeader,
InGroup<SanitizeAddressRemarks>;
def warn_npot_ms_struct : Warning<
diff --git a/clang/include/clang/Basic/DiagnosticCategories.h b/clang/include/clang/Basic/DiagnosticCategories.h
index 0decf15080a0..2bbdeb31a7b7 100644
--- a/clang/include/clang/Basic/DiagnosticCategories.h
+++ b/clang/include/clang/Basic/DiagnosticCategories.h
@@ -19,6 +19,13 @@ namespace clang {
#undef GET_CATEGORY_TABLE
DiagCat_NUM_CATEGORIES
};
+
+ enum class Group {
+#define DIAG_ENTRY(GroupName, FlagNameOffset, Members, SubGroups) GroupName,
+#include "clang/Basic/DiagnosticGroups.inc"
+#undef CATEGORY
+#undef DIAG_ENTRY
+ };
} // end namespace diag
} // end namespace clang
diff --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td b/clang/include/clang/Basic/DiagnosticCommonKinds.td
index 4dff3379ed35..fe4ac5ed6cb0 100644
--- a/clang/include/clang/Basic/DiagnosticCommonKinds.td
+++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td
@@ -149,8 +149,8 @@ def err_nullability_conflicting : Error<
// OpenCL Section 6.8.g
def err_opencl_unknown_type_specifier : Error<
- "%select{OpenCL C|C++ for OpenCL}0 version %1 does not support the "
- "'%2' %select{type qualifier|storage class specifier}3">;
+ "%0 does not support the '%1' "
+ "%select{type qualifier|storage class specifier}2">;
def warn_unknown_attribute_ignored : Warning<
"unknown attribute %0 ignored">, InGroup<UnknownAttributes>;
@@ -298,6 +298,8 @@ def err_target_unsupported_unaligned : Error<
"the %0 sub-architecture does not support unaligned accesses">;
def err_target_unsupported_execute_only : Error<
"execute only is not supported for the %0 sub-architecture">;
+def err_target_unsupported_tp_hard : Error<
+ "hardware TLS register is not supported for the %0 sub-architecture">;
def err_target_unsupported_mcmse : Error<
"-mcmse is not supported for %0">;
def err_opt_not_valid_with_opt : Error<
@@ -306,6 +308,8 @@ def err_opt_not_valid_without_opt : Error<
"option '%0' cannot be specified without '%1'">;
def err_opt_not_valid_on_target : Error<
"option '%0' cannot be specified on this target">;
+def err_invalid_feature_combination : Error<
+ "invalid feature combination: %0">;
// Source manager
def err_cannot_open_file : Error<"cannot open file '%0': %1">, DefaultFatal;
diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td
index 3b4daa59f66b..ff8c36910e13 100644
--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -16,6 +16,8 @@ def err_drv_unsupported_opt_with_suggestion : Error<
"unsupported option '%0'; did you mean '%1'?">;
def err_drv_unsupported_opt_for_target : Error<
"unsupported option '%0' for target '%1'">;
+def err_drv_unsupported_opt_for_language_mode : Error<
+ "unsupported option '%0' for language mode '%1'">;
def err_drv_unsupported_option_argument : Error<
"unsupported argument '%1' to option '%0'">;
def err_drv_unknown_stdin_type : Error<
@@ -29,6 +31,9 @@ def err_drv_invalid_riscv_arch_name : Error<
"invalid arch name '%0', %1">;
def err_drv_invalid_riscv_ext_arch_name : Error<
"invalid arch name '%0', %1 '%2'">;
+def warn_drv_invalid_arch_name_with_suggestion : Warning<
+ "ignoring invalid /arch: argument '%0'; for %select{64|32}1-bit expected one of %2">,
+ InGroup<UnusedCommandLineArgument>;
def warn_drv_avr_mcu_not_specified : Warning<
"no target microcontroller specified on command line, cannot "
"link standard libraries, please pass -mmcu=<mcu name>">,
@@ -52,38 +57,57 @@ def warn_drv_avr_stdlib_not_linked: Warning<
"standard library not linked and so no interrupt vector table or "
"compiler runtime routines will be linked">,
InGroup<AVRRtlibLinkingQuirks>;
-def err_drv_cuda_bad_gpu_arch : Error<"Unsupported CUDA gpu architecture: %0">;
+def err_drv_cuda_bad_gpu_arch : Error<"unsupported CUDA gpu architecture: %0">;
def err_drv_no_cuda_installation : Error<
- "cannot find CUDA installation. Provide its path via --cuda-path, or pass "
- "-nocudainc to build without CUDA includes.">;
+ "cannot find CUDA installation; provide its path via '--cuda-path', or pass "
+ "'-nocudainc' to build without CUDA includes">;
def err_drv_no_cuda_libdevice : Error<
- "cannot find libdevice for %0. Provide path to different CUDA installation "
- "via --cuda-path, or pass -nocudalib to build without linking with libdevice.">;
+ "cannot find libdevice for %0; provide path to different CUDA installation "
+ "via '--cuda-path', or pass '-nocudalib' to build without linking with "
+ "libdevice">;
def err_drv_no_rocm_device_lib : Error<
- "cannot find ROCm device library%select{| for %1}0. Provide its path via --rocm-path or "
- "--rocm-device-lib-path, or pass -nogpulib to build without ROCm device library.">;
+ "cannot find ROCm device library%select{| for %1}0; provide its path via "
+ "'--rocm-path' or '--rocm-device-lib-path', or pass '-nogpulib' to build "
+ "without ROCm device library">;
def err_drv_no_hip_runtime : Error<
- "cannot find HIP runtime. Provide its path via --rocm-path, or pass "
- "-nogpuinc to build without HIP runtime.">;
+ "cannot find HIP runtime; provide its path via '--rocm-path', or pass "
+ "'-nogpuinc' to build without HIP runtime">;
def err_drv_undetermined_amdgpu_arch : Error<
- "Cannot determine AMDGPU architecture: %0. Consider passing it via --march.">;
+ "cannot determine AMDGPU architecture: %0; consider passing it via "
+ "'--march'">;
def err_drv_cuda_version_unsupported : Error<
"GPU arch %0 is supported by CUDA versions between %1 and %2 (inclusive), "
- "but installation at %3 is %4. Use --cuda-path to specify a different CUDA "
- "install, pass a different GPU arch with --cuda-gpu-arch, or pass "
- "--no-cuda-version-check.">;
-def warn_drv_unknown_cuda_version: Warning<
- "Unknown CUDA version. %0 Assuming the latest supported version %1">,
+ "but installation at %3 is %4; use '--cuda-path' to specify a different CUDA "
+ "install, pass a different GPU arch with '--cuda-gpu-arch', or pass "
+ "'--no-cuda-version-check'">;
+def warn_drv_new_cuda_version: Warning<
+ "CUDA version%0 is newer than the latest%select{| partially}1 supported version %2">,
+ InGroup<CudaUnknownVersion>;
+def warn_drv_partially_supported_cuda_version: Warning<
+ "CUDA version %0 is only partially supported">,
InGroup<CudaUnknownVersion>;
-def err_drv_cuda_host_arch : Error<"unsupported architecture '%0' for host compilation.">;
-def err_drv_mix_cuda_hip : Error<"Mixed Cuda and HIP compilation is not supported.">;
-def err_drv_bad_target_id : Error<"Invalid target ID: %0 (A target ID is a processor name "
- "followed by an optional list of predefined features post-fixed by a plus or minus sign deliminated "
- "by colon, e.g. 'gfx908:sramecc+:xnack-')">;
-def err_drv_bad_offload_arch_combo : Error<"Invalid offload arch combinations: %0 and %1 (For a specific "
- "processor, a feature should either exist in all offload archs, or not exist in any offload archs)">;
+def err_drv_cuda_host_arch : Error<
+ "unsupported architecture '%0' for host compilation">;
+def err_drv_mix_cuda_hip : Error<
+ "mixed CUDA and HIP compilation is not supported">;
+def err_drv_bad_target_id : Error<
+ "invalid target ID '%0'; format is a processor name followed by an optional "
+ "colon-delimited list of features followed by an enable/disable sign (e.g., "
+ "'gfx908:sramecc+:xnack-')">;
+def err_drv_bad_offload_arch_combo : Error<
+ "invalid offload arch combinations: '%0' and '%1' (for a specific processor, "
+ "a feature should either exist in all offload archs, or not exist in any "
+ "offload archs)">;
+def warn_drv_unsupported_option_for_offload_arch_req_feature : Warning<
+ "ignoring '%0' option as it is not currently supported for "
+ "offload arch '%1'. Use it with an offload arch containing '%2' instead">,
+ InGroup<OptionIgnored>;
+def warn_drv_unsupported_option_for_target : Warning<
+ "ignoring '%0' option as it is not currently supported for target '%1'">,
+ InGroup<OptionIgnored>;
+
def err_drv_invalid_thread_model_for_target : Error<
"invalid thread model '%0' in '%1' for this target">;
def err_drv_invalid_linker_name : Error<
@@ -171,8 +195,8 @@ def err_drv_invalid_argument_to_option : Error<
"invalid argument '%0' to -%1">;
def err_drv_malformed_sanitizer_ignorelist : Error<
"malformed sanitizer ignorelist: '%0'">;
-def err_drv_malformed_sanitizer_coverage_whitelist : Error<
- "malformed sanitizer coverage whitelist: '%0'">;
+def err_drv_malformed_sanitizer_coverage_allowlist : Error<
+ "malformed sanitizer coverage allowlist: '%0'">;
def err_drv_malformed_sanitizer_coverage_ignorelist : Error<
"malformed sanitizer coverage ignorelist: '%0'">;
def err_drv_duplicate_config : Error<
@@ -195,7 +219,7 @@ def err_target_unsupported_arch
def err_cpu_unsupported_isa
: Error<"CPU '%0' does not support '%1' execution mode">;
def err_arch_unsupported_isa
- : Error<"Architecture '%0' does not support '%1' execution mode">;
+ : Error<"architecture '%0' does not support '%1' execution mode">;
def err_drv_I_dash_not_supported : Error<
"'%0' not supported, please use -iquote instead">;
@@ -220,6 +244,7 @@ def err_drv_invalid_value : Error<"invalid value '%1' in '%0'">;
def err_drv_invalid_int_value : Error<"invalid integral value '%1' in '%0'">;
def err_drv_invalid_value_with_suggestion : Error<
"invalid value '%1' in '%0', expected one of: %2">;
+def err_drv_alignment_not_power_of_two : Error<"alignment is not a power of 2 in '%0'">;
def err_drv_invalid_remap_file : Error<
"invalid option '%0' not of the form <from-file>;<to-file>">;
def err_drv_invalid_gcc_output_type : Error<
@@ -235,6 +260,7 @@ def warn_invalid_ios_deployment_target : Warning<
DefaultError;
def err_invalid_macos_32bit_deployment_target : Error<
"32-bit targets are not supported when building for Mac Catalyst">;
+def err_drv_invalid_os_in_arg : Error<"invalid OS value '%0' in '%1'">;
def err_drv_conflicting_deployment_targets : Error<
"conflicting deployment targets, both '%0' and '%1' are present in environment">;
def err_arc_unsupported_on_runtime : Error<
@@ -262,20 +288,26 @@ def err_drv_optimization_remark_format : Error<
"unknown remark serializer format: '%0'">;
def err_drv_no_neon_modifier : Error<"[no]neon is not accepted as modifier, please use [no]simd instead">;
def err_drv_invalid_omp_target : Error<"OpenMP target is invalid: '%0'">;
+def err_drv_debug_no_new_runtime : Error<"OpenMP target device debugging enabled with incompatible runtime">;
def err_drv_incompatible_omp_arch : Error<"OpenMP target architecture '%0' pointer size is incompatible with host '%1'">;
def err_drv_omp_host_ir_file_not_found : Error<
- "The provided host compiler IR file '%0' is required to generate code for OpenMP target regions but cannot be found.">;
+ "provided host compiler IR file '%0' is required to generate code for OpenMP "
+ "target regions but cannot be found">;
def err_drv_omp_host_target_not_supported : Error<
- "The target '%0' is not a supported OpenMP host target.">;
+ "target '%0' is not a supported OpenMP host target">;
def err_drv_expecting_fopenmp_with_fopenmp_targets : Error<
- "The option -fopenmp-targets must be used in conjunction with a -fopenmp option compatible with offloading, please use -fopenmp=libomp or -fopenmp=libiomp5.">;
+ "'-fopenmp-targets' must be used in conjunction with a '-fopenmp' option "
+ "compatible with offloading; e.g., '-fopenmp=libomp' or '-fopenmp=libiomp5'">;
def err_drv_omp_offload_target_missingbcruntime : Error<
- "No library '%0' found in the default clang lib directory or in LIBRARY_PATH. Please use --libomptarget-%1-bc-path to specify %1 bitcode library.">;
-def err_drv_omp_offload_target_bcruntime_not_found : Error<"Bitcode library '%0' does not exist.">;
-def err_drv_omp_offload_target_cuda_version_not_support : Error<"NVPTX target requires CUDA 9.2 or above. CUDA %0 is detected.">;
+ "no library '%0' found in the default clang lib directory or in LIBRARY_PATH"
+ "; use '--libomptarget-%1-bc-path' to specify %1 bitcode library">;
+def err_drv_omp_offload_target_bcruntime_not_found : Error<
+ "bitcode library '%0' does not exist">;
+def err_drv_omp_offload_target_cuda_version_not_support : Error<
+ "NVPTX target requires CUDA 9.2 or above; CUDA %0 detected">;
def warn_drv_omp_offload_target_duplicate : Warning<
- "The OpenMP offloading target '%0' is similar to target '%1' already specified - will be ignored.">,
- InGroup<OpenMPTarget>;
+ "OpenMP offloading target '%0' is similar to target '%1' already specified; "
+ "will be ignored">, InGroup<OpenMPTarget>;
def err_drv_unsupported_embed_bitcode
: Error<"%0 is not supported with -fembed-bitcode">;
def err_drv_bitcode_unsupported_on_toolchain : Error<
@@ -302,7 +334,8 @@ def warn_drv_unsupported_debug_info_opt_for_target : Warning<
"debug information option '%0' is not supported for target '%1'">,
InGroup<UnsupportedTargetOpt>;
def warn_drv_dwarf_version_limited_by_target : Warning<
- "debug information option '%0' is not supported. It needs DWARF-%2 but target '%1' only provides DWARF-%3.">,
+ "debug information option '%0' is not supported; requires DWARF-%2 but "
+ "target '%1' only provides DWARF-%3">,
InGroup<UnsupportedTargetOpt>;
def warn_c_kext : Warning<
"ignoring -fapple-kext which is valid for C++ and Objective-C++ only">;
@@ -369,7 +402,7 @@ def err_sls_hardening_arm_not_supported : Error<
def note_drv_command_failed_diag_msg : Note<
"diagnostic msg: %0">;
def note_drv_t_option_is_global : Note<
- "The last /TC or /TP option takes precedence over earlier instances">;
+ "the last '/TC' or '/TP' option takes precedence over earlier instances">;
def note_drv_address_sanitizer_debug_runtime : Note<
"AddressSanitizer doesn't support linking with debug runtime libraries yet">;
def note_drv_use_standard : Note<"use '%0'"
@@ -409,7 +442,8 @@ def err_test_module_file_extension_format : Error<
def warn_slash_u_filename : Warning<"'/U%0' treated as the '/U' option">,
InGroup<DiagGroup<"slash-u-filename">>;
-def note_use_dashdash : Note<"Use '--' to treat subsequent arguments as filenames">;
+def note_use_dashdash : Note<
+ "use '--' to treat subsequent arguments as filenames">;
def err_drv_ropi_rwpi_incompatible_with_pic : Error<
"embedded and GOT-based position independence are incompatible">;
@@ -478,14 +512,14 @@ def warn_drv_ps4_sdk_dir : Warning<
def err_drv_unsupported_linker : Error<"unsupported value '%0' for -linker option">;
def err_drv_defsym_invalid_format : Error<"defsym must be of the form: sym=value: %0">;
-def err_drv_defsym_invalid_symval : Error<"Value is not an integer: %0">;
+def err_drv_defsym_invalid_symval : Error<"value is not an integer: %0">;
def warn_drv_msvc_not_found : Warning<
"unable to find a Visual Studio installation; "
"try running Clang from a developer command prompt">,
InGroup<DiagGroup<"msvc-not-found">>;
def warn_drv_fuse_ld_path : Warning<
- "'-fuse-ld=' taking a path is deprecated. Use '--ld-path=' instead">,
+ "'-fuse-ld=' taking a path is deprecated; use '--ld-path=' instead">,
InGroup<FUseLdPath>, DefaultIgnore;
def warn_drv_fine_grained_bitfield_accesses_ignored : Warning<
@@ -505,11 +539,11 @@ def warn_drv_global_isel_incomplete_opt : Warning<
InGroup<GlobalISel>;
def warn_drv_moutline_unsupported_opt : Warning<
- "The '%0' architecture does not support -moutline; flag ignored">,
+ "'%0' does not support '-moutline'; flag ignored">,
InGroup<OptionIgnored>;
def warn_drv_moutline_atomics_unsupported_opt : Warning<
- "The '%0' architecture does not support -moutline-atomics; flag ignored">,
+ "'%0' does not support '-moutline-atomics'; flag ignored">,
InGroup<OptionIgnored>;
def warn_drv_darwin_sdk_invalid_settings : Warning<
@@ -517,25 +551,28 @@ def warn_drv_darwin_sdk_invalid_settings : Warning<
InGroup<DiagGroup<"darwin-sdk-settings">>;
def err_drv_trivial_auto_var_init_zero_disabled : Error<
- "-ftrivial-auto-var-init=zero hasn't been enabled. Enable it at your own peril for benchmarking purpose only with "
- "-enable-trivial-auto-var-init-zero-knowing-it-will-be-removed-from-clang">;
+ "'-ftrivial-auto-var-init=zero' hasn't been enabled; enable it at your own "
+ "peril for benchmarking purpose only with "
+ "'-enable-trivial-auto-var-init-zero-knowing-it-will-be-removed-from-clang'">;
def err_drv_trivial_auto_var_init_stop_after_missing_dependency : Error<
- "-ftrivial-auto-var-init-stop-after=* is used without -ftrivial-auto-var-init=zero or -ftrivial-auto-var-init=pattern.">;
+ "'-ftrivial-auto-var-init-stop-after=*' is used without "
+ "'-ftrivial-auto-var-init=zero' or '-ftrivial-auto-var-init=pattern'">;
def err_drv_trivial_auto_var_init_stop_after_invalid_value : Error<
- "-ftrivial-auto-var-init-stop-after=* only accepts positive integers.">;
+ "'-ftrivial-auto-var-init-stop-after=*' only accepts positive integers">;
-def warn_drv_msp430_hwmult_unsupported : Warning<"the given MCU does not "
- "support hardware multiply, but -mhwmult is set to %0.">,
- InGroup<InvalidCommandLineArgument>;
-def warn_drv_msp430_hwmult_mismatch : Warning<"the given MCU supports %0 "
- "hardware multiply, but -mhwmult is set to %1.">,
+def warn_drv_msp430_hwmult_unsupported : Warning<
+ "the given MCU does not support hardware multiply, but '-mhwmult' is set to "
+ "%0">, InGroup<InvalidCommandLineArgument>;
+def warn_drv_msp430_hwmult_mismatch : Warning<
+ "the given MCU supports %0 hardware multiply, but '-mhwmult' is set to %1">,
InGroup<InvalidCommandLineArgument>;
-def warn_drv_msp430_hwmult_no_device : Warning<"no MCU device specified, but "
- "'-mhwmult' is set to 'auto', assuming no hardware multiply. Use -mmcu to "
- "specify a MSP430 device, or -mhwmult to set hardware multiply type "
- "explicitly.">, InGroup<InvalidCommandLineArgument>;
+def warn_drv_msp430_hwmult_no_device : Warning<
+ "no MCU device specified, but '-mhwmult' is set to 'auto', assuming no "
+ "hardware multiply; use '-mmcu' to specify an MSP430 device, or '-mhwmult' "
+ "to set the hardware multiply type explicitly">,
+ InGroup<InvalidCommandLineArgument>;
def warn_drv_libstdcxx_not_found : Warning<
"include path for libstdc++ headers not found; pass '-stdlib=libc++' on the "
@@ -544,17 +581,26 @@ def warn_drv_libstdcxx_not_found : Warning<
def err_drv_cannot_mix_options : Error<"cannot specify '%1' along with '%0'">;
-def err_drv_invalid_object_mode : Error<"OBJECT_MODE setting %0 is not recognized and is not a valid setting.">;
+def err_drv_invalid_object_mode : Error<
+ "OBJECT_MODE setting %0 is not recognized and is not a valid setting">;
def err_aix_unsupported_tls_model : Error<"TLS model '%0' is not yet supported on AIX">;
-def err_invalid_cxx_abi : Error<"Invalid C++ ABI name '%0'">;
+def err_invalid_cxx_abi : Error<"invalid C++ ABI name '%0'">;
def err_unsupported_cxx_abi : Error<"C++ ABI '%0' is not supported on target triple '%1'">;
-def note_cc1_round_trip_original : Note<"Original arguments in round-trip: %0">;
-def note_cc1_round_trip_generated : Note<"Generated arguments #%0 in round-trip: %1">;
-def remark_cc1_round_trip_generated : Remark<"Generated arguments #%0 in round-trip: %1">, InGroup<RoundTripCC1Args>;
-def err_cc1_round_trip_fail_then_ok : Error<"Original arguments parse failed, then succeeded in round-trip">;
-def err_cc1_round_trip_ok_then_fail : Error<"Generated arguments parse failed in round-trip">;
-def err_cc1_round_trip_mismatch : Error<"Generated arguments do not match in round-trip">;
+def note_cc1_round_trip_original : Note<"original arguments in round-trip: %0">;
+def note_cc1_round_trip_generated : Note<
+ "generated arguments #%0 in round-trip: %1">;
+def remark_cc1_round_trip_generated : Remark<
+ "generated arguments #%0 in round-trip: %1">, InGroup<RoundTripCC1Args>;
+def err_cc1_round_trip_fail_then_ok : Error<
+ "original arguments parse failed, then succeeded in round-trip">;
+def err_cc1_round_trip_ok_then_fail : Error<
+ "generated arguments parse failed in round-trip">;
+def err_cc1_round_trip_mismatch : Error<
+ "generated arguments do not match in round-trip">;
+
+def err_drv_ssp_missing_offset_argument : Error<
+ "'%0' is used without '-mstack-protector-guard-offset', and there is no default">;
}
diff --git a/clang/include/clang/Basic/DiagnosticFrontendKinds.td b/clang/include/clang/Basic/DiagnosticFrontendKinds.td
index 0f4ccec38550..eacb7e4de0ea 100644
--- a/clang/include/clang/Basic/DiagnosticFrontendKinds.td
+++ b/clang/include/clang/Basic/DiagnosticFrontendKinds.td
@@ -22,10 +22,11 @@ def note_fe_inline_asm_here : Note<"instantiated into assembly here">;
def err_fe_source_mgr : Error<"%0">, CatSourceMgr;
def warn_fe_source_mgr : Warning<"%0">, CatSourceMgr, InGroup<BackendSourceMgr>;
def note_fe_source_mgr : Note<"%0">, CatSourceMgr;
-def err_fe_cannot_link_module : Error<"cannot link module '%0': %1">,
- DefaultFatal;
+def err_fe_linking_module : Error<"cannot link module '%0': %1">, DefaultFatal;
+def warn_fe_linking_module : Warning<"linking module '%0': %1">, InGroup<LinkerWarnings>;
+def note_fe_linking_module : Note<"linking module '%0': %1">;
-def warn_fe_frame_larger_than : Warning<"stack frame size (%0) exceeds limit (%1) in %q2">,
+def warn_fe_frame_larger_than : Warning<"stack frame size (%0) exceeds limit (%1) in '%2'">,
BackendInfo, InGroup<BackendFrameLargerThan>;
def warn_fe_backend_frame_larger_than: Warning<"%0">,
BackendInfo, InGroup<BackendFrameLargerThan>;
@@ -72,6 +73,12 @@ def note_fe_backend_invalid_loc : Note<"could "
def err_fe_backend_unsupported : Error<"%0">, BackendInfo;
def warn_fe_backend_unsupported : Warning<"%0">, BackendInfo;
+def err_fe_backend_error_attr :
+ Error<"call to %0 declared with 'error' attribute: %1">, BackendInfo;
+def warn_fe_backend_warning_attr :
+ Warning<"call to %0 declared with 'warning' attribute: %1">, BackendInfo,
+ InGroup<BackendWarningAttributes>;
+
def err_fe_invalid_code_complete_file : Error<
"cannot locate code-completion file %0">, DefaultFatal;
def err_fe_dependency_file_requires_MT : Error<
@@ -229,6 +236,8 @@ def remark_module_build : Remark<"building module '%0' as '%1'">,
InGroup<ModuleBuild>;
def remark_module_build_done : Remark<"finished building module '%0'">,
InGroup<ModuleBuild>;
+def remark_module_lock : Remark<"locking '%0' to build module '%1'">,
+ InGroup<ModuleLock>;
def err_modules_embed_file_not_found :
Error<"file '%0' specified by '-fmodules-embed-file=' not found">,
DefaultFatal;
@@ -245,7 +254,7 @@ def err_invalid_vfs_overlay : Error<
"invalid virtual filesystem overlay file '%0'">, DefaultFatal;
def warn_option_invalid_ocl_version : Warning<
- "OpenCL version %0 does not support the option '%1'">, InGroup<Deprecated>;
+ "%0 does not support the option '%1'">, InGroup<Deprecated>;
def err_builtin_needs_feature : Error<"%0 needs target feature %1">;
def err_function_needs_feature : Error<
diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td
index 4b4928a7a00e..85d373845c81 100644
--- a/clang/include/clang/Basic/DiagnosticGroups.td
+++ b/clang/include/clang/Basic/DiagnosticGroups.td
@@ -54,7 +54,9 @@ def CompoundTokenSplit : DiagGroup<"compound-token-split",
CompoundTokenSplitBySpace]>;
def CoroutineMissingUnhandledException :
DiagGroup<"coroutine-missing-unhandled-exception">;
-def Coroutine : DiagGroup<"coroutine", [CoroutineMissingUnhandledException]>;
+def DeprecatedExperimentalCoroutine :
+ DiagGroup<"deprecated-experimental-coroutine">;
+def Coroutine : DiagGroup<"coroutine", [CoroutineMissingUnhandledException, DeprecatedExperimentalCoroutine]>;
def ObjCBoolConstantConversion : DiagGroup<"objc-bool-constant-conversion">;
def ConstantConversion : DiagGroup<"constant-conversion",
[BitFieldConstantConversion,
@@ -64,6 +66,8 @@ def StringConversion : DiagGroup<"string-conversion">;
def SignConversion : DiagGroup<"sign-conversion">;
def PointerBoolConversion : DiagGroup<"pointer-bool-conversion">;
def UndefinedBoolConversion : DiagGroup<"undefined-bool-conversion">;
+def BitwiseInsteadOfLogical : DiagGroup<"bitwise-instead-of-logical">;
+def BoolOperation : DiagGroup<"bool-operation", [BitwiseInsteadOfLogical]>;
def BoolConversion : DiagGroup<"bool-conversion", [PointerBoolConversion,
UndefinedBoolConversion]>;
def IntConversion : DiagGroup<"int-conversion">;
@@ -184,6 +188,7 @@ def DeprecatedThisCapture : DiagGroup<"deprecated-this-capture">;
def DeprecatedVolatile : DiagGroup<"deprecated-volatile">;
def DeprecatedWritableStr : DiagGroup<"deprecated-writable-strings",
[CXX11CompatDeprecatedWritableStr]>;
+def DeprecatedPragma : DiagGroup<"deprecated-pragma">;
// FIXME: Why is DeprecatedImplementations not in this group?
def Deprecated : DiagGroup<"deprecated", [DeprecatedAnonEnumEnumConversion,
DeprecatedArrayCompare,
@@ -198,6 +203,7 @@ def Deprecated : DiagGroup<"deprecated", [DeprecatedAnonEnumEnumConversion,
DeprecatedEnumEnumConversion,
DeprecatedEnumFloatConversion,
DeprecatedIncrementBool,
+ DeprecatedPragma,
DeprecatedRegister,
DeprecatedThisCapture,
DeprecatedVolatile,
@@ -393,6 +399,7 @@ def Dangling : DiagGroup<"dangling", [DanglingField,
DanglingGsl,
ReturnStackAddress]>;
def DistributedObjectModifiers : DiagGroup<"distributed-object-modifiers">;
+def DllexportExplicitInstantiationDecl : DiagGroup<"dllexport-explicit-instantiation-decl">;
def ExcessInitializers : DiagGroup<"excess-initializers">;
def ExpansionToDefined : DiagGroup<"expansion-to-defined">;
def FlagEnum : DiagGroup<"flag-enum">;
@@ -400,7 +407,8 @@ def IncrementBool : DiagGroup<"increment-bool", [DeprecatedIncrementBool]>;
def InfiniteRecursion : DiagGroup<"infinite-recursion">;
def PureVirtualCallFromCtorDtor: DiagGroup<"call-to-pure-virtual-from-ctor-dtor">;
def GNUImaginaryConstant : DiagGroup<"gnu-imaginary-constant">;
-def IgnoredQualifiers : DiagGroup<"ignored-qualifiers">;
+def IgnoredReferenceQualifiers : DiagGroup<"ignored-reference-qualifiers">;
+def IgnoredQualifiers : DiagGroup<"ignored-qualifiers", [IgnoredReferenceQualifiers]>;
def : DiagGroup<"import">;
def GNUIncludeNext : DiagGroup<"gnu-include-next">;
def IncompatibleMSStruct : DiagGroup<"incompatible-ms-struct">;
@@ -462,6 +470,7 @@ def MismatchedParameterTypes : DiagGroup<"mismatched-parameter-types">;
def MismatchedReturnTypes : DiagGroup<"mismatched-return-types">;
def MismatchedTags : DiagGroup<"mismatched-tags">;
def MissingFieldInitializers : DiagGroup<"missing-field-initializers">;
+def ModuleLock : DiagGroup<"module-lock">;
def ModuleBuild : DiagGroup<"module-build">;
def ModuleImport : DiagGroup<"module-import">;
def ModuleConflict : DiagGroup<"module-conflict">;
@@ -642,6 +651,8 @@ def AmbiguousMacro : DiagGroup<"ambiguous-macro">;
def KeywordAsMacro : DiagGroup<"keyword-macro">;
def ReservedIdAsMacro : DiagGroup<"reserved-macro-identifier">;
def ReservedIdAsMacroAlias : DiagGroup<"reserved-id-macro", [ReservedIdAsMacro]>;
+def RestrictExpansionMacro : DiagGroup<"restrict-expansion">;
+def FinalMacro : DiagGroup<"final-macro">;
// Just silence warnings about -Wstrict-aliasing for now.
def : DiagGroup<"strict-aliasing=0">;
@@ -742,6 +753,7 @@ def UnusedLocalTypedef : DiagGroup<"unused-local-typedef">;
def UnusedPropertyIvar : DiagGroup<"unused-property-ivar">;
def UnusedGetterReturnValue : DiagGroup<"unused-getter-return-value">;
def UsedButMarkedUnused : DiagGroup<"used-but-marked-unused">;
+def UsedSearchPath : DiagGroup<"search-path-usage">;
def UserDefinedLiterals : DiagGroup<"user-defined-literals">;
def UserDefinedWarnings : DiagGroup<"user-defined-warnings">;
def ReorderCtor : DiagGroup<"reorder-ctor">;
@@ -816,8 +828,10 @@ def ReservedIdentifier : DiagGroup<"reserved-identifier",
// under separate flags.
//
def UnreachableCodeLoopIncrement : DiagGroup<"unreachable-code-loop-increment">;
+def UnreachableCodeFallthrough : DiagGroup<"unreachable-code-fallthrough">;
def UnreachableCode : DiagGroup<"unreachable-code",
- [UnreachableCodeLoopIncrement]>;
+ [UnreachableCodeLoopIncrement,
+ UnreachableCodeFallthrough]>;
def UnreachableCodeBreak : DiagGroup<"unreachable-code-break">;
def UnreachableCodeReturn : DiagGroup<"unreachable-code-return">;
def UnreachableCodeAggressive : DiagGroup<"unreachable-code-aggressive",
@@ -940,6 +954,7 @@ def Extra : DiagGroup<"extra", [
]>;
def Most : DiagGroup<"most", [
+ BoolOperation,
CharSubscript,
Comment,
DeleteNonVirtualDtor,
@@ -1185,6 +1200,9 @@ def ASM : DiagGroup<"asm", [
ASMOperandWidths
]>;
+// Linker warnings.
+def LinkerWarnings : DiagGroup<"linker-warnings">;
+
// OpenMP warnings.
def SourceUsesOpenMP : DiagGroup<"source-uses-openmp">;
def OpenMPClauses : DiagGroup<"openmp-clauses">;
@@ -1210,6 +1228,7 @@ def BackendOptimizationRemark : DiagGroup<"pass">;
def BackendOptimizationRemarkMissed : DiagGroup<"pass-missed">;
def BackendOptimizationRemarkAnalysis : DiagGroup<"pass-analysis">;
def BackendOptimizationFailure : DiagGroup<"pass-failed">;
+def BackendWarningAttributes : DiagGroup<"attribute-warning">;
// Instrumentation based profiling warnings.
def ProfileInstrMissing : DiagGroup<"profile-instr-missing">;
@@ -1247,8 +1266,9 @@ def OptionIgnored : DiagGroup<"option-ignored">;
def UnknownArgument : DiagGroup<"unknown-argument">;
// A warning group for warnings about code that clang accepts when
-// compiling OpenCL C/C++ but which is not compatible with the SPIR spec.
+// compiling OpenCL C/C++ but which is not compatible with the SPIR(-V) spec.
def SpirCompat : DiagGroup<"spir-compat">;
+def : DiagGroup<"spirv-compat", [SpirCompat]>; // Alias.
// Warning for the GlobalISel options.
def GlobalISel : DiagGroup<"global-isel">;
@@ -1303,3 +1323,11 @@ def WebAssemblyExceptionSpec : DiagGroup<"wasm-exception-spec">;
def RTTI : DiagGroup<"rtti">;
def OpenCLCoreFeaturesDiagGroup : DiagGroup<"pedantic-core-features">;
+
+// Warnings and extensions to make preprocessor macro usage pedantic.
+def PedanticMacros : DiagGroup<"pedantic-macros",
+ [DeprecatedPragma,
+ MacroRedefined,
+ BuiltinMacroRedefined,
+ RestrictExpansionMacro,
+ FinalMacro]>;
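Editor's note: this file-level change mostly introduces new warning groups and rewires existing ones; for instance, the new BitwiseInsteadOfLogical group nests under the new BoolOperation group, which is in turn added to -Wmost below. A minimal sketch (not part of the patch; function names invented) of the kind of code the new subgroup is meant to catch, using the warn_bitwise_instead_of_logical text added later in this patch:

    bool update();                 // predicate with side effects
    bool step(bool ok) {
      return ok & update();        // warning: use of bitwise '&' with boolean operands
    }                              // [-Wbitwise-instead-of-logical], implied by -Wbool-operation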
diff --git a/clang/include/clang/Basic/DiagnosticIDs.h b/clang/include/clang/Basic/DiagnosticIDs.h
index 288504def5eb..aef86516707c 100644
--- a/clang/include/clang/Basic/DiagnosticIDs.h
+++ b/clang/include/clang/Basic/DiagnosticIDs.h
@@ -25,6 +25,8 @@ namespace clang {
// Import the diagnostic enums themselves.
namespace diag {
+ enum class Group;
+
// Size of each of the diagnostic categories.
enum {
DIAG_SIZE_COMMON = 300,
@@ -224,6 +226,10 @@ public:
///
static bool isBuiltinExtensionDiag(unsigned DiagID, bool &EnabledByDefault);
+ /// Given a group ID, returns the flag that toggles the group.
+ /// For example, for Group::DeprecatedDeclarations, returns
+ /// "deprecated-declarations".
+ static StringRef getWarningOptionForGroup(diag::Group);
/// Return the lowest-level warning option that enables the specified
/// diagnostic.
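Editor's note: a hypothetical usage sketch of the new static helper declared above; the wrapper function is illustrative only, and the example enumerator comes from the doc comment in the hunk:

    #include "clang/Basic/DiagnosticIDs.h"
    using namespace clang;

    llvm::StringRef flagFor(diag::Group G) {
      // e.g. G == diag::Group::DeprecatedDeclarations -> "deprecated-declarations"
      return DiagnosticIDs::getWarningOptionForGroup(G);
    }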
diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td
index ce6d0d0394b4..a4436208799f 100644
--- a/clang/include/clang/Basic/DiagnosticLexKinds.td
+++ b/clang/include/clang/Basic/DiagnosticLexKinds.td
@@ -113,8 +113,10 @@ def warn_four_char_character_literal : Warning<
// Unicode and UCNs
def err_invalid_utf8 : Error<
"source file is not valid UTF-8">;
-def err_non_ascii : Error<
- "non-ASCII characters are not allowed outside of literals and identifiers">;
+def err_character_not_allowed : Error<
+ "unexpected character <U+%0>">;
+def err_character_not_allowed_identifier : Error<
+ "character <U+%0> not allowed %select{in|at the start of}1 an identifier">;
def ext_unicode_whitespace : ExtWarn<
"treating Unicode character as whitespace">,
InGroup<DiagGroup<"unicode-whitespace">>;
@@ -125,6 +127,15 @@ def warn_utf8_symbol_zero_width : Warning<
"identifier contains Unicode character <U+%0> that is invisible in "
"some environments">, InGroup<DiagGroup<"unicode-zero-width">>;
+def ext_delimited_escape_sequence : Extension<
+ "delimited escape sequences are a Clang extension">,
+ InGroup<DiagGroup<"delimited-escape-sequence-extension">>;
+def err_delimited_escape_empty : Error<
+ "delimited escape sequence cannot be empty">;
+def err_delimited_escape_missing_brace: Error<
+ "expected '{' after '\\%0' escape sequence">;
+def err_delimited_escape_invalid : Error<
+ "invalid digit '%0' in escape sequence">;
def err_hex_escape_no_digits : Error<
"\\%0 used with no following hex digits">;
def warn_ucn_escape_no_digits : Warning<
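Editor's note: the delimited-escape diagnostics above cover the brace-delimited escape forms Clang now accepts as an extension. A minimal sketch (not part of the patch) of what is accepted and what each new error describes:

    const char *ok = "\x{41}\u{1F642}";   // delimited hex escape and delimited universal-character-name
    // "\x{}"  would produce err_delimited_escape_empty
    // "\u{G}" would produce err_delimited_escape_invalid ("invalid digit 'G' in escape sequence")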
@@ -132,6 +143,12 @@ def warn_ucn_escape_no_digits : Warning<
"treating as '\\' followed by identifier">, InGroup<Unicode>;
def err_ucn_escape_incomplete : Error<
"incomplete universal character name">;
+def warn_delimited_ucn_incomplete : Warning<
+ "incomplete delimited universal character name; "
+ "treating as '\\' 'u' '{' identifier">, InGroup<Unicode>;
+def warn_delimited_ucn_empty : Warning<
+ "empty delimited universal character name; "
+ "treating as '\\' 'u' '{' '}'">, InGroup<Unicode>;
def warn_ucn_escape_incomplete : Warning<
"incomplete universal character name; "
"treating as '\\' followed by identifier">, InGroup<Unicode>;
@@ -150,9 +167,6 @@ def warn_c99_compat_unicode_id : Warning<
"%select{using this character in an identifier|starting an identifier with "
"this character}0 is incompatible with C99">,
InGroup<C99Compat>, DefaultIgnore;
-def warn_cxx98_compat_unicode_id : Warning<
- "using this character in an identifier is incompatible with C++98">,
- InGroup<CXX98Compat>, DefaultIgnore;
def warn_cxx98_compat_literal_ucn_escape_basic_scs : Warning<
"specifying character '%0' with a universal character name "
@@ -184,12 +198,10 @@ def warn_c2x_compat_digit_separator : Warning<
InGroup<CPre2xCompat>, DefaultIgnore;
def err_digit_separator_not_between_digits : Error<
"digit separator cannot appear at %select{start|end}0 of digit sequence">;
-def warn_extraneous_char_constant : Warning<
- "extraneous characters in character constant ignored">;
def warn_char_constant_too_large : Warning<
"character constant too long for its type">;
-def err_multichar_utf_character_literal : Error<
- "Unicode character literals may not contain multiple characters">;
+def err_multichar_character_literal : Error<
+ "%select{wide|Unicode}0 character literals may not contain multiple characters">;
def err_exponent_has_no_digits : Error<"exponent has no digits">;
def err_hex_constant_requires : Error<
"hexadecimal floating %select{constant|literal}0 requires "
@@ -257,7 +269,9 @@ def err_bad_character_encoding : Error<
def warn_bad_character_encoding : ExtWarn<
"illegal character encoding in character literal">,
InGroup<InvalidSourceEncoding>;
-def err_lexing_string : Error<"failure when lexing a string">;
+def err_lexing_string : Error<"failure when lexing a string literal">;
+def err_lexing_char : Error<"failure when lexing a character literal">;
+def err_lexing_numeric : Error<"failure when lexing a numeric literal">;
def err_placeholder_in_source : Error<"editor placeholder in source file">;
//===----------------------------------------------------------------------===//
@@ -300,6 +314,13 @@ def pp_pragma_once_in_main_file : Warning<"#pragma once in main file">,
def pp_pragma_sysheader_in_main_file : Warning<
"#pragma system_header ignored in main file">,
InGroup<DiagGroup<"pragma-system-header-outside-header">>;
+
+def err_pragma_include_instead_not_sysheader : Error<
+ "'#pragma clang include_instead' cannot be used outside of system headers">;
+def err_pragma_include_instead_system_reserved : Error<
+ "header '%0' is an implementation detail; #include %select{'%2'|either '%2' "
+ "or '%3'|one of %2}1 instead">;
+
def pp_poisoning_existing_macro : Warning<"poisoning existing macro">;
def pp_out_of_date_dependency : Warning<
"current file is older than dependency %0">;
@@ -393,6 +414,10 @@ def ext_embedded_directive : Extension<
def ext_missing_varargs_arg : Extension<
"must specify at least one argument for '...' parameter of variadic macro">,
InGroup<GNUZeroVariadicMacroArguments>;
+def warn_cxx17_compat_missing_varargs_arg : Warning<
+ "passing no argument for the '...' parameter of a variadic macro is "
+ "incompatible with C++ standards before C++20">,
+ InGroup<CXXPre20Compat>, DefaultIgnore;
def ext_empty_fnmacro_arg : Extension<
"empty macro arguments are a C99 feature">, InGroup<C99>;
def warn_cxx98_compat_empty_fnmacro_arg : Warning<
@@ -423,6 +448,9 @@ def warn_pp_hdrstop_filename_ignored : Warning<
"#pragma hdrstop filename not supported, "
"/Fp can be used to specify precompiled header filename">,
InGroup<ClangClPch>;
+def remark_pp_search_path_usage : Remark<
+ "search path used: '%0'">,
+ InGroup<UsedSearchPath>;
def err_pp_file_not_found_angled_include_not_fatal : Error<
"'%0' file not found with <angled> %select{include|import}1; "
"use \"quotes\" instead">;
@@ -519,6 +547,27 @@ def warn_pragma_warning_expected_number :
ExtWarn<"#pragma warning expected a warning number">,
InGroup<UnknownPragmas>;
+// - #pragma deprecated(...)
+def warn_pragma_deprecated_macro_use :
+ ExtWarn<"macro %0 has been marked as deprecated%select{|: %2}1">,
+ InGroup<DeprecatedPragma>;
+
+// - #pragma clang restrict_expansion(...)
+def warn_pragma_restrict_expansion_macro_use :
+ ExtWarn<"macro %0 has been marked as unsafe for use in headers"
+ "%select{|: %2}1">,
+ InGroup<RestrictExpansionMacro>;
+
+// - Note for macro annotations.
+def note_pp_macro_annotation :
+ Note<"macro marked '%select{deprecated|restrict_expansion|final}0' here">;
+
+// - #pragma clang final(...)
+def warn_pragma_final_macro :
+ ExtWarn<"macro %0 has been marked as final and should not be "
+ "%select{undefined|redefined}1">,
+ InGroup<FinalMacro>, ShowInSystemHeader;
+
// - #pragma execution_character_set(...)
def warn_pragma_exec_charset_expected :
ExtWarn<"#pragma execution_character_set expected '%0'">,
diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td
index 7e4b0841e06b..1bc2e8b0c7ef 100644
--- a/clang/include/clang/Basic/DiagnosticParseKinds.td
+++ b/clang/include/clang/Basic/DiagnosticParseKinds.td
@@ -549,6 +549,12 @@ def err_expected_init_in_condition_lparen : Error<
"variable declaration in condition cannot have a parenthesized initializer">;
def err_extraneous_rparen_in_condition : Error<
"extraneous ')' after condition, expected a statement">;
+def ext_alias_in_init_statement : ExtWarn<
+ "alias declaration in this context is a C++2b extension">,
+ InGroup<CXX2b>;
+def warn_cxx20_alias_in_init_statement : Warning<
+ "alias declaration in this context is incompatible with C++ standards before C++2b">,
+ DefaultIgnore, InGroup<CXXPre2bCompat>;
def warn_dangling_else : Warning<
"add explicit braces to avoid dangling else">,
InGroup<DanglingElse>;
@@ -626,6 +632,13 @@ def ext_constexpr_if : ExtWarn<
def warn_cxx14_compat_constexpr_if : Warning<
"constexpr if is incompatible with C++ standards before C++17">,
DefaultIgnore, InGroup<CXXPre17Compat>;
+def ext_consteval_if : ExtWarn<
+ "consteval if is a C++2b extension">,
+ InGroup<CXX2b>;
+def warn_cxx20_compat_consteval_if : Warning<
+ "consteval if is incompatible with C++ standards before C++2b">,
+ InGroup<CXXPre2bCompat>, DefaultIgnore;
+
def ext_init_statement : ExtWarn<
"'%select{if|switch}0' initialization statements are a C++17 extension">,
InGroup<CXX17>;
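Editor's note: a minimal sketch (not part of the patch) of the construct behind ext_consteval_if / warn_cxx20_compat_consteval_if; under -std=c++20 Clang accepts it with the extension warning:

    constexpr int pick() {
      if consteval {     // 'consteval if' is a C++2b extension in C++20 mode
        return 1;        // taken during constant evaluation
      } else {
        return 2;        // taken at run time
      }
    }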
@@ -742,8 +755,9 @@ def err_unknown_template_name : Error<
"unknown template name %0">;
def err_expected_comma_greater : Error<
"expected ',' or '>' in template-parameter-list">;
-def err_class_on_template_template_param : Error<
- "template template parameter requires 'class' after the parameter list">;
+def err_class_on_template_template_param
+ : Error<"template template parameter requires 'class'%select{| or "
+ "'typename'}0 after the parameter list">;
def ext_template_template_param_typename : ExtWarn<
"template template parameter using 'typename' is a C++17 extension">,
InGroup<CXX17>;
@@ -806,10 +820,10 @@ def err_requires_expr_expected_type_constraint : Error<
def err_requires_expr_simple_requirement_noexcept : Error<
"'noexcept' can only be used in a compound requirement (with '{' '}' around "
"the expression)">;
-def warn_requires_expr_in_simple_requirement : Warning<
- "this requires expression will only be checked for syntactic validity; did "
+def err_requires_expr_in_simple_requirement : Error<
+ "requires expression in requirement body; did "
"you intend to place it in a nested requirement? (add another 'requires' "
- "before the expression)">, InGroup<DiagGroup<"requires-expression">>;
+ "before the expression)">;
def err_missing_dependent_template_keyword : Error<
"use 'template' keyword to treat '%0' as a dependent template name">;
@@ -944,6 +958,9 @@ def err_duplicate_class_virt_specifier : Error<
def err_duplicate_virt_specifier : Error<
"class member already marked '%0'">;
+def err_virt_specifier_outside_class : Error<
+ "'%0' specifier is not allowed outside a class definition">;
+
def err_expected_parameter_pack : Error<
"expected the name of a parameter pack">;
def err_paren_sizeof_parameter_pack : Error<
@@ -1102,6 +1119,9 @@ def warn_pragma_expected_integer : Warning<
def warn_pragma_ms_struct : Warning<
"incorrect use of '#pragma ms_struct on|off' - ignored">,
InGroup<IgnoredPragmas>;
+def warn_pragma_ms_fenv_access : Warning<
+ "incorrect use of '#pragma fenv_access (on|off)' - ignored">,
+ InGroup<IgnoredPragmas>;
def warn_pragma_extra_tokens_at_eol : Warning<
"extra tokens at end of '#pragma %0' - ignored">,
InGroup<IgnoredPragmas>;
@@ -1167,9 +1187,6 @@ def ext_stdc_pragma_ignored : ExtWarn<"unknown pragma in STDC namespace">,
// The C standard 7.6.1p2 says "The [FENV_ACCESS] pragma shall occur either
// outside external declarations or preceding all explicit declarations and
// statements inside a compound statement.
-def err_pragma_stdc_fenv_access_scope : Error<
- "'#pragma STDC FENV_ACCESS' can only appear at file scope or at the start of"
- " a compound statement">;
def warn_stdc_fenv_round_not_supported :
Warning<"pragma STDC FENV_ROUND is not supported">,
InGroup<UnknownPragmas>;
@@ -1303,8 +1320,8 @@ def err_omp_decl_in_declare_simd_variant : Error<
def err_omp_unknown_map_type : Error<
"incorrect map type, expected one of 'to', 'from', 'tofrom', 'alloc', 'release', or 'delete'">;
def err_omp_unknown_map_type_modifier : Error<
- "incorrect map type modifier, expected 'always', 'close', "
- "%select{or 'mapper'|'mapper', or 'present'}0">;
+ "incorrect map type modifier, expected one of: 'always', 'close', 'mapper'"
+ "%select{|, 'present'}0%select{|, 'ompx_hold'}1">;
def err_omp_map_type_missing : Error<
"missing map type">;
def err_omp_map_type_modifier_missing : Error<
@@ -1342,8 +1359,11 @@ def err_omp_mapper_illegal_identifier : Error<
"illegal OpenMP user-defined mapper identifier">;
def err_omp_mapper_expected_declarator : Error<
"expected declarator on 'omp declare mapper' directive">;
+def err_omp_unexpected_append_op : Error<
+ "unexpected operation specified in 'append_args' clause, expected 'interop'">;
def err_omp_declare_variant_wrong_clause : Error<
- "expected '%0' clause on 'omp declare variant' directive">;
+ "expected %select{'match'|'match', 'adjust_args', or 'append_args'}0 clause "
+ "on 'omp declare variant' directive">;
def err_omp_declare_variant_duplicate_nested_trait : Error<
"nested OpenMP context selector contains duplicated trait '%0'"
" in selector '%1' and set '%2' with different score">;
@@ -1436,6 +1456,9 @@ def warn_omp51_compat_attributes : Warning<
"specifying OpenMP directives with [[]] is incompatible with OpenMP "
"standards before OpenMP 5.1">,
InGroup<OpenMPPre51Compat>, DefaultIgnore;
+def err_omp_expected_colon : Error<"missing ':' in %0">;
+def err_omp_expected_context_selector
+ : Error<"expected valid context selector in %0">;
// Pragma loop support.
def err_pragma_loop_missing_argument : Error<
@@ -1472,7 +1495,7 @@ def warn_pragma_unroll_cuda_value_in_parens : Warning<
InGroup<CudaCompat>;
def warn_cuda_attr_lambda_position : Warning<
- "nvcc does not allow '__%0__' to appear after '()' in lambdas">,
+ "nvcc does not allow '__%0__' to appear after the parameter list in lambdas">,
InGroup<CudaCompat>;
def warn_pragma_force_cuda_host_device_bad_arg : Warning<
"incorrect use of #pragma clang force_cuda_host_device begin|end">,
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 108f1796415c..dc67f86f25ca 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -66,6 +66,7 @@ def warn_infinite_recursive_function : Warning<
def warn_comma_operator : Warning<"possible misuse of comma operator here">,
InGroup<DiagGroup<"comma">>, DefaultIgnore;
def note_cast_to_void : Note<"cast expression to void to silence warning">;
+def note_cast_operand_to_int : Note<"cast one or both operands to int to silence this warning">;
// Constant expressions
def err_expr_not_ice : Error<
@@ -82,11 +83,11 @@ def err_typecheck_converted_constant_expression_indirect : Error<
"bind reference to a temporary">;
def err_expr_not_cce : Error<
"%select{case value|enumerator value|non-type template argument|"
- "array size|explicit specifier argument}0 "
+ "array size|explicit specifier argument|noexcept specifier argument}0 "
"is not a constant expression">;
def ext_cce_narrowing : ExtWarn<
"%select{case value|enumerator value|non-type template argument|"
- "array size|explicit specifier argument}0 "
+ "array size|explicit specifier argument|noexcept specifier argument}0 "
"%select{cannot be narrowed from type %2 to %3|"
"evaluates to %2, which cannot be narrowed to type %3}1">,
InGroup<CXX11Narrowing>, DefaultError, SFINAEFailure;
@@ -267,10 +268,12 @@ def err_invalid_vector_double_decl_spec : Error <
def err_invalid_vector_bool_int128_decl_spec : Error <
"use of '__int128' with '__vector bool' requires VSX support enabled (on "
"POWER10 or later)">;
+def err_invalid_vector_int128_decl_spec : Error<
+ "use of '__int128' with '__vector' requires extended Altivec support"
+ " (available on POWER8 or later)">;
def err_invalid_vector_long_long_decl_spec : Error <
- "use of 'long long' with '__vector bool' requires VSX support (available on "
- "POWER7 or later) or extended Altivec support (available on POWER8 or later) "
- "to be enabled">;
+ "use of 'long long' with '__vector' requires VSX support (available on "
+ "POWER7 or later) to be enabled">;
def err_invalid_vector_long_double_decl_spec : Error<
"cannot use 'long double' with '__vector'">;
def warn_vector_long_decl_spec_combination : Warning<
@@ -388,6 +391,7 @@ def warn_reserved_extern_symbol: Warning<
"identifier %0 is reserved because %select{"
"<ERROR>|" // ReservedIdentifierStatus::NotReserved
"it starts with '_' at global scope|"
+ "it starts with '_' and has C language linkage|"
"it starts with '__'|"
"it starts with '_' followed by a capital letter|"
"it contains '__'}1">,
@@ -682,6 +686,9 @@ def warn_unreachable_return : Warning<
def warn_unreachable_loop_increment : Warning<
"loop will run at most once (loop increment never executed)">,
InGroup<UnreachableCodeLoopIncrement>, DefaultIgnore;
+def warn_unreachable_fallthrough_attr : Warning<
+ "fallthrough annotation in unreachable code">,
+ InGroup<UnreachableCodeFallthrough>, DefaultIgnore;
def note_unreachable_silence : Note<
"silence by adding parentheses to mark code as explicitly dead">;
@@ -816,11 +823,20 @@ def warn_fortify_source_size_mismatch : Warning<
"'%0' size argument is too large; destination buffer has size %1,"
" but size argument is %2">, InGroup<FortifySource>;
+def warn_fortify_strlen_overflow: Warning<
+ "'%0' will always overflow; destination buffer has size %1,"
+ " but the source string has length %2 (including NUL byte)">,
+ InGroup<FortifySource>;
+
def warn_fortify_source_format_overflow : Warning<
"'%0' will always overflow; destination buffer has size %1,"
" but format string expands to at least %2">,
InGroup<FortifySource>;
+def warn_fortify_scanf_overflow : Warning<
+ "'%0' may overflow; destination buffer in argument %1 has size "
+ "%2, but the corresponding specifier may require size %3">,
+ InGroup<FortifySource>;
/// main()
// static main() is not an error in C, just in C++.
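Editor's note: a minimal sketch (not part of the patch) of the situations the new -Wfortify-source entries describe; the buffer sizes are chosen so the overflow is provable at compile time, and whether a given call is flagged depends on what the frontend can constant-evaluate:

    #include <string.h>
    #include <stdio.h>

    void demo(void) {
      char buf[4];
      strcpy(buf, "hello");          // 'strcpy' will always overflow; destination has size 4,
                                     // but the source string has length 6 (including NUL byte)
      char out[2];
      sscanf("12345", "%4s", out);   // 'sscanf' may overflow; the "%4s" specifier may require
    }                                // size 5, but the destination buffer has size 2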
@@ -908,6 +924,9 @@ def warn_pragma_options_align_reset_failed : Warning<
InGroup<IgnoredPragmas>;
def err_pragma_options_align_mac68k_target_unsupported : Error<
"mac68k alignment pragma is not supported on this target">;
+def warn_pragma_align_not_xl_compatible : Warning<
+ "#pragma align(packed) may not be compatible with objects generated with AIX XL C/C++">,
+ InGroup<AIXCompat>;
def warn_pragma_pack_invalid_alignment : Warning<
"expected #pragma pack parameter to be '1', '2', '4', '8', or '16'">,
InGroup<IgnoredPragmas>;
@@ -933,7 +952,8 @@ def warn_pragma_pack_pop_identifier_and_alignment : Warning<
def warn_pragma_pop_failed : Warning<"#pragma %0(pop, ...) failed: %1">,
InGroup<IgnoredPragmas>;
def err_pragma_fc_pp_scope : Error<
- "'#pragma float_control push/pop' can only appear at file scope or namespace scope">;
+ "'#pragma float_control push/pop' can only appear at file or namespace scope "
+ "or within a language linkage specification">;
def err_pragma_fc_noprecise_requires_nofenv : Error<
"'#pragma float_control(precise, off)' is illegal when fenv_access is enabled">;
def err_pragma_fc_except_requires_precise : Error<
@@ -1493,6 +1513,10 @@ def err_static_assert_failed : Error<"static_assert failed%select{ %1|}0">;
def err_static_assert_requirement_failed : Error<
"static_assert failed due to requirement '%0'%select{ %2|}1">;
+def warn_consteval_if_always_true : Warning<
+ "consteval if is always true in an %select{unevaluated|immediate}0 context">,
+ InGroup<DiagGroup<"redundant-consteval-if">>;
+
def ext_inline_variable : ExtWarn<
"inline variables are a C++17 extension">, InGroup<CXX17>;
def warn_cxx14_compat_inline_variable : Warning<
@@ -1624,8 +1648,7 @@ def warn_weak_vtable : Warning<
"emitted in every translation unit">,
InGroup<DiagGroup<"weak-vtables">>, DefaultIgnore;
def warn_weak_template_vtable : Warning<
- "explicit template instantiation %0 will emit a vtable in every "
- "translation unit">,
+ "this warning is no longer in use and will be removed in the next release">,
InGroup<DiagGroup<"weak-template-vtables">>, DefaultIgnore;
def ext_using_undefined_std : ExtWarn<
@@ -2953,7 +2976,7 @@ def err_attribute_requires_positive_integer : Error<
"%0 attribute requires a %select{positive|non-negative}1 "
"integral compile time constant expression">;
def err_attribute_requires_opencl_version : Error<
- "%0 attribute requires OpenCL version %1%select{| or above}2">;
+ "attribute %0 is supported in the OpenCL version %1%select{| onwards}2">;
def err_invalid_branch_protection_spec : Error<
"invalid or misplaced branch protection specification '%0'">;
def warn_unsupported_target_attribute
@@ -2980,6 +3003,8 @@ def err_alignas_mismatch : Error<
"redeclaration has different alignment requirement (%1 vs %0)">;
def err_alignas_underaligned : Error<
"requested alignment is less than minimum alignment of %1 for type %0">;
+def warn_aligned_attr_underaligned : Warning<err_alignas_underaligned.Text>,
+ InGroup<IgnoredAttributes>;
def err_attribute_sizeless_type : Error<
"%0 attribute cannot be applied to sizeless type %1">;
def err_attribute_argument_n_type : Error<
@@ -3256,7 +3281,8 @@ def warn_assume_aligned_too_great
InGroup<DiagGroup<"builtin-assume-aligned-alignment">>;
def warn_not_xl_compatible
: Warning<"requesting an alignment of 16 bytes or greater for struct"
- " members is not binary compatible with AIX XL 16.1 and older">,
+ " members is not binary compatible with IBM XL C/C++ for AIX"
+ " 16.1.0 and older">,
InGroup<AIXCompat>;
def warn_redeclaration_without_attribute_prev_attribute_ignored : Warning<
"%q0 redeclared without %1 attribute: previous %1 ignored">,
@@ -3298,11 +3324,11 @@ def warn_attribute_has_no_effect_on_infinite_loop : Warning<
InGroup<IgnoredAttributes>;
def note_attribute_has_no_effect_on_infinite_loop_here : Note<
"annotating the infinite loop here">;
-def warn_attribute_has_no_effect_on_if_constexpr : Warning<
- "attribute %0 has no effect when annotating an 'if constexpr' statement">,
+def warn_attribute_has_no_effect_on_compile_time_if : Warning<
+ "attribute %0 has no effect when annotating an 'if %select{constexpr|consteval}1' statement">,
InGroup<IgnoredAttributes>;
-def note_attribute_has_no_effect_on_if_constexpr_here : Note<
- "annotating the 'if constexpr' statement here">;
+def note_attribute_has_no_effect_on_compile_time_if_here : Note<
+ "annotating the 'if %select{constexpr|consteval}0' statement here">;
def err_decl_attribute_invalid_on_stmt : Error<
"%0 attribute cannot be applied to a statement">;
def err_stmt_attribute_invalid_on_decl : Error<
@@ -3382,7 +3408,7 @@ def warn_attribute_dllimport_static_field_definition : Warning<
InGroup<DiagGroup<"dllimport-static-field-def">>;
def warn_attribute_dllexport_explicit_instantiation_decl : Warning<
"explicit instantiation declaration should not be 'dllexport'">,
- InGroup<DiagGroup<"dllexport-explicit-instantiation-decl">>;
+ InGroup<DllexportExplicitInstantiationDecl>;
def warn_attribute_dllexport_explicit_instantiation_def : Warning<
"'dllexport' attribute ignored on explicit instantiation definition">,
InGroup<IgnoredAttributes>;
@@ -3532,6 +3558,9 @@ def warn_availability_swift_unavailable_deprecated_only : Warning<
InGroup<Availability>;
def note_protocol_method : Note<
"protocol method is here">;
+def warn_availability_fuchsia_unavailable_minor : Warning<
+ "Fuchsia API Level prohibits specifying a minor or sub-minor version">,
+ InGroup<Availability>;
def warn_unguarded_availability :
Warning<"%0 is only available on %1 %2 or newer">,
@@ -4472,7 +4501,8 @@ def note_ovl_candidate_bad_conv_incomplete : Note<
"; remove &}7">;
def note_ovl_candidate_bad_list_argument : Note<
"candidate %sub{select_ovl_candidate_kind}0,1,2 not viable: "
- "cannot convert initializer list argument to %4">;
+ "%select{cannot convert initializer list|too few initializers in list"
+ "|too many initializers in list}7 argument to %4">;
def note_ovl_candidate_bad_overload : Note<
"candidate %sub{select_ovl_candidate_kind}0,1,2 not viable: "
"no overload of %4 matching %3 for %ordinal5 argument">;
@@ -5563,8 +5593,8 @@ def warn_undefined_inline : Warning<"inline function %q0 is not defined">,
def err_undefined_inline_var : Error<"inline variable %q0 is not defined">;
def note_used_here : Note<"used here">;
-def err_internal_linkage_redeclaration : Error<
- "'internal_linkage' attribute does not appear on the first declaration of %0">;
+def err_attribute_missing_on_first_decl : Error<
+ "%0 attribute does not appear on the first declaration">;
def warn_internal_linkage_local_storage : Warning<
"'internal_linkage' attribute on a non-static local variable is ignored">,
InGroup<IgnoredAttributes>;
@@ -5727,7 +5757,7 @@ def warn_typecheck_function_qualifiers_unspecified : Warning<
"'%0' qualifier on function type %1 has unspecified behavior">;
def warn_typecheck_reference_qualifiers : Warning<
"'%0' qualifier on reference type %1 has no effect">,
- InGroup<IgnoredQualifiers>;
+ InGroup<IgnoredReferenceQualifiers>;
def err_typecheck_invalid_restrict_not_pointer : Error<
"restrict requires a pointer or reference (%0 is invalid)">;
def err_typecheck_invalid_restrict_not_pointer_noarg : Error<
@@ -5926,6 +5956,8 @@ def note_protected_by_vla_type_alias : Note<
"jump bypasses initialization of VLA type alias">;
def note_protected_by_constexpr_if : Note<
"jump enters controlled statement of constexpr if">;
+def note_protected_by_consteval_if : Note<
+ "jump enters controlled statement of consteval if">;
def note_protected_by_if_available : Note<
"jump enters controlled statement of if available">;
def note_protected_by_vla : Note<
@@ -6397,11 +6429,6 @@ def warn_gnu_null_ptr_arith : Warning<
def warn_pointer_sub_null_ptr : Warning<
"performing pointer subtraction with a null pointer %select{has|may have}0 undefined behavior">,
InGroup<NullPointerSubtraction>, DefaultIgnore;
-def err_kernel_invalidates_sycl_unique_stable_name
- : Error<"kernel instantiation changes the result of an evaluated "
- "'__builtin_sycl_unique_stable_name'">;
-def note_sycl_unique_stable_name_evaluated_here
- : Note<"'__builtin_sycl_unique_stable_name' evaluated here">;
def warn_floatingpoint_eq : Warning<
"comparing floating point with == or != is unsafe">,
@@ -6770,7 +6797,7 @@ def warn_taking_address_of_packed_member : Warning<
"taking address of packed member %0 of class or structure %q1 may result in an unaligned pointer value">,
InGroup<DiagGroup<"address-of-packed-member">>;
def warn_param_mismatched_alignment : Warning<
- "passing %0-byte aligned argument to %1-byte aligned parameter %2 of %3 may result in an unaligned pointer access">,
+ "passing %0-byte aligned argument to %1-byte aligned parameter %2%select{| of %4}3 may result in an unaligned pointer access">,
InGroup<DiagGroup<"align-mismatch">>;
def err_objc_object_assignment : Error<
@@ -7413,10 +7440,13 @@ def note_member_declared_here : Note<
"member %0 declared here">;
def note_member_first_declared_here : Note<
"member %0 first declared here">;
+def warn_bitwise_instead_of_logical : Warning<
+ "use of bitwise '%0' with boolean operands">,
+ InGroup<BitwiseInsteadOfLogical>, DefaultIgnore;
def warn_bitwise_negation_bool : Warning<
"bitwise negation of a boolean expression%select{;| always evaluates to 'true';}0 "
"did you mean logical negation?">,
- InGroup<DiagGroup<"bool-operation">>;
+ InGroup<BoolOperation>, DefaultIgnore;
def err_decrement_bool : Error<"cannot decrement expression of type bool">;
def warn_increment_bool : Warning<
"incrementing expression of type bool is deprecated and "
@@ -7490,7 +7520,8 @@ def note_throw_in_function : Note<"function declared non-throwing here">;
def err_seh_try_outside_functions : Error<
"cannot use SEH '__try' in blocks, captured regions, or Obj-C method decls">;
def err_mixing_cxx_try_seh_try : Error<
- "cannot use C++ 'try' in the same function as SEH '__try'">;
+ "cannot use %select{C++ 'try'|Objective-C '@try'}0 "
+ "in the same function as SEH '__try'">;
def err_seh_try_unsupported : Error<
"SEH '__try' is not supported on this target">;
def note_conflicting_try_here : Note<
@@ -8380,8 +8411,10 @@ def err_ref_bad_target_global_initializer : Error<
"function %1 in global initializer">;
def err_capture_bad_target : Error<
"capture host variable %0 by reference in device or host device lambda function">;
-def err_capture_bad_target_this_ptr : Error<
- "capture host side class data member by this pointer in device or host device lambda function">;
+def warn_maybe_capture_bad_target_this_ptr : Warning<
+ "capture host side class data member by this pointer in device or host device lambda function "
+ "may result in invalid memory access if this pointer is not accessible on device side">,
+ InGroup<DiagGroup<"gpu-maybe-wrong-side">>;
def warn_kern_is_method : Extension<
"kernel function %0 is a member function; this may not be accepted by nvcc">,
InGroup<CudaCompat>;
@@ -8551,6 +8584,9 @@ def err_typecheck_choose_expr_requires_constant : Error<
"'__builtin_choose_expr' requires a constant expression">;
def warn_unused_expr : Warning<"expression result unused">,
InGroup<UnusedValue>;
+def warn_unused_comma_left_operand : Warning<
+ "left operand of comma operator has no effect">,
+ InGroup<UnusedValue>;
def warn_unused_voidptr : Warning<
"expression result unused; should this cast be to 'void'?">,
InGroup<UnusedValue>;
@@ -9142,14 +9178,18 @@ def note_defaulted_comparison_calls_deleted : Note<
"defaulted %0 is implicitly deleted because it would invoke a deleted "
"comparison function%select{| for member %2| for base class %2}1">;
def note_defaulted_comparison_no_viable_function : Note<
- "defaulted %0 is implicitly deleted because there is no viable three-way "
- "comparison function for%select{| member| base class}1 %2">;
+ "defaulted %0 is implicitly deleted because there is no viable "
+ "%select{three-way comparison function|'operator=='}1 for "
+ "%select{|member |base class }2%3">;
def note_defaulted_comparison_no_viable_function_synthesized : Note<
"three-way comparison cannot be synthesized because there is no viable "
"function for %select{'=='|'<'}0 comparison">;
def note_defaulted_comparison_not_rewritten_callee : Note<
"defaulted %0 is implicitly deleted because this non-rewritten comparison "
"function would be the best match for the comparison">;
+def note_defaulted_comparison_not_rewritten_conversion : Note<
+ "defaulted %0 is implicitly deleted because a builtin comparison function "
+ "using this conversion would be the best match for the comparison">;
def note_defaulted_comparison_cannot_deduce : Note<
"return type of defaulted 'operator<=>' cannot be deduced because "
"return type %2 of three-way comparison for %select{|member|base class}0 %1 "
@@ -9570,9 +9610,6 @@ def err_fallthrough_attr_outside_switch : Error<
"fallthrough annotation is outside switch statement">;
def err_fallthrough_attr_invalid_placement : Error<
"fallthrough annotation does not directly precede switch label">;
-def warn_fallthrough_attr_unreachable : Warning<
- "fallthrough annotation in unreachable code">,
- InGroup<ImplicitFallthrough>, DefaultIgnore;
def warn_unreachable_default : Warning<
"default label in switch which covers all enumeration values">,
@@ -9668,10 +9705,6 @@ def warn_falloff_noreturn_function : Warning<
InGroup<InvalidNoreturn>;
def err_noreturn_block_has_return_expr : Error<
"block declared 'noreturn' should not return">;
-def err_noreturn_missing_on_first_decl : Error<
- "function declared '[[noreturn]]' after its first declaration">;
-def note_noreturn_missing_first_decl : Note<
- "declaration missing '[[noreturn]]' attribute is here">;
def err_carries_dependency_missing_on_first_decl : Error<
"%select{function|parameter}0 declared '[[carries_dependency]]' "
"after its first declaration">;
@@ -9729,6 +9762,9 @@ def err_argument_invalid_range : Error<
def warn_argument_invalid_range : Warning<
"argument value %0 is outside the valid range [%1, %2]">, DefaultError,
InGroup<DiagGroup<"argument-outside-range">>;
+def warn_argument_undefined_behaviour : Warning<
+ "argument value %0 will result in undefined behaviour">,
+ InGroup<DiagGroup<"argument-undefined-behaviour">>;
def err_argument_not_multiple : Error<
"argument should be a multiple of %0">;
def err_argument_not_power_of_2 : Error<
@@ -9769,8 +9805,14 @@ def err_mips_builtin_requires_msa : Error<
"this builtin requires 'msa' ASE, please use -mmsa">;
def err_ppc_builtin_only_on_arch : Error<
"this builtin is only valid on POWER%0 or later CPUs">;
+def err_ppc_builtin_requires_vsx : Error<
+ "this builtin requires VSX to be enabled">;
+def err_ppc_builtin_requires_abi : Error<
+ "this builtin requires ABI -mabi=%0">;
def err_ppc_invalid_use_mma_type : Error<
"invalid use of PPC MMA type">;
+def err_ppc_invalid_test_data_class_type : Error<
+ "expected a 'float' or 'double' for the first argument">;
def err_x86_builtin_invalid_rounding : Error<
"invalid rounding argument">;
def err_x86_builtin_invalid_scale : Error<
@@ -10091,8 +10133,7 @@ def err_opencl_type_can_only_be_used_as_function_parameter : Error <
def err_opencl_type_not_found : Error<
"%0 type %1 not found; include the base header with -finclude-default-header">;
def warn_opencl_attr_deprecated_ignored : Warning <
- "%0 attribute is deprecated and ignored in OpenCL version %1">,
- InGroup<IgnoredAttributes>;
+ "%0 attribute is deprecated and ignored in %1">, InGroup<IgnoredAttributes>;
def err_opencl_variadic_function : Error<
"invalid prototype, variadic arguments are not allowed in OpenCL">;
def err_opencl_requires_extension : Error<
@@ -10100,8 +10141,6 @@ def err_opencl_requires_extension : Error<
def ext_opencl_double_without_pragma : Extension<
"Clang permits use of type 'double' regardless pragma if 'cl_khr_fp64' is"
" supported">;
-def err_opencl_double_requires_extension : Error<
- "use of type 'double' requires %select{cl_khr_fp64|cl_khr_fp64 and __opencl_c_fp64}0 support">;
def warn_opencl_generic_address_space_arg : Warning<
"passing non-generic address space pointer to %0"
" may cause dynamic conversion affecting performance">,
@@ -10159,7 +10198,7 @@ def err_opencl_builtin_expected_type : Error<
// OpenCL v3.0 s6.3.7 - Vector Components
def ext_opencl_ext_vector_type_rgba_selector: ExtWarn<
- "vector component name '%0' is an OpenCL C version 3.0 feature">,
+ "vector component name '%0' is a feature from OpenCL version 3.0 onwards">,
InGroup<OpenCLUnsupportedRGBA>;
def err_openclcxx_placement_new : Error<
@@ -10530,6 +10569,8 @@ def err_omp_map_shared_storage : Error<
"variable already marked as mapped in current construct">;
def err_omp_invalid_map_type_for_directive : Error<
"%select{map type '%1' is not allowed|map type must be specified}0 for '#pragma omp %2'">;
+def err_omp_invalid_map_type_modifier_for_directive : Error<
+ "map type modifier '%0' is not allowed for '#pragma omp %1'">;
def err_omp_no_clause_for_directive : Error<
"expected at least one %0 clause for '#pragma omp %1'">;
def err_omp_threadprivate_in_clause : Error<
@@ -10558,6 +10599,8 @@ def err_omp_depend_sink_unexpected_expr : Error<
"unexpected expression: number of expressions is larger than the number of associated loops">;
def err_omp_depend_sink_expected_plus_minus : Error<
"expected '+' or '-' operation">;
+def err_omp_taskwait_depend_mutexinoutset_not_allowed : Error<
+ "'mutexinoutset' modifier not allowed in 'depend' clause on 'taskwait' directive">;
def err_omp_depend_sink_source_not_allowed : Error<
"'depend(%select{source|sink:vec}0)' clause%select{|s}0 cannot be mixed with 'depend(%select{sink:vec|source}0)' clause%select{s|}0">;
def err_omp_depend_zero_length_array_section_not_allowed : Error<
@@ -10679,9 +10722,9 @@ def err_omp_invariant_or_linear_dependency : Error<
"expected loop invariant expression or '<invariant1> * %0 + <invariant2>' kind of expression">;
def err_omp_wrong_dependency_iterator_type : Error<
"expected an integer or a pointer type of the outer loop counter '%0' for non-rectangular nests">;
-def err_device_unsupported_type
- : Error<"%0 requires %select{|%2 bit size}1 %3 type support, but device "
- "'%4' does not support it">;
+def err_target_unsupported_type
+ : Error<"%0 requires %select{|%2 bit size}1 %3 %select{|return }4type support,"
+ " but target '%5' does not support it">;
def err_omp_lambda_capture_in_declare_target_not_to : Error<
"variable captured in declare target region must appear in a to clause">;
def err_omp_device_type_mismatch : Error<
@@ -10716,9 +10759,14 @@ def err_omp_declare_variant_diff : Error<
"function with '#pragma omp declare variant' has a different %select{calling convention"
"|return type|constexpr specification|inline specification|storage class|"
"linkage}0">;
+def err_omp_declare_variant_prototype_required : Error<
+ "function with '#pragma omp declare variant' must have a prototype when "
+ "'append_args' is used">;
+def err_omp_interop_type_not_found : Error<
+ "'omp_interop_t' must be defined when 'append_args' clause is used; include <omp.h>">;
def err_omp_declare_variant_incompat_types : Error<
- "variant in '#pragma omp declare variant' with type %0 is incompatible with type %1"
- >;
+ "variant in '#pragma omp declare variant' with type %0 is incompatible with"
+ " type %1%select{| with appended arguments}2">;
def warn_omp_declare_variant_marked_as_declare_variant : Warning<
"variant function in '#pragma omp declare variant' is itself marked as '#pragma omp declare variant'"
>, InGroup<SourceUsesOpenMP>;
@@ -10764,6 +10812,9 @@ def note_omp_protected_structured_block
: Note<"jump bypasses OpenMP structured block">;
def note_omp_exits_structured_block
: Note<"jump exits scope of OpenMP structured block">;
+def err_omp_lastprivate_loop_var_non_loop_iteration : Error<
+ "only loop iteration variables are allowed in 'lastprivate' clause in "
+ "'omp loop' directives">;
def err_omp_interop_variable_expected : Error<
"expected%select{| non-const}0 variable of type 'omp_interop_t'">;
def err_omp_interop_variable_wrong_type : Error<
@@ -10781,6 +10832,14 @@ def err_omp_dispatch_statement_call
def err_omp_unroll_full_variable_trip_count : Error<
"loop to be fully unrolled must have a constant trip count">;
def note_omp_directive_here : Note<"'%0' directive found here">;
+def err_omp_instantiation_not_supported
+ : Error<"instantiation of '%0' not supported yet">;
+def err_omp_adjust_arg_multiple_clauses : Error<
+ "'adjust_arg' argument %0 used in multiple clauses">;
+def err_omp_clause_requires_dispatch_construct : Error<
+ "'%0' clause requires 'dispatch' context selector">;
+def err_omp_append_args_with_varargs : Error<
+ "'append_args' is not allowed with varargs functions">;
} // end of OpenMP category
let CategoryName = "Related Result Type Issue" in {
@@ -10951,19 +11010,29 @@ def err_coroutine_invalid_func_context : Error<
"|a function with a deduced return type|a varargs function"
"|a consteval function}0">;
def err_implied_coroutine_type_not_found : Error<
- "%0 type was not found; include <experimental/coroutine> before defining "
- "a coroutine">;
+ "%0 type was not found; include <coroutine> before defining "
+ "a coroutine; include <experimental/coroutine> if your version "
+ "of libcxx is less than 14.0">;
+def warn_deprecated_coroutine_namespace : Warning<
+ "Please move from std::experimental::%0 to std::%0. "
+ "Support for std::experimental::%0 will be removed in LLVM 15.">,
+ InGroup<DeprecatedExperimentalCoroutine>;
+def err_mixed_use_std_and_experimental_namespace_for_coroutine : Error <
+ "Found mixed use of std namespace and std::experimental namespace for "
+ "coroutine, which is disallowed. The coroutine components in "
+ "std::experimental namespace is deprecated. Please use coroutine components "
+ "under std namespace.">;
def err_implicit_coroutine_std_nothrow_type_not_found : Error<
"std::nothrow was not found; include <new> before defining a coroutine which "
"uses get_return_object_on_allocation_failure()">;
def err_malformed_std_nothrow : Error<
"std::nothrow must be a valid variable declaration">;
def err_malformed_std_coroutine_handle : Error<
- "std::experimental::coroutine_handle must be a class template">;
+ "std::coroutine_handle isn't a class template">;
def err_coroutine_handle_missing_member : Error<
- "std::experimental::coroutine_handle missing a member named '%0'">;
+ "std::coroutine_handle must have a member named '%0'">;
def err_malformed_std_coroutine_traits : Error<
- "'std::experimental::coroutine_traits' must be a class template">;
+ "std::coroutine_traits isn't a class template">;
def err_implied_std_coroutine_traits_promise_type_not_found : Error<
"this function cannot be a coroutine: %q0 has no member named 'promise_type'">;
def err_implied_std_coroutine_traits_promise_type_not_class : Error<
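Editor's note: the reworked coroutine diagnostics above steer users from <experimental/coroutine> to the standard <coroutine> header. A minimal sketch (not part of the patch) of the preferred spelling; the Task type is invented boilerplate:

    #include <coroutine>   // std::coroutine_traits / std::coroutine_handle;
                           // <experimental/coroutine> remains only for libc++ older than 14

    struct Task {
      struct promise_type {
        Task get_return_object() { return {}; }
        std::suspend_never initial_suspend() { return {}; }
        std::suspend_never final_suspend() noexcept { return {}; }
        void return_void() {}
        void unhandled_exception() {}
      };
    };
    Task demo() { co_return; }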
@@ -11193,8 +11262,8 @@ def err_multiversion_mismatched_attrs
"%0 %select{is missing|has different arguments}1">;
def err_multiversion_diff : Error<
"multiversioned function declaration has a different %select{calling convention"
- "|return type|constexpr specification|inline specification|storage class|"
- "linkage}0">;
+ "|return type|constexpr specification|inline specification|linkage|"
+ "language linkage}0">;
def err_multiversion_doesnt_support : Error<
"attribute '%select{target|cpu_specific|cpu_dispatch}0' multiversioned functions do not "
"yet support %select{function templates|virtual functions|"
@@ -11259,6 +11328,12 @@ def err_builtin_launder_invalid_arg : Error<
"%select{non-pointer|function pointer|void pointer}0 argument to "
"'__builtin_launder' is not allowed">;
+def err_builtin_invalid_arg_type: Error <
+ "%ordinal0 argument must be a "
+ "%select{vector, integer or floating point type|matrix|"
+ "pointer to a valid matrix element type|"
+ "signed integer or floating point type|vector type}1 (was %2)">;
+
def err_builtin_matrix_disabled: Error<
"matrix types extension is disabled. Pass -fenable-matrix to enable it">;
def err_matrix_index_not_integer: Error<
@@ -11271,11 +11346,8 @@ def err_matrix_separate_incomplete_index: Error<
"matrix row and column subscripts cannot be separated by any expression">;
def err_matrix_subscript_comma: Error<
"comma expressions are not allowed as indices in matrix subscript expressions">;
-def err_builtin_matrix_arg: Error<"1st argument must be a matrix">;
def err_builtin_matrix_scalar_unsigned_arg: Error<
"%0 argument must be a constant unsigned integer expression">;
-def err_builtin_matrix_pointer_arg: Error<
- "%ordinal0 argument must be a pointer to a valid matrix element type">;
def err_builtin_matrix_pointer_arg_mismatch: Error<
"the pointee of the 2nd argument must match the element type of the 1st argument (%0 != %1)">;
def err_builtin_matrix_store_to_const: Error<
diff --git a/clang/include/clang/Basic/DiagnosticSerializationKinds.td b/clang/include/clang/Basic/DiagnosticSerializationKinds.td
index bf3221be004d..f15a935d2af1 100644
--- a/clang/include/clang/Basic/DiagnosticSerializationKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSerializationKinds.td
@@ -20,7 +20,7 @@ def err_fe_pch_malformed_block : Error<
def err_fe_ast_file_modified : Error<
"file '%0' has been modified since the "
"%select{precompiled header|module file|AST file}1 '%2' was built"
- ": %select{size|mtime|content}3 changed">,
+ ": %select{size|mtime|content}3 changed%select{| (was %5, now %6)}4">,
DefaultFatal;
def err_fe_pch_file_overridden : Error<
"file '%0' from the precompiled header has been overridden">;
diff --git a/clang/include/clang/Basic/IdentifierTable.h b/clang/include/clang/Basic/IdentifierTable.h
index f2379c7ddfbd..19c967efcc42 100644
--- a/clang/include/clang/Basic/IdentifierTable.h
+++ b/clang/include/clang/Basic/IdentifierTable.h
@@ -43,11 +43,28 @@ class SourceLocation;
enum class ReservedIdentifierStatus {
NotReserved = 0,
StartsWithUnderscoreAtGlobalScope,
+ StartsWithUnderscoreAndIsExternC,
StartsWithDoubleUnderscore,
StartsWithUnderscoreFollowedByCapitalLetter,
ContainsDoubleUnderscore,
};
+/// Determine whether an identifier is reserved for use as a name at global
+/// scope. Such identifiers might be implementation-specific global functions
+/// or variables.
+inline bool isReservedAtGlobalScope(ReservedIdentifierStatus Status) {
+ return Status != ReservedIdentifierStatus::NotReserved;
+}
+
+/// Determine whether an identifier is reserved in all contexts. Such
+/// identifiers might be implementation-specific keywords or macros, for
+/// example.
+inline bool isReservedInAllContexts(ReservedIdentifierStatus Status) {
+ return Status != ReservedIdentifierStatus::NotReserved &&
+ Status != ReservedIdentifierStatus::StartsWithUnderscoreAtGlobalScope &&
+ Status != ReservedIdentifierStatus::StartsWithUnderscoreAndIsExternC;
+}
+
/// A simple pair of identifier info and location.
using IdentifierLocPair = std::pair<IdentifierInfo *, SourceLocation>;
@@ -121,7 +138,16 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo {
// True if this is a mangled OpenMP variant name.
unsigned IsMangledOpenMPVariantName : 1;
- // 28 bits left in a 64-bit word.
+ // True if this is a deprecated macro.
+ unsigned IsDeprecatedMacro : 1;
+
+ // True if this macro is unsafe in headers.
+ unsigned IsRestrictExpansion : 1;
+
+ // True if this macro is final.
+ unsigned IsFinal : 1;
+
+ // 22 bits left in a 64-bit word.
// Managed by the language front-end.
void *FETokenInfo = nullptr;
@@ -134,7 +160,8 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo {
IsPoisoned(false), IsCPPOperatorKeyword(false),
NeedsHandleIdentifier(false), IsFromAST(false), ChangedAfterLoad(false),
FEChangedAfterLoad(false), RevertedTokenID(false), OutOfDate(false),
- IsModulesImport(false), IsMangledOpenMPVariantName(false) {}
+ IsModulesImport(false), IsMangledOpenMPVariantName(false),
+ IsDeprecatedMacro(false), IsRestrictExpansion(false), IsFinal(false) {}
public:
IdentifierInfo(const IdentifierInfo &) = delete;
@@ -182,6 +209,14 @@ public:
NeedsHandleIdentifier = true;
HadMacro = true;
} else {
+ // If this is a final macro, make the deprecation and header unsafe bits
+ // stick around after the undefinition so they apply to any redefinitions.
+ if (!IsFinal) {
+      // Calling the setters would recompute NeedsHandleIdentifier each time, so
+      // clear the bits directly to avoid the repeated recomputation.
+ IsDeprecatedMacro = false;
+ IsRestrictExpansion = false;
+ }
RecomputeNeedsHandleIdentifier();
}
}
@@ -192,6 +227,34 @@ public:
return HadMacro;
}
+ bool isDeprecatedMacro() const { return IsDeprecatedMacro; }
+
+ void setIsDeprecatedMacro(bool Val) {
+ if (IsDeprecatedMacro == Val)
+ return;
+ IsDeprecatedMacro = Val;
+ if (Val)
+ NeedsHandleIdentifier = true;
+ else
+ RecomputeNeedsHandleIdentifier();
+ }
+
+ bool isRestrictExpansion() const { return IsRestrictExpansion; }
+
+ void setIsRestrictExpansion(bool Val) {
+ if (IsRestrictExpansion == Val)
+ return;
+ IsRestrictExpansion = Val;
+ if (Val)
+ NeedsHandleIdentifier = true;
+ else
+ RecomputeNeedsHandleIdentifier();
+ }
+
+ bool isFinal() const { return IsFinal; }
+
+ void setIsFinal(bool Val) { IsFinal = Val; }
+
/// If this is a source-language token (e.g. 'for'), this API
/// can be used to cause the lexer to map identifiers to source-language
/// tokens.
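Editor's note: a hypothetical usage sketch of the two new reserved-identifier helpers; the enum values come from the hunk above and the surrounding function is illustrative only:

    #include "clang/Basic/IdentifierTable.h"
    using clang::ReservedIdentifierStatus;

    void classify(ReservedIdentifierStatus S) {
      // "_foo" at global scope: reserved there, but fine as, say, a local variable name.
      bool globalOnly = clang::isReservedAtGlobalScope(S) && !clang::isReservedInAllContexts(S);
      // "__foo" or "_Foo": reserved in every context (implementation keywords or macros).
      bool everywhere = clang::isReservedInAllContexts(S);
      (void)globalOnly; (void)everywhere;
    }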
diff --git a/clang/include/clang/Basic/JsonSupport.h b/clang/include/clang/Basic/JsonSupport.h
index 8b02e440df44..2ccb08e4bdaa 100644
--- a/clang/include/clang/Basic/JsonSupport.h
+++ b/clang/include/clang/Basic/JsonSupport.h
@@ -12,6 +12,7 @@
#include "clang/Basic/LLVM.h"
#include "clang/Basic/SourceManager.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
#include <iterator>
@@ -70,7 +71,7 @@ inline std::string JsonFormat(StringRef RawSR, bool AddQuotes) {
}
// Remove new-lines.
- Str.erase(std::remove(Str.begin(), Str.end(), '\n'), Str.end());
+ llvm::erase_value(Str, '\n');
if (!AddQuotes)
return Str;
@@ -98,18 +99,19 @@ inline void printSourceLocationAsJson(raw_ostream &Out, SourceLocation Loc,
if (AddBraces)
Out << "{ ";
std::string filename(PLoc.getFilename());
-#ifdef _WIN32
- // Remove forbidden Windows path characters
- auto RemoveIt =
- std::remove_if(filename.begin(), filename.end(), [](auto Char) {
- static const char ForbiddenChars[] = "<>*?\"|";
- return std::find(std::begin(ForbiddenChars), std::end(ForbiddenChars),
- Char) != std::end(ForbiddenChars);
- });
- filename.erase(RemoveIt, filename.end());
- // Handle windows-specific path delimiters.
- std::replace(filename.begin(), filename.end(), '\\', '/');
-#endif
+ if (is_style_windows(llvm::sys::path::Style::native)) {
+ // Remove forbidden Windows path characters
+ auto RemoveIt =
+ std::remove_if(filename.begin(), filename.end(), [](auto Char) {
+ static const char ForbiddenChars[] = "<>*?\"|";
+ return std::find(std::begin(ForbiddenChars),
+ std::end(ForbiddenChars),
+ Char) != std::end(ForbiddenChars);
+ });
+ filename.erase(RemoveIt, filename.end());
+ // Handle windows-specific path delimiters.
+ std::replace(filename.begin(), filename.end(), '\\', '/');
+ }
Out << "\"line\": " << PLoc.getLine()
<< ", \"column\": " << PLoc.getColumn()
<< ", \"file\": \"" << filename << "\"";
diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def
index 08b8d8851afa..4651f4fff6aa 100644
--- a/clang/include/clang/Basic/LangOptions.def
+++ b/clang/include/clang/Basic/LangOptions.def
@@ -224,13 +224,14 @@ LANGOPT(OpenCLVersion , 32, 0, "OpenCL C version")
LANGOPT(OpenCLCPlusPlus , 1, 0, "C++ for OpenCL")
LANGOPT(OpenCLCPlusPlusVersion , 32, 0, "C++ for OpenCL version")
LANGOPT(OpenCLGenericAddressSpace, 1, 0, "OpenCL generic keyword")
-LANGOPT(OpenCLPipe , 1, 0, "OpenCL pipe keyword")
+LANGOPT(OpenCLPipes , 1, 0, "OpenCL pipes language constructs and built-ins")
LANGOPT(NativeHalfType , 1, 0, "Native half type support")
LANGOPT(NativeHalfArgsAndReturns, 1, 0, "Native half args and returns")
LANGOPT(HalfArgsAndReturns, 1, 0, "half args and returns")
LANGOPT(CUDA , 1, 0, "CUDA")
LANGOPT(HIP , 1, 0, "HIP")
LANGOPT(OpenMP , 32, 0, "OpenMP support and version of OpenMP (31, 40 or 45)")
+LANGOPT(OpenMPExtensions , 1, 1, "Enable all Clang extensions for OpenMP directives and clauses")
LANGOPT(OpenMPSimd , 1, 0, "Use SIMD only OpenMP support.")
LANGOPT(OpenMPUseTLS , 1, 0, "Use TLS for threadprivates or runtime calls")
LANGOPT(OpenMPIsDevice , 1, 0, "Generate code only for OpenMP target device")
@@ -241,7 +242,10 @@ LANGOPT(OpenMPCUDANumSMs , 32, 0, "Number of SMs for CUDA devices.")
LANGOPT(OpenMPCUDABlocksPerSM , 32, 0, "Number of blocks per SM for CUDA devices.")
LANGOPT(OpenMPCUDAReductionBufNum , 32, 1024, "Number of the reduction records in the intermediate reduction buffer used for the teams reductions.")
LANGOPT(OpenMPTargetNewRuntime , 1, 0, "Use the new bitcode library for OpenMP offloading")
+LANGOPT(OpenMPTargetDebug , 32, 0, "Enable debugging in the OpenMP offloading device RTL")
LANGOPT(OpenMPOptimisticCollapse , 1, 0, "Use at most 32 bits to represent the collapsed loop nest counter.")
+LANGOPT(OpenMPThreadSubscription , 1, 0, "Assume work-shared loops do not have more iterations than participating threads.")
+LANGOPT(OpenMPTeamSubscription , 1, 0, "Assume distributed loops do not have more iterations than participating teams.")
LANGOPT(RenderScript , 1, 0, "RenderScript")
LANGOPT(CUDAIsDevice , 1, 0, "compiling for CUDA device")
@@ -280,6 +284,7 @@ BENIGN_LANGOPT(VisibilityInlinesHiddenStaticLocalVar, 1, 0,
"hidden visibility for static local variables in inline C++ "
"methods when -fvisibility-inlines hidden is enabled")
LANGOPT(GlobalAllocationFunctionVisibilityHidden , 1, 0, "hidden visibility for global operator new and delete declaration")
+LANGOPT(NewInfallible , 1, 0, "Treats throwing global C++ operator new as always returning valid memory (annotates with __attribute__((returns_nonnull)) and throw()). This is detectable in source.")
BENIGN_LANGOPT(ParseUnknownAnytype, 1, 0, "__unknown_anytype")
BENIGN_LANGOPT(DebuggerSupport , 1, 0, "debugger support")
BENIGN_LANGOPT(DebuggerCastResultToId, 1, 0, "for 'po' in the debugger, cast the result to id if it is of unknown type")
@@ -397,6 +402,7 @@ ENUM_LANGOPT(ClangABICompat, ClangABI, 4, ClangABI::Latest,
"with")
COMPATIBLE_VALUE_LANGOPT(FunctionAlignment, 5, 0, "Default alignment for functions")
+COMPATIBLE_VALUE_LANGOPT(LoopAlignment, 32, 0, "Default alignment for loops")
LANGOPT(FixedPoint, 1, 0, "fixed point types")
LANGOPT(PaddingOnUnsignedFixedPoint, 1, 0,
@@ -419,12 +425,15 @@ LANGOPT(SpeculativeLoadHardening, 1, 0, "Speculative load hardening enabled")
LANGOPT(RelativeCXXABIVTables, 1, 0,
"Use an ABI-incompatible v-table layout that uses relative references")
-LANGOPT(ArmSveVectorBits, 32, 0, "SVE vector size in bits")
+LANGOPT(VScaleMin, 32, 0, "Minimum vscale value")
+LANGOPT(VScaleMax, 32, 0, "Maximum vscale value")
-ENUM_LANGOPT(ExtendIntArgs, ExtendArgsKind, 1, ExtendArgsKind::ExtendTo32,
+ENUM_LANGOPT(ExtendIntArgs, ExtendArgsKind, 1, ExtendArgsKind::ExtendTo32,
"Controls how scalar integer arguments are extended in calls "
"to unprototyped and varargs functions")
+VALUE_LANGOPT(FuchsiaAPILevel, 32, 0, "Fuchsia API level")
+
#undef LANGOPT
#undef COMPATIBLE_LANGOPT
#undef BENIGN_LANGOPT
diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h
index 71cf0c65e692..35b33c2e0971 100644
--- a/clang/include/clang/Basic/LangOptions.h
+++ b/clang/include/clang/Basic/LangOptions.h
@@ -354,6 +354,9 @@ public:
/// A list of all -fno-builtin-* function names (e.g., memset).
std::vector<std::string> NoBuiltinFuncs;
+ /// A prefix map for __FILE__, __BASE_FILE__ and __builtin_FILE().
+ std::map<std::string, std::string, std::greater<std::string>> MacroPrefixMap;
+
/// Triples of the OpenMP targets that the host code codegen should
/// take into account in order to generate accurate offloading descriptors.
std::vector<llvm::Triple> OMPTargetTriples;
@@ -428,6 +431,13 @@ public:
/// Return the OpenCL C or C++ version as a VersionTuple.
VersionTuple getOpenCLVersionTuple() const;
+ /// Return the OpenCL version that kernel language is compatible with
+ unsigned getOpenCLCompatibleVersion() const;
+
+ /// Return the OpenCL C or C++ for OpenCL language name and version
+ /// as a string.
+ std::string getOpenCLVersionString() const;
+
/// Check if return address signing is enabled.
bool hasSignReturnAddress() const {
return getSignReturnAddressScope() != SignReturnAddressScopeKind::None;
@@ -460,6 +470,9 @@ public:
}
bool isSYCL() const { return SYCLIsDevice || SYCLIsHost; }
+
+ /// Remap path prefix according to -fmacro-prefix-path option.
+ void remapPathPrefix(SmallString<256> &Path) const;
};
/// Floating point control options
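The new MacroPrefixMap above is ordered with std::greater so that iteration visits longer, more specific prefixes (e.g. "/src/project") before shorter ones (e.g. "/src"). A minimal sketch of prefix remapping under that ordering; the helper is hypothetical and not clang's implementation:

#include <functional>
#include <map>
#include <string>

static std::string remapPrefix(
    std::string Path,
    const std::map<std::string, std::string, std::greater<std::string>> &Map) {
  for (const auto &Entry : Map)
    if (Path.compare(0, Entry.first.size(), Entry.first) == 0)
      return Entry.second + Path.substr(Entry.first.size()); // first match wins
  return Path;
}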
diff --git a/clang/include/clang/Basic/LangStandards.def b/clang/include/clang/Basic/LangStandards.def
index 2cfeb68e56d6..6056cfd65bbb 100644
--- a/clang/include/clang/Basic/LangStandards.def
+++ b/clang/include/clang/Basic/LangStandards.def
@@ -180,8 +180,15 @@ LANGSTANDARD(opencl20, "cl2.0",
LANGSTANDARD(opencl30, "cl3.0",
OpenCL, "OpenCL 3.0",
LineComment | C99 | Digraphs | HexFloat | OpenCL)
-LANGSTANDARD(openclcpp, "clc++",
- OpenCL, "C++ for OpenCL",
+
+LANGSTANDARD(openclcpp10, "clc++1.0",
+ OpenCL, "C++ for OpenCL 1.0",
+ LineComment | CPlusPlus | CPlusPlus11 | CPlusPlus14 | CPlusPlus17 |
+ Digraphs | HexFloat | OpenCL)
+LANGSTANDARD_ALIAS(openclcpp10, "clc++")
+
+LANGSTANDARD(openclcpp2021, "clc++2021",
+ OpenCL, "C++ for OpenCL 2021",
LineComment | CPlusPlus | CPlusPlus11 | CPlusPlus14 | CPlusPlus17 |
Digraphs | HexFloat | OpenCL)
@@ -190,7 +197,9 @@ LANGSTANDARD_ALIAS_DEPR(opencl11, "CL1.1")
LANGSTANDARD_ALIAS_DEPR(opencl12, "CL1.2")
LANGSTANDARD_ALIAS_DEPR(opencl20, "CL2.0")
LANGSTANDARD_ALIAS_DEPR(opencl30, "CL3.0")
-LANGSTANDARD_ALIAS_DEPR(openclcpp, "CLC++")
+LANGSTANDARD_ALIAS_DEPR(openclcpp10, "CLC++")
+LANGSTANDARD_ALIAS_DEPR(openclcpp10, "CLC++1.0")
+LANGSTANDARD_ALIAS_DEPR(openclcpp2021, "CLC++2021")
// CUDA
LANGSTANDARD(cuda, "cuda", CUDA, "NVIDIA CUDA(tm)",
diff --git a/clang/include/clang/Basic/MSP430Target.def b/clang/include/clang/Basic/MSP430Target.def
index a1e192c19261..7a10be1d54c8 100644
--- a/clang/include/clang/Basic/MSP430Target.def
+++ b/clang/include/clang/Basic/MSP430Target.def
@@ -238,8 +238,7 @@ MSP430_MCU_FEAT("msp430f4793", "32bit")
MSP430_MCU_FEAT("msp430f4784", "32bit")
MSP430_MCU_FEAT("msp430f4794", "32bit")
-// Generic MSUs
-MSP430_MCU("msp430")
+// Generic MCUs
MSP430_MCU("msp430i2xxgeneric")
#undef MSP430_MCU
diff --git a/clang/include/clang/Basic/ObjCRuntime.h b/clang/include/clang/Basic/ObjCRuntime.h
index 26403bfa98c9..30a5fde40754 100644
--- a/clang/include/clang/Basic/ObjCRuntime.h
+++ b/clang/include/clang/Basic/ObjCRuntime.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/HashBuilder.h"
#include "llvm/Support/VersionTuple.h"
#include <string>
@@ -480,6 +481,12 @@ public:
friend llvm::hash_code hash_value(const ObjCRuntime &OCR) {
return llvm::hash_combine(OCR.getKind(), OCR.getVersion());
}
+
+ template <typename HasherT, llvm::support::endianness Endianness>
+ friend void addHash(llvm::HashBuilderImpl<HasherT, Endianness> &HBuilder,
+ const ObjCRuntime &OCR) {
+ HBuilder.add(OCR.getKind(), OCR.getVersion());
+ }
};
raw_ostream &operator<<(raw_ostream &out, const ObjCRuntime &value);
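The addHash friend added above is the hook llvm::HashBuilder uses to hash user-defined types. A hedged sketch of the same pattern on a made-up type (the struct and its fields are illustrative only):

#include "llvm/Support/HashBuilder.h"

struct DemoVersioned {
  int Kind = 0;
  int Version = 0;

  // Mirrors the hunk above: forward the interesting fields to the builder.
  template <typename HasherT, llvm::support::endianness Endianness>
  friend void addHash(llvm::HashBuilderImpl<HasherT, Endianness> &HBuilder,
                      const DemoVersioned &V) {
    HBuilder.add(V.Kind, V.Version);
  }
};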
diff --git a/clang/include/clang/Basic/OpenCLOptions.h b/clang/include/clang/Basic/OpenCLOptions.h
index 1a035626fade..d6cb1a210519 100644
--- a/clang/include/clang/Basic/OpenCLOptions.h
+++ b/clang/include/clang/Basic/OpenCLOptions.h
@@ -58,7 +58,7 @@ static inline OpenCLVersionID encodeOpenCLVersion(unsigned OpenCLVersion) {
// mask.
static inline bool isOpenCLVersionContainedInMask(const LangOptions &LO,
unsigned Mask) {
- auto CLVer = LO.OpenCLCPlusPlus ? 200 : LO.OpenCLVersion;
+ auto CLVer = LO.getOpenCLCompatibleVersion();
OpenCLVersionID Code = encodeOpenCLVersion(CLVer);
return Mask & Code;
}
@@ -79,8 +79,8 @@ public:
// the __opencl_c_program_scope_global_variables feature is supported
// C++ for OpenCL inherits rule from OpenCL C v2.0.
bool areProgramScopeVariablesSupported(const LangOptions &Opts) const {
- return Opts.OpenCLCPlusPlus || Opts.OpenCLVersion == 200 ||
- (Opts.OpenCLVersion == 300 &&
+ return Opts.getOpenCLCompatibleVersion() == 200 ||
+ (Opts.getOpenCLCompatibleVersion() == 300 &&
isSupported("__opencl_c_program_scope_global_variables", Opts));
}
@@ -115,8 +115,7 @@ public:
// Is option available in OpenCL version \p LO.
bool isAvailableIn(const LangOptions &LO) const {
// In C++ mode all extensions should work at least as in v2.0.
- auto CLVer = LO.OpenCLCPlusPlus ? 200 : LO.OpenCLVersion;
- return CLVer >= Avail;
+ return LO.getOpenCLCompatibleVersion() >= Avail;
}
// Is core option in OpenCL version \p LO.
diff --git a/clang/include/clang/Basic/OpenMPKinds.def b/clang/include/clang/Basic/OpenMPKinds.def
index 9f9c32da4aa0..80ebda917945 100644
--- a/clang/include/clang/Basic/OpenMPKinds.def
+++ b/clang/include/clang/Basic/OpenMPKinds.def
@@ -59,6 +59,12 @@
#ifndef OPENMP_REDUCTION_MODIFIER
#define OPENMP_REDUCTION_MODIFIER(Name)
#endif
+#ifndef OPENMP_ADJUST_ARGS_KIND
+#define OPENMP_ADJUST_ARGS_KIND(Name)
+#endif
+#ifndef OPENMP_BIND_KIND
+#define OPENMP_BIND_KIND(Name)
+#endif
// Static attributes for 'schedule' clause.
OPENMP_SCHEDULE_KIND(static)
@@ -123,6 +129,8 @@ OPENMP_MAP_MODIFIER_KIND(always)
OPENMP_MAP_MODIFIER_KIND(close)
OPENMP_MAP_MODIFIER_KIND(mapper)
OPENMP_MAP_MODIFIER_KIND(present)
+// This is an OpenMP extension for the sake of OpenACC support.
+OPENMP_MAP_MODIFIER_KIND(ompx_hold)
// Modifiers for 'to' or 'from' clause.
OPENMP_MOTION_MODIFIER_KIND(mapper)
@@ -147,6 +155,17 @@ OPENMP_REDUCTION_MODIFIER(default)
OPENMP_REDUCTION_MODIFIER(inscan)
OPENMP_REDUCTION_MODIFIER(task)
+// Adjust-op kinds for the 'adjust_args' clause.
+OPENMP_ADJUST_ARGS_KIND(nothing)
+OPENMP_ADJUST_ARGS_KIND(need_device_ptr)
+
+// Binding kinds for the 'bind' clause.
+OPENMP_BIND_KIND(teams)
+OPENMP_BIND_KIND(parallel)
+OPENMP_BIND_KIND(thread)
+
+#undef OPENMP_BIND_KIND
+#undef OPENMP_ADJUST_ARGS_KIND
#undef OPENMP_REDUCTION_MODIFIER
#undef OPENMP_DEVICE_MODIFIER
#undef OPENMP_ORDER_KIND
diff --git a/clang/include/clang/Basic/OpenMPKinds.h b/clang/include/clang/Basic/OpenMPKinds.h
index c7a2591de26c..e95a717f268d 100644
--- a/clang/include/clang/Basic/OpenMPKinds.h
+++ b/clang/include/clang/Basic/OpenMPKinds.h
@@ -14,6 +14,7 @@
#ifndef LLVM_CLANG_BASIC_OPENMPKINDS_H
#define LLVM_CLANG_BASIC_OPENMPKINDS_H
+#include "clang/Basic/LangOptions.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
@@ -166,8 +167,22 @@ enum OpenMPReductionClauseModifier {
OMPC_REDUCTION_unknown,
};
+/// OpenMP adjust-op kinds for 'adjust_args' clause.
+enum OpenMPAdjustArgsOpKind {
+#define OPENMP_ADJUST_ARGS_KIND(Name) OMPC_ADJUST_ARGS_##Name,
+#include "clang/Basic/OpenMPKinds.def"
+ OMPC_ADJUST_ARGS_unknown,
+};
+
+/// OpenMP bindings for the 'bind' clause.
+enum OpenMPBindClauseKind {
+#define OPENMP_BIND_KIND(Name) OMPC_BIND_##Name,
+#include "clang/Basic/OpenMPKinds.def"
+ OMPC_BIND_unknown
+};
+
unsigned getOpenMPSimpleClauseType(OpenMPClauseKind Kind, llvm::StringRef Str,
- unsigned OpenMPVersion);
+ const LangOptions &LangOpts);
const char *getOpenMPSimpleClauseTypeName(OpenMPClauseKind Kind, unsigned Type);
/// Checks if the specified directive is a directive with an associated
@@ -245,6 +260,13 @@ bool isOpenMPDistributeDirective(OpenMPDirectiveKind DKind);
/// otherwise - false.
bool isOpenMPNestingDistributeDirective(OpenMPDirectiveKind DKind);
+/// Checks if the specified directive constitutes a 'loop' directive in the
+/// outermost nest. For example, 'omp teams loop' or 'omp loop'.
+/// \param DKind Specified directive.
+/// \return true - the directive has loop on the outermost nest.
+/// otherwise - false.
+bool isOpenMPGenericLoopDirective(OpenMPDirectiveKind DKind);
+
/// Checks if the specified clause is one of private clauses like
/// 'private', 'firstprivate', 'reduction' etc..
/// \param Kind Clause kind.
diff --git a/clang/include/clang/Basic/Sanitizers.h b/clang/include/clang/Basic/Sanitizers.h
index b12a3b7821d7..db53010645ae 100644
--- a/clang/include/clang/Basic/Sanitizers.h
+++ b/clang/include/clang/Basic/Sanitizers.h
@@ -16,6 +16,7 @@
#include "clang/Basic/LLVM.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/HashBuilder.h"
#include "llvm/Transforms/Instrumentation/AddressSanitizerOptions.h"
#include <cassert>
#include <cstdint>
@@ -72,6 +73,12 @@ public:
llvm::hash_code hash_value() const;
+ template <typename HasherT, llvm::support::endianness Endianness>
+ friend void addHash(llvm::HashBuilderImpl<HasherT, Endianness> &HBuilder,
+ const SanitizerMask &SM) {
+ HBuilder.addRange(&SM.maskLoToHigh[0], &SM.maskLoToHigh[kNumElem]);
+ }
+
constexpr explicit operator bool() const {
return maskLoToHigh[0] || maskLoToHigh[1];
}
diff --git a/clang/include/clang/Basic/SourceLocation.h b/clang/include/clang/Basic/SourceLocation.h
index 540de23b9f55..543245a811db 100644
--- a/clang/include/clang/Basic/SourceLocation.h
+++ b/clang/include/clang/Basic/SourceLocation.h
@@ -23,8 +23,6 @@
namespace llvm {
-template <typename T> struct DenseMapInfo;
-
class FoldingSetNodeID;
template <typename T> struct FoldingSetTrait;
@@ -363,6 +361,10 @@ class FileEntry;
/// A SourceLocation and its associated SourceManager.
///
/// This is useful for argument passing to functions that expect both objects.
+///
+/// This class does not guarantee the presence of either the SourceManager or
+/// a valid SourceLocation. Clients should use `isValid()` and `hasManager()`
+/// before calling the member functions.
class FullSourceLoc : public SourceLocation {
const SourceManager *SrcMgr = nullptr;
@@ -373,13 +375,10 @@ public:
explicit FullSourceLoc(SourceLocation Loc, const SourceManager &SM)
: SourceLocation(Loc), SrcMgr(&SM) {}
- bool hasManager() const {
- bool hasSrcMgr = SrcMgr != nullptr;
- assert(hasSrcMgr == isValid() && "FullSourceLoc has location but no manager");
- return hasSrcMgr;
- }
+ /// Checks whether the SourceManager is present.
+ bool hasManager() const { return SrcMgr != nullptr; }
- /// \pre This FullSourceLoc has an associated SourceManager.
+ /// \pre hasManager()
const SourceManager &getManager() const {
assert(SrcMgr && "SourceManager is NULL.");
return *SrcMgr;
@@ -466,7 +465,7 @@ namespace llvm {
/// Define DenseMapInfo so that FileID's can be used as keys in DenseMap and
/// DenseSets.
template <>
- struct DenseMapInfo<clang::FileID> {
+ struct DenseMapInfo<clang::FileID, void> {
static clang::FileID getEmptyKey() {
return {};
}
@@ -487,7 +486,7 @@ namespace llvm {
/// Define DenseMapInfo so that SourceLocation's can be used as keys in
/// DenseMap and DenseSet. This trait class is equivalent to
/// DenseMapInfo<unsigned>, which uses SourceLocation::ID as a key.
- template <> struct DenseMapInfo<clang::SourceLocation> {
+ template <> struct DenseMapInfo<clang::SourceLocation, void> {
static clang::SourceLocation getEmptyKey() {
constexpr clang::SourceLocation::UIntTy Zero = 0;
return clang::SourceLocation::getFromRawEncoding(~Zero);
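The specializations above gain an explicit second argument because upstream DenseMapInfo now carries an extra SFINAE-enable template parameter (defaulted to void). A hedged sketch of the same contract for a hypothetical key type:

#include "llvm/ADT/DenseMapInfo.h"

struct DemoId { unsigned Value; };

namespace llvm {
template <> struct DenseMapInfo<DemoId, void> {
  static DemoId getEmptyKey() { return DemoId{~0u}; }
  static DemoId getTombstoneKey() { return DemoId{~0u - 1}; }
  static unsigned getHashValue(DemoId V) { return V.Value; }
  static bool isEqual(DemoId A, DemoId B) { return A.Value == B.Value; }
};
} // namespace llvm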
diff --git a/clang/include/clang/Basic/Specifiers.h b/clang/include/clang/Basic/Specifiers.h
index 1c38b411e083..66cdba3f912e 100644
--- a/clang/include/clang/Basic/Specifiers.h
+++ b/clang/include/clang/Basic/Specifiers.h
@@ -31,6 +31,15 @@ namespace clang {
/// Define the kind of constexpr specifier.
enum class ConstexprSpecKind { Unspecified, Constexpr, Consteval, Constinit };
+  /// In an if statement, this denotes whether the statement is
+  /// a constexpr or consteval if statement.
+ enum class IfStatementKind : unsigned {
+    Ordinary,
+    Constexpr,
+    ConstevalNonNegated,
+    ConstevalNegated
+  };
+
/// Specifies the width of a type, e.g., short, long, or long long.
enum class TypeSpecifierWidth { Unspecified, Short, Long, LongLong };
@@ -59,6 +68,7 @@ namespace clang {
TST_float,
TST_double,
TST_float128,
+ TST_ibm128,
TST_bool, // _Bool
TST_decimal32, // _Decimal32
TST_decimal64, // _Decimal64
diff --git a/clang/include/clang/Basic/Stack.h b/clang/include/clang/Basic/Stack.h
index 3418c3bad11b..30ebd94aedd1 100644
--- a/clang/include/clang/Basic/Stack.h
+++ b/clang/include/clang/Basic/Stack.h
@@ -39,7 +39,7 @@ namespace clang {
/// is insufficient, calls Diag to emit a diagnostic before calling Fn.
inline void runWithSufficientStackSpace(llvm::function_ref<void()> Diag,
llvm::function_ref<void()> Fn) {
-#ifdef LLVM_ENABLE_THREADS
+#if LLVM_ENABLE_THREADS
if (LLVM_UNLIKELY(isStackNearlyExhausted()))
runWithSufficientStackSpaceSlow(Diag, Fn);
else
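The Stack.h hunk matters because LLVM_ENABLE_THREADS is always defined, to either 0 or 1; #ifdef therefore always took the guarded path, while #if honors the value. A tiny stand-alone illustration (DEMO_FLAG stands in for the real macro):

#define DEMO_FLAG 0

#ifdef DEMO_FLAG
// Taken even though DEMO_FLAG is 0: #ifdef only asks whether it is defined.
#endif

#if DEMO_FLAG
// Skipped: #if evaluates the value, which is 0 here.
#endif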
diff --git a/clang/include/clang/Basic/StmtNodes.td b/clang/include/clang/Basic/StmtNodes.td
index 508f1fddf1b3..ab31c544ea9d 100644
--- a/clang/include/clang/Basic/StmtNodes.td
+++ b/clang/include/clang/Basic/StmtNodes.td
@@ -219,12 +219,14 @@ def AsTypeExpr : StmtNode<Expr>;
// OpenMP Directives.
def OMPCanonicalLoop : StmtNode<Stmt>;
def OMPExecutableDirective : StmtNode<Stmt, 1>;
+def OMPMetaDirective : StmtNode<OMPExecutableDirective>;
def OMPLoopBasedDirective : StmtNode<OMPExecutableDirective, 1>;
def OMPLoopDirective : StmtNode<OMPLoopBasedDirective, 1>;
def OMPParallelDirective : StmtNode<OMPExecutableDirective>;
def OMPSimdDirective : StmtNode<OMPLoopDirective>;
-def OMPTileDirective : StmtNode<OMPLoopBasedDirective>;
-def OMPUnrollDirective : StmtNode<OMPLoopBasedDirective>;
+def OMPLoopTransformationDirective : StmtNode<OMPLoopBasedDirective, 1>;
+def OMPTileDirective : StmtNode<OMPLoopTransformationDirective>;
+def OMPUnrollDirective : StmtNode<OMPLoopTransformationDirective>;
def OMPForDirective : StmtNode<OMPLoopDirective>;
def OMPForSimdDirective : StmtNode<OMPLoopDirective>;
def OMPSectionsDirective : StmtNode<OMPExecutableDirective>;
@@ -280,3 +282,4 @@ def OMPTargetTeamsDistributeSimdDirective : StmtNode<OMPLoopDirective>;
def OMPInteropDirective : StmtNode<OMPExecutableDirective>;
def OMPDispatchDirective : StmtNode<OMPExecutableDirective>;
def OMPMaskedDirective : StmtNode<OMPExecutableDirective>;
+def OMPGenericLoopDirective : StmtNode<OMPLoopDirective>;
diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h
index ed53b10f61ef..d4ea8e98b2e3 100644
--- a/clang/include/clang/Basic/TargetBuiltins.h
+++ b/clang/include/clang/Basic/TargetBuiltins.h
@@ -124,10 +124,21 @@ namespace clang {
enum { LastTIBuiltin = clang::Builtin::FirstTSBuiltin - 1, LastTSBuiltin };
}
+ namespace RISCVVector {
+ enum {
+ LastTIBuiltin = clang::Builtin::FirstTSBuiltin - 1,
+#define BUILTIN(ID, TYPE, ATTRS) BI##ID,
+#include "clang/Basic/BuiltinsRISCVVector.def"
+ FirstTSBuiltin,
+ };
+ }
+
/// RISCV builtins
namespace RISCV {
enum {
LastTIBuiltin = clang::Builtin::FirstTSBuiltin - 1,
+ FirstRVVBuiltin = clang::Builtin::FirstTSBuiltin,
+ LastRVVBuiltin = RISCVVector::FirstTSBuiltin - 1,
#define BUILTIN(ID, TYPE, ATTRS) BI##ID,
#include "clang/Basic/BuiltinsRISCV.def"
LastTSBuiltin
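The new RISCVVector namespace above reserves a contiguous block of builtin IDs ahead of the scalar RISC-V builtins, bracketed by FirstRVVBuiltin and LastRVVBuiltin. A hedged sketch of how that range could be tested (the helper function is hypothetical):

#include "clang/Basic/TargetBuiltins.h"

static bool isRVVBuiltinID(unsigned ID) {
  return ID >= clang::RISCV::FirstRVVBuiltin &&
         ID <= clang::RISCV::LastRVVBuiltin;
}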
diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h
index 4f0cbf986b31..3e1e09417c66 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -53,6 +53,15 @@ class SourceManager;
namespace Builtin { struct Info; }
+enum class FloatModeKind {
+  NoFloat = 255,
+  Float = 0,
+  Double,
+  LongDouble,
+  Float128,
+  Ibm128
+};
+
/// Fields controlling how types are laid out in memory; these may need to
/// be copied for targets like AMDGPU that base their ABIs on an auxiliary
/// CPU target.
@@ -64,7 +73,7 @@ struct TransferrableTargetInfo {
unsigned char BFloat16Width, BFloat16Align;
unsigned char FloatWidth, FloatAlign;
unsigned char DoubleWidth, DoubleAlign;
- unsigned char LongDoubleWidth, LongDoubleAlign, Float128Align;
+ unsigned char LongDoubleWidth, LongDoubleAlign, Float128Align, Ibm128Align;
unsigned char LargeArrayMinWidth, LargeArrayAlign;
unsigned char LongWidth, LongAlign;
unsigned char LongLongWidth, LongLongAlign;
@@ -104,7 +113,7 @@ struct TransferrableTargetInfo {
unsigned MaxTLSAlign;
const llvm::fltSemantics *HalfFormat, *BFloat16Format, *FloatFormat,
- *DoubleFormat, *LongDoubleFormat, *Float128Format;
+ *DoubleFormat, *LongDoubleFormat, *Float128Format, *Ibm128Format;
///===---- Target Data Type Query Methods -------------------------------===//
enum IntType {
@@ -121,13 +130,6 @@ struct TransferrableTargetInfo {
UnsignedLongLong
};
- enum RealType {
- NoFloat = 255,
- Float = 0,
- Double,
- LongDouble,
- Float128
- };
protected:
IntType SizeType, IntMaxType, PtrDiffType, IntPtrType, WCharType, WIntType,
Char16Type, Char32Type, Int64Type, Int16Type, SigAtomicType,
@@ -200,6 +202,9 @@ protected:
bool HasFloat128;
bool HasFloat16;
bool HasBFloat16;
+ bool HasIbm128;
+ bool HasLongDouble;
+ bool HasFPReturn;
bool HasStrictFP;
unsigned char MaxAtomicPromoteWidth, MaxAtomicInlineWidth;
@@ -210,9 +215,6 @@ protected:
unsigned char RegParmMax, SSERegParmMax;
TargetCXXABI TheCXXABI;
const LangASMap *AddrSpaceMap;
- const unsigned *GridValues =
- nullptr; // Array of target-specific GPU grid values that must be
- // consistent between host RTL (plugin), device RTL, and clang.
mutable StringRef PlatformName;
mutable VersionTuple PlatformMinVersion;
@@ -401,7 +403,8 @@ public:
/// is represented as one of those two). At this time, there is no support
/// for an explicit "PPC double-double" type (i.e. __ibm128) so we only
/// need to differentiate between "long double" and IEEE quad precision.
- RealType getRealTypeByWidth(unsigned BitWidth, bool ExplicitIEEE) const;
+ FloatModeKind getRealTypeByWidth(unsigned BitWidth,
+ FloatModeKind ExplicitType) const;
/// Return the alignment (in bits) of the specified integer type enum.
///
@@ -597,6 +600,16 @@ public:
/// Determine whether the _BFloat16 type is supported on this target.
virtual bool hasBFloat16Type() const { return HasBFloat16; }
+ /// Determine whether the __ibm128 type is supported on this target.
+ virtual bool hasIbm128Type() const { return HasIbm128; }
+
+ /// Determine whether the long double type is supported on this target.
+ virtual bool hasLongDoubleType() const { return HasLongDouble; }
+
+ /// Determine whether return of a floating point value is supported
+ /// on this target.
+ virtual bool hasFPReturn() const { return HasFPReturn; }
+
/// Determine whether constrained floating point is supported on this target.
virtual bool hasStrictFP() const { return HasStrictFP; }
@@ -675,12 +688,23 @@ public:
return *Float128Format;
}
+ /// getIbm128Width/Align/Format - Return the size/align/format of
+ /// '__ibm128'.
+ unsigned getIbm128Width() const { return 128; }
+ unsigned getIbm128Align() const { return Ibm128Align; }
+ const llvm::fltSemantics &getIbm128Format() const { return *Ibm128Format; }
+
/// Return the mangled code of long double.
virtual const char *getLongDoubleMangling() const { return "e"; }
/// Return the mangled code of __float128.
virtual const char *getFloat128Mangling() const { return "g"; }
+ /// Return the mangled code of __ibm128.
+ virtual const char *getIbm128Mangling() const {
+ llvm_unreachable("ibm128 not implemented on this target");
+ }
+
/// Return the mangled code of bfloat.
virtual const char *getBFloat16Mangling() const {
llvm_unreachable("bfloat not implemented on this target");
@@ -833,8 +857,8 @@ public:
/// Check whether the given real type should use the "fpret" flavor of
/// Objective-C message passing on this target.
- bool useObjCFPRetForRealType(RealType T) const {
- return RealTypeUsesObjCFPRet & (1 << T);
+ bool useObjCFPRetForRealType(FloatModeKind T) const {
+ return RealTypeUsesObjCFPRet & (1 << (int)T);
}
/// Check whether _Complex long double should use the "fp2ret" flavor
@@ -870,6 +894,11 @@ public:
/// across the current set of primary and secondary targets.
virtual ArrayRef<Builtin::Info> getTargetBuiltins() const = 0;
+ /// Returns target-specific min and max values VScale_Range.
+ virtual Optional<std::pair<unsigned, unsigned>>
+ getVScaleRange(const LangOptions &LangOpts) const {
+ return None;
+ }
/// The __builtin_clz* and __builtin_ctz* built-in
/// functions are specified to have undefined results for zero inputs, but
/// on targets that support these operations in a way that provides
@@ -993,8 +1022,7 @@ public:
}
bool isValidAsmImmediate(const llvm::APInt &Value) const {
if (!ImmSet.empty())
- return Value.isSignedIntN(32) &&
- ImmSet.count(Value.getZExtValue()) != 0;
+ return Value.isSignedIntN(32) && ImmSet.contains(Value.getZExtValue());
return !ImmRange.isConstrained ||
(Value.sge(ImmRange.Min) && Value.sle(ImmRange.Max));
}
@@ -1404,10 +1432,10 @@ public:
return LangAS::Default;
}
- /// Return a target-specific GPU grid value based on the GVIDX enum \p gv
- unsigned getGridValue(llvm::omp::GVIDX gv) const {
- assert(GridValues != nullptr && "GridValues not initialized");
- return GridValues[gv];
+ // access target-specific GPU grid values that must be consistent between
+ // host RTL (plugin), deviceRTL and clang.
+ virtual const llvm::omp::GV &getGridValue() const {
+ llvm_unreachable("getGridValue not implemented on this target");
}
/// Retrieve the name of the platform as it is used in the
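Because FloatModeKind above is a scoped enum, it no longer converts implicitly to an integer, which is why useObjCFPRetForRealType now casts before shifting. A minimal sketch of the same bit-mask test with illustrative names:

enum class DemoFloatKind { Float = 0, Double, LongDouble };

static bool isKindSet(unsigned Mask, DemoFloatKind K) {
  // enum class values need an explicit cast to be used as a shift amount.
  return Mask & (1u << static_cast<int>(K));
}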
diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def
index 48a664e3494e..0dd5936aa3e6 100644
--- a/clang/include/clang/Basic/TokenKinds.def
+++ b/clang/include/clang/Basic/TokenKinds.def
@@ -438,6 +438,7 @@ TYPE_TRAIT_2(__builtin_types_compatible_p, TypeCompatible, KEYNOCXX)
KEYWORD(__builtin_va_arg , KEYALL)
KEYWORD(__extension__ , KEYALL)
KEYWORD(__float128 , KEYALL)
+KEYWORD(__ibm128 , KEYALL)
KEYWORD(__imag , KEYALL)
KEYWORD(__int128 , KEYALL)
KEYWORD(__label__ , KEYALL)
@@ -827,10 +828,11 @@ PRAGMA_ANNOTATION(pragma_redefine_extname)
// handles them.
PRAGMA_ANNOTATION(pragma_fp_contract)
-// Annotation for #pragma STDC FENV_ACCESS
+// Annotations for #pragma STDC FENV_ACCESS and #pragma fenv_access (MS compat)
// The lexer produces these so that they only take effect when the parser
// handles them.
PRAGMA_ANNOTATION(pragma_fenv_access)
+PRAGMA_ANNOTATION(pragma_fenv_access_ms)
// Annotation for #pragma STDC FENV_ROUND
// The lexer produces these so that they only take effect when the parser
diff --git a/clang/include/clang/Basic/X86Target.def b/clang/include/clang/Basic/X86Target.def
deleted file mode 100644
index 70f3879f33a1..000000000000
--- a/clang/include/clang/Basic/X86Target.def
+++ /dev/null
@@ -1,110 +0,0 @@
-//===--- X86Target.def - X86 Feature/Processor Database ---------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the X86-specific Features and Processors, as used by
-// the X86 Targets.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef FEATURE
-#define FEATURE(ENUM)
-#endif
-
-#ifndef CPU_SPECIFIC
-#define CPU_SPECIFIC(NAME, MANGLING, FEATURES)
-#endif
-
-#ifndef CPU_SPECIFIC_ALIAS
-#define CPU_SPECIFIC_ALIAS(NEW_NAME, NAME)
-#endif
-
-// List of CPU Supports features in order. These need to remain in the order
-// required by attribute 'target' checking. Note that not all are supported/
-// prioritized by GCC, so synchronization with GCC's implementation may require
-// changing some existing values.
-FEATURE(FEATURE_CMOV)
-FEATURE(FEATURE_MMX)
-FEATURE(FEATURE_SSE)
-FEATURE(FEATURE_SSE2)
-FEATURE(FEATURE_SSE3)
-FEATURE(FEATURE_SSSE3)
-FEATURE(FEATURE_SSE4_A)
-FEATURE(FEATURE_SSE4_1)
-FEATURE(FEATURE_SSE4_2)
-FEATURE(FEATURE_POPCNT)
-FEATURE(FEATURE_AES)
-FEATURE(FEATURE_PCLMUL)
-FEATURE(FEATURE_AVX)
-FEATURE(FEATURE_BMI)
-FEATURE(FEATURE_FMA4)
-FEATURE(FEATURE_XOP)
-FEATURE(FEATURE_FMA)
-FEATURE(FEATURE_BMI2)
-FEATURE(FEATURE_AVX2)
-FEATURE(FEATURE_AVX512F)
-FEATURE(FEATURE_AVX512VL)
-FEATURE(FEATURE_AVX512BW)
-FEATURE(FEATURE_AVX512DQ)
-FEATURE(FEATURE_AVX512CD)
-FEATURE(FEATURE_AVX512ER)
-FEATURE(FEATURE_AVX512PF)
-FEATURE(FEATURE_AVX512VBMI)
-FEATURE(FEATURE_AVX512IFMA)
-FEATURE(FEATURE_AVX5124VNNIW)
-FEATURE(FEATURE_AVX5124FMAPS)
-FEATURE(FEATURE_AVX512VPOPCNTDQ)
-FEATURE(FEATURE_AVX512VBMI2)
-FEATURE(FEATURE_GFNI)
-FEATURE(FEATURE_VPCLMULQDQ)
-FEATURE(FEATURE_AVX512VNNI)
-FEATURE(FEATURE_AVX512BITALG)
-FEATURE(FEATURE_AVX512BF16)
-FEATURE(FEATURE_AVX512VP2INTERSECT)
-
-
-// FIXME: When commented out features are supported in LLVM, enable them here.
-CPU_SPECIFIC("generic", 'A', "")
-CPU_SPECIFIC("pentium", 'B', "")
-CPU_SPECIFIC("pentium_pro", 'C', "+cmov")
-CPU_SPECIFIC("pentium_mmx", 'D', "+mmx")
-CPU_SPECIFIC("pentium_ii", 'E', "+cmov,+mmx")
-CPU_SPECIFIC("pentium_iii", 'H', "+cmov,+mmx,+sse")
-CPU_SPECIFIC_ALIAS("pentium_iii_no_xmm_regs", "pentium_iii")
-CPU_SPECIFIC("pentium_4", 'J', "+cmov,+mmx,+sse,+sse2")
-CPU_SPECIFIC("pentium_m", 'K', "+cmov,+mmx,+sse,+sse2")
-CPU_SPECIFIC("pentium_4_sse3", 'L', "+cmov,+mmx,+sse,+sse2,+sse3")
-CPU_SPECIFIC("core_2_duo_ssse3", 'M', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3")
-CPU_SPECIFIC("core_2_duo_sse4_1", 'N', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1")
-CPU_SPECIFIC("atom", 'O', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+movbe")
-CPU_SPECIFIC("atom_sse4_2", 'c', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt")
-CPU_SPECIFIC("core_i7_sse4_2", 'P', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt")
-CPU_SPECIFIC("core_aes_pclmulqdq", 'Q', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt")
-CPU_SPECIFIC("atom_sse4_2_movbe", 'd', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt")
-CPU_SPECIFIC("goldmont", 'i', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt")
-CPU_SPECIFIC("sandybridge", 'R', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+avx")
-CPU_SPECIFIC_ALIAS("core_2nd_gen_avx", "sandybridge")
-CPU_SPECIFIC("ivybridge", 'S', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+f16c,+avx")
-CPU_SPECIFIC_ALIAS("core_3rd_gen_avx", "ivybridge")
-CPU_SPECIFIC("haswell", 'V', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2")
-CPU_SPECIFIC_ALIAS("core_4th_gen_avx", "haswell")
-CPU_SPECIFIC("core_4th_gen_avx_tsx", 'W', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2")
-CPU_SPECIFIC("broadwell", 'X', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+adx")
-CPU_SPECIFIC_ALIAS("core_5th_gen_avx", "broadwell")
-CPU_SPECIFIC("core_5th_gen_avx_tsx", 'Y', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+adx")
-CPU_SPECIFIC("knl", 'Z', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512f,+adx,+avx512er,+avx512pf,+avx512cd")
-CPU_SPECIFIC_ALIAS("mic_avx512", "knl")
-CPU_SPECIFIC("skylake", 'b', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+adx,+mpx")
-CPU_SPECIFIC( "skylake_avx512", 'a', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512dq,+avx512f,+adx,+avx512cd,+avx512bw,+avx512vl,+clwb")
-CPU_SPECIFIC("cannonlake", 'e', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512dq,+avx512f,+adx,+avx512ifma,+avx512cd,+avx512bw,+avx512vl,+avx512vbmi")
-CPU_SPECIFIC("knm", 'j', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512f,+adx,+avx512er,+avx512pf,+avx512cd,+avx5124fmaps,+avx5124vnniw,+avx512vpopcntdq")
-
-#undef CPU_SPECIFIC_ALIAS
-#undef CPU_SPECIFIC
-#undef PROC_64_BIT
-#undef PROC_32_BIT
-#undef FEATURE
diff --git a/clang/include/clang/Basic/riscv_vector.td b/clang/include/clang/Basic/riscv_vector.td
index 48c032dd1422..cc242da7f1ca 100644
--- a/clang/include/clang/Basic/riscv_vector.td
+++ b/clang/include/clang/Basic/riscv_vector.td
@@ -170,16 +170,27 @@ class RVVBuiltin<string suffix, string prototype, string type_range,
// is always the first operand in builtin and IR intrinsic.
bit HasMaskedOffOperand = true;
- // This builtin has a granted vector length parameter in the last position.
+ // This builtin has a granted vector length parameter.
bit HasVL = true;
+ // There are several cases for specifying tail policy.
+ // 1. Add tail policy argument to masked intrinsics. It may have the maskedoff
+ // argument or not.
+ // * Have the maskedoff argument: (HasPolicy = true, HasMaskedOffOperand = true)
+ // Ex: vadd_vv_i8m1_mt(mask, maskedoff, op1, op2, vl, ta);
+ // * Do not have the maskedoff argument: (HasPolicy = true, HasMaskedOffOperand = false)
+ // Ex: vmacc_vv_i8m1_mt(mask, vd, vs1, vs2, vl, ta);
+ // 2. Add dest argument for no mask intrinsics. (TODO)
+ // Ex: vmv_v_x_i8m1_t(dest, src, vl);
+ // 3. Always tail agnostic. (HasPolicy = false)
+ // Ex: vmseq_vv_i8m1_b8_m(mask, maskedoff, op1, op2, vl);
+ // The tail policy argument is located at the last position.
+ bit HasPolicy = true;
+
// This builtin supports non-masked function overloading api.
// All masked operations support overloading api.
bit HasNoMaskedOverloaded = true;
- // Reads or writes "memory" or has other side-effects.
- bit HasSideEffects = false;
-
// This builtin is valid for the given Log2LMULs.
list<int> Log2LMUL = [0, 1, 2, 3, -1, -2, -3];
@@ -204,13 +215,18 @@ class RVVBuiltin<string suffix, string prototype, string type_range,
// an automatic definition in header is emitted.
string HeaderCode = "";
- // Sub extension of vector spec. Currently only support Zvamo or Zvlsseg.
+ // Sub extension of vector spec. Currently only supports Zvlsseg.
string RequiredExtension = "";
// Number of fields for Zvlsseg.
int NF = 1;
}
+class RVVHeader
+{
+ code HeaderCode;
+}
+
//===----------------------------------------------------------------------===//
// Basic classes with automatic codegen.
//===----------------------------------------------------------------------===//
@@ -557,6 +573,7 @@ class IsFloat<string type> {
}
let HasNoMaskedOverloaded = false,
+ HasPolicy = false,
ManualCodegen = [{
IntrinsicTypes = {ResultType, Ops[1]->getType()};
Ops[0] = Builder.CreateBitCast(Ops[0], ResultType->getPointerTo());
@@ -568,10 +585,24 @@ let HasNoMaskedOverloaded = false,
Ops[1] = Builder.CreateBitCast(Ops[1], ResultType->getPointerTo());
}] in {
class RVVVLEMaskBuiltin : RVVBuiltin<"m", "mPCUe", "c"> {
- let Name = "vle1_v";
- let IRName = "vle1";
+ let Name = "vlm_v";
+ let IRName = "vlm";
let HasMask = false;
}
+}
+
+let HasNoMaskedOverloaded = false,
+ ManualCodegen = [{
+ IntrinsicTypes = {ResultType, Ops[1]->getType()};
+ Ops[0] = Builder.CreateBitCast(Ops[0], ResultType->getPointerTo());
+ }],
+ ManualCodegenMask= [{
+ // Move mask to right before vl.
+ std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end() - 1);
+ Ops.push_back(ConstantInt::get(Ops.back()->getType(), TAIL_UNDISTURBED));
+ IntrinsicTypes = {ResultType, Ops[3]->getType()};
+ Ops[1] = Builder.CreateBitCast(Ops[1], ResultType->getPointerTo());
+ }] in {
multiclass RVVVLEBuiltin<list<string> types> {
let Name = NAME # "_v",
IRName = "vle",
@@ -602,7 +633,7 @@ multiclass RVVVLEFFBuiltin<list<string> types> {
llvm::Value *V = Builder.CreateExtractValue(LoadValue, {0});
// Store new_vl.
clang::CharUnits Align =
- CGM.getNaturalTypeAlignment(getContext().getSizeType());
+ CGM.getNaturalPointeeTypeAlignment(E->getArg(1)->getType());
Builder.CreateStore(Builder.CreateExtractValue(LoadValue, {1}),
Address(NewVL, Align));
return V;
@@ -612,6 +643,7 @@ multiclass RVVVLEFFBuiltin<list<string> types> {
{
// Move mask to right before vl.
std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end() - 1);
+ Ops.push_back(ConstantInt::get(Ops.back()->getType(), TAIL_UNDISTURBED));
IntrinsicTypes = {ResultType, Ops[4]->getType()};
Ops[1] = Builder.CreateBitCast(Ops[1], ResultType->getPointerTo());
Value *NewVL = Ops[2];
@@ -621,7 +653,7 @@ multiclass RVVVLEFFBuiltin<list<string> types> {
llvm::Value *V = Builder.CreateExtractValue(LoadValue, {0});
// Store new_vl.
clang::CharUnits Align =
- CGM.getNaturalTypeAlignment(getContext().getSizeType());
+ CGM.getNaturalPointeeTypeAlignment(E->getArg(3)->getType());
Builder.CreateStore(Builder.CreateExtractValue(LoadValue, {1}),
Address(NewVL, Align));
return V;
@@ -649,6 +681,7 @@ multiclass RVVVLSEBuiltin<list<string> types> {
ManualCodegenMask= [{
// Move mask to right before vl.
std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end() - 1);
+ Ops.push_back(ConstantInt::get(Ops.back()->getType(), TAIL_UNDISTURBED));
IntrinsicTypes = {ResultType, Ops[4]->getType()};
Ops[1] = Builder.CreateBitCast(Ops[1], ResultType->getPointerTo());
}] in {
@@ -669,6 +702,7 @@ multiclass RVVIndexedLoad<string op> {
ManualCodegenMask = [{
// Move mask to right before vl.
std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end() - 1);
+ Ops.push_back(ConstantInt::get(Ops.back()->getType(), TAIL_UNDISTURBED));
IntrinsicTypes = {ResultType, Ops[2]->getType(), Ops[4]->getType()};
Ops[1] = Builder.CreateBitCast(Ops[1], ResultType->getPointerTo());
}] in {
@@ -688,6 +722,7 @@ multiclass RVVIndexedLoad<string op> {
}
let HasMaskedOffOperand = false,
+ HasPolicy = false,
ManualCodegen = [{
// Builtin: (ptr, value, vl). Intrinsic: (value, ptr, vl)
std::swap(Ops[0], Ops[1]);
@@ -701,8 +736,8 @@ let HasMaskedOffOperand = false,
IntrinsicTypes = {Ops[0]->getType(), Ops[3]->getType()};
}] in {
class RVVVSEMaskBuiltin : RVVBuiltin<"m", "0PUem", "c"> {
- let Name = "vse1_v";
- let IRName = "vse1";
+ let Name = "vsm_v";
+ let IRName = "vsm";
let HasMask = false;
}
multiclass RVVVSEBuiltin<list<string> types> {
@@ -724,6 +759,7 @@ multiclass RVVVSSEBuiltin<list<string> types> {
IRName = "vsse",
IRNameMask = "vsse_mask",
HasMaskedOffOperand = false,
+ HasPolicy = false,
ManualCodegen = [{
// Builtin: (ptr, stride, value, vl). Intrinsic: (value, ptr, stride, vl)
std::rotate(Ops.begin(), Ops.begin() + 2, Ops.begin() + 3);
@@ -747,6 +783,7 @@ multiclass RVVVSSEBuiltin<list<string> types> {
multiclass RVVIndexedStore<string op> {
let HasMaskedOffOperand = false,
+ HasPolicy = false,
ManualCodegen = [{
// Builtin: (ptr, index, value, vl). Intrinsic: (value, ptr, index, vl)
std::rotate(Ops.begin(), Ops.begin() + 2, Ops.begin() + 3);
@@ -805,14 +842,14 @@ multiclass RVVUnitStridedSegLoad<string op> {
ManualCodegen = [{
{
// builtin: (val0 address, val1 address, ..., ptr, vl)
- IntrinsicTypes = {Ops[0]->getType()->getPointerElementType(),
+ IntrinsicTypes = {ConvertType(E->getArg(0)->getType()->getPointeeType()),
Ops[NF + 1]->getType()};
// intrinsic: (ptr, vl)
llvm::Value *Operands[] = {Ops[NF], Ops[NF + 1]};
llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes);
llvm::Value *LoadValue = Builder.CreateCall(F, Operands, "");
- clang::CharUnits Align = CharUnits::fromQuantity(
- IntrinsicTypes[0]->getScalarSizeInBits() / 8);
+ clang::CharUnits Align =
+ CGM.getNaturalPointeeTypeAlignment(E->getArg(0)->getType());
llvm::Value *V;
for (unsigned I = 0; I < NF; ++I) {
V = Builder.CreateStore(Builder.CreateExtractValue(LoadValue, {I}),
@@ -825,7 +862,7 @@ multiclass RVVUnitStridedSegLoad<string op> {
{
// builtin: (val0 address, ..., mask, maskedoff0, ..., ptr, vl)
// intrinsic: (maskedoff0, ..., ptr, mask, vl)
- IntrinsicTypes = {Ops[0]->getType()->getPointerElementType(),
+ IntrinsicTypes = {ConvertType(E->getArg(0)->getType()->getPointeeType()),
Ops[2 * NF + 2]->getType()};
SmallVector<llvm::Value*, 12> Operands;
for (unsigned I = 0; I < NF; ++I)
@@ -833,11 +870,12 @@ multiclass RVVUnitStridedSegLoad<string op> {
Operands.push_back(Ops[2 * NF + 1]);
Operands.push_back(Ops[NF]);
Operands.push_back(Ops[2 * NF + 2]);
- assert(Operands.size() == NF + 3);
+ Operands.push_back(ConstantInt::get(Ops.back()->getType(), TAIL_UNDISTURBED));
+ assert(Operands.size() == NF + 4);
llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes);
llvm::Value *LoadValue = Builder.CreateCall(F, Operands, "");
- clang::CharUnits Align = CharUnits::fromQuantity(
- IntrinsicTypes[0]->getScalarSizeInBits() / 8);
+ clang::CharUnits Align =
+ CGM.getNaturalPointeeTypeAlignment(E->getArg(0)->getType());
llvm::Value *V;
for (unsigned I = 0; I < NF; ++I) {
V = Builder.CreateStore(Builder.CreateExtractValue(LoadValue, {I}),
@@ -875,15 +913,15 @@ multiclass RVVUnitStridedSegLoadFF<string op> {
ManualCodegen = [{
{
// builtin: (val0 address, val1 address, ..., ptr, new_vl, vl)
- IntrinsicTypes = {Ops[0]->getType()->getPointerElementType(),
+ IntrinsicTypes = {ConvertType(E->getArg(0)->getType()->getPointeeType()),
Ops[NF + 2]->getType()};
// intrinsic: (ptr, vl)
llvm::Value *Operands[] = {Ops[NF], Ops[NF + 2]};
Value *NewVL = Ops[NF + 1];
llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes);
llvm::Value *LoadValue = Builder.CreateCall(F, Operands, "");
- clang::CharUnits Align = CharUnits::fromQuantity(
- IntrinsicTypes[0]->getScalarSizeInBits() / 8);
+ clang::CharUnits Align =
+ CGM.getNaturalPointeeTypeAlignment(E->getArg(0)->getType());
for (unsigned I = 0; I < NF; ++I) {
Builder.CreateStore(Builder.CreateExtractValue(LoadValue, {I}),
Address(Ops[I], Align));
@@ -897,7 +935,7 @@ multiclass RVVUnitStridedSegLoadFF<string op> {
{
// builtin: (val0 address, ..., mask, maskedoff0, ..., ptr, new_vl, vl)
// intrinsic: (maskedoff0, ..., ptr, mask, vl)
- IntrinsicTypes = {Ops[0]->getType()->getPointerElementType(),
+ IntrinsicTypes = {ConvertType(E->getArg(0)->getType()->getPointeeType()),
Ops[2 * NF + 3]->getType()};
SmallVector<llvm::Value*, 12> Operands;
for (unsigned I = 0; I < NF; ++I)
@@ -905,12 +943,13 @@ multiclass RVVUnitStridedSegLoadFF<string op> {
Operands.push_back(Ops[2 * NF + 1]);
Operands.push_back(Ops[NF]);
Operands.push_back(Ops[2 * NF + 3]);
+ Operands.push_back(ConstantInt::get(Ops.back()->getType(), TAIL_UNDISTURBED));
Value *NewVL = Ops[2 * NF + 2];
- assert(Operands.size() == NF + 3);
+ assert(Operands.size() == NF + 4);
llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes);
llvm::Value *LoadValue = Builder.CreateCall(F, Operands, "");
- clang::CharUnits Align = CharUnits::fromQuantity(
- IntrinsicTypes[0]->getScalarSizeInBits() / 8);
+ clang::CharUnits Align =
+ CGM.getNaturalPointeeTypeAlignment(E->getArg(0)->getType());
for (unsigned I = 0; I < NF; ++I) {
Builder.CreateStore(Builder.CreateExtractValue(LoadValue, {I}),
Address(Ops[I], Align));
@@ -949,14 +988,14 @@ multiclass RVVStridedSegLoad<string op> {
ManualCodegen = [{
{
// builtin: (val0 address, val1 address, ..., ptr, stride, vl)
- IntrinsicTypes = {Ops[0]->getType()->getPointerElementType(),
+ IntrinsicTypes = {ConvertType(E->getArg(0)->getType()->getPointeeType()),
Ops[NF + 2]->getType()};
// intrinsic: (ptr, stride, vl)
llvm::Value *Operands[] = {Ops[NF], Ops[NF + 1], Ops[NF + 2]};
llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes);
llvm::Value *LoadValue = Builder.CreateCall(F, Operands, "");
- clang::CharUnits Align = CharUnits::fromQuantity(
- IntrinsicTypes[0]->getScalarSizeInBits() / 8);
+ clang::CharUnits Align =
+ CGM.getNaturalPointeeTypeAlignment(E->getArg(0)->getType());
llvm::Value *V;
for (unsigned I = 0; I < NF; ++I) {
V = Builder.CreateStore(Builder.CreateExtractValue(LoadValue, {I}),
@@ -969,7 +1008,7 @@ multiclass RVVStridedSegLoad<string op> {
{
// builtin: (val0 address, ..., mask, maskedoff0, ..., ptr, stride, vl)
// intrinsic: (maskedoff0, ..., ptr, stride, mask, vl)
- IntrinsicTypes = {Ops[0]->getType()->getPointerElementType(),
+ IntrinsicTypes = {ConvertType(E->getArg(0)->getType()->getPointeeType()),
Ops[2 * NF + 3]->getType()};
SmallVector<llvm::Value*, 12> Operands;
for (unsigned I = 0; I < NF; ++I)
@@ -978,11 +1017,12 @@ multiclass RVVStridedSegLoad<string op> {
Operands.push_back(Ops[2 * NF + 2]);
Operands.push_back(Ops[NF]);
Operands.push_back(Ops[2 * NF + 3]);
- assert(Operands.size() == NF + 4);
+ Operands.push_back(ConstantInt::get(Ops.back()->getType(), TAIL_UNDISTURBED));
+ assert(Operands.size() == NF + 5);
llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes);
llvm::Value *LoadValue = Builder.CreateCall(F, Operands, "");
- clang::CharUnits Align = CharUnits::fromQuantity(
- IntrinsicTypes[0]->getScalarSizeInBits() / 8);
+ clang::CharUnits Align =
+ CGM.getNaturalPointeeTypeAlignment(E->getArg(0)->getType());
llvm::Value *V;
for (unsigned I = 0; I < NF; ++I) {
V = Builder.CreateStore(Builder.CreateExtractValue(LoadValue, {I}),
@@ -1015,14 +1055,14 @@ multiclass RVVIndexedSegLoad<string op> {
ManualCodegen = [{
{
// builtin: (val0 address, val1 address, ..., ptr, index, vl)
- IntrinsicTypes = {Ops[0]->getType()->getPointerElementType(),
+ IntrinsicTypes = {ConvertType(E->getArg(0)->getType()->getPointeeType()),
Ops[NF + 1]->getType(), Ops[NF + 2]->getType()};
// intrinsic: (ptr, index, vl)
llvm::Value *Operands[] = {Ops[NF], Ops[NF + 1], Ops[NF + 2]};
llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes);
llvm::Value *LoadValue = Builder.CreateCall(F, Operands, "");
- clang::CharUnits Align = CharUnits::fromQuantity(
- IntrinsicTypes[0]->getScalarSizeInBits() / 8);
+ clang::CharUnits Align =
+ CGM.getNaturalPointeeTypeAlignment(E->getArg(0)->getType());
llvm::Value *V;
for (unsigned I = 0; I < NF; ++I) {
V = Builder.CreateStore(Builder.CreateExtractValue(LoadValue, {I}),
@@ -1034,7 +1074,7 @@ multiclass RVVIndexedSegLoad<string op> {
ManualCodegenMask = [{
{
// builtin: (val0 address, ..., mask, maskedoff0, ..., ptr, index, vl)
- IntrinsicTypes = {Ops[0]->getType()->getPointerElementType(),
+ IntrinsicTypes = {ConvertType(E->getArg(0)->getType()->getPointeeType()),
Ops[2 * NF + 2]->getType(), Ops[2 * NF + 3]->getType()};
// intrinsic: (maskedoff0, ..., ptr, index, mask, vl)
SmallVector<llvm::Value*, 12> Operands;
@@ -1044,11 +1084,12 @@ multiclass RVVIndexedSegLoad<string op> {
Operands.push_back(Ops[2 * NF + 2]);
Operands.push_back(Ops[NF]);
Operands.push_back(Ops[2 * NF + 3]);
- assert(Operands.size() == NF + 4);
+ Operands.push_back(ConstantInt::get(Ops.back()->getType(), TAIL_UNDISTURBED));
+ assert(Operands.size() == NF + 5);
llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes);
llvm::Value *LoadValue = Builder.CreateCall(F, Operands, "");
- clang::CharUnits Align = CharUnits::fromQuantity(
- IntrinsicTypes[0]->getScalarSizeInBits() / 8);
+ clang::CharUnits Align =
+ CGM.getNaturalPointeeTypeAlignment(E->getArg(0)->getType());
llvm::Value *V;
for (unsigned I = 0; I < NF; ++I) {
V = Builder.CreateStore(Builder.CreateExtractValue(LoadValue, {I}),
@@ -1094,6 +1135,7 @@ multiclass RVVUnitStridedSegStore<string op> {
IRNameMask = op # nf # "_mask",
NF = nf,
HasMaskedOffOperand = false,
+ HasPolicy = false,
ManualCodegen = [{
{
// Builtin: (ptr, val0, val1, ..., vl)
@@ -1139,6 +1181,7 @@ multiclass RVVStridedSegStore<string op> {
IRNameMask = op # nf # "_mask",
NF = nf,
HasMaskedOffOperand = false,
+ HasPolicy = false,
ManualCodegen = [{
{
// Builtin: (ptr, stride, val0, val1, ..., vl).
@@ -1180,6 +1223,7 @@ multiclass RVVIndexedSegStore<string op> {
IRNameMask = op # nf # "_mask",
NF = nf,
HasMaskedOffOperand = false,
+ HasPolicy = false,
ManualCodegen = [{
{
// Builtin: (ptr, index, val0, val1, ..., vl)
@@ -1213,35 +1257,6 @@ multiclass RVVIndexedSegStore<string op> {
}
}
-multiclass RVVAMOBuiltinSet<bit has_signed = false, bit has_unsigned = false,
- bit has_fp = false> {
- defvar type_list = !if(has_fp, ["i","l","f","d"], ["i","l"]);
- foreach type = type_list in
- foreach eew_list = EEWList in {
- defvar eew = eew_list[0];
- defvar eew_index = eew_list[1];
- let Name = NAME # "ei" # eew # "_" # "v",
- IRName = NAME,
- IRNameMask = NAME # "_mask",
- HasMaskedOffOperand = false,
- ManualCodegen = [{
- // base, bindex, value, vl
- IntrinsicTypes = {ResultType, Ops[1]->getType(), Ops[3]->getType()};
- Ops[0] = Builder.CreateBitCast(Ops[0], ResultType->getPointerTo());
- }],
- ManualCodegenMask = [{
- std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end() - 1);
- IntrinsicTypes = {ResultType, Ops[1]->getType(), Ops[4]->getType()};
- Ops[0] = Builder.CreateBitCast(Ops[0], ResultType->getPointerTo());
- }] in {
- if has_signed then
- def : RVVBuiltin<"v", "vPe" # eew_index # "Uvv", type>;
- if !and(!not(IsFloat<type>.val), has_unsigned) then
- def : RVVBuiltin<"Uv", "UvPUe" # eew_index # "UvUv", type>;
- }
- }
-}
-
multiclass RVVPseudoUnaryBuiltin<string IR, string type_range> {
let Name = NAME,
IRName = IR,
@@ -1259,6 +1274,7 @@ multiclass RVVPseudoUnaryBuiltin<string IR, string type_range> {
ManualCodegenMask = [{
{
std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end() - 1);
+ Ops.push_back(ConstantInt::get(Ops.back()->getType(), TAIL_UNDISTURBED));
// maskedoff, op1, mask, vl
IntrinsicTypes = {ResultType,
cast<llvm::VectorType>(ResultType)->getElementType(),
@@ -1289,6 +1305,7 @@ multiclass RVVPseudoVNotBuiltin<string IR, string type_range> {
ManualCodegenMask = [{
{
std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end() - 1);
+ Ops.push_back(ConstantInt::get(Ops.back()->getType(), TAIL_UNDISTURBED));
// maskedoff, op1, mask, vl
IntrinsicTypes = {ResultType,
cast<llvm::VectorType>(ResultType)->getElementType(),
@@ -1336,6 +1353,7 @@ multiclass RVVPseudoVFUnaryBuiltin<string IR, string type_range> {
ManualCodegenMask = [{
{
std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end() - 1);
+ Ops.push_back(ConstantInt::get(Ops.back()->getType(), TAIL_UNDISTURBED));
// maskedoff, op1, mask, vl
IntrinsicTypes = {ResultType,
Ops[1]->getType(),
@@ -1368,6 +1386,7 @@ multiclass RVVPseudoVWCVTBuiltin<string IR, string MName, string type_range,
ManualCodegenMask = [{
{
std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end() - 1);
+ Ops.push_back(ConstantInt::get(Ops.back()->getType(), TAIL_UNDISTURBED));
// maskedoff, op1, mask, vl
IntrinsicTypes = {ResultType,
Ops[1]->getType(),
@@ -1403,6 +1422,7 @@ multiclass RVVPseudoVNCVTBuiltin<string IR, string MName, string type_range,
ManualCodegenMask = [{
{
std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end() - 1);
+ Ops.push_back(ConstantInt::get(Ops.back()->getType(), TAIL_UNDISTURBED));
// maskedoff, op1, mask, vl
IntrinsicTypes = {ResultType,
Ops[1]->getType(),
@@ -1422,7 +1442,7 @@ multiclass RVVPseudoVNCVTBuiltin<string IR, string MName, string type_range,
// 6.1. vsetvli/vsetvl instructions
let HasVL = false,
HasMask = false,
- HasSideEffects = true,
+ HasPolicy = false,
Log2LMUL = [0],
ManualCodegen = [{IntrinsicTypes = {ResultType};}] in // Set XLEN type
{
@@ -1492,13 +1512,13 @@ let HasVL = false,
// 7. Vector Loads and Stores
// 7.4. Vector Unit-Stride Instructions
-def vle1: RVVVLEMaskBuiltin;
+def vlm: RVVVLEMaskBuiltin;
defm vle8: RVVVLEBuiltin<["c"]>;
defm vle16: RVVVLEBuiltin<["s","x"]>;
defm vle32: RVVVLEBuiltin<["i","f"]>;
defm vle64: RVVVLEBuiltin<["l","d"]>;
-def vse1 : RVVVSEMaskBuiltin;
+def vsm : RVVVSEMaskBuiltin;
defm vse8 : RVVVSEBuiltin<["c"]>;
defm vse16: RVVVSEBuiltin<["s","x"]>;
defm vse32: RVVVSEBuiltin<["i","f"]>;
@@ -1541,19 +1561,6 @@ defm : RVVIndexedSegStore<"vsuxseg">;
defm : RVVIndexedSegStore<"vsoxseg">;
}
-// 8. Vector AMO Operations
-let RequiredExtension = "Zvamo" in {
-defm vamoswap : RVVAMOBuiltinSet< /* hasSigned */ true, /* hasUnsigned */ true, /* hasFP */ true>;
-defm vamoadd : RVVAMOBuiltinSet< /* hasSigned */ true, /* hasUnsigned */ true>;
-defm vamoxor : RVVAMOBuiltinSet< /* hasSigned */ true, /* hasUnsigned */ true>;
-defm vamoand : RVVAMOBuiltinSet< /* hasSigned */ true, /* hasUnsigned */ true>;
-defm vamoor : RVVAMOBuiltinSet< /* hasSigned */ true, /* hasUnsigned */ true>;
-defm vamomin : RVVAMOBuiltinSet< /* hasSigned */ true>;
-defm vamomax : RVVAMOBuiltinSet< /* hasSigned */ true>;
-defm vamominu : RVVAMOBuiltinSet< /* hasSigned */ false, /* hasUnsigned */ true>;
-defm vamomaxu : RVVAMOBuiltinSet< /* hasSigned */ false, /* hasUnsigned */ true>;
-}
-
// 12. Vector Integer Arithmetic Instructions
// 12.1. Vector Single-Width Integer Add and Subtract
defm vadd : RVVIntBinBuiltinSet;
@@ -1596,7 +1603,7 @@ let Log2LMUL = [-3, -2, -1, 0] in {
}
// 12.4. Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
-let HasMask = false in {
+let HasMask = false, HasPolicy = false in {
defm vadc : RVVCarryinBuiltinSet;
defm vmadc : RVVCarryOutInBuiltinSet<"vmadc_carry_in">;
defm vmadc : RVVIntMaskOutBuiltinSet;
@@ -1624,6 +1631,7 @@ defm vncvt_x_x_w : RVVPseudoVNCVTBuiltin<"vnsrl", "vncvt_x", "csi",
["Uv", "UvUw"]]>;
// 12.8. Vector Integer Comparison Instructions
+let HasPolicy = false in {
defm vmseq : RVVIntMaskOutBuiltinSet;
defm vmsne : RVVIntMaskOutBuiltinSet;
defm vmsltu : RVVUnsignedMaskOutBuiltinSet;
@@ -1634,6 +1642,7 @@ defm vmsgtu : RVVUnsignedMaskOutBuiltinSet;
defm vmsgt : RVVSignedMaskOutBuiltinSet;
defm vmsgeu : RVVUnsignedMaskOutBuiltinSet;
defm vmsge : RVVSignedMaskOutBuiltinSet;
+}
// 12.9. Vector Integer Min/Max Instructions
defm vminu : RVVUnsignedBinBuiltinSet;
@@ -1669,6 +1678,7 @@ defm vwmulsu : RVVOutOp0Op1BuiltinSet<"vwmulsu", "csi",
}
// 12.13. Vector Single-Width Integer Multiply-Add Instructions
+let HasPolicy = false in {
defm vmacc : RVVIntTerBuiltinSet;
defm vnmsac : RVVIntTerBuiltinSet;
defm vmadd : RVVIntTerBuiltinSet;
@@ -1689,10 +1699,11 @@ defm vwmaccsu : RVVOutOp1Op2BuiltinSet<"vwmaccsu", "csi",
defm vwmaccus : RVVOutOp1Op2BuiltinSet<"vwmaccus", "csi",
[["vx", "w", "wwUev"]]>;
}
+}
// 12.15. Vector Integer Merge Instructions
// C/C++ Operand: (mask, op1, op2, vl), Intrinsic: (op1, op2, mask, vl)
-let HasMask = false,
+let HasMask = false, HasPolicy = false,
ManualCodegen = [{
std::rotate(Ops.begin(), Ops.begin() + 1, Ops.begin() + 3);
IntrinsicTypes = {ResultType, Ops[1]->getType(), Ops[3]->getType()};
@@ -1705,7 +1716,7 @@ let HasMask = false,
}
// 12.16. Vector Integer Move Instructions
-let HasMask = false in {
+let HasMask = false, HasPolicy = false in {
let MangledName = "vmv_v" in {
defm vmv_v : RVVOutBuiltinSet<"vmv_v_v", "csil",
[["v", "Uv", "UvUv"]]>;
@@ -1769,6 +1780,7 @@ let Log2LMUL = [-2, -1, 0, 1, 2] in {
}
// 14.6. Vector Single-Width Floating-Point Fused Multiply-Add Instructions
+let HasPolicy = false in {
defm vfmacc : RVVFloatingTerBuiltinSet;
defm vfnmacc : RVVFloatingTerBuiltinSet;
defm vfmsac : RVVFloatingTerBuiltinSet;
@@ -1783,6 +1795,7 @@ defm vfwmacc : RVVFloatingWidenTerBuiltinSet;
defm vfwnmacc : RVVFloatingWidenTerBuiltinSet;
defm vfwmsac : RVVFloatingWidenTerBuiltinSet;
defm vfwnmsac : RVVFloatingWidenTerBuiltinSet;
+}
// 14.8. Vector Floating-Point Square-Root Instruction
def vfsqrt : RVVFloatingUnaryVVBuiltin;
@@ -1805,32 +1818,34 @@ defm vfneg_v : RVVPseudoVFUnaryBuiltin<"vfsgnjn", "xfd">;
defm vfabs_v : RVVPseudoVFUnaryBuiltin<"vfsgnjx", "xfd">;
// 14.13. Vector Floating-Point Compare Instructions
+let HasPolicy = false in {
defm vmfeq : RVVFloatingMaskOutBuiltinSet;
defm vmfne : RVVFloatingMaskOutBuiltinSet;
defm vmflt : RVVFloatingMaskOutBuiltinSet;
defm vmfle : RVVFloatingMaskOutBuiltinSet;
defm vmfgt : RVVFloatingMaskOutBuiltinSet;
defm vmfge : RVVFloatingMaskOutBuiltinSet;
+}
// 14.14. Vector Floating-Point Classify Instruction
-let Name = "vfclass_v" in
+let Name = "vfclass_v", HasPolicy = false in
def vfclass : RVVOp0Builtin<"Uv", "Uvv", "xfd">;
// 14.15. Vector Floating-Point Merge Instruction
// C/C++ Operand: (mask, op1, op2, vl), Builtin: (op1, op2, mask, vl)
-let HasMask = false,
+let HasMask = false, HasPolicy = false,
ManualCodegen = [{
std::rotate(Ops.begin(), Ops.begin() + 1, Ops.begin() + 3);
IntrinsicTypes = {ResultType, Ops[1]->getType(), Ops[3]->getType()};
}] in {
- defm vmerge : RVVOutOp1BuiltinSet<"vfmerge", "xfd",
+ defm vmerge : RVVOutOp1BuiltinSet<"vmerge", "xfd",
[["vvm", "v", "vmvv"]]>;
defm vfmerge : RVVOutOp1BuiltinSet<"vfmerge", "xfd",
[["vfm", "v", "vmve"]]>;
}
// 14.16. Vector Floating-Point Move Instruction
-let HasMask = false, HasNoMaskedOverloaded = false in
+let HasMask = false, HasNoMaskedOverloaded = false, HasPolicy = false in
defm vfmv_v : RVVOutBuiltinSet<"vfmv_v_f", "xfd",
[["f", "v", "ve"]]>;
@@ -1867,6 +1882,7 @@ let Log2LMUL = [-3, -2, -1, 0, 1, 2] in {
// 15. Vector Reduction Operations
// 15.1. Vector Single-Width Integer Reduction Instructions
+let HasPolicy = false in {
defm vredsum : RVVIntReductionBuiltinSet;
defm vredmaxu : RVVUnsignedReductionBuiltin;
defm vredmax : RVVSignedReductionBuiltin;
@@ -1888,22 +1904,23 @@ let HasMaskedOffOperand = false in {
// 15.3. Vector Single-Width Floating-Point Reduction Instructions
defm vfredmax : RVVFloatingReductionBuiltin;
defm vfredmin : RVVFloatingReductionBuiltin;
-defm vfredsum : RVVFloatingReductionBuiltin;
+defm vfredusum : RVVFloatingReductionBuiltin;
defm vfredosum : RVVFloatingReductionBuiltin;
// 15.4. Vector Widening Floating-Point Reduction Instructions
-defm vfwredsum : RVVFloatingWidenReductionBuiltin;
+defm vfwredusum : RVVFloatingWidenReductionBuiltin;
defm vfwredosum : RVVFloatingWidenReductionBuiltin;
+}
// 16. Vector Mask Instructions
// 16.1. Vector Mask-Register Logical Instructions
def vmand : RVVMaskBinBuiltin;
def vmnand : RVVMaskBinBuiltin;
-def vmandnot : RVVMaskBinBuiltin;
+def vmandn : RVVMaskBinBuiltin;
def vmxor : RVVMaskBinBuiltin;
def vmor : RVVMaskBinBuiltin;
def vmnor : RVVMaskBinBuiltin;
-def vmornot : RVVMaskBinBuiltin;
+def vmorn : RVVMaskBinBuiltin;
def vmxnor : RVVMaskBinBuiltin;
// pseudoinstructions
def vmclr : RVVMaskNullaryBuiltin;
@@ -1911,8 +1928,9 @@ def vmset : RVVMaskNullaryBuiltin;
defm vmmv_m : RVVPseudoMaskBuiltin<"vmand", "c">;
defm vmnot_m : RVVPseudoMaskBuiltin<"vmnand", "c">;
-// 16.2. Vector mask population count vpopc
-def vpopc : RVVMaskOp0Builtin<"um">;
+let HasPolicy = false in {
+// 16.2. Vector count population in mask vcpop.m
+def vcpop : RVVMaskOp0Builtin<"um">;
// 16.3. vfirst find-first-set mask bit
def vfirst : RVVMaskOp0Builtin<"lm">;
@@ -1934,10 +1952,11 @@ let HasNoMaskedOverloaded = false in {
defm vid : RVVOutBuiltinSet<"vid", "csil", [["v", "v", "v"],
["v", "Uv", "Uv"]]>;
}
+}
// 17. Vector Permutation Instructions
// 17.1. Integer Scalar Move Instructions
-let HasMask = false in {
+let HasMask = false, HasPolicy = false in {
let HasVL = false, MangledName = "vmv_x" in
defm vmv_x : RVVOp0BuiltinSet<"vmv_x_s", "csil",
[["s", "ve", "ev"],
@@ -1949,7 +1968,7 @@ let HasMask = false in {
}
// 17.2. Floating-Point Scalar Move Instructions
-let HasMask = false in {
+let HasMask = false, HasPolicy = false in {
let HasVL = false, MangledName = "vfmv_f" in
defm vfmv_f : RVVOp0BuiltinSet<"vfmv_f_s", "xfd",
[["s", "ve", "ev"]]>;
@@ -1960,10 +1979,12 @@ let HasMask = false in {
}
// 17.3. Vector Slide Instructions
+let HasPolicy = false in {
// 17.3.1. Vector Slideup Instructions
defm vslideup : RVVSlideBuiltinSet;
// 17.3.2. Vector Slidedown Instructions
defm vslidedown : RVVSlideBuiltinSet;
+}
// 17.3.3. Vector Slide1up Instructions
defm vslide1up : RVVSlideOneBuiltinSet;
@@ -1990,7 +2011,7 @@ defm vrgatherei16 : RVVOutBuiltinSet<"vrgatherei16_vv", "csil",
[["vv", "Uv", "UvUv(Log2EEW:4)Uv"]]>;
// 17.5. Vector Compress Instruction
-let HasMask = false,
+let HasMask = false, HasPolicy = false,
ManualCodegen = [{
std::rotate(Ops.begin(), Ops.begin() + 1, Ops.begin() + 3);
IntrinsicTypes = {ResultType, Ops[3]->getType()};
@@ -2005,7 +2026,7 @@ let HasMask = false,
// Miscellaneous
let HasMask = false, HasVL = false, IRName = "" in {
- let Name = "vreinterpret_v",
+ let Name = "vreinterpret_v", HasPolicy = false,
ManualCodegen = [{
return Builder.CreateBitCast(Ops[0], ResultType);
}] in {
@@ -2027,7 +2048,7 @@ let HasMask = false, HasVL = false, IRName = "" in {
}
}
- let Name = "vundefined", HasNoMaskedOverloaded = false,
+ let Name = "vundefined", HasNoMaskedOverloaded = false, HasPolicy = false,
ManualCodegen = [{
return llvm::UndefValue::get(ResultType);
}] in {
@@ -2037,7 +2058,7 @@ let HasMask = false, HasVL = false, IRName = "" in {
// LMUL truncation
// C/C++ Operand: VecTy, IR Operand: VecTy, Index
- let Name = "vlmul_trunc_v", MangledName = "vlmul_trunc",
+ let Name = "vlmul_trunc_v", MangledName = "vlmul_trunc", HasPolicy = false,
ManualCodegen = [{ {
ID = Intrinsic::experimental_vector_extract;
IntrinsicTypes = {ResultType, Ops[0]->getType()};
@@ -2055,7 +2076,7 @@ let HasMask = false, HasVL = false, IRName = "" in {
// LMUL extension
// C/C++ Operand: SubVecTy, IR Operand: VecTy, SubVecTy, Index
- let Name = "vlmul_ext_v", MangledName = "vlmul_ext",
+ let Name = "vlmul_ext_v", MangledName = "vlmul_ext", HasPolicy = false,
ManualCodegen = [{
ID = Intrinsic::experimental_vector_insert;
IntrinsicTypes = {ResultType, Ops[0]->getType()};
@@ -2073,11 +2094,16 @@ let HasMask = false, HasVL = false, IRName = "" in {
}
}
- let Name = "vget_v",
+ let Name = "vget_v", HasPolicy = false,
ManualCodegen = [{
{
ID = Intrinsic::experimental_vector_extract;
- ScalableVectorType *VecTy = cast<ScalableVectorType>(ResultType);
+ auto *VecTy = cast<ScalableVectorType>(ResultType);
+ auto *OpVecTy = cast<ScalableVectorType>(Ops[0]->getType());
+ // Mask to only valid indices.
+ unsigned MaxIndex = OpVecTy->getMinNumElements() / VecTy->getMinNumElements();
+ assert(isPowerOf2_32(MaxIndex));
+ Ops[1] = Builder.CreateAnd(Ops[1], MaxIndex - 1);
Ops[1] = Builder.CreateMul(Ops[1],
ConstantInt::get(Ops[1]->getType(),
VecTy->getMinNumElements()));
@@ -2086,17 +2112,22 @@ let HasMask = false, HasVL = false, IRName = "" in {
}
}] in {
foreach dst_lmul = ["(SFixedLog2LMUL:0)", "(SFixedLog2LMUL:1)", "(SFixedLog2LMUL:2)"] in {
- def : RVVBuiltin<"v" # dst_lmul # "v", dst_lmul # "vvKz", "csilfd", dst_lmul # "v">;
+ def : RVVBuiltin<"v" # dst_lmul # "v", dst_lmul # "vvKz", "csilxfd", dst_lmul # "v">;
def : RVVBuiltin<"Uv" # dst_lmul # "Uv", dst_lmul # "UvUvKz", "csil", dst_lmul # "Uv">;
}
}
- let Name = "vset_v", Log2LMUL = [0, 1, 2],
+ let Name = "vset_v", Log2LMUL = [0, 1, 2], HasPolicy = false,
ManualCodegen = [{
{
ID = Intrinsic::experimental_vector_insert;
IntrinsicTypes = {ResultType, Ops[2]->getType()};
- ScalableVectorType *VecTy = cast<ScalableVectorType>(Ops[2]->getType());
+ auto *ResVecTy = cast<ScalableVectorType>(ResultType);
+ auto *VecTy = cast<ScalableVectorType>(Ops[2]->getType());
+ // Mask to only valid indices.
+ unsigned MaxIndex = ResVecTy->getMinNumElements() / VecTy->getMinNumElements();
+ assert(isPowerOf2_32(MaxIndex));
+ Ops[1] = Builder.CreateAnd(Ops[1], MaxIndex - 1);
Ops[1] = Builder.CreateMul(Ops[1],
ConstantInt::get(Ops[1]->getType(),
VecTy->getMinNumElements()));
@@ -2105,8 +2136,14 @@ let HasMask = false, HasVL = false, IRName = "" in {
}
}] in {
foreach dst_lmul = ["(LFixedLog2LMUL:1)", "(LFixedLog2LMUL:2)", "(LFixedLog2LMUL:3)"] in {
- def : RVVBuiltin<"v" # dst_lmul # "v", dst_lmul # "v" # dst_lmul # "vKzv", "csilfd">;
+ def : RVVBuiltin<"v" # dst_lmul # "v", dst_lmul # "v" # dst_lmul # "vKzv", "csilxfd">;
def : RVVBuiltin<"Uv" # dst_lmul # "Uv", dst_lmul # "Uv" # dst_lmul #"UvKzUv", "csil">;
}
}
}
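Both vget_v and vset_v above clamp the user-supplied index with a bitwise AND before scaling it by the sub-vector's minimum element count, so only in-range positions reach experimental_vector_extract/insert. A standalone sketch of that arithmetic; the concrete element counts below are illustrative only:

#include <cassert>
#include <cstdint>
#include <iostream>

// For a power-of-two MaxIndex, Idx & (MaxIndex - 1) folds any value into
// [0, MaxIndex), matching the CreateAnd/CreateMul sequence emitted above.
static uint64_t clampAndScale(uint64_t Idx, uint64_t MaxIndex,
                              uint64_t SubVecMinElts) {
  assert(MaxIndex != 0 && (MaxIndex & (MaxIndex - 1)) == 0 &&
         "MaxIndex must be a power of two");
  return (Idx & (MaxIndex - 1)) * SubVecMinElts;
}

int main() {
  // E.g. an LMUL=4 source split into LMUL=1 parts gives MaxIndex = 4.
  std::cout << clampAndScale(5, 4, 2) << '\n'; // 5 & 3 == 1, so prints 2
}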
+
+let HeaderCode = [{
+#define VE_TAIL_UNDISTURBED 0
+#define VE_TAIL_AGNOSTIC 1
+}] in
+def policy : RVVHeader;
diff --git a/clang/include/clang/CodeGen/ModuleBuilder.h b/clang/include/clang/CodeGen/ModuleBuilder.h
index f9d056ed8b1e..26587e73bf6c 100644
--- a/clang/include/clang/CodeGen/ModuleBuilder.h
+++ b/clang/include/clang/CodeGen/ModuleBuilder.h
@@ -74,6 +74,10 @@ public:
/// This may return null if there was no matching declaration.
const Decl *GetDeclForMangledName(llvm::StringRef MangledName);
+ /// Given a global declaration, return a mangled name for this declaration
+ /// which has been added to this code generator via a Handle method.
+ llvm::StringRef GetMangledName(GlobalDecl GD);
+
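A hedged sketch of how a client might pair the new accessor with the existing GetDeclForMangledName; the CG parameter and the mangled name used for the lookup are assumptions for illustration, not part of this patch:

#include "clang/AST/Decl.h"
#include "clang/AST/GlobalDecl.h"
#include "clang/CodeGen/ModuleBuilder.h"

// Sketch only: CG must already have handled the declaration in question.
llvm::StringRef lookupMangledName(clang::CodeGenerator &CG) {
  if (const clang::Decl *D = CG.GetDeclForMangledName("_Z3foov"))
    if (const auto *FD = llvm::dyn_cast<clang::FunctionDecl>(D))
      return CG.GetMangledName(clang::GlobalDecl(FD));
  return llvm::StringRef();
}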
/// Return the LLVM address of the given global entity.
///
/// \param isForDefinition If true, the caller intends to define the
diff --git a/clang/include/clang/DirectoryWatcher/DirectoryWatcher.h b/clang/include/clang/DirectoryWatcher/DirectoryWatcher.h
index 4475807dfce9..d879b6411c9e 100644
--- a/clang/include/clang/DirectoryWatcher/DirectoryWatcher.h
+++ b/clang/include/clang/DirectoryWatcher/DirectoryWatcher.h
@@ -20,7 +20,7 @@ namespace clang {
/// Provides notifications for file changes in a directory.
///
/// Invokes client-provided function on every filesystem event in the watched
-/// directory. Initially the the watched directory is scanned and for every file
+/// directory. Initially the watched directory is scanned and for every file
/// found, an event is synthesized as if the file was added.
///
/// This is not a general purpose directory monitoring tool - list of
diff --git a/clang/include/clang/Driver/Distro.h b/clang/include/clang/Driver/Distro.h
index 0d2a0939639e..2723f75e8945 100644
--- a/clang/include/clang/Driver/Distro.h
+++ b/clang/include/clang/Driver/Distro.h
@@ -37,6 +37,7 @@ public:
DebianStretch,
DebianBuster,
DebianBullseye,
+ DebianBookworm,
Exherbo,
RHEL5,
RHEL6,
@@ -72,6 +73,7 @@ public:
UbuntuGroovy,
UbuntuHirsute,
UbuntuImpish,
+ UbuntuJammy,
UnknownDistro
};
@@ -119,11 +121,11 @@ public:
bool IsOpenSUSE() const { return DistroVal == OpenSUSE; }
bool IsDebian() const {
- return DistroVal >= DebianLenny && DistroVal <= DebianBullseye;
+ return DistroVal >= DebianLenny && DistroVal <= DebianBookworm;
}
bool IsUbuntu() const {
- return DistroVal >= UbuntuHardy && DistroVal <= UbuntuImpish;
+ return DistroVal >= UbuntuHardy && DistroVal <= UbuntuJammy;
}
bool IsAlpineLinux() const { return DistroVal == AlpineLinux; }
diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h
index da7e8386a151..8b1f7091e701 100644
--- a/clang/include/clang/Driver/Driver.h
+++ b/clang/include/clang/Driver/Driver.h
@@ -253,6 +253,14 @@ public:
/// or when using the -gen-reproducer driver flag.
unsigned GenReproducer : 1;
+ // getFinalPhase - Determine which compilation mode we are in and record
+ // which option we used to determine the final phase.
+ // TODO: Much of what getFinalPhase returns are not actually true compiler
+ // modes. Fold this functionality into Types::getCompilationPhases and
+ // handleArguments.
+ phases::ID getFinalPhase(const llvm::opt::DerivedArgList &DAL,
+ llvm::opt::Arg **FinalPhaseArg = nullptr) const;
+
private:
/// Certain options suppress the 'no input files' warning.
unsigned SuppressMissingInputWarning : 1;
@@ -270,14 +278,6 @@ private:
llvm::opt::DerivedArgList *
TranslateInputArgs(const llvm::opt::InputArgList &Args) const;
- // getFinalPhase - Determine which compilation mode we are in and record
- // which option we used to determine the final phase.
- // TODO: Much of what getFinalPhase returns are not actually true compiler
- // modes. Fold this functionality into Types::getCompilationPhases and
- // handleArguments.
- phases::ID getFinalPhase(const llvm::opt::DerivedArgList &DAL,
- llvm::opt::Arg **FinalPhaseArg = nullptr) const;
-
// handleArguments - All code related to claiming and printing diagnostics
// related to arguments to the driver are done here.
void handleArguments(Compilation &C, llvm::opt::DerivedArgList &Args,
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 5a9fd078390e..9bde64cf49fd 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -272,7 +272,7 @@ class MigratorOpts<string base>
// Args.hasArg(OPT_ffoo) can be used to check that the flag is enabled.
// This is useful if the option is usually disabled.
// Use this only when the option cannot be declared via BoolFOption.
-multiclass OptInFFlag<string name, string pos_prefix, string neg_prefix="",
+multiclass OptInCC1FFlag<string name, string pos_prefix, string neg_prefix="",
string help="", list<OptionFlag> flags=[]> {
def f#NAME : Flag<["-"], "f"#name>, Flags<[CC1Option] # flags>,
Group<f_Group>, HelpText<pos_prefix # help>;
@@ -283,7 +283,7 @@ multiclass OptInFFlag<string name, string pos_prefix, string neg_prefix="",
// A boolean option which is opt-out in CC1. The negative option exists in CC1 and
// Args.hasArg(OPT_fno_foo) can be used to check that the flag is disabled.
// Use this only when the option cannot be declared via BoolFOption.
-multiclass OptOutFFlag<string name, string pos_prefix, string neg_prefix,
+multiclass OptOutCC1FFlag<string name, string pos_prefix, string neg_prefix,
string help="", list<OptionFlag> flags=[]> {
def f#NAME : Flag<["-"], "f"#name>, Flags<flags>,
Group<f_Group>, HelpText<pos_prefix # help>;
@@ -291,6 +291,27 @@ multiclass OptOutFFlag<string name, string pos_prefix, string neg_prefix,
Group<f_Group>, HelpText<neg_prefix # help>;
}
+// A boolean option which is opt-in in FC1. The positive option exists in FC1 and
+// Args.hasArg(OPT_ffoo) can be used to check that the flag is enabled.
+// This is useful if the option is usually disabled.
+multiclass OptInFC1FFlag<string name, string pos_prefix, string neg_prefix="",
+ string help="", list<OptionFlag> flags=[]> {
+ def f#NAME : Flag<["-"], "f"#name>, Flags<[FC1Option] # flags>,
+ Group<f_Group>, HelpText<pos_prefix # help>;
+ def fno_#NAME : Flag<["-"], "fno-"#name>, Flags<flags>,
+ Group<f_Group>, HelpText<neg_prefix # help>;
+}
+
+// A boolean option which is opt-out in FC1. The negative option exists in FC1 and
+// Args.hasArg(OPT_fno_foo) can be used to check that the flag is disabled.
+multiclass OptOutFC1FFlag<string name, string pos_prefix, string neg_prefix,
+ string help="", list<OptionFlag> flags=[]> {
+ def f#NAME : Flag<["-"], "f"#name>, Flags<flags>,
+ Group<f_Group>, HelpText<pos_prefix # help>;
+ def fno_#NAME : Flag<["-"], "fno-"#name>, Flags<[FC1Option] # flags>,
+ Group<f_Group>, HelpText<neg_prefix # help>;
+}
+
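As the comments note, frontend code tests options generated by these multiclasses with Args.hasArg on whichever spelling exists in the -cc1/-fc1 option table. A hedged sketch of such a check; OPT_ffoo is the placeholder spelling from the comments above, not a real option:

#include "clang/Driver/Options.h"
#include "llvm/Option/ArgList.h"

// Sketch only: for an opt-in flag declared via OptInCC1FFlag<"foo", ...>,
// only the positive spelling carries CC1Option, so it is the one queried.
static bool isFooEnabled(const llvm::opt::ArgList &Args) {
  return Args.hasArg(clang::driver::options::OPT_ffoo); // hypothetical option
}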
// Creates a positive and negative flags where both of them are prefixed with
// "m", have help text specified for positive and negative option, and a Group
// optionally specified by the opt_group argument, otherwise Group<m_Group>.
@@ -386,8 +407,7 @@ class MarshalledFlagRec<FlagDefExpanded flag, FlagDefExpanded other,
Default default>
: Flag<["-"], flag.Spelling>, Flags<flag.OptionFlags>, HelpText<flag.Help>,
MarshallingInfoBooleanFlag<kpm, default.Value, flag.ValueAsCode,
- flag.RecordName, other.ValueAsCode,
- other.RecordName>,
+ other.ValueAsCode, other.RecordName>,
ImpliedByAnyOf<implied.ImpliedBy, implied.ValueAsCode> {}
// Generates TableGen records for two command line flags that control the same
@@ -467,7 +487,6 @@ defvar render_script = LangOpts<"RenderScript">;
defvar hip = LangOpts<"HIP">;
defvar gnu_mode = LangOpts<"GNUMode">;
defvar asm_preprocessor = LangOpts<"AsmPreprocessor">;
-defvar cpp_modules = LangOpts<"CPlusPlusModules">;
defvar std = !strconcat("LangStandard::getLangStandardForKind(", lang_std.KeyPath, ")");
@@ -598,12 +617,15 @@ def objcmt_migrate_designated_init : Flag<["-"], "objcmt-migrate-designated-init
HelpText<"Enable migration to infer NS_DESIGNATED_INITIALIZER for initializer methods">,
MarshallingInfoBitfieldFlag<FrontendOpts<"ObjCMTAction">, "FrontendOptions::ObjCMT_DesignatedInitializer">;
-def objcmt_whitelist_dir_path: Joined<["-"], "objcmt-whitelist-dir-path=">, Flags<[CC1Option]>,
+def objcmt_allowlist_dir_path: Joined<["-"], "objcmt-allowlist-dir-path=">, Flags<[CC1Option]>,
HelpText<"Only modify files with a filename contained in the provided directory path">,
- MarshallingInfoString<FrontendOpts<"ObjCMTWhiteListPath">>;
+ MarshallingInfoString<FrontendOpts<"ObjCMTAllowListPath">>;
+def : Joined<["-"], "objcmt-whitelist-dir-path=">, Flags<[CC1Option]>,
+ HelpText<"Alias for -objcmt-allowlist-dir-path">,
+ Alias<objcmt_allowlist_dir_path>;
// The misspelt "white-list" [sic] alias is due for removal.
def : Joined<["-"], "objcmt-white-list-dir-path=">, Flags<[CC1Option]>,
- Alias<objcmt_whitelist_dir_path>;
+ Alias<objcmt_allowlist_dir_path>;
// Make sure all other -ccc- options are rejected.
def ccc_ : Joined<["-"], "ccc-">, Group<internal_Group>, Flags<[Unsupported]>;
@@ -701,7 +723,7 @@ def ObjC : Flag<["-"], "ObjC">, Flags<[NoXarchOption]>,
def O : Joined<["-"], "O">, Group<O_Group>, Flags<[CC1Option]>;
def O_flag : Flag<["-"], "O">, Flags<[CC1Option]>, Alias<O>, AliasArgs<["1"]>;
def Ofast : Joined<["-"], "Ofast">, Group<O_Group>, Flags<[CC1Option]>;
-def P : Flag<["-"], "P">, Flags<[CC1Option]>, Group<Preprocessor_Group>,
+def P : Flag<["-"], "P">, Flags<[CC1Option,FlangOption,FC1Option]>, Group<Preprocessor_Group>,
HelpText<"Disable linemarker output in -E mode">,
MarshallingInfoNegativeFlag<PreprocessorOutputOpts<"ShowLineMarkers">>;
def Qy : Flag<["-"], "Qy">, Flags<[CC1Option]>,
@@ -815,7 +837,9 @@ def autocomplete : Joined<["--"], "autocomplete=">;
def bind__at__load : Flag<["-"], "bind_at_load">;
def bundle__loader : Separate<["-"], "bundle_loader">;
def bundle : Flag<["-"], "bundle">;
-def b : JoinedOrSeparate<["-"], "b">, Flags<[Unsupported]>;
+def b : JoinedOrSeparate<["-"], "b">, Flags<[LinkerInput, RenderAsInput]>,
+ HelpText<"Pass -b <arg> to the linker on AIX (only).">, MetaVarName<"<arg>">,
+ Group<Link_Group>;
def cl_opt_disable : Flag<["-"], "cl-opt-disable">, Group<opencl_Group>, Flags<[CC1Option]>,
HelpText<"OpenCL only. This option disables all optimizations. By default optimizations are enabled.">;
def cl_strict_aliasing : Flag<["-"], "cl-strict-aliasing">, Group<opencl_Group>, Flags<[CC1Option]>,
@@ -843,7 +867,8 @@ def cl_no_signed_zeros : Flag<["-"], "cl-no-signed-zeros">, Group<opencl_Group>,
HelpText<"OpenCL only. Allow use of less precise no signed zeros computations in the generated binary.">,
MarshallingInfoFlag<LangOpts<"CLNoSignedZero">>;
def cl_std_EQ : Joined<["-"], "cl-std=">, Group<opencl_Group>, Flags<[CC1Option]>,
- HelpText<"OpenCL language standard to compile for.">, Values<"cl,CL,cl1.0,CL1.0,cl1.1,CL1.1,cl1.2,CL1.2,cl2.0,CL2.0,cl3.0,CL3.0,clc++,CLC++">;
+ HelpText<"OpenCL language standard to compile for.">,
+ Values<"cl,CL,cl1.0,CL1.0,cl1.1,CL1.1,cl1.2,CL1.2,cl2.0,CL2.0,cl3.0,CL3.0,clc++,CLC++,clc++1.0,CLC++1.0,clc++2021,CLC++2021">;
def cl_denorms_are_zero : Flag<["-"], "cl-denorms-are-zero">, Group<opencl_Group>,
HelpText<"OpenCL only. Allow denormals to be flushed to zero.">;
def cl_fp32_correctly_rounded_divide_sqrt : Flag<["-"], "cl-fp32-correctly-rounded-divide-sqrt">, Group<opencl_Group>, Flags<[CC1Option]>,
@@ -1004,7 +1029,7 @@ def fuse_cuid_EQ : Joined<["-"], "fuse-cuid=">,
"offloading languages CUDA and HIP: 'hash' (ID's generated by hashing "
"file path and command line options) | 'random' (ID's generated as "
"random numbers) | 'none' (disabled). Default is 'hash'. This option "
- "will be overriden by option '-cuid=[ID]' if it is specified." >;
+ "will be overridden by option '-cuid=[ID]' if it is specified." >;
def libomptarget_amdgcn_bc_path_EQ : Joined<["--"], "libomptarget-amdgcn-bc-path=">, Group<i_Group>,
HelpText<"Path to libomptarget-amdgcn bitcode library">;
def libomptarget_nvptx_bc_path_EQ : Joined<["--"], "libomptarget-nvptx-bc-path=">, Group<i_Group>,
@@ -1064,6 +1089,9 @@ defm access_control : BoolFOption<"access-control",
PosFlag<SetTrue>>;
def falign_functions : Flag<["-"], "falign-functions">, Group<f_Group>;
def falign_functions_EQ : Joined<["-"], "falign-functions=">, Group<f_Group>;
+def falign_loops_EQ : Joined<["-"], "falign-loops=">, Group<f_Group>, Flags<[CC1Option]>, MetaVarName<"<N>">,
+ HelpText<"N must be a power of two. Align loops to the boundary">,
+ MarshallingInfoInt<CodeGenOpts<"LoopAlignment">>;
def fno_align_functions: Flag<["-"], "fno-align-functions">, Group<f_Group>;
defm allow_editor_placeholders : BoolFOption<"allow-editor-placeholders",
LangOpts<"AllowEditorPlaceholders">, DefaultFalse,
@@ -1250,12 +1278,19 @@ def fprofile_list_EQ : Joined<["-"], "fprofile-list=">,
Group<f_Group>, Flags<[CC1Option, CoreOption]>,
HelpText<"Filename defining the list of functions/files to instrument">,
MarshallingInfoStringVector<LangOpts<"ProfileListFiles">>;
+def fswift_async_fp_EQ : Joined<["-"], "fswift-async-fp=">,
+ Group<f_Group>, Flags<[CC1Option, CC1AsOption, CoreOption]>, MetaVarName<"<option>">,
+ HelpText<"Control emission of Swift async extended frame info (option: auto, always, never)">,
+ Values<"auto,always,never">,
+ NormalizedValuesScope<"CodeGenOptions::SwiftAsyncFramePointerKind">,
+ NormalizedValues<["Auto", "Always", "Never"]>,
+ MarshallingInfoEnum<CodeGenOpts<"SwiftAsyncFramePointer">, "Always">;
defm addrsig : BoolFOption<"addrsig",
CodeGenOpts<"Addrsig">, DefaultFalse,
PosFlag<SetTrue, [CC1Option], "Emit">, NegFlag<SetFalse, [], "Don't emit">,
BothFlags<[CoreOption], " an address-significance table">>;
-defm blocks : OptInFFlag<"blocks", "Enable the 'blocks' language feature", "", "", [CoreOption]>;
+defm blocks : OptInCC1FFlag<"blocks", "Enable the 'blocks' language feature", "", "", [CoreOption]>;
def fbootclasspath_EQ : Joined<["-"], "fbootclasspath=">, Group<f_Group>;
defm borland_extensions : BoolFOption<"borland-extensions",
LangOpts<"Borland">, DefaultFalse,
@@ -1271,7 +1306,7 @@ def fclang_abi_compat_EQ : Joined<["-"], "fclang-abi-compat=">, Group<f_clang_Gr
Flags<[CC1Option]>, MetaVarName<"<version>">, Values<"<major>.<minor>,latest">,
HelpText<"Attempt to match the ABI of Clang <version>">;
def fclasspath_EQ : Joined<["-"], "fclasspath=">, Group<f_Group>;
-defm color_diagnostics : OptInFFlag<"color-diagnostics", "Enable", "Disable", " colors in diagnostics",
+defm color_diagnostics : OptInCC1FFlag<"color-diagnostics", "Enable", "Disable", " colors in diagnostics",
[CoreOption, FlangOption]>;
def fdiagnostics_color : Flag<["-"], "fdiagnostics-color">, Group<f_Group>,
Flags<[CoreOption, NoXarchOption]>;
@@ -1329,8 +1364,11 @@ defm cxx_exceptions: BoolFOption<"cxx-exceptions",
defm async_exceptions: BoolFOption<"async-exceptions",
LangOpts<"EHAsynch">, DefaultFalse,
PosFlag<SetTrue, [CC1Option], "Enable EH Asynchronous exceptions">, NegFlag<SetFalse>>;
-def fcxx_modules : Flag <["-"], "fcxx-modules">, Group<f_Group>,
- Flags<[NoXarchOption]>;
+defm cxx_modules : BoolFOption<"cxx-modules",
+ LangOpts<"CPlusPlusModules">, Default<cpp20.KeyPath>,
+ NegFlag<SetFalse, [CC1Option], "Disable">, PosFlag<SetTrue, [], "Enable">,
+ BothFlags<[NoXarchOption], " modules for C++">>,
+ ShouldParseIf<cplusplus.KeyPath>;
def fdebug_pass_arguments : Flag<["-"], "fdebug-pass-arguments">, Group<f_Group>;
def fdebug_pass_structure : Flag<["-"], "fdebug-pass-structure">, Group<f_Group>;
def fdepfile_entry : Joined<["-"], "fdepfile-entry=">,
@@ -1389,7 +1427,7 @@ def fno_elide_type : Flag<["-"], "fno-elide-type">, Group<f_Group>,
HelpText<"Do not elide types when printing diagnostics">,
MarshallingInfoNegativeFlag<DiagnosticOpts<"ElideType">>;
def feliminate_unused_debug_symbols : Flag<["-"], "feliminate-unused-debug-symbols">, Group<f_Group>;
-defm eliminate_unused_debug_types : OptOutFFlag<"eliminate-unused-debug-types",
+defm eliminate_unused_debug_types : OptOutCC1FFlag<"eliminate-unused-debug-types",
"Do not emit ", "Emit ", " debug info for defined but unused types">;
def femit_all_decls : Flag<["-"], "femit-all-decls">, Group<f_Group>, Flags<[CC1Option]>,
HelpText<"Emit all declarations, even if unused">,
@@ -1489,7 +1527,7 @@ defm cxx_static_destructors : BoolFOption<"c++-static-destructors",
def fsymbol_partition_EQ : Joined<["-"], "fsymbol-partition=">, Group<f_Group>,
Flags<[CC1Option]>, MarshallingInfoString<CodeGenOpts<"SymbolPartition">>;
-defm memory_profile : OptInFFlag<"memory-profile", "Enable", "Disable", " heap memory profiling">;
+defm memory_profile : OptInCC1FFlag<"memory-profile", "Enable", "Disable", " heap memory profiling">;
def fmemory_profile_EQ : Joined<["-"], "fmemory-profile=">,
Group<f_Group>, Flags<[CC1Option]>, MetaVarName<"<directory>">,
HelpText<"Enable heap memory profiling and dump results into <directory>">;
@@ -1729,8 +1767,11 @@ defm reciprocal_math : BoolFOption<"reciprocal-math",
PosFlag<SetTrue, [CC1Option], "Allow division operations to be reassociated",
[menable_unsafe_fp_math.KeyPath]>,
NegFlag<SetFalse>>;
-def fapprox_func : Flag<["-"], "fapprox-func">, Group<f_Group>, Flags<[CC1Option, NoDriverOption]>,
- MarshallingInfoFlag<LangOpts<"ApproxFunc">>, ImpliedByAnyOf<[menable_unsafe_fp_math.KeyPath]>;
+defm approx_func : BoolFOption<"approx-func", LangOpts<"ApproxFunc">, DefaultFalse,
+ PosFlag<SetTrue, [CC1Option], "Allow certain math function calls to be replaced "
+ "with an approximately equivalent calculation",
+ [menable_unsafe_fp_math.KeyPath]>,
+ NegFlag<SetFalse>>;
defm finite_math_only : BoolFOption<"finite-math-only",
LangOpts<"FiniteMathOnly">, DefaultFalse,
PosFlag<SetTrue, [CC1Option], "", [cl_finite_math_only.KeyPath, ffast_math.KeyPath]>,
@@ -1770,7 +1811,7 @@ defm strict_float_cast_overflow : BoolFOption<"strict-float-cast-overflow",
defm protect_parens : BoolFOption<"protect-parens",
LangOpts<"ProtectParens">, DefaultFalse,
- PosFlag<SetTrue, [CoreOption, CC1Option],
+ PosFlag<SetTrue, [CoreOption, CC1Option],
"Determines whether the optimizer honors parentheses when "
"floating-point expressions are evaluated">,
NegFlag<SetFalse>>;
@@ -1840,7 +1881,7 @@ def fexperimental_isel : Flag<["-"], "fexperimental-isel">, Group<f_clang_Group>
Alias<fglobal_isel>;
defm legacy_pass_manager : BoolOption<"f", "legacy-pass-manager",
CodeGenOpts<"LegacyPassManager">, Default<"!static_cast<unsigned>(LLVM_ENABLE_NEW_PASS_MANAGER)">,
- PosFlag<SetTrue, [], "Use the legacy pass manager in LLVM">,
+ PosFlag<SetTrue, [], "Use the legacy pass manager in LLVM (deprecated, to be removed in a future release)">,
NegFlag<SetFalse, [], "Use the new pass manager in LLVM">,
BothFlags<[CC1Option]>>, Group<f_clang_Group>;
def fexperimental_new_pass_manager : Flag<["-"], "fexperimental-new-pass-manager">,
@@ -1988,16 +2029,18 @@ def fapple_link_rtlib : Flag<["-"], "fapple-link-rtlib">, Group<f_Group>,
HelpText<"Force linking the clang builtins runtime library">;
def flto_EQ : Joined<["-"], "flto=">, Flags<[CoreOption, CC1Option]>, Group<f_Group>,
HelpText<"Set LTO mode to either 'full' or 'thin'">, Values<"thin,full">;
-def flto_EQ_jobserver : Flag<["-"], "flto=jobserver">, Group<f_Group>;
-def flto_EQ_auto : Flag<["-"], "flto=auto">, Group<f_Group>;
+def flto_EQ_jobserver : Flag<["-"], "flto=jobserver">, Group<f_Group>,
+ Alias<flto_EQ>, AliasArgs<["full"]>, HelpText<"Enable LTO in 'full' mode">;
+def flto_EQ_auto : Flag<["-"], "flto=auto">, Group<f_Group>,
+ Alias<flto_EQ>, AliasArgs<["full"]>, HelpText<"Enable LTO in 'full' mode">;
def flto : Flag<["-"], "flto">, Flags<[CoreOption, CC1Option]>, Group<f_Group>,
- HelpText<"Enable LTO in 'full' mode">;
+ Alias<flto_EQ>, AliasArgs<["full"]>, HelpText<"Enable LTO in 'full' mode">;
def fno_lto : Flag<["-"], "fno-lto">, Flags<[CoreOption, CC1Option]>, Group<f_Group>,
HelpText<"Disable LTO mode (default)">;
def foffload_lto_EQ : Joined<["-"], "foffload-lto=">, Flags<[CoreOption]>, Group<f_Group>,
HelpText<"Set LTO mode to either 'full' or 'thin' for offload compilation">, Values<"thin,full">;
def foffload_lto : Flag<["-"], "foffload-lto">, Flags<[CoreOption]>, Group<f_Group>,
- HelpText<"Enable LTO in 'full' mode for offload compilation">;
+ Alias<foffload_lto_EQ>, AliasArgs<["full"]>, HelpText<"Enable LTO in 'full' mode for offload compilation">;
def fno_offload_lto : Flag<["-"], "fno-offload-lto">, Flags<[CoreOption]>, Group<f_Group>,
HelpText<"Disable LTO mode (default) for offload compilation">;
def flto_jobs_EQ : Joined<["-"], "flto-jobs=">,
@@ -2133,9 +2176,9 @@ defm pch_instantiate_templates : BoolFOption<"pch-instantiate-templates",
LangOpts<"PCHInstantiateTemplates">, DefaultFalse,
PosFlag<SetTrue, [], "Instantiate templates already while building a PCH">,
NegFlag<SetFalse>, BothFlags<[CC1Option, CoreOption]>>;
-defm pch_codegen: OptInFFlag<"pch-codegen", "Generate ", "Do not generate ",
+defm pch_codegen: OptInCC1FFlag<"pch-codegen", "Generate ", "Do not generate ",
"code for uses of this PCH that assumes an explicit object file will be built for the PCH">;
-defm pch_debuginfo: OptInFFlag<"pch-debuginfo", "Generate ", "Do not generate ",
+defm pch_debuginfo: OptInCC1FFlag<"pch-debuginfo", "Generate ", "Do not generate ",
"debug info for types in an object file built from this PCH and do not generate them elsewhere">;
def fimplicit_module_maps : Flag <["-"], "fimplicit-module-maps">, Group<f_Group>,
@@ -2146,7 +2189,7 @@ def fmodules_ts : Flag <["-"], "fmodules-ts">, Group<f_Group>,
Flags<[CC1Option]>, HelpText<"Enable support for the C++ Modules TS">,
MarshallingInfoFlag<LangOpts<"ModulesTS">>;
defm modules : BoolFOption<"modules",
- LangOpts<"Modules">, Default<!strconcat(fmodules_ts.KeyPath, "||", cpp_modules.KeyPath)>,
+ LangOpts<"Modules">, Default<!strconcat(fmodules_ts.KeyPath, "||", fcxx_modules.KeyPath)>,
PosFlag<SetTrue, [CC1Option], "Enable the 'modules' language feature">,
NegFlag<SetFalse>, BothFlags<[NoXarchOption, CoreOption]>>;
def fmodule_maps : Flag <["-"], "fmodule-maps">, Flags<[CoreOption]>, Alias<fimplicit_module_maps>;
@@ -2205,8 +2248,6 @@ def fno_diagnostics_color : Flag<["-"], "fno-diagnostics-color">, Group<f_Group>
Flags<[CoreOption, NoXarchOption]>;
def fno_common : Flag<["-"], "fno-common">, Group<f_Group>, Flags<[CC1Option]>,
HelpText<"Compile common globals like normal definitions">;
-def fno_cxx_modules : Flag <["-"], "fno-cxx-modules">, Group<f_Group>,
- Flags<[NoXarchOption]>;
defm digraphs : BoolFOption<"digraphs",
LangOpts<"Digraphs">, Default<std#".hasDigraphs()">,
PosFlag<SetTrue, [], "Enable alternative token representations '<:', ':>', '<%', '%>', '%:', '%:%:' (default)">,
@@ -2341,12 +2382,23 @@ def fobjc_disable_direct_methods_for_testing :
Group<f_Group>, Flags<[CC1Option]>,
HelpText<"Ignore attribute objc_direct so that direct methods can be tested">,
MarshallingInfoFlag<LangOpts<"ObjCDisableDirectMethodsForTesting">>;
+defm objc_avoid_heapify_local_blocks : BoolFOption<"objc-avoid-heapify-local-blocks",
+ CodeGenOpts<"ObjCAvoidHeapifyLocalBlocks">, DefaultFalse,
+ PosFlag<SetTrue, [], "Try">,
+ NegFlag<SetFalse, [], "Don't try">,
+ BothFlags<[CC1Option, NoDriverOption], " to avoid heapifying local blocks">>;
def fomit_frame_pointer : Flag<["-"], "fomit-frame-pointer">, Group<f_Group>;
def fopenmp : Flag<["-"], "fopenmp">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, FlangOption, FC1Option]>,
HelpText<"Parse OpenMP pragmas and generate parallel code.">;
def fno_openmp : Flag<["-"], "fno-openmp">, Group<f_Group>, Flags<[NoArgumentUnused]>;
def fopenmp_version_EQ : Joined<["-"], "fopenmp-version=">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>;
+defm openmp_extensions: BoolFOption<"openmp-extensions",
+ LangOpts<"OpenMPExtensions">, DefaultTrue,
+ PosFlag<SetTrue, [CC1Option, NoArgumentUnused],
+ "Enable all Clang extensions for OpenMP directives and clauses">,
+ NegFlag<SetFalse, [CC1Option, NoArgumentUnused],
+ "Disable all Clang extensions for OpenMP directives and clauses">>;
def fopenmp_EQ : Joined<["-"], "fopenmp=">, Group<f_Group>;
def fopenmp_use_tls : Flag<["-"], "fopenmp-use-tls">, Group<f_Group>,
Flags<[NoArgumentUnused, HelpHidden]>;
@@ -2377,6 +2429,18 @@ def fopenmp_cuda_blocks_per_sm_EQ : Joined<["-"], "fopenmp-cuda-blocks-per-sm=">
Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
def fopenmp_cuda_teams_reduction_recs_num_EQ : Joined<["-"], "fopenmp-cuda-teams-reduction-recs-num=">, Group<f_Group>,
Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
+def fopenmp_target_debug : Flag<["-"], "fopenmp-target-debug">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>,
+ HelpText<"Enable debugging in the OpenMP offloading device RTL">;
+def fno_openmp_target_debug : Flag<["-"], "fno-openmp-target-debug">, Group<f_Group>, Flags<[NoArgumentUnused]>;
+def fopenmp_target_debug_EQ : Joined<["-"], "fopenmp-target-debug=">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
+def fopenmp_assume_teams_oversubscription : Flag<["-"], "fopenmp-assume-teams-oversubscription">,
+ Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
+def fopenmp_assume_threads_oversubscription : Flag<["-"], "fopenmp-assume-threads-oversubscription">,
+ Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
+def fno_openmp_assume_teams_oversubscription : Flag<["-"], "fno-openmp-assume-teams-oversubscription">,
+ Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
+def fno_openmp_assume_threads_oversubscription : Flag<["-"], "fno-openmp-assume-threads-oversubscription">,
+ Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
defm openmp_target_new_runtime: BoolFOption<"openmp-target-new-runtime",
LangOpts<"OpenMPTargetNewRuntime">, DefaultFalse,
PosFlag<SetTrue, [CC1Option], "Use the new bitcode library for OpenMP offloading">,
@@ -2498,7 +2562,7 @@ defm signed_char : BoolFOption<"signed-char",
ShouldParseIf<!strconcat("!", open_cl.KeyPath)>;
defm split_stack : BoolFOption<"split-stack",
CodeGenOpts<"EnableSegmentedStacks">, DefaultFalse,
- NegFlag<SetFalse, [], "Wouldn't use segmented stack">,
+ NegFlag<SetFalse, [], "Wouldn't use segmented stack">,
PosFlag<SetTrue, [CC1Option], "Use segmented stack">>;
def fstack_protector_all : Flag<["-"], "fstack-protector-all">, Group<f_Group>,
HelpText<"Enable stack protectors for all functions">;
@@ -2529,7 +2593,7 @@ def ftrivial_auto_var_init : Joined<["-"], "ftrivial-auto-var-init=">, Group<f_G
NormalizedValues<["Uninitialized", "Zero", "Pattern"]>,
MarshallingInfoEnum<LangOpts<"TrivialAutoVarInit">, "Uninitialized">;
def enable_trivial_var_init_zero : Flag<["-"], "enable-trivial-auto-var-init-zero-knowing-it-will-be-removed-from-clang">,
- Flags<[CC1Option, CoreOption]>,
+ Flags<[CC1Option, CoreOption, NoArgumentUnused]>,
HelpText<"Trivial automatic variable initialization to zero is only here for benchmarks, it'll eventually be removed, and I'm OK with that because I'm only using it to benchmark">;
def ftrivial_auto_var_init_stop_after : Joined<["-"], "ftrivial-auto-var-init-stop-after=">, Group<f_Group>,
Flags<[CC1Option, CoreOption]>, HelpText<"Stop initializing trivial automatic stack variables after the specified number of instances">,
@@ -2686,10 +2750,10 @@ def fverbose_asm : Flag<["-"], "fverbose-asm">, Group<f_Group>,
def dA : Flag<["-"], "dA">, Alias<fverbose_asm>;
defm visibility_from_dllstorageclass : BoolFOption<"visibility-from-dllstorageclass",
LangOpts<"VisibilityFromDLLStorageClass">, DefaultFalse,
- PosFlag<SetTrue, [CC1Option], "Set the visiblity of symbols in the generated code from their DLL storage class">,
+ PosFlag<SetTrue, [CC1Option], "Set the visibility of symbols in the generated code from their DLL storage class">,
NegFlag<SetFalse>>;
def fvisibility_dllexport_EQ : Joined<["-"], "fvisibility-dllexport=">, Group<f_Group>, Flags<[CC1Option]>,
- HelpText<"The visibility for dllexport defintions [-fvisibility-from-dllstorageclass]">,
+ HelpText<"The visibility for dllexport definitions [-fvisibility-from-dllstorageclass]">,
MarshallingInfoVisibility<LangOpts<"DLLExportVisibility">, "DefaultVisibility">,
ShouldParseIf<fvisibility_from_dllstorageclass.KeyPath>;
def fvisibility_nodllstorageclass_EQ : Joined<["-"], "fvisibility-nodllstorageclass=">, Group<f_Group>, Flags<[CC1Option]>,
@@ -2722,6 +2786,10 @@ def fvisibility_ms_compat : Flag<["-"], "fvisibility-ms-compat">, Group<f_Group>
def fvisibility_global_new_delete_hidden : Flag<["-"], "fvisibility-global-new-delete-hidden">, Group<f_Group>,
HelpText<"Give global C++ operator new and delete declarations hidden visibility">, Flags<[CC1Option]>,
MarshallingInfoFlag<LangOpts<"GlobalAllocationFunctionVisibilityHidden">>;
+def fnew_infallible : Flag<["-"], "fnew-infallible">, Group<f_Group>,
+ HelpText<"Treats throwing global C++ operator new as always returning valid memory "
+ "(annotates with __attribute__((returns_nonnull)) and throw()). This is detectable in source.">,
+ Flags<[CC1Option]>, MarshallingInfoFlag<LangOpts<"NewInfallible">>;
defm whole_program_vtables : BoolFOption<"whole-program-vtables",
CodeGenOpts<"WholeProgramVTables">, DefaultFalse,
PosFlag<SetTrue, [CC1Option], "Enables whole-program vtable optimization. Requires -flto">,
@@ -2828,10 +2896,10 @@ def fcoverage_prefix_map_EQ
HelpText<"remap file source paths in coverage mapping">;
def ffile_prefix_map_EQ
: Joined<["-"], "ffile-prefix-map=">, Group<f_Group>,
- HelpText<"remap file source paths in debug info and predefined preprocessor macros">;
+ HelpText<"remap file source paths in debug info, predefined preprocessor macros and __builtin_FILE()">;
def fmacro_prefix_map_EQ
- : Joined<["-"], "fmacro-prefix-map=">, Group<Preprocessor_Group>, Flags<[CC1Option]>,
- HelpText<"remap file source paths in predefined preprocessor macros">;
+ : Joined<["-"], "fmacro-prefix-map=">, Group<f_Group>, Flags<[CC1Option]>,
+ HelpText<"remap file source paths in predefined preprocessor macros and __builtin_FILE()">;
defm force_dwarf_frame : BoolFOption<"force-dwarf-frame",
CodeGenOpts<"ForceDwarfFrameSection">, DefaultFalse,
PosFlag<SetTrue, [CC1Option], "Always emit a debug frame section">, NegFlag<SetFalse>>;
@@ -2913,6 +2981,14 @@ def gsplit_dwarf_EQ : Joined<["-"], "gsplit-dwarf=">, Group<g_flags_Group>,
HelpText<"Set DWARF fission mode to either 'split' or 'single'">,
Values<"split,single">;
def gno_split_dwarf : Flag<["-"], "gno-split-dwarf">, Group<g_flags_Group>;
+def gsimple_template_names : Flag<["-"], "gsimple-template-names">, Group<g_flags_Group>;
+def gsimple_template_names_EQ
+ : Joined<["-"], "gsimple-template-names=">,
+ HelpText<"Use simple template names in DWARF, or include the full "
+ "template name with a modified prefix for validation">,
+ Values<"simple,mangled">, Flags<[CC1Option, NoDriverOption]>;
+def gno_simple_template_names : Flag<["-"], "gno-simple-template-names">,
+ Group<g_flags_Group>;
def ggnu_pubnames : Flag<["-"], "ggnu-pubnames">, Group<g_flags_Group>, Flags<[CC1Option]>;
def gno_gnu_pubnames : Flag<["-"], "gno-gnu-pubnames">, Group<g_flags_Group>;
def gpubnames : Flag<["-"], "gpubnames">, Group<g_flags_Group>, Flags<[CC1Option]>;
@@ -3055,6 +3131,10 @@ def mwatchos_simulator_version_min_EQ : Joined<["-"], "mwatchos-simulator-versio
def mwatchsimulator_version_min_EQ : Joined<["-"], "mwatchsimulator-version-min=">, Alias<mwatchos_simulator_version_min_EQ>;
def march_EQ : Joined<["-"], "march=">, Group<m_Group>, Flags<[CoreOption]>;
def masm_EQ : Joined<["-"], "masm=">, Group<m_Group>, Flags<[NoXarchOption]>;
+def inline_asm_EQ : Joined<["-"], "inline-asm=">, Group<m_Group>, Flags<[CC1Option]>,
+ Values<"att,intel">,
+ NormalizedValuesScope<"CodeGenOptions">, NormalizedValues<["IAD_ATT", "IAD_Intel"]>,
+ MarshallingInfoEnum<CodeGenOpts<"InlineAsmDialect">, "IAD_ATT">;
def mcmodel_EQ : Joined<["-"], "mcmodel=">, Group<m_Group>, Flags<[CC1Option]>,
MarshallingInfoString<TargetOpts<"CodeModel">, [{"default"}]>;
def mtls_size_EQ : Joined<["-"], "mtls-size=">, Group<m_Group>, Flags<[NoXarchOption, CC1Option]>,
@@ -3100,6 +3180,9 @@ def mlinker_version_EQ : Joined<["-"], "mlinker-version=">,
def mllvm : Separate<["-"], "mllvm">, Flags<[CC1Option,CC1AsOption,CoreOption]>,
HelpText<"Additional arguments to forward to LLVM's option processing">,
MarshallingInfoStringVector<FrontendOpts<"LLVMArgs">>;
+def ffuchsia_api_level_EQ : Joined<["-"], "ffuchsia-api-level=">,
+ Group<m_Group>, Flags<[CC1Option]>, HelpText<"Set Fuchsia API level">,
+ MarshallingInfoInt<LangOpts<"FuchsiaAPILevel">>;
def mmacosx_version_min_EQ : Joined<["-"], "mmacosx-version-min=">,
Group<m_Group>, HelpText<"Set Mac OS X deployment target">;
def mmacos_version_min_EQ : Joined<["-"], "mmacos-version-min=">,
@@ -3113,6 +3196,10 @@ def mno_outline : Flag<["-"], "mno-outline">, Group<f_clang_Group>, Flags<[CC1Op
HelpText<"Disable function outlining (AArch64 only)">;
def mno_ms_bitfields : Flag<["-"], "mno-ms-bitfields">, Group<m_Group>,
HelpText<"Do not set the default structure layout to be compatible with the Microsoft compiler standard">;
+def mskip_rax_setup : Flag<["-"], "mskip-rax-setup">, Group<m_Group>, Flags<[CC1Option]>,
+ HelpText<"Skip setting up RAX register when passing variable arguments (x86 only)">,
+ MarshallingInfoFlag<CodeGenOpts<"SkipRaxSetup">>;
+def mno_skip_rax_setup : Flag<["-"], "mno-skip-rax-setup">, Group<m_Group>, Flags<[CC1Option]>;
def mstackrealign : Flag<["-"], "mstackrealign">, Group<m_Group>, Flags<[CC1Option]>,
HelpText<"Force realign the stack at entry to every function">,
MarshallingInfoFlag<CodeGenOpts<"StackRealignment">>;
@@ -3136,6 +3223,8 @@ def meabi : Separate<["-"], "meabi">, Group<m_Group>, Flags<[CC1Option]>,
MarshallingInfoEnum<TargetOpts<"EABIVersion">, "Default">,
NormalizedValuesScope<"llvm::EABI">,
NormalizedValues<["Default", "EABI4", "EABI5", "GNU"]>;
+def mtargetos_EQ : Joined<["-"], "mtargetos=">, Group<m_Group>,
+ HelpText<"Set the deployment target to be the specified OS and OS version">;
def mno_constant_cfstrings : Flag<["-"], "mno-constant-cfstrings">, Group<m_Group>;
def mno_global_merge : Flag<["-"], "mno-global-merge">, Group<m_Group>, Flags<[CC1Option]>,
@@ -3230,6 +3319,12 @@ defm aapcs_bitfield_width : BoolOption<"f", "aapcs-bitfield-width",
def mgeneral_regs_only : Flag<["-"], "mgeneral-regs-only">, Group<m_Group>,
HelpText<"Generate code which only uses the general purpose registers (AArch64/x86 only)">;
+def mfix_cmse_cve_2021_35465 : Flag<["-"], "mfix-cmse-cve-2021-35465">,
+ Group<m_arm_Features_Group>,
+ HelpText<"Work around VLLDM erratum CVE-2021-35465 (ARM only)">;
+def mno_fix_cmse_cve_2021_35465 : Flag<["-"], "mno-fix-cmse-cve-2021-35465">,
+ Group<m_arm_Features_Group>,
+ HelpText<"Don't work around VLLDM erratum CVE-2021-35465 (ARM only)">;
def mfix_cortex_a53_835769 : Flag<["-"], "mfix-cortex-a53-835769">,
Group<m_aarch64_Features_Group>,
HelpText<"Workaround Cortex-A53 erratum 835769 (AArch64 only)">;
@@ -3247,13 +3342,20 @@ foreach i = {8-15,18} in
def fcall_saved_x#i : Flag<["-"], "fcall-saved-x"#i>, Group<m_aarch64_Features_Group>,
HelpText<"Make the x"#i#" register call-saved (AArch64 only)">;
-def msve_vector_bits_EQ : Joined<["-"], "msve-vector-bits=">,
- Group<m_aarch64_Features_Group>, Flags<[NoXarchOption,CC1Option]>,
+def msve_vector_bits_EQ : Joined<["-"], "msve-vector-bits=">, Group<m_aarch64_Features_Group>,
HelpText<"Specify the size in bits of an SVE vector register. Defaults to the"
- " vector length agnostic value of \"scalable\". (AArch64 only)">,
- Values<"128,256,512,1024,2048,scalable">,
- NormalizedValues<["128", "256", "512", "1024", "2048", "0"]>,
- MarshallingInfoEnum<LangOpts<"ArmSveVectorBits">, "0">;
+ " vector length agnostic value of \"scalable\". (AArch64 only)">;
+
+def mvscale_min_EQ : Joined<["-"], "mvscale-min=">,
+ Group<m_aarch64_Features_Group>, Flags<[NoXarchOption,CC1Option]>,
+ HelpText<"Specify the vscale minimum. Defaults to the"
+ " vector length agnostic value of \"0\". (AArch64 only)">,
+ MarshallingInfoInt<LangOpts<"VScaleMin">>;
+def mvscale_max_EQ : Joined<["-"], "mvscale-max=">,
+ Group<m_aarch64_Features_Group>, Flags<[NoXarchOption,CC1Option]>,
+ HelpText<"Specify the vscale maximum. Defaults to the"
+ " vector length agnostic value of \"0\". (AArch64 only)">,
+ MarshallingInfoInt<LangOpts<"VScaleMax">>;
def msign_return_address_EQ : Joined<["-"], "msign-return-address=">,
Flags<[CC1Option]>, Group<m_Group>, Values<"none,all,non-leaf">,
@@ -3266,6 +3368,8 @@ def mharden_sls_EQ : Joined<["-"], "mharden-sls=">,
def msimd128 : Flag<["-"], "msimd128">, Group<m_wasm_Features_Group>;
def mno_simd128 : Flag<["-"], "mno-simd128">, Group<m_wasm_Features_Group>;
+def mrelaxed_simd : Flag<["-"], "mrelaxed-simd">, Group<m_wasm_Features_Group>;
+def mno_relaxed_simd : Flag<["-"], "mno-relaxed-simd">, Group<m_wasm_Features_Group>;
def mnontrapping_fptoint : Flag<["-"], "mnontrapping-fptoint">, Group<m_wasm_Features_Group>;
def mno_nontrapping_fptoint : Flag<["-"], "mno-nontrapping-fptoint">, Group<m_wasm_Features_Group>;
def msign_ext : Flag<["-"], "msign-ext">, Group<m_wasm_Features_Group>;
@@ -3638,8 +3742,10 @@ def multi__module : Flag<["-"], "multi_module">;
def multiply__defined__unused : Separate<["-"], "multiply_defined_unused">;
def multiply__defined : Separate<["-"], "multiply_defined">;
def mwarn_nonportable_cfstrings : Flag<["-"], "mwarn-nonportable-cfstrings">, Group<m_Group>;
+def canonical_prefixes : Flag<["-"], "canonical-prefixes">, Flags<[HelpHidden, CoreOption]>,
+ HelpText<"Use absolute paths for invoking subcommands (default)">;
def no_canonical_prefixes : Flag<["-"], "no-canonical-prefixes">, Flags<[HelpHidden, CoreOption]>,
- HelpText<"Use relative instead of canonical paths">;
+ HelpText<"Use relative paths for invoking subcommands">;
def no_cpp_precomp : Flag<["-"], "no-cpp-precomp">, Group<clang_ignored_f_Group>;
def no_integrated_cpp : Flag<["-", "--"], "no-integrated-cpp">, Flags<[NoXarchOption]>;
def no_pedantic : Flag<["-", "--"], "no-pedantic">, Group<pedantic_Group>;
@@ -4078,6 +4184,7 @@ def mx87 : Flag<["-"], "mx87">, Group<m_x86_Features_Group>;
def mno_x87 : Flag<["-"], "mno-x87">, Group<m_x86_Features_Group>;
def m80387 : Flag<["-"], "m80387">, Alias<mx87>;
def mno_80387 : Flag<["-"], "mno-80387">, Alias<mno_x87>;
+def mno_fp_ret_in_387 : Flag<["-"], "mno-fp-ret-in-387">, Alias<mno_x87>;
def mmmx : Flag<["-"], "mmmx">, Group<m_x86_Features_Group>;
def mno_mmx : Flag<["-"], "mno-mmx">, Group<m_x86_Features_Group>;
def m3dnow : Flag<["-"], "m3dnow">, Group<m_x86_Features_Group>;
@@ -4127,6 +4234,8 @@ def mavx512dq : Flag<["-"], "mavx512dq">, Group<m_x86_Features_Group>;
def mno_avx512dq : Flag<["-"], "mno-avx512dq">, Group<m_x86_Features_Group>;
def mavx512er : Flag<["-"], "mavx512er">, Group<m_x86_Features_Group>;
def mno_avx512er : Flag<["-"], "mno-avx512er">, Group<m_x86_Features_Group>;
+def mavx512fp16 : Flag<["-"], "mavx512fp16">, Group<m_x86_Features_Group>;
+def mno_avx512fp16 : Flag<["-"], "mno-avx512fp16">, Group<m_x86_Features_Group>;
def mavx512ifma : Flag<["-"], "mavx512ifma">, Group<m_x86_Features_Group>;
def mno_avx512ifma : Flag<["-"], "mno-avx512ifma">, Group<m_x86_Features_Group>;
def mavx512pf : Flag<["-"], "mavx512pf">, Group<m_x86_Features_Group>;
@@ -4163,6 +4272,8 @@ def mwbnoinvd : Flag<["-"], "mwbnoinvd">, Group<m_x86_Features_Group>;
def mno_wbnoinvd : Flag<["-"], "mno-wbnoinvd">, Group<m_x86_Features_Group>;
def mclzero : Flag<["-"], "mclzero">, Group<m_x86_Features_Group>;
def mno_clzero : Flag<["-"], "mno-clzero">, Group<m_x86_Features_Group>;
+def mcrc32 : Flag<["-"], "mcrc32">, Group<m_x86_Features_Group>;
+def mno_crc32 : Flag<["-"], "mno-crc32">, Group<m_x86_Features_Group>;
def mcx16 : Flag<["-"], "mcx16">, Group<m_x86_Features_Group>;
def mno_cx16 : Flag<["-"], "mno-cx16">, Group<m_x86_Features_Group>;
def menqcmd : Flag<["-"], "menqcmd">, Group<m_x86_Features_Group>;
@@ -4261,7 +4372,6 @@ def mno_vzeroupper : Flag<["-"], "mno-vzeroupper">, Group<m_x86_Features_Group>;
// These are legacy user-facing driver-level option spellings. They are always
// aliases for options that are spelled using the more common Unix / GNU flag
// style of double-dash and equals-joined flags.
-def gcc_toolchain_legacy_spelling : Separate<["-"], "gcc-toolchain">, Alias<gcc_toolchain>;
def target_legacy_spelling : Separate<["-"], "target">, Alias<target>;
// Special internal option to handle -Xlinker --no-demangle.
@@ -4301,7 +4411,6 @@ def ld_path_EQ : Joined<["--"], "ld-path=">, Group<Link_Group>;
defm align_labels : BooleanFFlag<"align-labels">, Group<clang_ignored_gcc_optimization_f_Group>;
def falign_labels_EQ : Joined<["-"], "falign-labels=">, Group<clang_ignored_gcc_optimization_f_Group>;
defm align_loops : BooleanFFlag<"align-loops">, Group<clang_ignored_gcc_optimization_f_Group>;
-def falign_loops_EQ : Joined<["-"], "falign-loops=">, Group<clang_ignored_gcc_optimization_f_Group>;
defm align_jumps : BooleanFFlag<"align-jumps">, Group<clang_ignored_gcc_optimization_f_Group>;
def falign_jumps_EQ : Joined<["-"], "falign-jumps=">, Group<clang_ignored_gcc_optimization_f_Group>;
@@ -4495,26 +4604,18 @@ def fdefault_real_8 : Flag<["-"],"fdefault-real-8">, Group<f_Group>,
HelpText<"Set the default real kind to an 8 byte wide type">;
def flarge_sizes : Flag<["-"],"flarge-sizes">, Group<f_Group>,
HelpText<"Use INTEGER(KIND=8) for the result type in size-related intrinsics">;
-def fbackslash : Flag<["-"], "fbackslash">, Group<f_Group>,
- HelpText<"Specify that backslash in string introduces an escape character">,
- DocBrief<[{Change the interpretation of backslashes in string literals from
-a single backslash character to "C-style" escape characters.}]>;
-def fno_backslash : Flag<["-"], "fno-backslash">, Group<f_Group>;
-def fxor_operator : Flag<["-"], "fxor-operator">, Group<f_Group>,
- HelpText<"Enable .XOR. as a synonym of .NEQV.">;
-def fno_xor_operator : Flag<["-"], "fno-xor-operator">, Group<f_Group>;
-def flogical_abbreviations : Flag<["-"], "flogical-abbreviations">, Group<f_Group>,
- HelpText<"Enable logical abbreviations">;
-def fno_logical_abbreviations : Flag<["-"], "fno-logical-abbreviations">, Group<f_Group>;
-def fimplicit_none : Flag<["-"], "fimplicit-none">, Group<f_Group>,
- HelpText<"No implicit typing allowed unless overridden by IMPLICIT statements">;
-def fno_implicit_none : Flag<["-"], "fno-implicit-none">, Group<f_Group>;
+
def falternative_parameter_statement : Flag<["-"], "falternative-parameter-statement">, Group<f_Group>,
HelpText<"Enable the old style PARAMETER statement">;
def fintrinsic_modules_path : Separate<["-"], "fintrinsic-modules-path">, Group<f_Group>, MetaVarName<"<dir>">,
HelpText<"Specify where to find the compiled intrinsic modules">,
- DocBrief<[{This option specifies the location of pre-compiled intrinsic modules,
+ DocBrief<[{This option specifies the location of pre-compiled intrinsic modules,
if they are not in the default location expected by the compiler.}]>;
+
+defm backslash : OptInFC1FFlag<"backslash", "Specify that backslash in string introduces an escape character">;
+defm xor_operator : OptInFC1FFlag<"xor-operator", "Enable .XOR. as a synonym of .NEQV.">;
+defm logical_abbreviations : OptInFC1FFlag<"logical-abbreviations", "Enable logical abbreviations">;
+defm implicit_none : OptInFC1FFlag<"implicit-none", "No implicit typing allowed unless overridden by IMPLICIT statements">;
}
def J : JoinedOrSeparate<["-"], "J">,
@@ -4562,18 +4663,16 @@ def fdebug_measure_parse_tree : Flag<["-"], "fdebug-measure-parse-tree">, Group<
HelpText<"Measure the parse tree">;
def fdebug_pre_fir_tree : Flag<["-"], "fdebug-pre-fir-tree">, Group<Action_Group>,
HelpText<"Dump the pre-FIR tree">;
-def fdebug_module_writer : Flag<["-"],"fdebug-module-writer">,
+def fdebug_module_writer : Flag<["-"],"fdebug-module-writer">,
HelpText<"Enable debug messages while writing module files">;
def fget_symbols_sources : Flag<["-"], "fget-symbols-sources">, Group<Action_Group>,
HelpText<"Dump symbols and their source code locations">;
def module_suffix : Separate<["-"], "module-suffix">, Group<f_Group>, MetaVarName<"<suffix>">,
HelpText<"Use <suffix> as the suffix for module files (the default value is `.mod`)">;
-def fanalyzed_objects_for_unparse : Flag<["-"],
- "fanalyzed-objects-for-unparse">, Group<f_Group>;
-def fno_analyzed_objects_for_unparse : Flag<["-"],
- "fno-analyzed-objects-for-unparse">, Group<f_Group>,
- HelpText<"Do not use the analyzed objects when unparsing">;
+def fno_reformat : Flag<["-"], "fno-reformat">, Group<Preprocessor_Group>,
+ HelpText<"Dump the cooked character stream in -E mode">;
+defm analyzed_objects_for_unparse : OptOutFC1FFlag<"analyzed-objects-for-unparse", "", "Do not use the analyzed objects when unparsing">;
}
@@ -4957,9 +5056,9 @@ def mregparm : Separate<["-"], "mregparm">,
def msmall_data_limit : Separate<["-"], "msmall-data-limit">,
HelpText<"Put global and static data smaller than the limit into a special section">,
MarshallingInfoInt<CodeGenOpts<"SmallDataLimit">>;
-def munwind_tables : Flag<["-"], "munwind-tables">,
+def funwind_tables_EQ : Joined<["-"], "funwind-tables=">,
HelpText<"Generate unwinding tables for all functions">,
- MarshallingInfoFlag<CodeGenOpts<"UnwindTables">>;
+ MarshallingInfoInt<CodeGenOpts<"UnwindTables">>;
def mconstructor_aliases : Flag<["-"], "mconstructor-aliases">,
HelpText<"Emit complete constructors and destructors as aliases when possible">,
MarshallingInfoFlag<CodeGenOpts<"CXXCtorDtorAliases">>;
@@ -5037,6 +5136,14 @@ def fsanitize_coverage_stack_depth
: Flag<["-"], "fsanitize-coverage-stack-depth">,
HelpText<"Enable max stack depth tracing">,
MarshallingInfoFlag<CodeGenOpts<"SanitizeCoverageStackDepth">>;
+def fsanitize_coverage_trace_loads
+ : Flag<["-"], "fsanitize-coverage-trace-loads">,
+ HelpText<"Enable tracing of loads">,
+ MarshallingInfoFlag<CodeGenOpts<"SanitizeCoverageTraceLoads">>;
+def fsanitize_coverage_trace_stores
+ : Flag<["-"], "fsanitize-coverage-trace-stores">,
+ HelpText<"Enable tracing of stores">,
+ MarshallingInfoFlag<CodeGenOpts<"SanitizeCoverageTraceStores">>;
def fpatchable_function_entry_offset_EQ
: Joined<["-"], "fpatchable-function-entry-offset=">, MetaVarName<"<M>">,
HelpText<"Generate M NOPs before function entry">,
@@ -5231,16 +5338,19 @@ def code_completion_with_fixits : Flag<["-"], "code-completion-with-fixits">,
def disable_free : Flag<["-"], "disable-free">,
HelpText<"Disable freeing of memory on exit">,
MarshallingInfoFlag<FrontendOpts<"DisableFree">>;
+defm clear_ast_before_backend : BoolOption<"",
+ "clear-ast-before-backend",
+ CodeGenOpts<"ClearASTBeforeBackend">,
+ DefaultFalse,
+ PosFlag<SetTrue, [], "Clear">,
+ NegFlag<SetFalse, [], "Don't clear">,
+ BothFlags<[], " the Clang AST before running backend code generation">>;
def enable_noundef_analysis : Flag<["-"], "enable-noundef-analysis">, Group<f_Group>,
HelpText<"Enable analyzing function argument and return types for mandatory definedness">,
MarshallingInfoFlag<CodeGenOpts<"EnableNoundefAttrs">>;
def discard_value_names : Flag<["-"], "discard-value-names">,
HelpText<"Discard value names in LLVM IR">,
MarshallingInfoFlag<CodeGenOpts<"DiscardValueNames">>;
-def load : Separate<["-"], "load">, MetaVarName<"<dsopath>">,
- HelpText<"Load the named plugin (dynamic shared object)">;
-def plugin : Separate<["-"], "plugin">, MetaVarName<"<name>">,
- HelpText<"Use the named plugin action instead of the default action (use \"help\" to list available options)">;
def plugin_arg : JoinedAndSeparate<["-"], "plugin-arg-">,
MetaVarName<"<name> <arg>">,
HelpText<"Pass <arg> to plugin <name>">;
@@ -5253,6 +5363,8 @@ def ast_dump_filter : Separate<["-"], "ast-dump-filter">,
" nodes having a certain substring in a qualified name. Use"
" -ast-list to list all filterable declaration node names.">,
MarshallingInfoString<FrontendOpts<"ASTDumpFilter">>;
+def ast_dump_filter_EQ : Joined<["-"], "ast-dump-filter=">,
+ Alias<ast_dump_filter>;
def fno_modules_global_index : Flag<["-"], "fno-modules-global-index">,
HelpText<"Do not automatically generate or update the global module index">,
MarshallingInfoNegativeFlag<FrontendOpts<"UseGlobalModuleIndex">>;
@@ -5276,6 +5388,12 @@ def fmodules_embed_all_files : Joined<["-"], "fmodules-embed-all-files">,
HelpText<"Embed the contents of all files read by this compilation into "
"the produced module file.">,
MarshallingInfoFlag<FrontendOpts<"ModulesEmbedAllFiles">>;
+defm fimplicit_modules_use_lock : BoolOption<"f", "implicit-modules-use-lock",
+ FrontendOpts<"BuildingImplicitModuleUsesLock">, DefaultTrue,
+ NegFlag<SetFalse>,
+ PosFlag<SetTrue, [],
+ "Use filesystem locks for implicit modules builds to avoid "
+ "duplicating work in competing clang invocations.">>;
// FIXME: We only need this in C++ modules / Modules TS if we might textually
// enter a different module (eg, when building a header unit).
def fmodules_local_submodule_visibility :
@@ -5283,7 +5401,7 @@ def fmodules_local_submodule_visibility :
HelpText<"Enforce name visibility rules across submodules of the same "
"top-level module.">,
MarshallingInfoFlag<LangOpts<"ModulesLocalVisibility">>,
- ImpliedByAnyOf<[fmodules_ts.KeyPath, cpp_modules.KeyPath]>;
+ ImpliedByAnyOf<[fmodules_ts.KeyPath, fcxx_modules.KeyPath]>;
def fmodules_codegen :
Flag<["-"], "fmodules-codegen">,
HelpText<"Generate code for uses of this module that assumes an explicit "
@@ -5807,6 +5925,12 @@ def init_only : Flag<["-"], "init-only">,
HelpText<"Only execute frontend initialization">;
} // let Group = Action_Group
+
+def load : Separate<["-"], "load">, MetaVarName<"<dsopath>">,
+ HelpText<"Load the named plugin (dynamic shared object)">;
+def plugin : Separate<["-"], "plugin">, MetaVarName<"<name>">,
+ HelpText<"Use the named plugin action instead of the default action (use \"help\" to list available options)">;
+
} // let Flags = [CC1Option, FC1Option, NoDriverOption]
//===----------------------------------------------------------------------===//
@@ -6030,16 +6154,7 @@ def _SLASH_WX_ : CLFlag<"WX-">,
HelpText<"Do not treat warnings as errors (default)">,
Alias<W_Joined>, AliasArgs<["no-error"]>;
def _SLASH_w_flag : CLFlag<"w">, HelpText<"Disable all warnings">, Alias<w>;
-def _SLASH_wd4005 : CLFlag<"wd4005">, Alias<W_Joined>,
- AliasArgs<["no-macro-redefined"]>;
-def _SLASH_wd4018 : CLFlag<"wd4018">, Alias<W_Joined>,
- AliasArgs<["no-sign-compare"]>;
-def _SLASH_wd4100 : CLFlag<"wd4100">, Alias<W_Joined>,
- AliasArgs<["no-unused-parameter"]>;
-def _SLASH_wd4910 : CLFlag<"wd4910">, Alias<W_Joined>,
- AliasArgs<["no-dllexport-explicit-instantiation-decl"]>;
-def _SLASH_wd4996 : CLFlag<"wd4996">, Alias<W_Joined>,
- AliasArgs<["no-deprecated-declarations"]>;
+def _SLASH_wd : CLCompileJoined<"wd">;
def _SLASH_vd : CLJoined<"vd">, HelpText<"Control vtordisp placement">,
Alias<vtordisp_mode_EQ>;
def _SLASH_X : CLFlag<"X">,
@@ -6175,6 +6290,8 @@ def _SLASH_TC : CLCompileFlag<"TC">, HelpText<"Treat all source files as C">;
def _SLASH_Tp : CLCompileJoinedOrSeparate<"Tp">,
HelpText<"Treat <file> as C++ source file">, MetaVarName<"<file>">;
def _SLASH_TP : CLCompileFlag<"TP">, HelpText<"Treat all source files as C++">;
+def _SLASH_diasdkdir : CLJoinedOrSeparate<"diasdkdir">,
+ HelpText<"Path to the DIA SDK">, MetaVarName<"<dir>">;
def _SLASH_vctoolsdir : CLJoinedOrSeparate<"vctoolsdir">,
HelpText<"Path to the VCToolChain">, MetaVarName<"<dir>">;
def _SLASH_vctoolsversion : CLJoinedOrSeparate<"vctoolsversion">,
@@ -6184,7 +6301,7 @@ def _SLASH_winsdkdir : CLJoinedOrSeparate<"winsdkdir">,
def _SLASH_winsdkversion : CLJoinedOrSeparate<"winsdkversion">,
HelpText<"Full version of the Windows SDK, defaults to newest found">;
def _SLASH_winsysroot : CLJoinedOrSeparate<"winsysroot">,
- HelpText<"Same as /vctoolsdir <dir>/VC/Tools/MSVC/<vctoolsversion> /winsdkdir <dir>/Windows Kits/10">,
+ HelpText<"Same as \"/diasdkdir <dir>/DIA SDK\" /vctoolsdir <dir>/VC/Tools/MSVC/<vctoolsversion> \"/winsdkdir <dir>/Windows Kits/10\"">,
MetaVarName<"<dir>">;
def _SLASH_volatile_iso : Option<["/", "-"], "volatile:iso", KIND_FLAG>,
Group<_SLASH_volatile_Group>, Flags<[CLOption, NoXarchOption]>,
diff --git a/clang/include/clang/Driver/Phases.h b/clang/include/clang/Driver/Phases.h
index ce914dd70514..9003c5857351 100644
--- a/clang/include/clang/Driver/Phases.h
+++ b/clang/include/clang/Driver/Phases.h
@@ -22,11 +22,10 @@ namespace phases {
Assemble,
Link,
IfsMerge,
- LastPhase = IfsMerge,
};
enum {
- MaxNumberOfPhases = LastPhase + 1
+ MaxNumberOfPhases = IfsMerge + 1
};
const char *getPhaseName(ID Id);
diff --git a/clang/include/clang/Driver/SanitizerArgs.h b/clang/include/clang/Driver/SanitizerArgs.h
index e9e329e7cb53..84bb324775d1 100644
--- a/clang/include/clang/Driver/SanitizerArgs.h
+++ b/clang/include/clang/Driver/SanitizerArgs.h
@@ -65,7 +65,8 @@ class SanitizerArgs {
public:
/// Parses the sanitizer arguments from an argument list.
- SanitizerArgs(const ToolChain &TC, const llvm::opt::ArgList &Args);
+ SanitizerArgs(const ToolChain &TC, const llvm::opt::ArgList &Args,
+ bool DiagnoseErrors = true);
bool needsSharedRt() const { return SharedRuntime; }
diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h
index 882ae40086ce..dad861d586cb 100644
--- a/clang/include/clang/Driver/ToolChain.h
+++ b/clang/include/clang/Driver/ToolChain.h
@@ -113,6 +113,13 @@ public:
RM_Disabled,
};
+ struct BitCodeLibraryInfo {
+ std::string Path;
+ bool ShouldInternalize;
+ BitCodeLibraryInfo(StringRef Path, bool ShouldInternalize = true)
+ : Path(Path), ShouldInternalize(ShouldInternalize) {}
+ };
+
enum FileType { FT_Object, FT_Static, FT_Shared };
private:
@@ -155,7 +162,7 @@ private:
Tool *getOffloadBundler() const;
Tool *getOffloadWrapper() const;
- mutable std::unique_ptr<SanitizerArgs> SanitizerArguments;
+ mutable bool SanitizerArgsChecked = false;
mutable std::unique_ptr<XRayArgs> XRayArguments;
/// The effective clang triple for the current Job.
@@ -259,7 +266,7 @@ public:
const Multilib &getMultilib() const { return SelectedMultilib; }
- const SanitizerArgs& getSanitizerArgs() const;
+ SanitizerArgs getSanitizerArgs(const llvm::opt::ArgList &JobArgs) const;
const XRayArgs& getXRayArgs() const;
@@ -478,15 +485,12 @@ public:
virtual bool isPICDefault() const = 0;
/// Test whether this toolchain defaults to PIE.
- virtual bool isPIEDefault() const = 0;
-
- /// Test whether this toolchaind defaults to non-executable stacks.
- virtual bool isNoExecStackDefault() const;
+ virtual bool isPIEDefault(const llvm::opt::ArgList &Args) const = 0;
/// Tests whether this toolchain forces its default for PIC, PIE or
/// non-PIC. If this returns true, any PIC related flags should be ignored
- /// and instead the results of \c isPICDefault() and \c isPIEDefault() are
- /// used exclusively.
+ /// and instead the results of \c isPICDefault() and \c isPIEDefault(const
+ /// llvm::opt::ArgList &Args) are used exclusively.
virtual bool isPICDefaultForced() const = 0;
/// SupportsProfiling - Does this tool chain support -pg.
@@ -681,7 +685,7 @@ public:
const llvm::opt::ArgList &Args) const;
/// Get paths of HIP device libraries.
- virtual llvm::SmallVector<std::string, 12>
+ virtual llvm::SmallVector<BitCodeLibraryInfo, 12>
getHIPDeviceLibs(const llvm::opt::ArgList &Args) const;
/// Return sanitizers which are available in this toolchain.
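The ToolChain changes above make sanitizer and PIE decisions per-job (getSanitizerArgs and isPIEDefault now receive the job's ArgList) and let HIP device libraries carry a per-library ShouldInternalize flag via BitCodeLibraryInfo. A minimal sketch, not part of this import, of how a hypothetical downstream ToolChain might adapt to the new signatures (the class name and bitcode path are illustrative):

```cpp
#include "clang/Driver/ToolChain.h"
#include "llvm/Option/ArgList.h"

namespace {
// Hypothetical subclass used only to show the new override signatures.
class ExampleToolChain : public clang::driver::ToolChain {
public:
  using clang::driver::ToolChain::ToolChain;

  bool isPICDefault() const override { return true; }

  // isPIEDefault() now receives the job's arguments instead of being global.
  bool isPIEDefault(const llvm::opt::ArgList &Args) const override {
    (void)Args; // a real toolchain could inspect Args here
    return false;
  }

  bool isPICDefaultForced() const override { return false; }

  // Device libraries now report a per-library ShouldInternalize flag.
  llvm::SmallVector<BitCodeLibraryInfo, 12>
  getHIPDeviceLibs(const llvm::opt::ArgList &) const override {
    return {BitCodeLibraryInfo("/illustrative/path/ocml.bc",
                               /*ShouldInternalize=*/true)};
  }
};
} // namespace
```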
diff --git a/clang/include/clang/Driver/Types.h b/clang/include/clang/Driver/Types.h
index c9d63551090c..4aecf7ee1e52 100644
--- a/clang/include/clang/Driver/Types.h
+++ b/clang/include/clang/Driver/Types.h
@@ -111,7 +111,7 @@ namespace types {
/// getCompilationPhases - Get the list of compilation phases ('Phases') to be
/// done for type 'Id' up until including LastPhase.
llvm::SmallVector<phases::ID, phases::MaxNumberOfPhases>
- getCompilationPhases(ID Id, phases::ID LastPhase = phases::LastPhase);
+ getCompilationPhases(ID Id, phases::ID LastPhase = phases::IfsMerge);
llvm::SmallVector<phases::ID, phases::MaxNumberOfPhases>
getCompilationPhases(const clang::driver::Driver &Driver,
llvm::opt::DerivedArgList &DAL, ID Id);
diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h
index c424e79a971c..d38bc6e3f0e6 100644
--- a/clang/include/clang/Format/Format.h
+++ b/clang/include/clang/Format/Format.h
@@ -40,7 +40,11 @@ enum class ParseError {
Success = 0,
Error,
Unsuitable,
- BinPackTrailingCommaConflict
+ BinPackTrailingCommaConflict,
+ InvalidQualifierSpecified,
+ DuplicateQualifierSpecified,
+ MissingQualifierType,
+ MissingQualifierOrder
};
class ParseErrorCategory final : public std::error_category {
public:
@@ -59,6 +63,7 @@ struct FormatStyle {
bool InheritsParentConfig;
/// The extra indent or outdent of access modifiers, e.g. ``public:``.
+ /// \version 3.3
int AccessModifierOffset;
/// Different styles for aligning after open brackets.
@@ -88,6 +93,7 @@ struct FormatStyle {
///
/// This applies to round brackets (parentheses), angle brackets and square
/// brackets.
+ /// \version 3.8
BracketAlignmentStyle AlignAfterOpenBracket;
/// Different style for aligning array initializers.
@@ -117,6 +123,7 @@ struct FormatStyle {
};
/// if not ``None``, when using initialization for an array of structs
/// aligns the fields into columns.
+ /// \version 13
ArrayInitializerAlignmentStyle AlignArrayOfStructures;
/// Styles for alignment of consecutive tokens. Tokens can be assignment signs
@@ -201,6 +208,7 @@ struct FormatStyle {
/// /* some comment */
/// #define bar(y, z) (y + z)
/// \endcode
+ /// \version 9
AlignConsecutiveStyle AlignConsecutiveMacros;
/// Style of aligning consecutive assignments.
@@ -269,6 +277,7 @@ struct FormatStyle {
/// /* A comment. */
/// double e = 4;
/// \endcode
+ /// \version 3.8
AlignConsecutiveStyle AlignConsecutiveAssignments;
/// Style of aligning consecutive bit field.
@@ -338,6 +347,7 @@ struct FormatStyle {
/// /* A comment. */
/// int ee : 3;
/// \endcode
+ /// \version 11
AlignConsecutiveStyle AlignConsecutiveBitFields;
/// Style of aligning consecutive declarations.
@@ -407,6 +417,7 @@ struct FormatStyle {
/// /* A comment. */
/// bool c = false;
/// \endcode
+ /// \version 3.8
AlignConsecutiveStyle AlignConsecutiveDeclarations;
/// Different styles for aligning escaped newlines.
@@ -441,6 +452,7 @@ struct FormatStyle {
};
/// Options for aligning backslashes in escaped newlines.
+ /// \version 5
EscapedNewlineAlignmentStyle AlignEscapedNewlines;
/// Different styles for aligning operands.
@@ -479,6 +491,7 @@ struct FormatStyle {
/// If ``true``, horizontally align operands of binary and ternary
/// expressions.
+ /// \version 12
OperandAlignmentStyle AlignOperands;
/// If ``true``, aligns trailing comments.
@@ -487,6 +500,7 @@ struct FormatStyle {
/// int a; // My comment a vs. int a; // My comment a
/// int b = 2; // comment b int b = 2; // comment about b
/// \endcode
+ /// \version 3.7
bool AlignTrailingComments;
/// \brief If a function call or braced initializer list doesn't fit on a
@@ -503,22 +517,12 @@ struct FormatStyle {
/// c,
/// d);
/// \endcode
+ /// \version 9
bool AllowAllArgumentsOnNextLine;
- /// \brief If a constructor definition with a member initializer list doesn't
- /// fit on a single line, allow putting all member initializers onto the next
- /// line, if ```ConstructorInitializerAllOnOneLineOrOnePerLine``` is true.
- /// Note that this parameter has no effect if
- /// ```ConstructorInitializerAllOnOneLineOrOnePerLine``` is false.
- /// \code
- /// true:
- /// MyClass::MyClass() :
- /// member0(0), member1(2) {}
- ///
- /// false:
- /// MyClass::MyClass() :
- /// member0(0),
- /// member1(2) {}
+ /// This option is **deprecated**. See ``NextLine`` of
+ /// ``PackConstructorInitializers``.
+ /// \version 9
bool AllowAllConstructorInitializersOnNextLine;
/// If the function declaration doesn't fit on a line,
@@ -536,6 +540,7 @@ struct FormatStyle {
/// int d,
/// int e);
/// \endcode
+ /// \version 3.3
bool AllowAllParametersOfDeclarationOnNextLine;
/// Allow short enums on a single line.
@@ -544,12 +549,12 @@ struct FormatStyle {
/// enum { A, B } myEnum;
///
/// false:
- /// enum
- /// {
+ /// enum {
/// A,
/// B
/// } myEnum;
/// \endcode
+ /// \version 12
bool AllowShortEnumsOnASingleLine;
/// Different styles for merging short blocks containing at most one
@@ -582,6 +587,7 @@ struct FormatStyle {
/// Dependent on the value, ``while (true) { continue; }`` can be put on a
/// single line.
+ /// \version 11
ShortBlockStyle AllowShortBlocksOnASingleLine;
/// If ``true``, short case labels will be contracted to a single line.
@@ -595,6 +601,7 @@ struct FormatStyle {
/// return;
/// }
/// \endcode
+ /// \version 3.6
bool AllowShortCaseLabelsOnASingleLine;
/// Different styles for merging short functions containing at most one
@@ -647,6 +654,7 @@ struct FormatStyle {
/// Dependent on the value, ``int f() { return 0; }`` can be put on a
/// single line.
+ /// \version 3.5
ShortFunctionStyle AllowShortFunctionsOnASingleLine;
/// Different styles for handling short if statements.
@@ -717,6 +725,7 @@ struct FormatStyle {
};
/// Dependent on the value, ``if (a) return;`` can be put on a single line.
+ /// \version 9
ShortIfStyle AllowShortIfStatementsOnASingleLine;
/// Different styles for merging short lambdas containing at most one
@@ -750,10 +759,12 @@ struct FormatStyle {
/// Dependent on the value, ``auto lambda []() { return 0; }`` can be put on a
/// single line.
+ /// \version 9
ShortLambdaStyle AllowShortLambdasOnASingleLine;
/// If ``true``, ``while (true) continue;`` can be put on a single
/// line.
+ /// \version 3.7
bool AllowShortLoopsOnASingleLine;
/// Different ways to break after the function definition return type.
@@ -841,9 +852,11 @@ struct FormatStyle {
/// The function definition return type breaking style to use. This
/// option is **deprecated** and is retained for backwards compatibility.
+ /// \version 3.7
DefinitionReturnTypeBreakingStyle AlwaysBreakAfterDefinitionReturnType;
/// The function declaration return type breaking style to use.
+ /// \version 3.8
ReturnTypeBreakingStyle AlwaysBreakAfterReturnType;
/// If ``true``, always break before multiline string literals.
@@ -858,6 +871,7 @@ struct FormatStyle {
/// "bbbb" "cccc";
/// "cccc";
/// \endcode
+ /// \version 3.4
bool AlwaysBreakBeforeMultilineStrings;
/// Different ways to break after the template declaration.
@@ -897,6 +911,7 @@ struct FormatStyle {
};
/// The template declaration breaking style to use.
+ /// \version 7
BreakTemplateDeclarationsStyle AlwaysBreakTemplateDeclarations;
/// A vector of strings that should be interpreted as attributes/qualifiers
@@ -915,6 +930,7 @@ struct FormatStyle {
/// AttributeMacros: ['__capability', '__output', '__ununsed']
/// \endcode
///
+ /// \version 12
std::vector<std::string> AttributeMacros;
/// If ``false``, a function call's arguments will either be all on the
@@ -933,6 +949,7 @@ struct FormatStyle {
/// aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa);
/// }
/// \endcode
+ /// \version 3.7
bool BinPackArguments;
/// The style of inserting trailing commas into container literals.
@@ -962,6 +979,7 @@ struct FormatStyle {
/// // ^ inserted
/// ]
/// \endcode
+ /// \version 12
TrailingCommaStyle InsertTrailingCommas;
/// If ``false``, a function declaration's or function definition's
@@ -976,6 +994,7 @@ struct FormatStyle {
/// int aaaaaaaaaaaaaaaaaaaa,
/// int aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa) {}
/// \endcode
+ /// \version 3.7
bool BinPackParameters;
/// The style of wrapping parameters on the same line (bin-packed) or
@@ -1030,6 +1049,7 @@ struct FormatStyle {
};
/// The way to wrap binary operators.
+ /// \version 3.6
BinaryOperatorStyle BreakBeforeBinaryOperators;
/// Different ways to attach braces to their surrounding context.
@@ -1464,6 +1484,7 @@ struct FormatStyle {
};
/// The brace breaking style to use.
+ /// \version 3.7
BraceBreakingStyle BreakBeforeBraces;
/// Different ways to wrap braces after control statements.
@@ -1738,6 +1759,7 @@ struct FormatStyle {
/// AfterStruct: false
/// SplitEmptyFunction: false
/// \endcode
+ /// \version 3.8
BraceWrappingFlags BraceWrapping;
/// If ``true``, concept will be placed on a new line.
@@ -1749,6 +1771,7 @@ struct FormatStyle {
/// false:
/// template<typename T> concept ...
/// \endcode
+ /// \version 13
bool BreakBeforeConceptDeclarations;
/// If ``true``, ternary operators will be placed after line breaks.
@@ -1763,6 +1786,7 @@ struct FormatStyle {
/// firstValue :
/// SecondValueVeryVeryVeryVeryLong;
/// \endcode
+ /// \version 3.7
bool BreakBeforeTernaryOperators;
/// Different ways to break initializers.
@@ -1791,7 +1815,8 @@ struct FormatStyle {
BCIS_AfterColon
};
- /// The constructor initializers style to use.
+ /// The break constructor initializers style to use.
+ /// \version 5
BreakConstructorInitializersStyle BreakConstructorInitializers;
/// Break after each annotation on a field in Java files.
@@ -1801,6 +1826,7 @@ struct FormatStyle {
/// @Mock
/// DataLoad loader;
/// \endcode
+ /// \version 3.8
bool BreakAfterJavaFieldAnnotations;
/// Allow breaking string literals when formatting.
@@ -1814,6 +1840,7 @@ struct FormatStyle {
/// const char* x =
/// "veryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryLongString";
/// \endcode
+ /// \version 3.9
bool BreakStringLiterals;
/// The column limit.
@@ -1821,6 +1848,7 @@ struct FormatStyle {
/// A column limit of ``0`` means that there is no column limit. In this case,
/// clang-format will respect the input's line breaking decisions within
/// statements unless they contradict other rules.
+ /// \version 3.7
unsigned ColumnLimit;
/// A regular expression that describes comments with special meaning,
@@ -1830,8 +1858,77 @@ struct FormatStyle {
/// // Will leave the following line unaffected
/// #include <vector> // FOOBAR pragma: keep
/// \endcode
+ /// \version 3.7
std::string CommentPragmas;
+ /// Different specifiers and qualifiers alignment styles.
+ enum QualifierAlignmentStyle {
+ /// Don't change specifiers/qualifiers to either Left or Right alignment
+ /// (default).
+ /// \code
+ /// int const a;
+ /// const int *a;
+ /// \endcode
+ QAS_Leave,
+ /// Change specifiers/qualifiers to be left-aligned.
+ /// \code
+ /// const int a;
+ /// const int *a;
+ /// \endcode
+ QAS_Left,
+ /// Change specifiers/qualifiers to be right-aligned.
+ /// \code
+ /// int const a;
+ /// int const *a;
+ /// \endcode
+ QAS_Right,
+ /// Change specifiers/qualifiers to be aligned based on ``QualifierOrder``.
+ /// With:
+ /// \code{.yaml}
+ /// QualifierOrder: ['inline', 'static', 'type', 'const']
+ /// \endcode
+ ///
+ /// \code
+ ///
+ /// int const a;
+ /// int const *a;
+ /// \endcode
+ QAS_Custom
+ };
+
+ /// Different ways to arrange specifiers and qualifiers (e.g. const/volatile).
+ /// \warning
+ /// Setting ``QualifierAlignment`` to something other than ``Leave`` COULD
+ /// lead to incorrect code formatting due to clang-format's lack of complete
+ /// semantic information.
+ /// As such, extra care should be taken to review code changes made by the
+ /// use of this option.
+ /// \endwarning
+ /// \version 14
+ QualifierAlignmentStyle QualifierAlignment;
+
+ /// The order in which the qualifiers appear.
+ /// Order is an array that can contain any of the following:
+ ///
+ /// * const
+ /// * inline
+ /// * static
+ /// * constexpr
+ /// * volatile
+ /// * restrict
+ /// * type
+ ///
+ /// Note: it MUST contain 'type'.
+ /// Items to the left of 'type' will be placed to the left of the type and
+ /// aligned in the order supplied. Items to the right of 'type' will be placed
+ /// to the right of the type and aligned in the order supplied.
+ ///
+ /// \code{.yaml}
+ /// QualifierOrder: ['inline', 'static', 'type', 'const', 'volatile']
+ /// \endcode
+ /// \version 14
+ std::vector<std::string> QualifierOrder;
+
/// Different ways to break inheritance list.
enum BreakInheritanceListStyle : unsigned char {
/// Break inheritance list before the colon and after the commas.
@@ -1869,6 +1966,7 @@ struct FormatStyle {
};
/// The inheritance list style to use.
+ /// \version 7
BreakInheritanceListStyle BreakInheritanceList;
/// If ``true``, consecutive namespace declarations will be on the same
@@ -1892,30 +1990,17 @@ struct FormatStyle {
/// namespace Extra {
/// }}}
/// \endcode
+ /// \version 5
bool CompactNamespaces;
- // clang-format off
- /// If the constructor initializers don't fit on a line, put each
- /// initializer on its own line.
- /// \code
- /// true:
- /// SomeClass::Constructor()
- /// : aaaaaaaa(aaaaaaaa), aaaaaaaa(aaaaaaaa), aaaaaaaa(aaaaaaaaaaaaaaaaaaaaaaaaa) {
- /// return 0;
- /// }
- ///
- /// false:
- /// SomeClass::Constructor()
- /// : aaaaaaaa(aaaaaaaa), aaaaaaaa(aaaaaaaa),
- /// aaaaaaaa(aaaaaaaaaaaaaaaaaaaaaaaaa) {
- /// return 0;
- /// }
- /// \endcode
+ /// This option is **deprecated**. See ``CurrentLine`` of
+ /// ``PackConstructorInitializers``.
+ /// \version 3.7
bool ConstructorInitializerAllOnOneLineOrOnePerLine;
- // clang-format on
/// The number of characters to use for indentation of constructor
/// initializer lists as well as inheritance lists.
+ /// \version 3.7
unsigned ConstructorInitializerIndentWidth;
/// Indent width for line continuations.
@@ -1926,6 +2011,7 @@ struct FormatStyle {
/// longFunction( // Again a long comment
/// arg);
/// \endcode
+ /// \version 3.7
unsigned ContinuationIndentWidth;
/// If ``true``, format braced lists as best suited for C++11 braced
@@ -1948,10 +2034,12 @@ struct FormatStyle {
/// f(MyMap[{composite, key}]); f(MyMap[{ composite, key }]);
/// new int[3]{1, 2, 3}; new int[3]{ 1, 2, 3 };
/// \endcode
+ /// \version 3.4
bool Cpp11BracedListStyle;
/// \brief Analyze the formatted file for the most used line ending (``\r\n``
/// or ``\n``). ``UseCRLF`` is only used as a fallback if none can be derived.
+ /// \version 11
bool DeriveLineEnding;
/// If ``true``, analyze the formatted file for the most common
@@ -1959,9 +2047,11 @@ struct FormatStyle {
/// Pointer and reference alignment styles are going to be updated according
/// to the preferences found in the file.
/// ``PointerAlignment`` is then used only as fallback.
+ /// \version 3.7
bool DerivePointerAlignment;
/// Disables formatting completely.
+ /// \version 3.7
bool DisableFormat;
/// Different styles for empty line after access modifiers.
@@ -2012,6 +2102,7 @@ struct FormatStyle {
/// Defines when to put an empty line after access modifiers.
/// ``EmptyLineBeforeAccessModifier`` configuration handles the number of
/// empty lines between two access modifiers.
+ /// \version 14
EmptyLineAfterAccessModifierStyle EmptyLineAfterAccessModifier;
/// Different styles for empty line before access modifiers.
@@ -2074,6 +2165,7 @@ struct FormatStyle {
};
/// Defines in which cases to put empty line before access modifiers.
+ /// \version 13
EmptyLineBeforeAccessModifierStyle EmptyLineBeforeAccessModifier;
/// If ``true``, clang-format detects whether function calls and
@@ -2086,8 +2178,56 @@ struct FormatStyle {
///
/// NOTE: This is an experimental flag, that might go away or be renamed. Do
/// not use this in config files, etc. Use at your own risk.
+ /// \version 3.7
bool ExperimentalAutoDetectBinPacking;
+ /// Different ways to try to fit all constructor initializers on a line.
+ enum PackConstructorInitializersStyle : unsigned char {
+ /// Always put each constructor initializer on its own line.
+ /// \code
+ /// Constructor()
+ /// : a(),
+ /// b()
+ /// \endcode
+ PCIS_Never,
+ /// Bin-pack constructor initializers.
+ /// \code
+ /// Constructor()
+ /// : aaaaaaaaaaaaaaaaaaaa(), bbbbbbbbbbbbbbbbbbbb(),
+ /// cccccccccccccccccccc()
+ /// \endcode
+ PCIS_BinPack,
+ /// Put all constructor initializers on the current line if they fit.
+ /// Otherwise, put each one on its own line.
+ /// \code
+ /// Constructor() : a(), b()
+ ///
+ /// Constructor()
+ /// : aaaaaaaaaaaaaaaaaaaa(),
+ /// bbbbbbbbbbbbbbbbbbbb(),
+ /// ddddddddddddd()
+ /// \endcode
+ PCIS_CurrentLine,
+ /// Same as ``PCIS_CurrentLine`` except that if all constructor initializers
+ /// do not fit on the current line, try to fit them on the next line.
+ /// \code
+ /// Constructor() : a(), b()
+ ///
+ /// Constructor()
+ /// : aaaaaaaaaaaaaaaaaaaa(), bbbbbbbbbbbbbbbbbbbb(), ddddddddddddd()
+ ///
+ /// Constructor()
+ /// : aaaaaaaaaaaaaaaaaaaa(),
+ /// bbbbbbbbbbbbbbbbbbbb(),
+ /// cccccccccccccccccccc()
+ /// \endcode
+ PCIS_NextLine,
+ };
+
+ /// The pack constructor initializers style to use.
+ /// \version 14
+ PackConstructorInitializersStyle PackConstructorInitializers;
+
/// If ``true``, clang-format adds missing namespace end comments for
/// short namespaces and fixes invalid existing ones. Short ones are
/// controlled by "ShortNamespaceLines".
@@ -2098,6 +2238,7 @@ struct FormatStyle {
/// bar(); bar();
/// } // namespace a }
/// \endcode
+ /// \version 5
bool FixNamespaceComments;
/// A vector of macros that should be interpreted as foreach loops
@@ -2115,6 +2256,7 @@ struct FormatStyle {
/// \endcode
///
/// For example: BOOST_FOREACH.
+ /// \version 3.7
std::vector<std::string> ForEachMacros;
/// A vector of macros that should be interpreted as conditionals
@@ -2135,6 +2277,7 @@ struct FormatStyle {
///
/// For example: `KJ_IF_MAYBE
/// <https://github.com/capnproto/capnproto/blob/master/kjdoc/tour.md#maybes>`_
+ /// \version 14
std::vector<std::string> IfMacros;
/// \brief A vector of macros that should be interpreted as type declarations
@@ -2151,6 +2294,7 @@ struct FormatStyle {
/// \endcode
///
/// For example: OpenSSL STACK_OF, BSD LIST_ENTRY.
+ /// \version 9
std::vector<std::string> TypenameMacros;
/// A vector of macros that should be interpreted as complete
@@ -2161,6 +2305,7 @@ struct FormatStyle {
/// clang-format aware of such cases.
///
/// For example: Q_UNUSED
+ /// \version 8
std::vector<std::string> StatementMacros;
/// A vector of macros which are used to open namespace blocks.
@@ -2173,6 +2318,7 @@ struct FormatStyle {
/// \endcode
///
/// For example: TESTSUITE
+ /// \version 9
std::vector<std::string> NamespaceMacros;
/// A vector of macros which are whitespace-sensitive and should not
@@ -2189,6 +2335,7 @@ struct FormatStyle {
/// \endcode
///
/// For example: BOOST_PP_STRINGIZE
+ /// \version 12
std::vector<std::string> WhitespaceSensitiveMacros;
tooling::IncludeStyle IncludeStyle;
@@ -2217,6 +2364,7 @@ struct FormatStyle {
/// return 1; return 1;
/// } }
/// \endcode
+ /// \version 13
bool IndentAccessModifiers;
/// Indent case labels one level from the switch statement.
@@ -2235,6 +2383,7 @@ struct FormatStyle {
/// plop(); plop();
/// } }
/// \endcode
+ /// \version 3.3
bool IndentCaseLabels;
/// Indent case label blocks one level from the case label.
@@ -2257,6 +2406,7 @@ struct FormatStyle {
/// }
/// }
/// \endcode
+ /// \version 11
bool IndentCaseBlocks;
/// Indent goto labels.
@@ -2273,6 +2423,7 @@ struct FormatStyle {
/// return 1; return 1;
/// } }
/// \endcode
+ /// \version 10
bool IndentGotoLabels;
/// Options for indenting preprocessor directives.
@@ -2307,6 +2458,7 @@ struct FormatStyle {
};
/// The preprocessor directive indenting style to use.
+ /// \version 6
PPDirectiveIndentStyle IndentPPDirectives;
/// Indents extern blocks
@@ -2346,6 +2498,7 @@ struct FormatStyle {
};
/// IndentExternBlockStyle is the type of indenting of extern blocks.
+ /// \version 12
IndentExternBlockStyle IndentExternBlock;
/// Indent the requires clause in a template
@@ -2364,6 +2517,7 @@ struct FormatStyle {
/// //....
/// }
/// \endcode
+ /// \version 13
bool IndentRequires;
/// The number of columns to use for indentation.
@@ -2377,6 +2531,7 @@ struct FormatStyle {
/// }
/// }
/// \endcode
+ /// \version 3.7
unsigned IndentWidth;
/// Indent if a function definition or declaration is wrapped after the
@@ -2390,6 +2545,7 @@ struct FormatStyle {
/// LoooooooooooooooooooooooooooooooooooooooongReturnType
/// LoooooooooooooooooooooooooooooooongFunctionDeclaration();
/// \endcode
+ /// \version 3.7
bool IndentWrappedFunctionNames;
/// A vector of prefixes ordered by the desired groups for Java imports.
@@ -2423,6 +2579,7 @@ struct FormatStyle {
///
/// import org.example.ClassD;
/// \endcode
+ /// \version 8
std::vector<std::string> JavaImportGroups;
/// Quotation styles for JavaScript strings. Does not affect template
@@ -2449,6 +2606,7 @@ struct FormatStyle {
};
/// The JavaScriptQuoteStyle to use for JavaScript strings.
+ /// \version 3.9
JavaScriptQuoteStyle JavaScriptQuotes;
// clang-format off
@@ -2464,6 +2622,7 @@ struct FormatStyle {
/// false:
/// import {VeryLongImportsAreAnnoying, VeryLongImportsAreAnnoying, VeryLongImportsAreAnnoying,} from "some/module.js"
/// \endcode
+ /// \version 3.9
bool JavaScriptWrapImports;
// clang-format on
@@ -2475,6 +2634,7 @@ struct FormatStyle {
/// bar(); }
/// }
/// \endcode
+ /// \version 3.7
bool KeepEmptyLinesAtTheStartOfBlocks;
/// Supported languages.
@@ -2511,6 +2671,7 @@ struct FormatStyle {
bool isJson() const { return Language == LK_Json; }
/// Language, this format style is targeted at.
+ /// \version 3.5
LanguageKind Language;
/// Indentation logic for lambda bodies.
@@ -2543,16 +2704,17 @@ struct FormatStyle {
/// ``OuterScope``. The KJ style guide requires ``OuterScope``.
/// `KJ style guide
/// <https://github.com/capnproto/capnproto/blob/master/kjdoc/style-guide.md>`_
+ /// \version 13
LambdaBodyIndentationKind LambdaBodyIndentation;
/// A regular expression matching macros that start a block.
/// \code
/// # With:
/// MacroBlockBegin: "^NS_MAP_BEGIN|\
- /// NS_TABLE_HEAD$"
+ /// NS_TABLE_HEAD$"
/// MacroBlockEnd: "^\
- /// NS_MAP_END|\
- /// NS_TABLE_.*_END$"
+ /// NS_MAP_END|\
+ /// NS_TABLE_.*_END$"
///
/// NS_MAP_BEGIN
/// foo();
@@ -2571,9 +2733,11 @@ struct FormatStyle {
/// bar();
/// NS_TABLE_FOO_END
/// \endcode
+ /// \version 3.7
std::string MacroBlockBegin;
/// A regular expression matching macros that end a block.
+ /// \version 3.7
std::string MacroBlockEnd;
/// The maximum number of consecutive empty lines to keep.
@@ -2587,6 +2751,7 @@ struct FormatStyle {
/// return i;
/// }
/// \endcode
+ /// \version 3.7
unsigned MaxEmptyLinesToKeep;
/// Different ways to indent namespace contents.
@@ -2624,6 +2789,7 @@ struct FormatStyle {
};
/// The indentation used for namespaces.
+ /// \version 3.7
NamespaceIndentationKind NamespaceIndentation;
/// Controls bin-packing Objective-C protocol conformance list
@@ -2656,6 +2822,7 @@ struct FormatStyle {
/// ddddddddddddd> {
/// }
/// \endcode
+ /// \version 7
BinPackStyle ObjCBinPackProtocolList;
/// The number of characters to use for indentation of ObjC blocks.
@@ -2666,10 +2833,12 @@ struct FormatStyle {
/// [self onOperationDone];
/// }];
/// \endcode
+ /// \version 3.7
unsigned ObjCBlockIndentWidth;
/// Add a space after ``@property`` in Objective-C, i.e. use
/// ``@property (readonly)`` instead of ``@property(readonly)``.
+ /// \version 3.7
bool ObjCSpaceAfterProperty;
/// Break parameters list into lines when there is nested block
@@ -2693,39 +2862,50 @@ struct FormatStyle {
/// }]
/// }
/// \endcode
+ /// \version 12
bool ObjCBreakBeforeNestedBlockParam;
/// Add a space in front of an Objective-C protocol list, i.e. use
/// ``Foo <Protocol>`` instead of ``Foo<Protocol>``.
+ /// \version 3.7
bool ObjCSpaceBeforeProtocolList;
/// The penalty for breaking around an assignment operator.
+ /// \version 5
unsigned PenaltyBreakAssignment;
/// The penalty for breaking a function call after ``call(``.
+ /// \version 3.7
unsigned PenaltyBreakBeforeFirstCallParameter;
/// The penalty for each line break introduced inside a comment.
+ /// \version 3.7
unsigned PenaltyBreakComment;
/// The penalty for breaking before the first ``<<``.
+ /// \version 3.7
unsigned PenaltyBreakFirstLessLess;
/// The penalty for each line break introduced inside a string literal.
+ /// \version 3.7
unsigned PenaltyBreakString;
/// The penalty for breaking after template declaration.
+ /// \version 7
unsigned PenaltyBreakTemplateDeclaration;
/// The penalty for each character outside of the column limit.
+ /// \version 3.7
unsigned PenaltyExcessCharacter;
/// Penalty for putting the return type of a function onto its own
/// line.
+ /// \version 3.7
unsigned PenaltyReturnTypeOnItsOwnLine;
/// Penalty for each character of whitespace indentation
/// (counted relative to leading non-whitespace column).
+ /// \version 12
unsigned PenaltyIndentedWhitespace;
/// The ``&``, ``&&`` and ``*`` alignment style.
@@ -2748,6 +2928,7 @@ struct FormatStyle {
};
/// Pointer and reference alignment style.
+ /// \version 3.7
PointerAlignmentStyle PointerAlignment;
/// The number of columns to use for indentation of preprocessor statements.
@@ -2762,6 +2943,7 @@ struct FormatStyle {
/// # define BAR
/// #endif
/// \endcode
+ /// \version 14
int PPIndentWidth;
/// See documentation of ``RawStringFormats``.
@@ -2821,6 +3003,7 @@ struct FormatStyle {
/// BasedOnStyle: llvm
/// CanonicalDelimiter: 'cc'
/// \endcode
+ /// \version 6
std::vector<RawStringFormat> RawStringFormats;
/// \brief The ``&`` and ``&&`` alignment style.
@@ -2846,6 +3029,7 @@ struct FormatStyle {
/// \brief Reference alignment style (overrides ``PointerAlignment`` for
/// references).
+ /// \version 14
ReferenceAlignmentStyle ReferenceAlignment;
// clang-format off
@@ -2861,6 +3045,7 @@ struct FormatStyle {
/// /* second veryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryLongComment with plenty of
/// * information */
/// \endcode
+ /// \version 4
bool ReflowComments;
// clang-format on
@@ -2883,6 +3068,7 @@ struct FormatStyle {
/// int bar; int bar;
/// } // namespace b } // namespace b
/// \endcode
+ /// \version 14
unsigned ShortNamespaceLines;
/// Include sorting options.
@@ -2922,6 +3108,7 @@ struct FormatStyle {
/// insensitive fashion.
/// If ``CaseSensitive``, includes are sorted in an alphabetical or case
/// sensitive fashion.
+ /// \version 4
SortIncludesOptions SortIncludes;
/// Position for Java Static imports.
@@ -2945,6 +3132,7 @@ struct FormatStyle {
/// When sorting Java imports, by default static imports are placed before
/// non-static imports. If ``JavaStaticImportAfterImport`` is ``After``,
/// static imports are placed after non-static imports.
+ /// \version 12
SortJavaStaticImportOptions SortJavaStaticImport;
/// If ``true``, clang-format will sort using declarations.
@@ -2961,6 +3149,7 @@ struct FormatStyle {
/// using std::cout; vs. using std::cin;
/// using std::cin; using std::cout;
/// \endcode
+ /// \version 5
bool SortUsingDeclarations;
/// If ``true``, a space is inserted after C style casts.
@@ -2968,6 +3157,7 @@ struct FormatStyle {
/// true: false:
/// (int) i; vs. (int)i;
/// \endcode
+ /// \version 3.5
bool SpaceAfterCStyleCast;
/// If ``true``, a space is inserted after the logical not operator (``!``).
@@ -2975,6 +3165,7 @@ struct FormatStyle {
/// true: false:
/// ! someExpression(); vs. !someExpression();
/// \endcode
+ /// \version 9
bool SpaceAfterLogicalNot;
/// If \c true, a space will be inserted after the 'template' keyword.
@@ -2982,6 +3173,7 @@ struct FormatStyle {
/// true: false:
/// template <int> void foo(); vs. template<int> void foo();
/// \endcode
+ /// \version 4
bool SpaceAfterTemplateKeyword;
/// Different ways to put a space before opening parentheses.
@@ -3014,6 +3206,7 @@ struct FormatStyle {
};
/// Defines in which cases to put a space before or after pointer qualifiers
+ /// \version 12
SpaceAroundPointerQualifiersStyle SpaceAroundPointerQualifiers;
/// If ``false``, spaces will be removed before assignment operators.
@@ -3022,6 +3215,7 @@ struct FormatStyle {
/// int a = 5; vs. int a= 5;
/// a += 42; a+= 42;
/// \endcode
+ /// \version 3.7
bool SpaceBeforeAssignmentOperators;
/// If ``false``, spaces will be removed before case colon.
@@ -3031,6 +3225,7 @@ struct FormatStyle {
/// case 1 : break; case 1: break;
/// } }
/// \endcode
+ /// \version 12
bool SpaceBeforeCaseColon;
/// If ``true``, a space will be inserted before a C++11 braced list
@@ -3042,6 +3237,7 @@ struct FormatStyle {
/// vector<int> { 1, 2, 3 }; vector<int>{ 1, 2, 3 };
/// new int[3] { 1, 2, 3 }; new int[3]{ 1, 2, 3 };
/// \endcode
+ /// \version 7
bool SpaceBeforeCpp11BracedList;
/// If ``false``, spaces will be removed before constructor initializer
@@ -3050,6 +3246,7 @@ struct FormatStyle {
/// true: false:
/// Foo::Foo() : a(a) {} Foo::Foo(): a(a) {}
/// \endcode
+ /// \version 7
bool SpaceBeforeCtorInitializerColon;
/// If ``false``, spaces will be removed before inheritance colon.
@@ -3057,10 +3254,11 @@ struct FormatStyle {
/// true: false:
/// class Foo : Bar {} vs. class Foo: Bar {}
/// \endcode
+ /// \version 7
bool SpaceBeforeInheritanceColon;
/// Different ways to put a space before opening parentheses.
- enum SpaceBeforeParensOptions : unsigned char {
+ enum SpaceBeforeParensStyle : unsigned char {
/// Never put a space before opening parentheses.
/// \code
/// void f() {
@@ -3084,7 +3282,7 @@ struct FormatStyle {
/// ForEach and If macros. This is useful in projects where ForEach/If
/// macros are treated as function calls instead of control statements.
/// ``SBPO_ControlStatementsExceptForEachMacros`` remains an alias for
- /// backward compatability.
+ /// backward compatibility.
/// \code
/// void f() {
/// Q_FOREACH(...) {
@@ -3115,11 +3313,100 @@ struct FormatStyle {
/// }
/// }
/// \endcode
- SBPO_Always
+ SBPO_Always,
+ /// Configure each individual space before parentheses in
+ /// ``SpaceBeforeParensOptions``.
+ SBPO_Custom,
};
/// Defines in which cases to put a space before opening parentheses.
- SpaceBeforeParensOptions SpaceBeforeParens;
+ /// \version 3.5
+ SpaceBeforeParensStyle SpaceBeforeParens;
+
+ /// Precise control over the spacing before parentheses.
+ /// \code
+ /// # Should be declared this way:
+ /// SpaceBeforeParens: Custom
+ /// SpaceBeforeParensOptions:
+ /// AfterControlStatements: true
+ /// AfterFunctionDefinitionName: true
+ /// \endcode
+ struct SpaceBeforeParensCustom {
+ /// If ``true``, put space between control statement keywords
+ /// (for/if/while...) and opening parentheses.
+ /// \code
+ /// true: false:
+ /// if (...) {} vs. if(...) {}
+ /// \endcode
+ bool AfterControlStatements;
+ /// If ``true``, put space between foreach macros and opening parentheses.
+ /// \code
+ /// true: false:
+ /// FOREACH (...) vs. FOREACH(...)
+ /// <loop-body> <loop-body>
+ /// \endcode
+ bool AfterForeachMacros;
+ /// If ``true``, put a space between function declaration name and opening
+ /// parentheses.
+ /// \code
+ /// true: false:
+ /// void f (); vs. void f();
+ /// \endcode
+ bool AfterFunctionDeclarationName;
+ /// If ``true``, put a space between function definition name and opening
+ /// parentheses.
+ /// \code
+ /// true: false:
+ /// void f () {} vs. void f() {}
+ /// \endcode
+ bool AfterFunctionDefinitionName;
+ /// If ``true``, put space between if macros and opening parentheses.
+ /// \code
+ /// true: false:
+ /// IF (...) vs. IF(...)
+ /// <conditional-body> <conditional-body>
+ /// \endcode
+ bool AfterIfMacros;
+ /// If ``true``, put a space before opening parentheses only if the
+ /// parentheses are not empty.
+ /// \code
+ /// true: false:
+ /// void f (int a); vs. void f();
+ /// f (a); f();
+ /// \endcode
+ bool BeforeNonEmptyParentheses;
+
+ SpaceBeforeParensCustom()
+ : AfterControlStatements(false), AfterForeachMacros(false),
+ AfterFunctionDeclarationName(false),
+ AfterFunctionDefinitionName(false), AfterIfMacros(false),
+ BeforeNonEmptyParentheses(false) {}
+
+ bool operator==(const SpaceBeforeParensCustom &Other) const {
+ return AfterControlStatements == Other.AfterControlStatements &&
+ AfterForeachMacros == Other.AfterForeachMacros &&
+ AfterFunctionDeclarationName ==
+ Other.AfterFunctionDeclarationName &&
+ AfterFunctionDefinitionName == Other.AfterFunctionDefinitionName &&
+ AfterIfMacros == Other.AfterIfMacros &&
+ BeforeNonEmptyParentheses == Other.BeforeNonEmptyParentheses;
+ }
+ };
+
+ /// Control of individual space before parentheses.
+ ///
+ /// If ``SpaceBeforeParens`` is set to ``Custom``, use this to specify
+ /// how each individual space before parentheses case should be handled.
+ /// Otherwise, this is ignored.
+ /// \code{.yaml}
+ /// # Example of usage:
+ /// SpaceBeforeParens: Custom
+ /// SpaceBeforeParensOptions:
+ /// AfterControlStatements: true
+ /// AfterFunctionDefinitionName: true
+ /// \endcode
+ /// \version 14
+ SpaceBeforeParensCustom SpaceBeforeParensOptions;
/// If ``false``, spaces will be removed before range-based for loop
/// colon.
@@ -3127,6 +3414,7 @@ struct FormatStyle {
/// true: false:
/// for (auto v : values) {} vs. for(auto v: values) {}
/// \endcode
+ /// \version 7
bool SpaceBeforeRangeBasedForLoopColon;
/// If ``true``, spaces will be inserted into ``{}``.
@@ -3135,6 +3423,7 @@ struct FormatStyle {
/// void f() { } vs. void f() {}
/// while (true) { } while (true) {}
/// \endcode
+ /// \version 11
bool SpaceInEmptyBlock;
/// If ``true``, spaces may be inserted into ``()``.
@@ -3147,6 +3436,7 @@ struct FormatStyle {
/// } }
/// } }
/// \endcode
+ /// \version 3.7
bool SpaceInEmptyParentheses;
/// The number of spaces before trailing line comments
@@ -3163,6 +3453,7 @@ struct FormatStyle {
/// } // foo
/// }
/// \endcode
+ /// \version 3.7
unsigned SpacesBeforeTrailingComments;
/// Styles for adding spacing after ``<`` and before ``>`
@@ -3185,6 +3476,7 @@ struct FormatStyle {
SIAS_Leave
};
/// The SpacesInAnglesStyle to use for template argument lists.
+ /// \version 14
SpacesInAnglesStyle SpacesInAngles;
/// If ``true``, spaces will be inserted around if/for/switch/while
@@ -3194,6 +3486,7 @@ struct FormatStyle {
/// if ( a ) { ... } vs. if (a) { ... }
/// while ( i < 5 ) { ... } while (i < 5) { ... }
/// \endcode
+ /// \version 11
bool SpacesInConditionalStatement;
/// If ``true``, spaces are inserted inside container literals (e.g.
@@ -3203,6 +3496,7 @@ struct FormatStyle {
/// var arr = [ 1, 2, 3 ]; vs. var arr = [1, 2, 3];
/// f({a : 1, b : 2, c : 3}); f({a: 1, b: 2, c: 3});
/// \endcode
+ /// \version 3.7
bool SpacesInContainerLiterals;
/// If ``true``, spaces may be inserted into C style casts.
@@ -3210,6 +3504,7 @@ struct FormatStyle {
/// true: false:
/// x = ( int32 )y vs. x = (int32)y
/// \endcode
+ /// \version 3.7
bool SpacesInCStyleCastParentheses;
/// Control of spaces within a single line comment
@@ -3223,30 +3518,33 @@ struct FormatStyle {
/// How many spaces are allowed at the start of a line comment. To disable the
/// maximum set it to ``-1``, apart from that the maximum takes precedence
/// over the minimum.
- /// \code Minimum = 1 Maximum = -1
- /// // One space is forced
+ /// \code
+ /// Minimum = 1
+ /// Maximum = -1
+ /// // One space is forced
///
- /// // but more spaces are possible
+ /// // but more spaces are possible
///
- /// Minimum = 0
- /// Maximum = 0
- /// //Forces to start every comment directly after the slashes
+ /// Minimum = 0
+ /// Maximum = 0
+ /// //Forces to start every comment directly after the slashes
/// \endcode
///
/// Note that in line comment sections the relative indent of the subsequent
/// lines is kept, that means the following:
/// \code
- /// before: after:
- /// Minimum: 1
- /// //if (b) { // if (b) {
- /// // return true; // return true;
- /// //} // }
- ///
- /// Maximum: 0
- /// /// List: ///List:
- /// /// - Foo /// - Foo
- /// /// - Bar /// - Bar
+ /// before: after:
+ /// Minimum: 1
+ /// //if (b) { // if (b) {
+ /// // return true; // return true;
+ /// //} // }
+ ///
+ /// Maximum: 0
+ /// /// List: ///List:
+ /// /// - Foo /// - Foo
+ /// /// - Bar /// - Bar
/// \endcode
+ /// \version 14
SpacesInLineComment SpacesInLineCommentPrefix;
/// If ``true``, spaces will be inserted after ``(`` and before ``)``.
@@ -3254,6 +3552,7 @@ struct FormatStyle {
/// true: false:
/// t f( Deleted & ) & = delete; vs. t f(Deleted &) & = delete;
/// \endcode
+ /// \version 3.7
bool SpacesInParentheses;
/// If ``true``, spaces will be inserted after ``[`` and before ``]``.
@@ -3264,6 +3563,7 @@ struct FormatStyle {
/// int a[ 5 ]; vs. int a[5];
/// std::unique_ptr<int[]> foo() {} // Won't be affected
/// \endcode
+ /// \version 3.7
bool SpacesInSquareBrackets;
/// If ``true``, spaces will be before ``[``.
@@ -3273,6 +3573,7 @@ struct FormatStyle {
/// int a [5]; vs. int a[5];
/// int a [5][5]; vs. int a[5][5];
/// \endcode
+ /// \version 11
bool SpaceBeforeSquareBrackets;
/// Styles for adding spacing around ``:`` in bitfield definitions.
@@ -3301,6 +3602,7 @@ struct FormatStyle {
BFCS_After
};
/// The BitFieldColonSpacingStyle to use for bitfields.
+ /// \version 12
BitFieldColonSpacingStyle BitFieldColonSpacing;
/// Supported language standards for parsing and formatting C++ constructs.
@@ -3335,6 +3637,7 @@ struct FormatStyle {
/// c++03: latest:
/// vector<set<int> > x; vs. vector<set<int>> x;
/// \endcode
+ /// \version 3.7
LanguageStandard Standard;
/// Macros which are ignored in front of a statement, as if they were an
@@ -3351,9 +3654,11 @@ struct FormatStyle {
/// unsigned char data = 'x';
/// emit signal(data); // Now it's fine again.
/// \endcode
+ /// \version 12
std::vector<std::string> StatementAttributeLikeMacros;
/// The number of columns used for tab stops.
+ /// \version 3.7
unsigned TabWidth;
/// Different ways to use tab in formatting.
@@ -3375,9 +3680,11 @@ struct FormatStyle {
/// \brief Use ``\r\n`` instead of ``\n`` for line breaks.
/// Also used as fallback if ``DeriveLineEnding`` is true.
+ /// \version 11
bool UseCRLF;
/// The way to use tab characters in the resulting file.
+ /// \version 3.7
UseTabStyle UseTab;
bool operator==(const FormatStyle &R) const {
@@ -3392,8 +3699,6 @@ struct FormatStyle {
AlignOperands == R.AlignOperands &&
AlignTrailingComments == R.AlignTrailingComments &&
AllowAllArgumentsOnNextLine == R.AllowAllArgumentsOnNextLine &&
- AllowAllConstructorInitializersOnNextLine ==
- R.AllowAllConstructorInitializersOnNextLine &&
AllowAllParametersOfDeclarationOnNextLine ==
R.AllowAllParametersOfDeclarationOnNextLine &&
AllowShortEnumsOnASingleLine == R.AllowShortEnumsOnASingleLine &&
@@ -3424,8 +3729,6 @@ struct FormatStyle {
BreakStringLiterals == R.BreakStringLiterals &&
ColumnLimit == R.ColumnLimit && CommentPragmas == R.CommentPragmas &&
BreakInheritanceList == R.BreakInheritanceList &&
- ConstructorInitializerAllOnOneLineOrOnePerLine ==
- R.ConstructorInitializerAllOnOneLineOrOnePerLine &&
ConstructorInitializerIndentWidth ==
R.ConstructorInitializerIndentWidth &&
ContinuationIndentWidth == R.ContinuationIndentWidth &&
@@ -3437,6 +3740,7 @@ struct FormatStyle {
EmptyLineBeforeAccessModifier == R.EmptyLineBeforeAccessModifier &&
ExperimentalAutoDetectBinPacking ==
R.ExperimentalAutoDetectBinPacking &&
+ PackConstructorInitializers == R.PackConstructorInitializers &&
FixNamespaceComments == R.FixNamespaceComments &&
ForEachMacros == R.ForEachMacros &&
IncludeStyle.IncludeBlocks == R.IncludeStyle.IncludeBlocks &&
@@ -3482,6 +3786,8 @@ struct FormatStyle {
PenaltyBreakTemplateDeclaration ==
R.PenaltyBreakTemplateDeclaration &&
PointerAlignment == R.PointerAlignment &&
+ QualifierAlignment == R.QualifierAlignment &&
+ QualifierOrder == R.QualifierOrder &&
RawStringFormats == R.RawStringFormats &&
ReferenceAlignment == R.ReferenceAlignment &&
ShortNamespaceLines == R.ShortNamespaceLines &&
@@ -3497,6 +3803,7 @@ struct FormatStyle {
R.SpaceBeforeCtorInitializerColon &&
SpaceBeforeInheritanceColon == R.SpaceBeforeInheritanceColon &&
SpaceBeforeParens == R.SpaceBeforeParens &&
+ SpaceBeforeParensOptions == R.SpaceBeforeParensOptions &&
SpaceAroundPointerQualifiers == R.SpaceAroundPointerQualifiers &&
SpaceBeforeRangeBasedForLoopColon ==
R.SpaceBeforeRangeBasedForLoopColon &&
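The new FormatStyle fields introduced above (QualifierAlignment/QualifierOrder, PackConstructorInitializers, SpaceBeforeParensOptions) are plain members, so they can be set programmatically as well as through a .clang-format file. A small sketch, not part of this import, using the existing clang::format::reformat() entry point; the sample code string and file name are arbitrary:

```cpp
#include "clang/Format/Format.h"
#include "clang/Tooling/Core/Replacement.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"
#include <string>

int main() {
  using clang::format::FormatStyle;
  FormatStyle Style = clang::format::getLLVMStyle();

  // Fields added in this import.
  Style.QualifierAlignment = FormatStyle::QAS_Custom;
  Style.QualifierOrder = {"inline", "static", "type", "const"}; // must contain "type"
  Style.PackConstructorInitializers = FormatStyle::PCIS_NextLine;
  Style.SpaceBeforeParens = FormatStyle::SBPO_Custom;
  Style.SpaceBeforeParensOptions.AfterControlStatements = true;

  llvm::StringRef Code =
      "int const static x=1;\nint main(){if(x)return x;return 0;}\n";
  clang::tooling::Replacements Replaces = clang::format::reformat(
      Style, Code, {clang::tooling::Range(0, Code.size())}, "example.cpp");
  llvm::Expected<std::string> Formatted =
      clang::tooling::applyAllReplacements(Code, Replaces);
  if (!Formatted) {
    llvm::errs() << llvm::toString(Formatted.takeError()) << "\n";
    return 1;
  }
  llvm::outs() << *Formatted;
  return 0;
}
```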
diff --git a/clang/include/clang/Frontend/CompilerInstance.h b/clang/include/clang/Frontend/CompilerInstance.h
index 861b15020329..74e152ea5952 100644
--- a/clang/include/clang/Frontend/CompilerInstance.h
+++ b/clang/include/clang/Frontend/CompilerInstance.h
@@ -219,6 +219,9 @@ public:
// of the context or else not CompilerInstance specific.
bool ExecuteAction(FrontendAction &Act);
+ /// Load the list of plugins requested in the \c FrontendOptions.
+ void LoadRequestedPlugins();
+
/// }
/// @name Compiler Invocation and Options
/// {
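A short sketch of the intended call site for the new hook: once the invocation is set up, loading the plugins recorded in FrontendOptions is an explicit step before running the action. The helper function below is illustrative, not from this import:

```cpp
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Frontend/FrontendAction.h"

// Load the plugins requested in FrontendOptions (e.g. via -load), then run.
bool runActionWithPlugins(clang::CompilerInstance &Clang,
                          clang::FrontendAction &Act) {
  Clang.LoadRequestedPlugins();
  return Clang.ExecuteAction(Act);
}
```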
diff --git a/clang/include/clang/Frontend/CompilerInvocation.h b/clang/include/clang/Frontend/CompilerInvocation.h
index 2245439d0632..922c84a3bee2 100644
--- a/clang/include/clang/Frontend/CompilerInvocation.h
+++ b/clang/include/clang/Frontend/CompilerInvocation.h
@@ -50,6 +50,11 @@ class HeaderSearchOptions;
class PreprocessorOptions;
class TargetOptions;
+// This lets us create the DiagnosticsEngine with a properly-filled-out
+// DiagnosticOptions instance.
+std::unique_ptr<DiagnosticOptions>
+CreateAndPopulateDiagOpts(ArrayRef<const char *> Argv);
+
/// Fill out Opts based on the options given in Args.
///
/// Args must have been created from the OptTable returned by
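CreateAndPopulateDiagOpts lets a tool parse just the diagnostic-related flags up front, so the DiagnosticsEngine used while parsing the rest of the invocation already honors them. A sketch of the usual pattern; the helper name is illustrative:

```cpp
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/DiagnosticIDs.h"
#include "clang/Basic/DiagnosticOptions.h"
#include "clang/Frontend/CompilerInvocation.h"
#include "clang/Frontend/TextDiagnosticPrinter.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/Support/raw_ostream.h"
#include <memory>

std::unique_ptr<clang::DiagnosticsEngine>
makeDriverDiags(llvm::ArrayRef<const char *> Argv) {
  // Pre-parse the diagnostic options (e.g. -fcolor-diagnostics) from Argv.
  std::unique_ptr<clang::DiagnosticOptions> DiagOpts =
      clang::CreateAndPopulateDiagOpts(Argv);
  auto *Printer =
      new clang::TextDiagnosticPrinter(llvm::errs(), DiagOpts.get());
  return std::make_unique<clang::DiagnosticsEngine>(
      llvm::IntrusiveRefCntPtr<clang::DiagnosticIDs>(
          new clang::DiagnosticIDs()),
      DiagOpts.release(), Printer, /*ShouldOwnClient=*/true);
}
```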
diff --git a/clang/include/clang/Frontend/FrontendAction.h b/clang/include/clang/Frontend/FrontendAction.h
index dfefddfb4527..039f6f247b6d 100644
--- a/clang/include/clang/Frontend/FrontendAction.h
+++ b/clang/include/clang/Frontend/FrontendAction.h
@@ -270,17 +270,18 @@ public:
const std::vector<std::string> &arg) = 0;
enum ActionType {
- Cmdline, ///< Action is determined by the cc1 command-line
- ReplaceAction, ///< Replace the main action
- AddBeforeMainAction, ///< Execute the action before the main action
- AddAfterMainAction ///< Execute the action after the main action
+ CmdlineBeforeMainAction, ///< Execute the action before the main action if
+ ///< on the command line
+ CmdlineAfterMainAction, ///< Execute the action after the main action if on
+ ///< the command line
+ ReplaceAction, ///< Replace the main action
+ AddBeforeMainAction, ///< Execute the action before the main action
+ AddAfterMainAction ///< Execute the action after the main action
};
/// Get the action type for this plugin
///
- /// \return The action type. If the type is Cmdline then by default the
- /// plugin does nothing and what it does is determined by the cc1
- /// command-line.
- virtual ActionType getActionType() { return Cmdline; }
+ /// \return The action type. By default we use CmdlineAfterMainAction.
+ virtual ActionType getActionType() { return CmdlineAfterMainAction; }
};
/// Abstract base class to use for preprocessor-based frontend actions.
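With the Cmdline action type split into CmdlineBeforeMainAction and CmdlineAfterMainAction, a plugin now states explicitly whether it runs before or after the main action when it is only named on the command line. A minimal plugin skeleton using the new default; the plugin name and description are placeholders:

```cpp
#include "clang/AST/ASTConsumer.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Frontend/FrontendAction.h"
#include "clang/Frontend/FrontendPluginRegistry.h"
#include <memory>
#include <string>
#include <vector>

namespace {
class ExamplePluginAction : public clang::PluginASTAction {
protected:
  std::unique_ptr<clang::ASTConsumer>
  CreateASTConsumer(clang::CompilerInstance &CI,
                    llvm::StringRef InFile) override {
    return std::make_unique<clang::ASTConsumer>(); // no-op consumer
  }

  bool ParseArgs(const clang::CompilerInstance &CI,
                 const std::vector<std::string> &Args) override {
    return true; // accept any arguments
  }

public:
  // Run after the main action when requested on the command line; this is
  // also the new default returned by the base class.
  ActionType getActionType() override { return CmdlineAfterMainAction; }
};
} // namespace

static clang::FrontendPluginRegistry::Add<ExamplePluginAction>
    X("example-plugin", "illustrative plugin registration");
```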
diff --git a/clang/include/clang/Frontend/FrontendActions.h b/clang/include/clang/Frontend/FrontendActions.h
index ff8d4417eaa4..545a7e842c4f 100644
--- a/clang/include/clang/Frontend/FrontendActions.h
+++ b/clang/include/clang/Frontend/FrontendActions.h
@@ -299,6 +299,15 @@ protected:
bool hasPCHSupport() const override { return true; }
};
+class GetDependenciesByModuleNameAction : public PreprocessOnlyAction {
+ StringRef ModuleName;
+ void ExecuteAction() override;
+
+public:
+ GetDependenciesByModuleNameAction(StringRef ModuleName)
+ : ModuleName(ModuleName) {}
+};
+
} // end namespace clang
#endif
diff --git a/clang/include/clang/Frontend/FrontendOptions.h b/clang/include/clang/Frontend/FrontendOptions.h
index 15c905d712a3..1d9d89a28c6c 100644
--- a/clang/include/clang/Frontend/FrontendOptions.h
+++ b/clang/include/clang/Frontend/FrontendOptions.h
@@ -289,6 +289,9 @@ public:
/// Whether we are performing an implicit module build.
unsigned BuildingImplicitModule : 1;
+ /// Whether to use a filesystem lock when building implicit modules.
+ unsigned BuildingImplicitModuleUsesLock : 1;
+
/// Whether we should embed all used files into the PCM file.
unsigned ModulesEmbedAllFiles : 1;
@@ -370,7 +373,7 @@ public:
ObjCMT_MigrateDecls | ObjCMT_PropertyDotSyntax)
};
unsigned ObjCMTAction = ObjCMT_None;
- std::string ObjCMTWhiteListPath;
+ std::string ObjCMTAllowListPath;
std::string MTMigrateDir;
std::string ARCMTMigrateReportOut;
@@ -461,9 +464,9 @@ public:
SkipFunctionBodies(false), UseGlobalModuleIndex(true),
GenerateGlobalModuleIndex(true), ASTDumpDecls(false),
ASTDumpLookups(false), BuildingImplicitModule(false),
- ModulesEmbedAllFiles(false), IncludeTimestamps(true),
- UseTemporary(true), AllowPCMWithCompilerErrors(false),
- TimeTraceGranularity(500) {}
+ BuildingImplicitModuleUsesLock(true), ModulesEmbedAllFiles(false),
+ IncludeTimestamps(true), UseTemporary(true),
+ AllowPCMWithCompilerErrors(false), TimeTraceGranularity(500) {}
/// getInputKindForExtension - Return the appropriate input kind for a file
/// extension. For example, "c" would return Language::C.
diff --git a/clang/include/clang/Interpreter/Interpreter.h b/clang/include/clang/Interpreter/Interpreter.h
index 020cbe2db3d0..2dc0fd5963a2 100644
--- a/clang/include/clang/Interpreter/Interpreter.h
+++ b/clang/include/clang/Interpreter/Interpreter.h
@@ -16,6 +16,9 @@
#include "clang/Interpreter/PartialTranslationUnit.h"
+#include "clang/AST/GlobalDecl.h"
+
+#include "llvm/ExecutionEngine/JITSymbol.h"
#include "llvm/Support/Error.h"
#include <memory>
@@ -65,6 +68,20 @@ public:
return Execute(*PTU);
return llvm::Error::success();
}
+
+ /// \returns the \c JITTargetAddress of a \c GlobalDecl. This interface uses
+ /// the CodeGenModule's internal mangling cache to avoid recomputing the
+ /// mangled name.
+ llvm::Expected<llvm::JITTargetAddress> getSymbolAddress(GlobalDecl GD) const;
+
+ /// \returns the \c JITTargetAddress of a given name as written in the IR.
+ llvm::Expected<llvm::JITTargetAddress>
+ getSymbolAddress(llvm::StringRef IRName) const;
+
+ /// \returns the \c JITTargetAddress of a given name as written in the object
+ /// file.
+ llvm::Expected<llvm::JITTargetAddress>
+ getSymbolAddressFromLinkerName(llvm::StringRef LinkerName) const;
};
} // namespace clang
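A sketch of how the new symbol-lookup interface might be used once an Interpreter instance exists; how the instance is created is elided, the snippet uses extern "C" so the IR name matches the source name, and the function body is arbitrary:

```cpp
#include "clang/Interpreter/Interpreter.h"
#include "llvm/Support/Error.h"
#include <cstdint>

llvm::Error callJittedFunction(clang::Interpreter &Interp) {
  // Incrementally compile and run a definition.
  if (llvm::Error Err =
          Interp.ParseAndExecute("extern \"C\" int answer() { return 42; }"))
    return Err;

  // Look the definition up by its IR name (extern "C", so no mangling).
  llvm::Expected<llvm::JITTargetAddress> Addr =
      Interp.getSymbolAddress("answer");
  if (!Addr)
    return Addr.takeError();

  auto *Fn = reinterpret_cast<int (*)()>(static_cast<std::uintptr_t>(*Addr));
  int Result = Fn(); // 42
  (void)Result;
  return llvm::Error::success();
}
```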
diff --git a/clang/include/clang/Lex/DependencyDirectivesSourceMinimizer.h b/clang/include/clang/Lex/DependencyDirectivesSourceMinimizer.h
index 9bb820156c25..121ca893e314 100644
--- a/clang/include/clang/Lex/DependencyDirectivesSourceMinimizer.h
+++ b/clang/include/clang/Lex/DependencyDirectivesSourceMinimizer.h
@@ -39,6 +39,9 @@ enum TokenKind {
pp_import,
pp_pragma_import,
pp_pragma_once,
+ pp_pragma_push_macro,
+ pp_pragma_pop_macro,
+ pp_pragma_include_alias,
pp_include_next,
pp_if,
pp_ifdef,
diff --git a/clang/include/clang/Lex/HeaderMap.h b/clang/include/clang/Lex/HeaderMap.h
index 53108b00bd16..ca6a49bae3bf 100644
--- a/clang/include/clang/Lex/HeaderMap.h
+++ b/clang/include/clang/Lex/HeaderMap.h
@@ -77,13 +77,6 @@ public:
static std::unique_ptr<HeaderMap> Create(const FileEntry *FE,
FileManager &FM);
- /// Check to see if the specified relative filename is located in this
- /// HeaderMap. If so, open it and return its FileEntry. If RawPath is not
- /// NULL and the file is found, RawPath will be set to the raw path at which
- /// the file was found in the file system. For example, for a search path
- /// ".." and a filename "../file.h" this would be "../../file.h".
- Optional<FileEntryRef> LookupFile(StringRef Filename, FileManager &FM) const;
-
using HeaderMapImpl::dump;
using HeaderMapImpl::getFileName;
using HeaderMapImpl::lookupFilename;
diff --git a/clang/include/clang/Lex/HeaderSearch.h b/clang/include/clang/Lex/HeaderSearch.h
index 93d6ea72270a..b3445703f782 100644
--- a/clang/include/clang/Lex/HeaderSearch.h
+++ b/clang/include/clang/Lex/HeaderSearch.h
@@ -20,9 +20,12 @@
#include "clang/Lex/ModuleMap.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
#include "llvm/Support/Allocator.h"
#include <cassert>
#include <cstddef>
@@ -48,7 +51,7 @@ class TargetInfo;
/// The preprocessor keeps track of this information for each
/// file that is \#included.
struct HeaderFileInfo {
- /// True if this is a \#import'd or \#pragma once file.
+ /// True if this is a \#import'd file.
unsigned isImport : 1;
/// True if this is a \#pragma once file.
@@ -110,6 +113,14 @@ struct HeaderFileInfo {
/// of the framework.
StringRef Framework;
+ /// List of aliases that this header is known as.
+ /// Most headers should only have at most one alias, but a handful
+ /// have two.
+ llvm::SetVector<llvm::SmallString<32>,
+ llvm::SmallVector<llvm::SmallString<32>, 2>,
+ llvm::SmallSet<llvm::SmallString<32>, 2>>
+ Aliases;
+
HeaderFileInfo()
: isImport(false), isPragmaOnce(false), DirInfo(SrcMgr::C_User),
External(false), isModuleHeader(false), isCompilingModuleHeader(false),
@@ -119,13 +130,6 @@ struct HeaderFileInfo {
/// any.
const IdentifierInfo *
getControllingMacro(ExternalPreprocessorSource *External);
-
- /// Determine whether this is a non-default header file info, e.g.,
- /// it corresponds to an actual header we've included or tried to include.
- bool isNonDefault() const {
- return isImport || isPragmaOnce || NumIncludes || ControllingMacro ||
- ControllingMacroID;
- }
};
/// An external source of header file information, which may supply
@@ -161,6 +165,9 @@ class HeaderSearch {
/// Header-search options used to initialize this header search.
std::shared_ptr<HeaderSearchOptions> HSOpts;
+ /// Mapping from SearchDir to HeaderSearchOptions::UserEntries indices.
+ llvm::DenseMap<unsigned, unsigned> SearchDirToHSEntry;
+
DiagnosticsEngine &Diags;
FileManager &FileMgr;
@@ -171,6 +178,9 @@ class HeaderSearch {
/// NoCurDirSearch is true, then the check for the file in the current
/// directory is suppressed.
std::vector<DirectoryLookup> SearchDirs;
+ /// Whether the DirectoryLookup at the corresponding index in SearchDirs has
+ /// been successfully used to look up a file.
+ std::vector<bool> SearchDirsUsage;
unsigned AngledDirIdx = 0;
unsigned SystemDirIdx = 0;
bool NoCurDirSearch = false;
@@ -269,15 +279,17 @@ public:
DiagnosticsEngine &getDiags() const { return Diags; }
/// Interface for setting the file search paths.
- void SetSearchPaths(const std::vector<DirectoryLookup> &dirs,
- unsigned angledDirIdx, unsigned systemDirIdx,
- bool noCurDirSearch) {
+ void SetSearchPaths(std::vector<DirectoryLookup> dirs, unsigned angledDirIdx,
+ unsigned systemDirIdx, bool noCurDirSearch,
+ llvm::DenseMap<unsigned, unsigned> searchDirToHSEntry) {
assert(angledDirIdx <= systemDirIdx && systemDirIdx <= dirs.size() &&
"Directory indices are unordered");
- SearchDirs = dirs;
+ SearchDirs = std::move(dirs);
+ SearchDirsUsage.assign(SearchDirs.size(), false);
AngledDirIdx = angledDirIdx;
SystemDirIdx = systemDirIdx;
NoCurDirSearch = noCurDirSearch;
+ SearchDirToHSEntry = std::move(searchDirToHSEntry);
//LookupFileCache.clear();
}
@@ -285,6 +297,7 @@ public:
void AddSearchPath(const DirectoryLookup &dir, bool isAngled) {
unsigned idx = isAngled ? SystemDirIdx : AngledDirIdx;
SearchDirs.insert(SearchDirs.begin() + idx, dir);
+ SearchDirsUsage.insert(SearchDirsUsage.begin() + idx, false);
if (!isAngled)
AngledDirIdx++;
SystemDirIdx++;
@@ -430,8 +443,8 @@ public:
/// \return false if \#including the file will have no effect or true
/// if we should include it.
bool ShouldEnterIncludeFile(Preprocessor &PP, const FileEntry *File,
- bool isImport, bool ModulesEnabled,
- Module *M);
+ bool isImport, bool ModulesEnabled, Module *M,
+ bool &IsFirstIncludeOfFile);
/// Return whether the specified file is a normal header,
/// a system header, or a C++ friendly system header.
@@ -439,11 +452,10 @@ public:
return (SrcMgr::CharacteristicKind)getFileInfo(File).DirInfo;
}
- /// Mark the specified file as a "once only" file, e.g. due to
+ /// Mark the specified file as a "once only" file due to
/// \#pragma once.
void MarkFileIncludeOnce(const FileEntry *File) {
HeaderFileInfo &FI = getFileInfo(File);
- FI.isImport = true;
FI.isPragmaOnce = true;
}
@@ -453,6 +465,10 @@ public:
getFileInfo(File).DirInfo = SrcMgr::C_System;
}
+ void AddFileAlias(const FileEntry *File, StringRef Alias) {
+ getFileInfo(File).Aliases.insert(Alias);
+ }
+
/// Mark the specified file as part of a module.
void MarkFileModuleHeader(const FileEntry *FE,
ModuleMap::ModuleHeaderRole Role,
@@ -473,11 +489,6 @@ public:
getFileInfo(File).ControllingMacro = ControllingMacro;
}
- /// Return true if this is the first time encountering this header.
- bool FirstTimeLexingFile(const FileEntry *File) {
- return getFileInfo(File).NumIncludes == 1;
- }
-
/// Determine whether this file is intended to be safe from
/// multiple inclusions, e.g., it has \#pragma once or a controlling
/// macro.
@@ -485,13 +496,16 @@ public:
/// This routine does not consider the effect of \#import
bool isFileMultipleIncludeGuarded(const FileEntry *File);
- /// Determine whether the given file is known to have ever been \#imported
- /// (or if it has been \#included and we've encountered a \#pragma once).
+ /// Determine whether the given file is known to have ever been \#imported.
bool hasFileBeenImported(const FileEntry *File) {
const HeaderFileInfo *FI = getExistingFileInfo(File);
return FI && FI->isImport;
}
+ /// Determine which HeaderSearchOptions::UserEntries have been successfully
+ /// used so far and mark their indices with 'true' in the resulting bit vector.
+ std::vector<bool> computeUserEntryUsage() const;
+
/// This method returns a HeaderMap for the specified
/// FileEntry, uniquing them through the 'HeaderMaps' datastructure.
const HeaderMap *CreateHeaderMap(const FileEntry *FE);
@@ -547,6 +561,8 @@ public:
///
/// \param ModuleName The name of the module we're looking for.
///
+ /// \param ImportLoc Location of the module include/import.
+ ///
/// \param AllowSearch Whether we are allowed to search in the various
/// search directories to produce a module definition. If not, this lookup
/// will only return an already-known module.
@@ -555,7 +571,9 @@ public:
/// in subdirectories.
///
/// \returns The module with the given name.
- Module *lookupModule(StringRef ModuleName, bool AllowSearch = true,
+ Module *lookupModule(StringRef ModuleName,
+ SourceLocation ImportLoc = SourceLocation(),
+ bool AllowSearch = true,
bool AllowExtraModuleMapSearch = false);
/// Try to find a module map file in the given directory, returning
@@ -625,11 +643,14 @@ private:
/// but for compatibility with some buggy frameworks, additional attempts
/// may be made to find the module under a related-but-different search-name.
///
+ /// \param ImportLoc Location of the module include/import.
+ ///
/// \param AllowExtraModuleMapSearch Whether we allow to search modulemaps
/// in subdirectories.
///
/// \returns The module named ModuleName.
Module *lookupModule(StringRef ModuleName, StringRef SearchName,
+ SourceLocation ImportLoc,
bool AllowExtraModuleMapSearch = false);
/// Retrieve the name of the (to-be-)cached module file that should
@@ -694,6 +715,14 @@ private:
Module *RequestingModule,
ModuleMap::KnownHeader *SuggestedModule);
+ /// Cache the result of a successful lookup at the given include location
+ /// using the search path at index `HitIdx`.
+ void cacheLookupSuccess(LookupFileCacheInfo &CacheLookup, unsigned HitIdx,
+ SourceLocation IncludeLoc);
+ /// Note that a lookup at the given include location was successful using the
+ /// search path at index `HitIdx`.
+ void noteLookupUsage(unsigned HitIdx, SourceLocation IncludeLoc);
+
public:
/// Retrieve the module map.
ModuleMap &getModuleMap() { return ModMap; }
@@ -743,6 +772,9 @@ public:
search_dir_iterator system_dir_end() const { return SearchDirs.end(); }
+ /// Get the index of the given search directory.
+ Optional<unsigned> searchDirIdx(const DirectoryLookup &DL) const;
+
/// Retrieve a uniqued framework name.
StringRef getUniqueFrameworkName(StringRef Framework);
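
For orientation, the new SearchDirsUsage / SearchDirToHSEntry bookkeeping lets clients find out which user-provided search path entries were actually used, via computeUserEntryUsage(). The following is a minimal sketch of how a driver-side tool might consume that API; the CompilerInstance accessors and the reporting loop are illustrative assumptions, not part of this patch.

#include "clang/Frontend/CompilerInstance.h"
#include "clang/Lex/HeaderSearch.h"
#include "llvm/Support/raw_ostream.h"
#include <vector>

// Sketch: report user include-path entries never used to find a header.
static void reportUnusedIncludePaths(clang::CompilerInstance &CI) {
  clang::HeaderSearch &HS = CI.getPreprocessor().getHeaderSearchInfo();
  // One bit per HeaderSearchOptions::UserEntries element; true means "used".
  std::vector<bool> Used = HS.computeUserEntryUsage();
  const auto &Entries = CI.getHeaderSearchOpts().UserEntries;
  for (unsigned I = 0, E = Used.size(); I != E; ++I)
    if (I < Entries.size() && !Used[I])
      llvm::errs() << "unused include path: " << Entries[I].Path << "\n";
}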
diff --git a/clang/include/clang/Lex/HeaderSearchOptions.h b/clang/include/clang/Lex/HeaderSearchOptions.h
index 42f3cff8c57a..4efdfc26c3c6 100644
--- a/clang/include/clang/Lex/HeaderSearchOptions.h
+++ b/clang/include/clang/Lex/HeaderSearchOptions.h
@@ -14,10 +14,11 @@
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/HashBuilder.h"
#include <cstdint>
+#include <map>
#include <string>
#include <vector>
-#include <map>
namespace clang {
@@ -256,11 +257,23 @@ inline llvm::hash_code hash_value(const HeaderSearchOptions::Entry &E) {
return llvm::hash_combine(E.Path, E.Group, E.IsFramework, E.IgnoreSysRoot);
}
+template <typename HasherT, llvm::support::endianness Endianness>
+inline void addHash(llvm::HashBuilderImpl<HasherT, Endianness> &HBuilder,
+ const HeaderSearchOptions::Entry &E) {
+ HBuilder.add(E.Path, E.Group, E.IsFramework, E.IgnoreSysRoot);
+}
+
inline llvm::hash_code
hash_value(const HeaderSearchOptions::SystemHeaderPrefix &SHP) {
return llvm::hash_combine(SHP.Prefix, SHP.IsSystemHeader);
}
+template <typename HasherT, llvm::support::endianness Endianness>
+inline void addHash(llvm::HashBuilderImpl<HasherT, Endianness> &HBuilder,
+ const HeaderSearchOptions::SystemHeaderPrefix &SHP) {
+ HBuilder.add(SHP.Prefix, SHP.IsSystemHeader);
+}
+
} // namespace clang
#endif // LLVM_CLANG_LEX_HEADERSEARCHOPTIONS_H
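
The addHash overloads above are the extension point llvm::HashBuilder finds by argument-dependent lookup, complementing the existing hash_value functions. A hedged sketch of a caller folding an Entry into a hash follows; the choice of MD5 and the final() call reflect my reading of the llvm::HashBuilder interface at this revision and should be treated as illustrative.

#include "clang/Lex/HeaderSearchOptions.h"
#include "llvm/Support/HashBuilder.h"
#include "llvm/Support/MD5.h"

llvm::MD5::MD5Result hashEntry(const clang::HeaderSearchOptions::Entry &E) {
  llvm::HashBuilder<llvm::MD5, llvm::support::endianness::native> HBuilder;
  HBuilder.add(E); // dispatches to addHash(HBuilder, E) defined above
  return HBuilder.final();
}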
diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h
index a291520ae5ca..ba1706b1d13e 100644
--- a/clang/include/clang/Lex/Lexer.h
+++ b/clang/include/clang/Lex/Lexer.h
@@ -128,6 +128,9 @@ class Lexer : public PreprocessorLexer {
bool HasLeadingEmptyMacro;
+ /// True if this is the first time we're lexing the input file.
+ bool IsFirstTimeLexingFile;
+
// NewLinePtr - A pointer to new line character '\n' being lexed. For '\r\n',
// it also points to '\n'.
const char *NewLinePtr;
@@ -142,19 +145,22 @@ public:
/// with the specified preprocessor managing the lexing process. This lexer
/// assumes that the associated file buffer and Preprocessor objects will
/// outlive it, so it doesn't take ownership of either of them.
- Lexer(FileID FID, const llvm::MemoryBufferRef &InputFile, Preprocessor &PP);
+ Lexer(FileID FID, const llvm::MemoryBufferRef &InputFile, Preprocessor &PP,
+ bool IsFirstIncludeOfFile = true);
/// Lexer constructor - Create a new raw lexer object. This object is only
/// suitable for calls to 'LexFromRawLexer'. This lexer assumes that the
/// text range will outlive it, so it doesn't take ownership of it.
Lexer(SourceLocation FileLoc, const LangOptions &LangOpts,
- const char *BufStart, const char *BufPtr, const char *BufEnd);
+ const char *BufStart, const char *BufPtr, const char *BufEnd,
+ bool IsFirstIncludeOfFile = true);
/// Lexer constructor - Create a new raw lexer object. This object is only
/// suitable for calls to 'LexFromRawLexer'. This lexer assumes that the
/// text range will outlive it, so it doesn't take ownership of it.
Lexer(FileID FID, const llvm::MemoryBufferRef &FromFile,
- const SourceManager &SM, const LangOptions &LangOpts);
+ const SourceManager &SM, const LangOptions &LangOpts,
+ bool IsFirstIncludeOfFile = true);
Lexer(const Lexer &) = delete;
Lexer &operator=(const Lexer &) = delete;
@@ -536,7 +542,8 @@ public:
bool SkipTrailingWhitespaceAndNewLine);
/// Returns true if the given character could appear in an identifier.
- static bool isIdentifierBodyChar(char c, const LangOptions &LangOpts);
+ static bool isAsciiIdentifierContinueChar(char c,
+ const LangOptions &LangOpts);
/// Checks whether new line pointed by Str is preceded by escape
/// sequence.
@@ -562,6 +569,9 @@ public:
static StringRef getIndentationForLine(SourceLocation Loc,
const SourceManager &SM);
+ /// Check if this is the first time we're lexing the input file.
+ bool isFirstTimeLexingFile() const { return IsFirstTimeLexingFile; }
+
private:
//===--------------------------------------------------------------------===//
// Internal implementation interfaces.
@@ -573,10 +583,7 @@ private:
bool CheckUnicodeWhitespace(Token &Result, uint32_t C, const char *CurPtr);
- /// Given that a token begins with the Unicode character \p C, figure out
- /// what kind of token it is and dispatch to the appropriate lexing helper
- /// function.
- bool LexUnicode(Token &Result, uint32_t C, const char *CurPtr);
+ bool LexUnicodeIdentifierStart(Token &Result, uint32_t C, const char *CurPtr);
/// FormTokenWithChars - When we lex a token, we have identified a span
/// starting at BufferPtr, going to TokEnd that forms the token. This method
@@ -701,7 +708,11 @@ private:
bool IsStringLiteral);
// Helper functions to lex the remainder of a token of the specific type.
- bool LexIdentifier (Token &Result, const char *CurPtr);
+
+ // This function handles both ASCII and Unicode identifiers after
+ // the first codepoint of the identifier has been parsed.
+ bool LexIdentifierContinue(Token &Result, const char *CurPtr);
+
bool LexNumericConstant (Token &Result, const char *CurPtr);
bool LexStringLiteral (Token &Result, const char *CurPtr,
tok::TokenKind Kind);
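
The new IsFirstIncludeOfFile flag is threaded through every Lexer constructor so that isFirstTimeLexingFile() can take over from the removed HeaderSearch::FirstTimeLexingFile(). Below is a small raw-lexing sketch using the extended constructor; the helper name and the way the buffer and start location are obtained are assumptions for illustration only.

#include "clang/Basic/LangOptions.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/TokenKinds.h"
#include "clang/Lex/Lexer.h"
#include "clang/Lex/Token.h"
#include "llvm/ADT/StringRef.h"

// Sketch: raw-lex a buffer; BeginLoc should come from a SourceManager.
void rawLexBuffer(clang::SourceLocation BeginLoc, llvm::StringRef Buffer,
                  const clang::LangOptions &LangOpts) {
  clang::Lexer RawLex(BeginLoc, LangOpts, Buffer.begin(), Buffer.begin(),
                      Buffer.end(), /*IsFirstIncludeOfFile=*/true);
  clang::Token Tok;
  do {
    RawLex.LexFromRawLexer(Tok);
    // ... inspect Tok here ...
  } while (Tok.isNot(clang::tok::eof));
}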
diff --git a/clang/include/clang/Lex/LiteralSupport.h b/clang/include/clang/Lex/LiteralSupport.h
index f131f045a73e..32471969f596 100644
--- a/clang/include/clang/Lex/LiteralSupport.h
+++ b/clang/include/clang/Lex/LiteralSupport.h
@@ -224,7 +224,7 @@ class StringLiteralParser {
unsigned UDSuffixOffset;
public:
StringLiteralParser(ArrayRef<Token> StringToks,
- Preprocessor &PP, bool Complain = true);
+ Preprocessor &PP);
StringLiteralParser(ArrayRef<Token> StringToks,
const SourceManager &sm, const LangOptions &features,
const TargetInfo &target,
diff --git a/clang/include/clang/Lex/PPCallbacks.h b/clang/include/clang/Lex/PPCallbacks.h
index bcf49c577735..76a74f20cc3b 100644
--- a/clang/include/clang/Lex/PPCallbacks.h
+++ b/clang/include/clang/Lex/PPCallbacks.h
@@ -252,9 +252,20 @@ public:
}
/// Callback invoked when a \#pragma warning directive is read.
- virtual void PragmaWarning(SourceLocation Loc, StringRef WarningSpec,
- ArrayRef<int> Ids) {
- }
+ enum PragmaWarningSpecifier {
+ PWS_Default,
+ PWS_Disable,
+ PWS_Error,
+ PWS_Once,
+ PWS_Suppress,
+ PWS_Level1,
+ PWS_Level2,
+ PWS_Level3,
+ PWS_Level4,
+ };
+ virtual void PragmaWarning(SourceLocation Loc,
+ PragmaWarningSpecifier WarningSpec,
+ ArrayRef<int> Ids) {}
/// Callback invoked when a \#pragma warning(push) directive is read.
virtual void PragmaWarningPush(SourceLocation Loc, int Level) {
@@ -492,6 +503,11 @@ public:
Second->PragmaComment(Loc, Kind, Str);
}
+ void PragmaMark(SourceLocation Loc, StringRef Trivia) override {
+ First->PragmaMark(Loc, Trivia);
+ Second->PragmaMark(Loc, Trivia);
+ }
+
void PragmaDetectMismatch(SourceLocation Loc, StringRef Name,
StringRef Value) override {
First->PragmaDetectMismatch(Loc, Name, Value);
@@ -535,7 +551,7 @@ public:
Second->PragmaOpenCLExtension(NameLoc, Name, StateLoc, State);
}
- void PragmaWarning(SourceLocation Loc, StringRef WarningSpec,
+ void PragmaWarning(SourceLocation Loc, PragmaWarningSpecifier WarningSpec,
ArrayRef<int> Ids) override {
First->PragmaWarning(Loc, WarningSpec, Ids);
Second->PragmaWarning(Loc, WarningSpec, Ids);
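
Because PragmaWarning now receives a parsed PragmaWarningSpecifier rather than the raw spelling, existing PPCallbacks subclasses need a matching signature update. A sketch of an observer adapting to the new signature; the class name and the message text are invented for illustration.

#include "clang/Basic/SourceLocation.h"
#include "clang/Lex/PPCallbacks.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/raw_ostream.h"

class WarningPragmaLogger : public clang::PPCallbacks {
  void PragmaWarning(clang::SourceLocation Loc, PragmaWarningSpecifier Spec,
                     llvm::ArrayRef<int> Ids) override {
    if (Spec == PWS_Disable) // enumerators are inherited from PPCallbacks
      for (int Id : Ids)
        llvm::errs() << "#pragma warning(disable : " << Id << ")\n";
  }
};
// Registered as usual, e.g. PP.addPPCallbacks(std::make_unique<WarningPragmaLogger>()).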
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index 7ab13640ce2c..ea96bb12bec6 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -15,6 +15,7 @@
#define LLVM_CLANG_LEX_PREPROCESSOR_H
#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/DiagnosticIDs.h"
#include "clang/Basic/IdentifierTable.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/LangOptions.h"
@@ -786,6 +787,42 @@ private:
using WarnUnusedMacroLocsTy = llvm::SmallDenseSet<SourceLocation, 32>;
WarnUnusedMacroLocsTy WarnUnusedMacroLocs;
+ /// A pair of an optional message and source location used by pragmas that
+ /// annotate macros, such as '#pragma clang restrict_expansion' and '#pragma
+ /// clang deprecated'. It stores the optional message and the location of the
+ /// annotation pragma for use in producing diagnostics and notes.
+ using MsgLocationPair = std::pair<std::string, SourceLocation>;
+
+ struct MacroAnnotationInfo {
+ SourceLocation Location;
+ std::string Message;
+ };
+
+ struct MacroAnnotations {
+ llvm::Optional<MacroAnnotationInfo> DeprecationInfo;
+ llvm::Optional<MacroAnnotationInfo> RestrictExpansionInfo;
+ llvm::Optional<SourceLocation> FinalAnnotationLoc;
+
+ static MacroAnnotations makeDeprecation(SourceLocation Loc,
+ std::string Msg) {
+ return MacroAnnotations{MacroAnnotationInfo{Loc, std::move(Msg)},
+ llvm::None, llvm::None};
+ }
+
+ static MacroAnnotations makeRestrictExpansion(SourceLocation Loc,
+ std::string Msg) {
+ return MacroAnnotations{
+ llvm::None, MacroAnnotationInfo{Loc, std::move(Msg)}, llvm::None};
+ }
+
+ static MacroAnnotations makeFinal(SourceLocation Loc) {
+ return MacroAnnotations{llvm::None, llvm::None, Loc};
+ }
+ };
+
+ /// Warning information for macro annotations.
+ llvm::DenseMap<const IdentifierInfo *, MacroAnnotations> AnnotationInfos;
+
/// A "freelist" of MacroArg objects that can be
/// reused for quick allocation.
MacroArgs *MacroArgCache = nullptr;
@@ -1331,7 +1368,7 @@ public:
///
/// Emits a diagnostic, doesn't enter the file, and returns true on error.
bool EnterSourceFile(FileID FID, const DirectoryLookup *Dir,
- SourceLocation Loc);
+ SourceLocation Loc, bool IsFirstIncludeOfFile = true);
/// Add a Macro to the top of the include stack and start lexing
/// tokens from it instead of the current buffer.
@@ -1953,7 +1990,8 @@ public:
/// This either returns the EOF token and returns true, or
/// pops a level off the include stack and returns false, at which point the
/// client should call lex again.
- bool HandleEndOfFile(Token &Result, bool isEndOfMacro = false);
+ bool HandleEndOfFile(Token &Result, SourceLocation Loc,
+ bool isEndOfMacro = false);
/// Callback invoked when the current TokenLexer hits the end of its
/// token stream.
@@ -2363,12 +2401,14 @@ private:
// Pragmas.
void HandlePragmaDirective(PragmaIntroducer Introducer);
+ void ResolvePragmaIncludeInstead(SourceLocation Location) const;
public:
void HandlePragmaOnce(Token &OnceTok);
void HandlePragmaMark(Token &MarkTok);
void HandlePragmaPoison();
void HandlePragmaSystemHeader(Token &SysHeaderTok);
+ void HandlePragmaIncludeInstead(Token &Tok);
void HandlePragmaDependency(Token &DependencyTok);
void HandlePragmaPushMacro(Token &Tok);
void HandlePragmaPopMacro(Token &Tok);
@@ -2385,7 +2425,57 @@ public:
/// warnings.
void markMacroAsUsed(MacroInfo *MI);
+ void addMacroDeprecationMsg(const IdentifierInfo *II, std::string Msg,
+ SourceLocation AnnotationLoc) {
+ auto Annotations = AnnotationInfos.find(II);
+ if (Annotations == AnnotationInfos.end())
+ AnnotationInfos.insert(std::make_pair(
+ II,
+ MacroAnnotations::makeDeprecation(AnnotationLoc, std::move(Msg))));
+ else
+ Annotations->second.DeprecationInfo =
+ MacroAnnotationInfo{AnnotationLoc, std::move(Msg)};
+ }
+
+ void addRestrictExpansionMsg(const IdentifierInfo *II, std::string Msg,
+ SourceLocation AnnotationLoc) {
+ auto Annotations = AnnotationInfos.find(II);
+ if (Annotations == AnnotationInfos.end())
+ AnnotationInfos.insert(
+ std::make_pair(II, MacroAnnotations::makeRestrictExpansion(
+ AnnotationLoc, std::move(Msg))));
+ else
+ Annotations->second.RestrictExpansionInfo =
+ MacroAnnotationInfo{AnnotationLoc, std::move(Msg)};
+ }
+
+ void addFinalLoc(const IdentifierInfo *II, SourceLocation AnnotationLoc) {
+ auto Annotations = AnnotationInfos.find(II);
+ if (Annotations == AnnotationInfos.end())
+ AnnotationInfos.insert(
+ std::make_pair(II, MacroAnnotations::makeFinal(AnnotationLoc)));
+ else
+ Annotations->second.FinalAnnotationLoc = AnnotationLoc;
+ }
+
+ const MacroAnnotations &getMacroAnnotations(const IdentifierInfo *II) const {
+ return AnnotationInfos.find(II)->second;
+ }
+
+ void emitMacroExpansionWarnings(const Token &Identifier) const {
+ if (Identifier.getIdentifierInfo()->isDeprecatedMacro())
+ emitMacroDeprecationWarning(Identifier);
+
+ if (Identifier.getIdentifierInfo()->isRestrictExpansion() &&
+ !SourceMgr.isInMainFile(Identifier.getLocation()))
+ emitRestrictExpansionWarning(Identifier);
+ }
+
private:
+ void emitMacroDeprecationWarning(const Token &Identifier) const;
+ void emitRestrictExpansionWarning(const Token &Identifier) const;
+ void emitFinalMacroWarning(const Token &Identifier, bool IsUndef) const;
+
Optional<unsigned>
getSkippedRangeForExcludedConditionalBlock(SourceLocation HashLoc);
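
The MacroAnnotations machinery above backs the macro-annotation pragmas introduced in this release. A short source-level sketch of the user-facing syntax as documented for Clang 14; the macro names are made up, and the diagnostics come from the emitMacro*Warning helpers declared above.

#define FAST_MODE 1
#pragma clang deprecated(FAST_MODE, "use SAFE_MODE instead")

#define LIB_INTERNAL_TABLE table_v2
#pragma clang restrict_expansion(LIB_INTERNAL_TABLE, "internal to the library")

#define ABI_VERSION 3
#pragma clang final(ABI_VERSION)

int mode = FAST_MODE; // triggers the deprecation warning with the message above
// #undef ABI_VERSION // would be diagnosed via emitFinalMacroWarning()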
diff --git a/clang/include/clang/Lex/PreprocessorLexer.h b/clang/include/clang/Lex/PreprocessorLexer.h
index 03b1cc2c10e2..b43197a6031c 100644
--- a/clang/include/clang/Lex/PreprocessorLexer.h
+++ b/clang/include/clang/Lex/PreprocessorLexer.h
@@ -14,11 +14,13 @@
#ifndef LLVM_CLANG_LEX_PREPROCESSORLEXER_H
#define LLVM_CLANG_LEX_PREPROCESSORLEXER_H
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Lex/HeaderSearch.h"
#include "clang/Lex/MultipleIncludeOpt.h"
#include "clang/Lex/Token.h"
-#include "clang/Basic/SourceLocation.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
#include <cassert>
namespace clang {
@@ -74,6 +76,13 @@ protected:
/// we are currently in.
SmallVector<PPConditionalInfo, 4> ConditionalStack;
+ struct IncludeInfo {
+ const FileEntry *File;
+ SourceLocation Location;
+ };
+ // A complete history of all the files included by the current file.
+ llvm::StringMap<IncludeInfo> IncludeHistory;
+
PreprocessorLexer() : FID() {}
PreprocessorLexer(Preprocessor *pp, FileID fid);
virtual ~PreprocessorLexer() = default;
@@ -175,6 +184,15 @@ public:
ConditionalStack.clear();
ConditionalStack.append(CL.begin(), CL.end());
}
+
+ void addInclude(StringRef Filename, const FileEntry &File,
+ SourceLocation Location) {
+ IncludeHistory.insert({Filename, {&File, Location}});
+ }
+
+ const llvm::StringMap<IncludeInfo> &getIncludeHistory() const {
+ return IncludeHistory;
+ }
};
} // namespace clang
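
The per-lexer IncludeHistory map records every file the current file has included, which appears to back the new '#pragma clang include_instead' handling added in Preprocessor.h above. A hedged sketch of walking it from a live Preprocessor; getCurrentFileLexer() and the raw-offset printing are illustrative choices.

#include "clang/Lex/Preprocessor.h"
#include "clang/Lex/PreprocessorLexer.h"
#include "llvm/Support/raw_ostream.h"

// Sketch: dump what the file currently being lexed has included so far.
void dumpIncludeHistory(clang::Preprocessor &PP) {
  if (clang::PreprocessorLexer *L = PP.getCurrentFileLexer())
    for (const auto &KV : L->getIncludeHistory())
      llvm::errs() << KV.getKey() << " first included at raw location "
                   << KV.getValue().Location.getRawEncoding() << "\n";
}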
diff --git a/clang/include/clang/Lex/PreprocessorOptions.h b/clang/include/clang/Lex/PreprocessorOptions.h
index 99085b98fc7a..a7aabc3e1df2 100644
--- a/clang/include/clang/Lex/PreprocessorOptions.h
+++ b/clang/include/clang/Lex/PreprocessorOptions.h
@@ -199,9 +199,6 @@ public:
/// build it again.
std::shared_ptr<FailedModulesSet> FailedModules;
- /// A prefix map for __FILE__ and __BASE_FILE__.
- std::map<std::string, std::string, std::greater<std::string>> MacroPrefixMap;
-
/// Contains the currently active skipped range mappings for skipping excluded
/// conditional directives.
///
diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h
index 8eb3f9029d9d..92a703b42173 100644
--- a/clang/include/clang/Parse/Parser.h
+++ b/clang/include/clang/Parse/Parser.h
@@ -196,6 +196,7 @@ class Parser : public CodeCompletionHandler {
std::unique_ptr<PragmaHandler> MSRuntimeChecks;
std::unique_ptr<PragmaHandler> MSIntrinsic;
std::unique_ptr<PragmaHandler> MSOptimize;
+ std::unique_ptr<PragmaHandler> MSFenvAccess;
std::unique_ptr<PragmaHandler> CUDAForceHostDeviceHandler;
std::unique_ptr<PragmaHandler> OptimizeHandler;
std::unique_ptr<PragmaHandler> LoopHintHandler;
@@ -1977,6 +1978,9 @@ private:
Sema::ConditionKind CK,
ForRangeInfo *FRI = nullptr,
bool EnterForConditionScope = false);
+ DeclGroupPtrTy
+ ParseAliasDeclarationInInitStatement(DeclaratorContext Context,
+ ParsedAttributesWithRange &Attrs);
//===--------------------------------------------------------------------===//
// C++ Coroutines
@@ -2396,7 +2400,8 @@ private:
if (getLangOpts().OpenMP)
Actions.startOpenMPLoop();
if (getLangOpts().CPlusPlus)
- return isCXXSimpleDeclaration(/*AllowForRangeDecl=*/true);
+ return Tok.is(tok::kw_using) ||
+ isCXXSimpleDeclaration(/*AllowForRangeDecl=*/true);
return isDeclarationSpecifier(true);
}
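
The kw_using check above lets a for-loop init-statement begin with an alias-declaration (P2360, targeting C++23), which ParseAliasDeclarationInInitStatement then parses. A sketch of the accepted syntax, assuming -std=c++2b:

#include <map>
#include <string>

int sum(const std::map<std::string, int> &M) {
  int Total = 0;
  // alias-declaration as the init-statement of a range-based for
  for (using KV = std::map<std::string, int>::value_type; const KV &Entry : M)
    Total += Entry.second;
  return Total;
}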
@@ -2834,7 +2839,10 @@ private:
SourceLocation ScopeLoc,
CachedTokens &OpenMPTokens);
- IdentifierInfo *TryParseCXX11AttributeIdentifier(SourceLocation &Loc);
+ IdentifierInfo *TryParseCXX11AttributeIdentifier(
+ SourceLocation &Loc,
+ Sema::AttributeCompletion Completion = Sema::AttributeCompletion::None,
+ const IdentifierInfo *EnclosingScope = nullptr);
void MaybeParseMicrosoftAttributes(ParsedAttributes &attrs,
SourceLocation *endLoc = nullptr) {
@@ -3197,6 +3205,10 @@ private:
/// Parses OpenMP context selectors.
bool parseOMPContextSelectors(SourceLocation Loc, OMPTraitInfo &TI);
+ /// Parse an 'append_args' clause for '#pragma omp declare variant'.
+ bool parseOpenMPAppendArgs(
+ SmallVectorImpl<OMPDeclareVariantAttr::InteropType> &InterOpTypes);
+
/// Parse a `match` clause for an '#pragma omp declare variant'. Return true
/// if there was an error.
bool parseOMPDeclareVariantMatchClause(SourceLocation Loc, OMPTraitInfo &TI,
diff --git a/clang/include/clang/Sema/CodeCompleteConsumer.h b/clang/include/clang/Sema/CodeCompleteConsumer.h
index 87646ab95025..6b37e3c50dba 100644
--- a/clang/include/clang/Sema/CodeCompleteConsumer.h
+++ b/clang/include/clang/Sema/CodeCompleteConsumer.h
@@ -329,6 +329,9 @@ public:
/// Code completion inside the filename part of a #include directive.
CCC_IncludedFile,
+ /// Code completion of an attribute name.
+ CCC_Attribute,
+
/// An unknown context, in which we are recovering from a parsing
/// error and don't know which completions we should give.
CCC_Recovery
diff --git a/clang/include/clang/Sema/DeclSpec.h b/clang/include/clang/Sema/DeclSpec.h
index 423f4f4ee7b7..ed5be2da3acd 100644
--- a/clang/include/clang/Sema/DeclSpec.h
+++ b/clang/include/clang/Sema/DeclSpec.h
@@ -275,6 +275,7 @@ public:
static const TST TST_accum = clang::TST_Accum;
static const TST TST_fract = clang::TST_Fract;
static const TST TST_float128 = clang::TST_float128;
+ static const TST TST_ibm128 = clang::TST_ibm128;
static const TST TST_bool = clang::TST_bool;
static const TST TST_decimal32 = clang::TST_decimal32;
static const TST TST_decimal64 = clang::TST_decimal64;
diff --git a/clang/include/clang/Sema/Initialization.h b/clang/include/clang/Sema/Initialization.h
index 8feb66995f95..679e12ee22d4 100644
--- a/clang/include/clang/Sema/Initialization.h
+++ b/clang/include/clang/Sema/Initialization.h
@@ -298,8 +298,8 @@ public:
/// Create the initialization entity for the result of a function.
static InitializedEntity InitializeResult(SourceLocation ReturnLoc,
- QualType Type, bool NRVO) {
- return InitializedEntity(EK_Result, ReturnLoc, Type, NRVO);
+ QualType Type) {
+ return InitializedEntity(EK_Result, ReturnLoc, Type);
}
static InitializedEntity InitializeStmtExprResult(SourceLocation ReturnLoc,
@@ -308,20 +308,20 @@ public:
}
static InitializedEntity InitializeBlock(SourceLocation BlockVarLoc,
- QualType Type, bool NRVO) {
- return InitializedEntity(EK_BlockElement, BlockVarLoc, Type, NRVO);
+ QualType Type) {
+ return InitializedEntity(EK_BlockElement, BlockVarLoc, Type);
}
static InitializedEntity InitializeLambdaToBlock(SourceLocation BlockVarLoc,
- QualType Type, bool NRVO) {
+ QualType Type) {
return InitializedEntity(EK_LambdaToBlockConversionBlockElement,
- BlockVarLoc, Type, NRVO);
+ BlockVarLoc, Type);
}
/// Create the initialization entity for an exception object.
static InitializedEntity InitializeException(SourceLocation ThrowLoc,
- QualType Type, bool NRVO) {
- return InitializedEntity(EK_Exception, ThrowLoc, Type, NRVO);
+ QualType Type) {
+ return InitializedEntity(EK_Exception, ThrowLoc, Type);
}
/// Create the initialization entity for an object allocated via new.
@@ -335,8 +335,15 @@ public:
}
/// Create the initialization entity for a temporary.
- static InitializedEntity InitializeTemporary(TypeSourceInfo *TypeInfo) {
- return InitializeTemporary(TypeInfo, TypeInfo->getType());
+ static InitializedEntity InitializeTemporary(ASTContext &Context,
+ TypeSourceInfo *TypeInfo) {
+ QualType Type = TypeInfo->getType();
+ if (Context.getLangOpts().OpenCLCPlusPlus) {
+ assert(!Type.hasAddressSpace() && "Temporary already has address space!");
+ Type = Context.getAddrSpaceQualType(Type, LangAS::opencl_private);
+ }
+
+ return InitializeTemporary(TypeInfo, Type);
}
/// Create the initialization entity for a temporary.
@@ -481,7 +488,7 @@ public:
/// Determine whether this is an array new with an unknown bound.
bool isVariableLengthArrayNew() const {
- return getKind() == EK_New && dyn_cast_or_null<IncompleteArrayType>(
+ return getKind() == EK_New && isa_and_nonnull<IncompleteArrayType>(
getType()->getAsArrayTypeUnsafe());
}
diff --git a/clang/include/clang/Sema/Overload.h b/clang/include/clang/Sema/Overload.h
index 82661cb3d12a..88405a63b735 100644
--- a/clang/include/clang/Sema/Overload.h
+++ b/clang/include/clang/Sema/Overload.h
@@ -469,7 +469,9 @@ class Sema;
unrelated_class,
bad_qualifiers,
lvalue_ref_to_rvalue,
- rvalue_ref_to_lvalue
+ rvalue_ref_to_lvalue,
+ too_few_initializers,
+ too_many_initializers,
};
// This can be null, e.g. for implicit object arguments.
@@ -533,11 +535,17 @@ class Sema;
};
/// ConversionKind - The kind of implicit conversion sequence.
- unsigned ConversionKind : 30;
+ unsigned ConversionKind : 31;
- /// Whether the target is really a std::initializer_list, and the
- /// sequence only represents the worst element conversion.
- unsigned StdInitializerListElement : 1;
+ // Whether the initializer list was of an incomplete array.
+ unsigned InitializerListOfIncompleteArray : 1;
+
+ /// When initializing an array or std::initializer_list from an
+ /// initializer-list, this is the array or std::initializer_list type being
+ /// initialized. The remainder of the conversion sequence, including ToType,
+ /// describes the worst conversion of an initializer to an element of the
+ /// array or std::initializer_list. (Note, 'worst' is not well defined.)
+ QualType InitializerListContainerType;
void setKind(Kind K) {
destruct();
@@ -568,13 +576,17 @@ class Sema;
};
ImplicitConversionSequence()
- : ConversionKind(Uninitialized), StdInitializerListElement(false) {
+ : ConversionKind(Uninitialized),
+ InitializerListOfIncompleteArray(false),
+ InitializerListContainerType() {
Standard.setAsIdentityConversion();
}
ImplicitConversionSequence(const ImplicitConversionSequence &Other)
: ConversionKind(Other.ConversionKind),
- StdInitializerListElement(Other.StdInitializerListElement) {
+ InitializerListOfIncompleteArray(
+ Other.InitializerListOfIncompleteArray),
+ InitializerListContainerType(Other.InitializerListContainerType) {
switch (ConversionKind) {
case Uninitialized: break;
case StandardConversion: Standard = Other.Standard; break;
@@ -670,14 +682,22 @@ class Sema;
Standard.setAllToTypes(T);
}
- /// Whether the target is really a std::initializer_list, and the
- /// sequence only represents the worst element conversion.
- bool isStdInitializerListElement() const {
- return StdInitializerListElement;
+ // True iff this is a conversion sequence from an initializer list to an
+ // array or std::initializer_list.
+ bool hasInitializerListContainerType() const {
+ return !InitializerListContainerType.isNull();
}
-
- void setStdInitializerListElement(bool V = true) {
- StdInitializerListElement = V;
+ void setInitializerListContainerType(QualType T, bool IA) {
+ InitializerListContainerType = T;
+ InitializerListOfIncompleteArray = IA;
+ }
+ bool isInitializerListOfIncompleteArray() const {
+ return InitializerListOfIncompleteArray;
+ }
+ QualType getInitializerListContainerType() const {
+ assert(hasInitializerListContainerType() &&
+ "not initializer list container");
+ return InitializerListContainerType;
}
/// Form an "implicit" conversion sequence from nullptr_t to bool, for a
@@ -1184,6 +1204,20 @@ class Sema;
return Info;
}
+ // Returns false if signature help is relevant despite number of arguments
+ // exceeding parameters. Specifically, it returns false when
+ // PartialOverloading is true and one of the following:
+ // * Function is variadic
+ // * Function is template variadic
+ // * Function is an instantiation of template variadic function
+ // The last case may seem strange. The idea is that if we added one more
+ // argument, we'd end up with a function similar to Function. Since, in the
+ // context of signature help and/or code completion, we do not know what the
+ // type of the next argument (that the user is typing) will be, this is as
+ // good a candidate as we can get, despite the fact that it takes one less
+ // parameter.
+ bool shouldEnforceArgLimit(bool PartialOverloading, FunctionDecl *Function);
+
} // namespace clang
#endif // LLVM_CLANG_SEMA_OVERLOAD_H
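
For orientation, InitializerListContainerType generalizes the old StdInitializerListElement bit: the sequence now remembers the whole array or std::initializer_list type being initialized, with the incomplete-array case flagged separately. A source-level sketch of the conversions this models (the function names are invented):

#include <initializer_list>

void takeList(std::initializer_list<double> Vals);
void takeArray(const int (&Vals)[3]);

void caller() {
  takeList({1, 2, 3});  // container type: std::initializer_list<double>
  takeArray({1, 2, 3}); // container type: int[3]; arrays of unknown bound
                        // are the InitializerListOfIncompleteArray case
}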
diff --git a/clang/include/clang/Sema/ParsedAttr.h b/clang/include/clang/Sema/ParsedAttr.h
index f47f557adeb1..ff2303c84bd2 100644
--- a/clang/include/clang/Sema/ParsedAttr.h
+++ b/clang/include/clang/Sema/ParsedAttr.h
@@ -67,6 +67,8 @@ struct ParsedAttrInfo {
const char *NormalizedFullName;
};
ArrayRef<Spelling> Spellings;
+ // The names of the known arguments of this attribute.
+ ArrayRef<const char *> ArgNames;
ParsedAttrInfo(AttributeCommonInfo::Kind AttrKind =
AttributeCommonInfo::NoSemaHandlerAttribute)
@@ -92,11 +94,9 @@ struct ParsedAttrInfo {
const Decl *D) const {
return true;
}
- /// Check if this attribute is allowed by the language we are compiling, and
- /// issue a diagnostic if not.
- virtual bool diagLangOpts(Sema &S, const ParsedAttr &Attr) const {
- return true;
- }
+ /// Check if this attribute is allowed by the language we are compiling.
+ virtual bool acceptsLangOpts(const LangOptions &LO) const { return true; }
+
/// Check if this attribute is allowed when compiling for the given target.
virtual bool existsInTarget(const TargetInfo &Target) const {
return true;
@@ -125,6 +125,7 @@ struct ParsedAttrInfo {
}
static const ParsedAttrInfo &get(const AttributeCommonInfo &A);
+ static ArrayRef<const ParsedAttrInfo *> getAllBuiltin();
};
typedef llvm::Registry<ParsedAttrInfo> ParsedAttrInfoRegistry;
@@ -628,7 +629,7 @@ public:
/// a Spelling enumeration, the value UINT_MAX is returned.
unsigned getSemanticSpelling() const;
- /// If this is an OpenCL address space attribute returns its representation
+ /// If this is an OpenCL address space attribute, returns its representation
/// in LangAS, otherwise returns default address space.
LangAS asOpenCLLangAS() const {
switch (getParsedKind()) {
@@ -651,7 +652,7 @@ public:
}
}
- /// If this is an OpenCL address space attribute returns its SYCL
+ /// If this is an OpenCL address space attribute, returns its SYCL
/// representation in LangAS, otherwise returns default address space.
LangAS asSYCLLangAS() const {
switch (getKind()) {
@@ -893,7 +894,7 @@ public:
ParsedAttr> {
iterator() : iterator_adaptor_base(nullptr) {}
iterator(VecTy::iterator I) : iterator_adaptor_base(I) {}
- reference operator*() { return **I; }
+ reference operator*() const { return **I; }
friend class ParsedAttributesView;
};
struct const_iterator
@@ -1118,14 +1119,14 @@ enum AttributeDeclKind {
inline const StreamingDiagnostic &operator<<(const StreamingDiagnostic &DB,
const ParsedAttr &At) {
- DB.AddTaggedVal(reinterpret_cast<intptr_t>(At.getAttrName()),
+ DB.AddTaggedVal(reinterpret_cast<uint64_t>(At.getAttrName()),
DiagnosticsEngine::ak_identifierinfo);
return DB;
}
inline const StreamingDiagnostic &operator<<(const StreamingDiagnostic &DB,
const ParsedAttr *At) {
- DB.AddTaggedVal(reinterpret_cast<intptr_t>(At->getAttrName()),
+ DB.AddTaggedVal(reinterpret_cast<uint64_t>(At->getAttrName()),
DiagnosticsEngine::ak_identifierinfo);
return DB;
}
@@ -1140,7 +1141,7 @@ template <typename ACI,
std::is_same<ACI, AttributeCommonInfo>::value, int> = 0>
inline const StreamingDiagnostic &operator<<(const StreamingDiagnostic &DB,
const ACI &CI) {
- DB.AddTaggedVal(reinterpret_cast<intptr_t>(CI.getAttrName()),
+ DB.AddTaggedVal(reinterpret_cast<uint64_t>(CI.getAttrName()),
DiagnosticsEngine::ak_identifierinfo);
return DB;
}
@@ -1150,7 +1151,7 @@ template <typename ACI,
std::is_same<ACI, AttributeCommonInfo>::value, int> = 0>
inline const StreamingDiagnostic &operator<<(const StreamingDiagnostic &DB,
const ACI* CI) {
- DB.AddTaggedVal(reinterpret_cast<intptr_t>(CI->getAttrName()),
+ DB.AddTaggedVal(reinterpret_cast<uint64_t>(CI->getAttrName()),
DiagnosticsEngine::ak_identifierinfo);
return DB;
}
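
ParsedAttrInfo's diagLangOpts() hook is replaced by the predicate-style acceptsLangOpts(), and plugins can now advertise argument names via ArgNames. A sketch of a plugin adapting to both, loosely modeled on Clang's attribute-plugin example; the attribute spelling and registry key are invented.

#include "clang/Basic/LangOptions.h"
#include "clang/Sema/ParsedAttr.h"

namespace {
struct ExampleAttrInfo : public clang::ParsedAttrInfo {
  ExampleAttrInfo() {
    static constexpr Spelling S[] = {
        {clang::ParsedAttr::AS_GNU, "example"},
        {clang::ParsedAttr::AS_CXX11, "plugin::example"}};
    Spellings = S;
    static constexpr const char *Args[] = {"level"};
    ArgNames = Args; // newly exposed list of known argument names
  }
  // Replaces diagLangOpts(): report availability only; Sema diagnoses.
  bool acceptsLangOpts(const clang::LangOptions &LO) const override {
    return LO.CPlusPlus;
  }
};
} // namespace

static clang::ParsedAttrInfoRegistry::Add<ExampleAttrInfo>
    X("example-attr", "example attribute plugin");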
diff --git a/clang/include/clang/Sema/Scope.h b/clang/include/clang/Sema/Scope.h
index b499ba1e7c2a..872951a0829b 100644
--- a/clang/include/clang/Sema/Scope.h
+++ b/clang/include/clang/Sema/Scope.h
@@ -337,7 +337,7 @@ public:
/// isDeclScope - Return true if this is the scope that the specified decl is
/// declared in.
- bool isDeclScope(const Decl *D) const { return DeclsInScope.count(D) != 0; }
+ bool isDeclScope(const Decl *D) const { return DeclsInScope.contains(D); }
/// Get the entity corresponding to this scope.
DeclContext *getEntity() const {
diff --git a/clang/include/clang/Sema/ScopeInfo.h b/clang/include/clang/Sema/ScopeInfo.h
index 98ed75acd9d2..ccd15ea6a818 100644
--- a/clang/include/clang/Sema/ScopeInfo.h
+++ b/clang/include/clang/Sema/ScopeInfo.h
@@ -175,8 +175,9 @@ public:
/// First 'return' statement in the current function.
SourceLocation FirstReturnLoc;
- /// First C++ 'try' statement in the current function.
- SourceLocation FirstCXXTryLoc;
+ /// First C++ 'try' or ObjC @try statement in the current function.
+ SourceLocation FirstCXXOrObjCTryLoc;
+ enum { TryLocIsCXX, TryLocIsObjC, Unknown } FirstTryType = Unknown;
/// First SEH '__try' statement in the current function.
SourceLocation FirstSEHTryLoc;
@@ -446,7 +447,14 @@ public:
void setHasCXXTry(SourceLocation TryLoc) {
setHasBranchProtectedScope();
- FirstCXXTryLoc = TryLoc;
+ FirstCXXOrObjCTryLoc = TryLoc;
+ FirstTryType = TryLocIsCXX;
+ }
+
+ void setHasObjCTry(SourceLocation TryLoc) {
+ setHasBranchProtectedScope();
+ FirstCXXOrObjCTryLoc = TryLoc;
+ FirstTryType = TryLocIsObjC;
}
void setHasSEHTry(SourceLocation TryLoc) {
@@ -1001,10 +1009,7 @@ public:
return NonODRUsedCapturingExprs.count(CapturingVarExpr);
}
void removePotentialCapture(Expr *E) {
- PotentiallyCapturingExprs.erase(
- std::remove(PotentiallyCapturingExprs.begin(),
- PotentiallyCapturingExprs.end(), E),
- PotentiallyCapturingExprs.end());
+ llvm::erase_value(PotentiallyCapturingExprs, E);
}
void clearPotentialCaptures() {
PotentiallyCapturingExprs.clear();
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 83a2d132bf6a..43ce5d983217 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -74,7 +74,6 @@
namespace llvm {
class APSInt;
- template <typename ValueT> struct DenseMapInfo;
template <typename ValueT, typename ValueInfoT> class DenseSet;
class SmallBitVector;
struct InlineAsmIdentifierInfo;
@@ -396,8 +395,8 @@ public:
///
/// This is the greatest alignment value supported by load, store, and alloca
/// instructions, and global values.
- static const unsigned MaxAlignmentExponent = 29;
- static const unsigned MaximumAlignment = 1u << MaxAlignmentExponent;
+ static const unsigned MaxAlignmentExponent = 32;
+ static const uint64_t MaximumAlignment = 1ull << MaxAlignmentExponent;
typedef OpaquePtr<DeclGroupRef> DeclGroupPtrTy;
typedef OpaquePtr<TemplateName> TemplateTy;
@@ -914,10 +913,6 @@ public:
OpaqueParser = P;
}
- // Does the work necessary to deal with a SYCL kernel lambda. At the moment,
- // this just marks the list of lambdas required to name the kernel.
- void AddSYCLKernelLambda(const FunctionDecl *FD);
-
class DelayedDiagnostics;
class DelayedDiagnosticsState {
@@ -1128,6 +1123,10 @@ public:
/// The C++ "std::coroutine_traits" template, which is defined in
/// \<coroutine_traits>
ClassTemplateDecl *StdCoroutineTraitsCache;
+ /// The namespace where coroutine components are defined. The standard
+ /// defines them in namespace std; the previous implementation placed them
+ /// in namespace std::experimental.
+ NamespaceDecl *CoroTraitsNamespaceCache;
/// The C++ "type_info" declaration, which is defined in \<typeinfo>.
RecordDecl *CXXTypeInfoDecl;
@@ -1218,6 +1217,11 @@ public:
/// cases in a switch statement).
ConstantEvaluated,
+ /// In addition to being constant evaluated, the current expression
+ /// occurs in an immediate function context - either a consteval function
+ /// or the true branch of a consteval if statement.
+ ImmediateFunctionContext,
+
/// The current expression is potentially evaluated at run time,
/// which means that code may be generated to evaluate the value of the
/// expression at run time.
@@ -1306,8 +1310,14 @@ public:
Context == ExpressionEvaluationContext::UnevaluatedAbstract ||
Context == ExpressionEvaluationContext::UnevaluatedList;
}
+
bool isConstantEvaluated() const {
- return Context == ExpressionEvaluationContext::ConstantEvaluated;
+ return Context == ExpressionEvaluationContext::ConstantEvaluated ||
+ Context == ExpressionEvaluationContext::ImmediateFunctionContext;
+ }
+
+ bool isImmediateFunctionContext() const {
+ return Context == ExpressionEvaluationContext::ImmediateFunctionContext;
}
};
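
ImmediateFunctionContext is a new evaluation-context state: everything inside it is constant evaluated, which the updated isConstantEvaluated() above reflects. A source-level sketch of the contexts it describes (consteval functions and the consteval branch of 'if consteval'); whether this exact example compiles depends on the in-progress -std=c++2b support.

consteval int square(int N) { return N * N; }

constexpr int scaled(int N) {
  if consteval {
    // Immediate function context: calling a consteval function is allowed
    // here even though 'scaled' itself is only constexpr.
    return 2 * square(N);
  } else {
    return 2 * N * N;
  }
}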
@@ -1419,8 +1429,22 @@ public:
const llvm::MapVector<FieldDecl *, DeleteLocs> &
getMismatchingDeleteExpressions() const;
- typedef std::pair<ObjCMethodList, ObjCMethodList> GlobalMethods;
- typedef llvm::DenseMap<Selector, GlobalMethods> GlobalMethodPool;
+ class GlobalMethodPool {
+ public:
+ using Lists = std::pair<ObjCMethodList, ObjCMethodList>;
+ using iterator = llvm::DenseMap<Selector, Lists>::iterator;
+ iterator begin() { return Methods.begin(); }
+ iterator end() { return Methods.end(); }
+ iterator find(Selector Sel) { return Methods.find(Sel); }
+ std::pair<iterator, bool> insert(std::pair<Selector, Lists> &&Val) {
+ return Methods.insert(Val);
+ }
+ int count(Selector Sel) const { return Methods.count(Sel); }
+ bool empty() const { return Methods.empty(); }
+
+ private:
+ llvm::DenseMap<Selector, Lists> Methods;
+ };
/// Method Pool - allows efficient lookup when typechecking messages to "id".
/// We need to maintain a list, since selectors can have differing signatures
@@ -2334,12 +2358,13 @@ public:
const CXXScopeSpec &SS, QualType T,
TagDecl *OwnedTagDecl = nullptr);
- QualType getDecltypeForParenthesizedExpr(Expr *E);
- QualType BuildTypeofExprType(Expr *E, SourceLocation Loc);
+ // Returns the underlying type of a decltype with the given expression.
+ QualType getDecltypeForExpr(Expr *E);
+
+ QualType BuildTypeofExprType(Expr *E);
/// If AsUnevaluated is false, E is treated as though it were an evaluated
/// context, such as when building a type for decltype(auto).
- QualType BuildDecltypeType(Expr *E, SourceLocation Loc,
- bool AsUnevaluated = true);
+ QualType BuildDecltypeType(Expr *E, bool AsUnevaluated = true);
QualType BuildUnaryTransformType(QualType BaseType,
UnaryTransformType::UTTKind UKind,
SourceLocation Loc);
@@ -3343,6 +3368,8 @@ public:
const AttributeCommonInfo &CI,
bool BestCase,
MSInheritanceModel Model);
+ ErrorAttr *mergeErrorAttr(Decl *D, const AttributeCommonInfo &CI,
+ StringRef NewUserDiagnostic);
FormatAttr *mergeFormatAttr(Decl *D, const AttributeCommonInfo &CI,
IdentifierInfo *Format, int FormatIdx,
int FirstArg);
@@ -3368,6 +3395,7 @@ public:
EnforceTCBAttr *mergeEnforceTCBAttr(Decl *D, const EnforceTCBAttr &AL);
EnforceTCBLeafAttr *mergeEnforceTCBLeafAttr(Decl *D,
const EnforceTCBLeafAttr &AL);
+ BTFDeclTagAttr *mergeBTFDeclTagAttr(Decl *D, const BTFDeclTagAttr &AL);
void mergeDeclAttributes(NamedDecl *New, Decl *Old,
AvailabilityMergeKind AMK = AMK_Redeclaration);
@@ -3475,7 +3503,7 @@ public:
bool IsFunctionConversion(QualType FromType, QualType ToType,
QualType &ResultTy);
bool DiagnoseMultipleUserDefinedConversion(Expr *From, QualType ToType);
- bool isSameOrCompatibleFunctionType(CanQualType Param, CanQualType Arg);
+ bool isSameOrCompatibleFunctionType(QualType Param, QualType Arg);
bool CanPerformAggregateInitializationForOverloadResolution(
const InitializedEntity &Entity, InitListExpr *From);
@@ -3504,11 +3532,12 @@ public:
/// Contexts in which a converted constant expression is required.
enum CCEKind {
- CCEK_CaseValue, ///< Expression in a case label.
- CCEK_Enumerator, ///< Enumerator value with fixed underlying type.
- CCEK_TemplateArg, ///< Value of a non-type template parameter.
- CCEK_ArrayBound, ///< Array bound in array declarator or new-expression.
- CCEK_ExplicitBool ///< Condition in an explicit(bool) specifier.
+ CCEK_CaseValue, ///< Expression in a case label.
+ CCEK_Enumerator, ///< Enumerator value with fixed underlying type.
+ CCEK_TemplateArg, ///< Value of a non-type template parameter.
+ CCEK_ArrayBound, ///< Array bound in array declarator or new-expression.
+ CCEK_ExplicitBool, ///< Condition in an explicit(bool) specifier.
+ CCEK_Noexcept ///< Condition in a noexcept(bool) specifier.
};
ExprResult CheckConvertedConstantExpression(Expr *From, QualType T,
llvm::APSInt &Value, CCEKind CCE);
@@ -3888,6 +3917,8 @@ public:
SourceLocation LParenLoc,
MultiExprArg Args,
SourceLocation RParenLoc,
+ Expr *ExecConfig = nullptr,
+ bool IsExecConfig = false,
bool AllowRecovery = false);
ExprResult
BuildCallToObjectOfClassType(Scope *S, Expr *Object, SourceLocation LParenLoc,
@@ -4690,11 +4721,12 @@ public:
Stmt *SubStmt);
class ConditionResult;
- StmtResult ActOnIfStmt(SourceLocation IfLoc, bool IsConstexpr,
+
+ StmtResult ActOnIfStmt(SourceLocation IfLoc, IfStatementKind StatementKind,
SourceLocation LParenLoc, Stmt *InitStmt,
ConditionResult Cond, SourceLocation RParenLoc,
Stmt *ThenVal, SourceLocation ElseLoc, Stmt *ElseVal);
- StmtResult BuildIfStmt(SourceLocation IfLoc, bool IsConstexpr,
+ StmtResult BuildIfStmt(SourceLocation IfLoc, IfStatementKind StatementKind,
SourceLocation LParenLoc, Stmt *InitStmt,
ConditionResult Cond, SourceLocation RParenLoc,
Stmt *ThenVal, SourceLocation ElseLoc, Stmt *ElseVal);
@@ -4893,7 +4925,7 @@ public:
/// DiagnoseUnusedExprResult - If the statement passed in is an expression
/// whose result is unused, warn.
- void DiagnoseUnusedExprResult(const Stmt *S);
+ void DiagnoseUnusedExprResult(const Stmt *S, unsigned DiagID);
void DiagnoseUnusedNestedTypedefs(const RecordDecl *D);
void DiagnoseUnusedDecl(const NamedDecl *ND);
@@ -5082,7 +5114,8 @@ public:
/// type -- entities referenced by the type are now referenced.
void MarkDeclarationsReferencedInType(SourceLocation Loc, QualType T);
void MarkDeclarationsReferencedInExpr(Expr *E,
- bool SkipLocalVariables = false);
+ bool SkipLocalVariables = false,
+ ArrayRef<const Expr *> StopAt = None);
/// Try to recover by turning the given expression into a
/// call. Returns true if recovery was attempted or an error was
@@ -5099,6 +5132,16 @@ public:
/// conversion.
ExprResult tryConvertExprToType(Expr *E, QualType Ty);
+ /// Conditionally issue a diagnostic based on the statement's reachability
+ /// analysis.
+ ///
+ /// \param Stmts If Stmts is non-empty, delay reporting the diagnostic until
+ /// the function body is parsed, and then do a basic reachability analysis to
+ /// determine if the statement is reachable. If it is unreachable, the
+ /// diagnostic will not be emitted.
+ bool DiagIfReachable(SourceLocation Loc, ArrayRef<const Stmt *> Stmts,
+ const PartialDiagnostic &PD);
+
/// Conditionally issue a diagnostic based on the current
/// evaluation context.
///
@@ -5659,6 +5702,7 @@ public:
NamespaceDecl *getOrCreateStdNamespace();
NamespaceDecl *lookupStdExperimentalNamespace();
+ NamespaceDecl *getCachedCoroNamespace() { return CoroTraitsNamespaceCache; }
CXXRecordDecl *getStdBadAlloc() const;
EnumDecl *getStdAlignValT() const;
@@ -5909,7 +5953,7 @@ public:
/// Check the given noexcept-specifier, convert its expression, and compute
/// the appropriate ExceptionSpecificationType.
- ExprResult ActOnNoexceptSpec(SourceLocation NoexceptLoc, Expr *NoexceptExpr,
+ ExprResult ActOnNoexceptSpec(Expr *NoexceptExpr,
ExceptionSpecificationType &EST);
/// Check the given exception-specification and update the
@@ -6105,6 +6149,13 @@ public:
// AltiVecPixel and AltiVecBool when -faltivec-src-compat=xl is specified.
bool ShouldSplatAltivecScalarInCast(const VectorType *VecTy);
+ // Checks if the -faltivec-src-compat=gcc option is specified.
+ // If so, AltiVecVector, AltiVecBool and AltiVecPixel types are
+ // treated the same way as they are when trying to initialize
+ // these vectors on gcc (an error is emitted).
+ bool CheckAltivecInitFromScalar(SourceRange R, QualType VecTy,
+ QualType SrcTy);
+
/// ActOnCXXNamedCast - Parse
/// {dynamic,static,reinterpret,const,addrspace}_cast's.
ExprResult ActOnCXXNamedCast(SourceLocation OpLoc,
@@ -7828,8 +7879,7 @@ public:
TemplateArgumentLoc &Arg,
SmallVectorImpl<TemplateArgument> &Converted);
- bool CheckTemplateArgument(TemplateTypeParmDecl *Param,
- TypeSourceInfo *Arg);
+ bool CheckTemplateArgument(TypeSourceInfo *Arg);
ExprResult CheckTemplateArgument(NonTypeTemplateParmDecl *Param,
QualType InstantiatedParamType, Expr *Arg,
TemplateArgument &Converted,
@@ -8517,6 +8567,14 @@ public:
/// Substitute Replacement for auto in TypeWithAuto
TypeSourceInfo* SubstAutoTypeSourceInfo(TypeSourceInfo *TypeWithAuto,
QualType Replacement);
+
+ // Substitute auto in TypeWithAuto for a Dependent auto type
+ QualType SubstAutoTypeDependent(QualType TypeWithAuto);
+
+ // Substitute auto in TypeWithAuto for a Dependent auto type
+ TypeSourceInfo *
+ SubstAutoTypeSourceInfoDependent(TypeSourceInfo *TypeWithAuto);
+
/// Completely replace the \c auto in \p TypeWithAuto by
/// \p Replacement. This does not retain any \c auto type sugar.
QualType ReplaceAutoType(QualType TypeWithAuto, QualType Replacement);
@@ -9089,6 +9147,19 @@ public:
return ExprEvalContexts.back().isUnevaluated();
}
+ bool isImmediateFunctionContext() const {
+ assert(!ExprEvalContexts.empty() &&
+ "Must be in an expression evaluation context");
+ for (const ExpressionEvaluationContextRecord &context :
+ llvm::reverse(ExprEvalContexts)) {
+ if (context.isImmediateFunctionContext())
+ return true;
+ if (context.isUnevaluated())
+ return false;
+ }
+ return false;
+ }
+
/// RAII class used to determine whether SFINAE has
/// trapped any errors that occur during template argument
/// deduction.
@@ -9452,9 +9523,9 @@ public:
SubstTemplateName(NestedNameSpecifierLoc QualifierLoc, TemplateName Name,
SourceLocation Loc,
const MultiLevelTemplateArgumentList &TemplateArgs);
- bool Subst(const TemplateArgumentLoc *Args, unsigned NumArgs,
- TemplateArgumentListInfo &Result,
- const MultiLevelTemplateArgumentList &TemplateArgs);
+
+ bool SubstTypeConstraint(TemplateTypeParmDecl *Inst, const TypeConstraint *TC,
+ const MultiLevelTemplateArgumentList &TemplateArgs);
bool InstantiateDefaultArgument(SourceLocation CallLoc, FunctionDecl *FD,
ParmVarDecl *Param);
@@ -10209,8 +10280,11 @@ public:
bool buildCoroutineParameterMoves(SourceLocation Loc);
VarDecl *buildCoroutinePromise(SourceLocation Loc);
void CheckCompletedCoroutineBody(FunctionDecl *FD, Stmt *&Body);
+ /// Look up 'coroutine_traits' in the std and std::experimental
+ /// namespaces. The namespace found is recorded in Namespace.
ClassTemplateDecl *lookupCoroutineTraits(SourceLocation KwLoc,
- SourceLocation FuncLoc);
+ SourceLocation FuncLoc,
+ NamespaceDecl *&Namespace);
/// Check that the expression co_await promise.final_suspend() shall not be
/// potentially-throwing.
bool checkFinalSuspendNoThrow(const Stmt *FinalSuspend);
@@ -10431,6 +10505,12 @@ public:
/// \param Init First part of the for loop.
void ActOnOpenMPLoopInitialization(SourceLocation ForLoc, Stmt *Init);
+ /// Called on well-formed '\#pragma omp metadirective' after parsing
+ /// of the associated statement.
+ StmtResult ActOnOpenMPMetaDirective(ArrayRef<OMPClause *> Clauses,
+ Stmt *AStmt, SourceLocation StartLoc,
+ SourceLocation EndLoc);
+
// OpenMP directives and clauses.
/// Called on correct id-expression from the '#pragma omp
/// threadprivate'.
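
ActOnOpenMPMetaDirective, together with the ActOnOpenMPWhenClause entry point added further down, implements OpenMP 5.x metadirectives. A sketch of the directive syntax these hooks accept, adapted from the OpenMP examples; the trait selector shown is illustrative.

void saxpy(int N, float A, const float *X, float *Y) {
#pragma omp metadirective                                                      \
    when(device = {arch(nvptx64)}: target teams distribute parallel for)      \
    default(parallel for)
  for (int I = 0; I < N; ++I)
    Y[I] = A * X[I] + Y[I];
}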
@@ -10453,7 +10533,7 @@ public:
/// Called on well-formed '#pragma omp [begin] assume[s]'.
void ActOnOpenMPAssumesDirective(SourceLocation Loc,
OpenMPDirectiveKind DKind,
- ArrayRef<StringRef> Assumptions,
+ ArrayRef<std::string> Assumptions,
bool SkippedClauses);
/// Check if there is an active global `omp begin assumes` directive.
@@ -10563,6 +10643,11 @@ public:
/// an OpenMP loop directive.
StmtResult ActOnOpenMPCanonicalLoop(Stmt *AStmt);
+ /// Process a canonical OpenMP loop nest that can either be a canonical
+ /// literal loop (ForStmt or CXXForRangeStmt), or the generated loop of an
+ /// OpenMP loop transformation construct.
+ StmtResult ActOnOpenMPLoopnest(Stmt *AStmt);
+
/// End of OpenMP region.
///
/// \param S Statement associated with the current OpenMP region.
@@ -10668,7 +10753,8 @@ public:
StmtResult ActOnOpenMPBarrierDirective(SourceLocation StartLoc,
SourceLocation EndLoc);
/// Called on well-formed '\#pragma omp taskwait'.
- StmtResult ActOnOpenMPTaskwaitDirective(SourceLocation StartLoc,
+ StmtResult ActOnOpenMPTaskwaitDirective(ArrayRef<OMPClause *> Clauses,
+ SourceLocation StartLoc,
SourceLocation EndLoc);
/// Called on well-formed '\#pragma omp taskgroup'.
StmtResult ActOnOpenMPTaskgroupDirective(ArrayRef<OMPClause *> Clauses,
@@ -10873,6 +10959,12 @@ public:
Stmt *AStmt, SourceLocation StartLoc,
SourceLocation EndLoc);
+ /// Called on well-formed '\#pragma omp loop' after parsing of the
+ /// associated statement.
+ StmtResult ActOnOpenMPGenericLoopDirective(
+ ArrayRef<OMPClause *> Clauses, Stmt *AStmt, SourceLocation StartLoc,
+ SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA);
+
/// Checks correctness of linear modifiers.
bool CheckOpenMPLinearModifier(OpenMPLinearClauseKind LinKind,
SourceLocation LinLoc);
@@ -10897,11 +10989,14 @@ public:
/// \param VariantRef Expression that references the variant function, which
/// must be used instead of the original one, specified in \p DG.
/// \param TI The trait info object representing the match clause.
+ /// \param NumAppendArgs The number of omp_interop_t arguments to account for
+ /// in checking.
/// \returns None, if the function/variant function are not compatible with
/// the pragma, pair of original function/variant ref expression otherwise.
Optional<std::pair<FunctionDecl *, Expr *>>
checkOpenMPDeclareVariantFunction(DeclGroupPtrTy DG, Expr *VariantRef,
- OMPTraitInfo &TI, SourceRange SR);
+ OMPTraitInfo &TI, unsigned NumAppendArgs,
+ SourceRange SR);
/// Called on well-formed '\#pragma omp declare variant' after parsing of
/// the associated method/function.
@@ -10910,8 +11005,19 @@ public:
/// \param VariantRef Expression that references the variant function, which
/// must be used instead of the original one, specified in \p DG.
/// \param TI The context traits associated with the function variant.
- void ActOnOpenMPDeclareVariantDirective(FunctionDecl *FD, Expr *VariantRef,
- OMPTraitInfo &TI, SourceRange SR);
+ /// \param AdjustArgsNothing The list of 'nothing' arguments.
+ /// \param AdjustArgsNeedDevicePtr The list of 'need_device_ptr' arguments.
+ /// \param AppendArgs The list of 'append_args' arguments.
+ /// \param AdjustArgsLoc The Location of an 'adjust_args' clause.
+ /// \param AppendArgsLoc The Location of an 'append_args' clause.
+ /// \param SR The SourceRange of the 'declare variant' directive.
+ void ActOnOpenMPDeclareVariantDirective(
+ FunctionDecl *FD, Expr *VariantRef, OMPTraitInfo &TI,
+ ArrayRef<Expr *> AdjustArgsNothing,
+ ArrayRef<Expr *> AdjustArgsNeedDevicePtr,
+ ArrayRef<OMPDeclareVariantAttr::InteropType> AppendArgs,
+ SourceLocation AdjustArgsLoc, SourceLocation AppendArgsLoc,
+ SourceRange SR);
OMPClause *ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind,
Expr *Expr,
@@ -10939,6 +11045,10 @@ public:
SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc);
+ /// Called on well-formed 'align' clause.
+ OMPClause *ActOnOpenMPAlignClause(Expr *Alignment, SourceLocation StartLoc,
+ SourceLocation LParenLoc,
+ SourceLocation EndLoc);
/// Called on well-formed 'safelen' clause.
OMPClause *ActOnOpenMPSafelenClause(Expr *Length,
SourceLocation StartLoc,
@@ -10993,6 +11103,10 @@ public:
SourceLocation StartLoc,
SourceLocation LParenLoc,
SourceLocation EndLoc);
+ /// Called on well-formed 'when' clause.
+ OMPClause *ActOnOpenMPWhenClause(OMPTraitInfo &TI, SourceLocation StartLoc,
+ SourceLocation LParenLoc,
+ SourceLocation EndLoc);
/// Called on well-formed 'default' clause.
OMPClause *ActOnOpenMPDefaultClause(llvm::omp::DefaultKind Kind,
SourceLocation KindLoc,
@@ -11243,15 +11357,14 @@ public:
SourceLocation ModifierLoc,
SourceLocation EndLoc);
/// Called on well-formed 'map' clause.
- OMPClause *
- ActOnOpenMPMapClause(ArrayRef<OpenMPMapModifierKind> MapTypeModifiers,
- ArrayRef<SourceLocation> MapTypeModifiersLoc,
- CXXScopeSpec &MapperIdScopeSpec,
- DeclarationNameInfo &MapperId,
- OpenMPMapClauseKind MapType, bool IsMapTypeImplicit,
- SourceLocation MapLoc, SourceLocation ColonLoc,
- ArrayRef<Expr *> VarList, const OMPVarListLocTy &Locs,
- ArrayRef<Expr *> UnresolvedMappers = llvm::None);
+ OMPClause *ActOnOpenMPMapClause(
+ ArrayRef<OpenMPMapModifierKind> MapTypeModifiers,
+ ArrayRef<SourceLocation> MapTypeModifiersLoc,
+ CXXScopeSpec &MapperIdScopeSpec, DeclarationNameInfo &MapperId,
+ OpenMPMapClauseKind MapType, bool IsMapTypeImplicit,
+ SourceLocation MapLoc, SourceLocation ColonLoc, ArrayRef<Expr *> VarList,
+ const OMPVarListLocTy &Locs, bool NoDiagnose = false,
+ ArrayRef<Expr *> UnresolvedMappers = llvm::None);
/// Called on well-formed 'num_teams' clause.
OMPClause *ActOnOpenMPNumTeamsClause(Expr *NumTeams, SourceLocation StartLoc,
SourceLocation LParenLoc,
@@ -11326,6 +11439,12 @@ public:
SourceLocation ColonLoc,
SourceLocation EndLoc, Expr *Modifier,
ArrayRef<Expr *> Locators);
+ /// Called on a well-formed 'bind' clause.
+ OMPClause *ActOnOpenMPBindClause(OpenMPBindClauseKind Kind,
+ SourceLocation KindLoc,
+ SourceLocation StartLoc,
+ SourceLocation LParenLoc,
+ SourceLocation EndLoc);
/// The kind of conversion being performed.
enum CheckedConversionKind {
@@ -12147,9 +12266,9 @@ public:
return targetDiag(Loc, PD.getDiagID(), FD) << PD;
}
- /// Check if the expression is allowed to be used in expressions for the
- /// offloading devices.
- void checkDeviceDecl(ValueDecl *D, SourceLocation Loc);
+ /// Check if the type is allowed to be used for the current target.
+ void checkTypeSupport(QualType Ty, SourceLocation Loc,
+ ValueDecl *D = nullptr);
enum CUDAFunctionTarget {
CFT_Device,
@@ -12375,6 +12494,15 @@ public:
const VirtSpecifiers *VS = nullptr);
void CodeCompleteBracketDeclarator(Scope *S);
void CodeCompleteCase(Scope *S);
+ enum class AttributeCompletion {
+ Attribute,
+ Scope,
+ None,
+ };
+ void CodeCompleteAttribute(
+ AttributeCommonInfo::Syntax Syntax,
+ AttributeCompletion Completion = AttributeCompletion::Attribute,
+ const IdentifierInfo *Scope = nullptr);
/// Determines the preferred type of the current function argument, by
/// examining the signatures of all possible overloads.
/// Returns null if unknown or ambiguous, or if code completion is off.
@@ -12626,10 +12754,15 @@ private:
int ArgNum, unsigned ExpectedFieldNum,
bool AllowName);
bool SemaBuiltinARMMemoryTaggingCall(unsigned BuiltinID, CallExpr *TheCall);
- bool SemaBuiltinPPCMMACall(CallExpr *TheCall, const char *TypeDesc);
+ bool SemaBuiltinPPCMMACall(CallExpr *TheCall, unsigned BuiltinID,
+ const char *TypeDesc);
bool CheckPPCMMAType(QualType Type, SourceLocation TypeLoc);
+ bool SemaBuiltinElementwiseMath(CallExpr *TheCall);
+ bool SemaBuiltinElementwiseMathOneArg(CallExpr *TheCall);
+ bool SemaBuiltinReduceMath(CallExpr *TheCall);
+
// Matrix builtin handling.
ExprResult SemaBuiltinMatrixTranspose(CallExpr *TheCall,
ExprResult CallResult);
diff --git a/clang/include/clang/Sema/SemaConcept.h b/clang/include/clang/Sema/SemaConcept.h
index c5f9fc45612a..dc5f0ec97e85 100644
--- a/clang/include/clang/Sema/SemaConcept.h
+++ b/clang/include/clang/Sema/SemaConcept.h
@@ -1,9 +1,8 @@
//===-- SemaConcept.h - Semantic Analysis for Constraints and Concepts ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h
index 027a981df22c..341da5bd1d62 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -402,6 +402,9 @@ enum UnhashedControlBlockRecordTypes {
/// Record code for \#pragma diagnostic mappings.
DIAG_PRAGMA_MAPPINGS,
+
+ /// Record code for the indices of used header search entries.
+ HEADER_SEARCH_ENTRY_USAGE,
};
/// Record code for extension blocks.
@@ -1064,6 +1067,9 @@ enum PredefinedTypeIDs {
/// \brief The '__bf16' type
PREDEF_TYPE_BFLOAT16_ID = 73,
+ /// \brief The '__ibm128' type
+ PREDEF_TYPE_IBM128_ID = 74,
+
/// OpenCL image types with auto numeration
#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
PREDEF_TYPE_##Id##_ID,
@@ -1890,6 +1896,7 @@ enum StmtCode {
STMT_SEH_TRY, // SEHTryStmt
// OpenMP directives
+ STMT_OMP_META_DIRECTIVE,
STMT_OMP_CANONICAL_LOOP,
STMT_OMP_PARALLEL_DIRECTIVE,
STMT_OMP_SIMD_DIRECTIVE,
@@ -1950,6 +1957,7 @@ enum StmtCode {
STMT_OMP_INTEROP_DIRECTIVE,
STMT_OMP_DISPATCH_DIRECTIVE,
STMT_OMP_MASKED_DIRECTIVE,
+ STMT_OMP_GENERIC_LOOP_DIRECTIVE,
EXPR_OMP_ARRAY_SECTION,
EXPR_OMP_ARRAY_SHAPING,
EXPR_OMP_ITERATOR,
diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h
index 242b75baca6c..f24ccf579aa8 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -1162,6 +1162,10 @@ private:
/// definitions. Only populated when using modules in C++.
llvm::DenseMap<EnumDecl *, EnumDecl *> EnumDefinitions;
+ /// A mapping from canonical declarations of records to their canonical
+ /// definitions. Doesn't cover CXXRecordDecl.
+ llvm::DenseMap<RecordDecl *, RecordDecl *> RecordDefinitions;
+
/// When reading a Stmt tree, Stmt operands are placed in this stack.
SmallVector<Stmt *, 16> StmtStack;
@@ -1320,18 +1324,18 @@ private:
ASTReaderListener *Listener,
bool ValidateDiagnosticOptions);
- ASTReadResult ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities);
- ASTReadResult ReadExtensionBlock(ModuleFile &F);
+ llvm::Error ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities);
+ llvm::Error ReadExtensionBlock(ModuleFile &F);
void ReadModuleOffsetMap(ModuleFile &F) const;
- bool ParseLineTable(ModuleFile &F, const RecordData &Record);
- bool ReadSourceManagerBlock(ModuleFile &F);
+ void ParseLineTable(ModuleFile &F, const RecordData &Record);
+ llvm::Error ReadSourceManagerBlock(ModuleFile &F);
llvm::BitstreamCursor &SLocCursorForID(int ID);
SourceLocation getImportLocation(ModuleFile *F);
ASTReadResult ReadModuleMapFileBlock(RecordData &Record, ModuleFile &F,
const ModuleFile *ImportedBy,
unsigned ClientLoadCapabilities);
- ASTReadResult ReadSubmoduleBlock(ModuleFile &F,
- unsigned ClientLoadCapabilities);
+ llvm::Error ReadSubmoduleBlock(ModuleFile &F,
+ unsigned ClientLoadCapabilities);
static bool ParseLanguageOptions(const RecordData &Record, bool Complain,
ASTReaderListener &Listener,
bool AllowCompatibleDifferences);
@@ -1904,8 +1908,9 @@ public:
/// ReadBlockAbbrevs - Enter a subblock of the specified BlockID with the
/// specified cursor. Read the abbreviations that are at the top of the block
/// and then leave the cursor pointing into the block.
- static bool ReadBlockAbbrevs(llvm::BitstreamCursor &Cursor, unsigned BlockID,
- uint64_t *StartOfBlockOffset = nullptr);
+ static llvm::Error ReadBlockAbbrevs(llvm::BitstreamCursor &Cursor,
+ unsigned BlockID,
+ uint64_t *StartOfBlockOffset = nullptr);
/// Finds all the visible declarations with a given name.
/// The current implementation of this method just loads the entire
diff --git a/clang/include/clang/Serialization/ASTRecordReader.h b/clang/include/clang/Serialization/ASTRecordReader.h
index b85609bf4e05..36179ec23408 100644
--- a/clang/include/clang/Serialization/ASTRecordReader.h
+++ b/clang/include/clang/Serialization/ASTRecordReader.h
@@ -350,7 +350,7 @@ struct SavedStreamPosition {
~SavedStreamPosition() {
if (llvm::Error Err = Cursor.JumpToBit(Offset))
llvm::report_fatal_error(
- "Cursor should always be able to go back, failed: " +
+ llvm::Twine("Cursor should always be able to go back, failed: ") +
toString(std::move(Err)));
}
diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h
index ac88cb0a3177..978f6d86ea5c 100644
--- a/clang/include/clang/Serialization/ASTWriter.h
+++ b/clang/include/clang/Serialization/ASTWriter.h
@@ -456,6 +456,9 @@ private:
std::vector<std::unique_ptr<ModuleFileExtensionWriter>>
ModuleFileExtensionWriters;
+ /// User ModuleMaps skipped when writing control block.
+ std::set<const FileEntry *> SkippedModuleMaps;
+
/// Retrieve or create a submodule ID for this module.
unsigned getSubmoduleID(Module *Mod);
@@ -475,7 +478,7 @@ private:
createSignature(StringRef AllBytes, StringRef ASTBlockBytes);
void WriteInputFiles(SourceManager &SourceMgr, HeaderSearchOptions &HSOpts,
- bool Modules);
+ std::set<const FileEntry *> &AffectingModuleMaps);
void WriteSourceManagerBlock(SourceManager &SourceMgr,
const Preprocessor &PP);
void WritePreprocessor(const Preprocessor &PP, bool IsModule);
diff --git a/clang/include/clang/Serialization/ModuleFile.h b/clang/include/clang/Serialization/ModuleFile.h
index b1c8a8c8e72b..b275f8b8db5d 100644
--- a/clang/include/clang/Serialization/ModuleFile.h
+++ b/clang/include/clang/Serialization/ModuleFile.h
@@ -20,6 +20,7 @@
#include "clang/Serialization/ASTBitCodes.h"
#include "clang/Serialization/ContinuousRangeMap.h"
#include "clang/Serialization/ModuleFileExtension.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/SetVector.h"
@@ -173,6 +174,9 @@ public:
/// unique module files based on AST contents.
ASTFileSignature ASTBlockHash;
+ /// The bit vector denoting usage of each header search entry (true = used).
+ llvm::BitVector SearchPathUsage;
+
/// Whether this module has been directly imported by the
/// user.
bool DirectlyImported = false;
diff --git a/clang/include/clang/Serialization/ModuleFileExtension.h b/clang/include/clang/Serialization/ModuleFileExtension.h
index 34ea870724a4..3e84a65c4b80 100644
--- a/clang/include/clang/Serialization/ModuleFileExtension.h
+++ b/clang/include/clang/Serialization/ModuleFileExtension.h
@@ -11,13 +11,14 @@
#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/Support/ExtensibleRTTI.h"
+#include "llvm/Support/HashBuilder.h"
+#include "llvm/Support/MD5.h"
#include <memory>
#include <string>
namespace llvm {
class BitstreamCursor;
class BitstreamWriter;
-class hash_code;
class raw_ostream;
}
@@ -74,19 +75,20 @@ public:
virtual ModuleFileExtensionMetadata getExtensionMetadata() const = 0;
/// Hash information about the presence of this extension into the
- /// module hash code.
+ /// module hash.
///
- /// The module hash code is used to distinguish different variants
- /// of a module that are incompatible. If the presence, absence, or
- /// version of the module file extension should force the creation
- /// of a separate set of module files, override this method to
- /// combine that distinguishing information into the module hash
- /// code.
+ /// The module hash is used to distinguish different variants of a module that
+ /// are incompatible. If the presence, absence, or version of the module file
+ /// extension should force the creation of a separate set of module files,
+ /// override this method to combine that distinguishing information into the
+ /// module hash.
///
- /// The default implementation of this function simply returns the
- /// hash code as given, so the presence/absence of this extension
- /// does not distinguish module files.
- virtual llvm::hash_code hashExtension(llvm::hash_code c) const;
+ /// The default implementation of this function simply does nothing, so the
+ /// presence/absence of this extension does not distinguish module files.
+ using ExtensionHashBuilder =
+ llvm::HashBuilderImpl<llvm::MD5,
+ llvm::support::endian::system_endianness()>;
+ virtual void hashExtension(ExtensionHashBuilder &HBuilder) const;
/// Create a new module file extension writer, which will be
/// responsible for writing the extension contents into a particular
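
A minimal sketch of the new hashing hook in use, assuming a hypothetical
MyExampleExtension class; only the ExtensionHashBuilder type and the
hashExtension() signature come from the header above, everything else is
illustrative.

  class MyExampleExtension : public clang::ModuleFileExtension {
    // Other ModuleFileExtension overrides elided.
    void hashExtension(ExtensionHashBuilder &HBuilder) const override {
      // Everything added here flows into the module hash, so bumping the
      // version number below forces a distinct set of module files.
      HBuilder.add(llvm::StringRef("my-example-extension"));
      HBuilder.add(uint32_t(2));
    }
  };
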
diff --git a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
index 444b00d73f0b..bd21d7778f93 100644
--- a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
+++ b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
@@ -73,6 +73,7 @@ def Taint : Package<"taint">, ParentPackage<SecurityAlpha>;
def CERT : Package<"cert">, ParentPackage<SecurityAlpha>;
def POS : Package<"pos">, ParentPackage<CERT>;
+def ENV : Package<"env">, ParentPackage<CERT>;
def Unix : Package<"unix">;
def UnixAlpha : Package<"unix">, ParentPackage<Alpha>;
@@ -485,7 +486,17 @@ def DynamicMemoryModeling: Checker<"DynamicMemoryModeling">,
"allocating and deallocating functions are annotated with "
"ownership_holds, ownership_takes and ownership_returns.",
"false",
- InAlpha>
+ InAlpha>,
+ CmdLineOption<Boolean,
+ "AddNoOwnershipChangeNotes",
+ "Add an additional note to the bug report for leak-like "
+ "bugs. Dynamically allocated objects passed to functions "
+ "that neither deallocated it, or have taken responsibility "
+ "of the ownership are noted, similarly to "
+ "NoStoreFuncVisitor.",
+ "true",
+ Released,
+ Hide>
]>,
Dependencies<[CStringModeling]>,
Documentation<NotDocumented>,
@@ -592,6 +603,10 @@ def SmartPtrModeling: Checker<"SmartPtrModeling">,
]>,
Hidden;
+def StringChecker: Checker<"StringChecker">,
+ HelpText<"Checks C++ std::string bugs">,
+ Documentation<HasDocumentation>;
+
def MoveChecker: Checker<"Move">,
HelpText<"Find use-after-move bugs in C++">,
CheckerOptions<[
@@ -937,6 +952,14 @@ let ParentPackage = POS in {
} // end "alpha.cert.pos"
+let ParentPackage = ENV in {
+
+ def InvalidPtrChecker : Checker<"InvalidPtr">,
+ HelpText<"Finds usages of possibly invalidated pointers">,
+ Documentation<HasDocumentation>;
+
+} // end "alpha.cert.env"
+
let ParentPackage = SecurityAlpha in {
def ArrayBoundChecker : Checker<"ArrayBound">,
diff --git a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def
index f0359d2dbb3c..aab8e1284bf6 100644
--- a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def
+++ b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def
@@ -190,7 +190,13 @@ ANALYZER_OPTION(bool, ShouldReportIssuesInMainSourceFile,
false)
ANALYZER_OPTION(bool, ShouldWriteStableReportFilename, "stable-report-filename",
- "Whether or not the report filename should be random or not.",
+ "Deprecated: report filenames are now always stable. "
+ "See also 'verbose-report-filename'.",
+ false)
+
+ANALYZER_OPTION(bool, ShouldWriteVerboseReportFilename, "verbose-report-filename",
+ "Whether or not the report filename should contain extra "
+ "information about the issue.",
false)
ANALYZER_OPTION(
@@ -314,6 +320,22 @@ ANALYZER_OPTION(bool, ShouldDisplayCheckerNameForText, "display-checker-name",
"Display the checker name for textual outputs",
true)
+ANALYZER_OPTION(
+ bool, ShouldConsiderSingleElementArraysAsFlexibleArrayMembers,
+ "consider-single-element-arrays-as-flexible-array-members",
+ "Consider single element arrays as flexible array member candidates. "
+ "This will prevent the analyzer from assuming that a single element array "
+ "holds a single element.",
+ false)
+
+ANALYZER_OPTION(
+ bool, ShouldAssumeControlledEnvironment, "assume-controlled-environment",
+ "Whether the analyzed application runs in a controlled environment. "
+ "We will assume that environment variables exist in queries and they hold "
+ "no malicious data. For instance, if this option is enabled, 'getenv()' "
+ "might be modeled by the analyzer to never return NULL.",
+ false)
+
//===----------------------------------------------------------------------===//
// Unsigned analyzer options.
//===----------------------------------------------------------------------===//
diff --git a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h
index ccf35e0a81ec..7514eee7244f 100644
--- a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h
+++ b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h
@@ -395,7 +395,11 @@ public:
return {FullCompilerInvocation,
ShouldDisplayMacroExpansions,
ShouldSerializeStats,
- ShouldWriteStableReportFilename,
+ // The stable report filename option is deprecated because
+ // file names are now always stable. Now the old option acts as
+ // an alias to the new verbose filename option because this
+ // closely mimics the behavior under the old option.
+ ShouldWriteStableReportFilename || ShouldWriteVerboseReportFilename,
AnalyzerWerror,
ShouldApplyFixIts,
ShouldDisplayCheckerNameForText};
diff --git a/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h b/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h
index 24cae12af24a..c42521376af9 100644
--- a/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h
+++ b/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h
@@ -21,6 +21,7 @@
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringRef.h"
#include <list>
#include <memory>
@@ -622,8 +623,118 @@ public:
PathSensitiveBugReport &R) override;
};
-} // namespace ento
+class ObjCMethodCall;
+class CXXConstructorCall;
+
+/// Put a diagnostic on the return statement (or on } in its absence) of all
+/// inlined functions for which some property remained unchanged.
+/// Resulting diagnostics may read, for example, "Returning without writing to X".
+///
+/// Descendants can define what a "state change is", like a change of value
+/// to a memory region, liveness, etc. For function calls where the state did
+/// not change as defined, a custom note may be constructed.
+///
+/// For a minimal example, check out
+/// clang/unittests/StaticAnalyzer/NoStateChangeFuncVisitorTest.cpp.
+class NoStateChangeFuncVisitor : public BugReporterVisitor {
+private:
+ /// Frames modifying the state as defined in \c wasModifiedBeforeCallExit.
+ /// This visitor generates a note only if a function does *not* change the
+ /// state that way. This information is not immediately available
+ /// by looking at the node associated with the exit from the function
+ /// (usually the return statement). To avoid recomputing the same information
+ /// many times (going up the path for each node and checking whether the
+ /// region was written into) we instead lazily compute the stack frames
+ /// along the path.
+  // TODO: Can't we just use a map instead? The current scheme is likely not
+  // cheap enough to justify how difficult it makes the code to read.
+ llvm::SmallPtrSet<const StackFrameContext *, 32> FramesModifying;
+ llvm::SmallPtrSet<const StackFrameContext *, 32> FramesModifyingCalculated;
+
+ /// Check and lazily calculate whether the state is modified in the stack
+ /// frame to which \p CallExitBeginN belongs.
+ /// The calculation is cached in FramesModifying.
+ bool isModifiedInFrame(const ExplodedNode *CallExitBeginN);
+
+ void markFrameAsModifying(const StackFrameContext *SCtx);
+
+  /// Write to \c FramesModifying all stack frames along the path in the current
+  /// stack frame which modify the state.
+ void findModifyingFrames(const ExplodedNode *const CallExitBeginN);
+protected:
+ bugreporter::TrackingKind TKind;
+
+ /// \return Whether the state was modified from the current node, \p CurrN, to
+ /// the end of the stack frame, at \p CallExitBeginN. \p CurrN and
+ /// \p CallExitBeginN are always in the same stack frame.
+ /// Clients should override this callback when a state change is important
+ /// not only on the entire function call, but inside of it as well.
+ /// Example: we may want to leave a note about the lack of locking/unlocking
+ /// on a particular mutex, but not if inside the function its state was
+ /// changed, but also restored. wasModifiedInFunction() wouldn't know of this
+ /// change.
+ virtual bool wasModifiedBeforeCallExit(const ExplodedNode *CurrN,
+ const ExplodedNode *CallExitBeginN) {
+ return false;
+ }
+
+ /// \return Whether the state was modified in the inlined function call in
+ /// between \p CallEnterN and \p CallExitEndN. Mind that the stack frame
+ /// retrieved from a CallEnterN and CallExitEndN is the *caller's* stack
+ /// frame! The inlined function's stack should be retrieved from either the
+ /// immediate successor to \p CallEnterN or immediate predecessor to
+ /// \p CallExitEndN.
+  /// Clients should override this function if state changes local to the
+  /// inlined function are not interesting, only the change occurring as a
+  /// result of it.
+  /// Example: we want to leave a note about a leaked resource object not being
+ /// deallocated / its ownership changed inside a function, and we don't care
+ /// if it was assigned to a local variable (its change in ownership is
+ /// inconsequential).
+ virtual bool wasModifiedInFunction(const ExplodedNode *CallEnterN,
+ const ExplodedNode *CallExitEndN) {
+ return false;
+ }
+
+ /// Consume the information on the non-modifying stack frame in order to
+ /// either emit a note or not. May suppress the report entirely.
+ /// \return Diagnostics piece for the unmodified state in the current
+ /// function, if it decides to emit one. A good description might start with
+ /// "Returning without...".
+ virtual PathDiagnosticPieceRef
+ maybeEmitNoteForObjCSelf(PathSensitiveBugReport &R,
+ const ObjCMethodCall &Call,
+ const ExplodedNode *N) = 0;
+
+ /// Consume the information on the non-modifying stack frame in order to
+ /// either emit a note or not. May suppress the report entirely.
+ /// \return Diagnostics piece for the unmodified state in the current
+ /// function, if it decides to emit one. A good description might start with
+ /// "Returning without...".
+ virtual PathDiagnosticPieceRef
+ maybeEmitNoteForCXXThis(PathSensitiveBugReport &R,
+ const CXXConstructorCall &Call,
+ const ExplodedNode *N) = 0;
+
+ /// Consume the information on the non-modifying stack frame in order to
+ /// either emit a note or not. May suppress the report entirely.
+ /// \return Diagnostics piece for the unmodified state in the current
+ /// function, if it decides to emit one. A good description might start with
+ /// "Returning without...".
+ virtual PathDiagnosticPieceRef
+ maybeEmitNoteForParameters(PathSensitiveBugReport &R, const CallEvent &Call,
+ const ExplodedNode *N) = 0;
+
+public:
+ NoStateChangeFuncVisitor(bugreporter::TrackingKind TKind) : TKind(TKind) {}
+
+ PathDiagnosticPieceRef VisitNode(const ExplodedNode *N,
+ BugReporterContext &BR,
+ PathSensitiveBugReport &R) override final;
+};
+
+} // namespace ento
} // namespace clang
#endif // LLVM_CLANG_STATICANALYZER_CORE_BUGREPORTER_BUGREPORTERVISITORS_H
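
A minimal sketch of a NoStateChangeFuncVisitor subclass, assuming a checker
that tracks a mutex symbol; NoUnlockVisitor and the lockStateDiffers() helper
are hypothetical, only the base-class hooks come from the header above.

  class NoUnlockVisitor final : public NoStateChangeFuncVisitor {
    SymbolRef MutexSym; // the mutex being tracked, supplied by the checker

  public:
    explicit NoUnlockVisitor(SymbolRef Sym)
        : NoStateChangeFuncVisitor(bugreporter::TrackingKind::Thorough),
          MutexSym(Sym) {}

    bool wasModifiedInFunction(const ExplodedNode *CallEnterN,
                               const ExplodedNode *CallExitEndN) override {
      // lockStateDiffers() is a hypothetical helper comparing the lock count
      // of MutexSym in the two program states.
      return lockStateDiffers(CallEnterN->getState(), CallExitEndN->getState(),
                              MutexSym);
    }

    PathDiagnosticPieceRef
    maybeEmitNoteForParameters(PathSensitiveBugReport &R, const CallEvent &Call,
                               const ExplodedNode *N) override {
      // Would construct a "Returning without unlocking the mutex" piece here.
      return nullptr;
    }

    // Not meaningful for a plain mutex symbol: emit nothing for these cases.
    PathDiagnosticPieceRef
    maybeEmitNoteForObjCSelf(PathSensitiveBugReport &, const ObjCMethodCall &,
                             const ExplodedNode *) override {
      return nullptr;
    }
    PathDiagnosticPieceRef
    maybeEmitNoteForCXXThis(PathSensitiveBugReport &, const CXXConstructorCall &,
                            const ExplodedNode *) override {
      return nullptr;
    }
  };
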
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h
new file mode 100644
index 000000000000..396c9a4de440
--- /dev/null
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h
@@ -0,0 +1,173 @@
+//===- CallDescription.h - function/method call matching --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file defines a generic mechanism for matching function and
+/// method calls in C, C++, and Objective-C. Instances of these
+/// classes are frequently used together with the CallEvent classes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_CALLDESCRIPTION_H
+#define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_CALLDESCRIPTION_H
+
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/Support/Compiler.h"
+#include <vector>
+
+namespace clang {
+class IdentifierInfo;
+} // namespace clang
+
+namespace clang {
+namespace ento {
+
+enum CallDescriptionFlags : unsigned {
+ CDF_None = 0,
+
+ /// Describes a C standard function that is sometimes implemented as a macro
+ /// that expands to a compiler builtin with some __builtin prefix.
+  /// The builtin may also have a few extra arguments on top of the requested
+ /// number of arguments.
+ CDF_MaybeBuiltin = 1 << 0,
+};
+
+/// This class represents a description of a function call using the number of
+/// arguments and the name of the function.
+class CallDescription {
+ friend class CallEvent;
+ using MaybeCount = Optional<unsigned>;
+
+ mutable Optional<const IdentifierInfo *> II;
+ // The list of the qualified names used to identify the specified CallEvent,
+ // e.g. "{a, b}" represent the qualified names, like "a::b".
+ std::vector<std::string> QualifiedName;
+ MaybeCount RequiredArgs;
+ MaybeCount RequiredParams;
+ int Flags;
+
+public:
+ /// Constructs a CallDescription object.
+ ///
+ /// @param QualifiedName The list of the name qualifiers of the function that
+ /// will be matched. The user is allowed to skip any of the qualifiers.
+ /// For example, {"std", "basic_string", "c_str"} would match both
+ /// std::basic_string<...>::c_str() and std::__1::basic_string<...>::c_str().
+ ///
+ /// @param RequiredArgs The number of arguments that is expected to match a
+  /// call. Omit this parameter to match every occurrence of a call with a given
+  /// name regardless of the number of arguments.
+ CallDescription(CallDescriptionFlags Flags,
+ ArrayRef<const char *> QualifiedName,
+ MaybeCount RequiredArgs = None,
+ MaybeCount RequiredParams = None);
+
+ /// Construct a CallDescription with default flags.
+ CallDescription(ArrayRef<const char *> QualifiedName,
+ MaybeCount RequiredArgs = None,
+ MaybeCount RequiredParams = None);
+
+ CallDescription(std::nullptr_t) = delete;
+
+ /// Get the name of the function that this object matches.
+ StringRef getFunctionName() const { return QualifiedName.back(); }
+
+ /// Get the qualified name parts in reversed order.
+ /// E.g. { "std", "vector", "data" } -> "vector", "std"
+ auto begin_qualified_name_parts() const {
+ return std::next(QualifiedName.rbegin());
+ }
+ auto end_qualified_name_parts() const { return QualifiedName.rend(); }
+
+  /// It's false if and only if we expect a single identifier, such as
+ /// `getenv`. It's true for `std::swap`, or `my::detail::container::data`.
+ bool hasQualifiedNameParts() const { return QualifiedName.size() > 1; }
+
+ /// @name Matching CallDescriptions against a CallEvent
+ /// @{
+
+ /// Returns true if the CallEvent is a call to a function that matches
+ /// the CallDescription.
+ ///
+ /// \note This function is not intended to be used to match Obj-C method
+ /// calls.
+ bool matches(const CallEvent &Call) const;
+
+  /// Returns true if the CallEvent matches any of the CallDescriptions
+  /// supplied.
+ ///
+ /// \note This function is not intended to be used to match Obj-C method
+ /// calls.
+ friend bool matchesAny(const CallEvent &Call, const CallDescription &CD1) {
+ return CD1.matches(Call);
+ }
+
+ /// \copydoc clang::ento::matchesAny(const CallEvent &, const CallDescription &)
+ template <typename... Ts>
+ friend bool matchesAny(const CallEvent &Call, const CallDescription &CD1,
+ const Ts &...CDs) {
+ return CD1.matches(Call) || matchesAny(Call, CDs...);
+ }
+ /// @}
+};
+
+/// An immutable map from CallDescriptions to arbitrary data. Provides a unified
+/// way for checkers to react on function calls.
+template <typename T> class CallDescriptionMap {
+ friend class CallDescriptionSet;
+
+  // Some call descriptions aren't easily hashable (e.g., the ones with qualified
+  // names in which some sections are omitted), so let's put them
+  // in a simple vector and use linear lookup.
+  // TODO: Implement an actual map for fast lookup for "hashable" call
+  // descriptions (e.g., the ones for C functions that just match the name).
+ std::vector<std::pair<CallDescription, T>> LinearMap;
+
+public:
+ CallDescriptionMap(
+ std::initializer_list<std::pair<CallDescription, T>> &&List)
+ : LinearMap(List) {}
+
+ ~CallDescriptionMap() = default;
+
+ // These maps are usually stored once per checker, so let's make sure
+ // we don't do redundant copies.
+ CallDescriptionMap(const CallDescriptionMap &) = delete;
+  CallDescriptionMap &operator=(const CallDescriptionMap &) = delete;
+
+ LLVM_NODISCARD const T *lookup(const CallEvent &Call) const {
+ // Slow path: linear lookup.
+ // TODO: Implement some sort of fast path.
+ for (const std::pair<CallDescription, T> &I : LinearMap)
+ if (I.first.matches(Call))
+ return &I.second;
+
+ return nullptr;
+ }
+};
+
+/// An immutable set of CallDescriptions.
+/// Checkers can efficiently decide if a given CallEvent matches any
+/// CallDescription in the set.
+class CallDescriptionSet {
+ CallDescriptionMap<bool /*unused*/> Impl = {};
+
+public:
+ CallDescriptionSet(std::initializer_list<CallDescription> &&List);
+
+ CallDescriptionSet(const CallDescriptionSet &) = delete;
+  CallDescriptionSet &operator=(const CallDescriptionSet &) = delete;
+
+ LLVM_NODISCARD bool contains(const CallEvent &Call) const;
+};
+
+} // namespace ento
+} // namespace clang
+
+#endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_CALLDESCRIPTION_H
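
A minimal sketch of the intended usage, assuming a hypothetical
DummyFileChecker; the fopen/fclose descriptions and the checkPostCall wiring
are illustrative, only CallDescription, CDF_MaybeBuiltin and
CallDescriptionMap::lookup() come from the new header above.

  class DummyFileChecker {
    using Handler = void (*)(const CallEvent &Call, CheckerContext &C);

    static void handleOpen(const CallEvent &, CheckerContext &) { /* ... */ }
    static void handleClose(const CallEvent &, CheckerContext &) { /* ... */ }

    // One CallDescription per modeled function; CDF_MaybeBuiltin lets "fopen"
    // also match a builtin/macro-expanded spelling, as documented above.
    const CallDescriptionMap<Handler> Callbacks{
        {{CDF_MaybeBuiltin, {"fopen"}, 2}, &DummyFileChecker::handleOpen},
        {{CDF_MaybeBuiltin, {"fclose"}, 1}, &DummyFileChecker::handleClose},
    };

  public:
    void checkPostCall(const CallEvent &Call, CheckerContext &C) const {
      if (const Handler *H = Callbacks.lookup(Call))
        (*H)(Call, C);
    }
  };
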
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h
index 060fff1a7407..d135e70dd75d 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h
@@ -257,20 +257,6 @@ public:
return false;
}
- /// Returns true if the CallEvent is a call to a function that matches
- /// the CallDescription.
- ///
- /// Note that this function is not intended to be used to match Obj-C method
- /// calls.
- bool isCalled(const CallDescription &CD) const;
-
- /// Returns true whether the CallEvent is any of the CallDescriptions supplied
- /// as a parameter.
- template <typename FirstCallDesc, typename... CallDescs>
- bool isCalled(const FirstCallDesc &First, const CallDescs &... Rest) const {
- return isCalled(First) || isCalled(Rest...);
- }
-
/// Returns a source range for the entire call, suitable for
/// outputting in diagnostics.
virtual SourceRange getSourceRange() const {
@@ -1225,99 +1211,6 @@ public:
}
};
-enum CallDescriptionFlags : int {
- /// Describes a C standard function that is sometimes implemented as a macro
- /// that expands to a compiler builtin with some __builtin prefix.
- /// The builtin may as well have a few extra arguments on top of the requested
- /// number of arguments.
- CDF_MaybeBuiltin = 1 << 0,
-};
-
-/// This class represents a description of a function call using the number of
-/// arguments and the name of the function.
-class CallDescription {
- friend CallEvent;
-
- mutable IdentifierInfo *II = nullptr;
- mutable bool IsLookupDone = false;
- // The list of the qualified names used to identify the specified CallEvent,
- // e.g. "{a, b}" represent the qualified names, like "a::b".
- std::vector<const char *> QualifiedName;
- Optional<unsigned> RequiredArgs;
- Optional<size_t> RequiredParams;
- int Flags;
-
- // A constructor helper.
- static Optional<size_t> readRequiredParams(Optional<unsigned> RequiredArgs,
- Optional<size_t> RequiredParams) {
- if (RequiredParams)
- return RequiredParams;
- if (RequiredArgs)
- return static_cast<size_t>(*RequiredArgs);
- return None;
- }
-
-public:
- /// Constructs a CallDescription object.
- ///
- /// @param QualifiedName The list of the name qualifiers of the function that
- /// will be matched. The user is allowed to skip any of the qualifiers.
- /// For example, {"std", "basic_string", "c_str"} would match both
- /// std::basic_string<...>::c_str() and std::__1::basic_string<...>::c_str().
- ///
- /// @param RequiredArgs The number of arguments that is expected to match a
- /// call. Omit this parameter to match every occurrence of call with a given
- /// name regardless the number of arguments.
- CallDescription(int Flags, ArrayRef<const char *> QualifiedName,
- Optional<unsigned> RequiredArgs = None,
- Optional<size_t> RequiredParams = None)
- : QualifiedName(QualifiedName), RequiredArgs(RequiredArgs),
- RequiredParams(readRequiredParams(RequiredArgs, RequiredParams)),
- Flags(Flags) {}
-
- /// Construct a CallDescription with default flags.
- CallDescription(ArrayRef<const char *> QualifiedName,
- Optional<unsigned> RequiredArgs = None,
- Optional<size_t> RequiredParams = None)
- : CallDescription(0, QualifiedName, RequiredArgs, RequiredParams) {}
-
- /// Get the name of the function that this object matches.
- StringRef getFunctionName() const { return QualifiedName.back(); }
-};
-
-/// An immutable map from CallDescriptions to arbitrary data. Provides a unified
-/// way for checkers to react on function calls.
-template <typename T> class CallDescriptionMap {
- // Some call descriptions aren't easily hashable (eg., the ones with qualified
- // names in which some sections are omitted), so let's put them
- // in a simple vector and use linear lookup.
- // TODO: Implement an actual map for fast lookup for "hashable" call
- // descriptions (eg., the ones for C functions that just match the name).
- std::vector<std::pair<CallDescription, T>> LinearMap;
-
-public:
- CallDescriptionMap(
- std::initializer_list<std::pair<CallDescription, T>> &&List)
- : LinearMap(List) {}
-
- ~CallDescriptionMap() = default;
-
- // These maps are usually stored once per checker, so let's make sure
- // we don't do redundant copies.
- CallDescriptionMap(const CallDescriptionMap &) = delete;
- CallDescriptionMap &operator=(const CallDescription &) = delete;
-
- const T *lookup(const CallEvent &Call) const {
- // Slow path: linear lookup.
- // TODO: Implement some sort of fast path.
- for (const std::pair<CallDescription, T> &I : LinearMap)
- if (Call.isCalled(I.first))
- return &I.second;
-
- return nullptr;
- }
-};
-
/// Manages the lifetime of CallEvent objects.
///
/// CallEventManager provides a way to create arbitrary CallEvents "on the
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h
index c67df1e51b4f..a80484610131 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h
@@ -48,6 +48,7 @@ public:
ID.AddPointer(&To());
}
void dump(raw_ostream &OS) const;
+ void dump() const;
// In order to keep non-overlapping ranges sorted, we can compare only From
// points.
@@ -281,7 +282,27 @@ public:
/// where N = size(this)
bool contains(llvm::APSInt Point) const { return containsImpl(Point); }
+ bool containsZero() const {
+ APSIntType T{getMinValue()};
+ return contains(T.getZeroValue());
+ }
+
+ /// Test if the range is the [0,0] range.
+ ///
+ /// Complexity: O(1)
+ bool encodesFalseRange() const {
+ const llvm::APSInt *Constant = getConcreteValue();
+ return Constant && Constant->isZero();
+ }
+
+ /// Test if the range doesn't contain zero.
+ ///
+ /// Complexity: O(logN)
+ /// where N = size(this)
+ bool encodesTrueRange() const { return !containsZero(); }
+
void dump(raw_ostream &OS) const;
+ void dump() const;
bool operator==(const RangeSet &Other) const { return *Impl == *Other.Impl; }
bool operator!=(const RangeSet &Other) const { return !(*this == Other); }
@@ -387,11 +408,22 @@ private:
static void computeAdjustment(SymbolRef &Sym, llvm::APSInt &Adjustment);
};
-/// Try to simplify a given symbolic expression's associated value based on the
-/// constraints in State. This is needed because the Environment bindings are
-/// not getting updated when a new constraint is added to the State.
+/// Try to simplify a given symbolic expression based on the constraints in
+/// State. This is needed because the Environment bindings are not getting
+/// updated when a new constraint is added to the State. If the symbol is
+/// simplified to a non-symbol (e.g. to a constant) then the original symbol
+/// is returned. We use this function in the family of assumeSymNE/EQ/LT/../GE
+/// functions where we can work only with symbols. Use the other function
+/// (simplifyToSVal) if you are interested in a simplification that may yield
+/// a concrete constant value.
SymbolRef simplify(ProgramStateRef State, SymbolRef Sym);
+/// Try to simplify a given symbolic expression's associated `SVal` based on the
+/// constraints in State. This is very similar to `simplify`, but this function
+/// always returns the simplified SVal. The simplified SVal might be a single
+/// constant (i.e. `ConcreteInt`).
+SVal simplifyToSVal(ProgramStateRef State, SymbolRef Sym);
+
} // namespace ento
} // namespace clang
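
A minimal sketch of how the new RangeSet queries compose, assuming Range is a
RangeSet already obtained for a symbol from the constraint manager; the helper
function itself is hypothetical.

  static llvm::Optional<bool> knownTruthValue(const RangeSet &Range) {
    if (Range.encodesTrueRange())   // excludes 0 entirely
      return true;
    if (Range.encodesFalseRange())  // exactly the [0,0] range
      return false;
    return llvm::None;              // contains both zero and non-zero values
  }
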
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h
index 87a49cf4ffe9..61dfdbb0688b 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h
@@ -33,6 +33,7 @@
namespace clang {
+class AnalyzerOptions;
class BlockDecl;
class CXXBoolLiteralExpr;
class CXXMethodDecl;
@@ -66,6 +67,8 @@ protected:
ProgramStateManager &StateMgr;
+ const AnalyzerOptions &AnOpts;
+
/// The scalar type to use for array indices.
const QualType ArrayIndexTy;
@@ -96,11 +99,7 @@ protected:
public:
SValBuilder(llvm::BumpPtrAllocator &alloc, ASTContext &context,
- ProgramStateManager &stateMgr)
- : Context(context), BasicVals(context, alloc),
- SymMgr(context, BasicVals, alloc), MemMgr(context, alloc),
- StateMgr(stateMgr), ArrayIndexTy(context.LongLongTy),
- ArrayIndexWidth(context.getTypeSize(ArrayIndexTy)) {}
+ ProgramStateManager &stateMgr);
virtual ~SValBuilder() = default;
@@ -188,6 +187,8 @@ public:
MemRegionManager &getRegionManager() { return MemMgr; }
const MemRegionManager &getRegionManager() const { return MemMgr; }
+ const AnalyzerOptions &getAnalyzerOptions() const { return AnOpts; }
+
// Forwarding methods to SymbolManager.
const SymbolConjured* conjureSymbol(const Stmt *stmt,
diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h
index 76edf150dbee..d58e736ab6a6 100644
--- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h
+++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h
@@ -48,7 +48,8 @@ class DependencyScanningService {
public:
DependencyScanningService(ScanningMode Mode, ScanningOutputFormat Format,
bool ReuseFileManager = true,
- bool SkipExcludedPPRanges = true);
+ bool SkipExcludedPPRanges = true,
+ bool OptimizeArgs = false);
ScanningMode getMode() const { return Mode; }
@@ -58,6 +59,8 @@ public:
bool canSkipExcludedPPRanges() const { return SkipExcludedPPRanges; }
+ bool canOptimizeArgs() const { return OptimizeArgs; }
+
DependencyScanningFilesystemSharedCache &getSharedCache() {
return SharedCache;
}
@@ -70,6 +73,8 @@ private:
/// ranges by bumping the buffer pointer in the lexer instead of lexing the
/// tokens in the range until reaching the corresponding directive.
const bool SkipExcludedPPRanges;
+ /// Whether to optimize the modules' command-line arguments.
+ const bool OptimizeArgs;
/// The global file system cache.
DependencyScanningFilesystemSharedCache SharedCache;
};
diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h
index f88dc472c80b..9e2ff82f5614 100644
--- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h
+++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h
@@ -77,16 +77,18 @@ public:
/// Print out the dependency information into a string using the dependency
/// file format that is specified in the options (-MD is the default) and
- /// return it.
+ /// return it. If \p ModuleName isn't empty, this function returns the
+ /// dependency information of module \p ModuleName.
///
/// \returns A \c StringError with the diagnostic output if clang errors
/// occurred, dependency file contents otherwise.
llvm::Expected<std::string>
- getDependencyFile(const tooling::CompilationDatabase &Compilations,
- StringRef CWD);
+ getDependencyFile(const std::vector<std::string> &CommandLine, StringRef CWD,
+ llvm::Optional<StringRef> ModuleName = None);
/// Collect the full module dependency graph for the input, ignoring any
- /// modules which have already been seen.
+ /// modules which have already been seen. If \p ModuleName isn't empty, this
+ /// function returns the full dependency information of module \p ModuleName.
///
/// \param AlreadySeen This stores modules which have previously been
/// reported. Use the same instance for all calls to this
@@ -97,8 +99,9 @@ public:
/// \returns a \c StringError with the diagnostic output if clang errors
/// occurred, \c FullDependencies otherwise.
llvm::Expected<FullDependenciesResult>
- getFullDependencies(const tooling::CompilationDatabase &Compilations,
- StringRef CWD, const llvm::StringSet<> &AlreadySeen);
+ getFullDependencies(const std::vector<std::string> &CommandLine,
+ StringRef CWD, const llvm::StringSet<> &AlreadySeen,
+ llvm::Optional<StringRef> ModuleName = None);
private:
DependencyScanningWorker Worker;
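
A minimal sketch of driving the updated interface, where the raw command line
replaces the former CompilationDatabase argument; the ScanningMode and
ScanningOutputFormat enumerators and the "/work" directory are assumptions, and
the optional ModuleName argument is left at its default.

  DependencyScanningService Service(ScanningMode::MinimizedSourcePreprocessing,
                                    ScanningOutputFormat::Make);
  DependencyScanningTool Tool(Service);
  std::vector<std::string> CommandLine = {"clang", "-c", "foo.c", "-o", "foo.o"};
  if (llvm::Expected<std::string> MakeDeps =
          Tool.getDependencyFile(CommandLine, /*CWD=*/"/work"))
    llvm::outs() << *MakeDeps;
  else
    llvm::errs() << llvm::toString(MakeDeps.takeError()) << "\n";
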
diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h
index 5903ad13c1d8..0f3a5369a021 100644
--- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h
+++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h
@@ -14,7 +14,6 @@
#include "clang/Basic/LLVM.h"
#include "clang/Frontend/PCHContainerOperations.h"
#include "clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h"
-#include "clang/Tooling/CompilationDatabase.h"
#include "clang/Tooling/DependencyScanning/DependencyScanningService.h"
#include "clang/Tooling/DependencyScanning/ModuleDepCollector.h"
#include "llvm/Support/Error.h"
@@ -56,31 +55,36 @@ class DependencyScanningWorker {
public:
DependencyScanningWorker(DependencyScanningService &Service);
- /// Run the dependency scanning tool for a given clang driver invocation (as
- /// specified for the given Input in the CDB), and report the discovered
- /// dependencies to the provided consumer.
+ /// Run the dependency scanning tool for a given clang driver command-line,
+ /// and report the discovered dependencies to the provided consumer. If \p
+ /// ModuleName isn't empty, this function reports the dependencies of module
+ /// \p ModuleName.
///
/// \returns A \c StringError with the diagnostic output if clang errors
/// occurred, success otherwise.
- llvm::Error computeDependencies(const std::string &Input,
- StringRef WorkingDirectory,
- const CompilationDatabase &CDB,
- DependencyConsumer &Consumer);
+ llvm::Error computeDependencies(StringRef WorkingDirectory,
+ const std::vector<std::string> &CommandLine,
+ DependencyConsumer &Consumer,
+ llvm::Optional<StringRef> ModuleName = None);
private:
- IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts;
std::shared_ptr<PCHContainerOperations> PCHContainerOps;
std::unique_ptr<ExcludedPreprocessorDirectiveSkipMapping> PPSkipMappings;
+ /// The physical filesystem overlaid by `InMemoryFS`.
llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> RealFS;
+  /// The in-memory filesystem laid on top of the physical filesystem in `RealFS`.
+ llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFS;
/// The file system that is used by each worker when scanning for
- /// dependencies. This filesystem persists accross multiple compiler
+ /// dependencies. This filesystem persists across multiple compiler
/// invocations.
llvm::IntrusiveRefCntPtr<DependencyScanningWorkerFilesystem> DepFS;
- /// The file manager that is reused accross multiple invocations by this
+ /// The file manager that is reused across multiple invocations by this
/// worker. If null, the file manager will not be reused.
llvm::IntrusiveRefCntPtr<FileManager> Files;
ScanningOutputFormat Format;
+ /// Whether to optimize the modules' command-line arguments.
+ bool OptimizeArgs;
};
} // end namespace dependencies
diff --git a/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h b/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h
index a9f2b4d0c6fc..e61147d6f2b0 100644
--- a/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h
+++ b/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h
@@ -1,9 +1,8 @@
//===- ModuleDepCollector.h - Callbacks to collect deps ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -102,7 +101,7 @@ struct ModuleDeps {
bool ImportedByMainFile = false;
/// Compiler invocation that can be used to build this module (without paths).
- CompilerInvocation Invocation;
+ CompilerInvocation BuildInvocation;
/// Gets the canonical command line suitable for passing to clang.
///
@@ -142,8 +141,7 @@ class ModuleDepCollector;
/// \c DependencyConsumer of the parent \c ModuleDepCollector.
class ModuleDepCollectorPP final : public PPCallbacks {
public:
- ModuleDepCollectorPP(CompilerInstance &I, ModuleDepCollector &MDC)
- : Instance(I), MDC(MDC) {}
+ ModuleDepCollectorPP(ModuleDepCollector &MDC) : MDC(MDC) {}
void FileChanged(SourceLocation Loc, FileChangeReason Reason,
SrcMgr::CharacteristicKind FileType,
@@ -160,8 +158,6 @@ public:
void EndOfMainFile() override;
private:
- /// The compiler instance for the current translation unit.
- CompilerInstance &Instance;
/// The parent dependency collector.
ModuleDepCollector &MDC;
/// Working set of direct modular dependencies.
@@ -173,7 +169,11 @@ private:
/// Adds direct modular dependencies that have already been built to the
/// ModuleDeps instance.
- void addDirectPrebuiltModuleDeps(const Module *M, ModuleDeps &MD);
+ void
+ addAllSubmodulePrebuiltDeps(const Module *M, ModuleDeps &MD,
+ llvm::DenseSet<const Module *> &SeenSubmodules);
+ void addModulePrebuiltDeps(const Module *M, ModuleDeps &MD,
+ llvm::DenseSet<const Module *> &SeenSubmodules);
/// Traverses the previously collected direct modular dependencies to discover
/// transitive modular dependencies and fills the parent \c ModuleDepCollector
@@ -190,8 +190,8 @@ private:
class ModuleDepCollector final : public DependencyCollector {
public:
ModuleDepCollector(std::unique_ptr<DependencyOutputOptions> Opts,
- CompilerInstance &I, DependencyConsumer &C,
- CompilerInvocation &&OriginalCI);
+ CompilerInstance &ScanInstance, DependencyConsumer &C,
+ CompilerInvocation &&OriginalCI, bool OptimizeArgs);
void attachToPreprocessor(Preprocessor &PP) override;
void attachToASTReader(ASTReader &R) override;
@@ -199,8 +199,8 @@ public:
private:
friend ModuleDepCollectorPP;
- /// The compiler instance for the current translation unit.
- CompilerInstance &Instance;
+ /// The compiler instance for scanning the current translation unit.
+ CompilerInstance &ScanInstance;
/// The consumer of collected dependency information.
DependencyConsumer &Consumer;
/// Path to the main source file.
@@ -216,6 +216,8 @@ private:
std::unique_ptr<DependencyOutputOptions> Opts;
/// The original Clang invocation passed to dependency scanner.
CompilerInvocation OriginalInvocation;
+ /// Whether to optimize the modules' command-line arguments.
+ bool OptimizeArgs;
/// Checks whether the module is known as being prebuilt.
bool isPrebuiltModule(const Module *M);
@@ -223,8 +225,9 @@ private:
/// Constructs a CompilerInvocation that can be used to build the given
/// module, excluding paths to discovered modular dependencies that are yet to
/// be built.
- CompilerInvocation
- makeInvocationForModuleBuildWithoutPaths(const ModuleDeps &Deps) const;
+ CompilerInvocation makeInvocationForModuleBuildWithoutPaths(
+ const ModuleDeps &Deps,
+ llvm::function_ref<void(CompilerInvocation &)> Optimize) const;
};
} // end namespace dependencies
diff --git a/clang/include/clang/Tooling/Inclusions/HeaderIncludes.h b/clang/include/clang/Tooling/Inclusions/HeaderIncludes.h
index 02fb2875671a..c1b7baec7ec5 100644
--- a/clang/include/clang/Tooling/Inclusions/HeaderIncludes.h
+++ b/clang/include/clang/Tooling/Inclusions/HeaderIncludes.h
@@ -84,7 +84,7 @@ private:
// An include header quoted with either <> or "".
std::string Name;
- // The range of the whole line of include directive including any eading
+ // The range of the whole line of include directive including any leading
// whitespaces and trailing comment.
tooling::Range R;
};
@@ -127,7 +127,6 @@ private:
llvm::Regex IncludeRegex;
};
-
} // namespace tooling
} // namespace clang
diff --git a/clang/include/clang/Tooling/Inclusions/IncludeStyle.h b/clang/include/clang/Tooling/Inclusions/IncludeStyle.h
index 4caaf4121f15..d54f8a402e2d 100644
--- a/clang/include/clang/Tooling/Inclusions/IncludeStyle.h
+++ b/clang/include/clang/Tooling/Inclusions/IncludeStyle.h
@@ -50,6 +50,7 @@ struct IncludeStyle {
/// Dependent on the value, multiple ``#include`` blocks can be sorted
/// as one and divided based on category.
+ /// \version 7
IncludeBlocksStyle IncludeBlocks;
/// See documentation of ``IncludeCategories``.
@@ -113,6 +114,7 @@ struct IncludeStyle {
/// Priority: 1
/// SortPriority: 0
/// \endcode
+ /// \version 7
std::vector<IncludeCategory> IncludeCategories;
/// Specify a regular expression of suffixes that are allowed in the
@@ -126,6 +128,7 @@ struct IncludeStyle {
///
/// For example, if configured to "(_test)?$", then a header a.h would be seen
/// as the "main" include in both a.cc and a_test.cc.
+ /// \version 7
std::string IncludeIsMainRegex;
/// Specify a regular expression for files being formatted
@@ -146,6 +149,7 @@ struct IncludeStyle {
/// also being respected in later phase). Without this option set,
/// ``ClassImpl.hpp`` would not have the main include file put on top
/// before any other include.
+ /// \version 7
std::string IncludeIsMainSourceRegex;
};
diff --git a/clang/include/clang/Tooling/Tooling.h b/clang/include/clang/Tooling/Tooling.h
index 73d09662562b..c9c6a2ffb7b3 100644
--- a/clang/include/clang/Tooling/Tooling.h
+++ b/clang/include/clang/Tooling/Tooling.h
@@ -115,7 +115,7 @@ public:
/// T must derive from clang::FrontendAction.
///
/// Example:
-/// FrontendActionFactory *Factory =
+/// std::unique_ptr<FrontendActionFactory> Factory =
/// newFrontendActionFactory<clang::SyntaxOnlyAction>();
template <typename T>
std::unique_ptr<FrontendActionFactory> newFrontendActionFactory();
@@ -145,7 +145,7 @@ public:
///
/// Example:
/// struct ProvidesASTConsumers {
-/// clang::ASTConsumer *newASTConsumer();
+/// std::unique_ptr<clang::ASTConsumer> newASTConsumer();
/// } Factory;
/// std::unique_ptr<FrontendActionFactory> FactoryAdapter(
/// newFrontendActionFactory(&Factory));
@@ -268,11 +268,17 @@ public:
~ToolInvocation();
- /// Set a \c DiagnosticConsumer to use during parsing.
+ /// Set a \c DiagnosticConsumer to use during driver command-line parsing and
+ /// the action invocation itself.
void setDiagnosticConsumer(DiagnosticConsumer *DiagConsumer) {
this->DiagConsumer = DiagConsumer;
}
+ /// Set a \c DiagnosticOptions to use during driver command-line parsing.
+ void setDiagnosticOptions(DiagnosticOptions *DiagOpts) {
+ this->DiagOpts = DiagOpts;
+ }
+
/// Run the clang invocation.
///
/// \returns True if there were no errors during execution.
@@ -290,6 +296,7 @@ public:
FileManager *Files;
std::shared_ptr<PCHContainerOperations> PCHContainerOps;
DiagnosticConsumer *DiagConsumer = nullptr;
+ DiagnosticOptions *DiagOpts = nullptr;
};
/// Utility to run a FrontendAction over a set of files.
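
A minimal sketch of wiring the new DiagnosticOptions hook on a ToolInvocation;
the file name test.cc and the use of SyntaxOnlyAction/IgnoringDiagConsumer are
illustrative, only setDiagnosticConsumer() and setDiagnosticOptions() come from
the hunk above.

  llvm::IntrusiveRefCntPtr<clang::FileManager> Files(
      new clang::FileManager(clang::FileSystemOptions()));
  clang::tooling::ToolInvocation Invocation(
      {"clang", "-fsyntax-only", "test.cc"},
      std::make_unique<clang::SyntaxOnlyAction>(), Files.get());

  clang::IgnoringDiagConsumer SilentDiags;
  llvm::IntrusiveRefCntPtr<clang::DiagnosticOptions> DiagOpts(
      new clang::DiagnosticOptions());
  Invocation.setDiagnosticConsumer(&SilentDiags);
  Invocation.setDiagnosticOptions(DiagOpts.get()); // new in this patch
  bool Success = Invocation.run();
  (void)Success;
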
diff --git a/clang/include/clang/Tooling/Transformer/RangeSelector.h b/clang/include/clang/Tooling/Transformer/RangeSelector.h
index 8ff31f7a0342..38ec24efec65 100644
--- a/clang/include/clang/Tooling/Transformer/RangeSelector.h
+++ b/clang/include/clang/Tooling/Transformer/RangeSelector.h
@@ -50,7 +50,7 @@ inline RangeSelector range(std::string BeginID, std::string EndID) {
/// Selects the (empty) range [B,B) when \p Selector selects the range [B,E).
RangeSelector before(RangeSelector Selector);
-/// Selects the the point immediately following \p Selector. That is, the
+/// Selects the point immediately following \p Selector. That is, the
/// (empty) range [E,E), when \p Selector selects either
/// * the CharRange [B,E) or
/// * the TokenRange [B,E'] where the token at E' spans the range [E',E).
diff --git a/clang/include/clang/Tooling/Transformer/Stencil.h b/clang/include/clang/Tooling/Transformer/Stencil.h
index 1b7495eb0262..249f95b7391d 100644
--- a/clang/include/clang/Tooling/Transformer/Stencil.h
+++ b/clang/include/clang/Tooling/Transformer/Stencil.h
@@ -117,6 +117,38 @@ inline Stencil ifBound(llvm::StringRef Id, llvm::StringRef TrueText,
detail::makeStencil(FalseText));
}
+/// Chooses between multiple stencils, based on the presence of bound nodes. \p
+/// CaseStencils takes a vector of (ID, \c Stencil) pairs and checks each ID in
+/// order to see if it's bound to a node. If so, the associated \c Stencil is
+/// run and all other cases are ignored. An optional \p DefaultStencil can be
+/// provided to be run if all cases are exhausted because none of the provided
+/// IDs are bound. If no default case is provided and all cases are exhausted,
+/// the stencil will fail with error `llvm::errc::result_out_of_range`.
+///
+/// For example, say one matches a statement's type with:
+/// anyOf(
+/// qualType(isInteger()).bind("int"),
+/// qualType(realFloatingPointType()).bind("float"),
+/// qualType(isAnyCharacter()).bind("char"),
+/// booleanType().bind("bool"))
+///
+/// Then, one can decide in a stencil how to construct a literal.
+/// cat("a = ",
+/// selectBound(
+/// {{"int", cat("0")},
+/// {"float", cat("0.0")},
+/// {"char", cat("'\\0'")},
+/// {"bool", cat("false")}}))
+///
+/// In addition, one could supply a default case for all other types:
+/// selectBound(
+/// {{"int", cat("0")},
+/// ...
+/// {"bool", cat("false")}},
+/// cat("{}"))
+Stencil selectBound(std::vector<std::pair<std::string, Stencil>> CaseStencils,
+ Stencil DefaultStencil = nullptr);
+
/// Wraps a \c MatchConsumer in a \c Stencil, so that it can be used in a \c
/// Stencil. This supports user-defined extensions to the \c Stencil language.
Stencil run(MatchConsumer<std::string> C);
diff --git a/clang/include/clang/module.modulemap b/clang/include/clang/module.modulemap
index 33fcf9dc7576..e850a1cd4b9a 100644
--- a/clang/include/clang/module.modulemap
+++ b/clang/include/clang/module.modulemap
@@ -45,6 +45,7 @@ module Clang_Basic {
textual header "Basic/BuiltinsNVPTX.def"
textual header "Basic/BuiltinsPPC.def"
textual header "Basic/BuiltinsRISCV.def"
+ textual header "Basic/BuiltinsRISCVVector.def"
textual header "Basic/BuiltinsSVE.def"
textual header "Basic/BuiltinsSystemZ.def"
textual header "Basic/BuiltinsWebAssembly.def"
@@ -67,7 +68,6 @@ module Clang_Basic {
textual header "Basic/Sanitizers.def"
textual header "Basic/TargetCXXABI.def"
textual header "Basic/TokenKinds.def"
- textual header "Basic/X86Target.def"
module * { export * }
}
diff --git a/clang/lib/ARCMigrate/ARCMT.cpp b/clang/lib/ARCMigrate/ARCMT.cpp
index 36fbe90e1e3a..4851c434d765 100644
--- a/clang/lib/ARCMigrate/ARCMT.cpp
+++ b/clang/lib/ARCMigrate/ARCMT.cpp
@@ -65,7 +65,7 @@ bool CapturedDiagList::hasDiagnostic(ArrayRef<unsigned> IDs,
while (I != List.end()) {
FullSourceLoc diagLoc = I->getLocation();
if ((IDs.empty() || // empty means any diagnostic in the range.
- llvm::find(IDs, I->getID()) != IDs.end()) &&
+ llvm::is_contained(IDs, I->getID())) &&
!diagLoc.isBeforeInTranslationUnitThan(range.getBegin()) &&
(diagLoc == range.getEnd() ||
diagLoc.isBeforeInTranslationUnitThan(range.getEnd()))) {
diff --git a/clang/lib/ARCMigrate/ObjCMT.cpp b/clang/lib/ARCMigrate/ObjCMT.cpp
index c8069b51567c..3dfa9a0218a7 100644
--- a/clang/lib/ARCMigrate/ObjCMT.cpp
+++ b/clang/lib/ARCMigrate/ObjCMT.cpp
@@ -104,7 +104,7 @@ public:
bool FoundationIncluded;
llvm::SmallPtrSet<ObjCProtocolDecl *, 32> ObjCProtocolDecls;
llvm::SmallVector<const Decl *, 8> CFFunctionIBCandidates;
- llvm::StringSet<> WhiteListFilenames;
+ llvm::StringSet<> AllowListFilenames;
RetainSummaryManager &getSummaryManager(ASTContext &Ctx) {
if (!Summaries)
@@ -118,14 +118,14 @@ public:
FileRemapper &remapper, FileManager &fileMgr,
const PPConditionalDirectiveRecord *PPRec,
Preprocessor &PP, bool isOutputFile,
- ArrayRef<std::string> WhiteList)
+ ArrayRef<std::string> AllowList)
: MigrateDir(migrateDir), ASTMigrateActions(astMigrateActions),
NSIntegerTypedefed(nullptr), NSUIntegerTypedefed(nullptr),
Remapper(remapper), FileMgr(fileMgr), PPRec(PPRec), PP(PP),
IsOutputFile(isOutputFile), FoundationIncluded(false) {
// FIXME: StringSet should have insert(iter, iter) to use here.
- for (const std::string &Val : WhiteList)
- WhiteListFilenames.insert(Val);
+ for (const std::string &Val : AllowList)
+ AllowListFilenames.insert(Val);
}
protected:
@@ -151,10 +151,10 @@ protected:
void HandleTranslationUnit(ASTContext &Ctx) override;
bool canModifyFile(StringRef Path) {
- if (WhiteListFilenames.empty())
+ if (AllowListFilenames.empty())
return true;
- return WhiteListFilenames.find(llvm::sys::path::filename(Path))
- != WhiteListFilenames.end();
+ return AllowListFilenames.find(llvm::sys::path::filename(Path)) !=
+ AllowListFilenames.end();
}
bool canModifyFile(Optional<FileEntryRef> FE) {
if (!FE)
@@ -487,9 +487,8 @@ static void rewriteToObjCProperty(const ObjCMethodDecl *Getter,
// Short circuit 'delegate' properties that contain the name "delegate" or
// "dataSource", or have exact name "target" to have 'assign' attribute.
- if (PropertyName.equals("target") ||
- (PropertyName.find("delegate") != StringRef::npos) ||
- (PropertyName.find("dataSource") != StringRef::npos)) {
+ if (PropertyName.equals("target") || PropertyName.contains("delegate") ||
+ PropertyName.contains("dataSource")) {
QualType QT = Getter->getReturnType();
if (!QT->isRealType())
append_attr(PropertyString, "assign", LParenAdded);
@@ -1144,7 +1143,7 @@ static bool AttributesMatch(const Decl *Decl1, const Decl *Decl2,
static bool IsValidIdentifier(ASTContext &Ctx,
const char *Name) {
- if (!isIdentifierHead(Name[0]))
+ if (!isAsciiIdentifierStart(Name[0]))
return false;
std::string NameString = Name;
NameString[0] = toLowercase(NameString[0]);
@@ -1987,7 +1986,7 @@ bool MigrateSourceAction::BeginInvocation(CompilerInstance &CI) {
return true;
}
-static std::vector<std::string> getWhiteListFilenames(StringRef DirPath) {
+static std::vector<std::string> getAllowListFilenames(StringRef DirPath) {
using namespace llvm::sys::fs;
using namespace llvm::sys::path;
@@ -2018,16 +2017,16 @@ MigrateSourceAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
if (ObjCMTOpts == FrontendOptions::ObjCMT_None) {
// If no specific option was given, enable literals+subscripting transforms
// by default.
- ObjCMTAction |= FrontendOptions::ObjCMT_Literals |
- FrontendOptions::ObjCMT_Subscripting;
+ ObjCMTAction |=
+ FrontendOptions::ObjCMT_Literals | FrontendOptions::ObjCMT_Subscripting;
}
CI.getPreprocessor().addPPCallbacks(std::unique_ptr<PPCallbacks>(PPRec));
- std::vector<std::string> WhiteList =
- getWhiteListFilenames(CI.getFrontendOpts().ObjCMTWhiteListPath);
+ std::vector<std::string> AllowList =
+ getAllowListFilenames(CI.getFrontendOpts().ObjCMTAllowListPath);
return std::make_unique<ObjCMigrateASTConsumer>(
CI.getFrontendOpts().OutputFile, ObjCMTAction, Remapper,
CI.getFileManager(), PPRec, CI.getPreprocessor(),
- /*isOutputFile=*/true, WhiteList);
+ /*isOutputFile=*/true, AllowList);
}
namespace {
diff --git a/clang/lib/ARCMigrate/TransUnbridgedCasts.cpp b/clang/lib/ARCMigrate/TransUnbridgedCasts.cpp
index e767ad5346c3..40220a2eef49 100644
--- a/clang/lib/ARCMigrate/TransUnbridgedCasts.cpp
+++ b/clang/lib/ARCMigrate/TransUnbridgedCasts.cpp
@@ -146,9 +146,8 @@ private:
ento::cocoa::isRefType(E->getSubExpr()->getType(), "CF",
FD->getIdentifier()->getName())) {
StringRef fname = FD->getIdentifier()->getName();
- if (fname.endswith("Retain") ||
- fname.find("Create") != StringRef::npos ||
- fname.find("Copy") != StringRef::npos) {
+ if (fname.endswith("Retain") || fname.contains("Create") ||
+ fname.contains("Copy")) {
// Do not migrate to couple of bridge transfer casts which
// cancel each other out. Leave it unchanged so error gets user
// attention instead.
@@ -168,7 +167,7 @@ private:
return;
}
- if (fname.find("Get") != StringRef::npos) {
+ if (fname.contains("Get")) {
castToObjCObject(E, /*retained=*/false);
return;
}
@@ -253,7 +252,8 @@ private:
SourceManager &SM = Pass.Ctx.getSourceManager();
char PrevChar = *SM.getCharacterData(InsertLoc.getLocWithOffset(-1));
- if (Lexer::isIdentifierBodyChar(PrevChar, Pass.Ctx.getLangOpts()))
+ if (Lexer::isAsciiIdentifierContinueChar(PrevChar,
+ Pass.Ctx.getLangOpts()))
BridgeCall += ' ';
if (Kind == OBC_BridgeTransfer)
diff --git a/clang/lib/ARCMigrate/Transforms.cpp b/clang/lib/ARCMigrate/Transforms.cpp
index e274a540e408..ca48160d9c85 100644
--- a/clang/lib/ARCMigrate/Transforms.cpp
+++ b/clang/lib/ARCMigrate/Transforms.cpp
@@ -95,11 +95,9 @@ bool trans::isPlusOne(const Expr *E) {
ento::cocoa::isRefType(callE->getType(), "CF",
FD->getIdentifier()->getName())) {
StringRef fname = FD->getIdentifier()->getName();
- if (fname.endswith("Retain") ||
- fname.find("Create") != StringRef::npos ||
- fname.find("Copy") != StringRef::npos) {
+ if (fname.endswith("Retain") || fname.contains("Create") ||
+ fname.contains("Copy"))
return true;
- }
}
}
}
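
The three hunks above swap the "find(...) != StringRef::npos" idiom for StringRef::contains. A minimal sketch of the equivalence, assuming only llvm::StringRef; hasCreateOrCopy is a made-up helper, not part of the patch:

#include "llvm/ADT/StringRef.h"
#include <cassert>

static bool hasCreateOrCopy(llvm::StringRef Name) {
  // Pre-change spelling.
  bool Old = Name.find("Create") != llvm::StringRef::npos ||
             Name.find("Copy") != llvm::StringRef::npos;
  // Post-change spelling; same result, easier to read.
  bool New = Name.contains("Create") || Name.contains("Copy");
  assert(Old == New);
  (void)Old;
  return New;
}
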
diff --git a/clang/lib/AST/APValue.cpp b/clang/lib/AST/APValue.cpp
index 9a9233bc1ea7..ef333c771166 100644
--- a/clang/lib/AST/APValue.cpp
+++ b/clang/lib/AST/APValue.cpp
@@ -700,7 +700,9 @@ void APValue::printPretty(raw_ostream &Out, const PrintingPolicy &Policy,
if (!hasLValuePath()) {
// No lvalue path: just print the offset.
CharUnits O = getLValueOffset();
- CharUnits S = Ctx ? Ctx->getTypeSizeInChars(InnerTy) : CharUnits::Zero();
+ CharUnits S = Ctx ? Ctx->getTypeSizeInCharsIfKnown(InnerTy).getValueOr(
+ CharUnits::Zero())
+ : CharUnits::Zero();
if (!O.isZero()) {
if (IsReference)
Out << "*(";
diff --git a/clang/lib/AST/ASTConcept.cpp b/clang/lib/AST/ASTConcept.cpp
index 549088ad4a8a..18582782888c 100644
--- a/clang/lib/AST/ASTConcept.cpp
+++ b/clang/lib/AST/ASTConcept.cpp
@@ -1,9 +1,8 @@
//===--- ASTConcept.cpp - Concepts Related AST Data Structures --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index e102a3ba508d..f0b931bdc905 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -101,7 +101,14 @@
using namespace clang;
enum FloatingRank {
- BFloat16Rank, Float16Rank, HalfRank, FloatRank, DoubleRank, LongDoubleRank, Float128Rank
+ BFloat16Rank,
+ Float16Rank,
+ HalfRank,
+ FloatRank,
+ DoubleRank,
+ LongDoubleRank,
+ Float128Rank,
+ Ibm128Rank
};
/// \returns location that is relevant when searching for Doc comments related
@@ -172,29 +179,28 @@ static SourceLocation getDeclLocForCommentSearch(const Decl *D,
// Allow association with Y across {} in `typedef struct X {} Y`.
isa<TypedefDecl>(D))
return D->getBeginLoc();
- else {
- const SourceLocation DeclLoc = D->getLocation();
- if (DeclLoc.isMacroID()) {
- if (isa<TypedefDecl>(D)) {
- // If location of the typedef name is in a macro, it is because being
- // declared via a macro. Try using declaration's starting location as
- // the "declaration location".
- return D->getBeginLoc();
- } else if (const auto *TD = dyn_cast<TagDecl>(D)) {
- // If location of the tag decl is inside a macro, but the spelling of
- // the tag name comes from a macro argument, it looks like a special
- // macro like NS_ENUM is being used to define the tag decl. In that
- // case, adjust the source location to the expansion loc so that we can
- // attach the comment to the tag decl.
- if (SourceMgr.isMacroArgExpansion(DeclLoc) &&
- TD->isCompleteDefinition())
- return SourceMgr.getExpansionLoc(DeclLoc);
- }
+
+ const SourceLocation DeclLoc = D->getLocation();
+ if (DeclLoc.isMacroID()) {
+ if (isa<TypedefDecl>(D)) {
+ // If location of the typedef name is in a macro, it is because being
+ // declared via a macro. Try using declaration's starting location as
+ // the "declaration location".
+ return D->getBeginLoc();
+ }
+
+ if (const auto *TD = dyn_cast<TagDecl>(D)) {
+ // If location of the tag decl is inside a macro, but the spelling of
+ // the tag name comes from a macro argument, it looks like a special
+ // macro like NS_ENUM is being used to define the tag decl. In that
+ // case, adjust the source location to the expansion loc so that we can
+ // attach the comment to the tag decl.
+ if (SourceMgr.isMacroArgExpansion(DeclLoc) && TD->isCompleteDefinition())
+ return SourceMgr.getExpansionLoc(DeclLoc);
}
- return DeclLoc;
}
- return {};
+ return DeclLoc;
}
RawComment *ASTContext::getRawCommentForDeclNoCacheImpl(
@@ -984,7 +990,7 @@ ASTContext::ASTContext(LangOptions &LOpts, SourceManager &SM,
addTranslationUnitDecl();
}
-ASTContext::~ASTContext() {
+void ASTContext::cleanup() {
// Release the DenseMaps associated with DeclContext objects.
// FIXME: Is this the ideal solution?
ReleaseDeclContextMaps();
@@ -992,6 +998,7 @@ ASTContext::~ASTContext() {
// Call all of the deallocation functions on all of their targets.
for (auto &Pair : Deallocations)
(Pair.first)(Pair.second);
+ Deallocations.clear();
// ASTRecordLayout objects in ASTRecordLayouts must always be destroyed
// because they can contain DenseMaps.
@@ -1001,6 +1008,7 @@ ASTContext::~ASTContext() {
// Increment in loop to prevent using deallocated memory.
if (auto *R = const_cast<ASTRecordLayout *>((I++)->second))
R->Destroy(*this);
+ ObjCLayouts.clear();
for (llvm::DenseMap<const RecordDecl*, const ASTRecordLayout*>::iterator
I = ASTRecordLayouts.begin(), E = ASTRecordLayouts.end(); I != E; ) {
@@ -1008,16 +1016,21 @@ ASTContext::~ASTContext() {
if (auto *R = const_cast<ASTRecordLayout *>((I++)->second))
R->Destroy(*this);
}
+ ASTRecordLayouts.clear();
for (llvm::DenseMap<const Decl*, AttrVec*>::iterator A = DeclAttrs.begin(),
AEnd = DeclAttrs.end();
A != AEnd; ++A)
A->second->~AttrVec();
+ DeclAttrs.clear();
for (const auto &Value : ModuleInitializers)
Value.second->~PerModuleInitializers();
+ ModuleInitializers.clear();
}
+ASTContext::~ASTContext() { cleanup(); }
+
void ASTContext::setTraversalScope(const std::vector<Decl *> &TopLevelDecls) {
TraversalScope = TopLevelDecls;
getParentMapContext().clear();
@@ -1112,7 +1125,7 @@ void ASTContext::deduplicateMergedDefinitonsFor(NamedDecl *ND) {
for (Module *&M : Merged)
if (!Found.insert(M).second)
M = nullptr;
- Merged.erase(std::remove(Merged.begin(), Merged.end(), nullptr), Merged.end());
+ llvm::erase_value(Merged, nullptr);
}
ArrayRef<Module *>
@@ -1308,6 +1321,9 @@ void ASTContext::InitBuiltinTypes(const TargetInfo &Target,
// GNU extension, __float128 for IEEE quadruple precision
InitBuiltinType(Float128Ty, BuiltinType::Float128);
+ // __ibm128 for IBM extended precision
+ InitBuiltinType(Ibm128Ty, BuiltinType::Ibm128);
+
// C11 extension ISO/IEC TS 18661-3
InitBuiltinType(Float16Ty, BuiltinType::Float16);
@@ -1402,12 +1418,6 @@ void ASTContext::InitBuiltinTypes(const TargetInfo &Target,
if (LangOpts.MatrixTypes)
InitBuiltinType(IncompleteMatrixIdxTy, BuiltinType::IncompleteMatrixIdx);
- // C99 6.2.5p11.
- FloatComplexTy = getComplexType(FloatTy);
- DoubleComplexTy = getComplexType(DoubleTy);
- LongDoubleComplexTy = getComplexType(LongDoubleTy);
- Float128ComplexTy = getComplexType(Float128Ty);
-
// Builtin types for 'id', 'Class', and 'SEL'.
InitBuiltinType(ObjCBuiltinIdTy, BuiltinType::ObjCId);
InitBuiltinType(ObjCBuiltinClassTy, BuiltinType::ObjCClass);
@@ -1435,13 +1445,10 @@ void ASTContext::InitBuiltinTypes(const TargetInfo &Target,
#include "clang/Basic/AArch64SVEACLETypes.def"
}
- if (Target.getTriple().isPPC64() &&
- Target.hasFeature("paired-vector-memops")) {
- if (Target.hasFeature("mma")) {
+ if (Target.getTriple().isPPC64()) {
#define PPC_VECTOR_MMA_TYPE(Name, Id, Size) \
InitBuiltinType(Id##Ty, BuiltinType::Id);
#include "clang/Basic/PPCTypes.def"
- }
#define PPC_VECTOR_VSX_TYPE(Name, Id, Size) \
InitBuiltinType(Id##Ty, BuiltinType::Id);
#include "clang/Basic/PPCTypes.def"
@@ -1704,6 +1711,8 @@ const llvm::fltSemantics &ASTContext::getFloatTypeSemantics(QualType T) const {
return Target->getHalfFormat();
case BuiltinType::Float: return Target->getFloatFormat();
case BuiltinType::Double: return Target->getDoubleFormat();
+ case BuiltinType::Ibm128:
+ return Target->getIbm128Format();
case BuiltinType::LongDouble:
if (getLangOpts().OpenMP && getLangOpts().OpenMPIsDevice)
return AuxTarget->getLongDoubleFormat();
@@ -1859,7 +1868,7 @@ static getConstantArrayInfoInChars(const ASTContext &Context,
Width = llvm::alignTo(Width, Align);
return TypeInfoChars(CharUnits::fromQuantity(Width),
CharUnits::fromQuantity(Align),
- EltInfo.AlignIsRequired);
+ EltInfo.AlignRequirement);
}
TypeInfoChars ASTContext::getTypeInfoInChars(const Type *T) const {
@@ -1867,8 +1876,7 @@ TypeInfoChars ASTContext::getTypeInfoInChars(const Type *T) const {
return getConstantArrayInfoInChars(*this, CAT);
TypeInfo Info = getTypeInfo(T);
return TypeInfoChars(toCharUnitsFromBits(Info.Width),
- toCharUnitsFromBits(Info.Align),
- Info.AlignIsRequired);
+ toCharUnitsFromBits(Info.Align), Info.AlignRequirement);
}
TypeInfoChars ASTContext::getTypeInfoInChars(QualType T) const {
@@ -1876,7 +1884,7 @@ TypeInfoChars ASTContext::getTypeInfoInChars(QualType T) const {
}
bool ASTContext::isAlignmentRequired(const Type *T) const {
- return getTypeInfo(T).AlignIsRequired;
+ return getTypeInfo(T).AlignRequirement != AlignRequirementKind::None;
}
bool ASTContext::isAlignmentRequired(QualType T) const {
@@ -1928,7 +1936,7 @@ TypeInfo ASTContext::getTypeInfo(const Type *T) const {
TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const {
uint64_t Width = 0;
unsigned Align = 8;
- bool AlignIsRequired = false;
+ AlignRequirementKind AlignRequirement = AlignRequirementKind::None;
unsigned AS = 0;
switch (T->getTypeClass()) {
#define TYPE(Class, Base)
@@ -1962,7 +1970,7 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const {
"Overflow in array type bit size evaluation");
Width = EltInfo.Width * Size;
Align = EltInfo.Align;
- AlignIsRequired = EltInfo.AlignIsRequired;
+ AlignRequirement = EltInfo.AlignRequirement;
if (!getTargetInfo().getCXXABI().isMicrosoft() ||
getTargetInfo().getPointerWidth(0) == 64)
Width = llvm::alignTo(Width, Align);
@@ -2131,6 +2139,10 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const {
Width = Target->getDoubleWidth();
Align = Target->getDoubleAlign();
break;
+ case BuiltinType::Ibm128:
+ Width = Target->getIbm128Width();
+ Align = Target->getIbm128Align();
+ break;
case BuiltinType::LongDouble:
if (getLangOpts().OpenMP && getLangOpts().OpenMPIsDevice &&
(Target->getLongDoubleWidth() != AuxTarget->getLongDoubleWidth() ||
@@ -2299,7 +2311,7 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const {
getTypeInfo(ED->getIntegerType()->getUnqualifiedDesugaredType());
if (unsigned AttrAlign = ED->getMaxAlignment()) {
Info.Align = AttrAlign;
- Info.AlignIsRequired = true;
+ Info.AlignRequirement = AlignRequirementKind::RequiredByEnum;
}
return Info;
}
@@ -2309,7 +2321,9 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const {
const ASTRecordLayout &Layout = getASTRecordLayout(RD);
Width = toBits(Layout.getSize());
Align = toBits(Layout.getAlignment());
- AlignIsRequired = RD->hasAttr<AlignedAttr>();
+ AlignRequirement = RD->hasAttr<AlignedAttr>()
+ ? AlignRequirementKind::RequiredByRecord
+ : AlignRequirementKind::None;
break;
}
@@ -2343,10 +2357,10 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const {
// attribute(aligned) can only round up) but matches its implementation.
if (unsigned AttrAlign = Typedef->getMaxAlignment()) {
Align = AttrAlign;
- AlignIsRequired = true;
+ AlignRequirement = AlignRequirementKind::RequiredByTypedef;
} else {
Align = Info.Align;
- AlignIsRequired = Info.AlignIsRequired;
+ AlignRequirement = Info.AlignRequirement;
}
Width = Info.Width;
break;
@@ -2392,7 +2406,7 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const {
}
assert(llvm::isPowerOf2_32(Align) && "Alignment must be power of 2");
- return TypeInfo(Width, Align, AlignIsRequired);
+ return TypeInfo(Width, Align, AlignRequirement);
}
unsigned ASTContext::getTypeUnadjustedAlign(const Type *T) const {
@@ -2478,11 +2492,18 @@ unsigned ASTContext::getPreferredTypeAlign(const Type *T) const {
return ABIAlign;
if (const auto *RT = T->getAs<RecordType>()) {
- if (TI.AlignIsRequired || RT->getDecl()->isInvalidDecl())
+ const RecordDecl *RD = RT->getDecl();
+
+ // When used as part of a typedef, or together with a 'packed' attribute,
+ // the 'aligned' attribute can be used to decrease alignment. Note that the
+ // 'packed' case is already taken into consideration when computing the
+ // alignment, we only need to handle the typedef case here.
+ if (TI.AlignRequirement == AlignRequirementKind::RequiredByTypedef ||
+ RD->isInvalidDecl())
return ABIAlign;
unsigned PreferredAlign = static_cast<unsigned>(
- toBits(getASTRecordLayout(RT->getDecl()).PreferredAlignment));
+ toBits(getASTRecordLayout(RD).PreferredAlignment));
assert(PreferredAlign >= ABIAlign &&
"PreferredAlign should be at least as large as ABIAlign.");
return PreferredAlign;
@@ -2502,7 +2523,7 @@ unsigned ASTContext::getPreferredTypeAlign(const Type *T) const {
Target->defaultsToAIXPowerAlignment()))
// Don't increase the alignment if an alignment attribute was specified on a
// typedef declaration.
- if (!TI.AlignIsRequired)
+ if (!TI.isAlignRequired())
return std::max(ABIAlign, (unsigned)getTypeSize(T));
return ABIAlign;
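
In the hunks above the boolean AlignIsRequired gives way to AlignRequirementKind, so getPreferredTypeAlign can suppress the preferred-alignment bump only when the requirement comes from a typedef. A minimal sketch of why that case is special, assuming the GNU aligned attribute; the type names are illustrative, not from the patch:

// On a typedef, 'aligned' may lower alignment, so the declared value must be
// honored as written rather than rounded back up to the preferred alignment.
struct Payload { long double v; };
typedef struct Payload UnderAligned __attribute__((aligned(4)));
// _Alignof(UnderAligned) reports 4 with GCC/Clang, even though the preferred
// alignment of struct Payload itself can be larger (e.g. under AIX power alignment).
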
@@ -2629,16 +2650,66 @@ static bool unionHasUniqueObjectRepresentations(const ASTContext &Context,
return !RD->field_empty();
}
-static bool isStructEmpty(QualType Ty) {
- const RecordDecl *RD = Ty->castAs<RecordType>()->getDecl();
+static int64_t getSubobjectOffset(const FieldDecl *Field,
+ const ASTContext &Context,
+ const clang::ASTRecordLayout & /*Layout*/) {
+ return Context.getFieldOffset(Field);
+}
- if (!RD->field_empty())
- return false;
+static int64_t getSubobjectOffset(const CXXRecordDecl *RD,
+ const ASTContext &Context,
+ const clang::ASTRecordLayout &Layout) {
+ return Context.toBits(Layout.getBaseClassOffset(RD));
+}
- if (const auto *ClassDecl = dyn_cast<CXXRecordDecl>(RD))
- return ClassDecl->isEmpty();
+static llvm::Optional<int64_t>
+structHasUniqueObjectRepresentations(const ASTContext &Context,
+ const RecordDecl *RD);
- return true;
+static llvm::Optional<int64_t>
+getSubobjectSizeInBits(const FieldDecl *Field, const ASTContext &Context) {
+ if (Field->getType()->isRecordType()) {
+ const RecordDecl *RD = Field->getType()->getAsRecordDecl();
+ if (!RD->isUnion())
+ return structHasUniqueObjectRepresentations(Context, RD);
+ }
+ if (!Field->getType()->isReferenceType() &&
+ !Context.hasUniqueObjectRepresentations(Field->getType()))
+ return llvm::None;
+
+ int64_t FieldSizeInBits =
+ Context.toBits(Context.getTypeSizeInChars(Field->getType()));
+ if (Field->isBitField()) {
+ int64_t BitfieldSize = Field->getBitWidthValue(Context);
+ if (BitfieldSize > FieldSizeInBits)
+ return llvm::None;
+ FieldSizeInBits = BitfieldSize;
+ }
+ return FieldSizeInBits;
+}
+
+static llvm::Optional<int64_t>
+getSubobjectSizeInBits(const CXXRecordDecl *RD, const ASTContext &Context) {
+ return structHasUniqueObjectRepresentations(Context, RD);
+}
+
+template <typename RangeT>
+static llvm::Optional<int64_t> structSubobjectsHaveUniqueObjectRepresentations(
+ const RangeT &Subobjects, int64_t CurOffsetInBits,
+ const ASTContext &Context, const clang::ASTRecordLayout &Layout) {
+ for (const auto *Subobject : Subobjects) {
+ llvm::Optional<int64_t> SizeInBits =
+ getSubobjectSizeInBits(Subobject, Context);
+ if (!SizeInBits)
+ return llvm::None;
+ if (*SizeInBits != 0) {
+ int64_t Offset = getSubobjectOffset(Subobject, Context, Layout);
+ if (Offset != CurOffsetInBits)
+ return llvm::None;
+ CurOffsetInBits += *SizeInBits;
+ }
+ }
+ return CurOffsetInBits;
}
static llvm::Optional<int64_t>
@@ -2652,58 +2723,32 @@ structHasUniqueObjectRepresentations(const ASTContext &Context,
if (ClassDecl->isDynamicClass())
return llvm::None;
- SmallVector<std::pair<QualType, int64_t>, 4> Bases;
+ SmallVector<CXXRecordDecl *, 4> Bases;
for (const auto &Base : ClassDecl->bases()) {
// Empty types can be inherited from, and non-empty types can potentially
// have tail padding, so just make sure there isn't an error.
- if (!isStructEmpty(Base.getType())) {
- llvm::Optional<int64_t> Size = structHasUniqueObjectRepresentations(
- Context, Base.getType()->castAs<RecordType>()->getDecl());
- if (!Size)
- return llvm::None;
- Bases.emplace_back(Base.getType(), Size.getValue());
- }
+ Bases.emplace_back(Base.getType()->getAsCXXRecordDecl());
}
- llvm::sort(Bases, [&](const std::pair<QualType, int64_t> &L,
- const std::pair<QualType, int64_t> &R) {
- return Layout.getBaseClassOffset(L.first->getAsCXXRecordDecl()) <
- Layout.getBaseClassOffset(R.first->getAsCXXRecordDecl());
+ llvm::sort(Bases, [&](const CXXRecordDecl *L, const CXXRecordDecl *R) {
+ return Layout.getBaseClassOffset(L) < Layout.getBaseClassOffset(R);
});
- for (const auto &Base : Bases) {
- int64_t BaseOffset = Context.toBits(
- Layout.getBaseClassOffset(Base.first->getAsCXXRecordDecl()));
- int64_t BaseSize = Base.second;
- if (BaseOffset != CurOffsetInBits)
- return llvm::None;
- CurOffsetInBits = BaseOffset + BaseSize;
- }
- }
-
- for (const auto *Field : RD->fields()) {
- if (!Field->getType()->isReferenceType() &&
- !Context.hasUniqueObjectRepresentations(Field->getType()))
- return llvm::None;
-
- int64_t FieldSizeInBits =
- Context.toBits(Context.getTypeSizeInChars(Field->getType()));
- if (Field->isBitField()) {
- int64_t BitfieldSize = Field->getBitWidthValue(Context);
-
- if (BitfieldSize > FieldSizeInBits)
- return llvm::None;
- FieldSizeInBits = BitfieldSize;
- }
-
- int64_t FieldOffsetInBits = Context.getFieldOffset(Field);
-
- if (FieldOffsetInBits != CurOffsetInBits)
+ llvm::Optional<int64_t> OffsetAfterBases =
+ structSubobjectsHaveUniqueObjectRepresentations(Bases, CurOffsetInBits,
+ Context, Layout);
+ if (!OffsetAfterBases)
return llvm::None;
-
- CurOffsetInBits = FieldSizeInBits + FieldOffsetInBits;
+ CurOffsetInBits = *OffsetAfterBases;
}
+ llvm::Optional<int64_t> OffsetAfterFields =
+ structSubobjectsHaveUniqueObjectRepresentations(
+ RD->fields(), CurOffsetInBits, Context, Layout);
+ if (!OffsetAfterFields)
+ return llvm::None;
+ CurOffsetInBits = *OffsetAfterFields;
+
return CurOffsetInBits;
}
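
The rewrite above routes bases and fields through one helper, structSubobjectsHaveUniqueObjectRepresentations, which advances a running bit offset and bails out as soon as a subobject leaves a gap. That is the property the standard trait exposes; a minimal sketch with illustrative types, assuming a typical 4-byte int target:

#include <type_traits>

struct Padded { char c; int i; };     // interior padding between 'c' and 'i'
struct Dense  { int i; char c[4]; };  // no padding bytes
static_assert(!std::has_unique_object_representations_v<Padded>,
              "padding bits do not participate in the value");
static_assert(std::has_unique_object_representations_v<Dense>,
              "every bit of the object representation is part of the value");
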
@@ -4770,6 +4815,23 @@ ASTContext::getTemplateSpecializationType(TemplateName Template,
return QualType(Spec, 0);
}
+static bool
+getCanonicalTemplateArguments(const ASTContext &C,
+ ArrayRef<TemplateArgument> OrigArgs,
+ SmallVectorImpl<TemplateArgument> &CanonArgs) {
+ bool AnyNonCanonArgs = false;
+ unsigned NumArgs = OrigArgs.size();
+ CanonArgs.resize(NumArgs);
+ for (unsigned I = 0; I != NumArgs; ++I) {
+ const TemplateArgument &OrigArg = OrigArgs[I];
+ TemplateArgument &CanonArg = CanonArgs[I];
+ CanonArg = C.getCanonicalTemplateArgument(OrigArg);
+ if (!CanonArg.structurallyEquals(OrigArg))
+ AnyNonCanonArgs = true;
+ }
+ return AnyNonCanonArgs;
+}
+
QualType ASTContext::getCanonicalTemplateSpecializationType(
TemplateName Template, ArrayRef<TemplateArgument> Args) const {
assert(!Template.getAsDependentTemplateName() &&
@@ -4782,10 +4844,7 @@ QualType ASTContext::getCanonicalTemplateSpecializationType(
// Build the canonical template specialization type.
TemplateName CanonTemplate = getCanonicalTemplateName(Template);
SmallVector<TemplateArgument, 4> CanonArgs;
- unsigned NumArgs = Args.size();
- CanonArgs.reserve(NumArgs);
- for (const TemplateArgument &Arg : Args)
- CanonArgs.push_back(getCanonicalTemplateArgument(Arg));
+ ::getCanonicalTemplateArguments(*this, Args, CanonArgs);
// Determine whether this canonical template specialization type already
// exists.
@@ -4800,7 +4859,7 @@ QualType ASTContext::getCanonicalTemplateSpecializationType(
if (!Spec) {
// Allocate a new canonical template specialization type.
void *Mem = Allocate((sizeof(TemplateSpecializationType) +
- sizeof(TemplateArgument) * NumArgs),
+ sizeof(TemplateArgument) * CanonArgs.size()),
TypeAlignment);
Spec = new (Mem) TemplateSpecializationType(CanonTemplate,
CanonArgs,
@@ -4942,14 +5001,9 @@ ASTContext::getDependentTemplateSpecializationType(
ElaboratedTypeKeyword CanonKeyword = Keyword;
if (Keyword == ETK_None) CanonKeyword = ETK_Typename;
- bool AnyNonCanonArgs = false;
- unsigned NumArgs = Args.size();
- SmallVector<TemplateArgument, 16> CanonArgs(NumArgs);
- for (unsigned I = 0; I != NumArgs; ++I) {
- CanonArgs[I] = getCanonicalTemplateArgument(Args[I]);
- if (!CanonArgs[I].structurallyEquals(Args[I]))
- AnyNonCanonArgs = true;
- }
+ SmallVector<TemplateArgument, 16> CanonArgs;
+ bool AnyNonCanonArgs =
+ ::getCanonicalTemplateArguments(*this, Args, CanonArgs);
QualType Canon;
if (AnyNonCanonArgs || CanonNNS != NNS || CanonKeyword != Keyword) {
@@ -4962,7 +5016,7 @@ ASTContext::getDependentTemplateSpecializationType(
}
void *Mem = Allocate((sizeof(DependentTemplateSpecializationType) +
- sizeof(TemplateArgument) * NumArgs),
+ sizeof(TemplateArgument) * Args.size()),
TypeAlignment);
T = new (Mem) DependentTemplateSpecializationType(Keyword, NNS,
Name, Args, Canon);
@@ -5123,11 +5177,8 @@ QualType ASTContext::getObjCObjectType(
// sorted-and-uniqued list of protocols and the type arguments
// canonicalized.
QualType canonical;
- bool typeArgsAreCanonical = std::all_of(effectiveTypeArgs.begin(),
- effectiveTypeArgs.end(),
- [&](QualType type) {
- return type.isCanonical();
- });
+ bool typeArgsAreCanonical = llvm::all_of(
+ effectiveTypeArgs, [&](QualType type) { return type.isCanonical(); });
bool protocolsSorted = areSortedAndUniqued(protocols);
if (!typeArgsAreCanonical || !protocolsSorted || !baseType.isCanonical()) {
// Determine the canonical type arguments.
@@ -5454,6 +5505,29 @@ QualType ASTContext::getTypeOfType(QualType tofType) const {
return QualType(tot, 0);
}
+/// getReferenceQualifiedType - Given an expr, will return the type for
+/// that expression, as in [dcl.type.simple]p4 but without taking id-expressions
+/// and class member access into account.
+QualType ASTContext::getReferenceQualifiedType(const Expr *E) const {
+ // C++11 [dcl.type.simple]p4:
+ // [...]
+ QualType T = E->getType();
+ switch (E->getValueKind()) {
+ // - otherwise, if e is an xvalue, decltype(e) is T&&, where T is the
+ // type of e;
+ case VK_XValue:
+ return getRValueReferenceType(T);
+ // - otherwise, if e is an lvalue, decltype(e) is T&, where T is the
+ // type of e;
+ case VK_LValue:
+ return getLValueReferenceType(T);
+ // - otherwise, decltype(e) is the type of e.
+ case VK_PRValue:
+ return T;
+ }
+ llvm_unreachable("Unknown value kind");
+}
+
/// Unlike many "get<Type>" functions, we don't unique DecltypeType
/// nodes. This would never be helpful, since each such type has its own
/// expression, and would not give a significant memory saving, since there
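
The new getReferenceQualifiedType applies the decltype rules of [dcl.type.simple]p4 while deliberately skipping the id-expression and member-access special cases. A minimal sketch of the three value-category outcomes in ordinary C++, with an illustrative variable; the lvalue line parenthesizes n precisely to sidestep the id-expression case:

#include <type_traits>
#include <utility>

int n = 0;
static_assert(std::is_same_v<decltype((n)), int &>);           // lvalue  -> T&
static_assert(std::is_same_v<decltype(std::move(n)), int &&>); // xvalue  -> T&&
static_assert(std::is_same_v<decltype(n + 1), int>);           // prvalue -> T
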
@@ -5524,15 +5598,10 @@ QualType ASTContext::getUnaryTransformType(QualType BaseType,
return QualType(ut, 0);
}
-/// getAutoType - Return the uniqued reference to the 'auto' type which has been
-/// deduced to the given type, or to the canonical undeduced 'auto' type, or the
-/// canonical deduced-but-dependent 'auto' type.
-QualType
-ASTContext::getAutoType(QualType DeducedType, AutoTypeKeyword Keyword,
- bool IsDependent, bool IsPack,
- ConceptDecl *TypeConstraintConcept,
- ArrayRef<TemplateArgument> TypeConstraintArgs) const {
- assert((!IsPack || IsDependent) && "only use IsPack for a dependent pack");
+QualType ASTContext::getAutoTypeInternal(
+ QualType DeducedType, AutoTypeKeyword Keyword, bool IsDependent,
+ bool IsPack, ConceptDecl *TypeConstraintConcept,
+ ArrayRef<TemplateArgument> TypeConstraintArgs, bool IsCanon) const {
if (DeducedType.isNull() && Keyword == AutoTypeKeyword::Auto &&
!TypeConstraintConcept && !IsDependent)
return getAutoDeductType();
@@ -5545,21 +5614,52 @@ ASTContext::getAutoType(QualType DeducedType, AutoTypeKeyword Keyword,
if (AutoType *AT = AutoTypes.FindNodeOrInsertPos(ID, InsertPos))
return QualType(AT, 0);
+ QualType Canon;
+ if (!IsCanon) {
+ if (DeducedType.isNull()) {
+ SmallVector<TemplateArgument, 4> CanonArgs;
+ bool AnyNonCanonArgs =
+ ::getCanonicalTemplateArguments(*this, TypeConstraintArgs, CanonArgs);
+ if (AnyNonCanonArgs) {
+ Canon = getAutoTypeInternal(QualType(), Keyword, IsDependent, IsPack,
+ TypeConstraintConcept, CanonArgs, true);
+ // Find the insert position again.
+ AutoTypes.FindNodeOrInsertPos(ID, InsertPos);
+ }
+ } else {
+ Canon = DeducedType.getCanonicalType();
+ }
+ }
+
void *Mem = Allocate(sizeof(AutoType) +
- sizeof(TemplateArgument) * TypeConstraintArgs.size(),
+ sizeof(TemplateArgument) * TypeConstraintArgs.size(),
TypeAlignment);
auto *AT = new (Mem) AutoType(
DeducedType, Keyword,
(IsDependent ? TypeDependence::DependentInstantiation
: TypeDependence::None) |
(IsPack ? TypeDependence::UnexpandedPack : TypeDependence::None),
- TypeConstraintConcept, TypeConstraintArgs);
+ Canon, TypeConstraintConcept, TypeConstraintArgs);
Types.push_back(AT);
- if (InsertPos)
- AutoTypes.InsertNode(AT, InsertPos);
+ AutoTypes.InsertNode(AT, InsertPos);
return QualType(AT, 0);
}
+/// getAutoType - Return the uniqued reference to the 'auto' type which has been
+/// deduced to the given type, or to the canonical undeduced 'auto' type, or the
+/// canonical deduced-but-dependent 'auto' type.
+QualType
+ASTContext::getAutoType(QualType DeducedType, AutoTypeKeyword Keyword,
+ bool IsDependent, bool IsPack,
+ ConceptDecl *TypeConstraintConcept,
+ ArrayRef<TemplateArgument> TypeConstraintArgs) const {
+ assert((!IsPack || IsDependent) && "only use IsPack for a dependent pack");
+ assert((!IsDependent || DeducedType.isNull()) &&
+ "A dependent auto should be undeduced");
+ return getAutoTypeInternal(DeducedType, Keyword, IsDependent, IsPack,
+ TypeConstraintConcept, TypeConstraintArgs);
+}
+
/// Return the uniqued reference to the deduced template specialization type
/// which has been deduced to the given type, or to the canonical undeduced
/// such type, or the canonical deduced-but-dependent such type.
@@ -5577,8 +5677,7 @@ QualType ASTContext::getDeducedTemplateSpecializationType(
auto *DTST = new (*this, TypeAlignment)
DeducedTemplateSpecializationType(Template, DeducedType, IsDependent);
Types.push_back(DTST);
- if (InsertPos)
- DeducedTemplateSpecializationTypes.InsertNode(DTST, InsertPos);
+ DeducedTemplateSpecializationTypes.InsertNode(DTST, InsertPos);
return QualType(DTST, 0);
}
@@ -5615,7 +5714,7 @@ QualType ASTContext::getAutoDeductType() const {
if (AutoDeductTy.isNull())
AutoDeductTy = QualType(new (*this, TypeAlignment)
AutoType(QualType(), AutoTypeKeyword::Auto,
- TypeDependence::None,
+ TypeDependence::None, QualType(),
/*concept*/ nullptr, /*args*/ {}),
0);
return AutoDeductTy;
@@ -5783,7 +5882,11 @@ QualType ASTContext::getUnqualifiedArrayType(QualType type,
/// Attempt to unwrap two types that may both be array types with the same bound
/// (or both be array types of unknown bound) for the purpose of comparing the
/// cv-decomposition of two types per C++ [conv.qual].
-void ASTContext::UnwrapSimilarArrayTypes(QualType &T1, QualType &T2) {
+///
+/// \param AllowPiMismatch Allow the Pi1 and Pi2 to differ as described in
+/// C++20 [conv.qual], if permitted by the current language mode.
+void ASTContext::UnwrapSimilarArrayTypes(QualType &T1, QualType &T2,
+ bool AllowPiMismatch) {
while (true) {
auto *AT1 = getAsArrayType(T1);
if (!AT1)
@@ -5795,12 +5898,21 @@ void ASTContext::UnwrapSimilarArrayTypes(QualType &T1, QualType &T2) {
// If we don't have two array types with the same constant bound nor two
// incomplete array types, we've unwrapped everything we can.
+ // C++20 also permits one type to be a constant array type and the other
+ // to be an incomplete array type.
+ // FIXME: Consider also unwrapping array of unknown bound and VLA.
if (auto *CAT1 = dyn_cast<ConstantArrayType>(AT1)) {
auto *CAT2 = dyn_cast<ConstantArrayType>(AT2);
- if (!CAT2 || CAT1->getSize() != CAT2->getSize())
+ if (!((CAT2 && CAT1->getSize() == CAT2->getSize()) ||
+ (AllowPiMismatch && getLangOpts().CPlusPlus20 &&
+ isa<IncompleteArrayType>(AT2))))
+ return;
+ } else if (isa<IncompleteArrayType>(AT1)) {
+ if (!(isa<IncompleteArrayType>(AT2) ||
+ (AllowPiMismatch && getLangOpts().CPlusPlus20 &&
+ isa<ConstantArrayType>(AT2))))
return;
- } else if (!isa<IncompleteArrayType>(AT1) ||
- !isa<IncompleteArrayType>(AT2)) {
+ } else {
return;
}
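
AllowPiMismatch implements the C++20 [conv.qual] relaxation under which an array of known bound and an array of unknown bound may pair up during a qualification conversion. A minimal sketch of what this accepts in C++20 mode but rejects in C++17; the variables are illustrative:

int a[3] = {1, 2, 3};
int (*p)[3] = &a;
const int (*q)[] = p;  // OK in C++20: the bound may be dropped; ill-formed in C++17
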
@@ -5819,10 +5931,14 @@ void ASTContext::UnwrapSimilarArrayTypes(QualType &T1, QualType &T2) {
/// "unwraps" pointer and pointer-to-member types to compare them at each
/// level.
///
+/// \param AllowPiMismatch Allow the Pi1 and Pi2 to differ as described in
+/// C++20 [conv.qual], if permitted by the current language mode.
+///
/// \return \c true if a pointer type was unwrapped, \c false if we reached a
/// pair of types that can't be unwrapped further.
-bool ASTContext::UnwrapSimilarTypes(QualType &T1, QualType &T2) {
- UnwrapSimilarArrayTypes(T1, T2);
+bool ASTContext::UnwrapSimilarTypes(QualType &T1, QualType &T2,
+ bool AllowPiMismatch) {
+ UnwrapSimilarArrayTypes(T1, T2, AllowPiMismatch);
const auto *T1PtrType = T1->getAs<PointerType>();
const auto *T2PtrType = T2->getAs<PointerType>();
@@ -5883,7 +5999,7 @@ bool ASTContext::hasCvrSimilarType(QualType T1, QualType T2) {
if (hasSameType(T1, T2))
return true;
- if (!UnwrapSimilarTypes(T1, T2))
+ if (!UnwrapSimilarTypes(T1, T2, /*AllowPiMismatch*/ false))
return false;
}
}
@@ -6066,9 +6182,11 @@ ASTContext::getCanonicalNestedNameSpecifier(NestedNameSpecifier *NNS) const {
NNS->getAsNamespaceAlias()->getNamespace()
->getOriginalNamespace());
+ // The difference between TypeSpec and TypeSpecWithTemplate is that the
+ // latter will have the 'template' keyword when printed.
case NestedNameSpecifier::TypeSpec:
case NestedNameSpecifier::TypeSpecWithTemplate: {
- QualType T = getCanonicalType(QualType(NNS->getAsType(), 0));
+ const Type *T = getCanonicalType(NNS->getAsType());
// If we have some kind of dependent-named type (e.g., "typename T::type"),
// break it apart into its prefix and identifier, then reconstitute those
@@ -6078,14 +6196,16 @@ ASTContext::getCanonicalNestedNameSpecifier(NestedNameSpecifier *NNS) const {
// typedef typename T::type T1;
// typedef typename T1::type T2;
if (const auto *DNT = T->getAs<DependentNameType>())
- return NestedNameSpecifier::Create(*this, DNT->getQualifier(),
- const_cast<IdentifierInfo *>(DNT->getIdentifier()));
-
- // Otherwise, just canonicalize the type, and force it to be a TypeSpec.
- // FIXME: Why are TypeSpec and TypeSpecWithTemplate distinct in the
- // first place?
+ return NestedNameSpecifier::Create(
+ *this, DNT->getQualifier(),
+ const_cast<IdentifierInfo *>(DNT->getIdentifier()));
+ if (const auto *DTST = T->getAs<DependentTemplateSpecializationType>())
+ return NestedNameSpecifier::Create(*this, DTST->getQualifier(), true,
+ const_cast<Type *>(T));
+
+ // TODO: Set 'Template' parameter to true for other template types.
return NestedNameSpecifier::Create(*this, nullptr, false,
- const_cast<Type *>(T.getTypePtr()));
+ const_cast<Type *>(T));
}
case NestedNameSpecifier::Global:
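
The added comment notes that TypeSpecWithTemplate differs from TypeSpec only in printing the template keyword. A minimal sketch of the two source spellings, using an illustrative dependent type:

template <typename T> void use() {
  typename T::Inner::type a;                // prefix 'T::Inner::'               (TypeSpec)
  typename T::template Inner<int>::type b;  // prefix 'T::template Inner<int>::' (TypeSpecWithTemplate)
}
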
@@ -6253,6 +6373,7 @@ static FloatingRank getFloatingRank(QualType T) {
case BuiltinType::LongDouble: return LongDoubleRank;
case BuiltinType::Float128: return Float128Rank;
case BuiltinType::BFloat16: return BFloat16Rank;
+ case BuiltinType::Ibm128: return Ibm128Rank;
}
}
@@ -6268,10 +6389,11 @@ QualType ASTContext::getFloatingTypeOfSizeWithinDomain(QualType Size,
case BFloat16Rank: llvm_unreachable("Complex bfloat16 is not supported");
case Float16Rank:
case HalfRank: llvm_unreachable("Complex half is not supported");
- case FloatRank: return FloatComplexTy;
- case DoubleRank: return DoubleComplexTy;
- case LongDoubleRank: return LongDoubleComplexTy;
- case Float128Rank: return Float128ComplexTy;
+ case Ibm128Rank: return getComplexType(Ibm128Ty);
+ case FloatRank: return getComplexType(FloatTy);
+ case DoubleRank: return getComplexType(DoubleTy);
+ case LongDoubleRank: return getComplexType(LongDoubleTy);
+ case Float128Rank: return getComplexType(Float128Ty);
}
}
@@ -6284,6 +6406,8 @@ QualType ASTContext::getFloatingTypeOfSizeWithinDomain(QualType Size,
case DoubleRank: return DoubleTy;
case LongDoubleRank: return LongDoubleTy;
case Float128Rank: return Float128Ty;
+ case Ibm128Rank:
+ return Ibm128Ty;
}
llvm_unreachable("getFloatingRank(): illegal value for rank");
}
@@ -7001,7 +7125,7 @@ ASTContext::getObjCEncodingForFunctionDecl(const FunctionDecl *Decl) const {
void ASTContext::getObjCEncodingForMethodParameter(Decl::ObjCDeclQualifier QT,
QualType T, std::string& S,
bool Extended) const {
- // Encode type qualifer, 'in', 'inout', etc. for the parameter.
+ // Encode type qualifier, 'in', 'inout', etc. for the parameter.
getObjCEncodingForTypeQualifier(QT, S);
// Encode parameter type.
ObjCEncOptions Options = ObjCEncOptions()
@@ -7259,6 +7383,7 @@ static char getObjCEncodingForPrimitiveType(const ASTContext *C,
case BuiltinType::BFloat16:
case BuiltinType::Float16:
case BuiltinType::Float128:
+ case BuiltinType::Ibm128:
case BuiltinType::Half:
case BuiltinType::ShortAccum:
case BuiltinType::Accum:
@@ -7705,7 +7830,7 @@ void ASTContext::getObjCEncodingForTypeImpl(QualType T, std::string &S,
.setExpandStructures()),
FD);
if (FD || Options.EncodingProperty() || Options.EncodeClassNames()) {
- // Note that we do extended encoding of protocol qualifer list
+ // Note that we do extended encoding of protocol qualifier list
// Only when doing ivar or property encoding.
S += '"';
for (const auto *I : OPT->quals()) {
@@ -8674,8 +8799,8 @@ bool ASTContext::areCompatibleVectorTypes(QualType FirstVec,
static uint64_t getSVETypeSize(ASTContext &Context, const BuiltinType *Ty) {
assert(Ty->isVLSTBuiltinType() && "Invalid SVE Type");
return Ty->getKind() == BuiltinType::SveBool
- ? Context.getLangOpts().ArmSveVectorBits / Context.getCharWidth()
- : Context.getLangOpts().ArmSveVectorBits;
+ ? (Context.getLangOpts().VScaleMin * 128) / Context.getCharWidth()
+ : Context.getLangOpts().VScaleMin * 128;
}
bool ASTContext::areCompatibleSveTypes(QualType FirstType,
@@ -9105,13 +9230,9 @@ void getIntersectionOfProtocols(ASTContext &Context,
// Remove any implied protocols from the list of inherited protocols.
if (!ImpliedProtocols.empty()) {
- IntersectionSet.erase(
- std::remove_if(IntersectionSet.begin(),
- IntersectionSet.end(),
- [&](ObjCProtocolDecl *proto) -> bool {
- return ImpliedProtocols.count(proto) > 0;
- }),
- IntersectionSet.end());
+ llvm::erase_if(IntersectionSet, [&](ObjCProtocolDecl *proto) -> bool {
+ return ImpliedProtocols.count(proto) > 0;
+ });
}
// Sort the remaining protocols by name.
@@ -9649,11 +9770,19 @@ static QualType mergeEnumWithInteger(ASTContext &Context, const EnumType *ET,
QualType ASTContext::mergeTypes(QualType LHS, QualType RHS,
bool OfBlockPointer,
bool Unqualified, bool BlockReturnType) {
+ // For C++ we will not reach this code with reference types (see below),
+ // for OpenMP variant call overloading we might.
+ //
// C++ [expr]: If an expression initially has the type "reference to T", the
// type is adjusted to "T" prior to any further analysis, the expression
// designates the object or function denoted by the reference, and the
// expression is an lvalue unless the reference is an rvalue reference and
// the expression is a function call (possibly inside parentheses).
+ if (LangOpts.OpenMP && LHS->getAs<ReferenceType>() &&
+ RHS->getAs<ReferenceType>() && LHS->getTypeClass() == RHS->getTypeClass())
+ return mergeTypes(LHS->getAs<ReferenceType>()->getPointeeType(),
+ RHS->getAs<ReferenceType>()->getPointeeType(),
+ OfBlockPointer, Unqualified, BlockReturnType);
if (LHS->getAs<ReferenceType>() || RHS->getAs<ReferenceType>())
return {};
@@ -9976,7 +10105,7 @@ QualType ASTContext::mergeTypes(QualType LHS, QualType RHS,
unsigned LHSBits = LHS->castAs<ExtIntType>()->getNumBits();
unsigned RHSBits = RHS->castAs<ExtIntType>()->getNumBits();
- // Like unsigned/int, shouldn't have a type if they dont match.
+ // Like unsigned/int, shouldn't have a type if they don't match.
if (LHSUnsigned != RHSUnsigned)
return {};
@@ -10617,7 +10746,7 @@ static QualType DecodeTypeFromStr(const char *&Str, const ASTContext &Context,
}
// On some targets such as PowerPC, some of the builtins are defined with custom
-// type decriptors for target-dependent types. These descriptors are decoded in
+// type descriptors for target-dependent types. These descriptors are decoded in
// other functions, but it may be useful to be able to fall back to default
// descriptor decoding to define builtins mixing target-dependent and target-
// independent types. This function allows decoding one type descriptor with
@@ -11174,19 +11303,21 @@ QualType ASTContext::getIntTypeForBitwidth(unsigned DestWidth,
/// sets floating point QualTy according to specified bitwidth.
/// Returns empty type if there is no appropriate target types.
QualType ASTContext::getRealTypeForBitwidth(unsigned DestWidth,
- bool ExplicitIEEE) const {
- TargetInfo::RealType Ty =
- getTargetInfo().getRealTypeByWidth(DestWidth, ExplicitIEEE);
+ FloatModeKind ExplicitType) const {
+ FloatModeKind Ty =
+ getTargetInfo().getRealTypeByWidth(DestWidth, ExplicitType);
switch (Ty) {
- case TargetInfo::Float:
+ case FloatModeKind::Float:
return FloatTy;
- case TargetInfo::Double:
+ case FloatModeKind::Double:
return DoubleTy;
- case TargetInfo::LongDouble:
+ case FloatModeKind::LongDouble:
return LongDoubleTy;
- case TargetInfo::Float128:
+ case FloatModeKind::Float128:
return Float128Ty;
- case TargetInfo::NoFloat:
+ case FloatModeKind::Ibm128:
+ return Ibm128Ty;
+ case FloatModeKind::NoFloat:
return {};
}
@@ -11615,13 +11746,9 @@ ASTContext::filterFunctionTargetAttrs(const TargetAttr *TD) const {
assert(TD != nullptr);
ParsedTargetAttr ParsedAttr = TD->parse();
- ParsedAttr.Features.erase(
- llvm::remove_if(ParsedAttr.Features,
- [&](const std::string &Feat) {
- return !Target->isValidFeatureName(
- StringRef{Feat}.substr(1));
- }),
- ParsedAttr.Features.end());
+ llvm::erase_if(ParsedAttr.Features, [&](const std::string &Feat) {
+ return !Target->isValidFeatureName(StringRef{Feat}.substr(1));
+ });
return ParsedAttr;
}
@@ -11666,6 +11793,9 @@ void ASTContext::getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap,
Target->getCPUSpecificCPUDispatchFeatures(
SD->getCPUName(GD.getMultiVersionIndex())->getName(), FeaturesTmp);
std::vector<std::string> Features(FeaturesTmp.begin(), FeaturesTmp.end());
+ Features.insert(Features.begin(),
+ Target->getTargetOpts().FeaturesAsWritten.begin(),
+ Target->getTargetOpts().FeaturesAsWritten.end());
Target->initFeatureMap(FeatureMap, getDiagnostics(), TargetCPU, Features);
} else {
FeatureMap = Target->getTargetOpts().FeatureMap;
@@ -11712,86 +11842,3 @@ StringRef ASTContext::getCUIDHash() const {
CUIDHash = llvm::utohexstr(llvm::MD5Hash(LangOpts.CUID), /*LowerCase=*/true);
return CUIDHash;
}
-
-// Get the closest named parent, so we can order the sycl naming decls somewhere
-// that mangling is meaningful.
-static const DeclContext *GetNamedParent(const CXXRecordDecl *RD) {
- const DeclContext *DC = RD->getDeclContext();
-
- while (!isa<NamedDecl, TranslationUnitDecl>(DC))
- DC = DC->getParent();
- return DC;
-}
-
-void ASTContext::AddSYCLKernelNamingDecl(const CXXRecordDecl *RD) {
- assert(getLangOpts().isSYCL() && "Only valid for SYCL programs");
- RD = RD->getCanonicalDecl();
- const DeclContext *DC = GetNamedParent(RD);
-
- assert(RD->getLocation().isValid() &&
- "Invalid location on kernel naming decl");
-
- (void)SYCLKernelNamingTypes[DC].insert(RD);
-}
-
-bool ASTContext::IsSYCLKernelNamingDecl(const NamedDecl *ND) const {
- assert(getLangOpts().isSYCL() && "Only valid for SYCL programs");
- const auto *RD = dyn_cast<CXXRecordDecl>(ND);
- if (!RD)
- return false;
- RD = RD->getCanonicalDecl();
- const DeclContext *DC = GetNamedParent(RD);
-
- auto Itr = SYCLKernelNamingTypes.find(DC);
-
- if (Itr == SYCLKernelNamingTypes.end())
- return false;
-
- return Itr->getSecond().count(RD);
-}
-
-// Filters the Decls list to those that share the lambda mangling with the
-// passed RD.
-void ASTContext::FilterSYCLKernelNamingDecls(
- const CXXRecordDecl *RD,
- llvm::SmallVectorImpl<const CXXRecordDecl *> &Decls) {
-
- if (!SYCLKernelFilterContext)
- SYCLKernelFilterContext.reset(
- ItaniumMangleContext::create(*this, getDiagnostics()));
-
- llvm::SmallString<128> LambdaSig;
- llvm::raw_svector_ostream Out(LambdaSig);
- SYCLKernelFilterContext->mangleLambdaSig(RD, Out);
-
- llvm::erase_if(Decls, [this, &LambdaSig](const CXXRecordDecl *LocalRD) {
- llvm::SmallString<128> LocalLambdaSig;
- llvm::raw_svector_ostream LocalOut(LocalLambdaSig);
- SYCLKernelFilterContext->mangleLambdaSig(LocalRD, LocalOut);
- return LambdaSig != LocalLambdaSig;
- });
-}
-
-unsigned ASTContext::GetSYCLKernelNamingIndex(const NamedDecl *ND) {
- assert(getLangOpts().isSYCL() && "Only valid for SYCL programs");
- assert(IsSYCLKernelNamingDecl(ND) &&
- "Lambda not involved in mangling asked for a naming index?");
-
- const CXXRecordDecl *RD = cast<CXXRecordDecl>(ND)->getCanonicalDecl();
- const DeclContext *DC = GetNamedParent(RD);
-
- auto Itr = SYCLKernelNamingTypes.find(DC);
- assert(Itr != SYCLKernelNamingTypes.end() && "Not a valid DeclContext?");
-
- const llvm::SmallPtrSet<const CXXRecordDecl *, 4> &Set = Itr->getSecond();
-
- llvm::SmallVector<const CXXRecordDecl *> Decls{Set.begin(), Set.end()};
-
- FilterSYCLKernelNamingDecls(RD, Decls);
-
- llvm::sort(Decls, [](const CXXRecordDecl *LHS, const CXXRecordDecl *RHS) {
- return LHS->getLambdaManglingNumber() < RHS->getLambdaManglingNumber();
- });
-
- return llvm::find(Decls, RD) - Decls.begin();
-}
diff --git a/clang/lib/AST/ASTDiagnostic.cpp b/clang/lib/AST/ASTDiagnostic.cpp
index dc22481d0a84..7e435e8b35b8 100644
--- a/clang/lib/AST/ASTDiagnostic.cpp
+++ b/clang/lib/AST/ASTDiagnostic.cpp
@@ -1088,6 +1088,9 @@ class TemplateDiff {
Ty->getAs<TemplateSpecializationType>())
return TST;
+ if (const auto* SubstType = Ty->getAs<SubstTemplateTypeParmType>())
+ Ty = SubstType->getReplacementType();
+
const RecordType *RT = Ty->getAs<RecordType>();
if (!RT)
diff --git a/clang/lib/AST/ASTDumper.cpp b/clang/lib/AST/ASTDumper.cpp
index 3d368a0a7b63..c6df61f79e2e 100644
--- a/clang/lib/AST/ASTDumper.cpp
+++ b/clang/lib/AST/ASTDumper.cpp
@@ -90,15 +90,7 @@ void ASTDumper::dumpTemplateDeclSpecialization(const SpecializationDecl *D,
// FIXME: The redecls() range sometimes has elements of a less-specific
// type. (In particular, ClassTemplateSpecializationDecl::redecls() gives
// us TagDecls, and should give CXXRecordDecls).
- auto *Redecl = dyn_cast<SpecializationDecl>(RedeclWithBadType);
- if (!Redecl) {
- // Found the injected-class-name for a class template. This will be dumped
- // as part of its surrounding class so we don't need to dump it here.
- assert(isa<CXXRecordDecl>(RedeclWithBadType) &&
- "expected an injected-class-name");
- continue;
- }
-
+ auto *Redecl = cast<SpecializationDecl>(RedeclWithBadType);
switch (Redecl->getTemplateSpecializationKind()) {
case TSK_ExplicitInstantiationDeclaration:
case TSK_ExplicitInstantiationDefinition:
diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp
index 787e02029dae..710e40bbb4b7 100644
--- a/clang/lib/AST/ASTImporter.cpp
+++ b/clang/lib/AST/ASTImporter.cpp
@@ -76,6 +76,7 @@ namespace clang {
using llvm::make_error;
using llvm::Error;
using llvm::Expected;
+ using ExpectedTypePtr = llvm::Expected<const Type *>;
using ExpectedType = llvm::Expected<QualType>;
using ExpectedStmt = llvm::Expected<Stmt *>;
using ExpectedExpr = llvm::Expected<Expr *>;
@@ -160,7 +161,9 @@ namespace clang {
// Call the import function of ASTImporter for a baseclass of type `T` and
// cast the return value to `T`.
template <typename T>
- Expected<T *> import(T *From) {
+ auto import(T *From)
+ -> std::conditional_t<std::is_base_of<Type, T>::value,
+ Expected<const T *>, Expected<T *>> {
auto ToOrErr = Importer.Import(From);
if (!ToOrErr)
return ToOrErr.takeError();
@@ -168,7 +171,7 @@ namespace clang {
}
template <typename T>
- Expected<T *> import(const T *From) {
+ auto import(const T *From) {
return import(const_cast<T *>(From));
}
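
The first hunk above makes import(T *) yield Expected<const T *> whenever T derives from Type, chosen at compile time with std::conditional_t. A standalone sketch of that selection with stand-in types; the names mimic the clang classes but are illustrative only:

#include <type_traits>

struct Type {};
struct RecordType : Type {};
struct Decl {};

template <typename T>
using ImportedPtr =
    std::conditional_t<std::is_base_of<Type, T>::value, const T *, T *>;

static_assert(std::is_same_v<ImportedPtr<RecordType>, const RecordType *>);
static_assert(std::is_same_v<ImportedPtr<Decl>, Decl *>);
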
@@ -186,22 +189,6 @@ namespace clang {
return import(*From);
}
- // Helper for chaining together multiple imports. If an error is detected,
- // subsequent imports will return default constructed nodes, so that failure
- // can be detected with a single conditional branch after a sequence of
- // imports.
- template <typename T> T importChecked(Error &Err, const T &From) {
- // Don't attempt to import nodes if we hit an error earlier.
- if (Err)
- return T{};
- Expected<T> MaybeVal = import(From);
- if (!MaybeVal) {
- Err = MaybeVal.takeError();
- return T{};
- }
- return *MaybeVal;
- }
-
ExplicitSpecifier importExplicitSpecifier(Error &Err,
ExplicitSpecifier ESpec);
@@ -313,11 +300,8 @@ namespace clang {
auto *ToNamed = cast<NamedDecl>(ToD);
DeclContextLookupResult FromLookup =
FromDC->lookup(FromNamed->getDeclName());
- for (NamedDecl *ND : FromLookup)
- if (ND == FromNamed) {
- ToDC->makeDeclVisibleInContext(ToNamed);
- break;
- }
+ if (llvm::is_contained(FromLookup, FromNamed))
+ ToDC->makeDeclVisibleInContext(ToNamed);
}
}
}
@@ -383,6 +367,8 @@ namespace clang {
ExpectedType VisitTemplateTypeParmType(const TemplateTypeParmType *T);
ExpectedType VisitSubstTemplateTypeParmType(
const SubstTemplateTypeParmType *T);
+ ExpectedType
+ VisitSubstTemplateTypeParmPackType(const SubstTemplateTypeParmPackType *T);
ExpectedType VisitTemplateSpecializationType(
const TemplateSpecializationType *T);
ExpectedType VisitElaboratedType(const ElaboratedType *T);
@@ -475,21 +461,13 @@ namespace clang {
Error ImportDefaultArgOfParmVarDecl(const ParmVarDecl *FromParam,
ParmVarDecl *ToParam);
+ Expected<InheritedConstructor>
+ ImportInheritedConstructor(const InheritedConstructor &From);
+
template <typename T>
bool hasSameVisibilityContextAndLinkage(T *Found, T *From);
- bool IsStructuralMatch(Decl *From, Decl *To, bool Complain);
- bool IsStructuralMatch(RecordDecl *FromRecord, RecordDecl *ToRecord,
- bool Complain = true);
- bool IsStructuralMatch(VarDecl *FromVar, VarDecl *ToVar,
- bool Complain = true);
- bool IsStructuralMatch(EnumDecl *FromEnum, EnumDecl *ToRecord);
- bool IsStructuralMatch(EnumConstantDecl *FromEC, EnumConstantDecl *ToEC);
- bool IsStructuralMatch(FunctionTemplateDecl *From,
- FunctionTemplateDecl *To);
- bool IsStructuralMatch(FunctionDecl *From, FunctionDecl *To);
- bool IsStructuralMatch(ClassTemplateDecl *From, ClassTemplateDecl *To);
- bool IsStructuralMatch(VarTemplateDecl *From, VarTemplateDecl *To);
+ bool IsStructuralMatch(Decl *From, Decl *To, bool Complain = true);
ExpectedDecl VisitDecl(Decl *D);
ExpectedDecl VisitImportDecl(ImportDecl *D);
ExpectedDecl VisitEmptyDecl(EmptyDecl *D);
@@ -597,6 +575,7 @@ namespace clang {
ExpectedStmt VisitSourceLocExpr(SourceLocExpr *E);
ExpectedStmt VisitVAArgExpr(VAArgExpr *E);
ExpectedStmt VisitChooseExpr(ChooseExpr *E);
+ ExpectedStmt VisitShuffleVectorExpr(ShuffleVectorExpr *E);
ExpectedStmt VisitGNUNullExpr(GNUNullExpr *E);
ExpectedStmt VisitGenericSelectionExpr(GenericSelectionExpr *E);
ExpectedStmt VisitPredefinedExpr(PredefinedExpr *E);
@@ -667,6 +646,22 @@ namespace clang {
ExpectedStmt VisitCXXTypeidExpr(CXXTypeidExpr *E);
ExpectedStmt VisitCXXFoldExpr(CXXFoldExpr *E);
+ // Helper for chaining together multiple imports. If an error is detected,
+ // subsequent imports will return default constructed nodes, so that failure
+ // can be detected with a single conditional branch after a sequence of
+ // imports.
+ template <typename T> T importChecked(Error &Err, const T &From) {
+ // Don't attempt to import nodes if we hit an error earlier.
+ if (Err)
+ return T{};
+ Expected<T> MaybeVal = import(From);
+ if (!MaybeVal) {
+ Err = MaybeVal.takeError();
+ return T{};
+ }
+ return *MaybeVal;
+ }
+
template<typename IIter, typename OIter>
Error ImportArrayChecked(IIter Ibegin, IIter Iend, OIter Obegin) {
using ItemT = std::remove_reference_t<decltype(*Obegin)>;
@@ -1161,12 +1156,12 @@ ASTNodeImporter::VisitMemberPointerType(const MemberPointerType *T) {
if (!ToPointeeTypeOrErr)
return ToPointeeTypeOrErr.takeError();
- ExpectedType ClassTypeOrErr = import(QualType(T->getClass(), 0));
+ ExpectedTypePtr ClassTypeOrErr = import(T->getClass());
if (!ClassTypeOrErr)
return ClassTypeOrErr.takeError();
- return Importer.getToContext().getMemberPointerType(
- *ToPointeeTypeOrErr, (*ClassTypeOrErr).getTypePtr());
+ return Importer.getToContext().getMemberPointerType(*ToPointeeTypeOrErr,
+ *ClassTypeOrErr);
}
ExpectedType
@@ -1472,18 +1467,32 @@ ExpectedType ASTNodeImporter::VisitTemplateTypeParmType(
ExpectedType ASTNodeImporter::VisitSubstTemplateTypeParmType(
const SubstTemplateTypeParmType *T) {
- ExpectedType ReplacedOrErr = import(QualType(T->getReplacedParameter(), 0));
+ Expected<const TemplateTypeParmType *> ReplacedOrErr =
+ import(T->getReplacedParameter());
if (!ReplacedOrErr)
return ReplacedOrErr.takeError();
- const TemplateTypeParmType *Replaced =
- cast<TemplateTypeParmType>((*ReplacedOrErr).getTypePtr());
ExpectedType ToReplacementTypeOrErr = import(T->getReplacementType());
if (!ToReplacementTypeOrErr)
return ToReplacementTypeOrErr.takeError();
return Importer.getToContext().getSubstTemplateTypeParmType(
- Replaced, (*ToReplacementTypeOrErr).getCanonicalType());
+ *ReplacedOrErr, ToReplacementTypeOrErr->getCanonicalType());
+}
+
+ExpectedType ASTNodeImporter::VisitSubstTemplateTypeParmPackType(
+ const SubstTemplateTypeParmPackType *T) {
+ Expected<const TemplateTypeParmType *> ReplacedOrErr =
+ import(T->getReplacedParameter());
+ if (!ReplacedOrErr)
+ return ReplacedOrErr.takeError();
+
+ Expected<TemplateArgument> ToArgumentPack = import(T->getArgumentPack());
+ if (!ToArgumentPack)
+ return ToArgumentPack.takeError();
+
+ return Importer.getToContext().getSubstTemplateTypeParmPackType(
+ *ReplacedOrErr, *ToArgumentPack);
}
ExpectedType ASTNodeImporter::VisitTemplateSpecializationType(
@@ -1498,7 +1507,7 @@ ExpectedType ASTNodeImporter::VisitTemplateSpecializationType(
return std::move(Err);
QualType ToCanonType;
- if (!QualType(T, 0).isCanonical()) {
+ if (!T->isCanonicalUnqualified()) {
QualType FromCanonType
= Importer.getFromContext().getCanonicalType(QualType(T, 0));
if (ExpectedType TyOrErr = import(FromCanonType))
@@ -1806,7 +1815,7 @@ ASTNodeImporter::ImportDeclContext(DeclContext *FromDC, bool ForceImport) {
RecordDecl *FromRecordDecl = nullptr;
RecordDecl *ToRecordDecl = nullptr;
// If we have a field that is an ArrayType we need to check if the array
- // element is a RecordDecl and if so we need to import the defintion.
+ // element is a RecordDecl and if so we need to import the definition.
if (FieldFrom->getType()->isArrayType()) {
// getBaseElementTypeUnsafe(...) handles multi-dimensional arrays for us.
FromRecordDecl = FieldFrom->getType()->getBaseElementTypeUnsafe()->getAsRecordDecl();
@@ -2158,96 +2167,17 @@ getStructuralEquivalenceKind(const ASTImporter &Importer) {
}
bool ASTNodeImporter::IsStructuralMatch(Decl *From, Decl *To, bool Complain) {
- StructuralEquivalenceContext Ctx(
- Importer.getFromContext(), Importer.getToContext(),
- Importer.getNonEquivalentDecls(), getStructuralEquivalenceKind(Importer),
- false, Complain);
- return Ctx.IsEquivalent(From, To);
-}
-
-bool ASTNodeImporter::IsStructuralMatch(RecordDecl *FromRecord,
- RecordDecl *ToRecord, bool Complain) {
// Eliminate a potential failure point where we attempt to re-import
// something we're trying to import while completing ToRecord.
- Decl *ToOrigin = Importer.GetOriginalDecl(ToRecord);
+ Decl *ToOrigin = Importer.GetOriginalDecl(To);
if (ToOrigin) {
- auto *ToOriginRecord = dyn_cast<RecordDecl>(ToOrigin);
- if (ToOriginRecord)
- ToRecord = ToOriginRecord;
+ To = ToOrigin;
}
- StructuralEquivalenceContext Ctx(Importer.getFromContext(),
- ToRecord->getASTContext(),
- Importer.getNonEquivalentDecls(),
- getStructuralEquivalenceKind(Importer),
- false, Complain);
- return Ctx.IsEquivalent(FromRecord, ToRecord);
-}
-
-bool ASTNodeImporter::IsStructuralMatch(VarDecl *FromVar, VarDecl *ToVar,
- bool Complain) {
StructuralEquivalenceContext Ctx(
Importer.getFromContext(), Importer.getToContext(),
Importer.getNonEquivalentDecls(), getStructuralEquivalenceKind(Importer),
false, Complain);
- return Ctx.IsEquivalent(FromVar, ToVar);
-}
-
-bool ASTNodeImporter::IsStructuralMatch(EnumDecl *FromEnum, EnumDecl *ToEnum) {
- // Eliminate a potential failure point where we attempt to re-import
- // something we're trying to import while completing ToEnum.
- if (Decl *ToOrigin = Importer.GetOriginalDecl(ToEnum))
- if (auto *ToOriginEnum = dyn_cast<EnumDecl>(ToOrigin))
- ToEnum = ToOriginEnum;
-
- StructuralEquivalenceContext Ctx(
- Importer.getFromContext(), Importer.getToContext(),
- Importer.getNonEquivalentDecls(), getStructuralEquivalenceKind(Importer));
- return Ctx.IsEquivalent(FromEnum, ToEnum);
-}
-
-bool ASTNodeImporter::IsStructuralMatch(FunctionTemplateDecl *From,
- FunctionTemplateDecl *To) {
- StructuralEquivalenceContext Ctx(
- Importer.getFromContext(), Importer.getToContext(),
- Importer.getNonEquivalentDecls(), getStructuralEquivalenceKind(Importer),
- false, false);
- return Ctx.IsEquivalent(From, To);
-}
-
-bool ASTNodeImporter::IsStructuralMatch(FunctionDecl *From, FunctionDecl *To) {
- StructuralEquivalenceContext Ctx(
- Importer.getFromContext(), Importer.getToContext(),
- Importer.getNonEquivalentDecls(), getStructuralEquivalenceKind(Importer),
- false, false);
- return Ctx.IsEquivalent(From, To);
-}
-
-bool ASTNodeImporter::IsStructuralMatch(EnumConstantDecl *FromEC,
- EnumConstantDecl *ToEC) {
- const llvm::APSInt &FromVal = FromEC->getInitVal();
- const llvm::APSInt &ToVal = ToEC->getInitVal();
-
- return FromVal.isSigned() == ToVal.isSigned() &&
- FromVal.getBitWidth() == ToVal.getBitWidth() &&
- FromVal == ToVal;
-}
-
-bool ASTNodeImporter::IsStructuralMatch(ClassTemplateDecl *From,
- ClassTemplateDecl *To) {
- StructuralEquivalenceContext Ctx(Importer.getFromContext(),
- Importer.getToContext(),
- Importer.getNonEquivalentDecls(),
- getStructuralEquivalenceKind(Importer));
- return Ctx.IsEquivalent(From, To);
-}
-
-bool ASTNodeImporter::IsStructuralMatch(VarTemplateDecl *From,
- VarTemplateDecl *To) {
- StructuralEquivalenceContext Ctx(Importer.getFromContext(),
- Importer.getToContext(),
- Importer.getNonEquivalentDecls(),
- getStructuralEquivalenceKind(Importer));
return Ctx.IsEquivalent(From, To);
}
@@ -3242,13 +3172,14 @@ static bool isAncestorDeclContextOf(const DeclContext *DC, const Decl *D) {
bool ASTNodeImporter::hasAutoReturnTypeDeclaredInside(FunctionDecl *D) {
QualType FromTy = D->getType();
- const FunctionProtoType *FromFPT = FromTy->getAs<FunctionProtoType>();
+ const auto *FromFPT = FromTy->getAs<FunctionProtoType>();
assert(FromFPT && "Must be called on FunctionProtoType");
- if (AutoType *AutoT = FromFPT->getReturnType()->getContainedAutoType()) {
+ if (const AutoType *AutoT =
+ FromFPT->getReturnType()->getContainedAutoType()) {
QualType DeducedT = AutoT->getDeducedType();
- if (const RecordType *RecordT =
- DeducedT.isNull() ? nullptr : dyn_cast<RecordType>(DeducedT)) {
- RecordDecl *RD = RecordT->getDecl();
+ if (const auto *RecordT =
+ !DeducedT.isNull() ? DeducedT->getAs<RecordType>() : nullptr) {
+ const RecordDecl *RD = RecordT->getDecl();
assert(RD);
if (isAncestorDeclContextOf(D, RD)) {
assert(RD->getLexicalDeclContext() == RD->getDeclContext());
@@ -3256,9 +3187,8 @@ bool ASTNodeImporter::hasAutoReturnTypeDeclaredInside(FunctionDecl *D) {
}
}
}
- if (const TypedefType *TypedefT =
- dyn_cast<TypedefType>(FromFPT->getReturnType())) {
- TypedefNameDecl *TD = TypedefT->getDecl();
+ if (const auto *TypedefT = FromFPT->getReturnType()->getAs<TypedefType>()) {
+ const TypedefNameDecl *TD = TypedefT->getDecl();
assert(TD);
if (isAncestorDeclContextOf(D, TD)) {
assert(TD->getLexicalDeclContext() == TD->getDeclContext());
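
hasAutoReturnTypeDeclaredInside checks whether a deduced return type (directly or through a typedef) names something declared lexically inside the function, which is the shape the importer must handle with a simplified prototype. A minimal sketch of such a function; the names are illustrative:

auto makeLocal() {
  struct Local { int x; };  // declared inside the function
  return Local{42};         // deduced return type is this local struct
}
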
@@ -3399,11 +3329,14 @@ ExpectedDecl ASTNodeImporter::VisitFunctionDecl(FunctionDecl *D) {
return std::move(Err);
QualType FromTy = D->getType();
+ TypeSourceInfo *FromTSI = D->getTypeSourceInfo();
// Set to true if we do not import the type of the function as is. There are
// cases when the original type would result in an infinite recursion during
// the import. To avoid an infinite recursion when importing, we create the
// FunctionDecl with a simplified function type and update it only after the
// relevant AST nodes are already imported.
+ // The type is related to TypeSourceInfo (it references the type), so we must
+ // do the same with TypeSourceInfo.
bool UsedDifferentProtoType = false;
if (const auto *FromFPT = FromTy->getAs<FunctionProtoType>()) {
QualType FromReturnTy = FromFPT->getReturnType();
@@ -3430,11 +3363,13 @@ ExpectedDecl ASTNodeImporter::VisitFunctionDecl(FunctionDecl *D) {
}
FromTy = Importer.getFromContext().getFunctionType(
FromReturnTy, FromFPT->getParamTypes(), FromEPI);
+ FromTSI = Importer.getFromContext().getTrivialTypeSourceInfo(
+ FromTy, D->getBeginLoc());
}
Error Err = Error::success();
auto T = importChecked(Err, FromTy);
- auto TInfo = importChecked(Err, D->getTypeSourceInfo());
+ auto TInfo = importChecked(Err, FromTSI);
auto ToInnerLocStart = importChecked(Err, D->getInnerLocStart());
auto ToEndLoc = importChecked(Err, D->getEndLoc());
auto ToQualifierLoc = importChecked(Err, D->getQualifierLoc());
@@ -3459,13 +3394,19 @@ ExpectedDecl ASTNodeImporter::VisitFunctionDecl(FunctionDecl *D) {
importExplicitSpecifier(Err, FromConstructor->getExplicitSpecifier());
if (Err)
return std::move(Err);
+ auto ToInheritedConstructor = InheritedConstructor();
+ if (FromConstructor->isInheritingConstructor()) {
+ Expected<InheritedConstructor> ImportedInheritedCtor =
+ import(FromConstructor->getInheritedConstructor());
+ if (!ImportedInheritedCtor)
+ return ImportedInheritedCtor.takeError();
+ ToInheritedConstructor = *ImportedInheritedCtor;
+ }
if (GetImportedOrCreateDecl<CXXConstructorDecl>(
ToFunction, D, Importer.getToContext(), cast<CXXRecordDecl>(DC),
- ToInnerLocStart, NameInfo, T, TInfo, ESpec, D->isInlineSpecified(),
- D->isImplicit(), D->getConstexprKind(),
- InheritedConstructor(), // FIXME: Properly import inherited
- // constructor info
- TrailingRequiresClause))
+ ToInnerLocStart, NameInfo, T, TInfo, ESpec, D->UsesFPIntrin(),
+ D->isInlineSpecified(), D->isImplicit(), D->getConstexprKind(),
+ ToInheritedConstructor, TrailingRequiresClause))
return ToFunction;
} else if (CXXDestructorDecl *FromDtor = dyn_cast<CXXDestructorDecl>(D)) {
@@ -3477,9 +3418,10 @@ ExpectedDecl ASTNodeImporter::VisitFunctionDecl(FunctionDecl *D) {
return std::move(Err);
if (GetImportedOrCreateDecl<CXXDestructorDecl>(
- ToFunction, D, Importer.getToContext(), cast<CXXRecordDecl>(DC),
- ToInnerLocStart, NameInfo, T, TInfo, D->isInlineSpecified(),
- D->isImplicit(), D->getConstexprKind(), TrailingRequiresClause))
+ ToFunction, D, Importer.getToContext(), cast<CXXRecordDecl>(DC),
+ ToInnerLocStart, NameInfo, T, TInfo, D->UsesFPIntrin(),
+ D->isInlineSpecified(), D->isImplicit(), D->getConstexprKind(),
+ TrailingRequiresClause))
return ToFunction;
CXXDestructorDecl *ToDtor = cast<CXXDestructorDecl>(ToFunction);
@@ -3493,15 +3435,16 @@ ExpectedDecl ASTNodeImporter::VisitFunctionDecl(FunctionDecl *D) {
return std::move(Err);
if (GetImportedOrCreateDecl<CXXConversionDecl>(
ToFunction, D, Importer.getToContext(), cast<CXXRecordDecl>(DC),
- ToInnerLocStart, NameInfo, T, TInfo, D->isInlineSpecified(), ESpec,
- D->getConstexprKind(), SourceLocation(), TrailingRequiresClause))
+ ToInnerLocStart, NameInfo, T, TInfo, D->UsesFPIntrin(),
+ D->isInlineSpecified(), ESpec, D->getConstexprKind(),
+ SourceLocation(), TrailingRequiresClause))
return ToFunction;
} else if (auto *Method = dyn_cast<CXXMethodDecl>(D)) {
if (GetImportedOrCreateDecl<CXXMethodDecl>(
ToFunction, D, Importer.getToContext(), cast<CXXRecordDecl>(DC),
ToInnerLocStart, NameInfo, T, TInfo, Method->getStorageClass(),
- Method->isInlineSpecified(), D->getConstexprKind(),
- SourceLocation(), TrailingRequiresClause))
+ Method->UsesFPIntrin(), Method->isInlineSpecified(),
+ D->getConstexprKind(), SourceLocation(), TrailingRequiresClause))
return ToFunction;
} else if (auto *Guide = dyn_cast<CXXDeductionGuideDecl>(D)) {
ExplicitSpecifier ESpec =
@@ -3519,9 +3462,9 @@ ExpectedDecl ASTNodeImporter::VisitFunctionDecl(FunctionDecl *D) {
} else {
if (GetImportedOrCreateDecl(
ToFunction, D, Importer.getToContext(), DC, ToInnerLocStart,
- NameInfo, T, TInfo, D->getStorageClass(), D->isInlineSpecified(),
- D->hasWrittenPrototype(), D->getConstexprKind(),
- TrailingRequiresClause))
+ NameInfo, T, TInfo, D->getStorageClass(), D->UsesFPIntrin(),
+ D->isInlineSpecified(), D->hasWrittenPrototype(),
+ D->getConstexprKind(), TrailingRequiresClause))
return ToFunction;
}
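The constructors above now receive the UsesFPIntrin flag explicitly. As a hedged sketch (assuming strict floating-point pragmas are what set this bit), the kind of function it marks:

    #pragma STDC FENV_ACCESS ON
    double scaled(double x) {
      // Defined while strict FP semantics are in effect, so the FunctionDecl
      // carries UsesFPIntrin = true and the importer must preserve it.
      return x * 0.1;
    }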
@@ -3604,6 +3547,10 @@ ExpectedDecl ASTNodeImporter::VisitFunctionDecl(FunctionDecl *D) {
ToFunction->setType(*TyOrErr);
else
return TyOrErr.takeError();
+ if (Expected<TypeSourceInfo *> TSIOrErr = import(D->getTypeSourceInfo()))
+ ToFunction->setTypeSourceInfo(*TSIOrErr);
+ else
+ return TSIOrErr.takeError();
}
// FIXME: Other bits to merge?
@@ -4197,6 +4144,17 @@ Error ASTNodeImporter::ImportDefaultArgOfParmVarDecl(
return Error::success();
}
+Expected<InheritedConstructor>
+ASTNodeImporter::ImportInheritedConstructor(const InheritedConstructor &From) {
+ Error Err = Error::success();
+ CXXConstructorDecl *ToBaseCtor = importChecked(Err, From.getConstructor());
+ ConstructorUsingShadowDecl *ToShadow =
+ importChecked(Err, From.getShadowDecl());
+ if (Err)
+ return std::move(Err);
+ return InheritedConstructor(ToShadow, ToBaseCtor);
+}
+
ExpectedDecl ASTNodeImporter::VisitParmVarDecl(ParmVarDecl *D) {
// Parameters are created in the translation unit's context, then moved
// into the function declaration's context afterward.
@@ -4736,9 +4694,29 @@ ExpectedDecl ASTNodeImporter::VisitUsingShadowDecl(UsingShadowDecl *D) {
return ToTargetOrErr.takeError();
UsingShadowDecl *ToShadow;
- if (GetImportedOrCreateDecl(ToShadow, D, Importer.getToContext(), DC, Loc,
- Name, *ToIntroducerOrErr, *ToTargetOrErr))
- return ToShadow;
+ if (auto *FromConstructorUsingShadow =
+ dyn_cast<ConstructorUsingShadowDecl>(D)) {
+ Error Err = Error::success();
+ ConstructorUsingShadowDecl *Nominated = importChecked(
+ Err, FromConstructorUsingShadow->getNominatedBaseClassShadowDecl());
+ if (Err)
+ return std::move(Err);
+ // The 'Target' parameter of the ConstructorUsingShadowDecl constructor is
+ // really the "NominatedBaseClassShadowDecl" value if one exists (see the
+ // body of ConstructorUsingShadowDecl::ConstructorUsingShadowDecl). Pass the
+ // NominatedBaseClassShadowDecl (if non-null) so the created declaration gets
+ // the correct values.
+ if (GetImportedOrCreateDecl<ConstructorUsingShadowDecl>(
+ ToShadow, D, Importer.getToContext(), DC, Loc,
+ cast<UsingDecl>(*ToIntroducerOrErr),
+ Nominated ? Nominated : *ToTargetOrErr,
+ FromConstructorUsingShadow->constructsVirtualBase()))
+ return ToShadow;
+ } else {
+ if (GetImportedOrCreateDecl(ToShadow, D, Importer.getToContext(), DC, Loc,
+ Name, *ToIntroducerOrErr, *ToTargetOrErr))
+ return ToShadow;
+ }
ToShadow->setLexicalDeclContext(LexicalDC);
ToShadow->setAccess(D->getAccess());
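For context, the source pattern that produces ConstructorUsingShadowDecl and InheritedConstructor nodes, which the importer now handles (a minimal sketch):

    struct Base {
      Base(int);
    };
    struct Derived : Base {
      using Base::Base;   // inheriting constructor: creates a
                          // ConstructorUsingShadowDecl and, when used,
                          // an inherited CXXConstructorDecl
    };
    Derived d(42);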
@@ -6339,7 +6317,7 @@ ExpectedStmt ASTNodeImporter::VisitIfStmt(IfStmt *S) {
if (Err)
return std::move(Err);
- return IfStmt::Create(Importer.getToContext(), ToIfLoc, S->isConstexpr(),
+ return IfStmt::Create(Importer.getToContext(), ToIfLoc, S->getStatementKind(),
ToInit, ToConditionVariable, ToCond, ToLParenLoc,
ToRParenLoc, ToThen, ToElseLoc, ToElse);
}
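The change from isConstexpr() to getStatementKind() presumably reflects that an if statement now carries a statement kind rather than a single flag; the forms involved, as a sketch:

    void f() {
      if (true) {}                          // ordinary if
      if constexpr (sizeof(int) == 4) {}    // constexpr if
      if consteval {} else {}               // consteval if (C++23), needs its own kind
    }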
@@ -6710,6 +6688,24 @@ ExpectedStmt ASTNodeImporter::VisitChooseExpr(ChooseExpr *E) {
ToRParenLoc, CondIsTrue);
}
+ExpectedStmt ASTNodeImporter::VisitShuffleVectorExpr(ShuffleVectorExpr *E) {
+ Error Err = Error::success();
+ auto ToRParenLoc = importChecked(Err, E->getRParenLoc());
+ auto ToBeginLoc = importChecked(Err, E->getBeginLoc());
+ auto ToType = importChecked(Err, E->getType());
+ const unsigned NumSubExprs = E->getNumSubExprs();
+
+ llvm::SmallVector<Expr *, 8> ToSubExprs;
+ llvm::ArrayRef<Expr *> FromSubExprs(E->getSubExprs(), NumSubExprs);
+ ToSubExprs.resize(NumSubExprs);
+
+ if ((Err = ImportContainerChecked(FromSubExprs, ToSubExprs)))
+ return std::move(Err);
+
+ return new (Importer.getToContext()) ShuffleVectorExpr(
+ Importer.getToContext(), ToSubExprs, ToType, ToBeginLoc, ToRParenLoc);
+}
+
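ShuffleVectorExpr is the AST node for __builtin_shufflevector; a small self-contained example of the sub-expressions (two vectors plus the constant indices) that the new visitor imports:

    typedef int v4si __attribute__((vector_size(16)));
    v4si reverse(v4si v) {
      return __builtin_shufflevector(v, v, 3, 2, 1, 0);
    }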
ExpectedStmt ASTNodeImporter::VisitGNUNullExpr(GNUNullExpr *E) {
ExpectedType TypeOrErr = import(E->getType());
if (!TypeOrErr)
@@ -8339,7 +8335,7 @@ ASTImporter::Import(ExprWithCleanups::CleanupObject From) {
return make_error<ImportError>(ImportError::UnsupportedConstruct);
}
-Expected<const Type *> ASTImporter::Import(const Type *FromT) {
+ExpectedTypePtr ASTImporter::Import(const Type *FromT) {
if (!FromT)
return FromT;
@@ -8349,7 +8345,7 @@ Expected<const Type *> ASTImporter::Import(const Type *FromT) {
if (Pos != ImportedTypes.end())
return Pos->second;
- // Import the type
+ // Import the type.
ASTNodeImporter Importer(*this);
ExpectedType ToTOrErr = Importer.Visit(FromT);
if (!ToTOrErr)
@@ -8365,7 +8361,7 @@ Expected<QualType> ASTImporter::Import(QualType FromT) {
if (FromT.isNull())
return QualType{};
- Expected<const Type *> ToTyOrErr = Import(FromT.getTypePtr());
+ ExpectedTypePtr ToTyOrErr = Import(FromT.getTypePtr());
if (!ToTyOrErr)
return ToTyOrErr.takeError();
@@ -8388,59 +8384,269 @@ Expected<TypeSourceInfo *> ASTImporter::Import(TypeSourceInfo *FromTSI) {
return ToContext.getTrivialTypeSourceInfo(*TOrErr, *BeginLocOrErr);
}
-Expected<Attr *> ASTImporter::Import(const Attr *FromAttr) {
+// To use this object, create it before the new attribute is created and
+// destroy it afterwards. The constructor already performs the import of the
+// data.
+template <typename T> struct AttrArgImporter {
+ AttrArgImporter<T>(const AttrArgImporter<T> &) = delete;
+ AttrArgImporter<T>(AttrArgImporter<T> &&) = default;
+ AttrArgImporter<T> &operator=(const AttrArgImporter<T> &) = delete;
+ AttrArgImporter<T> &operator=(AttrArgImporter<T> &&) = default;
+
+ AttrArgImporter(ASTNodeImporter &I, Error &Err, const T &From)
+ : To(I.importChecked(Err, From)) {}
+
+ const T &value() { return To; }
+
+private:
+ T To;
+};
+
+// To use this object, create it before the new attribute is created and
+// destroy it afterwards. The constructor already performs the import of the
+// data. The array data is accessible as a pointer, which is the form the
+// attribute classes use. Create one such object per imported array (the array
+// size is not imported, just copied).
+template <typename T> struct AttrArgArrayImporter {
+ AttrArgArrayImporter<T>(const AttrArgArrayImporter<T> &) = delete;
+ AttrArgArrayImporter<T>(AttrArgArrayImporter<T> &&) = default;
+ AttrArgArrayImporter<T> &operator=(const AttrArgArrayImporter<T> &) = delete;
+ AttrArgArrayImporter<T> &operator=(AttrArgArrayImporter<T> &&) = default;
+
+ AttrArgArrayImporter(ASTNodeImporter &I, Error &Err,
+ const llvm::iterator_range<T *> &From,
+ unsigned ArraySize) {
+ if (Err)
+ return;
+ To.reserve(ArraySize);
+ Err = I.ImportContainerChecked(From, To);
+ }
+
+ T *value() { return To.data(); }
+
+private:
+ llvm::SmallVector<T, 2> To;
+};
+
+class AttrImporter {
+ Error Err{Error::success()};
Attr *ToAttr = nullptr;
- SourceRange ToRange;
- if (Error Err = importInto(ToRange, FromAttr->getRange()))
- return std::move(Err);
+ ASTImporter &Importer;
+ ASTNodeImporter NImporter;
+
+public:
+ AttrImporter(ASTImporter &I) : Importer(I), NImporter(I) {}
+
+ // Create an "importer" for an attribute parameter.
+ // The result of calling 'value()' on that object is to be passed to
+ // 'importAttr', in the order that the attribute class expects.
+ template <class T> AttrArgImporter<T> importArg(const T &From) {
+ return AttrArgImporter<T>(NImporter, Err, From);
+ }
+
+ // Create an "importer" for an attribute parameter that has array type.
+ // The result of calling 'value()' on that object is to be passed to
+ // 'importAttr', followed by the size of the array as the next argument.
+ template <typename T>
+ AttrArgArrayImporter<T> importArrayArg(const llvm::iterator_range<T *> &From,
+ unsigned ArraySize) {
+ return AttrArgArrayImporter<T>(NImporter, Err, From, ArraySize);
+ }
+
+ // Create an attribute object with the specified arguments.
+ // 'FromAttr' is the original (not imported) attribute; 'ImportedArg' contains
+ // the values to pass to the attribute's 'Create' function. (The 'Create'
+ // overload with 'ASTContext' first and 'AttributeCommonInfo' last is used
+ // here.) As much data as possible is copied or imported from the old
+ // attribute. The passed arguments must already be imported.
+ // If an import error happens, the internal error is set to it, and any
+ // further import attempt is ignored.
+ template <typename T, typename... Arg>
+ void importAttr(const T *FromAttr, Arg &&...ImportedArg) {
+ static_assert(std::is_base_of<Attr, T>::value,
+ "T should be subclass of Attr.");
+ assert(!ToAttr && "Use one AttrImporter to import one Attribute object.");
+
+ const IdentifierInfo *ToAttrName = Importer.Import(FromAttr->getAttrName());
+ const IdentifierInfo *ToScopeName =
+ Importer.Import(FromAttr->getScopeName());
+ SourceRange ToAttrRange =
+ NImporter.importChecked(Err, FromAttr->getRange());
+ SourceLocation ToScopeLoc =
+ NImporter.importChecked(Err, FromAttr->getScopeLoc());
+
+ if (Err)
+ return;
+
+ AttributeCommonInfo ToI(ToAttrName, ToScopeName, ToAttrRange, ToScopeLoc,
+ FromAttr->getParsedKind(), FromAttr->getSyntax(),
+ FromAttr->getAttributeSpellingListIndex());
+ // The "SemanticSpelling" is not needed to be passed to the constructor.
+ // That value is recalculated from the SpellingListIndex if needed.
+ ToAttr = T::Create(Importer.getToContext(),
+ std::forward<Arg>(ImportedArg)..., ToI);
+
+ ToAttr->setImplicit(FromAttr->isImplicit());
+ ToAttr->setPackExpansion(FromAttr->isPackExpansion());
+ if (auto *ToInheritableAttr = dyn_cast<InheritableAttr>(ToAttr))
+ ToInheritableAttr->setInherited(FromAttr->isInherited());
+ }
+
+ // Create a clone of 'FromAttr' and import only its source range.
+ // If 'FromAttr' contains other data that should be imported, the clone is
+ // left with invalid references into the source AST.
+ void cloneAttr(const Attr *FromAttr) {
+ assert(!ToAttr && "Use one AttrImporter to import one Attribute object.");
+
+ SourceRange ToRange = NImporter.importChecked(Err, FromAttr->getRange());
+ if (Err)
+ return;
+
+ ToAttr = FromAttr->clone(Importer.getToContext());
+ ToAttr->setRange(ToRange);
+ }
+
+ // Get the result of the previous import attempt (can be used only once).
+ llvm::Expected<Attr *> getResult() && {
+ if (Err)
+ return std::move(Err);
+ assert(ToAttr && "Attribute should be created.");
+ return ToAttr;
+ }
+};
+
+Expected<Attr *> ASTImporter::Import(const Attr *FromAttr) {
+ AttrImporter AI(*this);
// FIXME: Is there some kind of AttrVisitor to use here?
switch (FromAttr->getKind()) {
case attr::Aligned: {
auto *From = cast<AlignedAttr>(FromAttr);
- AlignedAttr *To;
- auto CreateAlign = [&](bool IsAlignmentExpr, void *Alignment) {
- return AlignedAttr::Create(ToContext, IsAlignmentExpr, Alignment, ToRange,
- From->getSyntax(),
- From->getSemanticSpelling());
- };
- if (From->isAlignmentExpr()) {
- if (auto ToEOrErr = Import(From->getAlignmentExpr()))
- To = CreateAlign(true, *ToEOrErr);
- else
- return ToEOrErr.takeError();
- } else {
- if (auto ToTOrErr = Import(From->getAlignmentType()))
- To = CreateAlign(false, *ToTOrErr);
- else
- return ToTOrErr.takeError();
- }
- To->setInherited(From->isInherited());
- To->setPackExpansion(From->isPackExpansion());
- To->setImplicit(From->isImplicit());
- ToAttr = To;
+ if (From->isAlignmentExpr())
+ AI.importAttr(From, true, AI.importArg(From->getAlignmentExpr()).value());
+ else
+ AI.importAttr(From, false,
+ AI.importArg(From->getAlignmentType()).value());
break;
}
+
case attr::Format: {
const auto *From = cast<FormatAttr>(FromAttr);
- FormatAttr *To;
- IdentifierInfo *ToAttrType = Import(From->getType());
- To = FormatAttr::Create(ToContext, ToAttrType, From->getFormatIdx(),
- From->getFirstArg(), ToRange, From->getSyntax());
- To->setInherited(From->isInherited());
- ToAttr = To;
+ AI.importAttr(From, Import(From->getType()), From->getFormatIdx(),
+ From->getFirstArg());
break;
}
- default:
- // FIXME: 'clone' copies every member but some of them should be imported.
- // Handle other Attrs that have parameters that should be imported.
- ToAttr = FromAttr->clone(ToContext);
- ToAttr->setRange(ToRange);
+
+ case attr::AssertCapability: {
+ const auto *From = cast<AssertCapabilityAttr>(FromAttr);
+ AI.importAttr(From,
+ AI.importArrayArg(From->args(), From->args_size()).value(),
+ From->args_size());
+ break;
+ }
+ case attr::AcquireCapability: {
+ const auto *From = cast<AcquireCapabilityAttr>(FromAttr);
+ AI.importAttr(From,
+ AI.importArrayArg(From->args(), From->args_size()).value(),
+ From->args_size());
break;
}
- assert(ToAttr && "Attribute should be created.");
-
- return ToAttr;
+ case attr::TryAcquireCapability: {
+ const auto *From = cast<TryAcquireCapabilityAttr>(FromAttr);
+ AI.importAttr(From, AI.importArg(From->getSuccessValue()).value(),
+ AI.importArrayArg(From->args(), From->args_size()).value(),
+ From->args_size());
+ break;
+ }
+ case attr::ReleaseCapability: {
+ const auto *From = cast<ReleaseCapabilityAttr>(FromAttr);
+ AI.importAttr(From,
+ AI.importArrayArg(From->args(), From->args_size()).value(),
+ From->args_size());
+ break;
+ }
+ case attr::RequiresCapability: {
+ const auto *From = cast<RequiresCapabilityAttr>(FromAttr);
+ AI.importAttr(From,
+ AI.importArrayArg(From->args(), From->args_size()).value(),
+ From->args_size());
+ break;
+ }
+ case attr::GuardedBy: {
+ const auto *From = cast<GuardedByAttr>(FromAttr);
+ AI.importAttr(From, AI.importArg(From->getArg()).value());
+ break;
+ }
+ case attr::PtGuardedBy: {
+ const auto *From = cast<PtGuardedByAttr>(FromAttr);
+ AI.importAttr(From, AI.importArg(From->getArg()).value());
+ break;
+ }
+ case attr::AcquiredAfter: {
+ const auto *From = cast<AcquiredAfterAttr>(FromAttr);
+ AI.importAttr(From,
+ AI.importArrayArg(From->args(), From->args_size()).value(),
+ From->args_size());
+ break;
+ }
+ case attr::AcquiredBefore: {
+ const auto *From = cast<AcquiredBeforeAttr>(FromAttr);
+ AI.importAttr(From,
+ AI.importArrayArg(From->args(), From->args_size()).value(),
+ From->args_size());
+ break;
+ }
+ case attr::AssertExclusiveLock: {
+ const auto *From = cast<AssertExclusiveLockAttr>(FromAttr);
+ AI.importAttr(From,
+ AI.importArrayArg(From->args(), From->args_size()).value(),
+ From->args_size());
+ break;
+ }
+ case attr::AssertSharedLock: {
+ const auto *From = cast<AssertSharedLockAttr>(FromAttr);
+ AI.importAttr(From,
+ AI.importArrayArg(From->args(), From->args_size()).value(),
+ From->args_size());
+ break;
+ }
+ case attr::ExclusiveTrylockFunction: {
+ const auto *From = cast<ExclusiveTrylockFunctionAttr>(FromAttr);
+ AI.importAttr(From, AI.importArg(From->getSuccessValue()).value(),
+ AI.importArrayArg(From->args(), From->args_size()).value(),
+ From->args_size());
+ break;
+ }
+ case attr::SharedTrylockFunction: {
+ const auto *From = cast<SharedTrylockFunctionAttr>(FromAttr);
+ AI.importAttr(From, AI.importArg(From->getSuccessValue()).value(),
+ AI.importArrayArg(From->args(), From->args_size()).value(),
+ From->args_size());
+ break;
+ }
+ case attr::LockReturned: {
+ const auto *From = cast<LockReturnedAttr>(FromAttr);
+ AI.importAttr(From, AI.importArg(From->getArg()).value());
+ break;
+ }
+ case attr::LocksExcluded: {
+ const auto *From = cast<LocksExcludedAttr>(FromAttr);
+ AI.importAttr(From,
+ AI.importArrayArg(From->args(), From->args_size()).value(),
+ From->args_size());
+ break;
+ }
+
+ default: {
+ // The default branch works for attributes that have no arguments to import.
+ // FIXME: Handle every attribute type whose arguments need to be imported
+ // (most often Expr *, Decl *, or a type) in the switch above.
+ AI.cloneAttr(FromAttr);
+ break;
+ }
+ }
+
+ return std::move(AI).getResult();
}
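An example of why Expr-valued attribute arguments must be imported rather than cloned (the names below are illustrative only): the alignment argument of the AlignedAttr produced here is an expression referring to kAlign, and a plain clone() would keep it pointing into the source AST:

    constexpr int kAlign = 32;
    struct alignas(kAlign) Buffer {   // AlignedAttr whose argument is an Expr
      char data[256];
    };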
Decl *ASTImporter::GetAlreadyImportedOrNull(const Decl *FromD) const {
@@ -8597,6 +8803,11 @@ Expected<Decl *> ASTImporter::Import(Decl *FromD) {
return ToDOrErr;
}
+llvm::Expected<InheritedConstructor>
+ASTImporter::Import(const InheritedConstructor &From) {
+ return ASTNodeImporter(*this).ImportInheritedConstructor(From);
+}
+
Expected<DeclContext *> ASTImporter::ImportContext(DeclContext *FromDC) {
if (!FromDC)
return FromDC;
@@ -8741,12 +8952,11 @@ ASTImporter::Import(NestedNameSpecifier *FromNNS) {
case NestedNameSpecifier::TypeSpec:
case NestedNameSpecifier::TypeSpecWithTemplate:
- if (Expected<QualType> TyOrErr =
- Import(QualType(FromNNS->getAsType(), 0u))) {
+ if (ExpectedTypePtr TyOrErr = Import(FromNNS->getAsType())) {
bool TSTemplate =
FromNNS->getKind() == NestedNameSpecifier::TypeSpecWithTemplate;
return NestedNameSpecifier::Create(ToContext, Prefix, TSTemplate,
- TyOrErr->getTypePtr());
+ *TyOrErr);
} else {
return TyOrErr.takeError();
}
@@ -9381,16 +9591,14 @@ ASTNodeImporter::ImportAPValue(const APValue &FromValue) {
}
} else {
FromElemTy = FromValue.getLValueBase().getTypeInfoType();
- QualType ImpTypeInfo = importChecked(
- Err,
- QualType(FromValue.getLValueBase().get<TypeInfoLValue>().getType(),
- 0));
+ const Type *ImpTypeInfo = importChecked(
+ Err, FromValue.getLValueBase().get<TypeInfoLValue>().getType());
QualType ImpType =
importChecked(Err, FromValue.getLValueBase().getTypeInfoType());
if (Err)
return std::move(Err);
- Base = APValue::LValueBase::getTypeInfo(
- TypeInfoLValue(ImpTypeInfo.getTypePtr()), ImpType);
+ Base = APValue::LValueBase::getTypeInfo(TypeInfoLValue(ImpTypeInfo),
+ ImpType);
}
}
CharUnits Offset = FromValue.getLValueOffset();
diff --git a/clang/lib/AST/ASTImporterLookupTable.cpp b/clang/lib/AST/ASTImporterLookupTable.cpp
index b78cc0c053f6..ef42561c6f94 100644
--- a/clang/lib/AST/ASTImporterLookupTable.cpp
+++ b/clang/lib/AST/ASTImporterLookupTable.cpp
@@ -14,6 +14,7 @@
#include "clang/AST/ASTImporterLookupTable.h"
#include "clang/AST/Decl.h"
#include "clang/AST/RecursiveASTVisitor.h"
+#include "llvm/Support/FormatVariadic.h"
namespace clang {
@@ -93,10 +94,19 @@ void ASTImporterLookupTable::add(DeclContext *DC, NamedDecl *ND) {
}
void ASTImporterLookupTable::remove(DeclContext *DC, NamedDecl *ND) {
- DeclList &Decls = LookupTable[DC][ND->getDeclName()];
+ const DeclarationName Name = ND->getDeclName();
+ DeclList &Decls = LookupTable[DC][Name];
bool EraseResult = Decls.remove(ND);
(void)EraseResult;
- assert(EraseResult == true && "Trying to remove not contained Decl");
+#ifndef NDEBUG
+ if (!EraseResult) {
+ std::string Message =
+ llvm::formatv("Trying to remove not contained Decl '{0}' of type {1}",
+ Name.getAsString(), DC->getDeclKindName())
+ .str();
+ llvm_unreachable(Message.c_str());
+ }
+#endif
}
void ASTImporterLookupTable::add(NamedDecl *ND) {
@@ -145,7 +155,7 @@ ASTImporterLookupTable::lookup(DeclContext *DC, DeclarationName Name) const {
}
bool ASTImporterLookupTable::contains(DeclContext *DC, NamedDecl *ND) const {
- return 0 < lookup(DC, ND->getDeclName()).count(ND);
+ return lookup(DC, ND->getDeclName()).contains(ND);
}
void ASTImporterLookupTable::dump(DeclContext *DC) const {
diff --git a/clang/lib/AST/ASTStructuralEquivalence.cpp b/clang/lib/AST/ASTStructuralEquivalence.cpp
index c4ff05ba9325..e85feb779190 100644
--- a/clang/lib/AST/ASTStructuralEquivalence.cpp
+++ b/clang/lib/AST/ASTStructuralEquivalence.cpp
@@ -1591,6 +1591,26 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context,
return true;
}
+static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context,
+ EnumConstantDecl *D1,
+ EnumConstantDecl *D2) {
+ const llvm::APSInt &FromVal = D1->getInitVal();
+ const llvm::APSInt &ToVal = D2->getInitVal();
+ if (FromVal.isSigned() != ToVal.isSigned())
+ return false;
+ if (FromVal.getBitWidth() != ToVal.getBitWidth())
+ return false;
+ if (FromVal != ToVal)
+ return false;
+
+ if (!IsStructurallyEquivalent(D1->getIdentifier(), D2->getIdentifier()))
+ return false;
+
+ // Init expressions are the most expensive check, so do them last.
+ return IsStructurallyEquivalent(Context, D1->getInitExpr(),
+ D2->getInitExpr());
+}
+
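A sketch of what the enumerator check compares, using two namespaces to stand in for two translation units:

    namespace tu1 { enum Color { Red = 0, Green = 1 }; }
    namespace tu2 { enum Color { Red = 0, Green = 2 }; }
    // Green has different init values, so the two enums would not be
    // considered structurally equivalent.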
/// Determine structural equivalence of two enums.
static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context,
EnumDecl *D1, EnumDecl *D2) {
diff --git a/clang/lib/AST/ASTTypeTraits.cpp b/clang/lib/AST/ASTTypeTraits.cpp
index 4a033bf50bd4..b333f4618efb 100644
--- a/clang/lib/AST/ASTTypeTraits.cpp
+++ b/clang/lib/AST/ASTTypeTraits.cpp
@@ -14,9 +14,11 @@
#include "clang/AST/ASTTypeTraits.h"
#include "clang/AST/ASTContext.h"
+#include "clang/AST/Attr.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/NestedNameSpecifier.h"
#include "clang/AST/OpenMPClause.h"
+#include "clang/AST/TypeLoc.h"
using namespace clang;
@@ -24,9 +26,12 @@ const ASTNodeKind::KindInfo ASTNodeKind::AllKindInfo[] = {
{NKI_None, "<None>"},
{NKI_None, "TemplateArgument"},
{NKI_None, "TemplateArgumentLoc"},
+ {NKI_None, "LambdaCapture"},
{NKI_None, "TemplateName"},
{NKI_None, "NestedNameSpecifierLoc"},
{NKI_None, "QualType"},
+#define TYPELOC(CLASS, PARENT) {NKI_##PARENT, #CLASS "TypeLoc"},
+#include "clang/AST/TypeLocNodes.def"
{NKI_None, "TypeLoc"},
{NKI_None, "CXXBaseSpecifier"},
{NKI_None, "CXXCtorInitializer"},
@@ -44,6 +49,9 @@ const ASTNodeKind::KindInfo ASTNodeKind::AllKindInfo[] = {
#define GEN_CLANG_CLAUSE_CLASS
#define CLAUSE_CLASS(Enum, Str, Class) {NKI_OMPClause, #Class},
#include "llvm/Frontend/OpenMP/OMP.inc"
+ {NKI_None, "Attr"},
+#define ATTR(A) {NKI_Attr, #A "Attr"},
+#include "clang/Basic/AttrList.inc"
};
bool ASTNodeKind::isBaseOf(ASTNodeKind Other, unsigned *Distance) const {
@@ -123,6 +131,17 @@ ASTNodeKind ASTNodeKind::getFromNode(const Type &T) {
llvm_unreachable("invalid type kind");
}
+ ASTNodeKind ASTNodeKind::getFromNode(const TypeLoc &T) {
+ switch (T.getTypeLocClass()) {
+#define ABSTRACT_TYPELOC(CLASS, PARENT)
+#define TYPELOC(CLASS, PARENT) \
+ case TypeLoc::CLASS: \
+ return ASTNodeKind(NKI_##CLASS##TypeLoc);
+#include "clang/AST/TypeLocNodes.def"
+ }
+ llvm_unreachable("invalid typeloc kind");
+ }
+
ASTNodeKind ASTNodeKind::getFromNode(const OMPClause &C) {
switch (C.getClauseKind()) {
#define GEN_CLANG_CLAUSE_CLASS
@@ -134,7 +153,17 @@ ASTNodeKind ASTNodeKind::getFromNode(const OMPClause &C) {
llvm_unreachable("unexpected OpenMP clause kind");
#include "llvm/Frontend/OpenMP/OMP.inc"
}
- llvm_unreachable("invalid stmt kind");
+ llvm_unreachable("invalid omp clause kind");
+}
+
+ASTNodeKind ASTNodeKind::getFromNode(const Attr &A) {
+ switch (A.getKind()) {
+#define ATTR(A) \
+ case attr::A: \
+ return ASTNodeKind(NKI_##A##Attr);
+#include "clang/Basic/AttrList.inc"
+ }
+ llvm_unreachable("invalid attr kind");
}
void DynTypedNode::print(llvm::raw_ostream &OS,
@@ -162,6 +191,8 @@ void DynTypedNode::print(llvm::raw_ostream &OS,
S->printPretty(OS, nullptr, PP);
else if (const Type *T = get<Type>())
QualType(T, 0).print(OS, PP);
+ else if (const Attr *A = get<Attr>())
+ A->printPretty(OS, PP);
else
OS << "Unable to print values of type " << NodeKind.asStringRef() << "\n";
}
@@ -195,5 +226,7 @@ SourceRange DynTypedNode::getSourceRange() const {
return SourceRange(C->getBeginLoc(), C->getEndLoc());
if (const auto *CBS = get<CXXBaseSpecifier>())
return CBS->getSourceRange();
+ if (const auto *A = get<Attr>())
+ return A->getRange();
return SourceRange();
}
diff --git a/clang/lib/AST/AttrDocTable.cpp b/clang/lib/AST/AttrDocTable.cpp
new file mode 100644
index 000000000000..3bfedac8b8f1
--- /dev/null
+++ b/clang/lib/AST/AttrDocTable.cpp
@@ -0,0 +1,27 @@
+//===--- AttrDocTable.cpp - implements Attr::getDocumentation() -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains out-of-line methods for Attr classes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/AST/Attr.h"
+#include "llvm/ADT/StringRef.h"
+
+#include "AttrDocTable.inc"
+
+static const llvm::StringRef AttrDoc[] = {
+#define ATTR(NAME) AttrDoc_##NAME,
+#include "clang/Basic/AttrList.inc"
+};
+
+llvm::StringRef clang::Attr::getDocumentation(clang::attr::Kind K) {
+ if(K < llvm::array_lengthof(AttrDoc))
+ return AttrDoc[K];
+ return "";
+}
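A minimal usage sketch of the new accessor (attr::Aligned is just one kind from AttrList.inc):

    #include "clang/AST/Attr.h"

    llvm::StringRef alignedAttrDocs() {
      return clang::Attr::getDocumentation(clang::attr::Aligned);
    }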
diff --git a/clang/lib/AST/AttrImpl.cpp b/clang/lib/AST/AttrImpl.cpp
index 662f86722fa3..a3b46752c511 100644
--- a/clang/lib/AST/AttrImpl.cpp
+++ b/clang/lib/AST/AttrImpl.cpp
@@ -195,6 +195,40 @@ void OMPDeclareVariantAttr::printPrettyPragma(
OS << ")";
}
OS << " match(" << traitInfos << ")";
+
+ auto PrintExprs = [&OS, &Policy](Expr **Begin, Expr **End) {
+ for (Expr **I = Begin; I != End; ++I) {
+ assert(*I && "Expected non-null Stmt");
+ if (I != Begin)
+ OS << ",";
+ (*I)->printPretty(OS, nullptr, Policy);
+ }
+ };
+ if (adjustArgsNothing_size()) {
+ OS << " adjust_args(nothing:";
+ PrintExprs(adjustArgsNothing_begin(), adjustArgsNothing_end());
+ OS << ")";
+ }
+ if (adjustArgsNeedDevicePtr_size()) {
+ OS << " adjust_args(need_device_ptr:";
+ PrintExprs(adjustArgsNeedDevicePtr_begin(), adjustArgsNeedDevicePtr_end());
+ OS << ")";
+ }
+
+ auto PrintInteropTypes = [&OS](InteropType *Begin, InteropType *End) {
+ for (InteropType *I = Begin; I != End; ++I) {
+ if (I != Begin)
+ OS << ", ";
+ OS << "interop(";
+ OS << ConvertInteropTypeToStr(*I);
+ OS << ")";
+ }
+ };
+ if (appendArgs_size()) {
+ OS << " append_args(";
+ PrintInteropTypes(appendArgs_begin(), appendArgs_end());
+ OS << ")";
+ }
}
#include "clang/AST/AttrImpl.inc"
diff --git a/clang/lib/AST/CXXInheritance.cpp b/clang/lib/AST/CXXInheritance.cpp
index 9027fa7a7515..96a6f344be7c 100644
--- a/clang/lib/AST/CXXInheritance.cpp
+++ b/clang/lib/AST/CXXInheritance.cpp
@@ -465,7 +465,7 @@ void OverridingMethods::add(unsigned OverriddenSubobject,
UniqueVirtualMethod Overriding) {
SmallVectorImpl<UniqueVirtualMethod> &SubobjectOverrides
= Overrides[OverriddenSubobject];
- if (llvm::find(SubobjectOverrides, Overriding) == SubobjectOverrides.end())
+ if (!llvm::is_contained(SubobjectOverrides, Overriding))
SubobjectOverrides.push_back(Overriding);
}
@@ -671,9 +671,7 @@ CXXRecordDecl::getFinalOverriders(CXXFinalOverriderMap &FinalOverriders) const {
// FIXME: IsHidden reads from Overriding from the middle of a remove_if
// over the same sequence! Is this guaranteed to work?
- Overriding.erase(
- std::remove_if(Overriding.begin(), Overriding.end(), IsHidden),
- Overriding.end());
+ llvm::erase_if(Overriding, IsHidden);
}
}
}
diff --git a/clang/lib/AST/Comment.cpp b/clang/lib/AST/Comment.cpp
index a02cc9d119fe..fae3640d5ff7 100644
--- a/clang/lib/AST/Comment.cpp
+++ b/clang/lib/AST/Comment.cpp
@@ -210,6 +210,7 @@ void DeclInfo::fill() {
IsObjCMethod = false;
IsInstanceMethod = false;
IsClassMethod = false;
+ IsVariadic = false;
ParamVars = None;
TemplateParameters = nullptr;
@@ -221,6 +222,7 @@ void DeclInfo::fill() {
CurrentDecl = CommentDecl;
Decl::Kind K = CommentDecl->getKind();
+ const TypeSourceInfo *TSI = nullptr;
switch (K) {
default:
// Defaults should be good for declarations we don't handle explicitly.
@@ -247,6 +249,8 @@ void DeclInfo::fill() {
IsInstanceMethod = MD->isInstance();
IsClassMethod = !IsInstanceMethod;
}
+ IsVariadic = FD->isVariadic();
+ assert(involvesFunctionType());
break;
}
case Decl::ObjCMethod: {
@@ -257,6 +261,8 @@ void DeclInfo::fill() {
IsObjCMethod = true;
IsInstanceMethod = MD->isInstanceMethod();
IsClassMethod = !IsInstanceMethod;
+ IsVariadic = MD->isVariadic();
+ assert(involvesFunctionType());
break;
}
case Decl::FunctionTemplate: {
@@ -267,6 +273,8 @@ void DeclInfo::fill() {
ParamVars = FD->parameters();
ReturnType = FD->getReturnType();
TemplateParameters = FTD->getTemplateParameters();
+ IsVariadic = FD->isVariadic();
+ assert(involvesFunctionType());
break;
}
case Decl::ClassTemplate: {
@@ -293,76 +301,66 @@ void DeclInfo::fill() {
Kind = ClassKind;
break;
case Decl::Var:
+ if (const VarTemplateDecl *VTD =
+ cast<VarDecl>(CommentDecl)->getDescribedVarTemplate()) {
+ TemplateKind = TemplateSpecialization;
+ TemplateParameters = VTD->getTemplateParameters();
+ }
+ LLVM_FALLTHROUGH;
case Decl::Field:
case Decl::EnumConstant:
case Decl::ObjCIvar:
case Decl::ObjCAtDefsField:
- case Decl::ObjCProperty: {
- const TypeSourceInfo *TSI;
+ case Decl::ObjCProperty:
if (const auto *VD = dyn_cast<DeclaratorDecl>(CommentDecl))
TSI = VD->getTypeSourceInfo();
else if (const auto *PD = dyn_cast<ObjCPropertyDecl>(CommentDecl))
TSI = PD->getTypeSourceInfo();
- else
- TSI = nullptr;
- if (TSI) {
- TypeLoc TL = TSI->getTypeLoc().getUnqualifiedLoc();
- FunctionTypeLoc FTL;
- if (getFunctionTypeLoc(TL, FTL)) {
- ParamVars = FTL.getParams();
- ReturnType = FTL.getReturnLoc().getType();
- }
- }
Kind = VariableKind;
break;
+ case Decl::VarTemplate: {
+ const VarTemplateDecl *VTD = cast<VarTemplateDecl>(CommentDecl);
+ Kind = VariableKind;
+ TemplateKind = Template;
+ TemplateParameters = VTD->getTemplateParameters();
+ if (const VarDecl *VD = VTD->getTemplatedDecl())
+ TSI = VD->getTypeSourceInfo();
+ break;
}
case Decl::Namespace:
Kind = NamespaceKind;
break;
case Decl::TypeAlias:
- case Decl::Typedef: {
+ case Decl::Typedef:
Kind = TypedefKind;
- // If this is a typedef / using to something we consider a function, extract
- // arguments and return type.
- const TypeSourceInfo *TSI =
- K == Decl::Typedef
- ? cast<TypedefDecl>(CommentDecl)->getTypeSourceInfo()
- : cast<TypeAliasDecl>(CommentDecl)->getTypeSourceInfo();
- if (!TSI)
- break;
- TypeLoc TL = TSI->getTypeLoc().getUnqualifiedLoc();
- FunctionTypeLoc FTL;
- if (getFunctionTypeLoc(TL, FTL)) {
- Kind = FunctionKind;
- ParamVars = FTL.getParams();
- ReturnType = FTL.getReturnLoc().getType();
- }
+ TSI = cast<TypedefNameDecl>(CommentDecl)->getTypeSourceInfo();
break;
- }
case Decl::TypeAliasTemplate: {
const TypeAliasTemplateDecl *TAT = cast<TypeAliasTemplateDecl>(CommentDecl);
Kind = TypedefKind;
TemplateKind = Template;
TemplateParameters = TAT->getTemplateParameters();
- TypeAliasDecl *TAD = TAT->getTemplatedDecl();
- if (!TAD)
- break;
+ if (TypeAliasDecl *TAD = TAT->getTemplatedDecl())
+ TSI = TAD->getTypeSourceInfo();
+ break;
+ }
+ case Decl::Enum:
+ Kind = EnumKind;
+ break;
+ }
- const TypeSourceInfo *TSI = TAD->getTypeSourceInfo();
- if (!TSI)
- break;
+ // If the type is a typedef / using to something we consider a function,
+ // extract arguments and return type.
+ if (TSI) {
TypeLoc TL = TSI->getTypeLoc().getUnqualifiedLoc();
FunctionTypeLoc FTL;
if (getFunctionTypeLoc(TL, FTL)) {
- Kind = FunctionKind;
ParamVars = FTL.getParams();
ReturnType = FTL.getReturnLoc().getType();
+ if (const auto *FPT = dyn_cast<FunctionProtoType>(FTL.getTypePtr()))
+ IsVariadic = FPT->isVariadic();
+ assert(involvesFunctionType());
}
- break;
- }
- case Decl::Enum:
- Kind = EnumKind;
- break;
}
IsFilled = true;
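One kind of declaration the unified TypeSourceInfo handling and the new IsVariadic flag cover: a typedef of a variadic function pointer, where \param and \returns should attach and the variadic-ness is read from the prototype. A sketch:

    /// \param fmt printf-style format string
    /// \returns the number of characters written
    typedef int (*Logger)(const char *fmt, ...);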
diff --git a/clang/lib/AST/CommentBriefParser.cpp b/clang/lib/AST/CommentBriefParser.cpp
index 2b648cbb1d4b..2a5f7452b776 100644
--- a/clang/lib/AST/CommentBriefParser.cpp
+++ b/clang/lib/AST/CommentBriefParser.cpp
@@ -123,7 +123,7 @@ std::string BriefParser::Parse() {
// We found a paragraph end. This ends the brief description if
// \command or its equivalent was explicitly used.
// Stop scanning text because an explicit \paragraph is the
- // preffered one.
+ // preferred one.
if (InBrief)
break;
// End first paragraph if we found some non-whitespace text.
diff --git a/clang/lib/AST/CommentLexer.cpp b/clang/lib/AST/CommentLexer.cpp
index 4bebd41e15ee..93531c06192d 100644
--- a/clang/lib/AST/CommentLexer.cpp
+++ b/clang/lib/AST/CommentLexer.cpp
@@ -392,10 +392,11 @@ void Lexer::lexCommentText(Token &T) {
unsigned Length = TokenPtr - (BufferPtr + 1);
// Hardcoded support for lexing LaTeX formula commands
- // \f$ \f[ \f] \f{ \f} as a single command.
+ // \f$ \f( \f) \f[ \f] \f{ \f} as a single command.
if (Length == 1 && TokenPtr[-1] == 'f' && TokenPtr != CommentEnd) {
C = *TokenPtr;
- if (C == '$' || C == '[' || C == ']' || C == '{' || C == '}') {
+ if (C == '$' || C == '(' || C == ')' || C == '[' || C == ']' ||
+ C == '{' || C == '}') {
TokenPtr++;
Length++;
}
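A doc comment using the newly lexed round-bracket inline formula form (Doxygen's \f( ... \f)):

    /// Euclidean length, i.e. \f( \sqrt{x^2 + y^2} \f).
    double length(double x, double y);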
diff --git a/clang/lib/AST/CommentSema.cpp b/clang/lib/AST/CommentSema.cpp
index 7642e73fa171..087f103e4931 100644
--- a/clang/lib/AST/CommentSema.cpp
+++ b/clang/lib/AST/CommentSema.cpp
@@ -86,7 +86,7 @@ ParamCommandComment *Sema::actOnParamCommandStart(
new (Allocator) ParamCommandComment(LocBegin, LocEnd, CommandID,
CommandMarker);
- if (!isFunctionDecl() && !isFunctionOrBlockPointerVarLikeDecl())
+ if (!involvesFunctionType())
Diag(Command->getLocation(),
diag::warn_doc_param_not_attached_to_a_function_decl)
<< CommandMarker
@@ -236,9 +236,7 @@ void Sema::actOnParamCommandDirectionArg(ParamCommandComment *Command,
if (Direction == -1) {
// Try again with whitespace removed.
- ArgLower.erase(
- std::remove_if(ArgLower.begin(), ArgLower.end(), clang::isWhitespace),
- ArgLower.end());
+ llvm::erase_if(ArgLower, clang::isWhitespace);
Direction = getParamPassDirection(ArgLower);
SourceRange ArgRange(ArgLocBegin, ArgLocEnd);
@@ -590,7 +588,7 @@ void Sema::checkReturnsCommand(const BlockCommandComment *Command) {
// to document the value that the property getter returns.
if (isObjCPropertyDecl())
return;
- if (isFunctionDecl() || isFunctionOrBlockPointerVarLikeDecl()) {
+ if (involvesFunctionType()) {
assert(!ThisDeclInfo->ReturnType.isNull() &&
"should have a valid return type");
if (ThisDeclInfo->ReturnType->isVoidType()) {
@@ -730,7 +728,7 @@ void Sema::checkDeprecatedCommand(const BlockCommandComment *Command) {
}
void Sema::resolveParamCommandIndexes(const FullComment *FC) {
- if (!isFunctionDecl()) {
+ if (!involvesFunctionType()) {
// We already warned that \\param commands are not attached to a function
// decl.
return;
@@ -818,6 +816,14 @@ void Sema::resolveParamCommandIndexes(const FullComment *FC) {
}
}
+bool Sema::involvesFunctionType() {
+ if (!ThisDeclInfo)
+ return false;
+ if (!ThisDeclInfo->IsFilled)
+ inspectThisDecl();
+ return ThisDeclInfo->involvesFunctionType();
+}
+
bool Sema::isFunctionDecl() {
if (!ThisDeclInfo)
return false;
@@ -832,26 +838,11 @@ bool Sema::isAnyFunctionDecl() {
}
bool Sema::isFunctionOrMethodVariadic() {
- if (!isFunctionDecl() || !ThisDeclInfo->CurrentDecl)
+ if (!ThisDeclInfo)
return false;
- if (const FunctionDecl *FD =
- dyn_cast<FunctionDecl>(ThisDeclInfo->CurrentDecl))
- return FD->isVariadic();
- if (const FunctionTemplateDecl *FTD =
- dyn_cast<FunctionTemplateDecl>(ThisDeclInfo->CurrentDecl))
- return FTD->getTemplatedDecl()->isVariadic();
- if (const ObjCMethodDecl *MD =
- dyn_cast<ObjCMethodDecl>(ThisDeclInfo->CurrentDecl))
- return MD->isVariadic();
- if (const TypedefNameDecl *TD =
- dyn_cast<TypedefNameDecl>(ThisDeclInfo->CurrentDecl)) {
- QualType Type = TD->getUnderlyingType();
- if (Type->isFunctionPointerType() || Type->isBlockPointerType())
- Type = Type->getPointeeType();
- if (const auto *FT = Type->getAs<FunctionProtoType>())
- return FT->isVariadic();
- }
- return false;
+ if (!ThisDeclInfo->IsFilled)
+ inspectThisDecl();
+ return ThisDeclInfo->IsVariadic;
}
bool Sema::isObjCMethodDecl() {
@@ -873,36 +864,6 @@ bool Sema::isFunctionPointerVarDecl() {
return false;
}
-bool Sema::isFunctionOrBlockPointerVarLikeDecl() {
- if (!ThisDeclInfo)
- return false;
- if (!ThisDeclInfo->IsFilled)
- inspectThisDecl();
- if (ThisDeclInfo->getKind() != DeclInfo::VariableKind ||
- !ThisDeclInfo->CurrentDecl)
- return false;
- QualType QT;
- if (const auto *VD = dyn_cast<DeclaratorDecl>(ThisDeclInfo->CurrentDecl))
- QT = VD->getType();
- else if (const auto *PD =
- dyn_cast<ObjCPropertyDecl>(ThisDeclInfo->CurrentDecl))
- QT = PD->getType();
- else
- return false;
- // We would like to warn about the 'returns'/'param' commands for
- // variables that don't directly specify the function type, so type aliases
- // can be ignored.
- if (QT->getAs<TypedefType>())
- return false;
- if (const auto *P = QT->getAs<PointerType>())
- if (P->getPointeeType()->getAs<TypedefType>())
- return false;
- if (const auto *P = QT->getAs<BlockPointerType>())
- if (P->getPointeeType()->getAs<TypedefType>())
- return false;
- return QT->isFunctionPointerType() || QT->isBlockPointerType();
-}
-
bool Sema::isObjCPropertyDecl() {
if (!ThisDeclInfo)
return false;
diff --git a/clang/lib/AST/ComparisonCategories.cpp b/clang/lib/AST/ComparisonCategories.cpp
index 896050482644..a42960ad3c7f 100644
--- a/clang/lib/AST/ComparisonCategories.cpp
+++ b/clang/lib/AST/ComparisonCategories.cpp
@@ -57,7 +57,7 @@ bool ComparisonCategoryInfo::ValueInfo::hasValidIntValue() const {
/// Attempt to determine the integer value used to represent the comparison
/// category result by evaluating the initializer for the specified VarDecl as
-/// a constant expression and retreiving the value of the class's first
+/// a constant expression and retrieving the value of the class's first
/// (and only) field.
///
/// Note: The STL types are expected to have the form:
diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp
index 8f2ecb7bcf2a..5ea091edcf4c 100644
--- a/clang/lib/AST/Decl.cpp
+++ b/clang/lib/AST/Decl.cpp
@@ -1088,13 +1088,29 @@ NamedDecl::isReserved(const LangOptions &LangOpts) const {
return ReservedIdentifierStatus::NotReserved;
ReservedIdentifierStatus Status = II->isReserved(LangOpts);
- if (Status == ReservedIdentifierStatus::StartsWithUnderscoreAtGlobalScope) {
- // Check if we're at TU level or not.
+ if (isReservedAtGlobalScope(Status) && !isReservedInAllContexts(Status)) {
+ // This name is only reserved at global scope. Check if this declaration
+ // conflicts with a global scope declaration.
if (isa<ParmVarDecl>(this) || isTemplateParameter())
return ReservedIdentifierStatus::NotReserved;
+
+ // C++ [dcl.link]/7:
+ // Two declarations [conflict] if [...] one declares a function or
+ // variable with C language linkage, and the other declares [...] a
+ // variable that belongs to the global scope.
+ //
+ // Therefore names that are reserved at global scope are also reserved as
+ // names of variables and functions with C language linkage.
const DeclContext *DC = getDeclContext()->getRedeclContext();
- if (!DC->isTranslationUnit())
- return ReservedIdentifierStatus::NotReserved;
+ if (DC->isTranslationUnit())
+ return Status;
+ if (auto *VD = dyn_cast<VarDecl>(this))
+ if (VD->isExternC())
+ return ReservedIdentifierStatus::StartsWithUnderscoreAndIsExternC;
+ if (auto *FD = dyn_cast<FunctionDecl>(this))
+ if (FD->isExternC())
+ return ReservedIdentifierStatus::StartsWithUnderscoreAndIsExternC;
+ return ReservedIdentifierStatus::NotReserved;
}
return Status;
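The distinction drawn above, as a short example (leading-underscore identifiers are reserved only at global scope, but C language linkage makes a declaration conflict with the global scope):

    namespace n {
      extern "C" int _ver;   // reported: StartsWithUnderscoreAndIsExternC
      int _local;            // not reserved inside a namespace
    }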
@@ -1647,8 +1663,7 @@ void NamedDecl::printNestedNameSpecifier(raw_ostream &OS,
NameInScope = ND->getDeclName();
}
- for (unsigned I = Contexts.size(); I != 0; --I) {
- const DeclContext *DC = Contexts[I - 1];
+ for (const DeclContext *DC : llvm::reverse(Contexts)) {
if (const auto *Spec = dyn_cast<ClassTemplateSpecializationDecl>(DC)) {
OS << Spec->getName();
const TemplateArgumentList &TemplateArgs = Spec->getTemplateArgs();
@@ -2216,14 +2231,18 @@ VarDecl *VarDecl::getActingDefinition() {
return nullptr;
VarDecl *LastTentative = nullptr;
- VarDecl *First = getFirstDecl();
- for (auto I : First->redecls()) {
- Kind = I->isThisDeclarationADefinition();
+
+ // Loop through the declaration chain, starting with the most recent.
+ for (VarDecl *Decl = getMostRecentDecl(); Decl;
+ Decl = Decl->getPreviousDecl()) {
+ Kind = Decl->isThisDeclarationADefinition();
if (Kind == Definition)
return nullptr;
- if (Kind == TentativeDefinition)
- LastTentative = I;
+ // Record the first (most recent) TentativeDefinition that is encountered.
+ if (Kind == TentativeDefinition && !LastTentative)
+ LastTentative = Decl;
}
+
return LastTentative;
}
@@ -2769,11 +2788,15 @@ SourceRange ParmVarDecl::getSourceRange() const {
}
bool ParmVarDecl::isDestroyedInCallee() const {
+ // ns_consumed only affects code generation in ARC
if (hasAttr<NSConsumedAttr>())
- return true;
+ return getASTContext().getLangOpts().ObjCAutoRefCount;
+ // FIXME: isParamDestroyedInCallee() should probably imply
+ // isDestructedType()
auto *RT = getType()->getAs<RecordType>();
- if (RT && RT->getDecl()->isParamDestroyedInCallee())
+ if (RT && RT->getDecl()->isParamDestroyedInCallee() &&
+ getType().isDestructedType())
return true;
return false;
@@ -2852,7 +2875,7 @@ FunctionDecl::FunctionDecl(Kind DK, ASTContext &C, DeclContext *DC,
SourceLocation StartLoc,
const DeclarationNameInfo &NameInfo, QualType T,
TypeSourceInfo *TInfo, StorageClass S,
- bool isInlineSpecified,
+ bool UsesFPIntrin, bool isInlineSpecified,
ConstexprSpecKind ConstexprKind,
Expr *TrailingRequiresClause)
: DeclaratorDecl(DK, DC, NameInfo.getLoc(), NameInfo.getName(), T, TInfo,
@@ -2878,7 +2901,7 @@ FunctionDecl::FunctionDecl(Kind DK, ASTContext &C, DeclContext *DC,
FunctionDeclBits.ConstexprKind = static_cast<uint64_t>(ConstexprKind);
FunctionDeclBits.InstantiationIsPending = false;
FunctionDeclBits.UsesSEHTry = false;
- FunctionDeclBits.UsesFPIntrin = false;
+ FunctionDeclBits.UsesFPIntrin = UsesFPIntrin;
FunctionDeclBits.HasSkippedBody = false;
FunctionDeclBits.WillHaveBody = false;
FunctionDeclBits.IsMultiVersion = false;
@@ -3172,7 +3195,9 @@ bool FunctionDecl::isInlineBuiltinDeclaration() const {
return false;
const FunctionDecl *Definition;
- return hasBody(Definition) && Definition->isInlineSpecified();
+ return hasBody(Definition) && Definition->isInlineSpecified() &&
+ Definition->hasAttr<AlwaysInlineAttr>() &&
+ Definition->hasAttr<GNUInlineAttr>();
}
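The stricter check now requires both attributes on the defining declaration; a hedged sketch of the glibc-style wrapper shape it is meant to accept (compiling this standalone may additionally warn about redeclaring a builtin):

    extern "C" inline __attribute__((always_inline, gnu_inline))
    void *memcpy(void *dst, const void *src, __SIZE_TYPE__ n) {
      return __builtin_memcpy(dst, src, n);
    }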
bool FunctionDecl::isDestroyingOperatorDelete() const {
@@ -4498,6 +4523,17 @@ unsigned EnumDecl::getODRHash() {
return ODRHash;
}
+SourceRange EnumDecl::getSourceRange() const {
+ auto Res = TagDecl::getSourceRange();
+ // Set end-point to enum-base, e.g. enum foo : ^bar
+ if (auto *TSI = getIntegerTypeSourceInfo()) {
+ // TagDecl doesn't know about the enum base.
+ if (!getBraceRange().getEnd().isValid())
+ Res.setEnd(TSI->getTypeLoc().getEndLoc());
+ }
+ return Res;
+}
+
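The case the new end-point handling addresses is an opaque-enum-declaration, which has an enum-base but no braces:

    enum class Flags : unsigned short;   // source range now extends through the base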
//===----------------------------------------------------------------------===//
// RecordDecl Implementation
//===----------------------------------------------------------------------===//
@@ -4857,18 +4893,16 @@ ImplicitParamDecl *ImplicitParamDecl::CreateDeserialized(ASTContext &C,
return new (C, ID) ImplicitParamDecl(C, QualType(), ImplicitParamKind::Other);
}
-FunctionDecl *FunctionDecl::Create(ASTContext &C, DeclContext *DC,
- SourceLocation StartLoc,
- const DeclarationNameInfo &NameInfo,
- QualType T, TypeSourceInfo *TInfo,
- StorageClass SC, bool isInlineSpecified,
- bool hasWrittenPrototype,
- ConstexprSpecKind ConstexprKind,
- Expr *TrailingRequiresClause) {
- FunctionDecl *New =
- new (C, DC) FunctionDecl(Function, C, DC, StartLoc, NameInfo, T, TInfo,
- SC, isInlineSpecified, ConstexprKind,
- TrailingRequiresClause);
+FunctionDecl *
+FunctionDecl::Create(ASTContext &C, DeclContext *DC, SourceLocation StartLoc,
+ const DeclarationNameInfo &NameInfo, QualType T,
+ TypeSourceInfo *TInfo, StorageClass SC, bool UsesFPIntrin,
+ bool isInlineSpecified, bool hasWrittenPrototype,
+ ConstexprSpecKind ConstexprKind,
+ Expr *TrailingRequiresClause) {
+ FunctionDecl *New = new (C, DC) FunctionDecl(
+ Function, C, DC, StartLoc, NameInfo, T, TInfo, SC, UsesFPIntrin,
+ isInlineSpecified, ConstexprKind, TrailingRequiresClause);
New->setHasWrittenPrototype(hasWrittenPrototype);
return New;
}
@@ -4876,7 +4910,7 @@ FunctionDecl *FunctionDecl::Create(ASTContext &C, DeclContext *DC,
FunctionDecl *FunctionDecl::CreateDeserialized(ASTContext &C, unsigned ID) {
return new (C, ID) FunctionDecl(
Function, C, nullptr, SourceLocation(), DeclarationNameInfo(), QualType(),
- nullptr, SC_None, false, ConstexprSpecKind::Unspecified, nullptr);
+ nullptr, SC_None, false, false, ConstexprSpecKind::Unspecified, nullptr);
}
BlockDecl *BlockDecl::Create(ASTContext &C, DeclContext *DC, SourceLocation L) {
diff --git a/clang/lib/AST/DeclBase.cpp b/clang/lib/AST/DeclBase.cpp
index 3467da2b549e..4044404f74ef 100644
--- a/clang/lib/AST/DeclBase.cpp
+++ b/clang/lib/AST/DeclBase.cpp
@@ -1217,6 +1217,15 @@ bool DeclContext::Encloses(const DeclContext *DC) const {
return false;
}
+DeclContext *DeclContext::getNonTransparentContext() {
+ DeclContext *DC = this;
+ while (DC->isTransparentContext()) {
+ DC = DC->getParent();
+ assert(DC && "All transparent contexts should have a parent!");
+ }
+ return DC;
+}
+
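Unscoped enums (like linkage specifications) are transparent contexts, so the new helper skips them; a minimal example:

    namespace outer {
      enum { kValue };   // transparent DeclContext
    }
    // getNonTransparentContext() of kValue's declaration context yields 'outer'.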
DeclContext *DeclContext::getPrimaryContext() {
switch (getDeclKind()) {
case Decl::ExternCContext:
@@ -1953,6 +1962,7 @@ void ASTContext::ReleaseDeclContextMaps() {
// pointer because the subclass doesn't add anything that needs to
// be deleted.
StoredDeclsMap::DestroyAll(LastSDM.getPointer(), LastSDM.getInt());
+ LastSDM.setPointer(nullptr);
}
void StoredDeclsMap::DestroyAll(StoredDeclsMap *Map, bool Dependent) {
diff --git a/clang/lib/AST/DeclCXX.cpp b/clang/lib/AST/DeclCXX.cpp
index aeee35d9c74f..1780358cc348 100644
--- a/clang/lib/AST/DeclCXX.cpp
+++ b/clang/lib/AST/DeclCXX.cpp
@@ -178,6 +178,8 @@ static bool hasRepeatedBaseClass(const CXXRecordDecl *StartRD) {
SmallVector<const CXXRecordDecl*, 8> WorkList = {StartRD};
while (!WorkList.empty()) {
const CXXRecordDecl *RD = WorkList.pop_back_val();
+ if (RD->getTypeForDecl()->isDependentType())
+ continue;
for (const CXXBaseSpecifier &BaseSpec : RD->bases()) {
if (const CXXRecordDecl *B = BaseSpec.getType()->getAsCXXRecordDecl()) {
if (!SeenBaseTypes.insert(B).second)
@@ -1776,7 +1778,7 @@ void CXXRecordDecl::removeConversion(const NamedDecl *ConvDecl) {
for (unsigned I = 0, E = Convs.size(); I != E; ++I) {
if (Convs[I].getDecl() == ConvDecl) {
Convs.erase(I);
- assert(llvm::find(Convs, ConvDecl) == Convs.end() &&
+ assert(!llvm::is_contained(Convs, ConvDecl) &&
"conversion was found multiple times in unresolved set");
return;
}
@@ -2156,12 +2158,9 @@ CXXMethodDecl::getCorrespondingMethodInClass(const CXXRecordDecl *RD,
}
// Other candidate final overriders might be overridden by this function.
- FinalOverriders.erase(
- std::remove_if(FinalOverriders.begin(), FinalOverriders.end(),
- [&](CXXMethodDecl *OtherD) {
- return recursivelyOverrides(D, OtherD);
- }),
- FinalOverriders.end());
+ llvm::erase_if(FinalOverriders, [&](CXXMethodDecl *OtherD) {
+ return recursivelyOverrides(D, OtherD);
+ });
FinalOverriders.push_back(D);
};
@@ -2178,25 +2177,23 @@ CXXMethodDecl::getCorrespondingMethodInClass(const CXXRecordDecl *RD,
return FinalOverriders.size() == 1 ? FinalOverriders.front() : nullptr;
}
-CXXMethodDecl *CXXMethodDecl::Create(ASTContext &C, CXXRecordDecl *RD,
- SourceLocation StartLoc,
- const DeclarationNameInfo &NameInfo,
- QualType T, TypeSourceInfo *TInfo,
- StorageClass SC, bool isInline,
- ConstexprSpecKind ConstexprKind,
- SourceLocation EndLocation,
- Expr *TrailingRequiresClause) {
- return new (C, RD)
- CXXMethodDecl(CXXMethod, C, RD, StartLoc, NameInfo, T, TInfo, SC,
- isInline, ConstexprKind, EndLocation,
- TrailingRequiresClause);
+CXXMethodDecl *
+CXXMethodDecl::Create(ASTContext &C, CXXRecordDecl *RD, SourceLocation StartLoc,
+ const DeclarationNameInfo &NameInfo, QualType T,
+ TypeSourceInfo *TInfo, StorageClass SC, bool UsesFPIntrin,
+ bool isInline, ConstexprSpecKind ConstexprKind,
+ SourceLocation EndLocation,
+ Expr *TrailingRequiresClause) {
+ return new (C, RD) CXXMethodDecl(
+ CXXMethod, C, RD, StartLoc, NameInfo, T, TInfo, SC, UsesFPIntrin,
+ isInline, ConstexprKind, EndLocation, TrailingRequiresClause);
}
CXXMethodDecl *CXXMethodDecl::CreateDeserialized(ASTContext &C, unsigned ID) {
- return new (C, ID)
- CXXMethodDecl(CXXMethod, C, nullptr, SourceLocation(),
- DeclarationNameInfo(), QualType(), nullptr, SC_None, false,
- ConstexprSpecKind::Unspecified, SourceLocation(), nullptr);
+ return new (C, ID) CXXMethodDecl(
+ CXXMethod, C, nullptr, SourceLocation(), DeclarationNameInfo(),
+ QualType(), nullptr, SC_None, false, false,
+ ConstexprSpecKind::Unspecified, SourceLocation(), nullptr);
}
CXXMethodDecl *CXXMethodDecl::getDevirtualizedMethod(const Expr *Base,
@@ -2339,7 +2336,7 @@ bool CXXMethodDecl::isUsualDeallocationFunction(
// In C++17 onwards, all potential usual deallocation functions are actual
// usual deallocation functions. Honor this behavior when post-C++14
// deallocation functions are offered as extensions too.
- // FIXME(EricWF): Destrying Delete should be a language option. How do we
+ // FIXME(EricWF): Destroying Delete should be a language option. How do we
// handle when destroying delete is used prior to C++17?
if (Context.getLangOpts().CPlusPlus17 ||
Context.getLangOpts().AlignedAllocation ||
@@ -2568,12 +2565,12 @@ SourceRange CXXCtorInitializer::getSourceRange() const {
CXXConstructorDecl::CXXConstructorDecl(
ASTContext &C, CXXRecordDecl *RD, SourceLocation StartLoc,
const DeclarationNameInfo &NameInfo, QualType T, TypeSourceInfo *TInfo,
- ExplicitSpecifier ES, bool isInline, bool isImplicitlyDeclared,
- ConstexprSpecKind ConstexprKind, InheritedConstructor Inherited,
- Expr *TrailingRequiresClause)
+ ExplicitSpecifier ES, bool UsesFPIntrin, bool isInline,
+ bool isImplicitlyDeclared, ConstexprSpecKind ConstexprKind,
+ InheritedConstructor Inherited, Expr *TrailingRequiresClause)
: CXXMethodDecl(CXXConstructor, C, RD, StartLoc, NameInfo, T, TInfo,
- SC_None, isInline, ConstexprKind, SourceLocation(),
- TrailingRequiresClause) {
+ SC_None, UsesFPIntrin, isInline, ConstexprKind,
+ SourceLocation(), TrailingRequiresClause) {
setNumCtorInitializers(0);
setInheritingConstructor(static_cast<bool>(Inherited));
setImplicit(isImplicitlyDeclared);
@@ -2596,7 +2593,7 @@ CXXConstructorDecl *CXXConstructorDecl::CreateDeserialized(ASTContext &C,
isInheritingConstructor, hasTrailingExplicit);
auto *Result = new (C, ID, Extra) CXXConstructorDecl(
C, nullptr, SourceLocation(), DeclarationNameInfo(), QualType(), nullptr,
- ExplicitSpecifier(), false, false, ConstexprSpecKind::Unspecified,
+ ExplicitSpecifier(), false, false, false, ConstexprSpecKind::Unspecified,
InheritedConstructor(), nullptr);
Result->setInheritingConstructor(isInheritingConstructor);
Result->CXXConstructorDeclBits.HasTrailingExplicitSpecifier =
@@ -2608,19 +2605,18 @@ CXXConstructorDecl *CXXConstructorDecl::CreateDeserialized(ASTContext &C,
CXXConstructorDecl *CXXConstructorDecl::Create(
ASTContext &C, CXXRecordDecl *RD, SourceLocation StartLoc,
const DeclarationNameInfo &NameInfo, QualType T, TypeSourceInfo *TInfo,
- ExplicitSpecifier ES, bool isInline, bool isImplicitlyDeclared,
- ConstexprSpecKind ConstexprKind, InheritedConstructor Inherited,
- Expr *TrailingRequiresClause) {
+ ExplicitSpecifier ES, bool UsesFPIntrin, bool isInline,
+ bool isImplicitlyDeclared, ConstexprSpecKind ConstexprKind,
+ InheritedConstructor Inherited, Expr *TrailingRequiresClause) {
assert(NameInfo.getName().getNameKind()
== DeclarationName::CXXConstructorName &&
"Name must refer to a constructor");
unsigned Extra =
additionalSizeToAlloc<InheritedConstructor, ExplicitSpecifier>(
Inherited ? 1 : 0, ES.getExpr() ? 1 : 0);
- return new (C, RD, Extra)
- CXXConstructorDecl(C, RD, StartLoc, NameInfo, T, TInfo, ES, isInline,
- isImplicitlyDeclared, ConstexprKind, Inherited,
- TrailingRequiresClause);
+ return new (C, RD, Extra) CXXConstructorDecl(
+ C, RD, StartLoc, NameInfo, T, TInfo, ES, UsesFPIntrin, isInline,
+ isImplicitlyDeclared, ConstexprKind, Inherited, TrailingRequiresClause);
}
CXXConstructorDecl::init_const_iterator CXXConstructorDecl::init_begin() const {
@@ -2737,21 +2733,20 @@ CXXDestructorDecl *
CXXDestructorDecl::CreateDeserialized(ASTContext &C, unsigned ID) {
return new (C, ID) CXXDestructorDecl(
C, nullptr, SourceLocation(), DeclarationNameInfo(), QualType(), nullptr,
- false, false, ConstexprSpecKind::Unspecified, nullptr);
+ false, false, false, ConstexprSpecKind::Unspecified, nullptr);
}
CXXDestructorDecl *CXXDestructorDecl::Create(
ASTContext &C, CXXRecordDecl *RD, SourceLocation StartLoc,
const DeclarationNameInfo &NameInfo, QualType T, TypeSourceInfo *TInfo,
- bool isInline, bool isImplicitlyDeclared, ConstexprSpecKind ConstexprKind,
- Expr *TrailingRequiresClause) {
+ bool UsesFPIntrin, bool isInline, bool isImplicitlyDeclared,
+ ConstexprSpecKind ConstexprKind, Expr *TrailingRequiresClause) {
assert(NameInfo.getName().getNameKind()
== DeclarationName::CXXDestructorName &&
"Name must refer to a destructor");
- return new (C, RD)
- CXXDestructorDecl(C, RD, StartLoc, NameInfo, T, TInfo, isInline,
- isImplicitlyDeclared, ConstexprKind,
- TrailingRequiresClause);
+ return new (C, RD) CXXDestructorDecl(
+ C, RD, StartLoc, NameInfo, T, TInfo, UsesFPIntrin, isInline,
+ isImplicitlyDeclared, ConstexprKind, TrailingRequiresClause);
}
void CXXDestructorDecl::setOperatorDelete(FunctionDecl *OD, Expr *ThisArg) {
@@ -2770,21 +2765,22 @@ CXXConversionDecl *
CXXConversionDecl::CreateDeserialized(ASTContext &C, unsigned ID) {
return new (C, ID) CXXConversionDecl(
C, nullptr, SourceLocation(), DeclarationNameInfo(), QualType(), nullptr,
- false, ExplicitSpecifier(), ConstexprSpecKind::Unspecified,
+ false, false, ExplicitSpecifier(), ConstexprSpecKind::Unspecified,
SourceLocation(), nullptr);
}
CXXConversionDecl *CXXConversionDecl::Create(
ASTContext &C, CXXRecordDecl *RD, SourceLocation StartLoc,
const DeclarationNameInfo &NameInfo, QualType T, TypeSourceInfo *TInfo,
- bool isInline, ExplicitSpecifier ES, ConstexprSpecKind ConstexprKind,
- SourceLocation EndLocation, Expr *TrailingRequiresClause) {
+ bool UsesFPIntrin, bool isInline, ExplicitSpecifier ES,
+ ConstexprSpecKind ConstexprKind, SourceLocation EndLocation,
+ Expr *TrailingRequiresClause) {
assert(NameInfo.getName().getNameKind()
== DeclarationName::CXXConversionFunctionName &&
"Name must refer to a conversion function");
- return new (C, RD)
- CXXConversionDecl(C, RD, StartLoc, NameInfo, T, TInfo, isInline, ES,
- ConstexprKind, EndLocation, TrailingRequiresClause);
+ return new (C, RD) CXXConversionDecl(
+ C, RD, StartLoc, NameInfo, T, TInfo, UsesFPIntrin, isInline, ES,
+ ConstexprKind, EndLocation, TrailingRequiresClause);
}
bool CXXConversionDecl::isLambdaToBlockPointerConversion() const {
@@ -3017,8 +3013,7 @@ CXXRecordDecl *ConstructorUsingShadowDecl::getNominatedBaseClass() const {
void BaseUsingDecl::anchor() {}
void BaseUsingDecl::addShadowDecl(UsingShadowDecl *S) {
- assert(std::find(shadow_begin(), shadow_end(), S) == shadow_end() &&
- "declaration already in set");
+ assert(!llvm::is_contained(shadows(), S) && "declaration already in set");
assert(S->getIntroducer() == this);
if (FirstUsingShadow.getPointer())
@@ -3027,8 +3022,7 @@ void BaseUsingDecl::addShadowDecl(UsingShadowDecl *S) {
}
void BaseUsingDecl::removeShadowDecl(UsingShadowDecl *S) {
- assert(std::find(shadow_begin(), shadow_end(), S) != shadow_end() &&
- "declaration not in set");
+ assert(llvm::is_contained(shadows(), S) && "declaration not in set");
assert(S->getIntroducer() == this);
// Remove S from the shadow decl chain. This is O(n) but hopefully rare.
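The two DeclCXX.cpp hunks above replace open-coded std::find calls over the shadow-declaration chain with llvm::is_contained, which states the membership test directly over a range. A minimal standalone sketch of the idiom, assuming only LLVM's ADT headers are available:

#include "llvm/ADT/STLExtras.h"
#include <cassert>
#include <vector>

int main() {
  std::vector<int> Shadows = {1, 2, 3};
  // llvm::is_contained(Range, Value) is equivalent to
  // std::find(begin, end, Value) != end, but reads as a single predicate.
  assert(llvm::is_contained(Shadows, 2));
  assert(!llvm::is_contained(Shadows, 7));
  return 0;
}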
diff --git a/clang/lib/AST/DeclObjC.cpp b/clang/lib/AST/DeclObjC.cpp
index 6e790f03b027..ba827a79c022 100644
--- a/clang/lib/AST/DeclObjC.cpp
+++ b/clang/lib/AST/DeclObjC.cpp
@@ -603,10 +603,6 @@ void ObjCInterfaceDecl::allocateDefinitionData() {
assert(!hasDefinition() && "ObjC class already has a definition");
Data.setPointer(new (getASTContext()) DefinitionData());
Data.getPointer()->Definition = this;
-
- // Make the type point at the definition, now that we have one.
- if (TypeForDecl)
- cast<ObjCInterfaceType>(TypeForDecl)->Decl = this;
}
void ObjCInterfaceDecl::startDefinition() {
@@ -855,6 +851,14 @@ bool ObjCMethodDecl::isDesignatedInitializerForTheInterface(
return false;
}
+bool ObjCMethodDecl::hasParamDestroyedInCallee() const {
+ for (auto param : parameters()) {
+ if (param->isDestroyedInCallee())
+ return true;
+ }
+ return false;
+}
+
Stmt *ObjCMethodDecl::getBody() const {
return Body.get(getASTContext().getExternalSource());
}
diff --git a/clang/lib/AST/DeclPrinter.cpp b/clang/lib/AST/DeclPrinter.cpp
index 4dcf3d0e6ab1..044eb8f8f8e5 100644
--- a/clang/lib/AST/DeclPrinter.cpp
+++ b/clang/lib/AST/DeclPrinter.cpp
@@ -112,11 +112,9 @@ namespace {
void printTemplateParameters(const TemplateParameterList *Params,
bool OmitTemplateKW = false);
void printTemplateArguments(llvm::ArrayRef<TemplateArgument> Args,
- const TemplateParameterList *Params,
- bool TemplOverloaded);
+ const TemplateParameterList *Params);
void printTemplateArguments(llvm::ArrayRef<TemplateArgumentLoc> Args,
- const TemplateParameterList *Params,
- bool TemplOverloaded);
+ const TemplateParameterList *Params);
void prettyPrintAttributes(Decl *D);
void prettyPrintPragmas(Decl *D);
void printDeclType(QualType T, StringRef DeclName, bool Pack = false);
@@ -153,11 +151,14 @@ static QualType GetBaseType(QualType T) {
while (!BaseType->isSpecifierType()) {
if (const PointerType *PTy = BaseType->getAs<PointerType>())
BaseType = PTy->getPointeeType();
+ else if (const ObjCObjectPointerType *OPT =
+ BaseType->getAs<ObjCObjectPointerType>())
+ BaseType = OPT->getPointeeType();
else if (const BlockPointerType *BPy = BaseType->getAs<BlockPointerType>())
BaseType = BPy->getPointeeType();
- else if (const ArrayType* ATy = dyn_cast<ArrayType>(BaseType))
+ else if (const ArrayType *ATy = dyn_cast<ArrayType>(BaseType))
BaseType = ATy->getElementType();
- else if (const FunctionType* FTy = BaseType->getAs<FunctionType>())
+ else if (const FunctionType *FTy = BaseType->getAs<FunctionType>())
BaseType = FTy->getReturnType();
else if (const VectorType *VTy = BaseType->getAs<VectorType>())
BaseType = VTy->getElementType();
@@ -649,16 +650,11 @@ void DeclPrinter::VisitFunctionDecl(FunctionDecl *D) {
llvm::raw_string_ostream POut(Proto);
DeclPrinter TArgPrinter(POut, SubPolicy, Context, Indentation);
const auto *TArgAsWritten = D->getTemplateSpecializationArgsAsWritten();
- const TemplateParameterList *TPL = D->getTemplateSpecializationInfo()
- ->getTemplate()
- ->getTemplateParameters();
if (TArgAsWritten && !Policy.PrintCanonicalTypes)
- TArgPrinter.printTemplateArguments(TArgAsWritten->arguments(), TPL,
- /*TemplOverloaded*/ true);
+ TArgPrinter.printTemplateArguments(TArgAsWritten->arguments(), nullptr);
else if (const TemplateArgumentList *TArgs =
D->getTemplateSpecializationArgs())
- TArgPrinter.printTemplateArguments(TArgs->asArray(), TPL,
- /*TemplOverloaded*/ true);
+ TArgPrinter.printTemplateArguments(TArgs->asArray(), nullptr);
}
QualType Ty = D->getType();
@@ -786,11 +782,10 @@ void DeclPrinter::VisitFunctionDecl(FunctionDecl *D) {
Out << ";\n";
}
Indentation -= Policy.Indentation;
- } else
- Out << ' ';
+ }
if (D->getBody())
- D->getBody()->printPretty(Out, nullptr, SubPolicy, Indentation, "\n",
+ D->getBody()->printPrettyControlled(Out, nullptr, SubPolicy, Indentation, "\n",
&Context);
} else {
if (!Policy.TerseOutput && isa<CXXConstructorDecl>(*D))
@@ -999,8 +994,7 @@ void DeclPrinter::VisitCXXRecordDecl(CXXRecordDecl *D) {
dyn_cast<TemplateSpecializationType>(TSI->getType()))
Args = TST->template_arguments();
printTemplateArguments(
- Args, S->getSpecializedTemplate()->getTemplateParameters(),
- /*TemplOverloaded*/ false);
+ Args, S->getSpecializedTemplate()->getTemplateParameters());
}
}
@@ -1093,35 +1087,34 @@ void DeclPrinter::printTemplateParameters(const TemplateParameterList *Params,
}
void DeclPrinter::printTemplateArguments(ArrayRef<TemplateArgument> Args,
- const TemplateParameterList *Params,
- bool TemplOverloaded) {
+ const TemplateParameterList *Params) {
Out << "<";
for (size_t I = 0, E = Args.size(); I < E; ++I) {
if (I)
Out << ", ";
- if (TemplOverloaded || !Params)
+ if (!Params)
Args[I].print(Policy, Out, /*IncludeType*/ true);
else
- Args[I].print(
- Policy, Out,
- TemplateParameterList::shouldIncludeTypeForArgument(Params, I));
+ Args[I].print(Policy, Out,
+ TemplateParameterList::shouldIncludeTypeForArgument(
+ Policy, Params, I));
}
Out << ">";
}
void DeclPrinter::printTemplateArguments(ArrayRef<TemplateArgumentLoc> Args,
- const TemplateParameterList *Params,
- bool TemplOverloaded) {
+ const TemplateParameterList *Params) {
Out << "<";
for (size_t I = 0, E = Args.size(); I < E; ++I) {
if (I)
Out << ", ";
- if (TemplOverloaded)
+ if (!Params)
Args[I].getArgument().print(Policy, Out, /*IncludeType*/ true);
else
Args[I].getArgument().print(
Policy, Out,
- TemplateParameterList::shouldIncludeTypeForArgument(Params, I));
+ TemplateParameterList::shouldIncludeTypeForArgument(Policy, Params,
+ I));
}
Out << ">";
}
@@ -1191,6 +1184,7 @@ void DeclPrinter::VisitClassTemplateDecl(ClassTemplateDecl *D) {
if (D->isThisDeclarationADefinition())
Out << ";";
Out << "\n";
+ Indent();
Visit(I);
}
}
@@ -1658,10 +1652,11 @@ void DeclPrinter::VisitOMPAllocateDecl(OMPAllocateDecl *D) {
Out << ")";
}
if (!D->clauselist_empty()) {
- Out << " ";
OMPClausePrinter Printer(Out, Policy);
- for (OMPClause *C : D->clauselists())
+ for (OMPClause *C : D->clauselists()) {
+ Out << " ";
Printer.Visit(C);
+ }
}
}
diff --git a/clang/lib/AST/DeclTemplate.cpp b/clang/lib/AST/DeclTemplate.cpp
index ec8b00a9eb7d..223f06b9db1c 100644
--- a/clang/lib/AST/DeclTemplate.cpp
+++ b/clang/lib/AST/DeclTemplate.cpp
@@ -77,7 +77,7 @@ TemplateParameterList::TemplateParameterList(const ASTContext& C,
if (TTP->hasTypeConstraint())
HasConstrainedParameters = true;
} else {
- llvm_unreachable("unexpcted template parameter type");
+ llvm_unreachable("unexpected template parameter type");
}
// FIXME: If a default argument contains an unexpanded parameter pack, the
// template parameter list does too.
@@ -165,14 +165,20 @@ unsigned TemplateParameterList::getDepth() const {
return cast<TemplateTemplateParmDecl>(FirstParm)->getDepth();
}
-static void AdoptTemplateParameterList(TemplateParameterList *Params,
+static bool AdoptTemplateParameterList(TemplateParameterList *Params,
DeclContext *Owner) {
+ bool Invalid = false;
for (NamedDecl *P : *Params) {
P->setDeclContext(Owner);
if (const auto *TTP = dyn_cast<TemplateTemplateParmDecl>(P))
- AdoptTemplateParameterList(TTP->getTemplateParameters(), Owner);
+ if (AdoptTemplateParameterList(TTP->getTemplateParameters(), Owner))
+ Invalid = true;
+
+ if (P->isInvalidDecl())
+ Invalid = true;
}
+ return Invalid;
}
void TemplateParameterList::
@@ -196,8 +202,9 @@ bool TemplateParameterList::hasAssociatedConstraints() const {
}
bool TemplateParameterList::shouldIncludeTypeForArgument(
- const TemplateParameterList *TPL, unsigned Idx) {
- if (!TPL || Idx >= TPL->size())
+ const PrintingPolicy &Policy, const TemplateParameterList *TPL,
+ unsigned Idx) {
+ if (!TPL || Idx >= TPL->size() || Policy.AlwaysIncludeTypeForTemplateArgument)
return true;
const NamedDecl *TemplParam = TPL->getParam(Idx);
if (const auto *ParamValueDecl =
@@ -339,14 +346,15 @@ void RedeclarableTemplateDecl::addSpecializationImpl(
// FunctionTemplateDecl Implementation
//===----------------------------------------------------------------------===//
-FunctionTemplateDecl *FunctionTemplateDecl::Create(ASTContext &C,
- DeclContext *DC,
- SourceLocation L,
- DeclarationName Name,
- TemplateParameterList *Params,
- NamedDecl *Decl) {
- AdoptTemplateParameterList(Params, cast<DeclContext>(Decl));
- return new (C, DC) FunctionTemplateDecl(C, DC, L, Name, Params, Decl);
+FunctionTemplateDecl *
+FunctionTemplateDecl::Create(ASTContext &C, DeclContext *DC, SourceLocation L,
+ DeclarationName Name,
+ TemplateParameterList *Params, NamedDecl *Decl) {
+ bool Invalid = AdoptTemplateParameterList(Params, cast<DeclContext>(Decl));
+ auto *TD = new (C, DC) FunctionTemplateDecl(C, DC, L, Name, Params, Decl);
+ if (Invalid)
+ TD->setInvalidDecl();
+ return TD;
}
FunctionTemplateDecl *FunctionTemplateDecl::CreateDeserialized(ASTContext &C,
@@ -438,15 +446,16 @@ void FunctionTemplateDecl::mergePrevDecl(FunctionTemplateDecl *Prev) {
// ClassTemplateDecl Implementation
//===----------------------------------------------------------------------===//
-ClassTemplateDecl *ClassTemplateDecl::Create(ASTContext &C,
- DeclContext *DC,
+ClassTemplateDecl *ClassTemplateDecl::Create(ASTContext &C, DeclContext *DC,
SourceLocation L,
DeclarationName Name,
TemplateParameterList *Params,
NamedDecl *Decl) {
- AdoptTemplateParameterList(Params, cast<DeclContext>(Decl));
-
- return new (C, DC) ClassTemplateDecl(C, DC, L, Name, Params, Decl);
+ bool Invalid = AdoptTemplateParameterList(Params, cast<DeclContext>(Decl));
+ auto *TD = new (C, DC) ClassTemplateDecl(C, DC, L, Name, Params, Decl);
+ if (Invalid)
+ TD->setInvalidDecl();
+ return TD;
}
ClassTemplateDecl *ClassTemplateDecl::CreateDeserialized(ASTContext &C,
@@ -1005,8 +1014,11 @@ ConceptDecl *ConceptDecl::Create(ASTContext &C, DeclContext *DC,
SourceLocation L, DeclarationName Name,
TemplateParameterList *Params,
Expr *ConstraintExpr) {
- AdoptTemplateParameterList(Params, DC);
- return new (C, DC) ConceptDecl(DC, L, Name, Params, ConstraintExpr);
+ bool Invalid = AdoptTemplateParameterList(Params, DC);
+ auto *TD = new (C, DC) ConceptDecl(DC, L, Name, Params, ConstraintExpr);
+ if (Invalid)
+ TD->setInvalidDecl();
+ return TD;
}
ConceptDecl *ConceptDecl::CreateDeserialized(ASTContext &C,
@@ -1039,7 +1051,8 @@ ClassTemplatePartialSpecializationDecl(ASTContext &Context, TagKind TK,
SpecializedTemplate, Args, PrevDecl),
TemplateParams(Params), ArgsAsWritten(ArgInfos),
InstantiatedFromMember(nullptr, false) {
- AdoptTemplateParameterList(Params, this);
+ if (AdoptTemplateParameterList(Params, this))
+ setInvalidDecl();
}
ClassTemplatePartialSpecializationDecl *
@@ -1097,14 +1110,15 @@ FriendTemplateDecl *FriendTemplateDecl::CreateDeserialized(ASTContext &C,
// TypeAliasTemplateDecl Implementation
//===----------------------------------------------------------------------===//
-TypeAliasTemplateDecl *TypeAliasTemplateDecl::Create(ASTContext &C,
- DeclContext *DC,
- SourceLocation L,
- DeclarationName Name,
- TemplateParameterList *Params,
- NamedDecl *Decl) {
- AdoptTemplateParameterList(Params, DC);
- return new (C, DC) TypeAliasTemplateDecl(C, DC, L, Name, Params, Decl);
+TypeAliasTemplateDecl *
+TypeAliasTemplateDecl::Create(ASTContext &C, DeclContext *DC, SourceLocation L,
+ DeclarationName Name,
+ TemplateParameterList *Params, NamedDecl *Decl) {
+ bool Invalid = AdoptTemplateParameterList(Params, DC);
+ auto *TD = new (C, DC) TypeAliasTemplateDecl(C, DC, L, Name, Params, Decl);
+ if (Invalid)
+ TD->setInvalidDecl();
+ return TD;
}
TypeAliasTemplateDecl *TypeAliasTemplateDecl::CreateDeserialized(ASTContext &C,
@@ -1151,8 +1165,11 @@ VarTemplateDecl *VarTemplateDecl::Create(ASTContext &C, DeclContext *DC,
SourceLocation L, DeclarationName Name,
TemplateParameterList *Params,
VarDecl *Decl) {
- AdoptTemplateParameterList(Params, DC);
- return new (C, DC) VarTemplateDecl(C, DC, L, Name, Params, Decl);
+ bool Invalid = AdoptTemplateParameterList(Params, DC);
+ auto *TD = new (C, DC) VarTemplateDecl(C, DC, L, Name, Params, Decl);
+ if (Invalid)
+ TD->setInvalidDecl();
+ return TD;
}
VarTemplateDecl *VarTemplateDecl::CreateDeserialized(ASTContext &C,
@@ -1334,8 +1351,8 @@ VarTemplatePartialSpecializationDecl::VarTemplatePartialSpecializationDecl(
TInfo, S, Args),
TemplateParams(Params), ArgsAsWritten(ArgInfos),
InstantiatedFromMember(nullptr, false) {
- // TODO: The template parameters should be in DC by now. Verify.
- // AdoptTemplateParameterList(Params, DC);
+ if (AdoptTemplateParameterList(Params, DC))
+ setInvalidDecl();
}
VarTemplatePartialSpecializationDecl *
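The DeclTemplate.cpp hunks above turn AdoptTemplateParameterList from a void helper into one that reports whether any adopted parameter was already invalid, and each *TemplateDecl::Create caller now marks the freshly created declaration invalid in that case. A standalone sketch of the same propagate-invalidity pattern, using made-up types rather than clang's own:

#include <memory>
#include <vector>

struct Param { bool Invalid = false; };

struct TemplateDecl {
  std::vector<Param *> Params;
  bool Invalid = false;
};

// Returns true if any adopted parameter was already marked invalid.
static bool adoptParams(const std::vector<Param *> &Params) {
  bool Invalid = false;
  for (Param *P : Params)
    Invalid |= P->Invalid;
  return Invalid;
}

static std::unique_ptr<TemplateDecl> create(std::vector<Param *> Params) {
  bool Invalid = adoptParams(Params);
  auto TD = std::make_unique<TemplateDecl>();
  TD->Params = std::move(Params);
  if (Invalid)
    TD->Invalid = true; // counterpart of setInvalidDecl() in the hunks above
  return TD;
}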
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index e8b4aaa2b81e..7bd3dce43f4d 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -545,20 +545,11 @@ std::string SYCLUniqueStableNameExpr::ComputeName(ASTContext &Context,
QualType Ty) {
auto MangleCallback = [](ASTContext &Ctx,
const NamedDecl *ND) -> llvm::Optional<unsigned> {
- // This replaces the 'lambda number' in the mangling with a unique number
- // based on its order in the declaration. To provide some level of visual
- // notability (actual uniqueness from normal lambdas isn't necessary, as
- // these are used differently), we add 10,000 to the number.
- // For example:
- // _ZTSZ3foovEUlvE10005_
- // Demangles to: typeinfo name for foo()::'lambda10005'()
- // Note that the mangler subtracts 2, since with normal lambdas the lambda
- // mangling number '0' is an anonymous struct mangle, and '1' is omitted.
- // So 10,002 results in the first number being 10,000.
- if (Ctx.IsSYCLKernelNamingDecl(ND))
- return 10'002 + Ctx.GetSYCLKernelNamingIndex(ND);
+ if (const auto *RD = dyn_cast<CXXRecordDecl>(ND))
+ return RD->getDeviceLambdaManglingNumber();
return llvm::None;
};
+
std::unique_ptr<MangleContext> Ctx{ItaniumMangleContext::create(
Context, Context.getDiagnostics(), MangleCallback)};
@@ -762,19 +753,18 @@ std::string PredefinedExpr::ComputeName(IdentKind IK, const Decl *CurrentDecl) {
std::string TemplateParams;
llvm::raw_string_ostream TOut(TemplateParams);
- for (SpecsTy::reverse_iterator I = Specs.rbegin(), E = Specs.rend();
- I != E; ++I) {
- const TemplateParameterList *Params
- = (*I)->getSpecializedTemplate()->getTemplateParameters();
- const TemplateArgumentList &Args = (*I)->getTemplateArgs();
+ for (const ClassTemplateSpecializationDecl *D : llvm::reverse(Specs)) {
+ const TemplateParameterList *Params =
+ D->getSpecializedTemplate()->getTemplateParameters();
+ const TemplateArgumentList &Args = D->getTemplateArgs();
assert(Params->size() == Args.size());
for (unsigned i = 0, numParams = Params->size(); i != numParams; ++i) {
StringRef Param = Params->getParam(i)->getName();
if (Param.empty()) continue;
TOut << Param << " = ";
- Args.get(i).print(
- Policy, TOut,
- TemplateParameterList::shouldIncludeTypeForArgument(Params, i));
+ Args.get(i).print(Policy, TOut,
+ TemplateParameterList::shouldIncludeTypeForArgument(
+ Policy, Params, i));
TOut << ", ";
}
}
@@ -2233,8 +2223,11 @@ APValue SourceLocExpr::EvaluateInContext(const ASTContext &Ctx,
};
switch (getIdentKind()) {
- case SourceLocExpr::File:
- return MakeStringLiteral(PLoc.getFilename());
+ case SourceLocExpr::File: {
+ SmallString<256> Path(PLoc.getFilename());
+ Ctx.getLangOpts().remapPathPrefix(Path);
+ return MakeStringLiteral(Path);
+ }
case SourceLocExpr::Function: {
const Decl *CurDecl = dyn_cast_or_null<Decl>(Context);
return MakeStringLiteral(
@@ -2305,7 +2298,7 @@ bool InitListExpr::isStringLiteralInit() const {
const Expr *Init = getInit(0);
if (!Init)
return false;
- Init = Init->IgnoreParens();
+ Init = Init->IgnoreParenImpCasts();
return isa<StringLiteral>(Init) || isa<ObjCEncodeExpr>(Init);
}
@@ -2367,10 +2360,8 @@ SourceLocation InitListExpr::getEndLoc() const {
SourceLocation End = RBraceLoc;
if (End.isInvalid()) {
// Find the first non-null initializer from the end.
- for (InitExprsTy::const_reverse_iterator I = InitExprs.rbegin(),
- E = InitExprs.rend();
- I != E; ++I) {
- if (Stmt *S = *I) {
+ for (Stmt *S : llvm::reverse(InitExprs)) {
+ if (S) {
End = S->getEndLoc();
break;
}
@@ -3776,11 +3767,8 @@ Expr::isNullPointerConstant(ASTContext &Ctx,
// has non-default address space it is not treated as nullptr.
// (__generic void*)0 in OpenCL 2.0 should not be treated as nullptr
// since it cannot be assigned to a pointer to constant address space.
- if ((Ctx.getLangOpts().OpenCLVersion >= 200 &&
- Pointee.getAddressSpace() == LangAS::opencl_generic) ||
- (Ctx.getLangOpts().OpenCL &&
- Ctx.getLangOpts().OpenCLVersion < 200 &&
- Pointee.getAddressSpace() == LangAS::opencl_private))
+ if (Ctx.getLangOpts().OpenCL &&
+ Pointee.getAddressSpace() == Ctx.getDefaultOpenCLPointeeAddrSpace())
Qs.removeAddressSpace();
if (Pointee->isVoidType() && Qs.empty() && // to void*
@@ -4125,7 +4113,7 @@ bool ExtVectorElementExpr::containsDuplicateElements() const {
Comp = Comp.substr(1);
for (unsigned i = 0, e = Comp.size(); i != e; ++i)
- if (Comp.substr(i + 1).find(Comp[i]) != StringRef::npos)
+ if (Comp.substr(i + 1).contains(Comp[i]))
return true;
return false;
@@ -4704,6 +4692,7 @@ unsigned AtomicExpr::getNumSubExprs(AtomicOp Op) {
case AO__c11_atomic_fetch_and:
case AO__c11_atomic_fetch_or:
case AO__c11_atomic_fetch_xor:
+ case AO__c11_atomic_fetch_nand:
case AO__c11_atomic_fetch_max:
case AO__c11_atomic_fetch_min:
case AO__atomic_fetch_add:
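Several Expr.cpp hunks above (PredefinedExpr::ComputeName, InitListExpr::getEndLoc) replace explicit reverse_iterator loops with range-based for over llvm::reverse, which adapts a bidirectional range for back-to-front iteration. A minimal sketch of the idiom, again assuming LLVM's ADT headers:

#include "llvm/ADT/STLExtras.h"
#include <cstdio>
#include <vector>

int main() {
  std::vector<int> Specs = {1, 2, 3};
  // Equivalent to a loop over rbegin()/rend(), but written as a plain range-for.
  for (int S : llvm::reverse(Specs))
    std::printf("%d ", S); // prints: 3 2 1
  return 0;
}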
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 01c0168d61a4..fe96db9ca918 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -1084,14 +1084,10 @@ namespace {
void performLifetimeExtension() {
// Disable the cleanups for lifetime-extended temporaries.
- CleanupStack.erase(std::remove_if(CleanupStack.begin(),
- CleanupStack.end(),
- [](Cleanup &C) {
- return !C.isDestroyedAtEndOf(
- ScopeKind::FullExpression);
- }),
- CleanupStack.end());
- }
+ llvm::erase_if(CleanupStack, [](Cleanup &C) {
+ return !C.isDestroyedAtEndOf(ScopeKind::FullExpression);
+ });
+ }
/// Throw away any remaining cleanups at the end of evaluation. If any
/// cleanups would have had a side-effect, note that as an unmodeled
@@ -1826,6 +1822,8 @@ static bool EvaluateComplex(const Expr *E, ComplexValue &Res, EvalInfo &Info);
static bool EvaluateAtomic(const Expr *E, const LValue *This, APValue &Result,
EvalInfo &Info);
static bool EvaluateAsRValue(EvalInfo &Info, const Expr *E, APValue &Result);
+static bool EvaluateBuiltinStrLen(const Expr *E, uint64_t &Result,
+ EvalInfo &Info);
/// Evaluate an integer or fixed point expression into an APResult.
static bool EvaluateFixedPointOrInteger(const Expr *E, APFixedPoint &Result,
@@ -2673,7 +2671,7 @@ static bool EvalAndBitcastToAPInt(EvalInfo &Info, const Expr *E,
QualType EltTy = VecTy->castAs<VectorType>()->getElementType();
unsigned EltSize = Info.Ctx.getTypeSize(EltTy);
bool BigEndian = Info.Ctx.getTargetInfo().isBigEndian();
- Res = llvm::APInt::getNullValue(VecSize);
+ Res = llvm::APInt::getZero(VecSize);
for (unsigned i = 0; i < SVal.getVectorLength(); i++) {
APValue &Elt = SVal.getVectorElt(i);
llvm::APInt EltAsInt;
@@ -2755,8 +2753,8 @@ static bool handleIntIntBinOp(EvalInfo &Info, const Expr *E, const APSInt &LHS,
Result = (Opcode == BO_Rem ? LHS % RHS : LHS / RHS);
// Check for overflow case: INT_MIN / -1 or INT_MIN % -1. APSInt supports
// this operation and gives the two's complement result.
- if (RHS.isNegative() && RHS.isAllOnesValue() &&
- LHS.isSigned() && LHS.isMinSignedValue())
+ if (RHS.isNegative() && RHS.isAllOnes() && LHS.isSigned() &&
+ LHS.isMinSignedValue())
return HandleOverflow(Info, E, -LHS.extend(LHS.getBitWidth() + 1),
E->getType());
return true;
@@ -5193,7 +5191,10 @@ static EvalStmtResult EvaluateStmt(StmtResult &Result, EvalInfo &Info,
}
}
bool Cond;
- if (!EvaluateCond(Info, IS->getConditionVariable(), IS->getCond(), Cond))
+ if (IS->isConsteval())
+ Cond = IS->isNonNegatedConsteval();
+ else if (!EvaluateCond(Info, IS->getConditionVariable(), IS->getCond(),
+ Cond))
return ESR_Failed;
if (const Stmt *SubStmt = Cond ? IS->getThen() : IS->getElse()) {
@@ -5318,6 +5319,11 @@ static EvalStmtResult EvaluateStmt(StmtResult &Result, EvalInfo &Info,
return ESR;
}
+ // In error-recovery cases it's possible to get here even if we failed to
+ // synthesize the __begin and __end variables.
+ if (!FS->getBeginStmt() || !FS->getEndStmt() || !FS->getCond())
+ return ESR_Failed;
+
// Create the __begin and __end iterators.
ESR = EvaluateStmt(Result, Info, FS->getBeginStmt());
if (ESR != ESR_Succeeded) {
@@ -6757,7 +6763,7 @@ public:
SmallVectorImpl<unsigned char> &Output) const {
for (CharUnits I = Offset, E = Offset + Width; I != E; ++I) {
// If a byte of an integer is uninitialized, then the whole integer is
- // uninitalized.
+ // uninitialized.
if (!Bytes[I.getQuantity()])
return false;
Output.push_back(*Bytes[I.getQuantity()]);
@@ -8672,8 +8678,6 @@ public:
bool VisitSYCLUniqueStableNameExpr(const SYCLUniqueStableNameExpr *E) {
std::string ResultStr = E->ComputeName(Info.Ctx);
- Info.Ctx.SYCLUniqueStableNameEvaluatedValues[E] = ResultStr;
-
QualType CharTy = Info.Ctx.CharTy.withConst();
APInt Size(Info.Ctx.getTypeSize(Info.Ctx.getSizeType()),
ResultStr.size() + 1);
@@ -9931,10 +9935,19 @@ bool RecordExprEvaluator::VisitCXXConstructExpr(const CXXConstructExpr *E,
return false;
// Avoid materializing a temporary for an elidable copy/move constructor.
- if (E->isElidable() && !ZeroInit)
- if (const MaterializeTemporaryExpr *ME
- = dyn_cast<MaterializeTemporaryExpr>(E->getArg(0)))
+ if (E->isElidable() && !ZeroInit) {
+ // FIXME: This only handles the simplest case, where the source object
+ // is passed directly as the first argument to the constructor.
+ // This should also handle stepping though implicit casts and
+ // and conversion sequences which involve two steps, with a
+ // conversion operator followed by a converting constructor.
+ const Expr *SrcObj = E->getArg(0);
+ assert(SrcObj->isTemporaryObject(Info.Ctx, FD->getParent()));
+ assert(Info.Ctx.hasSameUnqualifiedType(E->getType(), SrcObj->getType()));
+ if (const MaterializeTemporaryExpr *ME =
+ dyn_cast<MaterializeTemporaryExpr>(SrcObj))
return Visit(ME->getSubExpr());
+ }
if (ZeroInit && !ZeroInitialization(E, T))
return false;
@@ -10471,13 +10484,17 @@ bool ArrayExprEvaluator::VisitInitListExpr(const InitListExpr *E,
// C++11 [dcl.init.string]p1: A char array [...] can be initialized by [...]
// an appropriately-typed string literal enclosed in braces.
if (E->isStringLiteralInit()) {
- auto *SL = dyn_cast<StringLiteral>(E->getInit(0)->IgnoreParens());
+ auto *SL = dyn_cast<StringLiteral>(E->getInit(0)->IgnoreParenImpCasts());
// FIXME: Support ObjCEncodeExpr here once we support it in
// ArrayExprEvaluator generally.
if (!SL)
return Error(E);
return VisitStringLiteral(SL, AllocType);
}
+ // Any other transparent list init will need proper handling of the
+ // AllocType; we can't just recurse to the inner initializer.
+ assert(!E->isTransparent() &&
+ "transparent array list initialization is not string literal init?");
bool Success = true;
@@ -10598,8 +10615,8 @@ bool ArrayExprEvaluator::VisitCXXConstructExpr(const CXXConstructExpr *E,
for (unsigned I = 0; I != N; ++I)
if (!VisitCXXConstructExpr(E, ArrayElt, &Value->getArrayInitializedElt(I),
CAT->getElementType()) ||
- !HandleLValueArrayAdjustment(Info, E, ArrayElt,
- CAT->getElementType(), 1))
+ !HandleLValueArrayAdjustment(Info, E, ArrayElt, CAT->getElementType(),
+ 1))
return false;
return true;
@@ -11836,46 +11853,10 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
case Builtin::BI__builtin_wcslen: {
// As an extension, we support __builtin_strlen() as a constant expression,
// and support folding strlen() to a constant.
- LValue String;
- if (!EvaluatePointer(E->getArg(0), String, Info))
- return false;
-
- QualType CharTy = E->getArg(0)->getType()->getPointeeType();
-
- // Fast path: if it's a string literal, search the string value.
- if (const StringLiteral *S = dyn_cast_or_null<StringLiteral>(
- String.getLValueBase().dyn_cast<const Expr *>())) {
- // The string literal may have embedded null characters. Find the first
- // one and truncate there.
- StringRef Str = S->getBytes();
- int64_t Off = String.Offset.getQuantity();
- if (Off >= 0 && (uint64_t)Off <= (uint64_t)Str.size() &&
- S->getCharByteWidth() == 1 &&
- // FIXME: Add fast-path for wchar_t too.
- Info.Ctx.hasSameUnqualifiedType(CharTy, Info.Ctx.CharTy)) {
- Str = Str.substr(Off);
-
- StringRef::size_type Pos = Str.find(0);
- if (Pos != StringRef::npos)
- Str = Str.substr(0, Pos);
-
- return Success(Str.size(), E);
- }
-
- // Fall through to slow path to issue appropriate diagnostic.
- }
-
- // Slow path: scan the bytes of the string looking for the terminating 0.
- for (uint64_t Strlen = 0; /**/; ++Strlen) {
- APValue Char;
- if (!handleLValueToRValueConversion(Info, E, CharTy, String, Char) ||
- !Char.isInt())
- return false;
- if (!Char.getInt())
- return Success(Strlen, E);
- if (!HandleLValueArrayAdjustment(Info, E, String, CharTy, 1))
- return false;
- }
+ uint64_t StrLen;
+ if (EvaluateBuiltinStrLen(E->getArg(0), StrLen, Info))
+ return Success(StrLen, E);
+ return false;
}
case Builtin::BIstrcmp:
@@ -15347,7 +15328,7 @@ static ICEDiag CheckICE(const Expr* E, const ASTContext &Ctx) {
llvm::APSInt REval = Exp->getRHS()->EvaluateKnownConstInt(Ctx);
if (REval == 0)
return ICEDiag(IK_ICEIfUnevaluated, E->getBeginLoc());
- if (REval.isSigned() && REval.isAllOnesValue()) {
+ if (REval.isSigned() && REval.isAllOnes()) {
llvm::APSInt LEval = Exp->getLHS()->EvaluateKnownConstInt(Ctx);
if (LEval.isMinSignedValue())
return ICEDiag(IK_ICEIfUnevaluated, E->getBeginLoc());
@@ -15525,8 +15506,10 @@ bool Expr::isIntegerConstantExpr(const ASTContext &Ctx,
Optional<llvm::APSInt> Expr::getIntegerConstantExpr(const ASTContext &Ctx,
SourceLocation *Loc,
bool isEvaluated) const {
- assert(!isValueDependent() &&
- "Expression evaluator can't be called on a dependent expression.");
+ if (isValueDependent()) {
+ // Expression evaluator can't succeed on a dependent expression.
+ return None;
+ }
APSInt Value;
@@ -15736,3 +15719,58 @@ bool Expr::tryEvaluateObjectSize(uint64_t &Result, ASTContext &Ctx,
EvalInfo Info(Ctx, Status, EvalInfo::EM_ConstantFold);
return tryEvaluateBuiltinObjectSize(this, Type, Info, Result);
}
+
+static bool EvaluateBuiltinStrLen(const Expr *E, uint64_t &Result,
+ EvalInfo &Info) {
+ if (!E->getType()->hasPointerRepresentation() || !E->isPRValue())
+ return false;
+
+ LValue String;
+
+ if (!EvaluatePointer(E, String, Info))
+ return false;
+
+ QualType CharTy = E->getType()->getPointeeType();
+
+ // Fast path: if it's a string literal, search the string value.
+ if (const StringLiteral *S = dyn_cast_or_null<StringLiteral>(
+ String.getLValueBase().dyn_cast<const Expr *>())) {
+ StringRef Str = S->getBytes();
+ int64_t Off = String.Offset.getQuantity();
+ if (Off >= 0 && (uint64_t)Off <= (uint64_t)Str.size() &&
+ S->getCharByteWidth() == 1 &&
+ // FIXME: Add fast-path for wchar_t too.
+ Info.Ctx.hasSameUnqualifiedType(CharTy, Info.Ctx.CharTy)) {
+ Str = Str.substr(Off);
+
+ StringRef::size_type Pos = Str.find(0);
+ if (Pos != StringRef::npos)
+ Str = Str.substr(0, Pos);
+
+ Result = Str.size();
+ return true;
+ }
+
+ // Fall through to slow path.
+ }
+
+ // Slow path: scan the bytes of the string looking for the terminating 0.
+ for (uint64_t Strlen = 0; /**/; ++Strlen) {
+ APValue Char;
+ if (!handleLValueToRValueConversion(Info, E, CharTy, String, Char) ||
+ !Char.isInt())
+ return false;
+ if (!Char.getInt()) {
+ Result = Strlen;
+ return true;
+ }
+ if (!HandleLValueArrayAdjustment(Info, E, String, CharTy, 1))
+ return false;
+ }
+}
+
+bool Expr::tryEvaluateStrLen(uint64_t &Result, ASTContext &Ctx) const {
+ Expr::EvalStatus Status;
+ EvalInfo Info(Ctx, Status, EvalInfo::EM_ConstantFold);
+ return EvaluateBuiltinStrLen(this, Result, Info);
+}
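The ExprConstant.cpp hunks above factor the strlen constant-folding logic out of VisitBuiltinCallExpr into a file-static EvaluateBuiltinStrLen and expose it through a new Expr::tryEvaluateStrLen entry point. A hedged usage sketch for a caller that already holds an expression and an ASTContext; it assumes a clang tree that carries this patch, and the helper name is illustrative:

#include "clang/AST/ASTContext.h"
#include "clang/AST/Expr.h"
#include <cstdint>

// Returns the folded string length if the expression evaluates to a
// compile-time constant C string, or -1 if it cannot be determined.
static int64_t foldedStrLenOrMinusOne(const clang::Expr *E,
                                      clang::ASTContext &Ctx) {
  uint64_t Len = 0;
  if (E->tryEvaluateStrLen(Len, Ctx))
    return static_cast<int64_t>(Len);
  return -1; // caller falls back to a runtime strlen() call
}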
diff --git a/clang/lib/AST/ExprObjC.cpp b/clang/lib/AST/ExprObjC.cpp
index 7d932c8b059d..a3222c2da24f 100644
--- a/clang/lib/AST/ExprObjC.cpp
+++ b/clang/lib/AST/ExprObjC.cpp
@@ -271,20 +271,7 @@ QualType ObjCMessageExpr::getCallReturnType(ASTContext &Ctx) const {
}
return QT;
}
-
- // Expression type might be different from an expected call return type,
- // as expression type would never be a reference even if call returns a
- // reference. Reconstruct the original expression type.
- QualType QT = getType();
- switch (getValueKind()) {
- case VK_LValue:
- return Ctx.getLValueReferenceType(QT);
- case VK_XValue:
- return Ctx.getRValueReferenceType(QT);
- case VK_PRValue:
- return QT;
- }
- llvm_unreachable("Unsupported ExprValueKind");
+ return Ctx.getReferenceQualifiedType(this);
}
SourceRange ObjCMessageExpr::getReceiverRange() const {
diff --git a/clang/lib/AST/ExternalASTMerger.cpp b/clang/lib/AST/ExternalASTMerger.cpp
index c7789b707b21..a2ef270d7a9c 100644
--- a/clang/lib/AST/ExternalASTMerger.cpp
+++ b/clang/lib/AST/ExternalASTMerger.cpp
@@ -425,16 +425,14 @@ void ExternalASTMerger::RemoveSources(llvm::ArrayRef<ImporterSource> Sources) {
logs() << "(ExternalASTMerger*)" << (void *)this
<< " removing source (ASTContext*)" << (void *)&S.getASTContext()
<< "\n";
- Importers.erase(
- std::remove_if(Importers.begin(), Importers.end(),
- [&Sources](std::unique_ptr<ASTImporter> &Importer) -> bool {
- for (const ImporterSource &S : Sources) {
- if (&Importer->getFromContext() == &S.getASTContext())
- return true;
- }
- return false;
- }),
- Importers.end());
+ llvm::erase_if(Importers,
+ [&Sources](std::unique_ptr<ASTImporter> &Importer) -> bool {
+ for (const ImporterSource &S : Sources) {
+ if (&Importer->getFromContext() == &S.getASTContext())
+ return true;
+ }
+ return false;
+ });
for (OriginMap::iterator OI = Origins.begin(), OE = Origins.end(); OI != OE; ) {
std::pair<const DeclContext *, DCOrigin> Origin = *OI;
bool Erase = false;
diff --git a/clang/lib/AST/Interp/ByteCodeEmitter.cpp b/clang/lib/AST/Interp/ByteCodeEmitter.cpp
index 7a4569820a1d..a69b23fd613c 100644
--- a/clang/lib/AST/Interp/ByteCodeEmitter.cpp
+++ b/clang/lib/AST/Interp/ByteCodeEmitter.cpp
@@ -11,6 +11,7 @@
#include "Opcode.h"
#include "Program.h"
#include "clang/AST/DeclCXX.h"
+#include <type_traits>
using namespace clang;
using namespace clang::interp;
@@ -122,29 +123,48 @@ bool ByteCodeEmitter::bail(const SourceLocation &Loc) {
return false;
}
+/// Helper to write bytecode and bail out if 32-bit offsets become invalid.
+/// Pointers will be automatically marshalled as 32-bit IDs.
+template <typename T>
+static std::enable_if_t<!std::is_pointer<T>::value, void>
+emit(Program &P, std::vector<char> &Code, const T &Val, bool &Success) {
+ size_t Size = sizeof(Val);
+ if (Code.size() + Size > std::numeric_limits<unsigned>::max()) {
+ Success = false;
+ return;
+ }
+
+ const char *Data = reinterpret_cast<const char *>(&Val);
+ Code.insert(Code.end(), Data, Data + Size);
+}
+
+template <typename T>
+static std::enable_if_t<std::is_pointer<T>::value, void>
+emit(Program &P, std::vector<char> &Code, const T &Val, bool &Success) {
+ size_t Size = sizeof(uint32_t);
+ if (Code.size() + Size > std::numeric_limits<unsigned>::max()) {
+ Success = false;
+ return;
+ }
+
+ uint32_t ID = P.getOrCreateNativePointer(Val);
+ const char *Data = reinterpret_cast<const char *>(&ID);
+ Code.insert(Code.end(), Data, Data + Size);
+}
+
template <typename... Tys>
bool ByteCodeEmitter::emitOp(Opcode Op, const Tys &... Args, const SourceInfo &SI) {
bool Success = true;
- /// Helper to write bytecode and bail out if 32-bit offsets become invalid.
- auto emit = [this, &Success](const char *Data, size_t Size) {
- if (Code.size() + Size > std::numeric_limits<unsigned>::max()) {
- Success = false;
- return;
- }
- Code.insert(Code.end(), Data, Data + Size);
- };
-
/// The opcode is followed by arguments. The source info is
/// attached to the address after the opcode.
- emit(reinterpret_cast<const char *>(&Op), sizeof(Opcode));
+ emit(P, Code, Op, Success);
if (SI)
SrcMap.emplace_back(Code.size(), SI);
/// The initializer list forces the expression to be evaluated
/// for each argument in the variadic template, in order.
- (void)std::initializer_list<int>{
- (emit(reinterpret_cast<const char *>(&Args), sizeof(Args)), 0)...};
+ (void)std::initializer_list<int>{(emit(P, Code, Args, Success), 0)...};
return Success;
}
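The ByteCodeEmitter.cpp hunk above replaces the local emit lambda with a pair of std::enable_if_t-dispatched overloads: non-pointer arguments are copied into the bytecode stream byte for byte, while pointer arguments are first interned into a 32-bit ID so the encoded width no longer depends on the host pointer size. A standalone sketch of the same dispatch pattern, with a toy intern table standing in for Program:

#include <cstdint>
#include <type_traits>
#include <vector>

static std::vector<const void *> NativePointers; // toy stand-in for Program

static uint32_t internPointer(const void *P) {
  NativePointers.push_back(P);
  return static_cast<uint32_t>(NativePointers.size() - 1);
}

// Non-pointer arguments: serialize the raw bytes of the value.
template <typename T>
static std::enable_if_t<!std::is_pointer<T>::value, void>
emit(std::vector<char> &Code, const T &Val) {
  const char *Data = reinterpret_cast<const char *>(&Val);
  Code.insert(Code.end(), Data, Data + sizeof(Val));
}

// Pointer arguments: serialize a fixed-width 32-bit ID instead.
template <typename T>
static std::enable_if_t<std::is_pointer<T>::value, void>
emit(std::vector<char> &Code, const T &Val) {
  uint32_t ID = internPointer(Val);
  const char *Data = reinterpret_cast<const char *>(&ID);
  Code.insert(Code.end(), Data, Data + sizeof(ID));
}

int main() {
  std::vector<char> Code;
  int32_t Opcode = 7;
  const char *Name = "foo";
  emit(Code, Opcode); // 4 bytes of the opcode value
  emit(Code, Name);   // 4-byte ID, regardless of sizeof(const char *)
  return Code.size() == 8 ? 0 : 1;
}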
diff --git a/clang/lib/AST/Interp/ByteCodeStmtGen.cpp b/clang/lib/AST/Interp/ByteCodeStmtGen.cpp
index 5b47489e65e0..90e84149b055 100644
--- a/clang/lib/AST/Interp/ByteCodeStmtGen.cpp
+++ b/clang/lib/AST/Interp/ByteCodeStmtGen.cpp
@@ -188,6 +188,12 @@ bool ByteCodeStmtGen<Emitter>::visitReturnStmt(const ReturnStmt *RS) {
template <class Emitter>
bool ByteCodeStmtGen<Emitter>::visitIfStmt(const IfStmt *IS) {
BlockScope<Emitter> IfScope(this);
+
+ if (IS->isNonNegatedConsteval())
+ return visitStmt(IS->getThen());
+ if (IS->isNegatedConsteval())
+ return IS->getElse() ? visitStmt(IS->getElse()) : true;
+
if (auto *CondInit = IS->getInit())
if (!visitStmt(IS->getInit()))
return false;
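The ByteCodeStmtGen.cpp hunk above teaches the experimental bytecode interpreter about C++23 consteval if statements: inside the interpreter the condition is statically known, so the then branch is compiled for "if consteval" and the else branch for the negated form. A minimal example of the source construct being handled, assuming a C++23 (-std=c++2b) compilation:

consteval int squared(int n) { return n * n; }

constexpr int area(int n) {
  if consteval {
    // Constant-evaluation path: calling a consteval function is allowed here.
    return squared(n);
  } else {
    // Runtime path.
    return n * n;
  }
}

static_assert(area(4) == 16); // constant evaluation takes the consteval branch

int main() { return area(3) == 9 ? 0 : 1; } // runtime call takes the else branch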
diff --git a/clang/lib/AST/Interp/Context.h b/clang/lib/AST/Interp/Context.h
index e8238eea716a..4f25ff977b81 100644
--- a/clang/lib/AST/Interp/Context.h
+++ b/clang/lib/AST/Interp/Context.h
@@ -67,7 +67,7 @@ private:
/// Runs a function.
bool Run(State &Parent, Function *Func, APValue &Result);
- /// Checks a result fromt the interpreter.
+ /// Checks a result from the interpreter.
bool Check(State &Parent, llvm::Expected<bool> &&R);
private:
diff --git a/clang/lib/AST/Interp/Descriptor.h b/clang/lib/AST/Interp/Descriptor.h
index b260b7600974..11072cab3e90 100644
--- a/clang/lib/AST/Interp/Descriptor.h
+++ b/clang/lib/AST/Interp/Descriptor.h
@@ -184,7 +184,7 @@ struct InlineDescriptor {
/// Bitfield tracking the initialisation status of elements of primitive arrays.
/// A pointer to this is embedded at the end of all primitive arrays.
-/// If the map was not yet created and nothing was initialied, the pointer to
+/// If the map was not yet created and nothing was initialized, the pointer to
/// this structure is 0. If the object was fully initialized, the pointer is -1.
struct InitMap {
private:
diff --git a/clang/lib/AST/Interp/Disasm.cpp b/clang/lib/AST/Interp/Disasm.cpp
index c1c18f832d4f..36adbe296b0c 100644
--- a/clang/lib/AST/Interp/Disasm.cpp
+++ b/clang/lib/AST/Interp/Disasm.cpp
@@ -21,6 +21,19 @@
using namespace clang;
using namespace clang::interp;
+template <typename T>
+inline std::enable_if_t<!std::is_pointer<T>::value, T> ReadArg(Program &P,
+ CodePtr OpPC) {
+ return OpPC.read<T>();
+}
+
+template <typename T>
+inline std::enable_if_t<std::is_pointer<T>::value, T> ReadArg(Program &P,
+ CodePtr OpPC) {
+ uint32_t ID = OpPC.read<uint32_t>();
+ return reinterpret_cast<T>(P.getNativePointer(ID));
+}
+
LLVM_DUMP_METHOD void Function::dump() const { dump(llvm::errs()); }
LLVM_DUMP_METHOD void Function::dump(llvm::raw_ostream &OS) const {
diff --git a/clang/lib/AST/Interp/Function.h b/clang/lib/AST/Interp/Function.h
index 28531f04b6e9..ac1dffea1160 100644
--- a/clang/lib/AST/Interp/Function.h
+++ b/clang/lib/AST/Interp/Function.h
@@ -73,7 +73,7 @@ public:
/// Returns the original FunctionDecl.
const FunctionDecl *getDecl() const { return F; }
- /// Returns the lcoation.
+ /// Returns the location.
SourceLocation getLoc() const { return Loc; }
/// Returns a parameter descriptor.
diff --git a/clang/lib/AST/Interp/Interp.h b/clang/lib/AST/Interp/Interp.h
index e2f7bf0dc26a..a1d90f26ba46 100644
--- a/clang/lib/AST/Interp/Interp.h
+++ b/clang/lib/AST/Interp/Interp.h
@@ -13,8 +13,6 @@
#ifndef LLVM_CLANG_AST_INTERP_INTERP_H
#define LLVM_CLANG_AST_INTERP_INTERP_H
-#include <limits>
-#include <vector>
#include "Function.h"
#include "InterpFrame.h"
#include "InterpStack.h"
@@ -30,6 +28,9 @@
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/Support/Endian.h"
+#include <limits>
+#include <type_traits>
+#include <vector>
namespace clang {
namespace interp {
@@ -37,7 +38,7 @@ namespace interp {
using APInt = llvm::APInt;
using APSInt = llvm::APSInt;
-/// Convers a value to an APValue.
+/// Convert a value to an APValue.
template <typename T> bool ReturnValue(const T &V, APValue &R) {
R = V.toAPValue();
return true;
@@ -49,7 +50,7 @@ bool CheckExtern(InterpState &S, CodePtr OpPC, const Pointer &Ptr);
/// Checks if the array is offsetable.
bool CheckArray(InterpState &S, CodePtr OpPC, const Pointer &Ptr);
-/// Checks if a pointer is live and accesible.
+/// Checks if a pointer is live and accessible.
bool CheckLive(InterpState &S, CodePtr OpPC, const Pointer &Ptr,
AccessKinds AK);
/// Checks if a pointer is null.
@@ -949,6 +950,23 @@ inline bool ExpandPtr(InterpState &S, CodePtr OpPC) {
return true;
}
+//===----------------------------------------------------------------------===//
+// Read opcode arguments
+//===----------------------------------------------------------------------===//
+
+template <typename T>
+inline std::enable_if_t<!std::is_pointer<T>::value, T> ReadArg(InterpState &S,
+ CodePtr OpPC) {
+ return OpPC.read<T>();
+}
+
+template <typename T>
+inline std::enable_if_t<std::is_pointer<T>::value, T> ReadArg(InterpState &S,
+ CodePtr OpPC) {
+ uint32_t ID = OpPC.read<uint32_t>();
+ return reinterpret_cast<T>(S.P.getNativePointer(ID));
+}
+
/// Interpreter entry point.
bool Interpret(InterpState &S, APValue &Result);
diff --git a/clang/lib/AST/Interp/InterpStack.h b/clang/lib/AST/Interp/InterpStack.h
index 127adb6b8eba..b02d3c6a34b0 100644
--- a/clang/lib/AST/Interp/InterpStack.h
+++ b/clang/lib/AST/Interp/InterpStack.h
@@ -69,7 +69,7 @@ private:
return ((sizeof(T) + PtrAlign - 1) / PtrAlign) * PtrAlign;
}
- /// Grows the stack to accomodate a value and returns a pointer to it.
+ /// Grows the stack to accommodate a value and returns a pointer to it.
void *grow(size_t Size);
/// Returns a pointer from the top of the stack.
void *peek(size_t Size);
diff --git a/clang/lib/AST/Interp/InterpState.h b/clang/lib/AST/Interp/InterpState.h
index c2209bbcbb92..57e36c4c63ea 100644
--- a/clang/lib/AST/Interp/InterpState.h
+++ b/clang/lib/AST/Interp/InterpState.h
@@ -46,7 +46,7 @@ public:
return Parent.getBottomFrame();
}
- // Acces objects from the walker context.
+ // Access objects from the walker context.
Expr::EvalStatus &getEvalStatus() const override {
return Parent.getEvalStatus();
}
diff --git a/clang/lib/AST/Interp/Opcodes.td b/clang/lib/AST/Interp/Opcodes.td
index 4aba5f5cd83c..638d5b3d2357 100644
--- a/clang/lib/AST/Interp/Opcodes.td
+++ b/clang/lib/AST/Interp/Opcodes.td
@@ -57,7 +57,7 @@ def ArgValueDecl : ArgType { let Name = "const ValueDecl *"; }
def ArgRecordField : ArgType { let Name = "const Record::Field *"; }
//===----------------------------------------------------------------------===//
-// Classes of types intructions operate on.
+// Classes of types instructions operate on.
//===----------------------------------------------------------------------===//
class TypeClass {
diff --git a/clang/lib/AST/Interp/Program.cpp b/clang/lib/AST/Interp/Program.cpp
index fcbab0ea8172..e310c9678140 100644
--- a/clang/lib/AST/Interp/Program.cpp
+++ b/clang/lib/AST/Interp/Program.cpp
@@ -18,6 +18,21 @@
using namespace clang;
using namespace clang::interp;
+unsigned Program::getOrCreateNativePointer(const void *Ptr) {
+ auto It = NativePointerIndices.find(Ptr);
+ if (It != NativePointerIndices.end())
+ return It->second;
+
+ unsigned Idx = NativePointers.size();
+ NativePointers.push_back(Ptr);
+ NativePointerIndices[Ptr] = Idx;
+ return Idx;
+}
+
+const void *Program::getNativePointer(unsigned Idx) {
+ return NativePointers[Idx];
+}
+
unsigned Program::createGlobalString(const StringLiteral *S) {
const size_t CharWidth = S->getCharByteWidth();
const size_t BitWidth = CharWidth * Ctx.getCharBit();
@@ -89,7 +104,7 @@ llvm::Optional<unsigned> Program::getGlobal(const ValueDecl *VD) {
if (It != GlobalIndices.end())
return It->second;
- // Find any previous declarations which were aleady evaluated.
+ // Find any previous declarations which were already evaluated.
llvm::Optional<unsigned> Index;
for (const Decl *P = VD; P; P = P->getPreviousDecl()) {
auto It = GlobalIndices.find(P);
diff --git a/clang/lib/AST/Interp/Program.h b/clang/lib/AST/Interp/Program.h
index 5f0012db9b3f..c81ec777a5fe 100644
--- a/clang/lib/AST/Interp/Program.h
+++ b/clang/lib/AST/Interp/Program.h
@@ -44,6 +44,12 @@ class Program {
public:
Program(Context &Ctx) : Ctx(Ctx) {}
+ /// Marshals a native pointer to an ID for embedding in bytecode.
+ unsigned getOrCreateNativePointer(const void *Ptr);
+
+ /// Returns the value of a marshalled native pointer.
+ const void *getNativePointer(unsigned Idx);
+
/// Emits a string literal among global data.
unsigned createGlobalString(const StringLiteral *S);
@@ -143,6 +149,11 @@ private:
/// Function relocation locations.
llvm::DenseMap<const FunctionDecl *, std::vector<unsigned>> Relocs;
+ /// Native pointers referenced by bytecode.
+ std::vector<const void *> NativePointers;
+ /// Cached native pointer indices.
+ llvm::DenseMap<const void *, unsigned> NativePointerIndices;
+
/// Custom allocator for global storage.
using PoolAllocTy = llvm::BumpPtrAllocatorImpl<llvm::MallocAllocator>;
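The Program.cpp and Program.h hunks above add getOrCreateNativePointer / getNativePointer, a small intern table that maps host pointers to dense indices so they can be embedded in bytecode as 32-bit IDs and recovered later. A standalone sketch of the same round trip, using standard containers in place of llvm::DenseMap:

#include <cassert>
#include <unordered_map>
#include <vector>

class PointerTable {
  std::vector<const void *> Pointers;                 // index -> pointer
  std::unordered_map<const void *, unsigned> Indices; // pointer -> index
public:
  unsigned getOrCreate(const void *Ptr) {
    auto It = Indices.find(Ptr);
    if (It != Indices.end())
      return It->second; // already interned: reuse the existing index
    unsigned Idx = static_cast<unsigned>(Pointers.size());
    Pointers.push_back(Ptr);
    Indices[Ptr] = Idx;
    return Idx;
  }
  const void *get(unsigned Idx) const { return Pointers[Idx]; }
};

int main() {
  PointerTable Table;
  int X = 0, Y = 0;
  unsigned A = Table.getOrCreate(&X);
  unsigned B = Table.getOrCreate(&Y);
  assert(A != B);
  assert(Table.getOrCreate(&X) == A); // interning is idempotent
  assert(Table.get(B) == &Y);         // IDs round-trip back to pointers
  return 0;
}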
diff --git a/clang/lib/AST/Interp/Source.h b/clang/lib/AST/Interp/Source.h
index 19c652b7331a..6acaf406b47a 100644
--- a/clang/lib/AST/Interp/Source.h
+++ b/clang/lib/AST/Interp/Source.h
@@ -44,8 +44,9 @@ public:
bool operator!=(const CodePtr &RHS) const { return Ptr != RHS.Ptr; }
/// Reads data and advances the pointer.
- template <typename T> T read() {
- T Value = ReadHelper<T>(Ptr);
+ template <typename T> std::enable_if_t<!std::is_pointer<T>::value, T> read() {
+ using namespace llvm::support;
+ T Value = endian::read<T, endianness::native, 1>(Ptr);
Ptr += sizeof(T);
return Value;
}
@@ -54,22 +55,6 @@ private:
/// Constructor used by Function to generate pointers.
CodePtr(const char *Ptr) : Ptr(Ptr) {}
- /// Helper to decode a value or a pointer.
- template <typename T>
- static std::enable_if_t<!std::is_pointer<T>::value, T>
- ReadHelper(const char *Ptr) {
- using namespace llvm::support;
- return endian::read<T, endianness::native, 1>(Ptr);
- }
-
- template <typename T>
- static std::enable_if_t<std::is_pointer<T>::value, T>
- ReadHelper(const char *Ptr) {
- using namespace llvm::support;
- auto Punned = endian::read<uintptr_t, endianness::native, 1>(Ptr);
- return reinterpret_cast<T>(Punned);
- }
-
private:
friend class Function;
diff --git a/clang/lib/AST/ItaniumCXXABI.cpp b/clang/lib/AST/ItaniumCXXABI.cpp
index be10258a2d77..e99c21dcff73 100644
--- a/clang/lib/AST/ItaniumCXXABI.cpp
+++ b/clang/lib/AST/ItaniumCXXABI.cpp
@@ -181,6 +181,37 @@ public:
}
};
+// A version of this for SYCL that makes sure that 'device' mangling context
+// matches the lambda mangling number, so that __builtin_sycl_unique_stable_name
+// can be consistently generated between a MS and Itanium host by just referring
+// to the device mangling number.
+class ItaniumSYCLNumberingContext : public ItaniumNumberingContext {
+ llvm::DenseMap<const CXXMethodDecl *, unsigned> ManglingNumbers;
+ using ManglingItr = decltype(ManglingNumbers)::iterator;
+
+public:
+ ItaniumSYCLNumberingContext(ItaniumMangleContext *Mangler)
+ : ItaniumNumberingContext(Mangler) {}
+
+ unsigned getManglingNumber(const CXXMethodDecl *CallOperator) override {
+ unsigned Number = ItaniumNumberingContext::getManglingNumber(CallOperator);
+ std::pair<ManglingItr, bool> emplace_result =
+ ManglingNumbers.try_emplace(CallOperator, Number);
+ (void)emplace_result;
+ assert(emplace_result.second && "Lambda number set multiple times?");
+ return Number;
+ }
+
+ using ItaniumNumberingContext::getManglingNumber;
+
+ unsigned getDeviceManglingNumber(const CXXMethodDecl *CallOperator) override {
+ ManglingItr Itr = ManglingNumbers.find(CallOperator);
+ assert(Itr != ManglingNumbers.end() && "Lambda not yet mangled?");
+
+ return Itr->second;
+ }
+};
+
class ItaniumCXXABI : public CXXABI {
private:
std::unique_ptr<MangleContext> Mangler;
@@ -249,6 +280,9 @@ public:
std::unique_ptr<MangleNumberingContext>
createMangleNumberingContext() const override {
+ if (Context.getLangOpts().isSYCL())
+ return std::make_unique<ItaniumSYCLNumberingContext>(
+ cast<ItaniumMangleContext>(Mangler.get()));
return std::make_unique<ItaniumNumberingContext>(
cast<ItaniumMangleContext>(Mangler.get()));
}
diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
index 8cbac66fcf00..07579d04e275 100644
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -1518,9 +1518,16 @@ void CXXNameMangler::mangleUnqualifiedName(GlobalDecl GD,
// <lambda-sig> ::= <template-param-decl>* <parameter-type>+
// # Parameter types or 'v' for 'void'.
if (const CXXRecordDecl *Record = dyn_cast<CXXRecordDecl>(TD)) {
- if (Record->isLambda() && (Record->getLambdaManglingNumber() ||
- Context.getDiscriminatorOverride()(
- Context.getASTContext(), Record))) {
+ llvm::Optional<unsigned> DeviceNumber =
+ Context.getDiscriminatorOverride()(Context.getASTContext(), Record);
+
+ // If we have a device-number via the discriminator, use that to mangle
+ // the lambda, otherwise use the typical lambda-mangling-number. In either
+ // case, a '0' should be mangled as a normal unnamed class instead of as a
+ // lambda.
+ if (Record->isLambda() &&
+ ((DeviceNumber && *DeviceNumber > 0) ||
+ (!DeviceNumber && Record->getLambdaManglingNumber() > 0))) {
assert(!AdditionalAbiTags &&
"Lambda type cannot have additional abi tags");
mangleLambda(Record);
@@ -1960,8 +1967,8 @@ void CXXNameMangler::mangleLambda(const CXXRecordDecl *Lambda) {
// mangling number for this lambda.
llvm::Optional<unsigned> DeviceNumber =
Context.getDiscriminatorOverride()(Context.getASTContext(), Lambda);
- unsigned Number = DeviceNumber.hasValue() ? *DeviceNumber
- : Lambda->getLambdaManglingNumber();
+ unsigned Number =
+ DeviceNumber ? *DeviceNumber : Lambda->getLambdaManglingNumber();
assert(Number > 0 && "Lambda should be mangled as an unnamed class");
if (Number > 1)
@@ -2860,6 +2867,7 @@ void CXXNameMangler::mangleType(const BuiltinType *T) {
// ::= d # double
// ::= e # long double, __float80
// ::= g # __float128
+ // ::= g # __ibm128
// UNSUPPORTED: ::= Dd # IEEE 754r decimal floating point (64 bits)
// UNSUPPORTED: ::= De # IEEE 754r decimal floating point (128 bits)
// UNSUPPORTED: ::= Df # IEEE 754r decimal floating point (32 bits)
@@ -2988,6 +2996,11 @@ void CXXNameMangler::mangleType(const BuiltinType *T) {
Out << TI->getBFloat16Mangling();
break;
}
+ case BuiltinType::Ibm128: {
+ const TargetInfo *TI = &getASTContext().getTargetInfo();
+ Out << TI->getIbm128Mangling();
+ break;
+ }
case BuiltinType::NullPtr:
Out << "Dn";
break;
@@ -3559,7 +3572,7 @@ void CXXNameMangler::mangleAArch64NeonVectorType(const DependentVectorType *T) {
// mangling scheme, it will be specified in the next revision. The mangling
// scheme is otherwise defined in the appendices to the Procedure Call Standard
// for the Arm Architecture, see
-// https://github.com/ARM-software/abi-aa/blob/master/aapcs64/aapcs64.rst#appendix-c-mangling
+// https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst#appendix-c-mangling
void CXXNameMangler::mangleAArch64FixedSveVectorType(const VectorType *T) {
assert((T->getVectorKind() == VectorType::SveFixedLengthDataVector ||
T->getVectorKind() == VectorType::SveFixedLengthPredicateVector) &&
@@ -4168,7 +4181,6 @@ recurse:
case Expr::ArrayInitIndexExprClass:
case Expr::NoInitExprClass:
case Expr::ParenListExprClass:
- case Expr::LambdaExprClass:
case Expr::MSPropertyRefExprClass:
case Expr::MSPropertySubscriptExprClass:
case Expr::TypoExprClass: // This should no longer exist in the AST by now.
@@ -4953,6 +4965,16 @@ recurse:
break;
}
+ case Expr::LambdaExprClass: {
+ // A lambda-expression can't appear in the signature of an
+ // externally-visible declaration, so there's no standard mangling for
+ // this, but mangling as a literal of the closure type seems reasonable.
+ Out << "L";
+ mangleType(Context.getASTContext().getRecordType(cast<LambdaExpr>(E)->getLambdaClass()));
+ Out << "E";
+ break;
+ }
+
case Expr::PackExpansionExprClass:
NotPrimaryExpr();
Out << "sp";
diff --git a/clang/lib/AST/JSONNodeDumper.cpp b/clang/lib/AST/JSONNodeDumper.cpp
index f09f9d38759f..86879b8c3533 100644
--- a/clang/lib/AST/JSONNodeDumper.cpp
+++ b/clang/lib/AST/JSONNodeDumper.cpp
@@ -1489,6 +1489,8 @@ void JSONNodeDumper::VisitIfStmt(const IfStmt *IS) {
attributeOnlyIfTrue("hasVar", IS->hasVarStorage());
attributeOnlyIfTrue("hasElse", IS->hasElseStorage());
attributeOnlyIfTrue("isConstexpr", IS->isConstexpr());
+ attributeOnlyIfTrue("isConsteval", IS->isConsteval());
+ attributeOnlyIfTrue("constevalIsNegated", IS->isNegatedConsteval());
}
void JSONNodeDumper::VisitSwitchStmt(const SwitchStmt *SS) {
diff --git a/clang/lib/AST/MicrosoftCXXABI.cpp b/clang/lib/AST/MicrosoftCXXABI.cpp
index 166aa3b3bd60..53d7e0b042ff 100644
--- a/clang/lib/AST/MicrosoftCXXABI.cpp
+++ b/clang/lib/AST/MicrosoftCXXABI.cpp
@@ -78,6 +78,19 @@ public:
}
};
+class MSSYCLNumberingContext : public MicrosoftNumberingContext {
+ std::unique_ptr<MangleNumberingContext> DeviceCtx;
+
+public:
+ MSSYCLNumberingContext(MangleContext *DeviceMangler) {
+ DeviceCtx = createItaniumNumberingContext(DeviceMangler);
+ }
+
+ unsigned getDeviceManglingNumber(const CXXMethodDecl *CallOperator) override {
+ return DeviceCtx->getManglingNumber(CallOperator);
+ }
+};
+
class MicrosoftCXXABI : public CXXABI {
ASTContext &Context;
llvm::SmallDenseMap<CXXRecordDecl *, CXXConstructorDecl *> RecordToCopyCtor;
@@ -100,6 +113,10 @@ public:
DeviceMangler.reset(
Context.createMangleContext(Context.getAuxTargetInfo()));
}
+ else if (Context.getLangOpts().isSYCL()) {
+ DeviceMangler.reset(
+ ItaniumMangleContext::create(Context, Context.getDiagnostics()));
+ }
}
MemberPointerInfo
@@ -162,7 +179,11 @@ public:
if (Context.getLangOpts().CUDA && Context.getAuxTargetInfo()) {
assert(DeviceMangler && "Missing device mangler");
return std::make_unique<MSHIPNumberingContext>(DeviceMangler.get());
+ } else if (Context.getLangOpts().isSYCL()) {
+ assert(DeviceMangler && "Missing device mangler");
+ return std::make_unique<MSSYCLNumberingContext>(DeviceMangler.get());
}
+
return std::make_unique<MicrosoftNumberingContext>();
}
};
diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp
index d89cddd2adda..163d4e95386e 100644
--- a/clang/lib/AST/MicrosoftMangle.cpp
+++ b/clang/lib/AST/MicrosoftMangle.cpp
@@ -2466,6 +2466,7 @@ void MicrosoftCXXNameMangler::mangleType(const BuiltinType *T, Qualifiers,
case BuiltinType::SatUFract:
case BuiltinType::SatULongFract:
case BuiltinType::BFloat16:
+ case BuiltinType::Ibm128:
case BuiltinType::Float128: {
DiagnosticsEngine &Diags = Context.getDiags();
unsigned DiagID = Diags.getCustomDiagID(
@@ -2824,8 +2825,8 @@ void MicrosoftCXXNameMangler::mangleArtificialTagType(
// Always start with the unqualified name.
mangleSourceName(UnqualifiedName);
- for (auto I = NestedNames.rbegin(), E = NestedNames.rend(); I != E; ++I)
- mangleSourceName(*I);
+ for (StringRef N : llvm::reverse(NestedNames))
+ mangleSourceName(N);
// Terminate the whole name with an '@'.
Out << '@';
@@ -3602,7 +3603,7 @@ void MicrosoftMangleContextImpl::mangleCXXCatchableType(
// FIXME: It is known that the Ctor is present in 2013, and in 2017.7
// (_MSC_VER 1914) and newer, and that it's omitted in 2015 and 2017.4
// (_MSC_VER 1911), but it's unknown when exactly it reappeared (1914?
- // Or 1912, 1913 aleady?).
+ // Or 1912, 1913 already?).
bool OmitCopyCtor = getASTContext().getLangOpts().isCompatibleWithMSVC(
LangOptions::MSVC2015) &&
!getASTContext().getLangOpts().isCompatibleWithMSVC(
@@ -3883,7 +3884,7 @@ void MicrosoftMangleContextImpl::mangleStringLiteral(const StringLiteral *SL,
// - ?[A-Z]: The range from \xc1 to \xda.
// - ?[0-9]: The set of [,/\:. \n\t'-].
// - ?$XX: A fallback which maps nibbles.
- if (isIdentifierBody(Byte, /*AllowDollar=*/true)) {
+ if (isAsciiIdentifierContinue(Byte, /*AllowDollar=*/true)) {
Mangler.getStream() << Byte;
} else if (isLetter(Byte & 0x7f)) {
Mangler.getStream() << '?' << static_cast<char>(Byte & 0x7f);
diff --git a/clang/lib/AST/NSAPI.cpp b/clang/lib/AST/NSAPI.cpp
index 861060d7c875..db7878e18c42 100644
--- a/clang/lib/AST/NSAPI.cpp
+++ b/clang/lib/AST/NSAPI.cpp
@@ -456,6 +456,7 @@ NSAPI::getNSNumberFactoryMethodKind(QualType T) const {
case BuiltinType::UInt128:
case BuiltinType::Float16:
case BuiltinType::Float128:
+ case BuiltinType::Ibm128:
case BuiltinType::NullPtr:
case BuiltinType::ObjCClass:
case BuiltinType::ObjCId:
diff --git a/clang/lib/AST/NestedNameSpecifier.cpp b/clang/lib/AST/NestedNameSpecifier.cpp
index 21afdd1570f4..8f19d80cbdc5 100644
--- a/clang/lib/AST/NestedNameSpecifier.cpp
+++ b/clang/lib/AST/NestedNameSpecifier.cpp
@@ -311,7 +311,8 @@ void NestedNameSpecifier::print(raw_ostream &OS, const PrintingPolicy &Policy,
= dyn_cast<TemplateSpecializationType>(T)) {
// Print the template name without its corresponding
// nested-name-specifier.
- SpecType->getTemplateName().print(OS, InnerPolicy, true);
+ SpecType->getTemplateName().print(OS, InnerPolicy,
+ TemplateName::Qualified::None);
// Print the template argument list.
printTemplateArgumentList(OS, SpecType->template_arguments(),
diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp
index 50f40395a197..f721e56f7fdd 100644
--- a/clang/lib/AST/OpenMPClause.cpp
+++ b/clang/lib/AST/OpenMPClause.cpp
@@ -160,6 +160,8 @@ const OMPClauseWithPreInit *OMPClauseWithPreInit::get(const OMPClause *C) {
case OMPC_exclusive:
case OMPC_uses_allocators:
case OMPC_affinity:
+ case OMPC_when:
+ case OMPC_bind:
break;
default:
break;
@@ -257,6 +259,8 @@ const OMPClauseWithPostUpdate *OMPClauseWithPostUpdate::get(const OMPClause *C)
case OMPC_exclusive:
case OMPC_uses_allocators:
case OMPC_affinity:
+ case OMPC_when:
+ case OMPC_bind:
break;
default:
break;
@@ -625,6 +629,13 @@ OMPAlignedClause *OMPAlignedClause::CreateEmpty(const ASTContext &C,
return new (Mem) OMPAlignedClause(NumVars);
}
+OMPAlignClause *OMPAlignClause::Create(const ASTContext &C, Expr *A,
+ SourceLocation StartLoc,
+ SourceLocation LParenLoc,
+ SourceLocation EndLoc) {
+ return new (C) OMPAlignClause(A, StartLoc, LParenLoc, EndLoc);
+}
+
void OMPCopyinClause::setSourceExprs(ArrayRef<Expr *> SrcExprs) {
assert(SrcExprs.size() == varlist_size() && "Number of source expressions is "
"not the same as the "
@@ -1584,6 +1595,16 @@ OMPInitClause *OMPInitClause::CreateEmpty(const ASTContext &C, unsigned N) {
return new (Mem) OMPInitClause(N);
}
+OMPBindClause *
+OMPBindClause::Create(const ASTContext &C, OpenMPBindClauseKind K,
+ SourceLocation KLoc, SourceLocation StartLoc,
+ SourceLocation LParenLoc, SourceLocation EndLoc) {
+ return new (C) OMPBindClause(K, KLoc, StartLoc, LParenLoc, EndLoc);
+}
+
+OMPBindClause *OMPBindClause::CreateEmpty(const ASTContext &C) {
+ return new (C) OMPBindClause();
+}
//===----------------------------------------------------------------------===//
// OpenMP clauses printing methods
//===----------------------------------------------------------------------===//
@@ -1608,6 +1629,12 @@ void OMPClausePrinter::VisitOMPNumThreadsClause(OMPNumThreadsClause *Node) {
OS << ")";
}
+void OMPClausePrinter::VisitOMPAlignClause(OMPAlignClause *Node) {
+ OS << "align(";
+ Node->getAlignment()->printPretty(OS, nullptr, Policy, 0);
+ OS << ")";
+}
+
void OMPClausePrinter::VisitOMPSafelenClause(OMPSafelenClause *Node) {
OS << "safelen(";
Node->getSafelen()->printPretty(OS, nullptr, Policy, 0);
@@ -2295,6 +2322,12 @@ void OMPClausePrinter::VisitOMPFilterClause(OMPFilterClause *Node) {
OS << ")";
}
+void OMPClausePrinter::VisitOMPBindClause(OMPBindClause *Node) {
+ OS << "bind("
+ << getOpenMPSimpleClauseTypeName(OMPC_bind, unsigned(Node->getBindKind()))
+ << ")";
+}
+
void OMPTraitInfo::getAsVariantMatchInfo(ASTContext &ASTCtx,
VariantMatchInfo &VMI) const {
for (const OMPTraitSet &Set : Sets) {
@@ -2311,9 +2344,8 @@ void OMPTraitInfo::getAsVariantMatchInfo(ASTContext &ASTCtx,
if (Optional<APSInt> CondVal =
Selector.ScoreOrCondition->getIntegerConstantExpr(ASTCtx))
- VMI.addTrait(CondVal->isNullValue()
- ? TraitProperty::user_condition_false
- : TraitProperty::user_condition_true,
+ VMI.addTrait(CondVal->isZero() ? TraitProperty::user_condition_false
+ : TraitProperty::user_condition_true,
"<condition>");
else
VMI.addTrait(TraitProperty::user_condition_false, "<condition>");
@@ -2342,8 +2374,6 @@ void OMPTraitInfo::getAsVariantMatchInfo(ASTContext &ASTCtx,
getOpenMPContextTraitPropertyForSelector(
Selector.Kind) &&
"Ill-formed construct selector!");
-
- VMI.ConstructTraits.push_back(Selector.Properties.front().Kind);
}
}
}
@@ -2474,7 +2504,8 @@ llvm::raw_ostream &clang::operator<<(llvm::raw_ostream &OS,
TargetOMPContext::TargetOMPContext(
ASTContext &ASTCtx, std::function<void(StringRef)> &&DiagUnknownTrait,
- const FunctionDecl *CurrentFunctionDecl)
+ const FunctionDecl *CurrentFunctionDecl,
+ ArrayRef<llvm::omp::TraitProperty> ConstructTraits)
: OMPContext(ASTCtx.getLangOpts().OpenMPIsDevice,
ASTCtx.getTargetInfo().getTriple()),
FeatureValidityCheck([&](StringRef FeatureName) {
@@ -2482,6 +2513,9 @@ TargetOMPContext::TargetOMPContext(
}),
DiagUnknownTrait(std::move(DiagUnknownTrait)) {
ASTCtx.getFunctionFeatureMap(FeatureMap, CurrentFunctionDecl);
+
+ for (llvm::omp::TraitProperty Property : ConstructTraits)
+ addTrait(Property);
}
bool TargetOMPContext::matchesISATrait(StringRef RawString) const {
diff --git a/clang/lib/AST/ParentMapContext.cpp b/clang/lib/AST/ParentMapContext.cpp
index 4a3e0a99c8a6..d216be5b59e8 100644
--- a/clang/lib/AST/ParentMapContext.cpp
+++ b/clang/lib/AST/ParentMapContext.cpp
@@ -389,11 +389,10 @@ private:
auto *Vector = NodeOrVector.template get<ParentVector *>();
// Skip duplicates for types that have memoization data.
// We must check that the type has memoization data before calling
- // std::find() because DynTypedNode::operator== can't compare all
+ // llvm::is_contained() because DynTypedNode::operator== can't compare all
// types.
bool Found = ParentStack.back().getMemoizationData() &&
- std::find(Vector->begin(), Vector->end(),
- ParentStack.back()) != Vector->end();
+ llvm::is_contained(*Vector, ParentStack.back());
if (!Found)
Vector->push_back(ParentStack.back());
}
@@ -429,6 +428,11 @@ private:
[&] { return VisitorBase::TraverseNestedNameSpecifierLoc(NNSLocNode); },
&Map.OtherParents);
}
+ bool TraverseAttr(Attr *AttrNode) {
+ return TraverseNode(
+ AttrNode, AttrNode, [&] { return VisitorBase::TraverseAttr(AttrNode); },
+ &Map.PointerParents);
+ }
// Using generic TraverseNode for Stmt would prevent data-recursion.
bool dataTraverseStmtPre(Stmt *StmtNode) {
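The ParentMapContext hunk above swaps a find()/end() comparison for llvm::is_contained(), and several later hunks make the same change (RecordLayoutBuilder, Type, VTableBuilder) or switch reverse_iterator loops to llvm::reverse(). A minimal standalone sketch of both STLExtras idioms, assuming only the llvm/ADT headers; the names Values and demo are illustrative only:

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"

static bool demo() {
  llvm::SmallVector<int, 4> Values = {1, 2, 3};

  // Membership test without spelling out begin()/end().
  bool HasTwo = llvm::is_contained(Values, 2);

  // Reverse iteration without reverse_iterator boilerplate.
  int Sum = 0;
  for (int V : llvm::reverse(Values))
    Sum += V;

  return HasTwo && Sum == 6;
}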
diff --git a/clang/lib/AST/PrintfFormatString.cpp b/clang/lib/AST/PrintfFormatString.cpp
index 4806c554a2a1..e2569c9e20df 100644
--- a/clang/lib/AST/PrintfFormatString.cpp
+++ b/clang/lib/AST/PrintfFormatString.cpp
@@ -755,6 +755,7 @@ bool PrintfSpecifier::fixType(QualType QT, const LangOptions &LangOpt,
case BuiltinType::BFloat16:
case BuiltinType::Float16:
case BuiltinType::Float128:
+ case BuiltinType::Ibm128:
case BuiltinType::ShortAccum:
case BuiltinType::Accum:
case BuiltinType::LongAccum:
diff --git a/clang/lib/AST/QualTypeNames.cpp b/clang/lib/AST/QualTypeNames.cpp
index 9a1b418f5ac1..673821078345 100644
--- a/clang/lib/AST/QualTypeNames.cpp
+++ b/clang/lib/AST/QualTypeNames.cpp
@@ -296,7 +296,7 @@ static NestedNameSpecifier *createNestedNameSpecifierForScopeOf(
} else if (const auto *TD = dyn_cast<TagDecl>(Outer)) {
return createNestedNameSpecifier(
Ctx, TD, FullyQualified, WithGlobalNsPrefix);
- } else if (dyn_cast<TranslationUnitDecl>(Outer)) {
+ } else if (isa<TranslationUnitDecl>(Outer)) {
// Context is the TU. Nothing needs to be done.
return nullptr;
} else {
diff --git a/clang/lib/AST/RecordLayoutBuilder.cpp b/clang/lib/AST/RecordLayoutBuilder.cpp
index 972690becf9e..3e39ec1c718d 100644
--- a/clang/lib/AST/RecordLayoutBuilder.cpp
+++ b/clang/lib/AST/RecordLayoutBuilder.cpp
@@ -240,7 +240,7 @@ EmptySubobjectMap::CanPlaceSubobjectAtOffset(const CXXRecordDecl *RD,
return true;
const ClassVectorTy &Classes = I->second;
- if (llvm::find(Classes, RD) == Classes.end())
+ if (!llvm::is_contained(Classes, RD))
return true;
// There is already an empty class of the same type at this offset.
@@ -1538,7 +1538,7 @@ void ItaniumRecordLayoutBuilder::LayoutBitField(const FieldDecl *D) {
TypeInfo FieldInfo = Context.getTypeInfo(D->getType());
uint64_t StorageUnitSize = FieldInfo.Width;
unsigned FieldAlign = FieldInfo.Align;
- bool AlignIsRequired = FieldInfo.AlignIsRequired;
+ bool AlignIsRequired = FieldInfo.isAlignRequired();
// UnfilledBitsInLastUnit is the difference between the end of the
// last allocated bitfield (i.e. the first bit offset available for
@@ -1775,11 +1775,18 @@ void ItaniumRecordLayoutBuilder::LayoutBitField(const FieldDecl *D) {
!D->getIdentifier())
FieldAlign = UnpackedFieldAlign = 1;
- // On AIX, zero-width bitfields pad out to the alignment boundary, but then
- // do not affect overall record alignment if there is a pragma pack or
- // pragma align(packed).
- if (isAIXLayout(Context) && !MaxFieldAlignment.isZero() && !FieldSize)
- FieldAlign = std::min(FieldAlign, MaxFieldAlignmentInBits);
+ // On AIX, zero-width bitfields pad out to the natural alignment boundary,
+ // but do not increase the alignment greater than the MaxFieldAlignment, or 1
+ // if packed.
+ if (isAIXLayout(Context) && !FieldSize) {
+ if (FieldPacked)
+ FieldAlign = 1;
+ if (!MaxFieldAlignment.isZero()) {
+ UnpackedFieldAlign =
+ std::min(UnpackedFieldAlign, MaxFieldAlignmentInBits);
+ FieldAlign = std::min(FieldAlign, MaxFieldAlignmentInBits);
+ }
+ }
// Diagnose differences in layout due to padding or packing.
if (!UseExternalLayout)
@@ -1882,7 +1889,7 @@ void ItaniumRecordLayoutBuilder::LayoutField(const FieldDecl *D,
bool FieldPacked = Packed || D->hasAttr<PackedAttr>();
- bool AlignIsRequired = false;
+ AlignRequirementKind AlignRequirement = AlignRequirementKind::None;
CharUnits FieldSize;
CharUnits FieldAlign;
// The amount of this class's dsize occupied by the field.
@@ -1897,7 +1904,7 @@ void ItaniumRecordLayoutBuilder::LayoutField(const FieldDecl *D,
// aligned appropriately for their element type.
EffectiveFieldSize = FieldSize =
IsIncompleteArrayType ? CharUnits::Zero() : TI.Width;
- AlignIsRequired = TI.AlignIsRequired;
+ AlignRequirement = TI.AlignRequirement;
};
if (D->getType()->isIncompleteArrayType()) {
@@ -1947,7 +1954,7 @@ void ItaniumRecordLayoutBuilder::LayoutField(const FieldDecl *D,
// Since the combination of -mms-bitfields together with structs
// like max_align_t (which contains a long double) for mingw is
- // quite comon (and GCC handles it silently), just handle it
+ // quite common (and GCC handles it silently), just handle it
// silently there. For other targets that have ms_struct enabled
// (most probably via a pragma or attribute), trigger a diagnostic
// that defaults to an error.
@@ -1961,6 +1968,19 @@ void ItaniumRecordLayoutBuilder::LayoutField(const FieldDecl *D,
}
}
+ // When used as part of a typedef, or together with a 'packed' attribute, the
+ // 'aligned' attribute can be used to decrease alignment. In that case, it
+ // overrides any computed alignment we have, and there is no need to upgrade
+ // the alignment.
+ auto alignedAttrCanDecreaseAIXAlignment = [AlignRequirement, FieldPacked] {
+ // Enum alignment sources can be safely ignored here, because this only
+ // helps decide whether we need the AIX alignment upgrade, which only
+ // applies to floating-point types.
+ return AlignRequirement == AlignRequirementKind::RequiredByTypedef ||
+ (AlignRequirement == AlignRequirementKind::RequiredByRecord &&
+ FieldPacked);
+ };
+
// The AIX `power` alignment rules apply the natural alignment of the
// "first member" if it is of a floating-point data type (or is an aggregate
// whose recursively "first" member or element is such a type). The alignment
@@ -1971,7 +1991,7 @@ void ItaniumRecordLayoutBuilder::LayoutField(const FieldDecl *D,
// and zero-width bit-fields count as prior members; members of empty class
// types marked `no_unique_address` are not considered to be prior members.
CharUnits PreferredAlign = FieldAlign;
- if (DefaultsToAIXPowerAlignment && !AlignIsRequired &&
+ if (DefaultsToAIXPowerAlignment && !alignedAttrCanDecreaseAIXAlignment() &&
(FoundFirstNonOverlappingEmptyFieldForAIX || IsNaturalAlign)) {
auto performBuiltinTypeAlignmentUpgrade = [&](const BuiltinType *BTy) {
if (BTy->getKind() == BuiltinType::Double ||
@@ -1982,12 +2002,13 @@ void ItaniumRecordLayoutBuilder::LayoutField(const FieldDecl *D,
}
};
- const Type *Ty = D->getType()->getBaseElementTypeUnsafe();
- if (const ComplexType *CTy = Ty->getAs<ComplexType>()) {
- performBuiltinTypeAlignmentUpgrade(CTy->getElementType()->castAs<BuiltinType>());
- } else if (const BuiltinType *BTy = Ty->getAs<BuiltinType>()) {
+ const Type *BaseTy = D->getType()->getBaseElementTypeUnsafe();
+ if (const ComplexType *CTy = BaseTy->getAs<ComplexType>()) {
+ performBuiltinTypeAlignmentUpgrade(
+ CTy->getElementType()->castAs<BuiltinType>());
+ } else if (const BuiltinType *BTy = BaseTy->getAs<BuiltinType>()) {
performBuiltinTypeAlignmentUpgrade(BTy);
- } else if (const RecordType *RT = Ty->getAs<RecordType>()) {
+ } else if (const RecordType *RT = BaseTy->getAs<RecordType>()) {
const RecordDecl *RD = RT->getDecl();
assert(RD && "Expected non-null RecordDecl.");
const ASTRecordLayout &FieldRecord = Context.getASTRecordLayout(RD);
@@ -2610,7 +2631,7 @@ MicrosoftRecordLayoutBuilder::getAdjustedElementInfo(
// Track zero-sized subobjects here where it's already available.
EndsWithZeroSizedObject = Layout.endsWithZeroSizedObject();
// Respect required alignment, this is necessary because we may have adjusted
- // the alignment in the case of pragam pack. Note that the required alignment
+ // the alignment in the case of pragma pack. Note that the required alignment
// doesn't actually apply to the struct alignment at this point.
Alignment = std::max(Alignment, Info.Alignment);
RequiredAlignment = std::max(RequiredAlignment, Layout.getRequiredAlignment());
@@ -3070,7 +3091,7 @@ void MicrosoftRecordLayoutBuilder::layoutVirtualBases(const CXXRecordDecl *RD) {
for (const CXXBaseSpecifier &VBase : RD->vbases()) {
const CXXRecordDecl *BaseDecl = VBase.getType()->getAsCXXRecordDecl();
const ASTRecordLayout &BaseLayout = Context.getASTRecordLayout(BaseDecl);
- bool HasVtordisp = HasVtorDispSet.count(BaseDecl) > 0;
+ bool HasVtordisp = HasVtorDispSet.contains(BaseDecl);
// Insert padding between two bases if the left first one is zero sized or
// contains a zero sized subobject and the right is zero sized or one leads
// with a zero sized base. The padding between virtual bases is 4
@@ -3383,6 +3404,7 @@ uint64_t ASTContext::getFieldOffset(const ValueDecl *VD) const {
uint64_t ASTContext::lookupFieldBitOffset(const ObjCInterfaceDecl *OID,
const ObjCImplementationDecl *ID,
const ObjCIvarDecl *Ivar) const {
+ Ivar = Ivar->getCanonicalDecl();
const ObjCInterfaceDecl *Container = Ivar->getContainingInterface();
// FIXME: We should eliminate the need to have ObjCImplementationDecl passed
diff --git a/clang/lib/AST/Stmt.cpp b/clang/lib/AST/Stmt.cpp
index 47693ef9fee3..4f76f6ec12ed 100644
--- a/clang/lib/AST/Stmt.cpp
+++ b/clang/lib/AST/Stmt.cpp
@@ -912,7 +912,7 @@ void MSAsmStmt::initialize(const ASTContext &C, StringRef asmstr,
});
}
-IfStmt::IfStmt(const ASTContext &Ctx, SourceLocation IL, bool IsConstexpr,
+IfStmt::IfStmt(const ASTContext &Ctx, SourceLocation IL, IfStatementKind Kind,
Stmt *Init, VarDecl *Var, Expr *Cond, SourceLocation LPL,
SourceLocation RPL, Stmt *Then, SourceLocation EL, Stmt *Else)
: Stmt(IfStmtClass), LParenLoc(LPL), RParenLoc(RPL) {
@@ -923,7 +923,7 @@ IfStmt::IfStmt(const ASTContext &Ctx, SourceLocation IL, bool IsConstexpr,
IfStmtBits.HasVar = HasVar;
IfStmtBits.HasInit = HasInit;
- setConstexpr(IsConstexpr);
+ setStatementKind(Kind);
setCond(Cond);
setThen(Then);
@@ -947,9 +947,9 @@ IfStmt::IfStmt(EmptyShell Empty, bool HasElse, bool HasVar, bool HasInit)
}
IfStmt *IfStmt::Create(const ASTContext &Ctx, SourceLocation IL,
- bool IsConstexpr, Stmt *Init, VarDecl *Var, Expr *Cond,
- SourceLocation LPL, SourceLocation RPL, Stmt *Then,
- SourceLocation EL, Stmt *Else) {
+ IfStatementKind Kind, Stmt *Init, VarDecl *Var,
+ Expr *Cond, SourceLocation LPL, SourceLocation RPL,
+ Stmt *Then, SourceLocation EL, Stmt *Else) {
bool HasElse = Else != nullptr;
bool HasVar = Var != nullptr;
bool HasInit = Init != nullptr;
@@ -958,7 +958,7 @@ IfStmt *IfStmt::Create(const ASTContext &Ctx, SourceLocation IL,
NumMandatoryStmtPtr + HasElse + HasVar + HasInit, HasElse),
alignof(IfStmt));
return new (Mem)
- IfStmt(Ctx, IL, IsConstexpr, Init, Var, Cond, LPL, RPL, Then, EL, Else);
+ IfStmt(Ctx, IL, Kind, Init, Var, Cond, LPL, RPL, Then, EL, Else);
}
IfStmt *IfStmt::CreateEmpty(const ASTContext &Ctx, bool HasElse, bool HasVar,
diff --git a/clang/lib/AST/StmtObjC.cpp b/clang/lib/AST/StmtObjC.cpp
index 3d586795517c..12d8a9e7dac8 100644
--- a/clang/lib/AST/StmtObjC.cpp
+++ b/clang/lib/AST/StmtObjC.cpp
@@ -46,9 +46,8 @@ ObjCAtTryStmt *ObjCAtTryStmt::Create(const ASTContext &Context,
SourceLocation atTryLoc, Stmt *atTryStmt,
Stmt **CatchStmts, unsigned NumCatchStmts,
Stmt *atFinallyStmt) {
- unsigned Size =
- sizeof(ObjCAtTryStmt) +
- (1 + NumCatchStmts + (atFinallyStmt != nullptr)) * sizeof(Stmt *);
+ size_t Size =
+ totalSizeToAlloc<Stmt *>(1 + NumCatchStmts + (atFinallyStmt != nullptr));
void *Mem = Context.Allocate(Size, alignof(ObjCAtTryStmt));
return new (Mem) ObjCAtTryStmt(atTryLoc, atTryStmt, CatchStmts, NumCatchStmts,
atFinallyStmt);
@@ -57,8 +56,7 @@ ObjCAtTryStmt *ObjCAtTryStmt::Create(const ASTContext &Context,
ObjCAtTryStmt *ObjCAtTryStmt::CreateEmpty(const ASTContext &Context,
unsigned NumCatchStmts,
bool HasFinally) {
- unsigned Size =
- sizeof(ObjCAtTryStmt) + (1 + NumCatchStmts + HasFinally) * sizeof(Stmt *);
+ size_t Size = totalSizeToAlloc<Stmt *>(1 + NumCatchStmts + HasFinally);
void *Mem = Context.Allocate(Size, alignof(ObjCAtTryStmt));
return new (Mem) ObjCAtTryStmt(EmptyShell(), NumCatchStmts, HasFinally);
}
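The ObjCAtTryStmt hunks above replace hand-written sizeof arithmetic with totalSizeToAlloc<Stmt *>() from llvm::TrailingObjects, which accounts for the object header plus the variable-length tail in one call. A hedged sketch of the idiom on a made-up Node class (not the ObjCAtTryStmt code), assuming llvm/Support/TrailingObjects.h:

#include "llvm/Support/TrailingObjects.h"
#include <new>

class Node final : private llvm::TrailingObjects<Node, int> {
  friend TrailingObjects;
  unsigned NumInts;
  explicit Node(unsigned N) : NumInts(N) {}

public:
  static Node *create(unsigned N) {
    // Size of Node itself plus storage for N trailing ints.
    void *Mem = ::operator new(totalSizeToAlloc<int>(N));
    return new (Mem) Node(N);
  }
  int *ints() { return getTrailingObjects<int>(); }
  unsigned size() const { return NumInts; }
};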
diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp
index b0ef2f49ba04..b336a0637d5e 100644
--- a/clang/lib/AST/StmtOpenMP.cpp
+++ b/clang/lib/AST/StmtOpenMP.cpp
@@ -125,28 +125,34 @@ OMPLoopBasedDirective::tryToFindNextInnerLoop(Stmt *CurStmt,
bool OMPLoopBasedDirective::doForAllLoops(
Stmt *CurStmt, bool TryImperfectlyNestedLoops, unsigned NumLoops,
llvm::function_ref<bool(unsigned, Stmt *)> Callback,
- llvm::function_ref<void(OMPLoopBasedDirective *)>
+ llvm::function_ref<void(OMPLoopTransformationDirective *)>
OnTransformationCallback) {
CurStmt = CurStmt->IgnoreContainers();
for (unsigned Cnt = 0; Cnt < NumLoops; ++Cnt) {
while (true) {
- auto *OrigStmt = CurStmt;
- if (auto *Dir = dyn_cast<OMPTileDirective>(OrigStmt)) {
- OnTransformationCallback(Dir);
- CurStmt = Dir->getTransformedStmt();
- } else if (auto *Dir = dyn_cast<OMPUnrollDirective>(OrigStmt)) {
- OnTransformationCallback(Dir);
- CurStmt = Dir->getTransformedStmt();
- } else {
+ auto *Dir = dyn_cast<OMPLoopTransformationDirective>(CurStmt);
+ if (!Dir)
break;
- }
- if (!CurStmt) {
- // May happen if the loop transformation does not result in a generated
- // loop (such as full unrolling).
- CurStmt = OrigStmt;
- break;
+ OnTransformationCallback(Dir);
+
+ Stmt *TransformedStmt = Dir->getTransformedStmt();
+ if (!TransformedStmt) {
+ unsigned NumGeneratedLoops = Dir->getNumGeneratedLoops();
+ if (NumGeneratedLoops == 0) {
+ // May happen if the loop transformation does not result in a
+ // generated loop (such as full unrolling).
+ break;
+ }
+ if (NumGeneratedLoops > 0) {
+ // The loop transformation construct has generated loops, but these
+ // may not have been generated yet due to being in a dependent
+ // context.
+ return true;
+ }
}
+
+ CurStmt = TransformedStmt;
}
if (auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(CurStmt))
CurStmt = CanonLoop->getLoopStmt();
@@ -253,6 +259,25 @@ void OMPLoopDirective::setFinalsConditions(ArrayRef<Expr *> A) {
llvm::copy(A, getFinalsConditions().begin());
}
+OMPMetaDirective *OMPMetaDirective::Create(const ASTContext &C,
+ SourceLocation StartLoc,
+ SourceLocation EndLoc,
+ ArrayRef<OMPClause *> Clauses,
+ Stmt *AssociatedStmt, Stmt *IfStmt) {
+ auto *Dir = createDirective<OMPMetaDirective>(
+ C, Clauses, AssociatedStmt, /*NumChildren=*/1, StartLoc, EndLoc);
+ Dir->setIfStmt(IfStmt);
+ return Dir;
+}
+
+OMPMetaDirective *OMPMetaDirective::CreateEmpty(const ASTContext &C,
+ unsigned NumClauses,
+ EmptyShell) {
+ return createEmptyDirective<OMPMetaDirective>(C, NumClauses,
+ /*HasAssociatedStmt=*/true,
+ /*NumChildren=*/1);
+}
+
OMPParallelDirective *OMPParallelDirective::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef,
@@ -344,6 +369,32 @@ OMPForDirective *OMPForDirective::Create(
return Dir;
}
+Stmt *OMPLoopTransformationDirective::getTransformedStmt() const {
+ switch (getStmtClass()) {
+#define STMT(CLASS, PARENT)
+#define ABSTRACT_STMT(CLASS)
+#define OMPLOOPTRANSFORMATIONDIRECTIVE(CLASS, PARENT) \
+ case Stmt::CLASS##Class: \
+ return static_cast<const CLASS *>(this)->getTransformedStmt();
+#include "clang/AST/StmtNodes.inc"
+ default:
+ llvm_unreachable("Not a loop transformation");
+ }
+}
+
+Stmt *OMPLoopTransformationDirective::getPreInits() const {
+ switch (getStmtClass()) {
+#define STMT(CLASS, PARENT)
+#define ABSTRACT_STMT(CLASS)
+#define OMPLOOPTRANSFORMATIONDIRECTIVE(CLASS, PARENT) \
+ case Stmt::CLASS##Class: \
+ return static_cast<const CLASS *>(this)->getPreInits();
+#include "clang/AST/StmtNodes.inc"
+ default:
+ llvm_unreachable("Not a loop transformation");
+ }
+}
+
OMPForDirective *OMPForDirective::CreateEmpty(const ASTContext &C,
unsigned NumClauses,
unsigned CollapsedNum,
@@ -377,10 +428,13 @@ OMPTileDirective *OMPTileDirective::CreateEmpty(const ASTContext &C,
OMPUnrollDirective *
OMPUnrollDirective::Create(const ASTContext &C, SourceLocation StartLoc,
SourceLocation EndLoc, ArrayRef<OMPClause *> Clauses,
- Stmt *AssociatedStmt, Stmt *TransformedStmt,
- Stmt *PreInits) {
+ Stmt *AssociatedStmt, unsigned NumGeneratedLoops,
+ Stmt *TransformedStmt, Stmt *PreInits) {
+ assert(NumGeneratedLoops <= 1 && "Unrolling generates at most one loop");
+
auto *Dir = createDirective<OMPUnrollDirective>(
C, Clauses, AssociatedStmt, TransformedStmtOffset + 1, StartLoc, EndLoc);
+ Dir->setNumGeneratedLoops(NumGeneratedLoops);
Dir->setTransformedStmt(TransformedStmt);
Dir->setPreInits(PreInits);
return Dir;
@@ -685,15 +739,19 @@ OMPBarrierDirective *OMPBarrierDirective::CreateEmpty(const ASTContext &C,
return new (C) OMPBarrierDirective();
}
-OMPTaskwaitDirective *OMPTaskwaitDirective::Create(const ASTContext &C,
- SourceLocation StartLoc,
- SourceLocation EndLoc) {
- return new (C) OMPTaskwaitDirective(StartLoc, EndLoc);
+OMPTaskwaitDirective *
+OMPTaskwaitDirective::Create(const ASTContext &C, SourceLocation StartLoc,
+ SourceLocation EndLoc,
+ ArrayRef<OMPClause *> Clauses) {
+ return createDirective<OMPTaskwaitDirective>(
+ C, Clauses, /*AssociatedStmt=*/nullptr, /*NumChildren=*/0, StartLoc,
+ EndLoc);
}
OMPTaskwaitDirective *OMPTaskwaitDirective::CreateEmpty(const ASTContext &C,
+ unsigned NumClauses,
EmptyShell) {
- return new (C) OMPTaskwaitDirective();
+ return createEmptyDirective<OMPTaskwaitDirective>(C, NumClauses);
}
OMPTaskgroupDirective *OMPTaskgroupDirective::Create(
@@ -2032,3 +2090,45 @@ OMPMaskedDirective *OMPMaskedDirective::CreateEmpty(const ASTContext &C,
return createEmptyDirective<OMPMaskedDirective>(C, NumClauses,
/*HasAssociatedStmt=*/true);
}
+
+OMPGenericLoopDirective *OMPGenericLoopDirective::Create(
+ const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
+ unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
+ const HelperExprs &Exprs) {
+ auto *Dir = createDirective<OMPGenericLoopDirective>(
+ C, Clauses, AssociatedStmt, numLoopChildren(CollapsedNum, OMPD_loop),
+ StartLoc, EndLoc, CollapsedNum);
+ Dir->setIterationVariable(Exprs.IterationVarRef);
+ Dir->setLastIteration(Exprs.LastIteration);
+ Dir->setCalcLastIteration(Exprs.CalcLastIteration);
+ Dir->setPreCond(Exprs.PreCond);
+ Dir->setCond(Exprs.Cond);
+ Dir->setInit(Exprs.Init);
+ Dir->setInc(Exprs.Inc);
+ Dir->setIsLastIterVariable(Exprs.IL);
+ Dir->setLowerBoundVariable(Exprs.LB);
+ Dir->setUpperBoundVariable(Exprs.UB);
+ Dir->setStrideVariable(Exprs.ST);
+ Dir->setEnsureUpperBound(Exprs.EUB);
+ Dir->setNextLowerBound(Exprs.NLB);
+ Dir->setNextUpperBound(Exprs.NUB);
+ Dir->setNumIterations(Exprs.NumIterations);
+ Dir->setCounters(Exprs.Counters);
+ Dir->setPrivateCounters(Exprs.PrivateCounters);
+ Dir->setInits(Exprs.Inits);
+ Dir->setUpdates(Exprs.Updates);
+ Dir->setFinals(Exprs.Finals);
+ Dir->setDependentCounters(Exprs.DependentCounters);
+ Dir->setDependentInits(Exprs.DependentInits);
+ Dir->setFinalsConditions(Exprs.FinalsConditions);
+ Dir->setPreInits(Exprs.PreInits);
+ return Dir;
+}
+
+OMPGenericLoopDirective *
+OMPGenericLoopDirective::CreateEmpty(const ASTContext &C, unsigned NumClauses,
+ unsigned CollapsedNum, EmptyShell) {
+ return createEmptyDirective<OMPGenericLoopDirective>(
+ C, NumClauses, /*HasAssociatedStmt=*/true,
+ numLoopChildren(CollapsedNum, OMPD_loop), CollapsedNum);
+}
diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp
index 45b15171aa97..fc267d7006a1 100644
--- a/clang/lib/AST/StmtPrinter.cpp
+++ b/clang/lib/AST/StmtPrinter.cpp
@@ -236,6 +236,22 @@ void StmtPrinter::VisitAttributedStmt(AttributedStmt *Node) {
}
void StmtPrinter::PrintRawIfStmt(IfStmt *If) {
+ if (If->isConsteval()) {
+ OS << "if ";
+ if (If->isNegatedConsteval())
+ OS << "!";
+ OS << "consteval";
+ OS << NL;
+ PrintStmt(If->getThen());
+ if (Stmt *Else = If->getElse()) {
+ Indent();
+ OS << "else";
+ PrintStmt(Else);
+ OS << NL;
+ }
+ return;
+ }
+
OS << "if (";
if (If->getInit())
PrintInitStmt(If->getInit(), 4);
@@ -505,13 +521,10 @@ void StmtPrinter::VisitObjCAtTryStmt(ObjCAtTryStmt *Node) {
OS << NL;
}
- for (unsigned I = 0, N = Node->getNumCatchStmts(); I != N; ++I) {
- ObjCAtCatchStmt *catchStmt = Node->getCatchStmt(I);
+ for (ObjCAtCatchStmt *catchStmt : Node->catch_stmts()) {
Indent() << "@catch(";
- if (catchStmt->getCatchParamDecl()) {
- if (Decl *DS = catchStmt->getCatchParamDecl())
- PrintRawDecl(DS);
- }
+ if (Decl *DS = catchStmt->getCatchParamDecl())
+ PrintRawDecl(DS);
OS << ")";
if (auto *CS = dyn_cast<CompoundStmt>(catchStmt->getCatchBody())) {
PrintRawCompoundStmt(CS);
@@ -654,6 +667,11 @@ void StmtPrinter::PrintOMPExecutableDirective(OMPExecutableDirective *S,
PrintStmt(S->getRawStmt());
}
+void StmtPrinter::VisitOMPMetaDirective(OMPMetaDirective *Node) {
+ Indent() << "#pragma omp metadirective";
+ PrintOMPExecutableDirective(Node);
+}
+
void StmtPrinter::VisitOMPParallelDirective(OMPParallelDirective *Node) {
Indent() << "#pragma omp parallel";
PrintOMPExecutableDirective(Node);
@@ -982,6 +1000,11 @@ void StmtPrinter::VisitOMPMaskedDirective(OMPMaskedDirective *Node) {
PrintOMPExecutableDirective(Node);
}
+void StmtPrinter::VisitOMPGenericLoopDirective(OMPGenericLoopDirective *Node) {
+ Indent() << "#pragma omp loop";
+ PrintOMPExecutableDirective(Node);
+}
+
//===----------------------------------------------------------------------===//
// Expr printing methods.
//===----------------------------------------------------------------------===//
@@ -1183,6 +1206,7 @@ static void PrintFloatingLiteral(raw_ostream &OS, FloatingLiteral *Node,
switch (Node->getType()->castAs<BuiltinType>()->getKind()) {
default: llvm_unreachable("Unexpected type for float literal!");
case BuiltinType::Half: break; // FIXME: suffix?
+ case BuiltinType::Ibm128: break; // FIXME: No suffix for ibm128 literal
case BuiltinType::Double: break; // no suffix.
case BuiltinType::Float16: OS << "F16"; break;
case BuiltinType::Float: OS << 'F'; break;
@@ -2571,6 +2595,14 @@ void Stmt::printPretty(raw_ostream &Out, PrinterHelper *Helper,
P.Visit(const_cast<Stmt *>(this));
}
+void Stmt::printPrettyControlled(raw_ostream &Out, PrinterHelper *Helper,
+ const PrintingPolicy &Policy,
+ unsigned Indentation, StringRef NL,
+ const ASTContext *Context) const {
+ StmtPrinter P(Out, Helper, Policy, Indentation, NL, Context);
+ P.PrintControlledStmt(const_cast<Stmt *>(this));
+}
+
void Stmt::printJson(raw_ostream &Out, PrinterHelper *Helper,
const PrintingPolicy &Policy, bool AddQuotes) const {
std::string Buf;
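The new PrintRawIfStmt branch above handles C++23 consteval if statements; the TextNodeDumper hunk further down dumps the same flags. For reference, a sketch of the source form being printed (the function name twice is illustrative):

// 'if consteval' selects its branch depending on whether the call is
// being constant-evaluated; 'if !consteval' is the negated form.
constexpr int twice(int x) {
  if consteval {
    return 2 * x;   // constant-evaluation path
  } else {
    return x + x;   // runtime path
  }
}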
diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp
index ed000c2467fa..4339c249e027 100644
--- a/clang/lib/AST/StmtProfile.cpp
+++ b/clang/lib/AST/StmtProfile.cpp
@@ -452,6 +452,11 @@ void OMPClauseProfiler::VisitOMPNumThreadsClause(const OMPNumThreadsClause *C) {
Profiler->VisitStmt(C->getNumThreads());
}
+void OMPClauseProfiler::VisitOMPAlignClause(const OMPAlignClause *C) {
+ if (C->getAlignment())
+ Profiler->VisitStmt(C->getAlignment());
+}
+
void OMPClauseProfiler::VisitOMPSafelenClause(const OMPSafelenClause *C) {
if (C->getSafelen())
Profiler->VisitStmt(C->getSafelen());
@@ -878,6 +883,7 @@ void OMPClauseProfiler::VisitOMPAffinityClause(const OMPAffinityClause *C) {
Profiler->VisitStmt(E);
}
void OMPClauseProfiler::VisitOMPOrderClause(const OMPOrderClause *C) {}
+void OMPClauseProfiler::VisitOMPBindClause(const OMPBindClause *C) {}
} // namespace
void
@@ -903,6 +909,10 @@ void StmtProfiler::VisitOMPLoopDirective(const OMPLoopDirective *S) {
VisitOMPLoopBasedDirective(S);
}
+void StmtProfiler::VisitOMPMetaDirective(const OMPMetaDirective *S) {
+ VisitOMPExecutableDirective(S);
+}
+
void StmtProfiler::VisitOMPParallelDirective(const OMPParallelDirective *S) {
VisitOMPExecutableDirective(S);
}
@@ -911,12 +921,17 @@ void StmtProfiler::VisitOMPSimdDirective(const OMPSimdDirective *S) {
VisitOMPLoopDirective(S);
}
-void StmtProfiler::VisitOMPTileDirective(const OMPTileDirective *S) {
+void StmtProfiler::VisitOMPLoopTransformationDirective(
+ const OMPLoopTransformationDirective *S) {
VisitOMPLoopBasedDirective(S);
}
+void StmtProfiler::VisitOMPTileDirective(const OMPTileDirective *S) {
+ VisitOMPLoopTransformationDirective(S);
+}
+
void StmtProfiler::VisitOMPUnrollDirective(const OMPUnrollDirective *S) {
- VisitOMPLoopBasedDirective(S);
+ VisitOMPLoopTransformationDirective(S);
}
void StmtProfiler::VisitOMPForDirective(const OMPForDirective *S) {
@@ -1181,6 +1196,11 @@ void StmtProfiler::VisitOMPMaskedDirective(const OMPMaskedDirective *S) {
VisitOMPExecutableDirective(S);
}
+void StmtProfiler::VisitOMPGenericLoopDirective(
+ const OMPGenericLoopDirective *S) {
+ VisitOMPLoopDirective(S);
+}
+
void StmtProfiler::VisitExpr(const Expr *S) {
VisitStmt(S);
}
@@ -1924,30 +1944,9 @@ StmtProfiler::VisitCXXTemporaryObjectExpr(const CXXTemporaryObjectExpr *S) {
void
StmtProfiler::VisitLambdaExpr(const LambdaExpr *S) {
VisitExpr(S);
- for (LambdaExpr::capture_iterator C = S->explicit_capture_begin(),
- CEnd = S->explicit_capture_end();
- C != CEnd; ++C) {
- if (C->capturesVLAType())
- continue;
-
- ID.AddInteger(C->getCaptureKind());
- switch (C->getCaptureKind()) {
- case LCK_StarThis:
- case LCK_This:
- break;
- case LCK_ByRef:
- case LCK_ByCopy:
- VisitDecl(C->getCapturedVar());
- ID.AddBoolean(C->isPackExpansion());
- break;
- case LCK_VLAType:
- llvm_unreachable("VLA type in explicit captures.");
- }
- }
- // Note: If we actually needed to be able to match lambda
- // expressions, we would have to consider parameters and return type
- // here, among other things.
- VisitStmt(S->getBody());
+ // C++20 [temp.over.link]p5:
+ // Two lambda-expressions are never considered equivalent.
+ VisitDecl(S->getLambdaClass());
}
void
diff --git a/clang/lib/AST/TemplateBase.cpp b/clang/lib/AST/TemplateBase.cpp
index f44230d1bd03..619ce42f9dd1 100644
--- a/clang/lib/AST/TemplateBase.cpp
+++ b/clang/lib/AST/TemplateBase.cpp
@@ -452,7 +452,7 @@ void TemplateArgument::print(const PrintingPolicy &Policy, raw_ostream &Out,
break;
case Template:
- getAsTemplate().print(Out, Policy);
+ getAsTemplate().print(Out, Policy, TemplateName::Qualified::Fully);
break;
case TemplateExpansion:
diff --git a/clang/lib/AST/TemplateName.cpp b/clang/lib/AST/TemplateName.cpp
index 22cfa9acbe1b..c8bd74f0b5bb 100644
--- a/clang/lib/AST/TemplateName.cpp
+++ b/clang/lib/AST/TemplateName.cpp
@@ -220,19 +220,28 @@ bool TemplateName::containsUnexpandedParameterPack() const {
return getDependence() & TemplateNameDependence::UnexpandedPack;
}
-void
-TemplateName::print(raw_ostream &OS, const PrintingPolicy &Policy,
- bool SuppressNNS) const {
+void TemplateName::print(raw_ostream &OS, const PrintingPolicy &Policy,
+ Qualified Qual) const {
if (TemplateDecl *Template = Storage.dyn_cast<TemplateDecl *>())
- OS << *Template;
+ if (Qual == Qualified::Fully &&
+ getDependence() != TemplateNameDependenceScope::DependentInstantiation)
+ Template->printQualifiedName(OS, Policy);
+ else
+ OS << *Template;
else if (QualifiedTemplateName *QTN = getAsQualifiedTemplateName()) {
- if (!SuppressNNS)
+ if (Qual == Qualified::Fully &&
+ getDependence() !=
+ TemplateNameDependenceScope::DependentInstantiation) {
+ QTN->getTemplateDecl()->printQualifiedName(OS, Policy);
+ return;
+ }
+ if (Qual == Qualified::AsWritten)
QTN->getQualifier()->print(OS, Policy);
if (QTN->hasTemplateKeyword())
OS << "template ";
OS << *QTN->getDecl();
} else if (DependentTemplateName *DTN = getAsDependentTemplateName()) {
- if (!SuppressNNS && DTN->getQualifier())
+ if (Qual == Qualified::AsWritten && DTN->getQualifier())
DTN->getQualifier()->print(OS, Policy);
OS << "template ";
@@ -242,7 +251,7 @@ TemplateName::print(raw_ostream &OS, const PrintingPolicy &Policy,
OS << "operator " << getOperatorSpelling(DTN->getOperator());
} else if (SubstTemplateTemplateParmStorage *subst
= getAsSubstTemplateTemplateParm()) {
- subst->getReplacement().print(OS, Policy, SuppressNNS);
+ subst->getReplacement().print(OS, Policy, Qual);
} else if (SubstTemplateTemplateParmPackStorage *SubstPack
= getAsSubstTemplateTemplateParmPack())
OS << *SubstPack->getParameterPack();
diff --git a/clang/lib/AST/TextNodeDumper.cpp b/clang/lib/AST/TextNodeDumper.cpp
index 33f914f9f886..b21e806e307c 100644
--- a/clang/lib/AST/TextNodeDumper.cpp
+++ b/clang/lib/AST/TextNodeDumper.cpp
@@ -948,6 +948,14 @@ void TextNodeDumper::VisitIfStmt(const IfStmt *Node) {
OS << " has_var";
if (Node->hasElseStorage())
OS << " has_else";
+ if (Node->isConstexpr())
+ OS << " constexpr";
+ if (Node->isConsteval()) {
+ OS << " ";
+ if (Node->isNegatedConsteval())
+ OS << "!";
+ OS << "consteval";
+ }
}
void TextNodeDumper::VisitSwitchStmt(const SwitchStmt *Node) {
diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp
index 4a2fc5219ef0..e0ac3f5b1351 100644
--- a/clang/lib/AST/Type.cpp
+++ b/clang/lib/AST/Type.cpp
@@ -821,6 +821,13 @@ QualType ObjCObjectType::stripObjCKindOfTypeAndQuals(
/*isKindOf=*/false);
}
+ObjCInterfaceDecl *ObjCInterfaceType::getDecl() const {
+ ObjCInterfaceDecl *Canon = Decl->getCanonicalDecl();
+ if (ObjCInterfaceDecl *Def = Canon->getDefinition())
+ return Def;
+ return Canon;
+}
+
const ObjCObjectPointerType *ObjCObjectPointerType::stripObjCKindOfTypeAndQuals(
const ASTContext &ctx) const {
if (!isKindOfType() && qual_empty())
@@ -1885,7 +1892,7 @@ DeducedType *Type::getContainedDeducedType() const {
}
bool Type::hasAutoForTrailingReturnType() const {
- return dyn_cast_or_null<FunctionType>(
+ return isa_and_nonnull<FunctionType>(
GetContainedDeducedTypeVisitor(true).Visit(this));
}
@@ -2097,7 +2104,7 @@ bool Type::hasUnsignedIntegerRepresentation() const {
bool Type::isFloatingType() const {
if (const auto *BT = dyn_cast<BuiltinType>(CanonicalType))
return BT->getKind() >= BuiltinType::Half &&
- BT->getKind() <= BuiltinType::Float128;
+ BT->getKind() <= BuiltinType::Ibm128;
if (const auto *CT = dyn_cast<ComplexType>(CanonicalType))
return CT->getElementType()->isFloatingType();
return false;
@@ -2119,7 +2126,7 @@ bool Type::isRealFloatingType() const {
bool Type::isRealType() const {
if (const auto *BT = dyn_cast<BuiltinType>(CanonicalType))
return BT->getKind() >= BuiltinType::Bool &&
- BT->getKind() <= BuiltinType::Float128;
+ BT->getKind() <= BuiltinType::Ibm128;
if (const auto *ET = dyn_cast<EnumType>(CanonicalType))
return ET->getDecl()->isComplete() && !ET->getDecl()->isScoped();
return isExtIntType();
@@ -2128,7 +2135,7 @@ bool Type::isRealType() const {
bool Type::isArithmeticType() const {
if (const auto *BT = dyn_cast<BuiltinType>(CanonicalType))
return BT->getKind() >= BuiltinType::Bool &&
- BT->getKind() <= BuiltinType::Float128 &&
+ BT->getKind() <= BuiltinType::Ibm128 &&
BT->getKind() != BuiltinType::BFloat16;
if (const auto *ET = dyn_cast<EnumType>(CanonicalType))
// GCC allows forward declaration of enum types (forbid by C99 6.7.2.3p2).
@@ -2785,7 +2792,6 @@ bool Type::isSpecifierType() const {
case DependentTemplateSpecialization:
case ObjCInterface:
case ObjCObject:
- case ObjCObjectPointer: // FIXME: object pointers aren't really specifiers
return true;
default:
return false;
@@ -3030,6 +3036,8 @@ StringRef BuiltinType::getName(const PrintingPolicy &Policy) const {
return "_Float16";
case Float128:
return "__float128";
+ case Ibm128:
+ return "__ibm128";
case WChar_S:
case WChar_U:
return Policy.MSWChar ? "__wchar_t" : "wchar_t";
@@ -3040,7 +3048,7 @@ StringRef BuiltinType::getName(const PrintingPolicy &Policy) const {
case Char32:
return "char32_t";
case NullPtr:
- return "nullptr_t";
+ return "std::nullptr_t";
case Overload:
return "<overloaded function type>";
case BoundMember:
@@ -3505,7 +3513,7 @@ bool RecordType::hasConstFields() const {
return true;
FieldTy = FieldTy.getCanonicalType();
if (const auto *FieldRecTy = FieldTy->getAs<RecordType>()) {
- if (llvm::find(RecordTypeList, FieldRecTy) == RecordTypeList.end())
+ if (!llvm::is_contained(RecordTypeList, FieldRecTy))
RecordTypeList.push_back(FieldRecTy);
}
}
@@ -3757,8 +3765,8 @@ public:
friend CachedProperties merge(CachedProperties L, CachedProperties R) {
Linkage MergedLinkage = minLinkage(L.L, R.L);
- return CachedProperties(MergedLinkage,
- L.hasLocalOrUnnamedType() | R.hasLocalOrUnnamedType());
+ return CachedProperties(MergedLinkage, L.hasLocalOrUnnamedType() ||
+ R.hasLocalOrUnnamedType());
}
};
@@ -4392,10 +4400,10 @@ void clang::FixedPointValueToString(SmallVectorImpl<char> &Str,
}
AutoType::AutoType(QualType DeducedAsType, AutoTypeKeyword Keyword,
- TypeDependence ExtraDependence,
+ TypeDependence ExtraDependence, QualType Canon,
ConceptDecl *TypeConstraintConcept,
ArrayRef<TemplateArgument> TypeConstraintArgs)
- : DeducedType(Auto, DeducedAsType, ExtraDependence) {
+ : DeducedType(Auto, DeducedAsType, ExtraDependence, Canon) {
AutoTypeBits.Keyword = (unsigned)Keyword;
AutoTypeBits.NumArgs = TypeConstraintArgs.size();
this->TypeConstraintConcept = TypeConstraintConcept;
diff --git a/clang/lib/AST/TypeLoc.cpp b/clang/lib/AST/TypeLoc.cpp
index 16d953b4bece..c3ed08d5a8b3 100644
--- a/clang/lib/AST/TypeLoc.cpp
+++ b/clang/lib/AST/TypeLoc.cpp
@@ -240,6 +240,8 @@ SourceLocation TypeLoc::getEndLoc() const {
case IncompleteArray:
case VariableArray:
case FunctionNoProto:
+ // The innermost type with suffix syntax always determines the end of the
+ // type.
Last = Cur;
break;
case FunctionProto:
@@ -248,12 +250,19 @@ SourceLocation TypeLoc::getEndLoc() const {
else
Last = Cur;
break;
+ case ObjCObjectPointer:
+ // `id` and `id<...>` have no star location.
+ if (Cur.castAs<ObjCObjectPointerTypeLoc>().getStarLoc().isInvalid())
+ break;
+ LLVM_FALLTHROUGH;
case Pointer:
case BlockPointer:
case MemberPointer:
case LValueReference:
case RValueReference:
case PackExpansion:
+ // Types with prefix syntax only determine the end of the type if there
+ // is no suffix type.
if (!Last)
Last = Cur;
break;
@@ -351,6 +360,7 @@ TypeSpecifierType BuiltinTypeLoc::getWrittenTypeSpec() const {
case BuiltinType::LongDouble:
case BuiltinType::Float16:
case BuiltinType::Float128:
+ case BuiltinType::Ibm128:
case BuiltinType::ShortAccum:
case BuiltinType::Accum:
case BuiltinType::LongAccum:
diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp
index 5de22f76f458..eca9af3e5f36 100644
--- a/clang/lib/AST/TypePrinter.cpp
+++ b/clang/lib/AST/TypePrinter.cpp
@@ -200,11 +200,12 @@ bool TypePrinter::canPrefixQualifiers(const Type *T,
// type expands to a simple string.
bool CanPrefixQualifiers = false;
NeedARCStrongQualifier = false;
- Type::TypeClass TC = T->getTypeClass();
+ const Type *UnderlyingType = T;
if (const auto *AT = dyn_cast<AutoType>(T))
- TC = AT->desugar()->getTypeClass();
+ UnderlyingType = AT->desugar().getTypePtr();
if (const auto *Subst = dyn_cast<SubstTemplateTypeParmType>(T))
- TC = Subst->getReplacementType()->getTypeClass();
+ UnderlyingType = Subst->getReplacementType().getTypePtr();
+ Type::TypeClass TC = UnderlyingType->getTypeClass();
switch (TC) {
case Type::Auto:
@@ -241,13 +242,17 @@ bool TypePrinter::canPrefixQualifiers(const Type *T,
T->isObjCQualifiedIdType() || T->isObjCQualifiedClassType();
break;
- case Type::ConstantArray:
- case Type::IncompleteArray:
case Type::VariableArray:
case Type::DependentSizedArray:
NeedARCStrongQualifier = true;
LLVM_FALLTHROUGH;
+ case Type::ConstantArray:
+ case Type::IncompleteArray:
+ return canPrefixQualifiers(
+ cast<ArrayType>(UnderlyingType)->getElementType().getTypePtr(),
+ NeedARCStrongQualifier);
+
case Type::Adjusted:
case Type::Decayed:
case Type::Pointer:
@@ -498,7 +503,6 @@ void TypePrinter::printMemberPointerAfter(const MemberPointerType *T,
void TypePrinter::printConstantArrayBefore(const ConstantArrayType *T,
raw_ostream &OS) {
IncludeStrongLifetimeRAII Strong(Policy);
- SaveAndRestore<bool> NonEmptyPH(HasEmptyPlaceHolder, false);
printBefore(T->getElementType(), OS);
}
@@ -521,7 +525,6 @@ void TypePrinter::printConstantArrayAfter(const ConstantArrayType *T,
void TypePrinter::printIncompleteArrayBefore(const IncompleteArrayType *T,
raw_ostream &OS) {
IncludeStrongLifetimeRAII Strong(Policy);
- SaveAndRestore<bool> NonEmptyPH(HasEmptyPlaceHolder, false);
printBefore(T->getElementType(), OS);
}
@@ -534,7 +537,6 @@ void TypePrinter::printIncompleteArrayAfter(const IncompleteArrayType *T,
void TypePrinter::printVariableArrayBefore(const VariableArrayType *T,
raw_ostream &OS) {
IncludeStrongLifetimeRAII Strong(Policy);
- SaveAndRestore<bool> NonEmptyPH(HasEmptyPlaceHolder, false);
printBefore(T->getElementType(), OS);
}
@@ -581,7 +583,6 @@ void TypePrinter::printDependentSizedArrayBefore(
const DependentSizedArrayType *T,
raw_ostream &OS) {
IncludeStrongLifetimeRAII Strong(Policy);
- SaveAndRestore<bool> NonEmptyPH(HasEmptyPlaceHolder, false);
printBefore(T->getElementType(), OS);
}
@@ -1363,9 +1364,11 @@ void TypePrinter::printTag(TagDecl *D, raw_ostream &OS) {
void TypePrinter::printRecordBefore(const RecordType *T, raw_ostream &OS) {
// Print the preferred name if we have one for this type.
- for (const auto *PNA : T->getDecl()->specific_attrs<PreferredNameAttr>()) {
- if (declaresSameEntity(PNA->getTypedefType()->getAsCXXRecordDecl(),
- T->getDecl())) {
+ if (Policy.UsePreferredNames) {
+ for (const auto *PNA : T->getDecl()->specific_attrs<PreferredNameAttr>()) {
+ if (!declaresSameEntity(PNA->getTypedefType()->getAsCXXRecordDecl(),
+ T->getDecl()))
+ continue;
// Find the outermost typedef or alias template.
QualType T = PNA->getTypedefType();
while (true) {
@@ -1743,6 +1746,9 @@ void TypePrinter::printAttributedAfter(const AttributedType *T,
case attr::ArmMveStrictPolymorphism:
OS << "__clang_arm_mve_strict_polymorphism";
break;
+ case attr::BTFTypeTag:
+ OS << "btf_type_tag";
+ break;
}
OS << "))";
}
@@ -1998,10 +2004,9 @@ static bool isSubstitutedDefaultArgument(ASTContext &Ctx, TemplateArgument Arg,
}
template <typename TA>
-static void printTo(raw_ostream &OS, ArrayRef<TA> Args,
- const PrintingPolicy &Policy, bool SkipBrackets,
- const TemplateParameterList *TPL, bool IsPack,
- unsigned ParmIndex) {
+static void
+printTo(raw_ostream &OS, ArrayRef<TA> Args, const PrintingPolicy &Policy,
+ const TemplateParameterList *TPL, bool IsPack, unsigned ParmIndex) {
// Drop trailing template arguments that match default arguments.
if (TPL && Policy.SuppressDefaultTemplateArgs &&
!Policy.PrintCanonicalTypes && !Args.empty() && !IsPack &&
@@ -2018,7 +2023,7 @@ static void printTo(raw_ostream &OS, ArrayRef<TA> Args,
}
const char *Comma = Policy.MSVCFormatting ? "," : ", ";
- if (!SkipBrackets)
+ if (!IsPack)
OS << '<';
bool NeedSpace = false;
@@ -2031,15 +2036,15 @@ static void printTo(raw_ostream &OS, ArrayRef<TA> Args,
if (Argument.getKind() == TemplateArgument::Pack) {
if (Argument.pack_size() && !FirstArg)
OS << Comma;
- printTo(ArgOS, Argument.getPackAsArray(), Policy, true, TPL,
+ printTo(ArgOS, Argument.getPackAsArray(), Policy, TPL,
/*IsPack*/ true, ParmIndex);
} else {
if (!FirstArg)
OS << Comma;
// Tries to print the argument with location info if exists.
- printArgument(
- Arg, Policy, ArgOS,
- TemplateParameterList::shouldIncludeTypeForArgument(TPL, ParmIndex));
+ printArgument(Arg, Policy, ArgOS,
+ TemplateParameterList::shouldIncludeTypeForArgument(
+ Policy, TPL, ParmIndex));
}
StringRef ArgString = ArgOS.str();
@@ -2053,20 +2058,21 @@ static void printTo(raw_ostream &OS, ArrayRef<TA> Args,
// If the last character of our string is '>', add another space to
// keep the two '>''s separate tokens.
- NeedSpace = Policy.SplitTemplateClosers && !ArgString.empty() &&
- ArgString.back() == '>';
- FirstArg = false;
+ if (!ArgString.empty()) {
+ NeedSpace = Policy.SplitTemplateClosers && ArgString.back() == '>';
+ FirstArg = false;
+ }
// Use same template parameter for all elements of Pack
if (!IsPack)
ParmIndex++;
}
- if (NeedSpace)
- OS << ' ';
-
- if (!SkipBrackets)
+ if (!IsPack) {
+ if (NeedSpace)
+ OS << ' ';
OS << '>';
+ }
}
void clang::printTemplateArgumentList(raw_ostream &OS,
@@ -2080,14 +2086,14 @@ void clang::printTemplateArgumentList(raw_ostream &OS,
ArrayRef<TemplateArgument> Args,
const PrintingPolicy &Policy,
const TemplateParameterList *TPL) {
- printTo(OS, Args, Policy, false, TPL, /*isPack*/ false, /*parmIndex*/ 0);
+ printTo(OS, Args, Policy, TPL, /*isPack*/ false, /*parmIndex*/ 0);
}
void clang::printTemplateArgumentList(raw_ostream &OS,
ArrayRef<TemplateArgumentLoc> Args,
const PrintingPolicy &Policy,
const TemplateParameterList *TPL) {
- printTo(OS, Args, Policy, false, TPL, /*isPack*/ false, /*parmIndex*/ 0);
+ printTo(OS, Args, Policy, TPL, /*isPack*/ false, /*parmIndex*/ 0);
}
std::string Qualifiers::getAsString() const {
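One TypePrinter hunk above adds a spelling for the new btf_type_tag type attribute. A hedged sketch of how that attribute is written at the source level; the "user" tag string and the read_flag declaration are made up, and the attribute is primarily intended for C code targeting BPF debug info:

// The tag annotates the pointee type and ends up in the emitted type
// information; the printer change above renders it as btf_type_tag.
#define __user __attribute__((btf_type_tag("user")))

int read_flag(int __user *flag_ptr);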
diff --git a/clang/lib/AST/VTableBuilder.cpp b/clang/lib/AST/VTableBuilder.cpp
index 38d6fc28e098..f938565c3cb4 100644
--- a/clang/lib/AST/VTableBuilder.cpp
+++ b/clang/lib/AST/VTableBuilder.cpp
@@ -1070,7 +1070,7 @@ void ItaniumVTableBuilder::AddThunk(const CXXMethodDecl *MD,
SmallVectorImpl<ThunkInfo> &ThunksVector = Thunks[MD];
// Check if we have this thunk already.
- if (llvm::find(ThunksVector, Thunk) != ThunksVector.end())
+ if (llvm::is_contained(ThunksVector, Thunk))
return;
ThunksVector.push_back(Thunk);
@@ -1418,8 +1418,7 @@ FindNearestOverriddenMethod(const CXXMethodDecl *MD,
OverriddenMethodsSetTy OverriddenMethods;
ComputeAllOverriddenMethods(MD, OverriddenMethods);
- for (const CXXRecordDecl *PrimaryBase :
- llvm::make_range(Bases.rbegin(), Bases.rend())) {
+ for (const CXXRecordDecl *PrimaryBase : llvm::reverse(Bases)) {
// Now check the overridden methods.
for (const CXXMethodDecl *OverriddenMD : OverriddenMethods) {
// We found our overridden method.
@@ -2498,7 +2497,7 @@ private:
SmallVector<ThunkInfo, 1> &ThunksVector = Thunks[MD];
// Check if we have this thunk already.
- if (llvm::find(ThunksVector, Thunk) != ThunksVector.end())
+ if (llvm::is_contained(ThunksVector, Thunk))
return;
ThunksVector.push_back(Thunk);
@@ -3098,8 +3097,7 @@ void VFTableBuilder::AddMethods(BaseSubobject Base, unsigned BaseDepth,
}
static void PrintBasePath(const VPtrInfo::BasePath &Path, raw_ostream &Out) {
- for (const CXXRecordDecl *Elem :
- llvm::make_range(Path.rbegin(), Path.rend())) {
+ for (const CXXRecordDecl *Elem : llvm::reverse(Path)) {
Out << "'";
Elem->printQualifiedName(Out);
Out << "' in ";
@@ -3454,7 +3452,7 @@ static void removeRedundantPaths(std::list<FullPathTy> &FullPaths) {
if (&SpecificPath == &OtherPath)
continue;
if (llvm::all_of(SpecificPath, [&](const BaseSubobject &BSO) {
- return OtherPath.count(BSO) != 0;
+ return OtherPath.contains(BSO);
})) {
return true;
}
diff --git a/clang/lib/ASTMatchers/ASTMatchFinder.cpp b/clang/lib/ASTMatchers/ASTMatchFinder.cpp
index 5d6cea54b8ec..b19a7fe3be04 100644
--- a/clang/lib/ASTMatchers/ASTMatchFinder.cpp
+++ b/clang/lib/ASTMatchers/ASTMatchFinder.cpp
@@ -133,6 +133,8 @@ public:
else if (const TemplateArgumentLoc *TALoc =
DynNode.get<TemplateArgumentLoc>())
traverse(*TALoc);
+ else if (const Attr *A = DynNode.get<Attr>())
+ traverse(*A);
// FIXME: Add other base types after adding tests.
// It's OK to always overwrite the bound nodes, as if there was
@@ -263,6 +265,15 @@ public:
return match(*Node->getLHS()) && match(*Node->getRHS());
}
+ bool TraverseAttr(Attr *A) {
+ if (A == nullptr ||
+ (A->isImplicit() &&
+ Finder->getASTContext().getParentMapContext().getTraversalKind() ==
+ TK_IgnoreUnlessSpelledInSource))
+ return true;
+ ScopedIncrement ScopedDepth(&CurrentDepth);
+ return traverse(*A);
+ }
bool TraverseLambdaExpr(LambdaExpr *Node) {
if (!Finder->isTraversalIgnoringImplicitNodes())
return VisitorBase::TraverseLambdaExpr(Node);
@@ -345,6 +356,9 @@ private:
bool baseTraverse(TemplateArgumentLoc TAL) {
return VisitorBase::TraverseTemplateArgumentLoc(TAL);
}
+ bool baseTraverse(const Attr &AttrNode) {
+ return VisitorBase::TraverseAttr(const_cast<Attr *>(&AttrNode));
+ }
// Sets 'Matched' to true if 'Matcher' matches 'Node' and:
// 0 < CurrentDepth <= MaxDepth.
@@ -489,6 +503,7 @@ public:
bool TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc NNS);
bool TraverseConstructorInitializer(CXXCtorInitializer *CtorInit);
bool TraverseTemplateArgumentLoc(TemplateArgumentLoc TAL);
+ bool TraverseAttr(Attr *AttrNode);
bool dataTraverseNode(Stmt *S, DataRecursionQueue *Queue) {
if (auto *RF = dyn_cast<CXXForRangeStmt>(S)) {
@@ -694,6 +709,8 @@ public:
match(*N);
} else if (auto *N = Node.get<TemplateArgumentLoc>()) {
match(*N);
+ } else if (auto *N = Node.get<Attr>()) {
+ match(*N);
}
}
@@ -894,6 +911,9 @@ private:
void matchDispatch(const TemplateArgumentLoc *Node) {
matchWithoutFilter(*Node, Matchers->TemplateArgumentLoc);
}
+ void matchDispatch(const Attr *Node) {
+ matchWithoutFilter(*Node, Matchers->Attr);
+ }
void matchDispatch(const void *) { /* Do nothing. */ }
/// @}
@@ -1300,6 +1320,11 @@ bool MatchASTVisitor::TraverseTemplateArgumentLoc(TemplateArgumentLoc Loc) {
return RecursiveASTVisitor<MatchASTVisitor>::TraverseTemplateArgumentLoc(Loc);
}
+bool MatchASTVisitor::TraverseAttr(Attr *AttrNode) {
+ match(*AttrNode);
+ return RecursiveASTVisitor<MatchASTVisitor>::TraverseAttr(AttrNode);
+}
+
class MatchASTConsumer : public ASTConsumer {
public:
MatchASTConsumer(MatchFinder *Finder,
@@ -1394,6 +1419,12 @@ void MatchFinder::addMatcher(const TemplateArgumentLocMatcher &NodeMatch,
Matchers.AllCallbacks.insert(Action);
}
+void MatchFinder::addMatcher(const AttrMatcher &AttrMatch,
+ MatchCallback *Action) {
+ Matchers.Attr.emplace_back(AttrMatch, Action);
+ Matchers.AllCallbacks.insert(Action);
+}
+
bool MatchFinder::addDynamicMatcher(const internal::DynTypedMatcher &NodeMatch,
MatchCallback *Action) {
if (NodeMatch.canConvertTo<Decl>()) {
@@ -1420,6 +1451,9 @@ bool MatchFinder::addDynamicMatcher(const internal::DynTypedMatcher &NodeMatch,
} else if (NodeMatch.canConvertTo<TemplateArgumentLoc>()) {
addMatcher(NodeMatch.convertTo<TemplateArgumentLoc>(), Action);
return true;
+ } else if (NodeMatch.canConvertTo<Attr>()) {
+ addMatcher(NodeMatch.convertTo<Attr>(), Action);
+ return true;
}
return false;
}
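The ASTMatchFinder hunks above thread Attr through traversal, matcher dispatch, and a new addMatcher overload. A hypothetical registration sketch using the attr() matcher registered in the following files; the AttrPrinter callback and the "a" binding are made-up names:

#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "clang/ASTMatchers/ASTMatchers.h"
#include "llvm/Support/raw_ostream.h"

using namespace clang;
using namespace clang::ast_matchers;

// Prints the spelling of every attribute the matcher binds as "a".
struct AttrPrinter : MatchFinder::MatchCallback {
  void run(const MatchFinder::MatchResult &Result) override {
    if (const Attr *A = Result.Nodes.getNodeAs<Attr>("a"))
      llvm::outs() << A->getSpelling() << "\n";
  }
};

void registerAttrMatcher(MatchFinder &Finder, AttrPrinter &CB) {
  Finder.addMatcher(attr().bind("a"), &CB);
}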
diff --git a/clang/lib/ASTMatchers/ASTMatchersInternal.cpp b/clang/lib/ASTMatchers/ASTMatchersInternal.cpp
index 169ce3b83980..7680eb38283e 100644
--- a/clang/lib/ASTMatchers/ASTMatchersInternal.cpp
+++ b/clang/lib/ASTMatchers/ASTMatchersInternal.cpp
@@ -468,8 +468,8 @@ hasAnyOverloadedOperatorNameFunc(ArrayRef<const StringRef *> NameRefs) {
}
HasNameMatcher::HasNameMatcher(std::vector<std::string> N)
- : UseUnqualifiedMatch(llvm::all_of(
- N, [](StringRef Name) { return Name.find("::") == Name.npos; })),
+ : UseUnqualifiedMatch(
+ llvm::all_of(N, [](StringRef Name) { return !Name.contains("::"); })),
Names(std::move(N)) {
#ifndef NDEBUG
for (StringRef Name : Names)
@@ -768,9 +768,23 @@ const internal::VariadicDynCastAllOfMatcher<Decl, TemplateTypeParmDecl>
const internal::VariadicDynCastAllOfMatcher<Decl, TemplateTemplateParmDecl>
templateTemplateParmDecl;
+const internal::VariadicAllOfMatcher<LambdaCapture> lambdaCapture;
const internal::VariadicAllOfMatcher<QualType> qualType;
const internal::VariadicAllOfMatcher<Type> type;
const internal::VariadicAllOfMatcher<TypeLoc> typeLoc;
+
+const internal::VariadicDynCastAllOfMatcher<TypeLoc, QualifiedTypeLoc>
+ qualifiedTypeLoc;
+const internal::VariadicDynCastAllOfMatcher<TypeLoc, PointerTypeLoc>
+ pointerTypeLoc;
+const internal::VariadicDynCastAllOfMatcher<TypeLoc, ReferenceTypeLoc>
+ referenceTypeLoc;
+const internal::VariadicDynCastAllOfMatcher<TypeLoc,
+ TemplateSpecializationTypeLoc>
+ templateSpecializationTypeLoc;
+const internal::VariadicDynCastAllOfMatcher<TypeLoc, ElaboratedTypeLoc>
+ elaboratedTypeLoc;
+
const internal::VariadicDynCastAllOfMatcher<Stmt, UnaryExprOrTypeTraitExpr>
unaryExprOrTypeTraitExpr;
const internal::VariadicDynCastAllOfMatcher<Decl, ValueDecl> valueDecl;
@@ -1000,19 +1014,20 @@ const internal::ArgumentAdaptingMatcherFunc<internal::ForEachDescendantMatcher>
forEachDescendant = {};
const internal::ArgumentAdaptingMatcherFunc<
internal::HasParentMatcher,
- internal::TypeList<Decl, NestedNameSpecifierLoc, Stmt, TypeLoc>,
- internal::TypeList<Decl, NestedNameSpecifierLoc, Stmt, TypeLoc>>
+ internal::TypeList<Decl, NestedNameSpecifierLoc, Stmt, TypeLoc, Attr>,
+ internal::TypeList<Decl, NestedNameSpecifierLoc, Stmt, TypeLoc, Attr>>
hasParent = {};
const internal::ArgumentAdaptingMatcherFunc<
internal::HasAncestorMatcher,
- internal::TypeList<Decl, NestedNameSpecifierLoc, Stmt, TypeLoc>,
- internal::TypeList<Decl, NestedNameSpecifierLoc, Stmt, TypeLoc>>
+ internal::TypeList<Decl, NestedNameSpecifierLoc, Stmt, TypeLoc, Attr>,
+ internal::TypeList<Decl, NestedNameSpecifierLoc, Stmt, TypeLoc, Attr>>
hasAncestor = {};
const internal::VariadicOperatorMatcherFunc<1, 1> unless = {
internal::DynTypedMatcher::VO_UnaryNot};
const internal::VariadicAllOfMatcher<NestedNameSpecifier> nestedNameSpecifier;
const internal::VariadicAllOfMatcher<NestedNameSpecifierLoc>
nestedNameSpecifierLoc;
+const internal::VariadicAllOfMatcher<Attr> attr;
const internal::VariadicDynCastAllOfMatcher<Stmt, CUDAKernelCallExpr>
cudaKernelCallExpr;
const AstTypeMatcher<BuiltinType> builtinType;
diff --git a/clang/lib/ASTMatchers/Dynamic/Registry.cpp b/clang/lib/ASTMatchers/Dynamic/Registry.cpp
index 0048f1133ca2..878547923d27 100644
--- a/clang/lib/ASTMatchers/Dynamic/Registry.cpp
+++ b/clang/lib/ASTMatchers/Dynamic/Registry.cpp
@@ -106,7 +106,6 @@ RegistryMaps::RegistryMaps() {
std::make_unique<internal::MapAnyOfBuilderDescriptor>());
REGISTER_OVERLOADED_2(callee);
- REGISTER_OVERLOADED_2(hasAnyCapture);
REGISTER_OVERLOADED_2(hasPrefix);
REGISTER_OVERLOADED_2(hasType);
REGISTER_OVERLOADED_2(ignoringParens);
@@ -142,6 +141,7 @@ RegistryMaps::RegistryMaps() {
REGISTER_MATCHER(asmStmt);
REGISTER_MATCHER(atomicExpr);
REGISTER_MATCHER(atomicType);
+ REGISTER_MATCHER(attr);
REGISTER_MATCHER(autoType);
REGISTER_MATCHER(autoreleasePoolStmt)
REGISTER_MATCHER(binaryConditionalOperator);
@@ -156,6 +156,8 @@ RegistryMaps::RegistryMaps() {
REGISTER_MATCHER(builtinType);
REGISTER_MATCHER(cStyleCastExpr);
REGISTER_MATCHER(callExpr);
+ REGISTER_MATCHER(capturesThis);
+ REGISTER_MATCHER(capturesVar);
REGISTER_MATCHER(caseStmt);
REGISTER_MATCHER(castExpr);
REGISTER_MATCHER(characterLiteral);
@@ -225,6 +227,7 @@ RegistryMaps::RegistryMaps() {
REGISTER_MATCHER(doStmt);
REGISTER_MATCHER(eachOf);
REGISTER_MATCHER(elaboratedType);
+ REGISTER_MATCHER(elaboratedTypeLoc);
REGISTER_MATCHER(enumConstantDecl);
REGISTER_MATCHER(enumDecl);
REGISTER_MATCHER(enumType);
@@ -243,6 +246,7 @@ RegistryMaps::RegistryMaps() {
REGISTER_MATCHER(forEachArgumentWithParamType);
REGISTER_MATCHER(forEachConstructorInitializer);
REGISTER_MATCHER(forEachDescendant);
+ REGISTER_MATCHER(forEachLambdaCapture);
REGISTER_MATCHER(forEachOverridden);
REGISTER_MATCHER(forEachSwitchCase);
REGISTER_MATCHER(forField);
@@ -262,6 +266,7 @@ RegistryMaps::RegistryMaps() {
REGISTER_MATCHER(hasAnyBase);
REGISTER_MATCHER(hasAnyBinding);
REGISTER_MATCHER(hasAnyBody);
+ REGISTER_MATCHER(hasAnyCapture);
REGISTER_MATCHER(hasAnyClause);
REGISTER_MATCHER(hasAnyConstructorInitializer);
REGISTER_MATCHER(hasAnyDeclaration);
@@ -273,6 +278,7 @@ RegistryMaps::RegistryMaps() {
REGISTER_MATCHER(hasAnySelector);
REGISTER_MATCHER(hasAnySubstatement);
REGISTER_MATCHER(hasAnyTemplateArgument);
+ REGISTER_MATCHER(hasAnyTemplateArgumentLoc);
REGISTER_MATCHER(hasAnyUsingShadowDecl);
REGISTER_MATCHER(hasArgument);
REGISTER_MATCHER(hasArgumentOfType);
@@ -321,6 +327,7 @@ RegistryMaps::RegistryMaps() {
REGISTER_MATCHER(hasMemberName);
REGISTER_MATCHER(hasMethod);
REGISTER_MATCHER(hasName);
+ REGISTER_MATCHER(hasNamedTypeLoc);
REGISTER_MATCHER(hasNullSelector);
REGISTER_MATCHER(hasObjectExpression);
REGISTER_MATCHER(hasOperands);
@@ -328,12 +335,15 @@ RegistryMaps::RegistryMaps() {
REGISTER_MATCHER(hasOverloadedOperatorName);
REGISTER_MATCHER(hasParameter);
REGISTER_MATCHER(hasParent);
+ REGISTER_MATCHER(hasPointeeLoc);
REGISTER_MATCHER(hasQualifier);
REGISTER_MATCHER(hasRHS);
REGISTER_MATCHER(hasRangeInit);
REGISTER_MATCHER(hasReceiver);
REGISTER_MATCHER(hasReceiverType);
+ REGISTER_MATCHER(hasReferentLoc);
REGISTER_MATCHER(hasReplacementType);
+ REGISTER_MATCHER(hasReturnTypeLoc);
REGISTER_MATCHER(hasReturnValue);
REGISTER_MATCHER(hasPlacementArg);
REGISTER_MATCHER(hasSelector);
@@ -347,6 +357,7 @@ RegistryMaps::RegistryMaps() {
REGISTER_MATCHER(hasSyntacticForm);
REGISTER_MATCHER(hasTargetDecl);
REGISTER_MATCHER(hasTemplateArgument);
+ REGISTER_MATCHER(hasTemplateArgumentLoc);
REGISTER_MATCHER(hasThen);
REGISTER_MATCHER(hasThreadStorageDuration);
REGISTER_MATCHER(hasTrailingReturn);
@@ -357,6 +368,7 @@ RegistryMaps::RegistryMaps() {
REGISTER_MATCHER(hasUnderlyingDecl);
REGISTER_MATCHER(hasUnderlyingType);
REGISTER_MATCHER(hasUnqualifiedDesugaredType);
+ REGISTER_MATCHER(hasUnqualifiedLoc);
REGISTER_MATCHER(hasValueType);
REGISTER_MATCHER(ifStmt);
REGISTER_MATCHER(ignoringElidableConstructorCall);
@@ -413,6 +425,7 @@ RegistryMaps::RegistryMaps() {
REGISTER_MATCHER(isImplicit);
REGISTER_MATCHER(isInStdNamespace);
REGISTER_MATCHER(isInTemplateInstantiation);
+ REGISTER_MATCHER(isInitCapture);
REGISTER_MATCHER(isInline);
REGISTER_MATCHER(isInstanceMessage);
REGISTER_MATCHER(isInstanceMethod);
@@ -456,6 +469,7 @@ RegistryMaps::RegistryMaps() {
REGISTER_MATCHER(lValueReferenceType);
REGISTER_MATCHER(labelDecl);
REGISTER_MATCHER(labelStmt);
+ REGISTER_MATCHER(lambdaCapture);
REGISTER_MATCHER(lambdaExpr);
REGISTER_MATCHER(linkageSpecDecl);
REGISTER_MATCHER(materializeTemporaryExpr);
@@ -503,13 +517,16 @@ RegistryMaps::RegistryMaps() {
REGISTER_MATCHER(parmVarDecl);
REGISTER_MATCHER(pointee);
REGISTER_MATCHER(pointerType);
+ REGISTER_MATCHER(pointerTypeLoc);
REGISTER_MATCHER(predefinedExpr);
REGISTER_MATCHER(qualType);
+ REGISTER_MATCHER(qualifiedTypeLoc);
REGISTER_MATCHER(rValueReferenceType);
REGISTER_MATCHER(realFloatingPointType);
REGISTER_MATCHER(recordDecl);
REGISTER_MATCHER(recordType);
REGISTER_MATCHER(referenceType);
+ REGISTER_MATCHER(referenceTypeLoc);
REGISTER_MATCHER(refersToDeclaration);
REGISTER_MATCHER(refersToIntegralType);
REGISTER_MATCHER(refersToTemplate);
@@ -537,6 +554,7 @@ RegistryMaps::RegistryMaps() {
REGISTER_MATCHER(templateArgumentLoc);
REGISTER_MATCHER(templateName);
REGISTER_MATCHER(templateSpecializationType);
+ REGISTER_MATCHER(templateSpecializationTypeLoc);
REGISTER_MATCHER(templateTemplateParmDecl);
REGISTER_MATCHER(templateTypeParmDecl);
REGISTER_MATCHER(templateTypeParmType);
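For orientation only, here is a minimal sketch, not part of this import, of how a few of the newly registered TypeLoc matchers above could be combined from a MatchFinder callback or a clang-query session. The binding name "ptr" and the asString("int") spelling are arbitrary choices for the example.

// Sketch only: uses the ASTMatchers headers this diff updates.
#include "clang/ASTMatchers/ASTMatchers.h"

using namespace clang::ast_matchers;

// Matches the written type of `int *p;`, binding the pointer's TypeLoc.
static const auto PointerLoc =
    pointerTypeLoc(hasPointeeLoc(loc(asString("int")))).bind("ptr");

// Matches function declarations whose written return type is `int`.
static const auto ReturnsIntLoc =
    functionDecl(hasReturnTypeLoc(loc(asString("int"))));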
diff --git a/clang/lib/Analysis/BodyFarm.cpp b/clang/lib/Analysis/BodyFarm.cpp
index e357bfb29b82..49ac74c233bd 100644
--- a/clang/lib/Analysis/BodyFarm.cpp
+++ b/clang/lib/Analysis/BodyFarm.cpp
@@ -461,8 +461,7 @@ static Stmt *create_call_once(ASTContext &C, const FunctionDecl *D) {
DerefType);
auto *Out =
- IfStmt::Create(C, SourceLocation(),
- /* IsConstexpr=*/false,
+ IfStmt::Create(C, SourceLocation(), IfStatementKind::Ordinary,
/* Init=*/nullptr,
/* Var=*/nullptr,
/* Cond=*/FlagCheck,
@@ -547,8 +546,7 @@ static Stmt *create_dispatch_once(ASTContext &C, const FunctionDecl *D) {
Expr *GuardCondition = M.makeComparison(LValToRval, DoneValue, BO_NE);
// (5) Create the 'if' statement.
- auto *If = IfStmt::Create(C, SourceLocation(),
- /* IsConstexpr=*/false,
+ auto *If = IfStmt::Create(C, SourceLocation(), IfStatementKind::Ordinary,
/* Init=*/nullptr,
/* Var=*/nullptr,
/* Cond=*/GuardCondition,
@@ -658,8 +656,7 @@ static Stmt *create_OSAtomicCompareAndSwap(ASTContext &C, const FunctionDecl *D)
/// Construct the If.
auto *If =
- IfStmt::Create(C, SourceLocation(),
- /* IsConstexpr=*/false,
+ IfStmt::Create(C, SourceLocation(), IfStatementKind::Ordinary,
/* Init=*/nullptr,
/* Var=*/nullptr, Comparison,
/* LPL=*/SourceLocation(),
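The BodyFarm hunks above only swap the removed IsConstexpr flag for the new IfStatementKind parameter; the synthesized bodies keep using ordinary ifs. For context, a hedged source-level sketch of the if forms that the kind distinguishes; only IfStatementKind::Ordinary appears in this diff, so no other enumerator names are assumed here.

// Illustration only: the `if` flavors that now need to be told apart at the
// AST level. BodyFarm builds the first, ordinary form.
template <bool Flag> int pick(bool RuntimeCond) {
  if (RuntimeCond)      // ordinary if (IfStatementKind::Ordinary above)
    return 1;
  if constexpr (Flag)   // constexpr if, a distinct statement kind
    return 2;
  return 3;
}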
diff --git a/clang/lib/Analysis/CFG.cpp b/clang/lib/Analysis/CFG.cpp
index ba5eceda24b5..abf65e3efce9 100644
--- a/clang/lib/Analysis/CFG.cpp
+++ b/clang/lib/Analysis/CFG.cpp
@@ -482,8 +482,10 @@ class CFGBuilder {
CFGBlock *SwitchTerminatedBlock = nullptr;
CFGBlock *DefaultCaseBlock = nullptr;
- // This can point either to a try or a __try block. The frontend forbids
- // mixing both kinds in one function, so having one for both is enough.
+ // This can point to either a C++ try, an Objective-C @try, or an SEH __try.
+ // try and @try can be mixed and generally work the same.
+ // The frontend forbids mixing SEH __try with either try or @try.
+ // So having one for all three is enough.
CFGBlock *TryTerminatedBlock = nullptr;
// Current position in local scope.
@@ -542,6 +544,7 @@ private:
// Visitors to walk an AST and construct the CFG.
CFGBlock *VisitInitListExpr(InitListExpr *ILE, AddStmtChoice asc);
CFGBlock *VisitAddrLabelExpr(AddrLabelExpr *A, AddStmtChoice asc);
+ CFGBlock *VisitAttributedStmt(AttributedStmt *A, AddStmtChoice asc);
CFGBlock *VisitBinaryOperator(BinaryOperator *B, AddStmtChoice asc);
CFGBlock *VisitBreakStmt(BreakStmt *B);
CFGBlock *VisitCallExpr(CallExpr *C, AddStmtChoice asc);
@@ -1798,16 +1801,11 @@ void CFGBuilder::addLifetimeEnds(LocalScope::const_iterator B,
autoCreateBlock();
// object with trivial destructor end their lifetime last (when storage
// duration ends)
- for (SmallVectorImpl<VarDecl *>::reverse_iterator I = DeclsTrivial.rbegin(),
- E = DeclsTrivial.rend();
- I != E; ++I)
- appendLifetimeEnds(Block, *I, S);
+ for (VarDecl *VD : llvm::reverse(DeclsTrivial))
+ appendLifetimeEnds(Block, VD, S);
- for (SmallVectorImpl<VarDecl *>::reverse_iterator
- I = DeclsNonTrivial.rbegin(),
- E = DeclsNonTrivial.rend();
- I != E; ++I)
- appendLifetimeEnds(Block, *I, S);
+ for (VarDecl *VD : llvm::reverse(DeclsNonTrivial))
+ appendLifetimeEnds(Block, VD, S);
}
/// Add to current block markers for ending scopes.
@@ -1820,9 +1818,8 @@ void CFGBuilder::addScopesEnd(LocalScope::const_iterator B,
autoCreateBlock();
- for (auto I = DeclsWithEndedScope.rbegin(), E = DeclsWithEndedScope.rend();
- I != E; ++I)
- appendScopeEnd(Block, *I, S);
+ for (VarDecl *VD : llvm::reverse(DeclsWithEndedScope))
+ appendScopeEnd(Block, VD, S);
return;
}
@@ -1847,24 +1844,22 @@ void CFGBuilder::addAutomaticObjDtors(LocalScope::const_iterator B,
for (LocalScope::const_iterator I = B; I != E; ++I)
Decls.push_back(*I);
- for (SmallVectorImpl<VarDecl*>::reverse_iterator I = Decls.rbegin(),
- E = Decls.rend();
- I != E; ++I) {
- if (hasTrivialDestructor(*I)) {
+ for (VarDecl *VD : llvm::reverse(Decls)) {
+ if (hasTrivialDestructor(VD)) {
// If AddScopes is enabled and *I is a first variable in a scope, add a
// ScopeEnd marker in a Block.
- if (BuildOpts.AddScopes && DeclsWithEndedScope.count(*I)) {
+ if (BuildOpts.AddScopes && DeclsWithEndedScope.count(VD)) {
autoCreateBlock();
- appendScopeEnd(Block, *I, S);
+ appendScopeEnd(Block, VD, S);
}
continue;
}
// If this destructor is marked as a no-return destructor, we need to
// create a new block for the destructor which does not have as a successor
// anything built thus far: control won't flow out of this block.
- QualType Ty = (*I)->getType();
+ QualType Ty = VD->getType();
if (Ty->isReferenceType()) {
- Ty = getReferenceInitTemporaryType((*I)->getInit());
+ Ty = getReferenceInitTemporaryType(VD->getInit());
}
Ty = Context->getBaseElementType(Ty);
@@ -1874,9 +1869,9 @@ void CFGBuilder::addAutomaticObjDtors(LocalScope::const_iterator B,
autoCreateBlock();
// Add ScopeEnd just after automatic obj destructor.
- if (BuildOpts.AddScopes && DeclsWithEndedScope.count(*I))
- appendScopeEnd(Block, *I, S);
- appendAutomaticObjDtor(Block, *I, S);
+ if (BuildOpts.AddScopes && DeclsWithEndedScope.count(VD))
+ appendScopeEnd(Block, VD, S);
+ appendAutomaticObjDtor(Block, VD, S);
}
}
@@ -2149,6 +2144,9 @@ CFGBlock *CFGBuilder::Visit(Stmt * S, AddStmtChoice asc,
case Stmt::InitListExprClass:
return VisitInitListExpr(cast<InitListExpr>(S), asc);
+ case Stmt::AttributedStmtClass:
+ return VisitAttributedStmt(cast<AttributedStmt>(S), asc);
+
case Stmt::AddrLabelExprClass:
return VisitAddrLabelExpr(cast<AddrLabelExpr>(S), asc);
@@ -2282,7 +2280,7 @@ CFGBlock *CFGBuilder::Visit(Stmt * S, AddStmtChoice asc,
return VisitObjCAtCatchStmt(cast<ObjCAtCatchStmt>(S));
case Stmt::ObjCAutoreleasePoolStmtClass:
- return VisitObjCAutoreleasePoolStmt(cast<ObjCAutoreleasePoolStmt>(S));
+ return VisitObjCAutoreleasePoolStmt(cast<ObjCAutoreleasePoolStmt>(S));
case Stmt::ObjCAtSynchronizedStmtClass:
return VisitObjCAtSynchronizedStmt(cast<ObjCAtSynchronizedStmt>(S));
@@ -2398,8 +2396,32 @@ CFGBlock *CFGBuilder::VisitAddrLabelExpr(AddrLabelExpr *A,
return Block;
}
-CFGBlock *CFGBuilder::VisitUnaryOperator(UnaryOperator *U,
- AddStmtChoice asc) {
+static bool isFallthroughStatement(const AttributedStmt *A) {
+ bool isFallthrough = hasSpecificAttr<FallThroughAttr>(A->getAttrs());
+ assert((!isFallthrough || isa<NullStmt>(A->getSubStmt())) &&
+ "expected fallthrough not to have children");
+ return isFallthrough;
+}
+
+CFGBlock *CFGBuilder::VisitAttributedStmt(AttributedStmt *A,
+ AddStmtChoice asc) {
+ // AttributedStmts for [[likely]] can have arbitrary statements as children,
+ // and the current visitation order here would add the AttributedStmts
+ // for [[likely]] after the child nodes, which is undesirable: For example,
+ // if the child contains an unconditional return, the [[likely]] would be
+ // considered unreachable.
+ // So only add the AttributedStmt for FallThrough, which has CFG effects and
+ // also no children, and omit the others. None of the other current StmtAttrs
+ // have semantic meaning for the CFG.
+ if (isFallthroughStatement(A) && asc.alwaysAdd(*this, A)) {
+ autoCreateBlock();
+ appendStmt(Block, A);
+ }
+
+ return VisitChildren(A);
+}
+
+CFGBlock *CFGBuilder::VisitUnaryOperator(UnaryOperator *U, AddStmtChoice asc) {
if (asc.alwaysAdd(*this, U)) {
autoCreateBlock();
appendStmt(Block, U);
@@ -2711,7 +2733,8 @@ CFGBlock *CFGBuilder::VisitChooseExpr(ChooseExpr *C,
return addStmt(C->getCond());
}
-CFGBlock *CFGBuilder::VisitCompoundStmt(CompoundStmt *C, bool ExternallyDestructed) {
+CFGBlock *CFGBuilder::VisitCompoundStmt(CompoundStmt *C,
+ bool ExternallyDestructed) {
LocalScope::const_iterator scopeBeginPos = ScopePos;
addLocalScopeForStmt(C);
@@ -2723,11 +2746,10 @@ CFGBlock *CFGBuilder::VisitCompoundStmt(CompoundStmt *C, bool ExternallyDestruct
CFGBlock *LastBlock = Block;
- for (CompoundStmt::reverse_body_iterator I=C->body_rbegin(), E=C->body_rend();
- I != E; ++I ) {
+ for (Stmt *S : llvm::reverse(C->body())) {
// If we hit a segment of code just containing ';' (NullStmts), we can
// get a null block back. In such cases, just use the LastBlock
- CFGBlock *newBlock = Visit(*I, AddStmtChoice::AlwaysAdd,
+ CFGBlock *newBlock = Visit(S, AddStmtChoice::AlwaysAdd,
ExternallyDestructed);
if (newBlock)
@@ -3047,7 +3069,7 @@ CFGBlock *CFGBuilder::VisitIfStmt(IfStmt *I) {
// control-flow transfer of '&&' or '||' go directly into the then/else
// blocks directly.
BinaryOperator *Cond =
- I->getConditionVariable()
+ (I->isConsteval() || I->getConditionVariable())
? nullptr
: dyn_cast<BinaryOperator>(I->getCond()->IgnoreParens());
CFGBlock *LastBlock;
@@ -3061,7 +3083,9 @@ CFGBlock *CFGBuilder::VisitIfStmt(IfStmt *I) {
Block->setTerminator(I);
// See if this is a known constant.
- const TryResult &KnownVal = tryEvaluateBool(I->getCond());
+ TryResult KnownVal;
+ if (!I->isConsteval())
+ KnownVal = tryEvaluateBool(I->getCond());
// Add the successors. If we know that specific branches are
// unreachable, inform addSuccessor() of that knowledge.
@@ -3122,9 +3146,9 @@ CFGBlock *CFGBuilder::VisitReturnStmt(Stmt *S) {
if (Expr *O = RS->getRetValue())
return Visit(O, AddStmtChoice::AlwaysAdd, /*ExternallyDestructed=*/true);
return Block;
- } else { // co_return
- return VisitChildren(S);
}
+ // co_return
+ return VisitChildren(S);
}
CFGBlock *CFGBuilder::VisitSEHExceptStmt(SEHExceptStmt *ES) {
@@ -3223,8 +3247,7 @@ CFGBlock *CFGBuilder::VisitSEHTryStmt(SEHTryStmt *Terminator) {
Succ = SEHTrySuccessor;
// Save the current "__try" context.
- SaveAndRestore<CFGBlock *> save_try(TryTerminatedBlock,
- NewTryTerminatedBlock);
+ SaveAndRestore<CFGBlock *> SaveTry(TryTerminatedBlock, NewTryTerminatedBlock);
cfg->addTryDispatchBlock(TryTerminatedBlock);
// Save the current value for the __leave target.
@@ -3258,7 +3281,7 @@ CFGBlock *CFGBuilder::VisitLabelStmt(LabelStmt *L) {
if (badCFG)
return nullptr;
- // We set Block to NULL to allow lazy creation of a new block (if necessary);
+ // We set Block to NULL to allow lazy creation of a new block (if necessary).
Block = nullptr;
// This block is now the implicit successor of other blocks.
@@ -3670,11 +3693,6 @@ CFGBlock *CFGBuilder::VisitObjCAtSynchronizedStmt(ObjCAtSynchronizedStmt *S) {
return addStmt(S->getSynchExpr());
}
-CFGBlock *CFGBuilder::VisitObjCAtTryStmt(ObjCAtTryStmt *S) {
- // FIXME
- return NYS();
-}
-
CFGBlock *CFGBuilder::VisitPseudoObjectExpr(PseudoObjectExpr *E) {
autoCreateBlock();
@@ -3835,16 +3853,37 @@ CFGBlock *CFGBuilder::VisitWhileStmt(WhileStmt *W) {
return EntryConditionBlock;
}
-CFGBlock *CFGBuilder::VisitObjCAtCatchStmt(ObjCAtCatchStmt *S) {
- // FIXME: For now we pretend that @catch and the code it contains does not
- // exit.
- return Block;
+CFGBlock *CFGBuilder::VisitObjCAtCatchStmt(ObjCAtCatchStmt *CS) {
+ // ObjCAtCatchStmts are treated like labels, so they are the first statement
+ // in a block.
+
+ // Save local scope position because in case of exception variable ScopePos
+ // won't be restored when traversing AST.
+ SaveAndRestore<LocalScope::const_iterator> save_scope_pos(ScopePos);
+
+ if (CS->getCatchBody())
+ addStmt(CS->getCatchBody());
+
+ CFGBlock *CatchBlock = Block;
+ if (!CatchBlock)
+ CatchBlock = createBlock();
+
+ appendStmt(CatchBlock, CS);
+
+ // Also add the ObjCAtCatchStmt as a label, like with regular labels.
+ CatchBlock->setLabel(CS);
+
+ // Bail out if the CFG is bad.
+ if (badCFG)
+ return nullptr;
+
+ // We set Block to NULL to allow lazy creation of a new block (if necessary).
+ Block = nullptr;
+
+ return CatchBlock;
}
CFGBlock *CFGBuilder::VisitObjCAtThrowStmt(ObjCAtThrowStmt *S) {
- // FIXME: This isn't complete. We basically treat @throw like a return
- // statement.
-
// If we were in the middle of a block we stop processing that block.
if (badCFG)
return nullptr;
@@ -3852,14 +3891,77 @@ CFGBlock *CFGBuilder::VisitObjCAtThrowStmt(ObjCAtThrowStmt *S) {
// Create the new block.
Block = createBlock(false);
- // The Exit block is the only successor.
- addSuccessor(Block, &cfg->getExit());
+ if (TryTerminatedBlock)
+ // The current try statement is the only successor.
+ addSuccessor(Block, TryTerminatedBlock);
+ else
+ // otherwise the Exit block is the only successor.
+ addSuccessor(Block, &cfg->getExit());
// Add the statement to the block. This may create new blocks if S contains
// control-flow (short-circuit operations).
return VisitStmt(S, AddStmtChoice::AlwaysAdd);
}
+CFGBlock *CFGBuilder::VisitObjCAtTryStmt(ObjCAtTryStmt *Terminator) {
+ // "@try"/"@catch" is a control-flow statement. Thus we stop processing the
+ // current block.
+ CFGBlock *TrySuccessor = nullptr;
+
+ if (Block) {
+ if (badCFG)
+ return nullptr;
+ TrySuccessor = Block;
+ } else
+ TrySuccessor = Succ;
+
+ // FIXME: Implement @finally support.
+ if (Terminator->getFinallyStmt())
+ return NYS();
+
+ CFGBlock *PrevTryTerminatedBlock = TryTerminatedBlock;
+
+ // Create a new block that will contain the try statement.
+ CFGBlock *NewTryTerminatedBlock = createBlock(false);
+ // Add the terminator in the try block.
+ NewTryTerminatedBlock->setTerminator(Terminator);
+
+ bool HasCatchAll = false;
+ for (ObjCAtCatchStmt *CS : Terminator->catch_stmts()) {
+ // The code after the try is the implicit successor.
+ Succ = TrySuccessor;
+ if (CS->hasEllipsis()) {
+ HasCatchAll = true;
+ }
+ Block = nullptr;
+ CFGBlock *CatchBlock = VisitObjCAtCatchStmt(CS);
+ if (!CatchBlock)
+ return nullptr;
+ // Add this block to the list of successors for the block with the try
+ // statement.
+ addSuccessor(NewTryTerminatedBlock, CatchBlock);
+ }
+
+ // FIXME: This needs updating when @finally support is added.
+ if (!HasCatchAll) {
+ if (PrevTryTerminatedBlock)
+ addSuccessor(NewTryTerminatedBlock, PrevTryTerminatedBlock);
+ else
+ addSuccessor(NewTryTerminatedBlock, &cfg->getExit());
+ }
+
+ // The code after the try is the implicit successor.
+ Succ = TrySuccessor;
+
+ // Save the current "try" context.
+ SaveAndRestore<CFGBlock *> SaveTry(TryTerminatedBlock, NewTryTerminatedBlock);
+ cfg->addTryDispatchBlock(TryTerminatedBlock);
+
+ assert(Terminator->getTryBody() && "try must contain a non-NULL body");
+ Block = nullptr;
+ return addStmt(Terminator->getTryBody());
+}
+
CFGBlock *CFGBuilder::VisitObjCMessageExpr(ObjCMessageExpr *ME,
AddStmtChoice asc) {
findConstructionContextsForArguments(ME);
@@ -4244,7 +4346,7 @@ CFGBlock *CFGBuilder::VisitCaseStmt(CaseStmt *CS) {
shouldAddCase(switchExclusivelyCovered, switchCond,
CS, *Context));
- // We set Block to NULL to allow lazy creation of a new block (if necessary)
+ // We set Block to NULL to allow lazy creation of a new block (if necessary).
Block = nullptr;
if (TopBlock) {
@@ -4280,7 +4382,7 @@ CFGBlock *CFGBuilder::VisitDefaultStmt(DefaultStmt *Terminator) {
// (including a fall-through to the code after the switch statement) to always
// be the last successor of a switch-terminated block.
- // We set Block to NULL to allow lazy creation of a new block (if necessary)
+ // We set Block to NULL to allow lazy creation of a new block (if necessary).
Block = nullptr;
// This block is now the implicit successor of other blocks.
@@ -4298,7 +4400,8 @@ CFGBlock *CFGBuilder::VisitCXXTryStmt(CXXTryStmt *Terminator) {
if (badCFG)
return nullptr;
TrySuccessor = Block;
- } else TrySuccessor = Succ;
+ } else
+ TrySuccessor = Succ;
CFGBlock *PrevTryTerminatedBlock = TryTerminatedBlock;
@@ -4308,10 +4411,10 @@ CFGBlock *CFGBuilder::VisitCXXTryStmt(CXXTryStmt *Terminator) {
NewTryTerminatedBlock->setTerminator(Terminator);
bool HasCatchAll = false;
- for (unsigned h = 0; h <Terminator->getNumHandlers(); ++h) {
+ for (unsigned I = 0, E = Terminator->getNumHandlers(); I != E; ++I) {
// The code after the try is the implicit successor.
Succ = TrySuccessor;
- CXXCatchStmt *CS = Terminator->getHandler(h);
+ CXXCatchStmt *CS = Terminator->getHandler(I);
if (CS->getExceptionDecl() == nullptr) {
HasCatchAll = true;
}
@@ -4334,7 +4437,7 @@ CFGBlock *CFGBuilder::VisitCXXTryStmt(CXXTryStmt *Terminator) {
Succ = TrySuccessor;
// Save the current "try" context.
- SaveAndRestore<CFGBlock*> save_try(TryTerminatedBlock, NewTryTerminatedBlock);
+ SaveAndRestore<CFGBlock *> SaveTry(TryTerminatedBlock, NewTryTerminatedBlock);
cfg->addTryDispatchBlock(TryTerminatedBlock);
assert(Terminator->getTryBlock() && "try must contain a non-NULL body");
@@ -4379,7 +4482,7 @@ CFGBlock *CFGBuilder::VisitCXXCatchStmt(CXXCatchStmt *CS) {
if (badCFG)
return nullptr;
- // We set Block to NULL to allow lazy creation of a new block (if necessary)
+ // We set Block to NULL to allow lazy creation of a new block (if necessary).
Block = nullptr;
return CatchBlock;
@@ -5287,13 +5390,11 @@ public:
Terminator->getCond()->printPretty(OS, Helper, Policy);
}
- void VisitCXXTryStmt(CXXTryStmt *CS) {
- OS << "try ...";
- }
+ void VisitCXXTryStmt(CXXTryStmt *) { OS << "try ..."; }
- void VisitSEHTryStmt(SEHTryStmt *CS) {
- OS << "__try ...";
- }
+ void VisitObjCAtTryStmt(ObjCAtTryStmt *) { OS << "@try ..."; }
+
+ void VisitSEHTryStmt(SEHTryStmt *CS) { OS << "__try ..."; }
void VisitAbstractConditionalOperator(AbstractConditionalOperator* C) {
if (Stmt *Cond = C->getCond())
@@ -5609,7 +5710,8 @@ static void print_elem(raw_ostream &OS, StmtPrinterHelper &Helper,
}
case CFGElement::Kind::TemporaryDtor: {
- const CXXBindTemporaryExpr *BT = E.castAs<CFGTemporaryDtor>().getBindTemporaryExpr();
+ const CXXBindTemporaryExpr *BT =
+ E.castAs<CFGTemporaryDtor>().getBindTemporaryExpr();
OS << "~";
BT->getType().print(OS, PrintingPolicy(Helper.getLangOpts()));
OS << "() (Temporary object destructor)\n";
@@ -5653,21 +5755,25 @@ static void print_block(raw_ostream &OS, const CFG* cfg,
OS << L->getName();
else if (CaseStmt *C = dyn_cast<CaseStmt>(Label)) {
OS << "case ";
- if (C->getLHS())
- C->getLHS()->printPretty(OS, &Helper,
- PrintingPolicy(Helper.getLangOpts()));
- if (C->getRHS()) {
+ if (const Expr *LHS = C->getLHS())
+ LHS->printPretty(OS, &Helper, PrintingPolicy(Helper.getLangOpts()));
+ if (const Expr *RHS = C->getRHS()) {
OS << " ... ";
- C->getRHS()->printPretty(OS, &Helper,
- PrintingPolicy(Helper.getLangOpts()));
+ RHS->printPretty(OS, &Helper, PrintingPolicy(Helper.getLangOpts()));
}
} else if (isa<DefaultStmt>(Label))
OS << "default";
else if (CXXCatchStmt *CS = dyn_cast<CXXCatchStmt>(Label)) {
OS << "catch (";
- if (CS->getExceptionDecl())
- CS->getExceptionDecl()->print(OS, PrintingPolicy(Helper.getLangOpts()),
- 0);
+ if (const VarDecl *ED = CS->getExceptionDecl())
+ ED->print(OS, PrintingPolicy(Helper.getLangOpts()), 0);
+ else
+ OS << "...";
+ OS << ")";
+ } else if (ObjCAtCatchStmt *CS = dyn_cast<ObjCAtCatchStmt>(Label)) {
+ OS << "@catch (";
+ if (const VarDecl *PD = CS->getCatchParamDecl())
+ PD->print(OS, PrintingPolicy(Helper.getLangOpts()), 0);
else
OS << "...";
OS << ")";
@@ -5882,7 +5988,7 @@ static bool isImmediateSinkBlock(const CFGBlock *Blk) {
// at least for now, but once we have better support for exceptions,
// we'd need to carefully handle the case when the throw is being
// immediately caught.
- if (std::any_of(Blk->begin(), Blk->end(), [](const CFGElement &Elm) {
+ if (llvm::any_of(*Blk, [](const CFGElement &Elm) {
if (Optional<CFGStmt> StmtElm = Elm.getAs<CFGStmt>())
if (isa<CXXThrowExpr>(StmtElm->getStmt()))
return true;
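The CFG.cpp changes above add handling for AttributedStmt, consteval if conditions, and Objective-C @try/@catch/@throw. A small C++ sketch, not taken from the patch, of the two AttributedStmt cases the new VisitAttributedStmt distinguishes:

// Illustration only: [[fallthrough]] has CFG significance and no children, so
// it is appended to the current block; [[likely]]/[[unlikely]] wrap arbitrary
// statements and are skipped so their children stay reachable.
int classify(int x) {
  switch (x) {
  case 0:
    ++x;
    [[fallthrough]];       // appended to the CFG block
  case 1:
    return x;
  default:
    [[likely]] return -1;  // only the return is added to the CFG
  }
}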
diff --git a/clang/lib/Analysis/CloneDetection.cpp b/clang/lib/Analysis/CloneDetection.cpp
index 0a1122bd5a4a..65ac4ad6a5e5 100644
--- a/clang/lib/Analysis/CloneDetection.cpp
+++ b/clang/lib/Analysis/CloneDetection.cpp
@@ -147,9 +147,8 @@ void OnlyLargestCloneConstraint::constrain(
// Erasing a list of indexes from the vector should be done with decreasing
// indexes. As IndexesToRemove is constructed with increasing values, we just
// reverse iterate over it to get the desired order.
- for (auto I = IndexesToRemove.rbegin(); I != IndexesToRemove.rend(); ++I) {
- Result.erase(Result.begin() + *I);
- }
+ for (unsigned I : llvm::reverse(IndexesToRemove))
+ Result.erase(Result.begin() + I);
}
bool FilenamePatternConstraint::isAutoGenerated(
diff --git a/clang/lib/Analysis/ObjCNoReturn.cpp b/clang/lib/Analysis/ObjCNoReturn.cpp
index fe1edb496859..9d7c365c3b99 100644
--- a/clang/lib/Analysis/ObjCNoReturn.cpp
+++ b/clang/lib/Analysis/ObjCNoReturn.cpp
@@ -54,12 +54,9 @@ bool ObjCNoReturn::isImplicitNoReturn(const ObjCMessageExpr *ME) {
}
if (const ObjCInterfaceDecl *ID = ME->getReceiverInterface()) {
- if (isSubclass(ID, NSExceptionII)) {
- for (unsigned i = 0; i < NUM_RAISE_SELECTORS; ++i) {
- if (S == NSExceptionInstanceRaiseSelectors[i])
- return true;
- }
- }
+ if (isSubclass(ID, NSExceptionII) &&
+ llvm::is_contained(NSExceptionInstanceRaiseSelectors, S))
+ return true;
}
return false;
diff --git a/clang/lib/Analysis/ReachableCode.cpp b/clang/lib/Analysis/ReachableCode.cpp
index 221d137dadb8..5be8180113da 100644
--- a/clang/lib/Analysis/ReachableCode.cpp
+++ b/clang/lib/Analysis/ReachableCode.cpp
@@ -87,10 +87,8 @@ static bool isDeadReturn(const CFGBlock *B, const Stmt *S) {
// block, or may be in a subsequent block because of destructors.
const CFGBlock *Current = B;
while (true) {
- for (CFGBlock::const_reverse_iterator I = Current->rbegin(),
- E = Current->rend();
- I != E; ++I) {
- if (Optional<CFGStmt> CS = I->getAs<CFGStmt>()) {
+ for (const CFGElement &CE : llvm::reverse(*Current)) {
+ if (Optional<CFGStmt> CS = CE.getAs<CFGStmt>()) {
if (const ReturnStmt *RS = dyn_cast<ReturnStmt>(CS->getStmt())) {
if (RS == S)
return true;
@@ -227,7 +225,8 @@ static bool isConfigurationValue(const Stmt *S,
if (IncludeIntegers) {
if (SilenceableCondVal && !SilenceableCondVal->getBegin().isValid())
*SilenceableCondVal = E->getSourceRange();
- return WrappedInParens || isExpandedFromConfigurationMacro(E, PP, IgnoreYES_NO);
+ return WrappedInParens ||
+ isExpandedFromConfigurationMacro(E, PP, IgnoreYES_NO);
}
return false;
}
@@ -530,12 +529,11 @@ unsigned DeadCodeScan::scanBackwards(const clang::CFGBlock *Start,
// earliest location.
if (!DeferredLocs.empty()) {
llvm::array_pod_sort(DeferredLocs.begin(), DeferredLocs.end(), SrcCmp);
- for (DeferredLocsTy::iterator I = DeferredLocs.begin(),
- E = DeferredLocs.end(); I != E; ++I) {
- const CFGBlock *Block = I->first;
+ for (const auto &I : DeferredLocs) {
+ const CFGBlock *Block = I.first;
if (Reachable[Block->getBlockID()])
continue;
- reportDeadCode(Block, I->second, CB);
+ reportDeadCode(Block, I.second, CB);
count += scanMaybeReachableFromBlock(Block, PP, Reachable);
}
}
@@ -694,18 +692,15 @@ void FindUnreachableCode(AnalysisDeclContext &AC, Preprocessor &PP,
// If there aren't explicit EH edges, we should include the 'try' dispatch
// blocks as roots.
if (!AC.getCFGBuildOptions().AddEHEdges) {
- for (CFG::try_block_iterator I = cfg->try_blocks_begin(),
- E = cfg->try_blocks_end() ; I != E; ++I) {
- numReachable += scanMaybeReachableFromBlock(*I, PP, reachable);
- }
+ for (const CFGBlock *B : cfg->try_blocks())
+ numReachable += scanMaybeReachableFromBlock(B, PP, reachable);
if (numReachable == cfg->getNumBlockIDs())
return;
}
// There are some unreachable blocks. We need to find the root blocks that
// contain code that should be considered unreachable.
- for (CFG::iterator I = cfg->begin(), E = cfg->end(); I != E; ++I) {
- const CFGBlock *block = *I;
+ for (const CFGBlock *block : *cfg) {
// A block may have been marked reachable during this loop.
if (reachable[block->getBlockID()])
continue;
diff --git a/clang/lib/Analysis/RetainSummaryManager.cpp b/clang/lib/Analysis/RetainSummaryManager.cpp
index 7ed1e40333f4..1d7b968e994f 100644
--- a/clang/lib/Analysis/RetainSummaryManager.cpp
+++ b/clang/lib/Analysis/RetainSummaryManager.cpp
@@ -397,8 +397,7 @@ const RetainSummary *RetainSummaryManager::getSummaryForObjCOrCFObject(
ArgEffect(DoNothing), ArgEffect(DoNothing));
} else if (FName.startswith("NSLog")) {
return getDoNothingSummary();
- } else if (FName.startswith("NS") &&
- (FName.find("Insert") != StringRef::npos)) {
+ } else if (FName.startswith("NS") && FName.contains("Insert")) {
// Whitelist NSXXInsertXX, for example NSMapInsertIfAbsent, since they can
// be deallocated by NSMapRemove. (radar://11152419)
ScratchArgs = AF.add(ScratchArgs, 1, ArgEffect(StopTracking));
diff --git a/clang/lib/Analysis/ThreadSafety.cpp b/clang/lib/Analysis/ThreadSafety.cpp
index 5b2c882c4235..b196ffa73cbf 100644
--- a/clang/lib/Analysis/ThreadSafety.cpp
+++ b/clang/lib/Analysis/ThreadSafety.cpp
@@ -86,11 +86,9 @@ class CapExprSet : public SmallVector<CapabilityExpr, 4> {
public:
/// Push M onto list, but discard duplicates.
void push_back_nodup(const CapabilityExpr &CapE) {
- iterator It = std::find_if(begin(), end(),
- [=](const CapabilityExpr &CapE2) {
- return CapE.equals(CapE2);
- });
- if (It == end())
+ if (llvm::none_of(*this, [=](const CapabilityExpr &CapE2) {
+ return CapE.equals(CapE2);
+ }))
push_back(CapE);
}
};
@@ -849,6 +847,11 @@ static void findBlockLocations(CFG *CFGraph,
// location.
CurrBlockInfo->EntryLoc = CurrBlockInfo->ExitLoc =
BlockInfo[(*CurrBlock->pred_begin())->getBlockID()].ExitLoc;
+ } else if (CurrBlock->succ_size() == 1 && *CurrBlock->succ_begin()) {
+ // The block is empty, and has a single successor. Use its entry
+ // location.
+ CurrBlockInfo->EntryLoc = CurrBlockInfo->ExitLoc =
+ BlockInfo[(*CurrBlock->succ_begin())->getBlockID()].EntryLoc;
}
}
}
@@ -1050,7 +1053,7 @@ public:
const CFGBlock* PredBlock,
const CFGBlock *CurrBlock);
- bool join(const FactEntry &a, const FactEntry &b);
+ bool join(const FactEntry &a, const FactEntry &b, bool CanModify);
void intersectAndWarn(FactSet &EntrySet, const FactSet &ExitSet,
SourceLocation JoinLoc, LockErrorKind EntryLEK,
@@ -2188,25 +2191,28 @@ void BuildLockset::VisitDeclStmt(const DeclStmt *S) {
}
}
-/// Given two facts merging on a join point, decide whether to warn and which
-/// one to keep.
+/// Given two facts merging on a join point, possibly warn and decide whether to
+/// keep or replace.
///
-/// \return false if we should keep \p A, true if we should keep \p B.
-bool ThreadSafetyAnalyzer::join(const FactEntry &A, const FactEntry &B) {
+/// \param CanModify Whether we can replace \p A by \p B.
+/// \return false if we should keep \p A, true if we should take \p B.
+bool ThreadSafetyAnalyzer::join(const FactEntry &A, const FactEntry &B,
+ bool CanModify) {
if (A.kind() != B.kind()) {
// For managed capabilities, the destructor should unlock in the right mode
// anyway. For asserted capabilities no unlocking is needed.
if ((A.managed() || A.asserted()) && (B.managed() || B.asserted())) {
- // The shared capability subsumes the exclusive capability.
- return B.kind() == LK_Shared;
- } else {
- Handler.handleExclusiveAndShared("mutex", B.toString(), B.loc(), A.loc());
- // Take the exclusive capability to reduce further warnings.
- return B.kind() == LK_Exclusive;
+ // The shared capability subsumes the exclusive capability, if possible.
+ bool ShouldTakeB = B.kind() == LK_Shared;
+ if (CanModify || !ShouldTakeB)
+ return ShouldTakeB;
}
+ Handler.handleExclusiveAndShared("mutex", B.toString(), B.loc(), A.loc());
+ // Take the exclusive capability to reduce further warnings.
+ return CanModify && B.kind() == LK_Exclusive;
} else {
// The non-asserted capability is the one we want to track.
- return A.asserted() && !B.asserted();
+ return CanModify && A.asserted() && !B.asserted();
}
}
@@ -2237,8 +2243,8 @@ void ThreadSafetyAnalyzer::intersectAndWarn(FactSet &EntrySet,
FactSet::iterator EntryIt = EntrySet.findLockIter(FactMan, ExitFact);
if (EntryIt != EntrySet.end()) {
- if (join(FactMan[*EntryIt], ExitFact) &&
- EntryLEK == LEK_LockedSomePredecessors)
+ if (join(FactMan[*EntryIt], ExitFact,
+ EntryLEK != LEK_LockedSomeLoopIterations))
*EntryIt = Fact;
} else if (!ExitFact.managed()) {
ExitFact.handleRemovalFromIntersection(ExitSet, FactMan, JoinLoc,
@@ -2412,7 +2418,6 @@ void ThreadSafetyAnalyzer::runAnalysis(AnalysisDeclContext &AC) {
// union because the real error is probably that we forgot to unlock M on
// all code paths.
bool LocksetInitialized = false;
- SmallVector<CFGBlock *, 8> SpecialBlocks;
for (CFGBlock::const_pred_iterator PI = CurrBlock->pred_begin(),
PE = CurrBlock->pred_end(); PI != PE; ++PI) {
// if *PI -> CurrBlock is a back edge
@@ -2429,17 +2434,6 @@ void ThreadSafetyAnalyzer::runAnalysis(AnalysisDeclContext &AC) {
// Okay, we can reach this block from the entry.
CurrBlockInfo->Reachable = true;
- // If the previous block ended in a 'continue' or 'break' statement, then
- // a difference in locksets is probably due to a bug in that block, rather
- // than in some other predecessor. In that case, keep the other
- // predecessor's lockset.
- if (const Stmt *Terminator = (*PI)->getTerminatorStmt()) {
- if (isa<ContinueStmt>(Terminator) || isa<BreakStmt>(Terminator)) {
- SpecialBlocks.push_back(*PI);
- continue;
- }
- }
-
FactSet PrevLockset;
getEdgeLockset(PrevLockset, PrevBlockInfo->ExitSet, *PI, CurrBlock);
@@ -2447,9 +2441,14 @@ void ThreadSafetyAnalyzer::runAnalysis(AnalysisDeclContext &AC) {
CurrBlockInfo->EntrySet = PrevLockset;
LocksetInitialized = true;
} else {
- intersectAndWarn(CurrBlockInfo->EntrySet, PrevLockset,
- CurrBlockInfo->EntryLoc,
- LEK_LockedSomePredecessors);
+ // Surprisingly 'continue' doesn't always produce back edges, because
+ // the CFG has empty "transition" blocks where they meet with the end
+ // of the regular loop body. We still want to diagnose these as loop warnings.
+ intersectAndWarn(
+ CurrBlockInfo->EntrySet, PrevLockset, CurrBlockInfo->EntryLoc,
+ isa_and_nonnull<ContinueStmt>((*PI)->getTerminatorStmt())
+ ? LEK_LockedSomeLoopIterations
+ : LEK_LockedSomePredecessors);
}
}
@@ -2457,35 +2456,6 @@ void ThreadSafetyAnalyzer::runAnalysis(AnalysisDeclContext &AC) {
if (!CurrBlockInfo->Reachable)
continue;
- // Process continue and break blocks. Assume that the lockset for the
- // resulting block is unaffected by any discrepancies in them.
- for (const auto *PrevBlock : SpecialBlocks) {
- unsigned PrevBlockID = PrevBlock->getBlockID();
- CFGBlockInfo *PrevBlockInfo = &BlockInfo[PrevBlockID];
-
- if (!LocksetInitialized) {
- CurrBlockInfo->EntrySet = PrevBlockInfo->ExitSet;
- LocksetInitialized = true;
- } else {
- // Determine whether this edge is a loop terminator for diagnostic
- // purposes. FIXME: A 'break' statement might be a loop terminator, but
- // it might also be part of a switch. Also, a subsequent destructor
- // might add to the lockset, in which case the real issue might be a
- // double lock on the other path.
- const Stmt *Terminator = PrevBlock->getTerminatorStmt();
- bool IsLoop = Terminator && isa<ContinueStmt>(Terminator);
-
- FactSet PrevLockset;
- getEdgeLockset(PrevLockset, PrevBlockInfo->ExitSet,
- PrevBlock, CurrBlock);
-
- // Do not update EntrySet.
- intersectAndWarn(
- CurrBlockInfo->EntrySet, PrevLockset, PrevBlockInfo->ExitLoc,
- IsLoop ? LEK_LockedSomeLoopIterations : LEK_LockedSomePredecessors);
- }
- }
-
BuildLockset LocksetBuilder(this, *CurrBlockInfo);
// Visit all the statements in the basic block.
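The ThreadSafety.cpp hunks above drop the separate "special blocks" pass for break/continue predecessors and instead classify continue edges during the normal join, via the new CanModify parameter and the LEK_LockedSomeLoopIterations kind. A minimal sketch, assuming Clang's -Wthread-safety attributes and made-up identifiers, of the pattern this targets:

// Sketch only: a capability released on some loop iterations but carried to
// the loop head on others; the join should now be reported as a
// "held on some loop iterations" warning rather than a generic
// predecessor mismatch.
struct __attribute__((capability("mutex"))) Mutex {
  void lock() __attribute__((acquire_capability()));
  void unlock() __attribute__((release_capability()));
};

void drain(Mutex &M, int *Items, int N) {
  for (int I = 0; I < N; ++I) {
    M.lock();
    if (Items[I] == 0)
      continue;   // reaches the loop head with M still held
    Items[I] = 0;
    M.unlock();
  }
}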
diff --git a/clang/lib/Basic/Builtins.cpp b/clang/lib/Basic/Builtins.cpp
index 7118aa9dc210..2b0f4071662c 100644
--- a/clang/lib/Basic/Builtins.cpp
+++ b/clang/lib/Basic/Builtins.cpp
@@ -72,7 +72,7 @@ bool Builtin::Context::builtinIsSupported(const Builtin::Info &BuiltinInfo,
bool OclC1Unsupported = (LangOpts.OpenCLVersion / 100) != 1 &&
(BuiltinInfo.Langs & ALL_OCLC_LANGUAGES ) == OCLC1X_LANG;
bool OclC2Unsupported =
- (LangOpts.OpenCLVersion != 200 && !LangOpts.OpenCLCPlusPlus) &&
+ (LangOpts.getOpenCLCompatibleVersion() != 200) &&
(BuiltinInfo.Langs & ALL_OCLC_LANGUAGES) == OCLC20_LANG;
bool OclCUnsupported = !LangOpts.OpenCL &&
(BuiltinInfo.Langs & ALL_OCLC_LANGUAGES);
diff --git a/clang/lib/Basic/CLWarnings.cpp b/clang/lib/Basic/CLWarnings.cpp
new file mode 100644
index 000000000000..0cf367d9f7f6
--- /dev/null
+++ b/clang/lib/Basic/CLWarnings.cpp
@@ -0,0 +1,28 @@
+//===--- CLWarnings.cpp - Maps some cl.exe warning ids ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file maps cl.exe warning IDs to clang diagnostic groups.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Basic/CLWarnings.h"
+#include "clang/Basic/DiagnosticCategories.h"
+
+using namespace clang;
+
+llvm::Optional<diag::Group>
+clang::diagGroupFromCLWarningID(unsigned CLWarningID) {
+ switch (CLWarningID) {
+ case 4005: return diag::Group::MacroRedefined;
+ case 4018: return diag::Group::SignCompare;
+ case 4100: return diag::Group::UnusedParameter;
+ case 4910: return diag::Group::DllexportExplicitInstantiationDecl;
+ case 4996: return diag::Group::DeprecatedDeclarations;
+ }
+ return {};
+}
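A hypothetical usage sketch, not part of this file, tying the new diagGroupFromCLWarningID() to the DiagnosticsEngine::setSeverityForGroup(diag::Group, ...) overload added in Diagnostic.cpp further below; the helper name disableCLWarning is made up.

#include "clang/Basic/CLWarnings.h"
#include "clang/Basic/Diagnostic.h"

// Hypothetical helper: silence a cl.exe-style warning id by mapping it to the
// corresponding clang diagnostic group, if one is known.
static void disableCLWarning(clang::DiagnosticsEngine &Diags, unsigned CLId) {
  if (llvm::Optional<clang::diag::Group> G =
          clang::diagGroupFromCLWarningID(CLId))
    Diags.setSeverityForGroup(clang::diag::Flavor::WarningOrError, *G,
                              clang::diag::Severity::Ignored,
                              clang::SourceLocation());
}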
diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp
index 766135bcb376..e82a3a705e70 100644
--- a/clang/lib/Basic/Cuda.cpp
+++ b/clang/lib/Basic/Cuda.cpp
@@ -36,6 +36,14 @@ const char *CudaVersionToString(CudaVersion V) {
return "11.1";
case CudaVersion::CUDA_112:
return "11.2";
+ case CudaVersion::CUDA_113:
+ return "11.3";
+ case CudaVersion::CUDA_114:
+ return "11.4";
+ case CudaVersion::CUDA_115:
+ return "11.5";
+ case CudaVersion::NEW:
+ return "";
}
llvm_unreachable("invalid enum");
}
@@ -54,6 +62,9 @@ CudaVersion CudaStringToVersion(const llvm::Twine &S) {
.Case("11.0", CudaVersion::CUDA_110)
.Case("11.1", CudaVersion::CUDA_111)
.Case("11.2", CudaVersion::CUDA_112)
+ .Case("11.3", CudaVersion::CUDA_113)
+ .Case("11.4", CudaVersion::CUDA_114)
+ .Case("11.5", CudaVersion::CUDA_115)
.Default(CudaVersion::UNKNOWN);
}
@@ -186,7 +197,7 @@ CudaVersion MinVersionForCudaArch(CudaArch A) {
CudaVersion MaxVersionForCudaArch(CudaArch A) {
// AMD GPUs do not depend on CUDA versions.
if (IsAMDGpuArch(A))
- return CudaVersion::LATEST;
+ return CudaVersion::NEW;
switch (A) {
case CudaArch::UNKNOWN:
@@ -194,8 +205,10 @@ CudaVersion MaxVersionForCudaArch(CudaArch A) {
case CudaArch::SM_20:
case CudaArch::SM_21:
return CudaVersion::CUDA_80;
+ case CudaArch::SM_30:
+ return CudaVersion::CUDA_110;
default:
- return CudaVersion::LATEST;
+ return CudaVersion::NEW;
}
}
@@ -227,6 +240,12 @@ CudaVersion ToCudaVersion(llvm::VersionTuple Version) {
return CudaVersion::CUDA_111;
case 112:
return CudaVersion::CUDA_112;
+ case 113:
+ return CudaVersion::CUDA_113;
+ case 114:
+ return CudaVersion::CUDA_114;
+ case 115:
+ return CudaVersion::CUDA_115;
default:
return CudaVersion::UNKNOWN;
}
diff --git a/clang/lib/Basic/Diagnostic.cpp b/clang/lib/Basic/Diagnostic.cpp
index d3b2122e9c59..9b7ad96b949f 100644
--- a/clang/lib/Basic/Diagnostic.cpp
+++ b/clang/lib/Basic/Diagnostic.cpp
@@ -408,6 +408,14 @@ bool DiagnosticsEngine::setSeverityForGroup(diag::Flavor Flavor,
return false;
}
+bool DiagnosticsEngine::setSeverityForGroup(diag::Flavor Flavor,
+ diag::Group Group,
+ diag::Severity Map,
+ SourceLocation Loc) {
+ return setSeverityForGroup(Flavor, Diags->getWarningOptionForGroup(Group),
+ Map, Loc);
+}
+
bool DiagnosticsEngine::setDiagnosticGroupWarningAsError(StringRef Group,
bool Enabled) {
// If we are enabling this feature, just set the diagnostic mappings to map to
@@ -924,7 +932,7 @@ FormatDiagnostic(const char *DiagStr, const char *DiagEnd,
}
// ---- INTEGERS ----
case DiagnosticsEngine::ak_sint: {
- int Val = getArgSInt(ArgNo);
+ int64_t Val = getArgSInt(ArgNo);
if (ModifierIs(Modifier, ModifierLen, "select")) {
HandleSelectModifier(*this, (unsigned)Val, Argument, ArgumentLen,
@@ -943,7 +951,7 @@ FormatDiagnostic(const char *DiagStr, const char *DiagEnd,
break;
}
case DiagnosticsEngine::ak_uint: {
- unsigned Val = getArgUInt(ArgNo);
+ uint64_t Val = getArgUInt(ArgNo);
if (ModifierIs(Modifier, ModifierLen, "select")) {
HandleSelectModifier(*this, Val, Argument, ArgumentLen, OutStr);
diff --git a/clang/lib/Basic/DiagnosticIDs.cpp b/clang/lib/Basic/DiagnosticIDs.cpp
index c333076d2efc..88801c683e8d 100644
--- a/clang/lib/Basic/DiagnosticIDs.cpp
+++ b/clang/lib/Basic/DiagnosticIDs.cpp
@@ -609,17 +609,23 @@ namespace {
// Second the table of options, sorted by name for fast binary lookup.
static const WarningOption OptionTable[] = {
-#define GET_DIAG_TABLE
+#define DIAG_ENTRY(GroupName, FlagNameOffset, Members, SubGroups) \
+ {FlagNameOffset, Members, SubGroups},
#include "clang/Basic/DiagnosticGroups.inc"
-#undef GET_DIAG_TABLE
+#undef DIAG_ENTRY
};
+StringRef DiagnosticIDs::getWarningOptionForGroup(diag::Group Group) {
+ return OptionTable[static_cast<int>(Group)].getName();
+}
+
/// getWarningOptionForDiag - Return the lowest-level warning option that
/// enables the specified diagnostic. If there is no -Wfoo flag that controls
/// the diagnostic, this returns null.
StringRef DiagnosticIDs::getWarningOptionForDiag(unsigned DiagID) {
if (const StaticDiagInfoRec *Info = GetDiagInfo(DiagID))
- return OptionTable[Info->getOptionGroupIndex()].getName();
+ return getWarningOptionForGroup(
+ static_cast<diag::Group>(Info->getOptionGroupIndex()));
return StringRef();
}
diff --git a/clang/lib/Basic/FileManager.cpp b/clang/lib/Basic/FileManager.cpp
index 74cd2f295be6..f4cf27848d7d 100644
--- a/clang/lib/Basic/FileManager.cpp
+++ b/clang/lib/Basic/FileManager.cpp
@@ -123,16 +123,16 @@ FileManager::getDirectoryRef(StringRef DirName, bool CacheFailure) {
DirName != llvm::sys::path::root_path(DirName) &&
llvm::sys::path::is_separator(DirName.back()))
DirName = DirName.substr(0, DirName.size()-1);
-#ifdef _WIN32
- // Fixing a problem with "clang C:test.c" on Windows.
- // Stat("C:") does not recognize "C:" as a valid directory
- std::string DirNameStr;
- if (DirName.size() > 1 && DirName.back() == ':' &&
- DirName.equals_insensitive(llvm::sys::path::root_name(DirName))) {
- DirNameStr = DirName.str() + '.';
- DirName = DirNameStr;
+ Optional<std::string> DirNameStr;
+ if (is_style_windows(llvm::sys::path::Style::native)) {
+ // Fixing a problem with "clang C:test.c" on Windows.
+ // Stat("C:") does not recognize "C:" as a valid directory
+ if (DirName.size() > 1 && DirName.back() == ':' &&
+ DirName.equals_insensitive(llvm::sys::path::root_name(DirName))) {
+ DirNameStr = DirName.str() + '.';
+ DirName = *DirNameStr;
+ }
}
-#endif
++NumDirLookups;
@@ -276,6 +276,18 @@ FileManager::getFileRef(StringRef Filename, bool openFile, bool CacheFailure) {
} else {
// Name mismatch. We need a redirect. First grab the actual entry we want
// to return.
+ //
+ // This redirection logic intentionally leaks the external name of a
+ // redirected file that uses 'use-external-name' in \a
+ // vfs::RedirectionFileSystem. This allows clang to report the external
+ // name to users (in diagnostics) and to tools that don't have access to
+ // the VFS (in debug info and dependency '.d' files).
+ //
+ // FIXME: This is pretty complicated. It's also inconsistent with how
+ // "real" filesystems behave and confuses parts of clang expect to see the
+ // name-as-accessed on the \a FileEntryRef. Maybe the returned \a
+ // FileEntryRef::getName() could return the accessed name unmodified, but
+ // make the external name available via a separate API.
auto &Redirection =
*SeenFileEntries
.insert({Status.getName(), FileEntryRef::MapValue(UFE, DirInfo)})
diff --git a/clang/lib/Basic/LangOptions.cpp b/clang/lib/Basic/LangOptions.cpp
index dc392d5352aa..b6dc73d66304 100644
--- a/clang/lib/Basic/LangOptions.cpp
+++ b/clang/lib/Basic/LangOptions.cpp
@@ -11,6 +11,8 @@
//===----------------------------------------------------------------------===//
#include "clang/Basic/LangOptions.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Path.h"
using namespace clang;
@@ -45,9 +47,37 @@ bool LangOptions::isNoBuiltinFunc(StringRef FuncName) const {
VersionTuple LangOptions::getOpenCLVersionTuple() const {
const int Ver = OpenCLCPlusPlus ? OpenCLCPlusPlusVersion : OpenCLVersion;
+ if (OpenCLCPlusPlus && Ver != 100)
+ return VersionTuple(Ver / 100);
return VersionTuple(Ver / 100, (Ver % 100) / 10);
}
+unsigned LangOptions::getOpenCLCompatibleVersion() const {
+ if (!OpenCLCPlusPlus)
+ return OpenCLVersion;
+ if (OpenCLCPlusPlusVersion == 100)
+ return 200;
+ if (OpenCLCPlusPlusVersion == 202100)
+ return 300;
+ llvm_unreachable("Unknown OpenCL version");
+}
+
+void LangOptions::remapPathPrefix(SmallString<256> &Path) const {
+ for (const auto &Entry : MacroPrefixMap)
+ if (llvm::sys::path::replace_path_prefix(Path, Entry.first, Entry.second))
+ break;
+}
+
+std::string LangOptions::getOpenCLVersionString() const {
+ std::string Result;
+ {
+ llvm::raw_string_ostream Out(Result);
+ Out << (OpenCLCPlusPlus ? "C++ for OpenCL" : "OpenCL C") << " version "
+ << getOpenCLVersionTuple().getAsString();
+ }
+ return Result;
+}
+
FPOptions FPOptions::defaultWithoutTrailingStorage(const LangOptions &LO) {
FPOptions result(LO);
return result;
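For readability, a flat restatement, not part of the patch, of the mapping that the new getOpenCLCompatibleVersion() above encodes: C++ for OpenCL 1.0 is treated as compatible with OpenCL C 2.0, and C++ for OpenCL 2021 with OpenCL C 3.0.

// Restatement for illustration only; the real function asserts on unknown
// C++ for OpenCL versions instead of returning 0.
static unsigned openCLCompatibleVersion(bool IsCxxForOpenCL, unsigned Ver) {
  if (!IsCxxForOpenCL)
    return Ver;        // OpenCL C versions (e.g. 120, 200, 300) pass through
  if (Ver == 100)
    return 200;        // C++ for OpenCL 1.0  -> OpenCL C 2.0
  if (Ver == 202100)
    return 300;        // C++ for OpenCL 2021 -> OpenCL C 3.0
  return 0;
}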
diff --git a/clang/lib/Basic/Module.cpp b/clang/lib/Basic/Module.cpp
index b6cf1624ef01..09bd3251fea0 100644
--- a/clang/lib/Basic/Module.cpp
+++ b/clang/lib/Basic/Module.cpp
@@ -121,9 +121,7 @@ static bool hasFeature(StringRef Feature, const LangOptions &LangOpts,
.Default(Target.hasFeature(Feature) ||
isPlatformEnvironment(Target, Feature));
if (!HasFeature)
- HasFeature = std::find(LangOpts.ModuleFeatures.begin(),
- LangOpts.ModuleFeatures.end(),
- Feature) != LangOpts.ModuleFeatures.end();
+ HasFeature = llvm::is_contained(LangOpts.ModuleFeatures, Feature);
return HasFeature;
}
@@ -203,7 +201,7 @@ static void printModuleId(raw_ostream &OS, InputIter Begin, InputIter End,
OS << ".";
StringRef Name = getModuleNameFromComponent(*It);
- if (!AllowStringLiterals || isValidIdentifier(Name))
+ if (!AllowStringLiterals || isValidAsciiIdentifier(Name))
OS << Name;
else {
OS << '"';
diff --git a/clang/lib/Basic/OpenCLOptions.cpp b/clang/lib/Basic/OpenCLOptions.cpp
index 2e215b185f66..b7408f39bdab 100644
--- a/clang/lib/Basic/OpenCLOptions.cpp
+++ b/clang/lib/Basic/OpenCLOptions.cpp
@@ -111,7 +111,9 @@ bool OpenCLOptions::diagnoseUnsupportedFeatureDependencies(
// Feature pairs. First feature in a pair requires the second one to be
// supported.
static const llvm::StringMap<llvm::StringRef> DependentFeaturesMap = {
- {"__opencl_c_read_write_images", "__opencl_c_images"}};
+ {"__opencl_c_read_write_images", "__opencl_c_images"},
+ {"__opencl_c_3d_image_writes", "__opencl_c_images"},
+ {"__opencl_c_pipes", "__opencl_c_generic_address_space"}};
auto OpenCLFeaturesMap = TI.getSupportedOpenCLOpts();
@@ -130,7 +132,8 @@ bool OpenCLOptions::diagnoseFeatureExtensionDifferences(
const TargetInfo &TI, DiagnosticsEngine &Diags) {
// Extensions and equivalent feature pairs.
static const llvm::StringMap<llvm::StringRef> FeatureExtensionMap = {
- {"cl_khr_fp64", "__opencl_c_fp64"}};
+ {"cl_khr_fp64", "__opencl_c_fp64"},
+ {"cl_khr_3d_image_writes", "__opencl_c_3d_image_writes"}};
auto OpenCLFeaturesMap = TI.getSupportedOpenCLOpts();
diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp
index cfdba09eb1ec..9e74e05bd863 100644
--- a/clang/lib/Basic/OpenMPKinds.cpp
+++ b/clang/lib/Basic/OpenMPKinds.cpp
@@ -21,7 +21,7 @@ using namespace clang;
using namespace llvm::omp;
unsigned clang::getOpenMPSimpleClauseType(OpenMPClauseKind Kind, StringRef Str,
- unsigned OpenMPVersion) {
+ const LangOptions &LangOpts) {
switch (Kind) {
case OMPC_default:
return llvm::StringSwitch<unsigned>(Str)
@@ -59,7 +59,9 @@ unsigned clang::getOpenMPSimpleClauseType(OpenMPClauseKind Kind, StringRef Str,
.Case(#Name, static_cast<unsigned>(OMPC_MAP_MODIFIER_##Name))
#include "clang/Basic/OpenMPKinds.def"
.Default(OMPC_MAP_unknown);
- if (OpenMPVersion < 51 && Type == OMPC_MAP_MODIFIER_present)
+ if (LangOpts.OpenMP < 51 && Type == OMPC_MAP_MODIFIER_present)
+ return OMPC_MAP_MODIFIER_unknown;
+ if (!LangOpts.OpenMPExtensions && Type == OMPC_MAP_MODIFIER_ompx_hold)
return OMPC_MAP_MODIFIER_unknown;
return Type;
}
@@ -70,7 +72,7 @@ unsigned clang::getOpenMPSimpleClauseType(OpenMPClauseKind Kind, StringRef Str,
.Case(#Name, static_cast<unsigned>(OMPC_MOTION_MODIFIER_##Name))
#include "clang/Basic/OpenMPKinds.def"
.Default(OMPC_MOTION_MODIFIER_unknown);
- if (OpenMPVersion < 51 && Type == OMPC_MOTION_MODIFIER_present)
+ if (LangOpts.OpenMP < 51 && Type == OMPC_MOTION_MODIFIER_present)
return OMPC_MOTION_MODIFIER_unknown;
return Type;
}
@@ -123,6 +125,16 @@ unsigned clang::getOpenMPSimpleClauseType(OpenMPClauseKind Kind, StringRef Str,
#define OPENMP_REDUCTION_MODIFIER(Name) .Case(#Name, OMPC_REDUCTION_##Name)
#include "clang/Basic/OpenMPKinds.def"
.Default(OMPC_REDUCTION_unknown);
+ case OMPC_adjust_args:
+ return llvm::StringSwitch<OpenMPAdjustArgsOpKind>(Str)
+#define OPENMP_ADJUST_ARGS_KIND(Name) .Case(#Name, OMPC_ADJUST_ARGS_##Name)
+#include "clang/Basic/OpenMPKinds.def"
+ .Default(OMPC_ADJUST_ARGS_unknown);
+ case OMPC_bind:
+ return llvm::StringSwitch<unsigned>(Str)
+#define OPENMP_BIND_KIND(Name) .Case(#Name, OMPC_BIND_##Name)
+#include "clang/Basic/OpenMPKinds.def"
+ .Default(OMPC_BIND_unknown);
case OMPC_unknown:
case OMPC_threadprivate:
case OMPC_if:
@@ -183,6 +195,8 @@ unsigned clang::getOpenMPSimpleClauseType(OpenMPClauseKind Kind, StringRef Str,
case OMPC_exclusive:
case OMPC_uses_allocators:
case OMPC_affinity:
+ case OMPC_when:
+ case OMPC_append_args:
break;
default:
break;
@@ -366,6 +380,26 @@ const char *clang::getOpenMPSimpleClauseTypeName(OpenMPClauseKind Kind,
#include "clang/Basic/OpenMPKinds.def"
}
llvm_unreachable("Invalid OpenMP 'reduction' clause modifier");
+ case OMPC_adjust_args:
+ switch (Type) {
+ case OMPC_ADJUST_ARGS_unknown:
+ return "unknown";
+#define OPENMP_ADJUST_ARGS_KIND(Name) \
+ case OMPC_ADJUST_ARGS_##Name: \
+ return #Name;
+#include "clang/Basic/OpenMPKinds.def"
+ }
+ llvm_unreachable("Invalid OpenMP 'adjust_args' clause kind");
+ case OMPC_bind:
+ switch (Type) {
+ case OMPC_BIND_unknown:
+ return "unknown";
+#define OPENMP_BIND_KIND(Name) \
+ case OMPC_BIND_##Name: \
+ return #Name;
+#include "clang/Basic/OpenMPKinds.def"
+ }
+ llvm_unreachable("Invalid OpenMP 'bind' clause type");
case OMPC_unknown:
case OMPC_threadprivate:
case OMPC_if:
@@ -426,6 +460,8 @@ const char *clang::getOpenMPSimpleClauseTypeName(OpenMPClauseKind Kind,
case OMPC_exclusive:
case OMPC_uses_allocators:
case OMPC_affinity:
+ case OMPC_when:
+ case OMPC_append_args:
break;
default:
break;
@@ -453,7 +489,7 @@ bool clang::isOpenMPLoopDirective(OpenMPDirectiveKind DKind) {
DKind == OMPD_target_teams_distribute_parallel_for ||
DKind == OMPD_target_teams_distribute_parallel_for_simd ||
DKind == OMPD_target_teams_distribute_simd || DKind == OMPD_tile ||
- DKind == OMPD_unroll;
+ DKind == OMPD_unroll || DKind == OMPD_loop;
}
bool clang::isOpenMPWorksharingDirective(OpenMPDirectiveKind DKind) {
@@ -556,6 +592,10 @@ bool clang::isOpenMPDistributeDirective(OpenMPDirectiveKind Kind) {
Kind == OMPD_target_teams_distribute_simd;
}
+bool clang::isOpenMPGenericLoopDirective(OpenMPDirectiveKind Kind) {
+ return Kind == OMPD_loop;
+}
+
bool clang::isOpenMPPrivate(OpenMPClauseKind Kind) {
return Kind == OMPC_private || Kind == OMPC_firstprivate ||
Kind == OMPC_lastprivate || Kind == OMPC_linear ||
@@ -589,6 +629,9 @@ void clang::getOpenMPCaptureRegions(
OpenMPDirectiveKind DKind) {
assert(unsigned(DKind) < llvm::omp::Directive_enumSize);
switch (DKind) {
+ case OMPD_metadirective:
+ CaptureRegions.push_back(OMPD_metadirective);
+ break;
case OMPD_parallel:
case OMPD_parallel_for:
case OMPD_parallel_for_simd:
@@ -651,6 +694,10 @@ void clang::getOpenMPCaptureRegions(
CaptureRegions.push_back(OMPD_teams);
CaptureRegions.push_back(OMPD_parallel);
break;
+ case OMPD_loop:
+ // TODO: 'loop' may require different capture regions depending on the bind
+ // clause or the parent directive when there is no bind clause. Use
+ // OMPD_unknown for now.
case OMPD_simd:
case OMPD_for:
case OMPD_for_simd:
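The OpenMPKinds.cpp hunks above add parsing support for the 'bind' and 'adjust_args' clause kinds and teach the directive classifiers about OMPD_loop. As a hedged orientation aid, the OpenMP 5.1 source form that a 'bind' clause value names looks like the following; the surrounding function is made up, and this says nothing about how completely the directive is handled elsewhere in this import.

// Illustration only: a generic 'loop' construct with a 'bind' clause, one of
// the clause kinds the switch above now recognizes.
void saxpy(int n, float a, const float *x, float *y) {
#pragma omp loop bind(parallel)
  for (int i = 0; i < n; ++i)
    y[i] += a * x[i];
}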
diff --git a/clang/lib/Basic/ProfileList.cpp b/clang/lib/Basic/ProfileList.cpp
index 2cb05c1c3c07..9c88559d1c33 100644
--- a/clang/lib/Basic/ProfileList.cpp
+++ b/clang/lib/Basic/ProfileList.cpp
@@ -58,7 +58,7 @@ ProfileSpecialCaseList::createOrDie(const std::vector<std::string> &Paths,
std::string Error;
if (auto PSCL = create(Paths, VFS, Error))
return PSCL;
- llvm::report_fatal_error(Error);
+ llvm::report_fatal_error(llvm::Twine(Error));
}
}
diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp
index 8cba379aa0f8..c2e7b684cfd8 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -207,28 +207,30 @@ void LineTableInfo::AddLineNote(FileID FID, unsigned Offset, unsigned LineNo,
SrcMgr::CharacteristicKind FileKind) {
std::vector<LineEntry> &Entries = LineEntries[FID];
- // An unspecified FilenameID means use the last filename if available, or the
- // main source file otherwise.
- if (FilenameID == -1 && !Entries.empty())
- FilenameID = Entries.back().FilenameID;
-
assert((Entries.empty() || Entries.back().FileOffset < Offset) &&
"Adding line entries out of order!");
unsigned IncludeOffset = 0;
- if (EntryExit == 0) { // No #include stack change.
- IncludeOffset = Entries.empty() ? 0 : Entries.back().IncludeOffset;
- } else if (EntryExit == 1) {
+ if (EntryExit == 1) {
+ // Push #include
IncludeOffset = Offset-1;
- } else if (EntryExit == 2) {
- assert(!Entries.empty() && Entries.back().IncludeOffset &&
- "PPDirectives should have caught case when popping empty include stack");
-
- // Get the include loc of the last entries' include loc as our include loc.
- IncludeOffset = 0;
- if (const LineEntry *PrevEntry =
- FindNearestLineEntry(FID, Entries.back().IncludeOffset))
+ } else {
+ const auto *PrevEntry = Entries.empty() ? nullptr : &Entries.back();
+ if (EntryExit == 2) {
+ // Pop #include
+ assert(PrevEntry && PrevEntry->IncludeOffset &&
+ "PPDirectives should have caught case when popping empty include "
+ "stack");
+ PrevEntry = FindNearestLineEntry(FID, PrevEntry->IncludeOffset);
+ }
+ if (PrevEntry) {
IncludeOffset = PrevEntry->IncludeOffset;
+ if (FilenameID == -1) {
+ // An unspecified FilenameID means use the previous (or containing)
+ // filename if available, or the main source file otherwise.
+ FilenameID = PrevEntry->FilenameID;
+ }
+ }
}
Entries.push_back(LineEntry::get(Offset, LineNo, FilenameID, FileKind,
diff --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp
index b647a2fb8a67..646bbe8b7387 100644
--- a/clang/lib/Basic/TargetInfo.cpp
+++ b/clang/lib/Basic/TargetInfo.cpp
@@ -34,8 +34,11 @@ TargetInfo::TargetInfo(const llvm::Triple &T) : TargetOpts(), Triple(T) {
NoAsmVariants = false;
HasLegalHalfType = false;
HasFloat128 = false;
+ HasIbm128 = false;
HasFloat16 = false;
HasBFloat16 = false;
+ HasLongDouble = true;
+ HasFPReturn = true;
HasStrictFP = false;
PointerWidth = PointerAlign = 32;
BoolWidth = BoolAlign = 8;
@@ -83,6 +86,7 @@ TargetInfo::TargetInfo(const llvm::Triple &T) : TargetOpts(), Triple(T) {
LongDoubleWidth = 64;
LongDoubleAlign = 64;
Float128Align = 128;
+ Ibm128Align = 128;
LargeArrayMinWidth = 0;
LargeArrayAlign = 0;
MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 0;
@@ -113,6 +117,7 @@ TargetInfo::TargetInfo(const llvm::Triple &T) : TargetOpts(), Triple(T) {
DoubleFormat = &llvm::APFloat::IEEEdouble();
LongDoubleFormat = &llvm::APFloat::IEEEdouble();
Float128Format = &llvm::APFloat::IEEEquad();
+ Ibm128Format = &llvm::APFloat::PPCDoubleDouble();
MCountName = "mcount";
UserLabelPrefix = "_";
RegParmMax = 0;
@@ -276,32 +281,36 @@ TargetInfo::IntType TargetInfo::getLeastIntTypeByWidth(unsigned BitWidth,
return NoInt;
}
-TargetInfo::RealType TargetInfo::getRealTypeByWidth(unsigned BitWidth,
- bool ExplicitIEEE) const {
+FloatModeKind TargetInfo::getRealTypeByWidth(unsigned BitWidth,
+ FloatModeKind ExplicitType) const {
if (getFloatWidth() == BitWidth)
- return Float;
+ return FloatModeKind::Float;
if (getDoubleWidth() == BitWidth)
- return Double;
+ return FloatModeKind::Double;
switch (BitWidth) {
case 96:
if (&getLongDoubleFormat() == &llvm::APFloat::x87DoubleExtended())
- return LongDouble;
+ return FloatModeKind::LongDouble;
break;
case 128:
// The caller explicitly asked for an IEEE compliant type but we still
// have to check if the target supports it.
- if (ExplicitIEEE)
- return hasFloat128Type() ? Float128 : NoFloat;
+ if (ExplicitType == FloatModeKind::Float128)
+ return hasFloat128Type() ? FloatModeKind::Float128
+ : FloatModeKind::NoFloat;
+ if (ExplicitType == FloatModeKind::Ibm128)
+ return hasIbm128Type() ? FloatModeKind::Ibm128
+ : FloatModeKind::NoFloat;
if (&getLongDoubleFormat() == &llvm::APFloat::PPCDoubleDouble() ||
&getLongDoubleFormat() == &llvm::APFloat::IEEEquad())
- return LongDouble;
+ return FloatModeKind::LongDouble;
if (hasFloat128Type())
- return Float128;
+ return FloatModeKind::Float128;
break;
}
- return NoFloat;
+ return FloatModeKind::NoFloat;
}
/// getTypeAlign - Return the alignment (in bits) of the specified integer type
@@ -400,14 +409,18 @@ void TargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) {
// OpenCL C v3.0 s6.7.5 - The generic address space requires support for
// OpenCL C 2.0 or OpenCL C 3.0 with the __opencl_c_generic_address_space
// feature
- // FIXME: OpenCLGenericAddressSpace is also defined in setLangDefaults()
+ // OpenCL C v3.0 s6.2.1 - OpenCL pipes require support of OpenCL C 2.0
+ // or later and __opencl_c_pipes feature
+ // FIXME: These language options are also defined in setLangDefaults()
// for OpenCL C 2.0 but with no access to target capabilities. Target
- // should be immutable once created and thus this language option needs
+ // should be immutable once created and thus these language options need
// to be defined only once.
- if (Opts.OpenCLVersion >= 300) {
+ if (Opts.getOpenCLCompatibleVersion() == 300) {
const auto &OpenCLFeaturesMap = getSupportedOpenCLOpts();
Opts.OpenCLGenericAddressSpace = hasFeatureEnabled(
OpenCLFeaturesMap, "__opencl_c_generic_address_space");
+ Opts.OpenCLPipes =
+ hasFeatureEnabled(OpenCLFeaturesMap, "__opencl_c_pipes");
}
}
diff --git a/clang/lib/Basic/Targets.cpp b/clang/lib/Basic/Targets.cpp
index ba91d0439968..994a491cddf2 100644
--- a/clang/lib/Basic/Targets.cpp
+++ b/clang/lib/Basic/Targets.cpp
@@ -606,6 +606,18 @@ TargetInfo *AllocateTarget(const llvm::Triple &Triple,
return nullptr;
return new SPIR64TargetInfo(Triple, Opts);
}
+ case llvm::Triple::spirv32: {
+ if (os != llvm::Triple::UnknownOS ||
+ Triple.getEnvironment() != llvm::Triple::UnknownEnvironment)
+ return nullptr;
+ return new SPIRV32TargetInfo(Triple, Opts);
+ }
+ case llvm::Triple::spirv64: {
+ if (os != llvm::Triple::UnknownOS ||
+ Triple.getEnvironment() != llvm::Triple::UnknownEnvironment)
+ return nullptr;
+ return new SPIRV64TargetInfo(Triple, Opts);
+ }
case llvm::Triple::wasm32:
if (Triple.getSubArch() != llvm::Triple::NoSubArch ||
Triple.getVendor() != llvm::Triple::UnknownVendor ||
@@ -745,7 +757,7 @@ bool TargetInfo::validateOpenCLTarget(const LangOptions &Opts,
// Validate that feature macros are set properly for OpenCL C 3.0.
// In other cases assume that target is always valid.
- if (Opts.OpenCLCPlusPlus || Opts.OpenCLVersion < 300)
+ if (Opts.getOpenCLCompatibleVersion() < 300)
return true;
return OpenCLOptions::diagnoseUnsupportedFeatureDependencies(*this, Diags) &&
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index 4070ac727d16..f75b8ffcb53d 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -40,6 +40,26 @@ const Builtin::Info AArch64TargetInfo::BuiltinInfo[] = {
#include "clang/Basic/BuiltinsAArch64.def"
};
+static StringRef getArchVersionString(llvm::AArch64::ArchKind Kind) {
+ switch (Kind) {
+ case llvm::AArch64::ArchKind::ARMV9A:
+ case llvm::AArch64::ArchKind::ARMV9_1A:
+ case llvm::AArch64::ArchKind::ARMV9_2A:
+ return "9";
+ default:
+ return "8";
+ }
+}
+
+StringRef AArch64TargetInfo::getArchProfile() const {
+ switch (ArchKind) {
+ case llvm::AArch64::ArchKind::ARMV8R:
+ return "R";
+ default:
+ return "A";
+ }
+}
+
AArch64TargetInfo::AArch64TargetInfo(const llvm::Triple &Triple,
const TargetOptions &Opts)
: TargetInfo(Triple), ABI("aapcs") {
@@ -203,6 +223,24 @@ void AArch64TargetInfo::getTargetDefinesARMV87A(const LangOptions &Opts,
getTargetDefinesARMV86A(Opts, Builder);
}
+void AArch64TargetInfo::getTargetDefinesARMV9A(const LangOptions &Opts,
+ MacroBuilder &Builder) const {
+ // Armv9-A maps to Armv8.5-A
+ getTargetDefinesARMV85A(Opts, Builder);
+}
+
+void AArch64TargetInfo::getTargetDefinesARMV91A(const LangOptions &Opts,
+ MacroBuilder &Builder) const {
+ // Armv9.1-A maps to Armv8.6-A
+ getTargetDefinesARMV86A(Opts, Builder);
+}
+
+void AArch64TargetInfo::getTargetDefinesARMV92A(const LangOptions &Opts,
+ MacroBuilder &Builder) const {
+ // Armv9.2-A maps to Armv8.7-A
+ getTargetDefinesARMV87A(Opts, Builder);
+}
+
void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
// Target identification.
@@ -227,8 +265,8 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts,
// ACLE predefines. Many can only have one possible value on v8 AArch64.
Builder.defineMacro("__ARM_ACLE", "200");
- Builder.defineMacro("__ARM_ARCH", "8");
- Builder.defineMacro("__ARM_ARCH_PROFILE", "'A'");
+ Builder.defineMacro("__ARM_ARCH", getArchVersionString(ArchKind));
+ Builder.defineMacro("__ARM_ARCH_PROFILE", "'" + getArchProfile() + "'");
Builder.defineMacro("__ARM_64BIT_STATE", "1");
Builder.defineMacro("__ARM_PCS_AAPCS64", "1");
@@ -405,6 +443,15 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts,
case llvm::AArch64::ArchKind::ARMV8_7A:
getTargetDefinesARMV87A(Opts, Builder);
break;
+ case llvm::AArch64::ArchKind::ARMV9A:
+ getTargetDefinesARMV9A(Opts, Builder);
+ break;
+ case llvm::AArch64::ArchKind::ARMV9_1A:
+ getTargetDefinesARMV91A(Opts, Builder);
+ break;
+ case llvm::AArch64::ArchKind::ARMV9_2A:
+ getTargetDefinesARMV92A(Opts, Builder);
+ break;
}
// All of the __sync_(bool|val)_compare_and_swap_(1|2|4|8) builtins work.
@@ -413,8 +460,8 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4");
Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8");
- if (Opts.ArmSveVectorBits) {
- Builder.defineMacro("__ARM_FEATURE_SVE_BITS", Twine(Opts.ArmSveVectorBits));
+ if (Opts.VScaleMin && Opts.VScaleMin == Opts.VScaleMax) {
+ Builder.defineMacro("__ARM_FEATURE_SVE_BITS", Twine(Opts.VScaleMin * 128));
Builder.defineMacro("__ARM_FEATURE_SVE_VECTOR_OPERATORS");
}
}
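
The fixed SVE width is now expressed through a vscale range (a following hunk adds getVScaleRange) instead of a dedicated ArmSveVectorBits option: the macro is only defined when the minimum and maximum vscale agree, and the bit width is vscale * 128. A small self-contained sketch of that arithmetic, under the assumption that the min/max values come from the driver's vscale flags:

    #include <iostream>
    #include <optional>
    #include <utility>

    // Returns the (min, max) vscale range: an explicit range if one was
    // given, otherwise the full scalable range 0..16 when SVE is available.
    static std::optional<std::pair<unsigned, unsigned>>
    vscaleRange(unsigned Min, unsigned Max, bool HasSVE) {
      if (Min || Max)
        return std::make_pair(Min, Max);
      if (HasSVE)
        return std::make_pair(0u, 16u); // 128..2048-bit vectors
      return std::nullopt;
    }

    int main() {
      unsigned Min = 4, Max = 4; // a fixed 512-bit configuration
      if (Min && Min == Max)
        std::cout << "__ARM_FEATURE_SVE_BITS=" << Min * 128 << '\n'; // 512
      auto R = vscaleRange(Min, Max, /*HasSVE=*/true);
      std::cout << R->first << ".." << R->second << '\n'; // 4..4
    }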
@@ -424,6 +471,16 @@ ArrayRef<Builtin::Info> AArch64TargetInfo::getTargetBuiltins() const {
Builtin::FirstTSBuiltin);
}
+Optional<std::pair<unsigned, unsigned>>
+AArch64TargetInfo::getVScaleRange(const LangOptions &LangOpts) const {
+ if (LangOpts.VScaleMin || LangOpts.VScaleMax)
+ return std::pair<unsigned, unsigned>(LangOpts.VScaleMin,
+ LangOpts.VScaleMax);
+ if (hasFeature("sve"))
+ return std::pair<unsigned, unsigned>(0, 16);
+ return None;
+}
+
bool AArch64TargetInfo::hasFeature(StringRef Feature) const {
return Feature == "aarch64" || Feature == "arm64" || Feature == "arm" ||
(Feature == "neon" && (FPU & NeonMode)) ||
@@ -431,7 +488,8 @@ bool AArch64TargetInfo::hasFeature(StringRef Feature) const {
Feature == "sve2-aes" || Feature == "sve2-sha3" ||
Feature == "sve2-sm4" || Feature == "f64mm" || Feature == "f32mm" ||
Feature == "i8mm" || Feature == "bf16") &&
- (FPU & SveMode));
+ (FPU & SveMode)) ||
+ (Feature == "ls64" && HasLS64);
}
bool AArch64TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
@@ -462,7 +520,7 @@ bool AArch64TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasMatmulFP32 = false;
HasLSE = false;
- ArchKind = llvm::AArch64::ArchKind::ARMV8A;
+ ArchKind = llvm::AArch64::ArchKind::INVALID;
for (const auto &Feature : Features) {
if (Feature == "+neon")
@@ -524,6 +582,8 @@ bool AArch64TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasSM4 = true;
if (Feature == "+strict-align")
HasUnaligned = false;
+ if (Feature == "+v8a")
+ ArchKind = llvm::AArch64::ArchKind::ARMV8A;
if (Feature == "+v8.1a")
ArchKind = llvm::AArch64::ArchKind::ARMV8_1A;
if (Feature == "+v8.2a")
@@ -538,6 +598,12 @@ bool AArch64TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
ArchKind = llvm::AArch64::ArchKind::ARMV8_6A;
if (Feature == "+v8.7a")
ArchKind = llvm::AArch64::ArchKind::ARMV8_7A;
+ if (Feature == "+v9a")
+ ArchKind = llvm::AArch64::ArchKind::ARMV9A;
+ if (Feature == "+v9.1a")
+ ArchKind = llvm::AArch64::ArchKind::ARMV9_1A;
+ if (Feature == "+v9.2a")
+ ArchKind = llvm::AArch64::ArchKind::ARMV9_2A;
if (Feature == "+v8r")
ArchKind = llvm::AArch64::ArchKind::ARMV8R;
if (Feature == "+fullfp16")
@@ -752,6 +818,9 @@ bool AArch64TargetInfo::validateConstraintModifier(
if (Size == 64)
return true;
+ if (Size == 512)
+ return HasLS64;
+
SuggestedModifier = "w";
return false;
}
diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h
index 46882a808336..dea59a9b015d 100644
--- a/clang/lib/Basic/Targets/AArch64.h
+++ b/clang/lib/Basic/Targets/AArch64.h
@@ -59,6 +59,7 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo {
static const Builtin::Info BuiltinInfo[];
std::string ABI;
+ StringRef getArchProfile() const;
public:
AArch64TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts);
@@ -91,11 +92,20 @@ public:
MacroBuilder &Builder) const;
void getTargetDefinesARMV87A(const LangOptions &Opts,
MacroBuilder &Builder) const;
+ void getTargetDefinesARMV9A(const LangOptions &Opts,
+ MacroBuilder &Builder) const;
+ void getTargetDefinesARMV91A(const LangOptions &Opts,
+ MacroBuilder &Builder) const;
+ void getTargetDefinesARMV92A(const LangOptions &Opts,
+ MacroBuilder &Builder) const;
void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const override;
ArrayRef<Builtin::Info> getTargetBuiltins() const override;
+ Optional<std::pair<unsigned, unsigned>>
+ getVScaleRange(const LangOptions &LangOpts) const override;
+
bool hasFeature(StringRef Feature) const override;
bool handleTargetFeatures(std::vector<std::string> &Features,
DiagnosticsEngine &Diags) override;
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp
index fac786dbcf9e..ba7ffa34c73e 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -17,7 +17,6 @@
#include "clang/Basic/MacroBuilder.h"
#include "clang/Basic/TargetBuiltins.h"
#include "llvm/ADT/StringSwitch.h"
-#include "llvm/Frontend/OpenMP/OMPGridValues.h"
using namespace clang;
using namespace clang::targets;
@@ -335,7 +334,6 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
llvm::AMDGPU::getArchAttrR600(GPUKind)) {
resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
: DataLayoutStringR600);
- GridValues = llvm::omp::AMDGPUGpuGridValues;
setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
!isAMDGCN(Triple));
diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h
index 244a6e044690..8b9d7ce79c16 100644
--- a/clang/lib/Basic/Targets/AMDGPU.h
+++ b/clang/lib/Basic/Targets/AMDGPU.h
@@ -310,9 +310,12 @@ public:
Opts["cl_khr_mipmap_image"] = true;
Opts["cl_khr_mipmap_image_writes"] = true;
Opts["cl_khr_subgroups"] = true;
- Opts["cl_khr_3d_image_writes"] = true;
Opts["cl_amd_media_ops"] = true;
Opts["cl_amd_media_ops2"] = true;
+
+ Opts["__opencl_c_images"] = true;
+ Opts["__opencl_c_3d_image_writes"] = true;
+ Opts["cl_khr_3d_image_writes"] = true;
}
}
@@ -349,13 +352,35 @@ public:
}
LangAS getCUDABuiltinAddressSpace(unsigned AS) const override {
- return LangAS::Default;
+ switch (AS) {
+ case 0:
+ return LangAS::Default;
+ case 1:
+ return LangAS::cuda_device;
+ case 3:
+ return LangAS::cuda_shared;
+ case 4:
+ return LangAS::cuda_constant;
+ default:
+ return getLangASFromTargetAS(AS);
+ }
}
llvm::Optional<LangAS> getConstantAddressSpace() const override {
return getLangASFromTargetAS(Constant);
}
+ const llvm::omp::GV &getGridValue() const override {
+ switch (WavefrontSize) {
+ case 32:
+ return llvm::omp::getAMDGPUGridValues<32>();
+ case 64:
+ return llvm::omp::getAMDGPUGridValues<64>();
+ default:
+ llvm_unreachable("getGridValue not implemented for this wavesize");
+ }
+ }
+
/// \returns Target specific vtbl ptr address space.
unsigned getVtblPtrAddressSpace() const override {
return static_cast<unsigned>(Constant);
@@ -415,7 +440,7 @@ public:
WavefrontSize = 64;
bool IsOn = F.front() == '+';
StringRef Name = StringRef(F).drop_front();
- if (llvm::find(TargetIDFeatures, Name) == TargetIDFeatures.end())
+ if (!llvm::is_contained(TargetIDFeatures, Name))
return;
assert(OffloadArchFeatures.find(Name) == OffloadArchFeatures.end());
OffloadArchFeatures[Name] = IsOn;
diff --git a/clang/lib/Basic/Targets/ARC.h b/clang/lib/Basic/Targets/ARC.h
index b314c42be1e9..3c0c5f6df2f4 100644
--- a/clang/lib/Basic/Targets/ARC.h
+++ b/clang/lib/Basic/Targets/ARC.h
@@ -67,6 +67,8 @@ public:
}
bool hasExtIntType() const override { return true; }
+
+ bool isCLZForZeroUndef() const override { return false; }
};
} // namespace targets
diff --git a/clang/lib/Basic/Targets/ARM.cpp b/clang/lib/Basic/Targets/ARM.cpp
index 0e4048f8d5ff..fc6b01c87fd2 100644
--- a/clang/lib/Basic/Targets/ARM.cpp
+++ b/clang/lib/Basic/Targets/ARM.cpp
@@ -212,6 +212,12 @@ StringRef ARMTargetInfo::getCPUAttr() const {
return "8_6A";
case llvm::ARM::ArchKind::ARMV8_7A:
return "8_7A";
+ case llvm::ARM::ArchKind::ARMV9A:
+ return "9A";
+ case llvm::ARM::ArchKind::ARMV9_1A:
+ return "9_1A";
+ case llvm::ARM::ArchKind::ARMV9_2A:
+ return "9_2A";
case llvm::ARM::ArchKind::ARMV8MBaseline:
return "8M_BASE";
case llvm::ARM::ArchKind::ARMV8MMainline:
@@ -440,6 +446,7 @@ bool ARMTargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasFloat16 = true;
ARMCDECoprocMask = 0;
HasBFloat16 = false;
+ FPRegsDisabled = false;
// This does not diagnose illegal cases like having both
// "+vfpv2" and "+vfpv3" or having "+neon" and "-fp64".
@@ -516,6 +523,8 @@ bool ARMTargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
ARMCDECoprocMask |= (1U << Coproc);
} else if (Feature == "+bf16") {
HasBFloat16 = true;
+ } else if (Feature == "-fpregs") {
+ FPRegsDisabled = true;
}
}
@@ -535,6 +544,7 @@ bool ARMTargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
LDREX = LDREX_D | LDREX_W | LDREX_H | LDREX_B;
break;
case 8:
+ case 9:
LDREX = LDREX_D | LDREX_W | LDREX_H | LDREX_B;
}
@@ -877,6 +887,9 @@ void ARMTargetInfo::getTargetDefines(const LangOptions &Opts,
case llvm::ARM::ArchKind::ARMV8_4A:
case llvm::ARM::ArchKind::ARMV8_5A:
case llvm::ARM::ArchKind::ARMV8_6A:
+ case llvm::ARM::ArchKind::ARMV9A:
+ case llvm::ARM::ArchKind::ARMV9_1A:
+ case llvm::ARM::ArchKind::ARMV9_2A:
getTargetDefinesARMV83A(Opts, Builder);
break;
}
@@ -968,6 +981,8 @@ bool ARMTargetInfo::validateAsmConstraint(
case 't': // s0-s31, d0-d31, or q0-q15
case 'w': // s0-s15, d0-d7, or q0-q3
case 'x': // s0-s31, d0-d15, or q0-q7
+ if (FPRegsDisabled)
+ return false;
Info.setAllowsRegister();
return true;
case 'j': // An immediate integer between 0 and 65535 (valid for MOVW)
diff --git a/clang/lib/Basic/Targets/ARM.h b/clang/lib/Basic/Targets/ARM.h
index 0910064a033b..d54a049042d6 100644
--- a/clang/lib/Basic/Targets/ARM.h
+++ b/clang/lib/Basic/Targets/ARM.h
@@ -78,6 +78,7 @@ class LLVM_LIBRARY_VISIBILITY ARMTargetInfo : public TargetInfo {
unsigned Unaligned : 1;
unsigned DotProd : 1;
unsigned HasMatMul : 1;
+ unsigned FPRegsDisabled : 1;
enum {
LDREX_B = (1 << 0), /// byte (8-bit)
diff --git a/clang/lib/Basic/Targets/AVR.cpp b/clang/lib/Basic/Targets/AVR.cpp
index e87b7338c4d6..50b0fc07b311 100644
--- a/clang/lib/Basic/Targets/AVR.cpp
+++ b/clang/lib/Basic/Targets/AVR.cpp
@@ -223,7 +223,7 @@ static MCUInfo AVRMcus[] = {
{"atmega256rfr2", "__AVR_ATmega256RFR2__"},
{"atmega2564rfr2", "__AVR_ATmega2564RFR2__"},
{"atxmega16a4", "__AVR_ATxmega16A4__"},
- {"atxmega16a4u", "__AVR_ATxmega16a4U__"},
+ {"atxmega16a4u", "__AVR_ATxmega16A4U__"},
{"atxmega16c4", "__AVR_ATxmega16C4__"},
{"atxmega16d4", "__AVR_ATxmega16D4__"},
{"atxmega32a4", "__AVR_ATxmega32A4__"},
@@ -265,7 +265,7 @@ static MCUInfo AVRMcus[] = {
{"atxmega384d3", "__AVR_ATxmega384D3__"},
{"atxmega128a1", "__AVR_ATxmega128A1__"},
{"atxmega128a1u", "__AVR_ATxmega128A1U__"},
- {"atxmega128a4u", "__AVR_ATxmega128a4U__"},
+ {"atxmega128a4u", "__AVR_ATxmega128A4U__"},
{"attiny4", "__AVR_ATtiny4__"},
{"attiny5", "__AVR_ATtiny5__"},
{"attiny9", "__AVR_ATtiny9__"},
@@ -274,6 +274,31 @@ static MCUInfo AVRMcus[] = {
{"attiny40", "__AVR_ATtiny40__"},
{"attiny102", "__AVR_ATtiny102__"},
{"attiny104", "__AVR_ATtiny104__"},
+ {"attiny202", "__AVR_ATtiny202__"},
+ {"attiny402", "__AVR_ATtiny402__"},
+ {"attiny204", "__AVR_ATtiny204__"},
+ {"attiny404", "__AVR_ATtiny404__"},
+ {"attiny804", "__AVR_ATtiny804__"},
+ {"attiny1604", "__AVR_ATtiny1604__"},
+ {"attiny406", "__AVR_ATtiny406__"},
+ {"attiny806", "__AVR_ATtiny806__"},
+ {"attiny1606", "__AVR_ATtiny1606__"},
+ {"attiny807", "__AVR_ATtiny807__"},
+ {"attiny1607", "__AVR_ATtiny1607__"},
+ {"attiny212", "__AVR_ATtiny212__"},
+ {"attiny412", "__AVR_ATtiny412__"},
+ {"attiny214", "__AVR_ATtiny214__"},
+ {"attiny414", "__AVR_ATtiny414__"},
+ {"attiny814", "__AVR_ATtiny814__"},
+ {"attiny1614", "__AVR_ATtiny1614__"},
+ {"attiny416", "__AVR_ATtiny416__"},
+ {"attiny816", "__AVR_ATtiny816__"},
+ {"attiny1616", "__AVR_ATtiny1616__"},
+ {"attiny3216", "__AVR_ATtiny3216__"},
+ {"attiny417", "__AVR_ATtiny417__"},
+ {"attiny817", "__AVR_ATtiny817__"},
+ {"attiny1617", "__AVR_ATtiny1617__"},
+ {"attiny3217", "__AVR_ATtiny3217__"},
};
} // namespace targets
@@ -286,13 +311,10 @@ static constexpr llvm::StringLiteral ValidFamilyNames[] = {
"avrxmega6", "avrxmega7", "avrtiny"};
bool AVRTargetInfo::isValidCPUName(StringRef Name) const {
- bool IsFamily =
- llvm::find(ValidFamilyNames, Name) != std::end(ValidFamilyNames);
+ bool IsFamily = llvm::is_contained(ValidFamilyNames, Name);
- bool IsMCU =
- llvm::find_if(AVRMcus, [&](const MCUInfo &Info) {
- return Info.Name == Name;
- }) != std::end(AVRMcus);
+ bool IsMCU = llvm::any_of(
+ AVRMcus, [&](const MCUInfo &Info) { return Info.Name == Name; });
return IsFamily || IsMCU;
}
diff --git a/clang/lib/Basic/Targets/BPF.cpp b/clang/lib/Basic/Targets/BPF.cpp
index 0b0298df30a5..2dfe21564cc1 100644
--- a/clang/lib/Basic/Targets/BPF.cpp
+++ b/clang/lib/Basic/Targets/BPF.cpp
@@ -35,7 +35,7 @@ static constexpr llvm::StringLiteral ValidCPUNames[] = {"generic", "v1", "v2",
"v3", "probe"};
bool BPFTargetInfo::isValidCPUName(StringRef Name) const {
- return llvm::find(ValidCPUNames, Name) != std::end(ValidCPUNames);
+ return llvm::is_contained(ValidCPUNames, Name);
}
void BPFTargetInfo::fillValidCPUList(SmallVectorImpl<StringRef> &Values) const {
diff --git a/clang/lib/Basic/Targets/M68k.cpp b/clang/lib/Basic/Targets/M68k.cpp
index 31cb36d37636..c0cd8fa90ed6 100644
--- a/clang/lib/Basic/Targets/M68k.cpp
+++ b/clang/lib/Basic/Targets/M68k.cpp
@@ -37,8 +37,8 @@ M68kTargetInfo::M68kTargetInfo(const llvm::Triple &Triple,
// FIXME how to wire it with the used object format?
Layout += "-m:e";
- // M68k pointers are always 32 bit wide even for 16 bit cpus
- Layout += "-p:32:32";
+ // M68k pointers are always 32 bit wide even for 16-bit CPUs
+ Layout += "-p:32:16:32";
// M68k integer data types
Layout += "-i8:8:8-i16:16:16-i32:16:32";
diff --git a/clang/lib/Basic/Targets/Mips.cpp b/clang/lib/Basic/Targets/Mips.cpp
index 3a32fd492c6b..39246f650cce 100644
--- a/clang/lib/Basic/Targets/Mips.cpp
+++ b/clang/lib/Basic/Targets/Mips.cpp
@@ -50,7 +50,7 @@ static constexpr llvm::StringLiteral ValidCPUNames[] = {
{"octeon"}, {"octeon+"}, {"p5600"}};
bool MipsTargetInfo::isValidCPUName(StringRef Name) const {
- return llvm::find(ValidCPUNames, Name) != std::end(ValidCPUNames);
+ return llvm::is_contained(ValidCPUNames, Name);
}
void MipsTargetInfo::fillValidCPUList(
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp
index 56f8a179db3c..3561b22677bc 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -16,7 +16,6 @@
#include "clang/Basic/MacroBuilder.h"
#include "clang/Basic/TargetBuiltins.h"
#include "llvm/ADT/StringSwitch.h"
-#include "llvm/Frontend/OpenMP/OMPGridValues.h"
using namespace clang;
using namespace clang::targets;
@@ -45,6 +44,9 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
if (!Feature.startswith("+ptx"))
continue;
PTXVersion = llvm::StringSwitch<unsigned>(Feature)
+ .Case("+ptx75", 75)
+ .Case("+ptx74", 74)
+ .Case("+ptx73", 73)
.Case("+ptx72", 72)
.Case("+ptx71", 71)
.Case("+ptx70", 70)
@@ -65,7 +67,6 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
TLSSupported = false;
VLASupported = false;
AddrSpaceMap = &NVPTXAddrSpaceMap;
- GridValues = llvm::omp::NVPTXGpuGridValues;
UseAddrSpaceMapMangling = true;
// Define available target features
diff --git a/clang/lib/Basic/Targets/NVPTX.h b/clang/lib/Basic/Targets/NVPTX.h
index c7db3cdaaf10..ef751b8e1a8d 100644
--- a/clang/lib/Basic/Targets/NVPTX.h
+++ b/clang/lib/Basic/Targets/NVPTX.h
@@ -147,6 +147,10 @@ public:
Opts["cl_khr_local_int32_extended_atomics"] = true;
}
+ const llvm::omp::GV &getGridValue() const override {
+ return llvm::omp::NVPTXGridValues;
+ }
+
/// \returns If a target requires an address within a target specific address
/// space \p AddressSpace to be converted in order to be used, then return the
/// corresponding target specific DWARF address space.
diff --git a/clang/lib/Basic/Targets/OSTargets.h b/clang/lib/Basic/Targets/OSTargets.h
index e24fb5cf082d..7fbe2cbc5653 100644
--- a/clang/lib/Basic/Targets/OSTargets.h
+++ b/clang/lib/Basic/Targets/OSTargets.h
@@ -179,6 +179,8 @@ protected:
Builder.defineMacro("__KPRINTF_ATTRIBUTE__");
Builder.defineMacro("__tune_i386__");
DefineStd(Builder, "unix", Opts);
+ if (this->HasFloat128)
+ Builder.defineMacro("__FLOAT128__");
}
public:
@@ -188,6 +190,7 @@ public:
default:
case llvm::Triple::x86:
case llvm::Triple::x86_64:
+ this->HasFloat128 = true;
this->MCountName = ".mcount";
break;
}
@@ -460,6 +463,9 @@ protected:
Builder.defineMacro("_REENTRANT");
if (this->HasFloat128)
Builder.defineMacro("__FLOAT128__");
+
+ if (Opts.C11)
+ Builder.defineMacro("__STDC_NO_THREADS__");
}
public:
@@ -673,9 +679,11 @@ protected:
DefineStd(Builder, "unix", Opts);
Builder.defineMacro("_IBMR2");
Builder.defineMacro("_POWER");
+ Builder.defineMacro("__THW_BIG_ENDIAN__");
Builder.defineMacro("_AIX");
Builder.defineMacro("__TOS_AIX__");
+ Builder.defineMacro("__HOS_AIX__");
if (Opts.C11) {
Builder.defineMacro("__STDC_NO_ATOMICS__");
@@ -736,7 +744,6 @@ public:
// AIX sets FLT_EVAL_METHOD to be 1.
unsigned getFloatEvalMethod() const override { return 1; }
- bool hasInt128Type() const override { return false; }
bool defaultsToAIXPowerAlignment() const override { return true; }
};
@@ -796,7 +803,6 @@ public:
this->UseZeroLengthBitfieldAlignment = true;
this->UseLeadingZeroLengthBitfield = false;
this->ZeroLengthBitfieldBoundary = 32;
- this->DefaultAlignForAttributeAligned = 128;
}
};
@@ -884,6 +890,9 @@ protected:
// Required by the libc++ locale support.
if (Opts.CPlusPlus)
Builder.defineMacro("_GNU_SOURCE");
+ Builder.defineMacro("__Fuchsia_API_level__", Twine(Opts.FuchsiaAPILevel));
+ this->PlatformName = "fuchsia";
+ this->PlatformMinVersion = VersionTuple(Opts.FuchsiaAPILevel);
}
public:
@@ -943,6 +952,7 @@ class LLVM_LIBRARY_VISIBILITY EmscriptenTargetInfo
void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
MacroBuilder &Builder) const final {
WebAssemblyOSTargetInfo<Target>::getOSDefines(Opts, Triple, Builder);
+ DefineStd(Builder, "unix", Opts);
Builder.defineMacro("__EMSCRIPTEN__");
if (Opts.POSIXThreads)
Builder.defineMacro("__EMSCRIPTEN_PTHREADS__");
diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp
index 59656888e25f..c3c61ed443ca 100644
--- a/clang/lib/Basic/Targets/PPC.cpp
+++ b/clang/lib/Basic/Targets/PPC.cpp
@@ -73,6 +73,8 @@ bool PPCTargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasROPProtect = true;
} else if (Feature == "+privileged") {
HasPrivileged = true;
+ } else if (Feature == "+isa-v206-instructions") {
+ IsISA2_06 = true;
} else if (Feature == "+isa-v207-instructions") {
IsISA2_07 = true;
} else if (Feature == "+isa-v30-instructions") {
@@ -236,6 +238,15 @@ static void defineXLCompatMacros(MacroBuilder &Builder) {
Builder.defineMacro("__frsqrtes", "__builtin_ppc_frsqrtes");
Builder.defineMacro("__fsqrt", "__builtin_ppc_fsqrt");
Builder.defineMacro("__fsqrts", "__builtin_ppc_fsqrts");
+ Builder.defineMacro("__addex", "__builtin_ppc_addex");
+ Builder.defineMacro("__cmplxl", "__builtin_complex");
+ Builder.defineMacro("__compare_exp_uo", "__builtin_ppc_compare_exp_uo");
+ Builder.defineMacro("__compare_exp_lt", "__builtin_ppc_compare_exp_lt");
+ Builder.defineMacro("__compare_exp_gt", "__builtin_ppc_compare_exp_gt");
+ Builder.defineMacro("__compare_exp_eq", "__builtin_ppc_compare_exp_eq");
+ Builder.defineMacro("__test_data_class", "__builtin_ppc_test_data_class");
+ Builder.defineMacro("__swdiv", "__builtin_ppc_swdiv");
+ Builder.defineMacro("__swdivs", "__builtin_ppc_swdivs");
}
/// PPCTargetInfo::getTargetDefines - Return a set of the PowerPC-specific
@@ -243,7 +254,10 @@ static void defineXLCompatMacros(MacroBuilder &Builder) {
void PPCTargetInfo::getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
- defineXLCompatMacros(Builder);
+ // We define the XLC compatibility macros only on AIX and Linux since XLC
+ // was never available on any other platforms.
+ if (getTriple().isOSAIX() || getTriple().isOSLinux())
+ defineXLCompatMacros(Builder);
// Target identification.
Builder.defineMacro("__ppc__");
@@ -256,6 +270,15 @@ void PPCTargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__powerpc64__");
Builder.defineMacro("__ppc64__");
Builder.defineMacro("__PPC64__");
+ } else if (getTriple().isOSAIX()) {
+ // The XL compilers on AIX define _ARCH_PPC64 for both 32 and 64-bit modes.
+ Builder.defineMacro("_ARCH_PPC64");
+ }
+ if (getTriple().isOSAIX()) {
+ Builder.defineMacro("__THW_PPC__");
+ // Define __PPC and __powerpc for AIX XL C/C++ compatibility
+ Builder.defineMacro("__PPC");
+ Builder.defineMacro("__powerpc");
}
// Target properties.
@@ -367,8 +390,6 @@ void PPCTargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__MMA__");
if (HasROPProtect)
Builder.defineMacro("__ROP_PROTECT__");
- if (HasPrivileged)
- Builder.defineMacro("__PRIVILEGED__");
if (HasP10Vector)
Builder.defineMacro("__POWER10_VECTOR__");
if (HasPCRelativeMemops)
@@ -416,11 +437,11 @@ static bool ppcUserFeaturesCheck(DiagnosticsEngine &Diags,
const std::vector<std::string> &FeaturesVec) {
// vsx was not explicitly turned off.
- if (llvm::find(FeaturesVec, "-vsx") == FeaturesVec.end())
+ if (!llvm::is_contained(FeaturesVec, "-vsx"))
return true;
auto FindVSXSubfeature = [&](StringRef Feature, StringRef Option) {
- if (llvm::find(FeaturesVec, Feature) != FeaturesVec.end()) {
+ if (llvm::is_contained(FeaturesVec, Feature)) {
Diags.Report(diag::err_opt_not_valid_with_opt) << Option << "-mno-vsx";
return true;
}
@@ -507,6 +528,13 @@ bool PPCTargetInfo::initFeatureMap(
.Case("e500", true)
.Default(false);
+ Features["isa-v206-instructions"] = llvm::StringSwitch<bool>(CPU)
+ .Case("ppc64le", true)
+ .Case("pwr9", true)
+ .Case("pwr8", true)
+ .Case("pwr7", true)
+ .Default(false);
+
Features["isa-v207-instructions"] = llvm::StringSwitch<bool>(CPU)
.Case("ppc64le", true)
.Case("pwr9", true)
@@ -534,28 +562,50 @@ bool PPCTargetInfo::initFeatureMap(
return false;
if (!(ArchDefs & ArchDefinePwr9) && (ArchDefs & ArchDefinePpcgr) &&
- llvm::find(FeaturesVec, "+float128") != FeaturesVec.end()) {
+ llvm::is_contained(FeaturesVec, "+float128")) {
// We have __float128 on PPC but not power 9 and above.
Diags.Report(diag::err_opt_not_valid_with_opt) << "-mfloat128" << CPU;
return false;
}
- if (!(ArchDefs & ArchDefinePwr10) &&
- llvm::find(FeaturesVec, "+mma") != FeaturesVec.end()) {
- // We have MMA on PPC but not power 10 and above.
- Diags.Report(diag::err_opt_not_valid_with_opt) << "-mmma" << CPU;
- return false;
+ if (!(ArchDefs & ArchDefinePwr10)) {
+ if (llvm::find(FeaturesVec, "+mma") != FeaturesVec.end()) {
+ // MMA operations are not available pre-Power10.
+ Diags.Report(diag::err_opt_not_valid_with_opt) << "-mmma" << CPU;
+ return false;
+ }
+ if (llvm::find(FeaturesVec, "+pcrel") != FeaturesVec.end()) {
+ // PC-Relative instructions are not available pre-Power10,
+ // and these instructions also require prefixed instructions support.
+ Diags.Report(diag::err_opt_not_valid_without_opt)
+ << "-mpcrel"
+ << "-mcpu=pwr10 -mprefixed";
+ return false;
+ }
+ if (llvm::find(FeaturesVec, "+prefixed") != FeaturesVec.end()) {
+ // Prefixed instructions are not available pre-Power10.
+ Diags.Report(diag::err_opt_not_valid_without_opt) << "-mprefixed"
+ << "-mcpu=pwr10";
+ return false;
+ }
+ if (llvm::find(FeaturesVec, "+paired-vector-memops") != FeaturesVec.end()) {
+ // Paired vector memops are not available pre-Power10.
+ Diags.Report(diag::err_opt_not_valid_without_opt)
+ << "-mpaired-vector-memops"
+ << "-mcpu=pwr10";
+ return false;
+ }
}
if (!(ArchDefs & ArchDefinePwr8) &&
- llvm::find(FeaturesVec, "+rop-protect") != FeaturesVec.end()) {
+ llvm::is_contained(FeaturesVec, "+rop-protect")) {
// We can turn on ROP Protect on Power 8 and above.
Diags.Report(diag::err_opt_not_valid_with_opt) << "-mrop-protect" << CPU;
return false;
}
if (!(ArchDefs & ArchDefinePwr8) &&
- llvm::find(FeaturesVec, "+privileged") != FeaturesVec.end()) {
+ llvm::is_contained(FeaturesVec, "+privileged")) {
Diags.Report(diag::err_opt_not_valid_with_opt) << "-mprivileged" << CPU;
return false;
}
@@ -603,6 +653,7 @@ bool PPCTargetInfo::hasFeature(StringRef Feature) const {
.Case("mma", HasMMA)
.Case("rop-protect", HasROPProtect)
.Case("privileged", HasPrivileged)
+ .Case("isa-v206-instructions", IsISA2_06)
.Case("isa-v207-instructions", IsISA2_07)
.Case("isa-v30-instructions", IsISA3_0)
.Case("isa-v31-instructions", IsISA3_1)
@@ -753,7 +804,7 @@ static constexpr llvm::StringLiteral ValidCPUNames[] = {
{"powerpc64le"}, {"ppc64le"}, {"future"}};
bool PPCTargetInfo::isValidCPUName(StringRef Name) const {
- return llvm::find(ValidCPUNames, Name) != std::end(ValidCPUNames);
+ return llvm::is_contained(ValidCPUNames, Name);
}
void PPCTargetInfo::fillValidCPUList(SmallVectorImpl<StringRef> &Values) const {
diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h
index 7c14a4eb9410..f19d3ebbc066 100644
--- a/clang/lib/Basic/Targets/PPC.h
+++ b/clang/lib/Basic/Targets/PPC.h
@@ -74,6 +74,7 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo {
bool HasP10Vector = false;
bool HasPCRelativeMemops = false;
bool HasPrefixInstrs = false;
+ bool IsISA2_06 = false;
bool IsISA2_07 = false;
bool IsISA3_0 = false;
bool IsISA3_1 = false;
@@ -89,6 +90,7 @@ public:
LongDoubleWidth = LongDoubleAlign = 128;
LongDoubleFormat = &llvm::APFloat::PPCDoubleDouble();
HasStrictFP = true;
+ HasIbm128 = true;
}
// Set the language option for altivec based on our value.
@@ -347,6 +349,7 @@ public:
: "u9__ieee128";
}
const char *getFloat128Mangling() const override { return "u9__ieee128"; }
+ const char *getIbm128Mangling() const override { return "g"; }
bool hasExtIntType() const override { return true; }
diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp
index 9705129b39d8..93562dde2f54 100644
--- a/clang/lib/Basic/Targets/RISCV.cpp
+++ b/clang/lib/Basic/Targets/RISCV.cpp
@@ -11,10 +11,12 @@
//===----------------------------------------------------------------------===//
#include "RISCV.h"
+#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/MacroBuilder.h"
#include "clang/Basic/TargetBuiltins.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/TargetParser.h"
+#include "llvm/Support/raw_ostream.h"
using namespace clang;
using namespace clang::targets;
@@ -105,7 +107,7 @@ std::string RISCVTargetInfo::convertConstraint(const char *&Constraint) const {
std::string R;
switch (*Constraint) {
case 'v':
- R = std::string("v");
+ R = std::string("^") + std::string(Constraint, 2);
Constraint += 1;
break;
default:
@@ -122,6 +124,7 @@ void RISCVTargetInfo::getTargetDefines(const LangOptions &Opts,
bool Is64Bit = getTriple().getArch() == llvm::Triple::riscv64;
Builder.defineMacro("__riscv_xlen", Is64Bit ? "64" : "32");
StringRef CodeModel = getTargetOpts().CodeModel;
+ unsigned FLen = ISAInfo->getFLen();
if (CodeModel == "default")
CodeModel = "small";
@@ -142,17 +145,23 @@ void RISCVTargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__riscv_abi_rve");
Builder.defineMacro("__riscv_arch_test");
- Builder.defineMacro("__riscv_i", "2000000");
- if (HasM) {
- Builder.defineMacro("__riscv_m", "2000000");
+ for (auto &Extension : ISAInfo->getExtensions()) {
+ auto ExtName = Extension.first;
+ auto ExtInfo = Extension.second;
+ unsigned Version =
+ (ExtInfo.MajorVersion * 1000000) + (ExtInfo.MinorVersion * 1000);
+
+ Builder.defineMacro(Twine("__riscv_", ExtName), Twine(Version));
+ }
+
+ if (ISAInfo->hasExtension("m")) {
Builder.defineMacro("__riscv_mul");
Builder.defineMacro("__riscv_div");
Builder.defineMacro("__riscv_muldiv");
}
- if (HasA) {
- Builder.defineMacro("__riscv_a", "2000000");
+ if (ISAInfo->hasExtension("a")) {
Builder.defineMacro("__riscv_atomic");
Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1");
Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2");
@@ -161,72 +170,17 @@ void RISCVTargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8");
}
- if (HasF || HasD) {
- Builder.defineMacro("__riscv_f", "2000000");
- Builder.defineMacro("__riscv_flen", HasD ? "64" : "32");
+ if (FLen) {
+ Builder.defineMacro("__riscv_flen", Twine(FLen));
Builder.defineMacro("__riscv_fdiv");
Builder.defineMacro("__riscv_fsqrt");
}
- if (HasD)
- Builder.defineMacro("__riscv_d", "2000000");
-
- if (HasC) {
- Builder.defineMacro("__riscv_c", "2000000");
+ if (ISAInfo->hasExtension("c"))
Builder.defineMacro("__riscv_compressed");
- }
-
- if (HasB) {
- Builder.defineMacro("__riscv_b", "93000");
- Builder.defineMacro("__riscv_bitmanip");
- }
- if (HasV) {
- Builder.defineMacro("__riscv_v", "10000");
+ if (ISAInfo->hasExtension("v"))
Builder.defineMacro("__riscv_vector");
- }
-
- if (HasZba)
- Builder.defineMacro("__riscv_zba", "93000");
-
- if (HasZbb)
- Builder.defineMacro("__riscv_zbb", "93000");
-
- if (HasZbc)
- Builder.defineMacro("__riscv_zbc", "93000");
-
- if (HasZbe)
- Builder.defineMacro("__riscv_zbe", "93000");
-
- if (HasZbf)
- Builder.defineMacro("__riscv_zbf", "93000");
-
- if (HasZbm)
- Builder.defineMacro("__riscv_zbm", "93000");
-
- if (HasZbp)
- Builder.defineMacro("__riscv_zbp", "93000");
-
- if (HasZbproposedc)
- Builder.defineMacro("__riscv_zbproposedc", "93000");
-
- if (HasZbr)
- Builder.defineMacro("__riscv_zbr", "93000");
-
- if (HasZbs)
- Builder.defineMacro("__riscv_zbs", "93000");
-
- if (HasZbt)
- Builder.defineMacro("__riscv_zbt", "93000");
-
- if (HasZfh)
- Builder.defineMacro("__riscv_zfh", "1000");
-
- if (HasZvamo)
- Builder.defineMacro("__riscv_zvamo", "10000");
-
- if (HasZvlsseg)
- Builder.defineMacro("__riscv_zvlsseg", "10000");
}
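
Instead of hard-coded per-extension macros, each parsed RISC-V extension now gets a __riscv_<ext> macro whose value encodes its version as major * 1000000 + minor * 1000. A standalone sketch of that encoding (the extension list and versions below are just sample data):

    #include <iostream>
    #include <map>
    #include <string>

    struct ExtVersion { unsigned Major, Minor; };

    int main() {
      std::map<std::string, ExtVersion> Exts = {
          {"i", {2, 0}}, {"m", {2, 0}}, {"a", {2, 0}}, {"c", {2, 0}}};
      for (const auto &E : Exts) {
        // Version 2.0 encodes as 2000000, version 0.93 as 93000, etc.
        unsigned Version = E.second.Major * 1000000 + E.second.Minor * 1000;
        std::cout << "#define __riscv_" << E.first << ' ' << Version << '\n';
      }
    }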
const Builtin::Info RISCVTargetInfo::BuiltinInfo[] = {
@@ -234,6 +188,11 @@ const Builtin::Info RISCVTargetInfo::BuiltinInfo[] = {
{#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \
{#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
+#include "clang/Basic/BuiltinsRISCVVector.def"
+#define BUILTIN(ID, TYPE, ATTRS) \
+ {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
+#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \
+ {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
#include "clang/Basic/BuiltinsRISCV.def"
};
@@ -255,81 +214,36 @@ bool RISCVTargetInfo::initFeatureMap(
/// Return true if has this feature, need to sync with handleTargetFeatures.
bool RISCVTargetInfo::hasFeature(StringRef Feature) const {
bool Is64Bit = getTriple().getArch() == llvm::Triple::riscv64;
- return llvm::StringSwitch<bool>(Feature)
- .Case("riscv", true)
- .Case("riscv32", !Is64Bit)
- .Case("riscv64", Is64Bit)
- .Case("64bit", Is64Bit)
- .Case("m", HasM)
- .Case("a", HasA)
- .Case("f", HasF)
- .Case("d", HasD)
- .Case("c", HasC)
- .Case("experimental-b", HasB)
- .Case("experimental-v", HasV)
- .Case("experimental-zba", HasZba)
- .Case("experimental-zbb", HasZbb)
- .Case("experimental-zbc", HasZbc)
- .Case("experimental-zbe", HasZbe)
- .Case("experimental-zbf", HasZbf)
- .Case("experimental-zbm", HasZbm)
- .Case("experimental-zbp", HasZbp)
- .Case("experimental-zbproposedc", HasZbproposedc)
- .Case("experimental-zbr", HasZbr)
- .Case("experimental-zbs", HasZbs)
- .Case("experimental-zbt", HasZbt)
- .Case("experimental-zfh", HasZfh)
- .Case("experimental-zvamo", HasZvamo)
- .Case("experimental-zvlsseg", HasZvlsseg)
- .Default(false);
+ auto Result = llvm::StringSwitch<Optional<bool>>(Feature)
+ .Case("riscv", true)
+ .Case("riscv32", !Is64Bit)
+ .Case("riscv64", Is64Bit)
+ .Case("64bit", Is64Bit)
+ .Default(None);
+ if (Result.hasValue())
+ return Result.getValue();
+
+ if (ISAInfo->isSupportedExtensionFeature(Feature))
+ return ISAInfo->hasExtension(Feature);
+
+ return false;
}
/// Perform initialization based on the user configured set of features.
bool RISCVTargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
DiagnosticsEngine &Diags) {
- for (const auto &Feature : Features) {
- if (Feature == "+m")
- HasM = true;
- else if (Feature == "+a")
- HasA = true;
- else if (Feature == "+f")
- HasF = true;
- else if (Feature == "+d")
- HasD = true;
- else if (Feature == "+c")
- HasC = true;
- else if (Feature == "+experimental-b")
- HasB = true;
- else if (Feature == "+experimental-v")
- HasV = true;
- else if (Feature == "+experimental-zba")
- HasZba = true;
- else if (Feature == "+experimental-zbb")
- HasZbb = true;
- else if (Feature == "+experimental-zbc")
- HasZbc = true;
- else if (Feature == "+experimental-zbe")
- HasZbe = true;
- else if (Feature == "+experimental-zbf")
- HasZbf = true;
- else if (Feature == "+experimental-zbm")
- HasZbm = true;
- else if (Feature == "+experimental-zbp")
- HasZbp = true;
- else if (Feature == "+experimental-zbproposedc")
- HasZbproposedc = true;
- else if (Feature == "+experimental-zbr")
- HasZbr = true;
- else if (Feature == "+experimental-zbs")
- HasZbs = true;
- else if (Feature == "+experimental-zbt")
- HasZbt = true;
- else if (Feature == "+experimental-zfh")
- HasZfh = true;
- else if (Feature == "+experimental-zvamo")
- HasZvamo = true;
- else if (Feature == "+experimental-zvlsseg")
- HasZvlsseg = true;
+ unsigned XLen = getTriple().isArch64Bit() ? 64 : 32;
+ auto ParseResult = llvm::RISCVISAInfo::parseFeatures(XLen, Features);
+ if (!ParseResult) {
+ std::string Buffer;
+ llvm::raw_string_ostream OutputErrMsg(Buffer);
+ handleAllErrors(ParseResult.takeError(), [&](llvm::StringError &ErrMsg) {
+ OutputErrMsg << ErrMsg.getMessage();
+ });
+ Diags.Report(diag::err_invalid_feature_combination) << OutputErrMsg.str();
+ return false;
+ } else {
+ ISAInfo = std::move(*ParseResult);
}
return true;
diff --git a/clang/lib/Basic/Targets/RISCV.h b/clang/lib/Basic/Targets/RISCV.h
index 7e0846581ca1..f7ffe9febcd0 100644
--- a/clang/lib/Basic/Targets/RISCV.h
+++ b/clang/lib/Basic/Targets/RISCV.h
@@ -17,6 +17,7 @@
#include "clang/Basic/TargetOptions.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/RISCVISAInfo.h"
namespace clang {
namespace targets {
@@ -25,28 +26,7 @@ namespace targets {
class RISCVTargetInfo : public TargetInfo {
protected:
std::string ABI, CPU;
- bool HasM = false;
- bool HasA = false;
- bool HasF = false;
- bool HasD = false;
- bool HasC = false;
- bool HasB = false;
- bool HasV = false;
- bool HasZba = false;
- bool HasZbb = false;
- bool HasZbc = false;
- bool HasZbe = false;
- bool HasZbf = false;
- bool HasZbm = false;
- bool HasZbp = false;
- bool HasZbproposedc = false;
- bool HasZbr = false;
- bool HasZbs = false;
- bool HasZbt = false;
- bool HasZfh = false;
- bool HasZvamo = false;
- bool HasZvlsseg = false;
-
+ std::unique_ptr<llvm::RISCVISAInfo> ISAInfo;
static const Builtin::Info BuiltinInfo[];
public:
@@ -82,6 +62,11 @@ public:
const char *getClobbers() const override { return ""; }
+ StringRef getConstraintRegister(StringRef Constraint,
+ StringRef Expression) const override {
+ return Expression;
+ }
+
ArrayRef<const char *> getGCCRegNames() const override;
int getEHDataRegisterNumber(unsigned RegNo) const override {
@@ -138,7 +123,7 @@ public:
void setMaxAtomicWidth() override {
MaxAtomicPromoteWidth = 128;
- if (HasA)
+ if (ISAInfo->hasExtension("a"))
MaxAtomicInlineWidth = 32;
}
};
@@ -167,7 +152,7 @@ public:
void setMaxAtomicWidth() override {
MaxAtomicPromoteWidth = 128;
- if (HasA)
+ if (ISAInfo->hasExtension("a"))
MaxAtomicInlineWidth = 64;
}
};
diff --git a/clang/lib/Basic/Targets/SPIR.cpp b/clang/lib/Basic/Targets/SPIR.cpp
index 9b7aab85314a..09d482a8b9ef 100644
--- a/clang/lib/Basic/Targets/SPIR.cpp
+++ b/clang/lib/Basic/Targets/SPIR.cpp
@@ -1,4 +1,4 @@
-//===--- SPIR.cpp - Implement SPIR target feature support -----------------===//
+//===--- SPIR.cpp - Implement SPIR and SPIR-V target feature support ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements SPIR TargetInfo objects.
+// This file implements SPIR and SPIR-V TargetInfo objects.
//
//===----------------------------------------------------------------------===//
@@ -32,3 +32,20 @@ void SPIR64TargetInfo::getTargetDefines(const LangOptions &Opts,
SPIRTargetInfo::getTargetDefines(Opts, Builder);
DefineStd(Builder, "SPIR64", Opts);
}
+
+void SPIRVTargetInfo::getTargetDefines(const LangOptions &Opts,
+ MacroBuilder &Builder) const {
+ DefineStd(Builder, "SPIRV", Opts);
+}
+
+void SPIRV32TargetInfo::getTargetDefines(const LangOptions &Opts,
+ MacroBuilder &Builder) const {
+ SPIRVTargetInfo::getTargetDefines(Opts, Builder);
+ DefineStd(Builder, "SPIRV32", Opts);
+}
+
+void SPIRV64TargetInfo::getTargetDefines(const LangOptions &Opts,
+ MacroBuilder &Builder) const {
+ SPIRVTargetInfo::getTargetDefines(Opts, Builder);
+ DefineStd(Builder, "SPIRV64", Opts);
+}
diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h
index 50f34abd6630..704b1843dfed 100644
--- a/clang/lib/Basic/Targets/SPIR.h
+++ b/clang/lib/Basic/Targets/SPIR.h
@@ -1,4 +1,4 @@
-//===--- SPIR.h - Declare SPIR target feature support -----------*- C++ -*-===//
+//===--- SPIR.h - Declare SPIR and SPIR-V target feature support *- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file declares SPIR TargetInfo objects.
+// This file declares SPIR and SPIR-V TargetInfo objects.
//
//===----------------------------------------------------------------------===//
@@ -21,6 +21,7 @@
namespace clang {
namespace targets {
+// Used by both the SPIR and SPIR-V targets.
static const unsigned SPIRDefIsPrivMap[] = {
0, // Default
1, // opencl_global
@@ -44,6 +45,7 @@ static const unsigned SPIRDefIsPrivMap[] = {
0 // ptr64
};
+// Used by both the SPIR and SPIR-V targets.
static const unsigned SPIRDefIsGenMap[] = {
4, // Default
// OpenCL address space values for this map are dummy and they can't be used
@@ -67,14 +69,15 @@ static const unsigned SPIRDefIsGenMap[] = {
0 // ptr64
};
-class LLVM_LIBRARY_VISIBILITY SPIRTargetInfo : public TargetInfo {
-public:
- SPIRTargetInfo(const llvm::Triple &Triple, const TargetOptions &)
+// Base class for SPIR and SPIR-V target info.
+class LLVM_LIBRARY_VISIBILITY BaseSPIRTargetInfo : public TargetInfo {
+protected:
+ BaseSPIRTargetInfo(const llvm::Triple &Triple, const TargetOptions &)
: TargetInfo(Triple) {
assert(getTriple().getOS() == llvm::Triple::UnknownOS &&
- "SPIR target must use unknown OS");
+ "SPIR(-V) target must use unknown OS");
assert(getTriple().getEnvironment() == llvm::Triple::UnknownEnvironment &&
- "SPIR target must use unknown environment type");
+ "SPIR(-V) target must use unknown environment type");
TLSSupported = false;
VLASupported = false;
LongWidth = LongAlign = 64;
@@ -87,13 +90,7 @@ public:
NoAsmVariants = true;
}
- void getTargetDefines(const LangOptions &Opts,
- MacroBuilder &Builder) const override;
-
- bool hasFeature(StringRef Feature) const override {
- return Feature == "spir";
- }
-
+public:
// SPIR supports the half type and the only llvm intrinsic allowed in SPIR is
// memcpy as per section 3 of the SPIR spec.
bool useFP16ConversionIntrinsics() const override { return false; }
@@ -149,7 +146,7 @@ public:
void setSupportedOpenCLOpts() override {
// Assume all OpenCL extensions and optional core features are supported
- // for SPIR since it is a generic target.
+ // for SPIR and SPIR-V since they are generic targets.
supportAllOpenCLOpts();
}
@@ -158,6 +155,24 @@ public:
bool hasInt128Type() const override { return false; }
};
+class LLVM_LIBRARY_VISIBILITY SPIRTargetInfo : public BaseSPIRTargetInfo {
+public:
+ SPIRTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : BaseSPIRTargetInfo(Triple, Opts) {
+ assert(getTriple().getOS() == llvm::Triple::UnknownOS &&
+ "SPIR target must use unknown OS");
+ assert(getTriple().getEnvironment() == llvm::Triple::UnknownEnvironment &&
+ "SPIR target must use unknown environment type");
+ }
+
+ void getTargetDefines(const LangOptions &Opts,
+ MacroBuilder &Builder) const override;
+
+ bool hasFeature(StringRef Feature) const override {
+ return Feature == "spir";
+ }
+};
+
class LLVM_LIBRARY_VISIBILITY SPIR32TargetInfo : public SPIRTargetInfo {
public:
SPIR32TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
@@ -187,6 +202,55 @@ public:
void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const override;
};
+
+class LLVM_LIBRARY_VISIBILITY SPIRVTargetInfo : public BaseSPIRTargetInfo {
+public:
+ SPIRVTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : BaseSPIRTargetInfo(Triple, Opts) {
+ assert(getTriple().getOS() == llvm::Triple::UnknownOS &&
+ "SPIR-V target must use unknown OS");
+ assert(getTriple().getEnvironment() == llvm::Triple::UnknownEnvironment &&
+ "SPIR-V target must use unknown environment type");
+ }
+
+ void getTargetDefines(const LangOptions &Opts,
+ MacroBuilder &Builder) const override;
+
+ bool hasFeature(StringRef Feature) const override {
+ return Feature == "spirv";
+ }
+};
+
+class LLVM_LIBRARY_VISIBILITY SPIRV32TargetInfo : public SPIRVTargetInfo {
+public:
+ SPIRV32TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : SPIRVTargetInfo(Triple, Opts) {
+ PointerWidth = PointerAlign = 32;
+ SizeType = TargetInfo::UnsignedInt;
+ PtrDiffType = IntPtrType = TargetInfo::SignedInt;
+ resetDataLayout("e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-"
+ "v96:128-v192:256-v256:256-v512:512-v1024:1024");
+ }
+
+ void getTargetDefines(const LangOptions &Opts,
+ MacroBuilder &Builder) const override;
+};
+
+class LLVM_LIBRARY_VISIBILITY SPIRV64TargetInfo : public SPIRVTargetInfo {
+public:
+ SPIRV64TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+ : SPIRVTargetInfo(Triple, Opts) {
+ PointerWidth = PointerAlign = 64;
+ SizeType = TargetInfo::UnsignedLong;
+ PtrDiffType = IntPtrType = TargetInfo::SignedLong;
+ resetDataLayout("e-i64:64-v16:16-v24:32-v32:32-v48:64-"
+ "v96:128-v192:256-v256:256-v512:512-v1024:1024");
+ }
+
+ void getTargetDefines(const LangOptions &Opts,
+ MacroBuilder &Builder) const override;
+};
+
} // namespace targets
} // namespace clang
#endif // LLVM_CLANG_LIB_BASIC_TARGETS_SPIR_H
diff --git a/clang/lib/Basic/Targets/Sparc.h b/clang/lib/Basic/Targets/Sparc.h
index 07844abafe11..22a1621fcb9f 100644
--- a/clang/lib/Basic/Targets/Sparc.h
+++ b/clang/lib/Basic/Targets/Sparc.h
@@ -39,10 +39,8 @@ public:
bool handleTargetFeatures(std::vector<std::string> &Features,
DiagnosticsEngine &Diags) override {
// Check if software floating point is enabled
- auto Feature = llvm::find(Features, "+soft-float");
- if (Feature != Features.end()) {
+ if (llvm::is_contained(Features, "+soft-float"))
SoftFloat = true;
- }
return true;
}
void getTargetDefines(const LangOptions &Opts,
diff --git a/clang/lib/Basic/Targets/SystemZ.h b/clang/lib/Basic/Targets/SystemZ.h
index b749c3f75d18..d3e3ed50dd47 100644
--- a/clang/lib/Basic/Targets/SystemZ.h
+++ b/clang/lib/Basic/Targets/SystemZ.h
@@ -46,7 +46,17 @@ public:
LongDoubleFormat = &llvm::APFloat::IEEEquad();
DefaultAlignForAttributeAligned = 64;
MinGlobalAlign = 16;
- resetDataLayout("E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64");
+ if (Triple.isOSzOS()) {
+ // All vector types are default aligned on an 8-byte boundary, even if the
+ // vector facility is not available. That is different from Linux.
+ MaxVectorAlign = 64;
+ // Compared to Linux/ELF, the data layout differs only in some details:
+ // - name mangling is GOFF
+ // - 128 bit vector types are 64 bit aligned
+ resetDataLayout(
+ "E-m:l-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64");
+ } else
+ resetDataLayout("E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64");
MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
HasStrictFP = true;
}
@@ -129,7 +139,7 @@ public:
HasVector &= !SoftFloat;
// If we use the vector ABI, vector types are 64-bit aligned.
- if (HasVector) {
+ if (HasVector && !getTriple().isOSzOS()) {
MaxVectorAlign = 64;
resetDataLayout("E-m:e-i1:8:16-i8:8:16-i64:64-f128:64"
"-v128:64-a:8:16-n32:64");
diff --git a/clang/lib/Basic/Targets/WebAssembly.cpp b/clang/lib/Basic/Targets/WebAssembly.cpp
index 7ef79849cb75..4cba861f61d2 100644
--- a/clang/lib/Basic/Targets/WebAssembly.cpp
+++ b/clang/lib/Basic/Targets/WebAssembly.cpp
@@ -46,6 +46,7 @@ bool WebAssemblyTargetInfo::setABI(const std::string &Name) {
bool WebAssemblyTargetInfo::hasFeature(StringRef Feature) const {
return llvm::StringSwitch<bool>(Feature)
.Case("simd128", SIMDLevel >= SIMD128)
+ .Case("relaxed-simd", SIMDLevel >= RelaxedSIMD)
.Case("nontrapping-fptoint", HasNontrappingFPToInt)
.Case("sign-ext", HasSignExt)
.Case("exception-handling", HasExceptionHandling)
@@ -59,7 +60,7 @@ bool WebAssemblyTargetInfo::hasFeature(StringRef Feature) const {
}
bool WebAssemblyTargetInfo::isValidCPUName(StringRef Name) const {
- return llvm::find(ValidCPUNames, Name) != std::end(ValidCPUNames);
+ return llvm::is_contained(ValidCPUNames, Name);
}
void WebAssemblyTargetInfo::fillValidCPUList(
@@ -72,6 +73,8 @@ void WebAssemblyTargetInfo::getTargetDefines(const LangOptions &Opts,
defineCPUMacros(Builder, "wasm", /*Tuning=*/false);
if (SIMDLevel >= SIMD128)
Builder.defineMacro("__wasm_simd128__");
+ if (SIMDLevel >= RelaxedSIMD)
+ Builder.defineMacro("__wasm_relaxed_simd__");
if (HasNontrappingFPToInt)
Builder.defineMacro("__wasm_nontrapping_fptoint__");
if (HasSignExt)
@@ -96,6 +99,9 @@ void WebAssemblyTargetInfo::setSIMDLevel(llvm::StringMap<bool> &Features,
SIMDEnum Level, bool Enabled) {
if (Enabled) {
switch (Level) {
+ case RelaxedSIMD:
+ Features["relaxed-simd"] = true;
+ LLVM_FALLTHROUGH;
case SIMD128:
Features["simd128"] = true;
LLVM_FALLTHROUGH;
@@ -109,6 +115,9 @@ void WebAssemblyTargetInfo::setSIMDLevel(llvm::StringMap<bool> &Features,
case NoSIMD:
case SIMD128:
Features["simd128"] = false;
+ LLVM_FALLTHROUGH;
+ case RelaxedSIMD:
+ Features["relaxed-simd"] = false;
break;
}
}
@@ -118,6 +127,8 @@ void WebAssemblyTargetInfo::setFeatureEnabled(llvm::StringMap<bool> &Features,
bool Enabled) const {
if (Name == "simd128")
setSIMDLevel(Features, SIMD128, Enabled);
+ else if (Name == "relaxed-simd")
+ setSIMDLevel(Features, RelaxedSIMD, Enabled);
else
Features[Name] = Enabled;
}
@@ -149,6 +160,14 @@ bool WebAssemblyTargetInfo::handleTargetFeatures(
SIMDLevel = std::min(SIMDLevel, SIMDEnum(SIMD128 - 1));
continue;
}
+ if (Feature == "+relaxed-simd") {
+ SIMDLevel = std::max(SIMDLevel, RelaxedSIMD);
+ continue;
+ }
+ if (Feature == "-relaxed-simd") {
+ SIMDLevel = std::min(SIMDLevel, SIMDEnum(RelaxedSIMD - 1));
+ continue;
+ }
if (Feature == "+nontrapping-fptoint") {
HasNontrappingFPToInt = true;
continue;
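
The relaxed-SIMD support extends the ordered SIMD level enum: enabling a higher level implies the lower ones (via the fallthrough in setSIMDLevel), and handleTargetFeatures raises or clamps the level with max/min. A tiny sketch of that lattice behaviour, outside of clang's feature plumbing:

    #include <algorithm>
    #include <iostream>

    enum SIMDEnum { NoSIMD, SIMD128, RelaxedSIMD };

    int main() {
      SIMDEnum Level = NoSIMD;
      Level = std::max(Level, SIMD128);                   // "+simd128"
      Level = std::max(Level, RelaxedSIMD);               // "+relaxed-simd"
      Level = std::min(Level, SIMDEnum(RelaxedSIMD - 1)); // "-relaxed-simd"
      // Disabling relaxed-simd clamps back to SIMD128 but keeps it enabled.
      std::cout << (Level == SIMD128) << '\n'; // 1
    }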
diff --git a/clang/lib/Basic/Targets/WebAssembly.h b/clang/lib/Basic/Targets/WebAssembly.h
index 4a5ba25c75e7..16534d3ef99b 100644
--- a/clang/lib/Basic/Targets/WebAssembly.h
+++ b/clang/lib/Basic/Targets/WebAssembly.h
@@ -27,6 +27,7 @@ class LLVM_LIBRARY_VISIBILITY WebAssemblyTargetInfo : public TargetInfo {
enum SIMDEnum {
NoSIMD,
SIMD128,
+ RelaxedSIMD,
} SIMDLevel = NoSIMD;
bool HasNontrappingFPToInt = false;
@@ -150,9 +151,11 @@ public:
const TargetOptions &Opts)
: WebAssemblyTargetInfo(T, Opts) {
if (T.isOSEmscripten())
- resetDataLayout("e-m:e-p:32:32-i64:64-f128:64-n32:64-S128-ni:1:10:20");
+ resetDataLayout("e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-f128:64-n32:64-"
+ "S128-ni:1:10:20");
else
- resetDataLayout("e-m:e-p:32:32-i64:64-n32:64-S128-ni:1:10:20");
+ resetDataLayout(
+ "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20");
}
protected:
@@ -172,9 +175,11 @@ public:
PtrDiffType = SignedLong;
IntPtrType = SignedLong;
if (T.isOSEmscripten())
- resetDataLayout("e-m:e-p:64:64-i64:64-f128:64-n32:64-S128-ni:1:10:20");
+ resetDataLayout("e-m:e-p:64:64-p10:8:8-p20:8:8-i64:64-f128:64-n32:64-"
+ "S128-ni:1:10:20");
else
- resetDataLayout("e-m:e-p:64:64-i64:64-n32:64-S128-ni:1:10:20");
+ resetDataLayout(
+ "e-m:e-p:64:64-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20");
}
protected:
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index 9db96c20250f..5e3686893719 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -139,22 +139,28 @@ bool X86TargetInfo::initFeatureMap(
// Enable popcnt if sse4.2 is enabled and popcnt is not explicitly disabled.
auto I = Features.find("sse4.2");
if (I != Features.end() && I->getValue() &&
- llvm::find(UpdatedFeaturesVec, "-popcnt") == UpdatedFeaturesVec.end())
+ !llvm::is_contained(UpdatedFeaturesVec, "-popcnt"))
Features["popcnt"] = true;
// Additionally, if SSE is enabled and mmx is not explicitly disabled,
// then enable MMX.
I = Features.find("sse");
if (I != Features.end() && I->getValue() &&
- llvm::find(UpdatedFeaturesVec, "-mmx") == UpdatedFeaturesVec.end())
+ !llvm::is_contained(UpdatedFeaturesVec, "-mmx"))
Features["mmx"] = true;
// Enable xsave if avx is enabled and xsave is not explicitly disabled.
I = Features.find("avx");
if (I != Features.end() && I->getValue() &&
- llvm::find(UpdatedFeaturesVec, "-xsave") == UpdatedFeaturesVec.end())
+ !llvm::is_contained(UpdatedFeaturesVec, "-xsave"))
Features["xsave"] = true;
+ // Enable CRC32 if SSE4.2 is enabled and CRC32 is not explicitly disabled.
+ I = Features.find("sse4.2");
+ if (I != Features.end() && I->getValue() &&
+ !llvm::is_contained(UpdatedFeaturesVec, "-crc32"))
+ Features["crc32"] = true;
+
return true;
}
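
CRC32 joins popcnt, mmx and xsave as an implied feature: it is switched on when SSE4.2 is enabled unless the user explicitly disabled it. A minimal sketch of that implication, with an illustrative feature map in place of clang's:

    #include <algorithm>
    #include <iostream>
    #include <map>
    #include <string>
    #include <vector>

    int main() {
      std::map<std::string, bool> Features = {{"sse4.2", true}};
      std::vector<std::string> UserFeatures = {"+avx"}; // no explicit "-crc32"
      bool ExplicitlyDisabled =
          std::find(UserFeatures.begin(), UserFeatures.end(), "-crc32") !=
          UserFeatures.end();
      // Imply crc32 from sse4.2 unless the user opted out.
      if (Features["sse4.2"] && !ExplicitlyDisabled)
        Features["crc32"] = true;
      std::cout << Features["crc32"] << '\n'; // 1
    }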
@@ -231,8 +237,11 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasAVX512BF16 = true;
} else if (Feature == "+avx512er") {
HasAVX512ER = true;
+ } else if (Feature == "+avx512fp16") {
+ HasAVX512FP16 = true;
} else if (Feature == "+avx512pf") {
HasAVX512PF = true;
+ HasLegalHalfType = true;
} else if (Feature == "+avx512dq") {
HasAVX512DQ = true;
} else if (Feature == "+avx512bitalg") {
@@ -327,6 +336,10 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasTSXLDTRK = true;
} else if (Feature == "+uintr") {
HasUINTR = true;
+ } else if (Feature == "+crc32") {
+ HasCRC32 = true;
+ } else if (Feature == "+x87") {
+ HasX87 = true;
}
X86SSEEnum Level = llvm::StringSwitch<X86SSEEnum>(Feature)
@@ -356,6 +369,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
.Default(NoXOP);
XOPLevel = std::max(XOPLevel, XLevel);
}
+ // Turn on _float16 for x86 (feature sse2)
+ HasFloat16 = SSELevel >= SSE2;
// LLVM doesn't have a separate switch for fpmath, so only accept it if it
// matches the selected sse level.
@@ -368,6 +383,14 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
SimdDefaultAlign =
hasFeature("avx512f") ? 512 : hasFeature("avx") ? 256 : 128;
+
+ if (!HasX87) {
+ if (LongDoubleFormat == &llvm::APFloat::x87DoubleExtended())
+ HasLongDouble = false;
+ if (getTriple().getArch() == llvm::Triple::x86)
+ HasFPReturn = false;
+ }
+
return true;
}
@@ -668,6 +691,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__AVX512BF16__");
if (HasAVX512ER)
Builder.defineMacro("__AVX512ER__");
+ if (HasAVX512FP16)
+ Builder.defineMacro("__AVX512FP16__");
if (HasAVX512PF)
Builder.defineMacro("__AVX512PF__");
if (HasAVX512DQ)
@@ -753,6 +778,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__TSXLDTRK__");
if (HasUINTR)
Builder.defineMacro("__UINTR__");
+ if (HasCRC32)
+ Builder.defineMacro("__CRC32__");
// Each case falls through to the previous one here.
switch (SSELevel) {
@@ -856,6 +883,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
.Case("avx512vnni", true)
.Case("avx512bf16", true)
.Case("avx512er", true)
+ .Case("avx512fp16", true)
.Case("avx512pf", true)
.Case("avx512dq", true)
.Case("avx512bitalg", true)
@@ -872,6 +900,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
.Case("clflushopt", true)
.Case("clwb", true)
.Case("clzero", true)
+ .Case("crc32", true)
.Case("cx16", true)
.Case("enqcmd", true)
.Case("f16c", true)
@@ -948,6 +977,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
.Case("avx512vnni", HasAVX512VNNI)
.Case("avx512bf16", HasAVX512BF16)
.Case("avx512er", HasAVX512ER)
+ .Case("avx512fp16", HasAVX512FP16)
.Case("avx512pf", HasAVX512PF)
.Case("avx512dq", HasAVX512DQ)
.Case("avx512bitalg", HasAVX512BITALG)
@@ -963,6 +993,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
.Case("clflushopt", HasCLFLUSHOPT)
.Case("clwb", HasCLWB)
.Case("clzero", HasCLZERO)
+ .Case("crc32", HasCRC32)
.Case("cx8", HasCX8)
.Case("cx16", HasCX16)
.Case("enqcmd", HasENQCMD)
@@ -1019,6 +1050,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
.Case("x86", true)
.Case("x86_32", getTriple().getArch() == llvm::Triple::x86)
.Case("x86_64", getTriple().getArch() == llvm::Triple::x86_64)
+ .Case("x87", HasX87)
.Case("xop", XOPLevel >= XOP)
.Case("xsave", HasXSAVE)
.Case("xsavec", HasXSAVEC)
@@ -1034,35 +1066,22 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
// X86TargetInfo::hasFeature for a somewhat comprehensive list).
bool X86TargetInfo::validateCpuSupports(StringRef FeatureStr) const {
return llvm::StringSwitch<bool>(FeatureStr)
-#define X86_FEATURE_COMPAT(ENUM, STR) .Case(STR, true)
+#define X86_FEATURE_COMPAT(ENUM, STR, PRIORITY) .Case(STR, true)
#include "llvm/Support/X86TargetParser.def"
.Default(false);
}
static llvm::X86::ProcessorFeatures getFeature(StringRef Name) {
return llvm::StringSwitch<llvm::X86::ProcessorFeatures>(Name)
-#define X86_FEATURE_COMPAT(ENUM, STR) .Case(STR, llvm::X86::FEATURE_##ENUM)
+#define X86_FEATURE_COMPAT(ENUM, STR, PRIORITY) \
+ .Case(STR, llvm::X86::FEATURE_##ENUM)
+
#include "llvm/Support/X86TargetParser.def"
;
// Note, this function should only be used after ensuring the value is
// correct, so it asserts if the value is out of range.
}
-static unsigned getFeaturePriority(llvm::X86::ProcessorFeatures Feat) {
- enum class FeatPriority {
-#define FEATURE(FEAT) FEAT,
-#include "clang/Basic/X86Target.def"
- };
- switch (Feat) {
-#define FEATURE(FEAT) \
- case llvm::X86::FEAT: \
- return static_cast<unsigned>(FeatPriority::FEAT);
-#include "clang/Basic/X86Target.def"
- default:
- llvm_unreachable("No Feature Priority for non-CPUSupports Features");
- }
-}
-
unsigned X86TargetInfo::multiVersionSortPriority(StringRef Name) const {
// Valid CPUs have a 'key feature' that compares just better than its key
// feature.
@@ -1082,21 +1101,21 @@ bool X86TargetInfo::validateCPUSpecificCPUDispatch(StringRef Name) const {
return llvm::StringSwitch<bool>(Name)
#define CPU_SPECIFIC(NAME, MANGLING, FEATURES) .Case(NAME, true)
#define CPU_SPECIFIC_ALIAS(NEW_NAME, NAME) .Case(NEW_NAME, true)
-#include "clang/Basic/X86Target.def"
+#include "llvm/Support/X86TargetParser.def"
.Default(false);
}
static StringRef CPUSpecificCPUDispatchNameDealias(StringRef Name) {
return llvm::StringSwitch<StringRef>(Name)
#define CPU_SPECIFIC_ALIAS(NEW_NAME, NAME) .Case(NEW_NAME, NAME)
-#include "clang/Basic/X86Target.def"
+#include "llvm/Support/X86TargetParser.def"
.Default(Name);
}
char X86TargetInfo::CPUSpecificManglingCharacter(StringRef Name) const {
return llvm::StringSwitch<char>(CPUSpecificCPUDispatchNameDealias(Name))
#define CPU_SPECIFIC(NAME, MANGLING, FEATURES) .Case(NAME, MANGLING)
-#include "clang/Basic/X86Target.def"
+#include "llvm/Support/X86TargetParser.def"
.Default(0);
}
@@ -1105,7 +1124,7 @@ void X86TargetInfo::getCPUSpecificCPUDispatchFeatures(
StringRef WholeList =
llvm::StringSwitch<StringRef>(CPUSpecificCPUDispatchNameDealias(Name))
#define CPU_SPECIFIC(NAME, MANGLING, FEATURES) .Case(NAME, FEATURES)
-#include "clang/Basic/X86Target.def"
+#include "llvm/Support/X86TargetParser.def"
.Default("");
WholeList.split(Features, ',', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
}
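
A minimal usage sketch (not from the patch) of what the new "crc32" feature name enables once isValidFeatureName and hasFeature accept it, assuming the CRC32 builtins are gated on this feature as done elsewhere in this import; the function name is illustrative:

    /* Request the "crc32" target feature for one function only and use the
       SSE4.2 CRC32 builtin; the rest of the translation unit is unaffected. */
    __attribute__((target("crc32")))
    unsigned crc32_step(unsigned crc, unsigned data) {
      return __builtin_ia32_crc32si(crc, data);
    }
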
diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h
index fcaaf50624e9..b9b2ac79815b 100644
--- a/clang/lib/Basic/Targets/X86.h
+++ b/clang/lib/Basic/Targets/X86.h
@@ -92,6 +92,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
bool HasAVX512CD = false;
bool HasAVX512VPOPCNTDQ = false;
bool HasAVX512VNNI = false;
+ bool HasAVX512FP16 = false;
bool HasAVX512BF16 = false;
bool HasAVX512ER = false;
bool HasAVX512PF = false;
@@ -142,6 +143,8 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
bool HasSERIALIZE = false;
bool HasTSXLDTRK = false;
bool HasUINTR = false;
+ bool HasCRC32 = false;
+ bool HasX87 = false;
protected:
llvm::X86::CPUKind CPU = llvm::X86::CK_None;
@@ -410,8 +413,8 @@ public:
// Use fpret for all types.
RealTypeUsesObjCFPRet =
- ((1 << TargetInfo::Float) | (1 << TargetInfo::Double) |
- (1 << TargetInfo::LongDouble));
+ ((1 << (int)FloatModeKind::Float) | (1 << (int)FloatModeKind::Double) |
+ (1 << (int)FloatModeKind::LongDouble));
// x86-32 has atomics up to 8 bytes
MaxAtomicPromoteWidth = 64;
@@ -690,7 +693,7 @@ public:
"64-i64:64-f80:128-n8:16:32:64-S128");
// Use fpret only for long double.
- RealTypeUsesObjCFPRet = (1 << TargetInfo::LongDouble);
+ RealTypeUsesObjCFPRet = (1 << (int)FloatModeKind::LongDouble);
// Use fp2ret for _Complex long double.
ComplexLongDoubleUsesFP2Ret = true;
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index 481f5347d978..648c7b3df8ed 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -38,6 +38,7 @@
#include "llvm/LTO/LTOBackend.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Passes/PassPlugin.h"
#include "llvm/Passes/StandardInstrumentations.h"
@@ -45,7 +46,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PrettyStackTrace.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TimeProfiler.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/ToolOutputFile.h"
@@ -147,6 +147,14 @@ class EmitAssemblyHelper {
return F;
}
+ void
+ RunOptimizationPipeline(BackendAction Action,
+ std::unique_ptr<raw_pwrite_stream> &OS,
+ std::unique_ptr<llvm::ToolOutputFile> &ThinLinkOS);
+ void RunCodegenPipeline(BackendAction Action,
+ std::unique_ptr<raw_pwrite_stream> &OS,
+ std::unique_ptr<llvm::ToolOutputFile> &DwoOS);
+
public:
EmitAssemblyHelper(DiagnosticsEngine &_Diags,
const HeaderSearchOptions &HeaderSearchOpts,
@@ -164,11 +172,16 @@ public:
std::unique_ptr<TargetMachine> TM;
+ // Emit output using the legacy pass manager for the optimization pipeline.
+ // This will be removed soon when using the legacy pass manager for the
+ // optimization pipeline is no longer supported.
+ void EmitAssemblyWithLegacyPassManager(BackendAction Action,
+ std::unique_ptr<raw_pwrite_stream> OS);
+
+ // Emit output using the new pass manager for the optimization pipeline. This
+ // is the default.
void EmitAssembly(BackendAction Action,
std::unique_ptr<raw_pwrite_stream> OS);
-
- void EmitAssemblyWithNewPassManager(BackendAction Action,
- std::unique_ptr<raw_pwrite_stream> OS);
};
// We need this wrapper to access LangOpts and CGOpts from extension functions
@@ -234,6 +247,8 @@ getSancovOptsFromCGOpts(const CodeGenOptions &CGOpts) {
Opts.InlineBoolFlag = CGOpts.SanitizeCoverageInlineBoolFlag;
Opts.PCTable = CGOpts.SanitizeCoveragePCTable;
Opts.StackDepth = CGOpts.SanitizeCoverageStackDepth;
+ Opts.TraceLoads = CGOpts.SanitizeCoverageTraceLoads;
+ Opts.TraceStores = CGOpts.SanitizeCoverageTraceStores;
return Opts;
}
@@ -474,6 +489,11 @@ static CodeGenFileType getCodeGenFileType(BackendAction Action) {
}
}
+static bool actionRequiresCodeGen(BackendAction Action) {
+ return Action != Backend_EmitNothing && Action != Backend_EmitBC &&
+ Action != Backend_EmitLL;
+}
+
static bool initTargetOptions(DiagnosticsEngine &Diags,
llvm::TargetOptions &Options,
const CodeGenOptions &CodeGenOpts,
@@ -539,6 +559,7 @@ static bool initTargetOptions(DiagnosticsEngine &Diags,
Options.NoNaNsFPMath = LangOpts.NoHonorNaNs;
Options.NoZerosInBSS = CodeGenOpts.NoZeroInitializedInBSS;
Options.UnsafeFPMath = LangOpts.UnsafeFPMath;
+ Options.ApproxFuncFPMath = LangOpts.ApproxFunc;
Options.BBSections =
llvm::StringSwitch<llvm::BasicBlockSection>(CodeGenOpts.BBSections)
@@ -576,10 +597,25 @@ static bool initTargetOptions(DiagnosticsEngine &Diags,
Options.ForceDwarfFrameSection = CodeGenOpts.ForceDwarfFrameSection;
Options.EmitCallSiteInfo = CodeGenOpts.EmitCallSiteInfo;
Options.EnableAIXExtendedAltivecABI = CodeGenOpts.EnableAIXExtendedAltivecABI;
- Options.PseudoProbeForProfiling = CodeGenOpts.PseudoProbeForProfiling;
Options.ValueTrackingVariableLocations =
CodeGenOpts.ValueTrackingVariableLocations;
Options.XRayOmitFunctionIndex = CodeGenOpts.XRayOmitFunctionIndex;
+ Options.LoopAlignment = CodeGenOpts.LoopAlignment;
+
+ switch (CodeGenOpts.getSwiftAsyncFramePointer()) {
+ case CodeGenOptions::SwiftAsyncFramePointerKind::Auto:
+ Options.SwiftAsyncFramePointer =
+ SwiftAsyncFramePointerMode::DeploymentBased;
+ break;
+
+ case CodeGenOptions::SwiftAsyncFramePointerKind::Always:
+ Options.SwiftAsyncFramePointer = SwiftAsyncFramePointerMode::Always;
+ break;
+
+ case CodeGenOptions::SwiftAsyncFramePointerKind::Never:
+ Options.SwiftAsyncFramePointer = SwiftAsyncFramePointerMode::Never;
+ break;
+ }
Options.MCOptions.SplitDwarfFile = CodeGenOpts.SplitDwarfFile;
Options.MCOptions.MCRelaxAll = CodeGenOpts.RelaxAll;
@@ -942,15 +978,13 @@ bool EmitAssemblyHelper::AddEmitPasses(legacy::PassManager &CodeGenPasses,
return true;
}
-void EmitAssemblyHelper::EmitAssembly(BackendAction Action,
- std::unique_ptr<raw_pwrite_stream> OS) {
+void EmitAssemblyHelper::EmitAssemblyWithLegacyPassManager(
+ BackendAction Action, std::unique_ptr<raw_pwrite_stream> OS) {
TimeRegion Region(CodeGenOpts.TimePasses ? &CodeGenerationTime : nullptr);
setCommandLineOpts(CodeGenOpts);
- bool UsesCodeGen = (Action != Backend_EmitNothing &&
- Action != Backend_EmitBC &&
- Action != Backend_EmitLL);
+ bool UsesCodeGen = actionRequiresCodeGen(Action);
CreateTargetMachine(UsesCodeGen);
if (UsesCodeGen && !TM)
@@ -977,6 +1011,12 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action,
CreatePasses(PerModulePasses, PerFunctionPasses);
+ // Add a verifier pass if requested. We don't have to do this if the action
+ // requires code generation because there will already be a verifier pass in
+ // the code-generation pipeline.
+ if (!UsesCodeGen && CodeGenOpts.VerifyModule)
+ PerModulePasses.add(createVerifierPass());
+
legacy::PassManager CodeGenPasses;
CodeGenPasses.add(
createTargetTransformInfoWrapperPass(getTargetIRAnalysis()));
@@ -1069,16 +1109,16 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action,
DwoOS->keep();
}
-static PassBuilder::OptimizationLevel mapToLevel(const CodeGenOptions &Opts) {
+static OptimizationLevel mapToLevel(const CodeGenOptions &Opts) {
switch (Opts.OptimizationLevel) {
default:
llvm_unreachable("Invalid optimization level!");
case 0:
- return PassBuilder::OptimizationLevel::O0;
+ return OptimizationLevel::O0;
case 1:
- return PassBuilder::OptimizationLevel::O1;
+ return OptimizationLevel::O1;
case 2:
switch (Opts.OptimizeSize) {
@@ -1086,17 +1126,17 @@ static PassBuilder::OptimizationLevel mapToLevel(const CodeGenOptions &Opts) {
llvm_unreachable("Invalid optimization level for size!");
case 0:
- return PassBuilder::OptimizationLevel::O2;
+ return OptimizationLevel::O2;
case 1:
- return PassBuilder::OptimizationLevel::Os;
+ return OptimizationLevel::Os;
case 2:
- return PassBuilder::OptimizationLevel::Oz;
+ return OptimizationLevel::Oz;
}
case 3:
- return PassBuilder::OptimizationLevel::O3;
+ return OptimizationLevel::O3;
}
}
@@ -1104,7 +1144,7 @@ static void addSanitizers(const Triple &TargetTriple,
const CodeGenOptions &CodeGenOpts,
const LangOptions &LangOpts, PassBuilder &PB) {
PB.registerOptimizerLastEPCallback([&](ModulePassManager &MPM,
- PassBuilder::OptimizationLevel Level) {
+ OptimizationLevel Level) {
if (CodeGenOpts.hasSanitizeCoverage()) {
auto SancovOpts = getSancovOptsFromCGOpts(CodeGenOpts);
MPM.addPass(ModuleSanitizerCoveragePass(
@@ -1118,11 +1158,11 @@ static void addSanitizers(const Triple &TargetTriple,
bool Recover = CodeGenOpts.SanitizeRecover.has(Mask);
MPM.addPass(
- MemorySanitizerPass({TrackOrigins, Recover, CompileKernel}));
+ ModuleMemorySanitizerPass({TrackOrigins, Recover, CompileKernel}));
FunctionPassManager FPM;
FPM.addPass(
MemorySanitizerPass({TrackOrigins, Recover, CompileKernel}));
- if (Level != PassBuilder::OptimizationLevel::O0) {
+ if (Level != OptimizationLevel::O0) {
// MemorySanitizer inserts complex instrumentation that mostly
// follows the logic of the original code, but operates on
// "shadow" values. It can benefit from re-running some
@@ -1141,26 +1181,24 @@ static void addSanitizers(const Triple &TargetTriple,
MSanPass(SanitizerKind::KernelMemory, true);
if (LangOpts.Sanitize.has(SanitizerKind::Thread)) {
- MPM.addPass(ThreadSanitizerPass());
+ MPM.addPass(ModuleThreadSanitizerPass());
MPM.addPass(createModuleToFunctionPassAdaptor(ThreadSanitizerPass()));
}
auto ASanPass = [&](SanitizerMask Mask, bool CompileKernel) {
if (LangOpts.Sanitize.has(Mask)) {
- bool Recover = CodeGenOpts.SanitizeRecover.has(Mask);
- bool UseAfterScope = CodeGenOpts.SanitizeAddressUseAfterScope;
- bool ModuleUseAfterScope = asanUseGlobalsGC(TargetTriple, CodeGenOpts);
+ bool UseGlobalGC = asanUseGlobalsGC(TargetTriple, CodeGenOpts);
bool UseOdrIndicator = CodeGenOpts.SanitizeAddressUseOdrIndicator;
llvm::AsanDtorKind DestructorKind =
CodeGenOpts.getSanitizeAddressDtor();
- llvm::AsanDetectStackUseAfterReturnMode UseAfterReturn =
- CodeGenOpts.getSanitizeAddressUseAfterReturn();
+ AddressSanitizerOptions Opts;
+ Opts.CompileKernel = CompileKernel;
+ Opts.Recover = CodeGenOpts.SanitizeRecover.has(Mask);
+ Opts.UseAfterScope = CodeGenOpts.SanitizeAddressUseAfterScope;
+ Opts.UseAfterReturn = CodeGenOpts.getSanitizeAddressUseAfterReturn();
MPM.addPass(RequireAnalysisPass<ASanGlobalsMetadataAnalysis, Module>());
MPM.addPass(ModuleAddressSanitizerPass(
- CompileKernel, Recover, ModuleUseAfterScope, UseOdrIndicator,
- DestructorKind));
- MPM.addPass(createModuleToFunctionPassAdaptor(AddressSanitizerPass(
- CompileKernel, Recover, UseAfterScope, UseAfterReturn)));
+ Opts, UseGlobalGC, UseOdrIndicator, DestructorKind));
}
};
ASanPass(SanitizerKind::Address, false);
@@ -1170,8 +1208,8 @@ static void addSanitizers(const Triple &TargetTriple,
if (LangOpts.Sanitize.has(Mask)) {
bool Recover = CodeGenOpts.SanitizeRecover.has(Mask);
MPM.addPass(HWAddressSanitizerPass(
- CompileKernel, Recover,
- /*DisableOptimization=*/CodeGenOpts.OptimizationLevel == 0));
+ {CompileKernel, Recover,
+ /*DisableOptimization=*/CodeGenOpts.OptimizationLevel == 0}));
}
};
HWASanPass(SanitizerKind::HWAddress, false);
@@ -1183,29 +1221,9 @@ static void addSanitizers(const Triple &TargetTriple,
});
}
-/// A clean version of `EmitAssembly` that uses the new pass manager.
-///
-/// Not all features are currently supported in this system, but where
-/// necessary it falls back to the legacy pass manager to at least provide
-/// basic functionality.
-///
-/// This API is planned to have its functionality finished and then to replace
-/// `EmitAssembly` at some point in the future when the default switches.
-void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
- BackendAction Action, std::unique_ptr<raw_pwrite_stream> OS) {
- TimeRegion Region(CodeGenOpts.TimePasses ? &CodeGenerationTime : nullptr);
- setCommandLineOpts(CodeGenOpts);
-
- bool RequiresCodeGen = (Action != Backend_EmitNothing &&
- Action != Backend_EmitBC &&
- Action != Backend_EmitLL);
- CreateTargetMachine(RequiresCodeGen);
-
- if (RequiresCodeGen && !TM)
- return;
- if (TM)
- TheModule->setDataLayout(TM->createDataLayout());
-
+void EmitAssemblyHelper::RunOptimizationPipeline(
+ BackendAction Action, std::unique_ptr<raw_pwrite_stream> &OS,
+ std::unique_ptr<llvm::ToolOutputFile> &ThinLinkOS) {
Optional<PGOOptions> PGOOpt;
if (CodeGenOpts.hasProfileIRInstr())
@@ -1260,6 +1278,8 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
"", PGOOptions::NoAction, PGOOptions::CSIRInstr,
CodeGenOpts.DebugInfoForProfiling);
}
+ if (TM)
+ TM->setPGOOption(PGOOpt);
PipelineTuningOptions PTO;
PTO.LoopUnrolling = CodeGenOpts.UnrollLoops;
@@ -1303,9 +1323,6 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
get##Ext##PluginInfo().RegisterPassBuilderCallbacks(PB);
#include "llvm/Support/Extension.def"
- // Register the AA manager first so that our version is the one used.
- FAM.registerPass([&] { return PB.buildDefaultAAPipeline(); });
-
// Register the target library analysis directly and give it a customized
// preset TLI.
Triple TargetTriple(TheModule->getTargetTriple());
@@ -1325,26 +1342,26 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
if (!CodeGenOpts.DisableLLVMPasses) {
// Map our optimization levels into one of the distinct levels used to
// configure the pipeline.
- PassBuilder::OptimizationLevel Level = mapToLevel(CodeGenOpts);
+ OptimizationLevel Level = mapToLevel(CodeGenOpts);
bool IsThinLTO = CodeGenOpts.PrepareForThinLTO;
bool IsLTO = CodeGenOpts.PrepareForLTO;
if (LangOpts.ObjCAutoRefCount) {
PB.registerPipelineStartEPCallback(
- [](ModulePassManager &MPM, PassBuilder::OptimizationLevel Level) {
- if (Level != PassBuilder::OptimizationLevel::O0)
+ [](ModulePassManager &MPM, OptimizationLevel Level) {
+ if (Level != OptimizationLevel::O0)
MPM.addPass(
createModuleToFunctionPassAdaptor(ObjCARCExpandPass()));
});
PB.registerPipelineEarlySimplificationEPCallback(
- [](ModulePassManager &MPM, PassBuilder::OptimizationLevel Level) {
- if (Level != PassBuilder::OptimizationLevel::O0)
+ [](ModulePassManager &MPM, OptimizationLevel Level) {
+ if (Level != OptimizationLevel::O0)
MPM.addPass(ObjCARCAPElimPass());
});
PB.registerScalarOptimizerLateEPCallback(
- [](FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) {
- if (Level != PassBuilder::OptimizationLevel::O0)
+ [](FunctionPassManager &FPM, OptimizationLevel Level) {
+ if (Level != OptimizationLevel::O0)
FPM.addPass(ObjCARCOptPass());
});
}
@@ -1357,7 +1374,7 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
// vtables so that codegen doesn't complain.
if (IsThinLTOPostLink)
PB.registerPipelineStartEPCallback(
- [](ModulePassManager &MPM, PassBuilder::OptimizationLevel Level) {
+ [](ModulePassManager &MPM, OptimizationLevel Level) {
MPM.addPass(LowerTypeTestsPass(/*ExportSummary=*/nullptr,
/*ImportSummary=*/nullptr,
/*DropTypeTests=*/true));
@@ -1368,12 +1385,12 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
CodeGenOpts.InstrumentFunctionsAfterInlining ||
CodeGenOpts.InstrumentForProfiling) {
PB.registerPipelineStartEPCallback(
- [](ModulePassManager &MPM, PassBuilder::OptimizationLevel Level) {
+ [](ModulePassManager &MPM, OptimizationLevel Level) {
MPM.addPass(createModuleToFunctionPassAdaptor(
EntryExitInstrumenterPass(/*PostInlining=*/false)));
});
PB.registerOptimizerLastEPCallback(
- [](ModulePassManager &MPM, PassBuilder::OptimizationLevel Level) {
+ [](ModulePassManager &MPM, OptimizationLevel Level) {
MPM.addPass(createModuleToFunctionPassAdaptor(
EntryExitInstrumenterPass(/*PostInlining=*/true)));
});
@@ -1383,7 +1400,7 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
// of the pipeline.
if (LangOpts.Sanitize.has(SanitizerKind::LocalBounds))
PB.registerScalarOptimizerLateEPCallback(
- [](FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) {
+ [](FunctionPassManager &FPM, OptimizationLevel Level) {
FPM.addPass(BoundsCheckingPass());
});
@@ -1394,15 +1411,13 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
if (Optional<GCOVOptions> Options = getGCOVOptions(CodeGenOpts, LangOpts))
PB.registerPipelineStartEPCallback(
- [Options](ModulePassManager &MPM,
- PassBuilder::OptimizationLevel Level) {
+ [Options](ModulePassManager &MPM, OptimizationLevel Level) {
MPM.addPass(GCOVProfilerPass(*Options));
});
if (Optional<InstrProfOptions> Options =
getInstrProfOptions(CodeGenOpts, LangOpts))
PB.registerPipelineStartEPCallback(
- [Options](ModulePassManager &MPM,
- PassBuilder::OptimizationLevel Level) {
+ [Options](ModulePassManager &MPM, OptimizationLevel Level) {
MPM.addPass(InstrProfiling(*Options, false));
});
@@ -1422,17 +1437,13 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
}
}
- // FIXME: We still use the legacy pass manager to do code generation. We
- // create that pass manager here and use it as needed below.
- legacy::PassManager CodeGenPasses;
- bool NeedCodeGen = false;
- std::unique_ptr<llvm::ToolOutputFile> ThinLinkOS, DwoOS;
+ // Add a verifier pass if requested. We don't have to do this if the action
+ // requires code generation because there will already be a verifier pass in
+ // the code-generation pipeline.
+ if (!actionRequiresCodeGen(Action) && CodeGenOpts.VerifyModule)
+ MPM.addPass(VerifierPass());
- // Append any output we need to the pass manager.
switch (Action) {
- case Backend_EmitNothing:
- break;
-
case Backend_EmitBC:
if (CodeGenOpts.PrepareForThinLTO && !CodeGenOpts.DisableLLVMPasses) {
if (!CodeGenOpts.ThinLinkBitcodeFile.empty()) {
@@ -1448,8 +1459,7 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
// Emit a module summary by default for Regular LTO except for ld64
// targets
bool EmitLTOSummary =
- (CodeGenOpts.PrepareForLTO &&
- !CodeGenOpts.DisableLLVMPasses &&
+ (CodeGenOpts.PrepareForLTO && !CodeGenOpts.DisableLLVMPasses &&
llvm::Triple(TheModule->getTargetTriple()).getVendor() !=
llvm::Triple::Apple);
if (EmitLTOSummary) {
@@ -1467,10 +1477,28 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
MPM.addPass(PrintModulePass(*OS, "", CodeGenOpts.EmitLLVMUseLists));
break;
+ default:
+ break;
+ }
+
+ // Now that we have all of the passes ready, run them.
+ PrettyStackTraceString CrashInfo("Optimizer");
+ MPM.run(*TheModule, MAM);
+}
+
+void EmitAssemblyHelper::RunCodegenPipeline(
+ BackendAction Action, std::unique_ptr<raw_pwrite_stream> &OS,
+ std::unique_ptr<llvm::ToolOutputFile> &DwoOS) {
+ // We still use the legacy PM to run the codegen pipeline since the new PM
+ // does not work with the codegen pipeline.
+ // FIXME: make the new PM work with the codegen pipeline.
+ legacy::PassManager CodeGenPasses;
+
+ // Append any output we need to the pass manager.
+ switch (Action) {
case Backend_EmitAssembly:
case Backend_EmitMCNull:
case Backend_EmitObj:
- NeedCodeGen = true;
CodeGenPasses.add(
createTargetTransformInfoWrapperPass(getTargetIRAnalysis()));
if (!CodeGenOpts.SplitDwarfOutput.empty()) {
@@ -1483,22 +1511,41 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
// FIXME: Should we handle this error differently?
return;
break;
+ default:
+ return;
}
+ PrettyStackTraceString CrashInfo("Code generation");
+ CodeGenPasses.run(*TheModule);
+}
+
+/// A clean version of `EmitAssembly` that uses the new pass manager.
+///
+/// Not all features are currently supported in this system, but where
+/// necessary it falls back to the legacy pass manager to at least provide
+/// basic functionality.
+///
+/// This API is planned to have its functionality finished and then to replace
+/// `EmitAssembly` at some point in the future when the default switches.
+void EmitAssemblyHelper::EmitAssembly(BackendAction Action,
+ std::unique_ptr<raw_pwrite_stream> OS) {
+ TimeRegion Region(CodeGenOpts.TimePasses ? &CodeGenerationTime : nullptr);
+ setCommandLineOpts(CodeGenOpts);
+
+ bool RequiresCodeGen = actionRequiresCodeGen(Action);
+ CreateTargetMachine(RequiresCodeGen);
+
+ if (RequiresCodeGen && !TM)
+ return;
+ if (TM)
+ TheModule->setDataLayout(TM->createDataLayout());
+
// Before executing passes, print the final values of the LLVM options.
cl::PrintOptionValues();
- // Now that we have all of the passes ready, run them.
- {
- PrettyStackTraceString CrashInfo("Optimizer");
- MPM.run(*TheModule, MAM);
- }
-
- // Now if needed, run the legacy PM for codegen.
- if (NeedCodeGen) {
- PrettyStackTraceString CrashInfo("Code generation");
- CodeGenPasses.run(*TheModule);
- }
+ std::unique_ptr<llvm::ToolOutputFile> ThinLinkOS, DwoOS;
+ RunOptimizationPipeline(Action, OS, ThinLinkOS);
+ RunCodegenPipeline(Action, OS, DwoOS);
if (ThinLinkOS)
ThinLinkOS->keep();
@@ -1526,7 +1573,7 @@ static void runThinLTOBackend(
return;
auto AddStream = [&](size_t Task) {
- return std::make_unique<lto::NativeObjectStream>(std::move(OS));
+ return std::make_unique<CachedFileStream>(std::move(OS));
};
lto::Config Conf;
if (CGOpts.SaveTempsFilePrefix != "") {
@@ -1622,16 +1669,17 @@ void clang::EmitBackendOutput(DiagnosticsEngine &Diags,
// If we are performing a ThinLTO importing compile, load the function index
// into memory and pass it into runThinLTOBackend, which will run the
// function importer and invoke LTO passes.
- Expected<std::unique_ptr<ModuleSummaryIndex>> IndexOrErr =
- llvm::getModuleSummaryIndexForFile(CGOpts.ThinLTOIndexFile,
- /*IgnoreEmptyThinLTOIndexFile*/true);
- if (!IndexOrErr) {
- logAllUnhandledErrors(IndexOrErr.takeError(), errs(),
+ std::unique_ptr<ModuleSummaryIndex> CombinedIndex;
+ if (Error E = llvm::getModuleSummaryIndexForFile(
+ CGOpts.ThinLTOIndexFile,
+ /*IgnoreEmptyThinLTOIndexFile*/ true)
+ .moveInto(CombinedIndex)) {
+ logAllUnhandledErrors(std::move(E), errs(),
"Error loading index file '" +
CGOpts.ThinLTOIndexFile + "': ");
return;
}
- std::unique_ptr<ModuleSummaryIndex> CombinedIndex = std::move(*IndexOrErr);
+
// A null CombinedIndex means we should skip ThinLTO compilation
// (LLVM will optionally ignore empty index files, returning null instead
// of an error).
@@ -1656,8 +1704,8 @@ void clang::EmitBackendOutput(DiagnosticsEngine &Diags,
EmitAssemblyHelper AsmHelper(Diags, HeaderOpts, CGOpts, TOpts, LOpts, M);
- if (!CGOpts.LegacyPassManager)
- AsmHelper.EmitAssemblyWithNewPassManager(Action, std::move(OS));
+ if (CGOpts.LegacyPassManager)
+ AsmHelper.EmitAssemblyWithLegacyPassManager(Action, std::move(OS));
else
AsmHelper.EmitAssembly(Action, std::move(OS));
diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp
index b6722ad4e4f1..326ca8d50533 100644
--- a/clang/lib/CodeGen/CGAtomic.cpp
+++ b/clang/lib/CodeGen/CGAtomic.cpp
@@ -664,6 +664,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
case AtomicExpr::AO__atomic_nand_fetch:
PostOp = llvm::Instruction::And; // the NOT is special cased below
LLVM_FALLTHROUGH;
+ case AtomicExpr::AO__c11_atomic_fetch_nand:
case AtomicExpr::AO__atomic_fetch_nand:
Op = llvm::AtomicRMWInst::Nand;
break;
@@ -906,6 +907,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
case AtomicExpr::AO__c11_atomic_fetch_and:
case AtomicExpr::AO__c11_atomic_fetch_or:
case AtomicExpr::AO__c11_atomic_fetch_xor:
+ case AtomicExpr::AO__c11_atomic_fetch_nand:
case AtomicExpr::AO__c11_atomic_fetch_max:
case AtomicExpr::AO__c11_atomic_fetch_min:
case AtomicExpr::AO__opencl_atomic_fetch_and:
@@ -972,6 +974,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
case AtomicExpr::AO__c11_atomic_fetch_or:
case AtomicExpr::AO__opencl_atomic_fetch_or:
case AtomicExpr::AO__atomic_fetch_or:
+ case AtomicExpr::AO__c11_atomic_fetch_nand:
case AtomicExpr::AO__atomic_fetch_nand:
case AtomicExpr::AO__c11_atomic_fetch_sub:
case AtomicExpr::AO__opencl_atomic_fetch_sub:
@@ -1211,6 +1214,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
case AtomicExpr::AO__atomic_nand_fetch:
PostOp = llvm::Instruction::And; // the NOT is special cased below
LLVM_FALLTHROUGH;
+ case AtomicExpr::AO__c11_atomic_fetch_nand:
case AtomicExpr::AO__atomic_fetch_nand:
LibCallName = "__atomic_fetch_nand";
AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
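
For reference, a sketch of the C-level builtin these CGAtomic hunks lower, assuming the usual __c11_atomic_* form (fetch-nand atomically stores ~(*p & v) and returns the previous value):

    /* Atomically: old = *p; *p = ~(old & v); return old. */
    unsigned fetch_nand(_Atomic unsigned *p, unsigned v) {
      return __c11_atomic_fetch_nand(p, v, __ATOMIC_SEQ_CST);
    }
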
diff --git a/clang/lib/CodeGen/CGBlocks.cpp b/clang/lib/CodeGen/CGBlocks.cpp
index f39a56f81d41..2da2014345d8 100644
--- a/clang/lib/CodeGen/CGBlocks.cpp
+++ b/clang/lib/CodeGen/CGBlocks.cpp
@@ -2695,8 +2695,8 @@ const BlockByrefInfo &CodeGenFunction::getBlockByrefInfo(const VarDecl *D) {
size = varOffset;
// Conversely, we might have to prevent LLVM from inserting padding.
- } else if (CGM.getDataLayout().getABITypeAlignment(varTy)
- > varAlign.getQuantity()) {
+ } else if (CGM.getDataLayout().getABITypeAlignment(varTy) >
+ uint64_t(varAlign.getQuantity())) {
packed = true;
}
types.push_back(varTy);
@@ -2910,8 +2910,8 @@ llvm::Constant *CodeGenModule::getNSConcreteGlobalBlock() {
if (NSConcreteGlobalBlock)
return NSConcreteGlobalBlock;
- NSConcreteGlobalBlock =
- GetOrCreateLLVMGlobal("_NSConcreteGlobalBlock", Int8PtrTy, 0, nullptr);
+ NSConcreteGlobalBlock = GetOrCreateLLVMGlobal(
+ "_NSConcreteGlobalBlock", Int8PtrTy, LangAS::Default, nullptr);
configureBlocksRuntimeObject(*this, NSConcreteGlobalBlock);
return NSConcreteGlobalBlock;
}
@@ -2920,8 +2920,8 @@ llvm::Constant *CodeGenModule::getNSConcreteStackBlock() {
if (NSConcreteStackBlock)
return NSConcreteStackBlock;
- NSConcreteStackBlock =
- GetOrCreateLLVMGlobal("_NSConcreteStackBlock", Int8PtrTy, 0, nullptr);
+ NSConcreteStackBlock = GetOrCreateLLVMGlobal(
+ "_NSConcreteStackBlock", Int8PtrTy, LangAS::Default, nullptr);
configureBlocksRuntimeObject(*this, NSConcreteStackBlock);
return NSConcreteStackBlock;
}
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index d9b2a5fe16be..849423c8b9ba 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -3101,6 +3101,88 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
return RValue::get(V);
}
+ case Builtin::BI__builtin_elementwise_abs: {
+ Value *Op0 = EmitScalarExpr(E->getArg(0));
+ Value *Result;
+ if (Op0->getType()->isIntOrIntVectorTy())
+ Result = Builder.CreateBinaryIntrinsic(
+ llvm::Intrinsic::abs, Op0, Builder.getFalse(), nullptr, "elt.abs");
+ else
+ Result = Builder.CreateUnaryIntrinsic(llvm::Intrinsic::fabs, Op0, nullptr,
+ "elt.abs");
+ return RValue::get(Result);
+ }
+ case Builtin::BI__builtin_elementwise_max: {
+ Value *Op0 = EmitScalarExpr(E->getArg(0));
+ Value *Op1 = EmitScalarExpr(E->getArg(1));
+ Value *Result;
+ if (Op0->getType()->isIntOrIntVectorTy()) {
+ QualType Ty = E->getArg(0)->getType();
+ if (auto *VecTy = Ty->getAs<VectorType>())
+ Ty = VecTy->getElementType();
+ Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
+ ? llvm::Intrinsic::smax
+ : llvm::Intrinsic::umax,
+ Op0, Op1, nullptr, "elt.max");
+ } else
+ Result = Builder.CreateMaxNum(Op0, Op1, "elt.max");
+ return RValue::get(Result);
+ }
+ case Builtin::BI__builtin_elementwise_min: {
+ Value *Op0 = EmitScalarExpr(E->getArg(0));
+ Value *Op1 = EmitScalarExpr(E->getArg(1));
+ Value *Result;
+ if (Op0->getType()->isIntOrIntVectorTy()) {
+ QualType Ty = E->getArg(0)->getType();
+ if (auto *VecTy = Ty->getAs<VectorType>())
+ Ty = VecTy->getElementType();
+ Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
+ ? llvm::Intrinsic::smin
+ : llvm::Intrinsic::umin,
+ Op0, Op1, nullptr, "elt.min");
+ } else
+ Result = Builder.CreateMinNum(Op0, Op1, "elt.min");
+ return RValue::get(Result);
+ }
+
+ case Builtin::BI__builtin_reduce_max: {
+ auto GetIntrinsicID = [](QualType QT, llvm::Type *IrTy) {
+ if (IrTy->isIntOrIntVectorTy()) {
+ if (auto *VecTy = QT->getAs<VectorType>())
+ QT = VecTy->getElementType();
+ if (QT->isSignedIntegerType())
+ return llvm::Intrinsic::vector_reduce_smax;
+ else
+ return llvm::Intrinsic::vector_reduce_umax;
+ }
+ return llvm::Intrinsic::vector_reduce_fmax;
+ };
+ Value *Op0 = EmitScalarExpr(E->getArg(0));
+ Value *Result = Builder.CreateUnaryIntrinsic(
+ GetIntrinsicID(E->getArg(0)->getType(), Op0->getType()), Op0, nullptr,
+ "rdx.min");
+ return RValue::get(Result);
+ }
+
+ case Builtin::BI__builtin_reduce_min: {
+ auto GetIntrinsicID = [](QualType QT, llvm::Type *IrTy) {
+ if (IrTy->isIntOrIntVectorTy()) {
+ if (auto *VecTy = QT->getAs<VectorType>())
+ QT = VecTy->getElementType();
+ if (QT->isSignedIntegerType())
+ return llvm::Intrinsic::vector_reduce_smin;
+ else
+ return llvm::Intrinsic::vector_reduce_umin;
+ }
+ return llvm::Intrinsic::vector_reduce_fmin;
+ };
+ Value *Op0 = EmitScalarExpr(E->getArg(0));
+ Value *Result = Builder.CreateUnaryIntrinsic(
+ GetIntrinsicID(E->getArg(0)->getType(), Op0->getType()), Op0, nullptr,
+ "rdx.min");
+ return RValue::get(Result);
+ }
+
case Builtin::BI__builtin_matrix_transpose: {
const auto *MatrixTy = E->getArg(0)->getType()->getAs<ConstantMatrixType>();
Value *MatValue = EmitScalarExpr(E->getArg(0));
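
A short usage sketch for the elementwise and reduction builtins added above; the vector typedef and function names are illustrative, and the elementwise forms also accept plain scalars:

    typedef int v4si __attribute__((ext_vector_type(4)));

    /* Lane-wise minimum of two integer vectors. */
    v4si vmin(v4si a, v4si b) { return __builtin_elementwise_min(a, b); }

    /* Horizontal maximum across the lanes of one vector. */
    int hmax(v4si a) { return __builtin_reduce_max(a); }
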
@@ -5024,11 +5106,16 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy()));
}
case Builtin::BIprintf:
- if (getTarget().getTriple().isNVPTX())
- return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue);
- if (getTarget().getTriple().getArch() == Triple::amdgcn &&
- getLangOpts().HIP)
- return EmitAMDGPUDevicePrintfCallExpr(E, ReturnValue);
+ if (getTarget().getTriple().isNVPTX() ||
+ getTarget().getTriple().isAMDGCN()) {
+ if (getLangOpts().OpenMPIsDevice)
+ return EmitOpenMPDevicePrintfCallExpr(E);
+ if (getTarget().getTriple().isNVPTX())
+ return EmitNVPTXDevicePrintfCallExpr(E);
+ if (getTarget().getTriple().isAMDGCN() && getLangOpts().HIP)
+ return EmitAMDGPUDevicePrintfCallExpr(E);
+ }
+
break;
case Builtin::BI__builtin_canonicalize:
case Builtin::BI__builtin_canonicalizef:
@@ -8399,7 +8486,7 @@ Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
/// SVEBuiltinMemEltTy - Returns the memory element type for this memory
/// access builtin. Only required if it can't be inferred from the base pointer
/// operand.
-llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(SVETypeFlags TypeFlags) {
+llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags) {
switch (TypeFlags.getMemEltType()) {
case SVETypeFlags::MemEltTyDefault:
return getEltType(TypeFlags);
@@ -8415,7 +8502,7 @@ llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(SVETypeFlags TypeFlags) {
llvm_unreachable("Unknown MemEltType");
}
-llvm::Type *CodeGenFunction::getEltType(SVETypeFlags TypeFlags) {
+llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) {
switch (TypeFlags.getEltType()) {
default:
llvm_unreachable("Invalid SVETypeFlag!");
@@ -8450,7 +8537,7 @@ llvm::Type *CodeGenFunction::getEltType(SVETypeFlags TypeFlags) {
// Return the llvm predicate vector type corresponding to the specified element
// TypeFlags.
llvm::ScalableVectorType *
-CodeGenFunction::getSVEPredType(SVETypeFlags TypeFlags) {
+CodeGenFunction::getSVEPredType(const SVETypeFlags &TypeFlags) {
switch (TypeFlags.getEltType()) {
default: llvm_unreachable("Unhandled SVETypeFlag!");
@@ -8519,7 +8606,8 @@ CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) {
}
}
-llvm::Value *CodeGenFunction::EmitSVEAllTruePred(SVETypeFlags TypeFlags) {
+llvm::Value *
+CodeGenFunction::EmitSVEAllTruePred(const SVETypeFlags &TypeFlags) {
Function *Ptrue =
CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, getSVEPredType(TypeFlags));
return Builder.CreateCall(Ptrue, {Builder.getInt32(/*SV_ALL*/ 31)});
@@ -8563,7 +8651,7 @@ Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred,
return C;
}
-Value *CodeGenFunction::EmitSVEGatherLoad(SVETypeFlags TypeFlags,
+Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
SmallVectorImpl<Value *> &Ops,
unsigned IntID) {
auto *ResultTy = getSVEType(TypeFlags);
@@ -8615,7 +8703,7 @@ Value *CodeGenFunction::EmitSVEGatherLoad(SVETypeFlags TypeFlags,
: Builder.CreateSExt(Call, ResultTy);
}
-Value *CodeGenFunction::EmitSVEScatterStore(SVETypeFlags TypeFlags,
+Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags,
SmallVectorImpl<Value *> &Ops,
unsigned IntID) {
auto *SrcDataTy = getSVEType(TypeFlags);
@@ -8670,7 +8758,7 @@ Value *CodeGenFunction::EmitSVEScatterStore(SVETypeFlags TypeFlags,
return Builder.CreateCall(F, Ops);
}
-Value *CodeGenFunction::EmitSVEGatherPrefetch(SVETypeFlags TypeFlags,
+Value *CodeGenFunction::EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags,
SmallVectorImpl<Value *> &Ops,
unsigned IntID) {
// The gather prefetches are overloaded on the vector input - this can either
@@ -8703,7 +8791,7 @@ Value *CodeGenFunction::EmitSVEGatherPrefetch(SVETypeFlags TypeFlags,
return Builder.CreateCall(F, Ops);
}
-Value *CodeGenFunction::EmitSVEStructLoad(SVETypeFlags TypeFlags,
+Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
SmallVectorImpl<Value*> &Ops,
unsigned IntID) {
llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
@@ -8737,7 +8825,7 @@ Value *CodeGenFunction::EmitSVEStructLoad(SVETypeFlags TypeFlags,
return Builder.CreateCall(F, { Predicate, BasePtr });
}
-Value *CodeGenFunction::EmitSVEStructStore(SVETypeFlags TypeFlags,
+Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
SmallVectorImpl<Value*> &Ops,
unsigned IntID) {
llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
@@ -8784,7 +8872,7 @@ Value *CodeGenFunction::EmitSVEStructStore(SVETypeFlags TypeFlags,
// SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and
// svpmullt_pair intrinsics, with the exception that their results are bitcast
// to a wider type.
-Value *CodeGenFunction::EmitSVEPMull(SVETypeFlags TypeFlags,
+Value *CodeGenFunction::EmitSVEPMull(const SVETypeFlags &TypeFlags,
SmallVectorImpl<Value *> &Ops,
unsigned BuiltinID) {
// Splat scalar operand to vector (intrinsics with _n infix)
@@ -8802,14 +8890,14 @@ Value *CodeGenFunction::EmitSVEPMull(SVETypeFlags TypeFlags,
return EmitSVEReinterpret(Call, Ty);
}
-Value *CodeGenFunction::EmitSVEMovl(SVETypeFlags TypeFlags,
+Value *CodeGenFunction::EmitSVEMovl(const SVETypeFlags &TypeFlags,
ArrayRef<Value *> Ops, unsigned BuiltinID) {
llvm::Type *OverloadedTy = getSVEType(TypeFlags);
Function *F = CGM.getIntrinsic(BuiltinID, OverloadedTy);
return Builder.CreateCall(F, {Ops[0], Builder.getInt32(0)});
}
-Value *CodeGenFunction::EmitSVEPrefetchLoad(SVETypeFlags TypeFlags,
+Value *CodeGenFunction::EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags,
SmallVectorImpl<Value *> &Ops,
unsigned BuiltinID) {
auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
@@ -8918,8 +9006,10 @@ static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty,
Ops.insert(Ops.begin(), SplatUndef);
}
-SmallVector<llvm::Type *, 2> CodeGenFunction::getSVEOverloadTypes(
- SVETypeFlags TypeFlags, llvm::Type *ResultType, ArrayRef<Value *> Ops) {
+SmallVector<llvm::Type *, 2>
+CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags,
+ llvm::Type *ResultType,
+ ArrayRef<Value *> Ops) {
if (TypeFlags.isOverloadNone())
return {};
@@ -9732,6 +9822,29 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return Builder.CreateCall(F);
}
+ if (BuiltinID == AArch64::BI__mulh || BuiltinID == AArch64::BI__umulh) {
+ llvm::Type *ResType = ConvertType(E->getType());
+ llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
+
+ bool IsSigned = BuiltinID == AArch64::BI__mulh;
+ Value *LHS =
+ Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned);
+ Value *RHS =
+ Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, IsSigned);
+
+ Value *MulResult, *HigherBits;
+ if (IsSigned) {
+ MulResult = Builder.CreateNSWMul(LHS, RHS);
+ HigherBits = Builder.CreateAShr(MulResult, 64);
+ } else {
+ MulResult = Builder.CreateNUWMul(LHS, RHS);
+ HigherBits = Builder.CreateLShr(MulResult, 64);
+ }
+ HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
+
+ return HigherBits;
+ }
+
// Handle MSVC intrinsics before argument evaluation to prevent double
// evaluation.
if (Optional<MSVCIntrin> MsvcIntId = translateAarch64ToMsvcIntrin(BuiltinID))
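
A sketch of the source-level intrinsics the new AArch64 block above implements, assuming an MSVC-compatibility build (e.g. clang-cl targeting ARM64); both return the high 64 bits of a full 64x64-bit multiply:

    #include <stdint.h>

    int64_t  mulh_s(int64_t a, int64_t b)   { return __mulh(a, b);  }  /* signed   */
    uint64_t mulh_u(uint64_t a, uint64_t b) { return __umulh(a, b); }  /* unsigned */
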
@@ -12044,6 +12157,22 @@ static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E,
Intrinsic::ID IID = Intrinsic::not_intrinsic;
switch (BuiltinID) {
default: break;
+ case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
+ Subtract = true;
+ LLVM_FALLTHROUGH;
+ case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
+ case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
+ case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
+ IID = llvm::Intrinsic::x86_avx512fp16_vfmadd_ph_512;
+ break;
+ case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
+ Subtract = true;
+ LLVM_FALLTHROUGH;
+ case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
+ case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
+ case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
+ IID = llvm::Intrinsic::x86_avx512fp16_vfmaddsub_ph_512;
+ break;
case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
Subtract = true;
LLVM_FALLTHROUGH;
@@ -12107,22 +12236,30 @@ static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E,
// Handle any required masking.
Value *MaskFalseVal = nullptr;
switch (BuiltinID) {
+ case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
+ case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
MaskFalseVal = Ops[0];
break;
+ case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
+ case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
MaskFalseVal = Constant::getNullValue(Ops[0]->getType());
break;
+ case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
+ case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
+ case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
+ case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
@@ -12153,9 +12290,21 @@ static Value *EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E,
Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], (uint64_t)0);
Value *Res;
if (Rnd != 4) {
- Intrinsic::ID IID = Ops[0]->getType()->getPrimitiveSizeInBits() == 32 ?
- Intrinsic::x86_avx512_vfmadd_f32 :
- Intrinsic::x86_avx512_vfmadd_f64;
+ Intrinsic::ID IID;
+
+ switch (Ops[0]->getType()->getPrimitiveSizeInBits()) {
+ case 16:
+ IID = Intrinsic::x86_avx512fp16_vfmadd_f16;
+ break;
+ case 32:
+ IID = Intrinsic::x86_avx512_vfmadd_f32;
+ break;
+ case 64:
+ IID = Intrinsic::x86_avx512_vfmadd_f64;
+ break;
+ default:
+ llvm_unreachable("Unexpected size");
+ }
Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
{Ops[0], Ops[1], Ops[2], Ops[4]});
} else if (CGF.Builder.getIsFPConstrained()) {
@@ -12362,23 +12511,8 @@ Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) {
return EmitX86CpuSupports(FeatureStr);
}
-uint64_t
-CodeGenFunction::GetX86CpuSupportsMask(ArrayRef<StringRef> FeatureStrs) {
- // Processor features and mapping to processor feature value.
- uint64_t FeaturesMask = 0;
- for (const StringRef &FeatureStr : FeatureStrs) {
- unsigned Feature =
- StringSwitch<unsigned>(FeatureStr)
-#define X86_FEATURE_COMPAT(ENUM, STR) .Case(STR, llvm::X86::FEATURE_##ENUM)
-#include "llvm/Support/X86TargetParser.def"
- ;
- FeaturesMask |= (1ULL << Feature);
- }
- return FeaturesMask;
-}
-
Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
- return EmitX86CpuSupports(GetX86CpuSupportsMask(FeatureStrs));
+ return EmitX86CpuSupports(llvm::X86::getCpuSupportsMask(FeatureStrs));
}
llvm::Value *CodeGenFunction::EmitX86CpuSupports(uint64_t FeaturesMask) {
@@ -12461,6 +12595,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
SmallVector<Value*, 4> Ops;
bool IsMaskFCmp = false;
+ bool IsConjFMA = false;
// Find out if any arguments are required to be integer constant expressions.
unsigned ICEArguments = 0;
@@ -12691,6 +12826,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_storeups512_mask:
return EmitX86MaskedStore(*this, Ops, Align(1));
+ case X86::BI__builtin_ia32_storesh128_mask:
case X86::BI__builtin_ia32_storess128_mask:
case X86::BI__builtin_ia32_storesd128_mask:
return EmitX86MaskedStore(*this, Ops, Align(1));
@@ -12742,14 +12878,21 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_cvtdq2ps512_mask:
case X86::BI__builtin_ia32_cvtqq2ps512_mask:
case X86::BI__builtin_ia32_cvtqq2pd512_mask:
+ case X86::BI__builtin_ia32_vcvtw2ph512_mask:
+ case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
+ case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ true);
case X86::BI__builtin_ia32_cvtudq2ps512_mask:
case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
+ case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
+ case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
+ case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ false);
case X86::BI__builtin_ia32_vfmaddss3:
case X86::BI__builtin_ia32_vfmaddsd3:
+ case X86::BI__builtin_ia32_vfmaddsh3_mask:
case X86::BI__builtin_ia32_vfmaddss3_mask:
case X86::BI__builtin_ia32_vfmaddsd3_mask:
return EmitScalarFMAExpr(*this, E, Ops, Ops[0]);
@@ -12757,20 +12900,28 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_vfmaddsd:
return EmitScalarFMAExpr(*this, E, Ops,
Constant::getNullValue(Ops[0]->getType()));
+ case X86::BI__builtin_ia32_vfmaddsh3_maskz:
case X86::BI__builtin_ia32_vfmaddss3_maskz:
case X86::BI__builtin_ia32_vfmaddsd3_maskz:
return EmitScalarFMAExpr(*this, E, Ops, Ops[0], /*ZeroMask*/ true);
+ case X86::BI__builtin_ia32_vfmaddsh3_mask3:
case X86::BI__builtin_ia32_vfmaddss3_mask3:
case X86::BI__builtin_ia32_vfmaddsd3_mask3:
return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2);
+ case X86::BI__builtin_ia32_vfmsubsh3_mask3:
case X86::BI__builtin_ia32_vfmsubss3_mask3:
case X86::BI__builtin_ia32_vfmsubsd3_mask3:
return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2,
/*NegAcc*/ true);
+ case X86::BI__builtin_ia32_vfmaddph:
case X86::BI__builtin_ia32_vfmaddps:
case X86::BI__builtin_ia32_vfmaddpd:
+ case X86::BI__builtin_ia32_vfmaddph256:
case X86::BI__builtin_ia32_vfmaddps256:
case X86::BI__builtin_ia32_vfmaddpd256:
+ case X86::BI__builtin_ia32_vfmaddph512_mask:
+ case X86::BI__builtin_ia32_vfmaddph512_maskz:
+ case X86::BI__builtin_ia32_vfmaddph512_mask3:
case X86::BI__builtin_ia32_vfmaddps512_mask:
case X86::BI__builtin_ia32_vfmaddps512_maskz:
case X86::BI__builtin_ia32_vfmaddps512_mask3:
@@ -12779,7 +12930,12 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_vfmaddpd512_maskz:
case X86::BI__builtin_ia32_vfmaddpd512_mask3:
case X86::BI__builtin_ia32_vfmsubpd512_mask3:
+ case X86::BI__builtin_ia32_vfmsubph512_mask3:
return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ false);
+ case X86::BI__builtin_ia32_vfmaddsubph512_mask:
+ case X86::BI__builtin_ia32_vfmaddsubph512_maskz:
+ case X86::BI__builtin_ia32_vfmaddsubph512_mask3:
+ case X86::BI__builtin_ia32_vfmsubaddph512_mask3:
case X86::BI__builtin_ia32_vfmaddsubps512_mask:
case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
@@ -12826,6 +12982,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_loaddqudi512_mask:
return EmitX86MaskedLoad(*this, Ops, Align(1));
+ case X86::BI__builtin_ia32_loadsh128_mask:
case X86::BI__builtin_ia32_loadss128_mask:
case X86::BI__builtin_ia32_loadsd128_mask:
return EmitX86MaskedLoad(*this, Ops, Align(1));
@@ -13705,6 +13862,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_selectq_128:
case X86::BI__builtin_ia32_selectq_256:
case X86::BI__builtin_ia32_selectq_512:
+ case X86::BI__builtin_ia32_selectph_128:
+ case X86::BI__builtin_ia32_selectph_256:
+ case X86::BI__builtin_ia32_selectph_512:
case X86::BI__builtin_ia32_selectps_128:
case X86::BI__builtin_ia32_selectps_256:
case X86::BI__builtin_ia32_selectps_512:
@@ -13712,6 +13872,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_selectpd_256:
case X86::BI__builtin_ia32_selectpd_512:
return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
+ case X86::BI__builtin_ia32_selectsh_128:
case X86::BI__builtin_ia32_selectss_128:
case X86::BI__builtin_ia32_selectsd_128: {
Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
@@ -13944,15 +14105,28 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
}
return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
}
+ case X86::BI__builtin_ia32_sqrtsh_round_mask:
case X86::BI__builtin_ia32_sqrtsd_round_mask:
case X86::BI__builtin_ia32_sqrtss_round_mask: {
unsigned CC = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
// Support only if the rounding mode is 4 (AKA CUR_DIRECTION),
// otherwise keep the intrinsic.
if (CC != 4) {
- Intrinsic::ID IID = BuiltinID == X86::BI__builtin_ia32_sqrtsd_round_mask ?
- Intrinsic::x86_avx512_mask_sqrt_sd :
- Intrinsic::x86_avx512_mask_sqrt_ss;
+ Intrinsic::ID IID;
+
+ switch (BuiltinID) {
+ default:
+ llvm_unreachable("Unsupported intrinsic!");
+ case X86::BI__builtin_ia32_sqrtsh_round_mask:
+ IID = Intrinsic::x86_avx512fp16_mask_sqrt_sh;
+ break;
+ case X86::BI__builtin_ia32_sqrtsd_round_mask:
+ IID = Intrinsic::x86_avx512_mask_sqrt_sd;
+ break;
+ case X86::BI__builtin_ia32_sqrtss_round_mask:
+ IID = Intrinsic::x86_avx512_mask_sqrt_ss;
+ break;
+ }
return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
}
Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
@@ -13974,6 +14148,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_sqrtpd:
case X86::BI__builtin_ia32_sqrtps256:
case X86::BI__builtin_ia32_sqrtps:
+ case X86::BI__builtin_ia32_sqrtph256:
+ case X86::BI__builtin_ia32_sqrtph:
+ case X86::BI__builtin_ia32_sqrtph512:
case X86::BI__builtin_ia32_sqrtps512:
case X86::BI__builtin_ia32_sqrtpd512: {
if (Ops.size() == 2) {
@@ -13981,9 +14158,21 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
// Support only if the rounding mode is 4 (AKA CUR_DIRECTION),
// otherwise keep the intrinsic.
if (CC != 4) {
- Intrinsic::ID IID = BuiltinID == X86::BI__builtin_ia32_sqrtps512 ?
- Intrinsic::x86_avx512_sqrt_ps_512 :
- Intrinsic::x86_avx512_sqrt_pd_512;
+ Intrinsic::ID IID;
+
+ switch (BuiltinID) {
+ default:
+ llvm_unreachable("Unsupported intrinsic!");
+ case X86::BI__builtin_ia32_sqrtph512:
+ IID = Intrinsic::x86_avx512fp16_sqrt_ph_512;
+ break;
+ case X86::BI__builtin_ia32_sqrtps512:
+ IID = Intrinsic::x86_avx512_sqrt_ps_512;
+ break;
+ case X86::BI__builtin_ia32_sqrtpd512:
+ IID = Intrinsic::x86_avx512_sqrt_pd_512;
+ break;
+ }
return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
}
}
@@ -14151,28 +14340,40 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
return Builder.CreateCall(F, {Ops[0]});
}
case X86::BI__builtin_ia32_reduce_fadd_pd512:
- case X86::BI__builtin_ia32_reduce_fadd_ps512: {
+ case X86::BI__builtin_ia32_reduce_fadd_ps512:
+ case X86::BI__builtin_ia32_reduce_fadd_ph512:
+ case X86::BI__builtin_ia32_reduce_fadd_ph256:
+ case X86::BI__builtin_ia32_reduce_fadd_ph128: {
Function *F =
CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType());
Builder.getFastMathFlags().setAllowReassoc();
return Builder.CreateCall(F, {Ops[0], Ops[1]});
}
case X86::BI__builtin_ia32_reduce_fmul_pd512:
- case X86::BI__builtin_ia32_reduce_fmul_ps512: {
+ case X86::BI__builtin_ia32_reduce_fmul_ps512:
+ case X86::BI__builtin_ia32_reduce_fmul_ph512:
+ case X86::BI__builtin_ia32_reduce_fmul_ph256:
+ case X86::BI__builtin_ia32_reduce_fmul_ph128: {
Function *F =
CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType());
Builder.getFastMathFlags().setAllowReassoc();
return Builder.CreateCall(F, {Ops[0], Ops[1]});
}
case X86::BI__builtin_ia32_reduce_fmax_pd512:
- case X86::BI__builtin_ia32_reduce_fmax_ps512: {
+ case X86::BI__builtin_ia32_reduce_fmax_ps512:
+ case X86::BI__builtin_ia32_reduce_fmax_ph512:
+ case X86::BI__builtin_ia32_reduce_fmax_ph256:
+ case X86::BI__builtin_ia32_reduce_fmax_ph128: {
Function *F =
CGM.getIntrinsic(Intrinsic::vector_reduce_fmax, Ops[0]->getType());
Builder.getFastMathFlags().setNoNaNs();
return Builder.CreateCall(F, {Ops[0]});
}
case X86::BI__builtin_ia32_reduce_fmin_pd512:
- case X86::BI__builtin_ia32_reduce_fmin_ps512: {
+ case X86::BI__builtin_ia32_reduce_fmin_ps512:
+ case X86::BI__builtin_ia32_reduce_fmin_ph512:
+ case X86::BI__builtin_ia32_reduce_fmin_ph256:
+ case X86::BI__builtin_ia32_reduce_fmin_ph128: {
Function *F =
CGM.getIntrinsic(Intrinsic::vector_reduce_fmin, Ops[0]->getType());
Builder.getFastMathFlags().setNoNaNs();
@@ -14288,6 +14489,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_fpclassps128_mask:
case X86::BI__builtin_ia32_fpclassps256_mask:
case X86::BI__builtin_ia32_fpclassps512_mask:
+ case X86::BI__builtin_ia32_fpclassph128_mask:
+ case X86::BI__builtin_ia32_fpclassph256_mask:
+ case X86::BI__builtin_ia32_fpclassph512_mask:
case X86::BI__builtin_ia32_fpclasspd128_mask:
case X86::BI__builtin_ia32_fpclasspd256_mask:
case X86::BI__builtin_ia32_fpclasspd512_mask: {
@@ -14299,6 +14503,15 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
Intrinsic::ID ID;
switch (BuiltinID) {
default: llvm_unreachable("Unsupported intrinsic!");
+ case X86::BI__builtin_ia32_fpclassph128_mask:
+ ID = Intrinsic::x86_avx512fp16_fpclass_ph_128;
+ break;
+ case X86::BI__builtin_ia32_fpclassph256_mask:
+ ID = Intrinsic::x86_avx512fp16_fpclass_ph_256;
+ break;
+ case X86::BI__builtin_ia32_fpclassph512_mask:
+ ID = Intrinsic::x86_avx512fp16_fpclass_ph_512;
+ break;
case X86::BI__builtin_ia32_fpclassps128_mask:
ID = Intrinsic::x86_avx512_fpclass_ps_128;
break;
@@ -14436,6 +14649,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_cmpordps:
case X86::BI__builtin_ia32_cmpordpd:
return getVectorFCmpIR(CmpInst::FCMP_ORD, /*IsSignaling*/false);
+ case X86::BI__builtin_ia32_cmpph128_mask:
+ case X86::BI__builtin_ia32_cmpph256_mask:
+ case X86::BI__builtin_ia32_cmpph512_mask:
case X86::BI__builtin_ia32_cmpps128_mask:
case X86::BI__builtin_ia32_cmpps256_mask:
case X86::BI__builtin_ia32_cmpps512_mask:
@@ -14777,7 +14993,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1]});
- for (int i = 0; i < 6; ++i) {
+ for (int i = 0; i < 3; ++i) {
Value *Extract = Builder.CreateExtractValue(Call, i + 1);
Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[2], i * 16);
Ptr = Builder.CreateBitCast(
@@ -14793,7 +15009,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
Value *Call =
Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1], Ops[2]});
- for (int i = 0; i < 7; ++i) {
+ for (int i = 0; i < 4; ++i) {
Value *Extract = Builder.CreateExtractValue(Call, i + 1);
Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[3], i * 16);
Ptr = Builder.CreateBitCast(
@@ -14918,6 +15134,36 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
Builder.SetInsertPoint(End);
return Builder.CreateExtractValue(Call, 0);
}
+ case X86::BI__builtin_ia32_vfcmaddcph512_mask:
+ IsConjFMA = true;
+ LLVM_FALLTHROUGH;
+ case X86::BI__builtin_ia32_vfmaddcph512_mask: {
+ Intrinsic::ID IID = IsConjFMA
+ ? Intrinsic::x86_avx512fp16_mask_vfcmadd_cph_512
+ : Intrinsic::x86_avx512fp16_mask_vfmadd_cph_512;
+ Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
+ return EmitX86Select(*this, Ops[3], Call, Ops[0]);
+ }
+ case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:
+ IsConjFMA = true;
+ LLVM_FALLTHROUGH;
+ case X86::BI__builtin_ia32_vfmaddcsh_round_mask: {
+ Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
+ : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
+ Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
+ Value *And = Builder.CreateAnd(Ops[3], llvm::ConstantInt::get(Int8Ty, 1));
+ return EmitX86Select(*this, And, Call, Ops[0]);
+ }
+ case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
+ IsConjFMA = true;
+ LLVM_FALLTHROUGH;
+ case X86::BI__builtin_ia32_vfmaddcsh_round_mask3: {
+ Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
+ : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
+ Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
+ static constexpr int Mask[] = {0, 5, 6, 7};
+ return Builder.CreateShuffleVector(Call, Ops[2], Mask);
+ }
}
}
@@ -14925,8 +15171,12 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {
SmallVector<Value*, 4> Ops;
- for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
- Ops.push_back(EmitScalarExpr(E->getArg(i)));
+ for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
+ if (E->getArg(i)->getType()->isArrayType())
+ Ops.push_back(EmitArrayToPointerDecay(E->getArg(i)).getPointer());
+ else
+ Ops.push_back(EmitScalarExpr(E->getArg(i)));
+ }
Intrinsic::ID ID = Intrinsic::not_intrinsic;
@@ -15496,6 +15746,12 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
Value *Rotate = Builder.CreateCall(F, {Ops[0], Ops[0], ShiftAmt});
return Builder.CreateAnd(Rotate, Ops[2]);
}
+ case PPC::BI__builtin_ppc_load2r: {
+ Function *F = CGM.getIntrinsic(Intrinsic::ppc_load2r);
+ Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy);
+ Value *LoadIntrinsic = Builder.CreateCall(F, Ops);
+ return Builder.CreateTrunc(LoadIntrinsic, Int16Ty);
+ }
// FMA variations
case PPC::BI__builtin_vsx_xvmaddadp:
case PPC::BI__builtin_vsx_xvmaddasp:
@@ -15751,6 +16007,17 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
}
return Call;
}
+ if (BuiltinID == PPC::BI__builtin_vsx_build_pair ||
+ BuiltinID == PPC::BI__builtin_mma_build_acc) {
+ // Reverse the order of the operands for LE, so the
+ // same builtin call can be used on both LE and BE
+ // without the need for the programmer to swap operands.
+ // The operands are reversed starting from the second argument,
+ // the first operand is the pointer to the pair/accumulator
+ // that is being built.
+ if (getTarget().isLittleEndian())
+ std::reverse(Ops.begin() + 1, Ops.end());
+ }
bool Accumulate;
switch (BuiltinID) {
#define CUSTOM_BUILTIN(Name, Intr, Types, Acc) \
@@ -15808,7 +16075,7 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
// store.
Value *LoadedVal = Pair.first.getScalarVal();
Builder.CreateStore(LoadedVal, OldValAddr);
- return Pair.second;
+ return Builder.CreateZExt(Pair.second, Builder.getInt32Ty());
}
case PPC::BI__builtin_ppc_fetch_and_add:
case PPC::BI__builtin_ppc_fetch_and_addlp: {
@@ -15907,6 +16174,21 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
*this, E, Intrinsic::sqrt,
Intrinsic::experimental_constrained_sqrt))
.getScalarVal();
+ case PPC::BI__builtin_ppc_test_data_class: {
+ llvm::Type *ArgType = EmitScalarExpr(E->getArg(0))->getType();
+ unsigned IntrinsicID;
+ if (ArgType->isDoubleTy())
+ IntrinsicID = Intrinsic::ppc_test_data_class_d;
+ else if (ArgType->isFloatTy())
+ IntrinsicID = Intrinsic::ppc_test_data_class_f;
+ else
+ llvm_unreachable("Invalid Argument Type");
+ return Builder.CreateCall(CGM.getIntrinsic(IntrinsicID), Ops,
+ "test_data_class");
+ }
+ case PPC::BI__builtin_ppc_swdiv:
+ case PPC::BI__builtin_ppc_swdivs:
+ return Builder.CreateFDiv(Ops[0], Ops[1], "swdiv");
}
}
@@ -15917,11 +16199,9 @@ Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF,
const CallExpr *E = nullptr) {
auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_dispatch_ptr);
auto *Call = CGF.Builder.CreateCall(F);
- Call->addAttribute(
- AttributeList::ReturnIndex,
+ Call->addRetAttr(
Attribute::getWithDereferenceableBytes(Call->getContext(), 64));
- Call->addAttribute(AttributeList::ReturnIndex,
- Attribute::getWithAlignment(Call->getContext(), Align(4)));
+ Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(4)));
if (!E)
return Call;
QualType BuiltinRetType = E->getType();
@@ -16197,6 +16477,74 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
Src0 = Builder.CreatePointerBitCastOrAddrSpaceCast(Src0, PTy);
return Builder.CreateCall(F, { Src0, Src1, Src2, Src3, Src4 });
}
+ case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
+ case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
+ case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
+ case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
+ case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
+ case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
+ case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
+ case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: {
+ Intrinsic::ID IID;
+ llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext());
+ switch (BuiltinID) {
+ case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
+ ArgTy = llvm::Type::getFloatTy(getLLVMContext());
+ IID = Intrinsic::amdgcn_global_atomic_fadd;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
+ ArgTy = llvm::FixedVectorType::get(
+ llvm::Type::getHalfTy(getLLVMContext()), 2);
+ IID = Intrinsic::amdgcn_global_atomic_fadd;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
+ IID = Intrinsic::amdgcn_global_atomic_fadd;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
+ IID = Intrinsic::amdgcn_global_atomic_fmin;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
+ IID = Intrinsic::amdgcn_global_atomic_fmax;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
+ IID = Intrinsic::amdgcn_flat_atomic_fadd;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
+ IID = Intrinsic::amdgcn_flat_atomic_fmin;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
+ IID = Intrinsic::amdgcn_flat_atomic_fmax;
+ break;
+ }
+ llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
+ llvm::Value *Val = EmitScalarExpr(E->getArg(1));
+ llvm::Function *F =
+ CGM.getIntrinsic(IID, {ArgTy, Addr->getType(), Val->getType()});
+ return Builder.CreateCall(F, {Addr, Val});
+ }
+ case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
+ case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32: {
+ Intrinsic::ID IID;
+ llvm::Type *ArgTy;
+ switch (BuiltinID) {
+ case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
+ ArgTy = llvm::Type::getFloatTy(getLLVMContext());
+ IID = Intrinsic::amdgcn_ds_fadd;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
+ ArgTy = llvm::Type::getDoubleTy(getLLVMContext());
+ IID = Intrinsic::amdgcn_ds_fadd;
+ break;
+ }
+ llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
+ llvm::Value *Val = EmitScalarExpr(E->getArg(1));
+ llvm::Constant *ZeroI32 = llvm::ConstantInt::getIntegerValue(
+ llvm::Type::getInt32Ty(getLLVMContext()), APInt(32, 0, true));
+ llvm::Constant *ZeroI1 = llvm::ConstantInt::getIntegerValue(
+ llvm::Type::getInt1Ty(getLLVMContext()), APInt(1, 0));
+ llvm::Function *F = CGM.getIntrinsic(IID, {ArgTy});
+ return Builder.CreateCall(F, {Addr, Val, ZeroI32, ZeroI32, ZeroI1});
+ }
case AMDGPU::BI__builtin_amdgcn_read_exec: {
CallInst *CI = cast<CallInst>(
EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, NormalRead, "exec"));
@@ -17726,6 +18074,22 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
CGM.getIntrinsic(Intrinsic::maximum, ConvertType(E->getType()));
return Builder.CreateCall(Callee, {LHS, RHS});
}
+ case WebAssembly::BI__builtin_wasm_pmin_f32x4:
+ case WebAssembly::BI__builtin_wasm_pmin_f64x2: {
+ Value *LHS = EmitScalarExpr(E->getArg(0));
+ Value *RHS = EmitScalarExpr(E->getArg(1));
+ Function *Callee =
+ CGM.getIntrinsic(Intrinsic::wasm_pmin, ConvertType(E->getType()));
+ return Builder.CreateCall(Callee, {LHS, RHS});
+ }
+ case WebAssembly::BI__builtin_wasm_pmax_f32x4:
+ case WebAssembly::BI__builtin_wasm_pmax_f64x2: {
+ Value *LHS = EmitScalarExpr(E->getArg(0));
+ Value *RHS = EmitScalarExpr(E->getArg(1));
+ Function *Callee =
+ CGM.getIntrinsic(Intrinsic::wasm_pmax, ConvertType(E->getType()));
+ return Builder.CreateCall(Callee, {LHS, RHS});
+ }
case WebAssembly::BI__builtin_wasm_ceil_f32x4:
case WebAssembly::BI__builtin_wasm_floor_f32x4:
case WebAssembly::BI__builtin_wasm_trunc_f32x4:
@@ -18012,6 +18376,93 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_shuffle);
return Builder.CreateCall(Callee, Ops);
}
+ case WebAssembly::BI__builtin_wasm_fma_f32x4:
+ case WebAssembly::BI__builtin_wasm_fms_f32x4:
+ case WebAssembly::BI__builtin_wasm_fma_f64x2:
+ case WebAssembly::BI__builtin_wasm_fms_f64x2: {
+ Value *A = EmitScalarExpr(E->getArg(0));
+ Value *B = EmitScalarExpr(E->getArg(1));
+ Value *C = EmitScalarExpr(E->getArg(2));
+ unsigned IntNo;
+ switch (BuiltinID) {
+ case WebAssembly::BI__builtin_wasm_fma_f32x4:
+ case WebAssembly::BI__builtin_wasm_fma_f64x2:
+ IntNo = Intrinsic::wasm_fma;
+ break;
+ case WebAssembly::BI__builtin_wasm_fms_f32x4:
+ case WebAssembly::BI__builtin_wasm_fms_f64x2:
+ IntNo = Intrinsic::wasm_fms;
+ break;
+ default:
+ llvm_unreachable("unexpected builtin ID");
+ }
+ Function *Callee = CGM.getIntrinsic(IntNo, A->getType());
+ return Builder.CreateCall(Callee, {A, B, C});
+ }
+ case WebAssembly::BI__builtin_wasm_laneselect_i8x16:
+ case WebAssembly::BI__builtin_wasm_laneselect_i16x8:
+ case WebAssembly::BI__builtin_wasm_laneselect_i32x4:
+ case WebAssembly::BI__builtin_wasm_laneselect_i64x2: {
+ Value *A = EmitScalarExpr(E->getArg(0));
+ Value *B = EmitScalarExpr(E->getArg(1));
+ Value *C = EmitScalarExpr(E->getArg(2));
+ Function *Callee =
+ CGM.getIntrinsic(Intrinsic::wasm_laneselect, A->getType());
+ return Builder.CreateCall(Callee, {A, B, C});
+ }
+ case WebAssembly::BI__builtin_wasm_relaxed_swizzle_i8x16: {
+ Value *Src = EmitScalarExpr(E->getArg(0));
+ Value *Indices = EmitScalarExpr(E->getArg(1));
+ Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_swizzle);
+ return Builder.CreateCall(Callee, {Src, Indices});
+ }
+ case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
+ case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
+ case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
+ case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2: {
+ Value *LHS = EmitScalarExpr(E->getArg(0));
+ Value *RHS = EmitScalarExpr(E->getArg(1));
+ unsigned IntNo;
+ switch (BuiltinID) {
+ case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
+ case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
+ IntNo = Intrinsic::wasm_relaxed_min;
+ break;
+ case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
+ case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2:
+ IntNo = Intrinsic::wasm_relaxed_max;
+ break;
+ default:
+ llvm_unreachable("unexpected builtin ID");
+ }
+ Function *Callee = CGM.getIntrinsic(IntNo, LHS->getType());
+ return Builder.CreateCall(Callee, {LHS, RHS});
+ }
+ case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
+ case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
+ case WebAssembly::BI__builtin_wasm_relaxed_trunc_zero_s_i32x4_f64x2:
+ case WebAssembly::BI__builtin_wasm_relaxed_trunc_zero_u_i32x4_f64x2: {
+ Value *Vec = EmitScalarExpr(E->getArg(0));
+ unsigned IntNo;
+ switch (BuiltinID) {
+ case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
+ IntNo = Intrinsic::wasm_relaxed_trunc_signed;
+ break;
+ case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
+ IntNo = Intrinsic::wasm_relaxed_trunc_unsigned;
+ break;
+ case WebAssembly::BI__builtin_wasm_relaxed_trunc_zero_s_i32x4_f64x2:
+ IntNo = Intrinsic::wasm_relaxed_trunc_zero_signed;
+ break;
+ case WebAssembly::BI__builtin_wasm_relaxed_trunc_zero_u_i32x4_f64x2:
+ IntNo = Intrinsic::wasm_relaxed_trunc_zero_unsigned;
+ break;
+ default:
+ llvm_unreachable("unexpected builtin ID");
+ }
+ Function *Callee = CGM.getIntrinsic(IntNo);
+ return Builder.CreateCall(Callee, {Vec});
+ }
default:
return nullptr;
}
@@ -18272,6 +18723,7 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
Intrinsic::ID ID = Intrinsic::not_intrinsic;
unsigned NF = 1;
+ constexpr unsigned TAIL_UNDISTURBED = 0;
// Required for overloaded intrinsics.
llvm::SmallVector<llvm::Type *, 2> IntrinsicTypes;
diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp
index 88030fee501b..a1b4431ca8c4 100644
--- a/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/clang/lib/CodeGen/CGCUDANV.cpp
@@ -177,7 +177,7 @@ public:
llvm::Function *finalizeModule() override;
};
-}
+} // end anonymous namespace
std::string CGNVCUDARuntime::addPrefixToName(StringRef FuncName) const {
if (CGM.getLangOpts().HIP)
@@ -237,11 +237,10 @@ llvm::FunctionCallee CGNVCUDARuntime::getLaunchFn() const {
// hipError_t hipLaunchByPtr(char *);
return CGM.CreateRuntimeFunction(
llvm::FunctionType::get(IntTy, CharPtrTy, false), "hipLaunchByPtr");
- } else {
- // cudaError_t cudaLaunch(char *);
- return CGM.CreateRuntimeFunction(
- llvm::FunctionType::get(IntTy, CharPtrTy, false), "cudaLaunch");
}
+ // cudaError_t cudaLaunch(char *);
+ return CGM.CreateRuntimeFunction(
+ llvm::FunctionType::get(IntTy, CharPtrTy, false), "cudaLaunch");
}
llvm::FunctionType *CGNVCUDARuntime::getRegisterGlobalsFnTy() const {
@@ -253,8 +252,8 @@ llvm::FunctionType *CGNVCUDARuntime::getCallbackFnTy() const {
}
llvm::FunctionType *CGNVCUDARuntime::getRegisterLinkedBinaryFnTy() const {
- auto CallbackFnTy = getCallbackFnTy();
- auto RegisterGlobalsFnTy = getRegisterGlobalsFnTy();
+ auto *CallbackFnTy = getCallbackFnTy();
+ auto *RegisterGlobalsFnTy = getRegisterGlobalsFnTy();
llvm::Type *Params[] = {RegisterGlobalsFnTy->getPointerTo(), VoidPtrTy,
VoidPtrTy, CallbackFnTy->getPointerTo()};
return llvm::FunctionType::get(VoidTy, Params, false);
@@ -397,7 +396,7 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
QualType QT = cudaLaunchKernelFD->getType();
QualType CQT = QT.getCanonicalType();
llvm::Type *Ty = CGM.getTypes().ConvertType(CQT);
- llvm::FunctionType *FTy = dyn_cast<llvm::FunctionType>(Ty);
+ llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
const CGFunctionInfo &FI =
CGM.getTypes().arrangeFunctionDeclaration(cudaLaunchKernelFD);
@@ -473,7 +472,7 @@ static void replaceManagedVar(llvm::GlobalVariable *Var,
// variable with instructions.
for (auto &&Op : WorkItem) {
auto *CE = cast<llvm::ConstantExpr>(Op);
- auto *NewInst = llvm::createReplacementInstr(CE, I);
+ auto *NewInst = CE->getAsInstruction(I);
NewInst->replaceUsesOfWith(OldV, NewV);
OldV = CE;
NewV = NewInst;
@@ -590,7 +589,7 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
uint64_t VarSize =
CGM.getDataLayout().getTypeAllocSize(Var->getValueType());
if (Info.Flags.isManaged()) {
- auto ManagedVar = new llvm::GlobalVariable(
+ auto *ManagedVar = new llvm::GlobalVariable(
CGM.getModule(), Var->getType(),
/*isConstant=*/false, Var->getLinkage(),
/*Init=*/Var->isDeclaration()
@@ -823,7 +822,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
GpuBinaryHandle,
CharUnits::fromQuantity(GpuBinaryHandle->getAlignment()));
{
- auto HandleValue = CtorBuilder.CreateLoad(GpuBinaryAddr);
+ auto *HandleValue = CtorBuilder.CreateLoad(GpuBinaryAddr);
llvm::Constant *Zero =
llvm::Constant::getNullValue(HandleValue->getType());
llvm::Value *EQZero = CtorBuilder.CreateICmpEQ(HandleValue, Zero);
@@ -842,7 +841,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
CtorBuilder.SetInsertPoint(ExitBlock);
// Call __hip_register_globals(GpuBinaryHandle);
if (RegisterGlobalsFunc) {
- auto HandleValue = CtorBuilder.CreateLoad(GpuBinaryAddr);
+ auto *HandleValue = CtorBuilder.CreateLoad(GpuBinaryAddr);
CtorBuilder.CreateCall(RegisterGlobalsFunc, HandleValue);
}
}
@@ -958,7 +957,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleDtorFunction() {
Address GpuBinaryAddr(GpuBinaryHandle, CharUnits::fromQuantity(
GpuBinaryHandle->getAlignment()));
- auto HandleValue = DtorBuilder.CreateLoad(GpuBinaryAddr);
+ auto *HandleValue = DtorBuilder.CreateLoad(GpuBinaryAddr);
// There is only one HIP fat binary per linked module, however there are
// multiple destructor functions. Make sure the fat binary is unregistered
// only once.
@@ -1071,7 +1070,7 @@ void CGNVCUDARuntime::transformManagedVars() {
llvm::GlobalVariable *Var = Info.Var;
if (Info.Flags.getKind() == DeviceVarFlags::Variable &&
Info.Flags.isManaged()) {
- auto ManagedVar = new llvm::GlobalVariable(
+ auto *ManagedVar = new llvm::GlobalVariable(
CGM.getModule(), Var->getType(),
/*isConstant=*/false, Var->getLinkage(),
/*Init=*/Var->isDeclaration()
@@ -1148,6 +1147,7 @@ llvm::GlobalValue *CGNVCUDARuntime::getKernelHandle(llvm::Function *F,
Var->setAlignment(CGM.getPointerAlign().getAsAlign());
Var->setDSOLocal(F->isDSOLocal());
Var->setVisibility(F->getVisibility());
+ CGM.maybeSetTrivialComdat(*GD.getDecl(), *Var);
KernelHandles[F] = Var;
KernelStubs[Var] = F;
return Var;
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 47a4ed35be85..d830a7e01709 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -1271,12 +1271,26 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty,
// perform the conversion.
if (auto *ScalableDst = dyn_cast<llvm::ScalableVectorType>(Ty)) {
if (auto *FixedSrc = dyn_cast<llvm::FixedVectorType>(SrcTy)) {
+ // If we are casting a fixed i8 vector to a scalable 16 x i1 predicate
+ // vector, use a vector insert and bitcast the result.
+ bool NeedsBitcast = false;
+ auto PredType =
+ llvm::ScalableVectorType::get(CGF.Builder.getInt1Ty(), 16);
+ llvm::Type *OrigType = Ty;
+ if (ScalableDst == PredType &&
+ FixedSrc->getElementType() == CGF.Builder.getInt8Ty()) {
+ ScalableDst = llvm::ScalableVectorType::get(CGF.Builder.getInt8Ty(), 2);
+ NeedsBitcast = true;
+ }
if (ScalableDst->getElementType() == FixedSrc->getElementType()) {
auto *Load = CGF.Builder.CreateLoad(Src);
auto *UndefVec = llvm::UndefValue::get(ScalableDst);
auto *Zero = llvm::Constant::getNullValue(CGF.CGM.Int64Ty);
- return CGF.Builder.CreateInsertVector(ScalableDst, UndefVec, Load, Zero,
- "castScalableSve");
+ llvm::Value *Result = CGF.Builder.CreateInsertVector(
+ ScalableDst, UndefVec, Load, Zero, "castScalableSve");
+ if (NeedsBitcast)
+ Result = CGF.Builder.CreateBitCast(Result, OrigType);
+ return Result;
}
}
}
@@ -1550,11 +1564,11 @@ bool CodeGenModule::ReturnTypeUsesFPRet(QualType ResultType) {
default:
return false;
case BuiltinType::Float:
- return getTarget().useObjCFPRetForRealType(TargetInfo::Float);
+ return getTarget().useObjCFPRetForRealType(FloatModeKind::Float);
case BuiltinType::Double:
- return getTarget().useObjCFPRetForRealType(TargetInfo::Double);
+ return getTarget().useObjCFPRetForRealType(FloatModeKind::Double);
case BuiltinType::LongDouble:
- return getTarget().useObjCFPRetForRealType(TargetInfo::LongDouble);
+ return getTarget().useObjCFPRetForRealType(FloatModeKind::LongDouble);
}
}
@@ -1733,6 +1747,21 @@ static void AddAttributesFromFunctionProtoType(ASTContext &Ctx,
FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
}
+static void AddAttributesFromAssumes(llvm::AttrBuilder &FuncAttrs,
+ const Decl *Callee) {
+ if (!Callee)
+ return;
+
+ SmallVector<StringRef, 4> Attrs;
+
+ for (const AssumptionAttr *AA : Callee->specific_attrs<AssumptionAttr>())
+ AA->getAssumption().split(Attrs, ",");
+
+ if (!Attrs.empty())
+ FuncAttrs.addAttribute(llvm::AssumptionAttrKey,
+ llvm::join(Attrs.begin(), Attrs.end(), ","));
+}
+
bool CodeGenModule::MayDropFunctionReturn(const ASTContext &Context,
QualType ReturnType) {
// We can't just discard the return value for a record type with a
@@ -1814,6 +1843,8 @@ void CodeGenModule::getDefaultFunctionAttributes(StringRef Name,
FuncAttrs.addAttribute("no-infs-fp-math", "true");
if (LangOpts.NoHonorNaNs)
FuncAttrs.addAttribute("no-nans-fp-math", "true");
+ if (LangOpts.ApproxFunc)
+ FuncAttrs.addAttribute("approx-func-fp-math", "true");
if (LangOpts.UnsafeFPMath)
FuncAttrs.addAttribute("unsafe-fp-math", "true");
if (CodeGenOpts.SoftFloat)
@@ -1871,7 +1902,7 @@ void CodeGenModule::addDefaultFunctionDefinitionAttributes(llvm::Function &F) {
getDefaultFunctionAttributes(F.getName(), F.hasOptNone(),
/* AttrOnCallSite = */ false, FuncAttrs);
// TODO: call GetCPUAndFeaturesAttributes?
- F.addAttributes(llvm::AttributeList::FunctionIndex, FuncAttrs);
+ F.addFnAttrs(FuncAttrs);
}
void CodeGenModule::addDefaultFunctionDefinitionAttributes(
@@ -2006,6 +2037,10 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
const Decl *TargetDecl = CalleeInfo.getCalleeDecl().getDecl();
+ // Attach assumption attributes to the declaration. If this is a call
+ // site, attach assumptions from the caller to the call as well.
+ AddAttributesFromAssumes(FuncAttrs, TargetDecl);
+
bool HasOptnone = false;
// The NoBuiltinAttr attached to the target FunctionDecl.
const NoBuiltinAttr *NBA = nullptr;
@@ -2052,24 +2087,6 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
// allows it to work on indirect virtual function calls.
if (AttrOnCallSite && TargetDecl->hasAttr<NoMergeAttr>())
FuncAttrs.addAttribute(llvm::Attribute::NoMerge);
-
- // Add known guaranteed alignment for allocation functions.
- if (unsigned BuiltinID = Fn->getBuiltinID()) {
- switch (BuiltinID) {
- case Builtin::BIaligned_alloc:
- case Builtin::BIcalloc:
- case Builtin::BImalloc:
- case Builtin::BImemalign:
- case Builtin::BIrealloc:
- case Builtin::BIstrdup:
- case Builtin::BIstrndup:
- RetAttrs.addAlignmentAttr(Context.getTargetInfo().getNewAlign() /
- Context.getTargetInfo().getCharWidth());
- break;
- default:
- break;
- }
- }
}
// 'const', 'pure' and 'noalias' attributed functions are also nounwind.
@@ -2123,18 +2140,6 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
llvm::toStringRef(CodeGenOpts.UniformWGSize));
}
}
-
- std::string AssumptionValueStr;
- for (AssumptionAttr *AssumptionA :
- TargetDecl->specific_attrs<AssumptionAttr>()) {
- std::string AS = AssumptionA->getAssumption().str();
- if (!AS.empty() && !AssumptionValueStr.empty())
- AssumptionValueStr += ",";
- AssumptionValueStr += AS;
- }
-
- if (!AssumptionValueStr.empty())
- FuncAttrs.addAttribute(llvm::AssumptionAttrKey, AssumptionValueStr);
}
// Attach "no-builtins" attributes to:
@@ -2227,7 +2232,7 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
// C++ explicitly makes returning undefined values UB. C's rule only applies
// to used values, so we never mark them noundef for now.
bool HasStrictReturn = getLangOpts().CPlusPlus;
- if (TargetDecl) {
+ if (TargetDecl && HasStrictReturn) {
if (const FunctionDecl *FDecl = dyn_cast<FunctionDecl>(TargetDecl))
HasStrictReturn &= !FDecl->isExternC();
else if (const VarDecl *VDecl = dyn_cast<VarDecl>(TargetDecl))
@@ -2790,7 +2795,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
// so the UBSAN check could function.
llvm::ConstantInt *AlignmentCI =
cast<llvm::ConstantInt>(EmitScalarExpr(AVAttr->getAlignment()));
- unsigned AlignmentInt =
+ uint64_t AlignmentInt =
AlignmentCI->getLimitedValue(llvm::Value::MaximumAlignment);
if (AI->getParamAlign().valueOrOne() < AlignmentInt) {
AI->removeAttr(llvm::Attribute::AttrKind::Alignment);
@@ -2857,9 +2862,18 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
// llvm.experimental.vector.extract to convert back to the original
// VLST.
if (auto *VecTyTo = dyn_cast<llvm::FixedVectorType>(ConvertType(Ty))) {
- auto *Coerced = Fn->getArg(FirstIRArg);
+ llvm::Value *Coerced = Fn->getArg(FirstIRArg);
if (auto *VecTyFrom =
dyn_cast<llvm::ScalableVectorType>(Coerced->getType())) {
+ // If we are casting a scalable 16 x i1 predicate vector to a fixed i8
+ // vector, bitcast the source and use a vector extract.
+ auto PredType =
+ llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
+ if (VecTyFrom == PredType &&
+ VecTyTo->getElementType() == Builder.getInt8Ty()) {
+ VecTyFrom = llvm::ScalableVectorType::get(Builder.getInt8Ty(), 2);
+ Coerced = Builder.CreateBitCast(Coerced, VecTyFrom);
+ }
if (VecTyFrom->getElementType() == VecTyTo->getElementType()) {
llvm::Value *Zero = llvm::Constant::getNullValue(CGM.Int64Ty);
@@ -4503,10 +4517,8 @@ maybeRaiseRetAlignmentAttribute(llvm::LLVMContext &Ctx,
if (CurAlign >= NewAlign)
return Attrs;
llvm::Attribute AlignAttr = llvm::Attribute::getWithAlignment(Ctx, NewAlign);
- return Attrs
- .removeAttribute(Ctx, llvm::AttributeList::ReturnIndex,
- llvm::Attribute::AttrKind::Alignment)
- .addAttribute(Ctx, llvm::AttributeList::ReturnIndex, AlignAttr);
+ return Attrs.removeRetAttribute(Ctx, llvm::Attribute::AttrKind::Alignment)
+ .addRetAttribute(Ctx, AlignAttr);
}
template <typename AlignedAttrTy> class AbstractAssumeAlignedAttrEmitter {
@@ -5005,12 +5017,12 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
auto scalarAlign = CGM.getDataLayout().getPrefTypeAlignment(scalarType);
// Materialize to a temporary.
- addr = CreateTempAlloca(
- RV.getScalarVal()->getType(),
- CharUnits::fromQuantity(std::max(
- (unsigned)layout->getAlignment().value(), scalarAlign)),
- "tmp",
- /*ArraySize=*/nullptr, &AllocaAddr);
+ addr =
+ CreateTempAlloca(RV.getScalarVal()->getType(),
+ CharUnits::fromQuantity(std::max(
+ layout->getAlignment().value(), scalarAlign)),
+ "tmp",
+ /*ArraySize=*/nullptr, &AllocaAddr);
tempSize = EmitLifetimeStart(scalarSize, AllocaAddr.getPointer());
Builder.CreateStore(RV.getScalarVal(), addr);
@@ -5167,15 +5179,11 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(CurFuncDecl))
if (FD->hasAttr<StrictFPAttr>())
// All calls within a strictfp function are marked strictfp
- Attrs =
- Attrs.addAttribute(getLLVMContext(), llvm::AttributeList::FunctionIndex,
- llvm::Attribute::StrictFP);
+ Attrs = Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::StrictFP);
// Add call-site nomerge attribute if exists.
if (InNoMergeAttributedStmt)
- Attrs =
- Attrs.addAttribute(getLLVMContext(), llvm::AttributeList::FunctionIndex,
- llvm::Attribute::NoMerge);
+ Attrs = Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::NoMerge);
// Apply some call-site-specific attributes.
// TODO: work this into building the attribute set.
@@ -5185,15 +5193,12 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
if (CurCodeDecl && CurCodeDecl->hasAttr<FlattenAttr>() &&
!(TargetDecl && TargetDecl->hasAttr<NoInlineAttr>())) {
Attrs =
- Attrs.addAttribute(getLLVMContext(), llvm::AttributeList::FunctionIndex,
- llvm::Attribute::AlwaysInline);
+ Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::AlwaysInline);
}
// Disable inlining inside SEH __try blocks.
if (isSEHTryScope()) {
- Attrs =
- Attrs.addAttribute(getLLVMContext(), llvm::AttributeList::FunctionIndex,
- llvm::Attribute::NoInline);
+ Attrs = Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::NoInline);
}
// Decide whether to use a call or an invoke.
@@ -5209,7 +5214,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
CannotThrow = true;
} else {
// Otherwise, nounwind call sites will never throw.
- CannotThrow = Attrs.hasFnAttribute(llvm::Attribute::NoUnwind);
+ CannotThrow = Attrs.hasFnAttr(llvm::Attribute::NoUnwind);
if (auto *FPtr = dyn_cast<llvm::Function>(CalleePtr))
if (FPtr->hasFnAttribute(llvm::Attribute::NoUnwind))
@@ -5232,9 +5237,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(CurFuncDecl))
if (FD->hasAttr<StrictFPAttr>())
// All calls within a strictfp function are marked strictfp
- Attrs =
- Attrs.addAttribute(getLLVMContext(), llvm::AttributeList::FunctionIndex,
- llvm::Attribute::StrictFP);
+ Attrs = Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::StrictFP);
AssumeAlignedAttrEmitter AssumeAlignedAttrEmitter(*this, TargetDecl);
Attrs = AssumeAlignedAttrEmitter.TryEmitAsCallSiteAttribute(Attrs);
@@ -5261,8 +5264,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CurFuncDecl)) {
if (const auto *A = FD->getAttr<CFGuardAttr>()) {
if (A->getGuard() == CFGuardAttr::GuardArg::nocf && !CI->getCalledFunction())
- Attrs = Attrs.addAttribute(
- getLLVMContext(), llvm::AttributeList::FunctionIndex, "guard_nocf");
+ Attrs = Attrs.addFnAttribute(getLLVMContext(), "guard_nocf");
}
}
@@ -5306,6 +5308,15 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
TargetDecl->hasAttr<MSAllocatorAttr>())
getDebugInfo()->addHeapAllocSiteMetadata(CI, RetTy->getPointeeType(), Loc);
+ // Add metadata if calling an __attribute__((error(""))) or warning fn.
+ if (TargetDecl && TargetDecl->hasAttr<ErrorAttr>()) {
+ llvm::ConstantInt *Line =
+ llvm::ConstantInt::get(Int32Ty, Loc.getRawEncoding());
+ llvm::ConstantAsMetadata *MD = llvm::ConstantAsMetadata::get(Line);
+ llvm::MDTuple *MDT = llvm::MDNode::get(getLLVMContext(), {MD});
+ CI->setMetadata("srcloc", MDT);
+ }
+
// 4. Finish the call.
// If the call doesn't return, finish the basic block and clear the
@@ -5321,8 +5332,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// attributes of the called function.
if (auto *F = CI->getCalledFunction())
F->removeFnAttr(llvm::Attribute::NoReturn);
- CI->removeAttribute(llvm::AttributeList::FunctionIndex,
- llvm::Attribute::NoReturn);
+ CI->removeFnAttr(llvm::Attribute::NoReturn);
// Avoid incompatibility with ASan which relies on the `noreturn`
// attribute to insert handler calls.
diff --git a/clang/lib/CodeGen/CGClass.cpp b/clang/lib/CodeGen/CGClass.cpp
index 9895a23b7093..0df64d4d5d26 100644
--- a/clang/lib/CodeGen/CGClass.cpp
+++ b/clang/lib/CodeGen/CGClass.cpp
@@ -1424,6 +1424,11 @@ static bool CanSkipVTablePointerInitialization(CodeGenFunction &CGF,
if (!ClassDecl->isDynamicClass())
return true;
+ // For a final class, the vtable pointer is known to already point to the
+ // class's vtable.
+ if (ClassDecl->isEffectivelyFinal())
+ return true;
+
if (!Dtor->hasTrivialBody())
return false;
@@ -2502,6 +2507,8 @@ void CodeGenFunction::InitializeVTablePointer(const VPtr &Vptr) {
// Apply the offsets.
Address VTableField = LoadCXXThisAddress();
+ unsigned ThisAddrSpace =
+ VTableField.getPointer()->getType()->getPointerAddressSpace();
if (!NonVirtualOffset.isZero() || VirtualOffset)
VTableField = ApplyNonVirtualAndVirtualOffset(
@@ -2516,12 +2523,11 @@ void CodeGenFunction::InitializeVTablePointer(const VPtr &Vptr) {
llvm::FunctionType::get(CGM.Int32Ty, /*isVarArg=*/true)
->getPointerTo(ProgAS)
->getPointerTo(GlobalsAS);
- // vtable field is is derived from `this` pointer, therefore it should be in
- // default address space.
- VTableField = Builder.CreatePointerBitCastOrAddrSpaceCast(
- VTableField, VTablePtrTy->getPointerTo());
- VTableAddressPoint = Builder.CreatePointerBitCastOrAddrSpaceCast(
- VTableAddressPoint, VTablePtrTy);
+ // vtable field is derived from `this` pointer, therefore they should be in
+ // the same addr space. Note that this might not be LLVM address space 0.
+ VTableField = Builder.CreateBitCast(VTableField,
+ VTablePtrTy->getPointerTo(ThisAddrSpace));
+ VTableAddressPoint = Builder.CreateBitCast(VTableAddressPoint, VTablePtrTy);
llvm::StoreInst *Store = Builder.CreateStore(VTableAddressPoint, VTableField);
TBAAAccessInfo TBAAInfo = CGM.getTBAAVTablePtrAccessInfo(VTablePtrTy);
diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp
index 81c910f40bf8..af651e6f44b7 100644
--- a/clang/lib/CodeGen/CGDebugInfo.cpp
+++ b/clang/lib/CodeGen/CGDebugInfo.cpp
@@ -25,6 +25,7 @@
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/Expr.h"
#include "clang/AST/RecordLayout.h"
+#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/SourceManager.h"
@@ -52,7 +53,7 @@ using namespace clang::CodeGen;
static uint32_t getTypeAlignIfRequired(const Type *Ty, const ASTContext &Ctx) {
auto TI = Ctx.getTypeInfo(Ty);
- return TI.AlignIsRequired ? TI.Align : 0;
+ return TI.isAlignRequired() ? TI.Align : 0;
}
static uint32_t getTypeAlignIfRequired(QualType Ty, const ASTContext &Ctx) {
@@ -243,6 +244,11 @@ PrintingPolicy CGDebugInfo::getPrintingPolicy() const {
PP.SplitTemplateClosers = true;
}
+ PP.SuppressInlineNamespace = false;
+ PP.PrintCanonicalTypes = true;
+ PP.UsePreferredNames = false;
+ PP.AlwaysIncludeTypeForTemplateArgument = true;
+
// Apply -fdebug-prefix-map.
PP.Callbacks = &PrintCB;
return PP;
@@ -385,7 +391,7 @@ llvm::DIFile *CGDebugInfo::getOrCreateFile(SourceLocation Loc) {
} else {
PresumedLoc PLoc = SM.getPresumedLoc(Loc);
FileName = PLoc.getFilename();
-
+
if (FileName.empty()) {
FileName = TheCU->getFile()->getFilename();
} else {
@@ -830,11 +836,12 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) {
case BuiltinType::BFloat16:
case BuiltinType::Float128:
case BuiltinType::Double:
- // FIXME: For targets where long double and __float128 have the same size,
- // they are currently indistinguishable in the debugger without some
- // special treatment. However, there is currently no consensus on encoding
- // and this should be updated once a DWARF encoding exists for distinct
- // floating point types of the same size.
+ case BuiltinType::Ibm128:
+ // FIXME: For targets where long double, __ibm128 and __float128 have the
+ // same size, they are currently indistinguishable in the debugger without
+ // some special treatment. However, there is currently no consensus on
+ // encoding and this should be updated once a DWARF encoding exists for
+ // distinct floating point types of the same size.
Encoding = llvm::dwarf::DW_ATE_float;
break;
case BuiltinType::ShortAccum:
@@ -867,23 +874,7 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) {
break;
}
- switch (BT->getKind()) {
- case BuiltinType::Long:
- BTName = "long int";
- break;
- case BuiltinType::LongLong:
- BTName = "long long int";
- break;
- case BuiltinType::ULong:
- BTName = "long unsigned int";
- break;
- case BuiltinType::ULongLong:
- BTName = "long long unsigned int";
- break;
- default:
- BTName = BT->getName(CGM.getLangOpts());
- break;
- }
+ BTName = BT->getName(CGM.getLangOpts());
// Bit size and offset of the type.
uint64_t Size = CGM.getContext().getTypeSize(BT);
return DBuilder.createBasicType(BTName, Size, Encoding);
@@ -914,34 +905,98 @@ llvm::DIType *CGDebugInfo::CreateType(const ComplexType *Ty) {
return DBuilder.createBasicType("complex", Size, Encoding);
}
-llvm::DIType *CGDebugInfo::CreateQualifiedType(QualType Ty,
- llvm::DIFile *Unit) {
+static void stripUnusedQualifiers(Qualifiers &Q) {
+ // Ignore these qualifiers for now.
+ Q.removeObjCGCAttr();
+ Q.removeAddressSpace();
+ Q.removeObjCLifetime();
+ Q.removeUnaligned();
+}
+
+static llvm::dwarf::Tag getNextQualifier(Qualifiers &Q) {
+ if (Q.hasConst()) {
+ Q.removeConst();
+ return llvm::dwarf::DW_TAG_const_type;
+ }
+ if (Q.hasVolatile()) {
+ Q.removeVolatile();
+ return llvm::dwarf::DW_TAG_volatile_type;
+ }
+ if (Q.hasRestrict()) {
+ Q.removeRestrict();
+ return llvm::dwarf::DW_TAG_restrict_type;
+ }
+ return (llvm::dwarf::Tag)0;
+}
+
+// Strip MacroQualifiedTypeLoc and AttributedTypeLoc
+// as their corresponding types will be ignored
+// during code generation. Stripping them allows us
+// to maintain a proper TypeLoc for a given type
+// during code generation.
+static TypeLoc StripMacroAttributed(TypeLoc TL) {
+ if (!TL)
+ return TL;
+
+ while (true) {
+ if (auto MTL = TL.getAs<MacroQualifiedTypeLoc>())
+ TL = MTL.getInnerLoc();
+ else if (auto ATL = TL.getAs<AttributedTypeLoc>())
+ TL = ATL.getModifiedLoc();
+ else
+ break;
+ }
+ return TL;
+}
+
+llvm::DIType *CGDebugInfo::CreateQualifiedType(QualType Ty, llvm::DIFile *Unit,
+ TypeLoc TL) {
QualifierCollector Qc;
const Type *T = Qc.strip(Ty);
- // Ignore these qualifiers for now.
- Qc.removeObjCGCAttr();
- Qc.removeAddressSpace();
- Qc.removeObjCLifetime();
+ stripUnusedQualifiers(Qc);
// We will create one Derived type for one qualifier and recurse to handle any
// additional ones.
- llvm::dwarf::Tag Tag;
- if (Qc.hasConst()) {
- Tag = llvm::dwarf::DW_TAG_const_type;
- Qc.removeConst();
- } else if (Qc.hasVolatile()) {
- Tag = llvm::dwarf::DW_TAG_volatile_type;
- Qc.removeVolatile();
- } else if (Qc.hasRestrict()) {
- Tag = llvm::dwarf::DW_TAG_restrict_type;
- Qc.removeRestrict();
- } else {
+ llvm::dwarf::Tag Tag = getNextQualifier(Qc);
+ if (!Tag) {
assert(Qc.empty() && "Unknown type qualifier for debug info");
return getOrCreateType(QualType(T, 0), Unit);
}
- auto *FromTy = getOrCreateType(Qc.apply(CGM.getContext(), T), Unit);
+ QualType NextTy = Qc.apply(CGM.getContext(), T);
+ TypeLoc NextTL;
+ if (NextTy.hasQualifiers())
+ NextTL = TL;
+ else if (TL) {
+ if (auto QTL = TL.getAs<QualifiedTypeLoc>())
+ NextTL = StripMacroAttributed(QTL.getNextTypeLoc());
+ }
+ auto *FromTy = getOrCreateType(NextTy, Unit, NextTL);
+
+ // No need to fill in the Name, Line, Size, Alignment, Offset in case of
+ // CVR derived types.
+ return DBuilder.createQualifiedType(Tag, FromTy);
+}
+
+llvm::DIType *CGDebugInfo::CreateQualifiedType(const FunctionProtoType *F,
+ llvm::DIFile *Unit) {
+ FunctionProtoType::ExtProtoInfo EPI = F->getExtProtoInfo();
+ Qualifiers &Q = EPI.TypeQuals;
+ stripUnusedQualifiers(Q);
+
+ // We will create one Derived type for one qualifier and recurse to handle any
+ // additional ones.
+ llvm::dwarf::Tag Tag = getNextQualifier(Q);
+ if (!Tag) {
+ assert(Q.empty() && "Unknown type qualifier for debug info");
+ return nullptr;
+ }
+
+ auto *FromTy =
+ getOrCreateType(CGM.getContext().getFunctionType(F->getReturnType(),
+ F->getParamTypes(), EPI),
+ Unit);
// No need to fill in the Name, Line, Size, Alignment, Offset in case of
// CVR derived types.
@@ -961,10 +1016,10 @@ llvm::DIType *CGDebugInfo::CreateType(const ObjCObjectPointerType *Ty,
Ty->getPointeeType(), Unit);
}
-llvm::DIType *CGDebugInfo::CreateType(const PointerType *Ty,
- llvm::DIFile *Unit) {
+llvm::DIType *CGDebugInfo::CreateType(const PointerType *Ty, llvm::DIFile *Unit,
+ TypeLoc TL) {
return CreatePointerLikeType(llvm::dwarf::DW_TAG_pointer_type, Ty,
- Ty->getPointeeType(), Unit);
+ Ty->getPointeeType(), Unit, TL);
}
/// \return whether a C++ mangling exists for the type defined by TD.
@@ -1105,7 +1160,8 @@ CGDebugInfo::getOrCreateRecordFwdDecl(const RecordType *Ty,
llvm::DIType *CGDebugInfo::CreatePointerLikeType(llvm::dwarf::Tag Tag,
const Type *Ty,
QualType PointeeTy,
- llvm::DIFile *Unit) {
+ llvm::DIFile *Unit,
+ TypeLoc TL) {
// Bit size, align and offset of the type.
// Size is always the size of a pointer. We can't use getTypeSize here
// because that does not return the correct value for references.
@@ -1115,13 +1171,52 @@ llvm::DIType *CGDebugInfo::CreatePointerLikeType(llvm::dwarf::Tag Tag,
Optional<unsigned> DWARFAddressSpace =
CGM.getTarget().getDWARFAddressSpace(AddressSpace);
+ llvm::DINodeArray Annotations = nullptr;
+ TypeLoc NextTL;
+ if (TL) {
+ SmallVector<llvm::Metadata *, 4> Annots;
+ NextTL = TL.getNextTypeLoc();
+ if (NextTL) {
+ // Traverse all MacroQualifiedTypeLoc, QualifiedTypeLoc and
+ // AttributedTypeLoc type locations so we can collect
+ // BTFTypeTag attributes for this pointer.
+ while (true) {
+ if (auto MTL = NextTL.getAs<MacroQualifiedTypeLoc>()) {
+ NextTL = MTL.getInnerLoc();
+ } else if (auto QTL = NextTL.getAs<QualifiedTypeLoc>()) {
+ NextTL = QTL.getNextTypeLoc();
+ } else if (auto ATL = NextTL.getAs<AttributedTypeLoc>()) {
+ if (const auto *A = ATL.getAttrAs<BTFTypeTagAttr>()) {
+ StringRef BTFTypeTag = A->getBTFTypeTag();
+ if (!BTFTypeTag.empty()) {
+ llvm::Metadata *Ops[2] = {
+ llvm::MDString::get(CGM.getLLVMContext(),
+ StringRef("btf_type_tag")),
+ llvm::MDString::get(CGM.getLLVMContext(), BTFTypeTag)};
+ Annots.insert(Annots.begin(),
+ llvm::MDNode::get(CGM.getLLVMContext(), Ops));
+ }
+ }
+ NextTL = ATL.getModifiedLoc();
+ } else {
+ break;
+ }
+ }
+ }
+
+ NextTL = StripMacroAttributed(TL.getNextTypeLoc());
+ if (Annots.size() > 0)
+ Annotations = DBuilder.getOrCreateArray(Annots);
+ }
+
if (Tag == llvm::dwarf::DW_TAG_reference_type ||
Tag == llvm::dwarf::DW_TAG_rvalue_reference_type)
return DBuilder.createReferenceType(Tag, getOrCreateType(PointeeTy, Unit),
Size, Align, DWARFAddressSpace);
else
- return DBuilder.createPointerType(getOrCreateType(PointeeTy, Unit), Size,
- Align, DWARFAddressSpace);
+ return DBuilder.createPointerType(getOrCreateType(PointeeTy, Unit, NextTL),
+ Size, Align, DWARFAddressSpace,
+ StringRef(), Annotations);
}
llvm::DIType *CGDebugInfo::getOrCreateStructPtrType(StringRef Name,
@@ -1226,7 +1321,8 @@ llvm::DIType *CGDebugInfo::CreateType(const TemplateSpecializationType *Ty,
SmallString<128> NS;
llvm::raw_svector_ostream OS(NS);
- Ty->getTemplateName().print(OS, getPrintingPolicy(), /*qualified*/ false);
+ Ty->getTemplateName().print(OS, getPrintingPolicy(),
+ TemplateName::Qualified::None);
printTemplateArgumentList(OS, Ty->template_arguments(), getPrintingPolicy());
SourceLocation Loc = AliasDecl->getLocation();
@@ -1237,8 +1333,11 @@ llvm::DIType *CGDebugInfo::CreateType(const TemplateSpecializationType *Ty,
llvm::DIType *CGDebugInfo::CreateType(const TypedefType *Ty,
llvm::DIFile *Unit) {
+ TypeLoc TL;
+ if (const TypeSourceInfo *TSI = Ty->getDecl()->getTypeSourceInfo())
+ TL = TSI->getTypeLoc();
llvm::DIType *Underlying =
- getOrCreateType(Ty->getDecl()->getUnderlyingType(), Unit);
+ getOrCreateType(Ty->getDecl()->getUnderlyingType(), Unit, TL);
if (Ty->getDecl()->hasAttr<NoDebugAttr>())
return Underlying;
@@ -1249,9 +1348,11 @@ llvm::DIType *CGDebugInfo::CreateType(const TypedefType *Ty,
uint32_t Align = getDeclAlignIfRequired(Ty->getDecl(), CGM.getContext());
// Typedefs are derived from some other type.
+ llvm::DINodeArray Annotations = CollectBTFDeclTagAnnotations(Ty->getDecl());
return DBuilder.createTypedef(Underlying, Ty->getDecl()->getName(),
getOrCreateFile(Loc), getLineNumber(Loc),
- getDeclContextDescriptor(Ty->getDecl()), Align);
+ getDeclContextDescriptor(Ty->getDecl()), Align,
+ Annotations);
}
static unsigned getDwarfCC(CallingConv CC) {
@@ -1300,27 +1401,74 @@ static unsigned getDwarfCC(CallingConv CC) {
return 0;
}
+static llvm::DINode::DIFlags getRefFlags(const FunctionProtoType *Func) {
+ llvm::DINode::DIFlags Flags = llvm::DINode::FlagZero;
+ if (Func->getExtProtoInfo().RefQualifier == RQ_LValue)
+ Flags |= llvm::DINode::FlagLValueReference;
+ if (Func->getExtProtoInfo().RefQualifier == RQ_RValue)
+ Flags |= llvm::DINode::FlagRValueReference;
+ return Flags;
+}
+
llvm::DIType *CGDebugInfo::CreateType(const FunctionType *Ty,
- llvm::DIFile *Unit) {
+ llvm::DIFile *Unit, TypeLoc TL) {
+ const auto *FPT = dyn_cast<FunctionProtoType>(Ty);
+ if (FPT) {
+ if (llvm::DIType *QTy = CreateQualifiedType(FPT, Unit))
+ return QTy;
+ }
+
+ // Create the type without any qualifiers
+
SmallVector<llvm::Metadata *, 16> EltTys;
// Add the result type at least.
- EltTys.push_back(getOrCreateType(Ty->getReturnType(), Unit));
+ TypeLoc RetTL;
+ if (TL) {
+ if (auto FTL = TL.getAs<FunctionTypeLoc>())
+ RetTL = FTL.getReturnLoc();
+ }
+ EltTys.push_back(getOrCreateType(Ty->getReturnType(), Unit, RetTL));
+ llvm::DINode::DIFlags Flags = llvm::DINode::FlagZero;
// Set up remainder of arguments if there is a prototype.
// otherwise emit it as a variadic function.
- if (isa<FunctionNoProtoType>(Ty))
+ if (!FPT) {
EltTys.push_back(DBuilder.createUnspecifiedParameter());
- else if (const auto *FPT = dyn_cast<FunctionProtoType>(Ty)) {
- for (const QualType &ParamType : FPT->param_types())
- EltTys.push_back(getOrCreateType(ParamType, Unit));
+ } else {
+ Flags = getRefFlags(FPT);
+ bool DoneWithTL = false;
+ if (TL) {
+ if (auto FTL = TL.getAs<FunctionTypeLoc>()) {
+ DoneWithTL = true;
+ unsigned Idx = 0;
+ unsigned FTL_NumParams = FTL.getNumParams();
+ for (const QualType &ParamType : FPT->param_types()) {
+ TypeLoc ParamTL;
+ if (Idx < FTL_NumParams) {
+ if (ParmVarDecl *Param = FTL.getParam(Idx)) {
+ if (const TypeSourceInfo *TSI = Param->getTypeSourceInfo())
+ ParamTL = TSI->getTypeLoc();
+ }
+ }
+ EltTys.push_back(getOrCreateType(ParamType, Unit, ParamTL));
+ Idx++;
+ }
+ }
+ }
+
+ if (!DoneWithTL) {
+ for (const QualType &ParamType : FPT->param_types())
+ EltTys.push_back(getOrCreateType(ParamType, Unit));
+ }
if (FPT->isVariadic())
EltTys.push_back(DBuilder.createUnspecifiedParameter());
}
llvm::DITypeRefArray EltTypeArray = DBuilder.getOrCreateTypeArray(EltTys);
- return DBuilder.createSubroutineType(EltTypeArray, llvm::DINode::FlagZero,
- getDwarfCC(Ty->getCallConv()));
+ llvm::DIType *F = DBuilder.createSubroutineType(
+ EltTypeArray, Flags, getDwarfCC(Ty->getCallConv()));
+ return F;
}
/// Convert an AccessSpecifier into the corresponding DINode flag.
@@ -1377,17 +1525,19 @@ llvm::DIType *CGDebugInfo::createBitFieldType(const FieldDecl *BitFieldDecl,
Offset = BitFieldInfo.StorageSize - BitFieldInfo.Size - Offset;
uint64_t OffsetInBits = StorageOffsetInBits + Offset;
llvm::DINode::DIFlags Flags = getAccessFlag(BitFieldDecl->getAccess(), RD);
+ llvm::DINodeArray Annotations = CollectBTFDeclTagAnnotations(BitFieldDecl);
return DBuilder.createBitFieldMemberType(
RecordTy, Name, File, Line, SizeInBits, OffsetInBits, StorageOffsetInBits,
- Flags, DebugType);
+ Flags, DebugType, Annotations);
}
llvm::DIType *
CGDebugInfo::createFieldType(StringRef name, QualType type, SourceLocation loc,
AccessSpecifier AS, uint64_t offsetInBits,
uint32_t AlignInBits, llvm::DIFile *tunit,
- llvm::DIScope *scope, const RecordDecl *RD) {
- llvm::DIType *debugType = getOrCreateType(type, tunit);
+ llvm::DIScope *scope, const RecordDecl *RD,
+ llvm::DINodeArray Annotations, TypeLoc TL) {
+ llvm::DIType *debugType = getOrCreateType(type, tunit, TL);
// Get the location for the field.
llvm::DIFile *file = getOrCreateFile(loc);
@@ -1404,7 +1554,7 @@ CGDebugInfo::createFieldType(StringRef name, QualType type, SourceLocation loc,
llvm::DINode::DIFlags flags = getAccessFlag(AS, RD);
return DBuilder.createMemberType(scope, name, file, line, SizeInBits, Align,
- offsetInBits, flags, debugType);
+ offsetInBits, flags, debugType, Annotations);
}
void CGDebugInfo::CollectRecordLambdaFields(
@@ -1494,9 +1644,13 @@ void CGDebugInfo::CollectRecordNormalField(
FieldType = createBitFieldType(field, RecordTy, RD);
} else {
auto Align = getDeclAlignIfRequired(field, CGM.getContext());
- FieldType =
- createFieldType(name, type, field->getLocation(), field->getAccess(),
- OffsetInBits, Align, tunit, RecordTy, RD);
+ llvm::DINodeArray Annotations = CollectBTFDeclTagAnnotations(field);
+ TypeLoc TL;
+ if (const TypeSourceInfo *TSI = field->getTypeSourceInfo())
+ TL = TSI->getTypeLoc();
+ FieldType = createFieldType(name, type, field->getLocation(),
+ field->getAccess(), OffsetInBits, Align, tunit,
+ RecordTy, RD, Annotations, TL);
}
elements.push_back(FieldType);
@@ -1584,10 +1738,25 @@ llvm::DISubroutineType *
CGDebugInfo::getOrCreateInstanceMethodType(QualType ThisPtr,
const FunctionProtoType *Func,
llvm::DIFile *Unit, bool decl) {
+ FunctionProtoType::ExtProtoInfo EPI = Func->getExtProtoInfo();
+ Qualifiers &Qc = EPI.TypeQuals;
+ Qc.removeConst();
+ Qc.removeVolatile();
+ Qc.removeRestrict();
+ Qc.removeUnaligned();
+ // Keep the removed qualifiers in sync with
+ // CreateQualifiedType(const FunctionProtoType*, DIFile *Unit)
+ // On a 'real' member function type, these qualifiers are carried on the type
+ // of the first parameter, not as separate DW_TAG_const_type (etc) decorator
+ // tags around them. (But, in the raw function types with qualifiers, they have
+ // to use wrapper types.)
+
// Add "this" pointer.
- llvm::DITypeRefArray Args(
- cast<llvm::DISubroutineType>(getOrCreateType(QualType(Func, 0), Unit))
- ->getTypeArray());
+ const auto *OriginalFunc = cast<llvm::DISubroutineType>(
+ getOrCreateType(CGM.getContext().getFunctionType(
+ Func->getReturnType(), Func->getParamTypes(), EPI),
+ Unit));
+ llvm::DITypeRefArray Args = OriginalFunc->getTypeArray();
assert(Args.size() && "Invalid number of arguments!");
SmallVector<llvm::Metadata *, 16> Elts;
@@ -1629,13 +1798,7 @@ CGDebugInfo::getOrCreateInstanceMethodType(QualType ThisPtr,
llvm::DITypeRefArray EltTypeArray = DBuilder.getOrCreateTypeArray(Elts);
- llvm::DINode::DIFlags Flags = llvm::DINode::FlagZero;
- if (Func->getExtProtoInfo().RefQualifier == RQ_LValue)
- Flags |= llvm::DINode::FlagLValueReference;
- if (Func->getExtProtoInfo().RefQualifier == RQ_RValue)
- Flags |= llvm::DINode::FlagRValueReference;
-
- return DBuilder.createSubroutineType(EltTypeArray, Flags,
+ return DBuilder.createSubroutineType(EltTypeArray, OriginalFunc->getFlags(),
getDwarfCC(Func->getCallConv()));
}
@@ -1887,23 +2050,25 @@ void CGDebugInfo::CollectCXXBasesAux(
}
llvm::DINodeArray
-CGDebugInfo::CollectTemplateParams(const TemplateParameterList *TPList,
- ArrayRef<TemplateArgument> TAList,
+CGDebugInfo::CollectTemplateParams(Optional<TemplateArgs> OArgs,
llvm::DIFile *Unit) {
+ if (!OArgs)
+ return llvm::DINodeArray();
+ TemplateArgs &Args = *OArgs;
SmallVector<llvm::Metadata *, 16> TemplateParams;
- for (unsigned i = 0, e = TAList.size(); i != e; ++i) {
- const TemplateArgument &TA = TAList[i];
+ for (unsigned i = 0, e = Args.Args.size(); i != e; ++i) {
+ const TemplateArgument &TA = Args.Args[i];
StringRef Name;
bool defaultParameter = false;
- if (TPList)
- Name = TPList->getParam(i)->getName();
+ if (Args.TList)
+ Name = Args.TList->getParam(i)->getName();
switch (TA.getKind()) {
case TemplateArgument::Type: {
llvm::DIType *TTy = getOrCreateType(TA.getAsType(), Unit);
- if (TPList)
+ if (Args.TList)
if (auto *templateType =
- dyn_cast_or_null<TemplateTypeParmDecl>(TPList->getParam(i)))
+ dyn_cast_or_null<TemplateTypeParmDecl>(Args.TList->getParam(i)))
if (templateType->hasDefaultArgument())
defaultParameter =
templateType->getDefaultArgument() == TA.getAsType();
@@ -1914,9 +2079,9 @@ CGDebugInfo::CollectTemplateParams(const TemplateParameterList *TPList,
} break;
case TemplateArgument::Integral: {
llvm::DIType *TTy = getOrCreateType(TA.getIntegralType(), Unit);
- if (TPList && CGM.getCodeGenOpts().DwarfVersion >= 5)
- if (auto *templateType =
- dyn_cast_or_null<NonTypeTemplateParmDecl>(TPList->getParam(i)))
+ if (Args.TList && CGM.getCodeGenOpts().DwarfVersion >= 5)
+ if (auto *templateType = dyn_cast_or_null<NonTypeTemplateParmDecl>(
+ Args.TList->getParam(i)))
if (templateType->hasDefaultArgument() &&
!templateType->getDefaultArgument()->isValueDependent())
defaultParameter = llvm::APSInt::isSameValue(
@@ -1993,15 +2158,19 @@ CGDebugInfo::CollectTemplateParams(const TemplateParameterList *TPList,
TemplateParams.push_back(DBuilder.createTemplateValueParameter(
TheCU, Name, TTy, defaultParameter, V));
} break;
- case TemplateArgument::Template:
+ case TemplateArgument::Template: {
+ std::string QualName;
+ llvm::raw_string_ostream OS(QualName);
+ TA.getAsTemplate().getAsTemplateDecl()->printQualifiedName(
+ OS, getPrintingPolicy());
TemplateParams.push_back(DBuilder.createTemplateTemplateParameter(
- TheCU, Name, nullptr,
- TA.getAsTemplate().getAsTemplateDecl()->getQualifiedNameAsString()));
+ TheCU, Name, nullptr, OS.str()));
break;
+ }
case TemplateArgument::Pack:
TemplateParams.push_back(DBuilder.createTemplateParameterPack(
TheCU, Name, nullptr,
- CollectTemplateParams(nullptr, TA.getPackAsArray(), Unit)));
+ CollectTemplateParams({{nullptr, TA.getPackAsArray()}}, Unit)));
break;
case TemplateArgument::Expression: {
const Expr *E = TA.getAsExpr();
@@ -2024,43 +2193,72 @@ CGDebugInfo::CollectTemplateParams(const TemplateParameterList *TPList,
return DBuilder.getOrCreateArray(TemplateParams);
}
-llvm::DINodeArray
-CGDebugInfo::CollectFunctionTemplateParams(const FunctionDecl *FD,
- llvm::DIFile *Unit) {
+Optional<CGDebugInfo::TemplateArgs>
+CGDebugInfo::GetTemplateArgs(const FunctionDecl *FD) const {
if (FD->getTemplatedKind() ==
FunctionDecl::TK_FunctionTemplateSpecialization) {
const TemplateParameterList *TList = FD->getTemplateSpecializationInfo()
->getTemplate()
->getTemplateParameters();
- return CollectTemplateParams(
- TList, FD->getTemplateSpecializationArgs()->asArray(), Unit);
+ return {{TList, FD->getTemplateSpecializationArgs()->asArray()}};
}
- return llvm::DINodeArray();
+ return None;
}
-
-llvm::DINodeArray CGDebugInfo::CollectVarTemplateParams(const VarDecl *VL,
- llvm::DIFile *Unit) {
+Optional<CGDebugInfo::TemplateArgs>
+CGDebugInfo::GetTemplateArgs(const VarDecl *VD) const {
// Always get the full list of parameters, not just the ones from the
// specialization. A partial specialization may have fewer parameters than
// there are arguments.
- auto *TS = dyn_cast<VarTemplateSpecializationDecl>(VL);
+ auto *TS = dyn_cast<VarTemplateSpecializationDecl>(VD);
if (!TS)
- return llvm::DINodeArray();
+ return None;
VarTemplateDecl *T = TS->getSpecializedTemplate();
const TemplateParameterList *TList = T->getTemplateParameters();
auto TA = TS->getTemplateArgs().asArray();
- return CollectTemplateParams(TList, TA, Unit);
+ return {{TList, TA}};
+}
+Optional<CGDebugInfo::TemplateArgs>
+CGDebugInfo::GetTemplateArgs(const RecordDecl *RD) const {
+ if (auto *TSpecial = dyn_cast<ClassTemplateSpecializationDecl>(RD)) {
+ // Always get the full list of parameters, not just the ones from the
+ // specialization. A partial specialization may have fewer parameters than
+ // there are arguments.
+ TemplateParameterList *TPList =
+ TSpecial->getSpecializedTemplate()->getTemplateParameters();
+ const TemplateArgumentList &TAList = TSpecial->getTemplateArgs();
+ return {{TPList, TAList.asArray()}};
+ }
+ return None;
}
-llvm::DINodeArray CGDebugInfo::CollectCXXTemplateParams(
- const ClassTemplateSpecializationDecl *TSpecial, llvm::DIFile *Unit) {
- // Always get the full list of parameters, not just the ones from the
- // specialization. A partial specialization may have fewer parameters than
- // there are arguments.
- TemplateParameterList *TPList =
- TSpecial->getSpecializedTemplate()->getTemplateParameters();
- const TemplateArgumentList &TAList = TSpecial->getTemplateArgs();
- return CollectTemplateParams(TPList, TAList.asArray(), Unit);
+llvm::DINodeArray
+CGDebugInfo::CollectFunctionTemplateParams(const FunctionDecl *FD,
+ llvm::DIFile *Unit) {
+ return CollectTemplateParams(GetTemplateArgs(FD), Unit);
+}
+
+llvm::DINodeArray CGDebugInfo::CollectVarTemplateParams(const VarDecl *VL,
+ llvm::DIFile *Unit) {
+ return CollectTemplateParams(GetTemplateArgs(VL), Unit);
+}
+
+llvm::DINodeArray CGDebugInfo::CollectCXXTemplateParams(const RecordDecl *RD,
+ llvm::DIFile *Unit) {
+ return CollectTemplateParams(GetTemplateArgs(RD), Unit);
+}
+
+llvm::DINodeArray CGDebugInfo::CollectBTFDeclTagAnnotations(const Decl *D) {
+ if (!D->hasAttr<BTFDeclTagAttr>())
+ return nullptr;
+
+ SmallVector<llvm::Metadata *, 4> Annotations;
+ for (const auto *I : D->specific_attrs<BTFDeclTagAttr>()) {
+ llvm::Metadata *Ops[2] = {
+ llvm::MDString::get(CGM.getLLVMContext(), StringRef("btf_decl_tag")),
+ llvm::MDString::get(CGM.getLLVMContext(), I->getBTFDeclTag())};
+ Annotations.push_back(llvm::MDNode::get(CGM.getLLVMContext(), Ops));
+ }
+ return DBuilder.getOrCreateArray(Annotations);
}
llvm::DIType *CGDebugInfo::getOrCreateVTablePtrType(llvm::DIFile *Unit) {
@@ -3210,7 +3408,8 @@ void CGDebugInfo::completeUnusedClass(const CXXRecordDecl &D) {
RetainedTypes.push_back(CGM.getContext().getRecordType(&D).getAsOpaquePtr());
}
-llvm::DIType *CGDebugInfo::getOrCreateType(QualType Ty, llvm::DIFile *Unit) {
+llvm::DIType *CGDebugInfo::getOrCreateType(QualType Ty, llvm::DIFile *Unit,
+ TypeLoc TL) {
if (Ty.isNull())
return nullptr;
@@ -3227,7 +3426,7 @@ llvm::DIType *CGDebugInfo::getOrCreateType(QualType Ty, llvm::DIFile *Unit) {
if (auto *T = getTypeOrNull(Ty))
return T;
- llvm::DIType *Res = CreateTypeNode(Ty, Unit);
+ llvm::DIType *Res = CreateTypeNode(Ty, Unit, TL);
void *TyPtr = Ty.getAsOpaquePtr();
// And update the type cache.
@@ -3271,10 +3470,11 @@ llvm::DIModule *CGDebugInfo::getParentModuleOrNull(const Decl *D) {
return nullptr;
}
-llvm::DIType *CGDebugInfo::CreateTypeNode(QualType Ty, llvm::DIFile *Unit) {
+llvm::DIType *CGDebugInfo::CreateTypeNode(QualType Ty, llvm::DIFile *Unit,
+ TypeLoc TL) {
// Handle qualifiers, which recursively handles what they refer to.
if (Ty.hasLocalQualifiers())
- return CreateQualifiedType(Ty, Unit);
+ return CreateQualifiedType(Ty, Unit, TL);
// Work out details of type.
switch (Ty->getTypeClass()) {
@@ -3303,7 +3503,7 @@ llvm::DIType *CGDebugInfo::CreateTypeNode(QualType Ty, llvm::DIFile *Unit) {
case Type::Complex:
return CreateType(cast<ComplexType>(Ty));
case Type::Pointer:
- return CreateType(cast<PointerType>(Ty), Unit);
+ return CreateType(cast<PointerType>(Ty), Unit, TL);
case Type::BlockPointer:
return CreateType(cast<BlockPointerType>(Ty), Unit);
case Type::Typedef:
@@ -3314,7 +3514,7 @@ llvm::DIType *CGDebugInfo::CreateTypeNode(QualType Ty, llvm::DIFile *Unit) {
return CreateEnumType(cast<EnumType>(Ty));
case Type::FunctionProto:
case Type::FunctionNoProto:
- return CreateType(cast<FunctionType>(Ty), Unit);
+ return CreateType(cast<FunctionType>(Ty), Unit, TL);
case Type::ConstantArray:
case Type::VariableArray:
case Type::IncompleteArray:
@@ -3435,9 +3635,10 @@ llvm::DICompositeType *CGDebugInfo::CreateLimitedType(const RecordType *Ty) {
Flags |= llvm::DINode::FlagExportSymbols;
}
+ llvm::DINodeArray Annotations = CollectBTFDeclTagAnnotations(D);
llvm::DICompositeType *RealDecl = DBuilder.createReplaceableCompositeType(
getTagForRecord(RD), RDName, RDContext, DefUnit, Line, 0, Size, Align,
- Flags, Identifier);
+ Flags, Identifier, Annotations);
// Elements of composite types usually have references back to the type, creating
// uniquing cycles. Distinct nodes are more efficient.
@@ -3858,7 +4059,26 @@ llvm::DISubroutineType *CGDebugInfo::getOrCreateFunctionType(const Decl *D,
getDwarfCC(CC));
}
- return cast<llvm::DISubroutineType>(getOrCreateType(FnType, F));
+ TypeLoc TL;
+ if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
+ if (const TypeSourceInfo *TSI = FD->getTypeSourceInfo())
+ TL = TSI->getTypeLoc();
+ }
+ return cast<llvm::DISubroutineType>(getOrCreateType(FnType, F, TL));
+}
+
+QualType
+CGDebugInfo::getFunctionType(const FunctionDecl *FD, QualType RetTy,
+ const SmallVectorImpl<const VarDecl *> &Args) {
+ CallingConv CC = CallingConv::CC_C;
+ if (FD)
+ if (const auto *SrcFnTy = FD->getType()->getAs<FunctionType>())
+ CC = SrcFnTy->getCallConv();
+ SmallVector<QualType, 16> ArgTypes;
+ for (const VarDecl *VD : Args)
+ ArgTypes.push_back(VD->getType());
+ return CGM.getContext().getFunctionType(RetTy, ArgTypes,
+ FunctionProtoType::ExtProtoInfo(CC));
}
void CGDebugInfo::emitFunctionStart(GlobalDecl GD, SourceLocation Loc,
@@ -3935,10 +4155,13 @@ void CGDebugInfo::emitFunctionStart(GlobalDecl GD, SourceLocation Loc,
unsigned ScopeLine = getLineNumber(ScopeLoc);
llvm::DISubroutineType *DIFnType = getOrCreateFunctionType(D, FnType, Unit);
llvm::DISubprogram *Decl = nullptr;
- if (D)
+ llvm::DINodeArray Annotations = nullptr;
+ if (D) {
Decl = isa<ObjCMethodDecl>(D)
? getObjCMethodDeclaration(D, DIFnType, LineNo, Flags, SPFlags)
: getFunctionDeclaration(D);
+ Annotations = CollectBTFDeclTagAnnotations(D);
+ }
// FIXME: The function declaration we're constructing here is mostly reusing
// declarations from CXXMethodDecl and not constructing new ones for arbitrary
@@ -3947,7 +4170,8 @@ void CGDebugInfo::emitFunctionStart(GlobalDecl GD, SourceLocation Loc,
// are emitted as CU level entities by the backend.
llvm::DISubprogram *SP = DBuilder.createFunction(
FDContext, Name, LinkageName, Unit, LineNo, DIFnType, ScopeLine,
- FlagsForDef, SPFlagsForDef, TParamsArray.get(), Decl);
+ FlagsForDef, SPFlagsForDef, TParamsArray.get(), Decl, nullptr,
+ Annotations);
Fn->setSubprogram(SP);
// We might get here with a VarDecl in the case we're generating
// code for the initialization of globals. Do not record these decls
@@ -4006,10 +4230,11 @@ void CGDebugInfo::EmitFunctionDecl(GlobalDecl GD, SourceLocation Loc,
if (CGM.getLangOpts().Optimize)
SPFlags |= llvm::DISubprogram::SPFlagOptimized;
+ llvm::DINodeArray Annotations = CollectBTFDeclTagAnnotations(D);
llvm::DISubprogram *SP = DBuilder.createFunction(
FDContext, Name, LinkageName, Unit, LineNo,
getOrCreateFunctionType(D, FnType, Unit), ScopeLine, Flags, SPFlags,
- TParamsArray.get(), getFunctionDeclaration(D));
+ TParamsArray.get(), getFunctionDeclaration(D), nullptr, Annotations);
if (IsDeclForCallSite)
Fn->setSubprogram(SP);
@@ -4241,8 +4466,12 @@ llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const VarDecl *VD,
uint64_t XOffset = 0;
if (VD->hasAttr<BlocksAttr>())
Ty = EmitTypeForVarWithBlocksAttr(VD, &XOffset).WrappedType;
- else
- Ty = getOrCreateType(VD->getType(), Unit);
+ else {
+ TypeLoc TL;
+ if (const TypeSourceInfo *TSI = VD->getTypeSourceInfo())
+ TL = TSI->getTypeLoc();
+ Ty = getOrCreateType(VD->getType(), Unit, TL);
+ }
// If there is no debug info for this type then do not emit debug info
// for this variable.
@@ -4337,8 +4566,7 @@ llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const VarDecl *VD,
// Use DW_OP_deref to tell the debugger to load the pointer and treat it as
// the address of the variable.
if (UsePointerValue) {
- assert(std::find(Expr.begin(), Expr.end(), llvm::dwarf::DW_OP_deref) ==
- Expr.end() &&
+ assert(!llvm::is_contained(Expr, llvm::dwarf::DW_OP_deref) &&
"Debug info already contains DW_OP_deref.");
Expr.push_back(llvm::dwarf::DW_OP_deref);
}
@@ -4346,8 +4574,10 @@ llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const VarDecl *VD,
// Create the descriptor for the variable.
llvm::DILocalVariable *D = nullptr;
if (ArgNo) {
+ llvm::DINodeArray Annotations = CollectBTFDeclTagAnnotations(VD);
D = DBuilder.createParameterVariable(Scope, Name, *ArgNo, Unit, Line, Ty,
- CGM.getLangOpts().Optimize, Flags);
+ CGM.getLangOpts().Optimize, Flags,
+ Annotations);
} else {
// For normal local variable, we will try to find out whether 'VD' is the
// copy parameter of coroutine.
@@ -4653,7 +4883,7 @@ void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block,
llvm::DIType *fieldType;
if (capture->isByRef()) {
TypeInfo PtrInfo = C.getTypeInfo(C.VoidPtrTy);
- auto Align = PtrInfo.AlignIsRequired ? PtrInfo.Align : 0;
+ auto Align = PtrInfo.isAlignRequired() ? PtrInfo.Align : 0;
// FIXME: This recomputes the layout of the BlockByRefWrapper.
uint64_t xoffset;
fieldType =
@@ -4740,14 +4970,172 @@ llvm::DIGlobalVariableExpression *CGDebugInfo::CollectAnonRecordDecls(
return GVE;
}
+namespace {
+struct ReconstitutableType : public RecursiveASTVisitor<ReconstitutableType> {
+ bool Reconstitutable = true;
+ bool VisitVectorType(VectorType *FT) {
+ Reconstitutable = false;
+ return false;
+ }
+ bool VisitAtomicType(AtomicType *FT) {
+ Reconstitutable = false;
+ return false;
+ }
+ bool TraverseEnumType(EnumType *ET) {
+ // Unnamed enums can't be reconstituted due to a lack of column info we
+ // produce in the DWARF, so we can't get Clang's full name back.
+ if (const auto *ED = dyn_cast<EnumDecl>(ET->getDecl())) {
+ if (!ED->getIdentifier()) {
+ Reconstitutable = false;
+ return false;
+ }
+ }
+ return true;
+ }
+ bool VisitFunctionProtoType(FunctionProtoType *FT) {
+ // noexcept is not encoded in DWARF, so a type spelled with it cannot be rebuilt.
+ Reconstitutable &= !isNoexceptExceptionSpec(FT->getExceptionSpecType());
+ return Reconstitutable;
+ }
+ bool TraverseRecordType(RecordType *RT) {
+ // Unnamed classes/lambdas can't be reconstituted due to a lack of column
+ // info we produce in the DWARF, so we can't get Clang's full name back.
+ // But so long as it's not one of those, it doesn't matter if some sub-type
+ // of the record (a template parameter) can't be reconstituted - because the
+ // un-reconstitutable type itself will carry its own name.
+ const auto *RD = dyn_cast<CXXRecordDecl>(RT->getDecl());
+ if (!RD)
+ return true;
+ if (RD->isLambda() || !RD->getIdentifier()) {
+ Reconstitutable = false;
+ return false;
+ }
+ return true;
+ }
+};
+} // anonymous namespace
+
+// Test whether a type name could be rebuilt from emitted debug info.
+static bool IsReconstitutableType(QualType QT) {
+ ReconstitutableType T;
+ T.TraverseType(QT);
+ return T.Reconstitutable;
+}
+
std::string CGDebugInfo::GetName(const Decl *D, bool Qualified) const {
std::string Name;
llvm::raw_string_ostream OS(Name);
- if (const NamedDecl *ND = dyn_cast<NamedDecl>(D)) {
- PrintingPolicy PP = getPrintingPolicy();
- PP.PrintCanonicalTypes = true;
- PP.SuppressInlineNamespace = false;
+ const NamedDecl *ND = dyn_cast<NamedDecl>(D);
+ if (!ND)
+ return Name;
+ codegenoptions::DebugTemplateNamesKind TemplateNamesKind =
+ CGM.getCodeGenOpts().getDebugSimpleTemplateNames();
+ Optional<TemplateArgs> Args;
+
+ bool IsOperatorOverload = false; // isa<CXXConversionDecl>(ND);
+ if (auto *RD = dyn_cast<CXXRecordDecl>(ND)) {
+ Args = GetTemplateArgs(RD);
+ } else if (auto *FD = dyn_cast<FunctionDecl>(ND)) {
+ Args = GetTemplateArgs(FD);
+ auto NameKind = ND->getDeclName().getNameKind();
+ IsOperatorOverload |=
+ NameKind == DeclarationName::CXXOperatorName ||
+ NameKind == DeclarationName::CXXConversionFunctionName;
+ } else if (auto *VD = dyn_cast<VarDecl>(ND)) {
+ Args = GetTemplateArgs(VD);
+ }
+ std::function<bool(ArrayRef<TemplateArgument>)> HasReconstitutableArgs =
+ [&](ArrayRef<TemplateArgument> Args) {
+ return llvm::all_of(Args, [&](const TemplateArgument &TA) {
+ switch (TA.getKind()) {
+ case TemplateArgument::Template:
+ // Easy to reconstitute - the value of the parameter in the debug
+ // info is the string name of the template. (so the template name
+ // itself won't benefit from any name rebuilding, but that's a
+ // representational limitation - maybe DWARF could be
+ // changed/improved to use some more structural representation)
+ return true;
+ case TemplateArgument::Declaration:
+ // Reference and pointer non-type template parameters point to
+ // variables, functions, etc and their value is, at best (for
+ // variables) represented as an address - not a reference to the
+ // DWARF describing the variable/function/etc. This makes it hard,
+ // possibly impossible to rebuild the original name - looking up the
+ // address in the executable file's symbol table would be needed.
+ return false;
+ case TemplateArgument::NullPtr:
+ // These could be rebuilt, but figured they're close enough to the
+ // declaration case, and not worth rebuilding.
+ return false;
+ case TemplateArgument::Pack:
+ // A pack is invalid if any of the elements of the pack are invalid.
+ return HasReconstitutableArgs(TA.getPackAsArray());
+ case TemplateArgument::Integral:
+ // Larger integers get encoded as DWARF blocks which are a bit
+ // harder to parse back into a large integer, etc - so punting on
+ // this for now. Re-parsing the integers back into APInt is probably
+ // feasible some day.
+ return TA.getAsIntegral().getBitWidth() <= 64;
+ case TemplateArgument::Type:
+ return IsReconstitutableType(TA.getAsType());
+ default:
+ llvm_unreachable("Other, unresolved, template arguments should "
+ "not be seen here");
+ }
+ });
+ };
+ // A conversion operator presents complications/ambiguity if there's a
+ // conversion to class template that is itself a template, eg:
+ // template<typename T>
+ // operator ns::t1<T, int>();
+ // This should be named, eg: "operator ns::t1<float, int><float>"
+ // (ignoring clang bug that means this is currently "operator t1<float>")
+ // but if the arguments were stripped, the consumer couldn't differentiate
+ // whether the template argument list for the conversion type was the
+ // function's argument list (& no reconstitution was needed) or not.
+ // This could be handled if reconstitutable names had a separate attribute
+ // annotating them as such - this would remove the ambiguity.
+ //
+ // Alternatively the template argument list could be parsed enough to check
+ // whether there's one list or two, then compare that with the DWARF
+ // description of the return type and the template argument lists to determine
+ // how many lists there should be and if one is missing it could be assumed(?)
+ // to be the function's template argument list & then be rebuilt.
+ //
+ // Other operator overloads that aren't conversion operators could be
+ // reconstituted but would require a bit more nuance about detecting the
+ // difference between these different operators during that rebuilding.
+ bool Reconstitutable =
+ Args && HasReconstitutableArgs(Args->Args) && !IsOperatorOverload;
+
+ PrintingPolicy PP = getPrintingPolicy();
+
+ if (TemplateNamesKind == codegenoptions::DebugTemplateNamesKind::Full ||
+ !Reconstitutable) {
ND->getNameForDiagnostic(OS, PP, Qualified);
+ } else {
+ bool Mangled =
+ TemplateNamesKind == codegenoptions::DebugTemplateNamesKind::Mangled;
+ // Prefix mangled simplified names with the _STN marker.
+ if (Mangled)
+ OS << "_STN";
+
+ OS << ND->getDeclName();
+ std::string EncodedOriginalName;
+ llvm::raw_string_ostream EncodedOriginalNameOS(EncodedOriginalName);
+ EncodedOriginalNameOS << ND->getDeclName();
+
+ if (Mangled) {
+ OS << "|";
+ printTemplateArgumentList(OS, Args->Args, PP);
+ printTemplateArgumentList(EncodedOriginalNameOS, Args->Args, PP);
+#ifndef NDEBUG
+ std::string CanonicalOriginalName;
+ llvm::raw_string_ostream OriginalOS(CanonicalOriginalName);
+ ND->getNameForDiagnostic(OriginalOS, PP, Qualified);
+ assert(EncodedOriginalNameOS.str() == OriginalOS.str());
+#endif
+ }
}
return Name;
}
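A brief aside, not part of the patch: based on the branches above, the three template-name modes would name a specialization roughly as follows. The type below is invented for illustration; the exact argument spelling comes from printTemplateArgumentList under the printing policy used above.

// Hypothetical C++ example:
template <typename T, int N> struct ring {};
ring<float, 8> r;
// Full    mode: DW_AT_name = "ring<float, 8>"
// Simple  mode: DW_AT_name = "ring"                 (only when the arguments pass the
//                                                    reconstitutability checks above)
// Mangled mode: DW_AT_name = "_STNring|<float, 8>"  (marker, name, '|', then the args)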
@@ -4807,12 +5195,17 @@ void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var,
}
AppendAddressSpaceXDeref(AddressSpace, Expr);
+ TypeLoc TL;
+ if (const TypeSourceInfo *TSI = D->getTypeSourceInfo())
+ TL = TSI->getTypeLoc();
+
+ llvm::DINodeArray Annotations = CollectBTFDeclTagAnnotations(D);
GVE = DBuilder.createGlobalVariableExpression(
- DContext, DeclName, LinkageName, Unit, LineNo, getOrCreateType(T, Unit),
- Var->hasLocalLinkage(), true,
+ DContext, DeclName, LinkageName, Unit, LineNo,
+ getOrCreateType(T, Unit, TL), Var->hasLocalLinkage(), true,
Expr.empty() ? nullptr : DBuilder.createExpression(Expr),
getOrCreateStaticDataMemberDeclarationOrNull(D), TemplateParameters,
- Align);
+ Align, Annotations);
Var->addDebugInfo(GVE);
}
DeclCache[D->getCanonicalDecl()].reset(GVE);
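For context on the CollectBTFDeclTagAnnotations calls added above (an illustrative sketch, not part of the patch): the annotations come from __attribute__((btf_decl_tag("..."))) on declarations; the tag strings below are invented.

/* Hypothetical C input; each tag becomes a "btf_decl_tag" annotation on the
   corresponding DISubprogram, DIGlobalVariable, DILocalVariable, or member. */
#define __tag(x) __attribute__((btf_decl_tag(x)))

struct pkt {
  void *data __tag("kernel.ptr");          /* field annotation      */
};

int hits __tag("percpu");                  /* global annotation     */

__tag("fentry") int count(struct pkt *p __tag("ctx")) { /* function + parameter */
  return ++hits;
}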
diff --git a/clang/lib/CodeGen/CGDebugInfo.h b/clang/lib/CodeGen/CGDebugInfo.h
index b01165f85a6c..a7b72fa5f5a6 100644
--- a/clang/lib/CodeGen/CGDebugInfo.h
+++ b/clang/lib/CodeGen/CGDebugInfo.h
@@ -178,14 +178,19 @@ class CGDebugInfo {
llvm::DIType *CreateType(const ComplexType *Ty);
llvm::DIType *CreateType(const AutoType *Ty);
llvm::DIType *CreateType(const ExtIntType *Ty);
- llvm::DIType *CreateQualifiedType(QualType Ty, llvm::DIFile *Fg);
+ llvm::DIType *CreateQualifiedType(QualType Ty, llvm::DIFile *Fg,
+ TypeLoc TL = TypeLoc());
+ llvm::DIType *CreateQualifiedType(const FunctionProtoType *Ty,
+ llvm::DIFile *Fg);
llvm::DIType *CreateType(const TypedefType *Ty, llvm::DIFile *Fg);
llvm::DIType *CreateType(const TemplateSpecializationType *Ty,
llvm::DIFile *Fg);
llvm::DIType *CreateType(const ObjCObjectPointerType *Ty, llvm::DIFile *F);
- llvm::DIType *CreateType(const PointerType *Ty, llvm::DIFile *F);
+ llvm::DIType *CreateType(const PointerType *Ty, llvm::DIFile *F,
+ TypeLoc TL = TypeLoc());
llvm::DIType *CreateType(const BlockPointerType *Ty, llvm::DIFile *F);
- llvm::DIType *CreateType(const FunctionType *Ty, llvm::DIFile *F);
+ llvm::DIType *CreateType(const FunctionType *Ty, llvm::DIFile *F,
+ TypeLoc TL = TypeLoc());
/// Get structure or union type.
llvm::DIType *CreateType(const RecordType *Tyg);
llvm::DIType *CreateTypeDefinition(const RecordType *Ty);
@@ -240,7 +245,8 @@ class CGDebugInfo {
/// \return namespace descriptor for the given namespace decl.
llvm::DINamespace *getOrCreateNamespace(const NamespaceDecl *N);
llvm::DIType *CreatePointerLikeType(llvm::dwarf::Tag Tag, const Type *Ty,
- QualType PointeeTy, llvm::DIFile *F);
+ QualType PointeeTy, llvm::DIFile *F,
+ TypeLoc TL = TypeLoc());
llvm::DIType *getOrCreateStructPtrType(StringRef Name, llvm::DIType *&Cache);
/// A helper function to create a subprogram for a single member
@@ -272,9 +278,12 @@ class CGDebugInfo {
llvm::DenseSet<CanonicalDeclPtr<const CXXRecordDecl>> &SeenTypes,
llvm::DINode::DIFlags StartingFlags);
+ struct TemplateArgs {
+ const TemplateParameterList *TList;
+ llvm::ArrayRef<TemplateArgument> Args;
+ };
/// A helper function to collect template parameters.
- llvm::DINodeArray CollectTemplateParams(const TemplateParameterList *TPList,
- ArrayRef<TemplateArgument> TAList,
+ llvm::DINodeArray CollectTemplateParams(Optional<TemplateArgs> Args,
llvm::DIFile *Unit);
/// A helper function to collect debug info for function template
/// parameters.
@@ -286,17 +295,25 @@ class CGDebugInfo {
llvm::DINodeArray CollectVarTemplateParams(const VarDecl *VD,
llvm::DIFile *Unit);
+ Optional<TemplateArgs> GetTemplateArgs(const VarDecl *) const;
+ Optional<TemplateArgs> GetTemplateArgs(const RecordDecl *) const;
+ Optional<TemplateArgs> GetTemplateArgs(const FunctionDecl *) const;
+
/// A helper function to collect debug info for template
/// parameters.
- llvm::DINodeArray
- CollectCXXTemplateParams(const ClassTemplateSpecializationDecl *TS,
- llvm::DIFile *F);
+ llvm::DINodeArray CollectCXXTemplateParams(const RecordDecl *TS,
+ llvm::DIFile *F);
+
+ /// A helper function to collect debug info for btf_decl_tag annotations.
+ llvm::DINodeArray CollectBTFDeclTagAnnotations(const Decl *D);
llvm::DIType *createFieldType(StringRef name, QualType type,
SourceLocation loc, AccessSpecifier AS,
uint64_t offsetInBits, uint32_t AlignInBits,
llvm::DIFile *tunit, llvm::DIScope *scope,
- const RecordDecl *RD = nullptr);
+ const RecordDecl *RD = nullptr,
+ llvm::DINodeArray Annotations = nullptr,
+ TypeLoc TL = TypeLoc());
llvm::DIType *createFieldType(StringRef name, QualType type,
SourceLocation loc, AccessSpecifier AS,
@@ -417,6 +434,9 @@ public:
/// location will be reused.
void EmitLocation(CGBuilderTy &Builder, SourceLocation Loc);
+ QualType getFunctionType(const FunctionDecl *FD, QualType RetTy,
+ const SmallVectorImpl<const VarDecl *> &Args);
+
/// Emit a call to llvm.dbg.function.start to indicate
/// start of a new function.
/// \param Loc The location of the function header.
@@ -613,7 +633,8 @@ private:
Optional<StringRef> Source);
/// Get the type from the cache or create a new type if necessary.
- llvm::DIType *getOrCreateType(QualType Ty, llvm::DIFile *Fg);
+ llvm::DIType *getOrCreateType(QualType Ty, llvm::DIFile *Fg,
+ TypeLoc TL = TypeLoc());
/// Get a reference to a clang module. If \p CreateSkeletonCU is true,
/// this also creates a split dwarf skeleton compile unit.
@@ -628,7 +649,8 @@ private:
llvm::DICompositeType *getOrCreateLimitedType(const RecordType *Ty);
/// Create type metadata for a source language type.
- llvm::DIType *CreateTypeNode(QualType Ty, llvm::DIFile *Fg);
+ llvm::DIType *CreateTypeNode(QualType Ty, llvm::DIFile *Fg,
+ TypeLoc TL = TypeLoc());
/// Create new member and increase Offset by FType's size.
llvm::DIType *CreateMemberType(llvm::DIFile *Unit, QualType FType,
diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index 5b3d39f20b41..941671c61482 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -1142,7 +1142,7 @@ Address CodeGenModule::createUnnamedGlobalFrom(const VarDecl &D,
GV->setAlignment(Align.getAsAlign());
GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
CacheEntry = GV;
- } else if (CacheEntry->getAlignment() < Align.getQuantity()) {
+ } else if (CacheEntry->getAlignment() < uint64_t(Align.getQuantity())) {
CacheEntry->setAlignment(Align.getAsAlign());
}
@@ -1447,6 +1447,7 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
if (getLangOpts().OpenMP && OpenMPLocalAddr.isValid()) {
address = OpenMPLocalAddr;
+ AllocaAddr = OpenMPLocalAddr;
} else if (Ty->isConstantSizeType()) {
// If this value is an array or struct with a statically determinable
// constant initializer, there are optimizations we can do.
@@ -1492,6 +1493,7 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
// return slot, so that we can elide the copy when returning this
// variable (C++0x [class.copy]p34).
address = ReturnValue;
+ AllocaAddr = ReturnValue;
if (const RecordType *RecordTy = Ty->getAs<RecordType>()) {
const auto *RD = RecordTy->getDecl();
@@ -1503,7 +1505,8 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
// applied.
llvm::Value *Zero = Builder.getFalse();
Address NRVOFlag =
- CreateTempAlloca(Zero->getType(), CharUnits::One(), "nrvo");
+ CreateTempAlloca(Zero->getType(), CharUnits::One(), "nrvo",
+ /*ArraySize=*/nullptr, &AllocaAddr);
EnsureInsertPoint();
Builder.CreateStore(Zero, NRVOFlag);
@@ -1605,10 +1608,11 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
DI->setLocation(D.getLocation());
// If NRVO, use a pointer to the return address.
- if (UsePointerValue)
+ if (UsePointerValue) {
DebugAddr = ReturnValuePointer;
-
- (void)DI->EmitDeclareOfAutoVariable(&D, DebugAddr.getPointer(), Builder,
+ AllocaAddr = ReturnValuePointer;
+ }
+ (void)DI->EmitDeclareOfAutoVariable(&D, AllocaAddr.getPointer(), Builder,
UsePointerValue);
}
@@ -2450,6 +2454,7 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg,
}
Address DeclPtr = Address::invalid();
+ Address AllocaPtr = Address::invalid();
bool DoStore = false;
bool IsScalar = hasScalarEvaluationKind(Ty);
// If we already have a pointer to the argument, reuse the input pointer.
@@ -2464,6 +2469,7 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg,
// from the default address space.
auto AllocaAS = CGM.getASTAllocaAddressSpace();
auto *V = DeclPtr.getPointer();
+ AllocaPtr = DeclPtr;
auto SrcLangAS = getLangOpts().OpenCL ? LangAS::opencl_private : AllocaAS;
auto DestLangAS =
getLangOpts().OpenCL ? LangAS::opencl_private : LangAS::Default;
@@ -2500,10 +2506,11 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg,
: Address::invalid();
if (getLangOpts().OpenMP && OpenMPLocalAddr.isValid()) {
DeclPtr = OpenMPLocalAddr;
+ AllocaPtr = DeclPtr;
} else {
// Otherwise, create a temporary to hold the value.
DeclPtr = CreateMemTemp(Ty, getContext().getDeclAlign(&D),
- D.getName() + ".addr");
+ D.getName() + ".addr", &AllocaPtr);
}
DoStore = true;
}
@@ -2579,7 +2586,7 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg,
if (CGDebugInfo *DI = getDebugInfo()) {
if (CGM.getCodeGenOpts().hasReducedDebugInfo() && !CurFuncIsThunk) {
llvm::DILocalVariable *DILocalVar = DI->EmitDeclareOfArgVariable(
- &D, DeclPtr.getPointer(), ArgNo, Builder);
+ &D, AllocaPtr.getPointer(), ArgNo, Builder);
if (const auto *Var = dyn_cast_or_null<ParmVarDecl>(&D))
DI->getParamDbgMappings().insert({Var, DILocalVar});
}
diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp
index d43fb99550a8..d22f9dc3b68c 100644
--- a/clang/lib/CodeGen/CGDeclCXX.cpp
+++ b/clang/lib/CodeGen/CGDeclCXX.cpp
@@ -555,7 +555,8 @@ CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D,
PrioritizedCXXGlobalInits.size());
PrioritizedCXXGlobalInits.push_back(std::make_pair(Key, Fn));
} else if (isTemplateInstantiation(D->getTemplateSpecializationKind()) ||
- getContext().GetGVALinkageForVariable(D) == GVA_DiscardableODR) {
+ getContext().GetGVALinkageForVariable(D) == GVA_DiscardableODR ||
+ D->hasAttr<SelectAnyAttr>()) {
// C++ [basic.start.init]p2:
// Definitions of explicitly specialized class template static data
// members have ordered initialization. Other class template static data
@@ -568,17 +569,28 @@ CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D,
// group with the global being initialized. On most platforms, this is a
// minor startup time optimization. In the MS C++ ABI, there are no guard
// variables, so this COMDAT key is required for correctness.
- AddGlobalCtor(Fn, 65535, COMDATKey);
- if (getTarget().getCXXABI().isMicrosoft() && COMDATKey) {
- // In The MS C++, MS add template static data member in the linker
- // drective.
- addUsedGlobal(COMDATKey);
- }
- } else if (D->hasAttr<SelectAnyAttr>()) {
+ //
// SelectAny globals will be comdat-folded. Put the initializer into a
// COMDAT group associated with the global, so the initializers get folded
// too.
+
AddGlobalCtor(Fn, 65535, COMDATKey);
+ if (COMDATKey && (getTriple().isOSBinFormatELF() ||
+ getTarget().getCXXABI().isMicrosoft())) {
+ // When COMDAT is used on ELF or in the MS C++ ABI, the key must be in
+ // llvm.used to prevent linker GC.
+ addUsedGlobal(COMDATKey);
+ }
+
+ // If we used a COMDAT key for the global ctor, the init function can be
+ // discarded if the global ctor entry is discarded.
+ // FIXME: Do we need to restrict this to ELF and Wasm?
+ llvm::Comdat *C = Addr->getComdat();
+ if (COMDATKey && C &&
+ (getTarget().getTriple().isOSBinFormatELF() ||
+ getTarget().getTriple().isOSBinFormatWasm())) {
+ Fn->setComdat(C);
+ }
} else {
I = DelayedCXXInitPosition.find(D); // Re-do lookup in case of re-hash.
if (I == DelayedCXXInitPosition.end()) {
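A sketch of the case the SelectAny branch above now shares with template instantiations (requires -fms-extensions; illustrative only, not part of the patch):

// Hypothetical C++ example:
struct Logger { Logger(); };             // requires dynamic initialization
__declspec(selectany) Logger TheLogger;  // may be defined in several TUs
// The guarded init function for TheLogger is registered via AddGlobalCtor with a
// COMDAT key on TheLogger, so when the linker folds the duplicate definitions the
// duplicate initializers fold with them (and on ELF/Wasm the init function itself
// can be discarded along with the ctor entry).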
diff --git a/clang/lib/CodeGen/CGException.cpp b/clang/lib/CodeGen/CGException.cpp
index 9f65e9eb120c..aff9c77d53c7 100644
--- a/clang/lib/CodeGen/CGException.cpp
+++ b/clang/lib/CodeGen/CGException.cpp
@@ -477,11 +477,11 @@ void CodeGenFunction::EmitStartEHSpec(const Decl *D) {
return;
ExceptionSpecificationType EST = Proto->getExceptionSpecType();
- if (isNoexceptExceptionSpec(EST) && Proto->canThrow() == CT_Cannot) {
- // noexcept functions are simple terminate scopes.
- if (!getLangOpts().EHAsynch) // -EHa: HW exception still can occur
- EHStack.pushTerminate();
- } else if (EST == EST_Dynamic || EST == EST_DynamicNone) {
+ // In C++17 and later, 'throw()' aka EST_DynamicNone is treated the same way
+ // as noexcept. In earlier standards, it is handled in this block, along with
+ // 'throw(X...)'.
+ if (EST == EST_Dynamic ||
+ (EST == EST_DynamicNone && !getLangOpts().CPlusPlus17)) {
// TODO: Revisit exception specifications for the MS ABI. There is a way to
// encode these in an object file but MSVC doesn't do anything with it.
if (getTarget().getCXXABI().isMicrosoft())
@@ -521,6 +521,10 @@ void CodeGenFunction::EmitStartEHSpec(const Decl *D) {
/*ForEH=*/true);
Filter->setFilter(I, EHType);
}
+ } else if (Proto->canThrow() == CT_Cannot) {
+ // noexcept functions are simple terminate scopes.
+ if (!getLangOpts().EHAsynch) // -EHa: HW exception still can occur
+ EHStack.pushTerminate();
}
}
@@ -580,10 +584,8 @@ void CodeGenFunction::EmitEndEHSpec(const Decl *D) {
return;
ExceptionSpecificationType EST = Proto->getExceptionSpecType();
- if (isNoexceptExceptionSpec(EST) && Proto->canThrow() == CT_Cannot &&
- !EHStack.empty() /* possible empty when under async exceptions */) {
- EHStack.popTerminate();
- } else if (EST == EST_Dynamic || EST == EST_DynamicNone) {
+ if (EST == EST_Dynamic ||
+ (EST == EST_DynamicNone && !getLangOpts().CPlusPlus17)) {
// TODO: Revisit exception specifications for the MS ABI. There is a way to
// encode these in an object file but MSVC doesn't do anything with it.
if (getTarget().getCXXABI().isMicrosoft())
@@ -599,6 +601,10 @@ void CodeGenFunction::EmitEndEHSpec(const Decl *D) {
EHFilterScope &filterScope = cast<EHFilterScope>(*EHStack.begin());
emitFilterDispatchBlock(*this, filterScope);
EHStack.popFilter();
+ } else if (Proto->canThrow() == CT_Cannot &&
+ /* possible empty when under async exceptions */
+ !EHStack.empty()) {
+ EHStack.popTerminate();
}
}
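In source terms, the reordering above amounts to the following (a sketch, not from the patch):

// Hypothetical C++ declarations:
void f() throw(int);  // dynamic spec: still lowered to an EH filter (pre-C++17 only)
void g() throw();     // C++17 and later: no filter; falls through to the
                      // canThrow() == CT_Cannot branch and gets a terminate scope
void h() noexcept;    // as before: terminate scope (unless -EHa / async exceptions)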
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index bf514aab8851..4332e74dbb24 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -35,6 +35,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/MatrixBuilder.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Path.h"
@@ -94,7 +95,7 @@ Address CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align,
// otherwise alloca is inserted at the current insertion point of the
// builder.
if (!ArraySize)
- Builder.SetInsertPoint(AllocaInsertPt);
+ Builder.SetInsertPoint(getPostAllocaInsertPoint());
V = getTargetHooks().performAddrSpaceCast(
*this, V, getASTAllocaAddressSpace(), LangAS::Default,
Ty->getPointerTo(DestAddrSpace), /*non-null*/ true);
@@ -122,23 +123,10 @@ llvm::AllocaInst *CodeGenFunction::CreateTempAlloca(llvm::Type *Ty,
Address CodeGenFunction::CreateDefaultAlignTempAlloca(llvm::Type *Ty,
const Twine &Name) {
CharUnits Align =
- CharUnits::fromQuantity(CGM.getDataLayout().getABITypeAlignment(Ty));
+ CharUnits::fromQuantity(CGM.getDataLayout().getPrefTypeAlignment(Ty));
return CreateTempAlloca(Ty, Align, Name);
}
-void CodeGenFunction::InitTempAlloca(Address Var, llvm::Value *Init) {
- auto *Alloca = Var.getPointer();
- assert(isa<llvm::AllocaInst>(Alloca) ||
- (isa<llvm::AddrSpaceCastInst>(Alloca) &&
- isa<llvm::AllocaInst>(
- cast<llvm::AddrSpaceCastInst>(Alloca)->getPointerOperand())));
-
- auto *Store = new llvm::StoreInst(Init, Alloca, /*volatile*/ false,
- Var.getAlignment().getAsAlign());
- llvm::BasicBlock *Block = AllocaInsertPt->getParent();
- Block->getInstList().insertAfter(AllocaInsertPt->getIterator(), Store);
-}
-
Address CodeGenFunction::CreateIRTemp(QualType Ty, const Twine &Name) {
CharUnits Align = getContext().getTypeAlignInChars(Ty);
return CreateTempAlloca(ConvertType(Ty), Align, Name);
@@ -580,8 +568,7 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) {
// Perform derived-to-base casts and/or field accesses, to get from the
// temporary object we created (and, potentially, for which we extended
// the lifetime) to the subobject we're binding the reference to.
- for (unsigned I = Adjustments.size(); I != 0; --I) {
- SubobjectAdjustment &Adjustment = Adjustments[I-1];
+ for (SubobjectAdjustment &Adjustment : llvm::reverse(Adjustments)) {
switch (Adjustment.Kind) {
case SubobjectAdjustment::DerivedToBaseAdjustment:
Object =
@@ -667,9 +654,9 @@ bool CodeGenFunction::isVptrCheckRequired(TypeCheckKind TCK, QualType Ty) {
}
bool CodeGenFunction::sanitizePerformTypeCheck() const {
- return SanOpts.has(SanitizerKind::Null) |
- SanOpts.has(SanitizerKind::Alignment) |
- SanOpts.has(SanitizerKind::ObjectSize) |
+ return SanOpts.has(SanitizerKind::Null) ||
+ SanOpts.has(SanitizerKind::Alignment) ||
+ SanOpts.has(SanitizerKind::ObjectSize) ||
SanOpts.has(SanitizerKind::Vptr);
}
@@ -1642,7 +1629,7 @@ static bool getRangeForType(CodeGenFunction &CGF, QualType Ty,
} else {
assert(NumPositiveBits <= Bitwidth);
End = llvm::APInt(Bitwidth, 1) << NumPositiveBits;
- Min = llvm::APInt(Bitwidth, 0);
+ Min = llvm::APInt::getZero(Bitwidth);
}
}
return true;
@@ -1939,10 +1926,15 @@ RValue CodeGenFunction::EmitLoadOfLValue(LValue LV, SourceLocation Loc) {
return EmitLoadOfGlobalRegLValue(LV);
if (LV.isMatrixElt()) {
+ llvm::Value *Idx = LV.getMatrixIdx();
+ if (CGM.getCodeGenOpts().OptimizationLevel > 0) {
+ const auto *const MatTy = LV.getType()->getAs<ConstantMatrixType>();
+ llvm::MatrixBuilder<CGBuilderTy> MB(Builder);
+ MB.CreateIndexAssumption(Idx, MatTy->getNumElementsFlattened());
+ }
llvm::LoadInst *Load =
Builder.CreateLoad(LV.getMatrixAddress(), LV.isVolatileQualified());
- return RValue::get(
- Builder.CreateExtractElement(Load, LV.getMatrixIdx(), "matrixext"));
+ return RValue::get(Builder.CreateExtractElement(Load, Idx, "matrixext"));
}
assert(LV.isBitField() && "Unknown LValue type!");
@@ -2080,9 +2072,15 @@ void CodeGenFunction::EmitStoreThroughLValue(RValue Src, LValue Dst,
return EmitStoreThroughGlobalRegLValue(Src, Dst);
if (Dst.isMatrixElt()) {
- llvm::Value *Vec = Builder.CreateLoad(Dst.getMatrixAddress());
- Vec = Builder.CreateInsertElement(Vec, Src.getScalarVal(),
- Dst.getMatrixIdx(), "matins");
+ llvm::Value *Idx = Dst.getMatrixIdx();
+ if (CGM.getCodeGenOpts().OptimizationLevel > 0) {
+ const auto *const MatTy = Dst.getType()->getAs<ConstantMatrixType>();
+ llvm::MatrixBuilder<CGBuilderTy> MB(Builder);
+ MB.CreateIndexAssumption(Idx, MatTy->getNumElementsFlattened());
+ }
+ llvm::Instruction *Load = Builder.CreateLoad(Dst.getMatrixAddress());
+ llvm::Value *Vec =
+ Builder.CreateInsertElement(Load, Src.getScalarVal(), Idx, "matins");
Builder.CreateStore(Vec, Dst.getMatrixAddress(),
Dst.isVolatileQualified());
return;
@@ -3498,7 +3496,7 @@ void CodeGenFunction::EmitTrapCheck(llvm::Value *Checked,
if (!CGM.getCodeGenOpts().TrapFuncName.empty()) {
auto A = llvm::Attribute::get(getLLVMContext(), "trap-func-name",
CGM.getCodeGenOpts().TrapFuncName);
- TrapCall->addAttribute(llvm::AttributeList::FunctionIndex, A);
+ TrapCall->addFnAttr(A);
}
TrapCall->setDoesNotReturn();
TrapCall->setDoesNotThrow();
@@ -3522,7 +3520,7 @@ llvm::CallInst *CodeGenFunction::EmitTrapCall(llvm::Intrinsic::ID IntrID) {
if (!CGM.getCodeGenOpts().TrapFuncName.empty()) {
auto A = llvm::Attribute::get(getLLVMContext(), "trap-func-name",
CGM.getCodeGenOpts().TrapFuncName);
- TrapCall->addAttribute(llvm::AttributeList::FunctionIndex, A);
+ TrapCall->addFnAttr(A);
}
return TrapCall;
@@ -4684,10 +4682,28 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) {
case CK_UserDefinedConversion:
case CK_CPointerToObjCPointerCast:
case CK_BlockPointerToObjCPointerCast:
- case CK_NoOp:
case CK_LValueToRValue:
return EmitLValue(E->getSubExpr());
+ case CK_NoOp: {
+ // CK_NoOp can model a qualification conversion, which can remove an array
+ // bound and change the IR type.
+ // FIXME: Once pointee types are removed from IR, remove this.
+ LValue LV = EmitLValue(E->getSubExpr());
+ if (LV.isSimple()) {
+ Address V = LV.getAddress(*this);
+ if (V.isValid()) {
+ llvm::Type *T =
+ ConvertTypeForMem(E->getType())
+ ->getPointerTo(
+ cast<llvm::PointerType>(V.getType())->getAddressSpace());
+ if (V.getType() != T)
+ LV.setAddress(Builder.CreateBitCast(V, T));
+ }
+ }
+ return LV;
+ }
+
case CK_UncheckedDerivedToBase:
case CK_DerivedToBase: {
const auto *DerivedClassTy =
@@ -4879,12 +4895,28 @@ static CGCallee EmitDirectCallee(CodeGenFunction &CGF, GlobalDecl GD) {
const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl());
if (auto builtinID = FD->getBuiltinID()) {
- // Replaceable builtin provide their own implementation of a builtin. Unless
- // we are in the builtin implementation itself, don't call the actual
- // builtin. If we are in the builtin implementation, avoid trivial infinite
+ std::string FDInlineName = (FD->getName() + ".inline").str();
+ // When directly calling an inline builtin, call it through its mangled
+ // name to make it clear it's not the actual builtin.
+ if (FD->isInlineBuiltinDeclaration() &&
+ CGF.CurFn->getName() != FDInlineName) {
+ llvm::Constant *CalleePtr = EmitFunctionDeclPointer(CGF.CGM, GD);
+ llvm::Function *Fn = llvm::cast<llvm::Function>(CalleePtr);
+ llvm::Module *M = Fn->getParent();
+ llvm::Function *Clone = M->getFunction(FDInlineName);
+ if (!Clone) {
+ Clone = llvm::Function::Create(Fn->getFunctionType(),
+ llvm::GlobalValue::InternalLinkage,
+ Fn->getAddressSpace(), FDInlineName, M);
+ Clone->addFnAttr(llvm::Attribute::AlwaysInline);
+ }
+ return CGCallee::forDirect(Clone, GD);
+ }
+
+ // Replaceable builtins provide their own implementation of a builtin. If we
+ // are in an inline builtin implementation, avoid trivial infinite
// recursion.
- if (!FD->isInlineBuiltinDeclaration() ||
- CGF.CurFn->getName() == FD->getName())
+ else
return CGCallee::forBuiltin(builtinID, FD);
}
@@ -4893,6 +4925,7 @@ static CGCallee EmitDirectCallee(CodeGenFunction &CGF, GlobalDecl GD) {
FD->hasAttr<CUDAGlobalAttr>())
CalleePtr = CGF.CGM.getCUDARuntime().getKernelStub(
cast<llvm::GlobalValue>(CalleePtr->stripPointerCasts()));
+
return CGCallee::forDirect(CalleePtr, GD);
}
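For orientation (an illustrative sketch, not part of the patch): an "inline builtin declaration" is the glibc-fortify style pattern below; with the change above, calls to it are redirected through an internal always_inline clone named "memcpy.inline" so the builtin lowering is not re-entered.

/* Hypothetical C translation unit: */
extern inline __attribute__((always_inline, gnu_inline)) void *
memcpy(void *dst, const void *src, __SIZE_TYPE__ n) {
  /* ...checked wrapper body, eventually calling the real builtin... */
  return __builtin_memcpy(dst, src, n);
}

void use(void *d, const void *s, __SIZE_TYPE__ n) {
  memcpy(d, s, n);   /* emitted as a call to "memcpy.inline", the internal
                        always_inline clone of the wrapper above */
}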
@@ -5306,9 +5339,13 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee
// Generate function declaration DISuprogram in order to be used
// in debug info about call sites.
if (CGDebugInfo *DI = getDebugInfo()) {
- if (auto *CalleeDecl = dyn_cast_or_null<FunctionDecl>(TargetDecl))
- DI->EmitFuncDeclForCallSite(CallOrInvoke, QualType(FnType, 0),
+ if (auto *CalleeDecl = dyn_cast_or_null<FunctionDecl>(TargetDecl)) {
+ FunctionArgList Args;
+ QualType ResTy = BuildFunctionArgList(CalleeDecl, Args);
+ DI->EmitFuncDeclForCallSite(CallOrInvoke,
+ DI->getFunctionType(CalleeDecl, ResTy, Args),
CalleeDecl);
+ }
}
return Call;
diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp
index 1e81ad9f2dc7..5b56a587fa5f 100644
--- a/clang/lib/CodeGen/CGExprAgg.cpp
+++ b/clang/lib/CodeGen/CGExprAgg.cpp
@@ -127,6 +127,8 @@ public:
}
void VisitConstantExpr(ConstantExpr *E) {
+ EnsureDest(E->getType());
+
if (llvm::Value *Result = ConstantEmitter(CGF).tryEmitConstantExpr(E)) {
CGF.EmitAggregateStore(Result, Dest.getAddress(),
E->getType().isVolatileQualified());
diff --git a/clang/lib/CodeGen/CGExprCXX.cpp b/clang/lib/CodeGen/CGExprCXX.cpp
index 96cf977ca290..cc838bf38c6c 100644
--- a/clang/lib/CodeGen/CGExprCXX.cpp
+++ b/clang/lib/CodeGen/CGExprCXX.cpp
@@ -609,15 +609,18 @@ CodeGenFunction::EmitCXXConstructExpr(const CXXConstructExpr *E,
return;
// Elide the constructor if we're constructing from a temporary.
- // The temporary check is required because Sema sets this on NRVO
- // returns.
if (getLangOpts().ElideConstructors && E->isElidable()) {
- assert(getContext().hasSameUnqualifiedType(E->getType(),
- E->getArg(0)->getType()));
- if (E->getArg(0)->isTemporaryObject(getContext(), CD->getParent())) {
- EmitAggExpr(E->getArg(0), Dest);
- return;
- }
+ // FIXME: This only handles the simplest case, where the source object
+ // is passed directly as the first argument to the constructor.
+ // This should also handle stepping through implicit casts and
+ // conversion sequences which involve two steps, with a
+ // conversion operator followed by a converting constructor.
+ const Expr *SrcObj = E->getArg(0);
+ assert(SrcObj->isTemporaryObject(getContext(), CD->getParent()));
+ assert(
+ getContext().hasSameUnqualifiedType(E->getType(), SrcObj->getType()));
+ EmitAggExpr(SrcObj, Dest);
+ return;
}
if (const ArrayType *arrayType
@@ -1323,8 +1326,7 @@ static RValue EmitNewDeleteCall(CodeGenFunction &CGF,
llvm::Function *Fn = dyn_cast<llvm::Function>(CalleePtr);
if (CalleeDecl->isReplaceableGlobalAllocationFunction() &&
Fn && Fn->hasFnAttribute(llvm::Attribute::NoBuiltin)) {
- CallOrInvoke->addAttribute(llvm::AttributeList::FunctionIndex,
- llvm::Attribute::Builtin);
+ CallOrInvoke->addFnAttr(llvm::Attribute::Builtin);
}
return RV;
diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp
index 734024149bbb..ff900ed077e6 100644
--- a/clang/lib/CodeGen/CGExprConstant.cpp
+++ b/clang/lib/CodeGen/CGExprConstant.cpp
@@ -1369,7 +1369,7 @@ llvm::Constant *ConstantEmitter::tryEmitConstantExpr(const ConstantExpr *CE) {
const Expr *Inner = CE->getSubExpr()->IgnoreImplicit();
QualType RetType;
if (auto *Call = dyn_cast<CallExpr>(Inner))
- RetType = Call->getCallReturnType(CGF->getContext());
+ RetType = Call->getCallReturnType(CGM.getContext());
else if (auto *Ctor = dyn_cast<CXXConstructExpr>(Inner))
RetType = Ctor->getType();
llvm::Constant *Res =
@@ -1714,6 +1714,8 @@ llvm::Constant *ConstantEmitter::emitForMemory(CodeGenModule &CGM,
llvm::Constant *ConstantEmitter::tryEmitPrivate(const Expr *E,
QualType destType) {
+ assert(!destType->isVoidType() && "can't emit a void constant");
+
Expr::EvalResult Result;
bool Success = false;
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 418f23bd1a97..ae9434f96529 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -419,6 +419,11 @@ public:
Value *VisitExpr(Expr *S);
Value *VisitConstantExpr(ConstantExpr *E) {
+ // A constant expression of type 'void' generates no code and produces no
+ // value.
+ if (E->getType()->isVoidType())
+ return nullptr;
+
if (Value *Result = ConstantEmitter(CGF).tryEmitConstantExpr(E)) {
if (E->isGLValue())
return CGF.Builder.CreateLoad(Address(
@@ -1647,7 +1652,7 @@ Value *ScalarExprEmitter::VisitShuffleVectorExpr(ShuffleVectorExpr *E) {
for (unsigned i = 2; i < E->getNumSubExprs(); ++i) {
llvm::APSInt Idx = E->getShuffleMaskIdx(CGF.getContext(), i-2);
// Check for -1 and output it as undef in the IR.
- if (Idx.isSigned() && Idx.isAllOnesValue())
+ if (Idx.isSigned() && Idx.isAllOnes())
Indices.push_back(-1);
else
Indices.push_back(Idx.getZExtValue());
@@ -1775,13 +1780,18 @@ Value *ScalarExprEmitter::VisitMatrixSubscriptExpr(MatrixSubscriptExpr *E) {
// integer value.
Value *RowIdx = Visit(E->getRowIdx());
Value *ColumnIdx = Visit(E->getColumnIdx());
+
+ const auto *MatrixTy = E->getBase()->getType()->castAs<ConstantMatrixType>();
+ unsigned NumRows = MatrixTy->getNumRows();
+ llvm::MatrixBuilder<CGBuilderTy> MB(Builder);
+ Value *Idx = MB.CreateIndex(RowIdx, ColumnIdx, NumRows);
+ if (CGF.CGM.getCodeGenOpts().OptimizationLevel > 0)
+ MB.CreateIndexAssumption(Idx, MatrixTy->getNumElementsFlattened());
+
Value *Matrix = Visit(E->getBase());
// TODO: Should we emit bounds checks with SanitizerKind::ArrayBounds?
- llvm::MatrixBuilder<CGBuilderTy> MB(Builder);
- return MB.CreateExtractElement(
- Matrix, RowIdx, ColumnIdx,
- E->getBase()->getType()->castAs<ConstantMatrixType>()->getNumRows());
+ return Builder.CreateExtractElement(Matrix, Idx, "matrixext");
}
static int getMaskElt(llvm::ShuffleVectorInst *SVI, unsigned Idx,
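A source-level sketch of what the subscript lowering above handles (the matrix extension must be enabled, e.g. with -fenable-matrix; names invented, not part of the patch):

// Hypothetical example:
typedef float m4x4_t __attribute__((matrix_type(4, 4)));

float get(m4x4_t m, unsigned r, unsigned c) {
  // The index is flattened column-major: idx = r + c * 4. At -O1 and above the
  // code above also emits an llvm.assume that idx < 16 (NumElementsFlattened).
  return m[r][c];
}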
@@ -2063,11 +2073,25 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
// perform the bitcast.
if (const auto *FixedSrc = dyn_cast<llvm::FixedVectorType>(SrcTy)) {
if (const auto *ScalableDst = dyn_cast<llvm::ScalableVectorType>(DstTy)) {
+ // If we are casting a fixed i8 vector to a scalable 16 x i1 predicate
+ // vector, use a vector insert and bitcast the result.
+ bool NeedsBitCast = false;
+ auto PredType = llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
+ llvm::Type *OrigType = DstTy;
+ if (ScalableDst == PredType &&
+ FixedSrc->getElementType() == Builder.getInt8Ty()) {
+ DstTy = llvm::ScalableVectorType::get(Builder.getInt8Ty(), 2);
+ ScalableDst = dyn_cast<llvm::ScalableVectorType>(DstTy);
+ NeedsBitCast = true;
+ }
if (FixedSrc->getElementType() == ScalableDst->getElementType()) {
llvm::Value *UndefVec = llvm::UndefValue::get(DstTy);
llvm::Value *Zero = llvm::Constant::getNullValue(CGF.CGM.Int64Ty);
- return Builder.CreateInsertVector(DstTy, UndefVec, Src, Zero,
- "castScalableSve");
+ llvm::Value *Result = Builder.CreateInsertVector(
+ DstTy, UndefVec, Src, Zero, "castScalableSve");
+ if (NeedsBitCast)
+ Result = Builder.CreateBitCast(Result, OrigType);
+ return Result;
}
}
}
@@ -2077,6 +2101,15 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
// perform the bitcast.
if (const auto *ScalableSrc = dyn_cast<llvm::ScalableVectorType>(SrcTy)) {
if (const auto *FixedDst = dyn_cast<llvm::FixedVectorType>(DstTy)) {
+ // If we are casting a scalable 16 x i1 predicate vector to a fixed i8
+ // vector, bitcast the source and use a vector extract.
+ auto PredType = llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
+ if (ScalableSrc == PredType &&
+ FixedDst->getElementType() == Builder.getInt8Ty()) {
+ SrcTy = llvm::ScalableVectorType::get(Builder.getInt8Ty(), 2);
+ ScalableSrc = dyn_cast<llvm::ScalableVectorType>(SrcTy);
+ Src = Builder.CreateBitCast(Src, SrcTy);
+ }
if (ScalableSrc->getElementType() == FixedDst->getElementType()) {
llvm::Value *Zero = llvm::Constant::getNullValue(CGF.CGM.Int64Ty);
return Builder.CreateExtractVector(DstTy, Src, Zero, "castFixedSve");
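As a source-level sketch of the predicate special case above (assuming -msve-vector-bits=512 and the ACLE fixed-length vector extension; illustrative only):

// Hypothetical example:
#include <arm_sve.h>
typedef svbool_t fixed_pred_t __attribute__((arm_sve_vector_bits(512)));

svbool_t widen(fixed_pred_t p) {
  // fixed <8 x i8> is inserted into <vscale x 2 x i8>, then bitcast to
  // <vscale x 16 x i1>
  return p;
}

fixed_pred_t narrow(svbool_t p) {
  // bitcast <vscale x 16 x i1> to <vscale x 2 x i8>, then extract the
  // fixed <8 x i8>
  return p;
}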
@@ -2087,10 +2120,9 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
// Perform VLAT <-> VLST bitcast through memory.
// TODO: since the llvm.experimental.vector.{insert,extract} intrinsics
// require the element types of the vectors to be the same, we
- // need to keep this around for casting between predicates, or more
- // generally for bitcasts between VLAT <-> VLST where the element
- // types of the vectors are not the same, until we figure out a better
- // way of doing these casts.
+ // need to keep this around for bitcasts between VLAT <-> VLST where
+ // the element types of the vectors are not the same, until we figure
+ // out a better way of doing these casts.
if ((isa<llvm::FixedVectorType>(SrcTy) &&
isa<llvm::ScalableVectorType>(DstTy)) ||
(isa<llvm::ScalableVectorType>(SrcTy) &&
@@ -2127,10 +2159,22 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
}
case CK_AtomicToNonAtomic:
case CK_NonAtomicToAtomic:
- case CK_NoOp:
case CK_UserDefinedConversion:
return Visit(const_cast<Expr*>(E));
+ case CK_NoOp: {
+ llvm::Value *V = Visit(const_cast<Expr *>(E));
+ if (V) {
+ // CK_NoOp can model a pointer qualification conversion, which can remove
+ // an array bound and change the IR type.
+ // FIXME: Once pointee types are removed from IR, remove this.
+ llvm::Type *T = ConvertType(DestTy);
+ if (T != V->getType())
+ V = Builder.CreateBitCast(V, T);
+ }
+ return V;
+ }
+
case CK_BaseToDerived: {
const CXXRecordDecl *DerivedClassDecl = DestTy->getPointeeCXXRecordDecl();
assert(DerivedClassDecl && "BaseToDerived arg isn't a C++ object pointer!");
@@ -2658,7 +2702,8 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
amt = llvm::ConstantFP::get(VMContext,
llvm::APFloat(static_cast<double>(amount)));
else {
- // Remaining types are Half, LongDouble or __float128. Convert from float.
+ // Remaining types are Half, LongDouble, __ibm128 or __float128. Convert
+ // from float.
llvm::APFloat F(static_cast<float>(amount));
bool ignored;
const llvm::fltSemantics *FS;
@@ -2668,6 +2713,8 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
FS = &CGF.getTarget().getFloat128Format();
else if (value->getType()->isHalfTy())
FS = &CGF.getTarget().getHalfFormat();
+ else if (value->getType()->isPPC_FP128Ty())
+ FS = &CGF.getTarget().getIbm128Format();
else
FS = &CGF.getTarget().getLongDoubleFormat();
F.convert(*FS, llvm::APFloat::rmTowardZero, &ignored);
@@ -4763,11 +4810,8 @@ Value *ScalarExprEmitter::VisitAsTypeExpr(AsTypeExpr *E) {
// vector to get a vec4, then a bitcast if the target type is different.
if (NumElementsSrc == 3 && NumElementsDst != 3) {
Src = ConvertVec3AndVec4(Builder, CGF, Src, 4);
-
- if (!CGF.CGM.getCodeGenOpts().PreserveVec3Type) {
- Src = createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), Src,
- DstTy);
- }
+ Src = createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), Src,
+ DstTy);
Src->setName("astype");
return Src;
@@ -4777,12 +4821,10 @@ Value *ScalarExprEmitter::VisitAsTypeExpr(AsTypeExpr *E) {
// to vec4 if the original type is not vec4, then a shuffle vector to
// get a vec3.
if (NumElementsSrc != 3 && NumElementsDst == 3) {
- if (!CGF.CGM.getCodeGenOpts().PreserveVec3Type) {
- auto *Vec4Ty = llvm::FixedVectorType::get(
- cast<llvm::VectorType>(DstTy)->getElementType(), 4);
- Src = createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), Src,
- Vec4Ty);
- }
+ auto *Vec4Ty = llvm::FixedVectorType::get(
+ cast<llvm::VectorType>(DstTy)->getElementType(), 4);
+ Src = createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), Src,
+ Vec4Ty);
Src = ConvertVec3AndVec4(Builder, CGF, Src, 3);
Src->setName("astype");
@@ -4942,7 +4984,7 @@ static GEPOffsetAndOverflow EmitGEPOffsetInBytes(Value *BasePtr, Value *GEPVal,
auto *GEP = cast<llvm::GEPOperator>(GEPVal);
assert(GEP->getPointerOperand() == BasePtr &&
- "BasePtr must be the the base of the GEP.");
+ "BasePtr must be the base of the GEP.");
assert(GEP->isInBounds() && "Expected inbounds GEP");
auto *IntPtrTy = DL.getIntPtrType(GEP->getPointerOperandType());
diff --git a/clang/lib/CodeGen/CGGPUBuiltin.cpp b/clang/lib/CodeGen/CGGPUBuiltin.cpp
index f860623e2bc3..fdd2fa18bb4a 100644
--- a/clang/lib/CodeGen/CGGPUBuiltin.cpp
+++ b/clang/lib/CodeGen/CGGPUBuiltin.cpp
@@ -21,13 +21,14 @@
using namespace clang;
using namespace CodeGen;
-static llvm::Function *GetVprintfDeclaration(llvm::Module &M) {
+namespace {
+llvm::Function *GetVprintfDeclaration(llvm::Module &M) {
llvm::Type *ArgTypes[] = {llvm::Type::getInt8PtrTy(M.getContext()),
llvm::Type::getInt8PtrTy(M.getContext())};
llvm::FunctionType *VprintfFuncType = llvm::FunctionType::get(
llvm::Type::getInt32Ty(M.getContext()), ArgTypes, false);
- if (auto* F = M.getFunction("vprintf")) {
+ if (auto *F = M.getFunction("vprintf")) {
// Our CUDA system header declares vprintf with the right signature, so
// nobody else should have been able to declare vprintf with a bogus
// signature.
@@ -41,6 +42,28 @@ static llvm::Function *GetVprintfDeclaration(llvm::Module &M) {
VprintfFuncType, llvm::GlobalVariable::ExternalLinkage, "vprintf", &M);
}
+llvm::Function *GetOpenMPVprintfDeclaration(CodeGenModule &CGM) {
+ const char *Name = "__llvm_omp_vprintf";
+ llvm::Module &M = CGM.getModule();
+ llvm::Type *ArgTypes[] = {llvm::Type::getInt8PtrTy(M.getContext()),
+ llvm::Type::getInt8PtrTy(M.getContext()),
+ llvm::Type::getInt32Ty(M.getContext())};
+ llvm::FunctionType *VprintfFuncType = llvm::FunctionType::get(
+ llvm::Type::getInt32Ty(M.getContext()), ArgTypes, false);
+
+ if (auto *F = M.getFunction(Name)) {
+ if (F->getFunctionType() != VprintfFuncType) {
+ CGM.Error(SourceLocation(),
+ "Invalid type declaration for __llvm_omp_vprintf");
+ return nullptr;
+ }
+ return F;
+ }
+
+ return llvm::Function::Create(
+ VprintfFuncType, llvm::GlobalVariable::ExternalLinkage, Name, &M);
+}
+
// Transforms a call to printf into a call to the NVPTX vprintf syscall (which
// isn't particularly special; it's invoked just like a regular function).
// vprintf takes two args: A format string, and a pointer to a buffer containing
@@ -66,39 +89,22 @@ static llvm::Function *GetVprintfDeclaration(llvm::Module &M) {
//
// Note that by the time this function runs, E's args have already undergone the
// standard C vararg promotion (short -> int, float -> double, etc.).
-RValue
-CodeGenFunction::EmitNVPTXDevicePrintfCallExpr(const CallExpr *E,
- ReturnValueSlot ReturnValue) {
- assert(getTarget().getTriple().isNVPTX());
- assert(E->getBuiltinCallee() == Builtin::BIprintf);
- assert(E->getNumArgs() >= 1); // printf always has at least one arg.
- const llvm::DataLayout &DL = CGM.getDataLayout();
- llvm::LLVMContext &Ctx = CGM.getLLVMContext();
-
- CallArgList Args;
- EmitCallArgs(Args,
- E->getDirectCallee()->getType()->getAs<FunctionProtoType>(),
- E->arguments(), E->getDirectCallee(),
- /* ParamsToSkip = */ 0);
-
- // We don't know how to emit non-scalar varargs.
- if (std::any_of(Args.begin() + 1, Args.end(), [&](const CallArg &A) {
- return !A.getRValue(*this).isScalar();
- })) {
- CGM.ErrorUnsupported(E, "non-scalar arg to printf");
- return RValue::get(llvm::ConstantInt::get(IntTy, 0));
- }
+std::pair<llvm::Value *, llvm::TypeSize>
+packArgsIntoNVPTXFormatBuffer(CodeGenFunction *CGF, const CallArgList &Args) {
+ const llvm::DataLayout &DL = CGF->CGM.getDataLayout();
+ llvm::LLVMContext &Ctx = CGF->CGM.getLLVMContext();
+ CGBuilderTy &Builder = CGF->Builder;
// Construct and fill the args buffer that we'll pass to vprintf.
- llvm::Value *BufferPtr;
if (Args.size() <= 1) {
- // If there are no args, pass a null pointer to vprintf.
- BufferPtr = llvm::ConstantPointerNull::get(llvm::Type::getInt8PtrTy(Ctx));
+ // If there are no args, pass a null pointer and size 0
+ llvm::Value *BufferPtr = llvm::ConstantPointerNull::get(llvm::Type::getInt8PtrTy(Ctx));
+ return {BufferPtr, llvm::TypeSize::Fixed(0)};
} else {
llvm::SmallVector<llvm::Type *, 8> ArgTypes;
for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I)
- ArgTypes.push_back(Args[I].getRValue(*this).getScalarVal()->getType());
+ ArgTypes.push_back(Args[I].getRValue(*CGF).getScalarVal()->getType());
// Using llvm::StructType is correct only because printf doesn't accept
// aggregates. If we had to handle aggregates here, we'd have to manually
@@ -106,25 +112,71 @@ CodeGenFunction::EmitNVPTXDevicePrintfCallExpr(const CallExpr *E,
// that the alignment of the llvm type was the same as the alignment of the
// clang type.
llvm::Type *AllocaTy = llvm::StructType::create(ArgTypes, "printf_args");
- llvm::Value *Alloca = CreateTempAlloca(AllocaTy);
+ llvm::Value *Alloca = CGF->CreateTempAlloca(AllocaTy);
for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I) {
llvm::Value *P = Builder.CreateStructGEP(AllocaTy, Alloca, I - 1);
- llvm::Value *Arg = Args[I].getRValue(*this).getScalarVal();
+ llvm::Value *Arg = Args[I].getRValue(*CGF).getScalarVal();
Builder.CreateAlignedStore(Arg, P, DL.getPrefTypeAlign(Arg->getType()));
}
- BufferPtr = Builder.CreatePointerCast(Alloca, llvm::Type::getInt8PtrTy(Ctx));
+ llvm::Value *BufferPtr =
+ Builder.CreatePointerCast(Alloca, llvm::Type::getInt8PtrTy(Ctx));
+ return {BufferPtr, DL.getTypeAllocSize(AllocaTy)};
}
+}
- // Invoke vprintf and return.
- llvm::Function* VprintfFunc = GetVprintfDeclaration(CGM.getModule());
- return RValue::get(Builder.CreateCall(
- VprintfFunc, {Args[0].getRValue(*this).getScalarVal(), BufferPtr}));
+bool containsNonScalarVarargs(CodeGenFunction *CGF, CallArgList Args) {
+ return llvm::any_of(llvm::drop_begin(Args), [&](const CallArg &A) {
+ return !A.getRValue(*CGF).isScalar();
+ });
}
-RValue
-CodeGenFunction::EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E,
- ReturnValueSlot ReturnValue) {
+RValue EmitDevicePrintfCallExpr(const CallExpr *E, CodeGenFunction *CGF,
+ llvm::Function *Decl, bool WithSizeArg) {
+ CodeGenModule &CGM = CGF->CGM;
+ CGBuilderTy &Builder = CGF->Builder;
+ assert(E->getBuiltinCallee() == Builtin::BIprintf);
+ assert(E->getNumArgs() >= 1); // printf always has at least one arg.
+
+ // Uses the same argument-packing format as nvptx, but also passes an i32
+ // with the total size of the packed buffer.
+ CallArgList Args;
+ CGF->EmitCallArgs(Args,
+ E->getDirectCallee()->getType()->getAs<FunctionProtoType>(),
+ E->arguments(), E->getDirectCallee(),
+ /* ParamsToSkip = */ 0);
+
+ // We don't know how to emit non-scalar varargs.
+ if (containsNonScalarVarargs(CGF, Args)) {
+ CGM.ErrorUnsupported(E, "non-scalar arg to printf");
+ return RValue::get(llvm::ConstantInt::get(CGF->IntTy, 0));
+ }
+
+ auto r = packArgsIntoNVPTXFormatBuffer(CGF, Args);
+ llvm::Value *BufferPtr = r.first;
+
+ llvm::SmallVector<llvm::Value *, 3> Vec = {
+ Args[0].getRValue(*CGF).getScalarVal(), BufferPtr};
+ if (WithSizeArg) {
+ // Passing > 32bit of data as a local alloca doesn't work for nvptx or
+ // amdgpu
+ llvm::Constant *Size =
+ llvm::ConstantInt::get(llvm::Type::getInt32Ty(CGM.getLLVMContext()),
+ static_cast<uint32_t>(r.second.getFixedSize()));
+
+ Vec.push_back(Size);
+ }
+ return RValue::get(Builder.CreateCall(Decl, Vec));
+}
+} // namespace
+
+RValue CodeGenFunction::EmitNVPTXDevicePrintfCallExpr(const CallExpr *E) {
+ assert(getTarget().getTriple().isNVPTX());
+ return EmitDevicePrintfCallExpr(
+ E, this, GetVprintfDeclaration(CGM.getModule()), false);
+}
+
+RValue CodeGenFunction::EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E) {
assert(getTarget().getTriple().getArch() == llvm::Triple::amdgcn);
assert(E->getBuiltinCallee() == Builtin::BIprintf ||
E->getBuiltinCallee() == Builtin::BI__builtin_printf);
@@ -154,3 +206,10 @@ CodeGenFunction::EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E,
Builder.SetInsertPoint(IRB.GetInsertBlock(), IRB.GetInsertPoint());
return RValue::get(Printf);
}
+
+RValue CodeGenFunction::EmitOpenMPDevicePrintfCallExpr(const CallExpr *E) {
+ assert(getTarget().getTriple().isNVPTX() ||
+ getTarget().getTriple().isAMDGCN());
+ return EmitDevicePrintfCallExpr(E, this, GetOpenMPVprintfDeclaration(CGM),
+ true);
+}
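To summarize the packing scheme above in source terms (an illustrative sketch; the struct and variable names are invented):

// Hypothetical lowering of:   printf("%d %f\n", i, d);   on a device target.
struct printf_args { int a0; double a1; };   // one member per vararg, in order
// ...an alloca of printf_args is filled with the promoted argument values...
// NVPTX/CUDA:     vprintf(fmt, (char *)&args_buffer);
// OpenMP offload: __llvm_omp_vprintf(fmt, (char *)&args_buffer,
//                                    (int)sizeof(struct printf_args));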
diff --git a/clang/lib/CodeGen/CGObjC.cpp b/clang/lib/CodeGen/CGObjC.cpp
index 937a0e8a3b69..ac26f0d4232c 100644
--- a/clang/lib/CodeGen/CGObjC.cpp
+++ b/clang/lib/CodeGen/CGObjC.cpp
@@ -1555,6 +1555,12 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl,
argCK = CK_AnyPointerToBlockPointerCast;
} else if (ivarRef.getType()->isPointerType()) {
argCK = CK_BitCast;
+ } else if (argLoad.getType()->isAtomicType() &&
+ !ivarRef.getType()->isAtomicType()) {
+ argCK = CK_AtomicToNonAtomic;
+ } else if (!argLoad.getType()->isAtomicType() &&
+ ivarRef.getType()->isAtomicType()) {
+ argCK = CK_NonAtomicToAtomic;
}
ImplicitCastExpr argCast(ImplicitCastExpr::OnStack, ivarRef.getType(), argCK,
&argLoad, VK_PRValue, FPOptionsOverride());
@@ -2108,6 +2114,13 @@ static void setARCRuntimeFunctionLinkage(CodeGenModule &CGM,
setARCRuntimeFunctionLinkage(CGM, RTF.getCallee());
}
+static llvm::Function *getARCIntrinsic(llvm::Intrinsic::ID IntID,
+ CodeGenModule &CGM) {
+ llvm::Function *fn = CGM.getIntrinsic(IntID);
+ setARCRuntimeFunctionLinkage(CGM, fn);
+ return fn;
+}
+
/// Perform an operation having the signature
/// i8* (i8*)
/// where a null input causes a no-op and returns null.
@@ -2118,10 +2131,8 @@ static llvm::Value *emitARCValueOperation(
if (isa<llvm::ConstantPointerNull>(value))
return value;
- if (!fn) {
- fn = CGF.CGM.getIntrinsic(IntID);
- setARCRuntimeFunctionLinkage(CGF.CGM, fn);
- }
+ if (!fn)
+ fn = getARCIntrinsic(IntID, CGF.CGM);
// Cast the argument to 'id'.
llvm::Type *origType = returnType ? returnType : value->getType();
@@ -2140,10 +2151,8 @@ static llvm::Value *emitARCValueOperation(
static llvm::Value *emitARCLoadOperation(CodeGenFunction &CGF, Address addr,
llvm::Function *&fn,
llvm::Intrinsic::ID IntID) {
- if (!fn) {
- fn = CGF.CGM.getIntrinsic(IntID);
- setARCRuntimeFunctionLinkage(CGF.CGM, fn);
- }
+ if (!fn)
+ fn = getARCIntrinsic(IntID, CGF.CGM);
// Cast the argument to 'id*'.
llvm::Type *origType = addr.getElementType();
@@ -2168,10 +2177,8 @@ static llvm::Value *emitARCStoreOperation(CodeGenFunction &CGF, Address addr,
bool ignored) {
assert(addr.getElementType() == value->getType());
- if (!fn) {
- fn = CGF.CGM.getIntrinsic(IntID);
- setARCRuntimeFunctionLinkage(CGF.CGM, fn);
- }
+ if (!fn)
+ fn = getARCIntrinsic(IntID, CGF.CGM);
llvm::Type *origType = value->getType();
@@ -2193,10 +2200,8 @@ static void emitARCCopyOperation(CodeGenFunction &CGF, Address dst, Address src,
llvm::Intrinsic::ID IntID) {
assert(dst.getType() == src.getType());
- if (!fn) {
- fn = CGF.CGM.getIntrinsic(IntID);
- setARCRuntimeFunctionLinkage(CGF.CGM, fn);
- }
+ if (!fn)
+ fn = getARCIntrinsic(IntID, CGF.CGM);
llvm::Value *args[] = {
CGF.Builder.CreateBitCast(dst.getPointer(), CGF.Int8PtrPtrTy),
@@ -2340,13 +2345,22 @@ static llvm::Value *emitOptimizedARCReturnCall(llvm::Value *value,
// retainRV or claimRV calls in the IR. We currently do this only when the
// optimization level isn't -O0 since global-isel, which is currently run at
// -O0, doesn't know about the operand bundle.
+ ObjCEntrypoints &EPs = CGF.CGM.getObjCEntrypoints();
+ llvm::Function *&EP = IsRetainRV
+ ? EPs.objc_retainAutoreleasedReturnValue
+ : EPs.objc_unsafeClaimAutoreleasedReturnValue;
+ llvm::Intrinsic::ID IID =
+ IsRetainRV ? llvm::Intrinsic::objc_retainAutoreleasedReturnValue
+ : llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue;
+ EP = getARCIntrinsic(IID, CGF.CGM);
- // FIXME: Do this when the target isn't aarch64.
+ llvm::Triple::ArchType Arch = CGF.CGM.getTriple().getArch();
+
+ // FIXME: Do this on all targets and at -O0 too. This can be enabled only if
+ // the target backend knows how to handle the operand bundle.
if (CGF.CGM.getCodeGenOpts().OptimizationLevel > 0 &&
- CGF.CGM.getTarget().getTriple().isAArch64()) {
- llvm::Value *bundleArgs[] = {llvm::ConstantInt::get(
- CGF.Int64Ty,
- llvm::objcarc::getAttachedCallOperandBundleEnum(IsRetainRV))};
+ (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::x86_64)) {
+ llvm::Value *bundleArgs[] = {EP};
llvm::OperandBundleDef OB("clang.arc.attachedcall", bundleArgs);
auto *oldCall = cast<llvm::CallBase>(value);
llvm::CallBase *newCall = llvm::CallBase::addOperandBundle(
@@ -2362,13 +2376,6 @@ static llvm::Value *emitOptimizedARCReturnCall(llvm::Value *value,
CGF.CGM.getTargetCodeGenInfo().markARCOptimizedReturnCallsAsNoTail();
llvm::CallInst::TailCallKind tailKind =
isNoTail ? llvm::CallInst::TCK_NoTail : llvm::CallInst::TCK_None;
- ObjCEntrypoints &EPs = CGF.CGM.getObjCEntrypoints();
- llvm::Function *&EP = IsRetainRV
- ? EPs.objc_retainAutoreleasedReturnValue
- : EPs.objc_unsafeClaimAutoreleasedReturnValue;
- llvm::Intrinsic::ID IID =
- IsRetainRV ? llvm::Intrinsic::objc_retainAutoreleasedReturnValue
- : llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue;
return emitARCValueOperation(CGF, value, nullptr, EP, IID, tailKind);
}
@@ -2401,10 +2408,8 @@ void CodeGenFunction::EmitARCRelease(llvm::Value *value,
if (isa<llvm::ConstantPointerNull>(value)) return;
llvm::Function *&fn = CGM.getObjCEntrypoints().objc_release;
- if (!fn) {
- fn = CGM.getIntrinsic(llvm::Intrinsic::objc_release);
- setARCRuntimeFunctionLinkage(CGM, fn);
- }
+ if (!fn)
+ fn = getARCIntrinsic(llvm::Intrinsic::objc_release, CGM);
// Cast the argument to 'id'.
value = Builder.CreateBitCast(value, Int8PtrTy);
@@ -2447,10 +2452,8 @@ llvm::Value *CodeGenFunction::EmitARCStoreStrongCall(Address addr,
assert(addr.getElementType() == value->getType());
llvm::Function *&fn = CGM.getObjCEntrypoints().objc_storeStrong;
- if (!fn) {
- fn = CGM.getIntrinsic(llvm::Intrinsic::objc_storeStrong);
- setARCRuntimeFunctionLinkage(CGM, fn);
- }
+ if (!fn)
+ fn = getARCIntrinsic(llvm::Intrinsic::objc_storeStrong, CGM);
llvm::Value *args[] = {
Builder.CreateBitCast(addr.getPointer(), Int8PtrPtrTy),
@@ -2603,10 +2606,8 @@ void CodeGenFunction::EmitARCInitWeak(Address addr, llvm::Value *value) {
/// Essentially objc_storeWeak(addr, nil).
void CodeGenFunction::EmitARCDestroyWeak(Address addr) {
llvm::Function *&fn = CGM.getObjCEntrypoints().objc_destroyWeak;
- if (!fn) {
- fn = CGM.getIntrinsic(llvm::Intrinsic::objc_destroyWeak);
- setARCRuntimeFunctionLinkage(CGM, fn);
- }
+ if (!fn)
+ fn = getARCIntrinsic(llvm::Intrinsic::objc_destroyWeak, CGM);
// Cast the argument to 'id*'.
addr = Builder.CreateBitCast(addr, Int8PtrPtrTy);
@@ -2651,10 +2652,8 @@ void CodeGenFunction::emitARCMoveAssignWeak(QualType Ty, Address DstAddr,
/// call i8* \@objc_autoreleasePoolPush(void)
llvm::Value *CodeGenFunction::EmitObjCAutoreleasePoolPush() {
llvm::Function *&fn = CGM.getObjCEntrypoints().objc_autoreleasePoolPush;
- if (!fn) {
- fn = CGM.getIntrinsic(llvm::Intrinsic::objc_autoreleasePoolPush);
- setARCRuntimeFunctionLinkage(CGM, fn);
- }
+ if (!fn)
+ fn = getARCIntrinsic(llvm::Intrinsic::objc_autoreleasePoolPush, CGM);
return EmitNounwindRuntimeCall(fn);
}
@@ -2679,10 +2678,8 @@ void CodeGenFunction::EmitObjCAutoreleasePoolPop(llvm::Value *value) {
EmitRuntimeCallOrInvoke(fn, value);
} else {
llvm::FunctionCallee &fn = CGM.getObjCEntrypoints().objc_autoreleasePoolPop;
- if (!fn) {
- fn = CGM.getIntrinsic(llvm::Intrinsic::objc_autoreleasePoolPop);
- setARCRuntimeFunctionLinkage(CGM, fn);
- }
+ if (!fn)
+ fn = getARCIntrinsic(llvm::Intrinsic::objc_autoreleasePoolPop, CGM);
EmitRuntimeCall(fn, value);
}
@@ -3344,7 +3341,8 @@ struct ARCRetainExprEmitter :
TryEmitResult result = visitExpr(e);
// Avoid the block-retain if this is a block literal that doesn't need to be
// copied to the heap.
- if (e->getBlockDecl()->canAvoidCopyToHeap())
+ if (CGF.CGM.getCodeGenOpts().ObjCAvoidHeapifyLocalBlocks &&
+ e->getBlockDecl()->canAvoidCopyToHeap())
result.setInt(true);
return result;
}
@@ -3697,7 +3695,7 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction(
FunctionDecl *FD = FunctionDecl::Create(
C, C.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II,
- FunctionTy, nullptr, SC_Static, false, false);
+ FunctionTy, nullptr, SC_Static, false, false, false);
FunctionArgList args;
ParmVarDecl *Params[2];
@@ -3787,7 +3785,7 @@ CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction(
FunctionDecl *FD = FunctionDecl::Create(
C, C.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II,
- FunctionTy, nullptr, SC_Static, false, false);
+ FunctionTy, nullptr, SC_Static, false, false, false);
FunctionArgList args;
ParmVarDecl *Params[2];
diff --git a/clang/lib/CodeGen/CGObjCGNU.cpp b/clang/lib/CodeGen/CGObjCGNU.cpp
index 3f361f4e7931..e016644150b4 100644
--- a/clang/lib/CodeGen/CGObjCGNU.cpp
+++ b/clang/lib/CodeGen/CGObjCGNU.cpp
@@ -2651,35 +2651,6 @@ CGObjCGNU::GenerateMessageSend(CodeGenFunction &CGF,
}
}
- // If the return type is something that goes in an integer register, the
- // runtime will handle 0 returns. For other cases, we fill in the 0 value
- // ourselves.
- //
- // The language spec says the result of this kind of message send is
- // undefined, but lots of people seem to have forgotten to read that
- // paragraph and insist on sending messages to nil that have structure
- // returns. With GCC, this generates a random return value (whatever happens
- // to be on the stack / in those registers at the time) on most platforms,
- // and generates an illegal instruction trap on SPARC. With LLVM it corrupts
- // the stack.
- bool isPointerSizedReturn = (ResultType->isAnyPointerType() ||
- ResultType->isIntegralOrEnumerationType() || ResultType->isVoidType());
-
- llvm::BasicBlock *startBB = nullptr;
- llvm::BasicBlock *messageBB = nullptr;
- llvm::BasicBlock *continueBB = nullptr;
-
- if (!isPointerSizedReturn) {
- startBB = Builder.GetInsertBlock();
- messageBB = CGF.createBasicBlock("msgSend");
- continueBB = CGF.createBasicBlock("continue");
-
- llvm::Value *isNil = Builder.CreateICmpEQ(Receiver,
- llvm::Constant::getNullValue(Receiver->getType()));
- Builder.CreateCondBr(isNil, continueBB, messageBB);
- CGF.EmitBlock(messageBB);
- }
-
IdTy = cast<llvm::PointerType>(CGM.getTypes().ConvertType(ASTIdTy));
llvm::Value *cmd;
if (Method)
@@ -2703,6 +2674,96 @@ CGObjCGNU::GenerateMessageSend(CodeGenFunction &CGF,
MessageSendInfo MSI = getMessageSendInfo(Method, ResultType, ActualArgs);
+ // Message sends are expected to return a zero value when the
+ // receiver is nil. At one point, this was only guaranteed for
+ // simple integer and pointer types, but expectations have grown
+ // over time.
+ //
+ // Given a nil receiver, the GNU runtime's message lookup will
+ // return a stub function that simply sets various return-value
+ // registers to zero and then returns. That's good enough for us
+ // if and only if (1) the calling conventions of that stub are
+ // compatible with the signature we're using and (2) the registers
+ // it sets are sufficient to produce a zero value of the return type.
+ // Rather than doing a whole target-specific analysis, we assume it
+ // only works for void, integer, and pointer types, and in all
+ // other cases we emit an explicit nil check. In
+ // addition to ensuring we produce a zero value for other types, this
+ // sidesteps the few outright CC incompatibilities we know about that
+ // could otherwise lead to crashes, like when a method is expected to
+ // return on the x87 floating point stack or adjust the stack pointer
+ // because of an indirect return.
+ bool hasParamDestroyedInCallee = false;
+ bool requiresExplicitZeroResult = false;
+ bool requiresNilReceiverCheck = [&] {
+ // We never need a check if we statically know the receiver isn't nil.
+ if (!canMessageReceiverBeNull(CGF, Method, /*IsSuper*/ false,
+ Class, Receiver))
+ return false;
+
+ // If there's a consumed argument, we need a nil check.
+ if (Method && Method->hasParamDestroyedInCallee()) {
+ hasParamDestroyedInCallee = true;
+ }
+
+ // If the return value isn't flagged as unused, and the result
+ // type isn't in our narrow set where we assume compatibility,
+ // we need a nil check to ensure a zero value.
+ if (!Return.isUnused()) {
+ if (ResultType->isVoidType()) {
+ // void results are definitely okay.
+ } else if (ResultType->hasPointerRepresentation() &&
+ CGM.getTypes().isZeroInitializable(ResultType)) {
+ // Pointer types should be fine as long as they have
+ // bitwise-zero null pointers. But do we need to worry
+ // about unusual address spaces?
+ } else if (ResultType->isIntegralOrEnumerationType()) {
+ // Bitwise zero should always be zero for integral types.
+ // FIXME: we probably need a size limit here, but we've
+ // never imposed one before
+ } else {
+ // Otherwise, use an explicit check just to be sure.
+ requiresExplicitZeroResult = true;
+ }
+ }
+
+ return hasParamDestroyedInCallee || requiresExplicitZeroResult;
+ }();
+
+ // We will need to explicitly zero-initialize an aggregate result slot
+ // if we generally require explicit zeroing and we have an aggregate
+ // result.
+ bool requiresExplicitAggZeroing =
+ requiresExplicitZeroResult && CGF.hasAggregateEvaluationKind(ResultType);
+
+ // The block we're going to end up in after any message send or nil path.
+ llvm::BasicBlock *continueBB = nullptr;
+ // The block that eventually branched to continueBB along the nil path.
+ llvm::BasicBlock *nilPathBB = nullptr;
+ // The block to do explicit work in along the nil path, if necessary.
+ llvm::BasicBlock *nilCleanupBB = nullptr;
+
+ // Emit the nil-receiver check.
+ if (requiresNilReceiverCheck) {
+ llvm::BasicBlock *messageBB = CGF.createBasicBlock("msgSend");
+ continueBB = CGF.createBasicBlock("continue");
+
+ // If we need to zero-initialize an aggregate result or destroy
+ // consumed arguments, we'll need a separate cleanup block.
+ // Otherwise we can just branch directly to the continuation block.
+ if (requiresExplicitAggZeroing || hasParamDestroyedInCallee) {
+ nilCleanupBB = CGF.createBasicBlock("nilReceiverCleanup");
+ } else {
+ nilPathBB = Builder.GetInsertBlock();
+ }
+
+ llvm::Value *isNil = Builder.CreateICmpEQ(Receiver,
+ llvm::Constant::getNullValue(Receiver->getType()));
+ Builder.CreateCondBr(isNil, nilCleanupBB ? nilCleanupBB : continueBB,
+ messageBB);
+ CGF.EmitBlock(messageBB);
+ }
+
// Get the IMP to call
llvm::Value *imp;
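
The classification described in the comment above boils down to a small predicate. The following is an illustrative sketch, not part of the patch, written against the same clang::QualType and CodeGenModule queries the new code uses and assuming it lives inside clang's CodeGen library:

    // Sketch only: does a nil receiver force us to zero the result ourselves,
    // or is the runtime stub's register zeroing already sufficient?
    static bool needsExplicitZeroResult(clang::QualType ResultType,
                                        clang::CodeGen::CodeGenModule &CGM) {
      if (ResultType->isVoidType())
        return false;                     // nothing to produce
      if (ResultType->hasPointerRepresentation() &&
          CGM.getTypes().isZeroInitializable(ResultType))
        return false;                     // bitwise-zero null pointer is fine
      if (ResultType->isIntegralOrEnumerationType())
        return false;                     // bitwise zero is the right value
      return true;                        // floats, aggregates, complex, ...
    }
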
@@ -2744,36 +2805,48 @@ CGObjCGNU::GenerateMessageSend(CodeGenFunction &CGF,
RValue msgRet = CGF.EmitCall(MSI.CallInfo, callee, Return, ActualArgs, &call);
call->setMetadata(msgSendMDKind, node);
-
- if (!isPointerSizedReturn) {
- messageBB = CGF.Builder.GetInsertBlock();
+ if (requiresNilReceiverCheck) {
+ llvm::BasicBlock *nonNilPathBB = CGF.Builder.GetInsertBlock();
CGF.Builder.CreateBr(continueBB);
+
+ // Emit the nil path if we decided it was necessary above.
+ if (nilCleanupBB) {
+ CGF.EmitBlock(nilCleanupBB);
+
+ if (hasParamDestroyedInCallee) {
+ destroyCalleeDestroyedArguments(CGF, Method, CallArgs);
+ }
+
+ if (requiresExplicitAggZeroing) {
+ assert(msgRet.isAggregate());
+ Address addr = msgRet.getAggregateAddress();
+ CGF.EmitNullInitialization(addr, ResultType);
+ }
+
+ nilPathBB = CGF.Builder.GetInsertBlock();
+ CGF.Builder.CreateBr(continueBB);
+ }
+
+ // Enter the continuation block and emit a phi if required.
CGF.EmitBlock(continueBB);
if (msgRet.isScalar()) {
llvm::Value *v = msgRet.getScalarVal();
llvm::PHINode *phi = Builder.CreatePHI(v->getType(), 2);
- phi->addIncoming(v, messageBB);
- phi->addIncoming(llvm::Constant::getNullValue(v->getType()), startBB);
+ phi->addIncoming(v, nonNilPathBB);
+ phi->addIncoming(CGM.EmitNullConstant(ResultType), nilPathBB);
msgRet = RValue::get(phi);
} else if (msgRet.isAggregate()) {
- Address v = msgRet.getAggregateAddress();
- llvm::PHINode *phi = Builder.CreatePHI(v.getType(), 2);
- llvm::Type *RetTy = v.getElementType();
- Address NullVal = CGF.CreateTempAlloca(RetTy, v.getAlignment(), "null");
- CGF.InitTempAlloca(NullVal, llvm::Constant::getNullValue(RetTy));
- phi->addIncoming(v.getPointer(), messageBB);
- phi->addIncoming(NullVal.getPointer(), startBB);
- msgRet = RValue::getAggregate(Address(phi, v.getAlignment()));
+ // Aggregate zeroing is handled in nilCleanupBB when it's required.
} else /* isComplex() */ {
std::pair<llvm::Value*,llvm::Value*> v = msgRet.getComplexVal();
llvm::PHINode *phi = Builder.CreatePHI(v.first->getType(), 2);
- phi->addIncoming(v.first, messageBB);
+ phi->addIncoming(v.first, nonNilPathBB);
phi->addIncoming(llvm::Constant::getNullValue(v.first->getType()),
- startBB);
+ nilPathBB);
llvm::PHINode *phi2 = Builder.CreatePHI(v.second->getType(), 2);
- phi2->addIncoming(v.second, messageBB);
+ phi2->addIncoming(v.second, nonNilPathBB);
phi2->addIncoming(llvm::Constant::getNullValue(v.second->getType()),
- startBB);
+ nilPathBB);
msgRet = RValue::getComplex(phi, phi2);
}
}
diff --git a/clang/lib/CodeGen/CGObjCMac.cpp b/clang/lib/CodeGen/CGObjCMac.cpp
index 3de67bb4bbc5..5b925359ac25 100644
--- a/clang/lib/CodeGen/CGObjCMac.cpp
+++ b/clang/lib/CodeGen/CGObjCMac.cpp
@@ -1754,37 +1754,9 @@ struct NullReturnState {
// Okay, start emitting the null-receiver block.
CGF.EmitBlock(NullBB);
- // Release any consumed arguments we've got.
+ // Destroy any consumed arguments we've got.
if (Method) {
- CallArgList::const_iterator I = CallArgs.begin();
- for (ObjCMethodDecl::param_const_iterator i = Method->param_begin(),
- e = Method->param_end(); i != e; ++i, ++I) {
- const ParmVarDecl *ParamDecl = (*i);
- if (ParamDecl->hasAttr<NSConsumedAttr>()) {
- RValue RV = I->getRValue(CGF);
- assert(RV.isScalar() &&
- "NullReturnState::complete - arg not on object");
- CGF.EmitARCRelease(RV.getScalarVal(), ARCImpreciseLifetime);
- } else {
- QualType QT = ParamDecl->getType();
- auto *RT = QT->getAs<RecordType>();
- if (RT && RT->getDecl()->isParamDestroyedInCallee()) {
- RValue RV = I->getRValue(CGF);
- QualType::DestructionKind DtorKind = QT.isDestructedType();
- switch (DtorKind) {
- case QualType::DK_cxx_destructor:
- CGF.destroyCXXObject(CGF, RV.getAggregateAddress(), QT);
- break;
- case QualType::DK_nontrivial_c_struct:
- CGF.destroyNonTrivialCStruct(CGF, RV.getAggregateAddress(), QT);
- break;
- default:
- llvm_unreachable("unexpected dtor kind");
- break;
- }
- }
- }
- }
+ CGObjCRuntime::destroyCalleeDestroyedArguments(CGF, Method, CallArgs);
}
// The phi code below assumes that we haven't needed any control flow yet.
@@ -2151,15 +2123,6 @@ CodeGen::RValue CGObjCMac::GenerateMessageSend(CodeGen::CodeGenFunction &CGF,
Method, Class, ObjCTypes);
}
-static bool isWeakLinkedClass(const ObjCInterfaceDecl *ID) {
- do {
- if (ID->isWeakImported())
- return true;
- } while ((ID = ID->getSuperClass()));
-
- return false;
-}
-
CodeGen::RValue
CGObjCCommonMac::EmitMessageSend(CodeGen::CodeGenFunction &CGF,
ReturnValueSlot Return,
@@ -2200,32 +2163,8 @@ CGObjCCommonMac::EmitMessageSend(CodeGen::CodeGenFunction &CGF,
CGM.getContext().getCanonicalType(ResultType) &&
"Result type mismatch!");
- bool ReceiverCanBeNull = true;
-
- // Super dispatch assumes that self is non-null; even the messenger
- // doesn't have a null check internally.
- if (IsSuper) {
- ReceiverCanBeNull = false;
-
- // If this is a direct dispatch of a class method, check whether the class,
- // or anything in its hierarchy, was weak-linked.
- } else if (ClassReceiver && Method && Method->isClassMethod()) {
- ReceiverCanBeNull = isWeakLinkedClass(ClassReceiver);
-
- // If we're emitting a method, and self is const (meaning just ARC, for now),
- // and the receiver is a load of self, then self is a valid object.
- } else if (auto CurMethod =
- dyn_cast_or_null<ObjCMethodDecl>(CGF.CurCodeDecl)) {
- auto Self = CurMethod->getSelfDecl();
- if (Self->getType().isConstQualified()) {
- if (auto LI = dyn_cast<llvm::LoadInst>(Arg0->stripPointerCasts())) {
- llvm::Value *SelfAddr = CGF.GetAddrOfLocalVar(Self).getPointer();
- if (SelfAddr == LI->getPointerOperand()) {
- ReceiverCanBeNull = false;
- }
- }
- }
- }
+ bool ReceiverCanBeNull =
+ canMessageReceiverBeNull(CGF, Method, IsSuper, ClassReceiver, Arg0);
bool RequiresNullCheck = false;
@@ -2261,14 +2200,8 @@ CGObjCCommonMac::EmitMessageSend(CodeGen::CodeGenFunction &CGF,
RequiresNullCheck = false;
// Emit a null-check if there's a consumed argument other than the receiver.
- if (!RequiresNullCheck && CGM.getLangOpts().ObjCAutoRefCount && Method) {
- for (const auto *ParamDecl : Method->parameters()) {
- if (ParamDecl->isDestroyedInCallee()) {
- RequiresNullCheck = true;
- break;
- }
- }
- }
+ if (!RequiresNullCheck && Method && Method->hasParamDestroyedInCallee())
+ RequiresNullCheck = true;
NullReturnState nullReturn;
if (RequiresNullCheck) {
@@ -4788,9 +4721,7 @@ void CGObjCMac::EmitTryOrSynchronizedStmt(CodeGen::CodeGenFunction &CGF,
// matched and avoid generating code for falling off the end if
// so.
bool AllMatched = false;
- for (unsigned I = 0, N = AtTryStmt->getNumCatchStmts(); I != N; ++I) {
- const ObjCAtCatchStmt *CatchStmt = AtTryStmt->getCatchStmt(I);
-
+ for (const ObjCAtCatchStmt *CatchStmt : AtTryStmt->catch_stmts()) {
const VarDecl *CatchParam = CatchStmt->getCatchParamDecl();
const ObjCObjectPointerType *OPT = nullptr;
@@ -6741,33 +6672,53 @@ void CGObjCNonFragileABIMac::GenerateCategory(const ObjCCategoryImplDecl *OCD) {
}
}
- values.add(emitMethodList(listName, MethodListType::CategoryInstanceMethods,
- instanceMethods));
- values.add(emitMethodList(listName, MethodListType::CategoryClassMethods,
- classMethods));
+ auto instanceMethodList = emitMethodList(
+ listName, MethodListType::CategoryInstanceMethods, instanceMethods);
+ auto classMethodList = emitMethodList(
+ listName, MethodListType::CategoryClassMethods, classMethods);
+ values.add(instanceMethodList);
+ values.add(classMethodList);
+ // Keep track of whether we have actual metadata to emit.
+ bool isEmptyCategory =
+ instanceMethodList->isNullValue() && classMethodList->isNullValue();
const ObjCCategoryDecl *Category =
- Interface->FindCategoryDeclaration(OCD->getIdentifier());
+ Interface->FindCategoryDeclaration(OCD->getIdentifier());
if (Category) {
SmallString<256> ExtName;
- llvm::raw_svector_ostream(ExtName) << Interface->getObjCRuntimeNameAsString() << "_$_"
- << OCD->getName();
- values.add(EmitProtocolList("_OBJC_CATEGORY_PROTOCOLS_$_"
- + Interface->getObjCRuntimeNameAsString() + "_$_"
- + Category->getName(),
- Category->protocol_begin(),
- Category->protocol_end()));
- values.add(EmitPropertyList("_OBJC_$_PROP_LIST_" + ExtName.str(),
- OCD, Category, ObjCTypes, false));
- values.add(EmitPropertyList("_OBJC_$_CLASS_PROP_LIST_" + ExtName.str(),
- OCD, Category, ObjCTypes, true));
+ llvm::raw_svector_ostream(ExtName)
+ << Interface->getObjCRuntimeNameAsString() << "_$_" << OCD->getName();
+ auto protocolList =
+ EmitProtocolList("_OBJC_CATEGORY_PROTOCOLS_$_" +
+ Interface->getObjCRuntimeNameAsString() + "_$_" +
+ Category->getName(),
+ Category->protocol_begin(), Category->protocol_end());
+ auto propertyList = EmitPropertyList("_OBJC_$_PROP_LIST_" + ExtName.str(),
+ OCD, Category, ObjCTypes, false);
+ auto classPropertyList =
+ EmitPropertyList("_OBJC_$_CLASS_PROP_LIST_" + ExtName.str(), OCD,
+ Category, ObjCTypes, true);
+ values.add(protocolList);
+ values.add(propertyList);
+ values.add(classPropertyList);
+ isEmptyCategory &= protocolList->isNullValue() &&
+ propertyList->isNullValue() &&
+ classPropertyList->isNullValue();
} else {
values.addNullPointer(ObjCTypes.ProtocolListnfABIPtrTy);
values.addNullPointer(ObjCTypes.PropertyListPtrTy);
values.addNullPointer(ObjCTypes.PropertyListPtrTy);
}
- unsigned Size = CGM.getDataLayout().getTypeAllocSize(ObjCTypes.CategorynfABITy);
+ if (isEmptyCategory) {
+ // Empty category, don't emit any metadata.
+ values.abandon();
+ MethodDefinitions.clear();
+ return;
+ }
+
+ unsigned Size =
+ CGM.getDataLayout().getTypeAllocSize(ObjCTypes.CategorynfABITy);
values.addInt(ObjCTypes.IntTy, Size);
llvm::GlobalVariable *GCATV =
diff --git a/clang/lib/CodeGen/CGObjCRuntime.cpp b/clang/lib/CodeGen/CGObjCRuntime.cpp
index 108f6fc7ba60..33ae3c7c2b28 100644
--- a/clang/lib/CodeGen/CGObjCRuntime.cpp
+++ b/clang/lib/CodeGen/CGObjCRuntime.cpp
@@ -163,8 +163,7 @@ void CGObjCRuntime::EmitTryCatchStmt(CodeGenFunction &CGF,
// Enter the catch, if there is one.
if (S.getNumCatchStmts()) {
- for (unsigned I = 0, N = S.getNumCatchStmts(); I != N; ++I) {
- const ObjCAtCatchStmt *CatchStmt = S.getCatchStmt(I);
+ for (const ObjCAtCatchStmt *CatchStmt : S.catch_stmts()) {
const VarDecl *CatchDecl = CatchStmt->getCatchParamDecl();
Handlers.push_back(CatchHandler());
@@ -385,6 +384,83 @@ CGObjCRuntime::getMessageSendInfo(const ObjCMethodDecl *method,
return MessageSendInfo(argsInfo, signatureType);
}
+bool CGObjCRuntime::canMessageReceiverBeNull(CodeGenFunction &CGF,
+ const ObjCMethodDecl *method,
+ bool isSuper,
+ const ObjCInterfaceDecl *classReceiver,
+ llvm::Value *receiver) {
+ // Super dispatch assumes that self is non-null; even the messenger
+ // doesn't have a null check internally.
+ if (isSuper)
+ return false;
+
+ // If this is a direct dispatch of a class method, check whether the class,
+ // or anything in its hierarchy, was weak-linked.
+ if (classReceiver && method && method->isClassMethod())
+ return isWeakLinkedClass(classReceiver);
+
+ // If we're emitting a method, and self is const (meaning just ARC, for now),
+ // and the receiver is a load of self, then self is a valid object.
+ if (auto curMethod =
+ dyn_cast_or_null<ObjCMethodDecl>(CGF.CurCodeDecl)) {
+ auto self = curMethod->getSelfDecl();
+ if (self->getType().isConstQualified()) {
+ if (auto LI = dyn_cast<llvm::LoadInst>(receiver->stripPointerCasts())) {
+ llvm::Value *selfAddr = CGF.GetAddrOfLocalVar(self).getPointer();
+ if (selfAddr == LI->getPointerOperand()) {
+ return false;
+ }
+ }
+ }
+ }
+
+ // Otherwise, assume it can be null.
+ return true;
+}
+
+bool CGObjCRuntime::isWeakLinkedClass(const ObjCInterfaceDecl *ID) {
+ do {
+ if (ID->isWeakImported())
+ return true;
+ } while ((ID = ID->getSuperClass()));
+
+ return false;
+}
+
+void CGObjCRuntime::destroyCalleeDestroyedArguments(CodeGenFunction &CGF,
+ const ObjCMethodDecl *method,
+ const CallArgList &callArgs) {
+ CallArgList::const_iterator I = callArgs.begin();
+ for (auto i = method->param_begin(), e = method->param_end();
+ i != e; ++i, ++I) {
+ const ParmVarDecl *param = (*i);
+ if (param->hasAttr<NSConsumedAttr>()) {
+ RValue RV = I->getRValue(CGF);
+ assert(RV.isScalar() &&
+ "NullReturnState::complete - arg not on object");
+ CGF.EmitARCRelease(RV.getScalarVal(), ARCImpreciseLifetime);
+ } else {
+ QualType QT = param->getType();
+ auto *RT = QT->getAs<RecordType>();
+ if (RT && RT->getDecl()->isParamDestroyedInCallee()) {
+ RValue RV = I->getRValue(CGF);
+ QualType::DestructionKind DtorKind = QT.isDestructedType();
+ switch (DtorKind) {
+ case QualType::DK_cxx_destructor:
+ CGF.destroyCXXObject(CGF, RV.getAggregateAddress(), QT);
+ break;
+ case QualType::DK_nontrivial_c_struct:
+ CGF.destroyNonTrivialCStruct(CGF, RV.getAggregateAddress(), QT);
+ break;
+ default:
+ llvm_unreachable("unexpected dtor kind");
+ break;
+ }
+ }
+ }
+ }
+}
+
llvm::Constant *
clang::CodeGen::emitObjCProtocolObject(CodeGenModule &CGM,
const ObjCProtocolDecl *protocol) {
diff --git a/clang/lib/CodeGen/CGObjCRuntime.h b/clang/lib/CodeGen/CGObjCRuntime.h
index f56101df77b6..bb27c38db204 100644
--- a/clang/lib/CodeGen/CGObjCRuntime.h
+++ b/clang/lib/CodeGen/CGObjCRuntime.h
@@ -337,6 +337,23 @@ public:
MessageSendInfo getMessageSendInfo(const ObjCMethodDecl *method,
QualType resultType,
CallArgList &callArgs);
+ bool canMessageReceiverBeNull(CodeGenFunction &CGF,
+ const ObjCMethodDecl *method,
+ bool isSuper,
+ const ObjCInterfaceDecl *classReceiver,
+ llvm::Value *receiver);
+ static bool isWeakLinkedClass(const ObjCInterfaceDecl *cls);
+
+ /// Destroy the callee-destroyed arguments of the given method,
+ /// if it has any. Used for nil-receiver paths in message sends.
+ /// Never does anything if the method does not satisfy
+ /// hasParamDestroyedInCallee().
+ ///
+ /// \param callArgs - just the formal arguments, not including implicit
+ /// arguments such as self and cmd
+ static void destroyCalleeDestroyedArguments(CodeGenFunction &CGF,
+ const ObjCMethodDecl *method,
+ const CallArgList &callArgs);
// FIXME: This probably shouldn't be here, but the code to compute
// it is here.
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index c09797e91b99..75709b3c7e78 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -1448,8 +1448,8 @@ llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
const char *FileName = PLoc.getFilename();
unsigned Line = PLoc.getLine();
unsigned Column = PLoc.getColumn();
- SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName,
- Line, Column);
+ SrcLocStr =
+ OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line, Column);
}
unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
@@ -1560,13 +1560,22 @@ llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
}
llvm::FunctionCallee
-CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
+CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
+ bool IsGPUDistribute) {
assert((IVSize == 32 || IVSize == 64) &&
"IV size is not compatible with the omp runtime");
- StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
- : "__kmpc_for_static_init_4u")
- : (IVSigned ? "__kmpc_for_static_init_8"
- : "__kmpc_for_static_init_8u");
+ StringRef Name;
+ if (IsGPUDistribute)
+ Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
+ : "__kmpc_distribute_static_init_4u")
+ : (IVSigned ? "__kmpc_distribute_static_init_8"
+ : "__kmpc_distribute_static_init_8u");
+ else
+ Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
+ : "__kmpc_for_static_init_4u")
+ : (IVSigned ? "__kmpc_for_static_init_8"
+ : "__kmpc_for_static_init_8u");
+
llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
auto *PtrTy = llvm::PointerType::getUnqual(ITy);
llvm::Type *TypeParams[] = {
@@ -2112,7 +2121,7 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
Address ZeroAddrBound =
CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
/*Name=*/".bound.zero.addr");
- CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
+ CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
// ThreadId for serialized parallels is 0.
OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
@@ -2120,11 +2129,12 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
// Ensure we do not inline the function. This is trivially true for the ones
- // passed to __kmpc_fork_call but the ones calles in serialized regions
+ // passed to __kmpc_fork_call but the ones called in serialized regions
// could be inlined. This is not perfect but it is closer to the invariant
// we want, namely, every data environment starts with a new function.
// TODO: We should pass the if condition to the runtime function and do the
// handling there. Much cleaner code.
+ OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
@@ -2825,7 +2835,7 @@ void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
: OMP_IDENT_WORK_SECTIONS);
llvm::Value *ThreadId = getThreadID(CGF, Loc);
llvm::FunctionCallee StaticInitFunction =
- createForStaticInitFunction(Values.IVSize, Values.IVSigned);
+ createForStaticInitFunction(Values.IVSize, Values.IVSigned, false);
auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
@@ -2840,8 +2850,13 @@ void CGOpenMPRuntime::emitDistributeStaticInit(
llvm::Value *UpdatedLocation =
emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
llvm::Value *ThreadId = getThreadID(CGF, Loc);
- llvm::FunctionCallee StaticInitFunction =
- createForStaticInitFunction(Values.IVSize, Values.IVSigned);
+ llvm::FunctionCallee StaticInitFunction;
+ bool isGPUDistribute =
+ CGM.getLangOpts().OpenMPIsDevice &&
+ (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
+ StaticInitFunction = createForStaticInitFunction(
+ Values.IVSize, Values.IVSigned, isGPUDistribute);
+
emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
OMPC_SCHEDULE_MODIFIER_unknown, Values);
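
The entry-point naming used by createForStaticInitFunction can be illustrated with a standalone helper; this is a sketch mirroring the selection logic above, not code from the patch, and assumes only 32- and 64-bit induction variables (as the assert enforces):

    #include <cassert>

    // Pick the OpenMP runtime entry point for a statically scheduled loop:
    // __kmpc_for_static_init_{4,4u,8,8u} normally, or the
    // __kmpc_distribute_static_init_* variants for GPU 'distribute' regions.
    static const char *staticInitName(unsigned IVSize, bool IVSigned,
                                      bool IsGPUDistribute) {
      assert((IVSize == 32 || IVSize == 64) && "unsupported induction variable");
      if (IsGPUDistribute)
        return IVSize == 32
                   ? (IVSigned ? "__kmpc_distribute_static_init_4"
                               : "__kmpc_distribute_static_init_4u")
                   : (IVSigned ? "__kmpc_distribute_static_init_8"
                               : "__kmpc_distribute_static_init_8u");
      return IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
                                      : "__kmpc_for_static_init_4u")
                          : (IVSigned ? "__kmpc_for_static_init_8"
                                      : "__kmpc_for_static_init_8u");
    }
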
@@ -2862,9 +2877,16 @@ void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
: OMP_IDENT_WORK_SECTIONS),
getThreadID(CGF, Loc)};
auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
- CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
- CGM.getModule(), OMPRTL___kmpc_for_static_fini),
- Args);
+ if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
+ (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
+ CGF.EmitRuntimeCall(
+ OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
+ Args);
+ else
+ CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_for_static_fini),
+ Args);
}
void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
@@ -3891,7 +3913,7 @@ static void emitPrivatesInit(CodeGenFunction &CGF,
SharedRefLValue.getTBAAInfo());
} else if (CGF.LambdaCaptureFields.count(
Pair.second.Original->getCanonicalDecl()) > 0 ||
- dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
+ isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
} else {
// Processing for implicitly captured variables.
@@ -4400,14 +4422,14 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
if (NumOfElements) {
NumOfElements = CGF.Builder.CreateNUWAdd(
llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
- OpaqueValueExpr OVE(
+ auto *OVE = new (C) OpaqueValueExpr(
Loc,
C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
VK_PRValue);
- CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
+ CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
RValue::get(NumOfElements));
KmpTaskAffinityInfoArrayTy =
- C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
+ C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
/*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
// Properly emit variable-sized array.
auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
@@ -4758,8 +4780,8 @@ emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
LValue NumLVal = CGF.MakeAddrLValue(
CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
C.getUIntPtrType());
- CGF.InitTempAlloca(NumLVal.getAddress(CGF),
- llvm::ConstantInt::get(CGF.IntPtrTy, 0));
+ CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
+ NumLVal.getAddress(CGF));
llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
CGF.EmitStoreOfScalar(Add, NumLVal);
@@ -4860,7 +4882,7 @@ std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
bool HasRegularWithIterators = false;
llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
llvm::Value *NumOfRegularWithIterators =
- llvm::ConstantInt::get(CGF.IntPtrTy, 1);
+ llvm::ConstantInt::get(CGF.IntPtrTy, 0);
// Calculate number of depobj dependencies and regular deps with the iterators.
for (const OMPTaskDataTy::DependData &D : Dependencies) {
if (D.DepKind == OMPC_DEPEND_depobj) {
@@ -4874,12 +4896,15 @@ std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
continue;
}
// Include number of iterations, if any.
+
if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
+ llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
+ Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
NumOfRegularWithIterators =
- CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
+ CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
}
HasRegularWithIterators = true;
continue;
@@ -4898,13 +4923,13 @@ std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
NumOfElements =
CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
}
- OpaqueValueExpr OVE(Loc,
- C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
- VK_PRValue);
- CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
+ auto *OVE = new (C) OpaqueValueExpr(
+ Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
+ VK_PRValue);
+ CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
RValue::get(NumOfElements));
KmpDependInfoArrayTy =
- C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
+ C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
/*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
// CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
// Properly emit variable-sized array.
@@ -6239,21 +6264,51 @@ Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
SharedLVal.getAlignment());
}
-void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
- SourceLocation Loc) {
+void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
+ const OMPTaskDataTy &Data) {
if (!CGF.HaveInsertPoint())
return;
- if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
+ if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
+ // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
OMPBuilder.createTaskwait(CGF.Builder);
} else {
- // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
- // global_tid);
- llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
- // Ignore return result until untied tasks are supported.
- CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
- CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
- Args);
+ llvm::Value *ThreadID = getThreadID(CGF, Loc);
+ llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
+ auto &M = CGM.getModule();
+ Address DependenciesArray = Address::invalid();
+ llvm::Value *NumOfElements;
+ std::tie(NumOfElements, DependenciesArray) =
+ emitDependClause(CGF, Data.Dependences, Loc);
+ llvm::Value *DepWaitTaskArgs[6];
+ if (!Data.Dependences.empty()) {
+ DepWaitTaskArgs[0] = UpLoc;
+ DepWaitTaskArgs[1] = ThreadID;
+ DepWaitTaskArgs[2] = NumOfElements;
+ DepWaitTaskArgs[3] = DependenciesArray.getPointer();
+ DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
+ DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
+
+ CodeGenFunction::RunCleanupsScope LocalScope(CGF);
+
+ // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
+ // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
+ // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
+ // is specified.
+ CGF.EmitRuntimeCall(
+ OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
+ DepWaitTaskArgs);
+
+ } else {
+
+ // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
+ // global_tid);
+ llvm::Value *Args[] = {UpLoc, ThreadID};
+ // Ignore return result until untied tasks are supported.
+ CGF.EmitRuntimeCall(
+ OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
+ Args);
+ }
}
if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
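
At the source level, the __kmpc_omp_wait_deps path is reached by a taskwait that carries a depend clause (an OpenMP 5.0 feature); a taskwait without dependences still lowers to the plain __kmpc_omp_taskwait call. A small hypothetical example:

    // Build with: clang++ -fopenmp taskwait_depend.cpp
    int main() {
      int x = 0;
    #pragma omp parallel
    #pragma omp single
      {
    #pragma omp task depend(out: x)
        x = 42;
        // Waits only for sibling tasks with a matching dependence on x;
        // lowered via emitTaskwaitCall with Data.Dependences non-empty.
    #pragma omp taskwait depend(in: x)
      }
      return x == 42 ? 0 : 1;
    }
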
@@ -6739,6 +6794,7 @@ const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
case OMPD_parallel_master_taskloop:
case OMPD_parallel_master_taskloop_simd:
case OMPD_requires:
+ case OMPD_metadirective:
case OMPD_unknown:
break;
default:
@@ -7213,6 +7269,7 @@ llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
case OMPD_parallel_master_taskloop:
case OMPD_parallel_master_taskloop_simd:
case OMPD_requires:
+ case OMPD_metadirective:
case OMPD_unknown:
break;
default:
@@ -7268,6 +7325,14 @@ public:
/// 0x800 is reserved for compatibility with XLC.
/// Produce a runtime error if the data is not already allocated.
OMP_MAP_PRESENT = 0x1000,
+ // Increment and decrement a separate reference counter so that the data
+ // cannot be unmapped within the associated region. Thus, this flag is
+ // intended to be used on 'target' and 'target data' directives because they
+ // are inherently structured. It is not intended to be used on 'target
+ // enter data' and 'target exit data' directives because they are inherently
+ // dynamic.
+ // This is an OpenMP extension for the sake of OpenACC support.
+ OMP_MAP_OMPX_HOLD = 0x2000,
/// Signal that the runtime library should use args as an array of
/// descriptor_dim pointers and use args_size as dims. Used when we have
/// non-contiguous list items in target update directive
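
For illustration (not taken from the patch), the new OMP_MAP_OMPX_HOLD bit corresponds to the ompx_hold map-type modifier, which is only accepted when the extension is enabled on the command line:

    // Build with: clang++ -fopenmp -fopenmp-targets=<triple> -fopenmp-extensions hold.cpp
    void scale(double *a, int n) {
      // The hold reference count keeps a[0:n] mapped for the whole structured
      // region, even if nested code issues an extra unmapping request.
    #pragma omp target data map(ompx_hold, tofrom: a[0:n])
      {
    #pragma omp target
        for (int i = 0; i < n; ++i)
          a[i] *= 2.0;
      }
    }
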
@@ -7446,6 +7511,9 @@ private:
SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
DevPointersMap;
+ /// Map between lambda declarations and their map type.
+ llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
+
llvm::Value *getExprTypeSize(const Expr *E) const {
QualType ExprTy = E->getType().getCanonicalType();
@@ -7558,17 +7626,15 @@ private:
Bits |= OMP_MAP_PTR_AND_OBJ;
if (AddIsTargetParamFlag)
Bits |= OMP_MAP_TARGET_PARAM;
- if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
- != MapModifiers.end())
+ if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
Bits |= OMP_MAP_ALWAYS;
- if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
- != MapModifiers.end())
+ if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
Bits |= OMP_MAP_CLOSE;
- if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present) !=
- MapModifiers.end() ||
- llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present) !=
- MotionModifiers.end())
+ if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
+ llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
Bits |= OMP_MAP_PRESENT;
+ if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
+ Bits |= OMP_MAP_OMPX_HOLD;
if (IsNonContiguous)
Bits |= OMP_MAP_NON_CONTIG;
return Bits;
@@ -8404,6 +8470,15 @@ private:
return MappableExprsHandler::OMP_MAP_PRIVATE |
MappableExprsHandler::OMP_MAP_TO;
}
+ auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
+ if (I != LambdasMap.end())
+ // For map(to: lambda): use the user-specified map type.
+ return getMapTypeBits(
+ I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
+ /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(),
+ /*AddPtrFlag=*/false,
+ /*AddIsTargetParamFlag=*/false,
+ /*isNonContiguous=*/false);
return MappableExprsHandler::OMP_MAP_TO |
MappableExprsHandler::OMP_MAP_FROM;
}
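
For context, the lambda-specific branch above fires when a lambda object itself appears in a map(to:) clause on a target construct, in which case its user-written map type wins over the default tofrom. A hypothetical source pattern:

    void doubleAll(int *data, int n) {
      auto body = [=](int i) { data[i] *= 2; };
      // Mapping the lambda explicitly with 'to' copies its captures to the
      // device; the map-type bits for it come from this clause.
    #pragma omp target map(to: body) map(tofrom: data[0:n])
      for (int i = 0; i < n; ++i)
        body(i);
    }
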
@@ -8535,10 +8610,8 @@ private:
if (!C)
continue;
MapKind Kind = Other;
- if (!C->getMapTypeModifiers().empty() &&
- llvm::any_of(C->getMapTypeModifiers(), [](OpenMPMapModifierKind K) {
- return K == OMPC_MAP_MODIFIER_present;
- }))
+ if (llvm::is_contained(C->getMapTypeModifiers(),
+ OMPC_MAP_MODIFIER_present))
Kind = Present;
else if (C->getMapType() == OMPC_MAP_alloc)
Kind = Allocs;
@@ -8557,10 +8630,8 @@ private:
if (!C)
continue;
MapKind Kind = Other;
- if (!C->getMotionModifiers().empty() &&
- llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
- return K == OMPC_MOTION_MODIFIER_present;
- }))
+ if (llvm::is_contained(C->getMotionModifiers(),
+ OMPC_MOTION_MODIFIER_present))
Kind = Present;
const auto *EI = C->getVarRefs().begin();
for (const auto L : C->component_lists()) {
@@ -8575,10 +8646,8 @@ private:
if (!C)
continue;
MapKind Kind = Other;
- if (!C->getMotionModifiers().empty() &&
- llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
- return K == OMPC_MOTION_MODIFIER_present;
- }))
+ if (llvm::is_contained(C->getMotionModifiers(),
+ OMPC_MOTION_MODIFIER_present))
Kind = Present;
const auto *EI = C->getVarRefs().begin();
for (const auto L : C->component_lists()) {
@@ -8868,6 +8937,21 @@ public:
for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
for (auto L : C->component_lists())
DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
+ // Extract map information.
+ for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
+ if (C->getMapType() != OMPC_MAP_to)
+ continue;
+ for (auto L : C->component_lists()) {
+ const ValueDecl *VD = std::get<0>(L);
+ const auto *RD = VD ? VD->getType()
+ .getCanonicalType()
+ .getNonReferenceType()
+ ->getAsCXXRecordDecl()
+ : nullptr;
+ if (RD && RD->isLambda())
+ LambdasMap.try_emplace(std::get<0>(L), C);
+ }
+ }
}
/// Constructor for the declare mapper directive.
@@ -8922,6 +9006,20 @@ public:
CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
// Remove TARGET_PARAM flag from the first element
(*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
+ // If any element has the ompx_hold modifier, then make sure the runtime
+ // uses the hold reference count for the struct as a whole so that it won't
+ // be unmapped by an extra dynamic reference count decrement. Add it to all
+ // elements as well so the runtime knows which reference count to check
+ // when determining whether it's time for device-to-host transfers of
+ // individual elements.
+ if (CurTypes.end() !=
+ llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
+ return Type & OMP_MAP_OMPX_HOLD;
+ })) {
+ CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD;
+ for (auto &M : CurTypes)
+ M |= OMP_MAP_OMPX_HOLD;
+ }
// All other current entries will be MEMBER_OF the combined entry
// (except for PTR_AND_OBJ entries which do not have a placeholder value
@@ -9066,6 +9164,11 @@ public:
? nullptr
: Cap->getCapturedVar()->getCanonicalDecl();
+ // For map(to: lambda): skip here; it is processed in
+ // generateDefaultMapInfo.
+ if (LambdasMap.count(VD))
+ return;
+
// If this declaration appears in a is_device_ptr clause we just have to
// pass the pointer by value. If it is a reference to a declaration, we just
// pass its value.
@@ -9112,18 +9215,13 @@ public:
const MapData &RHS) {
ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
OpenMPMapClauseKind MapType = std::get<1>(RHS);
- bool HasPresent = !MapModifiers.empty() &&
- llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
- return K == clang::OMPC_MAP_MODIFIER_present;
- });
+ bool HasPresent =
+ llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
bool HasAllocs = MapType == OMPC_MAP_alloc;
MapModifiers = std::get<2>(RHS);
MapType = std::get<1>(LHS);
bool HasPresentR =
- !MapModifiers.empty() &&
- llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
- return K == clang::OMPC_MAP_MODIFIER_present;
- });
+ llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
bool HasAllocsR = MapType == OMPC_MAP_alloc;
return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
});
@@ -9434,34 +9532,50 @@ static void emitNonContiguousDescriptor(
}
}
+// Try to extract the base declaration from a `this->x` expression if possible.
+static ValueDecl *getDeclFromThisExpr(const Expr *E) {
+ if (!E)
+ return nullptr;
+
+ if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
+ if (const MemberExpr *ME =
+ dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
+ return ME->getMemberDecl();
+ return nullptr;
+}
+
/// Emit a string constant containing the names of the values mapped to the
/// offloading runtime library.
llvm::Constant *
emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
MappableExprsHandler::MappingExprInfo &MapExprs) {
- llvm::Constant *SrcLocStr;
- if (!MapExprs.getMapDecl()) {
- SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
+
+ if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
+ return OMPBuilder.getOrCreateDefaultSrcLocStr();
+
+ SourceLocation Loc;
+ if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
+ if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
+ Loc = VD->getLocation();
+ else
+ Loc = MapExprs.getMapExpr()->getExprLoc();
} else {
- std::string ExprName = "";
- if (MapExprs.getMapExpr()) {
- PrintingPolicy P(CGF.getContext().getLangOpts());
- llvm::raw_string_ostream OS(ExprName);
- MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
- OS.flush();
- } else {
- ExprName = MapExprs.getMapDecl()->getNameAsString();
- }
+ Loc = MapExprs.getMapDecl()->getLocation();
+ }
- SourceLocation Loc = MapExprs.getMapDecl()->getLocation();
- PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
- const char *FileName = PLoc.getFilename();
- unsigned Line = PLoc.getLine();
- unsigned Column = PLoc.getColumn();
- SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName.c_str(),
- Line, Column);
+ std::string ExprName = "";
+ if (MapExprs.getMapExpr()) {
+ PrintingPolicy P(CGF.getContext().getLangOpts());
+ llvm::raw_string_ostream OS(ExprName);
+ MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
+ OS.flush();
+ } else {
+ ExprName = MapExprs.getMapDecl()->getNameAsString();
}
- return SrcLocStr;
+
+ PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
+ return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName.c_str(),
+ PLoc.getLine(), PLoc.getColumn());
}
/// Emit the arrays used to pass the captures and map information to the
@@ -9809,6 +9923,7 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
case OMPD_parallel_master_taskloop:
case OMPD_parallel_master_taskloop_simd:
case OMPD_requires:
+ case OMPD_metadirective:
case OMPD_unknown:
default:
llvm_unreachable("Unexpected directive.");
@@ -10659,6 +10774,7 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
case OMPD_parallel_master_taskloop:
case OMPD_parallel_master_taskloop_simd:
case OMPD_requires:
+ case OMPD_metadirective:
case OMPD_unknown:
default:
llvm_unreachable("Unknown target directive for OpenMP device codegen.");
@@ -11340,6 +11456,7 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
case OMPD_target_parallel_for:
case OMPD_target_parallel_for_simd:
case OMPD_requires:
+ case OMPD_metadirective:
case OMPD_unknown:
default:
llvm_unreachable("Unexpected standalone target data directive.");
@@ -11626,11 +11743,11 @@ getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
// The LS of a function parameter / return value can only be a power
// of 2, starting from 8 bits, up to 128.
- assert(std::all_of(Sizes.begin(), Sizes.end(),
- [](unsigned Size) {
- return Size == 8 || Size == 16 || Size == 32 ||
- Size == 64 || Size == 128;
- }) &&
+ assert(llvm::all_of(Sizes,
+ [](unsigned Size) {
+ return Size == 8 || Size == 16 || Size == 32 ||
+ Size == 64 || Size == 128;
+ }) &&
"Invalid size");
return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
@@ -12286,7 +12403,7 @@ bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
return llvm::any_of(
CGM.getOpenMPRuntime().NontemporalDeclsStack,
- [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
+ [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
}
void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
@@ -12957,7 +13074,8 @@ Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
}
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
- SourceLocation Loc) {
+ SourceLocation Loc,
+ const OMPTaskDataTy &Data) {
llvm_unreachable("Not supported in SIMD-only mode");
}
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
index c24648aae7e1..527a23a8af6a 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -795,9 +795,11 @@ private:
llvm::Type *getKmpc_MicroPointerTy();
/// Returns __kmpc_for_static_init_* runtime function for the specified
- /// size \a IVSize and sign \a IVSigned.
+ /// size \a IVSize and sign \a IVSigned. Will create a distribute call
+ /// __kmpc_distribute_static_init* if \a IsGPUDistribute is set.
llvm::FunctionCallee createForStaticInitFunction(unsigned IVSize,
- bool IVSigned);
+ bool IVSigned,
+ bool IsGPUDistribute);
/// Returns __kmpc_dispatch_init_* runtime function for the specified
/// size \a IVSize and sign \a IVSigned.
@@ -1545,7 +1547,8 @@ public:
LValue SharedLVal);
/// Emit code for 'taskwait' directive.
- virtual void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc);
+ virtual void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
+ const OMPTaskDataTy &Data);
/// Emit code for 'cancellation point' construct.
/// \param CancelRegion Region kind for which the cancellation point must be
@@ -2383,7 +2386,8 @@ public:
LValue SharedLVal) override;
/// Emit code for 'taskwait' directive.
- void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc) override;
+ void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
+ const OMPTaskDataTy &Data) override;
/// Emit code for 'cancellation point' construct.
/// \param CancelRegion Region kind for which the cancellation point must be
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp
deleted file mode 100644
index 33d4ab838af1..000000000000
--- a/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp
+++ /dev/null
@@ -1,60 +0,0 @@
-//===-- CGOpenMPRuntimeAMDGCN.cpp - Interface to OpenMP AMDGCN Runtimes --===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This provides a class for OpenMP runtime code generation specialized to
-// AMDGCN targets from generalized CGOpenMPRuntimeGPU class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "CGOpenMPRuntimeAMDGCN.h"
-#include "CGOpenMPRuntimeGPU.h"
-#include "CodeGenFunction.h"
-#include "clang/AST/Attr.h"
-#include "clang/AST/DeclOpenMP.h"
-#include "clang/AST/StmtOpenMP.h"
-#include "clang/AST/StmtVisitor.h"
-#include "clang/Basic/Cuda.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/IR/IntrinsicsAMDGPU.h"
-
-using namespace clang;
-using namespace CodeGen;
-using namespace llvm::omp;
-
-CGOpenMPRuntimeAMDGCN::CGOpenMPRuntimeAMDGCN(CodeGenModule &CGM)
- : CGOpenMPRuntimeGPU(CGM) {
- if (!CGM.getLangOpts().OpenMPIsDevice)
- llvm_unreachable("OpenMP AMDGCN can only handle device code.");
-}
-
-llvm::Value *CGOpenMPRuntimeAMDGCN::getGPUWarpSize(CodeGenFunction &CGF) {
- CGBuilderTy &Bld = CGF.Builder;
- // return constant compile-time target-specific warp size
- unsigned WarpSize = CGF.getTarget().getGridValue(llvm::omp::GV_Warp_Size);
- return Bld.getInt32(WarpSize);
-}
-
-llvm::Value *CGOpenMPRuntimeAMDGCN::getGPUThreadID(CodeGenFunction &CGF) {
- CGBuilderTy &Bld = CGF.Builder;
- llvm::Function *F =
- CGF.CGM.getIntrinsic(llvm::Intrinsic::amdgcn_workitem_id_x);
- return Bld.CreateCall(F, llvm::None, "nvptx_tid");
-}
-
-llvm::Value *CGOpenMPRuntimeAMDGCN::getGPUNumThreads(CodeGenFunction &CGF) {
- CGBuilderTy &Bld = CGF.Builder;
- llvm::Module *M = &CGF.CGM.getModule();
- const char *LocSize = "__kmpc_amdgcn_gpu_num_threads";
- llvm::Function *F = M->getFunction(LocSize);
- if (!F) {
- F = llvm::Function::Create(
- llvm::FunctionType::get(CGF.Int32Ty, llvm::None, false),
- llvm::GlobalVariable::ExternalLinkage, LocSize, &CGF.CGM.getModule());
- }
- return Bld.CreateCall(F, llvm::None, "nvptx_num_threads");
-}
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.h b/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.h
deleted file mode 100644
index c1421261bfc1..000000000000
--- a/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.h
+++ /dev/null
@@ -1,43 +0,0 @@
-//===--- CGOpenMPRuntimeAMDGCN.h - Interface to OpenMP AMDGCN Runtimes ---===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This provides a class for OpenMP runtime code generation specialized to
-// AMDGCN targets from generalized CGOpenMPRuntimeGPU class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMEAMDGCN_H
-#define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMEAMDGCN_H
-
-#include "CGOpenMPRuntime.h"
-#include "CGOpenMPRuntimeGPU.h"
-#include "CodeGenFunction.h"
-#include "clang/AST/StmtOpenMP.h"
-
-namespace clang {
-namespace CodeGen {
-
-class CGOpenMPRuntimeAMDGCN final : public CGOpenMPRuntimeGPU {
-
-public:
- explicit CGOpenMPRuntimeAMDGCN(CodeGenModule &CGM);
-
- /// Get the GPU warp size.
- llvm::Value *getGPUWarpSize(CodeGenFunction &CGF) override;
-
- /// Get the id of the current thread on the GPU.
- llvm::Value *getGPUThreadID(CodeGenFunction &CGF) override;
-
- /// Get the maximum number of threads in a block of the GPU.
- llvm::Value *getGPUNumThreads(CodeGenFunction &CGF) override;
-};
-
-} // namespace CodeGen
-} // namespace clang
-
-#endif // LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMEAMDGCN_H
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index 63fecedc6fb7..dcb224f33156 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "CGOpenMPRuntimeGPU.h"
-#include "CGOpenMPRuntimeNVPTX.h"
#include "CodeGenFunction.h"
#include "clang/AST/Attr.h"
#include "clang/AST/DeclOpenMP.h"
@@ -21,7 +20,7 @@
#include "clang/Basic/Cuda.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Frontend/OpenMP/OMPGridValues.h"
-#include "llvm/IR/IntrinsicsNVPTX.h"
+#include "llvm/Support/MathExtras.h"
using namespace clang;
using namespace CodeGen;
@@ -106,8 +105,7 @@ public:
/// is the same for all known NVPTX architectures.
enum MachineConfiguration : unsigned {
/// See "llvm/Frontend/OpenMP/OMPGridValues.h" for various related target
- /// specific Grid Values like GV_Warp_Size, GV_Warp_Size_Log2,
- /// and GV_Warp_Size_Log2_Mask.
+ /// specific Grid Values like GV_Warp_Size, GV_Slot_Size.
/// Global memory alignment for performance.
GlobalMemoryAlignment = 128,
@@ -339,7 +337,7 @@ class CheckVarsEscapingDeclContext final
assert(!GlobalizedRD &&
"Record for globalized variables is built already.");
ArrayRef<const ValueDecl *> EscapedDeclsForParallel, EscapedDeclsForTeams;
- unsigned WarpSize = CGF.getTarget().getGridValue(llvm::omp::GV_Warp_Size);
+ unsigned WarpSize = CGF.getTarget().getGridValue().GV_Warp_Size;
if (IsInTTDRegion)
EscapedDeclsForTeams = EscapedDecls.getArrayRef();
else
@@ -536,7 +534,7 @@ public:
static llvm::Value *getNVPTXWarpID(CodeGenFunction &CGF) {
CGBuilderTy &Bld = CGF.Builder;
unsigned LaneIDBits =
- CGF.getTarget().getGridValue(llvm::omp::GV_Warp_Size_Log2);
+ llvm::Log2_32(CGF.getTarget().getGridValue().GV_Warp_Size);
auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
return Bld.CreateAShr(RT.getGPUThreadID(CGF), LaneIDBits, "nvptx_warp_id");
}
@@ -546,8 +544,9 @@ static llvm::Value *getNVPTXWarpID(CodeGenFunction &CGF) {
/// on the NVPTX device, to generate more efficient code.
static llvm::Value *getNVPTXLaneID(CodeGenFunction &CGF) {
CGBuilderTy &Bld = CGF.Builder;
- unsigned LaneIDMask = CGF.getContext().getTargetInfo().getGridValue(
- llvm::omp::GV_Warp_Size_Log2_Mask);
+ unsigned LaneIDBits =
+ llvm::Log2_32(CGF.getTarget().getGridValue().GV_Warp_Size);
+ unsigned LaneIDMask = ~0u >> (32u - LaneIDBits);
auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
return Bld.CreateAnd(RT.getGPUThreadID(CGF), Bld.getInt32(LaneIDMask),
"nvptx_lane_id");
@@ -1111,11 +1110,12 @@ void CGOpenMPRuntimeGPU::emitSPMDKernel(const OMPExecutableDirective &D,
// warps participate in parallel work.
static void setPropertyExecutionMode(CodeGenModule &CGM, StringRef Name,
bool Mode) {
- auto *GVMode =
- new llvm::GlobalVariable(CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
- llvm::GlobalValue::WeakAnyLinkage,
- llvm::ConstantInt::get(CGM.Int8Ty, Mode ? 0 : 1),
- Twine(Name, "_exec_mode"));
+ auto *GVMode = new llvm::GlobalVariable(
+ CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
+ llvm::GlobalValue::WeakAnyLinkage,
+ llvm::ConstantInt::get(CGM.Int8Ty, Mode ? OMP_TGT_EXEC_MODE_SPMD
+ : OMP_TGT_EXEC_MODE_GENERIC),
+ Twine(Name, "_exec_mode"));
CGM.addCompilerUsedGlobal(GVMode);
}
@@ -1195,7 +1195,17 @@ unsigned CGOpenMPRuntimeGPU::getDefaultLocationReserved2Flags() const {
CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM)
: CGOpenMPRuntime(CGM, "_", "$") {
if (!CGM.getLangOpts().OpenMPIsDevice)
- llvm_unreachable("OpenMP NVPTX can only handle device code.");
+ llvm_unreachable("OpenMP can only handle device code.");
+
+ llvm::OpenMPIRBuilder &OMPBuilder = getOMPBuilder();
+ if (CGM.getLangOpts().OpenMPTargetNewRuntime) {
+ OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPTargetDebug,
+ "__omp_rtl_debug_kind");
+ OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPTeamSubscription,
+ "__omp_rtl_assume_teams_oversubscription");
+ OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPThreadSubscription,
+ "__omp_rtl_assume_threads_oversubscription");
+ }
}
void CGOpenMPRuntimeGPU::emitProcBindClause(CodeGenFunction &CGF,
@@ -1308,7 +1318,7 @@ llvm::Function *CGOpenMPRuntimeGPU::emitTeamsOutlinedFunction(
const RecordDecl *GlobalizedRD = nullptr;
llvm::SmallVector<const ValueDecl *, 4> LastPrivatesReductions;
llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields;
- unsigned WarpSize = CGM.getTarget().getGridValue(llvm::omp::GV_Warp_Size);
+ unsigned WarpSize = CGM.getTarget().getGridValue().GV_Warp_Size;
// Globalize team reductions variable unconditionally in all modes.
if (getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD)
getTeamsReductionVars(CGM.getContext(), D, LastPrivatesReductions);
@@ -1488,7 +1498,7 @@ void CGOpenMPRuntimeGPU::emitTeamsCall(CodeGenFunction &CGF,
Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
/*Name=*/".zero.addr");
- CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
+ CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddr);
llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer());
OutlinedFnArgs.push_back(ZeroAddr.getPointer());
@@ -2089,7 +2099,7 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM,
"__openmp_nvptx_data_transfer_temporary_storage";
llvm::GlobalVariable *TransferMedium =
M.getGlobalVariable(TransferMediumName);
- unsigned WarpSize = CGF.getTarget().getGridValue(llvm::omp::GV_Warp_Size);
+ unsigned WarpSize = CGF.getTarget().getGridValue().GV_Warp_Size;
if (!TransferMedium) {
auto *Ty = llvm::ArrayType::get(CGM.Int32Ty, WarpSize);
unsigned SharedAddressSpace = C.getTargetAddressSpace(LangAS::cuda_shared);
@@ -3476,7 +3486,7 @@ llvm::Function *CGOpenMPRuntimeGPU::createParallelDataSharingWrapper(
Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
/*Name=*/".zero.addr");
- CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
+ CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddr);
// Get the array of arguments.
SmallVector<llvm::Value *, 8> Args;
@@ -3935,3 +3945,31 @@ void CGOpenMPRuntimeGPU::clear() {
}
CGOpenMPRuntime::clear();
}
+
+llvm::Value *CGOpenMPRuntimeGPU::getGPUNumThreads(CodeGenFunction &CGF) {
+ CGBuilderTy &Bld = CGF.Builder;
+ llvm::Module *M = &CGF.CGM.getModule();
+ const char *LocSize = "__kmpc_get_hardware_num_threads_in_block";
+ llvm::Function *F = M->getFunction(LocSize);
+ if (!F) {
+ F = llvm::Function::Create(
+ llvm::FunctionType::get(CGF.Int32Ty, llvm::None, false),
+ llvm::GlobalVariable::ExternalLinkage, LocSize, &CGF.CGM.getModule());
+ }
+ return Bld.CreateCall(F, llvm::None, "nvptx_num_threads");
+}
+
+llvm::Value *CGOpenMPRuntimeGPU::getGPUThreadID(CodeGenFunction &CGF) {
+ ArrayRef<llvm::Value *> Args{};
+ return CGF.EmitRuntimeCall(
+ OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_get_hardware_thread_id_in_block),
+ Args);
+}
+
+llvm::Value *CGOpenMPRuntimeGPU::getGPUWarpSize(CodeGenFunction &CGF) {
+ ArrayRef<llvm::Value *> Args{};
+ return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_get_warp_size),
+ Args);
+}
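The three helpers added above route every query through the OpenMP device runtime rather than through NVPTX intrinsics, which is what makes the NVPTX-only subclass below removable. A sketch of the entry points the emitted calls resolve to, with signatures inferred from the call sites above rather than copied from the runtime sources:

#include <cstdint>

extern "C" {
// Maximum number of hardware threads in the current block / work-group.
int32_t __kmpc_get_hardware_num_threads_in_block();
// Id of the calling hardware thread within its block.
int32_t __kmpc_get_hardware_thread_id_in_block();
// Warp (NVPTX) or wavefront (AMDGCN) width on the current device.
int32_t __kmpc_get_warp_size();
}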
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
index b5f1b843c46b..ac51264d7685 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
@@ -17,7 +17,6 @@
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "clang/AST/StmtOpenMP.h"
-#include "llvm/Frontend/OpenMP/OMPGridValues.h"
namespace clang {
namespace CodeGen {
@@ -177,13 +176,13 @@ public:
/// and NVPTX.
/// Get the GPU warp size.
- virtual llvm::Value *getGPUWarpSize(CodeGenFunction &CGF) = 0;
+ llvm::Value *getGPUWarpSize(CodeGenFunction &CGF);
/// Get the id of the current thread on the GPU.
- virtual llvm::Value *getGPUThreadID(CodeGenFunction &CGF) = 0;
+ llvm::Value *getGPUThreadID(CodeGenFunction &CGF);
/// Get the maximum number of threads in a block of the GPU.
- virtual llvm::Value *getGPUNumThreads(CodeGenFunction &CGF) = 0;
+ llvm::Value *getGPUNumThreads(CodeGenFunction &CGF);
/// Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32
/// global_tid, int proc_bind) to generate code for 'proc_bind' clause.
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
deleted file mode 100644
index 1688d07b90b6..000000000000
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ /dev/null
@@ -1,56 +0,0 @@
-//===---- CGOpenMPRuntimeNVPTX.cpp - Interface to OpenMP NVPTX Runtimes ---===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This provides a class for OpenMP runtime code generation specialized to NVPTX
-// targets from generalized CGOpenMPRuntimeGPU class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "CGOpenMPRuntimeNVPTX.h"
-#include "CGOpenMPRuntimeGPU.h"
-#include "CodeGenFunction.h"
-#include "clang/AST/Attr.h"
-#include "clang/AST/DeclOpenMP.h"
-#include "clang/AST/StmtOpenMP.h"
-#include "clang/AST/StmtVisitor.h"
-#include "clang/Basic/Cuda.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/IR/IntrinsicsNVPTX.h"
-
-using namespace clang;
-using namespace CodeGen;
-using namespace llvm::omp;
-
-CGOpenMPRuntimeNVPTX::CGOpenMPRuntimeNVPTX(CodeGenModule &CGM)
- : CGOpenMPRuntimeGPU(CGM) {
- if (!CGM.getLangOpts().OpenMPIsDevice)
- llvm_unreachable("OpenMP NVPTX can only handle device code.");
-}
-
-llvm::Value *CGOpenMPRuntimeNVPTX::getGPUWarpSize(CodeGenFunction &CGF) {
- return CGF.EmitRuntimeCall(
- llvm::Intrinsic::getDeclaration(
- &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize),
- "nvptx_warp_size");
-}
-
-llvm::Value *CGOpenMPRuntimeNVPTX::getGPUThreadID(CodeGenFunction &CGF) {
- CGBuilderTy &Bld = CGF.Builder;
- llvm::Function *F;
- F = llvm::Intrinsic::getDeclaration(
- &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x);
- return Bld.CreateCall(F, llvm::None, "nvptx_tid");
-}
-
-llvm::Value *CGOpenMPRuntimeNVPTX::getGPUNumThreads(CodeGenFunction &CGF) {
- CGBuilderTy &Bld = CGF.Builder;
- llvm::Function *F;
- F = llvm::Intrinsic::getDeclaration(
- &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x);
- return Bld.CreateCall(F, llvm::None, "nvptx_num_threads");
-}
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
deleted file mode 100644
index 5f1602959266..000000000000
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
+++ /dev/null
@@ -1,43 +0,0 @@
-//===----- CGOpenMPRuntimeNVPTX.h - Interface to OpenMP NVPTX Runtimes ----===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This provides a class for OpenMP runtime code generation specialized to NVPTX
-// targets from generalized CGOpenMPRuntimeGPU class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H
-#define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H
-
-#include "CGOpenMPRuntime.h"
-#include "CGOpenMPRuntimeGPU.h"
-#include "CodeGenFunction.h"
-#include "clang/AST/StmtOpenMP.h"
-
-namespace clang {
-namespace CodeGen {
-
-class CGOpenMPRuntimeNVPTX final : public CGOpenMPRuntimeGPU {
-
-public:
- explicit CGOpenMPRuntimeNVPTX(CodeGenModule &CGM);
-
- /// Get the GPU warp size.
- llvm::Value *getGPUWarpSize(CodeGenFunction &CGF) override;
-
- /// Get the id of the current thread on the GPU.
- llvm::Value *getGPUThreadID(CodeGenFunction &CGF) override;
-
- /// Get the maximum number of threads in a block of the GPU.
- llvm::Value *getGPUNumThreads(CodeGenFunction &CGF) override;
-};
-
-} // CodeGen namespace.
-} // clang namespace.
-
-#endif // LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index aeb319ca1581..d399ff919cc3 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -26,6 +26,7 @@
#include "clang/Basic/TargetInfo.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/IR/Assumptions.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Intrinsics.h"
@@ -196,6 +197,9 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs) {
case Stmt::SEHTryStmtClass:
EmitSEHTryStmt(cast<SEHTryStmt>(*S));
break;
+ case Stmt::OMPMetaDirectiveClass:
+ EmitOMPMetaDirective(cast<OMPMetaDirective>(*S));
+ break;
case Stmt::OMPCanonicalLoopClass:
EmitOMPCanonicalLoop(cast<OMPCanonicalLoop>(S));
break;
@@ -389,6 +393,9 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs) {
case Stmt::OMPMaskedDirectiveClass:
EmitOMPMaskedDirective(cast<OMPMaskedDirective>(*S));
break;
+ case Stmt::OMPGenericLoopDirectiveClass:
+ EmitOMPGenericLoopDirective(cast<OMPGenericLoopDirective>(*S));
+ break;
}
}
@@ -709,6 +716,17 @@ void CodeGenFunction::EmitIndirectGotoStmt(const IndirectGotoStmt &S) {
}
void CodeGenFunction::EmitIfStmt(const IfStmt &S) {
+ // The else branch of a consteval if statement is always the only branch that
+ // can be runtime evaluated.
+ if (S.isConsteval()) {
+ const Stmt *Executed = S.isNegatedConsteval() ? S.getThen() : S.getElse();
+ if (Executed) {
+ RunCleanupsScope ExecutedScope(*this);
+ EmitStmt(Executed);
+ }
+ return;
+ }
+
// C99 6.8.4.1: The first substatement is executed if the expression compares
// unequal to 0. The condition must be a scalar type.
LexicalScope ConditionScope(*this, S.getCond()->getSourceRange());
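The added early-return handles C++23 if consteval: only the branch that can be evaluated at run time is emitted (the else branch, or the then branch for a negated form). An illustrative snippet, not taken from the patch:

// C++23: the consteval branch can only run during constant evaluation,
// so EmitIfStmt emits just the other branch (or nothing if it is absent).
constexpr int twice(int x) {
  if consteval {
    return x + x;      // constant-evaluation path, never reaches codegen
  } else {
    return 2 * x;      // the only branch emitted for run-time calls
  }
}

int runtime_user(int v) { return twice(v); }  // uses the emitted else branch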
@@ -1518,6 +1536,12 @@ void CodeGenFunction::EmitCaseStmt(const CaseStmt &S,
NextCase = dyn_cast<CaseStmt>(CurCase->getSubStmt());
}
+ // Generate a stop point for debug info if the case statement is
+ // followed by a default statement. A fallthrough case before a
+ // default case gets its own branch target.
+ if (CurCase->getSubStmt()->getStmtClass() == Stmt::DefaultStmtClass)
+ EmitStopPoint(CurCase);
+
// Normal default recursion for non-cases.
EmitStmt(CurCase->getSubStmt());
}
@@ -2097,7 +2121,8 @@ CodeGenFunction::EmitAsmInputLValue(const TargetInfo::ConstraintInfo &Info,
} else {
llvm::Type *Ty = ConvertType(InputType);
uint64_t Size = CGM.getDataLayout().getTypeSizeInBits(Ty);
- if (Size <= 64 && llvm::isPowerOf2_64(Size)) {
+ if ((Size <= 64 && llvm::isPowerOf2_64(Size)) ||
+ getTargetHooks().isScalarizableAsmOperand(*this, Ty)) {
Ty = llvm::IntegerType::get(getLLVMContext(), Size);
Ty = llvm::PointerType::getUnqual(Ty);
@@ -2187,20 +2212,16 @@ static void UpdateAsmCallInst(llvm::CallBase &Result, bool HasSideEffect,
CodeGenFunction &CGF,
std::vector<llvm::Value *> &RegResults) {
if (!HasUnwindClobber)
- Result.addAttribute(llvm::AttributeList::FunctionIndex,
- llvm::Attribute::NoUnwind);
+ Result.addFnAttr(llvm::Attribute::NoUnwind);
if (NoMerge)
- Result.addAttribute(llvm::AttributeList::FunctionIndex,
- llvm::Attribute::NoMerge);
+ Result.addFnAttr(llvm::Attribute::NoMerge);
// Attach readnone and readonly attributes.
if (!HasSideEffect) {
if (ReadNone)
- Result.addAttribute(llvm::AttributeList::FunctionIndex,
- llvm::Attribute::ReadNone);
+ Result.addFnAttr(llvm::Attribute::ReadNone);
else if (ReadOnly)
- Result.addAttribute(llvm::AttributeList::FunctionIndex,
- llvm::Attribute::ReadOnly);
+ Result.addFnAttr(llvm::Attribute::ReadOnly);
}
// Slap the source location of the inline asm into a !srcloc metadata on the
@@ -2222,8 +2243,7 @@ static void UpdateAsmCallInst(llvm::CallBase &Result, bool HasSideEffect,
// convergent (meaning, they may call an intrinsically convergent op, such
// as bar.sync, and so can't have certain optimizations applied around
// them).
- Result.addAttribute(llvm::AttributeList::FunctionIndex,
- llvm::Attribute::Convergent);
+ Result.addFnAttr(llvm::Attribute::Convergent);
// Extract all of the register value results from the asm.
if (ResultRegTypes.size() == 1) {
RegResults.push_back(&Result);
@@ -2320,23 +2340,28 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
// If this is a register output, then make the inline asm return it
// by-value. If this is a memory result, return the value by-reference.
- bool isScalarizableAggregate =
- hasAggregateEvaluationKind(OutExpr->getType());
- if (!Info.allowsMemory() && (hasScalarEvaluationKind(OutExpr->getType()) ||
- isScalarizableAggregate)) {
+ QualType QTy = OutExpr->getType();
+ const bool IsScalarOrAggregate = hasScalarEvaluationKind(QTy) ||
+ hasAggregateEvaluationKind(QTy);
+ if (!Info.allowsMemory() && IsScalarOrAggregate) {
+
Constraints += "=" + OutputConstraint;
- ResultRegQualTys.push_back(OutExpr->getType());
+ ResultRegQualTys.push_back(QTy);
ResultRegDests.push_back(Dest);
- ResultTruncRegTypes.push_back(ConvertTypeForMem(OutExpr->getType()));
- if (Info.allowsRegister() && isScalarizableAggregate) {
- ResultTypeRequiresCast.push_back(true);
- unsigned Size = getContext().getTypeSize(OutExpr->getType());
- llvm::Type *ConvTy = llvm::IntegerType::get(getLLVMContext(), Size);
- ResultRegTypes.push_back(ConvTy);
- } else {
- ResultTypeRequiresCast.push_back(false);
- ResultRegTypes.push_back(ResultTruncRegTypes.back());
+
+ llvm::Type *Ty = ConvertTypeForMem(QTy);
+ const bool RequiresCast = Info.allowsRegister() &&
+ (getTargetHooks().isScalarizableAsmOperand(*this, Ty) ||
+ Ty->isAggregateType());
+
+ ResultTruncRegTypes.push_back(Ty);
+ ResultTypeRequiresCast.push_back(RequiresCast);
+
+ if (RequiresCast) {
+ unsigned Size = getContext().getTypeSize(QTy);
+ Ty = llvm::IntegerType::get(getLLVMContext(), Size);
}
+ ResultRegTypes.push_back(Ty);
// If this output is tied to an input, and if the input is larger, then
// we need to set the actual result type of the inline asm node to be the
// same as the input type.
@@ -2604,8 +2629,14 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
llvm::FunctionType::get(ResultType, ArgTypes, false);
bool HasSideEffect = S.isVolatile() || S.getNumOutputs() == 0;
+
+ llvm::InlineAsm::AsmDialect GnuAsmDialect =
+ CGM.getCodeGenOpts().getInlineAsmDialect() == CodeGenOptions::IAD_ATT
+ ? llvm::InlineAsm::AD_ATT
+ : llvm::InlineAsm::AD_Intel;
llvm::InlineAsm::AsmDialect AsmDialect = isa<MSAsmStmt>(&S) ?
- llvm::InlineAsm::AD_Intel : llvm::InlineAsm::AD_ATT;
+ llvm::InlineAsm::AD_Intel : GnuAsmDialect;
+
llvm::InlineAsm *IA = llvm::InlineAsm::get(
FTy, AsmString, Constraints, HasSideEffect,
/* IsAlignStack */ false, AsmDialect, HasUnwindClobber);
@@ -2638,11 +2669,11 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
assert(ResultTypeRequiresCast.size() <= ResultRegDests.size());
for (unsigned i = 0, e = RegResults.size(); i != e; ++i) {
llvm::Value *Tmp = RegResults[i];
+ llvm::Type *TruncTy = ResultTruncRegTypes[i];
// If the result type of the LLVM IR asm doesn't match the result type of
// the expression, do the conversion.
if (ResultRegTypes[i] != ResultTruncRegTypes[i]) {
- llvm::Type *TruncTy = ResultTruncRegTypes[i];
// Truncate the integer result to the right size, note that TruncTy can be
// a pointer.
@@ -2672,6 +2703,11 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
unsigned Size = getContext().getTypeSize(ResultRegQualTys[i]);
Address A = Builder.CreateBitCast(Dest.getAddress(*this),
ResultRegTypes[i]->getPointerTo());
+ if (getTargetHooks().isScalarizableAsmOperand(*this, TruncTy)) {
+ Builder.CreateStore(Tmp, A);
+ continue;
+ }
+
QualType Ty = getContext().getIntTypeForBitwidth(Size, /*Signed*/ false);
if (Ty.isNull()) {
const Expr *OutExpr = S.getOutputExpr(i);
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index f6233b791182..4f14459e4d28 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -309,8 +309,8 @@ llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) {
VlaSizePair VlaSize = getVLASize(VAT);
Ty = VlaSize.Type;
- Size = Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts)
- : VlaSize.NumElts;
+ Size =
+ Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts) : VlaSize.NumElts;
}
SizeInChars = C.getTypeSizeInChars(Ty);
if (SizeInChars.isZero())
@@ -450,7 +450,8 @@ static llvm::Function *emitOutlinedFunctionPrologue(
Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(),
SourceLocation(), DeclarationName(), FunctionTy,
Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static,
- /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false);
+ /*UsesFPIntrin=*/false, /*isInlineSpecified=*/false,
+ /*hasWrittenPrototype=*/false);
}
for (const FieldDecl *FD : RD->fields()) {
QualType ArgType = FD->getType();
@@ -497,9 +498,8 @@ static llvm::Function *emitOutlinedFunctionPrologue(
: CGM.getOpenMPRuntime().translateParameter(FD, Arg));
++I;
}
- Args.append(
- std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
- CD->param_end());
+ Args.append(std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
+ CD->param_end());
TargetArgs.append(
std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
CD->param_end());
@@ -518,8 +518,10 @@ static llvm::Function *emitOutlinedFunctionPrologue(
F->setDoesNotRecurse();
// Always inline the outlined function if optimizations are enabled.
- if (CGM.getCodeGenOpts().OptimizationLevel != 0)
+ if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
+ F->removeFnAttr(llvm::Attribute::NoInline);
F->addFnAttr(llvm::Attribute::AlwaysInline);
+ }
// Generate the function.
CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
@@ -671,9 +673,9 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
if (EI != VLASizes.end()) {
CallArg = EI->second.second;
} else {
- LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
- Arg->getType(),
- AlignmentSource::Decl);
+ LValue LV =
+ WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
+ Arg->getType(), AlignmentSource::Decl);
CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
}
}
@@ -718,29 +720,29 @@ void CodeGenFunction::EmitOMPAggregateAssign(
CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);
llvm::PHINode *SrcElementPHI =
- Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
+ Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
SrcElementPHI->addIncoming(SrcBegin, EntryBB);
Address SrcElementCurrent =
Address(SrcElementPHI,
SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
- llvm::PHINode *DestElementPHI =
- Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
+ llvm::PHINode *DestElementPHI = Builder.CreatePHI(
+ DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
DestElementPHI->addIncoming(DestBegin, EntryBB);
Address DestElementCurrent =
- Address(DestElementPHI,
- DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
+ Address(DestElementPHI,
+ DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
// Emit copy.
CopyGen(DestElementCurrent, SrcElementCurrent);
// Shift the address forward by one element.
- llvm::Value *DestElementNext = Builder.CreateConstGEP1_32(
- DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
- "omp.arraycpy.dest.element");
- llvm::Value *SrcElementNext = Builder.CreateConstGEP1_32(
- SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
- "omp.arraycpy.src.element");
+ llvm::Value *DestElementNext =
+ Builder.CreateConstGEP1_32(DestAddr.getElementType(), DestElementPHI,
+ /*Idx0=*/1, "omp.arraycpy.dest.element");
+ llvm::Value *SrcElementNext =
+ Builder.CreateConstGEP1_32(SrcAddr.getElementType(), SrcElementPHI,
+ /*Idx0=*/1, "omp.arraycpy.src.element");
// Check whether we've reached the end.
llvm::Value *Done =
Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
@@ -1003,9 +1005,9 @@ bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
LocalDeclMap.erase(VD);
} else {
MasterAddr =
- Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
- : CGM.GetAddrOfGlobal(VD),
- getContext().getDeclAlign(VD));
+ Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
+ : CGM.GetAddrOfGlobal(VD),
+ getContext().getDeclAlign(VD));
}
// Get the address of the threadprivate variable.
Address PrivateAddr = EmitLValue(*IRef).getAddress(*this);
@@ -1076,7 +1078,7 @@ bool CodeGenFunction::EmitOMPLastprivateClauseInit(
PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() {
DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
/*RefersToEnclosingVariableOrCapture=*/
- CapturedStmtInfo->lookup(OrigVD) != nullptr,
+ CapturedStmtInfo->lookup(OrigVD) != nullptr,
(*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
return EmitLValue(&DRE).getAddress(*this);
});
@@ -1085,19 +1087,19 @@ bool CodeGenFunction::EmitOMPLastprivateClauseInit(
// for 'firstprivate' clause.
if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
- bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD, C,
- OrigVD]() {
- if (C->getKind() == OMPC_LASTPRIVATE_conditional) {
- Address VDAddr =
- CGM.getOpenMPRuntime().emitLastprivateConditionalInit(*this,
- OrigVD);
- setAddrOfLocalVar(VD, VDAddr);
- return VDAddr;
- }
- // Emit private VarDecl with copy init.
- EmitDecl(*VD);
- return GetAddrOfLocalVar(VD);
- });
+ bool IsRegistered =
+ PrivateScope.addPrivate(OrigVD, [this, VD, C, OrigVD]() {
+ if (C->getKind() == OMPC_LASTPRIVATE_conditional) {
+ Address VDAddr =
+ CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
+ *this, OrigVD);
+ setAddrOfLocalVar(VD, VDAddr);
+ return VDAddr;
+ }
+ // Emit private VarDecl with copy init.
+ EmitDecl(*VD);
+ return GetAddrOfLocalVar(VD);
+ });
assert(IsRegistered &&
"lastprivate var already registered as private");
(void)IsRegistered;
@@ -1292,14 +1294,12 @@ void CodeGenFunction::EmitOMPReductionClauseInit(
OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
}
PrivateScope.addPrivate(LHSVD, [OriginalAddr]() { return OriginalAddr; });
- PrivateScope.addPrivate(
- RHSVD, [this, PrivateVD, RHSVD, IsArray]() {
- return IsArray
- ? Builder.CreateElementBitCast(
+ PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD, IsArray]() {
+ return IsArray ? Builder.CreateElementBitCast(
GetAddrOfLocalVar(PrivateVD),
ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
: GetAddrOfLocalVar(PrivateVD);
- });
+ });
}
++ILHS;
++IRHS;
@@ -1786,6 +1786,10 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
checkForLastprivateConditionalUpdate(*this, S);
}
+void CodeGenFunction::EmitOMPMetaDirective(const OMPMetaDirective &S) {
+ EmitStmt(S.getIfStmt());
+}
+
namespace {
/// RAII to handle scopes for loop transformation directives.
class OMPTransformDirectiveScopeRAII {
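EmitOMPMetaDirective simply emits the statement Sema selected for the metadirective (S.getIfStmt()), so the variant choice is already resolved by the time codegen runs. A hedged OpenMP 5.x source example of the construct, for illustration only:

void scale(float *x, int n) {
  // Sema picks one variant based on the context selectors; codegen then
  // lowers only that chosen directive.
#pragma omp metadirective when(device = {arch(nvptx64)} : target teams loop) default(parallel for)
  for (int i = 0; i < n; ++i)
    x[i] *= 2.0f;
}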
@@ -1827,9 +1831,7 @@ static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop,
return;
}
if (SimplifiedS == NextLoop) {
- if (auto *Dir = dyn_cast<OMPTileDirective>(SimplifiedS))
- SimplifiedS = Dir->getTransformedStmt();
- if (auto *Dir = dyn_cast<OMPUnrollDirective>(SimplifiedS))
+ if (auto *Dir = dyn_cast<OMPLoopTransformationDirective>(SimplifiedS))
SimplifiedS = Dir->getTransformedStmt();
if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(SimplifiedS))
SimplifiedS = CanonLoop->getLoopStmt();
@@ -1953,11 +1955,27 @@ llvm::CanonicalLoopInfo *
CodeGenFunction::EmitOMPCollapsedCanonicalLoopNest(const Stmt *S, int Depth) {
assert(Depth == 1 && "Nested loops with OpenMPIRBuilder not yet implemented");
+ // The caller is processing the loop-associated directive processing the \p
+ // Depth loops nested in \p S. Put the previous pending loop-associated
+ // directive to the stack. If the current loop-associated directive is a loop
+ // transformation directive, it will push its generated loops onto the stack
+ // such that together with the loops left here they form the combined loop
+ // nest for the parent loop-associated directive.
+ int ParentExpectedOMPLoopDepth = ExpectedOMPLoopDepth;
+ ExpectedOMPLoopDepth = Depth;
+
EmitStmt(S);
assert(OMPLoopNestStack.size() >= (size_t)Depth && "Found too few loops");
// The last added loop is the outermost one.
- return OMPLoopNestStack.back();
+ llvm::CanonicalLoopInfo *Result = OMPLoopNestStack.back();
+
+ // Pop the \p Depth loops requested by the call from that stack and restore
+ // the previous context.
+ OMPLoopNestStack.set_size(OMPLoopNestStack.size() - Depth);
+ ExpectedOMPLoopDepth = ParentExpectedOMPLoopDepth;
+
+ return Result;
}
void CodeGenFunction::EmitOMPCanonicalLoop(const OMPCanonicalLoop *S) {
@@ -2113,9 +2131,10 @@ bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
CapturedStmtInfo->lookup(OrigVD) != nullptr,
VD->getInit()->getType(), VK_LValue,
VD->getInit()->getExprLoc());
- EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
- VD->getType()),
- /*capturedByInit=*/false);
+ EmitExprAsInit(
+ &DRE, VD,
+ MakeAddrLValue(Emission.getAllocatedAddress(), VD->getType()),
+ /*capturedByInit=*/false);
EmitAutoVarCleanups(Emission);
} else {
EmitVarDecl(*VD);
@@ -2218,9 +2237,8 @@ void CodeGenFunction::EmitOMPPrivateLoopCounters(
AutoVarEmission VarEmission = EmitAutoVarAlloca(*PrivateVD);
EmitAutoVarCleanups(VarEmission);
LocalDeclMap.erase(PrivateVD);
- (void)LoopScope.addPrivate(VD, [&VarEmission]() {
- return VarEmission.getAllocatedAddress();
- });
+ (void)LoopScope.addPrivate(
+ VD, [&VarEmission]() { return VarEmission.getAllocatedAddress(); });
if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
VD->hasGlobalStorage()) {
(void)LoopScope.addPrivate(PrivateVD, [this, VD, E]() {
@@ -2272,7 +2290,7 @@ static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
// Create temp loop control variables with their init values to support
// non-rectangular loops.
CodeGenFunction::OMPMapVars PreCondVars;
- for (const Expr * E: S.dependent_counters()) {
+ for (const Expr *E : S.dependent_counters()) {
if (!E)
continue;
assert(!E->getType().getNonReferenceType()->isRecordType() &&
@@ -2587,6 +2605,46 @@ void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective &S) {
}
void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) {
+ bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder;
+
+ if (UseOMPIRBuilder) {
+ auto DL = SourceLocToDebugLoc(S.getBeginLoc());
+ const Stmt *Inner = S.getRawStmt();
+
+ // Consume nested loop. Clear the entire remaining loop stack because a
+ // fully unrolled loop is non-transformable. For partial unrolling the
+ // generated outer loop is pushed back to the stack.
+ llvm::CanonicalLoopInfo *CLI = EmitOMPCollapsedCanonicalLoopNest(Inner, 1);
+ OMPLoopNestStack.clear();
+
+ llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
+
+ bool NeedsUnrolledCLI = ExpectedOMPLoopDepth >= 1;
+ llvm::CanonicalLoopInfo *UnrolledCLI = nullptr;
+
+ if (S.hasClausesOfKind<OMPFullClause>()) {
+ assert(ExpectedOMPLoopDepth == 0);
+ OMPBuilder.unrollLoopFull(DL, CLI);
+ } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
+ uint64_t Factor = 0;
+ if (Expr *FactorExpr = PartialClause->getFactor()) {
+ Factor = FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue();
+ assert(Factor >= 1 && "Only positive factors are valid");
+ }
+ OMPBuilder.unrollLoopPartial(DL, CLI, Factor,
+ NeedsUnrolledCLI ? &UnrolledCLI : nullptr);
+ } else {
+ OMPBuilder.unrollLoopHeuristic(DL, CLI);
+ }
+
+ assert((!NeedsUnrolledCLI || UnrolledCLI) &&
+ "NeedsUnrolledCLI implies UnrolledCLI to be set");
+ if (UnrolledCLI)
+ OMPLoopNestStack.push_back(UnrolledCLI);
+
+ return;
+ }
+
// This function is only called if the unrolled loop is not consumed by any
// other loop-associated construct. Such a loop-associated construct will have
// used the transformed AST.
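The OpenMPIRBuilder path above distinguishes full unrolling (the loop disappears, so no enclosing loop-associated directive may expect it) from partial unrolling with a constant factor (the generated outer loop is pushed back on OMPLoopNestStack for the parent construct). Illustrative source, not from the patch:

void axpy_full(float a, float *x, float *y) {
  // Full unroll: nothing is left to hand to an enclosing construct,
  // which is why the code above asserts ExpectedOMPLoopDepth == 0.
#pragma omp unroll full
  for (int i = 0; i < 8; ++i)
    y[i] += a * x[i];
}

void axpy_partial(float a, float *x, float *y, int n) {
  // Partial unroll by a compile-time factor: the surviving outer loop is
  // the canonical loop consumed by the enclosing worksharing 'for'.
#pragma omp for
#pragma omp unroll partial(4)
  for (int i = 0; i < n; ++i)
    y[i] += a * x[i];
}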
@@ -2732,12 +2790,10 @@ void CodeGenFunction::EmitOMPForOuterLoop(
CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
// Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
- const bool DynamicOrOrdered =
- Ordered || RT.isDynamic(ScheduleKind.Schedule);
+ const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind.Schedule);
- assert((Ordered ||
- !RT.isStaticNonchunked(ScheduleKind.Schedule,
- LoopArgs.Chunk != nullptr)) &&
+ assert((Ordered || !RT.isStaticNonchunked(ScheduleKind.Schedule,
+ LoopArgs.Chunk != nullptr)) &&
"static non-chunked schedule does not need outer loop");
// Emit outer loop.
@@ -3057,15 +3113,15 @@ void CodeGenFunction::EmitOMPTargetSimdDirective(
}
namespace {
- struct ScheduleKindModifiersTy {
- OpenMPScheduleClauseKind Kind;
- OpenMPScheduleClauseModifier M1;
- OpenMPScheduleClauseModifier M2;
- ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
- OpenMPScheduleClauseModifier M1,
- OpenMPScheduleClauseModifier M2)
- : Kind(Kind), M1(M1), M2(M2) {}
- };
+struct ScheduleKindModifiersTy {
+ OpenMPScheduleClauseKind Kind;
+ OpenMPScheduleClauseModifier M1;
+ OpenMPScheduleClauseModifier M2;
+ ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
+ OpenMPScheduleClauseModifier M1,
+ OpenMPScheduleClauseModifier M2)
+ : Kind(Kind), M1(M1), M2(M2) {}
+};
} // namespace
bool CodeGenFunction::EmitOMPWorksharingLoop(
@@ -3185,8 +3241,10 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
// If the static schedule kind is specified or if the ordered clause is
// specified, and if no monotonic modifier is specified, the effect will
// be as if the monotonic modifier was specified.
- bool StaticChunkedOne = RT.isStaticChunked(ScheduleKind.Schedule,
- /* Chunked */ Chunk != nullptr) && HasChunkSizeOne &&
+ bool StaticChunkedOne =
+ RT.isStaticChunked(ScheduleKind.Schedule,
+ /* Chunked */ Chunk != nullptr) &&
+ HasChunkSizeOne &&
isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
bool IsMonotonic =
Ordered ||
@@ -3620,7 +3678,8 @@ void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
CGM.getOpenMPRuntime().getOMPBuilder();
llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
- OMPBuilder.createWorkshareLoop(Builder, CLI, AllocaIP, NeedsBarrier);
+ OMPBuilder.applyWorkshareLoop(Builder.getCurrentDebugLocation(), CLI,
+ AllocaIP, NeedsBarrier);
return;
}
@@ -4440,7 +4499,7 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(
const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD),
/*RefersToEnclosingVariableOrCapture=*/
- CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
+ CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
Pair.second->getType(), VK_LValue,
Pair.second->getExprLoc());
Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
@@ -4661,7 +4720,7 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
[&InputInfo]() { return InputInfo.SizesArray; });
// If there is no user-defined mapper, the mapper array will be nullptr. In
// this case, we don't need to privatize it.
- if (!dyn_cast_or_null<llvm::ConstantPointerNull>(
+ if (!isa_and_nonnull<llvm::ConstantPointerNull>(
InputInfo.MappersArray.getPointer())) {
MVD = createImplicitFirstprivateForType(
getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
@@ -4786,7 +4845,14 @@ void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
}
void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
- CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc());
+ OMPTaskDataTy Data;
+ // Build list of dependences
+ for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
+ OMPTaskDataTy::DependData &DD =
+ Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
+ DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
+ }
+ CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc(), Data);
}
void CodeGenFunction::EmitOMPTaskgroupDirective(
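EmitOMPTaskwaitDirective now forwards any depend clauses to the runtime instead of issuing an unconditional wait, which implements OpenMP 5.x taskwait with dependences. A hedged usage example (the helper functions are hypothetical):

extern int produce_a();
extern int produce_b();
extern void consume(int);

void pipeline() {
  int a = 0, b = 0;
#pragma omp task depend(out : a) shared(a)
  a = produce_a();
#pragma omp task depend(out : b) shared(b)
  b = produce_b();
  // Waits only for the task that writes 'a'; the 'b' task may still run.
#pragma omp taskwait depend(in : a)
  consume(a);
}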
@@ -5168,8 +5234,8 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
// iteration space is divided into chunks that are approximately equal
// in size, and at most one chunk is distributed to each team of the
// league. The size of the chunks is unspecified in this case.
- bool StaticChunked = RT.isStaticChunked(
- ScheduleKind, /* Chunked */ Chunk != nullptr) &&
+ bool StaticChunked =
+ RT.isStaticChunked(ScheduleKind, /* Chunked */ Chunk != nullptr) &&
isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
if (RT.isStaticNonchunked(ScheduleKind,
/* Chunked */ Chunk != nullptr) ||
@@ -5307,12 +5373,78 @@ static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
CGF.CapturedStmtInfo = &CapStmtInfo;
llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S, Loc);
Fn->setDoesNotRecurse();
- if (CGM.getCodeGenOpts().OptimizationLevel != 0)
- Fn->addFnAttr(llvm::Attribute::AlwaysInline);
return Fn;
}
void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
+ if (CGM.getLangOpts().OpenMPIRBuilder) {
+ llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
+ using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
+
+ if (S.hasClausesOfKind<OMPDependClause>()) {
+ // The ordered directive with depend clause.
+ assert(!S.hasAssociatedStmt() &&
+ "No associated statement must be in ordered depend construct.");
+ InsertPointTy AllocaIP(AllocaInsertPt->getParent(),
+ AllocaInsertPt->getIterator());
+ for (const auto *DC : S.getClausesOfKind<OMPDependClause>()) {
+ unsigned NumLoops = DC->getNumLoops();
+ QualType Int64Ty = CGM.getContext().getIntTypeForBitwidth(
+ /*DestWidth=*/64, /*Signed=*/1);
+ llvm::SmallVector<llvm::Value *> StoreValues;
+ for (unsigned I = 0; I < NumLoops; I++) {
+ const Expr *CounterVal = DC->getLoopData(I);
+ assert(CounterVal);
+ llvm::Value *StoreValue = EmitScalarConversion(
+ EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
+ CounterVal->getExprLoc());
+ StoreValues.emplace_back(StoreValue);
+ }
+ bool IsDependSource = false;
+ if (DC->getDependencyKind() == OMPC_DEPEND_source)
+ IsDependSource = true;
+ Builder.restoreIP(OMPBuilder.createOrderedDepend(
+ Builder, AllocaIP, NumLoops, StoreValues, ".cnt.addr",
+ IsDependSource));
+ }
+ } else {
+ // The ordered directive with threads or simd clause, or without clause.
+ // Without clause, it behaves as if the threads clause is specified.
+ const auto *C = S.getSingleClause<OMPSIMDClause>();
+
+ auto FiniCB = [this](InsertPointTy IP) {
+ OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
+ };
+
+ auto BodyGenCB = [&S, C, this](InsertPointTy AllocaIP,
+ InsertPointTy CodeGenIP,
+ llvm::BasicBlock &FiniBB) {
+ const CapturedStmt *CS = S.getInnermostCapturedStmt();
+ if (C) {
+ llvm::SmallVector<llvm::Value *, 16> CapturedVars;
+ GenerateOpenMPCapturedVars(*CS, CapturedVars);
+ llvm::Function *OutlinedFn =
+ emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc());
+ assert(S.getBeginLoc().isValid() &&
+ "Outlined function call location must be valid.");
+ ApplyDebugLocation::CreateDefaultArtificial(*this, S.getBeginLoc());
+ OMPBuilderCBHelpers::EmitCaptureStmt(*this, CodeGenIP, FiniBB,
+ OutlinedFn, CapturedVars);
+ } else {
+ OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP,
+ FiniBB);
+ OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CS->getCapturedStmt(),
+ CodeGenIP, FiniBB);
+ }
+ };
+
+ OMPLexicalScope Scope(*this, S, OMPD_unknown);
+ Builder.restoreIP(
+ OMPBuilder.createOrderedThreadsSimd(Builder, BodyGenCB, FiniCB, !C));
+ }
+ return;
+ }
+
if (S.hasClausesOfKind<OMPDependClause>()) {
assert(!S.hasAssociatedStmt() &&
"No associated statement must be in ordered depend construct.");
@@ -5863,6 +5995,12 @@ static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
case OMPC_novariants:
case OMPC_nocontext:
case OMPC_filter:
+ case OMPC_when:
+ case OMPC_adjust_args:
+ case OMPC_append_args:
+ case OMPC_memory_order:
+ case OMPC_bind:
+ case OMPC_align:
llvm_unreachable("Clause is not allowed in 'omp atomic'.");
}
}
@@ -5940,8 +6078,7 @@ static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
return;
}
- auto LPCRegion =
- CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
+ auto LPCRegion = CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
llvm::Function *Fn = nullptr;
llvm::Constant *FnID = nullptr;
@@ -6471,7 +6608,8 @@ void CodeGenFunction::EmitOMPUseDevicePtrClause(
auto OrigVarIt = C.varlist_begin();
auto InitIt = C.inits().begin();
for (const Expr *PvtVarIt : C.private_copies()) {
- const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
+ const auto *OrigVD =
+ cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
const auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
const auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());
@@ -6494,31 +6632,30 @@ void CodeGenFunction::EmitOMPUseDevicePtrClause(
if (InitAddrIt == CaptureDeviceAddrMap.end())
continue;
- bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, OrigVD,
- InitAddrIt, InitVD,
- PvtVD]() {
- // Initialize the temporary initialization variable with the address we
- // get from the runtime library. We have to cast the source address
- // because it is always a void *. References are materialized in the
- // privatization scope, so the initialization here disregards the fact
- // the original variable is a reference.
- QualType AddrQTy =
- getContext().getPointerType(OrigVD->getType().getNonReferenceType());
- llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
- Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
- setAddrOfLocalVar(InitVD, InitAddr);
-
- // Emit private declaration, it will be initialized by the value we
- // declaration we just added to the local declarations map.
- EmitDecl(*PvtVD);
-
- // The initialization variables reached its purpose in the emission
- // of the previous declaration, so we don't need it anymore.
- LocalDeclMap.erase(InitVD);
-
- // Return the address of the private variable.
- return GetAddrOfLocalVar(PvtVD);
- });
+ bool IsRegistered = PrivateScope.addPrivate(
+ OrigVD, [this, OrigVD, InitAddrIt, InitVD, PvtVD]() {
+ // Initialize the temporary initialization variable with the address
+ // we get from the runtime library. We have to cast the source address
+ // because it is always a void *. References are materialized in the
+ // privatization scope, so the initialization here disregards the fact
+ // the original variable is a reference.
+ QualType AddrQTy = getContext().getPointerType(
+ OrigVD->getType().getNonReferenceType());
+ llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
+ Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
+ setAddrOfLocalVar(InitVD, InitAddr);
+
+ // Emit the private declaration; it will be initialized by the
+ // declaration we just added to the local declarations map.
+ EmitDecl(*PvtVD);
+
+ // The initialization variable has served its purpose in the emission
+ // of the previous declaration, so we don't need it anymore.
+ LocalDeclMap.erase(InitVD);
+
+ // Return the address of the private variable.
+ return GetAddrOfLocalVar(PvtVD);
+ });
assert(IsRegistered && "firstprivate var already registered as private");
// Silence the warning about unused variable.
(void)IsRegistered;
@@ -6879,11 +7016,11 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
// TODO: Check if we should emit tied or untied task.
Data.Tied = true;
// Set scheduling for taskloop
- if (const auto* Clause = S.getSingleClause<OMPGrainsizeClause>()) {
+ if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) {
// grainsize clause
Data.Schedule.setInt(/*IntVal=*/false);
Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
- } else if (const auto* Clause = S.getSingleClause<OMPNumTasksClause>()) {
+ } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
// num_tasks clause
Data.Schedule.setInt(/*IntVal=*/true);
Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
@@ -7111,6 +7248,16 @@ void CodeGenFunction::EmitOMPTargetUpdateDirective(
CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}
+void CodeGenFunction::EmitOMPGenericLoopDirective(
+ const OMPGenericLoopDirective &S) {
+ // Unimplemented, just inline the underlying statement for now.
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+ };
+ OMPLexicalScope Scope(*this, S, OMPD_unknown);
+ CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_loop, CodeGen);
+}
+
void CodeGenFunction::EmitSimpleOMPExecutableDirective(
const OMPExecutableDirective &D) {
if (const auto *SD = dyn_cast<OMPScanDirective>(&D)) {
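EmitOMPGenericLoopDirective is a stopgap: the associated loop is emitted as a plain inlined region, so for now the loop construct behaves like the bare loop. Illustration only:

void zero(float *x, int n) {
  // With this patch the 'loop' construct is inlined; the body below is
  // generated as if the pragma were absent.
#pragma omp loop
  for (int i = 0; i < n; ++i)
    x[i] = 0.0f;
}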
diff --git a/clang/lib/CodeGen/CGVTables.cpp b/clang/lib/CodeGen/CGVTables.cpp
index 9eb650814238..482499da1b0f 100644
--- a/clang/lib/CodeGen/CGVTables.cpp
+++ b/clang/lib/CodeGen/CGVTables.cpp
@@ -201,7 +201,7 @@ CodeGenFunction::GenerateVarArgsThunk(llvm::Function *Fn,
Address ThisPtr(&*AI, CGM.getClassPointerAlignment(MD->getParent()));
llvm::BasicBlock *EntryBB = &Fn->front();
llvm::BasicBlock::iterator ThisStore =
- std::find_if(EntryBB->begin(), EntryBB->end(), [&](llvm::Instruction &I) {
+ llvm::find_if(*EntryBB, [&](llvm::Instruction &I) {
return isa<llvm::StoreInst>(I) &&
I.getOperand(0) == ThisPtr.getPointer();
});
diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp
index b30bd11edbad..52c54d3c7a72 100644
--- a/clang/lib/CodeGen/CodeGenAction.cpp
+++ b/clang/lib/CodeGen/CodeGenAction.cpp
@@ -25,8 +25,10 @@
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Frontend/FrontendDiagnostic.h"
#include "clang/Lex/Preprocessor.h"
+#include "llvm/ADT/Hashing.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/Demangle/Demangle.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
@@ -50,6 +52,8 @@
using namespace clang;
using namespace llvm;
+#define DEBUG_TYPE "codegenaction"
+
namespace clang {
class BackendConsumer;
class ClangDiagnosticHandler final : public DiagnosticHandler {
@@ -125,6 +129,17 @@ namespace clang {
SmallVector<LinkModule, 4> LinkModules;
+ // A map from mangled names to their function's source location, used for
+ // backend diagnostics as the Clang AST may be unavailable. We actually use
+ // the mangled name's hash as the key because mangled names can be very
+ // long and take up lots of space. Using a hash can cause name collisions,
+ // but they are rare, and the worst consequence is a diagnostic pointing to
+ // the wrong source location, which is not severe. This is a vector instead of an actual map
+ // because we optimize for time building this map rather than time
+ // retrieving an entry, as backend diagnostics are uncommon.
+ std::vector<std::pair<llvm::hash_code, FullSourceLoc>>
+ ManglingFullSourceLocs;
+
// This is here so that the diagnostic printer knows the module a diagnostic
// refers to.
llvm::Module *CurLinkModule = nullptr;
@@ -160,7 +175,7 @@ namespace clang {
const PreprocessorOptions &PPOpts,
const CodeGenOptions &CodeGenOpts,
const TargetOptions &TargetOpts,
- const LangOptions &LangOpts,
+ const LangOptions &LangOpts, llvm::Module *Module,
SmallVector<LinkModule, 4> LinkModules, LLVMContext &C,
CoverageSourceInfo *CoverageInfo = nullptr)
: Diags(Diags), Action(Action), HeaderSearchOpts(HeaderSearchOpts),
@@ -170,7 +185,7 @@ namespace clang {
LLVMIRGenerationRefCount(0),
Gen(CreateLLVMCodeGen(Diags, "", HeaderSearchOpts, PPOpts,
CodeGenOpts, C, CoverageInfo)),
- LinkModules(std::move(LinkModules)) {
+ LinkModules(std::move(LinkModules)), CurLinkModule(Module) {
TimerIsEnabled = CodeGenOpts.TimePasses;
llvm::TimePassesIsEnabled = CodeGenOpts.TimePasses;
llvm::TimePassesPerRun = CodeGenOpts.TimePassesPerRun;
@@ -329,6 +344,27 @@ namespace clang {
if (LinkInModules())
return;
+ for (auto &F : getModule()->functions()) {
+ if (const Decl *FD = Gen->GetDeclForMangledName(F.getName())) {
+ auto Loc = FD->getASTContext().getFullLoc(FD->getLocation());
+ // TODO: use a fast content hash when available.
+ auto NameHash = llvm::hash_value(F.getName());
+ ManglingFullSourceLocs.push_back(std::make_pair(NameHash, Loc));
+ }
+ }
+
+ if (CodeGenOpts.ClearASTBeforeBackend) {
+ LLVM_DEBUG(llvm::dbgs() << "Clearing AST...\n");
+ // Access to the AST is no longer available after this.
+ // Other things that the ASTContext manages are still available, e.g.
+ // the SourceManager. It'd be nice if we could separate out all the
+ // things in ASTContext used after this point and null out the
+ // ASTContext, but too many pieces of the ASTContext are still used in
+ // various places after this point.
+ C.cleanup();
+ C.getAllocator().Reset();
+ }
+
EmbedBitcode(getModule(), CodeGenOpts, llvm::MemoryBufferRef());
EmitBackendOutput(Diags, HeaderSearchOpts, CodeGenOpts, TargetOpts,
@@ -375,6 +411,8 @@ namespace clang {
bool &BadDebugInfo, StringRef &Filename,
unsigned &Line, unsigned &Column) const;
+ Optional<FullSourceLoc> getFunctionSourceLocation(const Function &F) const;
+
void DiagnosticHandlerImpl(const llvm::DiagnosticInfo &DI);
/// Specialized handler for InlineAsm diagnostic.
/// \return True if the diagnostic has been successfully reported, false
@@ -401,6 +439,7 @@ namespace clang {
const llvm::OptimizationRemarkAnalysisAliasing &D);
void OptimizationFailureHandler(
const llvm::DiagnosticInfoOptimizationFailure &D);
+ void DontCallDiagHandler(const DiagnosticInfoDontCall &D);
};
void BackendConsumer::anchor() {}
@@ -567,17 +606,16 @@ BackendConsumer::StackSizeDiagHandler(const llvm::DiagnosticInfoStackSize &D) {
// We do not know how to format other severities.
return false;
- if (const Decl *ND = Gen->GetDeclForMangledName(D.getFunction().getName())) {
- // FIXME: Shouldn't need to truncate to uint32_t
- Diags.Report(ND->getASTContext().getFullLoc(ND->getLocation()),
- diag::warn_fe_frame_larger_than)
- << static_cast<uint32_t>(D.getStackSize())
- << static_cast<uint32_t>(D.getStackLimit())
- << Decl::castToDeclContext(ND);
- return true;
- }
+ auto Loc = getFunctionSourceLocation(D.getFunction());
+ if (!Loc)
+ return false;
- return false;
+ // FIXME: Shouldn't need to truncate to uint32_t
+ Diags.Report(*Loc, diag::warn_fe_frame_larger_than)
+ << static_cast<uint32_t>(D.getStackSize())
+ << static_cast<uint32_t>(D.getStackLimit())
+ << llvm::demangle(D.getFunction().getName().str());
+ return true;
}
const FullSourceLoc BackendConsumer::getBestLocationFromDebugLoc(
@@ -606,9 +644,10 @@ const FullSourceLoc BackendConsumer::getBestLocationFromDebugLoc(
// function definition. We use the definition's right brace to differentiate
// from diagnostics that genuinely relate to the function itself.
FullSourceLoc Loc(DILoc, SourceMgr);
- if (Loc.isInvalid())
- if (const Decl *FD = Gen->GetDeclForMangledName(D.getFunction().getName()))
- Loc = FD->getASTContext().getFullLoc(FD->getLocation());
+ if (Loc.isInvalid()) {
+ if (auto MaybeLoc = getFunctionSourceLocation(D.getFunction()))
+ Loc = *MaybeLoc;
+ }
if (DILoc.isInvalid() && D.isLocationAvailable())
// If we were not able to translate the file:line:col information
@@ -621,6 +660,16 @@ const FullSourceLoc BackendConsumer::getBestLocationFromDebugLoc(
return Loc;
}
+Optional<FullSourceLoc>
+BackendConsumer::getFunctionSourceLocation(const Function &F) const {
+ auto Hash = llvm::hash_value(F.getName());
+ for (const auto &Pair : ManglingFullSourceLocs) {
+ if (Pair.first == Hash)
+ return Pair.second;
+ }
+ return Optional<FullSourceLoc>();
+}
+
void BackendConsumer::UnsupportedDiagHandler(
const llvm::DiagnosticInfoUnsupported &D) {
// We only support warnings or errors.
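getFunctionSourceLocation does a linear scan over the (hash, location) vector built in HandleTranslationUnit, matching the earlier comment that the structure is optimized for build time because backend diagnostics are rare. A self-contained sketch of the same idea, with std::hash standing in for llvm::hash_value and a plain struct standing in for FullSourceLoc:

#include <cstddef>
#include <functional>
#include <optional>
#include <string>
#include <utility>
#include <vector>

struct SourceLoc { std::string File; unsigned Line = 0; };  // stand-in

std::vector<std::pair<std::size_t, SourceLoc>> ManglingLocs;

void rememberFunction(const std::string &MangledName, SourceLoc Loc) {
  // Store the hash, not the (possibly very long) mangled name itself.
  ManglingLocs.emplace_back(std::hash<std::string>{}(MangledName), Loc);
}

std::optional<SourceLoc> lookupFunction(const std::string &MangledName) {
  std::size_t H = std::hash<std::string>{}(MangledName);
  for (const auto &P : ManglingLocs)   // linear scan: diagnostics are rare
    if (P.first == H)
      return P.second;                 // a collision only misplaces a note
  return std::nullopt;
}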
@@ -758,6 +807,21 @@ void BackendConsumer::OptimizationFailureHandler(
EmitOptimizationMessage(D, diag::warn_fe_backend_optimization_failure);
}
+void BackendConsumer::DontCallDiagHandler(const DiagnosticInfoDontCall &D) {
+ SourceLocation LocCookie =
+ SourceLocation::getFromRawEncoding(D.getLocCookie());
+
+ // FIXME: we can't yet diagnose indirect calls. When/if we can, we
+ // should instead assert that LocCookie.isValid().
+ if (!LocCookie.isValid())
+ return;
+
+ Diags.Report(LocCookie, D.getSeverity() == DiagnosticSeverity::DS_Error
+ ? diag::err_fe_backend_error_attr
+ : diag::warn_fe_backend_warning_attr)
+ << llvm::demangle(D.getFunctionName().str()) << D.getNote();
+}
+
/// This function is invoked when the backend needs
/// to report something to the user.
void BackendConsumer::DiagnosticHandlerImpl(const DiagnosticInfo &DI) {
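DK_DontCall diagnostics report calls that survive optimization to functions carrying the GCC-style error/warning attributes, using the location cookie attached to the call. An illustrative use of the attribute; the exact spelling is an assumption, as this hunk only shows the handler:

// A call to slow_path() that is still present after optimization is
// reported through DontCallDiagHandler at the call site; a call the
// optimizer deletes produces no diagnostic.
__attribute__((warning("slow_path survived optimization")))
void slow_path();

void fast(int n) {
  if (n < 0)   // if this is proven false, the call (and warning) go away
    slow_path();
}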
@@ -779,11 +843,7 @@ void BackendConsumer::DiagnosticHandlerImpl(const DiagnosticInfo &DI) {
ComputeDiagID(Severity, backend_frame_larger_than, DiagID);
break;
case DK_Linker:
- assert(CurLinkModule);
- // FIXME: stop eating the warnings and notes.
- if (Severity != DS_Error)
- return;
- DiagID = diag::err_fe_cannot_link_module;
+ ComputeDiagID(Severity, linking_module, DiagID);
break;
case llvm::DK_OptimizationRemark:
// Optimization remarks are always handled completely by this
@@ -833,6 +893,9 @@ void BackendConsumer::DiagnosticHandlerImpl(const DiagnosticInfo &DI) {
case llvm::DK_Unsupported:
UnsupportedDiagHandler(cast<DiagnosticInfoUnsupported>(DI));
return;
+ case llvm::DK_DontCall:
+ DontCallDiagHandler(cast<DiagnosticInfoDontCall>(DI));
+ return;
default:
// Plugin IDs are not bound to any value as they are set dynamically.
ComputeDiagRemarkID(Severity, backend_plugin, DiagID);
@@ -845,9 +908,9 @@ void BackendConsumer::DiagnosticHandlerImpl(const DiagnosticInfo &DI) {
DI.print(DP);
}
- if (DiagID == diag::err_fe_cannot_link_module) {
- Diags.Report(diag::err_fe_cannot_link_module)
- << CurLinkModule->getModuleIdentifier() << MsgStorage;
+ if (DI.getKind() == DK_Linker) {
+ assert(CurLinkModule && "CurLinkModule must be set for linker diagnostics");
+ Diags.Report(DiagID) << CurLinkModule->getModuleIdentifier() << MsgStorage;
return;
}
@@ -1088,7 +1151,7 @@ void CodeGenAction::ExecuteAction() {
// BackendConsumer.
BackendConsumer Result(BA, CI.getDiagnostics(), CI.getHeaderSearchOpts(),
CI.getPreprocessorOpts(), CI.getCodeGenOpts(),
- CI.getTargetOpts(), CI.getLangOpts(),
+ CI.getTargetOpts(), CI.getLangOpts(), TheModule.get(),
std::move(LinkModules), *VMContext, nullptr);
// PR44896: Force DiscardValueNames as false. DiscardValueNames cannot be
// true here because the valued names are needed for reading textual IR.
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index a2384456ea94..d87cf2d49720 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -45,6 +45,7 @@
#include "llvm/Support/CRC.h"
#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+
using namespace clang;
using namespace CodeGen;
@@ -78,7 +79,6 @@ CodeGenFunction::CodeGenFunction(CodeGenModule &cgm, bool suppressNewContext)
EHStack.setCGF(this);
SetFastMathFlags(CurFPFeatures);
- SetFPModel();
}
CodeGenFunction::~CodeGenFunction() {
@@ -109,17 +109,6 @@ clang::ToConstrainedExceptMD(LangOptions::FPExceptionModeKind Kind) {
llvm_unreachable("Unsupported FP Exception Behavior");
}
-void CodeGenFunction::SetFPModel() {
- llvm::RoundingMode RM = getLangOpts().getFPRoundingMode();
- auto fpExceptionBehavior = ToConstrainedExceptMD(
- getLangOpts().getFPExceptionMode());
-
- Builder.setDefaultConstrainedRounding(RM);
- Builder.setDefaultConstrainedExcept(fpExceptionBehavior);
- Builder.setIsFPConstrained(fpExceptionBehavior != llvm::fp::ebIgnore ||
- RM != llvm::RoundingMode::NearestTiesToEven);
-}
-
void CodeGenFunction::SetFastMathFlags(FPOptions FPFeatures) {
llvm::FastMathFlags FMF;
FMF.setAllowReassoc(FPFeatures.getAllowFPReassociate());
@@ -393,6 +382,9 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) {
"__cyg_profile_func_exit");
}
+ if (ShouldSkipSanitizerInstrumentation())
+ CurFn->addFnAttr(llvm::Attribute::DisableSanitizerInstrumentation);
+
// Emit debug descriptor for function end.
if (CGDebugInfo *DI = getDebugInfo())
DI->EmitFunctionEnd(Builder, CurFn);
@@ -432,6 +424,14 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) {
AllocaInsertPt = nullptr;
Ptr->eraseFromParent();
+ // PostAllocaInsertPt is created lazily, only when it is required; remove
+ // it now since it existed purely for our own convenience.
+ if (PostAllocaInsertPt) {
+ llvm::Instruction *PostPtr = PostAllocaInsertPt;
+ PostAllocaInsertPt = nullptr;
+ PostPtr->eraseFromParent();
+ }
+
// If someone took the address of a label but never did an indirect goto, we
// made a zero entry PHI node, which is illegal, zap it now.
if (IndirectBranch) {
@@ -496,11 +496,13 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) {
// function.
CurFn->addFnAttr("min-legal-vector-width", llvm::utostr(LargestVectorWidth));
- // Add vscale attribute if appropriate.
- if (getLangOpts().ArmSveVectorBits) {
- unsigned VScale = getLangOpts().ArmSveVectorBits / 128;
- CurFn->addFnAttr(llvm::Attribute::getWithVScaleRangeArgs(getLLVMContext(),
- VScale, VScale));
+ // Add vscale_range attribute if appropriate.
+ Optional<std::pair<unsigned, unsigned>> VScaleRange =
+ getContext().getTargetInfo().getVScaleRange(getLangOpts());
+ if (VScaleRange) {
+ CurFn->addFnAttr(llvm::Attribute::getWithVScaleRangeArgs(
+ getLLVMContext(), VScaleRange.getValue().first,
+ VScaleRange.getValue().second));
}
// If we generated an unreachable return block, delete it now.
@@ -529,6 +531,12 @@ bool CodeGenFunction::ShouldInstrumentFunction() {
return true;
}
+bool CodeGenFunction::ShouldSkipSanitizerInstrumentation() {
+ if (!CurFuncDecl)
+ return false;
+ return CurFuncDecl->hasAttr<DisableSanitizerInstrumentationAttr>();
+}
+
/// ShouldXRayInstrument - Return true if the current function should be
/// instrumented with XRay nop sleds.
bool CodeGenFunction::ShouldXRayInstrumentFunction() const {
@@ -947,10 +955,16 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
(getLangOpts().CUDA && FD->hasAttr<CUDAGlobalAttr>())))
Fn->addFnAttr(llvm::Attribute::NoRecurse);
- if (FD) {
- Builder.setIsFPConstrained(FD->hasAttr<StrictFPAttr>());
- if (FD->hasAttr<StrictFPAttr>())
- Fn->addFnAttr(llvm::Attribute::StrictFP);
+ llvm::RoundingMode RM = getLangOpts().getFPRoundingMode();
+ llvm::fp::ExceptionBehavior FPExceptionBehavior =
+ ToConstrainedExceptMD(getLangOpts().getFPExceptionMode());
+ Builder.setDefaultConstrainedRounding(RM);
+ Builder.setDefaultConstrainedExcept(FPExceptionBehavior);
+ if ((FD && (FD->UsesFPIntrin() || FD->hasAttr<StrictFPAttr>())) ||
+ (!FD && (FPExceptionBehavior != llvm::fp::ebIgnore ||
+ RM != llvm::RoundingMode::NearestTiesToEven))) {
+ Builder.setIsFPConstrained(true);
+ Fn->addFnAttr(llvm::Attribute::StrictFP);
}
// If a custom alignment is used, force realigning to this alignment on
@@ -975,7 +989,8 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
// precise source location of the checked return statement.
if (requiresReturnValueCheck()) {
ReturnLocation = CreateDefaultAlignTempAlloca(Int8PtrTy, "return.sloc.ptr");
- InitTempAlloca(ReturnLocation, llvm::ConstantPointerNull::get(Int8PtrTy));
+ Builder.CreateStore(llvm::ConstantPointerNull::get(Int8PtrTy),
+ ReturnLocation);
}
// Emit subprogram debug descriptor.
@@ -983,16 +998,9 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
// Reconstruct the type from the argument list so that implicit parameters,
// such as 'this' and 'vtt', show up in the debug info. Preserve the calling
// convention.
- CallingConv CC = CallingConv::CC_C;
- if (FD)
- if (const auto *SrcFnTy = FD->getType()->getAs<FunctionType>())
- CC = SrcFnTy->getCallConv();
- SmallVector<QualType, 16> ArgTypes;
- for (const VarDecl *VD : Args)
- ArgTypes.push_back(VD->getType());
- QualType FnType = getContext().getFunctionType(
- RetTy, ArgTypes, FunctionProtoType::ExtProtoInfo(CC));
- DI->emitFunctionStart(GD, Loc, StartLoc, FnType, CurFn, CurFuncIsThunk);
+ DI->emitFunctionStart(GD, Loc, StartLoc,
+ DI->getFunctionType(FD, RetTy, Args), CurFn,
+ CurFuncIsThunk);
}
if (ShouldInstrumentFunction()) {
@@ -1044,7 +1052,8 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
Fn->addFnAttr("packed-stack");
}
- if (CGM.getCodeGenOpts().WarnStackSize != UINT_MAX)
+ if (CGM.getCodeGenOpts().WarnStackSize != UINT_MAX &&
+ !CGM.getDiags().isIgnored(diag::warn_fe_backend_frame_larger_than, Loc))
Fn->addFnAttr("warn-stack-size",
std::to_string(CGM.getCodeGenOpts().WarnStackSize));
@@ -1295,6 +1304,45 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
FunctionArgList Args;
QualType ResTy = BuildFunctionArgList(GD, Args);
+ // When generating code for a builtin with an inline declaration, use a
+ // mangled name to hold the actual body, while keeping an external definition
+ // in case the function pointer is referenced somewhere.
+ if (Fn) {
+ if (FD->isInlineBuiltinDeclaration()) {
+ std::string FDInlineName = (Fn->getName() + ".inline").str();
+ llvm::Module *M = Fn->getParent();
+ llvm::Function *Clone = M->getFunction(FDInlineName);
+ if (!Clone) {
+ Clone = llvm::Function::Create(Fn->getFunctionType(),
+ llvm::GlobalValue::InternalLinkage,
+ Fn->getAddressSpace(), FDInlineName, M);
+ Clone->addFnAttr(llvm::Attribute::AlwaysInline);
+ }
+ Fn->setLinkage(llvm::GlobalValue::ExternalLinkage);
+ Fn = Clone;
+ }
+
+ // Detect the unusual situation where an inline version is shadowed by a
+ // non-inline version. In that case we should pick the external one
+ // everywhere. That's GCC behavior too. Unfortunately, I cannot find a way
+ // to detect that situation before we reach codegen, so do some late
+ // replacement.
+ else {
+ for (const FunctionDecl *PD = FD->getPreviousDecl(); PD;
+ PD = PD->getPreviousDecl()) {
+ if (LLVM_UNLIKELY(PD->isInlineBuiltinDeclaration())) {
+ std::string FDInlineName = (Fn->getName() + ".inline").str();
+ llvm::Module *M = Fn->getParent();
+ if (llvm::Function *Clone = M->getFunction(FDInlineName)) {
+ Clone->replaceAllUsesWith(Fn);
+ Clone->eraseFromParent();
+ }
+ break;
+ }
+ }
+ }
+ }
+
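// Illustrative sketch (not part of the patch): a fortified inline builtin such
// as this hypothetical memcpy wrapper has its body emitted under an internal
// "<name>.inline" clone, while the external symbol keeps an external
// definition for address-taken uses:
extern inline __attribute__((always_inline, gnu_inline)) void *
memcpy(void *dst, const void *src, unsigned long n) {
  return __builtin___memcpy_chk(dst, src, n, __builtin_object_size(dst, 0));
}
// CodeGen emits the body as "memcpy.inline" (internal, alwaysinline); plain
// "memcpy" stays available with external linkage.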
// Check if we should generate debug info for this function.
if (FD->hasAttr<NoDebugAttr>()) {
// Clear non-distinct debug info that was possibly attached to the function
@@ -2399,15 +2447,19 @@ Address CodeGenFunction::EmitFieldAnnotations(const FieldDecl *D,
assert(D->hasAttr<AnnotateAttr>() && "no annotate attribute");
llvm::Value *V = Addr.getPointer();
llvm::Type *VTy = V->getType();
- llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::ptr_annotation,
- CGM.Int8PtrTy);
+ auto *PTy = dyn_cast<llvm::PointerType>(VTy);
+ unsigned AS = PTy ? PTy->getAddressSpace() : 0;
+ llvm::PointerType *IntrinTy =
+ llvm::PointerType::getWithSamePointeeType(CGM.Int8PtrTy, AS);
+ llvm::Function *F =
+ CGM.getIntrinsic(llvm::Intrinsic::ptr_annotation, IntrinTy);
for (const auto *I : D->specific_attrs<AnnotateAttr>()) {
// FIXME Always emit the cast inst so we can differentiate between
// annotation on the first field of a struct and annotation on the struct
// itself.
- if (VTy != CGM.Int8PtrTy)
- V = Builder.CreateBitCast(V, CGM.Int8PtrTy);
+ if (VTy != IntrinTy)
+ V = Builder.CreateBitCast(V, IntrinTy);
V = EmitAnnotationCall(F, V, I->getAnnotation(), D->getLocation(), I);
V = Builder.CreateBitCast(V, VTy);
}
@@ -2478,8 +2530,7 @@ void CodeGenFunction::checkTargetFeatures(SourceLocation Loc,
// Return if the builtin doesn't have any required features.
if (FeatureList.empty())
return;
- assert(FeatureList.find(' ') == StringRef::npos &&
- "Space in feature list");
+ assert(!FeatureList.contains(' ') && "Space in feature list");
TargetFeatures TF(CallerFeatureMap);
if (!TF.hasRequiredFeatures(FeatureList))
CGM.getDiags().Report(Loc, diag::err_builtin_needs_feature)
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 4e087ce51e37..ff5b6634da1c 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -291,6 +291,10 @@ public:
/// nest would extend.
SmallVector<llvm::CanonicalLoopInfo *, 4> OMPLoopNestStack;
+ /// Number of nested loops to be consumed by the last surrounding
+ /// loop-associated directive.
+ int ExpectedOMPLoopDepth = 0;
+
// CodeGen lambda for loops and support for ordered clause
typedef llvm::function_ref<void(CodeGenFunction &, const OMPLoopDirective &,
JumpDest)>
@@ -375,6 +379,34 @@ public:
/// we prefer to insert allocas.
llvm::AssertingVH<llvm::Instruction> AllocaInsertPt;
+private:
+ /// PostAllocaInsertPt - This is a place in the prologue where code can be
+ /// inserted that will be dominated by all the static allocas. This helps
+ /// achieve two things:
+ /// 1. Contiguity of all static allocas (within the prologue) is maintained.
+ /// 2. All other prologue code (which is dominated by the static allocas)
+ /// appears in source order immediately after all static allocas.
+ ///
+ /// PostAllocaInsertPt will be lazily created when it is *really* required.
+ llvm::AssertingVH<llvm::Instruction> PostAllocaInsertPt = nullptr;
+
+public:
+ /// Return PostAllocaInsertPt. If it is not yet created, then insert it
+ /// immediately after AllocaInsertPt.
+ llvm::Instruction *getPostAllocaInsertPoint() {
+ if (!PostAllocaInsertPt) {
+ assert(AllocaInsertPt &&
+ "Expected static alloca insertion point at function prologue");
+ assert(AllocaInsertPt->getParent()->isEntryBlock() &&
+ "EBB should be entry block of the current code gen function");
+ PostAllocaInsertPt = AllocaInsertPt->clone();
+ PostAllocaInsertPt->setName("postallocapt");
+ PostAllocaInsertPt->insertAfter(AllocaInsertPt);
+ }
+
+ return PostAllocaInsertPt;
+ }
+
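// Illustrative sketch (not part of the patch): prologue-only code that must be
// dominated by every static alloca can now be emitted at this lazily created
// marker instead of being interleaved with the allocas themselves, e.g.
// (assuming a CodeGenFunction &CGF in scope):
CGBuilderTy::InsertPointGuard Guard(CGF.Builder);
CGF.Builder.SetInsertPoint(CGF.getPostAllocaInsertPoint());
// ...emit prologue initialization here; it lands after all static allocas...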
/// API for captured statement code generation.
class CGCapturedStmtInfo {
public:
@@ -467,7 +499,7 @@ public:
AbstractCallee(const FunctionDecl *FD) : CalleeDecl(FD) {}
AbstractCallee(const ObjCMethodDecl *OMD) : CalleeDecl(OMD) {}
bool hasFunctionDecl() const {
- return dyn_cast_or_null<FunctionDecl>(CalleeDecl);
+ return isa_and_nonnull<FunctionDecl>(CalleeDecl);
}
const Decl *getDecl() const { return CalleeDecl; }
unsigned getNumParams() const {
@@ -1775,6 +1807,24 @@ public:
CGF.Builder.CreateBr(&FiniBB);
}
+ static void EmitCaptureStmt(CodeGenFunction &CGF, InsertPointTy CodeGenIP,
+ llvm::BasicBlock &FiniBB, llvm::Function *Fn,
+ ArrayRef<llvm::Value *> Args) {
+ llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
+ if (llvm::Instruction *CodeGenIPBBTI = CodeGenIPBB->getTerminator())
+ CodeGenIPBBTI->eraseFromParent();
+
+ CGF.Builder.SetInsertPoint(CodeGenIPBB);
+
+ if (Fn->doesNotThrow())
+ CGF.EmitNounwindRuntimeCall(Fn, Args);
+ else
+ CGF.EmitRuntimeCall(Fn, Args);
+
+ if (CGF.Builder.saveIP().isSet())
+ CGF.Builder.CreateBr(&FiniBB);
+ }
+
/// RAII for preserving necessary info during Outlined region body codegen.
class OutlinedRegionBodyRAII {
@@ -2286,6 +2336,10 @@ public:
/// instrumented with __cyg_profile_func_* calls
bool ShouldInstrumentFunction();
+ /// ShouldSkipSanitizerInstrumentation - Return true if the current function
+ /// should not be instrumented with sanitizers.
+ bool ShouldSkipSanitizerInstrumentation();
+
/// ShouldXRayInstrument - Return true if the current function should be
/// instrumented with XRay nop sleds.
bool ShouldXRayInstrumentFunction() const;
@@ -2519,15 +2573,6 @@ public:
Address CreateDefaultAlignTempAlloca(llvm::Type *Ty,
const Twine &Name = "tmp");
- /// InitTempAlloca - Provide an initial value for the given alloca which
- /// will be observable at all locations in the function.
- ///
- /// The address should be something that was returned from one of
- /// the CreateTempAlloca or CreateMemTemp routines, and the
- /// initializer must be valid in the entry block (i.e. it must
- /// either be a constant or an argument value).
- void InitTempAlloca(Address Alloca, llvm::Value *Value);
-
/// CreateIRTemp - Create a temporary IR object of the given type, with
/// appropriate alignment. This routine should only be used when a temporary
/// value needs to be stored into an alloca (for example, to avoid explicit
@@ -3438,6 +3483,7 @@ public:
const RegionCodeGenTy &BodyGen,
OMPTargetDataInfo &InputInfo);
+ void EmitOMPMetaDirective(const OMPMetaDirective &S);
void EmitOMPParallelDirective(const OMPParallelDirective &S);
void EmitOMPSimdDirective(const OMPSimdDirective &S);
void EmitOMPTileDirective(const OMPTileDirective &S);
@@ -3511,6 +3557,7 @@ public:
const OMPTargetTeamsDistributeParallelForSimdDirective &S);
void EmitOMPTargetTeamsDistributeSimdDirective(
const OMPTargetTeamsDistributeSimdDirective &S);
+ void EmitOMPGenericLoopDirective(const OMPGenericLoopDirective &S);
/// Emit device code for the target directive.
static void EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
@@ -4051,10 +4098,9 @@ public:
RValue EmitCUDAKernelCallExpr(const CUDAKernelCallExpr *E,
ReturnValueSlot ReturnValue);
- RValue EmitNVPTXDevicePrintfCallExpr(const CallExpr *E,
- ReturnValueSlot ReturnValue);
- RValue EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E,
- ReturnValueSlot ReturnValue);
+ RValue EmitNVPTXDevicePrintfCallExpr(const CallExpr *E);
+ RValue EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E);
+ RValue EmitOpenMPDevicePrintfCallExpr(const CallExpr *E);
RValue EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
const CallExpr *E, ReturnValueSlot ReturnValue);
@@ -4126,30 +4172,30 @@ public:
/// SVEBuiltinMemEltTy - Returns the memory element type for this memory
/// access builtin. Only required if it can't be inferred from the base
/// pointer operand.
- llvm::Type *SVEBuiltinMemEltTy(SVETypeFlags TypeFlags);
+ llvm::Type *SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags);
- SmallVector<llvm::Type *, 2> getSVEOverloadTypes(SVETypeFlags TypeFlags,
- llvm::Type *ReturnType,
- ArrayRef<llvm::Value *> Ops);
- llvm::Type *getEltType(SVETypeFlags TypeFlags);
+ SmallVector<llvm::Type *, 2>
+ getSVEOverloadTypes(const SVETypeFlags &TypeFlags, llvm::Type *ReturnType,
+ ArrayRef<llvm::Value *> Ops);
+ llvm::Type *getEltType(const SVETypeFlags &TypeFlags);
llvm::ScalableVectorType *getSVEType(const SVETypeFlags &TypeFlags);
- llvm::ScalableVectorType *getSVEPredType(SVETypeFlags TypeFlags);
- llvm::Value *EmitSVEAllTruePred(SVETypeFlags TypeFlags);
+ llvm::ScalableVectorType *getSVEPredType(const SVETypeFlags &TypeFlags);
+ llvm::Value *EmitSVEAllTruePred(const SVETypeFlags &TypeFlags);
llvm::Value *EmitSVEDupX(llvm::Value *Scalar);
llvm::Value *EmitSVEDupX(llvm::Value *Scalar, llvm::Type *Ty);
llvm::Value *EmitSVEReinterpret(llvm::Value *Val, llvm::Type *Ty);
- llvm::Value *EmitSVEPMull(SVETypeFlags TypeFlags,
+ llvm::Value *EmitSVEPMull(const SVETypeFlags &TypeFlags,
llvm::SmallVectorImpl<llvm::Value *> &Ops,
unsigned BuiltinID);
- llvm::Value *EmitSVEMovl(SVETypeFlags TypeFlags,
+ llvm::Value *EmitSVEMovl(const SVETypeFlags &TypeFlags,
llvm::ArrayRef<llvm::Value *> Ops,
unsigned BuiltinID);
llvm::Value *EmitSVEPredicateCast(llvm::Value *Pred,
llvm::ScalableVectorType *VTy);
- llvm::Value *EmitSVEGatherLoad(SVETypeFlags TypeFlags,
+ llvm::Value *EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
llvm::SmallVectorImpl<llvm::Value *> &Ops,
unsigned IntID);
- llvm::Value *EmitSVEScatterStore(SVETypeFlags TypeFlags,
+ llvm::Value *EmitSVEScatterStore(const SVETypeFlags &TypeFlags,
llvm::SmallVectorImpl<llvm::Value *> &Ops,
unsigned IntID);
llvm::Value *EmitSVEMaskedLoad(const CallExpr *, llvm::Type *ReturnTy,
@@ -4158,15 +4204,16 @@ public:
llvm::Value *EmitSVEMaskedStore(const CallExpr *,
SmallVectorImpl<llvm::Value *> &Ops,
unsigned BuiltinID);
- llvm::Value *EmitSVEPrefetchLoad(SVETypeFlags TypeFlags,
+ llvm::Value *EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags,
SmallVectorImpl<llvm::Value *> &Ops,
unsigned BuiltinID);
- llvm::Value *EmitSVEGatherPrefetch(SVETypeFlags TypeFlags,
+ llvm::Value *EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags,
SmallVectorImpl<llvm::Value *> &Ops,
unsigned IntID);
- llvm::Value *EmitSVEStructLoad(SVETypeFlags TypeFlags,
- SmallVectorImpl<llvm::Value *> &Ops, unsigned IntID);
- llvm::Value *EmitSVEStructStore(SVETypeFlags TypeFlags,
+ llvm::Value *EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
+ SmallVectorImpl<llvm::Value *> &Ops,
+ unsigned IntID);
+ llvm::Value *EmitSVEStructStore(const SVETypeFlags &TypeFlags,
SmallVectorImpl<llvm::Value *> &Ops,
unsigned IntID);
llvm::Value *EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
@@ -4588,9 +4635,6 @@ public:
/// point operation, expressed as the maximum relative error in ulp.
void SetFPAccuracy(llvm::Value *Val, float Accuracy);
- /// SetFPModel - Control floating point behavior via fp-model settings.
- void SetFPModel();
-
/// Set the codegen fast-math flags.
void SetFastMathFlags(FPOptions FPFeatures);
@@ -4726,8 +4770,6 @@ public:
void EmitMultiVersionResolver(llvm::Function *Resolver,
ArrayRef<MultiVersionResolverOption> Options);
- static uint64_t GetX86CpuSupportsMask(ArrayRef<StringRef> FeatureStrs);
-
private:
QualType getVarArgType(const Expr *Arg);
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 9b40b88ea3c9..59f3e0270571 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -19,8 +19,7 @@
#include "CGObjCRuntime.h"
#include "CGOpenCLRuntime.h"
#include "CGOpenMPRuntime.h"
-#include "CGOpenMPRuntimeAMDGCN.h"
-#include "CGOpenMPRuntimeNVPTX.h"
+#include "CGOpenMPRuntimeGPU.h"
#include "CodeGenFunction.h"
#include "CodeGenPGO.h"
#include "ConstantEmitter.h"
@@ -63,6 +62,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MD5.h"
#include "llvm/Support/TimeProfiler.h"
+#include "llvm/Support/X86TargetParser.h"
using namespace clang;
using namespace CodeGen;
@@ -130,8 +130,9 @@ CodeGenModule::CodeGenModule(ASTContext &C, const HeaderSearchOptions &HSO,
C.getTargetInfo().getMaxPointerWidth());
Int8PtrTy = Int8Ty->getPointerTo(0);
Int8PtrPtrTy = Int8PtrTy->getPointerTo(0);
- AllocaInt8PtrTy = Int8Ty->getPointerTo(
- M.getDataLayout().getAllocaAddrSpace());
+ const llvm::DataLayout &DL = M.getDataLayout();
+ AllocaInt8PtrTy = Int8Ty->getPointerTo(DL.getAllocaAddrSpace());
+ GlobalsInt8PtrTy = Int8Ty->getPointerTo(DL.getDefaultGlobalsAddressSpace());
ASTAllocaAddressSpace = getTargetCodeGenInfo().getASTAllocaAddressSpace();
RuntimeCC = getTargetCodeGenInfo().getABIInfo().getRuntimeCC();
@@ -186,7 +187,7 @@ CodeGenModule::CodeGenModule(ASTContext &C, const HeaderSearchOptions &HSO,
!getModule().getSourceFileName().empty()) {
std::string Path = getModule().getSourceFileName();
// Check if a path substitution is needed from the MacroPrefixMap.
- for (const auto &Entry : PPO.MacroPrefixMap)
+ for (const auto &Entry : LangOpts.MacroPrefixMap)
if (Path.rfind(Entry.first, 0) != std::string::npos) {
Path = Entry.second + Path.substr(Entry.first.size());
break;
@@ -242,14 +243,10 @@ void CodeGenModule::createOpenMPRuntime() {
switch (getTriple().getArch()) {
case llvm::Triple::nvptx:
case llvm::Triple::nvptx64:
- assert(getLangOpts().OpenMPIsDevice &&
- "OpenMP NVPTX is only prepared to deal with device code.");
- OpenMPRuntime.reset(new CGOpenMPRuntimeNVPTX(*this));
- break;
case llvm::Triple::amdgcn:
assert(getLangOpts().OpenMPIsDevice &&
- "OpenMP AMDGCN is only prepared to deal with device code.");
- OpenMPRuntime.reset(new CGOpenMPRuntimeAMDGCN(*this));
+ "OpenMP AMDGPU/NVPTX is only prepared to deal with device code.");
+ OpenMPRuntime.reset(new CGOpenMPRuntimeGPU(*this));
break;
default:
if (LangOpts.OpenMPSimd)
@@ -315,22 +312,58 @@ void CodeGenModule::applyGlobalValReplacements() {
// This is only used in aliases that we created and we know they have a
// linear structure.
-static const llvm::GlobalObject *getAliasedGlobal(
- const llvm::GlobalIndirectSymbol &GIS) {
- llvm::SmallPtrSet<const llvm::GlobalIndirectSymbol*, 4> Visited;
- const llvm::Constant *C = &GIS;
- for (;;) {
- C = C->stripPointerCasts();
- if (auto *GO = dyn_cast<llvm::GlobalObject>(C))
- return GO;
- // stripPointerCasts will not walk over weak aliases.
- auto *GIS2 = dyn_cast<llvm::GlobalIndirectSymbol>(C);
- if (!GIS2)
- return nullptr;
- if (!Visited.insert(GIS2).second)
- return nullptr;
- C = GIS2->getIndirectSymbol();
+static const llvm::GlobalValue *getAliasedGlobal(const llvm::GlobalValue *GV) {
+ const llvm::Constant *C;
+ if (auto *GA = dyn_cast<llvm::GlobalAlias>(GV))
+ C = GA->getAliasee();
+ else if (auto *GI = dyn_cast<llvm::GlobalIFunc>(GV))
+ C = GI->getResolver();
+ else
+ return GV;
+
+ const auto *AliaseeGV = dyn_cast<llvm::GlobalValue>(C->stripPointerCasts());
+ if (!AliaseeGV)
+ return nullptr;
+
+ const llvm::GlobalValue *FinalGV = AliaseeGV->getAliaseeObject();
+ if (FinalGV == GV)
+ return nullptr;
+
+ return FinalGV;
+}
+
+static bool checkAliasedGlobal(DiagnosticsEngine &Diags,
+ SourceLocation Location, bool IsIFunc,
+ const llvm::GlobalValue *Alias,
+ const llvm::GlobalValue *&GV) {
+ GV = getAliasedGlobal(Alias);
+ if (!GV) {
+ Diags.Report(Location, diag::err_cyclic_alias) << IsIFunc;
+ return false;
+ }
+
+ if (GV->isDeclaration()) {
+ Diags.Report(Location, diag::err_alias_to_undefined) << IsIFunc << IsIFunc;
+ return false;
+ }
+
+ if (IsIFunc) {
+ // Check resolver function type.
+ const auto *F = dyn_cast<llvm::Function>(GV);
+ if (!F) {
+ Diags.Report(Location, diag::err_alias_to_undefined)
+ << IsIFunc << IsIFunc;
+ return false;
+ }
+
+ llvm::FunctionType *FTy = F->getFunctionType();
+ if (!FTy->getReturnType()->isPointerTy()) {
+ Diags.Report(Location, diag::err_ifunc_resolver_return);
+ return false;
+ }
}
+
+ return true;
}
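// Illustrative sketch (not part of the patch): among other things the helper
// above now rejects an ifunc whose resolver does not return a pointer, e.g.:
static int not_a_resolver(void) { return 0; }  // resolver must return a pointer
void entry(void) __attribute__((ifunc("not_a_resolver")));  // err_ifunc_resolver_return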
void CodeGenModule::checkAliases() {
@@ -347,27 +380,19 @@ void CodeGenModule::checkAliases() {
Location = A->getLocation();
else
llvm_unreachable("Not an alias or ifunc?");
+
StringRef MangledName = getMangledName(GD);
- llvm::GlobalValue *Entry = GetGlobalValue(MangledName);
- auto *Alias = cast<llvm::GlobalIndirectSymbol>(Entry);
- const llvm::GlobalValue *GV = getAliasedGlobal(*Alias);
- if (!GV) {
- Error = true;
- Diags.Report(Location, diag::err_cyclic_alias) << IsIFunc;
- } else if (GV->isDeclaration()) {
+ llvm::GlobalValue *Alias = GetGlobalValue(MangledName);
+ const llvm::GlobalValue *GV = nullptr;
+ if (!checkAliasedGlobal(Diags, Location, IsIFunc, Alias, GV)) {
Error = true;
- Diags.Report(Location, diag::err_alias_to_undefined)
- << IsIFunc << IsIFunc;
- } else if (IsIFunc) {
- // Check resolver function type.
- llvm::FunctionType *FTy = dyn_cast<llvm::FunctionType>(
- GV->getType()->getPointerElementType());
- assert(FTy);
- if (!FTy->getReturnType()->isPointerTy())
- Diags.Report(Location, diag::err_ifunc_resolver_return);
+ continue;
}
- llvm::Constant *Aliasee = Alias->getIndirectSymbol();
+ llvm::Constant *Aliasee =
+ IsIFunc ? cast<llvm::GlobalIFunc>(Alias)->getResolver()
+ : cast<llvm::GlobalAlias>(Alias)->getAliasee();
+
llvm::GlobalValue *AliaseeGV;
if (auto CE = dyn_cast<llvm::ConstantExpr>(Aliasee))
AliaseeGV = cast<llvm::GlobalValue>(CE->getOperand(0));
@@ -386,13 +411,17 @@ void CodeGenModule::checkAliases() {
// compatibility with gcc we implement it by just pointing the alias
// to its aliasee's aliasee. We also warn, since the user is probably
// expecting the link to be weak.
- if (auto GA = dyn_cast<llvm::GlobalIndirectSymbol>(AliaseeGV)) {
+ if (auto *GA = dyn_cast<llvm::GlobalAlias>(AliaseeGV)) {
if (GA->isInterposable()) {
Diags.Report(Location, diag::warn_alias_to_weak_alias)
<< GV->getName() << GA->getName() << IsIFunc;
Aliasee = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
- GA->getIndirectSymbol(), Alias->getType());
- Alias->setIndirectSymbol(Aliasee);
+ GA->getAliasee(), Alias->getType());
+
+ if (IsIFunc)
+ cast<llvm::GlobalIFunc>(Alias)->setResolver(Aliasee);
+ else
+ cast<llvm::GlobalAlias>(Alias)->setAliasee(Aliasee);
}
}
}
@@ -401,8 +430,7 @@ void CodeGenModule::checkAliases() {
for (const GlobalDecl &GD : Aliases) {
StringRef MangledName = getMangledName(GD);
- llvm::GlobalValue *Entry = GetGlobalValue(MangledName);
- auto *Alias = cast<llvm::GlobalIndirectSymbol>(Entry);
+ llvm::GlobalValue *Alias = GetGlobalValue(MangledName);
Alias->replaceAllUsesWith(llvm::UndefValue::get(Alias->getType()));
Alias->eraseFromParent();
}
@@ -537,6 +565,7 @@ void CodeGenModule::Release() {
"__amdgpu_device_library_preserve_asan_functions_ptr", nullptr,
llvm::GlobalVariable::NotThreadLocal);
addCompilerUsedGlobal(Var);
+ getModule().addModuleFlag(llvm::Module::Override, "amdgpu_hostcall", 1);
}
emitLLVMUsed();
@@ -731,8 +760,9 @@ void CodeGenModule::Release() {
if (getTriple().isSPIR()) {
// SPIR v2.0 s2.12 - The SPIR version used by the module is stored in the
// opencl.spir.version named metadata.
- // C++ is backwards compatible with OpenCL v2.0.
- auto Version = LangOpts.OpenCLCPlusPlus ? 200 : LangOpts.OpenCLVersion;
+ // C++ for OpenCL has a distinct mapping for version compatibility with
+ // OpenCL.
+ auto Version = LangOpts.getOpenCLCompatibleVersion();
llvm::Metadata *SPIRVerElts[] = {
llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
Int32Ty, Version / 100)),
@@ -810,6 +840,8 @@ void CodeGenModule::Release() {
getCodeGenOpts().StackProtectorGuardOffset);
if (getCodeGenOpts().StackAlignment)
getModule().setOverrideStackAlignment(getCodeGenOpts().StackAlignment);
+ if (getCodeGenOpts().SkipRaxSetup)
+ getModule().addModuleFlag(llvm::Module::Override, "SkipRaxSetup", 1);
getTargetCodeGenInfo().emitTargetMetadata(*this, MangledDeclNames);
@@ -825,9 +857,8 @@ void CodeGenModule::Release() {
void CodeGenModule::EmitOpenCLMetadata() {
// SPIR v2.0 s2.13 - The OpenCL version used by the module is stored in the
// opencl.ocl.version named metadata node.
- // C++ is backwards compatible with OpenCL v2.0.
- // FIXME: We might need to add CXX version at some point too?
- auto Version = LangOpts.OpenCLCPlusPlus ? 200 : LangOpts.OpenCLVersion;
+ // C++ for OpenCL has a distinct mapping for versions compatible with OpenCL.
+ auto Version = LangOpts.getOpenCLCompatibleVersion();
llvm::Metadata *OCLVerElts[] = {
llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
Int32Ty, Version / 100)),
@@ -1781,7 +1812,7 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
CodeGenOpts.getInlining() == CodeGenOptions::OnlyAlwaysInlining)
B.addAttribute(llvm::Attribute::NoInline);
- F->addAttributes(llvm::AttributeList::FunctionIndex, B);
+ F->addFnAttrs(B);
return;
}
@@ -1868,7 +1899,7 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
B.addAttribute(llvm::Attribute::MinSize);
}
- F->addAttributes(llvm::AttributeList::FunctionIndex, B);
+ F->addFnAttrs(B);
unsigned alignment = D->getMaxAlignment() / Context.getCharWidth();
if (alignment)
@@ -1918,13 +1949,13 @@ void CodeGenModule::setLLVMFunctionFEnvAttributes(const FunctionDecl *D,
if (D->hasAttr<StrictFPAttr>()) {
llvm::AttrBuilder FuncAttrs;
FuncAttrs.addAttribute("strictfp");
- F->addAttributes(llvm::AttributeList::FunctionIndex, FuncAttrs);
+ F->addFnAttrs(FuncAttrs);
}
}
void CodeGenModule::SetCommonAttributes(GlobalDecl GD, llvm::GlobalValue *GV) {
const Decl *D = GD.getDecl();
- if (dyn_cast_or_null<NamedDecl>(D))
+ if (isa_and_nonnull<NamedDecl>(D))
setGVProperties(GV, GD);
else
GV->setVisibility(llvm::GlobalValue::DefaultVisibility);
@@ -2034,8 +2065,8 @@ void CodeGenModule::setNonAliasAttributes(GlobalDecl GD,
RemoveAttrs.addAttribute("target-cpu");
RemoveAttrs.addAttribute("target-features");
RemoveAttrs.addAttribute("tune-cpu");
- F->removeAttributes(llvm::AttributeList::FunctionIndex, RemoveAttrs);
- F->addAttributes(llvm::AttributeList::FunctionIndex, Attrs);
+ F->removeFnAttrs(RemoveAttrs);
+ F->addFnAttrs(Attrs);
}
}
@@ -2118,7 +2149,7 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F,
F->arg_begin()->getType()
->canLosslesslyBitCastTo(F->getReturnType()) &&
"unexpected this return");
- F->addAttribute(1, llvm::Attribute::Returned);
+ F->addParamAttr(0, llvm::Attribute::Returned);
}
// Only a few attributes are set on declarations; these may later be
@@ -2136,6 +2167,13 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F,
else if (const auto *SA = FD->getAttr<SectionAttr>())
F->setSection(SA->getName());
+ if (const auto *EA = FD->getAttr<ErrorAttr>()) {
+ if (EA->isError())
+ F->addFnAttr("dontcall-error", EA->getUserDiagnostic());
+ else if (EA->isWarning())
+ F->addFnAttr("dontcall-warn", EA->getUserDiagnostic());
+ }
+
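// Illustrative sketch (not part of the patch): the GCC-compatible error/warning
// function attributes are forwarded to the backend as IR function attributes.
// For a hypothetical declaration:
__attribute__((error("never call this"))) void forbidden(void);
// the llvm::Function carries "dontcall-error"="never call this", and the
// backend reports a diagnostic for any call that survives optimization.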
// If we plan on emitting this inline builtin, we can't treat it as a builtin.
if (FD->isInlineBuiltinDeclaration()) {
const FunctionDecl *FDBody;
@@ -2144,15 +2182,13 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F,
assert(HasBody && "Inline builtin declarations should always have an "
"available body!");
if (shouldEmitFunction(FDBody))
- F->addAttribute(llvm::AttributeList::FunctionIndex,
- llvm::Attribute::NoBuiltin);
+ F->addFnAttr(llvm::Attribute::NoBuiltin);
}
if (FD->isReplaceableGlobalAllocationFunction()) {
// A replaceable global allocation function does not act like a builtin by
// default, only if it is invoked by a new-expression or delete-expression.
- F->addAttribute(llvm::AttributeList::FunctionIndex,
- llvm::Attribute::NoBuiltin);
+ F->addFnAttr(llvm::Attribute::NoBuiltin);
}
if (isa<CXXConstructorDecl>(FD) || isa<CXXDestructorDecl>(FD))
@@ -2281,9 +2317,9 @@ static void addLinkOptionsPostorder(CodeGenModule &CGM, Module *Mod,
}
// Import this module's dependencies.
- for (unsigned I = Mod->Imports.size(); I > 0; --I) {
- if (Visited.insert(Mod->Imports[I - 1]).second)
- addLinkOptionsPostorder(CGM, Mod->Imports[I-1], Metadata, Visited);
+ for (Module *Import : llvm::reverse(Mod->Imports)) {
+ if (Visited.insert(Import).second)
+ addLinkOptionsPostorder(CGM, Import, Metadata, Visited);
}
// Add linker options to link against the libraries/frameworks
@@ -2296,13 +2332,12 @@ static void addLinkOptionsPostorder(CodeGenModule &CGM, Module *Mod,
if (Mod->UseExportAsModuleLinkName)
return;
- for (unsigned I = Mod->LinkLibraries.size(); I > 0; --I) {
+ for (const Module::LinkLibrary &LL : llvm::reverse(Mod->LinkLibraries)) {
// Link against a framework. Frameworks are currently Darwin only, so we
// don't need to ask TargetCodeGenInfo for the spelling of the linker option.
- if (Mod->LinkLibraries[I-1].IsFramework) {
- llvm::Metadata *Args[2] = {
- llvm::MDString::get(Context, "-framework"),
- llvm::MDString::get(Context, Mod->LinkLibraries[I - 1].Library)};
+ if (LL.IsFramework) {
+ llvm::Metadata *Args[2] = {llvm::MDString::get(Context, "-framework"),
+ llvm::MDString::get(Context, LL.Library)};
Metadata.push_back(llvm::MDNode::get(Context, Args));
continue;
@@ -2312,13 +2347,12 @@ static void addLinkOptionsPostorder(CodeGenModule &CGM, Module *Mod,
if (IsELF) {
llvm::Metadata *Args[2] = {
llvm::MDString::get(Context, "lib"),
- llvm::MDString::get(Context, Mod->LinkLibraries[I - 1].Library),
+ llvm::MDString::get(Context, LL.Library),
};
Metadata.push_back(llvm::MDNode::get(Context, Args));
} else {
llvm::SmallString<24> Opt;
- CGM.getTargetCodeGenInfo().getDependentLibraryOption(
- Mod->LinkLibraries[I - 1].Library, Opt);
+ CGM.getTargetCodeGenInfo().getDependentLibraryOption(LL.Library, Opt);
auto *OptString = llvm::MDString::get(Context, Opt);
Metadata.push_back(llvm::MDNode::get(Context, OptString));
}
@@ -2531,7 +2565,7 @@ llvm::Constant *CodeGenModule::EmitAnnotationLineNo(SourceLocation L) {
llvm::Constant *CodeGenModule::EmitAnnotationArgs(const AnnotateAttr *Attr) {
ArrayRef<Expr *> Exprs = {Attr->args_begin(), Attr->args_size()};
if (Exprs.empty())
- return llvm::ConstantPointerNull::get(Int8PtrTy);
+ return llvm::ConstantPointerNull::get(GlobalsInt8PtrTy);
llvm::FoldingSetNodeID ID;
for (Expr *E : Exprs) {
@@ -2555,7 +2589,7 @@ llvm::Constant *CodeGenModule::EmitAnnotationArgs(const AnnotateAttr *Attr) {
".args");
GV->setSection(AnnotationSection);
GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
- auto *Bitcasted = llvm::ConstantExpr::getBitCast(GV, Int8PtrTy);
+ auto *Bitcasted = llvm::ConstantExpr::getBitCast(GV, GlobalsInt8PtrTy);
Lookup = Bitcasted;
return Bitcasted;
@@ -2570,17 +2604,19 @@ llvm::Constant *CodeGenModule::EmitAnnotateAttr(llvm::GlobalValue *GV,
*LineNoCst = EmitAnnotationLineNo(L),
*Args = EmitAnnotationArgs(AA);
- llvm::Constant *ASZeroGV = GV;
- if (GV->getAddressSpace() != 0) {
- ASZeroGV = llvm::ConstantExpr::getAddrSpaceCast(
- GV, GV->getValueType()->getPointerTo(0));
+ llvm::Constant *GVInGlobalsAS = GV;
+ if (GV->getAddressSpace() !=
+ getDataLayout().getDefaultGlobalsAddressSpace()) {
+ GVInGlobalsAS = llvm::ConstantExpr::getAddrSpaceCast(
+ GV, GV->getValueType()->getPointerTo(
+ getDataLayout().getDefaultGlobalsAddressSpace()));
}
// Create the ConstantStruct for the global annotation.
llvm::Constant *Fields[] = {
- llvm::ConstantExpr::getBitCast(ASZeroGV, Int8PtrTy),
- llvm::ConstantExpr::getBitCast(AnnoGV, Int8PtrTy),
- llvm::ConstantExpr::getBitCast(UnitGV, Int8PtrTy),
+ llvm::ConstantExpr::getBitCast(GVInGlobalsAS, GlobalsInt8PtrTy),
+ llvm::ConstantExpr::getBitCast(AnnoGV, GlobalsInt8PtrTy),
+ llvm::ConstantExpr::getBitCast(UnitGV, GlobalsInt8PtrTy),
LineNoCst,
Args,
};
@@ -2853,7 +2889,8 @@ ConstantAddress CodeGenModule::GetWeakRefReference(const ValueDecl *VD) {
GlobalDecl(cast<FunctionDecl>(VD)),
/*ForVTable=*/false);
else
- Aliasee = GetOrCreateLLVMGlobal(AA->getAliasee(), DeclTy, 0, nullptr);
+ Aliasee = GetOrCreateLLVMGlobal(AA->getAliasee(), DeclTy, LangAS::Default,
+ nullptr);
auto *F = cast<llvm::GlobalValue>(Aliasee);
F->setLinkage(llvm::Function::ExternalWeakLinkage);
@@ -3163,6 +3200,11 @@ bool CodeGenModule::shouldEmitFunction(GlobalDecl GD) {
}
}
+ // Inline builtin declarations must be emitted. They are often fortified
+ // functions.
+ if (F->isInlineBuiltinDeclaration())
+ return true;
+
// PR9614. Avoid cases where the source code is lying to us. An available
// externally function should have an equivalent function somewhere else,
// but a function that calls itself through asm label/`__builtin_` trickery is
@@ -3252,6 +3294,19 @@ TargetMVPriority(const TargetInfo &TI,
return Priority;
}
+// Multiversion functions should be at most 'WeakODRLinkage' so that a different
+// TU can forward declare the function without causing problems; CPUDispatch in
+// particular runs into issues otherwise. This also makes sure we handle
+// internal linkage functions, so that the same function name can be used with
+// internal linkage in multiple TUs.
+llvm::GlobalValue::LinkageTypes getMultiversionLinkage(CodeGenModule &CGM,
+ GlobalDecl GD) {
+ const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl());
+ if (FD->getFormalLinkage() == InternalLinkage)
+ return llvm::GlobalValue::InternalLinkage;
+ return llvm::GlobalValue::WeakODRLinkage;
+}
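// Illustrative sketch (not part of the patch): with the internal-linkage case
// handled, each TU can multiversion its own static helper of the same name
// without the generated resolvers clashing across TUs, e.g.:
static __attribute__((target("avx2"))) int vsum(const int *p, int n) {
  int s = 0; for (int i = 0; i < n; ++i) s += p[i]; return s;
}
static __attribute__((target("default"))) int vsum(const int *p, int n) {
  int s = 0; for (int i = 0; i < n; ++i) s += p[i]; return s;
}
// Both versions and the resolver now keep internal linkage instead of being
// forced to WeakODR, so another TU may define its own internal "vsum".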
+
void CodeGenModule::emitMultiVersionFunctions() {
std::vector<GlobalDecl> MVFuncsToEmit;
MultiVersionFuncs.swap(MVFuncsToEmit);
@@ -3292,7 +3347,7 @@ void CodeGenModule::emitMultiVersionFunctions() {
if (TI.supportsIFunc() || FD->isTargetMultiVersion()) {
ResolverFunc = cast<llvm::Function>(
GetGlobalValue((getMangledName(GD) + ".resolver").str()));
- ResolverFunc->setLinkage(llvm::Function::WeakODRLinkage);
+ ResolverFunc->setLinkage(getMultiversionLinkage(*this, GD));
} else {
ResolverFunc = cast<llvm::Function>(GetGlobalValue(getMangledName(GD)));
}
@@ -3350,7 +3405,7 @@ void CodeGenModule::emitCPUDispatchDefinition(GlobalDecl GD) {
auto *ResolverFunc = cast<llvm::Function>(GetOrCreateLLVMFunction(
ResolverName, ResolverType, ResolverGD, /*ForVTable=*/false));
- ResolverFunc->setLinkage(llvm::Function::WeakODRLinkage);
+ ResolverFunc->setLinkage(getMultiversionLinkage(*this, GD));
if (supportsCOMDAT())
ResolverFunc->setComdat(
getModule().getOrInsertComdat(ResolverFunc->getName()));
@@ -3386,10 +3441,9 @@ void CodeGenModule::emitCPUDispatchDefinition(GlobalDecl GD) {
Target.getCPUSpecificCPUDispatchFeatures(II->getName(), Features);
llvm::transform(Features, Features.begin(),
[](StringRef Str) { return Str.substr(1); });
- Features.erase(std::remove_if(
- Features.begin(), Features.end(), [&Target](StringRef Feat) {
- return !Target.validateCpuSupports(Feat);
- }), Features.end());
+ llvm::erase_if(Features, [&Target](StringRef Feat) {
+ return !Target.validateCpuSupports(Feat);
+ });
Options.emplace_back(cast<llvm::Function>(Func), StringRef{}, Features);
++Index;
}
@@ -3397,8 +3451,8 @@ void CodeGenModule::emitCPUDispatchDefinition(GlobalDecl GD) {
llvm::stable_sort(
Options, [](const CodeGenFunction::MultiVersionResolverOption &LHS,
const CodeGenFunction::MultiVersionResolverOption &RHS) {
- return CodeGenFunction::GetX86CpuSupportsMask(LHS.Conditions.Features) >
- CodeGenFunction::GetX86CpuSupportsMask(RHS.Conditions.Features);
+ return llvm::X86::getCpuSupportsMask(LHS.Conditions.Features) >
+ llvm::X86::getCpuSupportsMask(RHS.Conditions.Features);
});
// If the list contains multiple 'default' versions, such as when it contains
@@ -3406,7 +3460,7 @@ void CodeGenModule::emitCPUDispatchDefinition(GlobalDecl GD) {
// always run on at least a 'pentium'). We do this by deleting the 'least
// advanced' (read, lowest mangling letter).
while (Options.size() > 1 &&
- CodeGenFunction::GetX86CpuSupportsMask(
+ llvm::X86::getCpuSupportsMask(
(Options.end() - 2)->Conditions.Features) == 0) {
StringRef LHSName = (Options.end() - 2)->Function->getName();
StringRef RHSName = (Options.end() - 1)->Function->getName();
@@ -3427,9 +3481,9 @@ void CodeGenModule::emitCPUDispatchDefinition(GlobalDecl GD) {
auto *IFunc = cast<llvm::GlobalIFunc>(GetOrCreateLLVMFunction(
AliasName, DeclTy, GD, /*ForVTable=*/false, /*DontDefer=*/true,
/*IsThunk=*/false, llvm::AttributeList(), NotForDefinition));
- auto *GA = llvm::GlobalAlias::create(
- DeclTy, 0, getFunctionLinkage(GD), AliasName, IFunc, &getModule());
- GA->setLinkage(llvm::Function::WeakODRLinkage);
+ auto *GA = llvm::GlobalAlias::create(DeclTy, 0,
+ getMultiversionLinkage(*this, GD),
+ AliasName, IFunc, &getModule());
SetCommonAttributes(GD, GA);
}
}
@@ -3468,8 +3522,9 @@ llvm::Constant *CodeGenModule::GetOrCreateMultiVersionResolver(
llvm::Constant *Resolver = GetOrCreateLLVMFunction(
MangledName + ".resolver", ResolverType, GlobalDecl{},
/*ForVTable=*/false);
- llvm::GlobalIFunc *GIF = llvm::GlobalIFunc::create(
- DeclTy, 0, llvm::Function::WeakODRLinkage, "", Resolver, &getModule());
+ llvm::GlobalIFunc *GIF =
+ llvm::GlobalIFunc::create(DeclTy, 0, getMultiversionLinkage(*this, GD),
+ "", Resolver, &getModule());
GIF->setName(ResolverName);
SetCommonAttributes(FD, GIF);
@@ -3613,9 +3668,9 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction(
assert(F->getName() == MangledName && "name was uniqued!");
if (D)
SetFunctionAttributes(GD, F, IsIncompleteFunction, IsThunk);
- if (ExtraAttrs.hasAttributes(llvm::AttributeList::FunctionIndex)) {
+ if (ExtraAttrs.hasFnAttrs()) {
llvm::AttrBuilder B(ExtraAttrs, llvm::AttributeList::FunctionIndex);
- F->addAttributes(llvm::AttributeList::FunctionIndex, B);
+ F->addFnAttrs(B);
}
if (!DontDefer) {
@@ -3761,8 +3816,7 @@ CodeGenModule::CreateRuntimeFunction(llvm::FunctionType *FTy, StringRef Name,
bool AssumeConvergent) {
if (AssumeConvergent) {
ExtraAttrs =
- ExtraAttrs.addAttribute(VMContext, llvm::AttributeList::FunctionIndex,
- llvm::Attribute::Convergent);
+ ExtraAttrs.addFnAttribute(VMContext, llvm::Attribute::Convergent);
}
llvm::Constant *C =
@@ -3827,10 +3881,11 @@ bool CodeGenModule::isTypeConstant(QualType Ty, bool ExcludeCtor) {
/// mangled name but some other type.
llvm::Constant *
CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, llvm::Type *Ty,
- unsigned AddrSpace, const VarDecl *D,
+ LangAS AddrSpace, const VarDecl *D,
ForDefinition_t IsForDefinition) {
// Lookup the entry, lazily creating it if necessary.
llvm::GlobalValue *Entry = GetGlobalValue(MangledName);
+ unsigned TargetAS = getContext().getTargetAddressSpace(AddrSpace);
if (Entry) {
if (WeakRefReferences.erase(Entry)) {
if (D && !D->hasAttr<WeakAttr>())
@@ -3844,7 +3899,7 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, llvm::Type *Ty,
if (LangOpts.OpenMP && !LangOpts.OpenMPSimd && D)
getOpenMPRuntime().registerTargetGlobalVariable(D, Entry);
- if (Entry->getValueType() == Ty && Entry->getAddressSpace() == AddrSpace)
+ if (Entry->getValueType() == Ty && Entry->getAddressSpace() == TargetAS)
return Entry;
// If there are two attempts to define the same mangled name, issue an
@@ -3868,24 +3923,23 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, llvm::Type *Ty,
}
// Make sure the result is of the correct type.
- if (Entry->getType()->getAddressSpace() != AddrSpace) {
+ if (Entry->getType()->getAddressSpace() != TargetAS) {
return llvm::ConstantExpr::getAddrSpaceCast(Entry,
- Ty->getPointerTo(AddrSpace));
+ Ty->getPointerTo(TargetAS));
}
// (If global is requested for a definition, we always need to create a new
// global, not just return a bitcast.)
if (!IsForDefinition)
- return llvm::ConstantExpr::getBitCast(Entry, Ty->getPointerTo(AddrSpace));
+ return llvm::ConstantExpr::getBitCast(Entry, Ty->getPointerTo(TargetAS));
}
auto DAddrSpace = GetGlobalVarAddressSpace(D);
- auto TargetAddrSpace = getContext().getTargetAddressSpace(DAddrSpace);
auto *GV = new llvm::GlobalVariable(
getModule(), Ty, false, llvm::GlobalValue::ExternalLinkage, nullptr,
MangledName, nullptr, llvm::GlobalVariable::NotThreadLocal,
- TargetAddrSpace);
+ getContext().getTargetAddressSpace(DAddrSpace));
// If we already created a global with the same mangled name (but different
// type) before, take its name and remove it from its parent.
@@ -4008,10 +4062,10 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, llvm::Type *Ty,
LangAS ExpectedAS =
D ? D->getType().getAddressSpace()
: (LangOpts.OpenCL ? LangAS::opencl_global : LangAS::Default);
- assert(getContext().getTargetAddressSpace(ExpectedAS) == AddrSpace);
+ assert(getContext().getTargetAddressSpace(ExpectedAS) == TargetAS);
if (DAddrSpace != ExpectedAS) {
return getTargetCodeGenInfo().performAddrSpaceCast(
- *this, GV, DAddrSpace, ExpectedAS, Ty->getPointerTo(AddrSpace));
+ *this, GV, DAddrSpace, ExpectedAS, Ty->getPointerTo(TargetAS));
}
return GV;
@@ -4101,8 +4155,7 @@ llvm::Constant *CodeGenModule::GetAddrOfGlobalVar(const VarDecl *D,
Ty = getTypes().ConvertTypeForMem(ASTTy);
StringRef MangledName = getMangledName(D);
- return GetOrCreateLLVMGlobal(MangledName, Ty,
- getContext().getTargetAddressSpace(ASTTy), D,
+ return GetOrCreateLLVMGlobal(MangledName, Ty, ASTTy.getAddressSpace(), D,
IsForDefinition);
}
@@ -4111,10 +4164,8 @@ llvm::Constant *CodeGenModule::GetAddrOfGlobalVar(const VarDecl *D,
llvm::Constant *
CodeGenModule::CreateRuntimeVariable(llvm::Type *Ty,
StringRef Name) {
- auto AddrSpace =
- getContext().getLangOpts().OpenCL
- ? getContext().getTargetAddressSpace(LangAS::opencl_global)
- : 0;
+ LangAS AddrSpace = getContext().getLangOpts().OpenCL ? LangAS::opencl_global
+ : LangAS::Default;
auto *Ret = GetOrCreateLLVMGlobal(Name, Ty, AddrSpace, nullptr);
setDSOLocal(cast<llvm::GlobalValue>(Ret->stripPointerCasts()));
return Ret;
@@ -4153,16 +4204,15 @@ CharUnits CodeGenModule::GetTargetTypeStoreSize(llvm::Type *Ty) const {
}
LangAS CodeGenModule::GetGlobalVarAddressSpace(const VarDecl *D) {
- LangAS AddrSpace = LangAS::Default;
if (LangOpts.OpenCL) {
- AddrSpace = D ? D->getType().getAddressSpace() : LangAS::opencl_global;
- assert(AddrSpace == LangAS::opencl_global ||
- AddrSpace == LangAS::opencl_global_device ||
- AddrSpace == LangAS::opencl_global_host ||
- AddrSpace == LangAS::opencl_constant ||
- AddrSpace == LangAS::opencl_local ||
- AddrSpace >= LangAS::FirstTargetAddressSpace);
- return AddrSpace;
+ LangAS AS = D ? D->getType().getAddressSpace() : LangAS::opencl_global;
+ assert(AS == LangAS::opencl_global ||
+ AS == LangAS::opencl_global_device ||
+ AS == LangAS::opencl_global_host ||
+ AS == LangAS::opencl_constant ||
+ AS == LangAS::opencl_local ||
+ AS >= LangAS::FirstTargetAddressSpace);
+ return AS;
}
if (LangOpts.SYCLIsDevice &&
@@ -4261,11 +4311,6 @@ static bool shouldBeInCOMDAT(CodeGenModule &CGM, const Decl &D) {
if (!CGM.supportsCOMDAT())
return false;
- // Do not set COMDAT attribute for CUDA/HIP stub functions to prevent
- // them being "merged" by the COMDAT Folding linker optimization.
- if (D.hasAttr<CUDAGlobalAttr>())
- return false;
-
if (D.hasAttr<SelectAnyAttr>())
return true;
@@ -4438,7 +4483,9 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
if (GV && LangOpts.CUDA) {
if (LangOpts.CUDAIsDevice) {
if (Linkage != llvm::GlobalValue::InternalLinkage &&
- (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>()))
+ (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() ||
+ D->getType()->isCUDADeviceBuiltinSurfaceType() ||
+ D->getType()->isCUDADeviceBuiltinTextureType()))
GV->setExternallyInitialized(true);
} else {
getCUDARuntime().internalizeDeviceSideVar(D, Linkage);
@@ -4529,8 +4576,8 @@ void CodeGenModule::EmitExternalVarDeclaration(const VarDecl *D) {
if (getCodeGenOpts().hasReducedDebugInfo()) {
QualType ASTTy = D->getType();
llvm::Type *Ty = getTypes().ConvertTypeForMem(D->getType());
- llvm::Constant *GV = GetOrCreateLLVMGlobal(
- D->getName(), Ty, getContext().getTargetAddressSpace(ASTTy), D);
+ llvm::Constant *GV =
+ GetOrCreateLLVMGlobal(D->getName(), Ty, ASTTy.getAddressSpace(), D);
DI->EmitExternalVariable(
cast<llvm::GlobalVariable>(GV->stripPointerCasts()), D);
}
@@ -4747,7 +4794,7 @@ static void replaceUsesOfNonProtoConstant(llvm::Constant *old,
}
// Add any parameter attributes.
- newArgAttrs.push_back(oldAttrs.getParamAttributes(argNo));
+ newArgAttrs.push_back(oldAttrs.getParamAttrs(argNo));
argNo++;
}
if (dontTransform)
@@ -4762,7 +4809,7 @@ static void replaceUsesOfNonProtoConstant(llvm::Constant *old,
callSite->getOperandBundlesAsDefs(newBundles);
llvm::CallBase *newCall;
- if (dyn_cast<llvm::CallInst>(callSite)) {
+ if (isa<llvm::CallInst>(callSite)) {
newCall =
llvm::CallInst::Create(newFn, newArgs, newBundles, "", callSite);
} else {
@@ -4775,9 +4822,9 @@ static void replaceUsesOfNonProtoConstant(llvm::Constant *old,
if (!newCall->getType()->isVoidTy())
newCall->takeName(callSite);
- newCall->setAttributes(llvm::AttributeList::get(
- newFn->getContext(), oldAttrs.getFnAttributes(),
- oldAttrs.getRetAttributes(), newArgAttrs));
+ newCall->setAttributes(
+ llvm::AttributeList::get(newFn->getContext(), oldAttrs.getFnAttrs(),
+ oldAttrs.getRetAttrs(), newArgAttrs));
newCall->setCallingConv(callSite->getCallingConv());
// Finally, remove the old call, replacing any uses with the new one.
@@ -4902,7 +4949,7 @@ void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) {
/*ForVTable=*/false);
LT = getFunctionLinkage(GD);
} else {
- Aliasee = GetOrCreateLLVMGlobal(AA->getAliasee(), DeclTy, 0,
+ Aliasee = GetOrCreateLLVMGlobal(AA->getAliasee(), DeclTy, LangAS::Default,
/*D=*/nullptr);
if (const auto *VD = dyn_cast<VarDecl>(GD.getDecl()))
LT = getLLVMLinkageVarDefinition(VD, D->getType().isConstQualified());
@@ -4983,8 +5030,9 @@ void CodeGenModule::emitIFuncDefinition(GlobalDecl GD) {
Aliases.push_back(GD);
llvm::Type *DeclTy = getTypes().ConvertTypeForMem(D->getType());
+ llvm::Type *ResolverTy = llvm::GlobalIFunc::getResolverFunctionType(DeclTy);
llvm::Constant *Resolver =
- GetOrCreateLLVMFunction(IFA->getResolver(), DeclTy, GD,
+ GetOrCreateLLVMFunction(IFA->getResolver(), ResolverTy, {},
/*ForVTable=*/false);
llvm::GlobalIFunc *GIF =
llvm::GlobalIFunc::create(DeclTy, 0, llvm::Function::ExternalLinkage,
@@ -5360,7 +5408,7 @@ CodeGenModule::GetAddrOfConstantStringFromLiteral(const StringLiteral *S,
if (!LangOpts.WritableStrings) {
Entry = &ConstantStringMap[C];
if (auto GV = *Entry) {
- if (Alignment.getQuantity() > GV->getAlignment())
+ if (uint64_t(Alignment.getQuantity()) > GV->getAlignment())
GV->setAlignment(Alignment.getAsAlign());
return ConstantAddress(castStringLiteralToDefaultAddressSpace(*this, GV),
Alignment);
@@ -5423,7 +5471,7 @@ ConstantAddress CodeGenModule::GetAddrOfConstantCString(
if (!LangOpts.WritableStrings) {
Entry = &ConstantStringMap[C];
if (auto GV = *Entry) {
- if (Alignment.getQuantity() > GV->getAlignment())
+ if (uint64_t(Alignment.getQuantity()) > GV->getAlignment())
GV->setAlignment(Alignment.getAsAlign());
return ConstantAddress(castStringLiteralToDefaultAddressSpace(*this, GV),
Alignment);
@@ -6448,5 +6496,5 @@ bool CodeGenModule::stopAutoInit() {
void CodeGenModule::printPostfixForExternalizedStaticVar(
llvm::raw_ostream &OS) const {
- OS << ".static." << getContext().getCUIDHash();
+ OS << "__static__" << getContext().getCUIDHash();
}
diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h
index 47dc6f415b60..fbed22376c82 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -1478,8 +1478,8 @@ private:
void UpdateMultiVersionNames(GlobalDecl GD, const FunctionDecl *FD);
llvm::Constant *
- GetOrCreateLLVMGlobal(StringRef MangledName, llvm::Type *Ty,
- unsigned AddrSpace, const VarDecl *D,
+ GetOrCreateLLVMGlobal(StringRef MangledName, llvm::Type *Ty, LangAS AddrSpace,
+ const VarDecl *D,
ForDefinition_t IsForDefinition = NotForDefinition);
bool GetCPUAndFeaturesAttributes(GlobalDecl GD,
diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp
index d828ac0eb5e9..ab953c2c7d52 100644
--- a/clang/lib/CodeGen/CodeGenPGO.cpp
+++ b/clang/lib/CodeGen/CodeGenPGO.cpp
@@ -649,6 +649,14 @@ struct ComputeRegionCounts : public ConstStmtVisitor<ComputeRegionCounts> {
void VisitIfStmt(const IfStmt *S) {
RecordStmtCount(S);
+
+ if (S->isConsteval()) {
+ const Stmt *Stm = S->isNegatedConsteval() ? S->getThen() : S->getElse();
+ if (Stm)
+ Visit(Stm);
+ return;
+ }
+
uint64_t ParentCount = CurrentCount;
if (S->getInit())
Visit(S->getInit());
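// Illustrative sketch (not part of the patch): for C++23 consteval-if only the
// branch that can run at execution time receives profile counters:
int pick(int k) {
  if consteval {
    return k;      // never taken at run time; not visited for counters
  } else {
    return k + 1;  // only this arm is counted
  }
}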
diff --git a/clang/lib/CodeGen/CodeGenTypeCache.h b/clang/lib/CodeGen/CodeGenTypeCache.h
index f258234fb4d8..577f88367a3a 100644
--- a/clang/lib/CodeGen/CodeGenTypeCache.h
+++ b/clang/lib/CodeGen/CodeGenTypeCache.h
@@ -69,6 +69,12 @@ struct CodeGenTypeCache {
llvm::PointerType *AllocaInt8PtrTy;
};
+ /// void* in default globals address space
+ union {
+ llvm::PointerType *GlobalsVoidPtrTy;
+ llvm::PointerType *GlobalsInt8PtrTy;
+ };
+
/// The size and alignment of the builtin C type 'int'. This comes
/// up enough in various ABI lowering tasks to be worth pre-computing.
union {
diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp
index 9cb42941cb96..fb05475a4e8c 100644
--- a/clang/lib/CodeGen/CodeGenTypes.cpp
+++ b/clang/lib/CodeGen/CodeGenTypes.cpp
@@ -512,6 +512,7 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
case BuiltinType::Double:
case BuiltinType::LongDouble:
case BuiltinType::Float128:
+ case BuiltinType::Ibm128:
ResultType = getTypeForFormat(getLLVMContext(),
Context.getFloatTypeSemantics(T),
/* UseNativeHalf = */ false);
diff --git a/clang/lib/CodeGen/CoverageMappingGen.cpp b/clang/lib/CodeGen/CoverageMappingGen.cpp
index 8a11da600e4a..9b81c8a670f5 100644
--- a/clang/lib/CodeGen/CoverageMappingGen.cpp
+++ b/clang/lib/CodeGen/CoverageMappingGen.cpp
@@ -751,13 +751,11 @@ struct CounterCoverageMappingBuilder
/// is already added to \c SourceRegions.
bool isRegionAlreadyAdded(SourceLocation StartLoc, SourceLocation EndLoc,
bool isBranch = false) {
- return SourceRegions.rend() !=
- std::find_if(SourceRegions.rbegin(), SourceRegions.rend(),
- [&](const SourceMappingRegion &Region) {
- return Region.getBeginLoc() == StartLoc &&
- Region.getEndLoc() == EndLoc &&
- Region.isBranch() == isBranch;
- });
+ return llvm::any_of(
+ llvm::reverse(SourceRegions), [&](const SourceMappingRegion &Region) {
+ return Region.getBeginLoc() == StartLoc &&
+ Region.getEndLoc() == EndLoc && Region.isBranch() == isBranch;
+ });
}
/// Adjust the most recently visited location to \c EndLoc.
@@ -971,7 +969,7 @@ struct CounterCoverageMappingBuilder
// If the last statement contains terminate statements, add a gap area
// between the two statements. Skip attributed statements, because
// they don't have a valid start location.
- if (LastStmt && HasTerminateStmt && !dyn_cast<AttributedStmt>(Child)) {
+ if (LastStmt && HasTerminateStmt && !isa<AttributedStmt>(Child)) {
auto Gap = findGapAreaBetween(getEnd(LastStmt), getStart(Child));
if (Gap)
fillGapAreaWithCount(Gap->getBegin(), Gap->getEnd(),
diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp
index d3dc0e6212b8..04163aeaddc5 100644
--- a/clang/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp
@@ -334,6 +334,19 @@ public:
ArrayRef<llvm::Function *> CXXThreadLocalInits,
ArrayRef<const VarDecl *> CXXThreadLocalInitVars) override;
+ bool mayNeedDestruction(const VarDecl *VD) const {
+ if (VD->needsDestruction(getContext()))
+ return true;
+
+ // If the variable has an incomplete class type (or array thereof), it
+ // might need destruction.
+ const Type *T = VD->getType()->getBaseElementTypeUnsafe();
+ if (T->getAs<RecordType>() && T->isIncompleteType())
+ return true;
+
+ return false;
+ }
+
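// Illustrative sketch (not part of the patch): a thread_local of incomplete
// class type may turn out to have a non-trivial destructor in its defining TU,
// so accesses must keep going through the thread wrapper:
struct Opaque;                   // completed (and possibly given a dtor) elsewhere
extern thread_local Opaque obj;
Opaque *current() { return &obj; }  // still routed via the thread wrapper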
/// Determine whether we will definitely emit this variable with a constant
/// initializer, either because the language semantics demand it or because
/// we know that the initializer is a constant.
@@ -364,7 +377,7 @@ public:
// If we have the only definition, we don't need a thread wrapper if we
// will emit the value as a constant.
if (isUniqueGVALinkage(getContext().GetGVALinkageForVariable(VD)))
- return !VD->needsDestruction(getContext()) && InitDecl->evaluateValue();
+ return !mayNeedDestruction(VD) && InitDecl->evaluateValue();
// Otherwise, we need a thread wrapper unless we know that every
// translation unit will emit the value as a constant. We rely on the
@@ -376,7 +389,7 @@ public:
bool usesThreadWrapperFunction(const VarDecl *VD) const override {
return !isEmittedWithConstantInitializer(VD) ||
- VD->needsDestruction(getContext());
+ mayNeedDestruction(VD);
}
LValue EmitThreadLocalVarDeclLValue(CodeGenFunction &CGF, const VarDecl *VD,
QualType LValType) override;
@@ -2445,11 +2458,6 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF,
(CGM.getTarget().getTriple().isOSBinFormatELF() ||
CGM.getTarget().getTriple().isOSBinFormatWasm())) {
guard->setComdat(C);
- // An inline variable's guard function is run from the per-TU
- // initialization function, not via a dedicated global ctor function, so
- // we can't put it in a comdat.
- if (!NonTemplateInline)
- CGF.CurFn->setComdat(C);
} else if (CGM.supportsCOMDAT() && guard->isWeakForLinker()) {
guard->setComdat(CGM.getModule().getOrInsertComdat(guard->getName()));
}
@@ -2968,7 +2976,7 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs(
// also when the symbol is weak.
if (CGM.getTriple().isOSAIX() && VD->hasDefinition() &&
isEmittedWithConstantInitializer(VD, true) &&
- !VD->needsDestruction(getContext())) {
+ !mayNeedDestruction(VD)) {
// Init should be null. If it were non-null, then the logic above would
// either be defining the function to be an alias or declaring the
// function with the expectation that the definition of the variable
@@ -3274,6 +3282,7 @@ static bool TypeInfoIsInStandardLibrary(const BuiltinType *Ty) {
case BuiltinType::LongDouble:
case BuiltinType::Float16:
case BuiltinType::Float128:
+ case BuiltinType::Ibm128:
case BuiltinType::Char8:
case BuiltinType::Char16:
case BuiltinType::Char32:
diff --git a/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/clang/lib/CodeGen/MicrosoftCXXABI.cpp
index 990648b131fe..0fd5a0ffe06c 100644
--- a/clang/lib/CodeGen/MicrosoftCXXABI.cpp
+++ b/clang/lib/CodeGen/MicrosoftCXXABI.cpp
@@ -847,7 +847,7 @@ MicrosoftCXXABI::getRecordArgABI(const CXXRecordDecl *RD) const {
// arguments was not supported and resulted in a compiler error. In 19.14
// and later versions, such arguments are now passed indirectly.
TypeInfo Info = getContext().getTypeInfo(RD->getTypeForDecl());
- if (Info.AlignIsRequired && Info.Align > 4)
+ if (Info.isAlignRequired() && Info.Align > 4)
return RAA_Indirect;
// If C++ prohibits us from making a copy, construct the arguments directly
@@ -1810,8 +1810,8 @@ llvm::GlobalVariable *MicrosoftCXXABI::getAddrOfVTable(const CXXRecordDecl *RD,
#endif
}
- const std::unique_ptr<VPtrInfo> *VFPtrI = std::find_if(
- VFPtrs.begin(), VFPtrs.end(), [&](const std::unique_ptr<VPtrInfo>& VPI) {
+ const std::unique_ptr<VPtrInfo> *VFPtrI =
+ llvm::find_if(VFPtrs, [&](const std::unique_ptr<VPtrInfo> &VPI) {
return VPI->FullOffsetInMDC == VPtrOffset;
});
if (VFPtrI == VFPtrs.end()) {
@@ -1844,7 +1844,7 @@ llvm::GlobalVariable *MicrosoftCXXABI::getAddrOfVTable(const CXXRecordDecl *RD,
VFTablesMap[ID] = VFTable;
VTable = VTableAliasIsRequred
? cast<llvm::GlobalVariable>(
- cast<llvm::GlobalAlias>(VFTable)->getBaseObject())
+ cast<llvm::GlobalAlias>(VFTable)->getAliaseeObject())
: cast<llvm::GlobalVariable>(VFTable);
return VTable;
}
@@ -4348,6 +4348,7 @@ llvm::GlobalVariable *MicrosoftCXXABI::getThrowInfo(QualType T) {
void MicrosoftCXXABI::emitThrow(CodeGenFunction &CGF, const CXXThrowExpr *E) {
const Expr *SubExpr = E->getSubExpr();
+ assert(SubExpr && "SubExpr cannot be null");
QualType ThrowType = SubExpr->getType();
// The exception object lives on the stack and its address is passed to the
// runtime function.
diff --git a/clang/lib/CodeGen/ModuleBuilder.cpp b/clang/lib/CodeGen/ModuleBuilder.cpp
index b63f756ca288..f6642a79e1e4 100644
--- a/clang/lib/CodeGen/ModuleBuilder.cpp
+++ b/clang/lib/CodeGen/ModuleBuilder.cpp
@@ -122,6 +122,10 @@ namespace {
return D;
}
+ llvm::StringRef GetMangledName(GlobalDecl GD) {
+ return Builder->getMangledName(GD);
+ }
+
llvm::Constant *GetAddrOfGlobal(GlobalDecl global, bool isForDefinition) {
return Builder->GetAddrOfGlobal(global, ForDefinition_t(isForDefinition));
}
@@ -325,6 +329,10 @@ const Decl *CodeGenerator::GetDeclForMangledName(llvm::StringRef name) {
return static_cast<CodeGeneratorImpl*>(this)->GetDeclForMangledName(name);
}
+llvm::StringRef CodeGenerator::GetMangledName(GlobalDecl GD) {
+ return static_cast<CodeGeneratorImpl *>(this)->GetMangledName(GD);
+}
+
llvm::Constant *CodeGenerator::GetAddrOfGlobal(GlobalDecl global,
bool isForDefinition) {
return static_cast<CodeGeneratorImpl*>(this)
diff --git a/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp b/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
index 1adf0ad9c0e5..f7b83c45022d 100644
--- a/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
+++ b/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
@@ -27,10 +27,10 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/COFF.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Path.h"
-#include "llvm/Support/TargetRegistry.h"
#include <memory>
#include <utility>
@@ -264,31 +264,48 @@ public:
std::string Error;
auto Triple = Ctx.getTargetInfo().getTriple();
if (!llvm::TargetRegistry::lookupTarget(Triple.getTriple(), Error))
- llvm::report_fatal_error(Error);
+ llvm::report_fatal_error(llvm::Twine(Error));
// Emit the serialized Clang AST into its own section.
assert(Buffer->IsComplete && "serialization did not complete");
auto &SerializedAST = Buffer->Data;
auto Size = SerializedAST.size();
- auto Int8Ty = llvm::Type::getInt8Ty(*VMContext);
- auto *Ty = llvm::ArrayType::get(Int8Ty, Size);
- auto *Data = llvm::ConstantDataArray::getString(
- *VMContext, StringRef(SerializedAST.data(), Size),
- /*AddNull=*/false);
- auto *ASTSym = new llvm::GlobalVariable(
- *M, Ty, /*constant*/ true, llvm::GlobalVariable::InternalLinkage, Data,
- "__clang_ast");
- // The on-disk hashtable needs to be aligned.
- ASTSym->setAlignment(llvm::Align(8));
-
- // Mach-O also needs a segment name.
- if (Triple.isOSBinFormatMachO())
- ASTSym->setSection("__CLANG,__clangast");
- // COFF has an eight character length limit.
- else if (Triple.isOSBinFormatCOFF())
- ASTSym->setSection("clangast");
- else
- ASTSym->setSection("__clangast");
+
+ if (Triple.isOSBinFormatWasm()) {
+ // Emit __clangast in a custom section instead of a named data segment
+ // so it can be found while iterating over sections.
+ // This could be avoided if all data segments (in the wasm sense) were
+ // represented as their own sections (in the llvm sense).
+ // TODO: https://github.com/WebAssembly/tool-conventions/issues/138
+ llvm::NamedMDNode *MD =
+ M->getOrInsertNamedMetadata("wasm.custom_sections");
+ llvm::Metadata *Ops[2] = {
+ llvm::MDString::get(*VMContext, "__clangast"),
+ llvm::MDString::get(*VMContext,
+ StringRef(SerializedAST.data(), Size))};
+ auto *NameAndContent = llvm::MDTuple::get(*VMContext, Ops);
+ MD->addOperand(NameAndContent);
+ } else {
+ auto Int8Ty = llvm::Type::getInt8Ty(*VMContext);
+ auto *Ty = llvm::ArrayType::get(Int8Ty, Size);
+ auto *Data = llvm::ConstantDataArray::getString(
+ *VMContext, StringRef(SerializedAST.data(), Size),
+ /*AddNull=*/false);
+ auto *ASTSym = new llvm::GlobalVariable(
+ *M, Ty, /*constant*/ true, llvm::GlobalVariable::InternalLinkage,
+ Data, "__clang_ast");
+ // The on-disk hashtable needs to be aligned.
+ ASTSym->setAlignment(llvm::Align(8));
+
+ // Mach-O also needs a segment name.
+ if (Triple.isOSBinFormatMachO())
+ ASTSym->setSection("__CLANG,__clangast");
+ // COFF has an eight character length limit.
+ else if (Triple.isOSBinFormatCOFF())
+ ASTSym->setSection("clangast");
+ else
+ ASTSym->setSection("__clangast");
+ }
LLVM_DEBUG({
// Print the IR for the PCH container to the debug output.
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index a2b68a04d351..302dc653c46e 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -392,6 +392,36 @@ static Address emitVoidPtrVAArg(CodeGenFunction &CGF, Address VAListAddr,
}
+static Address complexTempStructure(CodeGenFunction &CGF, Address VAListAddr,
+ QualType Ty, CharUnits SlotSize,
+ CharUnits EltSize, const ComplexType *CTy) {
+ Address Addr =
+ emitVoidPtrDirectVAArg(CGF, VAListAddr, CGF.Int8Ty, SlotSize * 2,
+ SlotSize, SlotSize, /*AllowHigher*/ true);
+
+ Address RealAddr = Addr;
+ Address ImagAddr = RealAddr;
+ if (CGF.CGM.getDataLayout().isBigEndian()) {
+ RealAddr =
+ CGF.Builder.CreateConstInBoundsByteGEP(RealAddr, SlotSize - EltSize);
+ ImagAddr = CGF.Builder.CreateConstInBoundsByteGEP(ImagAddr,
+ 2 * SlotSize - EltSize);
+ } else {
+ ImagAddr = CGF.Builder.CreateConstInBoundsByteGEP(RealAddr, SlotSize);
+ }
+
+ llvm::Type *EltTy = CGF.ConvertTypeForMem(CTy->getElementType());
+ RealAddr = CGF.Builder.CreateElementBitCast(RealAddr, EltTy);
+ ImagAddr = CGF.Builder.CreateElementBitCast(ImagAddr, EltTy);
+ llvm::Value *Real = CGF.Builder.CreateLoad(RealAddr, ".vareal");
+ llvm::Value *Imag = CGF.Builder.CreateLoad(ImagAddr, ".vaimag");
+
+ Address Temp = CGF.CreateMemTemp(Ty, "vacplx");
+ CGF.EmitStoreOfComplex({Real, Imag}, CGF.MakeAddrLValue(Temp, Ty),
+ /*init*/ true);
+ return Temp;
+}
+
static Address emitMergePHI(CodeGenFunction &CGF,
Address Addr1, llvm::BasicBlock *Block1,
Address Addr2, llvm::BasicBlock *Block2,
@@ -827,19 +857,19 @@ public:
llvm::Function *Fn = cast<llvm::Function>(GV);
llvm::AttrBuilder B;
B.addAttribute("wasm-import-module", Attr->getImportModule());
- Fn->addAttributes(llvm::AttributeList::FunctionIndex, B);
+ Fn->addFnAttrs(B);
}
if (const auto *Attr = FD->getAttr<WebAssemblyImportNameAttr>()) {
llvm::Function *Fn = cast<llvm::Function>(GV);
llvm::AttrBuilder B;
B.addAttribute("wasm-import-name", Attr->getImportName());
- Fn->addAttributes(llvm::AttributeList::FunctionIndex, B);
+ Fn->addFnAttrs(B);
}
if (const auto *Attr = FD->getAttr<WebAssemblyExportNameAttr>()) {
llvm::Function *Fn = cast<llvm::Function>(GV);
llvm::AttrBuilder B;
B.addAttribute("wasm-export-name", Attr->getExportName());
- Fn->addAttributes(llvm::AttributeList::FunctionIndex, B);
+ Fn->addFnAttrs(B);
}
}
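As a hedged usage illustration (not from the patch): these are the source-level WebAssembly attributes whose values the hunk above now attaches with Fn->addFnAttrs(B) instead of the deprecated addAttributes(FunctionIndex, ...) form.

// Assumed example declarations; the attribute spellings are the ones Clang
// documents for WebAssembly targets.
__attribute__((import_module("env"), import_name("host_log")))
void host_log(int level);            // -> "wasm-import-module" / "wasm-import-name"

__attribute__((export_name("run")))
int run(void) { return 42; }         // -> "wasm-export-name"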
@@ -1170,7 +1200,8 @@ public:
IsRetSmallStructInRegABI(RetSmallStructInRegABI),
IsWin32StructABI(Win32StructABI), IsSoftFloatABI(SoftFloatABI),
IsMCUABI(CGT.getTarget().getTriple().isOSIAMCU()),
- IsLinuxABI(CGT.getTarget().getTriple().isOSLinux()),
+ IsLinuxABI(CGT.getTarget().getTriple().isOSLinux() ||
+ CGT.getTarget().getTriple().isOSCygMing()),
DefaultNumRegisterParameters(NumRegisterParameters) {}
bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars,
@@ -1524,6 +1555,14 @@ ABIArgInfo X86_32ABIInfo::classifyReturnType(QualType RetTy,
if (isEmptyRecord(getContext(), RetTy, true))
return ABIArgInfo::getIgnore();
+ // Return complex of _Float16 as <2 x half> so the backend will use xmm0.
+ if (const ComplexType *CT = RetTy->getAs<ComplexType>()) {
+ QualType ET = getContext().getCanonicalType(CT->getElementType());
+ if (ET->isFloat16Type())
+ return ABIArgInfo::getDirect(llvm::FixedVectorType::get(
+ llvm::Type::getHalfTy(getVMContext()), 2));
+ }
+
// Small structures which are register sized are generally returned
// in a register.
if (shouldReturnTypeInRegister(RetTy, getContext())) {
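A small hedged illustration of the case the new check above targets (not asserted by the patch beyond its own comment): on 32-bit x86, a _Complex _Float16 return value is classified as Direct with IR type <2 x half>, which, per the comment in the hunk, lets the backend return it in %xmm0.

// Assumes a target with _Float16 support and Clang's complex extension.
_Complex _Float16 cmul(_Complex _Float16 a, _Complex _Float16 b);  // return lowers to <2 x half>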
@@ -1831,7 +1870,7 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
// Pass over-aligned aggregates on Windows indirectly. This behavior was
// added in MSVC 2015.
- if (IsWin32StructABI && TI.AlignIsRequired && TI.Align > 32)
+ if (IsWin32StructABI && TI.isAlignRequired() && TI.Align > 32)
return getIndirectResult(Ty, /*ByVal=*/false, State);
// Expand small (<= 128-bit) record types when we know that the stack layout
@@ -2607,7 +2646,7 @@ static std::string qualifyWindowsLibrary(llvm::StringRef Lib) {
// If the argument does not end in .lib, automatically add the suffix.
// If the argument contains a space, enclose it in quotes.
// This matches the behavior of MSVC.
- bool Quote = (Lib.find(' ') != StringRef::npos);
+ bool Quote = Lib.contains(' ');
std::string ArgStr = Quote ? "\"" : "";
ArgStr += Lib;
if (!Lib.endswith_insensitive(".lib") && !Lib.endswith_insensitive(".a"))
@@ -2812,7 +2851,8 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase,
Hi = Integer;
} else if (k >= BuiltinType::Bool && k <= BuiltinType::LongLong) {
Current = Integer;
- } else if (k == BuiltinType::Float || k == BuiltinType::Double) {
+ } else if (k == BuiltinType::Float || k == BuiltinType::Double ||
+ k == BuiltinType::Float16) {
Current = SSE;
} else if (k == BuiltinType::LongDouble) {
const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat();
@@ -2943,7 +2983,7 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase,
Current = Integer;
else if (Size <= 128)
Lo = Hi = Integer;
- } else if (ET == getContext().FloatTy) {
+ } else if (ET->isFloat16Type() || ET == getContext().FloatTy) {
Current = SSE;
} else if (ET == getContext().DoubleTy) {
Lo = Hi = SSE;
@@ -3367,55 +3407,77 @@ static bool BitsContainNoUserData(QualType Ty, unsigned StartBit,
return false;
}
-/// ContainsFloatAtOffset - Return true if the specified LLVM IR type has a
-/// float member at the specified offset. For example, {int,{float}} has a
-/// float at offset 4. It is conservatively correct for this routine to return
-/// false.
-static bool ContainsFloatAtOffset(llvm::Type *IRType, unsigned IROffset,
- const llvm::DataLayout &TD) {
- // Base case if we find a float.
- if (IROffset == 0 && IRType->isFloatTy())
- return true;
+/// getFPTypeAtOffset - Return a floating point type at the specified offset.
+static llvm::Type *getFPTypeAtOffset(llvm::Type *IRType, unsigned IROffset,
+ const llvm::DataLayout &TD) {
+ if (IROffset == 0 && IRType->isFloatingPointTy())
+ return IRType;
// If this is a struct, recurse into the field at the specified offset.
if (llvm::StructType *STy = dyn_cast<llvm::StructType>(IRType)) {
+ if (!STy->getNumContainedTypes())
+ return nullptr;
+
const llvm::StructLayout *SL = TD.getStructLayout(STy);
unsigned Elt = SL->getElementContainingOffset(IROffset);
IROffset -= SL->getElementOffset(Elt);
- return ContainsFloatAtOffset(STy->getElementType(Elt), IROffset, TD);
+ return getFPTypeAtOffset(STy->getElementType(Elt), IROffset, TD);
}
// If this is an array, recurse into the field at the specified offset.
if (llvm::ArrayType *ATy = dyn_cast<llvm::ArrayType>(IRType)) {
llvm::Type *EltTy = ATy->getElementType();
unsigned EltSize = TD.getTypeAllocSize(EltTy);
- IROffset -= IROffset/EltSize*EltSize;
- return ContainsFloatAtOffset(EltTy, IROffset, TD);
+ IROffset -= IROffset / EltSize * EltSize;
+ return getFPTypeAtOffset(EltTy, IROffset, TD);
}
- return false;
+ return nullptr;
}
-
/// GetSSETypeAtOffset - Return a type that will be passed by the backend in the
/// low 8 bytes of an XMM register, corresponding to the SSE class.
llvm::Type *X86_64ABIInfo::
GetSSETypeAtOffset(llvm::Type *IRType, unsigned IROffset,
QualType SourceTy, unsigned SourceOffset) const {
- // The only three choices we have are either double, <2 x float>, or float. We
- // pass as float if the last 4 bytes is just padding. This happens for
- // structs that contain 3 floats.
- if (BitsContainNoUserData(SourceTy, SourceOffset*8+32,
- SourceOffset*8+64, getContext()))
- return llvm::Type::getFloatTy(getVMContext());
-
- // We want to pass as <2 x float> if the LLVM IR type contains a float at
- // offset+0 and offset+4. Walk the LLVM IR type to find out if this is the
- // case.
- if (ContainsFloatAtOffset(IRType, IROffset, getDataLayout()) &&
- ContainsFloatAtOffset(IRType, IROffset+4, getDataLayout()))
- return llvm::FixedVectorType::get(llvm::Type::getFloatTy(getVMContext()),
- 2);
+ const llvm::DataLayout &TD = getDataLayout();
+ unsigned SourceSize =
+ (unsigned)getContext().getTypeSize(SourceTy) / 8 - SourceOffset;
+ llvm::Type *T0 = getFPTypeAtOffset(IRType, IROffset, TD);
+ if (!T0 || T0->isDoubleTy())
+ return llvm::Type::getDoubleTy(getVMContext());
+
+ // Get the adjacent FP type.
+ llvm::Type *T1 = nullptr;
+ unsigned T0Size = TD.getTypeAllocSize(T0);
+ if (SourceSize > T0Size)
+ T1 = getFPTypeAtOffset(IRType, IROffset + T0Size, TD);
+ if (T1 == nullptr) {
+ // Check if IRType is a half + float. float type will be in IROffset+4 due
+ // to its alignment.
+ if (T0->isHalfTy() && SourceSize > 4)
+ T1 = getFPTypeAtOffset(IRType, IROffset + 4, TD);
+ // If we can't get a second FP type, return a simple half or float.
+ // avx512fp16-abi.c:pr51813_2 shows it works to return float for
+ // {float, i8} too.
+ if (T1 == nullptr)
+ return T0;
+ }
+
+ if (T0->isFloatTy() && T1->isFloatTy())
+ return llvm::FixedVectorType::get(T0, 2);
+
+ if (T0->isHalfTy() && T1->isHalfTy()) {
+ llvm::Type *T2 = nullptr;
+ if (SourceSize > 4)
+ T2 = getFPTypeAtOffset(IRType, IROffset + 4, TD);
+ if (T2 == nullptr)
+ return llvm::FixedVectorType::get(T0, 2);
+ return llvm::FixedVectorType::get(T0, 4);
+ }
+
+ if (T0->isHalfTy() || T1->isHalfTy())
+ return llvm::FixedVectorType::get(llvm::Type::getHalfTy(getVMContext()), 4);
return llvm::Type::getDoubleTy(getVMContext());
}
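A hedged summary of what the rewritten getFPTypeAtOffset/GetSSETypeAtOffset pair produces for a few simple aggregates on x86-64, derived by tracing the code above rather than quoted from the patch:

struct TwoFloats  { float a, b; };          // low eightbyte passed as <2 x float>
struct TwoHalves  { _Float16 a, b; };       // passed as <2 x half>
struct FourHalves { _Float16 a, b, c, d; }; // passed as <4 x half>
struct HalfFloat  { _Float16 a; float b; }; // mixed half/float, widened to <4 x half>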
@@ -3521,11 +3583,11 @@ GetX86_64ByValArgumentPair(llvm::Type *Lo, llvm::Type *Hi,
// struct.
if (HiStart != 8) {
// There are usually two sorts of types the ABI generation code can produce
- // for the low part of a pair that aren't 8 bytes in size: float or
+ // for the low part of a pair that aren't 8 bytes in size: half, float or
// i8/i16/i32. This can also include pointers when they are 32-bit (X32 and
// NaCl).
// Promote these to a larger type.
- if (Lo->isFloatTy())
+ if (Lo->isHalfTy() || Lo->isFloatTy())
Lo = llvm::Type::getDoubleTy(Lo->getContext());
else {
assert((Lo->isIntegerTy() || Lo->isPointerTy())
@@ -4572,14 +4634,25 @@ CharUnits AIXABIInfo::getParamTypeAlignment(QualType Ty) const {
Address AIXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const {
- if (Ty->isAnyComplexType())
- llvm::report_fatal_error("complex type is not supported on AIX yet");
auto TypeInfo = getContext().getTypeInfoInChars(Ty);
TypeInfo.Align = getParamTypeAlignment(Ty);
CharUnits SlotSize = CharUnits::fromQuantity(PtrByteSize);
+ // If we have a complex type and the base type is smaller than the register
+ // size, the ABI calls for the real and imaginary parts to be right-adjusted
+ // in separate words in 32bit mode or doublewords in 64bit mode. However,
+ // Clang expects us to produce a pointer to a structure with the two parts
+ // packed tightly. So generate loads of the real and imaginary parts relative
+ // to the va_list pointer, and store them to a temporary structure. We do the
+ // same as the PPC64ABI here.
+ if (const ComplexType *CTy = Ty->getAs<ComplexType>()) {
+ CharUnits EltSize = TypeInfo.Width / 2;
+ if (EltSize < SlotSize)
+ return complexTempStructure(CGF, VAListAddr, Ty, SlotSize, EltSize, CTy);
+ }
+
return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect*/ false, TypeInfo,
SlotSize, /*AllowHigher*/ true);
}
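A hedged C-style example (compilable with Clang's GNU complex extensions; names invented) of the situation the new AIX branch above handles: a complex whose element type is narrower than the register slot, read through va_arg.

#include <stdarg.h>

// Each part is right-adjusted in its own slot on AIX, so the lowering loads
// the real and imaginary halves separately and repacks them into a temporary.
static float sum_parts(int n, ...) {
  va_list ap;
  va_start(ap, n);
  _Complex float c = va_arg(ap, _Complex float);
  va_end(ap);
  return __real__ c + __imag__ c;
}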
@@ -5168,8 +5241,9 @@ bool PPC64_SVR4_ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
if (BT->getKind() == BuiltinType::Float ||
BT->getKind() == BuiltinType::Double ||
BT->getKind() == BuiltinType::LongDouble ||
+ BT->getKind() == BuiltinType::Ibm128 ||
(getContext().getTargetInfo().hasFloat128Type() &&
- (BT->getKind() == BuiltinType::Float128))) {
+ (BT->getKind() == BuiltinType::Float128))) {
if (IsSoftFloatABI)
return false;
return true;
@@ -5346,33 +5420,8 @@ Address PPC64_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
// and store them to a temporary structure.
if (const ComplexType *CTy = Ty->getAs<ComplexType>()) {
CharUnits EltSize = TypeInfo.Width / 2;
- if (EltSize < SlotSize) {
- Address Addr = emitVoidPtrDirectVAArg(CGF, VAListAddr, CGF.Int8Ty,
- SlotSize * 2, SlotSize,
- SlotSize, /*AllowHigher*/ true);
-
- Address RealAddr = Addr;
- Address ImagAddr = RealAddr;
- if (CGF.CGM.getDataLayout().isBigEndian()) {
- RealAddr = CGF.Builder.CreateConstInBoundsByteGEP(RealAddr,
- SlotSize - EltSize);
- ImagAddr = CGF.Builder.CreateConstInBoundsByteGEP(ImagAddr,
- 2 * SlotSize - EltSize);
- } else {
- ImagAddr = CGF.Builder.CreateConstInBoundsByteGEP(RealAddr, SlotSize);
- }
-
- llvm::Type *EltTy = CGF.ConvertTypeForMem(CTy->getElementType());
- RealAddr = CGF.Builder.CreateElementBitCast(RealAddr, EltTy);
- ImagAddr = CGF.Builder.CreateElementBitCast(ImagAddr, EltTy);
- llvm::Value *Real = CGF.Builder.CreateLoad(RealAddr, ".vareal");
- llvm::Value *Imag = CGF.Builder.CreateLoad(ImagAddr, ".vaimag");
-
- Address Temp = CGF.CreateMemTemp(Ty, "vacplx");
- CGF.EmitStoreOfComplex({Real, Imag}, CGF.MakeAddrLValue(Temp, Ty),
- /*init*/ true);
- return Temp;
- }
+ if (EltSize < SlotSize)
+ return complexTempStructure(CGF, VAListAddr, Ty, SlotSize, EltSize, CTy);
}
// Otherwise, just use the general rule.
@@ -5526,6 +5575,20 @@ public:
Fn->addFnAttr("branch-target-enforcement",
BPI.BranchTargetEnforcement ? "true" : "false");
}
+
+ bool isScalarizableAsmOperand(CodeGen::CodeGenFunction &CGF,
+ llvm::Type *Ty) const override {
+ if (CGF.getTarget().hasFeature("ls64")) {
+ auto *ST = dyn_cast<llvm::StructType>(Ty);
+ if (ST && ST->getNumElements() == 1) {
+ auto *AT = dyn_cast<llvm::ArrayType>(ST->getElementType(0));
+ if (AT && AT->getNumElements() == 8 &&
+ AT->getElementType()->isIntegerTy(64))
+ return true;
+ }
+ }
+ return TargetCodeGenInfo::isScalarizableAsmOperand(CGF, Ty);
+ }
};
class WindowsAArch64TargetCodeGenInfo : public AArch64TargetCodeGenInfo {
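For illustration (hedged; the struct name is invented): the aggregate shape the new isScalarizableAsmOperand override accepts when the "ls64" feature is available is a struct whose single member is an array of eight 64-bit integers, matching the ACLE data512_t shape used with the LD64B/ST64B instructions.

struct Data512 { unsigned long long v[8]; };  // lowers to { [8 x i64] }, so the
                                              // override above returns true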
@@ -6329,7 +6392,7 @@ public:
// the backend to perform a realignment as part of the function prologue.
llvm::AttrBuilder B;
B.addStackAlignmentAttr(8);
- Fn->addAttributes(llvm::AttributeList::FunctionIndex, B);
+ Fn->addFnAttrs(B);
}
};
@@ -6920,7 +6983,7 @@ Address ARMABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
TyAlignForABI = CharUnits::fromQuantity(4);
}
- TypeInfoChars TyInfo(TySize, TyAlignForABI, false);
+ TypeInfoChars TyInfo(TySize, TyAlignForABI, AlignRequirementKind::None);
return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, TyInfo,
SlotSize, /*AllowHigherAlign*/ true);
}
@@ -10106,24 +10169,26 @@ void XCoreTargetCodeGenInfo::emitTargetMetadata(
}
}
}
+
//===----------------------------------------------------------------------===//
-// SPIR ABI Implementation
+// Base ABI and target codegen info implementation common between SPIR and
+// SPIR-V.
//===----------------------------------------------------------------------===//
namespace {
-class SPIRABIInfo : public DefaultABIInfo {
+class CommonSPIRABIInfo : public DefaultABIInfo {
public:
- SPIRABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) { setCCs(); }
+ CommonSPIRABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) { setCCs(); }
private:
void setCCs();
};
} // end anonymous namespace
namespace {
-class SPIRTargetCodeGenInfo : public TargetCodeGenInfo {
+class CommonSPIRTargetCodeGenInfo : public TargetCodeGenInfo {
public:
- SPIRTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
- : TargetCodeGenInfo(std::make_unique<SPIRABIInfo>(CGT)) {}
+ CommonSPIRTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
+ : TargetCodeGenInfo(std::make_unique<CommonSPIRABIInfo>(CGT)) {}
LangAS getASTAllocaAddressSpace() const override {
return getLangASFromTargetAS(
@@ -10134,7 +10199,7 @@ public:
};
} // End anonymous namespace.
-void SPIRABIInfo::setCCs() {
+void CommonSPIRABIInfo::setCCs() {
assert(getRuntimeCC() == llvm::CallingConv::C);
RuntimeCC = llvm::CallingConv::SPIR_FUNC;
}
@@ -10148,7 +10213,7 @@ void computeSPIRKernelABIInfo(CodeGenModule &CGM, CGFunctionInfo &FI) {
}
}
-unsigned SPIRTargetCodeGenInfo::getOpenCLKernelCallingConv() const {
+unsigned CommonSPIRTargetCodeGenInfo::getOpenCLKernelCallingConv() const {
return llvm::CallingConv::SPIR_KERNEL;
}
@@ -11217,7 +11282,9 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
return SetCGInfo(new ARCTargetCodeGenInfo(Types));
case llvm::Triple::spir:
case llvm::Triple::spir64:
- return SetCGInfo(new SPIRTargetCodeGenInfo(Types));
+ case llvm::Triple::spirv32:
+ case llvm::Triple::spirv64:
+ return SetCGInfo(new CommonSPIRTargetCodeGenInfo(Types));
case llvm::Triple::ve:
return SetCGInfo(new VETargetCodeGenInfo(Types));
}
diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h
index e6e474544fc4..aa8bbb60a75f 100644
--- a/clang/lib/CodeGen/TargetInfo.h
+++ b/clang/lib/CodeGen/TargetInfo.h
@@ -148,6 +148,13 @@ public:
return Ty;
}
+ /// Target hook to decide whether an inline asm operand can be passed
+ /// by value.
+ virtual bool isScalarizableAsmOperand(CodeGen::CodeGenFunction &CGF,
+ llvm::Type *Ty) const {
+ return false;
+ }
+
/// Adds constraints and types for result registers.
virtual void addReturnRegisterOutputs(
CodeGen::CodeGenFunction &CGF, CodeGen::LValue ReturnValue,
diff --git a/clang/lib/CodeGen/VarBypassDetector.h b/clang/lib/CodeGen/VarBypassDetector.h
index b654eefd963d..164e88c0b2f1 100644
--- a/clang/lib/CodeGen/VarBypassDetector.h
+++ b/clang/lib/CodeGen/VarBypassDetector.h
@@ -55,7 +55,7 @@ public:
/// Returns true if the variable declaration was by bypassed by any goto or
/// switch statement.
bool IsBypassed(const VarDecl *D) const {
- return AlwaysBypassed || Bypasses.find(D) != Bypasses.end();
+ return AlwaysBypassed || Bypasses.contains(D);
}
private:
diff --git a/clang/lib/DirectoryWatcher/windows/DirectoryWatcher-windows.cpp b/clang/lib/DirectoryWatcher/windows/DirectoryWatcher-windows.cpp
index 1f040f60ff19..110d402436ee 100644
--- a/clang/lib/DirectoryWatcher/windows/DirectoryWatcher-windows.cpp
+++ b/clang/lib/DirectoryWatcher/windows/DirectoryWatcher-windows.cpp
@@ -88,10 +88,15 @@ DirectoryWatcherWindows::DirectoryWatcherWindows(
// handle to the watcher and performing synchronous operations.
{
DWORD Size = GetFinalPathNameByHandleW(DirectoryHandle, NULL, 0, 0);
- std::unique_ptr<WCHAR[]> Buffer{new WCHAR[Size]};
+ std::unique_ptr<WCHAR[]> Buffer{new WCHAR[Size + 1]};
Size = GetFinalPathNameByHandleW(DirectoryHandle, Buffer.get(), Size, 0);
Buffer[Size] = L'\0';
- llvm::sys::windows::UTF16ToUTF8(Buffer.get(), Size, Path);
+ WCHAR *Data = Buffer.get();
+ if (Size >= 4 && ::memcmp(Data, L"\\\\?\\", 8) == 0) {
+ Data += 4;
+ Size -= 4;
+ }
+ llvm::sys::windows::UTF16ToUTF8(Data, Size, Path);
}
size_t EntrySize = sizeof(FILE_NOTIFY_INFORMATION) + MAX_PATH * sizeof(WCHAR);
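A small hedged sketch of the path fix above: GetFinalPathNameByHandleW can hand back an extended-length path such as L"\\?\C:\work", and the leading four WCHARs (eight bytes, hence the memcmp length) are dropped before converting to UTF-8.

#include <cwchar>

// Illustrative helper, not the actual patch code.
static const wchar_t *stripExtendedPrefix(const wchar_t *Path, size_t &Len) {
  if (Len >= 4 && std::wmemcmp(Path, L"\\\\?\\", 4) == 0) {
    Len -= 4;
    return Path + 4;
  }
  return Path;
}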
diff --git a/clang/lib/Driver/Compilation.cpp b/clang/lib/Driver/Compilation.cpp
index 0144d808cf12..67d941c6c2ab 100644
--- a/clang/lib/Driver/Compilation.cpp
+++ b/clang/lib/Driver/Compilation.cpp
@@ -174,7 +174,7 @@ int Compilation::ExecuteCommand(const Command &C,
!getDriver().CCPrintOptionsFilename.empty()) {
std::error_code EC;
OwnedStream.reset(new llvm::raw_fd_ostream(
- getDriver().CCPrintOptionsFilename.c_str(), EC,
+ getDriver().CCPrintOptionsFilename, EC,
llvm::sys::fs::OF_Append | llvm::sys::fs::OF_TextWithCRLF));
if (EC) {
getDriver().Diag(diag::err_drv_cc_print_options_failure)
diff --git a/clang/lib/Driver/Distro.cpp b/clang/lib/Driver/Distro.cpp
index c4cf4e48b5b8..5ac38c34d112 100644
--- a/clang/lib/Driver/Distro.cpp
+++ b/clang/lib/Driver/Distro.cpp
@@ -90,6 +90,7 @@ static Distro::DistroType DetectLsbRelease(llvm::vfs::FileSystem &VFS) {
.Case("groovy", Distro::UbuntuGroovy)
.Case("hirsute", Distro::UbuntuHirsute)
.Case("impish", Distro::UbuntuImpish)
+ .Case("jammy", Distro::UbuntuJammy)
.Default(Distro::UnknownDistro);
return Version;
}
@@ -118,11 +119,11 @@ static Distro::DistroType DetectDistro(llvm::vfs::FileSystem &VFS) {
return Distro::Fedora;
if (Data.startswith("Red Hat Enterprise Linux") ||
Data.startswith("CentOS") || Data.startswith("Scientific Linux")) {
- if (Data.find("release 7") != StringRef::npos)
+ if (Data.contains("release 7"))
return Distro::RHEL7;
- else if (Data.find("release 6") != StringRef::npos)
+ else if (Data.contains("release 6"))
return Distro::RHEL6;
- else if (Data.find("release 5") != StringRef::npos)
+ else if (Data.contains("release 5"))
return Distro::RHEL5;
}
return Distro::UnknownDistro;
@@ -150,6 +151,8 @@ static Distro::DistroType DetectDistro(llvm::vfs::FileSystem &VFS) {
return Distro::DebianBuster;
case 11:
return Distro::DebianBullseye;
+ case 12:
+ return Distro::DebianBookworm;
default:
return Distro::UnknownDistro;
}
@@ -161,6 +164,7 @@ static Distro::DistroType DetectDistro(llvm::vfs::FileSystem &VFS) {
.Case("stretch/sid", Distro::DebianStretch)
.Case("buster/sid", Distro::DebianBuster)
.Case("bullseye/sid", Distro::DebianBullseye)
+ .Case("bookworm/sid", Distro::DebianBookworm)
.Default(Distro::UnknownDistro);
}
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index 5c323cb6ea23..8023d03013a1 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -67,6 +67,7 @@
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Option/OptSpecifier.h"
@@ -84,7 +85,6 @@
#include "llvm/Support/Process.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/StringSaver.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include <map>
@@ -277,7 +277,8 @@ phases::ID Driver::getFinalPhase(const DerivedArgList &DAL,
if (CCCIsCPP() || (PhaseArg = DAL.getLastArg(options::OPT_E)) ||
(PhaseArg = DAL.getLastArg(options::OPT__SLASH_EP)) ||
(PhaseArg = DAL.getLastArg(options::OPT_M, options::OPT_MM)) ||
- (PhaseArg = DAL.getLastArg(options::OPT__SLASH_P))) {
+ (PhaseArg = DAL.getLastArg(options::OPT__SLASH_P)) ||
+ CCGenDiagnostics) {
FinalPhase = phases::Preprocess;
// --precompile only runs up to precompilation.
@@ -304,6 +305,9 @@ phases::ID Driver::getFinalPhase(const DerivedArgList &DAL,
} else if ((PhaseArg = DAL.getLastArg(options::OPT_c))) {
FinalPhase = phases::Assemble;
+ } else if ((PhaseArg = DAL.getLastArg(options::OPT_emit_interface_stubs))) {
+ FinalPhase = phases::IfsMerge;
+
// Otherwise do everything.
} else
FinalPhase = phases::Link;
@@ -432,8 +436,7 @@ static llvm::Triple computeTargetTriple(const Driver &D,
// GNU/Hurd's triples should have been -hurd-gnu*, but were historically made
// -gnu* only, and we can not change this, so we have to detect that case as
// being the Hurd OS.
- if (TargetTriple.find("-unknown-gnu") != StringRef::npos ||
- TargetTriple.find("-pc-gnu") != StringRef::npos)
+ if (TargetTriple.contains("-unknown-gnu") || TargetTriple.contains("-pc-gnu"))
Target.setOSName("hurd");
// Handle Apple-specific options available here.
@@ -522,8 +525,11 @@ static llvm::Triple computeTargetTriple(const Driver &D,
Target.setEnvironment(llvm::Triple::CODE16);
}
- if (AT != llvm::Triple::UnknownArch && AT != Target.getArch())
+ if (AT != llvm::Triple::UnknownArch && AT != Target.getArch()) {
Target.setArch(AT);
+ if (Target.isWindowsGNUEnvironment())
+ toolchains::MinGW::fixTripleArch(D, Target, Args);
+ }
}
// Handle -miamcu flag.
@@ -585,53 +591,34 @@ static llvm::Triple computeTargetTriple(const Driver &D,
// Parse the LTO options and record the type of LTO compilation
// based on which -f(no-)?lto(=.*)? or -f(no-)?offload-lto(=.*)?
// option occurs last.
-static llvm::Optional<driver::LTOKind>
-parseLTOMode(Driver &D, const llvm::opt::ArgList &Args, OptSpecifier OptPos,
- OptSpecifier OptNeg, OptSpecifier OptEq, bool IsOffload) {
- driver::LTOKind LTOMode = LTOK_None;
- // Non-offload LTO allows -flto=auto and -flto=jobserver. Offload LTO does
- // not support those options.
- if (!Args.hasFlag(OptPos, OptEq, OptNeg, false) &&
- (IsOffload ||
- (!Args.hasFlag(options::OPT_flto_EQ_auto, options::OPT_fno_lto, false) &&
- !Args.hasFlag(options::OPT_flto_EQ_jobserver, options::OPT_fno_lto,
- false))))
- return None;
-
- StringRef LTOName("full");
+static driver::LTOKind parseLTOMode(Driver &D, const llvm::opt::ArgList &Args,
+ OptSpecifier OptEq, OptSpecifier OptNeg) {
+ if (!Args.hasFlag(OptEq, OptNeg, false))
+ return LTOK_None;
const Arg *A = Args.getLastArg(OptEq);
- if (A)
- LTOName = A->getValue();
+ StringRef LTOName = A->getValue();
- LTOMode = llvm::StringSwitch<LTOKind>(LTOName)
- .Case("full", LTOK_Full)
- .Case("thin", LTOK_Thin)
- .Default(LTOK_Unknown);
+ driver::LTOKind LTOMode = llvm::StringSwitch<LTOKind>(LTOName)
+ .Case("full", LTOK_Full)
+ .Case("thin", LTOK_Thin)
+ .Default(LTOK_Unknown);
if (LTOMode == LTOK_Unknown) {
- assert(A);
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getOption().getName() << A->getValue();
- return None;
+ return LTOK_None;
}
return LTOMode;
}
// Parse the LTO options.
void Driver::setLTOMode(const llvm::opt::ArgList &Args) {
- LTOMode = LTOK_None;
- if (auto M = parseLTOMode(*this, Args, options::OPT_flto,
- options::OPT_fno_lto, options::OPT_flto_EQ,
- /*IsOffload=*/false))
- LTOMode = M.getValue();
-
- OffloadLTOMode = LTOK_None;
- if (auto M = parseLTOMode(*this, Args, options::OPT_foffload_lto,
- options::OPT_fno_offload_lto,
- options::OPT_foffload_lto_EQ,
- /*IsOffload=*/true))
- OffloadLTOMode = M.getValue();
+ LTOMode =
+ parseLTOMode(*this, Args, options::OPT_flto_EQ, options::OPT_fno_lto);
+
+ OffloadLTOMode = parseLTOMode(*this, Args, options::OPT_foffload_lto_EQ,
+ options::OPT_fno_offload_lto);
}
/// Compute the desired OpenMP runtime from the flags provided.
@@ -699,6 +686,12 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
}
C.addOffloadDeviceToolChain(CudaTC.get(), OFK);
} else if (IsHIP) {
+ if (auto *OMPTargetArg =
+ C.getInputArgs().getLastArg(options::OPT_fopenmp_targets_EQ)) {
+ Diag(clang::diag::err_drv_unsupported_opt_for_language_mode)
+ << OMPTargetArg->getSpelling() << "HIP";
+ return;
+ }
const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
const llvm::Triple &HostTriple = HostTC->getTriple();
auto OFK = Action::OFK_HIP;
@@ -889,10 +882,9 @@ bool Driver::loadConfigFile() {
std::vector<std::string> ConfigFiles =
CLOptions->getAllArgValues(options::OPT_config);
if (ConfigFiles.size() > 1) {
- if (!std::all_of(ConfigFiles.begin(), ConfigFiles.end(),
- [ConfigFiles](const std::string &s) {
- return s == ConfigFiles[0];
- })) {
+ if (!llvm::all_of(ConfigFiles, [ConfigFiles](const std::string &s) {
+ return s == ConfigFiles[0];
+ })) {
Diag(diag::err_drv_duplicate_config);
return true;
}
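For reference, a tiny self-contained sketch (assumed, not from the patch) of the range helper adopted above: llvm::all_of from llvm/ADT/STLExtras.h takes the container directly instead of a begin()/end() pair.

#include "llvm/ADT/STLExtras.h"
#include <string>
#include <vector>

static bool allSameAsFirst(const std::vector<std::string> &Files) {
  return Files.empty() ||
         llvm::all_of(Files, [&](const std::string &S) { return S == Files[0]; });
}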
@@ -1091,7 +1083,8 @@ Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) {
// Silence driver warnings if requested
Diags.setIgnoreAllWarnings(Args.hasArg(options::OPT_w));
- // -no-canonical-prefixes is used very early in main.
+ // -canonical-prefixes, -no-canonical-prefixes are used very early in main.
+ Args.ClaimAllArgs(options::OPT_canonical_prefixes);
Args.ClaimAllArgs(options::OPT_no_canonical_prefixes);
// f(no-)integrated-cc1 is also used very early in main.
@@ -1228,8 +1221,14 @@ Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) {
static void printArgList(raw_ostream &OS, const llvm::opt::ArgList &Args) {
llvm::opt::ArgStringList ASL;
- for (const auto *A : Args)
+ for (const auto *A : Args) {
+ // Use user's original spelling of flags. For example, use
+ // `/source-charset:utf-8` instead of `-finput-charset=utf-8` if the user
+ // wrote the former.
+ while (A->getAlias())
+ A = A->getAlias();
A->render(Args, ASL);
+ }
for (auto I = ASL.begin(), E = ASL.end(); I != E; ++I) {
if (I != ASL.begin())
@@ -1346,7 +1345,6 @@ void Driver::generateCompilationDiagnostics(
PrintVersion(C, llvm::errs());
// Suppress driver output and emit preprocessor output to temp file.
- Mode = CPPMode;
CCGenDiagnostics = true;
// Save the original job command(s).
@@ -2148,19 +2146,6 @@ bool Driver::DiagnoseInputExistence(const DerivedArgList &Args, StringRef Value,
if (getVFS().exists(Value))
return true;
- if (IsCLMode()) {
- if (!llvm::sys::path::is_absolute(Twine(Value)) &&
- llvm::sys::Process::FindInEnvPath("LIB", Value, ';'))
- return true;
-
- if (Args.hasArg(options::OPT__SLASH_link) && Ty == types::TY_Object) {
- // Arguments to the /link flag might cause the linker to search for object
- // and library files in paths we don't know about. Don't error in such
- // cases.
- return true;
- }
- }
-
if (TypoCorrect) {
// Check if the filename is a typo for an option flag. OptTable thinks
// that all args that are not known options and that start with / are
@@ -2180,6 +2165,43 @@ bool Driver::DiagnoseInputExistence(const DerivedArgList &Args, StringRef Value,
}
}
+ // In CL mode, don't error on apparently non-existent linker inputs, because
+ // they can be influenced by linker flags the clang driver might not
+ // understand.
+ // Examples:
+ // - `clang-cl main.cc ole32.lib` in a non-MSVC shell will make the driver
+ // module look for an MSVC installation in the registry. (We could ask
+ // the MSVCToolChain object if it can find `ole32.lib`, but the logic to
+ // look in the registry might move into lld-link in the future so that
+ // lld-link invocations in non-MSVC shells just work too.)
+ // - `clang-cl ... /link ...` can pass arbitrary flags to the linker,
+ // including /libpath:, which is used to find .lib and .obj files.
+ // So do not diagnose this on the driver level. Rely on the linker diagnosing
+ // it. (If we don't end up invoking the linker, this means we'll emit a
+ // "'linker' input unused [-Wunused-command-line-argument]" warning instead
+ // of an error.)
+ //
+ // Only do this skip after the typo correction step above. `/Brepo` is treated
+ // as TY_Object, but it's clearly a typo for `/Brepro`. It seems fine to emit
+ // an error if we have a flag that's within an edit distance of 1 from a
+ // flag. (Users can use `-Wl,` or `/linker` to launder the flag past the
+ // driver in the unlikely case they run into this.)
+ //
+ // Don't do this for inputs that start with a '/', else we'd pass options
+ // like /libpath: through to the linker silently.
+ //
+ // Emitting an error for linker inputs can also cause incorrect diagnostics
+ // with the gcc driver. The command
+ // clang -fuse-ld=lld -Wl,--chroot,some/dir /file.o
+ // will make lld look for some/dir/file.o, while we will diagnose here that
+ // `/file.o` does not exist. However, configure scripts check if
+ // `clang /GR-` compiles without error to see if the compiler is cl.exe,
+ // so we can't downgrade diagnostics for `/GR-` from an error to a warning
+ // in cc mode. (We can in cl mode because cl.exe itself only warns on
+ // unknown flags.)
+ if (IsCLMode() && Ty == types::TY_Object && !Value.startswith("/"))
+ return true;
+
Diag(clang::diag::err_drv_no_such_file) << Value;
return false;
}
@@ -2242,6 +2264,7 @@ void Driver::BuildInputs(const ToolChain &TC, DerivedArgList &Args,
//
// Otherwise emit an error but still use a valid type to avoid
// spurious errors (e.g., no inputs).
+ assert(!CCGenDiagnostics && "stdin produces no crash reproducer");
if (!Args.hasArgNoClaim(options::OPT_E) && !CCCIsCPP())
Diag(IsCLMode() ? clang::diag::err_drv_unknown_stdin_type_clang_cl
: clang::diag::err_drv_unknown_stdin_type);
@@ -2257,10 +2280,10 @@ void Driver::BuildInputs(const ToolChain &TC, DerivedArgList &Args,
Ty = TC.LookupTypeForExtension(Ext + 1);
if (Ty == types::TY_INVALID) {
- if (CCCIsCPP())
- Ty = types::TY_C;
- else if (IsCLMode() && Args.hasArgNoClaim(options::OPT_E))
+ if (IsCLMode() && (Args.hasArgNoClaim(options::OPT_E) || CCGenDiagnostics))
Ty = types::TY_CXX;
+ else if (CCCIsCPP() || CCGenDiagnostics)
+ Ty = types::TY_C;
else
Ty = types::TY_Object;
}
@@ -2634,7 +2657,7 @@ class OffloadingActionBuilder final {
assert(CudaDeviceActions.size() == GpuArchList.size() &&
"Expecting one action per GPU architecture.");
assert(ToolChains.size() == 1 &&
- "Expecting to have a sing CUDA toolchain.");
+ "Expecting to have a single CUDA toolchain.");
for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I)
AddTopLevel(CudaDeviceActions[I], GpuArchList[I]);
@@ -2765,7 +2788,7 @@ class OffloadingActionBuilder final {
CudaActionBuilder(Compilation &C, DerivedArgList &Args,
const Driver::InputList &Inputs)
: CudaActionBuilderBase(C, Args, Inputs, Action::OFK_Cuda) {
- DefaultCudaArch = CudaArch::SM_20;
+ DefaultCudaArch = CudaArch::SM_35;
}
StringRef getCanonicalOffloadArch(StringRef ArchStr) override {
@@ -2891,7 +2914,6 @@ class OffloadingActionBuilder final {
class HIPActionBuilder final : public CudaActionBuilderBase {
/// The linker inputs obtained for each device arch.
SmallVector<ActionList, 8> DeviceLinkerInputs;
- bool GPUSanitize;
// The default bundling behavior depends on the type of output, therefore
// BundleOutput needs to be tri-value: None, true, or false.
// Bundle code objects except --no-gpu-output is specified for device
@@ -2904,8 +2926,6 @@ class OffloadingActionBuilder final {
const Driver::InputList &Inputs)
: CudaActionBuilderBase(C, Args, Inputs, Action::OFK_HIP) {
DefaultCudaArch = CudaArch::GFX803;
- GPUSanitize = Args.hasFlag(options::OPT_fgpu_sanitize,
- options::OPT_fno_gpu_sanitize, false);
if (Args.hasArg(options::OPT_gpu_bundle_output,
options::OPT_no_gpu_bundle_output))
BundleOutput = Args.hasFlag(options::OPT_gpu_bundle_output,
@@ -3823,7 +3843,7 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
if (Args.hasArg(options::OPT_emit_interface_stubs)) {
auto PhaseList = types::getCompilationPhases(
types::TY_IFS_CPP,
- Args.hasArg(options::OPT_c) ? phases::Compile : phases::LastPhase);
+ Args.hasArg(options::OPT_c) ? phases::Compile : phases::IfsMerge);
ActionList MergerInputs;
@@ -4124,7 +4144,7 @@ void Driver::BuildJobs(Compilation &C) const {
<< '\n';
Out.flush();
std::error_code EC;
- llvm::raw_fd_ostream OS(CCPrintStatReportFilename.c_str(), EC,
+ llvm::raw_fd_ostream OS(CCPrintStatReportFilename, EC,
llvm::sys::fs::OF_Append |
llvm::sys::fs::OF_Text);
if (EC)
@@ -4674,8 +4694,14 @@ InputInfo Driver::BuildJobsForActionNoCache(
CachedResults, A->getOffloadingDeviceKind()));
}
- // Always use the first input as the base input.
+ // Always use the first file input as the base input.
const char *BaseInput = InputInfos[0].getBaseInput();
+ for (auto &Info : InputInfos) {
+ if (Info.isFilename()) {
+ BaseInput = Info.getBaseInput();
+ break;
+ }
+ }
// ... except dsymutil actions, which use their actual input as the base
// input.
@@ -4870,11 +4896,11 @@ const char *Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA,
bool MultipleArchs,
StringRef OffloadingPrefix) const {
std::string BoundArch = OrigBoundArch.str();
-#if defined(_WIN32)
- // BoundArch may contains ':', which is invalid in file names on Windows,
- // therefore replace it with '%'.
- std::replace(BoundArch.begin(), BoundArch.end(), ':', '@');
-#endif
+ if (is_style_windows(llvm::sys::path::Style::native)) {
+ // BoundArch may contain ':', which is invalid in file names on Windows,
+ // therefore replace it with '@'.
+ std::replace(BoundArch.begin(), BoundArch.end(), ':', '@');
+ }
llvm::PrettyStackTraceString CrashInfo("Computing output path");
// Output to a user requested destination?
@@ -4939,7 +4965,13 @@ const char *Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA,
return "";
}
} else {
- TmpName = GetTemporaryPath(Split.first, Suffix);
+ if (MultipleArchs && !BoundArch.empty()) {
+ TmpName = GetTemporaryDirectory(Split.first);
+ llvm::sys::path::append(TmpName,
+ Split.first + "-" + BoundArch + "." + Suffix);
+ } else {
+ TmpName = GetTemporaryPath(Split.first, Suffix);
+ }
}
return C.addTempFile(C.getArgs().MakeArgString(TmpName));
}
@@ -5568,7 +5600,6 @@ llvm::StringRef clang::driver::getDriverMode(StringRef ProgName,
if (!Arg.startswith(OptName))
continue;
Opt = Arg;
- break;
}
if (Opt.empty())
Opt = ToolChain::getTargetAndModeFromProgramName(ProgName).DriverMode;
diff --git a/clang/lib/Driver/Multilib.cpp b/clang/lib/Driver/Multilib.cpp
index 5dd55553bcb5..ab44ba50b5d5 100644
--- a/clang/lib/Driver/Multilib.cpp
+++ b/clang/lib/Driver/Multilib.cpp
@@ -299,7 +299,7 @@ MultilibSet::multilib_list MultilibSet::filterCopy(FilterCallback F,
}
void MultilibSet::filterInPlace(FilterCallback F, multilib_list &Ms) {
- Ms.erase(std::remove_if(Ms.begin(), Ms.end(), F), Ms.end());
+ llvm::erase_if(Ms, F);
}
raw_ostream &clang::driver::operator<<(raw_ostream &OS, const MultilibSet &MS) {
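Similarly hedged: llvm::erase_if (also from llvm/ADT/STLExtras.h) is the single-call replacement for the erase/remove_if idiom that filterInPlace used above.

#include "llvm/ADT/STLExtras.h"
#include <vector>

static void dropNegative(std::vector<int> &V) {
  // Same effect as V.erase(std::remove_if(V.begin(), V.end(), pred), V.end()).
  llvm::erase_if(V, [](int X) { return X < 0; });
}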
diff --git a/clang/lib/Driver/SanitizerArgs.cpp b/clang/lib/Driver/SanitizerArgs.cpp
index 8770fb1cf9fe..d31529748b62 100644
--- a/clang/lib/Driver/SanitizerArgs.cpp
+++ b/clang/lib/Driver/SanitizerArgs.cpp
@@ -91,6 +91,8 @@ enum CoverageFeature {
CoveragePCTable = 1 << 13,
CoverageStackDepth = 1 << 14,
CoverageInlineBoolFlag = 1 << 15,
+ CoverageTraceLoads = 1 << 16,
+ CoverageTraceStores = 1 << 17,
};
/// Parse a -fsanitize= or -fno-sanitize= argument's values, diagnosing any
@@ -100,7 +102,8 @@ static SanitizerMask parseArgValues(const Driver &D, const llvm::opt::Arg *A,
/// Parse -f(no-)?sanitize-coverage= flag values, diagnosing any invalid
/// components. Returns OR of members of \c CoverageFeature enumeration.
-static int parseCoverageFeatures(const Driver &D, const llvm::opt::Arg *A);
+static int parseCoverageFeatures(const Driver &D, const llvm::opt::Arg *A,
+ bool DiagnoseErrors);
/// Produce an argument string from ArgList \p Args, which shows how it
/// provides some sanitizer kind from \p Mask. For example, the argument list
@@ -123,19 +126,21 @@ static std::string toString(const clang::SanitizerSet &Sanitizers);
static void validateSpecialCaseListFormat(const Driver &D,
std::vector<std::string> &SCLFiles,
- unsigned MalformedSCLErrorDiagID) {
+ unsigned MalformedSCLErrorDiagID,
+ bool DiagnoseErrors) {
if (SCLFiles.empty())
return;
std::string BLError;
std::unique_ptr<llvm::SpecialCaseList> SCL(
llvm::SpecialCaseList::create(SCLFiles, D.getVFS(), BLError));
- if (!SCL.get())
+ if (!SCL.get() && DiagnoseErrors)
D.Diag(MalformedSCLErrorDiagID) << BLError;
}
static void addDefaultIgnorelists(const Driver &D, SanitizerMask Kinds,
- std::vector<std::string> &IgnorelistFiles) {
+ std::vector<std::string> &IgnorelistFiles,
+ bool DiagnoseErrors) {
struct Ignorelist {
const char *File;
SanitizerMask Mask;
@@ -159,13 +164,14 @@ static void addDefaultIgnorelists(const Driver &D, SanitizerMask Kinds,
llvm::sys::path::append(Path, "share", BL.File);
if (D.getVFS().exists(Path))
IgnorelistFiles.push_back(std::string(Path.str()));
- else if (BL.Mask == SanitizerKind::CFI)
+ else if (BL.Mask == SanitizerKind::CFI && DiagnoseErrors)
// If cfi_ignorelist.txt cannot be found in the resource dir, driver
// should fail.
D.Diag(clang::diag::err_drv_no_such_file) << Path;
}
validateSpecialCaseListFormat(
- D, IgnorelistFiles, clang::diag::err_drv_malformed_sanitizer_ignorelist);
+ D, IgnorelistFiles, clang::diag::err_drv_malformed_sanitizer_ignorelist,
+ DiagnoseErrors);
}
/// Parse -f(no-)?sanitize-(coverage-)?(white|ignore)list argument's values,
@@ -175,7 +181,8 @@ static void parseSpecialCaseListArg(const Driver &D,
std::vector<std::string> &SCLFiles,
llvm::opt::OptSpecifier SCLOptionID,
llvm::opt::OptSpecifier NoSCLOptionID,
- unsigned MalformedSCLErrorDiagID) {
+ unsigned MalformedSCLErrorDiagID,
+ bool DiagnoseErrors) {
for (const auto *Arg : Args) {
// Match -fsanitize-(coverage-)?(white|ignore)list.
if (Arg->getOption().matches(SCLOptionID)) {
@@ -183,7 +190,7 @@ static void parseSpecialCaseListArg(const Driver &D,
std::string SCLPath = Arg->getValue();
if (D.getVFS().exists(SCLPath)) {
SCLFiles.push_back(SCLPath);
- } else {
+ } else if (DiagnoseErrors) {
D.Diag(clang::diag::err_drv_no_such_file) << SCLPath;
}
// Match -fno-sanitize-ignorelist.
@@ -192,7 +199,8 @@ static void parseSpecialCaseListArg(const Driver &D,
SCLFiles.clear();
}
}
- validateSpecialCaseListFormat(D, SCLFiles, MalformedSCLErrorDiagID);
+ validateSpecialCaseListFormat(D, SCLFiles, MalformedSCLErrorDiagID,
+ DiagnoseErrors);
}
/// Sets group bits for every group that has at least one representative already
@@ -207,21 +215,21 @@ static SanitizerMask setGroupBits(SanitizerMask Kinds) {
}
static SanitizerMask parseSanitizeTrapArgs(const Driver &D,
- const llvm::opt::ArgList &Args) {
+ const llvm::opt::ArgList &Args,
+ bool DiagnoseErrors) {
SanitizerMask TrapRemove; // During the loop below, the accumulated set of
// sanitizers disabled by the current sanitizer
// argument or any argument after it.
SanitizerMask TrappingKinds;
SanitizerMask TrappingSupportedWithGroups = setGroupBits(TrappingSupported);
- for (ArgList::const_reverse_iterator I = Args.rbegin(), E = Args.rend();
- I != E; ++I) {
- const auto *Arg = *I;
+ for (const llvm::opt::Arg *Arg : llvm::reverse(Args)) {
if (Arg->getOption().matches(options::OPT_fsanitize_trap_EQ)) {
Arg->claim();
SanitizerMask Add = parseArgValues(D, Arg, true);
Add &= ~TrapRemove;
- if (SanitizerMask InvalidValues = Add & ~TrappingSupportedWithGroups) {
+ SanitizerMask InvalidValues = Add & ~TrappingSupportedWithGroups;
+ if (InvalidValues && DiagnoseErrors) {
SanitizerSet S;
S.Mask = InvalidValues;
D.Diag(diag::err_drv_unsupported_option_argument) << "-fsanitize-trap"
@@ -230,7 +238,8 @@ static SanitizerMask parseSanitizeTrapArgs(const Driver &D,
TrappingKinds |= expandSanitizerGroups(Add) & ~TrapRemove;
} else if (Arg->getOption().matches(options::OPT_fno_sanitize_trap_EQ)) {
Arg->claim();
- TrapRemove |= expandSanitizerGroups(parseArgValues(D, Arg, true));
+ TrapRemove |=
+ expandSanitizerGroups(parseArgValues(D, Arg, DiagnoseErrors));
}
}
@@ -278,7 +287,8 @@ bool SanitizerArgs::needsLTO() const {
}
SanitizerArgs::SanitizerArgs(const ToolChain &TC,
- const llvm::opt::ArgList &Args) {
+ const llvm::opt::ArgList &Args,
+ bool DiagnoseErrors) {
SanitizerMask AllRemove; // During the loop below, the accumulated set of
// sanitizers disabled by the current sanitizer
// argument or any argument after it.
@@ -298,7 +308,7 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
ToolChain::RTTIMode RTTIMode = TC.getRTTIMode();
const Driver &D = TC.getDriver();
- SanitizerMask TrappingKinds = parseSanitizeTrapArgs(D, Args);
+ SanitizerMask TrappingKinds = parseSanitizeTrapArgs(D, Args, DiagnoseErrors);
SanitizerMask InvalidTrappingKinds = TrappingKinds & NotAllowedWithTrap;
MinimalRuntime =
@@ -310,19 +320,17 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
bool RemoveObjectSizeAtO0 =
!OptLevel || OptLevel->getOption().matches(options::OPT_O0);
- for (ArgList::const_reverse_iterator I = Args.rbegin(), E = Args.rend();
- I != E; ++I) {
- const auto *Arg = *I;
+ for (const llvm::opt::Arg *Arg : llvm::reverse(Args)) {
if (Arg->getOption().matches(options::OPT_fsanitize_EQ)) {
Arg->claim();
- SanitizerMask Add = parseArgValues(D, Arg, /*AllowGroups=*/true);
+ SanitizerMask Add = parseArgValues(D, Arg, DiagnoseErrors);
if (RemoveObjectSizeAtO0) {
AllRemove |= SanitizerKind::ObjectSize;
// The user explicitly enabled the object size sanitizer. Warn
// that this does nothing at -O0.
- if (Add & SanitizerKind::ObjectSize)
+ if ((Add & SanitizerKind::ObjectSize) && DiagnoseErrors)
D.Diag(diag::warn_drv_object_size_disabled_O0)
<< Arg->getAsString(Args);
}
@@ -336,9 +344,11 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
// Diagnose them.
if (SanitizerMask KindsToDiagnose =
Add & InvalidTrappingKinds & ~DiagnosedKinds) {
- std::string Desc = describeSanitizeArg(*I, KindsToDiagnose);
- D.Diag(diag::err_drv_argument_not_allowed_with)
- << Desc << "-fsanitize-trap=undefined";
+ if (DiagnoseErrors) {
+ std::string Desc = describeSanitizeArg(Arg, KindsToDiagnose);
+ D.Diag(diag::err_drv_argument_not_allowed_with)
+ << Desc << "-fsanitize-trap=undefined";
+ }
DiagnosedKinds |= KindsToDiagnose;
}
Add &= ~InvalidTrappingKinds;
@@ -346,9 +356,11 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
if (MinimalRuntime) {
if (SanitizerMask KindsToDiagnose =
Add & NotAllowedWithMinimalRuntime & ~DiagnosedKinds) {
- std::string Desc = describeSanitizeArg(*I, KindsToDiagnose);
- D.Diag(diag::err_drv_argument_not_allowed_with)
- << Desc << "-fsanitize-minimal-runtime";
+ if (DiagnoseErrors) {
+ std::string Desc = describeSanitizeArg(Arg, KindsToDiagnose);
+ D.Diag(diag::err_drv_argument_not_allowed_with)
+ << Desc << "-fsanitize-minimal-runtime";
+ }
DiagnosedKinds |= KindsToDiagnose;
}
Add &= ~NotAllowedWithMinimalRuntime;
@@ -365,17 +377,20 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
// Fixing both of those may require changes to the cross-DSO CFI
// interface.
if (CfiCrossDso && (Add & SanitizerKind::CFIMFCall & ~DiagnosedKinds)) {
- D.Diag(diag::err_drv_argument_not_allowed_with)
- << "-fsanitize=cfi-mfcall"
- << "-fsanitize-cfi-cross-dso";
+ if (DiagnoseErrors)
+ D.Diag(diag::err_drv_argument_not_allowed_with)
+ << "-fsanitize=cfi-mfcall"
+ << "-fsanitize-cfi-cross-dso";
Add &= ~SanitizerKind::CFIMFCall;
DiagnosedKinds |= SanitizerKind::CFIMFCall;
}
if (SanitizerMask KindsToDiagnose = Add & ~Supported & ~DiagnosedKinds) {
- std::string Desc = describeSanitizeArg(*I, KindsToDiagnose);
- D.Diag(diag::err_drv_unsupported_opt_for_target)
- << Desc << TC.getTriple().str();
+ if (DiagnoseErrors) {
+ std::string Desc = describeSanitizeArg(Arg, KindsToDiagnose);
+ D.Diag(diag::err_drv_unsupported_opt_for_target)
+ << Desc << TC.getTriple().str();
+ }
DiagnosedKinds |= KindsToDiagnose;
}
Add &= Supported;
@@ -389,12 +404,14 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
"RTTI disabled without -fno-rtti option?");
// The user explicitly passed -fno-rtti with -fsanitize=vptr, but
// the vptr sanitizer requires RTTI, so this is a user error.
- D.Diag(diag::err_drv_argument_not_allowed_with)
- << "-fsanitize=vptr" << NoRTTIArg->getAsString(Args);
+ if (DiagnoseErrors)
+ D.Diag(diag::err_drv_argument_not_allowed_with)
+ << "-fsanitize=vptr" << NoRTTIArg->getAsString(Args);
} else {
// The vptr sanitizer requires RTTI, but RTTI is disabled (by
// default). Warn that the vptr sanitizer is being disabled.
- D.Diag(diag::warn_drv_disabling_vptr_no_rtti_default);
+ if (DiagnoseErrors)
+ D.Diag(diag::warn_drv_disabling_vptr_no_rtti_default);
}
// Take out the Vptr sanitizer from the enabled sanitizers
@@ -429,7 +446,7 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
Kinds |= Add;
} else if (Arg->getOption().matches(options::OPT_fno_sanitize_EQ)) {
Arg->claim();
- SanitizerMask Remove = parseArgValues(D, Arg, true);
+ SanitizerMask Remove = parseArgValues(D, Arg, DiagnoseErrors);
AllRemove |= expandSanitizerGroups(Remove);
}
}
@@ -490,7 +507,7 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
}
// Check that LTO is enabled if we need it.
- if ((Kinds & NeedsLTO) && !D.isUsingLTO()) {
+ if ((Kinds & NeedsLTO) && !D.isUsingLTO() && DiagnoseErrors) {
D.Diag(diag::err_drv_argument_only_allowed_with)
<< lastArgumentForMask(D, Args, Kinds & NeedsLTO) << "-flto";
}
@@ -499,7 +516,7 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
((TC.getTriple().isAArch64() &&
!llvm::AArch64::isX18ReservedByDefault(TC.getTriple())) ||
TC.getTriple().isRISCV()) &&
- !Args.hasArg(options::OPT_ffixed_x18)) {
+ !Args.hasArg(options::OPT_ffixed_x18) && DiagnoseErrors) {
D.Diag(diag::err_drv_argument_only_allowed_with)
<< lastArgumentForMask(D, Args, Kinds & SanitizerKind::ShadowCallStack)
<< "-ffixed-x18";
@@ -518,8 +535,9 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
if (KindsToDiagnose) {
SanitizerSet S;
S.Mask = KindsToDiagnose;
- D.Diag(diag::err_drv_unsupported_opt_for_target)
- << ("-fno-sanitize-trap=" + toString(S)) << TC.getTriple().str();
+ if (DiagnoseErrors)
+ D.Diag(diag::err_drv_unsupported_opt_for_target)
+ << ("-fno-sanitize-trap=" + toString(S)) << TC.getTriple().str();
Kinds &= ~KindsToDiagnose;
}
}
@@ -529,9 +547,10 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
SanitizerMask Group = G.first;
if (Kinds & Group) {
if (SanitizerMask Incompatible = Kinds & G.second) {
- D.Diag(clang::diag::err_drv_argument_not_allowed_with)
- << lastArgumentForMask(D, Args, Group)
- << lastArgumentForMask(D, Args, Incompatible);
+ if (DiagnoseErrors)
+ D.Diag(clang::diag::err_drv_argument_not_allowed_with)
+ << lastArgumentForMask(D, Args, Group)
+ << lastArgumentForMask(D, Args, Incompatible);
Kinds &= ~Incompatible;
}
}
@@ -547,29 +566,31 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
SanitizerMask DiagnosedAlwaysRecoverableKinds;
for (const auto *Arg : Args) {
if (Arg->getOption().matches(options::OPT_fsanitize_recover_EQ)) {
- SanitizerMask Add = parseArgValues(D, Arg, true);
+ SanitizerMask Add = parseArgValues(D, Arg, DiagnoseErrors);
// Report error if user explicitly tries to recover from unrecoverable
// sanitizer.
if (SanitizerMask KindsToDiagnose =
Add & Unrecoverable & ~DiagnosedUnrecoverableKinds) {
SanitizerSet SetToDiagnose;
SetToDiagnose.Mask |= KindsToDiagnose;
- D.Diag(diag::err_drv_unsupported_option_argument)
- << Arg->getOption().getName() << toString(SetToDiagnose);
+ if (DiagnoseErrors)
+ D.Diag(diag::err_drv_unsupported_option_argument)
+ << Arg->getOption().getName() << toString(SetToDiagnose);
DiagnosedUnrecoverableKinds |= KindsToDiagnose;
}
RecoverableKinds |= expandSanitizerGroups(Add);
Arg->claim();
} else if (Arg->getOption().matches(options::OPT_fno_sanitize_recover_EQ)) {
- SanitizerMask Remove = parseArgValues(D, Arg, true);
+ SanitizerMask Remove = parseArgValues(D, Arg, DiagnoseErrors);
// Report error if user explicitly tries to disable recovery from
// always recoverable sanitizer.
if (SanitizerMask KindsToDiagnose =
Remove & AlwaysRecoverable & ~DiagnosedAlwaysRecoverableKinds) {
SanitizerSet SetToDiagnose;
SetToDiagnose.Mask |= KindsToDiagnose;
- D.Diag(diag::err_drv_unsupported_option_argument)
- << Arg->getOption().getName() << toString(SetToDiagnose);
+ if (DiagnoseErrors)
+ D.Diag(diag::err_drv_unsupported_option_argument)
+ << Arg->getOption().getName() << toString(SetToDiagnose);
DiagnosedAlwaysRecoverableKinds |= KindsToDiagnose;
}
RecoverableKinds &= ~expandSanitizerGroups(Remove);
@@ -586,14 +607,14 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
// Add default ignorelist from resource directory for activated sanitizers,
// and validate special case lists format.
if (!Args.hasArgNoClaim(options::OPT_fno_sanitize_ignorelist))
- addDefaultIgnorelists(D, Kinds, SystemIgnorelistFiles);
+ addDefaultIgnorelists(D, Kinds, SystemIgnorelistFiles, DiagnoseErrors);
// Parse -f(no-)?sanitize-ignorelist options.
// This also validates special case lists format.
- parseSpecialCaseListArg(D, Args, UserIgnorelistFiles,
- options::OPT_fsanitize_ignorelist_EQ,
- options::OPT_fno_sanitize_ignorelist,
- clang::diag::err_drv_malformed_sanitizer_ignorelist);
+ parseSpecialCaseListArg(
+ D, Args, UserIgnorelistFiles, options::OPT_fsanitize_ignorelist_EQ,
+ options::OPT_fno_sanitize_ignorelist,
+ clang::diag::err_drv_malformed_sanitizer_ignorelist, DiagnoseErrors);
// Parse -f[no-]sanitize-memory-track-origins[=level] options.
if (AllAddedKinds & SanitizerKind::Memory) {
@@ -610,7 +631,9 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
StringRef S = A->getValue();
if (S.getAsInteger(0, MsanTrackOrigins) || MsanTrackOrigins < 0 ||
MsanTrackOrigins > 2) {
- D.Diag(clang::diag::err_drv_invalid_value) << A->getAsString(Args) << S;
+ if (DiagnoseErrors)
+ D.Diag(clang::diag::err_drv_invalid_value)
+ << A->getAsString(Args) << S;
}
}
}
@@ -643,7 +666,7 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
CfiICallGeneralizePointers =
Args.hasArg(options::OPT_fsanitize_cfi_icall_generalize_pointers);
- if (CfiCrossDso && CfiICallGeneralizePointers)
+ if (CfiCrossDso && CfiICallGeneralizePointers && DiagnoseErrors)
D.Diag(diag::err_drv_argument_not_allowed_with)
<< "-fsanitize-cfi-cross-dso"
<< "-fsanitize-cfi-icall-generalize-pointers";
@@ -659,13 +682,13 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
if (MinimalRuntime) {
SanitizerMask IncompatibleMask =
Kinds & ~setGroupBits(CompatibleWithMinimalRuntime);
- if (IncompatibleMask)
+ if (IncompatibleMask && DiagnoseErrors)
D.Diag(clang::diag::err_drv_argument_not_allowed_with)
<< "-fsanitize-minimal-runtime"
<< lastArgumentForMask(D, Args, IncompatibleMask);
SanitizerMask NonTrappingCfi = Kinds & SanitizerKind::CFI & ~TrappingKinds;
- if (NonTrappingCfi)
+ if (NonTrappingCfi && DiagnoseErrors)
D.Diag(clang::diag::err_drv_argument_only_allowed_with)
<< "fsanitize-minimal-runtime"
<< "fsanitize-trap=cfi";
@@ -681,13 +704,13 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
.getAsInteger(0, LegacySanitizeCoverage)) {
CoverageFeatures = 0;
Arg->claim();
- if (LegacySanitizeCoverage != 0) {
+ if (LegacySanitizeCoverage != 0 && DiagnoseErrors) {
D.Diag(diag::warn_drv_deprecated_arg)
<< Arg->getAsString(Args) << "-fsanitize-coverage=trace-pc-guard";
}
continue;
}
- CoverageFeatures |= parseCoverageFeatures(D, Arg);
+ CoverageFeatures |= parseCoverageFeatures(D, Arg, DiagnoseErrors);
// Disable coverage and not claim the flags if there is at least one
// non-supporting sanitizer.
@@ -698,39 +721,41 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
}
} else if (Arg->getOption().matches(options::OPT_fno_sanitize_coverage)) {
Arg->claim();
- CoverageFeatures &= ~parseCoverageFeatures(D, Arg);
+ CoverageFeatures &= ~parseCoverageFeatures(D, Arg, DiagnoseErrors);
}
}
// Choose at most one coverage type: function, bb, or edge.
- if ((CoverageFeatures & CoverageFunc) && (CoverageFeatures & CoverageBB))
- D.Diag(clang::diag::err_drv_argument_not_allowed_with)
- << "-fsanitize-coverage=func"
- << "-fsanitize-coverage=bb";
- if ((CoverageFeatures & CoverageFunc) && (CoverageFeatures & CoverageEdge))
- D.Diag(clang::diag::err_drv_argument_not_allowed_with)
- << "-fsanitize-coverage=func"
- << "-fsanitize-coverage=edge";
- if ((CoverageFeatures & CoverageBB) && (CoverageFeatures & CoverageEdge))
- D.Diag(clang::diag::err_drv_argument_not_allowed_with)
- << "-fsanitize-coverage=bb"
- << "-fsanitize-coverage=edge";
- // Basic block tracing and 8-bit counters require some type of coverage
- // enabled.
- if (CoverageFeatures & CoverageTraceBB)
- D.Diag(clang::diag::warn_drv_deprecated_arg)
- << "-fsanitize-coverage=trace-bb"
- << "-fsanitize-coverage=trace-pc-guard";
- if (CoverageFeatures & Coverage8bitCounters)
- D.Diag(clang::diag::warn_drv_deprecated_arg)
- << "-fsanitize-coverage=8bit-counters"
- << "-fsanitize-coverage=trace-pc-guard";
+ if (DiagnoseErrors) {
+ if ((CoverageFeatures & CoverageFunc) && (CoverageFeatures & CoverageBB))
+ D.Diag(clang::diag::err_drv_argument_not_allowed_with)
+ << "-fsanitize-coverage=func"
+ << "-fsanitize-coverage=bb";
+ if ((CoverageFeatures & CoverageFunc) && (CoverageFeatures & CoverageEdge))
+ D.Diag(clang::diag::err_drv_argument_not_allowed_with)
+ << "-fsanitize-coverage=func"
+ << "-fsanitize-coverage=edge";
+ if ((CoverageFeatures & CoverageBB) && (CoverageFeatures & CoverageEdge))
+ D.Diag(clang::diag::err_drv_argument_not_allowed_with)
+ << "-fsanitize-coverage=bb"
+ << "-fsanitize-coverage=edge";
+ // Basic block tracing and 8-bit counters require some type of coverage
+ // enabled.
+ if (CoverageFeatures & CoverageTraceBB)
+ D.Diag(clang::diag::warn_drv_deprecated_arg)
+ << "-fsanitize-coverage=trace-bb"
+ << "-fsanitize-coverage=trace-pc-guard";
+ if (CoverageFeatures & Coverage8bitCounters)
+ D.Diag(clang::diag::warn_drv_deprecated_arg)
+ << "-fsanitize-coverage=8bit-counters"
+ << "-fsanitize-coverage=trace-pc-guard";
+ }
int InsertionPointTypes = CoverageFunc | CoverageBB | CoverageEdge;
int InstrumentationTypes = CoverageTracePC | CoverageTracePCGuard |
- CoverageInline8bitCounters |
- CoverageInlineBoolFlag;
+ CoverageInline8bitCounters | CoverageTraceLoads |
+ CoverageTraceStores | CoverageInlineBoolFlag;
if ((CoverageFeatures & InsertionPointTypes) &&
- !(CoverageFeatures & InstrumentationTypes)) {
+ !(CoverageFeatures & InstrumentationTypes) && DiagnoseErrors) {
D.Diag(clang::diag::warn_drv_deprecated_arg)
<< "-fsanitize-coverage=[func|bb|edge]"
<< "-fsanitize-coverage=[func|bb|edge],[trace-pc-guard|trace-pc]";
@@ -755,11 +780,13 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
parseSpecialCaseListArg(
D, Args, CoverageAllowlistFiles,
options::OPT_fsanitize_coverage_allowlist, OptSpecifier(),
- clang::diag::err_drv_malformed_sanitizer_coverage_whitelist);
+ clang::diag::err_drv_malformed_sanitizer_coverage_allowlist,
+ DiagnoseErrors);
parseSpecialCaseListArg(
D, Args, CoverageIgnorelistFiles,
options::OPT_fsanitize_coverage_ignorelist, OptSpecifier(),
- clang::diag::err_drv_malformed_sanitizer_coverage_ignorelist);
+ clang::diag::err_drv_malformed_sanitizer_coverage_ignorelist,
+ DiagnoseErrors);
}
SharedRuntime =
@@ -775,8 +802,9 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
Args.getLastArg(options::OPT_fsanitize_address_field_padding)) {
StringRef S = A->getValue();
// Legal values are 0 and 1, 2, but in future we may add more levels.
- if (S.getAsInteger(0, AsanFieldPadding) || AsanFieldPadding < 0 ||
- AsanFieldPadding > 2) {
+ if ((S.getAsInteger(0, AsanFieldPadding) || AsanFieldPadding < 0 ||
+ AsanFieldPadding > 2) &&
+ DiagnoseErrors) {
D.Diag(clang::diag::err_drv_invalid_value) << A->getAsString(Args) << S;
}
}
@@ -789,10 +817,12 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
case options::OPT__SLASH_MTd:
case options::OPT__SLASH_MDd:
case options::OPT__SLASH_LDd:
- D.Diag(clang::diag::err_drv_argument_not_allowed_with)
- << WindowsDebugRTArg->getAsString(Args)
- << lastArgumentForMask(D, Args, SanitizerKind::Address);
- D.Diag(clang::diag::note_drv_address_sanitizer_debug_runtime);
+ if (DiagnoseErrors) {
+ D.Diag(clang::diag::err_drv_argument_not_allowed_with)
+ << WindowsDebugRTArg->getAsString(Args)
+ << lastArgumentForMask(D, Args, SanitizerKind::Address);
+ D.Diag(clang::diag::note_drv_address_sanitizer_debug_runtime);
+ }
}
}
@@ -840,7 +870,7 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
if (const auto *Arg =
Args.getLastArg(options::OPT_sanitize_address_destructor_EQ)) {
auto parsedAsanDtorKind = AsanDtorKindFromString(Arg->getValue());
- if (parsedAsanDtorKind == llvm::AsanDtorKind::Invalid) {
+ if (parsedAsanDtorKind == llvm::AsanDtorKind::Invalid && DiagnoseErrors) {
TC.getDriver().Diag(clang::diag::err_drv_unsupported_option_argument)
<< Arg->getOption().getName() << Arg->getValue();
}
@@ -852,7 +882,8 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
auto parsedAsanUseAfterReturn =
AsanDetectStackUseAfterReturnModeFromString(Arg->getValue());
if (parsedAsanUseAfterReturn ==
- llvm::AsanDetectStackUseAfterReturnMode::Invalid) {
+ llvm::AsanDetectStackUseAfterReturnMode::Invalid &&
+ DiagnoseErrors) {
TC.getDriver().Diag(clang::diag::err_drv_unsupported_option_argument)
<< Arg->getOption().getName() << Arg->getValue();
}
@@ -864,7 +895,8 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
// -fsanitize=pointer-compare/pointer-subtract requires -fsanitize=address.
SanitizerMask DetectInvalidPointerPairs =
SanitizerKind::PointerCompare | SanitizerKind::PointerSubtract;
- if (AllAddedKinds & DetectInvalidPointerPairs & ~AllRemove) {
+ if ((AllAddedKinds & DetectInvalidPointerPairs & ~AllRemove) &&
+ DiagnoseErrors) {
TC.getDriver().Diag(clang::diag::err_drv_argument_only_allowed_with)
<< lastArgumentForMask(D, Args,
SanitizerKind::PointerCompare |
@@ -877,7 +909,8 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
if (Arg *HwasanAbiArg =
Args.getLastArg(options::OPT_fsanitize_hwaddress_abi_EQ)) {
HwasanAbi = HwasanAbiArg->getValue();
- if (HwasanAbi != "platform" && HwasanAbi != "interceptor")
+ if (HwasanAbi != "platform" && HwasanAbi != "interceptor" &&
+ DiagnoseErrors)
D.Diag(clang::diag::err_drv_invalid_value)
<< HwasanAbiArg->getAsString(Args) << HwasanAbi;
} else {
@@ -977,8 +1010,8 @@ void SanitizerArgs::addArgs(const ToolChain &TC, const llvm::opt::ArgList &Args,
// AMDGPU sanitizer support is experimental and controlled by -fgpu-sanitize.
if (TC.getTriple().isNVPTX() ||
(TC.getTriple().isAMDGPU() &&
- !Args.hasFlag(options::OPT_fgpu_sanitize, options::OPT_fno_gpu_sanitize,
- false)))
+ !Args.hasFlag(options::OPT_fgpu_sanitize,
+ options::OPT_fno_gpu_sanitize)))
return;
// Translate available CoverageFeatures to corresponding clang-cc1 flags.
@@ -1003,7 +1036,9 @@ void SanitizerArgs::addArgs(const ToolChain &TC, const llvm::opt::ArgList &Args,
"-fsanitize-coverage-inline-bool-flag"),
std::make_pair(CoveragePCTable, "-fsanitize-coverage-pc-table"),
std::make_pair(CoverageNoPrune, "-fsanitize-coverage-no-prune"),
- std::make_pair(CoverageStackDepth, "-fsanitize-coverage-stack-depth")};
+ std::make_pair(CoverageStackDepth, "-fsanitize-coverage-stack-depth"),
+ std::make_pair(CoverageTraceLoads, "-fsanitize-coverage-trace-loads"),
+ std::make_pair(CoverageTraceStores, "-fsanitize-coverage-trace-stores")};
for (auto F : CoverageFlags) {
if (CoverageFeatures & F.first)
CmdArgs.push_back(F.second);
@@ -1146,7 +1181,7 @@ void SanitizerArgs::addArgs(const ToolChain &TC, const llvm::opt::ArgList &Args,
CmdArgs.push_back(Args.MakeArgString("hwasan-abi=" + HwasanAbi));
}
- if (Sanitizers.has(SanitizerKind::HWAddress) && TC.getTriple().isAArch64()) {
+ if (Sanitizers.has(SanitizerKind::HWAddress) && !HwasanUseAliases) {
CmdArgs.push_back("-target-feature");
CmdArgs.push_back("+tagged-globals");
}
@@ -1220,7 +1255,8 @@ SanitizerMask parseArgValues(const Driver &D, const llvm::opt::Arg *A,
return Kinds;
}
-int parseCoverageFeatures(const Driver &D, const llvm::opt::Arg *A) {
+int parseCoverageFeatures(const Driver &D, const llvm::opt::Arg *A,
+ bool DiagnoseErrors) {
assert(A->getOption().matches(options::OPT_fsanitize_coverage) ||
A->getOption().matches(options::OPT_fno_sanitize_coverage));
int Features = 0;
@@ -1243,8 +1279,10 @@ int parseCoverageFeatures(const Driver &D, const llvm::opt::Arg *A) {
.Case("inline-bool-flag", CoverageInlineBoolFlag)
.Case("pc-table", CoveragePCTable)
.Case("stack-depth", CoverageStackDepth)
+ .Case("trace-loads", CoverageTraceLoads)
+ .Case("trace-stores", CoverageTraceStores)
.Default(0);
- if (F == 0)
+ if (F == 0 && DiagnoseErrors)
D.Diag(clang::diag::err_drv_unsupported_option_argument)
<< A->getOption().getName() << Value;
Features |= F;
diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp
index 6c1b88141c45..6588cdf9fecd 100644
--- a/clang/lib/Driver/ToolChain.cpp
+++ b/clang/lib/Driver/ToolChain.cpp
@@ -9,8 +9,8 @@
#include "clang/Driver/ToolChain.h"
#include "ToolChains/Arch/ARM.h"
#include "ToolChains/Clang.h"
-#include "ToolChains/InterfaceStubs.h"
#include "ToolChains/Flang.h"
+#include "ToolChains/InterfaceStubs.h"
#include "clang/Basic/ObjCRuntime.h"
#include "clang/Basic/Sanitizers.h"
#include "clang/Config/config.h"
@@ -29,6 +29,7 @@
#include "llvm/ADT/Twine.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Option/OptTable.h"
@@ -37,7 +38,6 @@
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/TargetParser.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/VersionTuple.h"
#include "llvm/Support/VirtualFileSystem.h"
#include <cassert>
@@ -110,14 +110,11 @@ bool ToolChain::useRelaxRelocations() const {
return ENABLE_X86_RELAX_RELOCATIONS;
}
-bool ToolChain::isNoExecStackDefault() const {
- return false;
-}
-
-const SanitizerArgs& ToolChain::getSanitizerArgs() const {
- if (!SanitizerArguments.get())
- SanitizerArguments.reset(new SanitizerArgs(*this, Args));
- return *SanitizerArguments.get();
+SanitizerArgs
+ToolChain::getSanitizerArgs(const llvm::opt::ArgList &JobArgs) const {
+ SanitizerArgs SanArgs(*this, JobArgs, !SanitizerArgsChecked);
+ SanitizerArgsChecked = true;
+ return SanArgs;
}
const XRayArgs& ToolChain::getXRayArgs() const {
@@ -169,10 +166,11 @@ static const DriverSuffix *FindDriverSuffix(StringRef ProgName, size_t &Pos) {
/// present and lower-casing the string on Windows.
static std::string normalizeProgramName(llvm::StringRef Argv0) {
std::string ProgName = std::string(llvm::sys::path::stem(Argv0));
-#ifdef _WIN32
- // Transform to lowercase for case insensitive file systems.
- std::transform(ProgName.begin(), ProgName.end(), ProgName.begin(), ::tolower);
-#endif
+ if (is_style_windows(llvm::sys::path::Style::native)) {
+ // Transform to lowercase for case insensitive file systems.
+ std::transform(ProgName.begin(), ProgName.end(), ProgName.begin(),
+ ::tolower);
+ }
return ProgName;
}
@@ -583,7 +581,7 @@ std::string ToolChain::GetLinkerPath(bool *LinkerIsLLD,
// for the linker flavor is brittle. In addition, prepending "ld." or "ld64."
// to a relative path is surprising. This is more complex due to priorities
// among -B, COMPILER_PATH and PATH. --ld-path= should be used instead.
- if (UseLinker.find('/') != StringRef::npos)
+ if (UseLinker.contains('/'))
getDriver().Diag(diag::warn_drv_fuse_ld_path);
if (llvm::sys::path::is_absolute(UseLinker)) {
@@ -618,6 +616,8 @@ std::string ToolChain::GetLinkerPath(bool *LinkerIsLLD,
std::string ToolChain::GetStaticLibToolPath() const {
// TODO: Add support for static lib archiving on Windows
+ if (Triple.isOSDarwin())
+ return GetProgramPath("libtool");
return GetProgramPath("llvm-ar");
}
@@ -1024,7 +1024,7 @@ void ToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
void ToolChain::AddHIPIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {}
-llvm::SmallVector<std::string, 12>
+llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12>
ToolChain::getHIPDeviceLibs(const ArgList &DriverArgs) const {
return {};
}
diff --git a/clang/lib/Driver/ToolChains/AIX.cpp b/clang/lib/Driver/ToolChains/AIX.cpp
index 3000b8416adf..e4bbf498b9cd 100644
--- a/clang/lib/Driver/ToolChains/AIX.cpp
+++ b/clang/lib/Driver/ToolChains/AIX.cpp
@@ -98,6 +98,26 @@ void aix::Linker::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back("-bnoentry");
}
+ // Specify PGO linker option without LTO
+ if (!D.isUsingLTO() &&
+ (Args.hasFlag(options::OPT_fprofile_arcs, options::OPT_fno_profile_arcs,
+ false) ||
+ Args.hasFlag(options::OPT_fprofile_generate,
+ options::OPT_fno_profile_generate, false) ||
+ Args.hasFlag(options::OPT_fprofile_generate_EQ,
+ options::OPT_fno_profile_generate, false) ||
+ Args.hasFlag(options::OPT_fprofile_instr_generate,
+ options::OPT_fno_profile_instr_generate, false) ||
+ Args.hasFlag(options::OPT_fprofile_instr_generate_EQ,
+ options::OPT_fno_profile_instr_generate, false) ||
+ Args.hasFlag(options::OPT_fcs_profile_generate,
+ options::OPT_fno_profile_generate, false) ||
+ Args.hasFlag(options::OPT_fcs_profile_generate_EQ,
+ options::OPT_fno_profile_generate, false) ||
+ Args.hasArg(options::OPT_fcreate_profile) ||
+ Args.hasArg(options::OPT_coverage)))
+ CmdArgs.push_back("-bdbg:namedsects");
+
// Specify linker output file.
assert((Output.isFilename() || Output.isNothing()) && "Invalid output.");
if (Output.isFilename()) {
@@ -218,15 +238,44 @@ void AIX::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
addSystemInclude(DriverArgs, CC1Args, UP.str());
}
+void AIX::AddClangCXXStdlibIncludeArgs(
+ const llvm::opt::ArgList &DriverArgs,
+ llvm::opt::ArgStringList &CC1Args) const {
+
+ if (DriverArgs.hasArg(options::OPT_nostdinc) ||
+ DriverArgs.hasArg(options::OPT_nostdincxx) ||
+ DriverArgs.hasArg(options::OPT_nostdlibinc))
+ return;
+
+ switch (GetCXXStdlibType(DriverArgs)) {
+ case ToolChain::CST_Libstdcxx:
+ llvm::report_fatal_error(
+ "picking up libstdc++ headers is unimplemented on AIX");
+ case ToolChain::CST_Libcxx: {
+ llvm::StringRef Sysroot = GetHeaderSysroot(DriverArgs);
+ SmallString<128> PathCPP(Sysroot);
+ llvm::sys::path::append(PathCPP, "opt/IBM/openxlCSDK", "include", "c++",
+ "v1");
+ addSystemInclude(DriverArgs, CC1Args, PathCPP.str());
+ // Required in order to suppress conflicting C++ overloads in the system
+ // libc headers that were used by XL C++.
+ CC1Args.push_back("-D__LIBC_NO_CPP_MATH_OVERLOADS__");
+ return;
+ }
+ }
+
+ llvm_unreachable("Unexpected C++ library type; only libc++ is supported.");
+}
+
void AIX::AddCXXStdlibLibArgs(const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs) const {
switch (GetCXXStdlibType(Args)) {
+ case ToolChain::CST_Libstdcxx:
+ llvm::report_fatal_error("linking libstdc++ unimplemented on AIX");
case ToolChain::CST_Libcxx:
CmdArgs.push_back("-lc++");
CmdArgs.push_back("-lc++abi");
return;
- case ToolChain::CST_Libstdcxx:
- llvm::report_fatal_error("linking libstdc++ unimplemented on AIX");
}
llvm_unreachable("Unexpected C++ library type; only libc++ is supported.");
diff --git a/clang/lib/Driver/ToolChains/AIX.h b/clang/lib/Driver/ToolChains/AIX.h
index d1ec6d10fb3a..e7ec3a5ece4d 100644
--- a/clang/lib/Driver/ToolChains/AIX.h
+++ b/clang/lib/Driver/ToolChains/AIX.h
@@ -63,13 +63,19 @@ public:
return ParseInlineAsmUsingAsmParser;
}
bool isPICDefault() const override { return true; }
- bool isPIEDefault() const override { return false; }
+ bool isPIEDefault(const llvm::opt::ArgList &Args) const override {
+ return false;
+ }
bool isPICDefaultForced() const override { return true; }
void
AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const override;
+ void AddClangCXXStdlibIncludeArgs(
+ const llvm::opt::ArgList &DriverArgs,
+ llvm::opt::ArgStringList &CC1Args) const override;
+
void AddCXXStdlibLibArgs(const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs) const override;
diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp
index d63c5e12c4af..b5eaf1adca6b 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -754,7 +754,7 @@ AMDGPUToolChain::detectSystemGPUs(const ArgList &Args,
std::string ErrorMessage;
if (int Result = llvm::sys::ExecuteAndWait(
- Program.c_str(), {}, {}, Redirects, /* SecondsToWait */ 0,
+ Program, {}, {}, Redirects, /* SecondsToWait */ 0,
/*MemoryLimit*/ 0, &ErrorMessage)) {
if (Result > 0) {
ErrorMessage = "Exited with error code " + std::to_string(Result);
@@ -796,9 +796,9 @@ llvm::Error AMDGPUToolChain::getSystemGPUArch(const ArgList &Args,
}
GPUArch = GPUArchs[0];
if (GPUArchs.size() > 1) {
- bool AllSame = std::all_of(
- GPUArchs.begin(), GPUArchs.end(),
- [&](const StringRef &GPUArch) { return GPUArch == GPUArchs.front(); });
+ bool AllSame = llvm::all_of(GPUArchs, [&](const StringRef &GPUArch) {
+ return GPUArch == GPUArchs.front();
+ });
if (!AllSame)
return llvm::createStringError(
std::error_code(), "Multiple AMD GPUs found with different archs");
@@ -893,3 +893,38 @@ bool AMDGPUToolChain::shouldSkipArgument(const llvm::opt::Arg *A) const {
return true;
return false;
}
+
+llvm::SmallVector<std::string, 12>
+ROCMToolChain::getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs,
+ const std::string &GPUArch) const {
+ auto Kind = llvm::AMDGPU::parseArchAMDGCN(GPUArch);
+ const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind);
+
+ std::string LibDeviceFile = RocmInstallation.getLibDeviceFile(CanonArch);
+ if (LibDeviceFile.empty()) {
+ getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 1 << GPUArch;
+ return {};
+ }
+
+ // If --hip-device-lib is not set, add the default bitcode libraries.
+ // TODO: There are way too many flags that change this. Do we need to check
+ // them all?
+ bool DAZ = DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero,
+ options::OPT_fno_gpu_flush_denormals_to_zero,
+ getDefaultDenormsAreZeroForTarget(Kind));
+ bool FiniteOnly = DriverArgs.hasFlag(
+ options::OPT_ffinite_math_only, options::OPT_fno_finite_math_only, false);
+ bool UnsafeMathOpt =
+ DriverArgs.hasFlag(options::OPT_funsafe_math_optimizations,
+ options::OPT_fno_unsafe_math_optimizations, false);
+ bool FastRelaxedMath = DriverArgs.hasFlag(options::OPT_ffast_math,
+ options::OPT_fno_fast_math, false);
+ bool CorrectSqrt = DriverArgs.hasFlag(
+ options::OPT_fhip_fp32_correctly_rounded_divide_sqrt,
+ options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt);
+ bool Wave64 = isWave64(DriverArgs, Kind);
+
+ return RocmInstallation.getCommonBitcodeLibs(
+ DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt,
+ FastRelaxedMath, CorrectSqrt);
+}
diff --git a/clang/lib/Driver/ToolChains/AMDGPU.h b/clang/lib/Driver/ToolChains/AMDGPU.h
index 50ed3b3ded9a..156bfd1fbdb2 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.h
+++ b/clang/lib/Driver/ToolChains/AMDGPU.h
@@ -51,7 +51,7 @@ protected:
const std::map<options::ID, const StringRef> OptionsDefault;
Tool *buildLinker() const override;
- const StringRef getOptionDefault(options::ID OptID) const {
+ StringRef getOptionDefault(options::ID OptID) const {
auto opt = OptionsDefault.find(OptID);
assert(opt != OptionsDefault.end() && "No Default for Option");
return opt->second;
@@ -60,14 +60,16 @@ protected:
public:
AMDGPUToolChain(const Driver &D, const llvm::Triple &Triple,
const llvm::opt::ArgList &Args);
- unsigned GetDefaultDwarfVersion() const override { return 4; }
+ unsigned GetDefaultDwarfVersion() const override { return 5; }
bool IsIntegratedAssemblerDefault() const override { return true; }
bool IsMathErrnoDefault() const override { return false; }
bool useIntegratedAs() const override { return true; }
bool isCrossCompiling() const override { return true; }
bool isPICDefault() const override { return false; }
- bool isPIEDefault() const override { return false; }
+ bool isPIEDefault(const llvm::opt::ArgList &Args) const override {
+ return false;
+ }
bool isPICDefaultForced() const override { return false; }
bool SupportsProfiling() const override { return false; }
@@ -136,6 +138,11 @@ public:
addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args,
Action::OffloadKind DeviceOffloadKind) const override;
+
+ // Returns a list of device library names shared by different languages
+ llvm::SmallVector<std::string, 12>
+ getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs,
+ const std::string &GPUArch) const;
};
} // end namespace toolchains
diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
index fe1d19c2dd67..863e2c597d53 100644
--- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -9,12 +9,14 @@
#include "AMDGPUOpenMP.h"
#include "AMDGPU.h"
#include "CommonArgs.h"
+#include "ToolChains/ROCm.h"
#include "clang/Basic/DiagnosticDriver.h"
#include "clang/Driver/Compilation.h"
#include "clang/Driver/Driver.h"
#include "clang/Driver/DriverDiagnostic.h"
#include "clang/Driver/InputInfo.h"
#include "clang/Driver/Options.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FormatAdapters.h"
#include "llvm/Support/FormatVariadic.h"
@@ -84,14 +86,54 @@ static bool checkSystemForAMDGPU(const ArgList &Args, const AMDGPUToolChain &TC,
} // namespace
const char *AMDGCN::OpenMPLinker::constructLLVMLinkCommand(
- Compilation &C, const JobAction &JA, const InputInfoList &Inputs,
- const ArgList &Args, StringRef SubArchName,
- StringRef OutputFilePrefix) const {
+ const toolchains::AMDGPUOpenMPToolChain &AMDGPUOpenMPTC, Compilation &C,
+ const JobAction &JA, const InputInfoList &Inputs, const ArgList &Args,
+ StringRef SubArchName, StringRef OutputFilePrefix) const {
ArgStringList CmdArgs;
for (const auto &II : Inputs)
if (II.isFilename())
CmdArgs.push_back(II.getFilename());
+
+ if (Args.hasArg(options::OPT_l)) {
+ auto Lm = Args.getAllArgValues(options::OPT_l);
+ bool HasLibm = false;
+ for (auto &Lib : Lm) {
+ if (Lib == "m") {
+ HasLibm = true;
+ break;
+ }
+ }
+
+ if (HasLibm) {
+ // This is not certain to work. The device libs added here, and passed to
+ // llvm-link, are missing attributes that they expect to be inserted when
+ // passed to mlink-builtin-bitcode. The amdgpu backend does not generate
+ // conservatively correct code when attributes are missing, so this may
+ // be the root cause of miscompilations. Passing via mlink-builtin-bitcode
+ // ultimately hits CodeGenModule::addDefaultFunctionDefinitionAttributes
+ // on each function, see D28538 for context.
+ // Potential workarounds:
+ // - unconditionally link all of the device libs to every translation
+ // unit in clang via mlink-builtin-bitcode
+ // - build a libm bitcode file as part of the DeviceRTL and explicitly
+ // mlink-builtin-bitcode the rocm device libs components at build time
+ // - drop this llvm-link fork in favour of some calls into LLVM, chosen
+ // to do basically the same work as llvm-link but with that call first
+ // - write an opt pass that sets that on every function it sees and pipe
+ // the device-libs bitcode through that on the way to this llvm-link
+ SmallVector<std::string, 12> BCLibs =
+ AMDGPUOpenMPTC.getCommonDeviceLibNames(Args, SubArchName.str());
+ llvm::for_each(BCLibs, [&](StringRef BCFile) {
+ CmdArgs.push_back(Args.MakeArgString(BCFile));
+ });
+ }
+ }
+
+ AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, CmdArgs, "amdgcn",
+ SubArchName,
+ /* bitcode SDL?*/ true,
+ /* PostClang Link? */ false);
// Add an intermediate output file.
CmdArgs.push_back("-o");
const char *OutputFileName =
@@ -180,8 +222,8 @@ void AMDGCN::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA,
assert(Prefix.length() && "no linker inputs are files ");
// Each command outputs different files.
- const char *LLVMLinkCommand =
- constructLLVMLinkCommand(C, JA, Inputs, Args, GPUArch, Prefix);
+ const char *LLVMLinkCommand = constructLLVMLinkCommand(
+ AMDGPUOpenMPTC, C, JA, Inputs, Args, GPUArch, Prefix);
// Produce readable assembly if save-temps is enabled.
if (C.getDriver().isSaveTempsEnabled())
@@ -226,7 +268,7 @@ void AMDGPUOpenMPToolChain::addClangTargetOptions(
std::string BitcodeSuffix;
if (DriverArgs.hasFlag(options::OPT_fopenmp_target_new_runtime,
options::OPT_fno_openmp_target_new_runtime, false))
- BitcodeSuffix = "new-amdgcn-" + GPUArch;
+ BitcodeSuffix = "new-amdgpu-" + GPUArch;
else
BitcodeSuffix = "amdgcn-" + GPUArch;
diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h
index effca7e212cc..233256bf7378 100644
--- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h
+++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h
@@ -16,6 +16,10 @@
namespace clang {
namespace driver {
+namespace toolchains {
+class AMDGPUOpenMPToolChain;
+}
+
namespace tools {
namespace AMDGCN {
@@ -35,11 +39,11 @@ public:
private:
/// \return llvm-link output file name.
- const char *constructLLVMLinkCommand(Compilation &C, const JobAction &JA,
- const InputInfoList &Inputs,
- const llvm::opt::ArgList &Args,
- llvm::StringRef SubArchName,
- llvm::StringRef OutputFilePrefix) const;
+ const char *constructLLVMLinkCommand(
+ const toolchains::AMDGPUOpenMPToolChain &AMDGPUOpenMPTC, Compilation &C,
+ const JobAction &JA, const InputInfoList &Inputs,
+ const llvm::opt::ArgList &Args, llvm::StringRef SubArchName,
+ llvm::StringRef OutputFilePrefix) const;
/// \return llc output file name.
const char *constructLlcCommand(Compilation &C, const JobAction &JA,
diff --git a/clang/lib/Driver/ToolChains/AVR.cpp b/clang/lib/Driver/ToolChains/AVR.cpp
index f147292038a8..a66cae8b4d6b 100644
--- a/clang/lib/Driver/ToolChains/AVR.cpp
+++ b/clang/lib/Driver/ToolChains/AVR.cpp
@@ -28,9 +28,9 @@ using namespace llvm::opt;
namespace {
-const struct {
+constexpr struct {
StringRef Name;
- std::string SubPath;
+ StringRef SubPath;
StringRef Family;
unsigned DataAddr;
} MCUInfo[] = {
@@ -298,6 +298,7 @@ llvm::Optional<unsigned> GetMCUSectionAddressData(StringRef MCUName) {
}
const StringRef PossibleAVRLibcLocations[] = {
+ "/avr",
"/usr/avr",
"/usr/lib/avr",
};
@@ -314,7 +315,7 @@ AVRToolChain::AVRToolChain(const Driver &D, const llvm::Triple &Triple,
if (!Args.hasArg(options::OPT_nostdlib) &&
!Args.hasArg(options::OPT_nodefaultlibs) &&
!Args.hasArg(options::OPT_c /* does not apply when not linking */)) {
- std::string CPU = getCPUName(Args, Triple);
+ std::string CPU = getCPUName(D, Args, Triple);
if (CPU.empty()) {
// We cannot link any standard libraries without an MCU specified.
@@ -370,6 +371,16 @@ void AVRToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
addSystemInclude(DriverArgs, CC1Args, AVRInc);
}
+void AVRToolChain::addClangTargetOptions(
+ const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
+ Action::OffloadKind DeviceOffloadKind) const {
+ // By default, use `.ctors` (not `.init_array`), as required by libgcc, which
+ // runs constructors/destructors on AVR.
+ if (!DriverArgs.hasFlag(options::OPT_fuse_init_array,
+ options::OPT_fno_use_init_array, false))
+ CC1Args.push_back("-fno-use-init-array");
+}
+
Tool *AVRToolChain::buildLinker() const {
return new tools::AVR::Linker(getTriple(), *this, LinkStdlib);
}
@@ -378,8 +389,10 @@ void AVR::Linker::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs, const ArgList &Args,
const char *LinkingOutput) const {
+ const Driver &D = getToolChain().getDriver();
+
// Compute information about the target AVR.
- std::string CPU = getCPUName(Args, getToolChain().getTriple());
+ std::string CPU = getCPUName(D, Args, getToolChain().getTriple());
llvm::Optional<StringRef> FamilyName = GetMCUFamilyName(CPU);
llvm::Optional<unsigned> SectionAddressData = GetMCUSectionAddressData(CPU);
@@ -403,9 +416,7 @@ void AVR::Linker::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back(Args.MakeArgString(DataSectionArg));
} else {
// We do not have an entry for this CPU in the address mapping table yet.
- getToolChain().getDriver().Diag(
- diag::warn_drv_avr_linker_section_addresses_not_implemented)
- << CPU;
+ D.Diag(diag::warn_drv_avr_linker_section_addresses_not_implemented) << CPU;
}
// If the family name is known, we can link with the device-specific libgcc.
@@ -414,6 +425,8 @@ void AVR::Linker::ConstructJob(Compilation &C, const JobAction &JA,
if (LinkStdlib) {
assert(!CPU.empty() && "CPU name must be known in order to link stdlibs");
+ CmdArgs.push_back("--start-group");
+
// Add the object file for the CRT.
std::string CrtFileName = std::string("-l:crt") + CPU + std::string(".o");
CmdArgs.push_back(Args.MakeArgString(CrtFileName));
@@ -425,6 +438,8 @@ void AVR::Linker::ConstructJob(Compilation &C, const JobAction &JA,
// Add the link library specific to the MCU.
CmdArgs.push_back(Args.MakeArgString(std::string("-l") + CPU));
+ CmdArgs.push_back("--end-group");
+
// Specify the family name as the emulation mode to use.
// This is almost always required because otherwise avr-ld
// will assume 'avr2' and warn about the program being larger
@@ -438,11 +453,21 @@ void AVR::Linker::ConstructJob(Compilation &C, const JobAction &JA,
}
llvm::Optional<std::string> AVRToolChain::findAVRLibcInstallation() const {
+ // Search for an avr-libc installation relative to the avr-gcc installation.
+ std::string GCCParent(GCCInstallation.getParentLibPath());
+ std::string Path(GCCParent + "/avr");
+ if (llvm::sys::fs::is_directory(Path))
+ return Path;
+ Path = GCCParent + "/../avr";
+ if (llvm::sys::fs::is_directory(Path))
+ return Path;
+
+ // If no avr-gcc installation is found, search the possible avr-libc locations
+ // and return the first one that exists.
for (StringRef PossiblePath : PossibleAVRLibcLocations) {
std::string Path = getDriver().SysRoot + PossiblePath.str();
- // Return the first avr-libc installation that exists.
if (llvm::sys::fs::is_directory(Path))
- return Optional<std::string>(Path);
+ return Path;
}
return llvm::None;
diff --git a/clang/lib/Driver/ToolChains/AVR.h b/clang/lib/Driver/ToolChains/AVR.h
index f612aa691182..2d027957ed76 100644
--- a/clang/lib/Driver/ToolChains/AVR.h
+++ b/clang/lib/Driver/ToolChains/AVR.h
@@ -11,8 +11,8 @@
#include "Gnu.h"
#include "clang/Driver/InputInfo.h"
-#include "clang/Driver/ToolChain.h"
#include "clang/Driver/Tool.h"
+#include "clang/Driver/ToolChain.h"
namespace clang {
namespace driver {
@@ -26,6 +26,11 @@ public:
AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const override;
+ void
+ addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
+ llvm::opt::ArgStringList &CC1Args,
+ Action::OffloadKind DeviceOffloadKind) const override;
+
protected:
Tool *buildLinker() const override;
diff --git a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp
index ed8c7e94b013..0b60d097b9ca 100644
--- a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp
@@ -79,10 +79,32 @@ static bool DecodeAArch64Features(const Driver &D, StringRef text,
else
return false;
- // +sve implies +f32mm if the base architecture is v8.6A or v8.7A
- // it isn't the case in general that sve implies both f64mm and f32mm
+ if (Feature == "sve2")
+ Features.push_back("+sve");
+ else if (Feature == "sve2-bitperm" || Feature == "sve2-sha3" ||
+ Feature == "sve2-aes" || Feature == "sve2-sm4") {
+ Features.push_back("+sve");
+ Features.push_back("+sve2");
+ } else if (Feature == "nosve") {
+ Features.push_back("-sve2");
+ Features.push_back("-sve2-bitperm");
+ Features.push_back("-sve2-sha3");
+ Features.push_back("-sve2-aes");
+ Features.push_back("-sve2-sm4");
+ } else if (Feature == "nosve2") {
+ Features.push_back("-sve2-bitperm");
+ Features.push_back("-sve2-sha3");
+ Features.push_back("-sve2-aes");
+ Features.push_back("-sve2-sm4");
+ }
+
+ // +sve implies +f32mm if the base architecture is v8.6A, v8.7A, v9.1A or
+ // v9.2A. It isn't the case in general that sve implies both f64mm and f32mm
if ((ArchKind == llvm::AArch64::ArchKind::ARMV8_6A ||
- ArchKind == llvm::AArch64::ArchKind::ARMV8_7A) && Feature == "sve")
+ ArchKind == llvm::AArch64::ArchKind::ARMV8_7A ||
+ ArchKind == llvm::AArch64::ArchKind::ARMV9_1A ||
+ ArchKind == llvm::AArch64::ArchKind::ARMV9_2A) &&
+ Feature == "sve")
Features.push_back("+f32mm");
}
return true;
@@ -127,8 +149,20 @@ getAArch64ArchFeaturesFromMarch(const Driver &D, StringRef March,
llvm::AArch64::ArchKind ArchKind = llvm::AArch64::parseArch(Split.first);
if (ArchKind == llvm::AArch64::ArchKind::INVALID ||
- !llvm::AArch64::getArchFeatures(ArchKind, Features) ||
- (Split.second.size() &&
+ !llvm::AArch64::getArchFeatures(ArchKind, Features))
+ return false;
+
+ // Enable SVE2 by default on Armv9-A.
+ // It can still be disabled if +nosve2 is present.
+ // We must do this early so that DecodeAArch64Features has the correct state.
+ if ((ArchKind == llvm::AArch64::ArchKind::ARMV9A ||
+ ArchKind == llvm::AArch64::ArchKind::ARMV9_1A ||
+ ArchKind == llvm::AArch64::ArchKind::ARMV9_2A)) {
+ Features.push_back("+sve");
+ Features.push_back("+sve2");
+ }
+
+ if ((Split.second.size() &&
!DecodeAArch64Features(D, Split.second, Features, ArchKind)))
return false;
@@ -210,6 +244,9 @@ void aarch64::getAArch64TargetFeatures(const Driver &D,
else if (Args.hasArg(options::OPT_arch) || isCPUDeterminedByTriple(Triple))
success = getAArch64ArchFeaturesFromMcpu(
D, getAArch64TargetCPU(Args, Triple, A), Args, Features);
+ else
+ // Default to 'A' profile if the architecture is not specified.
+ success = getAArch64ArchFeaturesFromMarch(D, "armv8-a", Args, Features);
if (success && (A = Args.getLastArg(clang::driver::options::OPT_mtune_EQ)))
success =
@@ -345,7 +382,10 @@ fp16_fml_fallthrough:
NoCrypto = true;
}
- if (std::find(ItBegin, ItEnd, "+v8.4a") != ItEnd) {
+ if (std::find(ItBegin, ItEnd, "+v8.4a") != ItEnd ||
+ std::find(ItBegin, ItEnd, "+v9a") != ItEnd ||
+ std::find(ItBegin, ItEnd, "+v9.1a") != ItEnd ||
+ std::find(ItBegin, ItEnd, "+v9.2a") != ItEnd) {
if (HasCrypto && !NoCrypto) {
// Check if we have NOT disabled an algorithm with something like:
// +crypto, -algorithm
@@ -404,9 +444,11 @@ fp16_fml_fallthrough:
}
}
- auto V8_6Pos = llvm::find(Features, "+v8.6a");
- if (V8_6Pos != std::end(Features))
- V8_6Pos = Features.insert(std::next(V8_6Pos), {"+i8mm", "+bf16"});
+ const char *Archs[] = {"+v8.6a", "+v8.7a", "+v9.1a", "+v9.2a"};
+ auto Pos = std::find_first_of(Features.begin(), Features.end(),
+ std::begin(Archs), std::end(Archs));
+ if (Pos != std::end(Features))
+ Pos = Features.insert(std::next(Pos), {"+i8mm", "+bf16"});
if (Arg *A = Args.getLastArg(options::OPT_mno_unaligned_access,
options::OPT_munaligned_access)) {
diff --git a/clang/lib/Driver/ToolChains/Arch/ARM.cpp b/clang/lib/Driver/ToolChains/Arch/ARM.cpp
index 4ab547fabe43..21c091e1a0ba 100644
--- a/clang/lib/Driver/ToolChains/Arch/ARM.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/ARM.cpp
@@ -148,13 +148,21 @@ bool arm::useAAPCSForMachO(const llvm::Triple &T) {
}
// Select mode for reading thread pointer (-mtp=soft/cp15).
-arm::ReadTPMode arm::getReadTPMode(const Driver &D, const ArgList &Args) {
+arm::ReadTPMode arm::getReadTPMode(const Driver &D, const ArgList &Args,
+ const llvm::Triple &Triple) {
if (Arg *A = Args.getLastArg(options::OPT_mtp_mode_EQ)) {
arm::ReadTPMode ThreadPointer =
llvm::StringSwitch<arm::ReadTPMode>(A->getValue())
.Case("cp15", ReadTPMode::Cp15)
.Case("soft", ReadTPMode::Soft)
.Default(ReadTPMode::Invalid);
+ if (ThreadPointer == ReadTPMode::Cp15 &&
+ getARMSubArchVersionNumber(Triple) < 7 &&
+ llvm::ARM::parseArch(Triple.getArchName()) !=
+ llvm::ARM::ArchKind::ARMV6T2) {
+ D.Diag(diag::err_target_unsupported_tp_hard) << Triple.getArchName();
+ return ReadTPMode::Invalid;
+ }
if (ThreadPointer != ReadTPMode::Invalid)
return ThreadPointer;
if (StringRef(A->getValue()).empty())
@@ -314,6 +322,10 @@ arm::FloatABI arm::getDefaultFloatABI(const llvm::Triple &Triple) {
// FIXME: this is invalid for WindowsCE
case llvm::Triple::Win32:
+ // It is incorrect to select hard float ABI on MachO platforms if the ABI is
+ // "apcs-gnu".
+ if (Triple.isOSBinFormatMachO() && !useAAPCSForMachO(Triple))
+ return FloatABI::Soft;
return FloatABI::Hard;
case llvm::Triple::NetBSD:
@@ -418,7 +430,7 @@ void arm::getARMTargetFeatures(const Driver &D, const llvm::Triple &Triple,
bool KernelOrKext =
Args.hasArg(options::OPT_mkernel, options::OPT_fapple_kext);
arm::FloatABI ABI = arm::getARMFloatABI(D, Triple, Args);
- arm::ReadTPMode ThreadPointer = arm::getReadTPMode(D, Args);
+ arm::ReadTPMode ThreadPointer = arm::getReadTPMode(D, Args, Triple);
llvm::Optional<std::pair<const Arg *, StringRef>> WaCPU, WaFPU, WaHDiv,
WaArch;
@@ -701,6 +713,18 @@ fp16_fml_fallthrough:
if (Args.getLastArg(options::OPT_mcmse))
Features.push_back("+8msecext");
+ if (Arg *A = Args.getLastArg(options::OPT_mfix_cmse_cve_2021_35465,
+ options::OPT_mno_fix_cmse_cve_2021_35465)) {
+ if (!Args.getLastArg(options::OPT_mcmse))
+ D.Diag(diag::err_opt_not_valid_without_opt)
+ << A->getOption().getName() << "-mcmse";
+
+ if (A->getOption().matches(options::OPT_mfix_cmse_cve_2021_35465))
+ Features.push_back("+fix-cmse-cve-2021-35465");
+ else
+ Features.push_back("-fix-cmse-cve-2021-35465");
+ }
+
// Look for the last occurrence of -mlong-calls or -mno-long-calls. If
// neither options are specified, see if we are compiling for kernel/kext and
// decide whether to pass "+long-calls" based on the OS and its version.
@@ -763,7 +787,8 @@ fp16_fml_fallthrough:
// which raises an alignment fault on unaligned accesses. Linux
// defaults this bit to 0 and handles it as a system-wide (not
// per-process) setting. It is therefore safe to assume that ARMv7+
- // Linux targets support unaligned accesses. The same goes for NaCl.
+ // Linux targets support unaligned accesses. The same goes for NaCl
+ // and Windows.
//
// The above behavior is consistent with GCC.
int VersionNum = getARMSubArchVersionNumber(Triple);
@@ -771,7 +796,8 @@ fp16_fml_fallthrough:
if (VersionNum < 6 ||
Triple.getSubArch() == llvm::Triple::SubArchType::ARMSubArch_v6m)
Features.push_back("+strict-align");
- } else if (Triple.isOSLinux() || Triple.isOSNaCl()) {
+ } else if (Triple.isOSLinux() || Triple.isOSNaCl() ||
+ Triple.isOSWindows()) {
if (VersionNum < 7)
Features.push_back("+strict-align");
} else
@@ -845,7 +871,7 @@ fp16_fml_fallthrough:
}
-const std::string arm::getARMArch(StringRef Arch, const llvm::Triple &Triple) {
+std::string arm::getARMArch(StringRef Arch, const llvm::Triple &Triple) {
std::string MArch;
if (!Arch.empty())
MArch = std::string(Arch);
diff --git a/clang/lib/Driver/ToolChains/Arch/ARM.h b/clang/lib/Driver/ToolChains/Arch/ARM.h
index 8e7c10ecd5d6..b6fd68fbb9c6 100644
--- a/clang/lib/Driver/ToolChains/Arch/ARM.h
+++ b/clang/lib/Driver/ToolChains/Arch/ARM.h
@@ -24,7 +24,7 @@ namespace arm {
std::string getARMTargetCPU(StringRef CPU, llvm::StringRef Arch,
const llvm::Triple &Triple);
-const std::string getARMArch(llvm::StringRef Arch, const llvm::Triple &Triple);
+std::string getARMArch(llvm::StringRef Arch, const llvm::Triple &Triple);
StringRef getARMCPUForMArch(llvm::StringRef Arch, const llvm::Triple &Triple);
llvm::ARM::ArchKind getLLVMArchKindForARM(StringRef CPU, StringRef Arch,
const llvm::Triple &Triple);
@@ -53,7 +53,8 @@ FloatABI getARMFloatABI(const Driver &D, const llvm::Triple &Triple,
const llvm::opt::ArgList &Args);
void setFloatABIInTriple(const Driver &D, const llvm::opt::ArgList &Args,
llvm::Triple &triple);
-ReadTPMode getReadTPMode(const Driver &D, const llvm::opt::ArgList &Args);
+ReadTPMode getReadTPMode(const Driver &D, const llvm::opt::ArgList &Args,
+ const llvm::Triple &Triple);
void setArchNameInTriple(const Driver &D, const llvm::opt::ArgList &Args,
types::ID InputType, llvm::Triple &Triple);
diff --git a/clang/lib/Driver/ToolChains/Arch/Mips.cpp b/clang/lib/Driver/ToolChains/Arch/Mips.cpp
index 5a509dbb2bd3..c374d745da38 100644
--- a/clang/lib/Driver/ToolChains/Arch/Mips.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/Mips.cpp
@@ -441,7 +441,8 @@ bool mips::isUCLibc(const ArgList &Args) {
return A && A->getOption().matches(options::OPT_muclibc);
}
-bool mips::isNaN2008(const ArgList &Args, const llvm::Triple &Triple) {
+bool mips::isNaN2008(const Driver &D, const ArgList &Args,
+ const llvm::Triple &Triple) {
if (Arg *NaNArg = Args.getLastArg(options::OPT_mnan_EQ))
return llvm::StringSwitch<bool>(NaNArg->getValue())
.Case("2008", true)
@@ -449,7 +450,7 @@ bool mips::isNaN2008(const ArgList &Args, const llvm::Triple &Triple) {
.Default(false);
// NaN2008 is the default for MIPS32r6/MIPS64r6.
- return llvm::StringSwitch<bool>(getCPUName(Args, Triple))
+ return llvm::StringSwitch<bool>(getCPUName(D, Args, Triple))
.Cases("mips32r6", "mips64r6", true)
.Default(false);
}
diff --git a/clang/lib/Driver/ToolChains/Arch/Mips.h b/clang/lib/Driver/ToolChains/Arch/Mips.h
index 074012f40fe5..f4c11a7e3188 100644
--- a/clang/lib/Driver/ToolChains/Arch/Mips.h
+++ b/clang/lib/Driver/ToolChains/Arch/Mips.h
@@ -44,7 +44,8 @@ std::string getMipsABILibSuffix(const llvm::opt::ArgList &Args,
const llvm::Triple &Triple);
bool hasMipsAbiArg(const llvm::opt::ArgList &Args, const char *Value);
bool isUCLibc(const llvm::opt::ArgList &Args);
-bool isNaN2008(const llvm::opt::ArgList &Args, const llvm::Triple &Triple);
+bool isNaN2008(const Driver &D, const llvm::opt::ArgList &Args,
+ const llvm::Triple &Triple);
bool isFP64ADefault(const llvm::Triple &Triple, StringRef CPUName);
bool isFPXXDefault(const llvm::Triple &Triple, StringRef CPUName,
StringRef ABIName, mips::FloatABI FloatABI);
diff --git a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp
index ade93d6881a7..323f588c8269 100644
--- a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp
@@ -7,462 +7,42 @@
//===----------------------------------------------------------------------===//
#include "RISCV.h"
+#include "ToolChains/CommonArgs.h"
#include "clang/Basic/CharInfo.h"
#include "clang/Driver/Driver.h"
#include "clang/Driver/DriverDiagnostic.h"
#include "clang/Driver/Options.h"
-#include "llvm/Option/ArgList.h"
#include "llvm/ADT/Optional.h"
+#include "llvm/Option/ArgList.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/RISCVISAInfo.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/raw_ostream.h"
-#include "ToolChains/CommonArgs.h"
using namespace clang::driver;
using namespace clang::driver::tools;
using namespace clang;
using namespace llvm::opt;
-namespace {
-// Represents the major and version number components of a RISC-V extension
-struct RISCVExtensionVersion {
- StringRef Major;
- StringRef Minor;
-};
-} // end anonymous namespace
-
-static StringRef getExtensionTypeDesc(StringRef Ext) {
- if (Ext.startswith("sx"))
- return "non-standard supervisor-level extension";
- if (Ext.startswith("s"))
- return "standard supervisor-level extension";
- if (Ext.startswith("x"))
- return "non-standard user-level extension";
- if (Ext.startswith("z"))
- return "standard user-level extension";
- return StringRef();
-}
-
-static StringRef getExtensionType(StringRef Ext) {
- if (Ext.startswith("sx"))
- return "sx";
- if (Ext.startswith("s"))
- return "s";
- if (Ext.startswith("x"))
- return "x";
- if (Ext.startswith("z"))
- return "z";
- return StringRef();
-}
-
-// If the extension is supported as experimental, return the version of that
-// extension that the compiler currently supports.
-static Optional<RISCVExtensionVersion>
-isExperimentalExtension(StringRef Ext) {
- if (Ext == "b" || Ext == "zba" || Ext == "zbb" || Ext == "zbc" ||
- Ext == "zbe" || Ext == "zbf" || Ext == "zbm" || Ext == "zbp" ||
- Ext == "zbr" || Ext == "zbs" || Ext == "zbt" || Ext == "zbproposedc")
- return RISCVExtensionVersion{"0", "93"};
- if (Ext == "v" || Ext == "zvamo" || Ext == "zvlsseg")
- return RISCVExtensionVersion{"0", "10"};
- if (Ext == "zfh")
- return RISCVExtensionVersion{"0", "1"};
- return None;
-}
-
-static bool isSupportedExtension(StringRef Ext) {
- // LLVM supports "z" extensions which are marked as experimental.
- if (isExperimentalExtension(Ext))
- return true;
-
- // LLVM does not support "sx", "s" nor "x" extensions.
- return false;
-}
-
-// Extensions may have a version number, and may be separated by
-// an underscore '_' e.g.: rv32i2_m2.
-// Version number is divided into major and minor version numbers,
-// separated by a 'p'. If the minor version is 0 then 'p0' can be
-// omitted from the version string. E.g., rv32i2p0, rv32i2, rv32i2p1.
-static bool getExtensionVersion(const Driver &D, const ArgList &Args,
- StringRef MArch, StringRef Ext, StringRef In,
- std::string &Major, std::string &Minor) {
- Major = std::string(In.take_while(isDigit));
- In = In.substr(Major.size());
-
- if (Major.size() && In.consume_front("p")) {
- Minor = std::string(In.take_while(isDigit));
- In = In.substr(Major.size() + 1);
-
- // Expected 'p' to be followed by minor version number.
- if (Minor.empty()) {
- std::string Error =
- "minor version number missing after 'p' for extension";
- D.Diag(diag::err_drv_invalid_riscv_ext_arch_name)
- << MArch << Error << Ext;
- return false;
- }
- }
-
- // Expected multi-character extension with version number to have no
- // subsequent characters (i.e. must either end string or be followed by
- // an underscore).
- if (Ext.size() > 1 && In.size()) {
- std::string Error =
- "multi-character extensions must be separated by underscores";
- D.Diag(diag::err_drv_invalid_riscv_ext_arch_name) << MArch << Error << In;
- return false;
- }
-
- // If experimental extension, require use of current version number number
- if (auto ExperimentalExtension = isExperimentalExtension(Ext)) {
- if (!Args.hasArg(options::OPT_menable_experimental_extensions)) {
- std::string Error =
- "requires '-menable-experimental-extensions' for experimental extension";
- D.Diag(diag::err_drv_invalid_riscv_ext_arch_name)
- << MArch << Error << Ext;
- return false;
- } else if (Major.empty() && Minor.empty()) {
- std::string Error =
- "experimental extension requires explicit version number";
- D.Diag(diag::err_drv_invalid_riscv_ext_arch_name)
- << MArch << Error << Ext;
- return false;
- }
- auto SupportedVers = *ExperimentalExtension;
- if (Major != SupportedVers.Major || Minor != SupportedVers.Minor) {
- std::string Error =
- "unsupported version number " + Major;
- if (!Minor.empty())
- Error += "." + Minor;
- Error += " for experimental extension (this compiler supports "
- + SupportedVers.Major.str() + "."
- + SupportedVers.Minor.str() + ")";
-
- D.Diag(diag::err_drv_invalid_riscv_ext_arch_name)
- << MArch << Error << Ext;
- return false;
- }
- return true;
- }
-
- // Allow extensions to declare no version number
- if (Major.empty() && Minor.empty())
- return true;
-
- // TODO: Handle supported extensions with version number.
- std::string Error = "unsupported version number " + Major;
- if (!Minor.empty())
- Error += "." + Minor;
- Error += " for extension";
- D.Diag(diag::err_drv_invalid_riscv_ext_arch_name) << MArch << Error << Ext;
-
- return false;
-}
-
-// Handle other types of extensions other than the standard
-// general purpose and standard user-level extensions.
-// Parse the ISA string containing non-standard user-level
-// extensions, standard supervisor-level extensions and
-// non-standard supervisor-level extensions.
-// These extensions start with 'z', 'x', 's', 'sx' prefixes, follow a
-// canonical order, might have a version number (major, minor)
-// and are separated by a single underscore '_'.
-// Set the hardware features for the extensions that are supported.
-static void getExtensionFeatures(const Driver &D,
- const ArgList &Args,
- std::vector<StringRef> &Features,
- StringRef &MArch, StringRef &Exts) {
- if (Exts.empty())
- return;
-
- // Multi-letter extensions are seperated by a single underscore
- // as described in RISC-V User-Level ISA V2.2.
- SmallVector<StringRef, 8> Split;
- Exts.split(Split, StringRef("_"));
-
- SmallVector<StringRef, 4> Prefix{"z", "x", "s", "sx"};
- auto I = Prefix.begin();
- auto E = Prefix.end();
-
- SmallVector<StringRef, 8> AllExts;
-
- for (StringRef Ext : Split) {
- if (Ext.empty()) {
- D.Diag(diag::err_drv_invalid_riscv_arch_name) << MArch
- << "extension name missing after separator '_'";
- return;
- }
-
- StringRef Type = getExtensionType(Ext);
- StringRef Desc = getExtensionTypeDesc(Ext);
- auto Pos = Ext.find_if(isDigit);
- StringRef Name(Ext.substr(0, Pos));
- StringRef Vers(Ext.substr(Pos));
-
- if (Type.empty()) {
- D.Diag(diag::err_drv_invalid_riscv_ext_arch_name)
- << MArch << "invalid extension prefix" << Ext;
- return;
- }
-
- // Check ISA extensions are specified in the canonical order.
- while (I != E && *I != Type)
- ++I;
-
- if (I == E) {
- std::string Error = std::string(Desc);
- Error += " not given in canonical order";
- D.Diag(diag::err_drv_invalid_riscv_ext_arch_name)
- << MArch << Error << Ext;
- return;
- }
-
- // The order is OK, do not advance I to the next prefix
- // to allow repeated extension type, e.g.: rv32ixabc_xdef.
-
- if (Name.size() == Type.size()) {
- std::string Error = std::string(Desc);
- Error += " name missing after";
- D.Diag(diag::err_drv_invalid_riscv_ext_arch_name)
- << MArch << Error << Type;
- return;
- }
-
- std::string Major, Minor;
- if (!getExtensionVersion(D, Args, MArch, Name, Vers, Major, Minor))
- return;
-
- // Check if duplicated extension.
- if (llvm::is_contained(AllExts, Name)) {
- std::string Error = "duplicated ";
- Error += Desc;
- D.Diag(diag::err_drv_invalid_riscv_ext_arch_name)
- << MArch << Error << Name;
- return;
- }
-
- // Extension format is correct, keep parsing the extensions.
- // TODO: Save Type, Name, Major, Minor to avoid parsing them later.
- AllExts.push_back(Name);
- }
-
- // Set target features.
- // TODO: Hardware features to be handled in Support/TargetParser.cpp.
- // TODO: Use version number when setting target features.
- for (auto Ext : AllExts) {
- if (!isSupportedExtension(Ext)) {
- StringRef Desc = getExtensionTypeDesc(getExtensionType(Ext));
- std::string Error = "unsupported ";
- Error += Desc;
- D.Diag(diag::err_drv_invalid_riscv_ext_arch_name)
- << MArch << Error << Ext;
- return;
- }
- if (Ext == "zvlsseg") {
- Features.push_back("+experimental-v");
- Features.push_back("+experimental-zvlsseg");
- } else if (Ext == "zvamo") {
- Features.push_back("+experimental-v");
- Features.push_back("+experimental-zvlsseg");
- Features.push_back("+experimental-zvamo");
- } else if (isExperimentalExtension(Ext))
- Features.push_back(Args.MakeArgString("+experimental-" + Ext));
- else
- Features.push_back(Args.MakeArgString("+" + Ext));
- }
-}
-
// Returns false if an error is diagnosed.
-static bool getArchFeatures(const Driver &D, StringRef MArch,
+static bool getArchFeatures(const Driver &D, StringRef Arch,
std::vector<StringRef> &Features,
const ArgList &Args) {
- // RISC-V ISA strings must be lowercase.
- if (llvm::any_of(MArch, [](char c) { return isupper(c); })) {
- D.Diag(diag::err_drv_invalid_riscv_arch_name)
- << MArch << "string must be lowercase";
- return false;
- }
-
- // ISA string must begin with rv32 or rv64.
- if (!(MArch.startswith("rv32") || MArch.startswith("rv64")) ||
- (MArch.size() < 5)) {
- D.Diag(diag::err_drv_invalid_riscv_arch_name)
- << MArch << "string must begin with rv32{i,e,g} or rv64{i,g}";
- return false;
- }
-
- bool HasRV64 = MArch.startswith("rv64");
-
- // The canonical order specified in ISA manual.
- // Ref: Table 22.1 in RISC-V User-Level ISA V2.2
- StringRef StdExts = "mafdqlcbjtpvn";
- bool HasF = false, HasD = false;
- char Baseline = MArch[4];
-
- // First letter should be 'e', 'i' or 'g'.
- switch (Baseline) {
- default:
- D.Diag(diag::err_drv_invalid_riscv_arch_name)
- << MArch << "first letter should be 'e', 'i' or 'g'";
- return false;
- case 'e': {
- StringRef Error;
- // Currently LLVM does not support 'e'.
- // Extension 'e' is not allowed in rv64.
- if (HasRV64)
- Error = "standard user-level extension 'e' requires 'rv32'";
- else
- Error = "unsupported standard user-level extension 'e'";
- D.Diag(diag::err_drv_invalid_riscv_arch_name) << MArch << Error;
- return false;
- }
- case 'i':
- break;
- case 'g':
- // g = imafd
- StdExts = StdExts.drop_front(4);
- Features.push_back("+m");
- Features.push_back("+a");
- Features.push_back("+f");
- Features.push_back("+d");
- HasF = true;
- HasD = true;
- break;
- }
-
- // Skip rvxxx
- StringRef Exts = MArch.substr(5);
-
- // Remove multi-letter standard extensions, non-standard extensions and
- // supervisor-level extensions. They have 'z', 'x', 's', 'sx' prefixes.
- // Parse them at the end.
- // Find the very first occurrence of 's', 'x' or 'z'.
- StringRef OtherExts;
- size_t Pos = Exts.find_first_of("zsx");
- if (Pos != StringRef::npos) {
- OtherExts = Exts.substr(Pos);
- Exts = Exts.substr(0, Pos);
- }
+ bool EnableExperimentalExtensions =
+ Args.hasArg(options::OPT_menable_experimental_extensions);
+ auto ISAInfo =
+ llvm::RISCVISAInfo::parseArchString(Arch, EnableExperimentalExtensions);
+ if (!ISAInfo) {
+ handleAllErrors(ISAInfo.takeError(), [&](llvm::StringError &ErrMsg) {
+ D.Diag(diag::err_drv_invalid_riscv_arch_name)
+ << Arch << ErrMsg.getMessage();
+ });
- std::string Major, Minor;
- if (!getExtensionVersion(D, Args, MArch, std::string(1, Baseline), Exts,
- Major, Minor))
return false;
-
- // Consume the base ISA version number and any '_' between rvxxx and the
- // first extension
- Exts = Exts.drop_front(Major.size());
- if (!Minor.empty())
- Exts = Exts.drop_front(Minor.size() + 1 /*'p'*/);
- Exts.consume_front("_");
-
- // TODO: Use version number when setting target features
-
- auto StdExtsItr = StdExts.begin();
- auto StdExtsEnd = StdExts.end();
-
- for (auto I = Exts.begin(), E = Exts.end(); I != E; ) {
- char c = *I;
-
- // Check ISA extensions are specified in the canonical order.
- while (StdExtsItr != StdExtsEnd && *StdExtsItr != c)
- ++StdExtsItr;
-
- if (StdExtsItr == StdExtsEnd) {
- // Either c contains a valid extension but it was not given in
- // canonical order or it is an invalid extension.
- StringRef Error;
- if (StdExts.contains(c))
- Error = "standard user-level extension not given in canonical order";
- else
- Error = "invalid standard user-level extension";
- D.Diag(diag::err_drv_invalid_riscv_ext_arch_name)
- << MArch << Error << std::string(1, c);
- return false;
- }
-
- // Move to next char to prevent repeated letter.
- ++StdExtsItr;
-
- std::string Next, Major, Minor;
- if (std::next(I) != E)
- Next = std::string(std::next(I), E);
- if (!getExtensionVersion(D, Args, MArch, std::string(1, c), Next, Major,
- Minor))
- return false;
-
- // The order is OK, then push it into features.
- // TODO: Use version number when setting target features
- switch (c) {
- default:
- // Currently LLVM supports only "mafdc".
- D.Diag(diag::err_drv_invalid_riscv_ext_arch_name)
- << MArch << "unsupported standard user-level extension"
- << std::string(1, c);
- return false;
- case 'm':
- Features.push_back("+m");
- break;
- case 'a':
- Features.push_back("+a");
- break;
- case 'f':
- Features.push_back("+f");
- HasF = true;
- break;
- case 'd':
- Features.push_back("+d");
- HasD = true;
- break;
- case 'c':
- Features.push_back("+c");
- break;
- case 'b':
- Features.push_back("+experimental-b");
- Features.push_back("+experimental-zba");
- Features.push_back("+experimental-zbb");
- Features.push_back("+experimental-zbc");
- Features.push_back("+experimental-zbe");
- Features.push_back("+experimental-zbf");
- Features.push_back("+experimental-zbm");
- Features.push_back("+experimental-zbp");
- Features.push_back("+experimental-zbr");
- Features.push_back("+experimental-zbs");
- Features.push_back("+experimental-zbt");
- break;
- case 'v':
- Features.push_back("+experimental-v");
- Features.push_back("+experimental-zvlsseg");
- break;
- }
-
- // Consume full extension name and version, including any optional '_'
- // between this extension and the next
- ++I;
- I += Major.size();
- if (Minor.size())
- I += Minor.size() + 1 /*'p'*/;
- if (*I == '_')
- ++I;
}
- // Dependency check.
- // It's illegal to specify the 'd' (double-precision floating point)
- // extension without also specifying the 'f' (single precision
- // floating-point) extension.
- if (HasD && !HasF) {
- D.Diag(diag::err_drv_invalid_riscv_arch_name)
- << MArch << "d requires f extension to also be specified";
- return false;
- }
-
- // Additional dependency checks.
- // TODO: The 'q' extension requires rv64.
- // TODO: It is illegal to specify 'e' extensions with 'f' and 'd'.
-
- // Handle all other types of extensions.
- getExtensionFeatures(D, Args, Features, MArch, OtherExts);
-
+ (*ISAInfo)->toFeatures(
+ Features, [&Args](const Twine &Str) { return Args.MakeArgString(Str); });
return true;
}
@@ -610,24 +190,30 @@ StringRef riscv::getRISCVABI(const ArgList &Args, const llvm::Triple &Triple) {
// rv32* -> ilp32
// rv64g | rv64*d -> lp64d
// rv64* -> lp64
- StringRef MArch = getRISCVArch(Args, Triple);
+ StringRef Arch = getRISCVArch(Args, Triple);
- if (MArch.startswith_insensitive("rv32")) {
- // FIXME: parse `March` to find `D` extension properly
- if (MArch.substr(4).contains_insensitive("d") ||
- MArch.startswith_insensitive("rv32g"))
- return "ilp32d";
- else if (MArch.startswith_insensitive("rv32e"))
- return "ilp32e";
- else
+ auto ParseResult = llvm::RISCVISAInfo::parseArchString(
+ Arch, /* EnableExperimentalExtension */ true);
+ if (!ParseResult) {
+ // Ignore the parsing error; just go on to the 3rd step.
+ consumeError(ParseResult.takeError());
+ } else {
+ auto &ISAInfo = *ParseResult;
+ bool HasD = ISAInfo->hasExtension("d");
+ unsigned XLen = ISAInfo->getXLen();
+ if (XLen == 32) {
+ bool HasE = ISAInfo->hasExtension("e");
+ if (HasD)
+ return "ilp32d";
+ if (HasE)
+ return "ilp32e";
return "ilp32";
- } else if (MArch.startswith_insensitive("rv64")) {
- // FIXME: parse `March` to find `D` extension properly
- if (MArch.substr(4).contains_insensitive("d") ||
- MArch.startswith_insensitive("rv64g"))
- return "lp64d";
- else
+ } else if (XLen == 64) {
+ if (HasD)
+ return "lp64d";
return "lp64";
+ }
+ llvm_unreachable("unhandled XLen");
}
// 3. Choose a default based on the triple
diff --git a/clang/lib/Driver/ToolChains/Arch/X86.cpp b/clang/lib/Driver/ToolChains/Arch/X86.cpp
index 12749c7ec871..bfa008f964e1 100644
--- a/clang/lib/Driver/ToolChains/Arch/X86.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/X86.cpp
@@ -11,6 +11,8 @@
#include "clang/Driver/Driver.h"
#include "clang/Driver/DriverDiagnostic.h"
#include "clang/Driver/Options.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Support/Host.h"
@@ -20,7 +22,7 @@ using namespace clang::driver::tools;
using namespace clang;
using namespace llvm::opt;
-std::string x86::getX86TargetCPU(const ArgList &Args,
+std::string x86::getX86TargetCPU(const Driver &D, const ArgList &Args,
const llvm::Triple &Triple) {
if (const Arg *A = Args.getLastArg(clang::driver::options::OPT_march_EQ)) {
StringRef CPU = A->getValue();
@@ -37,29 +39,34 @@ std::string x86::getX86TargetCPU(const ArgList &Args,
return std::string(CPU);
}
- if (const Arg *A = Args.getLastArgNoClaim(options::OPT__SLASH_arch)) {
+ if (const Arg *A = Args.getLastArg(options::OPT__SLASH_arch)) {
// Mapping built by looking at lib/Basic's X86TargetInfo::initFeatureMap().
- StringRef Arch = A->getValue();
- StringRef CPU;
- if (Triple.getArch() == llvm::Triple::x86) { // 32-bit-only /arch: flags.
- CPU = llvm::StringSwitch<StringRef>(Arch)
- .Case("IA32", "i386")
- .Case("SSE", "pentium3")
- .Case("SSE2", "pentium4")
- .Default("");
+ // The keys are case-sensitive; this matches link.exe.
+ // 32-bit and 64-bit /arch: flags.
+ llvm::StringMap<StringRef> ArchMap({
+ {"AVX", "sandybridge"},
+ {"AVX2", "haswell"},
+ {"AVX512F", "knl"},
+ {"AVX512", "skylake-avx512"},
+ });
+ if (Triple.getArch() == llvm::Triple::x86) {
+ // 32-bit-only /arch: flags.
+ ArchMap.insert({
+ {"IA32", "i386"},
+ {"SSE", "pentium3"},
+ {"SSE2", "pentium4"},
+ });
}
- if (CPU.empty()) { // 32-bit and 64-bit /arch: flags.
- CPU = llvm::StringSwitch<StringRef>(Arch)
- .Case("AVX", "sandybridge")
- .Case("AVX2", "haswell")
- .Case("AVX512F", "knl")
- .Case("AVX512", "skylake-avx512")
- .Default("");
- }
- if (!CPU.empty()) {
- A->claim();
- return std::string(CPU);
+ StringRef CPU = ArchMap.lookup(A->getValue());
+ if (CPU.empty()) {
+ std::vector<StringRef> ValidArchs{ArchMap.keys().begin(),
+ ArchMap.keys().end()};
+ sort(ValidArchs);
+ D.Diag(diag::warn_drv_invalid_arch_name_with_suggestion)
+ << A->getValue() << (Triple.getArch() == llvm::Triple::x86)
+ << join(ValidArchs, ", ");
}
+ return std::string(CPU);
}
// Select the default CPU if none was given (or detection failed).
diff --git a/clang/lib/Driver/ToolChains/Arch/X86.h b/clang/lib/Driver/ToolChains/Arch/X86.h
index 14f0a26c8be4..36a2ab52899d 100644
--- a/clang/lib/Driver/ToolChains/Arch/X86.h
+++ b/clang/lib/Driver/ToolChains/Arch/X86.h
@@ -21,7 +21,7 @@ namespace driver {
namespace tools {
namespace x86 {
-std::string getX86TargetCPU(const llvm::opt::ArgList &Args,
+std::string getX86TargetCPU(const Driver &D, const llvm::opt::ArgList &Args,
const llvm::Triple &Triple);
void getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple,
diff --git a/clang/lib/Driver/ToolChains/BareMetal.cpp b/clang/lib/Driver/ToolChains/BareMetal.cpp
index ce73e39d1456..cd07692be358 100644
--- a/clang/lib/Driver/ToolChains/BareMetal.cpp
+++ b/clang/lib/Driver/ToolChains/BareMetal.cpp
@@ -125,6 +125,20 @@ static bool isARMBareMetal(const llvm::Triple &Triple) {
return true;
}
+/// Is the triple aarch64-none-elf?
+static bool isAArch64BareMetal(const llvm::Triple &Triple) {
+ if (Triple.getArch() != llvm::Triple::aarch64)
+ return false;
+
+ if (Triple.getVendor() != llvm::Triple::UnknownVendor)
+ return false;
+
+ if (Triple.getOS() != llvm::Triple::UnknownOS)
+ return false;
+
+ return Triple.getEnvironmentName() == "elf";
+}
+
static bool isRISCVBareMetal(const llvm::Triple &Triple) {
if (Triple.getArch() != llvm::Triple::riscv32 &&
Triple.getArch() != llvm::Triple::riscv64)
@@ -151,7 +165,8 @@ void BareMetal::findMultilibs(const Driver &D, const llvm::Triple &Triple,
}
bool BareMetal::handlesTarget(const llvm::Triple &Triple) {
- return isARMBareMetal(Triple) || isRISCVBareMetal(Triple);
+ return isARMBareMetal(Triple) || isAArch64BareMetal(Triple) ||
+ isRISCVBareMetal(Triple);
}
Tool *BareMetal::buildLinker() const {
diff --git a/clang/lib/Driver/ToolChains/BareMetal.h b/clang/lib/Driver/ToolChains/BareMetal.h
index d68c43c64c97..dc718e09ad43 100644
--- a/clang/lib/Driver/ToolChains/BareMetal.h
+++ b/clang/lib/Driver/ToolChains/BareMetal.h
@@ -42,7 +42,9 @@ public:
bool useIntegratedAs() const override { return true; }
bool isCrossCompiling() const override { return true; }
bool isPICDefault() const override { return false; }
- bool isPIEDefault() const override { return false; }
+ bool isPIEDefault(const llvm::opt::ArgList &Args) const override {
+ return false;
+ }
bool isPICDefaultForced() const override { return false; }
bool SupportsProfiling() const override { return false; }
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index a4b53a640ab5..e5476e07a5cc 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -22,6 +22,7 @@
#include "Hexagon.h"
#include "MSP430.h"
#include "PS4CPU.h"
+#include "clang/Basic/CLWarnings.h"
#include "clang/Basic/CharInfo.h"
#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/LangOptions.h"
@@ -612,7 +613,8 @@ getFramePointerKind(const ArgList &Args, const llvm::Triple &Triple) {
A && A->getOption().matches(options::OPT_fno_omit_frame_pointer);
bool OmitLeafFP = Args.hasFlag(options::OPT_momit_leaf_frame_pointer,
options::OPT_mno_omit_leaf_frame_pointer,
- Triple.isAArch64() || Triple.isPS4CPU());
+ Triple.isAArch64() || Triple.isPS4CPU() ||
+ Triple.isVE());
if (NoOmitFP || mustUseNonLeafFramePointerForTarget(Triple) ||
(!OmitFP && useFramePointerForTargetByDefault(Args, Triple))) {
if (OmitLeafFP)
@@ -643,7 +645,7 @@ static void addDebugPrefixMapArg(const Driver &D, const ArgList &Args, ArgString
for (const Arg *A : Args.filtered(options::OPT_ffile_prefix_map_EQ,
options::OPT_fdebug_prefix_map_EQ)) {
StringRef Map = A->getValue();
- if (Map.find('=') == StringRef::npos)
+ if (!Map.contains('='))
D.Diag(diag::err_drv_invalid_argument_to_option)
<< Map << A->getOption().getName();
else
@@ -658,7 +660,7 @@ static void addMacroPrefixMapArg(const Driver &D, const ArgList &Args,
for (const Arg *A : Args.filtered(options::OPT_ffile_prefix_map_EQ,
options::OPT_fmacro_prefix_map_EQ)) {
StringRef Map = A->getValue();
- if (Map.find('=') == StringRef::npos)
+ if (!Map.contains('='))
D.Diag(diag::err_drv_invalid_argument_to_option)
<< Map << A->getOption().getName();
else
@@ -673,7 +675,7 @@ static void addCoveragePrefixMapArg(const Driver &D, const ArgList &Args,
for (const Arg *A : Args.filtered(options::OPT_ffile_prefix_map_EQ,
options::OPT_fcoverage_prefix_map_EQ)) {
StringRef Map = A->getValue();
- if (Map.find('=') == StringRef::npos)
+ if (!Map.contains('='))
D.Diag(diag::err_drv_invalid_argument_to_option)
<< Map << A->getOption().getName();
else
@@ -748,7 +750,7 @@ static void addDashXForInput(const ArgList &Args, const InputInfo &Input,
static void addPGOAndCoverageFlags(const ToolChain &TC, Compilation &C,
const Driver &D, const InputInfo &Output,
- const ArgList &Args,
+ const ArgList &Args, SanitizerArgs &SanArgs,
ArgStringList &CmdArgs) {
auto *PGOGenerateArg = Args.getLastArg(options::OPT_fprofile_generate,
@@ -795,11 +797,6 @@ static void addPGOAndCoverageFlags(const ToolChain &TC, Compilation &C,
}
if (TC.getTriple().isOSAIX()) {
- if (PGOGenerateArg)
- if (!D.isUsingLTO(false /*IsDeviceOffloadAction */) ||
- D.getLTOMode() != LTOK_Full)
- D.Diag(clang::diag::err_drv_argument_only_allowed_with)
- << PGOGenerateArg->getSpelling() << "-flto";
if (ProfileGenerateArg)
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< ProfileGenerateArg->getSpelling() << TC.getTriple().str();
@@ -927,7 +924,7 @@ static void addPGOAndCoverageFlags(const ToolChain &TC, Compilation &C,
else if (Val != "single")
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getOption().getName() << Val;
- } else if (TC.getSanitizerArgs().needsTsanRt()) {
+ } else if (SanArgs.needsTsanRt()) {
CmdArgs.push_back("-fprofile-update=atomic");
}
@@ -1256,7 +1253,8 @@ void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA,
// If we are offloading to a target via OpenMP we need to include the
// openmp_wrappers folder which contains alternative system headers.
if (JA.isDeviceOffloading(Action::OFK_OpenMP) &&
- getToolChain().getTriple().isNVPTX()){
+ (getToolChain().getTriple().isNVPTX() ||
+ getToolChain().getTriple().isAMDGCN())) {
if (!Args.hasArg(options::OPT_nobuiltininc)) {
// Add openmp_wrappers/* to our system include path. This lets us wrap
// standard library headers.
@@ -1587,8 +1585,8 @@ void AddAAPCSVolatileBitfieldArgs(const ArgList &Args, ArgStringList &CmdArgs) {
}
namespace {
-void RenderARMABI(const llvm::Triple &Triple, const ArgList &Args,
- ArgStringList &CmdArgs) {
+void RenderARMABI(const Driver &D, const llvm::Triple &Triple,
+ const ArgList &Args, ArgStringList &CmdArgs) {
// Select the ABI to use.
// FIXME: Support -meabi.
// FIXME: Parts of this are duplicated in the backend, unify this somehow.
@@ -1596,7 +1594,7 @@ void RenderARMABI(const llvm::Triple &Triple, const ArgList &Args,
if (Arg *A = Args.getLastArg(options::OPT_mabi_EQ)) {
ABIName = A->getValue();
} else {
- std::string CPU = getCPUName(Args, Triple, /*FromAs*/ false);
+ std::string CPU = getCPUName(D, Args, Triple, /*FromAs*/ false);
ABIName = llvm::ARM::computeDefaultTargetABI(Triple, CPU).data();
}
@@ -1607,7 +1605,7 @@ void RenderARMABI(const llvm::Triple &Triple, const ArgList &Args,
void Clang::AddARMTargetArgs(const llvm::Triple &Triple, const ArgList &Args,
ArgStringList &CmdArgs, bool KernelOrKext) const {
- RenderARMABI(Triple, Args, CmdArgs);
+ RenderARMABI(getToolChain().getDriver(), Triple, Args, CmdArgs);
// Determine floating point ABI from the options & target defaults.
arm::FloatABI ABI = arm::getARMFloatABI(getToolChain(), Args);
@@ -1825,17 +1823,46 @@ void Clang::AddAArch64TargetArgs(const ArgList &Args,
StringRef Val = A->getValue();
const Driver &D = getToolChain().getDriver();
if (Val.equals("128") || Val.equals("256") || Val.equals("512") ||
- Val.equals("1024") || Val.equals("2048"))
+ Val.equals("1024") || Val.equals("2048") || Val.equals("128+") ||
+ Val.equals("256+") || Val.equals("512+") || Val.equals("1024+") ||
+ Val.equals("2048+")) {
+ unsigned Bits = 0;
+ if (Val.endswith("+"))
+ Val = Val.substr(0, Val.size() - 1);
+ else {
+ bool Invalid = Val.getAsInteger(10, Bits); (void)Invalid;
+ assert(!Invalid && "Failed to parse value");
+ CmdArgs.push_back(
+ Args.MakeArgString("-mvscale-max=" + llvm::Twine(Bits / 128)));
+ }
+
+ bool Invalid = Val.getAsInteger(10, Bits); (void)Invalid;
+ assert(!Invalid && "Failed to parse value");
CmdArgs.push_back(
- Args.MakeArgString(llvm::Twine("-msve-vector-bits=") + Val));
+ Args.MakeArgString("-mvscale-min=" + llvm::Twine(Bits / 128)));
// Silently drop requests for vector-length agnostic code as it's implied.
- else if (!Val.equals("scalable"))
+ } else if (!Val.equals("scalable"))
// Handle the unsupported values passed to msve-vector-bits.
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getOption().getName() << Val;
}
AddAAPCSVolatileBitfieldArgs(Args, CmdArgs);
+
+ if (const Arg *A = Args.getLastArg(clang::driver::options::OPT_mtune_EQ)) {
+ StringRef Name = A->getValue();
+
+ std::string TuneCPU;
+ if (Name == "native")
+ TuneCPU = std::string(llvm::sys::getHostCPUName());
+ else
+ TuneCPU = std::string(Name);
+
+ if (!TuneCPU.empty()) {
+ CmdArgs.push_back("-tune-cpu");
+ CmdArgs.push_back(Args.MakeArgString(TuneCPU));
+ }
+ }
}
void Clang::AddMIPSTargetArgs(const ArgList &Args,
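
A small sketch, under the assumption that Val is one of the accepted spellings checked above, of how -msve-vector-bits values translate into vscale bounds: a plain "512" pins both -mvscale-min and -mvscale-max to 512/128 = 4, while "512+" strips the '+' and sets only the minimum. parseSVEVectorBits is an illustrative helper name, not part of the patch.

#include <string>
#include <utility>

// Returns {vscale-min, vscale-max}; a max of 0 stands for "no upper bound",
// which is what the "N+" spellings request.
static std::pair<unsigned, unsigned> parseSVEVectorBits(std::string Val) {
  bool OpenEnded = !Val.empty() && Val.back() == '+';
  if (OpenEnded)
    Val.pop_back();
  unsigned Bits = std::stoul(Val); // e.g. "512" -> 512
  unsigned VScale = Bits / 128;    // 512 bits -> vscale 4
  return {VScale, OpenEnded ? 0u : VScale};
}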
@@ -2173,6 +2200,7 @@ void Clang::AddX86TargetArgs(const ArgList &Args,
if (Value == "intel" || Value == "att") {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back(Args.MakeArgString("-x86-asm-syntax=" + Value));
+ CmdArgs.push_back(Args.MakeArgString("-inline-asm=" + Value));
} else {
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getOption().getName() << Value;
@@ -2182,6 +2210,11 @@ void Clang::AddX86TargetArgs(const ArgList &Args,
CmdArgs.push_back("-x86-asm-syntax=intel");
}
+ if (Arg *A = Args.getLastArg(options::OPT_mskip_rax_setup,
+ options::OPT_mno_skip_rax_setup))
+ if (A->getOption().matches(options::OPT_mskip_rax_setup))
+ CmdArgs.push_back(Args.MakeArgString("-mskip-rax-setup"));
+
// Set flags to support MCU ABI.
if (Args.hasFlag(options::OPT_miamcu, options::OPT_mno_iamcu, false)) {
CmdArgs.push_back("-mfloat-abi");
@@ -2411,7 +2444,7 @@ static void CollectArgsForIntegratedAssembler(Compilation &C,
bool TakeNextArg = false;
bool UseRelaxRelocations = C.getDefaultToolChain().useRelaxRelocations();
- bool UseNoExecStack = C.getDefaultToolChain().isNoExecStackDefault();
+ bool UseNoExecStack = false;
const char *MipsTargetFeature = nullptr;
StringRef ImplicitIt;
for (const Arg *A :
@@ -2617,6 +2650,7 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
// LLVM flags based on the final state.
bool HonorINFs = true;
bool HonorNaNs = true;
+ bool ApproxFunc = false;
// -fmath-errno is the default on some platforms, e.g. BSD-derived OSes.
bool MathErrno = TC.IsMathErrnoDefault();
bool AssociativeMath = false;
@@ -2638,10 +2672,14 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
llvm::DenormalMode DenormalFPMath = DefaultDenormalFPMath;
llvm::DenormalMode DenormalFP32Math = DefaultDenormalFP32Math;
- StringRef FPContract = "on";
+ // CUDA and HIP don't rely on the frontend to pass an ffp-contract option.
+ // If one wasn't given by the user, don't pass it here.
+ StringRef FPContract;
+ if (!JA.isDeviceOffloading(Action::OFK_Cuda) &&
+ !JA.isOffloading(Action::OFK_HIP))
+ FPContract = "on";
bool StrictFPModel = false;
-
if (const Arg *A = Args.getLastArg(options::OPT_flimited_precision_EQ)) {
CmdArgs.push_back("-mlimit-float-precision");
CmdArgs.push_back(A->getValue());
@@ -2685,12 +2723,12 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
// Use the gcc option in the switch below.
if (!FPModel.empty() && !FPModel.equals(Val))
D.Diag(clang::diag::warn_drv_overriding_flag_option)
- << Args.MakeArgString("-ffp-model=" + FPModel)
- << Args.MakeArgString("-ffp-model=" + Val);
+ << Args.MakeArgString("-ffp-model=" + FPModel)
+ << Args.MakeArgString("-ffp-model=" + Val);
if (Val.equals("fast")) {
optID = options::OPT_ffast_math;
FPModel = Val;
- FPContract = Val;
+ FPContract = "fast";
} else if (Val.equals("precise")) {
optID = options::OPT_ffp_contract;
FPModel = Val;
@@ -2719,6 +2757,8 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
case options::OPT_fno_honor_infinities: HonorINFs = false; break;
case options::OPT_fhonor_nans: HonorNaNs = true; break;
case options::OPT_fno_honor_nans: HonorNaNs = false; break;
+ case options::OPT_fapprox_func: ApproxFunc = true; break;
+ case options::OPT_fno_approx_func: ApproxFunc = false; break;
case options::OPT_fmath_errno: MathErrno = true; break;
case options::OPT_fno_math_errno: MathErrno = false; break;
case options::OPT_fassociative_math: AssociativeMath = true; break;
@@ -2780,11 +2820,9 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
case options::OPT_ffp_contract: {
StringRef Val = A->getValue();
if (PreciseFPModel) {
- // When -ffp-model=precise is seen on the command line,
- // the boolean PreciseFPModel is set to true which indicates
- // "the current option is actually PreciseFPModel". The optID
- // is changed to OPT_ffp_contract and FPContract is set to "on".
- // the argument Val string is "precise": it shouldn't be checked.
+ // -ffp-model=precise enables ffp-contract=on; PreciseFPModel is already
+ // true and Val is "precise", so FPContract is set and Val need not be
+ // checked here.
;
} else if (Val.equals("fast") || Val.equals("on") || Val.equals("off"))
FPContract = Val;
@@ -2877,12 +2915,17 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
AssociativeMath = false;
ReciprocalMath = false;
SignedZeros = true;
- TrappingMath = false;
- RoundingFPMath = false;
// -fno_fast_math restores default denormal and fpcontract handling
DenormalFPMath = DefaultDenormalFPMath;
DenormalFP32Math = llvm::DenormalMode::getIEEE();
- FPContract = "on";
+ if (!JA.isDeviceOffloading(Action::OFK_Cuda) &&
+ !JA.isOffloading(Action::OFK_HIP))
+ if (FPContract == "fast") {
+ FPContract = "on";
+ D.Diag(clang::diag::warn_drv_overriding_flag_option)
+ << "-ffp-contract=fast"
+ << "-ffp-contract=on";
+ }
break;
}
if (StrictFPModel) {
@@ -2915,6 +2958,9 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
if (!HonorNaNs)
CmdArgs.push_back("-menable-no-nans");
+ if (ApproxFunc)
+ CmdArgs.push_back("-fapprox-func");
+
if (MathErrno)
CmdArgs.push_back("-fmath-errno");
@@ -3128,14 +3174,44 @@ static void RenderSSPOptions(const Driver &D, const ToolChain &TC,
const std::string &TripleStr = EffectiveTriple.getTriple();
if (Arg *A = Args.getLastArg(options::OPT_mstack_protector_guard_EQ)) {
StringRef Value = A->getValue();
- if (!EffectiveTriple.isX86() && !EffectiveTriple.isAArch64())
+ if (!EffectiveTriple.isX86() && !EffectiveTriple.isAArch64() &&
+ !EffectiveTriple.isARM() && !EffectiveTriple.isThumb())
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getAsString(Args) << TripleStr;
- if (EffectiveTriple.isX86() && Value != "tls" && Value != "global") {
+ if ((EffectiveTriple.isX86() || EffectiveTriple.isARM() ||
+ EffectiveTriple.isThumb()) &&
+ Value != "tls" && Value != "global") {
D.Diag(diag::err_drv_invalid_value_with_suggestion)
<< A->getOption().getName() << Value << "tls global";
return;
}
+ if ((EffectiveTriple.isARM() || EffectiveTriple.isThumb()) &&
+ Value == "tls") {
+ if (!Args.hasArg(options::OPT_mstack_protector_guard_offset_EQ)) {
+ D.Diag(diag::err_drv_ssp_missing_offset_argument)
+ << A->getAsString(Args);
+ return;
+ }
+ // Check whether the target subarch supports the hardware TLS register
+ if (arm::getARMSubArchVersionNumber(EffectiveTriple) < 7 &&
+ llvm::ARM::parseArch(EffectiveTriple.getArchName()) !=
+ llvm::ARM::ArchKind::ARMV6T2) {
+ D.Diag(diag::err_target_unsupported_tp_hard)
+ << EffectiveTriple.getArchName();
+ return;
+ }
+ // Check whether the user asked for something other than -mtp=cp15
+ if (Arg *A = Args.getLastArg(options::OPT_mtp_mode_EQ)) {
+ StringRef Value = A->getValue();
+ if (Value != "cp15") {
+ D.Diag(diag::err_drv_argument_not_allowed_with)
+ << A->getAsString(Args) << "-mstack-protector-guard=tls";
+ return;
+ }
+ }
+ CmdArgs.push_back("-target-feature");
+ CmdArgs.push_back("+read-tp-hard");
+ }
if (EffectiveTriple.isAArch64() && Value != "sysreg" && Value != "global") {
D.Diag(diag::err_drv_invalid_value_with_suggestion)
<< A->getOption().getName() << Value << "sysreg global";
@@ -3146,7 +3222,8 @@ static void RenderSSPOptions(const Driver &D, const ToolChain &TC,
if (Arg *A = Args.getLastArg(options::OPT_mstack_protector_guard_offset_EQ)) {
StringRef Value = A->getValue();
- if (!EffectiveTriple.isX86() && !EffectiveTriple.isAArch64())
+ if (!EffectiveTriple.isX86() && !EffectiveTriple.isAArch64() &&
+ !EffectiveTriple.isARM() && !EffectiveTriple.isThumb())
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getAsString(Args) << TripleStr;
int Offset;
@@ -3154,6 +3231,12 @@ static void RenderSSPOptions(const Driver &D, const ToolChain &TC,
D.Diag(diag::err_drv_invalid_value) << A->getOption().getName() << Value;
return;
}
+ if ((EffectiveTriple.isARM() || EffectiveTriple.isThumb()) &&
+ (Offset < 0 || Offset > 0xfffff)) {
+ D.Diag(diag::err_drv_invalid_int_value)
+ << A->getOption().getName() << Value;
+ return;
+ }
A->render(Args, CmdArgs);
}
@@ -3179,7 +3262,7 @@ static void RenderSCPOptions(const ToolChain &TC, const ArgList &Args,
ArgStringList &CmdArgs) {
const llvm::Triple &EffectiveTriple = TC.getEffectiveTriple();
- if (!EffectiveTriple.isOSLinux())
+ if (!EffectiveTriple.isOSFreeBSD() && !EffectiveTriple.isOSLinux())
return;
if (!EffectiveTriple.isX86() && !EffectiveTriple.isSystemZ() &&
@@ -3354,7 +3437,7 @@ static void RenderARCMigrateToolOptions(const Driver &D, const ArgList &Args,
Args.AddLastArg(CmdArgs, options::OPT_objcmt_returns_innerpointer_property);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_ns_nonatomic_iosonly);
Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_designated_init);
- Args.AddLastArg(CmdArgs, options::OPT_objcmt_whitelist_dir_path);
+ Args.AddLastArg(CmdArgs, options::OPT_objcmt_allowlist_dir_path);
}
}
@@ -3554,11 +3637,11 @@ static void RenderModulesOptions(Compilation &C, const Driver &D,
llvm::sys::fs::file_status Status;
if (llvm::sys::fs::status(A->getValue(), Status))
D.Diag(diag::err_drv_no_such_file) << A->getValue();
- CmdArgs.push_back(
- Args.MakeArgString("-fbuild-session-timestamp=" +
- Twine((uint64_t)Status.getLastModificationTime()
- .time_since_epoch()
- .count())));
+ CmdArgs.push_back(Args.MakeArgString(
+ "-fbuild-session-timestamp=" +
+ Twine((uint64_t)std::chrono::duration_cast<std::chrono::seconds>(
+ Status.getLastModificationTime().time_since_epoch())
+ .count())));
}
if (Args.getLastArg(options::OPT_fmodules_validate_once_per_build_session)) {
@@ -3873,12 +3956,6 @@ static void renderDebugOptions(const ToolChain &TC, const Driver &D,
ArgStringList &CmdArgs,
codegenoptions::DebugInfoKind &DebugInfoKind,
DwarfFissionKind &DwarfFission) {
- // These two forms of profiling info can't be used together.
- if (const Arg *A1 = Args.getLastArg(options::OPT_fpseudo_probe_for_profiling))
- if (const Arg *A2 = Args.getLastArg(options::OPT_fdebug_info_for_profiling))
- D.Diag(diag::err_drv_argument_not_allowed_with)
- << A1->getAsString(Args) << A2->getAsString(Args);
-
if (Args.hasFlag(options::OPT_fdebug_info_for_profiling,
options::OPT_fno_debug_info_for_profiling, false) &&
checkDebugInfoOption(
@@ -4132,6 +4209,29 @@ static void renderDebugOptions(const ToolChain &TC, const Driver &D,
options::OPT_gpubnames)
? "-gpubnames"
: "-ggnu-pubnames");
+ const auto *SimpleTemplateNamesArg =
+ Args.getLastArg(options::OPT_gsimple_template_names, options::OPT_gno_simple_template_names,
+ options::OPT_gsimple_template_names_EQ);
+ bool ForwardTemplateParams = DebuggerTuning == llvm::DebuggerKind::SCE;
+ if (SimpleTemplateNamesArg &&
+ checkDebugInfoOption(SimpleTemplateNamesArg, Args, D, TC)) {
+ const auto &Opt = SimpleTemplateNamesArg->getOption();
+ if (Opt.matches(options::OPT_gsimple_template_names)) {
+ ForwardTemplateParams = true;
+ CmdArgs.push_back("-gsimple-template-names=simple");
+ } else if (Opt.matches(options::OPT_gsimple_template_names_EQ)) {
+ ForwardTemplateParams = true;
+ StringRef Value = SimpleTemplateNamesArg->getValue();
+ if (Value == "simple") {
+ CmdArgs.push_back("-gsimple-template-names=simple");
+ } else if (Value == "mangled") {
+ CmdArgs.push_back("-gsimple-template-names=mangled");
+ } else {
+ D.Diag(diag::err_drv_unsupported_option_argument)
+ << Opt.getName() << SimpleTemplateNamesArg->getValue();
+ }
+ }
+ }
if (Args.hasFlag(options::OPT_fdebug_ranges_base_address,
options::OPT_fno_debug_ranges_base_address, false)) {
@@ -4178,7 +4278,7 @@ static void renderDebugOptions(const ToolChain &TC, const Driver &D,
// Decide how to render forward declarations of template instantiations.
// SCE wants full descriptions, others just get them in the name.
- if (DebuggerTuning == llvm::DebuggerKind::SCE)
+ if (ForwardTemplateParams)
CmdArgs.push_back("-debug-forward-template-params");
// Do we need to explicitly import anonymous namespaces into the parent
@@ -4360,7 +4460,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
TC.addClangWarningOptions(CmdArgs);
// FIXME: Subclass ToolChain for SPIR and move this to addClangWarningOptions.
- if (Triple.isSPIR())
+ if (Triple.isSPIR() || Triple.isSPIRV())
CmdArgs.push_back("-Wspir-compat");
// Select the appropriate action.
@@ -4478,28 +4578,18 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back("-emit-llvm-uselists");
if (IsUsingLTO) {
- if (!IsDeviceOffloadAction) {
- if (Args.hasArg(options::OPT_flto))
- CmdArgs.push_back("-flto");
- else {
- if (D.getLTOMode() == LTOK_Thin)
- CmdArgs.push_back("-flto=thin");
- else
- CmdArgs.push_back("-flto=full");
- }
- CmdArgs.push_back("-flto-unit");
- } else if (Triple.isAMDGPU()) {
- // Only AMDGPU supports device-side LTO
- assert(LTOMode == LTOK_Full || LTOMode == LTOK_Thin);
- CmdArgs.push_back(Args.MakeArgString(
- Twine("-flto=") + (LTOMode == LTOK_Thin ? "thin" : "full")));
- CmdArgs.push_back("-flto-unit");
- } else {
+ // Only AMDGPU supports device-side LTO.
+ if (IsDeviceOffloadAction && !Triple.isAMDGPU()) {
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< Args.getLastArg(options::OPT_foffload_lto,
options::OPT_foffload_lto_EQ)
->getAsString(Args)
<< Triple.getTriple();
+ } else {
+ assert(LTOMode == LTOK_Full || LTOMode == LTOK_Thin);
+ CmdArgs.push_back(Args.MakeArgString(
+ Twine("-flto=") + (LTOMode == LTOK_Thin ? "thin" : "full")));
+ CmdArgs.push_back("-flto-unit");
}
}
}
@@ -4537,7 +4627,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
// reject options that shouldn't be supported in bitcode
// also reject kernel/kext
- static const constexpr unsigned kBitcodeOptionBlacklist[] = {
+ static const constexpr unsigned kBitcodeOptionIgnorelist[] = {
options::OPT_mkernel,
options::OPT_fapple_kext,
options::OPT_ffunction_sections,
@@ -4581,8 +4671,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
options::OPT_mllvm,
};
for (const auto &A : Args)
- if (llvm::find(kBitcodeOptionBlacklist, A->getOption().getID()) !=
- std::end(kBitcodeOptionBlacklist))
+ if (llvm::is_contained(kBitcodeOptionIgnorelist, A->getOption().getID()))
D.Diag(diag::err_drv_unsupported_embed_bitcode) << A->getSpelling();
// Render the CodeGen options that need to be passed.
@@ -4599,7 +4688,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumbeb:
- RenderARMABI(Triple, Args, CmdArgs);
+ RenderARMABI(D, Triple, Args, CmdArgs);
break;
case llvm::Triple::aarch64:
case llvm::Triple::aarch64_32:
@@ -4650,6 +4739,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
// cleanup.
if (!C.isForDiagnostics())
CmdArgs.push_back("-disable-free");
+ CmdArgs.push_back("-clear-ast-before-backend");
#ifdef NDEBUG
const bool IsAssertBuild = false;
@@ -4665,10 +4755,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
if (Args.hasFlag(options::OPT_fdiscard_value_names,
options::OPT_fno_discard_value_names, !IsAssertBuild)) {
if (Args.hasArg(options::OPT_fdiscard_value_names) &&
- (std::any_of(Inputs.begin(), Inputs.end(),
- [](const clang::driver::InputInfo &II) {
- return types::isLLVMIR(II.getType());
- }))) {
+ llvm::any_of(Inputs, [](const clang::driver::InputInfo &II) {
+ return types::isLLVMIR(II.getType());
+ })) {
D.Diag(diag::warn_ignoring_fdiscard_for_bitcode);
}
CmdArgs.push_back("-discard-value-names");
@@ -4721,6 +4810,22 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back(Args.MakeArgString(std::to_string(FunctionAlignment)));
}
+ // We support -falign-loops=N where N is a power of 2. GCC supports more
+ // forms.
+ if (const Arg *A = Args.getLastArg(options::OPT_falign_loops_EQ)) {
+ unsigned Value = 0;
+ if (StringRef(A->getValue()).getAsInteger(10, Value) || Value > 65536)
+ TC.getDriver().Diag(diag::err_drv_invalid_int_value)
+ << A->getAsString(Args) << A->getValue();
+ else if (Value & (Value - 1))
+ TC.getDriver().Diag(diag::err_drv_alignment_not_power_of_two)
+ << A->getAsString(Args) << A->getValue();
+ // Treat =0 as unspecified (use the target preference).
+ if (Value)
+ CmdArgs.push_back(Args.MakeArgString("-falign-loops=" +
+ Twine(std::min(Value, 65536u))));
+ }
+
llvm::Reloc::Model RelocationModel;
unsigned PICLevel;
bool IsPIE;
@@ -5062,9 +5167,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
}
// Enable -mconstructor-aliases except on darwin, where we have to work around
- // a linker bug (see <rdar://problem/7651567>), and CUDA/AMDGPU device code,
- // where aliases aren't supported.
- if (!RawTriple.isOSDarwin() && !RawTriple.isNVPTX() && !RawTriple.isAMDGPU())
+ // a linker bug (see <rdar://problem/7651567>), and CUDA device code, where
+ // aliases aren't supported.
+ if (!RawTriple.isOSDarwin() && !RawTriple.isNVPTX())
CmdArgs.push_back("-mconstructor-aliases");
// Darwin's kernel doesn't support guard variables; just die if we
@@ -5102,16 +5207,18 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
// This is a coarse approximation of what llvm-gcc actually does, both
// -fasynchronous-unwind-tables and -fnon-call-exceptions interact in more
// complicated ways.
- bool UnwindTables =
- Args.hasFlag(options::OPT_fasynchronous_unwind_tables,
- options::OPT_fno_asynchronous_unwind_tables,
- (TC.IsUnwindTablesDefault(Args) ||
- TC.getSanitizerArgs().needsUnwindTables()) &&
- !Freestanding);
- UnwindTables = Args.hasFlag(options::OPT_funwind_tables,
- options::OPT_fno_unwind_tables, UnwindTables);
- if (UnwindTables)
- CmdArgs.push_back("-munwind-tables");
+ auto SanitizeArgs = TC.getSanitizerArgs(Args);
+ bool AsyncUnwindTables = Args.hasFlag(
+ options::OPT_fasynchronous_unwind_tables,
+ options::OPT_fno_asynchronous_unwind_tables,
+ (TC.IsUnwindTablesDefault(Args) || SanitizeArgs.needsUnwindTables()) &&
+ !Freestanding);
+ bool UnwindTables = Args.hasFlag(options::OPT_funwind_tables,
+ options::OPT_fno_unwind_tables, false);
+ if (AsyncUnwindTables)
+ CmdArgs.push_back("-funwind-tables=2");
+ else if (UnwindTables)
+ CmdArgs.push_back("-funwind-tables=1");
// Prepare `-aux-target-cpu` and `-aux-target-feature` unless
// `--gpu-use-aux-triple-only` is specified.
@@ -5120,7 +5227,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
const ArgList &HostArgs =
C.getArgsForToolChain(nullptr, StringRef(), Action::OFK_None);
std::string HostCPU =
- getCPUName(HostArgs, *TC.getAuxTriple(), /*FromAs*/ false);
+ getCPUName(D, HostArgs, *TC.getAuxTriple(), /*FromAs*/ false);
if (!HostCPU.empty()) {
CmdArgs.push_back("-aux-target-cpu");
CmdArgs.push_back(Args.MakeArgString(HostCPU));
@@ -5162,7 +5269,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
}
// Add the target cpu
- std::string CPU = getCPUName(Args, Triple, /*FromAs*/ false);
+ std::string CPU = getCPUName(D, Args, Triple, /*FromAs*/ false);
if (!CPU.empty()) {
CmdArgs.push_back("-target-cpu");
CmdArgs.push_back(Args.MakeArgString(CPU));
@@ -5346,7 +5453,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
// for sampling, overhead of call arc collection is way too high and there's
// no way to collect the output.
if (!Triple.isNVPTX() && !Triple.isAMDGCN())
- addPGOAndCoverageFlags(TC, C, D, Output, Args, CmdArgs);
+ addPGOAndCoverageFlags(TC, C, D, Output, Args, SanitizeArgs, CmdArgs);
Args.AddLastArg(CmdArgs, options::OPT_fclang_abi_compat_EQ);
@@ -5354,7 +5461,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
if (RawTriple.isPS4CPU() &&
!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
PS4cpu::addProfileRTArgs(TC, Args, CmdArgs);
- PS4cpu::addSanitizerArgs(TC, CmdArgs);
+ PS4cpu::addSanitizerArgs(TC, Args, CmdArgs);
}
// Pass options for controlling the default header search paths.
@@ -5416,7 +5523,26 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
Args.AddAllArgs(CmdArgs, options::OPT_R_Group);
- Args.AddAllArgs(CmdArgs, options::OPT_W_Group);
+ for (const Arg *A :
+ Args.filtered(options::OPT_W_Group, options::OPT__SLASH_wd)) {
+ A->claim();
+ if (A->getOption().getID() == options::OPT__SLASH_wd) {
+ unsigned WarningNumber;
+ if (StringRef(A->getValue()).getAsInteger(10, WarningNumber)) {
+ D.Diag(diag::err_drv_invalid_int_value)
+ << A->getAsString(Args) << A->getValue();
+ continue;
+ }
+
+ if (auto Group = diagGroupFromCLWarningID(WarningNumber)) {
+ CmdArgs.push_back(Args.MakeArgString(
+ "-Wno-" + DiagnosticIDs::getWarningOptionForGroup(*Group)));
+ }
+ continue;
+ }
+ A->render(Args, CmdArgs);
+ }
+
if (Args.hasFlag(options::OPT_pedantic, options::OPT_no_pedantic, false))
CmdArgs.push_back("-pedantic");
Args.AddLastArg(CmdArgs, options::OPT_pedantic_errors);
@@ -5433,6 +5559,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
Args.AddLastArg(CmdArgs, options::OPT_fexperimental_relative_cxx_abi_vtables,
options::OPT_fno_experimental_relative_cxx_abi_vtables);
+ if (Arg *A = Args.getLastArg(options::OPT_ffuchsia_api_level_EQ))
+ A->render(Args, CmdArgs);
+
// Handle -{std, ansi, trigraphs} -- take the last of -{std, ansi}
// (-ansi is equivalent to -std=c89 or -std=c++98).
//
@@ -5693,7 +5822,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
Args.AddLastArg(CmdArgs, options::OPT_fvisibility_inlines_hidden_static_local_var,
options::OPT_fno_visibility_inlines_hidden_static_local_var);
Args.AddLastArg(CmdArgs, options::OPT_fvisibility_global_new_delete_hidden);
-
+ Args.AddLastArg(CmdArgs, options::OPT_fnew_infallible);
Args.AddLastArg(CmdArgs, options::OPT_ftlsmodel_EQ);
if (Args.hasFlag(options::OPT_fno_operator_names,
@@ -5736,6 +5865,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
options::OPT_fno_openmp_simd);
Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_enable_irbuilder);
Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_version_EQ);
+ if (!Args.hasFlag(options::OPT_fopenmp_extensions,
+ options::OPT_fno_openmp_extensions, /*Default=*/true))
+ CmdArgs.push_back("-fno-openmp-extensions");
Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_cuda_number_of_sm_EQ);
Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_cuda_blocks_per_sm_EQ);
Args.AddAllArgs(CmdArgs,
@@ -5751,12 +5883,36 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
options::OPT_fno_openmp_cuda_mode, /*Default=*/false))
CmdArgs.push_back("-fopenmp-cuda-mode");
+ // When in OpenMP offloading mode, enable or disable the new device
+ // runtime.
+ if (Args.hasFlag(options::OPT_fopenmp_target_new_runtime,
+ options::OPT_fno_openmp_target_new_runtime,
+ /*Default=*/false))
+ CmdArgs.push_back("-fopenmp-target-new-runtime");
+
+ // When in OpenMP offloading mode, enable debugging on the device.
+ Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_target_debug_EQ);
+ if (Args.hasFlag(options::OPT_fopenmp_target_debug,
+ options::OPT_fno_openmp_target_debug, /*Default=*/false))
+ CmdArgs.push_back("-fopenmp-target-debug");
+
// When in OpenMP offloading mode with NVPTX target, check if full runtime
// is required.
if (Args.hasFlag(options::OPT_fopenmp_cuda_force_full_runtime,
options::OPT_fno_openmp_cuda_force_full_runtime,
/*Default=*/false))
CmdArgs.push_back("-fopenmp-cuda-force-full-runtime");
+
+ // When in OpenMP offloading mode, forward assumptions about thread and
+ // team counts on the device.
+ if (Args.hasFlag(options::OPT_fopenmp_assume_teams_oversubscription,
+ options::OPT_fno_openmp_assume_teams_oversubscription,
+ /*Default=*/false))
+ CmdArgs.push_back("-fopenmp-assume-teams-oversubscription");
+ if (Args.hasFlag(options::OPT_fopenmp_assume_threads_oversubscription,
+ options::OPT_fno_openmp_assume_threads_oversubscription,
+ /*Default=*/false))
+ CmdArgs.push_back("-fopenmp-assume-threads-oversubscription");
break;
default:
// By default, if Clang doesn't know how to generate useful OpenMP code
@@ -5771,10 +5927,12 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
Args.AddLastArg(CmdArgs, options::OPT_fopenmp_simd,
options::OPT_fno_openmp_simd);
Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_version_EQ);
+ if (!Args.hasFlag(options::OPT_fopenmp_extensions,
+ options::OPT_fno_openmp_extensions, /*Default=*/true))
+ CmdArgs.push_back("-fno-openmp-extensions");
}
- const SanitizerArgs &Sanitize = TC.getSanitizerArgs();
- Sanitize.addArgs(TC, Args, CmdArgs, InputType);
+ SanitizeArgs.addArgs(TC, Args, CmdArgs, InputType);
const XRayArgs &XRay = TC.getXRayArgs();
XRay.addArgs(TC, Args, CmdArgs, InputType);
@@ -5891,6 +6049,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
RenderSCPOptions(TC, Args, CmdArgs);
RenderTrivialAutoVarInitOptions(D, TC, Args, CmdArgs);
+ Args.AddLastArg(CmdArgs, options::OPT_fswift_async_fp_EQ);
+
// Translate -mstackrealign
if (Args.hasFlag(options::OPT_mstackrealign, options::OPT_mno_stackrealign,
false))
@@ -6175,7 +6335,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
// than 19.
if (!Args.hasFlag(options::OPT_fthreadsafe_statics,
options::OPT_fno_threadsafe_statics,
- !IsWindowsMSVC || IsMSVC2015Compatible))
+ !types::isOpenCL(InputType) &&
+ (!IsWindowsMSVC || IsMSVC2015Compatible)))
CmdArgs.push_back("-fno-threadsafe-statics");
// -fno-delayed-template-parsing is default, except when targeting MSVC.
@@ -6715,12 +6876,12 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
}
bool DefaultsSplitLTOUnit =
- (WholeProgramVTables || Sanitize.needsLTO()) &&
+ (WholeProgramVTables || SanitizeArgs.needsLTO()) &&
(LTOMode == LTOK_Full || TC.canSplitThinLTOUnit());
bool SplitLTOUnit =
Args.hasFlag(options::OPT_fsplit_lto_unit,
options::OPT_fno_split_lto_unit, DefaultsSplitLTOUnit);
- if (Sanitize.needsLTO() && !SplitLTOUnit)
+ if (SanitizeArgs.needsLTO() && !SplitLTOUnit)
D.Diag(diag::err_drv_argument_not_allowed_with) << "-fno-split-lto-unit"
<< "-fsanitize=cfi";
if (SplitLTOUnit)
@@ -6819,7 +6980,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back("-faddrsig");
if ((Triple.isOSBinFormatELF() || Triple.isOSBinFormatMachO()) &&
- (EH || UnwindTables || DebugInfoKind != codegenoptions::NoDebugInfo))
+ (EH || AsyncUnwindTables || UnwindTables ||
+ DebugInfoKind != codegenoptions::NoDebugInfo))
CmdArgs.push_back("-D__GCC_HAVE_DWARF2_CFI_ASM=1");
if (Arg *A = Args.getLastArg(options::OPT_fsymbol_partition_EQ)) {
@@ -7431,7 +7593,7 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back(Clang::getBaseInputName(Args, Input));
// Add the target cpu
- std::string CPU = getCPUName(Args, Triple, /*FromAs*/ true);
+ std::string CPU = getCPUName(D, Args, Triple, /*FromAs*/ true);
if (!CPU.empty()) {
CmdArgs.push_back("-target-cpu");
CmdArgs.push_back(Args.MakeArgString(CPU));
@@ -7667,18 +7829,30 @@ void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA,
});
}
Triples += Action::GetOffloadKindName(CurKind);
- Triples += "-";
- std::string NormalizedTriple = CurTC->getTriple().normalize();
- Triples += NormalizedTriple;
-
- if (CurDep->getOffloadingArch() != nullptr) {
- // If OffloadArch is present it can only appear as the 6th hypen
- // sepearated field of Bundle Entry ID. So, pad required number of
- // hyphens in Triple.
- for (int i = 4 - StringRef(NormalizedTriple).count("-"); i > 0; i--)
- Triples += "-";
+ Triples += '-';
+ Triples += CurTC->getTriple().normalize();
+ if ((CurKind == Action::OFK_HIP || CurKind == Action::OFK_Cuda) &&
+ CurDep->getOffloadingArch()) {
+ Triples += '-';
Triples += CurDep->getOffloadingArch();
}
+
+ // TODO: Replace parsing of -march flag. Can be done by storing GPUArch
+ // with each toolchain.
+ StringRef GPUArchName;
+ if (CurKind == Action::OFK_OpenMP) {
+ // Extract GPUArch from -march argument in TC argument list.
+ for (unsigned ArgIndex = 0; ArgIndex < TCArgs.size(); ArgIndex++) {
+ auto ArchStr = StringRef(TCArgs.getArgString(ArgIndex));
+ auto Arch = ArchStr.startswith_insensitive("-march=");
+ if (Arch) {
+ GPUArchName = ArchStr.substr(7);
+ Triples += "-";
+ break;
+ }
+ }
+ Triples += GPUArchName.str();
+ }
}
CmdArgs.push_back(TCArgs.MakeArgString(Triples));
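
A hedged sketch of the bundle entry ID assembled in the Triples string above: the offload kind, the normalized target triple, and (for CUDA/HIP bound architectures or the OpenMP -march value) a trailing GPU arch. makeBundleEntryID is an illustrative helper, not part of the patch.

#include <string>

// E.g. makeBundleEntryID("hip", "amdgcn-amd-amdhsa", "gfx906")
//   == "hip-amdgcn-amd-amdhsa-gfx906".
static std::string makeBundleEntryID(const std::string &Kind,
                                     const std::string &Triple,
                                     const std::string &Arch) {
  std::string ID = Kind + "-" + Triple;
  if (!Arch.empty())
    ID += "-" + Arch;
  return ID;
}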
@@ -7701,8 +7875,11 @@ void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA,
assert(CurTC == nullptr && "Expected one dependence!");
CurTC = TC;
});
+ UB += C.addTempFile(
+ C.getArgs().MakeArgString(CurTC->getInputFilename(Inputs[I])));
+ } else {
+ UB += CurTC->getInputFilename(Inputs[I]);
}
- UB += CurTC->getInputFilename(Inputs[I]);
}
CmdArgs.push_back(TCArgs.MakeArgString(UB));
@@ -7746,19 +7923,30 @@ void OffloadBundler::ConstructJobMultipleOutputs(
auto &Dep = DepInfo[I];
Triples += Action::GetOffloadKindName(Dep.DependentOffloadKind);
- Triples += "-";
- std::string NormalizedTriple =
- Dep.DependentToolChain->getTriple().normalize();
- Triples += NormalizedTriple;
-
- if (!Dep.DependentBoundArch.empty()) {
- // If OffloadArch is present it can only appear as the 6th hypen
- // sepearated field of Bundle Entry ID. So, pad required number of
- // hyphens in Triple.
- for (int i = 4 - StringRef(NormalizedTriple).count("-"); i > 0; i--)
- Triples += "-";
+ Triples += '-';
+ Triples += Dep.DependentToolChain->getTriple().normalize();
+ if ((Dep.DependentOffloadKind == Action::OFK_HIP ||
+ Dep.DependentOffloadKind == Action::OFK_Cuda) &&
+ !Dep.DependentBoundArch.empty()) {
+ Triples += '-';
Triples += Dep.DependentBoundArch;
}
+ // TODO: Replace parsing of -march flag. Can be done by storing GPUArch
+ // with each toolchain.
+ StringRef GPUArchName;
+ if (Dep.DependentOffloadKind == Action::OFK_OpenMP) {
+ // Extract GPUArch from -march argument in TC argument list.
+ for (unsigned ArgIndex = 0; ArgIndex < TCArgs.size(); ArgIndex++) {
+ StringRef ArchStr = StringRef(TCArgs.getArgString(ArgIndex));
+ auto Arch = ArchStr.startswith_insensitive("-march=");
+ if (Arch) {
+ GPUArchName = ArchStr.substr(7);
+ Triples += "-";
+ break;
+ }
+ }
+ Triples += GPUArchName.str();
+ }
}
CmdArgs.push_back(TCArgs.MakeArgString(Triples));
diff --git a/clang/lib/Driver/ToolChains/CloudABI.cpp b/clang/lib/Driver/ToolChains/CloudABI.cpp
index 9ee46ac857f0..501e3a382ec1 100644
--- a/clang/lib/Driver/ToolChains/CloudABI.cpp
+++ b/clang/lib/Driver/ToolChains/CloudABI.cpp
@@ -47,7 +47,7 @@ void cloudabi::Linker::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back("--no-dynamic-linker");
// Provide PIE linker flags in case PIE is default for the architecture.
- if (ToolChain.isPIEDefault()) {
+ if (ToolChain.isPIEDefault(Args)) {
CmdArgs.push_back("-pie");
CmdArgs.push_back("-zrelro");
}
@@ -125,7 +125,7 @@ Tool *CloudABI::buildLinker() const {
return new tools::cloudabi::Linker(*this);
}
-bool CloudABI::isPIEDefault() const {
+bool CloudABI::isPIEDefault(const llvm::opt::ArgList &Args) const {
// Only enable PIE on architectures that support PC-relative
// addressing. PC-relative addressing is required, as the process
// startup code must be able to relocate itself.
diff --git a/clang/lib/Driver/ToolChains/CloudABI.h b/clang/lib/Driver/ToolChains/CloudABI.h
index 98bf23127706..8856fe3dde6d 100644
--- a/clang/lib/Driver/ToolChains/CloudABI.h
+++ b/clang/lib/Driver/ToolChains/CloudABI.h
@@ -55,7 +55,7 @@ public:
void AddCXXStdlibLibArgs(const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs) const override;
- bool isPIEDefault() const override;
+ bool isPIEDefault(const llvm::opt::ArgList &Args) const override;
SanitizerMask getSupportedSanitizers() const override;
SanitizerMask getDefaultSanitizers() const override;
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 83cab3ac00cb..630baf9d6ae6 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -34,6 +34,7 @@
#include "clang/Driver/Util.h"
#include "clang/Driver/XRayArgs.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
@@ -266,6 +267,15 @@ void tools::AddLinkerInputs(const ToolChain &TC, const InputInfoList &Inputs,
// Pass -z prefix for gcc linker compatibility.
A.claim();
A.render(Args, CmdArgs);
+ } else if (A.getOption().matches(options::OPT_b)) {
+ const llvm::Triple &T = TC.getTriple();
+ if (!T.isOSAIX()) {
+ TC.getDriver().Diag(diag::err_drv_unsupported_opt_for_target)
+ << A.getSpelling() << T.str();
+ }
+ // Pass -b prefix for AIX linker.
+ A.claim();
+ A.render(Args, CmdArgs);
} else {
A.renderAsInput(Args, CmdArgs);
}
@@ -346,8 +356,8 @@ static StringRef getWebAssemblyTargetCPU(const ArgList &Args) {
return "generic";
}
-std::string tools::getCPUName(const ArgList &Args, const llvm::Triple &T,
- bool FromAs) {
+std::string tools::getCPUName(const Driver &D, const ArgList &Args,
+ const llvm::Triple &T, bool FromAs) {
Arg *A;
switch (T.getArch()) {
@@ -403,14 +413,9 @@ std::string tools::getCPUName(const ArgList &Args, const llvm::Triple &T,
if (!TargetCPUName.empty())
return TargetCPUName;
- if (T.isOSAIX()) {
- unsigned major, minor, unused_micro;
- T.getOSVersion(major, minor, unused_micro);
- // The minimal arch level moved from pwr4 for AIX7.1 to
- // pwr7 for AIX7.2.
- TargetCPUName =
- (major < 7 || (major == 7 && minor < 2)) ? "pwr4" : "pwr7";
- } else if (T.getArch() == llvm::Triple::ppc64le)
+ if (T.isOSAIX())
+ TargetCPUName = "pwr7";
+ else if (T.getArch() == llvm::Triple::ppc64le)
TargetCPUName = "ppc64le";
else if (T.getArch() == llvm::Triple::ppc64)
TargetCPUName = "ppc64";
@@ -438,7 +443,7 @@ std::string tools::getCPUName(const ArgList &Args, const llvm::Triple &T,
case llvm::Triple::x86:
case llvm::Triple::x86_64:
- return x86::getX86TargetCPU(Args, T);
+ return x86::getX86TargetCPU(D, Args, T);
case llvm::Triple::hexagon:
return "hexagon" +
@@ -506,7 +511,7 @@ void tools::addLTOOptions(const ToolChain &ToolChain, const ArgList &Args,
// the plugin.
// Handle flags for selecting CPU variants.
- std::string CPU = getCPUName(Args, ToolChain.getTriple());
+ std::string CPU = getCPUName(D, Args, ToolChain.getTriple());
if (!CPU.empty())
CmdArgs.push_back(Args.MakeArgString(Twine("-plugin-opt=mcpu=") + CPU));
@@ -616,11 +621,6 @@ void tools::addLTOOptions(const ToolChain &ToolChain, const ArgList &Args,
CmdArgs.push_back("-plugin-opt=new-pass-manager");
}
- // Pass an option to enable pseudo probe emission.
- if (Args.hasFlag(options::OPT_fpseudo_probe_for_profiling,
- options::OPT_fno_pseudo_probe_for_profiling, false))
- CmdArgs.push_back("-plugin-opt=pseudo-probe-for-profiling");
-
// Setup statistics file output.
SmallString<128> StatsFile = getStatsFileName(Args, Output, Input, D);
if (!StatsFile.empty())
@@ -657,7 +657,7 @@ void tools::addArchSpecificRPath(const ToolChain &TC, const ArgList &Args,
std::string CandidateRPath = TC.getArchSpecificLibPath();
if (TC.getVFS().exists(CandidateRPath)) {
CmdArgs.push_back("-rpath");
- CmdArgs.push_back(Args.MakeArgString(CandidateRPath.c_str()));
+ CmdArgs.push_back(Args.MakeArgString(CandidateRPath));
}
}
@@ -775,7 +775,8 @@ void tools::linkSanitizerRuntimeDeps(const ToolChain &TC,
CmdArgs.push_back("-ldl");
// Required for backtrace on some OSes
if (TC.getTriple().isOSFreeBSD() ||
- TC.getTriple().isOSNetBSD())
+ TC.getTriple().isOSNetBSD() ||
+ TC.getTriple().isOSOpenBSD())
CmdArgs.push_back("-lexecinfo");
}
@@ -786,7 +787,7 @@ collectSanitizerRuntimes(const ToolChain &TC, const ArgList &Args,
SmallVectorImpl<StringRef> &NonWholeStaticRuntimes,
SmallVectorImpl<StringRef> &HelperStaticRuntimes,
SmallVectorImpl<StringRef> &RequiredSymbols) {
- const SanitizerArgs &SanArgs = TC.getSanitizerArgs();
+ const SanitizerArgs &SanArgs = TC.getSanitizerArgs(Args);
// Collect shared runtimes.
if (SanArgs.needsSharedRt()) {
if (SanArgs.needsAsanRt() && SanArgs.linkRuntimes()) {
@@ -922,7 +923,7 @@ bool tools::addSanitizerRuntimes(const ToolChain &TC, const ArgList &Args,
NonWholeStaticRuntimes, HelperStaticRuntimes,
RequiredSymbols);
- const SanitizerArgs &SanArgs = TC.getSanitizerArgs();
+ const SanitizerArgs &SanArgs = TC.getSanitizerArgs(Args);
// Inject libfuzzer dependencies.
if (SanArgs.needsFuzzer() && SanArgs.linkRuntimes() &&
!Args.hasArg(options::OPT_shared)) {
@@ -1115,7 +1116,7 @@ tools::ParsePICArgs(const ToolChain &ToolChain, const ArgList &Args) {
const llvm::Triple &EffectiveTriple = ToolChain.getEffectiveTriple();
const llvm::Triple &Triple = ToolChain.getTriple();
- bool PIE = ToolChain.isPIEDefault();
+ bool PIE = ToolChain.isPIEDefault(Args);
bool PIC = PIE || ToolChain.isPICDefault();
// The Darwin/MachO default to use PIC does not apply when using -static.
if (Triple.isOSBinFormatMachO() && Args.hasArg(options::OPT_static))
@@ -1587,6 +1588,292 @@ void tools::addX86AlignBranchArgs(const Driver &D, const ArgList &Args,
}
}
+/// SDLSearch: Search for Static Device Library
+/// The search for SDL bitcode files is consistent with how static host
+/// libraries are discovered. That is, the -l option triggers a search for
+/// files in a set of directories called the LINKPATH. The host library search
+/// procedure looks for a specific filename in the LINKPATH. The filename for
+/// a host library is lib<libname>.a or lib<libname>.so. For SDLs, there is an
+/// ordered set of filenames that are searched. We call this ordered set of
+/// filenames the SEARCH-ORDER. Since an SDL can be device-type specific,
+/// architecture specific, or generic across all architectures, a naming
+/// convention and search order are used where the file name embeds the
+/// architecture name <arch-name> (nvptx or amdgcn) and the GPU device type
+/// <device-name> such as sm_30 and gfx906. <device-name> is absent in case of
+/// device-independent SDLs. To reduce congestion in host library directories,
+/// the search first looks for files in the “libdevice” subdirectory. SDLs that
+/// are bc files begin with the prefix “lib”.
+///
+/// Machine-code SDLs can also be managed as an archive (*.a file). The
+/// convention has been to use the prefix “lib”. To avoid confusion with host
+/// archive libraries, we use prefix "libbc-" for the bitcode SDL archives.
+///
+bool tools::SDLSearch(const Driver &D, const llvm::opt::ArgList &DriverArgs,
+ llvm::opt::ArgStringList &CC1Args,
+ SmallVector<std::string, 8> LibraryPaths, std::string Lib,
+ StringRef Arch, StringRef Target, bool isBitCodeSDL,
+ bool postClangLink) {
+ SmallVector<std::string, 12> SDLs;
+
+ std::string LibDeviceLoc = "/libdevice";
+ std::string LibBcPrefix = "/libbc-";
+ std::string LibPrefix = "/lib";
+
+ if (isBitCodeSDL) {
+ // SEARCH-ORDER for Bitcode SDLs:
+ // libdevice/libbc-<libname>-<arch-name>-<device-type>.a
+ // libbc-<libname>-<arch-name>-<device-type>.a
+ // libdevice/libbc-<libname>-<arch-name>.a
+ // libbc-<libname>-<arch-name>.a
+ // libdevice/libbc-<libname>.a
+ // libbc-<libname>.a
+ // libdevice/lib<libname>-<arch-name>-<device-type>.bc
+ // lib<libname>-<arch-name>-<device-type>.bc
+ // libdevice/lib<libname>-<arch-name>.bc
+ // lib<libname>-<arch-name>.bc
+ // libdevice/lib<libname>.bc
+ // lib<libname>.bc
+
+ for (StringRef Base : {LibBcPrefix, LibPrefix}) {
+ const auto *Ext = Base.contains(LibBcPrefix) ? ".a" : ".bc";
+
+ for (auto Suffix : {Twine(Lib + "-" + Arch + "-" + Target).str(),
+ Twine(Lib + "-" + Arch).str(), Twine(Lib).str()}) {
+ SDLs.push_back(Twine(LibDeviceLoc + Base + Suffix + Ext).str());
+ SDLs.push_back(Twine(Base + Suffix + Ext).str());
+ }
+ }
+ } else {
+ // SEARCH-ORDER for Machine-code SDLs:
+ // libdevice/lib<libname>-<arch-name>-<device-type>.a
+ // lib<libname>-<arch-name>-<device-type>.a
+ // libdevice/lib<libname>-<arch-name>.a
+ // lib<libname>-<arch-name>.a
+
+ const auto *Ext = ".a";
+
+ for (auto Suffix : {Twine(Lib + "-" + Arch + "-" + Target).str(),
+ Twine(Lib + "-" + Arch).str()}) {
+ SDLs.push_back(Twine(LibDeviceLoc + LibPrefix + Suffix + Ext).str());
+ SDLs.push_back(Twine(LibPrefix + Suffix + Ext).str());
+ }
+ }
+
+ // The CUDA toolchain does not use a global device llvm-link before the LLVM
+ // backend generates ptx. So currently, the use of bitcode SDL for nvptx is
+ // only possible with post-clang-cc1 linking. Clang cc1 has a feature that
+ // will link libraries after clang compilation while the LLVM IR is still in
+ // memory. This utilizes a clang cc1 option called “-mlink-builtin-bitcode”.
+ // This is a clang -cc1 option that is generated by the clang driver. The
+ // option value must be a full path to an existing file.
+ bool FoundSDL = false;
+ for (auto LPath : LibraryPaths) {
+ for (auto SDL : SDLs) {
+ auto FullName = Twine(LPath + SDL).str();
+ if (llvm::sys::fs::exists(FullName)) {
+ if (postClangLink)
+ CC1Args.push_back("-mlink-builtin-bitcode");
+ CC1Args.push_back(DriverArgs.MakeArgString(FullName));
+ FoundSDL = true;
+ break;
+ }
+ }
+ if (FoundSDL)
+ break;
+ }
+ return FoundSDL;
+}
+
+/// Search if a user provided archive file lib<libname>.a exists in any of
+/// the library paths. If so, add a new command to clang-offload-bundler to
+/// unbundle this archive and create a temporary device specific archive. Name
+/// of this SDL is passed to the llvm-link (for amdgcn) or to the
+/// clang-nvlink-wrapper (for nvptx) commands by the driver.
+bool tools::GetSDLFromOffloadArchive(
+ Compilation &C, const Driver &D, const Tool &T, const JobAction &JA,
+ const InputInfoList &Inputs, const llvm::opt::ArgList &DriverArgs,
+ llvm::opt::ArgStringList &CC1Args, SmallVector<std::string, 8> LibraryPaths,
+ StringRef Lib, StringRef Arch, StringRef Target, bool isBitCodeSDL,
+ bool postClangLink) {
+
+ // We don't support bitcode archive bundles for nvptx
+ if (isBitCodeSDL && Arch.contains("nvptx"))
+ return false;
+
+ bool FoundAOB = false;
+ SmallVector<std::string, 2> AOBFileNames;
+ std::string ArchiveOfBundles;
+ for (auto LPath : LibraryPaths) {
+ ArchiveOfBundles.clear();
+
+ AOBFileNames.push_back(Twine(LPath + "/libdevice/lib" + Lib + ".a").str());
+ AOBFileNames.push_back(Twine(LPath + "/lib" + Lib + ".a").str());
+
+ for (auto AOB : AOBFileNames) {
+ if (llvm::sys::fs::exists(AOB)) {
+ ArchiveOfBundles = AOB;
+ FoundAOB = true;
+ break;
+ }
+ }
+
+ if (!FoundAOB)
+ continue;
+
+ StringRef Prefix = isBitCodeSDL ? "libbc-" : "lib";
+ std::string OutputLib = D.GetTemporaryPath(
+ Twine(Prefix + Lib + "-" + Arch + "-" + Target).str(), "a");
+
+ C.addTempFile(C.getArgs().MakeArgString(OutputLib.c_str()));
+
+ ArgStringList CmdArgs;
+ SmallString<128> DeviceTriple;
+ DeviceTriple += Action::GetOffloadKindName(JA.getOffloadingDeviceKind());
+ DeviceTriple += '-';
+ std::string NormalizedTriple = T.getToolChain().getTriple().normalize();
+ DeviceTriple += NormalizedTriple;
+ if (!Target.empty()) {
+ DeviceTriple += '-';
+ DeviceTriple += Target;
+ }
+
+ std::string UnbundleArg("-unbundle");
+ std::string TypeArg("-type=a");
+ std::string InputArg("-inputs=" + ArchiveOfBundles);
+ std::string OffloadArg("-targets=" + std::string(DeviceTriple));
+ std::string OutputArg("-outputs=" + OutputLib);
+
+ const char *UBProgram = DriverArgs.MakeArgString(
+ T.getToolChain().GetProgramPath("clang-offload-bundler"));
+
+ ArgStringList UBArgs;
+ UBArgs.push_back(C.getArgs().MakeArgString(UnbundleArg.c_str()));
+ UBArgs.push_back(C.getArgs().MakeArgString(TypeArg.c_str()));
+ UBArgs.push_back(C.getArgs().MakeArgString(InputArg.c_str()));
+ UBArgs.push_back(C.getArgs().MakeArgString(OffloadArg.c_str()));
+ UBArgs.push_back(C.getArgs().MakeArgString(OutputArg.c_str()));
+
+ // Add this flag to not exit from clang-offload-bundler if no compatible
+ // code object is found in a heterogeneous archive library.
+ std::string AdditionalArgs("-allow-missing-bundles");
+ UBArgs.push_back(C.getArgs().MakeArgString(AdditionalArgs.c_str()));
+
+ C.addCommand(std::make_unique<Command>(
+ JA, T, ResponseFileSupport::AtFileCurCP(), UBProgram, UBArgs, Inputs,
+ InputInfo(&JA, C.getArgs().MakeArgString(OutputLib.c_str()))));
+ if (postClangLink)
+ CC1Args.push_back("-mlink-builtin-bitcode");
+
+ CC1Args.push_back(DriverArgs.MakeArgString(OutputLib));
+ break;
+ }
+
+ return FoundAOB;
+}
+
+// Wrapper function used by driver for adding SDLs during link phase.
+void tools::AddStaticDeviceLibsLinking(Compilation &C, const Tool &T,
+ const JobAction &JA,
+ const InputInfoList &Inputs,
+ const llvm::opt::ArgList &DriverArgs,
+ llvm::opt::ArgStringList &CC1Args,
+ StringRef Arch, StringRef Target,
+ bool isBitCodeSDL, bool postClangLink) {
+ AddStaticDeviceLibs(&C, &T, &JA, &Inputs, C.getDriver(), DriverArgs, CC1Args,
+ Arch, Target, isBitCodeSDL, postClangLink);
+}
+
+// Wrapper function used for post clang linking of bitcode SDLS for nvptx by
+// the CUDA toolchain.
+void tools::AddStaticDeviceLibsPostLinking(const Driver &D,
+ const llvm::opt::ArgList &DriverArgs,
+ llvm::opt::ArgStringList &CC1Args,
+ StringRef Arch, StringRef Target,
+ bool isBitCodeSDL, bool postClangLink) {
+ AddStaticDeviceLibs(nullptr, nullptr, nullptr, nullptr, D, DriverArgs,
+ CC1Args, Arch, Target, isBitCodeSDL, postClangLink);
+}
+
+// User-defined Static Device Libraries (SDLs) can be passed to clang for
+// offloading GPU compilers. Like static host libraries, the use of an SDL is
+// specified with the -l command line option. The primary difference between
+// host libraries and SDLs is the filename convention (refer to SEARCH-ORDER
+// for Bitcode SDLs and SEARCH-ORDER for Machine-code SDLs above).
+// SDLs are of following types:
+//
+// * Bitcode SDLs: They can either be a *.bc file or an archive of *.bc files.
+// For NVPTX, these libraries are post-clang linked following each
+// compilation. For AMDGPU, these libraries are linked one time
+// during the application link phase.
+//
+// * Machine-code SDLs: They are archive files. For NVPTX, the archive members
+// contain cubin for Nvidia GPUs and are linked one time during the
+// link phase by the CUDA SDK linker called nvlink. For AMDGPU, the
+// process for machine code SDLs is still in development. But they
+// will be linked by the LLVM tool lld.
+//
+// * Bundled objects that contain both host and device codes: Bundled objects
+// may also contain library code compiled from source. For NVPTX, the
+// bundle contains cubin. For AMDGPU, the bundle contains bitcode.
+//
+// For Bitcode and Machine-code SDLs, current compiler toolchains hardcode the
+// inclusion of specific SDLs such as math libraries and the OpenMP device
+// library libomptarget.
+void tools::AddStaticDeviceLibs(Compilation *C, const Tool *T,
+ const JobAction *JA,
+ const InputInfoList *Inputs, const Driver &D,
+ const llvm::opt::ArgList &DriverArgs,
+ llvm::opt::ArgStringList &CC1Args,
+ StringRef Arch, StringRef Target,
+ bool isBitCodeSDL, bool postClangLink) {
+
+ SmallVector<std::string, 8> LibraryPaths;
+ // Add search directories from LIBRARY_PATH env variable
+ llvm::Optional<std::string> LibPath =
+ llvm::sys::Process::GetEnv("LIBRARY_PATH");
+ if (LibPath) {
+ SmallVector<StringRef, 8> Frags;
+ const char EnvPathSeparatorStr[] = {llvm::sys::EnvPathSeparator, '\0'};
+ llvm::SplitString(*LibPath, Frags, EnvPathSeparatorStr);
+ for (StringRef Path : Frags)
+ LibraryPaths.emplace_back(Path.trim());
+ }
+
+ // Add directories from user-specified -L options
+ for (std::string Search_Dir : DriverArgs.getAllArgValues(options::OPT_L))
+ LibraryPaths.emplace_back(Search_Dir);
+
+ // Add path to lib-debug folders
+ SmallString<256> DefaultLibPath = llvm::sys::path::parent_path(D.Dir);
+ llvm::sys::path::append(DefaultLibPath, Twine("lib") + CLANG_LIBDIR_SUFFIX);
+ LibraryPaths.emplace_back(DefaultLibPath.c_str());
+
+ // Build list of Static Device Libraries SDLs specified by -l option
+ llvm::SmallSet<std::string, 16> SDLNames;
+ static const StringRef HostOnlyArchives[] = {
+ "omp", "cudart", "m", "gcc", "gcc_s", "pthread", "hip_hcc"};
+ for (auto SDLName : DriverArgs.getAllArgValues(options::OPT_l)) {
+ if (!llvm::is_contained(HostOnlyArchives, SDLName)) {
+ SDLNames.insert(SDLName);
+ }
+ }
+
+ // The search stops as soon as an SDL file is found. The driver then provides
+ // the full filename of the SDL to the llvm-link or clang-nvlink-wrapper
+ // command. If no SDL is found after searching each LINKPATH with
+ // SEARCH-ORDER, it is possible that an archive file lib<libname>.a exists
+ // and may contain bundled object files.
+ for (auto SDLName : SDLNames) {
+ // This is the only call to SDLSearch
+ if (!SDLSearch(D, DriverArgs, CC1Args, LibraryPaths, SDLName, Arch, Target,
+ isBitCodeSDL, postClangLink)) {
+ GetSDLFromOffloadArchive(*C, D, *T, *JA, *Inputs, DriverArgs, CC1Args,
+ LibraryPaths, SDLName, Arch, Target,
+ isBitCodeSDL, postClangLink);
+ }
+ }
+}
+
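A minimal stand-alone sketch of the search-path ordering established above (LIBRARY_PATH entries first, then -L directories, then the clang lib directory next to the driver); the helper name and the use of plain standard-library calls are illustrative only and not part of the driver:

#include <cstdlib>
#include <sstream>
#include <string>
#include <vector>

// Illustrative sketch mirroring the path collection in AddStaticDeviceLibs:
// LIBRARY_PATH first, then -L directories, then <driver dir>/../lib<suffix>.
std::vector<std::string> collectSDLSearchPaths(const std::vector<std::string> &LDirs,
                                               const std::string &ClangLibDir) {
  std::vector<std::string> Paths;
  if (const char *Env = std::getenv("LIBRARY_PATH")) {
    std::stringstream SS(Env);
    for (std::string Piece; std::getline(SS, Piece, ':');) // ';' on Windows
      if (!Piece.empty())
        Paths.push_back(Piece);
  }
  Paths.insert(Paths.end(), LDirs.begin(), LDirs.end());
  Paths.push_back(ClangLibDir);
  return Paths;
}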
static llvm::opt::Arg *
getAMDGPUCodeObjectArgument(const Driver &D, const llvm::opt::ArgList &Args) {
// The last of -mcode-object-v3, -mno-code-object-v3 and
@@ -1684,6 +1971,12 @@ void tools::addOpenMPDeviceRTL(const Driver &D,
StringRef BitcodeSuffix,
const llvm::Triple &Triple) {
SmallVector<StringRef, 8> LibraryPaths;
+
+ // Add path to clang lib / lib64 folder.
+ SmallString<256> DefaultLibPath = llvm::sys::path::parent_path(D.Dir);
+ llvm::sys::path::append(DefaultLibPath, Twine("lib") + CLANG_LIBDIR_SUFFIX);
+ LibraryPaths.emplace_back(DefaultLibPath.c_str());
+
// Add user defined library paths from LIBRARY_PATH.
llvm::Optional<std::string> LibPath =
llvm::sys::Process::GetEnv("LIBRARY_PATH");
@@ -1695,32 +1988,31 @@ void tools::addOpenMPDeviceRTL(const Driver &D,
LibraryPaths.emplace_back(Path.trim());
}
- // Add path to lib / lib64 folder.
- SmallString<256> DefaultLibPath = llvm::sys::path::parent_path(D.Dir);
- llvm::sys::path::append(DefaultLibPath, Twine("lib") + CLANG_LIBDIR_SUFFIX);
- LibraryPaths.emplace_back(DefaultLibPath.c_str());
-
OptSpecifier LibomptargetBCPathOpt =
Triple.isAMDGCN() ? options::OPT_libomptarget_amdgcn_bc_path_EQ
: options::OPT_libomptarget_nvptx_bc_path_EQ;
StringRef ArchPrefix = Triple.isAMDGCN() ? "amdgcn" : "nvptx";
+ std::string LibOmpTargetName = "libomptarget-" + BitcodeSuffix.str() + ".bc";
+
// First check whether user specifies bc library
if (const Arg *A = DriverArgs.getLastArg(LibomptargetBCPathOpt)) {
- std::string LibOmpTargetName(A->getValue());
- if (llvm::sys::fs::exists(LibOmpTargetName)) {
+ SmallString<128> LibOmpTargetFile(A->getValue());
+ if (llvm::sys::fs::exists(LibOmpTargetFile) &&
+ llvm::sys::fs::is_directory(LibOmpTargetFile)) {
+ llvm::sys::path::append(LibOmpTargetFile, LibOmpTargetName);
+ }
+
+ if (llvm::sys::fs::exists(LibOmpTargetFile)) {
CC1Args.push_back("-mlink-builtin-bitcode");
- CC1Args.push_back(DriverArgs.MakeArgString(LibOmpTargetName));
+ CC1Args.push_back(DriverArgs.MakeArgString(LibOmpTargetFile));
} else {
D.Diag(diag::err_drv_omp_offload_target_bcruntime_not_found)
- << LibOmpTargetName;
+ << LibOmpTargetFile;
}
} else {
bool FoundBCLibrary = false;
- std::string LibOmpTargetName =
- "libomptarget-" + BitcodeSuffix.str() + ".bc";
-
for (StringRef LibraryPath : LibraryPaths) {
SmallString<128> LibOmpTargetFile(LibraryPath);
llvm::sys::path::append(LibOmpTargetFile, LibOmpTargetName);
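With the reshuffle above, --libomptarget-nvptx-bc-path= (and the amdgcn variant) may now name either the bitcode file itself or a directory that holds the default libomptarget-<suffix>.bc. A minimal sketch of that resolution, using the standard library and a made-up suffix purely for illustration:

#include <filesystem>
#include <string>

// Illustrative only: if the user-supplied path is a directory, append the
// default runtime name; an empty result means "not found, emit a diagnostic".
std::string resolveLibomptargetPath(const std::string &Arg,
                                    const std::string &DefaultName) {
  namespace fs = std::filesystem;
  fs::path P(Arg);
  if (fs::exists(P) && fs::is_directory(P))
    P /= DefaultName; // e.g. "libomptarget-nvptx-sm_70.bc" (example name only)
  return fs::exists(P) ? P.string() : std::string();
}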
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.h b/clang/lib/Driver/ToolChains/CommonArgs.h
index c94c15864661..00291a3681c8 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.h
+++ b/clang/lib/Driver/ToolChains/CommonArgs.h
@@ -49,6 +49,39 @@ void AddRunTimeLibs(const ToolChain &TC, const Driver &D,
llvm::opt::ArgStringList &CmdArgs,
const llvm::opt::ArgList &Args);
+void AddStaticDeviceLibsLinking(Compilation &C, const Tool &T,
+ const JobAction &JA,
+ const InputInfoList &Inputs,
+ const llvm::opt::ArgList &DriverArgs,
+ llvm::opt::ArgStringList &CmdArgs,
+ StringRef Arch, StringRef Target,
+ bool isBitCodeSDL, bool postClangLink);
+void AddStaticDeviceLibsPostLinking(const Driver &D,
+ const llvm::opt::ArgList &DriverArgs,
+ llvm::opt::ArgStringList &CmdArgs,
+ StringRef Arch, StringRef Target,
+ bool isBitCodeSDL, bool postClangLink);
+void AddStaticDeviceLibs(Compilation *C, const Tool *T, const JobAction *JA,
+ const InputInfoList *Inputs, const Driver &D,
+ const llvm::opt::ArgList &DriverArgs,
+ llvm::opt::ArgStringList &CmdArgs, StringRef Arch,
+ StringRef Target, bool isBitCodeSDL,
+ bool postClangLink);
+
+bool SDLSearch(const Driver &D, const llvm::opt::ArgList &DriverArgs,
+ llvm::opt::ArgStringList &CmdArgs,
+ SmallVector<std::string, 8> LibraryPaths, std::string Lib,
+ StringRef Arch, StringRef Target, bool isBitCodeSDL,
+ bool postClangLink);
+
+bool GetSDLFromOffloadArchive(Compilation &C, const Driver &D, const Tool &T,
+ const JobAction &JA, const InputInfoList &Inputs,
+ const llvm::opt::ArgList &DriverArgs,
+ llvm::opt::ArgStringList &CC1Args,
+ SmallVector<std::string, 8> LibraryPaths,
+ StringRef Lib, StringRef Arch, StringRef Target,
+ bool isBitCodeSDL, bool postClangLink);
+
const char *SplitDebugName(const JobAction &JA, const llvm::opt::ArgList &Args,
const InputInfo &Input, const InputInfo &Output);
@@ -107,8 +140,8 @@ void AddTargetFeature(const llvm::opt::ArgList &Args,
llvm::opt::OptSpecifier OnOpt,
llvm::opt::OptSpecifier OffOpt, StringRef FeatureName);
-std::string getCPUName(const llvm::opt::ArgList &Args, const llvm::Triple &T,
- bool FromAs = false);
+std::string getCPUName(const Driver &D, const llvm::opt::ArgList &Args,
+ const llvm::Triple &T, bool FromAs = false);
/// Iterate \p Args and convert -mxxx to +xxx and -mno-xxx to -xxx and
/// append it to \p Features.
diff --git a/clang/lib/Driver/ToolChains/CrossWindows.cpp b/clang/lib/Driver/ToolChains/CrossWindows.cpp
index 07abf4f83f7d..2b043fbeecda 100644
--- a/clang/lib/Driver/ToolChains/CrossWindows.cpp
+++ b/clang/lib/Driver/ToolChains/CrossWindows.cpp
@@ -185,7 +185,7 @@ void tools::CrossWindows::Linker::ConstructJob(
}
}
- if (TC.getSanitizerArgs().needsAsanRt()) {
+ if (TC.getSanitizerArgs(Args).needsAsanRt()) {
// TODO handle /MT[d] /MD[d]
if (Args.hasArg(options::OPT_shared)) {
CmdArgs.push_back(TC.getCompilerRTArgString(Args, "asan_dll_thunk"));
@@ -223,7 +223,7 @@ bool CrossWindowsToolChain::isPICDefault() const {
return getArch() == llvm::Triple::x86_64;
}
-bool CrossWindowsToolChain::isPIEDefault() const {
+bool CrossWindowsToolChain::isPIEDefault(const llvm::opt::ArgList &Args) const {
return getArch() == llvm::Triple::x86_64;
}
diff --git a/clang/lib/Driver/ToolChains/CrossWindows.h b/clang/lib/Driver/ToolChains/CrossWindows.h
index ffe75332c2e8..bab690ea34d0 100644
--- a/clang/lib/Driver/ToolChains/CrossWindows.h
+++ b/clang/lib/Driver/ToolChains/CrossWindows.h
@@ -57,7 +57,7 @@ public:
bool IsIntegratedAssemblerDefault() const override { return true; }
bool IsUnwindTablesDefault(const llvm::opt::ArgList &Args) const override;
bool isPICDefault() const override;
- bool isPIEDefault() const override;
+ bool isPIEDefault(const llvm::opt::ArgList &Args) const override;
bool isPICDefaultForced() const override;
LangOptions::StackProtectorMode
diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp
index 769eae14df51..5397c7a9a0e6 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -17,6 +17,7 @@
#include "clang/Driver/InputInfo.h"
#include "clang/Driver/Options.h"
#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Host.h"
@@ -34,25 +35,6 @@ using namespace clang;
using namespace llvm::opt;
namespace {
-struct CudaVersionInfo {
- std::string DetectedVersion;
- CudaVersion Version;
-};
-// Parses the contents of version.txt in an CUDA installation. It should
-// contain one line of the from e.g. "CUDA Version 7.5.2".
-CudaVersionInfo parseCudaVersionFile(llvm::StringRef V) {
- V = V.trim();
- if (!V.startswith("CUDA Version "))
- return {V.str(), CudaVersion::UNKNOWN};
- V = V.substr(strlen("CUDA Version "));
- SmallVector<StringRef,4> VersionParts;
- V.split(VersionParts, '.');
- return {"version.txt: " + V.str() + ".",
- VersionParts.size() < 2
- ? CudaVersion::UNKNOWN
- : CudaStringToVersion(
- join_items(".", VersionParts[0], VersionParts[1]))};
-}
CudaVersion getCudaVersion(uint32_t raw_version) {
if (raw_version < 7050)
@@ -77,10 +59,18 @@ CudaVersion getCudaVersion(uint32_t raw_version) {
return CudaVersion::CUDA_110;
if (raw_version < 11020)
return CudaVersion::CUDA_111;
- return CudaVersion::LATEST;
+ if (raw_version < 11030)
+ return CudaVersion::CUDA_112;
+ if (raw_version < 11040)
+ return CudaVersion::CUDA_113;
+ if (raw_version < 11050)
+ return CudaVersion::CUDA_114;
+ if (raw_version < 11060)
+ return CudaVersion::CUDA_115;
+ return CudaVersion::NEW;
}
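CUDA_VERSION as published in cuda.h encodes major * 1000 + minor * 10, which is why the 11.x boundaries above advance in steps of 10. A tiny self-check of that encoding (illustrative helpers, not driver code):

// 11040 is CUDA 11.4; the function above maps it to CudaVersion::CUDA_114
// because it falls in [11040, 11050).
constexpr int cudaMajor(int Raw) { return Raw / 1000; }
constexpr int cudaMinor(int Raw) { return (Raw % 1000) / 10; }
static_assert(cudaMajor(11040) == 11 && cudaMinor(11040) == 4, "CUDA 11.4");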
-CudaVersionInfo parseCudaHFile(llvm::StringRef Input) {
+CudaVersion parseCudaHFile(llvm::StringRef Input) {
// Helper lambda which skips the words if the line starts with them or returns
// None otherwise.
auto StartsWithWords =
@@ -100,21 +90,27 @@ CudaVersionInfo parseCudaHFile(llvm::StringRef Input) {
StartsWithWords(Input.ltrim(), {"#", "define", "CUDA_VERSION"})) {
uint32_t RawVersion;
Line->consumeInteger(10, RawVersion);
- return {"cuda.h: CUDA_VERSION=" + Twine(RawVersion).str() + ".",
- getCudaVersion(RawVersion)};
+ return getCudaVersion(RawVersion);
}
// Find next non-empty line.
Input = Input.drop_front(Input.find_first_of("\n\r")).ltrim();
}
- return {"cuda.h: CUDA_VERSION not found.", CudaVersion::UNKNOWN};
+ return CudaVersion::UNKNOWN;
}
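Concretely, a cuda.h containing a line such as

  #define CUDA_VERSION 11040

is now enough on its own: the scan above returns getCudaVersion(11040), i.e. CudaVersion::CUDA_114, and version.txt is no longer consulted (see the detector change below). The value 11040 is only an example.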
} // namespace
void CudaInstallationDetector::WarnIfUnsupportedVersion() {
- if (DetectedVersionIsNotSupported)
- D.Diag(diag::warn_drv_unknown_cuda_version)
- << DetectedVersion
- << CudaVersionToString(CudaVersion::LATEST_SUPPORTED);
+ if (Version > CudaVersion::PARTIALLY_SUPPORTED) {
+ std::string VersionString = CudaVersionToString(Version);
+ if (!VersionString.empty())
+ VersionString.insert(0, " ");
+ D.Diag(diag::warn_drv_new_cuda_version)
+ << VersionString
+ << (CudaVersion::PARTIALLY_SUPPORTED != CudaVersion::FULLY_SUPPORTED)
+ << CudaVersionToString(CudaVersion::PARTIALLY_SUPPORTED);
+ } else if (Version > CudaVersion::FULLY_SUPPORTED)
+ D.Diag(diag::warn_drv_partially_supported_cuda_version)
+ << CudaVersionToString(Version);
}
CudaInstallationDetector::CudaInstallationDetector(
@@ -206,31 +202,17 @@ CudaInstallationDetector::CudaInstallationDetector(
else
continue;
- CudaVersionInfo VersionInfo = {"", CudaVersion::UNKNOWN};
- if (auto VersionFile = FS.getBufferForFile(InstallPath + "/version.txt"))
- VersionInfo = parseCudaVersionFile((*VersionFile)->getBuffer());
- // If version file didn't give us the version, try to find it in cuda.h
- if (VersionInfo.Version == CudaVersion::UNKNOWN)
- if (auto CudaHFile = FS.getBufferForFile(InstallPath + "/include/cuda.h"))
- VersionInfo = parseCudaHFile((*CudaHFile)->getBuffer());
- // As the last resort, make an educated guess between CUDA-7.0, (which had
- // no version.txt file and had old-style libdevice bitcode ) and an unknown
- // recent CUDA version (no version.txt, new style bitcode).
- if (VersionInfo.Version == CudaVersion::UNKNOWN) {
- VersionInfo.Version = (FS.exists(LibDevicePath + "/libdevice.10.bc"))
- ? Version = CudaVersion::LATEST
- : Version = CudaVersion::CUDA_70;
- VersionInfo.DetectedVersion =
- "No version found in version.txt or cuda.h.";
+ Version = CudaVersion::UNKNOWN;
+ if (auto CudaHFile = FS.getBufferForFile(InstallPath + "/include/cuda.h"))
+ Version = parseCudaHFile((*CudaHFile)->getBuffer());
+ // As the last resort, make an educated guess between CUDA-7.0, which had
+ // old-style libdevice bitcode, and an unknown recent CUDA version.
+ if (Version == CudaVersion::UNKNOWN) {
+ Version = FS.exists(LibDevicePath + "/libdevice.10.bc")
+ ? CudaVersion::NEW
+ : CudaVersion::CUDA_70;
}
- Version = VersionInfo.Version;
- DetectedVersion = VersionInfo.DetectedVersion;
-
- // TODO(tra): remove the warning once we have all features of 10.2
- // and 11.0 implemented.
- DetectedVersionIsNotSupported = Version > CudaVersion::LATEST_SUPPORTED;
-
if (Version >= CudaVersion::CUDA_90) {
// CUDA-9+ uses single libdevice file for all GPU variants.
std::string FilePath = LibDevicePath + "/libdevice.10.bc";
@@ -319,8 +301,6 @@ void CudaInstallationDetector::AddCudaIncludeArgs(
return;
}
- CC1Args.push_back("-internal-isystem");
- CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath()));
CC1Args.push_back("-include");
CC1Args.push_back("__clang_cuda_runtime_wrapper.h");
}
@@ -632,8 +612,16 @@ void NVPTX::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back(CubinF);
}
+ AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, CmdArgs, "nvptx", GPUArch,
+ false, false);
+
+ // Find nvlink and pass it as "--nvlink-path=" argument of
+ // clang-nvlink-wrapper.
+ CmdArgs.push_back(Args.MakeArgString(
+ Twine("--nvlink-path=" + getToolChain().GetProgramPath("nvlink"))));
+
const char *Exec =
- Args.MakeArgString(getToolChain().GetProgramPath("nvlink"));
+ Args.MakeArgString(getToolChain().GetProgramPath("clang-nvlink-wrapper"));
C.addCommand(std::make_unique<Command>(
JA, *this,
ResponseFileSupport{ResponseFileSupport::RF_Full, llvm::sys::WEM_UTF8,
@@ -686,7 +674,8 @@ void CudaToolChain::addClangTargetOptions(
"Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs.");
if (DeviceOffloadingKind == Action::OFK_Cuda) {
- CC1Args.push_back("-fcuda-is-device");
+ CC1Args.append(
+ {"-fcuda-is-device", "-mllvm", "-enable-memcpyopt-without-libcalls"});
if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
options::OPT_fno_cuda_approx_transcendentals, false))
@@ -720,6 +709,9 @@ void CudaToolChain::addClangTargetOptions(
case CudaVersion::CUDA_##CUDA_VER: \
PtxFeature = "+ptx" #PTX_VER; \
break;
+ CASE_CUDA_VERSION(115, 75);
+ CASE_CUDA_VERSION(114, 74);
+ CASE_CUDA_VERSION(113, 73);
CASE_CUDA_VERSION(112, 72);
CASE_CUDA_VERSION(111, 71);
CASE_CUDA_VERSION(110, 70);
@@ -760,6 +752,8 @@ void CudaToolChain::addClangTargetOptions(
addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, BitcodeSuffix,
getTriple());
+ AddStaticDeviceLibsPostLinking(getDriver(), DriverArgs, CC1Args, "nvptx", GpuArch,
+ /*isBitCodeSDL=*/true, /*postClangLink=*/true);
}
}
@@ -831,17 +825,9 @@ CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
// flags are not duplicated.
// Also append the compute capability.
if (DeviceOffloadKind == Action::OFK_OpenMP) {
- for (Arg *A : Args) {
- bool IsDuplicate = false;
- for (Arg *DALArg : *DAL) {
- if (A == DALArg) {
- IsDuplicate = true;
- break;
- }
- }
- if (!IsDuplicate)
+ for (Arg *A : Args)
+ if (!llvm::is_contained(*DAL, A))
DAL->append(A);
- }
StringRef Arch = DAL->getLastArgValue(options::OPT_march_EQ);
if (Arch.empty())
@@ -884,6 +870,11 @@ CudaToolChain::GetCXXStdlibType(const ArgList &Args) const {
void CudaToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
+
+ if (!DriverArgs.hasArg(options::OPT_nogpuinc) && CudaInstallation.isValid())
+ CC1Args.append(
+ {"-internal-isystem",
+ DriverArgs.MakeArgString(CudaInstallation.getIncludePath())});
}
void CudaToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &Args,
diff --git a/clang/lib/Driver/ToolChains/Cuda.h b/clang/lib/Driver/ToolChains/Cuda.h
index 6ae4415a563a..a7e6e84f4902 100644
--- a/clang/lib/Driver/ToolChains/Cuda.h
+++ b/clang/lib/Driver/ToolChains/Cuda.h
@@ -30,8 +30,6 @@ private:
const Driver &D;
bool IsValid = false;
CudaVersion Version = CudaVersion::UNKNOWN;
- std::string DetectedVersion;
- bool DetectedVersionIsNotSupported = false;
std::string InstallPath;
std::string BinPath;
std::string LibPath;
@@ -62,7 +60,10 @@ public:
void print(raw_ostream &OS) const;
/// Get the detected Cuda install's version.
- CudaVersion version() const { return Version; }
+ CudaVersion version() const {
+ return Version == CudaVersion::NEW ? CudaVersion::PARTIALLY_SUPPORTED
+ : Version;
+ }
/// Get the detected Cuda installation path.
StringRef getInstallPath() const { return InstallPath; }
/// Get the detected path to Cuda's bin directory.
@@ -156,7 +157,9 @@ public:
bool useIntegratedAs() const override { return false; }
bool isCrossCompiling() const override { return true; }
bool isPICDefault() const override { return false; }
- bool isPIEDefault() const override { return false; }
+ bool isPIEDefault(const llvm::opt::ArgList &Args) const override {
+ return false;
+ }
bool isPICDefaultForced() const override { return false; }
bool SupportsProfiling() const override { return false; }
bool supportsDebugInfoOption(const llvm::opt::Arg *A) const override;
diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp
index 261f522f6c49..06d3edc70e45 100644
--- a/clang/lib/Driver/ToolChains/Darwin.cpp
+++ b/clang/lib/Driver/ToolChains/Darwin.cpp
@@ -34,7 +34,7 @@ using namespace clang::driver::toolchains;
using namespace clang;
using namespace llvm::opt;
-static const VersionTuple minimumMacCatalystDeploymentTarget() {
+static VersionTuple minimumMacCatalystDeploymentTarget() {
return VersionTuple(13, 1);
}
@@ -94,6 +94,8 @@ void darwin::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
+ const llvm::Triple &T(getToolChain().getTriple());
+
ArgStringList CmdArgs;
assert(Inputs.size() == 1 && "Unexpected number of inputs.");
@@ -112,7 +114,6 @@ void darwin::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
// FIXME: at run-time detect assembler capabilities or rely on version
// information forwarded by -target-assembler-version.
if (Args.hasArg(options::OPT_fno_integrated_as)) {
- const llvm::Triple &T(getToolChain().getTriple());
if (!(T.isMacOSX() && T.isMacOSXVersionLT(10, 7)))
CmdArgs.push_back("-Q");
}
@@ -130,8 +131,7 @@ void darwin::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
AddMachOArch(Args, CmdArgs);
// Use -force_cpusubtype_ALL on x86 by default.
- if (getToolChain().getTriple().isX86() ||
- Args.hasArg(options::OPT_force__cpusubtype__ALL))
+ if (T.isX86() || Args.hasArg(options::OPT_force__cpusubtype__ALL))
CmdArgs.push_back("-force_cpusubtype_ALL");
if (getToolChain().getArch() != llvm::Triple::x86_64 &&
@@ -729,6 +729,54 @@ void darwin::Linker::ConstructJob(Compilation &C, const JobAction &JA,
C.addCommand(std::move(Cmd));
}
+void darwin::StaticLibTool::ConstructJob(Compilation &C, const JobAction &JA,
+ const InputInfo &Output,
+ const InputInfoList &Inputs,
+ const ArgList &Args,
+ const char *LinkingOutput) const {
+ const Driver &D = getToolChain().getDriver();
+
+ // Silence warning for "clang -g foo.o -o foo"
+ Args.ClaimAllArgs(options::OPT_g_Group);
+ // and "clang -emit-llvm foo.o -o foo"
+ Args.ClaimAllArgs(options::OPT_emit_llvm);
+ // and for "clang -w foo.o -o foo". Other warning options are already
+ // handled somewhere else.
+ Args.ClaimAllArgs(options::OPT_w);
+ // Silence warnings when linking C code with a C++ '-stdlib' argument.
+ Args.ClaimAllArgs(options::OPT_stdlib_EQ);
+
+ // libtool <options> <output_file> <input_files>
+ ArgStringList CmdArgs;
+ // Create and insert file members with a deterministic index.
+ CmdArgs.push_back("-static");
+ CmdArgs.push_back("-D");
+ CmdArgs.push_back("-no_warning_for_no_symbols");
+ CmdArgs.push_back("-o");
+ CmdArgs.push_back(Output.getFilename());
+
+ for (const auto &II : Inputs) {
+ if (II.isFilename()) {
+ CmdArgs.push_back(II.getFilename());
+ }
+ }
+
+ // Delete old output archive file if it already exists before generating a new
+ // archive file.
+ const auto *OutputFileName = Output.getFilename();
+ if (Output.isFilename() && llvm::sys::fs::exists(OutputFileName)) {
+ if (std::error_code EC = llvm::sys::fs::remove(OutputFileName)) {
+ D.Diag(diag::err_drv_unable_to_remove_file) << EC.message();
+ return;
+ }
+ }
+
+ const char *Exec = Args.MakeArgString(getToolChain().GetStaticLibToolPath());
+ C.addCommand(std::make_unique<Command>(JA, *this,
+ ResponseFileSupport::AtFileUTF8(),
+ Exec, CmdArgs, Inputs, Output));
+}
+
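As an illustration with made-up file names, a static-library job built this way ends up running roughly

  libtool -static -D -no_warning_for_no_symbols -o libfoo.a a.o b.o

after first removing any pre-existing libfoo.a, so the archive is recreated with deterministic member indices rather than updated in place.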
void darwin::Lipo::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
@@ -982,6 +1030,10 @@ Tool *MachO::getTool(Action::ActionClass AC) const {
Tool *MachO::buildLinker() const { return new tools::darwin::Linker(*this); }
+Tool *MachO::buildStaticLibTool() const {
+ return new tools::darwin::StaticLibTool(*this);
+}
+
Tool *MachO::buildAssembler() const {
return new tools::darwin::Assembler(*this);
}
@@ -1305,7 +1357,7 @@ void DarwinClang::AddLinkRuntimeLibArgs(const ArgList &Args,
return;
}
- const SanitizerArgs &Sanitize = getSanitizerArgs();
+ const SanitizerArgs &Sanitize = getSanitizerArgs(Args);
if (Sanitize.needsAsanRt())
AddLinkSanitizerLibArgs(Args, CmdArgs, "asan");
if (Sanitize.needsLsanRt())
@@ -1379,6 +1431,8 @@ struct DarwinPlatform {
enum SourceKind {
/// The OS was specified using the -target argument.
TargetArg,
+ /// The OS was specified using the -mtargetos= argument.
+ MTargetOSArg,
/// The OS was specified using the -m<os>-version-min argument.
OSVersionArg,
/// The OS was specified using the OS_DEPLOYMENT_TARGET environment.
@@ -1430,7 +1484,8 @@ struct DarwinPlatform {
void addOSVersionMinArgument(DerivedArgList &Args, const OptTable &Opts) {
if (Argument)
return;
- assert(Kind != TargetArg && Kind != OSVersionArg && "Invalid kind");
+ assert(Kind != TargetArg && Kind != MTargetOSArg && Kind != OSVersionArg &&
+ "Invalid kind");
options::ID Opt;
switch (Platform) {
case DarwinPlatformKind::MacOS:
@@ -1455,6 +1510,7 @@ struct DarwinPlatform {
std::string getAsString(DerivedArgList &Args, const OptTable &Opts) {
switch (Kind) {
case TargetArg:
+ case MTargetOSArg:
case OSVersionArg:
case InferredFromSDK:
case InferredFromArch:
@@ -1466,40 +1522,54 @@ struct DarwinPlatform {
llvm_unreachable("Unsupported Darwin Source Kind");
}
- static DarwinPlatform
- createFromTarget(const llvm::Triple &TT, StringRef OSVersion, Arg *A,
- const Optional<DarwinSDKInfo> &SDKInfo) {
- DarwinPlatform Result(TargetArg, getPlatformFromOS(TT.getOS()), OSVersion,
- A);
- unsigned Major, Minor, Micro;
- TT.getOSVersion(Major, Minor, Micro);
- if (Major == 0)
- Result.HasOSVersion = false;
-
- switch (TT.getEnvironment()) {
+ void setEnvironment(llvm::Triple::EnvironmentType EnvType,
+ const VersionTuple &OSVersion,
+ const Optional<DarwinSDKInfo> &SDKInfo) {
+ switch (EnvType) {
case llvm::Triple::Simulator:
- Result.Environment = DarwinEnvironmentKind::Simulator;
+ Environment = DarwinEnvironmentKind::Simulator;
break;
case llvm::Triple::MacABI: {
+ Environment = DarwinEnvironmentKind::MacCatalyst;
// The minimum native macOS target for MacCatalyst is macOS 10.15.
- auto NativeTargetVersion = VersionTuple(10, 15);
- if (Result.HasOSVersion && SDKInfo) {
+ NativeTargetVersion = VersionTuple(10, 15);
+ if (HasOSVersion && SDKInfo) {
if (const auto *MacCatalystToMacOSMapping = SDKInfo->getVersionMapping(
DarwinSDKInfo::OSEnvPair::macCatalystToMacOSPair())) {
if (auto MacOSVersion = MacCatalystToMacOSMapping->map(
- VersionTuple(Major, Minor, Micro), NativeTargetVersion,
- None)) {
+ OSVersion, NativeTargetVersion, None)) {
NativeTargetVersion = *MacOSVersion;
}
}
}
- Result.Environment = DarwinEnvironmentKind::MacCatalyst;
- Result.NativeTargetVersion = NativeTargetVersion;
break;
}
default:
break;
}
+ }
+
+ static DarwinPlatform
+ createFromTarget(const llvm::Triple &TT, StringRef OSVersion, Arg *A,
+ const Optional<DarwinSDKInfo> &SDKInfo) {
+ DarwinPlatform Result(TargetArg, getPlatformFromOS(TT.getOS()), OSVersion,
+ A);
+ unsigned Major, Minor, Micro;
+ TT.getOSVersion(Major, Minor, Micro);
+ if (Major == 0)
+ Result.HasOSVersion = false;
+ Result.setEnvironment(TT.getEnvironment(),
+ VersionTuple(Major, Minor, Micro), SDKInfo);
+ return Result;
+ }
+ static DarwinPlatform
+ createFromMTargetOS(llvm::Triple::OSType OS, VersionTuple OSVersion,
+ llvm::Triple::EnvironmentType Environment, Arg *A,
+ const Optional<DarwinSDKInfo> &SDKInfo) {
+ DarwinPlatform Result(MTargetOSArg, getPlatformFromOS(OS),
+ OSVersion.getAsString(), A);
+ Result.InferSimulatorFromArch = false;
+ Result.setEnvironment(Environment, OSVersion, SDKInfo);
return Result;
}
static DarwinPlatform createOSVersionArg(DarwinPlatformKind Platform,
@@ -1750,7 +1820,12 @@ std::string getOSVersion(llvm::Triple::OSType OS, const llvm::Triple &Triple,
<< Triple.getOSName();
break;
case llvm::Triple::IOS:
- Triple.getiOSVersion(Major, Minor, Micro);
+ if (Triple.isMacCatalystEnvironment() && !Triple.getOSMajorVersion()) {
+ Major = 13;
+ Minor = 1;
+ Micro = 0;
+ } else
+ Triple.getiOSVersion(Major, Minor, Micro);
break;
case llvm::Triple::TvOS:
Triple.getOSVersion(Major, Minor, Micro);
@@ -1813,6 +1888,39 @@ Optional<DarwinPlatform> getDeploymentTargetFromTargetArg(
Triple, OSVersion, Args.getLastArg(options::OPT_target), SDKInfo);
}
+/// Returns the deployment target that's specified using the -mtargetos option.
+Optional<DarwinPlatform>
+getDeploymentTargetFromMTargetOSArg(DerivedArgList &Args,
+ const Driver &TheDriver,
+ const Optional<DarwinSDKInfo> &SDKInfo) {
+ auto *A = Args.getLastArg(options::OPT_mtargetos_EQ);
+ if (!A)
+ return None;
+ llvm::Triple TT(llvm::Twine("unknown-apple-") + A->getValue());
+ switch (TT.getOS()) {
+ case llvm::Triple::MacOSX:
+ case llvm::Triple::IOS:
+ case llvm::Triple::TvOS:
+ case llvm::Triple::WatchOS:
+ break;
+ default:
+ TheDriver.Diag(diag::err_drv_invalid_os_in_arg)
+ << TT.getOSName() << A->getAsString(Args);
+ return None;
+ }
+
+ unsigned Major, Minor, Micro;
+ TT.getOSVersion(Major, Minor, Micro);
+ if (!Major) {
+ TheDriver.Diag(diag::err_drv_invalid_version_number)
+ << A->getAsString(Args);
+ return None;
+ }
+ return DarwinPlatform::createFromMTargetOS(TT.getOS(),
+ VersionTuple(Major, Minor, Micro),
+ TT.getEnvironment(), A, SDKInfo);
+}
+
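A few illustrative values for the new option, to show the parsing path above: -mtargetos=ios14.1 is re-parsed as the triple "unknown-apple-ios14.1", yielding OS ios and version 14.1; -mtargetos=watchos7.0 behaves the same way for watchOS; -mtargetos=ios with no version is rejected with err_drv_invalid_version_number; and an OS outside macosx/ios/tvos/watchos, for example -mtargetos=linux5.4, is rejected with err_drv_invalid_os_in_arg.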
Optional<DarwinSDKInfo> parseSDKSettings(llvm::vfs::FileSystem &VFS,
const ArgList &Args,
const Driver &TheDriver) {
@@ -1861,6 +1969,13 @@ void Darwin::AddDeploymentTarget(DerivedArgList &Args) const {
Optional<DarwinPlatform> OSTarget =
getDeploymentTargetFromTargetArg(Args, getTriple(), getDriver(), SDKInfo);
if (OSTarget) {
+ // Disallow mixing -target and -mtargetos=.
+ if (const auto *MTargetOSArg = Args.getLastArg(options::OPT_mtargetos_EQ)) {
+ std::string TargetArgStr = OSTarget->getAsString(Args, Opts);
+ std::string MTargetOSArgStr = MTargetOSArg->getAsString(Args);
+ getDriver().Diag(diag::err_drv_cannot_mix_options)
+ << TargetArgStr << MTargetOSArgStr;
+ }
Optional<DarwinPlatform> OSVersionArgTarget =
getDeploymentTargetFromOSVersionArg(Args, getDriver());
if (OSVersionArgTarget) {
@@ -1892,6 +2007,18 @@ void Darwin::AddDeploymentTarget(DerivedArgList &Args) const {
}
}
}
+ } else if ((OSTarget = getDeploymentTargetFromMTargetOSArg(Args, getDriver(),
+ SDKInfo))) {
+ // The OS target can be specified using the -mtargetos= argument.
+ // Disallow mixing -mtargetos= and -m<os>version-min=.
+ Optional<DarwinPlatform> OSVersionArgTarget =
+ getDeploymentTargetFromOSVersionArg(Args, getDriver());
+ if (OSVersionArgTarget) {
+ std::string MTargetOSArgStr = OSTarget->getAsString(Args, Opts);
+ std::string OSVersionArgStr = OSVersionArgTarget->getAsString(Args, Opts);
+ getDriver().Diag(diag::err_drv_cannot_mix_options)
+ << MTargetOSArgStr << OSVersionArgStr;
+ }
} else {
// The OS target can be specified using the -m<os>version-min argument.
OSTarget = getDeploymentTargetFromOSVersionArg(Args, getDriver());
@@ -2656,7 +2783,7 @@ bool Darwin::SupportsEmbeddedBitcode() const {
bool MachO::isPICDefault() const { return true; }
-bool MachO::isPIEDefault() const { return false; }
+bool MachO::isPIEDefault(const llvm::opt::ArgList &Args) const { return false; }
bool MachO::isPICDefaultForced() const {
return (getArch() == llvm::Triple::x86_64 ||
diff --git a/clang/lib/Driver/ToolChains/Darwin.h b/clang/lib/Driver/ToolChains/Darwin.h
index 4de122c8d513..a307cd317ac3 100644
--- a/clang/lib/Driver/ToolChains/Darwin.h
+++ b/clang/lib/Driver/ToolChains/Darwin.h
@@ -78,6 +78,20 @@ public:
const char *LinkingOutput) const override;
};
+class LLVM_LIBRARY_VISIBILITY StaticLibTool : public MachOTool {
+public:
+ StaticLibTool(const ToolChain &TC)
+ : MachOTool("darwin::StaticLibTool", "static-lib-linker", TC) {}
+
+ bool hasIntegratedCPP() const override { return false; }
+ bool isLinkJob() const override { return true; }
+
+ void ConstructJob(Compilation &C, const JobAction &JA,
+ const InputInfo &Output, const InputInfoList &Inputs,
+ const llvm::opt::ArgList &TCArgs,
+ const char *LinkingOutput) const override;
+};
+
class LLVM_LIBRARY_VISIBILITY Lipo : public MachOTool {
public:
Lipo(const ToolChain &TC) : MachOTool("darwin::Lipo", "lipo", TC) {}
@@ -125,6 +139,7 @@ class LLVM_LIBRARY_VISIBILITY MachO : public ToolChain {
protected:
Tool *buildAssembler() const override;
Tool *buildLinker() const override;
+ Tool *buildStaticLibTool() const override;
Tool *getTool(Action::ActionClass AC) const override;
private:
@@ -239,7 +254,7 @@ public:
}
bool isPICDefault() const override;
- bool isPIEDefault() const override;
+ bool isPIEDefault(const llvm::opt::ArgList &Args) const override;
bool isPICDefaultForced() const override;
bool SupportsProfiling() const override;
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp
index 1bfad6115d51..b82c5d7600df 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -37,8 +37,9 @@ void Flang::AddFortranDialectOptions(const ArgList &Args,
void Flang::AddPreprocessingOptions(const ArgList &Args,
ArgStringList &CmdArgs) const {
- Args.AddAllArgs(CmdArgs, {options::OPT_D, options::OPT_U, options::OPT_I,
- options::OPT_cpp, options::OPT_nocpp});
+ Args.AddAllArgs(CmdArgs,
+ {options::OPT_P, options::OPT_D, options::OPT_U,
+ options::OPT_I, options::OPT_cpp, options::OPT_nocpp});
}
void Flang::AddOtherOptions(const ArgList &Args, ArgStringList &CmdArgs) const {
diff --git a/clang/lib/Driver/ToolChains/FreeBSD.cpp b/clang/lib/Driver/ToolChains/FreeBSD.cpp
index 5dcf74dabf4f..dc05f9893465 100644
--- a/clang/lib/Driver/ToolChains/FreeBSD.cpp
+++ b/clang/lib/Driver/ToolChains/FreeBSD.cpp
@@ -99,7 +99,7 @@ void freebsd::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
case llvm::Triple::sparc:
case llvm::Triple::sparcel:
case llvm::Triple::sparcv9: {
- std::string CPU = getCPUName(Args, getToolChain().getTriple());
+ std::string CPU = getCPUName(D, Args, getToolChain().getTriple());
CmdArgs.push_back(
sparc::getSparcAsmModeForCPU(CPU, getToolChain().getTriple()));
AddAssemblerKPIC(getToolChain(), Args, CmdArgs);
@@ -110,7 +110,7 @@ void freebsd::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
for (const Arg *A : Args.filtered(options::OPT_ffile_prefix_map_EQ,
options::OPT_fdebug_prefix_map_EQ)) {
StringRef Map = A->getValue();
- if (Map.find('=') == StringRef::npos)
+ if (!Map.contains('='))
D.Diag(diag::err_drv_invalid_argument_to_option)
<< Map << A->getOption().getName();
else {
@@ -145,7 +145,7 @@ void freebsd::Linker::ConstructJob(Compilation &C, const JobAction &JA,
const llvm::Triple::ArchType Arch = ToolChain.getArch();
const bool IsPIE =
!Args.hasArg(options::OPT_shared) &&
- (Args.hasArg(options::OPT_pie) || ToolChain.isPIEDefault());
+ (Args.hasArg(options::OPT_pie) || ToolChain.isPIEDefault(Args));
ArgStringList CmdArgs;
// Silence warning for "clang -g foo.o -o foo"
@@ -467,7 +467,9 @@ bool FreeBSD::HasNativeLLVMSupport() const { return true; }
bool FreeBSD::IsUnwindTablesDefault(const ArgList &Args) const { return true; }
-bool FreeBSD::isPIEDefault() const { return getSanitizerArgs().requiresPIE(); }
+bool FreeBSD::isPIEDefault(const llvm::opt::ArgList &Args) const {
+ return getSanitizerArgs(Args).requiresPIE();
+}
SanitizerMask FreeBSD::getSupportedSanitizers() const {
const bool IsAArch64 = getTriple().getArch() == llvm::Triple::aarch64;
diff --git a/clang/lib/Driver/ToolChains/FreeBSD.h b/clang/lib/Driver/ToolChains/FreeBSD.h
index abc0876cef26..2a721c750a64 100644
--- a/clang/lib/Driver/ToolChains/FreeBSD.h
+++ b/clang/lib/Driver/ToolChains/FreeBSD.h
@@ -74,7 +74,7 @@ public:
llvm::ExceptionHandling
GetExceptionModel(const llvm::opt::ArgList &Args) const override;
bool IsUnwindTablesDefault(const llvm::opt::ArgList &Args) const override;
- bool isPIEDefault() const override;
+ bool isPIEDefault(const llvm::opt::ArgList &Args) const override;
SanitizerMask getSupportedSanitizers() const override;
unsigned GetDefaultDwarfVersion() const override;
// Until dtrace (via CTF) and LLDB can deal with distributed debug info,
diff --git a/clang/lib/Driver/ToolChains/Fuchsia.cpp b/clang/lib/Driver/ToolChains/Fuchsia.cpp
index fd9804a7f353..a7afec6963a1 100644
--- a/clang/lib/Driver/ToolChains/Fuchsia.cpp
+++ b/clang/lib/Driver/ToolChains/Fuchsia.cpp
@@ -60,6 +60,8 @@ void fuchsia::Linker::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back("rodynamic");
CmdArgs.push_back("-z");
CmdArgs.push_back("separate-loadable-segments");
+ CmdArgs.push_back("-z");
+ CmdArgs.push_back("rel");
CmdArgs.push_back("--pack-dyn-relocs=relr");
}
@@ -89,7 +91,7 @@ void fuchsia::Linker::ConstructJob(Compilation &C, const JobAction &JA,
else if (Args.hasArg(options::OPT_shared))
CmdArgs.push_back("-shared");
- const SanitizerArgs &SanArgs = ToolChain.getSanitizerArgs();
+ const SanitizerArgs &SanArgs = ToolChain.getSanitizerArgs(Args);
if (!Args.hasArg(options::OPT_shared)) {
std::string Dyld = D.DyldPrefix;
@@ -247,17 +249,16 @@ Fuchsia::Fuchsia(const Driver &D, const llvm::Triple &Triple,
Multilibs.FilterOut([&](const Multilib &M) {
std::vector<std::string> RD = FilePaths(M);
- return std::all_of(RD.begin(), RD.end(), [&](std::string P) {
- return !getVFS().exists(P);
- });
+ return llvm::all_of(RD, [&](std::string P) { return !getVFS().exists(P); });
});
Multilib::flags_list Flags;
addMultilibFlag(
Args.hasFlag(options::OPT_fexceptions, options::OPT_fno_exceptions, true),
"fexceptions", Flags);
- addMultilibFlag(getSanitizerArgs().needsAsanRt(), "fsanitize=address", Flags);
- addMultilibFlag(getSanitizerArgs().needsHwasanRt(), "fsanitize=hwaddress",
+ addMultilibFlag(getSanitizerArgs(Args).needsAsanRt(), "fsanitize=address",
+ Flags);
+ addMultilibFlag(getSanitizerArgs(Args).needsHwasanRt(), "fsanitize=hwaddress",
Flags);
addMultilibFlag(
@@ -437,13 +438,3 @@ SanitizerMask Fuchsia::getDefaultSanitizers() const {
}
return Res;
}
-
-void Fuchsia::addProfileRTLibs(const llvm::opt::ArgList &Args,
- llvm::opt::ArgStringList &CmdArgs) const {
- // Add linker option -u__llvm_profile_runtime to cause runtime
- // initialization module to be linked in.
- if (needsProfileRT(Args))
- CmdArgs.push_back(Args.MakeArgString(
- Twine("-u", llvm::getInstrProfRuntimeHookVarName())));
- ToolChain::addProfileRTLibs(Args, CmdArgs);
-}
diff --git a/clang/lib/Driver/ToolChains/Fuchsia.h b/clang/lib/Driver/ToolChains/Fuchsia.h
index 07adf9b7101d..c0e69df22821 100644
--- a/clang/lib/Driver/ToolChains/Fuchsia.h
+++ b/clang/lib/Driver/ToolChains/Fuchsia.h
@@ -54,7 +54,9 @@ public:
return true;
}
bool isPICDefault() const override { return false; }
- bool isPIEDefault() const override { return true; }
+ bool isPIEDefault(const llvm::opt::ArgList &Args) const override {
+ return true;
+ }
bool isPICDefaultForced() const override { return false; }
llvm::DebuggerKind getDefaultDebuggerTuning() const override {
return llvm::DebuggerKind::GDB;
@@ -71,9 +73,6 @@ public:
SanitizerMask getSupportedSanitizers() const override;
SanitizerMask getDefaultSanitizers() const override;
- void addProfileRTLibs(const llvm::opt::ArgList &Args,
- llvm::opt::ArgStringList &CmdArgs) const override;
-
RuntimeLibType
GetRuntimeLibType(const llvm::opt::ArgList &Args) const override;
CXXStdlibType
diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp
index da39f29e4619..7aeadd84dfee 100644
--- a/clang/lib/Driver/ToolChains/Gnu.cpp
+++ b/clang/lib/Driver/ToolChains/Gnu.cpp
@@ -312,7 +312,7 @@ static bool getPIE(const ArgList &Args, const ToolChain &TC) {
Arg *A = Args.getLastArg(options::OPT_pie, options::OPT_no_pie,
options::OPT_nopie);
if (!A)
- return TC.isPIEDefault();
+ return TC.isPIEDefault(Args);
return A->getOption().matches(options::OPT_pie);
}
@@ -429,11 +429,6 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back("text");
}
- if (ToolChain.isNoExecStackDefault()) {
- CmdArgs.push_back("-z");
- CmdArgs.push_back("noexecstack");
- }
-
if (Args.hasArg(options::OPT_rdynamic))
CmdArgs.push_back("-export-dynamic");
@@ -451,7 +446,7 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA,
// Most Android ARM64 targets should enable the linker fix for erratum
// 843419. Only non-Cortex-A53 devices are allowed to skip this flag.
if (Arch == llvm::Triple::aarch64 && isAndroid) {
- std::string CPU = getCPUName(Args, Triple);
+ std::string CPU = getCPUName(D, Args, Triple);
if (CPU.empty() || CPU == "generic" || CPU == "cortex-a53")
CmdArgs.push_back("--fix-cortex-a53-843419");
}
@@ -473,17 +468,12 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA,
return;
}
- if (IsStatic) {
- if (Arch == llvm::Triple::arm || Arch == llvm::Triple::armeb ||
- Arch == llvm::Triple::thumb || Arch == llvm::Triple::thumbeb)
- CmdArgs.push_back("-Bstatic");
- else
- CmdArgs.push_back("-static");
- } else if (Args.hasArg(options::OPT_shared)) {
+ if (Args.hasArg(options::OPT_shared))
CmdArgs.push_back("-shared");
- }
- if (!IsStatic) {
+ if (IsStatic) {
+ CmdArgs.push_back("-static");
+ } else {
if (Args.hasArg(options::OPT_rdynamic))
CmdArgs.push_back("-export-dynamic");
@@ -534,10 +524,10 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA,
}
if (P.empty()) {
const char *crtbegin;
- if (IsStatic)
- crtbegin = isAndroid ? "crtbegin_static.o" : "crtbeginT.o";
- else if (Args.hasArg(options::OPT_shared))
+ if (Args.hasArg(options::OPT_shared))
crtbegin = isAndroid ? "crtbegin_so.o" : "crtbeginS.o";
+ else if (IsStatic)
+ crtbegin = isAndroid ? "crtbegin_static.o" : "crtbeginT.o";
else if (IsPIE || IsStaticPIE)
crtbegin = isAndroid ? "crtbegin_dynamic.o" : "crtbeginS.o";
else
@@ -712,10 +702,6 @@ void tools::gnutools::Assembler::ConstructJob(Compilation &C,
}
}
- if (getToolChain().isNoExecStackDefault()) {
- CmdArgs.push_back("--noexecstack");
- }
-
switch (getToolChain().getArch()) {
default:
break;
@@ -734,32 +720,32 @@ void tools::gnutools::Assembler::ConstructJob(Compilation &C,
CmdArgs.push_back("-a32");
CmdArgs.push_back("-mppc");
CmdArgs.push_back("-mbig-endian");
- CmdArgs.push_back(
- ppc::getPPCAsmModeForCPU(getCPUName(Args, getToolChain().getTriple())));
+ CmdArgs.push_back(ppc::getPPCAsmModeForCPU(
+ getCPUName(D, Args, getToolChain().getTriple())));
break;
}
case llvm::Triple::ppcle: {
CmdArgs.push_back("-a32");
CmdArgs.push_back("-mppc");
CmdArgs.push_back("-mlittle-endian");
- CmdArgs.push_back(
- ppc::getPPCAsmModeForCPU(getCPUName(Args, getToolChain().getTriple())));
+ CmdArgs.push_back(ppc::getPPCAsmModeForCPU(
+ getCPUName(D, Args, getToolChain().getTriple())));
break;
}
case llvm::Triple::ppc64: {
CmdArgs.push_back("-a64");
CmdArgs.push_back("-mppc64");
CmdArgs.push_back("-mbig-endian");
- CmdArgs.push_back(
- ppc::getPPCAsmModeForCPU(getCPUName(Args, getToolChain().getTriple())));
+ CmdArgs.push_back(ppc::getPPCAsmModeForCPU(
+ getCPUName(D, Args, getToolChain().getTriple())));
break;
}
case llvm::Triple::ppc64le: {
CmdArgs.push_back("-a64");
CmdArgs.push_back("-mppc64");
CmdArgs.push_back("-mlittle-endian");
- CmdArgs.push_back(
- ppc::getPPCAsmModeForCPU(getCPUName(Args, getToolChain().getTriple())));
+ CmdArgs.push_back(ppc::getPPCAsmModeForCPU(
+ getCPUName(D, Args, getToolChain().getTriple())));
break;
}
case llvm::Triple::riscv32:
@@ -775,7 +761,7 @@ void tools::gnutools::Assembler::ConstructJob(Compilation &C,
case llvm::Triple::sparc:
case llvm::Triple::sparcel: {
CmdArgs.push_back("-32");
- std::string CPU = getCPUName(Args, getToolChain().getTriple());
+ std::string CPU = getCPUName(D, Args, getToolChain().getTriple());
CmdArgs.push_back(
sparc::getSparcAsmModeForCPU(CPU, getToolChain().getTriple()));
AddAssemblerKPIC(getToolChain(), Args, CmdArgs);
@@ -783,7 +769,7 @@ void tools::gnutools::Assembler::ConstructJob(Compilation &C,
}
case llvm::Triple::sparcv9: {
CmdArgs.push_back("-64");
- std::string CPU = getCPUName(Args, getToolChain().getTriple());
+ std::string CPU = getCPUName(D, Args, getToolChain().getTriple());
CmdArgs.push_back(
sparc::getSparcAsmModeForCPU(CPU, getToolChain().getTriple()));
AddAssemblerKPIC(getToolChain(), Args, CmdArgs);
@@ -931,7 +917,7 @@ void tools::gnutools::Assembler::ConstructJob(Compilation &C,
for (const Arg *A : Args.filtered(options::OPT_ffile_prefix_map_EQ,
options::OPT_fdebug_prefix_map_EQ)) {
StringRef Map = A->getValue();
- if (Map.find('=') == StringRef::npos)
+ if (!Map.contains('='))
D.Diag(diag::err_drv_invalid_argument_to_option)
<< Map << A->getOption().getName();
else {
@@ -1086,7 +1072,8 @@ static bool findMipsCsMultilibs(const Multilib::flags_list &Flags,
.flag("-m32")
.flag("-mabi=n32");
- Multilib M32 = Multilib().flag("-m64").flag("+m32").flag("-mabi=n32");
+ Multilib M32 =
+ Multilib().gccSuffix("/32").flag("-m64").flag("+m32").flag("-mabi=n32");
DebianMipsMultilibs =
MultilibSet().Either(M32, M64, MAbiN32).FilterOut(NonExistent);
@@ -1473,7 +1460,7 @@ bool clang::driver::findMIPSMultilibs(const Driver &D,
addMultilibFlag(CPUName == "mips64r6", "march=mips64r6", Flags);
addMultilibFlag(isMicroMips(Args), "mmicromips", Flags);
addMultilibFlag(tools::mips::isUCLibc(Args), "muclibc", Flags);
- addMultilibFlag(tools::mips::isNaN2008(Args, TargetTriple), "mnan=2008",
+ addMultilibFlag(tools::mips::isNaN2008(D, Args, TargetTriple), "mnan=2008",
Flags);
addMultilibFlag(ABIName == "n32", "mabi=n32", Flags);
addMultilibFlag(ABIName == "n64", "mabi=n64", Flags);
@@ -2052,7 +2039,8 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes(
// Non-Solaris is much simpler - most systems just go with "/usr".
if (SysRoot.empty() && TargetTriple.getOS() == llvm::Triple::Linux) {
- // Yet, still look for RHEL devtoolsets.
+ // Yet, still look for RHEL/CentOS devtoolsets and gcc-toolsets.
+ Prefixes.push_back("/opt/rh/gcc-toolset-10/root/usr");
Prefixes.push_back("/opt/rh/devtoolset-10/root/usr");
Prefixes.push_back("/opt/rh/devtoolset-9/root/usr");
Prefixes.push_back("/opt/rh/devtoolset-8/root/usr");
@@ -2074,24 +2062,28 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes(
// Declare a bunch of static data sets that we'll select between below. These
// are specifically designed to always refer to string literals to avoid any
// lifetime or initialization issues.
+ //
+ // The *Triples variables hard code some triples so that, for example,
+ // --target=aarch64 (incomplete triple) can detect lib/aarch64-linux-gnu.
+ // They are not needed when the user has correct LLVM_DEFAULT_TARGET_TRIPLE
+ // and always uses the full --target (e.g. --target=aarch64-linux-gnu). The
+ // lists should shrink over time. Please don't add more elements to *Triples.
static const char *const AArch64LibDirs[] = {"/lib64", "/lib"};
static const char *const AArch64Triples[] = {
"aarch64-none-linux-gnu", "aarch64-linux-gnu", "aarch64-redhat-linux",
- "aarch64-suse-linux", "aarch64-linux-android"};
+ "aarch64-suse-linux"};
static const char *const AArch64beLibDirs[] = {"/lib"};
static const char *const AArch64beTriples[] = {"aarch64_be-none-linux-gnu",
"aarch64_be-linux-gnu"};
static const char *const ARMLibDirs[] = {"/lib"};
- static const char *const ARMTriples[] = {"arm-linux-gnueabi",
- "arm-linux-androideabi"};
+ static const char *const ARMTriples[] = {"arm-linux-gnueabi"};
static const char *const ARMHFTriples[] = {"arm-linux-gnueabihf",
"armv7hl-redhat-linux-gnueabi",
"armv6hl-suse-linux-gnueabi",
"armv7hl-suse-linux-gnueabi"};
static const char *const ARMebLibDirs[] = {"/lib"};
- static const char *const ARMebTriples[] = {"armeb-linux-gnueabi",
- "armeb-linux-androideabi"};
+ static const char *const ARMebTriples[] = {"armeb-linux-gnueabi"};
static const char *const ARMebHFTriples[] = {
"armeb-linux-gnueabihf", "armebv7hl-redhat-linux-gnueabi"};
@@ -2105,31 +2097,28 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes(
"x86_64-redhat-linux", "x86_64-suse-linux",
"x86_64-manbo-linux-gnu", "x86_64-linux-gnu",
"x86_64-slackware-linux", "x86_64-unknown-linux",
- "x86_64-amazon-linux", "x86_64-linux-android"};
+ "x86_64-amazon-linux"};
static const char *const X32Triples[] = {"x86_64-linux-gnux32",
"x86_64-pc-linux-gnux32"};
static const char *const X32LibDirs[] = {"/libx32", "/lib"};
static const char *const X86LibDirs[] = {"/lib32", "/lib"};
static const char *const X86Triples[] = {
- "i586-linux-gnu", "i686-linux-gnu",
- "i686-pc-linux-gnu", "i386-redhat-linux6E",
- "i686-redhat-linux", "i386-redhat-linux",
- "i586-suse-linux", "i686-montavista-linux",
- "i686-linux-android", "i686-gnu",
+ "i586-linux-gnu", "i686-linux-gnu", "i686-pc-linux-gnu",
+ "i386-redhat-linux6E", "i686-redhat-linux", "i386-redhat-linux",
+ "i586-suse-linux", "i686-montavista-linux", "i686-gnu",
};
static const char *const M68kLibDirs[] = {"/lib"};
static const char *const M68kTriples[] = {
"m68k-linux-gnu", "m68k-unknown-linux-gnu", "m68k-suse-linux"};
- static const char *const MIPSLibDirs[] = {"/lib"};
+ static const char *const MIPSLibDirs[] = {"/libo32", "/lib"};
static const char *const MIPSTriples[] = {
"mips-linux-gnu", "mips-mti-linux", "mips-mti-linux-gnu",
"mips-img-linux-gnu", "mipsisa32r6-linux-gnu"};
- static const char *const MIPSELLibDirs[] = {"/lib"};
+ static const char *const MIPSELLibDirs[] = {"/libo32", "/lib"};
static const char *const MIPSELTriples[] = {
- "mipsel-linux-gnu", "mips-img-linux-gnu", "mipsisa32r6el-linux-gnu",
- "mipsel-linux-android"};
+ "mipsel-linux-gnu", "mips-img-linux-gnu", "mipsisa32r6el-linux-gnu"};
static const char *const MIPS64LibDirs[] = {"/lib64", "/lib"};
static const char *const MIPS64Triples[] = {
@@ -2140,8 +2129,7 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes(
static const char *const MIPS64ELTriples[] = {
"mips64el-linux-gnu", "mips-mti-linux-gnu",
"mips-img-linux-gnu", "mips64el-linux-gnuabi64",
- "mipsisa64r6el-linux-gnu", "mipsisa64r6el-linux-gnuabi64",
- "mips64el-linux-android"};
+ "mipsisa64r6el-linux-gnu", "mipsisa64r6el-linux-gnuabi64"};
static const char *const MIPSN32LibDirs[] = {"/lib32"};
static const char *const MIPSN32Triples[] = {"mips64-linux-gnuabin32",
@@ -2181,9 +2169,7 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes(
static const char *const RISCV64LibDirs[] = {"/lib64", "/lib"};
static const char *const RISCV64Triples[] = {"riscv64-unknown-linux-gnu",
"riscv64-linux-gnu",
- "riscv64-unknown-elf",
- "riscv64-redhat-linux",
- "riscv64-suse-linux"};
+ "riscv64-unknown-elf"};
static const char *const SPARCv8LibDirs[] = {"/lib32", "/lib"};
static const char *const SPARCv8Triples[] = {"sparc-linux-gnu",
@@ -2708,6 +2694,7 @@ bool Generic_GCC::IsUnwindTablesDefault(const ArgList &Args) const {
case llvm::Triple::ppcle:
case llvm::Triple::ppc64:
case llvm::Triple::ppc64le:
+ case llvm::Triple::x86:
case llvm::Triple::x86_64:
return true;
default:
@@ -2727,7 +2714,9 @@ bool Generic_GCC::isPICDefault() const {
}
}
-bool Generic_GCC::isPIEDefault() const { return false; }
+bool Generic_GCC::isPIEDefault(const llvm::opt::ArgList &Args) const {
+ return false;
+}
bool Generic_GCC::isPICDefaultForced() const {
return getArch() == llvm::Triple::x86_64 && getTriple().isOSWindows();
diff --git a/clang/lib/Driver/ToolChains/Gnu.h b/clang/lib/Driver/ToolChains/Gnu.h
index 40fd756a5653..4eb7ab0215ab 100644
--- a/clang/lib/Driver/ToolChains/Gnu.h
+++ b/clang/lib/Driver/ToolChains/Gnu.h
@@ -298,7 +298,7 @@ public:
bool IsUnwindTablesDefault(const llvm::opt::ArgList &Args) const override;
bool isPICDefault() const override;
- bool isPIEDefault() const override;
+ bool isPIEDefault(const llvm::opt::ArgList &Args) const override;
bool isPICDefaultForced() const override;
bool IsIntegratedAssemblerDefault() const override;
llvm::opt::DerivedArgList *
diff --git a/clang/lib/Driver/ToolChains/HIP.cpp b/clang/lib/Driver/ToolChains/HIP.cpp
index 59d58aadb687..07af1a0457c7 100644
--- a/clang/lib/Driver/ToolChains/HIP.cpp
+++ b/clang/lib/Driver/ToolChains/HIP.cpp
@@ -16,6 +16,7 @@
#include "clang/Driver/DriverDiagnostic.h"
#include "clang/Driver/InputInfo.h"
#include "clang/Driver/Options.h"
+#include "clang/Driver/SanitizerArgs.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
@@ -37,6 +38,43 @@ namespace {
const unsigned HIPCodeObjectAlign = 4096;
} // namespace
+static bool shouldSkipSanitizeOption(const ToolChain &TC,
+ const llvm::opt::ArgList &DriverArgs,
+ StringRef TargetID,
+ const llvm::opt::Arg *A) {
+ // For actions without targetID, do nothing.
+ if (TargetID.empty())
+ return false;
+ Option O = A->getOption();
+ if (!O.matches(options::OPT_fsanitize_EQ))
+ return false;
+
+ if (!DriverArgs.hasFlag(options::OPT_fgpu_sanitize,
+ options::OPT_fno_gpu_sanitize))
+ return true;
+
+ auto &Diags = TC.getDriver().getDiags();
+
+ // For simplicity, we only allow -fsanitize=address
+ SanitizerMask K = parseSanitizerValue(A->getValue(), /*AllowGroups=*/false);
+ if (K != SanitizerKind::Address)
+ return true;
+
+ llvm::StringMap<bool> FeatureMap;
+ auto OptionalGpuArch = parseTargetID(TC.getTriple(), TargetID, &FeatureMap);
+
+ assert(OptionalGpuArch && "Invalid Target ID");
+ (void)OptionalGpuArch;
+ auto Loc = FeatureMap.find("xnack");
+ if (Loc == FeatureMap.end() || !Loc->second) {
+ Diags.Report(
+ clang::diag::warn_drv_unsupported_option_for_offload_arch_req_feature)
+ << A->getAsString(DriverArgs) << TargetID << "xnack+";
+ return true;
+ }
+ return false;
+}
+
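In effect, with illustrative offload-arch values: for --offload-arch=gfx908:xnack+ a -fsanitize=address argument is kept for the device compilation; for plain gfx908 or gfx908:xnack- it is dropped and warn_drv_unsupported_option_for_offload_arch_req_feature is emitted; any other -fsanitize= value, or an effective -fno-gpu-sanitize, likewise causes the option to be skipped for the device side.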
void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA,
const InputInfoList &Inputs,
const InputInfo &Output,
@@ -86,12 +124,6 @@ void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA,
for (auto Input : Inputs)
LldArgs.push_back(Input.getFilename());
- if (Args.hasFlag(options::OPT_fgpu_sanitize, options::OPT_fno_gpu_sanitize,
- false))
- llvm::for_each(TC.getHIPDeviceLibs(Args), [&](StringRef BCFile) {
- LldArgs.push_back(Args.MakeArgString(BCFile));
- });
-
const char *Lld = Args.MakeArgString(getToolChain().GetProgramPath("lld"));
C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
Lld, LldArgs, Inputs, Output));
@@ -237,6 +269,14 @@ HIPToolChain::HIPToolChain(const Driver &D, const llvm::Triple &Triple,
// Lookup binaries into the driver directory, this is used to
// discover the clang-offload-bundler executable.
getProgramPaths().push_back(getDriver().Dir);
+
+ // Diagnose unsupported sanitizer options only once.
+ for (auto A : Args.filtered(options::OPT_fsanitize_EQ)) {
+ SanitizerMask K = parseSanitizerValue(A->getValue(), /*AllowGroups=*/false);
+ if (K != SanitizerKind::Address)
+ D.getDiags().Report(clang::diag::warn_drv_unsupported_option_for_target)
+ << A->getAsString(Args) << getTriple().str();
+ }
}
void HIPToolChain::addClangTargetOptions(
@@ -276,9 +316,10 @@ void HIPToolChain::addClangTargetOptions(
CC1Args.push_back("-fapply-global-visibility-to-externs");
}
- llvm::for_each(getHIPDeviceLibs(DriverArgs), [&](StringRef BCFile) {
- CC1Args.push_back("-mlink-builtin-bitcode");
- CC1Args.push_back(DriverArgs.MakeArgString(BCFile));
+ llvm::for_each(getHIPDeviceLibs(DriverArgs), [&](auto BCFile) {
+ CC1Args.push_back(BCFile.ShouldInternalize ? "-mlink-builtin-bitcode"
+ : "-mlink-bitcode-file");
+ CC1Args.push_back(DriverArgs.MakeArgString(BCFile.Path));
});
}
@@ -294,7 +335,8 @@ HIPToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
const OptTable &Opts = getDriver().getOpts();
for (Arg *A : Args) {
- if (!shouldSkipArgument(A))
+ if (!shouldSkipArgument(A) &&
+ !shouldSkipSanitizeOption(*this, Args, BoundArch, A))
DAL->append(A);
}
@@ -359,9 +401,9 @@ VersionTuple HIPToolChain::computeMSVCVersion(const Driver *D,
return HostTC.computeMSVCVersion(D, Args);
}
-llvm::SmallVector<std::string, 12>
+llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12>
HIPToolChain::getHIPDeviceLibs(const llvm::opt::ArgList &DriverArgs) const {
- llvm::SmallVector<std::string, 12> BCLibs;
+ llvm::SmallVector<BitCodeLibraryInfo, 12> BCLibs;
if (DriverArgs.hasArg(options::OPT_nogpulib))
return {};
ArgStringList LibraryPaths;
@@ -382,7 +424,7 @@ HIPToolChain::getHIPDeviceLibs(const llvm::opt::ArgList &DriverArgs) const {
llvm::sys::path::append(Path, BCName);
FullName = Path;
if (llvm::sys::fs::exists(FullName)) {
- BCLibs.push_back(FullName.str());
+ BCLibs.push_back(FullName);
return;
}
}
@@ -395,37 +437,11 @@ HIPToolChain::getHIPDeviceLibs(const llvm::opt::ArgList &DriverArgs) const {
}
StringRef GpuArch = getGPUArch(DriverArgs);
assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
- (void)GpuArch;
- auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
- const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind);
-
- std::string LibDeviceFile = RocmInstallation.getLibDeviceFile(CanonArch);
- if (LibDeviceFile.empty()) {
- getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 1 << GpuArch;
- return {};
- }
// If --hip-device-lib is not set, add the default bitcode libraries.
- // TODO: There are way too many flags that change this. Do we need to check
- // them all?
- bool DAZ = DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero,
- options::OPT_fno_gpu_flush_denormals_to_zero,
- getDefaultDenormsAreZeroForTarget(Kind));
- bool FiniteOnly =
- DriverArgs.hasFlag(options::OPT_ffinite_math_only,
- options::OPT_fno_finite_math_only, false);
- bool UnsafeMathOpt =
- DriverArgs.hasFlag(options::OPT_funsafe_math_optimizations,
- options::OPT_fno_unsafe_math_optimizations, false);
- bool FastRelaxedMath = DriverArgs.hasFlag(
- options::OPT_ffast_math, options::OPT_fno_fast_math, false);
- bool CorrectSqrt = DriverArgs.hasFlag(
- options::OPT_fhip_fp32_correctly_rounded_divide_sqrt,
- options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt);
- bool Wave64 = isWave64(DriverArgs, Kind);
-
if (DriverArgs.hasFlag(options::OPT_fgpu_sanitize,
- options::OPT_fno_gpu_sanitize, false)) {
+ options::OPT_fno_gpu_sanitize) &&
+ getSanitizerArgs(DriverArgs).needsAsanRt()) {
auto AsanRTL = RocmInstallation.getAsanRTLPath();
if (AsanRTL.empty()) {
unsigned DiagID = getDriver().getDiags().getCustomDiagID(
@@ -436,16 +452,15 @@ HIPToolChain::getHIPDeviceLibs(const llvm::opt::ArgList &DriverArgs) const {
getDriver().Diag(DiagID);
return {};
} else
- BCLibs.push_back(AsanRTL.str());
+ BCLibs.push_back({AsanRTL.str(), /*ShouldInternalize=*/false});
}
// Add the HIP specific bitcode library.
- BCLibs.push_back(RocmInstallation.getHIPPath().str());
+ BCLibs.push_back(RocmInstallation.getHIPPath());
- // Add the generic set of libraries.
- BCLibs.append(RocmInstallation.getCommonBitcodeLibs(
- DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt,
- FastRelaxedMath, CorrectSqrt));
+ // Add common device libraries like ocml etc.
+ for (auto N : getCommonDeviceLibNames(DriverArgs, GpuArch.str()))
+ BCLibs.push_back(StringRef(N));
// Add instrument lib.
auto InstLib =
@@ -453,7 +468,7 @@ HIPToolChain::getHIPDeviceLibs(const llvm::opt::ArgList &DriverArgs) const {
if (InstLib.empty())
return BCLibs;
if (llvm::sys::fs::exists(InstLib))
- BCLibs.push_back(InstLib.str());
+ BCLibs.push_back(InstLib);
else
getDriver().Diag(diag::err_drv_no_such_file) << InstLib;
}
@@ -466,22 +481,6 @@ void HIPToolChain::checkTargetID(const llvm::opt::ArgList &DriverArgs) const {
if (PTID.OptionalTargetID && !PTID.OptionalGPUArch) {
getDriver().Diag(clang::diag::err_drv_bad_target_id)
<< PTID.OptionalTargetID.getValue();
- return;
- }
-
- assert(PTID.OptionalFeatures && "Invalid return from getParsedTargetID");
- auto &FeatureMap = PTID.OptionalFeatures.getValue();
- // Sanitizer is not supported with xnack-.
- if (DriverArgs.hasFlag(options::OPT_fgpu_sanitize,
- options::OPT_fno_gpu_sanitize, false)) {
- auto Loc = FeatureMap.find("xnack");
- if (Loc != FeatureMap.end() && !Loc->second) {
- auto &Diags = getDriver().getDiags();
- auto DiagID = Diags.getCustomDiagID(
- DiagnosticsEngine::Error,
- "'-fgpu-sanitize' is not compatible with offload arch '%0'. "
- "Use an offload arch without 'xnack-' instead");
- Diags.Report(DiagID) << PTID.OptionalTargetID.getValue();
- }
}
+ return;
}
diff --git a/clang/lib/Driver/ToolChains/HIP.h b/clang/lib/Driver/ToolChains/HIP.h
index 3cced0a320dc..60b3d69b3f52 100644
--- a/clang/lib/Driver/ToolChains/HIP.h
+++ b/clang/lib/Driver/ToolChains/HIP.h
@@ -83,7 +83,7 @@ public:
llvm::opt::ArgStringList &CC1Args) const override;
void AddHIPIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const override;
- llvm::SmallVector<std::string, 12>
+ llvm::SmallVector<BitCodeLibraryInfo, 12>
getHIPDeviceLibs(const llvm::opt::ArgList &Args) const override;
SanitizerMask getSupportedSanitizers() const override;
@@ -92,7 +92,7 @@ public:
computeMSVCVersion(const Driver *D,
const llvm::opt::ArgList &Args) const override;
- unsigned GetDefaultDwarfVersion() const override { return 4; }
+ unsigned GetDefaultDwarfVersion() const override { return 5; }
const ToolChain &HostTC;
void checkTargetID(const llvm::opt::ArgList &DriverArgs) const override;
diff --git a/clang/lib/Driver/ToolChains/Haiku.h b/clang/lib/Driver/ToolChains/Haiku.h
index 2bc98322bebf..669379a21605 100644
--- a/clang/lib/Driver/ToolChains/Haiku.h
+++ b/clang/lib/Driver/ToolChains/Haiku.h
@@ -22,7 +22,7 @@ public:
Haiku(const Driver &D, const llvm::Triple &Triple,
const llvm::opt::ArgList &Args);
- bool isPIEDefault() const override {
+ bool isPIEDefault(const llvm::opt::ArgList &Args) const override {
return getTriple().getArch() == llvm::Triple::x86_64;
}
diff --git a/clang/lib/Driver/ToolChains/Hexagon.cpp b/clang/lib/Driver/ToolChains/Hexagon.cpp
index 828bfdbb05a3..18270818d158 100644
--- a/clang/lib/Driver/ToolChains/Hexagon.cpp
+++ b/clang/lib/Driver/ToolChains/Hexagon.cpp
@@ -146,6 +146,8 @@ void hexagon::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
"-mcpu=hexagon" +
toolchains::HexagonToolChain::GetTargetCPUVersion(Args)));
+ addSanitizerRuntimes(HTC, Args, CmdArgs);
+
if (Output.isFilename()) {
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
@@ -223,6 +225,8 @@ constructHexagonLinkArgs(Compilation &C, const JobAction &JA,
bool UseShared = IsShared && !IsStatic;
StringRef CpuVer = toolchains::HexagonToolChain::GetTargetCPUVersion(Args);
+ bool NeedsSanitizerDeps = addSanitizerRuntimes(HTC, Args, CmdArgs);
+
//----------------------------------------------------------------------------
// Silence warnings for various options
//----------------------------------------------------------------------------
@@ -288,6 +292,12 @@ constructHexagonLinkArgs(Compilation &C, const JobAction &JA,
AddLinkerInputs(HTC, Inputs, Args, CmdArgs, JA);
if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
+ if (NeedsSanitizerDeps) {
+ linkSanitizerRuntimeDeps(HTC, CmdArgs);
+
+ CmdArgs.push_back("-lunwind");
+ }
+
CmdArgs.push_back("-lclang_rt.builtins-hexagon");
CmdArgs.push_back("-lc");
}
@@ -450,6 +460,13 @@ Optional<unsigned> HexagonToolChain::getSmallDataThreshold(
return None;
}
+std::string HexagonToolChain::getCompilerRTPath() const {
+ SmallString<128> Dir(getDriver().SysRoot);
+ llvm::sys::path::append(Dir, "usr", "lib");
+ Dir += SelectedMultilib.gccSuffix();
+ return std::string(Dir.str());
+}
+
void HexagonToolChain::getHexagonLibraryPaths(const ArgList &Args,
ToolChain::path_list &LibPaths) const {
const Driver &D = getDriver();
@@ -470,7 +487,7 @@ void HexagonToolChain::getHexagonLibraryPaths(const ArgList &Args,
std::string TargetDir = getHexagonTargetDir(D.getInstalledDir(),
D.PrefixDirs);
- if (llvm::find(RootDirs, TargetDir) == RootDirs.end())
+ if (!llvm::is_contained(RootDirs, TargetDir))
RootDirs.push_back(TargetDir);
bool HasPIC = Args.hasArg(options::OPT_fpic, options::OPT_fPIC);
@@ -588,21 +605,43 @@ void HexagonToolChain::addClangTargetOptions(const ArgList &DriverArgs,
void HexagonToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
- if (DriverArgs.hasArg(options::OPT_nostdinc) ||
- DriverArgs.hasArg(options::OPT_nostdlibinc))
+ if (DriverArgs.hasArg(options::OPT_nostdinc))
return;
+ const bool IsELF = !getTriple().isMusl() && !getTriple().isOSLinux();
+ const bool IsLinuxMusl = getTriple().isMusl() && getTriple().isOSLinux();
+
const Driver &D = getDriver();
- if (!D.SysRoot.empty()) {
+ SmallString<128> ResourceDirInclude(D.ResourceDir);
+ if (!IsELF) {
+ llvm::sys::path::append(ResourceDirInclude, "include");
+ if (!DriverArgs.hasArg(options::OPT_nobuiltininc) &&
+ (!IsLinuxMusl || DriverArgs.hasArg(options::OPT_nostdlibinc)))
+ addSystemInclude(DriverArgs, CC1Args, ResourceDirInclude);
+ }
+ if (DriverArgs.hasArg(options::OPT_nostdlibinc))
+ return;
+
+ const bool HasSysRoot = !D.SysRoot.empty();
+ if (HasSysRoot) {
SmallString<128> P(D.SysRoot);
- if (getTriple().isMusl())
+ if (IsLinuxMusl)
llvm::sys::path::append(P, "usr/include");
else
llvm::sys::path::append(P, "include");
+
addExternCSystemInclude(DriverArgs, CC1Args, P.str());
- return;
+ // LOCAL_INCLUDE_DIR
+ addSystemInclude(DriverArgs, CC1Args, P + "/usr/local/include");
+ // TOOL_INCLUDE_DIR
+ AddMultilibIncludeArgs(DriverArgs, CC1Args);
}
+ if (!DriverArgs.hasArg(options::OPT_nobuiltininc) && IsLinuxMusl)
+ addSystemInclude(DriverArgs, CC1Args, ResourceDirInclude);
+
+ if (HasSysRoot)
+ return;
std::string TargetDir = getHexagonTargetDir(D.getInstalledDir(),
D.PrefixDirs);
addExternCSystemInclude(DriverArgs, CC1Args, TargetDir + "/hexagon/include");
@@ -665,11 +704,11 @@ bool HexagonToolChain::isAutoHVXEnabled(const llvm::opt::ArgList &Args) {
// Returns the default CPU for Hexagon. This is the default compilation target
// if no Hexagon processor is selected at the command-line.
//
-const StringRef HexagonToolChain::GetDefaultCPU() {
+StringRef HexagonToolChain::GetDefaultCPU() {
return "hexagonv60";
}
-const StringRef HexagonToolChain::GetTargetCPUVersion(const ArgList &Args) {
+StringRef HexagonToolChain::GetTargetCPUVersion(const ArgList &Args) {
Arg *CpuArg = nullptr;
if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ))
CpuArg = A;
diff --git a/clang/lib/Driver/ToolChains/Hexagon.h b/clang/lib/Driver/ToolChains/Hexagon.h
index c32cb7f09591..899630555352 100644
--- a/clang/lib/Driver/ToolChains/Hexagon.h
+++ b/clang/lib/Driver/ToolChains/Hexagon.h
@@ -104,9 +104,11 @@ public:
void getHexagonLibraryPaths(const llvm::opt::ArgList &Args,
ToolChain::path_list &LibPaths) const;
+ std::string getCompilerRTPath() const override;
+
static bool isAutoHVXEnabled(const llvm::opt::ArgList &Args);
- static const StringRef GetDefaultCPU();
- static const StringRef GetTargetCPUVersion(const llvm::opt::ArgList &Args);
+ static StringRef GetDefaultCPU();
+ static StringRef GetTargetCPUVersion(const llvm::opt::ArgList &Args);
static Optional<unsigned> getSmallDataThreshold(
const llvm::opt::ArgList &Args);
diff --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp
index c9360fc67165..0224383e63a1 100644
--- a/clang/lib/Driver/ToolChains/Linux.cpp
+++ b/clang/lib/Driver/ToolChains/Linux.cpp
@@ -206,8 +206,7 @@ Linux::Linux(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
ExtraOpts.push_back("max-page-size=4096");
}
- if (GCCInstallation.getParentLibPath().find("opt/rh/devtoolset") !=
- StringRef::npos)
+ if (GCCInstallation.getParentLibPath().contains("opt/rh/"))
// With devtoolset on RHEL, we want to add a bin directory that is relative
// to the detected gcc install, because if we are using devtoolset gcc then
// we want to use other tools from devtoolset (e.g. ld) instead of the
@@ -262,6 +261,13 @@ Linux::Linux(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
const std::string OSLibDir = std::string(getOSLibDir(Triple, Args));
const std::string MultiarchTriple = getMultiarchTriple(D, Triple, SysRoot);
+ // mips32: Debian multilib uses /libo32, while other cases use /lib. We
+ // need to add both /libo32 and /lib.
+ if (Arch == llvm::Triple::mips || Arch == llvm::Triple::mipsel) {
+ Generic_GCC::AddMultilibPaths(D, SysRoot, "libo32", MultiarchTriple, Paths);
+ addPathIfExists(D, SysRoot + "/libo32", Paths);
+ addPathIfExists(D, SysRoot + "/usr/libo32", Paths);
+ }
Generic_GCC::AddMultilibPaths(D, SysRoot, OSLibDir, MultiarchTriple, Paths);
addPathIfExists(D, SysRoot + "/lib/" + MultiarchTriple, Paths);
@@ -303,8 +309,13 @@ Linux::Linux(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
// searched.
// FIXME: It's not clear whether we should use the driver's installed
// directory ('Dir' below) or the ResourceDir.
- if (StringRef(D.Dir).startswith(SysRoot))
+ if (StringRef(D.Dir).startswith(SysRoot)) {
+ // Even if OSLibDir != "lib", this is needed for Clang in the build
+ // directory (not installed) to find libc++.
addPathIfExists(D, D.Dir + "/../lib", Paths);
+ if (OSLibDir != "lib")
+ addPathIfExists(D, D.Dir + "/../" + OSLibDir, Paths);
+ }
addPathIfExists(D, SysRoot + "/lib", Paths);
addPathIfExists(D, SysRoot + "/usr/lib", Paths);
@@ -449,7 +460,7 @@ std::string Linux::getDynamicLinker(const ArgList &Args) const {
case llvm::Triple::mipsel:
case llvm::Triple::mips64:
case llvm::Triple::mips64el: {
- bool IsNaN2008 = tools::mips::isNaN2008(Args, Triple);
+ bool IsNaN2008 = tools::mips::isNaN2008(getDriver(), Args, Triple);
LibDir = "lib" + tools::mips::getMipsABILibSuffix(Args, Triple);
@@ -651,9 +662,9 @@ void Linux::AddIAMCUIncludeArgs(const ArgList &DriverArgs,
}
}
-bool Linux::isPIEDefault() const {
- return (getTriple().isAndroid() && !getTriple().isAndroidVersionLT(16)) ||
- getTriple().isMusl() || getSanitizerArgs().requiresPIE();
+bool Linux::isPIEDefault(const llvm::opt::ArgList &Args) const {
+ return getTriple().isAndroid() || getTriple().isMusl() ||
+ getSanitizerArgs(Args).requiresPIE();
}
bool Linux::IsAArch64OutlineAtomicsDefault(const ArgList &Args) const {
@@ -669,10 +680,6 @@ bool Linux::IsAArch64OutlineAtomicsDefault(const ArgList &Args) const {
return true;
}
-bool Linux::isNoExecStackDefault() const {
- return getTriple().isAndroid();
-}
-
bool Linux::IsMathErrnoDefault() const {
if (getTriple().isAndroid())
return false;
@@ -694,6 +701,7 @@ SanitizerMask Linux::getSupportedSanitizers() const {
getTriple().getArch() == llvm::Triple::thumbeb;
const bool IsRISCV64 = getTriple().getArch() == llvm::Triple::riscv64;
const bool IsSystemZ = getTriple().getArch() == llvm::Triple::systemz;
+ const bool IsHexagon = getTriple().getArch() == llvm::Triple::hexagon;
SanitizerMask Res = ToolChain::getSupportedSanitizers();
Res |= SanitizerKind::Address;
Res |= SanitizerKind::PointerCompare;
@@ -707,7 +715,7 @@ SanitizerMask Linux::getSupportedSanitizers() const {
if (IsX86_64 || IsMIPS64 || IsAArch64)
Res |= SanitizerKind::DataFlow;
if (IsX86_64 || IsMIPS64 || IsAArch64 || IsX86 || IsArmArch || IsPowerPC64 ||
- IsRISCV64 || IsSystemZ)
+ IsRISCV64 || IsSystemZ || IsHexagon)
Res |= SanitizerKind::Leak;
if (IsX86_64 || IsMIPS64 || IsAArch64 || IsPowerPC64 || IsSystemZ)
Res |= SanitizerKind::Thread;
@@ -716,7 +724,7 @@ SanitizerMask Linux::getSupportedSanitizers() const {
if (IsX86 || IsX86_64)
Res |= SanitizerKind::Function;
if (IsX86_64 || IsMIPS64 || IsAArch64 || IsX86 || IsMIPS || IsArmArch ||
- IsPowerPC64)
+ IsPowerPC64 || IsHexagon)
Res |= SanitizerKind::Scudo;
if (IsX86_64 || IsAArch64) {
Res |= SanitizerKind::HWAddress;
diff --git a/clang/lib/Driver/ToolChains/Linux.h b/clang/lib/Driver/ToolChains/Linux.h
index 169a37c44072..a5ec33bd44f1 100644
--- a/clang/lib/Driver/ToolChains/Linux.h
+++ b/clang/lib/Driver/ToolChains/Linux.h
@@ -43,8 +43,7 @@ public:
CXXStdlibType GetDefaultCXXStdlibType() const override;
bool
IsAArch64OutlineAtomicsDefault(const llvm::opt::ArgList &Args) const override;
- bool isPIEDefault() const override;
- bool isNoExecStackDefault() const override;
+ bool isPIEDefault(const llvm::opt::ArgList &Args) const override;
bool IsMathErrnoDefault() const override;
SanitizerMask getSupportedSanitizers() const override;
void addProfileRTLibs(const llvm::opt::ArgList &Args,
diff --git a/clang/lib/Driver/ToolChains/MSP430.h b/clang/lib/Driver/ToolChains/MSP430.h
index 9d247ca3a896..2e838c027e0f 100644
--- a/clang/lib/Driver/ToolChains/MSP430.h
+++ b/clang/lib/Driver/ToolChains/MSP430.h
@@ -37,7 +37,9 @@ public:
Action::OffloadKind) const override;
bool isPICDefault() const override { return false; }
- bool isPIEDefault() const override { return false; }
+ bool isPIEDefault(const llvm::opt::ArgList &Args) const override {
+ return false;
+ }
bool isPICDefaultForced() const override { return true; }
UnwindLibType
diff --git a/clang/lib/Driver/ToolChains/MSVC.cpp b/clang/lib/Driver/ToolChains/MSVC.cpp
index 0dc94a4c6c7d..792b0a51fea0 100644
--- a/clang/lib/Driver/ToolChains/MSVC.cpp
+++ b/clang/lib/Driver/ToolChains/MSVC.cpp
@@ -63,6 +63,61 @@ using namespace clang::driver::tools;
using namespace clang;
using namespace llvm::opt;
+// Windows SDKs and VC Toolchains group their contents into subdirectories based
+// on the target architecture. This function converts an llvm::Triple::ArchType
+// to the corresponding subdirectory name.
+static const char *llvmArchToWindowsSDKArch(llvm::Triple::ArchType Arch) {
+ using ArchType = llvm::Triple::ArchType;
+ switch (Arch) {
+ case ArchType::x86:
+ return "x86";
+ case ArchType::x86_64:
+ return "x64";
+ case ArchType::arm:
+ return "arm";
+ case ArchType::aarch64:
+ return "arm64";
+ default:
+ return "";
+ }
+}
+
+// Similar to the above function, but for Visual Studio versions before VS2017.
+static const char *llvmArchToLegacyVCArch(llvm::Triple::ArchType Arch) {
+ using ArchType = llvm::Triple::ArchType;
+ switch (Arch) {
+ case ArchType::x86:
+ // x86 is default in legacy VC toolchains.
+ // e.g. x86 libs are directly in /lib as opposed to /lib/x86.
+ return "";
+ case ArchType::x86_64:
+ return "amd64";
+ case ArchType::arm:
+ return "arm";
+ case ArchType::aarch64:
+ return "arm64";
+ default:
+ return "";
+ }
+}
+
+// Similar to the above function, but for DevDiv internal builds.
+static const char *llvmArchToDevDivInternalArch(llvm::Triple::ArchType Arch) {
+ using ArchType = llvm::Triple::ArchType;
+ switch (Arch) {
+ case ArchType::x86:
+ return "i386";
+ case ArchType::x86_64:
+ return "amd64";
+ case ArchType::arm:
+ return "arm";
+ case ArchType::aarch64:
+ return "arm64";
+ default:
+ return "";
+ }
+}
+
static bool canExecute(llvm::vfs::FileSystem &VFS, StringRef Path) {
auto Status = VFS.status(Path);
if (!Status)
@@ -396,6 +451,20 @@ void visualstudio::Linker::ConstructJob(Compilation &C, const JobAction &JA,
// the environment variable is set however, assume the user knows what
// they're doing. If the user passes /vctoolsdir or /winsdkdir, trust that
// over env vars.
+ if (const Arg *A = Args.getLastArg(options::OPT__SLASH_diasdkdir,
+ options::OPT__SLASH_winsysroot)) {
+ // cl.exe doesn't find the DIA SDK automatically, so this too requires
+ // explicit flags and doesn't automatically look in "DIA SDK" relative
+ // to the path we found for VCToolChainPath.
+ llvm::SmallString<128> DIAPath(A->getValue());
+ if (A->getOption().getID() == options::OPT__SLASH_winsysroot)
+ llvm::sys::path::append(DIAPath, "DIA SDK");
+
+ // The DIA SDK always uses the legacy vc arch, even in new MSVC versions.
+ llvm::sys::path::append(DIAPath, "lib",
+ llvmArchToLegacyVCArch(TC.getArch()));
+ CmdArgs.push_back(Args.MakeArgString(Twine("-libpath:") + DIAPath));
+ }
if (!llvm::sys::Process::GetEnv("LIB") ||
Args.getLastArg(options::OPT__SLASH_vctoolsdir,
options::OPT__SLASH_winsysroot)) {
@@ -461,7 +530,7 @@ void visualstudio::Linker::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back(Args.MakeArgString(std::string("-implib:") + ImplibName));
}
- if (TC.getSanitizerArgs().needsFuzzer()) {
+ if (TC.getSanitizerArgs(Args).needsFuzzer()) {
if (!Args.hasArg(options::OPT_shared))
CmdArgs.push_back(
Args.MakeArgString(std::string("-wholearchive:") +
@@ -472,10 +541,10 @@ void visualstudio::Linker::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back(Args.MakeArgString("-incremental:no"));
}
- if (TC.getSanitizerArgs().needsAsanRt()) {
+ if (TC.getSanitizerArgs(Args).needsAsanRt()) {
CmdArgs.push_back(Args.MakeArgString("-debug"));
CmdArgs.push_back(Args.MakeArgString("-incremental:no"));
- if (TC.getSanitizerArgs().needsSharedRt() ||
+ if (TC.getSanitizerArgs(Args).needsSharedRt() ||
Args.hasArg(options::OPT__SLASH_MD, options::OPT__SLASH_MDd)) {
for (const auto &Lib : {"asan_dynamic", "asan_dynamic_runtime_thunk"})
CmdArgs.push_back(TC.getCompilerRTArgString(Args, Lib));
@@ -726,15 +795,17 @@ bool MSVCToolChain::IsUnwindTablesDefault(const ArgList &Args) const {
}
bool MSVCToolChain::isPICDefault() const {
- return getArch() == llvm::Triple::x86_64;
+ return getArch() == llvm::Triple::x86_64 ||
+ getArch() == llvm::Triple::aarch64;
}
-bool MSVCToolChain::isPIEDefault() const {
+bool MSVCToolChain::isPIEDefault(const llvm::opt::ArgList &Args) const {
return false;
}
bool MSVCToolChain::isPICDefaultForced() const {
- return getArch() == llvm::Triple::x86_64;
+ return getArch() == llvm::Triple::x86_64 ||
+ getArch() == llvm::Triple::aarch64;
}
void MSVCToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
@@ -752,61 +823,6 @@ void MSVCToolChain::printVerboseInfo(raw_ostream &OS) const {
RocmInstallation.print(OS);
}
-// Windows SDKs and VC Toolchains group their contents into subdirectories based
-// on the target architecture. This function converts an llvm::Triple::ArchType
-// to the corresponding subdirectory name.
-static const char *llvmArchToWindowsSDKArch(llvm::Triple::ArchType Arch) {
- using ArchType = llvm::Triple::ArchType;
- switch (Arch) {
- case ArchType::x86:
- return "x86";
- case ArchType::x86_64:
- return "x64";
- case ArchType::arm:
- return "arm";
- case ArchType::aarch64:
- return "arm64";
- default:
- return "";
- }
-}
-
-// Similar to the above function, but for Visual Studios before VS2017.
-static const char *llvmArchToLegacyVCArch(llvm::Triple::ArchType Arch) {
- using ArchType = llvm::Triple::ArchType;
- switch (Arch) {
- case ArchType::x86:
- // x86 is default in legacy VC toolchains.
- // e.g. x86 libs are directly in /lib as opposed to /lib/x86.
- return "";
- case ArchType::x86_64:
- return "amd64";
- case ArchType::arm:
- return "arm";
- case ArchType::aarch64:
- return "arm64";
- default:
- return "";
- }
-}
-
-// Similar to the above function, but for DevDiv internal builds.
-static const char *llvmArchToDevDivInternalArch(llvm::Triple::ArchType Arch) {
- using ArchType = llvm::Triple::ArchType;
- switch (Arch) {
- case ArchType::x86:
- return "i386";
- case ArchType::x86_64:
- return "amd64";
- case ArchType::arm:
- return "arm";
- case ArchType::aarch64:
- return "arm64";
- default:
- return "";
- }
-}
-
// Get the path to a specific subdirectory in the current toolchain for
// a given target architecture.
// VS2017 changed the VC toolchain layout, so this should be used instead
@@ -1263,6 +1279,19 @@ void MSVCToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
AddSystemIncludesFromEnv(Var);
}
+ // Add DIA SDK include if requested.
+ if (const Arg *A = DriverArgs.getLastArg(options::OPT__SLASH_diasdkdir,
+ options::OPT__SLASH_winsysroot)) {
+ // cl.exe doesn't find the DIA SDK automatically, so this too requires
+ // explicit flags and doesn't automatically look in "DIA SDK" relative
+ // to the path we found for VCToolChainPath.
+ llvm::SmallString<128> DIASDKPath(A->getValue());
+ if (A->getOption().getID() == options::OPT__SLASH_winsysroot)
+ llvm::sys::path::append(DIASDKPath, "DIA SDK");
+ AddSystemIncludeWithSubfolder(DriverArgs, CC1Args, std::string(DIASDKPath),
+ "include");
+ }
+
if (DriverArgs.hasArg(options::OPT_nostdlibinc))
return;
diff --git a/clang/lib/Driver/ToolChains/MSVC.h b/clang/lib/Driver/ToolChains/MSVC.h
index 19d94c5c606e..8f033de09bf6 100644
--- a/clang/lib/Driver/ToolChains/MSVC.h
+++ b/clang/lib/Driver/ToolChains/MSVC.h
@@ -52,7 +52,7 @@ public:
bool IsIntegratedAssemblerDefault() const override;
bool IsUnwindTablesDefault(const llvm::opt::ArgList &Args) const override;
bool isPICDefault() const override;
- bool isPIEDefault() const override;
+ bool isPIEDefault(const llvm::opt::ArgList &Args) const override;
bool isPICDefaultForced() const override;
/// Set CodeView as the default debug info format for non-MachO binary
diff --git a/clang/lib/Driver/ToolChains/MinGW.cpp b/clang/lib/Driver/ToolChains/MinGW.cpp
index 20efbdc237a8..ecce2f062bd7 100644
--- a/clang/lib/Driver/ToolChains/MinGW.cpp
+++ b/clang/lib/Driver/ToolChains/MinGW.cpp
@@ -98,7 +98,7 @@ void tools::MinGW::Linker::ConstructJob(Compilation &C, const JobAction &JA,
const char *LinkingOutput) const {
const ToolChain &TC = getToolChain();
const Driver &D = TC.getDriver();
- const SanitizerArgs &Sanitize = TC.getSanitizerArgs();
+ const SanitizerArgs &Sanitize = TC.getSanitizerArgs(Args);
ArgStringList CmdArgs;
@@ -136,10 +136,13 @@ void tools::MinGW::Linker::ConstructJob(Compilation &C, const JobAction &JA,
llvm_unreachable("Unsupported target architecture.");
}
- if (Args.hasArg(options::OPT_mwindows)) {
+ Arg *SubsysArg =
+ Args.getLastArg(options::OPT_mwindows, options::OPT_mconsole);
+ if (SubsysArg && SubsysArg->getOption().matches(options::OPT_mwindows)) {
CmdArgs.push_back("--subsystem");
CmdArgs.push_back("windows");
- } else if (Args.hasArg(options::OPT_mconsole)) {
+ } else if (SubsysArg &&
+ SubsysArg->getOption().matches(options::OPT_mconsole)) {
CmdArgs.push_back("--subsystem");
CmdArgs.push_back("console");
}
@@ -346,29 +349,29 @@ static bool findGccVersion(StringRef LibDir, std::string &GccLibDir,
}
void toolchains::MinGW::findGccLibDir() {
- llvm::SmallVector<llvm::SmallString<32>, 2> Archs;
- Archs.emplace_back(getTriple().getArchName());
- Archs[0] += "-w64-mingw32";
- Archs.emplace_back("mingw32");
- if (Arch.empty())
- Arch = std::string(Archs[0].str());
+ llvm::SmallVector<llvm::SmallString<32>, 2> SubdirNames;
+ SubdirNames.emplace_back(getTriple().getArchName());
+ SubdirNames[0] += "-w64-mingw32";
+ SubdirNames.emplace_back("mingw32");
+ if (SubdirName.empty())
+ SubdirName = std::string(SubdirNames[0].str());
// lib: Arch Linux, Ubuntu, Windows
// lib64: openSUSE Linux
for (StringRef CandidateLib : {"lib", "lib64"}) {
- for (StringRef CandidateArch : Archs) {
+ for (StringRef CandidateSysroot : SubdirNames) {
llvm::SmallString<1024> LibDir(Base);
- llvm::sys::path::append(LibDir, CandidateLib, "gcc", CandidateArch);
+ llvm::sys::path::append(LibDir, CandidateLib, "gcc", CandidateSysroot);
if (findGccVersion(LibDir, GccLibDir, Ver)) {
- Arch = std::string(CandidateArch);
+ SubdirName = std::string(CandidateSysroot);
return;
}
}
}
}
-llvm::ErrorOr<std::string> toolchains::MinGW::findGcc() {
+static llvm::ErrorOr<std::string> findGcc(const llvm::Triple &T) {
llvm::SmallVector<llvm::SmallString<32>, 2> Gccs;
- Gccs.emplace_back(getTriple().getArchName());
+ Gccs.emplace_back(T.getArchName());
Gccs[0] += "-w64-mingw32-gcc";
Gccs.emplace_back("mingw32-gcc");
// Please do not add "gcc" here
@@ -378,17 +381,18 @@ llvm::ErrorOr<std::string> toolchains::MinGW::findGcc() {
return make_error_code(std::errc::no_such_file_or_directory);
}
-llvm::ErrorOr<std::string> toolchains::MinGW::findClangRelativeSysroot() {
+static llvm::ErrorOr<std::string>
+findClangRelativeSysroot(const Driver &D, const llvm::Triple &T,
+ std::string &SubdirName) {
llvm::SmallVector<llvm::SmallString<32>, 2> Subdirs;
- Subdirs.emplace_back(getTriple().str());
- Subdirs.emplace_back(getTriple().getArchName());
+ Subdirs.emplace_back(T.str());
+ Subdirs.emplace_back(T.getArchName());
Subdirs[1] += "-w64-mingw32";
- StringRef ClangRoot =
- llvm::sys::path::parent_path(getDriver().getInstalledDir());
+ StringRef ClangRoot = llvm::sys::path::parent_path(D.getInstalledDir());
StringRef Sep = llvm::sys::path::get_separator();
for (StringRef CandidateSubdir : Subdirs) {
if (llvm::sys::fs::is_directory(ClangRoot + Sep + CandidateSubdir)) {
- Arch = std::string(CandidateSubdir);
+ SubdirName = std::string(CandidateSubdir);
return (ClangRoot + Sep + CandidateSubdir).str();
}
}
@@ -401,13 +405,16 @@ toolchains::MinGW::MinGW(const Driver &D, const llvm::Triple &Triple,
RocmInstallation(D, Triple, Args) {
getProgramPaths().push_back(getDriver().getInstalledDir());
+ // The sequence for detecting a sysroot here should be kept in sync with
+ // the testTriple function below.
if (getDriver().SysRoot.size())
Base = getDriver().SysRoot;
// Look for <clang-bin>/../<triplet>; if found, use <clang-bin>/.. as the
// base as it could still be a base for a gcc setup with libgcc.
- else if (llvm::ErrorOr<std::string> TargetSubdir = findClangRelativeSysroot())
+ else if (llvm::ErrorOr<std::string> TargetSubdir =
+ findClangRelativeSysroot(getDriver(), getTriple(), SubdirName))
Base = std::string(llvm::sys::path::parent_path(TargetSubdir.get()));
- else if (llvm::ErrorOr<std::string> GPPName = findGcc())
+ else if (llvm::ErrorOr<std::string> GPPName = findGcc(getTriple()))
Base = std::string(llvm::sys::path::parent_path(
llvm::sys::path::parent_path(GPPName.get())));
else
@@ -420,10 +427,10 @@ toolchains::MinGW::MinGW(const Driver &D, const llvm::Triple &Triple,
// correct crtbegin.o, crtend.o would be found.
getFilePaths().push_back(GccLibDir);
getFilePaths().push_back(
- (Base + Arch + llvm::sys::path::get_separator() + "lib").str());
+ (Base + SubdirName + llvm::sys::path::get_separator() + "lib").str());
getFilePaths().push_back(Base + "lib");
// openSUSE
- getFilePaths().push_back(Base + Arch + "/sys-root/mingw/lib");
+ getFilePaths().push_back(Base + SubdirName + "/sys-root/mingw/lib");
NativeLLVMSupport =
Args.getLastArgValue(options::OPT_fuse_ld_EQ, CLANG_DEFAULT_LINKER)
@@ -471,13 +478,17 @@ bool toolchains::MinGW::IsUnwindTablesDefault(const ArgList &Args) const {
}
bool toolchains::MinGW::isPICDefault() const {
- return getArch() == llvm::Triple::x86_64;
+ return getArch() == llvm::Triple::x86_64 ||
+ getArch() == llvm::Triple::aarch64;
}
-bool toolchains::MinGW::isPIEDefault() const { return false; }
+bool toolchains::MinGW::isPIEDefault(const llvm::opt::ArgList &Args) const {
+ return false;
+}
bool toolchains::MinGW::isPICDefaultForced() const {
- return getArch() == llvm::Triple::x86_64;
+ return getArch() == llvm::Triple::x86_64 ||
+ getArch() == llvm::Triple::aarch64;
}
llvm::ExceptionHandling
@@ -568,11 +579,12 @@ void toolchains::MinGW::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
if (GetRuntimeLibType(DriverArgs) == ToolChain::RLT_Libgcc) {
// openSUSE
addSystemInclude(DriverArgs, CC1Args,
- Base + Arch + "/sys-root/mingw/include");
+ Base + SubdirName + "/sys-root/mingw/include");
}
addSystemInclude(DriverArgs, CC1Args,
- Base + Arch + llvm::sys::path::get_separator() + "include");
+ Base + SubdirName + llvm::sys::path::get_separator() +
+ "include");
addSystemInclude(DriverArgs, CC1Args, Base + "include");
}
@@ -585,19 +597,27 @@ void toolchains::MinGW::AddClangCXXStdlibIncludeArgs(
StringRef Slash = llvm::sys::path::get_separator();
switch (GetCXXStdlibType(DriverArgs)) {
- case ToolChain::CST_Libcxx:
- addSystemInclude(DriverArgs, CC1Args, Base + Arch + Slash + "include" +
- Slash + "c++" + Slash + "v1");
+ case ToolChain::CST_Libcxx: {
+ std::string TargetDir = (Base + "include" + Slash + getTripleString() +
+ Slash + "c++" + Slash + "v1")
+ .str();
+ if (getDriver().getVFS().exists(TargetDir))
+ addSystemInclude(DriverArgs, CC1Args, TargetDir);
+ addSystemInclude(DriverArgs, CC1Args,
+ Base + SubdirName + Slash + "include" + Slash + "c++" +
+ Slash + "v1");
addSystemInclude(DriverArgs, CC1Args,
Base + "include" + Slash + "c++" + Slash + "v1");
break;
+ }
case ToolChain::CST_Libstdcxx:
llvm::SmallVector<llvm::SmallString<1024>, 4> CppIncludeBases;
CppIncludeBases.emplace_back(Base);
- llvm::sys::path::append(CppIncludeBases[0], Arch, "include", "c++");
+ llvm::sys::path::append(CppIncludeBases[0], SubdirName, "include", "c++");
CppIncludeBases.emplace_back(Base);
- llvm::sys::path::append(CppIncludeBases[1], Arch, "include", "c++", Ver);
+ llvm::sys::path::append(CppIncludeBases[1], SubdirName, "include", "c++",
+ Ver);
CppIncludeBases.emplace_back(Base);
llvm::sys::path::append(CppIncludeBases[2], "include", "c++", Ver);
CppIncludeBases.emplace_back(GccLibDir);
@@ -605,9 +625,61 @@ void toolchains::MinGW::AddClangCXXStdlibIncludeArgs(
for (auto &CppIncludeBase : CppIncludeBases) {
addSystemInclude(DriverArgs, CC1Args, CppIncludeBase);
CppIncludeBase += Slash;
- addSystemInclude(DriverArgs, CC1Args, CppIncludeBase + Arch);
+ addSystemInclude(DriverArgs, CC1Args, CppIncludeBase + SubdirName);
addSystemInclude(DriverArgs, CC1Args, CppIncludeBase + "backward");
}
break;
}
}
+
+static bool testTriple(const Driver &D, const llvm::Triple &Triple,
+ const ArgList &Args) {
+ // If an explicit sysroot is set, that will be used and we shouldn't try to
+ // detect anything else.
+ std::string SubdirName;
+ if (D.SysRoot.size())
+ return true;
+ if (llvm::ErrorOr<std::string> TargetSubdir =
+ findClangRelativeSysroot(D, Triple, SubdirName))
+ return true;
+ if (llvm::ErrorOr<std::string> GPPName = findGcc(Triple))
+ return true;
+ // If we found neither a colocated sysroot nor a matching gcc executable,
+ // conclude that we can't know if this is the correct spelling of the triple.
+ return false;
+}
+
+static llvm::Triple adjustTriple(const Driver &D, const llvm::Triple &Triple,
+ const ArgList &Args) {
+ // First test if the original triple can find a sysroot with the triple
+ // name.
+ if (testTriple(D, Triple, Args))
+ return Triple;
+ llvm::SmallVector<llvm::StringRef, 3> Archs;
+ // If not, test a couple of other possible arch names that might be what
+ // was intended.
+ if (Triple.getArch() == llvm::Triple::x86) {
+ Archs.emplace_back("i386");
+ Archs.emplace_back("i586");
+ Archs.emplace_back("i686");
+ } else if (Triple.getArch() == llvm::Triple::arm ||
+ Triple.getArch() == llvm::Triple::thumb) {
+ Archs.emplace_back("armv7");
+ }
+ for (auto A : Archs) {
+ llvm::Triple TestTriple(Triple);
+ TestTriple.setArchName(A);
+ if (testTriple(D, TestTriple, Args))
+ return TestTriple;
+ }
+ // If none was found, just proceed with the original value.
+ return Triple;
+}
+
+void toolchains::MinGW::fixTripleArch(const Driver &D, llvm::Triple &Triple,
+ const ArgList &Args) {
+ if (Triple.getArch() == llvm::Triple::x86 ||
+ Triple.getArch() == llvm::Triple::arm ||
+ Triple.getArch() == llvm::Triple::thumb)
+ Triple = adjustTriple(D, Triple, Args);
+}
diff --git a/clang/lib/Driver/ToolChains/MinGW.h b/clang/lib/Driver/ToolChains/MinGW.h
index 2f1559fcf34c..c3de19b97724 100644
--- a/clang/lib/Driver/ToolChains/MinGW.h
+++ b/clang/lib/Driver/ToolChains/MinGW.h
@@ -60,12 +60,15 @@ public:
MinGW(const Driver &D, const llvm::Triple &Triple,
const llvm::opt::ArgList &Args);
+ static void fixTripleArch(const Driver &D, llvm::Triple &Triple,
+ const llvm::opt::ArgList &Args);
+
bool HasNativeLLVMSupport() const override;
bool IsIntegratedAssemblerDefault() const override;
bool IsUnwindTablesDefault(const llvm::opt::ArgList &Args) const override;
bool isPICDefault() const override;
- bool isPIEDefault() const override;
+ bool isPIEDefault(const llvm::opt::ArgList &Args) const override;
bool isPICDefaultForced() const override;
SanitizerMask getSupportedSanitizers() const override;
@@ -99,12 +102,10 @@ private:
std::string Base;
std::string GccLibDir;
std::string Ver;
- std::string Arch;
+ std::string SubdirName;
mutable std::unique_ptr<tools::gcc::Preprocessor> Preprocessor;
mutable std::unique_ptr<tools::gcc::Compiler> Compiler;
void findGccLibDir();
- llvm::ErrorOr<std::string> findGcc();
- llvm::ErrorOr<std::string> findClangRelativeSysroot();
bool NativeLLVMSupport;
};
diff --git a/clang/lib/Driver/ToolChains/NetBSD.cpp b/clang/lib/Driver/ToolChains/NetBSD.cpp
index 1ce5a2a203c2..7571398b7cc6 100644
--- a/clang/lib/Driver/ToolChains/NetBSD.cpp
+++ b/clang/lib/Driver/ToolChains/NetBSD.cpp
@@ -29,12 +29,17 @@ void netbsd::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
+ const toolchains::NetBSD &ToolChain =
+ static_cast<const toolchains::NetBSD &>(getToolChain());
+ const Driver &D = ToolChain.getDriver();
+ const llvm::Triple &Triple = ToolChain.getTriple();
+
claimNoWarnArgs(Args);
ArgStringList CmdArgs;
// GNU as needs different flags for creating the correct output format
// on architectures with different ABIs or optional feature sets.
- switch (getToolChain().getArch()) {
+ switch (ToolChain.getArch()) {
case llvm::Triple::x86:
CmdArgs.push_back("--32");
break;
@@ -44,8 +49,7 @@ void netbsd::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
case llvm::Triple::thumbeb: {
StringRef MArch, MCPU;
arm::getARMArchCPUFromArgs(Args, MArch, MCPU, /*FromAs*/ true);
- std::string Arch =
- arm::getARMTargetCPU(MCPU, MArch, getToolChain().getTriple());
+ std::string Arch = arm::getARMTargetCPU(MCPU, MArch, Triple);
CmdArgs.push_back(Args.MakeArgString("-mcpu=" + Arch));
break;
}
@@ -56,7 +60,7 @@ void netbsd::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
case llvm::Triple::mips64el: {
StringRef CPUName;
StringRef ABIName;
- mips::getMipsCPUAndABI(Args, getToolChain().getTriple(), CPUName, ABIName);
+ mips::getMipsCPUAndABI(Args, Triple, CPUName, ABIName);
CmdArgs.push_back("-march");
CmdArgs.push_back(CPUName.data());
@@ -64,29 +68,29 @@ void netbsd::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back("-mabi");
CmdArgs.push_back(mips::getGnuCompatibleMipsABIName(ABIName).data());
- if (getToolChain().getTriple().isLittleEndian())
+ if (Triple.isLittleEndian())
CmdArgs.push_back("-EL");
else
CmdArgs.push_back("-EB");
- AddAssemblerKPIC(getToolChain(), Args, CmdArgs);
+ AddAssemblerKPIC(ToolChain, Args, CmdArgs);
break;
}
case llvm::Triple::sparc:
case llvm::Triple::sparcel: {
CmdArgs.push_back("-32");
- std::string CPU = getCPUName(Args, getToolChain().getTriple());
- CmdArgs.push_back(sparc::getSparcAsmModeForCPU(CPU, getToolChain().getTriple()));
- AddAssemblerKPIC(getToolChain(), Args, CmdArgs);
+ std::string CPU = getCPUName(D, Args, Triple);
+ CmdArgs.push_back(sparc::getSparcAsmModeForCPU(CPU, Triple));
+ AddAssemblerKPIC(ToolChain, Args, CmdArgs);
break;
}
case llvm::Triple::sparcv9: {
CmdArgs.push_back("-64");
- std::string CPU = getCPUName(Args, getToolChain().getTriple());
- CmdArgs.push_back(sparc::getSparcAsmModeForCPU(CPU, getToolChain().getTriple()));
- AddAssemblerKPIC(getToolChain(), Args, CmdArgs);
+ std::string CPU = getCPUName(D, Args, Triple);
+ CmdArgs.push_back(sparc::getSparcAsmModeForCPU(CPU, Triple));
+ AddAssemblerKPIC(ToolChain, Args, CmdArgs);
break;
}
@@ -102,7 +106,7 @@ void netbsd::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
for (const auto &II : Inputs)
CmdArgs.push_back(II.getFilename());
- const char *Exec = Args.MakeArgString((getToolChain().GetProgramPath("as")));
+ const char *Exec = Args.MakeArgString((ToolChain.GetProgramPath("as")));
C.addCommand(std::make_unique<Command>(JA, *this,
ResponseFileSupport::AtFileCurCP(),
Exec, CmdArgs, Inputs, Output));
@@ -116,6 +120,8 @@ void netbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA,
const toolchains::NetBSD &ToolChain =
static_cast<const toolchains::NetBSD &>(getToolChain());
const Driver &D = ToolChain.getDriver();
+ const llvm::Triple &Triple = ToolChain.getTriple();
+
ArgStringList CmdArgs;
if (!D.SysRoot.empty())
@@ -150,7 +156,7 @@ void netbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA,
case llvm::Triple::arm:
case llvm::Triple::thumb:
CmdArgs.push_back("-m");
- switch (ToolChain.getTriple().getEnvironment()) {
+ switch (Triple.getEnvironment()) {
case llvm::Triple::EABI:
case llvm::Triple::GNUEABI:
CmdArgs.push_back("armelf_nbsd_eabi");
@@ -168,7 +174,7 @@ void netbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA,
case llvm::Triple::thumbeb:
arm::appendBE8LinkFlag(Args, CmdArgs, ToolChain.getEffectiveTriple());
CmdArgs.push_back("-m");
- switch (ToolChain.getTriple().getEnvironment()) {
+ switch (Triple.getEnvironment()) {
case llvm::Triple::EABI:
case llvm::Triple::GNUEABI:
CmdArgs.push_back("armelfb_nbsd_eabi");
@@ -254,19 +260,18 @@ void netbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA,
Args.AddAllArgs(CmdArgs, options::OPT_Z_Flag);
Args.AddAllArgs(CmdArgs, options::OPT_r);
- bool NeedsSanitizerDeps = addSanitizerRuntimes(getToolChain(), Args, CmdArgs);
+ bool NeedsSanitizerDeps = addSanitizerRuntimes(ToolChain, Args, CmdArgs);
bool NeedsXRayDeps = addXRayRuntime(ToolChain, Args, CmdArgs);
- AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA);
+ AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA);
- const SanitizerArgs &SanArgs = ToolChain.getSanitizerArgs();
+ const SanitizerArgs &SanArgs = ToolChain.getSanitizerArgs(Args);
if (SanArgs.needsSharedRt()) {
CmdArgs.push_back("-rpath");
- CmdArgs.push_back(Args.MakeArgString(
- ToolChain.getCompilerRTPath().c_str()));
+ CmdArgs.push_back(Args.MakeArgString(ToolChain.getCompilerRTPath()));
}
unsigned Major, Minor, Micro;
- ToolChain.getTriple().getOSVersion(Major, Minor, Micro);
+ Triple.getOSVersion(Major, Minor, Micro);
bool useLibgcc = true;
if (Major >= 7 || Major == 0) {
switch (ToolChain.getArch()) {
@@ -294,7 +299,7 @@ void netbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA,
// Use the static OpenMP runtime with -static-openmp
bool StaticOpenMP = Args.hasArg(options::OPT_static_openmp) &&
!Args.hasArg(options::OPT_static);
- addOpenMPRuntime(CmdArgs, getToolChain(), Args, StaticOpenMP);
+ addOpenMPRuntime(CmdArgs, ToolChain, Args, StaticOpenMP);
if (D.CCCIsCXX()) {
if (ToolChain.ShouldLinkCXXStdlib(Args))
@@ -302,7 +307,7 @@ void netbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back("-lm");
}
if (NeedsSanitizerDeps)
- linkSanitizerRuntimeDeps(getToolChain(), CmdArgs);
+ linkSanitizerRuntimeDeps(ToolChain, CmdArgs);
if (NeedsXRayDeps)
linkXRayRuntimeDeps(ToolChain, CmdArgs);
if (Args.hasArg(options::OPT_pthread))
@@ -496,7 +501,7 @@ SanitizerMask NetBSD::getSupportedSanitizers() const {
void NetBSD::addClangTargetOptions(const ArgList &DriverArgs,
ArgStringList &CC1Args,
Action::OffloadKind) const {
- const SanitizerArgs &SanArgs = getSanitizerArgs();
+ const SanitizerArgs &SanArgs = getSanitizerArgs(DriverArgs);
if (SanArgs.hasAnySanitizer())
CC1Args.push_back("-D_REENTRANT");
diff --git a/clang/lib/Driver/ToolChains/OpenBSD.cpp b/clang/lib/Driver/ToolChains/OpenBSD.cpp
index e162165b2561..96abac57764f 100644
--- a/clang/lib/Driver/ToolChains/OpenBSD.cpp
+++ b/clang/lib/Driver/ToolChains/OpenBSD.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "OpenBSD.h"
+#include "Arch/ARM.h"
#include "Arch/Mips.h"
#include "Arch/Sparc.h"
#include "CommonArgs.h"
@@ -28,16 +29,30 @@ void openbsd::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
+ const toolchains::OpenBSD &ToolChain =
+ static_cast<const toolchains::OpenBSD &>(getToolChain());
+ const Driver &D = ToolChain.getDriver();
+ const llvm::Triple &Triple = ToolChain.getTriple();
+
claimNoWarnArgs(Args);
ArgStringList CmdArgs;
- switch (getToolChain().getArch()) {
+ switch (ToolChain.getArch()) {
case llvm::Triple::x86:
// When building 32-bit code on OpenBSD/amd64, we have to explicitly
// instruct as in the base system to assemble 32-bit code.
CmdArgs.push_back("--32");
break;
+ case llvm::Triple::arm:
+ case llvm::Triple::armeb: {
+ StringRef MArch, MCPU;
+ arm::getARMArchCPUFromArgs(Args, MArch, MCPU, /*FromAs*/ true);
+ std::string Arch = arm::getARMTargetCPU(MCPU, MArch, Triple);
+ CmdArgs.push_back(Args.MakeArgString("-mcpu=" + Arch));
+ break;
+ }
+
case llvm::Triple::ppc:
CmdArgs.push_back("-mppc");
CmdArgs.push_back("-many");
@@ -45,9 +60,9 @@ void openbsd::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
case llvm::Triple::sparcv9: {
CmdArgs.push_back("-64");
- std::string CPU = getCPUName(Args, getToolChain().getTriple());
- CmdArgs.push_back(sparc::getSparcAsmModeForCPU(CPU, getToolChain().getTriple()));
- AddAssemblerKPIC(getToolChain(), Args, CmdArgs);
+ std::string CPU = getCPUName(D, Args, Triple);
+ CmdArgs.push_back(sparc::getSparcAsmModeForCPU(CPU, Triple));
+ AddAssemblerKPIC(ToolChain, Args, CmdArgs);
break;
}
@@ -55,17 +70,20 @@ void openbsd::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
case llvm::Triple::mips64el: {
StringRef CPUName;
StringRef ABIName;
- mips::getMipsCPUAndABI(Args, getToolChain().getTriple(), CPUName, ABIName);
+ mips::getMipsCPUAndABI(Args, Triple, CPUName, ABIName);
+
+ CmdArgs.push_back("-march");
+ CmdArgs.push_back(CPUName.data());
CmdArgs.push_back("-mabi");
CmdArgs.push_back(mips::getGnuCompatibleMipsABIName(ABIName).data());
- if (getToolChain().getTriple().isLittleEndian())
+ if (Triple.isLittleEndian())
CmdArgs.push_back("-EL");
else
CmdArgs.push_back("-EB");
- AddAssemblerKPIC(getToolChain(), Args, CmdArgs);
+ AddAssemblerKPIC(ToolChain, Args, CmdArgs);
break;
}
@@ -81,7 +99,7 @@ void openbsd::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
for (const auto &II : Inputs)
CmdArgs.push_back(II.getFilename());
- const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as"));
+ const char *Exec = Args.MakeArgString(ToolChain.GetProgramPath("as"));
C.addCommand(std::make_unique<Command>(JA, *this,
ResponseFileSupport::AtFileCurCP(),
Exec, CmdArgs, Inputs, Output));
@@ -94,7 +112,7 @@ void openbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA,
const char *LinkingOutput) const {
const toolchains::OpenBSD &ToolChain =
static_cast<const toolchains::OpenBSD &>(getToolChain());
- const Driver &D = getToolChain().getDriver();
+ const Driver &D = ToolChain.getDriver();
ArgStringList CmdArgs;
// Silence warning for "clang -g foo.o -o foo"
@@ -174,6 +192,11 @@ void openbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA,
AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA);
if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
+ // Use the static OpenMP runtime with -static-openmp
+ bool StaticOpenMP = Args.hasArg(options::OPT_static_openmp) &&
+ !Args.hasArg(options::OPT_static);
+ addOpenMPRuntime(CmdArgs, ToolChain, Args, StaticOpenMP);
+
if (D.CCCIsCXX()) {
if (ToolChain.ShouldLinkCXXStdlib(Args))
ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs);
@@ -221,6 +244,8 @@ void openbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crtend)));
}
+ ToolChain.addProfileRTLibs(Args, CmdArgs);
+
const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath());
C.addCommand(std::make_unique<Command>(JA, *this,
ResponseFileSupport::AtFileCurCP(),
diff --git a/clang/lib/Driver/ToolChains/OpenBSD.h b/clang/lib/Driver/ToolChains/OpenBSD.h
index 4932ed5c609c..95c10cc62316 100644
--- a/clang/lib/Driver/ToolChains/OpenBSD.h
+++ b/clang/lib/Driver/ToolChains/OpenBSD.h
@@ -59,7 +59,9 @@ public:
bool IsMathErrnoDefault() const override { return false; }
bool IsObjCNonFragileABIDefault() const override { return true; }
- bool isPIEDefault() const override { return true; }
+ bool isPIEDefault(const llvm::opt::ArgList &Args) const override {
+ return true;
+ }
RuntimeLibType GetDefaultRuntimeLibType() const override {
return ToolChain::RLT_CompilerRT;
diff --git a/clang/lib/Driver/ToolChains/PS4CPU.cpp b/clang/lib/Driver/ToolChains/PS4CPU.cpp
index 383b0c50d410..5783a733983a 100644
--- a/clang/lib/Driver/ToolChains/PS4CPU.cpp
+++ b/clang/lib/Driver/ToolChains/PS4CPU.cpp
@@ -71,8 +71,9 @@ void tools::PS4cpu::Assemble::ConstructJob(Compilation &C, const JobAction &JA,
Exec, CmdArgs, Inputs, Output));
}
-static void AddPS4SanitizerArgs(const ToolChain &TC, ArgStringList &CmdArgs) {
- const SanitizerArgs &SanArgs = TC.getSanitizerArgs();
+static void AddPS4SanitizerArgs(const ToolChain &TC, const ArgList &Args,
+ ArgStringList &CmdArgs) {
+ const SanitizerArgs &SanArgs = TC.getSanitizerArgs(Args);
if (SanArgs.needsUbsanRt()) {
CmdArgs.push_back("-lSceDbgUBSanitizer_stub_weak");
}
@@ -81,9 +82,9 @@ static void AddPS4SanitizerArgs(const ToolChain &TC, ArgStringList &CmdArgs) {
}
}
-void tools::PS4cpu::addSanitizerArgs(const ToolChain &TC,
+void tools::PS4cpu::addSanitizerArgs(const ToolChain &TC, const ArgList &Args,
ArgStringList &CmdArgs) {
- const SanitizerArgs &SanArgs = TC.getSanitizerArgs();
+ const SanitizerArgs &SanArgs = TC.getSanitizerArgs(Args);
if (SanArgs.needsUbsanRt())
CmdArgs.push_back("--dependent-lib=libSceDbgUBSanitizer_stub_weak.a");
if (SanArgs.needsAsanRt())
@@ -127,7 +128,7 @@ void tools::PS4cpu::Link::ConstructJob(Compilation &C, const JobAction &JA,
}
if(!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs))
- AddPS4SanitizerArgs(ToolChain, CmdArgs);
+ AddPS4SanitizerArgs(ToolChain, Args, CmdArgs);
Args.AddAllArgs(CmdArgs, options::OPT_L);
Args.AddAllArgs(CmdArgs, options::OPT_T_Group);
diff --git a/clang/lib/Driver/ToolChains/PS4CPU.h b/clang/lib/Driver/ToolChains/PS4CPU.h
index 5f5d0e57d4ea..82f9523f84fb 100644
--- a/clang/lib/Driver/ToolChains/PS4CPU.h
+++ b/clang/lib/Driver/ToolChains/PS4CPU.h
@@ -23,7 +23,8 @@ namespace PS4cpu {
void addProfileRTArgs(const ToolChain &TC, const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs);
-void addSanitizerArgs(const ToolChain &TC, llvm::opt::ArgStringList &CmdArgs);
+void addSanitizerArgs(const ToolChain &TC, const llvm::opt::ArgList &Args,
+ llvm::opt::ArgStringList &CmdArgs);
class LLVM_LIBRARY_VISIBILITY Assemble : public Tool {
public:
diff --git a/clang/lib/Driver/ToolChains/SPIRV.cpp b/clang/lib/Driver/ToolChains/SPIRV.cpp
new file mode 100644
index 000000000000..16e72d3c733f
--- /dev/null
+++ b/clang/lib/Driver/ToolChains/SPIRV.cpp
@@ -0,0 +1,49 @@
+//===--- SPIRV.cpp - SPIR-V Tool Implementations ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#include "SPIRV.h"
+#include "CommonArgs.h"
+#include "clang/Driver/Compilation.h"
+#include "clang/Driver/Driver.h"
+#include "clang/Driver/InputInfo.h"
+#include "clang/Driver/Options.h"
+
+using namespace clang::driver;
+using namespace clang::driver::tools;
+using namespace llvm::opt;
+
+void SPIRV::constructTranslateCommand(Compilation &C, const Tool &T,
+ const JobAction &JA,
+ const InputInfo &Output,
+ const InputInfo &Input,
+ const llvm::opt::ArgStringList &Args) {
+ llvm::opt::ArgStringList CmdArgs(Args);
+ CmdArgs.push_back(Input.getFilename());
+
+ if (Input.getType() == types::TY_PP_Asm)
+ CmdArgs.push_back("-to-binary");
+ if (Output.getType() == types::TY_PP_Asm)
+ CmdArgs.push_back("-spirv-text");
+
+ CmdArgs.append({"-o", Output.getFilename()});
+
+ const char *Exec =
+ C.getArgs().MakeArgString(T.getToolChain().GetProgramPath("llvm-spirv"));
+ C.addCommand(std::make_unique<Command>(JA, T, ResponseFileSupport::None(),
+ Exec, CmdArgs, Input, Output));
+}
+
+void SPIRV::Translator::ConstructJob(Compilation &C, const JobAction &JA,
+ const InputInfo &Output,
+ const InputInfoList &Inputs,
+ const ArgList &Args,
+ const char *LinkingOutput) const {
+ claimNoWarnArgs(Args);
+ if (Inputs.size() != 1)
+ llvm_unreachable("Invalid number of input files.");
+ constructTranslateCommand(C, *this, JA, Output, Inputs[0], {});
+}
diff --git a/clang/lib/Driver/ToolChains/SPIRV.h b/clang/lib/Driver/ToolChains/SPIRV.h
new file mode 100644
index 000000000000..35d0446bd8b8
--- /dev/null
+++ b/clang/lib/Driver/ToolChains/SPIRV.h
@@ -0,0 +1,46 @@
+//===--- SPIRV.h - SPIR-V Tool Implementations ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_SPIRV_H
+#define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_SPIRV_H
+
+#include "clang/Driver/Tool.h"
+#include "clang/Driver/ToolChain.h"
+
+namespace clang {
+namespace driver {
+namespace tools {
+namespace SPIRV {
+
+void addTranslatorArgs(const llvm::opt::ArgList &InArgs,
+ llvm::opt::ArgStringList &OutArgs);
+
+void constructTranslateCommand(Compilation &C, const Tool &T,
+ const JobAction &JA, const InputInfo &Output,
+ const InputInfo &Input,
+ const llvm::opt::ArgStringList &Args);
+
+class LLVM_LIBRARY_VISIBILITY Translator : public Tool {
+public:
+ Translator(const ToolChain &TC)
+ : Tool("SPIR-V::Translator", "llvm-spirv", TC) {}
+
+ bool hasIntegratedCPP() const override { return false; }
+ bool hasIntegratedAssembler() const override { return true; }
+
+ void ConstructJob(Compilation &C, const JobAction &JA,
+ const InputInfo &Output, const InputInfoList &Inputs,
+ const llvm::opt::ArgList &TCArgs,
+ const char *LinkingOutput) const override;
+};
+
+} // namespace SPIRV
+} // namespace tools
+} // namespace driver
+} // namespace clang
+#endif
diff --git a/clang/lib/Driver/ToolChains/TCE.cpp b/clang/lib/Driver/ToolChains/TCE.cpp
index 33a81c54bd42..5f4051d31168 100644
--- a/clang/lib/Driver/ToolChains/TCE.cpp
+++ b/clang/lib/Driver/ToolChains/TCE.cpp
@@ -34,7 +34,9 @@ bool TCEToolChain::IsMathErrnoDefault() const { return true; }
bool TCEToolChain::isPICDefault() const { return false; }
-bool TCEToolChain::isPIEDefault() const { return false; }
+bool TCEToolChain::isPIEDefault(const llvm::opt::ArgList &Args) const {
+ return false;
+}
bool TCEToolChain::isPICDefaultForced() const { return false; }
diff --git a/clang/lib/Driver/ToolChains/TCE.h b/clang/lib/Driver/ToolChains/TCE.h
index 72933dae965e..31a64cfe878a 100644
--- a/clang/lib/Driver/ToolChains/TCE.h
+++ b/clang/lib/Driver/ToolChains/TCE.h
@@ -27,7 +27,7 @@ public:
bool IsMathErrnoDefault() const override;
bool isPICDefault() const override;
- bool isPIEDefault() const override;
+ bool isPIEDefault(const llvm::opt::ArgList &Args) const override;
bool isPICDefaultForced() const override;
};
diff --git a/clang/lib/Driver/ToolChains/VEToolchain.cpp b/clang/lib/Driver/ToolChains/VEToolchain.cpp
index e28f340f9aad..1fcc52684baa 100644
--- a/clang/lib/Driver/ToolChains/VEToolchain.cpp
+++ b/clang/lib/Driver/ToolChains/VEToolchain.cpp
@@ -53,7 +53,9 @@ Tool *VEToolChain::buildLinker() const {
bool VEToolChain::isPICDefault() const { return false; }
-bool VEToolChain::isPIEDefault() const { return false; }
+bool VEToolChain::isPIEDefault(const llvm::opt::ArgList &Args) const {
+ return false;
+}
bool VEToolChain::isPICDefaultForced() const { return false; }
diff --git a/clang/lib/Driver/ToolChains/VEToolchain.h b/clang/lib/Driver/ToolChains/VEToolchain.h
index b330331ca84e..964b0d0dd8d4 100644
--- a/clang/lib/Driver/ToolChains/VEToolchain.h
+++ b/clang/lib/Driver/ToolChains/VEToolchain.h
@@ -28,7 +28,7 @@ protected:
public:
bool IsIntegratedAssemblerDefault() const override { return true; }
bool isPICDefault() const override;
- bool isPIEDefault() const override;
+ bool isPIEDefault(const llvm::opt::ArgList &Args) const override;
bool isPICDefaultForced() const override;
bool SupportsProfiling() const override;
bool hasBlocksRuntime() const override;
diff --git a/clang/lib/Driver/ToolChains/WebAssembly.cpp b/clang/lib/Driver/ToolChains/WebAssembly.cpp
index 19f3571e6b38..a7298a9a71bf 100644
--- a/clang/lib/Driver/ToolChains/WebAssembly.cpp
+++ b/clang/lib/Driver/ToolChains/WebAssembly.cpp
@@ -63,7 +63,7 @@ void wasm::Linker::ConstructJob(Compilation &C, const JobAction &JA,
ArgStringList CmdArgs;
CmdArgs.push_back("-m");
- if (getToolChain().getTriple().isArch64Bit())
+ if (ToolChain.getTriple().isArch64Bit())
CmdArgs.push_back("wasm64");
else
CmdArgs.push_back("wasm32");
@@ -130,7 +130,7 @@ void wasm::Linker::ConstructJob(Compilation &C, const JobAction &JA,
// When optimizing, if wasm-opt is available, run it.
if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
- auto WasmOptPath = getToolChain().GetProgramPath("wasm-opt");
+ auto WasmOptPath = ToolChain.GetProgramPath("wasm-opt");
if (WasmOptPath != "wasm-opt") {
StringRef OOpt = "s";
if (A->getOption().matches(options::OPT_O4) ||
@@ -201,7 +201,9 @@ bool WebAssembly::UseObjCMixedDispatch() const { return true; }
bool WebAssembly::isPICDefault() const { return false; }
-bool WebAssembly::isPIEDefault() const { return false; }
+bool WebAssembly::isPIEDefault(const llvm::opt::ArgList &Args) const {
+ return false;
+}
bool WebAssembly::isPICDefaultForced() const { return false; }
@@ -293,6 +295,9 @@ void WebAssembly::addClangTargetOptions(const ArgList &DriverArgs,
// '-fwasm-exceptions' implies exception-handling feature
CC1Args.push_back("-target-feature");
CC1Args.push_back("+exception-handling");
+ // Backend needs -wasm-enable-eh to enable Wasm EH
+ CC1Args.push_back("-mllvm");
+ CC1Args.push_back("-wasm-enable-eh");
}
for (const Arg *A : DriverArgs.filtered(options::OPT_mllvm)) {
@@ -300,14 +305,14 @@ void WebAssembly::addClangTargetOptions(const ArgList &DriverArgs,
if (Opt.startswith("-emscripten-cxx-exceptions-allowed")) {
// '-mllvm -emscripten-cxx-exceptions-allowed' should be used with
// '-mllvm -enable-emscripten-cxx-exceptions'
- bool EmExceptionArgExists = false;
+ bool EmEHArgExists = false;
for (const Arg *A : DriverArgs.filtered(options::OPT_mllvm)) {
if (StringRef(A->getValue(0)) == "-enable-emscripten-cxx-exceptions") {
- EmExceptionArgExists = true;
+ EmEHArgExists = true;
break;
}
}
- if (!EmExceptionArgExists)
+ if (!EmEHArgExists)
getDriver().Diag(diag::err_drv_argument_only_allowed_with)
<< "-mllvm -emscripten-cxx-exceptions-allowed"
<< "-mllvm -enable-emscripten-cxx-exceptions";
@@ -323,6 +328,38 @@ void WebAssembly::addClangTargetOptions(const ArgList &DriverArgs,
":noinline"));
}
}
+
+ if (Opt.startswith("-wasm-enable-sjlj")) {
+ // '-mllvm -wasm-enable-sjlj' is not compatible with
+ // '-mno-exception-handling'
+ if (DriverArgs.hasFlag(options::OPT_mno_exception_handing,
+ options::OPT_mexception_handing, false))
+ getDriver().Diag(diag::err_drv_argument_not_allowed_with)
+ << "-mllvm -wasm-enable-sjlj"
+ << "-mno-exception-handling";
+ // '-mllvm -wasm-enable-sjlj' is not compatible with
+ // '-mllvm -enable-emscripten-cxx-exceptions'
+ // because we don't allow Emscripten EH + Wasm SjLj
+ for (const Arg *A : DriverArgs.filtered(options::OPT_mllvm)) {
+ if (StringRef(A->getValue(0)) == "-enable-emscripten-cxx-exceptions")
+ getDriver().Diag(diag::err_drv_argument_not_allowed_with)
+ << "-mllvm -wasm-enable-sjlj"
+ << "-mllvm -enable-emscripten-cxx-exceptions";
+ }
+ // '-mllvm -wasm-enable-sjlj' is not compatible with
+ // '-mllvm -enable-emscripten-sjlj'
+ for (const Arg *A : DriverArgs.filtered(options::OPT_mllvm)) {
+ if (StringRef(A->getValue(0)) == "-enable-emscripten-sjlj")
+ getDriver().Diag(diag::err_drv_argument_not_allowed_with)
+ << "-mllvm -wasm-enable-sjlj"
+ << "-mllvm -enable-emscripten-sjlj";
+ }
+ // '-mllvm -wasm-enable-sjlj' implies exception-handling feature
+ CC1Args.push_back("-target-feature");
+ CC1Args.push_back("+exception-handling");
+ // Backend needs '-exception-model=wasm' to use Wasm EH instructions
+ CC1Args.push_back("-exception-model=wasm");
+ }
}
}
diff --git a/clang/lib/Driver/ToolChains/WebAssembly.h b/clang/lib/Driver/ToolChains/WebAssembly.h
index 8a3f82d9efdf..c84e59675946 100644
--- a/clang/lib/Driver/ToolChains/WebAssembly.h
+++ b/clang/lib/Driver/ToolChains/WebAssembly.h
@@ -45,7 +45,7 @@ private:
bool IsObjCNonFragileABIDefault() const override;
bool UseObjCMixedDispatch() const override;
bool isPICDefault() const override;
- bool isPIEDefault() const override;
+ bool isPIEDefault(const llvm::opt::ArgList &Args) const override;
bool isPICDefaultForced() const override;
bool IsIntegratedAssemblerDefault() const override;
bool hasBlocksRuntime() const override;
diff --git a/clang/lib/Driver/ToolChains/XCore.cpp b/clang/lib/Driver/ToolChains/XCore.cpp
index 5f94f83d3691..7e74f6374050 100644
--- a/clang/lib/Driver/ToolChains/XCore.cpp
+++ b/clang/lib/Driver/ToolChains/XCore.cpp
@@ -102,7 +102,9 @@ Tool *XCoreToolChain::buildLinker() const {
bool XCoreToolChain::isPICDefault() const { return false; }
-bool XCoreToolChain::isPIEDefault() const { return false; }
+bool XCoreToolChain::isPIEDefault(const llvm::opt::ArgList &Args) const {
+ return false;
+}
bool XCoreToolChain::isPICDefaultForced() const { return false; }
diff --git a/clang/lib/Driver/ToolChains/XCore.h b/clang/lib/Driver/ToolChains/XCore.h
index 41dce08454c0..d9a05da3c678 100644
--- a/clang/lib/Driver/ToolChains/XCore.h
+++ b/clang/lib/Driver/ToolChains/XCore.h
@@ -58,7 +58,7 @@ protected:
public:
bool isPICDefault() const override;
- bool isPIEDefault() const override;
+ bool isPIEDefault(const llvm::opt::ArgList &Args) const override;
bool isPICDefaultForced() const override;
bool SupportsProfiling() const override;
bool hasBlocksRuntime() const override;
diff --git a/clang/lib/Driver/ToolChains/ZOS.h b/clang/lib/Driver/ToolChains/ZOS.h
index cace85d6da77..50bff0993561 100644
--- a/clang/lib/Driver/ToolChains/ZOS.h
+++ b/clang/lib/Driver/ToolChains/ZOS.h
@@ -23,7 +23,9 @@ public:
~ZOS() override;
bool isPICDefault() const override { return false; }
- bool isPIEDefault() const override { return false; }
+ bool isPIEDefault(const llvm::opt::ArgList &Args) const override {
+ return false;
+ }
bool isPICDefaultForced() const override { return false; }
bool IsIntegratedAssemblerDefault() const override { return true; }
diff --git a/clang/lib/Driver/Types.cpp b/clang/lib/Driver/Types.cpp
index 3cb2d6e8f6fd..1bd187ad2fc0 100644
--- a/clang/lib/Driver/Types.cpp
+++ b/clang/lib/Driver/Types.cpp
@@ -362,46 +362,7 @@ types::getCompilationPhases(ID Id, phases::ID LastPhase) {
llvm::SmallVector<phases::ID, phases::MaxNumberOfPhases>
types::getCompilationPhases(const clang::driver::Driver &Driver,
llvm::opt::DerivedArgList &DAL, ID Id) {
- phases::ID LastPhase;
-
- // Filter to compiler mode. When the compiler is run as a preprocessor then
- // compilation is not an option.
- // -S runs the compiler in Assembly listing mode.
- if (Driver.CCCIsCPP() || DAL.getLastArg(options::OPT_E) ||
- DAL.getLastArg(options::OPT__SLASH_EP) ||
- DAL.getLastArg(options::OPT_M, options::OPT_MM) ||
- DAL.getLastArg(options::OPT__SLASH_P))
- LastPhase = phases::Preprocess;
-
- // --precompile only runs up to precompilation.
- // This is a clang extension and is not compatible with GCC.
- else if (DAL.getLastArg(options::OPT__precompile))
- LastPhase = phases::Precompile;
-
- // -{fsyntax-only,-analyze,emit-ast} only run up to the compiler.
- else if (DAL.getLastArg(options::OPT_fsyntax_only) ||
- DAL.getLastArg(options::OPT_print_supported_cpus) ||
- DAL.getLastArg(options::OPT_module_file_info) ||
- DAL.getLastArg(options::OPT_verify_pch) ||
- DAL.getLastArg(options::OPT_rewrite_objc) ||
- DAL.getLastArg(options::OPT_rewrite_legacy_objc) ||
- DAL.getLastArg(options::OPT__migrate) ||
- DAL.getLastArg(options::OPT__analyze) ||
- DAL.getLastArg(options::OPT_emit_ast))
- LastPhase = phases::Compile;
-
- else if (DAL.getLastArg(options::OPT_S) ||
- DAL.getLastArg(options::OPT_emit_llvm))
- LastPhase = phases::Backend;
-
- else if (DAL.getLastArg(options::OPT_c))
- LastPhase = phases::Assemble;
-
- // Generally means, do every phase until Link.
- else
- LastPhase = phases::LastPhase;
-
- return types::getCompilationPhases(Id, LastPhase);
+ return types::getCompilationPhases(Id, Driver.getFinalPhase(DAL));
}
ID types::lookupCXXTypeForCType(ID Id) {
diff --git a/clang/lib/Edit/EditedSource.cpp b/clang/lib/Edit/EditedSource.cpp
index 74e6005faeb0..ee57660b8c72 100644
--- a/clang/lib/Edit/EditedSource.cpp
+++ b/clang/lib/Edit/EditedSource.cpp
@@ -60,7 +60,7 @@ void EditedSource::finishedCommit() {
MacroArgUse ArgUse;
std::tie(ExpLoc, ArgUse) = ExpArg;
auto &ArgUses = ExpansionToArgMap[ExpLoc];
- if (llvm::find(ArgUses, ArgUse) == ArgUses.end())
+ if (!llvm::is_contained(ArgUses, ArgUse))
ArgUses.push_back(ArgUse);
}
CurrCommitMacroArgExps.clear();
@@ -314,8 +314,8 @@ bool EditedSource::commit(const Commit &commit) {
static bool canBeJoined(char left, char right, const LangOptions &LangOpts) {
// FIXME: Should use TokenConcatenation to make sure we don't allow stuff like
// making two '<' adjacent.
- return !(Lexer::isIdentifierBodyChar(left, LangOpts) &&
- Lexer::isIdentifierBodyChar(right, LangOpts));
+ return !(Lexer::isAsciiIdentifierContinueChar(left, LangOpts) &&
+ Lexer::isAsciiIdentifierContinueChar(right, LangOpts));
}
/// Returns true if it is ok to eliminate the trailing whitespace between
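
A minimal sketch of the idiom adopted above; llvm::is_contained and llvm::find both come from llvm/ADT/STLExtras.h, and the container and element here are illustrative:

#include "llvm/ADT/STLExtras.h"
#include <vector>

static void addUnique(std::vector<int> &Uses, int Use) {
  // Equivalent to: if (llvm::find(Uses, Use) == Uses.end())
  if (!llvm::is_contained(Uses, Use))
    Uses.push_back(Use);
}
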
diff --git a/clang/lib/Format/BreakableToken.cpp b/clang/lib/Format/BreakableToken.cpp
index 455904895848..968b35bfda23 100644
--- a/clang/lib/Format/BreakableToken.cpp
+++ b/clang/lib/Format/BreakableToken.cpp
@@ -779,8 +779,7 @@ BreakableLineCommentSection::BreakableLineCommentSection(
Lines[i] = Lines[i].ltrim(Blanks);
StringRef IndentPrefix = getLineCommentIndentPrefix(Lines[i], Style);
OriginalPrefix[i] = IndentPrefix;
- const unsigned SpacesInPrefix =
- std::count(IndentPrefix.begin(), IndentPrefix.end(), ' ');
+ const unsigned SpacesInPrefix = llvm::count(IndentPrefix, ' ');
// On the first line of the comment section we calculate how many spaces
// are to be added or removed, all lines after that just get only the
diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp
index 8fbc15f27922..1e4f5690ef24 100644
--- a/clang/lib/Format/ContinuationIndenter.cpp
+++ b/clang/lib/Format/ContinuationIndenter.cpp
@@ -14,10 +14,12 @@
#include "ContinuationIndenter.h"
#include "BreakableToken.h"
#include "FormatInternal.h"
+#include "FormatToken.h"
#include "WhitespaceManager.h"
#include "clang/Basic/OperatorPrecedence.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Format/Format.h"
+#include "llvm/ADT/StringSet.h"
#include "llvm/Support/Debug.h"
#define DEBUG_TYPE "format-indenter"
@@ -491,11 +493,24 @@ bool ContinuationIndenter::mustBreak(const LineState &State) {
return true;
}
+ // Break after the closing parenthesis of TypeScript decorators before
+ // functions, getters and setters.
+ static const llvm::StringSet<> BreakBeforeDecoratedTokens = {"get", "set",
+ "function"};
+ if (Style.Language == FormatStyle::LK_JavaScript &&
+ BreakBeforeDecoratedTokens.contains(Current.TokenText) &&
+ Previous.is(tok::r_paren) && Previous.is(TT_JavaAnnotation)) {
+ return true;
+ }
+
// If the return type spans multiple lines, wrap before the function name.
if (((Current.is(TT_FunctionDeclarationName) &&
// Don't break before a C# function when no break after return type
(!Style.isCSharp() ||
- Style.AlwaysBreakAfterReturnType != FormatStyle::RTBS_None)) ||
+ Style.AlwaysBreakAfterReturnType != FormatStyle::RTBS_None) &&
+ // Don't always break between a JavaScript `function` and the function
+ // name.
+ Style.Language != FormatStyle::LK_JavaScript) ||
(Current.is(tok::kw_operator) && !Previous.is(tok::coloncolon))) &&
!Previous.is(tok::kw_template) && State.Stack.back().BreakBeforeParameter)
return true;
@@ -943,7 +958,7 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State,
State.Line->MustBeDeclaration) ||
(!Style.AllowAllArgumentsOnNextLine &&
!State.Line->MustBeDeclaration) ||
- (!Style.AllowAllConstructorInitializersOnNextLine &&
+ (Style.PackConstructorInitializers != FormatStyle::PCIS_NextLine &&
PreviousIsBreakingCtorInitializerColon) ||
Previous.is(TT_DictLiteral))
State.Stack.back().BreakBeforeParameter = true;
@@ -952,7 +967,7 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State,
// and we allow all arguments on the next line, we should not break
// before the next parameter.
if (PreviousIsBreakingCtorInitializerColon &&
- Style.AllowAllConstructorInitializersOnNextLine)
+ Style.PackConstructorInitializers == FormatStyle::PCIS_NextLine)
State.Stack.back().BreakBeforeParameter = false;
}
@@ -1232,10 +1247,10 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,
? 0
: 2);
State.Stack.back().NestedBlockIndent = State.Stack.back().Indent;
- if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine) {
+ if (Style.PackConstructorInitializers > FormatStyle::PCIS_BinPack) {
State.Stack.back().AvoidBinPacking = true;
State.Stack.back().BreakBeforeParameter =
- !Style.AllowAllConstructorInitializersOnNextLine;
+ Style.PackConstructorInitializers != FormatStyle::PCIS_NextLine;
} else {
State.Stack.back().BreakBeforeParameter = false;
}
@@ -1245,7 +1260,7 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,
State.Stack.back().Indent =
State.FirstIndent + Style.ConstructorInitializerIndentWidth;
State.Stack.back().NestedBlockIndent = State.Stack.back().Indent;
- if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine)
+ if (Style.PackConstructorInitializers > FormatStyle::PCIS_BinPack)
State.Stack.back().AvoidBinPacking = true;
}
if (Current.is(TT_InheritanceColon))
@@ -1592,7 +1607,7 @@ void ContinuationIndenter::moveStatePastScopeCloser(LineState &State) {
// BreakBeforeParameter is calculated based on an incorrect assumption
// (it is checked whether the whole expression fits into one line without
// considering a line break inside a message receiver).
- // We check whether arguements fit after receiver scope closer (into the same
+ // We check whether arguments fit after receiver scope closer (into the same
// line).
if (State.Stack.back().BreakBeforeParameter && Current.MatchingParen &&
Current.MatchingParen->Previous) {
diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp
index 2b860d2a25f7..085cca8853e6 100644
--- a/clang/lib/Format/Format.cpp
+++ b/clang/lib/Format/Format.cpp
@@ -19,6 +19,7 @@
#include "FormatInternal.h"
#include "FormatTokenLexer.h"
#include "NamespaceEndCommentsFixer.h"
+#include "QualifierAlignmentFixer.h"
#include "SortJavaScriptImports.h"
#include "TokenAnalyzer.h"
#include "TokenAnnotator.h"
@@ -126,6 +127,16 @@ template <> struct ScalarEnumerationTraits<FormatStyle::ShortBlockStyle> {
}
};
+template <>
+struct ScalarEnumerationTraits<FormatStyle::QualifierAlignmentStyle> {
+ static void enumeration(IO &IO, FormatStyle::QualifierAlignmentStyle &Value) {
+ IO.enumCase(Value, "Leave", FormatStyle::QAS_Leave);
+ IO.enumCase(Value, "Left", FormatStyle::QAS_Left);
+ IO.enumCase(Value, "Right", FormatStyle::QAS_Right);
+ IO.enumCase(Value, "Custom", FormatStyle::QAS_Custom);
+ }
+};
+
template <> struct ScalarEnumerationTraits<FormatStyle::ShortFunctionStyle> {
static void enumeration(IO &IO, FormatStyle::ShortFunctionStyle &Value) {
IO.enumCase(Value, "None", FormatStyle::SFS_None);
@@ -147,7 +158,7 @@ template <> struct ScalarEnumerationTraits<FormatStyle::AlignConsecutiveStyle> {
IO.enumCase(Value, "AcrossEmptyLinesAndComments",
FormatStyle::ACS_AcrossEmptyLinesAndComments);
- // For backward compability.
+ // For backward compatibility.
IO.enumCase(Value, "true", FormatStyle::ACS_Consecutive);
IO.enumCase(Value, "false", FormatStyle::ACS_None);
}
@@ -265,6 +276,17 @@ struct ScalarEnumerationTraits<FormatStyle::BreakInheritanceListStyle> {
};
template <>
+struct ScalarEnumerationTraits<FormatStyle::PackConstructorInitializersStyle> {
+ static void
+ enumeration(IO &IO, FormatStyle::PackConstructorInitializersStyle &Value) {
+ IO.enumCase(Value, "Never", FormatStyle::PCIS_Never);
+ IO.enumCase(Value, "BinPack", FormatStyle::PCIS_BinPack);
+ IO.enumCase(Value, "CurrentLine", FormatStyle::PCIS_CurrentLine);
+ IO.enumCase(Value, "NextLine", FormatStyle::PCIS_NextLine);
+ }
+};
+
+template <>
struct ScalarEnumerationTraits<FormatStyle::EmptyLineAfterAccessModifierStyle> {
static void
enumeration(IO &IO, FormatStyle::EmptyLineAfterAccessModifierStyle &Value) {
@@ -429,9 +451,8 @@ struct ScalarEnumerationTraits<FormatStyle::ReferenceAlignmentStyle> {
};
template <>
-struct ScalarEnumerationTraits<FormatStyle::SpaceBeforeParensOptions> {
- static void enumeration(IO &IO,
- FormatStyle::SpaceBeforeParensOptions &Value) {
+struct ScalarEnumerationTraits<FormatStyle::SpaceBeforeParensStyle> {
+ static void enumeration(IO &IO, FormatStyle::SpaceBeforeParensStyle &Value) {
IO.enumCase(Value, "Never", FormatStyle::SBPO_Never);
IO.enumCase(Value, "ControlStatements",
FormatStyle::SBPO_ControlStatements);
@@ -440,6 +461,7 @@ struct ScalarEnumerationTraits<FormatStyle::SpaceBeforeParensOptions> {
IO.enumCase(Value, "NonEmptyParentheses",
FormatStyle::SBPO_NonEmptyParentheses);
IO.enumCase(Value, "Always", FormatStyle::SBPO_Always);
+ IO.enumCase(Value, "Custom", FormatStyle::SBPO_Custom);
// For backward compatibility.
IO.enumCase(Value, "false", FormatStyle::SBPO_Never);
@@ -552,8 +574,6 @@ template <> struct MappingTraits<FormatStyle> {
IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments);
IO.mapOptional("AllowAllArgumentsOnNextLine",
Style.AllowAllArgumentsOnNextLine);
- IO.mapOptional("AllowAllConstructorInitializersOnNextLine",
- Style.AllowAllConstructorInitializersOnNextLine);
IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine",
Style.AllowAllParametersOfDeclarationOnNextLine);
IO.mapOptional("AllowShortEnumsOnASingleLine",
@@ -632,9 +652,18 @@ template <> struct MappingTraits<FormatStyle> {
IO.mapOptional("BreakStringLiterals", Style.BreakStringLiterals);
IO.mapOptional("ColumnLimit", Style.ColumnLimit);
IO.mapOptional("CommentPragmas", Style.CommentPragmas);
+ IO.mapOptional("QualifierAlignment", Style.QualifierAlignment);
+
+ // Default order for left/right based qualifier alignment.
+ if (Style.QualifierAlignment == FormatStyle::QAS_Right) {
+ Style.QualifierOrder = {"type", "const", "volatile"};
+ } else if (Style.QualifierAlignment == FormatStyle::QAS_Left) {
+ Style.QualifierOrder = {"const", "volatile", "type"};
+ } else if (Style.QualifierAlignment == FormatStyle::QAS_Custom) {
+ IO.mapOptional("QualifierOrder", Style.QualifierOrder);
+ }
+
IO.mapOptional("CompactNamespaces", Style.CompactNamespaces);
- IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine",
- Style.ConstructorInitializerAllOnOneLineOrOnePerLine);
IO.mapOptional("ConstructorInitializerIndentWidth",
Style.ConstructorInitializerIndentWidth);
IO.mapOptional("ContinuationIndentWidth", Style.ContinuationIndentWidth);
@@ -648,6 +677,41 @@ template <> struct MappingTraits<FormatStyle> {
Style.EmptyLineBeforeAccessModifier);
IO.mapOptional("ExperimentalAutoDetectBinPacking",
Style.ExperimentalAutoDetectBinPacking);
+
+ IO.mapOptional("PackConstructorInitializers",
+ Style.PackConstructorInitializers);
+ // For backward compatibility:
+ // The default value of ConstructorInitializerAllOnOneLineOrOnePerLine was
+ // false unless BasedOnStyle was Google or Chromium whereas that of
+ // AllowAllConstructorInitializersOnNextLine was always true, so the
+ // equivalent default value of PackConstructorInitializers is PCIS_NextLine
+ // for Google/Chromium or PCIS_BinPack otherwise. If the deprecated options
+ // had a non-default value while PackConstructorInitializers has a default
+ // value, set the latter to an equivalent non-default value if needed.
+ StringRef BasedOn;
+ IO.mapOptional("BasedOnStyle", BasedOn);
+ const bool IsGoogleOrChromium = BasedOn.equals_insensitive("google") ||
+ BasedOn.equals_insensitive("chromium");
+ bool OnCurrentLine = IsGoogleOrChromium;
+ bool OnNextLine = true;
+ IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine",
+ OnCurrentLine);
+ IO.mapOptional("AllowAllConstructorInitializersOnNextLine", OnNextLine);
+ if (!IsGoogleOrChromium) {
+ if (Style.PackConstructorInitializers == FormatStyle::PCIS_BinPack &&
+ OnCurrentLine) {
+ Style.PackConstructorInitializers = OnNextLine
+ ? FormatStyle::PCIS_NextLine
+ : FormatStyle::PCIS_CurrentLine;
+ }
+ } else if (Style.PackConstructorInitializers ==
+ FormatStyle::PCIS_NextLine) {
+ if (!OnCurrentLine)
+ Style.PackConstructorInitializers = FormatStyle::PCIS_BinPack;
+ else if (!OnNextLine)
+ Style.PackConstructorInitializers = FormatStyle::PCIS_CurrentLine;
+ }
+
IO.mapOptional("FixNamespaceComments", Style.FixNamespaceComments);
IO.mapOptional("ForEachMacros", Style.ForEachMacros);
IO.mapOptional("IfMacros", Style.IfMacros);
@@ -723,6 +787,7 @@ template <> struct MappingTraits<FormatStyle> {
IO.mapOptional("SpaceBeforeInheritanceColon",
Style.SpaceBeforeInheritanceColon);
IO.mapOptional("SpaceBeforeParens", Style.SpaceBeforeParens);
+ IO.mapOptional("SpaceBeforeParensOptions", Style.SpaceBeforeParensOptions);
IO.mapOptional("SpaceAroundPointerQualifiers",
Style.SpaceAroundPointerQualifiers);
IO.mapOptional("SpaceBeforeRangeBasedForLoopColon",
@@ -781,6 +846,20 @@ template <> struct MappingTraits<FormatStyle::BraceWrappingFlags> {
}
};
+template <> struct MappingTraits<FormatStyle::SpaceBeforeParensCustom> {
+ static void mapping(IO &IO, FormatStyle::SpaceBeforeParensCustom &Spacing) {
+ IO.mapOptional("AfterControlStatements", Spacing.AfterControlStatements);
+ IO.mapOptional("AfterForeachMacros", Spacing.AfterForeachMacros);
+ IO.mapOptional("AfterFunctionDefinitionName",
+ Spacing.AfterFunctionDefinitionName);
+ IO.mapOptional("AfterFunctionDeclarationName",
+ Spacing.AfterFunctionDeclarationName);
+ IO.mapOptional("AfterIfMacros", Spacing.AfterIfMacros);
+ IO.mapOptional("BeforeNonEmptyParentheses",
+ Spacing.BeforeNonEmptyParentheses);
+ }
+};
+
template <> struct MappingTraits<FormatStyle::RawStringFormat> {
static void mapping(IO &IO, FormatStyle::RawStringFormat &Format) {
IO.mapOptional("Language", Format.Language);
@@ -863,14 +942,21 @@ std::string ParseErrorCategory::message(int EV) const {
return "Unsuitable";
case ParseError::BinPackTrailingCommaConflict:
return "trailing comma insertion cannot be used with bin packing";
+ case ParseError::InvalidQualifierSpecified:
+ return "Invalid qualifier specified in QualifierOrder";
+ case ParseError::DuplicateQualifierSpecified:
+ return "Duplicate qualifier specified in QualfierOrder";
+ case ParseError::MissingQualifierType:
+ return "Missing type in QualfierOrder";
+ case ParseError::MissingQualifierOrder:
+ return "Missing QualfierOrder";
}
llvm_unreachable("unexpected parse error");
}
-static FormatStyle expandPresets(const FormatStyle &Style) {
- if (Style.BreakBeforeBraces == FormatStyle::BS_Custom)
- return Style;
- FormatStyle Expanded = Style;
+static void expandPresetsBraceWrapping(FormatStyle &Expanded) {
+ if (Expanded.BreakBeforeBraces == FormatStyle::BS_Custom)
+ return;
Expanded.BraceWrapping = {/*AfterCaseLabel=*/false,
/*AfterClass=*/false,
/*AfterControlStatement=*/FormatStyle::BWACS_Never,
@@ -889,7 +975,7 @@ static FormatStyle expandPresets(const FormatStyle &Style) {
/*SplitEmptyFunction=*/true,
/*SplitEmptyRecord=*/true,
/*SplitEmptyNamespace=*/true};
- switch (Style.BreakBeforeBraces) {
+ switch (Expanded.BreakBeforeBraces) {
case FormatStyle::BS_Linux:
Expanded.BraceWrapping.AfterClass = true;
Expanded.BraceWrapping.AfterFunction = true;
@@ -970,7 +1056,33 @@ static FormatStyle expandPresets(const FormatStyle &Style) {
default:
break;
}
- return Expanded;
+}
+
+static void expandPresetsSpaceBeforeParens(FormatStyle &Expanded) {
+ if (Expanded.SpaceBeforeParens == FormatStyle::SBPO_Custom)
+ return;
+ // Reset all flags
+ Expanded.SpaceBeforeParensOptions = {};
+
+ switch (Expanded.SpaceBeforeParens) {
+ case FormatStyle::SBPO_Never:
+ break;
+ case FormatStyle::SBPO_ControlStatements:
+ Expanded.SpaceBeforeParensOptions.AfterControlStatements = true;
+ Expanded.SpaceBeforeParensOptions.AfterForeachMacros = true;
+ Expanded.SpaceBeforeParensOptions.AfterIfMacros = true;
+ break;
+ case FormatStyle::SBPO_ControlStatementsExceptControlMacros:
+ Expanded.SpaceBeforeParensOptions.AfterControlStatements = true;
+ break;
+ case FormatStyle::SBPO_NonEmptyParentheses:
+ Expanded.SpaceBeforeParensOptions.BeforeNonEmptyParentheses = true;
+ break;
+ case FormatStyle::SBPO_Always:
+ break;
+ default:
+ break;
+ }
}
FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) {
@@ -988,7 +1100,6 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) {
LLVMStyle.AlignConsecutiveDeclarations = FormatStyle::ACS_None;
LLVMStyle.AlignConsecutiveMacros = FormatStyle::ACS_None;
LLVMStyle.AllowAllArgumentsOnNextLine = true;
- LLVMStyle.AllowAllConstructorInitializersOnNextLine = true;
LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true;
LLVMStyle.AllowShortEnumsOnASingleLine = true;
LLVMStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_All;
@@ -1034,15 +1145,19 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) {
LLVMStyle.ColumnLimit = 80;
LLVMStyle.CommentPragmas = "^ IWYU pragma:";
LLVMStyle.CompactNamespaces = false;
- LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false;
LLVMStyle.ConstructorInitializerIndentWidth = 4;
LLVMStyle.ContinuationIndentWidth = 4;
LLVMStyle.Cpp11BracedListStyle = true;
+
+ // Qualifier ordering is off by default.
+ LLVMStyle.QualifierAlignment = FormatStyle::QAS_Leave;
+
LLVMStyle.DeriveLineEnding = true;
LLVMStyle.DerivePointerAlignment = false;
LLVMStyle.EmptyLineAfterAccessModifier = FormatStyle::ELAAMS_Never;
LLVMStyle.EmptyLineBeforeAccessModifier = FormatStyle::ELBAMS_LogicalBlock;
LLVMStyle.ExperimentalAutoDetectBinPacking = false;
+ LLVMStyle.PackConstructorInitializers = FormatStyle::PCIS_BinPack;
LLVMStyle.FixNamespaceComments = true;
LLVMStyle.ForEachMacros.push_back("foreach");
LLVMStyle.ForEachMacros.push_back("Q_FOREACH");
@@ -1099,6 +1214,9 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) {
LLVMStyle.SpaceBeforeCtorInitializerColon = true;
LLVMStyle.SpaceBeforeInheritanceColon = true;
LLVMStyle.SpaceBeforeParens = FormatStyle::SBPO_ControlStatements;
+ LLVMStyle.SpaceBeforeParensOptions.AfterControlStatements = true;
+ LLVMStyle.SpaceBeforeParensOptions.AfterForeachMacros = true;
+ LLVMStyle.SpaceBeforeParensOptions.AfterIfMacros = true;
LLVMStyle.SpaceBeforeRangeBasedForLoopColon = true;
LLVMStyle.SpaceBeforeAssignmentOperators = true;
LLVMStyle.SpaceBeforeCpp11BracedList = false;
@@ -1158,7 +1276,6 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) {
GoogleStyle.AllowShortLoopsOnASingleLine = true;
GoogleStyle.AlwaysBreakBeforeMultilineStrings = true;
GoogleStyle.AlwaysBreakTemplateDeclarations = FormatStyle::BTDS_Yes;
- GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
GoogleStyle.DerivePointerAlignment = true;
GoogleStyle.IncludeStyle.IncludeCategories = {{"^<ext/.*\\.h>", 2, 0, false},
{"^<.*\\.h>", 1, 0, false},
@@ -1171,6 +1288,7 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) {
GoogleStyle.ObjCBinPackProtocolList = FormatStyle::BPS_Never;
GoogleStyle.ObjCSpaceAfterProperty = false;
GoogleStyle.ObjCSpaceBeforeProtocolList = true;
+ GoogleStyle.PackConstructorInitializers = FormatStyle::PCIS_NextLine;
GoogleStyle.PointerAlignment = FormatStyle::PAS_Left;
GoogleStyle.RawStringFormats = {
{
@@ -1311,7 +1429,7 @@ FormatStyle getChromiumStyle(FormatStyle::LanguageKind Language) {
ChromiumStyle.ContinuationIndentWidth = 8;
ChromiumStyle.IndentWidth = 4;
// See styleguide for import groups:
- // https://chromium.googlesource.com/chromium/src/+/master/styleguide/java/java.md#Import-Order
+ // https://chromium.googlesource.com/chromium/src/+/refs/heads/main/styleguide/java/java.md#Import-Order
ChromiumStyle.JavaImportGroups = {
"android",
"androidx",
@@ -1471,6 +1589,37 @@ bool getPredefinedStyle(StringRef Name, FormatStyle::LanguageKind Language,
return true;
}
+ParseError validateQualifierOrder(FormatStyle *Style) {
+ // An empty list means there is nothing to reorder; report it as missing.
+ if (Style->QualifierOrder.empty())
+ return ParseError::MissingQualifierOrder;
+
+ // Ensure the list contains only currently valid qualifiers.
+ for (const auto &Qualifier : Style->QualifierOrder) {
+ if (Qualifier == "type")
+ continue;
+ auto token =
+ LeftRightQualifierAlignmentFixer::getTokenFromQualifier(Qualifier);
+ if (token == tok::identifier)
+ return ParseError::InvalidQualifierSpecified;
+ }
+ // Ensure the list is unique (no duplicates).
+ std::set<std::string> UniqueQualifiers(Style->QualifierOrder.begin(),
+ Style->QualifierOrder.end());
+ if (Style->QualifierOrder.size() != UniqueQualifiers.size()) {
+ LLVM_DEBUG(llvm::dbgs()
+ << "Duplicate Qualifiers " << Style->QualifierOrder.size()
+ << " vs " << UniqueQualifiers.size() << "\n");
+ return ParseError::DuplicateQualifierSpecified;
+ }
+
+ auto type = std::find(Style->QualifierOrder.begin(),
+ Style->QualifierOrder.end(), "type");
+ if (type == Style->QualifierOrder.end())
+ return ParseError::MissingQualifierType;
+ return ParseError::Success;
+}
+
std::error_code parseConfiguration(llvm::MemoryBufferRef Config,
FormatStyle *Style, bool AllowUnknownOptions,
llvm::SourceMgr::DiagHandlerTy DiagHandler,
@@ -1532,6 +1681,8 @@ std::error_code parseConfiguration(llvm::MemoryBufferRef Config,
// See comment on FormatStyle::TSC_Wrapped.
return make_error_code(ParseError::BinPackTrailingCommaConflict);
}
+ if (Style->QualifierAlignment != FormatStyle::QAS_Leave)
+ return make_error_code(validateQualifierOrder(Style));
return make_error_code(ParseError::Success);
}
@@ -1541,8 +1692,11 @@ std::string configurationAsText(const FormatStyle &Style) {
llvm::yaml::Output Output(Stream);
// We use the same mapping method for input and output, so we need a non-const
// reference here.
- FormatStyle NonConstStyle = expandPresets(Style);
+ FormatStyle NonConstStyle = Style;
+ expandPresetsBraceWrapping(NonConstStyle);
+ expandPresetsSpaceBeforeParens(NonConstStyle);
Output << NonConstStyle;
+
return Stream.str();
}
@@ -2821,7 +2975,9 @@ reformat(const FormatStyle &Style, StringRef Code,
ArrayRef<tooling::Range> Ranges, unsigned FirstStartColumn,
unsigned NextStartColumn, unsigned LastStartColumn, StringRef FileName,
FormattingAttemptStatus *Status) {
- FormatStyle Expanded = expandPresets(Style);
+ FormatStyle Expanded = Style;
+ expandPresetsBraceWrapping(Expanded);
+ expandPresetsSpaceBeforeParens(Expanded);
if (Expanded.DisableFormat)
return {tooling::Replacements(), 0};
if (isLikelyXml(Code))
@@ -2833,8 +2989,10 @@ reformat(const FormatStyle &Style, StringRef Code,
if (Style.isJson()) {
std::vector<tooling::Range> Ranges(1, tooling::Range(0, Code.size()));
auto Env =
- std::make_unique<Environment>(Code, FileName, Ranges, FirstStartColumn,
+ Environment::make(Code, FileName, Ranges, FirstStartColumn,
NextStartColumn, LastStartColumn);
+ if (!Env)
+ return {};
// Perform the actual formatting pass.
tooling::Replacements Replaces =
Formatter(*Env, Style, Status).process().first;
@@ -2853,6 +3011,15 @@ reformat(const FormatStyle &Style, StringRef Code,
AnalyzerPass;
SmallVector<AnalyzerPass, 4> Passes;
+ if (Style.isCpp() && Style.QualifierAlignment != FormatStyle::QAS_Leave) {
+ Passes.emplace_back([&](const Environment &Env) {
+ return QualifierAlignmentFixer(Env, Expanded, Code, Ranges,
+ FirstStartColumn, NextStartColumn,
+ LastStartColumn, FileName)
+ .process();
+ });
+ }
+
if (Style.Language == FormatStyle::LK_Cpp) {
if (Style.FixNamespaceComments)
Passes.emplace_back([&](const Environment &Env) {
@@ -2881,9 +3048,10 @@ reformat(const FormatStyle &Style, StringRef Code,
return TrailingCommaInserter(Env, Expanded).process();
});
- auto Env =
- std::make_unique<Environment>(Code, FileName, Ranges, FirstStartColumn,
- NextStartColumn, LastStartColumn);
+ auto Env = Environment::make(Code, FileName, Ranges, FirstStartColumn,
+ NextStartColumn, LastStartColumn);
+ if (!Env)
+ return {};
llvm::Optional<std::string> CurrentCode = None;
tooling::Replacements Fixes;
unsigned Penalty = 0;
@@ -2896,10 +3064,12 @@ reformat(const FormatStyle &Style, StringRef Code,
Penalty += PassFixes.second;
if (I + 1 < E) {
CurrentCode = std::move(*NewCode);
- Env = std::make_unique<Environment>(
+ Env = Environment::make(
*CurrentCode, FileName,
tooling::calculateRangesAfterReplacements(Fixes, Ranges),
FirstStartColumn, NextStartColumn, LastStartColumn);
+ if (!Env)
+ return {};
}
}
}
@@ -2925,7 +3095,10 @@ tooling::Replacements cleanup(const FormatStyle &Style, StringRef Code,
// cleanups only apply to C++ (they mostly concern ctor commas etc.)
if (Style.Language != FormatStyle::LK_Cpp)
return tooling::Replacements();
- return Cleaner(Environment(Code, FileName, Ranges), Style).process().first;
+ auto Env = Environment::make(Code, FileName, Ranges);
+ if (!Env)
+ return {};
+ return Cleaner(*Env, Style).process().first;
}
tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
@@ -2942,7 +3115,10 @@ tooling::Replacements fixNamespaceEndComments(const FormatStyle &Style,
StringRef Code,
ArrayRef<tooling::Range> Ranges,
StringRef FileName) {
- return NamespaceEndCommentsFixer(Environment(Code, FileName, Ranges), Style)
+ auto Env = Environment::make(Code, FileName, Ranges);
+ if (!Env)
+ return {};
+ return NamespaceEndCommentsFixer(*Env, Style)
.process()
.first;
}
@@ -2951,7 +3127,10 @@ tooling::Replacements sortUsingDeclarations(const FormatStyle &Style,
StringRef Code,
ArrayRef<tooling::Range> Ranges,
StringRef FileName) {
- return UsingDeclarationsSorter(Environment(Code, FileName, Ranges), Style)
+ auto Env = Environment::make(Code, FileName, Ranges);
+ if (!Env)
+ return {};
+ return UsingDeclarationsSorter(*Env, Style)
.process()
.first;
}
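
A minimal usage sketch of the new options through the public clang-format API; the style values, the input string, and the function name are illustrative, not part of this patch:

#include "clang/Format/Format.h"
#include <vector>

clang::tooling::Replacements formatWithNewOptions(llvm::StringRef Code) {
  using clang::format::FormatStyle;
  FormatStyle Style = clang::format::getLLVMStyle();
  // Exercise the options added above: custom qualifier ordering, constructor
  // initializer packing, and fine-grained space-before-parens control.
  Style.QualifierAlignment = FormatStyle::QAS_Custom;
  Style.QualifierOrder = {"inline", "static", "type", "const"};
  Style.PackConstructorInitializers = FormatStyle::PCIS_NextLine;
  Style.SpaceBeforeParens = FormatStyle::SBPO_Custom;
  Style.SpaceBeforeParensOptions.AfterControlStatements = true;
  std::vector<clang::tooling::Range> Ranges(
      1, clang::tooling::Range(0, Code.size()));
  return clang::format::reformat(Style, Code, Ranges, "example.cpp");
}
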
diff --git a/clang/lib/Format/FormatToken.cpp b/clang/lib/Format/FormatToken.cpp
index 8e4994f4c0d5..6768f041135c 100644
--- a/clang/lib/Format/FormatToken.cpp
+++ b/clang/lib/Format/FormatToken.cpp
@@ -53,6 +53,7 @@ bool FormatToken::isSimpleTypeSpecifier() const {
case tok::kw___bf16:
case tok::kw__Float16:
case tok::kw___float128:
+ case tok::kw___ibm128:
case tok::kw_wchar_t:
case tok::kw_bool:
case tok::kw___underlying_type:
diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h
index 0506cd554bcb..06d51dd95f50 100644
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -934,8 +934,8 @@ struct AdditionalKeywords {
// already initialized.
JsExtraKeywords = std::unordered_set<IdentifierInfo *>(
{kw_as, kw_async, kw_await, kw_declare, kw_finally, kw_from,
- kw_function, kw_get, kw_import, kw_is, kw_let, kw_module, kw_readonly,
- kw_set, kw_type, kw_typeof, kw_var, kw_yield,
+ kw_function, kw_get, kw_import, kw_is, kw_let, kw_module, kw_override,
+ kw_readonly, kw_set, kw_type, kw_typeof, kw_var, kw_yield,
// Keywords from the Java section.
kw_abstract, kw_extends, kw_implements, kw_instanceof, kw_interface});
@@ -1060,7 +1060,7 @@ struct AdditionalKeywords {
bool IsJavaScriptIdentifier(const FormatToken &Tok,
bool AcceptIdentifierName = true) const {
// Based on the list of JavaScript & TypeScript keywords here:
- // https://github.com/microsoft/TypeScript/blob/master/src/compiler/scanner.ts#L74
+ // https://github.com/microsoft/TypeScript/blob/main/src/compiler/scanner.ts#L74
switch (Tok.Tok.getKind()) {
case tok::kw_break:
case tok::kw_case:
diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp
index a9cfb4a247f0..8075756cca03 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -506,11 +506,11 @@ void FormatTokenLexer::tryParseJSRegexLiteral() {
return;
FormatToken *Prev = nullptr;
- for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; ++I) {
+ for (FormatToken *FT : llvm::drop_begin(llvm::reverse(Tokens))) {
// NB: Because previous pointers are not initialized yet, this cannot use
// Token.getPreviousNonComment.
- if ((*I)->isNot(tok::comment)) {
- Prev = *I;
+ if (FT->isNot(tok::comment)) {
+ Prev = FT;
break;
}
}
diff --git a/clang/lib/Format/MacroExpander.cpp b/clang/lib/Format/MacroExpander.cpp
index e50c80446963..de96cb24ba1f 100644
--- a/clang/lib/Format/MacroExpander.cpp
+++ b/clang/lib/Format/MacroExpander.cpp
@@ -1,9 +1,8 @@
//===--- MacroExpander.cpp - Format C++ code --------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -53,7 +52,7 @@ public:
Current = Tokens[0];
}
- // Parse the token stream and return the corresonding Definition object.
+ // Parse the token stream and return the corresponding Definition object.
// Returns an empty definition object with a null-Name on error.
MacroExpander::Definition parse() {
if (!Current->is(tok::identifier))
diff --git a/clang/lib/Format/Macros.h b/clang/lib/Format/Macros.h
index 591ef8b5be3c..da03beb09145 100644
--- a/clang/lib/Format/Macros.h
+++ b/clang/lib/Format/Macros.h
@@ -1,9 +1,8 @@
//===--- MacroExpander.h - Format C++ code ----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/clang/lib/Format/QualifierAlignmentFixer.cpp b/clang/lib/Format/QualifierAlignmentFixer.cpp
new file mode 100644
index 000000000000..5a89225c7fc8
--- /dev/null
+++ b/clang/lib/Format/QualifierAlignmentFixer.cpp
@@ -0,0 +1,468 @@
+//===--- LeftRightQualifierAlignmentFixer.cpp -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements LeftRightQualifierAlignmentFixer, a TokenAnalyzer that
+/// enforces either left or right const depending on the style.
+///
+//===----------------------------------------------------------------------===//
+
+#include "QualifierAlignmentFixer.h"
+#include "FormatToken.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Regex.h"
+
+#include <algorithm>
+
+#define DEBUG_TYPE "format-qualifier-alignment-fixer"
+
+namespace clang {
+namespace format {
+
+QualifierAlignmentFixer::QualifierAlignmentFixer(
+ const Environment &Env, const FormatStyle &Style, StringRef &Code,
+ ArrayRef<tooling::Range> Ranges, unsigned FirstStartColumn,
+ unsigned NextStartColumn, unsigned LastStartColumn, StringRef FileName)
+ : TokenAnalyzer(Env, Style), Code(Code), Ranges(Ranges),
+ FirstStartColumn(FirstStartColumn), NextStartColumn(NextStartColumn),
+ LastStartColumn(LastStartColumn), FileName(FileName) {
+ std::vector<std::string> LeftOrder;
+ std::vector<std::string> RightOrder;
+ std::vector<tok::TokenKind> ConfiguredQualifierTokens;
+ PrepareLeftRightOrdering(Style.QualifierOrder, LeftOrder, RightOrder,
+ ConfiguredQualifierTokens);
+
+ // Handle the left and right alignment separately.
+ for (const auto &Qualifier : LeftOrder) {
+ Passes.emplace_back(
+ [&, Qualifier, ConfiguredQualifierTokens](const Environment &Env) {
+ return LeftRightQualifierAlignmentFixer(Env, Style, Qualifier,
+ ConfiguredQualifierTokens,
+ /*RightAlign=*/false)
+ .process();
+ });
+ }
+ for (const auto &Qualifier : RightOrder) {
+ Passes.emplace_back(
+ [&, Qualifier, ConfiguredQualifierTokens](const Environment &Env) {
+ return LeftRightQualifierAlignmentFixer(Env, Style, Qualifier,
+ ConfiguredQualifierTokens,
+ /*RightAlign=*/true)
+ .process();
+ });
+ }
+}
+
+std::pair<tooling::Replacements, unsigned> QualifierAlignmentFixer::analyze(
+ TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
+ FormatTokenLexer &Tokens) {
+ auto Env = Environment::make(Code, FileName, Ranges, FirstStartColumn,
+ NextStartColumn, LastStartColumn);
+ if (!Env)
+ return {};
+ llvm::Optional<std::string> CurrentCode = None;
+ tooling::Replacements Fixes;
+ for (size_t I = 0, E = Passes.size(); I < E; ++I) {
+ std::pair<tooling::Replacements, unsigned> PassFixes = Passes[I](*Env);
+ auto NewCode = applyAllReplacements(
+ CurrentCode ? StringRef(*CurrentCode) : Code, PassFixes.first);
+ if (NewCode) {
+ Fixes = Fixes.merge(PassFixes.first);
+ if (I + 1 < E) {
+ CurrentCode = std::move(*NewCode);
+ Env = Environment::make(
+ *CurrentCode, FileName,
+ tooling::calculateRangesAfterReplacements(Fixes, Ranges),
+ FirstStartColumn, NextStartColumn, LastStartColumn);
+ if (!Env)
+ return {};
+ }
+ }
+ }
+
+ // Don't make replacements that replace nothing.
+ tooling::Replacements NonNoOpFixes;
+
+ for (auto I = Fixes.begin(), E = Fixes.end(); I != E; ++I) {
+ StringRef OriginalCode = Code.substr(I->getOffset(), I->getLength());
+
+ if (!OriginalCode.equals(I->getReplacementText())) {
+ auto Err = NonNoOpFixes.add(*I);
+ if (Err)
+ llvm::errs() << "Error adding replacements : "
+ << llvm::toString(std::move(Err)) << "\n";
+ }
+ }
+ return {NonNoOpFixes, 0};
+}
+
+static void replaceToken(const SourceManager &SourceMgr,
+ tooling::Replacements &Fixes,
+ const CharSourceRange &Range, std::string NewText) {
+ auto Replacement = tooling::Replacement(SourceMgr, Range, NewText);
+ auto Err = Fixes.add(Replacement);
+
+ if (Err)
+ llvm::errs() << "Error while rearranging Qualifier : "
+ << llvm::toString(std::move(Err)) << "\n";
+}
+
+static void removeToken(const SourceManager &SourceMgr,
+ tooling::Replacements &Fixes,
+ const FormatToken *First) {
+ auto Range = CharSourceRange::getCharRange(First->getStartOfNonWhitespace(),
+ First->Tok.getEndLoc());
+ replaceToken(SourceMgr, Fixes, Range, "");
+}
+
+static void insertQualifierAfter(const SourceManager &SourceMgr,
+ tooling::Replacements &Fixes,
+ const FormatToken *First,
+ const std::string &Qualifier) {
+ FormatToken *Next = First->Next;
+ if (!Next)
+ return;
+ auto Range = CharSourceRange::getCharRange(Next->getStartOfNonWhitespace(),
+ Next->Tok.getEndLoc());
+
+ std::string NewText = " " + Qualifier + " ";
+ NewText += Next->TokenText;
+ replaceToken(SourceMgr, Fixes, Range, NewText);
+}
+
+static void insertQualifierBefore(const SourceManager &SourceMgr,
+ tooling::Replacements &Fixes,
+ const FormatToken *First,
+ const std::string &Qualifier) {
+ auto Range = CharSourceRange::getCharRange(First->getStartOfNonWhitespace(),
+ First->Tok.getEndLoc());
+
+ std::string NewText = " " + Qualifier + " ";
+ NewText += First->TokenText;
+
+ replaceToken(SourceMgr, Fixes, Range, NewText);
+}
+
+static bool endsWithSpace(const std::string &s) {
+ if (s.empty()) {
+ return false;
+ }
+ return isspace(s.back());
+}
+
+static bool startsWithSpace(const std::string &s) {
+ if (s.empty()) {
+ return false;
+ }
+ return isspace(s.front());
+}
+
+static void rotateTokens(const SourceManager &SourceMgr,
+ tooling::Replacements &Fixes, const FormatToken *First,
+ const FormatToken *Last, bool Left) {
+ auto *End = Last;
+ auto *Begin = First;
+ if (!Left) {
+ End = Last->Next;
+ Begin = First->Next;
+ }
+
+ std::string NewText;
+ // If we are rotating to the left we move the Last token to the front.
+ if (Left) {
+ NewText += Last->TokenText;
+ NewText += " ";
+ }
+
+ // Then move through the other tokens.
+ auto *Tok = Begin;
+ while (Tok != End) {
+ if (!NewText.empty() && !endsWithSpace(NewText)) {
+ NewText += " ";
+ }
+
+ NewText += Tok->TokenText;
+ Tok = Tok->Next;
+ }
+
+ // If we are rotating to the right we move the first token to the back.
+ if (!Left) {
+ if (!NewText.empty() && !startsWithSpace(NewText)) {
+ NewText += " ";
+ }
+ NewText += First->TokenText;
+ }
+
+ auto Range = CharSourceRange::getCharRange(First->getStartOfNonWhitespace(),
+ Last->Tok.getEndLoc());
+
+ replaceToken(SourceMgr, Fixes, Range, NewText);
+}
+
+FormatToken *LeftRightQualifierAlignmentFixer::analyzeRight(
+ const SourceManager &SourceMgr, const AdditionalKeywords &Keywords,
+ tooling::Replacements &Fixes, FormatToken *Tok,
+ const std::string &Qualifier, tok::TokenKind QualifierType) {
+ // We only need to think about streams that begin with a qualifier.
+ if (!Tok->is(QualifierType))
+ return Tok;
+ // Don't concern yourself if nothing follows the qualifier.
+ if (!Tok->Next)
+ return Tok;
+ if (LeftRightQualifierAlignmentFixer::isPossibleMacro(Tok->Next))
+ return Tok;
+
+ FormatToken *Qual = Tok->Next;
+ FormatToken *LastQual = Qual;
+ while (Qual && isQualifierOrType(Qual, ConfiguredQualifierTokens)) {
+ LastQual = Qual;
+ Qual = Qual->Next;
+ }
+ if (LastQual && Qual != LastQual) {
+ rotateTokens(SourceMgr, Fixes, Tok, LastQual, /*Left=*/false);
+ Tok = LastQual;
+ } else if (Tok->startsSequence(QualifierType, tok::identifier,
+ TT_TemplateOpener)) {
+ // Read from the TemplateOpener to the TemplateCloser, as in
+ // `const ArrayRef<int> a;` or `const ArrayRef<int> &a;`.
+ FormatToken *EndTemplate = Tok->Next->Next->MatchingParen;
+ if (EndTemplate) {
+ // Move to the end of any template class members e.g.
+ // `Foo<int>::iterator`.
+ if (EndTemplate->startsSequence(TT_TemplateCloser, tok::coloncolon,
+ tok::identifier))
+ EndTemplate = EndTemplate->Next->Next;
+ }
+ if (EndTemplate && EndTemplate->Next &&
+ !EndTemplate->Next->isOneOf(tok::equal, tok::l_paren)) {
+ insertQualifierAfter(SourceMgr, Fixes, EndTemplate, Qualifier);
+ // Remove the qualifier.
+ removeToken(SourceMgr, Fixes, Tok);
+ return Tok;
+ }
+ } else if (Tok->startsSequence(QualifierType, tok::identifier)) {
+ FormatToken *Next = Tok->Next;
+ // The case `const Foo` -> `Foo const`
+ // The case `const Foo *` -> `Foo const *`
+ // The case `const Foo &` -> `Foo const &`
+ // The case `const Foo &&` -> `Foo const &&`
+ // The case `const std::Foo &&` -> `std::Foo const &&`
+ // The case `const std::Foo<T> &&` -> `std::Foo<T> const &&`
+ while (Next && Next->isOneOf(tok::identifier, tok::coloncolon)) {
+ Next = Next->Next;
+ }
+ if (Next && Next->is(TT_TemplateOpener)) {
+ Next = Next->MatchingParen;
+ // Move to the end of any template class members e.g.
+ // `Foo<int>::iterator`.
+ if (Next && Next->startsSequence(TT_TemplateCloser, tok::coloncolon,
+ tok::identifier)) {
+ Next = Next->Next->Next;
+ return Tok;
+ }
+ assert(Next && "Missing template opener");
+ Next = Next->Next;
+ }
+ if (Next && Next->isOneOf(tok::star, tok::amp, tok::ampamp) &&
+ !Tok->Next->isOneOf(Keywords.kw_override, Keywords.kw_final)) {
+ if (Next->Previous && !Next->Previous->is(QualifierType)) {
+ insertQualifierAfter(SourceMgr, Fixes, Next->Previous, Qualifier);
+ removeToken(SourceMgr, Fixes, Tok);
+ }
+ return Next;
+ }
+ }
+
+ return Tok;
+}
+
+FormatToken *LeftRightQualifierAlignmentFixer::analyzeLeft(
+ const SourceManager &SourceMgr, const AdditionalKeywords &Keywords,
+ tooling::Replacements &Fixes, FormatToken *Tok,
+ const std::string &Qualifier, tok::TokenKind QualifierType) {
+ // If Tok is an identifier and possibly a macro, then don't convert.
+ if (LeftRightQualifierAlignmentFixer::isPossibleMacro(Tok))
+ return Tok;
+
+ FormatToken *Qual = Tok;
+ FormatToken *LastQual = Qual;
+ while (Qual && isQualifierOrType(Qual, ConfiguredQualifierTokens)) {
+ LastQual = Qual;
+ Qual = Qual->Next;
+ if (Qual && Qual->is(QualifierType))
+ break;
+ }
+
+ if (!Qual) {
+ return Tok;
+ }
+
+ if (LastQual && Qual != LastQual && Qual->is(QualifierType)) {
+ rotateTokens(SourceMgr, Fixes, Tok, Qual, /*Left=*/true);
+ Tok = Qual->Next;
+ } else if (Tok->startsSequence(tok::identifier, QualifierType)) {
+ if (Tok->Next->Next && Tok->Next->Next->isOneOf(tok::identifier, tok::star,
+ tok::amp, tok::ampamp)) {
+ // Don't swap `::iterator const` to `::const iterator`.
+ if (!Tok->Previous ||
+ (Tok->Previous && !Tok->Previous->is(tok::coloncolon))) {
+ rotateTokens(SourceMgr, Fixes, Tok, Tok->Next, /*Left=*/true);
+ Tok = Tok->Next;
+ }
+ }
+ }
+ if (Tok->is(TT_TemplateOpener) && Tok->Next &&
+ (Tok->Next->is(tok::identifier) || Tok->Next->isSimpleTypeSpecifier()) &&
+ Tok->Next->Next && Tok->Next->Next->is(QualifierType)) {
+ rotateTokens(SourceMgr, Fixes, Tok->Next, Tok->Next->Next, /*Left=*/true);
+ }
+ if (Tok->startsSequence(tok::identifier) && Tok->Next) {
+ if (Tok->Previous &&
+ Tok->Previous->isOneOf(tok::star, tok::ampamp, tok::amp)) {
+ return Tok;
+ }
+ FormatToken *Next = Tok->Next;
+ // The case `std::Foo<T> const` -> `const std::Foo<T>`
+ while (Next && Next->isOneOf(tok::identifier, tok::coloncolon))
+ Next = Next->Next;
+ if (Next && Next->Previous &&
+ Next->Previous->startsSequence(tok::identifier, TT_TemplateOpener)) {
+ // Read from the TemplateOpener to the end of the TemplateCloser, as in
+ // `const ArrayRef<int> a;` or `const ArrayRef<int> &a;`.
+ assert(Next->MatchingParen && "Missing template closer");
+ Next = Next->MatchingParen->Next;
+
+ // Move to the end of any template class members e.g.
+ // `Foo<int>::iterator`.
+ if (Next && Next->startsSequence(tok::coloncolon, tok::identifier))
+ Next = Next->Next->Next;
+ if (Next && Next->is(QualifierType)) {
+ // Remove the const.
+ insertQualifierBefore(SourceMgr, Fixes, Tok, Qualifier);
+ removeToken(SourceMgr, Fixes, Next);
+ return Next;
+ }
+ }
+ if (Next && Next->Next &&
+ Next->Next->isOneOf(tok::amp, tok::ampamp, tok::star)) {
+ if (Next->is(QualifierType)) {
+ // Remove the qualifier.
+ insertQualifierBefore(SourceMgr, Fixes, Tok, Qualifier);
+ removeToken(SourceMgr, Fixes, Next);
+ return Next;
+ }
+ }
+ }
+ return Tok;
+}
+
+tok::TokenKind LeftRightQualifierAlignmentFixer::getTokenFromQualifier(
+ const std::string &Qualifier) {
+ // Don't let 'type' be an identifier, but steal typeof token.
+ return llvm::StringSwitch<tok::TokenKind>(Qualifier)
+ .Case("type", tok::kw_typeof)
+ .Case("const", tok::kw_const)
+ .Case("volatile", tok::kw_volatile)
+ .Case("static", tok::kw_static)
+ .Case("inline", tok::kw_inline)
+ .Case("constexpr", tok::kw_constexpr)
+ .Case("restrict", tok::kw_restrict)
+ .Default(tok::identifier);
+}
+
+LeftRightQualifierAlignmentFixer::LeftRightQualifierAlignmentFixer(
+ const Environment &Env, const FormatStyle &Style,
+ const std::string &Qualifier,
+ const std::vector<tok::TokenKind> &QualifierTokens, bool RightAlign)
+ : TokenAnalyzer(Env, Style), Qualifier(Qualifier), RightAlign(RightAlign),
+ ConfiguredQualifierTokens(QualifierTokens) {}
+
+std::pair<tooling::Replacements, unsigned>
+LeftRightQualifierAlignmentFixer::analyze(
+ TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
+ FormatTokenLexer &Tokens) {
+ tooling::Replacements Fixes;
+ const AdditionalKeywords &Keywords = Tokens.getKeywords();
+ const SourceManager &SourceMgr = Env.getSourceManager();
+ AffectedRangeMgr.computeAffectedLines(AnnotatedLines);
+
+ tok::TokenKind QualifierToken = getTokenFromQualifier(Qualifier);
+ assert(QualifierToken != tok::identifier && "Unrecognised Qualifier");
+
+ for (size_t I = 0, E = AnnotatedLines.size(); I != E; ++I) {
+ FormatToken *First = AnnotatedLines[I]->First;
+ const auto *Last = AnnotatedLines[I]->Last;
+
+ for (auto *Tok = First; Tok && Tok != Last && Tok->Next; Tok = Tok->Next) {
+ if (Tok->is(tok::comment))
+ continue;
+ if (RightAlign)
+ Tok = analyzeRight(SourceMgr, Keywords, Fixes, Tok, Qualifier,
+ QualifierToken);
+ else
+ Tok = analyzeLeft(SourceMgr, Keywords, Fixes, Tok, Qualifier,
+ QualifierToken);
+ }
+ }
+ return {Fixes, 0};
+}
+
+void QualifierAlignmentFixer::PrepareLeftRightOrdering(
+ const std::vector<std::string> &Order, std::vector<std::string> &LeftOrder,
+ std::vector<std::string> &RightOrder,
+ std::vector<tok::TokenKind> &Qualifiers) {
+
+ // Depending on the position of "type" in the order, we need to iterate
+ // forward or backward through the order list because qualifiers can push
+ // through each other.
+ // The order list must define the position of "type".
+ assert(llvm::is_contained(Order, "type") &&
+ "QualifierOrder must contain type");
+ // Split the Order list by type and reverse the left side.
+
+ bool left = true;
+ for (const auto &s : Order) {
+ if (s == "type") {
+ left = false;
+ continue;
+ }
+
+ tok::TokenKind QualifierToken =
+ LeftRightQualifierAlignmentFixer::getTokenFromQualifier(s);
+ if (QualifierToken != tok::kw_typeof && QualifierToken != tok::identifier) {
+ Qualifiers.push_back(QualifierToken);
+ }
+
+ if (left)
+ // Reverse the order for left aligned items.
+ LeftOrder.insert(LeftOrder.begin(), s);
+ else
+ RightOrder.push_back(s);
+ }
+}
+
+bool LeftRightQualifierAlignmentFixer::isQualifierOrType(
+ const FormatToken *Tok, const std::vector<tok::TokenKind> &specifiedTypes) {
+ return Tok && (Tok->isSimpleTypeSpecifier() || Tok->is(tok::kw_auto) ||
+ llvm::is_contained(specifiedTypes, Tok->Tok.getKind()));
+}
+
+// If a token is an identifier and it's upper case, it could
+// be a macro and hence we need to be able to ignore it.
+bool LeftRightQualifierAlignmentFixer::isPossibleMacro(const FormatToken *Tok) {
+ if (!Tok)
+ return false;
+ if (!Tok->is(tok::identifier))
+ return false;
+ if (Tok->TokenText.upper() == Tok->TokenText.str())
+ return true;
+ return false;
+}
+
+} // namespace format
+} // namespace clang
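
A small before/after illustration of what the passes constructed above produce, taken from the cases listed in the comments (assuming QualifierAlignment: Right with a QualifierOrder of "type" followed by "const"):

  const Foo *a;           ->  Foo const *a;
  const Foo &b;           ->  Foo const &b;
  const std::Foo<T> &&c;  ->  std::Foo<T> const &&c;

With left alignment the rotation is reversed, e.g. `Foo const *a;` becomes `const Foo *a;`.
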
diff --git a/clang/lib/Format/QualifierAlignmentFixer.h b/clang/lib/Format/QualifierAlignmentFixer.h
new file mode 100644
index 000000000000..7abd25687564
--- /dev/null
+++ b/clang/lib/Format/QualifierAlignmentFixer.h
@@ -0,0 +1,98 @@
+//===--- LeftRightQualifierAlignmentFixer.h ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file declares LeftRightQualifierAlignmentFixer, a TokenAnalyzer that
+/// enforces either east or west const depending on the style.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_FORMAT_QUALIFIERALIGNMENTFIXER_H
+#define LLVM_CLANG_LIB_FORMAT_QUALIFIERALIGNMENTFIXER_H
+
+#include "TokenAnalyzer.h"
+
+namespace clang {
+namespace format {
+
+typedef std::function<std::pair<tooling::Replacements, unsigned>(
+ const Environment &)>
+ AnalyzerPass;
+
+class QualifierAlignmentFixer : public TokenAnalyzer {
+ // Left to Right ordering requires multiple passes
+ SmallVector<AnalyzerPass, 8> Passes;
+ StringRef &Code;
+ ArrayRef<tooling::Range> Ranges;
+ unsigned FirstStartColumn;
+ unsigned NextStartColumn;
+ unsigned LastStartColumn;
+ StringRef FileName;
+
+public:
+ QualifierAlignmentFixer(const Environment &Env, const FormatStyle &Style,
+ StringRef &Code, ArrayRef<tooling::Range> Ranges,
+ unsigned FirstStartColumn, unsigned NextStartColumn,
+ unsigned LastStartColumn, StringRef FileName);
+
+ std::pair<tooling::Replacements, unsigned>
+ analyze(TokenAnnotator &Annotator,
+ SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
+ FormatTokenLexer &Tokens) override;
+
+ static void PrepareLeftRightOrdering(const std::vector<std::string> &Order,
+ std::vector<std::string> &LeftOrder,
+ std::vector<std::string> &RightOrder,
+ std::vector<tok::TokenKind> &Qualifiers);
+};
+
+class LeftRightQualifierAlignmentFixer : public TokenAnalyzer {
+ std::string Qualifier;
+ bool RightAlign;
+ SmallVector<tok::TokenKind, 8> QualifierTokens;
+ std::vector<tok::TokenKind> ConfiguredQualifierTokens;
+
+public:
+ LeftRightQualifierAlignmentFixer(
+ const Environment &Env, const FormatStyle &Style,
+ const std::string &Qualifier,
+ const std::vector<tok::TokenKind> &ConfiguredQualifierTokens,
+ bool RightAlign);
+
+ std::pair<tooling::Replacements, unsigned>
+ analyze(TokenAnnotator &Annotator,
+ SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
+ FormatTokenLexer &Tokens) override;
+
+ static tok::TokenKind getTokenFromQualifier(const std::string &Qualifier);
+
+ FormatToken *analyzeRight(const SourceManager &SourceMgr,
+ const AdditionalKeywords &Keywords,
+ tooling::Replacements &Fixes, FormatToken *Tok,
+ const std::string &Qualifier,
+ tok::TokenKind QualifierType);
+
+ FormatToken *analyzeLeft(const SourceManager &SourceMgr,
+ const AdditionalKeywords &Keywords,
+ tooling::Replacements &Fixes, FormatToken *Tok,
+ const std::string &Qualifier,
+ tok::TokenKind QualifierType);
+
+ // Is the token a simple type or a configured qualifier?
+ static bool isQualifierOrType(const FormatToken *Tok,
+ const std::vector<tok::TokenKind> &Qualifiers);
+
+ // Is the token likely a macro?
+ static bool isPossibleMacro(const FormatToken *Tok);
+};
+
+} // end namespace format
+} // end namespace clang
+
+#endif
diff --git a/clang/lib/Format/SortJavaScriptImports.cpp b/clang/lib/Format/SortJavaScriptImports.cpp
index a5e3ce69207b..515cfce725a4 100644
--- a/clang/lib/Format/SortJavaScriptImports.cpp
+++ b/clang/lib/Format/SortJavaScriptImports.cpp
@@ -550,7 +550,10 @@ tooling::Replacements sortJavaScriptImports(const FormatStyle &Style,
ArrayRef<tooling::Range> Ranges,
StringRef FileName) {
// FIXME: Cursor support.
- return JavaScriptImportSorter(Environment(Code, FileName, Ranges), Style)
+ auto Env = Environment::make(Code, FileName, Ranges);
+ if (!Env)
+ return {};
+ return JavaScriptImportSorter(*Env, Style)
.process()
.first;
}
diff --git a/clang/lib/Format/TokenAnalyzer.cpp b/clang/lib/Format/TokenAnalyzer.cpp
index f1459a808ff8..a619c6d939e9 100644
--- a/clang/lib/Format/TokenAnalyzer.cpp
+++ b/clang/lib/Format/TokenAnalyzer.cpp
@@ -26,26 +26,61 @@
#include "clang/Basic/SourceManager.h"
#include "clang/Format/Format.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Debug.h"
+#include <type_traits>
#define DEBUG_TYPE "format-formatter"
namespace clang {
namespace format {
+// FIXME: Instead of printing the diagnostic we should store it and have a
+// better way to return errors through the format APIs.
+class FatalDiagnosticConsumer : public DiagnosticConsumer {
+public:
+ void HandleDiagnostic(DiagnosticsEngine::Level DiagLevel,
+ const Diagnostic &Info) override {
+ if (DiagLevel == DiagnosticsEngine::Fatal) {
+ Fatal = true;
+ llvm::SmallVector<char, 128> Message;
+ Info.FormatDiagnostic(Message);
+ llvm::errs() << Message << "\n";
+ }
+ }
+
+ bool fatalError() const { return Fatal; }
+
+private:
+ bool Fatal = false;
+};
+
+std::unique_ptr<Environment>
+Environment::make(StringRef Code, StringRef FileName,
+ ArrayRef<tooling::Range> Ranges, unsigned FirstStartColumn,
+ unsigned NextStartColumn, unsigned LastStartColumn) {
+ auto Env = std::make_unique<Environment>(Code, FileName, FirstStartColumn,
+ NextStartColumn, LastStartColumn);
+ FatalDiagnosticConsumer Diags;
+ Env->SM.getDiagnostics().setClient(&Diags, /*ShouldOwnClient=*/false);
+ SourceLocation StartOfFile = Env->SM.getLocForStartOfFile(Env->ID);
+ for (const tooling::Range &Range : Ranges) {
+ SourceLocation Start = StartOfFile.getLocWithOffset(Range.getOffset());
+ SourceLocation End = Start.getLocWithOffset(Range.getLength());
+ Env->CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
+ }
+ // Validate that we can get the buffer data without a fatal error.
+ Env->SM.getBufferData(Env->ID);
+ if (Diags.fatalError()) return nullptr;
+ return Env;
+}
+
Environment::Environment(StringRef Code, StringRef FileName,
- ArrayRef<tooling::Range> Ranges,
unsigned FirstStartColumn, unsigned NextStartColumn,
unsigned LastStartColumn)
: VirtualSM(new SourceManagerForFile(FileName, Code)), SM(VirtualSM->get()),
ID(VirtualSM->get().getMainFileID()), FirstStartColumn(FirstStartColumn),
NextStartColumn(NextStartColumn), LastStartColumn(LastStartColumn) {
- SourceLocation StartOfFile = SM.getLocForStartOfFile(ID);
- for (const tooling::Range &Range : Ranges) {
- SourceLocation Start = StartOfFile.getLocWithOffset(Range.getOffset());
- SourceLocation End = Start.getLocWithOffset(Range.getLength());
- CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
- }
}
TokenAnalyzer::TokenAnalyzer(const Environment &Env, const FormatStyle &Style)
diff --git a/clang/lib/Format/TokenAnalyzer.h b/clang/lib/Format/TokenAnalyzer.h
index 5ce44a0f3ea7..aaca518df41f 100644
--- a/clang/lib/Format/TokenAnalyzer.h
+++ b/clang/lib/Format/TokenAnalyzer.h
@@ -29,6 +29,7 @@
#include "clang/Format/Format.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"
+#include <memory>
namespace clang {
namespace format {
@@ -40,8 +41,7 @@ public:
// that the next lines of \p Code should start at \p NextStartColumn, and
// that \p Code should end at \p LastStartColumn if it ends in newline.
// See also the documentation of clang::format::internal::reformat.
- Environment(StringRef Code, StringRef FileName,
- ArrayRef<tooling::Range> Ranges, unsigned FirstStartColumn = 0,
+ Environment(StringRef Code, StringRef FileName, unsigned FirstStartColumn = 0,
unsigned NextStartColumn = 0, unsigned LastStartColumn = 0);
FileID getFileID() const { return ID; }
@@ -62,6 +62,14 @@ public:
// environment should end if it ends in a newline.
unsigned getLastStartColumn() const { return LastStartColumn; }
+ // Returns nullptr and prints a diagnostic to stderr if the environment
+ // can't be created.
+ static std::unique_ptr<Environment> make(StringRef Code, StringRef FileName,
+ ArrayRef<tooling::Range> Ranges,
+ unsigned FirstStartColumn = 0,
+ unsigned NextStartColumn = 0,
+ unsigned LastStartColumn = 0);
+
private:
// This is only set if constructed from string.
std::unique_ptr<SourceManagerForFile> VirtualSM;
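The new static Environment::make factory is what lets clang-format bail out instead of misbehaving when the virtual file cannot be set up: a fatal diagnostic during SourceManagerForFile construction now yields a null environment. A minimal usage sketch under stated assumptions — only Environment::make and its parameters come from this patch; the input string, the "<stdin>" name, and the single whole-file range are illustrative.

#include "TokenAnalyzer.h"                  // clang::format::Environment (lib/Format-internal header)
#include "clang/Tooling/Core/Replacement.h" // clang::tooling::Range
#include <memory>
#include <vector>

// Sketch only: callers of the new factory check for nullptr instead of
// assuming construction always succeeds.
void formatIfPossible(llvm::StringRef Code) {
  std::vector<clang::tooling::Range> Ranges = {
      clang::tooling::Range(0, static_cast<unsigned>(Code.size()))};
  std::unique_ptr<clang::format::Environment> Env =
      clang::format::Environment::make(Code, "<stdin>", Ranges);
  if (!Env)
    return; // A fatal diagnostic was already printed to stderr.
  // ... run the formatting passes against *Env here ...
}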
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 54e6c7d38e7d..3897241cb858 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -1671,7 +1671,7 @@ private:
Current.setType(TT_TrailingReturnArrow);
} else if (Current.is(tok::arrow) && Current.Previous &&
Current.Previous->is(tok::r_brace)) {
- // Concept implicit conversion contraint needs to be treated like
+ // Concept implicit conversion constraint needs to be treated like
// a trailing return type ... } -> <type>.
Current.setType(TT_TrailingReturnArrow);
} else if (isDeductionGuide(Current)) {
@@ -2322,11 +2322,9 @@ private:
void TokenAnnotator::setCommentLineLevels(
SmallVectorImpl<AnnotatedLine *> &Lines) {
const AnnotatedLine *NextNonCommentLine = nullptr;
- for (SmallVectorImpl<AnnotatedLine *>::reverse_iterator I = Lines.rbegin(),
- E = Lines.rend();
- I != E; ++I) {
+ for (AnnotatedLine *AL : llvm::reverse(Lines)) {
bool CommentLine = true;
- for (const FormatToken *Tok = (*I)->First; Tok; Tok = Tok->Next) {
+ for (const FormatToken *Tok = AL->First; Tok; Tok = Tok->Next) {
if (!Tok->is(tok::comment)) {
CommentLine = false;
break;
@@ -2338,21 +2336,21 @@ void TokenAnnotator::setCommentLineLevels(
if (NextNonCommentLine && CommentLine &&
NextNonCommentLine->First->NewlinesBefore <= 1 &&
NextNonCommentLine->First->OriginalColumn ==
- (*I)->First->OriginalColumn) {
+ AL->First->OriginalColumn) {
// Align comments for preprocessor lines with the # in column 0 if
// preprocessor lines are not indented. Otherwise, align with the next
// line.
- (*I)->Level =
+ AL->Level =
(Style.IndentPPDirectives != FormatStyle::PPDIS_BeforeHash &&
(NextNonCommentLine->Type == LT_PreprocessorDirective ||
NextNonCommentLine->Type == LT_ImportStatement))
? 0
: NextNonCommentLine->Level;
} else {
- NextNonCommentLine = (*I)->First->isNot(tok::r_brace) ? (*I) : nullptr;
+ NextNonCommentLine = AL->First->isNot(tok::r_brace) ? AL : nullptr;
}
- setCommentLineLevels((*I)->Children);
+ setCommentLineLevels(AL->Children);
}
}
@@ -2398,7 +2396,7 @@ void TokenAnnotator::annotate(AnnotatedLine &Line) {
// This function heuristically determines whether 'Current' starts the name of a
// function declaration.
-static bool isFunctionDeclarationName(const FormatToken &Current,
+static bool isFunctionDeclarationName(bool IsCpp, const FormatToken &Current,
const AnnotatedLine &Line) {
auto skipOperatorName = [](const FormatToken *Next) -> const FormatToken * {
for (; Next; Next = Next->Next) {
@@ -2467,7 +2465,7 @@ static bool isFunctionDeclarationName(const FormatToken &Current,
// Check whether parameter list can belong to a function declaration.
if (!Next || !Next->is(tok::l_paren) || !Next->MatchingParen)
return false;
- // If the lines ends with "{", this is likely an function definition.
+ // If the lines ends with "{", this is likely a function definition.
if (Line.Last->is(tok::l_brace))
return true;
if (Next->Next == Next->MatchingParen)
@@ -2476,14 +2474,21 @@ static bool isFunctionDeclarationName(const FormatToken &Current,
if (Next->MatchingParen->Next &&
Next->MatchingParen->Next->is(TT_PointerOrReference))
return true;
- // Check for K&R C function definitions, e.g.:
+
+ // Check for K&R C function definitions (and C++ function definitions with
+ // unnamed parameters), e.g.:
// int f(i)
// {
// return i + 1;
// }
- if (Next->Next && Next->Next->is(tok::identifier) &&
- !(Next->MatchingParen->Next && Next->MatchingParen->Next->is(tok::semi)))
+ // bool g(size_t = 0, bool b = false)
+ // {
+ // return !b;
+ // }
+ if (IsCpp && Next->Next && Next->Next->is(tok::identifier) &&
+ !Line.endsWith(tok::semi))
return true;
+
for (const FormatToken *Tok = Next->Next; Tok && Tok != Next->MatchingParen;
Tok = Tok->Next) {
if (Tok->is(TT_TypeDeclarationParen))
@@ -2544,7 +2549,7 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
calculateArrayInitializerColumnList(Line);
while (Current) {
- if (isFunctionDeclarationName(*Current, Line))
+ if (isFunctionDeclarationName(Style.isCpp(), *Current, Line))
Current->setType(TT_FunctionDeclarationName);
if (Current->is(TT_LineComment)) {
if (Current->Previous->is(BK_BracedInit) &&
@@ -2899,7 +2904,7 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
bool TokenAnnotator::spaceRequiredBeforeParens(const FormatToken &Right) const {
return Style.SpaceBeforeParens == FormatStyle::SBPO_Always ||
- (Style.SpaceBeforeParens == FormatStyle::SBPO_NonEmptyParentheses &&
+ (Style.SpaceBeforeParensOptions.BeforeNonEmptyParentheses &&
Right.ParameterCount > 0);
}
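Throughout the following hunks, checks against the single SpaceBeforeParens enum are rewritten against per-situation booleans grouped under SpaceBeforeParensOptions. A hedged sketch of driving those fields programmatically; only the field names are taken from the patch, and using getLLVMStyle() as the baseline is an assumption.

#include "clang/Format/Format.h"

// Sketch only: flip the fine-grained space-before-parens knobs that the
// rewritten spaceRequiredBetween() logic consults.
clang::format::FormatStyle makeDemoStyle() {
  clang::format::FormatStyle Style = clang::format::getLLVMStyle();
  Style.SpaceBeforeParensOptions.AfterControlStatements = true;       // if (x)
  Style.SpaceBeforeParensOptions.AfterForeachMacros = true;           // FOREACH (x)
  Style.SpaceBeforeParensOptions.AfterFunctionDefinitionName = false; // void f() {}
  Style.SpaceBeforeParensOptions.AfterFunctionDeclarationName = false;
  Style.SpaceBeforeParensOptions.BeforeNonEmptyParentheses = false;
  return Style;
}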
@@ -2933,9 +2938,10 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
return true;
}
- // requires ( or requires(
- if (Right.is(tok::l_paren) && Left.is(tok::kw_requires))
- return spaceRequiredBeforeParens(Right);
+ // auto{x} auto(x)
+ if (Left.is(tok::kw_auto) && Right.isOneOf(tok::l_paren, tok::l_brace))
+ return false;
+
// requires clause Concept1<T> && Concept2<T>
if (Left.is(TT_ConstraintJunctions) && Right.is(tok::identifier))
return true;
@@ -2991,7 +2997,7 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
if (!TokenBeforeMatchingParen || !Left.is(TT_TypeDeclarationParen))
return true;
}
- // Add a space if the previous token is a pointer qualifer or the closing
+ // Add a space if the previous token is a pointer qualifier or the closing
// parenthesis of __attribute__(()) expression and the style requires spaces
// after pointer qualifiers.
if ((Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_After ||
@@ -3012,7 +3018,7 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
!Line.IsMultiVariableDeclStmt)))
return true;
if (Left.is(TT_PointerOrReference)) {
- // Add a space if the next token is a pointer qualifer and the style
+ // Add a space if the next token is a pointer qualifier and the style
// requires spaces before pointer qualifiers.
if ((Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_Before ||
Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_Both) &&
@@ -3031,7 +3037,7 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
!Left.Previous->isOneOf(tok::l_paren, tok::coloncolon,
tok::l_square));
}
- // Ensure right pointer alignement with ellipsis e.g. int *...P
+ // Ensure right pointer alignment with ellipsis e.g. int *...P
if (Left.is(tok::ellipsis) && Left.Previous &&
Left.Previous->isOneOf(tok::star, tok::amp, tok::ampamp))
return Style.PointerAlignment != FormatStyle::PAS_Right;
@@ -3127,33 +3133,60 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
// e.g. template <typename T> [[nodiscard]] ...
if (Left.is(TT_TemplateCloser) && Right.is(TT_AttributeSquare))
return true;
+ // Space before parentheses common for all languages
if (Right.is(tok::l_paren)) {
+ if (Left.is(TT_TemplateCloser) && Right.isNot(TT_FunctionTypeLParen))
+ return spaceRequiredBeforeParens(Right);
+ if (Left.is(tok::kw_requires))
+ return spaceRequiredBeforeParens(Right);
if ((Left.is(tok::r_paren) && Left.is(TT_AttributeParen)) ||
(Left.is(tok::r_square) && Left.is(TT_AttributeSquare)))
return true;
- if (Style.SpaceBeforeParens ==
- FormatStyle::SBPO_ControlStatementsExceptControlMacros &&
- Left.is(TT_ForEachMacro))
- return false;
- if (Style.SpaceBeforeParens ==
- FormatStyle::SBPO_ControlStatementsExceptControlMacros &&
- Left.is(TT_IfMacro))
- return false;
- return Line.Type == LT_ObjCDecl || Left.is(tok::semi) ||
- (Style.SpaceBeforeParens != FormatStyle::SBPO_Never &&
- (Left.isOneOf(tok::pp_elif, tok::kw_for, tok::kw_while,
- tok::kw_switch, tok::kw_case, TT_ForEachMacro,
- TT_ObjCForIn) ||
- Left.isIf(Line.Type != LT_PreprocessorDirective) ||
- (Left.isOneOf(tok::kw_try, Keywords.kw___except, tok::kw_catch,
- tok::kw_new, tok::kw_delete) &&
- (!Left.Previous || Left.Previous->isNot(tok::period))))) ||
- (spaceRequiredBeforeParens(Right) &&
- (Left.is(tok::identifier) || Left.isFunctionLikeKeyword() ||
- Left.is(tok::r_paren) || Left.isSimpleTypeSpecifier() ||
- (Left.is(tok::r_square) && Left.MatchingParen &&
- Left.MatchingParen->is(TT_LambdaLSquare))) &&
- Line.Type != LT_PreprocessorDirective);
+ if (Left.is(TT_ForEachMacro))
+ return (Style.SpaceBeforeParensOptions.AfterForeachMacros ||
+ spaceRequiredBeforeParens(Right));
+ if (Left.is(TT_IfMacro))
+ return (Style.SpaceBeforeParensOptions.AfterIfMacros ||
+ spaceRequiredBeforeParens(Right));
+ if (Line.Type == LT_ObjCDecl)
+ return true;
+ if (Left.is(tok::semi))
+ return true;
+ if (Left.isOneOf(tok::pp_elif, tok::kw_for, tok::kw_while, tok::kw_switch,
+ tok::kw_case, TT_ForEachMacro, TT_ObjCForIn))
+ return Style.SpaceBeforeParensOptions.AfterControlStatements ||
+ spaceRequiredBeforeParens(Right);
+ if (Left.isIf(Line.Type != LT_PreprocessorDirective))
+ return Style.SpaceBeforeParensOptions.AfterControlStatements ||
+ spaceRequiredBeforeParens(Right);
+ // Function declaration or definition
+ if (Line.MightBeFunctionDecl && (Left.is(TT_FunctionDeclarationName) ||
+ Right.is(TT_OverloadedOperatorLParen))) {
+ if (Line.mightBeFunctionDefinition())
+ return Style.SpaceBeforeParensOptions.AfterFunctionDefinitionName ||
+ spaceRequiredBeforeParens(Right);
+ else
+ return Style.SpaceBeforeParensOptions.AfterFunctionDeclarationName ||
+ spaceRequiredBeforeParens(Right);
+ }
+ // Lambda
+ if (Line.Type != LT_PreprocessorDirective && Left.is(tok::r_square) &&
+ Left.MatchingParen && Left.MatchingParen->is(TT_LambdaLSquare))
+ return Style.SpaceBeforeParensOptions.AfterFunctionDefinitionName ||
+ spaceRequiredBeforeParens(Right);
+ if (!Left.Previous || Left.Previous->isNot(tok::period)) {
+ if (Left.isOneOf(tok::kw_try, Keywords.kw___except, tok::kw_catch))
+ return Style.SpaceBeforeParensOptions.AfterControlStatements ||
+ spaceRequiredBeforeParens(Right);
+ if (Left.isOneOf(tok::kw_new, tok::kw_delete))
+ return Style.SpaceBeforeParens != FormatStyle::SBPO_Never ||
+ spaceRequiredBeforeParens(Right);
+ }
+ if (Line.Type != LT_PreprocessorDirective &&
+ (Left.is(tok::identifier) || Left.isFunctionLikeKeyword() ||
+ Left.is(tok::r_paren) || Left.isSimpleTypeSpecifier()))
+ return spaceRequiredBeforeParens(Right);
+ return false;
}
if (Left.is(tok::at) && Right.Tok.getObjCKeywordID() != tok::objc_not_keyword)
return false;
@@ -3195,6 +3228,7 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
// qualifiers such as
// void Fn() const &;
return getTokenReferenceAlignment(Right) != FormatStyle::PAS_Left;
+
return true;
}
@@ -3300,7 +3334,7 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
if (Right.is(tok::l_paren))
if (Left.isOneOf(tok::kw_using, Keywords.kw_async, Keywords.kw_when,
Keywords.kw_lock))
- return Style.SpaceBeforeParens == FormatStyle::SBPO_ControlStatements ||
+ return Style.SpaceBeforeParensOptions.AfterControlStatements ||
spaceRequiredBeforeParens(Right);
// space between method modifier and opening parenthesis of a tuple return
@@ -3407,7 +3441,8 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
if (Left.is(tok::r_square) && Right.is(tok::l_brace))
return true;
if (Left.is(Keywords.kw_synchronized) && Right.is(tok::l_paren))
- return Style.SpaceBeforeParens != FormatStyle::SBPO_Never;
+ return Style.SpaceBeforeParensOptions.AfterControlStatements ||
+ spaceRequiredBeforeParens(Right);
if ((Left.isOneOf(tok::kw_static, tok::kw_public, tok::kw_private,
tok::kw_protected) ||
Left.isOneOf(Keywords.kw_final, Keywords.kw_abstract,
@@ -3433,9 +3468,7 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
if (Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow) ||
Left.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow))
return true;
- if (Right.is(TT_OverloadedOperatorLParen))
- return spaceRequiredBeforeParens(Right);
- if (Left.is(tok::comma))
+ if (Left.is(tok::comma) && !Right.is(TT_OverloadedOperatorLParen))
return true;
if (Right.is(tok::comma))
return false;
@@ -3558,9 +3591,6 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
(Left.isOneOf(TT_BinaryOperator, TT_ConditionalExpr) &&
!Right.is(tok::r_paren)))
return true;
- if (Left.is(TT_TemplateCloser) && Right.is(tok::l_paren) &&
- Right.isNot(TT_FunctionTypeLParen))
- return spaceRequiredBeforeParens(Right);
if (Right.is(TT_TemplateOpener) && Left.is(tok::r_paren) &&
Left.MatchingParen && Left.MatchingParen->is(TT_OverloadedOperatorLParen))
return false;
@@ -3580,7 +3610,7 @@ static bool isAllmanBrace(const FormatToken &Tok) {
!Tok.isOneOf(TT_ObjCBlockLBrace, TT_LambdaLBrace, TT_DictLiteral);
}
-// Returns 'true' if 'Tok' is an function argument.
+// Returns 'true' if 'Tok' is a function argument.
static bool IsFunctionArgument(const FormatToken &Tok) {
return Tok.MatchingParen && Tok.MatchingParen->Next &&
Tok.MatchingParen->Next->isOneOf(tok::comma, tok::r_paren);
@@ -3597,6 +3627,16 @@ static bool isAllmanLambdaBrace(const FormatToken &Tok) {
!Tok.isOneOf(TT_ObjCBlockLBrace, TT_DictLiteral));
}
+// Returns the first token on the line that is not a comment.
+static const FormatToken *getFirstNonComment(const AnnotatedLine &Line) {
+ const FormatToken *Next = Line.First;
+ if (!Next)
+ return Next;
+ if (Next->is(tok::comment))
+ Next = Next->getNextNonComment();
+ return Next;
+}
+
bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
const FormatToken &Right) {
const FormatToken &Left = *Right.Previous;
@@ -3696,7 +3736,7 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
if (Left.is(TT_ArrayInitializerLSquare) && Left.is(tok::l_square) &&
!Right.is(tok::r_square))
return true;
- // Always break afer successive entries.
+ // Always break after successive entries.
// 1,
// 2
if (Left.is(tok::comma))
@@ -3747,13 +3787,18 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
return Style.BreakBeforeConceptDeclarations;
return (Style.AlwaysBreakTemplateDeclarations == FormatStyle::BTDS_Yes);
}
- if (Right.is(TT_CtorInitializerComma) &&
- Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma &&
- !Style.ConstructorInitializerAllOnOneLineOrOnePerLine)
- return true;
- if (Right.is(TT_CtorInitializerColon) &&
+ if (Style.PackConstructorInitializers == FormatStyle::PCIS_Never) {
+ if (Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeColon &&
+ (Left.is(TT_CtorInitializerComma) || Right.is(TT_CtorInitializerColon)))
+ return true;
+
+ if (Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon &&
+ Left.isOneOf(TT_CtorInitializerColon, TT_CtorInitializerComma))
+ return true;
+ }
+ if (Style.PackConstructorInitializers < FormatStyle::PCIS_CurrentLine &&
Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma &&
- !Style.ConstructorInitializerAllOnOneLineOrOnePerLine)
+ Right.isOneOf(TT_CtorInitializerComma, TT_CtorInitializerColon))
return true;
// Break only if we have multiple inheritance.
if (Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma &&
@@ -3778,16 +3823,42 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
if (Right.is(TT_InlineASMBrace))
return Right.HasUnescapedNewline;
- if (isAllmanBrace(Left) || isAllmanBrace(Right))
- return (Line.startsWith(tok::kw_enum) && Style.BraceWrapping.AfterEnum) ||
- (Line.startsWith(tok::kw_typedef, tok::kw_enum) &&
- Style.BraceWrapping.AfterEnum) ||
- (Line.startsWith(tok::kw_class) && Style.BraceWrapping.AfterClass) ||
+ if (isAllmanBrace(Left) || isAllmanBrace(Right)) {
+ auto FirstNonComment = getFirstNonComment(Line);
+ bool AccessSpecifier =
+ FirstNonComment &&
+ FirstNonComment->isOneOf(Keywords.kw_internal, tok::kw_public,
+ tok::kw_private, tok::kw_protected);
+
+ if (Style.BraceWrapping.AfterEnum) {
+ if (Line.startsWith(tok::kw_enum) ||
+ Line.startsWith(tok::kw_typedef, tok::kw_enum))
+ return true;
+ // Ensure BraceWrapping for `public enum A {`.
+ if (AccessSpecifier && FirstNonComment->Next &&
+ FirstNonComment->Next->is(tok::kw_enum))
+ return true;
+ }
+
+ // Ensure BraceWrapping for `public interface A {`.
+ if (Style.BraceWrapping.AfterClass &&
+ ((AccessSpecifier && FirstNonComment->Next &&
+ FirstNonComment->Next->is(Keywords.kw_interface)) ||
+ Line.startsWith(Keywords.kw_interface)))
+ return true;
+
+ return (Line.startsWith(tok::kw_class) && Style.BraceWrapping.AfterClass) ||
(Line.startsWith(tok::kw_struct) && Style.BraceWrapping.AfterStruct);
+ }
+
if (Left.is(TT_ObjCBlockLBrace) &&
Style.AllowShortBlocksOnASingleLine == FormatStyle::SBS_Never)
return true;
+ // Ensure wrapping after __attribute__((XX)) and @interface etc.
+ if (Left.is(TT_AttributeParen) && Right.is(TT_ObjCDecl))
+ return true;
+
if (Left.is(TT_LambdaLBrace)) {
if (IsFunctionArgument(Left) &&
Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_Inline)
@@ -3957,8 +4028,9 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
tok::kw_return, Keywords.kw_yield, tok::kw_continue, tok::kw_break,
tok::kw_throw, Keywords.kw_interface, Keywords.kw_type,
tok::kw_static, tok::kw_public, tok::kw_private, tok::kw_protected,
- Keywords.kw_readonly, Keywords.kw_abstract, Keywords.kw_get,
- Keywords.kw_set, Keywords.kw_async, Keywords.kw_await))
+ Keywords.kw_readonly, Keywords.kw_override, Keywords.kw_abstract,
+ Keywords.kw_get, Keywords.kw_set, Keywords.kw_async,
+ Keywords.kw_await))
return false; // Otherwise automatic semicolon insertion would trigger.
if (Right.NestingLevel == 0 &&
(Left.Tok.getIdentifierInfo() ||
@@ -4012,7 +4084,7 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
Keywords.kw_interface, Keywords.kw_type, Keywords.kw_var,
Keywords.kw_let, tok::kw_const))
// See grammar for 'declare' statements at:
- // https://github.com/Microsoft/TypeScript/blob/master/doc/spec.md#A.10
+ // https://github.com/Microsoft/TypeScript/blob/main/doc/spec-ARCHIVED.md#A.10
return false;
if (Left.isOneOf(Keywords.kw_module, tok::kw_namespace) &&
Right.isOneOf(tok::identifier, tok::string_literal))
diff --git a/clang/lib/Format/TokenAnnotator.h b/clang/lib/Format/TokenAnnotator.h
index 0f9c02dbeb34..6e5e62cd4d82 100644
--- a/clang/lib/Format/TokenAnnotator.h
+++ b/clang/lib/Format/TokenAnnotator.h
@@ -53,9 +53,7 @@ public:
// left them in a different state.
First->Previous = nullptr;
FormatToken *Current = First;
- for (std::list<UnwrappedLineNode>::const_iterator I = ++Line.Tokens.begin(),
- E = Line.Tokens.end();
- I != E; ++I) {
+ for (auto I = ++Line.Tokens.begin(), E = Line.Tokens.end(); I != E; ++I) {
const UnwrappedLineNode &Node = *I;
Current->Next = I->Tok;
I->Tok->Previous = Current;
diff --git a/clang/lib/Format/UnwrappedLineFormatter.cpp b/clang/lib/Format/UnwrappedLineFormatter.cpp
index cca85c1074de..299536cd806e 100644
--- a/clang/lib/Format/UnwrappedLineFormatter.cpp
+++ b/clang/lib/Format/UnwrappedLineFormatter.cpp
@@ -104,7 +104,7 @@ private:
RootToken.isObjCAccessSpecifier() ||
(RootToken.isOneOf(Keywords.kw_signals, Keywords.kw_qsignals) &&
RootToken.Next && RootToken.Next->is(tok::colon))) {
- // The AccessModifierOffset may be overriden by IndentAccessModifiers,
+ // The AccessModifierOffset may be overridden by IndentAccessModifiers,
// in which case we take a negative value of the IndentWidth to simulate
// the upper indent level.
return Style.IndentAccessModifiers ? -Style.IndentWidth
@@ -632,10 +632,11 @@ private:
FormatToken *RecordTok = Line.First;
// Skip record modifiers.
while (RecordTok->Next &&
- RecordTok->isOneOf(
- tok::kw_typedef, tok::kw_export, Keywords.kw_declare,
- Keywords.kw_abstract, tok::kw_default, tok::kw_public,
- tok::kw_private, tok::kw_protected, Keywords.kw_internal))
+ RecordTok->isOneOf(tok::kw_typedef, tok::kw_export,
+ Keywords.kw_declare, Keywords.kw_abstract,
+ tok::kw_default, Keywords.kw_override,
+ tok::kw_public, tok::kw_private,
+ tok::kw_protected, Keywords.kw_internal))
RecordTok = RecordTok->Next;
if (RecordTok &&
RecordTok->isOneOf(tok::kw_class, tok::kw_union, tok::kw_struct,
diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index 103e3559b120..28d925858f77 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -385,7 +385,7 @@ void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
// be in a non-declaration context.
if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
continue;
- parseBlock(/*MustBeDeclaration=*/false);
+ parseBlock();
addUnwrappedLine();
break;
case tok::r_brace:
@@ -431,7 +431,7 @@ void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
}
LLVM_FALLTHROUGH;
default:
- parseStructuralElement(/*IsTopLevel=*/true);
+ parseStructuralElement(!HasOpeningBrace);
break;
}
} while (!eof());
@@ -489,6 +489,17 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
if (Style.Language == FormatStyle::LK_Proto) {
ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
} else {
+ // Skip NextTok over preprocessor lines, otherwise we may not
+ // properly diagnose the block as a braced initializer
+ // if the comma separator appears after the pp directive.
+ while (NextTok->is(tok::hash)) {
+ ScopedMacroState MacroState(*Line, Tokens, NextTok);
+ do {
+ NextTok = Tokens->getNextToken();
+ ++ReadTokens;
+ } while (NextTok->isNot(tok::eof));
+ }
+
// Using OriginalColumn to distinguish between ObjC methods and
// binary operators is a bit hacky.
bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
@@ -962,8 +973,8 @@ static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
Keywords.kw_let, Keywords.kw_var, tok::kw_const,
Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
- Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws,
- Keywords.kw_from));
+ Keywords.kw_instanceof, Keywords.kw_interface,
+ Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
}
static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
@@ -994,6 +1005,13 @@ static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
Keywords.kw_import, tok::kw_export);
}
+// Checks whether a token is a type in K&R C (aka C78).
+static bool isC78Type(const FormatToken &Tok) {
+ return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
+ tok::kw_unsigned, tok::kw_float, tok::kw_double,
+ tok::identifier);
+}
+
// This function checks whether a token starts the first parameter declaration
// in a K&R C (aka C78) function definition, e.g.:
// int f(a, b)
@@ -1001,13 +1019,24 @@ static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
// {
// return a + b;
// }
-static bool isC78ParameterDecl(const FormatToken *Tok) {
- if (!Tok)
+static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
+ const FormatToken *FuncName) {
+ assert(Tok);
+ assert(Next);
+ assert(FuncName);
+
+ if (FuncName->isNot(tok::identifier))
return false;
- if (!Tok->isOneOf(tok::kw_int, tok::kw_char, tok::kw_float, tok::kw_double,
- tok::kw_struct, tok::kw_union, tok::kw_long, tok::kw_short,
- tok::kw_unsigned, tok::kw_register, tok::identifier))
+ const FormatToken *Prev = FuncName->Previous;
+ if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
+ return false;
+
+ if (!isC78Type(*Tok) &&
+ !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union))
+ return false;
+
+ if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
return false;
Tok = Tok->Previous;
@@ -1046,10 +1075,9 @@ void UnwrappedLineParser::readTokenWithJavaScriptASI() {
if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
// If the line contains an '@' sign, the previous token might be an
// annotation, which can precede another identifier/value.
- bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(),
- [](UnwrappedLineNode &LineNode) {
- return LineNode.Tok->is(tok::at);
- }) != Line->Tokens.end();
+ bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
+ return LineNode.Tok->is(tok::at);
+ });
if (HasAt)
return;
}
@@ -1285,7 +1313,7 @@ void UnwrappedLineParser::parseStructuralElement(bool IsTopLevel) {
if (Style.BraceWrapping.AfterControlStatement ==
FormatStyle::BWACS_Always)
addUnwrappedLine();
- parseBlock(/*MustBeDeclaration=*/false);
+ parseBlock();
}
addUnwrappedLine();
return;
@@ -1298,7 +1326,7 @@ void UnwrappedLineParser::parseStructuralElement(bool IsTopLevel) {
if (Style.BraceWrapping.AfterControlStatement ==
FormatStyle::BWACS_Always)
addUnwrappedLine();
- parseBlock(/*MustBeDeclaration=*/false);
+ parseBlock();
}
addUnwrappedLine();
return;
@@ -1368,21 +1396,20 @@ void UnwrappedLineParser::parseStructuralElement(bool IsTopLevel) {
case tok::r_brace:
addUnwrappedLine();
return;
- case tok::l_paren:
+ case tok::l_paren: {
parseParens();
// Break the unwrapped line if a K&R C function definition has a parameter
// declaration.
- if (!IsTopLevel || !Style.isCpp())
- break;
- if (!Previous || Previous->isNot(tok::identifier))
+ if (!IsTopLevel || !Style.isCpp() || !Previous || FormatTok->is(tok::eof))
break;
- if (Previous->Previous && Previous->Previous->is(tok::at))
- break;
- if (isC78ParameterDecl(FormatTok)) {
+ const unsigned Position = Tokens->getPosition() + 1;
+ assert(Position < AllTokens.size());
+ if (isC78ParameterDecl(FormatTok, AllTokens[Position], Previous)) {
addUnwrappedLine();
return;
}
break;
+ }
case tok::kw_operator:
nextToken();
if (FormatTok->isBinaryOperator())
@@ -1407,7 +1434,7 @@ void UnwrappedLineParser::parseStructuralElement(bool IsTopLevel) {
if (Style.BraceWrapping.AfterFunction)
addUnwrappedLine();
FormatTok->setType(TT_FunctionLBrace);
- parseBlock(/*MustBeDeclaration=*/false);
+ parseBlock();
addUnwrappedLine();
return;
}
@@ -2051,7 +2078,7 @@ void UnwrappedLineParser::parseIfThenElse() {
bool NeedsUnwrappedLine = false;
if (FormatTok->Tok.is(tok::l_brace)) {
CompoundStatementIndenter Indenter(this, Style, Line->Level);
- parseBlock(/*MustBeDeclaration=*/false);
+ parseBlock();
if (Style.BraceWrapping.BeforeElse)
addUnwrappedLine();
else
@@ -2069,7 +2096,7 @@ void UnwrappedLineParser::parseIfThenElse() {
parseSquare();
if (FormatTok->Tok.is(tok::l_brace)) {
CompoundStatementIndenter Indenter(this, Style, Line->Level);
- parseBlock(/*MustBeDeclaration=*/false);
+ parseBlock();
addUnwrappedLine();
} else if (FormatTok->Tok.is(tok::kw_if)) {
FormatToken *Previous = AllTokens[Tokens->getPosition() - 1];
@@ -2131,7 +2158,7 @@ void UnwrappedLineParser::parseTryCatch() {
}
if (FormatTok->is(tok::l_brace)) {
CompoundStatementIndenter Indenter(this, Style, Line->Level);
- parseBlock(/*MustBeDeclaration=*/false);
+ parseBlock();
if (Style.BraceWrapping.BeforeCatch) {
addUnwrappedLine();
} else {
@@ -2169,7 +2196,7 @@ void UnwrappedLineParser::parseTryCatch() {
}
NeedsUnwrappedLine = false;
CompoundStatementIndenter Indenter(this, Style, Line->Level);
- parseBlock(/*MustBeDeclaration=*/false);
+ parseBlock();
if (Style.BraceWrapping.BeforeCatch)
addUnwrappedLine();
else
@@ -2189,7 +2216,7 @@ void UnwrappedLineParser::parseNamespace() {
parseParens();
} else {
while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
- tok::l_square)) {
+ tok::l_square, tok::period)) {
if (FormatTok->is(tok::l_square))
parseSquare();
else
@@ -2282,7 +2309,7 @@ void UnwrappedLineParser::parseForOrWhileLoop() {
parseParens();
if (FormatTok->Tok.is(tok::l_brace)) {
CompoundStatementIndenter Indenter(this, Style, Line->Level);
- parseBlock(/*MustBeDeclaration=*/false);
+ parseBlock();
addUnwrappedLine();
} else {
addUnwrappedLine();
@@ -2297,7 +2324,7 @@ void UnwrappedLineParser::parseDoWhile() {
nextToken();
if (FormatTok->Tok.is(tok::l_brace)) {
CompoundStatementIndenter Indenter(this, Style, Line->Level);
- parseBlock(/*MustBeDeclaration=*/false);
+ parseBlock();
if (Style.BraceWrapping.BeforeWhile)
addUnwrappedLine();
} else {
@@ -2336,7 +2363,7 @@ void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
CompoundStatementIndenter Indenter(this, Line->Level,
Style.BraceWrapping.AfterCaseLabel,
Style.BraceWrapping.IndentBraces);
- parseBlock(/*MustBeDeclaration=*/false);
+ parseBlock();
if (FormatTok->Tok.is(tok::kw_break)) {
if (Style.BraceWrapping.AfterControlStatement ==
FormatStyle::BWACS_Always) {
@@ -2378,7 +2405,7 @@ void UnwrappedLineParser::parseSwitch() {
parseParens();
if (FormatTok->Tok.is(tok::l_brace)) {
CompoundStatementIndenter Indenter(this, Style, Line->Level);
- parseBlock(/*MustBeDeclaration=*/false);
+ parseBlock();
addUnwrappedLine();
} else {
addUnwrappedLine();
@@ -2430,7 +2457,7 @@ void UnwrappedLineParser::parseRequiresExpression(unsigned int OriginalLevel) {
if (Style.BraceWrapping.AfterFunction)
addUnwrappedLine();
FormatTok->setType(TT_FunctionLBrace);
- parseBlock(/*MustBeDeclaration=*/false);
+ parseBlock();
addUnwrappedLine();
} else {
parseConstraintExpression(OriginalLevel);
@@ -2467,7 +2494,7 @@ void UnwrappedLineParser::parseConstraintExpression(
if (Style.BraceWrapping.AfterFunction)
addUnwrappedLine();
FormatTok->setType(TT_FunctionLBrace);
- parseBlock(/*MustBeDeclaration=*/false);
+ parseBlock();
}
if (FormatTok->Tok.is(tok::semi)) {
// Eat any trailing semi.
@@ -2515,6 +2542,8 @@ bool UnwrappedLineParser::parseEnum() {
if (FormatTok->Tok.is(tok::kw_enum))
nextToken();
+ const FormatToken &InitialToken = *FormatTok;
+
// In TypeScript, "enum" can also be used as property name, e.g. in interface
// declarations. An "enum" keyword followed by a colon would be a syntax
// error and thus assume it is just an identifier.
@@ -2561,7 +2590,8 @@ bool UnwrappedLineParser::parseEnum() {
return true;
}
- if (!Style.AllowShortEnumsOnASingleLine)
+ if (!Style.AllowShortEnumsOnASingleLine &&
+ ShouldBreakBeforeBrace(Style, InitialToken))
addUnwrappedLine();
// Parse enum body.
nextToken();
@@ -2804,7 +2834,7 @@ void UnwrappedLineParser::parseObjCMethod() {
} else if (FormatTok->Tok.is(tok::l_brace)) {
if (Style.BraceWrapping.AfterFunction)
addUnwrappedLine();
- parseBlock(/*MustBeDeclaration=*/false);
+ parseBlock();
addUnwrappedLine();
return;
} else {
@@ -2833,7 +2863,7 @@ void UnwrappedLineParser::parseObjCUntilAtEnd() {
break;
}
if (FormatTok->is(tok::l_brace)) {
- parseBlock(/*MustBeDeclaration=*/false);
+ parseBlock();
// In ObjC interfaces, nothing should be following the "}".
addUnwrappedLine();
} else if (FormatTok->is(tok::r_brace)) {
@@ -3004,24 +3034,15 @@ LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
llvm::dbgs() << Prefix << "Line(" << Line.Level
<< ", FSC=" << Line.FirstStartColumn << ")"
<< (Line.InPPDirective ? " MACRO" : "") << ": ";
- for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
- E = Line.Tokens.end();
- I != E; ++I) {
- llvm::dbgs() << I->Tok->Tok.getName() << "["
- << "T=" << (unsigned)I->Tok->getType()
- << ", OC=" << I->Tok->OriginalColumn << "] ";
- }
- for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
- E = Line.Tokens.end();
- I != E; ++I) {
- const UnwrappedLineNode &Node = *I;
- for (SmallVectorImpl<UnwrappedLine>::const_iterator
- I = Node.Children.begin(),
- E = Node.Children.end();
- I != E; ++I) {
- printDebugInfo(*I, "\nChild: ");
- }
+ for (const auto &Node : Line.Tokens) {
+ llvm::dbgs() << Node.Tok->Tok.getName() << "["
+ << "T=" << static_cast<unsigned>(Node.Tok->getType())
+ << ", OC=" << Node.Tok->OriginalColumn << "] ";
}
+ for (const auto &Node : Line.Tokens)
+ for (const auto &ChildNode : Node.Children)
+ printDebugInfo(ChildNode, "\nChild: ");
+
llvm::dbgs() << "\n";
}
diff --git a/clang/lib/Format/UnwrappedLineParser.h b/clang/lib/Format/UnwrappedLineParser.h
index f22bb6323e3d..bcae0f3ad258 100644
--- a/clang/lib/Format/UnwrappedLineParser.h
+++ b/clang/lib/Format/UnwrappedLineParser.h
@@ -19,8 +19,8 @@
#include "clang/Basic/IdentifierTable.h"
#include "clang/Format/Format.h"
#include "llvm/Support/Regex.h"
-#include <list>
#include <stack>
+#include <vector>
namespace clang {
namespace format {
@@ -36,9 +36,8 @@ struct UnwrappedLineNode;
struct UnwrappedLine {
UnwrappedLine();
- // FIXME: Don't use std::list here.
/// The \c Tokens comprising this \c UnwrappedLine.
- std::list<UnwrappedLineNode> Tokens;
+ std::vector<UnwrappedLineNode> Tokens;
/// The indent level of the \c UnwrappedLine.
unsigned Level;
@@ -85,7 +84,7 @@ private:
void reset();
void parseFile();
void parseLevel(bool HasOpeningBrace);
- void parseBlock(bool MustBeDeclaration, unsigned AddLevels = 1u,
+ void parseBlock(bool MustBeDeclaration = false, unsigned AddLevels = 1u,
bool MunchSemi = true,
bool UnindentWhitesmithsBraces = false);
void parseChildBlock();
diff --git a/clang/lib/Format/WhitespaceManager.cpp b/clang/lib/Format/WhitespaceManager.cpp
index ca2222d1feff..74136d2f5caa 100644
--- a/clang/lib/Format/WhitespaceManager.cpp
+++ b/clang/lib/Format/WhitespaceManager.cpp
@@ -347,7 +347,7 @@ AlignTokenSequence(const FormatStyle &Style, unsigned Start, unsigned End,
if (ScopeStart > Start + 1 &&
Changes[ScopeStart - 2].Tok->is(tok::identifier) &&
Changes[ScopeStart - 1].Tok->is(tok::l_paren))
- return true;
+ return Style.BinPackArguments;
// Ternary operator
if (Changes[i].Tok->is(TT_ConditionalExpr))
@@ -1146,14 +1146,15 @@ WhitespaceManager::CellDescriptions WhitespaceManager::getCells(unsigned Start,
} else if (C.Tok->is(tok::comma)) {
if (!Cells.empty())
Cells.back().EndIndex = i;
- Cell++;
+ if (C.Tok->getNextNonComment()->isNot(tok::r_brace)) // dangling comma
+ ++Cell;
}
} else if (Depth == 1) {
if (C.Tok == MatchingParen) {
if (!Cells.empty())
Cells.back().EndIndex = i;
Cells.push_back(CellDescription{i, ++Cell, i + 1, false, nullptr});
- CellCount = Cell + 1;
+ CellCount = C.Tok->Previous->isNot(tok::comma) ? Cell + 1 : Cell;
// Go to the next non-comment and ensure there is a break in front
const auto *NextNonComment = C.Tok->getNextNonComment();
while (NextNonComment->is(tok::comma))
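The two adjustments above stop a trailing comma in front of a row's closing brace from being counted as the start of another cell. A hedged illustration of the kind of array-of-structures initializer this concerns; the struct and its values are made up, and only the dangling-comma behaviour reflects the patch.

// Illustrative input only. With array-of-structures alignment enabled, the
// comma before each row's closing brace is treated as dangling, so it no
// longer inflates the cell count used for column alignment.
struct Entry {
  int Id;
  int Weight;
  const char *Name;
};
Entry Table[] = {
    {1, 100, "alpha",},
    {2, 5,   "beta",},
};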
@@ -1190,6 +1191,17 @@ WhitespaceManager::CellDescriptions WhitespaceManager::getCells(unsigned Start,
// So if we split a line previously and the tail line + this token is
// less then the column limit we remove the split here and just put
// the column start at a space past the comma
+ //
+ // FIXME This if branch covers the cases where the column is not
+ // the first column. This leads to weird pathologies like the formatting
+ // auto foo = Items{
+ // Section{
+ // 0, bar(),
+ // }
+ // };
+ // If it doesn't lead to that, it's still indicative that the line
+ // breaking should be revisited. Unfortunately a lot of other options
+ // interact with this.
auto j = i - 1;
if ((j - 1) > Start && Changes[j].Tok->is(tok::comma) &&
Changes[j - 1].NewlinesBefore > 0) {
diff --git a/clang/lib/Format/WhitespaceManager.h b/clang/lib/Format/WhitespaceManager.h
index 4f8f95040af6..029f4159b748 100644
--- a/clang/lib/Format/WhitespaceManager.h
+++ b/clang/lib/Format/WhitespaceManager.h
@@ -257,7 +257,7 @@ private:
/// Does this \p Cell contain a split element?
static bool isSplitCell(const CellDescription &Cell);
- /// Get the width of the preceeding cells from \p Start to \p End.
+ /// Get the width of the preceding cells from \p Start to \p End.
template <typename I>
auto getNetWidth(const I &Start, const I &End, unsigned InitialSpaces) const {
auto NetWidth = InitialSpaces;
diff --git a/clang/lib/Frontend/ASTConsumers.cpp b/clang/lib/Frontend/ASTConsumers.cpp
index a73cc8876d5d..96f5926c0d7e 100644
--- a/clang/lib/Frontend/ASTConsumers.cpp
+++ b/clang/lib/Frontend/ASTConsumers.cpp
@@ -57,8 +57,11 @@ namespace {
bool ShowColors = Out.has_colors();
if (ShowColors)
Out.changeColor(raw_ostream::BLUE);
- Out << (OutputKind != Print ? "Dumping " : "Printing ") << getName(D)
- << ":\n";
+
+ if (OutputFormat == ADOF_Default)
+ Out << (OutputKind != Print ? "Dumping " : "Printing ") << getName(D)
+ << ":\n";
+
if (ShowColors)
Out.resetColor();
print(D);
diff --git a/clang/lib/Frontend/ASTUnit.cpp b/clang/lib/Frontend/ASTUnit.cpp
index 996783aa9cf4..52589677ca28 100644
--- a/clang/lib/Frontend/ASTUnit.cpp
+++ b/clang/lib/Frontend/ASTUnit.cpp
@@ -1069,9 +1069,7 @@ static void
checkAndRemoveNonDriverDiags(SmallVectorImpl<StoredDiagnostic> &StoredDiags) {
// Get rid of stored diagnostics except the ones from the driver which do not
// have a source location.
- StoredDiags.erase(
- std::remove_if(StoredDiags.begin(), StoredDiags.end(), isNonDriverDiag),
- StoredDiags.end());
+ llvm::erase_if(StoredDiags, isNonDriverDiag);
}
static void checkAndSanitizeDiags(SmallVectorImpl<StoredDiagnostic> &
@@ -1989,6 +1987,7 @@ static void CalculateHiddenNames(const CodeCompletionContext &Context,
case CodeCompletionContext::CCC_ObjCClassMessage:
case CodeCompletionContext::CCC_ObjCCategoryName:
case CodeCompletionContext::CCC_IncludedFile:
+ case CodeCompletionContext::CCC_Attribute:
case CodeCompletionContext::CCC_NewName:
// We're looking for nothing, or we're looking for names that cannot
// be hidden.
diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp
index c642af1849bc..1432607204bd 100644
--- a/clang/lib/Frontend/CompilerInstance.cpp
+++ b/clang/lib/Frontend/CompilerInstance.cpp
@@ -23,6 +23,7 @@
#include "clang/Frontend/FrontendAction.h"
#include "clang/Frontend/FrontendActions.h"
#include "clang/Frontend/FrontendDiagnostic.h"
+#include "clang/Frontend/FrontendPluginRegistry.h"
#include "clang/Frontend/LogDiagnosticPrinter.h"
#include "clang/Frontend/SerializedDiagnosticPrinter.h"
#include "clang/Frontend/TextDiagnosticPrinter.h"
@@ -558,6 +559,54 @@ void CompilerInstance::createASTContext() {
// ExternalASTSource
+namespace {
+// Helper to recursively read the module names for all modules we're adding.
+// We mark these as known and redirect any attempt to load that module to
+// the files we were handed.
+struct ReadModuleNames : ASTReaderListener {
+ Preprocessor &PP;
+ llvm::SmallVector<std::string, 8> LoadedModules;
+
+ ReadModuleNames(Preprocessor &PP) : PP(PP) {}
+
+ void ReadModuleName(StringRef ModuleName) override {
+ // Keep the module name as a string for now. It's not safe to create a new
+ // IdentifierInfo from an ASTReader callback.
+ LoadedModules.push_back(ModuleName.str());
+ }
+
+ void registerAll() {
+ ModuleMap &MM = PP.getHeaderSearchInfo().getModuleMap();
+ for (const std::string &LoadedModule : LoadedModules)
+ MM.cacheModuleLoad(*PP.getIdentifierInfo(LoadedModule),
+ MM.findModule(LoadedModule));
+ LoadedModules.clear();
+ }
+
+ void markAllUnavailable() {
+ for (const std::string &LoadedModule : LoadedModules) {
+ if (Module *M = PP.getHeaderSearchInfo().getModuleMap().findModule(
+ LoadedModule)) {
+ M->HasIncompatibleModuleFile = true;
+
+ // Mark module as available if the only reason it was unavailable
+ // was missing headers.
+ SmallVector<Module *, 2> Stack;
+ Stack.push_back(M);
+ while (!Stack.empty()) {
+ Module *Current = Stack.pop_back_val();
+ if (Current->IsUnimportable) continue;
+ Current->IsAvailable = true;
+ Stack.insert(Stack.end(),
+ Current->submodule_begin(), Current->submodule_end());
+ }
+ }
+ }
+ LoadedModules.clear();
+ }
+};
+} // namespace
+
void CompilerInstance::createPCHExternalASTSource(
StringRef Path, DisableValidationForModuleKind DisableValidation,
bool AllowPCHWithCompilerErrors, void *DeserializationListener,
@@ -602,6 +651,11 @@ IntrusiveRefCntPtr<ASTReader> CompilerInstance::createPCHExternalASTSource(
for (auto &Listener : DependencyCollectors)
Listener->attachToASTReader(*Reader);
+ auto Listener = std::make_unique<ReadModuleNames>(PP);
+ auto &ListenerRef = *Listener;
+ ASTReader::ListenerScope ReadModuleNamesListener(*Reader,
+ std::move(Listener));
+
switch (Reader->ReadAST(Path,
Preamble ? serialization::MK_Preamble
: serialization::MK_PCH,
@@ -611,6 +665,7 @@ IntrusiveRefCntPtr<ASTReader> CompilerInstance::createPCHExternalASTSource(
// Set the predefines buffer as suggested by the PCH reader. Typically, the
// predefines buffer will be empty.
PP.setPredefines(Reader->getSuggestedPredefines());
+ ListenerRef.registerAll();
return Reader;
case ASTReader::Failure:
@@ -626,6 +681,7 @@ IntrusiveRefCntPtr<ASTReader> CompilerInstance::createPCHExternalASTSource(
break;
}
+ ListenerRef.markAllUnavailable();
Context.setExternalSource(nullptr);
return nullptr;
}
@@ -1029,6 +1085,27 @@ bool CompilerInstance::ExecuteAction(FrontendAction &Act) {
return !getDiagnostics().getClient()->getNumErrors();
}
+void CompilerInstance::LoadRequestedPlugins() {
+ // Load any requested plugins.
+ for (const std::string &Path : getFrontendOpts().Plugins) {
+ std::string Error;
+ if (llvm::sys::DynamicLibrary::LoadLibraryPermanently(Path.c_str(), &Error))
+ getDiagnostics().Report(diag::err_fe_unable_to_load_plugin)
+ << Path << Error;
+ }
+
+ // Check if any of the loaded plugins replaces the main AST action
+ for (const FrontendPluginRegistry::entry &Plugin :
+ FrontendPluginRegistry::entries()) {
+ std::unique_ptr<PluginASTAction> P(Plugin.instantiate());
+ if (P->getActionType() == PluginASTAction::ReplaceAction) {
+ getFrontendOpts().ProgramAction = clang::frontend::PluginAction;
+ getFrontendOpts().ActionName = Plugin.getName().str();
+ break;
+ }
+ }
+}
+
/// Determine the appropriate source input kind based on language
/// options.
static Language getLanguageFromOptions(const LangOptions &LangOpts) {
@@ -1077,14 +1154,12 @@ compileModuleImpl(CompilerInstance &ImportingInstance, SourceLocation ImportLoc,
// Remove any macro definitions that are explicitly ignored by the module.
// They aren't supposed to affect how the module is built anyway.
HeaderSearchOptions &HSOpts = Invocation->getHeaderSearchOpts();
- PPOpts.Macros.erase(
- std::remove_if(PPOpts.Macros.begin(), PPOpts.Macros.end(),
- [&HSOpts](const std::pair<std::string, bool> &def) {
+ llvm::erase_if(
+ PPOpts.Macros, [&HSOpts](const std::pair<std::string, bool> &def) {
StringRef MacroDef = def.first;
return HSOpts.ModulesIgnoreMacros.count(
llvm::CachedHashString(MacroDef.split('=').first)) > 0;
- }),
- PPOpts.Macros.end());
+ });
// If the original compiler invocation had -fmodule-name, pass it through.
Invocation->getLangOpts()->ModuleName =
@@ -1264,23 +1339,75 @@ static bool compileModule(CompilerInstance &ImportingInstance,
return Result;
}
+/// Read the AST right after compiling the module.
+static bool readASTAfterCompileModule(CompilerInstance &ImportingInstance,
+ SourceLocation ImportLoc,
+ SourceLocation ModuleNameLoc,
+ Module *Module, StringRef ModuleFileName,
+ bool *OutOfDate) {
+ DiagnosticsEngine &Diags = ImportingInstance.getDiagnostics();
+
+ unsigned ModuleLoadCapabilities = ASTReader::ARR_Missing;
+ if (OutOfDate)
+ ModuleLoadCapabilities |= ASTReader::ARR_OutOfDate;
+
+ // Try to read the module file, now that we've compiled it.
+ ASTReader::ASTReadResult ReadResult =
+ ImportingInstance.getASTReader()->ReadAST(
+ ModuleFileName, serialization::MK_ImplicitModule, ImportLoc,
+ ModuleLoadCapabilities);
+ if (ReadResult == ASTReader::Success)
+ return true;
+
+ // The caller wants to handle out-of-date failures.
+ if (OutOfDate && ReadResult == ASTReader::OutOfDate) {
+ *OutOfDate = true;
+ return false;
+ }
+
+ // The ASTReader didn't diagnose the error, so conservatively report it.
+ if (ReadResult == ASTReader::Missing || !Diags.hasErrorOccurred())
+ Diags.Report(ModuleNameLoc, diag::err_module_not_built)
+ << Module->Name << SourceRange(ImportLoc, ModuleNameLoc);
+
+ return false;
+}
+
/// Compile a module in a separate compiler instance and read the AST,
/// returning true if the module compiles without errors.
+static bool compileModuleAndReadASTImpl(CompilerInstance &ImportingInstance,
+ SourceLocation ImportLoc,
+ SourceLocation ModuleNameLoc,
+ Module *Module,
+ StringRef ModuleFileName) {
+ if (!compileModule(ImportingInstance, ModuleNameLoc, Module,
+ ModuleFileName)) {
+ ImportingInstance.getDiagnostics().Report(ModuleNameLoc,
+ diag::err_module_not_built)
+ << Module->Name << SourceRange(ImportLoc, ModuleNameLoc);
+ return false;
+ }
+
+ return readASTAfterCompileModule(ImportingInstance, ImportLoc, ModuleNameLoc,
+ Module, ModuleFileName,
+ /*OutOfDate=*/nullptr);
+}
+
+/// Compile a module in a separate compiler instance and read the AST,
+/// returning true if the module compiles without errors, using a lock manager
+/// to avoid building the same module in multiple compiler instances.
///
/// Uses a lock file manager and exponential backoff to reduce the chances that
/// multiple instances will compete to create the same module. On timeout,
/// deletes the lock file in order to avoid deadlock from crashing processes or
/// bugs in the lock file manager.
-static bool compileModuleAndReadAST(CompilerInstance &ImportingInstance,
- SourceLocation ImportLoc,
- SourceLocation ModuleNameLoc,
- Module *Module, StringRef ModuleFileName) {
+static bool compileModuleAndReadASTBehindLock(
+ CompilerInstance &ImportingInstance, SourceLocation ImportLoc,
+ SourceLocation ModuleNameLoc, Module *Module, StringRef ModuleFileName) {
DiagnosticsEngine &Diags = ImportingInstance.getDiagnostics();
- auto diagnoseBuildFailure = [&] {
- Diags.Report(ModuleNameLoc, diag::err_module_not_built)
- << Module->Name << SourceRange(ImportLoc, ModuleNameLoc);
- };
+ Diags.Report(ModuleNameLoc, diag::remark_module_lock)
+ << ModuleFileName << Module->Name;
// FIXME: have LockFileManager return an error_code so that we can
// avoid the mkdir when the directory already exists.
@@ -1288,7 +1415,6 @@ static bool compileModuleAndReadAST(CompilerInstance &ImportingInstance,
llvm::sys::fs::create_directories(Dir);
while (1) {
- unsigned ModuleLoadCapabilities = ASTReader::ARR_Missing;
llvm::LockFileManager Locked(ModuleFileName);
switch (Locked) {
case llvm::LockFileManager::LFS_Error:
@@ -1302,58 +1428,64 @@ static bool compileModuleAndReadAST(CompilerInstance &ImportingInstance,
LLVM_FALLTHROUGH;
case llvm::LockFileManager::LFS_Owned:
// We're responsible for building the module ourselves.
- if (!compileModule(ImportingInstance, ModuleNameLoc, Module,
- ModuleFileName)) {
- diagnoseBuildFailure();
- return false;
- }
- break;
+ return compileModuleAndReadASTImpl(ImportingInstance, ImportLoc,
+ ModuleNameLoc, Module, ModuleFileName);
case llvm::LockFileManager::LFS_Shared:
- // Someone else is responsible for building the module. Wait for them to
- // finish.
- switch (Locked.waitForUnlock()) {
- case llvm::LockFileManager::Res_Success:
- ModuleLoadCapabilities |= ASTReader::ARR_OutOfDate;
- break;
- case llvm::LockFileManager::Res_OwnerDied:
- continue; // try again to get the lock.
- case llvm::LockFileManager::Res_Timeout:
- // Since ModuleCache takes care of correctness, we try waiting for
- // another process to complete the build so clang does not do it done
- // twice. If case of timeout, build it ourselves.
- Diags.Report(ModuleNameLoc, diag::remark_module_lock_timeout)
- << Module->Name;
- // Clear the lock file so that future invocations can make progress.
- Locked.unsafeRemoveLockFile();
- continue;
- }
- break;
+ break; // The interesting case.
}
- // Try to read the module file, now that we've compiled it.
- ASTReader::ASTReadResult ReadResult =
- ImportingInstance.getASTReader()->ReadAST(
- ModuleFileName, serialization::MK_ImplicitModule, ImportLoc,
- ModuleLoadCapabilities);
-
- if (ReadResult == ASTReader::OutOfDate &&
- Locked == llvm::LockFileManager::LFS_Shared) {
- // The module may be out of date in the presence of file system races,
- // or if one of its imports depends on header search paths that are not
- // consistent with this ImportingInstance. Try again...
+ // Someone else is responsible for building the module. Wait for them to
+ // finish.
+ switch (Locked.waitForUnlock()) {
+ case llvm::LockFileManager::Res_Success:
+ break; // The interesting case.
+ case llvm::LockFileManager::Res_OwnerDied:
+ continue; // try again to get the lock.
+ case llvm::LockFileManager::Res_Timeout:
+ // Since ModuleCache takes care of correctness, we try waiting for
+ // another process to complete the build so clang does not do it
+ // twice. In case of timeout, build it ourselves.
+ Diags.Report(ModuleNameLoc, diag::remark_module_lock_timeout)
+ << Module->Name;
+ // Clear the lock file so that future invocations can make progress.
+ Locked.unsafeRemoveLockFile();
continue;
- } else if (ReadResult == ASTReader::Missing) {
- diagnoseBuildFailure();
- } else if (ReadResult != ASTReader::Success &&
- !Diags.hasErrorOccurred()) {
- // The ASTReader didn't diagnose the error, so conservatively report it.
- diagnoseBuildFailure();
}
- return ReadResult == ASTReader::Success;
+
+ // Read the module that was just written by someone else.
+ bool OutOfDate = false;
+ if (readASTAfterCompileModule(ImportingInstance, ImportLoc, ModuleNameLoc,
+ Module, ModuleFileName, &OutOfDate))
+ return true;
+ if (!OutOfDate)
+ return false;
+
+ // The module may be out of date in the presence of file system races,
+ // or if one of its imports depends on header search paths that are not
+ // consistent with this ImportingInstance. Try again...
}
}
+/// Compile a module in a separate compiler instance and read the AST,
+/// returning true if the module compiles without errors, potentially using a
+/// lock manager to avoid building the same module in multiple compiler
+/// instances.
+static bool compileModuleAndReadAST(CompilerInstance &ImportingInstance,
+ SourceLocation ImportLoc,
+ SourceLocation ModuleNameLoc,
+ Module *Module, StringRef ModuleFileName) {
+ return ImportingInstance.getInvocation()
+ .getFrontendOpts()
+ .BuildingImplicitModuleUsesLock
+ ? compileModuleAndReadASTBehindLock(ImportingInstance, ImportLoc,
+ ModuleNameLoc, Module,
+ ModuleFileName)
+ : compileModuleAndReadASTImpl(ImportingInstance, ImportLoc,
+ ModuleNameLoc, Module,
+ ModuleFileName);
+}
+
/// Diagnose differences between the current definition of the given
/// configuration macro and the definition provided on the command line.
static void checkConfigMacro(Preprocessor &PP, StringRef ConfigMacro,
@@ -1555,52 +1687,6 @@ bool CompilerInstance::loadModuleFile(StringRef FileName) {
*FrontendTimerGroup);
llvm::TimeRegion TimeLoading(FrontendTimerGroup ? &Timer : nullptr);
- // Helper to recursively read the module names for all modules we're adding.
- // We mark these as known and redirect any attempt to load that module to
- // the files we were handed.
- struct ReadModuleNames : ASTReaderListener {
- CompilerInstance &CI;
- llvm::SmallVector<IdentifierInfo*, 8> LoadedModules;
-
- ReadModuleNames(CompilerInstance &CI) : CI(CI) {}
-
- void ReadModuleName(StringRef ModuleName) override {
- LoadedModules.push_back(
- CI.getPreprocessor().getIdentifierInfo(ModuleName));
- }
-
- void registerAll() {
- ModuleMap &MM = CI.getPreprocessor().getHeaderSearchInfo().getModuleMap();
- for (auto *II : LoadedModules)
- MM.cacheModuleLoad(*II, MM.findModule(II->getName()));
- LoadedModules.clear();
- }
-
- void markAllUnavailable() {
- for (auto *II : LoadedModules) {
- if (Module *M = CI.getPreprocessor()
- .getHeaderSearchInfo()
- .getModuleMap()
- .findModule(II->getName())) {
- M->HasIncompatibleModuleFile = true;
-
- // Mark module as available if the only reason it was unavailable
- // was missing headers.
- SmallVector<Module *, 2> Stack;
- Stack.push_back(M);
- while (!Stack.empty()) {
- Module *Current = Stack.pop_back_val();
- if (Current->IsUnimportable) continue;
- Current->IsAvailable = true;
- Stack.insert(Stack.end(),
- Current->submodule_begin(), Current->submodule_end());
- }
- }
- }
- LoadedModules.clear();
- }
- };
-
// If we don't already have an ASTReader, create one now.
if (!TheASTReader)
createASTReader();
@@ -1612,7 +1698,7 @@ bool CompilerInstance::loadModuleFile(StringRef FileName) {
SourceLocation())
<= DiagnosticsEngine::Warning;
- auto Listener = std::make_unique<ReadModuleNames>(*this);
+ auto Listener = std::make_unique<ReadModuleNames>(*PP);
auto &ListenerRef = *Listener;
ASTReader::ListenerScope ReadModuleNamesListener(*TheASTReader,
std::move(Listener));
@@ -1691,7 +1777,8 @@ ModuleLoadResult CompilerInstance::findOrCompileModuleAndReadAST(
SourceLocation ModuleNameLoc, bool IsInclusionDirective) {
// Search for a module with the given name.
HeaderSearch &HS = PP->getHeaderSearchInfo();
- Module *M = HS.lookupModule(ModuleName, true, !IsInclusionDirective);
+ Module *M =
+ HS.lookupModule(ModuleName, ImportLoc, true, !IsInclusionDirective);
// Select the source and filename for loading the named module.
std::string ModuleFilename;
@@ -1750,7 +1837,7 @@ ModuleLoadResult CompilerInstance::findOrCompileModuleAndReadAST(
// A prebuilt module is indexed as a ModuleFile; the Module does not exist
// until the first call to ReadAST. Look it up now.
- M = HS.lookupModule(ModuleName, true, !IsInclusionDirective);
+ M = HS.lookupModule(ModuleName, ImportLoc, true, !IsInclusionDirective);
// Check whether M refers to the file in the prebuilt module path.
if (M && M->getASTFile())
@@ -1873,7 +1960,7 @@ CompilerInstance::loadModule(SourceLocation ImportLoc,
} else if (ModuleName == getLangOpts().CurrentModule) {
// This is the module we're building.
Module = PP->getHeaderSearchInfo().lookupModule(
- ModuleName, /*AllowSearch*/ true,
+ ModuleName, ImportLoc, /*AllowSearch*/ true,
/*AllowExtraModuleMapSearch*/ !IsInclusionDirective);
/// FIXME: perhaps we should (a) look for a module using the module name
// to file map (PrebuiltModuleFiles) and (b) diagnose if still not found?
@@ -1903,90 +1990,84 @@ CompilerInstance::loadModule(SourceLocation ImportLoc,
// Verify that the rest of the module path actually corresponds to
// a submodule.
bool MapPrivateSubModToTopLevel = false;
- if (Path.size() > 1) {
- for (unsigned I = 1, N = Path.size(); I != N; ++I) {
- StringRef Name = Path[I].first->getName();
- clang::Module *Sub = Module->findSubmodule(Name);
-
- // If the user is requesting Foo.Private and it doesn't exist, try to
- // match Foo_Private and emit a warning asking for the user to write
- // @import Foo_Private instead. FIXME: remove this when existing clients
- // migrate off of Foo.Private syntax.
- if (!Sub && PP->getLangOpts().ImplicitModules && Name == "Private" &&
- Module == Module->getTopLevelModule()) {
- SmallString<128> PrivateModule(Module->Name);
- PrivateModule.append("_Private");
-
- SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> PrivPath;
- auto &II = PP->getIdentifierTable().get(
- PrivateModule, PP->getIdentifierInfo(Module->Name)->getTokenID());
- PrivPath.push_back(std::make_pair(&II, Path[0].second));
-
- if (PP->getHeaderSearchInfo().lookupModule(PrivateModule, true,
- !IsInclusionDirective))
- Sub =
- loadModule(ImportLoc, PrivPath, Visibility, IsInclusionDirective);
- if (Sub) {
- MapPrivateSubModToTopLevel = true;
- if (!getDiagnostics().isIgnored(
- diag::warn_no_priv_submodule_use_toplevel, ImportLoc)) {
- getDiagnostics().Report(Path[I].second,
- diag::warn_no_priv_submodule_use_toplevel)
- << Path[I].first << Module->getFullModuleName() << PrivateModule
- << SourceRange(Path[0].second, Path[I].second)
- << FixItHint::CreateReplacement(SourceRange(Path[0].second),
- PrivateModule);
- getDiagnostics().Report(Sub->DefinitionLoc,
- diag::note_private_top_level_defined);
- }
+ for (unsigned I = 1, N = Path.size(); I != N; ++I) {
+ StringRef Name = Path[I].first->getName();
+ clang::Module *Sub = Module->findSubmodule(Name);
+
+ // If the user is requesting Foo.Private and it doesn't exist, try to
+ // match Foo_Private and emit a warning asking for the user to write
+ // @import Foo_Private instead. FIXME: remove this when existing clients
+ // migrate off of Foo.Private syntax.
+ if (!Sub && PP->getLangOpts().ImplicitModules && Name == "Private" &&
+ Module == Module->getTopLevelModule()) {
+ SmallString<128> PrivateModule(Module->Name);
+ PrivateModule.append("_Private");
+
+ SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> PrivPath;
+ auto &II = PP->getIdentifierTable().get(
+ PrivateModule, PP->getIdentifierInfo(Module->Name)->getTokenID());
+ PrivPath.push_back(std::make_pair(&II, Path[0].second));
+
+ if (PP->getHeaderSearchInfo().lookupModule(PrivateModule, ImportLoc, true,
+ !IsInclusionDirective))
+ Sub = loadModule(ImportLoc, PrivPath, Visibility, IsInclusionDirective);
+ if (Sub) {
+ MapPrivateSubModToTopLevel = true;
+ if (!getDiagnostics().isIgnored(
+ diag::warn_no_priv_submodule_use_toplevel, ImportLoc)) {
+ getDiagnostics().Report(Path[I].second,
+ diag::warn_no_priv_submodule_use_toplevel)
+ << Path[I].first << Module->getFullModuleName() << PrivateModule
+ << SourceRange(Path[0].second, Path[I].second)
+ << FixItHint::CreateReplacement(SourceRange(Path[0].second),
+ PrivateModule);
+ getDiagnostics().Report(Sub->DefinitionLoc,
+ diag::note_private_top_level_defined);
}
}
+ }
- if (!Sub) {
- // Attempt to perform typo correction to find a module name that works.
- SmallVector<StringRef, 2> Best;
- unsigned BestEditDistance = (std::numeric_limits<unsigned>::max)();
-
- for (clang::Module::submodule_iterator J = Module->submodule_begin(),
- JEnd = Module->submodule_end();
- J != JEnd; ++J) {
- unsigned ED = Name.edit_distance((*J)->Name,
- /*AllowReplacements=*/true,
- BestEditDistance);
- if (ED <= BestEditDistance) {
- if (ED < BestEditDistance) {
- Best.clear();
- BestEditDistance = ED;
- }
-
- Best.push_back((*J)->Name);
+ if (!Sub) {
+ // Attempt to perform typo correction to find a module name that works.
+ SmallVector<StringRef, 2> Best;
+ unsigned BestEditDistance = (std::numeric_limits<unsigned>::max)();
+
+ for (class Module *SubModule : Module->submodules()) {
+ unsigned ED =
+ Name.edit_distance(SubModule->Name,
+ /*AllowReplacements=*/true, BestEditDistance);
+ if (ED <= BestEditDistance) {
+ if (ED < BestEditDistance) {
+ Best.clear();
+ BestEditDistance = ED;
}
+
+ Best.push_back(SubModule->Name);
}
+ }
- // If there was a clear winner, use it.
- if (Best.size() == 1) {
- getDiagnostics().Report(Path[I].second,
- diag::err_no_submodule_suggest)
+ // If there was a clear winner, use it.
+ if (Best.size() == 1) {
+ getDiagnostics().Report(Path[I].second, diag::err_no_submodule_suggest)
<< Path[I].first << Module->getFullModuleName() << Best[0]
- << SourceRange(Path[0].second, Path[I-1].second)
+ << SourceRange(Path[0].second, Path[I - 1].second)
<< FixItHint::CreateReplacement(SourceRange(Path[I].second),
Best[0]);
- Sub = Module->findSubmodule(Best[0]);
- }
+ Sub = Module->findSubmodule(Best[0]);
}
+ }
- if (!Sub) {
- // No submodule by this name. Complain, and don't look for further
- // submodules.
- getDiagnostics().Report(Path[I].second, diag::err_no_submodule)
+ if (!Sub) {
+ // No submodule by this name. Complain, and don't look for further
+ // submodules.
+ getDiagnostics().Report(Path[I].second, diag::err_no_submodule)
<< Path[I].first << Module->getFullModuleName()
- << SourceRange(Path[0].second, Path[I-1].second);
- break;
- }
-
- Module = Sub;
+ << SourceRange(Path[0].second, Path[I - 1].second);
+ break;
}
+
+ Module = Sub;
}
// Make the named module visible, if it's not already part of the module
@@ -2071,8 +2152,7 @@ void CompilerInstance::createModuleFromSource(SourceLocation ImportLoc,
const FileEntry *ModuleMapFile = Other.getFileManager().getVirtualFile(
ModuleMapFileName, NullTerminatedSource.size(), 0);
Other.getSourceManager().overrideFileContents(
- ModuleMapFile,
- llvm::MemoryBuffer::getMemBuffer(NullTerminatedSource.c_str()));
+ ModuleMapFile, llvm::MemoryBuffer::getMemBuffer(NullTerminatedSource));
Other.BuiltModules = std::move(BuiltModules);
Other.DeleteBuiltModules = false;
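Illustrative sketch, not part of the upstream commit: the loadModule hunk above keeps the closest submodule name found with StringRef::edit_distance and only suggests it when there is a single best match. A standalone version of that selection logic, using the same LLVM calls but a hypothetical suggestSubmodule helper, could look like:

    // Sketch only; assumes the LLVM ADT headers used by the hunk above.
    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/ADT/StringRef.h"
    #include <limits>
    #include <string>

    // Return the single closest candidate to Name, or an empty string when the
    // best edit distance is shared by several candidates (no clear winner).
    static std::string suggestSubmodule(llvm::StringRef Name,
                                        llvm::ArrayRef<std::string> Candidates) {
      llvm::SmallVector<llvm::StringRef, 2> Best;
      unsigned BestEditDistance = (std::numeric_limits<unsigned>::max)();
      for (const std::string &Candidate : Candidates) {
        unsigned ED = Name.edit_distance(Candidate, /*AllowReplacements=*/true,
                                         BestEditDistance);
        if (ED <= BestEditDistance) {
          if (ED < BestEditDistance) {
            Best.clear();
            BestEditDistance = ED;
          }
          Best.push_back(Candidate);
        }
      }
      return Best.size() == 1 ? Best[0].str() : std::string();
    }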
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index d545e9358f04..c104a6f40e20 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -78,6 +78,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/HashBuilder.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MemoryBuffer.h"
@@ -453,6 +454,8 @@ static bool FixupInvocation(CompilerInvocation &Invocation,
CodeGenOpts.XRayAlwaysEmitTypedEvents = LangOpts.XRayAlwaysEmitTypedEvents;
CodeGenOpts.DisableFree = FrontendOpts.DisableFree;
FrontendOpts.GenerateGlobalModuleIndex = FrontendOpts.UseGlobalModuleIndex;
+ if (FrontendOpts.ShowStats)
+ CodeGenOpts.ClearASTBeforeBackend = false;
LangOpts.SanitizeCoverage = CodeGenOpts.hasSanitizeCoverage();
LangOpts.ForceEmitVTables = CodeGenOpts.ForceEmitVTables;
LangOpts.SpeculativeLoadHardening = CodeGenOpts.SpeculativeLoadHardening;
@@ -503,9 +506,10 @@ static bool FixupInvocation(CompilerInvocation &Invocation,
// -cl-strict-aliasing needs to emit diagnostic in the case where CL > 1.0.
// This option should be deprecated for CL > 1.0 because
// this option was added for compatibility with OpenCL 1.0.
- if (Args.getLastArg(OPT_cl_strict_aliasing) && LangOpts.OpenCLVersion > 100)
+ if (Args.getLastArg(OPT_cl_strict_aliasing) &&
+ (LangOpts.getOpenCLCompatibleVersion() > 100))
Diags.Report(diag::warn_option_invalid_ocl_version)
- << LangOpts.getOpenCLVersionTuple().getAsString()
+ << LangOpts.getOpenCLVersionString()
<< Args.getLastArg(OPT_cl_strict_aliasing)->getAsString(Args);
if (Arg *A = Args.getLastArg(OPT_fdefault_calling_conv_EQ)) {
@@ -608,9 +612,8 @@ using GenerateFn = llvm::function_ref<void(
CompilerInvocation::StringAllocator)>;
// May perform round-trip of command line arguments. By default, the round-trip
-// is enabled if CLANG_ROUND_TRIP_CC1_ARGS was defined during build. This can be
-// overwritten at run-time via the "-round-trip-args" and "-no-round-trip-args"
-// command line flags.
+// is enabled in assert builds. This can be overwritten at run-time via the
+// "-round-trip-args" and "-no-round-trip-args" command line flags.
// During round-trip, the command line arguments are parsed into a dummy
// instance of CompilerInvocation which is used to generate the command line
// arguments again. The real CompilerInvocation instance is then created by
@@ -620,8 +623,7 @@ static bool RoundTrip(ParseFn Parse, GenerateFn Generate,
CompilerInvocation &DummyInvocation,
ArrayRef<const char *> CommandLineArgs,
DiagnosticsEngine &Diags, const char *Argv0) {
- // FIXME: Switch to '#ifndef NDEBUG' when possible.
-#ifdef CLANG_ROUND_TRIP_CC1_ARGS
+#ifndef NDEBUG
bool DoRoundTripDefault = true;
#else
bool DoRoundTripDefault = false;
@@ -995,7 +997,7 @@ static bool ParseAnalyzerArgs(AnalyzerOptions &Opts, ArgList &Args,
diag::err_analyzer_config_no_value) << configVal;
break;
}
- if (val.find('=') != StringRef::npos) {
+ if (val.contains('=')) {
Diags.Report(SourceLocation(),
diag::err_analyzer_config_multiple_values)
<< configVal;
@@ -1118,10 +1120,9 @@ static void parseAnalyzerConfigs(AnalyzerOptions &AnOpts,
for (const StringRef &CheckerOrPackage : CheckersAndPackages) {
if (Diags) {
bool IsChecker = CheckerOrPackage.contains('.');
- bool IsValidName =
- IsChecker
- ? llvm::find(Checkers, CheckerOrPackage) != Checkers.end()
- : llvm::find(Packages, CheckerOrPackage) != Packages.end();
+ bool IsValidName = IsChecker
+ ? llvm::is_contained(Checkers, CheckerOrPackage)
+ : llvm::is_contained(Packages, CheckerOrPackage);
if (!IsValidName)
Diags->Report(diag::err_unknown_analyzer_checker_or_package)
@@ -1172,8 +1173,9 @@ ParseOptimizationRemark(DiagnosticsEngine &Diags, ArgList &Args,
OptSpecifier OptEQ, StringRef Name) {
CodeGenOptions::OptRemark Result;
- auto InitializeResultPattern = [&Diags, &Args, &Result](const Arg *A) {
- Result.Pattern = A->getValue();
+ auto InitializeResultPattern = [&Diags, &Args, &Result](const Arg *A,
+ StringRef Pattern) {
+ Result.Pattern = Pattern.str();
std::string RegexError;
Result.Regex = std::make_shared<llvm::Regex>(Result.Pattern);
@@ -1198,19 +1200,23 @@ ParseOptimizationRemark(DiagnosticsEngine &Diags, ArgList &Args,
Result.Kind = CodeGenOptions::RK_Disabled;
else if (Value == "no-everything")
Result.Kind = CodeGenOptions::RK_DisabledEverything;
+ else
+ continue;
+
+ if (Result.Kind == CodeGenOptions::RK_Disabled ||
+ Result.Kind == CodeGenOptions::RK_DisabledEverything) {
+ Result.Pattern = "";
+ Result.Regex = nullptr;
+ } else {
+ InitializeResultPattern(A, ".*");
+ }
} else if (A->getOption().matches(OptEQ)) {
Result.Kind = CodeGenOptions::RK_WithPattern;
- if (!InitializeResultPattern(A))
+ if (!InitializeResultPattern(A, A->getValue()))
return CodeGenOptions::OptRemark();
}
}
- if (Result.Kind == CodeGenOptions::RK_Disabled ||
- Result.Kind == CodeGenOptions::RK_DisabledEverything) {
- Result.Pattern = "";
- Result.Regex = nullptr;
- }
-
return Result;
}
@@ -1406,6 +1412,13 @@ void CompilerInvocation::GenerateCodeGenArgs(
llvm::DICompileUnit::DebugNameTableKind::Default))
GenerateArg(Args, OPT_gpubnames, SA);
+ auto TNK = Opts.getDebugSimpleTemplateNames();
+ if (TNK != codegenoptions::DebugTemplateNamesKind::Full) {
+ if (TNK == codegenoptions::DebugTemplateNamesKind::Simple)
+ GenerateArg(Args, OPT_gsimple_template_names_EQ, "simple", SA);
+ else if (TNK == codegenoptions::DebugTemplateNamesKind::Mangled)
+ GenerateArg(Args, OPT_gsimple_template_names_EQ, "mangled", SA);
+ }
// ProfileInstrumentUsePath is marshalled automatically, no need to generate
// it or PGOUseInstrumentor.
@@ -1417,7 +1430,7 @@ void CompilerInvocation::GenerateCodeGenArgs(
}
if (Opts.PrepareForLTO && !Opts.PrepareForThinLTO)
- GenerateArg(Args, OPT_flto, SA);
+ GenerateArg(Args, OPT_flto_EQ, "full", SA);
if (Opts.PrepareForThinLTO)
GenerateArg(Args, OPT_flto_EQ, "thin", SA);
@@ -1680,6 +1693,16 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args,
: Args.hasArg(OPT_gpubnames)
? llvm::DICompileUnit::DebugNameTableKind::Default
: llvm::DICompileUnit::DebugNameTableKind::None);
+ if (const Arg *A = Args.getLastArg(OPT_gsimple_template_names_EQ)) {
+ StringRef Value = A->getValue();
+ if (Value != "simple" && Value != "mangled")
+ Diags.Report(diag::err_drv_unsupported_option_argument)
+ << A->getOption().getName() << A->getValue();
+ Opts.setDebugSimpleTemplateNames(
+ StringRef(A->getValue()) == "simple"
+ ? codegenoptions::DebugTemplateNamesKind::Simple
+ : codegenoptions::DebugTemplateNamesKind::Mangled);
+ }
if (!Opts.ProfileInstrumentUsePath.empty())
setPGOUseInstrumentor(Opts, Opts.ProfileInstrumentUsePath);
@@ -1704,9 +1727,10 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args,
}
}
- Opts.PrepareForLTO = Args.hasArg(OPT_flto, OPT_flto_EQ);
+ Opts.PrepareForLTO = false;
Opts.PrepareForThinLTO = false;
if (Arg *A = Args.getLastArg(OPT_flto_EQ)) {
+ Opts.PrepareForLTO = true;
StringRef S = A->getValue();
if (S == "thin")
Opts.PrepareForThinLTO = true;
@@ -2035,13 +2059,13 @@ static bool ParseDependencyOutputArgs(DependencyOutputOptions &Opts,
if (!Args.hasArg(OPT_fno_sanitize_ignorelist)) {
for (const auto *A : Args.filtered(OPT_fsanitize_ignorelist_EQ)) {
StringRef Val = A->getValue();
- if (Val.find('=') == StringRef::npos)
+ if (!Val.contains('='))
Opts.ExtraDeps.emplace_back(std::string(Val), EDK_SanitizeIgnorelist);
}
if (Opts.IncludeSystemHeaders) {
for (const auto *A : Args.filtered(OPT_fsanitize_system_ignorelist_EQ)) {
StringRef Val = A->getValue();
- if (Val.find('=') == StringRef::npos)
+ if (!Val.contains('='))
Opts.ExtraDeps.emplace_back(std::string(Val), EDK_SanitizeIgnorelist);
}
}
@@ -2058,7 +2082,7 @@ static bool ParseDependencyOutputArgs(DependencyOutputOptions &Opts,
// Only the -fmodule-file=<file> form.
for (const auto *A : Args.filtered(OPT_fmodule_file)) {
StringRef Val = A->getValue();
- if (Val.find('=') == StringRef::npos)
+ if (!Val.contains('='))
Opts.ExtraDeps.emplace_back(std::string(Val), EDK_ModuleFile);
}
@@ -2255,6 +2279,19 @@ void CompilerInvocation::GenerateDiagnosticArgs(
}
}
+std::unique_ptr<DiagnosticOptions>
+clang::CreateAndPopulateDiagOpts(ArrayRef<const char *> Argv) {
+ auto DiagOpts = std::make_unique<DiagnosticOptions>();
+ unsigned MissingArgIndex, MissingArgCount;
+ InputArgList Args = getDriverOptTable().ParseArgs(
+ Argv.slice(1), MissingArgIndex, MissingArgCount);
+ // We ignore MissingArgCount and the return value of ParseDiagnosticArgs.
+ // Any errors that would be diagnosed here will also be diagnosed later,
+ // when the DiagnosticsEngine actually exists.
+ (void)ParseDiagnosticArgs(*DiagOpts, Args);
+ return DiagOpts;
+}
+
bool clang::ParseDiagnosticArgs(DiagnosticOptions &Opts, ArgList &Args,
DiagnosticsEngine *Diags,
bool DefaultDiagColor) {
@@ -2689,7 +2726,7 @@ static bool ParseFrontendArgs(FrontendOptions &Opts, ArgList &Args,
// Only the -fmodule-file=<file> form.
for (const auto *A : Args.filtered(OPT_fmodule_file)) {
StringRef Val = A->getValue();
- if (Val.find('=') == StringRef::npos)
+ if (!Val.contains('='))
Opts.ModuleFiles.push_back(std::string(Val));
}
@@ -2837,7 +2874,7 @@ static void GenerateHeaderSearchArgs(HeaderSearchOptions &Opts,
llvm::ArrayRef<frontend::IncludeDirGroup> Groups,
llvm::Optional<bool> IsFramework,
llvm::Optional<bool> IgnoreSysRoot) {
- return llvm::find(Groups, Entry.Group) != Groups.end() &&
+ return llvm::is_contained(Groups, Entry.Group) &&
(!IsFramework || (Entry.IsFramework == *IsFramework)) &&
(!IgnoreSysRoot || (Entry.IgnoreSysRoot == *IgnoreSysRoot));
};
@@ -2964,7 +3001,7 @@ static bool ParseHeaderSearchArgs(HeaderSearchOptions &Opts, ArgList &Args,
// Only the -fmodule-file=<name>=<file> form.
for (const auto *A : Args.filtered(OPT_fmodule_file)) {
StringRef Val = A->getValue();
- if (Val.find('=') != StringRef::npos){
+ if (Val.contains('=')) {
auto Split = Val.split('=');
Opts.PrebuiltModuleFiles.insert(
{std::string(Split.first), std::string(Split.second)});
@@ -3091,7 +3128,7 @@ void CompilerInvocation::setLangDefaults(LangOptions &Opts, InputKind IK,
LangStd = LangStandard::lang_opencl12;
break;
case Language::OpenCLCXX:
- LangStd = LangStandard::lang_openclcpp;
+ LangStd = LangStandard::lang_openclcpp10;
break;
case Language::CUDA:
LangStd = LangStandard::lang_cuda;
@@ -3150,8 +3187,6 @@ void CompilerInvocation::setLangDefaults(LangOptions &Opts, InputKind IK,
Opts.HexFloats = Std.hasHexFloats();
Opts.ImplicitInt = Std.hasImplicitInt();
- Opts.CPlusPlusModules = Opts.CPlusPlus20;
-
// Set OpenCL Version.
Opts.OpenCL = Std.isOpenCL();
if (LangStd == LangStandard::lang_opencl10)
@@ -3164,8 +3199,10 @@ void CompilerInvocation::setLangDefaults(LangOptions &Opts, InputKind IK,
Opts.OpenCLVersion = 200;
else if (LangStd == LangStandard::lang_opencl30)
Opts.OpenCLVersion = 300;
- else if (LangStd == LangStandard::lang_openclcpp)
+ else if (LangStd == LangStandard::lang_openclcpp10)
Opts.OpenCLCPlusPlusVersion = 100;
+ else if (LangStd == LangStandard::lang_openclcpp2021)
+ Opts.OpenCLCPlusPlusVersion = 202100;
// OpenCL has some additional defaults.
if (Opts.OpenCL) {
@@ -3173,9 +3210,8 @@ void CompilerInvocation::setLangDefaults(LangOptions &Opts, InputKind IK,
Opts.ZVector = 0;
Opts.setDefaultFPContractMode(LangOptions::FPM_On);
Opts.OpenCLCPlusPlus = Opts.CPlusPlus;
- Opts.OpenCLPipe = Opts.OpenCLCPlusPlus || Opts.OpenCLVersion == 200;
- Opts.OpenCLGenericAddressSpace =
- Opts.OpenCLCPlusPlus || Opts.OpenCLVersion == 200;
+ Opts.OpenCLPipes = Opts.getOpenCLCompatibleVersion() == 200;
+ Opts.OpenCLGenericAddressSpace = Opts.getOpenCLCompatibleVersion() == 200;
// Include default header file for OpenCL.
if (Opts.IncludeDefaultHeader) {
@@ -3314,7 +3350,8 @@ void CompilerInvocation::GenerateLangArgs(const LangOptions &Opts,
case LangStandard::lang_opencl12:
case LangStandard::lang_opencl20:
case LangStandard::lang_opencl30:
- case LangStandard::lang_openclcpp:
+ case LangStandard::lang_openclcpp10:
+ case LangStandard::lang_openclcpp2021:
StdOpt = OPT_cl_std_EQ;
break;
default:
@@ -3445,6 +3482,19 @@ void CompilerInvocation::GenerateLangArgs(const LangOptions &Opts,
GenerateArg(Args, OPT_fopenmp_version_EQ, Twine(Opts.OpenMP), SA);
}
+ if (Opts.OpenMPTargetNewRuntime)
+ GenerateArg(Args, OPT_fopenmp_target_new_runtime, SA);
+
+ if (Opts.OpenMPThreadSubscription)
+ GenerateArg(Args, OPT_fopenmp_assume_threads_oversubscription, SA);
+
+ if (Opts.OpenMPTeamSubscription)
+ GenerateArg(Args, OPT_fopenmp_assume_teams_oversubscription, SA);
+
+ if (Opts.OpenMPTargetDebug != 0)
+ GenerateArg(Args, OPT_fopenmp_target_debug_EQ,
+ Twine(Opts.OpenMPTargetDebug), SA);
+
if (Opts.OpenMPCUDANumSMs != 0)
GenerateArg(Args, OPT_fopenmp_cuda_number_of_sm_EQ,
Twine(Opts.OpenMPCUDANumSMs), SA);
@@ -3528,6 +3578,9 @@ void CompilerInvocation::GenerateLangArgs(const LangOptions &Opts,
GenerateArg(Args, OPT_fexperimental_relative_cxx_abi_vtables, SA);
else
GenerateArg(Args, OPT_fno_experimental_relative_cxx_abi_vtables, SA);
+
+ for (const auto &MP : Opts.MacroPrefixMap)
+ GenerateArg(Args, OPT_fmacro_prefix_map_EQ, MP.first + "=" + MP.second, SA);
}
bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,
@@ -3608,7 +3661,9 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,
.Cases("cl1.2", "CL1.2", LangStandard::lang_opencl12)
.Cases("cl2.0", "CL2.0", LangStandard::lang_opencl20)
.Cases("cl3.0", "CL3.0", LangStandard::lang_opencl30)
- .Cases("clc++", "CLC++", LangStandard::lang_openclcpp)
+ .Cases("clc++", "CLC++", LangStandard::lang_openclcpp10)
+ .Cases("clc++1.0", "CLC++1.0", LangStandard::lang_openclcpp10)
+ .Cases("clc++2021", "CLC++2021", LangStandard::lang_openclcpp2021)
.Default(LangStandard::lang_unspecified);
if (OpenCLLangStd == LangStandard::lang_unspecified) {
@@ -3737,8 +3792,8 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,
// '-mignore-xcoff-visibility' is implied. The generated command line will
// contain both '-fvisibility default' and '-mignore-xcoff-visibility' and
// subsequent calls to `CreateFromArgs`/`generateCC1CommandLine` will always
- // produce the same arguments.
-
+ // produce the same arguments.
+
if (T.isOSAIX() && (Args.hasArg(OPT_mignore_xcoff_visibility) ||
!Args.hasArg(OPT_fvisibility)))
Opts.IgnoreXCOFFVisibility = 1;
@@ -3818,6 +3873,9 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,
Opts.OpenMP && Args.hasArg(options::OPT_fopenmp_enable_irbuilder);
bool IsTargetSpecified =
Opts.OpenMPIsDevice || Args.hasArg(options::OPT_fopenmp_targets_EQ);
+ Opts.OpenMPTargetNewRuntime =
+ Opts.OpenMPIsDevice &&
+ Args.hasArg(options::OPT_fopenmp_target_new_runtime);
Opts.ConvergentFunctions = Opts.ConvergentFunctions || Opts.OpenMPIsDevice;
@@ -3845,6 +3903,7 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,
// handling code for those requiring so.
if ((Opts.OpenMPIsDevice && (T.isNVPTX() || T.isAMDGCN())) ||
Opts.OpenCLCPlusPlus) {
+
Opts.Exceptions = 0;
Opts.CXXExceptions = 0;
}
@@ -3860,6 +3919,27 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,
Opts.OpenMPCUDAReductionBufNum, Diags);
}
+ // Set the value of the debugging flag used in the new offloading device RTL.
+ // Set either by a specific value or to a default if not specified.
+ if (Opts.OpenMPIsDevice && (Args.hasArg(OPT_fopenmp_target_debug) ||
+ Args.hasArg(OPT_fopenmp_target_debug_EQ))) {
+ if (Opts.OpenMPTargetNewRuntime) {
+ Opts.OpenMPTargetDebug = getLastArgIntValue(
+ Args, OPT_fopenmp_target_debug_EQ, Opts.OpenMPTargetDebug, Diags);
+ if (!Opts.OpenMPTargetDebug && Args.hasArg(OPT_fopenmp_target_debug))
+ Opts.OpenMPTargetDebug = 1;
+ } else {
+ Diags.Report(diag::err_drv_debug_no_new_runtime);
+ }
+ }
+
+ if (Opts.OpenMPIsDevice && Opts.OpenMPTargetNewRuntime) {
+ if (Args.hasArg(OPT_fopenmp_assume_teams_oversubscription))
+ Opts.OpenMPTeamSubscription = true;
+ if (Args.hasArg(OPT_fopenmp_assume_threads_oversubscription))
+ Opts.OpenMPThreadSubscription = true;
+ }
+
// Get the OpenMP target triples if any.
if (Arg *A = Args.getLastArg(options::OPT_fopenmp_targets_EQ)) {
enum ArchPtrSize { Arch16Bit, Arch32Bit, Arch64Bit };
@@ -4037,6 +4117,12 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,
options::OPT_fno_experimental_relative_cxx_abi_vtables,
TargetCXXABI::usesRelativeVTables(T));
+ for (const auto &A : Args.getAllArgValues(OPT_fmacro_prefix_map_EQ)) {
+ auto Split = StringRef(A).split('=');
+ Opts.MacroPrefixMap.insert(
+ {std::string(Split.first), std::string(Split.second)});
+ }
+
return Diags.getNumErrors() == NumErrorsBefore;
}
@@ -4109,9 +4195,6 @@ static void GeneratePreprocessorArgs(PreprocessorOptions &Opts,
for (const auto &D : Opts.DeserializedPCHDeclsToErrorOn)
GenerateArg(Args, OPT_error_on_deserialized_pch_decl, D, SA);
- for (const auto &MP : Opts.MacroPrefixMap)
- GenerateArg(Args, OPT_fmacro_prefix_map_EQ, MP.first + "=" + MP.second, SA);
-
if (Opts.PrecompiledPreambleBytes != std::make_pair(0u, false))
GenerateArg(Args, OPT_preamble_bytes_EQ,
Twine(Opts.PrecompiledPreambleBytes.first) + "," +
@@ -4180,12 +4263,6 @@ static bool ParsePreprocessorArgs(PreprocessorOptions &Opts, ArgList &Args,
for (const auto *A : Args.filtered(OPT_error_on_deserialized_pch_decl))
Opts.DeserializedPCHDeclsToErrorOn.insert(A->getValue());
- for (const auto &A : Args.getAllArgValues(OPT_fmacro_prefix_map_EQ)) {
- auto Split = StringRef(A).split('=');
- Opts.MacroPrefixMap.insert(
- {std::string(Split.first), std::string(Split.second)});
- }
-
if (const Arg *A = Args.getLastArg(OPT_preamble_bytes_EQ)) {
StringRef Value(A->getValue());
size_t Comma = Value.find(',');
@@ -4460,116 +4537,99 @@ bool CompilerInvocation::CreateFromArgs(CompilerInvocation &Invocation,
}
std::string CompilerInvocation::getModuleHash() const {
+ // FIXME: Consider using SHA1 instead of MD5.
+ llvm::HashBuilder<llvm::MD5, llvm::support::endianness::native> HBuilder;
+
// Note: For QoI reasons, the things we use as a hash here should all be
// dumped via the -module-info flag.
- using llvm::hash_code;
- using llvm::hash_value;
- using llvm::hash_combine;
- using llvm::hash_combine_range;
// Start the signature with the compiler version.
- // FIXME: We'd rather use something more cryptographically sound than
- // CityHash, but this will do for now.
- hash_code code = hash_value(getClangFullRepositoryVersion());
+ HBuilder.add(getClangFullRepositoryVersion());
// Also include the serialization version, in case LLVM_APPEND_VC_REV is off
// and getClangFullRepositoryVersion() doesn't include git revision.
- code = hash_combine(code, serialization::VERSION_MAJOR,
- serialization::VERSION_MINOR);
+ HBuilder.add(serialization::VERSION_MAJOR, serialization::VERSION_MINOR);
// Extend the signature with the language options
-#define LANGOPT(Name, Bits, Default, Description) \
- code = hash_combine(code, LangOpts->Name);
-#define ENUM_LANGOPT(Name, Type, Bits, Default, Description) \
- code = hash_combine(code, static_cast<unsigned>(LangOpts->get##Name()));
+#define LANGOPT(Name, Bits, Default, Description) HBuilder.add(LangOpts->Name);
+#define ENUM_LANGOPT(Name, Type, Bits, Default, Description) \
+ HBuilder.add(static_cast<unsigned>(LangOpts->get##Name()));
#define BENIGN_LANGOPT(Name, Bits, Default, Description)
#define BENIGN_ENUM_LANGOPT(Name, Type, Bits, Default, Description)
#include "clang/Basic/LangOptions.def"
- for (StringRef Feature : LangOpts->ModuleFeatures)
- code = hash_combine(code, Feature);
+ HBuilder.addRange(LangOpts->ModuleFeatures);
- code = hash_combine(code, LangOpts->ObjCRuntime);
- const auto &BCN = LangOpts->CommentOpts.BlockCommandNames;
- code = hash_combine(code, hash_combine_range(BCN.begin(), BCN.end()));
+ HBuilder.add(LangOpts->ObjCRuntime);
+ HBuilder.addRange(LangOpts->CommentOpts.BlockCommandNames);
// Extend the signature with the target options.
- code = hash_combine(code, TargetOpts->Triple, TargetOpts->CPU,
- TargetOpts->TuneCPU, TargetOpts->ABI);
- for (const auto &FeatureAsWritten : TargetOpts->FeaturesAsWritten)
- code = hash_combine(code, FeatureAsWritten);
+ HBuilder.add(TargetOpts->Triple, TargetOpts->CPU, TargetOpts->TuneCPU,
+ TargetOpts->ABI);
+ HBuilder.addRange(TargetOpts->FeaturesAsWritten);
// Extend the signature with preprocessor options.
const PreprocessorOptions &ppOpts = getPreprocessorOpts();
- const HeaderSearchOptions &hsOpts = getHeaderSearchOpts();
- code = hash_combine(code, ppOpts.UsePredefines, ppOpts.DetailedRecord);
+ HBuilder.add(ppOpts.UsePredefines, ppOpts.DetailedRecord);
- for (const auto &I : getPreprocessorOpts().Macros) {
+ const HeaderSearchOptions &hsOpts = getHeaderSearchOpts();
+ for (const auto &Macro : getPreprocessorOpts().Macros) {
// If we're supposed to ignore this macro for the purposes of modules,
// don't put it into the hash.
if (!hsOpts.ModulesIgnoreMacros.empty()) {
// Check whether we're ignoring this macro.
- StringRef MacroDef = I.first;
+ StringRef MacroDef = Macro.first;
if (hsOpts.ModulesIgnoreMacros.count(
llvm::CachedHashString(MacroDef.split('=').first)))
continue;
}
- code = hash_combine(code, I.first, I.second);
+ HBuilder.add(Macro);
}
// Extend the signature with the sysroot and other header search options.
- code = hash_combine(code, hsOpts.Sysroot,
- hsOpts.ModuleFormat,
- hsOpts.UseDebugInfo,
- hsOpts.UseBuiltinIncludes,
- hsOpts.UseStandardSystemIncludes,
- hsOpts.UseStandardCXXIncludes,
- hsOpts.UseLibcxx,
- hsOpts.ModulesValidateDiagnosticOptions);
- code = hash_combine(code, hsOpts.ResourceDir);
+ HBuilder.add(hsOpts.Sysroot, hsOpts.ModuleFormat, hsOpts.UseDebugInfo,
+ hsOpts.UseBuiltinIncludes, hsOpts.UseStandardSystemIncludes,
+ hsOpts.UseStandardCXXIncludes, hsOpts.UseLibcxx,
+ hsOpts.ModulesValidateDiagnosticOptions);
+ HBuilder.add(hsOpts.ResourceDir);
if (hsOpts.ModulesStrictContextHash) {
- hash_code SHPC = hash_combine_range(hsOpts.SystemHeaderPrefixes.begin(),
- hsOpts.SystemHeaderPrefixes.end());
- hash_code UEC = hash_combine_range(hsOpts.UserEntries.begin(),
- hsOpts.UserEntries.end());
- code = hash_combine(code, hsOpts.SystemHeaderPrefixes.size(), SHPC,
- hsOpts.UserEntries.size(), UEC);
+ HBuilder.addRange(hsOpts.SystemHeaderPrefixes);
+ HBuilder.addRange(hsOpts.UserEntries);
const DiagnosticOptions &diagOpts = getDiagnosticOpts();
- #define DIAGOPT(Name, Bits, Default) \
- code = hash_combine(code, diagOpts.Name);
- #define ENUM_DIAGOPT(Name, Type, Bits, Default) \
- code = hash_combine(code, diagOpts.get##Name());
- #include "clang/Basic/DiagnosticOptions.def"
- #undef DIAGOPT
- #undef ENUM_DIAGOPT
+#define DIAGOPT(Name, Bits, Default) HBuilder.add(diagOpts.Name);
+#define ENUM_DIAGOPT(Name, Type, Bits, Default) \
+ HBuilder.add(diagOpts.get##Name());
+#include "clang/Basic/DiagnosticOptions.def"
+#undef DIAGOPT
+#undef ENUM_DIAGOPT
}
// Extend the signature with the user build path.
- code = hash_combine(code, hsOpts.ModuleUserBuildPath);
+ HBuilder.add(hsOpts.ModuleUserBuildPath);
// Extend the signature with the module file extensions.
- const FrontendOptions &frontendOpts = getFrontendOpts();
- for (const auto &ext : frontendOpts.ModuleFileExtensions) {
- code = ext->hashExtension(code);
- }
+ for (const auto &ext : getFrontendOpts().ModuleFileExtensions)
+ ext->hashExtension(HBuilder);
// When compiling with -gmodules, also hash -fdebug-prefix-map as it
// affects the debug info in the PCM.
if (getCodeGenOpts().DebugTypeExtRefs)
- for (const auto &KeyValue : getCodeGenOpts().DebugPrefixMap)
- code = hash_combine(code, KeyValue.first, KeyValue.second);
+ HBuilder.addRange(getCodeGenOpts().DebugPrefixMap);
// Extend the signature with the enabled sanitizers, if at least one is
// enabled. Sanitizers which cannot affect AST generation aren't hashed.
SanitizerSet SanHash = LangOpts->Sanitize;
SanHash.clear(getPPTransparentSanitizers());
if (!SanHash.empty())
- code = hash_combine(code, SanHash.Mask);
+ HBuilder.add(SanHash.Mask);
- return toString(llvm::APInt(64, code), 36, /*Signed=*/false);
+ llvm::MD5::MD5Result Result;
+ HBuilder.getHasher().final(Result);
+ uint64_t Hash = Result.high() ^ Result.low();
+ return toString(llvm::APInt(64, Hash), 36, /*Signed=*/false);
}
void CompilerInvocation::generateCC1CommandLine(
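Illustrative sketch, not part of the upstream commit: the getModuleHash hunk above moves from llvm::hash_combine to llvm::HashBuilder over MD5 and folds the digest to 64 bits. A minimal standalone use of the same calls, with made-up inputs (hashOptions and its parameters are placeholders), might look like:

    // Sketch only; mirrors the HashBuilder calls shown in the hunk above.
    #include "llvm/ADT/APInt.h"
    #include "llvm/ADT/StringExtras.h"
    #include "llvm/Support/HashBuilder.h"
    #include "llvm/Support/MD5.h"
    #include <cstdint>
    #include <string>
    #include <vector>

    static std::string hashOptions(const std::string &Version,
                                   const std::vector<std::string> &Features,
                                   bool UsePredefines) {
      llvm::HashBuilder<llvm::MD5, llvm::support::endianness::native> HBuilder;
      HBuilder.add(Version);       // scalars and strings
      HBuilder.addRange(Features); // whole ranges in one call
      HBuilder.add(UsePredefines);
      llvm::MD5::MD5Result Result;
      HBuilder.getHasher().final(Result);
      // Fold the 128-bit digest down to 64 bits, as the hunk above does.
      uint64_t Hash = Result.high() ^ Result.low();
      return llvm::toString(llvm::APInt(64, Hash), 36, /*Signed=*/false);
    }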
diff --git a/clang/lib/Frontend/CreateInvocationFromCommandLine.cpp b/clang/lib/Frontend/CreateInvocationFromCommandLine.cpp
index 2e23ebfdf160..c5627d13a7a7 100644
--- a/clang/lib/Frontend/CreateInvocationFromCommandLine.cpp
+++ b/clang/lib/Frontend/CreateInvocationFromCommandLine.cpp
@@ -30,6 +30,7 @@ std::unique_ptr<CompilerInvocation> clang::createInvocationFromCommandLine(
ArrayRef<const char *> ArgList, IntrusiveRefCntPtr<DiagnosticsEngine> Diags,
IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS, bool ShouldRecoverOnErorrs,
std::vector<std::string> *CC1Args) {
+ assert(!ArgList.empty());
if (!Diags.get()) {
// No diagnostics engine was provided, so create our own diagnostics object
// with the default options.
@@ -79,22 +80,24 @@ std::unique_ptr<CompilerInvocation> clang::createInvocationFromCommandLine(
}
}
}
- if (Jobs.size() == 0 || !isa<driver::Command>(*Jobs.begin()) ||
- (Jobs.size() > 1 && !OffloadCompilation)) {
+
+ bool PickFirstOfMany = OffloadCompilation || ShouldRecoverOnErorrs;
+ if (Jobs.size() == 0 || (Jobs.size() > 1 && !PickFirstOfMany)) {
SmallString<256> Msg;
llvm::raw_svector_ostream OS(Msg);
Jobs.Print(OS, "; ", true);
Diags->Report(diag::err_fe_expected_compiler_job) << OS.str();
return nullptr;
}
-
- const driver::Command &Cmd = cast<driver::Command>(*Jobs.begin());
- if (StringRef(Cmd.getCreator().getName()) != "clang") {
+ auto Cmd = llvm::find_if(Jobs, [](const driver::Command &Cmd) {
+ return StringRef(Cmd.getCreator().getName()) == "clang";
+ });
+ if (Cmd == Jobs.end()) {
Diags->Report(diag::err_fe_expected_clang_command);
return nullptr;
}
- const ArgStringList &CCArgs = Cmd.getArguments();
+ const ArgStringList &CCArgs = Cmd->getArguments();
if (CC1Args)
*CC1Args = {CCArgs.begin(), CCArgs.end()};
auto CI = std::make_unique<CompilerInvocation>();
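Illustrative sketch, not part of the upstream commit: the change above stops requiring the clang job to be the first job and instead searches the job list with llvm::find_if. A standalone version of that selection, with a hypothetical Command struct standing in for driver::Command:

    // Sketch only; Command is a stand-in for clang::driver::Command.
    #include "llvm/ADT/STLExtras.h"
    #include <string>
    #include <vector>

    struct Command {
      std::string CreatorName; // e.g. "clang" or "ld"
      std::vector<std::string> Arguments;
    };

    // Return the first job created by "clang", or nullptr if there is none.
    static const Command *findClangJob(const std::vector<Command> &Jobs) {
      auto Cmd = llvm::find_if(
          Jobs, [](const Command &C) { return C.CreatorName == "clang"; });
      return Cmd == Jobs.end() ? nullptr : &*Cmd;
    }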
diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp
index c996c9c486bc..089f40b36089 100644
--- a/clang/lib/Frontend/FrontendAction.cpp
+++ b/clang/lib/Frontend/FrontendAction.cpp
@@ -27,6 +27,7 @@
#include "clang/Serialization/ASTDeserializationListener.h"
#include "clang/Serialization/ASTReader.h"
#include "clang/Serialization/GlobalModuleIndex.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/Support/BuryPointer.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
@@ -143,7 +144,7 @@ void FrontendAction::setCurrentInput(const FrontendInputFile &CurrentInput,
Module *FrontendAction::getCurrentModule() const {
CompilerInstance &CI = getCompilerInstance();
return CI.getPreprocessor().getHeaderSearchInfo().lookupModule(
- CI.getLangOpts().CurrentModule, /*AllowSearch*/false);
+ CI.getLangOpts().CurrentModule, SourceLocation(), /*AllowSearch*/false);
}
std::unique_ptr<ASTConsumer>
@@ -186,14 +187,19 @@ FrontendAction::CreateWrappedASTConsumer(CompilerInstance &CI,
FrontendPluginRegistry::entries()) {
std::unique_ptr<PluginASTAction> P = Plugin.instantiate();
PluginASTAction::ActionType ActionType = P->getActionType();
- if (ActionType == PluginASTAction::Cmdline) {
+ if (ActionType == PluginASTAction::CmdlineAfterMainAction ||
+ ActionType == PluginASTAction::CmdlineBeforeMainAction) {
// This is O(|plugins| * |add_plugins|), but since both numbers are
// way below 50 in practice, that's ok.
if (llvm::any_of(CI.getFrontendOpts().AddPluginActions,
[&](const std::string &PluginAction) {
return PluginAction == Plugin.getName();
- }))
- ActionType = PluginASTAction::AddAfterMainAction;
+ })) {
+ if (ActionType == PluginASTAction::CmdlineBeforeMainAction)
+ ActionType = PluginASTAction::AddBeforeMainAction;
+ else
+ ActionType = PluginASTAction::AddAfterMainAction;
+ }
}
if ((ActionType == PluginASTAction::AddBeforeMainAction ||
ActionType == PluginASTAction::AddAfterMainAction) &&
@@ -211,8 +217,13 @@ FrontendAction::CreateWrappedASTConsumer(CompilerInstance &CI,
// Add to Consumers the main consumer, then all the plugins that go after it
Consumers.push_back(std::move(Consumer));
- for (auto &C : AfterConsumers) {
- Consumers.push_back(std::move(C));
+ if (!AfterConsumers.empty()) {
+ // If we have plugins after the main consumer, which may be the codegen
+ // action, they likely will need the ASTContext, so don't clear it in the
+ // codegen action.
+ CI.getCodeGenOpts().ClearASTBeforeBackend = false;
+ for (auto &C : AfterConsumers)
+ Consumers.push_back(std::move(C));
}
return std::make_unique<MultiplexConsumer>(std::move(Consumers));
@@ -471,7 +482,7 @@ static Module *prepareToBuildModule(CompilerInstance &CI,
// Dig out the module definition.
HeaderSearch &HS = CI.getPreprocessor().getHeaderSearchInfo();
- Module *M = HS.lookupModule(CI.getLangOpts().CurrentModule,
+ Module *M = HS.lookupModule(CI.getLangOpts().CurrentModule, SourceLocation(),
/*AllowSearch=*/true);
if (!M) {
CI.getDiagnostics().Report(diag::err_missing_module)
@@ -558,8 +569,20 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
bool HasBegunSourceFile = false;
bool ReplayASTFile = Input.getKind().getFormat() == InputKind::Precompiled &&
usesPreprocessorOnly();
+
+ // If we fail, reset state since the client will not end up calling the
+ // matching EndSourceFile(). All paths that return true should release this.
+ auto FailureCleanup = llvm::make_scope_exit([&]() {
+ if (HasBegunSourceFile)
+ CI.getDiagnosticClient().EndSourceFile();
+ CI.clearOutputFiles(/*EraseFiles=*/true);
+ CI.getLangOpts().setCompilingModule(LangOptions::CMK_None);
+ setCurrentInput(FrontendInputFile());
+ setCompilerInstance(nullptr);
+ });
+
if (!BeginInvocation(CI))
- goto failure;
+ return false;
// If we're replaying the build of an AST file, import it and set up
// the initial state from its build.
@@ -580,7 +603,7 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
ASTUnit::LoadPreprocessorOnly, ASTDiags, CI.getFileSystemOpts(),
CI.getCodeGenOpts().DebugTypeExtRefs);
if (!AST)
- goto failure;
+ return false;
// Options relating to how we treat the input (but not what we do with it)
// are inherited from the AST unit.
@@ -617,7 +640,8 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
if (Kind.getFormat() == InputKind::ModuleMap) {
Module *ASTModule =
AST->getPreprocessor().getHeaderSearchInfo().lookupModule(
- AST->getLangOpts().CurrentModule, /*AllowSearch*/ false);
+ AST->getLangOpts().CurrentModule, SourceLocation(),
+ /*AllowSearch*/ false);
assert(ASTModule && "module file does not define its own module");
Input = FrontendInputFile(ASTModule->PresumedModuleMapFile, Kind);
} else {
@@ -649,7 +673,7 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
CI.getCodeGenOpts().DebugTypeExtRefs);
if (!AST)
- goto failure;
+ return false;
// Inform the diagnostic client we are processing a source file.
CI.getDiagnosticClient().BeginSourceFile(CI.getLangOpts(), nullptr);
@@ -669,20 +693,21 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
// Initialize the action.
if (!BeginSourceFileAction(CI))
- goto failure;
+ return false;
// Create the AST consumer.
CI.setASTConsumer(CreateWrappedASTConsumer(CI, InputFile));
if (!CI.hasASTConsumer())
- goto failure;
+ return false;
+ FailureCleanup.release();
return true;
}
// Set up the file and source managers, if needed.
if (!CI.hasFileManager()) {
if (!CI.createFileManager()) {
- goto failure;
+ return false;
}
}
if (!CI.hasSourceManager())
@@ -710,12 +735,13 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
// Initialize the action.
if (!BeginSourceFileAction(CI))
- goto failure;
+ return false;
// Initialize the main file entry.
if (!CI.InitializeSourceManager(CurrentInput))
- goto failure;
+ return false;
+ FailureCleanup.release();
return true;
}
@@ -748,7 +774,7 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
if (!Found) {
CI.getDiagnostics().Report(diag::err_fe_no_pch_in_dir) << PCHInclude;
- goto failure;
+ return false;
}
}
}
@@ -765,7 +791,7 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
// Initialize the main file entry.
if (!CI.InitializeSourceManager(Input))
- goto failure;
+ return false;
// For module map files, we first parse the module map and synthesize a
// "<module-includes>" buffer before more conventional processing.
@@ -777,11 +803,11 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
if (loadModuleMapForModuleBuild(CI, Input.isSystem(),
Input.isPreprocessed(),
PresumedModuleMapFile, OffsetToContents))
- goto failure;
+ return false;
auto *CurrentModule = prepareToBuildModule(CI, Input.getFile());
if (!CurrentModule)
- goto failure;
+ return false;
CurrentModule->PresumedModuleMapFile = PresumedModuleMapFile;
@@ -792,7 +818,7 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
// Otherwise, convert the module description to a suitable input buffer.
auto Buffer = getInputBufferForModule(CI, CurrentModule);
if (!Buffer)
- goto failure;
+ return false;
// Reinitialize the main file entry to refer to the new input.
auto Kind = CurrentModule->IsSystem ? SrcMgr::C_System : SrcMgr::C_User;
@@ -805,7 +831,7 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
// Initialize the action.
if (!BeginSourceFileAction(CI))
- goto failure;
+ return false;
// If we were asked to load any module map files, do so now.
for (const auto &Filename : CI.getFrontendOpts().ModuleMapFiles) {
@@ -839,7 +865,7 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
std::unique_ptr<ASTConsumer> Consumer =
CreateWrappedASTConsumer(CI, PresumedInputFile);
if (!Consumer)
- goto failure;
+ return false;
// FIXME: should not overwrite ASTMutationListener when parsing model files?
if (!isModelParsingAction())
@@ -850,7 +876,7 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
IntrusiveRefCntPtr<ExternalSemaSource> source, FinalReader;
source = createChainedIncludesSource(CI, FinalReader);
if (!source)
- goto failure;
+ return false;
CI.setASTReader(static_cast<ASTReader *>(FinalReader.get()));
CI.getASTContext().setExternalSource(source);
} else if (CI.getLangOpts().Modules ||
@@ -879,7 +905,7 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
CI.getPreprocessorOpts().AllowPCHWithCompilerErrors,
DeserialListener, DeleteDeserialListener);
if (!CI.getASTContext().getExternalSource())
- goto failure;
+ return false;
}
// If modules are enabled, create the AST reader before creating
// any builtins, so that all declarations know that they might be
@@ -894,7 +920,7 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
CI.setASTConsumer(std::move(Consumer));
if (!CI.hasASTConsumer())
- goto failure;
+ return false;
}
// Initialize built-in info as long as we aren't using an external AST
@@ -915,7 +941,7 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
// If we were asked to load any module files, do so now.
for (const auto &ModuleFile : CI.getFrontendOpts().ModuleFiles)
if (!CI.loadModuleFile(ModuleFile))
- goto failure;
+ return false;
// If there is a layout overrides file, attach an external AST source that
// provides the layouts from that file.
@@ -927,18 +953,8 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
CI.getASTContext().setExternalSource(Override);
}
+ FailureCleanup.release();
return true;
-
- // If we failed, reset state since the client will not end up calling the
- // matching EndSourceFile().
-failure:
- if (HasBegunSourceFile)
- CI.getDiagnosticClient().EndSourceFile();
- CI.clearOutputFiles(/*EraseFiles=*/true);
- CI.getLangOpts().setCompilingModule(LangOptions::CMK_None);
- setCurrentInput(FrontendInputFile());
- setCompilerInstance(nullptr);
- return false;
}
llvm::Error FrontendAction::Execute() {
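Illustrative sketch, not part of the upstream commit: the BeginSourceFile hunks above replace the goto failure cleanup with an llvm::make_scope_exit guard that every successful path releases. A minimal standalone version of that pattern, with hypothetical doStepOne/doStepTwo/rollback helpers:

    // Sketch only; shows the scope-exit guard pattern used above.
    #include "llvm/ADT/ScopeExit.h"

    static bool doStepOne() { return true; }  // hypothetical work
    static bool doStepTwo() { return true; }  // hypothetical work
    static void rollback() {}                 // hypothetical failure cleanup

    static bool beginWork() {
      // Runs rollback() automatically unless release() is called first.
      auto FailureCleanup = llvm::make_scope_exit([] { rollback(); });

      if (!doStepOne())
        return false; // the guard fires and rollback() runs
      if (!doStepTwo())
        return false; // same here

      FailureCleanup.release(); // success: suppress the cleanup
      return true;
    }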
diff --git a/clang/lib/Frontend/FrontendActions.cpp b/clang/lib/Frontend/FrontendActions.cpp
index c6ebbdc8c04e..b5544afa9f24 100644
--- a/clang/lib/Frontend/FrontendActions.cpp
+++ b/clang/lib/Frontend/FrontendActions.cpp
@@ -993,3 +993,17 @@ void PrintDependencyDirectivesSourceMinimizerAction::ExecuteAction() {
}
llvm::outs() << Output;
}
+
+void GetDependenciesByModuleNameAction::ExecuteAction() {
+ CompilerInstance &CI = getCompilerInstance();
+ Preprocessor &PP = CI.getPreprocessor();
+ SourceManager &SM = PP.getSourceManager();
+ FileID MainFileID = SM.getMainFileID();
+ SourceLocation FileStart = SM.getLocForStartOfFile(MainFileID);
+ SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> Path;
+ IdentifierInfo *ModuleID = PP.getIdentifierInfo(ModuleName);
+ Path.push_back(std::make_pair(ModuleID, FileStart));
+ auto ModResult = CI.loadModule(FileStart, Path, Module::Hidden, false);
+ PPCallbacks *CB = PP.getPPCallbacks();
+ CB->moduleImport(SourceLocation(), Path, ModResult);
+}
diff --git a/clang/lib/Frontend/HeaderIncludeGen.cpp b/clang/lib/Frontend/HeaderIncludeGen.cpp
index 1ee47d8d2480..5db8792bf420 100644
--- a/clang/lib/Frontend/HeaderIncludeGen.cpp
+++ b/clang/lib/Frontend/HeaderIncludeGen.cpp
@@ -119,7 +119,7 @@ void clang::AttachHeaderIncludeGen(Preprocessor &PP,
// Print header info for extra headers, pretending they were discovered by
// the regular preprocessor. The primary use case is to support proper
// generation of Make / Ninja file dependencies for implicit includes, such
- // as sanitizer blacklists. It's only important for cl.exe compatibility,
+ // as sanitizer ignorelists. It's only important for cl.exe compatibility,
// the GNU way to generate rules is -M / -MM / -MD / -MMD.
for (const auto &Header : DepOpts.ExtraDeps)
PrintHeaderInfo(OutputFile, Header.first, ShowDepth, 2, MSStyle);
diff --git a/clang/lib/Frontend/InitHeaderSearch.cpp b/clang/lib/Frontend/InitHeaderSearch.cpp
index ba9f96384f81..ed1314f3b03d 100644
--- a/clang/lib/Frontend/InitHeaderSearch.cpp
+++ b/clang/lib/Frontend/InitHeaderSearch.cpp
@@ -36,9 +36,11 @@ namespace {
struct DirectoryLookupInfo {
IncludeDirGroup Group;
DirectoryLookup Lookup;
+ Optional<unsigned> UserEntryIdx;
- DirectoryLookupInfo(IncludeDirGroup Group, DirectoryLookup Lookup)
- : Group(Group), Lookup(Lookup) {}
+ DirectoryLookupInfo(IncludeDirGroup Group, DirectoryLookup Lookup,
+ Optional<unsigned> UserEntryIdx)
+ : Group(Group), Lookup(Lookup), UserEntryIdx(UserEntryIdx) {}
};
/// InitHeaderSearch - This class makes it easier to set the search paths of
@@ -60,13 +62,15 @@ public:
/// AddPath - Add the specified path to the specified group list, prefixing
/// the sysroot if used.
/// Returns true if the path exists, false if it was ignored.
- bool AddPath(const Twine &Path, IncludeDirGroup Group, bool isFramework);
+ bool AddPath(const Twine &Path, IncludeDirGroup Group, bool isFramework,
+ Optional<unsigned> UserEntryIdx = None);
/// AddUnmappedPath - Add the specified path to the specified group list,
/// without performing any sysroot remapping.
/// Returns true if the path exists, false if it was ignored.
bool AddUnmappedPath(const Twine &Path, IncludeDirGroup Group,
- bool isFramework);
+ bool isFramework,
+ Optional<unsigned> UserEntryIdx = None);
/// AddSystemHeaderPrefix - Add the specified prefix to the system header
/// prefix list.
@@ -119,22 +123,25 @@ static bool CanPrefixSysroot(StringRef Path) {
}
bool InitHeaderSearch::AddPath(const Twine &Path, IncludeDirGroup Group,
- bool isFramework) {
+ bool isFramework,
+ Optional<unsigned> UserEntryIdx) {
// Add the path with sysroot prepended, if desired and this is a system header
// group.
if (HasSysroot) {
SmallString<256> MappedPathStorage;
StringRef MappedPathStr = Path.toStringRef(MappedPathStorage);
if (CanPrefixSysroot(MappedPathStr)) {
- return AddUnmappedPath(IncludeSysroot + Path, Group, isFramework);
+ return AddUnmappedPath(IncludeSysroot + Path, Group, isFramework,
+ UserEntryIdx);
}
}
- return AddUnmappedPath(Path, Group, isFramework);
+ return AddUnmappedPath(Path, Group, isFramework, UserEntryIdx);
}
bool InitHeaderSearch::AddUnmappedPath(const Twine &Path, IncludeDirGroup Group,
- bool isFramework) {
+ bool isFramework,
+ Optional<unsigned> UserEntryIdx) {
assert(!Path.isTriviallyEmpty() && "can't handle empty path here");
FileManager &FM = Headers.getFileMgr();
@@ -160,7 +167,8 @@ bool InitHeaderSearch::AddUnmappedPath(const Twine &Path, IncludeDirGroup Group,
// If the directory exists, add it.
if (auto DE = FM.getOptionalDirectoryRef(MappedPathStr)) {
- IncludePath.emplace_back(Group, DirectoryLookup(*DE, Type, isFramework));
+ IncludePath.emplace_back(Group, DirectoryLookup(*DE, Type, isFramework),
+ UserEntryIdx);
return true;
}
@@ -171,7 +179,8 @@ bool InitHeaderSearch::AddUnmappedPath(const Twine &Path, IncludeDirGroup Group,
if (const HeaderMap *HM = Headers.CreateHeaderMap(*FE)) {
// It is a headermap, add it to the search path.
IncludePath.emplace_back(
- Group, DirectoryLookup(HM, Type, Group == IndexHeaderMap));
+ Group, DirectoryLookup(HM, Type, Group == IndexHeaderMap),
+ UserEntryIdx);
return true;
}
}
@@ -471,7 +480,7 @@ void InitHeaderSearch::AddDefaultIncludePaths(const LangOptions &Lang,
/// RemoveDuplicates - If there are duplicate directory entries in the specified
/// search list, remove the later (dead) ones. Returns the number of non-system
/// headers removed, which is used to update NumAngled.
-static unsigned RemoveDuplicates(std::vector<DirectoryLookup> &SearchList,
+static unsigned RemoveDuplicates(std::vector<DirectoryLookupInfo> &SearchList,
unsigned First, bool Verbose) {
llvm::SmallPtrSet<const DirectoryEntry *, 8> SeenDirs;
llvm::SmallPtrSet<const DirectoryEntry *, 8> SeenFrameworkDirs;
@@ -480,7 +489,7 @@ static unsigned RemoveDuplicates(std::vector<DirectoryLookup> &SearchList,
for (unsigned i = First; i != SearchList.size(); ++i) {
unsigned DirToRemove = i;
- const DirectoryLookup &CurEntry = SearchList[i];
+ const DirectoryLookup &CurEntry = SearchList[i].Lookup;
if (CurEntry.isNormalDir()) {
// If this isn't the first time we've seen this dir, remove it.
@@ -510,7 +519,7 @@ static unsigned RemoveDuplicates(std::vector<DirectoryLookup> &SearchList,
for (FirstDir = First;; ++FirstDir) {
assert(FirstDir != i && "Didn't find dupe?");
- const DirectoryLookup &SearchEntry = SearchList[FirstDir];
+ const DirectoryLookup &SearchEntry = SearchList[FirstDir].Lookup;
// If these are different lookup types, then they can't be the dupe.
if (SearchEntry.getLookupType() != CurEntry.getLookupType())
@@ -532,7 +541,7 @@ static unsigned RemoveDuplicates(std::vector<DirectoryLookup> &SearchList,
// If the first dir in the search path is a non-system dir, zap it
// instead of the system one.
- if (SearchList[FirstDir].getDirCharacteristic() == SrcMgr::C_User)
+ if (SearchList[FirstDir].Lookup.getDirCharacteristic() == SrcMgr::C_User)
DirToRemove = FirstDir;
}
@@ -554,16 +563,37 @@ static unsigned RemoveDuplicates(std::vector<DirectoryLookup> &SearchList,
return NonSystemRemoved;
}
+/// Extract DirectoryLookups from DirectoryLookupInfos.
+static std::vector<DirectoryLookup>
+extractLookups(const std::vector<DirectoryLookupInfo> &Infos) {
+ std::vector<DirectoryLookup> Lookups;
+ Lookups.reserve(Infos.size());
+ llvm::transform(Infos, std::back_inserter(Lookups),
+ [](const DirectoryLookupInfo &Info) { return Info.Lookup; });
+ return Lookups;
+}
+
+/// Collect the mapping between indices of DirectoryLookups and UserEntries.
+static llvm::DenseMap<unsigned, unsigned>
+mapToUserEntries(const std::vector<DirectoryLookupInfo> &Infos) {
+ llvm::DenseMap<unsigned, unsigned> LookupsToUserEntries;
+ for (unsigned I = 0, E = Infos.size(); I < E; ++I) {
+ // Check whether this DirectoryLookup maps to a HeaderSearch::UserEntry.
+ if (Infos[I].UserEntryIdx)
+ LookupsToUserEntries.insert({I, *Infos[I].UserEntryIdx});
+ }
+ return LookupsToUserEntries;
+}
void InitHeaderSearch::Realize(const LangOptions &Lang) {
// Concatenate ANGLE+SYSTEM+AFTER chains together into SearchList.
- std::vector<DirectoryLookup> SearchList;
+ std::vector<DirectoryLookupInfo> SearchList;
SearchList.reserve(IncludePath.size());
// Quoted arguments go first.
for (auto &Include : IncludePath)
if (Include.Group == Quoted)
- SearchList.push_back(Include.Lookup);
+ SearchList.push_back(Include);
// Deduplicate and remember index.
RemoveDuplicates(SearchList, 0, Verbose);
@@ -571,7 +601,7 @@ void InitHeaderSearch::Realize(const LangOptions &Lang) {
for (auto &Include : IncludePath)
if (Include.Group == Angled || Include.Group == IndexHeaderMap)
- SearchList.push_back(Include.Lookup);
+ SearchList.push_back(Include);
RemoveDuplicates(SearchList, NumQuoted, Verbose);
unsigned NumAngled = SearchList.size();
@@ -583,11 +613,11 @@ void InitHeaderSearch::Realize(const LangOptions &Lang) {
Include.Group == CXXSystem) ||
(Lang.ObjC && !Lang.CPlusPlus && Include.Group == ObjCSystem) ||
(Lang.ObjC && Lang.CPlusPlus && Include.Group == ObjCXXSystem))
- SearchList.push_back(Include.Lookup);
+ SearchList.push_back(Include);
for (auto &Include : IncludePath)
if (Include.Group == After)
- SearchList.push_back(Include.Lookup);
+ SearchList.push_back(Include);
// Remove duplicates across both the Angled and System directories. GCC does
// this and failing to remove duplicates across these two groups breaks
@@ -596,7 +626,8 @@ void InitHeaderSearch::Realize(const LangOptions &Lang) {
NumAngled -= NonSystemRemoved;
bool DontSearchCurDir = false; // TODO: set to true if -I- is set?
- Headers.SetSearchPaths(SearchList, NumQuoted, NumAngled, DontSearchCurDir);
+ Headers.SetSearchPaths(extractLookups(SearchList), NumQuoted, NumAngled,
+ DontSearchCurDir, mapToUserEntries(SearchList));
Headers.SetSystemHeaderPrefixes(SystemHeaderPrefixes);
@@ -606,14 +637,14 @@ void InitHeaderSearch::Realize(const LangOptions &Lang) {
for (unsigned i = 0, e = SearchList.size(); i != e; ++i) {
if (i == NumQuoted)
llvm::errs() << "#include <...> search starts here:\n";
- StringRef Name = SearchList[i].getName();
+ StringRef Name = SearchList[i].Lookup.getName();
const char *Suffix;
- if (SearchList[i].isNormalDir())
+ if (SearchList[i].Lookup.isNormalDir())
Suffix = "";
- else if (SearchList[i].isFramework())
+ else if (SearchList[i].Lookup.isFramework())
Suffix = " (framework directory)";
else {
- assert(SearchList[i].isHeaderMap() && "Unknown DirectoryLookup");
+ assert(SearchList[i].Lookup.isHeaderMap() && "Unknown DirectoryLookup");
Suffix = " (headermap)";
}
llvm::errs() << " " << Name << Suffix << "\n";
@@ -632,9 +663,9 @@ void clang::ApplyHeaderSearchOptions(HeaderSearch &HS,
for (unsigned i = 0, e = HSOpts.UserEntries.size(); i != e; ++i) {
const HeaderSearchOptions::Entry &E = HSOpts.UserEntries[i];
if (E.IgnoreSysRoot) {
- Init.AddUnmappedPath(E.Path, E.Group, E.IsFramework);
+ Init.AddUnmappedPath(E.Path, E.Group, E.IsFramework, i);
} else {
- Init.AddPath(E.Path, E.Group, E.IsFramework);
+ Init.AddPath(E.Path, E.Group, E.IsFramework, i);
}
}
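Illustrative sketch, not part of the upstream commit: the InitHeaderSearch changes above attach an optional user-entry index to each DirectoryLookup and later split the combined records into a plain lookup list plus an index-to-index map. A standalone version of that split, with a hypothetical Record type in place of DirectoryLookupInfo:

    // Sketch only; Record stands in for DirectoryLookupInfo.
    #include "llvm/ADT/DenseMap.h"
    #include "llvm/ADT/Optional.h"
    #include <string>
    #include <utility>
    #include <vector>

    struct Record {
      std::string Lookup;                    // stand-in for DirectoryLookup
      llvm::Optional<unsigned> UserEntryIdx; // set only for user-provided entries
    };

    // Mirror extractLookups/mapToUserEntries: return the bare lookups plus a map
    // from lookup index to the originating HeaderSearchOptions user-entry index.
    static std::pair<std::vector<std::string>, llvm::DenseMap<unsigned, unsigned>>
    splitRecords(const std::vector<Record> &Infos) {
      std::vector<std::string> Lookups;
      Lookups.reserve(Infos.size());
      llvm::DenseMap<unsigned, unsigned> LookupsToUserEntries;
      for (unsigned I = 0, E = Infos.size(); I < E; ++I) {
        Lookups.push_back(Infos[I].Lookup);
        if (Infos[I].UserEntryIdx)
          LookupsToUserEntries.insert({I, *Infos[I].UserEntryIdx});
      }
      return {std::move(Lookups), std::move(LookupsToUserEntries)};
    }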
diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp
index bca0bb4ada67..0ecb024fc6b9 100644
--- a/clang/lib/Frontend/InitPreprocessor.cpp
+++ b/clang/lib/Frontend/InitPreprocessor.cpp
@@ -371,7 +371,10 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI,
// value is, are implementation-defined.
// (Removed in C++20.)
if (!LangOpts.CPlusPlus) {
- if (LangOpts.C17)
+ // FIXME: Use correct value for C23.
+ if (LangOpts.C2x)
+ Builder.defineMacro("__STDC_VERSION__", "202000L");
+ else if (LangOpts.C17)
Builder.defineMacro("__STDC_VERSION__", "201710L");
else if (LangOpts.C11)
Builder.defineMacro("__STDC_VERSION__", "201112L");
@@ -433,11 +436,18 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI,
// OpenCL v1.0/1.1 s6.9, v1.2/2.0 s6.10: Preprocessor Directives and Macros.
if (LangOpts.OpenCL) {
if (LangOpts.CPlusPlus) {
- if (LangOpts.OpenCLCPlusPlusVersion == 100)
+ switch (LangOpts.OpenCLCPlusPlusVersion) {
+ case 100:
Builder.defineMacro("__OPENCL_CPP_VERSION__", "100");
- else
+ break;
+ case 202100:
+ Builder.defineMacro("__OPENCL_CPP_VERSION__", "202100");
+ break;
+ default:
llvm_unreachable("Unsupported C++ version for OpenCL");
+ }
Builder.defineMacro("__CL_CPP_VERSION_1_0__", "100");
+ Builder.defineMacro("__CL_CPP_VERSION_2021__", "202100");
} else {
// OpenCL v1.0 and v1.1 do not have a predefined macro to indicate the
// language standard with which the program is compiled. __OPENCL_VERSION__
@@ -590,7 +600,7 @@ static void InitializeCPlusPlusFeatureTestMacros(const LangOptions &LangOpts,
//Builder.defineMacro("__cpp_consteval", "201811L");
Builder.defineMacro("__cpp_constexpr_dynamic_alloc", "201907L");
Builder.defineMacro("__cpp_constinit", "201907L");
- //Builder.defineMacro("__cpp_coroutines", "201902L");
+ Builder.defineMacro("__cpp_impl_coroutine", "201902L");
Builder.defineMacro("__cpp_designated_initializers", "201707L");
Builder.defineMacro("__cpp_impl_three_way_comparison", "201907L");
//Builder.defineMacro("__cpp_modules", "201907L");
@@ -600,6 +610,7 @@ static void InitializeCPlusPlusFeatureTestMacros(const LangOptions &LangOpts,
if (LangOpts.CPlusPlus2b) {
Builder.defineMacro("__cpp_implicit_move", "202011L");
Builder.defineMacro("__cpp_size_t_suffix", "202011L");
+ Builder.defineMacro("__cpp_if_consteval", "202106L");
}
if (LangOpts.Char8)
Builder.defineMacro("__cpp_char8_t", "201811L");
@@ -1141,6 +1152,12 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
case 45:
Builder.defineMacro("_OPENMP", "201511");
break;
+ case 51:
+ Builder.defineMacro("_OPENMP", "202011");
+ break;
+ case 52:
+ Builder.defineMacro("_OPENMP", "202111");
+ break;
default:
// Default version is OpenMP 5.0
Builder.defineMacro("_OPENMP", "201811");
@@ -1171,7 +1188,7 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
if (LangOpts.OpenCL) {
InitializeOpenCLFeatureTestMacros(TI, LangOpts, Builder);
- if (TI.getTriple().isSPIR())
+ if (TI.getTriple().isSPIR() || TI.getTriple().isSPIRV())
Builder.defineMacro("__IMAGE_SUPPORT__");
}
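Illustrative sketch, not part of the upstream commit: the InitPreprocessor.cpp hunks above predefine __STDC_VERSION__ as 202000L for -std=c2x and __OPENCL_CPP_VERSION__ as 202100 for C++ for OpenCL 2021. A short, hypothetical usage check of those macro values:

    // Sketch only; the guarded branches are placeholders.
    #if defined(__OPENCL_CPP_VERSION__) && __OPENCL_CPP_VERSION__ >= 202100
    // Compiled as C++ for OpenCL 2021 or later.
    #elif defined(__STDC_VERSION__) && __STDC_VERSION__ > 201710L
    // A post-C17 C dialect (202000L is the placeholder value until C23 settles).
    #endif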
diff --git a/clang/lib/Frontend/LayoutOverrideSource.cpp b/clang/lib/Frontend/LayoutOverrideSource.cpp
index 76762d58fe25..0d288db0632f 100644
--- a/clang/lib/Frontend/LayoutOverrideSource.cpp
+++ b/clang/lib/Frontend/LayoutOverrideSource.cpp
@@ -16,11 +16,11 @@ using namespace clang;
/// Parse a simple identifier.
static std::string parseName(StringRef S) {
- if (S.empty() || !isIdentifierHead(S[0]))
+ if (S.empty() || !isAsciiIdentifierStart(S[0]))
return "";
unsigned Offset = 1;
- while (Offset < S.size() && isIdentifierBody(S[Offset]))
+ while (Offset < S.size() && isAsciiIdentifierContinue(S[Offset]))
++Offset;
return S.substr(0, Offset).str();
@@ -43,7 +43,7 @@ LayoutOverrideSource::LayoutOverrideSource(StringRef Filename) {
StringRef LineStr(Line);
// Determine whether the following line will start a
- if (LineStr.find("*** Dumping AST Record Layout") != StringRef::npos) {
+ if (LineStr.contains("*** Dumping AST Record Layout")) {
// Flush the last type/layout, if there is one.
if (!CurrentType.empty())
Layouts[CurrentType] = CurrentLayout;
diff --git a/clang/lib/Frontend/PrintPreprocessedOutput.cpp b/clang/lib/Frontend/PrintPreprocessedOutput.cpp
index b7259569595d..45df86ef91cd 100644
--- a/clang/lib/Frontend/PrintPreprocessedOutput.cpp
+++ b/clang/lib/Frontend/PrintPreprocessedOutput.cpp
@@ -155,7 +155,7 @@ public:
void PragmaDiagnosticPop(SourceLocation Loc, StringRef Namespace) override;
void PragmaDiagnostic(SourceLocation Loc, StringRef Namespace,
diag::Severity Map, StringRef Str) override;
- void PragmaWarning(SourceLocation Loc, StringRef WarningSpec,
+ void PragmaWarning(SourceLocation Loc, PragmaWarningSpecifier WarningSpec,
ArrayRef<int> Ids) override;
void PragmaWarningPush(SourceLocation Loc, int Level) override;
void PragmaWarningPop(SourceLocation Loc) override;
@@ -182,25 +182,23 @@ public:
/// implicitly when at the beginning of the file.
///
/// @param Tok Token where to move to.
- /// @param RequiresStartOfLine Whether the next line depends on being in the
+ /// @param RequireStartOfLine Whether the next line depends on being in the
/// first column, such as a directive.
///
/// @return Whether column adjustments are necessary.
bool MoveToLine(const Token &Tok, bool RequireStartOfLine) {
PresumedLoc PLoc = SM.getPresumedLoc(Tok.getLocation());
- if (PLoc.isInvalid())
- return false;
+ unsigned TargetLine = PLoc.isValid() ? PLoc.getLine() : CurLine;
bool IsFirstInFile = Tok.isAtStartOfLine() && PLoc.getLine() == 1;
- return MoveToLine(PLoc.getLine(), RequireStartOfLine) || IsFirstInFile;
+ return MoveToLine(TargetLine, RequireStartOfLine) || IsFirstInFile;
}
/// Move to the line of the provided source location. Returns true if a new
/// line was inserted.
bool MoveToLine(SourceLocation Loc, bool RequireStartOfLine) {
PresumedLoc PLoc = SM.getPresumedLoc(Loc);
- if (PLoc.isInvalid())
- return false;
- return MoveToLine(PLoc.getLine(), RequireStartOfLine);
+ unsigned TargetLine = PLoc.isValid() ? PLoc.getLine() : CurLine;
+ return MoveToLine(TargetLine, RequireStartOfLine);
}
bool MoveToLine(unsigned LineNo, bool RequireStartOfLine);
@@ -276,20 +274,27 @@ bool PrintPPOutputPPCallbacks::MoveToLine(unsigned LineNo,
// otherwise print a #line directive.
if (CurLine == LineNo) {
// Nothing to do if we are already on the correct line.
- } else if (!StartedNewLine && (!MinimizeWhitespace || !DisableLineMarkers) &&
- LineNo - CurLine == 1) {
+ } else if (MinimizeWhitespace && DisableLineMarkers) {
+ // With -E -P -fminimize-whitespace, don't emit anything if not necessary.
+ } else if (!StartedNewLine && LineNo - CurLine == 1) {
// Printing a single line has priority over printing a #line directive, even
// when minimizing whitespace which otherwise would print #line directives
// for every single line.
OS << '\n';
StartedNewLine = true;
- } else if (!MinimizeWhitespace && LineNo - CurLine <= 8) {
- const char *NewLines = "\n\n\n\n\n\n\n\n";
- OS.write(NewLines, LineNo - CurLine);
- StartedNewLine = true;
} else if (!DisableLineMarkers) {
- // Emit a #line or line marker.
- WriteLineInfo(LineNo, nullptr, 0);
+ if (LineNo - CurLine <= 8) {
+ const char *NewLines = "\n\n\n\n\n\n\n\n";
+ OS.write(NewLines, LineNo - CurLine);
+ } else {
+ // Emit a #line or line marker.
+ WriteLineInfo(LineNo, nullptr, 0);
+ }
+ StartedNewLine = true;
+ } else if (EmittedTokensOnThisLine) {
+ // If we are not on the correct line and don't need to be line-correct,
+ // at least ensure we start on a new line.
+ OS << '\n';
StartedNewLine = true;
}
@@ -573,10 +578,24 @@ void PrintPPOutputPPCallbacks::PragmaDiagnostic(SourceLocation Loc,
}
void PrintPPOutputPPCallbacks::PragmaWarning(SourceLocation Loc,
- StringRef WarningSpec,
+ PragmaWarningSpecifier WarningSpec,
ArrayRef<int> Ids) {
MoveToLine(Loc, /*RequireStartOfLine=*/true);
- OS << "#pragma warning(" << WarningSpec << ':';
+
+ OS << "#pragma warning(";
+ switch(WarningSpec) {
+ case PWS_Default: OS << "default"; break;
+ case PWS_Disable: OS << "disable"; break;
+ case PWS_Error: OS << "error"; break;
+ case PWS_Once: OS << "once"; break;
+ case PWS_Suppress: OS << "suppress"; break;
+ case PWS_Level1: OS << '1'; break;
+ case PWS_Level2: OS << '2'; break;
+ case PWS_Level3: OS << '3'; break;
+ case PWS_Level4: OS << '4'; break;
+ }
+ OS << ':';
+
for (ArrayRef<int>::iterator I = Ids.begin(), E = Ids.end(); I != E; ++I)
OS << ' ' << *I;
OS << ')';
@@ -639,7 +658,9 @@ void PrintPPOutputPPCallbacks::HandleWhitespaceBeforeTok(const Token &Tok,
!Tok.is(tok::annot_module_begin) && !Tok.is(tok::annot_module_end)))
return;
- if (!RequireSameLine && MoveToLine(Tok, /*RequireStartOfLine=*/false)) {
+ // EmittedDirectiveOnThisLine takes priority over RequireSameLine.
+ if ((!RequireSameLine || EmittedDirectiveOnThisLine) &&
+ MoveToLine(Tok, /*RequireStartOfLine=*/EmittedDirectiveOnThisLine)) {
if (MinimizeWhitespace) {
// Avoid interpreting hash as a directive under -fpreprocessed.
if (Tok.is(tok::hash))
@@ -677,7 +698,7 @@ void PrintPPOutputPPCallbacks::HandleWhitespaceBeforeTok(const Token &Tok,
// - The whitespace is necessary to keep the tokens apart and there is not
// already a newline between them
if (RequireSpace || (!MinimizeWhitespace && Tok.hasLeadingSpace()) ||
- ((EmittedTokensOnThisLine || EmittedTokensOnThisLine) &&
+ ((EmittedTokensOnThisLine || EmittedDirectiveOnThisLine) &&
AvoidConcat(PrevPrevTok, PrevTok, Tok)))
OS << ' ';
}
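The rewritten MoveToLine logic above is easier to follow as a decision table. The sketch below is an assumed simplification that ignores the StartedNewLine and column bookkeeping of the real callback and only returns what would be written for a given state; adjustToLine is a hypothetical helper, not part of clang:

    #include <string>

    // Assumed simplification of PrintPPOutputPPCallbacks::MoveToLine(unsigned, bool):
    // returns the text that would be emitted to get from CurLine to LineNo.
    std::string adjustToLine(unsigned CurLine, unsigned LineNo,
                             bool MinimizeWhitespace, bool DisableLineMarkers,
                             bool EmittedTokensOnThisLine) {
      if (CurLine == LineNo)
        return "";                                    // already on the right line
      if (MinimizeWhitespace && DisableLineMarkers)
        return "";                                    // -E -P -fminimize-whitespace: emit nothing
      if (LineNo - CurLine == 1)
        return "\n";                                  // one newline beats a line marker
      if (!DisableLineMarkers) {
        if (LineNo - CurLine <= 8)
          return std::string(LineNo - CurLine, '\n'); // short gaps: literal newlines
        return "<#line marker>\n";                    // long gaps: emit a line marker
      }
      if (EmittedTokensOnThisLine)
        return "\n";                                  // -P: at least start a fresh line
      return "";
    }

With -E -P -fminimize-whitespace the function now emits nothing at all, while the plain -E path still prefers up to eight literal newlines over a line marker.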
diff --git a/clang/lib/Frontend/Rewrite/FrontendActions.cpp b/clang/lib/Frontend/Rewrite/FrontendActions.cpp
index 09ed07be923e..6685109f8d33 100644
--- a/clang/lib/Frontend/Rewrite/FrontendActions.cpp
+++ b/clang/lib/Frontend/Rewrite/FrontendActions.cpp
@@ -231,7 +231,7 @@ public:
assert(OS && "loaded module file after finishing rewrite action?");
(*OS) << "#pragma clang module build ";
- if (isValidIdentifier(MF->ModuleName))
+ if (isValidAsciiIdentifier(MF->ModuleName))
(*OS) << MF->ModuleName;
else {
(*OS) << '"';
diff --git a/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp b/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp
index fd54bcbf7c35..626ec4d71ccd 100644
--- a/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp
+++ b/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp
@@ -1957,15 +1957,15 @@ Stmt *RewriteModernObjC::RewriteObjCTryStmt(ObjCAtTryStmt *S) {
// @try -> try
ReplaceText(startLoc, 1, "");
- for (unsigned I = 0, N = S->getNumCatchStmts(); I != N; ++I) {
- ObjCAtCatchStmt *Catch = S->getCatchStmt(I);
+ for (ObjCAtCatchStmt *Catch : S->catch_stmts()) {
VarDecl *catchDecl = Catch->getCatchParamDecl();
startLoc = Catch->getBeginLoc();
bool AtRemoved = false;
if (catchDecl) {
QualType t = catchDecl->getType();
- if (const ObjCObjectPointerType *Ptr = t->getAs<ObjCObjectPointerType>()) {
+ if (const ObjCObjectPointerType *Ptr =
+ t->getAs<ObjCObjectPointerType>()) {
// Should be a pointer to a class.
ObjCInterfaceDecl *IDecl = Ptr->getObjectType()->getInterface();
if (IDecl) {
diff --git a/clang/lib/Frontend/TestModuleFileExtension.cpp b/clang/lib/Frontend/TestModuleFileExtension.cpp
index 7d4026a7efc6..ea737e6891bf 100644
--- a/clang/lib/Frontend/TestModuleFileExtension.cpp
+++ b/clang/lib/Frontend/TestModuleFileExtension.cpp
@@ -93,16 +93,14 @@ TestModuleFileExtension::getExtensionMetadata() const {
return { BlockName, MajorVersion, MinorVersion, UserInfo };
}
-llvm::hash_code TestModuleFileExtension::hashExtension(
- llvm::hash_code Code) const {
+void TestModuleFileExtension::hashExtension(
+ ExtensionHashBuilder &HBuilder) const {
if (Hashed) {
- Code = llvm::hash_combine(Code, BlockName);
- Code = llvm::hash_combine(Code, MajorVersion);
- Code = llvm::hash_combine(Code, MinorVersion);
- Code = llvm::hash_combine(Code, UserInfo);
+ HBuilder.add(BlockName);
+ HBuilder.add(MajorVersion);
+ HBuilder.add(MinorVersion);
+ HBuilder.add(UserInfo);
}
-
- return Code;
}
std::unique_ptr<ModuleFileExtensionWriter>
diff --git a/clang/lib/Frontend/TestModuleFileExtension.h b/clang/lib/Frontend/TestModuleFileExtension.h
index c8ca4cd4f210..e22c87ed2d1b 100644
--- a/clang/lib/Frontend/TestModuleFileExtension.h
+++ b/clang/lib/Frontend/TestModuleFileExtension.h
@@ -55,7 +55,7 @@ public:
ModuleFileExtensionMetadata getExtensionMetadata() const override;
- llvm::hash_code hashExtension(llvm::hash_code Code) const override;
+ void hashExtension(ExtensionHashBuilder &HBuilder) const override;
std::unique_ptr<ModuleFileExtensionWriter>
createExtensionWriter(ASTWriter &Writer) override;
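The hashExtension() change above switches module-file extensions from threading an llvm::hash_code through each override to feeding individual fields into an incremental hash builder. A toy sketch of that calling pattern, using a made-up ToyHashBuilder that is not the real llvm::HashBuilder API:

    #include <cstdint>
    #include <functional>
    #include <string>

    // Made-up stand-in for the incremental hasher; NOT the real llvm::HashBuilder.
    struct ToyHashBuilder {
      std::uint64_t State = 0;
      template <typename T> void add(const T &V) {
        State = (State ^ std::hash<T>{}(V)) * 1099511628211ULL; // FNV-1a style mix
      }
    };

    // Extensions now push their fields into the builder instead of returning a
    // combined llvm::hash_code.
    void hashExtensionLike(ToyHashBuilder &HB, const std::string &BlockName,
                           unsigned MajorVersion, unsigned MinorVersion) {
      HB.add(BlockName);
      HB.add(MajorVersion);
      HB.add(MinorVersion);
    }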
diff --git a/clang/lib/Frontend/VerifyDiagnosticConsumer.cpp b/clang/lib/Frontend/VerifyDiagnosticConsumer.cpp
index 0503ae46a15f..2759625ae254 100644
--- a/clang/lib/Frontend/VerifyDiagnosticConsumer.cpp
+++ b/clang/lib/Frontend/VerifyDiagnosticConsumer.cpp
@@ -99,9 +99,7 @@ public:
return true;
}
- bool match(StringRef S) override {
- return S.find(Text) != StringRef::npos;
- }
+ bool match(StringRef S) override { return S.contains(Text); }
};
/// RegexDirective - Directive with regular-expression matching.
diff --git a/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp b/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp
index b95851e380d2..8e18f33af0cb 100644
--- a/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp
+++ b/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp
@@ -79,7 +79,7 @@ CreateFrontendBaseAction(CompilerInstance &CI) {
if (Plugin.getName() == CI.getFrontendOpts().ActionName) {
std::unique_ptr<PluginASTAction> P(Plugin.instantiate());
if ((P->getActionType() != PluginASTAction::ReplaceAction &&
- P->getActionType() != PluginASTAction::Cmdline) ||
+ P->getActionType() != PluginASTAction::CmdlineAfterMainAction) ||
!P->ParseArgs(
CI,
CI.getFrontendOpts().PluginArgs[std::string(Plugin.getName())]))
@@ -203,24 +203,7 @@ bool ExecuteCompilerInvocation(CompilerInstance *Clang) {
return true;
}
- // Load any requested plugins.
- for (const std::string &Path : Clang->getFrontendOpts().Plugins) {
- std::string Error;
- if (llvm::sys::DynamicLibrary::LoadLibraryPermanently(Path.c_str(), &Error))
- Clang->getDiagnostics().Report(diag::err_fe_unable_to_load_plugin)
- << Path << Error;
- }
-
- // Check if any of the loaded plugins replaces the main AST action
- for (const FrontendPluginRegistry::entry &Plugin :
- FrontendPluginRegistry::entries()) {
- std::unique_ptr<PluginASTAction> P(Plugin.instantiate());
- if (P->getActionType() == PluginASTAction::ReplaceAction) {
- Clang->getFrontendOpts().ProgramAction = clang::frontend::PluginAction;
- Clang->getFrontendOpts().ActionName = Plugin.getName().str();
- break;
- }
- }
+ Clang->LoadRequestedPlugins();
// Honor -mllvm.
//
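The removed loops are not gone; they appear to have been folded into the new CompilerInstance::LoadRequestedPlugins() call. A plausible shape of that helper, reconstructed from the code deleted above (the actual implementation in CompilerInstance.cpp may differ in details):

    // Reconstructed sketch, based on the loops removed from
    // ExecuteCompilerInvocation.cpp; not a verbatim copy of the new method.
    void CompilerInstance::LoadRequestedPlugins() {
      // Load any requested plugin libraries.
      for (const std::string &Path : getFrontendOpts().Plugins) {
        std::string Error;
        if (llvm::sys::DynamicLibrary::LoadLibraryPermanently(Path.c_str(), &Error))
          getDiagnostics().Report(diag::err_fe_unable_to_load_plugin)
              << Path << Error;
      }
      // Let a plugin registered as ReplaceAction take over the main AST action.
      for (const FrontendPluginRegistry::entry &Plugin :
           FrontendPluginRegistry::entries()) {
        std::unique_ptr<PluginASTAction> P(Plugin.instantiate());
        if (P->getActionType() == PluginASTAction::ReplaceAction) {
          getFrontendOpts().ProgramAction = clang::frontend::PluginAction;
          getFrontendOpts().ActionName = Plugin.getName().str();
          break;
        }
      }
    }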
diff --git a/clang/lib/Headers/__clang_cuda_complex_builtins.h b/clang/lib/Headers/__clang_cuda_complex_builtins.h
index 2b701fef0ea2..7bc7bc2ce63e 100644
--- a/clang/lib/Headers/__clang_cuda_complex_builtins.h
+++ b/clang/lib/Headers/__clang_cuda_complex_builtins.h
@@ -16,7 +16,7 @@
// to work with CUDA and OpenMP target offloading [in C and C++ mode].)
#pragma push_macro("__DEVICE__")
-#ifdef __OPENMP_NVPTX__
+#if defined(__OPENMP_NVPTX__) || defined(__OPENMP_AMDGCN__)
#pragma omp declare target
#define __DEVICE__ __attribute__((noinline, nothrow, cold, weak))
#else
@@ -26,7 +26,7 @@
// To make the algorithms available for C and C++ in CUDA and OpenMP we select
// different but equivalent function versions. TODO: For OpenMP we currently
// select the native builtins as the overload support for templates is lacking.
-#if !defined(__OPENMP_NVPTX__)
+#if !defined(__OPENMP_NVPTX__) && !defined(__OPENMP_AMDGCN__)
#define _ISNANd std::isnan
#define _ISNANf std::isnan
#define _ISINFd std::isinf
@@ -276,7 +276,7 @@ __DEVICE__ float _Complex __divsc3(float __a, float __b, float __c, float __d) {
#undef _fmaxd
#undef _fmaxf
-#ifdef __OPENMP_NVPTX__
+#if defined(__OPENMP_NVPTX__) || defined(__OPENMP_AMDGCN__)
#pragma omp end declare target
#endif
diff --git a/clang/lib/Headers/__clang_cuda_device_functions.h b/clang/lib/Headers/__clang_cuda_device_functions.h
index f801e5426aa4..cc4e1a4dd96a 100644
--- a/clang/lib/Headers/__clang_cuda_device_functions.h
+++ b/clang/lib/Headers/__clang_cuda_device_functions.h
@@ -34,10 +34,12 @@ __DEVICE__ unsigned long long __brevll(unsigned long long __a) {
return __nv_brevll(__a);
}
#if defined(__cplusplus)
-__DEVICE__ void __brkpt() { asm volatile("brkpt;"); }
+__DEVICE__ void __brkpt() { __asm__ __volatile__("brkpt;"); }
__DEVICE__ void __brkpt(int __a) { __brkpt(); }
#else
-__DEVICE__ void __attribute__((overloadable)) __brkpt(void) { asm volatile("brkpt;"); }
+__DEVICE__ void __attribute__((overloadable)) __brkpt(void) {
+ __asm__ __volatile__("brkpt;");
+}
__DEVICE__ void __attribute__((overloadable)) __brkpt(int __a) { __brkpt(); }
#endif
__DEVICE__ unsigned int __byte_perm(unsigned int __a, unsigned int __b,
@@ -507,7 +509,7 @@ __DEVICE__ float __powf(float __a, float __b) {
}
// Parameter must have a known integer value.
-#define __prof_trigger(__a) asm __volatile__("pmevent \t%0;" ::"i"(__a))
+#define __prof_trigger(__a) __asm__ __volatile__("pmevent \t%0;" ::"i"(__a))
__DEVICE__ int __rhadd(int __a, int __b) { return __nv_rhadd(__a, __b); }
__DEVICE__ unsigned int __sad(int __a, int __b, unsigned int __c) {
return __nv_sad(__a, __b, __c);
@@ -526,7 +528,7 @@ __DEVICE__ float __tanf(float __a) { return __nv_fast_tanf(__a); }
__DEVICE__ void __threadfence(void) { __nvvm_membar_gl(); }
__DEVICE__ void __threadfence_block(void) { __nvvm_membar_cta(); };
__DEVICE__ void __threadfence_system(void) { __nvvm_membar_sys(); };
-__DEVICE__ void __trap(void) { asm volatile("trap;"); }
+__DEVICE__ void __trap(void) { __asm__ __volatile__("trap;"); }
__DEVICE__ unsigned int __uAtomicAdd(unsigned int *__p, unsigned int __v) {
return __nvvm_atom_add_gen_i((int *)__p, __v);
}
@@ -1051,122 +1053,136 @@ __DEVICE__ unsigned int __bool2mask(unsigned int __a, int shift) {
}
__DEVICE__ unsigned int __vabs2(unsigned int __a) {
unsigned int r;
- asm("vabsdiff2.s32.s32.s32 %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(0), "r"(0));
+ __asm__("vabsdiff2.s32.s32.s32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(0), "r"(0));
return r;
}
__DEVICE__ unsigned int __vabs4(unsigned int __a) {
unsigned int r;
- asm("vabsdiff4.s32.s32.s32 %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(0), "r"(0));
+ __asm__("vabsdiff4.s32.s32.s32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(0), "r"(0));
return r;
}
__DEVICE__ unsigned int __vabsdiffs2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vabsdiff2.s32.s32.s32 %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vabsdiff2.s32.s32.s32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vabsdiffs4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vabsdiff4.s32.s32.s32 %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vabsdiff4.s32.s32.s32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vabsdiffu2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vabsdiff2.u32.u32.u32 %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vabsdiff2.u32.u32.u32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vabsdiffu4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vabsdiff4.u32.u32.u32 %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vabsdiff4.u32.u32.u32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vabsss2(unsigned int __a) {
unsigned int r;
- asm("vabsdiff2.s32.s32.s32.sat %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(0), "r"(0));
+ __asm__("vabsdiff2.s32.s32.s32.sat %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(0), "r"(0));
return r;
}
__DEVICE__ unsigned int __vabsss4(unsigned int __a) {
unsigned int r;
- asm("vabsdiff4.s32.s32.s32.sat %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(0), "r"(0));
+ __asm__("vabsdiff4.s32.s32.s32.sat %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(0), "r"(0));
return r;
}
__DEVICE__ unsigned int __vadd2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vadd2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vadd2.u32.u32.u32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vadd4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vadd4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vadd4.u32.u32.u32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vaddss2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vadd2.s32.s32.s32.sat %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vadd2.s32.s32.s32.sat %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vaddss4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vadd4.s32.s32.s32.sat %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vadd4.s32.s32.s32.sat %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vaddus2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vadd2.u32.u32.u32.sat %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vadd2.u32.u32.u32.sat %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vaddus4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vadd4.u32.u32.u32.sat %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vadd4.u32.u32.u32.sat %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vavgs2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vavrg2.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vavrg2.s32.s32.s32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vavgs4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vavrg4.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vavrg4.s32.s32.s32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vavgu2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vavrg2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vavrg2.u32.u32.u32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vavgu4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vavrg4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vavrg4.u32.u32.u32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vseteq2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset2.u32.u32.eq %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset2.u32.u32.eq %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmpeq2(unsigned int __a, unsigned int __b) {
@@ -1174,7 +1190,9 @@ __DEVICE__ unsigned int __vcmpeq2(unsigned int __a, unsigned int __b) {
}
__DEVICE__ unsigned int __vseteq4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset4.u32.u32.eq %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset4.u32.u32.eq %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmpeq4(unsigned int __a, unsigned int __b) {
@@ -1182,7 +1200,9 @@ __DEVICE__ unsigned int __vcmpeq4(unsigned int __a, unsigned int __b) {
}
__DEVICE__ unsigned int __vsetges2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset2.s32.s32.ge %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset2.s32.s32.ge %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmpges2(unsigned int __a, unsigned int __b) {
@@ -1190,7 +1210,9 @@ __DEVICE__ unsigned int __vcmpges2(unsigned int __a, unsigned int __b) {
}
__DEVICE__ unsigned int __vsetges4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset4.s32.s32.ge %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset4.s32.s32.ge %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmpges4(unsigned int __a, unsigned int __b) {
@@ -1198,7 +1220,9 @@ __DEVICE__ unsigned int __vcmpges4(unsigned int __a, unsigned int __b) {
}
__DEVICE__ unsigned int __vsetgeu2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset2.u32.u32.ge %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset2.u32.u32.ge %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmpgeu2(unsigned int __a, unsigned int __b) {
@@ -1206,7 +1230,9 @@ __DEVICE__ unsigned int __vcmpgeu2(unsigned int __a, unsigned int __b) {
}
__DEVICE__ unsigned int __vsetgeu4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset4.u32.u32.ge %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset4.u32.u32.ge %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmpgeu4(unsigned int __a, unsigned int __b) {
@@ -1214,7 +1240,9 @@ __DEVICE__ unsigned int __vcmpgeu4(unsigned int __a, unsigned int __b) {
}
__DEVICE__ unsigned int __vsetgts2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset2.s32.s32.gt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset2.s32.s32.gt %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmpgts2(unsigned int __a, unsigned int __b) {
@@ -1222,7 +1250,9 @@ __DEVICE__ unsigned int __vcmpgts2(unsigned int __a, unsigned int __b) {
}
__DEVICE__ unsigned int __vsetgts4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset4.s32.s32.gt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset4.s32.s32.gt %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmpgts4(unsigned int __a, unsigned int __b) {
@@ -1230,7 +1260,9 @@ __DEVICE__ unsigned int __vcmpgts4(unsigned int __a, unsigned int __b) {
}
__DEVICE__ unsigned int __vsetgtu2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset2.u32.u32.gt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset2.u32.u32.gt %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmpgtu2(unsigned int __a, unsigned int __b) {
@@ -1238,7 +1270,9 @@ __DEVICE__ unsigned int __vcmpgtu2(unsigned int __a, unsigned int __b) {
}
__DEVICE__ unsigned int __vsetgtu4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset4.u32.u32.gt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset4.u32.u32.gt %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmpgtu4(unsigned int __a, unsigned int __b) {
@@ -1246,7 +1280,9 @@ __DEVICE__ unsigned int __vcmpgtu4(unsigned int __a, unsigned int __b) {
}
__DEVICE__ unsigned int __vsetles2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset2.s32.s32.le %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset2.s32.s32.le %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmples2(unsigned int __a, unsigned int __b) {
@@ -1254,7 +1290,9 @@ __DEVICE__ unsigned int __vcmples2(unsigned int __a, unsigned int __b) {
}
__DEVICE__ unsigned int __vsetles4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset4.s32.s32.le %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset4.s32.s32.le %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmples4(unsigned int __a, unsigned int __b) {
@@ -1262,7 +1300,9 @@ __DEVICE__ unsigned int __vcmples4(unsigned int __a, unsigned int __b) {
}
__DEVICE__ unsigned int __vsetleu2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset2.u32.u32.le %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset2.u32.u32.le %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmpleu2(unsigned int __a, unsigned int __b) {
@@ -1270,7 +1310,9 @@ __DEVICE__ unsigned int __vcmpleu2(unsigned int __a, unsigned int __b) {
}
__DEVICE__ unsigned int __vsetleu4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset4.u32.u32.le %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset4.u32.u32.le %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmpleu4(unsigned int __a, unsigned int __b) {
@@ -1278,7 +1320,9 @@ __DEVICE__ unsigned int __vcmpleu4(unsigned int __a, unsigned int __b) {
}
__DEVICE__ unsigned int __vsetlts2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset2.s32.s32.lt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset2.s32.s32.lt %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmplts2(unsigned int __a, unsigned int __b) {
@@ -1286,7 +1330,9 @@ __DEVICE__ unsigned int __vcmplts2(unsigned int __a, unsigned int __b) {
}
__DEVICE__ unsigned int __vsetlts4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset4.s32.s32.lt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset4.s32.s32.lt %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmplts4(unsigned int __a, unsigned int __b) {
@@ -1294,7 +1340,9 @@ __DEVICE__ unsigned int __vcmplts4(unsigned int __a, unsigned int __b) {
}
__DEVICE__ unsigned int __vsetltu2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset2.u32.u32.lt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset2.u32.u32.lt %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmpltu2(unsigned int __a, unsigned int __b) {
@@ -1302,7 +1350,9 @@ __DEVICE__ unsigned int __vcmpltu2(unsigned int __a, unsigned int __b) {
}
__DEVICE__ unsigned int __vsetltu4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset4.u32.u32.lt %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset4.u32.u32.lt %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmpltu4(unsigned int __a, unsigned int __b) {
@@ -1310,7 +1360,9 @@ __DEVICE__ unsigned int __vcmpltu4(unsigned int __a, unsigned int __b) {
}
__DEVICE__ unsigned int __vsetne2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset2.u32.u32.ne %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset2.u32.u32.ne %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmpne2(unsigned int __a, unsigned int __b) {
@@ -1318,7 +1370,9 @@ __DEVICE__ unsigned int __vcmpne2(unsigned int __a, unsigned int __b) {
}
__DEVICE__ unsigned int __vsetne4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vset4.u32.u32.ne %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vset4.u32.u32.ne %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vcmpne4(unsigned int __a, unsigned int __b) {
@@ -1345,94 +1399,112 @@ __DEVICE__ unsigned int __vmaxs2(unsigned int __a, unsigned int __b) {
unsigned mask = __vcmpgts2(__a, __b);
r = (__a & mask) | (__b & ~mask);
} else {
- asm("vmax2.s32.s32.s32 %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vmax2.s32.s32.s32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
}
return r;
}
__DEVICE__ unsigned int __vmaxs4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vmax4.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vmax4.s32.s32.s32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vmaxu2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vmax2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vmax2.u32.u32.u32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vmaxu4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vmax4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vmax4.u32.u32.u32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vmins2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vmin2.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vmin2.s32.s32.s32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vmins4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vmin4.s32.s32.s32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vmin4.s32.s32.s32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vminu2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vmin2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vmin2.u32.u32.u32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vminu4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vmin4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vmin4.u32.u32.u32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vsads2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vabsdiff2.s32.s32.s32.add %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vabsdiff2.s32.s32.s32.add %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vsads4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vabsdiff4.s32.s32.s32.add %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vabsdiff4.s32.s32.s32.add %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vsadu2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vabsdiff2.u32.u32.u32.add %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vabsdiff2.u32.u32.u32.add %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vsadu4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vabsdiff4.u32.u32.u32.add %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vabsdiff4.u32.u32.u32.add %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vsub2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vsub2.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vsub2.u32.u32.u32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vneg2(unsigned int __a) { return __vsub2(0, __a); }
__DEVICE__ unsigned int __vsub4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vsub4.u32.u32.u32 %0,%1,%2,%3;" : "=r"(r) : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vsub4.u32.u32.u32 %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vneg4(unsigned int __a) { return __vsub4(0, __a); }
__DEVICE__ unsigned int __vsubss2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vsub2.s32.s32.s32.sat %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vsub2.s32.s32.s32.sat %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vnegss2(unsigned int __a) {
@@ -1440,9 +1512,9 @@ __DEVICE__ unsigned int __vnegss2(unsigned int __a) {
}
__DEVICE__ unsigned int __vsubss4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vsub4.s32.s32.s32.sat %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vsub4.s32.s32.s32.sat %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vnegss4(unsigned int __a) {
@@ -1450,16 +1522,16 @@ __DEVICE__ unsigned int __vnegss4(unsigned int __a) {
}
__DEVICE__ unsigned int __vsubus2(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vsub2.u32.u32.u32.sat %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vsub2.u32.u32.u32.sat %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
__DEVICE__ unsigned int __vsubus4(unsigned int __a, unsigned int __b) {
unsigned int r;
- asm("vsub4.u32.u32.u32.sat %0,%1,%2,%3;"
- : "=r"(r)
- : "r"(__a), "r"(__b), "r"(0));
+ __asm__("vsub4.u32.u32.u32.sat %0,%1,%2,%3;"
+ : "=r"(r)
+ : "r"(__a), "r"(__b), "r"(0));
return r;
}
#endif // CUDA_VERSION >= 9020
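The bulk of this file's change is the mechanical respelling of asm as __asm__. A likely motivation is that plain asm is a GNU keyword that strict ISO C modes reject, whereas __asm__ is accepted everywhere. A minimal hypothetical device helper in the same style (not part of the header):

    // `asm` disappears under e.g. -std=c99 without GNU extensions, while
    // `__asm__` is always accepted, which is likely why the header spells it out.
    static __device__ unsigned int __copy_u32(unsigned int __a) {
      unsigned int __r;
      __asm__("mov.u32 %0, %1;" : "=r"(__r) : "r"(__a));
      return __r;
    }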
diff --git a/clang/lib/Headers/__clang_cuda_intrinsics.h b/clang/lib/Headers/__clang_cuda_intrinsics.h
index c7bff6a9d8fe..e0875bbcbf4a 100644
--- a/clang/lib/Headers/__clang_cuda_intrinsics.h
+++ b/clang/lib/Headers/__clang_cuda_intrinsics.h
@@ -483,4 +483,36 @@ inline __device__ unsigned __funnelshift_rc(unsigned low32, unsigned high32,
#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 320
+#if CUDA_VERSION >= 11000
+extern "C" {
+__device__ inline size_t __nv_cvta_generic_to_global_impl(const void *__ptr) {
+ return (size_t)(void __attribute__((address_space(1))) *)__ptr;
+}
+__device__ inline size_t __nv_cvta_generic_to_shared_impl(const void *__ptr) {
+ return (size_t)(void __attribute__((address_space(3))) *)__ptr;
+}
+__device__ inline size_t __nv_cvta_generic_to_constant_impl(const void *__ptr) {
+ return (size_t)(void __attribute__((address_space(4))) *)__ptr;
+}
+__device__ inline size_t __nv_cvta_generic_to_local_impl(const void *__ptr) {
+ return (size_t)(void __attribute__((address_space(5))) *)__ptr;
+}
+__device__ inline void *__nv_cvta_global_to_generic_impl(size_t __ptr) {
+ return (void *)(void __attribute__((address_space(1))) *)__ptr;
+}
+__device__ inline void *__nv_cvta_shared_to_generic_impl(size_t __ptr) {
+ return (void *)(void __attribute__((address_space(3))) *)__ptr;
+}
+__device__ inline void *__nv_cvta_constant_to_generic_impl(size_t __ptr) {
+ return (void *)(void __attribute__((address_space(4))) *)__ptr;
+}
+__device__ inline void *__nv_cvta_local_to_generic_impl(size_t __ptr) {
+ return (void *)(void __attribute__((address_space(5))) *)__ptr;
+}
+__device__ inline uint32_t __nvvm_get_smem_pointer(void *__ptr) {
+ return __nv_cvta_generic_to_shared_impl(__ptr);
+}
+} // extern "C"
+#endif // CUDA_VERSION >= 11000
+
#endif // defined(__CLANG_CUDA_INTRINSICS_H__)
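A hedged usage sketch of the new wrappers: a hypothetical kernel that converts the generic address of a __shared__ buffer into the 32-bit shared-window address that instructions such as cp.async expect (kernel_example and tile are illustrative names only):

    // Illustrative only; builds on the __nvvm_get_smem_pointer wrapper above.
    __global__ void kernel_example() {
      __shared__ int tile[128];
      // Generic pointer -> 32-bit address in the shared window.
      unsigned int smem_addr = __nvvm_get_smem_pointer(&tile[0]);
      (void)smem_addr; // would typically feed an inline-PTX cp.async or similar
    }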
diff --git a/clang/lib/Headers/__clang_cuda_libdevice_declares.h b/clang/lib/Headers/__clang_cuda_libdevice_declares.h
index 6173b589e3ef..23f35964ea97 100644
--- a/clang/lib/Headers/__clang_cuda_libdevice_declares.h
+++ b/clang/lib/Headers/__clang_cuda_libdevice_declares.h
@@ -16,6 +16,7 @@ extern "C" {
#if defined(__OPENMP_NVPTX__)
#define __DEVICE__
+#pragma omp begin assumes ext_spmd_amenable no_openmp
#elif defined(__CUDA__)
#define __DEVICE__ __device__
#endif
@@ -456,6 +457,11 @@ __DEVICE__ double __nv_y1(double __a);
__DEVICE__ float __nv_y1f(float __a);
__DEVICE__ float __nv_ynf(int __a, float __b);
__DEVICE__ double __nv_yn(int __a, double __b);
+
+#if defined(__OPENMP_NVPTX__)
+#pragma omp end assumes ext_spmd_amenable no_openmp
+#endif
+
#if defined(__cplusplus)
} // extern "C"
#endif
diff --git a/clang/lib/Headers/__clang_cuda_runtime_wrapper.h b/clang/lib/Headers/__clang_cuda_runtime_wrapper.h
index f401964bd529..512fc300fc34 100644
--- a/clang/lib/Headers/__clang_cuda_runtime_wrapper.h
+++ b/clang/lib/Headers/__clang_cuda_runtime_wrapper.h
@@ -41,6 +41,7 @@
#include <cmath>
#include <cstdlib>
#include <stdlib.h>
+#include <string.h>
#undef __CUDACC__
// Preserve common macros that will be changed below by us or by CUDA
@@ -64,9 +65,9 @@
#endif
// Make largest subset of device functions available during host
-// compilation -- SM_35 for the time being.
+// compilation.
#ifndef __CUDA_ARCH__
-#define __CUDA_ARCH__ 350
+#define __CUDA_ARCH__ 9999
#endif
#include "__clang_cuda_builtin_vars.h"
@@ -205,11 +206,6 @@ inline __host__ double __signbitd(double x) {
#endif
#if CUDA_VERSION >= 9000
-// CUDA-9.2 needs host-side memcpy for some host functions in
-// device_functions.hpp
-#if CUDA_VERSION >= 9020
-#include <string.h>
-#endif
#include "crt/math_functions.hpp"
#else
#include "math_functions.hpp"
@@ -275,7 +271,38 @@ static inline __device__ void __brkpt(int __c) { __brkpt(); }
#undef __CUDABE__
#endif
#include "sm_20_atomic_functions.hpp"
+// Predicate functions used in `__builtin_assume` need to have no side effect.
+// However, sm_20_intrinsics.hpp doesn't define them with either the pure or
+// the const attribute. Rename definitions from sm_20_intrinsics.hpp and re-define
+// them as pure ones.
+#pragma push_macro("__isGlobal")
+#pragma push_macro("__isShared")
+#pragma push_macro("__isConstant")
+#pragma push_macro("__isLocal")
+#define __isGlobal __ignored_cuda___isGlobal
+#define __isShared __ignored_cuda___isShared
+#define __isConstant __ignored_cuda___isConstant
+#define __isLocal __ignored_cuda___isLocal
#include "sm_20_intrinsics.hpp"
+#pragma pop_macro("__isGlobal")
+#pragma pop_macro("__isShared")
+#pragma pop_macro("__isConstant")
+#pragma pop_macro("__isLocal")
+#pragma push_macro("__DEVICE__")
+#define __DEVICE__ static __device__ __forceinline__ __attribute__((const))
+__DEVICE__ unsigned int __isGlobal(const void *p) {
+ return __nvvm_isspacep_global(p);
+}
+__DEVICE__ unsigned int __isShared(const void *p) {
+ return __nvvm_isspacep_shared(p);
+}
+__DEVICE__ unsigned int __isConstant(const void *p) {
+ return __nvvm_isspacep_const(p);
+}
+__DEVICE__ unsigned int __isLocal(const void *p) {
+ return __nvvm_isspacep_local(p);
+}
+#pragma pop_macro("__DEVICE__")
#include "sm_32_atomic_functions.hpp"
// Don't include sm_30_intrinsics.h and sm_32_intrinsics.h. These define the
@@ -330,6 +357,34 @@ static inline __device__ void __brkpt(int __c) { __brkpt(); }
#pragma pop_macro("__host__")
+// __clang_cuda_texture_intrinsics.h must be included first in order to provide
+// an implementation for __nv_tex_surf_handler that CUDA's headers depend on.
+// The implementation requires C++11 and only works with CUDA-9 or newer.
+#if __cplusplus >= 201103L && CUDA_VERSION >= 9000
+// clang-format off
+#include <__clang_cuda_texture_intrinsics.h>
+// clang-format on
+#else
+#if CUDA_VERSION >= 9000
+// Provide a hint that texture support needs C++11.
+template <typename T> struct __nv_tex_needs_cxx11 {
+ const static bool value = false;
+};
+template <class T>
+__host__ __device__ void __nv_tex_surf_handler(const char *name, T *ptr,
+ cudaTextureObject_t obj,
+ float x) {
+ _Static_assert(__nv_tex_needs_cxx11<T>::value,
+ "Texture support requires C++11");
+}
+#else
+// Textures in CUDA-8 and older are not supported by clang. There's no
+// convenient way to intercept texture use in these versions, so we can't
+// produce a meaningful error. The source code that attempts to use textures
+// will continue to fail as it does now.
+#endif // CUDA_VERSION
+#endif // __cplusplus >= 201103L && CUDA_VERSION >= 9000
+#include "texture_fetch_functions.h"
#include "texture_indirect_functions.h"
// Restore state of __CUDA_ARCH__ and __THROW we had on entry.
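The push_macro/define/pop_macro dance used above for __isGlobal and friends is a reusable pattern for overriding a header's definition. A self-contained illustration with made-up names (frob stands in for the CUDA predicates):

    // All names below are made up; frob stands in for __isGlobal and friends.
    #pragma push_macro("frob")
    #define frob __ignored_header_frob
    static inline int frob(int x) { return x + 1; }  // expands to __ignored_header_frob
    #pragma pop_macro("frob")
    // 'frob' is free again, so we can supply the definition we actually want.
    static inline int __attribute__((const)) frob(int x) { return x + 1; }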
diff --git a/clang/lib/Headers/__clang_cuda_texture_intrinsics.h b/clang/lib/Headers/__clang_cuda_texture_intrinsics.h
new file mode 100644
index 000000000000..3c0f0026f1f0
--- /dev/null
+++ b/clang/lib/Headers/__clang_cuda_texture_intrinsics.h
@@ -0,0 +1,740 @@
+/*===--- __clang_cuda_texture_intrinsics.h - Device-side texture support ---===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ *
+ * This header provides in-header implementations for NVCC's built-in
+ * __nv_tex_surf_handler() which is used by CUDA's texture-related headers. The
+ * built-in is unusual as it's actually a set of function overloads that use the
+ * first string literal argument as one of the overload parameters.
+ */
+#ifndef __CLANG_CUDA_TEXTURE_INTRINSICS_H__
+#define __CLANG_CUDA_TEXTURE_INTRINSICS_H__
+#ifndef __CUDA__
+#error "This file is for CUDA compilation only."
+#endif
+
+// __nv_tex_surf_handler() is provided by this header as a macro.
+#define __nv_tex_surf_handler(__op, __ptr, ...) \
+ ::__cuda_tex::__tex_fetch< \
+ ::__cuda_tex::__Tag<::__cuda_tex::__tex_op_hash(__op)>>(__ptr, \
+ __VA_ARGS__)
+
+#pragma push_macro("__ASM_OUT")
+#pragma push_macro("__ASM_OUTP")
+#pragma push_macro("__Args")
+#pragma push_macro("__ID")
+#pragma push_macro("__IDV")
+#pragma push_macro("__IMPL_2DGATHER")
+#pragma push_macro("__IMPL_ALIAS")
+#pragma push_macro("__IMPL_ALIASI")
+#pragma push_macro("__IMPL_F1")
+#pragma push_macro("__IMPL_F3")
+#pragma push_macro("__IMPL_F3N")
+#pragma push_macro("__IMPL_F3S")
+#pragma push_macro("__IMPL_S")
+#pragma push_macro("__IMPL_S3")
+#pragma push_macro("__IMPL_S3I")
+#pragma push_macro("__IMPL_S3N")
+#pragma push_macro("__IMPL_S3NI")
+#pragma push_macro("__IMPL_S3S")
+#pragma push_macro("__IMPL_S3SI")
+#pragma push_macro("__IMPL_SI")
+#pragma push_macro("__L")
+#pragma push_macro("__STRIP_PARENS")
+
+// Put all functions into anonymous namespace so they have internal linkage.
+// The device-only functions here must be internal in order to avoid ODR
+// violations in case they are used from the files compiled with
+// -fgpu-rdc. E.g. a library and an app using it may be built with a different
+// version of this header file.
+namespace {
+
+// Put the implementation into its own namespace so we don't pollute the TU.
+namespace __cuda_tex {
+
+// First, we need a perfect hash function and a few constexpr helper functions
+// for converting a string literal into a numeric value which can be used to
+// parametrize a template. We can not use string literals for that as that would
+// require C++20.
+//
+// The hash function was generated with 'gperf' and then manually converted into
+// its constexpr equivalent.
+//
+// NOTE: the perfect hashing scheme comes with inherent self-test. If the hash
+// function has a collision for any of the texture operations, the compilation
+// will fail due to an attempt to redefine a tag with the same value. If the
+// header compiles, then the hash function is good enough for the job.
+
+constexpr int __tex_len(const char *s) {
+ return (s[0] == 0) ? 0
+ : (s[1] == 0) ? 1
+ : (s[2] == 0) ? 2
+ : (s[3] == 0) ? 3
+ : (s[4] == 0) ? 4
+ : (s[5] == 0) ? 5
+ : (s[6] == 0) ? 6
+ : (s[7] == 0) ? 7
+ : (s[8] == 0) ? 8
+ : (s[9] == 0) ? 9
+ : (s[10] == 0) ? 10
+ : (s[11] == 0) ? 11
+ : (s[12] == 0) ? 12
+ : (s[13] == 0) ? 13
+ : (s[14] == 0) ? 14
+ : (s[15] == 0) ? 15
+ : (s[16] == 0) ? 16
+ : (s[17] == 0) ? 17
+ : (s[18] == 0) ? 18
+ : (s[19] == 0) ? 19
+ : (s[20] == 0) ? 20
+ : (s[21] == 0) ? 21
+ : (s[22] == 0) ? 22
+ : (s[23] == 0) ? 23
+ : (s[24] == 0) ? 24
+ : (s[25] == 0) ? 25
+ : (s[26] == 0) ? 26
+ : (s[27] == 0) ? 27
+ : (s[28] == 0) ? 28
+ : (s[29] == 0) ? 29
+ : (s[30] == 0) ? 30
+ : (s[31] == 0) ? 31
+ : 32;
+}
+
+constexpr int __tex_hash_map(int c) {
+ return (c == 49) ? 10
+ : (c == 50) ? 0
+ : (c == 51) ? 100
+ : (c == 52) ? 30
+ : (c == 67) ? 10
+ : (c == 68) ? 0
+ : (c == 69) ? 25
+ : (c == 72) ? 70
+ : (c == 77) ? 0
+ : (c == 96) ? 44
+ : (c == 99) ? 10
+ : (c == 100) ? 5
+ : (c == 101) ? 60
+ : (c == 102) ? 40
+ : (c == 103) ? 70
+ : (c == 104) ? 25
+ : (c == 112) ? 0
+ : (c == 114) ? 45
+ : (c == 117) ? 5
+ : (c == 118) ? 85
+ : (c == 120) ? 20
+ : 225;
+}
+
+constexpr int __tex_op_hash(const char *str) {
+ return __tex_len(str) + __tex_hash_map(str[7] + 1) + __tex_hash_map(str[6]) +
+ __tex_hash_map(str[5]) + __tex_hash_map(str[__tex_len(str) - 1]);
+}
+
+// Tag type to identify particular texture operation.
+template <int N> struct __Tag;
+#define __ID(__op) __Tag<__tex_op_hash(__op)>
+// Tags for variants of particular operation. E.g. tex2Dgather can translate
+// into 4 different instructions.
+#define __IDV(__op, __variant) \
+ __Tag<10000 + __tex_op_hash(__op) * 100 + __variant>
+
+// Helper classes for figuring out key data types for derived types.
+// E.g. char2 has __base_t = char, __fetch_t = char4
+template <class> struct __TypeInfoT;
+// Type info for the fundamental types.
+template <> struct __TypeInfoT<float> {
+ using __base_t = float;
+ using __fetch_t = float4;
+};
+template <> struct __TypeInfoT<char> {
+ using __base_t = char;
+ using __fetch_t = int4;
+};
+template <> struct __TypeInfoT<signed char> {
+ using __base_t = signed char;
+ using __fetch_t = int4;
+};
+template <> struct __TypeInfoT<unsigned char> {
+ using __base_t = unsigned char;
+ using __fetch_t = uint4;
+};
+template <> struct __TypeInfoT<short> {
+ using __base_t = short;
+ using __fetch_t = int4;
+};
+template <> struct __TypeInfoT<unsigned short> {
+ using __base_t = unsigned short;
+ using __fetch_t = uint4;
+};
+template <> struct __TypeInfoT<int> {
+ using __base_t = int;
+ using __fetch_t = int4;
+};
+template <> struct __TypeInfoT<unsigned int> {
+ using __base_t = unsigned int;
+ using __fetch_t = uint4;
+};
+
+// Derived base/fetch types for N-element vectors.
+template <class __T> struct __TypeInfoT {
+ using __base_t = decltype(__T::x);
+ using __fetch_t = typename __TypeInfoT<__base_t>::__fetch_t;
+};
+
+// Classes that implement specific texture ops.
+template <class __op> struct __tex_fetch_v4;
+
+// Helper macros to strip parens from a macro argument.
+#define __Args(...) __VA_ARGS__
+#define __STRIP_PARENS(__X) __X
+#define __L(__X) __STRIP_PARENS(__Args __X)
+
+// Construct inline assembly output args.
+// Results are stored in a temp var __r.
+// isResident bool is pointed to by __ir
+// Asm args for return values. It's a 4-element vector
+#define __ASM_OUT(__t) \
+ ("=" __t(__r.x), "=" __t(__r.y), "=" __t(__r.z), "=" __t(__r.w))
+// .. possibly combined with a predicate.
+#define __ASM_OUTP(__t) (__L(__ASM_OUT(__t)), "=h"(*__ir))
+
+// Implements a single variant of texture fetch instruction.
+#define __IMPL_F1(__rt, __dt, __args, __asm_op, __asm_outs, __asm_args) \
+ template <> \
+ __device__ __rt __run<__dt>(cudaTextureObject_t __obj, __L(__args)) { \
+ __rt __r; \
+ asm(__asm_op : __L(__asm_outs) : "l"(__obj), __L(__asm_args)); \
+ return __r; \
+ }
+
+// Implements texture fetch instructions for int4/uint4/float4 data types.
+#define __IMPL_F3(__args, __asm_op, __ctype, __asm_op_args, __asm_args) \
+ __IMPL_F1(int4, int4, __args, __asm_op ".s32." __ctype "\t" __asm_op_args, \
+ __ASM_OUT("r"), __asm_args) \
+ __IMPL_F1(uint4, uint4, __args, __asm_op ".u32." __ctype "\t" __asm_op_args, \
+ __ASM_OUT("r"), __asm_args) \
+ __IMPL_F1(float4, float4, __args, \
+ __asm_op ".f32." __ctype "\t" __asm_op_args, __ASM_OUT("f"), \
+ __asm_args)
+// Implements 'sparse' texture fetch instructions for int4/uint4/float4 data
+// types. Similar to above, but returns a boolean 'isPresent' value in addition
+// to texture data.
+#define __IMPL_F3S(__args, __asm_op, __ctype, __asm_op_args, __asm_args) \
+ __IMPL_F1(int4, int4, __args, __asm_op ".s32." __ctype "\t" __asm_op_args, \
+ __ASM_OUTP("r"), __asm_args) \
+ __IMPL_F1(uint4, uint4, __args, __asm_op ".u32." __ctype "\t" __asm_op_args, \
+ __ASM_OUTP("r"), __asm_args) \
+ __IMPL_F1(float4, float4, __args, \
+ __asm_op ".f32." __ctype "\t" __asm_op_args, __ASM_OUTP("f"), \
+ __asm_args)
+
+// Similar to F3, but for integer data which is returned as normalized floats.
+// Only instantiates fetch functions for int4/uint4.
+#define __IMPL_F3N(__args, __asm_op, __ctype, __asm_op_args, __asm_args) \
+ __IMPL_F1(float4, int4, __args, __asm_op ".s32." __ctype "\t" __asm_op_args, \
+ __ASM_OUT("r"), __asm_args) \
+ __IMPL_F1(float4, uint4, __args, \
+ __asm_op ".u32." __ctype "\t" __asm_op_args, __ASM_OUT("r"), \
+ __asm_args)
+
+// Instantiates __tex_fetch_v4 with regular fetch functions.
+#define __IMPL_S3I(__op, __args, __asm_op, __ctype, __asm_op_args, __asm_args) \
+ template <> struct __tex_fetch_v4<__op> { \
+ template <class T> \
+ __device__ static T __run(cudaTextureObject_t __obj, __L(__args)); \
+ __IMPL_F3(__args, __asm_op, __ctype, __asm_op_args, __asm_args) \
+ }
+
+// Same, but for sparse ops. Only available on sm_60+
+#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 600)
+#define __IMPL_S3SI(__op, __args, __asm_op, __ctype, __asm_op_args, \
+ __asm_args) \
+ template <> struct __tex_fetch_v4<__op> { \
+ template <class T> \
+ __device__ static T __run(cudaTextureObject_t __obj, __L(__args)); \
+ __IMPL_F3S(__args, __asm_op, __ctype, __asm_op_args, __asm_args) \
+ }
+#else
+#define __IMPL_S3SI(__op, __args, __asm_op, __ctype, __asm_op_args, __asm_args)
+#endif
+
+// Same, but for normalized float ops.
+#define __IMPL_S3NI(__op, __args, __asm_op, __ctype, __asm_op_args, \
+ __asm_args) \
+ template <> struct __tex_fetch_v4<__op> { \
+ template <class T> \
+ __device__ static float4 __run(cudaTextureObject_t __obj, __L(__args)); \
+ __IMPL_F3N(__args, __asm_op, __ctype, __asm_op_args, __asm_args) \
+ }
+
+// Regular and normalized float ops share a lot of similarities. This macro
+// instantiates both variants -- normal for __op and normalized for __opn.
+#define __IMPL_SI(__op, __opn, __args, __asm_op, __ctype, __asm_op_args, \
+ __asm_args) \
+ __IMPL_S3I(__op, __args, __asm_op, __ctype, __asm_op_args, __asm_args); \
+ __IMPL_S3NI(__opn, __args, __asm_op, __ctype, __asm_op_args, __asm_args)
+
+// Convenience macros which convert a string literal __op into a __Tag.
+#define __IMPL_S3(__op, __args, __asm_op, __ctype, __asm_op_args, __asm_args) \
+ __IMPL_S3I(__ID(__op), __args, __asm_op, __ctype, __asm_op_args, __asm_args)
+#define __IMPL_S3S(__op, __args, __asm_op, __ctype, __asm_op_args, __asm_args) \
+ __IMPL_S3SI(__ID(__op), __args, __asm_op, __ctype, __asm_op_args, __asm_args)
+#define __IMPL_S3N(__op, __args, __asm_op, __ctype, __asm_op_args, __asm_args) \
+ __IMPL_S3NI(__ID(__op), __args, __asm_op, __ctype, __asm_op_args, __asm_args)
+#define __IMPL_S(__op, __opn, __args, __asm_op, __ctype, __asm_op_args, \
+ __asm_args) \
+ __IMPL_SI(__ID(__op), __ID(__opn), __args, __asm_op, __ctype, __asm_op_args, \
+ __asm_args)
+
+// CUDA headers have some 'legacy' texture operations that duplicate
+// functionality. So, we just inherit it, instead of refining a copy.
+#define __IMPL_ALIASI(__op, __opn) \
+ template <> struct __tex_fetch_v4<__op> : __tex_fetch_v4<__opn> {}
+#define __IMPL_ALIAS(__op, __opn) __IMPL_ALIASI(__ID(__op), __ID(__opn))
+
+// Now we can instantiate everything we need for each specific texture fetch
+// variant.
+__IMPL_S("__tex1D_v2", "__tex1D_rmnf_v2", (float __x), "tex.1d.v4", "f32",
+ "{%0, %1, %2, %3}, [%4, {%5}];", ("f"(__x)));
+__IMPL_S("__tex1Dfetch_v2", "__tex1Dfetch_rmnf_v2", (int __x), "tex.1d.v4",
+ "s32", "{%0, %1, %2, %3}, [%4, {%5}];", ("r"(__x)));
+__IMPL_ALIAS("__itex1D", "__tex1D_v2");
+__IMPL_ALIAS("__itex1Dfetch", "__tex1Dfetch_v2");
+
+__IMPL_S("__tex1DGrad_v2", "__tex1DGrad_rmnf_v2",
+ (float __x, float __dPdx, float __dPdy), "tex.grad.1d.v4", "f32",
+ "{%0, %1, %2, %3}, [%4, {%5}], {%6}, {%7};",
+ ("f"(__x), "f"(__dPdx), "f"(__dPdy)));
+__IMPL_ALIAS("__itex1DGrad", "__tex1DGrad_v2");
+
+__IMPL_S("__tex1DLayered_v2", "__tex1DLayered_rmnf_v2",
+ (float __x, int __layer), "tex.a1d.v4", "f32",
+ "{%0, %1, %2, %3}, [%4, {%5, %6}];", ("r"(__layer), "f"(__x)));
+__IMPL_ALIAS("__itex1DLayered", "__tex1DLayered_v2");
+
+__IMPL_S("__tex1DLayeredGrad_v2", "__tex1DLayeredGrad_rmnf_v2",
+ (float __x, int __layer, float __dPdx, float __dPdy),
+ "tex.grad.a1d.v4", "f32",
+ "{%0, %1, %2, %3}, [%4, {%5, %6}], {%7}, {%8};",
+ ("r"(__layer), "f"(__x), "f"(__dPdx), "f"(__dPdy)));
+__IMPL_ALIAS("__itex1DLayeredGrad", "__tex1DLayeredGrad_v2");
+
+__IMPL_S("__tex1DLayeredLod_v2", "__tex1DLayeredLod_rmnf_v2",
+ (float __x, int __layer, float __level), "tex.level.a1d.v4", "f32",
+ "{%0, %1, %2, %3}, [%4, {%5, %6}], %7;",
+ ("r"(__layer), "f"(__x), "f"(__level)));
+__IMPL_ALIAS("__itex1DLayeredLod", "__tex1DLayeredLod_v2");
+
+__IMPL_S("__tex1DLod_v2", "__tex1DLod_rmnf_v2", (float __x, float __level),
+ "tex.level.1d.v4", "f32", "{%0, %1, %2, %3}, [%4, {%5}], %6;",
+ ("f"(__x), "f"(__level)));
+__IMPL_ALIAS("__itex1DLod", "__tex1DLod_v2");
+
+// 2D
+__IMPL_S("__tex2D_v2", "__tex2D_rmnf_v2", (float __x, float __y), "tex.2d.v4",
+ "f32", "{%0, %1, %2, %3}, [%4, {%5, %6}];", ("f"(__x), "f"(__y)));
+__IMPL_ALIAS("__itex2D", "__tex2D_v2");
+
+__IMPL_S3S("__itex2D_sparse", (float __x, float __y, unsigned char *__ir),
+ "{.reg .pred %%p0;\n\t"
+ "tex.2d.v4",
+ "f32",
+ "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}];\n\t"
+ " selp.u16 %4, 1, 0, %%p0; }",
+ ("f"(__x), "f"(__y)));
+
+__IMPL_S("__tex2DGrad_v2", "__tex2DGrad_rmnf_v2",
+ (float __x, float __y, const float2 *__dPdx, const float2 *__dPdy),
+ "tex.grad.2d.v4", "f32",
+ "{%0, %1, %2, %3}, [%4, {%5, %6}], {%7, %8}, {%9, %10};",
+ ("f"(__x), "f"(__y), "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdy->x),
+ "f"(__dPdy->y)));
+__IMPL_ALIAS("__itex2DGrad_v2", "__tex2DGrad_v2");
+
+__IMPL_S3S("__itex2DGrad_sparse",
+ (float __x, float __y, const float2 *__dPdx, const float2 *__dPdy,
+ unsigned char *__ir),
+ "{.reg .pred %%p0;\n\t"
+ "tex.grad.2d.v4",
+ "f32",
+ "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}], {%8, %9}, {%10, %11};\n\t"
+ "selp.u16 %4, 1, 0, %%p0; }",
+ ("f"(__x), "f"(__y), "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdy->x),
+ "f"(__dPdy->y)));
+
+__IMPL_S("__tex2DLayered_v2", "__tex2DLayered_rmnf_v2",
+ (float __x, float __y, int __layer), "tex.a2d.v4", "f32",
+ "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];",
+ ("r"(__layer), "f"(__x), "f"(__y)));
+__IMPL_ALIAS("__itex2DLayered", "__tex2DLayered_v2");
+
+__IMPL_S3S("__itex2DLayered_sparse",
+ (float __x, float __y, int __layer, unsigned char *__ir),
+ "{.reg .pred %%p0;\n\t"
+ "tex.a2d.v4",
+ "f32",
+ "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}];\n\t"
+ "selp.u16 %4, 1, 0, %%p0; }",
+ ("r"(__layer), "f"(__x), "f"(__y)));
+
+__IMPL_S("__tex2DLayeredGrad_v2", "__tex2DLayeredGrad_rmnf_v2",
+ (float __x, float __y, int __layer, const float2 *__dPdx,
+ const float2 *__dPdy),
+ "tex.grad.a2d.v4", "f32",
+ "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], {%8, %9}, {%10, %11};",
+ ("r"(__layer), "f"(__x), "f"(__y), "f"(__dPdx->x), "f"(__dPdx->y),
+ "f"(__dPdy->x), "f"(__dPdy->y)));
+__IMPL_ALIAS("__itex2DLayeredGrad_v2", "__tex2DLayeredGrad_v2");
+
+__IMPL_S3S(
+ "__itex2DLayeredGrad_sparse",
+ (float __x, float __y, int __layer, const float2 *__dPdx,
+ const float2 *__dPdy, unsigned char *__ir),
+ "{.reg .pred %%p0;\n\t"
+ "tex.grad.a2d.v4",
+ "f32",
+ "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], {%9, %10}, {%11, %12};\n\t"
+ "selp.u16 %4, 1, 0, %%p0; }",
+ ("r"(__layer), "f"(__x), "f"(__y), "f"(__dPdx->x), "f"(__dPdx->y),
+ "f"(__dPdy->x), "f"(__dPdy->y)));
+
+__IMPL_S("__tex2DLayeredLod_v2", "__tex2DLayeredLod_rmnf_v2",
+ (float __x, float __y, int __layer, float __level), "tex.level.a2d.v4",
+ "f32", "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;",
+ ("r"(__layer), "f"(__x), "f"(__y), "f"(__level)));
+__IMPL_ALIAS("__itex2DLayeredLod", "__tex2DLayeredLod_v2");
+
+__IMPL_S3S("__itex2DLayeredLod_sparse",
+ (float __x, float __y, int __layer, float __level,
+ unsigned char *__ir),
+ "{.reg .pred %%p0;\n\t"
+ "tex.level.a2d.v4",
+ "f32",
+ "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], %9;\n\t"
+ "selp.u16 %4, 1, 0, %%p0; }",
+ ("r"(__layer), "f"(__x), "f"(__y), "f"(__level)));
+
+__IMPL_S("__tex2DLod_v2", "__tex2DLod_rmnf_v2",
+ (float __x, float __y, float __level), "tex.level.2d.v4", "f32",
+ "{%0, %1, %2, %3}, [%4, {%5, %6}], %7;",
+ ("f"(__x), "f"(__y), "f"(__level)));
+__IMPL_ALIAS("__itex2DLod", "__tex2DLod_v2");
+
+__IMPL_S3S("__itex2DLod_sparse",
+ (float __x, float __y, float __level, unsigned char *__ir),
+ "{.reg .pred %%p0;\n\t"
+ "tex.level.2d.v4",
+ "f32",
+ "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}], %8;\n\t"
+ "selp.u16 %4, 1, 0, %%p0; }",
+ ("f"(__x), "f"(__y), "f"(__level)));
+
+// 2D gather is special. Unlike other variants that translate into exactly one
+// asm instruction, it uses one of the four different instructions selected by
+// __comp. We implement each instruction variant separately, and dispatch the
+// right one from the manually implemented 'umbrella' fetch.
+#define __IMPL_2DGATHER(variant, instr) \
+ __IMPL_SI(__IDV("__tex2Dgather_v2", variant), \
+ __IDV("__tex2Dgather_rmnf_v2", variant), \
+ (float __x, float __y, int __comp), instr, "f32", \
+ "{%0, %1, %2, %3}, [%4, {%5, %6}];", ("f"(__x), "f"(__y))); \
+ __IMPL_ALIASI(__IDV("__itex2Dgather", variant), \
+ __IDV("__tex2Dgather_v2", variant)); \
+ __IMPL_S3SI(__IDV("__itex2Dgather_sparse", variant), \
+ (float __x, float __y, unsigned char *__ir, int __comp), \
+ "{.reg .pred %%p0;\n\t" instr, "f32", \
+ "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}];\n\t" \
+ "selp.u16 %4, 1, 0, %%p0; }", \
+ ("f"(__x), "f"(__y)));
+__IMPL_2DGATHER(0, "tld4.r.2d.v4");
+__IMPL_2DGATHER(1, "tld4.g.2d.v4");
+__IMPL_2DGATHER(2, "tld4.b.2d.v4");
+__IMPL_2DGATHER(3, "tld4.a.2d.v4");
+
+// Umbrella dispatcher -- calls into specific 2Dgather variant.
+template <> struct __tex_fetch_v4<__ID("__tex2Dgather_v2")> {
+ template <class __T>
+ __device__ static __T __run(cudaTextureObject_t __obj, float __x, float __y,
+ int __comp) {
+ switch (__comp) {
+ case 0:
+ return __tex_fetch_v4<__IDV("__tex2Dgather_v2", 0)>::__run<__T>(
+ __obj, __x, __y, __comp);
+ case 1:
+ return __tex_fetch_v4<__IDV("__tex2Dgather_v2", 1)>::__run<__T>(
+ __obj, __x, __y, __comp);
+ case 2:
+ return __tex_fetch_v4<__IDV("__tex2Dgather_v2", 2)>::__run<__T>(
+ __obj, __x, __y, __comp);
+ case 3:
+ return __tex_fetch_v4<__IDV("__tex2Dgather_v2", 3)>::__run<__T>(
+ __obj, __x, __y, __comp);
+ }
+ }
+};
+__IMPL_ALIAS("__itex2Dgather", "__tex2Dgather_v2");
+
+template <> struct __tex_fetch_v4<__ID("__tex2Dgather_rmnf_v2")> {
+ template <class __T>
+ __device__ static float4 __run(cudaTextureObject_t __obj, float __x,
+ float __y, int __comp) {
+ switch (__comp) {
+ case 0:
+ return __tex_fetch_v4<__IDV("__tex2Dgather_rmnf_v2", 0)>::__run<__T>(
+ __obj, __x, __y, __comp);
+ case 1:
+ return __tex_fetch_v4<__IDV("__tex2Dgather_rmnf_v2", 1)>::__run<__T>(
+ __obj, __x, __y, __comp);
+ case 2:
+ return __tex_fetch_v4<__IDV("__tex2Dgather_rmnf_v2", 2)>::__run<__T>(
+ __obj, __x, __y, __comp);
+ case 3:
+ return __tex_fetch_v4<__IDV("__tex2Dgather_rmnf_v2", 3)>::__run<__T>(
+ __obj, __x, __y, __comp);
+ }
+ }
+};
+
+#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 600)
+template <> struct __tex_fetch_v4<__ID("__itex2Dgather_sparse")> {
+ template <class __T>
+ __device__ static __T __run(cudaTextureObject_t __obj, float __x, float __y,
+ unsigned char *__ir, int __comp) {
+ switch (__comp) {
+ case 0:
+ return __tex_fetch_v4<__IDV("__itex2Dgather_sparse", 0)>::__run<__T>(
+ __obj, __x, __y, __ir, __comp);
+ case 1:
+ return __tex_fetch_v4<__IDV("__itex2Dgather_sparse", 1)>::__run<__T>(
+ __obj, __x, __y, __ir, __comp);
+ case 2:
+ return __tex_fetch_v4<__IDV("__itex2Dgather_sparse", 2)>::__run<__T>(
+ __obj, __x, __y, __ir, __comp);
+ case 3:
+ return __tex_fetch_v4<__IDV("__itex2Dgather_sparse", 3)>::__run<__T>(
+ __obj, __x, __y, __ir, __comp);
+ }
+ }
+};
+#endif
+
+// 3D
+__IMPL_S("__tex3D_v2", "__tex3D_rmnf_v2", (float __x, float __y, float __z),
+ "tex.3d.v4", "f32", "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];",
+ ("f"(__x), "f"(__y), "f"(__z)));
+__IMPL_ALIAS("__itex3D", "__tex3D_v2");
+
+__IMPL_S3S("__itex3D_sparse",
+ (float __x, float __y, float __z, unsigned char *__ir),
+ "{.reg .pred %%p0;\n\t"
+ "tex.3d.v4",
+ "f32",
+ "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}];\n\t"
+ "selp.u16 %4, 1, 0, %%p0; }",
+ ("f"(__x), "f"(__y), "f"(__z)));
+
+__IMPL_S("__tex3DGrad_v2", "__tex3DGrad_rmnf_v2",
+ (float __x, float __y, float __z, const float4 *__dPdx,
+ const float4 *__dPdy),
+ "tex.grad.3d.v4", "f32",
+ "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], "
+ "{%8, %9, %10, %10}, {%11, %12, %13, %13};",
+ ("f"(__x), "f"(__y), "f"(__z), "f"(__dPdx->x), "f"(__dPdx->y),
+ "f"(__dPdx->z), "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z)));
+__IMPL_ALIAS("__itex3DGrad_v2", "__tex3DGrad_v2");
+
+__IMPL_S3S("__itex3DGrad_sparse",
+ (float __x, float __y, float __z, const float4 *__dPdx,
+ const float4 *__dPdy, unsigned char *__ir),
+ "{.reg .pred %%p0;\n\t"
+ "tex.grad.3d.v4",
+ "f32",
+ "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], "
+ "{%9, %10, %11, %11}, {%12, %13, %14, %14};\n\t"
+ "selp.u16 %4, 1, 0, %%p0; }",
+ ("f"(__x), "f"(__y), "f"(__z), "f"(__dPdx->x), "f"(__dPdx->y),
+ "f"(__dPdx->z), "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z)));
+
+__IMPL_S("__tex3DLod_v2", "__tex3DLod_rmnf_v2",
+ (float __x, float __y, float __z, float __level), "tex.level.3d.v4",
+ "f32", "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;",
+ ("f"(__x), "f"(__y), "f"(__z), "f"(__level)));
+__IMPL_ALIAS("__itex3DLod", "__tex3DLod_v2");
+
+__IMPL_S3S("__itex3DLod_sparse",
+ (float __x, float __y, float __z, float __level,
+ unsigned char *__ir),
+ "{.reg .pred %%p0;\n\t"
+ "tex.level.3d.v4",
+ "f32",
+ "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], %9;\n\t"
+ "selp.u16 %4, 1, 0, %%p0; }",
+ ("f"(__x), "f"(__y), "f"(__z), "f"(__level)));
+
+// Cubemap
+__IMPL_S("__texCubemap_v2", "__texCubemap_rmnf_v2",
+ (float __x, float __y, float __z), "tex.cube.v4", "f32",
+ "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];",
+ ("f"(__x), "f"(__y), "f"(__z)));
+__IMPL_ALIAS("__itexCubemap", "__texCubemap_v2");
+
+__IMPL_S3S("__itexCubemap_sparse",
+ (float __x, float __y, float __z, unsigned char *__ir),
+ "{.reg .pred %%p0;\n\t"
+ "tex.cube.v4",
+ "f32",
+ "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}];\n\t"
+ "selp.u16 %4, 1, 0, %%p0; }",
+ ("f"(__x), "f"(__y), "f"(__z)));
+
+__IMPL_S("__texCubemapGrad_v2", "__texCubemapGrad_rmnf_v2",
+ (float __x, float __y, float __z, const float4 *__dPdx,
+ const float4 *__dPdy),
+ "tex.grad.cube.v4", "f32",
+ "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], "
+ "{%8, %9, %10, %10}, {%11, %12, %13, %13};",
+ ("f"(__x), "f"(__y), "f"(__z), "f"(__dPdx->x), "f"(__dPdx->y),
+ "f"(__dPdx->z), "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z)));
+__IMPL_ALIAS("__itexCubemapGrad_v2", "__texCubemapGrad_v2");
+
+__IMPL_S("__texCubemapLayered_v2", "__texCubemapLayered_rmnf_v2",
+ (float __x, float __y, float __z, int __layer), "tex.acube.v4", "f32",
+ "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}];",
+ ("r"(__layer), "f"(__x), "f"(__y), "f"(__z)));
+__IMPL_ALIAS("__itexCubemapLayered", "__texCubemapLayered_v2");
+
+__IMPL_S("__texCubemapLayeredGrad_v2", "__texCubemapLayeredGrad_rmnf_v2",
+ (float __x, float __y, float __z, int __layer, const float4 *__dPdx,
+ const float4 *__dPdy),
+ "tex.grad.acube.v4", "f32",
+ "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}], "
+ "{%9, %10, %11, %11}, {%12, %13, %14, %14};",
+ ("r"(__layer), "f"(__x), "f"(__y), "f"(__z), "f"(__dPdx->x),
+ "f"(__dPdx->y), "f"(__dPdx->z), "f"(__dPdy->x), "f"(__dPdy->y),
+ "f"(__dPdy->z)));
+__IMPL_ALIAS("__itexCubemapLayeredGrad_v2", "__texCubemapLayeredGrad_v2");
+
+__IMPL_S("__texCubemapLayeredLod_v2", "__texCubemapLayeredLod_rmnf_v2",
+ (float __x, float __y, float __z, int __layer, float __level),
+ "tex.level.acube.v4", "f32",
+ "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}], %9;",
+ ("r"(__layer), "f"(__x), "f"(__y), "f"(__z), "f"(__level)));
+__IMPL_ALIAS("__itexCubemapLayeredLod", "__texCubemapLayeredLod_v2");
+
+__IMPL_S("__texCubemapLod_v2", "__texCubemapLod_rmnf_v2",
+ (float __x, float __y, float __z, float __level), "tex.level.cube.v4",
+ "f32", "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;",
+ ("f"(__x), "f"(__y), "f"(__z), "f"(__level)));
+__IMPL_ALIAS("__itexCubemapLod", "__texCubemapLod_v2");
+
+// Helper class for extracting a slice of data from V4 fetch results.
+template <class __DestT, class __SrcT> struct __convert {
+ template <int __NElements = sizeof(__DestT) /
+ sizeof(typename __TypeInfoT<__DestT>::__base_t)>
+ __device__ static __DestT __run(__SrcT __v);
+ template <> __device__ static __DestT __run<1>(__SrcT __v) { return {__v.x}; }
+ template <> __device__ static __DestT __run<2>(__SrcT __v) {
+ return {__v.x, __v.y};
+ }
+ template <> __device__ static __DestT __run<3>(__SrcT __v) {
+ return {__v.x, __v.y, __v.z};
+ }
+ template <> __device__ static __DestT __run<4>(__SrcT __v) {
+ return {__v.x, __v.y, __v.z, __v.w};
+ }
+};
+
+// These are the top-level function overloads the __nv_tex_surf_handler expands
+// to. Each overload deals with one of the several ways __nv_tex_surf_handler
+// is called by CUDA headers. In the end, each of the overloads does the same
+// job -- it figures out which `__tex_fetch_v4::__run` variant should be used
+// to fetch texture data and which `__convert::__run` is needed to convert it
+// into the appropriate return type.
+
+// __nv_tex_surf_handler("__tex...", &ret, cudaTextureObject_t handle, args...);
+// Data type and return type are based on ret.
+template <class __op, class __T, class... __Args>
+__device__ static void __tex_fetch(__T *__ptr, cudaTextureObject_t __handle,
+ __Args... __args) {
+ using __FetchT = typename __TypeInfoT<__T>::__fetch_t;
+ *__ptr = __convert<__T, __FetchT>::__run(
+ __tex_fetch_v4<__op>::template __run<__FetchT>(__handle, __args...));
+}
+
+// texture<> objects get magically converted into a texture reference. However,
+// there's no way to convert them to cudaTextureObject_t at the C++ level. So we
+// cheat a bit and use inline assembly to do it. It costs us an extra register
+// and a move, but that is easy for ptxas to optimize away.
+template <class __T>
+__device__ cudaTextureObject_t __tex_handle_to_obj(__T __handle) {
+ cudaTextureObject_t __obj;
+ asm("mov.b64 %0, %1; " : "=l"(__obj) : "l"(__handle));
+ return __obj;
+}
+
+// __nv_tex_surf_handler ("__tex...", &ret, textureReference, args...);
+// Data type and return type are based on ret.
+template <class __op, class __T, class __HandleT, class... __Args>
+__device__ static void __tex_fetch(__T *__ptr, __HandleT __handle,
+ __Args... __args) {
+ using __FetchT = typename __TypeInfoT<__T>::__fetch_t;
+ *__ptr = __convert<__T, __FetchT>::__run(
+ __tex_fetch_v4<__op>::template __run<__FetchT>(
+ __tex_handle_to_obj(__handle), __args...));
+}
+
+// __nv_tex_surf_handler ("__tex...", &type_dummy, &ret, texture<...>, args...);
+// cudaReadModeNormalizedFloat fetches always return float4.
+template <class __op, class __DataT, class __RetT, int __TexT, class... __Args>
+__device__ static void
+__tex_fetch(__DataT *, __RetT *__ptr,
+ texture<__DataT, __TexT, cudaReadModeNormalizedFloat> __handle,
+ __Args... __args) {
+ using __FetchT = typename __TypeInfoT<__DataT>::__fetch_t;
+ *__ptr = __convert<__RetT, float4>::__run(
+ __tex_fetch_v4<__op>::template __run<__FetchT>(
+ __tex_handle_to_obj(__handle), __args...));
+}
+
+// __nv_tex_surf_handler ("__tex...", &type_dummy, &ret, texture<...>, args...);
+// For cudaReadModeElementType, the fetch return type is based on type_dummy.
+template <class __op, class __DataT, class __RetT, int __TexT, class... __Args>
+__device__ static void
+__tex_fetch(__DataT *, __RetT *__ptr,
+ texture<__DataT, __TexT, cudaReadModeElementType> __handle,
+ __Args... __args) {
+ using __FetchT = typename __TypeInfoT<__DataT>::__fetch_t;
+ *__ptr = __convert<__RetT, __FetchT>::__run(
+ __tex_fetch_v4<__op>::template __run<__FetchT>(
+ __tex_handle_to_obj(__handle), __args...));
+}
+} // namespace __cuda_tex
+} // namespace
+#pragma pop_macro("__ASM_OUT")
+#pragma pop_macro("__ASM_OUTP")
+#pragma pop_macro("__Args")
+#pragma pop_macro("__ID")
+#pragma pop_macro("__IDV")
+#pragma pop_macro("__IMPL_2DGATHER")
+#pragma pop_macro("__IMPL_ALIAS")
+#pragma pop_macro("__IMPL_ALIASI")
+#pragma pop_macro("__IMPL_F1")
+#pragma pop_macro("__IMPL_F3")
+#pragma pop_macro("__IMPL_F3N")
+#pragma pop_macro("__IMPL_F3S")
+#pragma pop_macro("__IMPL_S")
+#pragma pop_macro("__IMPL_S3")
+#pragma pop_macro("__IMPL_S3I")
+#pragma pop_macro("__IMPL_S3N")
+#pragma pop_macro("__IMPL_S3NI")
+#pragma pop_macro("__IMPL_S3S")
+#pragma pop_macro("__IMPL_S3SI")
+#pragma pop_macro("__IMPL_SI")
+#pragma pop_macro("__L")
+#pragma pop_macro("__STRIP_PARENS")
+#endif // __CLANG_CUDA_TEXTURE_INTRINSICS_H__
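The header above builds every fetch out of the same two steps: a __tex_fetch_v4 specialization selected by an operation tag performs the fetch, and a __convert-style helper narrows the four-element result to the type the caller asked for. Below is a minimal host-side sketch of that tag-dispatch-plus-narrowing pattern, not the actual header machinery; Float4, Float2, SliceConvert, FetchV4, and tex_fetch are hypothetical stand-ins and no PTX or CUDA specifics are reproduced.

#include <cstdio>

struct Float4 { float x, y, z, w; };
struct Float2 { float x, y; };

// Narrow a four-element fetch result to the destination type, selected by
// the destination's element count.
template <class Dest, int N = sizeof(Dest) / sizeof(float)>
struct SliceConvert;
template <class Dest> struct SliceConvert<Dest, 2> {
  static Dest run(Float4 v) { return {v.x, v.y}; }
};
template <class Dest> struct SliceConvert<Dest, 4> {
  static Dest run(Float4 v) { return {v.x, v.y, v.z, v.w}; }
};

// One specialization per operation tag stands in for the per-op asm variants.
template <int OpTag> struct FetchV4;
template <> struct FetchV4<0> {
  static Float4 run(float x, float y) { return {x, y, 0.0f, 1.0f}; }
};

// The umbrella entry point: dispatch on the tag, then narrow the result.
template <int OpTag, class T>
void tex_fetch(T *out, float x, float y) {
  *out = SliceConvert<T>::run(FetchV4<OpTag>::run(x, y));
}

int main() {
  Float2 r;
  tex_fetch<0>(&r, 0.25f, 0.75f);
  std::printf("%g %g\n", r.x, r.y); // prints: 0.25 0.75
  return 0;
}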
diff --git a/clang/lib/Headers/__clang_hip_cmath.h b/clang/lib/Headers/__clang_hip_cmath.h
index 7342705434e6..d488db0a94d9 100644
--- a/clang/lib/Headers/__clang_hip_cmath.h
+++ b/clang/lib/Headers/__clang_hip_cmath.h
@@ -10,7 +10,7 @@
#ifndef __CLANG_HIP_CMATH_H__
#define __CLANG_HIP_CMATH_H__
-#if !defined(__HIP__)
+#if !defined(__HIP__) && !defined(__OPENMP_AMDGCN__)
#error "This file is for HIP and OpenMP AMDGCN device compilation only."
#endif
@@ -25,31 +25,43 @@
#endif // !defined(__HIPCC_RTC__)
#pragma push_macro("__DEVICE__")
+#pragma push_macro("__CONSTEXPR__")
+#ifdef __OPENMP_AMDGCN__
+#define __DEVICE__ static __attribute__((always_inline, nothrow))
+#define __CONSTEXPR__ constexpr
+#else
#define __DEVICE__ static __device__ inline __attribute__((always_inline))
+#define __CONSTEXPR__
+#endif // __OPENMP_AMDGCN__
// Start with functions that cannot be defined by DEF macros below.
#if defined(__cplusplus)
-__DEVICE__ double abs(double __x) { return ::fabs(__x); }
-__DEVICE__ float abs(float __x) { return ::fabsf(__x); }
-__DEVICE__ long long abs(long long __n) { return ::llabs(__n); }
-__DEVICE__ long abs(long __n) { return ::labs(__n); }
-__DEVICE__ float fma(float __x, float __y, float __z) {
+#if defined __OPENMP_AMDGCN__
+__DEVICE__ __CONSTEXPR__ float fabs(float __x) { return ::fabsf(__x); }
+__DEVICE__ __CONSTEXPR__ float sin(float __x) { return ::sinf(__x); }
+__DEVICE__ __CONSTEXPR__ float cos(float __x) { return ::cosf(__x); }
+#endif
+__DEVICE__ __CONSTEXPR__ double abs(double __x) { return ::fabs(__x); }
+__DEVICE__ __CONSTEXPR__ float abs(float __x) { return ::fabsf(__x); }
+__DEVICE__ __CONSTEXPR__ long long abs(long long __n) { return ::llabs(__n); }
+__DEVICE__ __CONSTEXPR__ long abs(long __n) { return ::labs(__n); }
+__DEVICE__ __CONSTEXPR__ float fma(float __x, float __y, float __z) {
return ::fmaf(__x, __y, __z);
}
#if !defined(__HIPCC_RTC__)
// The value returned by fpclassify is platform dependent, therefore it is not
// supported by hipRTC.
-__DEVICE__ int fpclassify(float __x) {
+__DEVICE__ __CONSTEXPR__ int fpclassify(float __x) {
return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
FP_ZERO, __x);
}
-__DEVICE__ int fpclassify(double __x) {
+__DEVICE__ __CONSTEXPR__ int fpclassify(double __x) {
return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
FP_ZERO, __x);
}
#endif // !defined(__HIPCC_RTC__)
-__DEVICE__ float frexp(float __arg, int *__exp) {
+__DEVICE__ __CONSTEXPR__ float frexp(float __arg, int *__exp) {
return ::frexpf(__arg, __exp);
}
@@ -71,93 +83,101 @@ __DEVICE__ float frexp(float __arg, int *__exp) {
// of the variants inside the inner region and avoid the clash.
#pragma omp begin declare variant match(implementation = {vendor(llvm)})
-__DEVICE__ int isinf(float __x) { return ::__isinff(__x); }
-__DEVICE__ int isinf(double __x) { return ::__isinf(__x); }
-__DEVICE__ int isfinite(float __x) { return ::__finitef(__x); }
-__DEVICE__ int isfinite(double __x) { return ::__finite(__x); }
-__DEVICE__ int isnan(float __x) { return ::__isnanf(__x); }
-__DEVICE__ int isnan(double __x) { return ::__isnan(__x); }
+__DEVICE__ __CONSTEXPR__ int isinf(float __x) { return ::__isinff(__x); }
+__DEVICE__ __CONSTEXPR__ int isinf(double __x) { return ::__isinf(__x); }
+__DEVICE__ __CONSTEXPR__ int isfinite(float __x) { return ::__finitef(__x); }
+__DEVICE__ __CONSTEXPR__ int isfinite(double __x) { return ::__finite(__x); }
+__DEVICE__ __CONSTEXPR__ int isnan(float __x) { return ::__isnanf(__x); }
+__DEVICE__ __CONSTEXPR__ int isnan(double __x) { return ::__isnan(__x); }
#pragma omp end declare variant
#endif // defined(__OPENMP_AMDGCN__)
-__DEVICE__ bool isinf(float __x) { return ::__isinff(__x); }
-__DEVICE__ bool isinf(double __x) { return ::__isinf(__x); }
-__DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); }
-__DEVICE__ bool isfinite(double __x) { return ::__finite(__x); }
-__DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); }
-__DEVICE__ bool isnan(double __x) { return ::__isnan(__x); }
+__DEVICE__ __CONSTEXPR__ bool isinf(float __x) { return ::__isinff(__x); }
+__DEVICE__ __CONSTEXPR__ bool isinf(double __x) { return ::__isinf(__x); }
+__DEVICE__ __CONSTEXPR__ bool isfinite(float __x) { return ::__finitef(__x); }
+__DEVICE__ __CONSTEXPR__ bool isfinite(double __x) { return ::__finite(__x); }
+__DEVICE__ __CONSTEXPR__ bool isnan(float __x) { return ::__isnanf(__x); }
+__DEVICE__ __CONSTEXPR__ bool isnan(double __x) { return ::__isnan(__x); }
#if defined(__OPENMP_AMDGCN__)
#pragma omp end declare variant
#endif // defined(__OPENMP_AMDGCN__)
-__DEVICE__ bool isgreater(float __x, float __y) {
+__DEVICE__ __CONSTEXPR__ bool isgreater(float __x, float __y) {
return __builtin_isgreater(__x, __y);
}
-__DEVICE__ bool isgreater(double __x, double __y) {
+__DEVICE__ __CONSTEXPR__ bool isgreater(double __x, double __y) {
return __builtin_isgreater(__x, __y);
}
-__DEVICE__ bool isgreaterequal(float __x, float __y) {
+__DEVICE__ __CONSTEXPR__ bool isgreaterequal(float __x, float __y) {
return __builtin_isgreaterequal(__x, __y);
}
-__DEVICE__ bool isgreaterequal(double __x, double __y) {
+__DEVICE__ __CONSTEXPR__ bool isgreaterequal(double __x, double __y) {
return __builtin_isgreaterequal(__x, __y);
}
-__DEVICE__ bool isless(float __x, float __y) {
+__DEVICE__ __CONSTEXPR__ bool isless(float __x, float __y) {
return __builtin_isless(__x, __y);
}
-__DEVICE__ bool isless(double __x, double __y) {
+__DEVICE__ __CONSTEXPR__ bool isless(double __x, double __y) {
return __builtin_isless(__x, __y);
}
-__DEVICE__ bool islessequal(float __x, float __y) {
+__DEVICE__ __CONSTEXPR__ bool islessequal(float __x, float __y) {
return __builtin_islessequal(__x, __y);
}
-__DEVICE__ bool islessequal(double __x, double __y) {
+__DEVICE__ __CONSTEXPR__ bool islessequal(double __x, double __y) {
return __builtin_islessequal(__x, __y);
}
-__DEVICE__ bool islessgreater(float __x, float __y) {
+__DEVICE__ __CONSTEXPR__ bool islessgreater(float __x, float __y) {
return __builtin_islessgreater(__x, __y);
}
-__DEVICE__ bool islessgreater(double __x, double __y) {
+__DEVICE__ __CONSTEXPR__ bool islessgreater(double __x, double __y) {
return __builtin_islessgreater(__x, __y);
}
-__DEVICE__ bool isnormal(float __x) { return __builtin_isnormal(__x); }
-__DEVICE__ bool isnormal(double __x) { return __builtin_isnormal(__x); }
-__DEVICE__ bool isunordered(float __x, float __y) {
+__DEVICE__ __CONSTEXPR__ bool isnormal(float __x) {
+ return __builtin_isnormal(__x);
+}
+__DEVICE__ __CONSTEXPR__ bool isnormal(double __x) {
+ return __builtin_isnormal(__x);
+}
+__DEVICE__ __CONSTEXPR__ bool isunordered(float __x, float __y) {
return __builtin_isunordered(__x, __y);
}
-__DEVICE__ bool isunordered(double __x, double __y) {
+__DEVICE__ __CONSTEXPR__ bool isunordered(double __x, double __y) {
return __builtin_isunordered(__x, __y);
}
-__DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); }
-__DEVICE__ float pow(float __base, int __iexp) {
+__DEVICE__ __CONSTEXPR__ float modf(float __x, float *__iptr) {
+ return ::modff(__x, __iptr);
+}
+__DEVICE__ __CONSTEXPR__ float pow(float __base, int __iexp) {
return ::powif(__base, __iexp);
}
-__DEVICE__ double pow(double __base, int __iexp) {
+__DEVICE__ __CONSTEXPR__ double pow(double __base, int __iexp) {
return ::powi(__base, __iexp);
}
-__DEVICE__ float remquo(float __x, float __y, int *__quo) {
+__DEVICE__ __CONSTEXPR__ float remquo(float __x, float __y, int *__quo) {
return ::remquof(__x, __y, __quo);
}
-__DEVICE__ float scalbln(float __x, long int __n) {
+__DEVICE__ __CONSTEXPR__ float scalbln(float __x, long int __n) {
return ::scalblnf(__x, __n);
}
-__DEVICE__ bool signbit(float __x) { return ::__signbitf(__x); }
-__DEVICE__ bool signbit(double __x) { return ::__signbit(__x); }
+__DEVICE__ __CONSTEXPR__ bool signbit(float __x) { return ::__signbitf(__x); }
+__DEVICE__ __CONSTEXPR__ bool signbit(double __x) { return ::__signbit(__x); }
// Notably missing above is nexttoward. We omit it because
// ocml doesn't provide an implementation, and we don't want to be in the
// business of implementing tricky libm functions in this header.
// Other functions.
-__DEVICE__ _Float16 fma(_Float16 __x, _Float16 __y, _Float16 __z) {
+__DEVICE__ __CONSTEXPR__ _Float16 fma(_Float16 __x, _Float16 __y,
+ _Float16 __z) {
return __ocml_fma_f16(__x, __y, __z);
}
-__DEVICE__ _Float16 pow(_Float16 __base, int __iexp) {
+__DEVICE__ __CONSTEXPR__ _Float16 pow(_Float16 __base, int __iexp) {
return __ocml_pown_f16(__base, __iexp);
}
+#ifndef __OPENMP_AMDGCN__
// BEGIN DEF_FUN and HIP_OVERLOAD
// BEGIN DEF_FUN
@@ -168,18 +188,19 @@ __DEVICE__ _Float16 pow(_Float16 __base, int __iexp) {
// Define cmath functions with float argument and returns __retty.
#define __DEF_FUN1(__retty, __func) \
- __DEVICE__ \
- __retty __func(float __x) { return __func##f(__x); }
+ __DEVICE__ __CONSTEXPR__ __retty __func(float __x) { return __func##f(__x); }
// Define cmath functions with two float arguments and returns __retty.
#define __DEF_FUN2(__retty, __func) \
- __DEVICE__ \
- __retty __func(float __x, float __y) { return __func##f(__x, __y); }
+ __DEVICE__ __CONSTEXPR__ __retty __func(float __x, float __y) { \
+ return __func##f(__x, __y); \
+ }
// Define cmath functions with a float and an int argument and returns __retty.
#define __DEF_FUN2_FI(__retty, __func) \
- __DEVICE__ \
- __retty __func(float __x, int __y) { return __func##f(__x, __y); }
+ __DEVICE__ __CONSTEXPR__ __retty __func(float __x, int __y) { \
+ return __func##f(__x, __y); \
+ }
__DEF_FUN1(float, acos)
__DEF_FUN1(float, acosh)
@@ -426,7 +447,7 @@ class __promote : public __promote_imp<_A1, _A2, _A3> {};
// floor(double).
#define __HIP_OVERLOAD1(__retty, __fn) \
template <typename __T> \
- __DEVICE__ \
+ __DEVICE__ __CONSTEXPR__ \
typename __hip_enable_if<__hip::is_integral<__T>::value, __retty>::type \
__fn(__T __x) { \
return ::__fn((double)__x); \
@@ -438,7 +459,7 @@ class __promote : public __promote_imp<_A1, _A2, _A3> {};
#if __cplusplus >= 201103L
#define __HIP_OVERLOAD2(__retty, __fn) \
template <typename __T1, typename __T2> \
- __DEVICE__ typename __hip_enable_if< \
+ __DEVICE__ __CONSTEXPR__ typename __hip_enable_if< \
__hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value, \
typename __hip::__promote<__T1, __T2>::type>::type \
__fn(__T1 __x, __T2 __y) { \
@@ -448,10 +469,11 @@ class __promote : public __promote_imp<_A1, _A2, _A3> {};
#else
#define __HIP_OVERLOAD2(__retty, __fn) \
template <typename __T1, typename __T2> \
- __DEVICE__ typename __hip_enable_if<__hip::is_arithmetic<__T1>::value && \
- __hip::is_arithmetic<__T2>::value, \
- __retty>::type \
- __fn(__T1 __x, __T2 __y) { \
+ __DEVICE__ __CONSTEXPR__ \
+ typename __hip_enable_if<__hip::is_arithmetic<__T1>::value && \
+ __hip::is_arithmetic<__T2>::value, \
+ __retty>::type \
+ __fn(__T1 __x, __T2 __y) { \
return __fn((double)__x, (double)__y); \
}
#endif
@@ -526,7 +548,7 @@ __HIP_OVERLOAD2(double, min)
// Additional Overloads that don't quite match HIP_OVERLOAD.
#if __cplusplus >= 201103L
template <typename __T1, typename __T2, typename __T3>
-__DEVICE__ typename __hip_enable_if<
+__DEVICE__ __CONSTEXPR__ typename __hip_enable_if<
__hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value &&
__hip::is_arithmetic<__T3>::value,
typename __hip::__promote<__T1, __T2, __T3>::type>::type
@@ -536,31 +558,32 @@ fma(__T1 __x, __T2 __y, __T3 __z) {
}
#else
template <typename __T1, typename __T2, typename __T3>
-__DEVICE__ typename __hip_enable_if<__hip::is_arithmetic<__T1>::value &&
- __hip::is_arithmetic<__T2>::value &&
- __hip::is_arithmetic<__T3>::value,
- double>::type
-fma(__T1 __x, __T2 __y, __T3 __z) {
+__DEVICE__ __CONSTEXPR__
+ typename __hip_enable_if<__hip::is_arithmetic<__T1>::value &&
+ __hip::is_arithmetic<__T2>::value &&
+ __hip::is_arithmetic<__T3>::value,
+ double>::type
+ fma(__T1 __x, __T2 __y, __T3 __z) {
return ::fma((double)__x, (double)__y, (double)__z);
}
#endif
template <typename __T>
-__DEVICE__
+__DEVICE__ __CONSTEXPR__
typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
frexp(__T __x, int *__exp) {
return ::frexp((double)__x, __exp);
}
template <typename __T>
-__DEVICE__
+__DEVICE__ __CONSTEXPR__
typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
ldexp(__T __x, int __exp) {
return ::ldexp((double)__x, __exp);
}
template <typename __T>
-__DEVICE__
+__DEVICE__ __CONSTEXPR__
typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
modf(__T __x, double *__exp) {
return ::modf((double)__x, __exp);
@@ -568,7 +591,7 @@ __DEVICE__
#if __cplusplus >= 201103L
template <typename __T1, typename __T2>
-__DEVICE__
+__DEVICE__ __CONSTEXPR__
typename __hip_enable_if<__hip::is_arithmetic<__T1>::value &&
__hip::is_arithmetic<__T2>::value,
typename __hip::__promote<__T1, __T2>::type>::type
@@ -578,23 +601,24 @@ __DEVICE__
}
#else
template <typename __T1, typename __T2>
-__DEVICE__ typename __hip_enable_if<__hip::is_arithmetic<__T1>::value &&
- __hip::is_arithmetic<__T2>::value,
- double>::type
-remquo(__T1 __x, __T2 __y, int *__quo) {
+__DEVICE__ __CONSTEXPR__
+ typename __hip_enable_if<__hip::is_arithmetic<__T1>::value &&
+ __hip::is_arithmetic<__T2>::value,
+ double>::type
+ remquo(__T1 __x, __T2 __y, int *__quo) {
return ::remquo((double)__x, (double)__y, __quo);
}
#endif
template <typename __T>
-__DEVICE__
+__DEVICE__ __CONSTEXPR__
typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
scalbln(__T __x, long int __exp) {
return ::scalbln((double)__x, __exp);
}
template <typename __T>
-__DEVICE__
+__DEVICE__ __CONSTEXPR__
typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
scalbn(__T __x, int __exp) {
return ::scalbn((double)__x, __exp);
@@ -607,8 +631,10 @@ __DEVICE__
// END DEF_FUN and HIP_OVERLOAD
+#endif // ifndef __OPENMP_AMDGCN__
#endif // defined(__cplusplus)
+#ifndef __OPENMP_AMDGCN__
// Define these overloads inside the namespace our standard library uses.
#if !defined(__HIPCC_RTC__)
#ifdef _LIBCPP_BEGIN_NAMESPACE_STD
@@ -781,22 +807,26 @@ _GLIBCXX_END_NAMESPACE_VERSION
#if defined(__cplusplus)
extern "C" {
#endif // defined(__cplusplus)
-__DEVICE__ __attribute__((overloadable)) double _Cosh(double x, double y) {
+__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) double _Cosh(double x,
+ double y) {
return cosh(x) * y;
}
-__DEVICE__ __attribute__((overloadable)) float _FCosh(float x, float y) {
+__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) float _FCosh(float x,
+ float y) {
return coshf(x) * y;
}
-__DEVICE__ __attribute__((overloadable)) short _Dtest(double *p) {
+__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) short _Dtest(double *p) {
return fpclassify(*p);
}
-__DEVICE__ __attribute__((overloadable)) short _FDtest(float *p) {
+__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) short _FDtest(float *p) {
return fpclassify(*p);
}
-__DEVICE__ __attribute__((overloadable)) double _Sinh(double x, double y) {
+__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) double _Sinh(double x,
+ double y) {
return sinh(x) * y;
}
-__DEVICE__ __attribute__((overloadable)) float _FSinh(float x, float y) {
+__DEVICE__ __CONSTEXPR__ __attribute__((overloadable)) float _FSinh(float x,
+ float y) {
return sinhf(x) * y;
}
#if defined(__cplusplus)
@@ -804,7 +834,9 @@ __DEVICE__ __attribute__((overloadable)) float _FSinh(float x, float y) {
#endif // defined(__cplusplus)
#endif // defined(_MSC_VER)
#endif // !defined(__HIPCC_RTC__)
+#endif // ifndef __OPENMP_AMDGCN__
#pragma pop_macro("__DEVICE__")
+#pragma pop_macro("__CONSTEXPR__")
#endif // __CLANG_HIP_CMATH_H__
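The hunks above funnel every declaration through the __DEVICE__ and __CONSTEXPR__ macros, which expand differently for HIP and for OpenMP AMDGCN, and the header brackets its own definitions with push_macro/pop_macro so an includer's definitions survive. A small stand-alone illustration of that save/redefine/restore discipline follows; MY_QUALIFIER and SOME_OFFLOAD_MODE are hypothetical names standing in for the real macros.

#include <cstdio>

#define MY_QUALIFIER "includer's definition"

// Save whatever the includer had, install a mode-dependent definition for the
// duration of the header's declarations, then restore the original.
#pragma push_macro("MY_QUALIFIER")
#undef MY_QUALIFIER
#ifdef SOME_OFFLOAD_MODE            // stands in for __OPENMP_AMDGCN__
#define MY_QUALIFIER "offload definition"
#else
#define MY_QUALIFIER "default definition"
#endif
static const char *seen_by_header = MY_QUALIFIER;
#pragma pop_macro("MY_QUALIFIER")

int main() {
  std::printf("header saw:          %s\n", seen_by_header);
  std::printf("includer still sees: %s\n", MY_QUALIFIER);
  return 0;
}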
diff --git a/clang/lib/Headers/__clang_hip_math.h b/clang/lib/Headers/__clang_hip_math.h
index 1f0982d92eff..ef7e087b832c 100644
--- a/clang/lib/Headers/__clang_hip_math.h
+++ b/clang/lib/Headers/__clang_hip_math.h
@@ -9,7 +9,7 @@
#ifndef __CLANG_HIP_MATH_H__
#define __CLANG_HIP_MATH_H__
-#if !defined(__HIP__)
+#if !defined(__HIP__) && !defined(__OPENMP_AMDGCN__)
#error "This file is for HIP and OpenMP AMDGCN device compilation only."
#endif
@@ -19,18 +19,30 @@
#endif
#include <limits.h>
#include <stdint.h>
-#endif // __HIPCC_RTC__
+#ifdef __OPENMP_AMDGCN__
+#include <omp.h>
+#endif
+#endif // !defined(__HIPCC_RTC__)
#pragma push_macro("__DEVICE__")
+
+#ifdef __OPENMP_AMDGCN__
+#define __DEVICE__ static inline __attribute__((always_inline, nothrow))
+#else
#define __DEVICE__ static __device__ inline __attribute__((always_inline))
+#endif
// A few functions return bool type starting only in C++11.
#pragma push_macro("__RETURN_TYPE")
+#ifdef __OPENMP_AMDGCN__
+#define __RETURN_TYPE int
+#else
#if defined(__cplusplus)
#define __RETURN_TYPE bool
#else
#define __RETURN_TYPE int
#endif
+#endif // __OPENMP_AMDGCN__
#if defined (__cplusplus) && __cplusplus < 201103L
// emulate static_assert on type sizes
@@ -249,6 +261,9 @@ float fmodf(float __x, float __y) { return __ocml_fmod_f32(__x, __y); }
__DEVICE__
float frexpf(float __x, int *__nptr) {
int __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
float __r =
__ocml_frexp_f32(__x, (__attribute__((address_space(5))) int *)&__tmp);
*__nptr = __tmp;
@@ -334,6 +349,9 @@ long int lroundf(float __x) { return __ocml_round_f32(__x); }
__DEVICE__
float modff(float __x, float *__iptr) {
float __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
float __r =
__ocml_modf_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
*__iptr = __tmp;
@@ -414,6 +432,9 @@ float remainderf(float __x, float __y) {
__DEVICE__
float remquof(float __x, float __y, int *__quo) {
int __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
float __r = __ocml_remquo_f32(
__x, __y, (__attribute__((address_space(5))) int *)&__tmp);
*__quo = __tmp;
@@ -470,6 +491,9 @@ __RETURN_TYPE __signbitf(float __x) { return __ocml_signbit_f32(__x); }
__DEVICE__
void sincosf(float __x, float *__sinptr, float *__cosptr) {
float __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
*__sinptr =
__ocml_sincos_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
*__cosptr = __tmp;
@@ -478,6 +502,9 @@ void sincosf(float __x, float *__sinptr, float *__cosptr) {
__DEVICE__
void sincospif(float __x, float *__sinptr, float *__cosptr) {
float __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
*__sinptr = __ocml_sincospi_f32(
__x, (__attribute__((address_space(5))) float *)&__tmp);
*__cosptr = __tmp;
@@ -790,6 +817,9 @@ double fmod(double __x, double __y) { return __ocml_fmod_f64(__x, __y); }
__DEVICE__
double frexp(double __x, int *__nptr) {
int __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
double __r =
__ocml_frexp_f64(__x, (__attribute__((address_space(5))) int *)&__tmp);
*__nptr = __tmp;
@@ -874,6 +904,9 @@ long int lround(double __x) { return __ocml_round_f64(__x); }
__DEVICE__
double modf(double __x, double *__iptr) {
double __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
double __r =
__ocml_modf_f64(__x, (__attribute__((address_space(5))) double *)&__tmp);
*__iptr = __tmp;
@@ -962,6 +995,9 @@ double remainder(double __x, double __y) {
__DEVICE__
double remquo(double __x, double __y, int *__quo) {
int __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
double __r = __ocml_remquo_f64(
__x, __y, (__attribute__((address_space(5))) int *)&__tmp);
*__quo = __tmp;
@@ -1020,6 +1056,9 @@ double sin(double __x) { return __ocml_sin_f64(__x); }
__DEVICE__
void sincos(double __x, double *__sinptr, double *__cosptr) {
double __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
*__sinptr = __ocml_sincos_f64(
__x, (__attribute__((address_space(5))) double *)&__tmp);
*__cosptr = __tmp;
@@ -1028,6 +1067,9 @@ void sincos(double __x, double *__sinptr, double *__cosptr) {
__DEVICE__
void sincospi(double __x, double *__sinptr, double *__cosptr) {
double __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
*__sinptr = __ocml_sincospi_f64(
__x, (__attribute__((address_space(5))) double *)&__tmp);
*__cosptr = __tmp;
@@ -1262,7 +1304,7 @@ float min(float __x, float __y) { return fminf(__x, __y); }
__DEVICE__
double min(double __x, double __y) { return fmin(__x, __y); }
-#if !defined(__HIPCC_RTC__)
+#if !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
__host__ inline static int min(int __arg1, int __arg2) {
return std::min(__arg1, __arg2);
}
@@ -1270,7 +1312,7 @@ __host__ inline static int min(int __arg1, int __arg2) {
__host__ inline static int max(int __arg1, int __arg2) {
return std::max(__arg1, __arg2);
}
-#endif // __HIPCC_RTC__
+#endif // !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
#endif
#pragma pop_macro("__DEVICE__")
diff --git a/clang/lib/Headers/__wmmintrin_aes.h b/clang/lib/Headers/__wmmintrin_aes.h
index f540319c7fd2..3010b38711e6 100644
--- a/clang/lib/Headers/__wmmintrin_aes.h
+++ b/clang/lib/Headers/__wmmintrin_aes.h
@@ -133,7 +133,7 @@ _mm_aesimc_si128(__m128i __V)
/// An 8-bit round constant used to generate the AES encryption key.
/// \returns A 128-bit round key for AES encryption.
#define _mm_aeskeygenassist_si128(C, R) \
- (__m128i)__builtin_ia32_aeskeygenassist128((__v2di)(__m128i)(C), (int)(R))
+ ((__m128i)__builtin_ia32_aeskeygenassist128((__v2di)(__m128i)(C), (int)(R)))
#undef __DEFAULT_FN_ATTRS
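The change shown for this header, like the avx2intrin.h hunks further down, wraps the entire macro expansion in one more pair of parentheses so the intrinsic macro behaves as a single expression regardless of the operators around the call site. A generic illustration of why the outer pair matters, using hypothetical DOUBLE_* macros rather than the intrinsics themselves:

#include <cstdio>

// Without the outer parentheses, surrounding operators can re-associate with
// pieces of the expansion; with them, the macro acts like one expression.
#define DOUBLE_BARE(x)  (int)(x) + (int)(x)
#define DOUBLE_PAREN(x) ((int)(x) + (int)(x))

int main() {
  std::printf("%d\n", 3 * DOUBLE_BARE(2));  // 3 * (int)(2) + (int)(2) == 8
  std::printf("%d\n", 3 * DOUBLE_PAREN(2)); // 3 * ((int)(2) + (int)(2)) == 12
  return 0;
}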
diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h
index 0dd8c859366b..fb808d7b0a4f 100644
--- a/clang/lib/Headers/altivec.h
+++ b/clang/lib/Headers/altivec.h
@@ -1810,6 +1810,11 @@ vec_cmpeq(vector unsigned __int128 __a, vector unsigned __int128 __b) {
return (vector bool __int128)__builtin_altivec_vcmpequq(
(vector bool __int128)__a, (vector bool __int128)__b);
}
+
+static __inline__ vector bool __int128 __ATTRS_o_ai
+vec_cmpeq(vector bool __int128 __a, vector bool __int128 __b) {
+ return (vector bool __int128)__builtin_altivec_vcmpequq(__a, __b);
+}
#endif
#ifdef __POWER9_VECTOR__
@@ -1887,6 +1892,11 @@ vec_cmpne(vector signed __int128 __a, vector signed __int128 __b) {
return (vector bool __int128) ~(__builtin_altivec_vcmpequq(
(vector bool __int128)__a, (vector bool __int128)__b));
}
+
+static __inline__ vector bool __int128 __ATTRS_o_ai
+vec_cmpne(vector bool __int128 __a, vector bool __int128 __b) {
+ return (vector bool __int128) ~(__builtin_altivec_vcmpequq(__a, __b));
+}
#endif
/* vec_cmpnez */
@@ -2472,7 +2482,7 @@ vec_cmplt(vector unsigned long long __a, vector unsigned long long __b) {
#ifdef __POWER8_VECTOR__
/* vec_popcnt */
-static __inline__ vector signed char __ATTRS_o_ai
+static __inline__ vector unsigned char __ATTRS_o_ai
vec_popcnt(vector signed char __a) {
return __builtin_altivec_vpopcntb(__a);
}
@@ -2480,7 +2490,7 @@ static __inline__ vector unsigned char __ATTRS_o_ai
vec_popcnt(vector unsigned char __a) {
return __builtin_altivec_vpopcntb(__a);
}
-static __inline__ vector signed short __ATTRS_o_ai
+static __inline__ vector unsigned short __ATTRS_o_ai
vec_popcnt(vector signed short __a) {
return __builtin_altivec_vpopcnth(__a);
}
@@ -2488,7 +2498,7 @@ static __inline__ vector unsigned short __ATTRS_o_ai
vec_popcnt(vector unsigned short __a) {
return __builtin_altivec_vpopcnth(__a);
}
-static __inline__ vector signed int __ATTRS_o_ai
+static __inline__ vector unsigned int __ATTRS_o_ai
vec_popcnt(vector signed int __a) {
return __builtin_altivec_vpopcntw(__a);
}
@@ -2496,7 +2506,7 @@ static __inline__ vector unsigned int __ATTRS_o_ai
vec_popcnt(vector unsigned int __a) {
return __builtin_altivec_vpopcntw(__a);
}
-static __inline__ vector signed long long __ATTRS_o_ai
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_popcnt(vector signed long long __a) {
return __builtin_altivec_vpopcntd(__a);
}
@@ -3049,13 +3059,10 @@ static __inline__ vector unsigned char __ATTRS_o_ai
vec_xl_len_r(const unsigned char *__a, size_t __b) {
vector unsigned char __res =
(vector unsigned char)__builtin_vsx_lxvll(__a, (__b << 56));
-#ifdef __LITTLE_ENDIAN__
vector unsigned char __mask =
(vector unsigned char)__builtin_altivec_lvsr(16 - __b, (int *)NULL);
- __res = (vector unsigned char)__builtin_altivec_vperm_4si(
+ return (vector unsigned char)__builtin_altivec_vperm_4si(
(vector int)__res, (vector int)__res, __mask);
-#endif
- return __res;
}
// vec_xst_len
@@ -3130,15 +3137,11 @@ static __inline__ void __ATTRS_o_ai vec_xst_len(vector double __a, double *__b,
static __inline__ void __ATTRS_o_ai vec_xst_len_r(vector unsigned char __a,
unsigned char *__b,
size_t __c) {
-#ifdef __LITTLE_ENDIAN__
vector unsigned char __mask =
(vector unsigned char)__builtin_altivec_lvsl(16 - __c, (int *)NULL);
vector unsigned char __res =
__builtin_altivec_vperm_4si((vector int)__a, (vector int)__a, __mask);
return __builtin_vsx_stxvll((vector int)__res, __b, (__c << 56));
-#else
- return __builtin_vsx_stxvll((vector int)__a, __b, (__c << 56));
-#endif
}
#endif
#endif
@@ -7106,6 +7109,11 @@ vec_orc(vector float __a, vector bool int __b) {
return (vector float)((vector unsigned int)__a | ~__b);
}
+static __inline__ vector float __ATTRS_o_ai vec_orc(vector float __a,
+ vector float __b) {
+ return (vector float)((vector unsigned int)__a | ~(vector unsigned int)__b);
+}
+
static __inline__ vector signed long long __ATTRS_o_ai
vec_orc(vector signed long long __a, vector signed long long __b) {
return __a | ~__b;
@@ -7150,6 +7158,12 @@ static __inline__ vector double __ATTRS_o_ai
vec_orc(vector bool long long __a, vector double __b) {
return (vector double)(__a | ~(vector unsigned long long)__b);
}
+
+static __inline__ vector double __ATTRS_o_ai vec_orc(vector double __a,
+ vector double __b) {
+ return (vector double)((vector bool long long)__a |
+ ~(vector unsigned long long)__b);
+}
#endif
/* vec_vor */
@@ -8839,7 +8853,7 @@ static __inline__ vector long long __ATTRS_o_ai
vec_sl(vector long long __a, vector unsigned long long __b) {
return (vector long long)vec_sl((vector unsigned long long)__a, __b);
}
-#else
+#elif defined(__VSX__)
static __inline__ vector unsigned char __ATTRS_o_ai
vec_vspltb(vector unsigned char __a, unsigned char __b);
static __inline__ vector unsigned long long __ATTRS_o_ai
@@ -8885,7 +8899,7 @@ static __inline__ vector long long __ATTRS_o_ai
vec_sl(vector long long __a, vector unsigned long long __b) {
return (vector long long)vec_sl((vector unsigned long long)__a, __b);
}
-#endif
+#endif /* __VSX__ */
/* vec_vslb */
@@ -10350,7 +10364,7 @@ static __inline__ vector long long __ATTRS_o_ai
vec_sr(vector long long __a, vector unsigned long long __b) {
return (vector long long)vec_sr((vector unsigned long long)__a, __b);
}
-#else
+#elif defined(__VSX__)
static __inline__ vector unsigned long long __ATTRS_o_ai
vec_sr(vector unsigned long long __a, vector unsigned long long __b) {
__b %= (vector unsigned long long)(sizeof(unsigned long long) * __CHAR_BIT__);
@@ -10394,7 +10408,7 @@ static __inline__ vector long long __ATTRS_o_ai
vec_sr(vector long long __a, vector unsigned long long __b) {
return (vector long long)vec_sr((vector unsigned long long)__a, __b);
}
-#endif
+#endif /* __VSX__ */
/* vec_vsrb */
@@ -10480,7 +10494,7 @@ static __inline__ vector unsigned long long __ATTRS_o_ai
vec_sra(vector unsigned long long __a, vector unsigned long long __b) {
return (vector unsigned long long)((vector signed long long)__a >> __b);
}
-#else
+#elif defined(__VSX__)
static __inline__ vector signed long long __ATTRS_o_ai
vec_sra(vector signed long long __a, vector unsigned long long __b) {
__b %= (vector unsigned long long)(sizeof(unsigned long long) * __CHAR_BIT__);
@@ -10492,7 +10506,7 @@ vec_sra(vector unsigned long long __a, vector unsigned long long __b) {
__b %= (vector unsigned long long)(sizeof(unsigned long long) * __CHAR_BIT__);
return (vector unsigned long long)((vector signed long long)__a >> __b);
}
-#endif
+#endif /* __VSX__ */
/* vec_vsrab */
@@ -13441,74 +13455,74 @@ vec_vxor(vector bool long long __a, vector bool long long __b) {
/* vec_extract */
static __inline__ signed char __ATTRS_o_ai vec_extract(vector signed char __a,
- unsigned int __b) {
+ signed int __b) {
return __a[__b & 0xf];
}
static __inline__ unsigned char __ATTRS_o_ai
-vec_extract(vector unsigned char __a, unsigned int __b) {
+vec_extract(vector unsigned char __a, signed int __b) {
return __a[__b & 0xf];
}
static __inline__ unsigned char __ATTRS_o_ai vec_extract(vector bool char __a,
- unsigned int __b) {
+ signed int __b) {
return __a[__b & 0xf];
}
static __inline__ signed short __ATTRS_o_ai vec_extract(vector signed short __a,
- unsigned int __b) {
+ signed int __b) {
return __a[__b & 0x7];
}
static __inline__ unsigned short __ATTRS_o_ai
-vec_extract(vector unsigned short __a, unsigned int __b) {
+vec_extract(vector unsigned short __a, signed int __b) {
return __a[__b & 0x7];
}
static __inline__ unsigned short __ATTRS_o_ai vec_extract(vector bool short __a,
- unsigned int __b) {
+ signed int __b) {
return __a[__b & 0x7];
}
static __inline__ signed int __ATTRS_o_ai vec_extract(vector signed int __a,
- unsigned int __b) {
+ signed int __b) {
return __a[__b & 0x3];
}
static __inline__ unsigned int __ATTRS_o_ai vec_extract(vector unsigned int __a,
- unsigned int __b) {
+ signed int __b) {
return __a[__b & 0x3];
}
static __inline__ unsigned int __ATTRS_o_ai vec_extract(vector bool int __a,
- unsigned int __b) {
+ signed int __b) {
return __a[__b & 0x3];
}
#ifdef __VSX__
static __inline__ signed long long __ATTRS_o_ai
-vec_extract(vector signed long long __a, unsigned int __b) {
+vec_extract(vector signed long long __a, signed int __b) {
return __a[__b & 0x1];
}
static __inline__ unsigned long long __ATTRS_o_ai
-vec_extract(vector unsigned long long __a, unsigned int __b) {
+vec_extract(vector unsigned long long __a, signed int __b) {
return __a[__b & 0x1];
}
static __inline__ unsigned long long __ATTRS_o_ai
-vec_extract(vector bool long long __a, unsigned int __b) {
+vec_extract(vector bool long long __a, signed int __b) {
return __a[__b & 0x1];
}
static __inline__ double __ATTRS_o_ai vec_extract(vector double __a,
- unsigned int __b) {
+ signed int __b) {
return __a[__b & 0x1];
}
#endif
static __inline__ float __ATTRS_o_ai vec_extract(vector float __a,
- unsigned int __b) {
+ signed int __b) {
return __a[__b & 0x3];
}
@@ -13568,82 +13582,82 @@ vec_extract_fp32_from_shortl(vector unsigned short __a) {
static __inline__ vector signed char __ATTRS_o_ai
vec_insert(signed char __a, vector signed char __b, int __c) {
- __b[__c] = __a;
+ __b[__c & 0xF] = __a;
return __b;
}
static __inline__ vector unsigned char __ATTRS_o_ai
vec_insert(unsigned char __a, vector unsigned char __b, int __c) {
- __b[__c] = __a;
+ __b[__c & 0xF] = __a;
return __b;
}
static __inline__ vector bool char __ATTRS_o_ai vec_insert(unsigned char __a,
vector bool char __b,
int __c) {
- __b[__c] = __a;
+ __b[__c & 0xF] = __a;
return __b;
}
static __inline__ vector signed short __ATTRS_o_ai
vec_insert(signed short __a, vector signed short __b, int __c) {
- __b[__c] = __a;
+ __b[__c & 0x7] = __a;
return __b;
}
static __inline__ vector unsigned short __ATTRS_o_ai
vec_insert(unsigned short __a, vector unsigned short __b, int __c) {
- __b[__c] = __a;
+ __b[__c & 0x7] = __a;
return __b;
}
static __inline__ vector bool short __ATTRS_o_ai
vec_insert(unsigned short __a, vector bool short __b, int __c) {
- __b[__c] = __a;
+ __b[__c & 0x7] = __a;
return __b;
}
static __inline__ vector signed int __ATTRS_o_ai
vec_insert(signed int __a, vector signed int __b, int __c) {
- __b[__c] = __a;
+ __b[__c & 0x3] = __a;
return __b;
}
static __inline__ vector unsigned int __ATTRS_o_ai
vec_insert(unsigned int __a, vector unsigned int __b, int __c) {
- __b[__c] = __a;
+ __b[__c & 0x3] = __a;
return __b;
}
static __inline__ vector bool int __ATTRS_o_ai vec_insert(unsigned int __a,
vector bool int __b,
int __c) {
- __b[__c] = __a;
+ __b[__c & 0x3] = __a;
return __b;
}
#ifdef __VSX__
static __inline__ vector signed long long __ATTRS_o_ai
vec_insert(signed long long __a, vector signed long long __b, int __c) {
- __b[__c] = __a;
+ __b[__c & 0x1] = __a;
return __b;
}
static __inline__ vector unsigned long long __ATTRS_o_ai
vec_insert(unsigned long long __a, vector unsigned long long __b, int __c) {
- __b[__c] = __a;
+ __b[__c & 0x1] = __a;
return __b;
}
static __inline__ vector bool long long __ATTRS_o_ai
vec_insert(unsigned long long __a, vector bool long long __b, int __c) {
- __b[__c] = __a;
+ __b[__c & 0x1] = __a;
return __b;
}
static __inline__ vector double __ATTRS_o_ai vec_insert(double __a,
vector double __b,
int __c) {
- __b[__c] = __a;
+ __b[__c & 0x1] = __a;
return __b;
}
#endif
@@ -13651,7 +13665,7 @@ static __inline__ vector double __ATTRS_o_ai vec_insert(double __a,
static __inline__ vector float __ATTRS_o_ai vec_insert(float __a,
vector float __b,
int __c) {
- __b[__c] = __a;
+ __b[__c & 0x3] = __a;
return __b;
}
@@ -14812,42 +14826,43 @@ static __inline__ int __ATTRS_o_ai vec_all_eq(vector bool int __a,
#ifdef __VSX__
static __inline__ int __ATTRS_o_ai vec_all_eq(vector signed long long __a,
vector signed long long __b) {
+#ifdef __POWER8_VECTOR__
return __builtin_altivec_vcmpequd_p(__CR6_LT, __a, __b);
+#else
+ // No vcmpequd on Power7 so we xor the two vectors and compare against zero as
+ // 32-bit elements.
+ return vec_all_eq((vector signed int)vec_xor(__a, __b), (vector signed int)0);
+#endif
}
static __inline__ int __ATTRS_o_ai vec_all_eq(vector long long __a,
vector bool long long __b) {
- return __builtin_altivec_vcmpequd_p(__CR6_LT, __a, (vector long long)__b);
+ return vec_all_eq((vector signed long long)__a, (vector signed long long)__b);
}
static __inline__ int __ATTRS_o_ai vec_all_eq(vector unsigned long long __a,
vector unsigned long long __b) {
- return __builtin_altivec_vcmpequd_p(__CR6_LT, (vector long long)__a,
- (vector long long)__b);
+ return vec_all_eq((vector signed long long)__a, (vector signed long long)__b);
}
static __inline__ int __ATTRS_o_ai vec_all_eq(vector unsigned long long __a,
vector bool long long __b) {
- return __builtin_altivec_vcmpequd_p(__CR6_LT, (vector long long)__a,
- (vector long long)__b);
+ return vec_all_eq((vector signed long long)__a, (vector signed long long)__b);
}
static __inline__ int __ATTRS_o_ai vec_all_eq(vector bool long long __a,
vector long long __b) {
- return __builtin_altivec_vcmpequd_p(__CR6_LT, (vector long long)__a,
- (vector long long)__b);
+ return vec_all_eq((vector signed long long)__a, (vector signed long long)__b);
}
static __inline__ int __ATTRS_o_ai vec_all_eq(vector bool long long __a,
vector unsigned long long __b) {
- return __builtin_altivec_vcmpequd_p(__CR6_LT, (vector long long)__a,
- (vector long long)__b);
+ return vec_all_eq((vector signed long long)__a, (vector signed long long)__b);
}
static __inline__ int __ATTRS_o_ai vec_all_eq(vector bool long long __a,
vector bool long long __b) {
- return __builtin_altivec_vcmpequd_p(__CR6_LT, (vector long long)__a,
- (vector long long)__b);
+ return vec_all_eq((vector signed long long)__a, (vector signed long long)__b);
}
#endif
@@ -14877,6 +14892,11 @@ static __inline__ int __ATTRS_o_ai vec_all_eq(vector unsigned __int128 __a,
vector unsigned __int128 __b) {
return __builtin_altivec_vcmpequq_p(__CR6_LT, __a, __b);
}
+
+static __inline__ int __ATTRS_o_ai vec_all_eq(vector bool __int128 __a,
+ vector bool __int128 __b) {
+ return __builtin_altivec_vcmpequq_p(__CR6_LT, __a, __b);
+}
#endif
/* vec_all_ge */
@@ -15822,6 +15842,11 @@ static __inline__ int __ATTRS_o_ai vec_all_ne(vector unsigned __int128 __a,
vector unsigned __int128 __b) {
return __builtin_altivec_vcmpequq_p(__CR6_EQ, __a, __b);
}
+
+static __inline__ int __ATTRS_o_ai vec_all_ne(vector bool __int128 __a,
+ vector bool __int128 __b) {
+ return __builtin_altivec_vcmpequq_p(__CR6_EQ, __a, __b);
+}
#endif
/* vec_all_nge */
@@ -16111,6 +16136,11 @@ static __inline__ int __ATTRS_o_ai vec_any_eq(vector unsigned __int128 __a,
vector unsigned __int128 __b) {
return __builtin_altivec_vcmpequq_p(__CR6_EQ_REV, __a, __b);
}
+
+static __inline__ int __ATTRS_o_ai vec_any_eq(vector bool __int128 __a,
+ vector bool __int128 __b) {
+ return __builtin_altivec_vcmpequq_p(__CR6_EQ_REV, __a, __b);
+}
#endif
/* vec_any_ge */
@@ -17020,43 +17050,43 @@ static __inline__ int __ATTRS_o_ai vec_any_ne(vector bool int __a,
#ifdef __VSX__
static __inline__ int __ATTRS_o_ai vec_any_ne(vector signed long long __a,
vector signed long long __b) {
+#ifdef __POWER8_VECTOR__
return __builtin_altivec_vcmpequd_p(__CR6_LT_REV, __a, __b);
+#else
+ // Take advantage of the optimized sequence for vec_all_eq when vcmpequd is
+ // not available.
+ return !vec_all_eq(__a, __b);
+#endif
}
static __inline__ int __ATTRS_o_ai vec_any_ne(vector unsigned long long __a,
vector unsigned long long __b) {
- return __builtin_altivec_vcmpequd_p(__CR6_LT_REV, (vector long long)__a,
- (vector long long)__b);
+ return vec_any_ne((vector signed long long)__a, (vector signed long long)__b);
}
static __inline__ int __ATTRS_o_ai vec_any_ne(vector signed long long __a,
vector bool long long __b) {
- return __builtin_altivec_vcmpequd_p(__CR6_LT_REV, __a,
- (vector signed long long)__b);
+ return vec_any_ne((vector signed long long)__a, (vector signed long long)__b);
}
static __inline__ int __ATTRS_o_ai vec_any_ne(vector unsigned long long __a,
vector bool long long __b) {
- return __builtin_altivec_vcmpequd_p(
- __CR6_LT_REV, (vector signed long long)__a, (vector signed long long)__b);
+ return vec_any_ne((vector signed long long)__a, (vector signed long long)__b);
}
static __inline__ int __ATTRS_o_ai vec_any_ne(vector bool long long __a,
vector signed long long __b) {
- return __builtin_altivec_vcmpequd_p(
- __CR6_LT_REV, (vector signed long long)__a, (vector signed long long)__b);
+ return vec_any_ne((vector signed long long)__a, (vector signed long long)__b);
}
static __inline__ int __ATTRS_o_ai vec_any_ne(vector bool long long __a,
vector unsigned long long __b) {
- return __builtin_altivec_vcmpequd_p(
- __CR6_LT_REV, (vector signed long long)__a, (vector signed long long)__b);
+ return vec_any_ne((vector signed long long)__a, (vector signed long long)__b);
}
static __inline__ int __ATTRS_o_ai vec_any_ne(vector bool long long __a,
vector bool long long __b) {
- return __builtin_altivec_vcmpequd_p(
- __CR6_LT_REV, (vector signed long long)__a, (vector signed long long)__b);
+ return vec_any_ne((vector signed long long)__a, (vector signed long long)__b);
}
#endif
@@ -17086,6 +17116,11 @@ static __inline__ int __ATTRS_o_ai vec_any_ne(vector unsigned __int128 __a,
vector unsigned __int128 __b) {
return __builtin_altivec_vcmpequq_p(__CR6_LT_REV, __a, __b);
}
+
+static __inline__ int __ATTRS_o_ai vec_any_ne(vector bool __int128 __a,
+ vector bool __int128 __b) {
+ return __builtin_altivec_vcmpequq_p(__CR6_LT_REV, __a, __b);
+}
#endif
/* vec_any_nge */
@@ -17203,6 +17238,7 @@ provided.
#define vec_ncipher_be __builtin_altivec_crypto_vncipher
#define vec_ncipherlast_be __builtin_altivec_crypto_vncipherlast
+#ifdef __VSX__
static __inline__ vector unsigned long long __attribute__((__always_inline__))
__builtin_crypto_vsbox(vector unsigned long long __a) {
return __builtin_altivec_crypto_vsbox(__a);
@@ -17231,6 +17267,7 @@ __builtin_crypto_vncipherlast(vector unsigned long long __a,
vector unsigned long long __b) {
return __builtin_altivec_crypto_vncipherlast(__a, __b);
}
+#endif /* __VSX__ */
#define __builtin_crypto_vshasigmad __builtin_altivec_crypto_vshasigmad
#define __builtin_crypto_vshasigmaw __builtin_altivec_crypto_vshasigmaw
@@ -17346,12 +17383,22 @@ vec_vbpermq(vector unsigned char __a, vector unsigned char __b) {
}
#if defined(__powerpc64__) && defined(__SIZEOF_INT128__)
-static __inline__ vector unsigned long long __attribute__((__always_inline__))
+static __inline__ vector unsigned long long __ATTRS_o_ai
vec_bperm(vector unsigned __int128 __a, vector unsigned char __b) {
return __builtin_altivec_vbpermq((vector unsigned char)__a,
(vector unsigned char)__b);
}
#endif
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_bperm(vector unsigned char __a, vector unsigned char __b) {
+ return __builtin_altivec_vbpermq(__a, __b);
+}
+#endif // __POWER8_VECTOR__
+#ifdef __POWER9_VECTOR__
+static __inline__ vector unsigned long long __ATTRS_o_ai
+vec_bperm(vector unsigned long long __a, vector unsigned char __b) {
+ return __builtin_altivec_vbpermd(__a, __b);
+}
#endif
@@ -18198,13 +18245,13 @@ vec_expandm(vector unsigned __int128 __a) {
#define vec_cntm(__a, __mp) \
_Generic((__a), vector unsigned char \
- : __builtin_altivec_vcntmbb((__a), (unsigned int)(__mp)), \
+ : __builtin_altivec_vcntmbb((__a), (unsigned char)(__mp)), \
vector unsigned short \
- : __builtin_altivec_vcntmbh((__a), (unsigned int)(__mp)), \
+ : __builtin_altivec_vcntmbh((__a), (unsigned char)(__mp)), \
vector unsigned int \
- : __builtin_altivec_vcntmbw((__a), (unsigned int)(__mp)), \
+ : __builtin_altivec_vcntmbw((__a), (unsigned char)(__mp)), \
vector unsigned long long \
- : __builtin_altivec_vcntmbd((__a), (unsigned int)(__mp)))
+ : __builtin_altivec_vcntmbd((__a), (unsigned char)(__mp)))
/* vec_gen[b|h|w|d|q]m */
@@ -18319,10 +18366,10 @@ vec_cfuge(vector unsigned long long __a, vector unsigned long long __b) {
: __builtin_vsx_xxgenpcvdm((__a), (int)(__imm)))
#endif /* __VSX__ */
-/* vec_clrl */
+/* vec_clr_first */
static __inline__ vector signed char __ATTRS_o_ai
-vec_clrl(vector signed char __a, unsigned int __n) {
+vec_clr_first(vector signed char __a, unsigned int __n) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vclrrb(__a, __n);
#else
@@ -18331,7 +18378,7 @@ vec_clrl(vector signed char __a, unsigned int __n) {
}
static __inline__ vector unsigned char __ATTRS_o_ai
-vec_clrl(vector unsigned char __a, unsigned int __n) {
+vec_clr_first(vector unsigned char __a, unsigned int __n) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vclrrb((vector signed char)__a, __n);
#else
@@ -18339,10 +18386,10 @@ vec_clrl(vector unsigned char __a, unsigned int __n) {
#endif
}
-/* vec_clrr */
+/* vec_clr_last */
static __inline__ vector signed char __ATTRS_o_ai
-vec_clrr(vector signed char __a, unsigned int __n) {
+vec_clr_last(vector signed char __a, unsigned int __n) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vclrlb(__a, __n);
#else
@@ -18351,7 +18398,7 @@ vec_clrr(vector signed char __a, unsigned int __n) {
}
static __inline__ vector unsigned char __ATTRS_o_ai
-vec_clrr(vector unsigned char __a, unsigned int __n) {
+vec_clr_last(vector unsigned char __a, unsigned int __n) {
#ifdef __LITTLE_ENDIAN__
return __builtin_altivec_vclrlb((vector signed char)__a, __n);
#else
@@ -18733,36 +18780,39 @@ static __inline__ vector double __ATTRS_o_ai vec_splatid(const float __a) {
static __inline__ vector signed int __ATTRS_o_ai vec_splati_ins(
vector signed int __a, const unsigned int __b, const signed int __c) {
+ const unsigned int __d = __b & 0x01;
#ifdef __LITTLE_ENDIAN__
- __a[1 - __b] = __c;
- __a[3 - __b] = __c;
+ __a[1 - __d] = __c;
+ __a[3 - __d] = __c;
#else
- __a[__b] = __c;
- __a[2 + __b] = __c;
+ __a[__d] = __c;
+ __a[2 + __d] = __c;
#endif
return __a;
}
static __inline__ vector unsigned int __ATTRS_o_ai vec_splati_ins(
vector unsigned int __a, const unsigned int __b, const unsigned int __c) {
+ const unsigned int __d = __b & 0x01;
#ifdef __LITTLE_ENDIAN__
- __a[1 - __b] = __c;
- __a[3 - __b] = __c;
+ __a[1 - __d] = __c;
+ __a[3 - __d] = __c;
#else
- __a[__b] = __c;
- __a[2 + __b] = __c;
+ __a[__d] = __c;
+ __a[2 + __d] = __c;
#endif
return __a;
}
static __inline__ vector float __ATTRS_o_ai
vec_splati_ins(vector float __a, const unsigned int __b, const float __c) {
+ const unsigned int __d = __b & 0x01;
#ifdef __LITTLE_ENDIAN__
- __a[1 - __b] = __c;
- __a[3 - __b] = __c;
+ __a[1 - __d] = __c;
+ __a[3 - __d] = __c;
#else
- __a[__b] = __c;
- __a[2 + __b] = __c;
+ __a[__d] = __c;
+ __a[2 + __d] = __c;
#endif
return __a;
}
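Among the altivec.h changes, the Power7 path for 64-bit vec_all_eq works around the missing vcmpequd by xoring the two vectors and checking that every 32-bit element of the result is zero. The equivalence it relies on is sketched below with plain integers instead of vector types; all names are illustrative.

#include <cstdint>
#include <cstdio>

// Two 64-bit lanes are equal exactly when both 32-bit halves of their xor
// are zero, so a 32-bit "all equal to zero" test answers the 64-bit question.
static bool all_eq_u64_via_u32(uint64_t a, uint64_t b) {
  uint64_t x = a ^ b;
  uint32_t lo = static_cast<uint32_t>(x);
  uint32_t hi = static_cast<uint32_t>(x >> 32);
  return lo == 0 && hi == 0;
}

int main() {
  std::printf("%d\n", all_eq_u64_via_u32(0x0123456789abcdefULL,
                                         0x0123456789abcdefULL)); // prints 1
  std::printf("%d\n", all_eq_u64_via_u32(0x0123456789abcdefULL,
                                         0x0123456780abcdefULL)); // prints 0
  return 0;
}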
diff --git a/clang/lib/Headers/ammintrin.h b/clang/lib/Headers/ammintrin.h
index 3806be6ebc43..1af2096595ca 100644
--- a/clang/lib/Headers/ammintrin.h
+++ b/clang/lib/Headers/ammintrin.h
@@ -10,6 +10,10 @@
#ifndef __AMMINTRIN_H
#define __AMMINTRIN_H
+#if !defined(__i386__) && !defined(__x86_64__)
+#error "This header is only meant to be used on x86 and x64 architecture"
+#endif
+
#include <pmmintrin.h>
/* Define the default attributes for the functions in this file. */
diff --git a/clang/lib/Headers/amxintrin.h b/clang/lib/Headers/amxintrin.h
index ec601a58e7c3..4940666e8083 100644
--- a/clang/lib/Headers/amxintrin.h
+++ b/clang/lib/Headers/amxintrin.h
@@ -314,8 +314,8 @@ typedef struct __tile1024i_str {
/// \param stride
/// The stride between the rows' data to be loaded in memory.
__DEFAULT_FN_ATTRS_TILE
-static void __tile_loadd(__tile1024i *dst, const void *base,
- __SIZE_TYPE__ stride) {
+static __inline__ void __tile_loadd(__tile1024i *dst, const void *base,
+ __SIZE_TYPE__ stride) {
dst->tile = _tile_loadd_internal(dst->row, dst->col, base, stride);
}
@@ -335,8 +335,8 @@ static void __tile_loadd(__tile1024i *dst, const void *base,
/// \param stride
/// The stride between the rows' data to be loaded in memory.
__DEFAULT_FN_ATTRS_TILE
-static void __tile_stream_loadd(__tile1024i *dst, const void *base,
- __SIZE_TYPE__ stride) {
+static __inline__ void __tile_stream_loadd(__tile1024i *dst, const void *base,
+ __SIZE_TYPE__ stride) {
dst->tile = _tile_loaddt1_internal(dst->row, dst->col, base, stride);
}
@@ -357,8 +357,8 @@ static void __tile_stream_loadd(__tile1024i *dst, const void *base,
/// \param src1
/// The 2nd source tile. Max size is 1024 Bytes.
__DEFAULT_FN_ATTRS_INT8
-static void __tile_dpbssd(__tile1024i *dst, __tile1024i src0,
- __tile1024i src1) {
+static __inline__ void __tile_dpbssd(__tile1024i *dst, __tile1024i src0,
+ __tile1024i src1) {
dst->tile = _tile_dpbssd_internal(src0.row, src1.col, src0.col, dst->tile,
src0.tile, src1.tile);
}
@@ -380,8 +380,8 @@ static void __tile_dpbssd(__tile1024i *dst, __tile1024i src0,
/// \param src1
/// The 2nd source tile. Max size is 1024 Bytes.
__DEFAULT_FN_ATTRS_INT8
-static void __tile_dpbsud(__tile1024i *dst, __tile1024i src0,
- __tile1024i src1) {
+static __inline__ void __tile_dpbsud(__tile1024i *dst, __tile1024i src0,
+ __tile1024i src1) {
dst->tile = _tile_dpbsud_internal(src0.row, src1.col, src0.col, dst->tile,
src0.tile, src1.tile);
}
@@ -403,8 +403,8 @@ static void __tile_dpbsud(__tile1024i *dst, __tile1024i src0,
/// \param src1
/// The 2nd source tile. Max size is 1024 Bytes.
__DEFAULT_FN_ATTRS_INT8
-static void __tile_dpbusd(__tile1024i *dst, __tile1024i src0,
- __tile1024i src1) {
+static __inline__ void __tile_dpbusd(__tile1024i *dst, __tile1024i src0,
+ __tile1024i src1) {
dst->tile = _tile_dpbusd_internal(src0.row, src1.col, src0.col, dst->tile,
src0.tile, src1.tile);
}
@@ -426,8 +426,8 @@ static void __tile_dpbusd(__tile1024i *dst, __tile1024i src0,
/// \param src1
/// The 2nd source tile. Max size is 1024 Bytes.
__DEFAULT_FN_ATTRS_INT8
-static void __tile_dpbuud(__tile1024i *dst, __tile1024i src0,
- __tile1024i src1) {
+static __inline__ void __tile_dpbuud(__tile1024i *dst, __tile1024i src0,
+ __tile1024i src1) {
dst->tile = _tile_dpbuud_internal(src0.row, src1.col, src0.col, dst->tile,
src0.tile, src1.tile);
}
@@ -446,7 +446,8 @@ static void __tile_dpbuud(__tile1024i *dst, __tile1024i src0,
/// \param stride
/// The stride between the rows' data to be stored in memory.
__DEFAULT_FN_ATTRS_TILE
-static void __tile_stored(void *base, __SIZE_TYPE__ stride, __tile1024i src) {
+static __inline__ void __tile_stored(void *base, __SIZE_TYPE__ stride,
+ __tile1024i src) {
_tile_stored_internal(src.row, src.col, base, stride, src.tile);
}
@@ -459,7 +460,7 @@ static void __tile_stored(void *base, __SIZE_TYPE__ stride, __tile1024i src) {
/// \param dst
/// The destination tile to be zero. Max size is 1024 Bytes.
__DEFAULT_FN_ATTRS_TILE
-static void __tile_zero(__tile1024i *dst) {
+static __inline__ void __tile_zero(__tile1024i *dst) {
dst->tile = __builtin_ia32_tilezero_internal(dst->row, dst->col);
}
@@ -479,8 +480,8 @@ static void __tile_zero(__tile1024i *dst) {
/// \param src1
/// The 2nd source tile. Max size is 1024 Bytes.
__DEFAULT_FN_ATTRS_BF16
-static void __tile_dpbf16ps(__tile1024i *dst, __tile1024i src0,
- __tile1024i src1) {
+static __inline__ void __tile_dpbf16ps(__tile1024i *dst, __tile1024i src0,
+ __tile1024i src1) {
dst->tile = _tile_dpbf16ps_internal(src0.row, src1.col, src0.col, dst->tile,
src0.tile, src1.tile);
}
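The amxintrin.h hunks above change nothing but the declaration form: each __tile_* helper gains __inline__ so it is declared static __inline__, the convention used by the other x86 intrinsic headers in this import, presumably so a translation unit that includes the header without calling a helper neither keeps nor warns about an unused static definition. A minimal C sketch of the convention follows; MY_TILE_ATTRS and the helper names are hypothetical stand-ins for __DEFAULT_FN_ATTRS_TILE and the real __tile_* wrappers, not part of the header.

/* Sketch only: hypothetical stand-ins, not the real amxintrin.h code. */
#define MY_TILE_ATTRS __attribute__((__always_inline__))

/* Plain "static": a file that includes the header but never calls this
 * keeps an unused local definition and may see -Wunused-function. */
static void tile_helper_plain(int *dst, int v) { *dst = v; }

/* "static __inline__" (the post-patch shape): unused copies are discarded
 * quietly and always_inline applies as in the rest of the headers. */
MY_TILE_ATTRS
static __inline__ void tile_helper(int *dst, int v) { *dst = v; }

int main(void) {
  int a = 0, b = 0;
  tile_helper_plain(&a, 1);
  tile_helper(&b, 2);
  return (a == 1 && b == 2) ? 0 : 1; /* 0 on success */
}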
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index cc16720949ea..5064c87c2bb1 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -20,8 +20,8 @@
/* SSE4 Multiple Packed Sums of Absolute Difference. */
#define _mm256_mpsadbw_epu8(X, Y, M) \
- (__m256i)__builtin_ia32_mpsadbw256((__v32qi)(__m256i)(X), \
- (__v32qi)(__m256i)(Y), (int)(M))
+ ((__m256i)__builtin_ia32_mpsadbw256((__v32qi)(__m256i)(X), \
+ (__v32qi)(__m256i)(Y), (int)(M)))
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_abs_epi8(__m256i __a)
@@ -114,8 +114,8 @@ _mm256_adds_epu16(__m256i __a, __m256i __b)
}
#define _mm256_alignr_epi8(a, b, n) \
- (__m256i)__builtin_ia32_palignr256((__v32qi)(__m256i)(a), \
- (__v32qi)(__m256i)(b), (n))
+ ((__m256i)__builtin_ia32_palignr256((__v32qi)(__m256i)(a), \
+ (__v32qi)(__m256i)(b), (n)))
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_and_si256(__m256i __a, __m256i __b)
@@ -149,8 +149,8 @@ _mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M)
}
#define _mm256_blend_epi16(V1, V2, M) \
- (__m256i)__builtin_ia32_pblendw256((__v16hi)(__m256i)(V1), \
- (__v16hi)(__m256i)(V2), (int)(M))
+ ((__m256i)__builtin_ia32_pblendw256((__v16hi)(__m256i)(V1), \
+ (__v16hi)(__m256i)(V2), (int)(M)))
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cmpeq_epi8(__m256i __a, __m256i __b)
@@ -467,13 +467,13 @@ _mm256_shuffle_epi8(__m256i __a, __m256i __b)
}
#define _mm256_shuffle_epi32(a, imm) \
- (__m256i)__builtin_ia32_pshufd256((__v8si)(__m256i)(a), (int)(imm))
+ ((__m256i)__builtin_ia32_pshufd256((__v8si)(__m256i)(a), (int)(imm)))
#define _mm256_shufflehi_epi16(a, imm) \
- (__m256i)__builtin_ia32_pshufhw256((__v16hi)(__m256i)(a), (int)(imm))
+ ((__m256i)__builtin_ia32_pshufhw256((__v16hi)(__m256i)(a), (int)(imm)))
#define _mm256_shufflelo_epi16(a, imm) \
- (__m256i)__builtin_ia32_pshuflw256((__v16hi)(__m256i)(a), (int)(imm))
+ ((__m256i)__builtin_ia32_pshuflw256((__v16hi)(__m256i)(a), (int)(imm)))
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_sign_epi8(__m256i __a, __m256i __b)
@@ -494,10 +494,10 @@ _mm256_sign_epi32(__m256i __a, __m256i __b)
}
#define _mm256_slli_si256(a, imm) \
- (__m256i)__builtin_ia32_pslldqi256_byteshift((__v4di)(__m256i)(a), (int)(imm))
+ ((__m256i)__builtin_ia32_pslldqi256_byteshift((__v4di)(__m256i)(a), (int)(imm)))
#define _mm256_bslli_epi128(a, imm) \
- (__m256i)__builtin_ia32_pslldqi256_byteshift((__v4di)(__m256i)(a), (int)(imm))
+ ((__m256i)__builtin_ia32_pslldqi256_byteshift((__v4di)(__m256i)(a), (int)(imm)))
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_slli_epi16(__m256i __a, int __count)
@@ -560,10 +560,10 @@ _mm256_sra_epi32(__m256i __a, __m128i __count)
}
#define _mm256_srli_si256(a, imm) \
- (__m256i)__builtin_ia32_psrldqi256_byteshift((__m256i)(a), (int)(imm))
+ ((__m256i)__builtin_ia32_psrldqi256_byteshift((__m256i)(a), (int)(imm)))
#define _mm256_bsrli_epi128(a, imm) \
- (__m256i)__builtin_ia32_psrldqi256_byteshift((__m256i)(a), (int)(imm))
+ ((__m256i)__builtin_ia32_psrldqi256_byteshift((__m256i)(a), (int)(imm)))
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_srli_epi16(__m256i __a, int __count)
@@ -743,12 +743,12 @@ _mm256_broadcastsi128_si256(__m128i __X)
#define _mm_broadcastsi128_si256(X) _mm256_broadcastsi128_si256(X)
#define _mm_blend_epi32(V1, V2, M) \
- (__m128i)__builtin_ia32_pblendd128((__v4si)(__m128i)(V1), \
- (__v4si)(__m128i)(V2), (int)(M))
+ ((__m128i)__builtin_ia32_pblendd128((__v4si)(__m128i)(V1), \
+ (__v4si)(__m128i)(V2), (int)(M)))
#define _mm256_blend_epi32(V1, V2, M) \
- (__m256i)__builtin_ia32_pblendd256((__v8si)(__m256i)(V1), \
- (__v8si)(__m256i)(V2), (int)(M))
+ ((__m256i)__builtin_ia32_pblendd256((__v8si)(__m256i)(V1), \
+ (__v8si)(__m256i)(V2), (int)(M)))
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_broadcastb_epi8(__m128i __X)
@@ -806,7 +806,7 @@ _mm256_permutevar8x32_epi32(__m256i __a, __m256i __b)
}
#define _mm256_permute4x64_pd(V, M) \
- (__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(V), (int)(M))
+ ((__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(V), (int)(M)))
static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_permutevar8x32_ps(__m256 __a, __m256i __b)
@@ -815,17 +815,17 @@ _mm256_permutevar8x32_ps(__m256 __a, __m256i __b)
}
#define _mm256_permute4x64_epi64(V, M) \
- (__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(V), (int)(M))
+ ((__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(V), (int)(M)))
#define _mm256_permute2x128_si256(V1, V2, M) \
- (__m256i)__builtin_ia32_permti256((__m256i)(V1), (__m256i)(V2), (int)(M))
+ ((__m256i)__builtin_ia32_permti256((__m256i)(V1), (__m256i)(V2), (int)(M)))
#define _mm256_extracti128_si256(V, M) \
- (__m128i)__builtin_ia32_extract128i256((__v4di)(__m256i)(V), (int)(M))
+ ((__m128i)__builtin_ia32_extract128i256((__v4di)(__m256i)(V), (int)(M)))
#define _mm256_inserti128_si256(V1, V2, M) \
- (__m256i)__builtin_ia32_insert128i256((__v4di)(__m256i)(V1), \
- (__v2di)(__m128i)(V2), (int)(M))
+ ((__m256i)__builtin_ia32_insert128i256((__v4di)(__m256i)(V1), \
+ (__v2di)(__m128i)(V2), (int)(M)))
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskload_epi32(int const *__X, __m256i __M)
@@ -936,211 +936,211 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
}
#define _mm_mask_i32gather_pd(a, m, i, mask, s) \
- (__m128d)__builtin_ia32_gatherd_pd((__v2df)(__m128i)(a), \
- (double const *)(m), \
- (__v4si)(__m128i)(i), \
- (__v2df)(__m128d)(mask), (s))
+ ((__m128d)__builtin_ia32_gatherd_pd((__v2df)(__m128i)(a), \
+ (double const *)(m), \
+ (__v4si)(__m128i)(i), \
+ (__v2df)(__m128d)(mask), (s)))
#define _mm256_mask_i32gather_pd(a, m, i, mask, s) \
- (__m256d)__builtin_ia32_gatherd_pd256((__v4df)(__m256d)(a), \
- (double const *)(m), \
- (__v4si)(__m128i)(i), \
- (__v4df)(__m256d)(mask), (s))
+ ((__m256d)__builtin_ia32_gatherd_pd256((__v4df)(__m256d)(a), \
+ (double const *)(m), \
+ (__v4si)(__m128i)(i), \
+ (__v4df)(__m256d)(mask), (s)))
#define _mm_mask_i64gather_pd(a, m, i, mask, s) \
- (__m128d)__builtin_ia32_gatherq_pd((__v2df)(__m128d)(a), \
- (double const *)(m), \
- (__v2di)(__m128i)(i), \
- (__v2df)(__m128d)(mask), (s))
+ ((__m128d)__builtin_ia32_gatherq_pd((__v2df)(__m128d)(a), \
+ (double const *)(m), \
+ (__v2di)(__m128i)(i), \
+ (__v2df)(__m128d)(mask), (s)))
#define _mm256_mask_i64gather_pd(a, m, i, mask, s) \
- (__m256d)__builtin_ia32_gatherq_pd256((__v4df)(__m256d)(a), \
- (double const *)(m), \
- (__v4di)(__m256i)(i), \
- (__v4df)(__m256d)(mask), (s))
+ ((__m256d)__builtin_ia32_gatherq_pd256((__v4df)(__m256d)(a), \
+ (double const *)(m), \
+ (__v4di)(__m256i)(i), \
+ (__v4df)(__m256d)(mask), (s)))
#define _mm_mask_i32gather_ps(a, m, i, mask, s) \
- (__m128)__builtin_ia32_gatherd_ps((__v4sf)(__m128)(a), \
- (float const *)(m), \
- (__v4si)(__m128i)(i), \
- (__v4sf)(__m128)(mask), (s))
+ ((__m128)__builtin_ia32_gatherd_ps((__v4sf)(__m128)(a), \
+ (float const *)(m), \
+ (__v4si)(__m128i)(i), \
+ (__v4sf)(__m128)(mask), (s)))
#define _mm256_mask_i32gather_ps(a, m, i, mask, s) \
- (__m256)__builtin_ia32_gatherd_ps256((__v8sf)(__m256)(a), \
- (float const *)(m), \
- (__v8si)(__m256i)(i), \
- (__v8sf)(__m256)(mask), (s))
+ ((__m256)__builtin_ia32_gatherd_ps256((__v8sf)(__m256)(a), \
+ (float const *)(m), \
+ (__v8si)(__m256i)(i), \
+ (__v8sf)(__m256)(mask), (s)))
#define _mm_mask_i64gather_ps(a, m, i, mask, s) \
- (__m128)__builtin_ia32_gatherq_ps((__v4sf)(__m128)(a), \
- (float const *)(m), \
- (__v2di)(__m128i)(i), \
- (__v4sf)(__m128)(mask), (s))
+ ((__m128)__builtin_ia32_gatherq_ps((__v4sf)(__m128)(a), \
+ (float const *)(m), \
+ (__v2di)(__m128i)(i), \
+ (__v4sf)(__m128)(mask), (s)))
#define _mm256_mask_i64gather_ps(a, m, i, mask, s) \
- (__m128)__builtin_ia32_gatherq_ps256((__v4sf)(__m128)(a), \
- (float const *)(m), \
- (__v4di)(__m256i)(i), \
- (__v4sf)(__m128)(mask), (s))
+ ((__m128)__builtin_ia32_gatherq_ps256((__v4sf)(__m128)(a), \
+ (float const *)(m), \
+ (__v4di)(__m256i)(i), \
+ (__v4sf)(__m128)(mask), (s)))
#define _mm_mask_i32gather_epi32(a, m, i, mask, s) \
- (__m128i)__builtin_ia32_gatherd_d((__v4si)(__m128i)(a), \
- (int const *)(m), \
- (__v4si)(__m128i)(i), \
- (__v4si)(__m128i)(mask), (s))
+ ((__m128i)__builtin_ia32_gatherd_d((__v4si)(__m128i)(a), \
+ (int const *)(m), \
+ (__v4si)(__m128i)(i), \
+ (__v4si)(__m128i)(mask), (s)))
#define _mm256_mask_i32gather_epi32(a, m, i, mask, s) \
- (__m256i)__builtin_ia32_gatherd_d256((__v8si)(__m256i)(a), \
- (int const *)(m), \
- (__v8si)(__m256i)(i), \
- (__v8si)(__m256i)(mask), (s))
+ ((__m256i)__builtin_ia32_gatherd_d256((__v8si)(__m256i)(a), \
+ (int const *)(m), \
+ (__v8si)(__m256i)(i), \
+ (__v8si)(__m256i)(mask), (s)))
#define _mm_mask_i64gather_epi32(a, m, i, mask, s) \
- (__m128i)__builtin_ia32_gatherq_d((__v4si)(__m128i)(a), \
- (int const *)(m), \
- (__v2di)(__m128i)(i), \
- (__v4si)(__m128i)(mask), (s))
+ ((__m128i)__builtin_ia32_gatherq_d((__v4si)(__m128i)(a), \
+ (int const *)(m), \
+ (__v2di)(__m128i)(i), \
+ (__v4si)(__m128i)(mask), (s)))
#define _mm256_mask_i64gather_epi32(a, m, i, mask, s) \
- (__m128i)__builtin_ia32_gatherq_d256((__v4si)(__m128i)(a), \
- (int const *)(m), \
- (__v4di)(__m256i)(i), \
- (__v4si)(__m128i)(mask), (s))
+ ((__m128i)__builtin_ia32_gatherq_d256((__v4si)(__m128i)(a), \
+ (int const *)(m), \
+ (__v4di)(__m256i)(i), \
+ (__v4si)(__m128i)(mask), (s)))
#define _mm_mask_i32gather_epi64(a, m, i, mask, s) \
- (__m128i)__builtin_ia32_gatherd_q((__v2di)(__m128i)(a), \
- (long long const *)(m), \
- (__v4si)(__m128i)(i), \
- (__v2di)(__m128i)(mask), (s))
+ ((__m128i)__builtin_ia32_gatherd_q((__v2di)(__m128i)(a), \
+ (long long const *)(m), \
+ (__v4si)(__m128i)(i), \
+ (__v2di)(__m128i)(mask), (s)))
#define _mm256_mask_i32gather_epi64(a, m, i, mask, s) \
- (__m256i)__builtin_ia32_gatherd_q256((__v4di)(__m256i)(a), \
- (long long const *)(m), \
- (__v4si)(__m128i)(i), \
- (__v4di)(__m256i)(mask), (s))
+ ((__m256i)__builtin_ia32_gatherd_q256((__v4di)(__m256i)(a), \
+ (long long const *)(m), \
+ (__v4si)(__m128i)(i), \
+ (__v4di)(__m256i)(mask), (s)))
#define _mm_mask_i64gather_epi64(a, m, i, mask, s) \
- (__m128i)__builtin_ia32_gatherq_q((__v2di)(__m128i)(a), \
- (long long const *)(m), \
- (__v2di)(__m128i)(i), \
- (__v2di)(__m128i)(mask), (s))
+ ((__m128i)__builtin_ia32_gatherq_q((__v2di)(__m128i)(a), \
+ (long long const *)(m), \
+ (__v2di)(__m128i)(i), \
+ (__v2di)(__m128i)(mask), (s)))
#define _mm256_mask_i64gather_epi64(a, m, i, mask, s) \
- (__m256i)__builtin_ia32_gatherq_q256((__v4di)(__m256i)(a), \
- (long long const *)(m), \
- (__v4di)(__m256i)(i), \
- (__v4di)(__m256i)(mask), (s))
+ ((__m256i)__builtin_ia32_gatherq_q256((__v4di)(__m256i)(a), \
+ (long long const *)(m), \
+ (__v4di)(__m256i)(i), \
+ (__v4di)(__m256i)(mask), (s)))
#define _mm_i32gather_pd(m, i, s) \
- (__m128d)__builtin_ia32_gatherd_pd((__v2df)_mm_undefined_pd(), \
- (double const *)(m), \
- (__v4si)(__m128i)(i), \
- (__v2df)_mm_cmpeq_pd(_mm_setzero_pd(), \
- _mm_setzero_pd()), \
- (s))
+ ((__m128d)__builtin_ia32_gatherd_pd((__v2df)_mm_undefined_pd(), \
+ (double const *)(m), \
+ (__v4si)(__m128i)(i), \
+ (__v2df)_mm_cmpeq_pd(_mm_setzero_pd(), \
+ _mm_setzero_pd()), \
+ (s)))
#define _mm256_i32gather_pd(m, i, s) \
- (__m256d)__builtin_ia32_gatherd_pd256((__v4df)_mm256_undefined_pd(), \
- (double const *)(m), \
- (__v4si)(__m128i)(i), \
- (__v4df)_mm256_cmp_pd(_mm256_setzero_pd(), \
- _mm256_setzero_pd(), \
- _CMP_EQ_OQ), \
- (s))
+ ((__m256d)__builtin_ia32_gatherd_pd256((__v4df)_mm256_undefined_pd(), \
+ (double const *)(m), \
+ (__v4si)(__m128i)(i), \
+ (__v4df)_mm256_cmp_pd(_mm256_setzero_pd(), \
+ _mm256_setzero_pd(), \
+ _CMP_EQ_OQ), \
+ (s)))
#define _mm_i64gather_pd(m, i, s) \
- (__m128d)__builtin_ia32_gatherq_pd((__v2df)_mm_undefined_pd(), \
- (double const *)(m), \
- (__v2di)(__m128i)(i), \
- (__v2df)_mm_cmpeq_pd(_mm_setzero_pd(), \
- _mm_setzero_pd()), \
- (s))
+ ((__m128d)__builtin_ia32_gatherq_pd((__v2df)_mm_undefined_pd(), \
+ (double const *)(m), \
+ (__v2di)(__m128i)(i), \
+ (__v2df)_mm_cmpeq_pd(_mm_setzero_pd(), \
+ _mm_setzero_pd()), \
+ (s)))
#define _mm256_i64gather_pd(m, i, s) \
- (__m256d)__builtin_ia32_gatherq_pd256((__v4df)_mm256_undefined_pd(), \
- (double const *)(m), \
- (__v4di)(__m256i)(i), \
- (__v4df)_mm256_cmp_pd(_mm256_setzero_pd(), \
- _mm256_setzero_pd(), \
- _CMP_EQ_OQ), \
- (s))
+ ((__m256d)__builtin_ia32_gatherq_pd256((__v4df)_mm256_undefined_pd(), \
+ (double const *)(m), \
+ (__v4di)(__m256i)(i), \
+ (__v4df)_mm256_cmp_pd(_mm256_setzero_pd(), \
+ _mm256_setzero_pd(), \
+ _CMP_EQ_OQ), \
+ (s)))
#define _mm_i32gather_ps(m, i, s) \
- (__m128)__builtin_ia32_gatherd_ps((__v4sf)_mm_undefined_ps(), \
- (float const *)(m), \
- (__v4si)(__m128i)(i), \
- (__v4sf)_mm_cmpeq_ps(_mm_setzero_ps(), \
- _mm_setzero_ps()), \
- (s))
+ ((__m128)__builtin_ia32_gatherd_ps((__v4sf)_mm_undefined_ps(), \
+ (float const *)(m), \
+ (__v4si)(__m128i)(i), \
+ (__v4sf)_mm_cmpeq_ps(_mm_setzero_ps(), \
+ _mm_setzero_ps()), \
+ (s)))
#define _mm256_i32gather_ps(m, i, s) \
- (__m256)__builtin_ia32_gatherd_ps256((__v8sf)_mm256_undefined_ps(), \
- (float const *)(m), \
- (__v8si)(__m256i)(i), \
- (__v8sf)_mm256_cmp_ps(_mm256_setzero_ps(), \
- _mm256_setzero_ps(), \
- _CMP_EQ_OQ), \
- (s))
+ ((__m256)__builtin_ia32_gatherd_ps256((__v8sf)_mm256_undefined_ps(), \
+ (float const *)(m), \
+ (__v8si)(__m256i)(i), \
+ (__v8sf)_mm256_cmp_ps(_mm256_setzero_ps(), \
+ _mm256_setzero_ps(), \
+ _CMP_EQ_OQ), \
+ (s)))
#define _mm_i64gather_ps(m, i, s) \
- (__m128)__builtin_ia32_gatherq_ps((__v4sf)_mm_undefined_ps(), \
- (float const *)(m), \
- (__v2di)(__m128i)(i), \
- (__v4sf)_mm_cmpeq_ps(_mm_setzero_ps(), \
- _mm_setzero_ps()), \
- (s))
+ ((__m128)__builtin_ia32_gatherq_ps((__v4sf)_mm_undefined_ps(), \
+ (float const *)(m), \
+ (__v2di)(__m128i)(i), \
+ (__v4sf)_mm_cmpeq_ps(_mm_setzero_ps(), \
+ _mm_setzero_ps()), \
+ (s)))
#define _mm256_i64gather_ps(m, i, s) \
- (__m128)__builtin_ia32_gatherq_ps256((__v4sf)_mm_undefined_ps(), \
- (float const *)(m), \
- (__v4di)(__m256i)(i), \
- (__v4sf)_mm_cmpeq_ps(_mm_setzero_ps(), \
- _mm_setzero_ps()), \
- (s))
+ ((__m128)__builtin_ia32_gatherq_ps256((__v4sf)_mm_undefined_ps(), \
+ (float const *)(m), \
+ (__v4di)(__m256i)(i), \
+ (__v4sf)_mm_cmpeq_ps(_mm_setzero_ps(), \
+ _mm_setzero_ps()), \
+ (s)))
#define _mm_i32gather_epi32(m, i, s) \
- (__m128i)__builtin_ia32_gatherd_d((__v4si)_mm_undefined_si128(), \
- (int const *)(m), (__v4si)(__m128i)(i), \
- (__v4si)_mm_set1_epi32(-1), (s))
+ ((__m128i)__builtin_ia32_gatherd_d((__v4si)_mm_undefined_si128(), \
+ (int const *)(m), (__v4si)(__m128i)(i), \
+ (__v4si)_mm_set1_epi32(-1), (s)))
#define _mm256_i32gather_epi32(m, i, s) \
- (__m256i)__builtin_ia32_gatherd_d256((__v8si)_mm256_undefined_si256(), \
- (int const *)(m), (__v8si)(__m256i)(i), \
- (__v8si)_mm256_set1_epi32(-1), (s))
+ ((__m256i)__builtin_ia32_gatherd_d256((__v8si)_mm256_undefined_si256(), \
+ (int const *)(m), (__v8si)(__m256i)(i), \
+ (__v8si)_mm256_set1_epi32(-1), (s)))
#define _mm_i64gather_epi32(m, i, s) \
- (__m128i)__builtin_ia32_gatherq_d((__v4si)_mm_undefined_si128(), \
- (int const *)(m), (__v2di)(__m128i)(i), \
- (__v4si)_mm_set1_epi32(-1), (s))
+ ((__m128i)__builtin_ia32_gatherq_d((__v4si)_mm_undefined_si128(), \
+ (int const *)(m), (__v2di)(__m128i)(i), \
+ (__v4si)_mm_set1_epi32(-1), (s)))
#define _mm256_i64gather_epi32(m, i, s) \
- (__m128i)__builtin_ia32_gatherq_d256((__v4si)_mm_undefined_si128(), \
- (int const *)(m), (__v4di)(__m256i)(i), \
- (__v4si)_mm_set1_epi32(-1), (s))
+ ((__m128i)__builtin_ia32_gatherq_d256((__v4si)_mm_undefined_si128(), \
+ (int const *)(m), (__v4di)(__m256i)(i), \
+ (__v4si)_mm_set1_epi32(-1), (s)))
#define _mm_i32gather_epi64(m, i, s) \
- (__m128i)__builtin_ia32_gatherd_q((__v2di)_mm_undefined_si128(), \
- (long long const *)(m), \
- (__v4si)(__m128i)(i), \
- (__v2di)_mm_set1_epi64x(-1), (s))
+ ((__m128i)__builtin_ia32_gatherd_q((__v2di)_mm_undefined_si128(), \
+ (long long const *)(m), \
+ (__v4si)(__m128i)(i), \
+ (__v2di)_mm_set1_epi64x(-1), (s)))
#define _mm256_i32gather_epi64(m, i, s) \
- (__m256i)__builtin_ia32_gatherd_q256((__v4di)_mm256_undefined_si256(), \
- (long long const *)(m), \
- (__v4si)(__m128i)(i), \
- (__v4di)_mm256_set1_epi64x(-1), (s))
+ ((__m256i)__builtin_ia32_gatherd_q256((__v4di)_mm256_undefined_si256(), \
+ (long long const *)(m), \
+ (__v4si)(__m128i)(i), \
+ (__v4di)_mm256_set1_epi64x(-1), (s)))
#define _mm_i64gather_epi64(m, i, s) \
- (__m128i)__builtin_ia32_gatherq_q((__v2di)_mm_undefined_si128(), \
- (long long const *)(m), \
- (__v2di)(__m128i)(i), \
- (__v2di)_mm_set1_epi64x(-1), (s))
+ ((__m128i)__builtin_ia32_gatherq_q((__v2di)_mm_undefined_si128(), \
+ (long long const *)(m), \
+ (__v2di)(__m128i)(i), \
+ (__v2di)_mm_set1_epi64x(-1), (s)))
#define _mm256_i64gather_epi64(m, i, s) \
- (__m256i)__builtin_ia32_gatherq_q256((__v4di)_mm256_undefined_si256(), \
- (long long const *)(m), \
- (__v4di)(__m256i)(i), \
- (__v4di)_mm256_set1_epi64x(-1), (s))
+ ((__m256i)__builtin_ia32_gatherq_q256((__v4di)_mm256_undefined_si256(), \
+ (long long const *)(m), \
+ (__v4di)(__m256i)(i), \
+ (__v4di)_mm256_set1_epi64x(-1), (s)))
#undef __DEFAULT_FN_ATTRS256
#undef __DEFAULT_FN_ATTRS128
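Every macro change in avx2intrin.h above (and in the avx512*intrin.h hunks that follow) is the same mechanical rewrap: the whole expansion gains one more set of parentheses, presumably so the macro result always parses as a single parenthesized expression no matter what the caller writes next to it. Without the outer parentheses the expansion ends in a cast, and a postfix operator applied to the macro result binds to the builtin call before the cast does. A small self-contained C sketch of that hazard follows, using hypothetical stand-in types and a fake builtin rather than the real intrinsics.

#include <stdio.h>

/* 16-byte GNU vector types standing in for the "raw" builtin result type
 * and the public __m128i/__m256i-style type. */
typedef long long raw_v2di  __attribute__((vector_size(16)));
typedef long long pub_m128i __attribute__((vector_size(16)));

static raw_v2di fake_builtin(long long x) {
  raw_v2di r = { x, x + 1 };
  return r;
}

/* Pre-patch shape: the expansion is a cast-expression. */
#define GET_OLD(x) (pub_m128i)fake_builtin(x)
/* Post-patch shape: the expansion is a parenthesized primary expression. */
#define GET_NEW(x) ((pub_m128i)fake_builtin(x))

int main(void) {
  /* GET_NEW(10)[1] parses as ((pub_m128i)fake_builtin(10))[1] and yields 11.
   * GET_OLD(10)[1] would instead parse as (pub_m128i)(fake_builtin(10)[1]),
   * a scalar cast to a 16-byte vector, which the compiler rejects; the
   * extra parentheses keep the subscript (or any other postfix operator)
   * outside the cast. */
  printf("%lld\n", GET_NEW(10)[1]);
  return 0;
}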
diff --git a/clang/lib/Headers/avx512bf16intrin.h b/clang/lib/Headers/avx512bf16intrin.h
index d1d87e72f147..09653738d40a 100644
--- a/clang/lib/Headers/avx512bf16intrin.h
+++ b/clang/lib/Headers/avx512bf16intrin.h
@@ -232,7 +232,7 @@ _mm512_maskz_dpbf16_ps(__mmask16 __U, __m512 __D, __m512bh __A, __m512bh __B) {
///
/// \param __A
/// A 256-bit vector of [16 x bfloat].
-/// \returns A 512-bit vector of [16 x float] come from convertion of __A
+/// \returns A 512-bit vector of [16 x float] come from conversion of __A
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtpbh_ps(__m256bh __A) {
return _mm512_castsi512_ps((__m512i)_mm512_slli_epi32(
(__m512i)_mm512_cvtepi16_epi32((__m256i)__A), 16));
@@ -247,7 +247,7 @@ static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtpbh_ps(__m256bh __A) {
/// bit is not set.
/// \param __A
/// A 256-bit vector of [16 x bfloat].
-/// \returns A 512-bit vector of [16 x float] come from convertion of __A
+/// \returns A 512-bit vector of [16 x float] come from conversion of __A
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtpbh_ps(__mmask16 __U, __m256bh __A) {
return _mm512_castsi512_ps((__m512i)_mm512_slli_epi32(
@@ -265,7 +265,7 @@ _mm512_maskz_cvtpbh_ps(__mmask16 __U, __m256bh __A) {
/// A 16-bit mask.
/// \param __A
/// A 256-bit vector of [16 x bfloat].
-/// \returns A 512-bit vector of [16 x float] come from convertion of __A
+/// \returns A 512-bit vector of [16 x float] come from conversion of __A
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_cvtpbh_ps(__m512 __S, __mmask16 __U, __m256bh __A) {
return _mm512_castsi512_ps((__m512i)_mm512_mask_slli_epi32(
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index 4281a33d375c..6aee8aed8487 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -178,16 +178,16 @@ _kadd_mask64(__mmask64 __A, __mmask64 __B)
}
#define _kshiftli_mask32(A, I) \
- (__mmask32)__builtin_ia32_kshiftlisi((__mmask32)(A), (unsigned int)(I))
+ ((__mmask32)__builtin_ia32_kshiftlisi((__mmask32)(A), (unsigned int)(I)))
#define _kshiftri_mask32(A, I) \
- (__mmask32)__builtin_ia32_kshiftrisi((__mmask32)(A), (unsigned int)(I))
+ ((__mmask32)__builtin_ia32_kshiftrisi((__mmask32)(A), (unsigned int)(I)))
#define _kshiftli_mask64(A, I) \
- (__mmask64)__builtin_ia32_kshiftlidi((__mmask64)(A), (unsigned int)(I))
+ ((__mmask64)__builtin_ia32_kshiftlidi((__mmask64)(A), (unsigned int)(I)))
#define _kshiftri_mask64(A, I) \
- (__mmask64)__builtin_ia32_kshiftridi((__mmask64)(A), (unsigned int)(I))
+ ((__mmask64)__builtin_ia32_kshiftridi((__mmask64)(A), (unsigned int)(I)))
static __inline__ unsigned int __DEFAULT_FN_ATTRS
_cvtmask32_u32(__mmask32 __A) {
@@ -232,44 +232,44 @@ _store_mask64(__mmask64 *__A, __mmask64 __B) {
/* Integer compare */
#define _mm512_cmp_epi8_mask(a, b, p) \
- (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
- (__v64qi)(__m512i)(b), (int)(p), \
- (__mmask64)-1)
+ ((__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
+ (__v64qi)(__m512i)(b), (int)(p), \
+ (__mmask64)-1))
#define _mm512_mask_cmp_epi8_mask(m, a, b, p) \
- (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
- (__v64qi)(__m512i)(b), (int)(p), \
- (__mmask64)(m))
+ ((__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
+ (__v64qi)(__m512i)(b), (int)(p), \
+ (__mmask64)(m)))
#define _mm512_cmp_epu8_mask(a, b, p) \
- (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
- (__v64qi)(__m512i)(b), (int)(p), \
- (__mmask64)-1)
+ ((__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
+ (__v64qi)(__m512i)(b), (int)(p), \
+ (__mmask64)-1))
#define _mm512_mask_cmp_epu8_mask(m, a, b, p) \
- (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
- (__v64qi)(__m512i)(b), (int)(p), \
- (__mmask64)(m))
+ ((__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
+ (__v64qi)(__m512i)(b), (int)(p), \
+ (__mmask64)(m)))
#define _mm512_cmp_epi16_mask(a, b, p) \
- (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
- (__v32hi)(__m512i)(b), (int)(p), \
- (__mmask32)-1)
+ ((__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
+ (__v32hi)(__m512i)(b), (int)(p), \
+ (__mmask32)-1))
#define _mm512_mask_cmp_epi16_mask(m, a, b, p) \
- (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
- (__v32hi)(__m512i)(b), (int)(p), \
- (__mmask32)(m))
+ ((__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
+ (__v32hi)(__m512i)(b), (int)(p), \
+ (__mmask32)(m)))
#define _mm512_cmp_epu16_mask(a, b, p) \
- (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
- (__v32hi)(__m512i)(b), (int)(p), \
- (__mmask32)-1)
+ ((__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
+ (__v32hi)(__m512i)(b), (int)(p), \
+ (__mmask32)-1))
#define _mm512_mask_cmp_epu16_mask(m, a, b, p) \
- (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
- (__v32hi)(__m512i)(b), (int)(p), \
- (__mmask32)(m))
+ ((__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
+ (__v32hi)(__m512i)(b), (int)(p), \
+ (__mmask32)(m)))
#define _mm512_cmpeq_epi8_mask(A, B) \
_mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ)
@@ -1428,36 +1428,36 @@ _mm512_maskz_cvtepu8_epi16(__mmask32 __U, __m256i __A)
#define _mm512_shufflehi_epi16(A, imm) \
- (__m512i)__builtin_ia32_pshufhw512((__v32hi)(__m512i)(A), (int)(imm))
+ ((__m512i)__builtin_ia32_pshufhw512((__v32hi)(__m512i)(A), (int)(imm)))
#define _mm512_mask_shufflehi_epi16(W, U, A, imm) \
- (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
- (__v32hi)_mm512_shufflehi_epi16((A), \
- (imm)), \
- (__v32hi)(__m512i)(W))
+ ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
+ (__v32hi)_mm512_shufflehi_epi16((A), \
+ (imm)), \
+ (__v32hi)(__m512i)(W)))
#define _mm512_maskz_shufflehi_epi16(U, A, imm) \
- (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
- (__v32hi)_mm512_shufflehi_epi16((A), \
- (imm)), \
- (__v32hi)_mm512_setzero_si512())
+ ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
+ (__v32hi)_mm512_shufflehi_epi16((A), \
+ (imm)), \
+ (__v32hi)_mm512_setzero_si512()))
#define _mm512_shufflelo_epi16(A, imm) \
- (__m512i)__builtin_ia32_pshuflw512((__v32hi)(__m512i)(A), (int)(imm))
+ ((__m512i)__builtin_ia32_pshuflw512((__v32hi)(__m512i)(A), (int)(imm)))
#define _mm512_mask_shufflelo_epi16(W, U, A, imm) \
- (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
- (__v32hi)_mm512_shufflelo_epi16((A), \
- (imm)), \
- (__v32hi)(__m512i)(W))
+ ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
+ (__v32hi)_mm512_shufflelo_epi16((A), \
+ (imm)), \
+ (__v32hi)(__m512i)(W)))
#define _mm512_maskz_shufflelo_epi16(U, A, imm) \
- (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
- (__v32hi)_mm512_shufflelo_epi16((A), \
- (imm)), \
- (__v32hi)_mm512_setzero_si512())
+ ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
+ (__v32hi)_mm512_shufflelo_epi16((A), \
+ (imm)), \
+ (__v32hi)_mm512_setzero_si512()))
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_sllv_epi16(__m512i __A, __m512i __B)
@@ -1527,7 +1527,7 @@ _mm512_maskz_slli_epi16(__mmask32 __U, __m512i __A, unsigned int __B)
}
#define _mm512_bslli_epi128(a, imm) \
- (__m512i)__builtin_ia32_pslldqi512_byteshift((__v8di)(__m512i)(a), (int)(imm))
+ ((__m512i)__builtin_ia32_pslldqi512_byteshift((__v8di)(__m512i)(a), (int)(imm)))
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_srlv_epi16(__m512i __A, __m512i __B)
@@ -1664,7 +1664,7 @@ _mm512_maskz_srli_epi16(__mmask32 __U, __m512i __A, int __B)
}
#define _mm512_bsrli_epi128(a, imm) \
- (__m512i)__builtin_ia32_psrldqi512_byteshift((__v8di)(__m512i)(a), (int)(imm))
+ ((__m512i)__builtin_ia32_psrldqi512_byteshift((__v8di)(__m512i)(a), (int)(imm)))
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_mov_epi16 (__m512i __W, __mmask32 __U, __m512i __A)
@@ -1984,32 +1984,32 @@ _mm512_mask_permutexvar_epi16 (__m512i __W, __mmask32 __M, __m512i __A,
}
#define _mm512_alignr_epi8(A, B, N) \
- (__m512i)__builtin_ia32_palignr512((__v64qi)(__m512i)(A), \
- (__v64qi)(__m512i)(B), (int)(N))
+ ((__m512i)__builtin_ia32_palignr512((__v64qi)(__m512i)(A), \
+ (__v64qi)(__m512i)(B), (int)(N)))
#define _mm512_mask_alignr_epi8(W, U, A, B, N) \
- (__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \
- (__v64qi)_mm512_alignr_epi8((A), (B), (int)(N)), \
- (__v64qi)(__m512i)(W))
+ ((__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \
+ (__v64qi)_mm512_alignr_epi8((A), (B), (int)(N)), \
+ (__v64qi)(__m512i)(W)))
#define _mm512_maskz_alignr_epi8(U, A, B, N) \
- (__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \
+ ((__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \
(__v64qi)_mm512_alignr_epi8((A), (B), (int)(N)), \
- (__v64qi)(__m512i)_mm512_setzero_si512())
+ (__v64qi)(__m512i)_mm512_setzero_si512()))
#define _mm512_dbsad_epu8(A, B, imm) \
- (__m512i)__builtin_ia32_dbpsadbw512((__v64qi)(__m512i)(A), \
- (__v64qi)(__m512i)(B), (int)(imm))
+ ((__m512i)__builtin_ia32_dbpsadbw512((__v64qi)(__m512i)(A), \
+ (__v64qi)(__m512i)(B), (int)(imm)))
#define _mm512_mask_dbsad_epu8(W, U, A, B, imm) \
- (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
+ ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
(__v32hi)_mm512_dbsad_epu8((A), (B), (imm)), \
- (__v32hi)(__m512i)(W))
+ (__v32hi)(__m512i)(W)))
#define _mm512_maskz_dbsad_epu8(U, A, B, imm) \
- (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
+ ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
(__v32hi)_mm512_dbsad_epu8((A), (B), (imm)), \
- (__v32hi)_mm512_setzero_si512())
+ (__v32hi)_mm512_setzero_si512()))
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_sad_epu8 (__m512i __A, __m512i __B)
diff --git a/clang/lib/Headers/avx512dqintrin.h b/clang/lib/Headers/avx512dqintrin.h
index 337256c50f50..3ba0a0cfd5fd 100644
--- a/clang/lib/Headers/avx512dqintrin.h
+++ b/clang/lib/Headers/avx512dqintrin.h
@@ -121,10 +121,10 @@ _kadd_mask16(__mmask16 __A, __mmask16 __B)
}
#define _kshiftli_mask8(A, I) \
- (__mmask8)__builtin_ia32_kshiftliqi((__mmask8)(A), (unsigned int)(I))
+ ((__mmask8)__builtin_ia32_kshiftliqi((__mmask8)(A), (unsigned int)(I)))
#define _kshiftri_mask8(A, I) \
- (__mmask8)__builtin_ia32_kshiftriqi((__mmask8)(A), (unsigned int)(I))
+ ((__mmask8)__builtin_ia32_kshiftriqi((__mmask8)(A), (unsigned int)(I)))
static __inline__ unsigned int __DEFAULT_FN_ATTRS
_cvtmask8_u32(__mmask8 __A) {
@@ -342,19 +342,19 @@ _mm512_maskz_cvtpd_epi64 (__mmask8 __U, __m512d __A) {
}
#define _mm512_cvt_roundpd_epi64(A, R) \
- (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
- (__v8di)_mm512_setzero_si512(), \
- (__mmask8)-1, (int)(R))
+ ((__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
+ (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)-1, (int)(R)))
#define _mm512_mask_cvt_roundpd_epi64(W, U, A, R) \
- (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
- (__v8di)(__m512i)(W), \
- (__mmask8)(U), (int)(R))
+ ((__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
+ (__v8di)(__m512i)(W), \
+ (__mmask8)(U), (int)(R)))
#define _mm512_maskz_cvt_roundpd_epi64(U, A, R) \
- (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
- (__v8di)_mm512_setzero_si512(), \
- (__mmask8)(U), (int)(R))
+ ((__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
+ (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)(U), (int)(R)))
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtpd_epu64 (__m512d __A) {
@@ -381,19 +381,19 @@ _mm512_maskz_cvtpd_epu64 (__mmask8 __U, __m512d __A) {
}
#define _mm512_cvt_roundpd_epu64(A, R) \
- (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
- (__v8di)_mm512_setzero_si512(), \
- (__mmask8)-1, (int)(R))
+ ((__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
+ (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)-1, (int)(R)))
#define _mm512_mask_cvt_roundpd_epu64(W, U, A, R) \
- (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
- (__v8di)(__m512i)(W), \
- (__mmask8)(U), (int)(R))
+ ((__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
+ (__v8di)(__m512i)(W), \
+ (__mmask8)(U), (int)(R)))
#define _mm512_maskz_cvt_roundpd_epu64(U, A, R) \
- (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
- (__v8di)_mm512_setzero_si512(), \
- (__mmask8)(U), (int)(R))
+ ((__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
+ (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)(U), (int)(R)))
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtps_epi64 (__m256 __A) {
@@ -420,19 +420,19 @@ _mm512_maskz_cvtps_epi64 (__mmask8 __U, __m256 __A) {
}
#define _mm512_cvt_roundps_epi64(A, R) \
- (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
- (__v8di)_mm512_setzero_si512(), \
- (__mmask8)-1, (int)(R))
+ ((__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
+ (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)-1, (int)(R)))
#define _mm512_mask_cvt_roundps_epi64(W, U, A, R) \
- (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
- (__v8di)(__m512i)(W), \
- (__mmask8)(U), (int)(R))
+ ((__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
+ (__v8di)(__m512i)(W), \
+ (__mmask8)(U), (int)(R)))
#define _mm512_maskz_cvt_roundps_epi64(U, A, R) \
- (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
- (__v8di)_mm512_setzero_si512(), \
- (__mmask8)(U), (int)(R))
+ ((__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
+ (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)(U), (int)(R)))
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtps_epu64 (__m256 __A) {
@@ -459,19 +459,19 @@ _mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A) {
}
#define _mm512_cvt_roundps_epu64(A, R) \
- (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
- (__v8di)_mm512_setzero_si512(), \
- (__mmask8)-1, (int)(R))
+ ((__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
+ (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)-1, (int)(R)))
#define _mm512_mask_cvt_roundps_epu64(W, U, A, R) \
- (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
- (__v8di)(__m512i)(W), \
- (__mmask8)(U), (int)(R))
+ ((__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
+ (__v8di)(__m512i)(W), \
+ (__mmask8)(U), (int)(R)))
#define _mm512_maskz_cvt_roundps_epu64(U, A, R) \
- (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
- (__v8di)_mm512_setzero_si512(), \
- (__mmask8)(U), (int)(R))
+ ((__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
+ (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)(U), (int)(R)))
static __inline__ __m512d __DEFAULT_FN_ATTRS512
@@ -494,19 +494,19 @@ _mm512_maskz_cvtepi64_pd (__mmask8 __U, __m512i __A) {
}
#define _mm512_cvt_roundepi64_pd(A, R) \
- (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)-1, (int)(R))
+ ((__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)-1, (int)(R)))
#define _mm512_mask_cvt_roundepi64_pd(W, U, A, R) \
- (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
- (__v8df)(__m512d)(W), \
- (__mmask8)(U), (int)(R))
+ ((__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
+ (__v8df)(__m512d)(W), \
+ (__mmask8)(U), (int)(R)))
#define _mm512_maskz_cvt_roundepi64_pd(U, A, R) \
- (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)(U), (int)(R))
+ ((__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(U), (int)(R)))
static __inline__ __m256 __DEFAULT_FN_ATTRS512
_mm512_cvtepi64_ps (__m512i __A) {
@@ -533,19 +533,19 @@ _mm512_maskz_cvtepi64_ps (__mmask8 __U, __m512i __A) {
}
#define _mm512_cvt_roundepi64_ps(A, R) \
- (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
- (__v8sf)_mm256_setzero_ps(), \
- (__mmask8)-1, (int)(R))
+ ((__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
+ (__v8sf)_mm256_setzero_ps(), \
+ (__mmask8)-1, (int)(R)))
#define _mm512_mask_cvt_roundepi64_ps(W, U, A, R) \
- (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
- (__v8sf)(__m256)(W), (__mmask8)(U), \
- (int)(R))
+ ((__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
+ (__v8sf)(__m256)(W), (__mmask8)(U), \
+ (int)(R)))
#define _mm512_maskz_cvt_roundepi64_ps(U, A, R) \
- (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
- (__v8sf)_mm256_setzero_ps(), \
- (__mmask8)(U), (int)(R))
+ ((__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
+ (__v8sf)_mm256_setzero_ps(), \
+ (__mmask8)(U), (int)(R)))
static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -573,19 +573,19 @@ _mm512_maskz_cvttpd_epi64 (__mmask8 __U, __m512d __A) {
}
#define _mm512_cvtt_roundpd_epi64(A, R) \
- (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
- (__v8di)_mm512_setzero_si512(), \
- (__mmask8)-1, (int)(R))
+ ((__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
+ (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)-1, (int)(R)))
#define _mm512_mask_cvtt_roundpd_epi64(W, U, A, R) \
- (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
- (__v8di)(__m512i)(W), \
- (__mmask8)(U), (int)(R))
+ ((__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
+ (__v8di)(__m512i)(W), \
+ (__mmask8)(U), (int)(R)))
#define _mm512_maskz_cvtt_roundpd_epi64(U, A, R) \
- (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
- (__v8di)_mm512_setzero_si512(), \
- (__mmask8)(U), (int)(R))
+ ((__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
+ (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)(U), (int)(R)))
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvttpd_epu64 (__m512d __A) {
@@ -612,19 +612,19 @@ _mm512_maskz_cvttpd_epu64 (__mmask8 __U, __m512d __A) {
}
#define _mm512_cvtt_roundpd_epu64(A, R) \
- (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
- (__v8di)_mm512_setzero_si512(), \
- (__mmask8)-1, (int)(R))
+ ((__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
+ (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)-1, (int)(R)))
#define _mm512_mask_cvtt_roundpd_epu64(W, U, A, R) \
- (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
- (__v8di)(__m512i)(W), \
- (__mmask8)(U), (int)(R))
+ ((__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
+ (__v8di)(__m512i)(W), \
+ (__mmask8)(U), (int)(R)))
#define _mm512_maskz_cvtt_roundpd_epu64(U, A, R) \
- (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
- (__v8di)_mm512_setzero_si512(), \
- (__mmask8)(U), (int)(R))
+ ((__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
+ (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)(U), (int)(R)))
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvttps_epi64 (__m256 __A) {
@@ -651,19 +651,19 @@ _mm512_maskz_cvttps_epi64 (__mmask8 __U, __m256 __A) {
}
#define _mm512_cvtt_roundps_epi64(A, R) \
- (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
- (__v8di)_mm512_setzero_si512(), \
- (__mmask8)-1, (int)(R))
+ ((__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
+ (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)-1, (int)(R)))
#define _mm512_mask_cvtt_roundps_epi64(W, U, A, R) \
- (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
- (__v8di)(__m512i)(W), \
- (__mmask8)(U), (int)(R))
+ ((__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
+ (__v8di)(__m512i)(W), \
+ (__mmask8)(U), (int)(R)))
#define _mm512_maskz_cvtt_roundps_epi64(U, A, R) \
- (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
- (__v8di)_mm512_setzero_si512(), \
- (__mmask8)(U), (int)(R))
+ ((__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
+ (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)(U), (int)(R)))
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvttps_epu64 (__m256 __A) {
@@ -690,19 +690,19 @@ _mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A) {
}
#define _mm512_cvtt_roundps_epu64(A, R) \
- (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
- (__v8di)_mm512_setzero_si512(), \
- (__mmask8)-1, (int)(R))
+ ((__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
+ (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)-1, (int)(R)))
#define _mm512_mask_cvtt_roundps_epu64(W, U, A, R) \
- (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
- (__v8di)(__m512i)(W), \
- (__mmask8)(U), (int)(R))
+ ((__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
+ (__v8di)(__m512i)(W), \
+ (__mmask8)(U), (int)(R)))
#define _mm512_maskz_cvtt_roundps_epu64(U, A, R) \
- (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
- (__v8di)_mm512_setzero_si512(), \
- (__mmask8)(U), (int)(R))
+ ((__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
+ (__v8di)_mm512_setzero_si512(), \
+ (__mmask8)(U), (int)(R)))
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_cvtepu64_pd (__m512i __A) {
@@ -724,20 +724,20 @@ _mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A) {
}
#define _mm512_cvt_roundepu64_pd(A, R) \
- (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)-1, (int)(R))
+ ((__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)-1, (int)(R)))
#define _mm512_mask_cvt_roundepu64_pd(W, U, A, R) \
- (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
- (__v8df)(__m512d)(W), \
- (__mmask8)(U), (int)(R))
+ ((__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
+ (__v8df)(__m512d)(W), \
+ (__mmask8)(U), (int)(R)))
#define _mm512_maskz_cvt_roundepu64_pd(U, A, R) \
- (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)(U), (int)(R))
+ ((__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(U), (int)(R)))
static __inline__ __m256 __DEFAULT_FN_ATTRS512
@@ -765,290 +765,290 @@ _mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A) {
}
#define _mm512_cvt_roundepu64_ps(A, R) \
- (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
- (__v8sf)_mm256_setzero_ps(), \
- (__mmask8)-1, (int)(R))
+ ((__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
+ (__v8sf)_mm256_setzero_ps(), \
+ (__mmask8)-1, (int)(R)))
#define _mm512_mask_cvt_roundepu64_ps(W, U, A, R) \
- (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
- (__v8sf)(__m256)(W), (__mmask8)(U), \
- (int)(R))
+ ((__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
+ (__v8sf)(__m256)(W), (__mmask8)(U), \
+ (int)(R)))
#define _mm512_maskz_cvt_roundepu64_ps(U, A, R) \
- (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
- (__v8sf)_mm256_setzero_ps(), \
- (__mmask8)(U), (int)(R))
+ ((__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
+ (__v8sf)_mm256_setzero_ps(), \
+ (__mmask8)(U), (int)(R)))
#define _mm512_range_pd(A, B, C) \
- (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), (int)(C), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)-1, \
- _MM_FROUND_CUR_DIRECTION)
+ ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), (int)(C), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)-1, \
+ _MM_FROUND_CUR_DIRECTION))
#define _mm512_mask_range_pd(W, U, A, B, C) \
- (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), (int)(C), \
- (__v8df)(__m512d)(W), (__mmask8)(U), \
- _MM_FROUND_CUR_DIRECTION)
+ ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), (int)(C), \
+ (__v8df)(__m512d)(W), (__mmask8)(U), \
+ _MM_FROUND_CUR_DIRECTION))
#define _mm512_maskz_range_pd(U, A, B, C) \
- (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), (int)(C), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)(U), \
- _MM_FROUND_CUR_DIRECTION)
+ ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), (int)(C), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(U), \
+ _MM_FROUND_CUR_DIRECTION))
#define _mm512_range_round_pd(A, B, C, R) \
- (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), (int)(C), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)-1, (int)(R))
+ ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), (int)(C), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)-1, (int)(R)))
#define _mm512_mask_range_round_pd(W, U, A, B, C, R) \
- (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), (int)(C), \
- (__v8df)(__m512d)(W), (__mmask8)(U), \
- (int)(R))
+ ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), (int)(C), \
+ (__v8df)(__m512d)(W), (__mmask8)(U), \
+ (int)(R)))
#define _mm512_maskz_range_round_pd(U, A, B, C, R) \
- (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), (int)(C), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)(U), (int)(R))
+ ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), (int)(C), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(U), (int)(R)))
#define _mm512_range_ps(A, B, C) \
- (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), (int)(C), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)-1, \
- _MM_FROUND_CUR_DIRECTION)
+ ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), (int)(C), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)-1, \
+ _MM_FROUND_CUR_DIRECTION))
#define _mm512_mask_range_ps(W, U, A, B, C) \
- (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), (int)(C), \
- (__v16sf)(__m512)(W), (__mmask16)(U), \
- _MM_FROUND_CUR_DIRECTION)
+ ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), (int)(C), \
+ (__v16sf)(__m512)(W), (__mmask16)(U), \
+ _MM_FROUND_CUR_DIRECTION))
#define _mm512_maskz_range_ps(U, A, B, C) \
- (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), (int)(C), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)(U), \
- _MM_FROUND_CUR_DIRECTION)
+ ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), (int)(C), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(U), \
+ _MM_FROUND_CUR_DIRECTION))
#define _mm512_range_round_ps(A, B, C, R) \
- (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), (int)(C), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)-1, (int)(R))
+ ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), (int)(C), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)-1, (int)(R)))
#define _mm512_mask_range_round_ps(W, U, A, B, C, R) \
- (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), (int)(C), \
- (__v16sf)(__m512)(W), (__mmask16)(U), \
- (int)(R))
+ ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), (int)(C), \
+ (__v16sf)(__m512)(W), (__mmask16)(U), \
+ (int)(R)))
#define _mm512_maskz_range_round_ps(U, A, B, C, R) \
- (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), (int)(C), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)(U), (int)(R))
+ ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), (int)(C), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(U), (int)(R)))
#define _mm_range_round_ss(A, B, C, R) \
- (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8) -1, (int)(C),\
- (int)(R))
+ ((__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8) -1, (int)(C),\
+ (int)(R)))
#define _mm_range_ss(A ,B , C) _mm_range_round_ss(A, B, C ,_MM_FROUND_CUR_DIRECTION)
#define _mm_mask_range_round_ss(W, U, A, B, C, R) \
- (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)(__m128)(W),\
- (__mmask8)(U), (int)(C),\
- (int)(R))
+ ((__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(W),\
+ (__mmask8)(U), (int)(C),\
+ (int)(R)))
#define _mm_mask_range_ss(W , U, A, B, C) _mm_mask_range_round_ss(W, U, A, B, C , _MM_FROUND_CUR_DIRECTION)
#define _mm_maskz_range_round_ss(U, A, B, C, R) \
- (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)(U), (int)(C),\
- (int)(R))
+ ((__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U), (int)(C),\
+ (int)(R)))
#define _mm_maskz_range_ss(U, A ,B , C) _mm_maskz_range_round_ss(U, A, B, C ,_MM_FROUND_CUR_DIRECTION)
#define _mm_range_round_sd(A, B, C, R) \
- (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8) -1, (int)(C),\
- (int)(R))
+ ((__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8) -1, (int)(C),\
+ (int)(R)))
#define _mm_range_sd(A ,B , C) _mm_range_round_sd(A, B, C ,_MM_FROUND_CUR_DIRECTION)
#define _mm_mask_range_round_sd(W, U, A, B, C, R) \
- (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)(__m128d)(W),\
- (__mmask8)(U), (int)(C),\
- (int)(R))
+ ((__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(W),\
+ (__mmask8)(U), (int)(C),\
+ (int)(R)))
#define _mm_mask_range_sd(W, U, A, B, C) _mm_mask_range_round_sd(W, U, A, B, C ,_MM_FROUND_CUR_DIRECTION)
#define _mm_maskz_range_round_sd(U, A, B, C, R) \
- (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)(U), (int)(C),\
- (int)(R))
+ ((__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U), (int)(C),\
+ (int)(R)))
#define _mm_maskz_range_sd(U, A, B, C) _mm_maskz_range_round_sd(U, A, B, C ,_MM_FROUND_CUR_DIRECTION)
#define _mm512_reduce_pd(A, B) \
- (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)-1, \
- _MM_FROUND_CUR_DIRECTION)
+ ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)-1, \
+ _MM_FROUND_CUR_DIRECTION))
#define _mm512_mask_reduce_pd(W, U, A, B) \
- (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
- (__v8df)(__m512d)(W), \
- (__mmask8)(U), \
- _MM_FROUND_CUR_DIRECTION)
+ ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
+ (__v8df)(__m512d)(W), \
+ (__mmask8)(U), \
+ _MM_FROUND_CUR_DIRECTION))
#define _mm512_maskz_reduce_pd(U, A, B) \
- (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)(U), \
- _MM_FROUND_CUR_DIRECTION)
+ ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(U), \
+ _MM_FROUND_CUR_DIRECTION))
#define _mm512_reduce_ps(A, B) \
- (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)-1, \
- _MM_FROUND_CUR_DIRECTION)
+ ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)-1, \
+ _MM_FROUND_CUR_DIRECTION))
#define _mm512_mask_reduce_ps(W, U, A, B) \
- (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
- (__v16sf)(__m512)(W), \
- (__mmask16)(U), \
- _MM_FROUND_CUR_DIRECTION)
+ ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
+ (__v16sf)(__m512)(W), \
+ (__mmask16)(U), \
+ _MM_FROUND_CUR_DIRECTION))
#define _mm512_maskz_reduce_ps(U, A, B) \
- (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)(U), \
- _MM_FROUND_CUR_DIRECTION)
+ ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(U), \
+ _MM_FROUND_CUR_DIRECTION))
#define _mm512_reduce_round_pd(A, B, R) \
- (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)-1, (int)(R))
+ ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)-1, (int)(R)))
#define _mm512_mask_reduce_round_pd(W, U, A, B, R) \
- (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
- (__v8df)(__m512d)(W), \
- (__mmask8)(U), (int)(R))
+ ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
+ (__v8df)(__m512d)(W), \
+ (__mmask8)(U), (int)(R)))
#define _mm512_maskz_reduce_round_pd(U, A, B, R) \
- (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)(U), (int)(R))
+ ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(U), (int)(R)))
#define _mm512_reduce_round_ps(A, B, R) \
- (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)-1, (int)(R))
+ ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)-1, (int)(R)))
#define _mm512_mask_reduce_round_ps(W, U, A, B, R) \
- (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
- (__v16sf)(__m512)(W), \
- (__mmask16)(U), (int)(R))
+ ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
+ (__v16sf)(__m512)(W), \
+ (__mmask16)(U), (int)(R)))
#define _mm512_maskz_reduce_round_ps(U, A, B, R) \
- (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)(U), (int)(R))
+ ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(U), (int)(R)))
#define _mm_reduce_ss(A, B, C) \
- (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \
- (int)(C), _MM_FROUND_CUR_DIRECTION)
+ ((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \
+ (int)(C), _MM_FROUND_CUR_DIRECTION))
#define _mm_mask_reduce_ss(W, U, A, B, C) \
- (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)(__m128)(W), (__mmask8)(U), \
- (int)(C), _MM_FROUND_CUR_DIRECTION)
+ ((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(W), (__mmask8)(U), \
+ (int)(C), _MM_FROUND_CUR_DIRECTION))
#define _mm_maskz_reduce_ss(U, A, B, C) \
- (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)(U), (int)(C), \
- _MM_FROUND_CUR_DIRECTION)
+ ((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U), (int)(C), \
+ _MM_FROUND_CUR_DIRECTION))
#define _mm_reduce_round_ss(A, B, C, R) \
- (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \
- (int)(C), (int)(R))
+ ((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \
+ (int)(C), (int)(R)))
#define _mm_mask_reduce_round_ss(W, U, A, B, C, R) \
- (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)(__m128)(W), (__mmask8)(U), \
- (int)(C), (int)(R))
+ ((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(W), (__mmask8)(U), \
+ (int)(C), (int)(R)))
#define _mm_maskz_reduce_round_ss(U, A, B, C, R) \
- (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)(U), (int)(C), (int)(R))
+ ((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U), (int)(C), (int)(R)))
#define _mm_reduce_sd(A, B, C) \
- (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)-1, (int)(C), \
- _MM_FROUND_CUR_DIRECTION)
+ ((__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1, (int)(C), \
+ _MM_FROUND_CUR_DIRECTION))
#define _mm_mask_reduce_sd(W, U, A, B, C) \
- (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)(__m128d)(W), (__mmask8)(U), \
- (int)(C), _MM_FROUND_CUR_DIRECTION)
+ ((__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(W), (__mmask8)(U), \
+ (int)(C), _MM_FROUND_CUR_DIRECTION))
#define _mm_maskz_reduce_sd(U, A, B, C) \
- (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)(U), (int)(C), \
- _MM_FROUND_CUR_DIRECTION)
+ ((__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U), (int)(C), \
+ _MM_FROUND_CUR_DIRECTION))
#define _mm_reduce_round_sd(A, B, C, R) \
- (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)-1, (int)(C), (int)(R))
+ ((__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1, (int)(C), (int)(R)))
#define _mm_mask_reduce_round_sd(W, U, A, B, C, R) \
- (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)(__m128d)(W), (__mmask8)(U), \
- (int)(C), (int)(R))
+ ((__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(W), (__mmask8)(U), \
+ (int)(C), (int)(R)))
#define _mm_maskz_reduce_round_sd(U, A, B, C, R) \
- (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)(U), (int)(C), (int)(R))
+ ((__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U), (int)(C), (int)(R)))
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
_mm512_movepi32_mask (__m512i __A)
@@ -1218,158 +1218,158 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
}
#define _mm512_extractf32x8_ps(A, imm) \
- (__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
- (__v8sf)_mm256_undefined_ps(), \
- (__mmask8)-1)
+ ((__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
+ (__v8sf)_mm256_undefined_ps(), \
+ (__mmask8)-1))
#define _mm512_mask_extractf32x8_ps(W, U, A, imm) \
- (__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
- (__v8sf)(__m256)(W), \
- (__mmask8)(U))
+ ((__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
+ (__v8sf)(__m256)(W), \
+ (__mmask8)(U)))
#define _mm512_maskz_extractf32x8_ps(U, A, imm) \
- (__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
- (__v8sf)_mm256_setzero_ps(), \
- (__mmask8)(U))
+ ((__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
+ (__v8sf)_mm256_setzero_ps(), \
+ (__mmask8)(U)))
#define _mm512_extractf64x2_pd(A, imm) \
- (__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
- (int)(imm), \
- (__v2df)_mm_undefined_pd(), \
- (__mmask8)-1)
+ ((__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
+ (int)(imm), \
+ (__v2df)_mm_undefined_pd(), \
+ (__mmask8)-1))
#define _mm512_mask_extractf64x2_pd(W, U, A, imm) \
- (__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
- (int)(imm), \
- (__v2df)(__m128d)(W), \
- (__mmask8)(U))
+ ((__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
+ (int)(imm), \
+ (__v2df)(__m128d)(W), \
+ (__mmask8)(U)))
#define _mm512_maskz_extractf64x2_pd(U, A, imm) \
- (__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
- (int)(imm), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)(U))
+ ((__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
+ (int)(imm), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U)))
#define _mm512_extracti32x8_epi32(A, imm) \
- (__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
- (__v8si)_mm256_undefined_si256(), \
- (__mmask8)-1)
+ ((__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
+ (__v8si)_mm256_undefined_si256(), \
+ (__mmask8)-1))
#define _mm512_mask_extracti32x8_epi32(W, U, A, imm) \
- (__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
- (__v8si)(__m256i)(W), \
- (__mmask8)(U))
+ ((__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
+ (__v8si)(__m256i)(W), \
+ (__mmask8)(U)))
#define _mm512_maskz_extracti32x8_epi32(U, A, imm) \
- (__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
- (__v8si)_mm256_setzero_si256(), \
- (__mmask8)(U))
+ ((__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
+ (__v8si)_mm256_setzero_si256(), \
+ (__mmask8)(U)))
#define _mm512_extracti64x2_epi64(A, imm) \
- (__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
+ ((__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
(int)(imm), \
(__v2di)_mm_undefined_si128(), \
- (__mmask8)-1)
+ (__mmask8)-1))
#define _mm512_mask_extracti64x2_epi64(W, U, A, imm) \
- (__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
- (int)(imm), \
- (__v2di)(__m128i)(W), \
- (__mmask8)(U))
+ ((__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
+ (int)(imm), \
+ (__v2di)(__m128i)(W), \
+ (__mmask8)(U)))
#define _mm512_maskz_extracti64x2_epi64(U, A, imm) \
- (__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
- (int)(imm), \
- (__v2di)_mm_setzero_si128(), \
- (__mmask8)(U))
+ ((__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
+ (int)(imm), \
+ (__v2di)_mm_setzero_si128(), \
+ (__mmask8)(U)))
#define _mm512_insertf32x8(A, B, imm) \
- (__m512)__builtin_ia32_insertf32x8((__v16sf)(__m512)(A), \
- (__v8sf)(__m256)(B), (int)(imm))
+ ((__m512)__builtin_ia32_insertf32x8((__v16sf)(__m512)(A), \
+ (__v8sf)(__m256)(B), (int)(imm)))
#define _mm512_mask_insertf32x8(W, U, A, B, imm) \
- (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
(__v16sf)_mm512_insertf32x8((A), (B), (imm)), \
- (__v16sf)(__m512)(W))
+ (__v16sf)(__m512)(W)))
#define _mm512_maskz_insertf32x8(U, A, B, imm) \
- (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
(__v16sf)_mm512_insertf32x8((A), (B), (imm)), \
- (__v16sf)_mm512_setzero_ps())
+ (__v16sf)_mm512_setzero_ps()))
#define _mm512_insertf64x2(A, B, imm) \
- (__m512d)__builtin_ia32_insertf64x2_512((__v8df)(__m512d)(A), \
- (__v2df)(__m128d)(B), (int)(imm))
+ ((__m512d)__builtin_ia32_insertf64x2_512((__v8df)(__m512d)(A), \
+ (__v2df)(__m128d)(B), (int)(imm)))
#define _mm512_mask_insertf64x2(W, U, A, B, imm) \
- (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
(__v8df)_mm512_insertf64x2((A), (B), (imm)), \
- (__v8df)(__m512d)(W))
+ (__v8df)(__m512d)(W)))
#define _mm512_maskz_insertf64x2(U, A, B, imm) \
- (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
(__v8df)_mm512_insertf64x2((A), (B), (imm)), \
- (__v8df)_mm512_setzero_pd())
+ (__v8df)_mm512_setzero_pd()))
#define _mm512_inserti32x8(A, B, imm) \
- (__m512i)__builtin_ia32_inserti32x8((__v16si)(__m512i)(A), \
- (__v8si)(__m256i)(B), (int)(imm))
+ ((__m512i)__builtin_ia32_inserti32x8((__v16si)(__m512i)(A), \
+ (__v8si)(__m256i)(B), (int)(imm)))
#define _mm512_mask_inserti32x8(W, U, A, B, imm) \
- (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
+ ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
(__v16si)_mm512_inserti32x8((A), (B), (imm)), \
- (__v16si)(__m512i)(W))
+ (__v16si)(__m512i)(W)))
#define _mm512_maskz_inserti32x8(U, A, B, imm) \
- (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
+ ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
(__v16si)_mm512_inserti32x8((A), (B), (imm)), \
- (__v16si)_mm512_setzero_si512())
+ (__v16si)_mm512_setzero_si512()))
#define _mm512_inserti64x2(A, B, imm) \
- (__m512i)__builtin_ia32_inserti64x2_512((__v8di)(__m512i)(A), \
- (__v2di)(__m128i)(B), (int)(imm))
+ ((__m512i)__builtin_ia32_inserti64x2_512((__v8di)(__m512i)(A), \
+ (__v2di)(__m128i)(B), (int)(imm)))
#define _mm512_mask_inserti64x2(W, U, A, B, imm) \
- (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
+ ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
(__v8di)_mm512_inserti64x2((A), (B), (imm)), \
- (__v8di)(__m512i)(W))
+ (__v8di)(__m512i)(W)))
#define _mm512_maskz_inserti64x2(U, A, B, imm) \
- (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
+ ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
(__v8di)_mm512_inserti64x2((A), (B), (imm)), \
- (__v8di)_mm512_setzero_si512())
+ (__v8di)_mm512_setzero_si512()))
#define _mm512_mask_fpclass_ps_mask(U, A, imm) \
- (__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \
- (int)(imm), (__mmask16)(U))
+ ((__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \
+ (int)(imm), (__mmask16)(U)))
#define _mm512_fpclass_ps_mask(A, imm) \
- (__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \
- (int)(imm), (__mmask16)-1)
+ ((__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \
+ (int)(imm), (__mmask16)-1))
#define _mm512_mask_fpclass_pd_mask(U, A, imm) \
- (__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \
- (__mmask8)(U))
+ ((__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \
+ (__mmask8)(U)))
#define _mm512_fpclass_pd_mask(A, imm) \
- (__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \
- (__mmask8)-1)
+ ((__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \
+ (__mmask8)-1))
#define _mm_fpclass_sd_mask(A, imm) \
- (__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \
- (__mmask8)-1)
+ ((__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \
+ (__mmask8)-1))
#define _mm_mask_fpclass_sd_mask(U, A, imm) \
- (__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \
- (__mmask8)(U))
+ ((__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \
+ (__mmask8)(U)))
#define _mm_fpclass_ss_mask(A, imm) \
- (__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \
- (__mmask8)-1)
+ ((__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \
+ (__mmask8)-1))
#define _mm_mask_fpclass_ss_mask(U, A, imm) \
- (__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \
- (__mmask8)(U))
+ ((__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \
+ (__mmask8)(U)))
#undef __DEFAULT_FN_ATTRS512
#undef __DEFAULT_FN_ATTRS
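The hunks above and below all make the same mechanical change: each intrinsic macro body gains one more pair of parentheses, so the macro now expands to a single parenthesized expression instead of a bare cast of a builtin call. A minimal sketch of why fully parenthesized expansions are the safer convention, using a made-up macro (ADD4 is hypothetical and not from these headers; the real macros are simpler, but the principle is the same):

#define ADD4_BAD(x)  (x) + 4      /* expansion is left open at the '+'   */
#define ADD4_GOOD(x) ((x) + 4)    /* expansion is one closed expression  */

int main(void) {
    int bad  = ADD4_BAD(2) * 3;   /* (2) + 4 * 3   == 14: the caller's '*' binds into the expansion */
    int good = ADD4_GOOD(2) * 3;  /* ((4) + 2) -> ((2) + 4) * 3 == 18: behaves like a real function call */
    return good - bad;            /* 4 */
}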
diff --git a/clang/lib/Headers/avx512erintrin.h b/clang/lib/Headers/avx512erintrin.h
index 857006169906..1c5a2d2d208f 100644
--- a/clang/lib/Headers/avx512erintrin.h
+++ b/clang/lib/Headers/avx512erintrin.h
@@ -15,19 +15,19 @@
/* exp2a23 */
#define _mm512_exp2a23_round_pd(A, R) \
- (__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)-1, (int)(R))
+ ((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)-1, (int)(R)))
#define _mm512_mask_exp2a23_round_pd(S, M, A, R) \
- (__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(S), (__mmask8)(M), \
- (int)(R))
+ ((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(S), (__mmask8)(M), \
+ (int)(R)))
#define _mm512_maskz_exp2a23_round_pd(M, A, R) \
- (__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)(M), (int)(R))
+ ((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(M), (int)(R)))
#define _mm512_exp2a23_pd(A) \
_mm512_exp2a23_round_pd((A), _MM_FROUND_CUR_DIRECTION)
@@ -39,19 +39,19 @@
_mm512_maskz_exp2a23_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)
#define _mm512_exp2a23_round_ps(A, R) \
- (__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)-1, (int)(R))
+ ((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)-1, (int)(R)))
#define _mm512_mask_exp2a23_round_ps(S, M, A, R) \
- (__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(S), (__mmask16)(M), \
- (int)(R))
+ ((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(S), (__mmask16)(M), \
+ (int)(R)))
#define _mm512_maskz_exp2a23_round_ps(M, A, R) \
- (__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)(M), (int)(R))
+ ((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(M), (int)(R)))
#define _mm512_exp2a23_ps(A) \
_mm512_exp2a23_round_ps((A), _MM_FROUND_CUR_DIRECTION)
@@ -64,19 +64,19 @@
/* rsqrt28 */
#define _mm512_rsqrt28_round_pd(A, R) \
- (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)-1, (int)(R))
+ ((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)-1, (int)(R)))
#define _mm512_mask_rsqrt28_round_pd(S, M, A, R) \
- (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(S), (__mmask8)(M), \
- (int)(R))
+ ((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(S), (__mmask8)(M), \
+ (int)(R)))
#define _mm512_maskz_rsqrt28_round_pd(M, A, R) \
- (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)(M), (int)(R))
+ ((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(M), (int)(R)))
#define _mm512_rsqrt28_pd(A) \
_mm512_rsqrt28_round_pd((A), _MM_FROUND_CUR_DIRECTION)
@@ -88,19 +88,19 @@
_mm512_maskz_rsqrt28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)
#define _mm512_rsqrt28_round_ps(A, R) \
- (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)-1, (int)(R))
+ ((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)-1, (int)(R)))
#define _mm512_mask_rsqrt28_round_ps(S, M, A, R) \
- (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(S), (__mmask16)(M), \
- (int)(R))
+ ((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(S), (__mmask16)(M), \
+ (int)(R)))
#define _mm512_maskz_rsqrt28_round_ps(M, A, R) \
- (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)(M), (int)(R))
+ ((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(M), (int)(R)))
#define _mm512_rsqrt28_ps(A) \
_mm512_rsqrt28_round_ps((A), _MM_FROUND_CUR_DIRECTION)
@@ -112,22 +112,22 @@
_mm512_maskz_rsqrt28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
#define _mm_rsqrt28_round_ss(A, B, R) \
- (__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)-1, (int)(R))
+ ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)-1, (int)(R)))
#define _mm_mask_rsqrt28_round_ss(S, M, A, B, R) \
- (__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)(__m128)(S), \
- (__mmask8)(M), (int)(R))
+ ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(S), \
+ (__mmask8)(M), (int)(R)))
#define _mm_maskz_rsqrt28_round_ss(M, A, B, R) \
- (__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)(M), (int)(R))
+ ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(M), (int)(R)))
#define _mm_rsqrt28_ss(A, B) \
_mm_rsqrt28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION)
@@ -139,22 +139,22 @@
_mm_maskz_rsqrt28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
#define _mm_rsqrt28_round_sd(A, B, R) \
- (__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)-1, (int)(R))
+ ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1, (int)(R)))
#define _mm_mask_rsqrt28_round_sd(S, M, A, B, R) \
- (__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)(__m128d)(S), \
- (__mmask8)(M), (int)(R))
+ ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(S), \
+ (__mmask8)(M), (int)(R)))
#define _mm_maskz_rsqrt28_round_sd(M, A, B, R) \
- (__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)(M), (int)(R))
+ ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(M), (int)(R)))
#define _mm_rsqrt28_sd(A, B) \
_mm_rsqrt28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION)
@@ -167,19 +167,19 @@
/* rcp28 */
#define _mm512_rcp28_round_pd(A, R) \
- (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)-1, (int)(R))
+ ((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)-1, (int)(R)))
#define _mm512_mask_rcp28_round_pd(S, M, A, R) \
- (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(S), (__mmask8)(M), \
- (int)(R))
+ ((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(S), (__mmask8)(M), \
+ (int)(R)))
#define _mm512_maskz_rcp28_round_pd(M, A, R) \
- (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)(M), (int)(R))
+ ((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(M), (int)(R)))
#define _mm512_rcp28_pd(A) \
_mm512_rcp28_round_pd((A), _MM_FROUND_CUR_DIRECTION)
@@ -191,19 +191,19 @@
_mm512_maskz_rcp28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)
#define _mm512_rcp28_round_ps(A, R) \
- (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)-1, (int)(R))
+ ((__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)-1, (int)(R)))
#define _mm512_mask_rcp28_round_ps(S, M, A, R) \
- (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(S), (__mmask16)(M), \
- (int)(R))
+ ((__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(S), (__mmask16)(M), \
+ (int)(R)))
#define _mm512_maskz_rcp28_round_ps(M, A, R) \
- (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)(M), (int)(R))
+ ((__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(M), (int)(R)))
#define _mm512_rcp28_ps(A) \
_mm512_rcp28_round_ps((A), _MM_FROUND_CUR_DIRECTION)
@@ -215,22 +215,22 @@
_mm512_maskz_rcp28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
#define _mm_rcp28_round_ss(A, B, R) \
- (__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)-1, (int)(R))
+ ((__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)-1, (int)(R)))
#define _mm_mask_rcp28_round_ss(S, M, A, B, R) \
- (__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)(__m128)(S), \
- (__mmask8)(M), (int)(R))
+ ((__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(S), \
+ (__mmask8)(M), (int)(R)))
#define _mm_maskz_rcp28_round_ss(M, A, B, R) \
- (__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)(M), (int)(R))
+ ((__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(M), (int)(R)))
#define _mm_rcp28_ss(A, B) \
_mm_rcp28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION)
@@ -242,22 +242,22 @@
_mm_maskz_rcp28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
#define _mm_rcp28_round_sd(A, B, R) \
- (__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)-1, (int)(R))
+ ((__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1, (int)(R)))
#define _mm_mask_rcp28_round_sd(S, M, A, B, R) \
- (__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)(__m128d)(S), \
- (__mmask8)(M), (int)(R))
+ ((__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(S), \
+ (__mmask8)(M), (int)(R)))
#define _mm_maskz_rcp28_round_sd(M, A, B, R) \
- (__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)(M), (int)(R))
+ ((__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(M), (int)(R)))
#define _mm_rcp28_sd(A, B) \
_mm_rcp28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION)
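With the parentheses added above, every exp2a23/rcp28/rsqrt28 macro in this header expands to one self-contained expression, so a call site can treat it like an ordinary function returning a vector. A small usage sketch, assuming a toolchain that ships these headers (compile with something like -mavx512f -mavx512er; actually executing it additionally needs AVX-512ER hardware such as Knights Landing):

#include <immintrin.h>

/* Approximate 1/x for eight doubles with the rcp28 macro shown in the hunk above;
 * _MM_FROUND_CUR_DIRECTION keeps the current rounding mode (no embedded rounding). */
__m512d approx_recip(__m512d x) {
    return _mm512_rcp28_round_pd(x, _MM_FROUND_CUR_DIRECTION);
}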
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 010bcadab019..df298640523b 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -937,18 +937,18 @@ _mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
}
#define _mm512_max_round_pd(A, B, R) \
- (__m512d)__builtin_ia32_maxpd512((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), (int)(R))
+ ((__m512d)__builtin_ia32_maxpd512((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), (int)(R)))
#define _mm512_mask_max_round_pd(W, U, A, B, R) \
- (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
(__v8df)_mm512_max_round_pd((A), (B), (R)), \
- (__v8df)(W))
+ (__v8df)(W)))
#define _mm512_maskz_max_round_pd(U, A, B, R) \
- (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
(__v8df)_mm512_max_round_pd((A), (B), (R)), \
- (__v8df)_mm512_setzero_pd())
+ (__v8df)_mm512_setzero_pd()))
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_max_pd(__m512d __A, __m512d __B)
@@ -974,18 +974,18 @@ _mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
}
#define _mm512_max_round_ps(A, B, R) \
- (__m512)__builtin_ia32_maxps512((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), (int)(R))
+ ((__m512)__builtin_ia32_maxps512((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), (int)(R)))
#define _mm512_mask_max_round_ps(W, U, A, B, R) \
- (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
(__v16sf)_mm512_max_round_ps((A), (B), (R)), \
- (__v16sf)(W))
+ (__v16sf)(W)))
#define _mm512_maskz_max_round_ps(U, A, B, R) \
- (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
(__v16sf)_mm512_max_round_ps((A), (B), (R)), \
- (__v16sf)_mm512_setzero_ps())
+ (__v16sf)_mm512_setzero_ps()))
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_max_ps(__m512 __A, __m512 __B)
@@ -1029,22 +1029,22 @@ _mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
}
#define _mm_max_round_ss(A, B, R) \
- (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)-1, (int)(R))
+ ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)-1, (int)(R)))
#define _mm_mask_max_round_ss(W, U, A, B, R) \
- (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)(__m128)(W), (__mmask8)(U), \
- (int)(R))
+ ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(W), (__mmask8)(U), \
+ (int)(R)))
#define _mm_maskz_max_round_ss(U, A, B, R) \
- (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)(U), (int)(R))
+ ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U), (int)(R)))
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
@@ -1065,22 +1065,22 @@ _mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
}
#define _mm_max_round_sd(A, B, R) \
- (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)-1, (int)(R))
+ ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1, (int)(R)))
#define _mm_mask_max_round_sd(W, U, A, B, R) \
- (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)(__m128d)(W), \
- (__mmask8)(U), (int)(R))
+ ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(W), \
+ (__mmask8)(U), (int)(R)))
#define _mm_maskz_max_round_sd(U, A, B, R) \
- (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)(U), (int)(R))
+ ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U), (int)(R)))
static __inline __m512i
__DEFAULT_FN_ATTRS512
@@ -1172,18 +1172,18 @@ _mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
}
#define _mm512_min_round_pd(A, B, R) \
- (__m512d)__builtin_ia32_minpd512((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), (int)(R))
+ ((__m512d)__builtin_ia32_minpd512((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), (int)(R)))
#define _mm512_mask_min_round_pd(W, U, A, B, R) \
- (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
(__v8df)_mm512_min_round_pd((A), (B), (R)), \
- (__v8df)(W))
+ (__v8df)(W)))
#define _mm512_maskz_min_round_pd(U, A, B, R) \
- (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
(__v8df)_mm512_min_round_pd((A), (B), (R)), \
- (__v8df)_mm512_setzero_pd())
+ (__v8df)_mm512_setzero_pd()))
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_min_pd(__m512d __A, __m512d __B)
@@ -1209,18 +1209,18 @@ _mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
}
#define _mm512_min_round_ps(A, B, R) \
- (__m512)__builtin_ia32_minps512((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), (int)(R))
+ ((__m512)__builtin_ia32_minps512((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), (int)(R)))
#define _mm512_mask_min_round_ps(W, U, A, B, R) \
- (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
(__v16sf)_mm512_min_round_ps((A), (B), (R)), \
- (__v16sf)(W))
+ (__v16sf)(W)))
#define _mm512_maskz_min_round_ps(U, A, B, R) \
- (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
(__v16sf)_mm512_min_round_ps((A), (B), (R)), \
- (__v16sf)_mm512_setzero_ps())
+ (__v16sf)_mm512_setzero_ps()))
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_min_ps(__m512 __A, __m512 __B)
@@ -1264,22 +1264,22 @@ _mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
}
#define _mm_min_round_ss(A, B, R) \
- (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)-1, (int)(R))
+ ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)-1, (int)(R)))
#define _mm_mask_min_round_ss(W, U, A, B, R) \
- (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)(__m128)(W), (__mmask8)(U), \
- (int)(R))
+ ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(W), (__mmask8)(U), \
+ (int)(R)))
#define _mm_maskz_min_round_ss(U, A, B, R) \
- (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)(U), (int)(R))
+ ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U), (int)(R)))
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
@@ -1300,22 +1300,22 @@ _mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
}
#define _mm_min_round_sd(A, B, R) \
- (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)-1, (int)(R))
+ ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1, (int)(R)))
#define _mm_mask_min_round_sd(W, U, A, B, R) \
- (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)(__m128d)(W), \
- (__mmask8)(U), (int)(R))
+ ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(W), \
+ (__mmask8)(U), (int)(R)))
#define _mm_maskz_min_round_sd(U, A, B, R) \
- (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)(U), (int)(R))
+ ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U), (int)(R)))
static __inline __m512i
__DEFAULT_FN_ATTRS512
@@ -1485,17 +1485,17 @@ _mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
}
#define _mm512_sqrt_round_pd(A, R) \
- (__m512d)__builtin_ia32_sqrtpd512((__v8df)(__m512d)(A), (int)(R))
+ ((__m512d)__builtin_ia32_sqrtpd512((__v8df)(__m512d)(A), (int)(R)))
#define _mm512_mask_sqrt_round_pd(W, U, A, R) \
- (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
(__v8df)_mm512_sqrt_round_pd((A), (R)), \
- (__v8df)(__m512d)(W))
+ (__v8df)(__m512d)(W)))
#define _mm512_maskz_sqrt_round_pd(U, A, R) \
- (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
(__v8df)_mm512_sqrt_round_pd((A), (R)), \
- (__v8df)_mm512_setzero_pd())
+ (__v8df)_mm512_setzero_pd()))
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_sqrt_pd(__m512d __A)
@@ -1521,17 +1521,17 @@ _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
}
#define _mm512_sqrt_round_ps(A, R) \
- (__m512)__builtin_ia32_sqrtps512((__v16sf)(__m512)(A), (int)(R))
+ ((__m512)__builtin_ia32_sqrtps512((__v16sf)(__m512)(A), (int)(R)))
#define _mm512_mask_sqrt_round_ps(W, U, A, R) \
- (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
(__v16sf)_mm512_sqrt_round_ps((A), (R)), \
- (__v16sf)(__m512)(W))
+ (__v16sf)(__m512)(W)))
#define _mm512_maskz_sqrt_round_ps(U, A, R) \
- (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
(__v16sf)_mm512_sqrt_round_ps((A), (R)), \
- (__v16sf)_mm512_setzero_ps())
+ (__v16sf)_mm512_setzero_ps()))
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_sqrt_ps(__m512 __A)
@@ -1900,22 +1900,22 @@ _mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
}
#define _mm_add_round_ss(A, B, R) \
- (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)-1, (int)(R))
+ ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)-1, (int)(R)))
#define _mm_mask_add_round_ss(W, U, A, B, R) \
- (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)(__m128)(W), (__mmask8)(U), \
- (int)(R))
+ ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(W), (__mmask8)(U), \
+ (int)(R)))
#define _mm_maskz_add_round_ss(U, A, B, R) \
- (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)(U), (int)(R))
+ ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U), (int)(R)))
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
@@ -1929,22 +1929,22 @@ _mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
}
#define _mm_add_round_sd(A, B, R) \
- (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)-1, (int)(R))
+ ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1, (int)(R)))
#define _mm_mask_add_round_sd(W, U, A, B, R) \
- (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)(__m128d)(W), \
- (__mmask8)(U), (int)(R))
+ ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(W), \
+ (__mmask8)(U), (int)(R)))
#define _mm_maskz_add_round_sd(U, A, B, R) \
- (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)(U), (int)(R))
+ ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U), (int)(R)))
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
@@ -1975,32 +1975,32 @@ _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
}
#define _mm512_add_round_pd(A, B, R) \
- (__m512d)__builtin_ia32_addpd512((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), (int)(R))
+ ((__m512d)__builtin_ia32_addpd512((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), (int)(R)))
#define _mm512_mask_add_round_pd(W, U, A, B, R) \
- (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
(__v8df)_mm512_add_round_pd((A), (B), (R)), \
- (__v8df)(__m512d)(W))
+ (__v8df)(__m512d)(W)))
#define _mm512_maskz_add_round_pd(U, A, B, R) \
- (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
(__v8df)_mm512_add_round_pd((A), (B), (R)), \
- (__v8df)_mm512_setzero_pd())
+ (__v8df)_mm512_setzero_pd()))
#define _mm512_add_round_ps(A, B, R) \
- (__m512)__builtin_ia32_addps512((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), (int)(R))
+ ((__m512)__builtin_ia32_addps512((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), (int)(R)))
#define _mm512_mask_add_round_ps(W, U, A, B, R) \
- (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
(__v16sf)_mm512_add_round_ps((A), (B), (R)), \
- (__v16sf)(__m512)(W))
+ (__v16sf)(__m512)(W)))
#define _mm512_maskz_add_round_ps(U, A, B, R) \
- (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
(__v16sf)_mm512_add_round_ps((A), (B), (R)), \
- (__v16sf)_mm512_setzero_ps())
+ (__v16sf)_mm512_setzero_ps()))
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
@@ -2014,22 +2014,22 @@ _mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
}
#define _mm_sub_round_ss(A, B, R) \
- (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)-1, (int)(R))
+ ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)-1, (int)(R)))
#define _mm_mask_sub_round_ss(W, U, A, B, R) \
- (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)(__m128)(W), (__mmask8)(U), \
- (int)(R))
+ ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(W), (__mmask8)(U), \
+ (int)(R)))
#define _mm_maskz_sub_round_ss(U, A, B, R) \
- (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)(U), (int)(R))
+ ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U), (int)(R)))
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
@@ -2044,22 +2044,22 @@ _mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
}
#define _mm_sub_round_sd(A, B, R) \
- (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)-1, (int)(R))
+ ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1, (int)(R)))
#define _mm_mask_sub_round_sd(W, U, A, B, R) \
- (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)(__m128d)(W), \
- (__mmask8)(U), (int)(R))
+ ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(W), \
+ (__mmask8)(U), (int)(R)))
#define _mm_maskz_sub_round_sd(U, A, B, R) \
- (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)(U), (int)(R))
+ ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U), (int)(R)))
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
@@ -2090,32 +2090,32 @@ _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
}
#define _mm512_sub_round_pd(A, B, R) \
- (__m512d)__builtin_ia32_subpd512((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), (int)(R))
+ ((__m512d)__builtin_ia32_subpd512((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), (int)(R)))
#define _mm512_mask_sub_round_pd(W, U, A, B, R) \
- (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
(__v8df)_mm512_sub_round_pd((A), (B), (R)), \
- (__v8df)(__m512d)(W))
+ (__v8df)(__m512d)(W)))
#define _mm512_maskz_sub_round_pd(U, A, B, R) \
- (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
(__v8df)_mm512_sub_round_pd((A), (B), (R)), \
- (__v8df)_mm512_setzero_pd())
+ (__v8df)_mm512_setzero_pd()))
#define _mm512_sub_round_ps(A, B, R) \
- (__m512)__builtin_ia32_subps512((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), (int)(R))
+ ((__m512)__builtin_ia32_subps512((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), (int)(R)))
#define _mm512_mask_sub_round_ps(W, U, A, B, R) \
- (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
(__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
- (__v16sf)(__m512)(W))
+ (__v16sf)(__m512)(W)))
#define _mm512_maskz_sub_round_ps(U, A, B, R) \
- (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
(__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
- (__v16sf)_mm512_setzero_ps())
+ (__v16sf)_mm512_setzero_ps()))
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
@@ -2129,22 +2129,22 @@ _mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
}
#define _mm_mul_round_ss(A, B, R) \
- (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)-1, (int)(R))
+ ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)-1, (int)(R)))
#define _mm_mask_mul_round_ss(W, U, A, B, R) \
- (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)(__m128)(W), (__mmask8)(U), \
- (int)(R))
+ ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(W), (__mmask8)(U), \
+ (int)(R)))
#define _mm_maskz_mul_round_ss(U, A, B, R) \
- (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)(U), (int)(R))
+ ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U), (int)(R)))
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
@@ -2159,22 +2159,22 @@ _mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
}
#define _mm_mul_round_sd(A, B, R) \
- (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)-1, (int)(R))
+ ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1, (int)(R)))
#define _mm_mask_mul_round_sd(W, U, A, B, R) \
- (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)(__m128d)(W), \
- (__mmask8)(U), (int)(R))
+ ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(W), \
+ (__mmask8)(U), (int)(R)))
#define _mm_maskz_mul_round_sd(U, A, B, R) \
- (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)(U), (int)(R))
+ ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U), (int)(R)))
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
@@ -2205,32 +2205,32 @@ _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
}
#define _mm512_mul_round_pd(A, B, R) \
- (__m512d)__builtin_ia32_mulpd512((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), (int)(R))
+ ((__m512d)__builtin_ia32_mulpd512((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), (int)(R)))
#define _mm512_mask_mul_round_pd(W, U, A, B, R) \
- (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
(__v8df)_mm512_mul_round_pd((A), (B), (R)), \
- (__v8df)(__m512d)(W))
+ (__v8df)(__m512d)(W)))
#define _mm512_maskz_mul_round_pd(U, A, B, R) \
- (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
(__v8df)_mm512_mul_round_pd((A), (B), (R)), \
- (__v8df)_mm512_setzero_pd())
+ (__v8df)_mm512_setzero_pd()))
#define _mm512_mul_round_ps(A, B, R) \
- (__m512)__builtin_ia32_mulps512((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), (int)(R))
+ ((__m512)__builtin_ia32_mulps512((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), (int)(R)))
#define _mm512_mask_mul_round_ps(W, U, A, B, R) \
- (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
(__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
- (__v16sf)(__m512)(W))
+ (__v16sf)(__m512)(W)))
#define _mm512_maskz_mul_round_ps(U, A, B, R) \
- (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
(__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
- (__v16sf)_mm512_setzero_ps())
+ (__v16sf)_mm512_setzero_ps()))
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
@@ -2245,22 +2245,22 @@ _mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
}
#define _mm_div_round_ss(A, B, R) \
- (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)-1, (int)(R))
+ ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)-1, (int)(R)))
#define _mm_mask_div_round_ss(W, U, A, B, R) \
- (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)(__m128)(W), (__mmask8)(U), \
- (int)(R))
+ ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(W), (__mmask8)(U), \
+ (int)(R)))
#define _mm_maskz_div_round_ss(U, A, B, R) \
- (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)(U), (int)(R))
+ ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U), (int)(R)))
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
@@ -2275,22 +2275,22 @@ _mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
}
#define _mm_div_round_sd(A, B, R) \
- (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)-1, (int)(R))
+ ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1, (int)(R)))
#define _mm_mask_div_round_sd(W, U, A, B, R) \
- (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)(__m128d)(W), \
- (__mmask8)(U), (int)(R))
+ ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(W), \
+ (__mmask8)(U), (int)(R)))
#define _mm_maskz_div_round_sd(U, A, B, R) \
- (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)(U), (int)(R))
+ ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U), (int)(R)))
static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_div_pd(__m512d __a, __m512d __b)
@@ -2333,179 +2333,179 @@ _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
}
#define _mm512_div_round_pd(A, B, R) \
- (__m512d)__builtin_ia32_divpd512((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), (int)(R))
+ ((__m512d)__builtin_ia32_divpd512((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), (int)(R)))
#define _mm512_mask_div_round_pd(W, U, A, B, R) \
- (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
(__v8df)_mm512_div_round_pd((A), (B), (R)), \
- (__v8df)(__m512d)(W))
+ (__v8df)(__m512d)(W)))
#define _mm512_maskz_div_round_pd(U, A, B, R) \
- (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
(__v8df)_mm512_div_round_pd((A), (B), (R)), \
- (__v8df)_mm512_setzero_pd())
+ (__v8df)_mm512_setzero_pd()))
#define _mm512_div_round_ps(A, B, R) \
- (__m512)__builtin_ia32_divps512((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), (int)(R))
+ ((__m512)__builtin_ia32_divps512((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), (int)(R)))
#define _mm512_mask_div_round_ps(W, U, A, B, R) \
- (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
(__v16sf)_mm512_div_round_ps((A), (B), (R)), \
- (__v16sf)(__m512)(W))
+ (__v16sf)(__m512)(W)))
#define _mm512_maskz_div_round_ps(U, A, B, R) \
- (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
(__v16sf)_mm512_div_round_ps((A), (B), (R)), \
- (__v16sf)_mm512_setzero_ps())
+ (__v16sf)_mm512_setzero_ps()))
#define _mm512_roundscale_ps(A, B) \
- (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \
- (__v16sf)_mm512_undefined_ps(), \
- (__mmask16)-1, \
- _MM_FROUND_CUR_DIRECTION)
+ ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \
+ (__v16sf)_mm512_undefined_ps(), \
+ (__mmask16)-1, \
+ _MM_FROUND_CUR_DIRECTION))
#define _mm512_mask_roundscale_ps(A, B, C, imm) \
- (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
+ ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
(__v16sf)(__m512)(A), (__mmask16)(B), \
- _MM_FROUND_CUR_DIRECTION)
+ _MM_FROUND_CUR_DIRECTION))
#define _mm512_maskz_roundscale_ps(A, B, imm) \
- (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)(A), \
- _MM_FROUND_CUR_DIRECTION)
+ ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(A), \
+ _MM_FROUND_CUR_DIRECTION))
#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) \
- (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
+ ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
(__v16sf)(__m512)(A), (__mmask16)(B), \
- (int)(R))
+ (int)(R)))
#define _mm512_maskz_roundscale_round_ps(A, B, imm, R) \
- (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)(A), (int)(R))
+ ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(A), (int)(R)))
#define _mm512_roundscale_round_ps(A, imm, R) \
- (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \
- (__v16sf)_mm512_undefined_ps(), \
- (__mmask16)-1, (int)(R))
+ ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \
+ (__v16sf)_mm512_undefined_ps(), \
+ (__mmask16)-1, (int)(R)))
#define _mm512_roundscale_pd(A, B) \
- (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \
- (__v8df)_mm512_undefined_pd(), \
- (__mmask8)-1, \
- _MM_FROUND_CUR_DIRECTION)
+ ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \
+ (__v8df)_mm512_undefined_pd(), \
+ (__mmask8)-1, \
+ _MM_FROUND_CUR_DIRECTION))
#define _mm512_mask_roundscale_pd(A, B, C, imm) \
- (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
+ ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
(__v8df)(__m512d)(A), (__mmask8)(B), \
- _MM_FROUND_CUR_DIRECTION)
+ _MM_FROUND_CUR_DIRECTION))
#define _mm512_maskz_roundscale_pd(A, B, imm) \
- (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)(A), \
- _MM_FROUND_CUR_DIRECTION)
+ ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(A), \
+ _MM_FROUND_CUR_DIRECTION))
#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) \
- (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
+ ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
(__v8df)(__m512d)(A), (__mmask8)(B), \
- (int)(R))
+ (int)(R)))
#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) \
- (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)(A), (int)(R))
+ ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(A), (int)(R)))
#define _mm512_roundscale_round_pd(A, imm, R) \
- (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \
- (__v8df)_mm512_undefined_pd(), \
- (__mmask8)-1, (int)(R))
+ ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \
+ (__v8df)_mm512_undefined_pd(), \
+ (__mmask8)-1, (int)(R)))
#define _mm512_fmadd_round_pd(A, B, C, R) \
- (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (__mmask8)-1, (int)(R))
+ ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (__mmask8)-1, (int)(R)))
#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
- (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R))
+ ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R)))
#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
- (__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R))
+ ((__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R)))
#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
- (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R))
+ ((__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R)))
#define _mm512_fmsub_round_pd(A, B, C, R) \
- (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- -(__v8df)(__m512d)(C), \
- (__mmask8)-1, (int)(R))
+ ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ -(__v8df)(__m512d)(C), \
+ (__mmask8)-1, (int)(R)))
#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
- (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- -(__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R))
+ ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ -(__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R)))
#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
- (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- -(__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R))
+ ((__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ -(__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R)))
#define _mm512_fnmadd_round_pd(A, B, C, R) \
- (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (__mmask8)-1, (int)(R))
+ ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (__mmask8)-1, (int)(R)))
#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
- (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R))
+ ((__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R)))
#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
- (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R))
+ ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R)))
#define _mm512_fnmsub_round_pd(A, B, C, R) \
- (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- -(__v8df)(__m512d)(C), \
- (__mmask8)-1, (int)(R))
+ ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ -(__v8df)(__m512d)(C), \
+ (__mmask8)-1, (int)(R)))
#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
- (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- -(__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R))
+ ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ -(__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R)))
static __inline__ __m512d __DEFAULT_FN_ATTRS512
@@ -2629,87 +2629,87 @@ _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
}
#define _mm512_fmadd_round_ps(A, B, C, R) \
- (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (__mmask16)-1, (int)(R))
+ ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (__mmask16)-1, (int)(R)))
#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
- (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R))
+ ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R)))
#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
- (__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R))
+ ((__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R)))
#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
- (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R))
+ ((__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R)))
#define _mm512_fmsub_round_ps(A, B, C, R) \
- (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- -(__v16sf)(__m512)(C), \
- (__mmask16)-1, (int)(R))
+ ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ -(__v16sf)(__m512)(C), \
+ (__mmask16)-1, (int)(R)))
#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
- (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- -(__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R))
+ ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ -(__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R)))
#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
- (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- -(__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R))
+ ((__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ -(__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R)))
#define _mm512_fnmadd_round_ps(A, B, C, R) \
- (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
- -(__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (__mmask16)-1, (int)(R))
+ ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
+ -(__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (__mmask16)-1, (int)(R)))
#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
- (__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R))
+ ((__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R)))
#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
- (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R))
+ ((__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R)))
#define _mm512_fnmsub_round_ps(A, B, C, R) \
- (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
- -(__v16sf)(__m512)(B), \
- -(__v16sf)(__m512)(C), \
- (__mmask16)-1, (int)(R))
+ ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
+ -(__v16sf)(__m512)(B), \
+ -(__v16sf)(__m512)(C), \
+ (__mmask16)-1, (int)(R)))
#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
- (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- -(__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R))
+ ((__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ -(__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R)))
static __inline__ __m512 __DEFAULT_FN_ATTRS512
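Illustration (not part of the patch): every macro in the hunks above and below gains one outer pair of parentheses around its whole expansion. A minimal sketch of why that helps, using hypothetical variables a, b, c assumed initialized elsewhere — in C a postfix operator binds tighter than a cast, so without the outer parentheses a postfix expression written after the macro attaches to the builtin call rather than to the cast result:

  /* Hypothetical caller code; requires <immintrin.h> and -mavx512f. */
  __m512d a, b, c;
  double d = _mm512_fmadd_round_pd(a, b, c, _MM_FROUND_CUR_DIRECTION)[0];
  /* old expansion:  (__m512d)__builtin_ia32_vfmaddpd512_mask(...)[0]
   *   parses as     (__m512d)( __builtin_ia32_vfmaddpd512_mask(...)[0] )  -- cast applied last
   * new expansion:  ((__m512d)__builtin_ia32_vfmaddpd512_mask(...))[0]
   *   subscripts the cast result, matching what the caller wrote. */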
@@ -2833,52 +2833,52 @@ _mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
}
#define _mm512_fmaddsub_round_pd(A, B, C, R) \
- (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (__mmask8)-1, (int)(R))
+ ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (__mmask8)-1, (int)(R)))
#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
- (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R))
+ ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R)))
#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
- (__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R))
+ ((__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R)))
#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
- (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R))
+ ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R)))
#define _mm512_fmsubadd_round_pd(A, B, C, R) \
- (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- -(__v8df)(__m512d)(C), \
- (__mmask8)-1, (int)(R))
+ ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ -(__v8df)(__m512d)(C), \
+ (__mmask8)-1, (int)(R)))
#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
- (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- -(__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R))
+ ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ -(__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R)))
#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
- (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- -(__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R))
+ ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ -(__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R)))
static __inline__ __m512d __DEFAULT_FN_ATTRS512
@@ -2952,52 +2952,52 @@ _mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
}
#define _mm512_fmaddsub_round_ps(A, B, C, R) \
- (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (__mmask16)-1, (int)(R))
+ ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (__mmask16)-1, (int)(R)))
#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
- (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R))
+ ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R)))
#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
- (__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R))
+ ((__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R)))
#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
- (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R))
+ ((__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R)))
#define _mm512_fmsubadd_round_ps(A, B, C, R) \
- (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- -(__v16sf)(__m512)(C), \
- (__mmask16)-1, (int)(R))
+ ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ -(__v16sf)(__m512)(C), \
+ (__mmask16)-1, (int)(R)))
#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
- (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- -(__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R))
+ ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ -(__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R)))
#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
- (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- -(__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R))
+ ((__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ -(__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R)))
static __inline__ __m512 __DEFAULT_FN_ATTRS512
@@ -3071,10 +3071,10 @@ _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
}
#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
- (__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R))
+ ((__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R)))
static __inline__ __m512d __DEFAULT_FN_ATTRS512
@@ -3088,10 +3088,10 @@ _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
}
#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
- (__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R))
+ ((__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R)))
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
@@ -3104,10 +3104,10 @@ _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
}
#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
- (__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R))
+ ((__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R)))
static __inline__ __m512d __DEFAULT_FN_ATTRS512
@@ -3121,10 +3121,10 @@ _mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
}
#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
- (__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R))
+ ((__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R)))
static __inline__ __m512 __DEFAULT_FN_ATTRS512
@@ -3138,10 +3138,10 @@ _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
}
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
- (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
- -(__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R))
+ ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
+ -(__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R)))
static __inline__ __m512d __DEFAULT_FN_ATTRS512
@@ -3155,10 +3155,10 @@ _mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
}
#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
- (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
- -(__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R))
+ ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
+ -(__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R)))
static __inline__ __m512 __DEFAULT_FN_ATTRS512
@@ -3172,17 +3172,17 @@ _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
}
#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
- (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
- -(__v8df)(__m512d)(B), \
- -(__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R))
+ ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
+ -(__v8df)(__m512d)(B), \
+ -(__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R)))
#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
- (__m512d)__builtin_ia32_vfmsubpd512_mask3(-(__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(C), \
- (__mmask8)(U), (int)(R))
+ ((__m512d)__builtin_ia32_vfmsubpd512_mask3(-(__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(C), \
+ (__mmask8)(U), (int)(R)))
static __inline__ __m512d __DEFAULT_FN_ATTRS512
@@ -3206,17 +3206,17 @@ _mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
}
#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
- (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
- -(__v16sf)(__m512)(B), \
- -(__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R))
+ ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
+ -(__v16sf)(__m512)(B), \
+ -(__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R)))
#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
- (__m512)__builtin_ia32_vfmsubps512_mask3(-(__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(C), \
- (__mmask16)(U), (int)(R))
+ ((__m512)__builtin_ia32_vfmsubps512_mask3(-(__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(C), \
+ (__mmask16)(U), (int)(R)))
static __inline__ __m512 __DEFAULT_FN_ATTRS512
@@ -3312,63 +3312,63 @@ _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
}
#define _mm512_alignr_epi64(A, B, I) \
- (__m512i)__builtin_ia32_alignq512((__v8di)(__m512i)(A), \
- (__v8di)(__m512i)(B), (int)(I))
+ ((__m512i)__builtin_ia32_alignq512((__v8di)(__m512i)(A), \
+ (__v8di)(__m512i)(B), (int)(I)))
#define _mm512_mask_alignr_epi64(W, U, A, B, imm) \
- (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
- (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
- (__v8di)(__m512i)(W))
+ ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
+ (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
+ (__v8di)(__m512i)(W)))
#define _mm512_maskz_alignr_epi64(U, A, B, imm) \
- (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
- (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
- (__v8di)_mm512_setzero_si512())
+ ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
+ (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
+ (__v8di)_mm512_setzero_si512()))
#define _mm512_alignr_epi32(A, B, I) \
- (__m512i)__builtin_ia32_alignd512((__v16si)(__m512i)(A), \
- (__v16si)(__m512i)(B), (int)(I))
+ ((__m512i)__builtin_ia32_alignd512((__v16si)(__m512i)(A), \
+ (__v16si)(__m512i)(B), (int)(I)))
#define _mm512_mask_alignr_epi32(W, U, A, B, imm) \
- (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
- (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
- (__v16si)(__m512i)(W))
+ ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
+ (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
+ (__v16si)(__m512i)(W)))
#define _mm512_maskz_alignr_epi32(U, A, B, imm) \
- (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
- (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
- (__v16si)_mm512_setzero_si512())
+ ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
+ (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
+ (__v16si)_mm512_setzero_si512()))
/* Vector Extract */
#define _mm512_extractf64x4_pd(A, I) \
- (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \
- (__v4df)_mm256_undefined_pd(), \
- (__mmask8)-1)
+ ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \
+ (__v4df)_mm256_undefined_pd(), \
+ (__mmask8)-1))
#define _mm512_mask_extractf64x4_pd(W, U, A, imm) \
- (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
- (__v4df)(__m256d)(W), \
- (__mmask8)(U))
+ ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
+ (__v4df)(__m256d)(W), \
+ (__mmask8)(U)))
#define _mm512_maskz_extractf64x4_pd(U, A, imm) \
- (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
- (__v4df)_mm256_setzero_pd(), \
- (__mmask8)(U))
+ ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
+ (__v4df)_mm256_setzero_pd(), \
+ (__mmask8)(U)))
#define _mm512_extractf32x4_ps(A, I) \
- (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \
- (__v4sf)_mm_undefined_ps(), \
- (__mmask8)-1)
+ ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \
+ (__v4sf)_mm_undefined_ps(), \
+ (__mmask8)-1))
#define _mm512_mask_extractf32x4_ps(W, U, A, imm) \
- (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
- (__v4sf)(__m128)(W), \
- (__mmask8)(U))
+ ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
+ (__v4sf)(__m128)(W), \
+ (__mmask8)(U)))
#define _mm512_maskz_extractf32x4_ps(U, A, imm) \
- (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)(U))
+ ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U)))
/* Vector Blend */
@@ -3407,14 +3407,14 @@ _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
/* Compare */
#define _mm512_cmp_round_ps_mask(A, B, P, R) \
- (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), (int)(P), \
- (__mmask16)-1, (int)(R))
+ ((__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), (int)(P), \
+ (__mmask16)-1, (int)(R)))
#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) \
- (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), (int)(P), \
- (__mmask16)(U), (int)(R))
+ ((__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), (int)(P), \
+ (__mmask16)(U), (int)(R)))
#define _mm512_cmp_ps_mask(A, B, P) \
_mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
@@ -3462,14 +3462,14 @@ _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
_mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q)
#define _mm512_cmp_round_pd_mask(A, B, P, R) \
- (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), (int)(P), \
- (__mmask8)-1, (int)(R))
+ ((__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), (int)(P), \
+ (__mmask8)-1, (int)(R)))
#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) \
- (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), (int)(P), \
- (__mmask8)(U), (int)(R))
+ ((__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), (int)(P), \
+ (__mmask8)(U), (int)(R)))
#define _mm512_cmp_pd_mask(A, B, P) \
_mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
@@ -3519,19 +3519,19 @@ _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
/* Conversion */
#define _mm512_cvtt_roundps_epu32(A, R) \
- (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
- (__v16si)_mm512_undefined_epi32(), \
- (__mmask16)-1, (int)(R))
+ ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
+ (__v16si)_mm512_undefined_epi32(), \
+ (__mmask16)-1, (int)(R)))
#define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) \
- (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
- (__v16si)(__m512i)(W), \
- (__mmask16)(U), (int)(R))
+ ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
+ (__v16si)(__m512i)(W), \
+ (__mmask16)(U), (int)(R)))
#define _mm512_maskz_cvtt_roundps_epu32(U, A, R) \
- (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
- (__v16si)_mm512_setzero_si512(), \
- (__mmask16)(U), (int)(R))
+ ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
+ (__v16si)_mm512_setzero_si512(), \
+ (__mmask16)(U), (int)(R)))
static __inline __m512i __DEFAULT_FN_ATTRS512
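Usage sketch (not part of the patch): the *_round conversion macros in this region take an explicit rounding/SAE control as their last argument. The wrappers below are illustrative assumptions, not code from this header:

  #include <immintrin.h>   /* compile with -mavx512f */

  static __m512i cvtt_ps_to_epu32_no_exc(__m512 x) {
    /* Truncating convert; only exception suppression (or the current
       rounding mode) is meaningful for the cvtt forms' control argument. */
    return _mm512_cvtt_roundps_epu32(x, _MM_FROUND_NO_EXC);
  }

  static __m512 cvt_epi32_to_ps_nearest(__m512i x) {
    /* Explicit round-to-nearest combined with exception suppression. */
    return _mm512_cvt_roundepi32_ps(x, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
  }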
@@ -3563,34 +3563,34 @@ _mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
}
#define _mm512_cvt_roundepi32_ps(A, R) \
- (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)-1, (int)(R))
+ ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)-1, (int)(R)))
#define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) \
- (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
- (__v16sf)(__m512)(W), \
- (__mmask16)(U), (int)(R))
+ ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
+ (__v16sf)(__m512)(W), \
+ (__mmask16)(U), (int)(R)))
#define _mm512_maskz_cvt_roundepi32_ps(U, A, R) \
- (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)(U), (int)(R))
+ ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(U), (int)(R)))
#define _mm512_cvt_roundepu32_ps(A, R) \
- (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)-1, (int)(R))
+ ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)-1, (int)(R)))
#define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) \
- (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
- (__v16sf)(__m512)(W), \
- (__mmask16)(U), (int)(R))
+ ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
+ (__v16sf)(__m512)(W), \
+ (__mmask16)(U), (int)(R)))
#define _mm512_maskz_cvt_roundepu32_ps(U, A, R) \
- (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)(U), (int)(R))
+ ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(U), (int)(R)))
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_cvtepu32_ps (__m512i __A)
@@ -3705,19 +3705,19 @@ _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
}
#define _mm512_cvt_roundpd_ps(A, R) \
- (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
- (__v8sf)_mm256_setzero_ps(), \
- (__mmask8)-1, (int)(R))
+ ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
+ (__v8sf)_mm256_setzero_ps(), \
+ (__mmask8)-1, (int)(R)))
#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) \
- (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
- (__v8sf)(__m256)(W), (__mmask8)(U), \
- (int)(R))
+ ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
+ (__v8sf)(__m256)(W), (__mmask8)(U), \
+ (int)(R)))
#define _mm512_maskz_cvt_roundpd_ps(U, A, R) \
- (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
- (__v8sf)_mm256_setzero_ps(), \
- (__mmask8)(U), (int)(R))
+ ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
+ (__v8sf)_mm256_setzero_ps(), \
+ (__mmask8)(U), (int)(R)))
static __inline__ __m256 __DEFAULT_FN_ATTRS512
_mm512_cvtpd_ps (__m512d __A)
@@ -3765,38 +3765,38 @@ _mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A)
}
#define _mm512_cvt_roundps_ph(A, I) \
- (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
- (__v16hi)_mm256_undefined_si256(), \
- (__mmask16)-1)
+ ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
+ (__v16hi)_mm256_undefined_si256(), \
+ (__mmask16)-1))
#define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
- (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
- (__v16hi)(__m256i)(U), \
- (__mmask16)(W))
+ ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
+ (__v16hi)(__m256i)(U), \
+ (__mmask16)(W)))
#define _mm512_maskz_cvt_roundps_ph(W, A, I) \
- (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
- (__v16hi)_mm256_setzero_si256(), \
- (__mmask16)(W))
+ ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
+ (__v16hi)_mm256_setzero_si256(), \
+ (__mmask16)(W)))
#define _mm512_cvtps_ph _mm512_cvt_roundps_ph
#define _mm512_mask_cvtps_ph _mm512_mask_cvt_roundps_ph
#define _mm512_maskz_cvtps_ph _mm512_maskz_cvt_roundps_ph
#define _mm512_cvt_roundph_ps(A, R) \
- (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
- (__v16sf)_mm512_undefined_ps(), \
- (__mmask16)-1, (int)(R))
+ ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
+ (__v16sf)_mm512_undefined_ps(), \
+ (__mmask16)-1, (int)(R)))
#define _mm512_mask_cvt_roundph_ps(W, U, A, R) \
- (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
- (__v16sf)(__m512)(W), \
- (__mmask16)(U), (int)(R))
+ ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
+ (__v16sf)(__m512)(W), \
+ (__mmask16)(U), (int)(R)))
#define _mm512_maskz_cvt_roundph_ps(U, A, R) \
- (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)(U), (int)(R))
+ ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(U), (int)(R)))
static __inline __m512 __DEFAULT_FN_ATTRS512
@@ -3828,19 +3828,19 @@ _mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
}
#define _mm512_cvtt_roundpd_epi32(A, R) \
- (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
- (__v8si)_mm256_setzero_si256(), \
- (__mmask8)-1, (int)(R))
+ ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
+ (__v8si)_mm256_setzero_si256(), \
+ (__mmask8)-1, (int)(R)))
#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) \
- (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
- (__v8si)(__m256i)(W), \
- (__mmask8)(U), (int)(R))
+ ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
+ (__v8si)(__m256i)(W), \
+ (__mmask8)(U), (int)(R)))
#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) \
- (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
- (__v8si)_mm256_setzero_si256(), \
- (__mmask8)(U), (int)(R))
+ ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
+ (__v8si)_mm256_setzero_si256(), \
+ (__mmask8)(U), (int)(R)))
static __inline __m256i __DEFAULT_FN_ATTRS512
_mm512_cvttpd_epi32(__m512d __a)
@@ -3870,19 +3870,19 @@ _mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
}
#define _mm512_cvtt_roundps_epi32(A, R) \
- (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
- (__v16si)_mm512_setzero_si512(), \
- (__mmask16)-1, (int)(R))
+ ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
+ (__v16si)_mm512_setzero_si512(), \
+ (__mmask16)-1, (int)(R)))
#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) \
- (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
- (__v16si)(__m512i)(W), \
- (__mmask16)(U), (int)(R))
+ ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
+ (__v16si)(__m512i)(W), \
+ (__mmask16)(U), (int)(R)))
#define _mm512_maskz_cvtt_roundps_epi32(U, A, R) \
- (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
- (__v16si)_mm512_setzero_si512(), \
- (__mmask16)(U), (int)(R))
+ ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
+ (__v16si)_mm512_setzero_si512(), \
+ (__mmask16)(U), (int)(R)))
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_cvttps_epi32(__m512 __a)
@@ -3912,19 +3912,19 @@ _mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
}
#define _mm512_cvt_roundps_epi32(A, R) \
- (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
- (__v16si)_mm512_setzero_si512(), \
- (__mmask16)-1, (int)(R))
+ ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
+ (__v16si)_mm512_setzero_si512(), \
+ (__mmask16)-1, (int)(R)))
#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) \
- (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
- (__v16si)(__m512i)(W), \
- (__mmask16)(U), (int)(R))
+ ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
+ (__v16si)(__m512i)(W), \
+ (__mmask16)(U), (int)(R)))
#define _mm512_maskz_cvt_roundps_epi32(U, A, R) \
- (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
- (__v16si)_mm512_setzero_si512(), \
- (__mmask16)(U), (int)(R))
+ ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
+ (__v16si)_mm512_setzero_si512(), \
+ (__mmask16)(U), (int)(R)))
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtps_epi32 (__m512 __A)
@@ -3955,19 +3955,19 @@ _mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
}
#define _mm512_cvt_roundpd_epi32(A, R) \
- (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
- (__v8si)_mm256_setzero_si256(), \
- (__mmask8)-1, (int)(R))
+ ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
+ (__v8si)_mm256_setzero_si256(), \
+ (__mmask8)-1, (int)(R)))
#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) \
- (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
- (__v8si)(__m256i)(W), \
- (__mmask8)(U), (int)(R))
+ ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
+ (__v8si)(__m256i)(W), \
+ (__mmask8)(U), (int)(R)))
#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) \
- (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
- (__v8si)_mm256_setzero_si256(), \
- (__mmask8)(U), (int)(R))
+ ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
+ (__v8si)_mm256_setzero_si256(), \
+ (__mmask8)(U), (int)(R)))
static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_cvtpd_epi32 (__m512d __A)
@@ -3999,19 +3999,19 @@ _mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
}
#define _mm512_cvt_roundps_epu32(A, R) \
- (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
- (__v16si)_mm512_setzero_si512(), \
- (__mmask16)-1, (int)(R))
+ ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
+ (__v16si)_mm512_setzero_si512(), \
+ (__mmask16)-1, (int)(R)))
#define _mm512_mask_cvt_roundps_epu32(W, U, A, R) \
- (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
- (__v16si)(__m512i)(W), \
- (__mmask16)(U), (int)(R))
+ ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
+ (__v16si)(__m512i)(W), \
+ (__mmask16)(U), (int)(R)))
#define _mm512_maskz_cvt_roundps_epu32(U, A, R) \
- (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
- (__v16si)_mm512_setzero_si512(), \
- (__mmask16)(U), (int)(R))
+ ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
+ (__v16si)_mm512_setzero_si512(), \
+ (__mmask16)(U), (int)(R)))
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtps_epu32 ( __m512 __A)
@@ -4043,19 +4043,19 @@ _mm512_maskz_cvtps_epu32 ( __mmask16 __U, __m512 __A)
}
#define _mm512_cvt_roundpd_epu32(A, R) \
- (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
- (__v8si)_mm256_setzero_si256(), \
- (__mmask8)-1, (int)(R))
+ ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
+ (__v8si)_mm256_setzero_si256(), \
+ (__mmask8)-1, (int)(R)))
#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) \
- (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
- (__v8si)(__m256i)(W), \
- (__mmask8)(U), (int)(R))
+ ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
+ (__v8si)(__m256i)(W), \
+ (__mmask8)(U), (int)(R)))
#define _mm512_maskz_cvt_roundpd_epu32(U, A, R) \
- (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
- (__v8si)_mm256_setzero_si256(), \
- (__mmask8)(U), (int)(R))
+ ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
+ (__v8si)_mm256_setzero_si256(), \
+ (__mmask8)(U), (int)(R)))
static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_cvtpd_epu32 (__m512d __A)
@@ -4975,70 +4975,70 @@ _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
#define _mm512_cmp_epi32_mask(a, b, p) \
- (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
- (__v16si)(__m512i)(b), (int)(p), \
- (__mmask16)-1)
+ ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
+ (__v16si)(__m512i)(b), (int)(p), \
+ (__mmask16)-1))
#define _mm512_cmp_epu32_mask(a, b, p) \
- (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
- (__v16si)(__m512i)(b), (int)(p), \
- (__mmask16)-1)
+ ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
+ (__v16si)(__m512i)(b), (int)(p), \
+ (__mmask16)-1))
#define _mm512_cmp_epi64_mask(a, b, p) \
- (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
- (__v8di)(__m512i)(b), (int)(p), \
- (__mmask8)-1)
+ ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
+ (__v8di)(__m512i)(b), (int)(p), \
+ (__mmask8)-1))
#define _mm512_cmp_epu64_mask(a, b, p) \
- (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
- (__v8di)(__m512i)(b), (int)(p), \
- (__mmask8)-1)
+ ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
+ (__v8di)(__m512i)(b), (int)(p), \
+ (__mmask8)-1))
#define _mm512_mask_cmp_epi32_mask(m, a, b, p) \
- (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
- (__v16si)(__m512i)(b), (int)(p), \
- (__mmask16)(m))
+ ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
+ (__v16si)(__m512i)(b), (int)(p), \
+ (__mmask16)(m)))
#define _mm512_mask_cmp_epu32_mask(m, a, b, p) \
- (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
- (__v16si)(__m512i)(b), (int)(p), \
- (__mmask16)(m))
+ ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
+ (__v16si)(__m512i)(b), (int)(p), \
+ (__mmask16)(m)))
#define _mm512_mask_cmp_epi64_mask(m, a, b, p) \
- (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
- (__v8di)(__m512i)(b), (int)(p), \
- (__mmask8)(m))
+ ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
+ (__v8di)(__m512i)(b), (int)(p), \
+ (__mmask8)(m)))
#define _mm512_mask_cmp_epu64_mask(m, a, b, p) \
- (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
- (__v8di)(__m512i)(b), (int)(p), \
- (__mmask8)(m))
+ ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
+ (__v8di)(__m512i)(b), (int)(p), \
+ (__mmask8)(m)))
#define _mm512_rol_epi32(a, b) \
- (__m512i)__builtin_ia32_prold512((__v16si)(__m512i)(a), (int)(b))
+ ((__m512i)__builtin_ia32_prold512((__v16si)(__m512i)(a), (int)(b)))
#define _mm512_mask_rol_epi32(W, U, a, b) \
- (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
- (__v16si)_mm512_rol_epi32((a), (b)), \
- (__v16si)(__m512i)(W))
+ ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
+ (__v16si)_mm512_rol_epi32((a), (b)), \
+ (__v16si)(__m512i)(W)))
#define _mm512_maskz_rol_epi32(U, a, b) \
- (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
- (__v16si)_mm512_rol_epi32((a), (b)), \
- (__v16si)_mm512_setzero_si512())
+ ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
+ (__v16si)_mm512_rol_epi32((a), (b)), \
+ (__v16si)_mm512_setzero_si512()))
#define _mm512_rol_epi64(a, b) \
- (__m512i)__builtin_ia32_prolq512((__v8di)(__m512i)(a), (int)(b))
+ ((__m512i)__builtin_ia32_prolq512((__v8di)(__m512i)(a), (int)(b)))
#define _mm512_mask_rol_epi64(W, U, a, b) \
- (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
- (__v8di)_mm512_rol_epi64((a), (b)), \
- (__v8di)(__m512i)(W))
+ ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
+ (__v8di)_mm512_rol_epi64((a), (b)), \
+ (__v8di)(__m512i)(W)))
#define _mm512_maskz_rol_epi64(U, a, b) \
- (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
- (__v8di)_mm512_rol_epi64((a), (b)), \
- (__v8di)_mm512_setzero_si512())
+ ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
+ (__v8di)_mm512_rol_epi64((a), (b)), \
+ (__v8di)_mm512_setzero_si512()))
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_rolv_epi32 (__m512i __A, __m512i __B)
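Side note (not part of the patch): the mask/maskz rotate macros above are composed from the unmasked macro plus a per-lane select, so the added parentheses wrap the outer select expression as well. Roughly, with hypothetical variables W, U and a:

  /* _mm512_mask_rol_epi32(W, U, a, 3) behaves like: keep lane i of W where
   * bit i of U is clear, otherwise take lane i of the rotated result. */
  __m512i rotated = _mm512_rol_epi32(a, 3);
  __m512i merged  = _mm512_mask_mov_epi32(W, U, rotated);  /* same lanes as the macro */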
@@ -5085,30 +5085,30 @@ _mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
}
#define _mm512_ror_epi32(A, B) \
- (__m512i)__builtin_ia32_prord512((__v16si)(__m512i)(A), (int)(B))
+ ((__m512i)__builtin_ia32_prord512((__v16si)(__m512i)(A), (int)(B)))
#define _mm512_mask_ror_epi32(W, U, A, B) \
- (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
- (__v16si)_mm512_ror_epi32((A), (B)), \
- (__v16si)(__m512i)(W))
+ ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
+ (__v16si)_mm512_ror_epi32((A), (B)), \
+ (__v16si)(__m512i)(W)))
#define _mm512_maskz_ror_epi32(U, A, B) \
- (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
- (__v16si)_mm512_ror_epi32((A), (B)), \
- (__v16si)_mm512_setzero_si512())
+ ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
+ (__v16si)_mm512_ror_epi32((A), (B)), \
+ (__v16si)_mm512_setzero_si512()))
#define _mm512_ror_epi64(A, B) \
- (__m512i)__builtin_ia32_prorq512((__v8di)(__m512i)(A), (int)(B))
+ ((__m512i)__builtin_ia32_prorq512((__v8di)(__m512i)(A), (int)(B)))
#define _mm512_mask_ror_epi64(W, U, A, B) \
- (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
- (__v8di)_mm512_ror_epi64((A), (B)), \
- (__v8di)(__m512i)(W))
+ ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
+ (__v8di)_mm512_ror_epi64((A), (B)), \
+ (__v8di)(__m512i)(W)))
#define _mm512_maskz_ror_epi64(U, A, B) \
- (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
- (__v8di)_mm512_ror_epi64((A), (B)), \
- (__v8di)_mm512_setzero_si512())
+ ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
+ (__v8di)_mm512_ror_epi64((A), (B)), \
+ (__v8di)_mm512_setzero_si512()))
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_slli_epi32(__m512i __A, unsigned int __B)
@@ -5304,168 +5304,168 @@ _mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
}
#define _mm512_fixupimm_round_pd(A, B, C, imm, R) \
- (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8di)(__m512i)(C), (int)(imm), \
- (__mmask8)-1, (int)(R))
+ ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8di)(__m512i)(C), (int)(imm), \
+ (__mmask8)-1, (int)(R)))
#define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) \
- (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8di)(__m512i)(C), (int)(imm), \
- (__mmask8)(U), (int)(R))
+ ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8di)(__m512i)(C), (int)(imm), \
+ (__mmask8)(U), (int)(R)))
#define _mm512_fixupimm_pd(A, B, C, imm) \
- (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8di)(__m512i)(C), (int)(imm), \
- (__mmask8)-1, \
- _MM_FROUND_CUR_DIRECTION)
+ ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8di)(__m512i)(C), (int)(imm), \
+ (__mmask8)-1, \
+ _MM_FROUND_CUR_DIRECTION))
#define _mm512_mask_fixupimm_pd(A, U, B, C, imm) \
- (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8di)(__m512i)(C), (int)(imm), \
- (__mmask8)(U), \
- _MM_FROUND_CUR_DIRECTION)
+ ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8di)(__m512i)(C), (int)(imm), \
+ (__mmask8)(U), \
+ _MM_FROUND_CUR_DIRECTION))
#define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) \
- (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8di)(__m512i)(C), \
- (int)(imm), (__mmask8)(U), \
- (int)(R))
+ ((__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8di)(__m512i)(C), \
+ (int)(imm), (__mmask8)(U), \
+ (int)(R)))
#define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) \
- (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8di)(__m512i)(C), \
- (int)(imm), (__mmask8)(U), \
- _MM_FROUND_CUR_DIRECTION)
+ ((__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8di)(__m512i)(C), \
+ (int)(imm), (__mmask8)(U), \
+ _MM_FROUND_CUR_DIRECTION))
#define _mm512_fixupimm_round_ps(A, B, C, imm, R) \
- (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16si)(__m512i)(C), (int)(imm), \
- (__mmask16)-1, (int)(R))
+ ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16si)(__m512i)(C), (int)(imm), \
+ (__mmask16)-1, (int)(R)))
#define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) \
- (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16si)(__m512i)(C), (int)(imm), \
- (__mmask16)(U), (int)(R))
+ ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16si)(__m512i)(C), (int)(imm), \
+ (__mmask16)(U), (int)(R)))
#define _mm512_fixupimm_ps(A, B, C, imm) \
- (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16si)(__m512i)(C), (int)(imm), \
- (__mmask16)-1, \
- _MM_FROUND_CUR_DIRECTION)
+ ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16si)(__m512i)(C), (int)(imm), \
+ (__mmask16)-1, \
+ _MM_FROUND_CUR_DIRECTION))
#define _mm512_mask_fixupimm_ps(A, U, B, C, imm) \
- (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16si)(__m512i)(C), (int)(imm), \
- (__mmask16)(U), \
- _MM_FROUND_CUR_DIRECTION)
+ ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16si)(__m512i)(C), (int)(imm), \
+ (__mmask16)(U), \
+ _MM_FROUND_CUR_DIRECTION))
#define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) \
- (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16si)(__m512i)(C), \
- (int)(imm), (__mmask16)(U), \
- (int)(R))
+ ((__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16si)(__m512i)(C), \
+ (int)(imm), (__mmask16)(U), \
+ (int)(R)))
#define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) \
- (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16si)(__m512i)(C), \
- (int)(imm), (__mmask16)(U), \
- _MM_FROUND_CUR_DIRECTION)
+ ((__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16si)(__m512i)(C), \
+ (int)(imm), (__mmask16)(U), \
+ _MM_FROUND_CUR_DIRECTION))
#define _mm_fixupimm_round_sd(A, B, C, imm, R) \
- (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2di)(__m128i)(C), (int)(imm), \
- (__mmask8)-1, (int)(R))
+ ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2di)(__m128i)(C), (int)(imm), \
+ (__mmask8)-1, (int)(R)))
#define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) \
- (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2di)(__m128i)(C), (int)(imm), \
- (__mmask8)(U), (int)(R))
+ ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2di)(__m128i)(C), (int)(imm), \
+ (__mmask8)(U), (int)(R)))
#define _mm_fixupimm_sd(A, B, C, imm) \
- (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2di)(__m128i)(C), (int)(imm), \
- (__mmask8)-1, \
- _MM_FROUND_CUR_DIRECTION)
-
-#define _mm_mask_fixupimm_sd(A, U, B, C, imm) \
- (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2di)(__m128i)(C), (int)(imm), \
- (__mmask8)(U), \
- _MM_FROUND_CUR_DIRECTION)
-
-#define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) \
- (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
+ ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
(__v2df)(__m128d)(B), \
(__v2di)(__m128i)(C), (int)(imm), \
- (__mmask8)(U), (int)(R))
+ (__mmask8)-1, \
+ _MM_FROUND_CUR_DIRECTION))
-#define _mm_maskz_fixupimm_sd(U, A, B, C, imm) \
- (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
+#define _mm_mask_fixupimm_sd(A, U, B, C, imm) \
+ ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
(__v2df)(__m128d)(B), \
(__v2di)(__m128i)(C), (int)(imm), \
(__mmask8)(U), \
- _MM_FROUND_CUR_DIRECTION)
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) \
+ ((__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2di)(__m128i)(C), (int)(imm), \
+ (__mmask8)(U), (int)(R)))
+
+#define _mm_maskz_fixupimm_sd(U, A, B, C, imm) \
+ ((__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2di)(__m128i)(C), (int)(imm), \
+ (__mmask8)(U), \
+ _MM_FROUND_CUR_DIRECTION))
#define _mm_fixupimm_round_ss(A, B, C, imm, R) \
- (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4si)(__m128i)(C), (int)(imm), \
- (__mmask8)-1, (int)(R))
+ ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4si)(__m128i)(C), (int)(imm), \
+ (__mmask8)-1, (int)(R)))
#define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) \
- (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4si)(__m128i)(C), (int)(imm), \
- (__mmask8)(U), (int)(R))
+ ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4si)(__m128i)(C), (int)(imm), \
+ (__mmask8)(U), (int)(R)))
#define _mm_fixupimm_ss(A, B, C, imm) \
- (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4si)(__m128i)(C), (int)(imm), \
- (__mmask8)-1, \
- _MM_FROUND_CUR_DIRECTION)
-
-#define _mm_mask_fixupimm_ss(A, U, B, C, imm) \
- (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4si)(__m128i)(C), (int)(imm), \
- (__mmask8)(U), \
- _MM_FROUND_CUR_DIRECTION)
-
-#define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) \
- (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
+ ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
(__v4sf)(__m128)(B), \
(__v4si)(__m128i)(C), (int)(imm), \
- (__mmask8)(U), (int)(R))
+ (__mmask8)-1, \
+ _MM_FROUND_CUR_DIRECTION))
-#define _mm_maskz_fixupimm_ss(U, A, B, C, imm) \
- (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
+#define _mm_mask_fixupimm_ss(A, U, B, C, imm) \
+ ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
(__v4sf)(__m128)(B), \
(__v4si)(__m128i)(C), (int)(imm), \
(__mmask8)(U), \
- _MM_FROUND_CUR_DIRECTION)
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) \
+ ((__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4si)(__m128i)(C), (int)(imm), \
+ (__mmask8)(U), (int)(R)))
+
+#define _mm_maskz_fixupimm_ss(U, A, B, C, imm) \
+ ((__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4si)(__m128i)(C), (int)(imm), \
+ (__mmask8)(U), \
+ _MM_FROUND_CUR_DIRECTION))
#define _mm_getexp_round_sd(A, B, R) \
- (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)-1, (int)(R))
+ ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1, (int)(R)))
static __inline__ __m128d __DEFAULT_FN_ATTRS128
@@ -5486,10 +5486,10 @@ _mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
}
#define _mm_mask_getexp_round_sd(W, U, A, B, R) \
- (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)(__m128d)(W), \
- (__mmask8)(U), (int)(R))
+ ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(W), \
+ (__mmask8)(U), (int)(R)))
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
@@ -5502,16 +5502,16 @@ _mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
}
#define _mm_maskz_getexp_round_sd(U, A, B, R) \
- (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)(U), (int)(R))
+ ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U), (int)(R)))
#define _mm_getexp_round_ss(A, B, R) \
- (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)-1, (int)(R))
+ ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)-1, (int)(R)))
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_getexp_ss (__m128 __A, __m128 __B)
@@ -5531,10 +5531,10 @@ _mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
}
#define _mm_mask_getexp_round_ss(W, U, A, B, R) \
- (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)(__m128)(W), \
- (__mmask8)(U), (int)(R))
+ ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(W), \
+ (__mmask8)(U), (int)(R)))
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
@@ -5547,100 +5547,100 @@ _mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
}
#define _mm_maskz_getexp_round_ss(U, A, B, R) \
- (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)(U), (int)(R))
+ ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U), (int)(R)))
#define _mm_getmant_round_sd(A, B, C, D, R) \
- (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (int)(((D)<<2) | (C)), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)-1, (int)(R))
+ ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (int)(((D)<<2) | (C)), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1, (int)(R)))
#define _mm_getmant_sd(A, B, C, D) \
- (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (int)(((D)<<2) | (C)), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)-1, \
- _MM_FROUND_CUR_DIRECTION)
+ ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (int)(((D)<<2) | (C)), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1, \
+ _MM_FROUND_CUR_DIRECTION))
#define _mm_mask_getmant_sd(W, U, A, B, C, D) \
- (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (int)(((D)<<2) | (C)), \
- (__v2df)(__m128d)(W), \
- (__mmask8)(U), \
- _MM_FROUND_CUR_DIRECTION)
+ ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (int)(((D)<<2) | (C)), \
+ (__v2df)(__m128d)(W), \
+ (__mmask8)(U), \
+ _MM_FROUND_CUR_DIRECTION))
#define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R) \
- (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (int)(((D)<<2) | (C)), \
- (__v2df)(__m128d)(W), \
- (__mmask8)(U), (int)(R))
+ ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (int)(((D)<<2) | (C)), \
+ (__v2df)(__m128d)(W), \
+ (__mmask8)(U), (int)(R)))
#define _mm_maskz_getmant_sd(U, A, B, C, D) \
- (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (int)(((D)<<2) | (C)), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)(U), \
- _MM_FROUND_CUR_DIRECTION)
+ ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (int)(((D)<<2) | (C)), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U), \
+ _MM_FROUND_CUR_DIRECTION))
#define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) \
- (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (int)(((D)<<2) | (C)), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)(U), (int)(R))
+ ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (int)(((D)<<2) | (C)), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U), (int)(R)))
#define _mm_getmant_round_ss(A, B, C, D, R) \
- (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (int)(((D)<<2) | (C)), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)-1, (int)(R))
+ ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (int)(((D)<<2) | (C)), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)-1, (int)(R)))
#define _mm_getmant_ss(A, B, C, D) \
- (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (int)(((D)<<2) | (C)), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)-1, \
- _MM_FROUND_CUR_DIRECTION)
+ ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (int)(((D)<<2) | (C)), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)-1, \
+ _MM_FROUND_CUR_DIRECTION))
#define _mm_mask_getmant_ss(W, U, A, B, C, D) \
- (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (int)(((D)<<2) | (C)), \
- (__v4sf)(__m128)(W), \
- (__mmask8)(U), \
- _MM_FROUND_CUR_DIRECTION)
+ ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (int)(((D)<<2) | (C)), \
+ (__v4sf)(__m128)(W), \
+ (__mmask8)(U), \
+ _MM_FROUND_CUR_DIRECTION))
#define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R) \
- (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (int)(((D)<<2) | (C)), \
- (__v4sf)(__m128)(W), \
- (__mmask8)(U), (int)(R))
+ ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (int)(((D)<<2) | (C)), \
+ (__v4sf)(__m128)(W), \
+ (__mmask8)(U), (int)(R)))
#define _mm_maskz_getmant_ss(U, A, B, C, D) \
- (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (int)(((D)<<2) | (C)), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)(U), \
- _MM_FROUND_CUR_DIRECTION)
+ ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (int)(((D)<<2) | (C)), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U), \
+ _MM_FROUND_CUR_DIRECTION))
#define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) \
- (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (int)(((D)<<2) | (C)), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)(U), (int)(R))
+ ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (int)(((D)<<2) | (C)), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U), (int)(R)))
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kmov (__mmask16 __A)
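Side note (not part of the patch): the getmant macros above fold their two enum arguments into a single immediate as (int)(((D)<<2) | (C)), i.e. the sign control occupies the upper bits and the normalization interval the low two. A hypothetical call for illustration, with a and b assumed initialized:

  /* Mantissa normalized to [1, 2), sign taken from the source. */
  __m128d m = _mm_getmant_sd(a, b, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src);
  /* expands so that ((_MM_MANT_SIGN_src) << 2) | (_MM_MANT_NORM_1_2) is the
     immediate handed to __builtin_ia32_getmantsd_round_mask. */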
@@ -5649,16 +5649,16 @@ _mm512_kmov (__mmask16 __A)
}
#define _mm_comi_round_sd(A, B, P, R) \
- (int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
- (int)(P), (int)(R))
+ ((int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
+ (int)(P), (int)(R)))
#define _mm_comi_round_ss(A, B, P, R) \
- (int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
- (int)(P), (int)(R))
+ ((int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
+ (int)(P), (int)(R)))
#ifdef __x86_64__
#define _mm_cvt_roundsd_si64(A, R) \
- (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R))
+ ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)))
#endif
static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -5926,54 +5926,54 @@ _mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
}
#define _mm512_ternarylogic_epi32(A, B, C, imm) \
- (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
- (__v16si)(__m512i)(B), \
- (__v16si)(__m512i)(C), (int)(imm), \
- (__mmask16)-1)
+ ((__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
+ (__v16si)(__m512i)(B), \
+ (__v16si)(__m512i)(C), (int)(imm), \
+ (__mmask16)-1))
#define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) \
- (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
- (__v16si)(__m512i)(B), \
- (__v16si)(__m512i)(C), (int)(imm), \
- (__mmask16)(U))
+ ((__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
+ (__v16si)(__m512i)(B), \
+ (__v16si)(__m512i)(C), (int)(imm), \
+ (__mmask16)(U)))
#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) \
- (__m512i)__builtin_ia32_pternlogd512_maskz((__v16si)(__m512i)(A), \
- (__v16si)(__m512i)(B), \
- (__v16si)(__m512i)(C), \
- (int)(imm), (__mmask16)(U))
+ ((__m512i)__builtin_ia32_pternlogd512_maskz((__v16si)(__m512i)(A), \
+ (__v16si)(__m512i)(B), \
+ (__v16si)(__m512i)(C), \
+ (int)(imm), (__mmask16)(U)))
#define _mm512_ternarylogic_epi64(A, B, C, imm) \
- (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
- (__v8di)(__m512i)(B), \
- (__v8di)(__m512i)(C), (int)(imm), \
- (__mmask8)-1)
+ ((__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
+ (__v8di)(__m512i)(B), \
+ (__v8di)(__m512i)(C), (int)(imm), \
+ (__mmask8)-1))
#define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) \
- (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
- (__v8di)(__m512i)(B), \
- (__v8di)(__m512i)(C), (int)(imm), \
- (__mmask8)(U))
-
-#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) \
- (__m512i)__builtin_ia32_pternlogq512_maskz((__v8di)(__m512i)(A), \
+ ((__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
(__v8di)(__m512i)(B), \
(__v8di)(__m512i)(C), (int)(imm), \
- (__mmask8)(U))
+ (__mmask8)(U)))
+
+#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) \
+ ((__m512i)__builtin_ia32_pternlogq512_maskz((__v8di)(__m512i)(A), \
+ (__v8di)(__m512i)(B), \
+ (__v8di)(__m512i)(C), (int)(imm), \
+ (__mmask8)(U)))
#ifdef __x86_64__
#define _mm_cvt_roundsd_i64(A, R) \
- (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R))
+ ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)))
#endif
#define _mm_cvt_roundsd_si32(A, R) \
- (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R))
+ ((int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)))
#define _mm_cvt_roundsd_i32(A, R) \
- (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R))
+ ((int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)))
#define _mm_cvt_roundsd_u32(A, R) \
- (unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R))
+ ((unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R)))
static __inline__ unsigned __DEFAULT_FN_ATTRS128
_mm_cvtsd_u32 (__m128d __A)
@@ -5984,8 +5984,8 @@ _mm_cvtsd_u32 (__m128d __A)
#ifdef __x86_64__
#define _mm_cvt_roundsd_u64(A, R) \
- (unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \
- (int)(R))
+ ((unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \
+ (int)(R)))
static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
_mm_cvtsd_u64 (__m128d __A)
@@ -5997,21 +5997,21 @@ _mm_cvtsd_u64 (__m128d __A)
#endif
#define _mm_cvt_roundss_si32(A, R) \
- (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R))
+ ((int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)))
#define _mm_cvt_roundss_i32(A, R) \
- (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R))
+ ((int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)))
#ifdef __x86_64__
#define _mm_cvt_roundss_si64(A, R) \
- (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R))
+ ((long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)))
#define _mm_cvt_roundss_i64(A, R) \
- (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R))
+ ((long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)))
#endif
#define _mm_cvt_roundss_u32(A, R) \
- (unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R))
+ ((unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R)))
static __inline__ unsigned __DEFAULT_FN_ATTRS128
_mm_cvtss_u32 (__m128 __A)
@@ -6022,8 +6022,8 @@ _mm_cvtss_u32 (__m128 __A)
#ifdef __x86_64__
#define _mm_cvt_roundss_u64(A, R) \
- (unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \
- (int)(R))
+ ((unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \
+ (int)(R)))
static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
_mm_cvtss_u64 (__m128 __A)
@@ -6035,10 +6035,10 @@ _mm_cvtss_u64 (__m128 __A)
#endif
#define _mm_cvtt_roundsd_i32(A, R) \
- (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R))
+ ((int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)))
#define _mm_cvtt_roundsd_si32(A, R) \
- (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R))
+ ((int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)))
static __inline__ int __DEFAULT_FN_ATTRS128
_mm_cvttsd_i32 (__m128d __A)
@@ -6049,10 +6049,10 @@ _mm_cvttsd_i32 (__m128d __A)
#ifdef __x86_64__
#define _mm_cvtt_roundsd_si64(A, R) \
- (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R))
+ ((long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)))
#define _mm_cvtt_roundsd_i64(A, R) \
- (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R))
+ ((long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)))
static __inline__ long long __DEFAULT_FN_ATTRS128
_mm_cvttsd_i64 (__m128d __A)
@@ -6063,7 +6063,7 @@ _mm_cvttsd_i64 (__m128d __A)
#endif
#define _mm_cvtt_roundsd_u32(A, R) \
- (unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R))
+ ((unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R)))
static __inline__ unsigned __DEFAULT_FN_ATTRS128
_mm_cvttsd_u32 (__m128d __A)
@@ -6074,8 +6074,8 @@ _mm_cvttsd_u32 (__m128d __A)
#ifdef __x86_64__
#define _mm_cvtt_roundsd_u64(A, R) \
- (unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \
- (int)(R))
+ ((unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \
+ (int)(R)))
static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
_mm_cvttsd_u64 (__m128d __A)
@@ -6087,10 +6087,10 @@ _mm_cvttsd_u64 (__m128d __A)
#endif
#define _mm_cvtt_roundss_i32(A, R) \
- (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R))
+ ((int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)))
#define _mm_cvtt_roundss_si32(A, R) \
- (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R))
+ ((int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)))
static __inline__ int __DEFAULT_FN_ATTRS128
_mm_cvttss_i32 (__m128 __A)
@@ -6101,10 +6101,10 @@ _mm_cvttss_i32 (__m128 __A)
#ifdef __x86_64__
#define _mm_cvtt_roundss_i64(A, R) \
- (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R))
+ ((long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)))
#define _mm_cvtt_roundss_si64(A, R) \
- (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R))
+ ((long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)))
static __inline__ long long __DEFAULT_FN_ATTRS128
_mm_cvttss_i64 (__m128 __A)
@@ -6115,7 +6115,7 @@ _mm_cvttss_i64 (__m128 __A)
#endif
#define _mm_cvtt_roundss_u32(A, R) \
- (unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R))
+ ((unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R)))
static __inline__ unsigned __DEFAULT_FN_ATTRS128
_mm_cvttss_u32 (__m128 __A)
@@ -6126,8 +6126,8 @@ _mm_cvttss_u32 (__m128 __A)
#ifdef __x86_64__
#define _mm_cvtt_roundss_u64(A, R) \
- (unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \
- (int)(R))
+ ((unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \
+ (int)(R)))
static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
_mm_cvttss_u64 (__m128 __A)
@@ -6139,30 +6139,30 @@ _mm_cvttss_u64 (__m128 __A)
#endif
#define _mm512_permute_pd(X, C) \
- (__m512d)__builtin_ia32_vpermilpd512((__v8df)(__m512d)(X), (int)(C))
+ ((__m512d)__builtin_ia32_vpermilpd512((__v8df)(__m512d)(X), (int)(C)))
#define _mm512_mask_permute_pd(W, U, X, C) \
- (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
- (__v8df)_mm512_permute_pd((X), (C)), \
- (__v8df)(__m512d)(W))
+ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ (__v8df)_mm512_permute_pd((X), (C)), \
+ (__v8df)(__m512d)(W)))
#define _mm512_maskz_permute_pd(U, X, C) \
- (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
- (__v8df)_mm512_permute_pd((X), (C)), \
- (__v8df)_mm512_setzero_pd())
+ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ (__v8df)_mm512_permute_pd((X), (C)), \
+ (__v8df)_mm512_setzero_pd()))
#define _mm512_permute_ps(X, C) \
- (__m512)__builtin_ia32_vpermilps512((__v16sf)(__m512)(X), (int)(C))
+ ((__m512)__builtin_ia32_vpermilps512((__v16sf)(__m512)(X), (int)(C)))
#define _mm512_mask_permute_ps(W, U, X, C) \
- (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
- (__v16sf)_mm512_permute_ps((X), (C)), \
- (__v16sf)(__m512)(W))
+ ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ (__v16sf)_mm512_permute_ps((X), (C)), \
+ (__v16sf)(__m512)(W)))
#define _mm512_maskz_permute_ps(U, X, C) \
- (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
- (__v16sf)_mm512_permute_ps((X), (C)), \
- (__v16sf)_mm512_setzero_ps())
+ ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ (__v16sf)_mm512_permute_ps((X), (C)), \
+ (__v16sf)_mm512_setzero_ps()))
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_permutevar_pd(__m512d __A, __m512i __C)
@@ -6274,19 +6274,19 @@ _mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I, __m512 __B)
#define _mm512_cvtt_roundpd_epu32(A, R) \
- (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
- (__v8si)_mm256_undefined_si256(), \
- (__mmask8)-1, (int)(R))
+ ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
+ (__v8si)_mm256_undefined_si256(), \
+ (__mmask8)-1, (int)(R)))
#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) \
- (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
- (__v8si)(__m256i)(W), \
- (__mmask8)(U), (int)(R))
+ ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
+ (__v8si)(__m256i)(W), \
+ (__mmask8)(U), (int)(R)))
#define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) \
- (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
- (__v8si)_mm256_setzero_si256(), \
- (__mmask8)(U), (int)(R))
+ ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
+ (__v8si)_mm256_setzero_si256(), \
+ (__mmask8)(U), (int)(R)))
static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_cvttpd_epu32 (__m512d __A)
@@ -6318,106 +6318,106 @@ _mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
}
#define _mm_roundscale_round_sd(A, B, imm, R) \
- (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)-1, (int)(imm), \
- (int)(R))
+ ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1, (int)(imm), \
+ (int)(R)))
#define _mm_roundscale_sd(A, B, imm) \
- (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)-1, (int)(imm), \
- _MM_FROUND_CUR_DIRECTION)
+ ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1, (int)(imm), \
+ _MM_FROUND_CUR_DIRECTION))
#define _mm_mask_roundscale_sd(W, U, A, B, imm) \
- (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)(__m128d)(W), \
- (__mmask8)(U), (int)(imm), \
- _MM_FROUND_CUR_DIRECTION)
+ ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(W), \
+ (__mmask8)(U), (int)(imm), \
+ _MM_FROUND_CUR_DIRECTION))
#define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) \
- (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)(__m128d)(W), \
- (__mmask8)(U), (int)(I), \
- (int)(R))
+ ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(W), \
+ (__mmask8)(U), (int)(I), \
+ (int)(R)))
#define _mm_maskz_roundscale_sd(U, A, B, I) \
- (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)(U), (int)(I), \
- _MM_FROUND_CUR_DIRECTION)
+ ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U), (int)(I), \
+ _MM_FROUND_CUR_DIRECTION))
#define _mm_maskz_roundscale_round_sd(U, A, B, I, R) \
- (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)(U), (int)(I), \
- (int)(R))
+ ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U), (int)(I), \
+ (int)(R)))
#define _mm_roundscale_round_ss(A, B, imm, R) \
- (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)-1, (int)(imm), \
- (int)(R))
+ ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)-1, (int)(imm), \
+ (int)(R)))
#define _mm_roundscale_ss(A, B, imm) \
- (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)-1, (int)(imm), \
- _MM_FROUND_CUR_DIRECTION)
+ ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)-1, (int)(imm), \
+ _MM_FROUND_CUR_DIRECTION))
#define _mm_mask_roundscale_ss(W, U, A, B, I) \
- (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)(__m128)(W), \
- (__mmask8)(U), (int)(I), \
- _MM_FROUND_CUR_DIRECTION)
+ ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(W), \
+ (__mmask8)(U), (int)(I), \
+ _MM_FROUND_CUR_DIRECTION))
#define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) \
- (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)(__m128)(W), \
- (__mmask8)(U), (int)(I), \
- (int)(R))
+ ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(W), \
+ (__mmask8)(U), (int)(I), \
+ (int)(R)))
#define _mm_maskz_roundscale_ss(U, A, B, I) \
- (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)(U), (int)(I), \
- _MM_FROUND_CUR_DIRECTION)
+ ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U), (int)(I), \
+ _MM_FROUND_CUR_DIRECTION))
#define _mm_maskz_roundscale_round_ss(U, A, B, I, R) \
- (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)(U), (int)(I), \
- (int)(R))
+ ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U), (int)(I), \
+ (int)(R)))
#define _mm512_scalef_round_pd(A, B, R) \
- (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)_mm512_undefined_pd(), \
- (__mmask8)-1, (int)(R))
+ ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)_mm512_undefined_pd(), \
+ (__mmask8)-1, (int)(R)))
#define _mm512_mask_scalef_round_pd(W, U, A, B, R) \
- (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)(__m512d)(W), \
- (__mmask8)(U), (int)(R))
+ ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)(__m512d)(W), \
+ (__mmask8)(U), (int)(R)))
#define _mm512_maskz_scalef_round_pd(U, A, B, R) \
- (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)(U), (int)(R))
+ ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(U), (int)(R)))
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_scalef_pd (__m512d __A, __m512d __B)
@@ -6452,22 +6452,22 @@ _mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
}
#define _mm512_scalef_round_ps(A, B, R) \
- (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)_mm512_undefined_ps(), \
- (__mmask16)-1, (int)(R))
+ ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)_mm512_undefined_ps(), \
+ (__mmask16)-1, (int)(R)))
#define _mm512_mask_scalef_round_ps(W, U, A, B, R) \
- (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)(__m512)(W), \
- (__mmask16)(U), (int)(R))
+ ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)(__m512)(W), \
+ (__mmask16)(U), (int)(R)))
#define _mm512_maskz_scalef_round_ps(U, A, B, R) \
- (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)(U), (int)(R))
+ ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(U), (int)(R)))
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_scalef_ps (__m512 __A, __m512 __B)
@@ -6502,10 +6502,10 @@ _mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
}
#define _mm_scalef_round_sd(A, B, R) \
- (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)-1, (int)(R))
+ ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1, (int)(R)))
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_scalef_sd (__m128d __A, __m128d __B)
@@ -6527,10 +6527,10 @@ _mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
}
#define _mm_mask_scalef_round_sd(W, U, A, B, R) \
- (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)(__m128d)(W), \
- (__mmask8)(U), (int)(R))
+ ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(W), \
+ (__mmask8)(U), (int)(R)))
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B)
@@ -6543,16 +6543,16 @@ _mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B)
}
#define _mm_maskz_scalef_round_sd(U, A, B, R) \
- (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)(U), (int)(R))
+ ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U), (int)(R)))
#define _mm_scalef_round_ss(A, B, R) \
- (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)-1, (int)(R))
+ ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)-1, (int)(R)))
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_scalef_ss (__m128 __A, __m128 __B)
@@ -6574,10 +6574,10 @@ _mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
}
#define _mm_mask_scalef_round_ss(W, U, A, B, R) \
- (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)(__m128)(W), \
- (__mmask8)(U), (int)(R))
+ ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(W), \
+ (__mmask8)(U), (int)(R)))
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B)
@@ -6590,11 +6590,11 @@ _mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B)
}
#define _mm_maskz_scalef_round_ss(U, A, B, R) \
- (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)(U), \
- (int)(R))
+ ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U), \
+ (int)(R)))
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_srai_epi32(__m512i __A, unsigned int __B)
@@ -6642,94 +6642,94 @@ _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
}
#define _mm512_shuffle_f32x4(A, B, imm) \
- (__m512)__builtin_ia32_shuf_f32x4((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), (int)(imm))
+ ((__m512)__builtin_ia32_shuf_f32x4((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), (int)(imm)))
#define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) \
- (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
- (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
- (__v16sf)(__m512)(W))
+ ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
+ (__v16sf)(__m512)(W)))
#define _mm512_maskz_shuffle_f32x4(U, A, B, imm) \
- (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
- (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
- (__v16sf)_mm512_setzero_ps())
+ ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
+ (__v16sf)_mm512_setzero_ps()))
#define _mm512_shuffle_f64x2(A, B, imm) \
- (__m512d)__builtin_ia32_shuf_f64x2((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), (int)(imm))
+ ((__m512d)__builtin_ia32_shuf_f64x2((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), (int)(imm)))
#define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) \
- (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
- (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
- (__v8df)(__m512d)(W))
+ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
+ (__v8df)(__m512d)(W)))
#define _mm512_maskz_shuffle_f64x2(U, A, B, imm) \
- (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
- (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
- (__v8df)_mm512_setzero_pd())
+ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
+ (__v8df)_mm512_setzero_pd()))
#define _mm512_shuffle_i32x4(A, B, imm) \
- (__m512i)__builtin_ia32_shuf_i32x4((__v16si)(__m512i)(A), \
- (__v16si)(__m512i)(B), (int)(imm))
+ ((__m512i)__builtin_ia32_shuf_i32x4((__v16si)(__m512i)(A), \
+ (__v16si)(__m512i)(B), (int)(imm)))
#define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) \
- (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
- (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
- (__v16si)(__m512i)(W))
+ ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
+ (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
+ (__v16si)(__m512i)(W)))
#define _mm512_maskz_shuffle_i32x4(U, A, B, imm) \
- (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
- (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
- (__v16si)_mm512_setzero_si512())
+ ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
+ (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
+ (__v16si)_mm512_setzero_si512()))
#define _mm512_shuffle_i64x2(A, B, imm) \
- (__m512i)__builtin_ia32_shuf_i64x2((__v8di)(__m512i)(A), \
- (__v8di)(__m512i)(B), (int)(imm))
+ ((__m512i)__builtin_ia32_shuf_i64x2((__v8di)(__m512i)(A), \
+ (__v8di)(__m512i)(B), (int)(imm)))
#define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) \
- (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
- (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
- (__v8di)(__m512i)(W))
+ ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
+ (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
+ (__v8di)(__m512i)(W)))
#define _mm512_maskz_shuffle_i64x2(U, A, B, imm) \
- (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
- (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
- (__v8di)_mm512_setzero_si512())
+ ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
+ (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
+ (__v8di)_mm512_setzero_si512()))
#define _mm512_shuffle_pd(A, B, M) \
- (__m512d)__builtin_ia32_shufpd512((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), (int)(M))
+ ((__m512d)__builtin_ia32_shufpd512((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), (int)(M)))
#define _mm512_mask_shuffle_pd(W, U, A, B, M) \
- (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
- (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
- (__v8df)(__m512d)(W))
+ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
+ (__v8df)(__m512d)(W)))
#define _mm512_maskz_shuffle_pd(U, A, B, M) \
- (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
- (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
- (__v8df)_mm512_setzero_pd())
+ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
+ (__v8df)_mm512_setzero_pd()))
#define _mm512_shuffle_ps(A, B, M) \
- (__m512)__builtin_ia32_shufps512((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), (int)(M))
+ ((__m512)__builtin_ia32_shufps512((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), (int)(M)))
#define _mm512_mask_shuffle_ps(W, U, A, B, M) \
- (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
- (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
- (__v16sf)(__m512)(W))
+ ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
+ (__v16sf)(__m512)(W)))
#define _mm512_maskz_shuffle_ps(U, A, B, M) \
- (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
- (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
- (__v16sf)_mm512_setzero_ps())
+ ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
+ (__v16sf)_mm512_setzero_ps()))
#define _mm_sqrt_round_sd(A, B, R) \
- (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)-1, (int)(R))
+ ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1, (int)(R)))
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
@@ -6742,10 +6742,10 @@ _mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
}
#define _mm_mask_sqrt_round_sd(W, U, A, B, R) \
- (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)(__m128d)(W), \
- (__mmask8)(U), (int)(R))
+ ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(W), \
+ (__mmask8)(U), (int)(R)))
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B)
@@ -6758,16 +6758,16 @@ _mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B)
}
#define _mm_maskz_sqrt_round_sd(U, A, B, R) \
- (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)(U), (int)(R))
+ ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U), (int)(R)))
#define _mm_sqrt_round_ss(A, B, R) \
- (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)-1, (int)(R))
+ ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)-1, (int)(R)))
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
@@ -6780,10 +6780,10 @@ _mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
}
#define _mm_mask_sqrt_round_ss(W, U, A, B, R) \
- (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)(__m128)(W), (__mmask8)(U), \
- (int)(R))
+ ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(W), (__mmask8)(U), \
+ (int)(R)))
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B)
@@ -6796,10 +6796,10 @@ _mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B)
}
#define _mm_maskz_sqrt_round_ss(U, A, B, R) \
- (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)(U), (int)(R))
+ ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U), (int)(R)))
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_broadcast_f32x4(__m128 __A)
@@ -7366,183 +7366,183 @@ _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
}
#define _mm512_extracti32x4_epi32(A, imm) \
- (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
- (__v4si)_mm_undefined_si128(), \
- (__mmask8)-1)
+ ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
+ (__v4si)_mm_undefined_si128(), \
+ (__mmask8)-1))
#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \
- (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
- (__v4si)(__m128i)(W), \
- (__mmask8)(U))
+ ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
+ (__v4si)(__m128i)(W), \
+ (__mmask8)(U)))
#define _mm512_maskz_extracti32x4_epi32(U, A, imm) \
- (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
- (__v4si)_mm_setzero_si128(), \
- (__mmask8)(U))
+ ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
+ (__v4si)_mm_setzero_si128(), \
+ (__mmask8)(U)))
#define _mm512_extracti64x4_epi64(A, imm) \
- (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
- (__v4di)_mm256_undefined_si256(), \
- (__mmask8)-1)
+ ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
+ (__v4di)_mm256_undefined_si256(), \
+ (__mmask8)-1))
#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \
- (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
- (__v4di)(__m256i)(W), \
- (__mmask8)(U))
+ ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
+ (__v4di)(__m256i)(W), \
+ (__mmask8)(U)))
#define _mm512_maskz_extracti64x4_epi64(U, A, imm) \
- (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
- (__v4di)_mm256_setzero_si256(), \
- (__mmask8)(U))
+ ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
+ (__v4di)_mm256_setzero_si256(), \
+ (__mmask8)(U)))
#define _mm512_insertf64x4(A, B, imm) \
- (__m512d)__builtin_ia32_insertf64x4((__v8df)(__m512d)(A), \
- (__v4df)(__m256d)(B), (int)(imm))
+ ((__m512d)__builtin_ia32_insertf64x4((__v8df)(__m512d)(A), \
+ (__v4df)(__m256d)(B), (int)(imm)))
#define _mm512_mask_insertf64x4(W, U, A, B, imm) \
- (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
- (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
- (__v8df)(__m512d)(W))
+ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
+ (__v8df)(__m512d)(W)))
#define _mm512_maskz_insertf64x4(U, A, B, imm) \
- (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
- (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
- (__v8df)_mm512_setzero_pd())
+ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
+ (__v8df)_mm512_setzero_pd()))
#define _mm512_inserti64x4(A, B, imm) \
- (__m512i)__builtin_ia32_inserti64x4((__v8di)(__m512i)(A), \
- (__v4di)(__m256i)(B), (int)(imm))
+ ((__m512i)__builtin_ia32_inserti64x4((__v8di)(__m512i)(A), \
+ (__v4di)(__m256i)(B), (int)(imm)))
#define _mm512_mask_inserti64x4(W, U, A, B, imm) \
- (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
- (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
- (__v8di)(__m512i)(W))
+ ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
+ (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
+ (__v8di)(__m512i)(W)))
#define _mm512_maskz_inserti64x4(U, A, B, imm) \
- (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
- (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
- (__v8di)_mm512_setzero_si512())
+ ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
+ (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
+ (__v8di)_mm512_setzero_si512()))
#define _mm512_insertf32x4(A, B, imm) \
- (__m512)__builtin_ia32_insertf32x4((__v16sf)(__m512)(A), \
- (__v4sf)(__m128)(B), (int)(imm))
+ ((__m512)__builtin_ia32_insertf32x4((__v16sf)(__m512)(A), \
+ (__v4sf)(__m128)(B), (int)(imm)))
#define _mm512_mask_insertf32x4(W, U, A, B, imm) \
- (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
- (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
- (__v16sf)(__m512)(W))
+ ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
+ (__v16sf)(__m512)(W)))
#define _mm512_maskz_insertf32x4(U, A, B, imm) \
- (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
- (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
- (__v16sf)_mm512_setzero_ps())
+ ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
+ (__v16sf)_mm512_setzero_ps()))
#define _mm512_inserti32x4(A, B, imm) \
- (__m512i)__builtin_ia32_inserti32x4((__v16si)(__m512i)(A), \
- (__v4si)(__m128i)(B), (int)(imm))
+ ((__m512i)__builtin_ia32_inserti32x4((__v16si)(__m512i)(A), \
+ (__v4si)(__m128i)(B), (int)(imm)))
#define _mm512_mask_inserti32x4(W, U, A, B, imm) \
- (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
- (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
- (__v16si)(__m512i)(W))
+ ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
+ (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
+ (__v16si)(__m512i)(W)))
#define _mm512_maskz_inserti32x4(U, A, B, imm) \
- (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
- (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
- (__v16si)_mm512_setzero_si512())
+ ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
+ (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
+ (__v16si)_mm512_setzero_si512()))
#define _mm512_getmant_round_pd(A, B, C, R) \
- (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
- (int)(((C)<<2) | (B)), \
- (__v8df)_mm512_undefined_pd(), \
- (__mmask8)-1, (int)(R))
+ ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
+ (int)(((C)<<2) | (B)), \
+ (__v8df)_mm512_undefined_pd(), \
+ (__mmask8)-1, (int)(R)))
#define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) \
- (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
- (int)(((C)<<2) | (B)), \
- (__v8df)(__m512d)(W), \
- (__mmask8)(U), (int)(R))
+ ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
+ (int)(((C)<<2) | (B)), \
+ (__v8df)(__m512d)(W), \
+ (__mmask8)(U), (int)(R)))
#define _mm512_maskz_getmant_round_pd(U, A, B, C, R) \
- (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
- (int)(((C)<<2) | (B)), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)(U), (int)(R))
+ ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
+ (int)(((C)<<2) | (B)), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(U), (int)(R)))
#define _mm512_getmant_pd(A, B, C) \
- (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
- (int)(((C)<<2) | (B)), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)-1, \
- _MM_FROUND_CUR_DIRECTION)
+ ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
+ (int)(((C)<<2) | (B)), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)-1, \
+ _MM_FROUND_CUR_DIRECTION))
#define _mm512_mask_getmant_pd(W, U, A, B, C) \
- (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
- (int)(((C)<<2) | (B)), \
- (__v8df)(__m512d)(W), \
- (__mmask8)(U), \
- _MM_FROUND_CUR_DIRECTION)
+ ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
+ (int)(((C)<<2) | (B)), \
+ (__v8df)(__m512d)(W), \
+ (__mmask8)(U), \
+ _MM_FROUND_CUR_DIRECTION))
#define _mm512_maskz_getmant_pd(U, A, B, C) \
- (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
- (int)(((C)<<2) | (B)), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)(U), \
- _MM_FROUND_CUR_DIRECTION)
+ ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
+ (int)(((C)<<2) | (B)), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(U), \
+ _MM_FROUND_CUR_DIRECTION))
#define _mm512_getmant_round_ps(A, B, C, R) \
- (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
- (int)(((C)<<2) | (B)), \
- (__v16sf)_mm512_undefined_ps(), \
- (__mmask16)-1, (int)(R))
+ ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
+ (int)(((C)<<2) | (B)), \
+ (__v16sf)_mm512_undefined_ps(), \
+ (__mmask16)-1, (int)(R)))
#define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) \
- (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
- (int)(((C)<<2) | (B)), \
- (__v16sf)(__m512)(W), \
- (__mmask16)(U), (int)(R))
+ ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
+ (int)(((C)<<2) | (B)), \
+ (__v16sf)(__m512)(W), \
+ (__mmask16)(U), (int)(R)))
#define _mm512_maskz_getmant_round_ps(U, A, B, C, R) \
- (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
- (int)(((C)<<2) | (B)), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)(U), (int)(R))
+ ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
+ (int)(((C)<<2) | (B)), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(U), (int)(R)))
#define _mm512_getmant_ps(A, B, C) \
- (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
- (int)(((C)<<2)|(B)), \
- (__v16sf)_mm512_undefined_ps(), \
- (__mmask16)-1, \
- _MM_FROUND_CUR_DIRECTION)
+ ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
+ (int)(((C)<<2)|(B)), \
+ (__v16sf)_mm512_undefined_ps(), \
+ (__mmask16)-1, \
+ _MM_FROUND_CUR_DIRECTION))
#define _mm512_mask_getmant_ps(W, U, A, B, C) \
- (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
- (int)(((C)<<2)|(B)), \
- (__v16sf)(__m512)(W), \
- (__mmask16)(U), \
- _MM_FROUND_CUR_DIRECTION)
+ ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
+ (int)(((C)<<2)|(B)), \
+ (__v16sf)(__m512)(W), \
+ (__mmask16)(U), \
+ _MM_FROUND_CUR_DIRECTION))
#define _mm512_maskz_getmant_ps(U, A, B, C) \
- (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
- (int)(((C)<<2)|(B)), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)(U), \
- _MM_FROUND_CUR_DIRECTION)
+ ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
+ (int)(((C)<<2)|(B)), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(U), \
+ _MM_FROUND_CUR_DIRECTION))
#define _mm512_getexp_round_pd(A, R) \
- (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
- (__v8df)_mm512_undefined_pd(), \
- (__mmask8)-1, (int)(R))
+ ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)_mm512_undefined_pd(), \
+ (__mmask8)-1, (int)(R)))
#define _mm512_mask_getexp_round_pd(W, U, A, R) \
- (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(W), \
- (__mmask8)(U), (int)(R))
+ ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(W), \
+ (__mmask8)(U), (int)(R)))
#define _mm512_maskz_getexp_round_pd(U, A, R) \
- (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)(U), (int)(R))
+ ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(U), (int)(R)))
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_getexp_pd (__m512d __A)
@@ -7572,19 +7572,19 @@ _mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
}
#define _mm512_getexp_round_ps(A, R) \
- (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)_mm512_undefined_ps(), \
- (__mmask16)-1, (int)(R))
+ ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)_mm512_undefined_ps(), \
+ (__mmask16)-1, (int)(R)))
#define _mm512_mask_getexp_round_ps(W, U, A, R) \
- (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(W), \
- (__mmask16)(U), (int)(R))
+ ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(W), \
+ (__mmask16)(U), (int)(R)))
#define _mm512_maskz_getexp_round_ps(U, A, R) \
- (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)(U), (int)(R))
+ ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(U), (int)(R)))
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_getexp_ps (__m512 __A)
@@ -7614,100 +7614,100 @@ _mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
}
#define _mm512_i64gather_ps(index, addr, scale) \
- (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \
- (void const *)(addr), \
- (__v8di)(__m512i)(index), (__mmask8)-1, \
- (int)(scale))
+ ((__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \
+ (void const *)(addr), \
+ (__v8di)(__m512i)(index), (__mmask8)-1, \
+ (int)(scale)))
#define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) \
- (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old),\
- (void const *)(addr), \
- (__v8di)(__m512i)(index), \
- (__mmask8)(mask), (int)(scale))
-
-#define _mm512_i64gather_epi32(index, addr, scale) \
- (__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_si256(), \
+ ((__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old),\
(void const *)(addr), \
(__v8di)(__m512i)(index), \
- (__mmask8)-1, (int)(scale))
+ (__mmask8)(mask), (int)(scale)))
+
+#define _mm512_i64gather_epi32(index, addr, scale) \
+ ((__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_si256(), \
+ (void const *)(addr), \
+ (__v8di)(__m512i)(index), \
+ (__mmask8)-1, (int)(scale)))
#define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) \
- (__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \
- (void const *)(addr), \
- (__v8di)(__m512i)(index), \
- (__mmask8)(mask), (int)(scale))
+ ((__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \
+ (void const *)(addr), \
+ (__v8di)(__m512i)(index), \
+ (__mmask8)(mask), (int)(scale)))
#define _mm512_i64gather_pd(index, addr, scale) \
- (__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \
- (void const *)(addr), \
- (__v8di)(__m512i)(index), (__mmask8)-1, \
- (int)(scale))
+ ((__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \
+ (void const *)(addr), \
+ (__v8di)(__m512i)(index), (__mmask8)-1, \
+ (int)(scale)))
#define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) \
- (__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \
- (void const *)(addr), \
- (__v8di)(__m512i)(index), \
- (__mmask8)(mask), (int)(scale))
+ ((__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \
+ (void const *)(addr), \
+ (__v8di)(__m512i)(index), \
+ (__mmask8)(mask), (int)(scale)))
#define _mm512_i64gather_epi64(index, addr, scale) \
- (__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_epi32(), \
- (void const *)(addr), \
- (__v8di)(__m512i)(index), (__mmask8)-1, \
- (int)(scale))
+ ((__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_epi32(), \
+ (void const *)(addr), \
+ (__v8di)(__m512i)(index), (__mmask8)-1, \
+ (int)(scale)))
#define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) \
- (__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \
- (void const *)(addr), \
- (__v8di)(__m512i)(index), \
- (__mmask8)(mask), (int)(scale))
+ ((__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \
+ (void const *)(addr), \
+ (__v8di)(__m512i)(index), \
+ (__mmask8)(mask), (int)(scale)))
#define _mm512_i32gather_ps(index, addr, scale) \
- (__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \
- (void const *)(addr), \
- (__v16si)(__m512)(index), \
- (__mmask16)-1, (int)(scale))
+ ((__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \
+ (void const *)(addr), \
+ (__v16si)(__m512)(index), \
+ (__mmask16)-1, (int)(scale)))
#define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) \
- (__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \
- (void const *)(addr), \
- (__v16si)(__m512)(index), \
- (__mmask16)(mask), (int)(scale))
+ ((__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \
+ (void const *)(addr), \
+ (__v16si)(__m512)(index), \
+ (__mmask16)(mask), (int)(scale)))
#define _mm512_i32gather_epi32(index, addr, scale) \
- (__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \
- (void const *)(addr), \
- (__v16si)(__m512i)(index), \
- (__mmask16)-1, (int)(scale))
+ ((__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \
+ (void const *)(addr), \
+ (__v16si)(__m512i)(index), \
+ (__mmask16)-1, (int)(scale)))
#define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) \
- (__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \
- (void const *)(addr), \
- (__v16si)(__m512i)(index), \
- (__mmask16)(mask), (int)(scale))
+ ((__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \
+ (void const *)(addr), \
+ (__v16si)(__m512i)(index), \
+ (__mmask16)(mask), (int)(scale)))
#define _mm512_i32gather_pd(index, addr, scale) \
- (__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \
- (void const *)(addr), \
- (__v8si)(__m256i)(index), (__mmask8)-1, \
- (int)(scale))
+ ((__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \
+ (void const *)(addr), \
+ (__v8si)(__m256i)(index), (__mmask8)-1, \
+ (int)(scale)))
#define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) \
- (__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \
- (void const *)(addr), \
- (__v8si)(__m256i)(index), \
- (__mmask8)(mask), (int)(scale))
+ ((__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \
+ (void const *)(addr), \
+ (__v8si)(__m256i)(index), \
+ (__mmask8)(mask), (int)(scale)))
#define _mm512_i32gather_epi64(index, addr, scale) \
- (__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \
- (void const *)(addr), \
- (__v8si)(__m256i)(index), (__mmask8)-1, \
- (int)(scale))
+ ((__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \
+ (void const *)(addr), \
+ (__v8si)(__m256i)(index), (__mmask8)-1, \
+ (int)(scale)))
#define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) \
- (__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \
- (void const *)(addr), \
- (__v8si)(__m256i)(index), \
- (__mmask8)(mask), (int)(scale))
+ ((__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \
+ (void const *)(addr), \
+ (__v8si)(__m256i)(index), \
+ (__mmask8)(mask), (int)(scale)))
#define _mm512_i64scatter_ps(addr, index, v1, scale) \
__builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)-1, \
@@ -7800,16 +7800,16 @@ _mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
}
#define _mm_fmadd_round_ss(A, B, C, R) \
- (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)(__m128)(C), (__mmask8)-1, \
- (int)(R))
+ ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(C), (__mmask8)-1, \
+ (int)(R)))
#define _mm_mask_fmadd_round_ss(W, U, A, B, R) \
- (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
- (__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), (__mmask8)(U), \
- (int)(R))
+ ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
+ (__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), (__mmask8)(U), \
+ (int)(R)))
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
@@ -7822,10 +7822,10 @@ _mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
}
#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) \
- (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)(__m128)(C), (__mmask8)(U), \
- (int)(R))
+ ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(C), (__mmask8)(U), \
+ (int)(R)))
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
@@ -7838,10 +7838,10 @@ _mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
}
#define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) \
- (__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
- (__v4sf)(__m128)(X), \
- (__v4sf)(__m128)(Y), (__mmask8)(U), \
- (int)(R))
+ ((__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
+ (__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (__mmask8)(U), \
+ (int)(R)))
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
@@ -7854,16 +7854,16 @@ _mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
}
#define _mm_fmsub_round_ss(A, B, C, R) \
- (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- -(__v4sf)(__m128)(C), (__mmask8)-1, \
- (int)(R))
+ ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ -(__v4sf)(__m128)(C), (__mmask8)-1, \
+ (int)(R)))
#define _mm_mask_fmsub_round_ss(W, U, A, B, R) \
- (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
- (__v4sf)(__m128)(A), \
- -(__v4sf)(__m128)(B), (__mmask8)(U), \
- (int)(R))
+ ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
+ (__v4sf)(__m128)(A), \
+ -(__v4sf)(__m128)(B), (__mmask8)(U), \
+ (int)(R)))
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
@@ -7876,10 +7876,10 @@ _mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
}
#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) \
- (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- -(__v4sf)(__m128)(C), (__mmask8)(U), \
- (int)(R))
+ ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ -(__v4sf)(__m128)(C), (__mmask8)(U), \
+ (int)(R)))
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
@@ -7892,10 +7892,10 @@ _mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
}
#define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) \
- (__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
- (__v4sf)(__m128)(X), \
- (__v4sf)(__m128)(Y), (__mmask8)(U), \
- (int)(R))
+ ((__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
+ (__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (__mmask8)(U), \
+ (int)(R)))
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
@@ -7908,16 +7908,16 @@ _mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
}
#define _mm_fnmadd_round_ss(A, B, C, R) \
- (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
- -(__v4sf)(__m128)(B), \
- (__v4sf)(__m128)(C), (__mmask8)-1, \
- (int)(R))
+ ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
+ -(__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(C), (__mmask8)-1, \
+ (int)(R)))
#define _mm_mask_fnmadd_round_ss(W, U, A, B, R) \
- (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
- -(__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), (__mmask8)(U), \
- (int)(R))
+ ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
+ -(__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), (__mmask8)(U), \
+ (int)(R)))
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
@@ -7930,10 +7930,10 @@ _mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
}
#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) \
- (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
- -(__v4sf)(__m128)(B), \
- (__v4sf)(__m128)(C), (__mmask8)(U), \
- (int)(R))
+ ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
+ -(__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(C), (__mmask8)(U), \
+ (int)(R)))
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
@@ -7946,10 +7946,10 @@ _mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
}
#define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) \
- (__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
- -(__v4sf)(__m128)(X), \
- (__v4sf)(__m128)(Y), (__mmask8)(U), \
- (int)(R))
+ ((__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
+ -(__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (__mmask8)(U), \
+ (int)(R)))
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
@@ -7962,16 +7962,16 @@ _mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
}
#define _mm_fnmsub_round_ss(A, B, C, R) \
- (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
- -(__v4sf)(__m128)(B), \
- -(__v4sf)(__m128)(C), (__mmask8)-1, \
- (int)(R))
+ ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
+ -(__v4sf)(__m128)(B), \
+ -(__v4sf)(__m128)(C), (__mmask8)-1, \
+ (int)(R)))
#define _mm_mask_fnmsub_round_ss(W, U, A, B, R) \
- (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
- -(__v4sf)(__m128)(A), \
- -(__v4sf)(__m128)(B), (__mmask8)(U), \
- (int)(R))
+ ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
+ -(__v4sf)(__m128)(A), \
+ -(__v4sf)(__m128)(B), (__mmask8)(U), \
+ (int)(R)))
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
@@ -7984,10 +7984,10 @@ _mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
}
#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) \
- (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
- -(__v4sf)(__m128)(B), \
- -(__v4sf)(__m128)(C), (__mmask8)(U), \
- (int)(R))
+ ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
+ -(__v4sf)(__m128)(B), \
+ -(__v4sf)(__m128)(C), (__mmask8)(U), \
+ (int)(R)))
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
@@ -8000,10 +8000,10 @@ _mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
}
#define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) \
- (__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
- -(__v4sf)(__m128)(X), \
- (__v4sf)(__m128)(Y), (__mmask8)(U), \
- (int)(R))
+ ((__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
+ -(__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (__mmask8)(U), \
+ (int)(R)))
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
@@ -8016,16 +8016,16 @@ _mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
}
#define _mm_fmadd_round_sd(A, B, C, R) \
- (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)(__m128d)(C), (__mmask8)-1, \
- (int)(R))
+ ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(C), (__mmask8)-1, \
+ (int)(R)))
#define _mm_mask_fmadd_round_sd(W, U, A, B, R) \
- (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
- (__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), (__mmask8)(U), \
- (int)(R))
+ ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
+ (__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), (__mmask8)(U), \
+ (int)(R)))
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
@@ -8038,10 +8038,10 @@ _mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
}
#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) \
- (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)(__m128d)(C), (__mmask8)(U), \
- (int)(R))
+ ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(C), (__mmask8)(U), \
+ (int)(R)))
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
@@ -8054,10 +8054,10 @@ _mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
}
#define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) \
- (__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
- (__v2df)(__m128d)(X), \
- (__v2df)(__m128d)(Y), (__mmask8)(U), \
- (int)(R))
+ ((__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
+ (__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (__mmask8)(U), \
+ (int)(R)))
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
@@ -8070,16 +8070,16 @@ _mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
}
#define _mm_fmsub_round_sd(A, B, C, R) \
- (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- -(__v2df)(__m128d)(C), (__mmask8)-1, \
- (int)(R))
+ ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ -(__v2df)(__m128d)(C), (__mmask8)-1, \
+ (int)(R)))
#define _mm_mask_fmsub_round_sd(W, U, A, B, R) \
- (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
- (__v2df)(__m128d)(A), \
- -(__v2df)(__m128d)(B), (__mmask8)(U), \
- (int)(R))
+ ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
+ (__v2df)(__m128d)(A), \
+ -(__v2df)(__m128d)(B), (__mmask8)(U), \
+ (int)(R)))
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
@@ -8092,10 +8092,10 @@ _mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
}
#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) \
- (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- -(__v2df)(__m128d)(C), \
- (__mmask8)(U), (int)(R))
+ ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ -(__v2df)(__m128d)(C), \
+ (__mmask8)(U), (int)(R)))
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
@@ -8108,10 +8108,10 @@ _mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
}
#define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) \
- (__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
- (__v2df)(__m128d)(X), \
- (__v2df)(__m128d)(Y), \
- (__mmask8)(U), (int)(R))
+ ((__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
+ (__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), \
+ (__mmask8)(U), (int)(R)))
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
@@ -8124,16 +8124,16 @@ _mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
}
#define _mm_fnmadd_round_sd(A, B, C, R) \
- (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
- -(__v2df)(__m128d)(B), \
- (__v2df)(__m128d)(C), (__mmask8)-1, \
- (int)(R))
+ ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
+ -(__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(C), (__mmask8)-1, \
+ (int)(R)))
#define _mm_mask_fnmadd_round_sd(W, U, A, B, R) \
- (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
- -(__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), (__mmask8)(U), \
- (int)(R))
+ ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
+ -(__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), (__mmask8)(U), \
+ (int)(R)))
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
@@ -8146,10 +8146,10 @@ _mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
}
#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) \
- (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
- -(__v2df)(__m128d)(B), \
- (__v2df)(__m128d)(C), (__mmask8)(U), \
- (int)(R))
+ ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
+ -(__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(C), (__mmask8)(U), \
+ (int)(R)))
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
@@ -8162,10 +8162,10 @@ _mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
}
#define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) \
- (__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
- -(__v2df)(__m128d)(X), \
- (__v2df)(__m128d)(Y), (__mmask8)(U), \
- (int)(R))
+ ((__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
+ -(__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (__mmask8)(U), \
+ (int)(R)))
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
@@ -8178,16 +8178,16 @@ _mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
}
#define _mm_fnmsub_round_sd(A, B, C, R) \
- (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
- -(__v2df)(__m128d)(B), \
- -(__v2df)(__m128d)(C), (__mmask8)-1, \
- (int)(R))
+ ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
+ -(__v2df)(__m128d)(B), \
+ -(__v2df)(__m128d)(C), (__mmask8)-1, \
+ (int)(R)))
#define _mm_mask_fnmsub_round_sd(W, U, A, B, R) \
- (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
- -(__v2df)(__m128d)(A), \
- -(__v2df)(__m128d)(B), (__mmask8)(U), \
- (int)(R))
+ ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
+ -(__v2df)(__m128d)(A), \
+ -(__v2df)(__m128d)(B), (__mmask8)(U), \
+ (int)(R)))
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
@@ -8200,11 +8200,11 @@ _mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
}
#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) \
- (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
- -(__v2df)(__m128d)(B), \
- -(__v2df)(__m128d)(C), \
- (__mmask8)(U), \
- (int)(R))
+ ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
+ -(__v2df)(__m128d)(B), \
+ -(__v2df)(__m128d)(C), \
+ (__mmask8)(U), \
+ (int)(R)))
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
@@ -8217,36 +8217,36 @@ _mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
}
#define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) \
- (__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
- -(__v2df)(__m128d)(X), \
- (__v2df)(__m128d)(Y), \
- (__mmask8)(U), (int)(R))
+ ((__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
+ -(__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), \
+ (__mmask8)(U), (int)(R)))
#define _mm512_permutex_pd(X, C) \
- (__m512d)__builtin_ia32_permdf512((__v8df)(__m512d)(X), (int)(C))
+ ((__m512d)__builtin_ia32_permdf512((__v8df)(__m512d)(X), (int)(C)))
#define _mm512_mask_permutex_pd(W, U, X, C) \
- (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
- (__v8df)_mm512_permutex_pd((X), (C)), \
- (__v8df)(__m512d)(W))
+ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ (__v8df)_mm512_permutex_pd((X), (C)), \
+ (__v8df)(__m512d)(W)))
#define _mm512_maskz_permutex_pd(U, X, C) \
- (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
- (__v8df)_mm512_permutex_pd((X), (C)), \
- (__v8df)_mm512_setzero_pd())
+ ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ (__v8df)_mm512_permutex_pd((X), (C)), \
+ (__v8df)_mm512_setzero_pd()))
#define _mm512_permutex_epi64(X, C) \
- (__m512i)__builtin_ia32_permdi512((__v8di)(__m512i)(X), (int)(C))
+ ((__m512i)__builtin_ia32_permdi512((__v8di)(__m512i)(X), (int)(C)))
#define _mm512_mask_permutex_epi64(W, U, X, C) \
- (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
- (__v8di)_mm512_permutex_epi64((X), (C)), \
- (__v8di)(__m512i)(W))
+ ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
+ (__v8di)_mm512_permutex_epi64((X), (C)), \
+ (__v8di)(__m512i)(W)))
#define _mm512_maskz_permutex_epi64(U, X, C) \
- (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
- (__v8di)_mm512_permutex_epi64((X), (C)), \
- (__v8di)_mm512_setzero_si512())
+ ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
+ (__v8di)_mm512_permutex_epi64((X), (C)), \
+ (__v8di)_mm512_setzero_si512()))
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_permutexvar_pd (__m512i __X, __m512d __Y)
@@ -8416,10 +8416,10 @@ _mm512_kxor (__mmask16 __A, __mmask16 __B)
#define _kxor_mask16 _mm512_kxor
#define _kshiftli_mask16(A, I) \
- (__mmask16)__builtin_ia32_kshiftlihi((__mmask16)(A), (unsigned int)(I))
+ ((__mmask16)__builtin_ia32_kshiftlihi((__mmask16)(A), (unsigned int)(I)))
#define _kshiftri_mask16(A, I) \
- (__mmask16)__builtin_ia32_kshiftrihi((__mmask16)(A), (unsigned int)(I))
+ ((__mmask16)__builtin_ia32_kshiftrihi((__mmask16)(A), (unsigned int)(I)))
static __inline__ unsigned int __DEFAULT_FN_ATTRS
_cvtmask16_u32(__mmask16 __A) {
@@ -8538,48 +8538,48 @@ _mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
}
#define _mm_cmp_round_ss_mask(X, Y, P, R) \
- (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
- (__v4sf)(__m128)(Y), (int)(P), \
- (__mmask8)-1, (int)(R))
+ ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (int)(P), \
+ (__mmask8)-1, (int)(R)))
#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
- (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
- (__v4sf)(__m128)(Y), (int)(P), \
- (__mmask8)(M), (int)(R))
+ ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (int)(P), \
+ (__mmask8)(M), (int)(R)))
#define _mm_cmp_ss_mask(X, Y, P) \
- (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
- (__v4sf)(__m128)(Y), (int)(P), \
- (__mmask8)-1, \
- _MM_FROUND_CUR_DIRECTION)
+ ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (int)(P), \
+ (__mmask8)-1, \
+ _MM_FROUND_CUR_DIRECTION))
#define _mm_mask_cmp_ss_mask(M, X, Y, P) \
- (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
- (__v4sf)(__m128)(Y), (int)(P), \
- (__mmask8)(M), \
- _MM_FROUND_CUR_DIRECTION)
+ ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (int)(P), \
+ (__mmask8)(M), \
+ _MM_FROUND_CUR_DIRECTION))
#define _mm_cmp_round_sd_mask(X, Y, P, R) \
- (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
- (__v2df)(__m128d)(Y), (int)(P), \
- (__mmask8)-1, (int)(R))
+ ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (int)(P), \
+ (__mmask8)-1, (int)(R)))
#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
- (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
- (__v2df)(__m128d)(Y), (int)(P), \
- (__mmask8)(M), (int)(R))
+ ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (int)(P), \
+ (__mmask8)(M), (int)(R)))
#define _mm_cmp_sd_mask(X, Y, P) \
- (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
- (__v2df)(__m128d)(Y), (int)(P), \
- (__mmask8)-1, \
- _MM_FROUND_CUR_DIRECTION)
+ ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (int)(P), \
+ (__mmask8)-1, \
+ _MM_FROUND_CUR_DIRECTION))
#define _mm_mask_cmp_sd_mask(M, X, Y, P) \
- (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
- (__v2df)(__m128d)(Y), (int)(P), \
- (__mmask8)(M), \
- _MM_FROUND_CUR_DIRECTION)
+ ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (int)(P), \
+ (__mmask8)(M), \
+ _MM_FROUND_CUR_DIRECTION))
/* Bit Test */
@@ -8760,17 +8760,17 @@ _mm_maskz_load_sd (__mmask8 __U, const double* __A)
}
#define _mm512_shuffle_epi32(A, I) \
- (__m512i)__builtin_ia32_pshufd512((__v16si)(__m512i)(A), (int)(I))
+ ((__m512i)__builtin_ia32_pshufd512((__v16si)(__m512i)(A), (int)(I)))
#define _mm512_mask_shuffle_epi32(W, U, A, I) \
- (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
- (__v16si)_mm512_shuffle_epi32((A), (I)), \
- (__v16si)(__m512i)(W))
+ ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
+ (__v16si)_mm512_shuffle_epi32((A), (I)), \
+ (__v16si)(__m512i)(W)))
#define _mm512_maskz_shuffle_epi32(U, A, I) \
- (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
- (__v16si)_mm512_shuffle_epi32((A), (I)), \
- (__v16si)_mm512_setzero_si512())
+ ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
+ (__v16si)_mm512_shuffle_epi32((A), (I)), \
+ (__v16si)_mm512_setzero_si512()))
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
@@ -8901,19 +8901,19 @@ _mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
}
#define _mm512_cvt_roundps_pd(A, R) \
- (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
- (__v8df)_mm512_undefined_pd(), \
- (__mmask8)-1, (int)(R))
+ ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
+ (__v8df)_mm512_undefined_pd(), \
+ (__mmask8)-1, (int)(R)))
#define _mm512_mask_cvt_roundps_pd(W, U, A, R) \
- (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
- (__v8df)(__m512d)(W), \
- (__mmask8)(U), (int)(R))
+ ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
+ (__v8df)(__m512d)(W), \
+ (__mmask8)(U), (int)(R)))
#define _mm512_maskz_cvt_roundps_pd(U, A, R) \
- (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)(U), (int)(R))
+ ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(U), (int)(R)))
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_cvtps_pd (__m256 __A)
@@ -9010,22 +9010,22 @@ _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
}
#define _mm_cvt_roundsd_ss(A, B, R) \
- (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
- (__v2df)(__m128d)(B), \
- (__v4sf)_mm_undefined_ps(), \
- (__mmask8)-1, (int)(R))
+ ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v4sf)_mm_undefined_ps(), \
+ (__mmask8)-1, (int)(R)))
#define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) \
- (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
- (__v2df)(__m128d)(B), \
- (__v4sf)(__m128)(W), \
- (__mmask8)(U), (int)(R))
+ ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v4sf)(__m128)(W), \
+ (__mmask8)(U), (int)(R)))
#define _mm_maskz_cvt_roundsd_ss(U, A, B, R) \
- (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
- (__v2df)(__m128d)(B), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)(U), (int)(R))
+ ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U), (int)(R)))
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
@@ -9058,47 +9058,47 @@ _mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B)
#ifdef __x86_64__
#define _mm_cvt_roundi64_sd(A, B, R) \
- (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
- (int)(R))
+ ((__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
+ (int)(R)))
#define _mm_cvt_roundsi64_sd(A, B, R) \
- (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
- (int)(R))
+ ((__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
+ (int)(R)))
#endif
#define _mm_cvt_roundsi32_ss(A, B, R) \
- (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R))
+ ((__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)))
#define _mm_cvt_roundi32_ss(A, B, R) \
- (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R))
+ ((__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)))
#ifdef __x86_64__
#define _mm_cvt_roundsi64_ss(A, B, R) \
- (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
- (int)(R))
+ ((__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
+ (int)(R)))
#define _mm_cvt_roundi64_ss(A, B, R) \
- (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
- (int)(R))
+ ((__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
+ (int)(R)))
#endif
#define _mm_cvt_roundss_sd(A, B, R) \
- (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
- (__v4sf)(__m128)(B), \
- (__v2df)_mm_undefined_pd(), \
- (__mmask8)-1, (int)(R))
+ ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v2df)_mm_undefined_pd(), \
+ (__mmask8)-1, (int)(R)))
#define _mm_mask_cvt_roundss_sd(W, U, A, B, R) \
- (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
- (__v4sf)(__m128)(B), \
- (__v2df)(__m128d)(W), \
- (__mmask8)(U), (int)(R))
+ ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v2df)(__m128d)(W), \
+ (__mmask8)(U), (int)(R)))
#define _mm_maskz_cvt_roundss_sd(U, A, B, R) \
- (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
- (__v4sf)(__m128)(B), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)(U), (int)(R))
+ ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U), (int)(R)))
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
@@ -9127,8 +9127,8 @@ _mm_cvtu32_sd (__m128d __A, unsigned __B)
#ifdef __x86_64__
#define _mm_cvt_roundu64_sd(A, B, R) \
- (__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \
- (unsigned long long)(B), (int)(R))
+ ((__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \
+ (unsigned long long)(B), (int)(R)))
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_cvtu64_sd (__m128d __A, unsigned long long __B)
@@ -9139,8 +9139,8 @@ _mm_cvtu64_sd (__m128d __A, unsigned long long __B)
#endif
#define _mm_cvt_roundu32_ss(A, B, R) \
- (__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \
- (int)(R))
+ ((__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \
+ (int)(R)))
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_cvtu32_ss (__m128 __A, unsigned __B)
@@ -9151,8 +9151,8 @@ _mm_cvtu32_ss (__m128 __A, unsigned __B)
#ifdef __x86_64__
#define _mm_cvt_roundu64_ss(A, B, R) \
- (__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \
- (unsigned long long)(B), (int)(R))
+ ((__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \
+ (unsigned long long)(B), (int)(R)))
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_cvtu64_ss (__m128 __A, unsigned long long __B)
diff --git a/clang/lib/Headers/avx512fp16intrin.h b/clang/lib/Headers/avx512fp16intrin.h
new file mode 100644
index 000000000000..99409a31b32b
--- /dev/null
+++ b/clang/lib/Headers/avx512fp16intrin.h
@@ -0,0 +1,3349 @@
+/*===----------- avx512fp16intrin.h - AVX512-FP16 intrinsics ---------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error "Never use <avx512fp16intrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __AVX512FP16INTRIN_H
+#define __AVX512FP16INTRIN_H
+
+/* Define the half-precision vector types used in this file. */
+typedef _Float16 __v32hf __attribute__((__vector_size__(64), __aligned__(64)));
+typedef _Float16 __m512h __attribute__((__vector_size__(64), __aligned__(64)));
+typedef _Float16 __m512h_u __attribute__((__vector_size__(64), __aligned__(1)));
+typedef _Float16 __v8hf __attribute__((__vector_size__(16), __aligned__(16)));
+typedef _Float16 __m128h __attribute__((__vector_size__(16), __aligned__(16)));
+typedef _Float16 __m128h_u __attribute__((__vector_size__(16), __aligned__(1)));
+typedef _Float16 __v16hf __attribute__((__vector_size__(32), __aligned__(32)));
+typedef _Float16 __m256h __attribute__((__vector_size__(32), __aligned__(32)));
+typedef _Float16 __m256h_u __attribute__((__vector_size__(32), __aligned__(1)));
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS512 \
+ __attribute__((__always_inline__, __nodebug__, __target__("avx512fp16"), \
+ __min_vector_width__(512)))
+#define __DEFAULT_FN_ATTRS256 \
+ __attribute__((__always_inline__, __nodebug__, __target__("avx512fp16"), \
+ __min_vector_width__(256)))
+#define __DEFAULT_FN_ATTRS128 \
+ __attribute__((__always_inline__, __nodebug__, __target__("avx512fp16"), \
+ __min_vector_width__(128)))
+
+static __inline__ _Float16 __DEFAULT_FN_ATTRS512 _mm512_cvtsh_h(__m512h __a) {
+ return __a[0];
+}
+
+static __inline __m128h __DEFAULT_FN_ATTRS128 _mm_setzero_ph(void) {
+ return (__m128h){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
+}
+
+static __inline __m256h __DEFAULT_FN_ATTRS256 _mm256_setzero_ph(void) {
+ return (__m256h){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_undefined_ph(void) {
+ return (__m256h)__builtin_ia32_undef256();
+}
+
+static __inline __m512h __DEFAULT_FN_ATTRS512 _mm512_setzero_ph(void) {
+ return (__m512h){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_undefined_ph(void) {
+ return (__m128h)__builtin_ia32_undef128();
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_undefined_ph(void) {
+ return (__m512h)__builtin_ia32_undef512();
+}
+
+static __inline __m512h __DEFAULT_FN_ATTRS512 _mm512_set1_ph(_Float16 __h) {
+ return (__m512h)(__v32hf){__h, __h, __h, __h, __h, __h, __h, __h,
+ __h, __h, __h, __h, __h, __h, __h, __h,
+ __h, __h, __h, __h, __h, __h, __h, __h,
+ __h, __h, __h, __h, __h, __h, __h, __h};
+}
+
+static __inline __m512h __DEFAULT_FN_ATTRS512
+_mm512_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4,
+ _Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8,
+ _Float16 __h9, _Float16 __h10, _Float16 __h11, _Float16 __h12,
+ _Float16 __h13, _Float16 __h14, _Float16 __h15, _Float16 __h16,
+ _Float16 __h17, _Float16 __h18, _Float16 __h19, _Float16 __h20,
+ _Float16 __h21, _Float16 __h22, _Float16 __h23, _Float16 __h24,
+ _Float16 __h25, _Float16 __h26, _Float16 __h27, _Float16 __h28,
+ _Float16 __h29, _Float16 __h30, _Float16 __h31, _Float16 __h32) {
+ return (__m512h)(__v32hf){__h32, __h31, __h30, __h29, __h28, __h27, __h26,
+ __h25, __h24, __h23, __h22, __h21, __h20, __h19,
+ __h18, __h17, __h16, __h15, __h14, __h13, __h12,
+ __h11, __h10, __h9, __h8, __h7, __h6, __h5,
+ __h4, __h3, __h2, __h1};
+}
+
+#define _mm512_setr_ph(h1, h2, h3, h4, h5, h6, h7, h8, h9, h10, h11, h12, h13, \
+ h14, h15, h16, h17, h18, h19, h20, h21, h22, h23, h24, \
+ h25, h26, h27, h28, h29, h30, h31, h32) \
+ _mm512_set_ph((h32), (h31), (h30), (h29), (h28), (h27), (h26), (h25), (h24), \
+ (h23), (h22), (h21), (h20), (h19), (h18), (h17), (h16), (h15), \
+ (h14), (h13), (h12), (h11), (h10), (h9), (h8), (h7), (h6), \
+ (h5), (h4), (h3), (h2), (h1))
+
+static __inline __m512h __DEFAULT_FN_ATTRS512
+_mm512_set1_pch(_Float16 _Complex h) {
+ return (__m512h)_mm512_set1_ps(__builtin_bit_cast(float, h));
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_castph_ps(__m128h __a) {
+ return (__m128)__a;
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_castph_ps(__m256h __a) {
+ return (__m256)__a;
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_castph_ps(__m512h __a) {
+ return (__m512)__a;
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_castph_pd(__m128h __a) {
+ return (__m128d)__a;
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_castph_pd(__m256h __a) {
+ return (__m256d)__a;
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_castph_pd(__m512h __a) {
+ return (__m512d)__a;
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_castph_si128(__m128h __a) {
+ return (__m128i)__a;
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_castph_si256(__m256h __a) {
+ return (__m256i)__a;
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_castph_si512(__m512h __a) {
+ return (__m512i)__a;
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_castps_ph(__m128 __a) {
+ return (__m128h)__a;
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_castps_ph(__m256 __a) {
+ return (__m256h)__a;
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_castps_ph(__m512 __a) {
+ return (__m512h)__a;
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_castpd_ph(__m128d __a) {
+ return (__m128h)__a;
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_castpd_ph(__m256d __a) {
+ return (__m256h)__a;
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_castpd_ph(__m512d __a) {
+ return (__m512h)__a;
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_castsi128_ph(__m128i __a) {
+ return (__m128h)__a;
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_castsi256_ph(__m256i __a) {
+ return (__m256h)__a;
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_castsi512_ph(__m512i __a) {
+ return (__m512h)__a;
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256
+_mm256_castph256_ph128(__m256h __a) {
+ return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS512
+_mm512_castph512_ph128(__m512h __a) {
+ return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS512
+_mm512_castph512_ph256(__m512h __a) {
+ return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+ 12, 13, 14, 15);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_castph128_ph256(__m128h __a) {
+ return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, -1, -1, -1,
+ -1, -1, -1, -1, -1);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_castph128_ph512(__m128h __a) {
+ return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_castph256_ph512(__m256h __a) {
+ return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+ 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1);
+}
+
+/// Constructs a 256-bit floating-point vector of [16 x half] from a
+/// 128-bit floating-point vector of [8 x half]. The lower 128 bits
+/// contain the value of the source vector. The upper 128 bits are set
+/// to zero.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic has no corresponding instruction.
+///
+/// \param __a
+/// A 128-bit vector of [8 x half].
+/// \returns A 256-bit floating-point vector of [16 x half]. The lower 128 bits
+///    contain the value of the parameter. The upper 128 bits are set to zero.
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_zextph128_ph256(__m128h __a) {
+ return __builtin_shufflevector(__a, (__v8hf)_mm_setzero_ph(), 0, 1, 2, 3, 4,
+ 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+}
+
+/// Constructs a 512-bit floating-point vector of [32 x half] from a
+/// 128-bit floating-point vector of [8 x half]. The lower 128 bits
+/// contain the value of the source vector. The upper 384 bits are set
+/// to zero.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic has no corresponding instruction.
+///
+/// \param __a
+/// A 128-bit vector of [8 x half].
+/// \returns A 512-bit floating-point vector of [32 x half]. The lower 128 bits
+/// contain the value of the parameter. The upper 384 bits are set to zero.
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_zextph128_ph512(__m128h __a) {
+ return __builtin_shufflevector(
+ __a, (__v8hf)_mm_setzero_ph(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+ 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15);
+}
+
+/// Constructs a 512-bit floating-point vector of [32 x half] from a
+/// 256-bit floating-point vector of [16 x half]. The lower 256 bits
+/// contain the value of the source vector. The upper 256 bits are set
+/// to zero.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic has no corresponding instruction.
+///
+/// \param __a
+/// A 256-bit vector of [16 x half].
+/// \returns A 512-bit floating-point vector of [32 x half]. The lower 256 bits
+/// contain the value of the parameter. The upper 256 bits are set to zero.
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_zextph256_ph512(__m256h __a) {
+ return __builtin_shufflevector(__a, (__v16hf)_mm256_setzero_ph(), 0, 1, 2, 3,
+ 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
+ 29, 30, 31);
+}
+
+#define _mm_comi_round_sh(A, B, P, R) \
+ __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, (int)(P), (int)(R))
+
+#define _mm_comi_sh(A, B, pred) \
+ _mm_comi_round_sh((A), (B), (pred), _MM_FROUND_CUR_DIRECTION)
+
+static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comieq_sh(__m128h A,
+ __m128h B) {
+ return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_EQ_OS,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comilt_sh(__m128h A,
+ __m128h B) {
+ return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_LT_OS,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comile_sh(__m128h A,
+ __m128h B) {
+ return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_LE_OS,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comigt_sh(__m128h A,
+ __m128h B) {
+ return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_GT_OS,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comige_sh(__m128h A,
+ __m128h B) {
+ return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_GE_OS,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comineq_sh(__m128h A,
+ __m128h B) {
+ return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_NEQ_US,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomieq_sh(__m128h A,
+ __m128h B) {
+ return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_EQ_OQ,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomilt_sh(__m128h A,
+ __m128h B) {
+ return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_LT_OQ,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomile_sh(__m128h A,
+ __m128h B) {
+ return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_LE_OQ,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomigt_sh(__m128h A,
+ __m128h B) {
+ return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_GT_OQ,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomige_sh(__m128h A,
+ __m128h B) {
+ return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_GE_OQ,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomineq_sh(__m128h A,
+ __m128h B) {
+ return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_NEQ_UQ,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_add_ph(__m512h __A,
+ __m512h __B) {
+ return (__m512h)((__v32hf)__A + (__v32hf)__B);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_mask_add_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) {
+ return (__m512h)__builtin_ia32_selectph_512(
+ (__mmask32)__U, (__v32hf)_mm512_add_ph(__A, __B), (__v32hf)__W);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_maskz_add_ph(__mmask32 __U, __m512h __A, __m512h __B) {
+ return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U,
+ (__v32hf)_mm512_add_ph(__A, __B),
+ (__v32hf)_mm512_setzero_ph());
+}
+
+#define _mm512_add_round_ph(A, B, R) \
+ ((__m512h)__builtin_ia32_addph512((__v32hf)(__m512h)(A), \
+ (__v32hf)(__m512h)(B), (int)(R)))
+
+#define _mm512_mask_add_round_ph(W, U, A, B, R) \
+ ((__m512h)__builtin_ia32_selectph_512( \
+ (__mmask32)(U), (__v32hf)_mm512_add_round_ph((A), (B), (R)), \
+ (__v32hf)(__m512h)(W)))
+
+#define _mm512_maskz_add_round_ph(U, A, B, R) \
+ ((__m512h)__builtin_ia32_selectph_512( \
+ (__mmask32)(U), (__v32hf)_mm512_add_round_ph((A), (B), (R)), \
+ (__v32hf)_mm512_setzero_ph()))
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_sub_ph(__m512h __A,
+ __m512h __B) {
+ return (__m512h)((__v32hf)__A - (__v32hf)__B);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_mask_sub_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) {
+ return (__m512h)__builtin_ia32_selectph_512(
+ (__mmask32)__U, (__v32hf)_mm512_sub_ph(__A, __B), (__v32hf)__W);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_maskz_sub_ph(__mmask32 __U, __m512h __A, __m512h __B) {
+ return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U,
+ (__v32hf)_mm512_sub_ph(__A, __B),
+ (__v32hf)_mm512_setzero_ph());
+}
+
+#define _mm512_sub_round_ph(A, B, R) \
+ ((__m512h)__builtin_ia32_subph512((__v32hf)(__m512h)(A), \
+ (__v32hf)(__m512h)(B), (int)(R)))
+
+#define _mm512_mask_sub_round_ph(W, U, A, B, R) \
+ ((__m512h)__builtin_ia32_selectph_512( \
+ (__mmask32)(U), (__v32hf)_mm512_sub_round_ph((A), (B), (R)), \
+ (__v32hf)(__m512h)(W)))
+
+#define _mm512_maskz_sub_round_ph(U, A, B, R) \
+ ((__m512h)__builtin_ia32_selectph_512( \
+ (__mmask32)(U), (__v32hf)_mm512_sub_round_ph((A), (B), (R)), \
+ (__v32hf)_mm512_setzero_ph()))
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mul_ph(__m512h __A,
+ __m512h __B) {
+ return (__m512h)((__v32hf)__A * (__v32hf)__B);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_mask_mul_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) {
+ return (__m512h)__builtin_ia32_selectph_512(
+ (__mmask32)__U, (__v32hf)_mm512_mul_ph(__A, __B), (__v32hf)__W);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_maskz_mul_ph(__mmask32 __U, __m512h __A, __m512h __B) {
+ return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U,
+ (__v32hf)_mm512_mul_ph(__A, __B),
+ (__v32hf)_mm512_setzero_ph());
+}
+
+#define _mm512_mul_round_ph(A, B, R) \
+ ((__m512h)__builtin_ia32_mulph512((__v32hf)(__m512h)(A), \
+ (__v32hf)(__m512h)(B), (int)(R)))
+
+#define _mm512_mask_mul_round_ph(W, U, A, B, R) \
+ ((__m512h)__builtin_ia32_selectph_512( \
+ (__mmask32)(U), (__v32hf)_mm512_mul_round_ph((A), (B), (R)), \
+ (__v32hf)(__m512h)(W)))
+
+#define _mm512_maskz_mul_round_ph(U, A, B, R) \
+ ((__m512h)__builtin_ia32_selectph_512( \
+ (__mmask32)(U), (__v32hf)_mm512_mul_round_ph((A), (B), (R)), \
+ (__v32hf)_mm512_setzero_ph()))
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_div_ph(__m512h __A,
+ __m512h __B) {
+ return (__m512h)((__v32hf)__A / (__v32hf)__B);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_mask_div_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) {
+ return (__m512h)__builtin_ia32_selectph_512(
+ (__mmask32)__U, (__v32hf)_mm512_div_ph(__A, __B), (__v32hf)__W);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_maskz_div_ph(__mmask32 __U, __m512h __A, __m512h __B) {
+ return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U,
+ (__v32hf)_mm512_div_ph(__A, __B),
+ (__v32hf)_mm512_setzero_ph());
+}
+
+#define _mm512_div_round_ph(A, B, R) \
+ ((__m512h)__builtin_ia32_divph512((__v32hf)(__m512h)(A), \
+ (__v32hf)(__m512h)(B), (int)(R)))
+
+#define _mm512_mask_div_round_ph(W, U, A, B, R) \
+ ((__m512h)__builtin_ia32_selectph_512( \
+ (__mmask32)(U), (__v32hf)_mm512_div_round_ph((A), (B), (R)), \
+ (__v32hf)(__m512h)(W)))
+
+#define _mm512_maskz_div_round_ph(U, A, B, R) \
+ ((__m512h)__builtin_ia32_selectph_512( \
+ (__mmask32)(U), (__v32hf)_mm512_div_round_ph((A), (B), (R)), \
+ (__v32hf)_mm512_setzero_ph()))
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_min_ph(__m512h __A,
+ __m512h __B) {
+ return (__m512h)__builtin_ia32_minph512((__v32hf)__A, (__v32hf)__B,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_mask_min_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) {
+ return (__m512h)__builtin_ia32_selectph_512(
+ (__mmask32)__U, (__v32hf)_mm512_min_ph(__A, __B), (__v32hf)__W);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_maskz_min_ph(__mmask32 __U, __m512h __A, __m512h __B) {
+ return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U,
+ (__v32hf)_mm512_min_ph(__A, __B),
+ (__v32hf)_mm512_setzero_ph());
+}
+
+#define _mm512_min_round_ph(A, B, R) \
+ ((__m512h)__builtin_ia32_minph512((__v32hf)(__m512h)(A), \
+ (__v32hf)(__m512h)(B), (int)(R)))
+
+#define _mm512_mask_min_round_ph(W, U, A, B, R) \
+ ((__m512h)__builtin_ia32_selectph_512( \
+ (__mmask32)(U), (__v32hf)_mm512_min_round_ph((A), (B), (R)), \
+ (__v32hf)(__m512h)(W)))
+
+#define _mm512_maskz_min_round_ph(U, A, B, R) \
+ ((__m512h)__builtin_ia32_selectph_512( \
+ (__mmask32)(U), (__v32hf)_mm512_min_round_ph((A), (B), (R)), \
+ (__v32hf)_mm512_setzero_ph()))
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_max_ph(__m512h __A,
+ __m512h __B) {
+ return (__m512h)__builtin_ia32_maxph512((__v32hf)__A, (__v32hf)__B,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_mask_max_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) {
+ return (__m512h)__builtin_ia32_selectph_512(
+ (__mmask32)__U, (__v32hf)_mm512_max_ph(__A, __B), (__v32hf)__W);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_maskz_max_ph(__mmask32 __U, __m512h __A, __m512h __B) {
+ return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U,
+ (__v32hf)_mm512_max_ph(__A, __B),
+ (__v32hf)_mm512_setzero_ph());
+}
+
+#define _mm512_max_round_ph(A, B, R) \
+ ((__m512h)__builtin_ia32_maxph512((__v32hf)(__m512h)(A), \
+ (__v32hf)(__m512h)(B), (int)(R)))
+
+#define _mm512_mask_max_round_ph(W, U, A, B, R) \
+ ((__m512h)__builtin_ia32_selectph_512( \
+ (__mmask32)(U), (__v32hf)_mm512_max_round_ph((A), (B), (R)), \
+ (__v32hf)(__m512h)(W)))
+
+#define _mm512_maskz_max_round_ph(U, A, B, R) \
+ ((__m512h)__builtin_ia32_selectph_512( \
+ (__mmask32)(U), (__v32hf)_mm512_max_round_ph((A), (B), (R)), \
+ (__v32hf)_mm512_setzero_ph()))
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_abs_ph(__m512h __A) {
+ return (__m512h)_mm512_and_epi32(_mm512_set1_epi32(0x7FFF7FFF), (__m512i)__A);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_conj_pch(__m512h __A) {
+ return (__m512h)_mm512_xor_ps((__m512)__A, _mm512_set1_ps(-0.0f));
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_mask_conj_pch(__m512h __W, __mmask16 __U, __m512h __A) {
+ return (__m512h)__builtin_ia32_selectps_512(
+ (__mmask16)__U, (__v16sf)_mm512_conj_pch(__A), (__v16sf)__W);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_maskz_conj_pch(__mmask16 __U, __m512h __A) {
+ return (__m512h)__builtin_ia32_selectps_512((__mmask16)__U,
+ (__v16sf)_mm512_conj_pch(__A),
+ (__v16sf)_mm512_setzero_ps());
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_add_sh(__m128h __A,
+ __m128h __B) {
+ __A[0] += __B[0];
+ return __A;
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_add_sh(__m128h __W,
+ __mmask8 __U,
+ __m128h __A,
+ __m128h __B) {
+ __A = _mm_add_sh(__A, __B);
+ return __builtin_ia32_selectsh_128(__U, __A, __W);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_add_sh(__mmask8 __U,
+ __m128h __A,
+ __m128h __B) {
+ __A = _mm_add_sh(__A, __B);
+ return __builtin_ia32_selectsh_128(__U, __A, _mm_setzero_ph());
+}
+
+#define _mm_add_round_sh(A, B, R) \
+ ((__m128h)__builtin_ia32_addsh_round_mask( \
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
+ (__mmask8)-1, (int)(R)))
+
+#define _mm_mask_add_round_sh(W, U, A, B, R) \
+ ((__m128h)__builtin_ia32_addsh_round_mask( \
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
+ (__mmask8)(U), (int)(R)))
+
+#define _mm_maskz_add_round_sh(U, A, B, R) \
+ ((__m128h)__builtin_ia32_addsh_round_mask( \
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
+ (__mmask8)(U), (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_sub_sh(__m128h __A,
+ __m128h __B) {
+ __A[0] -= __B[0];
+ return __A;
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_sub_sh(__m128h __W,
+ __mmask8 __U,
+ __m128h __A,
+ __m128h __B) {
+ __A = _mm_sub_sh(__A, __B);
+ return __builtin_ia32_selectsh_128(__U, __A, __W);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_sub_sh(__mmask8 __U,
+ __m128h __A,
+ __m128h __B) {
+ __A = _mm_sub_sh(__A, __B);
+ return __builtin_ia32_selectsh_128(__U, __A, _mm_setzero_ph());
+}
+
+#define _mm_sub_round_sh(A, B, R) \
+ ((__m128h)__builtin_ia32_subsh_round_mask( \
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
+ (__mmask8)-1, (int)(R)))
+
+#define _mm_mask_sub_round_sh(W, U, A, B, R) \
+ ((__m128h)__builtin_ia32_subsh_round_mask( \
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
+ (__mmask8)(U), (int)(R)))
+
+#define _mm_maskz_sub_round_sh(U, A, B, R) \
+ ((__m128h)__builtin_ia32_subsh_round_mask( \
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
+ (__mmask8)(U), (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mul_sh(__m128h __A,
+ __m128h __B) {
+ __A[0] *= __B[0];
+ return __A;
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_mul_sh(__m128h __W,
+ __mmask8 __U,
+ __m128h __A,
+ __m128h __B) {
+ __A = _mm_mul_sh(__A, __B);
+ return __builtin_ia32_selectsh_128(__U, __A, __W);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_mul_sh(__mmask8 __U,
+ __m128h __A,
+ __m128h __B) {
+ __A = _mm_mul_sh(__A, __B);
+ return __builtin_ia32_selectsh_128(__U, __A, _mm_setzero_ph());
+}
+
+#define _mm_mul_round_sh(A, B, R) \
+ ((__m128h)__builtin_ia32_mulsh_round_mask( \
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
+ (__mmask8)-1, (int)(R)))
+
+#define _mm_mask_mul_round_sh(W, U, A, B, R) \
+ ((__m128h)__builtin_ia32_mulsh_round_mask( \
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
+ (__mmask8)(U), (int)(R)))
+
+#define _mm_maskz_mul_round_sh(U, A, B, R) \
+ ((__m128h)__builtin_ia32_mulsh_round_mask( \
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
+ (__mmask8)(U), (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_div_sh(__m128h __A,
+ __m128h __B) {
+ __A[0] /= __B[0];
+ return __A;
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_div_sh(__m128h __W,
+ __mmask8 __U,
+ __m128h __A,
+ __m128h __B) {
+ __A = _mm_div_sh(__A, __B);
+ return __builtin_ia32_selectsh_128(__U, __A, __W);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_div_sh(__mmask8 __U,
+ __m128h __A,
+ __m128h __B) {
+ __A = _mm_div_sh(__A, __B);
+ return __builtin_ia32_selectsh_128(__U, __A, _mm_setzero_ph());
+}
+
+#define _mm_div_round_sh(A, B, R) \
+ ((__m128h)__builtin_ia32_divsh_round_mask( \
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
+ (__mmask8)-1, (int)(R)))
+
+#define _mm_mask_div_round_sh(W, U, A, B, R) \
+ ((__m128h)__builtin_ia32_divsh_round_mask( \
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
+ (__mmask8)(U), (int)(R)))
+
+#define _mm_maskz_div_round_sh(U, A, B, R) \
+ ((__m128h)__builtin_ia32_divsh_round_mask( \
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
+ (__mmask8)(U), (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_min_sh(__m128h __A,
+ __m128h __B) {
+ return (__m128h)__builtin_ia32_minsh_round_mask(
+ (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_min_sh(__m128h __W,
+ __mmask8 __U,
+ __m128h __A,
+ __m128h __B) {
+ return (__m128h)__builtin_ia32_minsh_round_mask((__v8hf)__A, (__v8hf)__B,
+ (__v8hf)__W, (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_min_sh(__mmask8 __U,
+ __m128h __A,
+ __m128h __B) {
+ return (__m128h)__builtin_ia32_minsh_round_mask(
+ (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_min_round_sh(A, B, R) \
+ ((__m128h)__builtin_ia32_minsh_round_mask( \
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
+ (__mmask8)-1, (int)(R)))
+
+#define _mm_mask_min_round_sh(W, U, A, B, R) \
+ ((__m128h)__builtin_ia32_minsh_round_mask( \
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
+ (__mmask8)(U), (int)(R)))
+
+#define _mm_maskz_min_round_sh(U, A, B, R) \
+ ((__m128h)__builtin_ia32_minsh_round_mask( \
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
+ (__mmask8)(U), (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_max_sh(__m128h __A,
+ __m128h __B) {
+ return (__m128h)__builtin_ia32_maxsh_round_mask(
+ (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_max_sh(__m128h __W,
+ __mmask8 __U,
+ __m128h __A,
+ __m128h __B) {
+ return (__m128h)__builtin_ia32_maxsh_round_mask((__v8hf)__A, (__v8hf)__B,
+ (__v8hf)__W, (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_max_sh(__mmask8 __U,
+ __m128h __A,
+ __m128h __B) {
+ return (__m128h)__builtin_ia32_maxsh_round_mask(
+ (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_max_round_sh(A, B, R) \
+ ((__m128h)__builtin_ia32_maxsh_round_mask( \
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
+ (__mmask8)-1, (int)(R)))
+
+#define _mm_mask_max_round_sh(W, U, A, B, R) \
+ ((__m128h)__builtin_ia32_maxsh_round_mask( \
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
+ (__mmask8)(U), (int)(R)))
+
+#define _mm_maskz_max_round_sh(U, A, B, R) \
+ ((__m128h)__builtin_ia32_maxsh_round_mask( \
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
+ (__mmask8)(U), (int)(R)))
+
+#define _mm512_cmp_round_ph_mask(A, B, P, R) \
+ ((__mmask32)__builtin_ia32_cmpph512_mask((__v32hf)(__m512h)(A), \
+ (__v32hf)(__m512h)(B), (int)(P), \
+ (__mmask32)-1, (int)(R)))
+
+#define _mm512_mask_cmp_round_ph_mask(U, A, B, P, R) \
+ ((__mmask32)__builtin_ia32_cmpph512_mask((__v32hf)(__m512h)(A), \
+ (__v32hf)(__m512h)(B), (int)(P), \
+ (__mmask32)(U), (int)(R)))
+
+#define _mm512_cmp_ph_mask(A, B, P) \
+ _mm512_cmp_round_ph_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_mask_cmp_ph_mask(U, A, B, P) \
+ _mm512_mask_cmp_round_ph_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_cmp_round_sh_mask(X, Y, P, R) \
+ ((__mmask8)__builtin_ia32_cmpsh_mask((__v8hf)(__m128h)(X), \
+ (__v8hf)(__m128h)(Y), (int)(P), \
+ (__mmask8)-1, (int)(R)))
+
+#define _mm_mask_cmp_round_sh_mask(M, X, Y, P, R) \
+ ((__mmask8)__builtin_ia32_cmpsh_mask((__v8hf)(__m128h)(X), \
+ (__v8hf)(__m128h)(Y), (int)(P), \
+ (__mmask8)(M), (int)(R)))
+
+#define _mm_cmp_sh_mask(X, Y, P) \
+ ((__mmask8)__builtin_ia32_cmpsh_mask( \
+ (__v8hf)(__m128h)(X), (__v8hf)(__m128h)(Y), (int)(P), (__mmask8)-1, \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_mask_cmp_sh_mask(M, X, Y, P) \
+ ((__mmask8)__builtin_ia32_cmpsh_mask( \
+ (__v8hf)(__m128h)(X), (__v8hf)(__m128h)(Y), (int)(P), (__mmask8)(M), \
+ _MM_FROUND_CUR_DIRECTION))
+// loads with vmovsh:
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_load_sh(void const *__dp) {
+ struct __mm_load_sh_struct {
+ _Float16 __u;
+ } __attribute__((__packed__, __may_alias__));
+ _Float16 __u = ((struct __mm_load_sh_struct *)__dp)->__u;
+ return (__m128h){__u, 0, 0, 0, 0, 0, 0, 0};
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_mask_load_sh(__m128h __W, __mmask8 __U, const void *__A) {
+ __m128h src = (__v8hf)__builtin_shufflevector(
+ (__v8hf)__W, (__v8hf)_mm_setzero_ph(), 0, 8, 8, 8, 8, 8, 8, 8);
+
+ return (__m128h)__builtin_ia32_loadsh128_mask((__v8hf *)__A, src, __U & 1);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_load_sh(__mmask8 __U, const void *__A) {
+ return (__m128h)__builtin_ia32_loadsh128_mask(
+ (__v8hf *)__A, (__v8hf)_mm_setzero_ph(), __U & 1);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_load_ph(void const *__p) {
+ return *(const __m512h *)__p;
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_load_ph(void const *__p) {
+ return *(const __m256h *)__p;
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_load_ph(void const *__p) {
+ return *(const __m128h *)__p;
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_loadu_ph(void const *__p) {
+ struct __loadu_ph {
+ __m512h_u __v;
+ } __attribute__((__packed__, __may_alias__));
+ return ((const struct __loadu_ph *)__p)->__v;
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_loadu_ph(void const *__p) {
+ struct __loadu_ph {
+ __m256h_u __v;
+ } __attribute__((__packed__, __may_alias__));
+ return ((const struct __loadu_ph *)__p)->__v;
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_loadu_ph(void const *__p) {
+ struct __loadu_ph {
+ __m128h_u __v;
+ } __attribute__((__packed__, __may_alias__));
+ return ((const struct __loadu_ph *)__p)->__v;
+}
+
+// stores with vmovsh:
+static __inline__ void __DEFAULT_FN_ATTRS128 _mm_store_sh(void *__dp,
+ __m128h __a) {
+ struct __mm_store_sh_struct {
+ _Float16 __u;
+ } __attribute__((__packed__, __may_alias__));
+ ((struct __mm_store_sh_struct *)__dp)->__u = __a[0];
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_sh(void *__W,
+ __mmask8 __U,
+ __m128h __A) {
+ __builtin_ia32_storesh128_mask((__v8hf *)__W, __A, __U & 1);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_store_ph(void *__P,
+ __m512h __A) {
+ *(__m512h *)__P = __A;
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_store_ph(void *__P,
+ __m256h __A) {
+ *(__m256h *)__P = __A;
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS128 _mm_store_ph(void *__P,
+ __m128h __A) {
+ *(__m128h *)__P = __A;
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_storeu_ph(void *__P,
+ __m512h __A) {
+ struct __storeu_ph {
+ __m512h_u __v;
+ } __attribute__((__packed__, __may_alias__));
+ ((struct __storeu_ph *)__P)->__v = __A;
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_storeu_ph(void *__P,
+ __m256h __A) {
+ struct __storeu_ph {
+ __m256h_u __v;
+ } __attribute__((__packed__, __may_alias__));
+ ((struct __storeu_ph *)__P)->__v = __A;
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS128 _mm_storeu_ph(void *__P,
+ __m128h __A) {
+ struct __storeu_ph {
+ __m128h_u __v;
+ } __attribute__((__packed__, __may_alias__));
+ ((struct __storeu_ph *)__P)->__v = __A;
+}
+
+// moves with vmovsh:
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_move_sh(__m128h __a,
+ __m128h __b) {
+ __a[0] = __b[0];
+ return __a;
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_move_sh(__m128h __W,
+ __mmask8 __U,
+ __m128h __A,
+ __m128h __B) {
+ return __builtin_ia32_selectsh_128(__U, _mm_move_sh(__A, __B), __W);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_move_sh(__mmask8 __U,
+ __m128h __A,
+ __m128h __B) {
+ return __builtin_ia32_selectsh_128(__U, _mm_move_sh(__A, __B),
+ _mm_setzero_ph());
+}
+
+// vmovw:
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsi16_si128(short __a) {
+ return (__m128i)(__v8hi){__a, 0, 0, 0, 0, 0, 0, 0};
+}
+
+static __inline__ short __DEFAULT_FN_ATTRS128 _mm_cvtsi128_si16(__m128i __a) {
+ __v8hi __b = (__v8hi)__a;
+ return __b[0];
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_rcp_ph(__m512h __A) {
+ return (__m512h)__builtin_ia32_rcpph512_mask(
+ (__v32hf)__A, (__v32hf)_mm512_undefined_ph(), (__mmask32)-1);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_mask_rcp_ph(__m512h __W, __mmask32 __U, __m512h __A) {
+ return (__m512h)__builtin_ia32_rcpph512_mask((__v32hf)__A, (__v32hf)__W,
+ (__mmask32)__U);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_maskz_rcp_ph(__mmask32 __U, __m512h __A) {
+ return (__m512h)__builtin_ia32_rcpph512_mask(
+ (__v32hf)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_rsqrt_ph(__m512h __A) {
+ return (__m512h)__builtin_ia32_rsqrtph512_mask(
+ (__v32hf)__A, (__v32hf)_mm512_undefined_ph(), (__mmask32)-1);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_mask_rsqrt_ph(__m512h __W, __mmask32 __U, __m512h __A) {
+ return (__m512h)__builtin_ia32_rsqrtph512_mask((__v32hf)__A, (__v32hf)__W,
+ (__mmask32)__U);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_maskz_rsqrt_ph(__mmask32 __U, __m512h __A) {
+ return (__m512h)__builtin_ia32_rsqrtph512_mask(
+ (__v32hf)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U);
+}
+
+#define _mm512_getmant_ph(A, B, C) \
+ ((__m512h)__builtin_ia32_getmantph512_mask( \
+ (__v32hf)(__m512h)(A), (int)(((C) << 2) | (B)), \
+ (__v32hf)_mm512_undefined_ph(), (__mmask32)-1, \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_getmant_ph(W, U, A, B, C) \
+ ((__m512h)__builtin_ia32_getmantph512_mask( \
+ (__v32hf)(__m512h)(A), (int)(((C) << 2) | (B)), (__v32hf)(__m512h)(W), \
+ (__mmask32)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_getmant_ph(U, A, B, C) \
+ ((__m512h)__builtin_ia32_getmantph512_mask( \
+ (__v32hf)(__m512h)(A), (int)(((C) << 2) | (B)), \
+ (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_getmant_round_ph(A, B, C, R) \
+ ((__m512h)__builtin_ia32_getmantph512_mask( \
+ (__v32hf)(__m512h)(A), (int)(((C) << 2) | (B)), \
+ (__v32hf)_mm512_undefined_ph(), (__mmask32)-1, (int)(R)))
+
+#define _mm512_mask_getmant_round_ph(W, U, A, B, C, R) \
+ ((__m512h)__builtin_ia32_getmantph512_mask( \
+ (__v32hf)(__m512h)(A), (int)(((C) << 2) | (B)), (__v32hf)(__m512h)(W), \
+ (__mmask32)(U), (int)(R)))
+
+#define _mm512_maskz_getmant_round_ph(U, A, B, C, R) \
+ ((__m512h)__builtin_ia32_getmantph512_mask( \
+ (__v32hf)(__m512h)(A), (int)(((C) << 2) | (B)), \
+ (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), (int)(R)))
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_getexp_ph(__m512h __A) {
+ return (__m512h)__builtin_ia32_getexpph512_mask(
+ (__v32hf)__A, (__v32hf)_mm512_undefined_ph(), (__mmask32)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_mask_getexp_ph(__m512h __W, __mmask32 __U, __m512h __A) {
+ return (__m512h)__builtin_ia32_getexpph512_mask(
+ (__v32hf)__A, (__v32hf)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_maskz_getexp_ph(__mmask32 __U, __m512h __A) {
+ return (__m512h)__builtin_ia32_getexpph512_mask(
+ (__v32hf)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_getexp_round_ph(A, R) \
+ ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A), \
+ (__v32hf)_mm512_undefined_ph(), \
+ (__mmask32)-1, (int)(R)))
+
+#define _mm512_mask_getexp_round_ph(W, U, A, R) \
+ ((__m512h)__builtin_ia32_getexpph512_mask( \
+ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(W), (__mmask32)(U), (int)(R)))
+
+#define _mm512_maskz_getexp_round_ph(U, A, R) \
+ ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A), \
+ (__v32hf)_mm512_setzero_ph(), \
+ (__mmask32)(U), (int)(R)))
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_scalef_ph(__m512h __A,
+ __m512h __B) {
+ return (__m512h)__builtin_ia32_scalefph512_mask(
+ (__v32hf)__A, (__v32hf)__B, (__v32hf)_mm512_undefined_ph(), (__mmask32)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_mask_scalef_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) {
+ return (__m512h)__builtin_ia32_scalefph512_mask((__v32hf)__A, (__v32hf)__B,
+ (__v32hf)__W, (__mmask32)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_maskz_scalef_ph(__mmask32 __U, __m512h __A, __m512h __B) {
+ return (__m512h)__builtin_ia32_scalefph512_mask(
+ (__v32hf)__A, (__v32hf)__B, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_scalef_round_ph(A, B, R) \
+ ((__m512h)__builtin_ia32_scalefph512_mask( \
+ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), \
+ (__v32hf)_mm512_undefined_ph(), (__mmask32)-1, (int)(R)))
+
+#define _mm512_mask_scalef_round_ph(W, U, A, B, R) \
+ ((__m512h)__builtin_ia32_scalefph512_mask( \
+ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(W), \
+ (__mmask32)(U), (int)(R)))
+
+#define _mm512_maskz_scalef_round_ph(U, A, B, R) \
+ ((__m512h)__builtin_ia32_scalefph512_mask( \
+ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), \
+ (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), (int)(R)))
+
+#define _mm512_roundscale_ph(A, B) \
+ ((__m512h)__builtin_ia32_rndscaleph_mask( \
+ (__v32hf)(__m512h)(A), (int)(B), (__v32hf)(__m512h)(A), (__mmask32)-1, \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_roundscale_ph(A, B, C, imm) \
+ ((__m512h)__builtin_ia32_rndscaleph_mask( \
+ (__v32hf)(__m512h)(C), (int)(imm), (__v32hf)(__m512h)(A), \
+ (__mmask32)(B), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_roundscale_ph(A, B, imm) \
+ ((__m512h)__builtin_ia32_rndscaleph_mask( \
+ (__v32hf)(__m512h)(B), (int)(imm), (__v32hf)_mm512_setzero_ph(), \
+ (__mmask32)(A), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_roundscale_round_ph(A, B, C, imm, R) \
+ ((__m512h)__builtin_ia32_rndscaleph_mask((__v32hf)(__m512h)(C), (int)(imm), \
+ (__v32hf)(__m512h)(A), \
+ (__mmask32)(B), (int)(R)))
+
+#define _mm512_maskz_roundscale_round_ph(A, B, imm, R) \
+ ((__m512h)__builtin_ia32_rndscaleph_mask((__v32hf)(__m512h)(B), (int)(imm), \
+ (__v32hf)_mm512_setzero_ph(), \
+ (__mmask32)(A), (int)(R)))
+
+#define _mm512_roundscale_round_ph(A, imm, R) \
+ ((__m512h)__builtin_ia32_rndscaleph_mask((__v32hf)(__m512h)(A), (int)(imm), \
+ (__v32hf)_mm512_undefined_ph(), \
+ (__mmask32)-1, (int)(R)))
+
+#define _mm512_reduce_ph(A, imm) \
+ ((__m512h)__builtin_ia32_reduceph512_mask( \
+ (__v32hf)(__m512h)(A), (int)(imm), (__v32hf)_mm512_undefined_ph(), \
+ (__mmask32)-1, _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_reduce_ph(W, U, A, imm) \
+ ((__m512h)__builtin_ia32_reduceph512_mask( \
+ (__v32hf)(__m512h)(A), (int)(imm), (__v32hf)(__m512h)(W), \
+ (__mmask32)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_reduce_ph(U, A, imm) \
+ ((__m512h)__builtin_ia32_reduceph512_mask( \
+ (__v32hf)(__m512h)(A), (int)(imm), (__v32hf)_mm512_setzero_ph(), \
+ (__mmask32)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_reduce_round_ph(W, U, A, imm, R) \
+ ((__m512h)__builtin_ia32_reduceph512_mask((__v32hf)(__m512h)(A), (int)(imm), \
+ (__v32hf)(__m512h)(W), \
+ (__mmask32)(U), (int)(R)))
+
+#define _mm512_maskz_reduce_round_ph(U, A, imm, R) \
+ ((__m512h)__builtin_ia32_reduceph512_mask((__v32hf)(__m512h)(A), (int)(imm), \
+ (__v32hf)_mm512_setzero_ph(), \
+ (__mmask32)(U), (int)(R)))
+
+#define _mm512_reduce_round_ph(A, imm, R) \
+ ((__m512h)__builtin_ia32_reduceph512_mask((__v32hf)(__m512h)(A), (int)(imm), \
+ (__v32hf)_mm512_undefined_ph(), \
+ (__mmask32)-1, (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_rcp_sh(__m128h __A,
+ __m128h __B) {
+ return (__m128h)__builtin_ia32_rcpsh_mask(
+ (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_rcp_sh(__m128h __W,
+ __mmask8 __U,
+ __m128h __A,
+ __m128h __B) {
+ return (__m128h)__builtin_ia32_rcpsh_mask((__v8hf)__A, (__v8hf)__B,
+ (__v8hf)__W, (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_rcp_sh(__mmask8 __U,
+ __m128h __A,
+ __m128h __B) {
+ return (__m128h)__builtin_ia32_rcpsh_mask(
+ (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_rsqrt_sh(__m128h __A,
+ __m128h __B) {
+ return (__m128h)__builtin_ia32_rsqrtsh_mask(
+ (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt_sh(__m128h __W,
+ __mmask8 __U,
+ __m128h __A,
+ __m128h __B) {
+ return (__m128h)__builtin_ia32_rsqrtsh_mask((__v8hf)__A, (__v8hf)__B,
+ (__v8hf)__W, (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_rsqrt_sh(__mmask8 __U, __m128h __A, __m128h __B) {
+ return (__m128h)__builtin_ia32_rsqrtsh_mask(
+ (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
+}
+
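_mm_rcp_sh and _mm_rsqrt_sh above approximate the reciprocal (respectively reciprocal square root) of the low fp16 element of the second operand and copy the upper seven lanes from the first. A minimal sketch, assuming the _mm_set_sh and _mm_cvtsh_h helpers defined earlier in this header:

#include <immintrin.h>

/* Approximate 1/x with the scalar fp16 reciprocal; illustrative only. */
static _Float16 approx_recip(_Float16 x) {
  __m128h v = _mm_set_sh(x);    /* low lane = x, upper lanes zero */
  __m128h r = _mm_rcp_sh(v, v); /* low lane ~ 1/x, upper lanes from v */
  return _mm_cvtsh_h(r);        /* extract the low lane */
}
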
+#define _mm_getmant_round_sh(A, B, C, D, R) \
+ ((__m128h)__builtin_ia32_getmantsh_round_mask( \
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(((D) << 2) | (C)), \
+ (__v8hf)_mm_setzero_ph(), (__mmask8)-1, (int)(R)))
+
+#define _mm_getmant_sh(A, B, C, D) \
+ ((__m128h)__builtin_ia32_getmantsh_round_mask( \
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(((D) << 2) | (C)), \
+ (__v8hf)_mm_setzero_ph(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_mask_getmant_sh(W, U, A, B, C, D) \
+ ((__m128h)__builtin_ia32_getmantsh_round_mask( \
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(((D) << 2) | (C)), \
+ (__v8hf)(__m128h)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_mask_getmant_round_sh(W, U, A, B, C, D, R) \
+ ((__m128h)__builtin_ia32_getmantsh_round_mask( \
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(((D) << 2) | (C)), \
+ (__v8hf)(__m128h)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm_maskz_getmant_sh(U, A, B, C, D) \
+ ((__m128h)__builtin_ia32_getmantsh_round_mask( \
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(((D) << 2) | (C)), \
+ (__v8hf)_mm_setzero_ph(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_maskz_getmant_round_sh(U, A, B, C, D, R) \
+ ((__m128h)__builtin_ia32_getmantsh_round_mask( \
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(((D) << 2) | (C)), \
+ (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R)))
+
+#define _mm_getexp_round_sh(A, B, R) \
+ ((__m128h)__builtin_ia32_getexpsh128_round_mask( \
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
+ (__mmask8)-1, (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_getexp_sh(__m128h __A,
+ __m128h __B) {
+ return (__m128h)__builtin_ia32_getexpsh128_round_mask(
+ (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_mask_getexp_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
+ return (__m128h)__builtin_ia32_getexpsh128_round_mask(
+ (__v8hf)__A, (__v8hf)__B, (__v8hf)__W, (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_mask_getexp_round_sh(W, U, A, B, R) \
+ ((__m128h)__builtin_ia32_getexpsh128_round_mask( \
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
+ (__mmask8)(U), (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_getexp_sh(__mmask8 __U, __m128h __A, __m128h __B) {
+ return (__m128h)__builtin_ia32_getexpsh128_round_mask(
+ (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_maskz_getexp_round_sh(U, A, B, R) \
+ ((__m128h)__builtin_ia32_getexpsh128_round_mask( \
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
+ (__mmask8)(U), (int)(R)))
+
+#define _mm_scalef_round_sh(A, B, R) \
+ ((__m128h)__builtin_ia32_scalefsh_round_mask( \
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
+ (__mmask8)-1, (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_scalef_sh(__m128h __A,
+ __m128h __B) {
+ return (__m128h)__builtin_ia32_scalefsh_round_mask(
+ (__v8hf)__A, (__v8hf)(__B), (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_mask_scalef_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
+ return (__m128h)__builtin_ia32_scalefsh_round_mask((__v8hf)__A, (__v8hf)__B,
+ (__v8hf)__W, (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_mask_scalef_round_sh(W, U, A, B, R) \
+ ((__m128h)__builtin_ia32_scalefsh_round_mask( \
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
+ (__mmask8)(U), (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_scalef_sh(__mmask8 __U, __m128h __A, __m128h __B) {
+ return (__m128h)__builtin_ia32_scalefsh_round_mask(
+ (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_maskz_scalef_round_sh(U, A, B, R) \
+ ((__m128h)__builtin_ia32_scalefsh_round_mask( \
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
+ (__mmask8)(U), (int)(R)))
+
+#define _mm_roundscale_round_sh(A, B, imm, R) \
+ ((__m128h)__builtin_ia32_rndscalesh_round_mask( \
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
+ (__mmask8)-1, (int)(imm), (int)(R)))
+
+#define _mm_roundscale_sh(A, B, imm) \
+ ((__m128h)__builtin_ia32_rndscalesh_round_mask( \
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
+ (__mmask8)-1, (int)(imm), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_mask_roundscale_sh(W, U, A, B, I) \
+ ((__m128h)__builtin_ia32_rndscalesh_round_mask( \
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
+ (__mmask8)(U), (int)(I), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_mask_roundscale_round_sh(W, U, A, B, I, R) \
+ ((__m128h)__builtin_ia32_rndscalesh_round_mask( \
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
+ (__mmask8)(U), (int)(I), (int)(R)))
+
+#define _mm_maskz_roundscale_sh(U, A, B, I) \
+ ((__m128h)__builtin_ia32_rndscalesh_round_mask( \
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
+ (__mmask8)(U), (int)(I), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_maskz_roundscale_round_sh(U, A, B, I, R) \
+ ((__m128h)__builtin_ia32_rndscalesh_round_mask( \
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
+ (__mmask8)(U), (int)(I), (int)(R)))
+
+#define _mm_reduce_sh(A, B, C) \
+ ((__m128h)__builtin_ia32_reducesh_mask( \
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
+ (__mmask8)-1, (int)(C), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_mask_reduce_sh(W, U, A, B, C) \
+ ((__m128h)__builtin_ia32_reducesh_mask( \
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
+ (__mmask8)(U), (int)(C), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_maskz_reduce_sh(U, A, B, C) \
+ ((__m128h)__builtin_ia32_reducesh_mask( \
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
+ (__mmask8)(U), (int)(C), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_reduce_round_sh(A, B, C, R) \
+ ((__m128h)__builtin_ia32_reducesh_mask( \
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
+ (__mmask8)-1, (int)(C), (int)(R)))
+
+#define _mm_mask_reduce_round_sh(W, U, A, B, C, R) \
+ ((__m128h)__builtin_ia32_reducesh_mask( \
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
+ (__mmask8)(U), (int)(C), (int)(R)))
+
+#define _mm_maskz_reduce_round_sh(U, A, B, C, R) \
+ ((__m128h)__builtin_ia32_reducesh_mask( \
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
+ (__mmask8)(U), (int)(C), (int)(R)))
+
+#define _mm512_sqrt_round_ph(A, R) \
+ ((__m512h)__builtin_ia32_sqrtph512((__v32hf)(__m512h)(A), (int)(R)))
+
+#define _mm512_mask_sqrt_round_ph(W, U, A, R) \
+ ((__m512h)__builtin_ia32_selectph_512( \
+ (__mmask32)(U), (__v32hf)_mm512_sqrt_round_ph((A), (R)), \
+ (__v32hf)(__m512h)(W)))
+
+#define _mm512_maskz_sqrt_round_ph(U, A, R) \
+ ((__m512h)__builtin_ia32_selectph_512( \
+ (__mmask32)(U), (__v32hf)_mm512_sqrt_round_ph((A), (R)), \
+ (__v32hf)_mm512_setzero_ph()))
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_sqrt_ph(__m512h __A) {
+ return (__m512h)__builtin_ia32_sqrtph512((__v32hf)__A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_mask_sqrt_ph(__m512h __W, __mmask32 __U, __m512h __A) {
+ return (__m512h)__builtin_ia32_selectph_512(
+ (__mmask32)(__U),
+ (__v32hf)__builtin_ia32_sqrtph512((__A), (_MM_FROUND_CUR_DIRECTION)),
+ (__v32hf)(__m512h)(__W));
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_maskz_sqrt_ph(__mmask32 __U, __m512h __A) {
+ return (__m512h)__builtin_ia32_selectph_512(
+ (__mmask32)(__U),
+ (__v32hf)__builtin_ia32_sqrtph512((__A), (_MM_FROUND_CUR_DIRECTION)),
+ (__v32hf)_mm512_setzero_ph());
+}
+
+#define _mm_sqrt_round_sh(A, B, R) \
+ ((__m128h)__builtin_ia32_sqrtsh_round_mask( \
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
+ (__mmask8)-1, (int)(R)))
+
+#define _mm_mask_sqrt_round_sh(W, U, A, B, R) \
+ ((__m128h)__builtin_ia32_sqrtsh_round_mask( \
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
+ (__mmask8)(U), (int)(R)))
+
+#define _mm_maskz_sqrt_round_sh(U, A, B, R) \
+ ((__m128h)__builtin_ia32_sqrtsh_round_mask( \
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
+ (__mmask8)(U), (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_sqrt_sh(__m128h __A,
+ __m128h __B) {
+ return (__m128h)__builtin_ia32_sqrtsh_round_mask(
+ (__v8hf)(__m128h)(__A), (__v8hf)(__m128h)(__B), (__v8hf)_mm_setzero_ph(),
+ (__mmask8)-1, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_sh(__m128h __W,
+ __mmask32 __U,
+ __m128h __A,
+ __m128h __B) {
+ return (__m128h)__builtin_ia32_sqrtsh_round_mask(
+ (__v8hf)(__m128h)(__A), (__v8hf)(__m128h)(__B), (__v8hf)(__m128h)(__W),
+ (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_sh(__mmask32 __U,
+ __m128h __A,
+ __m128h __B) {
+ return (__m128h)__builtin_ia32_sqrtsh_round_mask(
+ (__v8hf)(__m128h)(__A), (__v8hf)(__m128h)(__B), (__v8hf)_mm_setzero_ph(),
+ (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
+}
+
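The sqrt group above provides packed 512-bit and scalar fp16 square roots; the _round_ variants take an explicit rounding mode such as _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, the others use the current rounding direction. A minimal sketch:

#include <immintrin.h>

/* Per-lane fp16 square root, e.g. as part of a vector normalisation step. */
static __m512h sqrt_lanes(__m512h sumsq) {
  return _mm512_sqrt_ph(sumsq);
}
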
+#define _mm512_mask_fpclass_ph_mask(U, A, imm) \
+ ((__mmask32)__builtin_ia32_fpclassph512_mask((__v32hf)(__m512h)(A), \
+ (int)(imm), (__mmask32)(U)))
+
+#define _mm512_fpclass_ph_mask(A, imm) \
+ ((__mmask32)__builtin_ia32_fpclassph512_mask((__v32hf)(__m512h)(A), \
+ (int)(imm), (__mmask32)-1))
+
+#define _mm_fpclass_sh_mask(A, imm) \
+ ((__mmask8)__builtin_ia32_fpclasssh_mask((__v8hf)(__m128h)(A), (int)(imm), \
+ (__mmask8)-1))
+
+#define _mm_mask_fpclass_sh_mask(U, A, imm) \
+ ((__mmask8)__builtin_ia32_fpclasssh_mask((__v8hf)(__m128h)(A), (int)(imm), \
+ (__mmask8)(U)))
+
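The fpclass intrinsics above test each fp16 lane (or the low scalar lane) against the categories selected by imm and return a bitmask of matching lanes. A minimal sketch, assuming the usual vfpclass category encoding (bit 0 quiet NaN, bit 7 signaling NaN):

#include <immintrin.h>

/* Mask of lanes holding any NaN; 0x81 = QNaN | SNaN under the assumed encoding. */
static __mmask32 nan_lanes(__m512h x) {
  return _mm512_fpclass_ph_mask(x, 0x81);
}
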
+#define _mm512_cvt_roundpd_ph(A, R) \
+ ((__m128h)__builtin_ia32_vcvtpd2ph512_mask( \
+ (__v8df)(A), (__v8hf)_mm_undefined_ph(), (__mmask8)(-1), (int)(R)))
+
+#define _mm512_mask_cvt_roundpd_ph(W, U, A, R) \
+ ((__m128h)__builtin_ia32_vcvtpd2ph512_mask((__v8df)(A), (__v8hf)(W), \
+ (__mmask8)(U), (int)(R)))
+
+#define _mm512_maskz_cvt_roundpd_ph(U, A, R) \
+ ((__m128h)__builtin_ia32_vcvtpd2ph512_mask( \
+ (__v8df)(A), (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS512 _mm512_cvtpd_ph(__m512d __A) {
+ return (__m128h)__builtin_ia32_vcvtpd2ph512_mask(
+ (__v8df)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtpd_ph(__m128h __W, __mmask8 __U, __m512d __A) {
+ return (__m128h)__builtin_ia32_vcvtpd2ph512_mask(
+ (__v8df)__A, (__v8hf)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtpd_ph(__mmask8 __U, __m512d __A) {
+ return (__m128h)__builtin_ia32_vcvtpd2ph512_mask(
+ (__v8df)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvt_roundph_pd(A, R) \
+ ((__m512d)__builtin_ia32_vcvtph2pd512_mask( \
+ (__v8hf)(A), (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), (int)(R)))
+
+#define _mm512_mask_cvt_roundph_pd(W, U, A, R) \
+ ((__m512d)__builtin_ia32_vcvtph2pd512_mask((__v8hf)(A), (__v8df)(W), \
+ (__mmask8)(U), (int)(R)))
+
+#define _mm512_maskz_cvt_roundph_pd(U, A, R) \
+ ((__m512d)__builtin_ia32_vcvtph2pd512_mask( \
+ (__v8hf)(A), (__v8df)_mm512_setzero_pd(), (__mmask8)(U), (int)(R)))
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtph_pd(__m128h __A) {
+ return (__m512d)__builtin_ia32_vcvtph2pd512_mask(
+ (__v8hf)__A, (__v8df)_mm512_setzero_pd(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtph_pd(__m512d __W, __mmask8 __U, __m128h __A) {
+ return (__m512d)__builtin_ia32_vcvtph2pd512_mask(
+ (__v8hf)__A, (__v8df)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtph_pd(__mmask8 __U, __m128h __A) {
+ return (__m512d)__builtin_ia32_vcvtph2pd512_mask(
+ (__v8hf)__A, (__v8df)_mm512_setzero_pd(), (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
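The conversions above narrow eight doubles to the low half-precision lanes of a __m128h and widen them back to __m512d. A minimal round-trip sketch:

#include <immintrin.h>

/* Round-trip eight doubles through fp16 (lossy beyond fp16 precision). */
static __m512d roundtrip_pd(__m512d x) {
  __m128h h = _mm512_cvtpd_ph(x);  /* 8 x double -> 8 x fp16 */
  return _mm512_cvtph_pd(h);       /* 8 x fp16 -> 8 x double */
}
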
+#define _mm_cvt_roundsh_ss(A, B, R) \
+ ((__m128)__builtin_ia32_vcvtsh2ss_round_mask((__v4sf)(A), (__v8hf)(B), \
+ (__v4sf)_mm_undefined_ps(), \
+ (__mmask8)(-1), (int)(R)))
+
+#define _mm_mask_cvt_roundsh_ss(W, U, A, B, R) \
+ ((__m128)__builtin_ia32_vcvtsh2ss_round_mask( \
+ (__v4sf)(A), (__v8hf)(B), (__v4sf)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm_maskz_cvt_roundsh_ss(U, A, B, R) \
+ ((__m128)__builtin_ia32_vcvtsh2ss_round_mask((__v4sf)(A), (__v8hf)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U), (int)(R)))
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtsh_ss(__m128 __A,
+ __m128h __B) {
+ return (__m128)__builtin_ia32_vcvtsh2ss_round_mask(
+ (__v4sf)__A, (__v8hf)__B, (__v4sf)_mm_undefined_ps(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtsh_ss(__m128 __W,
+ __mmask8 __U,
+ __m128 __A,
+ __m128h __B) {
+ return (__m128)__builtin_ia32_vcvtsh2ss_round_mask((__v4sf)__A, (__v8hf)__B,
+ (__v4sf)__W, (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsh_ss(__mmask8 __U,
+ __m128 __A,
+ __m128h __B) {
+ return (__m128)__builtin_ia32_vcvtsh2ss_round_mask(
+ (__v4sf)__A, (__v8hf)__B, (__v4sf)_mm_setzero_ps(), (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_cvt_roundss_sh(A, B, R) \
+ ((__m128h)__builtin_ia32_vcvtss2sh_round_mask((__v8hf)(A), (__v4sf)(B), \
+ (__v8hf)_mm_undefined_ph(), \
+ (__mmask8)(-1), (int)(R)))
+
+#define _mm_mask_cvt_roundss_sh(W, U, A, B, R) \
+ ((__m128h)__builtin_ia32_vcvtss2sh_round_mask( \
+ (__v8hf)(A), (__v4sf)(B), (__v8hf)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm_maskz_cvt_roundss_sh(U, A, B, R) \
+ ((__m128h)__builtin_ia32_vcvtss2sh_round_mask((__v8hf)(A), (__v4sf)(B), \
+ (__v8hf)_mm_setzero_ph(), \
+ (__mmask8)(U), (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtss_sh(__m128h __A,
+ __m128 __B) {
+ return (__m128h)__builtin_ia32_vcvtss2sh_round_mask(
+ (__v8hf)__A, (__v4sf)__B, (__v8hf)_mm_undefined_ph(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtss_sh(__m128h __W,
+ __mmask8 __U,
+ __m128h __A,
+ __m128 __B) {
+ return (__m128h)__builtin_ia32_vcvtss2sh_round_mask(
+ (__v8hf)__A, (__v4sf)__B, (__v8hf)__W, (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_cvtss_sh(__mmask8 __U,
+ __m128h __A,
+ __m128 __B) {
+ return (__m128h)__builtin_ia32_vcvtss2sh_round_mask(
+ (__v8hf)__A, (__v4sf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_cvt_roundsd_sh(A, B, R) \
+ ((__m128h)__builtin_ia32_vcvtsd2sh_round_mask((__v8hf)(A), (__v2df)(B), \
+ (__v8hf)_mm_undefined_ph(), \
+ (__mmask8)(-1), (int)(R)))
+
+#define _mm_mask_cvt_roundsd_sh(W, U, A, B, R) \
+ ((__m128h)__builtin_ia32_vcvtsd2sh_round_mask( \
+ (__v8hf)(A), (__v2df)(B), (__v8hf)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm_maskz_cvt_roundsd_sh(U, A, B, R) \
+ ((__m128h)__builtin_ia32_vcvtsd2sh_round_mask((__v8hf)(A), (__v2df)(B), \
+ (__v8hf)_mm_setzero_ph(), \
+ (__mmask8)(U), (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtsd_sh(__m128h __A,
+ __m128d __B) {
+ return (__m128h)__builtin_ia32_vcvtsd2sh_round_mask(
+ (__v8hf)__A, (__v2df)__B, (__v8hf)_mm_undefined_ph(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtsd_sh(__m128h __W,
+ __mmask8 __U,
+ __m128h __A,
+ __m128d __B) {
+ return (__m128h)__builtin_ia32_vcvtsd2sh_round_mask(
+ (__v8hf)__A, (__v2df)__B, (__v8hf)__W, (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtsd_sh(__mmask8 __U, __m128h __A, __m128d __B) {
+ return (__m128h)__builtin_ia32_vcvtsd2sh_round_mask(
+ (__v8hf)__A, (__v2df)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_cvt_roundsh_sd(A, B, R) \
+ ((__m128d)__builtin_ia32_vcvtsh2sd_round_mask((__v2df)(A), (__v8hf)(B), \
+ (__v2df)_mm_undefined_pd(), \
+ (__mmask8)(-1), (int)(R)))
+
+#define _mm_mask_cvt_roundsh_sd(W, U, A, B, R) \
+ ((__m128d)__builtin_ia32_vcvtsh2sd_round_mask( \
+ (__v2df)(A), (__v8hf)(B), (__v2df)(W), (__mmask8)(U), (int)(R)))
+
+#define _mm_maskz_cvt_roundsh_sd(U, A, B, R) \
+ ((__m128d)__builtin_ia32_vcvtsh2sd_round_mask((__v2df)(A), (__v8hf)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U), (int)(R)))
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtsh_sd(__m128d __A,
+ __m128h __B) {
+ return (__m128d)__builtin_ia32_vcvtsh2sd_round_mask(
+ (__v2df)__A, (__v8hf)__B, (__v2df)_mm_undefined_pd(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtsh_sd(__m128d __W,
+ __mmask8 __U,
+ __m128d __A,
+ __m128h __B) {
+ return (__m128d)__builtin_ia32_vcvtsh2sd_round_mask(
+ (__v2df)__A, (__v8hf)__B, (__v2df)__W, (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtsh_sd(__mmask8 __U, __m128d __A, __m128h __B) {
+ return (__m128d)__builtin_ia32_vcvtsh2sd_round_mask(
+ (__v2df)__A, (__v8hf)__B, (__v2df)_mm_setzero_pd(), (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvt_roundph_epi16(A, R) \
+ ((__m512i)__builtin_ia32_vcvtph2w512_mask((__v32hf)(A), \
+ (__v32hi)_mm512_undefined_epi32(), \
+ (__mmask32)(-1), (int)(R)))
+
+#define _mm512_mask_cvt_roundph_epi16(W, U, A, R) \
+ ((__m512i)__builtin_ia32_vcvtph2w512_mask((__v32hf)(A), (__v32hi)(W), \
+ (__mmask32)(U), (int)(R)))
+
+#define _mm512_maskz_cvt_roundph_epi16(U, A, R) \
+ ((__m512i)__builtin_ia32_vcvtph2w512_mask((__v32hf)(A), \
+ (__v32hi)_mm512_setzero_epi32(), \
+ (__mmask32)(U), (int)(R)))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_cvtph_epi16(__m512h __A) {
+ return (__m512i)__builtin_ia32_vcvtph2w512_mask(
+ (__v32hf)__A, (__v32hi)_mm512_setzero_epi32(), (__mmask32)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtph_epi16(__m512i __W, __mmask32 __U, __m512h __A) {
+ return (__m512i)__builtin_ia32_vcvtph2w512_mask(
+ (__v32hf)__A, (__v32hi)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtph_epi16(__mmask32 __U, __m512h __A) {
+ return (__m512i)__builtin_ia32_vcvtph2w512_mask(
+ (__v32hf)__A, (__v32hi)_mm512_setzero_epi32(), (__mmask32)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvtt_roundph_epi16(A, R) \
+ ((__m512i)__builtin_ia32_vcvttph2w512_mask( \
+ (__v32hf)(A), (__v32hi)_mm512_undefined_epi32(), (__mmask32)(-1), \
+ (int)(R)))
+
+#define _mm512_mask_cvtt_roundph_epi16(W, U, A, R) \
+ ((__m512i)__builtin_ia32_vcvttph2w512_mask((__v32hf)(A), (__v32hi)(W), \
+ (__mmask32)(U), (int)(R)))
+
+#define _mm512_maskz_cvtt_roundph_epi16(U, A, R) \
+ ((__m512i)__builtin_ia32_vcvttph2w512_mask((__v32hf)(A), \
+ (__v32hi)_mm512_setzero_epi32(), \
+ (__mmask32)(U), (int)(R)))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_cvttph_epi16(__m512h __A) {
+ return (__m512i)__builtin_ia32_vcvttph2w512_mask(
+ (__v32hf)__A, (__v32hi)_mm512_setzero_epi32(), (__mmask32)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_cvttph_epi16(__m512i __W, __mmask32 __U, __m512h __A) {
+ return (__m512i)__builtin_ia32_vcvttph2w512_mask(
+ (__v32hf)__A, (__v32hi)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvttph_epi16(__mmask32 __U, __m512h __A) {
+ return (__m512i)__builtin_ia32_vcvttph2w512_mask(
+ (__v32hf)__A, (__v32hi)_mm512_setzero_epi32(), (__mmask32)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvt_roundepi16_ph(A, R) \
+ ((__m512h)__builtin_ia32_vcvtw2ph512_mask((__v32hi)(A), \
+ (__v32hf)_mm512_undefined_ph(), \
+ (__mmask32)(-1), (int)(R)))
+
+#define _mm512_mask_cvt_roundepi16_ph(W, U, A, R) \
+ ((__m512h)__builtin_ia32_vcvtw2ph512_mask((__v32hi)(A), (__v32hf)(W), \
+ (__mmask32)(U), (int)(R)))
+
+#define _mm512_maskz_cvt_roundepi16_ph(U, A, R) \
+ ((__m512h)__builtin_ia32_vcvtw2ph512_mask( \
+ (__v32hi)(A), (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), (int)(R)))
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_cvtepi16_ph(__m512i __A) {
+ return (__m512h)__builtin_ia32_vcvtw2ph512_mask(
+ (__v32hi)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtepi16_ph(__m512h __W, __mmask32 __U, __m512i __A) {
+ return (__m512h)__builtin_ia32_vcvtw2ph512_mask(
+ (__v32hi)__A, (__v32hf)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtepi16_ph(__mmask32 __U, __m512i __A) {
+ return (__m512h)__builtin_ia32_vcvtw2ph512_mask(
+ (__v32hi)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvt_roundph_epu16(A, R) \
+ ((__m512i)__builtin_ia32_vcvtph2uw512_mask( \
+ (__v32hf)(A), (__v32hu)_mm512_undefined_epi32(), (__mmask32)(-1), \
+ (int)(R)))
+
+#define _mm512_mask_cvt_roundph_epu16(W, U, A, R) \
+ ((__m512i)__builtin_ia32_vcvtph2uw512_mask((__v32hf)(A), (__v32hu)(W), \
+ (__mmask32)(U), (int)(R)))
+
+#define _mm512_maskz_cvt_roundph_epu16(U, A, R) \
+ ((__m512i)__builtin_ia32_vcvtph2uw512_mask((__v32hf)(A), \
+ (__v32hu)_mm512_setzero_epi32(), \
+ (__mmask32)(U), (int)(R)))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_cvtph_epu16(__m512h __A) {
+ return (__m512i)__builtin_ia32_vcvtph2uw512_mask(
+ (__v32hf)__A, (__v32hu)_mm512_setzero_epi32(), (__mmask32)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtph_epu16(__m512i __W, __mmask32 __U, __m512h __A) {
+ return (__m512i)__builtin_ia32_vcvtph2uw512_mask(
+ (__v32hf)__A, (__v32hu)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtph_epu16(__mmask32 __U, __m512h __A) {
+ return (__m512i)__builtin_ia32_vcvtph2uw512_mask(
+ (__v32hf)__A, (__v32hu)_mm512_setzero_epi32(), (__mmask32)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvtt_roundph_epu16(A, R) \
+ ((__m512i)__builtin_ia32_vcvttph2uw512_mask( \
+ (__v32hf)(A), (__v32hu)_mm512_undefined_epi32(), (__mmask32)(-1), \
+ (int)(R)))
+
+#define _mm512_mask_cvtt_roundph_epu16(W, U, A, R) \
+ ((__m512i)__builtin_ia32_vcvttph2uw512_mask((__v32hf)(A), (__v32hu)(W), \
+ (__mmask32)(U), (int)(R)))
+
+#define _mm512_maskz_cvtt_roundph_epu16(U, A, R) \
+ ((__m512i)__builtin_ia32_vcvttph2uw512_mask((__v32hf)(A), \
+ (__v32hu)_mm512_setzero_epi32(), \
+ (__mmask32)(U), (int)(R)))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_cvttph_epu16(__m512h __A) {
+ return (__m512i)__builtin_ia32_vcvttph2uw512_mask(
+ (__v32hf)__A, (__v32hu)_mm512_setzero_epi32(), (__mmask32)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_cvttph_epu16(__m512i __W, __mmask32 __U, __m512h __A) {
+ return (__m512i)__builtin_ia32_vcvttph2uw512_mask(
+ (__v32hf)__A, (__v32hu)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvttph_epu16(__mmask32 __U, __m512h __A) {
+ return (__m512i)__builtin_ia32_vcvttph2uw512_mask(
+ (__v32hf)__A, (__v32hu)_mm512_setzero_epi32(), (__mmask32)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvt_roundepu16_ph(A, R) \
+ ((__m512h)__builtin_ia32_vcvtuw2ph512_mask((__v32hu)(A), \
+ (__v32hf)_mm512_undefined_ph(), \
+ (__mmask32)(-1), (int)(R)))
+
+#define _mm512_mask_cvt_roundepu16_ph(W, U, A, R) \
+ ((__m512h)__builtin_ia32_vcvtuw2ph512_mask((__v32hu)(A), (__v32hf)(W), \
+ (__mmask32)(U), (int)(R)))
+
+#define _mm512_maskz_cvt_roundepu16_ph(U, A, R) \
+ ((__m512h)__builtin_ia32_vcvtuw2ph512_mask( \
+ (__v32hu)(A), (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), (int)(R)))
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_cvtepu16_ph(__m512i __A) {
+ return (__m512h)__builtin_ia32_vcvtuw2ph512_mask(
+ (__v32hu)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtepu16_ph(__m512h __W, __mmask32 __U, __m512i __A) {
+ return (__m512h)__builtin_ia32_vcvtuw2ph512_mask(
+ (__v32hu)__A, (__v32hf)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtepu16_ph(__mmask32 __U, __m512i __A) {
+ return (__m512h)__builtin_ia32_vcvtuw2ph512_mask(
+ (__v32hu)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
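The word conversions above map 32 signed or unsigned 16-bit integers to fp16 and back, with _cvtt variants that truncate. A minimal sketch; 16-bit integers of magnitude up to 2048 round-trip exactly, larger values are rounded to the fp16 significand:

#include <immintrin.h>

/* int16 -> fp16 -> int16 (truncating) across all 32 lanes. */
static __m512i roundtrip_epi16(__m512i v) {
  __m512h h = _mm512_cvtepi16_ph(v);
  return _mm512_cvttph_epi16(h);
}
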
+#define _mm512_cvt_roundph_epi32(A, R) \
+ ((__m512i)__builtin_ia32_vcvtph2dq512_mask( \
+ (__v16hf)(A), (__v16si)_mm512_undefined_epi32(), (__mmask16)(-1), \
+ (int)(R)))
+
+#define _mm512_mask_cvt_roundph_epi32(W, U, A, R) \
+ ((__m512i)__builtin_ia32_vcvtph2dq512_mask((__v16hf)(A), (__v16si)(W), \
+ (__mmask16)(U), (int)(R)))
+
+#define _mm512_maskz_cvt_roundph_epi32(U, A, R) \
+ ((__m512i)__builtin_ia32_vcvtph2dq512_mask((__v16hf)(A), \
+ (__v16si)_mm512_setzero_epi32(), \
+ (__mmask16)(U), (int)(R)))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_cvtph_epi32(__m256h __A) {
+ return (__m512i)__builtin_ia32_vcvtph2dq512_mask(
+ (__v16hf)__A, (__v16si)_mm512_setzero_epi32(), (__mmask16)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtph_epi32(__m512i __W, __mmask16 __U, __m256h __A) {
+ return (__m512i)__builtin_ia32_vcvtph2dq512_mask(
+ (__v16hf)__A, (__v16si)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtph_epi32(__mmask16 __U, __m256h __A) {
+ return (__m512i)__builtin_ia32_vcvtph2dq512_mask(
+ (__v16hf)__A, (__v16si)_mm512_setzero_epi32(), (__mmask16)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvt_roundph_epu32(A, R) \
+ ((__m512i)__builtin_ia32_vcvtph2udq512_mask( \
+ (__v16hf)(A), (__v16su)_mm512_undefined_epi32(), (__mmask16)(-1), \
+ (int)(R)))
+
+#define _mm512_mask_cvt_roundph_epu32(W, U, A, R) \
+ ((__m512i)__builtin_ia32_vcvtph2udq512_mask((__v16hf)(A), (__v16su)(W), \
+ (__mmask16)(U), (int)(R)))
+
+#define _mm512_maskz_cvt_roundph_epu32(U, A, R) \
+ ((__m512i)__builtin_ia32_vcvtph2udq512_mask((__v16hf)(A), \
+ (__v16su)_mm512_setzero_epi32(), \
+ (__mmask16)(U), (int)(R)))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_cvtph_epu32(__m256h __A) {
+ return (__m512i)__builtin_ia32_vcvtph2udq512_mask(
+ (__v16hf)__A, (__v16su)_mm512_setzero_epi32(), (__mmask16)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtph_epu32(__m512i __W, __mmask16 __U, __m256h __A) {
+ return (__m512i)__builtin_ia32_vcvtph2udq512_mask(
+ (__v16hf)__A, (__v16su)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtph_epu32(__mmask16 __U, __m256h __A) {
+ return (__m512i)__builtin_ia32_vcvtph2udq512_mask(
+ (__v16hf)__A, (__v16su)_mm512_setzero_epi32(), (__mmask16)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvt_roundepi32_ph(A, R) \
+ ((__m256h)__builtin_ia32_vcvtdq2ph512_mask((__v16si)(A), \
+ (__v16hf)_mm256_undefined_ph(), \
+ (__mmask16)(-1), (int)(R)))
+
+#define _mm512_mask_cvt_roundepi32_ph(W, U, A, R) \
+ ((__m256h)__builtin_ia32_vcvtdq2ph512_mask((__v16si)(A), (__v16hf)(W), \
+ (__mmask16)(U), (int)(R)))
+
+#define _mm512_maskz_cvt_roundepi32_ph(U, A, R) \
+ ((__m256h)__builtin_ia32_vcvtdq2ph512_mask( \
+ (__v16si)(A), (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), (int)(R)))
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS512
+_mm512_cvtepi32_ph(__m512i __A) {
+ return (__m256h)__builtin_ia32_vcvtdq2ph512_mask(
+ (__v16si)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtepi32_ph(__m256h __W, __mmask16 __U, __m512i __A) {
+ return (__m256h)__builtin_ia32_vcvtdq2ph512_mask(
+ (__v16si)__A, (__v16hf)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtepi32_ph(__mmask16 __U, __m512i __A) {
+ return (__m256h)__builtin_ia32_vcvtdq2ph512_mask(
+ (__v16si)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvt_roundepu32_ph(A, R) \
+ ((__m256h)__builtin_ia32_vcvtudq2ph512_mask((__v16su)(A), \
+ (__v16hf)_mm256_undefined_ph(), \
+ (__mmask16)(-1), (int)(R)))
+
+#define _mm512_mask_cvt_roundepu32_ph(W, U, A, R) \
+ ((__m256h)__builtin_ia32_vcvtudq2ph512_mask((__v16su)(A), (__v16hf)(W), \
+ (__mmask16)(U), (int)(R)))
+
+#define _mm512_maskz_cvt_roundepu32_ph(U, A, R) \
+ ((__m256h)__builtin_ia32_vcvtudq2ph512_mask( \
+ (__v16su)(A), (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), (int)(R)))
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS512
+_mm512_cvtepu32_ph(__m512i __A) {
+ return (__m256h)__builtin_ia32_vcvtudq2ph512_mask(
+ (__v16su)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtepu32_ph(__m256h __W, __mmask16 __U, __m512i __A) {
+ return (__m256h)__builtin_ia32_vcvtudq2ph512_mask(
+ (__v16su)__A, (__v16hf)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtepu32_ph(__mmask16 __U, __m512i __A) {
+ return (__m256h)__builtin_ia32_vcvtudq2ph512_mask(
+ (__v16su)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvtt_roundph_epi32(A, R) \
+ ((__m512i)__builtin_ia32_vcvttph2dq512_mask( \
+ (__v16hf)(A), (__v16si)_mm512_undefined_epi32(), (__mmask16)(-1), \
+ (int)(R)))
+
+#define _mm512_mask_cvtt_roundph_epi32(W, U, A, R) \
+ ((__m512i)__builtin_ia32_vcvttph2dq512_mask((__v16hf)(A), (__v16si)(W), \
+ (__mmask16)(U), (int)(R)))
+
+#define _mm512_maskz_cvtt_roundph_epi32(U, A, R) \
+ ((__m512i)__builtin_ia32_vcvttph2dq512_mask((__v16hf)(A), \
+ (__v16si)_mm512_setzero_epi32(), \
+ (__mmask16)(U), (int)(R)))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_cvttph_epi32(__m256h __A) {
+ return (__m512i)__builtin_ia32_vcvttph2dq512_mask(
+ (__v16hf)__A, (__v16si)_mm512_setzero_epi32(), (__mmask16)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_cvttph_epi32(__m512i __W, __mmask16 __U, __m256h __A) {
+ return (__m512i)__builtin_ia32_vcvttph2dq512_mask(
+ (__v16hf)__A, (__v16si)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvttph_epi32(__mmask16 __U, __m256h __A) {
+ return (__m512i)__builtin_ia32_vcvttph2dq512_mask(
+ (__v16hf)__A, (__v16si)_mm512_setzero_epi32(), (__mmask16)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvtt_roundph_epu32(A, R) \
+ ((__m512i)__builtin_ia32_vcvttph2udq512_mask( \
+ (__v16hf)(A), (__v16su)_mm512_undefined_epi32(), (__mmask16)(-1), \
+ (int)(R)))
+
+#define _mm512_mask_cvtt_roundph_epu32(W, U, A, R) \
+ ((__m512i)__builtin_ia32_vcvttph2udq512_mask((__v16hf)(A), (__v16su)(W), \
+ (__mmask16)(U), (int)(R)))
+
+#define _mm512_maskz_cvtt_roundph_epu32(U, A, R) \
+ ((__m512i)__builtin_ia32_vcvttph2udq512_mask( \
+ (__v16hf)(A), (__v16su)_mm512_setzero_epi32(), (__mmask16)(U), \
+ (int)(R)))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_cvttph_epu32(__m256h __A) {
+ return (__m512i)__builtin_ia32_vcvttph2udq512_mask(
+ (__v16hf)__A, (__v16su)_mm512_setzero_epi32(), (__mmask16)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_cvttph_epu32(__m512i __W, __mmask16 __U, __m256h __A) {
+ return (__m512i)__builtin_ia32_vcvttph2udq512_mask(
+ (__v16hf)__A, (__v16su)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvttph_epu32(__mmask16 __U, __m256h __A) {
+ return (__m512i)__builtin_ia32_vcvttph2udq512_mask(
+ (__v16hf)__A, (__v16su)_mm512_setzero_epi32(), (__mmask16)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvt_roundepi64_ph(A, R) \
+ ((__m128h)__builtin_ia32_vcvtqq2ph512_mask( \
+ (__v8di)(A), (__v8hf)_mm_undefined_ph(), (__mmask8)(-1), (int)(R)))
+
+#define _mm512_mask_cvt_roundepi64_ph(W, U, A, R) \
+ ((__m128h)__builtin_ia32_vcvtqq2ph512_mask((__v8di)(A), (__v8hf)(W), \
+ (__mmask8)(U), (int)(R)))
+
+#define _mm512_maskz_cvt_roundepi64_ph(U, A, R) \
+ ((__m128h)__builtin_ia32_vcvtqq2ph512_mask( \
+ (__v8di)(A), (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS512
+_mm512_cvtepi64_ph(__m512i __A) {
+ return (__m128h)__builtin_ia32_vcvtqq2ph512_mask(
+ (__v8di)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtepi64_ph(__m128h __W, __mmask8 __U, __m512i __A) {
+ return (__m128h)__builtin_ia32_vcvtqq2ph512_mask(
+ (__v8di)__A, (__v8hf)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtepi64_ph(__mmask8 __U, __m512i __A) {
+ return (__m128h)__builtin_ia32_vcvtqq2ph512_mask(
+ (__v8di)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvt_roundph_epi64(A, R) \
+ ((__m512i)__builtin_ia32_vcvtph2qq512_mask((__v8hf)(A), \
+ (__v8di)_mm512_undefined_epi32(), \
+ (__mmask8)(-1), (int)(R)))
+
+#define _mm512_mask_cvt_roundph_epi64(W, U, A, R) \
+ ((__m512i)__builtin_ia32_vcvtph2qq512_mask((__v8hf)(A), (__v8di)(W), \
+ (__mmask8)(U), (int)(R)))
+
+#define _mm512_maskz_cvt_roundph_epi64(U, A, R) \
+ ((__m512i)__builtin_ia32_vcvtph2qq512_mask( \
+ (__v8hf)(A), (__v8di)_mm512_setzero_epi32(), (__mmask8)(U), (int)(R)))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_cvtph_epi64(__m128h __A) {
+ return (__m512i)__builtin_ia32_vcvtph2qq512_mask(
+ (__v8hf)__A, (__v8di)_mm512_setzero_epi32(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtph_epi64(__m512i __W, __mmask8 __U, __m128h __A) {
+ return (__m512i)__builtin_ia32_vcvtph2qq512_mask(
+ (__v8hf)__A, (__v8di)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtph_epi64(__mmask8 __U, __m128h __A) {
+ return (__m512i)__builtin_ia32_vcvtph2qq512_mask(
+ (__v8hf)__A, (__v8di)_mm512_setzero_epi32(), (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvt_roundepu64_ph(A, R) \
+ ((__m128h)__builtin_ia32_vcvtuqq2ph512_mask( \
+ (__v8du)(A), (__v8hf)_mm_undefined_ph(), (__mmask8)(-1), (int)(R)))
+
+#define _mm512_mask_cvt_roundepu64_ph(W, U, A, R) \
+ ((__m128h)__builtin_ia32_vcvtuqq2ph512_mask((__v8du)(A), (__v8hf)(W), \
+ (__mmask8)(U), (int)(R)))
+
+#define _mm512_maskz_cvt_roundepu64_ph(U, A, R) \
+ ((__m128h)__builtin_ia32_vcvtuqq2ph512_mask( \
+ (__v8du)(A), (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS512
+_mm512_cvtepu64_ph(__m512i __A) {
+ return (__m128h)__builtin_ia32_vcvtuqq2ph512_mask(
+ (__v8du)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtepu64_ph(__m128h __W, __mmask8 __U, __m512i __A) {
+ return (__m128h)__builtin_ia32_vcvtuqq2ph512_mask(
+ (__v8du)__A, (__v8hf)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtepu64_ph(__mmask8 __U, __m512i __A) {
+ return (__m128h)__builtin_ia32_vcvtuqq2ph512_mask(
+ (__v8du)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvt_roundph_epu64(A, R) \
+ ((__m512i)__builtin_ia32_vcvtph2uqq512_mask( \
+ (__v8hf)(A), (__v8du)_mm512_undefined_epi32(), (__mmask8)(-1), \
+ (int)(R)))
+
+#define _mm512_mask_cvt_roundph_epu64(W, U, A, R) \
+ ((__m512i)__builtin_ia32_vcvtph2uqq512_mask((__v8hf)(A), (__v8du)(W), \
+ (__mmask8)(U), (int)(R)))
+
+#define _mm512_maskz_cvt_roundph_epu64(U, A, R) \
+ ((__m512i)__builtin_ia32_vcvtph2uqq512_mask( \
+ (__v8hf)(A), (__v8du)_mm512_setzero_epi32(), (__mmask8)(U), (int)(R)))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_cvtph_epu64(__m128h __A) {
+ return (__m512i)__builtin_ia32_vcvtph2uqq512_mask(
+ (__v8hf)__A, (__v8du)_mm512_setzero_epi32(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtph_epu64(__m512i __W, __mmask8 __U, __m128h __A) {
+ return (__m512i)__builtin_ia32_vcvtph2uqq512_mask(
+ (__v8hf)__A, (__v8du)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtph_epu64(__mmask8 __U, __m128h __A) {
+ return (__m512i)__builtin_ia32_vcvtph2uqq512_mask(
+ (__v8hf)__A, (__v8du)_mm512_setzero_epi32(), (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvtt_roundph_epi64(A, R) \
+ ((__m512i)__builtin_ia32_vcvttph2qq512_mask( \
+ (__v8hf)(A), (__v8di)_mm512_undefined_epi32(), (__mmask8)(-1), \
+ (int)(R)))
+
+#define _mm512_mask_cvtt_roundph_epi64(W, U, A, R) \
+ ((__m512i)__builtin_ia32_vcvttph2qq512_mask((__v8hf)(A), (__v8di)(W), \
+ (__mmask8)(U), (int)(R)))
+
+#define _mm512_maskz_cvtt_roundph_epi64(U, A, R) \
+ ((__m512i)__builtin_ia32_vcvttph2qq512_mask( \
+ (__v8hf)(A), (__v8di)_mm512_setzero_epi32(), (__mmask8)(U), (int)(R)))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_cvttph_epi64(__m128h __A) {
+ return (__m512i)__builtin_ia32_vcvttph2qq512_mask(
+ (__v8hf)__A, (__v8di)_mm512_setzero_epi32(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_cvttph_epi64(__m512i __W, __mmask8 __U, __m128h __A) {
+ return (__m512i)__builtin_ia32_vcvttph2qq512_mask(
+ (__v8hf)__A, (__v8di)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvttph_epi64(__mmask8 __U, __m128h __A) {
+ return (__m512i)__builtin_ia32_vcvttph2qq512_mask(
+ (__v8hf)__A, (__v8di)_mm512_setzero_epi32(), (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvtt_roundph_epu64(A, R) \
+ ((__m512i)__builtin_ia32_vcvttph2uqq512_mask( \
+ (__v8hf)(A), (__v8du)_mm512_undefined_epi32(), (__mmask8)(-1), \
+ (int)(R)))
+
+#define _mm512_mask_cvtt_roundph_epu64(W, U, A, R) \
+ ((__m512i)__builtin_ia32_vcvttph2uqq512_mask((__v8hf)(A), (__v8du)(W), \
+ (__mmask8)(U), (int)(R)))
+
+#define _mm512_maskz_cvtt_roundph_epu64(U, A, R) \
+ ((__m512i)__builtin_ia32_vcvttph2uqq512_mask( \
+ (__v8hf)(A), (__v8du)_mm512_setzero_epi32(), (__mmask8)(U), (int)(R)))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_cvttph_epu64(__m128h __A) {
+ return (__m512i)__builtin_ia32_vcvttph2uqq512_mask(
+ (__v8hf)__A, (__v8du)_mm512_setzero_epi32(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_cvttph_epu64(__m512i __W, __mmask8 __U, __m128h __A) {
+ return (__m512i)__builtin_ia32_vcvttph2uqq512_mask(
+ (__v8hf)__A, (__v8du)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvttph_epu64(__mmask8 __U, __m128h __A) {
+ return (__m512i)__builtin_ia32_vcvttph2uqq512_mask(
+ (__v8hf)__A, (__v8du)_mm512_setzero_epi32(), (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_cvt_roundsh_i32(A, R) \
+ ((int)__builtin_ia32_vcvtsh2si32((__v8hf)(A), (int)(R)))
+
+static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvtsh_i32(__m128h __A) {
+ return (int)__builtin_ia32_vcvtsh2si32((__v8hf)__A, _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_cvt_roundsh_u32(A, R) \
+ ((unsigned int)__builtin_ia32_vcvtsh2usi32((__v8hf)(A), (int)(R)))
+
+static __inline__ unsigned int __DEFAULT_FN_ATTRS128
+_mm_cvtsh_u32(__m128h __A) {
+ return (unsigned int)__builtin_ia32_vcvtsh2usi32((__v8hf)__A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __x86_64__
+#define _mm_cvt_roundsh_i64(A, R) \
+ ((long long)__builtin_ia32_vcvtsh2si64((__v8hf)(A), (int)(R)))
+
+static __inline__ long long __DEFAULT_FN_ATTRS128 _mm_cvtsh_i64(__m128h __A) {
+ return (long long)__builtin_ia32_vcvtsh2si64((__v8hf)__A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_cvt_roundsh_u64(A, R) \
+ ((unsigned long long)__builtin_ia32_vcvtsh2usi64((__v8hf)(A), (int)(R)))
+
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
+_mm_cvtsh_u64(__m128h __A) {
+ return (unsigned long long)__builtin_ia32_vcvtsh2usi64(
+ (__v8hf)__A, _MM_FROUND_CUR_DIRECTION);
+}
+#endif // __x86_64__
+
+#define _mm_cvt_roundu32_sh(A, B, R) \
+ ((__m128h)__builtin_ia32_vcvtusi2sh((__v8hf)(A), (unsigned int)(B), (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_cvtu32_sh(__m128h __A, unsigned int __B) {
+ __A[0] = __B;
+ return __A;
+}
+
+#ifdef __x86_64__
+#define _mm_cvt_roundu64_sh(A, B, R) \
+ ((__m128h)__builtin_ia32_vcvtusi642sh((__v8hf)(A), (unsigned long long)(B), \
+ (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_cvtu64_sh(__m128h __A, unsigned long long __B) {
+ __A[0] = __B;
+ return __A;
+}
+#endif
+
+#define _mm_cvt_roundi32_sh(A, B, R) \
+ ((__m128h)__builtin_ia32_vcvtsi2sh((__v8hf)(A), (int)(B), (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvti32_sh(__m128h __A,
+ int __B) {
+ __A[0] = __B;
+ return __A;
+}
+
+#ifdef __x86_64__
+#define _mm_cvt_roundi64_sh(A, B, R) \
+ ((__m128h)__builtin_ia32_vcvtsi642sh((__v8hf)(A), (long long)(B), (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvti64_sh(__m128h __A,
+ long long __B) {
+ __A[0] = __B;
+ return __A;
+}
+#endif
+
+#define _mm_cvtt_roundsh_i32(A, R) \
+ ((int)__builtin_ia32_vcvttsh2si32((__v8hf)(A), (int)(R)))
+
+static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttsh_i32(__m128h __A) {
+ return (int)__builtin_ia32_vcvttsh2si32((__v8hf)__A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __x86_64__
+#define _mm_cvtt_roundsh_i64(A, R) \
+ ((long long)__builtin_ia32_vcvttsh2si64((__v8hf)(A), (int)(R)))
+
+static __inline__ long long __DEFAULT_FN_ATTRS128 _mm_cvttsh_i64(__m128h __A) {
+ return (long long)__builtin_ia32_vcvttsh2si64((__v8hf)__A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+#endif
+
+#define _mm_cvtt_roundsh_u32(A, R) \
+ ((unsigned int)__builtin_ia32_vcvttsh2usi32((__v8hf)(A), (int)(R)))
+
+static __inline__ unsigned int __DEFAULT_FN_ATTRS128
+_mm_cvttsh_u32(__m128h __A) {
+ return (unsigned int)__builtin_ia32_vcvttsh2usi32((__v8hf)__A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __x86_64__
+#define _mm_cvtt_roundsh_u64(A, R) \
+ ((unsigned long long)__builtin_ia32_vcvttsh2usi64((__v8hf)(A), (int)(R)))
+
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
+_mm_cvttsh_u64(__m128h __A) {
+ return (unsigned long long)__builtin_ia32_vcvttsh2usi64(
+ (__v8hf)__A, _MM_FROUND_CUR_DIRECTION);
+}
+#endif
+
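The scalar conversions above move the low fp16 lane to and from 32- and 64-bit integers; the forms without an explicit rounding argument use the current rounding mode and the _cvtt forms truncate. A minimal sketch:

#include <immintrin.h>

/* Truncate the low fp16 lane to int, and write an int back into the low lane. */
static int half_to_int(__m128h x) {
  return _mm_cvttsh_i32(x);
}
static __m128h int_to_half(__m128h x, int v) {
  return _mm_cvti32_sh(x, v); /* low lane = (_Float16)v, upper lanes from x */
}
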
+#define _mm512_cvtx_roundph_ps(A, R) \
+ ((__m512)__builtin_ia32_vcvtph2psx512_mask((__v16hf)(A), \
+ (__v16sf)_mm512_undefined_ps(), \
+ (__mmask16)(-1), (int)(R)))
+
+#define _mm512_mask_cvtx_roundph_ps(W, U, A, R) \
+ ((__m512)__builtin_ia32_vcvtph2psx512_mask((__v16hf)(A), (__v16sf)(W), \
+ (__mmask16)(U), (int)(R)))
+
+#define _mm512_maskz_cvtx_roundph_ps(U, A, R) \
+ ((__m512)__builtin_ia32_vcvtph2psx512_mask( \
+ (__v16hf)(A), (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), (int)(R)))
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtxph_ps(__m256h __A) {
+ return (__m512)__builtin_ia32_vcvtph2psx512_mask(
+ (__v16hf)__A, (__v16sf)_mm512_setzero_ps(), (__mmask16)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtxph_ps(__m512 __W, __mmask16 __U, __m256h __A) {
+ return (__m512)__builtin_ia32_vcvtph2psx512_mask(
+ (__v16hf)__A, (__v16sf)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtxph_ps(__mmask16 __U, __m256h __A) {
+ return (__m512)__builtin_ia32_vcvtph2psx512_mask(
+ (__v16hf)__A, (__v16sf)_mm512_setzero_ps(), (__mmask16)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvtx_roundps_ph(A, R) \
+ ((__m256h)__builtin_ia32_vcvtps2phx512_mask((__v16sf)(A), \
+ (__v16hf)_mm256_undefined_ph(), \
+ (__mmask16)(-1), (int)(R)))
+
+#define _mm512_mask_cvtx_roundps_ph(W, U, A, R) \
+ ((__m256h)__builtin_ia32_vcvtps2phx512_mask((__v16sf)(A), (__v16hf)(W), \
+ (__mmask16)(U), (int)(R)))
+
+#define _mm512_maskz_cvtx_roundps_ph(U, A, R) \
+ ((__m256h)__builtin_ia32_vcvtps2phx512_mask( \
+ (__v16sf)(A), (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), (int)(R)))
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS512 _mm512_cvtxps_ph(__m512 __A) {
+ return (__m256h)__builtin_ia32_vcvtps2phx512_mask(
+ (__v16sf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtxps_ph(__m256h __W, __mmask16 __U, __m512 __A) {
+ return (__m256h)__builtin_ia32_vcvtps2phx512_mask(
+ (__v16sf)__A, (__v16hf)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtxps_ph(__mmask16 __U, __m512 __A) {
+ return (__m256h)__builtin_ia32_vcvtps2phx512_mask(
+ (__v16sf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
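The cvtx pair above narrows 16 single-precision floats to fp16 and widens them back, with the usual merge- and zero-masking forms. A minimal sketch:

#include <immintrin.h>

/* ps -> ph -> ps round trip; lanes with a clear mask bit come back as zero. */
static __m512 roundtrip_ps(__m512 x, __mmask16 m) {
  __m256h h = _mm512_cvtxps_ph(x);
  return _mm512_maskz_cvtxph_ps(m, h);
}
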
+#define _mm512_fmadd_round_ph(A, B, C, R) \
+ ((__m512h)__builtin_ia32_vfmaddph512_mask( \
+ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
+ (__mmask32)-1, (int)(R)))
+
+#define _mm512_mask_fmadd_round_ph(A, U, B, C, R) \
+ ((__m512h)__builtin_ia32_vfmaddph512_mask( \
+ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
+ (__mmask32)(U), (int)(R)))
+
+#define _mm512_mask3_fmadd_round_ph(A, B, C, U, R) \
+ ((__m512h)__builtin_ia32_vfmaddph512_mask3( \
+ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
+ (__mmask32)(U), (int)(R)))
+
+#define _mm512_maskz_fmadd_round_ph(U, A, B, C, R) \
+ ((__m512h)__builtin_ia32_vfmaddph512_maskz( \
+ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
+ (__mmask32)(U), (int)(R)))
+
+#define _mm512_fmsub_round_ph(A, B, C, R) \
+ ((__m512h)__builtin_ia32_vfmaddph512_mask( \
+ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \
+ (__mmask32)-1, (int)(R)))
+
+#define _mm512_mask_fmsub_round_ph(A, U, B, C, R) \
+ ((__m512h)__builtin_ia32_vfmaddph512_mask( \
+ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \
+ (__mmask32)(U), (int)(R)))
+
+#define _mm512_maskz_fmsub_round_ph(U, A, B, C, R) \
+ ((__m512h)__builtin_ia32_vfmaddph512_maskz( \
+ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \
+ (__mmask32)(U), (int)(R)))
+
+#define _mm512_fnmadd_round_ph(A, B, C, R) \
+ ((__m512h)__builtin_ia32_vfmaddph512_mask( \
+ (__v32hf)(__m512h)(A), -(__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
+ (__mmask32)-1, (int)(R)))
+
+#define _mm512_mask3_fnmadd_round_ph(A, B, C, U, R) \
+ ((__m512h)__builtin_ia32_vfmaddph512_mask3( \
+ -(__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
+ (__mmask32)(U), (int)(R)))
+
+#define _mm512_maskz_fnmadd_round_ph(U, A, B, C, R) \
+ ((__m512h)__builtin_ia32_vfmaddph512_maskz( \
+ -(__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
+ (__mmask32)(U), (int)(R)))
+
+#define _mm512_fnmsub_round_ph(A, B, C, R) \
+ ((__m512h)__builtin_ia32_vfmaddph512_mask( \
+ (__v32hf)(__m512h)(A), -(__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \
+ (__mmask32)-1, (int)(R)))
+
+#define _mm512_maskz_fnmsub_round_ph(U, A, B, C, R) \
+ ((__m512h)__builtin_ia32_vfmaddph512_maskz( \
+ -(__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \
+ (__mmask32)(U), (int)(R)))
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fmadd_ph(__m512h __A,
+ __m512h __B,
+ __m512h __C) {
+ return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, (__v32hf)__B,
+ (__v32hf)__C, (__mmask32)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_mask_fmadd_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
+ return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, (__v32hf)__B,
+ (__v32hf)__C, (__mmask32)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_mask3_fmadd_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
+ return (__m512h)__builtin_ia32_vfmaddph512_mask3((__v32hf)__A, (__v32hf)__B,
+ (__v32hf)__C, (__mmask32)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_maskz_fmadd_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
+ return (__m512h)__builtin_ia32_vfmaddph512_maskz((__v32hf)__A, (__v32hf)__B,
+ (__v32hf)__C, (__mmask32)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fmsub_ph(__m512h __A,
+ __m512h __B,
+ __m512h __C) {
+ return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, (__v32hf)__B,
+ -(__v32hf)__C, (__mmask32)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_mask_fmsub_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
+ return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, (__v32hf)__B,
+ -(__v32hf)__C, (__mmask32)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_maskz_fmsub_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
+ return (__m512h)__builtin_ia32_vfmaddph512_maskz(
+ (__v32hf)__A, (__v32hf)__B, -(__v32hf)__C, (__mmask32)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fnmadd_ph(__m512h __A,
+ __m512h __B,
+ __m512h __C) {
+ return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, -(__v32hf)__B,
+ (__v32hf)__C, (__mmask32)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_mask3_fnmadd_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
+ return (__m512h)__builtin_ia32_vfmaddph512_mask3(-(__v32hf)__A, (__v32hf)__B,
+ (__v32hf)__C, (__mmask32)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_maskz_fnmadd_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
+ return (__m512h)__builtin_ia32_vfmaddph512_maskz(-(__v32hf)__A, (__v32hf)__B,
+ (__v32hf)__C, (__mmask32)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fnmsub_ph(__m512h __A,
+ __m512h __B,
+ __m512h __C) {
+ return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, -(__v32hf)__B,
+ -(__v32hf)__C, (__mmask32)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_maskz_fnmsub_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
+ return (__m512h)__builtin_ia32_vfmaddph512_maskz(
+ -(__v32hf)__A, (__v32hf)__B, -(__v32hf)__C, (__mmask32)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
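The fused multiply-add intrinsics above compute A*B + C (plus the fmsub/fnmadd/fnmsub sign variants) over 32 fp16 lanes in a single rounding step, with merge masking against the first or third operand and zero-masking forms. A minimal sketch:

#include <immintrin.h>

/* dst = m ? a*x + y : a, lane by lane. */
static __m512h masked_axpy(__m512h a, __m512h x, __m512h y, __mmask32 m) {
  return _mm512_mask_fmadd_ph(a, m, x, y);
}
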
+#define _mm512_fmaddsub_round_ph(A, B, C, R) \
+ ((__m512h)__builtin_ia32_vfmaddsubph512_mask( \
+ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
+ (__mmask32)-1, (int)(R)))
+
+#define _mm512_mask_fmaddsub_round_ph(A, U, B, C, R) \
+ ((__m512h)__builtin_ia32_vfmaddsubph512_mask( \
+ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
+ (__mmask32)(U), (int)(R)))
+
+#define _mm512_mask3_fmaddsub_round_ph(A, B, C, U, R) \
+ ((__m512h)__builtin_ia32_vfmaddsubph512_mask3( \
+ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
+ (__mmask32)(U), (int)(R)))
+
+#define _mm512_maskz_fmaddsub_round_ph(U, A, B, C, R) \
+ ((__m512h)__builtin_ia32_vfmaddsubph512_maskz( \
+ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
+ (__mmask32)(U), (int)(R)))
+
+#define _mm512_fmsubadd_round_ph(A, B, C, R) \
+ ((__m512h)__builtin_ia32_vfmaddsubph512_mask( \
+ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \
+ (__mmask32)-1, (int)(R)))
+
+#define _mm512_mask_fmsubadd_round_ph(A, U, B, C, R) \
+ ((__m512h)__builtin_ia32_vfmaddsubph512_mask( \
+ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \
+ (__mmask32)(U), (int)(R)))
+
+#define _mm512_maskz_fmsubadd_round_ph(U, A, B, C, R) \
+ ((__m512h)__builtin_ia32_vfmaddsubph512_maskz( \
+ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \
+ (__mmask32)(U), (int)(R)))
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_fmaddsub_ph(__m512h __A, __m512h __B, __m512h __C) {
+ return (__m512h)__builtin_ia32_vfmaddsubph512_mask(
+ (__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_mask_fmaddsub_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
+ return (__m512h)__builtin_ia32_vfmaddsubph512_mask(
+ (__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_mask3_fmaddsub_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
+ return (__m512h)__builtin_ia32_vfmaddsubph512_mask3(
+ (__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_maskz_fmaddsub_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
+ return (__m512h)__builtin_ia32_vfmaddsubph512_maskz(
+ (__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_fmsubadd_ph(__m512h __A, __m512h __B, __m512h __C) {
+ return (__m512h)__builtin_ia32_vfmaddsubph512_mask(
+ (__v32hf)__A, (__v32hf)__B, -(__v32hf)__C, (__mmask32)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_mask_fmsubadd_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
+ return (__m512h)__builtin_ia32_vfmaddsubph512_mask(
+ (__v32hf)__A, (__v32hf)__B, -(__v32hf)__C, (__mmask32)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_maskz_fmsubadd_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
+ return (__m512h)__builtin_ia32_vfmaddsubph512_maskz(
+ (__v32hf)__A, (__v32hf)__B, -(__v32hf)__C, (__mmask32)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_mask3_fmsub_round_ph(A, B, C, U, R) \
+ ((__m512h)__builtin_ia32_vfmsubph512_mask3( \
+ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
+ (__mmask32)(U), (int)(R)))
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_mask3_fmsub_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
+ return (__m512h)__builtin_ia32_vfmsubph512_mask3((__v32hf)__A, (__v32hf)__B,
+ (__v32hf)__C, (__mmask32)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_mask3_fmsubadd_round_ph(A, B, C, U, R) \
+ ((__m512h)__builtin_ia32_vfmsubaddph512_mask3( \
+ (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
+ (__mmask32)(U), (int)(R)))
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_mask3_fmsubadd_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
+ return (__m512h)__builtin_ia32_vfmsubaddph512_mask3(
+ (__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_mask_fnmadd_round_ph(A, U, B, C, R) \
+ ((__m512h)__builtin_ia32_vfmaddph512_mask( \
+ (__v32hf)(__m512h)(A), -(__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
+ (__mmask32)(U), (int)(R)))
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_mask_fnmadd_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
+ return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, -(__v32hf)__B,
+ (__v32hf)__C, (__mmask32)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_mask_fnmsub_round_ph(A, U, B, C, R) \
+ ((__m512h)__builtin_ia32_vfmaddph512_mask( \
+ (__v32hf)(__m512h)(A), -(__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \
+ (__mmask32)(U), (int)(R)))
+
+#define _mm512_mask3_fnmsub_round_ph(A, B, C, U, R) \
+ ((__m512h)__builtin_ia32_vfmsubph512_mask3( \
+ -(__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
+ (__mmask32)(U), (int)(R)))
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_mask_fnmsub_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
+ return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, -(__v32hf)__B,
+ -(__v32hf)__C, (__mmask32)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_mask3_fnmsub_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
+ return (__m512h)__builtin_ia32_vfmsubph512_mask3(-(__v32hf)__A, (__v32hf)__B,
+ (__v32hf)__C, (__mmask32)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmadd_sh(__m128h __W,
+ __m128h __A,
+ __m128h __B) {
+ return __builtin_ia32_vfmaddsh3_mask((__v8hf)__W, (__v8hf)__A, (__v8hf)__B,
+ (__mmask8)-1, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_sh(__m128h __W,
+ __mmask8 __U,
+ __m128h __A,
+ __m128h __B) {
+ return __builtin_ia32_vfmaddsh3_mask((__v8hf)__W, (__v8hf)__A, (__v8hf)__B,
+ (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_fmadd_round_sh(A, B, C, R) \
+ ((__m128h)__builtin_ia32_vfmaddsh3_mask( \
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(C), \
+ (__mmask8)-1, (int)(R)))
+
+#define _mm_mask_fmadd_round_sh(W, U, A, B, R) \
+ ((__m128h)__builtin_ia32_vfmaddsh3_mask( \
+ (__v8hf)(__m128h)(W), (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), \
+ (__mmask8)(U), (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_fmadd_sh(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
+ return __builtin_ia32_vfmaddsh3_maskz((__v8hf)__A, (__v8hf)__B, (__v8hf)__C,
+ (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_maskz_fmadd_round_sh(U, A, B, C, R) \
+ ((__m128h)__builtin_ia32_vfmaddsh3_maskz( \
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(C), \
+ (__mmask8)(U), (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_mask3_fmadd_sh(__m128h __W, __m128h __X, __m128h __Y, __mmask8 __U) {
+ return __builtin_ia32_vfmaddsh3_mask3((__v8hf)__W, (__v8hf)__X, (__v8hf)__Y,
+ (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_mask3_fmadd_round_sh(W, X, Y, U, R) \
+ ((__m128h)__builtin_ia32_vfmaddsh3_mask3( \
+ (__v8hf)(__m128h)(W), (__v8hf)(__m128h)(X), (__v8hf)(__m128h)(Y), \
+ (__mmask8)(U), (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmsub_sh(__m128h __W,
+ __m128h __A,
+ __m128h __B) {
+ return (__m128h)__builtin_ia32_vfmaddsh3_mask((__v8hf)__W, (__v8hf)__A,
+ -(__v8hf)__B, (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_sh(__m128h __W,
+ __mmask8 __U,
+ __m128h __A,
+ __m128h __B) {
+ return (__m128h)__builtin_ia32_vfmaddsh3_mask((__v8hf)__W, (__v8hf)__A,
+ -(__v8hf)__B, (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_fmsub_round_sh(A, B, C, R) \
+ ((__m128h)__builtin_ia32_vfmaddsh3_mask( \
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), -(__v8hf)(__m128h)(C), \
+ (__mmask8)-1, (int)(R)))
+
+#define _mm_mask_fmsub_round_sh(W, U, A, B, R) \
+ ((__m128h)__builtin_ia32_vfmaddsh3_mask( \
+ (__v8hf)(__m128h)(W), (__v8hf)(__m128h)(A), -(__v8hf)(__m128h)(B), \
+ (__mmask8)(U), (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_fmsub_sh(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
+ return (__m128h)__builtin_ia32_vfmaddsh3_maskz((__v8hf)__A, (__v8hf)__B,
+ -(__v8hf)__C, (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_maskz_fmsub_round_sh(U, A, B, C, R) \
+ ((__m128h)__builtin_ia32_vfmaddsh3_maskz( \
+ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), -(__v8hf)(__m128h)(C), \
+      (__mmask8)(U), (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_mask3_fmsub_sh(__m128h __W, __m128h __X, __m128h __Y, __mmask8 __U) {
+ return __builtin_ia32_vfmsubsh3_mask3((__v8hf)__W, (__v8hf)__X, (__v8hf)__Y,
+ (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_mask3_fmsub_round_sh(W, X, Y, U, R) \
+ ((__m128h)__builtin_ia32_vfmsubsh3_mask3( \
+ (__v8hf)(__m128h)(W), (__v8hf)(__m128h)(X), (__v8hf)(__m128h)(Y), \
+ (__mmask8)(U), (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fnmadd_sh(__m128h __W,
+ __m128h __A,
+ __m128h __B) {
+ return __builtin_ia32_vfmaddsh3_mask((__v8hf)__W, -(__v8hf)__A, (__v8hf)__B,
+ (__mmask8)-1, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_mask_fnmadd_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
+ return __builtin_ia32_vfmaddsh3_mask((__v8hf)__W, -(__v8hf)__A, (__v8hf)__B,
+ (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_fnmadd_round_sh(A, B, C, R) \
+ ((__m128h)__builtin_ia32_vfmaddsh3_mask( \
+ (__v8hf)(__m128h)(A), -(__v8hf)(__m128h)(B), (__v8hf)(__m128h)(C), \
+ (__mmask8)-1, (int)(R)))
+
+#define _mm_mask_fnmadd_round_sh(W, U, A, B, R) \
+ ((__m128h)__builtin_ia32_vfmaddsh3_mask( \
+ (__v8hf)(__m128h)(W), -(__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), \
+ (__mmask8)(U), (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_fnmadd_sh(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
+ return __builtin_ia32_vfmaddsh3_maskz((__v8hf)__A, -(__v8hf)__B, (__v8hf)__C,
+ (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_maskz_fnmadd_round_sh(U, A, B, C, R) \
+ ((__m128h)__builtin_ia32_vfmaddsh3_maskz( \
+ (__v8hf)(__m128h)(A), -(__v8hf)(__m128h)(B), (__v8hf)(__m128h)(C), \
+ (__mmask8)(U), (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_mask3_fnmadd_sh(__m128h __W, __m128h __X, __m128h __Y, __mmask8 __U) {
+ return __builtin_ia32_vfmaddsh3_mask3((__v8hf)__W, -(__v8hf)__X, (__v8hf)__Y,
+ (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_mask3_fnmadd_round_sh(W, X, Y, U, R) \
+ ((__m128h)__builtin_ia32_vfmaddsh3_mask3( \
+ (__v8hf)(__m128h)(W), -(__v8hf)(__m128h)(X), (__v8hf)(__m128h)(Y), \
+ (__mmask8)(U), (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fnmsub_sh(__m128h __W,
+ __m128h __A,
+ __m128h __B) {
+ return __builtin_ia32_vfmaddsh3_mask((__v8hf)__W, -(__v8hf)__A, -(__v8hf)__B,
+ (__mmask8)-1, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_mask_fnmsub_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
+ return __builtin_ia32_vfmaddsh3_mask((__v8hf)__W, -(__v8hf)__A, -(__v8hf)__B,
+ (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_fnmsub_round_sh(A, B, C, R) \
+ ((__m128h)__builtin_ia32_vfmaddsh3_mask( \
+ (__v8hf)(__m128h)(A), -(__v8hf)(__m128h)(B), -(__v8hf)(__m128h)(C), \
+ (__mmask8)-1, (int)(R)))
+
+#define _mm_mask_fnmsub_round_sh(W, U, A, B, R) \
+ ((__m128h)__builtin_ia32_vfmaddsh3_mask( \
+ (__v8hf)(__m128h)(W), -(__v8hf)(__m128h)(A), -(__v8hf)(__m128h)(B), \
+ (__mmask8)(U), (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_fnmsub_sh(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
+ return __builtin_ia32_vfmaddsh3_maskz((__v8hf)__A, -(__v8hf)__B, -(__v8hf)__C,
+ (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_maskz_fnmsub_round_sh(U, A, B, C, R) \
+ ((__m128h)__builtin_ia32_vfmaddsh3_maskz( \
+ (__v8hf)(__m128h)(A), -(__v8hf)(__m128h)(B), -(__v8hf)(__m128h)(C), \
+ (__mmask8)(U), (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_mask3_fnmsub_sh(__m128h __W, __m128h __X, __m128h __Y, __mmask8 __U) {
+ return __builtin_ia32_vfmsubsh3_mask3((__v8hf)__W, -(__v8hf)__X, (__v8hf)__Y,
+ (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_mask3_fnmsub_round_sh(W, X, Y, U, R) \
+ ((__m128h)__builtin_ia32_vfmsubsh3_mask3( \
+ (__v8hf)(__m128h)(W), -(__v8hf)(__m128h)(X), (__v8hf)(__m128h)(Y), \
+ (__mmask8)(U), (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fcmadd_sch(__m128h __A,
+ __m128h __B,
+ __m128h __C) {
+ return (__m128h)__builtin_ia32_vfcmaddcsh_mask((__v4sf)__A, (__v4sf)__B,
+ (__v4sf)__C, (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_mask_fcmadd_sch(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
+ return (__m128h)__builtin_ia32_vfcmaddcsh_round_mask(
+ (__v4sf)__A, (__v4sf)(__B), (__v4sf)(__C), __U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_fcmadd_sch(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
+ return (__m128h)__builtin_ia32_vfcmaddcsh_maskz((__v4sf)__A, (__v4sf)__B,
+ (__v4sf)__C, (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_mask3_fcmadd_sch(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
+ return (__m128h)__builtin_ia32_vfcmaddcsh_round_mask3(
+ (__v4sf)__A, (__v4sf)__B, (__v4sf)__C, __U, _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_fcmadd_round_sch(A, B, C, R) \
+ ((__m128h)__builtin_ia32_vfcmaddcsh_mask( \
+ (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
+ (__mmask8)-1, (int)(R)))
+
+#define _mm_mask_fcmadd_round_sch(A, U, B, C, R) \
+ ((__m128h)__builtin_ia32_vfcmaddcsh_round_mask( \
+ (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
+ (__mmask8)(U), (int)(R)))
+
+#define _mm_maskz_fcmadd_round_sch(U, A, B, C, R) \
+ ((__m128h)__builtin_ia32_vfcmaddcsh_maskz( \
+ (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
+ (__mmask8)(U), (int)(R)))
+
+#define _mm_mask3_fcmadd_round_sch(A, B, C, U, R) \
+ ((__m128h)__builtin_ia32_vfcmaddcsh_round_mask3( \
+ (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
+ (__mmask8)(U), (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmadd_sch(__m128h __A,
+ __m128h __B,
+ __m128h __C) {
+ return (__m128h)__builtin_ia32_vfmaddcsh_mask((__v4sf)__A, (__v4sf)__B,
+ (__v4sf)__C, (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_mask_fmadd_sch(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
+ return (__m128h)__builtin_ia32_vfmaddcsh_round_mask(
+ (__v4sf)__A, (__v4sf)(__B), (__v4sf)(__C), __U, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_fmadd_sch(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
+ return (__m128h)__builtin_ia32_vfmaddcsh_maskz((__v4sf)__A, (__v4sf)__B,
+ (__v4sf)__C, (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_mask3_fmadd_sch(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
+ return (__m128h)__builtin_ia32_vfmaddcsh_round_mask3(
+ (__v4sf)__A, (__v4sf)__B, (__v4sf)__C, __U, _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_fmadd_round_sch(A, B, C, R) \
+ ((__m128h)__builtin_ia32_vfmaddcsh_mask( \
+ (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
+ (__mmask8)-1, (int)(R)))
+
+#define _mm_mask_fmadd_round_sch(A, U, B, C, R) \
+ ((__m128h)__builtin_ia32_vfmaddcsh_round_mask( \
+ (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
+ (__mmask8)(U), (int)(R)))
+
+#define _mm_maskz_fmadd_round_sch(U, A, B, C, R) \
+ ((__m128h)__builtin_ia32_vfmaddcsh_maskz( \
+ (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
+ (__mmask8)(U), (int)(R)))
+
+#define _mm_mask3_fmadd_round_sch(A, B, C, U, R) \
+ ((__m128h)__builtin_ia32_vfmaddcsh_round_mask3( \
+ (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
+ (__mmask8)(U), (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fcmul_sch(__m128h __A,
+ __m128h __B) {
+ return (__m128h)__builtin_ia32_vfcmulcsh_mask(
+ (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_undefined_ph(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_mask_fcmul_sch(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
+ return (__m128h)__builtin_ia32_vfcmulcsh_mask((__v4sf)__A, (__v4sf)__B,
+ (__v4sf)__W, (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_fcmul_sch(__mmask8 __U, __m128h __A, __m128h __B) {
+ return (__m128h)__builtin_ia32_vfcmulcsh_mask(
+ (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_setzero_ph(), (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_fcmul_round_sch(A, B, R) \
+ ((__m128h)__builtin_ia32_vfcmulcsh_mask( \
+ (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), \
+ (__v4sf)(__m128h)_mm_undefined_ph(), (__mmask8)-1, (int)(R)))
+
+#define _mm_mask_fcmul_round_sch(W, U, A, B, R) \
+ ((__m128h)__builtin_ia32_vfcmulcsh_mask( \
+ (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(W), \
+ (__mmask8)(U), (int)(R)))
+
+#define _mm_maskz_fcmul_round_sch(U, A, B, R) \
+ ((__m128h)__builtin_ia32_vfcmulcsh_mask( \
+ (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), \
+ (__v4sf)(__m128h)_mm_setzero_ph(), (__mmask8)(U), (int)(R)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmul_sch(__m128h __A,
+ __m128h __B) {
+ return (__m128h)__builtin_ia32_vfmulcsh_mask(
+ (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_undefined_ph(), (__mmask8)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmul_sch(__m128h __W,
+ __mmask8 __U,
+ __m128h __A,
+ __m128h __B) {
+ return (__m128h)__builtin_ia32_vfmulcsh_mask((__v4sf)__A, (__v4sf)__B,
+ (__v4sf)__W, (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_fmul_sch(__mmask8 __U, __m128h __A, __m128h __B) {
+ return (__m128h)__builtin_ia32_vfmulcsh_mask(
+ (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_setzero_ph(), (__mmask8)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm_fmul_round_sch(A, B, R) \
+ ((__m128h)__builtin_ia32_vfmulcsh_mask( \
+ (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), \
+ (__v4sf)(__m128h)_mm_undefined_ph(), (__mmask8)-1, (int)(R)))
+
+#define _mm_mask_fmul_round_sch(W, U, A, B, R) \
+ ((__m128h)__builtin_ia32_vfmulcsh_mask( \
+ (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(W), \
+ (__mmask8)(U), (int)(R)))
+
+#define _mm_maskz_fmul_round_sch(U, A, B, R) \
+ ((__m128h)__builtin_ia32_vfmulcsh_mask( \
+ (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), \
+ (__v4sf)(__m128h)_mm_setzero_ph(), (__mmask8)(U), (int)(R)))
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fcmul_pch(__m512h __A,
+ __m512h __B) {
+ return (__m512h)__builtin_ia32_vfcmulcph512_mask(
+ (__v16sf)__A, (__v16sf)__B, (__v16sf)_mm512_undefined_ph(), (__mmask16)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_mask_fcmul_pch(__m512h __W, __mmask16 __U, __m512h __A, __m512h __B) {
+ return (__m512h)__builtin_ia32_vfcmulcph512_mask((__v16sf)__A, (__v16sf)__B,
+ (__v16sf)__W, (__mmask16)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_maskz_fcmul_pch(__mmask16 __U, __m512h __A, __m512h __B) {
+ return (__m512h)__builtin_ia32_vfcmulcph512_mask(
+ (__v16sf)__A, (__v16sf)__B, (__v16sf)_mm512_setzero_ph(), (__mmask16)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_fcmul_round_pch(A, B, R) \
+ ((__m512h)__builtin_ia32_vfcmulcph512_mask( \
+ (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), \
+ (__v16sf)(__m512h)_mm512_undefined_ph(), (__mmask16)-1, (int)(R)))
+
+#define _mm512_mask_fcmul_round_pch(W, U, A, B, R) \
+ ((__m512h)__builtin_ia32_vfcmulcph512_mask( \
+ (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(W), \
+ (__mmask16)(U), (int)(R)))
+
+#define _mm512_maskz_fcmul_round_pch(U, A, B, R) \
+ ((__m512h)__builtin_ia32_vfcmulcph512_mask( \
+ (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), \
+ (__v16sf)(__m512h)_mm512_setzero_ph(), (__mmask16)(U), (int)(R)))
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fmul_pch(__m512h __A,
+ __m512h __B) {
+ return (__m512h)__builtin_ia32_vfmulcph512_mask(
+ (__v16sf)__A, (__v16sf)__B, (__v16sf)_mm512_undefined_ph(), (__mmask16)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_mask_fmul_pch(__m512h __W, __mmask16 __U, __m512h __A, __m512h __B) {
+ return (__m512h)__builtin_ia32_vfmulcph512_mask((__v16sf)__A, (__v16sf)__B,
+ (__v16sf)__W, (__mmask16)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_maskz_fmul_pch(__mmask16 __U, __m512h __A, __m512h __B) {
+ return (__m512h)__builtin_ia32_vfmulcph512_mask(
+ (__v16sf)__A, (__v16sf)__B, (__v16sf)_mm512_setzero_ph(), (__mmask16)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_fmul_round_pch(A, B, R) \
+ ((__m512h)__builtin_ia32_vfmulcph512_mask( \
+ (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), \
+ (__v16sf)(__m512h)_mm512_undefined_ph(), (__mmask16)-1, (int)(R)))
+
+#define _mm512_mask_fmul_round_pch(W, U, A, B, R) \
+ ((__m512h)__builtin_ia32_vfmulcph512_mask( \
+ (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(W), \
+ (__mmask16)(U), (int)(R)))
+
+#define _mm512_maskz_fmul_round_pch(U, A, B, R) \
+ ((__m512h)__builtin_ia32_vfmulcph512_mask( \
+ (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), \
+ (__v16sf)(__m512h)_mm512_setzero_ph(), (__mmask16)(U), (int)(R)))
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fcmadd_pch(__m512h __A,
+ __m512h __B,
+ __m512h __C) {
+ return (__m512h)__builtin_ia32_vfcmaddcph512_mask3(
+ (__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_mask_fcmadd_pch(__m512h __A, __mmask16 __U, __m512h __B, __m512h __C) {
+ return (__m512h)__builtin_ia32_vfcmaddcph512_mask(
+ (__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_mask3_fcmadd_pch(__m512h __A, __m512h __B, __m512h __C, __mmask16 __U) {
+ return (__m512h)__builtin_ia32_vfcmaddcph512_mask3(
+ (__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_maskz_fcmadd_pch(__mmask16 __U, __m512h __A, __m512h __B, __m512h __C) {
+ return (__m512h)__builtin_ia32_vfcmaddcph512_maskz(
+ (__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_fcmadd_round_pch(A, B, C, R) \
+ ((__m512h)__builtin_ia32_vfcmaddcph512_mask3( \
+ (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \
+ (__mmask16)-1, (int)(R)))
+
+#define _mm512_mask_fcmadd_round_pch(A, U, B, C, R) \
+ ((__m512h)__builtin_ia32_vfcmaddcph512_mask( \
+ (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \
+ (__mmask16)(U), (int)(R)))
+
+#define _mm512_mask3_fcmadd_round_pch(A, B, C, U, R) \
+ ((__m512h)__builtin_ia32_vfcmaddcph512_mask3( \
+ (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \
+ (__mmask16)(U), (int)(R)))
+
+#define _mm512_maskz_fcmadd_round_pch(U, A, B, C, R) \
+ ((__m512h)__builtin_ia32_vfcmaddcph512_maskz( \
+ (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \
+ (__mmask16)(U), (int)(R)))
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fmadd_pch(__m512h __A,
+ __m512h __B,
+ __m512h __C) {
+ return (__m512h)__builtin_ia32_vfmaddcph512_mask3((__v16sf)__A, (__v16sf)__B,
+ (__v16sf)__C, (__mmask16)-1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_mask_fmadd_pch(__m512h __A, __mmask16 __U, __m512h __B, __m512h __C) {
+ return (__m512h)__builtin_ia32_vfmaddcph512_mask((__v16sf)__A, (__v16sf)__B,
+ (__v16sf)__C, (__mmask16)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_mask3_fmadd_pch(__m512h __A, __m512h __B, __m512h __C, __mmask16 __U) {
+ return (__m512h)__builtin_ia32_vfmaddcph512_mask3(
+ (__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_maskz_fmadd_pch(__mmask16 __U, __m512h __A, __m512h __B, __m512h __C) {
+ return (__m512h)__builtin_ia32_vfmaddcph512_maskz(
+ (__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)__U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_fmadd_round_pch(A, B, C, R) \
+ ((__m512h)__builtin_ia32_vfmaddcph512_mask3( \
+ (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \
+ (__mmask16)-1, (int)(R)))
+
+#define _mm512_mask_fmadd_round_pch(A, U, B, C, R) \
+ ((__m512h)__builtin_ia32_vfmaddcph512_mask( \
+ (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \
+ (__mmask16)(U), (int)(R)))
+
+#define _mm512_mask3_fmadd_round_pch(A, B, C, U, R) \
+ ((__m512h)__builtin_ia32_vfmaddcph512_mask3( \
+ (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \
+ (__mmask16)(U), (int)(R)))
+
+#define _mm512_maskz_fmadd_round_pch(U, A, B, C, R) \
+ ((__m512h)__builtin_ia32_vfmaddcph512_maskz( \
+ (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \
+ (__mmask16)(U), (int)(R)))
+
+static __inline__ _Float16 __DEFAULT_FN_ATTRS512
+_mm512_reduce_add_ph(__m512h __W) {
+ return __builtin_ia32_reduce_fadd_ph512(-0.0f16, __W);
+}
+
+static __inline__ _Float16 __DEFAULT_FN_ATTRS512
+_mm512_reduce_mul_ph(__m512h __W) {
+ return __builtin_ia32_reduce_fmul_ph512(1.0f16, __W);
+}
+
+static __inline__ _Float16 __DEFAULT_FN_ATTRS512
+_mm512_reduce_max_ph(__m512h __V) {
+ return __builtin_ia32_reduce_fmax_ph512(__V);
+}
+
+static __inline__ _Float16 __DEFAULT_FN_ATTRS512
+_mm512_reduce_min_ph(__m512h __V) {
+ return __builtin_ia32_reduce_fmin_ph512(__V);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_mask_blend_ph(__mmask32 __U, __m512h __A, __m512h __W) {
+ return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U, (__v32hf)__W,
+ (__v32hf)__A);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_permutex2var_ph(__m512h __A, __m512i __I, __m512h __B) {
+ return (__m512h)__builtin_ia32_vpermi2varhi512((__v32hi)__A, (__v32hi)__I,
+ (__v32hi)__B);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_permutexvar_ph(__m512i __A, __m512h __B) {
+ return (__m512h)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A);
+}
+
+// The intrinsics below are aliases for the corresponding f*mul_*ch intrinsics.
+#define _mm512_mul_pch(A, B) _mm512_fmul_pch(A, B)
+#define _mm512_mask_mul_pch(W, U, A, B) _mm512_mask_fmul_pch(W, U, A, B)
+#define _mm512_maskz_mul_pch(U, A, B) _mm512_maskz_fmul_pch(U, A, B)
+#define _mm512_mul_round_pch(A, B, R) _mm512_fmul_round_pch(A, B, R)
+#define _mm512_mask_mul_round_pch(W, U, A, B, R) \
+ _mm512_mask_fmul_round_pch(W, U, A, B, R)
+#define _mm512_maskz_mul_round_pch(U, A, B, R) \
+ _mm512_maskz_fmul_round_pch(U, A, B, R)
+
+#define _mm512_cmul_pch(A, B) _mm512_fcmul_pch(A, B)
+#define _mm512_mask_cmul_pch(W, U, A, B) _mm512_mask_fcmul_pch(W, U, A, B)
+#define _mm512_maskz_cmul_pch(U, A, B) _mm512_maskz_fcmul_pch(U, A, B)
+#define _mm512_cmul_round_pch(A, B, R) _mm512_fcmul_round_pch(A, B, R)
+#define _mm512_mask_cmul_round_pch(W, U, A, B, R) \
+ _mm512_mask_fcmul_round_pch(W, U, A, B, R)
+#define _mm512_maskz_cmul_round_pch(U, A, B, R) \
+ _mm512_maskz_fcmul_round_pch(U, A, B, R)
+
+#define _mm_mul_sch(A, B) _mm_fmul_sch(A, B)
+#define _mm_mask_mul_sch(W, U, A, B) _mm_mask_fmul_sch(W, U, A, B)
+#define _mm_maskz_mul_sch(U, A, B) _mm_maskz_fmul_sch(U, A, B)
+#define _mm_mul_round_sch(A, B, R) _mm_fmul_round_sch(A, B, R)
+#define _mm_mask_mul_round_sch(W, U, A, B, R) \
+ _mm_mask_fmul_round_sch(W, U, A, B, R)
+#define _mm_maskz_mul_round_sch(U, A, B, R) _mm_maskz_fmul_round_sch(U, A, B, R)
+
+#define _mm_cmul_sch(A, B) _mm_fcmul_sch(A, B)
+#define _mm_mask_cmul_sch(W, U, A, B) _mm_mask_fcmul_sch(W, U, A, B)
+#define _mm_maskz_cmul_sch(U, A, B) _mm_maskz_fcmul_sch(U, A, B)
+#define _mm_cmul_round_sch(A, B, R) _mm_fcmul_round_sch(A, B, R)
+#define _mm_mask_cmul_round_sch(W, U, A, B, R) \
+ _mm_mask_fcmul_round_sch(W, U, A, B, R)
+#define _mm_maskz_cmul_round_sch(U, A, B, R) \
+ _mm_maskz_fcmul_round_sch(U, A, B, R)
+
+#undef __DEFAULT_FN_ATTRS128
+#undef __DEFAULT_FN_ATTRS256
+#undef __DEFAULT_FN_ATTRS512
+
+#endif
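
For orientation, a minimal usage sketch of the 512-bit FP16 intrinsics defined in this header (illustrative only, not part of the patch; it assumes a build with -mavx512fp16 and uses _mm512_setzero_ph, _mm512_loadu_ph and _mm512_fmadd_ph, which the full header declares earlier):

#include <immintrin.h>

/* Dot product of two _Float16 buffers whose length is a multiple of 32:
 * accumulate with fused multiply-add, then horizontally reduce the 32 lanes. */
static _Float16 dot_ph(const _Float16 *a, const _Float16 *b, int n) {
  __m512h acc = _mm512_setzero_ph();
  for (int i = 0; i < n; i += 32)
    acc = _mm512_fmadd_ph(_mm512_loadu_ph(a + i), _mm512_loadu_ph(b + i), acc);
  return _mm512_reduce_add_ph(acc);
}

The _mm512_mul_pch and _mm512_cmul_pch aliases above expand to _mm512_fmul_pch and _mm512_fcmul_pch, the packed complex half-precision multiply and its conjugate form, so either spelling can be used.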
diff --git a/clang/lib/Headers/avx512vbmi2intrin.h b/clang/lib/Headers/avx512vbmi2intrin.h
index a23144616ce3..17fa77722c64 100644
--- a/clang/lib/Headers/avx512vbmi2intrin.h
+++ b/clang/lib/Headers/avx512vbmi2intrin.h
@@ -129,88 +129,88 @@ _mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P)
}
#define _mm512_shldi_epi64(A, B, I) \
- (__m512i)__builtin_ia32_vpshldq512((__v8di)(__m512i)(A), \
- (__v8di)(__m512i)(B), (int)(I))
+ ((__m512i)__builtin_ia32_vpshldq512((__v8di)(__m512i)(A), \
+ (__v8di)(__m512i)(B), (int)(I)))
#define _mm512_mask_shldi_epi64(S, U, A, B, I) \
- (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
- (__v8di)_mm512_shldi_epi64((A), (B), (I)), \
- (__v8di)(__m512i)(S))
+ ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
+ (__v8di)_mm512_shldi_epi64((A), (B), (I)), \
+ (__v8di)(__m512i)(S)))
#define _mm512_maskz_shldi_epi64(U, A, B, I) \
- (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
- (__v8di)_mm512_shldi_epi64((A), (B), (I)), \
- (__v8di)_mm512_setzero_si512())
+ ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
+ (__v8di)_mm512_shldi_epi64((A), (B), (I)), \
+ (__v8di)_mm512_setzero_si512()))
#define _mm512_shldi_epi32(A, B, I) \
- (__m512i)__builtin_ia32_vpshldd512((__v16si)(__m512i)(A), \
- (__v16si)(__m512i)(B), (int)(I))
+ ((__m512i)__builtin_ia32_vpshldd512((__v16si)(__m512i)(A), \
+ (__v16si)(__m512i)(B), (int)(I)))
#define _mm512_mask_shldi_epi32(S, U, A, B, I) \
- (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
- (__v16si)_mm512_shldi_epi32((A), (B), (I)), \
- (__v16si)(__m512i)(S))
+ ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
+ (__v16si)_mm512_shldi_epi32((A), (B), (I)), \
+ (__v16si)(__m512i)(S)))
#define _mm512_maskz_shldi_epi32(U, A, B, I) \
- (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
- (__v16si)_mm512_shldi_epi32((A), (B), (I)), \
- (__v16si)_mm512_setzero_si512())
+ ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
+ (__v16si)_mm512_shldi_epi32((A), (B), (I)), \
+ (__v16si)_mm512_setzero_si512()))
#define _mm512_shldi_epi16(A, B, I) \
- (__m512i)__builtin_ia32_vpshldw512((__v32hi)(__m512i)(A), \
- (__v32hi)(__m512i)(B), (int)(I))
+ ((__m512i)__builtin_ia32_vpshldw512((__v32hi)(__m512i)(A), \
+ (__v32hi)(__m512i)(B), (int)(I)))
#define _mm512_mask_shldi_epi16(S, U, A, B, I) \
- (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
- (__v32hi)_mm512_shldi_epi16((A), (B), (I)), \
- (__v32hi)(__m512i)(S))
+ ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
+ (__v32hi)_mm512_shldi_epi16((A), (B), (I)), \
+ (__v32hi)(__m512i)(S)))
#define _mm512_maskz_shldi_epi16(U, A, B, I) \
- (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
- (__v32hi)_mm512_shldi_epi16((A), (B), (I)), \
- (__v32hi)_mm512_setzero_si512())
+ ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
+ (__v32hi)_mm512_shldi_epi16((A), (B), (I)), \
+ (__v32hi)_mm512_setzero_si512()))
#define _mm512_shrdi_epi64(A, B, I) \
- (__m512i)__builtin_ia32_vpshrdq512((__v8di)(__m512i)(A), \
- (__v8di)(__m512i)(B), (int)(I))
+ ((__m512i)__builtin_ia32_vpshrdq512((__v8di)(__m512i)(A), \
+ (__v8di)(__m512i)(B), (int)(I)))
#define _mm512_mask_shrdi_epi64(S, U, A, B, I) \
- (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
- (__v8di)_mm512_shrdi_epi64((A), (B), (I)), \
- (__v8di)(__m512i)(S))
+ ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
+ (__v8di)_mm512_shrdi_epi64((A), (B), (I)), \
+ (__v8di)(__m512i)(S)))
#define _mm512_maskz_shrdi_epi64(U, A, B, I) \
- (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
- (__v8di)_mm512_shrdi_epi64((A), (B), (I)), \
- (__v8di)_mm512_setzero_si512())
+ ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
+ (__v8di)_mm512_shrdi_epi64((A), (B), (I)), \
+ (__v8di)_mm512_setzero_si512()))
#define _mm512_shrdi_epi32(A, B, I) \
- (__m512i)__builtin_ia32_vpshrdd512((__v16si)(__m512i)(A), \
- (__v16si)(__m512i)(B), (int)(I))
+ ((__m512i)__builtin_ia32_vpshrdd512((__v16si)(__m512i)(A), \
+ (__v16si)(__m512i)(B), (int)(I)))
#define _mm512_mask_shrdi_epi32(S, U, A, B, I) \
- (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
- (__v16si)_mm512_shrdi_epi32((A), (B), (I)), \
- (__v16si)(__m512i)(S))
+ ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
+ (__v16si)_mm512_shrdi_epi32((A), (B), (I)), \
+ (__v16si)(__m512i)(S)))
#define _mm512_maskz_shrdi_epi32(U, A, B, I) \
- (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
- (__v16si)_mm512_shrdi_epi32((A), (B), (I)), \
- (__v16si)_mm512_setzero_si512())
+ ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
+ (__v16si)_mm512_shrdi_epi32((A), (B), (I)), \
+ (__v16si)_mm512_setzero_si512()))
#define _mm512_shrdi_epi16(A, B, I) \
- (__m512i)__builtin_ia32_vpshrdw512((__v32hi)(__m512i)(A), \
- (__v32hi)(__m512i)(B), (int)(I))
+ ((__m512i)__builtin_ia32_vpshrdw512((__v32hi)(__m512i)(A), \
+ (__v32hi)(__m512i)(B), (int)(I)))
#define _mm512_mask_shrdi_epi16(S, U, A, B, I) \
- (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
- (__v32hi)_mm512_shrdi_epi16((A), (B), (I)), \
- (__v32hi)(__m512i)(S))
+ ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
+ (__v32hi)_mm512_shrdi_epi16((A), (B), (I)), \
+ (__v32hi)(__m512i)(S)))
#define _mm512_maskz_shrdi_epi16(U, A, B, I) \
- (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
- (__v32hi)_mm512_shrdi_epi16((A), (B), (I)), \
- (__v32hi)_mm512_setzero_si512())
+ ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
+ (__v32hi)_mm512_shrdi_epi16((A), (B), (I)), \
+ (__v32hi)_mm512_setzero_si512()))
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_shldv_epi64(__m512i __A, __m512i __B, __m512i __C)
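
The change in this hunk, and in the header diffs that follow, wraps each function-like macro body in one extra pair of parentheses so that every use expands to a single parenthesized expression instead of a bare cast applied to a builtin call. The patch does not state the motivation; a plausible reason (an assumption here) is operator precedence at the use site, since postfix operators bind tighter than a cast. A hypothetical sketch using the GNU vector-subscript extension:

static long long first_lane(__m512i a, __m512i b) {
  /* With the new form this expands to ((__m512i)__builtin_ia32_vpshldq512(...))[0],
   * subscripting the 512-bit result. The old form would have parsed as
   * (__m512i)(__builtin_ia32_vpshldq512(...)[0]), a scalar element cast to a
   * vector type, which the compiler rejects. */
  return _mm512_shldi_epi64(a, b, 3)[0];
}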
diff --git a/clang/lib/Headers/avx512vlbf16intrin.h b/clang/lib/Headers/avx512vlbf16intrin.h
index 1b1a744bcdbf..6a5a86071f0b 100644
--- a/clang/lib/Headers/avx512vlbf16intrin.h
+++ b/clang/lib/Headers/avx512vlbf16intrin.h
@@ -425,8 +425,20 @@ static __inline__ __bfloat16 __DEFAULT_FN_ATTRS128 _mm_cvtness_sbh(float __A) {
/// \headerfile <x86intrin.h>
///
/// \param __A
+/// A 128-bit vector of [4 x bfloat].
+/// \returns A 128-bit vector of [4 x float] converted from __A
+static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtpbh_ps(__m128bh __A) {
+ return _mm_castsi128_ps(
+ (__m128i)_mm_slli_epi32((__m128i)_mm_cvtepi16_epi32((__m128i)__A), 16));
+}
+
+/// Convert Packed BF16 Data to Packed float Data.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \param __A
/// A 128-bit vector of [8 x bfloat].
-/// \returns A 256-bit vector of [8 x float] come from convertion of __A
+/// \returns A 256-bit vector of [8 x float] converted from __A
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtpbh_ps(__m128bh __A) {
return _mm256_castsi256_ps((__m256i)_mm256_slli_epi32(
(__m256i)_mm256_cvtepi16_epi32((__m128i)__A), 16));
@@ -437,11 +449,27 @@ static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtpbh_ps(__m128bh __A) {
/// \headerfile <x86intrin.h>
///
/// \param __U
+/// A 4-bit mask. Elements are zeroed out when the corresponding mask
+/// bit is not set.
+/// \param __A
+/// A 128-bit vector of [4 x bfloat].
+/// \returns A 128-bit vector of [4 x float] converted from __A
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtpbh_ps(__mmask8 __U, __m128bh __A) {
+ return _mm_castsi128_ps((__m128i)_mm_slli_epi32(
+ (__m128i)_mm_maskz_cvtepi16_epi32((__mmask8)__U, (__m128i)__A), 16));
+}
+
+/// Convert Packed BF16 Data to Packed float Data using zeroing mask.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \param __U
/// A 8-bit mask. Elements are zeroed out when the corresponding mask
/// bit is not set.
/// \param __A
/// A 128-bit vector of [8 x bfloat].
-/// \returns A 256-bit vector of [8 x float] come from convertion of __A
+/// \returns A 256-bit vector of [8 x float] converted from __A
static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtpbh_ps(__mmask8 __U, __m128bh __A) {
return _mm256_castsi256_ps((__m256i)_mm256_slli_epi32(
@@ -453,6 +481,26 @@ _mm256_maskz_cvtpbh_ps(__mmask8 __U, __m128bh __A) {
/// \headerfile <x86intrin.h>
///
/// \param __S
+/// A 128-bit vector of [4 x float]. Elements are copied from __S when
+/// the corresponding mask bit is not set.
+/// \param __U
+/// A 4-bit mask. Elements are zeroed out when the corresponding mask
+/// bit is not set.
+/// \param __A
+/// A 128-bit vector of [4 x bfloat].
+/// \returns A 128-bit vector of [4 x float] converted from __A
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
+_mm_mask_cvtpbh_ps(__m128 __S, __mmask8 __U, __m128bh __A) {
+ return _mm_castsi128_ps((__m128i)_mm_mask_slli_epi32(
+ (__m128i)__S, (__mmask8)__U, (__m128i)_mm_cvtepi16_epi32((__m128i)__A),
+ 16));
+}
+
+/// Convert Packed BF16 Data to Packed float Data using merging mask.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \param __S
/// A 256-bit vector of [8 x float]. Elements are copied from __S when
/// the corresponding mask bit is not set.
/// \param __U
@@ -460,7 +508,7 @@ _mm256_maskz_cvtpbh_ps(__mmask8 __U, __m128bh __A) {
/// bit is not set.
/// \param __A
/// A 128-bit vector of [8 x bfloat].
-/// \returns A 256-bit vector of [8 x float] come from convertion of __A
+/// \returns A 256-bit vector of [8 x float] converted from __A
static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_mask_cvtpbh_ps(__m256 __S, __mmask8 __U, __m128bh __A) {
return _mm256_castsi256_ps((__m256i)_mm256_mask_slli_epi32(
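
The _mm_cvtpbh_ps variants added above all rely on the same property: a bfloat16 value is the upper 16 bits of an IEEE-754 binary32, so widening each 16-bit element and shifting it left by 16 reproduces the corresponding float bit pattern exactly. A scalar sketch of the same conversion, for illustration only:

#include <stdint.h>
#include <string.h>

static inline float bf16_to_float(uint16_t bf) {
  uint32_t bits = (uint32_t)bf << 16; /* bf16 pattern occupies the high half of the float */
  float f;
  memcpy(&f, &bits, sizeof f);        /* reinterpret the bits without aliasing violations */
  return f;
}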
diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h
index 6ed10ed9803b..7873516053ec 100644
--- a/clang/lib/Headers/avx512vlbwintrin.h
+++ b/clang/lib/Headers/avx512vlbwintrin.h
@@ -21,84 +21,84 @@
/* Integer compare */
#define _mm_cmp_epi8_mask(a, b, p) \
- (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \
- (__v16qi)(__m128i)(b), (int)(p), \
- (__mmask16)-1)
+ ((__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \
+ (__v16qi)(__m128i)(b), (int)(p), \
+ (__mmask16)-1))
#define _mm_mask_cmp_epi8_mask(m, a, b, p) \
- (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \
- (__v16qi)(__m128i)(b), (int)(p), \
- (__mmask16)(m))
+ ((__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \
+ (__v16qi)(__m128i)(b), (int)(p), \
+ (__mmask16)(m)))
#define _mm_cmp_epu8_mask(a, b, p) \
- (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \
- (__v16qi)(__m128i)(b), (int)(p), \
- (__mmask16)-1)
+ ((__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \
+ (__v16qi)(__m128i)(b), (int)(p), \
+ (__mmask16)-1))
#define _mm_mask_cmp_epu8_mask(m, a, b, p) \
- (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \
- (__v16qi)(__m128i)(b), (int)(p), \
- (__mmask16)(m))
+ ((__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \
+ (__v16qi)(__m128i)(b), (int)(p), \
+ (__mmask16)(m)))
#define _mm256_cmp_epi8_mask(a, b, p) \
- (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \
- (__v32qi)(__m256i)(b), (int)(p), \
- (__mmask32)-1)
+ ((__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \
+ (__v32qi)(__m256i)(b), (int)(p), \
+ (__mmask32)-1))
#define _mm256_mask_cmp_epi8_mask(m, a, b, p) \
- (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \
- (__v32qi)(__m256i)(b), (int)(p), \
- (__mmask32)(m))
+ ((__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \
+ (__v32qi)(__m256i)(b), (int)(p), \
+ (__mmask32)(m)))
#define _mm256_cmp_epu8_mask(a, b, p) \
- (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \
- (__v32qi)(__m256i)(b), (int)(p), \
- (__mmask32)-1)
+ ((__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \
+ (__v32qi)(__m256i)(b), (int)(p), \
+ (__mmask32)-1))
#define _mm256_mask_cmp_epu8_mask(m, a, b, p) \
- (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \
- (__v32qi)(__m256i)(b), (int)(p), \
- (__mmask32)(m))
+ ((__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \
+ (__v32qi)(__m256i)(b), (int)(p), \
+ (__mmask32)(m)))
#define _mm_cmp_epi16_mask(a, b, p) \
- (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \
- (__v8hi)(__m128i)(b), (int)(p), \
- (__mmask8)-1)
+ ((__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \
+ (__v8hi)(__m128i)(b), (int)(p), \
+ (__mmask8)-1))
#define _mm_mask_cmp_epi16_mask(m, a, b, p) \
- (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \
- (__v8hi)(__m128i)(b), (int)(p), \
- (__mmask8)(m))
+ ((__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \
+ (__v8hi)(__m128i)(b), (int)(p), \
+ (__mmask8)(m)))
#define _mm_cmp_epu16_mask(a, b, p) \
- (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \
- (__v8hi)(__m128i)(b), (int)(p), \
- (__mmask8)-1)
+ ((__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \
+ (__v8hi)(__m128i)(b), (int)(p), \
+ (__mmask8)-1))
#define _mm_mask_cmp_epu16_mask(m, a, b, p) \
- (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \
- (__v8hi)(__m128i)(b), (int)(p), \
- (__mmask8)(m))
+ ((__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \
+ (__v8hi)(__m128i)(b), (int)(p), \
+ (__mmask8)(m)))
#define _mm256_cmp_epi16_mask(a, b, p) \
- (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \
- (__v16hi)(__m256i)(b), (int)(p), \
- (__mmask16)-1)
+ ((__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \
+ (__v16hi)(__m256i)(b), (int)(p), \
+ (__mmask16)-1))
#define _mm256_mask_cmp_epi16_mask(m, a, b, p) \
- (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \
- (__v16hi)(__m256i)(b), (int)(p), \
- (__mmask16)(m))
+ ((__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \
+ (__v16hi)(__m256i)(b), (int)(p), \
+ (__mmask16)(m)))
#define _mm256_cmp_epu16_mask(a, b, p) \
- (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \
- (__v16hi)(__m256i)(b), (int)(p), \
- (__mmask16)-1)
+ ((__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \
+ (__v16hi)(__m256i)(b), (int)(p), \
+ (__mmask16)-1))
#define _mm256_mask_cmp_epu16_mask(m, a, b, p) \
- (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \
- (__v16hi)(__m256i)(b), (int)(p), \
- (__mmask16)(m))
+ ((__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \
+ (__v16hi)(__m256i)(b), (int)(p), \
+ (__mmask16)(m)))
#define _mm_cmpeq_epi8_mask(A, B) \
_mm_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ)
@@ -1821,46 +1821,46 @@ _mm256_maskz_cvtepu8_epi16 (__mmask16 __U, __m128i __A)
#define _mm_mask_shufflehi_epi16(W, U, A, imm) \
- (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
- (__v8hi)_mm_shufflehi_epi16((A), (imm)), \
- (__v8hi)(__m128i)(W))
+ ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
+ (__v8hi)_mm_shufflehi_epi16((A), (imm)), \
+ (__v8hi)(__m128i)(W)))
#define _mm_maskz_shufflehi_epi16(U, A, imm) \
- (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
- (__v8hi)_mm_shufflehi_epi16((A), (imm)), \
- (__v8hi)_mm_setzero_si128())
+ ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
+ (__v8hi)_mm_shufflehi_epi16((A), (imm)), \
+ (__v8hi)_mm_setzero_si128()))
#define _mm256_mask_shufflehi_epi16(W, U, A, imm) \
- (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
- (__v16hi)_mm256_shufflehi_epi16((A), (imm)), \
- (__v16hi)(__m256i)(W))
+ ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
+ (__v16hi)_mm256_shufflehi_epi16((A), (imm)), \
+ (__v16hi)(__m256i)(W)))
#define _mm256_maskz_shufflehi_epi16(U, A, imm) \
- (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
- (__v16hi)_mm256_shufflehi_epi16((A), (imm)), \
- (__v16hi)_mm256_setzero_si256())
+ ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
+ (__v16hi)_mm256_shufflehi_epi16((A), (imm)), \
+ (__v16hi)_mm256_setzero_si256()))
#define _mm_mask_shufflelo_epi16(W, U, A, imm) \
- (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
- (__v8hi)_mm_shufflelo_epi16((A), (imm)), \
- (__v8hi)(__m128i)(W))
+ ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
+ (__v8hi)_mm_shufflelo_epi16((A), (imm)), \
+ (__v8hi)(__m128i)(W)))
#define _mm_maskz_shufflelo_epi16(U, A, imm) \
- (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
- (__v8hi)_mm_shufflelo_epi16((A), (imm)), \
- (__v8hi)_mm_setzero_si128())
+ ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
+ (__v8hi)_mm_shufflelo_epi16((A), (imm)), \
+ (__v8hi)_mm_setzero_si128()))
#define _mm256_mask_shufflelo_epi16(W, U, A, imm) \
- (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
- (__v16hi)_mm256_shufflelo_epi16((A), \
- (imm)), \
- (__v16hi)(__m256i)(W))
+ ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
+ (__v16hi)_mm256_shufflelo_epi16((A), \
+ (imm)), \
+ (__v16hi)(__m256i)(W)))
#define _mm256_maskz_shufflelo_epi16(U, A, imm) \
- (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
- (__v16hi)_mm256_shufflelo_epi16((A), \
- (imm)), \
- (__v16hi)_mm256_setzero_si256())
+ ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
+ (__v16hi)_mm256_shufflelo_epi16((A), \
+ (imm)), \
+ (__v16hi)_mm256_setzero_si256()))
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_sllv_epi16(__m256i __A, __m256i __B)
@@ -2756,52 +2756,52 @@ _mm256_mask_permutexvar_epi16 (__m256i __W, __mmask16 __M, __m256i __A,
}
#define _mm_mask_alignr_epi8(W, U, A, B, N) \
- (__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \
+ ((__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \
(__v16qi)_mm_alignr_epi8((A), (B), (int)(N)), \
- (__v16qi)(__m128i)(W))
+ (__v16qi)(__m128i)(W)))
#define _mm_maskz_alignr_epi8(U, A, B, N) \
- (__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \
+ ((__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \
(__v16qi)_mm_alignr_epi8((A), (B), (int)(N)), \
- (__v16qi)_mm_setzero_si128())
+ (__v16qi)_mm_setzero_si128()))
#define _mm256_mask_alignr_epi8(W, U, A, B, N) \
- (__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \
+ ((__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \
(__v32qi)_mm256_alignr_epi8((A), (B), (int)(N)), \
- (__v32qi)(__m256i)(W))
+ (__v32qi)(__m256i)(W)))
#define _mm256_maskz_alignr_epi8(U, A, B, N) \
- (__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \
+ ((__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \
(__v32qi)_mm256_alignr_epi8((A), (B), (int)(N)), \
- (__v32qi)_mm256_setzero_si256())
+ (__v32qi)_mm256_setzero_si256()))
#define _mm_dbsad_epu8(A, B, imm) \
- (__m128i)__builtin_ia32_dbpsadbw128((__v16qi)(__m128i)(A), \
- (__v16qi)(__m128i)(B), (int)(imm))
+ ((__m128i)__builtin_ia32_dbpsadbw128((__v16qi)(__m128i)(A), \
+ (__v16qi)(__m128i)(B), (int)(imm)))
#define _mm_mask_dbsad_epu8(W, U, A, B, imm) \
- (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
+ ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
(__v8hi)_mm_dbsad_epu8((A), (B), (imm)), \
- (__v8hi)(__m128i)(W))
+ (__v8hi)(__m128i)(W)))
#define _mm_maskz_dbsad_epu8(U, A, B, imm) \
- (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
+ ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
(__v8hi)_mm_dbsad_epu8((A), (B), (imm)), \
- (__v8hi)_mm_setzero_si128())
+ (__v8hi)_mm_setzero_si128()))
#define _mm256_dbsad_epu8(A, B, imm) \
- (__m256i)__builtin_ia32_dbpsadbw256((__v32qi)(__m256i)(A), \
- (__v32qi)(__m256i)(B), (int)(imm))
+ ((__m256i)__builtin_ia32_dbpsadbw256((__v32qi)(__m256i)(A), \
+ (__v32qi)(__m256i)(B), (int)(imm)))
#define _mm256_mask_dbsad_epu8(W, U, A, B, imm) \
- (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
+ ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
(__v16hi)_mm256_dbsad_epu8((A), (B), (imm)), \
- (__v16hi)(__m256i)(W))
+ (__v16hi)(__m256i)(W)))
#define _mm256_maskz_dbsad_epu8(U, A, B, imm) \
- (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
+ ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
(__v16hi)_mm256_dbsad_epu8((A), (B), (imm)), \
- (__v16hi)_mm256_setzero_si256())
+ (__v16hi)_mm256_setzero_si256()))
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256
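
The compare macros at the top of this file's diff produce AVX-512 mask values directly. A brief usage sketch (illustrative; it assumes AVX512BW with VL and the _MM_CMPINT_LE predicate constant defined by the AVX-512 headers):

/* Count the byte lanes of a that are <= the corresponding byte of b. */
__mmask32 le = _mm256_cmp_epu8_mask(a, b, _MM_CMPINT_LE);
int matches = __builtin_popcount((unsigned)le);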
diff --git a/clang/lib/Headers/avx512vldqintrin.h b/clang/lib/Headers/avx512vldqintrin.h
index 95ba574ea821..713e1a18a1b3 100644
--- a/clang/lib/Headers/avx512vldqintrin.h
+++ b/clang/lib/Headers/avx512vldqintrin.h
@@ -773,134 +773,134 @@ _mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A) {
}
#define _mm_range_pd(A, B, C) \
- (__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), (int)(C), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)-1)
+ ((__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), (int)(C), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1))
#define _mm_mask_range_pd(W, U, A, B, C) \
- (__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), (int)(C), \
- (__v2df)(__m128d)(W), \
- (__mmask8)(U))
+ ((__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), (int)(C), \
+ (__v2df)(__m128d)(W), \
+ (__mmask8)(U)))
#define _mm_maskz_range_pd(U, A, B, C) \
- (__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), (int)(C), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)(U))
+ ((__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), (int)(C), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U)))
#define _mm256_range_pd(A, B, C) \
- (__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
- (__v4df)(__m256d)(B), (int)(C), \
- (__v4df)_mm256_setzero_pd(), \
- (__mmask8)-1)
+ ((__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
+ (__v4df)(__m256d)(B), (int)(C), \
+ (__v4df)_mm256_setzero_pd(), \
+ (__mmask8)-1))
#define _mm256_mask_range_pd(W, U, A, B, C) \
- (__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
- (__v4df)(__m256d)(B), (int)(C), \
- (__v4df)(__m256d)(W), \
- (__mmask8)(U))
+ ((__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
+ (__v4df)(__m256d)(B), (int)(C), \
+ (__v4df)(__m256d)(W), \
+ (__mmask8)(U)))
#define _mm256_maskz_range_pd(U, A, B, C) \
- (__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
- (__v4df)(__m256d)(B), (int)(C), \
- (__v4df)_mm256_setzero_pd(), \
- (__mmask8)(U))
+ ((__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
+ (__v4df)(__m256d)(B), (int)(C), \
+ (__v4df)_mm256_setzero_pd(), \
+ (__mmask8)(U)))
#define _mm_range_ps(A, B, C) \
- (__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), (int)(C), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)-1)
+ ((__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), (int)(C), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)-1))
#define _mm_mask_range_ps(W, U, A, B, C) \
- (__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), (int)(C), \
- (__v4sf)(__m128)(W), (__mmask8)(U))
+ ((__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), (int)(C), \
+ (__v4sf)(__m128)(W), (__mmask8)(U)))
#define _mm_maskz_range_ps(U, A, B, C) \
- (__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), (int)(C), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)(U))
+ ((__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), (int)(C), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U)))
#define _mm256_range_ps(A, B, C) \
- (__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
- (__v8sf)(__m256)(B), (int)(C), \
- (__v8sf)_mm256_setzero_ps(), \
- (__mmask8)-1)
+ ((__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
+ (__v8sf)(__m256)(B), (int)(C), \
+ (__v8sf)_mm256_setzero_ps(), \
+ (__mmask8)-1))
#define _mm256_mask_range_ps(W, U, A, B, C) \
- (__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
- (__v8sf)(__m256)(B), (int)(C), \
- (__v8sf)(__m256)(W), (__mmask8)(U))
+ ((__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
+ (__v8sf)(__m256)(B), (int)(C), \
+ (__v8sf)(__m256)(W), (__mmask8)(U)))
#define _mm256_maskz_range_ps(U, A, B, C) \
- (__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
- (__v8sf)(__m256)(B), (int)(C), \
- (__v8sf)_mm256_setzero_ps(), \
- (__mmask8)(U))
+ ((__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
+ (__v8sf)(__m256)(B), (int)(C), \
+ (__v8sf)_mm256_setzero_ps(), \
+ (__mmask8)(U)))
#define _mm_reduce_pd(A, B) \
- (__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)-1)
+ ((__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1))
#define _mm_mask_reduce_pd(W, U, A, B) \
- (__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
- (__v2df)(__m128d)(W), \
- (__mmask8)(U))
+ ((__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
+ (__v2df)(__m128d)(W), \
+ (__mmask8)(U)))
#define _mm_maskz_reduce_pd(U, A, B) \
- (__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)(U))
+ ((__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U)))
#define _mm256_reduce_pd(A, B) \
- (__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
- (__v4df)_mm256_setzero_pd(), \
- (__mmask8)-1)
+ ((__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
+ (__v4df)_mm256_setzero_pd(), \
+ (__mmask8)-1))
#define _mm256_mask_reduce_pd(W, U, A, B) \
- (__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
- (__v4df)(__m256d)(W), \
- (__mmask8)(U))
+ ((__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
+ (__v4df)(__m256d)(W), \
+ (__mmask8)(U)))
#define _mm256_maskz_reduce_pd(U, A, B) \
- (__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
- (__v4df)_mm256_setzero_pd(), \
- (__mmask8)(U))
+ ((__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
+ (__v4df)_mm256_setzero_pd(), \
+ (__mmask8)(U)))
#define _mm_reduce_ps(A, B) \
- (__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)-1)
+ ((__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)-1))
#define _mm_mask_reduce_ps(W, U, A, B) \
- (__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
- (__v4sf)(__m128)(W), \
- (__mmask8)(U))
+ ((__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
+ (__v4sf)(__m128)(W), \
+ (__mmask8)(U)))
#define _mm_maskz_reduce_ps(U, A, B) \
- (__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)(U))
+ ((__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U)))
#define _mm256_reduce_ps(A, B) \
- (__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
- (__v8sf)_mm256_setzero_ps(), \
- (__mmask8)-1)
+ ((__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
+ (__v8sf)_mm256_setzero_ps(), \
+ (__mmask8)-1))
#define _mm256_mask_reduce_ps(W, U, A, B) \
- (__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
- (__v8sf)(__m256)(W), \
- (__mmask8)(U))
+ ((__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
+ (__v8sf)(__m256)(W), \
+ (__mmask8)(U)))
#define _mm256_maskz_reduce_ps(U, A, B) \
- (__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
- (__v8sf)_mm256_setzero_ps(), \
- (__mmask8)(U))
+ ((__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
+ (__v8sf)_mm256_setzero_ps(), \
+ (__mmask8)(U)))
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
_mm_movepi32_mask (__m128i __A)
@@ -1066,100 +1066,100 @@ _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
}
#define _mm256_extractf64x2_pd(A, imm) \
- (__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
- (int)(imm), \
- (__v2df)_mm_undefined_pd(), \
- (__mmask8)-1)
+ ((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
+ (int)(imm), \
+ (__v2df)_mm_undefined_pd(), \
+ (__mmask8)-1))
#define _mm256_mask_extractf64x2_pd(W, U, A, imm) \
- (__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
- (int)(imm), \
- (__v2df)(__m128d)(W), \
- (__mmask8)(U))
+ ((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
+ (int)(imm), \
+ (__v2df)(__m128d)(W), \
+ (__mmask8)(U)))
#define _mm256_maskz_extractf64x2_pd(U, A, imm) \
- (__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
- (int)(imm), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)(U))
+ ((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
+ (int)(imm), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U)))
#define _mm256_extracti64x2_epi64(A, imm) \
- (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
+ ((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
(int)(imm), \
(__v2di)_mm_undefined_si128(), \
- (__mmask8)-1)
+ (__mmask8)-1))
#define _mm256_mask_extracti64x2_epi64(W, U, A, imm) \
- (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
- (int)(imm), \
- (__v2di)(__m128i)(W), \
- (__mmask8)(U))
+ ((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
+ (int)(imm), \
+ (__v2di)(__m128i)(W), \
+ (__mmask8)(U)))
#define _mm256_maskz_extracti64x2_epi64(U, A, imm) \
- (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
- (int)(imm), \
- (__v2di)_mm_setzero_si128(), \
- (__mmask8)(U))
+ ((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
+ (int)(imm), \
+ (__v2di)_mm_setzero_si128(), \
+ (__mmask8)(U)))
#define _mm256_insertf64x2(A, B, imm) \
- (__m256d)__builtin_ia32_insertf64x2_256((__v4df)(__m256d)(A), \
- (__v2df)(__m128d)(B), (int)(imm))
+ ((__m256d)__builtin_ia32_insertf64x2_256((__v4df)(__m256d)(A), \
+ (__v2df)(__m128d)(B), (int)(imm)))
#define _mm256_mask_insertf64x2(W, U, A, B, imm) \
- (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
+ ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
(__v4df)_mm256_insertf64x2((A), (B), (imm)), \
- (__v4df)(__m256d)(W))
+ (__v4df)(__m256d)(W)))
#define _mm256_maskz_insertf64x2(U, A, B, imm) \
- (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
+ ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
(__v4df)_mm256_insertf64x2((A), (B), (imm)), \
- (__v4df)_mm256_setzero_pd())
+ (__v4df)_mm256_setzero_pd()))
#define _mm256_inserti64x2(A, B, imm) \
- (__m256i)__builtin_ia32_inserti64x2_256((__v4di)(__m256i)(A), \
- (__v2di)(__m128i)(B), (int)(imm))
+ ((__m256i)__builtin_ia32_inserti64x2_256((__v4di)(__m256i)(A), \
+ (__v2di)(__m128i)(B), (int)(imm)))
#define _mm256_mask_inserti64x2(W, U, A, B, imm) \
- (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
- (__v4di)_mm256_inserti64x2((A), (B), (imm)), \
- (__v4di)(__m256i)(W))
+ ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
+ (__v4di)_mm256_inserti64x2((A), (B), (imm)), \
+ (__v4di)(__m256i)(W)))
#define _mm256_maskz_inserti64x2(U, A, B, imm) \
- (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
- (__v4di)_mm256_inserti64x2((A), (B), (imm)), \
- (__v4di)_mm256_setzero_si256())
+ ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
+ (__v4di)_mm256_inserti64x2((A), (B), (imm)), \
+ (__v4di)_mm256_setzero_si256()))
#define _mm_mask_fpclass_pd_mask(U, A, imm) \
- (__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \
- (__mmask8)(U))
+ ((__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \
+ (__mmask8)(U)))
#define _mm_fpclass_pd_mask(A, imm) \
- (__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \
- (__mmask8)-1)
+ ((__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \
+ (__mmask8)-1))
#define _mm256_mask_fpclass_pd_mask(U, A, imm) \
- (__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \
- (__mmask8)(U))
+ ((__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \
+ (__mmask8)(U)))
#define _mm256_fpclass_pd_mask(A, imm) \
- (__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \
- (__mmask8)-1)
+ ((__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \
+ (__mmask8)-1))
#define _mm_mask_fpclass_ps_mask(U, A, imm) \
- (__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \
- (__mmask8)(U))
+ ((__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \
+ (__mmask8)(U)))
#define _mm_fpclass_ps_mask(A, imm) \
- (__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \
- (__mmask8)-1)
+ ((__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \
+ (__mmask8)-1))
#define _mm256_mask_fpclass_ps_mask(U, A, imm) \
- (__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \
- (__mmask8)(U))
+ ((__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \
+ (__mmask8)(U)))
#define _mm256_fpclass_ps_mask(A, imm) \
- (__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \
- (__mmask8)-1)
+ ((__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \
+ (__mmask8)-1))
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256
diff --git a/clang/lib/Headers/avx512vlfp16intrin.h b/clang/lib/Headers/avx512vlfp16intrin.h
new file mode 100644
index 000000000000..3d27853ad964
--- /dev/null
+++ b/clang/lib/Headers/avx512vlfp16intrin.h
@@ -0,0 +1,2068 @@
+/*===---------- avx512vlfp16intrin.h - AVX512-FP16 intrinsics --------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error \
+ "Never use <avx512vlfp16intrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __AVX512VLFP16INTRIN_H
+#define __AVX512VLFP16INTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS256 \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512fp16, avx512vl"), \
+ __min_vector_width__(256)))
+#define __DEFAULT_FN_ATTRS128 \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512fp16, avx512vl"), \
+ __min_vector_width__(128)))
+
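+/* Editorial note (not part of the upstream header): the attribute bundles
+ * above force inlining of every wrapper, drop debug info for them, require
+ * the avx512fp16 and avx512vl target features, and record the minimum vector
+ * width (128 or 256 bits) each wrapper needs, so a smaller
+ * prefer-vector-width setting does not apply to it. */
+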
+static __inline__ _Float16 __DEFAULT_FN_ATTRS128 _mm_cvtsh_h(__m128h __a) {
+ return __a[0];
+}
+
+static __inline__ _Float16 __DEFAULT_FN_ATTRS256 _mm256_cvtsh_h(__m256h __a) {
+ return __a[0];
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_set_sh(_Float16 __h) {
+ return __extension__(__m128h){__h, 0, 0, 0, 0, 0, 0, 0};
+}
+
+static __inline __m128h __DEFAULT_FN_ATTRS128 _mm_set1_ph(_Float16 __h) {
+ return (__m128h)(__v8hf){__h, __h, __h, __h, __h, __h, __h, __h};
+}
+
+static __inline __m256h __DEFAULT_FN_ATTRS256 _mm256_set1_ph(_Float16 __h) {
+ return (__m256h)(__v16hf){__h, __h, __h, __h, __h, __h, __h, __h,
+ __h, __h, __h, __h, __h, __h, __h, __h};
+}
+
+static __inline __m128h __DEFAULT_FN_ATTRS128
+_mm_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4,
+ _Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8) {
+ return (__m128h)(__v8hf){__h8, __h7, __h6, __h5, __h4, __h3, __h2, __h1};
+}
+
+static __inline __m256h __DEFAULT_FN_ATTRS256
+_mm256_set1_pch(_Float16 _Complex h) {
+ return (__m256h)_mm256_set1_ps(__builtin_bit_cast(float, h));
+}
+
+static __inline __m128h __DEFAULT_FN_ATTRS128
+_mm_set1_pch(_Float16 _Complex h) {
+ return (__m128h)_mm_set1_ps(__builtin_bit_cast(float, h));
+}
+
+static __inline __m256h __DEFAULT_FN_ATTRS256
+_mm256_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4,
+ _Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8,
+ _Float16 __h9, _Float16 __h10, _Float16 __h11, _Float16 __h12,
+ _Float16 __h13, _Float16 __h14, _Float16 __h15, _Float16 __h16) {
+ return (__m256h)(__v16hf){__h16, __h15, __h14, __h13, __h12, __h11,
+ __h10, __h9, __h8, __h7, __h6, __h5,
+ __h4, __h3, __h2, __h1};
+}
+
+#define _mm_setr_ph(h1, h2, h3, h4, h5, h6, h7, h8) \
+ _mm_set_ph((h8), (h7), (h6), (h5), (h4), (h3), (h2), (h1))
+
+#define _mm256_setr_ph(h1, h2, h3, h4, h5, h6, h7, h8, h9, h10, h11, h12, h13, \
+ h14, h15, h16) \
+ _mm256_set_ph((h16), (h15), (h14), (h13), (h12), (h11), (h10), (h9), (h8), \
+ (h7), (h6), (h5), (h4), (h3), (h2), (h1))
+
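+/* Editorial note (not part of the upstream header): a minimal usage sketch of
+ * the element ordering, assuming a translation unit built with
+ * -mavx512fp16 -mavx512vl:
+ *
+ *   __m128h a = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
+ *   __m128h b = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ *   // a and b are identical: element 0 holds 1.0 and element 7 holds 8.0,
+ *   // since _mm_set_ph lists elements from the highest index down while
+ *   // _mm_setr_ph lists them in low-to-high (memory) order.
+ */
+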
+static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_add_ph(__m256h __A,
+ __m256h __B) {
+ return (__m256h)((__v16hf)__A + (__v16hf)__B);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_mask_add_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
+ return (__m256h)__builtin_ia32_selectph_256(
+ __U, (__v16hf)_mm256_add_ph(__A, __B), (__v16hf)__W);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_maskz_add_ph(__mmask16 __U, __m256h __A, __m256h __B) {
+ return (__m256h)__builtin_ia32_selectph_256(
+ __U, (__v16hf)_mm256_add_ph(__A, __B), (__v16hf)_mm256_setzero_ph());
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_add_ph(__m128h __A,
+ __m128h __B) {
+ return (__m128h)((__v8hf)__A + (__v8hf)__B);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_add_ph(__m128h __W,
+ __mmask8 __U,
+ __m128h __A,
+ __m128h __B) {
+ return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_add_ph(__A, __B),
+ (__v8hf)__W);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_add_ph(__mmask8 __U,
+ __m128h __A,
+ __m128h __B) {
+ return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_add_ph(__A, __B),
+ (__v8hf)_mm_setzero_ph());
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_sub_ph(__m256h __A,
+ __m256h __B) {
+ return (__m256h)((__v16hf)__A - (__v16hf)__B);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_mask_sub_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
+ return (__m256h)__builtin_ia32_selectph_256(
+ __U, (__v16hf)_mm256_sub_ph(__A, __B), (__v16hf)__W);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_maskz_sub_ph(__mmask16 __U, __m256h __A, __m256h __B) {
+ return (__m256h)__builtin_ia32_selectph_256(
+ __U, (__v16hf)_mm256_sub_ph(__A, __B), (__v16hf)_mm256_setzero_ph());
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_sub_ph(__m128h __A,
+ __m128h __B) {
+ return (__m128h)((__v8hf)__A - (__v8hf)__B);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_sub_ph(__m128h __W,
+ __mmask8 __U,
+ __m128h __A,
+ __m128h __B) {
+ return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_sub_ph(__A, __B),
+ (__v8hf)__W);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_sub_ph(__mmask8 __U,
+ __m128h __A,
+ __m128h __B) {
+ return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_sub_ph(__A, __B),
+ (__v8hf)_mm_setzero_ph());
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mul_ph(__m256h __A,
+ __m256h __B) {
+ return (__m256h)((__v16hf)__A * (__v16hf)__B);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_mask_mul_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
+ return (__m256h)__builtin_ia32_selectph_256(
+ __U, (__v16hf)_mm256_mul_ph(__A, __B), (__v16hf)__W);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_maskz_mul_ph(__mmask16 __U, __m256h __A, __m256h __B) {
+ return (__m256h)__builtin_ia32_selectph_256(
+ __U, (__v16hf)_mm256_mul_ph(__A, __B), (__v16hf)_mm256_setzero_ph());
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mul_ph(__m128h __A,
+ __m128h __B) {
+ return (__m128h)((__v8hf)__A * (__v8hf)__B);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_mul_ph(__m128h __W,
+ __mmask8 __U,
+ __m128h __A,
+ __m128h __B) {
+ return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_mul_ph(__A, __B),
+ (__v8hf)__W);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_mul_ph(__mmask8 __U,
+ __m128h __A,
+ __m128h __B) {
+ return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_mul_ph(__A, __B),
+ (__v8hf)_mm_setzero_ph());
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_div_ph(__m256h __A,
+ __m256h __B) {
+ return (__m256h)((__v16hf)__A / (__v16hf)__B);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_mask_div_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
+ return (__m256h)__builtin_ia32_selectph_256(
+ __U, (__v16hf)_mm256_div_ph(__A, __B), (__v16hf)__W);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_maskz_div_ph(__mmask16 __U, __m256h __A, __m256h __B) {
+ return (__m256h)__builtin_ia32_selectph_256(
+ __U, (__v16hf)_mm256_div_ph(__A, __B), (__v16hf)_mm256_setzero_ph());
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_div_ph(__m128h __A,
+ __m128h __B) {
+ return (__m128h)((__v8hf)__A / (__v8hf)__B);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_div_ph(__m128h __W,
+ __mmask8 __U,
+ __m128h __A,
+ __m128h __B) {
+ return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_div_ph(__A, __B),
+ (__v8hf)__W);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_div_ph(__mmask8 __U,
+ __m128h __A,
+ __m128h __B) {
+ return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_div_ph(__A, __B),
+ (__v8hf)_mm_setzero_ph());
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_min_ph(__m256h __A,
+ __m256h __B) {
+ return (__m256h)__builtin_ia32_minph256((__v16hf)__A, (__v16hf)__B);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_mask_min_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
+ return (__m256h)__builtin_ia32_selectph_256(
+ (__mmask16)__U,
+ (__v16hf)__builtin_ia32_minph256((__v16hf)__A, (__v16hf)__B),
+ (__v16hf)__W);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_maskz_min_ph(__mmask16 __U, __m256h __A, __m256h __B) {
+ return (__m256h)__builtin_ia32_selectph_256(
+ (__mmask16)__U,
+ (__v16hf)__builtin_ia32_minph256((__v16hf)__A, (__v16hf)__B),
+ (__v16hf)_mm256_setzero_ph());
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_min_ph(__m128h __A,
+ __m128h __B) {
+ return (__m128h)__builtin_ia32_minph128((__v8hf)__A, (__v8hf)__B);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_min_ph(__m128h __W,
+ __mmask8 __U,
+ __m128h __A,
+ __m128h __B) {
+ return (__m128h)__builtin_ia32_selectph_128(
+ (__mmask8)__U, (__v8hf)__builtin_ia32_minph128((__v8hf)__A, (__v8hf)__B),
+ (__v8hf)__W);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_min_ph(__mmask8 __U,
+ __m128h __A,
+ __m128h __B) {
+ return (__m128h)__builtin_ia32_selectph_128(
+ (__mmask8)__U, (__v8hf)__builtin_ia32_minph128((__v8hf)__A, (__v8hf)__B),
+ (__v8hf)_mm_setzero_ph());
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_max_ph(__m256h __A,
+ __m256h __B) {
+ return (__m256h)__builtin_ia32_maxph256((__v16hf)__A, (__v16hf)__B);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_mask_max_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
+ return (__m256h)__builtin_ia32_selectph_256(
+ (__mmask16)__U,
+ (__v16hf)__builtin_ia32_maxph256((__v16hf)__A, (__v16hf)__B),
+ (__v16hf)__W);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_maskz_max_ph(__mmask16 __U, __m256h __A, __m256h __B) {
+ return (__m256h)__builtin_ia32_selectph_256(
+ (__mmask16)__U,
+ (__v16hf)__builtin_ia32_maxph256((__v16hf)__A, (__v16hf)__B),
+ (__v16hf)_mm256_setzero_ph());
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_max_ph(__m128h __A,
+ __m128h __B) {
+ return (__m128h)__builtin_ia32_maxph128((__v8hf)__A, (__v8hf)__B);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_max_ph(__m128h __W,
+ __mmask8 __U,
+ __m128h __A,
+ __m128h __B) {
+ return (__m128h)__builtin_ia32_selectph_128(
+ (__mmask8)__U, (__v8hf)__builtin_ia32_maxph128((__v8hf)__A, (__v8hf)__B),
+ (__v8hf)__W);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_max_ph(__mmask8 __U,
+ __m128h __A,
+ __m128h __B) {
+ return (__m128h)__builtin_ia32_selectph_128(
+ (__mmask8)__U, (__v8hf)__builtin_ia32_maxph128((__v8hf)__A, (__v8hf)__B),
+ (__v8hf)_mm_setzero_ph());
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_abs_ph(__m256h __A) {
+ return (__m256h)_mm256_and_epi32(_mm256_set1_epi32(0x7FFF7FFF), (__m256i)__A);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_abs_ph(__m128h __A) {
+ return (__m128h)_mm_and_epi32(_mm_set1_epi32(0x7FFF7FFF), (__m128i)__A);
+}
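+
+/* Editorial note (not part of the upstream header): 0x7FFF7FFF clears bit 15
+ * of each of the two half-precision lanes packed into every 32-bit element,
+ * so the AND above strips the sign bit and yields |x| for all FP16 lanes
+ * using only integer bit operations. */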
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_conj_pch(__m256h __A) {
+ return (__m256h)_mm256_xor_ps((__m256)__A, _mm256_set1_ps(-0.0f));
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_mask_conj_pch(__m256h __W, __mmask8 __U, __m256h __A) {
+ return (__m256h)__builtin_ia32_selectps_256(
+ (__mmask8)__U, (__v8sf)_mm256_conj_pch(__A), (__v8sf)__W);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_maskz_conj_pch(__mmask8 __U, __m256h __A) {
+ return (__m256h)__builtin_ia32_selectps_256(
+ (__mmask8)__U, (__v8sf)_mm256_conj_pch(__A), (__v8sf)_mm256_setzero_ps());
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_conj_pch(__m128h __A) {
+ return (__m128h)_mm_xor_ps((__m128)__A, _mm_set1_ps(-0.0f));
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_conj_pch(__m128h __W,
+ __mmask8 __U,
+ __m128h __A) {
+ return (__m128h)__builtin_ia32_selectps_128(
+ (__mmask8)__U, (__v4sf)_mm_conj_pch(__A), (__v4sf)__W);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_conj_pch(__mmask8 __U, __m128h __A) {
+ return (__m128h)__builtin_ia32_selectps_128(
+ (__mmask8)__U, (__v4sf)_mm_conj_pch(__A), (__v4sf)_mm_setzero_ps());
+}
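+
+/* Editorial note (not part of the upstream header): the conj_pch family views
+ * the vector as packed FP16 complex values, real part in the low 16 bits and
+ * imaginary part in the high 16 bits of each 32-bit pair. XORing with
+ * _mm_set1_ps(-0.0f) / _mm256_set1_ps(-0.0f) flips only bit 31 of every pair,
+ * i.e. the sign of the imaginary part, which is exactly complex conjugation. */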
+
+#define _mm256_cmp_ph_mask(a, b, p) \
+ ((__mmask16)__builtin_ia32_cmpph256_mask( \
+ (__v16hf)(__m256h)(a), (__v16hf)(__m256h)(b), (int)(p), (__mmask16)-1))
+
+#define _mm256_mask_cmp_ph_mask(m, a, b, p) \
+ ((__mmask16)__builtin_ia32_cmpph256_mask( \
+ (__v16hf)(__m256h)(a), (__v16hf)(__m256h)(b), (int)(p), (__mmask16)(m)))
+
+#define _mm_cmp_ph_mask(a, b, p) \
+ ((__mmask8)__builtin_ia32_cmpph128_mask( \
+ (__v8hf)(__m128h)(a), (__v8hf)(__m128h)(b), (int)(p), (__mmask8)-1))
+
+#define _mm_mask_cmp_ph_mask(m, a, b, p) \
+ ((__mmask8)__builtin_ia32_cmpph128_mask( \
+ (__v8hf)(__m128h)(a), (__v8hf)(__m128h)(b), (int)(p), (__mmask8)(m)))
+
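+/* Editorial note (not part of the upstream header): an illustrative use of
+ * the comparison macros; `a`, `b` and `c` are assumed __m128h values and the
+ * _CMP_* predicates come from <avxintrin.h>:
+ *
+ *   __mmask8 gt = _mm_cmp_ph_mask(a, b, _CMP_GT_OQ);          // bit i set if a[i] > b[i]
+ *   __mmask8 eq = _mm_mask_cmp_ph_mask(gt, a, c, _CMP_EQ_OQ); // only lanes set in gt can survive
+ */
+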
+static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_rcp_ph(__m256h __A) {
+ return (__m256h)__builtin_ia32_rcpph256_mask(
+ (__v16hf)__A, (__v16hf)_mm256_undefined_ph(), (__mmask16)-1);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_mask_rcp_ph(__m256h __W, __mmask16 __U, __m256h __A) {
+ return (__m256h)__builtin_ia32_rcpph256_mask((__v16hf)__A, (__v16hf)__W,
+ (__mmask16)__U);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_maskz_rcp_ph(__mmask16 __U, __m256h __A) {
+ return (__m256h)__builtin_ia32_rcpph256_mask(
+ (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_rcp_ph(__m128h __A) {
+ return (__m128h)__builtin_ia32_rcpph128_mask(
+ (__v8hf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_rcp_ph(__m128h __W,
+ __mmask8 __U,
+ __m128h __A) {
+ return (__m128h)__builtin_ia32_rcpph128_mask((__v8hf)__A, (__v8hf)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_rcp_ph(__mmask8 __U,
+ __m128h __A) {
+ return (__m128h)__builtin_ia32_rcpph128_mask(
+ (__v8hf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_rsqrt_ph(__m256h __A) {
+ return (__m256h)__builtin_ia32_rsqrtph256_mask(
+ (__v16hf)__A, (__v16hf)_mm256_undefined_ph(), (__mmask16)-1);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_mask_rsqrt_ph(__m256h __W, __mmask16 __U, __m256h __A) {
+ return (__m256h)__builtin_ia32_rsqrtph256_mask((__v16hf)__A, (__v16hf)__W,
+ (__mmask16)__U);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_maskz_rsqrt_ph(__mmask16 __U, __m256h __A) {
+ return (__m256h)__builtin_ia32_rsqrtph256_mask(
+ (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_rsqrt_ph(__m128h __A) {
+ return (__m128h)__builtin_ia32_rsqrtph128_mask(
+ (__v8hf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt_ph(__m128h __W,
+ __mmask8 __U,
+ __m128h __A) {
+ return (__m128h)__builtin_ia32_rsqrtph128_mask((__v8hf)__A, (__v8hf)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_rsqrt_ph(__mmask8 __U, __m128h __A) {
+ return (__m128h)__builtin_ia32_rsqrtph128_mask(
+ (__v8hf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_getexp_ph(__m128h __A) {
+ return (__m128h)__builtin_ia32_getexpph128_mask(
+ (__v8hf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)-1);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_mask_getexp_ph(__m128h __W, __mmask8 __U, __m128h __A) {
+ return (__m128h)__builtin_ia32_getexpph128_mask((__v8hf)__A, (__v8hf)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_getexp_ph(__mmask8 __U, __m128h __A) {
+ return (__m128h)__builtin_ia32_getexpph128_mask(
+ (__v8hf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_getexp_ph(__m256h __A) {
+ return (__m256h)__builtin_ia32_getexpph256_mask(
+ (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_mask_getexp_ph(__m256h __W, __mmask16 __U, __m256h __A) {
+ return (__m256h)__builtin_ia32_getexpph256_mask((__v16hf)__A, (__v16hf)__W,
+ (__mmask16)__U);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_maskz_getexp_ph(__mmask16 __U, __m256h __A) {
+ return (__m256h)__builtin_ia32_getexpph256_mask(
+ (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U);
+}
+
+#define _mm_getmant_ph(A, B, C) \
+ ((__m128h)__builtin_ia32_getmantph128_mask( \
+ (__v8hf)(__m128h)(A), (int)(((C) << 2) | (B)), (__v8hf)_mm_setzero_ph(), \
+ (__mmask8)-1))
+
+#define _mm_mask_getmant_ph(W, U, A, B, C) \
+ ((__m128h)__builtin_ia32_getmantph128_mask( \
+ (__v8hf)(__m128h)(A), (int)(((C) << 2) | (B)), (__v8hf)(__m128h)(W), \
+ (__mmask8)(U)))
+
+#define _mm_maskz_getmant_ph(U, A, B, C) \
+ ((__m128h)__builtin_ia32_getmantph128_mask( \
+ (__v8hf)(__m128h)(A), (int)(((C) << 2) | (B)), (__v8hf)_mm_setzero_ph(), \
+ (__mmask8)(U)))
+
+#define _mm256_getmant_ph(A, B, C) \
+ ((__m256h)__builtin_ia32_getmantph256_mask( \
+ (__v16hf)(__m256h)(A), (int)(((C) << 2) | (B)), \
+ (__v16hf)_mm256_setzero_ph(), (__mmask16)-1))
+
+#define _mm256_mask_getmant_ph(W, U, A, B, C) \
+ ((__m256h)__builtin_ia32_getmantph256_mask( \
+ (__v16hf)(__m256h)(A), (int)(((C) << 2) | (B)), (__v16hf)(__m256h)(W), \
+ (__mmask16)(U)))
+
+#define _mm256_maskz_getmant_ph(U, A, B, C) \
+ ((__m256h)__builtin_ia32_getmantph256_mask( \
+ (__v16hf)(__m256h)(A), (int)(((C) << 2) | (B)), \
+ (__v16hf)_mm256_setzero_ph(), (__mmask16)(U)))
+
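+/* Editorial note (not part of the upstream header): in the getmant macros, B
+ * selects the normalization interval (an _MM_MANT_NORM_* value) and C the
+ * sign control (an _MM_MANT_SIGN_* value); they are packed into one immediate
+ * as ((C) << 2) | (B) before reaching the builtin. */
+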
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_scalef_ph(__m128h __A,
+ __m128h __B) {
+ return (__m128h)__builtin_ia32_scalefph128_mask(
+ (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_mask_scalef_ph(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
+ return (__m128h)__builtin_ia32_scalefph128_mask((__v8hf)__A, (__v8hf)__B,
+ (__v8hf)__W, (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_scalef_ph(__mmask8 __U, __m128h __A, __m128h __B) {
+ return (__m128h)__builtin_ia32_scalefph128_mask(
+ (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_scalef_ph(__m256h __A,
+ __m256h __B) {
+ return (__m256h)__builtin_ia32_scalefph256_mask(
+ (__v16hf)__A, (__v16hf)__B, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_mask_scalef_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) {
+ return (__m256h)__builtin_ia32_scalefph256_mask((__v16hf)__A, (__v16hf)__B,
+ (__v16hf)__W, (__mmask16)__U);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_maskz_scalef_ph(__mmask16 __U, __m256h __A, __m256h __B) {
+ return (__m256h)__builtin_ia32_scalefph256_mask(
+ (__v16hf)__A, (__v16hf)__B, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U);
+}
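+
+/* Editorial note (not part of the upstream header): scalef computes, per
+ * lane, __A * 2^floor(__B); it behaves like a vector ldexp/scalbn with the
+ * exponent taken from the second operand. */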
+
+#define _mm_roundscale_ph(A, imm) \
+ ((__m128h)__builtin_ia32_rndscaleph_128_mask( \
+ (__v8hf)(__m128h)(A), (int)(imm), (__v8hf)_mm_setzero_ph(), \
+ (__mmask8)-1))
+
+#define _mm_mask_roundscale_ph(W, U, A, imm) \
+ ((__m128h)__builtin_ia32_rndscaleph_128_mask( \
+ (__v8hf)(__m128h)(A), (int)(imm), (__v8hf)(__m128h)(W), (__mmask8)(U)))
+
+#define _mm_maskz_roundscale_ph(U, A, imm) \
+ ((__m128h)__builtin_ia32_rndscaleph_128_mask( \
+ (__v8hf)(__m128h)(A), (int)(imm), (__v8hf)_mm_setzero_ph(), \
+ (__mmask8)(U)))
+
+#define _mm256_roundscale_ph(A, imm) \
+ ((__m256h)__builtin_ia32_rndscaleph_256_mask( \
+ (__v16hf)(__m256h)(A), (int)(imm), (__v16hf)_mm256_setzero_ph(), \
+ (__mmask16)-1))
+
+#define _mm256_mask_roundscale_ph(W, U, A, imm) \
+ ((__m256h)__builtin_ia32_rndscaleph_256_mask( \
+ (__v16hf)(__m256h)(A), (int)(imm), (__v16hf)(__m256h)(W), \
+ (__mmask16)(U)))
+
+#define _mm256_maskz_roundscale_ph(U, A, imm) \
+ ((__m256h)__builtin_ia32_rndscaleph_256_mask( \
+ (__v16hf)(__m256h)(A), (int)(imm), (__v16hf)_mm256_setzero_ph(), \
+ (__mmask16)(U)))
+
+#define _mm_reduce_ph(A, imm) \
+ ((__m128h)__builtin_ia32_reduceph128_mask((__v8hf)(__m128h)(A), (int)(imm), \
+ (__v8hf)_mm_setzero_ph(), \
+ (__mmask8)-1))
+
+#define _mm_mask_reduce_ph(W, U, A, imm) \
+ ((__m128h)__builtin_ia32_reduceph128_mask( \
+ (__v8hf)(__m128h)(A), (int)(imm), (__v8hf)(__m128h)(W), (__mmask8)(U)))
+
+#define _mm_maskz_reduce_ph(U, A, imm) \
+ ((__m128h)__builtin_ia32_reduceph128_mask((__v8hf)(__m128h)(A), (int)(imm), \
+ (__v8hf)_mm_setzero_ph(), \
+ (__mmask8)(U)))
+
+#define _mm256_reduce_ph(A, imm) \
+ ((__m256h)__builtin_ia32_reduceph256_mask((__v16hf)(__m256h)(A), (int)(imm), \
+ (__v16hf)_mm256_setzero_ph(), \
+ (__mmask16)-1))
+
+#define _mm256_mask_reduce_ph(W, U, A, imm) \
+ ((__m256h)__builtin_ia32_reduceph256_mask((__v16hf)(__m256h)(A), (int)(imm), \
+ (__v16hf)(__m256h)(W), \
+ (__mmask16)(U)))
+
+#define _mm256_maskz_reduce_ph(U, A, imm) \
+ ((__m256h)__builtin_ia32_reduceph256_mask((__v16hf)(__m256h)(A), (int)(imm), \
+ (__v16hf)_mm256_setzero_ph(), \
+ (__mmask16)(U)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_sqrt_ph(__m128h __a) {
+ return __builtin_ia32_sqrtph((__v8hf)__a);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_ph(__m128h __W,
+ __mmask8 __U,
+ __m128h __A) {
+ return (__m128h)__builtin_ia32_selectph_128(
+ (__mmask8)__U, (__v8hf)_mm_sqrt_ph(__A), (__v8hf)__W);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_ph(__mmask8 __U,
+ __m128h __A) {
+ return (__m128h)__builtin_ia32_selectph_128(
+ (__mmask8)__U, (__v8hf)_mm_sqrt_ph(__A), (__v8hf)_mm_setzero_ph());
+}
+
+static __inline __m256h __DEFAULT_FN_ATTRS256 _mm256_sqrt_ph(__m256h __a) {
+ return (__m256h)__builtin_ia32_sqrtph256((__v16hf)__a);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_mask_sqrt_ph(__m256h __W, __mmask16 __U, __m256h __A) {
+ return (__m256h)__builtin_ia32_selectph_256(
+ (__mmask16)__U, (__v16hf)_mm256_sqrt_ph(__A), (__v16hf)__W);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_maskz_sqrt_ph(__mmask16 __U, __m256h __A) {
+ return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U,
+ (__v16hf)_mm256_sqrt_ph(__A),
+ (__v16hf)_mm256_setzero_ph());
+}
+
+#define _mm_mask_fpclass_ph_mask(U, A, imm) \
+ ((__mmask8)__builtin_ia32_fpclassph128_mask((__v8hf)(__m128h)(A), \
+ (int)(imm), (__mmask8)(U)))
+
+#define _mm_fpclass_ph_mask(A, imm) \
+ ((__mmask8)__builtin_ia32_fpclassph128_mask((__v8hf)(__m128h)(A), \
+ (int)(imm), (__mmask8)-1))
+
+#define _mm256_mask_fpclass_ph_mask(U, A, imm) \
+ ((__mmask16)__builtin_ia32_fpclassph256_mask((__v16hf)(__m256h)(A), \
+ (int)(imm), (__mmask16)(U)))
+
+#define _mm256_fpclass_ph_mask(A, imm) \
+ ((__mmask16)__builtin_ia32_fpclassph256_mask((__v16hf)(__m256h)(A), \
+ (int)(imm), (__mmask16)-1))
+
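+/* Editorial note (not part of the upstream header): an illustrative, hedged
+ * use of the fpclass tests; the immediate is a bit mask of categories as
+ * documented for VFPCLASSPH (QNaN, +/-0, +/-Inf, denormal, negative, SNaN).
+ * Assuming the conventional encoding in which bit 0 selects QNaN and bit 7
+ * selects SNaN:
+ *
+ *   __mmask8 nan_lanes = _mm_fpclass_ph_mask(x, 0x81);   // any-NaN test
+ */
+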
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtpd_ph(__m128d __A) {
+ return (__m128h)__builtin_ia32_vcvtpd2ph128_mask(
+ (__v2df)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_ph(__m128h __W,
+ __mmask8 __U,
+ __m128d __A) {
+ return (__m128h)__builtin_ia32_vcvtpd2ph128_mask((__v2df)__A, (__v8hf)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtpd_ph(__mmask8 __U, __m128d __A) {
+ return (__m128h)__builtin_ia32_vcvtpd2ph128_mask(
+ (__v2df)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_cvtpd_ph(__m256d __A) {
+ return (__m128h)__builtin_ia32_vcvtpd2ph256_mask(
+ (__v4df)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtpd_ph(__m128h __W, __mmask8 __U, __m256d __A) {
+ return (__m128h)__builtin_ia32_vcvtpd2ph256_mask((__v4df)__A, (__v8hf)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtpd_ph(__mmask8 __U, __m256d __A) {
+ return (__m128h)__builtin_ia32_vcvtpd2ph256_mask(
+ (__v4df)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtph_pd(__m128h __A) {
+ return (__m128d)__builtin_ia32_vcvtph2pd128_mask(
+ (__v8hf)__A, (__v2df)_mm_undefined_pd(), (__mmask8)-1);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtph_pd(__m128d __W,
+ __mmask8 __U,
+ __m128h __A) {
+ return (__m128d)__builtin_ia32_vcvtph2pd128_mask((__v8hf)__A, (__v2df)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtph_pd(__mmask8 __U, __m128h __A) {
+ return (__m128d)__builtin_ia32_vcvtph2pd128_mask(
+ (__v8hf)__A, (__v2df)_mm_setzero_pd(), (__mmask8)__U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_cvtph_pd(__m128h __A) {
+ return (__m256d)__builtin_ia32_vcvtph2pd256_mask(
+ (__v8hf)__A, (__v4df)_mm256_undefined_pd(), (__mmask8)-1);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtph_pd(__m256d __W, __mmask8 __U, __m128h __A) {
+ return (__m256d)__builtin_ia32_vcvtph2pd256_mask((__v8hf)__A, (__v4df)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtph_pd(__mmask8 __U, __m128h __A) {
+ return (__m256d)__builtin_ia32_vcvtph2pd256_mask(
+ (__v8hf)__A, (__v4df)_mm256_setzero_pd(), (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epi16(__m128h __A) {
+ return (__m128i)__builtin_ia32_vcvtph2w128_mask(
+ (__v8hf)__A, (__v8hi)_mm_undefined_si128(), (__mmask8)-1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvtph_epi16(__m128i __W, __mmask8 __U, __m128h __A) {
+ return (__m128i)__builtin_ia32_vcvtph2w128_mask((__v8hf)__A, (__v8hi)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtph_epi16(__mmask8 __U, __m128h __A) {
+ return (__m128i)__builtin_ia32_vcvtph2w128_mask(
+ (__v8hf)__A, (__v8hi)_mm_setzero_si128(), (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_cvtph_epi16(__m256h __A) {
+ return (__m256i)__builtin_ia32_vcvtph2w256_mask(
+ (__v16hf)__A, (__v16hi)_mm256_undefined_si256(), (__mmask16)-1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtph_epi16(__m256i __W, __mmask16 __U, __m256h __A) {
+ return (__m256i)__builtin_ia32_vcvtph2w256_mask((__v16hf)__A, (__v16hi)__W,
+ (__mmask16)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtph_epi16(__mmask16 __U, __m256h __A) {
+ return (__m256i)__builtin_ia32_vcvtph2w256_mask(
+ (__v16hf)__A, (__v16hi)_mm256_setzero_si256(), (__mmask16)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epi16(__m128h __A) {
+ return (__m128i)__builtin_ia32_vcvttph2w128_mask(
+ (__v8hf)__A, (__v8hi)_mm_undefined_si128(), (__mmask8)-1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvttph_epi16(__m128i __W, __mmask8 __U, __m128h __A) {
+ return (__m128i)__builtin_ia32_vcvttph2w128_mask((__v8hf)__A, (__v8hi)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvttph_epi16(__mmask8 __U, __m128h __A) {
+ return (__m128i)__builtin_ia32_vcvttph2w128_mask(
+ (__v8hf)__A, (__v8hi)_mm_setzero_si128(), (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_cvttph_epi16(__m256h __A) {
+ return (__m256i)__builtin_ia32_vcvttph2w256_mask(
+ (__v16hf)__A, (__v16hi)_mm256_undefined_si256(), (__mmask16)-1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvttph_epi16(__m256i __W, __mmask16 __U, __m256h __A) {
+ return (__m256i)__builtin_ia32_vcvttph2w256_mask((__v16hf)__A, (__v16hi)__W,
+ (__mmask16)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvttph_epi16(__mmask16 __U, __m256h __A) {
+ return (__m256i)__builtin_ia32_vcvttph2w256_mask(
+ (__v16hf)__A, (__v16hi)_mm256_setzero_si256(), (__mmask16)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepi16_ph(__m128i __A) {
+ return (__m128h) __builtin_convertvector((__v8hi)__A, __v8hf);
+}
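+
+/* Editorial note (not part of the upstream header): __builtin_convertvector
+ * is a generic Clang builtin that converts each element of the source vector
+ * to the destination vector's element type, so the conversion above is a
+ * plain lane-wise int16 -> _Float16 conversion rather than a target-specific
+ * builtin. */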
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_mask_cvtepi16_ph(__m128h __W, __mmask8 __U, __m128i __A) {
+ return (__m128h)__builtin_ia32_selectph_128(
+ (__mmask8)__U, (__v8hf)_mm_cvtepi16_ph(__A), (__v8hf)__W);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtepi16_ph(__mmask8 __U, __m128i __A) {
+ return (__m128h)__builtin_ia32_selectph_128(
+ (__mmask8)__U, (__v8hf)_mm_cvtepi16_ph(__A), (__v8hf)_mm_setzero_ph());
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_cvtepi16_ph(__m256i __A) {
+ return (__m256h) __builtin_convertvector((__v16hi)__A, __v16hf);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtepi16_ph(__m256h __W, __mmask16 __U, __m256i __A) {
+ return (__m256h)__builtin_ia32_selectph_256(
+ (__mmask16)__U, (__v16hf)_mm256_cvtepi16_ph(__A), (__v16hf)__W);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtepi16_ph(__mmask16 __U, __m256i __A) {
+ return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U,
+ (__v16hf)_mm256_cvtepi16_ph(__A),
+ (__v16hf)_mm256_setzero_ph());
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epu16(__m128h __A) {
+ return (__m128i)__builtin_ia32_vcvtph2uw128_mask(
+ (__v8hf)__A, (__v8hu)_mm_undefined_si128(), (__mmask8)-1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvtph_epu16(__m128i __W, __mmask8 __U, __m128h __A) {
+ return (__m128i)__builtin_ia32_vcvtph2uw128_mask((__v8hf)__A, (__v8hu)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtph_epu16(__mmask8 __U, __m128h __A) {
+ return (__m128i)__builtin_ia32_vcvtph2uw128_mask(
+ (__v8hf)__A, (__v8hu)_mm_setzero_si128(), (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_cvtph_epu16(__m256h __A) {
+ return (__m256i)__builtin_ia32_vcvtph2uw256_mask(
+ (__v16hf)__A, (__v16hu)_mm256_undefined_si256(), (__mmask16)-1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtph_epu16(__m256i __W, __mmask16 __U, __m256h __A) {
+ return (__m256i)__builtin_ia32_vcvtph2uw256_mask((__v16hf)__A, (__v16hu)__W,
+ (__mmask16)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtph_epu16(__mmask16 __U, __m256h __A) {
+ return (__m256i)__builtin_ia32_vcvtph2uw256_mask(
+ (__v16hf)__A, (__v16hu)_mm256_setzero_si256(), (__mmask16)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epu16(__m128h __A) {
+ return (__m128i)__builtin_ia32_vcvttph2uw128_mask(
+ (__v8hf)__A, (__v8hu)_mm_undefined_si128(), (__mmask8)-1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvttph_epu16(__m128i __W, __mmask8 __U, __m128h __A) {
+ return (__m128i)__builtin_ia32_vcvttph2uw128_mask((__v8hf)__A, (__v8hu)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvttph_epu16(__mmask8 __U, __m128h __A) {
+ return (__m128i)__builtin_ia32_vcvttph2uw128_mask(
+ (__v8hf)__A, (__v8hu)_mm_setzero_si128(), (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_cvttph_epu16(__m256h __A) {
+ return (__m256i)__builtin_ia32_vcvttph2uw256_mask(
+ (__v16hf)__A, (__v16hu)_mm256_undefined_si256(), (__mmask16)-1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvttph_epu16(__m256i __W, __mmask16 __U, __m256h __A) {
+ return (__m256i)__builtin_ia32_vcvttph2uw256_mask((__v16hf)__A, (__v16hu)__W,
+ (__mmask16)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvttph_epu16(__mmask16 __U, __m256h __A) {
+ return (__m256i)__builtin_ia32_vcvttph2uw256_mask(
+ (__v16hf)__A, (__v16hu)_mm256_setzero_si256(), (__mmask16)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepu16_ph(__m128i __A) {
+ return (__m128h) __builtin_convertvector((__v8hu)__A, __v8hf);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_mask_cvtepu16_ph(__m128h __W, __mmask8 __U, __m128i __A) {
+ return (__m128h)__builtin_ia32_selectph_128(
+ (__mmask8)__U, (__v8hf)_mm_cvtepu16_ph(__A), (__v8hf)__W);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtepu16_ph(__mmask8 __U, __m128i __A) {
+ return (__m128h)__builtin_ia32_selectph_128(
+ (__mmask8)__U, (__v8hf)_mm_cvtepu16_ph(__A), (__v8hf)_mm_setzero_ph());
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_cvtepu16_ph(__m256i __A) {
+ return (__m256h) __builtin_convertvector((__v16hu)__A, __v16hf);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtepu16_ph(__m256h __W, __mmask16 __U, __m256i __A) {
+ return (__m256h)__builtin_ia32_selectph_256(
+ (__mmask16)__U, (__v16hf)_mm256_cvtepu16_ph(__A), (__v16hf)__W);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtepu16_ph(__mmask16 __U, __m256i __A) {
+ return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U,
+ (__v16hf)_mm256_cvtepu16_ph(__A),
+ (__v16hf)_mm256_setzero_ph());
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epi32(__m128h __A) {
+ return (__m128i)__builtin_ia32_vcvtph2dq128_mask(
+ (__v8hf)__A, (__v4si)_mm_undefined_si128(), (__mmask8)-1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvtph_epi32(__m128i __W, __mmask8 __U, __m128h __A) {
+ return (__m128i)__builtin_ia32_vcvtph2dq128_mask((__v8hf)__A, (__v4si)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtph_epi32(__mmask8 __U, __m128h __A) {
+ return (__m128i)__builtin_ia32_vcvtph2dq128_mask(
+ (__v8hf)__A, (__v4si)_mm_setzero_si128(), (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_cvtph_epi32(__m128h __A) {
+ return (__m256i)__builtin_ia32_vcvtph2dq256_mask(
+ (__v8hf)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtph_epi32(__m256i __W, __mmask8 __U, __m128h __A) {
+ return (__m256i)__builtin_ia32_vcvtph2dq256_mask((__v8hf)__A, (__v8si)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtph_epi32(__mmask8 __U, __m128h __A) {
+ return (__m256i)__builtin_ia32_vcvtph2dq256_mask(
+ (__v8hf)__A, (__v8si)_mm256_setzero_si256(), (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epu32(__m128h __A) {
+ return (__m128i)__builtin_ia32_vcvtph2udq128_mask(
+ (__v8hf)__A, (__v4su)_mm_undefined_si128(), (__mmask8)-1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvtph_epu32(__m128i __W, __mmask8 __U, __m128h __A) {
+ return (__m128i)__builtin_ia32_vcvtph2udq128_mask((__v8hf)__A, (__v4su)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtph_epu32(__mmask8 __U, __m128h __A) {
+ return (__m128i)__builtin_ia32_vcvtph2udq128_mask(
+ (__v8hf)__A, (__v4su)_mm_setzero_si128(), (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_cvtph_epu32(__m128h __A) {
+ return (__m256i)__builtin_ia32_vcvtph2udq256_mask(
+ (__v8hf)__A, (__v8su)_mm256_undefined_si256(), (__mmask8)-1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtph_epu32(__m256i __W, __mmask8 __U, __m128h __A) {
+ return (__m256i)__builtin_ia32_vcvtph2udq256_mask((__v8hf)__A, (__v8su)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtph_epu32(__mmask8 __U, __m128h __A) {
+ return (__m256i)__builtin_ia32_vcvtph2udq256_mask(
+ (__v8hf)__A, (__v8su)_mm256_setzero_si256(), (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepi32_ph(__m128i __A) {
+ return (__m128h)__builtin_ia32_vcvtdq2ph128_mask(
+ (__v4si)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_mask_cvtepi32_ph(__m128h __W, __mmask8 __U, __m128i __A) {
+ return (__m128h)__builtin_ia32_vcvtdq2ph128_mask((__v4si)__A, (__v8hf)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtepi32_ph(__mmask8 __U, __m128i __A) {
+ return (__m128h)__builtin_ia32_vcvtdq2ph128_mask(
+ (__v4si)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256
+_mm256_cvtepi32_ph(__m256i __A) {
+ return (__m128h) __builtin_convertvector((__v8si)__A, __v8hf);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtepi32_ph(__m128h __W, __mmask8 __U, __m256i __A) {
+ return (__m128h)__builtin_ia32_selectph_128(
+ (__mmask8)__U, (__v8hf)_mm256_cvtepi32_ph(__A), (__v8hf)__W);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtepi32_ph(__mmask8 __U, __m256i __A) {
+ return (__m128h)__builtin_ia32_selectph_128(
+ (__mmask8)__U, (__v8hf)_mm256_cvtepi32_ph(__A), (__v8hf)_mm_setzero_ph());
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepu32_ph(__m128i __A) {
+ return (__m128h)__builtin_ia32_vcvtudq2ph128_mask(
+ (__v4su)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_mask_cvtepu32_ph(__m128h __W, __mmask8 __U, __m128i __A) {
+ return (__m128h)__builtin_ia32_vcvtudq2ph128_mask((__v4su)__A, (__v8hf)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtepu32_ph(__mmask8 __U, __m128i __A) {
+ return (__m128h)__builtin_ia32_vcvtudq2ph128_mask(
+ (__v4su)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256
+_mm256_cvtepu32_ph(__m256i __A) {
+ return (__m128h) __builtin_convertvector((__v8su)__A, __v8hf);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtepu32_ph(__m128h __W, __mmask8 __U, __m256i __A) {
+ return (__m128h)__builtin_ia32_selectph_128(
+ (__mmask8)__U, (__v8hf)_mm256_cvtepu32_ph(__A), (__v8hf)__W);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtepu32_ph(__mmask8 __U, __m256i __A) {
+ return (__m128h)__builtin_ia32_selectph_128(
+ (__mmask8)__U, (__v8hf)_mm256_cvtepu32_ph(__A), (__v8hf)_mm_setzero_ph());
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epi32(__m128h __A) {
+ return (__m128i)__builtin_ia32_vcvttph2dq128_mask(
+ (__v8hf)__A, (__v4si)_mm_undefined_si128(), (__mmask8)-1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvttph_epi32(__m128i __W, __mmask8 __U, __m128h __A) {
+ return (__m128i)__builtin_ia32_vcvttph2dq128_mask((__v8hf)__A, (__v4si)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvttph_epi32(__mmask8 __U, __m128h __A) {
+ return (__m128i)__builtin_ia32_vcvttph2dq128_mask(
+ (__v8hf)__A, (__v4si)_mm_setzero_si128(), (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_cvttph_epi32(__m128h __A) {
+ return (__m256i)__builtin_ia32_vcvttph2dq256_mask(
+ (__v8hf)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvttph_epi32(__m256i __W, __mmask8 __U, __m128h __A) {
+ return (__m256i)__builtin_ia32_vcvttph2dq256_mask((__v8hf)__A, (__v8si)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvttph_epi32(__mmask8 __U, __m128h __A) {
+ return (__m256i)__builtin_ia32_vcvttph2dq256_mask(
+ (__v8hf)__A, (__v8si)_mm256_setzero_si256(), (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epu32(__m128h __A) {
+ return (__m128i)__builtin_ia32_vcvttph2udq128_mask(
+ (__v8hf)__A, (__v4su)_mm_undefined_si128(), (__mmask8)-1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvttph_epu32(__m128i __W, __mmask8 __U, __m128h __A) {
+ return (__m128i)__builtin_ia32_vcvttph2udq128_mask((__v8hf)__A, (__v4su)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvttph_epu32(__mmask8 __U, __m128h __A) {
+ return (__m128i)__builtin_ia32_vcvttph2udq128_mask(
+ (__v8hf)__A, (__v4su)_mm_setzero_si128(), (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_cvttph_epu32(__m128h __A) {
+ return (__m256i)__builtin_ia32_vcvttph2udq256_mask(
+ (__v8hf)__A, (__v8su)_mm256_undefined_si256(), (__mmask8)-1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvttph_epu32(__m256i __W, __mmask8 __U, __m128h __A) {
+ return (__m256i)__builtin_ia32_vcvttph2udq256_mask((__v8hf)__A, (__v8su)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvttph_epu32(__mmask8 __U, __m128h __A) {
+ return (__m256i)__builtin_ia32_vcvttph2udq256_mask(
+ (__v8hf)__A, (__v8su)_mm256_setzero_si256(), (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepi64_ph(__m128i __A) {
+ return (__m128h)__builtin_ia32_vcvtqq2ph128_mask(
+ (__v2di)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_mask_cvtepi64_ph(__m128h __W, __mmask8 __U, __m128i __A) {
+ return (__m128h)__builtin_ia32_vcvtqq2ph128_mask((__v2di)__A, (__v8hf)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtepi64_ph(__mmask8 __U, __m128i __A) {
+ return (__m128h)__builtin_ia32_vcvtqq2ph128_mask(
+ (__v2di)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256
+_mm256_cvtepi64_ph(__m256i __A) {
+ return (__m128h)__builtin_ia32_vcvtqq2ph256_mask(
+ (__v4di)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtepi64_ph(__m128h __W, __mmask8 __U, __m256i __A) {
+ return (__m128h)__builtin_ia32_vcvtqq2ph256_mask((__v4di)__A, (__v8hf)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtepi64_ph(__mmask8 __U, __m256i __A) {
+ return (__m128h)__builtin_ia32_vcvtqq2ph256_mask(
+ (__v4di)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epi64(__m128h __A) {
+ return (__m128i)__builtin_ia32_vcvtph2qq128_mask(
+ (__v8hf)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvtph_epi64(__m128i __W, __mmask8 __U, __m128h __A) {
+ return (__m128i)__builtin_ia32_vcvtph2qq128_mask((__v8hf)__A, (__v2di)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtph_epi64(__mmask8 __U, __m128h __A) {
+ return (__m128i)__builtin_ia32_vcvtph2qq128_mask(
+ (__v8hf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_cvtph_epi64(__m128h __A) {
+ return (__m256i)__builtin_ia32_vcvtph2qq256_mask(
+ (__v8hf)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtph_epi64(__m256i __W, __mmask8 __U, __m128h __A) {
+ return (__m256i)__builtin_ia32_vcvtph2qq256_mask((__v8hf)__A, (__v4di)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtph_epi64(__mmask8 __U, __m128h __A) {
+ return (__m256i)__builtin_ia32_vcvtph2qq256_mask(
+ (__v8hf)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepu64_ph(__m128i __A) {
+ return (__m128h)__builtin_ia32_vcvtuqq2ph128_mask(
+ (__v2du)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_mask_cvtepu64_ph(__m128h __W, __mmask8 __U, __m128i __A) {
+ return (__m128h)__builtin_ia32_vcvtuqq2ph128_mask((__v2du)__A, (__v8hf)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtepu64_ph(__mmask8 __U, __m128i __A) {
+ return (__m128h)__builtin_ia32_vcvtuqq2ph128_mask(
+ (__v2du)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256
+_mm256_cvtepu64_ph(__m256i __A) {
+ return (__m128h)__builtin_ia32_vcvtuqq2ph256_mask(
+ (__v4du)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtepu64_ph(__m128h __W, __mmask8 __U, __m256i __A) {
+ return (__m128h)__builtin_ia32_vcvtuqq2ph256_mask((__v4du)__A, (__v8hf)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtepu64_ph(__mmask8 __U, __m256i __A) {
+ return (__m128h)__builtin_ia32_vcvtuqq2ph256_mask(
+ (__v4du)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epu64(__m128h __A) {
+ return (__m128i)__builtin_ia32_vcvtph2uqq128_mask(
+ (__v8hf)__A, (__v2du)_mm_undefined_si128(), (__mmask8)-1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvtph_epu64(__m128i __W, __mmask8 __U, __m128h __A) {
+ return (__m128i)__builtin_ia32_vcvtph2uqq128_mask((__v8hf)__A, (__v2du)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtph_epu64(__mmask8 __U, __m128h __A) {
+ return (__m128i)__builtin_ia32_vcvtph2uqq128_mask(
+ (__v8hf)__A, (__v2du)_mm_setzero_si128(), (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_cvtph_epu64(__m128h __A) {
+ return (__m256i)__builtin_ia32_vcvtph2uqq256_mask(
+ (__v8hf)__A, (__v4du)_mm256_undefined_si256(), (__mmask8)-1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtph_epu64(__m256i __W, __mmask8 __U, __m128h __A) {
+ return (__m256i)__builtin_ia32_vcvtph2uqq256_mask((__v8hf)__A, (__v4du)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtph_epu64(__mmask8 __U, __m128h __A) {
+ return (__m256i)__builtin_ia32_vcvtph2uqq256_mask(
+ (__v8hf)__A, (__v4du)_mm256_setzero_si256(), (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epi64(__m128h __A) {
+ return (__m128i)__builtin_ia32_vcvttph2qq128_mask(
+ (__v8hf)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvttph_epi64(__m128i __W, __mmask8 __U, __m128h __A) {
+ return (__m128i)__builtin_ia32_vcvttph2qq128_mask((__v8hf)__A, (__v2di)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvttph_epi64(__mmask8 __U, __m128h __A) {
+ return (__m128i)__builtin_ia32_vcvttph2qq128_mask(
+ (__v8hf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_cvttph_epi64(__m128h __A) {
+ return (__m256i)__builtin_ia32_vcvttph2qq256_mask(
+ (__v8hf)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvttph_epi64(__m256i __W, __mmask8 __U, __m128h __A) {
+ return (__m256i)__builtin_ia32_vcvttph2qq256_mask((__v8hf)__A, (__v4di)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvttph_epi64(__mmask8 __U, __m128h __A) {
+ return (__m256i)__builtin_ia32_vcvttph2qq256_mask(
+ (__v8hf)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epu64(__m128h __A) {
+ return (__m128i)__builtin_ia32_vcvttph2uqq128_mask(
+ (__v8hf)__A, (__v2du)_mm_undefined_si128(), (__mmask8)-1);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_cvttph_epu64(__m128i __W, __mmask8 __U, __m128h __A) {
+ return (__m128i)__builtin_ia32_vcvttph2uqq128_mask((__v8hf)__A, (__v2du)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_cvttph_epu64(__mmask8 __U, __m128h __A) {
+ return (__m128i)__builtin_ia32_vcvttph2uqq128_mask(
+ (__v8hf)__A, (__v2du)_mm_setzero_si128(), (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_cvttph_epu64(__m128h __A) {
+ return (__m256i)__builtin_ia32_vcvttph2uqq256_mask(
+ (__v8hf)__A, (__v4du)_mm256_undefined_si256(), (__mmask8)-1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_cvttph_epu64(__m256i __W, __mmask8 __U, __m128h __A) {
+ return (__m256i)__builtin_ia32_vcvttph2uqq256_mask((__v8hf)__A, (__v4du)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvttph_epu64(__mmask8 __U, __m128h __A) {
+ return (__m256i)__builtin_ia32_vcvttph2uqq256_mask(
+ (__v8hf)__A, (__v4du)_mm256_setzero_si256(), (__mmask8)__U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtxph_ps(__m128h __A) {
+ return (__m128)__builtin_ia32_vcvtph2psx128_mask(
+ (__v8hf)__A, (__v4sf)_mm_undefined_ps(), (__mmask8)-1);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtxph_ps(__m128 __W,
+ __mmask8 __U,
+ __m128h __A) {
+ return (__m128)__builtin_ia32_vcvtph2psx128_mask((__v8hf)__A, (__v4sf)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtxph_ps(__mmask8 __U, __m128h __A) {
+ return (__m128)__builtin_ia32_vcvtph2psx128_mask(
+ (__v8hf)__A, (__v4sf)_mm_setzero_ps(), (__mmask8)__U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtxph_ps(__m128h __A) {
+ return (__m256)__builtin_ia32_vcvtph2psx256_mask(
+ (__v8hf)__A, (__v8sf)_mm256_undefined_ps(), (__mmask8)-1);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtxph_ps(__m256 __W, __mmask8 __U, __m128h __A) {
+ return (__m256)__builtin_ia32_vcvtph2psx256_mask((__v8hf)__A, (__v8sf)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtxph_ps(__mmask8 __U, __m128h __A) {
+ return (__m256)__builtin_ia32_vcvtph2psx256_mask(
+ (__v8hf)__A, (__v8sf)_mm256_setzero_ps(), (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtxps_ph(__m128 __A) {
+ return (__m128h)__builtin_ia32_vcvtps2phx128_mask(
+ (__v4sf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtxps_ph(__m128h __W,
+ __mmask8 __U,
+ __m128 __A) {
+ return (__m128h)__builtin_ia32_vcvtps2phx128_mask((__v4sf)__A, (__v8hf)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtxps_ph(__mmask8 __U, __m128 __A) {
+ return (__m128h)__builtin_ia32_vcvtps2phx128_mask(
+ (__v4sf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_cvtxps_ph(__m256 __A) {
+ return (__m128h)__builtin_ia32_vcvtps2phx256_mask(
+ (__v8sf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtxps_ph(__m128h __W, __mmask8 __U, __m256 __A) {
+ return (__m128h)__builtin_ia32_vcvtps2phx256_mask((__v8sf)__A, (__v8hf)__W,
+ (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtxps_ph(__mmask8 __U, __m256 __A) {
+ return (__m128h)__builtin_ia32_vcvtps2phx256_mask(
+ (__v8sf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmadd_ph(__m128h __A,
+ __m128h __B,
+ __m128h __C) {
+ return (__m128h)__builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B,
+ (__v8hf)__C);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ph(__m128h __A,
+ __mmask8 __U,
+ __m128h __B,
+ __m128h __C) {
+ return (__m128h)__builtin_ia32_selectph_128(
+ (__mmask8)__U,
+ __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
+ (__v8hf)__A);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_mask3_fmadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
+ return (__m128h)__builtin_ia32_selectph_128(
+ (__mmask8)__U,
+ __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
+ (__v8hf)__C);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_fmadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
+ return (__m128h)__builtin_ia32_selectph_128(
+ (__mmask8)__U,
+ __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
+ (__v8hf)_mm_setzero_ph());
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmsub_ph(__m128h __A,
+ __m128h __B,
+ __m128h __C) {
+ return (__m128h)__builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B,
+ -(__v8hf)__C);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_ph(__m128h __A,
+ __mmask8 __U,
+ __m128h __B,
+ __m128h __C) {
+ return (__m128h)__builtin_ia32_selectph_128(
+ (__mmask8)__U, _mm_fmsub_ph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
+ (__v8hf)__A);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_fmsub_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
+ return (__m128h)__builtin_ia32_selectph_128(
+ (__mmask8)__U, _mm_fmsub_ph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
+ (__v8hf)_mm_setzero_ph());
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_mask3_fnmadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
+ return (__m128h)__builtin_ia32_selectph_128(
+ (__mmask8)__U,
+ __builtin_ia32_vfmaddph(-(__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
+ (__v8hf)__C);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_fnmadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
+ return (__m128h)__builtin_ia32_selectph_128(
+ (__mmask8)__U,
+ __builtin_ia32_vfmaddph(-(__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
+ (__v8hf)_mm_setzero_ph());
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_fnmsub_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
+ return (__m128h)__builtin_ia32_selectph_128(
+ (__mmask8)__U,
+ __builtin_ia32_vfmaddph(-(__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
+ (__v8hf)_mm_setzero_ph());
+}
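+
+/* Editorial note (not part of the upstream header): the three masked FMA
+ * flavours differ only in what masked-off lanes receive, as the selectph
+ * calls above show: _mm_mask_* keeps lanes from the first source (__A),
+ * _mm_mask3_* keeps lanes from the addend (__C), and _mm_maskz_* zeroes
+ * them. */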
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmadd_ph(__m256h __A,
+ __m256h __B,
+ __m256h __C) {
+ return (__m256h)__builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B,
+ (__v16hf)__C);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_mask_fmadd_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
+ return (__m256h)__builtin_ia32_selectph_256(
+ (__mmask16)__U,
+ __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
+ (__v16hf)__A);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_mask3_fmadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
+ return (__m256h)__builtin_ia32_selectph_256(
+ (__mmask16)__U,
+ __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
+ (__v16hf)__C);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_maskz_fmadd_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
+ return (__m256h)__builtin_ia32_selectph_256(
+ (__mmask16)__U,
+ __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
+ (__v16hf)_mm256_setzero_ph());
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmsub_ph(__m256h __A,
+ __m256h __B,
+ __m256h __C) {
+ return (__m256h)__builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B,
+ -(__v16hf)__C);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_mask_fmsub_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
+ return (__m256h)__builtin_ia32_selectph_256(
+ (__mmask16)__U,
+ __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
+ (__v16hf)__A);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_maskz_fmsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
+ return (__m256h)__builtin_ia32_selectph_256(
+ (__mmask16)__U,
+ __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
+ (__v16hf)_mm256_setzero_ph());
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_mask3_fnmadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
+ return (__m256h)__builtin_ia32_selectph_256(
+ (__mmask16)__U,
+ __builtin_ia32_vfmaddph256(-(__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
+ (__v16hf)__C);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_maskz_fnmadd_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
+ return (__m256h)__builtin_ia32_selectph_256(
+ (__mmask16)__U,
+ __builtin_ia32_vfmaddph256(-(__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
+ (__v16hf)_mm256_setzero_ph());
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_maskz_fnmsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
+ return (__m256h)__builtin_ia32_selectph_256(
+ (__mmask16)__U,
+ __builtin_ia32_vfmaddph256(-(__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
+ (__v16hf)_mm256_setzero_ph());
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmaddsub_ph(__m128h __A,
+ __m128h __B,
+ __m128h __C) {
+ return (__m128h)__builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B,
+ (__v8hf)__C);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_mask_fmaddsub_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
+ return (__m128h)__builtin_ia32_selectph_128(
+ (__mmask8)__U,
+ __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
+ (__v8hf)__A);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_mask3_fmaddsub_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
+ return (__m128h)__builtin_ia32_selectph_128(
+ (__mmask8)__U,
+ __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
+ (__v8hf)__C);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_fmaddsub_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
+ return (__m128h)__builtin_ia32_selectph_128(
+ (__mmask8)__U,
+ __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C),
+ (__v8hf)_mm_setzero_ph());
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmsubadd_ph(__m128h __A,
+ __m128h __B,
+ __m128h __C) {
+ return (__m128h)__builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B,
+ -(__v8hf)__C);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_mask_fmsubadd_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
+ return (__m128h)__builtin_ia32_selectph_128(
+ (__mmask8)__U,
+ __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
+ (__v8hf)__A);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_fmsubadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
+ return (__m128h)__builtin_ia32_selectph_128(
+ (__mmask8)__U,
+ __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
+ (__v8hf)_mm_setzero_ph());
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_fmaddsub_ph(__m256h __A, __m256h __B, __m256h __C) {
+ return (__m256h)__builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B,
+ (__v16hf)__C);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_mask_fmaddsub_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
+ return (__m256h)__builtin_ia32_selectph_256(
+ (__mmask16)__U,
+ __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
+ (__v16hf)__A);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_mask3_fmaddsub_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
+ return (__m256h)__builtin_ia32_selectph_256(
+ (__mmask16)__U,
+ __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
+ (__v16hf)__C);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_maskz_fmaddsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
+ return (__m256h)__builtin_ia32_selectph_256(
+ (__mmask16)__U,
+ __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C),
+ (__v16hf)_mm256_setzero_ph());
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_fmsubadd_ph(__m256h __A, __m256h __B, __m256h __C) {
+ return (__m256h)__builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B,
+ -(__v16hf)__C);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_mask_fmsubadd_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
+ return (__m256h)__builtin_ia32_selectph_256(
+ (__mmask16)__U,
+ __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
+ (__v16hf)__A);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_maskz_fmsubadd_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) {
+ return (__m256h)__builtin_ia32_selectph_256(
+ (__mmask16)__U,
+ __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
+ (__v16hf)_mm256_setzero_ph());
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_mask3_fmsub_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
+ return (__m128h)__builtin_ia32_selectph_128(
+ (__mmask8)__U,
+ __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
+ (__v8hf)__C);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_mask3_fmsub_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
+ return (__m256h)__builtin_ia32_selectph_256(
+ (__mmask16)__U,
+ __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
+ (__v16hf)__C);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_mask3_fmsubadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
+ return (__m128h)__builtin_ia32_selectph_128(
+ (__mmask8)__U,
+ __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C),
+ (__v8hf)__C);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_mask3_fmsubadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
+ return (__m256h)__builtin_ia32_selectph_256(
+ (__mmask16)__U,
+ __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C),
+ (__v16hf)__C);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fnmadd_ph(__m128h __A,
+ __m128h __B,
+ __m128h __C) {
+ return (__m128h)__builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B,
+ (__v8hf)__C);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_mask_fnmadd_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
+ return (__m128h)__builtin_ia32_selectph_128(
+ (__mmask8)__U,
+ __builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B, (__v8hf)__C),
+ (__v8hf)__A);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fnmadd_ph(__m256h __A,
+ __m256h __B,
+ __m256h __C) {
+ return (__m256h)__builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B,
+ (__v16hf)__C);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_mask_fnmadd_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
+ return (__m256h)__builtin_ia32_selectph_256(
+ (__mmask16)__U,
+ __builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B, (__v16hf)__C),
+ (__v16hf)__A);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fnmsub_ph(__m128h __A,
+ __m128h __B,
+ __m128h __C) {
+ return (__m128h)__builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B,
+ -(__v8hf)__C);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_mask_fnmsub_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
+ return (__m128h)__builtin_ia32_selectph_128(
+ (__mmask8)__U,
+ __builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B, -(__v8hf)__C),
+ (__v8hf)__A);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_mask3_fnmsub_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
+ return (__m128h)__builtin_ia32_selectph_128(
+ (__mmask8)__U,
+ __builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B, -(__v8hf)__C),
+ (__v8hf)__C);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fnmsub_ph(__m256h __A,
+ __m256h __B,
+ __m256h __C) {
+ return (__m256h)__builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B,
+ -(__v16hf)__C);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_mask_fnmsub_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) {
+ return (__m256h)__builtin_ia32_selectph_256(
+ (__mmask16)__U,
+ __builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B, -(__v16hf)__C),
+ (__v16hf)__A);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_mask3_fnmsub_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) {
+ return (__m256h)__builtin_ia32_selectph_256(
+ (__mmask16)__U,
+ __builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B, -(__v16hf)__C),
+ (__v16hf)__C);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fcmul_pch(__m128h __A,
+ __m128h __B) {
+ return (__m128h)__builtin_ia32_vfcmulcph128_mask(
+ (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_undefined_ph(), (__mmask8)-1);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_mask_fcmul_pch(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
+ return (__m128h)__builtin_ia32_vfcmulcph128_mask((__v4sf)__A, (__v4sf)__B,
+ (__v4sf)__W, (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_fcmul_pch(__mmask8 __U, __m128h __A, __m128h __B) {
+ return (__m128h)__builtin_ia32_vfcmulcph128_mask(
+ (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_setzero_ph(), (__mmask8)__U);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fcmul_pch(__m256h __A,
+ __m256h __B) {
+ return (__m256h)__builtin_ia32_vfcmulcph256_mask(
+ (__v8sf)__A, (__v8sf)__B, (__v8sf)_mm256_undefined_ph(), (__mmask8)-1);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_mask_fcmul_pch(__m256h __W, __mmask8 __U, __m256h __A, __m256h __B) {
+ return (__m256h)__builtin_ia32_vfcmulcph256_mask((__v8sf)__A, (__v8sf)__B,
+ (__v8sf)__W, (__mmask8)__U);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_maskz_fcmul_pch(__mmask8 __U, __m256h __A, __m256h __B) {
+ return (__m256h)__builtin_ia32_vfcmulcph256_mask(
+ (__v8sf)__A, (__v8sf)__B, (__v8sf)_mm256_setzero_ph(), (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fcmadd_pch(__m128h __A,
+ __m128h __B,
+ __m128h __C) {
+ return (__m128h)__builtin_ia32_vfcmaddcph128_mask((__v4sf)__A, (__v4sf)__B,
+ (__v4sf)__C, (__mmask8)-1);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_mask_fcmadd_pch(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
+ return (__m128h)__builtin_ia32_selectps_128(
+ __U,
+ __builtin_ia32_vfcmaddcph128_mask((__v4sf)__A, (__v4sf)(__m128h)__B,
+ (__v4sf)__C, (__mmask8)__U),
+ (__v4sf)__A);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_mask3_fcmadd_pch(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
+ return (__m128h)__builtin_ia32_vfcmaddcph128_mask((__v4sf)__A, (__v4sf)__B,
+ (__v4sf)__C, (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_fcmadd_pch(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
+ return (__m128h)__builtin_ia32_vfcmaddcph128_maskz(
+ (__v4sf)__A, (__v4sf)__B, (__v4sf)__C, (__mmask8)__U);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fcmadd_pch(__m256h __A,
+ __m256h __B,
+ __m256h __C) {
+ return (__m256h)__builtin_ia32_vfcmaddcph256_mask((__v8sf)__A, (__v8sf)__B,
+ (__v8sf)__C, (__mmask8)-1);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_mask_fcmadd_pch(__m256h __A, __mmask8 __U, __m256h __B, __m256h __C) {
+ return (__m256h)__builtin_ia32_selectps_256(
+ __U,
+ __builtin_ia32_vfcmaddcph256_mask((__v8sf)__A, (__v8sf)__B, (__v8sf)__C,
+ (__mmask8)__U),
+ (__v8sf)__A);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_mask3_fcmadd_pch(__m256h __A, __m256h __B, __m256h __C, __mmask8 __U) {
+ return (__m256h)__builtin_ia32_vfcmaddcph256_mask((__v8sf)__A, (__v8sf)__B,
+ (__v8sf)__C, (__mmask8)__U);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_maskz_fcmadd_pch(__mmask8 __U, __m256h __A, __m256h __B, __m256h __C) {
+ return (__m256h)__builtin_ia32_vfcmaddcph256_maskz(
+ (__v8sf)__A, (__v8sf)__B, (__v8sf)__C, (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmul_pch(__m128h __A,
+ __m128h __B) {
+ return (__m128h)__builtin_ia32_vfmulcph128_mask(
+ (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_undefined_ph(), (__mmask8)-1);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmul_pch(__m128h __W,
+ __mmask8 __U,
+ __m128h __A,
+ __m128h __B) {
+ return (__m128h)__builtin_ia32_vfmulcph128_mask((__v4sf)__A, (__v4sf)__B,
+ (__v4sf)__W, (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_fmul_pch(__mmask8 __U, __m128h __A, __m128h __B) {
+ return (__m128h)__builtin_ia32_vfmulcph128_mask(
+ (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_setzero_ph(), (__mmask8)__U);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmul_pch(__m256h __A,
+ __m256h __B) {
+ return (__m256h)__builtin_ia32_vfmulcph256_mask(
+ (__v8sf)__A, (__v8sf)__B, (__v8sf)_mm256_undefined_ph(), (__mmask8)-1);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_mask_fmul_pch(__m256h __W, __mmask8 __U, __m256h __A, __m256h __B) {
+ return (__m256h)__builtin_ia32_vfmulcph256_mask((__v8sf)__A, (__v8sf)__B,
+ (__v8sf)__W, (__mmask8)__U);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_maskz_fmul_pch(__mmask8 __U, __m256h __A, __m256h __B) {
+ return (__m256h)__builtin_ia32_vfmulcph256_mask(
+ (__v8sf)__A, (__v8sf)__B, (__v8sf)_mm256_setzero_ph(), (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmadd_pch(__m128h __A,
+ __m128h __B,
+ __m128h __C) {
+ return (__m128h)__builtin_ia32_vfmaddcph128_mask((__v4sf)__A, (__v4sf)__B,
+ (__v4sf)__C, (__mmask8)-1);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_mask_fmadd_pch(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
+ return (__m128h)__builtin_ia32_selectps_128(
+ __U,
+ __builtin_ia32_vfmaddcph128_mask((__v4sf)__A, (__v4sf)__B, (__v4sf)__C,
+ (__mmask8)__U),
+ (__v4sf)__A);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_mask3_fmadd_pch(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
+ return (__m128h)__builtin_ia32_vfmaddcph128_mask((__v4sf)__A, (__v4sf)__B,
+ (__v4sf)__C, (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_maskz_fmadd_pch(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
+ return (__m128h)__builtin_ia32_vfmaddcph128_maskz((__v4sf)__A, (__v4sf)__B,
+ (__v4sf)__C, (__mmask8)__U);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmadd_pch(__m256h __A,
+ __m256h __B,
+ __m256h __C) {
+ return (__m256h)__builtin_ia32_vfmaddcph256_mask((__v8sf)__A, (__v8sf)__B,
+ (__v8sf)__C, (__mmask8)-1);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_mask_fmadd_pch(__m256h __A, __mmask8 __U, __m256h __B, __m256h __C) {
+ return (__m256h)__builtin_ia32_selectps_256(
+ __U,
+ __builtin_ia32_vfmaddcph256_mask((__v8sf)__A, (__v8sf)__B, (__v8sf)__C,
+ (__mmask8)__U),
+ (__v8sf)__A);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_mask3_fmadd_pch(__m256h __A, __m256h __B, __m256h __C, __mmask8 __U) {
+ return (__m256h)__builtin_ia32_vfmaddcph256_mask((__v8sf)__A, (__v8sf)__B,
+ (__v8sf)__C, (__mmask8)__U);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_maskz_fmadd_pch(__mmask8 __U, __m256h __A, __m256h __B, __m256h __C) {
+ return (__m256h)__builtin_ia32_vfmaddcph256_maskz((__v8sf)__A, (__v8sf)__B,
+ (__v8sf)__C, (__mmask8)__U);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_blend_ph(__mmask8 __U,
+ __m128h __A,
+ __m128h __W) {
+ return (__m128h)__builtin_ia32_selectph_128((__mmask8)__U, (__v8hf)__W,
+ (__v8hf)__A);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_mask_blend_ph(__mmask16 __U, __m256h __A, __m256h __W) {
+ return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U, (__v16hf)__W,
+ (__v16hf)__A);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_permutex2var_ph(__m128h __A, __m128i __I, __m128h __B) {
+ return (__m128h)__builtin_ia32_vpermi2varhi128((__v8hi)__A, (__v8hi)__I,
+ (__v8hi)__B);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_permutex2var_ph(__m256h __A, __m256i __I, __m256h __B) {
+ return (__m256h)__builtin_ia32_vpermi2varhi256((__v16hi)__A, (__v16hi)__I,
+ (__v16hi)__B);
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_permutexvar_ph(__m128i __A, __m128h __B) {
+ return (__m128h)__builtin_ia32_permvarhi128((__v8hi)__B, (__v8hi)__A);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256
+_mm256_permutexvar_ph(__m256i __A, __m256h __B) {
+ return (__m256h)__builtin_ia32_permvarhi256((__v16hi)__B, (__v16hi)__A);
+}
+
+static __inline__ _Float16 __DEFAULT_FN_ATTRS256
+_mm256_reduce_add_ph(__m256h __W) {
+ return __builtin_ia32_reduce_fadd_ph256(-0.0f16, __W);
+}
+
+static __inline__ _Float16 __DEFAULT_FN_ATTRS256
+_mm256_reduce_mul_ph(__m256h __W) {
+ return __builtin_ia32_reduce_fmul_ph256(1.0f16, __W);
+}
+
+static __inline__ _Float16 __DEFAULT_FN_ATTRS256
+_mm256_reduce_max_ph(__m256h __V) {
+ return __builtin_ia32_reduce_fmax_ph256(__V);
+}
+
+static __inline__ _Float16 __DEFAULT_FN_ATTRS256
+_mm256_reduce_min_ph(__m256h __V) {
+ return __builtin_ia32_reduce_fmin_ph256(__V);
+}
+
+static __inline__ _Float16 __DEFAULT_FN_ATTRS128
+_mm_reduce_add_ph(__m128h __W) {
+ return __builtin_ia32_reduce_fadd_ph128(-0.0f16, __W);
+}
+
+static __inline__ _Float16 __DEFAULT_FN_ATTRS128
+_mm_reduce_mul_ph(__m128h __W) {
+ return __builtin_ia32_reduce_fmul_ph128(1.0f16, __W);
+}
+
+static __inline__ _Float16 __DEFAULT_FN_ATTRS128
+_mm_reduce_max_ph(__m128h __V) {
+ return __builtin_ia32_reduce_fmax_ph128(__V);
+}
+
+static __inline__ _Float16 __DEFAULT_FN_ATTRS128
+_mm_reduce_min_ph(__m128h __V) {
+ return __builtin_ia32_reduce_fmin_ph128(__V);
+}
+
+// intrinsics below are aliases for f*mul_*ch
+#define _mm_mul_pch(A, B) _mm_fmul_pch(A, B)
+#define _mm_mask_mul_pch(W, U, A, B) _mm_mask_fmul_pch(W, U, A, B)
+#define _mm_maskz_mul_pch(U, A, B) _mm_maskz_fmul_pch(U, A, B)
+#define _mm256_mul_pch(A, B) _mm256_fmul_pch(A, B)
+#define _mm256_mask_mul_pch(W, U, A, B) _mm256_mask_fmul_pch(W, U, A, B)
+#define _mm256_maskz_mul_pch(U, A, B) _mm256_maskz_fmul_pch(U, A, B)
+
+#define _mm_cmul_pch(A, B) _mm_fcmul_pch(A, B)
+#define _mm_mask_cmul_pch(W, U, A, B) _mm_mask_fcmul_pch(W, U, A, B)
+#define _mm_maskz_cmul_pch(U, A, B) _mm_maskz_fcmul_pch(U, A, B)
+#define _mm256_cmul_pch(A, B) _mm256_fcmul_pch(A, B)
+#define _mm256_mask_cmul_pch(W, U, A, B) _mm256_mask_fcmul_pch(W, U, A, B)
+#define _mm256_maskz_cmul_pch(U, A, B) _mm256_maskz_fcmul_pch(U, A, B)
+
+#undef __DEFAULT_FN_ATTRS128
+#undef __DEFAULT_FN_ATTRS256
+
+#endif
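
For orientation only (not part of the imported patch): a minimal usage sketch of the AVX512-FP16/VL intrinsics added above, assuming a compiler built from this import and invoked with -mavx512fp16 -mavx512vl. The function names and mask value below are illustrative, not from the vendor sources.

#include <immintrin.h>

/* Fused multiply-add over 8 _Float16 lanes; lanes whose mask bit is 0 keep
   the value from a (merge masking via __builtin_ia32_selectph_128 above). */
static __m128h fma_low_lanes(__m128h a, __m128h b, __m128h c) {
  return _mm_mask_fmadd_ph(a, (__mmask8)0x0F, b, c);
}

/* Complex half-precision multiply: _mm_mul_pch is the alias defined above
   for _mm_fmul_pch, which treats each adjacent pair of _Float16 elements
   as one complex number. */
static __m128h cmul(__m128h a, __m128h b) {
  return _mm_mul_pch(a, b);
}
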
diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h
index 968c10efeac0..0519dba59081 100644
--- a/clang/lib/Headers/avx512vlintrin.h
+++ b/clang/lib/Headers/avx512vlintrin.h
@@ -771,124 +771,124 @@ _mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B)
}
#define _mm_cmp_epi32_mask(a, b, p) \
- (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
- (__v4si)(__m128i)(b), (int)(p), \
- (__mmask8)-1)
+ ((__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
+ (__v4si)(__m128i)(b), (int)(p), \
+ (__mmask8)-1))
#define _mm_mask_cmp_epi32_mask(m, a, b, p) \
- (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
- (__v4si)(__m128i)(b), (int)(p), \
- (__mmask8)(m))
+ ((__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
+ (__v4si)(__m128i)(b), (int)(p), \
+ (__mmask8)(m)))
#define _mm_cmp_epu32_mask(a, b, p) \
- (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
- (__v4si)(__m128i)(b), (int)(p), \
- (__mmask8)-1)
+ ((__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
+ (__v4si)(__m128i)(b), (int)(p), \
+ (__mmask8)-1))
#define _mm_mask_cmp_epu32_mask(m, a, b, p) \
- (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
- (__v4si)(__m128i)(b), (int)(p), \
- (__mmask8)(m))
+ ((__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
+ (__v4si)(__m128i)(b), (int)(p), \
+ (__mmask8)(m)))
#define _mm256_cmp_epi32_mask(a, b, p) \
- (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
- (__v8si)(__m256i)(b), (int)(p), \
- (__mmask8)-1)
+ ((__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
+ (__v8si)(__m256i)(b), (int)(p), \
+ (__mmask8)-1))
#define _mm256_mask_cmp_epi32_mask(m, a, b, p) \
- (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
- (__v8si)(__m256i)(b), (int)(p), \
- (__mmask8)(m))
+ ((__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
+ (__v8si)(__m256i)(b), (int)(p), \
+ (__mmask8)(m)))
#define _mm256_cmp_epu32_mask(a, b, p) \
- (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
- (__v8si)(__m256i)(b), (int)(p), \
- (__mmask8)-1)
+ ((__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
+ (__v8si)(__m256i)(b), (int)(p), \
+ (__mmask8)-1))
#define _mm256_mask_cmp_epu32_mask(m, a, b, p) \
- (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
- (__v8si)(__m256i)(b), (int)(p), \
- (__mmask8)(m))
+ ((__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
+ (__v8si)(__m256i)(b), (int)(p), \
+ (__mmask8)(m)))
#define _mm_cmp_epi64_mask(a, b, p) \
- (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
- (__v2di)(__m128i)(b), (int)(p), \
- (__mmask8)-1)
+ ((__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
+ (__v2di)(__m128i)(b), (int)(p), \
+ (__mmask8)-1))
#define _mm_mask_cmp_epi64_mask(m, a, b, p) \
- (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
- (__v2di)(__m128i)(b), (int)(p), \
- (__mmask8)(m))
+ ((__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
+ (__v2di)(__m128i)(b), (int)(p), \
+ (__mmask8)(m)))
#define _mm_cmp_epu64_mask(a, b, p) \
- (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
- (__v2di)(__m128i)(b), (int)(p), \
- (__mmask8)-1)
+ ((__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
+ (__v2di)(__m128i)(b), (int)(p), \
+ (__mmask8)-1))
#define _mm_mask_cmp_epu64_mask(m, a, b, p) \
- (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
- (__v2di)(__m128i)(b), (int)(p), \
- (__mmask8)(m))
+ ((__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
+ (__v2di)(__m128i)(b), (int)(p), \
+ (__mmask8)(m)))
#define _mm256_cmp_epi64_mask(a, b, p) \
- (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
- (__v4di)(__m256i)(b), (int)(p), \
- (__mmask8)-1)
+ ((__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
+ (__v4di)(__m256i)(b), (int)(p), \
+ (__mmask8)-1))
#define _mm256_mask_cmp_epi64_mask(m, a, b, p) \
- (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
- (__v4di)(__m256i)(b), (int)(p), \
- (__mmask8)(m))
+ ((__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
+ (__v4di)(__m256i)(b), (int)(p), \
+ (__mmask8)(m)))
#define _mm256_cmp_epu64_mask(a, b, p) \
- (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
- (__v4di)(__m256i)(b), (int)(p), \
- (__mmask8)-1)
+ ((__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
+ (__v4di)(__m256i)(b), (int)(p), \
+ (__mmask8)-1))
#define _mm256_mask_cmp_epu64_mask(m, a, b, p) \
- (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
- (__v4di)(__m256i)(b), (int)(p), \
- (__mmask8)(m))
+ ((__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
+ (__v4di)(__m256i)(b), (int)(p), \
+ (__mmask8)(m)))
#define _mm256_cmp_ps_mask(a, b, p) \
- (__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
- (__v8sf)(__m256)(b), (int)(p), \
- (__mmask8)-1)
+ ((__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
+ (__v8sf)(__m256)(b), (int)(p), \
+ (__mmask8)-1))
#define _mm256_mask_cmp_ps_mask(m, a, b, p) \
- (__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
- (__v8sf)(__m256)(b), (int)(p), \
- (__mmask8)(m))
+ ((__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
+ (__v8sf)(__m256)(b), (int)(p), \
+ (__mmask8)(m)))
#define _mm256_cmp_pd_mask(a, b, p) \
- (__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
- (__v4df)(__m256d)(b), (int)(p), \
- (__mmask8)-1)
+ ((__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
+ (__v4df)(__m256d)(b), (int)(p), \
+ (__mmask8)-1))
#define _mm256_mask_cmp_pd_mask(m, a, b, p) \
- (__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
- (__v4df)(__m256d)(b), (int)(p), \
- (__mmask8)(m))
+ ((__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
+ (__v4df)(__m256d)(b), (int)(p), \
+ (__mmask8)(m)))
#define _mm_cmp_ps_mask(a, b, p) \
- (__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
- (__v4sf)(__m128)(b), (int)(p), \
- (__mmask8)-1)
+ ((__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
+ (__v4sf)(__m128)(b), (int)(p), \
+ (__mmask8)-1))
#define _mm_mask_cmp_ps_mask(m, a, b, p) \
- (__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
- (__v4sf)(__m128)(b), (int)(p), \
- (__mmask8)(m))
+ ((__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
+ (__v4sf)(__m128)(b), (int)(p), \
+ (__mmask8)(m)))
#define _mm_cmp_pd_mask(a, b, p) \
- (__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
- (__v2df)(__m128d)(b), (int)(p), \
- (__mmask8)-1)
+ ((__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
+ (__v2df)(__m128d)(b), (int)(p), \
+ (__mmask8)-1))
#define _mm_mask_cmp_pd_mask(m, a, b, p) \
- (__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
- (__v2df)(__m128d)(b), (int)(p), \
- (__mmask8)(m))
+ ((__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
+ (__v2df)(__m128d)(b), (int)(p), \
+ (__mmask8)(m)))
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
@@ -3289,78 +3289,78 @@ _mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B) {
}
#define _mm_roundscale_pd(A, imm) \
- (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
- (int)(imm), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)-1)
+ ((__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
+ (int)(imm), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1))
#define _mm_mask_roundscale_pd(W, U, A, imm) \
- (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
- (int)(imm), \
- (__v2df)(__m128d)(W), \
- (__mmask8)(U))
+ ((__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
+ (int)(imm), \
+ (__v2df)(__m128d)(W), \
+ (__mmask8)(U)))
#define _mm_maskz_roundscale_pd(U, A, imm) \
- (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
- (int)(imm), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)(U))
+ ((__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
+ (int)(imm), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U)))
#define _mm256_roundscale_pd(A, imm) \
- (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
- (int)(imm), \
- (__v4df)_mm256_setzero_pd(), \
- (__mmask8)-1)
+ ((__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
+ (int)(imm), \
+ (__v4df)_mm256_setzero_pd(), \
+ (__mmask8)-1))
#define _mm256_mask_roundscale_pd(W, U, A, imm) \
- (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
- (int)(imm), \
- (__v4df)(__m256d)(W), \
- (__mmask8)(U))
+ ((__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
+ (int)(imm), \
+ (__v4df)(__m256d)(W), \
+ (__mmask8)(U)))
#define _mm256_maskz_roundscale_pd(U, A, imm) \
- (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
- (int)(imm), \
- (__v4df)_mm256_setzero_pd(), \
- (__mmask8)(U))
+ ((__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
+ (int)(imm), \
+ (__v4df)_mm256_setzero_pd(), \
+ (__mmask8)(U)))
#define _mm_roundscale_ps(A, imm) \
- (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)-1)
+ ((__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)-1))
#define _mm_mask_roundscale_ps(W, U, A, imm) \
- (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
- (__v4sf)(__m128)(W), \
- (__mmask8)(U))
+ ((__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
+ (__v4sf)(__m128)(W), \
+ (__mmask8)(U)))
#define _mm_maskz_roundscale_ps(U, A, imm) \
- (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)(U))
+ ((__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U)))
#define _mm256_roundscale_ps(A, imm) \
- (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
- (__v8sf)_mm256_setzero_ps(), \
- (__mmask8)-1)
+ ((__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
+ (__v8sf)_mm256_setzero_ps(), \
+ (__mmask8)-1))
#define _mm256_mask_roundscale_ps(W, U, A, imm) \
- (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
- (__v8sf)(__m256)(W), \
- (__mmask8)(U))
+ ((__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
+ (__v8sf)(__m256)(W), \
+ (__mmask8)(U)))
#define _mm256_maskz_roundscale_ps(U, A, imm) \
- (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
- (__v8sf)_mm256_setzero_ps(), \
- (__mmask8)(U))
+ ((__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
+ (__v8sf)_mm256_setzero_ps(), \
+ (__mmask8)(U)))
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_scalef_pd (__m128d __A, __m128d __B) {
@@ -4298,56 +4298,56 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
#define _mm_rol_epi32(a, b) \
- (__m128i)__builtin_ia32_prold128((__v4si)(__m128i)(a), (int)(b))
+ ((__m128i)__builtin_ia32_prold128((__v4si)(__m128i)(a), (int)(b)))
#define _mm_mask_rol_epi32(w, u, a, b) \
- (__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
- (__v4si)_mm_rol_epi32((a), (b)), \
- (__v4si)(__m128i)(w))
+ ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
+ (__v4si)_mm_rol_epi32((a), (b)), \
+ (__v4si)(__m128i)(w)))
#define _mm_maskz_rol_epi32(u, a, b) \
- (__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
- (__v4si)_mm_rol_epi32((a), (b)), \
- (__v4si)_mm_setzero_si128())
+ ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
+ (__v4si)_mm_rol_epi32((a), (b)), \
+ (__v4si)_mm_setzero_si128()))
#define _mm256_rol_epi32(a, b) \
- (__m256i)__builtin_ia32_prold256((__v8si)(__m256i)(a), (int)(b))
+ ((__m256i)__builtin_ia32_prold256((__v8si)(__m256i)(a), (int)(b)))
#define _mm256_mask_rol_epi32(w, u, a, b) \
- (__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
- (__v8si)_mm256_rol_epi32((a), (b)), \
- (__v8si)(__m256i)(w))
+ ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
+ (__v8si)_mm256_rol_epi32((a), (b)), \
+ (__v8si)(__m256i)(w)))
#define _mm256_maskz_rol_epi32(u, a, b) \
- (__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
- (__v8si)_mm256_rol_epi32((a), (b)), \
- (__v8si)_mm256_setzero_si256())
+ ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
+ (__v8si)_mm256_rol_epi32((a), (b)), \
+ (__v8si)_mm256_setzero_si256()))
#define _mm_rol_epi64(a, b) \
- (__m128i)__builtin_ia32_prolq128((__v2di)(__m128i)(a), (int)(b))
+ ((__m128i)__builtin_ia32_prolq128((__v2di)(__m128i)(a), (int)(b)))
#define _mm_mask_rol_epi64(w, u, a, b) \
- (__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
- (__v2di)_mm_rol_epi64((a), (b)), \
- (__v2di)(__m128i)(w))
+ ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
+ (__v2di)_mm_rol_epi64((a), (b)), \
+ (__v2di)(__m128i)(w)))
#define _mm_maskz_rol_epi64(u, a, b) \
- (__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
- (__v2di)_mm_rol_epi64((a), (b)), \
- (__v2di)_mm_setzero_si128())
+ ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
+ (__v2di)_mm_rol_epi64((a), (b)), \
+ (__v2di)_mm_setzero_si128()))
#define _mm256_rol_epi64(a, b) \
- (__m256i)__builtin_ia32_prolq256((__v4di)(__m256i)(a), (int)(b))
+ ((__m256i)__builtin_ia32_prolq256((__v4di)(__m256i)(a), (int)(b)))
#define _mm256_mask_rol_epi64(w, u, a, b) \
- (__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
- (__v4di)_mm256_rol_epi64((a), (b)), \
- (__v4di)(__m256i)(w))
+ ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
+ (__v4di)_mm256_rol_epi64((a), (b)), \
+ (__v4di)(__m256i)(w)))
#define _mm256_maskz_rol_epi64(u, a, b) \
- (__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
- (__v4di)_mm256_rol_epi64((a), (b)), \
- (__v4di)_mm256_setzero_si256())
+ ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
+ (__v4di)_mm256_rol_epi64((a), (b)), \
+ (__v4di)_mm256_setzero_si256()))
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_rolv_epi32 (__m128i __A, __m128i __B)
@@ -4438,56 +4438,56 @@ _mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
}
#define _mm_ror_epi32(a, b) \
- (__m128i)__builtin_ia32_prord128((__v4si)(__m128i)(a), (int)(b))
+ ((__m128i)__builtin_ia32_prord128((__v4si)(__m128i)(a), (int)(b)))
#define _mm_mask_ror_epi32(w, u, a, b) \
- (__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
- (__v4si)_mm_ror_epi32((a), (b)), \
- (__v4si)(__m128i)(w))
+ ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
+ (__v4si)_mm_ror_epi32((a), (b)), \
+ (__v4si)(__m128i)(w)))
#define _mm_maskz_ror_epi32(u, a, b) \
- (__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
- (__v4si)_mm_ror_epi32((a), (b)), \
- (__v4si)_mm_setzero_si128())
+ ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
+ (__v4si)_mm_ror_epi32((a), (b)), \
+ (__v4si)_mm_setzero_si128()))
#define _mm256_ror_epi32(a, b) \
- (__m256i)__builtin_ia32_prord256((__v8si)(__m256i)(a), (int)(b))
+ ((__m256i)__builtin_ia32_prord256((__v8si)(__m256i)(a), (int)(b)))
#define _mm256_mask_ror_epi32(w, u, a, b) \
- (__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
- (__v8si)_mm256_ror_epi32((a), (b)), \
- (__v8si)(__m256i)(w))
+ ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
+ (__v8si)_mm256_ror_epi32((a), (b)), \
+ (__v8si)(__m256i)(w)))
#define _mm256_maskz_ror_epi32(u, a, b) \
- (__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
- (__v8si)_mm256_ror_epi32((a), (b)), \
- (__v8si)_mm256_setzero_si256())
+ ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
+ (__v8si)_mm256_ror_epi32((a), (b)), \
+ (__v8si)_mm256_setzero_si256()))
#define _mm_ror_epi64(a, b) \
- (__m128i)__builtin_ia32_prorq128((__v2di)(__m128i)(a), (int)(b))
+ ((__m128i)__builtin_ia32_prorq128((__v2di)(__m128i)(a), (int)(b)))
#define _mm_mask_ror_epi64(w, u, a, b) \
- (__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
- (__v2di)_mm_ror_epi64((a), (b)), \
- (__v2di)(__m128i)(w))
+ ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
+ (__v2di)_mm_ror_epi64((a), (b)), \
+ (__v2di)(__m128i)(w)))
#define _mm_maskz_ror_epi64(u, a, b) \
- (__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
- (__v2di)_mm_ror_epi64((a), (b)), \
- (__v2di)_mm_setzero_si128())
+ ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
+ (__v2di)_mm_ror_epi64((a), (b)), \
+ (__v2di)_mm_setzero_si128()))
#define _mm256_ror_epi64(a, b) \
- (__m256i)__builtin_ia32_prorq256((__v4di)(__m256i)(a), (int)(b))
+ ((__m256i)__builtin_ia32_prorq256((__v4di)(__m256i)(a), (int)(b)))
#define _mm256_mask_ror_epi64(w, u, a, b) \
- (__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
- (__v4di)_mm256_ror_epi64((a), (b)), \
- (__v4di)(__m256i)(w))
+ ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
+ (__v4di)_mm256_ror_epi64((a), (b)), \
+ (__v4di)(__m256i)(w)))
#define _mm256_maskz_ror_epi64(u, a, b) \
- (__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
- (__v4di)_mm256_ror_epi64((a), (b)), \
- (__v4di)_mm256_setzero_si256())
+ ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
+ (__v4di)_mm256_ror_epi64((a), (b)), \
+ (__v4di)_mm256_setzero_si256()))
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
@@ -5356,76 +5356,76 @@ _mm256_maskz_set1_epi64 (__mmask8 __M, long long __A)
}
#define _mm_fixupimm_pd(A, B, C, imm) \
- (__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2di)(__m128i)(C), (int)(imm), \
- (__mmask8)-1)
+ ((__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2di)(__m128i)(C), (int)(imm), \
+ (__mmask8)-1))
#define _mm_mask_fixupimm_pd(A, U, B, C, imm) \
- (__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2di)(__m128i)(C), (int)(imm), \
- (__mmask8)(U))
+ ((__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2di)(__m128i)(C), (int)(imm), \
+ (__mmask8)(U)))
#define _mm_maskz_fixupimm_pd(U, A, B, C, imm) \
- (__m128d)__builtin_ia32_fixupimmpd128_maskz((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2di)(__m128i)(C), \
- (int)(imm), (__mmask8)(U))
+ ((__m128d)__builtin_ia32_fixupimmpd128_maskz((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2di)(__m128i)(C), \
+ (int)(imm), (__mmask8)(U)))
#define _mm256_fixupimm_pd(A, B, C, imm) \
- (__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \
- (__v4df)(__m256d)(B), \
- (__v4di)(__m256i)(C), (int)(imm), \
- (__mmask8)-1)
+ ((__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \
+ (__v4df)(__m256d)(B), \
+ (__v4di)(__m256i)(C), (int)(imm), \
+ (__mmask8)-1))
#define _mm256_mask_fixupimm_pd(A, U, B, C, imm) \
- (__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \
- (__v4df)(__m256d)(B), \
- (__v4di)(__m256i)(C), (int)(imm), \
- (__mmask8)(U))
+ ((__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \
+ (__v4df)(__m256d)(B), \
+ (__v4di)(__m256i)(C), (int)(imm), \
+ (__mmask8)(U)))
#define _mm256_maskz_fixupimm_pd(U, A, B, C, imm) \
- (__m256d)__builtin_ia32_fixupimmpd256_maskz((__v4df)(__m256d)(A), \
- (__v4df)(__m256d)(B), \
- (__v4di)(__m256i)(C), \
- (int)(imm), (__mmask8)(U))
+ ((__m256d)__builtin_ia32_fixupimmpd256_maskz((__v4df)(__m256d)(A), \
+ (__v4df)(__m256d)(B), \
+ (__v4di)(__m256i)(C), \
+ (int)(imm), (__mmask8)(U)))
#define _mm_fixupimm_ps(A, B, C, imm) \
- (__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4si)(__m128i)(C), (int)(imm), \
- (__mmask8)-1)
+ ((__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4si)(__m128i)(C), (int)(imm), \
+ (__mmask8)-1))
#define _mm_mask_fixupimm_ps(A, U, B, C, imm) \
- (__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4si)(__m128i)(C), (int)(imm), \
- (__mmask8)(U))
-
-#define _mm_maskz_fixupimm_ps(U, A, B, C, imm) \
- (__m128)__builtin_ia32_fixupimmps128_maskz((__v4sf)(__m128)(A), \
+ ((__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \
(__v4sf)(__m128)(B), \
(__v4si)(__m128i)(C), (int)(imm), \
- (__mmask8)(U))
+ (__mmask8)(U)))
+
+#define _mm_maskz_fixupimm_ps(U, A, B, C, imm) \
+ ((__m128)__builtin_ia32_fixupimmps128_maskz((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4si)(__m128i)(C), (int)(imm), \
+ (__mmask8)(U)))
#define _mm256_fixupimm_ps(A, B, C, imm) \
- (__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \
- (__v8sf)(__m256)(B), \
- (__v8si)(__m256i)(C), (int)(imm), \
- (__mmask8)-1)
+ ((__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \
+ (__v8sf)(__m256)(B), \
+ (__v8si)(__m256i)(C), (int)(imm), \
+ (__mmask8)-1))
#define _mm256_mask_fixupimm_ps(A, U, B, C, imm) \
- (__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \
- (__v8sf)(__m256)(B), \
- (__v8si)(__m256i)(C), (int)(imm), \
- (__mmask8)(U))
-
-#define _mm256_maskz_fixupimm_ps(U, A, B, C, imm) \
- (__m256)__builtin_ia32_fixupimmps256_maskz((__v8sf)(__m256)(A), \
+ ((__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \
(__v8sf)(__m256)(B), \
(__v8si)(__m256i)(C), (int)(imm), \
- (__mmask8)(U))
+ (__mmask8)(U)))
+
+#define _mm256_maskz_fixupimm_ps(U, A, B, C, imm) \
+ ((__m256)__builtin_ia32_fixupimmps256_maskz((__v8sf)(__m256)(A), \
+ (__v8sf)(__m256)(B), \
+ (__v8si)(__m256i)(C), (int)(imm), \
+ (__mmask8)(U)))
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P)
@@ -6033,44 +6033,44 @@ _mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A)
}
#define _mm_mask_permute_pd(W, U, X, C) \
- (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
- (__v2df)_mm_permute_pd((X), (C)), \
- (__v2df)(__m128d)(W))
+ ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
+ (__v2df)_mm_permute_pd((X), (C)), \
+ (__v2df)(__m128d)(W)))
#define _mm_maskz_permute_pd(U, X, C) \
- (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
- (__v2df)_mm_permute_pd((X), (C)), \
- (__v2df)_mm_setzero_pd())
+ ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
+ (__v2df)_mm_permute_pd((X), (C)), \
+ (__v2df)_mm_setzero_pd()))
#define _mm256_mask_permute_pd(W, U, X, C) \
- (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
- (__v4df)_mm256_permute_pd((X), (C)), \
- (__v4df)(__m256d)(W))
+ ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
+ (__v4df)_mm256_permute_pd((X), (C)), \
+ (__v4df)(__m256d)(W)))
#define _mm256_maskz_permute_pd(U, X, C) \
- (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
- (__v4df)_mm256_permute_pd((X), (C)), \
- (__v4df)_mm256_setzero_pd())
+ ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
+ (__v4df)_mm256_permute_pd((X), (C)), \
+ (__v4df)_mm256_setzero_pd()))
#define _mm_mask_permute_ps(W, U, X, C) \
- (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
- (__v4sf)_mm_permute_ps((X), (C)), \
- (__v4sf)(__m128)(W))
+ ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
+ (__v4sf)_mm_permute_ps((X), (C)), \
+ (__v4sf)(__m128)(W)))
#define _mm_maskz_permute_ps(U, X, C) \
- (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
- (__v4sf)_mm_permute_ps((X), (C)), \
- (__v4sf)_mm_setzero_ps())
+ ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
+ (__v4sf)_mm_permute_ps((X), (C)), \
+ (__v4sf)_mm_setzero_ps()))
#define _mm256_mask_permute_ps(W, U, X, C) \
- (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
- (__v8sf)_mm256_permute_ps((X), (C)), \
- (__v8sf)(__m256)(W))
+ ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
+ (__v8sf)_mm256_permute_ps((X), (C)), \
+ (__v8sf)(__m256)(W)))
#define _mm256_maskz_permute_ps(U, X, C) \
- (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
- (__v8sf)_mm256_permute_ps((X), (C)), \
- (__v8sf)_mm256_setzero_ps())
+ ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
+ (__v8sf)_mm256_permute_ps((X), (C)), \
+ (__v8sf)_mm256_setzero_ps()))
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C)
@@ -6526,175 +6526,175 @@ _mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, unsigned int __imm)
}
#define _mm_ternarylogic_epi32(A, B, C, imm) \
- (__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \
- (__v4si)(__m128i)(B), \
- (__v4si)(__m128i)(C), (int)(imm), \
- (__mmask8)-1)
+ ((__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \
+ (__v4si)(__m128i)(B), \
+ (__v4si)(__m128i)(C), (int)(imm), \
+ (__mmask8)-1))
#define _mm_mask_ternarylogic_epi32(A, U, B, C, imm) \
- (__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \
- (__v4si)(__m128i)(B), \
- (__v4si)(__m128i)(C), (int)(imm), \
- (__mmask8)(U))
-
-#define _mm_maskz_ternarylogic_epi32(U, A, B, C, imm) \
- (__m128i)__builtin_ia32_pternlogd128_maskz((__v4si)(__m128i)(A), \
+ ((__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \
(__v4si)(__m128i)(B), \
(__v4si)(__m128i)(C), (int)(imm), \
- (__mmask8)(U))
+ (__mmask8)(U)))
+
+#define _mm_maskz_ternarylogic_epi32(U, A, B, C, imm) \
+ ((__m128i)__builtin_ia32_pternlogd128_maskz((__v4si)(__m128i)(A), \
+ (__v4si)(__m128i)(B), \
+ (__v4si)(__m128i)(C), (int)(imm), \
+ (__mmask8)(U)))
#define _mm256_ternarylogic_epi32(A, B, C, imm) \
- (__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \
- (__v8si)(__m256i)(B), \
- (__v8si)(__m256i)(C), (int)(imm), \
- (__mmask8)-1)
+ ((__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \
+ (__v8si)(__m256i)(B), \
+ (__v8si)(__m256i)(C), (int)(imm), \
+ (__mmask8)-1))
#define _mm256_mask_ternarylogic_epi32(A, U, B, C, imm) \
- (__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \
- (__v8si)(__m256i)(B), \
- (__v8si)(__m256i)(C), (int)(imm), \
- (__mmask8)(U))
-
-#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm) \
- (__m256i)__builtin_ia32_pternlogd256_maskz((__v8si)(__m256i)(A), \
+ ((__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \
(__v8si)(__m256i)(B), \
(__v8si)(__m256i)(C), (int)(imm), \
- (__mmask8)(U))
+ (__mmask8)(U)))
+
+#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm) \
+ ((__m256i)__builtin_ia32_pternlogd256_maskz((__v8si)(__m256i)(A), \
+ (__v8si)(__m256i)(B), \
+ (__v8si)(__m256i)(C), (int)(imm), \
+ (__mmask8)(U)))
#define _mm_ternarylogic_epi64(A, B, C, imm) \
- (__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \
- (__v2di)(__m128i)(B), \
- (__v2di)(__m128i)(C), (int)(imm), \
- (__mmask8)-1)
+ ((__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \
+ (__v2di)(__m128i)(B), \
+ (__v2di)(__m128i)(C), (int)(imm), \
+ (__mmask8)-1))
#define _mm_mask_ternarylogic_epi64(A, U, B, C, imm) \
- (__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \
- (__v2di)(__m128i)(B), \
- (__v2di)(__m128i)(C), (int)(imm), \
- (__mmask8)(U))
-
-#define _mm_maskz_ternarylogic_epi64(U, A, B, C, imm) \
- (__m128i)__builtin_ia32_pternlogq128_maskz((__v2di)(__m128i)(A), \
+ ((__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \
(__v2di)(__m128i)(B), \
(__v2di)(__m128i)(C), (int)(imm), \
- (__mmask8)(U))
+ (__mmask8)(U)))
+
+#define _mm_maskz_ternarylogic_epi64(U, A, B, C, imm) \
+ ((__m128i)__builtin_ia32_pternlogq128_maskz((__v2di)(__m128i)(A), \
+ (__v2di)(__m128i)(B), \
+ (__v2di)(__m128i)(C), (int)(imm), \
+ (__mmask8)(U)))
#define _mm256_ternarylogic_epi64(A, B, C, imm) \
- (__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \
- (__v4di)(__m256i)(B), \
- (__v4di)(__m256i)(C), (int)(imm), \
- (__mmask8)-1)
+ ((__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \
+ (__v4di)(__m256i)(B), \
+ (__v4di)(__m256i)(C), (int)(imm), \
+ (__mmask8)-1))
#define _mm256_mask_ternarylogic_epi64(A, U, B, C, imm) \
- (__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \
- (__v4di)(__m256i)(B), \
- (__v4di)(__m256i)(C), (int)(imm), \
- (__mmask8)(U))
-
-#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm) \
- (__m256i)__builtin_ia32_pternlogq256_maskz((__v4di)(__m256i)(A), \
+ ((__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \
(__v4di)(__m256i)(B), \
(__v4di)(__m256i)(C), (int)(imm), \
- (__mmask8)(U))
+ (__mmask8)(U)))
+
+#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm) \
+ ((__m256i)__builtin_ia32_pternlogq256_maskz((__v4di)(__m256i)(A), \
+ (__v4di)(__m256i)(B), \
+ (__v4di)(__m256i)(C), (int)(imm), \
+ (__mmask8)(U)))
#define _mm256_shuffle_f32x4(A, B, imm) \
- (__m256)__builtin_ia32_shuf_f32x4_256((__v8sf)(__m256)(A), \
- (__v8sf)(__m256)(B), (int)(imm))
+ ((__m256)__builtin_ia32_shuf_f32x4_256((__v8sf)(__m256)(A), \
+ (__v8sf)(__m256)(B), (int)(imm)))
#define _mm256_mask_shuffle_f32x4(W, U, A, B, imm) \
- (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
- (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \
- (__v8sf)(__m256)(W))
+ ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
+ (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \
+ (__v8sf)(__m256)(W)))
#define _mm256_maskz_shuffle_f32x4(U, A, B, imm) \
- (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
- (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \
- (__v8sf)_mm256_setzero_ps())
+ ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
+ (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \
+ (__v8sf)_mm256_setzero_ps()))
#define _mm256_shuffle_f64x2(A, B, imm) \
- (__m256d)__builtin_ia32_shuf_f64x2_256((__v4df)(__m256d)(A), \
- (__v4df)(__m256d)(B), (int)(imm))
+ ((__m256d)__builtin_ia32_shuf_f64x2_256((__v4df)(__m256d)(A), \
+ (__v4df)(__m256d)(B), (int)(imm)))
#define _mm256_mask_shuffle_f64x2(W, U, A, B, imm) \
- (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
- (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \
- (__v4df)(__m256d)(W))
+ ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
+ (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \
+ (__v4df)(__m256d)(W)))
#define _mm256_maskz_shuffle_f64x2(U, A, B, imm) \
- (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
- (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \
- (__v4df)_mm256_setzero_pd())
+ ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
+ (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \
+ (__v4df)_mm256_setzero_pd()))
#define _mm256_shuffle_i32x4(A, B, imm) \
- (__m256i)__builtin_ia32_shuf_i32x4_256((__v8si)(__m256i)(A), \
- (__v8si)(__m256i)(B), (int)(imm))
+ ((__m256i)__builtin_ia32_shuf_i32x4_256((__v8si)(__m256i)(A), \
+ (__v8si)(__m256i)(B), (int)(imm)))
#define _mm256_mask_shuffle_i32x4(W, U, A, B, imm) \
- (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
- (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \
- (__v8si)(__m256i)(W))
+ ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
+ (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \
+ (__v8si)(__m256i)(W)))
#define _mm256_maskz_shuffle_i32x4(U, A, B, imm) \
- (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
- (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \
- (__v8si)_mm256_setzero_si256())
+ ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
+ (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \
+ (__v8si)_mm256_setzero_si256()))
#define _mm256_shuffle_i64x2(A, B, imm) \
- (__m256i)__builtin_ia32_shuf_i64x2_256((__v4di)(__m256i)(A), \
- (__v4di)(__m256i)(B), (int)(imm))
+ ((__m256i)__builtin_ia32_shuf_i64x2_256((__v4di)(__m256i)(A), \
+ (__v4di)(__m256i)(B), (int)(imm)))
#define _mm256_mask_shuffle_i64x2(W, U, A, B, imm) \
- (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
- (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \
- (__v4di)(__m256i)(W))
+ ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
+ (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \
+ (__v4di)(__m256i)(W)))
#define _mm256_maskz_shuffle_i64x2(U, A, B, imm) \
- (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
- (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \
- (__v4di)_mm256_setzero_si256())
+ ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
+ (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \
+ (__v4di)_mm256_setzero_si256()))
#define _mm_mask_shuffle_pd(W, U, A, B, M) \
- (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
- (__v2df)_mm_shuffle_pd((A), (B), (M)), \
- (__v2df)(__m128d)(W))
+ ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
+ (__v2df)_mm_shuffle_pd((A), (B), (M)), \
+ (__v2df)(__m128d)(W)))
#define _mm_maskz_shuffle_pd(U, A, B, M) \
- (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
- (__v2df)_mm_shuffle_pd((A), (B), (M)), \
- (__v2df)_mm_setzero_pd())
+ ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
+ (__v2df)_mm_shuffle_pd((A), (B), (M)), \
+ (__v2df)_mm_setzero_pd()))
#define _mm256_mask_shuffle_pd(W, U, A, B, M) \
- (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
- (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
- (__v4df)(__m256d)(W))
+ ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
+ (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
+ (__v4df)(__m256d)(W)))
#define _mm256_maskz_shuffle_pd(U, A, B, M) \
- (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
- (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
- (__v4df)_mm256_setzero_pd())
+ ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
+ (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
+ (__v4df)_mm256_setzero_pd()))
#define _mm_mask_shuffle_ps(W, U, A, B, M) \
- (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
- (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
- (__v4sf)(__m128)(W))
+ ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
+ (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
+ (__v4sf)(__m128)(W)))
#define _mm_maskz_shuffle_ps(U, A, B, M) \
- (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
- (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
- (__v4sf)_mm_setzero_ps())
+ ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
+ (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
+ (__v4sf)_mm_setzero_ps()))
#define _mm256_mask_shuffle_ps(W, U, A, B, M) \
- (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
- (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
- (__v8sf)(__m256)(W))
+ ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
+ (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
+ (__v8sf)(__m256)(W)))
#define _mm256_maskz_shuffle_ps(U, A, B, M) \
- (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
- (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
- (__v8sf)_mm256_setzero_ps())
+ ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
+ (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
+ (__v8sf)_mm256_setzero_ps()))
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_rsqrt14_pd (__m128d __A)
@@ -7834,262 +7834,262 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
}
#define _mm256_extractf32x4_ps(A, imm) \
- (__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
- (int)(imm), \
- (__v4sf)_mm_undefined_ps(), \
- (__mmask8)-1)
+ ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
+ (int)(imm), \
+ (__v4sf)_mm_undefined_ps(), \
+ (__mmask8)-1))
#define _mm256_mask_extractf32x4_ps(W, U, A, imm) \
- (__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
- (int)(imm), \
- (__v4sf)(__m128)(W), \
- (__mmask8)(U))
+ ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
+ (int)(imm), \
+ (__v4sf)(__m128)(W), \
+ (__mmask8)(U)))
#define _mm256_maskz_extractf32x4_ps(U, A, imm) \
- (__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
- (int)(imm), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)(U))
+ ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
+ (int)(imm), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U)))
#define _mm256_extracti32x4_epi32(A, imm) \
- (__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
- (int)(imm), \
- (__v4si)_mm_undefined_si128(), \
- (__mmask8)-1)
+ ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
+ (int)(imm), \
+ (__v4si)_mm_undefined_si128(), \
+ (__mmask8)-1))
#define _mm256_mask_extracti32x4_epi32(W, U, A, imm) \
- (__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
- (int)(imm), \
- (__v4si)(__m128i)(W), \
- (__mmask8)(U))
+ ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
+ (int)(imm), \
+ (__v4si)(__m128i)(W), \
+ (__mmask8)(U)))
#define _mm256_maskz_extracti32x4_epi32(U, A, imm) \
- (__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
- (int)(imm), \
- (__v4si)_mm_setzero_si128(), \
- (__mmask8)(U))
+ ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
+ (int)(imm), \
+ (__v4si)_mm_setzero_si128(), \
+ (__mmask8)(U)))
#define _mm256_insertf32x4(A, B, imm) \
- (__m256)__builtin_ia32_insertf32x4_256((__v8sf)(__m256)(A), \
- (__v4sf)(__m128)(B), (int)(imm))
+ ((__m256)__builtin_ia32_insertf32x4_256((__v8sf)(__m256)(A), \
+ (__v4sf)(__m128)(B), (int)(imm)))
#define _mm256_mask_insertf32x4(W, U, A, B, imm) \
- (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
+ ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
(__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
- (__v8sf)(__m256)(W))
+ (__v8sf)(__m256)(W)))
#define _mm256_maskz_insertf32x4(U, A, B, imm) \
- (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
+ ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
(__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
- (__v8sf)_mm256_setzero_ps())
+ (__v8sf)_mm256_setzero_ps()))
#define _mm256_inserti32x4(A, B, imm) \
- (__m256i)__builtin_ia32_inserti32x4_256((__v8si)(__m256i)(A), \
- (__v4si)(__m128i)(B), (int)(imm))
+ ((__m256i)__builtin_ia32_inserti32x4_256((__v8si)(__m256i)(A), \
+ (__v4si)(__m128i)(B), (int)(imm)))
#define _mm256_mask_inserti32x4(W, U, A, B, imm) \
- (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
+ ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
(__v8si)_mm256_inserti32x4((A), (B), (imm)), \
- (__v8si)(__m256i)(W))
+ (__v8si)(__m256i)(W)))
#define _mm256_maskz_inserti32x4(U, A, B, imm) \
- (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
+ ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
(__v8si)_mm256_inserti32x4((A), (B), (imm)), \
- (__v8si)_mm256_setzero_si256())
+ (__v8si)_mm256_setzero_si256()))
#define _mm_getmant_pd(A, B, C) \
- (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
- (int)(((C)<<2) | (B)), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)-1)
+ ((__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
+ (int)(((C)<<2) | (B)), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1))
#define _mm_mask_getmant_pd(W, U, A, B, C) \
- (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
- (int)(((C)<<2) | (B)), \
- (__v2df)(__m128d)(W), \
- (__mmask8)(U))
+ ((__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
+ (int)(((C)<<2) | (B)), \
+ (__v2df)(__m128d)(W), \
+ (__mmask8)(U)))
#define _mm_maskz_getmant_pd(U, A, B, C) \
- (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
- (int)(((C)<<2) | (B)), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)(U))
+ ((__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
+ (int)(((C)<<2) | (B)), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(U)))
#define _mm256_getmant_pd(A, B, C) \
- (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
- (int)(((C)<<2) | (B)), \
- (__v4df)_mm256_setzero_pd(), \
- (__mmask8)-1)
+ ((__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
+ (int)(((C)<<2) | (B)), \
+ (__v4df)_mm256_setzero_pd(), \
+ (__mmask8)-1))
#define _mm256_mask_getmant_pd(W, U, A, B, C) \
- (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
- (int)(((C)<<2) | (B)), \
- (__v4df)(__m256d)(W), \
- (__mmask8)(U))
+ ((__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
+ (int)(((C)<<2) | (B)), \
+ (__v4df)(__m256d)(W), \
+ (__mmask8)(U)))
#define _mm256_maskz_getmant_pd(U, A, B, C) \
- (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
- (int)(((C)<<2) | (B)), \
- (__v4df)_mm256_setzero_pd(), \
- (__mmask8)(U))
+ ((__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
+ (int)(((C)<<2) | (B)), \
+ (__v4df)_mm256_setzero_pd(), \
+ (__mmask8)(U)))
#define _mm_getmant_ps(A, B, C) \
- (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
- (int)(((C)<<2) | (B)), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)-1)
+ ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
+ (int)(((C)<<2) | (B)), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)-1))
#define _mm_mask_getmant_ps(W, U, A, B, C) \
- (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
- (int)(((C)<<2) | (B)), \
- (__v4sf)(__m128)(W), \
- (__mmask8)(U))
+ ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
+ (int)(((C)<<2) | (B)), \
+ (__v4sf)(__m128)(W), \
+ (__mmask8)(U)))
#define _mm_maskz_getmant_ps(U, A, B, C) \
- (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
- (int)(((C)<<2) | (B)), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)(U))
+ ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
+ (int)(((C)<<2) | (B)), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(U)))
#define _mm256_getmant_ps(A, B, C) \
- (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
- (int)(((C)<<2) | (B)), \
- (__v8sf)_mm256_setzero_ps(), \
- (__mmask8)-1)
+ ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
+ (int)(((C)<<2) | (B)), \
+ (__v8sf)_mm256_setzero_ps(), \
+ (__mmask8)-1))
#define _mm256_mask_getmant_ps(W, U, A, B, C) \
- (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
- (int)(((C)<<2) | (B)), \
- (__v8sf)(__m256)(W), \
- (__mmask8)(U))
+ ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
+ (int)(((C)<<2) | (B)), \
+ (__v8sf)(__m256)(W), \
+ (__mmask8)(U)))
#define _mm256_maskz_getmant_ps(U, A, B, C) \
- (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
- (int)(((C)<<2) | (B)), \
- (__v8sf)_mm256_setzero_ps(), \
- (__mmask8)(U))
+ ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
+ (int)(((C)<<2) | (B)), \
+ (__v8sf)_mm256_setzero_ps(), \
+ (__mmask8)(U)))
#define _mm_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \
- (__m128d)__builtin_ia32_gather3div2df((__v2df)(__m128d)(v1_old), \
- (void const *)(addr), \
- (__v2di)(__m128i)(index), \
- (__mmask8)(mask), (int)(scale))
+ ((__m128d)__builtin_ia32_gather3div2df((__v2df)(__m128d)(v1_old), \
+ (void const *)(addr), \
+ (__v2di)(__m128i)(index), \
+ (__mmask8)(mask), (int)(scale)))
#define _mm_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \
- (__m128i)__builtin_ia32_gather3div2di((__v2di)(__m128i)(v1_old), \
- (void const *)(addr), \
- (__v2di)(__m128i)(index), \
- (__mmask8)(mask), (int)(scale))
+ ((__m128i)__builtin_ia32_gather3div2di((__v2di)(__m128i)(v1_old), \
+ (void const *)(addr), \
+ (__v2di)(__m128i)(index), \
+ (__mmask8)(mask), (int)(scale)))
#define _mm256_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \
- (__m256d)__builtin_ia32_gather3div4df((__v4df)(__m256d)(v1_old), \
- (void const *)(addr), \
- (__v4di)(__m256i)(index), \
- (__mmask8)(mask), (int)(scale))
+ ((__m256d)__builtin_ia32_gather3div4df((__v4df)(__m256d)(v1_old), \
+ (void const *)(addr), \
+ (__v4di)(__m256i)(index), \
+ (__mmask8)(mask), (int)(scale)))
#define _mm256_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \
- (__m256i)__builtin_ia32_gather3div4di((__v4di)(__m256i)(v1_old), \
- (void const *)(addr), \
- (__v4di)(__m256i)(index), \
- (__mmask8)(mask), (int)(scale))
+ ((__m256i)__builtin_ia32_gather3div4di((__v4di)(__m256i)(v1_old), \
+ (void const *)(addr), \
+ (__v4di)(__m256i)(index), \
+ (__mmask8)(mask), (int)(scale)))
#define _mm_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \
- (__m128)__builtin_ia32_gather3div4sf((__v4sf)(__m128)(v1_old), \
- (void const *)(addr), \
- (__v2di)(__m128i)(index), \
- (__mmask8)(mask), (int)(scale))
-
-#define _mm_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \
- (__m128i)__builtin_ia32_gather3div4si((__v4si)(__m128i)(v1_old), \
+ ((__m128)__builtin_ia32_gather3div4sf((__v4sf)(__m128)(v1_old), \
(void const *)(addr), \
(__v2di)(__m128i)(index), \
- (__mmask8)(mask), (int)(scale))
+ (__mmask8)(mask), (int)(scale)))
-#define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \
- (__m128)__builtin_ia32_gather3div8sf((__v4sf)(__m128)(v1_old), \
- (void const *)(addr), \
- (__v4di)(__m256i)(index), \
- (__mmask8)(mask), (int)(scale))
+#define _mm_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \
+ ((__m128i)__builtin_ia32_gather3div4si((__v4si)(__m128i)(v1_old), \
+ (void const *)(addr), \
+ (__v2di)(__m128i)(index), \
+ (__mmask8)(mask), (int)(scale)))
-#define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \
- (__m128i)__builtin_ia32_gather3div8si((__v4si)(__m128i)(v1_old), \
+#define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \
+ ((__m128)__builtin_ia32_gather3div8sf((__v4sf)(__m128)(v1_old), \
(void const *)(addr), \
(__v4di)(__m256i)(index), \
- (__mmask8)(mask), (int)(scale))
+ (__mmask8)(mask), (int)(scale)))
+
+#define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \
+ ((__m128i)__builtin_ia32_gather3div8si((__v4si)(__m128i)(v1_old), \
+ (void const *)(addr), \
+ (__v4di)(__m256i)(index), \
+ (__mmask8)(mask), (int)(scale)))
#define _mm_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \
- (__m128d)__builtin_ia32_gather3siv2df((__v2df)(__m128d)(v1_old), \
- (void const *)(addr), \
- (__v4si)(__m128i)(index), \
- (__mmask8)(mask), (int)(scale))
+ ((__m128d)__builtin_ia32_gather3siv2df((__v2df)(__m128d)(v1_old), \
+ (void const *)(addr), \
+ (__v4si)(__m128i)(index), \
+ (__mmask8)(mask), (int)(scale)))
#define _mm_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \
- (__m128i)__builtin_ia32_gather3siv2di((__v2di)(__m128i)(v1_old), \
- (void const *)(addr), \
- (__v4si)(__m128i)(index), \
- (__mmask8)(mask), (int)(scale))
+ ((__m128i)__builtin_ia32_gather3siv2di((__v2di)(__m128i)(v1_old), \
+ (void const *)(addr), \
+ (__v4si)(__m128i)(index), \
+ (__mmask8)(mask), (int)(scale)))
#define _mm256_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \
- (__m256d)__builtin_ia32_gather3siv4df((__v4df)(__m256d)(v1_old), \
- (void const *)(addr), \
- (__v4si)(__m128i)(index), \
- (__mmask8)(mask), (int)(scale))
+ ((__m256d)__builtin_ia32_gather3siv4df((__v4df)(__m256d)(v1_old), \
+ (void const *)(addr), \
+ (__v4si)(__m128i)(index), \
+ (__mmask8)(mask), (int)(scale)))
#define _mm256_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \
- (__m256i)__builtin_ia32_gather3siv4di((__v4di)(__m256i)(v1_old), \
- (void const *)(addr), \
- (__v4si)(__m128i)(index), \
- (__mmask8)(mask), (int)(scale))
+ ((__m256i)__builtin_ia32_gather3siv4di((__v4di)(__m256i)(v1_old), \
+ (void const *)(addr), \
+ (__v4si)(__m128i)(index), \
+ (__mmask8)(mask), (int)(scale)))
#define _mm_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \
- (__m128)__builtin_ia32_gather3siv4sf((__v4sf)(__m128)(v1_old), \
- (void const *)(addr), \
- (__v4si)(__m128i)(index), \
- (__mmask8)(mask), (int)(scale))
-
-#define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \
- (__m128i)__builtin_ia32_gather3siv4si((__v4si)(__m128i)(v1_old), \
+ ((__m128)__builtin_ia32_gather3siv4sf((__v4sf)(__m128)(v1_old), \
(void const *)(addr), \
(__v4si)(__m128i)(index), \
- (__mmask8)(mask), (int)(scale))
+ (__mmask8)(mask), (int)(scale)))
-#define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \
- (__m256)__builtin_ia32_gather3siv8sf((__v8sf)(__m256)(v1_old), \
- (void const *)(addr), \
- (__v8si)(__m256i)(index), \
- (__mmask8)(mask), (int)(scale))
+#define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \
+ ((__m128i)__builtin_ia32_gather3siv4si((__v4si)(__m128i)(v1_old), \
+ (void const *)(addr), \
+ (__v4si)(__m128i)(index), \
+ (__mmask8)(mask), (int)(scale)))
-#define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \
- (__m256i)__builtin_ia32_gather3siv8si((__v8si)(__m256i)(v1_old), \
+#define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \
+ ((__m256)__builtin_ia32_gather3siv8sf((__v8sf)(__m256)(v1_old), \
(void const *)(addr), \
(__v8si)(__m256i)(index), \
- (__mmask8)(mask), (int)(scale))
+ (__mmask8)(mask), (int)(scale)))
+
+#define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \
+ ((__m256i)__builtin_ia32_gather3siv8si((__v8si)(__m256i)(v1_old), \
+ (void const *)(addr), \
+ (__v8si)(__m256i)(index), \
+ (__mmask8)(mask), (int)(scale)))
#define _mm256_permutex_pd(X, C) \
- (__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(X), (int)(C))
+ ((__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(X), (int)(C)))
#define _mm256_mask_permutex_pd(W, U, X, C) \
- (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
+ ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
(__v4df)_mm256_permutex_pd((X), (C)), \
- (__v4df)(__m256d)(W))
+ (__v4df)(__m256d)(W)))
#define _mm256_maskz_permutex_pd(U, X, C) \
- (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
- (__v4df)_mm256_permutex_pd((X), (C)), \
- (__v4df)_mm256_setzero_pd())
+ ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
+ (__v4df)_mm256_permutex_pd((X), (C)), \
+ (__v4df)_mm256_setzero_pd()))
#define _mm256_permutex_epi64(X, C) \
- (__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(X), (int)(C))
+ ((__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(X), (int)(C)))
#define _mm256_mask_permutex_epi64(W, U, X, C) \
- (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
+ ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
(__v4di)_mm256_permutex_epi64((X), (C)), \
- (__v4di)(__m256i)(W))
+ (__v4di)(__m256i)(W)))
#define _mm256_maskz_permutex_epi64(U, X, C) \
- (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
+ ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
(__v4di)_mm256_permutex_epi64((X), (C)), \
- (__v4di)_mm256_setzero_si256())
+ (__v4di)_mm256_setzero_si256()))
static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_permutexvar_pd (__m256i __X, __m256d __Y)
@@ -8175,60 +8175,60 @@ _mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
}
#define _mm_alignr_epi32(A, B, imm) \
- (__m128i)__builtin_ia32_alignd128((__v4si)(__m128i)(A), \
- (__v4si)(__m128i)(B), (int)(imm))
+ ((__m128i)__builtin_ia32_alignd128((__v4si)(__m128i)(A), \
+ (__v4si)(__m128i)(B), (int)(imm)))
#define _mm_mask_alignr_epi32(W, U, A, B, imm) \
- (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
+ ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
(__v4si)_mm_alignr_epi32((A), (B), (imm)), \
- (__v4si)(__m128i)(W))
+ (__v4si)(__m128i)(W)))
#define _mm_maskz_alignr_epi32(U, A, B, imm) \
- (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
+ ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
(__v4si)_mm_alignr_epi32((A), (B), (imm)), \
- (__v4si)_mm_setzero_si128())
+ (__v4si)_mm_setzero_si128()))
#define _mm256_alignr_epi32(A, B, imm) \
- (__m256i)__builtin_ia32_alignd256((__v8si)(__m256i)(A), \
- (__v8si)(__m256i)(B), (int)(imm))
+ ((__m256i)__builtin_ia32_alignd256((__v8si)(__m256i)(A), \
+ (__v8si)(__m256i)(B), (int)(imm)))
#define _mm256_mask_alignr_epi32(W, U, A, B, imm) \
- (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
+ ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
(__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
- (__v8si)(__m256i)(W))
+ (__v8si)(__m256i)(W)))
#define _mm256_maskz_alignr_epi32(U, A, B, imm) \
- (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
+ ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
(__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
- (__v8si)_mm256_setzero_si256())
+ (__v8si)_mm256_setzero_si256()))
#define _mm_alignr_epi64(A, B, imm) \
- (__m128i)__builtin_ia32_alignq128((__v2di)(__m128i)(A), \
- (__v2di)(__m128i)(B), (int)(imm))
+ ((__m128i)__builtin_ia32_alignq128((__v2di)(__m128i)(A), \
+ (__v2di)(__m128i)(B), (int)(imm)))
#define _mm_mask_alignr_epi64(W, U, A, B, imm) \
- (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
+ ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
(__v2di)_mm_alignr_epi64((A), (B), (imm)), \
- (__v2di)(__m128i)(W))
+ (__v2di)(__m128i)(W)))
#define _mm_maskz_alignr_epi64(U, A, B, imm) \
- (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
+ ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
(__v2di)_mm_alignr_epi64((A), (B), (imm)), \
- (__v2di)_mm_setzero_si128())
+ (__v2di)_mm_setzero_si128()))
#define _mm256_alignr_epi64(A, B, imm) \
- (__m256i)__builtin_ia32_alignq256((__v4di)(__m256i)(A), \
- (__v4di)(__m256i)(B), (int)(imm))
+ ((__m256i)__builtin_ia32_alignq256((__v4di)(__m256i)(A), \
+ (__v4di)(__m256i)(B), (int)(imm)))
#define _mm256_mask_alignr_epi64(W, U, A, B, imm) \
- (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
+ ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
(__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
- (__v4di)(__m256i)(W))
+ (__v4di)(__m256i)(W)))
#define _mm256_maskz_alignr_epi64(U, A, B, imm) \
- (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
+ ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
(__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
- (__v4di)_mm256_setzero_si256())
+ (__v4di)_mm256_setzero_si256()))
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A)
@@ -8295,24 +8295,24 @@ _mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A)
}
#define _mm256_mask_shuffle_epi32(W, U, A, I) \
- (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
- (__v8si)_mm256_shuffle_epi32((A), (I)), \
- (__v8si)(__m256i)(W))
+ ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
+ (__v8si)_mm256_shuffle_epi32((A), (I)), \
+ (__v8si)(__m256i)(W)))
#define _mm256_maskz_shuffle_epi32(U, A, I) \
- (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
- (__v8si)_mm256_shuffle_epi32((A), (I)), \
- (__v8si)_mm256_setzero_si256())
+ ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
+ (__v8si)_mm256_shuffle_epi32((A), (I)), \
+ (__v8si)_mm256_setzero_si256()))
#define _mm_mask_shuffle_epi32(W, U, A, I) \
- (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
- (__v4si)_mm_shuffle_epi32((A), (I)), \
- (__v4si)(__m128i)(W))
+ ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
+ (__v4si)_mm_shuffle_epi32((A), (I)), \
+ (__v4si)(__m128i)(W)))
#define _mm_maskz_shuffle_epi32(U, A, I) \
- (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
- (__v4si)_mm_shuffle_epi32((A), (I)), \
- (__v4si)_mm_setzero_si128())
+ ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
+ (__v4si)_mm_shuffle_epi32((A), (I)), \
+ (__v4si)_mm_setzero_si128()))
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A)
@@ -8413,27 +8413,27 @@ _mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
}
#define _mm_mask_cvt_roundps_ph(W, U, A, I) \
- (__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
- (__v8hi)(__m128i)(W), \
- (__mmask8)(U))
+ ((__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
+ (__v8hi)(__m128i)(W), \
+ (__mmask8)(U)))
#define _mm_maskz_cvt_roundps_ph(U, A, I) \
- (__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
- (__v8hi)_mm_setzero_si128(), \
- (__mmask8)(U))
+ ((__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
+ (__v8hi)_mm_setzero_si128(), \
+ (__mmask8)(U)))
#define _mm_mask_cvtps_ph _mm_mask_cvt_roundps_ph
#define _mm_maskz_cvtps_ph _mm_maskz_cvt_roundps_ph
#define _mm256_mask_cvt_roundps_ph(W, U, A, I) \
- (__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
- (__v8hi)(__m128i)(W), \
- (__mmask8)(U))
+ ((__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
+ (__v8hi)(__m128i)(W), \
+ (__mmask8)(U)))
#define _mm256_maskz_cvt_roundps_ph(U, A, I) \
- (__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
- (__v8hi)_mm_setzero_si128(), \
- (__mmask8)(U))
+ ((__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
+ (__v8hi)_mm_setzero_si128(), \
+ (__mmask8)(U)))
#define _mm256_mask_cvtps_ph _mm256_mask_cvt_roundps_ph
#define _mm256_maskz_cvtps_ph _mm256_maskz_cvt_roundps_ph
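The avx512vlintrin.h hunks above all make the same mechanical change: each intrinsic macro body gains one extra outer pair of parentheses, so the macro now expands to a single parenthesized expression, the usual hygiene for function-like macros. A minimal sketch of a call site, assuming AVX-512F/AVX-512VL are enabled (compile with -mavx512vl); the helper name is illustrative only:

  #include <immintrin.h>

  /* Extract float lanes 4..7 of v, merging with w under mask k.  Because the
     macro now expands to a single parenthesized expression, it composes with
     surrounding casts and operators without extra parentheses at the call
     site. */
  static inline __m128 upper_half_masked(__m128 w, __mmask8 k, __m256 v)
  {
      return _mm256_mask_extractf32x4_ps(w, k, v, 1);
  }
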
diff --git a/clang/lib/Headers/avx512vlvbmi2intrin.h b/clang/lib/Headers/avx512vlvbmi2intrin.h
index a40f926de75a..fac1f232415a 100644
--- a/clang/lib/Headers/avx512vlvbmi2intrin.h
+++ b/clang/lib/Headers/avx512vlvbmi2intrin.h
@@ -239,172 +239,172 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P)
}
#define _mm256_shldi_epi64(A, B, I) \
- (__m256i)__builtin_ia32_vpshldq256((__v4di)(__m256i)(A), \
- (__v4di)(__m256i)(B), (int)(I))
+ ((__m256i)__builtin_ia32_vpshldq256((__v4di)(__m256i)(A), \
+ (__v4di)(__m256i)(B), (int)(I)))
#define _mm256_mask_shldi_epi64(S, U, A, B, I) \
- (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
- (__v4di)_mm256_shldi_epi64((A), (B), (I)), \
- (__v4di)(__m256i)(S))
+ ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
+ (__v4di)_mm256_shldi_epi64((A), (B), (I)), \
+ (__v4di)(__m256i)(S)))
#define _mm256_maskz_shldi_epi64(U, A, B, I) \
- (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
- (__v4di)_mm256_shldi_epi64((A), (B), (I)), \
- (__v4di)_mm256_setzero_si256())
+ ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
+ (__v4di)_mm256_shldi_epi64((A), (B), (I)), \
+ (__v4di)_mm256_setzero_si256()))
#define _mm_shldi_epi64(A, B, I) \
- (__m128i)__builtin_ia32_vpshldq128((__v2di)(__m128i)(A), \
- (__v2di)(__m128i)(B), (int)(I))
+ ((__m128i)__builtin_ia32_vpshldq128((__v2di)(__m128i)(A), \
+ (__v2di)(__m128i)(B), (int)(I)))
#define _mm_mask_shldi_epi64(S, U, A, B, I) \
- (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
- (__v2di)_mm_shldi_epi64((A), (B), (I)), \
- (__v2di)(__m128i)(S))
+ ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
+ (__v2di)_mm_shldi_epi64((A), (B), (I)), \
+ (__v2di)(__m128i)(S)))
#define _mm_maskz_shldi_epi64(U, A, B, I) \
- (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
- (__v2di)_mm_shldi_epi64((A), (B), (I)), \
- (__v2di)_mm_setzero_si128())
+ ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
+ (__v2di)_mm_shldi_epi64((A), (B), (I)), \
+ (__v2di)_mm_setzero_si128()))
#define _mm256_shldi_epi32(A, B, I) \
- (__m256i)__builtin_ia32_vpshldd256((__v8si)(__m256i)(A), \
- (__v8si)(__m256i)(B), (int)(I))
+ ((__m256i)__builtin_ia32_vpshldd256((__v8si)(__m256i)(A), \
+ (__v8si)(__m256i)(B), (int)(I)))
#define _mm256_mask_shldi_epi32(S, U, A, B, I) \
- (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
- (__v8si)_mm256_shldi_epi32((A), (B), (I)), \
- (__v8si)(__m256i)(S))
+ ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
+ (__v8si)_mm256_shldi_epi32((A), (B), (I)), \
+ (__v8si)(__m256i)(S)))
#define _mm256_maskz_shldi_epi32(U, A, B, I) \
- (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
- (__v8si)_mm256_shldi_epi32((A), (B), (I)), \
- (__v8si)_mm256_setzero_si256())
+ ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
+ (__v8si)_mm256_shldi_epi32((A), (B), (I)), \
+ (__v8si)_mm256_setzero_si256()))
#define _mm_shldi_epi32(A, B, I) \
- (__m128i)__builtin_ia32_vpshldd128((__v4si)(__m128i)(A), \
- (__v4si)(__m128i)(B), (int)(I))
+ ((__m128i)__builtin_ia32_vpshldd128((__v4si)(__m128i)(A), \
+ (__v4si)(__m128i)(B), (int)(I)))
#define _mm_mask_shldi_epi32(S, U, A, B, I) \
- (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
- (__v4si)_mm_shldi_epi32((A), (B), (I)), \
- (__v4si)(__m128i)(S))
+ ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
+ (__v4si)_mm_shldi_epi32((A), (B), (I)), \
+ (__v4si)(__m128i)(S)))
#define _mm_maskz_shldi_epi32(U, A, B, I) \
- (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
- (__v4si)_mm_shldi_epi32((A), (B), (I)), \
- (__v4si)_mm_setzero_si128())
+ ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
+ (__v4si)_mm_shldi_epi32((A), (B), (I)), \
+ (__v4si)_mm_setzero_si128()))
#define _mm256_shldi_epi16(A, B, I) \
- (__m256i)__builtin_ia32_vpshldw256((__v16hi)(__m256i)(A), \
- (__v16hi)(__m256i)(B), (int)(I))
+ ((__m256i)__builtin_ia32_vpshldw256((__v16hi)(__m256i)(A), \
+ (__v16hi)(__m256i)(B), (int)(I)))
#define _mm256_mask_shldi_epi16(S, U, A, B, I) \
- (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
- (__v16hi)_mm256_shldi_epi16((A), (B), (I)), \
- (__v16hi)(__m256i)(S))
+ ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
+ (__v16hi)_mm256_shldi_epi16((A), (B), (I)), \
+ (__v16hi)(__m256i)(S)))
#define _mm256_maskz_shldi_epi16(U, A, B, I) \
- (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
- (__v16hi)_mm256_shldi_epi16((A), (B), (I)), \
- (__v16hi)_mm256_setzero_si256())
+ ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
+ (__v16hi)_mm256_shldi_epi16((A), (B), (I)), \
+ (__v16hi)_mm256_setzero_si256()))
#define _mm_shldi_epi16(A, B, I) \
- (__m128i)__builtin_ia32_vpshldw128((__v8hi)(__m128i)(A), \
- (__v8hi)(__m128i)(B), (int)(I))
+ ((__m128i)__builtin_ia32_vpshldw128((__v8hi)(__m128i)(A), \
+ (__v8hi)(__m128i)(B), (int)(I)))
#define _mm_mask_shldi_epi16(S, U, A, B, I) \
- (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
- (__v8hi)_mm_shldi_epi16((A), (B), (I)), \
- (__v8hi)(__m128i)(S))
+ ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
+ (__v8hi)_mm_shldi_epi16((A), (B), (I)), \
+ (__v8hi)(__m128i)(S)))
#define _mm_maskz_shldi_epi16(U, A, B, I) \
- (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
- (__v8hi)_mm_shldi_epi16((A), (B), (I)), \
- (__v8hi)_mm_setzero_si128())
+ ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
+ (__v8hi)_mm_shldi_epi16((A), (B), (I)), \
+ (__v8hi)_mm_setzero_si128()))
#define _mm256_shrdi_epi64(A, B, I) \
- (__m256i)__builtin_ia32_vpshrdq256((__v4di)(__m256i)(A), \
- (__v4di)(__m256i)(B), (int)(I))
+ ((__m256i)__builtin_ia32_vpshrdq256((__v4di)(__m256i)(A), \
+ (__v4di)(__m256i)(B), (int)(I)))
#define _mm256_mask_shrdi_epi64(S, U, A, B, I) \
- (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
- (__v4di)_mm256_shrdi_epi64((A), (B), (I)), \
- (__v4di)(__m256i)(S))
+ ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
+ (__v4di)_mm256_shrdi_epi64((A), (B), (I)), \
+ (__v4di)(__m256i)(S)))
#define _mm256_maskz_shrdi_epi64(U, A, B, I) \
- (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
- (__v4di)_mm256_shrdi_epi64((A), (B), (I)), \
- (__v4di)_mm256_setzero_si256())
+ ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
+ (__v4di)_mm256_shrdi_epi64((A), (B), (I)), \
+ (__v4di)_mm256_setzero_si256()))
#define _mm_shrdi_epi64(A, B, I) \
- (__m128i)__builtin_ia32_vpshrdq128((__v2di)(__m128i)(A), \
- (__v2di)(__m128i)(B), (int)(I))
+ ((__m128i)__builtin_ia32_vpshrdq128((__v2di)(__m128i)(A), \
+ (__v2di)(__m128i)(B), (int)(I)))
#define _mm_mask_shrdi_epi64(S, U, A, B, I) \
- (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
- (__v2di)_mm_shrdi_epi64((A), (B), (I)), \
- (__v2di)(__m128i)(S))
+ ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
+ (__v2di)_mm_shrdi_epi64((A), (B), (I)), \
+ (__v2di)(__m128i)(S)))
#define _mm_maskz_shrdi_epi64(U, A, B, I) \
- (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
- (__v2di)_mm_shrdi_epi64((A), (B), (I)), \
- (__v2di)_mm_setzero_si128())
+ ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
+ (__v2di)_mm_shrdi_epi64((A), (B), (I)), \
+ (__v2di)_mm_setzero_si128()))
#define _mm256_shrdi_epi32(A, B, I) \
- (__m256i)__builtin_ia32_vpshrdd256((__v8si)(__m256i)(A), \
- (__v8si)(__m256i)(B), (int)(I))
+ ((__m256i)__builtin_ia32_vpshrdd256((__v8si)(__m256i)(A), \
+ (__v8si)(__m256i)(B), (int)(I)))
#define _mm256_mask_shrdi_epi32(S, U, A, B, I) \
- (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
- (__v8si)_mm256_shrdi_epi32((A), (B), (I)), \
- (__v8si)(__m256i)(S))
+ ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
+ (__v8si)_mm256_shrdi_epi32((A), (B), (I)), \
+ (__v8si)(__m256i)(S)))
#define _mm256_maskz_shrdi_epi32(U, A, B, I) \
- (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
- (__v8si)_mm256_shrdi_epi32((A), (B), (I)), \
- (__v8si)_mm256_setzero_si256())
+ ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
+ (__v8si)_mm256_shrdi_epi32((A), (B), (I)), \
+ (__v8si)_mm256_setzero_si256()))
#define _mm_shrdi_epi32(A, B, I) \
- (__m128i)__builtin_ia32_vpshrdd128((__v4si)(__m128i)(A), \
- (__v4si)(__m128i)(B), (int)(I))
+ ((__m128i)__builtin_ia32_vpshrdd128((__v4si)(__m128i)(A), \
+ (__v4si)(__m128i)(B), (int)(I)))
#define _mm_mask_shrdi_epi32(S, U, A, B, I) \
- (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
- (__v4si)_mm_shrdi_epi32((A), (B), (I)), \
- (__v4si)(__m128i)(S))
+ ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
+ (__v4si)_mm_shrdi_epi32((A), (B), (I)), \
+ (__v4si)(__m128i)(S)))
#define _mm_maskz_shrdi_epi32(U, A, B, I) \
- (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
- (__v4si)_mm_shrdi_epi32((A), (B), (I)), \
- (__v4si)_mm_setzero_si128())
+ ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
+ (__v4si)_mm_shrdi_epi32((A), (B), (I)), \
+ (__v4si)_mm_setzero_si128()))
#define _mm256_shrdi_epi16(A, B, I) \
- (__m256i)__builtin_ia32_vpshrdw256((__v16hi)(__m256i)(A), \
- (__v16hi)(__m256i)(B), (int)(I))
+ ((__m256i)__builtin_ia32_vpshrdw256((__v16hi)(__m256i)(A), \
+ (__v16hi)(__m256i)(B), (int)(I)))
#define _mm256_mask_shrdi_epi16(S, U, A, B, I) \
- (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
- (__v16hi)_mm256_shrdi_epi16((A), (B), (I)), \
- (__v16hi)(__m256i)(S))
+ ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
+ (__v16hi)_mm256_shrdi_epi16((A), (B), (I)), \
+ (__v16hi)(__m256i)(S)))
#define _mm256_maskz_shrdi_epi16(U, A, B, I) \
- (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
- (__v16hi)_mm256_shrdi_epi16((A), (B), (I)), \
- (__v16hi)_mm256_setzero_si256())
+ ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
+ (__v16hi)_mm256_shrdi_epi16((A), (B), (I)), \
+ (__v16hi)_mm256_setzero_si256()))
#define _mm_shrdi_epi16(A, B, I) \
- (__m128i)__builtin_ia32_vpshrdw128((__v8hi)(__m128i)(A), \
- (__v8hi)(__m128i)(B), (int)(I))
+ ((__m128i)__builtin_ia32_vpshrdw128((__v8hi)(__m128i)(A), \
+ (__v8hi)(__m128i)(B), (int)(I)))
#define _mm_mask_shrdi_epi16(S, U, A, B, I) \
- (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
- (__v8hi)_mm_shrdi_epi16((A), (B), (I)), \
- (__v8hi)(__m128i)(S))
+ ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
+ (__v8hi)_mm_shrdi_epi16((A), (B), (I)), \
+ (__v8hi)(__m128i)(S)))
#define _mm_maskz_shrdi_epi16(U, A, B, I) \
- (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
- (__v8hi)_mm_shrdi_epi16((A), (B), (I)), \
- (__v8hi)_mm_setzero_si128())
+ ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
+ (__v8hi)_mm_shrdi_epi16((A), (B), (I)), \
+ (__v8hi)_mm_setzero_si128()))
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_shldv_epi64(__m256i __A, __m256i __B, __m256i __C)
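The avx512vlvbmi2intrin.h hunks apply the same outer-parentheses wrapping to the VBMI2 funnel-shift macros. A short usage sketch, assuming AVX-512VBMI2 and AVX-512VL are enabled (-mavx512vbmi2 -mavx512vl); the helper name is illustrative:

  #include <immintrin.h>

  /* For each 64-bit lane, shift the 128-bit concatenation a:b left by 8 bits
     and keep the upper 64 bits, i.e. (a << 8) | (b >> 56) per lane. */
  static inline __m256i funnel_left_8(__m256i a, __m256i b)
  {
      return _mm256_shldi_epi64(a, b, 8);
  }
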
diff --git a/clang/lib/Headers/avx512vlvnniintrin.h b/clang/lib/Headers/avx512vlvnniintrin.h
index 71ac1b4370d4..0fb29af262f7 100644
--- a/clang/lib/Headers/avx512vlvnniintrin.h
+++ b/clang/lib/Headers/avx512vlvnniintrin.h
@@ -36,7 +36,7 @@
/// DST[MAX:256] := 0
/// \endoperation
#define _mm256_dpbusd_epi32(S, A, B) \
- (__m256i)__builtin_ia32_vpdpbusd256((__v8si)(S), (__v8si)(A), (__v8si)(B))
+ ((__m256i)__builtin_ia32_vpdpbusd256((__v8si)(S), (__v8si)(A), (__v8si)(B)))
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a A with
/// corresponding signed 8-bit integers in \a B, producing 4 intermediate signed
@@ -56,7 +56,7 @@
/// DST[MAX:256] := 0
/// \endoperation
#define _mm256_dpbusds_epi32(S, A, B) \
- (__m256i)__builtin_ia32_vpdpbusds256((__v8si)(S), (__v8si)(A), (__v8si)(B))
+ ((__m256i)__builtin_ia32_vpdpbusds256((__v8si)(S), (__v8si)(A), (__v8si)(B)))
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a A with
/// corresponding 16-bit integers in \a B, producing 2 intermediate signed 32-bit
@@ -74,7 +74,7 @@
/// DST[MAX:256] := 0
/// \endoperation
#define _mm256_dpwssd_epi32(S, A, B) \
- (__m256i)__builtin_ia32_vpdpwssd256((__v8si)(S), (__v8si)(A), (__v8si)(B))
+ ((__m256i)__builtin_ia32_vpdpwssd256((__v8si)(S), (__v8si)(A), (__v8si)(B)))
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a A with
/// corresponding 16-bit integers in \a B, producing 2 intermediate signed 32-bit
@@ -92,7 +92,7 @@
/// DST[MAX:256] := 0
/// \endoperation
#define _mm256_dpwssds_epi32(S, A, B) \
- (__m256i)__builtin_ia32_vpdpwssds256((__v8si)(S), (__v8si)(A), (__v8si)(B))
+ ((__m256i)__builtin_ia32_vpdpwssds256((__v8si)(S), (__v8si)(A), (__v8si)(B)))
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a A with
/// corresponding signed 8-bit integers in \a B, producing 4 intermediate signed
@@ -112,7 +112,7 @@
/// DST[MAX:128] := 0
/// \endoperation
#define _mm_dpbusd_epi32(S, A, B) \
- (__m128i)__builtin_ia32_vpdpbusd128((__v4si)(S), (__v4si)(A), (__v4si)(B))
+ ((__m128i)__builtin_ia32_vpdpbusd128((__v4si)(S), (__v4si)(A), (__v4si)(B)))
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a A with
/// corresponding signed 8-bit integers in \a B, producing 4 intermediate signed
@@ -132,7 +132,7 @@
/// DST[MAX:128] := 0
/// \endoperation
#define _mm_dpbusds_epi32(S, A, B) \
- (__m128i)__builtin_ia32_vpdpbusds128((__v4si)(S), (__v4si)(A), (__v4si)(B))
+ ((__m128i)__builtin_ia32_vpdpbusds128((__v4si)(S), (__v4si)(A), (__v4si)(B)))
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a A with
/// corresponding 16-bit integers in \a B, producing 2 intermediate signed 32-bit
@@ -150,7 +150,7 @@
/// DST[MAX:128] := 0
/// \endoperation
#define _mm_dpwssd_epi32(S, A, B) \
- (__m128i)__builtin_ia32_vpdpwssd128((__v4si)(S), (__v4si)(A), (__v4si)(B))
+ ((__m128i)__builtin_ia32_vpdpwssd128((__v4si)(S), (__v4si)(A), (__v4si)(B)))
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a A with
/// corresponding 16-bit integers in \a B, producing 2 intermediate signed 32-bit
@@ -168,7 +168,7 @@
/// DST[MAX:128] := 0
/// \endoperation
#define _mm_dpwssds_epi32(S, A, B) \
- (__m128i)__builtin_ia32_vpdpwssds128((__v4si)(S), (__v4si)(A), (__v4si)(B))
+ ((__m128i)__builtin_ia32_vpdpwssds128((__v4si)(S), (__v4si)(A), (__v4si)(B)))
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
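The avx512vlvnniintrin.h changes parenthesize the VNNI dot-product macros in the same way; the \operation blocks above describe what each macro computes. A usage sketch based on that description, assuming AVX-512VNNI and AVX-512VL are enabled (-mavx512vnni -mavx512vl); the helper name is illustrative:

  #include <immintrin.h>

  /* Multiply each group of four adjacent unsigned 8-bit values in a by the
     corresponding signed 8-bit values in b, sum the four products, and add
     the sum to the matching signed 32-bit lane of acc. */
  static inline __m256i dpbusd_accumulate(__m256i acc, __m256i a, __m256i b)
  {
      return _mm256_dpbusd_epi32(acc, a, b);
  }
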
diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index 382b6215751e..17fe63691177 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -400,7 +400,7 @@ _mm256_rcp_ps(__m256 __a)
/// 11: Truncated.
/// \returns A 256-bit vector of [4 x double] containing the rounded values.
#define _mm256_round_pd(V, M) \
- (__m256d)__builtin_ia32_roundpd256((__v4df)(__m256d)(V), (M))
+ ((__m256d)__builtin_ia32_roundpd256((__v4df)(__m256d)(V), (M)))
/// Rounds the values stored in a 256-bit vector of [8 x float] as
/// specified by the byte operand. The source values are rounded to integer
@@ -432,7 +432,7 @@ _mm256_rcp_ps(__m256 __a)
/// 11: Truncated.
/// \returns A 256-bit vector of [8 x float] containing the rounded values.
#define _mm256_round_ps(V, M) \
- (__m256)__builtin_ia32_roundps256((__v8sf)(__m256)(V), (M))
+ ((__m256)__builtin_ia32_roundps256((__v8sf)(__m256)(V), (M)))
/// Rounds up the values stored in a 256-bit vector of [4 x double]. The
/// source values are rounded up to integer values and returned as 64-bit
@@ -989,7 +989,7 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c)
/// returned vector.
/// \returns A 128-bit vector of [2 x double] containing the copied values.
#define _mm_permute_pd(A, C) \
- (__m128d)__builtin_ia32_vpermilpd((__v2df)(__m128d)(A), (int)(C))
+ ((__m128d)__builtin_ia32_vpermilpd((__v2df)(__m128d)(A), (int)(C)))
/// Copies the values in a 256-bit vector of [4 x double] as specified by
/// the immediate integer operand.
@@ -1029,7 +1029,7 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c)
/// returned vector.
/// \returns A 256-bit vector of [4 x double] containing the copied values.
#define _mm256_permute_pd(A, C) \
- (__m256d)__builtin_ia32_vpermilpd256((__v4df)(__m256d)(A), (int)(C))
+ ((__m256d)__builtin_ia32_vpermilpd256((__v4df)(__m256d)(A), (int)(C)))
/// Copies the values in a 128-bit vector of [4 x float] as specified by
/// the immediate integer operand.
@@ -1085,7 +1085,7 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c)
/// returned vector.
/// \returns A 128-bit vector of [4 x float] containing the copied values.
#define _mm_permute_ps(A, C) \
- (__m128)__builtin_ia32_vpermilps((__v4sf)(__m128)(A), (int)(C))
+ ((__m128)__builtin_ia32_vpermilps((__v4sf)(__m128)(A), (int)(C)))
/// Copies the values in a 256-bit vector of [8 x float] as specified by
/// the immediate integer operand.
@@ -1177,7 +1177,7 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c)
/// returned vector.
/// \returns A 256-bit vector of [8 x float] containing the copied values.
#define _mm256_permute_ps(A, C) \
- (__m256)__builtin_ia32_vpermilps256((__v8sf)(__m256)(A), (int)(C))
+ ((__m256)__builtin_ia32_vpermilps256((__v8sf)(__m256)(A), (int)(C)))
/// Permutes 128-bit data values stored in two 256-bit vectors of
/// [4 x double], as specified by the immediate integer operand.
@@ -1217,8 +1217,8 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c)
/// destination.
/// \returns A 256-bit vector of [4 x double] containing the copied values.
#define _mm256_permute2f128_pd(V1, V2, M) \
- (__m256d)__builtin_ia32_vperm2f128_pd256((__v4df)(__m256d)(V1), \
- (__v4df)(__m256d)(V2), (int)(M))
+ ((__m256d)__builtin_ia32_vperm2f128_pd256((__v4df)(__m256d)(V1), \
+ (__v4df)(__m256d)(V2), (int)(M)))
/// Permutes 128-bit data values stored in two 256-bit vectors of
/// [8 x float], as specified by the immediate integer operand.
@@ -1258,8 +1258,8 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c)
/// destination.
/// \returns A 256-bit vector of [8 x float] containing the copied values.
#define _mm256_permute2f128_ps(V1, V2, M) \
- (__m256)__builtin_ia32_vperm2f128_ps256((__v8sf)(__m256)(V1), \
- (__v8sf)(__m256)(V2), (int)(M))
+ ((__m256)__builtin_ia32_vperm2f128_ps256((__v8sf)(__m256)(V1), \
+ (__v8sf)(__m256)(V2), (int)(M)))
/// Permutes 128-bit data values stored in two 256-bit integer vectors,
/// as specified by the immediate integer operand.
@@ -1298,8 +1298,8 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c)
/// destination.
/// \returns A 256-bit integer vector containing the copied values.
#define _mm256_permute2f128_si256(V1, V2, M) \
- (__m256i)__builtin_ia32_vperm2f128_si256((__v8si)(__m256i)(V1), \
- (__v8si)(__m256i)(V2), (int)(M))
+ ((__m256i)__builtin_ia32_vperm2f128_si256((__v8si)(__m256i)(V1), \
+ (__v8si)(__m256i)(V2), (int)(M)))
/* Vector Blend */
/// Merges 64-bit double-precision data values stored in either of the
@@ -1327,8 +1327,8 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c)
/// operand \a V2 is copied to the same position in the destination.
/// \returns A 256-bit vector of [4 x double] containing the copied values.
#define _mm256_blend_pd(V1, V2, M) \
- (__m256d)__builtin_ia32_blendpd256((__v4df)(__m256d)(V1), \
- (__v4df)(__m256d)(V2), (int)(M))
+ ((__m256d)__builtin_ia32_blendpd256((__v4df)(__m256d)(V1), \
+ (__v4df)(__m256d)(V2), (int)(M)))
/// Merges 32-bit single-precision data values stored in either of the
/// two 256-bit vectors of [8 x float], as specified by the immediate
@@ -1355,8 +1355,8 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c)
/// operand \a V2 is copied to the same position in the destination.
/// \returns A 256-bit vector of [8 x float] containing the copied values.
#define _mm256_blend_ps(V1, V2, M) \
- (__m256)__builtin_ia32_blendps256((__v8sf)(__m256)(V1), \
- (__v8sf)(__m256)(V2), (int)(M))
+ ((__m256)__builtin_ia32_blendps256((__v8sf)(__m256)(V1), \
+ (__v8sf)(__m256)(V2), (int)(M)))
/// Merges 64-bit double-precision data values stored in either of the
/// two 256-bit vectors of [4 x double], as specified by the 256-bit vector
@@ -1453,8 +1453,8 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
/// two parallel dot product computations.
/// \returns A 256-bit vector of [8 x float] containing the two dot products.
#define _mm256_dp_ps(V1, V2, M) \
- (__m256)__builtin_ia32_dpps256((__v8sf)(__m256)(V1), \
- (__v8sf)(__m256)(V2), (M))
+ ((__m256)__builtin_ia32_dpps256((__v8sf)(__m256)(V1), \
+ (__v8sf)(__m256)(V2), (M)))
/* Vector shuffle */
/// Selects 8 float values from the 256-bit operands of [8 x float], as
@@ -1507,8 +1507,8 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
/// 11: Bits [127:96] and [255:224] are copied from the selected operand.
/// \returns A 256-bit vector of [8 x float] containing the shuffled values.
#define _mm256_shuffle_ps(a, b, mask) \
- (__m256)__builtin_ia32_shufps256((__v8sf)(__m256)(a), \
- (__v8sf)(__m256)(b), (int)(mask))
+ ((__m256)__builtin_ia32_shufps256((__v8sf)(__m256)(a), \
+ (__v8sf)(__m256)(b), (int)(mask)))
/// Selects four double-precision values from the 256-bit operands of
/// [4 x double], as specified by the immediate value operand.
@@ -1553,8 +1553,8 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
/// destination.
/// \returns A 256-bit vector of [4 x double] containing the shuffled values.
#define _mm256_shuffle_pd(a, b, mask) \
- (__m256d)__builtin_ia32_shufpd256((__v4df)(__m256d)(a), \
- (__v4df)(__m256d)(b), (int)(mask))
+ ((__m256d)__builtin_ia32_shufpd256((__v4df)(__m256d)(a), \
+ (__v4df)(__m256d)(b), (int)(mask)))
/* Compare */
#define _CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */
@@ -1647,8 +1647,8 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
/// 0x1F: True (unordered, signaling)
/// \returns A 128-bit vector of [2 x double] containing the comparison results.
#define _mm_cmp_pd(a, b, c) \
- (__m128d)__builtin_ia32_cmppd((__v2df)(__m128d)(a), \
- (__v2df)(__m128d)(b), (c))
+ ((__m128d)__builtin_ia32_cmppd((__v2df)(__m128d)(a), \
+ (__v2df)(__m128d)(b), (c)))
/// Compares each of the corresponding values of two 128-bit vectors of
/// [4 x float], using the operation specified by the immediate integer
@@ -1707,8 +1707,8 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
/// 0x1F: True (unordered, signaling)
/// \returns A 128-bit vector of [4 x float] containing the comparison results.
#define _mm_cmp_ps(a, b, c) \
- (__m128)__builtin_ia32_cmpps((__v4sf)(__m128)(a), \
- (__v4sf)(__m128)(b), (c))
+ ((__m128)__builtin_ia32_cmpps((__v4sf)(__m128)(a), \
+ (__v4sf)(__m128)(b), (c)))
/// Compares each of the corresponding double-precision values of two
/// 256-bit vectors of [4 x double], using the operation specified by the
@@ -1767,8 +1767,8 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
/// 0x1F: True (unordered, signaling)
/// \returns A 256-bit vector of [4 x double] containing the comparison results.
#define _mm256_cmp_pd(a, b, c) \
- (__m256d)__builtin_ia32_cmppd256((__v4df)(__m256d)(a), \
- (__v4df)(__m256d)(b), (c))
+ ((__m256d)__builtin_ia32_cmppd256((__v4df)(__m256d)(a), \
+ (__v4df)(__m256d)(b), (c)))
/// Compares each of the corresponding values of two 256-bit vectors of
/// [8 x float], using the operation specified by the immediate integer
@@ -1827,8 +1827,8 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
/// 0x1F: True (unordered, signaling)
/// \returns A 256-bit vector of [8 x float] containing the comparison results.
#define _mm256_cmp_ps(a, b, c) \
- (__m256)__builtin_ia32_cmpps256((__v8sf)(__m256)(a), \
- (__v8sf)(__m256)(b), (c))
+ ((__m256)__builtin_ia32_cmpps256((__v8sf)(__m256)(a), \
+ (__v8sf)(__m256)(b), (c)))
/// Compares each of the corresponding scalar double-precision values of
/// two 128-bit vectors of [2 x double], using the operation specified by the
@@ -1886,8 +1886,8 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
/// 0x1F: True (unordered, signaling)
/// \returns A 128-bit vector of [2 x double] containing the comparison results.
#define _mm_cmp_sd(a, b, c) \
- (__m128d)__builtin_ia32_cmpsd((__v2df)(__m128d)(a), \
- (__v2df)(__m128d)(b), (c))
+ ((__m128d)__builtin_ia32_cmpsd((__v2df)(__m128d)(a), \
+ (__v2df)(__m128d)(b), (c)))
/// Compares each of the corresponding scalar values of two 128-bit
/// vectors of [4 x float], using the operation specified by the immediate
@@ -1945,8 +1945,8 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
/// 0x1F: True (unordered, signaling)
/// \returns A 128-bit vector of [4 x float] containing the comparison results.
#define _mm_cmp_ss(a, b, c) \
- (__m128)__builtin_ia32_cmpss((__v4sf)(__m128)(a), \
- (__v4sf)(__m128)(b), (c))
+ ((__m128)__builtin_ia32_cmpss((__v4sf)(__m128)(a), \
+ (__v4sf)(__m128)(b), (c)))
/// Takes a [8 x i32] vector and returns the vector element value
/// indexed by the immediate constant operand.
@@ -1964,7 +1964,7 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
/// \returns A 32-bit integer containing the extracted 32 bits of extended
/// packed data.
#define _mm256_extract_epi32(X, N) \
- (int)__builtin_ia32_vec_ext_v8si((__v8si)(__m256i)(X), (int)(N))
+ ((int)__builtin_ia32_vec_ext_v8si((__v8si)(__m256i)(X), (int)(N)))
/// Takes a [16 x i16] vector and returns the vector element value
/// indexed by the immediate constant operand.
@@ -1982,8 +1982,8 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
/// \returns A 32-bit integer containing the extracted 16 bits of zero extended
/// packed data.
#define _mm256_extract_epi16(X, N) \
- (int)(unsigned short)__builtin_ia32_vec_ext_v16hi((__v16hi)(__m256i)(X), \
- (int)(N))
+ ((int)(unsigned short)__builtin_ia32_vec_ext_v16hi((__v16hi)(__m256i)(X), \
+ (int)(N)))
/// Takes a [32 x i8] vector and returns the vector element value
/// indexed by the immediate constant operand.
@@ -2001,8 +2001,8 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
/// \returns A 32-bit integer containing the extracted 8 bits of zero extended
/// packed data.
#define _mm256_extract_epi8(X, N) \
- (int)(unsigned char)__builtin_ia32_vec_ext_v32qi((__v32qi)(__m256i)(X), \
- (int)(N))
+ ((int)(unsigned char)__builtin_ia32_vec_ext_v32qi((__v32qi)(__m256i)(X), \
+ (int)(N)))
#ifdef __x86_64__
/// Takes a [4 x i64] vector and returns the vector element value
@@ -2021,7 +2021,7 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
/// \returns A 64-bit integer containing the extracted 64 bits of extended
/// packed data.
#define _mm256_extract_epi64(X, N) \
- (long long)__builtin_ia32_vec_ext_v4di((__v4di)(__m256i)(X), (int)(N))
+ ((long long)__builtin_ia32_vec_ext_v4di((__v4di)(__m256i)(X), (int)(N)))
#endif
/// Takes a [8 x i32] vector and replaces the vector element value
@@ -2043,8 +2043,8 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
/// \returns A copy of vector \a __a, after replacing its element indexed by
/// \a __imm with \a __b.
#define _mm256_insert_epi32(X, I, N) \
- (__m256i)__builtin_ia32_vec_set_v8si((__v8si)(__m256i)(X), \
- (int)(I), (int)(N))
+ ((__m256i)__builtin_ia32_vec_set_v8si((__v8si)(__m256i)(X), \
+ (int)(I), (int)(N)))
/// Takes a [16 x i16] vector and replaces the vector element value
@@ -2066,8 +2066,8 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
/// \returns A copy of vector \a __a, after replacing its element indexed by
/// \a __imm with \a __b.
#define _mm256_insert_epi16(X, I, N) \
- (__m256i)__builtin_ia32_vec_set_v16hi((__v16hi)(__m256i)(X), \
- (int)(I), (int)(N))
+ ((__m256i)__builtin_ia32_vec_set_v16hi((__v16hi)(__m256i)(X), \
+ (int)(I), (int)(N)))
/// Takes a [32 x i8] vector and replaces the vector element value
/// indexed by the immediate constant operand with a new value. Returns the
@@ -2088,8 +2088,8 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
/// \returns A copy of vector \a __a, after replacing its element indexed by
/// \a __imm with \a __b.
#define _mm256_insert_epi8(X, I, N) \
- (__m256i)__builtin_ia32_vec_set_v32qi((__v32qi)(__m256i)(X), \
- (int)(I), (int)(N))
+ ((__m256i)__builtin_ia32_vec_set_v32qi((__v32qi)(__m256i)(X), \
+ (int)(I), (int)(N)))
#ifdef __x86_64__
/// Takes a [4 x i64] vector and replaces the vector element value
@@ -2111,8 +2111,8 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
/// \returns A copy of vector \a __a, after replacing its element indexed by
/// \a __imm with \a __b.
#define _mm256_insert_epi64(X, I, N) \
- (__m256i)__builtin_ia32_vec_set_v4di((__v4di)(__m256i)(X), \
- (long long)(I), (int)(N))
+ ((__m256i)__builtin_ia32_vec_set_v4di((__v4di)(__m256i)(X), \
+ (long long)(I), (int)(N)))
#endif
/* Conversion */
@@ -4592,8 +4592,8 @@ _mm256_zextsi128_si256(__m128i __a)
/// result.
/// \returns A 256-bit vector of [8 x float] containing the interleaved values.
#define _mm256_insertf128_ps(V1, V2, M) \
- (__m256)__builtin_ia32_vinsertf128_ps256((__v8sf)(__m256)(V1), \
- (__v4sf)(__m128)(V2), (int)(M))
+ ((__m256)__builtin_ia32_vinsertf128_ps256((__v8sf)(__m256)(V1), \
+ (__v4sf)(__m128)(V2), (int)(M)))
/// Constructs a new 256-bit vector of [4 x double] by first duplicating
/// a 256-bit vector of [4 x double] given in the first parameter, and then
@@ -4630,8 +4630,8 @@ _mm256_zextsi128_si256(__m128i __a)
/// result.
/// \returns A 256-bit vector of [4 x double] containing the interleaved values.
#define _mm256_insertf128_pd(V1, V2, M) \
- (__m256d)__builtin_ia32_vinsertf128_pd256((__v4df)(__m256d)(V1), \
- (__v2df)(__m128d)(V2), (int)(M))
+ ((__m256d)__builtin_ia32_vinsertf128_pd256((__v4df)(__m256d)(V1), \
+ (__v2df)(__m128d)(V2), (int)(M)))
/// Constructs a new 256-bit integer vector by first duplicating a
/// 256-bit integer vector given in the first parameter, and then replacing
@@ -4668,8 +4668,8 @@ _mm256_zextsi128_si256(__m128i __a)
/// result.
/// \returns A 256-bit integer vector containing the interleaved values.
#define _mm256_insertf128_si256(V1, V2, M) \
- (__m256i)__builtin_ia32_vinsertf128_si256((__v8si)(__m256i)(V1), \
- (__v4si)(__m128i)(V2), (int)(M))
+ ((__m256i)__builtin_ia32_vinsertf128_si256((__v8si)(__m256i)(V1), \
+ (__v4si)(__m128i)(V2), (int)(M)))
/*
Vector extract.
@@ -4698,7 +4698,7 @@ _mm256_zextsi128_si256(__m128i __a)
/// If bit [0] of \a M is 1, bits [255:128] of \a V are copied to the result.
/// \returns A 128-bit vector of [4 x float] containing the extracted bits.
#define _mm256_extractf128_ps(V, M) \
- (__m128)__builtin_ia32_vextractf128_ps256((__v8sf)(__m256)(V), (int)(M))
+ ((__m128)__builtin_ia32_vextractf128_ps256((__v8sf)(__m256)(V), (int)(M)))
/// Extracts either the upper or the lower 128 bits from a 256-bit vector
/// of [4 x double], as determined by the immediate integer parameter, and
@@ -4722,7 +4722,7 @@ _mm256_zextsi128_si256(__m128i __a)
/// If bit [0] of \a M is 1, bits [255:128] of \a V are copied to the result.
/// \returns A 128-bit vector of [2 x double] containing the extracted bits.
#define _mm256_extractf128_pd(V, M) \
- (__m128d)__builtin_ia32_vextractf128_pd256((__v4df)(__m256d)(V), (int)(M))
+ ((__m128d)__builtin_ia32_vextractf128_pd256((__v4df)(__m256d)(V), (int)(M)))
/// Extracts either the upper or the lower 128 bits from a 256-bit
/// integer vector, as determined by the immediate integer parameter, and
@@ -4746,7 +4746,136 @@ _mm256_zextsi128_si256(__m128i __a)
/// If bit [0] of \a M is 1, bits [255:128] of \a V are copied to the result.
/// \returns A 128-bit integer vector containing the extracted bits.
#define _mm256_extractf128_si256(V, M) \
- (__m128i)__builtin_ia32_vextractf128_si256((__v8si)(__m256i)(V), (int)(M))
+ ((__m128i)__builtin_ia32_vextractf128_si256((__v8si)(__m256i)(V), (int)(M)))
+
+/// Constructs a 256-bit floating-point vector of [8 x float] by
+/// concatenating two 128-bit floating-point vectors of [4 x float].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.
+///
+/// \param __hi
+/// A 128-bit floating-point vector of [4 x float] to be copied to the upper
+/// 128 bits of the result.
+/// \param __lo
+/// A 128-bit floating-point vector of [4 x float] to be copied to the lower
+/// 128 bits of the result.
+/// \returns A 256-bit floating-point vector of [8 x float] containing the
+/// concatenated result.
+static __inline __m256 __DEFAULT_FN_ATTRS
+_mm256_set_m128 (__m128 __hi, __m128 __lo)
+{
+ return (__m256) __builtin_shufflevector((__v4sf)__lo, (__v4sf)__hi, 0, 1, 2, 3, 4, 5, 6, 7);
+}
+
+/// Constructs a 256-bit floating-point vector of [4 x double] by
+/// concatenating two 128-bit floating-point vectors of [2 x double].
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.
+///
+/// \param __hi
+/// A 128-bit floating-point vector of [2 x double] to be copied to the upper
+/// 128 bits of the result.
+/// \param __lo
+/// A 128-bit floating-point vector of [2 x double] to be copied to the lower
+/// 128 bits of the result.
+/// \returns A 256-bit floating-point vector of [4 x double] containing the
+/// concatenated result.
+static __inline __m256d __DEFAULT_FN_ATTRS
+_mm256_set_m128d (__m128d __hi, __m128d __lo)
+{
+ return (__m256d) __builtin_shufflevector((__v2df)__lo, (__v2df)__hi, 0, 1, 2, 3);
+}
+
+/// Constructs a 256-bit integer vector by concatenating two 128-bit
+/// integer vectors.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.
+///
+/// \param __hi
+/// A 128-bit integer vector to be copied to the upper 128 bits of the
+/// result.
+/// \param __lo
+/// A 128-bit integer vector to be copied to the lower 128 bits of the
+/// result.
+/// \returns A 256-bit integer vector containing the concatenated result.
+static __inline __m256i __DEFAULT_FN_ATTRS
+_mm256_set_m128i (__m128i __hi, __m128i __lo)
+{
+ return (__m256i) __builtin_shufflevector((__v2di)__lo, (__v2di)__hi, 0, 1, 2, 3);
+}
+
+/// Constructs a 256-bit floating-point vector of [8 x float] by
+/// concatenating two 128-bit floating-point vectors of [4 x float]. This is
+/// similar to _mm256_set_m128, but the order of the input parameters is
+/// swapped.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.
+///
+/// \param __lo
+/// A 128-bit floating-point vector of [4 x float] to be copied to the lower
+/// 128 bits of the result.
+/// \param __hi
+/// A 128-bit floating-point vector of [4 x float] to be copied to the upper
+/// 128 bits of the result.
+/// \returns A 256-bit floating-point vector of [8 x float] containing the
+/// concatenated result.
+static __inline __m256 __DEFAULT_FN_ATTRS
+_mm256_setr_m128 (__m128 __lo, __m128 __hi)
+{
+ return _mm256_set_m128(__hi, __lo);
+}
+
+/// Constructs a 256-bit floating-point vector of [4 x double] by
+/// concatenating two 128-bit floating-point vectors of [2 x double]. This is
+/// similar to _mm256_set_m128d, but the order of the input parameters is
+/// swapped.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.
+///
+/// \param __lo
+/// A 128-bit floating-point vector of [2 x double] to be copied to the lower
+/// 128 bits of the result.
+/// \param __hi
+/// A 128-bit floating-point vector of [2 x double] to be copied to the upper
+/// 128 bits of the result.
+/// \returns A 256-bit floating-point vector of [4 x double] containing the
+/// concatenated result.
+static __inline __m256d __DEFAULT_FN_ATTRS
+_mm256_setr_m128d (__m128d __lo, __m128d __hi)
+{
+ return (__m256d)_mm256_set_m128d(__hi, __lo);
+}
+
+/// Constructs a 256-bit integer vector by concatenating two 128-bit
+/// integer vectors. This is similar to _mm256_set_m128i, but the order of
+/// the input parameters is swapped.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.
+///
+/// \param __lo
+/// A 128-bit integer vector to be copied to the lower 128 bits of the
+/// result.
+/// \param __hi
+/// A 128-bit integer vector to be copied to the upper 128 bits of the
+/// result.
+/// \returns A 256-bit integer vector containing the concatenated result.
+static __inline __m256i __DEFAULT_FN_ATTRS
+_mm256_setr_m128i (__m128i __lo, __m128i __hi)
+{
+ return (__m256i)_mm256_set_m128i(__hi, __lo);
+}
/* SIMD load ops (unaligned) */
/// Loads two 128-bit floating-point vectors of [4 x float] from
@@ -4773,8 +4902,7 @@ _mm256_zextsi128_si256(__m128i __a)
static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_loadu2_m128(float const *__addr_hi, float const *__addr_lo)
{
- __m256 __v256 = _mm256_castps128_ps256(_mm_loadu_ps(__addr_lo));
- return _mm256_insertf128_ps(__v256, _mm_loadu_ps(__addr_hi), 1);
+ return _mm256_set_m128(_mm_loadu_ps(__addr_hi), _mm_loadu_ps(__addr_lo));
}
/// Loads two 128-bit floating-point vectors of [2 x double] from
@@ -4801,8 +4929,7 @@ _mm256_loadu2_m128(float const *__addr_hi, float const *__addr_lo)
static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_loadu2_m128d(double const *__addr_hi, double const *__addr_lo)
{
- __m256d __v256 = _mm256_castpd128_pd256(_mm_loadu_pd(__addr_lo));
- return _mm256_insertf128_pd(__v256, _mm_loadu_pd(__addr_hi), 1);
+ return _mm256_set_m128d(_mm_loadu_pd(__addr_hi), _mm_loadu_pd(__addr_lo));
}
/// Loads two 128-bit integer vectors from unaligned memory locations and
@@ -4826,8 +4953,7 @@ _mm256_loadu2_m128d(double const *__addr_hi, double const *__addr_lo)
static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_loadu2_m128i(__m128i_u const *__addr_hi, __m128i_u const *__addr_lo)
{
- __m256i __v256 = _mm256_castsi128_si256(_mm_loadu_si128(__addr_lo));
- return _mm256_insertf128_si256(__v256, _mm_loadu_si128(__addr_hi), 1);
+ return _mm256_set_m128i(_mm_loadu_si128(__addr_hi), _mm_loadu_si128(__addr_lo));
}
/* SIMD store ops (unaligned) */
@@ -4918,135 +5044,6 @@ _mm256_storeu2_m128i(__m128i_u *__addr_hi, __m128i_u *__addr_lo, __m256i __a)
_mm_storeu_si128(__addr_hi, __v128);
}
-/// Constructs a 256-bit floating-point vector of [8 x float] by
-/// concatenating two 128-bit floating-point vectors of [4 x float].
-///
-/// \headerfile <x86intrin.h>
-///
-/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.
-///
-/// \param __hi
-/// A 128-bit floating-point vector of [4 x float] to be copied to the upper
-/// 128 bits of the result.
-/// \param __lo
-/// A 128-bit floating-point vector of [4 x float] to be copied to the lower
-/// 128 bits of the result.
-/// \returns A 256-bit floating-point vector of [8 x float] containing the
-/// concatenated result.
-static __inline __m256 __DEFAULT_FN_ATTRS
-_mm256_set_m128 (__m128 __hi, __m128 __lo)
-{
- return (__m256) __builtin_shufflevector((__v4sf)__lo, (__v4sf)__hi, 0, 1, 2, 3, 4, 5, 6, 7);
-}
-
-/// Constructs a 256-bit floating-point vector of [4 x double] by
-/// concatenating two 128-bit floating-point vectors of [2 x double].
-///
-/// \headerfile <x86intrin.h>
-///
-/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.
-///
-/// \param __hi
-/// A 128-bit floating-point vector of [2 x double] to be copied to the upper
-/// 128 bits of the result.
-/// \param __lo
-/// A 128-bit floating-point vector of [2 x double] to be copied to the lower
-/// 128 bits of the result.
-/// \returns A 256-bit floating-point vector of [4 x double] containing the
-/// concatenated result.
-static __inline __m256d __DEFAULT_FN_ATTRS
-_mm256_set_m128d (__m128d __hi, __m128d __lo)
-{
- return (__m256d) __builtin_shufflevector((__v2df)__lo, (__v2df)__hi, 0, 1, 2, 3);
-}
-
-/// Constructs a 256-bit integer vector by concatenating two 128-bit
-/// integer vectors.
-///
-/// \headerfile <x86intrin.h>
-///
-/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.
-///
-/// \param __hi
-/// A 128-bit integer vector to be copied to the upper 128 bits of the
-/// result.
-/// \param __lo
-/// A 128-bit integer vector to be copied to the lower 128 bits of the
-/// result.
-/// \returns A 256-bit integer vector containing the concatenated result.
-static __inline __m256i __DEFAULT_FN_ATTRS
-_mm256_set_m128i (__m128i __hi, __m128i __lo)
-{
- return (__m256i) __builtin_shufflevector((__v2di)__lo, (__v2di)__hi, 0, 1, 2, 3);
-}
-
-/// Constructs a 256-bit floating-point vector of [8 x float] by
-/// concatenating two 128-bit floating-point vectors of [4 x float]. This is
-/// similar to _mm256_set_m128, but the order of the input parameters is
-/// swapped.
-///
-/// \headerfile <x86intrin.h>
-///
-/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.
-///
-/// \param __lo
-/// A 128-bit floating-point vector of [4 x float] to be copied to the lower
-/// 128 bits of the result.
-/// \param __hi
-/// A 128-bit floating-point vector of [4 x float] to be copied to the upper
-/// 128 bits of the result.
-/// \returns A 256-bit floating-point vector of [8 x float] containing the
-/// concatenated result.
-static __inline __m256 __DEFAULT_FN_ATTRS
-_mm256_setr_m128 (__m128 __lo, __m128 __hi)
-{
- return _mm256_set_m128(__hi, __lo);
-}
-
-/// Constructs a 256-bit floating-point vector of [4 x double] by
-/// concatenating two 128-bit floating-point vectors of [2 x double]. This is
-/// similar to _mm256_set_m128d, but the order of the input parameters is
-/// swapped.
-///
-/// \headerfile <x86intrin.h>
-///
-/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.
-///
-/// \param __lo
-/// A 128-bit floating-point vector of [2 x double] to be copied to the lower
-/// 128 bits of the result.
-/// \param __hi
-/// A 128-bit floating-point vector of [2 x double] to be copied to the upper
-/// 128 bits of the result.
-/// \returns A 256-bit floating-point vector of [4 x double] containing the
-/// concatenated result.
-static __inline __m256d __DEFAULT_FN_ATTRS
-_mm256_setr_m128d (__m128d __lo, __m128d __hi)
-{
- return (__m256d)_mm256_set_m128d(__hi, __lo);
-}
-
-/// Constructs a 256-bit integer vector by concatenating two 128-bit
-/// integer vectors. This is similar to _mm256_set_m128i, but the order of
-/// the input parameters is swapped.
-///
-/// \headerfile <x86intrin.h>
-///
-/// This intrinsic corresponds to the <c> VINSERTF128 </c> instruction.
-///
-/// \param __lo
-/// A 128-bit integer vector to be copied to the lower 128 bits of the
-/// result.
-/// \param __hi
-/// A 128-bit integer vector to be copied to the upper 128 bits of the
-/// result.
-/// \returns A 256-bit integer vector containing the concatenated result.
-static __inline __m256i __DEFAULT_FN_ATTRS
-_mm256_setr_m128i (__m128i __lo, __m128i __hi)
-{
- return (__m256i)_mm256_set_m128i(__hi, __lo);
-}
-
#undef __DEFAULT_FN_ATTRS
#undef __DEFAULT_FN_ATTRS128
diff --git a/clang/lib/Headers/cpuid.h b/clang/lib/Headers/cpuid.h
index 34f0e76807c5..6df1b4a11172 100644
--- a/clang/lib/Headers/cpuid.h
+++ b/clang/lib/Headers/cpuid.h
@@ -195,6 +195,7 @@
#define bit_PCONFIG 0x00040000
#define bit_IBT 0x00100000
#define bit_AMXBF16 0x00400000
+#define bit_AVX512FP16 0x00800000
#define bit_AMXTILE 0x01000000
#define bit_AMXINT8 0x02000000
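/* Editor's sketch (illustration only, not part of the upstream diff): probing
 * the new bit with the helpers this header already provides. Reading the bit
 * from CPUID leaf 7, sub-leaf 0, EDX is my assumption about where this group
 * of bit_* macros applies; it is not stated in the hunk above. */
#include <cpuid.h>

static int has_avx512fp16(void)
{
  unsigned int eax, ebx, ecx, edx;
  if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
    return 0;
  return (edx & bit_AVX512FP16) != 0;
}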
diff --git a/clang/lib/Headers/crc32intrin.h b/clang/lib/Headers/crc32intrin.h
new file mode 100644
index 000000000000..a0bd99d1b572
--- /dev/null
+++ b/clang/lib/Headers/crc32intrin.h
@@ -0,0 +1,100 @@
+/*===---- crc32intrin.h - SSE4.2 Accumulate CRC32 intrinsics ---------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __CRC32INTRIN_H
+#define __CRC32INTRIN_H
+
+#define __DEFAULT_FN_ATTRS \
+ __attribute__((__always_inline__, __nodebug__, __target__("crc32")))
+
+/// Adds the unsigned integer operand to the CRC-32C checksum of the
+/// unsigned char operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the <c> CRC32B </c> instruction.
+///
+/// \param __C
+/// An unsigned integer operand to add to the CRC-32C checksum of operand
+/// \a __D.
+/// \param __D
+/// An unsigned 8-bit integer operand used to compute the CRC-32C checksum.
+/// \returns The result of adding operand \a __C to the CRC-32C checksum of
+/// operand \a __D.
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
+_mm_crc32_u8(unsigned int __C, unsigned char __D)
+{
+ return __builtin_ia32_crc32qi(__C, __D);
+}
+
+/// Adds the unsigned integer operand to the CRC-32C checksum of the
+/// unsigned short operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the <c> CRC32W </c> instruction.
+///
+/// \param __C
+/// An unsigned integer operand to add to the CRC-32C checksum of operand
+/// \a __D.
+/// \param __D
+/// An unsigned 16-bit integer operand used to compute the CRC-32C checksum.
+/// \returns The result of adding operand \a __C to the CRC-32C checksum of
+/// operand \a __D.
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
+_mm_crc32_u16(unsigned int __C, unsigned short __D)
+{
+ return __builtin_ia32_crc32hi(__C, __D);
+}
+
+/// Adds the first unsigned integer operand to the CRC-32C checksum of
+/// the second unsigned integer operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the <c> CRC32L </c> instruction.
+///
+/// \param __C
+/// An unsigned integer operand to add to the CRC-32C checksum of operand
+/// \a __D.
+/// \param __D
+/// An unsigned 32-bit integer operand used to compute the CRC-32C checksum.
+/// \returns The result of adding operand \a __C to the CRC-32C checksum of
+/// operand \a __D.
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
+_mm_crc32_u32(unsigned int __C, unsigned int __D)
+{
+ return __builtin_ia32_crc32si(__C, __D);
+}
+
+#ifdef __x86_64__
+/// Adds the unsigned integer operand to the CRC-32C checksum of the
+/// unsigned 64-bit integer operand.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the <c> CRC32Q </c> instruction.
+///
+/// \param __C
+/// An unsigned integer operand to add to the CRC-32C checksum of operand
+/// \a __D.
+/// \param __D
+/// An unsigned 64-bit integer operand used to compute the CRC-32C checksum.
+/// \returns The result of adding operand \a __C to the CRC-32C checksum of
+/// operand \a __D.
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS
+_mm_crc32_u64(unsigned long long __C, unsigned long long __D)
+{
+ return __builtin_ia32_crc32di(__C, __D);
+}
+#endif /* __x86_64__ */
+
+#undef __DEFAULT_FN_ATTRS
+
+#endif /* __CRC32INTRIN_H */
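/* Editor's sketch (illustration only, not part of the upstream diff): the new
 * header's byte-wise intrinsic chained over a buffer to accumulate a CRC-32C
 * checksum; compile with -mcrc32 (or an SSE4.2-enabled target). */
#include <x86intrin.h>
#include <stddef.h>

static unsigned int crc32c_bytes(unsigned int crc, const unsigned char *buf, size_t len)
{
  for (size_t i = 0; i < len; ++i)
    crc = _mm_crc32_u8(crc, buf[i]);
  return crc;
}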
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index bb759721faeb..6e9c3032c21f 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -10,6 +10,10 @@
#ifndef __EMMINTRIN_H
#define __EMMINTRIN_H
+#if !defined(__i386__) && !defined(__x86_64__)
+#error "This header is only meant to be used on x86 and x64 architecture"
+#endif
+
#include <xmmintrin.h>
typedef double __m128d __attribute__((__vector_size__(16), __aligned__(16)));
@@ -2818,10 +2822,10 @@ _mm_xor_si128(__m128i __a, __m128i __b)
/// \a a.
/// \returns A 128-bit integer vector containing the left-shifted value.
#define _mm_slli_si128(a, imm) \
- (__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm))
+ ((__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm)))
#define _mm_bslli_si128(a, imm) \
- (__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm))
+ ((__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm)))
/// Left-shifts each 16-bit value in the 128-bit integer vector operand
/// by the specified number of bits. Low-order bits are cleared.
@@ -3035,10 +3039,10 @@ _mm_sra_epi32(__m128i __a, __m128i __count)
/// \a a.
/// \returns A 128-bit integer vector containing the right-shifted value.
#define _mm_srli_si128(a, imm) \
- (__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm))
+ ((__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm)))
#define _mm_bsrli_si128(a, imm) \
- (__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm))
+ ((__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm)))
/// Right-shifts each of 16-bit values in the 128-bit integer vector
/// operand by the specified number of bits. High-order bits are cleared.
@@ -4356,8 +4360,8 @@ _mm_packus_epi16(__m128i __a, __m128i __b)
/// \returns An integer, whose lower 16 bits are selected from the 128-bit
/// integer vector parameter and the remaining bits are assigned zeros.
#define _mm_extract_epi16(a, imm) \
- (int)(unsigned short)__builtin_ia32_vec_ext_v8hi((__v8hi)(__m128i)(a), \
- (int)(imm))
+ ((int)(unsigned short)__builtin_ia32_vec_ext_v8hi((__v8hi)(__m128i)(a), \
+ (int)(imm)))
/// Constructs a 128-bit integer vector by first making a copy of the
/// 128-bit integer vector parameter, and then inserting the lower 16 bits
@@ -4380,8 +4384,8 @@ _mm_packus_epi16(__m128i __a, __m128i __b)
/// lower 16 bits of \a __b are written.
/// \returns A 128-bit integer vector containing the constructed values.
#define _mm_insert_epi16(a, b, imm) \
- (__m128i)__builtin_ia32_vec_set_v8hi((__v8hi)(__m128i)(a), (int)(b), \
- (int)(imm))
+ ((__m128i)__builtin_ia32_vec_set_v8hi((__v8hi)(__m128i)(a), (int)(b), \
+ (int)(imm)))
/// Copies the values of the most significant bits from each 8-bit
/// element in a 128-bit integer vector of [16 x i8] to create a 16-bit mask
@@ -4430,7 +4434,7 @@ _mm_movemask_epi8(__m128i __a)
/// 11: assign values from bits [127:96] of \a a.
/// \returns A 128-bit integer vector containing the shuffled values.
#define _mm_shuffle_epi32(a, imm) \
- (__m128i)__builtin_ia32_pshufd((__v4si)(__m128i)(a), (int)(imm))
+ ((__m128i)__builtin_ia32_pshufd((__v4si)(__m128i)(a), (int)(imm)))
/// Constructs a 128-bit integer vector by shuffling four lower 16-bit
/// elements of a 128-bit integer vector of [8 x i16], using the immediate
@@ -4460,7 +4464,7 @@ _mm_movemask_epi8(__m128i __a)
/// 11: assign values from bits [63:48] of \a a. \n
/// \returns A 128-bit integer vector containing the shuffled values.
#define _mm_shufflelo_epi16(a, imm) \
- (__m128i)__builtin_ia32_pshuflw((__v8hi)(__m128i)(a), (int)(imm))
+ ((__m128i)__builtin_ia32_pshuflw((__v8hi)(__m128i)(a), (int)(imm)))
/// Constructs a 128-bit integer vector by shuffling four upper 16-bit
/// elements of a 128-bit integer vector of [8 x i16], using the immediate
@@ -4490,7 +4494,7 @@ _mm_movemask_epi8(__m128i __a)
/// 11: assign values from bits [127:112] of \a a. \n
/// \returns A 128-bit integer vector containing the shuffled values.
#define _mm_shufflehi_epi16(a, imm) \
- (__m128i)__builtin_ia32_pshufhw((__v8hi)(__m128i)(a), (int)(imm))
+ ((__m128i)__builtin_ia32_pshufhw((__v8hi)(__m128i)(a), (int)(imm)))
/// Unpacks the high-order (index 8-15) values from two 128-bit vectors
/// of [16 x i8] and interleaves them into a 128-bit vector of [16 x i8].
@@ -4844,8 +4848,8 @@ _mm_movemask_pd(__m128d __a)
/// Bit[1] = 1: upper element of \a b copied to upper element of result. \n
/// \returns A 128-bit vector of [2 x double] containing the shuffled values.
#define _mm_shuffle_pd(a, b, i) \
- (__m128d)__builtin_ia32_shufpd((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \
- (int)(i))
+ ((__m128d)__builtin_ia32_shufpd((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \
+ (int)(i)))
/// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit
/// floating-point vector of [4 x float].
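/* Editor's illustration (not part of the upstream diff) of what the added
 * outer parentheses in these macros change: each expansion now forms a single
 * primary expression, so a postfix operator written after the macro applies
 * to the cast intrinsic result instead of binding to the bare builtin call
 * first. That this is the intent is my reading; the diff itself only shows
 * the mechanical change. */
#include <emmintrin.h>

static long long first_lane_reversed(__m128i a)
{
  /* Expands to ((__m128i)__builtin_ia32_pshufd(...))[0]; without the outer
   * parentheses the [0] would attach to the builtin result before the cast. */
  return _mm_shuffle_epi32(a, 0x1B)[0];
}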
diff --git a/clang/lib/Headers/f16cintrin.h b/clang/lib/Headers/f16cintrin.h
index 109b604adae3..13905e6fb0ec 100644
--- a/clang/lib/Headers/f16cintrin.h
+++ b/clang/lib/Headers/f16cintrin.h
@@ -66,8 +66,8 @@ _cvtsh_ss(unsigned short __a)
/// 1XX: Use MXCSR.RC for rounding
/// \returns The converted 16-bit half-precision float value.
#define _cvtss_sh(a, imm) \
- (unsigned short)(((__v8hi)__builtin_ia32_vcvtps2ph((__v4sf){a, 0, 0, 0}, \
- (imm)))[0])
+ ((unsigned short)(((__v8hi)__builtin_ia32_vcvtps2ph((__v4sf){a, 0, 0, 0}, \
+ (imm)))[0]))
/// Converts a 128-bit vector containing 32-bit float values into a
/// 128-bit vector containing 16-bit half-precision float values.
@@ -93,7 +93,7 @@ _cvtsh_ss(unsigned short __a)
/// values. The lower 64 bits are used to store the converted 16-bit
/// half-precision floating-point values.
#define _mm_cvtps_ph(a, imm) \
- (__m128i)__builtin_ia32_vcvtps2ph((__v4sf)(__m128)(a), (imm))
+ ((__m128i)__builtin_ia32_vcvtps2ph((__v4sf)(__m128)(a), (imm)))
/// Converts a 128-bit vector containing 16-bit half-precision float
/// values into a 128-bit vector containing 32-bit float values.
@@ -136,7 +136,7 @@ _mm_cvtph_ps(__m128i __a)
/// \returns A 128-bit vector containing the converted 16-bit half-precision
/// float values.
#define _mm256_cvtps_ph(a, imm) \
- (__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)(__m256)(a), (imm))
+ ((__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)(__m256)(a), (imm)))
/// Converts a 128-bit vector containing 16-bit half-precision float
/// values into a 256-bit vector of [8 x float].
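/* Editor's sketch (illustration only, not part of the upstream diff): a scalar
 * float -> half -> float round trip through the macros documented above;
 * compile with -mf16c. _MM_FROUND_CUR_DIRECTION selects MXCSR rounding for
 * the float-to-half step, per the "1XX: Use MXCSR.RC" note. */
#include <immintrin.h>

static float roundtrip_through_half(float x)
{
  unsigned short h = _cvtss_sh(x, _MM_FROUND_CUR_DIRECTION);
  return _cvtsh_ss(h);
}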
diff --git a/clang/lib/Headers/gfniintrin.h b/clang/lib/Headers/gfniintrin.h
index 11a321b7c919..a59238b0b131 100644
--- a/clang/lib/Headers/gfniintrin.h
+++ b/clang/lib/Headers/gfniintrin.h
@@ -28,14 +28,14 @@
#define __DEFAULT_FN_ATTRS_VL256 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni"), __min_vector_width__(256)))
#define _mm_gf2p8affineinv_epi64_epi8(A, B, I) \
- (__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)(__m128i)(A), \
- (__v16qi)(__m128i)(B), \
- (char)(I))
+ ((__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)(__m128i)(A), \
+ (__v16qi)(__m128i)(B), \
+ (char)(I)))
#define _mm_gf2p8affine_epi64_epi8(A, B, I) \
- (__m128i)__builtin_ia32_vgf2p8affineqb_v16qi((__v16qi)(__m128i)(A), \
- (__v16qi)(__m128i)(B), \
- (char)(I))
+ ((__m128i)__builtin_ia32_vgf2p8affineqb_v16qi((__v16qi)(__m128i)(A), \
+ (__v16qi)(__m128i)(B), \
+ (char)(I)))
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_gf2p8mul_epi8(__m128i __A, __m128i __B)
@@ -46,14 +46,14 @@ _mm_gf2p8mul_epi8(__m128i __A, __m128i __B)
#ifdef __AVXINTRIN_H
#define _mm256_gf2p8affineinv_epi64_epi8(A, B, I) \
- (__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi((__v32qi)(__m256i)(A), \
- (__v32qi)(__m256i)(B), \
- (char)(I))
+ ((__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi((__v32qi)(__m256i)(A), \
+ (__v32qi)(__m256i)(B), \
+ (char)(I)))
#define _mm256_gf2p8affine_epi64_epi8(A, B, I) \
- (__m256i)__builtin_ia32_vgf2p8affineqb_v32qi((__v32qi)(__m256i)(A), \
- (__v32qi)(__m256i)(B), \
- (char)(I))
+ ((__m256i)__builtin_ia32_vgf2p8affineqb_v32qi((__v32qi)(__m256i)(A), \
+ (__v32qi)(__m256i)(B), \
+ (char)(I)))
static __inline__ __m256i __DEFAULT_FN_ATTRS_Y
_mm256_gf2p8mul_epi8(__m256i __A, __m256i __B)
@@ -65,32 +65,32 @@ _mm256_gf2p8mul_epi8(__m256i __A, __m256i __B)
#ifdef __AVX512BWINTRIN_H
#define _mm512_gf2p8affineinv_epi64_epi8(A, B, I) \
- (__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi((__v64qi)(__m512i)(A), \
- (__v64qi)(__m512i)(B), \
- (char)(I))
+ ((__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi((__v64qi)(__m512i)(A), \
+ (__v64qi)(__m512i)(B), \
+ (char)(I)))
#define _mm512_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \
- (__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \
- (__v64qi)_mm512_gf2p8affineinv_epi64_epi8(A, B, I), \
- (__v64qi)(__m512i)(S))
+ ((__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \
+ (__v64qi)_mm512_gf2p8affineinv_epi64_epi8(A, B, I), \
+ (__v64qi)(__m512i)(S)))
#define _mm512_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \
- (__m512i)_mm512_mask_gf2p8affineinv_epi64_epi8((__m512i)_mm512_setzero_si512(), \
- U, A, B, I)
+ _mm512_mask_gf2p8affineinv_epi64_epi8((__m512i)_mm512_setzero_si512(), \
+ U, A, B, I)
#define _mm512_gf2p8affine_epi64_epi8(A, B, I) \
- (__m512i)__builtin_ia32_vgf2p8affineqb_v64qi((__v64qi)(__m512i)(A), \
- (__v64qi)(__m512i)(B), \
- (char)(I))
+ ((__m512i)__builtin_ia32_vgf2p8affineqb_v64qi((__v64qi)(__m512i)(A), \
+ (__v64qi)(__m512i)(B), \
+ (char)(I)))
#define _mm512_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \
- (__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \
- (__v64qi)_mm512_gf2p8affine_epi64_epi8(A, B, I), \
- (__v64qi)(__m512i)(S))
+ ((__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \
+ (__v64qi)_mm512_gf2p8affine_epi64_epi8((A), (B), (I)), \
+ (__v64qi)(__m512i)(S)))
#define _mm512_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \
- (__m512i)_mm512_mask_gf2p8affine_epi64_epi8((__m512i)_mm512_setzero_si512(), \
- U, A, B, I)
+ _mm512_mask_gf2p8affine_epi64_epi8((__m512i)_mm512_setzero_si512(), \
+ U, A, B, I)
static __inline__ __m512i __DEFAULT_FN_ATTRS_Z
_mm512_gf2p8mul_epi8(__m512i __A, __m512i __B)
@@ -117,40 +117,39 @@ _mm512_maskz_gf2p8mul_epi8(__mmask64 __U, __m512i __A, __m512i __B)
#ifdef __AVX512VLBWINTRIN_H
#define _mm_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \
- (__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \
- (__v16qi)_mm_gf2p8affineinv_epi64_epi8(A, B, I), \
- (__v16qi)(__m128i)(S))
+ ((__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \
+ (__v16qi)_mm_gf2p8affineinv_epi64_epi8(A, B, I), \
+ (__v16qi)(__m128i)(S)))
#define _mm_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \
- (__m128i)_mm_mask_gf2p8affineinv_epi64_epi8((__m128i)_mm_setzero_si128(), \
- U, A, B, I)
+ _mm_mask_gf2p8affineinv_epi64_epi8((__m128i)_mm_setzero_si128(), \
+ U, A, B, I)
#define _mm256_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \
- (__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \
- (__v32qi)_mm256_gf2p8affineinv_epi64_epi8(A, B, I), \
- (__v32qi)(__m256i)(S))
+ ((__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \
+ (__v32qi)_mm256_gf2p8affineinv_epi64_epi8(A, B, I), \
+ (__v32qi)(__m256i)(S)))
#define _mm256_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \
- (__m256i)_mm256_mask_gf2p8affineinv_epi64_epi8((__m256i)_mm256_setzero_si256(), \
- U, A, B, I)
+ _mm256_mask_gf2p8affineinv_epi64_epi8((__m256i)_mm256_setzero_si256(), \
+ U, A, B, I)
#define _mm_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \
- (__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \
- (__v16qi)_mm_gf2p8affine_epi64_epi8(A, B, I), \
- (__v16qi)(__m128i)(S))
+ ((__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \
+ (__v16qi)_mm_gf2p8affine_epi64_epi8(A, B, I), \
+ (__v16qi)(__m128i)(S)))
#define _mm_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \
- (__m128i)_mm_mask_gf2p8affine_epi64_epi8((__m128i)_mm_setzero_si128(), \
- U, A, B, I)
+ _mm_mask_gf2p8affine_epi64_epi8((__m128i)_mm_setzero_si128(), U, A, B, I)
#define _mm256_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \
- (__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \
- (__v32qi)_mm256_gf2p8affine_epi64_epi8(A, B, I), \
- (__v32qi)(__m256i)(S))
+ ((__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \
+ (__v32qi)_mm256_gf2p8affine_epi64_epi8(A, B, I), \
+ (__v32qi)(__m256i)(S)))
#define _mm256_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \
- (__m256i)_mm256_mask_gf2p8affine_epi64_epi8((__m256i)_mm256_setzero_si256(), \
- U, A, B, I)
+ _mm256_mask_gf2p8affine_epi64_epi8((__m256i)_mm256_setzero_si256(), \
+ U, A, B, I)
static __inline__ __m128i __DEFAULT_FN_ATTRS_VL128
_mm_mask_gf2p8mul_epi8(__m128i __S, __mmask16 __U, __m128i __A, __m128i __B)
diff --git a/clang/lib/Headers/ia32intrin.h b/clang/lib/Headers/ia32intrin.h
index 00138effd505..ec8142b9c640 100644
--- a/clang/lib/Headers/ia32intrin.h
+++ b/clang/lib/Headers/ia32intrin.h
@@ -16,7 +16,7 @@
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
-#define __DEFAULT_FN_ATTRS_SSE42 __attribute__((__always_inline__, __nodebug__, __target__("sse4.2")))
+#define __DEFAULT_FN_ATTRS_CRC32 __attribute__((__always_inline__, __nodebug__, __target__("crc32")))
#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS_CAST __attribute__((__always_inline__)) constexpr
@@ -282,7 +282,7 @@ _castu64_f64(unsigned long long __A) {
* \returns The result of adding operand \a __C to the CRC-32C checksum of
* operand \a __D.
*/
-static __inline__ unsigned int __DEFAULT_FN_ATTRS_SSE42
+static __inline__ unsigned int __DEFAULT_FN_ATTRS_CRC32
__crc32b(unsigned int __C, unsigned char __D)
{
return __builtin_ia32_crc32qi(__C, __D);
@@ -303,7 +303,7 @@ __crc32b(unsigned int __C, unsigned char __D)
* \returns The result of adding operand \a __C to the CRC-32C checksum of
* operand \a __D.
*/
-static __inline__ unsigned int __DEFAULT_FN_ATTRS_SSE42
+static __inline__ unsigned int __DEFAULT_FN_ATTRS_CRC32
__crc32w(unsigned int __C, unsigned short __D)
{
return __builtin_ia32_crc32hi(__C, __D);
@@ -324,7 +324,7 @@ __crc32w(unsigned int __C, unsigned short __D)
* \returns The result of adding operand \a __C to the CRC-32C checksum of
* operand \a __D.
*/
-static __inline__ unsigned int __DEFAULT_FN_ATTRS_SSE42
+static __inline__ unsigned int __DEFAULT_FN_ATTRS_CRC32
__crc32d(unsigned int __C, unsigned int __D)
{
return __builtin_ia32_crc32si(__C, __D);
@@ -346,7 +346,7 @@ __crc32d(unsigned int __C, unsigned int __D)
* \returns The result of adding operand \a __C to the CRC-32C checksum of
* operand \a __D.
*/
-static __inline__ unsigned long long __DEFAULT_FN_ATTRS_SSE42
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CRC32
__crc32q(unsigned long long __C, unsigned long long __D)
{
return __builtin_ia32_crc32di(__C, __D);
@@ -435,7 +435,7 @@ __rorq(unsigned long long __X, int __C) {
#undef __DEFAULT_FN_ATTRS
#undef __DEFAULT_FN_ATTRS_CAST
-#undef __DEFAULT_FN_ATTRS_SSE42
+#undef __DEFAULT_FN_ATTRS_CRC32
#undef __DEFAULT_FN_ATTRS_CONSTEXPR
#endif /* __IA32INTRIN_H */
diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h
index 56d3dadf6a33..e5174f8785e5 100644
--- a/clang/lib/Headers/immintrin.h
+++ b/clang/lib/Headers/immintrin.h
@@ -10,6 +10,10 @@
#ifndef __IMMINTRIN_H
#define __IMMINTRIN_H
+#if !defined(__i386__) && !defined(__x86_64__)
+#error "This header is only meant to be used on x86 and x64 architecture"
+#endif
+
#include <x86gprintrin.h>
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
@@ -210,6 +214,20 @@
#include <avx512pfintrin.h>
#endif
+/*
+ * FIXME: the _Float16 type is only legal when the hardware supports float16
+ * operations. We use __AVX512FP16__ to identify whether float16 is
+ * supported, so when it is not supported the related headers are not
+ * included.
+ */
+#if defined(__AVX512FP16__)
+#include <avx512fp16intrin.h>
+#endif
+
+#if defined(__AVX512FP16__) && defined(__AVX512VL__)
+#include <avx512vlfp16intrin.h>
+#endif
+
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__AVX512BF16__)
#include <avx512bf16intrin.h>
@@ -525,13 +543,13 @@ extern "C" {
#if defined(__i386__) || defined(__x86_64__)
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedExchange_HLEAcquire(long volatile *_Target, long _Value) {
- __asm__ __volatile__(".byte 0xf2 ; lock ; xchg %0, %1"
+ __asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}"
: "+r" (_Value), "+m" (*_Target) :: "memory");
return _Value;
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedExchange_HLERelease(long volatile *_Target, long _Value) {
- __asm__ __volatile__(".byte 0xf3 ; lock ; xchg %0, %1"
+ __asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}"
: "+r" (_Value), "+m" (*_Target) :: "memory");
return _Value;
}
@@ -539,13 +557,13 @@ _InterlockedExchange_HLERelease(long volatile *_Target, long _Value) {
#if defined(__x86_64__)
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedExchange64_HLEAcquire(__int64 volatile *_Target, __int64 _Value) {
- __asm__ __volatile__(".byte 0xf2 ; lock ; xchg %0, %1"
+ __asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}"
: "+r" (_Value), "+m" (*_Target) :: "memory");
return _Value;
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) {
- __asm__ __volatile__(".byte 0xf3 ; lock ; xchg %0, %1"
+ __asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}"
: "+r" (_Value), "+m" (*_Target) :: "memory");
return _Value;
}
@@ -557,7 +575,7 @@ _InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) {
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedCompareExchange_HLEAcquire(long volatile *_Destination,
long _Exchange, long _Comparand) {
- __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg %2, %1"
+ __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}"
: "+a" (_Comparand), "+m" (*_Destination)
: "r" (_Exchange) : "memory");
return _Comparand;
@@ -565,7 +583,7 @@ _InterlockedCompareExchange_HLEAcquire(long volatile *_Destination,
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedCompareExchange_HLERelease(long volatile *_Destination,
long _Exchange, long _Comparand) {
- __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg %2, %1"
+ __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}"
: "+a" (_Comparand), "+m" (*_Destination)
: "r" (_Exchange) : "memory");
return _Comparand;
@@ -575,7 +593,7 @@ _InterlockedCompareExchange_HLERelease(long volatile *_Destination,
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination,
__int64 _Exchange, __int64 _Comparand) {
- __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg %2, %1"
+ __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}"
: "+a" (_Comparand), "+m" (*_Destination)
: "r" (_Exchange) : "memory");
return _Comparand;
@@ -583,7 +601,7 @@ _InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination,
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination,
__int64 _Exchange, __int64 _Comparand) {
- __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg %2, %1"
+ __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}"
: "+a" (_Comparand), "+m" (*_Destination)
: "r" (_Exchange) : "memory");
return _Comparand;
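/* Editor's illustration (not part of the upstream diff): the {att|intel}
 * brace construct in these asm strings is GCC/Clang's multi-dialect syntax --
 * the first alternative is emitted for the AT&T dialect and the second under
 * -masm=intel, which is why each operand list now appears in both orders.
 * A minimal stand-alone use of the same construct: */
static inline unsigned long swap_with(unsigned long *p, unsigned long v)
{
  __asm__ __volatile__("xchg {%0, %1|%1, %0}"
                       : "+r"(v), "+m"(*p)
                       :
                       : "memory");
  return v;
}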
diff --git a/clang/lib/Headers/intrin.h b/clang/lib/Headers/intrin.h
index ff8eb8fca268..02e66d02067c 100644
--- a/clang/lib/Headers/intrin.h
+++ b/clang/lib/Headers/intrin.h
@@ -97,8 +97,9 @@ unsigned long __readcr8(void);
unsigned int __readdr(unsigned int);
#ifdef __i386__
unsigned char __readfsbyte(unsigned long);
-unsigned __int64 __readfsqword(unsigned long);
unsigned short __readfsword(unsigned long);
+unsigned long __readfsdword(unsigned long);
+unsigned __int64 __readfsqword(unsigned long);
#endif
unsigned __int64 __readmsr(unsigned long);
unsigned __int64 __readpmc(unsigned long);
@@ -149,10 +150,8 @@ long _InterlockedExchangeAdd_HLEAcquire(long volatile *, long);
long _InterlockedExchangeAdd_HLERelease(long volatile *, long);
__int64 _InterlockedExchangeAdd64_HLEAcquire(__int64 volatile *, __int64);
__int64 _InterlockedExchangeAdd64_HLERelease(__int64 volatile *, __int64);
-void __attribute__((__deprecated__(
- "use other intrinsics or C++11 atomics instead"))) _ReadBarrier(void);
-void __attribute__((__deprecated__(
- "use other intrinsics or C++11 atomics instead"))) _ReadWriteBarrier(void);
+void _ReadBarrier(void);
+void _ReadWriteBarrier(void);
unsigned int _rorx_u32(unsigned int, const unsigned int);
int _sarx_i32(int, unsigned int);
#if __STDC_HOSTED__
@@ -163,8 +162,7 @@ unsigned int _shrx_u32(unsigned int, unsigned int);
void _Store_HLERelease(long volatile *, long);
void _Store64_HLERelease(__int64 volatile *, __int64);
void _StorePointer_HLERelease(void *volatile *, void *);
-void __attribute__((__deprecated__(
- "use other intrinsics or C++11 atomics instead"))) _WriteBarrier(void);
+void _WriteBarrier(void);
unsigned __int32 xbegin(void);
void _xend(void);
@@ -457,7 +455,9 @@ static __inline__ void __DEFAULT_FN_ATTRS __movsb(unsigned char *__dst,
:
: "memory");
#else
- __asm__ __volatile__("xchg %%esi, %1\nrep movsb\nxchg %%esi, %1"
+ __asm__ __volatile__("xchg {%%esi, %1|%1, esi}\n"
+ "rep movsb\n"
+ "xchg {%%esi, %1|%1, esi}"
: "+D"(__dst), "+r"(__src), "+c"(__n)
:
: "memory");
@@ -467,12 +467,14 @@ static __inline__ void __DEFAULT_FN_ATTRS __movsd(unsigned long *__dst,
unsigned long const *__src,
size_t __n) {
#if defined(__x86_64__)
- __asm__ __volatile__("rep movsl"
+ __asm__ __volatile__("rep movs{l|d}"
: "+D"(__dst), "+S"(__src), "+c"(__n)
:
: "memory");
#else
- __asm__ __volatile__("xchg %%esi, %1\nrep movsl\nxchg %%esi, %1"
+ __asm__ __volatile__("xchg {%%esi, %1|%1, esi}\n"
+ "rep movs{l|d}\n"
+ "xchg {%%esi, %1|%1, esi}"
: "+D"(__dst), "+r"(__src), "+c"(__n)
:
: "memory");
@@ -487,7 +489,9 @@ static __inline__ void __DEFAULT_FN_ATTRS __movsw(unsigned short *__dst,
:
: "memory");
#else
- __asm__ __volatile__("xchg %%esi, %1\nrep movsw\nxchg %%esi, %1"
+ __asm__ __volatile__("xchg {%%esi, %1|%1, esi}\n"
+ "rep movsw\n"
+ "xchg {%%esi, %1|%1, esi}"
: "+D"(__dst), "+r"(__src), "+c"(__n)
:
: "memory");
@@ -496,7 +500,7 @@ static __inline__ void __DEFAULT_FN_ATTRS __movsw(unsigned short *__dst,
static __inline__ void __DEFAULT_FN_ATTRS __stosd(unsigned long *__dst,
unsigned long __x,
size_t __n) {
- __asm__ __volatile__("rep stosl"
+ __asm__ __volatile__("rep stos{l|d}"
: "+D"(__dst), "+c"(__n)
: "a"(__x)
: "memory");
@@ -538,9 +542,9 @@ static __inline__ void __DEFAULT_FN_ATTRS __stosq(unsigned __int64 *__dst,
#else
/* x86-64 uses %rbx as the base register, so preserve it. */
#define __cpuid_count(__leaf, __count, __eax, __ebx, __ecx, __edx) \
- __asm("xchgq %%rbx,%q1\n" \
+ __asm("xchg{q} {%%rbx, %q1|%q1, rbx}\n" \
"cpuid\n" \
- "xchgq %%rbx,%q1" \
+ "xchg{q} {%%rbx, %q1|%q1, rbx}" \
: "=a"(__eax), "=r"(__ebx), "=c"(__ecx), "=d"(__edx) \
: "0"(__leaf), "2"(__count))
#endif
@@ -574,6 +578,9 @@ void _WriteStatusReg(int, __int64);
unsigned short __cdecl _byteswap_ushort(unsigned short val);
unsigned long __cdecl _byteswap_ulong (unsigned long val);
unsigned __int64 __cdecl _byteswap_uint64(unsigned __int64 val);
+
+__int64 __mulh(__int64 __a, __int64 __b);
+unsigned __int64 __umulh(unsigned __int64 __a, unsigned __int64 __b);
#endif
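/* Editor's sketch (illustration only, not part of the upstream diff): the new
 * declarations are the MSVC-compatible high-multiply intrinsics; __umulh(a, b)
 * yields the upper 64 bits of the full 64x64 -> 128-bit unsigned product. The
 * reference computation below (valid on 64-bit GNU-mode targets) is my
 * illustration of that behaviour, not code from the header. */
static unsigned long long high_product_reference(unsigned long long a, unsigned long long b)
{
  return (unsigned long long)(((unsigned __int128)a * b) >> 64);
}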
/*----------------------------------------------------------------------------*\
@@ -597,13 +604,17 @@ __readmsr(unsigned long __register) {
static __inline__ unsigned __LPTRINT_TYPE__ __DEFAULT_FN_ATTRS __readcr3(void) {
unsigned __LPTRINT_TYPE__ __cr3_val;
- __asm__ __volatile__ ("mov %%cr3, %0" : "=r"(__cr3_val) : : "memory");
+ __asm__ __volatile__(
+ "mov {%%cr3, %0|%0, cr3}"
+ : "=r"(__cr3_val)
+ :
+ : "memory");
return __cr3_val;
}
static __inline__ void __DEFAULT_FN_ATTRS
__writecr3(unsigned __INTPTR_TYPE__ __cr3_val) {
- __asm__ ("mov %0, %%cr3" : : "r"(__cr3_val) : "memory");
+ __asm__ ("mov {%0, %%cr3|cr3, %0}" : : "r"(__cr3_val) : "memory");
}
#ifdef __cplusplus
diff --git a/clang/lib/Headers/keylockerintrin.h b/clang/lib/Headers/keylockerintrin.h
index 68b0a5689618..ad9428e6c8b5 100644
--- a/clang/lib/Headers/keylockerintrin.h
+++ b/clang/lib/Headers/keylockerintrin.h
@@ -99,7 +99,7 @@ _mm_loadiwkey (unsigned int __ctl, __m128i __intkey,
}
/// Wrap a 128-bit AES key from __key into a key handle and output in
-/// ((__m128i*)__h) to ((__m128i*)__h) + 5 and a 32-bit value as return.
+/// ((__m128i*)__h) to ((__m128i*)__h) + 2 and a 32-bit value as return.
/// The explicit source operand __htype specifies handle restrictions.
///
/// \headerfile <x86intrin.h>
@@ -120,9 +120,6 @@ _mm_loadiwkey (unsigned int __ctl, __m128i __intkey,
/// MEM[__h+127:__h] := Handle[127:0] // AAD
/// MEM[__h+255:__h+128] := Handle[255:128] // Integrity Tag
/// MEM[__h+383:__h+256] := Handle[383:256] // CipherText
-/// MEM[__h+511:__h+384] := 0 // Reserved for future usage
-/// MEM[__h+639:__h+512] := 0 // Reserved for future usage
-/// MEM[__h+767:__h+640] := 0 // Reserved for future usage
/// OF := 0
/// SF := 0
/// ZF := 0
@@ -136,7 +133,7 @@ _mm_encodekey128_u32(unsigned int __htype, __m128i __key, void *__h) {
}
/// Wrap a 256-bit AES key from __key_hi:__key_lo into a key handle, then
-/// output handle in ((__m128i*)__h) to ((__m128i*)__h) + 6 and
+/// output handle in ((__m128i*)__h) to ((__m128i*)__h) + 3 and
/// a 32-bit value as return.
/// The explicit source operand __htype specifies handle restrictions.
///
@@ -160,9 +157,6 @@ _mm_encodekey128_u32(unsigned int __htype, __m128i __key, void *__h) {
/// MEM[__h+255:__h+128] := Handle[255:128] // Tag
/// MEM[__h+383:__h+256] := Handle[383:256] // CipherText[127:0]
/// MEM[__h+511:__h+384] := Handle[511:384] // CipherText[255:128]
-/// MEM[__h+639:__h+512] := 0 // Reserved for future usage
-/// MEM[__h+767:__h+640] := 0 // Reserved for future usage
-/// MEM[__h+895:__h+768] := 0 Integrity// Reserved for future usage
/// OF := 0
/// SF := 0
/// ZF := 0
diff --git a/clang/lib/Headers/mmintrin.h b/clang/lib/Headers/mmintrin.h
index 79a8b55016b1..03bac92198ad 100644
--- a/clang/lib/Headers/mmintrin.h
+++ b/clang/lib/Headers/mmintrin.h
@@ -10,6 +10,10 @@
#ifndef __MMINTRIN_H
#define __MMINTRIN_H
+#if !defined(__i386__) && !defined(__x86_64__)
+#error "This header is only meant to be used on x86 and x64 architecture"
+#endif
+
typedef long long __m64 __attribute__((__vector_size__(8), __aligned__(8)));
typedef long long __v1di __attribute__((__vector_size__(8)));
diff --git a/clang/lib/Headers/nmmintrin.h b/clang/lib/Headers/nmmintrin.h
index 672aea496681..59fc7ec99e61 100644
--- a/clang/lib/Headers/nmmintrin.h
+++ b/clang/lib/Headers/nmmintrin.h
@@ -10,6 +10,10 @@
#ifndef __NMMINTRIN_H
#define __NMMINTRIN_H
+#if !defined(__i386__) && !defined(__x86_64__)
+#error "This header is only meant to be used on x86 and x64 architecture"
+#endif
+
/* To match expectations of gcc we put the sse4.2 definitions into smmintrin.h,
just include it now then. */
#include <smmintrin.h>
diff --git a/clang/lib/Headers/opencl-c-base.h b/clang/lib/Headers/opencl-c-base.h
index 3c5e2c973936..9c81ddb5e2a7 100644
--- a/clang/lib/Headers/opencl-c-base.h
+++ b/clang/lib/Headers/opencl-c-base.h
@@ -12,8 +12,8 @@
// Define extension macros
#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
-// For SPIR all extensions are supported.
-#if defined(__SPIR__)
+// For SPIR and SPIR-V all extensions are supported.
+#if defined(__SPIR__) || defined(__SPIRV__)
#define cl_khr_subgroup_extended_types 1
#define cl_khr_subgroup_non_uniform_vote 1
#define cl_khr_subgroup_ballot 1
@@ -25,12 +25,31 @@
#define cl_khr_integer_dot_product 1
#define __opencl_c_integer_dot_product_input_4x8bit 1
#define __opencl_c_integer_dot_product_input_4x8bit_packed 1
+#define cl_ext_float_atomics 1
+#ifdef cl_khr_fp16
+#define __opencl_c_ext_fp16_global_atomic_load_store 1
+#define __opencl_c_ext_fp16_local_atomic_load_store 1
+#define __opencl_c_ext_fp16_global_atomic_add 1
+#define __opencl_c_ext_fp16_local_atomic_add 1
+#define __opencl_c_ext_fp16_global_atomic_min_max 1
+#define __opencl_c_ext_fp16_local_atomic_min_max 1
+#endif
+#ifdef cl_khr_fp64
+#define __opencl_c_ext_fp64_global_atomic_add 1
+#define __opencl_c_ext_fp64_local_atomic_add 1
+#define __opencl_c_ext_fp64_global_atomic_min_max 1
+#define __opencl_c_ext_fp64_local_atomic_min_max 1
+#endif
+#define __opencl_c_ext_fp32_global_atomic_add 1
+#define __opencl_c_ext_fp32_local_atomic_add 1
+#define __opencl_c_ext_fp32_global_atomic_min_max 1
+#define __opencl_c_ext_fp32_local_atomic_min_max 1
-#endif // defined(__SPIR__)
+#endif // defined(__SPIR__) || defined(__SPIRV__)
#endif // (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
// Define feature macros for OpenCL C 2.0
-#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ == 200)
+#if (__OPENCL_CPP_VERSION__ == 100 || __OPENCL_C_VERSION__ == 200)
#define __opencl_c_pipes 1
#define __opencl_c_generic_address_space 1
#define __opencl_c_work_group_collective_functions 1
@@ -45,12 +64,12 @@
#endif
// Define header-only feature macros for OpenCL C 3.0.
-#if (__OPENCL_C_VERSION__ == 300)
-// For the SPIR target all features are supported.
-#if defined(__SPIR__)
+#if (__OPENCL_CPP_VERSION__ == 202100 || __OPENCL_C_VERSION__ == 300)
+// For the SPIR and SPIR-V target all features are supported.
+#if defined(__SPIR__) || defined(__SPIRV__)
#define __opencl_c_atomic_scope_all_devices 1
#endif // defined(__SPIR__)
-#endif // (__OPENCL_C_VERSION__ == 300)
+#endif // (__OPENCL_CPP_VERSION__ == 202100 || __OPENCL_C_VERSION__ == 300)
// built-in scalar data types:
@@ -329,11 +348,17 @@ typedef enum memory_scope {
memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE,
#if defined(__opencl_c_atomic_scope_all_devices)
memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES,
-#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0)
+#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100)
memory_scope_all_devices = memory_scope_all_svm_devices,
-#endif // __OPENCL_C_VERSION__ >= CL_VERSION_3_0
+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100)
#endif // defined(__opencl_c_atomic_scope_all_devices)
-#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups)
+/**
+ * Sub-groups have different forward-progress requirements under each
+ * extension, so test all the relevant macros:
+ * CL 3.0 sub-groups: "they are not guaranteed to make independent forward progress"
+ * KHR subgroups: "Subgroups within a workgroup are independent, make forward progress with respect to each other"
+ */
+#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups) || defined(__opencl_c_subgroups)
memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP
#endif
} memory_scope;
@@ -572,6 +597,26 @@ typedef struct {
#define as_intptr_t(x) __builtin_astype((x), intptr_t)
#define as_uintptr_t(x) __builtin_astype((x), uintptr_t)
+// C++ for OpenCL - __remove_address_space
+#if defined(__OPENCL_CPP_VERSION__)
+template <typename _Tp> struct __remove_address_space { using type = _Tp; };
+template <typename _Tp> struct __remove_address_space<__generic _Tp> {
+ using type = _Tp;
+};
+template <typename _Tp> struct __remove_address_space<__global _Tp> {
+ using type = _Tp;
+};
+template <typename _Tp> struct __remove_address_space<__private _Tp> {
+ using type = _Tp;
+};
+template <typename _Tp> struct __remove_address_space<__local _Tp> {
+ using type = _Tp;
+};
+template <typename _Tp> struct __remove_address_space<__constant _Tp> {
+ using type = _Tp;
+};
+#endif
+
// OpenCL v1.1 s6.9, v1.2/2.0 s6.10 - Function qualifiers
#define __kernel_exec(X, typen) __kernel \
diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h
index fc50dd718c4e..32af848a94c4 100644
--- a/clang/lib/Headers/opencl-c.h
+++ b/clang/lib/Headers/opencl-c.h
@@ -23,11 +23,14 @@
#endif //cl_khr_3d_image_writes
#endif //__OPENCL_C_VERSION__ < CL_VERSION_2_0
-
-#if (defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2)) && defined(__SPIR__)
+#if (defined(__OPENCL_CPP_VERSION__) || \
+ (__OPENCL_C_VERSION__ >= CL_VERSION_1_2)) && \
+ (defined(__SPIR__) || defined(__SPIRV__))
#pragma OPENCL EXTENSION cl_intel_planar_yuv : begin
#pragma OPENCL EXTENSION cl_intel_planar_yuv : end
-#endif // (defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2)) && defined(__SPIR__)
+#endif // (defined(__OPENCL_CPP_VERSION__) ||
+ // (__OPENCL_C_VERSION__ >= CL_VERSION_1_2)) &&
+ // (defined(__SPIR__) || defined(__SPIRV__))
#define __ovld __attribute__((overloadable))
#define __conv __attribute__((convergent))
@@ -12070,33 +12073,28 @@ void __ovld vstore_half16_rtn(double16 data, size_t offset, __private half *p);
* The address computed as (p + (offset * 4))
* must be aligned to sizeof (half) * 4 bytes.
*/
-float __ovld vloada_half(size_t offset, const __constant half *p);
float2 __ovld vloada_half2(size_t offset, const __constant half *p);
float3 __ovld vloada_half3(size_t offset, const __constant half *p);
float4 __ovld vloada_half4(size_t offset, const __constant half *p);
float8 __ovld vloada_half8(size_t offset, const __constant half *p);
float16 __ovld vloada_half16(size_t offset, const __constant half *p);
#if defined(__opencl_c_generic_address_space)
-float __ovld vloada_half(size_t offset, const half *p);
float2 __ovld vloada_half2(size_t offset, const half *p);
float3 __ovld vloada_half3(size_t offset, const half *p);
float4 __ovld vloada_half4(size_t offset, const half *p);
float8 __ovld vloada_half8(size_t offset, const half *p);
float16 __ovld vloada_half16(size_t offset, const half *p);
#else
-float __ovld vloada_half(size_t offset, const __global half *p);
float2 __ovld vloada_half2(size_t offset, const __global half *p);
float3 __ovld vloada_half3(size_t offset, const __global half *p);
float4 __ovld vloada_half4(size_t offset, const __global half *p);
float8 __ovld vloada_half8(size_t offset, const __global half *p);
float16 __ovld vloada_half16(size_t offset, const __global half *p);
-float __ovld vloada_half(size_t offset, const __local half *p);
float2 __ovld vloada_half2(size_t offset, const __local half *p);
float3 __ovld vloada_half3(size_t offset, const __local half *p);
float4 __ovld vloada_half4(size_t offset, const __local half *p);
float8 __ovld vloada_half8(size_t offset, const __local half *p);
float16 __ovld vloada_half16(size_t offset, const __local half *p);
-float __ovld vloada_half(size_t offset, const __private half *p);
float2 __ovld vloada_half2(size_t offset, const __private half *p);
float3 __ovld vloada_half3(size_t offset, const __private half *p);
float4 __ovld vloada_half4(size_t offset, const __private half *p);
@@ -12121,35 +12119,30 @@ float16 __ovld vloada_half16(size_t offset, const __private half *p);
* round to nearest even.
*/
#if defined(__opencl_c_generic_address_space)
-void __ovld vstorea_half(float data, size_t offset, half *p);
void __ovld vstorea_half2(float2 data, size_t offset, half *p);
void __ovld vstorea_half3(float3 data, size_t offset, half *p);
void __ovld vstorea_half4(float4 data, size_t offset, half *p);
void __ovld vstorea_half8(float8 data, size_t offset, half *p);
void __ovld vstorea_half16(float16 data, size_t offset, half *p);
-void __ovld vstorea_half_rte(float data, size_t offset, half *p);
void __ovld vstorea_half2_rte(float2 data, size_t offset, half *p);
void __ovld vstorea_half3_rte(float3 data, size_t offset, half *p);
void __ovld vstorea_half4_rte(float4 data, size_t offset, half *p);
void __ovld vstorea_half8_rte(float8 data, size_t offset, half *p);
void __ovld vstorea_half16_rte(float16 data, size_t offset, half *p);
-void __ovld vstorea_half_rtz(float data, size_t offset, half *p);
void __ovld vstorea_half2_rtz(float2 data, size_t offset, half *p);
void __ovld vstorea_half3_rtz(float3 data, size_t offset, half *p);
void __ovld vstorea_half4_rtz(float4 data, size_t offset, half *p);
void __ovld vstorea_half8_rtz(float8 data, size_t offset, half *p);
void __ovld vstorea_half16_rtz(float16 data, size_t offset, half *p);
-void __ovld vstorea_half_rtp(float data, size_t offset, half *p);
void __ovld vstorea_half2_rtp(float2 data, size_t offset, half *p);
void __ovld vstorea_half3_rtp(float3 data, size_t offset, half *p);
void __ovld vstorea_half4_rtp(float4 data, size_t offset, half *p);
void __ovld vstorea_half8_rtp(float8 data, size_t offset, half *p);
void __ovld vstorea_half16_rtp(float16 data, size_t offset, half *p);
-void __ovld vstorea_half_rtn(float data, size_t offset, half *p);
void __ovld vstorea_half2_rtn(float2 data, size_t offset, half *p);
void __ovld vstorea_half3_rtn(float3 data, size_t offset, half *p);
void __ovld vstorea_half4_rtn(float4 data, size_t offset, half *p);
@@ -12157,35 +12150,30 @@ void __ovld vstorea_half8_rtn(float8 data, size_t offset, half *p);
void __ovld vstorea_half16_rtn(float16 data, size_t offset, half *p);
#ifdef cl_khr_fp64
-void __ovld vstorea_half(double data, size_t offset, half *p);
void __ovld vstorea_half2(double2 data, size_t offset, half *p);
void __ovld vstorea_half3(double3 data, size_t offset, half *p);
void __ovld vstorea_half4(double4 data, size_t offset, half *p);
void __ovld vstorea_half8(double8 data, size_t offset, half *p);
void __ovld vstorea_half16(double16 data, size_t offset, half *p);
-void __ovld vstorea_half_rte(double data, size_t offset, half *p);
void __ovld vstorea_half2_rte(double2 data, size_t offset, half *p);
void __ovld vstorea_half3_rte(double3 data, size_t offset, half *p);
void __ovld vstorea_half4_rte(double4 data, size_t offset, half *p);
void __ovld vstorea_half8_rte(double8 data, size_t offset, half *p);
void __ovld vstorea_half16_rte(double16 data, size_t offset, half *p);
-void __ovld vstorea_half_rtz(double data, size_t offset, half *p);
void __ovld vstorea_half2_rtz(double2 data, size_t offset, half *p);
void __ovld vstorea_half3_rtz(double3 data, size_t offset, half *p);
void __ovld vstorea_half4_rtz(double4 data, size_t offset, half *p);
void __ovld vstorea_half8_rtz(double8 data, size_t offset, half *p);
void __ovld vstorea_half16_rtz(double16 data, size_t offset, half *p);
-void __ovld vstorea_half_rtp(double data, size_t offset, half *p);
void __ovld vstorea_half2_rtp(double2 data, size_t offset, half *p);
void __ovld vstorea_half3_rtp(double3 data, size_t offset, half *p);
void __ovld vstorea_half4_rtp(double4 data, size_t offset, half *p);
void __ovld vstorea_half8_rtp(double8 data, size_t offset, half *p);
void __ovld vstorea_half16_rtp(double16 data, size_t offset, half *p);
-void __ovld vstorea_half_rtn(double data, size_t offset, half *p);
void __ovld vstorea_half2_rtn(double2 data, size_t offset, half *p);
void __ovld vstorea_half3_rtn(double3 data, size_t offset, half *p);
void __ovld vstorea_half4_rtn(double4 data, size_t offset, half *p);
@@ -12194,105 +12182,90 @@ void __ovld vstorea_half16_rtn(double16 data, size_t offset, half *p);
#endif //cl_khr_fp64
#else
-void __ovld vstorea_half(float data, size_t offset, __global half *p);
void __ovld vstorea_half2(float2 data, size_t offset, __global half *p);
void __ovld vstorea_half3(float3 data, size_t offset, __global half *p);
void __ovld vstorea_half4(float4 data, size_t offset, __global half *p);
void __ovld vstorea_half8(float8 data, size_t offset, __global half *p);
void __ovld vstorea_half16(float16 data, size_t offset, __global half *p);
-void __ovld vstorea_half_rte(float data, size_t offset, __global half *p);
void __ovld vstorea_half2_rte(float2 data, size_t offset, __global half *p);
void __ovld vstorea_half3_rte(float3 data, size_t offset, __global half *p);
void __ovld vstorea_half4_rte(float4 data, size_t offset, __global half *p);
void __ovld vstorea_half8_rte(float8 data, size_t offset, __global half *p);
void __ovld vstorea_half16_rte(float16 data, size_t offset, __global half *p);
-void __ovld vstorea_half_rtz(float data, size_t offset, __global half *p);
void __ovld vstorea_half2_rtz(float2 data, size_t offset, __global half *p);
void __ovld vstorea_half3_rtz(float3 data, size_t offset, __global half *p);
void __ovld vstorea_half4_rtz(float4 data, size_t offset, __global half *p);
void __ovld vstorea_half8_rtz(float8 data, size_t offset, __global half *p);
void __ovld vstorea_half16_rtz(float16 data, size_t offset, __global half *p);
-void __ovld vstorea_half_rtp(float data, size_t offset, __global half *p);
void __ovld vstorea_half2_rtp(float2 data, size_t offset, __global half *p);
void __ovld vstorea_half3_rtp(float3 data, size_t offset, __global half *p);
void __ovld vstorea_half4_rtp(float4 data, size_t offset, __global half *p);
void __ovld vstorea_half8_rtp(float8 data, size_t offset, __global half *p);
void __ovld vstorea_half16_rtp(float16 data, size_t offset, __global half *p);
-void __ovld vstorea_half_rtn(float data, size_t offset, __global half *p);
void __ovld vstorea_half2_rtn(float2 data, size_t offset, __global half *p);
void __ovld vstorea_half3_rtn(float3 data, size_t offset, __global half *p);
void __ovld vstorea_half4_rtn(float4 data, size_t offset, __global half *p);
void __ovld vstorea_half8_rtn(float8 data, size_t offset, __global half *p);
void __ovld vstorea_half16_rtn(float16 data, size_t offset, __global half *p);
-void __ovld vstorea_half(float data, size_t offset, __local half *p);
void __ovld vstorea_half2(float2 data, size_t offset, __local half *p);
void __ovld vstorea_half3(float3 data, size_t offset, __local half *p);
void __ovld vstorea_half4(float4 data, size_t offset, __local half *p);
void __ovld vstorea_half8(float8 data, size_t offset, __local half *p);
void __ovld vstorea_half16(float16 data, size_t offset, __local half *p);
-void __ovld vstorea_half_rte(float data, size_t offset, __local half *p);
void __ovld vstorea_half2_rte(float2 data, size_t offset, __local half *p);
void __ovld vstorea_half3_rte(float3 data, size_t offset, __local half *p);
void __ovld vstorea_half4_rte(float4 data, size_t offset, __local half *p);
void __ovld vstorea_half8_rte(float8 data, size_t offset, __local half *p);
void __ovld vstorea_half16_rte(float16 data, size_t offset, __local half *p);
-void __ovld vstorea_half_rtz(float data, size_t offset, __local half *p);
void __ovld vstorea_half2_rtz(float2 data, size_t offset, __local half *p);
void __ovld vstorea_half3_rtz(float3 data, size_t offset, __local half *p);
void __ovld vstorea_half4_rtz(float4 data, size_t offset, __local half *p);
void __ovld vstorea_half8_rtz(float8 data, size_t offset, __local half *p);
void __ovld vstorea_half16_rtz(float16 data, size_t offset, __local half *p);
-void __ovld vstorea_half_rtp(float data, size_t offset, __local half *p);
void __ovld vstorea_half2_rtp(float2 data, size_t offset, __local half *p);
void __ovld vstorea_half3_rtp(float3 data, size_t offset, __local half *p);
void __ovld vstorea_half4_rtp(float4 data, size_t offset, __local half *p);
void __ovld vstorea_half8_rtp(float8 data, size_t offset, __local half *p);
void __ovld vstorea_half16_rtp(float16 data, size_t offset, __local half *p);
-void __ovld vstorea_half_rtn(float data, size_t offset, __local half *p);
void __ovld vstorea_half2_rtn(float2 data, size_t offset, __local half *p);
void __ovld vstorea_half3_rtn(float3 data, size_t offset, __local half *p);
void __ovld vstorea_half4_rtn(float4 data, size_t offset, __local half *p);
void __ovld vstorea_half8_rtn(float8 data, size_t offset, __local half *p);
void __ovld vstorea_half16_rtn(float16 data, size_t offset, __local half *p);
-void __ovld vstorea_half(float data, size_t offset, __private half *p);
void __ovld vstorea_half2(float2 data, size_t offset, __private half *p);
void __ovld vstorea_half3(float3 data, size_t offset, __private half *p);
void __ovld vstorea_half4(float4 data, size_t offset, __private half *p);
void __ovld vstorea_half8(float8 data, size_t offset, __private half *p);
void __ovld vstorea_half16(float16 data, size_t offset, __private half *p);
-void __ovld vstorea_half_rte(float data, size_t offset, __private half *p);
void __ovld vstorea_half2_rte(float2 data, size_t offset, __private half *p);
void __ovld vstorea_half3_rte(float3 data, size_t offset, __private half *p);
void __ovld vstorea_half4_rte(float4 data, size_t offset, __private half *p);
void __ovld vstorea_half8_rte(float8 data, size_t offset, __private half *p);
void __ovld vstorea_half16_rte(float16 data, size_t offset, __private half *p);
-void __ovld vstorea_half_rtz(float data, size_t offset, __private half *p);
void __ovld vstorea_half2_rtz(float2 data, size_t offset, __private half *p);
void __ovld vstorea_half3_rtz(float3 data, size_t offset, __private half *p);
void __ovld vstorea_half4_rtz(float4 data, size_t offset, __private half *p);
void __ovld vstorea_half8_rtz(float8 data, size_t offset, __private half *p);
void __ovld vstorea_half16_rtz(float16 data, size_t offset, __private half *p);
-void __ovld vstorea_half_rtp(float data, size_t offset, __private half *p);
void __ovld vstorea_half2_rtp(float2 data, size_t offset, __private half *p);
void __ovld vstorea_half3_rtp(float3 data, size_t offset, __private half *p);
void __ovld vstorea_half4_rtp(float4 data, size_t offset, __private half *p);
void __ovld vstorea_half8_rtp(float8 data, size_t offset, __private half *p);
void __ovld vstorea_half16_rtp(float16 data, size_t offset, __private half *p);
-void __ovld vstorea_half_rtn(float data, size_t offset, __private half *p);
void __ovld vstorea_half2_rtn(float2 data, size_t offset, __private half *p);
void __ovld vstorea_half3_rtn(float3 data, size_t offset, __private half *p);
void __ovld vstorea_half4_rtn(float4 data, size_t offset, __private half *p);
@@ -12300,105 +12273,90 @@ void __ovld vstorea_half8_rtn(float8 data, size_t offset, __private half *p);
void __ovld vstorea_half16_rtn(float16 data, size_t offset, __private half *p);
#ifdef cl_khr_fp64
-void __ovld vstorea_half(double data, size_t offset, __global half *p);
void __ovld vstorea_half2(double2 data, size_t offset, __global half *p);
void __ovld vstorea_half3(double3 data, size_t offset, __global half *p);
void __ovld vstorea_half4(double4 data, size_t offset, __global half *p);
void __ovld vstorea_half8(double8 data, size_t offset, __global half *p);
void __ovld vstorea_half16(double16 data, size_t offset, __global half *p);
-void __ovld vstorea_half_rte(double data, size_t offset, __global half *p);
void __ovld vstorea_half2_rte(double2 data, size_t offset, __global half *p);
void __ovld vstorea_half3_rte(double3 data, size_t offset, __global half *p);
void __ovld vstorea_half4_rte(double4 data, size_t offset, __global half *p);
void __ovld vstorea_half8_rte(double8 data, size_t offset, __global half *p);
void __ovld vstorea_half16_rte(double16 data, size_t offset, __global half *p);
-void __ovld vstorea_half_rtz(double data, size_t offset, __global half *p);
void __ovld vstorea_half2_rtz(double2 data, size_t offset, __global half *p);
void __ovld vstorea_half3_rtz(double3 data, size_t offset, __global half *p);
void __ovld vstorea_half4_rtz(double4 data, size_t offset, __global half *p);
void __ovld vstorea_half8_rtz(double8 data, size_t offset, __global half *p);
void __ovld vstorea_half16_rtz(double16 data, size_t offset, __global half *p);
-void __ovld vstorea_half_rtp(double data, size_t offset, __global half *p);
void __ovld vstorea_half2_rtp(double2 data, size_t offset, __global half *p);
void __ovld vstorea_half3_rtp(double3 data, size_t offset, __global half *p);
void __ovld vstorea_half4_rtp(double4 data, size_t offset, __global half *p);
void __ovld vstorea_half8_rtp(double8 data, size_t offset, __global half *p);
void __ovld vstorea_half16_rtp(double16 data, size_t offset, __global half *p);
-void __ovld vstorea_half_rtn(double data, size_t offset, __global half *p);
void __ovld vstorea_half2_rtn(double2 data, size_t offset, __global half *p);
void __ovld vstorea_half3_rtn(double3 data, size_t offset, __global half *p);
void __ovld vstorea_half4_rtn(double4 data, size_t offset, __global half *p);
void __ovld vstorea_half8_rtn(double8 data, size_t offset, __global half *p);
void __ovld vstorea_half16_rtn(double16 data, size_t offset, __global half *p);
-void __ovld vstorea_half(double data, size_t offset, __local half *p);
void __ovld vstorea_half2(double2 data, size_t offset, __local half *p);
void __ovld vstorea_half3(double3 data, size_t offset, __local half *p);
void __ovld vstorea_half4(double4 data, size_t offset, __local half *p);
void __ovld vstorea_half8(double8 data, size_t offset, __local half *p);
void __ovld vstorea_half16(double16 data, size_t offset, __local half *p);
-void __ovld vstorea_half_rte(double data, size_t offset, __local half *p);
void __ovld vstorea_half2_rte(double2 data, size_t offset, __local half *p);
void __ovld vstorea_half3_rte(double3 data, size_t offset, __local half *p);
void __ovld vstorea_half4_rte(double4 data, size_t offset, __local half *p);
void __ovld vstorea_half8_rte(double8 data, size_t offset, __local half *p);
void __ovld vstorea_half16_rte(double16 data, size_t offset, __local half *p);
-void __ovld vstorea_half_rtz(double data, size_t offset, __local half *p);
void __ovld vstorea_half2_rtz(double2 data, size_t offset, __local half *p);
void __ovld vstorea_half3_rtz(double3 data, size_t offset, __local half *p);
void __ovld vstorea_half4_rtz(double4 data, size_t offset, __local half *p);
void __ovld vstorea_half8_rtz(double8 data, size_t offset, __local half *p);
void __ovld vstorea_half16_rtz(double16 data, size_t offset, __local half *p);
-void __ovld vstorea_half_rtp(double data, size_t offset, __local half *p);
void __ovld vstorea_half2_rtp(double2 data, size_t offset, __local half *p);
void __ovld vstorea_half3_rtp(double3 data, size_t offset, __local half *p);
void __ovld vstorea_half4_rtp(double4 data, size_t offset, __local half *p);
void __ovld vstorea_half8_rtp(double8 data, size_t offset, __local half *p);
void __ovld vstorea_half16_rtp(double16 data, size_t offset, __local half *p);
-void __ovld vstorea_half_rtn(double data, size_t offset, __local half *p);
void __ovld vstorea_half2_rtn(double2 data, size_t offset, __local half *p);
void __ovld vstorea_half3_rtn(double3 data, size_t offset, __local half *p);
void __ovld vstorea_half4_rtn(double4 data, size_t offset, __local half *p);
void __ovld vstorea_half8_rtn(double8 data, size_t offset, __local half *p);
void __ovld vstorea_half16_rtn(double16 data, size_t offset, __local half *p);
-void __ovld vstorea_half(double data, size_t offset, __private half *p);
void __ovld vstorea_half2(double2 data, size_t offset, __private half *p);
void __ovld vstorea_half3(double3 data, size_t offset, __private half *p);
void __ovld vstorea_half4(double4 data, size_t offset, __private half *p);
void __ovld vstorea_half8(double8 data, size_t offset, __private half *p);
void __ovld vstorea_half16(double16 data, size_t offset, __private half *p);
-void __ovld vstorea_half_rte(double data, size_t offset, __private half *p);
void __ovld vstorea_half2_rte(double2 data, size_t offset, __private half *p);
void __ovld vstorea_half3_rte(double3 data, size_t offset, __private half *p);
void __ovld vstorea_half4_rte(double4 data, size_t offset, __private half *p);
void __ovld vstorea_half8_rte(double8 data, size_t offset, __private half *p);
void __ovld vstorea_half16_rte(double16 data, size_t offset, __private half *p);
-void __ovld vstorea_half_rtz(double data, size_t offset, __private half *p);
void __ovld vstorea_half2_rtz(double2 data, size_t offset, __private half *p);
void __ovld vstorea_half3_rtz(double3 data, size_t offset, __private half *p);
void __ovld vstorea_half4_rtz(double4 data, size_t offset, __private half *p);
void __ovld vstorea_half8_rtz(double8 data, size_t offset, __private half *p);
void __ovld vstorea_half16_rtz(double16 data, size_t offset, __private half *p);
-void __ovld vstorea_half_rtp(double data, size_t offset, __private half *p);
void __ovld vstorea_half2_rtp(double2 data, size_t offset, __private half *p);
void __ovld vstorea_half3_rtp(double3 data, size_t offset, __private half *p);
void __ovld vstorea_half4_rtp(double4 data, size_t offset, __private half *p);
void __ovld vstorea_half8_rtp(double8 data, size_t offset, __private half *p);
void __ovld vstorea_half16_rtp(double16 data, size_t offset, __private half *p);
-void __ovld vstorea_half_rtn(double data, size_t offset, __private half *p);
void __ovld vstorea_half2_rtn(double2 data, size_t offset, __private half *p);
void __ovld vstorea_half3_rtn(double3 data, size_t offset, __private half *p);
void __ovld vstorea_half4_rtn(double4 data, size_t offset, __private half *p);
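
A minimal usage sketch (hypothetical kernel, not part of this header; assumes the device supports cl_khr_fp64): vstorea_half4_rtn converts a double4 to four half values with round-toward-negative-infinity and stores them at an aligned element offset in a global half buffer.

#pragma OPENCL EXTENSION cl_khr_fp64 : enable
// Hypothetical kernel illustrating the aligned half-store overloads declared above.
__kernel void pack_doubles_to_half(__global const double4 *src, __global half *dst) {
    size_t gid = get_global_id(0);
    // Writes 4 half values starting at dst + gid * 4, rounding each component toward -infinity.
    vstorea_half4_rtn(src[gid], gid, dst);
}
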
@@ -13289,6 +13247,7 @@ unsigned long __ovld atom_xor(volatile __local unsigned long *p, unsigned long v
#endif
// atomic_init()
+#if defined(__opencl_c_generic_address_space)
void __ovld atomic_init(volatile atomic_int *object, int value);
void __ovld atomic_init(volatile atomic_uint *object, uint value);
void __ovld atomic_init(volatile atomic_float *object, float value);
@@ -13299,6 +13258,25 @@ void __ovld atomic_init(volatile atomic_ulong *object, ulong value);
void __ovld atomic_init(volatile atomic_double *object, double value);
#endif //cl_khr_fp64
#endif
+#endif //defined(__opencl_c_generic_address_space)
+#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100)
+void __ovld atomic_init(volatile __global atomic_int *object, int value);
+void __ovld atomic_init(volatile __local atomic_int *object, int value);
+void __ovld atomic_init(volatile __global atomic_uint *object, uint value);
+void __ovld atomic_init(volatile __local atomic_uint *object, uint value);
+void __ovld atomic_init(volatile __global atomic_float *object, float value);
+void __ovld atomic_init(volatile __local atomic_float *object, float value);
+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
+void __ovld atomic_init(volatile __global atomic_long *object, long value);
+void __ovld atomic_init(volatile __local atomic_long *object, long value);
+void __ovld atomic_init(volatile __global atomic_ulong *object, ulong value);
+void __ovld atomic_init(volatile __local atomic_ulong *object, ulong value);
+#ifdef cl_khr_fp64
+void __ovld atomic_init(volatile __global atomic_double *object, double value);
+void __ovld atomic_init(volatile __local atomic_double *object, double value);
+#endif //cl_khr_fp64
+#endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100)
// atomic_work_item_fence()
void __ovld atomic_work_item_fence(cl_mem_fence_flags flags, memory_order order, memory_scope scope);
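
Illustration only (hypothetical kernel): on an OpenCL C 3.0 device without the generic address space, the address-space-qualified atomic_init overloads added above are the ones selected. atomic_init itself is not an atomic operation, so the simplest safe pattern is for each work-item to initialize its own element.

__kernel void zero_flags(__global atomic_int *flags) {
    size_t gid = get_global_id(0);
    // Resolves to the __global atomic_int overload of atomic_init declared above.
    atomic_init(&flags[gid], 0);
}
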
@@ -13308,6 +13286,7 @@ void __ovld atomic_work_item_fence(cl_mem_fence_flags flags, memory_order order,
// add/sub: atomic type argument can be uintptr_t/intptr_t, value type argument can be ptrdiff_t.
#if defined(__opencl_c_atomic_order_seq_cst) && defined(__opencl_c_atomic_scope_device)
+#if defined(__opencl_c_generic_address_space)
int __ovld atomic_fetch_add(volatile atomic_int *object, int operand);
uint __ovld atomic_fetch_add(volatile atomic_uint *object, uint operand);
int __ovld atomic_fetch_sub(volatile atomic_int *object, int operand);
@@ -13322,7 +13301,6 @@ int __ovld atomic_fetch_min(volatile atomic_int *object, int operand);
uint __ovld atomic_fetch_min(volatile atomic_uint *object, uint operand);
int __ovld atomic_fetch_max(volatile atomic_int *object, int operand);
uint __ovld atomic_fetch_max(volatile atomic_uint *object, uint operand);
-
#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
long __ovld atomic_fetch_add(volatile atomic_long *object, long operand);
ulong __ovld atomic_fetch_add(volatile atomic_ulong *object, ulong operand);
@@ -13341,9 +13319,93 @@ ulong __ovld atomic_fetch_max(volatile atomic_ulong *object, ulong operand);
uintptr_t __ovld atomic_fetch_add(volatile atomic_uintptr_t *object, ptrdiff_t operand);
uintptr_t __ovld atomic_fetch_sub(volatile atomic_uintptr_t *object, ptrdiff_t operand);
#endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
+#endif //defined(__opencl_c_generic_address_space)
+#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100)
+int __ovld atomic_fetch_add(volatile __global atomic_int *object, int operand);
+int __ovld atomic_fetch_add(volatile __local atomic_int *object, int operand);
+uint __ovld atomic_fetch_add(volatile __global atomic_uint *object, uint operand);
+uint __ovld atomic_fetch_add(volatile __local atomic_uint *object, uint operand);
+int __ovld atomic_fetch_sub(volatile __global atomic_int *object, int operand);
+int __ovld atomic_fetch_sub(volatile __local atomic_int *object, int operand);
+uint __ovld atomic_fetch_sub(volatile __global atomic_uint *object, uint operand);
+uint __ovld atomic_fetch_sub(volatile __local atomic_uint *object, uint operand);
+int __ovld atomic_fetch_or(volatile __global atomic_int *object, int operand);
+int __ovld atomic_fetch_or(volatile __local atomic_int *object, int operand);
+uint __ovld atomic_fetch_or(volatile __global atomic_uint *object, uint operand);
+uint __ovld atomic_fetch_or(volatile __local atomic_uint *object, uint operand);
+int __ovld atomic_fetch_xor(volatile __global atomic_int *object, int operand);
+int __ovld atomic_fetch_xor(volatile __local atomic_int *object, int operand);
+uint __ovld atomic_fetch_xor(volatile __global atomic_uint *object, uint operand);
+uint __ovld atomic_fetch_xor(volatile __local atomic_uint *object, uint operand);
+int __ovld atomic_fetch_and(volatile __global atomic_int *object, int operand);
+int __ovld atomic_fetch_and(volatile __local atomic_int *object, int operand);
+uint __ovld atomic_fetch_and(volatile __global atomic_uint *object, uint operand);
+uint __ovld atomic_fetch_and(volatile __local atomic_uint *object, uint operand);
+int __ovld atomic_fetch_min(volatile __global atomic_int *object, int operand);
+int __ovld atomic_fetch_min(volatile __local atomic_int *object, int operand);
+uint __ovld atomic_fetch_min(volatile __global atomic_uint *object, uint operand);
+uint __ovld atomic_fetch_min(volatile __local atomic_uint *object, uint operand);
+int __ovld atomic_fetch_max(volatile __global atomic_int *object, int operand);
+int __ovld atomic_fetch_max(volatile __local atomic_int *object, int operand);
+uint __ovld atomic_fetch_max(volatile __global atomic_uint *object, uint operand);
+uint __ovld atomic_fetch_max(volatile __local atomic_uint *object, uint operand);
+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
+long __ovld atomic_fetch_add(volatile __global atomic_long *object, long operand);
+long __ovld atomic_fetch_add(volatile __local atomic_long *object, long operand);
+ulong __ovld atomic_fetch_add(volatile __global atomic_ulong *object, ulong operand);
+ulong __ovld atomic_fetch_add(volatile __local atomic_ulong *object, ulong operand);
+uintptr_t __ovld atomic_fetch_add(volatile __global atomic_uintptr_t *object, ptrdiff_t operand);
+uintptr_t __ovld atomic_fetch_add(volatile __local atomic_uintptr_t *object, ptrdiff_t operand);
+long __ovld atomic_fetch_sub(volatile __global atomic_long *object, long operand);
+long __ovld atomic_fetch_sub(volatile __local atomic_long *object, long operand);
+ulong __ovld atomic_fetch_sub(volatile __global atomic_ulong *object, ulong operand);
+ulong __ovld atomic_fetch_sub(volatile __local atomic_ulong *object, ulong operand);
+uintptr_t __ovld atomic_fetch_sub(volatile __global atomic_uintptr_t *object, ptrdiff_t operand);
+uintptr_t __ovld atomic_fetch_sub(volatile __local atomic_uintptr_t *object, ptrdiff_t operand);
+long __ovld atomic_fetch_or(volatile __global atomic_long *object, long operand);
+long __ovld atomic_fetch_or(volatile __local atomic_long *object, long operand);
+ulong __ovld atomic_fetch_or(volatile __global atomic_ulong *object, ulong operand);
+ulong __ovld atomic_fetch_or(volatile __local atomic_ulong *object, ulong operand);
+uintptr_t __ovld atomic_fetch_or(volatile __global atomic_uintptr_t *object, intptr_t operand);
+uintptr_t __ovld atomic_fetch_or(volatile __local atomic_uintptr_t *object, intptr_t operand);
+intptr_t __ovld atomic_fetch_or(volatile __global atomic_intptr_t *object, uintptr_t operand);
+intptr_t __ovld atomic_fetch_or(volatile __local atomic_intptr_t *object, uintptr_t operand);
+long __ovld atomic_fetch_xor(volatile __global atomic_long *object, long operand);
+long __ovld atomic_fetch_xor(volatile __local atomic_long *object, long operand);
+ulong __ovld atomic_fetch_xor(volatile __global atomic_ulong *object, ulong operand);
+ulong __ovld atomic_fetch_xor(volatile __local atomic_ulong *object, ulong operand);
+uintptr_t __ovld atomic_fetch_xor(volatile __global atomic_uintptr_t *object, intptr_t operand);
+uintptr_t __ovld atomic_fetch_xor(volatile __local atomic_uintptr_t *object, intptr_t operand);
+intptr_t __ovld atomic_fetch_xor(volatile __global atomic_intptr_t *object, uintptr_t operand);
+intptr_t __ovld atomic_fetch_xor(volatile __local atomic_intptr_t *object, uintptr_t operand);
+long __ovld atomic_fetch_and(volatile __global atomic_long *object, long operand);
+long __ovld atomic_fetch_and(volatile __local atomic_long *object, long operand);
+ulong __ovld atomic_fetch_and(volatile __global atomic_ulong *object, ulong operand);
+ulong __ovld atomic_fetch_and(volatile __local atomic_ulong *object, ulong operand);
+uintptr_t __ovld atomic_fetch_and(volatile __global atomic_uintptr_t *object, intptr_t operand);
+uintptr_t __ovld atomic_fetch_and(volatile __local atomic_uintptr_t *object, intptr_t operand);
+intptr_t __ovld atomic_fetch_and(volatile __global atomic_intptr_t *object, uintptr_t operand);
+intptr_t __ovld atomic_fetch_and(volatile __local atomic_intptr_t *object, uintptr_t operand);
+long __ovld atomic_fetch_min(volatile __global atomic_long *object, long operand);
+long __ovld atomic_fetch_min(volatile __local atomic_long *object, long operand);
+ulong __ovld atomic_fetch_min(volatile __global atomic_ulong *object, ulong operand);
+ulong __ovld atomic_fetch_min(volatile __local atomic_ulong *object, ulong operand);
+uintptr_t __ovld atomic_fetch_min(volatile __global atomic_uintptr_t *object, intptr_t operand);
+uintptr_t __ovld atomic_fetch_min(volatile __local atomic_uintptr_t *object, intptr_t operand);
+intptr_t __ovld atomic_fetch_min(volatile __global atomic_intptr_t *object, uintptr_t operand);
+intptr_t __ovld atomic_fetch_min(volatile __local atomic_intptr_t *object, uintptr_t operand);
+long __ovld atomic_fetch_max(volatile __global atomic_long *object, long operand);
+long __ovld atomic_fetch_max(volatile __local atomic_long *object, long operand);
+ulong __ovld atomic_fetch_max(volatile __global atomic_ulong *object, ulong operand);
+ulong __ovld atomic_fetch_max(volatile __local atomic_ulong *object, ulong operand);
+uintptr_t __ovld atomic_fetch_add(volatile __global atomic_uintptr_t *object, ptrdiff_t operand);
+uintptr_t __ovld atomic_fetch_sub(volatile __local atomic_uintptr_t *object, ptrdiff_t operand);
+#endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100)
#endif
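
Illustrative sketch (hypothetical histogram kernel): without the generic address space, the call below binds to the volatile __global atomic_uint overload of atomic_fetch_add declared above, with the implicit seq_cst order and device scope. The bins buffer is assumed to be zero-initialized on the host before the kernel runs.

__kernel void byte_histogram(__global const uchar *data, __global atomic_uint *bins) {
    size_t gid = get_global_id(0);
    // Implicit memory_order_seq_cst / memory_scope_device, per the overloads above.
    atomic_fetch_add(&bins[data[gid]], 1u);
}
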
#if defined(__opencl_c_atomic_scope_device)
+#if defined(__opencl_c_generic_address_space)
int __ovld atomic_fetch_add_explicit(volatile atomic_int *object, int operand, memory_order order);
uint __ovld atomic_fetch_add_explicit(volatile atomic_uint *object, uint operand, memory_order order);
int __ovld atomic_fetch_sub_explicit(volatile atomic_int *object, int operand, memory_order order);
@@ -13376,8 +13438,92 @@ ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, ulong oper
uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order);
uintptr_t __ovld atomic_fetch_sub_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order);
#endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
+#endif //defined(__opencl_c_generic_address_space)
+#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100)
+int __ovld atomic_fetch_add_explicit(volatile __global atomic_int *object, int operand, memory_order order);
+int __ovld atomic_fetch_add_explicit(volatile __local atomic_int *object, int operand, memory_order order);
+uint __ovld atomic_fetch_add_explicit(volatile __global atomic_uint *object, uint operand, memory_order order);
+uint __ovld atomic_fetch_add_explicit(volatile __local atomic_uint *object, uint operand, memory_order order);
+int __ovld atomic_fetch_sub_explicit(volatile __global atomic_int *object, int operand, memory_order order);
+int __ovld atomic_fetch_sub_explicit(volatile __local atomic_int *object, int operand, memory_order order);
+uint __ovld atomic_fetch_sub_explicit(volatile __global atomic_uint *object, uint operand, memory_order order);
+uint __ovld atomic_fetch_sub_explicit(volatile __local atomic_uint *object, uint operand, memory_order order);
+int __ovld atomic_fetch_or_explicit(volatile __global atomic_int *object, int operand, memory_order order);
+int __ovld atomic_fetch_or_explicit(volatile __local atomic_int *object, int operand, memory_order order);
+uint __ovld atomic_fetch_or_explicit(volatile __global atomic_uint *object, uint operand, memory_order order);
+uint __ovld atomic_fetch_or_explicit(volatile __local atomic_uint *object, uint operand, memory_order order);
+int __ovld atomic_fetch_xor_explicit(volatile __global atomic_int *object, int operand, memory_order order);
+int __ovld atomic_fetch_xor_explicit(volatile __local atomic_int *object, int operand, memory_order order);
+uint __ovld atomic_fetch_xor_explicit(volatile __global atomic_uint *object, uint operand, memory_order order);
+uint __ovld atomic_fetch_xor_explicit(volatile __local atomic_uint *object, uint operand, memory_order order);
+int __ovld atomic_fetch_and_explicit(volatile __global atomic_int *object, int operand, memory_order order);
+int __ovld atomic_fetch_and_explicit(volatile __local atomic_int *object, int operand, memory_order order);
+uint __ovld atomic_fetch_and_explicit(volatile __global atomic_uint *object, uint operand, memory_order order);
+uint __ovld atomic_fetch_and_explicit(volatile __local atomic_uint *object, uint operand, memory_order order);
+int __ovld atomic_fetch_min_explicit(volatile __global atomic_int *object, int operand, memory_order order);
+int __ovld atomic_fetch_min_explicit(volatile __local atomic_int *object, int operand, memory_order order);
+uint __ovld atomic_fetch_min_explicit(volatile __global atomic_uint *object, uint operand, memory_order order);
+uint __ovld atomic_fetch_min_explicit(volatile __local atomic_uint *object, uint operand, memory_order order);
+int __ovld atomic_fetch_max_explicit(volatile __global atomic_int *object, int operand, memory_order order);
+int __ovld atomic_fetch_max_explicit(volatile __local atomic_int *object, int operand, memory_order order);
+uint __ovld atomic_fetch_max_explicit(volatile __global atomic_uint *object, uint operand, memory_order order);
+uint __ovld atomic_fetch_max_explicit(volatile __local atomic_uint *object, uint operand, memory_order order);
+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
+long __ovld atomic_fetch_add_explicit(volatile __global atomic_long *object, long operand, memory_order order);
+long __ovld atomic_fetch_add_explicit(volatile __local atomic_long *object, long operand, memory_order order);
+ulong __ovld atomic_fetch_add_explicit(volatile __global atomic_ulong *object, ulong operand, memory_order order);
+ulong __ovld atomic_fetch_add_explicit(volatile __local atomic_ulong *object, ulong operand, memory_order order);
+uintptr_t __ovld atomic_fetch_add_explicit(volatile __global atomic_uintptr_t *object, ptrdiff_t operand, memory_order order);
+uintptr_t __ovld atomic_fetch_add_explicit(volatile __local atomic_uintptr_t *object, ptrdiff_t operand, memory_order order);
+long __ovld atomic_fetch_sub_explicit(volatile __global atomic_long *object, long operand, memory_order order);
+long __ovld atomic_fetch_sub_explicit(volatile __local atomic_long *object, long operand, memory_order order);
+ulong __ovld atomic_fetch_sub_explicit(volatile __global atomic_ulong *object, ulong operand, memory_order order);
+ulong __ovld atomic_fetch_sub_explicit(volatile __local atomic_ulong *object, ulong operand, memory_order order);
+uintptr_t __ovld atomic_fetch_sub_explicit(volatile __global atomic_uintptr_t *object, ptrdiff_t operand, memory_order order);
+uintptr_t __ovld atomic_fetch_sub_explicit(volatile __local atomic_uintptr_t *object, ptrdiff_t operand, memory_order order);
+long __ovld atomic_fetch_or_explicit(volatile __global atomic_long *object, long operand, memory_order order);
+long __ovld atomic_fetch_or_explicit(volatile __local atomic_long *object, long operand, memory_order order);
+ulong __ovld atomic_fetch_or_explicit(volatile __global atomic_ulong *object, ulong operand, memory_order order);
+ulong __ovld atomic_fetch_or_explicit(volatile __local atomic_ulong *object, ulong operand, memory_order order);
+uintptr_t __ovld atomic_fetch_or_explicit(volatile __global atomic_uintptr_t *object, intptr_t operand, memory_order order);
+uintptr_t __ovld atomic_fetch_or_explicit(volatile __local atomic_uintptr_t *object, intptr_t operand, memory_order order);
+intptr_t __ovld atomic_fetch_or_explicit(volatile __global atomic_intptr_t *object, uintptr_t operand, memory_order order);
+intptr_t __ovld atomic_fetch_or_explicit(volatile __local atomic_intptr_t *object, uintptr_t operand, memory_order order);
+long __ovld atomic_fetch_xor_explicit(volatile __global atomic_long *object, long operand, memory_order order);
+long __ovld atomic_fetch_xor_explicit(volatile __local atomic_long *object, long operand, memory_order order);
+ulong __ovld atomic_fetch_xor_explicit(volatile __global atomic_ulong *object, ulong operand, memory_order order);
+ulong __ovld atomic_fetch_xor_explicit(volatile __local atomic_ulong *object, ulong operand, memory_order order);
+uintptr_t __ovld atomic_fetch_xor_explicit(volatile __global atomic_uintptr_t *object, intptr_t operand, memory_order order);
+uintptr_t __ovld atomic_fetch_xor_explicit(volatile __local atomic_uintptr_t *object, intptr_t operand, memory_order order);
+intptr_t __ovld atomic_fetch_xor_explicit(volatile __global atomic_intptr_t *object, uintptr_t operand, memory_order order);
+intptr_t __ovld atomic_fetch_xor_explicit(volatile __local atomic_intptr_t *object, uintptr_t operand, memory_order order);
+long __ovld atomic_fetch_and_explicit(volatile __global atomic_long *object, long operand, memory_order order);
+long __ovld atomic_fetch_and_explicit(volatile __local atomic_long *object, long operand, memory_order order);
+ulong __ovld atomic_fetch_and_explicit(volatile __global atomic_ulong *object, ulong operand, memory_order order);
+ulong __ovld atomic_fetch_and_explicit(volatile __local atomic_ulong *object, ulong operand, memory_order order);
+uintptr_t __ovld atomic_fetch_and_explicit(volatile __global atomic_uintptr_t *object, intptr_t operand, memory_order order);
+uintptr_t __ovld atomic_fetch_and_explicit(volatile __local atomic_uintptr_t *object, intptr_t operand, memory_order order);
+intptr_t __ovld atomic_fetch_and_explicit(volatile __global atomic_intptr_t *object, uintptr_t operand, memory_order order);
+intptr_t __ovld atomic_fetch_and_explicit(volatile __local atomic_intptr_t *object, uintptr_t operand, memory_order order);
+long __ovld atomic_fetch_min_explicit(volatile __global atomic_long *object, long operand, memory_order order);
+long __ovld atomic_fetch_min_explicit(volatile __local atomic_long *object, long operand, memory_order order);
+ulong __ovld atomic_fetch_min_explicit(volatile __global atomic_ulong *object, ulong operand, memory_order order);
+ulong __ovld atomic_fetch_min_explicit(volatile __local atomic_ulong *object, ulong operand, memory_order order);
+uintptr_t __ovld atomic_fetch_min_explicit(volatile __global atomic_uintptr_t *object, intptr_t operand, memory_order order);
+uintptr_t __ovld atomic_fetch_min_explicit(volatile __local atomic_uintptr_t *object, intptr_t operand, memory_order order);
+intptr_t __ovld atomic_fetch_min_explicit(volatile __global atomic_intptr_t *object, uintptr_t operand, memory_order order);
+intptr_t __ovld atomic_fetch_min_explicit(volatile __local atomic_intptr_t *object, uintptr_t operand, memory_order order);
+long __ovld atomic_fetch_max_explicit(volatile __global atomic_long *object, long operand, memory_order order);
+long __ovld atomic_fetch_max_explicit(volatile __local atomic_long *object, long operand, memory_order order);
+ulong __ovld atomic_fetch_max_explicit(volatile __global atomic_ulong *object, ulong operand, memory_order order);
+ulong __ovld atomic_fetch_max_explicit(volatile __local atomic_ulong *object, ulong operand, memory_order order);
+uintptr_t __ovld atomic_fetch_add_explicit(volatile __global atomic_uintptr_t *object, ptrdiff_t operand, memory_order order);
+uintptr_t __ovld atomic_fetch_sub_explicit(volatile __local atomic_uintptr_t *object, ptrdiff_t operand, memory_order order);
+#endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100)
#endif
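
Sketch of the _explicit forms declared above (hypothetical kernel): a relaxed fetch-min is sufficient when only the final minimum is consumed after the kernel completes; global_min is assumed to be initialized to INT_MAX on the host.

__kernel void track_minimum(__global const int *values, __global atomic_int *global_min) {
    size_t gid = get_global_id(0);
    // Relaxed ordering: no other data is published through this atomic.
    atomic_fetch_min_explicit(&global_min[0], values[gid], memory_order_relaxed);
}
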
+#if defined(__opencl_c_generic_address_space)
int __ovld atomic_fetch_add_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope);
uint __ovld atomic_fetch_add_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope);
int __ovld atomic_fetch_sub_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope);
@@ -13407,15 +13553,469 @@ long __ovld atomic_fetch_min_explicit(volatile atomic_long *object, long operand
ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);
long __ovld atomic_fetch_max_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope);
ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);
-#endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
-#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order, memory_scope scope);
uintptr_t __ovld atomic_fetch_sub_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order, memory_scope scope);
#endif
+#endif //defined(__opencl_c_generic_address_space)
+#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100)
+int __ovld atomic_fetch_add_explicit(volatile __global atomic_int *object, int operand, memory_order order, memory_scope scope);
+int __ovld atomic_fetch_add_explicit(volatile __local atomic_int *object, int operand, memory_order order, memory_scope scope);
+uint __ovld atomic_fetch_add_explicit(volatile __global atomic_uint *object, uint operand, memory_order order, memory_scope scope);
+uint __ovld atomic_fetch_add_explicit(volatile __local atomic_uint *object, uint operand, memory_order order, memory_scope scope);
+int __ovld atomic_fetch_sub_explicit(volatile __global atomic_int *object, int operand, memory_order order, memory_scope scope);
+int __ovld atomic_fetch_sub_explicit(volatile __local atomic_int *object, int operand, memory_order order, memory_scope scope);
+uint __ovld atomic_fetch_sub_explicit(volatile __global atomic_uint *object, uint operand, memory_order order, memory_scope scope);
+uint __ovld atomic_fetch_sub_explicit(volatile __local atomic_uint *object, uint operand, memory_order order, memory_scope scope);
+int __ovld atomic_fetch_or_explicit(volatile __global atomic_int *object, int operand, memory_order order, memory_scope scope);
+int __ovld atomic_fetch_or_explicit(volatile __local atomic_int *object, int operand, memory_order order, memory_scope scope);
+uint __ovld atomic_fetch_or_explicit(volatile __global atomic_uint *object, uint operand, memory_order order, memory_scope scope);
+uint __ovld atomic_fetch_or_explicit(volatile __local atomic_uint *object, uint operand, memory_order order, memory_scope scope);
+int __ovld atomic_fetch_xor_explicit(volatile __global atomic_int *object, int operand, memory_order order, memory_scope scope);
+int __ovld atomic_fetch_xor_explicit(volatile __local atomic_int *object, int operand, memory_order order, memory_scope scope);
+uint __ovld atomic_fetch_xor_explicit(volatile __global atomic_uint *object, uint operand, memory_order order, memory_scope scope);
+uint __ovld atomic_fetch_xor_explicit(volatile __local atomic_uint *object, uint operand, memory_order order, memory_scope scope);
+int __ovld atomic_fetch_and_explicit(volatile __global atomic_int *object, int operand, memory_order order, memory_scope scope);
+int __ovld atomic_fetch_and_explicit(volatile __local atomic_int *object, int operand, memory_order order, memory_scope scope);
+uint __ovld atomic_fetch_and_explicit(volatile __global atomic_uint *object, uint operand, memory_order order, memory_scope scope);
+uint __ovld atomic_fetch_and_explicit(volatile __local atomic_uint *object, uint operand, memory_order order, memory_scope scope);
+int __ovld atomic_fetch_min_explicit(volatile __global atomic_int *object, int operand, memory_order order, memory_scope scope);
+int __ovld atomic_fetch_min_explicit(volatile __local atomic_int *object, int operand, memory_order order, memory_scope scope);
+uint __ovld atomic_fetch_min_explicit(volatile __global atomic_uint *object, uint operand, memory_order order, memory_scope scope);
+uint __ovld atomic_fetch_min_explicit(volatile __local atomic_uint *object, uint operand, memory_order order, memory_scope scope);
+int __ovld atomic_fetch_max_explicit(volatile __global atomic_int *object, int operand, memory_order order, memory_scope scope);
+int __ovld atomic_fetch_max_explicit(volatile __local atomic_int *object, int operand, memory_order order, memory_scope scope);
+uint __ovld atomic_fetch_max_explicit(volatile __global atomic_uint *object, uint operand, memory_order order, memory_scope scope);
+uint __ovld atomic_fetch_max_explicit(volatile __local atomic_uint *object, uint operand, memory_order order, memory_scope scope);
+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
+long __ovld atomic_fetch_add_explicit(volatile __global atomic_long *object, long operand, memory_order order, memory_scope scope);
+long __ovld atomic_fetch_add_explicit(volatile __local atomic_long *object, long operand, memory_order order, memory_scope scope);
+uintptr_t __ovld atomic_fetch_add_explicit(volatile __global atomic_uintptr_t *object, ptrdiff_t operand, memory_order order, memory_scope scope);
+uintptr_t __ovld atomic_fetch_add_explicit(volatile __local atomic_uintptr_t *object, ptrdiff_t operand, memory_order order, memory_scope scope);
+ulong __ovld atomic_fetch_add_explicit(volatile __global atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);
+ulong __ovld atomic_fetch_add_explicit(volatile __local atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);
+long __ovld atomic_fetch_sub_explicit(volatile __global atomic_long *object, long operand, memory_order order, memory_scope scope);
+long __ovld atomic_fetch_sub_explicit(volatile __local atomic_long *object, long operand, memory_order order, memory_scope scope);
+ulong __ovld atomic_fetch_sub_explicit(volatile __global atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);
+ulong __ovld atomic_fetch_sub_explicit(volatile __local atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);
+uintptr_t __ovld atomic_fetch_sub_explicit(volatile __global atomic_uintptr_t *object, ptrdiff_t operand, memory_order order, memory_scope scope);
+uintptr_t __ovld atomic_fetch_sub_explicit(volatile __local atomic_uintptr_t *object, ptrdiff_t operand, memory_order order, memory_scope scope);
+long __ovld atomic_fetch_or_explicit(volatile __global atomic_long *object, long operand, memory_order order, memory_scope scope);
+long __ovld atomic_fetch_or_explicit(volatile __local atomic_long *object, long operand, memory_order order, memory_scope scope);
+ulong __ovld atomic_fetch_or_explicit(volatile __global atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);
+ulong __ovld atomic_fetch_or_explicit(volatile __local atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);
+uintptr_t __ovld atomic_fetch_or_explicit(volatile __global atomic_uintptr_t *object, intptr_t operand, memory_order order, memory_scope scope);
+uintptr_t __ovld atomic_fetch_or_explicit(volatile __local atomic_uintptr_t *object, intptr_t operand, memory_order order, memory_scope scope);
+intptr_t __ovld atomic_fetch_or_explicit(volatile __global atomic_intptr_t *object, uintptr_t operand, memory_order order, memory_scope scope);
+intptr_t __ovld atomic_fetch_or_explicit(volatile __local atomic_intptr_t *object, uintptr_t operand, memory_order order, memory_scope scope);
+long __ovld atomic_fetch_xor_explicit(volatile __global atomic_long *object, long operand, memory_order order, memory_scope scope);
+long __ovld atomic_fetch_xor_explicit(volatile __local atomic_long *object, long operand, memory_order order, memory_scope scope);
+ulong __ovld atomic_fetch_xor_explicit(volatile __global atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);
+ulong __ovld atomic_fetch_xor_explicit(volatile __local atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);
+uintptr_t __ovld atomic_fetch_xor_explicit(volatile __global atomic_uintptr_t *object, intptr_t operand, memory_order order, memory_scope scope);
+uintptr_t __ovld atomic_fetch_xor_explicit(volatile __local atomic_uintptr_t *object, intptr_t operand, memory_order order, memory_scope scope);
+intptr_t __ovld atomic_fetch_xor_explicit(volatile __global atomic_intptr_t *object, uintptr_t operand, memory_order order, memory_scope scope);
+intptr_t __ovld atomic_fetch_xor_explicit(volatile __local atomic_intptr_t *object, uintptr_t operand, memory_order order, memory_scope scope);
+long __ovld atomic_fetch_and_explicit(volatile __global atomic_long *object, long operand, memory_order order, memory_scope scope);
+long __ovld atomic_fetch_and_explicit(volatile __local atomic_long *object, long operand, memory_order order, memory_scope scope);
+ulong __ovld atomic_fetch_and_explicit(volatile __global atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);
+ulong __ovld atomic_fetch_and_explicit(volatile __local atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);
+uintptr_t __ovld atomic_fetch_and_explicit(volatile __global atomic_uintptr_t *object, intptr_t operand, memory_order order, memory_scope scope);
+uintptr_t __ovld atomic_fetch_and_explicit(volatile __local atomic_uintptr_t *object, intptr_t operand, memory_order order, memory_scope scope);
+intptr_t __ovld atomic_fetch_and_explicit(volatile __global atomic_intptr_t *object, uintptr_t operand, memory_order order, memory_scope scope);
+intptr_t __ovld atomic_fetch_and_explicit(volatile __local atomic_intptr_t *object, uintptr_t operand, memory_order order, memory_scope scope);
+long __ovld atomic_fetch_min_explicit(volatile __global atomic_long *object, long operand, memory_order order, memory_scope scope);
+long __ovld atomic_fetch_min_explicit(volatile __local atomic_long *object, long operand, memory_order order, memory_scope scope);
+ulong __ovld atomic_fetch_min_explicit(volatile __global atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);
+ulong __ovld atomic_fetch_min_explicit(volatile __local atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);
+uintptr_t __ovld atomic_fetch_min_explicit(volatile __global atomic_uintptr_t *object, intptr_t operand, memory_order order, memory_scope scope);
+uintptr_t __ovld atomic_fetch_min_explicit(volatile __local atomic_uintptr_t *object, intptr_t operand, memory_order order, memory_scope scope);
+intptr_t __ovld atomic_fetch_min_explicit(volatile __global atomic_intptr_t *object, uintptr_t operand, memory_order order, memory_scope scope);
+intptr_t __ovld atomic_fetch_min_explicit(volatile __local atomic_intptr_t *object, uintptr_t operand, memory_order order, memory_scope scope);
+long __ovld atomic_fetch_max_explicit(volatile __global atomic_long *object, long operand, memory_order order, memory_scope scope);
+long __ovld atomic_fetch_max_explicit(volatile __local atomic_long *object, long operand, memory_order order, memory_scope scope);
+ulong __ovld atomic_fetch_max_explicit(volatile __global atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);
+ulong __ovld atomic_fetch_max_explicit(volatile __local atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);
+uintptr_t __ovld atomic_fetch_add_explicit(volatile __global atomic_uintptr_t *object, ptrdiff_t operand, memory_order order, memory_scope scope);
+uintptr_t __ovld atomic_fetch_sub_explicit(volatile __local atomic_uintptr_t *object, ptrdiff_t operand, memory_order order, memory_scope scope);
+#endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100)
+
+// The functionality added by cl_ext_float_atomics extension
+#if defined(cl_ext_float_atomics)
+
+#if defined(__opencl_c_ext_fp16_global_atomic_load_store)
+void __ovld atomic_store(volatile __global atomic_half *object, half operand);
+void __ovld atomic_store_explicit(volatile __global atomic_half *object,
+ half operand, memory_order order);
+void __ovld atomic_store_explicit(volatile __global atomic_half *object,
+ half operand, memory_order order,
+ memory_scope scope);
+half __ovld atomic_load(volatile __global atomic_half *object);
+half __ovld atomic_load_explicit(volatile __global atomic_half *object,
+ memory_order order);
+half __ovld atomic_load_explicit(volatile __global atomic_half *object,
+ memory_order order, memory_scope scope);
+half __ovld atomic_exchange(volatile __global atomic_half *object,
+ half operand);
+half __ovld atomic_exchange_explicit(volatile __global atomic_half *object,
+ half operand, memory_order order);
+half __ovld atomic_exchange_explicit(volatile __global atomic_half *object,
+ half operand, memory_order order,
+ memory_scope scope);
+#endif // defined(__opencl_c_ext_fp16_global_atomic_load_store)
+
+#if defined(__opencl_c_ext_fp16_local_atomic_load_store)
+void __ovld atomic_store(volatile __local atomic_half *object, half operand);
+void __ovld atomic_store_explicit(volatile __local atomic_half *object,
+ half operand, memory_order order);
+void __ovld atomic_store_explicit(volatile __local atomic_half *object,
+ half operand, memory_order order,
+ memory_scope scope);
+half __ovld atomic_load(volatile __local atomic_half *object);
+half __ovld atomic_load_explicit(volatile __local atomic_half *object,
+ memory_order order);
+half __ovld atomic_load_explicit(volatile __local atomic_half *object,
+ memory_order order, memory_scope scope);
+half __ovld atomic_exchange(volatile __local atomic_half *object, half operand);
+half __ovld atomic_exchange_explicit(volatile __local atomic_half *object,
+ half operand, memory_order order);
+half __ovld atomic_exchange_explicit(volatile __local atomic_half *object,
+ half operand, memory_order order,
+ memory_scope scope);
+#endif // defined(__opencl_c_ext_fp16_local_atomic_load_store)
+
+#if defined(__opencl_c_ext_fp16_global_atomic_load_store) && \
+ defined(__opencl_c_ext_fp16_local_atomic_load_store)
+void __ovld atomic_store(volatile atomic_half *object, half operand);
+void __ovld atomic_store_explicit(volatile atomic_half *object, half operand,
+ memory_order order);
+void __ovld atomic_store_explicit(volatile atomic_half *object, half operand,
+ memory_order order, memory_scope scope);
+half __ovld atomic_load(volatile atomic_half *object);
+half __ovld atomic_load_explicit(volatile atomic_half *object,
+ memory_order order);
+half __ovld atomic_load_explicit(volatile atomic_half *object,
+ memory_order order, memory_scope scope);
+half __ovld atomic_exchange(volatile atomic_half *object, half operand);
+half __ovld atomic_exchange_explicit(volatile atomic_half *object, half operand,
+ memory_order order);
+half __ovld atomic_exchange_explicit(volatile atomic_half *object, half operand,
+ memory_order order, memory_scope scope);
+#endif // defined(__opencl_c_ext_fp16_global_atomic_load_store) &&
+ // defined(__opencl_c_ext_fp16_local_atomic_load_store)
+
+#if defined(__opencl_c_ext_fp16_global_atomic_min_max)
+half __ovld atomic_fetch_min(volatile __global atomic_half *object,
+ half operand);
+half __ovld atomic_fetch_max(volatile __global atomic_half *object,
+ half operand);
+half __ovld atomic_fetch_min_explicit(volatile __global atomic_half *object,
+ half operand, memory_order order);
+half __ovld atomic_fetch_max_explicit(volatile __global atomic_half *object,
+ half operand, memory_order order);
+half __ovld atomic_fetch_min_explicit(volatile __global atomic_half *object,
+ half operand, memory_order order,
+ memory_scope scope);
+half __ovld atomic_fetch_max_explicit(volatile __global atomic_half *object,
+ half operand, memory_order order,
+ memory_scope scope);
+#endif // defined(__opencl_c_ext_fp16_global_atomic_min_max)
+
+#if defined(__opencl_c_ext_fp16_local_atomic_min_max)
+half __ovld atomic_fetch_min(volatile __local atomic_half *object,
+ half operand);
+half __ovld atomic_fetch_max(volatile __local atomic_half *object,
+ half operand);
+half __ovld atomic_fetch_min_explicit(volatile __local atomic_half *object,
+ half operand, memory_order order);
+half __ovld atomic_fetch_max_explicit(volatile __local atomic_half *object,
+ half operand, memory_order order);
+half __ovld atomic_fetch_min_explicit(volatile __local atomic_half *object,
+ half operand, memory_order order,
+ memory_scope scope);
+half __ovld atomic_fetch_max_explicit(volatile __local atomic_half *object,
+ half operand, memory_order order,
+ memory_scope scope);
+#endif // defined(__opencl_c_ext_fp16_local_atomic_min_max)
+
+#if defined(__opencl_c_ext_fp16_global_atomic_min_max) && \
+ defined(__opencl_c_ext_fp16_local_atomic_min_max)
+half __ovld atomic_fetch_min(volatile atomic_half *object, half operand);
+half __ovld atomic_fetch_max(volatile atomic_half *object, half operand);
+half __ovld atomic_fetch_min_explicit(volatile atomic_half *object,
+ half operand, memory_order order);
+half __ovld atomic_fetch_max_explicit(volatile atomic_half *object,
+ half operand, memory_order order);
+half __ovld atomic_fetch_min_explicit(volatile atomic_half *object,
+ half operand, memory_order order,
+ memory_scope scope);
+half __ovld atomic_fetch_max_explicit(volatile atomic_half *object,
+ half operand, memory_order order,
+ memory_scope scope);
+#endif // defined(__opencl_c_ext_fp16_global_atomic_min_max) && \
+ defined(__opencl_c_ext_fp16_local_atomic_min_max)
+
+#if defined(__opencl_c_ext_fp32_global_atomic_min_max)
+float __ovld atomic_fetch_min(volatile __global atomic_float *object,
+ float operand);
+float __ovld atomic_fetch_max(volatile __global atomic_float *object,
+ float operand);
+float __ovld atomic_fetch_min_explicit(volatile __global atomic_float *object,
+ float operand, memory_order order);
+float __ovld atomic_fetch_max_explicit(volatile __global atomic_float *object,
+ float operand, memory_order order);
+float __ovld atomic_fetch_min_explicit(volatile __global atomic_float *object,
+ float operand, memory_order order,
+ memory_scope scope);
+float __ovld atomic_fetch_max_explicit(volatile __global atomic_float *object,
+ float operand, memory_order order,
+ memory_scope scope);
+#endif // defined(__opencl_c_ext_fp32_global_atomic_min_max)
+
+#if defined(__opencl_c_ext_fp32_local_atomic_min_max)
+float __ovld atomic_fetch_min(volatile __local atomic_float *object,
+ float operand);
+float __ovld atomic_fetch_max(volatile __local atomic_float *object,
+ float operand);
+float __ovld atomic_fetch_min_explicit(volatile __local atomic_float *object,
+ float operand, memory_order order);
+float __ovld atomic_fetch_max_explicit(volatile __local atomic_float *object,
+ float operand, memory_order order);
+float __ovld atomic_fetch_min_explicit(volatile __local atomic_float *object,
+ float operand, memory_order order,
+ memory_scope scope);
+float __ovld atomic_fetch_max_explicit(volatile __local atomic_float *object,
+ float operand, memory_order order,
+ memory_scope scope);
+#endif // defined(__opencl_c_ext_fp32_local_atomic_min_max)
+
+#if defined(__opencl_c_ext_fp32_global_atomic_min_max) && \
+ defined(__opencl_c_ext_fp32_local_atomic_min_max)
+float __ovld atomic_fetch_min(volatile atomic_float *object, float operand);
+float __ovld atomic_fetch_max(volatile atomic_float *object, float operand);
+float __ovld atomic_fetch_min_explicit(volatile atomic_float *object,
+ float operand, memory_order order);
+float __ovld atomic_fetch_max_explicit(volatile atomic_float *object,
+ float operand, memory_order order);
+float __ovld atomic_fetch_min_explicit(volatile atomic_float *object,
+ float operand, memory_order order,
+ memory_scope scope);
+float __ovld atomic_fetch_max_explicit(volatile atomic_float *object,
+ float operand, memory_order order,
+ memory_scope scope);
+#endif // defined(__opencl_c_ext_fp32_global_atomic_min_max) && \
+ defined(__opencl_c_ext_fp32_local_atomic_min_max)
+
+#if defined(__opencl_c_ext_fp64_global_atomic_min_max)
+double __ovld atomic_fetch_min(volatile __global atomic_double *object,
+ double operand);
+double __ovld atomic_fetch_max(volatile __global atomic_double *object,
+ double operand);
+double __ovld atomic_fetch_min_explicit(volatile __global atomic_double *object,
+ double operand, memory_order order);
+double __ovld atomic_fetch_max_explicit(volatile __global atomic_double *object,
+ double operand, memory_order order);
+double __ovld atomic_fetch_min_explicit(volatile __global atomic_double *object,
+ double operand, memory_order order,
+ memory_scope scope);
+double __ovld atomic_fetch_max_explicit(volatile __global atomic_double *object,
+ double operand, memory_order order,
+ memory_scope scope);
+#endif // defined(__opencl_c_ext_fp64_global_atomic_min_max)
+
+#if defined(__opencl_c_ext_fp64_local_atomic_min_max)
+double __ovld atomic_fetch_min(volatile __local atomic_double *object,
+ double operand);
+double __ovld atomic_fetch_max(volatile __local atomic_double *object,
+ double operand);
+double __ovld atomic_fetch_min_explicit(volatile __local atomic_double *object,
+ double operand, memory_order order);
+double __ovld atomic_fetch_max_explicit(volatile __local atomic_double *object,
+ double operand, memory_order order);
+double __ovld atomic_fetch_min_explicit(volatile __local atomic_double *object,
+ double operand, memory_order order,
+ memory_scope scope);
+double __ovld atomic_fetch_max_explicit(volatile __local atomic_double *object,
+ double operand, memory_order order,
+ memory_scope scope);
+#endif // defined(__opencl_c_ext_fp64_local_atomic_min_max)
+
+#if defined(__opencl_c_ext_fp64_global_atomic_min_max) && \
+ defined(__opencl_c_ext_fp64_local_atomic_min_max)
+double __ovld atomic_fetch_min(volatile atomic_double *object, double operand);
+double __ovld atomic_fetch_max(volatile atomic_double *object, double operand);
+double __ovld atomic_fetch_min_explicit(volatile atomic_double *object,
+ double operand, memory_order order);
+double __ovld atomic_fetch_max_explicit(volatile atomic_double *object,
+ double operand, memory_order order);
+double __ovld atomic_fetch_min_explicit(volatile atomic_double *object,
+ double operand, memory_order order,
+ memory_scope scope);
+double __ovld atomic_fetch_max_explicit(volatile atomic_double *object,
+ double operand, memory_order order,
+ memory_scope scope);
+#endif // defined(__opencl_c_ext_fp64_global_atomic_min_max) && \
+ defined(__opencl_c_ext_fp64_local_atomic_min_max)
+
+#if defined(__opencl_c_ext_fp16_global_atomic_add)
+half __ovld atomic_fetch_add(volatile __global atomic_half *object,
+ half operand);
+half __ovld atomic_fetch_sub(volatile __global atomic_half *object,
+ half operand);
+half __ovld atomic_fetch_add_explicit(volatile __global atomic_half *object,
+ half operand, memory_order order);
+half __ovld atomic_fetch_sub_explicit(volatile __global atomic_half *object,
+ half operand, memory_order order);
+half __ovld atomic_fetch_add_explicit(volatile __global atomic_half *object,
+ half operand, memory_order order,
+ memory_scope scope);
+half __ovld atomic_fetch_sub_explicit(volatile __global atomic_half *object,
+ half operand, memory_order order,
+ memory_scope scope);
+#endif // defined(__opencl_c_ext_fp16_global_atomic_add)
+
+#if defined(__opencl_c_ext_fp16_local_atomic_add)
+half __ovld atomic_fetch_add(volatile __local atomic_half *object,
+ half operand);
+half __ovld atomic_fetch_sub(volatile __local atomic_half *object,
+ half operand);
+half __ovld atomic_fetch_add_explicit(volatile __local atomic_half *object,
+ half operand, memory_order order);
+half __ovld atomic_fetch_sub_explicit(volatile __local atomic_half *object,
+ half operand, memory_order order);
+half __ovld atomic_fetch_add_explicit(volatile __local atomic_half *object,
+ half operand, memory_order order,
+ memory_scope scope);
+half __ovld atomic_fetch_sub_explicit(volatile __local atomic_half *object,
+ half operand, memory_order order,
+ memory_scope scope);
+#endif // defined(__opencl_c_ext_fp16_local_atomic_add)
+
+#if defined(__opencl_c_ext_fp16_global_atomic_add) && \
+ defined(__opencl_c_ext_fp16_local_atomic_add)
+half __ovld atomic_fetch_add(volatile atomic_half *object, half operand);
+half __ovld atomic_fetch_sub(volatile atomic_half *object, half operand);
+half __ovld atomic_fetch_add_explicit(volatile atomic_half *object,
+ half operand, memory_order order);
+half __ovld atomic_fetch_sub_explicit(volatile atomic_half *object,
+ half operand, memory_order order);
+half __ovld atomic_fetch_add_explicit(volatile atomic_half *object,
+ half operand, memory_order order,
+ memory_scope scope);
+half __ovld atomic_fetch_sub_explicit(volatile atomic_half *object,
+ half operand, memory_order order,
+ memory_scope scope);
+#endif // defined(__opencl_c_ext_fp16_global_atomic_add) && \
+ defined(__opencl_c_ext_fp16_local_atomic_add)
+
+#if defined(__opencl_c_ext_fp32_global_atomic_add)
+float __ovld atomic_fetch_add(volatile __global atomic_float *object,
+ float operand);
+float __ovld atomic_fetch_sub(volatile __global atomic_float *object,
+ float operand);
+float __ovld atomic_fetch_add_explicit(volatile __global atomic_float *object,
+ float operand, memory_order order);
+float __ovld atomic_fetch_sub_explicit(volatile __global atomic_float *object,
+ float operand, memory_order order);
+float __ovld atomic_fetch_add_explicit(volatile __global atomic_float *object,
+ float operand, memory_order order,
+ memory_scope scope);
+float __ovld atomic_fetch_sub_explicit(volatile __global atomic_float *object,
+ float operand, memory_order order,
+ memory_scope scope);
+#endif // defined(__opencl_c_ext_fp32_global_atomic_add)
+
+#if defined(__opencl_c_ext_fp32_local_atomic_add)
+float __ovld atomic_fetch_add(volatile __local atomic_float *object,
+ float operand);
+float __ovld atomic_fetch_sub(volatile __local atomic_float *object,
+ float operand);
+float __ovld atomic_fetch_add_explicit(volatile __local atomic_float *object,
+ float operand, memory_order order);
+float __ovld atomic_fetch_sub_explicit(volatile __local atomic_float *object,
+ float operand, memory_order order);
+float __ovld atomic_fetch_add_explicit(volatile __local atomic_float *object,
+ float operand, memory_order order,
+ memory_scope scope);
+float __ovld atomic_fetch_sub_explicit(volatile __local atomic_float *object,
+ float operand, memory_order order,
+ memory_scope scope);
+#endif // defined(__opencl_c_ext_fp32_local_atomic_add)
+
+#if defined(__opencl_c_ext_fp32_global_atomic_add) && \
+ defined(__opencl_c_ext_fp32_local_atomic_add)
+float __ovld atomic_fetch_add(volatile atomic_float *object, float operand);
+float __ovld atomic_fetch_sub(volatile atomic_float *object, float operand);
+float __ovld atomic_fetch_add_explicit(volatile atomic_float *object,
+ float operand, memory_order order);
+float __ovld atomic_fetch_sub_explicit(volatile atomic_float *object,
+ float operand, memory_order order);
+float __ovld atomic_fetch_add_explicit(volatile atomic_float *object,
+ float operand, memory_order order,
+ memory_scope scope);
+float __ovld atomic_fetch_sub_explicit(volatile atomic_float *object,
+ float operand, memory_order order,
+ memory_scope scope);
+#endif // defined(__opencl_c_ext_fp32_global_atomic_add) && \
+ defined(__opencl_c_ext_fp32_local_atomic_add)
+
+#if defined(__opencl_c_ext_fp64_global_atomic_add)
+double __ovld atomic_fetch_add(volatile __global atomic_double *object,
+ double operand);
+double __ovld atomic_fetch_sub(volatile __global atomic_double *object,
+ double operand);
+double __ovld atomic_fetch_add_explicit(volatile __global atomic_double *object,
+ double operand, memory_order order);
+double __ovld atomic_fetch_sub_explicit(volatile __global atomic_double *object,
+ double operand, memory_order order);
+double __ovld atomic_fetch_add_explicit(volatile __global atomic_double *object,
+ double operand, memory_order order,
+ memory_scope scope);
+double __ovld atomic_fetch_sub_explicit(volatile __global atomic_double *object,
+ double operand, memory_order order,
+ memory_scope scope);
+#endif // defined(__opencl_c_ext_fp64_global_atomic_add)
+
+#if defined(__opencl_c_ext_fp64_local_atomic_add)
+double __ovld atomic_fetch_add(volatile __local atomic_double *object,
+ double operand);
+double __ovld atomic_fetch_sub(volatile __local atomic_double *object,
+ double operand);
+double __ovld atomic_fetch_add_explicit(volatile __local atomic_double *object,
+ double operand, memory_order order);
+double __ovld atomic_fetch_sub_explicit(volatile __local atomic_double *object,
+ double operand, memory_order order);
+double __ovld atomic_fetch_add_explicit(volatile __local atomic_double *object,
+ double operand, memory_order order,
+ memory_scope scope);
+double __ovld atomic_fetch_sub_explicit(volatile __local atomic_double *object,
+ double operand, memory_order order,
+ memory_scope scope);
+#endif // defined(__opencl_c_ext_fp64_local_atomic_add)
+
+#if defined(__opencl_c_ext_fp64_global_atomic_add) && \
+ defined(__opencl_c_ext_fp64_local_atomic_add)
+double __ovld atomic_fetch_add(volatile atomic_double *object, double operand);
+double __ovld atomic_fetch_sub(volatile atomic_double *object, double operand);
+double __ovld atomic_fetch_add_explicit(volatile atomic_double *object,
+ double operand, memory_order order);
+double __ovld atomic_fetch_sub_explicit(volatile atomic_double *object,
+ double operand, memory_order order);
+double __ovld atomic_fetch_add_explicit(volatile atomic_double *object,
+ double operand, memory_order order,
+ memory_scope scope);
+double __ovld atomic_fetch_sub_explicit(volatile atomic_double *object,
+ double operand, memory_order order,
+ memory_scope scope);
+#endif // defined(__opencl_c_ext_fp64_global_atomic_add) && \
+ defined(__opencl_c_ext_fp64_local_atomic_add)
+
+#endif // cl_ext_float_atomics
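
Usage sketch for the cl_ext_float_atomics declarations above (hypothetical reduction kernel, guarded by the fp32 global add feature macro): partial values are accumulated into a single sum with a relaxed floating-point fetch-add at device scope.

#if defined(__opencl_c_ext_fp32_global_atomic_add)
__kernel void sum_reduce(__global const float *in, volatile __global atomic_float *sum) {
    size_t gid = get_global_id(0);
    // Relaxed is enough here: only the final accumulated value is read after the kernel finishes.
    atomic_fetch_add_explicit(sum, in[gid], memory_order_relaxed, memory_scope_device);
}
#endif
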
// atomic_store()
#if defined(__opencl_c_atomic_order_seq_cst) && defined(__opencl_c_atomic_scope_device)
+#if defined(__opencl_c_generic_address_space)
void __ovld atomic_store(volatile atomic_int *object, int desired);
void __ovld atomic_store(volatile atomic_uint *object, uint desired);
void __ovld atomic_store(volatile atomic_float *object, float desired);
@@ -13427,9 +14027,29 @@ void __ovld atomic_store(volatile atomic_double *object, double desired);
void __ovld atomic_store(volatile atomic_long *object, long desired);
void __ovld atomic_store(volatile atomic_ulong *object, ulong desired);
#endif
+#endif //defined(__opencl_c_generic_address_space)
+#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100)
+void __ovld atomic_store(volatile __global atomic_int *object, int desired);
+void __ovld atomic_store(volatile __local atomic_int *object, int desired);
+void __ovld atomic_store(volatile __global atomic_uint *object, uint desired);
+void __ovld atomic_store(volatile __local atomic_uint *object, uint desired);
+void __ovld atomic_store(volatile __global atomic_float *object, float desired);
+void __ovld atomic_store(volatile __local atomic_float *object, float desired);
+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
+#ifdef cl_khr_fp64
+void __ovld atomic_store(volatile __global atomic_double *object, double desired);
+void __ovld atomic_store(volatile __local atomic_double *object, double desired);
+#endif //cl_khr_fp64
+void __ovld atomic_store(volatile __global atomic_long *object, long desired);
+void __ovld atomic_store(volatile __local atomic_long *object, long desired);
+void __ovld atomic_store(volatile __global atomic_ulong *object, ulong desired);
+void __ovld atomic_store(volatile __local atomic_ulong *object, ulong desired);
+#endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100)
#endif
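
One more short sketch (hypothetical kernel): a seq_cst atomic_store through the __global atomic_int overload added above, used to publish a completion flag after a non-atomic write.

__kernel void publish_result(__global float *out, __global atomic_int *ready, float value) {
    if (get_global_id(0) == 0) {
        out[0] = value;
        // seq_cst store (implicit order/scope); resolves to the __global atomic_int overload above.
        atomic_store(ready, 1);
    }
}
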
#if defined(__opencl_c_atomic_scope_device)
+#if defined(__opencl_c_generic_address_space)
void __ovld atomic_store_explicit(volatile atomic_int *object, int desired, memory_order order);
void __ovld atomic_store_explicit(volatile atomic_uint *object, uint desired, memory_order order);
void __ovld atomic_store_explicit(volatile atomic_float *object, float desired, memory_order order);
@@ -13440,8 +14060,28 @@ void __ovld atomic_store_explicit(volatile atomic_double *object, double desired
void __ovld atomic_store_explicit(volatile atomic_long *object, long desired, memory_order order);
void __ovld atomic_store_explicit(volatile atomic_ulong *object, ulong desired, memory_order order);
#endif
+#endif //defined(__opencl_c_generic_address_space)
+#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100)
+void __ovld atomic_store_explicit(volatile __global atomic_int *object, int desired, memory_order order);
+void __ovld atomic_store_explicit(volatile __local atomic_int *object, int desired, memory_order order);
+void __ovld atomic_store_explicit(volatile __global atomic_uint *object, uint desired, memory_order order);
+void __ovld atomic_store_explicit(volatile __local atomic_uint *object, uint desired, memory_order order);
+void __ovld atomic_store_explicit(volatile __global atomic_float *object, float desired, memory_order order);
+void __ovld atomic_store_explicit(volatile __local atomic_float *object, float desired, memory_order order);
+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
+#ifdef cl_khr_fp64
+void __ovld atomic_store_explicit(volatile __global atomic_double *object, double desired, memory_order order);
+void __ovld atomic_store_explicit(volatile __local atomic_double *object, double desired, memory_order order);
+#endif //cl_khr_fp64
+void __ovld atomic_store_explicit(volatile __global atomic_long *object, long desired, memory_order order);
+void __ovld atomic_store_explicit(volatile __local atomic_long *object, long desired, memory_order order);
+void __ovld atomic_store_explicit(volatile __global atomic_ulong *object, ulong desired, memory_order order);
+void __ovld atomic_store_explicit(volatile __local atomic_ulong *object, ulong desired, memory_order order);
+#endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100)
#endif
+#if defined(__opencl_c_generic_address_space)
void __ovld atomic_store_explicit(volatile atomic_int *object, int desired, memory_order order, memory_scope scope);
void __ovld atomic_store_explicit(volatile atomic_uint *object, uint desired, memory_order order, memory_scope scope);
void __ovld atomic_store_explicit(volatile atomic_float *object, float desired, memory_order order, memory_scope scope);
@@ -13452,9 +14092,29 @@ void __ovld atomic_store_explicit(volatile atomic_double *object, double desired
void __ovld atomic_store_explicit(volatile atomic_long *object, long desired, memory_order order, memory_scope scope);
void __ovld atomic_store_explicit(volatile atomic_ulong *object, ulong desired, memory_order order, memory_scope scope);
#endif
+#endif //defined(__opencl_c_generic_address_space)
+#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100)
+void __ovld atomic_store_explicit(volatile __global atomic_int *object, int desired, memory_order order, memory_scope scope);
+void __ovld atomic_store_explicit(volatile __local atomic_int *object, int desired, memory_order order, memory_scope scope);
+void __ovld atomic_store_explicit(volatile __global atomic_uint *object, uint desired, memory_order order, memory_scope scope);
+void __ovld atomic_store_explicit(volatile __local atomic_uint *object, uint desired, memory_order order, memory_scope scope);
+void __ovld atomic_store_explicit(volatile __global atomic_float *object, float desired, memory_order order, memory_scope scope);
+void __ovld atomic_store_explicit(volatile __local atomic_float *object, float desired, memory_order order, memory_scope scope);
+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
+#ifdef cl_khr_fp64
+void __ovld atomic_store_explicit(volatile __global atomic_double *object, double desired, memory_order order, memory_scope scope);
+void __ovld atomic_store_explicit(volatile __local atomic_double *object, double desired, memory_order order, memory_scope scope);
+#endif //cl_khr_fp64
+void __ovld atomic_store_explicit(volatile __global atomic_long *object, long desired, memory_order order, memory_scope scope);
+void __ovld atomic_store_explicit(volatile __local atomic_long *object, long desired, memory_order order, memory_scope scope);
+void __ovld atomic_store_explicit(volatile __global atomic_ulong *object, ulong desired, memory_order order, memory_scope scope);
+void __ovld atomic_store_explicit(volatile __local atomic_ulong *object, ulong desired, memory_order order, memory_scope scope);
+#endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100)
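// Editorial usage sketch, not part of the header: when the
// __opencl_c_generic_address_space feature is absent, an OpenCL C 3.0 program
// resolves atomic_store_explicit to the address-space-qualified overloads
// declared above. The kernel is hypothetical, and the release ordering assumes
// the device also reports the __opencl_c_atomic_order_acq_rel feature.
kernel void publish(global atomic_int *flag, local atomic_int *scratch) {
  // Work-group-scope initialization of the local counter.
  atomic_store_explicit(scratch, 0, memory_order_relaxed, memory_scope_work_group);
  // Release store so earlier global writes become visible to observers of the flag.
  atomic_store_explicit(flag, 1, memory_order_release, memory_scope_device);
}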
// atomic_load()
#if defined(__opencl_c_atomic_order_seq_cst) && defined(__opencl_c_atomic_scope_device)
+#if defined(__opencl_c_generic_address_space)
int __ovld atomic_load(volatile atomic_int *object);
uint __ovld atomic_load(volatile atomic_uint *object);
float __ovld atomic_load(volatile atomic_float *object);
@@ -13465,9 +14125,29 @@ double __ovld atomic_load(volatile atomic_double *object);
long __ovld atomic_load(volatile atomic_long *object);
ulong __ovld atomic_load(volatile atomic_ulong *object);
#endif
+#endif //defined(__opencl_c_generic_address_space)
+#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100)
+int __ovld atomic_load(volatile __global atomic_int *object);
+int __ovld atomic_load(volatile __local atomic_int *object);
+uint __ovld atomic_load(volatile __global atomic_uint *object);
+uint __ovld atomic_load(volatile __local atomic_uint *object);
+float __ovld atomic_load(volatile __global atomic_float *object);
+float __ovld atomic_load(volatile __local atomic_float *object);
+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
+#ifdef cl_khr_fp64
+double __ovld atomic_load(volatile __global atomic_double *object);
+double __ovld atomic_load(volatile __local atomic_double *object);
+#endif //cl_khr_fp64
+long __ovld atomic_load(volatile __global atomic_long *object);
+long __ovld atomic_load(volatile __local atomic_long *object);
+ulong __ovld atomic_load(volatile __global atomic_ulong *object);
+ulong __ovld atomic_load(volatile __local atomic_ulong *object);
+#endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100)
#endif
#if defined(__opencl_c_atomic_scope_device)
+#if defined(__opencl_c_generic_address_space)
int __ovld atomic_load_explicit(volatile atomic_int *object, memory_order order);
uint __ovld atomic_load_explicit(volatile atomic_uint *object, memory_order order);
float __ovld atomic_load_explicit(volatile atomic_float *object, memory_order order);
@@ -13478,8 +14158,28 @@ double __ovld atomic_load_explicit(volatile atomic_double *object, memory_order
long __ovld atomic_load_explicit(volatile atomic_long *object, memory_order order);
ulong __ovld atomic_load_explicit(volatile atomic_ulong *object, memory_order order);
#endif
+#endif //defined(__opencl_c_generic_address_space)
+#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100)
+int __ovld atomic_load_explicit(volatile __global atomic_int *object, memory_order order);
+int __ovld atomic_load_explicit(volatile __local atomic_int *object, memory_order order);
+uint __ovld atomic_load_explicit(volatile __global atomic_uint *object, memory_order order);
+uint __ovld atomic_load_explicit(volatile __local atomic_uint *object, memory_order order);
+float __ovld atomic_load_explicit(volatile __global atomic_float *object, memory_order order);
+float __ovld atomic_load_explicit(volatile __local atomic_float *object, memory_order order);
+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
+#ifdef cl_khr_fp64
+double __ovld atomic_load_explicit(volatile __global atomic_double *object, memory_order order);
+double __ovld atomic_load_explicit(volatile __local atomic_double *object, memory_order order);
+#endif //cl_khr_fp64
+long __ovld atomic_load_explicit(volatile __global atomic_long *object, memory_order order);
+long __ovld atomic_load_explicit(volatile __local atomic_long *object, memory_order order);
+ulong __ovld atomic_load_explicit(volatile __global atomic_ulong *object, memory_order order);
+ulong __ovld atomic_load_explicit(volatile __local atomic_ulong *object, memory_order order);
+#endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100)
#endif
+#if defined(__opencl_c_generic_address_space)
int __ovld atomic_load_explicit(volatile atomic_int *object, memory_order order, memory_scope scope);
uint __ovld atomic_load_explicit(volatile atomic_uint *object, memory_order order, memory_scope scope);
float __ovld atomic_load_explicit(volatile atomic_float *object, memory_order order, memory_scope scope);
@@ -13490,10 +14190,30 @@ double __ovld atomic_load_explicit(volatile atomic_double *object, memory_order
long __ovld atomic_load_explicit(volatile atomic_long *object, memory_order order, memory_scope scope);
ulong __ovld atomic_load_explicit(volatile atomic_ulong *object, memory_order order, memory_scope scope);
#endif
+#endif //defined(__opencl_c_generic_address_space)
+#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100)
+int __ovld atomic_load_explicit(volatile __global atomic_int *object, memory_order order, memory_scope scope);
+int __ovld atomic_load_explicit(volatile __local atomic_int *object, memory_order order, memory_scope scope);
+uint __ovld atomic_load_explicit(volatile __global atomic_uint *object, memory_order order, memory_scope scope);
+uint __ovld atomic_load_explicit(volatile __local atomic_uint *object, memory_order order, memory_scope scope);
+float __ovld atomic_load_explicit(volatile __global atomic_float *object, memory_order order, memory_scope scope);
+float __ovld atomic_load_explicit(volatile __local atomic_float *object, memory_order order, memory_scope scope);
+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
+#ifdef cl_khr_fp64
+double __ovld atomic_load_explicit(volatile __global atomic_double *object, memory_order order, memory_scope scope);
+double __ovld atomic_load_explicit(volatile __local atomic_double *object, memory_order order, memory_scope scope);
+#endif //cl_khr_fp64
+long __ovld atomic_load_explicit(volatile __global atomic_long *object, memory_order order, memory_scope scope);
+long __ovld atomic_load_explicit(volatile __local atomic_long *object, memory_order order, memory_scope scope);
+ulong __ovld atomic_load_explicit(volatile __global atomic_ulong *object, memory_order order, memory_scope scope);
+ulong __ovld atomic_load_explicit(volatile __local atomic_ulong *object, memory_order order, memory_scope scope);
+#endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100)
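// Editorial usage sketch, not part of the header: an acquire load through the
// __global overload declared above. The kernel and names are hypothetical, and
// acquire ordering assumes the __opencl_c_atomic_order_acq_rel feature.
kernel void consume(global atomic_int *counter, global int *out) {
  // Device-scope acquire load of a counter written by another kernel or work-item.
  out[get_global_id(0)] =
      atomic_load_explicit(counter, memory_order_acquire, memory_scope_device);
}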
// atomic_exchange()
#if defined(__opencl_c_atomic_order_seq_cst) && defined(__opencl_c_atomic_scope_device)
+#if defined(__opencl_c_generic_address_space)
int __ovld atomic_exchange(volatile atomic_int *object, int desired);
uint __ovld atomic_exchange(volatile atomic_uint *object, uint desired);
float __ovld atomic_exchange(volatile atomic_float *object, float desired);
@@ -13504,9 +14224,29 @@ double __ovld atomic_exchange(volatile atomic_double *object, double desired);
long __ovld atomic_exchange(volatile atomic_long *object, long desired);
ulong __ovld atomic_exchange(volatile atomic_ulong *object, ulong desired);
#endif
+#endif //defined(__opencl_c_generic_address_space)
+#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100)
+int __ovld atomic_exchange(volatile __global atomic_int *object, int desired);
+int __ovld atomic_exchange(volatile __local atomic_int *object, int desired);
+uint __ovld atomic_exchange(volatile __global atomic_uint *object, uint desired);
+uint __ovld atomic_exchange(volatile __local atomic_uint *object, uint desired);
+float __ovld atomic_exchange(volatile __global atomic_float *object, float desired);
+float __ovld atomic_exchange(volatile __local atomic_float *object, float desired);
+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
+#ifdef cl_khr_fp64
+double __ovld atomic_exchange(volatile __global atomic_double *object, double desired);
+double __ovld atomic_exchange(volatile __local atomic_double *object, double desired);
+#endif //cl_khr_fp64
+long __ovld atomic_exchange(volatile __global atomic_long *object, long desired);
+long __ovld atomic_exchange(volatile __local atomic_long *object, long desired);
+ulong __ovld atomic_exchange(volatile __global atomic_ulong *object, ulong desired);
+ulong __ovld atomic_exchange(volatile __local atomic_ulong *object, ulong desired);
+#endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100)
#endif
#if defined(__opencl_c_atomic_scope_device)
+#if defined(__opencl_c_generic_address_space)
int __ovld atomic_exchange_explicit(volatile atomic_int *object, int desired, memory_order order);
uint __ovld atomic_exchange_explicit(volatile atomic_uint *object, uint desired, memory_order order);
float __ovld atomic_exchange_explicit(volatile atomic_float *object, float desired, memory_order order);
@@ -13517,8 +14257,28 @@ double __ovld atomic_exchange_explicit(volatile atomic_double *object, double de
long __ovld atomic_exchange_explicit(volatile atomic_long *object, long desired, memory_order order);
ulong __ovld atomic_exchange_explicit(volatile atomic_ulong *object, ulong desired, memory_order order);
#endif
+#endif //defined(__opencl_c_generic_address_space)
+#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100)
+int __ovld atomic_exchange_explicit(volatile __global atomic_int *object, int desired, memory_order order);
+int __ovld atomic_exchange_explicit(volatile __local atomic_int *object, int desired, memory_order order);
+uint __ovld atomic_exchange_explicit(volatile __global atomic_uint *object, uint desired, memory_order order);
+uint __ovld atomic_exchange_explicit(volatile __local atomic_uint *object, uint desired, memory_order order);
+float __ovld atomic_exchange_explicit(volatile __global atomic_float *object, float desired, memory_order order);
+float __ovld atomic_exchange_explicit(volatile __local atomic_float *object, float desired, memory_order order);
+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
+#ifdef cl_khr_fp64
+double __ovld atomic_exchange_explicit(volatile __global atomic_double *object, double desired, memory_order order);
+double __ovld atomic_exchange_explicit(volatile __local atomic_double *object, double desired, memory_order order);
+#endif //cl_khr_fp64
+long __ovld atomic_exchange_explicit(volatile __global atomic_long *object, long desired, memory_order order);
+long __ovld atomic_exchange_explicit(volatile __local atomic_long *object, long desired, memory_order order);
+ulong __ovld atomic_exchange_explicit(volatile __global atomic_ulong *object, ulong desired, memory_order order);
+ulong __ovld atomic_exchange_explicit(volatile __local atomic_ulong *object, ulong desired, memory_order order);
+#endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100)
#endif
+#if defined(__opencl_c_generic_address_space)
int __ovld atomic_exchange_explicit(volatile atomic_int *object, int desired, memory_order order, memory_scope scope);
uint __ovld atomic_exchange_explicit(volatile atomic_uint *object, uint desired, memory_order order, memory_scope scope);
float __ovld atomic_exchange_explicit(volatile atomic_float *object, float desired, memory_order order, memory_scope scope);
@@ -13529,16 +14289,35 @@ double __ovld atomic_exchange_explicit(volatile atomic_double *object, double de
long __ovld atomic_exchange_explicit(volatile atomic_long *object, long desired, memory_order order, memory_scope scope);
ulong __ovld atomic_exchange_explicit(volatile atomic_ulong *object, ulong desired, memory_order order, memory_scope scope);
#endif
+#endif //defined(__opencl_c_generic_address_space)
+#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100)
+int __ovld atomic_exchange_explicit(volatile __global atomic_int *object, int desired, memory_order order, memory_scope scope);
+int __ovld atomic_exchange_explicit(volatile __local atomic_int *object, int desired, memory_order order, memory_scope scope);
+uint __ovld atomic_exchange_explicit(volatile __global atomic_uint *object, uint desired, memory_order order, memory_scope scope);
+uint __ovld atomic_exchange_explicit(volatile __local atomic_uint *object, uint desired, memory_order order, memory_scope scope);
+float __ovld atomic_exchange_explicit(volatile __global atomic_float *object, float desired, memory_order order, memory_scope scope);
+float __ovld atomic_exchange_explicit(volatile __local atomic_float *object, float desired, memory_order order, memory_scope scope);
+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
+#ifdef cl_khr_fp64
+double __ovld atomic_exchange_explicit(volatile __global atomic_double *object, double desired, memory_order order, memory_scope scope);
+double __ovld atomic_exchange_explicit(volatile __local atomic_double *object, double desired, memory_order order, memory_scope scope);
+#endif //cl_khr_fp64
+long __ovld atomic_exchange_explicit(volatile __global atomic_long *object, long desired, memory_order order, memory_scope scope);
+long __ovld atomic_exchange_explicit(volatile __local atomic_long *object, long desired, memory_order order, memory_scope scope);
+ulong __ovld atomic_exchange_explicit(volatile __global atomic_ulong *object, ulong desired, memory_order order, memory_scope scope);
+ulong __ovld atomic_exchange_explicit(volatile __local atomic_ulong *object, ulong desired, memory_order order, memory_scope scope);
+#endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100)
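// Editorial usage sketch, not part of the header: claiming a slot by swapping
// in a sentinel and keeping the previous value. The kernel and the sentinel
// value are hypothetical.
kernel void claim(global atomic_int *slot, global int *previous) {
  // Relaxed device-scope exchange; returns the value that was replaced.
  previous[get_global_id(0)] =
      atomic_exchange_explicit(slot, -1, memory_order_relaxed, memory_scope_device);
}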
// atomic_compare_exchange_strong() and atomic_compare_exchange_weak()
#if defined(__opencl_c_atomic_order_seq_cst) && defined(__opencl_c_atomic_scope_device)
+#if defined(__opencl_c_generic_address_space)
bool __ovld atomic_compare_exchange_strong(volatile atomic_int *object, int *expected, int desired);
bool __ovld atomic_compare_exchange_strong(volatile atomic_uint *object, uint *expected, uint desired);
bool __ovld atomic_compare_exchange_weak(volatile atomic_int *object, int *expected, int desired);
bool __ovld atomic_compare_exchange_weak(volatile atomic_uint *object, uint *expected, uint desired);
bool __ovld atomic_compare_exchange_strong(volatile atomic_float *object, float *expected, float desired);
bool __ovld atomic_compare_exchange_weak(volatile atomic_float *object, float *expected, float desired);
-
#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
#ifdef cl_khr_fp64
bool __ovld atomic_compare_exchange_strong(volatile atomic_double *object, double *expected, double desired);
@@ -13549,8 +14328,88 @@ bool __ovld atomic_compare_exchange_weak(volatile atomic_long *object, long *exp
bool __ovld atomic_compare_exchange_strong(volatile atomic_ulong *object, ulong *expected, ulong desired);
bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong *object, ulong *expected, ulong desired);
#endif
+#endif //defined(__opencl_c_generic_address_space)
+#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100)
+bool __ovld atomic_compare_exchange_strong(volatile __global atomic_int *object, __global int *expected, int desired);
+bool __ovld atomic_compare_exchange_strong(volatile __global atomic_int *object, __local int *expected, int desired);
+bool __ovld atomic_compare_exchange_strong(volatile __global atomic_int *object, __private int *expected, int desired);
+bool __ovld atomic_compare_exchange_strong(volatile __local atomic_int *object, __global int *expected, int desired);
+bool __ovld atomic_compare_exchange_strong(volatile __local atomic_int *object, __local int *expected, int desired);
+bool __ovld atomic_compare_exchange_strong(volatile __local atomic_int *object, __private int *expected, int desired);
+bool __ovld atomic_compare_exchange_strong(volatile __global atomic_uint *object, __global uint *expected, uint desired);
+bool __ovld atomic_compare_exchange_strong(volatile __global atomic_uint *object, __local uint *expected, uint desired);
+bool __ovld atomic_compare_exchange_strong(volatile __global atomic_uint *object, __private uint *expected, uint desired);
+bool __ovld atomic_compare_exchange_strong(volatile __local atomic_uint *object, __global uint *expected, uint desired);
+bool __ovld atomic_compare_exchange_strong(volatile __local atomic_uint *object, __local uint *expected, uint desired);
+bool __ovld atomic_compare_exchange_strong(volatile __local atomic_uint *object, __private uint *expected, uint desired);
+bool __ovld atomic_compare_exchange_strong(volatile __global atomic_float *object, __global float *expected, float desired);
+bool __ovld atomic_compare_exchange_strong(volatile __global atomic_float *object, __local float *expected, float desired);
+bool __ovld atomic_compare_exchange_strong(volatile __global atomic_float *object, __private float *expected, float desired);
+bool __ovld atomic_compare_exchange_strong(volatile __local atomic_float *object, __global float *expected, float desired);
+bool __ovld atomic_compare_exchange_strong(volatile __local atomic_float *object, __local float *expected, float desired);
+bool __ovld atomic_compare_exchange_strong(volatile __local atomic_float *object, __private float *expected, float desired);
+bool __ovld atomic_compare_exchange_weak(volatile __global atomic_int *object, __global int *expected, int desired);
+bool __ovld atomic_compare_exchange_weak(volatile __global atomic_int *object, __local int *expected, int desired);
+bool __ovld atomic_compare_exchange_weak(volatile __global atomic_int *object, __private int *expected, int desired);
+bool __ovld atomic_compare_exchange_weak(volatile __local atomic_int *object, __global int *expected, int desired);
+bool __ovld atomic_compare_exchange_weak(volatile __local atomic_int *object, __local int *expected, int desired);
+bool __ovld atomic_compare_exchange_weak(volatile __local atomic_int *object, __private int *expected, int desired);
+bool __ovld atomic_compare_exchange_weak(volatile __global atomic_uint *object, __global uint *expected, uint desired);
+bool __ovld atomic_compare_exchange_weak(volatile __global atomic_uint *object, __local uint *expected, uint desired);
+bool __ovld atomic_compare_exchange_weak(volatile __global atomic_uint *object, __private uint *expected, uint desired);
+bool __ovld atomic_compare_exchange_weak(volatile __local atomic_uint *object, __global uint *expected, uint desired);
+bool __ovld atomic_compare_exchange_weak(volatile __local atomic_uint *object, __local uint *expected, uint desired);
+bool __ovld atomic_compare_exchange_weak(volatile __local atomic_uint *object, __private uint *expected, uint desired);
+bool __ovld atomic_compare_exchange_weak(volatile __global atomic_float *object, __global float *expected, float desired);
+bool __ovld atomic_compare_exchange_weak(volatile __global atomic_float *object, __local float *expected, float desired);
+bool __ovld atomic_compare_exchange_weak(volatile __global atomic_float *object, __private float *expected, float desired);
+bool __ovld atomic_compare_exchange_weak(volatile __local atomic_float *object, __global float *expected, float desired);
+bool __ovld atomic_compare_exchange_weak(volatile __local atomic_float *object, __local float *expected, float desired);
+bool __ovld atomic_compare_exchange_weak(volatile __local atomic_float *object, __private float *expected, float desired);
+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
+#ifdef cl_khr_fp64
+bool __ovld atomic_compare_exchange_strong(volatile __global atomic_double *object, __global double *expected, double desired);
+bool __ovld atomic_compare_exchange_strong(volatile __global atomic_double *object, __local double *expected, double desired);
+bool __ovld atomic_compare_exchange_strong(volatile __global atomic_double *object, __private double *expected, double desired);
+bool __ovld atomic_compare_exchange_strong(volatile __local atomic_double *object, __global double *expected, double desired);
+bool __ovld atomic_compare_exchange_strong(volatile __local atomic_double *object, __local double *expected, double desired);
+bool __ovld atomic_compare_exchange_strong(volatile __local atomic_double *object, __private double *expected, double desired);
+bool __ovld atomic_compare_exchange_weak(volatile __global atomic_double *object, __global double *expected, double desired);
+bool __ovld atomic_compare_exchange_weak(volatile __global atomic_double *object, __local double *expected, double desired);
+bool __ovld atomic_compare_exchange_weak(volatile __global atomic_double *object, __private double *expected, double desired);
+bool __ovld atomic_compare_exchange_weak(volatile __local atomic_double *object, __global double *expected, double desired);
+bool __ovld atomic_compare_exchange_weak(volatile __local atomic_double *object, __local double *expected, double desired);
+bool __ovld atomic_compare_exchange_weak(volatile __local atomic_double *object, __private double *expected, double desired);
+#endif //cl_khr_fp64
+bool __ovld atomic_compare_exchange_strong(volatile __global atomic_long *object, __global long *expected, long desired);
+bool __ovld atomic_compare_exchange_strong(volatile __global atomic_long *object, __local long *expected, long desired);
+bool __ovld atomic_compare_exchange_strong(volatile __global atomic_long *object, __private long *expected, long desired);
+bool __ovld atomic_compare_exchange_strong(volatile __local atomic_long *object, __global long *expected, long desired);
+bool __ovld atomic_compare_exchange_strong(volatile __local atomic_long *object, __local long *expected, long desired);
+bool __ovld atomic_compare_exchange_strong(volatile __local atomic_long *object, __private long *expected, long desired);
+bool __ovld atomic_compare_exchange_strong(volatile __global atomic_ulong *object, __global ulong *expected, ulong desired);
+bool __ovld atomic_compare_exchange_strong(volatile __global atomic_ulong *object, __local ulong *expected, ulong desired);
+bool __ovld atomic_compare_exchange_strong(volatile __global atomic_ulong *object, __private ulong *expected, ulong desired);
+bool __ovld atomic_compare_exchange_strong(volatile __local atomic_ulong *object, __global ulong *expected, ulong desired);
+bool __ovld atomic_compare_exchange_strong(volatile __local atomic_ulong *object, __local ulong *expected, ulong desired);
+bool __ovld atomic_compare_exchange_strong(volatile __local atomic_ulong *object, __private ulong *expected, ulong desired);
+bool __ovld atomic_compare_exchange_weak(volatile __global atomic_long *object, __global long *expected, long desired);
+bool __ovld atomic_compare_exchange_weak(volatile __global atomic_long *object, __local long *expected, long desired);
+bool __ovld atomic_compare_exchange_weak(volatile __global atomic_long *object, __private long *expected, long desired);
+bool __ovld atomic_compare_exchange_weak(volatile __local atomic_long *object, __global long *expected, long desired);
+bool __ovld atomic_compare_exchange_weak(volatile __local atomic_long *object, __local long *expected, long desired);
+bool __ovld atomic_compare_exchange_weak(volatile __local atomic_long *object, __private long *expected, long desired);
+bool __ovld atomic_compare_exchange_weak(volatile __global atomic_ulong *object, __global ulong *expected, ulong desired);
+bool __ovld atomic_compare_exchange_weak(volatile __global atomic_ulong *object, __local ulong *expected, ulong desired);
+bool __ovld atomic_compare_exchange_weak(volatile __global atomic_ulong *object, __private ulong *expected, ulong desired);
+bool __ovld atomic_compare_exchange_weak(volatile __local atomic_ulong *object, __global ulong *expected, ulong desired);
+bool __ovld atomic_compare_exchange_weak(volatile __local atomic_ulong *object, __local ulong *expected, ulong desired);
+bool __ovld atomic_compare_exchange_weak(volatile __local atomic_ulong *object, __private ulong *expected, ulong desired);
+#endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100)
#endif
+#if defined(__opencl_c_generic_address_space)
bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_int *object, int *expected,
int desired, memory_order success, memory_order failure);
bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_uint *object, uint *expected,
@@ -13579,7 +14438,159 @@ bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_ulong *objec
bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_ulong *object, ulong *expected,
ulong desired, memory_order success, memory_order failure);
#endif
+#endif //defined(__opencl_c_generic_address_space)
+#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100)
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_int *object, __global int *expected,
+ int desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_int *object, __local int *expected,
+ int desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_int *object, __private int *expected,
+ int desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_int *object, __global int *expected,
+ int desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_int *object, __local int *expected,
+ int desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_int *object, __private int *expected,
+ int desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_uint *object, __global uint *expected,
+ uint desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_uint *object, __local uint *expected,
+ uint desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_uint *object, __private uint *expected,
+ uint desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_uint *object, __global uint *expected,
+ uint desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_uint *object, __local uint *expected,
+ uint desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_uint *object, __private uint *expected,
+ uint desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_float *object, __global float *expected,
+ float desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_float *object, __local float *expected,
+ float desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_float *object, __private float *expected,
+ float desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_float *object, __global float *expected,
+ float desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_float *object, __local float *expected,
+ float desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_float *object, __private float *expected,
+ float desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_int *object, __global int *expected,
+ int desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_int *object, __local int *expected,
+ int desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_int *object, __private int *expected,
+ int desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_int *object, __global int *expected,
+ int desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_int *object, __local int *expected,
+ int desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_int *object, __private int *expected,
+ int desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_uint *object, __global uint *expected,
+ uint desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_uint *object, __local uint *expected,
+ uint desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_uint *object, __private uint *expected,
+ uint desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_uint *object, __global uint *expected,
+ uint desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_uint *object, __local uint *expected,
+ uint desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_uint *object, __private uint *expected,
+ uint desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_float *object, __global float *expected,
+ float desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_float *object, __local float *expected,
+ float desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_float *object, __private float *expected,
+ float desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_float *object, __global float *expected,
+ float desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_float *object, __local float *expected,
+ float desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_float *object, __private float *expected,
+ float desired, memory_order success, memory_order failure);
+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
+#ifdef cl_khr_fp64
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_double *object, __global double *expected,
+ double desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_double *object, __local double *expected,
+ double desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_double *object, __private double *expected,
+ double desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_double *object, __global double *expected,
+ double desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_double *object, __local double *expected,
+ double desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_double *object, __private double *expected,
+ double desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_double *object, __global double *expected,
+ double desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_double *object, __local double *expected,
+ double desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_double *object, __private double *expected,
+ double desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_double *object, __global double *expected,
+ double desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_double *object, __local double *expected,
+ double desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_double *object, __private double *expected,
+ double desired, memory_order success, memory_order failure);
+#endif //cl_khr_fp64
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_long *object, __global long *expected,
+ long desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_long *object, __local long *expected,
+ long desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_long *object, __private long *expected,
+ long desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_long *object, __global long *expected,
+ long desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_long *object, __local long *expected,
+ long desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_long *object, __private long *expected,
+ long desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_ulong *object, __global ulong *expected,
+ ulong desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_ulong *object, __local ulong *expected,
+ ulong desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_ulong *object, __private ulong *expected,
+ ulong desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_ulong *object, __global ulong *expected,
+ ulong desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_ulong *object, __local ulong *expected,
+ ulong desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_ulong *object, __private ulong *expected,
+ ulong desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_long *object, __global long *expected,
+ long desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_long *object, __local long *expected,
+ long desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_long *object, __private long *expected,
+ long desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_long *object, __global long *expected,
+ long desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_long *object, __local long *expected,
+ long desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_long *object, __private long *expected,
+ long desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_ulong *object, __global ulong *expected,
+ ulong desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_ulong *object, __local ulong *expected,
+ ulong desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_ulong *object, __private ulong *expected,
+ ulong desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_ulong *object, __global ulong *expected,
+ ulong desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_ulong *object, __local ulong *expected,
+ ulong desired, memory_order success, memory_order failure);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_ulong *object, __private ulong *expected,
+ ulong desired, memory_order success, memory_order failure);
+#endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100)
+#if defined(__opencl_c_generic_address_space)
bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_int *object, int *expected,
int desired, memory_order success, memory_order failure, memory_scope scope);
bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_uint *object, uint *expected,
@@ -13608,20 +14619,195 @@ bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_ulong *objec
bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_ulong *object, ulong *expected,
ulong desired, memory_order success, memory_order failure, memory_scope scope);
#endif
+#endif //defined(__opencl_c_generic_address_space)
+#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100)
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_int *object, __global int *expected,
+ int desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_int *object, __local int *expected,
+ int desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_int *object, __private int *expected,
+ int desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_int *object, __global int *expected,
+ int desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_int *object, __local int *expected,
+ int desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_int *object, __private int *expected,
+ int desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_uint *object, __global uint *expected,
+ uint desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_uint *object, __local uint *expected,
+ uint desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_uint *object, __private uint *expected,
+ uint desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_uint *object, __global uint *expected,
+ uint desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_uint *object, __local uint *expected,
+ uint desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_uint *object, __private uint *expected,
+ uint desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_float *object, __global float *expected,
+ float desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_float *object, __local float *expected,
+ float desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_float *object, __private float *expected,
+ float desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_float *object, __global float *expected,
+ float desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_float *object, __local float *expected,
+ float desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_float *object, __private float *expected,
+ float desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_int *object, __global int *expected,
+ int desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_int *object, __local int *expected,
+ int desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_int *object, __private int *expected,
+ int desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_int *object, __global int *expected,
+ int desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_int *object, __local int *expected,
+ int desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_int *object, __private int *expected,
+ int desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_uint *object, __global uint *expected,
+ uint desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_uint *object, __local uint *expected,
+ uint desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_uint *object, __private uint *expected,
+ uint desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_uint *object, __global uint *expected,
+ uint desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_uint *object, __local uint *expected,
+ uint desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_uint *object, __private uint *expected,
+ uint desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_float *object, __global float *expected,
+ float desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_float *object, __local float *expected,
+ float desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_float *object, __private float *expected,
+ float desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_float *object, __global float *expected,
+ float desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_float *object, __local float *expected,
+ float desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_float *object, __private float *expected,
+ float desired, memory_order success, memory_order failure, memory_scope scope);
+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
+#ifdef cl_khr_fp64
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_double *object, __global double *expected,
+ double desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_double *object, __local double *expected,
+ double desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_double *object, __private double *expected,
+ double desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_double *object, __global double *expected,
+ double desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_double *object, __local double *expected,
+ double desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_double *object, __private double *expected,
+ double desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_double *object, __global double *expected,
+ double desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_double *object, __local double *expected,
+ double desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_double *object, __private double *expected,
+ double desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_double *object, __global double *expected,
+ double desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_double *object, __local double *expected,
+ double desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_double *object, __private double *expected,
+ double desired, memory_order success, memory_order failure, memory_scope scope);
+#endif //cl_khr_fp64
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_long *object, __global long *expected,
+ long desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_long *object, __local long *expected,
+ long desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_long *object, __private long *expected,
+ long desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_long *object, __global long *expected,
+ long desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_long *object, __local long *expected,
+ long desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_long *object, __private long *expected,
+ long desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_ulong *object, __global ulong *expected,
+ ulong desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_ulong *object, __local ulong *expected,
+ ulong desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __global atomic_ulong *object, __private ulong *expected,
+ ulong desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_ulong *object, __global ulong *expected,
+ ulong desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_ulong *object, __local ulong *expected,
+ ulong desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_strong_explicit(volatile __local atomic_ulong *object, __private ulong *expected,
+ ulong desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_long *object, __global long *expected,
+ long desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_long *object, __local long *expected,
+ long desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_long *object, __private long *expected,
+ long desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_long *object, __global long *expected,
+ long desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_long *object, __local long *expected,
+ long desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_long *object, __private long *expected,
+ long desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_ulong *object, __global ulong *expected,
+ ulong desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_ulong *object, __local ulong *expected,
+ ulong desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __global atomic_ulong *object, __private ulong *expected,
+ ulong desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_ulong *object, __global ulong *expected,
+ ulong desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_ulong *object, __local ulong *expected,
+ ulong desired, memory_order success, memory_order failure, memory_scope scope);
+bool __ovld atomic_compare_exchange_weak_explicit(volatile __local atomic_ulong *object, __private ulong *expected,
+ ulong desired, memory_order success, memory_order failure, memory_scope scope);
+#endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100)
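Illustrative sketch, not part of the vendored header: with the address-space-qualified overloads declared above, an OpenCL C 3.0 kernel can pass a __private expected value directly to a __global 64-bit atomic. The kernel name and logic are invented, and cl_khr_int64_base_atomics / cl_khr_int64_extended_atomics are assumed to be supported.

__kernel void take_ticket(__global atomic_long *next, __global long *out) {
    long expected = 0;                     /* __private by default */
    /* Resolves to the (__global atomic_long *, __private long *) overload. */
    while (!atomic_compare_exchange_strong_explicit(next, &expected, expected + 1,
                                                    memory_order_acq_rel,
                                                    memory_order_relaxed,
                                                    memory_scope_device)) {
        /* On failure 'expected' holds the observed value; retry with it. */
    }
    out[get_global_id(0)] = expected;
}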
// atomic_flag_test_and_set() and atomic_flag_clear()
#if defined(__opencl_c_atomic_order_seq_cst) && defined(__opencl_c_atomic_scope_device)
+#if defined(__opencl_c_generic_address_space)
bool __ovld atomic_flag_test_and_set(volatile atomic_flag *object);
void __ovld atomic_flag_clear(volatile atomic_flag *object);
+#endif //defined(__opencl_c_generic_address_space)
+#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100)
+bool __ovld atomic_flag_test_and_set(volatile __global atomic_flag *object);
+bool __ovld atomic_flag_test_and_set(volatile __local atomic_flag *object);
+void __ovld atomic_flag_clear(volatile __global atomic_flag *object);
+void __ovld atomic_flag_clear(volatile __local atomic_flag *object);
+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100)
#endif
#if defined(__opencl_c_atomic_scope_device)
+#if defined(__opencl_c_generic_address_space)
bool __ovld atomic_flag_test_and_set_explicit(volatile atomic_flag *object, memory_order order);
void __ovld atomic_flag_clear_explicit(volatile atomic_flag *object, memory_order order);
+#endif //defined(__opencl_c_generic_address_space)
+#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100)
+bool __ovld atomic_flag_test_and_set_explicit(volatile __global atomic_flag *object, memory_order order);
+bool __ovld atomic_flag_test_and_set_explicit(volatile __local atomic_flag *object, memory_order order);
+void __ovld atomic_flag_clear_explicit(volatile __global atomic_flag *object, memory_order order);
+void __ovld atomic_flag_clear_explicit(volatile __local atomic_flag *object, memory_order order);
+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100)
#endif
+#if defined(__opencl_c_generic_address_space)
bool __ovld atomic_flag_test_and_set_explicit(volatile atomic_flag *object, memory_order order, memory_scope scope);
void __ovld atomic_flag_clear_explicit(volatile atomic_flag *object, memory_order order, memory_scope scope);
+#endif //defined(__opencl_c_generic_address_space)
+#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100)
+bool __ovld atomic_flag_test_and_set_explicit(volatile __global atomic_flag *object, memory_order order, memory_scope scope);
+bool __ovld atomic_flag_test_and_set_explicit(volatile __local atomic_flag *object, memory_order order, memory_scope scope);
+void __ovld atomic_flag_clear_explicit(volatile __global atomic_flag *object, memory_order order, memory_scope scope);
+void __ovld atomic_flag_clear_explicit(volatile __local atomic_flag *object, memory_order order, memory_scope scope);
+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100)
#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
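Illustrative sketch, not part of the vendored header: the __global and __local atomic_flag overloads declared above allow a simple device-scope spin lock without relying on the generic address space. Function and variable names are invented.

__kernel void with_lock(__global atomic_flag *lock, __global int *shared_count) {
    /* Spin until the previous value was 'clear'; uses the __global overload. */
    while (atomic_flag_test_and_set_explicit(lock, memory_order_acquire,
                                             memory_scope_device))
        ;
    ++(*shared_count);                     /* critical section */
    atomic_flag_clear_explicit(lock, memory_order_release, memory_scope_device);
}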
// OpenCL v1.1 s6.11.12, v1.2 s6.12.12, v2.0 s6.13.12 - Miscellaneous Vector Functions
@@ -15155,7 +16341,7 @@ int __ovld get_image_num_samples(read_write image2d_array_msaa_depth_t image);
// OpenCL v2.0 s6.13.15 - Work-group Functions
-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#if defined(__opencl_c_work_group_collective_functions)
int __ovld __conv work_group_all(int predicate);
int __ovld __conv work_group_any(int predicate);
@@ -15253,12 +16439,12 @@ double __ovld __conv work_group_scan_inclusive_min(double x);
double __ovld __conv work_group_scan_inclusive_max(double x);
#endif //cl_khr_fp64
-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#endif //defined(__opencl_c_work_group_collective_functions)
// OpenCL v2.0 s6.13.16 - Pipe Functions
-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#if defined(__opencl_c_pipes)
bool __ovld is_valid_reserve_id(reserve_id_t reserve_id);
-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#endif //defined(__opencl_c_pipes)
// OpenCL v2.0 s6.13.17 - Enqueue Kernels
@@ -15295,7 +16481,7 @@ queue_t __ovld get_default_queue(void);
// OpenCL Extension v2.0 s9.17 - Sub-groups
-#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups)
+#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups) || defined(__opencl_c_subgroups)
// Shared Sub Group Functions
uint __ovld get_sub_group_size(void);
uint __ovld get_max_sub_group_size(void);
@@ -15394,7 +16580,7 @@ double __ovld __conv sub_group_scan_inclusive_min(double x);
double __ovld __conv sub_group_scan_inclusive_max(double x);
#endif //cl_khr_fp64
-#endif //cl_khr_subgroups cl_intel_subgroups
+#endif //cl_khr_subgroups cl_intel_subgroups __opencl_c_subgroups
#if defined(cl_khr_subgroup_extended_types)
char __ovld __conv sub_group_broadcast( char value, uint index );
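Illustrative sketch, not part of the vendored header: since the guards above are now keyed to OpenCL 3.0 feature macros rather than a blanket version check, kernel code can test the same macros. Names below are invented.

#if defined(__opencl_c_work_group_collective_functions)
__kernel void all_positive(__global const int *in, __global int *out) {
    int ok = work_group_all(in[get_global_id(0)] > 0);
    if (get_local_id(0) == 0)
        out[get_group_id(0)] = ok;
}
#endif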
diff --git a/clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h b/clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
index 953857badfc4..279fb26fbaf7 100644
--- a/clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
+++ b/clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
@@ -14,13 +14,13 @@
#error "This file is for OpenMP compilation only."
#endif
-#pragma omp begin declare variant match( \
- device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
-
#ifdef __cplusplus
extern "C" {
#endif
+#pragma omp begin declare variant match( \
+ device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
+
#define __CUDA__
#define __OPENMP_NVPTX__
@@ -33,11 +33,33 @@ extern "C" {
#undef __OPENMP_NVPTX__
#undef __CUDA__
-#ifdef __cplusplus
-} // extern "C"
+#pragma omp end declare variant
+
+#ifdef __AMDGCN__
+#pragma omp begin declare variant match(device = {arch(amdgcn)})
+
+// Import types which will be used by __clang_hip_libdevice_declares.h
+#ifndef __cplusplus
+#include <stdbool.h>
+#include <stdint.h>
#endif
+#define __OPENMP_AMDGCN__
+#pragma push_macro("__device__")
+#define __device__
+
+/// Include declarations for libdevice functions.
+#include <__clang_hip_libdevice_declares.h>
+
+#pragma pop_macro("__device__")
+#undef __OPENMP_AMDGCN__
+
#pragma omp end declare variant
+#endif
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
// Ensure we make `_ZdlPv`, aka. `operator delete(void*)` available without the
// need to `include <new>` in C++ mode.
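Illustrative sketch, not part of the vendored wrapper: the AMDGCN variant above routes the HIP libdevice declarations into OpenMP device code, so plain C math calls work inside a target region. The compiler flags are assumptions, e.g. -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa or nvptx64-nvidia-cuda.

#include <math.h>
#include <stdio.h>

int main(void) {
    double r = 0.0;
#pragma omp target map(tofrom: r)
    { r = sqrt(2.0); }        /* lowered to a device intrinsic on the GPU */
    printf("sqrt(2) = %f\n", r);
    return 0;
}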
diff --git a/clang/lib/Headers/openmp_wrappers/cmath b/clang/lib/Headers/openmp_wrappers/cmath
index 1aff66af7d52..22a720aca956 100644
--- a/clang/lib/Headers/openmp_wrappers/cmath
+++ b/clang/lib/Headers/openmp_wrappers/cmath
@@ -75,4 +75,58 @@ __DEVICE__ float tgamma(float __x) { return ::tgammaf(__x); }
#pragma omp end declare variant
+#ifdef __AMDGCN__
+#pragma omp begin declare variant match(device = {arch(amdgcn)})
+
+#pragma push_macro("__constant__")
+#define __constant__ __attribute__((constant))
+#define __OPENMP_AMDGCN__
+
+#include <__clang_hip_cmath.h>
+
+#pragma pop_macro("__constant__")
+#undef __OPENMP_AMDGCN__
+
+// Define overloads which are otherwise absent
+#define __DEVICE__ static constexpr __attribute__((always_inline, nothrow))
+
+__DEVICE__ float acos(float __x) { return ::acosf(__x); }
+__DEVICE__ float acosh(float __x) { return ::acoshf(__x); }
+__DEVICE__ float asin(float __x) { return ::asinf(__x); }
+__DEVICE__ float asinh(float __x) { return ::asinhf(__x); }
+__DEVICE__ float atan(float __x) { return ::atanf(__x); }
+__DEVICE__ float atan2(float __x, float __y) { return ::atan2f(__x, __y); }
+__DEVICE__ float atanh(float __x) { return ::atanhf(__x); }
+__DEVICE__ float cbrt(float __x) { return ::cbrtf(__x); }
+__DEVICE__ float cosh(float __x) { return ::coshf(__x); }
+__DEVICE__ float erf(float __x) { return ::erff(__x); }
+__DEVICE__ float erfc(float __x) { return ::erfcf(__x); }
+__DEVICE__ float exp2(float __x) { return ::exp2f(__x); }
+__DEVICE__ float expm1(float __x) { return ::expm1f(__x); }
+__DEVICE__ float fdim(float __x, float __y) { return ::fdimf(__x, __y); }
+__DEVICE__ float hypot(float __x, float __y) { return ::hypotf(__x, __y); }
+__DEVICE__ int ilogb(float __x) { return ::ilogbf(__x); }
+__DEVICE__ float ldexp(float __arg, int __exp) {
+ return ::ldexpf(__arg, __exp);
+}
+__DEVICE__ float lgamma(float __x) { return ::lgammaf(__x); }
+__DEVICE__ float log1p(float __x) { return ::log1pf(__x); }
+__DEVICE__ float logb(float __x) { return ::logbf(__x); }
+__DEVICE__ float nextafter(float __x, float __y) {
+ return ::nextafterf(__x, __y);
+}
+__DEVICE__ float remainder(float __x, float __y) {
+ return ::remainderf(__x, __y);
+}
+__DEVICE__ float scalbn(float __x, int __y) { return ::scalbnf(__x, __y); }
+__DEVICE__ float sinh(float __x) { return ::sinhf(__x); }
+__DEVICE__ float tan(float __x) { return ::tanf(__x); }
+__DEVICE__ float tanh(float __x) { return ::tanhf(__x); }
+__DEVICE__ float tgamma(float __x) { return ::tgammaf(__x); }
+
+#undef __DEVICE__
+
+#pragma omp end declare variant
+#endif // __AMDGCN__
+
#endif
diff --git a/clang/lib/Headers/openmp_wrappers/complex b/clang/lib/Headers/openmp_wrappers/complex
index dfd6193c97cb..1ceecc1af8ae 100644
--- a/clang/lib/Headers/openmp_wrappers/complex
+++ b/clang/lib/Headers/openmp_wrappers/complex
@@ -17,9 +17,18 @@
// We require std::math functions in the complex builtins below.
#include <cmath>
+#ifdef __NVPTX__
#define __OPENMP_NVPTX__
#include <__clang_cuda_complex_builtins.h>
#undef __OPENMP_NVPTX__
+#endif // __NVPTX__
+
+#ifdef __AMDGCN__
+#define __OPENMP_AMDGCN__
+#include <__clang_cuda_complex_builtins.h>
+#undef __OPENMP_AMDGCN__
+#endif // __AMDGCN__
+
#endif
// Grab the host header too.
@@ -36,11 +45,11 @@
#ifndef _LIBCPP_STD_VER
#pragma omp begin declare variant match( \
- device = {arch(nvptx, nvptx64)}, \
+ device = {arch(amdgcn, nvptx, nvptx64)}, \
implementation = {extension(match_any, allow_templates)})
#include <complex_cmath.h>
#pragma omp end declare variant
-#endif
+#endif // _LIBCPP_STD_VER
diff --git a/clang/lib/Headers/openmp_wrappers/complex.h b/clang/lib/Headers/openmp_wrappers/complex.h
index 15dc415b8126..7e7c0866426b 100644
--- a/clang/lib/Headers/openmp_wrappers/complex.h
+++ b/clang/lib/Headers/openmp_wrappers/complex.h
@@ -17,10 +17,19 @@
// We require math functions in the complex builtins below.
#include <math.h>
+#ifdef __NVPTX__
#define __OPENMP_NVPTX__
#include <__clang_cuda_complex_builtins.h>
#undef __OPENMP_NVPTX__
#endif
+#ifdef __AMDGCN__
+#define __OPENMP_AMDGCN__
+#include <__clang_cuda_complex_builtins.h>
+#undef __OPENMP_AMDGCN__
+#endif
+
+#endif
+
// Grab the host header too.
#include_next <complex.h>
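Illustrative sketch, not part of the vendored wrapper: once <complex.h> pulls in the complex builtins for AMDGCN as well as NVPTX, C complex arithmetic can be offloaded; multiplication lowers to __muldc3 on the device. Same assumed offload flags as the example above.

#include <complex.h>

int main(void) {
    double complex r = 0.0;
#pragma omp target map(tofrom: r)
    { r = (1.0 + 2.0 * I) * (3.0 - 1.0 * I); }   /* (1+2i)(3-i) = 5+5i */
    return creal(r) == 5.0 ? 0 : 1;
}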
diff --git a/clang/lib/Headers/openmp_wrappers/math.h b/clang/lib/Headers/openmp_wrappers/math.h
index c64af8b13ece..1e3c07cfdb8c 100644
--- a/clang/lib/Headers/openmp_wrappers/math.h
+++ b/clang/lib/Headers/openmp_wrappers/math.h
@@ -48,4 +48,14 @@
#pragma omp end declare variant
+#ifdef __AMDGCN__
+#pragma omp begin declare variant match(device = {arch(amdgcn)})
+
+#define __OPENMP_AMDGCN__
+#include <__clang_hip_math.h>
+#undef __OPENMP_AMDGCN__
+
+#pragma omp end declare variant
+#endif
+
#endif
diff --git a/clang/lib/Headers/pmmintrin.h b/clang/lib/Headers/pmmintrin.h
index a83b2eb6d8e2..eda83567cd05 100644
--- a/clang/lib/Headers/pmmintrin.h
+++ b/clang/lib/Headers/pmmintrin.h
@@ -10,6 +10,10 @@
#ifndef __PMMINTRIN_H
#define __PMMINTRIN_H
+#if !defined(__i386__) && !defined(__x86_64__)
+#error "This header is only meant to be used on x86 and x64 architecture"
+#endif
+
#include <emmintrin.h>
/* Define the default attributes for the functions in this file. */
diff --git a/clang/lib/Headers/ppc_wrappers/smmintrin.h b/clang/lib/Headers/ppc_wrappers/smmintrin.h
index 64f0c761994d..f41264b27584 100644
--- a/clang/lib/Headers/ppc_wrappers/smmintrin.h
+++ b/clang/lib/Headers/ppc_wrappers/smmintrin.h
@@ -32,7 +32,7 @@
#if defined(__linux__) && defined(__ppc64__)
#include <altivec.h>
-#include <emmintrin.h>
+#include <tmmintrin.h>
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
diff --git a/clang/lib/Headers/prfchwintrin.h b/clang/lib/Headers/prfchwintrin.h
index 6e8a4ef2ec97..d2f91aa0123e 100644
--- a/clang/lib/Headers/prfchwintrin.h
+++ b/clang/lib/Headers/prfchwintrin.h
@@ -47,9 +47,12 @@ _m_prefetch(void *__P)
/// \param __P
/// A pointer specifying the memory address to be prefetched.
static __inline__ void __attribute__((__always_inline__, __nodebug__))
-_m_prefetchw(void *__P)
+_m_prefetchw(volatile const void *__P)
{
- __builtin_prefetch (__P, 1, 3 /* _MM_HINT_T0 */);
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wcast-qual"
+ __builtin_prefetch ((const void*)__P, 1, 3 /* _MM_HINT_T0 */);
+#pragma clang diagnostic pop
}
#endif /* __PRFCHWINTRIN_H */
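Illustrative sketch, not part of the vendored header: after the signature change above, _m_prefetchw accepts a pointer to const data without a cast. A target with PREFETCHW is assumed (e.g. -mprfchw); the helper name is invented.

#include <x86intrin.h>

static long sum_with_prefetch(const long *p, int n) {
    _m_prefetchw(p);          /* const-qualified pointer is now accepted */
    long s = 0;
    for (int i = 0; i < n; ++i)
        s += p[i];
    return s;
}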
diff --git a/clang/lib/Headers/smmintrin.h b/clang/lib/Headers/smmintrin.h
index 025830a74280..710e55aaa120 100644
--- a/clang/lib/Headers/smmintrin.h
+++ b/clang/lib/Headers/smmintrin.h
@@ -10,6 +10,10 @@
#ifndef __SMMINTRIN_H
#define __SMMINTRIN_H
+#if !defined(__i386__) && !defined(__x86_64__)
+#error "This header is only meant to be used on x86 and x64 architecture"
+#endif
+
#include <tmmintrin.h>
/* Define the default attributes for the functions in this file. */
@@ -231,7 +235,7 @@
/// 11: Truncated
/// \returns A 128-bit vector of [4 x float] containing the rounded values.
#define _mm_round_ps(X, M) \
- (__m128)__builtin_ia32_roundps((__v4sf)(__m128)(X), (M))
+ ((__m128)__builtin_ia32_roundps((__v4sf)(__m128)(X), (M)))
/// Copies three upper elements of the first 128-bit vector operand to
/// the corresponding three upper elements of the 128-bit result vector of
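Illustrative sketch, not part of the vendored header: the added outer parentheses make each rounding macro expand as a single parenthesized expression, so combining the result with other operators cannot change precedence. SSE4.1 is assumed (e.g. -msse4.1); the helper name is invented.

#include <smmintrin.h>

static __m128 round_half(__m128 v) {
    /* Round each lane to nearest, then halve the result. */
    return _mm_mul_ps(_mm_round_ps(v, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC),
                      _mm_set1_ps(0.5f));
}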
@@ -272,8 +276,8 @@
/// \returns A 128-bit vector of [4 x float] containing the copied and rounded
/// values.
#define _mm_round_ss(X, Y, M) \
- (__m128)__builtin_ia32_roundss((__v4sf)(__m128)(X), \
- (__v4sf)(__m128)(Y), (M))
+ ((__m128)__builtin_ia32_roundss((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (M)))
/// Rounds each element of the 128-bit vector of [2 x double] to an
/// integer value according to the rounding control specified by the second
@@ -306,7 +310,7 @@
/// 11: Truncated
/// \returns A 128-bit vector of [2 x double] containing the rounded values.
#define _mm_round_pd(X, M) \
- (__m128d)__builtin_ia32_roundpd((__v2df)(__m128d)(X), (M))
+ ((__m128d)__builtin_ia32_roundpd((__v2df)(__m128d)(X), (M)))
/// Copies the upper element of the first 128-bit vector operand to the
/// corresponding upper element of the 128-bit result vector of [2 x double].
@@ -347,8 +351,8 @@
/// \returns A 128-bit vector of [2 x double] containing the copied and rounded
/// values.
#define _mm_round_sd(X, Y, M) \
- (__m128d)__builtin_ia32_roundsd((__v2df)(__m128d)(X), \
- (__v2df)(__m128d)(Y), (M))
+ ((__m128d)__builtin_ia32_roundsd((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (M)))
/* SSE4 Packed Blending Intrinsics. */
/// Returns a 128-bit vector of [2 x double] where the values are
@@ -376,8 +380,8 @@
/// is copied to the same position in the result.
/// \returns A 128-bit vector of [2 x double] containing the copied values.
#define _mm_blend_pd(V1, V2, M) \
- (__m128d) __builtin_ia32_blendpd ((__v2df)(__m128d)(V1), \
- (__v2df)(__m128d)(V2), (int)(M))
+ ((__m128d) __builtin_ia32_blendpd ((__v2df)(__m128d)(V1), \
+ (__v2df)(__m128d)(V2), (int)(M)))
/// Returns a 128-bit vector of [4 x float] where the values are selected
/// from either the first or second operand as specified by the third
@@ -404,8 +408,8 @@
/// is copied to the same position in the result.
/// \returns A 128-bit vector of [4 x float] containing the copied values.
#define _mm_blend_ps(V1, V2, M) \
- (__m128) __builtin_ia32_blendps ((__v4sf)(__m128)(V1), \
- (__v4sf)(__m128)(V2), (int)(M))
+ ((__m128) __builtin_ia32_blendps ((__v4sf)(__m128)(V1), \
+ (__v4sf)(__m128)(V2), (int)(M)))
/// Returns a 128-bit vector of [2 x double] where the values are
/// selected from either the first or second operand as specified by the
@@ -513,8 +517,8 @@ _mm_blendv_epi8 (__m128i __V1, __m128i __V2, __m128i __M)
/// is copied to the same position in the result.
/// \returns A 128-bit vector of [8 x i16] containing the copied values.
#define _mm_blend_epi16(V1, V2, M) \
- (__m128i) __builtin_ia32_pblendw128 ((__v8hi)(__m128i)(V1), \
- (__v8hi)(__m128i)(V2), (int)(M))
+ ((__m128i) __builtin_ia32_pblendw128 ((__v8hi)(__m128i)(V1), \
+ (__v8hi)(__m128i)(V2), (int)(M)))
/* SSE4 Dword Multiply Instructions. */
/// Multiples corresponding elements of two 128-bit vectors of [4 x i32]
@@ -590,8 +594,8 @@ _mm_mul_epi32 (__m128i __V1, __m128i __V2)
/// in the corresponding element; otherwise that element is set to zero.
/// \returns A 128-bit vector of [4 x float] containing the dot product.
#define _mm_dp_ps(X, Y, M) \
- (__m128) __builtin_ia32_dpps((__v4sf)(__m128)(X), \
- (__v4sf)(__m128)(Y), (M))
+ ((__m128) __builtin_ia32_dpps((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (M)))
/// Computes the dot product of the two 128-bit vectors of [2 x double]
/// and returns it in the elements of the 128-bit result vector of
@@ -625,8 +629,8 @@ _mm_mul_epi32 (__m128i __V1, __m128i __V2)
/// each [2 x double] vector. If a bit is set, the dot product is returned in
/// the corresponding element; otherwise that element is set to zero.
#define _mm_dp_pd(X, Y, M) \
- (__m128d) __builtin_ia32_dppd((__v2df)(__m128d)(X), \
- (__v2df)(__m128d)(Y), (M))
+ ((__m128d) __builtin_ia32_dppd((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (M)))
/* SSE4 Streaming Load Hint Instruction. */
/// Loads integer values from a 128-bit aligned memory location to a
@@ -865,15 +869,13 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
/// 10: Bits [95:64] of parameter \a X are returned. \n
/// 11: Bits [127:96] of parameter \a X are returned.
/// \returns A 32-bit integer containing the extracted 32 bits of float data.
-#define _mm_extract_ps(X, N) (__extension__ \
- ({ union { int __i; float __f; } __t; \
- __t.__f = __builtin_ia32_vec_ext_v4sf((__v4sf)(__m128)(X), (int)(N)); \
- __t.__i;}))
+#define _mm_extract_ps(X, N) \
+ __builtin_bit_cast(int, __builtin_ia32_vec_ext_v4sf((__v4sf)(__m128)(X), (int)(N)))
/* Miscellaneous insert and extract macros. */
/* Extract a single-precision float from X at index N into D. */
#define _MM_EXTRACT_FLOAT(D, X, N) \
- { (D) = __builtin_ia32_vec_ext_v4sf((__v4sf)(__m128)(X), (int)(N)); }
+ do { (D) = __builtin_ia32_vec_ext_v4sf((__v4sf)(__m128)(X), (int)(N)); } while (0)
/* Or together 2 sets of indexes (X and Y) with the zeroing bits (Z) to create
an index suitable for _mm_insert_ps. */
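Illustrative sketch, not part of the vendored header: the do { ... } while (0) form above lets _MM_EXTRACT_FLOAT be used as an ordinary statement, for example as an if/else branch, while _mm_extract_ps now uses __builtin_bit_cast instead of a statement expression. SSE4.1 is assumed; the helper name is invented.

#include <smmintrin.h>

static float lane_or_zero(__m128 v, int want_lane2) {
    float f;
    if (want_lane2)
        _MM_EXTRACT_FLOAT(f, v, 2);   /* valid as a single statement now */
    else
        f = 0.0f;
    return f;
}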
@@ -925,8 +927,8 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
/// 1111: Bits [127:120] of the result are used for insertion.
/// \returns A 128-bit integer vector containing the constructed values.
#define _mm_insert_epi8(X, I, N) \
- (__m128i)__builtin_ia32_vec_set_v16qi((__v16qi)(__m128i)(X), \
- (int)(I), (int)(N))
+ ((__m128i)__builtin_ia32_vec_set_v16qi((__v16qi)(__m128i)(X), \
+ (int)(I), (int)(N)))
/// Constructs a 128-bit vector of [4 x i32] by first making a copy of
/// the 128-bit integer vector parameter, and then inserting the 32-bit
@@ -957,8 +959,8 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
/// 11: Bits [127:96] of the result are used for insertion.
/// \returns A 128-bit integer vector containing the constructed values.
#define _mm_insert_epi32(X, I, N) \
- (__m128i)__builtin_ia32_vec_set_v4si((__v4si)(__m128i)(X), \
- (int)(I), (int)(N))
+ ((__m128i)__builtin_ia32_vec_set_v4si((__v4si)(__m128i)(X), \
+ (int)(I), (int)(N)))
#ifdef __x86_64__
/// Constructs a 128-bit vector of [2 x i64] by first making a copy of
@@ -988,8 +990,8 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
/// 1: Bits [127:64] of the result are used for insertion. \n
/// \returns A 128-bit integer vector containing the constructed values.
#define _mm_insert_epi64(X, I, N) \
- (__m128i)__builtin_ia32_vec_set_v2di((__v2di)(__m128i)(X), \
- (long long)(I), (int)(N))
+ ((__m128i)__builtin_ia32_vec_set_v2di((__v2di)(__m128i)(X), \
+ (long long)(I), (int)(N)))
#endif /* __x86_64__ */
/* Extract int from packed integer array at index. This returns the element
@@ -1031,8 +1033,8 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
/// 128-bit integer vector parameter and the remaining bits are assigned
/// zeros.
#define _mm_extract_epi8(X, N) \
- (int)(unsigned char)__builtin_ia32_vec_ext_v16qi((__v16qi)(__m128i)(X), \
- (int)(N))
+ ((int)(unsigned char)__builtin_ia32_vec_ext_v16qi((__v16qi)(__m128i)(X), \
+ (int)(N)))
/// Extracts a 32-bit element from the 128-bit integer vector of
/// [4 x i32], using the immediate value parameter \a N as a selector.
@@ -1057,7 +1059,7 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
/// \returns An integer, whose lower 32 bits are selected from the 128-bit
/// integer vector parameter and the remaining bits are assigned zeros.
#define _mm_extract_epi32(X, N) \
- (int)__builtin_ia32_vec_ext_v4si((__v4si)(__m128i)(X), (int)(N))
+ ((int)__builtin_ia32_vec_ext_v4si((__v4si)(__m128i)(X), (int)(N)))
#ifdef __x86_64__
/// Extracts a 64-bit element from the 128-bit integer vector of
@@ -1080,7 +1082,7 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2)
/// 1: Bits [127:64] are returned. \n
/// \returns A 64-bit integer.
#define _mm_extract_epi64(X, N) \
- (long long)__builtin_ia32_vec_ext_v2di((__v2di)(__m128i)(X), (int)(N))
+ ((long long)__builtin_ia32_vec_ext_v2di((__v2di)(__m128i)(X), (int)(N)))
#endif /* __x86_64 */
/* SSE4 128-bit Packed Integer Comparisons. */
@@ -1514,8 +1516,8 @@ _mm_packus_epi32(__m128i __V1, __m128i __V2)
/// \returns A 128-bit integer vector containing the sums of the sets of
/// absolute differences between both operands.
#define _mm_mpsadbw_epu8(X, Y, M) \
- (__m128i) __builtin_ia32_mpsadbw128((__v16qi)(__m128i)(X), \
- (__v16qi)(__m128i)(Y), (M))
+ ((__m128i) __builtin_ia32_mpsadbw128((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (M)))
/// Finds the minimum unsigned 16-bit element in the input 128-bit
/// vector of [8 x u16] and returns it along with its index.
@@ -1624,8 +1626,8 @@ _mm_minpos_epu16(__m128i __V)
/// \returns Returns a 128-bit integer vector representing the result mask of
/// the comparison.
#define _mm_cmpistrm(A, B, M) \
- (__m128i)__builtin_ia32_pcmpistrm128((__v16qi)(__m128i)(A), \
- (__v16qi)(__m128i)(B), (int)(M))
+ ((__m128i)__builtin_ia32_pcmpistrm128((__v16qi)(__m128i)(A), \
+ (__v16qi)(__m128i)(B), (int)(M)))
/// Uses the immediate operand \a M to perform a comparison of string
/// data with implicitly defined lengths that is contained in source operands
@@ -1678,8 +1680,8 @@ _mm_minpos_epu16(__m128i __V)
/// 1: The index of the most significant set bit. \n
/// \returns Returns an integer representing the result index of the comparison.
#define _mm_cmpistri(A, B, M) \
- (int)__builtin_ia32_pcmpistri128((__v16qi)(__m128i)(A), \
- (__v16qi)(__m128i)(B), (int)(M))
+ ((int)__builtin_ia32_pcmpistri128((__v16qi)(__m128i)(A), \
+ (__v16qi)(__m128i)(B), (int)(M)))
/// Uses the immediate operand \a M to perform a comparison of string
/// data with explicitly defined lengths that is contained in source operands
@@ -1738,9 +1740,9 @@ _mm_minpos_epu16(__m128i __V)
/// \returns Returns a 128-bit integer vector representing the result mask of
/// the comparison.
#define _mm_cmpestrm(A, LA, B, LB, M) \
- (__m128i)__builtin_ia32_pcmpestrm128((__v16qi)(__m128i)(A), (int)(LA), \
- (__v16qi)(__m128i)(B), (int)(LB), \
- (int)(M))
+ ((__m128i)__builtin_ia32_pcmpestrm128((__v16qi)(__m128i)(A), (int)(LA), \
+ (__v16qi)(__m128i)(B), (int)(LB), \
+ (int)(M)))
/// Uses the immediate operand \a M to perform a comparison of string
/// data with explicitly defined lengths that is contained in source operands
@@ -1797,9 +1799,9 @@ _mm_minpos_epu16(__m128i __V)
/// 1: The index of the most significant set bit. \n
/// \returns Returns an integer representing the result index of the comparison.
#define _mm_cmpestri(A, LA, B, LB, M) \
- (int)__builtin_ia32_pcmpestri128((__v16qi)(__m128i)(A), (int)(LA), \
- (__v16qi)(__m128i)(B), (int)(LB), \
- (int)(M))
+ ((int)__builtin_ia32_pcmpestri128((__v16qi)(__m128i)(A), (int)(LA), \
+ (__v16qi)(__m128i)(B), (int)(LB), \
+ (int)(M)))
/* SSE4.2 Packed Comparison Intrinsics and EFlag Reading. */
/// Uses the immediate operand \a M to perform a comparison of string
@@ -1849,8 +1851,8 @@ _mm_minpos_epu16(__m128i __V)
/// \returns Returns 1 if the bit mask is zero and the length of the string in
/// \a B is the maximum; otherwise, returns 0.
#define _mm_cmpistra(A, B, M) \
- (int)__builtin_ia32_pcmpistria128((__v16qi)(__m128i)(A), \
- (__v16qi)(__m128i)(B), (int)(M))
+ ((int)__builtin_ia32_pcmpistria128((__v16qi)(__m128i)(A), \
+ (__v16qi)(__m128i)(B), (int)(M)))
/// Uses the immediate operand \a M to perform a comparison of string
/// data with implicitly defined lengths that is contained in source operands
@@ -1898,8 +1900,8 @@ _mm_minpos_epu16(__m128i __V)
/// to the size of \a A or \a B.
/// \returns Returns 1 if the bit mask is non-zero, otherwise, returns 0.
#define _mm_cmpistrc(A, B, M) \
- (int)__builtin_ia32_pcmpistric128((__v16qi)(__m128i)(A), \
- (__v16qi)(__m128i)(B), (int)(M))
+ ((int)__builtin_ia32_pcmpistric128((__v16qi)(__m128i)(A), \
+ (__v16qi)(__m128i)(B), (int)(M)))
/// Uses the immediate operand \a M to perform a comparison of string
/// data with implicitly defined lengths that is contained in source operands
@@ -1946,8 +1948,8 @@ _mm_minpos_epu16(__m128i __V)
/// to the size of \a A or \a B. \n
/// \returns Returns bit 0 of the resulting bit mask.
#define _mm_cmpistro(A, B, M) \
- (int)__builtin_ia32_pcmpistrio128((__v16qi)(__m128i)(A), \
- (__v16qi)(__m128i)(B), (int)(M))
+ ((int)__builtin_ia32_pcmpistrio128((__v16qi)(__m128i)(A), \
+ (__v16qi)(__m128i)(B), (int)(M)))
/// Uses the immediate operand \a M to perform a comparison of string
/// data with implicitly defined lengths that is contained in source operands
@@ -1996,8 +1998,8 @@ _mm_minpos_epu16(__m128i __V)
/// \returns Returns 1 if the length of the string in \a A is less than the
/// maximum, otherwise, returns 0.
#define _mm_cmpistrs(A, B, M) \
- (int)__builtin_ia32_pcmpistris128((__v16qi)(__m128i)(A), \
- (__v16qi)(__m128i)(B), (int)(M))
+ ((int)__builtin_ia32_pcmpistris128((__v16qi)(__m128i)(A), \
+ (__v16qi)(__m128i)(B), (int)(M)))
/// Uses the immediate operand \a M to perform a comparison of string
/// data with implicitly defined lengths that is contained in source operands
@@ -2046,8 +2048,8 @@ _mm_minpos_epu16(__m128i __V)
/// \returns Returns 1 if the length of the string in \a B is less than the
/// maximum, otherwise, returns 0.
#define _mm_cmpistrz(A, B, M) \
- (int)__builtin_ia32_pcmpistriz128((__v16qi)(__m128i)(A), \
- (__v16qi)(__m128i)(B), (int)(M))
+ ((int)__builtin_ia32_pcmpistriz128((__v16qi)(__m128i)(A), \
+ (__v16qi)(__m128i)(B), (int)(M)))
/// Uses the immediate operand \a M to perform a comparison of string
/// data with explicitly defined lengths that is contained in source operands
@@ -2100,9 +2102,9 @@ _mm_minpos_epu16(__m128i __V)
/// \returns Returns 1 if the bit mask is zero and the length of the string in
/// \a B is the maximum, otherwise, returns 0.
#define _mm_cmpestra(A, LA, B, LB, M) \
- (int)__builtin_ia32_pcmpestria128((__v16qi)(__m128i)(A), (int)(LA), \
- (__v16qi)(__m128i)(B), (int)(LB), \
- (int)(M))
+ ((int)__builtin_ia32_pcmpestria128((__v16qi)(__m128i)(A), (int)(LA), \
+ (__v16qi)(__m128i)(B), (int)(LB), \
+ (int)(M)))
/// Uses the immediate operand \a M to perform a comparison of string
/// data with explicitly defined lengths that is contained in source operands
@@ -2154,9 +2156,9 @@ _mm_minpos_epu16(__m128i __V)
/// to the size of \a A or \a B. \n
/// \returns Returns 1 if the resulting mask is non-zero, otherwise, returns 0.
#define _mm_cmpestrc(A, LA, B, LB, M) \
- (int)__builtin_ia32_pcmpestric128((__v16qi)(__m128i)(A), (int)(LA), \
- (__v16qi)(__m128i)(B), (int)(LB), \
- (int)(M))
+ ((int)__builtin_ia32_pcmpestric128((__v16qi)(__m128i)(A), (int)(LA), \
+ (__v16qi)(__m128i)(B), (int)(LB), \
+ (int)(M)))
/// Uses the immediate operand \a M to perform a comparison of string
/// data with explicitly defined lengths that is contained in source operands
@@ -2207,9 +2209,9 @@ _mm_minpos_epu16(__m128i __V)
/// to the size of \a A or \a B.
/// \returns Returns bit 0 of the resulting bit mask.
#define _mm_cmpestro(A, LA, B, LB, M) \
- (int)__builtin_ia32_pcmpestrio128((__v16qi)(__m128i)(A), (int)(LA), \
- (__v16qi)(__m128i)(B), (int)(LB), \
- (int)(M))
+ ((int)__builtin_ia32_pcmpestrio128((__v16qi)(__m128i)(A), (int)(LA), \
+ (__v16qi)(__m128i)(B), (int)(LB), \
+ (int)(M)))
/// Uses the immediate operand \a M to perform a comparison of string
/// data with explicitly defined lengths that is contained in source operands
@@ -2262,9 +2264,9 @@ _mm_minpos_epu16(__m128i __V)
/// \returns Returns 1 if the length of the string in \a A is less than the
/// maximum, otherwise, returns 0.
#define _mm_cmpestrs(A, LA, B, LB, M) \
- (int)__builtin_ia32_pcmpestris128((__v16qi)(__m128i)(A), (int)(LA), \
- (__v16qi)(__m128i)(B), (int)(LB), \
- (int)(M))
+ ((int)__builtin_ia32_pcmpestris128((__v16qi)(__m128i)(A), (int)(LA), \
+ (__v16qi)(__m128i)(B), (int)(LB), \
+ (int)(M)))
/// Uses the immediate operand \a M to perform a comparison of string
/// data with explicitly defined lengths that is contained in source operands
@@ -2316,9 +2318,9 @@ _mm_minpos_epu16(__m128i __V)
/// \returns Returns 1 if the length of the string in \a B is less than the
/// maximum, otherwise, returns 0.
#define _mm_cmpestrz(A, LA, B, LB, M) \
- (int)__builtin_ia32_pcmpestriz128((__v16qi)(__m128i)(A), (int)(LA), \
- (__v16qi)(__m128i)(B), (int)(LB), \
- (int)(M))
+ ((int)__builtin_ia32_pcmpestriz128((__v16qi)(__m128i)(A), (int)(LA), \
+ (__v16qi)(__m128i)(B), (int)(LB), \
+ (int)(M)))
/* SSE4.2 Compare Packed Data -- Greater Than. */
/// Compares each of the corresponding 64-bit values of the 128-bit
@@ -2340,91 +2342,10 @@ _mm_cmpgt_epi64(__m128i __V1, __m128i __V2)
return (__m128i)((__v2di)__V1 > (__v2di)__V2);
}
-/* SSE4.2 Accumulate CRC32. */
-/// Adds the unsigned integer operand to the CRC-32C checksum of the
-/// unsigned char operand.
-///
-/// \headerfile <x86intrin.h>
-///
-/// This intrinsic corresponds to the <c> CRC32B </c> instruction.
-///
-/// \param __C
-/// An unsigned integer operand to add to the CRC-32C checksum of operand
-/// \a __D.
-/// \param __D
-/// An unsigned 8-bit integer operand used to compute the CRC-32C checksum.
-/// \returns The result of adding operand \a __C to the CRC-32C checksum of
-/// operand \a __D.
-static __inline__ unsigned int __DEFAULT_FN_ATTRS
-_mm_crc32_u8(unsigned int __C, unsigned char __D)
-{
- return __builtin_ia32_crc32qi(__C, __D);
-}
-
-/// Adds the unsigned integer operand to the CRC-32C checksum of the
-/// unsigned short operand.
-///
-/// \headerfile <x86intrin.h>
-///
-/// This intrinsic corresponds to the <c> CRC32W </c> instruction.
-///
-/// \param __C
-/// An unsigned integer operand to add to the CRC-32C checksum of operand
-/// \a __D.
-/// \param __D
-/// An unsigned 16-bit integer operand used to compute the CRC-32C checksum.
-/// \returns The result of adding operand \a __C to the CRC-32C checksum of
-/// operand \a __D.
-static __inline__ unsigned int __DEFAULT_FN_ATTRS
-_mm_crc32_u16(unsigned int __C, unsigned short __D)
-{
- return __builtin_ia32_crc32hi(__C, __D);
-}
-
-/// Adds the first unsigned integer operand to the CRC-32C checksum of
-/// the second unsigned integer operand.
-///
-/// \headerfile <x86intrin.h>
-///
-/// This intrinsic corresponds to the <c> CRC32L </c> instruction.
-///
-/// \param __C
-/// An unsigned integer operand to add to the CRC-32C checksum of operand
-/// \a __D.
-/// \param __D
-/// An unsigned 32-bit integer operand used to compute the CRC-32C checksum.
-/// \returns The result of adding operand \a __C to the CRC-32C checksum of
-/// operand \a __D.
-static __inline__ unsigned int __DEFAULT_FN_ATTRS
-_mm_crc32_u32(unsigned int __C, unsigned int __D)
-{
- return __builtin_ia32_crc32si(__C, __D);
-}
-
-#ifdef __x86_64__
-/// Adds the unsigned integer operand to the CRC-32C checksum of the
-/// unsigned 64-bit integer operand.
-///
-/// \headerfile <x86intrin.h>
-///
-/// This intrinsic corresponds to the <c> CRC32Q </c> instruction.
-///
-/// \param __C
-/// An unsigned integer operand to add to the CRC-32C checksum of operand
-/// \a __D.
-/// \param __D
-/// An unsigned 64-bit integer operand used to compute the CRC-32C checksum.
-/// \returns The result of adding operand \a __C to the CRC-32C checksum of
-/// operand \a __D.
-static __inline__ unsigned long long __DEFAULT_FN_ATTRS
-_mm_crc32_u64(unsigned long long __C, unsigned long long __D)
-{
- return __builtin_ia32_crc32di(__C, __D);
-}
-#endif /* __x86_64__ */
-
#undef __DEFAULT_FN_ATTRS
#include <popcntintrin.h>
+#include <crc32intrin.h>
+
#endif /* __SMMINTRIN_H */
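Illustrative sketch, not part of the vendored header: the CRC-32C intrinsics now live in <crc32intrin.h>, which <smmintrin.h> includes, so existing SSE4.2 code keeps compiling unchanged. Either -msse4.2 or the standalone CRC32 feature is assumed; the helper name is invented.

#include <smmintrin.h>

static unsigned crc32c(const unsigned char *p, unsigned n) {
    unsigned c = ~0u;
    for (unsigned i = 0; i < n; ++i)
        c = _mm_crc32_u8(c, p[i]);    /* declared in crc32intrin.h now */
    return ~c;
}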
diff --git a/clang/lib/Headers/tmmintrin.h b/clang/lib/Headers/tmmintrin.h
index 35533e115c7d..bcffa8187801 100644
--- a/clang/lib/Headers/tmmintrin.h
+++ b/clang/lib/Headers/tmmintrin.h
@@ -10,6 +10,10 @@
#ifndef __TMMINTRIN_H
#define __TMMINTRIN_H
+#if !defined(__i386__) && !defined(__x86_64__)
+#error "This header is only meant to be used on x86 and x64 architecture"
+#endif
+
#include <pmmintrin.h>
/* Define the default attributes for the functions in this file. */
@@ -145,8 +149,8 @@ _mm_abs_epi32(__m128i __a)
/// \returns A 128-bit integer vector containing the concatenated right-shifted
/// value.
#define _mm_alignr_epi8(a, b, n) \
- (__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \
- (__v16qi)(__m128i)(b), (n))
+ ((__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \
+ (__v16qi)(__m128i)(b), (n)))
/// Concatenates the two 64-bit integer vector operands, and right-shifts
/// the result by the number of bytes specified in the immediate operand.
@@ -168,7 +172,7 @@ _mm_abs_epi32(__m128i __a)
/// \returns A 64-bit integer vector containing the concatenated right-shifted
/// value.
#define _mm_alignr_pi8(a, b, n) \
- (__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n))
+ ((__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n)))
/// Horizontally adds the adjacent pairs of values contained in 2 packed
/// 128-bit vectors of [8 x i16].
diff --git a/clang/lib/Headers/vpclmulqdqintrin.h b/clang/lib/Headers/vpclmulqdqintrin.h
index 44daadb07d57..485692ea2b5b 100644
--- a/clang/lib/Headers/vpclmulqdqintrin.h
+++ b/clang/lib/Headers/vpclmulqdqintrin.h
@@ -15,15 +15,15 @@
#define __VPCLMULQDQINTRIN_H
#define _mm256_clmulepi64_epi128(A, B, I) \
- (__m256i)__builtin_ia32_pclmulqdq256((__v4di)(__m256i)(A), \
- (__v4di)(__m256i)(B), \
- (char)(I))
+ ((__m256i)__builtin_ia32_pclmulqdq256((__v4di)(__m256i)(A), \
+ (__v4di)(__m256i)(B), \
+ (char)(I)))
#ifdef __AVX512FINTRIN_H
#define _mm512_clmulepi64_epi128(A, B, I) \
- (__m512i)__builtin_ia32_pclmulqdq512((__v8di)(__m512i)(A), \
- (__v8di)(__m512i)(B), \
- (char)(I))
+ ((__m512i)__builtin_ia32_pclmulqdq512((__v8di)(__m512i)(A), \
+ (__v8di)(__m512i)(B), \
+ (char)(I)))
#endif // __AVX512FINTRIN_H
#endif /* __VPCLMULQDQINTRIN_H */
diff --git a/clang/lib/Headers/wasm_simd128.h b/clang/lib/Headers/wasm_simd128.h
index 712fa0378098..3889a2769faf 100644
--- a/clang/lib/Headers/wasm_simd128.h
+++ b/clang/lib/Headers/wasm_simd128.h
@@ -277,11 +277,27 @@ wasm_i8x16_make(int8_t __c0, int8_t __c1, int8_t __c2, int8_t __c3, int8_t __c4,
}
static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_u8x16_make(uint8_t __c0, uint8_t __c1, uint8_t __c2, uint8_t __c3,
+ uint8_t __c4, uint8_t __c5, uint8_t __c6, uint8_t __c7,
+ uint8_t __c8, uint8_t __c9, uint8_t __c10, uint8_t __c11,
+ uint8_t __c12, uint8_t __c13, uint8_t __c14, uint8_t __c15) {
+ return (v128_t)(__u8x16){__c0, __c1, __c2, __c3, __c4, __c5,
+ __c6, __c7, __c8, __c9, __c10, __c11,
+ __c12, __c13, __c14, __c15};
+}
+
+static __inline__ v128_t __DEFAULT_FN_ATTRS
wasm_i16x8_make(int16_t __c0, int16_t __c1, int16_t __c2, int16_t __c3,
int16_t __c4, int16_t __c5, int16_t __c6, int16_t __c7) {
return (v128_t)(__i16x8){__c0, __c1, __c2, __c3, __c4, __c5, __c6, __c7};
}
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_u16x8_make(uint16_t __c0, uint16_t __c1, uint16_t __c2, uint16_t __c3,
+ uint16_t __c4, uint16_t __c5, uint16_t __c6, uint16_t __c7) {
+ return (v128_t)(__u16x8){__c0, __c1, __c2, __c3, __c4, __c5, __c6, __c7};
+}
+
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_make(int32_t __c0,
int32_t __c1,
int32_t __c2,
@@ -289,11 +305,23 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_make(int32_t __c0,
return (v128_t)(__i32x4){__c0, __c1, __c2, __c3};
}
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_make(uint32_t __c0,
+ uint32_t __c1,
+ uint32_t __c2,
+ uint32_t __c3) {
+ return (v128_t)(__u32x4){__c0, __c1, __c2, __c3};
+}
+
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_make(int64_t __c0,
int64_t __c1) {
return (v128_t)(__i64x2){__c0, __c1};
}
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u64x2_make(uint64_t __c0,
+ uint64_t __c1) {
+ return (v128_t)(__u64x2){__c0, __c1};
+}
+
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_make(float __c0,
float __c1,
float __c2,
@@ -325,6 +353,24 @@ wasm_i8x16_const(int8_t __c0, int8_t __c1, int8_t __c2, int8_t __c3,
}
static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_u8x16_const(uint8_t __c0, uint8_t __c1, uint8_t __c2, uint8_t __c3,
+ uint8_t __c4, uint8_t __c5, uint8_t __c6, uint8_t __c7,
+ uint8_t __c8, uint8_t __c9, uint8_t __c10, uint8_t __c11,
+ uint8_t __c12, uint8_t __c13, uint8_t __c14, uint8_t __c15)
+ __REQUIRE_CONSTANT(__c0) __REQUIRE_CONSTANT(__c1) __REQUIRE_CONSTANT(__c2)
+ __REQUIRE_CONSTANT(__c3) __REQUIRE_CONSTANT(__c4)
+ __REQUIRE_CONSTANT(__c5) __REQUIRE_CONSTANT(__c6)
+ __REQUIRE_CONSTANT(__c7) __REQUIRE_CONSTANT(__c8)
+ __REQUIRE_CONSTANT(__c9) __REQUIRE_CONSTANT(__c10)
+ __REQUIRE_CONSTANT(__c11) __REQUIRE_CONSTANT(__c12)
+ __REQUIRE_CONSTANT(__c13) __REQUIRE_CONSTANT(__c14)
+ __REQUIRE_CONSTANT(__c15) {
+ return (v128_t)(__u8x16){__c0, __c1, __c2, __c3, __c4, __c5,
+ __c6, __c7, __c8, __c9, __c10, __c11,
+ __c12, __c13, __c14, __c15};
+}
+
+static __inline__ v128_t __DEFAULT_FN_ATTRS
wasm_i16x8_const(int16_t __c0, int16_t __c1, int16_t __c2, int16_t __c3,
int16_t __c4, int16_t __c5, int16_t __c6, int16_t __c7)
__REQUIRE_CONSTANT(__c0) __REQUIRE_CONSTANT(__c1) __REQUIRE_CONSTANT(__c2)
@@ -335,18 +381,41 @@ wasm_i16x8_const(int16_t __c0, int16_t __c1, int16_t __c2, int16_t __c3,
}
static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_u16x8_const(uint16_t __c0, uint16_t __c1, uint16_t __c2, uint16_t __c3,
+ uint16_t __c4, uint16_t __c5, uint16_t __c6, uint16_t __c7)
+ __REQUIRE_CONSTANT(__c0) __REQUIRE_CONSTANT(__c1) __REQUIRE_CONSTANT(__c2)
+ __REQUIRE_CONSTANT(__c3) __REQUIRE_CONSTANT(__c4)
+ __REQUIRE_CONSTANT(__c5) __REQUIRE_CONSTANT(__c6)
+ __REQUIRE_CONSTANT(__c7) {
+ return (v128_t)(__u16x8){__c0, __c1, __c2, __c3, __c4, __c5, __c6, __c7};
+}
+
+static __inline__ v128_t __DEFAULT_FN_ATTRS
wasm_i32x4_const(int32_t __c0, int32_t __c1, int32_t __c2, int32_t __c3)
__REQUIRE_CONSTANT(__c0) __REQUIRE_CONSTANT(__c1) __REQUIRE_CONSTANT(__c2)
__REQUIRE_CONSTANT(__c3) {
return (v128_t)(__i32x4){__c0, __c1, __c2, __c3};
}
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_u32x4_const(uint32_t __c0, uint32_t __c1, uint32_t __c2, uint32_t __c3)
+ __REQUIRE_CONSTANT(__c0) __REQUIRE_CONSTANT(__c1) __REQUIRE_CONSTANT(__c2)
+ __REQUIRE_CONSTANT(__c3) {
+ return (v128_t)(__u32x4){__c0, __c1, __c2, __c3};
+}
+
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_const(int64_t __c0,
int64_t __c1)
__REQUIRE_CONSTANT(__c0) __REQUIRE_CONSTANT(__c1) {
return (v128_t)(__i64x2){__c0, __c1};
}
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u64x2_const(uint64_t __c0,
+ uint64_t __c1)
+ __REQUIRE_CONSTANT(__c0) __REQUIRE_CONSTANT(__c1) {
+ return (v128_t)(__u64x2){__c0, __c1};
+}
+
static __inline__ v128_t __DEFAULT_FN_ATTRS
wasm_f32x4_const(float __c0, float __c1, float __c2, float __c3)
__REQUIRE_CONSTANT(__c0) __REQUIRE_CONSTANT(__c1) __REQUIRE_CONSTANT(__c2)
@@ -366,21 +435,42 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_const_splat(int8_t __c)
__c, __c, __c, __c, __c, __c, __c, __c};
}
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_const_splat(uint8_t __c)
+ __REQUIRE_CONSTANT(__c) {
+ return (v128_t)(__u8x16){__c, __c, __c, __c, __c, __c, __c, __c,
+ __c, __c, __c, __c, __c, __c, __c, __c};
+}
+
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_const_splat(int16_t __c)
__REQUIRE_CONSTANT(__c) {
return (v128_t)(__i16x8){__c, __c, __c, __c, __c, __c, __c, __c};
}
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_const_splat(uint16_t __c)
+ __REQUIRE_CONSTANT(__c) {
+ return (v128_t)(__u16x8){__c, __c, __c, __c, __c, __c, __c, __c};
+}
+
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_const_splat(int32_t __c)
__REQUIRE_CONSTANT(__c) {
return (v128_t)(__i32x4){__c, __c, __c, __c};
}
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_const_splat(uint32_t __c)
+ __REQUIRE_CONSTANT(__c) {
+ return (v128_t)(__u32x4){__c, __c, __c, __c};
+}
+
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_const_splat(int64_t __c)
__REQUIRE_CONSTANT(__c) {
return (v128_t)(__i64x2){__c, __c};
}
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u64x2_const_splat(uint64_t __c)
+ __REQUIRE_CONSTANT(__c) {
+ return (v128_t)(__u64x2){__c, __c};
+}
+
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_const_splat(float __c)
__REQUIRE_CONSTANT(__c) {
return (v128_t)(__f32x4){__c, __c, __c, __c};
@@ -396,6 +486,11 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_splat(int8_t __a) {
__a, __a, __a, __a, __a, __a, __a, __a};
}
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_splat(uint8_t __a) {
+ return (v128_t)(__u8x16){__a, __a, __a, __a, __a, __a, __a, __a,
+ __a, __a, __a, __a, __a, __a, __a, __a};
+}
+
static __inline__ int8_t __DEFAULT_FN_ATTRS wasm_i8x16_extract_lane(v128_t __a,
int __i)
__REQUIRE_CONSTANT(__i) {
@@ -417,10 +512,23 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_replace_lane(v128_t __a,
return (v128_t)__v;
}
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_replace_lane(v128_t __a,
+ int __i,
+ uint8_t __b)
+ __REQUIRE_CONSTANT(__i) {
+ __u8x16 __v = (__u8x16)__a;
+ __v[__i] = __b;
+ return (v128_t)__v;
+}
+
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_splat(int16_t __a) {
return (v128_t)(__i16x8){__a, __a, __a, __a, __a, __a, __a, __a};
}
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_splat(uint16_t __a) {
+ return (v128_t)(__u16x8){__a, __a, __a, __a, __a, __a, __a, __a};
+}
+
static __inline__ int16_t __DEFAULT_FN_ATTRS wasm_i16x8_extract_lane(v128_t __a,
int __i)
__REQUIRE_CONSTANT(__i) {
@@ -441,16 +549,32 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_replace_lane(v128_t __a,
return (v128_t)__v;
}
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_replace_lane(
+ v128_t __a, int __i, uint16_t __b) __REQUIRE_CONSTANT(__i) {
+ __u16x8 __v = (__u16x8)__a;
+ __v[__i] = __b;
+ return (v128_t)__v;
+}
+
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_splat(int32_t __a) {
return (v128_t)(__i32x4){__a, __a, __a, __a};
}
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_splat(uint32_t __a) {
+ return (v128_t)(__u32x4){__a, __a, __a, __a};
+}
+
static __inline__ int32_t __DEFAULT_FN_ATTRS wasm_i32x4_extract_lane(v128_t __a,
int __i)
__REQUIRE_CONSTANT(__i) {
return ((__i32x4)__a)[__i];
}
+static __inline__ uint32_t __DEFAULT_FN_ATTRS
+wasm_u32x4_extract_lane(v128_t __a, int __i) __REQUIRE_CONSTANT(__i) {
+ return ((__u32x4)__a)[__i];
+}
+
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_replace_lane(v128_t __a,
int __i,
int32_t __b)
@@ -460,16 +584,32 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_replace_lane(v128_t __a,
return (v128_t)__v;
}
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_replace_lane(
+ v128_t __a, int __i, uint32_t __b) __REQUIRE_CONSTANT(__i) {
+ __u32x4 __v = (__u32x4)__a;
+ __v[__i] = __b;
+ return (v128_t)__v;
+}
+
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_splat(int64_t __a) {
return (v128_t)(__i64x2){__a, __a};
}
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u64x2_splat(uint64_t __a) {
+ return (v128_t)(__u64x2){__a, __a};
+}
+
static __inline__ int64_t __DEFAULT_FN_ATTRS wasm_i64x2_extract_lane(v128_t __a,
int __i)
__REQUIRE_CONSTANT(__i) {
return ((__i64x2)__a)[__i];
}
+static __inline__ uint64_t __DEFAULT_FN_ATTRS
+wasm_u64x2_extract_lane(v128_t __a, int __i) __REQUIRE_CONSTANT(__i) {
+ return ((__u64x2)__a)[__i];
+}
+
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_replace_lane(v128_t __a,
int __i,
int64_t __b)
@@ -479,6 +619,13 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_replace_lane(v128_t __a,
return (v128_t)__v;
}
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u64x2_replace_lane(
+ v128_t __a, int __i, uint64_t __b) __REQUIRE_CONSTANT(__i) {
+ __u64x2 __v = (__u64x2)__a;
+ __v[__i] = __b;
+ return (v128_t)__v;
+}
+
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_splat(float __a) {
return (v128_t)(__f32x4){__a, __a, __a, __a};
}
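Illustrative sketch, not part of the vendored header: the new unsigned constructors and lane accessors avoid sign extension and implicit-conversion warnings in unsigned WebAssembly SIMD code. A wasm target with -msimd128 is assumed; the helper name is invented.

#include <wasm_simd128.h>

static uint32_t top_lane(void) {
    v128_t v = wasm_u32x4_make(1u, 2u, 3u, 0xffffffffu);
    v = wasm_u32x4_replace_lane(v, 0, 42u);
    return wasm_u32x4_extract_lane(v, 3);   /* 0xffffffff, kept unsigned */
}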
@@ -804,7 +951,7 @@ static __inline__ bool __DEFAULT_FN_ATTRS wasm_i8x16_all_true(v128_t __a) {
return __builtin_wasm_all_true_i8x16((__i8x16)__a);
}
-static __inline__ int32_t __DEFAULT_FN_ATTRS wasm_i8x16_bitmask(v128_t __a) {
+static __inline__ uint32_t __DEFAULT_FN_ATTRS wasm_i8x16_bitmask(v128_t __a) {
return __builtin_wasm_bitmask_i8x16((__i8x16)__a);
}
@@ -813,17 +960,17 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_popcnt(v128_t __a) {
}
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_shl(v128_t __a,
- int32_t __b) {
+ uint32_t __b) {
return (v128_t)((__i8x16)__a << __b);
}
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_shr(v128_t __a,
- int32_t __b) {
+ uint32_t __b) {
return (v128_t)((__i8x16)__a >> __b);
}
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_shr(v128_t __a,
- int32_t __b) {
+ uint32_t __b) {
return (v128_t)((__u8x16)__a >> __b);
}
@@ -894,22 +1041,22 @@ static __inline__ bool __DEFAULT_FN_ATTRS wasm_i16x8_all_true(v128_t __a) {
return __builtin_wasm_all_true_i16x8((__i16x8)__a);
}
-static __inline__ int32_t __DEFAULT_FN_ATTRS wasm_i16x8_bitmask(v128_t __a) {
+static __inline__ uint32_t __DEFAULT_FN_ATTRS wasm_i16x8_bitmask(v128_t __a) {
return __builtin_wasm_bitmask_i16x8((__i16x8)__a);
}
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_shl(v128_t __a,
- int32_t __b) {
+ uint32_t __b) {
return (v128_t)((__i16x8)__a << __b);
}
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_shr(v128_t __a,
- int32_t __b) {
+ uint32_t __b) {
return (v128_t)((__i16x8)__a >> __b);
}
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_shr(v128_t __a,
- int32_t __b) {
+ uint32_t __b) {
return (v128_t)((__u16x8)__a >> __b);
}
@@ -985,22 +1132,22 @@ static __inline__ bool __DEFAULT_FN_ATTRS wasm_i32x4_all_true(v128_t __a) {
return __builtin_wasm_all_true_i32x4((__i32x4)__a);
}
-static __inline__ int32_t __DEFAULT_FN_ATTRS wasm_i32x4_bitmask(v128_t __a) {
+static __inline__ uint32_t __DEFAULT_FN_ATTRS wasm_i32x4_bitmask(v128_t __a) {
return __builtin_wasm_bitmask_i32x4((__i32x4)__a);
}
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_shl(v128_t __a,
- int32_t __b) {
+ uint32_t __b) {
return (v128_t)((__i32x4)__a << __b);
}
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_shr(v128_t __a,
- int32_t __b) {
+ uint32_t __b) {
return (v128_t)((__i32x4)__a >> __b);
}
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_shr(v128_t __a,
- int32_t __b) {
+ uint32_t __b) {
return (v128_t)((__u32x4)__a >> __b);
}
@@ -1056,22 +1203,22 @@ static __inline__ bool __DEFAULT_FN_ATTRS wasm_i64x2_all_true(v128_t __a) {
return __builtin_wasm_all_true_i64x2((__i64x2)__a);
}
-static __inline__ int32_t __DEFAULT_FN_ATTRS wasm_i64x2_bitmask(v128_t __a) {
+static __inline__ uint32_t __DEFAULT_FN_ATTRS wasm_i64x2_bitmask(v128_t __a) {
return __builtin_wasm_bitmask_i64x2((__i64x2)__a);
}
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_shl(v128_t __a,
- int32_t __b) {
+ uint32_t __b) {
return (v128_t)((__i64x2)__a << (int64_t)__b);
}
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_shr(v128_t __a,
- int32_t __b) {
+ uint32_t __b) {
return (v128_t)((__i64x2)__a >> (int64_t)__b);
}
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u64x2_shr(v128_t __a,
- int32_t __b) {
+ uint32_t __b) {
return (v128_t)((__u64x2)__a >> (int64_t)__b);
}
@@ -1150,14 +1297,12 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_max(v128_t __a,
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_pmin(v128_t __a,
v128_t __b) {
- __i32x4 __mask = (__i32x4)((__f32x4)__b < (__f32x4)__a);
- return (v128_t)((((__i32x4)__b) & __mask) | (((__i32x4)__a) & ~__mask));
+ return (v128_t)__builtin_wasm_pmin_f32x4((__f32x4)__a, (__f32x4)__b);
}
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_pmax(v128_t __a,
v128_t __b) {
- __i32x4 __mask = (__i32x4)((__f32x4)__a < (__f32x4)__b);
- return (v128_t)((((__i32x4)__b) & __mask) | (((__i32x4)__a) & ~__mask));
+ return (v128_t)__builtin_wasm_pmax_f32x4((__f32x4)__a, (__f32x4)__b);
}
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_abs(v128_t __a) {
@@ -1220,14 +1365,12 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_max(v128_t __a,
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_pmin(v128_t __a,
v128_t __b) {
- __i64x2 __mask = (__i64x2)((__f64x2)__b < (__f64x2)__a);
- return (v128_t)((((__i64x2)__b) & __mask) | (((__i64x2)__a) & ~__mask));
+ return (v128_t)__builtin_wasm_pmin_f64x2((__f64x2)__a, (__f64x2)__b);
}
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_pmax(v128_t __a,
v128_t __b) {
- __i64x2 __mask = (__i64x2)((__f64x2)__a < (__f64x2)__b);
- return (v128_t)((((__i64x2)__b) & __mask) | (((__i64x2)__a) & ~__mask));
+ return (v128_t)__builtin_wasm_pmax_f64x2((__f64x2)__a, (__f64x2)__b);
}
static __inline__ v128_t __DEFAULT_FN_ATTRS
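Illustrative sketch, not part of the vendored header: pmin/pmax now lower through dedicated builtins but keep the spec's lane-wise "pseudo" semantics, pmin(a, b) = b < a ? b : a and pmax(a, b) = a < b ? b : a. The helper name is invented.

#include <wasm_simd128.h>

static v128_t clamp01(v128_t x) {
    x = wasm_f32x4_pmax(x, wasm_f32x4_const_splat(0.0f));
    return wasm_f32x4_pmin(x, wasm_f32x4_const_splat(1.0f));
}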
diff --git a/clang/lib/Headers/wmmintrin.h b/clang/lib/Headers/wmmintrin.h
index f932ca81089c..49148dbf3ac6 100644
--- a/clang/lib/Headers/wmmintrin.h
+++ b/clang/lib/Headers/wmmintrin.h
@@ -10,6 +10,10 @@
#ifndef __WMMINTRIN_H
#define __WMMINTRIN_H
+#if !defined(__i386__) && !defined(__x86_64__)
+#error "This header is only meant to be used on x86 and x64 architecture"
+#endif
+
#include <emmintrin.h>
#include <__wmmintrin_aes.h>
diff --git a/clang/lib/Headers/x86gprintrin.h b/clang/lib/Headers/x86gprintrin.h
index 1fc6cab4b28f..01e741f1eb61 100644
--- a/clang/lib/Headers/x86gprintrin.h
+++ b/clang/lib/Headers/x86gprintrin.h
@@ -20,4 +20,16 @@
#include <uintrintrin.h>
#endif
+#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
+ defined(__CRC32__)
+#include <crc32intrin.h>
+#endif
+
+#define __SSC_MARK(Tag) \
+ __asm__ __volatile__("mov {%%ebx, %%eax|eax, ebx}; " \
+ "mov {%0, %%ebx|ebx, %0}; " \
+ ".byte 0x64, 0x67, 0x90; " \
+ "mov {%%eax, %%ebx|ebx, eax};" ::"i"(Tag) \
+ : "%eax");
+
#endif /* __X86GPRINTRIN_H */
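Illustrative sketch, not part of the vendored header: __SSC_MARK emits the magic no-op sequence that simulators such as Intel SDE recognize as region markers; the tag must be an integer constant. The 0x111/0x222 start/stop values are the usual convention and are an assumption here.

#include <x86gprintrin.h>

void region_of_interest(void) {
    __SSC_MARK(0x111);        /* start marker */
    /* ... code under analysis ... */
    __SSC_MARK(0x222);        /* stop marker */
}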
diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h
index f4686691c7ed..1612d3d2773d 100644
--- a/clang/lib/Headers/xmmintrin.h
+++ b/clang/lib/Headers/xmmintrin.h
@@ -10,6 +10,10 @@
#ifndef __XMMINTRIN_H
#define __XMMINTRIN_H
+#if !defined(__i386__) && !defined(__x86_64__)
+#error "This header is only meant to be used on x86 and x64 architecture"
+#endif
+
#include <mmintrin.h>
typedef int __v4si __attribute__((__vector_size__(16)));
@@ -2181,7 +2185,7 @@ void _mm_sfence(void);
/// 3: Bits [63:48] are copied to the destination.
/// \returns A 16-bit integer containing the extracted 16 bits of packed data.
#define _mm_extract_pi16(a, n) \
- (int)__builtin_ia32_vec_ext_v4hi((__v4hi)a, (int)n)
+ ((int)__builtin_ia32_vec_ext_v4hi((__v4hi)a, (int)n))
/// Copies data from the 64-bit vector of [4 x i16] to the destination,
/// and inserts the lower 16-bits of an integer operand at the 16-bit offset
@@ -2212,7 +2216,7 @@ void _mm_sfence(void);
/// \returns A 64-bit integer vector containing the copied packed data from the
/// operands.
#define _mm_insert_pi16(a, d, n) \
- (__m64)__builtin_ia32_vec_set_v4hi((__v4hi)a, (int)d, (int)n)
+ ((__m64)__builtin_ia32_vec_set_v4hi((__v4hi)a, (int)d, (int)n))
/// Compares each of the corresponding packed 16-bit integer values of
/// the 64-bit integer vectors, and writes the greater value to the
@@ -2359,7 +2363,7 @@ _mm_mulhi_pu16(__m64 __a, __m64 __b)
/// 11: assigned from bits [63:48] of \a a.
/// \returns A 64-bit integer vector containing the shuffled values.
#define _mm_shuffle_pi16(a, n) \
- (__m64)__builtin_ia32_pshufw((__v4hi)(__m64)(a), (n))
+ ((__m64)__builtin_ia32_pshufw((__v4hi)(__m64)(a), (n)))
/// Conditionally copies the values from each 8-bit element in the first
/// 64-bit integer vector operand to the specified memory location, as
@@ -2601,8 +2605,8 @@ void _mm_setcsr(unsigned int __i);
/// 11: Bits [127:96] copied from the specified operand.
/// \returns A 128-bit vector of [4 x float] containing the shuffled values.
#define _mm_shuffle_ps(a, b, mask) \
- (__m128)__builtin_ia32_shufps((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \
- (int)(mask))
+ ((__m128)__builtin_ia32_shufps((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \
+ (int)(mask)))
/// Unpacks the high-order (index 2,3) values from two 128-bit vectors of
/// [4 x float] and interleaves them into a 128-bit vector of [4 x float].
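Illustrative sketch, not part of the vendored header: with the extra parentheses, _mm_shuffle_ps composes cleanly with other intrinsics in a single expression. SSE is assumed; the helper name is invented.

#include <xmmintrin.h>

static __m128 swap_halves(__m128 v) {
    /* _MM_SHUFFLE(1, 0, 3, 2) picks lanes 2,3 from the first operand and
       lanes 0,1 from the second, swapping the two 64-bit halves of v. */
    return _mm_shuffle_ps(v, v, _MM_SHUFFLE(1, 0, 3, 2));
}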
diff --git a/clang/lib/Headers/xopintrin.h b/clang/lib/Headers/xopintrin.h
index 5cedde41b625..976cdf4902a4 100644
--- a/clang/lib/Headers/xopintrin.h
+++ b/clang/lib/Headers/xopintrin.h
@@ -225,16 +225,16 @@ _mm_rot_epi64(__m128i __A, __m128i __B)
}
#define _mm_roti_epi8(A, N) \
- (__m128i)__builtin_ia32_vprotbi((__v16qi)(__m128i)(A), (N))
+ ((__m128i)__builtin_ia32_vprotbi((__v16qi)(__m128i)(A), (N)))
#define _mm_roti_epi16(A, N) \
- (__m128i)__builtin_ia32_vprotwi((__v8hi)(__m128i)(A), (N))
+ ((__m128i)__builtin_ia32_vprotwi((__v8hi)(__m128i)(A), (N)))
#define _mm_roti_epi32(A, N) \
- (__m128i)__builtin_ia32_vprotdi((__v4si)(__m128i)(A), (N))
+ ((__m128i)__builtin_ia32_vprotdi((__v4si)(__m128i)(A), (N)))
#define _mm_roti_epi64(A, N) \
- (__m128i)__builtin_ia32_vprotqi((__v2di)(__m128i)(A), (N))
+ ((__m128i)__builtin_ia32_vprotqi((__v2di)(__m128i)(A), (N)))
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_shl_epi8(__m128i __A, __m128i __B)
@@ -285,36 +285,36 @@ _mm_sha_epi64(__m128i __A, __m128i __B)
}
#define _mm_com_epu8(A, B, N) \
- (__m128i)__builtin_ia32_vpcomub((__v16qi)(__m128i)(A), \
- (__v16qi)(__m128i)(B), (N))
+ ((__m128i)__builtin_ia32_vpcomub((__v16qi)(__m128i)(A), \
+ (__v16qi)(__m128i)(B), (N)))
#define _mm_com_epu16(A, B, N) \
- (__m128i)__builtin_ia32_vpcomuw((__v8hi)(__m128i)(A), \
- (__v8hi)(__m128i)(B), (N))
+ ((__m128i)__builtin_ia32_vpcomuw((__v8hi)(__m128i)(A), \
+ (__v8hi)(__m128i)(B), (N)))
#define _mm_com_epu32(A, B, N) \
- (__m128i)__builtin_ia32_vpcomud((__v4si)(__m128i)(A), \
- (__v4si)(__m128i)(B), (N))
+ ((__m128i)__builtin_ia32_vpcomud((__v4si)(__m128i)(A), \
+ (__v4si)(__m128i)(B), (N)))
#define _mm_com_epu64(A, B, N) \
- (__m128i)__builtin_ia32_vpcomuq((__v2di)(__m128i)(A), \
- (__v2di)(__m128i)(B), (N))
+ ((__m128i)__builtin_ia32_vpcomuq((__v2di)(__m128i)(A), \
+ (__v2di)(__m128i)(B), (N)))
#define _mm_com_epi8(A, B, N) \
- (__m128i)__builtin_ia32_vpcomb((__v16qi)(__m128i)(A), \
- (__v16qi)(__m128i)(B), (N))
+ ((__m128i)__builtin_ia32_vpcomb((__v16qi)(__m128i)(A), \
+ (__v16qi)(__m128i)(B), (N)))
#define _mm_com_epi16(A, B, N) \
- (__m128i)__builtin_ia32_vpcomw((__v8hi)(__m128i)(A), \
- (__v8hi)(__m128i)(B), (N))
+ ((__m128i)__builtin_ia32_vpcomw((__v8hi)(__m128i)(A), \
+ (__v8hi)(__m128i)(B), (N)))
#define _mm_com_epi32(A, B, N) \
- (__m128i)__builtin_ia32_vpcomd((__v4si)(__m128i)(A), \
- (__v4si)(__m128i)(B), (N))
+ ((__m128i)__builtin_ia32_vpcomd((__v4si)(__m128i)(A), \
+ (__v4si)(__m128i)(B), (N)))
#define _mm_com_epi64(A, B, N) \
- (__m128i)__builtin_ia32_vpcomq((__v2di)(__m128i)(A), \
- (__v2di)(__m128i)(B), (N))
+ ((__m128i)__builtin_ia32_vpcomq((__v2di)(__m128i)(A), \
+ (__v2di)(__m128i)(B), (N)))
#define _MM_PCOMCTRL_LT 0
#define _MM_PCOMCTRL_LE 1
@@ -710,23 +710,23 @@ _mm_comtrue_epi64(__m128i __A, __m128i __B)
}
#define _mm_permute2_pd(X, Y, C, I) \
- (__m128d)__builtin_ia32_vpermil2pd((__v2df)(__m128d)(X), \
- (__v2df)(__m128d)(Y), \
- (__v2di)(__m128i)(C), (I))
+ ((__m128d)__builtin_ia32_vpermil2pd((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), \
+ (__v2di)(__m128i)(C), (I)))
#define _mm256_permute2_pd(X, Y, C, I) \
- (__m256d)__builtin_ia32_vpermil2pd256((__v4df)(__m256d)(X), \
- (__v4df)(__m256d)(Y), \
- (__v4di)(__m256i)(C), (I))
+ ((__m256d)__builtin_ia32_vpermil2pd256((__v4df)(__m256d)(X), \
+ (__v4df)(__m256d)(Y), \
+ (__v4di)(__m256i)(C), (I)))
#define _mm_permute2_ps(X, Y, C, I) \
- (__m128)__builtin_ia32_vpermil2ps((__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \
- (__v4si)(__m128i)(C), (I))
+ ((__m128)__builtin_ia32_vpermil2ps((__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \
+ (__v4si)(__m128i)(C), (I)))
#define _mm256_permute2_ps(X, Y, C, I) \
- (__m256)__builtin_ia32_vpermil2ps256((__v8sf)(__m256)(X), \
- (__v8sf)(__m256)(Y), \
- (__v8si)(__m256i)(C), (I))
+ ((__m256)__builtin_ia32_vpermil2ps256((__v8sf)(__m256)(X), \
+ (__v8sf)(__m256)(Y), \
+ (__v8si)(__m256i)(C), (I)))
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_frcz_ss(__m128 __A)
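For orientation, a short hedged usage sketch of one of the now fully parenthesized XOP comparison macros together with the _MM_PCOMCTRL_* selectors defined just below it; the function name and parameters are assumptions made for the example.

#include <x86intrin.h>                          // XOP intrinsics; assumes compilation with -mxop
__m128i cmp_lt_u8(__m128i a, __m128i b) {
  return _mm_com_epu8(a, b, _MM_PCOMCTRL_LT);   // per-byte unsigned a < b, all-ones where true
}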
diff --git a/clang/lib/Index/FileIndexRecord.cpp b/clang/lib/Index/FileIndexRecord.cpp
index d392a2bedeba..d4d1d2f70a9a 100644
--- a/clang/lib/Index/FileIndexRecord.cpp
+++ b/clang/lib/Index/FileIndexRecord.cpp
@@ -1,9 +1,8 @@
//===--- FileIndexRecord.cpp - Index data per file --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/clang/lib/Index/IndexDecl.cpp b/clang/lib/Index/IndexDecl.cpp
index 00adb3644ff2..3139aedaf01d 100644
--- a/clang/lib/Index/IndexDecl.cpp
+++ b/clang/lib/Index/IndexDecl.cpp
@@ -8,6 +8,7 @@
#include "IndexingContext.h"
#include "clang/AST/Attr.h"
+#include "clang/AST/Decl.h"
#include "clang/AST/DeclVisitor.h"
#include "clang/Index/IndexDataConsumer.h"
@@ -372,6 +373,15 @@ public:
return true;
}
+ bool VisitEnumDecl(const EnumDecl *ED) {
+ TRY_TO(VisitTagDecl(ED));
+ // Indexing for the enum decl itself is handled inside VisitTagDecl; we just
+ // want to visit the integer base here, which differs from other TagDecl bases.
+ if (auto *TSI = ED->getIntegerTypeSourceInfo())
+ IndexCtx.indexTypeSourceInfo(TSI, ED, ED, /*isBase=*/true);
+ return true;
+ }
+
bool handleReferencedProtocols(const ObjCProtocolList &ProtList,
const ObjCContainerDecl *ContD,
SourceLocation SuperLoc) {
diff --git a/clang/lib/Index/USRGeneration.cpp b/clang/lib/Index/USRGeneration.cpp
index 6db763ca6f2b..41edd431dd5b 100644
--- a/clang/lib/Index/USRGeneration.cpp
+++ b/clang/lib/Index/USRGeneration.cpp
@@ -705,6 +705,7 @@ void USRGenerator::VisitType(QualType T) {
c = 'f'; break;
case BuiltinType::Double:
c = 'd'; break;
+ case BuiltinType::Ibm128: // FIXME: Need separate tag
case BuiltinType::LongDouble:
c = 'D'; break;
case BuiltinType::Float128:
diff --git a/clang/lib/Interpreter/IncrementalExecutor.cpp b/clang/lib/Interpreter/IncrementalExecutor.cpp
index 9a368d9122bc..705235aafa07 100644
--- a/clang/lib/Interpreter/IncrementalExecutor.cpp
+++ b/clang/lib/Interpreter/IncrementalExecutor.cpp
@@ -60,4 +60,15 @@ llvm::Error IncrementalExecutor::runCtors() const {
return Jit->initialize(Jit->getMainJITDylib());
}
+llvm::Expected<llvm::JITTargetAddress>
+IncrementalExecutor::getSymbolAddress(llvm::StringRef Name,
+ SymbolNameKind NameKind) const {
+ auto Sym = (NameKind == LinkerName) ? Jit->lookupLinkerMangled(Name)
+ : Jit->lookup(Name);
+
+ if (!Sym)
+ return Sym.takeError();
+ return Sym->getAddress();
+}
+
} // end namespace clang
diff --git a/clang/lib/Interpreter/IncrementalExecutor.h b/clang/lib/Interpreter/IncrementalExecutor.h
index b4c6ddec1047..24447994d5f1 100644
--- a/clang/lib/Interpreter/IncrementalExecutor.h
+++ b/clang/lib/Interpreter/IncrementalExecutor.h
@@ -35,12 +35,16 @@ class IncrementalExecutor {
llvm::orc::ThreadSafeContext &TSCtx;
public:
+ enum SymbolNameKind { IRName, LinkerName };
+
IncrementalExecutor(llvm::orc::ThreadSafeContext &TSC, llvm::Error &Err,
const llvm::Triple &Triple);
~IncrementalExecutor();
llvm::Error addModule(std::unique_ptr<llvm::Module> M);
llvm::Error runCtors() const;
+ llvm::Expected<llvm::JITTargetAddress>
+ getSymbolAddress(llvm::StringRef Name, SymbolNameKind NameKind) const;
};
} // end namespace clang
diff --git a/clang/lib/Interpreter/IncrementalParser.cpp b/clang/lib/Interpreter/IncrementalParser.cpp
index 897e2cd1aaed..84eabc3a210f 100644
--- a/clang/lib/Interpreter/IncrementalParser.cpp
+++ b/clang/lib/Interpreter/IncrementalParser.cpp
@@ -65,6 +65,8 @@ public:
case frontend::ParseSyntaxOnly:
Act = CreateFrontendAction(CI);
break;
+ case frontend::PluginAction:
+ LLVM_FALLTHROUGH;
case frontend::EmitAssembly:
LLVM_FALLTHROUGH;
case frontend::EmitObj:
@@ -289,4 +291,11 @@ IncrementalParser::Parse(llvm::StringRef input) {
return PTU;
}
+
+llvm::StringRef IncrementalParser::GetMangledName(GlobalDecl GD) const {
+ CodeGenerator *CG = getCodeGen(Act.get());
+ assert(CG);
+ return CG->GetMangledName(GD);
+}
+
} // end namespace clang
diff --git a/clang/lib/Interpreter/IncrementalParser.h b/clang/lib/Interpreter/IncrementalParser.h
index aa8142cbe493..e5ce798025d9 100644
--- a/clang/lib/Interpreter/IncrementalParser.h
+++ b/clang/lib/Interpreter/IncrementalParser.h
@@ -15,6 +15,8 @@
#include "clang/Interpreter/PartialTranslationUnit.h"
+#include "clang/AST/GlobalDecl.h"
+
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"
@@ -69,6 +71,10 @@ public:
/// \c TranslationUnitDecl and \c llvm::Module corresponding to the input.
llvm::Expected<PartialTranslationUnit &> Parse(llvm::StringRef Input);
+ /// Uses the CodeGenModule mangled name cache to avoid recomputing the name.
+ ///\returns the mangled name of \c GD.
+ llvm::StringRef GetMangledName(GlobalDecl GD) const;
+
private:
llvm::Expected<PartialTranslationUnit &> ParseOrWrapTopLevelDecl();
};
diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp
index 937504f34739..b2e7727be39a 100644
--- a/clang/lib/Interpreter/Interpreter.cpp
+++ b/clang/lib/Interpreter/Interpreter.cpp
@@ -30,13 +30,13 @@
#include "clang/Lex/PreprocessorOptions.h"
#include "llvm/IR/Module.h"
+#include "llvm/Support/Errc.h"
#include "llvm/Support/Host.h"
using namespace clang;
// FIXME: Figure out how to unify with namespace init_convenience from
-// tools/clang-import-test/clang-import-test.cpp and
-// examples/clang-interpreter/main.cpp
+// tools/clang-import-test/clang-import-test.cpp
namespace {
/// Retrieves the clang CC1 specific flags out of the compilation's jobs.
/// \returns NULL on error.
@@ -47,14 +47,14 @@ GetCC1Arguments(DiagnosticsEngine *Diagnostics,
// failed. Extract that job from the Compilation.
const driver::JobList &Jobs = Compilation->getJobs();
if (!Jobs.size() || !isa<driver::Command>(*Jobs.begin()))
- return llvm::createStringError(std::errc::state_not_recoverable,
+ return llvm::createStringError(llvm::errc::not_supported,
"Driver initialization failed. "
"Unable to create a driver job");
// The one job we find should be to invoke clang again.
const driver::Command *Cmd = cast<driver::Command>(&(*Jobs.begin()));
if (llvm::StringRef(Cmd->getCreator().getName()) != "clang")
- return llvm::createStringError(std::errc::state_not_recoverable,
+ return llvm::createStringError(llvm::errc::not_supported,
"Driver initialization failed");
return &Cmd->getArguments();
@@ -89,13 +89,13 @@ CreateCI(const llvm::opt::ArgStringList &Argv) {
// Create the actual diagnostics engine.
Clang->createDiagnostics();
if (!Clang->hasDiagnostics())
- return llvm::createStringError(std::errc::state_not_recoverable,
+ return llvm::createStringError(llvm::errc::not_supported,
"Initialization failed. "
"Unable to create diagnostics engine");
DiagsBuffer->FlushDiagnostics(Clang->getDiagnostics());
if (!Success)
- return llvm::createStringError(std::errc::state_not_recoverable,
+ return llvm::createStringError(llvm::errc::not_supported,
"Initialization failed. "
"Unable to flush diagnostics");
@@ -106,12 +106,16 @@ CreateCI(const llvm::opt::ArgStringList &Argv) {
Clang->setTarget(TargetInfo::CreateTargetInfo(
Clang->getDiagnostics(), Clang->getInvocation().TargetOpts));
if (!Clang->hasTarget())
- return llvm::createStringError(std::errc::state_not_recoverable,
+ return llvm::createStringError(llvm::errc::not_supported,
"Initialization failed. "
"Target is missing");
Clang->getTarget().adjust(Clang->getDiagnostics(), Clang->getLangOpts());
+ // Don't clear the AST before backend codegen since we do codegen multiple
+ // times, reusing the same AST.
+ Clang->getCodeGenOpts().ClearASTBeforeBackend = false;
+
return std::move(Clang);
}
@@ -143,19 +147,13 @@ IncrementalCompilerBuilder::create(std::vector<const char *> &ClangArgv) {
// driver to construct.
ClangArgv.push_back("<<< inputs >>>");
- CompilerInvocation Invocation;
// Buffer diagnostics from argument parsing so that we can output them using a
// well formed diagnostic object.
IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs());
- IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions();
+ IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts =
+ CreateAndPopulateDiagOpts(ClangArgv);
TextDiagnosticBuffer *DiagsBuffer = new TextDiagnosticBuffer;
DiagnosticsEngine Diags(DiagID, &*DiagOpts, DiagsBuffer);
- unsigned MissingArgIndex, MissingArgCount;
- const llvm::opt::OptTable &Opts = driver::getDriverOptTable();
- llvm::opt::InputArgList ParsedArgs =
- Opts.ParseArgs(ArrayRef<const char *>(ClangArgv).slice(1),
- MissingArgIndex, MissingArgCount);
- ParseDiagnosticArgs(*DiagOpts, ParsedArgs, &Diags);
driver::Driver Driver(/*MainBinaryName=*/ClangArgv[0],
llvm::sys::getProcessTriple(), Diags);
@@ -223,3 +221,33 @@ llvm::Error Interpreter::Execute(PartialTranslationUnit &T) {
return llvm::Error::success();
}
+
+llvm::Expected<llvm::JITTargetAddress>
+Interpreter::getSymbolAddress(GlobalDecl GD) const {
+ if (!IncrExecutor)
+ return llvm::make_error<llvm::StringError>("Operation failed. "
+ "No execution engine",
+ std::error_code());
+ llvm::StringRef MangledName = IncrParser->GetMangledName(GD);
+ return getSymbolAddress(MangledName);
+}
+
+llvm::Expected<llvm::JITTargetAddress>
+Interpreter::getSymbolAddress(llvm::StringRef IRName) const {
+ if (!IncrExecutor)
+ return llvm::make_error<llvm::StringError>("Operation failed. "
+ "No execution engine",
+ std::error_code());
+
+ return IncrExecutor->getSymbolAddress(IRName, IncrementalExecutor::IRName);
+}
+
+llvm::Expected<llvm::JITTargetAddress>
+Interpreter::getSymbolAddressFromLinkerName(llvm::StringRef Name) const {
+ if (!IncrExecutor)
+ return llvm::make_error<llvm::StringError>("Operation failed. "
+ "No execution engine",
+ std::error_code());
+
+ return IncrExecutor->getSymbolAddress(Name, IncrementalExecutor::LinkerName);
+}
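A hedged sketch, not part of the patch, of how an embedder might use the new lookup entry points once an Interpreter has executed some code; the function name, its C linkage, and the unprefixed ELF-style linker name are assumptions made for the example.

#include <cstdint>
#include "clang/Interpreter/Interpreter.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"
// Assumes Interp has already executed: extern "C" int square(int x) { return x * x; }
int callSquare(clang::Interpreter &Interp) {
  llvm::Expected<llvm::JITTargetAddress> Addr =
      Interp.getSymbolAddressFromLinkerName("square");
  if (!Addr) {
    llvm::logAllUnhandledErrors(Addr.takeError(), llvm::errs(), "lookup failed: ");
    return -1;
  }
  auto *Square = reinterpret_cast<int (*)(int)>(static_cast<std::uintptr_t>(*Addr));
  return Square(6);                             // 36 if the JIT'd definition is reachable
}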
diff --git a/clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp b/clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp
index cfca167f8bf1..f597c56837fb 100644
--- a/clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp
+++ b/clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp
@@ -131,17 +131,17 @@ LLVM_NODISCARD static bool isRawStringLiteral(const char *First,
--Current;
if (*Current != 'R')
return false;
- if (First == Current || !isIdentifierBody(*--Current))
+ if (First == Current || !isAsciiIdentifierContinue(*--Current))
return true;
// Check for a prefix of "u", "U", or "L".
if (*Current == 'u' || *Current == 'U' || *Current == 'L')
- return First == Current || !isIdentifierBody(*--Current);
+ return First == Current || !isAsciiIdentifierContinue(*--Current);
// Check for a prefix of "u8".
if (*Current != '8' || First == Current || *Current-- != 'u')
return false;
- return First == Current || !isIdentifierBody(*--Current);
+ return First == Current || !isAsciiIdentifierContinue(*--Current);
}
static void skipRawString(const char *&First, const char *const End) {
@@ -319,7 +319,7 @@ static bool isQuoteCppDigitSeparator(const char *const Start,
if (!isPreprocessingNumberBody(Prev))
return false;
// The next character should be a valid identifier body character.
- return (Cur + 1) < End && isIdentifierBody(*(Cur + 1));
+ return (Cur + 1) < End && isAsciiIdentifierContinue(*(Cur + 1));
}
static void skipLine(const char *&First, const char *const End) {
@@ -484,7 +484,7 @@ void Minimizer::printAdjacentModuleNameParts(const char *&First,
const char *Last = First;
do
++Last;
- while (Last != End && (isIdentifierBody(*Last) || *Last == '.'));
+ while (Last != End && (isAsciiIdentifierContinue(*Last) || *Last == '.'));
append(First, Last);
First = Last;
}
@@ -507,7 +507,7 @@ bool Minimizer::printAtImportBody(const char *&First, const char *const End) {
}
// Don't handle macro expansions inside @import for now.
- if (!isIdentifierBody(*First) && *First != '.')
+ if (!isAsciiIdentifierContinue(*First) && *First != '.')
return true;
printAdjacentModuleNameParts(First, End);
@@ -524,9 +524,9 @@ void Minimizer::printDirectiveBody(const char *&First, const char *const End) {
LLVM_NODISCARD static const char *lexRawIdentifier(const char *First,
const char *const End) {
- assert(isIdentifierBody(*First) && "invalid identifier");
+ assert(isAsciiIdentifierContinue(*First) && "invalid identifier");
const char *Last = First + 1;
- while (Last != End && isIdentifierBody(*Last))
+ while (Last != End && isAsciiIdentifierContinue(*Last))
++Last;
return Last;
}
@@ -540,7 +540,7 @@ getIdentifierContinuation(const char *First, const char *const End) {
skipNewline(First, End);
if (First == End)
return nullptr;
- return isIdentifierBody(First[0]) ? First : nullptr;
+ return isAsciiIdentifierContinue(First[0]) ? First : nullptr;
}
Minimizer::IdInfo Minimizer::lexIdentifier(const char *First,
@@ -569,7 +569,7 @@ void Minimizer::printAdjacentMacroArgs(const char *&First,
do
++Last;
while (Last != End &&
- (isIdentifierBody(*Last) || *Last == '.' || *Last == ','));
+ (isAsciiIdentifierContinue(*Last) || *Last == '.' || *Last == ','));
append(First, Last);
First = Last;
}
@@ -588,7 +588,7 @@ bool Minimizer::printMacroArgs(const char *&First, const char *const End) {
}
// This is intentionally fairly liberal.
- if (!(isIdentifierBody(*First) || *First == '.' || *First == ','))
+ if (!(isAsciiIdentifierContinue(*First) || *First == '.' || *First == ','))
return true;
printAdjacentMacroArgs(First, End);
@@ -602,7 +602,7 @@ bool Minimizer::printMacroArgs(const char *&First, const char *const End) {
bool Minimizer::isNextIdentifier(StringRef Id, const char *&First,
const char *const End) {
skipWhitespace(First, End);
- if (First == End || !isIdentifierHead(*First))
+ if (First == End || !isAsciiIdentifierStart(*First))
return false;
IdInfo FoundId = lexIdentifier(First, End);
@@ -639,7 +639,7 @@ bool Minimizer::lexModule(const char *&First, const char *const End) {
if (Id.Name == "export") {
Export = true;
skipWhitespace(First, End);
- if (!isIdentifierBody(*First)) {
+ if (!isAsciiIdentifierContinue(*First)) {
skipLine(First, End);
return false;
}
@@ -663,7 +663,7 @@ bool Minimizer::lexModule(const char *&First, const char *const End) {
case '"':
break;
default:
- if (!isIdentifierBody(*First)) {
+ if (!isAsciiIdentifierContinue(*First)) {
skipLine(First, End);
return false;
}
@@ -690,7 +690,7 @@ bool Minimizer::lexDefine(const char *&First, const char *const End) {
append("#define ");
skipWhitespace(First, End);
- if (!isIdentifierHead(*First))
+ if (!isAsciiIdentifierStart(*First))
return reportError(First, diag::err_pp_macro_not_identifier);
IdInfo Id = lexIdentifier(First, End);
@@ -722,7 +722,7 @@ bool Minimizer::lexDefine(const char *&First, const char *const End) {
bool Minimizer::lexPragma(const char *&First, const char *const End) {
// #pragma.
skipWhitespace(First, End);
- if (First == End || !isIdentifierHead(*First))
+ if (First == End || !isAsciiIdentifierStart(*First))
return false;
IdInfo FoundId = lexIdentifier(First, End);
@@ -734,6 +734,27 @@ bool Minimizer::lexPragma(const char *&First, const char *const End) {
append("#pragma once\n");
return false;
}
+ if (FoundId.Name == "push_macro") {
+ // #pragma push_macro
+ makeToken(pp_pragma_push_macro);
+ append("#pragma push_macro");
+ printDirectiveBody(First, End);
+ return false;
+ }
+ if (FoundId.Name == "pop_macro") {
+ // #pragma pop_macro
+ makeToken(pp_pragma_pop_macro);
+ append("#pragma pop_macro");
+ printDirectiveBody(First, End);
+ return false;
+ }
+ if (FoundId.Name == "include_alias") {
+ // #pragma include_alias
+ makeToken(pp_pragma_include_alias);
+ append("#pragma include_alias");
+ printDirectiveBody(First, End);
+ return false;
+ }
if (FoundId.Name != "clang") {
skipLine(First, End);
@@ -827,7 +848,7 @@ bool Minimizer::lexPPLine(const char *&First, const char *const End) {
if (First == End)
return reportError(First, diag::err_pp_expected_eol);
- if (!isIdentifierHead(*First)) {
+ if (!isAsciiIdentifierStart(*First)) {
skipLine(First, End);
return false;
}
@@ -835,6 +856,10 @@ bool Minimizer::lexPPLine(const char *&First, const char *const End) {
// Figure out the token.
IdInfo Id = lexIdentifier(First, End);
First = Id.Last;
+
+ if (Id.Name == "pragma")
+ return lexPragma(First, End);
+
auto Kind = llvm::StringSwitch<TokenKind>(Id.Name)
.Case("include", pp_include)
.Case("__include_macros", pp___include_macros)
@@ -850,7 +875,6 @@ bool Minimizer::lexPPLine(const char *&First, const char *const End) {
.Case("elifndef", pp_elifndef)
.Case("else", pp_else)
.Case("endif", pp_endif)
- .Case("pragma", pp_pragma_import)
.Default(pp_none);
if (Kind == pp_none) {
skipDirective(Id.Name, First, End);
@@ -863,9 +887,6 @@ bool Minimizer::lexPPLine(const char *&First, const char *const End) {
if (Kind == pp_define)
return lexDefine(First, End);
- if (Kind == pp_pragma_import)
- return lexPragma(First, End);
-
// Everything else.
return lexDefault(Kind, Id.Name, First, End);
}
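To make the new cases concrete, a hedged example of preprocessor input that the dependency-directive minimizer now records (as pp_pragma_push_macro / pp_pragma_pop_macro tokens) instead of skipping; the macro and header names are made up for illustration.

#pragma push_macro("BUFSIZE")    // now kept by the minimizer
#undef BUFSIZE
#define BUFSIZE 128
#include "tiny_buffers.h"        // hypothetical header
#pragma pop_macro("BUFSIZE")     // restores the previous definition of BUFSIZE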
diff --git a/clang/lib/Lex/HeaderMap.cpp b/clang/lib/Lex/HeaderMap.cpp
index ae5e6b221953..0001fc348eda 100644
--- a/clang/lib/Lex/HeaderMap.cpp
+++ b/clang/lib/Lex/HeaderMap.cpp
@@ -194,19 +194,6 @@ LLVM_DUMP_METHOD void HeaderMapImpl::dump() const {
}
}
-/// LookupFile - Check to see if the specified relative filename is located in
-/// this HeaderMap. If so, open it and return its FileEntry.
-Optional<FileEntryRef> HeaderMap::LookupFile(StringRef Filename,
- FileManager &FM) const {
-
- SmallString<1024> Path;
- StringRef Dest = HeaderMapImpl::lookupFilename(Filename, Path);
- if (Dest.empty())
- return None;
-
- return FM.getOptionalFileRef(Dest);
-}
-
StringRef HeaderMapImpl::lookupFilename(StringRef Filename,
SmallVectorImpl<char> &DestPath) const {
const HMapHeader &Hdr = getHeader();
diff --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp
index d5adbcf62cbc..a0b60118a1a8 100644
--- a/clang/lib/Lex/HeaderSearch.cpp
+++ b/clang/lib/Lex/HeaderSearch.cpp
@@ -91,7 +91,7 @@ void HeaderSearch::PrintStats() {
<< FileInfo.size() << " files tracked.\n";
unsigned NumOnceOnlyFiles = 0, MaxNumIncludes = 0, NumSingleIncludedFiles = 0;
for (unsigned i = 0, e = FileInfo.size(); i != e; ++i) {
- NumOnceOnlyFiles += FileInfo[i].isImport;
+ NumOnceOnlyFiles += (FileInfo[i].isPragmaOnce || FileInfo[i].isImport);
if (MaxNumIncludes < FileInfo[i].NumIncludes)
MaxNumIncludes = FileInfo[i].NumIncludes;
NumSingleIncludedFiles += FileInfo[i].NumIncludes == 1;
@@ -108,6 +108,20 @@ void HeaderSearch::PrintStats() {
<< NumSubFrameworkLookups << " subframework lookups.\n";
}
+std::vector<bool> HeaderSearch::computeUserEntryUsage() const {
+ std::vector<bool> UserEntryUsage(HSOpts->UserEntries.size());
+ for (unsigned I = 0, E = SearchDirsUsage.size(); I < E; ++I) {
+ // Check whether this DirectoryLookup has been successfully used.
+ if (SearchDirsUsage[I]) {
+ auto UserEntryIdxIt = SearchDirToHSEntry.find(I);
+ // Check whether this DirectoryLookup maps to a HeaderSearch::UserEntry.
+ if (UserEntryIdxIt != SearchDirToHSEntry.end())
+ UserEntryUsage[UserEntryIdxIt->second] = true;
+ }
+ }
+ return UserEntryUsage;
+}
+
/// CreateHeaderMap - This method returns a HeaderMap for the specified
/// FileEntry, uniquing them through the 'HeaderMaps' datastructure.
const HeaderMap *HeaderSearch::CreateHeaderMap(const FileEntry *FE) {
@@ -229,7 +243,8 @@ std::string HeaderSearch::getCachedModuleFileNameImpl(StringRef ModuleName,
return Result.str().str();
}
-Module *HeaderSearch::lookupModule(StringRef ModuleName, bool AllowSearch,
+Module *HeaderSearch::lookupModule(StringRef ModuleName,
+ SourceLocation ImportLoc, bool AllowSearch,
bool AllowExtraModuleMapSearch) {
// Look in the module map to determine if there is a module by this name.
Module *Module = ModMap.findModule(ModuleName);
@@ -237,7 +252,8 @@ Module *HeaderSearch::lookupModule(StringRef ModuleName, bool AllowSearch,
return Module;
StringRef SearchName = ModuleName;
- Module = lookupModule(ModuleName, SearchName, AllowExtraModuleMapSearch);
+ Module = lookupModule(ModuleName, SearchName, ImportLoc,
+ AllowExtraModuleMapSearch);
// The facility for "private modules" -- adjacent, optional module maps named
// module.private.modulemap that are supposed to define private submodules --
@@ -248,19 +264,23 @@ Module *HeaderSearch::lookupModule(StringRef ModuleName, bool AllowSearch,
// could force building unwanted dependencies into the parent module and cause
// dependency cycles.
if (!Module && SearchName.consume_back("_Private"))
- Module = lookupModule(ModuleName, SearchName, AllowExtraModuleMapSearch);
+ Module = lookupModule(ModuleName, SearchName, ImportLoc,
+ AllowExtraModuleMapSearch);
if (!Module && SearchName.consume_back("Private"))
- Module = lookupModule(ModuleName, SearchName, AllowExtraModuleMapSearch);
+ Module = lookupModule(ModuleName, SearchName, ImportLoc,
+ AllowExtraModuleMapSearch);
return Module;
}
Module *HeaderSearch::lookupModule(StringRef ModuleName, StringRef SearchName,
+ SourceLocation ImportLoc,
bool AllowExtraModuleMapSearch) {
Module *Module = nullptr;
+ unsigned Idx;
// Look through the various header search paths to load any available module
// maps, searching for a module map that describes this module.
- for (unsigned Idx = 0, N = SearchDirs.size(); Idx != N; ++Idx) {
+ for (Idx = 0; Idx != SearchDirs.size(); ++Idx) {
if (SearchDirs[Idx].isFramework()) {
// Search for or infer a module map for a framework. Here we use
// SearchName rather than ModuleName, to permit finding private modules
@@ -323,6 +343,9 @@ Module *HeaderSearch::lookupModule(StringRef ModuleName, StringRef SearchName,
break;
}
+ if (Module)
+ noteLookupUsage(Idx, ImportLoc);
+
return Module;
}
@@ -435,16 +458,19 @@ Optional<FileEntryRef> DirectoryLookup::LookupFile(
if (llvm::sys::path::is_relative(Dest)) {
MappedName.append(Dest.begin(), Dest.end());
Filename = StringRef(MappedName.begin(), MappedName.size());
- Optional<FileEntryRef> Result = HM->LookupFile(Filename, HS.getFileMgr());
- if (Result) {
- FixupSearchPath();
- return *Result;
- }
- } else if (auto Res = HS.getFileMgr().getOptionalFileRef(Dest)) {
+ Dest = HM->lookupFilename(Filename, Path);
+ }
+
+ if (auto Res = HS.getFileMgr().getOptionalFileRef(Dest)) {
FixupSearchPath();
return *Res;
}
+ // Header maps need to be marked as used whenever the filename matches.
+ // The case where the target file **exists** is handled by the caller of this
+ // function as part of the regular logic that applies to include search paths.
+ // The case where the target file **does not exist** is handled here:
+ HS.noteLookupUsage(*HS.searchDirIdx(*this), IncludeLoc);
return None;
}
@@ -649,6 +675,21 @@ Optional<FileEntryRef> DirectoryLookup::DoFrameworkLookup(
return None;
}
+void HeaderSearch::cacheLookupSuccess(LookupFileCacheInfo &CacheLookup,
+ unsigned HitIdx, SourceLocation Loc) {
+ CacheLookup.HitIdx = HitIdx;
+ noteLookupUsage(HitIdx, Loc);
+}
+
+void HeaderSearch::noteLookupUsage(unsigned HitIdx, SourceLocation Loc) {
+ SearchDirsUsage[HitIdx] = true;
+
+ auto UserEntryIdxIt = SearchDirToHSEntry.find(HitIdx);
+ if (UserEntryIdxIt != SearchDirToHSEntry.end())
+ Diags.Report(Loc, diag::remark_pp_search_path_usage)
+ << HSOpts->UserEntries[UserEntryIdxIt->second].Path;
+}
+
void HeaderSearch::setTarget(const TargetInfo &Target) {
ModMap.setTarget(Target);
}
@@ -964,13 +1005,13 @@ Optional<FileEntryRef> HeaderSearch::LookupFile(
// If this file is found in a header map and uses the framework style of
// includes, then this header is part of a framework we're building.
- if (CurDir->isIndexHeaderMap()) {
+ if (CurDir->isHeaderMap() && isAngled) {
size_t SlashPos = Filename.find('/');
- if (SlashPos != StringRef::npos) {
+ if (SlashPos != StringRef::npos)
+ HFI.Framework =
+ getUniqueFrameworkName(StringRef(Filename.begin(), SlashPos));
+ if (CurDir->isIndexHeaderMap())
HFI.IndexHeaderMapHeader = 1;
- HFI.Framework = getUniqueFrameworkName(StringRef(Filename.begin(),
- SlashPos));
- }
}
if (checkMSVCHeaderSearch(Diags, MSFE ? &MSFE->getFileEntry() : nullptr,
@@ -987,7 +1028,7 @@ Optional<FileEntryRef> HeaderSearch::LookupFile(
&File->getFileEntry(), isAngled, FoundByHeaderMap);
// Remember this location for the next lookup we do.
- CacheLookup.HitIdx = i;
+ cacheLookupSuccess(CacheLookup, i, IncludeLoc);
return File;
}
@@ -996,7 +1037,7 @@ Optional<FileEntryRef> HeaderSearch::LookupFile(
// resolve "foo.h" any other way, change the include to <Foo/foo.h>, where
// "Foo" is the name of the framework in which the including header was found.
if (!Includers.empty() && Includers.front().first && !isAngled &&
- Filename.find('/') == StringRef::npos) {
+ !Filename.contains('/')) {
HeaderFileInfo &IncludingHFI = getFileInfo(Includers.front().first);
if (IncludingHFI.IndexHeaderMapHeader) {
SmallString<128> ScratchFilename;
@@ -1017,8 +1058,8 @@ Optional<FileEntryRef> HeaderSearch::LookupFile(
return MSFE;
}
- LookupFileCacheInfo &CacheLookup = LookupFileCache[Filename];
- CacheLookup.HitIdx = LookupFileCache[ScratchFilename].HitIdx;
+ cacheLookupSuccess(LookupFileCache[Filename],
+ LookupFileCache[ScratchFilename].HitIdx, IncludeLoc);
// FIXME: SuggestedModule.
return File;
}
@@ -1269,9 +1310,12 @@ void HeaderSearch::MarkFileModuleHeader(const FileEntry *FE,
bool HeaderSearch::ShouldEnterIncludeFile(Preprocessor &PP,
const FileEntry *File, bool isImport,
- bool ModulesEnabled, Module *M) {
+ bool ModulesEnabled, Module *M,
+ bool &IsFirstIncludeOfFile) {
++NumIncluded; // Count # of attempted #includes.
+ IsFirstIncludeOfFile = false;
+
// Get information about this file.
HeaderFileInfo &FileInfo = getFileInfo(File);
@@ -1325,7 +1369,7 @@ bool HeaderSearch::ShouldEnterIncludeFile(Preprocessor &PP,
} else {
// Otherwise, if this is a #include of a file that was previously #import'd
// or if this is the second #include of a #pragma once file, ignore it.
- if (FileInfo.isImport && !TryEnterImported())
+ if ((FileInfo.isPragmaOnce || FileInfo.isImport) && !TryEnterImported())
return false;
}
@@ -1346,6 +1390,8 @@ bool HeaderSearch::ShouldEnterIncludeFile(Preprocessor &PP,
// Increment the number of times this file has been included.
++FileInfo.NumIncludes;
+ IsFirstIncludeOfFile = FileInfo.NumIncludes == 1;
+
return true;
}
@@ -1357,6 +1403,13 @@ size_t HeaderSearch::getTotalMemory() const {
+ FrameworkMap.getAllocator().getTotalMemory();
}
+Optional<unsigned> HeaderSearch::searchDirIdx(const DirectoryLookup &DL) const {
+ for (unsigned I = 0; I < SearchDirs.size(); ++I)
+ if (&SearchDirs[I] == &DL)
+ return I;
+ return None;
+}
+
StringRef HeaderSearch::getUniqueFrameworkName(StringRef Framework) {
return FrameworkNames.insert(Framework).first->first();
}
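A minimal standalone sketch of the index mapping that computeUserEntryUsage() appears to perform, using plain standard containers in place of the HeaderSearch members; it only illustrates the idea of translating used DirectoryLookup indices back to the -I/-isystem entries that produced them.

#include <cstddef>
#include <map>
#include <vector>
std::vector<bool> usedUserEntries(const std::vector<bool> &SearchDirsUsage,
                                  const std::map<unsigned, unsigned> &DirToUserEntry,
                                  std::size_t NumUserEntries) {
  std::vector<bool> Used(NumUserEntries);
  for (unsigned I = 0, E = SearchDirsUsage.size(); I != E; ++I)
    if (SearchDirsUsage[I]) {                   // this lookup directory resolved an include
      auto It = DirToUserEntry.find(I);         // not every directory stems from a user -I entry
      if (It != DirToUserEntry.end())
        Used[It->second] = true;
    }
  return Used;
}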
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index 3034af231e0e..38467a1835d0 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -133,10 +133,10 @@ void Lexer::InitLexer(const char *BufStart, const char *BufPtr,
/// assumes that the associated file buffer and Preprocessor objects will
/// outlive it, so it doesn't take ownership of either of them.
Lexer::Lexer(FileID FID, const llvm::MemoryBufferRef &InputFile,
- Preprocessor &PP)
+ Preprocessor &PP, bool IsFirstIncludeOfFile)
: PreprocessorLexer(&PP, FID),
FileLoc(PP.getSourceManager().getLocForStartOfFile(FID)),
- LangOpts(PP.getLangOpts()) {
+ LangOpts(PP.getLangOpts()), IsFirstTimeLexingFile(IsFirstIncludeOfFile) {
InitLexer(InputFile.getBufferStart(), InputFile.getBufferStart(),
InputFile.getBufferEnd());
@@ -147,8 +147,10 @@ Lexer::Lexer(FileID FID, const llvm::MemoryBufferRef &InputFile,
/// suitable for calls to 'LexFromRawLexer'. This lexer assumes that the text
/// range will outlive it, so it doesn't take ownership of it.
Lexer::Lexer(SourceLocation fileloc, const LangOptions &langOpts,
- const char *BufStart, const char *BufPtr, const char *BufEnd)
- : FileLoc(fileloc), LangOpts(langOpts) {
+ const char *BufStart, const char *BufPtr, const char *BufEnd,
+ bool IsFirstIncludeOfFile)
+ : FileLoc(fileloc), LangOpts(langOpts),
+ IsFirstTimeLexingFile(IsFirstIncludeOfFile) {
InitLexer(BufStart, BufPtr, BufEnd);
// We *are* in raw mode.
@@ -159,9 +161,11 @@ Lexer::Lexer(SourceLocation fileloc, const LangOptions &langOpts,
/// suitable for calls to 'LexFromRawLexer'. This lexer assumes that the text
/// range will outlive it, so it doesn't take ownership of it.
Lexer::Lexer(FileID FID, const llvm::MemoryBufferRef &FromFile,
- const SourceManager &SM, const LangOptions &langOpts)
+ const SourceManager &SM, const LangOptions &langOpts,
+ bool IsFirstIncludeOfFile)
: Lexer(SM.getLocForStartOfFile(FID), langOpts, FromFile.getBufferStart(),
- FromFile.getBufferStart(), FromFile.getBufferEnd()) {}
+ FromFile.getBufferStart(), FromFile.getBufferEnd(),
+ IsFirstIncludeOfFile) {}
void Lexer::resetExtendedTokenMode() {
assert(PP && "Cannot reset token mode without a preprocessor");
@@ -1062,8 +1066,8 @@ StringRef Lexer::getImmediateMacroNameForDiagnostics(
return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
}
-bool Lexer::isIdentifierBodyChar(char c, const LangOptions &LangOpts) {
- return isIdentifierBody(c, LangOpts.DollarIdents);
+bool Lexer::isAsciiIdentifierContinueChar(char c, const LangOptions &LangOpts) {
+ return isAsciiIdentifierContinue(c, LangOpts.DollarIdents);
}
bool Lexer::isNewLineEscaped(const char *BufferStart, const char *Str) {
@@ -1446,19 +1450,30 @@ void Lexer::SetByteOffset(unsigned Offset, bool StartOfLine) {
IsAtPhysicalStartOfLine = StartOfLine;
}
+static bool isUnicodeWhitespace(uint32_t Codepoint) {
+ static const llvm::sys::UnicodeCharSet UnicodeWhitespaceChars(
+ UnicodeWhitespaceCharRanges);
+ return UnicodeWhitespaceChars.contains(Codepoint);
+}
+
static bool isAllowedIDChar(uint32_t C, const LangOptions &LangOpts) {
if (LangOpts.AsmPreprocessor) {
return false;
} else if (LangOpts.DollarIdents && '$' == C) {
return true;
- } else if (LangOpts.CPlusPlus11 || LangOpts.C11) {
+ } else if (LangOpts.CPlusPlus) {
+ // A non-leading codepoint must have the XID_Continue property.
+ // XIDContinueRanges doesn't contain characters also in XIDStartRanges,
+ // so we need to check both tables.
+ // '_' doesn't have the XID_Continue property but is allowed in C++.
+ static const llvm::sys::UnicodeCharSet XIDStartChars(XIDStartRanges);
+ static const llvm::sys::UnicodeCharSet XIDContinueChars(XIDContinueRanges);
+ return C == '_' || XIDStartChars.contains(C) ||
+ XIDContinueChars.contains(C);
+ } else if (LangOpts.C11) {
static const llvm::sys::UnicodeCharSet C11AllowedIDChars(
C11AllowedIDCharRanges);
return C11AllowedIDChars.contains(C);
- } else if (LangOpts.CPlusPlus) {
- static const llvm::sys::UnicodeCharSet CXX03AllowedIDChars(
- CXX03AllowedIDCharRanges);
- return CXX03AllowedIDChars.contains(C);
} else {
static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
C99AllowedIDCharRanges);
@@ -1467,20 +1482,24 @@ static bool isAllowedIDChar(uint32_t C, const LangOptions &LangOpts) {
}
static bool isAllowedInitiallyIDChar(uint32_t C, const LangOptions &LangOpts) {
- assert(isAllowedIDChar(C, LangOpts));
if (LangOpts.AsmPreprocessor) {
return false;
- } else if (LangOpts.CPlusPlus11 || LangOpts.C11) {
+ }
+ if (LangOpts.CPlusPlus) {
+ static const llvm::sys::UnicodeCharSet XIDStartChars(XIDStartRanges);
+ // '_' doesn't have the XID_Start property but is allowed in C++.
+ return C == '_' || XIDStartChars.contains(C);
+ }
+ if (!isAllowedIDChar(C, LangOpts))
+ return false;
+ if (LangOpts.C11) {
static const llvm::sys::UnicodeCharSet C11DisallowedInitialIDChars(
C11DisallowedInitialIDCharRanges);
return !C11DisallowedInitialIDChars.contains(C);
- } else if (LangOpts.CPlusPlus) {
- return true;
- } else {
- static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
- C99DisallowedInitialIDCharRanges);
- return !C99DisallowedInitialIDChars.contains(C);
}
+ static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
+ C99DisallowedInitialIDCharRanges);
+ return !C99DisallowedInitialIDChars.contains(C);
}
static inline CharSourceRange makeCharRange(Lexer &L, const char *Begin,
@@ -1512,16 +1531,6 @@ static void maybeDiagnoseIDCharCompat(DiagnosticsEngine &Diags, uint32_t C,
<< CannotStartIdentifier;
}
}
-
- // Check C++98 compatibility.
- if (!Diags.isIgnored(diag::warn_cxx98_compat_unicode_id, Range.getBegin())) {
- static const llvm::sys::UnicodeCharSet CXX03AllowedIDChars(
- CXX03AllowedIDCharRanges);
- if (!CXX03AllowedIDChars.contains(C)) {
- Diags.Report(Range.getBegin(), diag::warn_cxx98_compat_unicode_id)
- << Range;
- }
- }
}
/// After encountering UTF-8 character C and interpreting it as an identifier
@@ -1608,14 +1617,56 @@ static void maybeDiagnoseUTF8Homoglyph(DiagnosticsEngine &Diags, uint32_t C,
}
}
+static void diagnoseInvalidUnicodeCodepointInIdentifier(
+ DiagnosticsEngine &Diags, const LangOptions &LangOpts, uint32_t CodePoint,
+ CharSourceRange Range, bool IsFirst) {
+ if (isASCII(CodePoint))
+ return;
+
+ bool IsIDStart = isAllowedInitiallyIDChar(CodePoint, LangOpts);
+ bool IsIDContinue = IsIDStart || isAllowedIDChar(CodePoint, LangOpts);
+
+ if ((IsFirst && IsIDStart) || (!IsFirst && IsIDContinue))
+ return;
+
+ bool InvalidOnlyAtStart = IsFirst && !IsIDStart && IsIDContinue;
+
+ llvm::SmallString<5> CharBuf;
+ llvm::raw_svector_ostream CharOS(CharBuf);
+ llvm::write_hex(CharOS, CodePoint, llvm::HexPrintStyle::Upper, 4);
+
+ if (!IsFirst || InvalidOnlyAtStart) {
+ Diags.Report(Range.getBegin(), diag::err_character_not_allowed_identifier)
+ << Range << CharBuf << int(InvalidOnlyAtStart)
+ << FixItHint::CreateRemoval(Range);
+ } else {
+ Diags.Report(Range.getBegin(), diag::err_character_not_allowed)
+ << Range << CharBuf << FixItHint::CreateRemoval(Range);
+ }
+}
+
bool Lexer::tryConsumeIdentifierUCN(const char *&CurPtr, unsigned Size,
Token &Result) {
const char *UCNPtr = CurPtr + Size;
uint32_t CodePoint = tryReadUCN(UCNPtr, CurPtr, /*Token=*/nullptr);
- if (CodePoint == 0 || !isAllowedIDChar(CodePoint, LangOpts))
+ if (CodePoint == 0) {
return false;
+ }
- if (!isLexingRawMode())
+ if (!isAllowedIDChar(CodePoint, LangOpts)) {
+ if (isASCII(CodePoint) || isUnicodeWhitespace(CodePoint))
+ return false;
+ if (!isLexingRawMode() && !ParsingPreprocessorDirective &&
+ !PP->isPreprocessedOutput())
+ diagnoseInvalidUnicodeCodepointInIdentifier(
+ PP->getDiagnostics(), LangOpts, CodePoint,
+ makeCharRange(*this, CurPtr, UCNPtr),
+ /*IsFirst=*/false);
+
+ // We got a Unicode codepoint that is neither a space nor a
+ // valid identifier part.
+ // Carry on as if the codepoint was valid for recovery purposes.
+ } else if (!isLexingRawMode())
maybeDiagnoseIDCharCompat(PP->getDiagnostics(), CodePoint,
makeCharRange(*this, CurPtr, UCNPtr),
/*IsFirst=*/false);
@@ -1638,11 +1689,22 @@ bool Lexer::tryConsumeIdentifierUTF8Char(const char *&CurPtr) {
(const llvm::UTF8 *)BufferEnd,
&CodePoint,
llvm::strictConversion);
- if (Result != llvm::conversionOK ||
- !isAllowedIDChar(static_cast<uint32_t>(CodePoint), LangOpts))
+ if (Result != llvm::conversionOK)
return false;
- if (!isLexingRawMode()) {
+ if (!isAllowedIDChar(static_cast<uint32_t>(CodePoint), LangOpts)) {
+ if (isASCII(CodePoint) || isUnicodeWhitespace(CodePoint))
+ return false;
+
+ if (!isLexingRawMode() && !ParsingPreprocessorDirective &&
+ !PP->isPreprocessedOutput())
+ diagnoseInvalidUnicodeCodepointInIdentifier(
+ PP->getDiagnostics(), LangOpts, CodePoint,
+ makeCharRange(*this, CurPtr, UnicodePtr), /*IsFirst=*/false);
+ // We got a Unicode codepoint that is neither a space nor a
+ // valid identifier part. Carry on as if the codepoint was
+ // valid for recovery purposes.
+ } else if (!isLexingRawMode()) {
maybeDiagnoseIDCharCompat(PP->getDiagnostics(), CodePoint,
makeCharRange(*this, CurPtr, UnicodePtr),
/*IsFirst=*/false);
@@ -1654,103 +1716,128 @@ bool Lexer::tryConsumeIdentifierUTF8Char(const char *&CurPtr) {
return true;
}
-bool Lexer::LexIdentifier(Token &Result, const char *CurPtr) {
- // Match [_A-Za-z0-9]*, we have already matched [_A-Za-z$]
- unsigned Size;
- unsigned char C = *CurPtr++;
- while (isIdentifierBody(C))
- C = *CurPtr++;
-
- --CurPtr; // Back up over the skipped character.
-
- // Fast path, no $,\,? in identifier found. '\' might be an escaped newline
- // or UCN, and ? might be a trigraph for '\', an escaped newline or UCN.
- //
- // TODO: Could merge these checks into an InfoTable flag to make the
- // comparison cheaper
- if (isASCII(C) && C != '\\' && C != '?' &&
- (C != '$' || !LangOpts.DollarIdents)) {
-FinishIdentifier:
- const char *IdStart = BufferPtr;
- FormTokenWithChars(Result, CurPtr, tok::raw_identifier);
- Result.setRawIdentifierData(IdStart);
-
- // If we are in raw mode, return this identifier raw. There is no need to
- // look up identifier information or attempt to macro expand it.
- if (LexingRawMode)
- return true;
-
- // Fill in Result.IdentifierInfo and update the token kind,
- // looking up the identifier in the identifier table.
- IdentifierInfo *II = PP->LookUpIdentifierInfo(Result);
- // Note that we have to call PP->LookUpIdentifierInfo() even for code
- // completion, it writes IdentifierInfo into Result, and callers rely on it.
-
- // If the completion point is at the end of an identifier, we want to treat
- // the identifier as incomplete even if it resolves to a macro or a keyword.
- // This allows e.g. 'class^' to complete to 'classifier'.
- if (isCodeCompletionPoint(CurPtr)) {
- // Return the code-completion token.
- Result.setKind(tok::code_completion);
- // Skip the code-completion char and all immediate identifier characters.
- // This ensures we get consistent behavior when completing at any point in
- // an identifier (i.e. at the start, in the middle, at the end). Note that
- // only simple cases (i.e. [a-zA-Z0-9_]) are supported to keep the code
- // simpler.
- assert(*CurPtr == 0 && "Completion character must be 0");
- ++CurPtr;
- // Note that code completion token is not added as a separate character
- // when the completion point is at the end of the buffer. Therefore, we need
- // to check if the buffer has ended.
- if (CurPtr < BufferEnd) {
- while (isIdentifierBody(*CurPtr))
- ++CurPtr;
- }
- BufferPtr = CurPtr;
- return true;
+bool Lexer::LexUnicodeIdentifierStart(Token &Result, uint32_t C,
+ const char *CurPtr) {
+ if (isAllowedInitiallyIDChar(C, LangOpts)) {
+ if (!isLexingRawMode() && !ParsingPreprocessorDirective &&
+ !PP->isPreprocessedOutput()) {
+ maybeDiagnoseIDCharCompat(PP->getDiagnostics(), C,
+ makeCharRange(*this, BufferPtr, CurPtr),
+ /*IsFirst=*/true);
+ maybeDiagnoseUTF8Homoglyph(PP->getDiagnostics(), C,
+ makeCharRange(*this, BufferPtr, CurPtr));
}
- // Finally, now that we know we have an identifier, pass this off to the
- // preprocessor, which may macro expand it or something.
- if (II->isHandleIdentifierCase())
- return PP->HandleIdentifier(Result);
+ MIOpt.ReadToken();
+ return LexIdentifierContinue(Result, CurPtr);
+ }
- return true;
+ if (!isLexingRawMode() && !ParsingPreprocessorDirective &&
+ !PP->isPreprocessedOutput() && !isASCII(*BufferPtr) &&
+ !isAllowedInitiallyIDChar(C, LangOpts) && !isUnicodeWhitespace(C)) {
+ // Non-ASCII characters tend to creep into source code unintentionally.
+ // Instead of letting the parser complain about the unknown token,
+ // just drop the character.
+ // Note that we can /only/ do this when the non-ASCII character is actually
+ // spelled as Unicode, not written as a UCN. The standard requires that
+ // we not throw away any possible preprocessor tokens, but there's a
+ // loophole in the mapping of Unicode characters to basic character set
+ // characters that allows us to map these particular characters to, say,
+ // whitespace.
+ diagnoseInvalidUnicodeCodepointInIdentifier(
+ PP->getDiagnostics(), LangOpts, C,
+ makeCharRange(*this, BufferPtr, CurPtr), /*IsStart*/ true);
+ BufferPtr = CurPtr;
+ return false;
}
- // Otherwise, $,\,? in identifier found. Enter slower path.
+ // Otherwise, we have an explicit UCN or a character that's unlikely to show
+ // up by accident.
+ MIOpt.ReadToken();
+ FormTokenWithChars(Result, CurPtr, tok::unknown);
+ return true;
+}
- C = getCharAndSize(CurPtr, Size);
+bool Lexer::LexIdentifierContinue(Token &Result, const char *CurPtr) {
+ // Match [_A-Za-z0-9]*, we have already matched an identifier start.
while (true) {
+ unsigned char C = *CurPtr;
+ // Fast path.
+ if (isAsciiIdentifierContinue(C)) {
+ ++CurPtr;
+ continue;
+ }
+
+ unsigned Size;
+ // Slow path: handle trigraph, unicode codepoints, UCNs.
+ C = getCharAndSize(CurPtr, Size);
+ if (isAsciiIdentifierContinue(C)) {
+ CurPtr = ConsumeChar(CurPtr, Size, Result);
+ continue;
+ }
if (C == '$') {
// If we hit a $ and they are not supported in identifiers, we are done.
- if (!LangOpts.DollarIdents) goto FinishIdentifier;
-
+ if (!LangOpts.DollarIdents)
+ break;
// Otherwise, emit a diagnostic and continue.
if (!isLexingRawMode())
Diag(CurPtr, diag::ext_dollar_in_identifier);
CurPtr = ConsumeChar(CurPtr, Size, Result);
- C = getCharAndSize(CurPtr, Size);
continue;
- } else if (C == '\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result)) {
- C = getCharAndSize(CurPtr, Size);
+ }
+ if (C == '\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result))
continue;
- } else if (!isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr)) {
- C = getCharAndSize(CurPtr, Size);
+ if (!isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr))
continue;
- } else if (!isIdentifierBody(C)) {
- goto FinishIdentifier;
- }
+ // Neither an expected Unicode codepoint nor a UCN.
+ break;
+ }
- // Otherwise, this character is good, consume it.
- CurPtr = ConsumeChar(CurPtr, Size, Result);
+ const char *IdStart = BufferPtr;
+ FormTokenWithChars(Result, CurPtr, tok::raw_identifier);
+ Result.setRawIdentifierData(IdStart);
- C = getCharAndSize(CurPtr, Size);
- while (isIdentifierBody(C)) {
- CurPtr = ConsumeChar(CurPtr, Size, Result);
- C = getCharAndSize(CurPtr, Size);
+ // If we are in raw mode, return this identifier raw. There is no need to
+ // look up identifier information or attempt to macro expand it.
+ if (LexingRawMode)
+ return true;
+
+ // Fill in Result.IdentifierInfo and update the token kind,
+ // looking up the identifier in the identifier table.
+ IdentifierInfo *II = PP->LookUpIdentifierInfo(Result);
+ // Note that we have to call PP->LookUpIdentifierInfo() even for code
+ // completion, it writes IdentifierInfo into Result, and callers rely on it.
+
+ // If the completion point is at the end of an identifier, we want to treat
+ // the identifier as incomplete even if it resolves to a macro or a keyword.
+ // This allows e.g. 'class^' to complete to 'classifier'.
+ if (isCodeCompletionPoint(CurPtr)) {
+ // Return the code-completion token.
+ Result.setKind(tok::code_completion);
+ // Skip the code-completion char and all immediate identifier characters.
+ // This ensures we get consistent behavior when completing at any point in
+ // an identifier (i.e. at the start, in the middle, at the end). Note that
+ // only simple cases (i.e. [a-zA-Z0-9_]) are supported to keep the code
+ // simpler.
+ assert(*CurPtr == 0 && "Completion character must be 0");
+ ++CurPtr;
+ // Note that code completion token is not added as a separate character
+ // when the completion point is at the end of the buffer. Therefore, we need
+ // to check if the buffer has ended.
+ if (CurPtr < BufferEnd) {
+ while (isAsciiIdentifierContinue(*CurPtr))
+ ++CurPtr;
}
+ BufferPtr = CurPtr;
+ return true;
}
+
+ // Finally, now that we know we have an identifier, pass this off to the
+ // preprocessor, which may macro expand it or something.
+ if (II->isHandleIdentifierCase())
+ return PP->HandleIdentifier(Result);
+
+ return true;
}
/// isHexaLiteral - Return true if Start points to a hex constant.
@@ -1806,7 +1893,7 @@ bool Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {
if (C == '\'' && (getLangOpts().CPlusPlus14 || getLangOpts().C2x)) {
unsigned NextSize;
char Next = getCharAndSizeNoWarn(CurPtr + Size, NextSize, getLangOpts());
- if (isIdentifierBody(Next)) {
+ if (isAsciiIdentifierContinue(Next)) {
if (!isLexingRawMode())
Diag(CurPtr, getLangOpts().CPlusPlus
? diag::warn_cxx11_compat_digit_separator
@@ -1841,7 +1928,7 @@ const char *Lexer::LexUDSuffix(Token &Result, const char *CurPtr,
char C = getCharAndSize(CurPtr, Size);
bool Consumed = false;
- if (!isIdentifierHead(C)) {
+ if (!isAsciiIdentifierStart(C)) {
if (C == '\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result))
Consumed = true;
else if (!isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr))
@@ -1880,7 +1967,7 @@ const char *Lexer::LexUDSuffix(Token &Result, const char *CurPtr,
unsigned NextSize;
char Next = getCharAndSizeNoWarn(CurPtr + Consumed, NextSize,
getLangOpts());
- if (!isIdentifierBody(Next)) {
+ if (!isAsciiIdentifierContinue(Next)) {
// End of suffix. Check whether this is on the allowed list.
const StringRef CompleteSuffix(Buffer, Chars);
IsUDSuffix = StringLiteralParser::isValidUDSuffix(getLangOpts(),
@@ -1912,10 +1999,12 @@ const char *Lexer::LexUDSuffix(Token &Result, const char *CurPtr,
Result.setFlag(Token::HasUDSuffix);
while (true) {
C = getCharAndSize(CurPtr, Size);
- if (isIdentifierBody(C)) { CurPtr = ConsumeChar(CurPtr, Size, Result); }
- else if (C == '\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result)) {}
- else if (!isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr)) {}
- else break;
+ if (isAsciiIdentifierContinue(C)) {
+ CurPtr = ConsumeChar(CurPtr, Size, Result);
+ } else if (C == '\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result)) {
+ } else if (!isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr)) {
+ } else
+ break;
}
return CurPtr;
@@ -2811,11 +2900,11 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) {
ConditionalStack.pop_back();
}
+ SourceLocation EndLoc = getSourceLocation(BufferEnd);
// C99 5.1.1.2p2: If the file is non-empty and didn't end in a newline, issue
// a pedwarn.
if (CurPtr != BufferStart && (CurPtr[-1] != '\n' && CurPtr[-1] != '\r')) {
DiagnosticsEngine &Diags = PP->getDiagnostics();
- SourceLocation EndLoc = getSourceLocation(BufferEnd);
unsigned DiagID;
if (LangOpts.CPlusPlus11) {
@@ -2838,7 +2927,7 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) {
BufferPtr = CurPtr;
// Finally, let the preprocessor handle this.
- return PP->HandleEndOfFile(Result, isPragmaLexer());
+ return PP->HandleEndOfFile(Result, EndLoc, isPragmaLexer());
}
/// isNextPPTokenLParen - Return 1 if the next unexpanded token lexed from
@@ -3027,6 +3116,10 @@ uint32_t Lexer::tryReadUCN(const char *&StartPtr, const char *SlashLoc,
Token *Result) {
unsigned CharSize;
char Kind = getCharAndSize(StartPtr, CharSize);
+ bool Delimited = false;
+ bool FoundEndDelimiter = false;
+ unsigned Count = 0;
+ bool Diagnose = Result && !isLexingRawMode();
unsigned NumHexDigits;
if (Kind == 'u')
@@ -3037,7 +3130,7 @@ uint32_t Lexer::tryReadUCN(const char *&StartPtr, const char *SlashLoc,
return 0;
if (!LangOpts.CPlusPlus && !LangOpts.C99) {
- if (Result && !isLexingRawMode())
+ if (Diagnose)
Diag(SlashLoc, diag::warn_ucn_not_valid_in_c89);
return 0;
}
@@ -3046,39 +3139,70 @@ uint32_t Lexer::tryReadUCN(const char *&StartPtr, const char *SlashLoc,
const char *KindLoc = &CurPtr[-1];
uint32_t CodePoint = 0;
- for (unsigned i = 0; i < NumHexDigits; ++i) {
+ while (Count != NumHexDigits || Delimited) {
char C = getCharAndSize(CurPtr, CharSize);
+ if (!Delimited && C == '{') {
+ Delimited = true;
+ CurPtr += CharSize;
+ continue;
+ }
+
+ if (Delimited && C == '}') {
+ CurPtr += CharSize;
+ FoundEndDelimiter = true;
+ break;
+ }
unsigned Value = llvm::hexDigitValue(C);
if (Value == -1U) {
- if (Result && !isLexingRawMode()) {
- if (i == 0) {
- Diag(BufferPtr, diag::warn_ucn_escape_no_digits)
- << StringRef(KindLoc, 1);
- } else {
- Diag(BufferPtr, diag::warn_ucn_escape_incomplete);
-
- // If the user wrote \U1234, suggest a fixit to \u.
- if (i == 4 && NumHexDigits == 8) {
- CharSourceRange URange = makeCharRange(*this, KindLoc, KindLoc + 1);
- Diag(KindLoc, diag::note_ucn_four_not_eight)
- << FixItHint::CreateReplacement(URange, "u");
- }
- }
- }
+ if (!Delimited)
+ break;
+ if (Diagnose)
+ Diag(BufferPtr, diag::warn_delimited_ucn_incomplete)
+ << StringRef(&C, 1);
+ return 0;
+ }
+ if (CodePoint & 0xF000'0000) {
+ if (Diagnose)
+ Diag(KindLoc, diag::err_escape_too_large) << 0;
return 0;
}
CodePoint <<= 4;
- CodePoint += Value;
-
+ CodePoint |= Value;
CurPtr += CharSize;
+ Count++;
+ }
+
+ if (Count == 0) {
+ if (Diagnose)
+ Diag(StartPtr, FoundEndDelimiter ? diag::warn_delimited_ucn_empty
+ : diag::warn_ucn_escape_no_digits)
+ << StringRef(KindLoc, 1);
+ return 0;
+ }
+
+ if (!Delimited && Count != NumHexDigits) {
+ if (Diagnose) {
+ Diag(BufferPtr, diag::warn_ucn_escape_incomplete);
+ // If the user wrote \U1234, suggest a fixit to \u.
+ if (Count == 4 && NumHexDigits == 8) {
+ CharSourceRange URange = makeCharRange(*this, KindLoc, KindLoc + 1);
+ Diag(KindLoc, diag::note_ucn_four_not_eight)
+ << FixItHint::CreateReplacement(URange, "u");
+ }
+ }
+ return 0;
+ }
+
+ if (Delimited && PP) {
+ Diag(BufferPtr, diag::ext_delimited_escape_sequence);
}
if (Result) {
Result->setFlag(Token::HasUCN);
- if (CurPtr - StartPtr == (ptrdiff_t)NumHexDigits + 2)
+ if (CurPtr - StartPtr == (ptrdiff_t)(Count + 2 + (Delimited ? 2 : 0)))
StartPtr = CurPtr;
else
while (StartPtr != CurPtr)
@@ -3136,10 +3260,8 @@ uint32_t Lexer::tryReadUCN(const char *&StartPtr, const char *SlashLoc,
bool Lexer::CheckUnicodeWhitespace(Token &Result, uint32_t C,
const char *CurPtr) {
- static const llvm::sys::UnicodeCharSet UnicodeWhitespaceChars(
- UnicodeWhitespaceCharRanges);
if (!isLexingRawMode() && !PP->isPreprocessedOutput() &&
- UnicodeWhitespaceChars.contains(C)) {
+ isUnicodeWhitespace(C)) {
Diag(BufferPtr, diag::ext_unicode_whitespace)
<< makeCharRange(*this, BufferPtr, CurPtr);
@@ -3149,47 +3271,6 @@ bool Lexer::CheckUnicodeWhitespace(Token &Result, uint32_t C,
return false;
}
-bool Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) {
- if (isAllowedIDChar(C, LangOpts) && isAllowedInitiallyIDChar(C, LangOpts)) {
- if (!isLexingRawMode() && !ParsingPreprocessorDirective &&
- !PP->isPreprocessedOutput()) {
- maybeDiagnoseIDCharCompat(PP->getDiagnostics(), C,
- makeCharRange(*this, BufferPtr, CurPtr),
- /*IsFirst=*/true);
- maybeDiagnoseUTF8Homoglyph(PP->getDiagnostics(), C,
- makeCharRange(*this, BufferPtr, CurPtr));
- }
-
- MIOpt.ReadToken();
- return LexIdentifier(Result, CurPtr);
- }
-
- if (!isLexingRawMode() && !ParsingPreprocessorDirective &&
- !PP->isPreprocessedOutput() &&
- !isASCII(*BufferPtr) && !isAllowedIDChar(C, LangOpts)) {
- // Non-ASCII characters tend to creep into source code unintentionally.
- // Instead of letting the parser complain about the unknown token,
- // just drop the character.
- // Note that we can /only/ do this when the non-ASCII character is actually
- // spelled as Unicode, not written as a UCN. The standard requires that
- // we not throw away any possible preprocessor tokens, but there's a
- // loophole in the mapping of Unicode characters to basic character set
- // characters that allows us to map these particular characters to, say,
- // whitespace.
- Diag(BufferPtr, diag::err_non_ascii)
- << FixItHint::CreateRemoval(makeCharRange(*this, BufferPtr, CurPtr));
-
- BufferPtr = CurPtr;
- return false;
- }
-
- // Otherwise, we have an explicit UCN or a character that's unlikely to show
- // up by accident.
- MIOpt.ReadToken();
- FormTokenWithChars(Result, CurPtr, tok::unknown);
- return true;
-}
-
void Lexer::PropagateLineStartLeadingSpaceInfo(Token &Result) {
IsAtStartOfLine = Result.isAtStartOfLine();
HasLeadingSpace = Result.hasLeadingSpace();
@@ -3433,7 +3514,7 @@ LexNextToken:
}
// treat u like the start of an identifier.
- return LexIdentifier(Result, CurPtr);
+ return LexIdentifierContinue(Result, CurPtr);
case 'U': // Identifier (Uber) or C11/C++11 UTF-32 string literal
// Notify MIOpt that we read a non-whitespace/non-comment token.
@@ -3462,7 +3543,7 @@ LexNextToken:
}
// treat U like the start of an identifier.
- return LexIdentifier(Result, CurPtr);
+ return LexIdentifierContinue(Result, CurPtr);
case 'R': // Identifier or C++0x raw string literal
// Notify MIOpt that we read a non-whitespace/non-comment token.
@@ -3478,7 +3559,7 @@ LexNextToken:
}
// treat R like the start of an identifier.
- return LexIdentifier(Result, CurPtr);
+ return LexIdentifierContinue(Result, CurPtr);
case 'L': // Identifier (Loony) or wide literal (L'x' or L"xyz").
// Notify MIOpt that we read a non-whitespace/non-comment token.
@@ -3517,7 +3598,7 @@ LexNextToken:
case '_':
// Notify MIOpt that we read a non-whitespace/non-comment token.
MIOpt.ReadToken();
- return LexIdentifier(Result, CurPtr);
+ return LexIdentifierContinue(Result, CurPtr);
case '$': // $ in identifiers.
if (LangOpts.DollarIdents) {
@@ -3525,7 +3606,7 @@ LexNextToken:
Diag(CurPtr-1, diag::ext_dollar_in_identifier);
// Notify MIOpt that we read a non-whitespace/non-comment token.
MIOpt.ReadToken();
- return LexIdentifier(Result, CurPtr);
+ return LexIdentifierContinue(Result, CurPtr);
}
Kind = tok::unknown;
@@ -3940,7 +4021,7 @@ LexNextToken:
goto LexNextToken;
}
- return LexUnicode(Result, CodePoint, CurPtr);
+ return LexUnicodeIdentifierStart(Result, CodePoint, CurPtr);
}
}
@@ -3972,7 +4053,7 @@ LexNextToken:
// (We manually eliminate the tail call to avoid recursion.)
goto LexNextToken;
}
- return LexUnicode(Result, CodePoint, CurPtr);
+ return LexUnicodeIdentifierStart(Result, CodePoint, CurPtr);
}
if (isLexingRawMode() || ParsingPreprocessorDirective ||
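For reference, a hedged snippet of the delimited escape syntax that this change and the LiteralSupport.cpp change below start to accept (diagnosed as an extension); the specific values are arbitrary.

const char *Grin = "\u{1F600}";   // delimited universal character name, typically encoded as UTF-8
char NewLine = '\x{A}';           // delimited hex escape
char Bell = '\o{7}';              // \o escape; the braced form is required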
diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp
index 85d826ce9c6f..76c8b324671d 100644
--- a/clang/lib/Lex/LiteralSupport.cpp
+++ b/clang/lib/Lex/LiteralSupport.cpp
@@ -95,6 +95,8 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin,
DiagnosticsEngine *Diags,
const LangOptions &Features) {
const char *EscapeBegin = ThisTokBuf;
+ bool Delimited = false;
+ bool EndDelimiterFound = false;
// Skip the '\' char.
++ThisTokBuf;
@@ -143,26 +145,47 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin,
break;
case 'x': { // Hex escape.
ResultChar = 0;
- if (ThisTokBuf == ThisTokEnd || !isHexDigit(*ThisTokBuf)) {
+ if (ThisTokBuf != ThisTokEnd && *ThisTokBuf == '{') {
+ Delimited = true;
+ ThisTokBuf++;
+ if (*ThisTokBuf == '}') {
+ Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
+ diag::err_delimited_escape_empty);
+ return ResultChar;
+ }
+ } else if (ThisTokBuf == ThisTokEnd || !isHexDigit(*ThisTokBuf)) {
if (Diags)
Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
diag::err_hex_escape_no_digits) << "x";
- HadError = true;
- break;
+ return ResultChar;
}
// Hex escapes are a maximal series of hex digits.
bool Overflow = false;
for (; ThisTokBuf != ThisTokEnd; ++ThisTokBuf) {
- int CharVal = llvm::hexDigitValue(ThisTokBuf[0]);
- if (CharVal == -1) break;
+ if (Delimited && *ThisTokBuf == '}') {
+ ThisTokBuf++;
+ EndDelimiterFound = true;
+ break;
+ }
+ int CharVal = llvm::hexDigitValue(*ThisTokBuf);
+ if (CharVal == -1) {
+        // Non-delimited hex escape sequences stop at the first non-hex digit.
+ if (!Delimited)
+ break;
+ HadError = true;
+ if (Diags)
+ Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
+ diag::err_delimited_escape_invalid)
+ << StringRef(ThisTokBuf, 1);
+ continue;
+ }
// About to shift out a digit?
if (ResultChar & 0xF0000000)
Overflow = true;
ResultChar <<= 4;
ResultChar |= CharVal;
}
-
// See if any bits will be truncated when evaluated as a character.
if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
Overflow = true;
@@ -170,9 +193,13 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin,
}
// Check for overflow.
- if (Overflow && Diags) // Too many digits to fit in
- Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
- diag::err_escape_too_large) << 0;
+ if (!HadError && Overflow) { // Too many digits to fit in
+ HadError = true;
+ if (Diags)
+ Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
+ diag::err_escape_too_large)
+ << 0;
+ }
break;
}
case '0': case '1': case '2': case '3':
@@ -200,7 +227,58 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin,
}
break;
}
+ case 'o': {
+ bool Overflow = false;
+ if (ThisTokBuf == ThisTokEnd || *ThisTokBuf != '{') {
+ HadError = true;
+ if (Diags)
+ Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
+ diag::err_delimited_escape_missing_brace);
+
+ break;
+ }
+ ResultChar = 0;
+ Delimited = true;
+ ++ThisTokBuf;
+ if (*ThisTokBuf == '}') {
+ Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
+ diag::err_delimited_escape_empty);
+ return ResultChar;
+ }
+ while (ThisTokBuf != ThisTokEnd) {
+ if (*ThisTokBuf == '}') {
+ EndDelimiterFound = true;
+ ThisTokBuf++;
+ break;
+ }
+ if (*ThisTokBuf < '0' || *ThisTokBuf > '7') {
+ HadError = true;
+ if (Diags)
+ Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
+ diag::err_delimited_escape_invalid)
+ << StringRef(ThisTokBuf, 1);
+ ThisTokBuf++;
+ continue;
+ }
+ if (ResultChar & 0x020000000)
+ Overflow = true;
+
+ ResultChar <<= 3;
+ ResultChar |= *ThisTokBuf++ - '0';
+ }
+ // Check for overflow. Reject '\777', but not L'\777'.
+ if (!HadError &&
+ (Overflow || (CharWidth != 32 && (ResultChar >> CharWidth) != 0))) {
+ HadError = true;
+ if (Diags)
+ Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
+ diag::err_escape_too_large)
+ << 1;
+ ResultChar &= ~0U >> (32 - CharWidth);
+ }
+ break;
+ }
// Otherwise, these are not valid escapes.
case '(': case '{': case '[': case '%':
// GCC accepts these as extensions. We warn about them as such though.
@@ -224,6 +302,17 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin,
break;
}
+ if (Delimited && Diags) {
+ if (!EndDelimiterFound)
+ Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
+ diag::err_expected)
+ << tok::r_brace;
+ else if (!HadError) {
+ Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
+ diag::ext_delimited_escape_sequence);
+ }
+ }
+
return ResultChar;
}
@@ -245,18 +334,32 @@ void clang::expandUCNs(SmallVectorImpl<char> &Buf, StringRef Input) {
}
++I;
- assert(*I == 'u' || *I == 'U');
+ char Kind = *I;
+ ++I;
+
+ assert(Kind == 'u' || Kind == 'U');
+ uint32_t CodePoint = 0;
+
+ if (Kind == 'u' && *I == '{') {
+ for (++I; *I != '}'; ++I) {
+ unsigned Value = llvm::hexDigitValue(*I);
+ assert(Value != -1U);
+ CodePoint <<= 4;
+ CodePoint += Value;
+ }
+ appendCodePoint(CodePoint, Buf);
+ continue;
+ }
unsigned NumHexDigits;
- if (*I == 'u')
+ if (Kind == 'u')
NumHexDigits = 4;
else
NumHexDigits = 8;
assert(I + NumHexDigits <= E);
- uint32_t CodePoint = 0;
- for (++I; NumHexDigits != 0; ++I, --NumHexDigits) {
+ for (; NumHexDigits != 0; ++I, --NumHexDigits) {
unsigned Value = llvm::hexDigitValue(*I);
assert(Value != -1U);
@@ -282,28 +385,82 @@ static bool ProcessUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
// Skip the '\u' char's.
ThisTokBuf += 2;
- if (ThisTokBuf == ThisTokEnd || !isHexDigit(*ThisTokBuf)) {
+ bool Delimited = false;
+ bool EndDelimiterFound = false;
+ bool HasError = false;
+
+ if (UcnBegin[1] == 'u' && in_char_string_literal &&
+ ThisTokBuf != ThisTokEnd && *ThisTokBuf == '{') {
+ Delimited = true;
+ ThisTokBuf++;
+ } else if (ThisTokBuf == ThisTokEnd || !isHexDigit(*ThisTokBuf)) {
if (Diags)
Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
diag::err_hex_escape_no_digits) << StringRef(&ThisTokBuf[-1], 1);
return false;
}
UcnLen = (ThisTokBuf[-1] == 'u' ? 4 : 8);
- unsigned short UcnLenSave = UcnLen;
- for (; ThisTokBuf != ThisTokEnd && UcnLenSave; ++ThisTokBuf, UcnLenSave--) {
- int CharVal = llvm::hexDigitValue(ThisTokBuf[0]);
- if (CharVal == -1) break;
+
+ bool Overflow = false;
+ unsigned short Count = 0;
+ for (; ThisTokBuf != ThisTokEnd && (Delimited || Count != UcnLen);
+ ++ThisTokBuf) {
+ if (Delimited && *ThisTokBuf == '}') {
+ ++ThisTokBuf;
+ EndDelimiterFound = true;
+ break;
+ }
+ int CharVal = llvm::hexDigitValue(*ThisTokBuf);
+ if (CharVal == -1) {
+ HasError = true;
+ if (!Delimited)
+ break;
+ if (Diags) {
+ Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
+ diag::err_delimited_escape_invalid)
+ << StringRef(ThisTokBuf, 1);
+ }
+ Count++;
+ continue;
+ }
+ if (UcnVal & 0xF0000000) {
+ Overflow = true;
+ continue;
+ }
UcnVal <<= 4;
UcnVal |= CharVal;
+ Count++;
}
+
+ if (Overflow) {
+ if (Diags)
+ Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
+ diag::err_escape_too_large)
+ << 0;
+ return false;
+ }
+
+ if (Delimited && !EndDelimiterFound) {
+ if (Diags) {
+ Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
+ diag::err_expected)
+ << tok::r_brace;
+ }
+ return false;
+ }
+
// If we didn't consume the proper number of digits, there is a problem.
- if (UcnLenSave) {
+ if (Count == 0 || (!Delimited && Count != UcnLen)) {
if (Diags)
Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
- diag::err_ucn_escape_incomplete);
+ Delimited ? diag::err_delimited_escape_empty
+ : diag::err_ucn_escape_incomplete);
return false;
}
+ if (HasError)
+ return false;
+
// Check UCN constraints (C99 6.4.3p2) [C++11 lex.charset p2]
if ((0xD800 <= UcnVal && UcnVal <= 0xDFFF) || // surrogate codepoints
UcnVal > 0x10FFFF) { // maximum legal UTF32 value
@@ -338,6 +495,10 @@ static bool ProcessUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
diag::warn_ucn_not_valid_in_c89_literal);
+ if (Delimited && Diags)
+ Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
+ diag::ext_delimited_escape_sequence);
+
return true;
}
@@ -532,12 +693,6 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
: SM(SM), LangOpts(LangOpts), Diags(Diags),
ThisTokBegin(TokSpelling.begin()), ThisTokEnd(TokSpelling.end()) {
- // This routine assumes that the range begin/end matches the regex for integer
- // and FP constants (specifically, the 'pp-number' regex), and assumes that
- // the byte at "*end" is both valid and not part of the regex. Because of
- // this, it doesn't have to check for 'overscan' in various places.
- assert(!isPreprocessingNumberBody(*ThisTokEnd) && "didn't maximally munch?");
-
s = DigitsBegin = ThisTokBegin;
saw_exponent = false;
saw_period = false;
@@ -557,6 +712,16 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
isAccum = false;
hadError = false;
+ // This routine assumes that the range begin/end matches the regex for integer
+ // and FP constants (specifically, the 'pp-number' regex), and assumes that
+ // the byte at "*end" is both valid and not part of the regex. Because of
+ // this, it doesn't have to check for 'overscan' in various places.
+ if (isPreprocessingNumberBody(*ThisTokEnd)) {
+ Diags.Report(TokLoc, diag::err_lexing_numeric);
+ hadError = true;
+ return;
+ }
+
if (*s == '0') { // parse radix
ParseNumberStartingWithZero(TokLoc);
if (hadError)
@@ -1081,7 +1246,7 @@ NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) {
llvm::SmallString<16> Buffer;
StringRef Str(ThisTokBegin, n);
- if (Str.find('\'') != StringRef::npos) {
+ if (Str.contains('\'')) {
Buffer.reserve(n);
std::remove_copy_if(Str.begin(), Str.end(), std::back_inserter(Buffer),
&isDigitSeparator);
@@ -1196,7 +1361,7 @@ bool NumericLiteralParser::GetFixedPointValue(llvm::APInt &StoreVal, unsigned Sc
Val *= Base;
}
} else if (BaseShift < 0) {
- for (int64_t i = BaseShift; i < 0 && !Val.isNullValue(); ++i)
+ for (int64_t i = BaseShift; i < 0 && !Val.isZero(); ++i)
Val = Val.udiv(Base);
}
@@ -1271,7 +1436,12 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
++begin;
// Skip over the entry quote.
- assert(begin[0] == '\'' && "Invalid token lexed");
+ if (begin[0] != '\'') {
+ PP.Diag(Loc, diag::err_lexing_char);
+ HadError = true;
+ return;
+ }
+
++begin;
// Remove an optional ud-suffix.
@@ -1390,14 +1560,14 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
unsigned NumCharsSoFar = buffer_begin - &codepoint_buffer.front();
if (NumCharsSoFar > 1) {
- if (isWide())
- PP.Diag(Loc, diag::warn_extraneous_char_constant);
- else if (isAscii() && NumCharsSoFar == 4)
+ if (isAscii() && NumCharsSoFar == 4)
PP.Diag(Loc, diag::warn_four_char_character_literal);
else if (isAscii())
PP.Diag(Loc, diag::warn_multichar_character_literal);
- else
- PP.Diag(Loc, diag::err_multichar_utf_character_literal);
+ else {
+ PP.Diag(Loc, diag::err_multichar_character_literal) << (isWide() ? 0 : 1);
+ HadError = true;
+ }
IsMultiChar = true;
} else {
IsMultiChar = false;
@@ -1493,9 +1663,9 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
///
StringLiteralParser::
StringLiteralParser(ArrayRef<Token> StringToks,
- Preprocessor &PP, bool Complain)
+ Preprocessor &PP)
: SM(PP.getSourceManager()), Features(PP.getLangOpts()),
- Target(PP.getTargetInfo()), Diags(Complain ? &PP.getDiagnostics() :nullptr),
+ Target(PP.getTargetInfo()), Diags(&PP.getDiagnostics()),
MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) {
init(StringToks);
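
The LiteralSupport.cpp hunks above add parsing for brace-delimited escape
sequences ('\x{...}', '\o{...}' and '\u{...}') behind the new
ext_delimited_escape_sequence extension warning and the err_delimited_escape_*
diagnostics. A minimal usage sketch, illustrative only and not part of the
patch:

    // C++ literals the updated escape-sequence parsing accepts (as a Clang
    // extension, with an extension warning):
    char     a = '\x{41}';      // hex escape with braces, same value as '\x41'
    char     b = '\o{101}';     // octal escape with braces, also 'A'
    char32_t c = U'\u{1F600}';  // delimited UCN inside a character literal
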
diff --git a/clang/lib/Lex/ModuleMap.cpp b/clang/lib/Lex/ModuleMap.cpp
index f9af7c2a24fb..9fa170410da3 100644
--- a/clang/lib/Lex/ModuleMap.cpp
+++ b/clang/lib/Lex/ModuleMap.cpp
@@ -167,8 +167,8 @@ static void appendSubframeworkPaths(Module *Mod,
return;
// Add Frameworks/Name.framework for each subframework.
- for (unsigned I = Paths.size() - 1; I != 0; --I)
- llvm::sys::path::append(Path, "Frameworks", Paths[I-1] + ".framework");
+ for (StringRef Framework : llvm::drop_begin(llvm::reverse(Paths)))
+ llvm::sys::path::append(Path, "Frameworks", Framework + ".framework");
}
Optional<FileEntryRef> ModuleMap::findHeader(
@@ -338,7 +338,7 @@ static StringRef sanitizeFilenameAsIdentifier(StringRef Name,
if (Name.empty())
return Name;
- if (!isValidIdentifier(Name)) {
+ if (!isValidAsciiIdentifier(Name)) {
// If we don't already have something with the form of an identifier,
// create a buffer with the sanitized name.
Buffer.clear();
@@ -346,7 +346,7 @@ static StringRef sanitizeFilenameAsIdentifier(StringRef Name,
Buffer.push_back('_');
Buffer.reserve(Buffer.size() + Name.size());
for (unsigned I = 0, N = Name.size(); I != N; ++I) {
- if (isIdentifierBody(Name[I]))
+ if (isAsciiIdentifierContinue(Name[I]))
Buffer.push_back(Name[I]);
else
Buffer.push_back('_');
@@ -618,18 +618,18 @@ ModuleMap::findOrCreateModuleForHeaderInUmbrellaDir(const FileEntry *File) {
// the actual header is located.
bool Explicit = UmbrellaModule->InferExplicitSubmodules;
- for (unsigned I = SkippedDirs.size(); I != 0; --I) {
+ for (const DirectoryEntry *SkippedDir : llvm::reverse(SkippedDirs)) {
// Find or create the module that corresponds to this directory name.
SmallString<32> NameBuf;
StringRef Name = sanitizeFilenameAsIdentifier(
- llvm::sys::path::stem(SkippedDirs[I-1]->getName()), NameBuf);
+ llvm::sys::path::stem(SkippedDir->getName()), NameBuf);
Result = findOrCreateModule(Name, Result, /*IsFramework=*/false,
Explicit).first;
InferredModuleAllowedBy[Result] = UmbrellaModuleMap;
Result->IsInferred = true;
// Associate the module and the directory.
- UmbrellaDirs[SkippedDirs[I-1]] = Result;
+ UmbrellaDirs[SkippedDir] = Result;
// If inferred submodules export everything they import, add a
// wildcard to the set of exports.
@@ -745,12 +745,11 @@ ModuleMap::isHeaderUnavailableInModule(const FileEntry *Header,
UmbrellaModule = UmbrellaModule->Parent;
if (UmbrellaModule->InferSubmodules) {
- for (unsigned I = SkippedDirs.size(); I != 0; --I) {
+ for (const DirectoryEntry *SkippedDir : llvm::reverse(SkippedDirs)) {
// Find or create the module that corresponds to this directory name.
SmallString<32> NameBuf;
StringRef Name = sanitizeFilenameAsIdentifier(
- llvm::sys::path::stem(SkippedDirs[I-1]->getName()),
- NameBuf);
+ llvm::sys::path::stem(SkippedDir->getName()), NameBuf);
Found = lookupModuleQualified(Name, Found);
if (!Found)
return false;
@@ -989,9 +988,8 @@ Module *ModuleMap::inferFrameworkModule(const DirectoryEntry *FrameworkDir,
// We're allowed to infer for this directory, but make sure it's okay
// to infer this particular module.
StringRef Name = llvm::sys::path::stem(FrameworkDirName);
- canInfer = std::find(inferred->second.ExcludedModules.begin(),
- inferred->second.ExcludedModules.end(),
- Name) == inferred->second.ExcludedModules.end();
+ canInfer =
+ !llvm::is_contained(inferred->second.ExcludedModules, Name);
Attrs.IsSystem |= inferred->second.Attrs.IsSystem;
Attrs.IsExternC |= inferred->second.Attrs.IsExternC;
@@ -1218,9 +1216,8 @@ void ModuleMap::addHeader(Module *Mod, Module::Header Header,
// FIXME: Should we diagnose if a header is listed twice in the
// same module definition?
auto &HeaderList = Headers[Header.Entry];
- for (auto H : HeaderList)
- if (H == KH)
- return;
+ if (llvm::is_contained(HeaderList, KH))
+ return;
HeaderList.push_back(KH);
Mod->Headers[headerRoleToKind(Role)].push_back(Header);
@@ -2174,7 +2171,7 @@ void ModuleMapParser::parseExternModuleDecl() {
}
if (auto File = SourceMgr.getFileManager().getFile(FileNameRef))
Map.parseModuleMapFile(
- *File, /*IsSystem=*/false,
+ *File, IsSystem,
Map.HeaderInfo.getHeaderSearchOpts().ModuleMapFileHomeIsCwd
? Directory
: (*File)->getDir(),
diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index 556dd8daf652..ef7a5351953e 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -112,7 +112,7 @@ enum PPElifDiag {
// the specified module, meaning clang won't build the specified module. This is
// useful in a number of situations, for instance, when building a library that
// vends a module map, one might want to avoid hitting intermediate build
-// products containing the the module map or avoid finding the system installed
+// products containing the module map or avoid finding the system installed
// modulemap for that library.
static bool isForModuleBuilding(Module *M, StringRef CurrentModule,
StringRef ModuleName) {
@@ -129,7 +129,7 @@ static bool isForModuleBuilding(Module *M, StringRef CurrentModule,
static MacroDiag shouldWarnOnMacroDef(Preprocessor &PP, IdentifierInfo *II) {
const LangOptions &Lang = PP.getLangOpts();
- if (II->isReserved(Lang) != ReservedIdentifierStatus::NotReserved) {
+ if (isReservedInAllContexts(II->isReserved(Lang))) {
// list from:
// - https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_macros.html
// - https://docs.microsoft.com/en-us/cpp/c-runtime-library/security-features-in-the-crt?view=msvc-160
@@ -183,7 +183,7 @@ static MacroDiag shouldWarnOnMacroDef(Preprocessor &PP, IdentifierInfo *II) {
static MacroDiag shouldWarnOnMacroUndef(Preprocessor &PP, IdentifierInfo *II) {
const LangOptions &Lang = PP.getLangOpts();
// Do not warn on keyword undef. It is generally harmless and widely used.
- if (II->isReserved(Lang) != ReservedIdentifierStatus::NotReserved)
+ if (isReservedInAllContexts(II->isReserved(Lang)))
return MD_ReservedMacro;
return MD_NoWarn;
}
@@ -617,6 +617,10 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
// If this is in a skipping block or if we're already handled this #if
// block, don't bother parsing the condition.
if (CondInfo.WasSkipping || CondInfo.FoundNonSkip) {
+ // FIXME: We should probably do at least some minimal parsing of the
+ // condition to verify that it is well-formed. The current state
+ // allows #elif* directives with completely malformed (or missing)
+ // conditions.
DiscardUntilEndOfDirective();
} else {
// Restore the value of LexingRawMode so that identifiers are
@@ -656,6 +660,10 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
// If this is in a skipping block or if we're already handled this #if
// block, don't bother parsing the condition.
if (CondInfo.WasSkipping || CondInfo.FoundNonSkip) {
+ // FIXME: We should probably do at least some minimal parsing of the
+ // condition to verify that it is well-formed. The current state
+ // allows #elif* directives with completely malformed (or missing)
+ // conditions.
DiscardUntilEndOfDirective();
} else {
// Restore the value of LexingRawMode so that identifiers are
@@ -674,6 +682,8 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
continue;
}
+ emitMacroExpansionWarnings(MacroNameTok);
+
CheckEndOfDirective(IsElifDef ? "elifdef" : "elifndef");
IdentifierInfo *MII = MacroNameTok.getIdentifierInfo();
@@ -735,7 +745,7 @@ Module *Preprocessor::getModuleForLocation(SourceLocation Loc) {
// to the current module, if there is one.
return getLangOpts().CurrentModule.empty()
? nullptr
- : HeaderInfo.lookupModule(getLangOpts().CurrentModule);
+ : HeaderInfo.lookupModule(getLangOpts().CurrentModule, Loc);
}
const FileEntry *
@@ -1441,11 +1451,15 @@ void Preprocessor::HandleDigitDirective(Token &DigitTok) {
DiscardUntilEndOfDirective();
return;
}
- FilenameID = SourceMgr.getLineTableFilenameID(Literal.GetString());
// If a filename was present, read any flags that are present.
if (ReadLineMarkerFlags(IsFileEntry, IsFileExit, FileKind, *this))
return;
+
+ // Exiting to an empty string means pop to the including file, so leave
+ // FilenameID as -1 in that case.
+ if (!(IsFileExit && Literal.GetString().empty()))
+ FilenameID = SourceMgr.getLineTableFilenameID(Literal.GetString());
}
// Create a line note with this information.
@@ -2002,26 +2016,26 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
SourceLocation FilenameLoc = FilenameTok.getLocation();
StringRef LookupFilename = Filename;
-#ifdef _WIN32
- llvm::sys::path::Style BackslashStyle = llvm::sys::path::Style::windows;
-#else
// Normalize slashes when compiling with -fms-extensions on non-Windows. This
// is unnecessary on Windows since the filesystem there handles backslashes.
SmallString<128> NormalizedPath;
- llvm::sys::path::Style BackslashStyle = llvm::sys::path::Style::posix;
- if (LangOpts.MicrosoftExt) {
+ llvm::sys::path::Style BackslashStyle = llvm::sys::path::Style::native;
+ if (is_style_posix(BackslashStyle) && LangOpts.MicrosoftExt) {
NormalizedPath = Filename.str();
llvm::sys::path::native(NormalizedPath);
LookupFilename = NormalizedPath;
BackslashStyle = llvm::sys::path::Style::windows;
}
-#endif
Optional<FileEntryRef> File = LookupHeaderIncludeOrImport(
CurDir, Filename, FilenameLoc, FilenameRange, FilenameTok,
IsFrameworkFound, IsImportDecl, IsMapped, LookupFrom, LookupFromFile,
LookupFilename, RelativePath, SearchPath, SuggestedModule, isAngled);
+ // Record the header's filename for later use.
+ if (File)
+ CurLexer->addInclude(OriginalFilename, File->getFileEntry(), FilenameLoc);
+
if (usingPCHWithThroughHeader() && SkippingUntilPCHThroughHeader) {
if (File && isPCHThroughHeader(&File->getFileEntry()))
SkippingUntilPCHThroughHeader = false;
@@ -2129,12 +2143,14 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
IsImportDecl ||
IncludeTok.getIdentifierInfo()->getPPKeywordID() == tok::pp_import;
+ bool IsFirstIncludeOfFile = false;
+
// Ask HeaderInfo if we should enter this #include file. If not, #including
// this file will have no effect.
if (Action == Enter && File &&
- !HeaderInfo.ShouldEnterIncludeFile(*this, &File->getFileEntry(),
- EnterOnce, getLangOpts().Modules,
- SuggestedModule.getModule())) {
+ !HeaderInfo.ShouldEnterIncludeFile(
+ *this, &File->getFileEntry(), EnterOnce, getLangOpts().Modules,
+ SuggestedModule.getModule(), IsFirstIncludeOfFile)) {
// Even if we've already preprocessed this header once and know that we
// don't need to see its contents again, we still need to import it if it's
// modular because we might not have imported it from this submodule before.
@@ -2326,7 +2342,8 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
}
// If all is good, enter the new file!
- if (EnterSourceFile(FID, CurDir, FilenameTok.getLocation()))
+ if (EnterSourceFile(FID, CurDir, FilenameTok.getLocation(),
+ IsFirstIncludeOfFile))
return {ImportAction::None};
// Determine if we're switching to building a new submodule, and which one.
@@ -2521,7 +2538,7 @@ bool Preprocessor::ReadMacroParameterList(MacroInfo *MI, Token &Tok) {
// If this is already used as a parameter, it is used multiple times (e.g.
// #define X(A,A.
- if (llvm::find(Parameters, II) != Parameters.end()) { // C99 6.10.3p6
+ if (llvm::is_contained(Parameters, II)) { // C99 6.10.3p6
Diag(Tok, diag::err_pp_duplicate_name_in_arg_list) << II;
return true;
}
@@ -2851,6 +2868,12 @@ void Preprocessor::HandleDefineDirective(
if (MacroNameTok.is(tok::eod))
return;
+ IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
+  // Issue a final pragma warning if we're defining a macro that has been
+ // undefined and is being redefined.
+ if (!II->hasMacroDefinition() && II->hadMacroDefinition() && II->isFinal())
+ emitFinalMacroWarning(MacroNameTok, /*IsUndef=*/false);
+
// If we are supposed to keep comments in #defines, reenable comment saving
// mode.
if (CurLexer) CurLexer->SetCommentRetentionState(KeepMacroComments);
@@ -2893,6 +2916,12 @@ void Preprocessor::HandleDefineDirective(
// Finally, if this identifier already had a macro defined for it, verify that
// the macro bodies are identical, and issue diagnostics if they are not.
if (const MacroInfo *OtherMI=getMacroInfo(MacroNameTok.getIdentifierInfo())) {
+ // Final macros are hard-mode: they always warn. Even if the bodies are
+    // identical. Even if they are in system headers. Even if they are things
+    // we would have silently allowed in the past.
+ if (MacroNameTok.getIdentifierInfo()->isFinal())
+ emitFinalMacroWarning(MacroNameTok, /*IsUndef=*/false);
+
// In Objective-C, ignore attempts to directly redefine the builtin
// definitions of the ownership qualifiers. It's still possible to
// #undef them.
@@ -2922,6 +2951,7 @@ void Preprocessor::HandleDefineDirective(
// then don't bother calling MacroInfo::isIdenticalTo.
if (!getDiagnostics().getSuppressSystemWarnings() ||
!SourceMgr.isInSystemHeader(DefineTok.getLocation())) {
+
if (!OtherMI->isUsed() && OtherMI->isWarnIfUnused())
Diag(OtherMI->getDefinitionLoc(), diag::pp_macro_not_used);
@@ -2999,6 +3029,9 @@ void Preprocessor::HandleUndefDirective() {
auto MD = getMacroDefinition(II);
UndefMacroDirective *Undef = nullptr;
+ if (II->isFinal())
+ emitFinalMacroWarning(MacroNameTok, /*IsUndef=*/true);
+
// If the macro is not defined, this is a noop undef.
if (const MacroInfo *MI = MD.getMacroInfo()) {
if (!MI->isUsed() && MI->isWarnIfUnused())
@@ -3048,6 +3081,8 @@ void Preprocessor::HandleIfdefDirective(Token &Result,
return;
}
+ emitMacroExpansionWarnings(MacroNameTok);
+
// Check to see if this is the last token on the #if[n]def line.
CheckEndOfDirective(isIfndef ? "ifndef" : "ifdef");
diff --git a/clang/lib/Lex/PPExpressions.cpp b/clang/lib/Lex/PPExpressions.cpp
index cab4bab630dc..424cccfdb9ee 100644
--- a/clang/lib/Lex/PPExpressions.cpp
+++ b/clang/lib/Lex/PPExpressions.cpp
@@ -133,6 +133,8 @@ static bool EvaluateDefined(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
Result.Val.setIsUnsigned(false); // Result is signed intmax_t.
DT.IncludedUndefinedIds = !Macro;
+ PP.emitMacroExpansionWarnings(PeekTok);
+
// If there is a macro, mark it used.
if (Result.Val != 0 && ValueLive)
PP.markMacroAsUsed(Macro.getMacroInfo());
@@ -660,7 +662,7 @@ static bool EvaluateDirectiveSubExpr(PPValue &LHS, unsigned MinPrec,
case tok::ampamp: // Logical && does not do UACs.
break; // No UAC
default:
- Res.setIsUnsigned(LHS.isUnsigned()|RHS.isUnsigned());
+ Res.setIsUnsigned(LHS.isUnsigned() || RHS.isUnsigned());
// If this just promoted something from signed to unsigned, and if the
// value was negative, warn about it.
if (ValueLive && Res.isUnsigned()) {
@@ -820,7 +822,7 @@ static bool EvaluateDirectiveSubExpr(PPValue &LHS, unsigned MinPrec,
// Usual arithmetic conversions (C99 6.3.1.8p1): result is unsigned if
// either operand is unsigned.
- Res.setIsUnsigned(RHS.isUnsigned() | AfterColonVal.isUnsigned());
+ Res.setIsUnsigned(RHS.isUnsigned() || AfterColonVal.isUnsigned());
// Figure out the precedence of the token after the : part.
PeekPrec = getPrecedence(PeekTok.getKind());
diff --git a/clang/lib/Lex/PPLexerChange.cpp b/clang/lib/Lex/PPLexerChange.cpp
index b979b965f46a..f8b0a2c5f71b 100644
--- a/clang/lib/Lex/PPLexerChange.cpp
+++ b/clang/lib/Lex/PPLexerChange.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "clang/Basic/FileManager.h"
+#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Lex/HeaderSearch.h"
#include "clang/Lex/LexDiagnostic.h"
@@ -22,6 +23,7 @@
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBufferRef.h"
#include "llvm/Support/Path.h"
+
using namespace clang;
//===----------------------------------------------------------------------===//
@@ -37,8 +39,8 @@ bool Preprocessor::isInPrimaryFile() const {
// If there are any stacked lexers, we're in a #include.
assert(IsFileLexer(IncludeMacroStack[0]) &&
"Top level include stack isn't our primary lexer?");
- return std::none_of(
- IncludeMacroStack.begin() + 1, IncludeMacroStack.end(),
+ return llvm::none_of(
+ llvm::drop_begin(IncludeMacroStack),
[&](const IncludeStackInfo &ISI) -> bool { return IsFileLexer(ISI); });
}
@@ -65,7 +67,8 @@ PreprocessorLexer *Preprocessor::getCurrentFileLexer() const {
/// EnterSourceFile - Add a source file to the top of the include stack and
/// start lexing tokens from it instead of the current buffer.
bool Preprocessor::EnterSourceFile(FileID FID, const DirectoryLookup *CurDir,
- SourceLocation Loc) {
+ SourceLocation Loc,
+ bool IsFirstIncludeOfFile) {
assert(!CurTokenLexer && "Cannot #include a file inside a macro!");
++NumEnteredSourceFiles;
@@ -89,7 +92,8 @@ bool Preprocessor::EnterSourceFile(FileID FID, const DirectoryLookup *CurDir,
CodeCompletionFileLoc.getLocWithOffset(CodeCompletionOffset);
}
- EnterSourceFileWithLexer(new Lexer(FID, *InputFile, *this), CurDir);
+ EnterSourceFileWithLexer(
+ new Lexer(FID, *InputFile, *this, IsFirstIncludeOfFile), CurDir);
return false;
}
@@ -299,10 +303,46 @@ void Preprocessor::diagnoseMissingHeaderInUmbrellaDir(const Module &Mod) {
}
}
+void Preprocessor::ResolvePragmaIncludeInstead(
+ const SourceLocation Location) const {
+ assert(Location.isValid());
+ if (CurLexer == nullptr)
+ return;
+
+ if (SourceMgr.isInSystemHeader(Location))
+ return;
+
+ for (const auto &Include : CurLexer->getIncludeHistory()) {
+ StringRef Filename = Include.getKey();
+ const PreprocessorLexer::IncludeInfo &Info = Include.getValue();
+ ArrayRef<SmallString<32>> Aliases =
+ HeaderInfo.getFileInfo(Info.File).Aliases.getArrayRef();
+
+ if (Aliases.empty())
+ continue;
+
+ switch (Aliases.size()) {
+ case 1:
+ Diag(Info.Location, diag::err_pragma_include_instead_system_reserved)
+ << Filename << 0 << Aliases[0];
+ continue;
+ case 2:
+ Diag(Info.Location, diag::err_pragma_include_instead_system_reserved)
+ << Filename << 1 << Aliases[0] << Aliases[1];
+ continue;
+ default: {
+ Diag(Info.Location, diag::err_pragma_include_instead_system_reserved)
+ << Filename << 2 << ("{'" + llvm::join(Aliases, "', '") + "'}");
+ }
+ }
+ }
+}
+
/// HandleEndOfFile - This callback is invoked when the lexer hits the end of
/// the current file. This either returns the EOF token or pops a level off
/// the include stack and keeps going.
-bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) {
+bool Preprocessor::HandleEndOfFile(Token &Result, SourceLocation EndLoc,
+ bool isEndOfMacro) {
assert(!CurTokenLexer &&
"Ending a file when currently in a macro!");
@@ -339,7 +379,7 @@ bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) {
CurPPLexer->MIOpt.GetDefinedMacro()) {
if (!isMacroDefined(ControllingMacro) &&
DefinedMacro != ControllingMacro &&
- HeaderInfo.FirstTimeLexingFile(FE)) {
+ CurLexer->isFirstTimeLexingFile()) {
// If the edit distance between the two macros is more than 50%,
// DefinedMacro may not be header guard, or can be header guard of
@@ -372,6 +412,9 @@ bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) {
}
}
+ if (EndLoc.isValid())
+ ResolvePragmaIncludeInstead(EndLoc);
+
// Complain about reaching a true EOF within arc_cf_code_audited.
// We don't want to complain about reaching the end of a macro
// instantiation or a _Pragma.
@@ -560,7 +603,7 @@ bool Preprocessor::HandleEndOfTokenLexer(Token &Result) {
TokenLexerCache[NumCachedTokenLexers++] = std::move(CurTokenLexer);
// Handle this like a #include file being popped off the stack.
- return HandleEndOfFile(Result, true);
+ return HandleEndOfFile(Result, {}, true);
}
/// RemoveTopOfLexerStack - Pop the current lexer/macro exp off the top of the
diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp
index 8728ac9e2166..cfee7a3c2513 100644
--- a/clang/lib/Lex/PPMacroExpansion.cpp
+++ b/clang/lib/Lex/PPMacroExpansion.cpp
@@ -155,11 +155,8 @@ ModuleMacro *Preprocessor::addModuleMacro(Module *Mod, IdentifierInfo *II,
// If we were the first overrider for any macro, it's no longer a leaf.
auto &LeafMacros = LeafModuleMacros[II];
if (HidAny) {
- LeafMacros.erase(std::remove_if(LeafMacros.begin(), LeafMacros.end(),
- [](ModuleMacro *MM) {
- return MM->NumOverriddenBy != 0;
- }),
- LeafMacros.end());
+ llvm::erase_if(LeafMacros,
+ [](ModuleMacro *MM) { return MM->NumOverriddenBy != 0; });
}
// The new macro is always a leaf macro.
@@ -426,7 +423,7 @@ static bool isTrivialSingleTokenExpansion(const MacroInfo *MI,
// If this is a function-like macro invocation, it's safe to trivially expand
// as long as the identifier is not a macro argument.
- return std::find(MI->param_begin(), MI->param_end(), II) == MI->param_end();
+ return !llvm::is_contained(MI->params(), II);
}
/// isNextPPTokenLParen - Determine whether the next preprocessor token to be
@@ -471,6 +468,8 @@ bool Preprocessor::isNextPPTokenLParen() {
/// expanded as a macro, handle it and return the next token as 'Identifier'.
bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier,
const MacroDefinition &M) {
+ emitMacroExpansionWarnings(Identifier);
+
MacroInfo *MI = M.getMacroInfo();
// If this is a macro expansion in the "#if !defined(x)" line for the file,
@@ -986,7 +985,11 @@ MacroArgs *Preprocessor::ReadMacroCallArgumentList(Token &MacroName,
// If the macro contains the comma pasting extension, the diagnostic
// is suppressed; we know we'll get another diagnostic later.
if (!MI->hasCommaPasting()) {
- Diag(Tok, diag::ext_missing_varargs_arg);
+ // C++20 allows this construct, but standards before C++20 and all C
+ // standards do not allow the construct (we allow it as an extension).
+ Diag(Tok, getLangOpts().CPlusPlus20
+ ? diag::warn_cxx17_compat_missing_varargs_arg
+ : diag::ext_missing_varargs_arg);
Diag(MI->getDefinitionLoc(), diag::note_macro_here)
<< MacroName.getIdentifierInfo();
}
@@ -1287,7 +1290,7 @@ static bool EvaluateHasIncludeNext(Token &Tok,
/// integer values.
static void EvaluateFeatureLikeBuiltinMacro(llvm::raw_svector_ostream& OS,
Token &Tok, IdentifierInfo *II,
- Preprocessor &PP,
+ Preprocessor &PP, bool ExpandArgs,
llvm::function_ref<
int(Token &Tok,
bool &HasLexedNextTok)> Op) {
@@ -1313,7 +1316,10 @@ static void EvaluateFeatureLikeBuiltinMacro(llvm::raw_svector_ostream& OS,
bool SuppressDiagnostic = false;
while (true) {
// Parse next token.
- PP.LexUnexpandedToken(Tok);
+ if (ExpandArgs)
+ PP.Lex(Tok);
+ else
+ PP.LexUnexpandedToken(Tok);
already_lexed:
switch (Tok.getKind()) {
@@ -1453,15 +1459,6 @@ static bool isTargetEnvironment(const TargetInfo &TI,
return TI.getTriple().getEnvironment() == Env.getEnvironment();
}
-static void remapMacroPath(
- SmallString<256> &Path,
- const std::map<std::string, std::string, std::greater<std::string>>
- &MacroPrefixMap) {
- for (const auto &Entry : MacroPrefixMap)
- if (llvm::sys::path::replace_path_prefix(Path, Entry.first, Entry.second))
- break;
-}
-
/// ExpandBuiltinMacro - If an identifier token is read that is to be expanded
/// as a builtin macro, handle it and return the next token as 'Tok'.
void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
@@ -1543,7 +1540,7 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
} else {
FN += PLoc.getFilename();
}
- remapMacroPath(FN, PPOpts->MacroPrefixMap);
+ getLangOpts().remapPathPrefix(FN);
Lexer::Stringify(FN);
OS << '"' << FN << '"';
}
@@ -1612,21 +1609,21 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
OS << CounterValue++;
Tok.setKind(tok::numeric_constant);
} else if (II == Ident__has_feature) {
- EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this,
+ EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, false,
[this](Token &Tok, bool &HasLexedNextToken) -> int {
IdentifierInfo *II = ExpectFeatureIdentifierInfo(Tok, *this,
diag::err_feature_check_malformed);
return II && HasFeature(*this, II->getName());
});
} else if (II == Ident__has_extension) {
- EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this,
+ EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, false,
[this](Token &Tok, bool &HasLexedNextToken) -> int {
IdentifierInfo *II = ExpectFeatureIdentifierInfo(Tok, *this,
diag::err_feature_check_malformed);
return II && HasExtension(*this, II->getName());
});
} else if (II == Ident__has_builtin) {
- EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this,
+ EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, false,
[this](Token &Tok, bool &HasLexedNextToken) -> int {
IdentifierInfo *II = ExpectFeatureIdentifierInfo(Tok, *this,
diag::err_feature_check_malformed);
@@ -1678,12 +1675,12 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
}
});
} else if (II == Ident__is_identifier) {
- EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this,
+ EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, false,
[](Token &Tok, bool &HasLexedNextToken) -> int {
return Tok.is(tok::identifier);
});
} else if (II == Ident__has_attribute) {
- EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this,
+ EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, true,
[this](Token &Tok, bool &HasLexedNextToken) -> int {
IdentifierInfo *II = ExpectFeatureIdentifierInfo(Tok, *this,
diag::err_feature_check_malformed);
@@ -1691,7 +1688,7 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
getTargetInfo(), getLangOpts()) : 0;
});
} else if (II == Ident__has_declspec) {
- EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this,
+ EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, true,
[this](Token &Tok, bool &HasLexedNextToken) -> int {
IdentifierInfo *II = ExpectFeatureIdentifierInfo(Tok, *this,
diag::err_feature_check_malformed);
@@ -1707,8 +1704,8 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
} else if (II == Ident__has_cpp_attribute ||
II == Ident__has_c_attribute) {
bool IsCXX = II == Ident__has_cpp_attribute;
- EvaluateFeatureLikeBuiltinMacro(
- OS, Tok, II, *this, [&](Token &Tok, bool &HasLexedNextToken) -> int {
+ EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, true,
+ [&](Token &Tok, bool &HasLexedNextToken) -> int {
IdentifierInfo *ScopeII = nullptr;
IdentifierInfo *II = ExpectFeatureIdentifierInfo(
Tok, *this, diag::err_feature_check_malformed);
@@ -1722,7 +1719,8 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
HasLexedNextToken = true;
else {
ScopeII = II;
- LexUnexpandedToken(Tok);
+ // Lex an expanded token for the attribute name.
+ Lex(Tok);
II = ExpectFeatureIdentifierInfo(Tok, *this,
diag::err_feature_check_malformed);
}
@@ -1749,7 +1747,7 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
Tok.setKind(tok::numeric_constant);
} else if (II == Ident__has_warning) {
// The argument should be a parenthesized string literal.
- EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this,
+ EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, false,
[this](Token &Tok, bool &HasLexedNextToken) -> int {
std::string WarningName;
SourceLocation StrStartLoc = Tok.getLocation();
@@ -1780,7 +1778,7 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
// The argument to this builtin should be an identifier. The
// builtin evaluates to 1 when that identifier names the module we are
// currently building.
- EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this,
+ EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, false,
[this](Token &Tok, bool &HasLexedNextToken) -> int {
IdentifierInfo *II = ExpectFeatureIdentifierInfo(Tok, *this,
diag::err_expected_id_building_module);
@@ -1840,28 +1838,32 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
return;
} else if (II == Ident__is_target_arch) {
EvaluateFeatureLikeBuiltinMacro(
- OS, Tok, II, *this, [this](Token &Tok, bool &HasLexedNextToken) -> int {
+ OS, Tok, II, *this, false,
+ [this](Token &Tok, bool &HasLexedNextToken) -> int {
IdentifierInfo *II = ExpectFeatureIdentifierInfo(
Tok, *this, diag::err_feature_check_malformed);
return II && isTargetArch(getTargetInfo(), II);
});
} else if (II == Ident__is_target_vendor) {
EvaluateFeatureLikeBuiltinMacro(
- OS, Tok, II, *this, [this](Token &Tok, bool &HasLexedNextToken) -> int {
+ OS, Tok, II, *this, false,
+ [this](Token &Tok, bool &HasLexedNextToken) -> int {
IdentifierInfo *II = ExpectFeatureIdentifierInfo(
Tok, *this, diag::err_feature_check_malformed);
return II && isTargetVendor(getTargetInfo(), II);
});
} else if (II == Ident__is_target_os) {
EvaluateFeatureLikeBuiltinMacro(
- OS, Tok, II, *this, [this](Token &Tok, bool &HasLexedNextToken) -> int {
+ OS, Tok, II, *this, false,
+ [this](Token &Tok, bool &HasLexedNextToken) -> int {
IdentifierInfo *II = ExpectFeatureIdentifierInfo(
Tok, *this, diag::err_feature_check_malformed);
return II && isTargetOS(getTargetInfo(), II);
});
} else if (II == Ident__is_target_environment) {
EvaluateFeatureLikeBuiltinMacro(
- OS, Tok, II, *this, [this](Token &Tok, bool &HasLexedNextToken) -> int {
+ OS, Tok, II, *this, false,
+ [this](Token &Tok, bool &HasLexedNextToken) -> int {
IdentifierInfo *II = ExpectFeatureIdentifierInfo(
Tok, *this, diag::err_feature_check_malformed);
return II && isTargetEnvironment(getTargetInfo(), II);
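
With the ExpandArgs parameter introduced above, __has_attribute,
__has_declspec, __has_cpp_attribute and __has_c_attribute now macro-expand
their argument before performing the check (the other feature-test macros keep
lexing it unexpanded). A small sketch of the effect, illustrative only and not
part of the patch:

    #define MY_ATTR deprecated
    #if __has_cpp_attribute(MY_ATTR)   // argument now expands to 'deprecated'
    #  define DEPRECATED [[deprecated]]
    #else
    #  define DEPRECATED
    #endif
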
diff --git a/clang/lib/Lex/Pragma.cpp b/clang/lib/Lex/Pragma.cpp
index c89061ba6d02..67daa5841983 100644
--- a/clang/lib/Lex/Pragma.cpp
+++ b/clang/lib/Lex/Pragma.cpp
@@ -12,7 +12,9 @@
//===----------------------------------------------------------------------===//
#include "clang/Lex/Pragma.h"
+#include "clang/Basic/CLWarnings.h"
#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/DiagnosticLex.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/IdentifierTable.h"
#include "clang/Basic/LLVM.h"
@@ -35,11 +37,12 @@
#include "clang/Lex/TokenLexer.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Timer.h"
@@ -495,43 +498,88 @@ void Preprocessor::HandlePragmaSystemHeader(Token &SysHeaderTok) {
SrcMgr::C_System);
}
-/// HandlePragmaDependency - Handle \#pragma GCC dependency "foo" blah.
-void Preprocessor::HandlePragmaDependency(Token &DependencyTok) {
+static llvm::Optional<Token> LexHeader(Preprocessor &PP,
+ Optional<FileEntryRef> &File,
+ bool SuppressIncludeNotFoundError) {
Token FilenameTok;
- if (LexHeaderName(FilenameTok, /*AllowConcatenation*/false))
- return;
+ if (PP.LexHeaderName(FilenameTok, /*AllowConcatenation*/ false))
+ return llvm::None;
// If the next token wasn't a header-name, diagnose the error.
if (FilenameTok.isNot(tok::header_name)) {
- Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename);
- return;
+ PP.Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename);
+ return llvm::None;
}
// Reserve a buffer to get the spelling.
SmallString<128> FilenameBuffer;
bool Invalid = false;
- StringRef Filename = getSpelling(FilenameTok, FilenameBuffer, &Invalid);
+ StringRef Filename = PP.getSpelling(FilenameTok, FilenameBuffer, &Invalid);
if (Invalid)
- return;
+ return llvm::None;
bool isAngled =
- GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename);
+ PP.GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename);
// If GetIncludeFilenameSpelling set the start ptr to null, there was an
// error.
if (Filename.empty())
- return;
+ return llvm::None;
// Search include directories for this file.
const DirectoryLookup *CurDir;
- Optional<FileEntryRef> File =
- LookupFile(FilenameTok.getLocation(), Filename, isAngled, nullptr,
- nullptr, CurDir, nullptr, nullptr, nullptr, nullptr, nullptr);
+ File = PP.LookupFile(FilenameTok.getLocation(), Filename, isAngled, nullptr,
+ nullptr, CurDir, nullptr, nullptr, nullptr, nullptr,
+ nullptr);
if (!File) {
if (!SuppressIncludeNotFoundError)
- Diag(FilenameTok, diag::err_pp_file_not_found) << Filename;
+ PP.Diag(FilenameTok, diag::err_pp_file_not_found) << Filename;
+ return llvm::None;
+ }
+
+ return FilenameTok;
+}
+
+/// HandlePragmaIncludeInstead - Handle \#pragma clang include_instead(header).
+void Preprocessor::HandlePragmaIncludeInstead(Token &Tok) {
+ // Get the current file lexer we're looking at. Ignore _Pragma 'files' etc.
+ PreprocessorLexer *TheLexer = getCurrentFileLexer();
+
+ if (!SourceMgr.isInSystemHeader(Tok.getLocation())) {
+ Diag(Tok, diag::err_pragma_include_instead_not_sysheader);
+ return;
+ }
+
+ Lex(Tok);
+ if (Tok.isNot(tok::l_paren)) {
+ Diag(Tok, diag::err_expected) << "(";
+ return;
+ }
+
+ Optional<FileEntryRef> File;
+ llvm::Optional<Token> FilenameTok =
+ LexHeader(*this, File, SuppressIncludeNotFoundError);
+ if (!FilenameTok)
+ return;
+
+ Lex(Tok);
+ if (Tok.isNot(tok::r_paren)) {
+ Diag(Tok, diag::err_expected) << ")";
return;
}
+ SmallString<128> FilenameBuffer;
+ StringRef Filename = getSpelling(*FilenameTok, FilenameBuffer);
+ HeaderInfo.AddFileAlias(TheLexer->getFileEntry(), Filename);
+}
+
+/// HandlePragmaDependency - Handle \#pragma GCC dependency "foo" blah.
+void Preprocessor::HandlePragmaDependency(Token &DependencyTok) {
+ Optional<FileEntryRef> File;
+ llvm::Optional<Token> FilenameTok =
+ LexHeader(*this, File, SuppressIncludeNotFoundError);
+ if (!FilenameTok)
+ return;
+
const FileEntry *CurFile = getCurrentFileLexer()->getFileEntry();
// If this file is older than the file it depends on, emit a diagnostic.
@@ -547,7 +595,7 @@ void Preprocessor::HandlePragmaDependency(Token &DependencyTok) {
// Remove the trailing ' ' if present.
if (!Message.empty())
Message.erase(Message.end()-1);
- Diag(FilenameTok, diag::pp_out_of_date_dependency) << Message;
+ Diag(*FilenameTok, diag::pp_out_of_date_dependency) << Message;
}
}
@@ -1022,6 +1070,18 @@ struct PragmaSystemHeaderHandler : public PragmaHandler {
}
};
+/// PragmaIncludeInsteadHandler - "\#pragma clang include_instead(header)" marks
+/// the current file as non-includable if the including header is not a system
+/// header.
+struct PragmaIncludeInsteadHandler : public PragmaHandler {
+ PragmaIncludeInsteadHandler() : PragmaHandler("include_instead") {}
+
+ void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
+ Token &IIToken) override {
+ PP.HandlePragmaIncludeInstead(IIToken);
+ }
+};
+
struct PragmaDependencyHandler : public PragmaHandler {
PragmaDependencyHandler() : PragmaHandler("dependency") {}
@@ -1354,12 +1414,15 @@ struct PragmaWarningHandler : public PragmaHandler {
return;
}
}
+ PP.getDiagnostics().pushMappings(DiagLoc);
if (Callbacks)
Callbacks->PragmaWarningPush(DiagLoc, Level);
} else if (II && II->isStr("pop")) {
// #pragma warning( pop )
PP.Lex(Tok);
- if (Callbacks)
+ if (!PP.getDiagnostics().popMappings(DiagLoc))
+ PP.Diag(Tok, diag::warn_pragma_diagnostic_cannot_pop);
+ else if (Callbacks)
Callbacks->PragmaWarningPop(DiagLoc);
} else {
// #pragma warning( warning-specifier : warning-number-list
@@ -1373,14 +1436,19 @@ struct PragmaWarningHandler : public PragmaHandler {
// Figure out which warning specifier this is.
bool SpecifierValid;
- StringRef Specifier;
- llvm::SmallString<1> SpecifierBuf;
+ PPCallbacks::PragmaWarningSpecifier Specifier;
if (II) {
- Specifier = II->getName();
- SpecifierValid = llvm::StringSwitch<bool>(Specifier)
- .Cases("default", "disable", "error", "once",
- "suppress", true)
- .Default(false);
+ int SpecifierInt = llvm::StringSwitch<int>(II->getName())
+ .Case("default", PPCallbacks::PWS_Default)
+ .Case("disable", PPCallbacks::PWS_Disable)
+ .Case("error", PPCallbacks::PWS_Error)
+ .Case("once", PPCallbacks::PWS_Once)
+ .Case("suppress", PPCallbacks::PWS_Suppress)
+ .Default(-1);
+ if ((SpecifierValid = SpecifierInt != -1))
+ Specifier =
+ static_cast<PPCallbacks::PragmaWarningSpecifier>(SpecifierInt);
+
// If we read a correct specifier, snatch next token (that should be
// ":", checked later).
if (SpecifierValid)
@@ -1388,9 +1456,10 @@ struct PragmaWarningHandler : public PragmaHandler {
} else {
// Token is a numeric constant. It should be either 1, 2, 3 or 4.
uint64_t Value;
- Specifier = PP.getSpelling(Tok, SpecifierBuf);
if (PP.parseSimpleIntegerLiteral(Tok, Value)) {
- SpecifierValid = (Value >= 1) && (Value <= 4);
+ if ((SpecifierValid = (Value >= 1) && (Value <= 4)))
+ Specifier = static_cast<PPCallbacks::PragmaWarningSpecifier>(
+ PPCallbacks::PWS_Level1 + Value - 1);
} else
SpecifierValid = false;
// Next token already snatched by parseSimpleIntegerLiteral.
@@ -1417,6 +1486,22 @@ struct PragmaWarningHandler : public PragmaHandler {
}
Ids.push_back(int(Value));
}
+
+ // Only act on disable for now.
+ diag::Severity SV = diag::Severity();
+ if (Specifier == PPCallbacks::PWS_Disable)
+ SV = diag::Severity::Ignored;
+ if (SV != diag::Severity())
+ for (int Id : Ids) {
+ if (auto Group = diagGroupFromCLWarningID(Id)) {
+ bool unknownDiag = PP.getDiagnostics().setSeverityForGroup(
+ diag::Flavor::WarningOrError, *Group, SV, DiagLoc);
+ assert(!unknownDiag &&
+ "wd table should only contain known diags");
+ (void)unknownDiag;
+ }
+ }
+
if (Callbacks)
Callbacks->PragmaWarning(DiagLoc, Specifier, Ids);
@@ -1667,7 +1752,7 @@ struct PragmaModuleBeginHandler : public PragmaHandler {
// Find the module we're entering. We require that a module map for it
// be loaded or implicitly loadable.
auto &HSI = PP.getHeaderSearchInfo();
- Module *M = HSI.lookupModule(Current);
+ Module *M = HSI.lookupModule(Current, ModuleName.front().second);
if (!M) {
PP.Diag(ModuleName.front().second,
diag::err_pp_module_begin_no_module_map) << Current;
@@ -1911,6 +1996,130 @@ struct PragmaRegionHandler : public PragmaHandler {
}
};
+/// This handles parsing pragmas that take a macro name and an optional message.
+static IdentifierInfo *HandleMacroAnnotationPragma(Preprocessor &PP, Token &Tok,
+ const char *Pragma,
+ std::string &MessageString) {
+ std::string Macro;
+
+ PP.Lex(Tok);
+ if (Tok.isNot(tok::l_paren)) {
+ PP.Diag(Tok, diag::err_expected) << "(";
+ return nullptr;
+ }
+
+ PP.LexUnexpandedToken(Tok);
+ if (!Tok.is(tok::identifier)) {
+ PP.Diag(Tok, diag::err_expected) << tok::identifier;
+ return nullptr;
+ }
+ IdentifierInfo *II = Tok.getIdentifierInfo();
+
+ if (!II->hasMacroDefinition()) {
+ PP.Diag(Tok, diag::err_pp_visibility_non_macro) << II;
+ return nullptr;
+ }
+
+ PP.Lex(Tok);
+ if (Tok.is(tok::comma)) {
+ PP.Lex(Tok);
+ if (!PP.FinishLexStringLiteral(Tok, MessageString, Pragma,
+ /*AllowMacroExpansion=*/true))
+ return nullptr;
+ }
+
+ if (Tok.isNot(tok::r_paren)) {
+ PP.Diag(Tok, diag::err_expected) << ")";
+ return nullptr;
+ }
+ return II;
+}
+
+/// "\#pragma clang deprecated(...)"
+///
+/// The syntax is
+/// \code
+/// #pragma clang deprecated(MACRO_NAME [, Message])
+/// \endcode
+struct PragmaDeprecatedHandler : public PragmaHandler {
+ PragmaDeprecatedHandler() : PragmaHandler("deprecated") {}
+
+ void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
+ Token &Tok) override {
+ std::string MessageString;
+
+ if (IdentifierInfo *II = HandleMacroAnnotationPragma(
+ PP, Tok, "#pragma clang deprecated", MessageString)) {
+ II->setIsDeprecatedMacro(true);
+ PP.addMacroDeprecationMsg(II, std::move(MessageString),
+ Tok.getLocation());
+ }
+ }
+};
+
+/// "\#pragma clang restrict_expansion(...)"
+///
+/// The syntax is
+/// \code
+/// #pragma clang restrict_expansion(MACRO_NAME [, Message])
+/// \endcode
+struct PragmaRestrictExpansionHandler : public PragmaHandler {
+ PragmaRestrictExpansionHandler() : PragmaHandler("restrict_expansion") {}
+
+ void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
+ Token &Tok) override {
+ std::string MessageString;
+
+ if (IdentifierInfo *II = HandleMacroAnnotationPragma(
+ PP, Tok, "#pragma clang restrict_expansion", MessageString)) {
+ II->setIsRestrictExpansion(true);
+ PP.addRestrictExpansionMsg(II, std::move(MessageString),
+ Tok.getLocation());
+ }
+ }
+};
+
+/// "\#pragma clang final(...)"
+///
+/// The syntax is
+/// \code
+/// #pragma clang final(MACRO_NAME)
+/// \endcode
+struct PragmaFinalHandler : public PragmaHandler {
+ PragmaFinalHandler() : PragmaHandler("final") {}
+
+ void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
+ Token &Tok) override {
+ std::string Macro;
+
+ PP.Lex(Tok);
+ if (Tok.isNot(tok::l_paren)) {
+ PP.Diag(Tok, diag::err_expected) << "(";
+ return;
+ }
+
+ PP.LexUnexpandedToken(Tok);
+ if (!Tok.is(tok::identifier)) {
+ PP.Diag(Tok, diag::err_expected) << tok::identifier;
+ return;
+ }
+ IdentifierInfo *II = Tok.getIdentifierInfo();
+
+ if (!II->hasMacroDefinition()) {
+ PP.Diag(Tok, diag::err_pp_visibility_non_macro) << II;
+ return;
+ }
+
+ PP.Lex(Tok);
+ if (Tok.isNot(tok::r_paren)) {
+ PP.Diag(Tok, diag::err_expected) << ")";
+ return;
+ }
+ II->setIsFinal(true);
+ PP.addFinalLoc(II, Tok.getLocation());
+ }
+};
+
} // namespace
/// RegisterBuiltinPragmas - Install the standard preprocessor pragmas:
@@ -1934,11 +2143,15 @@ void Preprocessor::RegisterBuiltinPragmas() {
// #pragma clang ...
AddPragmaHandler("clang", new PragmaPoisonHandler());
AddPragmaHandler("clang", new PragmaSystemHeaderHandler());
+ AddPragmaHandler("clang", new PragmaIncludeInsteadHandler());
AddPragmaHandler("clang", new PragmaDebugHandler());
AddPragmaHandler("clang", new PragmaDependencyHandler());
AddPragmaHandler("clang", new PragmaDiagnosticHandler("clang"));
AddPragmaHandler("clang", new PragmaARCCFCodeAuditedHandler());
AddPragmaHandler("clang", new PragmaAssumeNonNullHandler());
+ AddPragmaHandler("clang", new PragmaDeprecatedHandler());
+ AddPragmaHandler("clang", new PragmaRestrictExpansionHandler());
+ AddPragmaHandler("clang", new PragmaFinalHandler());
// #pragma clang module ...
auto *ModuleHandler = new PragmaNamespace("module");
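
The handlers registered above implement the macro-annotation pragmas whose
syntax is given in their doc comments. A usage sketch, illustrative only and
not part of the patch:

    #define OLD_LIMIT 128
    #pragma clang deprecated(OLD_LIMIT, "use NEW_LIMIT instead")
    #pragma clang final(OLD_LIMIT)

    int n = OLD_LIMIT;   // warns: OLD_LIMIT is deprecated, use NEW_LIMIT instead
    #undef OLD_LIMIT     // warns: undefining a macro marked final
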
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index 32ea8791d29a..b026ae36fc0f 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -723,11 +723,7 @@ IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
// is cleaned to tok::identifier "B". After cleaning the token's length is
// still 3 and the SourceLocation refers to the location of the backslash.
Identifier.setIdentifierInfo(II);
- if (getLangOpts().MSVCCompat && II->isCPlusPlusOperatorKeyword() &&
- getSourceManager().isInSystemHeader(Identifier.getLocation()))
- Identifier.setKind(tok::identifier);
- else
- Identifier.setKind(II->getTokenID());
+ Identifier.setKind(II->getTokenID());
return II;
}
@@ -1387,7 +1383,7 @@ bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
void Preprocessor::addCommentHandler(CommentHandler *Handler) {
assert(Handler && "NULL comment handler");
- assert(llvm::find(CommentHandlers, Handler) == CommentHandlers.end() &&
+ assert(!llvm::is_contained(CommentHandlers, Handler) &&
"Comment handler already registered");
CommentHandlers.push_back(Handler);
}
@@ -1413,6 +1409,48 @@ bool Preprocessor::HandleComment(Token &result, SourceRange Comment) {
return true;
}
+void Preprocessor::emitMacroDeprecationWarning(const Token &Identifier) const {
+ const MacroAnnotations &A =
+ getMacroAnnotations(Identifier.getIdentifierInfo());
+ assert(A.DeprecationInfo &&
+ "Macro deprecation warning without recorded annotation!");
+ const MacroAnnotationInfo &Info = *A.DeprecationInfo;
+ if (Info.Message.empty())
+ Diag(Identifier, diag::warn_pragma_deprecated_macro_use)
+ << Identifier.getIdentifierInfo() << 0;
+ else
+ Diag(Identifier, diag::warn_pragma_deprecated_macro_use)
+ << Identifier.getIdentifierInfo() << 1 << Info.Message;
+ Diag(Info.Location, diag::note_pp_macro_annotation) << 0;
+}
+
+void Preprocessor::emitRestrictExpansionWarning(const Token &Identifier) const {
+ const MacroAnnotations &A =
+ getMacroAnnotations(Identifier.getIdentifierInfo());
+ assert(A.RestrictExpansionInfo &&
+ "Macro restricted expansion warning without recorded annotation!");
+ const MacroAnnotationInfo &Info = *A.RestrictExpansionInfo;
+ if (Info.Message.empty())
+ Diag(Identifier, diag::warn_pragma_restrict_expansion_macro_use)
+ << Identifier.getIdentifierInfo() << 0;
+ else
+ Diag(Identifier, diag::warn_pragma_restrict_expansion_macro_use)
+ << Identifier.getIdentifierInfo() << 1 << Info.Message;
+ Diag(Info.Location, diag::note_pp_macro_annotation) << 1;
+}
+
+void Preprocessor::emitFinalMacroWarning(const Token &Identifier,
+ bool IsUndef) const {
+ const MacroAnnotations &A =
+ getMacroAnnotations(Identifier.getIdentifierInfo());
+ assert(A.FinalAnnotationLoc &&
+ "Final macro warning without recorded annotation!");
+
+ Diag(Identifier, diag::warn_pragma_final_macro)
+ << Identifier.getIdentifierInfo() << (IsUndef ? 0 : 1);
+ Diag(*A.FinalAnnotationLoc, diag::note_pp_macro_annotation) << 2;
+}
+
ModuleLoader::~ModuleLoader() = default;
CommentHandler::~CommentHandler() = default;
diff --git a/clang/lib/Lex/UnicodeCharSets.h b/clang/lib/Lex/UnicodeCharSets.h
index 74dd57fdf118..e79a85bc72b7 100644
--- a/clang/lib/Lex/UnicodeCharSets.h
+++ b/clang/lib/Lex/UnicodeCharSets.h
@@ -10,6 +10,355 @@
#include "llvm/Support/UnicodeCharRanges.h"
+// Unicode 14 XID_Start
+static const llvm::sys::UnicodeCharRange XIDStartRanges[] = {
+ {0x0041, 0x005A}, {0x0061, 0x007A}, {0x00AA, 0x00AA},
+ {0x00B5, 0x00B5}, {0x00BA, 0x00BA}, {0x00C0, 0x00D6},
+ {0x00D8, 0x00F6}, {0x00F8, 0x02C1}, {0x02C6, 0x02D1},
+ {0x02E0, 0x02E4}, {0x02EC, 0x02EC}, {0x02EE, 0x02EE},
+ {0x0370, 0x0374}, {0x0376, 0x0377}, {0x037B, 0x037D},
+ {0x037F, 0x037F}, {0x0386, 0x0386}, {0x0388, 0x038A},
+ {0x038C, 0x038C}, {0x038E, 0x03A1}, {0x03A3, 0x03F5},
+ {0x03F7, 0x0481}, {0x048A, 0x052F}, {0x0531, 0x0556},
+ {0x0559, 0x0559}, {0x0560, 0x0588}, {0x05D0, 0x05EA},
+ {0x05EF, 0x05F2}, {0x0620, 0x064A}, {0x066E, 0x066F},
+ {0x0671, 0x06D3}, {0x06D5, 0x06D5}, {0x06E5, 0x06E6},
+ {0x06EE, 0x06EF}, {0x06FA, 0x06FC}, {0x06FF, 0x06FF},
+ {0x0710, 0x0710}, {0x0712, 0x072F}, {0x074D, 0x07A5},
+ {0x07B1, 0x07B1}, {0x07CA, 0x07EA}, {0x07F4, 0x07F5},
+ {0x07FA, 0x07FA}, {0x0800, 0x0815}, {0x081A, 0x081A},
+ {0x0824, 0x0824}, {0x0828, 0x0828}, {0x0840, 0x0858},
+ {0x0860, 0x086A}, {0x0870, 0x0887}, {0x0889, 0x088E},
+ {0x08A0, 0x08C9}, {0x0904, 0x0939}, {0x093D, 0x093D},
+ {0x0950, 0x0950}, {0x0958, 0x0961}, {0x0971, 0x0980},
+ {0x0985, 0x098C}, {0x098F, 0x0990}, {0x0993, 0x09A8},
+ {0x09AA, 0x09B0}, {0x09B2, 0x09B2}, {0x09B6, 0x09B9},
+ {0x09BD, 0x09BD}, {0x09CE, 0x09CE}, {0x09DC, 0x09DD},
+ {0x09DF, 0x09E1}, {0x09F0, 0x09F1}, {0x09FC, 0x09FC},
+ {0x0A05, 0x0A0A}, {0x0A0F, 0x0A10}, {0x0A13, 0x0A28},
+ {0x0A2A, 0x0A30}, {0x0A32, 0x0A33}, {0x0A35, 0x0A36},
+ {0x0A38, 0x0A39}, {0x0A59, 0x0A5C}, {0x0A5E, 0x0A5E},
+ {0x0A72, 0x0A74}, {0x0A85, 0x0A8D}, {0x0A8F, 0x0A91},
+ {0x0A93, 0x0AA8}, {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3},
+ {0x0AB5, 0x0AB9}, {0x0ABD, 0x0ABD}, {0x0AD0, 0x0AD0},
+ {0x0AE0, 0x0AE1}, {0x0AF9, 0x0AF9}, {0x0B05, 0x0B0C},
+ {0x0B0F, 0x0B10}, {0x0B13, 0x0B28}, {0x0B2A, 0x0B30},
+ {0x0B32, 0x0B33}, {0x0B35, 0x0B39}, {0x0B3D, 0x0B3D},
+ {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B61}, {0x0B71, 0x0B71},
+ {0x0B83, 0x0B83}, {0x0B85, 0x0B8A}, {0x0B8E, 0x0B90},
+ {0x0B92, 0x0B95}, {0x0B99, 0x0B9A}, {0x0B9C, 0x0B9C},
+ {0x0B9E, 0x0B9F}, {0x0BA3, 0x0BA4}, {0x0BA8, 0x0BAA},
+ {0x0BAE, 0x0BB9}, {0x0BD0, 0x0BD0}, {0x0C05, 0x0C0C},
+ {0x0C0E, 0x0C10}, {0x0C12, 0x0C28}, {0x0C2A, 0x0C39},
+ {0x0C3D, 0x0C3D}, {0x0C58, 0x0C5A}, {0x0C5D, 0x0C5D},
+ {0x0C60, 0x0C61}, {0x0C80, 0x0C80}, {0x0C85, 0x0C8C},
+ {0x0C8E, 0x0C90}, {0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3},
+ {0x0CB5, 0x0CB9}, {0x0CBD, 0x0CBD}, {0x0CDD, 0x0CDE},
+ {0x0CE0, 0x0CE1}, {0x0CF1, 0x0CF2}, {0x0D04, 0x0D0C},
+ {0x0D0E, 0x0D10}, {0x0D12, 0x0D3A}, {0x0D3D, 0x0D3D},
+ {0x0D4E, 0x0D4E}, {0x0D54, 0x0D56}, {0x0D5F, 0x0D61},
+ {0x0D7A, 0x0D7F}, {0x0D85, 0x0D96}, {0x0D9A, 0x0DB1},
+ {0x0DB3, 0x0DBB}, {0x0DBD, 0x0DBD}, {0x0DC0, 0x0DC6},
+ {0x0E01, 0x0E30}, {0x0E32, 0x0E32}, {0x0E40, 0x0E46},
+ {0x0E81, 0x0E82}, {0x0E84, 0x0E84}, {0x0E86, 0x0E8A},
+ {0x0E8C, 0x0EA3}, {0x0EA5, 0x0EA5}, {0x0EA7, 0x0EB0},
+ {0x0EB2, 0x0EB2}, {0x0EBD, 0x0EBD}, {0x0EC0, 0x0EC4},
+ {0x0EC6, 0x0EC6}, {0x0EDC, 0x0EDF}, {0x0F00, 0x0F00},
+ {0x0F40, 0x0F47}, {0x0F49, 0x0F6C}, {0x0F88, 0x0F8C},
+ {0x1000, 0x102A}, {0x103F, 0x103F}, {0x1050, 0x1055},
+ {0x105A, 0x105D}, {0x1061, 0x1061}, {0x1065, 0x1066},
+ {0x106E, 0x1070}, {0x1075, 0x1081}, {0x108E, 0x108E},
+ {0x10A0, 0x10C5}, {0x10C7, 0x10C7}, {0x10CD, 0x10CD},
+ {0x10D0, 0x10FA}, {0x10FC, 0x1248}, {0x124A, 0x124D},
+ {0x1250, 0x1256}, {0x1258, 0x1258}, {0x125A, 0x125D},
+ {0x1260, 0x1288}, {0x128A, 0x128D}, {0x1290, 0x12B0},
+ {0x12B2, 0x12B5}, {0x12B8, 0x12BE}, {0x12C0, 0x12C0},
+ {0x12C2, 0x12C5}, {0x12C8, 0x12D6}, {0x12D8, 0x1310},
+ {0x1312, 0x1315}, {0x1318, 0x135A}, {0x1380, 0x138F},
+ {0x13A0, 0x13F5}, {0x13F8, 0x13FD}, {0x1401, 0x166C},
+ {0x166F, 0x167F}, {0x1681, 0x169A}, {0x16A0, 0x16EA},
+ {0x16EE, 0x16F8}, {0x1700, 0x1711}, {0x171F, 0x1731},
+ {0x1740, 0x1751}, {0x1760, 0x176C}, {0x176E, 0x1770},
+ {0x1780, 0x17B3}, {0x17D7, 0x17D7}, {0x17DC, 0x17DC},
+ {0x1820, 0x1878}, {0x1880, 0x18A8}, {0x18AA, 0x18AA},
+ {0x18B0, 0x18F5}, {0x1900, 0x191E}, {0x1950, 0x196D},
+ {0x1970, 0x1974}, {0x1980, 0x19AB}, {0x19B0, 0x19C9},
+ {0x1A00, 0x1A16}, {0x1A20, 0x1A54}, {0x1AA7, 0x1AA7},
+ {0x1B05, 0x1B33}, {0x1B45, 0x1B4C}, {0x1B83, 0x1BA0},
+ {0x1BAE, 0x1BAF}, {0x1BBA, 0x1BE5}, {0x1C00, 0x1C23},
+ {0x1C4D, 0x1C4F}, {0x1C5A, 0x1C7D}, {0x1C80, 0x1C88},
+ {0x1C90, 0x1CBA}, {0x1CBD, 0x1CBF}, {0x1CE9, 0x1CEC},
+ {0x1CEE, 0x1CF3}, {0x1CF5, 0x1CF6}, {0x1CFA, 0x1CFA},
+ {0x1D00, 0x1DBF}, {0x1E00, 0x1F15}, {0x1F18, 0x1F1D},
+ {0x1F20, 0x1F45}, {0x1F48, 0x1F4D}, {0x1F50, 0x1F57},
+ {0x1F59, 0x1F59}, {0x1F5B, 0x1F5B}, {0x1F5D, 0x1F5D},
+ {0x1F5F, 0x1F7D}, {0x1F80, 0x1FB4}, {0x1FB6, 0x1FBC},
+ {0x1FBE, 0x1FBE}, {0x1FC2, 0x1FC4}, {0x1FC6, 0x1FCC},
+ {0x1FD0, 0x1FD3}, {0x1FD6, 0x1FDB}, {0x1FE0, 0x1FEC},
+ {0x1FF2, 0x1FF4}, {0x1FF6, 0x1FFC}, {0x2071, 0x2071},
+ {0x207F, 0x207F}, {0x2090, 0x209C}, {0x2102, 0x2102},
+ {0x2107, 0x2107}, {0x210A, 0x2113}, {0x2115, 0x2115},
+ {0x2118, 0x211D}, {0x2124, 0x2124}, {0x2126, 0x2126},
+ {0x2128, 0x2128}, {0x212A, 0x2139}, {0x213C, 0x213F},
+ {0x2145, 0x2149}, {0x214E, 0x214E}, {0x2160, 0x2188},
+ {0x2C00, 0x2CE4}, {0x2CEB, 0x2CEE}, {0x2CF2, 0x2CF3},
+ {0x2D00, 0x2D25}, {0x2D27, 0x2D27}, {0x2D2D, 0x2D2D},
+ {0x2D30, 0x2D67}, {0x2D6F, 0x2D6F}, {0x2D80, 0x2D96},
+ {0x2DA0, 0x2DA6}, {0x2DA8, 0x2DAE}, {0x2DB0, 0x2DB6},
+ {0x2DB8, 0x2DBE}, {0x2DC0, 0x2DC6}, {0x2DC8, 0x2DCE},
+ {0x2DD0, 0x2DD6}, {0x2DD8, 0x2DDE}, {0x3005, 0x3007},
+ {0x3021, 0x3029}, {0x3031, 0x3035}, {0x3038, 0x303C},
+ {0x3041, 0x3096}, {0x309D, 0x309F}, {0x30A1, 0x30FA},
+ {0x30FC, 0x30FF}, {0x3105, 0x312F}, {0x3131, 0x318E},
+ {0x31A0, 0x31BF}, {0x31F0, 0x31FF}, {0x3400, 0x4DBF},
+ {0x4E00, 0xA48C}, {0xA4D0, 0xA4FD}, {0xA500, 0xA60C},
+ {0xA610, 0xA61F}, {0xA62A, 0xA62B}, {0xA640, 0xA66E},
+ {0xA67F, 0xA69D}, {0xA6A0, 0xA6EF}, {0xA717, 0xA71F},
+ {0xA722, 0xA788}, {0xA78B, 0xA7CA}, {0xA7D0, 0xA7D1},
+ {0xA7D3, 0xA7D3}, {0xA7D5, 0xA7D9}, {0xA7F2, 0xA801},
+ {0xA803, 0xA805}, {0xA807, 0xA80A}, {0xA80C, 0xA822},
+ {0xA840, 0xA873}, {0xA882, 0xA8B3}, {0xA8F2, 0xA8F7},
+ {0xA8FB, 0xA8FB}, {0xA8FD, 0xA8FE}, {0xA90A, 0xA925},
+ {0xA930, 0xA946}, {0xA960, 0xA97C}, {0xA984, 0xA9B2},
+ {0xA9CF, 0xA9CF}, {0xA9E0, 0xA9E4}, {0xA9E6, 0xA9EF},
+ {0xA9FA, 0xA9FE}, {0xAA00, 0xAA28}, {0xAA40, 0xAA42},
+ {0xAA44, 0xAA4B}, {0xAA60, 0xAA76}, {0xAA7A, 0xAA7A},
+ {0xAA7E, 0xAAAF}, {0xAAB1, 0xAAB1}, {0xAAB5, 0xAAB6},
+ {0xAAB9, 0xAABD}, {0xAAC0, 0xAAC0}, {0xAAC2, 0xAAC2},
+ {0xAADB, 0xAADD}, {0xAAE0, 0xAAEA}, {0xAAF2, 0xAAF4},
+ {0xAB01, 0xAB06}, {0xAB09, 0xAB0E}, {0xAB11, 0xAB16},
+ {0xAB20, 0xAB26}, {0xAB28, 0xAB2E}, {0xAB30, 0xAB5A},
+ {0xAB5C, 0xAB69}, {0xAB70, 0xABE2}, {0xAC00, 0xD7A3},
+ {0xD7B0, 0xD7C6}, {0xD7CB, 0xD7FB}, {0xF900, 0xFA6D},
+ {0xFA70, 0xFAD9}, {0xFB00, 0xFB06}, {0xFB13, 0xFB17},
+ {0xFB1D, 0xFB1D}, {0xFB1F, 0xFB28}, {0xFB2A, 0xFB36},
+ {0xFB38, 0xFB3C}, {0xFB3E, 0xFB3E}, {0xFB40, 0xFB41},
+ {0xFB43, 0xFB44}, {0xFB46, 0xFBB1}, {0xFBD3, 0xFC5D},
+ {0xFC64, 0xFD3D}, {0xFD50, 0xFD8F}, {0xFD92, 0xFDC7},
+ {0xFDF0, 0xFDF9}, {0xFE71, 0xFE71}, {0xFE73, 0xFE73},
+ {0xFE77, 0xFE77}, {0xFE79, 0xFE79}, {0xFE7B, 0xFE7B},
+ {0xFE7D, 0xFE7D}, {0xFE7F, 0xFEFC}, {0xFF21, 0xFF3A},
+ {0xFF41, 0xFF5A}, {0xFF66, 0xFF9D}, {0xFFA0, 0xFFBE},
+ {0xFFC2, 0xFFC7}, {0xFFCA, 0xFFCF}, {0xFFD2, 0xFFD7},
+ {0xFFDA, 0xFFDC}, {0x10000, 0x1000B}, {0x1000D, 0x10026},
+ {0x10028, 0x1003A}, {0x1003C, 0x1003D}, {0x1003F, 0x1004D},
+ {0x10050, 0x1005D}, {0x10080, 0x100FA}, {0x10140, 0x10174},
+ {0x10280, 0x1029C}, {0x102A0, 0x102D0}, {0x10300, 0x1031F},
+ {0x1032D, 0x1034A}, {0x10350, 0x10375}, {0x10380, 0x1039D},
+ {0x103A0, 0x103C3}, {0x103C8, 0x103CF}, {0x103D1, 0x103D5},
+ {0x10400, 0x1049D}, {0x104B0, 0x104D3}, {0x104D8, 0x104FB},
+ {0x10500, 0x10527}, {0x10530, 0x10563}, {0x10570, 0x1057A},
+ {0x1057C, 0x1058A}, {0x1058C, 0x10592}, {0x10594, 0x10595},
+ {0x10597, 0x105A1}, {0x105A3, 0x105B1}, {0x105B3, 0x105B9},
+ {0x105BB, 0x105BC}, {0x10600, 0x10736}, {0x10740, 0x10755},
+ {0x10760, 0x10767}, {0x10780, 0x10785}, {0x10787, 0x107B0},
+ {0x107B2, 0x107BA}, {0x10800, 0x10805}, {0x10808, 0x10808},
+ {0x1080A, 0x10835}, {0x10837, 0x10838}, {0x1083C, 0x1083C},
+ {0x1083F, 0x10855}, {0x10860, 0x10876}, {0x10880, 0x1089E},
+ {0x108E0, 0x108F2}, {0x108F4, 0x108F5}, {0x10900, 0x10915},
+ {0x10920, 0x10939}, {0x10980, 0x109B7}, {0x109BE, 0x109BF},
+ {0x10A00, 0x10A00}, {0x10A10, 0x10A13}, {0x10A15, 0x10A17},
+ {0x10A19, 0x10A35}, {0x10A60, 0x10A7C}, {0x10A80, 0x10A9C},
+ {0x10AC0, 0x10AC7}, {0x10AC9, 0x10AE4}, {0x10B00, 0x10B35},
+ {0x10B40, 0x10B55}, {0x10B60, 0x10B72}, {0x10B80, 0x10B91},
+ {0x10C00, 0x10C48}, {0x10C80, 0x10CB2}, {0x10CC0, 0x10CF2},
+ {0x10D00, 0x10D23}, {0x10E80, 0x10EA9}, {0x10EB0, 0x10EB1},
+ {0x10F00, 0x10F1C}, {0x10F27, 0x10F27}, {0x10F30, 0x10F45},
+ {0x10F70, 0x10F81}, {0x10FB0, 0x10FC4}, {0x10FE0, 0x10FF6},
+ {0x11003, 0x11037}, {0x11071, 0x11072}, {0x11075, 0x11075},
+ {0x11083, 0x110AF}, {0x110D0, 0x110E8}, {0x11103, 0x11126},
+ {0x11144, 0x11144}, {0x11147, 0x11147}, {0x11150, 0x11172},
+ {0x11176, 0x11176}, {0x11183, 0x111B2}, {0x111C1, 0x111C4},
+ {0x111DA, 0x111DA}, {0x111DC, 0x111DC}, {0x11200, 0x11211},
+ {0x11213, 0x1122B}, {0x11280, 0x11286}, {0x11288, 0x11288},
+ {0x1128A, 0x1128D}, {0x1128F, 0x1129D}, {0x1129F, 0x112A8},
+ {0x112B0, 0x112DE}, {0x11305, 0x1130C}, {0x1130F, 0x11310},
+ {0x11313, 0x11328}, {0x1132A, 0x11330}, {0x11332, 0x11333},
+ {0x11335, 0x11339}, {0x1133D, 0x1133D}, {0x11350, 0x11350},
+ {0x1135D, 0x11361}, {0x11400, 0x11434}, {0x11447, 0x1144A},
+ {0x1145F, 0x11461}, {0x11480, 0x114AF}, {0x114C4, 0x114C5},
+ {0x114C7, 0x114C7}, {0x11580, 0x115AE}, {0x115D8, 0x115DB},
+ {0x11600, 0x1162F}, {0x11644, 0x11644}, {0x11680, 0x116AA},
+ {0x116B8, 0x116B8}, {0x11700, 0x1171A}, {0x11740, 0x11746},
+ {0x11800, 0x1182B}, {0x118A0, 0x118DF}, {0x118FF, 0x11906},
+ {0x11909, 0x11909}, {0x1190C, 0x11913}, {0x11915, 0x11916},
+ {0x11918, 0x1192F}, {0x1193F, 0x1193F}, {0x11941, 0x11941},
+ {0x119A0, 0x119A7}, {0x119AA, 0x119D0}, {0x119E1, 0x119E1},
+ {0x119E3, 0x119E3}, {0x11A00, 0x11A00}, {0x11A0B, 0x11A32},
+ {0x11A3A, 0x11A3A}, {0x11A50, 0x11A50}, {0x11A5C, 0x11A89},
+ {0x11A9D, 0x11A9D}, {0x11AB0, 0x11AF8}, {0x11C00, 0x11C08},
+ {0x11C0A, 0x11C2E}, {0x11C40, 0x11C40}, {0x11C72, 0x11C8F},
+ {0x11D00, 0x11D06}, {0x11D08, 0x11D09}, {0x11D0B, 0x11D30},
+ {0x11D46, 0x11D46}, {0x11D60, 0x11D65}, {0x11D67, 0x11D68},
+ {0x11D6A, 0x11D89}, {0x11D98, 0x11D98}, {0x11EE0, 0x11EF2},
+ {0x11FB0, 0x11FB0}, {0x12000, 0x12399}, {0x12400, 0x1246E},
+ {0x12480, 0x12543}, {0x12F90, 0x12FF0}, {0x13000, 0x1342E},
+ {0x14400, 0x14646}, {0x16800, 0x16A38}, {0x16A40, 0x16A5E},
+ {0x16A70, 0x16ABE}, {0x16AD0, 0x16AED}, {0x16B00, 0x16B2F},
+ {0x16B40, 0x16B43}, {0x16B63, 0x16B77}, {0x16B7D, 0x16B8F},
+ {0x16E40, 0x16E7F}, {0x16F00, 0x16F4A}, {0x16F50, 0x16F50},
+ {0x16F93, 0x16F9F}, {0x16FE0, 0x16FE1}, {0x16FE3, 0x16FE3},
+ {0x17000, 0x187F7}, {0x18800, 0x18CD5}, {0x18D00, 0x18D08},
+ {0x1AFF0, 0x1AFF3}, {0x1AFF5, 0x1AFFB}, {0x1AFFD, 0x1AFFE},
+ {0x1B000, 0x1B122}, {0x1B150, 0x1B152}, {0x1B164, 0x1B167},
+ {0x1B170, 0x1B2FB}, {0x1BC00, 0x1BC6A}, {0x1BC70, 0x1BC7C},
+ {0x1BC80, 0x1BC88}, {0x1BC90, 0x1BC99}, {0x1D400, 0x1D454},
+ {0x1D456, 0x1D49C}, {0x1D49E, 0x1D49F}, {0x1D4A2, 0x1D4A2},
+ {0x1D4A5, 0x1D4A6}, {0x1D4A9, 0x1D4AC}, {0x1D4AE, 0x1D4B9},
+ {0x1D4BB, 0x1D4BB}, {0x1D4BD, 0x1D4C3}, {0x1D4C5, 0x1D505},
+ {0x1D507, 0x1D50A}, {0x1D50D, 0x1D514}, {0x1D516, 0x1D51C},
+ {0x1D51E, 0x1D539}, {0x1D53B, 0x1D53E}, {0x1D540, 0x1D544},
+ {0x1D546, 0x1D546}, {0x1D54A, 0x1D550}, {0x1D552, 0x1D6A5},
+ {0x1D6A8, 0x1D6C0}, {0x1D6C2, 0x1D6DA}, {0x1D6DC, 0x1D6FA},
+ {0x1D6FC, 0x1D714}, {0x1D716, 0x1D734}, {0x1D736, 0x1D74E},
+ {0x1D750, 0x1D76E}, {0x1D770, 0x1D788}, {0x1D78A, 0x1D7A8},
+ {0x1D7AA, 0x1D7C2}, {0x1D7C4, 0x1D7CB}, {0x1DF00, 0x1DF1E},
+ {0x1E100, 0x1E12C}, {0x1E137, 0x1E13D}, {0x1E14E, 0x1E14E},
+ {0x1E290, 0x1E2AD}, {0x1E2C0, 0x1E2EB}, {0x1E7E0, 0x1E7E6},
+ {0x1E7E8, 0x1E7EB}, {0x1E7ED, 0x1E7EE}, {0x1E7F0, 0x1E7FE},
+ {0x1E800, 0x1E8C4}, {0x1E900, 0x1E943}, {0x1E94B, 0x1E94B},
+ {0x1EE00, 0x1EE03}, {0x1EE05, 0x1EE1F}, {0x1EE21, 0x1EE22},
+ {0x1EE24, 0x1EE24}, {0x1EE27, 0x1EE27}, {0x1EE29, 0x1EE32},
+ {0x1EE34, 0x1EE37}, {0x1EE39, 0x1EE39}, {0x1EE3B, 0x1EE3B},
+ {0x1EE42, 0x1EE42}, {0x1EE47, 0x1EE47}, {0x1EE49, 0x1EE49},
+ {0x1EE4B, 0x1EE4B}, {0x1EE4D, 0x1EE4F}, {0x1EE51, 0x1EE52},
+ {0x1EE54, 0x1EE54}, {0x1EE57, 0x1EE57}, {0x1EE59, 0x1EE59},
+ {0x1EE5B, 0x1EE5B}, {0x1EE5D, 0x1EE5D}, {0x1EE5F, 0x1EE5F},
+ {0x1EE61, 0x1EE62}, {0x1EE64, 0x1EE64}, {0x1EE67, 0x1EE6A},
+ {0x1EE6C, 0x1EE72}, {0x1EE74, 0x1EE77}, {0x1EE79, 0x1EE7C},
+ {0x1EE7E, 0x1EE7E}, {0x1EE80, 0x1EE89}, {0x1EE8B, 0x1EE9B},
+ {0x1EEA1, 0x1EEA3}, {0x1EEA5, 0x1EEA9}, {0x1EEAB, 0x1EEBB},
+ {0x20000, 0x2A6DF}, {0x2A700, 0x2B738}, {0x2B740, 0x2B81D},
+ {0x2B820, 0x2CEA1}, {0x2CEB0, 0x2EBE0}, {0x2F800, 0x2FA1D},
+ {0x30000, 0x3134A}};
+
+// Unicode 14 XID_Continue, excluding XID_Start
+// The Unicode Property XID_Continue is a superset of XID_Start.
+// To save space, the table below only contains the codepoints
+// that are not also in XID_Start.
+static const llvm::sys::UnicodeCharRange XIDContinueRanges[] = {
+ {0x0030, 0x0039}, {0x005F, 0x005F}, {0x00B7, 0x00B7},
+ {0x0300, 0x036F}, {0x0387, 0x0387}, {0x0483, 0x0487},
+ {0x0591, 0x05BD}, {0x05BF, 0x05BF}, {0x05C1, 0x05C2},
+ {0x05C4, 0x05C5}, {0x05C7, 0x05C7}, {0x0610, 0x061A},
+ {0x064B, 0x0669}, {0x0670, 0x0670}, {0x06D6, 0x06DC},
+ {0x06DF, 0x06E4}, {0x06E7, 0x06E8}, {0x06EA, 0x06ED},
+ {0x06F0, 0x06F9}, {0x0711, 0x0711}, {0x0730, 0x074A},
+ {0x07A6, 0x07B0}, {0x07C0, 0x07C9}, {0x07EB, 0x07F3},
+ {0x07FD, 0x07FD}, {0x0816, 0x0819}, {0x081B, 0x0823},
+ {0x0825, 0x0827}, {0x0829, 0x082D}, {0x0859, 0x085B},
+ {0x0898, 0x089F}, {0x08CA, 0x08E1}, {0x08E3, 0x0903},
+ {0x093A, 0x093C}, {0x093E, 0x094F}, {0x0951, 0x0957},
+ {0x0962, 0x0963}, {0x0966, 0x096F}, {0x0981, 0x0983},
+ {0x09BC, 0x09BC}, {0x09BE, 0x09C4}, {0x09C7, 0x09C8},
+ {0x09CB, 0x09CD}, {0x09D7, 0x09D7}, {0x09E2, 0x09E3},
+ {0x09E6, 0x09EF}, {0x09FE, 0x09FE}, {0x0A01, 0x0A03},
+ {0x0A3C, 0x0A3C}, {0x0A3E, 0x0A42}, {0x0A47, 0x0A48},
+ {0x0A4B, 0x0A4D}, {0x0A51, 0x0A51}, {0x0A66, 0x0A71},
+ {0x0A75, 0x0A75}, {0x0A81, 0x0A83}, {0x0ABC, 0x0ABC},
+ {0x0ABE, 0x0AC5}, {0x0AC7, 0x0AC9}, {0x0ACB, 0x0ACD},
+ {0x0AE2, 0x0AE3}, {0x0AE6, 0x0AEF}, {0x0AFA, 0x0AFF},
+ {0x0B01, 0x0B03}, {0x0B3C, 0x0B3C}, {0x0B3E, 0x0B44},
+ {0x0B47, 0x0B48}, {0x0B4B, 0x0B4D}, {0x0B55, 0x0B57},
+ {0x0B62, 0x0B63}, {0x0B66, 0x0B6F}, {0x0B82, 0x0B82},
+ {0x0BBE, 0x0BC2}, {0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCD},
+ {0x0BD7, 0x0BD7}, {0x0BE6, 0x0BEF}, {0x0C00, 0x0C04},
+ {0x0C3C, 0x0C3C}, {0x0C3E, 0x0C44}, {0x0C46, 0x0C48},
+ {0x0C4A, 0x0C4D}, {0x0C55, 0x0C56}, {0x0C62, 0x0C63},
+ {0x0C66, 0x0C6F}, {0x0C81, 0x0C83}, {0x0CBC, 0x0CBC},
+ {0x0CBE, 0x0CC4}, {0x0CC6, 0x0CC8}, {0x0CCA, 0x0CCD},
+ {0x0CD5, 0x0CD6}, {0x0CE2, 0x0CE3}, {0x0CE6, 0x0CEF},
+ {0x0D00, 0x0D03}, {0x0D3B, 0x0D3C}, {0x0D3E, 0x0D44},
+ {0x0D46, 0x0D48}, {0x0D4A, 0x0D4D}, {0x0D57, 0x0D57},
+ {0x0D62, 0x0D63}, {0x0D66, 0x0D6F}, {0x0D81, 0x0D83},
+ {0x0DCA, 0x0DCA}, {0x0DCF, 0x0DD4}, {0x0DD6, 0x0DD6},
+ {0x0DD8, 0x0DDF}, {0x0DE6, 0x0DEF}, {0x0DF2, 0x0DF3},
+ {0x0E31, 0x0E31}, {0x0E33, 0x0E3A}, {0x0E47, 0x0E4E},
+ {0x0E50, 0x0E59}, {0x0EB1, 0x0EB1}, {0x0EB3, 0x0EBC},
+ {0x0EC8, 0x0ECD}, {0x0ED0, 0x0ED9}, {0x0F18, 0x0F19},
+ {0x0F20, 0x0F29}, {0x0F35, 0x0F35}, {0x0F37, 0x0F37},
+ {0x0F39, 0x0F39}, {0x0F3E, 0x0F3F}, {0x0F71, 0x0F84},
+ {0x0F86, 0x0F87}, {0x0F8D, 0x0F97}, {0x0F99, 0x0FBC},
+ {0x0FC6, 0x0FC6}, {0x102B, 0x103E}, {0x1040, 0x1049},
+ {0x1056, 0x1059}, {0x105E, 0x1060}, {0x1062, 0x1064},
+ {0x1067, 0x106D}, {0x1071, 0x1074}, {0x1082, 0x108D},
+ {0x108F, 0x109D}, {0x135D, 0x135F}, {0x1369, 0x1371},
+ {0x1712, 0x1715}, {0x1732, 0x1734}, {0x1752, 0x1753},
+ {0x1772, 0x1773}, {0x17B4, 0x17D3}, {0x17DD, 0x17DD},
+ {0x17E0, 0x17E9}, {0x180B, 0x180D}, {0x180F, 0x1819},
+ {0x18A9, 0x18A9}, {0x1920, 0x192B}, {0x1930, 0x193B},
+ {0x1946, 0x194F}, {0x19D0, 0x19DA}, {0x1A17, 0x1A1B},
+ {0x1A55, 0x1A5E}, {0x1A60, 0x1A7C}, {0x1A7F, 0x1A89},
+ {0x1A90, 0x1A99}, {0x1AB0, 0x1ABD}, {0x1ABF, 0x1ACE},
+ {0x1B00, 0x1B04}, {0x1B34, 0x1B44}, {0x1B50, 0x1B59},
+ {0x1B6B, 0x1B73}, {0x1B80, 0x1B82}, {0x1BA1, 0x1BAD},
+ {0x1BB0, 0x1BB9}, {0x1BE6, 0x1BF3}, {0x1C24, 0x1C37},
+ {0x1C40, 0x1C49}, {0x1C50, 0x1C59}, {0x1CD0, 0x1CD2},
+ {0x1CD4, 0x1CE8}, {0x1CED, 0x1CED}, {0x1CF4, 0x1CF4},
+ {0x1CF7, 0x1CF9}, {0x1DC0, 0x1DFF}, {0x203F, 0x2040},
+ {0x2054, 0x2054}, {0x20D0, 0x20DC}, {0x20E1, 0x20E1},
+ {0x20E5, 0x20F0}, {0x2CEF, 0x2CF1}, {0x2D7F, 0x2D7F},
+ {0x2DE0, 0x2DFF}, {0x302A, 0x302F}, {0x3099, 0x309A},
+ {0xA620, 0xA629}, {0xA66F, 0xA66F}, {0xA674, 0xA67D},
+ {0xA69E, 0xA69F}, {0xA6F0, 0xA6F1}, {0xA802, 0xA802},
+ {0xA806, 0xA806}, {0xA80B, 0xA80B}, {0xA823, 0xA827},
+ {0xA82C, 0xA82C}, {0xA880, 0xA881}, {0xA8B4, 0xA8C5},
+ {0xA8D0, 0xA8D9}, {0xA8E0, 0xA8F1}, {0xA8FF, 0xA909},
+ {0xA926, 0xA92D}, {0xA947, 0xA953}, {0xA980, 0xA983},
+ {0xA9B3, 0xA9C0}, {0xA9D0, 0xA9D9}, {0xA9E5, 0xA9E5},
+ {0xA9F0, 0xA9F9}, {0xAA29, 0xAA36}, {0xAA43, 0xAA43},
+ {0xAA4C, 0xAA4D}, {0xAA50, 0xAA59}, {0xAA7B, 0xAA7D},
+ {0xAAB0, 0xAAB0}, {0xAAB2, 0xAAB4}, {0xAAB7, 0xAAB8},
+ {0xAABE, 0xAABF}, {0xAAC1, 0xAAC1}, {0xAAEB, 0xAAEF},
+ {0xAAF5, 0xAAF6}, {0xABE3, 0xABEA}, {0xABEC, 0xABED},
+ {0xABF0, 0xABF9}, {0xFB1E, 0xFB1E}, {0xFE00, 0xFE0F},
+ {0xFE20, 0xFE2F}, {0xFE33, 0xFE34}, {0xFE4D, 0xFE4F},
+ {0xFF10, 0xFF19}, {0xFF3F, 0xFF3F}, {0xFF9E, 0xFF9F},
+ {0x101FD, 0x101FD}, {0x102E0, 0x102E0}, {0x10376, 0x1037A},
+ {0x104A0, 0x104A9}, {0x10A01, 0x10A03}, {0x10A05, 0x10A06},
+ {0x10A0C, 0x10A0F}, {0x10A38, 0x10A3A}, {0x10A3F, 0x10A3F},
+ {0x10AE5, 0x10AE6}, {0x10D24, 0x10D27}, {0x10D30, 0x10D39},
+ {0x10EAB, 0x10EAC}, {0x10F46, 0x10F50}, {0x10F82, 0x10F85},
+ {0x11000, 0x11002}, {0x11038, 0x11046}, {0x11066, 0x11070},
+ {0x11073, 0x11074}, {0x1107F, 0x11082}, {0x110B0, 0x110BA},
+ {0x110C2, 0x110C2}, {0x110F0, 0x110F9}, {0x11100, 0x11102},
+ {0x11127, 0x11134}, {0x11136, 0x1113F}, {0x11145, 0x11146},
+ {0x11173, 0x11173}, {0x11180, 0x11182}, {0x111B3, 0x111C0},
+ {0x111C9, 0x111CC}, {0x111CE, 0x111D9}, {0x1122C, 0x11237},
+ {0x1123E, 0x1123E}, {0x112DF, 0x112EA}, {0x112F0, 0x112F9},
+ {0x11300, 0x11303}, {0x1133B, 0x1133C}, {0x1133E, 0x11344},
+ {0x11347, 0x11348}, {0x1134B, 0x1134D}, {0x11357, 0x11357},
+ {0x11362, 0x11363}, {0x11366, 0x1136C}, {0x11370, 0x11374},
+ {0x11435, 0x11446}, {0x11450, 0x11459}, {0x1145E, 0x1145E},
+ {0x114B0, 0x114C3}, {0x114D0, 0x114D9}, {0x115AF, 0x115B5},
+ {0x115B8, 0x115C0}, {0x115DC, 0x115DD}, {0x11630, 0x11640},
+ {0x11650, 0x11659}, {0x116AB, 0x116B7}, {0x116C0, 0x116C9},
+ {0x1171D, 0x1172B}, {0x11730, 0x11739}, {0x1182C, 0x1183A},
+ {0x118E0, 0x118E9}, {0x11930, 0x11935}, {0x11937, 0x11938},
+ {0x1193B, 0x1193E}, {0x11940, 0x11940}, {0x11942, 0x11943},
+ {0x11950, 0x11959}, {0x119D1, 0x119D7}, {0x119DA, 0x119E0},
+ {0x119E4, 0x119E4}, {0x11A01, 0x11A0A}, {0x11A33, 0x11A39},
+ {0x11A3B, 0x11A3E}, {0x11A47, 0x11A47}, {0x11A51, 0x11A5B},
+ {0x11A8A, 0x11A99}, {0x11C2F, 0x11C36}, {0x11C38, 0x11C3F},
+ {0x11C50, 0x11C59}, {0x11C92, 0x11CA7}, {0x11CA9, 0x11CB6},
+ {0x11D31, 0x11D36}, {0x11D3A, 0x11D3A}, {0x11D3C, 0x11D3D},
+ {0x11D3F, 0x11D45}, {0x11D47, 0x11D47}, {0x11D50, 0x11D59},
+ {0x11D8A, 0x11D8E}, {0x11D90, 0x11D91}, {0x11D93, 0x11D97},
+ {0x11DA0, 0x11DA9}, {0x11EF3, 0x11EF6}, {0x16A60, 0x16A69},
+ {0x16AC0, 0x16AC9}, {0x16AF0, 0x16AF4}, {0x16B30, 0x16B36},
+ {0x16B50, 0x16B59}, {0x16F4F, 0x16F4F}, {0x16F51, 0x16F87},
+ {0x16F8F, 0x16F92}, {0x16FE4, 0x16FE4}, {0x16FF0, 0x16FF1},
+ {0x1BC9D, 0x1BC9E}, {0x1CF00, 0x1CF2D}, {0x1CF30, 0x1CF46},
+ {0x1D165, 0x1D169}, {0x1D16D, 0x1D172}, {0x1D17B, 0x1D182},
+ {0x1D185, 0x1D18B}, {0x1D1AA, 0x1D1AD}, {0x1D242, 0x1D244},
+ {0x1D7CE, 0x1D7FF}, {0x1DA00, 0x1DA36}, {0x1DA3B, 0x1DA6C},
+ {0x1DA75, 0x1DA75}, {0x1DA84, 0x1DA84}, {0x1DA9B, 0x1DA9F},
+ {0x1DAA1, 0x1DAAF}, {0x1E000, 0x1E006}, {0x1E008, 0x1E018},
+ {0x1E01B, 0x1E021}, {0x1E023, 0x1E024}, {0x1E026, 0x1E02A},
+ {0x1E130, 0x1E136}, {0x1E140, 0x1E149}, {0x1E2AE, 0x1E2AE},
+ {0x1E2EC, 0x1E2F9}, {0x1E8D0, 0x1E8D6}, {0x1E944, 0x1E94A},
+ {0x1E950, 0x1E959}, {0x1FBF0, 0x1FBF9}, {0xE0100, 0xE01EF}};
+
// C11 D.1, C++11 [charname.allowed]
static const llvm::sys::UnicodeCharRange C11AllowedIDCharRanges[] = {
// 1
@@ -40,127 +389,6 @@ static const llvm::sys::UnicodeCharRange C11AllowedIDCharRanges[] = {
{ 0xD0000, 0xDFFFD }, { 0xE0000, 0xEFFFD }
};
-// C++03 [extendid]
-// Note that this is not the same as C++98, but we don't distinguish C++98
-// and C++03 in Clang.
-static const llvm::sys::UnicodeCharRange CXX03AllowedIDCharRanges[] = {
- // Latin
- { 0x00C0, 0x00D6 }, { 0x00D8, 0x00F6 }, { 0x00F8, 0x01F5 },
- { 0x01FA, 0x0217 }, { 0x0250, 0x02A8 },
-
- // Greek
- { 0x0384, 0x0384 }, { 0x0388, 0x038A }, { 0x038C, 0x038C },
- { 0x038E, 0x03A1 }, { 0x03A3, 0x03CE }, { 0x03D0, 0x03D6 },
- { 0x03DA, 0x03DA }, { 0x03DC, 0x03DC }, { 0x03DE, 0x03DE },
- { 0x03E0, 0x03E0 }, { 0x03E2, 0x03F3 },
-
- // Cyrillic
- { 0x0401, 0x040D }, { 0x040F, 0x044F }, { 0x0451, 0x045C },
- { 0x045E, 0x0481 }, { 0x0490, 0x04C4 }, { 0x04C7, 0x04C8 },
- { 0x04CB, 0x04CC }, { 0x04D0, 0x04EB }, { 0x04EE, 0x04F5 },
- { 0x04F8, 0x04F9 },
-
- // Armenian
- { 0x0531, 0x0556 }, { 0x0561, 0x0587 },
-
- // Hebrew
- { 0x05D0, 0x05EA }, { 0x05F0, 0x05F4 },
-
- // Arabic
- { 0x0621, 0x063A }, { 0x0640, 0x0652 }, { 0x0670, 0x06B7 },
- { 0x06BA, 0x06BE }, { 0x06C0, 0x06CE }, { 0x06E5, 0x06E7 },
-
- // Devanagari
- { 0x0905, 0x0939 }, { 0x0958, 0x0962 },
-
- // Bengali
- { 0x0985, 0x098C }, { 0x098F, 0x0990 }, { 0x0993, 0x09A8 },
- { 0x09AA, 0x09B0 }, { 0x09B2, 0x09B2 }, { 0x09B6, 0x09B9 },
- { 0x09DC, 0x09DD }, { 0x09DF, 0x09E1 }, { 0x09F0, 0x09F1 },
-
- // Gurmukhi
- { 0x0A05, 0x0A0A }, { 0x0A0F, 0x0A10 }, { 0x0A13, 0x0A28 },
- { 0x0A2A, 0x0A30 }, { 0x0A32, 0x0A33 }, { 0x0A35, 0x0A36 },
- { 0x0A38, 0x0A39 }, { 0x0A59, 0x0A5C }, { 0x0A5E, 0x0A5E },
-
- // Gujarti
- { 0x0A85, 0x0A8B }, { 0x0A8D, 0x0A8D }, { 0x0A8F, 0x0A91 },
- { 0x0A93, 0x0AA8 }, { 0x0AAA, 0x0AB0 }, { 0x0AB2, 0x0AB3 },
- { 0x0AB5, 0x0AB9 }, { 0x0AE0, 0x0AE0 },
-
- // Oriya
- { 0x0B05, 0x0B0C }, { 0x0B0F, 0x0B10 }, { 0x0B13, 0x0B28 },
- { 0x0B2A, 0x0B30 }, { 0x0B32, 0x0B33 }, { 0x0B36, 0x0B39 },
- { 0x0B5C, 0x0B5D }, { 0x0B5F, 0x0B61 },
-
- // Tamil
- { 0x0B85, 0x0B8A }, { 0x0B8E, 0x0B90 }, { 0x0B92, 0x0B95 },
- { 0x0B99, 0x0B9A }, { 0x0B9C, 0x0B9C }, { 0x0B9E, 0x0B9F },
- { 0x0BA3, 0x0BA4 }, { 0x0BA8, 0x0BAA }, { 0x0BAE, 0x0BB5 },
- { 0x0BB7, 0x0BB9 },
-
- // Telugu
- { 0x0C05, 0x0C0C }, { 0x0C0E, 0x0C10 }, { 0x0C12, 0x0C28 },
- { 0x0C2A, 0x0C33 }, { 0x0C35, 0x0C39 }, { 0x0C60, 0x0C61 },
-
- // Kannada
- { 0x0C85, 0x0C8C }, { 0x0C8E, 0x0C90 }, { 0x0C92, 0x0CA8 },
- { 0x0CAA, 0x0CB3 }, { 0x0CB5, 0x0CB9 }, { 0x0CE0, 0x0CE1 },
-
- // Malayam
- { 0x0D05, 0x0D0C }, { 0x0D0E, 0x0D10 }, { 0x0D12, 0x0D28 },
- { 0x0D2A, 0x0D39 }, { 0x0D60, 0x0D61 },
-
- // Thai
- { 0x0E01, 0x0E30 }, { 0x0E32, 0x0E33 }, { 0x0E40, 0x0E46 },
- { 0x0E4F, 0x0E5B },
-
- // Lao
- { 0x0E81, 0x0E82 }, { 0x0E84, 0x0E84 }, { 0x0E87, 0x0E87 },
- { 0x0E88, 0x0E88 }, { 0x0E8A, 0x0E8A }, { 0x0E8D, 0x0E8D },
- { 0x0E94, 0x0E97 }, { 0x0E99, 0x0E9F }, { 0x0EA1, 0x0EA3 },
- { 0x0EA5, 0x0EA5 }, { 0x0EA7, 0x0EA7 }, { 0x0EAA, 0x0EAA },
- { 0x0EAB, 0x0EAB }, { 0x0EAD, 0x0EB0 }, { 0x0EB2, 0x0EB2 },
- { 0x0EB3, 0x0EB3 }, { 0x0EBD, 0x0EBD }, { 0x0EC0, 0x0EC4 },
- { 0x0EC6, 0x0EC6 },
-
- // Georgian
- { 0x10A0, 0x10C5 }, { 0x10D0, 0x10F6 },
-
- // Hangul
- { 0x1100, 0x1159 }, { 0x1161, 0x11A2 }, { 0x11A8, 0x11F9 },
-
- // Latin (2)
- { 0x1E00, 0x1E9A }, { 0x1EA0, 0x1EF9 },
-
- // Greek (2)
- { 0x1F00, 0x1F15 }, { 0x1F18, 0x1F1D }, { 0x1F20, 0x1F45 },
- { 0x1F48, 0x1F4D }, { 0x1F50, 0x1F57 }, { 0x1F59, 0x1F59 },
- { 0x1F5B, 0x1F5B }, { 0x1F5D, 0x1F5D }, { 0x1F5F, 0x1F7D },
- { 0x1F80, 0x1FB4 }, { 0x1FB6, 0x1FBC }, { 0x1FC2, 0x1FC4 },
- { 0x1FC6, 0x1FCC }, { 0x1FD0, 0x1FD3 }, { 0x1FD6, 0x1FDB },
- { 0x1FE0, 0x1FEC }, { 0x1FF2, 0x1FF4 }, { 0x1FF6, 0x1FFC },
-
- // Hiragana
- { 0x3041, 0x3094 }, { 0x309B, 0x309E },
-
- // Katakana
- { 0x30A1, 0x30FE },
-
- // Bopmofo [sic]
- { 0x3105, 0x312C },
-
- // CJK Unified Ideographs
- { 0x4E00, 0x9FA5 }, { 0xF900, 0xFA2D }, { 0xFB1F, 0xFB36 },
- { 0xFB38, 0xFB3C }, { 0xFB3E, 0xFB3E }, { 0xFB40, 0xFB41 },
- { 0xFB42, 0xFB44 }, { 0xFB46, 0xFBB1 }, { 0xFBD3, 0xFD3F },
- { 0xFD50, 0xFD8F }, { 0xFD92, 0xFDC7 }, { 0xFDF0, 0xFDFB },
- { 0xFE70, 0xFE72 }, { 0xFE74, 0xFE74 }, { 0xFE76, 0xFEFC },
- { 0xFF21, 0xFF3A }, { 0xFF41, 0xFF5A }, { 0xFF66, 0xFFBE },
- { 0xFFC2, 0xFFC7 }, { 0xFFCA, 0xFFCF }, { 0xFFD2, 0xFFD7 },
- { 0xFFDA, 0xFFDC }
-};
-
// C99 Annex D
static const llvm::sys::UnicodeCharRange C99AllowedIDCharRanges[] = {
// Latin (1)
diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp
index f4f5f461e3b6..a0871062395e 100644
--- a/clang/lib/Parse/ParseDecl.cpp
+++ b/clang/lib/Parse/ParseDecl.cpp
@@ -195,6 +195,11 @@ void Parser::ParseGNUAttributes(ParsedAttributesWithRange &Attrs,
// Expect an identifier or declaration specifier (const, int, etc.)
if (Tok.isAnnotation())
break;
+ if (Tok.is(tok::code_completion)) {
+ cutOffParsing();
+ Actions.CodeCompleteAttribute(AttributeCommonInfo::Syntax::AS_GNU);
+ break;
+ }
IdentifierInfo *AttrName = Tok.getIdentifierInfo();
if (!AttrName)
break;
@@ -714,6 +719,12 @@ void Parser::ParseMicrosoftDeclSpecs(ParsedAttributes &Attrs,
if (TryConsumeToken(tok::comma))
continue;
+ if (Tok.is(tok::code_completion)) {
+ cutOffParsing();
+ Actions.CodeCompleteAttribute(AttributeCommonInfo::AS_Declspec);
+ return;
+ }
+
// We expect either a well-known identifier or a generic string. Anything
// else is a malformed declspec.
bool IsString = Tok.getKind() == tok::string_literal;
@@ -2010,6 +2021,18 @@ Parser::DeclGroupPtrTy Parser::ParseDeclGroup(ParsingDeclSpec &DS,
Actions.CodeCompleteAfterFunctionEquals(D);
return nullptr;
}
+ // We're at the point where parsing of the function declarator is finished.
+ //
+ // A common error is that users accidentally add a virtual specifier
+ // (e.g. override) in an out-of-line method definition.
+ // We attempt to recover by stripping all these specifiers coming after
+ // the declarator.
+ while (auto Specifier = isCXX11VirtSpecifier()) {
+ Diag(Tok, diag::err_virt_specifier_outside_class)
+ << VirtSpecifiers::getSpecifierName(Specifier)
+ << FixItHint::CreateRemoval(Tok.getLocation());
+ ConsumeToken();
+ }
// Look at the next token to make sure that this isn't a function
// declaration. We have to check this because __attribute__ might be the
// start of a function definition in GCC-extended K&R C.
@@ -3091,6 +3114,12 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS,
return true;
};
+ // Turn off usual access checking for template specializations and
+ // instantiations.
+ bool IsTemplateSpecOrInst =
+ (TemplateInfo.Kind == ParsedTemplateInfo::ExplicitInstantiation ||
+ TemplateInfo.Kind == ParsedTemplateInfo::ExplicitSpecialization);
+
switch (Tok.getKind()) {
default:
DoneWithDeclSpec:
@@ -3261,6 +3290,12 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS,
isConstructorDeclarator(/*Unqualified*/ false))
goto DoneWithDeclSpec;
+ // C++20 [temp.spec] 13.9/6.
+ // This disables the access checking rules for function template explicit
+ // instantiation and explicit specialization:
+ // - `return type`.
+ SuppressAccessChecks SAC(*this, IsTemplateSpecOrInst);
+
ParsedType TypeRep =
Actions.getTypeName(*Next.getIdentifierInfo(), Next.getLocation(),
getCurScope(), &SS, false, false, nullptr,
@@ -3268,6 +3303,9 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS,
/*WantNontrivialTypeSourceInfo=*/true,
isClassTemplateDeductionContext(DSContext));
+ if (IsTemplateSpecOrInst)
+ SAC.done();
+
// If the referenced identifier is not a type, then this declspec is
// erroneous: We already checked about that it has no type specifier, and
// C++ doesn't have implicit int. Diagnose it as a typo w.r.t. to the
@@ -3377,10 +3415,24 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS,
// In C++, check to see if this is a scope specifier like foo::bar::, if
// so handle it as such. This is important for ctor parsing.
if (getLangOpts().CPlusPlus) {
- if (TryAnnotateCXXScopeToken(EnteringContext)) {
+ // C++20 [temp.spec] 13.9/6.
+ // This disables the access checking rules for function template
+ // explicit instantiation and explicit specialization:
+ // - `return type`.
+ SuppressAccessChecks SAC(*this, IsTemplateSpecOrInst);
+
+ const bool Success = TryAnnotateCXXScopeToken(EnteringContext);
+
+ if (IsTemplateSpecOrInst)
+ SAC.done();
+
+ if (Success) {
+ if (IsTemplateSpecOrInst)
+ SAC.redelay();
DS.SetTypeSpecError();
goto DoneWithDeclSpec;
}
+
if (!Tok.is(tok::identifier))
continue;
}
@@ -3889,6 +3941,10 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS,
isInvalid = DS.SetTypeSpecType(DeclSpec::TST_float128, Loc, PrevSpec,
DiagID, Policy);
break;
+ case tok::kw___ibm128:
+ isInvalid = DS.SetTypeSpecType(DeclSpec::TST_ibm128, Loc, PrevSpec,
+ DiagID, Policy);
+ break;
case tok::kw_wchar_t:
isInvalid = DS.SetTypeSpecType(DeclSpec::TST_wchar, Loc, PrevSpec,
DiagID, Policy);
@@ -3945,15 +4001,19 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS,
isInvalid = DS.SetTypeAltiVecBool(true, Loc, PrevSpec, DiagID, Policy);
break;
case tok::kw_pipe:
- if (!getLangOpts().OpenCL || (getLangOpts().OpenCLVersion < 200 &&
- !getLangOpts().OpenCLCPlusPlus)) {
+ if (!getLangOpts().OpenCL ||
+ getLangOpts().getOpenCLCompatibleVersion() < 200) {
// OpenCL 2.0 and later define this keyword. OpenCL 1.2 and earlier
// should support the "pipe" word as identifier.
Tok.getIdentifierInfo()->revertTokenIDToIdentifier();
Tok.setKind(tok::identifier);
goto DoneWithDeclSpec;
- }
- isInvalid = DS.SetTypePipe(true, Loc, PrevSpec, DiagID, Policy);
+ } else if (!getLangOpts().OpenCLPipes) {
+ DiagID = diag::err_opencl_unknown_type_specifier;
+ PrevSpec = Tok.getIdentifierInfo()->getNameStart();
+ isInvalid = true;
+ } else
+ isInvalid = DS.SetTypePipe(true, Loc, PrevSpec, DiagID, Policy);
break;
// We only need to enumerate each image type once.
#define IMAGE_READ_WRITE_TYPE(Type, Id, Ext)
@@ -4138,9 +4198,8 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS,
<< FixItHint::CreateRemoval(
SourceRange(Loc, DS.getEndLoc()));
else if (DiagID == diag::err_opencl_unknown_type_specifier) {
- Diag(Loc, DiagID) << getLangOpts().OpenCLCPlusPlus
- << getLangOpts().getOpenCLVersionTuple().getAsString()
- << PrevSpec << isStorageClass;
+ Diag(Loc, DiagID) << getLangOpts().getOpenCLVersionString() << PrevSpec
+ << isStorageClass;
} else
Diag(Loc, DiagID) << PrevSpec;
}
@@ -4964,6 +5023,7 @@ bool Parser::isKnownToBeTypeSpecifier(const Token &Tok) const {
case tok::kw__Fract:
case tok::kw__Float16:
case tok::kw___float128:
+ case tok::kw___ibm128:
case tok::kw_bool:
case tok::kw__Bool:
case tok::kw__Decimal32:
@@ -5045,6 +5105,7 @@ bool Parser::isTypeSpecifierQualifier() {
case tok::kw__Fract:
case tok::kw__Float16:
case tok::kw___float128:
+ case tok::kw___ibm128:
case tok::kw_bool:
case tok::kw__Bool:
case tok::kw__Decimal32:
@@ -5126,8 +5187,10 @@ bool Parser::isDeclarationSpecifier(bool DisambiguatingWithExpression) {
switch (Tok.getKind()) {
default: return false;
+ // OpenCL 2.0 and later define this keyword.
case tok::kw_pipe:
- return getLangOpts().OpenCLPipe;
+ return getLangOpts().OpenCL &&
+ getLangOpts().getOpenCLCompatibleVersion() >= 200;
case tok::identifier: // foo::bar
// Unfortunate hack to support "Class.factoryMethod" notation.
@@ -5213,6 +5276,7 @@ bool Parser::isDeclarationSpecifier(bool DisambiguatingWithExpression) {
case tok::kw__Fract:
case tok::kw__Float16:
case tok::kw___float128:
+ case tok::kw___ibm128:
case tok::kw_bool:
case tok::kw__Bool:
case tok::kw__Decimal32:
@@ -5656,7 +5720,9 @@ static bool isPtrOperatorToken(tok::TokenKind Kind, const LangOptions &Lang,
if (Kind == tok::star || Kind == tok::caret)
return true;
- if (Kind == tok::kw_pipe && Lang.OpenCLPipe)
+ // OpenCL 2.0 and later define this keyword.
+ if (Kind == tok::kw_pipe && Lang.OpenCL &&
+ Lang.getOpenCLCompatibleVersion() >= 200)
return true;
if (!Lang.CPlusPlus)
diff --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp
index ca5c013a51fe..f5a6ffcff9e9 100644
--- a/clang/lib/Parse/ParseDeclCXX.cpp
+++ b/clang/lib/Parse/ParseDeclCXX.cpp
@@ -678,7 +678,10 @@ Parser::ParseUsingDeclaration(
SourceLocation UsingLoc, SourceLocation &DeclEnd,
ParsedAttributesWithRange &PrefixAttrs, AccessSpecifier AS) {
SourceLocation UELoc;
- if (TryConsumeToken(tok::kw_enum, UELoc)) {
+ bool InInitStatement = Context == DeclaratorContext::SelectionInit ||
+ Context == DeclaratorContext::ForInit;
+
+ if (TryConsumeToken(tok::kw_enum, UELoc) && !InInitStatement) {
// C++20 using-enum
Diag(UELoc, getLangOpts().CPlusPlus20
? diag::warn_cxx17_compat_using_enum_declaration
@@ -714,6 +717,9 @@ Parser::ParseUsingDeclaration(
ParsedAttributesWithRange MisplacedAttrs(AttrFactory);
MaybeParseCXX11Attributes(MisplacedAttrs);
+ if (InInitStatement && Tok.isNot(tok::identifier))
+ return nullptr;
+
UsingDeclarator D;
bool InvalidDeclarator = ParseUsingDeclarator(Context, D);
@@ -732,7 +738,7 @@ Parser::ParseUsingDeclaration(
}
// Maybe this is an alias-declaration.
- if (Tok.is(tok::equal)) {
+ if (Tok.is(tok::equal) || InInitStatement) {
if (InvalidDeclarator) {
SkipUntil(tok::semi);
return nullptr;
@@ -1474,19 +1480,15 @@ void Parser::ParseClassSpecifier(tok::TokenKind TagTokKind,
return;
}
- // C++03 [temp.explicit] 14.7.2/8:
- // The usual access checking rules do not apply to names used to specify
- // explicit instantiations.
- //
- // As an extension we do not perform access checking on the names used to
- // specify explicit specializations either. This is important to allow
- // specializing traits classes for private types.
- //
- // Note that we don't suppress if this turns out to be an elaborated
- // type specifier.
- bool shouldDelayDiagsInTag =
- (TemplateInfo.Kind == ParsedTemplateInfo::ExplicitInstantiation ||
- TemplateInfo.Kind == ParsedTemplateInfo::ExplicitSpecialization);
+ // C++20 [temp.class.spec] 13.7.5/10
+ // The usual access checking rules do not apply to non-dependent names
+ // used to specify template arguments of the simple-template-id of the
+ // partial specialization.
+ // C++20 [temp.spec] 13.9/6:
+ // The usual access checking rules do not apply to names in a declaration
+ // of an explicit instantiation or explicit specialization...
+ const bool shouldDelayDiagsInTag =
+ (TemplateInfo.Kind != ParsedTemplateInfo::NonTemplate);
SuppressAccessChecks diagsFromTag(*this, shouldDelayDiagsInTag);
ParsedAttributesWithRange attrs(AttrFactory);
@@ -1834,14 +1836,6 @@ void Parser::ParseClassSpecifier(tok::TokenKind TagTokKind,
}
}
- // If this is an elaborated type specifier, and we delayed
- // diagnostics before, just merge them into the current pool.
- if (shouldDelayDiagsInTag) {
- diagsFromTag.done();
- if (TUK == Sema::TUK_Reference)
- diagsFromTag.redelay();
- }
-
if (!Name && !TemplateId && (DS.getTypeSpecType() == DeclSpec::TST_error ||
TUK != Sema::TUK_Definition)) {
if (DS.getTypeSpecType() != DeclSpec::TST_error) {
@@ -2018,6 +2012,16 @@ void Parser::ParseClassSpecifier(tok::TokenKind TagTokKind,
}
}
+ // If this is an elaborated type specifier in a function template,
+ // and we delayed diagnostics before,
+ // just merge them into the current pool.
+ if (shouldDelayDiagsInTag) {
+ diagsFromTag.done();
+ if (TUK == Sema::TUK_Reference &&
+ TemplateInfo.Kind == ParsedTemplateInfo::Template)
+ diagsFromTag.redelay();
+ }
+
// If there is a body, parse it and inform the actions module.
if (TUK == Sema::TUK_Definition) {
assert(Tok.is(tok::l_brace) ||
@@ -2713,9 +2717,22 @@ Parser::ParseCXXClassMemberDeclaration(AccessSpecifier AS,
if (MalformedTypeSpec)
DS.SetTypeSpecError();
+ // Turn off usual access checking for template explicit specialization
+ // and instantiation.
+ // C++20 [temp.spec] 13.9/6.
+ // This disables the access checking rules for member function template
+ // explicit instantiation and explicit specialization.
+ bool IsTemplateSpecOrInst =
+ (TemplateInfo.Kind == ParsedTemplateInfo::ExplicitInstantiation ||
+ TemplateInfo.Kind == ParsedTemplateInfo::ExplicitSpecialization);
+ SuppressAccessChecks diagsFromTag(*this, IsTemplateSpecOrInst);
+
ParseDeclarationSpecifiers(DS, TemplateInfo, AS, DeclSpecContext::DSC_class,
&CommonLateParsedAttrs);
+ if (IsTemplateSpecOrInst)
+ diagsFromTag.done();
+
// Turn off colon protection that was set for declspec.
X.restore();
@@ -2784,6 +2801,11 @@ Parser::ParseCXXClassMemberDeclaration(AccessSpecifier AS,
ExprResult TrailingRequiresClause;
bool ExpectSemi = true;
+ // C++20 [temp.spec] 13.9/6.
+ // This disables the access checking rules for member function template
+ // explicit instantiation and explicit specialization.
+ SuppressAccessChecks SAC(*this, IsTemplateSpecOrInst);
+
// Parse the first declarator.
if (ParseCXXMemberDeclaratorBeforeInitializer(
DeclaratorInfo, VS, BitfieldSize, LateParsedAttrs)) {
@@ -2791,6 +2813,9 @@ Parser::ParseCXXClassMemberDeclaration(AccessSpecifier AS,
return nullptr;
}
+ if (IsTemplateSpecOrInst)
+ SAC.done();
+
// Check for a member function definition.
if (BitfieldSize.isUnset()) {
// MSVC permits pure specifier on inline functions defined at class scope.
@@ -2989,9 +3014,11 @@ Parser::ParseCXXClassMemberDeclaration(AccessSpecifier AS,
ExprResult Init = ParseCXXMemberInitializer(
ThisDecl, DeclaratorInfo.isDeclarationOfFunction(), EqualLoc);
- if (Init.isInvalid())
+ if (Init.isInvalid()) {
+ if (ThisDecl)
+ Actions.ActOnUninitializedDecl(ThisDecl);
SkipUntil(tok::comma, StopAtSemi | StopBeforeMatch);
- else if (ThisDecl)
+ } else if (ThisDecl)
Actions.AddInitializerToDecl(ThisDecl, Init.get(), EqualLoc.isInvalid());
} else if (ThisDecl && DS.getStorageClassSpec() == DeclSpec::SCS_static)
// No initializer.
@@ -3835,7 +3862,7 @@ Parser::tryParseExceptionSpecification(bool Delayed,
NoexceptExpr = ParseConstantExpression();
T.consumeClose();
if (!NoexceptExpr.isInvalid()) {
- NoexceptExpr = Actions.ActOnNoexceptSpec(KeywordLoc, NoexceptExpr.get(),
+ NoexceptExpr = Actions.ActOnNoexceptSpec(NoexceptExpr.get(),
NoexceptType);
NoexceptRange = SourceRange(KeywordLoc, T.getCloseLocation());
} else {
@@ -4084,7 +4111,10 @@ void Parser::PopParsingClass(Sema::ParsingClassState state) {
/// If a keyword or an alternative token that satisfies the syntactic
/// requirements of an identifier is contained in an attribute-token,
/// it is considered an identifier.
-IdentifierInfo *Parser::TryParseCXX11AttributeIdentifier(SourceLocation &Loc) {
+IdentifierInfo *
+Parser::TryParseCXX11AttributeIdentifier(SourceLocation &Loc,
+ Sema::AttributeCompletion Completion,
+ const IdentifierInfo *Scope) {
switch (Tok.getKind()) {
default:
// Identifiers and keywords have identifier info attached.
@@ -4096,6 +4126,13 @@ IdentifierInfo *Parser::TryParseCXX11AttributeIdentifier(SourceLocation &Loc) {
}
return nullptr;
+ case tok::code_completion:
+ cutOffParsing();
+ Actions.CodeCompleteAttribute(getLangOpts().CPlusPlus ? ParsedAttr::AS_CXX11
+ : ParsedAttr::AS_C2x,
+ Completion, Scope);
+ return nullptr;
+
case tok::numeric_constant: {
// If we got a numeric constant, check to see if it comes from a macro that
// corresponds to the predefined __clang__ macro. If it does, warn the user
@@ -4371,7 +4408,8 @@ void Parser::ParseCXX11AttributeSpecifierInternal(ParsedAttributes &Attrs,
: diag::ext_using_attribute_ns);
ConsumeToken();
- CommonScopeName = TryParseCXX11AttributeIdentifier(CommonScopeLoc);
+ CommonScopeName = TryParseCXX11AttributeIdentifier(
+ CommonScopeLoc, Sema::AttributeCompletion::Scope);
if (!CommonScopeName) {
Diag(Tok.getLocation(), diag::err_expected) << tok::identifier;
SkipUntil(tok::r_square, tok::colon, StopBeforeMatch);
@@ -4383,7 +4421,7 @@ void Parser::ParseCXX11AttributeSpecifierInternal(ParsedAttributes &Attrs,
llvm::SmallDenseMap<IdentifierInfo*, SourceLocation, 4> SeenAttrs;
bool AttrParsed = false;
- while (!Tok.isOneOf(tok::r_square, tok::semi)) {
+ while (!Tok.isOneOf(tok::r_square, tok::semi, tok::eof)) {
if (AttrParsed) {
// If we parsed an attribute, a comma is required before parsing any
// additional attributes.
@@ -4401,7 +4439,8 @@ void Parser::ParseCXX11AttributeSpecifierInternal(ParsedAttributes &Attrs,
SourceLocation ScopeLoc, AttrLoc;
IdentifierInfo *ScopeName = nullptr, *AttrName = nullptr;
- AttrName = TryParseCXX11AttributeIdentifier(AttrLoc);
+ AttrName = TryParseCXX11AttributeIdentifier(
+ AttrLoc, Sema::AttributeCompletion::Attribute, CommonScopeName);
if (!AttrName)
// Break out to the "expected ']'" diagnostic.
break;
@@ -4411,7 +4450,8 @@ void Parser::ParseCXX11AttributeSpecifierInternal(ParsedAttributes &Attrs,
ScopeName = AttrName;
ScopeLoc = AttrLoc;
- AttrName = TryParseCXX11AttributeIdentifier(AttrLoc);
+ AttrName = TryParseCXX11AttributeIdentifier(
+ AttrLoc, Sema::AttributeCompletion::Attribute, ScopeName);
if (!AttrName) {
Diag(Tok.getLocation(), diag::err_expected) << tok::identifier;
SkipUntil(tok::r_square, tok::comma, StopAtSemi | StopBeforeMatch);
@@ -4626,7 +4666,15 @@ void Parser::ParseMicrosoftAttributes(ParsedAttributes &attrs,
// Skip most ms attributes except for a specific list.
while (true) {
- SkipUntil(tok::r_square, tok::identifier, StopAtSemi | StopBeforeMatch);
+ SkipUntil(tok::r_square, tok::identifier,
+ StopAtSemi | StopBeforeMatch | StopAtCodeCompletion);
+ if (Tok.is(tok::code_completion)) {
+ cutOffParsing();
+ Actions.CodeCompleteAttribute(AttributeCommonInfo::AS_Microsoft,
+ Sema::AttributeCompletion::Attribute,
+ /*Scope=*/nullptr);
+ break;
+ }
if (Tok.isNot(tok::identifier)) // ']', but also eof
break;
if (Tok.getIdentifierInfo()->getName() == "uuid")
diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp
index 22f3b7624c45..2c8b4f9f441f 100644
--- a/clang/lib/Parse/ParseExpr.cpp
+++ b/clang/lib/Parse/ParseExpr.cpp
@@ -1521,6 +1521,7 @@ ExprResult Parser::ParseCastExpression(CastParseKind ParseKind,
case tok::kw___bf16:
case tok::kw__Float16:
case tok::kw___float128:
+ case tok::kw___ibm128:
case tok::kw_void:
case tok::kw_typename:
case tok::kw_typeof:
@@ -2331,7 +2332,7 @@ Parser::ParseExprAfterUnaryExprOrTypeTrait(const Token &OpTok,
/// a parameter.
ExprResult Parser::ParseSYCLUniqueStableNameExpression() {
assert(Tok.is(tok::kw___builtin_sycl_unique_stable_name) &&
- "Not __bulitin_sycl_unique_stable_name");
+ "Not __builtin_sycl_unique_stable_name");
SourceLocation OpLoc = ConsumeToken();
BalancedDelimiterTracker T(*this, tok::l_paren);
diff --git a/clang/lib/Parse/ParseExprCXX.cpp b/clang/lib/Parse/ParseExprCXX.cpp
index f3d10b4a0889..4e5c0ac6c1c1 100644
--- a/clang/lib/Parse/ParseExprCXX.cpp
+++ b/clang/lib/Parse/ParseExprCXX.cpp
@@ -1068,8 +1068,8 @@ bool Parser::ParseLambdaIntroducer(LambdaIntroducer &Intro,
// Ensure that any ellipsis was in the right place.
SourceLocation EllipsisLoc;
- if (std::any_of(std::begin(EllipsisLocs), std::end(EllipsisLocs),
- [](SourceLocation Loc) { return Loc.isValid(); })) {
+ if (llvm::any_of(EllipsisLocs,
+ [](SourceLocation Loc) { return Loc.isValid(); })) {
// The '...' should appear before the identifier in an init-capture, and
// after the identifier otherwise.
bool InitCapture = InitKind != LambdaCaptureInitKind::NoInit;
@@ -1910,6 +1910,28 @@ Parser::ParseCXXTypeConstructExpression(const DeclSpec &DS) {
}
}
+Parser::DeclGroupPtrTy
+Parser::ParseAliasDeclarationInInitStatement(DeclaratorContext Context,
+ ParsedAttributesWithRange &Attrs) {
+ assert(Tok.is(tok::kw_using) && "Expected using");
+ assert((Context == DeclaratorContext::ForInit ||
+ Context == DeclaratorContext::SelectionInit) &&
+ "Unexpected Declarator Context");
+ DeclGroupPtrTy DG;
+ SourceLocation DeclStart = ConsumeToken(), DeclEnd;
+
+ DG = ParseUsingDeclaration(Context, {}, DeclStart, DeclEnd, Attrs, AS_none);
+ if (!DG)
+ return DG;
+
+ Diag(DeclStart, !getLangOpts().CPlusPlus2b
+ ? diag::ext_alias_in_init_statement
+ : diag::warn_cxx20_alias_in_init_statement)
+ << SourceRange(DeclStart, DeclEnd);
+
+ return DG;
+}
+
/// ParseCXXCondition - if/switch/while condition expression.
///
/// condition:
@@ -2017,9 +2039,14 @@ Sema::ConditionResult Parser::ParseCXXCondition(StmtResult *InitStmt,
case ConditionOrInitStatement::InitStmtDecl: {
WarnOnInit();
+ DeclGroupPtrTy DG;
SourceLocation DeclStart = Tok.getLocation(), DeclEnd;
- DeclGroupPtrTy DG = ParseSimpleDeclaration(
- DeclaratorContext::SelectionInit, DeclEnd, attrs, /*RequireSemi=*/true);
+ if (Tok.is(tok::kw_using))
+ DG = ParseAliasDeclarationInInitStatement(
+ DeclaratorContext::SelectionInit, attrs);
+ else
+ DG = ParseSimpleDeclaration(DeclaratorContext::SelectionInit, DeclEnd,
+ attrs, /*RequireSemi=*/true);
*InitStmt = Actions.ActOnDeclStmt(DG, DeclStart, DeclEnd);
return ParseCXXCondition(nullptr, Loc, CK);
}
@@ -2226,6 +2253,9 @@ void Parser::ParseCXXSimpleTypeSpecifier(DeclSpec &DS) {
case tok::kw___float128:
DS.SetTypeSpecType(DeclSpec::TST_float128, Loc, PrevSpec, DiagID, Policy);
break;
+ case tok::kw___ibm128:
+ DS.SetTypeSpecType(DeclSpec::TST_ibm128, Loc, PrevSpec, DiagID, Policy);
+ break;
case tok::kw_wchar_t:
DS.SetTypeSpecType(DeclSpec::TST_wchar, Loc, PrevSpec, DiagID, Policy);
break;
@@ -3602,7 +3632,7 @@ ExprResult Parser::ParseRequiresExpression() {
break;
}
if (!Expression.isInvalid() && PossibleRequiresExprInSimpleRequirement)
- Diag(StartLoc, diag::warn_requires_expr_in_simple_requirement)
+ Diag(StartLoc, diag::err_requires_expr_in_simple_requirement)
<< FixItHint::CreateInsertion(StartLoc, "requires");
if (auto *Req = Actions.ActOnSimpleRequirement(Expression.get()))
Requirements.push_back(Req);
diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp
index 18e43c3734ac..613ad742c93f 100644
--- a/clang/lib/Parse/ParseOpenMP.cpp
+++ b/clang/lib/Parse/ParseOpenMP.cpp
@@ -828,7 +828,7 @@ static StringRef stringLiteralParser(Parser &P) {
static StringRef getNameFromIdOrString(Parser &P, Token &Tok,
OMPContextLvl Lvl) {
- if (Tok.is(tok::identifier)) {
+ if (Tok.is(tok::identifier) || Tok.is(tok::kw_for)) {
llvm::SmallString<16> Buffer;
StringRef Name = P.getPreprocessor().getSpelling(Tok, Buffer);
(void)P.ConsumeToken();
@@ -1402,26 +1402,178 @@ void Parser::ParseOMPDeclareVariantClauses(Parser::DeclGroupPtrTy Ptr,
OMPTraitInfo *ParentTI = Actions.getOMPTraitInfoForSurroundingScope();
ASTContext &ASTCtx = Actions.getASTContext();
OMPTraitInfo &TI = ASTCtx.getNewOMPTraitInfo();
- if (parseOMPDeclareVariantMatchClause(Loc, TI, ParentTI))
- return;
+ SmallVector<Expr *, 6> AdjustNothing;
+ SmallVector<Expr *, 6> AdjustNeedDevicePtr;
+ SmallVector<OMPDeclareVariantAttr::InteropType, 3> AppendArgs;
+ SourceLocation AdjustArgsLoc, AppendArgsLoc;
+
+ // At least one clause is required.
+ if (Tok.is(tok::annot_pragma_openmp_end)) {
+ Diag(Tok.getLocation(), diag::err_omp_declare_variant_wrong_clause)
+ << (getLangOpts().OpenMP < 51 ? 0 : 1);
+ }
+
+ bool IsError = false;
+ while (Tok.isNot(tok::annot_pragma_openmp_end)) {
+ OpenMPClauseKind CKind = Tok.isAnnotation()
+ ? OMPC_unknown
+ : getOpenMPClauseKind(PP.getSpelling(Tok));
+ if (!isAllowedClauseForDirective(OMPD_declare_variant, CKind,
+ getLangOpts().OpenMP)) {
+ Diag(Tok.getLocation(), diag::err_omp_declare_variant_wrong_clause)
+ << (getLangOpts().OpenMP < 51 ? 0 : 1);
+ IsError = true;
+ }
+ if (!IsError) {
+ switch (CKind) {
+ case OMPC_match:
+ IsError = parseOMPDeclareVariantMatchClause(Loc, TI, ParentTI);
+ break;
+ case OMPC_adjust_args: {
+ AdjustArgsLoc = Tok.getLocation();
+ ConsumeToken();
+ Parser::OpenMPVarListDataTy Data;
+ SmallVector<Expr *> Vars;
+ IsError = ParseOpenMPVarList(OMPD_declare_variant, OMPC_adjust_args,
+ Vars, Data);
+ if (!IsError)
+ llvm::append_range(Data.ExtraModifier == OMPC_ADJUST_ARGS_nothing
+ ? AdjustNothing
+ : AdjustNeedDevicePtr,
+ Vars);
+ break;
+ }
+ case OMPC_append_args:
+ if (!AppendArgs.empty()) {
+ Diag(AppendArgsLoc, diag::err_omp_more_one_clause)
+ << getOpenMPDirectiveName(OMPD_declare_variant)
+ << getOpenMPClauseName(CKind) << 0;
+ IsError = true;
+ }
+ if (!IsError) {
+ AppendArgsLoc = Tok.getLocation();
+ ConsumeToken();
+ IsError = parseOpenMPAppendArgs(AppendArgs);
+ }
+ break;
+ default:
+ llvm_unreachable("Unexpected clause for declare variant.");
+ }
+ }
+ if (IsError) {
+ while (!SkipUntil(tok::annot_pragma_openmp_end, StopBeforeMatch))
+ ;
+ // Skip the last annot_pragma_openmp_end.
+ (void)ConsumeAnnotationToken();
+ return;
+ }
+ // Skip ',' if any.
+ if (Tok.is(tok::comma))
+ ConsumeToken();
+ }
Optional<std::pair<FunctionDecl *, Expr *>> DeclVarData =
Actions.checkOpenMPDeclareVariantFunction(
- Ptr, AssociatedFunction.get(), TI,
+ Ptr, AssociatedFunction.get(), TI, AppendArgs.size(),
SourceRange(Loc, Tok.getLocation()));
- // Skip last tokens.
- while (Tok.isNot(tok::annot_pragma_openmp_end))
- ConsumeAnyToken();
if (DeclVarData && !TI.Sets.empty())
Actions.ActOnOpenMPDeclareVariantDirective(
- DeclVarData->first, DeclVarData->second, TI,
+ DeclVarData->first, DeclVarData->second, TI, AdjustNothing,
+ AdjustNeedDevicePtr, AppendArgs, AdjustArgsLoc, AppendArgsLoc,
SourceRange(Loc, Tok.getLocation()));
// Skip the last annot_pragma_openmp_end.
(void)ConsumeAnnotationToken();
}
+/// Parse a list of interop-types. These are 'target' and 'targetsync'. Both
+/// are allowed but duplication of either is not meaningful.
+static Optional<OMPDeclareVariantAttr::InteropType>
+parseInteropTypeList(Parser &P) {
+ const Token &Tok = P.getCurToken();
+ bool HasError = false;
+ bool IsTarget = false;
+ bool IsTargetSync = false;
+
+ while (Tok.is(tok::identifier)) {
+ if (Tok.getIdentifierInfo()->isStr("target")) {
+ // OpenMP 5.1 [2.15.1, interop Construct, Restrictions]
+ // Each interop-type may be specified on an action-clause at most
+ // once.
+ if (IsTarget)
+ P.Diag(Tok, diag::warn_omp_more_one_interop_type) << "target";
+ IsTarget = true;
+ } else if (Tok.getIdentifierInfo()->isStr("targetsync")) {
+ if (IsTargetSync)
+ P.Diag(Tok, diag::warn_omp_more_one_interop_type) << "targetsync";
+ IsTargetSync = true;
+ } else {
+ HasError = true;
+ P.Diag(Tok, diag::err_omp_expected_interop_type);
+ }
+ P.ConsumeToken();
+
+ if (!Tok.is(tok::comma))
+ break;
+ P.ConsumeToken();
+ }
+ if (HasError)
+ return None;
+
+ if (!IsTarget && !IsTargetSync) {
+ P.Diag(Tok, diag::err_omp_expected_interop_type);
+ return None;
+ }
+
+ // As of OpenMP 5.1, there are two interop-types, "target" and
+ // "targetsync". Either or both are allowed for a single interop.
+ if (IsTarget && IsTargetSync)
+ return OMPDeclareVariantAttr::Target_TargetSync;
+ if (IsTarget)
+ return OMPDeclareVariantAttr::Target;
+ return OMPDeclareVariantAttr::TargetSync;
+}
+
+bool Parser::parseOpenMPAppendArgs(
+ SmallVectorImpl<OMPDeclareVariantAttr::InteropType> &InterOpTypes) {
+ bool HasError = false;
+ // Parse '('.
+ BalancedDelimiterTracker T(*this, tok::l_paren, tok::annot_pragma_openmp_end);
+ if (T.expectAndConsume(diag::err_expected_lparen_after,
+ getOpenMPClauseName(OMPC_append_args).data()))
+ return true;
+
+ // Parse the list of append-ops, each is:
+ // interop(interop-type[,interop-type]...)
+ while (Tok.is(tok::identifier) && Tok.getIdentifierInfo()->isStr("interop")) {
+ ConsumeToken();
+ BalancedDelimiterTracker IT(*this, tok::l_paren,
+ tok::annot_pragma_openmp_end);
+ if (IT.expectAndConsume(diag::err_expected_lparen_after, "interop"))
+ return true;
+
+ // Parse the interop-types.
+ if (Optional<OMPDeclareVariantAttr::InteropType> IType =
+ parseInteropTypeList(*this))
+ InterOpTypes.push_back(IType.getValue());
+ else
+ HasError = true;
+
+ IT.consumeClose();
+ if (Tok.is(tok::comma))
+ ConsumeToken();
+ }
+ if (!HasError && InterOpTypes.empty()) {
+ HasError = true;
+ Diag(Tok.getLocation(), diag::err_omp_unexpected_append_op);
+ SkipUntil(tok::comma, tok::r_paren, tok::annot_pragma_openmp_end,
+ StopBeforeMatch);
+ }
+ HasError = T.consumeClose() || HasError;
+ return HasError;
+}
+
bool Parser::parseOMPDeclareVariantMatchClause(SourceLocation Loc,
OMPTraitInfo &TI,
OMPTraitInfo *ParentTI) {
@@ -1431,24 +1583,15 @@ bool Parser::parseOMPDeclareVariantMatchClause(SourceLocation Loc,
: getOpenMPClauseKind(PP.getSpelling(Tok));
if (CKind != OMPC_match) {
Diag(Tok.getLocation(), diag::err_omp_declare_variant_wrong_clause)
- << getOpenMPClauseName(OMPC_match);
- while (!SkipUntil(tok::annot_pragma_openmp_end, Parser::StopBeforeMatch))
- ;
- // Skip the last annot_pragma_openmp_end.
- (void)ConsumeAnnotationToken();
+ << (getLangOpts().OpenMP < 51 ? 0 : 1);
return true;
}
(void)ConsumeToken();
// Parse '('.
BalancedDelimiterTracker T(*this, tok::l_paren, tok::annot_pragma_openmp_end);
if (T.expectAndConsume(diag::err_expected_lparen_after,
- getOpenMPClauseName(OMPC_match).data())) {
- while (!SkipUntil(tok::annot_pragma_openmp_end, StopBeforeMatch))
- ;
- // Skip the last annot_pragma_openmp_end.
- (void)ConsumeAnnotationToken();
+ getOpenMPClauseName(OMPC_match).data()))
return true;
- }
// Parse inner context selectors.
parseOMPContextSelectors(Loc, TI);
@@ -1532,7 +1675,7 @@ bool Parser::parseOMPDeclareVariantMatchClause(SourceLocation Loc,
///
void Parser::ParseOpenMPAssumesDirective(OpenMPDirectiveKind DKind,
SourceLocation Loc) {
- SmallVector<StringRef, 4> Assumptions;
+ SmallVector<std::string, 4> Assumptions;
bool SkippedClauses = false;
auto SkipBraces = [&](llvm::StringRef Spelling, bool IssueNote) {
@@ -1599,9 +1742,11 @@ void Parser::ParseOpenMPAssumesDirective(OpenMPDirectiveKind DKind,
}
assert(II && "Expected an identifier clause!");
- StringRef Assumption = II->getName();
+ std::string Assumption = II->getName().str();
if (ACMI.StartsWith)
- Assumption = Assumption.substr(ACMI.Identifier.size());
+ Assumption = "ompx_" + Assumption.substr(ACMI.Identifier.size());
+ else
+ Assumption = "omp_" + Assumption;
Assumptions.push_back(Assumption);
}
@@ -1651,7 +1796,7 @@ parseOpenMPSimpleClause(Parser &P, OpenMPClauseKind Kind) {
unsigned Type = getOpenMPSimpleClauseType(
Kind, Tok.isAnnotation() ? "" : P.getPreprocessor().getSpelling(Tok),
- P.getLangOpts().OpenMP);
+ P.getLangOpts());
SourceLocation TypeLoc = Tok.getLocation();
if (Tok.isNot(tok::r_paren) && Tok.isNot(tok::comma) &&
Tok.isNot(tok::annot_pragma_openmp_end))
@@ -2027,8 +2172,13 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirectiveWithExtDecl(
OMPTraitInfo *ParentTI = Actions.getOMPTraitInfoForSurroundingScope();
ASTContext &ASTCtx = Actions.getASTContext();
OMPTraitInfo &TI = ASTCtx.getNewOMPTraitInfo();
- if (parseOMPDeclareVariantMatchClause(Loc, TI, ParentTI))
+ if (parseOMPDeclareVariantMatchClause(Loc, TI, ParentTI)) {
+ while (!SkipUntil(tok::annot_pragma_openmp_end, Parser::StopBeforeMatch))
+ ;
+ // Skip the last annot_pragma_openmp_end.
+ (void)ConsumeAnnotationToken();
break;
+ }
// Skip last tokens.
skipUntilPragmaOpenMPEnd(OMPD_begin_declare_variant);
@@ -2044,8 +2194,10 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirectiveWithExtDecl(
// improve the diagnostic location.
Diag(Loc, diag::warn_unknown_begin_declare_variant_isa_trait) << ISATrait;
};
- TargetOMPContext OMPCtx(ASTCtx, std::move(DiagUnknownTrait),
- /* CurrentFunctionDecl */ nullptr);
+ TargetOMPContext OMPCtx(
+ ASTCtx, std::move(DiagUnknownTrait),
+ /* CurrentFunctionDecl */ nullptr,
+ /* ConstructTraits */ ArrayRef<llvm::omp::TraitProperty>());
if (isVariantApplicableInContext(VMI, OMPCtx, /* DeviceSetOnly */ true)) {
Actions.ActOnOpenMPBeginDeclareVariant(Loc, TI);
@@ -2222,6 +2374,8 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirectiveWithExtDecl(
case OMPD_target_teams_distribute_simd:
case OMPD_dispatch:
case OMPD_masked:
+ case OMPD_metadirective:
+ case OMPD_loop:
Diag(Tok, diag::err_omp_unexpected_directive)
<< 1 << getOpenMPDirectiveName(DKind);
break;
@@ -2276,8 +2430,10 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirectiveWithExtDecl(
///
StmtResult
Parser::ParseOpenMPDeclarativeOrExecutableDirective(ParsedStmtContext StmtCtx) {
- assert(Tok.isOneOf(tok::annot_pragma_openmp, tok::annot_attr_openmp) &&
- "Not an OpenMP directive!");
+ static bool ReadDirectiveWithinMetadirective = false;
+ if (!ReadDirectiveWithinMetadirective)
+ assert(Tok.isOneOf(tok::annot_pragma_openmp, tok::annot_attr_openmp) &&
+ "Not an OpenMP directive!");
ParsingOpenMPDirectiveRAII DirScope(*this);
ParenBraceBracketBalancer BalancerRAIIObj(*this);
SmallVector<OMPClause *, 5> Clauses;
@@ -2286,8 +2442,15 @@ Parser::ParseOpenMPDeclarativeOrExecutableDirective(ParsedStmtContext StmtCtx) {
FirstClauses(llvm::omp::Clause_enumSize + 1);
unsigned ScopeFlags = Scope::FnScope | Scope::DeclScope |
Scope::CompoundStmtScope | Scope::OpenMPDirectiveScope;
- SourceLocation Loc = ConsumeAnnotationToken(), EndLoc;
+ SourceLocation Loc = ReadDirectiveWithinMetadirective
+ ? Tok.getLocation()
+ : ConsumeAnnotationToken(),
+ EndLoc;
OpenMPDirectiveKind DKind = parseOpenMPDirectiveKind(*this);
+ if (ReadDirectiveWithinMetadirective && DKind == OMPD_unknown) {
+ Diag(Tok, diag::err_omp_unknown_directive);
+ return StmtError();
+ }
OpenMPDirectiveKind CancelRegion = OMPD_unknown;
// Name of critical directive.
DeclarationNameInfo DirName;
@@ -2295,6 +2458,141 @@ Parser::ParseOpenMPDeclarativeOrExecutableDirective(ParsedStmtContext StmtCtx) {
bool HasAssociatedStatement = true;
switch (DKind) {
+ case OMPD_metadirective: {
+ ConsumeToken();
+ SmallVector<VariantMatchInfo, 4> VMIs;
+
+ // First iteration of parsing all clauses of metadirective.
+ // This iteration only parses and collects all context selectors, ignoring the
+ // associated directives.
+ TentativeParsingAction TPA(*this);
+ ASTContext &ASTContext = Actions.getASTContext();
+
+ BalancedDelimiterTracker T(*this, tok::l_paren,
+ tok::annot_pragma_openmp_end);
+ while (Tok.isNot(tok::annot_pragma_openmp_end)) {
+ OpenMPClauseKind CKind = Tok.isAnnotation()
+ ? OMPC_unknown
+ : getOpenMPClauseKind(PP.getSpelling(Tok));
+ SourceLocation Loc = ConsumeToken();
+
+ // Parse '('.
+ if (T.expectAndConsume(diag::err_expected_lparen_after,
+ getOpenMPClauseName(CKind).data()))
+ return Directive;
+
+ OMPTraitInfo &TI = Actions.getASTContext().getNewOMPTraitInfo();
+ if (CKind == OMPC_when) {
+ // parse and get OMPTraitInfo to pass to the When clause
+ parseOMPContextSelectors(Loc, TI);
+ if (TI.Sets.size() == 0) {
+ Diag(Tok, diag::err_omp_expected_context_selector) << "when clause";
+ TPA.Commit();
+ return Directive;
+ }
+
+ // Parse ':'
+ if (Tok.is(tok::colon))
+ ConsumeAnyToken();
+ else {
+ Diag(Tok, diag::err_omp_expected_colon) << "when clause";
+ TPA.Commit();
+ return Directive;
+ }
+ }
+ // Skip the directive for now; it is parsed in the second iteration.
+ int paren = 0;
+ while (Tok.isNot(tok::r_paren) || paren != 0) {
+ if (Tok.is(tok::l_paren))
+ paren++;
+ if (Tok.is(tok::r_paren))
+ paren--;
+ if (Tok.is(tok::annot_pragma_openmp_end)) {
+ Diag(Tok, diag::err_omp_expected_punc)
+ << getOpenMPClauseName(CKind) << 0;
+ TPA.Commit();
+ return Directive;
+ }
+ ConsumeAnyToken();
+ }
+ // Parse ')'
+ if (Tok.is(tok::r_paren))
+ T.consumeClose();
+
+ VariantMatchInfo VMI;
+ TI.getAsVariantMatchInfo(ASTContext, VMI);
+
+ VMIs.push_back(VMI);
+ }
+
+ TPA.Revert();
+ // End of the first iteration; the parser is reset to the start of the metadirective.
+
+ TargetOMPContext OMPCtx(ASTContext, /* DiagUnknownTrait */ nullptr,
+ /* CurrentFunctionDecl */ nullptr,
+ ArrayRef<llvm::omp::TraitProperty>());
+
+ // A single match is returned for OpenMP 5.0
+ int BestIdx = getBestVariantMatchForContext(VMIs, OMPCtx);
+
+ int Idx = 0;
+ // In OpenMP 5.0 a metadirective is either replaced by the selected
+ // directive or ignored.
+ // TODO: In OpenMP 5.1 generate multiple directives based upon the matches
+ // found by getBestVariantMatchForContext.
+ while (Tok.isNot(tok::annot_pragma_openmp_end)) {
+ // OpenMP 5.0 implementation - Skip to the best index found.
+ if (Idx++ != BestIdx) {
+ ConsumeToken(); // Consume clause name
+ T.consumeOpen(); // Consume '('
+ int paren = 0;
+ // Skip everything inside the clause
+ while (Tok.isNot(tok::r_paren) || paren != 0) {
+ if (Tok.is(tok::l_paren))
+ paren++;
+ if (Tok.is(tok::r_paren))
+ paren--;
+ ConsumeAnyToken();
+ }
+ // Parse ')'
+ if (Tok.is(tok::r_paren))
+ T.consumeClose();
+ continue;
+ }
+
+ OpenMPClauseKind CKind = Tok.isAnnotation()
+ ? OMPC_unknown
+ : getOpenMPClauseKind(PP.getSpelling(Tok));
+ SourceLocation Loc = ConsumeToken();
+
+ // Parse '('.
+ T.consumeOpen();
+
+ // Skip ContextSelectors for when clause
+ if (CKind == OMPC_when) {
+ OMPTraitInfo &TI = Actions.getASTContext().getNewOMPTraitInfo();
+ // parse and skip the ContextSelectors
+ parseOMPContextSelectors(Loc, TI);
+
+ // Parse ':'
+ ConsumeAnyToken();
+ }
+
+ // If no directive is specified, skip it in OpenMP 5.0 mode.
+ // TODO: Generate a 'nothing' directive as introduced in OpenMP 5.1.
+ if (Tok.is(tok::r_paren)) {
+ SkipUntil(tok::annot_pragma_openmp_end);
+ break;
+ }
+
+ // Parse Directive
+ ReadDirectiveWithinMetadirective = true;
+ Directive = ParseOpenMPDeclarativeOrExecutableDirective(StmtCtx);
+ ReadDirectiveWithinMetadirective = false;
+ break;
+ }
+ break;
+ }
case OMPD_threadprivate: {
// FIXME: Should this be permitted in C++?
if ((StmtCtx & ParsedStmtContext::AllowDeclarationsInC) ==
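For orientation, the two-pass metadirective parsing added above targets OpenMP 5.0 source of roughly the following shape; this is an illustrative sketch only (N and work() are placeholders), where the first pass collects the context selectors of each when clause and the second pass parses only the variant selected for the compilation context:

// Illustrative usage, not part of this patch.
#pragma omp metadirective \
    when(device = {arch(nvptx)} : target teams distribute parallel for) \
    default(parallel for)
for (int i = 0; i < N; ++i)
  work(i);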
@@ -2427,6 +2725,7 @@ Parser::ParseOpenMPDeclarativeOrExecutableDirective(ParsedStmtContext StmtCtx) {
case OMPD_target_data:
case OMPD_target_parallel:
case OMPD_target_parallel_for:
+ case OMPD_loop:
case OMPD_taskloop:
case OMPD_taskloop_simd:
case OMPD_master_taskloop:
@@ -2486,6 +2785,13 @@ Parser::ParseOpenMPDeclarativeOrExecutableDirective(ParsedStmtContext StmtCtx) {
Actions.StartOpenMPDSABlock(DKind, DirName, Actions.getCurScope(), Loc);
while (Tok.isNot(tok::annot_pragma_openmp_end)) {
+ // If we are parsing a directive within a metadirective, the directive
+ // ends with a ')'.
+ if (ReadDirectiveWithinMetadirective && Tok.is(tok::r_paren)) {
+ while (Tok.isNot(tok::annot_pragma_openmp_end))
+ ConsumeAnyToken();
+ break;
+ }
bool HasImplicitClause = false;
if (ImplicitClauseAllowed && Tok.is(tok::l_paren)) {
HasImplicitClause = true;
@@ -2562,8 +2868,7 @@ Parser::ParseOpenMPDeclarativeOrExecutableDirective(ParsedStmtContext StmtCtx) {
if (AssociatedStmt.isUsable() && isOpenMPLoopDirective(DKind) &&
getLangOpts().OpenMPIRBuilder)
- AssociatedStmt =
- Actions.ActOnOpenMPCanonicalLoop(AssociatedStmt.get());
+ AssociatedStmt = Actions.ActOnOpenMPLoopnest(AssociatedStmt.get());
}
AssociatedStmt = Actions.ActOnOpenMPRegionEnd(AssociatedStmt, Clauses);
} else if (DKind == OMPD_target_update || DKind == OMPD_target_enter_data ||
@@ -2751,7 +3056,7 @@ OMPClause *Parser::ParseOpenMPUsesAllocatorClause(OpenMPDirectiveKind DKind) {
/// clause:
/// if-clause | final-clause | num_threads-clause | safelen-clause |
/// default-clause | private-clause | firstprivate-clause | shared-clause
-/// | linear-clause | aligned-clause | collapse-clause |
+/// | linear-clause | aligned-clause | collapse-clause | bind-clause |
/// lastprivate-clause | reduction-clause | proc_bind-clause |
/// schedule-clause | copyin-clause | copyprivate-clause | untied-clause |
/// mergeable-clause | flush-clause | read-clause | write-clause |
@@ -2800,6 +3105,7 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind,
case OMPC_nocontext:
case OMPC_filter:
case OMPC_partial:
+ case OMPC_align:
// OpenMP [2.5, Restrictions]
// At most one num_threads clause can appear on the directive.
// OpenMP [2.8.1, simd construct, Restrictions]
@@ -2841,6 +3147,7 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind,
case OMPC_proc_bind:
case OMPC_atomic_default_mem_order:
case OMPC_order:
+ case OMPC_bind:
// OpenMP [2.14.3.1, Restrictions]
// Only a single default clause may be specified on a parallel, task or
// teams directive.
@@ -2849,6 +3156,8 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind,
// OpenMP [5.0, Requires directive, Restrictions]
// At most one atomic_default_mem_order clause can appear
// on the directive
+ // OpenMP 5.1, 2.11.7 loop Construct, Restrictions.
+ // At most one bind clause can appear on a loop directive.
if (!FirstClause && CKind != OMPC_order) {
Diag(Tok, diag::err_omp_more_one_clause)
<< getOpenMPDirectiveName(DKind) << getOpenMPClauseName(CKind) << 0;
@@ -3054,6 +3363,9 @@ ExprResult Parser::ParseOpenMPParensExpr(StringRef ClauseName,
/// detach-clause:
/// 'detach' '(' event-handler-expression ')'
///
+/// align-clause:
+/// 'align' '(' positive-integer-constant ')'
+///
OMPClause *Parser::ParseOpenMPSingleExprClause(OpenMPClauseKind Kind,
bool ParseOnly) {
SourceLocation Loc = ConsumeToken();
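A usage sketch for the new align-clause grammar (illustrative only; arr is a placeholder): OpenMP 5.1 allows an align clause on the allocate directive.

// Illustrative usage, not part of this patch.
int arr[64];
#pragma omp allocate(arr) align(64)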
@@ -3143,36 +3455,15 @@ OMPClause *Parser::ParseOpenMPInteropClause(OpenMPClauseKind Kind,
}
// Parse the interop-types.
- bool HasError = false;
- while (Tok.is(tok::identifier)) {
- if (PP.getSpelling(Tok) == "target") {
- // OpenMP 5.1 [2.15.1, interop Construct, Restrictions]
- // Each interop-type may be specified on an action-clause at most
- // once.
- if (IsTarget)
- Diag(Tok, diag::warn_omp_more_one_interop_type) << "target";
- IsTarget = true;
- } else if (PP.getSpelling(Tok) == "targetsync") {
- if (IsTargetSync)
- Diag(Tok, diag::warn_omp_more_one_interop_type) << "targetsync";
- IsTargetSync = true;
- } else {
- HasError = true;
- Diag(Tok, diag::err_omp_expected_interop_type);
- }
- ConsumeToken();
-
- if (!Tok.is(tok::comma))
- break;
- ConsumeToken();
+ if (Optional<OMPDeclareVariantAttr::InteropType> IType =
+ parseInteropTypeList(*this)) {
+ IsTarget = IType != OMPDeclareVariantAttr::TargetSync;
+ IsTargetSync = IType != OMPDeclareVariantAttr::Target;
+ if (Tok.isNot(tok::colon))
+ Diag(Tok, diag::warn_pragma_expected_colon) << "interop types";
}
- if (!HasError && !IsTarget && !IsTargetSync)
- Diag(Tok, diag::err_omp_expected_interop_type);
-
if (Tok.is(tok::colon))
ConsumeToken();
- else if (IsTarget || IsTargetSync)
- Diag(Tok, diag::warn_pragma_expected_colon) << "interop types";
}
// Parse the variable.
@@ -3216,6 +3507,9 @@ OMPClause *Parser::ParseOpenMPInteropClause(OpenMPClauseKind Kind,
/// proc_bind-clause:
/// 'proc_bind' '(' 'master' | 'close' | 'spread' ')'
///
+/// bind-clause:
+/// 'bind' '(' 'teams' | 'parallel' | 'thread' ')'
+///
/// update-clause:
/// 'update' '(' 'in' | 'out' | 'inout' | 'mutexinoutset' ')'
///
@@ -3310,8 +3604,7 @@ OMPClause *Parser::ParseOpenMPSingleExprWithArgClause(OpenMPDirectiveKind DKind,
Arg[Modifier2] = OMPC_SCHEDULE_MODIFIER_unknown;
Arg[ScheduleKind] = OMPC_SCHEDULE_unknown;
unsigned KindModifier = getOpenMPSimpleClauseType(
- Kind, Tok.isAnnotation() ? "" : PP.getSpelling(Tok),
- getLangOpts().OpenMP);
+ Kind, Tok.isAnnotation() ? "" : PP.getSpelling(Tok), getLangOpts());
if (KindModifier > OMPC_SCHEDULE_unknown) {
// Parse 'modifier'
Arg[Modifier1] = KindModifier;
@@ -3323,8 +3616,7 @@ OMPClause *Parser::ParseOpenMPSingleExprWithArgClause(OpenMPDirectiveKind DKind,
// Parse ',' 'modifier'
ConsumeAnyToken();
KindModifier = getOpenMPSimpleClauseType(
- Kind, Tok.isAnnotation() ? "" : PP.getSpelling(Tok),
- getLangOpts().OpenMP);
+ Kind, Tok.isAnnotation() ? "" : PP.getSpelling(Tok), getLangOpts());
Arg[Modifier2] = KindModifier > OMPC_SCHEDULE_unknown
? KindModifier
: (unsigned)OMPC_SCHEDULE_unknown;
@@ -3339,8 +3631,7 @@ OMPClause *Parser::ParseOpenMPSingleExprWithArgClause(OpenMPDirectiveKind DKind,
else
Diag(Tok, diag::warn_pragma_expected_colon) << "schedule modifier";
KindModifier = getOpenMPSimpleClauseType(
- Kind, Tok.isAnnotation() ? "" : PP.getSpelling(Tok),
- getLangOpts().OpenMP);
+ Kind, Tok.isAnnotation() ? "" : PP.getSpelling(Tok), getLangOpts());
}
Arg[ScheduleKind] = KindModifier;
KLoc[ScheduleKind] = Tok.getLocation();
@@ -3354,8 +3645,7 @@ OMPClause *Parser::ParseOpenMPSingleExprWithArgClause(OpenMPDirectiveKind DKind,
DelimLoc = ConsumeAnyToken();
} else if (Kind == OMPC_dist_schedule) {
Arg.push_back(getOpenMPSimpleClauseType(
- Kind, Tok.isAnnotation() ? "" : PP.getSpelling(Tok),
- getLangOpts().OpenMP));
+ Kind, Tok.isAnnotation() ? "" : PP.getSpelling(Tok), getLangOpts()));
KLoc.push_back(Tok.getLocation());
if (Tok.isNot(tok::r_paren) && Tok.isNot(tok::comma) &&
Tok.isNot(tok::annot_pragma_openmp_end))
@@ -3365,8 +3655,7 @@ OMPClause *Parser::ParseOpenMPSingleExprWithArgClause(OpenMPDirectiveKind DKind,
} else if (Kind == OMPC_defaultmap) {
// Get a defaultmap modifier
unsigned Modifier = getOpenMPSimpleClauseType(
- Kind, Tok.isAnnotation() ? "" : PP.getSpelling(Tok),
- getLangOpts().OpenMP);
+ Kind, Tok.isAnnotation() ? "" : PP.getSpelling(Tok), getLangOpts());
// Set defaultmap modifier to unknown if it is either scalar, aggregate, or
// pointer
if (Modifier < OMPC_DEFAULTMAP_MODIFIER_unknown)
@@ -3384,8 +3673,7 @@ OMPClause *Parser::ParseOpenMPSingleExprWithArgClause(OpenMPDirectiveKind DKind,
Diag(Tok, diag::warn_pragma_expected_colon) << "defaultmap modifier";
// Get a defaultmap kind
Arg.push_back(getOpenMPSimpleClauseType(
- Kind, Tok.isAnnotation() ? "" : PP.getSpelling(Tok),
- getLangOpts().OpenMP));
+ Kind, Tok.isAnnotation() ? "" : PP.getSpelling(Tok), getLangOpts()));
KLoc.push_back(Tok.getLocation());
if (Tok.isNot(tok::r_paren) && Tok.isNot(tok::comma) &&
Tok.isNot(tok::annot_pragma_openmp_end))
@@ -3400,8 +3688,7 @@ OMPClause *Parser::ParseOpenMPSingleExprWithArgClause(OpenMPDirectiveKind DKind,
NextToken().is(tok::colon)) {
// Parse optional <device modifier> ':'
Arg.push_back(getOpenMPSimpleClauseType(
- Kind, Tok.isAnnotation() ? "" : PP.getSpelling(Tok),
- getLangOpts().OpenMP));
+ Kind, Tok.isAnnotation() ? "" : PP.getSpelling(Tok), getLangOpts()));
KLoc.push_back(Tok.getLocation());
ConsumeAnyToken();
// Parse ':'
@@ -3512,7 +3799,7 @@ static OpenMPMapModifierKind isMapModifier(Parser &P) {
Preprocessor &PP = P.getPreprocessor();
OpenMPMapModifierKind TypeModifier =
static_cast<OpenMPMapModifierKind>(getOpenMPSimpleClauseType(
- OMPC_map, PP.getSpelling(Tok), P.getLangOpts().OpenMP));
+ OMPC_map, PP.getSpelling(Tok), P.getLangOpts()));
return TypeModifier;
}
@@ -3554,7 +3841,8 @@ bool Parser::parseMapTypeModifiers(OpenMPVarListDataTy &Data) {
OpenMPMapModifierKind TypeModifier = isMapModifier(*this);
if (TypeModifier == OMPC_MAP_MODIFIER_always ||
TypeModifier == OMPC_MAP_MODIFIER_close ||
- TypeModifier == OMPC_MAP_MODIFIER_present) {
+ TypeModifier == OMPC_MAP_MODIFIER_present ||
+ TypeModifier == OMPC_MAP_MODIFIER_ompx_hold) {
Data.MapTypeModifiers.push_back(TypeModifier);
Data.MapTypeModifiersLoc.push_back(Tok.getLocation());
ConsumeToken();
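A usage sketch of the newly accepted ompx_hold map-type modifier, an LLVM extension (hence the OpenMPExtensions operand added to the diagnostic in the hunk below); buf is a placeholder:

// Illustrative usage, not part of this patch.
#pragma omp target data map(ompx_hold, tofrom: buf)
{
  // buf stays mapped for the whole data region
}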
@@ -3577,7 +3865,8 @@ bool Parser::parseMapTypeModifiers(OpenMPVarListDataTy &Data) {
if (PP.LookAhead(0).is(tok::colon))
return false;
Diag(Tok, diag::err_omp_unknown_map_type_modifier)
- << (getLangOpts().OpenMP >= 51 ? 1 : 0);
+ << (getLangOpts().OpenMP >= 51 ? 1 : 0)
+ << getLangOpts().OpenMPExtensions;
ConsumeToken();
}
if (getCurToken().is(tok::comma))
@@ -3596,7 +3885,7 @@ static OpenMPMapClauseKind isMapType(Parser &P) {
Preprocessor &PP = P.getPreprocessor();
OpenMPMapClauseKind MapType =
static_cast<OpenMPMapClauseKind>(getOpenMPSimpleClauseType(
- OMPC_map, PP.getSpelling(Tok), P.getLangOpts().OpenMP));
+ OMPC_map, PP.getSpelling(Tok), P.getLangOpts()));
return MapType;
}
@@ -3749,8 +4038,8 @@ bool Parser::ParseOpenMPVarList(OpenMPDirectiveKind DKind,
(Tok.is(tok::identifier) || Tok.is(tok::kw_default)) &&
NextToken().is(tok::comma)) {
// Parse optional reduction modifier.
- Data.ExtraModifier = getOpenMPSimpleClauseType(Kind, PP.getSpelling(Tok),
- getLangOpts().OpenMP);
+ Data.ExtraModifier =
+ getOpenMPSimpleClauseType(Kind, PP.getSpelling(Tok), getLangOpts());
Data.ExtraModifierLoc = Tok.getLocation();
ConsumeToken();
assert(Tok.is(tok::comma) && "Expected comma.");
@@ -3796,7 +4085,7 @@ bool Parser::ParseOpenMPVarList(OpenMPDirectiveKind DKind,
ColonProtectionRAIIObject ColonRAII(*this);
Data.ExtraModifier = getOpenMPSimpleClauseType(
Kind, Tok.is(tok::identifier) ? PP.getSpelling(Tok) : "",
- getLangOpts().OpenMP);
+ getLangOpts());
Data.ExtraModifierLoc = Tok.getLocation();
if (Data.ExtraModifier == OMPC_DEPEND_unknown) {
SkipUntil(tok::colon, tok::r_paren, tok::annot_pragma_openmp_end,
@@ -3821,8 +4110,8 @@ bool Parser::ParseOpenMPVarList(OpenMPDirectiveKind DKind,
// Try to parse modifier if any.
Data.ExtraModifier = OMPC_LINEAR_val;
if (Tok.is(tok::identifier) && PP.LookAhead(0).is(tok::l_paren)) {
- Data.ExtraModifier = getOpenMPSimpleClauseType(Kind, PP.getSpelling(Tok),
- getLangOpts().OpenMP);
+ Data.ExtraModifier =
+ getOpenMPSimpleClauseType(Kind, PP.getSpelling(Tok), getLangOpts());
Data.ExtraModifierLoc = ConsumeToken();
LinearT.consumeOpen();
NeedRParenForLinear = true;
@@ -3835,8 +4124,8 @@ bool Parser::ParseOpenMPVarList(OpenMPDirectiveKind DKind,
if ((getLangOpts().OpenMP >= 50 && !isOpenMPDistributeDirective(DKind) &&
!isOpenMPTaskLoopDirective(DKind)) &&
Tok.is(tok::identifier) && PP.LookAhead(0).is(tok::colon)) {
- Data.ExtraModifier = getOpenMPSimpleClauseType(Kind, PP.getSpelling(Tok),
- getLangOpts().OpenMP);
+ Data.ExtraModifier =
+ getOpenMPSimpleClauseType(Kind, PP.getSpelling(Tok), getLangOpts());
Data.ExtraModifierLoc = Tok.getLocation();
ConsumeToken();
assert(Tok.is(tok::colon) && "Expected colon.");
@@ -3879,9 +4168,8 @@ bool Parser::ParseOpenMPVarList(OpenMPDirectiveKind DKind,
Data.ColonLoc = ConsumeToken();
} else if (Kind == OMPC_to || Kind == OMPC_from) {
while (Tok.is(tok::identifier)) {
- auto Modifier =
- static_cast<OpenMPMotionModifierKind>(getOpenMPSimpleClauseType(
- Kind, PP.getSpelling(Tok), getLangOpts().OpenMP));
+ auto Modifier = static_cast<OpenMPMotionModifierKind>(
+ getOpenMPSimpleClauseType(Kind, PP.getSpelling(Tok), getLangOpts()));
if (Modifier == OMPC_MOTION_MODIFIER_unknown)
break;
Data.MotionModifiers.push_back(Modifier);
@@ -3951,6 +4239,23 @@ bool Parser::ParseOpenMPVarList(OpenMPDirectiveKind DKind,
SkipUntil(tok::comma, tok::r_paren, tok::annot_pragma_openmp_end,
StopBeforeMatch);
}
+ } else if (Kind == OMPC_adjust_args) {
+ // Handle adjust-op for adjust_args clause.
+ ColonProtectionRAIIObject ColonRAII(*this);
+ Data.ExtraModifier = getOpenMPSimpleClauseType(
+ Kind, Tok.is(tok::identifier) ? PP.getSpelling(Tok) : "",
+ getLangOpts());
+ Data.ExtraModifierLoc = Tok.getLocation();
+ if (Data.ExtraModifier == OMPC_ADJUST_ARGS_unknown) {
+ SkipUntil(tok::colon, tok::r_paren, tok::annot_pragma_openmp_end,
+ StopBeforeMatch);
+ } else {
+ ConsumeToken();
+ if (Tok.is(tok::colon))
+ Data.ColonLoc = Tok.getLocation();
+ ExpectAndConsume(tok::colon, diag::warn_pragma_expected_colon,
+ "adjust-op");
+ }
}
bool IsComma =
@@ -3958,7 +4263,9 @@ bool Parser::ParseOpenMPVarList(OpenMPDirectiveKind DKind,
Kind != OMPC_in_reduction && Kind != OMPC_depend && Kind != OMPC_map) ||
(Kind == OMPC_reduction && !InvalidReductionId) ||
(Kind == OMPC_map && Data.ExtraModifier != OMPC_MAP_unknown) ||
- (Kind == OMPC_depend && Data.ExtraModifier != OMPC_DEPEND_unknown);
+ (Kind == OMPC_depend && Data.ExtraModifier != OMPC_DEPEND_unknown) ||
+ (Kind == OMPC_adjust_args &&
+ Data.ExtraModifier != OMPC_ADJUST_ARGS_unknown);
const bool MayHaveTail = (Kind == OMPC_linear || Kind == OMPC_aligned);
while (IsComma || (Tok.isNot(tok::r_paren) && Tok.isNot(tok::colon) &&
Tok.isNot(tok::annot_pragma_openmp_end))) {
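A usage sketch of the adjust_args clause whose adjust-op parsing is added above (illustrative only; variant_fn, base_fn, and p are placeholders):

// Illustrative OpenMP 5.1 usage, not part of this patch.
void variant_fn(int *p);
#pragma omp declare variant(variant_fn) match(construct={dispatch}) \
    adjust_args(need_device_ptr : p)
void base_fn(int *p);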
diff --git a/clang/lib/Parse/ParsePragma.cpp b/clang/lib/Parse/ParsePragma.cpp
index 42072fe63fc8..27e850127862 100644
--- a/clang/lib/Parse/ParsePragma.cpp
+++ b/clang/lib/Parse/ParsePragma.cpp
@@ -261,6 +261,68 @@ struct PragmaMSOptimizeHandler : public PragmaHandler {
Token &FirstToken) override;
};
+// "\#pragma fenv_access (on)".
+struct PragmaMSFenvAccessHandler : public PragmaHandler {
+ PragmaMSFenvAccessHandler() : PragmaHandler("fenv_access") {}
+ void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
+ Token &FirstToken) override {
+ StringRef PragmaName = FirstToken.getIdentifierInfo()->getName();
+ if (!PP.getTargetInfo().hasStrictFP() && !PP.getLangOpts().ExpStrictFP) {
+ PP.Diag(FirstToken.getLocation(), diag::warn_pragma_fp_ignored)
+ << PragmaName;
+ return;
+ }
+
+ Token Tok;
+ PP.Lex(Tok);
+ if (Tok.isNot(tok::l_paren)) {
+ PP.Diag(Tok.getLocation(), diag::warn_pragma_expected_lparen)
+ << PragmaName;
+ return;
+ }
+ PP.Lex(Tok); // Consume the l_paren.
+ if (Tok.isNot(tok::identifier)) {
+ PP.Diag(Tok.getLocation(), diag::warn_pragma_ms_fenv_access);
+ return;
+ }
+ const IdentifierInfo *II = Tok.getIdentifierInfo();
+ tok::OnOffSwitch OOS;
+ if (II->isStr("on")) {
+ OOS = tok::OOS_ON;
+ PP.Lex(Tok);
+ } else if (II->isStr("off")) {
+ OOS = tok::OOS_OFF;
+ PP.Lex(Tok);
+ } else {
+ PP.Diag(Tok.getLocation(), diag::warn_pragma_ms_fenv_access);
+ return;
+ }
+ if (Tok.isNot(tok::r_paren)) {
+ PP.Diag(Tok.getLocation(), diag::warn_pragma_expected_rparen)
+ << PragmaName;
+ return;
+ }
+ PP.Lex(Tok); // Consume the r_paren.
+
+ if (Tok.isNot(tok::eod)) {
+ PP.Diag(Tok.getLocation(), diag::warn_pragma_extra_tokens_at_eol)
+ << PragmaName;
+ return;
+ }
+
+ MutableArrayRef<Token> Toks(
+ PP.getPreprocessorAllocator().Allocate<Token>(1), 1);
+ Toks[0].startToken();
+ Toks[0].setKind(tok::annot_pragma_fenv_access_ms);
+ Toks[0].setLocation(FirstToken.getLocation());
+ Toks[0].setAnnotationEndLoc(Tok.getLocation());
+ Toks[0].setAnnotationValue(
+ reinterpret_cast<void*>(static_cast<uintptr_t>(OOS)));
+ PP.EnterTokenStream(Toks, /*DisableMacroExpansion=*/true,
+ /*IsReinject=*/false);
+ }
+};
+
struct PragmaForceCUDAHostDeviceHandler : public PragmaHandler {
PragmaForceCUDAHostDeviceHandler(Sema &Actions)
: PragmaHandler("force_cuda_host_device"), Actions(Actions) {}
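The new handler accepts the Microsoft-style spelling of the pragma; a usage sketch (illustrative only, add() is a placeholder):

// Illustrative usage, not part of this patch.
#pragma fenv_access (on)
double add(double a, double b) {
  return a + b;   // strict floating-point semantics while fenv_access is on
}
#pragma fenv_access (off)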
@@ -389,6 +451,8 @@ void Parser::initializePragmaHandlers() {
PP.AddPragmaHandler(MSIntrinsic.get());
MSOptimize = std::make_unique<PragmaMSOptimizeHandler>();
PP.AddPragmaHandler(MSOptimize.get());
+ MSFenvAccess = std::make_unique<PragmaMSFenvAccessHandler>();
+ PP.AddPragmaHandler(MSFenvAccess.get());
}
if (getLangOpts().CUDA) {
@@ -496,6 +560,8 @@ void Parser::resetPragmaHandlers() {
MSIntrinsic.reset();
PP.RemovePragmaHandler(MSOptimize.get());
MSOptimize.reset();
+ PP.RemovePragmaHandler(MSFenvAccess.get());
+ MSFenvAccess.reset();
}
if (getLangOpts().CUDA) {
@@ -701,7 +767,8 @@ void Parser::HandlePragmaFloatControl() {
}
void Parser::HandlePragmaFEnvAccess() {
- assert(Tok.is(tok::annot_pragma_fenv_access));
+ assert(Tok.is(tok::annot_pragma_fenv_access) ||
+ Tok.is(tok::annot_pragma_fenv_access_ms));
tok::OnOffSwitch OOS =
static_cast<tok::OnOffSwitch>(
reinterpret_cast<uintptr_t>(Tok.getAnnotationValue()));
@@ -1592,6 +1659,15 @@ void Parser::HandlePragmaAttribute() {
if (ExpectAndConsume(tok::l_paren, diag::err_expected_lparen_after, "("))
return SkipToEnd();
+ // FIXME: The practical usefulness of completion here is limited because
+ // we only get here if the line has balanced parens.
+ if (Tok.is(tok::code_completion)) {
+ cutOffParsing();
+ // FIXME: suppress completion of unsupported attributes?
+ Actions.CodeCompleteAttribute(AttributeCommonInfo::Syntax::AS_GNU);
+ return SkipToEnd();
+ }
+
if (Tok.isNot(tok::identifier)) {
Diag(Tok, diag::err_pragma_attribute_expected_attribute_name);
SkipToEnd();
diff --git a/clang/lib/Parse/ParseStmt.cpp b/clang/lib/Parse/ParseStmt.cpp
index ebfe048513b1..bb8718671bb0 100644
--- a/clang/lib/Parse/ParseStmt.cpp
+++ b/clang/lib/Parse/ParseStmt.cpp
@@ -374,8 +374,11 @@ Retry:
return StmtError();
case tok::annot_pragma_fenv_access:
+ case tok::annot_pragma_fenv_access_ms:
ProhibitAttributes(Attrs);
- Diag(Tok, diag::err_pragma_stdc_fenv_access_scope);
+ Diag(Tok, diag::err_pragma_file_or_compound_scope)
+ << (Kind == tok::annot_pragma_fenv_access ? "STDC FENV_ACCESS"
+ : "fenv_access");
ConsumeAnnotationToken();
return StmtEmpty();
@@ -955,6 +958,7 @@ void Parser::ParseCompoundStatementLeadingPragmas() {
HandlePragmaFP();
break;
case tok::annot_pragma_fenv_access:
+ case tok::annot_pragma_fenv_access_ms:
HandlePragmaFEnvAccess();
break;
case tok::annot_pragma_fenv_round:
@@ -1338,20 +1342,36 @@ struct MisleadingIndentationChecker {
/// 'if' '(' expression ')' statement 'else' statement
/// [C++] 'if' '(' condition ')' statement
/// [C++] 'if' '(' condition ')' statement 'else' statement
+/// [C++23] 'if' '!' [opt] consteval compound-statement
+/// [C++23] 'if' '!' [opt] consteval compound-statement 'else' statement
///
StmtResult Parser::ParseIfStatement(SourceLocation *TrailingElseLoc) {
assert(Tok.is(tok::kw_if) && "Not an if stmt!");
SourceLocation IfLoc = ConsumeToken(); // eat the 'if'.
bool IsConstexpr = false;
+ bool IsConsteval = false;
+ SourceLocation NotLocation;
+ SourceLocation ConstevalLoc;
+
if (Tok.is(tok::kw_constexpr)) {
Diag(Tok, getLangOpts().CPlusPlus17 ? diag::warn_cxx14_compat_constexpr_if
: diag::ext_constexpr_if);
IsConstexpr = true;
ConsumeToken();
- }
+ } else {
+ if (Tok.is(tok::exclaim)) {
+ NotLocation = ConsumeToken();
+ }
- if (Tok.isNot(tok::l_paren)) {
+ if (Tok.is(tok::kw_consteval)) {
+ Diag(Tok, getLangOpts().CPlusPlus2b ? diag::warn_cxx20_compat_consteval_if
+ : diag::ext_consteval_if);
+ IsConsteval = true;
+ ConstevalLoc = ConsumeToken();
+ }
+ }
+ if (!IsConsteval && (NotLocation.isValid() || Tok.isNot(tok::l_paren))) {
Diag(Tok, diag::err_expected_lparen_after) << "if";
SkipUntil(tok::semi);
return StmtError();
@@ -1378,15 +1398,18 @@ StmtResult Parser::ParseIfStatement(SourceLocation *TrailingElseLoc) {
Sema::ConditionResult Cond;
SourceLocation LParen;
SourceLocation RParen;
- if (ParseParenExprOrCondition(&InitStmt, Cond, IfLoc,
- IsConstexpr ? Sema::ConditionKind::ConstexprIf
- : Sema::ConditionKind::Boolean,
- &LParen, &RParen))
- return StmtError();
-
llvm::Optional<bool> ConstexprCondition;
- if (IsConstexpr)
- ConstexprCondition = Cond.getKnownValue();
+ if (!IsConsteval) {
+
+ if (ParseParenExprOrCondition(&InitStmt, Cond, IfLoc,
+ IsConstexpr ? Sema::ConditionKind::ConstexprIf
+ : Sema::ConditionKind::Boolean,
+ &LParen, &RParen))
+ return StmtError();
+
+ if (IsConstexpr)
+ ConstexprCondition = Cond.getKnownValue();
+ }
bool IsBracedThen = Tok.is(tok::l_brace);
@@ -1418,10 +1441,17 @@ StmtResult Parser::ParseIfStatement(SourceLocation *TrailingElseLoc) {
SourceLocation InnerStatementTrailingElseLoc;
StmtResult ThenStmt;
{
+ bool ShouldEnter = ConstexprCondition && !*ConstexprCondition;
+ Sema::ExpressionEvaluationContext Context =
+ Sema::ExpressionEvaluationContext::DiscardedStatement;
+ if (NotLocation.isInvalid() && IsConsteval) {
+ Context = Sema::ExpressionEvaluationContext::ImmediateFunctionContext;
+ ShouldEnter = true;
+ }
+
EnterExpressionEvaluationContext PotentiallyDiscarded(
- Actions, Sema::ExpressionEvaluationContext::DiscardedStatement, nullptr,
- Sema::ExpressionEvaluationContextRecord::EK_Other,
- /*ShouldEnter=*/ConstexprCondition && !*ConstexprCondition);
+ Actions, Context, nullptr,
+ Sema::ExpressionEvaluationContextRecord::EK_Other, ShouldEnter);
ThenStmt = ParseStatement(&InnerStatementTrailingElseLoc);
}
@@ -1456,11 +1486,17 @@ StmtResult Parser::ParseIfStatement(SourceLocation *TrailingElseLoc) {
Tok.is(tok::l_brace));
MisleadingIndentationChecker MIChecker(*this, MSK_else, ElseLoc);
+ bool ShouldEnter = ConstexprCondition && *ConstexprCondition;
+ Sema::ExpressionEvaluationContext Context =
+ Sema::ExpressionEvaluationContext::DiscardedStatement;
+ if (NotLocation.isValid() && IsConsteval) {
+ Context = Sema::ExpressionEvaluationContext::ImmediateFunctionContext;
+ ShouldEnter = true;
+ }
EnterExpressionEvaluationContext PotentiallyDiscarded(
- Actions, Sema::ExpressionEvaluationContext::DiscardedStatement, nullptr,
- Sema::ExpressionEvaluationContextRecord::EK_Other,
- /*ShouldEnter=*/ConstexprCondition && *ConstexprCondition);
+ Actions, Context, nullptr,
+ Sema::ExpressionEvaluationContextRecord::EK_Other, ShouldEnter);
ElseStmt = ParseStatement();
if (ElseStmt.isUsable())
@@ -1488,14 +1524,40 @@ StmtResult Parser::ParseIfStatement(SourceLocation *TrailingElseLoc) {
return StmtError();
}
+ if (IsConsteval) {
+ auto IsCompoundStatement = [](const Stmt *S) {
+ if (const auto *Outer = dyn_cast_or_null<AttributedStmt>(S))
+ S = Outer->getSubStmt();
+ return isa_and_nonnull<clang::CompoundStmt>(S);
+ };
+
+ if (!IsCompoundStatement(ThenStmt.get())) {
+ Diag(ConstevalLoc, diag::err_expected_after) << "consteval"
+ << "{";
+ return StmtError();
+ }
+ if (!ElseStmt.isUnset() && !IsCompoundStatement(ElseStmt.get())) {
+ Diag(ElseLoc, diag::err_expected_after) << "else"
+ << "{";
+ return StmtError();
+ }
+ }
+
// Now if either are invalid, replace with a ';'.
if (ThenStmt.isInvalid())
ThenStmt = Actions.ActOnNullStmt(ThenStmtLoc);
if (ElseStmt.isInvalid())
ElseStmt = Actions.ActOnNullStmt(ElseStmtLoc);
- return Actions.ActOnIfStmt(IfLoc, IsConstexpr, LParen, InitStmt.get(), Cond,
- RParen, ThenStmt.get(), ElseLoc, ElseStmt.get());
+ IfStatementKind Kind = IfStatementKind::Ordinary;
+ if (IsConstexpr)
+ Kind = IfStatementKind::Constexpr;
+ else if (IsConsteval)
+ Kind = NotLocation.isValid() ? IfStatementKind::ConstevalNegated
+ : IfStatementKind::ConstevalNonNegated;
+
+ return Actions.ActOnIfStmt(IfLoc, Kind, LParen, InitStmt.get(), Cond, RParen,
+ ThenStmt.get(), ElseLoc, ElseStmt.get());
}
/// ParseSwitchStatement
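A usage sketch of the C++23 consteval if statement whose parsing is added above (illustrative only; compile_time_path() and runtime_path() are placeholders):

// Illustrative usage, not part of this patch.
constexpr int f(int x) {
  if consteval {
    return compile_time_path(x);  // taken during constant evaluation
  } else {
    return runtime_path(x);
  }
}

The negated form, if ! consteval, swaps which branch becomes the immediate-function context, matching the NotLocation handling above.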
@@ -1767,6 +1829,7 @@ bool Parser::isForRangeIdentifier() {
/// [C++] for-init-statement:
/// [C++] expression-statement
/// [C++] simple-declaration
+/// [C++2b] alias-declaration
///
/// [C++0x] for-range-declaration:
/// [C++0x] attribute-specifier-seq[opt] type-specifier-seq declarator
@@ -1872,36 +1935,42 @@ StmtResult Parser::ParseForStatement(SourceLocation *TrailingElseLoc) {
Diag(Tok, diag::ext_c99_variable_decl_in_for_loop);
Diag(Tok, diag::warn_gcc_variable_decl_in_for_loop);
}
+ DeclGroupPtrTy DG;
+ if (Tok.is(tok::kw_using)) {
+ DG = ParseAliasDeclarationInInitStatement(DeclaratorContext::ForInit,
+ attrs);
+ } else {
+ // In C++0x, "for (T NS:a" might not be a typo for ::
+ bool MightBeForRangeStmt = getLangOpts().CPlusPlus;
+ ColonProtectionRAIIObject ColonProtection(*this, MightBeForRangeStmt);
- // In C++0x, "for (T NS:a" might not be a typo for ::
- bool MightBeForRangeStmt = getLangOpts().CPlusPlus;
- ColonProtectionRAIIObject ColonProtection(*this, MightBeForRangeStmt);
-
- SourceLocation DeclStart = Tok.getLocation(), DeclEnd;
- DeclGroupPtrTy DG = ParseSimpleDeclaration(
- DeclaratorContext::ForInit, DeclEnd, attrs, false,
- MightBeForRangeStmt ? &ForRangeInfo : nullptr);
- FirstPart = Actions.ActOnDeclStmt(DG, DeclStart, Tok.getLocation());
- if (ForRangeInfo.ParsedForRangeDecl()) {
- Diag(ForRangeInfo.ColonLoc, getLangOpts().CPlusPlus11 ?
- diag::warn_cxx98_compat_for_range : diag::ext_for_range);
- ForRangeInfo.LoopVar = FirstPart;
- FirstPart = StmtResult();
- } else if (Tok.is(tok::semi)) { // for (int x = 4;
- ConsumeToken();
- } else if ((ForEach = isTokIdentifier_in())) {
- Actions.ActOnForEachDeclStmt(DG);
- // ObjC: for (id x in expr)
- ConsumeToken(); // consume 'in'
-
- if (Tok.is(tok::code_completion)) {
- cutOffParsing();
- Actions.CodeCompleteObjCForCollection(getCurScope(), DG);
- return StmtError();
+ SourceLocation DeclStart = Tok.getLocation(), DeclEnd;
+ DG = ParseSimpleDeclaration(
+ DeclaratorContext::ForInit, DeclEnd, attrs, false,
+ MightBeForRangeStmt ? &ForRangeInfo : nullptr);
+ FirstPart = Actions.ActOnDeclStmt(DG, DeclStart, Tok.getLocation());
+ if (ForRangeInfo.ParsedForRangeDecl()) {
+ Diag(ForRangeInfo.ColonLoc, getLangOpts().CPlusPlus11
+ ? diag::warn_cxx98_compat_for_range
+ : diag::ext_for_range);
+ ForRangeInfo.LoopVar = FirstPart;
+ FirstPart = StmtResult();
+ } else if (Tok.is(tok::semi)) { // for (int x = 4;
+ ConsumeToken();
+ } else if ((ForEach = isTokIdentifier_in())) {
+ Actions.ActOnForEachDeclStmt(DG);
+ // ObjC: for (id x in expr)
+ ConsumeToken(); // consume 'in'
+
+ if (Tok.is(tok::code_completion)) {
+ cutOffParsing();
+ Actions.CodeCompleteObjCForCollection(getCurScope(), DG);
+ return StmtError();
+ }
+ Collection = ParseExpression();
+ } else {
+ Diag(Tok, diag::err_expected_semi_for);
}
- Collection = ParseExpression();
- } else {
- Diag(Tok, diag::err_expected_semi_for);
}
} else {
ProhibitAttributes(attrs);
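A usage sketch of the C++23 alias-declaration init-statement now accepted in a for loop (illustrative only; use() is a placeholder):

// Illustrative usage, not part of this patch.
for (using T = int; T i = 0; i < 10; ++i)
  use(i);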
diff --git a/clang/lib/Parse/ParseStmtAsm.cpp b/clang/lib/Parse/ParseStmtAsm.cpp
index e520151dcad7..3e9ce8fd668f 100644
--- a/clang/lib/Parse/ParseStmtAsm.cpp
+++ b/clang/lib/Parse/ParseStmtAsm.cpp
@@ -10,10 +10,10 @@
//
//===----------------------------------------------------------------------===//
-#include "clang/Parse/Parser.h"
#include "clang/AST/ASTContext.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/TargetInfo.h"
+#include "clang/Parse/Parser.h"
#include "clang/Parse/RAIIObjectsForParser.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
@@ -28,8 +28,8 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
using namespace clang;
diff --git a/clang/lib/Parse/ParseTemplate.cpp b/clang/lib/Parse/ParseTemplate.cpp
index 828b9b2277ff..45af61a3926a 100644
--- a/clang/lib/Parse/ParseTemplate.cpp
+++ b/clang/lib/Parse/ParseTemplate.cpp
@@ -248,7 +248,27 @@ Decl *Parser::ParseSingleDeclarationAfterTemplate(
ParsingDeclarator DeclaratorInfo(*this, DS, (DeclaratorContext)Context);
if (TemplateInfo.TemplateParams)
DeclaratorInfo.setTemplateParameterLists(*TemplateInfo.TemplateParams);
+
+ // Turn off usual access checking for template specializations and
+ // instantiations.
+ // C++20 [temp.spec] 13.9/6.
+ // This disables the access checking rules for function template explicit
+ // instantiation and explicit specialization:
+ // - parameter-list;
+ // - template-argument-list;
+ // - noexcept-specifier;
+ // - dynamic-exception-specifications (deprecated in C++11, removed since
+ // C++17).
+ bool IsTemplateSpecOrInst =
+ (TemplateInfo.Kind == ParsedTemplateInfo::ExplicitInstantiation ||
+ TemplateInfo.Kind == ParsedTemplateInfo::ExplicitSpecialization);
+ SuppressAccessChecks SAC(*this, IsTemplateSpecOrInst);
+
ParseDeclarator(DeclaratorInfo);
+
+ if (IsTemplateSpecOrInst)
+ SAC.done();
+
// Error parsing the declarator?
if (!DeclaratorInfo.hasName()) {
// If so, skip until the semi-colon or a }.
@@ -886,10 +906,13 @@ Parser::ParseTemplateTemplateParameter(unsigned Depth, unsigned Position) {
} else if (Next.isOneOf(tok::identifier, tok::comma, tok::greater,
tok::greatergreater, tok::ellipsis)) {
Diag(Tok.getLocation(), diag::err_class_on_template_template_param)
- << (Replace ? FixItHint::CreateReplacement(Tok.getLocation(), "class")
- : FixItHint::CreateInsertion(Tok.getLocation(), "class "));
+ << getLangOpts().CPlusPlus17
+ << (Replace
+ ? FixItHint::CreateReplacement(Tok.getLocation(), "class")
+ : FixItHint::CreateInsertion(Tok.getLocation(), "class "));
} else
- Diag(Tok.getLocation(), diag::err_class_on_template_template_param);
+ Diag(Tok.getLocation(), diag::err_class_on_template_template_param)
+ << getLangOpts().CPlusPlus17;
if (Replace)
ConsumeToken();
diff --git a/clang/lib/Parse/ParseTentative.cpp b/clang/lib/Parse/ParseTentative.cpp
index c0bfbbde40ac..be3823ecda01 100644
--- a/clang/lib/Parse/ParseTentative.cpp
+++ b/clang/lib/Parse/ParseTentative.cpp
@@ -483,6 +483,8 @@ Parser::isCXXConditionDeclarationOrInitStatement(bool CanBeInitStatement,
ConditionDeclarationOrInitStatementState State(*this, CanBeInitStatement,
CanBeForRangeDecl);
+ if (CanBeInitStatement && Tok.is(tok::kw_using))
+ return ConditionOrInitStatement::InitStmtDecl;
if (State.update(isCXXDeclarationSpecifier()))
return State.result();
@@ -1101,9 +1103,7 @@ Parser::TPResult Parser::TryParseDeclarator(bool mayBeAbstract,
}
bool Parser::isTentativelyDeclared(IdentifierInfo *II) {
- return std::find(TentativelyDeclaredIdentifiers.begin(),
- TentativelyDeclaredIdentifiers.end(), II)
- != TentativelyDeclaredIdentifiers.end();
+ return llvm::is_contained(TentativelyDeclaredIdentifiers, II);
}
namespace {
@@ -1637,6 +1637,7 @@ Parser::isCXXDeclarationSpecifier(Parser::TPResult BracedCastResult,
case tok::kw___bf16:
case tok::kw__Float16:
case tok::kw___float128:
+ case tok::kw___ibm128:
case tok::kw_void:
case tok::annot_decltype:
#define GENERIC_IMAGE_TYPE(ImgType, Id) case tok::kw_##ImgType##_t:
@@ -1751,6 +1752,7 @@ bool Parser::isCXXDeclarationSpecifierAType() {
case tok::kw___bf16:
case tok::kw__Float16:
case tok::kw___float128:
+ case tok::kw___ibm128:
case tok::kw_void:
case tok::kw___unknown_anytype:
case tok::kw___auto_type:
diff --git a/clang/lib/Parse/Parser.cpp b/clang/lib/Parse/Parser.cpp
index c81dd03ffaaa..11113fa1a060 100644
--- a/clang/lib/Parse/Parser.cpp
+++ b/clang/lib/Parse/Parser.cpp
@@ -785,6 +785,7 @@ Parser::ParseExternalDeclaration(ParsedAttributesWithRange &attrs,
HandlePragmaFPContract();
return nullptr;
case tok::annot_pragma_fenv_access:
+ case tok::annot_pragma_fenv_access_ms:
HandlePragmaFEnvAccess();
return nullptr;
case tok::annot_pragma_fenv_round:
diff --git a/clang/lib/Rewrite/HTMLRewrite.cpp b/clang/lib/Rewrite/HTMLRewrite.cpp
index 2f5f2734aa46..371557a624c9 100644
--- a/clang/lib/Rewrite/HTMLRewrite.cpp
+++ b/clang/lib/Rewrite/HTMLRewrite.cpp
@@ -371,6 +371,7 @@ h1 { font-size:14pt }
.msg { border-radius:5px }
.msg { font-family:Helvetica, sans-serif; font-size:8pt }
.msg { float:left }
+.msg { position:relative }
.msg { padding:0.25em 1ex 0.25em 1ex }
.msg { margin-top:10px; margin-bottom:10px }
.msg { font-weight:bold }
@@ -391,7 +392,7 @@ h1 { font-size:14pt }
.CodeInsertionHint { font-weight: bold; background-color: #10dd10 }
.CodeRemovalHint { background-color:#de1010 }
.CodeRemovalHint { border-bottom:1px solid #6F9DBE }
-.selected{ background-color:orange !important; }
+.msg.selected{ background-color:orange !important; }
table.simpletable {
padding: 5px;
diff --git a/clang/lib/Rewrite/Rewriter.cpp b/clang/lib/Rewrite/Rewriter.cpp
index 040e1c284253..3b06afc76e16 100644
--- a/clang/lib/Rewrite/Rewriter.cpp
+++ b/clang/lib/Rewrite/Rewriter.cpp
@@ -259,7 +259,7 @@ bool Rewriter::InsertText(SourceLocation Loc, StringRef Str,
unsigned StartOffs = getLocationOffsetAndFileID(Loc, FID);
SmallString<128> indentedStr;
- if (indentNewLines && Str.find('\n') != StringRef::npos) {
+ if (indentNewLines && Str.contains('\n')) {
StringRef MB = SourceMgr->getBufferData(FID);
unsigned lineNo = SourceMgr->getLineNumber(FID, StartOffs) - 1;
diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp
index aa2602c8d925..8544a4fccf4c 100644
--- a/clang/lib/Sema/AnalysisBasedWarnings.cpp
+++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp
@@ -497,8 +497,7 @@ static ControlFlowKind CheckFallThrough(AnalysisDeclContext &AC) {
HasAbnormalEdge = true;
continue;
}
- if (std::find(B.succ_begin(), B.succ_end(), &cfg->getExit())
- == B.succ_end()) {
+ if (!llvm::is_contained(B.succs(), &cfg->getExit())) {
HasAbnormalEdge = true;
continue;
}
@@ -1080,11 +1079,9 @@ namespace {
while (!BlockQueue.empty()) {
const CFGBlock *P = BlockQueue.front();
BlockQueue.pop_front();
- for (CFGBlock::const_succ_iterator I = P->succ_begin(),
- E = P->succ_end();
- I != E; ++I) {
- if (*I && ReachableBlocks.insert(*I).second)
- BlockQueue.push_back(*I);
+ for (const CFGBlock *B : P->succs()) {
+ if (B && ReachableBlocks.insert(B).second)
+ BlockQueue.push_back(B);
}
}
}
@@ -1115,17 +1112,15 @@ namespace {
continue; // Case label is preceded with a normal label, good.
if (!ReachableBlocks.count(P)) {
- for (CFGBlock::const_reverse_iterator ElemIt = P->rbegin(),
- ElemEnd = P->rend();
- ElemIt != ElemEnd; ++ElemIt) {
- if (Optional<CFGStmt> CS = ElemIt->getAs<CFGStmt>()) {
+ for (const CFGElement &Elem : llvm::reverse(*P)) {
+ if (Optional<CFGStmt> CS = Elem.getAs<CFGStmt>()) {
if (const AttributedStmt *AS = asFallThroughAttr(CS->getStmt())) {
// Don't issue a warning for an unreachable fallthrough
// attribute in template instantiations as it may not be
// unreachable in all instantiations of the template.
if (!IsTemplateInstantiation)
S.Diag(AS->getBeginLoc(),
- diag::warn_fallthrough_attr_unreachable);
+ diag::warn_unreachable_fallthrough_attr);
markFallthroughVisited(AS);
++AnnotatedCnt;
break;
@@ -1202,12 +1197,9 @@ namespace {
static const Stmt *getLastStmt(const CFGBlock &B) {
if (const Stmt *Term = B.getTerminatorStmt())
return Term;
- for (CFGBlock::const_reverse_iterator ElemIt = B.rbegin(),
- ElemEnd = B.rend();
- ElemIt != ElemEnd; ++ElemIt) {
- if (Optional<CFGStmt> CS = ElemIt->getAs<CFGStmt>())
+ for (const CFGElement &Elem : llvm::reverse(B))
+ if (Optional<CFGStmt> CS = Elem.getAs<CFGStmt>())
return CS->getStmt();
- }
// Workaround to detect a statement thrown out by CFGBuilder:
// case X: {} case Y:
// case X: ; case Y:
@@ -1637,7 +1629,7 @@ public:
private:
static bool hasAlwaysUninitializedUse(const UsesVec* vec) {
- return std::any_of(vec->begin(), vec->end(), [](const UninitUse &U) {
+ return llvm::any_of(*vec, [](const UninitUse &U) {
return U.getKind() == UninitUse::Always ||
U.getKind() == UninitUse::AfterCall ||
U.getKind() == UninitUse::AfterDecl;
@@ -2275,8 +2267,7 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings(
.setAlwaysAdd(Stmt::CStyleCastExprClass)
.setAlwaysAdd(Stmt::DeclRefExprClass)
.setAlwaysAdd(Stmt::ImplicitCastExprClass)
- .setAlwaysAdd(Stmt::UnaryOperatorClass)
- .setAlwaysAdd(Stmt::AttributedStmtClass);
+ .setAlwaysAdd(Stmt::UnaryOperatorClass);
}
// Install the logical handler.
diff --git a/clang/lib/Sema/CodeCompleteConsumer.cpp b/clang/lib/Sema/CodeCompleteConsumer.cpp
index 3ab2a18f5e8d..6c47cedfccf3 100644
--- a/clang/lib/Sema/CodeCompleteConsumer.cpp
+++ b/clang/lib/Sema/CodeCompleteConsumer.cpp
@@ -82,6 +82,7 @@ bool CodeCompletionContext::wantConstructorResults() const {
case CCC_ObjCInterfaceName:
case CCC_ObjCCategoryName:
case CCC_IncludedFile:
+ case CCC_Attribute:
return false;
}
@@ -161,6 +162,8 @@ StringRef clang::getCompletionKindString(CodeCompletionContext::Kind Kind) {
return "ObjCCategoryName";
case CCKind::CCC_IncludedFile:
return "IncludedFile";
+ case CCKind::CCC_Attribute:
+ return "Attribute";
case CCKind::CCC_Recovery:
return "Recovery";
}
@@ -384,14 +387,13 @@ StringRef CodeCompletionTUInfo::getParentName(const DeclContext *DC) {
SmallString<128> S;
llvm::raw_svector_ostream OS(S);
bool First = true;
- for (unsigned I = Contexts.size(); I != 0; --I) {
+ for (const DeclContext *CurDC : llvm::reverse(Contexts)) {
if (First)
First = false;
else {
OS << "::";
}
- const DeclContext *CurDC = Contexts[I - 1];
if (const auto *CatImpl = dyn_cast<ObjCCategoryImplDecl>(CurDC))
CurDC = CatImpl->getCategoryDecl();
diff --git a/clang/lib/Sema/DeclSpec.cpp b/clang/lib/Sema/DeclSpec.cpp
index 72d9ea6dd3bf..4405f29f3d99 100644
--- a/clang/lib/Sema/DeclSpec.cpp
+++ b/clang/lib/Sema/DeclSpec.cpp
@@ -358,6 +358,7 @@ bool Declarator::isDeclarationOfFunction() const {
case TST_Fract:
case TST_Float16:
case TST_float128:
+ case TST_ibm128:
case TST_enum:
case TST_error:
case TST_float:
@@ -558,6 +559,7 @@ const char *DeclSpec::getSpecifierName(DeclSpec::TST T,
case DeclSpec::TST_fract: return "_Fract";
case DeclSpec::TST_float16: return "_Float16";
case DeclSpec::TST_float128: return "__float128";
+ case DeclSpec::TST_ibm128: return "__ibm128";
case DeclSpec::TST_bool: return Policy.Bool ? "bool" : "_Bool";
case DeclSpec::TST_decimal32: return "_Decimal32";
case DeclSpec::TST_decimal64: return "_Decimal64";
@@ -631,8 +633,7 @@ bool DeclSpec::SetStorageClassSpec(Sema &S, SCS SC, SourceLocation Loc,
case SCS_extern:
case SCS_private_extern:
case SCS_static:
- if (S.getLangOpts().OpenCLVersion < 120 &&
- !S.getLangOpts().OpenCLCPlusPlus) {
+ if (S.getLangOpts().getOpenCLCompatibleVersion() < 120) {
DiagID = diag::err_opencl_unknown_type_specifier;
PrevSpec = getSpecifierName(SC);
return true;
@@ -1155,6 +1156,17 @@ void DeclSpec::Finish(Sema &S, const PrintingPolicy &Policy) {
// Validate and finalize AltiVec vector declspec.
if (TypeAltiVecVector) {
+ // No vector long long without VSX (or ZVector).
+ if ((getTypeSpecWidth() == TypeSpecifierWidth::LongLong) &&
+ !S.Context.getTargetInfo().hasFeature("vsx") &&
+ !S.getLangOpts().ZVector)
+ S.Diag(TSWRange.getBegin(), diag::err_invalid_vector_long_long_decl_spec);
+
+ // No vector __int128 prior to Power8.
+ if ((TypeSpecType == TST_int128) &&
+ !S.Context.getTargetInfo().hasFeature("power8-vector"))
+ S.Diag(TSTLoc, diag::err_invalid_vector_int128_decl_spec);
+
if (TypeAltiVecBool) {
// Sign specifiers are not allowed with vector bool. (PIM 2.1)
if (getTypeSpecSign() != TypeSpecifierSign::Unspecified) {
@@ -1183,13 +1195,6 @@ void DeclSpec::Finish(Sema &S, const PrintingPolicy &Policy) {
S.Diag(TSWRange.getBegin(), diag::err_invalid_vector_bool_decl_spec)
<< getSpecifierName(getTypeSpecWidth());
- // vector bool long long requires VSX support or ZVector.
- if ((getTypeSpecWidth() == TypeSpecifierWidth::LongLong) &&
- (!S.Context.getTargetInfo().hasFeature("vsx")) &&
- (!S.Context.getTargetInfo().hasFeature("power8-vector")) &&
- !S.getLangOpts().ZVector)
- S.Diag(TSTLoc, diag::err_invalid_vector_long_long_decl_spec);
-
// Elements of vector bool are interpreted as unsigned. (PIM 2.1)
if ((TypeSpecType == TST_char) || (TypeSpecType == TST_int) ||
(TypeSpecType == TST_int128) ||
@@ -1212,13 +1217,15 @@ void DeclSpec::Finish(Sema &S, const PrintingPolicy &Policy) {
!S.Context.getTargetInfo().hasFeature("arch12"))
S.Diag(TSTLoc, diag::err_invalid_vector_float_decl_spec);
} else if (getTypeSpecWidth() == TypeSpecifierWidth::Long) {
- // vector long is unsupported for ZVector and deprecated for AltiVec.
+ // 'vector long' is unsupported for ZVector and without VSX, and is
+ // deprecated for AltiVec.
// It has also been historically deprecated on AIX (as an alias for
// "vector int" in both 32-bit and 64-bit modes). It was then made
// unsupported in the Clang-based XL compiler since the deprecated type
// has a number of conflicting semantics and continuing to support it
// is a disservice to users.
if (S.getLangOpts().ZVector ||
+ !S.Context.getTargetInfo().hasFeature("vsx") ||
S.Context.getTargetInfo().getTriple().isOSAIX())
S.Diag(TSWRange.getBegin(), diag::err_invalid_vector_long_decl_spec);
else
@@ -1300,8 +1307,9 @@ void DeclSpec::Finish(Sema &S, const PrintingPolicy &Policy) {
if (!S.getLangOpts().CPlusPlus)
S.Diag(TSTLoc, diag::ext_integer_complex);
} else if (TypeSpecType != TST_float && TypeSpecType != TST_double &&
- TypeSpecType != TST_float128) {
- // FIXME: _Float16, __fp16?
+ TypeSpecType != TST_float128 && TypeSpecType != TST_float16 &&
+ TypeSpecType != TST_ibm128) {
+ // FIXME: __fp16?
S.Diag(TSCLoc, diag::err_invalid_complex_spec)
<< getSpecifierName((TST)TypeSpecType, Policy);
TypeSpecComplex = TSC_unspecified;
diff --git a/clang/lib/Sema/JumpDiagnostics.cpp b/clang/lib/Sema/JumpDiagnostics.cpp
index 999c2a481459..94f39e1eea6e 100644
--- a/clang/lib/Sema/JumpDiagnostics.cpp
+++ b/clang/lib/Sema/JumpDiagnostics.cpp
@@ -377,11 +377,15 @@ void JumpScopeChecker::BuildScopeInformation(Stmt *S,
case Stmt::IfStmtClass: {
IfStmt *IS = cast<IfStmt>(S);
- if (!(IS->isConstexpr() || IS->isObjCAvailabilityCheck()))
+ if (!(IS->isConstexpr() || IS->isConsteval() ||
+ IS->isObjCAvailabilityCheck()))
break;
- unsigned Diag = IS->isConstexpr() ? diag::note_protected_by_constexpr_if
- : diag::note_protected_by_if_available;
+ unsigned Diag = diag::note_protected_by_if_available;
+ if (IS->isConstexpr())
+ Diag = diag::note_protected_by_constexpr_if;
+ else if (IS->isConsteval())
+ Diag = diag::note_protected_by_consteval_if;
if (VarDecl *Var = IS->getConditionVariable())
BuildScopeInformation(Var, ParentScope);
@@ -389,7 +393,9 @@ void JumpScopeChecker::BuildScopeInformation(Stmt *S,
// Cannot jump into the middle of the condition.
unsigned NewParentScope = Scopes.size();
Scopes.push_back(GotoScope(ParentScope, Diag, 0, IS->getBeginLoc()));
- BuildScopeInformation(IS->getCond(), NewParentScope);
+
+ if (!IS->isConsteval())
+ BuildScopeInformation(IS->getCond(), NewParentScope);
// Jumps into either arm of an 'if constexpr' are not allowed.
NewParentScope = Scopes.size();
@@ -487,8 +493,7 @@ void JumpScopeChecker::BuildScopeInformation(Stmt *S,
}
// Jump from the catch to the finally or try is not valid.
- for (unsigned I = 0, N = AT->getNumCatchStmts(); I != N; ++I) {
- ObjCAtCatchStmt *AC = AT->getCatchStmt(I);
+ for (ObjCAtCatchStmt *AC : AT->catch_stmts()) {
unsigned NewParentScope = Scopes.size();
Scopes.push_back(GotoScope(ParentScope,
diag::note_protected_by_objc_catch,
diff --git a/clang/lib/Sema/OpenCLBuiltins.td b/clang/lib/Sema/OpenCLBuiltins.td
index cd704fe395a9..8cf7ec58eff5 100644
--- a/clang/lib/Sema/OpenCLBuiltins.td
+++ b/clang/lib/Sema/OpenCLBuiltins.td
@@ -83,6 +83,30 @@ def FuncExtKhrMipmapImage : FunctionExtension<"cl_khr_mipmap_imag
def FuncExtKhrMipmapImageWrites : FunctionExtension<"cl_khr_mipmap_image_writes">;
def FuncExtKhrGlMsaaSharing : FunctionExtension<"cl_khr_gl_msaa_sharing">;
+def FuncExtOpenCLCPipes : FunctionExtension<"__opencl_c_pipes">;
+def FuncExtOpenCLCWGCollectiveFunctions : FunctionExtension<"__opencl_c_work_group_collective_functions">;
+def FuncExtFloatAtomicsFp16GlobalLoadStore : FunctionExtension<"cl_ext_float_atomics __opencl_c_ext_fp16_global_atomic_load_store">;
+def FuncExtFloatAtomicsFp16LocalLoadStore : FunctionExtension<"cl_ext_float_atomics __opencl_c_ext_fp16_local_atomic_load_store">;
+def FuncExtFloatAtomicsFp16GenericLoadStore : FunctionExtension<"cl_ext_float_atomics __opencl_c_ext_fp16_global_atomic_load_store __opencl_c_ext_fp16_local_atomic_load_store">;
+def FuncExtFloatAtomicsFp16GlobalAdd : FunctionExtension<"cl_ext_float_atomics __opencl_c_ext_fp16_global_atomic_add">;
+def FuncExtFloatAtomicsFp32GlobalAdd : FunctionExtension<"cl_ext_float_atomics __opencl_c_ext_fp32_global_atomic_add">;
+def FuncExtFloatAtomicsFp64GlobalAdd : FunctionExtension<"cl_ext_float_atomics __opencl_c_ext_fp64_global_atomic_add">;
+def FuncExtFloatAtomicsFp16LocalAdd : FunctionExtension<"cl_ext_float_atomics __opencl_c_ext_fp16_local_atomic_add">;
+def FuncExtFloatAtomicsFp32LocalAdd : FunctionExtension<"cl_ext_float_atomics __opencl_c_ext_fp32_local_atomic_add">;
+def FuncExtFloatAtomicsFp64LocalAdd : FunctionExtension<"cl_ext_float_atomics __opencl_c_ext_fp64_local_atomic_add">;
+def FuncExtFloatAtomicsFp16GenericAdd : FunctionExtension<"cl_ext_float_atomics __opencl_c_ext_fp16_local_atomic_add __opencl_c_ext_fp16_global_atomic_add">;
+def FuncExtFloatAtomicsFp32GenericAdd : FunctionExtension<"cl_ext_float_atomics __opencl_c_ext_fp32_local_atomic_add __opencl_c_ext_fp32_global_atomic_add">;
+def FuncExtFloatAtomicsFp64GenericAdd : FunctionExtension<"cl_ext_float_atomics __opencl_c_ext_fp64_local_atomic_add __opencl_c_ext_fp64_global_atomic_add">;
+def FuncExtFloatAtomicsFp16GlobalMinMax : FunctionExtension<"cl_ext_float_atomics __opencl_c_ext_fp16_global_atomic_min_max">;
+def FuncExtFloatAtomicsFp32GlobalMinMax : FunctionExtension<"cl_ext_float_atomics __opencl_c_ext_fp32_global_atomic_min_max">;
+def FuncExtFloatAtomicsFp64GlobalMinMax : FunctionExtension<"cl_ext_float_atomics __opencl_c_ext_fp64_global_atomic_min_max">;
+def FuncExtFloatAtomicsFp16LocalMinMax : FunctionExtension<"cl_ext_float_atomics __opencl_c_ext_fp16_local_atomic_min_max">;
+def FuncExtFloatAtomicsFp32LocalMinMax : FunctionExtension<"cl_ext_float_atomics __opencl_c_ext_fp32_local_atomic_min_max">;
+def FuncExtFloatAtomicsFp64LocalMinMax : FunctionExtension<"cl_ext_float_atomics __opencl_c_ext_fp64_local_atomic_min_max">;
+def FuncExtFloatAtomicsFp16GenericMinMax : FunctionExtension<"cl_ext_float_atomics __opencl_c_ext_fp16_local_atomic_min_max __opencl_c_ext_fp16_global_atomic_min_max">;
+def FuncExtFloatAtomicsFp32GenericMinMax : FunctionExtension<"cl_ext_float_atomics __opencl_c_ext_fp32_local_atomic_min_max __opencl_c_ext_fp32_global_atomic_min_max">;
+def FuncExtFloatAtomicsFp64GenericMinMax : FunctionExtension<"cl_ext_float_atomics __opencl_c_ext_fp64_local_atomic_min_max __opencl_c_ext_fp64_global_atomic_min_max">;
+
// Not a real extension, but a workaround to add C++ for OpenCL specific builtins.
def FuncExtOpenCLCxx : FunctionExtension<"__cplusplus">;
@@ -347,6 +371,7 @@ def AtomicLong : Type<"atomic_long", QualType<"Context.getAtomicType(
def AtomicULong : Type<"atomic_ulong", QualType<"Context.getAtomicType(Context.UnsignedLongTy)">>;
def AtomicFloat : Type<"atomic_float", QualType<"Context.getAtomicType(Context.FloatTy)">>;
def AtomicDouble : Type<"atomic_double", QualType<"Context.getAtomicType(Context.DoubleTy)">>;
+def AtomicHalf : Type<"atomic_half", QualType<"Context.getAtomicType(Context.HalfTy)">>;
def AtomicIntPtr : Type<"atomic_intptr_t", QualType<"Context.getAtomicType(Context.getIntPtrType())">>;
def AtomicUIntPtr : Type<"atomic_uintptr_t", QualType<"Context.getAtomicType(Context.getUIntPtrType())">>;
def AtomicSize : Type<"atomic_size_t", QualType<"Context.getAtomicType(Context.getSizeType())">>;
@@ -543,9 +568,10 @@ foreach name = ["fma", "mad"] in {
def : Builtin<name, [FGenTypeN, FGenTypeN, FGenTypeN, FGenTypeN], Attr.Const>;
}
-// --- Version dependent ---
-let MaxVersion = CL20 in {
- foreach AS = [GlobalAS, LocalAS, PrivateAS] in {
+// The following math builtins take pointer arguments. Which overloads are
+// available depends on whether the generic address space feature is enabled.
+multiclass MathWithPointer<list<AddressSpace> addrspaces> {
+ foreach AS = addrspaces in {
foreach name = ["fract", "modf", "sincos"] in {
def : Builtin<name, [FGenTypeN, FGenTypeN, PointerType<FGenTypeN, AS>]>;
}
@@ -561,19 +587,12 @@ let MaxVersion = CL20 in {
}
}
}
+
+let MaxVersion = CL20 in {
+ defm : MathWithPointer<[GlobalAS, LocalAS, PrivateAS]>;
+}
let MinVersion = CL20 in {
- foreach name = ["fract", "modf", "sincos"] in {
- def : Builtin<name, [FGenTypeN, FGenTypeN, PointerType<FGenTypeN, GenericAS>]>;
- }
- foreach name = ["frexp", "lgamma_r"] in {
- foreach Type = [GenTypeFloatVecAndScalar, GenTypeDoubleVecAndScalar, GenTypeHalfVecAndScalar] in {
- def : Builtin<name, [Type, Type, PointerType<GenTypeIntVecAndScalar, GenericAS>]>;
- } }
- foreach name = ["remquo"] in {
- foreach Type = [GenTypeFloatVecAndScalar, GenTypeDoubleVecAndScalar, GenTypeHalfVecAndScalar] in {
- def : Builtin<name, [Type, Type, Type, PointerType<GenTypeIntVecAndScalar, GenericAS>]>;
- }
- }
+ defm : MathWithPointer<[GenericAS]>;
}
// --- Table 9 ---
@@ -783,10 +802,8 @@ foreach name = ["select"] in {
// OpenCL v1.1 s6.11.7, v1.2 s6.12.7, v2.0 s6.13.7 - Vector Data Load and Store Functions
// OpenCL Extension v1.1 s9.3.6 and s9.6.6, v1.2 s9.5.6, v2.0 s5.1.6 and s6.1.6 - Vector Data Load and Store Functions
// --- Table 15 ---
-// Variants for OpenCL versions below 2.0, using pointers to the global, local
-// and private address spaces.
-let MaxVersion = CL20 in {
- foreach AS = [GlobalAS, LocalAS, PrivateAS] in {
+multiclass VloadVstore<list<AddressSpace> addrspaces, bit defStores> {
+ foreach AS = addrspaces in {
foreach VSize = [2, 3, 4, 8, 16] in {
foreach name = ["vload" # VSize] in {
def : Builtin<name, [VectorType<Char, VSize>, Size, PointerType<ConstType<Char>, AS>]>;
@@ -801,147 +818,67 @@ let MaxVersion = CL20 in {
def : Builtin<name, [VectorType<Double, VSize>, Size, PointerType<ConstType<Double>, AS>]>;
def : Builtin<name, [VectorType<Half, VSize>, Size, PointerType<ConstType<Half>, AS>]>;
}
- foreach name = ["vstore" # VSize] in {
- def : Builtin<name, [Void, VectorType<Char, VSize>, Size, PointerType<Char, AS>]>;
- def : Builtin<name, [Void, VectorType<UChar, VSize>, Size, PointerType<UChar, AS>]>;
- def : Builtin<name, [Void, VectorType<Short, VSize>, Size, PointerType<Short, AS>]>;
- def : Builtin<name, [Void, VectorType<UShort, VSize>, Size, PointerType<UShort, AS>]>;
- def : Builtin<name, [Void, VectorType<Int, VSize>, Size, PointerType<Int, AS>]>;
- def : Builtin<name, [Void, VectorType<UInt, VSize>, Size, PointerType<UInt, AS>]>;
- def : Builtin<name, [Void, VectorType<Long, VSize>, Size, PointerType<Long, AS>]>;
- def : Builtin<name, [Void, VectorType<ULong, VSize>, Size, PointerType<ULong, AS>]>;
- def : Builtin<name, [Void, VectorType<Float, VSize>, Size, PointerType<Float, AS>]>;
- def : Builtin<name, [Void, VectorType<Double, VSize>, Size, PointerType<Double, AS>]>;
- def : Builtin<name, [Void, VectorType<Half, VSize>, Size, PointerType<Half, AS>]>;
- }
- foreach name = ["vloada_half" # VSize] in {
- def : Builtin<name, [VectorType<Float, VSize>, Size, PointerType<ConstType<Half>, AS>]>;
- }
- foreach rnd = ["", "_rte", "_rtz", "_rtp", "_rtn"] in {
- foreach name = ["vstorea_half" # VSize # rnd] in {
- def : Builtin<name, [Void, VectorType<Float, VSize>, Size, PointerType<Half, AS>]>;
- def : Builtin<name, [Void, VectorType<Double, VSize>, Size, PointerType<Half, AS>]>;
+ if defStores then {
+ foreach name = ["vstore" # VSize] in {
+ def : Builtin<name, [Void, VectorType<Char, VSize>, Size, PointerType<Char, AS>]>;
+ def : Builtin<name, [Void, VectorType<UChar, VSize>, Size, PointerType<UChar, AS>]>;
+ def : Builtin<name, [Void, VectorType<Short, VSize>, Size, PointerType<Short, AS>]>;
+ def : Builtin<name, [Void, VectorType<UShort, VSize>, Size, PointerType<UShort, AS>]>;
+ def : Builtin<name, [Void, VectorType<Int, VSize>, Size, PointerType<Int, AS>]>;
+ def : Builtin<name, [Void, VectorType<UInt, VSize>, Size, PointerType<UInt, AS>]>;
+ def : Builtin<name, [Void, VectorType<Long, VSize>, Size, PointerType<Long, AS>]>;
+ def : Builtin<name, [Void, VectorType<ULong, VSize>, Size, PointerType<ULong, AS>]>;
+ def : Builtin<name, [Void, VectorType<Float, VSize>, Size, PointerType<Float, AS>]>;
+ def : Builtin<name, [Void, VectorType<Double, VSize>, Size, PointerType<Double, AS>]>;
+ def : Builtin<name, [Void, VectorType<Half, VSize>, Size, PointerType<Half, AS>]>;
}
}
}
}
}
-// Variants for OpenCL versions above 2.0, using pointers to the generic
-// address space.
-let MinVersion = CL20 in {
- foreach VSize = [2, 3, 4, 8, 16] in {
- foreach name = ["vload" # VSize] in {
- def : Builtin<name, [VectorType<Char, VSize>, Size, PointerType<ConstType<Char>, GenericAS>]>;
- def : Builtin<name, [VectorType<UChar, VSize>, Size, PointerType<ConstType<UChar>, GenericAS>]>;
- def : Builtin<name, [VectorType<Short, VSize>, Size, PointerType<ConstType<Short>, GenericAS>]>;
- def : Builtin<name, [VectorType<UShort, VSize>, Size, PointerType<ConstType<UShort>, GenericAS>]>;
- def : Builtin<name, [VectorType<Int, VSize>, Size, PointerType<ConstType<Int>, GenericAS>]>;
- def : Builtin<name, [VectorType<UInt, VSize>, Size, PointerType<ConstType<UInt>, GenericAS>]>;
- def : Builtin<name, [VectorType<Long, VSize>, Size, PointerType<ConstType<Long>, GenericAS>]>;
- def : Builtin<name, [VectorType<ULong, VSize>, Size, PointerType<ConstType<ULong>, GenericAS>]>;
- def : Builtin<name, [VectorType<Float, VSize>, Size, PointerType<ConstType<Float>, GenericAS>]>;
- def : Builtin<name, [VectorType<Double, VSize>, Size, PointerType<ConstType<Double>, GenericAS>]>;
- def : Builtin<name, [VectorType<Half, VSize>, Size, PointerType<ConstType<Half>, GenericAS>]>;
- }
- foreach name = ["vstore" # VSize] in {
- def : Builtin<name, [Void, VectorType<Char, VSize>, Size, PointerType<Char, GenericAS>]>;
- def : Builtin<name, [Void, VectorType<UChar, VSize>, Size, PointerType<UChar, GenericAS>]>;
- def : Builtin<name, [Void, VectorType<Short, VSize>, Size, PointerType<Short, GenericAS>]>;
- def : Builtin<name, [Void, VectorType<UShort, VSize>, Size, PointerType<UShort, GenericAS>]>;
- def : Builtin<name, [Void, VectorType<Int, VSize>, Size, PointerType<Int, GenericAS>]>;
- def : Builtin<name, [Void, VectorType<UInt, VSize>, Size, PointerType<UInt, GenericAS>]>;
- def : Builtin<name, [Void, VectorType<Long, VSize>, Size, PointerType<Long, GenericAS>]>;
- def : Builtin<name, [Void, VectorType<ULong, VSize>, Size, PointerType<ULong, GenericAS>]>;
- def : Builtin<name, [Void, VectorType<Float, VSize>, Size, PointerType<Float, GenericAS>]>;
- def : Builtin<name, [Void, VectorType<Double, VSize>, Size, PointerType<Double, GenericAS>]>;
- def : Builtin<name, [Void, VectorType<Half, VSize>, Size, PointerType<Half, GenericAS>]>;
- }
- foreach name = ["vloada_half" # VSize] in {
- def : Builtin<name, [VectorType<Float, VSize>, Size, PointerType<ConstType<Half>, GenericAS>]>;
- }
- foreach rnd = ["", "_rte", "_rtz", "_rtp", "_rtn"] in {
- foreach name = ["vstorea_half" # VSize # rnd] in {
- def : Builtin<name, [Void, VectorType<Float, VSize>, Size, PointerType<Half, GenericAS>]>;
- def : Builtin<name, [Void, VectorType<Double, VSize>, Size, PointerType<Half, GenericAS>]>;
- }
- }
- }
-}
-// Variants using pointers to the constant address space.
-foreach VSize = [2, 3, 4, 8, 16] in {
- foreach name = ["vload" # VSize] in {
- def : Builtin<name, [VectorType<Char, VSize>, Size, PointerType<ConstType<Char>, ConstantAS>]>;
- def : Builtin<name, [VectorType<UChar, VSize>, Size, PointerType<ConstType<UChar>, ConstantAS>]>;
- def : Builtin<name, [VectorType<Short, VSize>, Size, PointerType<ConstType<Short>, ConstantAS>]>;
- def : Builtin<name, [VectorType<UShort, VSize>, Size, PointerType<ConstType<UShort>, ConstantAS>]>;
- def : Builtin<name, [VectorType<Int, VSize>, Size, PointerType<ConstType<Int>, ConstantAS>]>;
- def : Builtin<name, [VectorType<UInt, VSize>, Size, PointerType<ConstType<UInt>, ConstantAS>]>;
- def : Builtin<name, [VectorType<Long, VSize>, Size, PointerType<ConstType<Long>, ConstantAS>]>;
- def : Builtin<name, [VectorType<ULong, VSize>, Size, PointerType<ConstType<ULong>, ConstantAS>]>;
- def : Builtin<name, [VectorType<Float, VSize>, Size, PointerType<ConstType<Float>, ConstantAS>]>;
- def : Builtin<name, [VectorType<Double, VSize>, Size, PointerType<ConstType<Double>, ConstantAS>]>;
- def : Builtin<name, [VectorType<Half, VSize>, Size, PointerType<ConstType<Half>, ConstantAS>]>;
- }
- foreach name = ["vloada_half" # VSize] in {
- def : Builtin<name, [VectorType<Float, VSize>, Size, PointerType<ConstType<Half>, ConstantAS>]>;
- }
-}
+
let MaxVersion = CL20 in {
- foreach AS = [GlobalAS, LocalAS, PrivateAS] in {
- def : Builtin<"vload_half", [Float, Size, PointerType<ConstType<Half>, AS>]>;
- def : Builtin<"vloada_half", [Float, Size, PointerType<ConstType<Half>, AS>]>;
- foreach VSize = [2, 3, 4, 8, 16] in {
- foreach name = ["vload_half" # VSize] in {
- def : Builtin<name, [VectorType<Float, VSize>, Size, PointerType<ConstType<Half>, AS>]>;
- }
- }
- foreach rnd = ["", "_rte", "_rtz", "_rtp", "_rtn"] in {
- foreach name = ["vstore_half" # rnd, "vstorea_half" # rnd] in {
- def : Builtin<name, [Void, Float, Size, PointerType<Half, AS>]>;
- def : Builtin<name, [Void, Double, Size, PointerType<Half, AS>]>;
- }
- foreach VSize = [2, 3, 4, 8, 16] in {
- foreach name = ["vstore_half" # VSize # rnd] in {
- def : Builtin<name, [Void, VectorType<Float, VSize>, Size, PointerType<Half, AS>]>;
- def : Builtin<name, [Void, VectorType<Double, VSize>, Size, PointerType<Half, AS>]>;
- }
- }
- }
- }
+ defm : VloadVstore<[GlobalAS, LocalAS, PrivateAS], 1>;
}
let MinVersion = CL20 in {
- foreach AS = [GenericAS] in {
+ defm : VloadVstore<[GenericAS], 1>;
+}
+// vload with constant address space is available regardless of version.
+defm : VloadVstore<[ConstantAS], 0>;
+
+multiclass VloadVstoreHalf<list<AddressSpace> addrspaces, bit defStores> {
+ foreach AS = addrspaces in {
def : Builtin<"vload_half", [Float, Size, PointerType<ConstType<Half>, AS>]>;
- def : Builtin<"vloada_half", [Float, Size, PointerType<ConstType<Half>, AS>]>;
foreach VSize = [2, 3, 4, 8, 16] in {
- foreach name = ["vload_half" # VSize] in {
+ foreach name = ["vload_half" # VSize, "vloada_half" # VSize] in {
def : Builtin<name, [VectorType<Float, VSize>, Size, PointerType<ConstType<Half>, AS>]>;
}
}
- foreach rnd = ["", "_rte", "_rtz", "_rtp", "_rtn"] in {
- foreach name = ["vstore_half" # rnd, "vstorea_half" # rnd] in {
- def : Builtin<name, [Void, Float, Size, PointerType<Half, AS>]>;
- def : Builtin<name, [Void, Double, Size, PointerType<Half, AS>]>;
- }
- foreach VSize = [2, 3, 4, 8, 16] in {
- foreach name = ["vstore_half" # VSize # rnd] in {
- def : Builtin<name, [Void, VectorType<Float, VSize>, Size, PointerType<Half, AS>]>;
- def : Builtin<name, [Void, VectorType<Double, VSize>, Size, PointerType<Half, AS>]>;
+ if defStores then {
+ foreach rnd = ["", "_rte", "_rtz", "_rtp", "_rtn"] in {
+ foreach name = ["vstore_half" # rnd] in {
+ def : Builtin<name, [Void, Float, Size, PointerType<Half, AS>]>;
+ def : Builtin<name, [Void, Double, Size, PointerType<Half, AS>]>;
+ }
+ foreach VSize = [2, 3, 4, 8, 16] in {
+ foreach name = ["vstore_half" # VSize # rnd, "vstorea_half" # VSize # rnd] in {
+ def : Builtin<name, [Void, VectorType<Float, VSize>, Size, PointerType<Half, AS>]>;
+ def : Builtin<name, [Void, VectorType<Double, VSize>, Size, PointerType<Half, AS>]>;
+ }
}
}
}
}
}
-foreach AS = [ConstantAS] in {
- def : Builtin<"vload_half", [Float, Size, PointerType<ConstType<Half>, AS>]>;
- def : Builtin<"vloada_half", [Float, Size, PointerType<ConstType<Half>, AS>]>;
- foreach VSize = [2, 3, 4, 8, 16] in {
- foreach name = ["vload_half" # VSize] in {
- def : Builtin<name, [VectorType<Float, VSize>, Size, PointerType<ConstType<Half>, AS>]>;
- }
- }
+let MaxVersion = CL20 in {
+ defm : VloadVstoreHalf<[GlobalAS, LocalAS, PrivateAS], 1>;
}
+let MinVersion = CL20 in {
+ defm : VloadVstoreHalf<[GenericAS], 1>;
+}
+// vload_half and vloada_half with the constant address space are available regardless of version.
+defm : VloadVstoreHalf<[ConstantAS], 0>;
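The VloadVstore and VloadVstoreHalf multiclasses above fold the previously repeated per-address-space declarations into a single parameterized definition; store builtins are only emitted when defStores is 1, which is how the read-only constant-address-space variants are expressed. A minimal OpenCL C sketch of the calls these declarations make visible to kernels (illustrative only, not part of the imported sources):

// Loads four halfs starting at element offset gid*4, widening to float4,
// scales them, and narrows back to half on the store.
__kernel void scale(__global const half *in, __global half *out) {
  size_t gid = get_global_id(0);
  float4 v = vload_half4(gid, in);
  v *= 2.0f;
  vstore_half4(v, gid, out);  // a rounding suffix such as vstore_half4_rte may also be used
}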
// OpenCL v3.0 s6.15.8 - Synchronization Functions.
def : Builtin<"barrier", [Void, MemFenceFlags], Attr.Convergent>;
@@ -1177,6 +1114,218 @@ let MinVersion = CL20 in {
[Bool, PointerType<VolatileType<AtomicFlag>, GenericAS>, MemoryOrder, MemoryScope]>;
}
+// The functionality added by cl_ext_float_atomics extension
+let MinVersion = CL20 in {
+ let Extension = FuncExtFloatAtomicsFp16GlobalLoadStore in {
+ def : Builtin<"atomic_store",
+ [Void, PointerType<VolatileType<AtomicHalf>, GlobalAS>, AtomicHalf]>;
+ def : Builtin<"atomic_store_explicit",
+ [Void, PointerType<VolatileType<AtomicHalf>, GlobalAS>, AtomicHalf, MemoryOrder]>;
+ def : Builtin<"atomic_store_explicit",
+ [Void, PointerType<VolatileType<AtomicHalf>, GlobalAS>, AtomicHalf, MemoryOrder, MemoryScope]>;
+ def : Builtin<"atomic_load",
+ [Half, PointerType<VolatileType<AtomicHalf>, GlobalAS>]>;
+ def : Builtin<"atomic_load_explicit",
+ [Half, PointerType<VolatileType<AtomicHalf>, GlobalAS>, MemoryOrder]>;
+ def : Builtin<"atomic_load_explicit",
+ [Half, PointerType<VolatileType<AtomicHalf>, GlobalAS>, MemoryOrder, MemoryScope]>;
+ def : Builtin<"atomic_exchange",
+ [Half, PointerType<VolatileType<AtomicHalf>, GlobalAS>, Half]>;
+ def : Builtin<"atomic_exchange_explicit",
+ [Half, PointerType<VolatileType<AtomicHalf>, GlobalAS>, Half, MemoryOrder]>;
+ def : Builtin<"atomic_exchange_explicit",
+ [Half, PointerType<VolatileType<AtomicHalf>, GlobalAS>, Half, MemoryOrder, MemoryScope]>;
+ }
+ let Extension = FuncExtFloatAtomicsFp16LocalLoadStore in {
+ def : Builtin<"atomic_store",
+ [Void, PointerType<VolatileType<AtomicHalf>, LocalAS>, AtomicHalf]>;
+ def : Builtin<"atomic_store_explicit",
+ [Void, PointerType<VolatileType<AtomicHalf>, LocalAS>, AtomicHalf, MemoryOrder]>;
+ def : Builtin<"atomic_store_explicit",
+ [Void, PointerType<VolatileType<AtomicHalf>, LocalAS>, AtomicHalf, MemoryOrder, MemoryScope]>;
+ def : Builtin<"atomic_load",
+ [Half, PointerType<VolatileType<AtomicHalf>, LocalAS>]>;
+ def : Builtin<"atomic_load_explicit",
+ [Half, PointerType<VolatileType<AtomicHalf>, LocalAS>, MemoryOrder]>;
+ def : Builtin<"atomic_load_explicit",
+ [Half, PointerType<VolatileType<AtomicHalf>, LocalAS>, MemoryOrder, MemoryScope]>;
+ def : Builtin<"atomic_exchange",
+ [Half, PointerType<VolatileType<AtomicHalf>, LocalAS>, Half]>;
+ def : Builtin<"atomic_exchange_explicit",
+ [Half, PointerType<VolatileType<AtomicHalf>, LocalAS>, Half, MemoryOrder]>;
+ def : Builtin<"atomic_exchange_explicit",
+ [Half, PointerType<VolatileType<AtomicHalf>, LocalAS>, Half, MemoryOrder, MemoryScope]>;
+ }
+ let Extension = FuncExtFloatAtomicsFp16GenericLoadStore in {
+ def : Builtin<"atomic_store",
+ [Void, PointerType<VolatileType<AtomicHalf>, GenericAS>, AtomicHalf]>;
+ def : Builtin<"atomic_store_explicit",
+ [Void, PointerType<VolatileType<AtomicHalf>, GenericAS>, AtomicHalf, MemoryOrder]>;
+ def : Builtin<"atomic_store_explicit",
+ [Void, PointerType<VolatileType<AtomicHalf>, GenericAS>, AtomicHalf, MemoryOrder, MemoryScope]>;
+ def : Builtin<"atomic_load",
+ [Half, PointerType<VolatileType<AtomicHalf>, GenericAS>]>;
+ def : Builtin<"atomic_load_explicit",
+ [Half, PointerType<VolatileType<AtomicHalf>, GenericAS>, MemoryOrder]>;
+ def : Builtin<"atomic_load_explicit",
+ [Half, PointerType<VolatileType<AtomicHalf>, GenericAS>, MemoryOrder, MemoryScope]>;
+ def : Builtin<"atomic_exchange",
+ [Half, PointerType<VolatileType<AtomicHalf>, GenericAS>, Half]>;
+ def : Builtin<"atomic_exchange_explicit",
+ [Half, PointerType<VolatileType<AtomicHalf>, GenericAS>, Half, MemoryOrder]>;
+ def : Builtin<"atomic_exchange_explicit",
+ [Half, PointerType<VolatileType<AtomicHalf>, GenericAS>, Half, MemoryOrder, MemoryScope]>;
+ }
+ foreach ModOp = ["add", "sub"] in {
+ let Extension = FuncExtFloatAtomicsFp16GlobalAdd in {
+ def : Builtin<"atomic_fetch_" # ModOp,
+ [Half, PointerType<VolatileType<AtomicFloat>, GlobalAS>, Half]>;
+ def : Builtin<"atomic_fetch_" # ModOp # "_explicit",
+ [Half, PointerType<VolatileType<AtomicFloat>, GlobalAS>, Half, MemoryOrder]>;
+ def : Builtin<"atomic_fetch_" # ModOp # "_explicit",
+ [Half, PointerType<VolatileType<AtomicFloat>, GlobalAS>, Half, MemoryOrder, MemoryScope]>;
+ }
+ let Extension = FuncExtFloatAtomicsFp32GlobalAdd in {
+ def : Builtin<"atomic_fetch_" # ModOp,
+ [Float, PointerType<VolatileType<AtomicFloat>, GlobalAS>, Float]>;
+ def : Builtin<"atomic_fetch_" # ModOp # "_explicit",
+ [Float, PointerType<VolatileType<AtomicFloat>, GlobalAS>, Float, MemoryOrder]>;
+ def : Builtin<"atomic_fetch_" # ModOp # "_explicit",
+ [Float, PointerType<VolatileType<AtomicFloat>, GlobalAS>, Float, MemoryOrder, MemoryScope]>;
+ }
+ let Extension = FuncExtFloatAtomicsFp64GlobalAdd in {
+ def : Builtin<"atomic_fetch_" # ModOp,
+ [Double, PointerType<VolatileType<AtomicDouble>, GlobalAS>, Double]>;
+ def : Builtin<"atomic_fetch_" # ModOp # "_explicit",
+ [Double, PointerType<VolatileType<AtomicDouble>, GlobalAS>, Double, MemoryOrder]>;
+ def : Builtin<"atomic_fetch_" # ModOp # "_explicit",
+ [Double, PointerType<VolatileType<AtomicDouble>, GlobalAS>, Double, MemoryOrder, MemoryScope]>;
+ }
+ let Extension = FuncExtFloatAtomicsFp16LocalAdd in {
+ def : Builtin<"atomic_fetch_" # ModOp,
+ [Half, PointerType<VolatileType<AtomicFloat>, LocalAS>, Half]>;
+ def : Builtin<"atomic_fetch_" # ModOp # "_explicit",
+ [Half, PointerType<VolatileType<AtomicFloat>, LocalAS>, Half, MemoryOrder]>;
+ def : Builtin<"atomic_fetch_" # ModOp # "_explicit",
+ [Half, PointerType<VolatileType<AtomicFloat>, LocalAS>, Half, MemoryOrder, MemoryScope]>;
+ }
+ let Extension = FuncExtFloatAtomicsFp32LocalAdd in {
+ def : Builtin<"atomic_fetch_" # ModOp,
+ [Float, PointerType<VolatileType<AtomicFloat>, LocalAS>, Float]>;
+ def : Builtin<"atomic_fetch_" # ModOp # "_explicit",
+ [Float, PointerType<VolatileType<AtomicFloat>, LocalAS>, Float, MemoryOrder]>;
+ def : Builtin<"atomic_fetch_" # ModOp # "_explicit",
+ [Float, PointerType<VolatileType<AtomicFloat>, LocalAS>, Float, MemoryOrder, MemoryScope]>;
+ }
+ let Extension = FuncExtFloatAtomicsFp64LocalAdd in {
+ def : Builtin<"atomic_fetch_" # ModOp,
+ [Double, PointerType<VolatileType<AtomicDouble>, LocalAS>, Double]>;
+ def : Builtin<"atomic_fetch_" # ModOp # "_explicit",
+ [Double, PointerType<VolatileType<AtomicDouble>, LocalAS>, Double, MemoryOrder]>;
+ def : Builtin<"atomic_fetch_" # ModOp # "_explicit",
+ [Double, PointerType<VolatileType<AtomicDouble>, LocalAS>, Double, MemoryOrder, MemoryScope]>;
+ }
+ let Extension = FuncExtFloatAtomicsFp16GenericAdd in {
+ def : Builtin<"atomic_fetch_" # ModOp,
+ [Half, PointerType<VolatileType<AtomicFloat>, GenericAS>, Half]>;
+ def : Builtin<"atomic_fetch_" # ModOp # "_explicit",
+ [Half, PointerType<VolatileType<AtomicFloat>, GenericAS>, Half, MemoryOrder]>;
+ def : Builtin<"atomic_fetch_" # ModOp # "_explicit",
+ [Half, PointerType<VolatileType<AtomicFloat>, GenericAS>, Half, MemoryOrder, MemoryScope]>;
+ }
+ let Extension = FuncExtFloatAtomicsFp32GenericAdd in {
+ def : Builtin<"atomic_fetch_" # ModOp,
+ [Float, PointerType<VolatileType<AtomicFloat>, GenericAS>, Float]>;
+ def : Builtin<"atomic_fetch_" # ModOp # "_explicit",
+ [Float, PointerType<VolatileType<AtomicFloat>, GenericAS>, Float, MemoryOrder]>;
+ def : Builtin<"atomic_fetch_" # ModOp # "_explicit",
+ [Float, PointerType<VolatileType<AtomicFloat>, GenericAS>, Float, MemoryOrder, MemoryScope]>;
+ }
+ let Extension = FuncExtFloatAtomicsFp64GenericAdd in {
+ def : Builtin<"atomic_fetch_" # ModOp,
+ [Double, PointerType<VolatileType<AtomicDouble>, GenericAS>, Double]>;
+ def : Builtin<"atomic_fetch_" # ModOp # "_explicit",
+ [Double, PointerType<VolatileType<AtomicDouble>, GenericAS>, Double, MemoryOrder]>;
+ def : Builtin<"atomic_fetch_" # ModOp # "_explicit",
+ [Double, PointerType<VolatileType<AtomicDouble>, GenericAS>, Double, MemoryOrder, MemoryScope]>;
+ }
+ }
+ foreach ModOp = ["min", "max"] in {
+ let Extension = FuncExtFloatAtomicsFp16GlobalMinMax in {
+ def : Builtin<"atomic_fetch_" # ModOp,
+ [Half, PointerType<VolatileType<AtomicHalf>, GlobalAS>, Half]>;
+ def : Builtin<"atomic_fetch_" # ModOp # "_explicit",
+ [Half, PointerType<VolatileType<AtomicHalf>, GlobalAS>, Half, MemoryOrder]>;
+ def : Builtin<"atomic_fetch_" # ModOp # "_explicit",
+ [Half, PointerType<VolatileType<AtomicHalf>, GlobalAS>, Half, MemoryOrder, MemoryScope]>;
+ }
+ let Extension = FuncExtFloatAtomicsFp32GlobalMinMax in {
+ def : Builtin<"atomic_fetch_" # ModOp,
+ [Float, PointerType<VolatileType<AtomicFloat>, GlobalAS>, Float]>;
+ def : Builtin<"atomic_fetch_" # ModOp # "_explicit",
+ [Float, PointerType<VolatileType<AtomicFloat>, GlobalAS>, Float, MemoryOrder]>;
+ def : Builtin<"atomic_fetch_" # ModOp # "_explicit",
+ [Float, PointerType<VolatileType<AtomicFloat>, GlobalAS>, Float, MemoryOrder, MemoryScope]>;
+ }
+ let Extension = FuncExtFloatAtomicsFp64GlobalMinMax in {
+ def : Builtin<"atomic_fetch_" # ModOp,
+ [Double, PointerType<VolatileType<AtomicDouble>, GlobalAS>, Double]>;
+ def : Builtin<"atomic_fetch_" # ModOp # "_explicit",
+ [Double, PointerType<VolatileType<AtomicDouble>, GlobalAS>, Double, MemoryOrder]>;
+ def : Builtin<"atomic_fetch_" # ModOp # "_explicit",
+ [Double, PointerType<VolatileType<AtomicDouble>, GlobalAS>, Double, MemoryOrder, MemoryScope]>;
+ }
+ let Extension = FuncExtFloatAtomicsFp16LocalMinMax in {
+ def : Builtin<"atomic_fetch_" # ModOp,
+ [Half, PointerType<VolatileType<AtomicHalf>, LocalAS>, Half]>;
+ def : Builtin<"atomic_fetch_" # ModOp # "_explicit",
+ [Half, PointerType<VolatileType<AtomicHalf>, LocalAS>, Half, MemoryOrder]>;
+ def : Builtin<"atomic_fetch_" # ModOp # "_explicit",
+ [Half, PointerType<VolatileType<AtomicHalf>, LocalAS>, Half, MemoryOrder, MemoryScope]>;
+ }
+ let Extension = FuncExtFloatAtomicsFp32LocalMinMax in {
+ def : Builtin<"atomic_fetch_" # ModOp,
+ [Float, PointerType<VolatileType<AtomicFloat>, LocalAS>, Float]>;
+ def : Builtin<"atomic_fetch_" # ModOp # "_explicit",
+ [Float, PointerType<VolatileType<AtomicFloat>, LocalAS>, Float, MemoryOrder]>;
+ def : Builtin<"atomic_fetch_" # ModOp # "_explicit",
+ [Float, PointerType<VolatileType<AtomicFloat>, LocalAS>, Float, MemoryOrder, MemoryScope]>;
+ }
+ let Extension = FuncExtFloatAtomicsFp64LocalMinMax in {
+ def : Builtin<"atomic_fetch_" # ModOp,
+ [Double, PointerType<VolatileType<AtomicDouble>, LocalAS>, Double]>;
+ def : Builtin<"atomic_fetch_" # ModOp # "_explicit",
+ [Double, PointerType<VolatileType<AtomicDouble>, LocalAS>, Double, MemoryOrder]>;
+ def : Builtin<"atomic_fetch_" # ModOp # "_explicit",
+ [Double, PointerType<VolatileType<AtomicDouble>, LocalAS>, Double, MemoryOrder, MemoryScope]>;
+ }
+ let Extension = FuncExtFloatAtomicsFp16GenericMinMax in {
+ def : Builtin<"atomic_fetch_" # ModOp,
+ [Half, PointerType<VolatileType<AtomicHalf>, GenericAS>, Half]>;
+ def : Builtin<"atomic_fetch_" # ModOp # "_explicit",
+ [Half, PointerType<VolatileType<AtomicHalf>, GenericAS>, Half, MemoryOrder]>;
+ def : Builtin<"atomic_fetch_" # ModOp # "_explicit",
+ [Half, PointerType<VolatileType<AtomicHalf>, GenericAS>, Half, MemoryOrder, MemoryScope]>;
+ }
+ let Extension = FuncExtFloatAtomicsFp32GenericMinMax in {
+ def : Builtin<"atomic_fetch_" # ModOp,
+ [Float, PointerType<VolatileType<AtomicFloat>, GenericAS>, Float]>;
+ def : Builtin<"atomic_fetch_" # ModOp # "_explicit",
+ [Float, PointerType<VolatileType<AtomicFloat>, GenericAS>, Float, MemoryOrder]>;
+ def : Builtin<"atomic_fetch_" # ModOp # "_explicit",
+ [Float, PointerType<VolatileType<AtomicFloat>, GenericAS>, Float, MemoryOrder, MemoryScope]>;
+ }
+ let Extension = FuncExtFloatAtomicsFp64GenericMinMax in {
+ def : Builtin<"atomic_fetch_" # ModOp,
+ [Double, PointerType<VolatileType<AtomicDouble>, GenericAS>, Double]>;
+ def : Builtin<"atomic_fetch_" # ModOp # "_explicit",
+ [Double, PointerType<VolatileType<AtomicDouble>, GenericAS>, Double, MemoryOrder]>;
+ def : Builtin<"atomic_fetch_" # ModOp # "_explicit",
+ [Double, PointerType<VolatileType<AtomicDouble>, GenericAS>, Double, MemoryOrder, MemoryScope]>;
+ }
+ }
+}
+
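The block above declares the overloads introduced by the cl_ext_float_atomics extension, with each type/address-space combination gated behind its own function extension (FuncExtFloatAtomicsFp32GlobalAdd and friends). A hedged OpenCL C sketch of a kernel using the fp32 global add form, assuming a device that reports the corresponding __opencl_c_ext_fp32_global_atomic_add feature:

// Illustrative only: relaxed, device-scope floating-point accumulation.
__kernel void accumulate(__global atomic_float *sum, __global const float *data) {
  size_t gid = get_global_id(0);
  atomic_fetch_add_explicit(sum, data[gid], memory_order_relaxed, memory_scope_device);
}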
//--------------------------------------------------------------------
// OpenCL v1.1 s6.11.12, v1.2 s6.12.12, v2.0 s6.13.12 - Miscellaneous Vector Functions
// --- Table 19 ---
@@ -1370,7 +1519,7 @@ foreach aQual = ["WO", "RW"] in {
//--------------------------------------------------------------------
// OpenCL v2.0 s6.13.15 - Work-group Functions
// --- Table 26 ---
-let MinVersion = CL20 in {
+let Extension = FuncExtOpenCLCWGCollectiveFunctions in {
foreach name = ["work_group_all", "work_group_any"] in {
def : Builtin<name, [Int, Int], Attr.Convergent>;
}
@@ -1395,7 +1544,9 @@ let MinVersion = CL20 in {
// --- Table 28 ---
// Builtins taking pipe arguments are defined in Builtins.def
-def : Builtin<"is_valid_reserve_id", [Bool, ReserveId]>;
+let Extension = FuncExtOpenCLCPipes in {
+ def : Builtin<"is_valid_reserve_id", [Bool, ReserveId]>;
+}
// --- Table 29 ---
// Defined in Builtins.def
diff --git a/clang/lib/Sema/ParsedAttr.cpp b/clang/lib/Sema/ParsedAttr.cpp
index ed03b0c7f688..045847d0ce0f 100644
--- a/clang/lib/Sema/ParsedAttr.cpp
+++ b/clang/lib/Sema/ParsedAttr.cpp
@@ -145,6 +145,10 @@ const ParsedAttrInfo &ParsedAttrInfo::get(const AttributeCommonInfo &A) {
return DefaultParsedAttrInfo;
}
+ArrayRef<const ParsedAttrInfo *> ParsedAttrInfo::getAllBuiltin() {
+ return llvm::makeArrayRef(AttrInfoMap);
+}
+
unsigned ParsedAttr::getMinArgs() const { return getInfo().NumArgs; }
unsigned ParsedAttr::getMaxArgs() const {
@@ -180,7 +184,10 @@ void ParsedAttr::getMatchRules(
}
bool ParsedAttr::diagnoseLangOpts(Sema &S) const {
- return getInfo().diagLangOpts(S, *this);
+ if (getInfo().acceptsLangOpts(S.getLangOpts()))
+ return true;
+ S.Diag(getLoc(), diag::warn_attribute_ignored) << *this;
+ return false;
}
bool ParsedAttr::isTargetSpecificAttr() const {
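Two changes land in ParsedAttr.cpp: getAllBuiltin() exposes the table of built-in attribute descriptors as an ArrayRef, and diagnoseLangOpts() now emits the generic "attribute ignored" warning itself instead of delegating to a per-attribute diagLangOpts hook. A hypothetical C++ sketch of consuming the new accessor (assuming it is a static member, as the definition above suggests):

// Hypothetical tooling snippet; reports how many built-in attributes clang knows about.
#include "clang/Sema/ParsedAttr.h"
#include "llvm/Support/raw_ostream.h"

static void reportBuiltinAttrCount() {
  auto Infos = clang::ParsedAttrInfo::getAllBuiltin();
  llvm::outs() << Infos.size() << " built-in attribute descriptors\n";
}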
diff --git a/clang/lib/Sema/ScopeInfo.cpp b/clang/lib/Sema/ScopeInfo.cpp
index 4857346018ae..cc7de16de2fd 100644
--- a/clang/lib/Sema/ScopeInfo.cpp
+++ b/clang/lib/Sema/ScopeInfo.cpp
@@ -37,7 +37,7 @@ void FunctionScopeInfo::Clear() {
ObjCIsSecondaryInit = false;
ObjCWarnForNoInitDelegation = false;
FirstReturnLoc = SourceLocation();
- FirstCXXTryLoc = SourceLocation();
+ FirstCXXOrObjCTryLoc = SourceLocation();
FirstSEHTryLoc = SourceLocation();
// Coroutine state
diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp
index 5d3de06e9576..a2b8f475aa8c 100644
--- a/clang/lib/Sema/Sema.cpp
+++ b/clang/lib/Sema/Sema.cpp
@@ -169,7 +169,7 @@ public:
} // end namespace clang
const unsigned Sema::MaxAlignmentExponent;
-const unsigned Sema::MaximumAlignment;
+const uint64_t Sema::MaximumAlignment;
Sema::Sema(Preprocessor &pp, ASTContext &ctxt, ASTConsumer &consumer,
TranslationUnitKind TUKind, CodeCompleteConsumer *CodeCompleter)
@@ -324,10 +324,11 @@ void Sema::Initialize() {
Context.getTargetInfo().getSupportedOpenCLOpts(), getLangOpts());
addImplicitTypedef("sampler_t", Context.OCLSamplerTy);
addImplicitTypedef("event_t", Context.OCLEventTy);
- if (getLangOpts().OpenCLCPlusPlus || getLangOpts().OpenCLVersion >= 200) {
+ if (getLangOpts().getOpenCLCompatibleVersion() >= 200) {
addImplicitTypedef("clk_event_t", Context.OCLClkEventTy);
addImplicitTypedef("queue_t", Context.OCLQueueTy);
- addImplicitTypedef("reserve_id_t", Context.OCLReserveIDTy);
+ if (getLangOpts().OpenCLPipes)
+ addImplicitTypedef("reserve_id_t", Context.OCLReserveIDTy);
addImplicitTypedef("atomic_int", Context.getAtomicType(Context.IntTy));
addImplicitTypedef("atomic_uint",
Context.getAtomicType(Context.UnsignedIntTy));
@@ -366,6 +367,11 @@ void Sema::Initialize() {
AddPointerSizeDependentTypes();
}
+ if (getOpenCLOptions().isSupported("cl_khr_fp16", getLangOpts())) {
+ auto AtomicHalfT = Context.getAtomicType(Context.HalfTy);
+ addImplicitTypedef("atomic_half", AtomicHalfT);
+ }
+
std::vector<QualType> Atomic64BitTypes;
if (getOpenCLOptions().isSupported("cl_khr_int64_base_atomics",
getLangOpts()) &&
@@ -402,13 +408,10 @@ void Sema::Initialize() {
#include "clang/Basic/AArch64SVEACLETypes.def"
}
- if (Context.getTargetInfo().getTriple().isPPC64() &&
- Context.getTargetInfo().hasFeature("paired-vector-memops")) {
- if (Context.getTargetInfo().hasFeature("mma")) {
+ if (Context.getTargetInfo().getTriple().isPPC64()) {
#define PPC_VECTOR_MMA_TYPE(Name, Id, Size) \
addImplicitTypedef(#Name, Context.Id##Ty);
#include "clang/Basic/PPCTypes.def"
- }
#define PPC_VECTOR_VSX_TYPE(Name, Id, Size) \
addImplicitTypedef(#Name, Context.Id##Ty);
#include "clang/Basic/PPCTypes.def"
@@ -1429,7 +1432,7 @@ NamedDecl *Sema::getCurFunctionOrMethodDecl() {
LangAS Sema::getDefaultCXXMethodAddrSpace() const {
if (getLangOpts().OpenCL)
- return LangAS::opencl_generic;
+ return getASTContext().getDefaultOpenCLPointeeAddrSpace();
return LangAS::Default;
}
@@ -1851,8 +1854,8 @@ Sema::SemaDiagnosticBuilder Sema::Diag(SourceLocation Loc, unsigned DiagID,
return DB;
}
-void Sema::checkDeviceDecl(ValueDecl *D, SourceLocation Loc) {
- if (isUnevaluatedContext())
+void Sema::checkTypeSupport(QualType Ty, SourceLocation Loc, ValueDecl *D) {
+ if (isUnevaluatedContext() || Ty.isNull())
return;
Decl *C = cast<Decl>(getCurLexicalContext());
@@ -1871,46 +1874,114 @@ void Sema::checkDeviceDecl(ValueDecl *D, SourceLocation Loc) {
// Try to associate errors with the lexical context, if that is a function, or
// the value declaration otherwise.
- FunctionDecl *FD =
- isa<FunctionDecl>(C) ? cast<FunctionDecl>(C) : dyn_cast<FunctionDecl>(D);
- auto CheckType = [&](QualType Ty) {
+ FunctionDecl *FD = isa<FunctionDecl>(C) ? cast<FunctionDecl>(C)
+ : dyn_cast_or_null<FunctionDecl>(D);
+
+ auto CheckDeviceType = [&](QualType Ty) {
if (Ty->isDependentType())
return;
if (Ty->isExtIntType()) {
if (!Context.getTargetInfo().hasExtIntType()) {
- targetDiag(Loc, diag::err_device_unsupported_type, FD)
- << D << false /*show bit size*/ << 0 /*bitsize*/
+ PartialDiagnostic PD = PDiag(diag::err_target_unsupported_type);
+ if (D)
+ PD << D;
+ else
+ PD << "expression";
+ targetDiag(Loc, PD, FD)
+ << false /*show bit size*/ << 0 /*bitsize*/ << false /*return*/
<< Ty << Context.getTargetInfo().getTriple().str();
}
return;
}
+ // Check if we are dealing with two 'long double' types that have
+ // different semantics.
+ bool LongDoubleMismatched = false;
+ if (Ty->isRealFloatingType() && Context.getTypeSize(Ty) == 128) {
+ const llvm::fltSemantics &Sem = Context.getFloatTypeSemantics(Ty);
+ if ((&Sem != &llvm::APFloat::PPCDoubleDouble() &&
+ !Context.getTargetInfo().hasFloat128Type()) ||
+ (&Sem == &llvm::APFloat::PPCDoubleDouble() &&
+ !Context.getTargetInfo().hasIbm128Type()))
+ LongDoubleMismatched = true;
+ }
+
if ((Ty->isFloat16Type() && !Context.getTargetInfo().hasFloat16Type()) ||
- ((Ty->isFloat128Type() ||
- (Ty->isRealFloatingType() && Context.getTypeSize(Ty) == 128)) &&
- !Context.getTargetInfo().hasFloat128Type()) ||
+ (Ty->isFloat128Type() && !Context.getTargetInfo().hasFloat128Type()) ||
+ (Ty->isIbm128Type() && !Context.getTargetInfo().hasIbm128Type()) ||
(Ty->isIntegerType() && Context.getTypeSize(Ty) == 128 &&
- !Context.getTargetInfo().hasInt128Type())) {
- if (targetDiag(Loc, diag::err_device_unsupported_type, FD)
- << D << true /*show bit size*/
+ !Context.getTargetInfo().hasInt128Type()) ||
+ LongDoubleMismatched) {
+ PartialDiagnostic PD = PDiag(diag::err_target_unsupported_type);
+ if (D)
+ PD << D;
+ else
+ PD << "expression";
+
+ if (targetDiag(Loc, PD, FD)
+ << true /*show bit size*/
<< static_cast<unsigned>(Context.getTypeSize(Ty)) << Ty
- << Context.getTargetInfo().getTriple().str())
- D->setInvalidDecl();
- targetDiag(D->getLocation(), diag::note_defined_here, FD) << D;
+ << false /*return*/ << Context.getTargetInfo().getTriple().str()) {
+ if (D)
+ D->setInvalidDecl();
+ }
+ if (D)
+ targetDiag(D->getLocation(), diag::note_defined_here, FD) << D;
}
};
- QualType Ty = D->getType();
- CheckType(Ty);
+ auto CheckType = [&](QualType Ty, bool IsRetTy = false) {
+ if (LangOpts.SYCLIsDevice || (LangOpts.OpenMP && LangOpts.OpenMPIsDevice))
+ CheckDeviceType(Ty);
+
+ QualType UnqualTy = Ty.getCanonicalType().getUnqualifiedType();
+ const TargetInfo &TI = Context.getTargetInfo();
+ if (!TI.hasLongDoubleType() && UnqualTy == Context.LongDoubleTy) {
+ PartialDiagnostic PD = PDiag(diag::err_target_unsupported_type);
+ if (D)
+ PD << D;
+ else
+ PD << "expression";
+
+ if (Diag(Loc, PD, FD)
+ << false /*show bit size*/ << 0 << Ty << false /*return*/
+ << Context.getTargetInfo().getTriple().str()) {
+ if (D)
+ D->setInvalidDecl();
+ }
+ if (D)
+ targetDiag(D->getLocation(), diag::note_defined_here, FD) << D;
+ }
+
+ bool IsDouble = UnqualTy == Context.DoubleTy;
+ bool IsFloat = UnqualTy == Context.FloatTy;
+ if (IsRetTy && !TI.hasFPReturn() && (IsDouble || IsFloat)) {
+ PartialDiagnostic PD = PDiag(diag::err_target_unsupported_type);
+ if (D)
+ PD << D;
+ else
+ PD << "expression";
+ if (Diag(Loc, PD, FD)
+ << false /*show bit size*/ << 0 << Ty << true /*return*/
+ << Context.getTargetInfo().getTriple().str()) {
+ if (D)
+ D->setInvalidDecl();
+ }
+ if (D)
+ targetDiag(D->getLocation(), diag::note_defined_here, FD) << D;
+ }
+ };
+
+ CheckType(Ty);
if (const auto *FPTy = dyn_cast<FunctionProtoType>(Ty)) {
for (const auto &ParamTy : FPTy->param_types())
CheckType(ParamTy);
- CheckType(FPTy->getReturnType());
+ CheckType(FPTy->getReturnType(), /*IsRetTy=*/true);
}
if (const auto *FNPTy = dyn_cast<FunctionNoProtoType>(Ty))
- CheckType(FNPTy->getReturnType());
+ CheckType(FNPTy->getReturnType(), /*IsRetTy=*/true);
}
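checkDeviceDecl() is generalized into checkTypeSupport(): the device-only checks (_ExtInt, _Float16, 128-bit floats and integers) still run only for SYCL and OpenMP device compilation, while the new host-side checks reject 'long double' on targets without a supported long double and float/double return types on targets without floating-point return support. Illustrative C that such a target would now diagnose with err_target_unsupported_type (which targets actually lack these features is an assumption, not stated in the patch):

/* Both declarations would be rejected on a target where
 * TargetInfo::hasLongDoubleType() or hasFPReturn() is false. */
long double scale_factor;   /* unsupported 'long double' type */
double get_ratio(void);     /* unsupported floating-point return type */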
/// Looks through the macro-expansion chain for the given
@@ -2009,7 +2080,7 @@ static void checkEscapingByref(VarDecl *VD, Sema &S) {
Expr *VarRef =
new (S.Context) DeclRefExpr(S.Context, VD, false, T, VK_LValue, Loc);
ExprResult Result;
- auto IE = InitializedEntity::InitializeBlock(Loc, T, false);
+ auto IE = InitializedEntity::InitializeBlock(Loc, T);
if (S.getLangOpts().CPlusPlus2b) {
auto *E = ImplicitCastExpr::Create(S.Context, T, CK_NoOp, VarRef, nullptr,
VK_XValue, FPOptionsOverride());
diff --git a/clang/lib/Sema/SemaAccess.cpp b/clang/lib/Sema/SemaAccess.cpp
index 580305c1110b..3f7b387ec925 100644
--- a/clang/lib/Sema/SemaAccess.cpp
+++ b/clang/lib/Sema/SemaAccess.cpp
@@ -140,7 +140,7 @@ struct EffectiveContext {
bool includesClass(const CXXRecordDecl *R) const {
R = R->getCanonicalDecl();
- return llvm::find(Records, R) != Records.end();
+ return llvm::is_contained(Records, R);
}
/// Retrieves the innermost "useful" context. Can be null if we're
diff --git a/clang/lib/Sema/SemaAttr.cpp b/clang/lib/Sema/SemaAttr.cpp
index fe8f02f02368..100f8e36a9b8 100644
--- a/clang/lib/Sema/SemaAttr.cpp
+++ b/clang/lib/Sema/SemaAttr.cpp
@@ -340,7 +340,7 @@ void Sema::ActOnPragmaPack(SourceLocation PragmaLoc, PragmaMsStackAction Action,
// pack(0) is like pack(), which just works out since that is what
// we use 0 for in PackAttr.
- if (Alignment->isTypeDependent() || Alignment->isValueDependent() || !Val ||
+ if (Alignment->isTypeDependent() || !Val ||
!(*Val == 0 || Val->isPowerOf2()) || Val->getZExtValue() > 16) {
Diag(PragmaLoc, diag::warn_pragma_pack_invalid_alignment);
return; // Ignore
@@ -475,8 +475,9 @@ void Sema::ActOnPragmaFloatControl(SourceLocation Loc,
PragmaFloatControlKind Value) {
FPOptionsOverride NewFPFeatures = CurFPFeatureOverrides();
if ((Action == PSK_Push_Set || Action == PSK_Push || Action == PSK_Pop) &&
- !(CurContext->isTranslationUnit()) && !CurContext->isNamespace()) {
- // Push and pop can only occur at file or namespace scope.
+ !CurContext->getRedeclContext()->isFileContext()) {
+ // Push and pop can only occur at file or namespace scope, or within a
+ // language linkage declaration.
Diag(Loc, diag::err_pragma_fc_pp_scope);
return;
}
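The #pragma float_control scope check now asks whether the redeclaration context is a file context, so push/pop is accepted inside an extern "C" (language linkage) block in addition to file and namespace scope. A small C++ example of code accepted after this change:

// The linkage specification is still a file context, so push/pop is allowed here.
extern "C" {
#pragma float_control(precise, off, push)
double fast_mul(double a, double b) { return a * b; }
#pragma float_control(pop)
}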
diff --git a/clang/lib/Sema/SemaAvailability.cpp b/clang/lib/Sema/SemaAvailability.cpp
index bb704b1066cf..dc15c87cb717 100644
--- a/clang/lib/Sema/SemaAvailability.cpp
+++ b/clang/lib/Sema/SemaAvailability.cpp
@@ -57,7 +57,7 @@ static const AvailabilityAttr *getAttrForPlatform(ASTContext &Context,
/// \param D The declaration to check.
/// \param Message If non-null, this will be populated with the message from
/// the availability attribute that is selected.
-/// \param ClassReceiver If we're checking the the method of a class message
+/// \param ClassReceiver If we're checking the method of a class message
/// send, the class. Otherwise nullptr.
static std::pair<AvailabilityResult, const NamedDecl *>
ShouldDiagnoseAvailabilityOfDecl(Sema &S, const NamedDecl *D,
@@ -268,7 +268,7 @@ tryParseObjCMethodName(StringRef Name, SmallVectorImpl<StringRef> &SlotNames,
for (StringRef S : SlotNames) {
if (S.empty())
continue;
- if (!isValidIdentifier(S, AllowDollar))
+ if (!isValidAsciiIdentifier(S, AllowDollar))
return None;
}
return NumParams;
@@ -630,8 +630,7 @@ public:
const CompoundStmt *Scope) {
LastDeclUSEFinder Visitor;
Visitor.D = D;
- for (auto I = Scope->body_rbegin(), E = Scope->body_rend(); I != E; ++I) {
- const Stmt *S = *I;
+ for (const Stmt *S : llvm::reverse(Scope->body())) {
if (!Visitor.TraverseStmt(const_cast<Stmt *>(S)))
return S;
}
diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp
index 75364c10c154..840b3daae63c 100644
--- a/clang/lib/Sema/SemaCUDA.cpp
+++ b/clang/lib/Sema/SemaCUDA.cpp
@@ -878,8 +878,13 @@ void Sema::CUDACheckLambdaCapture(CXXMethodDecl *Callee,
diag::err_capture_bad_target, Callee, *this)
<< Capture.getVariable();
} else if (Capture.isThisCapture()) {
+ // Capturing the 'this' pointer is allowed since it may point to managed
+ // memory that is accessible from both the device and the host. It only
+ // results in an invalid memory access if 'this' points to memory that is
+ // not accessible from the device.
SemaDiagnosticBuilder(DiagKind, Capture.getLocation(),
- diag::err_capture_bad_target_this_ptr, Callee, *this);
+ diag::warn_maybe_capture_bad_target_this_ptr, Callee,
+ *this);
}
return;
}
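Capturing 'this' in a lambda that crosses the host/device boundary was previously a hard error; it is now the warning warn_maybe_capture_bad_target_this_ptr, because the pointer may refer to managed memory that both sides can access. A hypothetical CUDA C++ sketch of the pattern, with the kernel launch elided:

// Illustrative only: a host member function building a device lambda that
// captures 'this'. With managed allocations this can be valid at run time,
// so clang now warns instead of rejecting it outright.
struct Accumulator {
  float bias;
  void launch() {
    auto op = [this] __device__ (float x) { return x + bias; };
    (void)op;  // ... would be passed to a kernel or parallel algorithm ...
  }
};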
diff --git a/clang/lib/Sema/SemaCXXScopeSpec.cpp b/clang/lib/Sema/SemaCXXScopeSpec.cpp
index 1c8f6329bd67..c4826b5a6e8f 100644
--- a/clang/lib/Sema/SemaCXXScopeSpec.cpp
+++ b/clang/lib/Sema/SemaCXXScopeSpec.cpp
@@ -442,7 +442,7 @@ bool Sema::isNonTypeNestedNameSpecifier(Scope *S, CXXScopeSpec &SS,
namespace {
// Callback to only accept typo corrections that can be a valid C++ member
-// intializer: either a non-static field member or a base class.
+// initializer: either a non-static field member or a base class.
class NestedNameSpecifierValidatorCCC final
: public CorrectionCandidateCallback {
public:
@@ -865,7 +865,7 @@ bool Sema::ActOnCXXNestedNameSpecifierDecltype(CXXScopeSpec &SS,
assert(DS.getTypeSpecType() == DeclSpec::TST_decltype);
- QualType T = BuildDecltypeType(DS.getRepAsExpr(), DS.getTypeSpecTypeLoc());
+ QualType T = BuildDecltypeType(DS.getRepAsExpr());
if (T.isNull())
return true;
diff --git a/clang/lib/Sema/SemaCast.cpp b/clang/lib/Sema/SemaCast.cpp
index cac43075f860..7ef1732496c2 100644
--- a/clang/lib/Sema/SemaCast.cpp
+++ b/clang/lib/Sema/SemaCast.cpp
@@ -1313,7 +1313,9 @@ static TryCastResult TryStaticCast(Sema &Self, ExprResult &SrcExpr,
// lvalue-to-rvalue, array-to-pointer, function-to-pointer, and boolean
// conversions, subject to further restrictions.
// Also, C++ 5.2.9p1 forbids casting away constness, which makes reversal
- // of qualification conversions impossible.
+ // of qualification conversions impossible. (In C++20, adding an array bound
+ // would be the reverse of a qualification conversion, but adding permission
+ // to add an array bound in a static_cast is a wording oversight.)
// In the CStyle case, the earlier attempt to const_cast should have taken
// care of reverse qualification conversions.
@@ -2637,6 +2639,19 @@ bool Sema::ShouldSplatAltivecScalarInCast(const VectorType *VecTy) {
return false;
}
+bool Sema::CheckAltivecInitFromScalar(SourceRange R, QualType VecTy,
+ QualType SrcTy) {
+ bool SrcCompatGCC = this->getLangOpts().getAltivecSrcCompat() ==
+ LangOptions::AltivecSrcCompatKind::GCC;
+ if (this->getLangOpts().AltiVec && SrcCompatGCC) {
+ this->Diag(R.getBegin(),
+ diag::err_invalid_conversion_between_vector_and_integer)
+ << VecTy << SrcTy << R;
+ return true;
+ }
+ return false;
+}
+
void CastOperation::CheckCXXCStyleCast(bool FunctionalStyle,
bool ListInitialization) {
assert(Self.getLangOpts().CPlusPlus);
@@ -2690,7 +2705,12 @@ void CastOperation::CheckCXXCStyleCast(bool FunctionalStyle,
}
// AltiVec vector initialization with a single literal.
- if (const VectorType *vecTy = DestType->getAs<VectorType>())
+ if (const VectorType *vecTy = DestType->getAs<VectorType>()) {
+ if (Self.CheckAltivecInitFromScalar(OpRange, DestType,
+ SrcExpr.get()->getType())) {
+ SrcExpr = ExprError();
+ return;
+ }
if (Self.ShouldSplatAltivecScalarInCast(vecTy) &&
(SrcExpr.get()->getType()->isIntegerType() ||
SrcExpr.get()->getType()->isFloatingType())) {
@@ -2698,6 +2718,7 @@ void CastOperation::CheckCXXCStyleCast(bool FunctionalStyle,
SrcExpr = Self.prepareVectorSplat(DestType, SrcExpr.get());
return;
}
+ }
// C++ [expr.cast]p5: The conversions performed by
// - a const_cast,
@@ -2976,6 +2997,10 @@ void CastOperation::CheckCStyleCast() {
}
if (const VectorType *DestVecTy = DestType->getAs<VectorType>()) {
+ if (Self.CheckAltivecInitFromScalar(OpRange, DestType, SrcType)) {
+ SrcExpr = ExprError();
+ return;
+ }
if (Self.ShouldSplatAltivecScalarInCast(DestVecTy) &&
(SrcType->isIntegerType() || SrcType->isFloatingType())) {
Kind = CK_VectorSplat;
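CheckAltivecInitFromScalar() aligns clang with GCC when -faltivec-src-compat=gcc is in effect: casting a lone scalar to an AltiVec vector type is rejected instead of being splatted into every element. Illustrative C for a -maltivec compile:

/* In the XL-compatible modes this splats 1 into all four elements; with
 * -faltivec-src-compat=gcc it now produces
 * err_invalid_conversion_between_vector_and_integer. */
vector int v = (vector int)1;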
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index de75c10417e7..6ffd2096cbc5 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -408,6 +408,64 @@ static bool SemaBuiltinCallWithStaticChain(Sema &S, CallExpr *BuiltinCall) {
namespace {
+class ScanfDiagnosticFormatHandler
+ : public analyze_format_string::FormatStringHandler {
+ // Accepts the argument index (relative to the first destination index) of the
+ // argument whose size we want.
+ using ComputeSizeFunction =
+ llvm::function_ref<Optional<llvm::APSInt>(unsigned)>;
+
+ // Accepts the argument index (relative to the first destination index), the
+ // destination size, and the source size.
+ using DiagnoseFunction =
+ llvm::function_ref<void(unsigned, unsigned, unsigned)>;
+
+ ComputeSizeFunction ComputeSizeArgument;
+ DiagnoseFunction Diagnose;
+
+public:
+ ScanfDiagnosticFormatHandler(ComputeSizeFunction ComputeSizeArgument,
+ DiagnoseFunction Diagnose)
+ : ComputeSizeArgument(ComputeSizeArgument), Diagnose(Diagnose) {}
+
+ bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
+ const char *StartSpecifier,
+ unsigned specifierLen) override {
+ if (!FS.consumesDataArgument())
+ return true;
+
+ unsigned NulByte = 0;
+ switch ((FS.getConversionSpecifier().getKind())) {
+ default:
+ return true;
+ case analyze_format_string::ConversionSpecifier::sArg:
+ case analyze_format_string::ConversionSpecifier::ScanListArg:
+ NulByte = 1;
+ break;
+ case analyze_format_string::ConversionSpecifier::cArg:
+ break;
+ }
+
+ auto OptionalFW = FS.getFieldWidth();
+ if (OptionalFW.getHowSpecified() !=
+ analyze_format_string::OptionalAmount::HowSpecified::Constant)
+ return true;
+
+ unsigned SourceSize = OptionalFW.getConstantAmount() + NulByte;
+
+ auto DestSizeAPS = ComputeSizeArgument(FS.getArgIndex());
+ if (!DestSizeAPS)
+ return true;
+
+ unsigned DestSize = DestSizeAPS->getZExtValue();
+
+ if (DestSize < SourceSize)
+ Diagnose(FS.getArgIndex(), DestSize, SourceSize);
+
+ return true;
+ }
+};
+
class EstimateSizeFormatHandler
: public analyze_format_string::FormatStringHandler {
size_t Size;
@@ -588,14 +646,8 @@ private:
} // namespace
-/// Check a call to BuiltinID for buffer overflows. If BuiltinID is a
-/// __builtin_*_chk function, then use the object size argument specified in the
-/// source. Otherwise, infer the object size using __builtin_object_size.
void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
CallExpr *TheCall) {
- // FIXME: There are some more useful checks we could be doing here:
- // - Evaluate strlen of strcpy arguments, use as object size.
-
if (TheCall->isValueDependent() || TheCall->isTypeDependent() ||
isConstantEvaluated())
return;
@@ -607,13 +659,138 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
const TargetInfo &TI = getASTContext().getTargetInfo();
unsigned SizeTypeWidth = TI.getTypeWidth(TI.getSizeType());
+ auto ComputeExplicitObjectSizeArgument =
+ [&](unsigned Index) -> Optional<llvm::APSInt> {
+ Expr::EvalResult Result;
+ Expr *SizeArg = TheCall->getArg(Index);
+ if (!SizeArg->EvaluateAsInt(Result, getASTContext()))
+ return llvm::None;
+ return Result.Val.getInt();
+ };
+
+ auto ComputeSizeArgument = [&](unsigned Index) -> Optional<llvm::APSInt> {
+ // If the parameter has a pass_object_size attribute, then we should use its
+ // (potentially) more strict checking mode. Otherwise, conservatively assume
+ // type 0.
+ int BOSType = 0;
+ // This check can fail for variadic functions.
+ if (Index < FD->getNumParams()) {
+ if (const auto *POS =
+ FD->getParamDecl(Index)->getAttr<PassObjectSizeAttr>())
+ BOSType = POS->getType();
+ }
+
+ const Expr *ObjArg = TheCall->getArg(Index);
+ uint64_t Result;
+ if (!ObjArg->tryEvaluateObjectSize(Result, getASTContext(), BOSType))
+ return llvm::None;
+
+ // Get the object size in the target's size_t width.
+ return llvm::APSInt::getUnsigned(Result).extOrTrunc(SizeTypeWidth);
+ };
+
+ auto ComputeStrLenArgument = [&](unsigned Index) -> Optional<llvm::APSInt> {
+ Expr *ObjArg = TheCall->getArg(Index);
+ uint64_t Result;
+ if (!ObjArg->tryEvaluateStrLen(Result, getASTContext()))
+ return llvm::None;
+ // Add 1 for null byte.
+ return llvm::APSInt::getUnsigned(Result + 1).extOrTrunc(SizeTypeWidth);
+ };
+
+ Optional<llvm::APSInt> SourceSize;
+ Optional<llvm::APSInt> DestinationSize;
unsigned DiagID = 0;
bool IsChkVariant = false;
- Optional<llvm::APSInt> UsedSize;
- unsigned SizeIndex, ObjectIndex;
+
+ auto GetFunctionName = [&]() {
+ StringRef FunctionName = getASTContext().BuiltinInfo.getName(BuiltinID);
+ // Skim off the details of whichever builtin was called to produce a better
+ // diagnostic, as it's unlikely that the user wrote the __builtin
+ // explicitly.
+ if (IsChkVariant) {
+ FunctionName = FunctionName.drop_front(std::strlen("__builtin___"));
+ FunctionName = FunctionName.drop_back(std::strlen("_chk"));
+ } else if (FunctionName.startswith("__builtin_")) {
+ FunctionName = FunctionName.drop_front(std::strlen("__builtin_"));
+ }
+ return FunctionName;
+ };
+
switch (BuiltinID) {
default:
return;
+ case Builtin::BI__builtin_strcpy:
+ case Builtin::BIstrcpy: {
+ DiagID = diag::warn_fortify_strlen_overflow;
+ SourceSize = ComputeStrLenArgument(1);
+ DestinationSize = ComputeSizeArgument(0);
+ break;
+ }
+
+ case Builtin::BI__builtin___strcpy_chk: {
+ DiagID = diag::warn_fortify_strlen_overflow;
+ SourceSize = ComputeStrLenArgument(1);
+ DestinationSize = ComputeExplicitObjectSizeArgument(2);
+ IsChkVariant = true;
+ break;
+ }
+
+ case Builtin::BIscanf:
+ case Builtin::BIfscanf:
+ case Builtin::BIsscanf: {
+ unsigned FormatIndex = 1;
+ unsigned DataIndex = 2;
+ if (BuiltinID == Builtin::BIscanf) {
+ FormatIndex = 0;
+ DataIndex = 1;
+ }
+
+ const auto *FormatExpr =
+ TheCall->getArg(FormatIndex)->IgnoreParenImpCasts();
+
+ const auto *Format = dyn_cast<StringLiteral>(FormatExpr);
+ if (!Format)
+ return;
+
+ if (!Format->isAscii() && !Format->isUTF8())
+ return;
+
+ auto Diagnose = [&](unsigned ArgIndex, unsigned DestSize,
+ unsigned SourceSize) {
+ DiagID = diag::warn_fortify_scanf_overflow;
+ unsigned Index = ArgIndex + DataIndex;
+ StringRef FunctionName = GetFunctionName();
+ DiagRuntimeBehavior(TheCall->getArg(Index)->getBeginLoc(), TheCall,
+ PDiag(DiagID) << FunctionName << (Index + 1)
+ << DestSize << SourceSize);
+ };
+
+ StringRef FormatStrRef = Format->getString();
+ auto ShiftedComputeSizeArgument = [&](unsigned Index) {
+ return ComputeSizeArgument(Index + DataIndex);
+ };
+ ScanfDiagnosticFormatHandler H(ShiftedComputeSizeArgument, Diagnose);
+ const char *FormatBytes = FormatStrRef.data();
+ const ConstantArrayType *T =
+ Context.getAsConstantArrayType(Format->getType());
+ assert(T && "String literal not of constant array type!");
+ size_t TypeSize = T->getSize().getZExtValue();
+
+ // In case there's a null byte somewhere.
+ size_t StrLen =
+ std::min(std::max(TypeSize, size_t(1)) - 1, FormatStrRef.find(0));
+
+ analyze_format_string::ParseScanfString(H, FormatBytes,
+ FormatBytes + StrLen, getLangOpts(),
+ Context.getTargetInfo());
+
+ // Unlike the other cases, the diagnostic has already been issued above,
+ // and it refers to a specific argument number, so there is nothing more
+ // to do here.
+ return;
+ }
+
case Builtin::BIsprintf:
case Builtin::BI__builtin___sprintf_chk: {
size_t FormatIndex = BuiltinID == Builtin::BIsprintf ? 1 : 3;
@@ -639,14 +816,13 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
H, FormatBytes, FormatBytes + StrLen, getLangOpts(),
Context.getTargetInfo(), false)) {
DiagID = diag::warn_fortify_source_format_overflow;
- UsedSize = llvm::APSInt::getUnsigned(H.getSizeLowerBound())
- .extOrTrunc(SizeTypeWidth);
+ SourceSize = llvm::APSInt::getUnsigned(H.getSizeLowerBound())
+ .extOrTrunc(SizeTypeWidth);
if (BuiltinID == Builtin::BI__builtin___sprintf_chk) {
+ DestinationSize = ComputeExplicitObjectSizeArgument(2);
IsChkVariant = true;
- ObjectIndex = 2;
} else {
- IsChkVariant = false;
- ObjectIndex = 0;
+ DestinationSize = ComputeSizeArgument(0);
}
break;
}
@@ -664,18 +840,19 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
case Builtin::BI__builtin___memccpy_chk:
case Builtin::BI__builtin___mempcpy_chk: {
DiagID = diag::warn_builtin_chk_overflow;
+ SourceSize = ComputeExplicitObjectSizeArgument(TheCall->getNumArgs() - 2);
+ DestinationSize =
+ ComputeExplicitObjectSizeArgument(TheCall->getNumArgs() - 1);
IsChkVariant = true;
- SizeIndex = TheCall->getNumArgs() - 2;
- ObjectIndex = TheCall->getNumArgs() - 1;
break;
}
case Builtin::BI__builtin___snprintf_chk:
case Builtin::BI__builtin___vsnprintf_chk: {
DiagID = diag::warn_builtin_chk_overflow;
+ SourceSize = ComputeExplicitObjectSizeArgument(1);
+ DestinationSize = ComputeExplicitObjectSizeArgument(3);
IsChkVariant = true;
- SizeIndex = 1;
- ObjectIndex = 3;
break;
}
@@ -691,8 +868,8 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
// size larger than the destination buffer though; this is a runtime abort
// in _FORTIFY_SOURCE mode, and is quite suspicious otherwise.
DiagID = diag::warn_fortify_source_size_mismatch;
- SizeIndex = TheCall->getNumArgs() - 1;
- ObjectIndex = 0;
+ SourceSize = ComputeExplicitObjectSizeArgument(TheCall->getNumArgs() - 1);
+ DestinationSize = ComputeSizeArgument(0);
break;
}
@@ -705,8 +882,8 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
case Builtin::BImempcpy:
case Builtin::BI__builtin_mempcpy: {
DiagID = diag::warn_fortify_source_overflow;
- SizeIndex = TheCall->getNumArgs() - 1;
- ObjectIndex = 0;
+ SourceSize = ComputeExplicitObjectSizeArgument(TheCall->getNumArgs() - 1);
+ DestinationSize = ComputeSizeArgument(0);
break;
}
case Builtin::BIsnprintf:
@@ -714,66 +891,25 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
case Builtin::BIvsnprintf:
case Builtin::BI__builtin_vsnprintf: {
DiagID = diag::warn_fortify_source_size_mismatch;
- SizeIndex = 1;
- ObjectIndex = 0;
+ SourceSize = ComputeExplicitObjectSizeArgument(1);
+ DestinationSize = ComputeSizeArgument(0);
break;
}
}
- llvm::APSInt ObjectSize;
- // For __builtin___*_chk, the object size is explicitly provided by the caller
- // (usually using __builtin_object_size). Use that value to check this call.
- if (IsChkVariant) {
- Expr::EvalResult Result;
- Expr *SizeArg = TheCall->getArg(ObjectIndex);
- if (!SizeArg->EvaluateAsInt(Result, getASTContext()))
- return;
- ObjectSize = Result.Val.getInt();
-
- // Otherwise, try to evaluate an imaginary call to __builtin_object_size.
- } else {
- // If the parameter has a pass_object_size attribute, then we should use its
- // (potentially) more strict checking mode. Otherwise, conservatively assume
- // type 0.
- int BOSType = 0;
- if (const auto *POS =
- FD->getParamDecl(ObjectIndex)->getAttr<PassObjectSizeAttr>())
- BOSType = POS->getType();
-
- Expr *ObjArg = TheCall->getArg(ObjectIndex);
- uint64_t Result;
- if (!ObjArg->tryEvaluateObjectSize(Result, getASTContext(), BOSType))
- return;
- // Get the object size in the target's size_t width.
- ObjectSize = llvm::APSInt::getUnsigned(Result).extOrTrunc(SizeTypeWidth);
- }
-
- // Evaluate the number of bytes of the object that this call will use.
- if (!UsedSize) {
- Expr::EvalResult Result;
- Expr *UsedSizeArg = TheCall->getArg(SizeIndex);
- if (!UsedSizeArg->EvaluateAsInt(Result, getASTContext()))
- return;
- UsedSize = Result.Val.getInt().extOrTrunc(SizeTypeWidth);
- }
-
- if (UsedSize.getValue().ule(ObjectSize))
+ if (!SourceSize || !DestinationSize ||
+ SourceSize.getValue().ule(DestinationSize.getValue()))
return;
- StringRef FunctionName = getASTContext().BuiltinInfo.getName(BuiltinID);
- // Skim off the details of whichever builtin was called to produce a better
- // diagnostic, as it's unlikley that the user wrote the __builtin explicitly.
- if (IsChkVariant) {
- FunctionName = FunctionName.drop_front(std::strlen("__builtin___"));
- FunctionName = FunctionName.drop_back(std::strlen("_chk"));
- } else if (FunctionName.startswith("__builtin_")) {
- FunctionName = FunctionName.drop_front(std::strlen("__builtin_"));
- }
+ StringRef FunctionName = GetFunctionName();
+ SmallString<16> DestinationStr;
+ SmallString<16> SourceStr;
+ DestinationSize->toString(DestinationStr, /*Radix=*/10);
+ SourceSize->toString(SourceStr, /*Radix=*/10);
DiagRuntimeBehavior(TheCall->getBeginLoc(), TheCall,
PDiag(DiagID)
- << FunctionName << toString(ObjectSize, /*Radix=*/10)
- << toString(UsedSize.getValue(), /*Radix=*/10));
+ << FunctionName << DestinationStr << SourceStr);
}
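The fortify checker is restructured around explicit SourceSize/DestinationSize computations and gains two new diagnostics: strcpy and __strcpy_chk calls whose source string length (plus the NUL byte) is known to exceed the destination, and scanf-family %s and %[ conversions whose constant field width plus the NUL byte exceeds the destination buffer. Illustrative C that triggers both warnings:

#include <stdio.h>
#include <string.h>

void demo(const char *input) {
  char buf[8];
  strcpy(buf, "hello world");   /* 11 + 1 > 8: warn_fortify_strlen_overflow */
  sscanf(input, "%9s", buf);    /*  9 + 1 > 8: warn_fortify_scanf_overflow  */
}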
static bool SemaBuiltinSEHScopeCheck(Sema &SemaRef, CallExpr *TheCall,
@@ -1715,7 +1851,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
// value so we bail out.
if (SizeOp->isValueDependent())
break;
- if (!SizeOp->EvaluateKnownConstInt(Context).isNullValue()) {
+ if (!SizeOp->EvaluateKnownConstInt(Context).isZero()) {
CheckNonNullArgument(*this, TheCall->getArg(0), TheCall->getExprLoc());
CheckNonNullArgument(*this, TheCall->getArg(1), TheCall->getExprLoc());
}
@@ -1962,6 +2098,20 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
break;
}
+ case Builtin::BI__builtin_elementwise_abs:
+ if (SemaBuiltinElementwiseMathOneArg(TheCall))
+ return ExprError();
+ break;
+ case Builtin::BI__builtin_elementwise_min:
+ case Builtin::BI__builtin_elementwise_max:
+ if (SemaBuiltinElementwiseMath(TheCall))
+ return ExprError();
+ break;
+ case Builtin::BI__builtin_reduce_max:
+ case Builtin::BI__builtin_reduce_min:
+ if (SemaBuiltinReduceMath(TheCall))
+ return ExprError();
+ break;
case Builtin::BI__builtin_matrix_transpose:
return SemaBuiltinMatrixTranspose(TheCall, TheCallResult);
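The new cases route the elementwise and reduction builtins added in this import through their shared Sema checks (matching operand types for the binary forms, a vector argument for the reductions). A small C example of the calls those checks validate:

typedef int v4si __attribute__((ext_vector_type(4)));

v4si clamp_abs(v4si x, v4si lo, v4si hi) {
  v4si a = __builtin_elementwise_abs(x);      /* one vector operand */
  v4si m = __builtin_elementwise_max(a, lo);  /* both operands must have the same type */
  return __builtin_elementwise_min(m, hi);
}

int max_lane(v4si x) {
  return __builtin_reduce_max(x);             /* reduces the vector to its element type */
}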
@@ -2591,8 +2741,8 @@ static bool isValidBPFPreserveFieldInfoArg(Expr *Arg) {
// to BPF backend to check whether the access is a
// field access or not.
return (Arg->IgnoreParens()->getObjectKind() == OK_BitField ||
- dyn_cast<MemberExpr>(Arg->IgnoreParens()) ||
- dyn_cast<ArraySubscriptExpr>(Arg->IgnoreParens()));
+ isa<MemberExpr>(Arg->IgnoreParens()) ||
+ isa<ArraySubscriptExpr>(Arg->IgnoreParens()));
}
static bool isEltOfVectorTy(ASTContext &Context, CallExpr *Call, Sema &S,
@@ -2616,8 +2766,8 @@ static bool isValidBPFPreserveTypeInfoArg(Expr *Arg) {
// 1. __builtin_preserve_type_info(*(<type> *)0, flag);
// 2. <type> var;
// __builtin_preserve_type_info(var, flag);
- if (!dyn_cast<DeclRefExpr>(Arg->IgnoreParens()) &&
- !dyn_cast<UnaryOperator>(Arg->IgnoreParens()))
+ if (!isa<DeclRefExpr>(Arg->IgnoreParens()) &&
+ !isa<UnaryOperator>(Arg->IgnoreParens()))
return false;
// Typedef type.
@@ -2674,12 +2824,7 @@ static bool isValidBPFPreserveEnumValueArg(Expr *Arg) {
return false;
// The enum value must be supported.
- for (auto *EDI : ET->getDecl()->enumerators()) {
- if (EDI == Enumerator)
- return true;
- }
-
- return false;
+ return llvm::is_contained(ET->getDecl()->enumerators(), Enumerator);
}
bool Sema::CheckBPFBuiltinFunctionCall(unsigned BuiltinID,
@@ -2974,8 +3119,8 @@ bool Sema::CheckHexagonBuiltinArgument(unsigned BuiltinID, CallExpr *TheCall) {
unsigned M = 1 << A.Align;
Min *= M;
Max *= M;
- Error |= SemaBuiltinConstantArgRange(TheCall, A.OpNum, Min, Max) |
- SemaBuiltinConstantArgMultiple(TheCall, A.OpNum, M);
+ Error |= SemaBuiltinConstantArgRange(TheCall, A.OpNum, Min, Max);
+ Error |= SemaBuiltinConstantArgMultiple(TheCall, A.OpNum, M);
}
}
return Error;
@@ -3280,6 +3425,14 @@ static bool isPPC_64Builtin(unsigned BuiltinID) {
case PPC::BI__builtin_ppc_store8r:
case PPC::BI__builtin_ppc_insert_exp:
case PPC::BI__builtin_ppc_extract_sig:
+ case PPC::BI__builtin_ppc_addex:
+ case PPC::BI__builtin_darn:
+ case PPC::BI__builtin_darn_raw:
+ case PPC::BI__builtin_ppc_compare_and_swaplp:
+ case PPC::BI__builtin_ppc_fetch_and_addlp:
+ case PPC::BI__builtin_ppc_fetch_and_andlp:
+ case PPC::BI__builtin_ppc_fetch_and_orlp:
+ case PPC::BI__builtin_ppc_fetch_and_swaplp:
return true;
}
return false;
@@ -3351,6 +3504,18 @@ bool Sema::CheckPPCBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
case PPC::BI__builtin_tabortdci:
return SemaBuiltinConstantArgRange(TheCall, 0, 0, 31) ||
SemaBuiltinConstantArgRange(TheCall, 2, 0, 31);
+ // According to GCC 'Basic PowerPC Built-in Functions Available on ISA 2.05',
+ // __builtin_(un)pack_longdouble are available only if long double uses IBM
+ // extended double representation.
+ case PPC::BI__builtin_unpack_longdouble:
+ if (SemaBuiltinConstantArgRange(TheCall, 1, 0, 1))
+ return true;
+ LLVM_FALLTHROUGH;
+ case PPC::BI__builtin_pack_longdouble:
+ if (&TI.getLongDoubleFormat() != &llvm::APFloat::PPCDoubleDouble())
+ return Diag(TheCall->getBeginLoc(), diag::err_ppc_builtin_requires_abi)
+ << "ibmlongdouble";
+ return false;
case PPC::BI__builtin_altivec_dst:
case PPC::BI__builtin_altivec_dstt:
case PPC::BI__builtin_altivec_dstst:
@@ -3410,8 +3575,7 @@ bool Sema::CheckPPCBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
// For __rlwnm, __rlwimi and __rldimi, the last parameter mask must
// be a constant that represents a contiguous bit field.
case PPC::BI__builtin_ppc_rlwnm:
- return SemaBuiltinConstantArg(TheCall, 1, Result) ||
- SemaValueIsRunOfOnes(TheCall, 2);
+ return SemaValueIsRunOfOnes(TheCall, 2);
case PPC::BI__builtin_ppc_rlwimi:
case PPC::BI__builtin_ppc_rldimi:
return SemaBuiltinConstantArg(TheCall, 2, Result) ||
@@ -3421,6 +3585,19 @@ bool Sema::CheckPPCBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
case PPC::BI__builtin_ppc_insert_exp:
return SemaFeatureCheck(*this, TheCall, "power9-vector",
diag::err_ppc_builtin_only_on_arch, "9");
+ case PPC::BI__builtin_ppc_addex: {
+ if (SemaFeatureCheck(*this, TheCall, "isa-v30-instructions",
+ diag::err_ppc_builtin_only_on_arch, "9") ||
+ SemaBuiltinConstantArgRange(TheCall, 2, 0, 3))
+ return true;
+ // Output warning for reserved values 1 to 3.
+ int ArgValue =
+ TheCall->getArg(2)->getIntegerConstantExpr(Context)->getSExtValue();
+ if (ArgValue != 0)
+ Diag(TheCall->getBeginLoc(), diag::warn_argument_undefined_behaviour)
+ << ArgValue;
+ return false;
+ }
case PPC::BI__builtin_ppc_mtfsb0:
case PPC::BI__builtin_ppc_mtfsb1:
return SemaBuiltinConstantArgRange(TheCall, 0, 0, 31);
@@ -3445,9 +3622,50 @@ bool Sema::CheckPPCBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
return SemaFeatureCheck(*this, TheCall, "isa-v207-instructions",
diag::err_ppc_builtin_only_on_arch, "8") ||
SemaBuiltinConstantArgRange(TheCall, 1, 1, 16);
-#define CUSTOM_BUILTIN(Name, Intr, Types, Acc) \
- case PPC::BI__builtin_##Name: \
- return SemaBuiltinPPCMMACall(TheCall, Types);
+ case PPC::BI__builtin_altivec_vcntmbb:
+ case PPC::BI__builtin_altivec_vcntmbh:
+ case PPC::BI__builtin_altivec_vcntmbw:
+ case PPC::BI__builtin_altivec_vcntmbd:
+ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 1);
+ case PPC::BI__builtin_darn:
+ case PPC::BI__builtin_darn_raw:
+ case PPC::BI__builtin_darn_32:
+ return SemaFeatureCheck(*this, TheCall, "isa-v30-instructions",
+ diag::err_ppc_builtin_only_on_arch, "9");
+ case PPC::BI__builtin_vsx_xxgenpcvbm:
+ case PPC::BI__builtin_vsx_xxgenpcvhm:
+ case PPC::BI__builtin_vsx_xxgenpcvwm:
+ case PPC::BI__builtin_vsx_xxgenpcvdm:
+ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 3);
+ case PPC::BI__builtin_ppc_compare_exp_uo:
+ case PPC::BI__builtin_ppc_compare_exp_lt:
+ case PPC::BI__builtin_ppc_compare_exp_gt:
+ case PPC::BI__builtin_ppc_compare_exp_eq:
+ return SemaFeatureCheck(*this, TheCall, "isa-v30-instructions",
+ diag::err_ppc_builtin_only_on_arch, "9") ||
+ SemaFeatureCheck(*this, TheCall, "vsx",
+ diag::err_ppc_builtin_requires_vsx);
+ case PPC::BI__builtin_ppc_test_data_class: {
+ // Check if the first argument of the __builtin_ppc_test_data_class call is
+ // valid. The argument must be either a 'float' or a 'double'.
+ QualType ArgType = TheCall->getArg(0)->getType();
+ if (ArgType != QualType(Context.FloatTy) &&
+ ArgType != QualType(Context.DoubleTy))
+ return Diag(TheCall->getBeginLoc(),
+ diag::err_ppc_invalid_test_data_class_type);
+ return SemaFeatureCheck(*this, TheCall, "isa-v30-instructions",
+ diag::err_ppc_builtin_only_on_arch, "9") ||
+ SemaFeatureCheck(*this, TheCall, "vsx",
+ diag::err_ppc_builtin_requires_vsx) ||
+ SemaBuiltinConstantArgRange(TheCall, 1, 0, 127);
+ }
+ case PPC::BI__builtin_ppc_load8r:
+ case PPC::BI__builtin_ppc_store8r:
+ return SemaFeatureCheck(*this, TheCall, "isa-v206-instructions",
+ diag::err_ppc_builtin_only_on_arch, "7");
+#define CUSTOM_BUILTIN(Name, Intr, Types, Acc) \
+ case PPC::BI__builtin_##Name: \
+ return SemaBuiltinPPCMMACall(TheCall, BuiltinID, Types);
#include "clang/Basic/BuiltinsPPC.def"
}
return SemaBuiltinConstantArgRange(TheCall, i, l, u);
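Illustrative sketch (not part of the diff): the __builtin_ppc_test_data_class case above only admits a 'float' or 'double' first operand plus a constant mask in [0, 127], and requires ISA 3.0 with VSX. Assuming -mcpu=power9 and the builtin's usual int return type:

    int classify(double d, long double ld) {
      int fin = __builtin_ppc_test_data_class(d, 0x3f); // OK: double operand, constant mask 63
      int bad = __builtin_ppc_test_data_class(ld, 1);   // error: operand must be 'float' or 'double'
      return fin + bad;
    }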
@@ -3499,7 +3717,7 @@ bool Sema::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
<< ArgExpr->getType();
auto Ord = ArgResult.Val.getInt().getZExtValue();
- // Check valididty of memory ordering as per C11 / C++11's memody model.
+  // Check validity of memory ordering as per C11 / C++11's memory model.
// Only fence needs check. Atomic dec/inc allow all memory orders.
if (!llvm::isValidAtomicOrderingCABI(Ord))
return Diag(ArgExpr->getBeginLoc(),
@@ -3586,138 +3804,12 @@ bool Sema::CheckRISCVBuiltinFunctionCall(const TargetInfo &TI,
return true;
switch (BuiltinID) {
- case RISCV::BI__builtin_rvv_vsetvli:
+ case RISCVVector::BI__builtin_rvv_vsetvli:
return SemaBuiltinConstantArgRange(TheCall, 1, 0, 3) ||
CheckRISCVLMUL(TheCall, 2);
- case RISCV::BI__builtin_rvv_vsetvlimax:
+ case RISCVVector::BI__builtin_rvv_vsetvlimax:
return SemaBuiltinConstantArgRange(TheCall, 0, 0, 3) ||
CheckRISCVLMUL(TheCall, 1);
- case RISCV::BI__builtin_rvv_vget_v_i8m2_i8m1:
- case RISCV::BI__builtin_rvv_vget_v_i16m2_i16m1:
- case RISCV::BI__builtin_rvv_vget_v_i32m2_i32m1:
- case RISCV::BI__builtin_rvv_vget_v_i64m2_i64m1:
- case RISCV::BI__builtin_rvv_vget_v_f32m2_f32m1:
- case RISCV::BI__builtin_rvv_vget_v_f64m2_f64m1:
- case RISCV::BI__builtin_rvv_vget_v_u8m2_u8m1:
- case RISCV::BI__builtin_rvv_vget_v_u16m2_u16m1:
- case RISCV::BI__builtin_rvv_vget_v_u32m2_u32m1:
- case RISCV::BI__builtin_rvv_vget_v_u64m2_u64m1:
- case RISCV::BI__builtin_rvv_vget_v_i8m4_i8m2:
- case RISCV::BI__builtin_rvv_vget_v_i16m4_i16m2:
- case RISCV::BI__builtin_rvv_vget_v_i32m4_i32m2:
- case RISCV::BI__builtin_rvv_vget_v_i64m4_i64m2:
- case RISCV::BI__builtin_rvv_vget_v_f32m4_f32m2:
- case RISCV::BI__builtin_rvv_vget_v_f64m4_f64m2:
- case RISCV::BI__builtin_rvv_vget_v_u8m4_u8m2:
- case RISCV::BI__builtin_rvv_vget_v_u16m4_u16m2:
- case RISCV::BI__builtin_rvv_vget_v_u32m4_u32m2:
- case RISCV::BI__builtin_rvv_vget_v_u64m4_u64m2:
- case RISCV::BI__builtin_rvv_vget_v_i8m8_i8m4:
- case RISCV::BI__builtin_rvv_vget_v_i16m8_i16m4:
- case RISCV::BI__builtin_rvv_vget_v_i32m8_i32m4:
- case RISCV::BI__builtin_rvv_vget_v_i64m8_i64m4:
- case RISCV::BI__builtin_rvv_vget_v_f32m8_f32m4:
- case RISCV::BI__builtin_rvv_vget_v_f64m8_f64m4:
- case RISCV::BI__builtin_rvv_vget_v_u8m8_u8m4:
- case RISCV::BI__builtin_rvv_vget_v_u16m8_u16m4:
- case RISCV::BI__builtin_rvv_vget_v_u32m8_u32m4:
- case RISCV::BI__builtin_rvv_vget_v_u64m8_u64m4:
- return SemaBuiltinConstantArgRange(TheCall, 1, 0, 1);
- case RISCV::BI__builtin_rvv_vget_v_i8m4_i8m1:
- case RISCV::BI__builtin_rvv_vget_v_i16m4_i16m1:
- case RISCV::BI__builtin_rvv_vget_v_i32m4_i32m1:
- case RISCV::BI__builtin_rvv_vget_v_i64m4_i64m1:
- case RISCV::BI__builtin_rvv_vget_v_f32m4_f32m1:
- case RISCV::BI__builtin_rvv_vget_v_f64m4_f64m1:
- case RISCV::BI__builtin_rvv_vget_v_u8m4_u8m1:
- case RISCV::BI__builtin_rvv_vget_v_u16m4_u16m1:
- case RISCV::BI__builtin_rvv_vget_v_u32m4_u32m1:
- case RISCV::BI__builtin_rvv_vget_v_u64m4_u64m1:
- case RISCV::BI__builtin_rvv_vget_v_i8m8_i8m2:
- case RISCV::BI__builtin_rvv_vget_v_i16m8_i16m2:
- case RISCV::BI__builtin_rvv_vget_v_i32m8_i32m2:
- case RISCV::BI__builtin_rvv_vget_v_i64m8_i64m2:
- case RISCV::BI__builtin_rvv_vget_v_f32m8_f32m2:
- case RISCV::BI__builtin_rvv_vget_v_f64m8_f64m2:
- case RISCV::BI__builtin_rvv_vget_v_u8m8_u8m2:
- case RISCV::BI__builtin_rvv_vget_v_u16m8_u16m2:
- case RISCV::BI__builtin_rvv_vget_v_u32m8_u32m2:
- case RISCV::BI__builtin_rvv_vget_v_u64m8_u64m2:
- return SemaBuiltinConstantArgRange(TheCall, 1, 0, 3);
- case RISCV::BI__builtin_rvv_vget_v_i8m8_i8m1:
- case RISCV::BI__builtin_rvv_vget_v_i16m8_i16m1:
- case RISCV::BI__builtin_rvv_vget_v_i32m8_i32m1:
- case RISCV::BI__builtin_rvv_vget_v_i64m8_i64m1:
- case RISCV::BI__builtin_rvv_vget_v_f32m8_f32m1:
- case RISCV::BI__builtin_rvv_vget_v_f64m8_f64m1:
- case RISCV::BI__builtin_rvv_vget_v_u8m8_u8m1:
- case RISCV::BI__builtin_rvv_vget_v_u16m8_u16m1:
- case RISCV::BI__builtin_rvv_vget_v_u32m8_u32m1:
- case RISCV::BI__builtin_rvv_vget_v_u64m8_u64m1:
- return SemaBuiltinConstantArgRange(TheCall, 1, 0, 7);
- case RISCV::BI__builtin_rvv_vset_v_i8m1_i8m2:
- case RISCV::BI__builtin_rvv_vset_v_i16m1_i16m2:
- case RISCV::BI__builtin_rvv_vset_v_i32m1_i32m2:
- case RISCV::BI__builtin_rvv_vset_v_i64m1_i64m2:
- case RISCV::BI__builtin_rvv_vset_v_f32m1_f32m2:
- case RISCV::BI__builtin_rvv_vset_v_f64m1_f64m2:
- case RISCV::BI__builtin_rvv_vset_v_u8m1_u8m2:
- case RISCV::BI__builtin_rvv_vset_v_u16m1_u16m2:
- case RISCV::BI__builtin_rvv_vset_v_u32m1_u32m2:
- case RISCV::BI__builtin_rvv_vset_v_u64m1_u64m2:
- case RISCV::BI__builtin_rvv_vset_v_i8m2_i8m4:
- case RISCV::BI__builtin_rvv_vset_v_i16m2_i16m4:
- case RISCV::BI__builtin_rvv_vset_v_i32m2_i32m4:
- case RISCV::BI__builtin_rvv_vset_v_i64m2_i64m4:
- case RISCV::BI__builtin_rvv_vset_v_f32m2_f32m4:
- case RISCV::BI__builtin_rvv_vset_v_f64m2_f64m4:
- case RISCV::BI__builtin_rvv_vset_v_u8m2_u8m4:
- case RISCV::BI__builtin_rvv_vset_v_u16m2_u16m4:
- case RISCV::BI__builtin_rvv_vset_v_u32m2_u32m4:
- case RISCV::BI__builtin_rvv_vset_v_u64m2_u64m4:
- case RISCV::BI__builtin_rvv_vset_v_i8m4_i8m8:
- case RISCV::BI__builtin_rvv_vset_v_i16m4_i16m8:
- case RISCV::BI__builtin_rvv_vset_v_i32m4_i32m8:
- case RISCV::BI__builtin_rvv_vset_v_i64m4_i64m8:
- case RISCV::BI__builtin_rvv_vset_v_f32m4_f32m8:
- case RISCV::BI__builtin_rvv_vset_v_f64m4_f64m8:
- case RISCV::BI__builtin_rvv_vset_v_u8m4_u8m8:
- case RISCV::BI__builtin_rvv_vset_v_u16m4_u16m8:
- case RISCV::BI__builtin_rvv_vset_v_u32m4_u32m8:
- case RISCV::BI__builtin_rvv_vset_v_u64m4_u64m8:
- return SemaBuiltinConstantArgRange(TheCall, 1, 0, 1);
- case RISCV::BI__builtin_rvv_vset_v_i8m1_i8m4:
- case RISCV::BI__builtin_rvv_vset_v_i16m1_i16m4:
- case RISCV::BI__builtin_rvv_vset_v_i32m1_i32m4:
- case RISCV::BI__builtin_rvv_vset_v_i64m1_i64m4:
- case RISCV::BI__builtin_rvv_vset_v_f32m1_f32m4:
- case RISCV::BI__builtin_rvv_vset_v_f64m1_f64m4:
- case RISCV::BI__builtin_rvv_vset_v_u8m1_u8m4:
- case RISCV::BI__builtin_rvv_vset_v_u16m1_u16m4:
- case RISCV::BI__builtin_rvv_vset_v_u32m1_u32m4:
- case RISCV::BI__builtin_rvv_vset_v_u64m1_u64m4:
- case RISCV::BI__builtin_rvv_vset_v_i8m2_i8m8:
- case RISCV::BI__builtin_rvv_vset_v_i16m2_i16m8:
- case RISCV::BI__builtin_rvv_vset_v_i32m2_i32m8:
- case RISCV::BI__builtin_rvv_vset_v_i64m2_i64m8:
- case RISCV::BI__builtin_rvv_vset_v_f32m2_f32m8:
- case RISCV::BI__builtin_rvv_vset_v_f64m2_f64m8:
- case RISCV::BI__builtin_rvv_vset_v_u8m2_u8m8:
- case RISCV::BI__builtin_rvv_vset_v_u16m2_u16m8:
- case RISCV::BI__builtin_rvv_vset_v_u32m2_u32m8:
- case RISCV::BI__builtin_rvv_vset_v_u64m2_u64m8:
- return SemaBuiltinConstantArgRange(TheCall, 1, 0, 3);
- case RISCV::BI__builtin_rvv_vset_v_i8m1_i8m8:
- case RISCV::BI__builtin_rvv_vset_v_i16m1_i16m8:
- case RISCV::BI__builtin_rvv_vset_v_i32m1_i32m8:
- case RISCV::BI__builtin_rvv_vset_v_i64m1_i64m8:
- case RISCV::BI__builtin_rvv_vset_v_f32m1_f32m8:
- case RISCV::BI__builtin_rvv_vset_v_f64m1_f64m8:
- case RISCV::BI__builtin_rvv_vset_v_u8m1_u8m8:
- case RISCV::BI__builtin_rvv_vset_v_u16m1_u16m8:
- case RISCV::BI__builtin_rvv_vset_v_u32m1_u32m8:
- case RISCV::BI__builtin_rvv_vset_v_u64m1_u64m8:
- return SemaBuiltinConstantArgRange(TheCall, 1, 0, 7);
}
return false;
@@ -3850,14 +3942,22 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_vcvttss2si64:
case X86::BI__builtin_ia32_vcvttss2usi32:
case X86::BI__builtin_ia32_vcvttss2usi64:
+ case X86::BI__builtin_ia32_vcvttsh2si32:
+ case X86::BI__builtin_ia32_vcvttsh2si64:
+ case X86::BI__builtin_ia32_vcvttsh2usi32:
+ case X86::BI__builtin_ia32_vcvttsh2usi64:
ArgNum = 1;
break;
case X86::BI__builtin_ia32_maxpd512:
case X86::BI__builtin_ia32_maxps512:
case X86::BI__builtin_ia32_minpd512:
case X86::BI__builtin_ia32_minps512:
+ case X86::BI__builtin_ia32_maxph512:
+ case X86::BI__builtin_ia32_minph512:
ArgNum = 2;
break;
+ case X86::BI__builtin_ia32_vcvtph2pd512_mask:
+ case X86::BI__builtin_ia32_vcvtph2psx512_mask:
case X86::BI__builtin_ia32_cvtps2pd512_mask:
case X86::BI__builtin_ia32_cvttpd2dq512_mask:
case X86::BI__builtin_ia32_cvttpd2qq512_mask:
@@ -3867,16 +3967,24 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_cvttps2qq512_mask:
case X86::BI__builtin_ia32_cvttps2udq512_mask:
case X86::BI__builtin_ia32_cvttps2uqq512_mask:
+ case X86::BI__builtin_ia32_vcvttph2w512_mask:
+ case X86::BI__builtin_ia32_vcvttph2uw512_mask:
+ case X86::BI__builtin_ia32_vcvttph2dq512_mask:
+ case X86::BI__builtin_ia32_vcvttph2udq512_mask:
+ case X86::BI__builtin_ia32_vcvttph2qq512_mask:
+ case X86::BI__builtin_ia32_vcvttph2uqq512_mask:
case X86::BI__builtin_ia32_exp2pd_mask:
case X86::BI__builtin_ia32_exp2ps_mask:
case X86::BI__builtin_ia32_getexppd512_mask:
case X86::BI__builtin_ia32_getexpps512_mask:
+ case X86::BI__builtin_ia32_getexpph512_mask:
case X86::BI__builtin_ia32_rcp28pd_mask:
case X86::BI__builtin_ia32_rcp28ps_mask:
case X86::BI__builtin_ia32_rsqrt28pd_mask:
case X86::BI__builtin_ia32_rsqrt28ps_mask:
case X86::BI__builtin_ia32_vcomisd:
case X86::BI__builtin_ia32_vcomiss:
+ case X86::BI__builtin_ia32_vcomish:
case X86::BI__builtin_ia32_vcvtph2ps512_mask:
ArgNum = 3;
break;
@@ -3884,21 +3992,30 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_cmpps512_mask:
case X86::BI__builtin_ia32_cmpsd_mask:
case X86::BI__builtin_ia32_cmpss_mask:
+ case X86::BI__builtin_ia32_cmpsh_mask:
+ case X86::BI__builtin_ia32_vcvtsh2sd_round_mask:
+ case X86::BI__builtin_ia32_vcvtsh2ss_round_mask:
case X86::BI__builtin_ia32_cvtss2sd_round_mask:
case X86::BI__builtin_ia32_getexpsd128_round_mask:
case X86::BI__builtin_ia32_getexpss128_round_mask:
+ case X86::BI__builtin_ia32_getexpsh128_round_mask:
case X86::BI__builtin_ia32_getmantpd512_mask:
case X86::BI__builtin_ia32_getmantps512_mask:
+ case X86::BI__builtin_ia32_getmantph512_mask:
case X86::BI__builtin_ia32_maxsd_round_mask:
case X86::BI__builtin_ia32_maxss_round_mask:
+ case X86::BI__builtin_ia32_maxsh_round_mask:
case X86::BI__builtin_ia32_minsd_round_mask:
case X86::BI__builtin_ia32_minss_round_mask:
+ case X86::BI__builtin_ia32_minsh_round_mask:
case X86::BI__builtin_ia32_rcp28sd_round_mask:
case X86::BI__builtin_ia32_rcp28ss_round_mask:
case X86::BI__builtin_ia32_reducepd512_mask:
case X86::BI__builtin_ia32_reduceps512_mask:
+ case X86::BI__builtin_ia32_reduceph512_mask:
case X86::BI__builtin_ia32_rndscalepd_mask:
case X86::BI__builtin_ia32_rndscaleps_mask:
+ case X86::BI__builtin_ia32_rndscaleph_mask:
case X86::BI__builtin_ia32_rsqrt28sd_round_mask:
case X86::BI__builtin_ia32_rsqrt28ss_round_mask:
ArgNum = 4;
@@ -3913,14 +4030,17 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_fixupimmss_maskz:
case X86::BI__builtin_ia32_getmantsd_round_mask:
case X86::BI__builtin_ia32_getmantss_round_mask:
+ case X86::BI__builtin_ia32_getmantsh_round_mask:
case X86::BI__builtin_ia32_rangepd512_mask:
case X86::BI__builtin_ia32_rangeps512_mask:
case X86::BI__builtin_ia32_rangesd128_round_mask:
case X86::BI__builtin_ia32_rangess128_round_mask:
case X86::BI__builtin_ia32_reducesd_mask:
case X86::BI__builtin_ia32_reducess_mask:
+ case X86::BI__builtin_ia32_reducesh_mask:
case X86::BI__builtin_ia32_rndscalesd_round_mask:
case X86::BI__builtin_ia32_rndscaless_round_mask:
+ case X86::BI__builtin_ia32_rndscalesh_round_mask:
ArgNum = 5;
break;
case X86::BI__builtin_ia32_vcvtsd2si64:
@@ -3931,11 +4051,20 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_vcvtss2si64:
case X86::BI__builtin_ia32_vcvtss2usi32:
case X86::BI__builtin_ia32_vcvtss2usi64:
+ case X86::BI__builtin_ia32_vcvtsh2si32:
+ case X86::BI__builtin_ia32_vcvtsh2si64:
+ case X86::BI__builtin_ia32_vcvtsh2usi32:
+ case X86::BI__builtin_ia32_vcvtsh2usi64:
case X86::BI__builtin_ia32_sqrtpd512:
case X86::BI__builtin_ia32_sqrtps512:
+ case X86::BI__builtin_ia32_sqrtph512:
ArgNum = 1;
HasRC = true;
break;
+ case X86::BI__builtin_ia32_addph512:
+ case X86::BI__builtin_ia32_divph512:
+ case X86::BI__builtin_ia32_mulph512:
+ case X86::BI__builtin_ia32_subph512:
case X86::BI__builtin_ia32_addpd512:
case X86::BI__builtin_ia32_addps512:
case X86::BI__builtin_ia32_divpd512:
@@ -3950,11 +4079,17 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_cvtusi2sd64:
case X86::BI__builtin_ia32_cvtusi2ss32:
case X86::BI__builtin_ia32_cvtusi2ss64:
+ case X86::BI__builtin_ia32_vcvtusi2sh:
+ case X86::BI__builtin_ia32_vcvtusi642sh:
+ case X86::BI__builtin_ia32_vcvtsi2sh:
+ case X86::BI__builtin_ia32_vcvtsi642sh:
ArgNum = 2;
HasRC = true;
break;
case X86::BI__builtin_ia32_cvtdq2ps512_mask:
case X86::BI__builtin_ia32_cvtudq2ps512_mask:
+ case X86::BI__builtin_ia32_vcvtpd2ph512_mask:
+ case X86::BI__builtin_ia32_vcvtps2phx512_mask:
case X86::BI__builtin_ia32_cvtpd2ps512_mask:
case X86::BI__builtin_ia32_cvtpd2dq512_mask:
case X86::BI__builtin_ia32_cvtpd2qq512_mask:
@@ -3968,30 +4103,54 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_cvtqq2ps512_mask:
case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
+ case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
+ case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
+ case X86::BI__builtin_ia32_vcvtw2ph512_mask:
+ case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
+ case X86::BI__builtin_ia32_vcvtph2w512_mask:
+ case X86::BI__builtin_ia32_vcvtph2uw512_mask:
+ case X86::BI__builtin_ia32_vcvtph2dq512_mask:
+ case X86::BI__builtin_ia32_vcvtph2udq512_mask:
+ case X86::BI__builtin_ia32_vcvtph2qq512_mask:
+ case X86::BI__builtin_ia32_vcvtph2uqq512_mask:
+ case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
+ case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
ArgNum = 3;
HasRC = true;
break;
+ case X86::BI__builtin_ia32_addsh_round_mask:
case X86::BI__builtin_ia32_addss_round_mask:
case X86::BI__builtin_ia32_addsd_round_mask:
+ case X86::BI__builtin_ia32_divsh_round_mask:
case X86::BI__builtin_ia32_divss_round_mask:
case X86::BI__builtin_ia32_divsd_round_mask:
+ case X86::BI__builtin_ia32_mulsh_round_mask:
case X86::BI__builtin_ia32_mulss_round_mask:
case X86::BI__builtin_ia32_mulsd_round_mask:
+ case X86::BI__builtin_ia32_subsh_round_mask:
case X86::BI__builtin_ia32_subss_round_mask:
case X86::BI__builtin_ia32_subsd_round_mask:
+ case X86::BI__builtin_ia32_scalefph512_mask:
case X86::BI__builtin_ia32_scalefpd512_mask:
case X86::BI__builtin_ia32_scalefps512_mask:
case X86::BI__builtin_ia32_scalefsd_round_mask:
case X86::BI__builtin_ia32_scalefss_round_mask:
+ case X86::BI__builtin_ia32_scalefsh_round_mask:
case X86::BI__builtin_ia32_cvtsd2ss_round_mask:
+ case X86::BI__builtin_ia32_vcvtss2sh_round_mask:
+ case X86::BI__builtin_ia32_vcvtsd2sh_round_mask:
case X86::BI__builtin_ia32_sqrtsd_round_mask:
case X86::BI__builtin_ia32_sqrtss_round_mask:
+ case X86::BI__builtin_ia32_sqrtsh_round_mask:
case X86::BI__builtin_ia32_vfmaddsd3_mask:
case X86::BI__builtin_ia32_vfmaddsd3_maskz:
case X86::BI__builtin_ia32_vfmaddsd3_mask3:
case X86::BI__builtin_ia32_vfmaddss3_mask:
case X86::BI__builtin_ia32_vfmaddss3_maskz:
case X86::BI__builtin_ia32_vfmaddss3_mask3:
+ case X86::BI__builtin_ia32_vfmaddsh3_mask:
+ case X86::BI__builtin_ia32_vfmaddsh3_maskz:
+ case X86::BI__builtin_ia32_vfmaddsh3_mask3:
case X86::BI__builtin_ia32_vfmaddpd512_mask:
case X86::BI__builtin_ia32_vfmaddpd512_maskz:
case X86::BI__builtin_ia32_vfmaddpd512_mask3:
@@ -4000,6 +4159,10 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_vfmaddps512_maskz:
case X86::BI__builtin_ia32_vfmaddps512_mask3:
case X86::BI__builtin_ia32_vfmsubps512_mask3:
+ case X86::BI__builtin_ia32_vfmaddph512_mask:
+ case X86::BI__builtin_ia32_vfmaddph512_maskz:
+ case X86::BI__builtin_ia32_vfmaddph512_mask3:
+ case X86::BI__builtin_ia32_vfmsubph512_mask3:
case X86::BI__builtin_ia32_vfmaddsubpd512_mask:
case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
@@ -4008,6 +4171,26 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
case X86::BI__builtin_ia32_vfmsubaddps512_mask3:
+ case X86::BI__builtin_ia32_vfmaddsubph512_mask:
+ case X86::BI__builtin_ia32_vfmaddsubph512_maskz:
+ case X86::BI__builtin_ia32_vfmaddsubph512_mask3:
+ case X86::BI__builtin_ia32_vfmsubaddph512_mask3:
+ case X86::BI__builtin_ia32_vfmaddcsh_mask:
+ case X86::BI__builtin_ia32_vfmaddcsh_round_mask:
+ case X86::BI__builtin_ia32_vfmaddcsh_round_mask3:
+ case X86::BI__builtin_ia32_vfmaddcph512_mask:
+ case X86::BI__builtin_ia32_vfmaddcph512_maskz:
+ case X86::BI__builtin_ia32_vfmaddcph512_mask3:
+ case X86::BI__builtin_ia32_vfcmaddcsh_mask:
+ case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:
+ case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
+ case X86::BI__builtin_ia32_vfcmaddcph512_mask:
+ case X86::BI__builtin_ia32_vfcmaddcph512_maskz:
+ case X86::BI__builtin_ia32_vfcmaddcph512_mask3:
+ case X86::BI__builtin_ia32_vfmulcsh_mask:
+ case X86::BI__builtin_ia32_vfmulcph512_mask:
+ case X86::BI__builtin_ia32_vfcmulcsh_mask:
+ case X86::BI__builtin_ia32_vfcmulcph512_mask:
ArgNum = 4;
HasRC = true;
break;
@@ -4359,6 +4542,9 @@ bool Sema::CheckX86BuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
case X86::BI__builtin_ia32_getmantps256_mask:
case X86::BI__builtin_ia32_getmantpd512_mask:
case X86::BI__builtin_ia32_getmantps512_mask:
+ case X86::BI__builtin_ia32_getmantph128_mask:
+ case X86::BI__builtin_ia32_getmantph256_mask:
+ case X86::BI__builtin_ia32_getmantph512_mask:
case X86::BI__builtin_ia32_vec_ext_v16qi:
case X86::BI__builtin_ia32_vec_ext_v16hi:
i = 1; l = 0; u = 15;
@@ -4377,6 +4563,7 @@ bool Sema::CheckX86BuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
case X86::BI__builtin_ia32_rangeps512_mask:
case X86::BI__builtin_ia32_getmantsd_round_mask:
case X86::BI__builtin_ia32_getmantss_round_mask:
+ case X86::BI__builtin_ia32_getmantsh_round_mask:
case X86::BI__builtin_ia32_vec_set_v16qi:
case X86::BI__builtin_ia32_vec_set_v16hi:
i = 2; l = 0; u = 15;
@@ -4429,12 +4616,16 @@ bool Sema::CheckX86BuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
case X86::BI__builtin_ia32_rndscalepd_256_mask:
case X86::BI__builtin_ia32_rndscaleps_mask:
case X86::BI__builtin_ia32_rndscalepd_mask:
+ case X86::BI__builtin_ia32_rndscaleph_mask:
case X86::BI__builtin_ia32_reducepd128_mask:
case X86::BI__builtin_ia32_reducepd256_mask:
case X86::BI__builtin_ia32_reducepd512_mask:
case X86::BI__builtin_ia32_reduceps128_mask:
case X86::BI__builtin_ia32_reduceps256_mask:
case X86::BI__builtin_ia32_reduceps512_mask:
+ case X86::BI__builtin_ia32_reduceph128_mask:
+ case X86::BI__builtin_ia32_reduceph256_mask:
+ case X86::BI__builtin_ia32_reduceph512_mask:
case X86::BI__builtin_ia32_prold512:
case X86::BI__builtin_ia32_prolq512:
case X86::BI__builtin_ia32_prold128:
@@ -4453,8 +4644,12 @@ bool Sema::CheckX86BuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
case X86::BI__builtin_ia32_fpclassps256_mask:
case X86::BI__builtin_ia32_fpclassps512_mask:
case X86::BI__builtin_ia32_fpclasspd512_mask:
+ case X86::BI__builtin_ia32_fpclassph128_mask:
+ case X86::BI__builtin_ia32_fpclassph256_mask:
+ case X86::BI__builtin_ia32_fpclassph512_mask:
case X86::BI__builtin_ia32_fpclasssd_mask:
case X86::BI__builtin_ia32_fpclassss_mask:
+ case X86::BI__builtin_ia32_fpclasssh_mask:
case X86::BI__builtin_ia32_pslldqi128_byteshift:
case X86::BI__builtin_ia32_pslldqi256_byteshift:
case X86::BI__builtin_ia32_pslldqi512_byteshift:
@@ -4565,6 +4760,8 @@ bool Sema::CheckX86BuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
case X86::BI__builtin_ia32_reducess_mask:
case X86::BI__builtin_ia32_rndscalesd_round_mask:
case X86::BI__builtin_ia32_rndscaless_round_mask:
+ case X86::BI__builtin_ia32_rndscalesh_round_mask:
+ case X86::BI__builtin_ia32_reducesh_mask:
i = 4; l = 0; u = 255;
break;
}
@@ -4829,7 +5026,7 @@ void Sema::CheckArgAlignment(SourceLocation Loc, NamedDecl *FDecl,
if (ArgAlign < ParamAlign)
Diag(Loc, diag::warn_param_mismatched_alignment)
<< (int)ArgAlign.getQuantity() << (int)ParamAlign.getQuantity()
- << ParamName << FDecl;
+ << ParamName << (FDecl != nullptr) << FDecl;
}
/// Handles the checks for format strings, non-POD arguments to vararg
@@ -5229,6 +5426,7 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange,
case AtomicExpr::AO__c11_atomic_fetch_and:
case AtomicExpr::AO__c11_atomic_fetch_or:
case AtomicExpr::AO__c11_atomic_fetch_xor:
+ case AtomicExpr::AO__c11_atomic_fetch_nand:
case AtomicExpr::AO__opencl_atomic_fetch_and:
case AtomicExpr::AO__opencl_atomic_fetch_or:
case AtomicExpr::AO__opencl_atomic_fetch_xor:
@@ -6286,6 +6484,21 @@ bool Sema::SemaBuiltinVAStart(unsigned BuiltinID, CallExpr *TheCall) {
}
bool Sema::SemaBuiltinVAStartARMMicrosoft(CallExpr *Call) {
+ auto IsSuitablyTypedFormatArgument = [this](const Expr *Arg) -> bool {
+ const LangOptions &LO = getLangOpts();
+
+ if (LO.CPlusPlus)
+ return Arg->getType()
+ .getCanonicalType()
+ .getTypePtr()
+ ->getPointeeType()
+ .withoutLocalFastQualifiers() == Context.CharTy;
+
+  // In C, allow aliasing through `char *`; this is required for AArch64 at
+  // least.
+ return true;
+ };
+
// void __va_start(va_list *ap, const char *named_addr, size_t slot_size,
// const char *named_addr);
@@ -6314,8 +6527,7 @@ bool Sema::SemaBuiltinVAStartARMMicrosoft(CallExpr *Call) {
const QualType &ConstCharPtrTy =
Context.getPointerType(Context.CharTy.withConst());
- if (!Arg1Ty->isPointerType() ||
- Arg1Ty->getPointeeType().withoutLocalFastQualifiers() != Context.CharTy)
+ if (!Arg1Ty->isPointerType() || !IsSuitablyTypedFormatArgument(Arg1))
Diag(Arg1->getBeginLoc(), diag::err_typecheck_convert_incompatible)
<< Arg1->getType() << ConstCharPtrTy << 1 /* different class */
<< 0 /* qualifier difference */
@@ -6586,7 +6798,7 @@ ExprResult Sema::SemaBuiltinShuffleVector(CallExpr *TheCall) {
<< TheCall->getArg(i)->getSourceRange());
// Allow -1 which will be translated to undef in the IR.
- if (Result->isSigned() && Result->isAllOnesValue())
+ if (Result->isSigned() && Result->isAllOnes())
continue;
if (Result->getActiveBits() > 64 ||
@@ -7286,11 +7498,35 @@ bool Sema::SemaBuiltinARMSpecialReg(unsigned BuiltinID, CallExpr *TheCall,
/// Emit an error and return true on failure; return false on success.
/// TypeStr is a string containing the type descriptor of the value returned by
/// the builtin and the descriptors of the expected type of the arguments.
-bool Sema::SemaBuiltinPPCMMACall(CallExpr *TheCall, const char *TypeStr) {
+bool Sema::SemaBuiltinPPCMMACall(CallExpr *TheCall, unsigned BuiltinID,
+ const char *TypeStr) {
assert((TypeStr[0] != '\0') &&
"Invalid types in PPC MMA builtin declaration");
+ switch (BuiltinID) {
+ default:
+  // This function is called from CheckPPCBuiltinFunctionCall, where the
+  // BuiltinID is guaranteed to be an MMA or paired vector memop builtin. Here
+  // we isolate the paired vector memop builtins that can be used with MMA
+  // disabled, so the default case covers every builtin that requires both MMA
+  // and paired vector memops.
+ if (SemaFeatureCheck(*this, TheCall, "paired-vector-memops",
+ diag::err_ppc_builtin_only_on_arch, "10") ||
+ SemaFeatureCheck(*this, TheCall, "mma",
+ diag::err_ppc_builtin_only_on_arch, "10"))
+ return true;
+ break;
+ case PPC::BI__builtin_vsx_lxvp:
+ case PPC::BI__builtin_vsx_stxvp:
+ case PPC::BI__builtin_vsx_assemble_pair:
+ case PPC::BI__builtin_vsx_disassemble_pair:
+ if (SemaFeatureCheck(*this, TheCall, "paired-vector-memops",
+ diag::err_ppc_builtin_only_on_arch, "10"))
+ return true;
+ break;
+ }
+
unsigned Mask = 0;
unsigned ArgNum = 0;
@@ -7308,13 +7544,23 @@ bool Sema::SemaBuiltinPPCMMACall(CallExpr *TheCall, const char *TypeStr) {
}
Expr *Arg = TheCall->getArg(ArgNum);
- QualType ArgType = Arg->getType();
-
- if ((ExpectedType->isVoidPointerType() && !ArgType->isPointerType()) ||
- (!ExpectedType->isVoidPointerType() &&
- ArgType.getCanonicalType() != ExpectedType))
- return Diag(Arg->getBeginLoc(), diag::err_typecheck_convert_incompatible)
- << ArgType << ExpectedType << 1 << 0 << 0;
+ QualType PassedType = Arg->getType();
+ QualType StrippedRVType = PassedType.getCanonicalType();
+
+ // Strip Restrict/Volatile qualifiers.
+ if (StrippedRVType.isRestrictQualified() ||
+ StrippedRVType.isVolatileQualified())
+ StrippedRVType = StrippedRVType.getCanonicalType().getUnqualifiedType();
+
+ // The only case where the argument type and expected type are allowed to
+ // mismatch is if the argument type is a non-void pointer (or array) and
+ // expected type is a void pointer.
+ if (StrippedRVType != ExpectedType)
+ if (!(ExpectedType->isVoidPointerType() &&
+ (StrippedRVType->isPointerType() || StrippedRVType->isArrayType())))
+ return Diag(Arg->getBeginLoc(),
+ diag::err_typecheck_convert_incompatible)
+ << PassedType << ExpectedType << 1 << 0 << 0;
// If the value of the Mask is not 0, we have a constraint in the size of
// the integer argument so here we ensure the argument is a constant that
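Illustrative sketch (not part of the diff): the relaxed check above canonicalizes the argument type and drops restrict/volatile qualifiers before comparing it with the expected type, so qualified accumulator pointers no longer hit the incompatible-type error. Assuming a Power10 target with MMA enabled and the usual MMA builtin signature:

    void ger(__vector_quad *__restrict acc, vector unsigned char a, vector unsigned char b) {
      __builtin_mma_xvf32gerpp(acc, a, b); // 'restrict' on acc is now stripped, not rejected
    }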
@@ -7678,11 +7924,11 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef<const Expr *> Args,
// }
if (HasVAListArg) {
if (const ParmVarDecl *PV = dyn_cast<ParmVarDecl>(VD)) {
- if (const NamedDecl *ND = dyn_cast<NamedDecl>(PV->getDeclContext())) {
+ if (const Decl *D = dyn_cast<Decl>(PV->getDeclContext())) {
int PVIndex = PV->getFunctionScopeIndex() + 1;
- for (const auto *PVFormat : ND->specific_attrs<FormatAttr>()) {
+ for (const auto *PVFormat : D->specific_attrs<FormatAttr>()) {
// adjust for implicit parameter
- if (const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(ND))
+ if (const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(D))
if (MD->isInstance())
++PVIndex;
// We also check if the formats are compatible.
@@ -9545,8 +9791,7 @@ static void CheckFormatString(Sema &S, const FormatStringLiteral *FExpr,
// Emit a warning if the string literal is truncated and does not contain an
// embedded null character.
- if (TypeSize <= StrRef.size() &&
- StrRef.substr(0, TypeSize).find('\0') == StringRef::npos) {
+ if (TypeSize <= StrRef.size() && !StrRef.substr(0, TypeSize).contains('\0')) {
CheckFormatHandler::EmitFormatDiagnostic(
S, inFunctionCall, Args[format_idx],
S.PDiag(diag::warn_printf_format_string_not_null_terminated),
@@ -11138,7 +11383,7 @@ static QualType GetExprType(const Expr *E) {
///
/// \param MaxWidth The width to which the value will be truncated.
/// \param Approximate If \c true, return a likely range for the result: in
-/// particular, assume that aritmetic on narrower types doesn't leave
+/// particular, assume that arithmetic on narrower types doesn't leave
/// those types. If \c false, return a range including all possible
/// result values.
static IntRange GetExprRange(ASTContext &C, const Expr *E, unsigned MaxWidth,
@@ -13059,6 +13304,20 @@ static void AnalyzeImplicitConversions(
<< OrigE->getSourceRange() << T->isBooleanType()
<< FixItHint::CreateReplacement(UO->getBeginLoc(), "!");
+ if (const auto *BO = dyn_cast<BinaryOperator>(SourceExpr))
+ if ((BO->getOpcode() == BO_And || BO->getOpcode() == BO_Or) &&
+ BO->getLHS()->isKnownToHaveBooleanValue() &&
+ BO->getRHS()->isKnownToHaveBooleanValue() &&
+ BO->getLHS()->HasSideEffects(S.Context) &&
+ BO->getRHS()->HasSideEffects(S.Context)) {
+ S.Diag(BO->getBeginLoc(), diag::warn_bitwise_instead_of_logical)
+ << (BO->getOpcode() == BO_And ? "&" : "|") << OrigE->getSourceRange()
+ << FixItHint::CreateReplacement(
+ BO->getOperatorLoc(),
+ (BO->getOpcode() == BO_And ? "&&" : "||"));
+ S.Diag(BO->getBeginLoc(), diag::note_cast_operand_to_int);
+ }
+
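Illustrative sketch (not part of the diff): the block above warns when '&' or '|' joins two operands that are both known to be boolean and both have side effects, since the missing short-circuit is then observable. For example:

    bool readSensorA();
    bool readSensorB();
    bool bothReady() {
      return readSensorA() & readSensorB(); // warning: use '&&' instead of '&' (fix-it offered)
    }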
// For conditional operators, we analyze the arguments as if they
// were being fed directly into the output.
if (auto *CO = dyn_cast<AbstractConditionalOperator>(SourceExpr)) {
@@ -15999,7 +16258,7 @@ static bool isLayoutCompatible(ASTContext &C, QualType T1, QualType T2) {
///
/// \param MagicValue Type tag magic value.
///
-/// \param isConstantEvaluated wether the evalaution should be performed in
+/// \param isConstantEvaluated whether the evaluation should be performed in
/// constant context.
static bool FindTypeTagExpr(const Expr *TypeExpr, const ASTContext &Ctx,
@@ -16079,7 +16338,7 @@ static bool FindTypeTagExpr(const Expr *TypeExpr, const ASTContext &Ctx,
///
/// \param TypeInfo Information about the corresponding C type.
///
-/// \param isConstantEvaluated wether the evalaution should be performed in
+/// \param isConstantEvaluated whether the evaluation should be performed in
/// constant context.
///
/// \returns true if the corresponding C type was found.
@@ -16352,10 +16611,8 @@ void Sema::RefersToMemberWithReducedAlignment(
// Synthesize offset of the whole access.
CharUnits Offset;
- for (auto I = ReverseMemberChain.rbegin(); I != ReverseMemberChain.rend();
- I++) {
- Offset += Context.toCharUnitsFromBits(Context.getFieldOffset(*I));
- }
+ for (const FieldDecl *FD : llvm::reverse(ReverseMemberChain))
+ Offset += Context.toCharUnitsFromBits(Context.getFieldOffset(FD));
// Compute the CompleteObjectAlignment as the alignment of the whole chain.
CharUnits CompleteObjectAlignment = Context.getTypeAlignInChars(
@@ -16408,6 +16665,94 @@ void Sema::CheckAddressOfPackedMember(Expr *rhs) {
_2, _3, _4));
}
+// Check if \p Ty is a valid type for the elementwise math builtins. If it is
+// not a valid type, emit an error message and return true. Otherwise return
+// false.
+static bool checkMathBuiltinElementType(Sema &S, SourceLocation Loc,
+ QualType Ty) {
+ if (!Ty->getAs<VectorType>() && !ConstantMatrixType::isValidElementType(Ty)) {
+ S.Diag(Loc, diag::err_builtin_invalid_arg_type)
+ << 1 << /* vector, integer or float ty*/ 0 << Ty;
+ return true;
+ }
+ return false;
+}
+
+bool Sema::SemaBuiltinElementwiseMathOneArg(CallExpr *TheCall) {
+ if (checkArgCount(*this, TheCall, 1))
+ return true;
+
+ ExprResult A = UsualUnaryConversions(TheCall->getArg(0));
+ SourceLocation ArgLoc = TheCall->getArg(0)->getBeginLoc();
+ if (A.isInvalid())
+ return true;
+
+ TheCall->setArg(0, A.get());
+ QualType TyA = A.get()->getType();
+ if (checkMathBuiltinElementType(*this, ArgLoc, TyA))
+ return true;
+
+ QualType EltTy = TyA;
+ if (auto *VecTy = EltTy->getAs<VectorType>())
+ EltTy = VecTy->getElementType();
+ if (EltTy->isUnsignedIntegerType())
+ return Diag(ArgLoc, diag::err_builtin_invalid_arg_type)
+ << 1 << /*signed integer or float ty*/ 3 << TyA;
+
+ TheCall->setType(TyA);
+ return false;
+}
+
+bool Sema::SemaBuiltinElementwiseMath(CallExpr *TheCall) {
+ if (checkArgCount(*this, TheCall, 2))
+ return true;
+
+ ExprResult A = TheCall->getArg(0);
+ ExprResult B = TheCall->getArg(1);
+ // Do standard promotions between the two arguments, returning their common
+ // type.
+ QualType Res =
+ UsualArithmeticConversions(A, B, TheCall->getExprLoc(), ACK_Comparison);
+ if (A.isInvalid() || B.isInvalid())
+ return true;
+
+ QualType TyA = A.get()->getType();
+ QualType TyB = B.get()->getType();
+
+ if (Res.isNull() || TyA.getCanonicalType() != TyB.getCanonicalType())
+ return Diag(A.get()->getBeginLoc(),
+ diag::err_typecheck_call_different_arg_types)
+ << TyA << TyB;
+
+ if (checkMathBuiltinElementType(*this, A.get()->getBeginLoc(), TyA))
+ return true;
+
+ TheCall->setArg(0, A.get());
+ TheCall->setArg(1, B.get());
+ TheCall->setType(Res);
+ return false;
+}
+
+bool Sema::SemaBuiltinReduceMath(CallExpr *TheCall) {
+ if (checkArgCount(*this, TheCall, 1))
+ return true;
+
+ ExprResult A = UsualUnaryConversions(TheCall->getArg(0));
+ if (A.isInvalid())
+ return true;
+
+ TheCall->setArg(0, A.get());
+ const VectorType *TyA = A.get()->getType()->getAs<VectorType>();
+ if (!TyA) {
+ SourceLocation ArgLoc = TheCall->getArg(0)->getBeginLoc();
+ return Diag(ArgLoc, diag::err_builtin_invalid_arg_type)
+ << 1 << /* vector ty*/ 4 << A.get()->getType();
+ }
+
+ TheCall->setType(TyA->getElementType());
+ return false;
+}
+
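Illustrative sketch (not part of the diff): the three helpers above do the shared argument checking for the new elementwise and reduction builtins. Assuming the builtin names they back in this patch series (__builtin_elementwise_abs, __builtin_elementwise_max, __builtin_reduce_max):

    typedef float float4 __attribute__((ext_vector_type(4)));
    typedef unsigned uint4 __attribute__((ext_vector_type(4)));

    float demo(float4 a, float4 b, uint4 u) {
      float4 m = __builtin_elementwise_max(a, b); // OK: both operands share one vector type
      float r  = __builtin_reduce_max(m);         // OK: result is the element type, float
      // __builtin_elementwise_abs(u) -> error: signed integer or floating element type required
      // __builtin_reduce_max(3.0f)   -> error: the argument must have a vector type
      return r;
    }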
ExprResult Sema::SemaBuiltinMatrixTranspose(CallExpr *TheCall,
ExprResult CallResult) {
if (checkArgCount(*this, TheCall, 1))
@@ -16420,7 +16765,8 @@ ExprResult Sema::SemaBuiltinMatrixTranspose(CallExpr *TheCall,
auto *MType = Matrix->getType()->getAs<ConstantMatrixType>();
if (!MType) {
- Diag(Matrix->getBeginLoc(), diag::err_builtin_matrix_arg);
+ Diag(Matrix->getBeginLoc(), diag::err_builtin_invalid_arg_type)
+ << 1 << /* matrix ty*/ 1 << Matrix->getType();
return ExprError();
}
@@ -16491,15 +16837,16 @@ ExprResult Sema::SemaBuiltinMatrixColumnMajorLoad(CallExpr *TheCall,
auto *PtrTy = PtrExpr->getType()->getAs<PointerType>();
QualType ElementTy;
if (!PtrTy) {
- Diag(PtrExpr->getBeginLoc(), diag::err_builtin_matrix_pointer_arg)
- << PtrArgIdx + 1;
+ Diag(PtrExpr->getBeginLoc(), diag::err_builtin_invalid_arg_type)
+ << PtrArgIdx + 1 << /*pointer to element ty*/ 2 << PtrExpr->getType();
ArgError = true;
} else {
ElementTy = PtrTy->getPointeeType().getUnqualifiedType();
if (!ConstantMatrixType::isValidElementType(ElementTy)) {
- Diag(PtrExpr->getBeginLoc(), diag::err_builtin_matrix_pointer_arg)
- << PtrArgIdx + 1;
+ Diag(PtrExpr->getBeginLoc(), diag::err_builtin_invalid_arg_type)
+ << PtrArgIdx + 1 << /* pointer to element ty*/ 2
+ << PtrExpr->getType();
ArgError = true;
}
}
@@ -16536,7 +16883,7 @@ ExprResult Sema::SemaBuiltinMatrixColumnMajorLoad(CallExpr *TheCall,
return CallResult;
}
- // Check row and column dimenions.
+ // Check row and column dimensions.
llvm::Optional<unsigned> MaybeRows;
if (RowsExpr)
MaybeRows = getAndVerifyMatrixDimension(RowsExpr, "row", *this);
@@ -16598,7 +16945,8 @@ ExprResult Sema::SemaBuiltinMatrixColumnMajorStore(CallExpr *TheCall,
auto *MatrixTy = MatrixExpr->getType()->getAs<ConstantMatrixType>();
if (!MatrixTy) {
- Diag(MatrixExpr->getBeginLoc(), diag::err_builtin_matrix_arg) << 0;
+ Diag(MatrixExpr->getBeginLoc(), diag::err_builtin_invalid_arg_type)
+ << 1 << /*matrix ty */ 1 << MatrixExpr->getType();
ArgError = true;
}
@@ -16617,8 +16965,8 @@ ExprResult Sema::SemaBuiltinMatrixColumnMajorStore(CallExpr *TheCall,
// Check pointer argument.
auto *PtrTy = PtrExpr->getType()->getAs<PointerType>();
if (!PtrTy) {
- Diag(PtrExpr->getBeginLoc(), diag::err_builtin_matrix_pointer_arg)
- << PtrArgIdx + 1;
+ Diag(PtrExpr->getBeginLoc(), diag::err_builtin_invalid_arg_type)
+ << PtrArgIdx + 1 << /*pointer to element ty*/ 2 << PtrExpr->getType();
ArgError = true;
} else {
QualType ElementTy = PtrTy->getPointeeType();
diff --git a/clang/lib/Sema/SemaCodeComplete.cpp b/clang/lib/Sema/SemaCodeComplete.cpp
index e03b671ae61e..083a67db7a91 100644
--- a/clang/lib/Sema/SemaCodeComplete.cpp
+++ b/clang/lib/Sema/SemaCodeComplete.cpp
@@ -23,6 +23,7 @@
#include "clang/AST/QualTypeNames.h"
#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/AST/Type.h"
+#include "clang/Basic/AttributeCommonInfo.h"
#include "clang/Basic/CharInfo.h"
#include "clang/Basic/OperatorKinds.h"
#include "clang/Basic/Specifiers.h"
@@ -34,6 +35,7 @@
#include "clang/Sema/Designator.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/Overload.h"
+#include "clang/Sema/ParsedAttr.h"
#include "clang/Sema/Scope.h"
#include "clang/Sema/ScopeInfo.h"
#include "clang/Sema/Sema.h"
@@ -741,9 +743,7 @@ getRequiredQualification(ASTContext &Context, const DeclContext *CurContext,
static bool shouldIgnoreDueToReservedName(const NamedDecl *ND, Sema &SemaRef) {
ReservedIdentifierStatus Status = ND->isReserved(SemaRef.getLangOpts());
// Ignore reserved names for compiler provided decls.
- if ((Status != ReservedIdentifierStatus::NotReserved) &&
- (Status != ReservedIdentifierStatus::StartsWithUnderscoreAtGlobalScope) &&
- ND->getLocation().isInvalid())
+ if (isReservedInAllContexts(Status) && ND->getLocation().isInvalid())
return true;
// For system headers ignore only double-underscore names.
@@ -2821,7 +2821,7 @@ FormatFunctionParameter(const PrintingPolicy &Policy, const ParmVarDecl *Param,
bool SuppressName = false, bool SuppressBlock = false,
Optional<ArrayRef<QualType>> ObjCSubsts = None) {
// Params are unavailable in FunctionTypeLoc if the FunctionType is invalid.
- // It would be better to pass in the param Type, which is usually avaliable.
+ // It would be better to pass in the param Type, which is usually available.
// But this case is rare, so just pretend we fell back to int as elsewhere.
if (!Param)
return "int";
@@ -4335,6 +4335,158 @@ void Sema::CodeCompleteDeclSpec(Scope *S, DeclSpec &DS,
Results.data(), Results.size());
}
+static const char *underscoreAttrScope(llvm::StringRef Scope) {
+ if (Scope == "clang")
+ return "_Clang";
+ if (Scope == "gnu")
+ return "__gnu__";
+ return nullptr;
+}
+
+static const char *noUnderscoreAttrScope(llvm::StringRef Scope) {
+ if (Scope == "_Clang")
+ return "clang";
+ if (Scope == "__gnu__")
+ return "gnu";
+ return nullptr;
+}
+
+void Sema::CodeCompleteAttribute(AttributeCommonInfo::Syntax Syntax,
+ AttributeCompletion Completion,
+ const IdentifierInfo *InScope) {
+ if (Completion == AttributeCompletion::None)
+ return;
+ ResultBuilder Results(*this, CodeCompleter->getAllocator(),
+ CodeCompleter->getCodeCompletionTUInfo(),
+ CodeCompletionContext::CCC_Attribute);
+
+ // We're going to iterate over the normalized spellings of the attribute.
+ // These don't include "underscore guarding": the normalized spelling is
+ // clang::foo but you can also write _Clang::__foo__.
+ //
+ // (Clang supports a mix like clang::__foo__ but we won't suggest it: either
+ // you care about clashing with macros or you don't).
+ //
+ // So if we're already in a scope, we determine its canonical spellings
+ // (for comparison with normalized attr spelling) and remember whether it was
+ // underscore-guarded (so we know how to spell contained attributes).
+ llvm::StringRef InScopeName;
+ bool InScopeUnderscore = false;
+ if (InScope) {
+ InScopeName = InScope->getName();
+ if (const char *NoUnderscore = noUnderscoreAttrScope(InScopeName)) {
+ InScopeName = NoUnderscore;
+ InScopeUnderscore = true;
+ }
+ }
+ bool SyntaxSupportsGuards = Syntax == AttributeCommonInfo::AS_GNU ||
+ Syntax == AttributeCommonInfo::AS_CXX11 ||
+ Syntax == AttributeCommonInfo::AS_C2x;
+
+ llvm::DenseSet<llvm::StringRef> FoundScopes;
+ auto AddCompletions = [&](const ParsedAttrInfo &A) {
+ if (A.IsTargetSpecific && !A.existsInTarget(Context.getTargetInfo()))
+ return;
+ if (!A.acceptsLangOpts(getLangOpts()))
+ return;
+ for (const auto &S : A.Spellings) {
+ if (S.Syntax != Syntax)
+ continue;
+ llvm::StringRef Name = S.NormalizedFullName;
+ llvm::StringRef Scope;
+ if ((Syntax == AttributeCommonInfo::AS_CXX11 ||
+ Syntax == AttributeCommonInfo::AS_C2x)) {
+ std::tie(Scope, Name) = Name.split("::");
+ if (Name.empty()) // oops, unscoped
+ std::swap(Name, Scope);
+ }
+
+ // Do we just want a list of scopes rather than attributes?
+ if (Completion == AttributeCompletion::Scope) {
+ // Make sure to emit each scope only once.
+ if (!Scope.empty() && FoundScopes.insert(Scope).second) {
+ Results.AddResult(
+ CodeCompletionResult(Results.getAllocator().CopyString(Scope)));
+ // Include alternate form (__gnu__ instead of gnu).
+ if (const char *Scope2 = underscoreAttrScope(Scope))
+ Results.AddResult(CodeCompletionResult(Scope2));
+ }
+ continue;
+ }
+
+ // If a scope was specified, it must match but we don't need to print it.
+ if (!InScopeName.empty()) {
+ if (Scope != InScopeName)
+ continue;
+ Scope = "";
+ }
+
+ auto Add = [&](llvm::StringRef Scope, llvm::StringRef Name,
+ bool Underscores) {
+ CodeCompletionBuilder Builder(Results.getAllocator(),
+ Results.getCodeCompletionTUInfo());
+ llvm::SmallString<32> Text;
+ if (!Scope.empty()) {
+ Text.append(Scope);
+ Text.append("::");
+ }
+ if (Underscores)
+ Text.append("__");
+ Text.append(Name);
+ if (Underscores)
+ Text.append("__");
+ Builder.AddTypedTextChunk(Results.getAllocator().CopyString(Text));
+
+ if (!A.ArgNames.empty()) {
+ Builder.AddChunk(CodeCompletionString::CK_LeftParen, "(");
+ bool First = true;
+ for (const char *Arg : A.ArgNames) {
+ if (!First)
+ Builder.AddChunk(CodeCompletionString::CK_Comma, ", ");
+ First = false;
+ Builder.AddPlaceholderChunk(Arg);
+ }
+ Builder.AddChunk(CodeCompletionString::CK_RightParen, ")");
+ }
+
+ Results.AddResult(Builder.TakeString());
+ };
+
+ // Generate the non-underscore-guarded result.
+ // Note this is (a suffix of) the NormalizedFullName, no need to copy.
+ // If an underscore-guarded scope was specified, only the
+ // underscore-guarded attribute name is relevant.
+ if (!InScopeUnderscore)
+ Add(Scope, Name, /*Underscores=*/false);
+
+ // Generate the underscore-guarded version, for syntaxes that support it.
+ // We skip this if the scope was already spelled and not guarded, or
+ // we must spell it and can't guard it.
+ if (!(InScope && !InScopeUnderscore) && SyntaxSupportsGuards) {
+ llvm::SmallString<32> Guarded;
+ if (Scope.empty()) {
+ Add(Scope, Name, /*Underscores=*/true);
+ } else {
+ const char *GuardedScope = underscoreAttrScope(Scope);
+ if (!GuardedScope)
+ continue;
+ Add(GuardedScope, Name, /*Underscores=*/true);
+ }
+ }
+
+ // It may be nice to include the Kind so we can look up the docs later.
+ }
+ };
+
+ for (const auto *A : ParsedAttrInfo::getAllBuiltin())
+ AddCompletions(*A);
+ for (const auto &Entry : ParsedAttrInfoRegistry::entries())
+ AddCompletions(*Entry.instantiate());
+
+ HandleCodeCompleteResults(this, CodeCompleter, Results.getCompletionContext(),
+ Results.data(), Results.size());
+}
+
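Illustrative sketch (not part of the diff): the completer above works from normalized attribute spellings (clang::foo, gnu::foo) and emits the underscore-guarded variants only for syntaxes that support them, or when the user already opened a guarded scope such as __gnu__. Both declarations below name the same attribute; the second is the macro-clash-safe form offered inside a guarded scope:

    [[gnu::warn_unused_result]] int parse();
    [[__gnu__::__warn_unused_result__]] int parseGuarded();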
struct Sema::CodeCompleteExpressionData {
CodeCompleteExpressionData(QualType PreferredType = QualType(),
bool IsParenthesized = false)
@@ -5666,7 +5818,8 @@ static void mergeCandidatesWithResults(
if (Candidate.Function) {
if (Candidate.Function->isDeleted())
continue;
- if (!Candidate.Function->isVariadic() &&
+ if (shouldEnforceArgLimit(/*PartialOverloading=*/true,
+ Candidate.Function) &&
Candidate.Function->getNumParams() <= ArgSize &&
// Having zero args is annoying, normally we don't surface a function
// with 2 params, if you already have 2 params, because you are
@@ -9461,6 +9614,10 @@ void Sema::CodeCompleteIncludedFile(llvm::StringRef Dir, bool Angled) {
}
}
+ const StringRef &Dirname = llvm::sys::path::filename(Dir);
+ const bool isQt = Dirname.startswith("Qt") || Dirname == "ActiveQt";
+ const bool ExtensionlessHeaders =
+ IsSystem || isQt || Dir.endswith(".framework/Headers");
std::error_code EC;
unsigned Count = 0;
for (auto It = FS.dir_begin(Dir, EC);
@@ -9487,18 +9644,19 @@ void Sema::CodeCompleteIncludedFile(llvm::StringRef Dir, bool Angled) {
AddCompletion(Filename, /*IsDirectory=*/true);
break;
- case llvm::sys::fs::file_type::regular_file:
- // Only files that really look like headers. (Except in system dirs).
- if (!IsSystem) {
- // Header extensions from Types.def, which we can't depend on here.
- if (!(Filename.endswith_insensitive(".h") ||
- Filename.endswith_insensitive(".hh") ||
- Filename.endswith_insensitive(".hpp") ||
- Filename.endswith_insensitive(".inc")))
- break;
- }
+ case llvm::sys::fs::file_type::regular_file: {
+ // Only files that really look like headers. (Except in special dirs).
+ // Header extensions from Types.def, which we can't depend on here.
+ const bool IsHeader = Filename.endswith_insensitive(".h") ||
+ Filename.endswith_insensitive(".hh") ||
+ Filename.endswith_insensitive(".hpp") ||
+ Filename.endswith_insensitive(".inc") ||
+ (ExtensionlessHeaders && !Filename.contains('.'));
+ if (!IsHeader)
+ break;
AddCompletion(Filename, /*IsDirectory=*/false);
break;
+ }
default:
break;
}
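Illustrative sketch (not part of the diff): with the change above, include completion also offers extensionless files when the directory is a system directory, a Qt module directory (Qt*/ActiveQt), or a .framework/Headers directory, instead of insisting on .h/.hh/.hpp/.inc names. For example, completing inside a Qt include path can now suggest:

    #include <QtCore/QString>   // extensionless header, previously filtered out of completion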
diff --git a/clang/lib/Sema/SemaConcept.cpp b/clang/lib/Sema/SemaConcept.cpp
index f2c70d0a56ef..466e37831f66 100644
--- a/clang/lib/Sema/SemaConcept.cpp
+++ b/clang/lib/Sema/SemaConcept.cpp
@@ -1,9 +1,8 @@
//===-- SemaConcept.cpp - Semantic Analysis for Constraints and Concepts --===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -235,7 +234,7 @@ static bool calculateConstraintSatisfaction(
// ...If substitution results in an invalid type or expression, the
// constraint is not satisfied.
if (!Trap.hasErrorOccurred())
- // A non-SFINAE error has occured as a result of this
+ // A non-SFINAE error has occurred as a result of this
// substitution.
return ExprError();
@@ -461,7 +460,7 @@ static void diagnoseUnsatisfiedRequirement(Sema &S,
Expr *e = Req->getExpr();
S.Diag(e->getBeginLoc(),
diag::note_expr_requirement_constraints_not_satisfied_simple)
- << (int)First << S.getDecltypeForParenthesizedExpr(e)
+ << (int)First << S.Context.getReferenceQualifiedType(e)
<< ConstraintExpr->getNamedConcept();
} else {
S.Diag(ConstraintExpr->getBeginLoc(),
@@ -742,22 +741,15 @@ Optional<NormalizedConstraint>
NormalizedConstraint::fromConstraintExprs(Sema &S, NamedDecl *D,
ArrayRef<const Expr *> E) {
assert(E.size() != 0);
- auto First = fromConstraintExpr(S, D, E[0]);
- if (E.size() == 1)
- return First;
- auto Second = fromConstraintExpr(S, D, E[1]);
- if (!Second)
+ auto Conjunction = fromConstraintExpr(S, D, E[0]);
+ if (!Conjunction)
return None;
- llvm::Optional<NormalizedConstraint> Conjunction;
- Conjunction.emplace(S.Context, std::move(*First), std::move(*Second),
- CCK_Conjunction);
- for (unsigned I = 2; I < E.size(); ++I) {
+ for (unsigned I = 1; I < E.size(); ++I) {
auto Next = fromConstraintExpr(S, D, E[I]);
if (!Next)
- return llvm::Optional<NormalizedConstraint>{};
- NormalizedConstraint NewConjunction(S.Context, std::move(*Conjunction),
+ return None;
+ *Conjunction = NormalizedConstraint(S.Context, std::move(*Conjunction),
std::move(*Next), CCK_Conjunction);
- *Conjunction = std::move(NewConjunction);
}
return Conjunction;
}
@@ -988,8 +980,8 @@ bool Sema::MaybeEmitAmbiguousAtomicConstraintsDiagnostic(NamedDecl *D1,
// Not the same source level expression - are the expressions
// identical?
llvm::FoldingSetNodeID IDA, IDB;
- EA->Profile(IDA, Context, /*Cannonical=*/true);
- EB->Profile(IDB, Context, /*Cannonical=*/true);
+ EA->Profile(IDA, Context, /*Canonical=*/true);
+ EB->Profile(IDB, Context, /*Canonical=*/true);
if (IDA != IDB)
return false;
@@ -1073,8 +1065,7 @@ ReturnTypeRequirement(TemplateParameterList *TPL) :
assert(TC &&
"TPL must have a template type parameter with a type constraint");
auto *Constraint =
- cast_or_null<ConceptSpecializationExpr>(
- TC->getImmediatelyDeclaredConstraint());
+ cast<ConceptSpecializationExpr>(TC->getImmediatelyDeclaredConstraint());
bool Dependent =
Constraint->getTemplateArgsAsWritten() &&
TemplateSpecializationType::anyInstantiationDependentTemplateArguments(
diff --git a/clang/lib/Sema/SemaCoroutine.cpp b/clang/lib/Sema/SemaCoroutine.cpp
index 94c728093e7c..b999b08d1662 100644
--- a/clang/lib/Sema/SemaCoroutine.cpp
+++ b/clang/lib/Sema/SemaCoroutine.cpp
@@ -53,15 +53,10 @@ static QualType lookupPromiseType(Sema &S, const FunctionDecl *FD,
SourceLocation KwLoc) {
const FunctionProtoType *FnType = FD->getType()->castAs<FunctionProtoType>();
const SourceLocation FuncLoc = FD->getLocation();
- // FIXME: Cache std::coroutine_traits once we've found it.
- NamespaceDecl *StdExp = S.lookupStdExperimentalNamespace();
- if (!StdExp) {
- S.Diag(KwLoc, diag::err_implied_coroutine_type_not_found)
- << "std::experimental::coroutine_traits";
- return QualType();
- }
- ClassTemplateDecl *CoroTraits = S.lookupCoroutineTraits(KwLoc, FuncLoc);
+ NamespaceDecl *CoroNamespace = nullptr;
+ ClassTemplateDecl *CoroTraits =
+ S.lookupCoroutineTraits(KwLoc, FuncLoc, CoroNamespace);
if (!CoroTraits) {
return QualType();
}
@@ -122,7 +117,7 @@ static QualType lookupPromiseType(Sema &S, const FunctionDecl *FD,
QualType PromiseType = S.Context.getTypeDeclType(Promise);
auto buildElaboratedType = [&]() {
- auto *NNS = NestedNameSpecifier::Create(S.Context, nullptr, StdExp);
+ auto *NNS = NestedNameSpecifier::Create(S.Context, nullptr, CoroNamespace);
NNS = NestedNameSpecifier::Create(S.Context, NNS, false,
CoroTrait.getTypePtr());
return S.Context.getElaboratedType(ETK_None, NNS, PromiseType);
@@ -141,20 +136,20 @@ static QualType lookupPromiseType(Sema &S, const FunctionDecl *FD,
return PromiseType;
}
-/// Look up the std::experimental::coroutine_handle<PromiseType>.
+/// Look up the std::coroutine_handle<PromiseType>.
static QualType lookupCoroutineHandleType(Sema &S, QualType PromiseType,
SourceLocation Loc) {
if (PromiseType.isNull())
return QualType();
- NamespaceDecl *StdExp = S.lookupStdExperimentalNamespace();
- assert(StdExp && "Should already be diagnosed");
+ NamespaceDecl *CoroNamespace = S.getCachedCoroNamespace();
+ assert(CoroNamespace && "Should already be diagnosed");
LookupResult Result(S, &S.PP.getIdentifierTable().get("coroutine_handle"),
Loc, Sema::LookupOrdinaryName);
- if (!S.LookupQualifiedName(Result, StdExp)) {
+ if (!S.LookupQualifiedName(Result, CoroNamespace)) {
S.Diag(Loc, diag::err_implied_coroutine_type_not_found)
- << "std::experimental::coroutine_handle";
+ << "std::coroutine_handle";
return QualType();
}
@@ -1000,7 +995,7 @@ static Expr *buildStdNoThrowDeclRef(Sema &S, SourceLocation Loc) {
LookupResult Result(S, &S.PP.getIdentifierTable().get("nothrow"), Loc,
Sema::LookupOrdinaryName);
if (!S.LookupQualifiedName(Result, Std)) {
- // FIXME: <experimental/coroutine> should have been included already.
+ // FIXME: <coroutine> should have been included already.
// If we require it to include <new> then this diagnostic is no longer
// needed.
S.Diag(Loc, diag::err_implicit_coroutine_std_nothrow_type_not_found);
@@ -1533,7 +1528,7 @@ bool CoroutineStmtBuilder::makeGroDeclAndReturnStmt() {
if (GroType->isVoidType()) {
// Trigger a nice error message.
InitializedEntity Entity =
- InitializedEntity::InitializeResult(Loc, FnRetType, false);
+ InitializedEntity::InitializeResult(Loc, FnRetType);
S.PerformCopyInitialization(Entity, SourceLocation(), ReturnValue);
noteMemberDeclaredHere(S, ReturnValue, Fn);
return false;
@@ -1663,25 +1658,47 @@ StmtResult Sema::BuildCoroutineBodyStmt(CoroutineBodyStmt::CtorArgs Args) {
}
ClassTemplateDecl *Sema::lookupCoroutineTraits(SourceLocation KwLoc,
- SourceLocation FuncLoc) {
+ SourceLocation FuncLoc,
+ NamespaceDecl *&Namespace) {
if (!StdCoroutineTraitsCache) {
- if (auto StdExp = lookupStdExperimentalNamespace()) {
- LookupResult Result(*this,
- &PP.getIdentifierTable().get("coroutine_traits"),
- FuncLoc, LookupOrdinaryName);
- if (!LookupQualifiedName(Result, StdExp)) {
+ NamespaceDecl *CoroNamespace = getStdNamespace();
+ LookupResult Result(*this, &PP.getIdentifierTable().get("coroutine_traits"),
+ FuncLoc, LookupOrdinaryName);
+
+ if (!CoroNamespace || !LookupQualifiedName(Result, CoroNamespace)) {
+ /// Look up in namespace std::experimental, for compatibility.
+ /// TODO: Remove this extra lookup when <experimental/coroutine> is
+ /// removed.
+ CoroNamespace = lookupStdExperimentalNamespace();
+ if (!CoroNamespace || !LookupQualifiedName(Result, CoroNamespace)) {
Diag(KwLoc, diag::err_implied_coroutine_type_not_found)
- << "std::experimental::coroutine_traits";
+ << "std::coroutine_traits";
return nullptr;
}
- if (!(StdCoroutineTraitsCache =
- Result.getAsSingle<ClassTemplateDecl>())) {
- Result.suppressDiagnostics();
- NamedDecl *Found = *Result.begin();
- Diag(Found->getLocation(), diag::err_malformed_std_coroutine_traits);
+ Diag(KwLoc, diag::warn_deprecated_coroutine_namespace)
+ << "coroutine_traits";
+ } else {
+    /// We found coroutine_traits in the std namespace; make sure there is no
+    /// misleading definition in the std::experimental namespace.
+ NamespaceDecl *ExpNamespace = lookupStdExperimentalNamespace();
+ LookupResult ExpResult(*this,
+ &PP.getIdentifierTable().get("coroutine_traits"),
+ FuncLoc, LookupOrdinaryName);
+ if (ExpNamespace && LookupQualifiedName(ExpResult, ExpNamespace)) {
+ Diag(KwLoc,
+ diag::err_mixed_use_std_and_experimental_namespace_for_coroutine);
return nullptr;
}
}
+
+ if (!(StdCoroutineTraitsCache = Result.getAsSingle<ClassTemplateDecl>())) {
+ Result.suppressDiagnostics();
+ NamedDecl *Found = *Result.begin();
+ Diag(Found->getLocation(), diag::err_malformed_std_coroutine_traits);
+ return nullptr;
+ }
+ CoroTraitsNamespaceCache = CoroNamespace;
}
+ Namespace = CoroTraitsNamespaceCache;
return StdCoroutineTraitsCache;
}
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 205f58000302..4fcc01012d44 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -141,6 +141,7 @@ bool Sema::isSimpleTypeSpecifier(tok::TokenKind Kind) const {
case tok::kw___bf16:
case tok::kw__Float16:
case tok::kw___float128:
+ case tok::kw___ibm128:
case tok::kw_wchar_t:
case tok::kw_bool:
case tok::kw___underlying_type:
@@ -1920,8 +1921,10 @@ void Sema::DiagnoseUnusedDecl(const NamedDecl *D) {
}
void Sema::DiagnoseUnusedButSetDecl(const VarDecl *VD) {
- // If it's not referenced, it can't be set.
- if (!VD->isReferenced() || !VD->getDeclName() || VD->hasAttr<UnusedAttr>())
+ // If it's not referenced, it can't be set. If it has the Cleanup attribute,
+ // it's not really unused.
+ if (!VD->isReferenced() || !VD->getDeclName() || VD->hasAttr<UnusedAttr>() ||
+ VD->hasAttr<CleanupAttr>())
return;
const auto *Ty = VD->getType().getTypePtr()->getBaseElementTypeUnsafe();
@@ -1941,6 +1944,12 @@ void Sema::DiagnoseUnusedButSetDecl(const VarDecl *VD) {
}
}
+ // Don't warn about __block Objective-C pointer variables, as they might
+ // be assigned in the block but not used elsewhere for the purpose of lifetime
+ // extension.
+ if (VD->hasAttr<BlocksAttr>() && Ty->isObjCObjectPointerType())
+ return;
+
auto iter = RefsMinusAssignments.find(VD);
if (iter == RefsMinusAssignments.end())
return;
@@ -2113,8 +2122,9 @@ FunctionDecl *Sema::CreateBuiltin(IdentifierInfo *II, QualType Type,
}
FunctionDecl *New = FunctionDecl::Create(Context, Parent, Loc, Loc, II, Type,
- /*TInfo=*/nullptr, SC_Extern, false,
- Type->isFunctionProtoType());
+ /*TInfo=*/nullptr, SC_Extern,
+ getCurFPFeatures().isFPConstrained(),
+ false, Type->isFunctionProtoType());
New->setImplicit();
New->addAttr(BuiltinAttr::CreateImplicit(Context, ID));
@@ -2627,6 +2637,8 @@ static bool mergeDeclAttribute(Sema &S, NamedDecl *D,
NewAttr = S.mergeDLLImportAttr(D, *ImportA);
else if (const auto *ExportA = dyn_cast<DLLExportAttr>(Attr))
NewAttr = S.mergeDLLExportAttr(D, *ExportA);
+ else if (const auto *EA = dyn_cast<ErrorAttr>(Attr))
+ NewAttr = S.mergeErrorAttr(D, *EA, EA->getUserDiagnostic());
else if (const auto *FA = dyn_cast<FormatAttr>(Attr))
NewAttr = S.mergeFormatAttr(D, *FA, FA->getType(), FA->getFormatIdx(),
FA->getFirstArg());
@@ -2673,6 +2685,8 @@ static bool mergeDeclAttribute(Sema &S, NamedDecl *D,
NewAttr = S.mergeEnforceTCBAttr(D, *TCBA);
else if (const auto *TCBLA = dyn_cast<EnforceTCBLeafAttr>(Attr))
NewAttr = S.mergeEnforceTCBLeafAttr(D, *TCBLA);
+ else if (const auto *BTFA = dyn_cast<BTFDeclTagAttr>(Attr))
+ NewAttr = S.mergeBTFDeclTagAttr(D, *BTFA);
else if (Attr->shouldInheritEvenIfAlreadyPresent() || !DeclHasAttr(D, Attr))
NewAttr = cast<InheritableAttr>(Attr->clone(S.Context));
@@ -2954,8 +2968,7 @@ void Sema::mergeDeclAttributes(NamedDecl *New, Decl *Old,
if (const auto *NewAbiTagAttr = New->getAttr<AbiTagAttr>()) {
if (const auto *OldAbiTagAttr = Old->getAttr<AbiTagAttr>()) {
for (const auto &NewTag : NewAbiTagAttr->tags()) {
- if (std::find(OldAbiTagAttr->tags_begin(), OldAbiTagAttr->tags_end(),
- NewTag) == OldAbiTagAttr->tags_end()) {
+ if (!llvm::is_contained(OldAbiTagAttr->tags(), NewTag)) {
Diag(NewAbiTagAttr->getLocation(),
diag::err_new_abi_tag_on_redeclaration)
<< NewTag;
@@ -3352,12 +3365,20 @@ bool Sema::MergeFunctionDecl(FunctionDecl *New, NamedDecl *&OldD,
}
}
- if (New->hasAttr<InternalLinkageAttr>() &&
- !Old->hasAttr<InternalLinkageAttr>()) {
- Diag(New->getLocation(), diag::err_internal_linkage_redeclaration)
- << New->getDeclName();
- notePreviousDefinition(Old, New->getLocation());
- New->dropAttr<InternalLinkageAttr>();
+ if (const auto *ILA = New->getAttr<InternalLinkageAttr>())
+ if (!Old->hasAttr<InternalLinkageAttr>()) {
+ Diag(New->getLocation(), diag::err_attribute_missing_on_first_decl)
+ << ILA;
+ Diag(Old->getLocation(), diag::note_previous_declaration);
+ New->dropAttr<InternalLinkageAttr>();
+ }
+
+ if (auto *EA = New->getAttr<ErrorAttr>()) {
+ if (!Old->hasAttr<ErrorAttr>()) {
+ Diag(EA->getLocation(), diag::err_attribute_missing_on_first_decl) << EA;
+ Diag(Old->getLocation(), diag::note_previous_declaration);
+ New->dropAttr<ErrorAttr>();
+ }
}
if (CheckRedeclarationModuleOwnership(New, Old))
@@ -3592,14 +3613,14 @@ bool Sema::MergeFunctionDecl(FunctionDecl *New, NamedDecl *&OldD,
// defined, copy the deduced value from the old declaration.
AutoType *OldAT = Old->getReturnType()->getContainedAutoType();
if (OldAT && OldAT->isDeduced()) {
- New->setType(
- SubstAutoType(New->getType(),
- OldAT->isDependentType() ? Context.DependentTy
- : OldAT->getDeducedType()));
- NewQType = Context.getCanonicalType(
- SubstAutoType(NewQType,
- OldAT->isDependentType() ? Context.DependentTy
- : OldAT->getDeducedType()));
+ QualType DT = OldAT->getDeducedType();
+ if (DT.isNull()) {
+ New->setType(SubstAutoTypeDependent(New->getType()));
+ NewQType = Context.getCanonicalType(SubstAutoTypeDependent(NewQType));
+ } else {
+ New->setType(SubstAutoType(New->getType(), DT));
+ NewQType = Context.getCanonicalType(SubstAutoType(NewQType, DT));
+ }
}
}
@@ -3677,12 +3698,12 @@ bool Sema::MergeFunctionDecl(FunctionDecl *New, NamedDecl *&OldD,
// The first declaration of a function shall specify the noreturn
// attribute if any declaration of that function specifies the noreturn
// attribute.
- const CXX11NoReturnAttr *NRA = New->getAttr<CXX11NoReturnAttr>();
- if (NRA && !Old->hasAttr<CXX11NoReturnAttr>()) {
- Diag(NRA->getLocation(), diag::err_noreturn_missing_on_first_decl);
- Diag(Old->getFirstDecl()->getLocation(),
- diag::note_noreturn_missing_first_decl);
- }
+ if (const auto *NRA = New->getAttr<CXX11NoReturnAttr>())
+ if (!Old->hasAttr<CXX11NoReturnAttr>()) {
+ Diag(NRA->getLocation(), diag::err_attribute_missing_on_first_decl)
+ << NRA;
+ Diag(Old->getLocation(), diag::note_previous_declaration);
+ }
// C++11 [dcl.attr.depend]p2:
// The first declaration of a function shall specify the
@@ -4160,18 +4181,18 @@ void Sema::MergeVarDecl(VarDecl *New, LookupResult &Previous) {
Old->getStorageClass() == SC_None &&
!Old->hasAttr<WeakImportAttr>()) {
Diag(New->getLocation(), diag::warn_weak_import) << New->getDeclName();
- notePreviousDefinition(Old, New->getLocation());
+ Diag(Old->getLocation(), diag::note_previous_declaration);
// Remove weak_import attribute on new declaration.
New->dropAttr<WeakImportAttr>();
}
- if (New->hasAttr<InternalLinkageAttr>() &&
- !Old->hasAttr<InternalLinkageAttr>()) {
- Diag(New->getLocation(), diag::err_internal_linkage_redeclaration)
- << New->getDeclName();
- notePreviousDefinition(Old, New->getLocation());
- New->dropAttr<InternalLinkageAttr>();
- }
+ if (const auto *ILA = New->getAttr<InternalLinkageAttr>())
+ if (!Old->hasAttr<InternalLinkageAttr>()) {
+ Diag(New->getLocation(), diag::err_attribute_missing_on_first_decl)
+ << ILA;
+ Diag(Old->getLocation(), diag::note_previous_declaration);
+ New->dropAttr<InternalLinkageAttr>();
+ }
// Merge the types.
VarDecl *MostRecent = Old->getMostRecentDecl();
@@ -5281,8 +5302,7 @@ Decl *Sema::BuildAnonymousStructOrUnion(Scope *S, DeclSpec &DS,
// trivial in almost all cases, except if a union member has an in-class
// initializer:
// union { int n = 0; };
- if (!Invalid)
- ActOnUninitializedDecl(Anon);
+ ActOnUninitializedDecl(Anon);
}
Anon->setImplicit();
@@ -5761,8 +5781,8 @@ bool Sema::diagnoseQualifiedDeclaration(CXXScopeSpec &SS, DeclContext *DC,
NestedNameSpecifierLoc SpecLoc(SS.getScopeRep(), SS.location_data());
while (SpecLoc.getPrefix())
SpecLoc = SpecLoc.getPrefix();
- if (dyn_cast_or_null<DecltypeType>(
- SpecLoc.getNestedNameSpecifier()->getAsType()))
+ if (isa_and_nonnull<DecltypeType>(
+ SpecLoc.getNestedNameSpecifier()->getAsType()))
Diag(Loc, diag::err_decltype_in_declarator)
<< SpecLoc.getTypeLoc().getSourceRange();
@@ -7327,10 +7347,9 @@ NamedDecl *Sema::ActOnVariableDeclarator(
DeclSpec::TSCS TSC = D.getDeclSpec().getThreadStorageClassSpec();
if (TSC != TSCS_unspecified) {
- bool IsCXX = getLangOpts().OpenCLCPlusPlus;
Diag(D.getDeclSpec().getThreadStorageClassSpecLoc(),
diag::err_opencl_unknown_type_specifier)
- << IsCXX << getLangOpts().getOpenCLVersionTuple().getAsString()
+ << getLangOpts().getOpenCLVersionString()
<< DeclSpec::getSpecifierName(TSC) << 1;
NewVD->setInvalidDecl();
}
@@ -8552,10 +8571,11 @@ static FunctionDecl *CreateNewFunctionDecl(Sema &SemaRef, Declarator &D,
(D.isFunctionDeclarator() && D.getFunctionTypeInfo().hasPrototype) ||
(!R->getAsAdjusted<FunctionType>() && R->isFunctionProtoType());
- NewFD = FunctionDecl::Create(SemaRef.Context, DC, D.getBeginLoc(), NameInfo,
- R, TInfo, SC, isInline, HasPrototype,
- ConstexprSpecKind::Unspecified,
- /*TrailingRequiresClause=*/nullptr);
+ NewFD = FunctionDecl::Create(
+ SemaRef.Context, DC, D.getBeginLoc(), NameInfo, R, TInfo, SC,
+ SemaRef.getCurFPFeatures().isFPConstrained(), isInline, HasPrototype,
+ ConstexprSpecKind::Unspecified,
+ /*TrailingRequiresClause=*/nullptr);
if (D.isInvalidType())
NewFD->setInvalidDecl();
@@ -8591,9 +8611,9 @@ static FunctionDecl *CreateNewFunctionDecl(Sema &SemaRef, Declarator &D,
R = SemaRef.CheckConstructorDeclarator(D, R, SC);
return CXXConstructorDecl::Create(
SemaRef.Context, cast<CXXRecordDecl>(DC), D.getBeginLoc(), NameInfo, R,
- TInfo, ExplicitSpecifier, isInline,
- /*isImplicitlyDeclared=*/false, ConstexprKind, InheritedConstructor(),
- TrailingRequiresClause);
+ TInfo, ExplicitSpecifier, SemaRef.getCurFPFeatures().isFPConstrained(),
+ isInline, /*isImplicitlyDeclared=*/false, ConstexprKind,
+ InheritedConstructor(), TrailingRequiresClause);
} else if (Name.getNameKind() == DeclarationName::CXXDestructorName) {
// This is a C++ destructor declaration.
@@ -8602,7 +8622,8 @@ static FunctionDecl *CreateNewFunctionDecl(Sema &SemaRef, Declarator &D,
CXXRecordDecl *Record = cast<CXXRecordDecl>(DC);
CXXDestructorDecl *NewDD = CXXDestructorDecl::Create(
SemaRef.Context, Record, D.getBeginLoc(), NameInfo, R, TInfo,
- isInline, /*isImplicitlyDeclared=*/false, ConstexprKind,
+ SemaRef.getCurFPFeatures().isFPConstrained(), isInline,
+ /*isImplicitlyDeclared=*/false, ConstexprKind,
TrailingRequiresClause);
// If the destructor needs an implicit exception specification, set it
@@ -8620,11 +8641,10 @@ static FunctionDecl *CreateNewFunctionDecl(Sema &SemaRef, Declarator &D,
// Create a FunctionDecl to satisfy the function definition parsing
// code path.
- return FunctionDecl::Create(SemaRef.Context, DC, D.getBeginLoc(),
- D.getIdentifierLoc(), Name, R, TInfo, SC,
- isInline,
- /*hasPrototype=*/true, ConstexprKind,
- TrailingRequiresClause);
+ return FunctionDecl::Create(
+ SemaRef.Context, DC, D.getBeginLoc(), D.getIdentifierLoc(), Name, R,
+ TInfo, SC, SemaRef.getCurFPFeatures().isFPConstrained(), isInline,
+ /*hasPrototype=*/true, ConstexprKind, TrailingRequiresClause);
}
} else if (Name.getNameKind() == DeclarationName::CXXConversionFunctionName) {
@@ -8641,7 +8661,8 @@ static FunctionDecl *CreateNewFunctionDecl(Sema &SemaRef, Declarator &D,
IsVirtualOkay = true;
return CXXConversionDecl::Create(
SemaRef.Context, cast<CXXRecordDecl>(DC), D.getBeginLoc(), NameInfo, R,
- TInfo, isInline, ExplicitSpecifier, ConstexprKind, SourceLocation(),
+ TInfo, SemaRef.getCurFPFeatures().isFPConstrained(), isInline,
+ ExplicitSpecifier, ConstexprKind, SourceLocation(),
TrailingRequiresClause);
} else if (Name.getNameKind() == DeclarationName::CXXDeductionGuideName) {
@@ -8670,8 +8691,8 @@ static FunctionDecl *CreateNewFunctionDecl(Sema &SemaRef, Declarator &D,
// This is a C++ method declaration.
CXXMethodDecl *Ret = CXXMethodDecl::Create(
SemaRef.Context, cast<CXXRecordDecl>(DC), D.getBeginLoc(), NameInfo, R,
- TInfo, SC, isInline, ConstexprKind, SourceLocation(),
- TrailingRequiresClause);
+ TInfo, SC, SemaRef.getCurFPFeatures().isFPConstrained(), isInline,
+ ConstexprKind, SourceLocation(), TrailingRequiresClause);
IsVirtualOkay = !Ret->isStatic();
return Ret;
} else {
@@ -8683,9 +8704,10 @@ static FunctionDecl *CreateNewFunctionDecl(Sema &SemaRef, Declarator &D,
// Determine whether the function was written with a
    // prototype. This is true when:
// - we're in C++ (where every function has a prototype),
- return FunctionDecl::Create(SemaRef.Context, DC, D.getBeginLoc(), NameInfo,
- R, TInfo, SC, isInline, true /*HasPrototype*/,
- ConstexprKind, TrailingRequiresClause);
+ return FunctionDecl::Create(
+ SemaRef.Context, DC, D.getBeginLoc(), NameInfo, R, TInfo, SC,
+ SemaRef.getCurFPFeatures().isFPConstrained(), isInline,
+ true /*HasPrototype*/, ConstexprKind, TrailingRequiresClause);
}
}
@@ -8819,8 +8841,7 @@ static void checkIsValidOpenCLKernelParameter(
// OpenCL v3.0 s6.11.a:
// A kernel function argument cannot be declared as a pointer to a pointer
// type. [...] This restriction only applies to OpenCL C 1.2 or below.
- if (S.getLangOpts().OpenCLVersion <= 120 &&
- !S.getLangOpts().OpenCLCPlusPlus) {
+ if (S.getLangOpts().getOpenCLCompatibleVersion() <= 120) {
S.Diag(Param->getLocation(), diag::err_opencl_ptrptr_kernel_param);
D.setInvalidType();
return;
@@ -9087,8 +9108,10 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
// C++ [class.union]p2
// A union can have member functions, but not virtual functions.
- if (isVirtual && Parent->isUnion())
+ if (isVirtual && Parent->isUnion()) {
Diag(D.getDeclSpec().getVirtualSpecLoc(), diag::err_virtual_in_union);
+ NewFD->setInvalidDecl();
+ }
}
SetNestedNameSpecifier(*this, NewFD, D);
@@ -9235,8 +9258,7 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
// a friend yet, so 'isDependentContext' on the FD doesn't work.
const FunctionProtoType *FPT =
NewFD->getType()->castAs<FunctionProtoType>();
- QualType Result =
- SubstAutoType(FPT->getReturnType(), Context.DependentTy);
+ QualType Result = SubstAutoTypeDependent(FPT->getReturnType());
NewFD->setType(Context.getFunctionType(Result, FPT->getParamTypes(),
FPT->getExtProtoInfo()));
}
@@ -9554,9 +9576,6 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
}
}
- if (LangOpts.SYCLIsDevice || (LangOpts.OpenMP && LangOpts.OpenMPIsDevice))
- checkDeviceDecl(NewFD, D.getBeginLoc());
-
if (!getLangOpts().CPlusPlus) {
// Perform semantic checking on the function declaration.
if (!NewFD->isInvalidDecl() && NewFD->isMain())
@@ -9955,8 +9974,7 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
if (getLangOpts().OpenCL && NewFD->hasAttr<OpenCLKernelAttr>()) {
// OpenCL v1.2 s6.8 static is invalid for kernel functions.
- if ((getLangOpts().OpenCLVersion >= 120)
- && (SC == SC_Static)) {
+ if (SC == SC_Static) {
Diag(D.getIdentifierLoc(), diag::err_static_kernel);
D.setInvalidType();
}
@@ -10002,7 +10020,7 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
// OpenCL 2.0 pipe restrictions forbids pipe packet types to be non-value
// types.
- if (getLangOpts().OpenCLVersion >= 200 || getLangOpts().OpenCLCPlusPlus) {
+ if (getLangOpts().getOpenCLCompatibleVersion() >= 200) {
if(const PipeType *PipeTy = PT->getAs<PipeType>()) {
QualType ElemTy = PipeTy->getElementType();
if (ElemTy->isReferenceType() || ElemTy->isPointerType()) {
@@ -10306,8 +10324,8 @@ bool Sema::areMultiversionVariantFunctionsCompatible(
ReturnType = 1,
ConstexprSpec = 2,
InlineSpec = 3,
- StorageClass = 4,
- Linkage = 5,
+ Linkage = 4,
+ LanguageLinkage = 5,
};
if (NoProtoDiagID.getDiagID() != 0 && OldFD &&
@@ -10381,11 +10399,11 @@ bool Sema::areMultiversionVariantFunctionsCompatible(
if (OldFD->isInlineSpecified() != NewFD->isInlineSpecified())
return Diag(DiffDiagIDAt.first, DiffDiagIDAt.second) << InlineSpec;
- if (OldFD->getStorageClass() != NewFD->getStorageClass())
- return Diag(DiffDiagIDAt.first, DiffDiagIDAt.second) << StorageClass;
+ if (OldFD->getFormalLinkage() != NewFD->getFormalLinkage())
+ return Diag(DiffDiagIDAt.first, DiffDiagIDAt.second) << Linkage;
if (!CLinkageMayDiffer && OldFD->isExternC() != NewFD->isExternC())
- return Diag(DiffDiagIDAt.first, DiffDiagIDAt.second) << Linkage;
+ return Diag(DiffDiagIDAt.first, DiffDiagIDAt.second) << LanguageLinkage;
if (CheckEquivalentExceptionSpec(
OldFD->getType()->getAs<FunctionProtoType>(), OldFD->getLocation(),
@@ -12331,7 +12349,7 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init, bool DirectInit) {
/*TreatUnavailableAsInvalid=*/false);
ExprResult Result = InitSeq.Perform(*this, Entity, Kind, Args, &DclT);
if (Result.isInvalid()) {
- // If the provied initializer fails to initialize the var decl,
+ // If the provided initializer fails to initialize the var decl,
// we attach a recovery expr for better recovery.
auto RecoveryExpr =
CreateRecoveryExpr(Init->getBeginLoc(), Init->getEndLoc(), Args);
@@ -12595,7 +12613,9 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init, bool DirectInit) {
VDecl->setInitStyle(VarDecl::ListInit);
}
- if (LangOpts.OpenMP && VDecl->isFileVarDecl())
+ if (LangOpts.OpenMP &&
+ (LangOpts.OpenMPIsDevice || !LangOpts.OMPTargetTriples.empty()) &&
+ VDecl->isFileVarDecl())
DeclsToCheckForDeferredDiags.insert(VDecl);
CheckCompleteVariableDeclaration(VDecl);
}
@@ -14477,7 +14497,7 @@ Decl *Sema::ActOnFinishFunctionBody(Decl *dcl, Stmt *Body,
FunctionScopeInfo *FSI = getCurFunction();
FunctionDecl *FD = dcl ? dcl->getAsFunction() : nullptr;
- if (FSI->UsesFPIntrin && !FD->hasAttr<StrictFPAttr>())
+ if (FSI->UsesFPIntrin && FD && !FD->hasAttr<StrictFPAttr>())
FD->addAttr(StrictFPAttr::CreateImplicit(Context));
sema::AnalysisBasedWarnings::Policy WP = AnalysisWarnings.getDefaultPolicy();
@@ -14486,333 +14506,340 @@ Decl *Sema::ActOnFinishFunctionBody(Decl *dcl, Stmt *Body,
if (getLangOpts().Coroutines && FSI->isCoroutine())
CheckCompletedCoroutineBody(FD, Body);
- // Do not call PopExpressionEvaluationContext() if it is a lambda because one
- // is already popped when finishing the lambda in BuildLambdaExpr(). This is
- // meant to pop the context added in ActOnStartOfFunctionDef().
- ExitFunctionBodyRAII ExitRAII(*this, isLambdaCallOperator(FD));
-
- if (FD) {
- FD->setBody(Body);
- FD->setWillHaveBody(false);
-
- if (getLangOpts().CPlusPlus14) {
- if (!FD->isInvalidDecl() && Body && !FD->isDependentContext() &&
- FD->getReturnType()->isUndeducedType()) {
- // If the function has a deduced result type but contains no 'return'
- // statements, the result type as written must be exactly 'auto', and
- // the deduced result type is 'void'.
- if (!FD->getReturnType()->getAs<AutoType>()) {
- Diag(dcl->getLocation(), diag::err_auto_fn_no_return_but_not_auto)
- << FD->getReturnType();
- FD->setInvalidDecl();
- } else {
- // Substitute 'void' for the 'auto' in the type.
- TypeLoc ResultType = getReturnTypeLoc(FD);
- Context.adjustDeducedFunctionResultType(
- FD, SubstAutoType(ResultType.getType(), Context.VoidTy));
+ {
+ // Do not call PopExpressionEvaluationContext() if it is a lambda because
+ // one is already popped when finishing the lambda in BuildLambdaExpr().
+ // This is meant to pop the context added in ActOnStartOfFunctionDef().
+ ExitFunctionBodyRAII ExitRAII(*this, isLambdaCallOperator(FD));
+
+ if (FD) {
+ FD->setBody(Body);
+ FD->setWillHaveBody(false);
+
+ if (getLangOpts().CPlusPlus14) {
+ if (!FD->isInvalidDecl() && Body && !FD->isDependentContext() &&
+ FD->getReturnType()->isUndeducedType()) {
+ // If the function has a deduced result type but contains no 'return'
+ // statements, the result type as written must be exactly 'auto', and
+ // the deduced result type is 'void'.
+ if (!FD->getReturnType()->getAs<AutoType>()) {
+ Diag(dcl->getLocation(), diag::err_auto_fn_no_return_but_not_auto)
+ << FD->getReturnType();
+ FD->setInvalidDecl();
+ } else {
+ // Substitute 'void' for the 'auto' in the type.
+ TypeLoc ResultType = getReturnTypeLoc(FD);
+ Context.adjustDeducedFunctionResultType(
+ FD, SubstAutoType(ResultType.getType(), Context.VoidTy));
+ }
+ }
+ } else if (getLangOpts().CPlusPlus11 && isLambdaCallOperator(FD)) {
+ // In C++11, we don't use 'auto' deduction rules for lambda call
+ // operators because we don't support return type deduction.
+ auto *LSI = getCurLambda();
+ if (LSI->HasImplicitReturnType) {
+ deduceClosureReturnType(*LSI);
+
+ // C++11 [expr.prim.lambda]p4:
+ // [...] if there are no return statements in the compound-statement
+ // [the deduced type is] the type void
+ QualType RetType =
+ LSI->ReturnType.isNull() ? Context.VoidTy : LSI->ReturnType;
+
+ // Update the return type to the deduced type.
+ const auto *Proto = FD->getType()->castAs<FunctionProtoType>();
+ FD->setType(Context.getFunctionType(RetType, Proto->getParamTypes(),
+ Proto->getExtProtoInfo()));
}
}
- } else if (getLangOpts().CPlusPlus11 && isLambdaCallOperator(FD)) {
- // In C++11, we don't use 'auto' deduction rules for lambda call
- // operators because we don't support return type deduction.
- auto *LSI = getCurLambda();
- if (LSI->HasImplicitReturnType) {
- deduceClosureReturnType(*LSI);
-
- // C++11 [expr.prim.lambda]p4:
- // [...] if there are no return statements in the compound-statement
- // [the deduced type is] the type void
- QualType RetType =
- LSI->ReturnType.isNull() ? Context.VoidTy : LSI->ReturnType;
-
- // Update the return type to the deduced type.
- const auto *Proto = FD->getType()->castAs<FunctionProtoType>();
- FD->setType(Context.getFunctionType(RetType, Proto->getParamTypes(),
- Proto->getExtProtoInfo()));
+
+ // If the function implicitly returns zero (like 'main') or is naked,
+ // don't complain about missing return statements.
+ if (FD->hasImplicitReturnZero() || FD->hasAttr<NakedAttr>())
+ WP.disableCheckFallThrough();
+
+      // MSVC permits the use of a pure specifier (=0) on a function definition
+      // at class scope; warn about this non-standard construct.
+ if (getLangOpts().MicrosoftExt && FD->isPure() && !FD->isOutOfLine())
+ Diag(FD->getLocation(), diag::ext_pure_function_definition);
+
+ if (!FD->isInvalidDecl()) {
+ // Don't diagnose unused parameters of defaulted or deleted functions.
+ if (!FD->isDeleted() && !FD->isDefaulted() && !FD->hasSkippedBody())
+ DiagnoseUnusedParameters(FD->parameters());
+ DiagnoseSizeOfParametersAndReturnValue(FD->parameters(),
+ FD->getReturnType(), FD);
+
+ // If this is a structor, we need a vtable.
+ if (CXXConstructorDecl *Constructor = dyn_cast<CXXConstructorDecl>(FD))
+ MarkVTableUsed(FD->getLocation(), Constructor->getParent());
+ else if (CXXDestructorDecl *Destructor =
+ dyn_cast<CXXDestructorDecl>(FD))
+ MarkVTableUsed(FD->getLocation(), Destructor->getParent());
+
+ // Try to apply the named return value optimization. We have to check
+ // if we can do this here because lambdas keep return statements around
+ // to deduce an implicit return type.
+ if (FD->getReturnType()->isRecordType() &&
+ (!getLangOpts().CPlusPlus || !FD->isDependentContext()))
+ computeNRVO(Body, FSI);
}
- }
- // If the function implicitly returns zero (like 'main') or is naked,
- // don't complain about missing return statements.
- if (FD->hasImplicitReturnZero() || FD->hasAttr<NakedAttr>())
- WP.disableCheckFallThrough();
-
-    // MSVC permits the use of a pure specifier (=0) on a function definition
-    // at class scope; warn about this non-standard construct.
- if (getLangOpts().MicrosoftExt && FD->isPure() && !FD->isOutOfLine())
- Diag(FD->getLocation(), diag::ext_pure_function_definition);
-
- if (!FD->isInvalidDecl()) {
- // Don't diagnose unused parameters of defaulted or deleted functions.
- if (!FD->isDeleted() && !FD->isDefaulted() && !FD->hasSkippedBody())
- DiagnoseUnusedParameters(FD->parameters());
- DiagnoseSizeOfParametersAndReturnValue(FD->parameters(),
- FD->getReturnType(), FD);
-
- // If this is a structor, we need a vtable.
- if (CXXConstructorDecl *Constructor = dyn_cast<CXXConstructorDecl>(FD))
- MarkVTableUsed(FD->getLocation(), Constructor->getParent());
- else if (CXXDestructorDecl *Destructor = dyn_cast<CXXDestructorDecl>(FD))
- MarkVTableUsed(FD->getLocation(), Destructor->getParent());
-
- // Try to apply the named return value optimization. We have to check
- // if we can do this here because lambdas keep return statements around
- // to deduce an implicit return type.
- if (FD->getReturnType()->isRecordType() &&
- (!getLangOpts().CPlusPlus || !FD->isDependentContext()))
- computeNRVO(Body, FSI);
- }
-
- // GNU warning -Wmissing-prototypes:
- // Warn if a global function is defined without a previous
- // prototype declaration. This warning is issued even if the
- // definition itself provides a prototype. The aim is to detect
- // global functions that fail to be declared in header files.
- const FunctionDecl *PossiblePrototype = nullptr;
- if (ShouldWarnAboutMissingPrototype(FD, PossiblePrototype)) {
- Diag(FD->getLocation(), diag::warn_missing_prototype) << FD;
-
- if (PossiblePrototype) {
- // We found a declaration that is not a prototype,
- // but that could be a zero-parameter prototype
- if (TypeSourceInfo *TI = PossiblePrototype->getTypeSourceInfo()) {
- TypeLoc TL = TI->getTypeLoc();
- if (FunctionNoProtoTypeLoc FTL = TL.getAs<FunctionNoProtoTypeLoc>())
- Diag(PossiblePrototype->getLocation(),
- diag::note_declaration_not_a_prototype)
- << (FD->getNumParams() != 0)
- << (FD->getNumParams() == 0
- ? FixItHint::CreateInsertion(FTL.getRParenLoc(), "void")
- : FixItHint{});
- }
- } else {
- // Returns true if the token beginning at this Loc is `const`.
- auto isLocAtConst = [&](SourceLocation Loc, const SourceManager &SM,
- const LangOptions &LangOpts) {
- std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc);
- if (LocInfo.first.isInvalid())
- return false;
+ // GNU warning -Wmissing-prototypes:
+ // Warn if a global function is defined without a previous
+ // prototype declaration. This warning is issued even if the
+ // definition itself provides a prototype. The aim is to detect
+ // global functions that fail to be declared in header files.
+ const FunctionDecl *PossiblePrototype = nullptr;
+ if (ShouldWarnAboutMissingPrototype(FD, PossiblePrototype)) {
+ Diag(FD->getLocation(), diag::warn_missing_prototype) << FD;
+
+ if (PossiblePrototype) {
+ // We found a declaration that is not a prototype,
+ // but that could be a zero-parameter prototype
+ if (TypeSourceInfo *TI = PossiblePrototype->getTypeSourceInfo()) {
+ TypeLoc TL = TI->getTypeLoc();
+ if (FunctionNoProtoTypeLoc FTL = TL.getAs<FunctionNoProtoTypeLoc>())
+ Diag(PossiblePrototype->getLocation(),
+ diag::note_declaration_not_a_prototype)
+ << (FD->getNumParams() != 0)
+ << (FD->getNumParams() == 0 ? FixItHint::CreateInsertion(
+ FTL.getRParenLoc(), "void")
+ : FixItHint{});
+ }
+ } else {
+ // Returns true if the token beginning at this Loc is `const`.
+ auto isLocAtConst = [&](SourceLocation Loc, const SourceManager &SM,
+ const LangOptions &LangOpts) {
+ std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc);
+ if (LocInfo.first.isInvalid())
+ return false;
- bool Invalid = false;
- StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid);
- if (Invalid)
- return false;
+ bool Invalid = false;
+ StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid);
+ if (Invalid)
+ return false;
- if (LocInfo.second > Buffer.size())
- return false;
+ if (LocInfo.second > Buffer.size())
+ return false;
- const char *LexStart = Buffer.data() + LocInfo.second;
- StringRef StartTok(LexStart, Buffer.size() - LocInfo.second);
+ const char *LexStart = Buffer.data() + LocInfo.second;
+ StringRef StartTok(LexStart, Buffer.size() - LocInfo.second);
- return StartTok.consume_front("const") &&
- (StartTok.empty() || isWhitespace(StartTok[0]) ||
- StartTok.startswith("/*") || StartTok.startswith("//"));
- };
+ return StartTok.consume_front("const") &&
+ (StartTok.empty() || isWhitespace(StartTok[0]) ||
+ StartTok.startswith("/*") || StartTok.startswith("//"));
+ };
- auto findBeginLoc = [&]() {
- // If the return type has `const` qualifier, we want to insert
- // `static` before `const` (and not before the typename).
- if ((FD->getReturnType()->isAnyPointerType() &&
- FD->getReturnType()->getPointeeType().isConstQualified()) ||
- FD->getReturnType().isConstQualified()) {
- // But only do this if we can determine where the `const` is.
+ auto findBeginLoc = [&]() {
+ // If the return type has `const` qualifier, we want to insert
+ // `static` before `const` (and not before the typename).
+ if ((FD->getReturnType()->isAnyPointerType() &&
+ FD->getReturnType()->getPointeeType().isConstQualified()) ||
+ FD->getReturnType().isConstQualified()) {
+ // But only do this if we can determine where the `const` is.
- if (isLocAtConst(FD->getBeginLoc(), getSourceManager(),
- getLangOpts()))
+ if (isLocAtConst(FD->getBeginLoc(), getSourceManager(),
+ getLangOpts()))
- return FD->getBeginLoc();
- }
- return FD->getTypeSpecStartLoc();
- };
- Diag(FD->getTypeSpecStartLoc(), diag::note_static_for_internal_linkage)
- << /* function */ 1
- << (FD->getStorageClass() == SC_None
- ? FixItHint::CreateInsertion(findBeginLoc(), "static ")
- : FixItHint{});
- }
+ return FD->getBeginLoc();
+ }
+ return FD->getTypeSpecStartLoc();
+ };
+ Diag(FD->getTypeSpecStartLoc(),
+ diag::note_static_for_internal_linkage)
+ << /* function */ 1
+ << (FD->getStorageClass() == SC_None
+ ? FixItHint::CreateInsertion(findBeginLoc(), "static ")
+ : FixItHint{});
+ }
- // GNU warning -Wstrict-prototypes
- // Warn if K&R function is defined without a previous declaration.
- // This warning is issued only if the definition itself does not provide
- // a prototype. Only K&R definitions do not provide a prototype.
- if (!FD->hasWrittenPrototype()) {
- TypeSourceInfo *TI = FD->getTypeSourceInfo();
- TypeLoc TL = TI->getTypeLoc();
- FunctionTypeLoc FTL = TL.getAsAdjusted<FunctionTypeLoc>();
- Diag(FTL.getLParenLoc(), diag::warn_strict_prototypes) << 2;
+ // GNU warning -Wstrict-prototypes
+ // Warn if K&R function is defined without a previous declaration.
+ // This warning is issued only if the definition itself does not
+ // provide a prototype. Only K&R definitions do not provide a
+ // prototype.
+ if (!FD->hasWrittenPrototype()) {
+ TypeSourceInfo *TI = FD->getTypeSourceInfo();
+ TypeLoc TL = TI->getTypeLoc();
+ FunctionTypeLoc FTL = TL.getAsAdjusted<FunctionTypeLoc>();
+ Diag(FTL.getLParenLoc(), diag::warn_strict_prototypes) << 2;
+ }
}
- }
- // Warn on CPUDispatch with an actual body.
- if (FD->isMultiVersion() && FD->hasAttr<CPUDispatchAttr>() && Body)
- if (const auto *CmpndBody = dyn_cast<CompoundStmt>(Body))
- if (!CmpndBody->body_empty())
- Diag(CmpndBody->body_front()->getBeginLoc(),
- diag::warn_dispatch_body_ignored);
-
- if (auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
- const CXXMethodDecl *KeyFunction;
- if (MD->isOutOfLine() && (MD = MD->getCanonicalDecl()) &&
- MD->isVirtual() &&
- (KeyFunction = Context.getCurrentKeyFunction(MD->getParent())) &&
- MD == KeyFunction->getCanonicalDecl()) {
- // Update the key-function state if necessary for this ABI.
- if (FD->isInlined() &&
- !Context.getTargetInfo().getCXXABI().canKeyFunctionBeInline()) {
- Context.setNonKeyFunction(MD);
-
- // If the newly-chosen key function is already defined, then we
- // need to mark the vtable as used retroactively.
- KeyFunction = Context.getCurrentKeyFunction(MD->getParent());
- const FunctionDecl *Definition;
- if (KeyFunction && KeyFunction->isDefined(Definition))
- MarkVTableUsed(Definition->getLocation(), MD->getParent(), true);
- } else {
-          // We just defined the key function; mark the vtable as used.
- MarkVTableUsed(FD->getLocation(), MD->getParent(), true);
+ // Warn on CPUDispatch with an actual body.
+ if (FD->isMultiVersion() && FD->hasAttr<CPUDispatchAttr>() && Body)
+ if (const auto *CmpndBody = dyn_cast<CompoundStmt>(Body))
+ if (!CmpndBody->body_empty())
+ Diag(CmpndBody->body_front()->getBeginLoc(),
+ diag::warn_dispatch_body_ignored);
+
+ if (auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
+ const CXXMethodDecl *KeyFunction;
+ if (MD->isOutOfLine() && (MD = MD->getCanonicalDecl()) &&
+ MD->isVirtual() &&
+ (KeyFunction = Context.getCurrentKeyFunction(MD->getParent())) &&
+ MD == KeyFunction->getCanonicalDecl()) {
+ // Update the key-function state if necessary for this ABI.
+ if (FD->isInlined() &&
+ !Context.getTargetInfo().getCXXABI().canKeyFunctionBeInline()) {
+ Context.setNonKeyFunction(MD);
+
+ // If the newly-chosen key function is already defined, then we
+ // need to mark the vtable as used retroactively.
+ KeyFunction = Context.getCurrentKeyFunction(MD->getParent());
+ const FunctionDecl *Definition;
+ if (KeyFunction && KeyFunction->isDefined(Definition))
+ MarkVTableUsed(Definition->getLocation(), MD->getParent(), true);
+ } else {
+            // We just defined the key function; mark the vtable as used.
+ MarkVTableUsed(FD->getLocation(), MD->getParent(), true);
+ }
}
}
- }
- assert((FD == getCurFunctionDecl() || getCurLambda()->CallOperator == FD) &&
- "Function parsing confused");
- } else if (ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(dcl)) {
- assert(MD == getCurMethodDecl() && "Method parsing confused");
- MD->setBody(Body);
- if (!MD->isInvalidDecl()) {
- DiagnoseSizeOfParametersAndReturnValue(MD->parameters(),
- MD->getReturnType(), MD);
-
- if (Body)
- computeNRVO(Body, FSI);
- }
- if (FSI->ObjCShouldCallSuper) {
- Diag(MD->getEndLoc(), diag::warn_objc_missing_super_call)
- << MD->getSelector().getAsString();
- FSI->ObjCShouldCallSuper = false;
- }
- if (FSI->ObjCWarnForNoDesignatedInitChain) {
- const ObjCMethodDecl *InitMethod = nullptr;
- bool isDesignated =
- MD->isDesignatedInitializerForTheInterface(&InitMethod);
- assert(isDesignated && InitMethod);
- (void)isDesignated;
-
- auto superIsNSObject = [&](const ObjCMethodDecl *MD) {
- auto IFace = MD->getClassInterface();
- if (!IFace)
- return false;
- auto SuperD = IFace->getSuperClass();
- if (!SuperD)
- return false;
- return SuperD->getIdentifier() ==
- NSAPIObj->getNSClassId(NSAPI::ClassId_NSObject);
- };
- // Don't issue this warning for unavailable inits or direct subclasses
- // of NSObject.
- if (!MD->isUnavailable() && !superIsNSObject(MD)) {
- Diag(MD->getLocation(),
- diag::warn_objc_designated_init_missing_super_call);
- Diag(InitMethod->getLocation(),
- diag::note_objc_designated_init_marked_here);
+ assert(
+ (FD == getCurFunctionDecl() || getCurLambda()->CallOperator == FD) &&
+ "Function parsing confused");
+ } else if (ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(dcl)) {
+ assert(MD == getCurMethodDecl() && "Method parsing confused");
+ MD->setBody(Body);
+ if (!MD->isInvalidDecl()) {
+ DiagnoseSizeOfParametersAndReturnValue(MD->parameters(),
+ MD->getReturnType(), MD);
+
+ if (Body)
+ computeNRVO(Body, FSI);
+ }
+ if (FSI->ObjCShouldCallSuper) {
+ Diag(MD->getEndLoc(), diag::warn_objc_missing_super_call)
+ << MD->getSelector().getAsString();
+ FSI->ObjCShouldCallSuper = false;
+ }
+ if (FSI->ObjCWarnForNoDesignatedInitChain) {
+ const ObjCMethodDecl *InitMethod = nullptr;
+ bool isDesignated =
+ MD->isDesignatedInitializerForTheInterface(&InitMethod);
+ assert(isDesignated && InitMethod);
+ (void)isDesignated;
+
+ auto superIsNSObject = [&](const ObjCMethodDecl *MD) {
+ auto IFace = MD->getClassInterface();
+ if (!IFace)
+ return false;
+ auto SuperD = IFace->getSuperClass();
+ if (!SuperD)
+ return false;
+ return SuperD->getIdentifier() ==
+ NSAPIObj->getNSClassId(NSAPI::ClassId_NSObject);
+ };
+ // Don't issue this warning for unavailable inits or direct subclasses
+ // of NSObject.
+ if (!MD->isUnavailable() && !superIsNSObject(MD)) {
+ Diag(MD->getLocation(),
+ diag::warn_objc_designated_init_missing_super_call);
+ Diag(InitMethod->getLocation(),
+ diag::note_objc_designated_init_marked_here);
+ }
+ FSI->ObjCWarnForNoDesignatedInitChain = false;
+ }
+ if (FSI->ObjCWarnForNoInitDelegation) {
+      // Don't issue this warning for unavailable inits.
+ if (!MD->isUnavailable())
+ Diag(MD->getLocation(),
+ diag::warn_objc_secondary_init_missing_init_call);
+ FSI->ObjCWarnForNoInitDelegation = false;
}
- FSI->ObjCWarnForNoDesignatedInitChain = false;
- }
- if (FSI->ObjCWarnForNoInitDelegation) {
-    // Don't issue this warning for unavailable inits.
- if (!MD->isUnavailable())
- Diag(MD->getLocation(),
- diag::warn_objc_secondary_init_missing_init_call);
- FSI->ObjCWarnForNoInitDelegation = false;
- }
- diagnoseImplicitlyRetainedSelf(*this);
- } else {
- // Parsing the function declaration failed in some way. Pop the fake scope
- // we pushed on.
- PopFunctionScopeInfo(ActivePolicy, dcl);
- return nullptr;
- }
+ diagnoseImplicitlyRetainedSelf(*this);
+ } else {
+ // Parsing the function declaration failed in some way. Pop the fake scope
+ // we pushed on.
+ PopFunctionScopeInfo(ActivePolicy, dcl);
+ return nullptr;
+ }
- if (Body && FSI->HasPotentialAvailabilityViolations)
- DiagnoseUnguardedAvailabilityViolations(dcl);
+ if (Body && FSI->HasPotentialAvailabilityViolations)
+ DiagnoseUnguardedAvailabilityViolations(dcl);
- assert(!FSI->ObjCShouldCallSuper &&
- "This should only be set for ObjC methods, which should have been "
- "handled in the block above.");
+ assert(!FSI->ObjCShouldCallSuper &&
+ "This should only be set for ObjC methods, which should have been "
+ "handled in the block above.");
- // Verify and clean out per-function state.
- if (Body && (!FD || !FD->isDefaulted())) {
- // C++ constructors that have function-try-blocks can't have return
- // statements in the handlers of that block. (C++ [except.handle]p14)
- // Verify this.
- if (FD && isa<CXXConstructorDecl>(FD) && isa<CXXTryStmt>(Body))
- DiagnoseReturnInConstructorExceptionHandler(cast<CXXTryStmt>(Body));
+ // Verify and clean out per-function state.
+ if (Body && (!FD || !FD->isDefaulted())) {
+ // C++ constructors that have function-try-blocks can't have return
+ // statements in the handlers of that block. (C++ [except.handle]p14)
+ // Verify this.
+ if (FD && isa<CXXConstructorDecl>(FD) && isa<CXXTryStmt>(Body))
+ DiagnoseReturnInConstructorExceptionHandler(cast<CXXTryStmt>(Body));
- // Verify that gotos and switch cases don't jump into scopes illegally.
- if (FSI->NeedsScopeChecking() &&
- !PP.isCodeCompletionEnabled())
- DiagnoseInvalidJumps(Body);
+ // Verify that gotos and switch cases don't jump into scopes illegally.
+ if (FSI->NeedsScopeChecking() && !PP.isCodeCompletionEnabled())
+ DiagnoseInvalidJumps(Body);
- if (CXXDestructorDecl *Destructor = dyn_cast<CXXDestructorDecl>(dcl)) {
- if (!Destructor->getParent()->isDependentType())
- CheckDestructor(Destructor);
+ if (CXXDestructorDecl *Destructor = dyn_cast<CXXDestructorDecl>(dcl)) {
+ if (!Destructor->getParent()->isDependentType())
+ CheckDestructor(Destructor);
- MarkBaseAndMemberDestructorsReferenced(Destructor->getLocation(),
- Destructor->getParent());
- }
+ MarkBaseAndMemberDestructorsReferenced(Destructor->getLocation(),
+ Destructor->getParent());
+ }
- // If any errors have occurred, clear out any temporaries that may have
- // been leftover. This ensures that these temporaries won't be picked up for
- // deletion in some later function.
- if (hasUncompilableErrorOccurred() ||
- getDiagnostics().getSuppressAllDiagnostics()) {
- DiscardCleanupsInEvaluationContext();
- }
- if (!hasUncompilableErrorOccurred() &&
- !isa<FunctionTemplateDecl>(dcl)) {
- // Since the body is valid, issue any analysis-based warnings that are
- // enabled.
- ActivePolicy = &WP;
- }
+ // If any errors have occurred, clear out any temporaries that may have
+ // been leftover. This ensures that these temporaries won't be picked up
+ // for deletion in some later function.
+ if (hasUncompilableErrorOccurred() ||
+ getDiagnostics().getSuppressAllDiagnostics()) {
+ DiscardCleanupsInEvaluationContext();
+ }
+ if (!hasUncompilableErrorOccurred() && !isa<FunctionTemplateDecl>(dcl)) {
+ // Since the body is valid, issue any analysis-based warnings that are
+ // enabled.
+ ActivePolicy = &WP;
+ }
- if (!IsInstantiation && FD && FD->isConstexpr() && !FD->isInvalidDecl() &&
- !CheckConstexprFunctionDefinition(FD, CheckConstexprKind::Diagnose))
- FD->setInvalidDecl();
+ if (!IsInstantiation && FD && FD->isConstexpr() && !FD->isInvalidDecl() &&
+ !CheckConstexprFunctionDefinition(FD, CheckConstexprKind::Diagnose))
+ FD->setInvalidDecl();
- if (FD && FD->hasAttr<NakedAttr>()) {
- for (const Stmt *S : Body->children()) {
- // Allow local register variables without initializer as they don't
- // require prologue.
- bool RegisterVariables = false;
- if (auto *DS = dyn_cast<DeclStmt>(S)) {
- for (const auto *Decl : DS->decls()) {
- if (const auto *Var = dyn_cast<VarDecl>(Decl)) {
- RegisterVariables =
- Var->hasAttr<AsmLabelAttr>() && !Var->hasInit();
- if (!RegisterVariables)
- break;
+ if (FD && FD->hasAttr<NakedAttr>()) {
+ for (const Stmt *S : Body->children()) {
+ // Allow local register variables without initializer as they don't
+ // require prologue.
+ bool RegisterVariables = false;
+ if (auto *DS = dyn_cast<DeclStmt>(S)) {
+ for (const auto *Decl : DS->decls()) {
+ if (const auto *Var = dyn_cast<VarDecl>(Decl)) {
+ RegisterVariables =
+ Var->hasAttr<AsmLabelAttr>() && !Var->hasInit();
+ if (!RegisterVariables)
+ break;
+ }
}
}
- }
- if (RegisterVariables)
- continue;
- if (!isa<AsmStmt>(S) && !isa<NullStmt>(S)) {
- Diag(S->getBeginLoc(), diag::err_non_asm_stmt_in_naked_function);
- Diag(FD->getAttr<NakedAttr>()->getLocation(), diag::note_attribute);
- FD->setInvalidDecl();
- break;
+ if (RegisterVariables)
+ continue;
+ if (!isa<AsmStmt>(S) && !isa<NullStmt>(S)) {
+ Diag(S->getBeginLoc(), diag::err_non_asm_stmt_in_naked_function);
+ Diag(FD->getAttr<NakedAttr>()->getLocation(), diag::note_attribute);
+ FD->setInvalidDecl();
+ break;
+ }
}
}
- }
- assert(ExprCleanupObjects.size() ==
- ExprEvalContexts.back().NumCleanupObjects &&
- "Leftover temporaries in function");
- assert(!Cleanup.exprNeedsCleanups() && "Unaccounted cleanups in function");
- assert(MaybeODRUseExprs.empty() &&
- "Leftover expressions for odr-use checking");
- }
+ assert(ExprCleanupObjects.size() ==
+ ExprEvalContexts.back().NumCleanupObjects &&
+ "Leftover temporaries in function");
+ assert(!Cleanup.exprNeedsCleanups() &&
+ "Unaccounted cleanups in function");
+ assert(MaybeODRUseExprs.empty() &&
+ "Leftover expressions for odr-use checking");
+ }
+ } // Pops the ExitFunctionBodyRAII scope, which needs to happen before we pop
+ // the declaration context below. Otherwise, we're unable to transform
+ // 'this' expressions when transforming immediate context functions.
if (!IsInstantiation)
PopDeclContext();
@@ -14825,13 +14852,18 @@ Decl *Sema::ActOnFinishFunctionBody(Decl *dcl, Stmt *Body,
DiscardCleanupsInEvaluationContext();
}
- if (FD && (LangOpts.OpenMP || LangOpts.CUDA || LangOpts.SYCLIsDevice)) {
+ if (FD && ((LangOpts.OpenMP && (LangOpts.OpenMPIsDevice ||
+ !LangOpts.OMPTargetTriples.empty())) ||
+ LangOpts.CUDA || LangOpts.SYCLIsDevice)) {
auto ES = getEmissionStatus(FD);
if (ES == Sema::FunctionEmissionStatus::Emitted ||
ES == Sema::FunctionEmissionStatus::Unknown)
DeclsToCheckForDeferredDiags.insert(FD);
}
+ if (FD && !FD->isDeleted())
+ checkTypeSupport(FD->getType(), FD->getLocation(), FD);
+
return dcl;
}
@@ -15132,6 +15164,34 @@ void Sema::AddKnownFunctionAttributes(FunctionDecl *FD) {
else
FD->addAttr(CUDAHostAttr::CreateImplicit(Context, FD->getLocation()));
}
+
+ // Add known guaranteed alignment for allocation functions.
+ switch (BuiltinID) {
+ case Builtin::BIaligned_alloc:
+ if (!FD->hasAttr<AllocAlignAttr>())
+ FD->addAttr(AllocAlignAttr::CreateImplicit(Context, ParamIdx(1, FD),
+ FD->getLocation()));
+ LLVM_FALLTHROUGH;
+ case Builtin::BIcalloc:
+ case Builtin::BImalloc:
+ case Builtin::BImemalign:
+ case Builtin::BIrealloc:
+ case Builtin::BIstrdup:
+ case Builtin::BIstrndup: {
+ if (!FD->hasAttr<AssumeAlignedAttr>()) {
+ unsigned NewAlign = Context.getTargetInfo().getNewAlign() /
+ Context.getTargetInfo().getCharWidth();
+ IntegerLiteral *Alignment = IntegerLiteral::Create(
+ Context, Context.MakeIntValue(NewAlign, Context.UnsignedIntTy),
+ Context.UnsignedIntTy, FD->getLocation());
+ FD->addAttr(AssumeAlignedAttr::CreateImplicit(
+ Context, Alignment, /*Offset=*/nullptr, FD->getLocation()));
+ }
+ break;
+ }
+ default:
+ break;
+ }
}
AddKnownFunctionAttributesForReplaceableGlobalAllocationFunction(FD);
@@ -16569,6 +16629,23 @@ void Sema::ActOnTagFinishDefinition(Scope *S, Decl *TagD,
// Notify the consumer that we've defined a tag.
if (!Tag->isInvalidDecl())
Consumer.HandleTagDeclDefinition(Tag);
+
+  // Clang's implementation of #pragma align(packed) differs in bitfield layout
+  // from XL's and instead matches the XL #pragma pack(1) behavior.
+ if (Context.getTargetInfo().getTriple().isOSAIX() &&
+ AlignPackStack.hasValue()) {
+ AlignPackInfo APInfo = AlignPackStack.CurrentValue;
+ // Only diagnose #pragma align(packed).
+ if (!APInfo.IsAlignAttr() || APInfo.getAlignMode() != AlignPackInfo::Packed)
+ return;
+ const RecordDecl *RD = dyn_cast<RecordDecl>(Tag);
+ if (!RD)
+ return;
+ // Only warn if there is at least 1 bitfield member.
+ if (llvm::any_of(RD->fields(),
+ [](const FieldDecl *FD) { return FD->isBitField(); }))
+ Diag(BraceRange.getBegin(), diag::warn_pragma_align_not_xl_compatible);
+ }
}
void Sema::ActOnObjCContainerFinishDefinition() {
@@ -17743,7 +17820,8 @@ EnumConstantDecl *Sema::CheckEnumConstant(EnumDecl *Enum,
Val = DefaultLvalueConversion(Val).get();
if (Val) {
- if (Enum->isDependentType() || Val->isTypeDependent())
+ if (Enum->isDependentType() || Val->isTypeDependent() ||
+ Val->containsErrors())
EltTy = Context.DependentTy;
else {
// FIXME: We don't allow folding in C++11 mode for an enum with a fixed
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index bb4ce8d4962e..ef889a36bd55 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -216,7 +216,7 @@ static bool checkUInt32Argument(Sema &S, const AttrInfo &AI, const Expr *Expr,
uint32_t &Val, unsigned Idx = UINT_MAX,
bool StrictlyUnsigned = false) {
Optional<llvm::APSInt> I = llvm::APSInt(32);
- if (Expr->isTypeDependent() || Expr->isValueDependent() ||
+ if (Expr->isTypeDependent() ||
!(I = Expr->getIntegerConstantExpr(S.Context))) {
if (Idx != UINT_MAX)
S.Diag(getAttrLoc(AI), diag::err_attribute_argument_n_type)
@@ -308,7 +308,7 @@ static bool checkFunctionOrMethodParameterIndex(
(HP ? getFunctionOrMethodNumParams(D) : 0) + HasImplicitThisParam;
Optional<llvm::APSInt> IdxInt;
- if (IdxExpr->isTypeDependent() || IdxExpr->isValueDependent() ||
+ if (IdxExpr->isTypeDependent() ||
!(IdxInt = IdxExpr->getIntegerConstantExpr(S.Context))) {
S.Diag(getAttrLoc(AI), diag::err_attribute_argument_n_type)
<< &AI << AttrArgNum << AANT_ArgumentIntegerConstant
@@ -947,6 +947,14 @@ static void handleEnableIfAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
D->addAttr(::new (S.Context) EnableIfAttr(S.Context, AL, Cond, Msg));
}
+static void handleErrorAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
+ StringRef NewUserDiagnostic;
+ if (!S.checkStringLiteralArgumentAttr(AL, 0, NewUserDiagnostic))
+ return;
+ if (ErrorAttr *EA = S.mergeErrorAttr(D, AL, NewUserDiagnostic))
+ D->addAttr(EA);
+}
+
namespace {
/// Determines if a given Expr references any of the given function's
/// ParmVarDecls, or the function's implicit `this` parameter (if applicable).
@@ -1788,7 +1796,7 @@ static void handleOwnershipAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
I->getOwnKind() == OwnershipAttr::Returns) {
// A returns attribute conflicts with any other returns attribute using
// a different index.
- if (std::find(I->args_begin(), I->args_end(), Idx) == I->args_end()) {
+ if (!llvm::is_contained(I->args(), Idx)) {
S.Diag(I->getLocation(), diag::err_ownership_returns_index_mismatch)
<< I->args_begin()->getSourceIndex();
if (I->args_size())
@@ -2488,6 +2496,15 @@ static void handleAvailabilityAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
}
}
+ if (II->isStr("fuchsia")) {
+ Optional<unsigned> Min, Sub;
+ if ((Min = Introduced.Version.getMinor()) ||
+ (Sub = Introduced.Version.getSubminor())) {
+ S.Diag(AL.getLoc(), diag::warn_availability_fuchsia_unavailable_minor);
+ return;
+ }
+ }
+
int PriorityModifier = AL.isPragmaClangAttribute()
? Sema::AP_PragmaClangAttribute
: Sema::AP_Explicit;
@@ -2843,8 +2860,7 @@ static void handleSentinelAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
if (AL.getNumArgs() > 0) {
Expr *E = AL.getArgAsExpr(0);
Optional<llvm::APSInt> Idx = llvm::APSInt(32);
- if (E->isTypeDependent() || E->isValueDependent() ||
- !(Idx = E->getIntegerConstantExpr(S.Context))) {
+ if (E->isTypeDependent() || !(Idx = E->getIntegerConstantExpr(S.Context))) {
S.Diag(AL.getLoc(), diag::err_attribute_argument_n_type)
<< AL << 1 << AANT_ArgumentIntegerConstant << E->getSourceRange();
return;
@@ -2863,8 +2879,7 @@ static void handleSentinelAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
if (AL.getNumArgs() > 1) {
Expr *E = AL.getArgAsExpr(1);
Optional<llvm::APSInt> Idx = llvm::APSInt(32);
- if (E->isTypeDependent() || E->isValueDependent() ||
- !(Idx = E->getIntegerConstantExpr(S.Context))) {
+ if (E->isTypeDependent() || !(Idx = E->getIntegerConstantExpr(S.Context))) {
S.Diag(AL.getLoc(), diag::err_attribute_argument_n_type)
<< AL << 2 << AANT_ArgumentIntegerConstant << E->getSourceRange();
return;
@@ -3196,13 +3211,13 @@ static void handleCodeSegAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
bool Sema::checkTargetAttr(SourceLocation LiteralLoc, StringRef AttrStr) {
enum FirstParam { Unsupported, Duplicate, Unknown };
enum SecondParam { None, Architecture, Tune };
- if (AttrStr.find("fpmath=") != StringRef::npos)
+ if (AttrStr.contains("fpmath="))
return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
<< Unsupported << None << "fpmath=";
// Diagnose use of tune if target doesn't support it.
if (!Context.getTargetInfo().supportsTargetAttributeTune() &&
- AttrStr.find("tune=") != StringRef::npos)
+ AttrStr.contains("tune="))
return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
<< Unsupported << None << "tune=";
@@ -3373,6 +3388,13 @@ static void handleFormatArgAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
return;
}
Ty = getFunctionOrMethodResultType(D);
+  // Replace instancetype with the class type.
+ auto Instancetype = S.Context.getObjCInstanceTypeDecl()->getTypeForDecl();
+ if (Ty->getAs<TypedefType>() == Instancetype)
+ if (auto *OMD = dyn_cast<ObjCMethodDecl>(D))
+ if (auto *Interface = OMD->getClassInterface())
+ Ty = S.Context.getObjCObjectPointerType(
+ QualType(Interface->getTypeForDecl(), 0));
if (!isNSStringType(Ty, S.Context, /*AllowNSAttributedString=*/true) &&
!isCFStringType(Ty, S.Context) &&
(!Ty->isPointerType() ||
@@ -3458,6 +3480,29 @@ static void handleInitPriorityAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
D->addAttr(::new (S.Context) InitPriorityAttr(S.Context, AL, prioritynum));
}
+ErrorAttr *Sema::mergeErrorAttr(Decl *D, const AttributeCommonInfo &CI,
+ StringRef NewUserDiagnostic) {
+ if (const auto *EA = D->getAttr<ErrorAttr>()) {
+ std::string NewAttr = CI.getNormalizedFullName();
+ assert((NewAttr == "error" || NewAttr == "warning") &&
+ "unexpected normalized full name");
+ bool Match = (EA->isError() && NewAttr == "error") ||
+ (EA->isWarning() && NewAttr == "warning");
+ if (!Match) {
+ Diag(EA->getLocation(), diag::err_attributes_are_not_compatible)
+ << CI << EA;
+ Diag(CI.getLoc(), diag::note_conflicting_attribute);
+ return nullptr;
+ }
+ if (EA->getUserDiagnostic() != NewUserDiagnostic) {
+ Diag(CI.getLoc(), diag::warn_duplicate_attribute) << EA;
+ Diag(EA->getLoc(), diag::note_previous_attribute);
+ }
+ D->dropAttr<ErrorAttr>();
+ }
+ return ::new (Context) ErrorAttr(Context, CI, NewUserDiagnostic);
+}
+
FormatAttr *Sema::mergeFormatAttr(Decl *D, const AttributeCommonInfo &CI,
IdentifierInfo *Format, int FormatIdx,
int FirstArg) {
@@ -4051,21 +4096,21 @@ void Sema::AddAlignedAttr(Decl *D, const AttributeCommonInfo &CI, Expr *E,
}
}
- unsigned MaximumAlignment = Sema::MaximumAlignment;
+ uint64_t MaximumAlignment = Sema::MaximumAlignment;
if (Context.getTargetInfo().getTriple().isOSBinFormatCOFF())
- MaximumAlignment = std::min(MaximumAlignment, 8192u);
+ MaximumAlignment = std::min(MaximumAlignment, uint64_t(8192));
if (AlignVal > MaximumAlignment) {
Diag(AttrLoc, diag::err_attribute_aligned_too_great)
<< MaximumAlignment << E->getSourceRange();
return;
}
- if (Context.getTargetInfo().isTLSSupported()) {
+ const auto *VD = dyn_cast<VarDecl>(D);
+ if (VD && Context.getTargetInfo().isTLSSupported()) {
unsigned MaxTLSAlign =
Context.toCharUnitsFromBits(Context.getTargetInfo().getMaxTLSAlign())
.getQuantity();
- const auto *VD = dyn_cast<VarDecl>(D);
- if (MaxTLSAlign && AlignVal > MaxTLSAlign && VD &&
+ if (MaxTLSAlign && AlignVal > MaxTLSAlign &&
VD->getTLSKind() != VarDecl::TLS_None) {
Diag(VD->getLocation(), diag::err_tls_var_aligned_over_maximum)
<< (unsigned)AlignVal << VD << MaxTLSAlign;
@@ -4073,6 +4118,17 @@ void Sema::AddAlignedAttr(Decl *D, const AttributeCommonInfo &CI, Expr *E,
}
}
+  // On AIX, an aligned attribute cannot decrease the alignment when applied
+  // to a variable declaration with a vector type.
+ if (VD && Context.getTargetInfo().getTriple().isOSAIX()) {
+ const Type *Ty = VD->getType().getTypePtr();
+ if (Ty->isVectorType() && AlignVal < 16) {
+ Diag(VD->getLocation(), diag::warn_aligned_attr_underaligned)
+ << VD->getType() << 16;
+ return;
+ }
+ }
+
AlignedAttr *AA = ::new (Context) AlignedAttr(Context, CI, true, ICE.get());
AA->setPackExpansion(IsPackExpansion);
D->addAttr(AA);
@@ -4161,9 +4217,10 @@ bool Sema::checkMSInheritanceAttrOnDefinition(
/// attribute.
static void parseModeAttrArg(Sema &S, StringRef Str, unsigned &DestWidth,
bool &IntegerMode, bool &ComplexMode,
- bool &ExplicitIEEE) {
+ FloatModeKind &ExplicitType) {
IntegerMode = true;
ComplexMode = false;
+ ExplicitType = FloatModeKind::NoFloat;
switch (Str.size()) {
case 2:
switch (Str[0]) {
@@ -4183,13 +4240,17 @@ static void parseModeAttrArg(Sema &S, StringRef Str, unsigned &DestWidth,
DestWidth = 96;
break;
case 'K': // KFmode - IEEE quad precision (__float128)
- ExplicitIEEE = true;
+ ExplicitType = FloatModeKind::Float128;
DestWidth = Str[1] == 'I' ? 0 : 128;
break;
case 'T':
- ExplicitIEEE = false;
+ ExplicitType = FloatModeKind::LongDouble;
DestWidth = 128;
break;
+ case 'I':
+ ExplicitType = FloatModeKind::Ibm128;
+ DestWidth = Str[1] == 'I' ? 0 : 128;
+ break;
}
if (Str[1] == 'F') {
IntegerMode = false;
@@ -4248,7 +4309,7 @@ void Sema::AddModeAttr(Decl *D, const AttributeCommonInfo &CI,
unsigned DestWidth = 0;
bool IntegerMode = true;
bool ComplexMode = false;
- bool ExplicitIEEE = false;
+ FloatModeKind ExplicitType = FloatModeKind::NoFloat;
llvm::APInt VectorSize(64, 0);
if (Str.size() >= 4 && Str[0] == 'V') {
// Minimal length of vector mode is 4: 'V' + NUMBER(>=1) + TYPE(>=2).
@@ -4261,7 +4322,7 @@ void Sema::AddModeAttr(Decl *D, const AttributeCommonInfo &CI,
!Str.substr(1, VectorStringLength).getAsInteger(10, VectorSize) &&
VectorSize.isPowerOf2()) {
parseModeAttrArg(*this, Str.substr(VectorStringLength + 1), DestWidth,
- IntegerMode, ComplexMode, ExplicitIEEE);
+ IntegerMode, ComplexMode, ExplicitType);
// Avoid duplicate warning from template instantiation.
if (!InInstantiation)
Diag(AttrLoc, diag::warn_vector_mode_deprecated);
@@ -4272,7 +4333,7 @@ void Sema::AddModeAttr(Decl *D, const AttributeCommonInfo &CI,
if (!VectorSize)
parseModeAttrArg(*this, Str, DestWidth, IntegerMode, ComplexMode,
- ExplicitIEEE);
+ ExplicitType);
// FIXME: Sync this with InitializePredefinedMacros; we need to match int8_t
// and friends, at least with glibc.
@@ -4338,7 +4399,7 @@ void Sema::AddModeAttr(Decl *D, const AttributeCommonInfo &CI,
NewElemTy = Context.getIntTypeForBitwidth(DestWidth,
OldElemTy->isSignedIntegerType());
else
- NewElemTy = Context.getRealTypeForBitwidth(DestWidth, ExplicitIEEE);
+ NewElemTy = Context.getRealTypeForBitwidth(DestWidth, ExplicitType);
if (NewElemTy.isNull()) {
Diag(AttrLoc, diag::err_machine_mode) << 1 /*Unsupported*/ << Name;
@@ -4733,7 +4794,7 @@ static void handleLifetimeCategoryAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
}
// To check if earlier decl attributes do not conflict the newly parsed ones
- // we always add (and check) the attribute to the cannonical decl. We need
+ // we always add (and check) the attribute to the canonical decl. We need
// to repeat the check for attribute mutual exclusion because we're attaching
// all of the attributes to the canonical declaration rather than the current
// declaration.
@@ -5284,8 +5345,8 @@ static void handleArmBuiltinAliasAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
}
static bool RISCVAliasValid(unsigned BuiltinID, StringRef AliasName) {
- return BuiltinID >= Builtin::FirstTSBuiltin &&
- BuiltinID < RISCV::LastTSBuiltin;
+ return BuiltinID >= RISCV::FirstRVVBuiltin &&
+ BuiltinID <= RISCV::LastRVVBuiltin;
}
static void handleBuiltinAliasAttr(Sema &S, Decl *D,
@@ -6063,7 +6124,7 @@ validateSwiftFunctionName(Sema &S, const ParsedAttr &AL, SourceLocation Loc,
if (BaseName.empty()) {
BaseName = ContextName;
ContextName = StringRef();
- } else if (ContextName.empty() || !isValidIdentifier(ContextName)) {
+ } else if (ContextName.empty() || !isValidAsciiIdentifier(ContextName)) {
S.Diag(Loc, diag::warn_attr_swift_name_invalid_identifier)
<< AL << /*context*/ 1;
return false;
@@ -6071,7 +6132,7 @@ validateSwiftFunctionName(Sema &S, const ParsedAttr &AL, SourceLocation Loc,
IsMember = true;
}
- if (!isValidIdentifier(BaseName) || BaseName == "_") {
+ if (!isValidAsciiIdentifier(BaseName) || BaseName == "_") {
S.Diag(Loc, diag::warn_attr_swift_name_invalid_identifier)
<< AL << /*basename*/ 0;
return false;
@@ -6121,7 +6182,7 @@ validateSwiftFunctionName(Sema &S, const ParsedAttr &AL, SourceLocation Loc,
do {
std::tie(CurrentParam, Parameters) = Parameters.split(':');
- if (!isValidIdentifier(CurrentParam)) {
+ if (!isValidAsciiIdentifier(CurrentParam)) {
S.Diag(Loc, diag::warn_attr_swift_name_invalid_identifier)
<< AL << /*parameter*/2;
return false;
@@ -6262,13 +6323,12 @@ bool Sema::DiagnoseSwiftName(Decl *D, StringRef Name, SourceLocation Loc,
// might be because we've transformed some of them. Check for potential
// "out" parameters and err on the side of not warning.
unsigned MaybeOutParamCount =
- std::count_if(Params.begin(), Params.end(),
- [](const ParmVarDecl *Param) -> bool {
- QualType ParamTy = Param->getType();
- if (ParamTy->isReferenceType() || ParamTy->isPointerType())
- return !ParamTy->getPointeeType().isConstQualified();
- return false;
- });
+ llvm::count_if(Params, [](const ParmVarDecl *Param) -> bool {
+ QualType ParamTy = Param->getType();
+ if (ParamTy->isReferenceType() || ParamTy->isPointerType())
+ return !ParamTy->getPointeeType().isConstQualified();
+ return false;
+ });
ParamCountValid = SwiftParamCount + MaybeOutParamCount >= ParamCount;
}
@@ -6290,13 +6350,13 @@ bool Sema::DiagnoseSwiftName(Decl *D, StringRef Name, SourceLocation Loc,
if (BaseName.empty()) {
BaseName = ContextName;
ContextName = StringRef();
- } else if (!isValidIdentifier(ContextName)) {
+ } else if (!isValidAsciiIdentifier(ContextName)) {
Diag(Loc, diag::warn_attr_swift_name_invalid_identifier) << AL
<< /*context*/1;
return false;
}
- if (!isValidIdentifier(BaseName)) {
+ if (!isValidAsciiIdentifier(BaseName)) {
Diag(Loc, diag::warn_attr_swift_name_invalid_identifier) << AL
<< /*basename*/0;
return false;
@@ -6831,6 +6891,30 @@ static void handleBPFPreserveAccessIndexAttr(Sema &S, Decl *D,
Rec->addAttr(::new (S.Context) BPFPreserveAccessIndexAttr(S.Context, AL));
}
+static bool hasBTFDeclTagAttr(Decl *D, StringRef Tag) {
+ for (const auto *I : D->specific_attrs<BTFDeclTagAttr>()) {
+ if (I->getBTFDeclTag() == Tag)
+ return true;
+ }
+ return false;
+}
+
+static void handleBTFDeclTagAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
+ StringRef Str;
+ if (!S.checkStringLiteralArgumentAttr(AL, 0, Str))
+ return;
+ if (hasBTFDeclTagAttr(D, Str))
+ return;
+
+ D->addAttr(::new (S.Context) BTFDeclTagAttr(S.Context, AL, Str));
+}
+
+BTFDeclTagAttr *Sema::mergeBTFDeclTagAttr(Decl *D, const BTFDeclTagAttr &AL) {
+ if (hasBTFDeclTagAttr(D, AL.getBTFDeclTag()))
+ return nullptr;
+ return ::new (Context) BTFDeclTagAttr(Context, AL, AL.getBTFDeclTag());
+}
+
static void handleWebAssemblyExportNameAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
if (!isFunctionOrMethod(D)) {
S.Diag(D->getLocation(), diag::warn_attribute_wrong_decl_type)
@@ -7434,7 +7518,7 @@ static void handleNoSanitizeSpecificAttr(Sema &S, Decl *D,
// index rather than incorrectly assume the index for NoSanitizeSpecificAttr
// has the same spellings as the index for NoSanitizeAttr. We don't have a
// general way to "translate" between the two, so this hack attempts to work
- // around the issue with hard-coded indicies. This is critical for calling
+ // around the issue with hard-coded indices. This is critical for calling
// getSpelling() or prettyPrint() on the resulting semantic attribute object
// without failing assertions.
unsigned TranslatedSpellingIndex = 0;
@@ -7453,12 +7537,12 @@ static void handleInternalLinkageAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
}
static void handleOpenCLNoSVMAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
- if (S.LangOpts.OpenCLVersion != 200)
+ if (S.LangOpts.getOpenCLCompatibleVersion() < 200)
S.Diag(AL.getLoc(), diag::err_attribute_requires_opencl_version)
- << AL << "2.0" << 0;
+ << AL << "2.0" << 1;
else
- S.Diag(AL.getLoc(), diag::warn_opencl_attr_deprecated_ignored) << AL
- << "2.0";
+ S.Diag(AL.getLoc(), diag::warn_opencl_attr_deprecated_ignored)
+ << AL << S.LangOpts.getOpenCLVersionString();
}
static void handleOpenCLAccessAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
@@ -7486,18 +7570,17 @@ static void handleOpenCLAccessAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
// OpenCL v3.0 s6.8 - For OpenCL C 2.0, or with the
// __opencl_c_read_write_images feature, image objects specified as arguments
// to a kernel can additionally be declared to be read-write.
- // C++ for OpenCL inherits rule from OpenCL C v2.0.
+ // C++ for OpenCL 1.0 inherits rule from OpenCL C v2.0.
+ // C++ for OpenCL 2021 inherits rule from OpenCL C v3.0.
if (const auto *PDecl = dyn_cast<ParmVarDecl>(D)) {
const Type *DeclTy = PDecl->getType().getCanonicalType().getTypePtr();
- if (AL.getAttrName()->getName().find("read_write") != StringRef::npos) {
- bool ReadWriteImagesUnsupportedForOCLC =
- (S.getLangOpts().OpenCLVersion < 200) ||
- (S.getLangOpts().OpenCLVersion == 300 &&
+ if (AL.getAttrName()->getName().contains("read_write")) {
+ bool ReadWriteImagesUnsupported =
+ (S.getLangOpts().getOpenCLCompatibleVersion() < 200) ||
+ (S.getLangOpts().getOpenCLCompatibleVersion() == 300 &&
!S.getOpenCLOptions().isSupported("__opencl_c_read_write_images",
S.getLangOpts()));
- if ((!S.getLangOpts().OpenCLCPlusPlus &&
- ReadWriteImagesUnsupportedForOCLC) ||
- DeclTy->isPipeType()) {
+ if (ReadWriteImagesUnsupported || DeclTy->isPipeType()) {
S.Diag(AL.getLoc(), diag::err_opencl_invalid_read_write)
<< AL << PDecl->getType() << DeclTy->isImageType();
D->setInvalidDecl(true);
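Illustrative sketch (not part of the diff), written as OpenCL C / C++ for OpenCL source and assuming the __opencl_c_read_write_images feature (or OpenCL C 2.0) is in effect; this is the parameter form the revised check accepts:

kernel void blur(read_write image2d_t img) {
  // read-write image access is now gated on getOpenCLCompatibleVersion() above
}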
@@ -7868,6 +7951,9 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D,
case ParsedAttr::AT_BPFPreserveAccessIndex:
handleBPFPreserveAccessIndexAttr(S, D, AL);
break;
+ case ParsedAttr::AT_BTFDeclTag:
+ handleBTFDeclTagAttr(S, D, AL);
+ break;
case ParsedAttr::AT_WebAssemblyExportName:
handleWebAssemblyExportNameAttr(S, D, AL);
break;
@@ -7941,6 +8027,9 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D,
case ParsedAttr::AT_EnableIf:
handleEnableIfAttr(S, D, AL);
break;
+ case ParsedAttr::AT_Error:
+ handleErrorAttr(S, D, AL);
+ break;
case ParsedAttr::AT_DiagnoseIf:
handleDiagnoseIfAttr(S, D, AL);
break;
@@ -8592,8 +8681,9 @@ NamedDecl * Sema::DeclClonePragmaWeak(NamedDecl *ND, IdentifierInfo *II,
NewFD = FunctionDecl::Create(
FD->getASTContext(), FD->getDeclContext(), Loc, Loc,
DeclarationName(II), FD->getType(), FD->getTypeSourceInfo(), SC_None,
- false /*isInlineSpecified*/, FD->hasPrototype(),
- ConstexprSpecKind::Unspecified, FD->getTrailingRequiresClause());
+ getCurFPFeatures().isFPConstrained(), false /*isInlineSpecified*/,
+ FD->hasPrototype(), ConstexprSpecKind::Unspecified,
+ FD->getTrailingRequiresClause());
NewD = NewFD;
if (FD->getQualifier())
diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
index 83c97626ff7e..20410a959cd0 100644
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -26,6 +26,7 @@
#include "clang/AST/TypeOrdering.h"
#include "clang/Basic/AttributeCommonInfo.h"
#include "clang/Basic/PartialDiagnostic.h"
+#include "clang/Basic/Specifiers.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Lex/LiteralSupport.h"
#include "clang/Lex/Preprocessor.h"
@@ -435,7 +436,7 @@ void Sema::CheckExtraCXXDefaultArguments(Declarator &D) {
}
static bool functionDeclHasDefaultArgument(const FunctionDecl *FD) {
- return std::any_of(FD->param_begin(), FD->param_end(), [](ParmVarDecl *P) {
+ return llvm::any_of(FD->parameters(), [](ParmVarDecl *P) {
return P->hasDefaultArg() && !P->hasInheritedDefaultArg();
});
}
@@ -983,9 +984,9 @@ static std::string printTemplateArgs(const PrintingPolicy &PrintingPolicy,
for (auto &Arg : Args.arguments()) {
if (!First)
OS << ", ";
- Arg.getArgument().print(
- PrintingPolicy, OS,
- TemplateParameterList::shouldIncludeTypeForArgument(Params, I));
+ Arg.getArgument().print(PrintingPolicy, OS,
+ TemplateParameterList::shouldIncludeTypeForArgument(
+ PrintingPolicy, Params, I));
First = false;
I++;
}
@@ -1383,9 +1384,8 @@ static bool checkMemberDecomposition(Sema &S, ArrayRef<BindingDecl*> Bindings,
DecompType.getQualifiers());
auto DiagnoseBadNumberOfBindings = [&]() -> bool {
- unsigned NumFields =
- std::count_if(RD->field_begin(), RD->field_end(),
- [](FieldDecl *FD) { return !FD->isUnnamedBitfield(); });
+ unsigned NumFields = llvm::count_if(
+ RD->fields(), [](FieldDecl *FD) { return !FD->isUnnamedBitfield(); });
assert(Bindings.size() != NumFields);
S.Diag(Src->getLocation(), diag::err_decomp_decl_wrong_number_bindings)
<< DecompType << (unsigned)Bindings.size() << NumFields << NumFields
@@ -2050,6 +2050,13 @@ CheckConstexprFunctionStmt(Sema &SemaRef, const FunctionDecl *Dcl, Stmt *S,
ReturnStmts.push_back(S->getBeginLoc());
return true;
+ case Stmt::AttributedStmtClass:
+ // Attributes on a statement don't affect its formal kind and hence don't
+ // affect its validity in a constexpr function.
+ return CheckConstexprFunctionStmt(SemaRef, Dcl,
+ cast<AttributedStmt>(S)->getSubStmt(),
+ ReturnStmts, Cxx1yLoc, Cxx2aLoc, Kind);
+
case Stmt::CompoundStmtClass: {
// C++1y allows compound-statements.
if (!Cxx1yLoc.isValid())
@@ -2064,11 +2071,6 @@ CheckConstexprFunctionStmt(Sema &SemaRef, const FunctionDecl *Dcl, Stmt *S,
return true;
}
- case Stmt::AttributedStmtClass:
- if (!Cxx1yLoc.isValid())
- Cxx1yLoc = S->getBeginLoc();
- return true;
-
case Stmt::IfStmtClass: {
// C++1y allows if-statements.
if (!Cxx1yLoc.isValid())
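Illustrative C++20 sketch (not part of the diff) of what the relocated AttributedStmt case means in practice: the attribute is looked through and the wrapped statement is what the constexpr check sees.

constexpr int abs_val(int x) {
  if (x < 0) [[unlikely]]
    return -x;   // checked as a plain return statement inside the constexpr body
  return x;
}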
@@ -2727,6 +2729,8 @@ bool Sema::AttachBaseSpecifiers(CXXRecordDecl *Class,
KnownBase = Bases[idx];
Bases[NumGoodBases++] = Bases[idx];
+ if (NewBaseType->isDependentType())
+ continue;
// Note this base's direct & indirect bases, if there could be ambiguity.
if (Bases.size() > 1)
NoteIndirectBases(Context, IndirectBaseTypes, NewBaseType);
@@ -3581,9 +3585,8 @@ namespace {
llvm::SmallVector<unsigned, 4> UsedFieldIndex;
// Discard the first field since it is the field decl that is being
// initialized.
- for (auto I = Fields.rbegin() + 1, E = Fields.rend(); I != E; ++I) {
- UsedFieldIndex.push_back((*I)->getFieldIndex());
- }
+ for (const FieldDecl *FD : llvm::drop_begin(llvm::reverse(Fields)))
+ UsedFieldIndex.push_back(FD->getFieldIndex());
for (auto UsedIter = UsedFieldIndex.begin(),
UsedEnd = UsedFieldIndex.end(),
@@ -4114,7 +4117,7 @@ Sema::ActOnMemInitializer(Decl *ConstructorD,
namespace {
// Callback to only accept typo corrections that can be a valid C++ member
-// intializer: either a non-static field member or a base class.
+// initializer: either a non-static field member or a base class.
class MemInitializerValidatorCCC final : public CorrectionCandidateCallback {
public:
explicit MemInitializerValidatorCCC(CXXRecordDecl *ClassDecl)
@@ -4162,7 +4165,8 @@ Sema::BuildMemInitializer(Decl *ConstructorD,
SourceLocation IdLoc,
Expr *Init,
SourceLocation EllipsisLoc) {
- ExprResult Res = CorrectDelayedTyposInExpr(Init);
+ ExprResult Res = CorrectDelayedTyposInExpr(Init, /*InitDecl=*/nullptr,
+ /*RecoverUncorrectedTypos=*/true);
if (!Res.isUsable())
return true;
Init = Res.get();
@@ -4214,7 +4218,7 @@ Sema::BuildMemInitializer(Decl *ConstructorD,
if (BaseType.isNull())
return true;
} else if (DS.getTypeSpecType() == TST_decltype) {
- BaseType = BuildDecltypeType(DS.getRepAsExpr(), DS.getTypeSpecTypeLoc());
+ BaseType = BuildDecltypeType(DS.getRepAsExpr());
} else if (DS.getTypeSpecType() == TST_decltype_auto) {
Diag(DS.getTypeSpecTypeLoc(), diag::err_decltype_auto_invalid);
return true;
@@ -4375,18 +4379,25 @@ Sema::BuildMemberInitializer(ValueDecl *Member, Expr *Init,
InitializationSequence InitSeq(*this, MemberEntity, Kind, Args);
ExprResult MemberInit = InitSeq.Perform(*this, MemberEntity, Kind, Args,
nullptr);
- if (MemberInit.isInvalid())
- return true;
-
- // C++11 [class.base.init]p7:
- // The initialization of each base and member constitutes a
- // full-expression.
- MemberInit = ActOnFinishFullExpr(MemberInit.get(), InitRange.getBegin(),
- /*DiscardedValue*/ false);
- if (MemberInit.isInvalid())
- return true;
-
- Init = MemberInit.get();
+ if (!MemberInit.isInvalid()) {
+ // C++11 [class.base.init]p7:
+ // The initialization of each base and member constitutes a
+ // full-expression.
+ MemberInit = ActOnFinishFullExpr(MemberInit.get(), InitRange.getBegin(),
+ /*DiscardedValue*/ false);
+ }
+
+ if (MemberInit.isInvalid()) {
+ // Args were sensible expressions but we couldn't initialize the member
+ // from them. Preserve them in a RecoveryExpr instead.
+ Init = CreateRecoveryExpr(InitRange.getBegin(), InitRange.getEnd(), Args,
+ Member->getType())
+ .get();
+ if (!Init)
+ return true;
+ } else {
+ Init = MemberInit.get();
+ }
}
if (DirectMember) {
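Illustrative sketch (not part of the diff) of the kind of ill-formed member initializer whose arguments are now preserved in a RecoveryExpr rather than dropped, so AST consumers still see them:

struct Buffer {
  unsigned Size;
  // No conversion from const char * to unsigned: the initialization fails, but
  // "name" is kept as the argument of a recovery expression on the initializer.
  Buffer(const char *name) : Size(name) {}
};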
@@ -4428,29 +4439,35 @@ Sema::BuildDelegatingInitializer(TypeSourceInfo *TInfo, Expr *Init,
InitializationSequence InitSeq(*this, DelegationEntity, Kind, Args);
ExprResult DelegationInit = InitSeq.Perform(*this, DelegationEntity, Kind,
Args, nullptr);
- if (DelegationInit.isInvalid())
- return true;
-
- assert(cast<CXXConstructExpr>(DelegationInit.get())->getConstructor() &&
- "Delegating constructor with no target?");
+ if (!DelegationInit.isInvalid()) {
+ assert((DelegationInit.get()->containsErrors() ||
+ cast<CXXConstructExpr>(DelegationInit.get())->getConstructor()) &&
+ "Delegating constructor with no target?");
- // C++11 [class.base.init]p7:
- // The initialization of each base and member constitutes a
- // full-expression.
- DelegationInit = ActOnFinishFullExpr(
- DelegationInit.get(), InitRange.getBegin(), /*DiscardedValue*/ false);
- if (DelegationInit.isInvalid())
- return true;
+ // C++11 [class.base.init]p7:
+ // The initialization of each base and member constitutes a
+ // full-expression.
+ DelegationInit = ActOnFinishFullExpr(
+ DelegationInit.get(), InitRange.getBegin(), /*DiscardedValue*/ false);
+ }
- // If we are in a dependent context, template instantiation will
- // perform this type-checking again. Just save the arguments that we
- // received in a ParenListExpr.
- // FIXME: This isn't quite ideal, since our ASTs don't capture all
- // of the information that we have about the base
- // initializer. However, deconstructing the ASTs is a dicey process,
- // and this approach is far more likely to get the corner cases right.
- if (CurContext->isDependentContext())
- DelegationInit = Init;
+ if (DelegationInit.isInvalid()) {
+ DelegationInit =
+ CreateRecoveryExpr(InitRange.getBegin(), InitRange.getEnd(), Args,
+ QualType(ClassDecl->getTypeForDecl(), 0));
+ if (DelegationInit.isInvalid())
+ return true;
+ } else {
+ // If we are in a dependent context, template instantiation will
+ // perform this type-checking again. Just save the arguments that we
+ // received in a ParenListExpr.
+ // FIXME: This isn't quite ideal, since our ASTs don't capture all
+ // of the information that we have about the base
+ // initializer. However, deconstructing the ASTs is a dicey process,
+ // and this approach is far more likely to get the corner cases right.
+ if (CurContext->isDependentContext())
+ DelegationInit = Init;
+ }
return new (Context) CXXCtorInitializer(Context, TInfo, InitRange.getBegin(),
DelegationInit.getAs<Expr>(),
@@ -4474,7 +4491,12 @@ Sema::BuildBaseInitializer(QualType BaseType, TypeSourceInfo *BaseTInfo,
// of that class, the mem-initializer is ill-formed. A
// mem-initializer-list can initialize a base class using any
// name that denotes that base class type.
- bool Dependent = BaseType->isDependentType() || Init->isTypeDependent();
+
+ // We can store the initializers in "as-written" form and delay analysis until
+ // instantiation if the constructor is dependent. But not for dependent
+ // (broken) code in a non-template! SetCtorInitializers does not expect this.
+ bool Dependent = CurContext->isDependentContext() &&
+ (BaseType->isDependentType() || Init->isTypeDependent());
SourceRange InitRange = Init->getSourceRange();
if (EllipsisLoc.isValid()) {
@@ -4561,26 +4583,30 @@ Sema::BuildBaseInitializer(QualType BaseType, TypeSourceInfo *BaseTInfo,
InitRange.getEnd());
InitializationSequence InitSeq(*this, BaseEntity, Kind, Args);
ExprResult BaseInit = InitSeq.Perform(*this, BaseEntity, Kind, Args, nullptr);
- if (BaseInit.isInvalid())
- return true;
-
- // C++11 [class.base.init]p7:
- // The initialization of each base and member constitutes a
- // full-expression.
- BaseInit = ActOnFinishFullExpr(BaseInit.get(), InitRange.getBegin(),
- /*DiscardedValue*/ false);
- if (BaseInit.isInvalid())
- return true;
+ if (!BaseInit.isInvalid()) {
+ // C++11 [class.base.init]p7:
+ // The initialization of each base and member constitutes a
+ // full-expression.
+ BaseInit = ActOnFinishFullExpr(BaseInit.get(), InitRange.getBegin(),
+ /*DiscardedValue*/ false);
+ }
- // If we are in a dependent context, template instantiation will
- // perform this type-checking again. Just save the arguments that we
- // received in a ParenListExpr.
- // FIXME: This isn't quite ideal, since our ASTs don't capture all
- // of the information that we have about the base
- // initializer. However, deconstructing the ASTs is a dicey process,
- // and this approach is far more likely to get the corner cases right.
- if (CurContext->isDependentContext())
- BaseInit = Init;
+ if (BaseInit.isInvalid()) {
+ BaseInit = CreateRecoveryExpr(InitRange.getBegin(), InitRange.getEnd(),
+ Args, BaseType);
+ if (BaseInit.isInvalid())
+ return true;
+ } else {
+ // If we are in a dependent context, template instantiation will
+ // perform this type-checking again. Just save the arguments that we
+ // received in a ParenListExpr.
+ // FIXME: This isn't quite ideal, since our ASTs don't capture all
+ // of the information that we have about the base
+ // initializer. However, deconstructing the ASTs is a dicey process,
+ // and this approach is far more likely to get the corner cases right.
+ if (CurContext->isDependentContext())
+ BaseInit = Init;
+ }
return new (Context) CXXCtorInitializer(Context, BaseTInfo,
BaseSpec->isVirtual(),
@@ -5859,6 +5885,7 @@ struct CheckAbstractUsage {
if (CT != Info.AbstractType) return;
// It matched; do some magic.
+ // FIXME: These should be at most warnings. See P0929R2, CWG1640, CWG1646.
if (Sel == Sema::AbstractArrayType) {
Info.S.Diag(Ctx->getLocation(), diag::err_array_of_abstract_type)
<< T << TL.getSourceRange();
@@ -5877,19 +5904,31 @@ void AbstractUsageInfo::CheckType(const NamedDecl *D, TypeLoc TL,
}
-/// Check for invalid uses of an abstract type in a method declaration.
+/// Check for invalid uses of an abstract type in a function declaration.
static void CheckAbstractClassUsage(AbstractUsageInfo &Info,
- CXXMethodDecl *MD) {
+ FunctionDecl *FD) {
// No need to do the check on definitions, which require that
// the return/param types be complete.
- if (MD->doesThisDeclarationHaveABody())
+ if (FD->doesThisDeclarationHaveABody())
return;
// For safety's sake, just ignore it if we don't have type source
// information. This should never happen for non-implicit methods,
// but...
- if (TypeSourceInfo *TSI = MD->getTypeSourceInfo())
- Info.CheckType(MD, TSI->getTypeLoc(), Sema::AbstractNone);
+ if (TypeSourceInfo *TSI = FD->getTypeSourceInfo())
+ Info.CheckType(FD, TSI->getTypeLoc(), Sema::AbstractNone);
+}
+
+/// Check for invalid uses of an abstract type in a variable declaration.
+static void CheckAbstractClassUsage(AbstractUsageInfo &Info,
+ VarDecl *VD) {
+ // No need to do the check on definitions, which require that
+ // the type is complete.
+ if (VD->isThisDeclarationADefinition())
+ return;
+
+ Info.CheckType(VD, VD->getTypeSourceInfo()->getTypeLoc(),
+ Sema::AbstractVariableType);
}
/// Check for invalid uses of an abstract type within a class definition.
@@ -5898,29 +5937,32 @@ static void CheckAbstractClassUsage(AbstractUsageInfo &Info,
for (auto *D : RD->decls()) {
if (D->isImplicit()) continue;
- // Methods and method templates.
- if (isa<CXXMethodDecl>(D)) {
- CheckAbstractClassUsage(Info, cast<CXXMethodDecl>(D));
- } else if (isa<FunctionTemplateDecl>(D)) {
- FunctionDecl *FD = cast<FunctionTemplateDecl>(D)->getTemplatedDecl();
- CheckAbstractClassUsage(Info, cast<CXXMethodDecl>(FD));
+ // Step through friends to the befriended declaration.
+ if (auto *FD = dyn_cast<FriendDecl>(D)) {
+ D = FD->getFriendDecl();
+ if (!D) continue;
+ }
+
+ // Functions and function templates.
+ if (auto *FD = dyn_cast<FunctionDecl>(D)) {
+ CheckAbstractClassUsage(Info, FD);
+ } else if (auto *FTD = dyn_cast<FunctionTemplateDecl>(D)) {
+ CheckAbstractClassUsage(Info, FTD->getTemplatedDecl());
// Fields and static variables.
- } else if (isa<FieldDecl>(D)) {
- FieldDecl *FD = cast<FieldDecl>(D);
+ } else if (auto *FD = dyn_cast<FieldDecl>(D)) {
if (TypeSourceInfo *TSI = FD->getTypeSourceInfo())
Info.CheckType(FD, TSI->getTypeLoc(), Sema::AbstractFieldType);
- } else if (isa<VarDecl>(D)) {
- VarDecl *VD = cast<VarDecl>(D);
- if (TypeSourceInfo *TSI = VD->getTypeSourceInfo())
- Info.CheckType(VD, TSI->getTypeLoc(), Sema::AbstractVariableType);
+ } else if (auto *VD = dyn_cast<VarDecl>(D)) {
+ CheckAbstractClassUsage(Info, VD);
+ } else if (auto *VTD = dyn_cast<VarTemplateDecl>(D)) {
+ CheckAbstractClassUsage(Info, VTD->getTemplatedDecl());
// Nested classes and class templates.
- } else if (isa<CXXRecordDecl>(D)) {
- CheckAbstractClassUsage(Info, cast<CXXRecordDecl>(D));
- } else if (isa<ClassTemplateDecl>(D)) {
- CheckAbstractClassUsage(Info,
- cast<ClassTemplateDecl>(D)->getTemplatedDecl());
+ } else if (auto *RD = dyn_cast<CXXRecordDecl>(D)) {
+ CheckAbstractClassUsage(Info, RD);
+ } else if (auto *CTD = dyn_cast<ClassTemplateDecl>(D)) {
+ CheckAbstractClassUsage(Info, CTD->getTemplatedDecl());
}
}
}
@@ -5960,11 +6002,14 @@ static void ReferenceDllExportedMembers(Sema &S, CXXRecordDecl *Class) {
S.MarkVTableUsed(Class->getLocation(), Class, true);
for (Decl *Member : Class->decls()) {
+ // Skip members that were not marked exported.
+ if (!Member->hasAttr<DLLExportAttr>())
+ continue;
+
// Defined static variables that are members of an exported base
// class must be marked export too.
auto *VD = dyn_cast<VarDecl>(Member);
- if (VD && Member->getAttr<DLLExportAttr>() &&
- VD->getStorageClass() == SC_Static &&
+ if (VD && VD->getStorageClass() == SC_Static &&
TSK == TSK_ImplicitInstantiation)
S.MarkVariableReferenced(VD->getLocation(), VD);
@@ -5972,40 +6017,47 @@ static void ReferenceDllExportedMembers(Sema &S, CXXRecordDecl *Class) {
if (!MD)
continue;
- if (Member->getAttr<DLLExportAttr>()) {
- if (MD->isUserProvided()) {
- // Instantiate non-default class member functions ...
+ if (MD->isUserProvided()) {
+ // Instantiate non-default class member functions ...
- // .. except for certain kinds of template specializations.
- if (TSK == TSK_ImplicitInstantiation && !ClassAttr->isInherited())
- continue;
+ // .. except for certain kinds of template specializations.
+ if (TSK == TSK_ImplicitInstantiation && !ClassAttr->isInherited())
+ continue;
- S.MarkFunctionReferenced(Class->getLocation(), MD);
+ // If this is an MS ABI dllexport default constructor, instantiate any
+ // default arguments.
+ if (S.Context.getTargetInfo().getCXXABI().isMicrosoft()) {
+ auto *CD = dyn_cast<CXXConstructorDecl>(MD);
+ if (CD && CD->isDefaultConstructor() && TSK == TSK_Undeclared) {
+ S.InstantiateDefaultCtorDefaultArgs(CD);
+ }
+ }
- // The function will be passed to the consumer when its definition is
- // encountered.
- } else if (MD->isExplicitlyDefaulted()) {
- // Synthesize and instantiate explicitly defaulted methods.
- S.MarkFunctionReferenced(Class->getLocation(), MD);
+ S.MarkFunctionReferenced(Class->getLocation(), MD);
- if (TSK != TSK_ExplicitInstantiationDefinition) {
- // Except for explicit instantiation defs, we will not see the
- // definition again later, so pass it to the consumer now.
- S.Consumer.HandleTopLevelDecl(DeclGroupRef(MD));
- }
- } else if (!MD->isTrivial() ||
- MD->isCopyAssignmentOperator() ||
- MD->isMoveAssignmentOperator()) {
- // Synthesize and instantiate non-trivial implicit methods, and the copy
- // and move assignment operators. The latter are exported even if they
- // are trivial, because the address of an operator can be taken and
- // should compare equal across libraries.
- S.MarkFunctionReferenced(Class->getLocation(), MD);
-
- // There is no later point when we will see the definition of this
- // function, so pass it to the consumer now.
+ // The function will be passed to the consumer when its definition is
+ // encountered.
+ } else if (MD->isExplicitlyDefaulted()) {
+ // Synthesize and instantiate explicitly defaulted methods.
+ S.MarkFunctionReferenced(Class->getLocation(), MD);
+
+ if (TSK != TSK_ExplicitInstantiationDefinition) {
+ // Except for explicit instantiation defs, we will not see the
+ // definition again later, so pass it to the consumer now.
S.Consumer.HandleTopLevelDecl(DeclGroupRef(MD));
}
+ } else if (!MD->isTrivial() ||
+ MD->isCopyAssignmentOperator() ||
+ MD->isMoveAssignmentOperator()) {
+ // Synthesize and instantiate non-trivial implicit methods, and the copy
+ // and move assignment operators. The latter are exported even if they
+ // are trivial, because the address of an operator can be taken and
+ // should compare equal across libraries.
+ S.MarkFunctionReferenced(Class->getLocation(), MD);
+
+ // There is no later point when we will see the definition of this
+ // function, so pass it to the consumer now.
+ S.Consumer.HandleTopLevelDecl(DeclGroupRef(MD));
}
}
}
@@ -7771,9 +7823,21 @@ private:
DCK == DefaultedComparisonKind::Relational) &&
!Best->RewriteKind) {
if (Diagnose == ExplainDeleted) {
- S.Diag(Best->Function->getLocation(),
- diag::note_defaulted_comparison_not_rewritten_callee)
- << FD;
+ if (Best->Function) {
+ S.Diag(Best->Function->getLocation(),
+ diag::note_defaulted_comparison_not_rewritten_callee)
+ << FD;
+ } else {
+ assert(Best->Conversions.size() == 2 &&
+ Best->Conversions[0].isUserDefined() &&
+ "non-user-defined conversion from class to built-in "
+ "comparison");
+ S.Diag(Best->Conversions[0]
+ .UserDefined.FoundConversionFunction.getDecl()
+ ->getLocation(),
+ diag::note_defaulted_comparison_not_rewritten_conversion)
+ << FD;
+ }
}
return Result::deleted();
}
@@ -7929,7 +7993,7 @@ private:
if (Diagnose == ExplainDeleted) {
S.Diag(Subobj.Loc, diag::note_defaulted_comparison_no_viable_function)
- << FD << Subobj.Kind << Subobj.Decl;
+ << FD << (OO == OO_ExclaimEqual) << Subobj.Kind << Subobj.Decl;
// For a three-way comparison, list both the candidates for the
// original operator and the candidates for the synthesized operator.
@@ -8157,7 +8221,7 @@ private:
if (ReturnFalse.isInvalid())
return StmtError();
- return S.ActOnIfStmt(Loc, false, Loc, nullptr,
+ return S.ActOnIfStmt(Loc, IfStatementKind::Ordinary, Loc, nullptr,
S.ActOnCondition(nullptr, Loc, NotCond.get(),
Sema::ConditionKind::Boolean),
Loc, ReturnFalse.get(), SourceLocation(), nullptr);
@@ -8312,8 +8376,8 @@ private:
return StmtError();
// if (...)
- return S.ActOnIfStmt(Loc, /*IsConstexpr=*/false, Loc, InitStmt, Cond, Loc,
- ReturnStmt.get(),
+ return S.ActOnIfStmt(Loc, IfStatementKind::Ordinary, Loc, InitStmt, Cond,
+ Loc, ReturnStmt.get(),
/*ElseLoc=*/SourceLocation(), /*Else=*/nullptr);
}
@@ -9778,7 +9842,7 @@ public:
};
} // end anonymous namespace
-/// Add the most overriden methods from MD to Methods
+/// Add the most overridden methods from MD to Methods
static void AddMostOverridenMethods(const CXXMethodDecl *MD,
llvm::SmallPtrSetImpl<const CXXMethodDecl *>& Methods) {
if (MD->size_overridden_methods() == 0)
@@ -12472,6 +12536,8 @@ bool Sema::CheckUsingDeclRedeclaration(SourceLocation UsingLoc,
return false;
}
+ const NestedNameSpecifier *CNNS =
+ Context.getCanonicalNestedNameSpecifier(Qual);
for (LookupResult::iterator I = Prev.begin(), E = Prev.end(); I != E; ++I) {
NamedDecl *D = *I;
@@ -12497,8 +12563,7 @@ bool Sema::CheckUsingDeclRedeclaration(SourceLocation UsingLoc,
// using decls differ if they name different scopes (but note that
// template instantiation can cause this check to trigger when it
// didn't before instantiation).
- if (Context.getCanonicalNestedNameSpecifier(Qual) !=
- Context.getCanonicalNestedNameSpecifier(DQual))
+ if (CNNS != Context.getCanonicalNestedNameSpecifier(DQual))
continue;
Diag(NameLoc, diag::err_using_decl_redeclaration) << SS.getRange();
@@ -13234,6 +13299,7 @@ CXXConstructorDecl *Sema::DeclareImplicitDefaultConstructor(
CXXConstructorDecl *DefaultCon = CXXConstructorDecl::Create(
Context, ClassDecl, ClassLoc, NameInfo, /*Type*/ QualType(),
/*TInfo=*/nullptr, ExplicitSpecifier(),
+ getCurFPFeatures().isFPConstrained(),
/*isInline=*/true, /*isImplicitlyDeclared=*/true,
Constexpr ? ConstexprSpecKind::Constexpr
: ConstexprSpecKind::Unspecified);
@@ -13355,7 +13421,8 @@ Sema::findInheritingConstructor(SourceLocation Loc,
CXXConstructorDecl *DerivedCtor = CXXConstructorDecl::Create(
Context, Derived, UsingLoc, NameInfo, TInfo->getType(), TInfo,
- BaseCtor->getExplicitSpecifier(), /*isInline=*/true,
+ BaseCtor->getExplicitSpecifier(), getCurFPFeatures().isFPConstrained(),
+ /*isInline=*/true,
/*isImplicitlyDeclared=*/true,
Constexpr ? BaseCtor->getConstexprKind() : ConstexprSpecKind::Unspecified,
InheritedConstructor(Shadow, BaseCtor),
@@ -13510,12 +13577,13 @@ CXXDestructorDecl *Sema::DeclareImplicitDestructor(CXXRecordDecl *ClassDecl) {
DeclarationName Name
= Context.DeclarationNames.getCXXDestructorName(ClassType);
DeclarationNameInfo NameInfo(Name, ClassLoc);
- CXXDestructorDecl *Destructor =
- CXXDestructorDecl::Create(Context, ClassDecl, ClassLoc, NameInfo,
- QualType(), nullptr, /*isInline=*/true,
- /*isImplicitlyDeclared=*/true,
- Constexpr ? ConstexprSpecKind::Constexpr
- : ConstexprSpecKind::Unspecified);
+ CXXDestructorDecl *Destructor = CXXDestructorDecl::Create(
+ Context, ClassDecl, ClassLoc, NameInfo, QualType(), nullptr,
+ getCurFPFeatures().isFPConstrained(),
+ /*isInline=*/true,
+ /*isImplicitlyDeclared=*/true,
+ Constexpr ? ConstexprSpecKind::Constexpr
+ : ConstexprSpecKind::Unspecified);
Destructor->setAccess(AS_public);
Destructor->setDefaulted();
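Illustrative sketch (not part of the diff): the isFPConstrained() argument threaded into the implicit special-member declarations above only differs from the default in a strict floating-point mode, e.g. when compiling with a flag such as -ffp-model=strict.

struct Accum {
  double total = 0.0;
  // In a strict FP mode, the implicit default constructor, destructor and
  // copy/move members declared for this class are created while the FP
  // environment is constrained.
};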
@@ -14151,6 +14219,7 @@ CXXMethodDecl *Sema::DeclareImplicitCopyAssignment(CXXRecordDecl *ClassDecl) {
CXXMethodDecl *CopyAssignment = CXXMethodDecl::Create(
Context, ClassDecl, ClassLoc, NameInfo, QualType(),
/*TInfo=*/nullptr, /*StorageClass=*/SC_None,
+ getCurFPFeatures().isFPConstrained(),
/*isInline=*/true,
Constexpr ? ConstexprSpecKind::Constexpr : ConstexprSpecKind::Unspecified,
SourceLocation());
@@ -14485,6 +14554,7 @@ CXXMethodDecl *Sema::DeclareImplicitMoveAssignment(CXXRecordDecl *ClassDecl) {
CXXMethodDecl *MoveAssignment = CXXMethodDecl::Create(
Context, ClassDecl, ClassLoc, NameInfo, QualType(),
/*TInfo=*/nullptr, /*StorageClass=*/SC_None,
+ getCurFPFeatures().isFPConstrained(),
/*isInline=*/true,
Constexpr ? ConstexprSpecKind::Constexpr : ConstexprSpecKind::Unspecified,
SourceLocation());
@@ -14864,7 +14934,7 @@ CXXConstructorDecl *Sema::DeclareImplicitCopyConstructor(
// member of its class.
CXXConstructorDecl *CopyConstructor = CXXConstructorDecl::Create(
Context, ClassDecl, ClassLoc, NameInfo, QualType(), /*TInfo=*/nullptr,
- ExplicitSpecifier(),
+ ExplicitSpecifier(), getCurFPFeatures().isFPConstrained(),
/*isInline=*/true,
/*isImplicitlyDeclared=*/true,
Constexpr ? ConstexprSpecKind::Constexpr
@@ -15004,7 +15074,7 @@ CXXConstructorDecl *Sema::DeclareImplicitMoveConstructor(
// member of its class.
CXXConstructorDecl *MoveConstructor = CXXConstructorDecl::Create(
Context, ClassDecl, ClassLoc, NameInfo, QualType(), /*TInfo=*/nullptr,
- ExplicitSpecifier(),
+ ExplicitSpecifier(), getCurFPFeatures().isFPConstrained(),
/*isInline=*/true,
/*isImplicitlyDeclared=*/true,
Constexpr ? ConstexprSpecKind::Constexpr
@@ -15261,8 +15331,17 @@ Sema::BuildCXXConstructExpr(SourceLocation ConstructLoc, QualType DeclInitType,
// can be omitted by constructing the temporary object
// directly into the target of the omitted copy/move
if (ConstructKind == CXXConstructExpr::CK_Complete && Constructor &&
+ // FIXME: Converting constructors should also be accepted.
+ // But to fix this, the logic that digs down into a CXXConstructExpr
+ // to find the source object needs to handle it.
+ // Right now it assumes the source object is passed directly as the
+ // first argument.
Constructor->isCopyOrMoveConstructor() && hasOneRealArgument(ExprArgs)) {
Expr *SubExpr = ExprArgs[0];
+ // FIXME: Per above, this is also incorrect if we want to accept
+ // converting constructors, as isTemporaryObject will
+ // reject temporaries with different type from the
+ // CXXRecord itself.
Elidable = SubExpr->isTemporaryObject(
Context, cast<CXXRecordDecl>(FoundDecl->getDeclContext()));
}
@@ -15400,7 +15479,7 @@ ExprResult Sema::BuildCXXDefaultInitExpr(SourceLocation Loc, FieldDecl *Field) {
void Sema::FinalizeVarWithDestructor(VarDecl *VD, const RecordType *Record) {
if (VD->isInvalidDecl()) return;
// If initializing the variable failed, don't also diagnose problems with
- // the desctructor, they're likely related.
+ // the destructor, they're likely related.
if (VD->getInit() && VD->getInit()->containsErrors())
return;
@@ -15830,7 +15909,7 @@ checkLiteralOperatorTemplateParameterList(Sema &SemaRef,
//
// As a DR resolution, we also allow placeholders for deduced class
// template specializations.
- if (SemaRef.getLangOpts().CPlusPlus20 &&
+ if (SemaRef.getLangOpts().CPlusPlus20 && PmDecl &&
!PmDecl->isTemplateParameterPack() &&
(PmDecl->getType()->isRecordType() ||
PmDecl->getType()->getAs<DeducedTemplateSpecializationType>()))
@@ -16806,10 +16885,7 @@ NamedDecl *Sema::ActOnFriendFunctionDecl(Scope *S, Declarator &D,
while (DC->isRecord())
DC = DC->getParent();
- DeclContext *LookupDC = DC;
- while (LookupDC->isTransparentContext())
- LookupDC = LookupDC->getParent();
-
+ DeclContext *LookupDC = DC->getNonTransparentContext();
while (true) {
LookupQualifiedName(Previous, LookupDC);
@@ -17572,16 +17648,12 @@ bool Sema::DefineUsedVTables() {
// no key function or the key function is inlined. Don't warn in C++ ABIs
// that lack key functions, since the user won't be able to make one.
if (Context.getTargetInfo().getCXXABI().hasKeyFunctions() &&
- Class->isExternallyVisible() && ClassTSK != TSK_ImplicitInstantiation) {
+ Class->isExternallyVisible() && ClassTSK != TSK_ImplicitInstantiation &&
+ ClassTSK != TSK_ExplicitInstantiationDefinition) {
const FunctionDecl *KeyFunctionDef = nullptr;
if (!KeyFunction || (KeyFunction->hasBody(KeyFunctionDef) &&
- KeyFunctionDef->isInlined())) {
- Diag(Class->getLocation(),
- ClassTSK == TSK_ExplicitInstantiationDefinition
- ? diag::warn_weak_template_vtable
- : diag::warn_weak_vtable)
- << Class;
- }
+ KeyFunctionDef->isInlined()))
+ Diag(Class->getLocation(), diag::warn_weak_vtable) << Class;
}
}
VTableUses.clear();
diff --git a/clang/lib/Sema/SemaDeclObjC.cpp b/clang/lib/Sema/SemaDeclObjC.cpp
index e0f8c6e92d5a..d6e659e17069 100644
--- a/clang/lib/Sema/SemaDeclObjC.cpp
+++ b/clang/lib/Sema/SemaDeclObjC.cpp
@@ -611,7 +611,7 @@ ActOnSuperClassOfClassInterface(Scope *S,
}
}
- if (!dyn_cast_or_null<TypedefNameDecl>(PrevDecl)) {
+ if (!isa_and_nonnull<TypedefNameDecl>(PrevDecl)) {
if (!SuperClassDecl)
Diag(SuperLoc, diag::err_undef_superclass)
<< SuperName << ClassName << SourceRange(AtInterfaceLoc, ClassLoc);
@@ -2614,7 +2614,7 @@ void Sema::WarnExactTypedMethods(ObjCMethodDecl *ImpMethodDecl,
if (MethodDecl->getImplementationControl() == ObjCMethodDecl::Optional)
return;
// don't issue warning when primary class's method is
- // depecated/unavailable.
+ // deprecated/unavailable.
if (MethodDecl->hasAttr<UnavailableAttr>() ||
MethodDecl->hasAttr<DeprecatedAttr>())
return;
@@ -2711,8 +2711,7 @@ static void CheckProtocolMethodDefs(Sema &S,
ProtocolsExplictImpl.reset(new ProtocolNameSet);
findProtocolsWithExplicitImpls(Super, *ProtocolsExplictImpl);
}
- if (ProtocolsExplictImpl->find(PDecl->getIdentifier()) !=
- ProtocolsExplictImpl->end())
+ if (ProtocolsExplictImpl->contains(PDecl->getIdentifier()))
return;
// If no super class conforms to the protocol, we should not search
@@ -3427,8 +3426,10 @@ void Sema::AddMethodToGlobalPool(ObjCMethodDecl *Method, bool impl,
GlobalMethodPool::iterator Pos = MethodPool.find(Method->getSelector());
if (Pos == MethodPool.end())
- Pos = MethodPool.insert(std::make_pair(Method->getSelector(),
- GlobalMethods())).first;
+ Pos = MethodPool
+ .insert(std::make_pair(Method->getSelector(),
+ GlobalMethodPool::Lists()))
+ .first;
Method->setDefined(impl);
@@ -3636,7 +3637,7 @@ ObjCMethodDecl *Sema::LookupImplementedMethodInGlobalPool(Selector Sel) {
if (Pos == MethodPool.end())
return nullptr;
- GlobalMethods &Methods = Pos->second;
+ GlobalMethodPool::Lists &Methods = Pos->second;
for (const ObjCMethodList *Method = &Methods.first; Method;
Method = Method->getNext())
if (Method->getMethod() &&
@@ -4832,7 +4833,7 @@ Decl *Sema::ActOnMethodDeclaration(
// If this method overrides a previous @synthesize declaration,
// register it with the property. Linear search through all
// properties here, because the autosynthesized stub hasn't been
- // made visible yet, so it can be overriden by a later
+ // made visible yet, so it can be overridden by a later
// user-specified implementation.
for (ObjCPropertyImplDecl *PropertyImpl : ImpDecl->property_impls()) {
if (auto *Setter = PropertyImpl->getSetterMethodDecl())
diff --git a/clang/lib/Sema/SemaExceptionSpec.cpp b/clang/lib/Sema/SemaExceptionSpec.cpp
index 8816c9c1fea0..3af4c6f4bc41 100644
--- a/clang/lib/Sema/SemaExceptionSpec.cpp
+++ b/clang/lib/Sema/SemaExceptionSpec.cpp
@@ -78,14 +78,21 @@ bool Sema::isLibstdcxxEagerExceptionSpecHack(const Declarator &D) {
.Default(false);
}
-ExprResult Sema::ActOnNoexceptSpec(SourceLocation NoexceptLoc,
- Expr *NoexceptExpr,
+ExprResult Sema::ActOnNoexceptSpec(Expr *NoexceptExpr,
ExceptionSpecificationType &EST) {
- // FIXME: This is bogus, a noexcept expression is not a condition.
- ExprResult Converted = CheckBooleanCondition(NoexceptLoc, NoexceptExpr);
+
+ if (NoexceptExpr->isTypeDependent() ||
+ NoexceptExpr->containsUnexpandedParameterPack()) {
+ EST = EST_DependentNoexcept;
+ return NoexceptExpr;
+ }
+
+ llvm::APSInt Result;
+ ExprResult Converted = CheckConvertedConstantExpression(
+ NoexceptExpr, Context.BoolTy, Result, CCEK_Noexcept);
+
if (Converted.isInvalid()) {
EST = EST_NoexceptFalse;
-
// Fill in an expression of 'false' as a fixup.
auto *BoolExpr = new (Context)
CXXBoolLiteralExpr(false, Context.BoolTy, NoexceptExpr->getBeginLoc());
@@ -99,9 +106,6 @@ ExprResult Sema::ActOnNoexceptSpec(SourceLocation NoexceptLoc,
return Converted;
}
- llvm::APSInt Result;
- Converted = VerifyIntegerConstantExpression(
- Converted.get(), &Result, diag::err_noexcept_needs_constant_expression);
if (!Converted.isInvalid())
EST = !Result ? EST_NoexceptFalse : EST_NoexceptTrue;
return Converted;
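Illustrative sketch (not part of the diff): the noexcept argument is now checked as a converted constant expression of type bool instead of as a boolean condition followed by a separate integer-constant-expression check.

constexpr bool NoThrow = true;
void fast_path() noexcept(NoThrow);            // OK: constant expression converted to bool

int runtime_flag();
// void slow_path() noexcept(runtime_flag());  // rejected: not a constant expression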
@@ -1492,6 +1496,8 @@ CanThrowResult Sema::canThrow(const Stmt *S) {
case Stmt::OMPInteropDirectiveClass:
case Stmt::OMPDispatchDirectiveClass:
case Stmt::OMPMaskedDirectiveClass:
+ case Stmt::OMPMetaDirectiveClass:
+ case Stmt::OMPGenericLoopDirectiveClass:
case Stmt::ReturnStmtClass:
case Stmt::SEHExceptStmtClass:
case Stmt::SEHFinallyStmtClass:
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 0e6c933cd4f3..97f2062d4485 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -25,9 +25,11 @@
#include "clang/AST/ExprObjC.h"
#include "clang/AST/ExprOpenMP.h"
#include "clang/AST/OperationKinds.h"
+#include "clang/AST/ParentMapContext.h"
#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/AST/TypeLoc.h"
#include "clang/Basic/Builtins.h"
+#include "clang/Basic/DiagnosticSema.h"
#include "clang/Basic/PartialDiagnostic.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TargetInfo.h"
@@ -366,10 +368,10 @@ bool Sema::DiagnoseUseOfDecl(NamedDecl *D, ArrayRef<SourceLocation> Locs,
diagnoseUseOfInternalDeclInInlineFunction(*this, D, Loc);
- if (LangOpts.SYCLIsDevice || (LangOpts.OpenMP && LangOpts.OpenMPIsDevice)) {
- if (auto *VD = dyn_cast<ValueDecl>(D))
- checkDeviceDecl(VD, Loc);
+ if (auto *VD = dyn_cast<ValueDecl>(D))
+ checkTypeSupport(VD->getType(), Loc, VD);
+ if (LangOpts.SYCLIsDevice || (LangOpts.OpenMP && LangOpts.OpenMPIsDevice)) {
if (!Context.getTargetInfo().isTLSSupported())
if (const auto *VD = dyn_cast<VarDecl>(D))
if (VD->getTLSKind() != VarDecl::TLS_None)
@@ -1197,45 +1199,32 @@ static QualType handleFloatConversion(Sema &S, ExprResult &LHS,
/*ConvertInt=*/!IsCompAssign);
}
-/// Diagnose attempts to convert between __float128 and long double if
-/// there is no support for such conversion. Helper function of
-/// UsualArithmeticConversions().
+/// Diagnose attempts to convert between __float128, __ibm128 and
+/// long double if there is no support for such conversion.
+/// Helper function of UsualArithmeticConversions().
static bool unsupportedTypeConversion(const Sema &S, QualType LHSType,
QualType RHSType) {
- /* No issue converting if at least one of the types is not a floating point
- type or the two types have the same rank.
- */
- if (!LHSType->isFloatingType() || !RHSType->isFloatingType() ||
- S.Context.getFloatingTypeOrder(LHSType, RHSType) == 0)
+ // No issue if either is not a floating point type.
+ if (!LHSType->isFloatingType() || !RHSType->isFloatingType())
return false;
- assert(LHSType->isFloatingType() && RHSType->isFloatingType() &&
- "The remaining types must be floating point types.");
-
+ // No issue if both have the same 128-bit float semantics.
auto *LHSComplex = LHSType->getAs<ComplexType>();
auto *RHSComplex = RHSType->getAs<ComplexType>();
- QualType LHSElemType = LHSComplex ?
- LHSComplex->getElementType() : LHSType;
- QualType RHSElemType = RHSComplex ?
- RHSComplex->getElementType() : RHSType;
+ QualType LHSElem = LHSComplex ? LHSComplex->getElementType() : LHSType;
+ QualType RHSElem = RHSComplex ? RHSComplex->getElementType() : RHSType;
- // No issue if the two types have the same representation
- if (&S.Context.getFloatTypeSemantics(LHSElemType) ==
- &S.Context.getFloatTypeSemantics(RHSElemType))
- return false;
+ const llvm::fltSemantics &LHSSem = S.Context.getFloatTypeSemantics(LHSElem);
+ const llvm::fltSemantics &RHSSem = S.Context.getFloatTypeSemantics(RHSElem);
- bool Float128AndLongDouble = (LHSElemType == S.Context.Float128Ty &&
- RHSElemType == S.Context.LongDoubleTy);
- Float128AndLongDouble |= (LHSElemType == S.Context.LongDoubleTy &&
- RHSElemType == S.Context.Float128Ty);
+ if ((&LHSSem != &llvm::APFloat::PPCDoubleDouble() ||
+ &RHSSem != &llvm::APFloat::IEEEquad()) &&
+ (&LHSSem != &llvm::APFloat::IEEEquad() ||
+ &RHSSem != &llvm::APFloat::PPCDoubleDouble()))
+ return false;
- // We've handled the situation where __float128 and long double have the same
- // representation. We allow all conversions for all possible long double types
- // except PPC's double double.
- return Float128AndLongDouble &&
- (&S.Context.getFloatTypeSemantics(S.Context.LongDoubleTy) ==
- &llvm::APFloat::PPCDoubleDouble());
+ return true;
}
typedef ExprResult PerformCastFn(Sema &S, Expr *operand, QualType toType);
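Illustrative sketch (not part of the diff) of the conversion the rewritten unsupportedTypeConversion still rejects, assuming a target such as PowerPC where long double uses the IBM double-double format:

__float128 q = 1.0;      // IEEE quad semantics
long double d = 2.0L;    // PPCDoubleDouble semantics on the assumed target
// auto sum = q + d;     // no common type can be formed; the mix is diagnosed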
@@ -1547,8 +1536,8 @@ QualType Sema::UsualArithmeticConversions(ExprResult &LHS, ExprResult &RHS,
// At this point, we have two different arithmetic types.
- // Diagnose attempts to convert between __float128 and long double where
- // such conversions currently can't be handled.
+ // Diagnose attempts to convert between __ibm128, __float128 and long double
+ // where such conversions currently can't be handled.
if (unsupportedTypeConversion(*this, LHSType, RHSType))
return QualType();
@@ -3193,9 +3182,8 @@ ExprResult Sema::BuildDeclarationNameExpr(const CXXScopeSpec &SS,
return ULE;
}
-static void
-diagnoseUncapturableValueReference(Sema &S, SourceLocation loc,
- ValueDecl *var, DeclContext *DC);
+static void diagnoseUncapturableValueReference(Sema &S, SourceLocation loc,
+ ValueDecl *var);
/// Complete semantic analysis for a reference to the given declaration.
ExprResult Sema::BuildDeclarationNameExpr(
@@ -3219,8 +3207,7 @@ ExprResult Sema::BuildDeclarationNameExpr(
// Make sure that we're referring to a value.
if (!isa<ValueDecl, UnresolvedUsingIfExistsDecl>(D)) {
- Diag(Loc, diag::err_ref_non_value)
- << D << SS.getRange();
+ Diag(Loc, diag::err_ref_non_value) << D << SS.getRange();
Diag(D->getLocation(), diag::note_declared_at);
return ExprError();
}
@@ -3246,210 +3233,204 @@ ExprResult Sema::BuildDeclarationNameExpr(
return BuildAnonymousStructUnionMemberReference(SS, NameInfo.getLoc(),
indirectField);
- {
- QualType type = VD->getType();
- if (type.isNull())
- return ExprError();
- ExprValueKind valueKind = VK_PRValue;
+ QualType type = VD->getType();
+ if (type.isNull())
+ return ExprError();
+ ExprValueKind valueKind = VK_PRValue;
- // In 'T ...V;', the type of the declaration 'V' is 'T...', but the type of
- // a reference to 'V' is simply (unexpanded) 'T'. The type, like the value,
- // is expanded by some outer '...' in the context of the use.
- type = type.getNonPackExpansionType();
+ // In 'T ...V;', the type of the declaration 'V' is 'T...', but the type of
+ // a reference to 'V' is simply (unexpanded) 'T'. The type, like the value,
+ // is expanded by some outer '...' in the context of the use.
+ type = type.getNonPackExpansionType();
- switch (D->getKind()) {
+ switch (D->getKind()) {
// Ignore all the non-ValueDecl kinds.
#define ABSTRACT_DECL(kind)
#define VALUE(type, base)
-#define DECL(type, base) \
- case Decl::type:
+#define DECL(type, base) case Decl::type:
#include "clang/AST/DeclNodes.inc"
- llvm_unreachable("invalid value decl kind");
-
- // These shouldn't make it here.
- case Decl::ObjCAtDefsField:
- llvm_unreachable("forming non-member reference to ivar?");
-
- // Enum constants are always r-values and never references.
- // Unresolved using declarations are dependent.
- case Decl::EnumConstant:
- case Decl::UnresolvedUsingValue:
- case Decl::OMPDeclareReduction:
- case Decl::OMPDeclareMapper:
- valueKind = VK_PRValue;
+ llvm_unreachable("invalid value decl kind");
+
+ // These shouldn't make it here.
+ case Decl::ObjCAtDefsField:
+ llvm_unreachable("forming non-member reference to ivar?");
+
+ // Enum constants are always r-values and never references.
+ // Unresolved using declarations are dependent.
+ case Decl::EnumConstant:
+ case Decl::UnresolvedUsingValue:
+ case Decl::OMPDeclareReduction:
+ case Decl::OMPDeclareMapper:
+ valueKind = VK_PRValue;
+ break;
+
+ // Fields and indirect fields that got here must be for
+ // pointer-to-member expressions; we just call them l-values for
+ // internal consistency, because this subexpression doesn't really
+ // exist in the high-level semantics.
+ case Decl::Field:
+ case Decl::IndirectField:
+ case Decl::ObjCIvar:
+ assert(getLangOpts().CPlusPlus && "building reference to field in C?");
+
+ // These can't have reference type in well-formed programs, but
+ // for internal consistency we do this anyway.
+ type = type.getNonReferenceType();
+ valueKind = VK_LValue;
+ break;
+
+ // Non-type template parameters are either l-values or r-values
+ // depending on the type.
+ case Decl::NonTypeTemplateParm: {
+ if (const ReferenceType *reftype = type->getAs<ReferenceType>()) {
+ type = reftype->getPointeeType();
+ valueKind = VK_LValue; // even if the parameter is an r-value reference
break;
+ }
- // Fields and indirect fields that got here must be for
- // pointer-to-member expressions; we just call them l-values for
- // internal consistency, because this subexpression doesn't really
- // exist in the high-level semantics.
- case Decl::Field:
- case Decl::IndirectField:
- case Decl::ObjCIvar:
- assert(getLangOpts().CPlusPlus &&
- "building reference to field in C?");
-
- // These can't have reference type in well-formed programs, but
- // for internal consistency we do this anyway.
- type = type.getNonReferenceType();
+ // [expr.prim.id.unqual]p2:
+ // If the entity is a template parameter object for a template
+ // parameter of type T, the type of the expression is const T.
+ // [...] The expression is an lvalue if the entity is a [...] template
+ // parameter object.
+ if (type->isRecordType()) {
+ type = type.getUnqualifiedType().withConst();
valueKind = VK_LValue;
break;
+ }
- // Non-type template parameters are either l-values or r-values
- // depending on the type.
- case Decl::NonTypeTemplateParm: {
- if (const ReferenceType *reftype = type->getAs<ReferenceType>()) {
- type = reftype->getPointeeType();
- valueKind = VK_LValue; // even if the parameter is an r-value reference
- break;
- }
-
- // [expr.prim.id.unqual]p2:
- // If the entity is a template parameter object for a template
- // parameter of type T, the type of the expression is const T.
- // [...] The expression is an lvalue if the entity is a [...] template
- // parameter object.
- if (type->isRecordType()) {
- type = type.getUnqualifiedType().withConst();
- valueKind = VK_LValue;
- break;
- }
+ // For non-references, we need to strip qualifiers just in case
+ // the template parameter was declared as 'const int' or whatever.
+ valueKind = VK_PRValue;
+ type = type.getUnqualifiedType();
+ break;
+ }
- // For non-references, we need to strip qualifiers just in case
- // the template parameter was declared as 'const int' or whatever.
+ case Decl::Var:
+ case Decl::VarTemplateSpecialization:
+ case Decl::VarTemplatePartialSpecialization:
+ case Decl::Decomposition:
+ case Decl::OMPCapturedExpr:
+ // In C, "extern void blah;" is valid and is an r-value.
+ if (!getLangOpts().CPlusPlus && !type.hasQualifiers() &&
+ type->isVoidType()) {
valueKind = VK_PRValue;
- type = type.getUnqualifiedType();
break;
}
+ LLVM_FALLTHROUGH;
- case Decl::Var:
- case Decl::VarTemplateSpecialization:
- case Decl::VarTemplatePartialSpecialization:
- case Decl::Decomposition:
- case Decl::OMPCapturedExpr:
- // In C, "extern void blah;" is valid and is an r-value.
- if (!getLangOpts().CPlusPlus &&
- !type.hasQualifiers() &&
- type->isVoidType()) {
- valueKind = VK_PRValue;
- break;
- }
- LLVM_FALLTHROUGH;
-
- case Decl::ImplicitParam:
- case Decl::ParmVar: {
- // These are always l-values.
- valueKind = VK_LValue;
- type = type.getNonReferenceType();
-
- // FIXME: Does the addition of const really only apply in
- // potentially-evaluated contexts? Since the variable isn't actually
- // captured in an unevaluated context, it seems that the answer is no.
- if (!isUnevaluatedContext()) {
- QualType CapturedType = getCapturedDeclRefType(cast<VarDecl>(VD), Loc);
- if (!CapturedType.isNull())
- type = CapturedType;
- }
+ case Decl::ImplicitParam:
+ case Decl::ParmVar: {
+ // These are always l-values.
+ valueKind = VK_LValue;
+ type = type.getNonReferenceType();
- break;
+ // FIXME: Does the addition of const really only apply in
+ // potentially-evaluated contexts? Since the variable isn't actually
+ // captured in an unevaluated context, it seems that the answer is no.
+ if (!isUnevaluatedContext()) {
+ QualType CapturedType = getCapturedDeclRefType(cast<VarDecl>(VD), Loc);
+ if (!CapturedType.isNull())
+ type = CapturedType;
}
- case Decl::Binding: {
- // These are always lvalues.
- valueKind = VK_LValue;
- type = type.getNonReferenceType();
- // FIXME: Support lambda-capture of BindingDecls, once CWG actually
- // decides how that's supposed to work.
- auto *BD = cast<BindingDecl>(VD);
- if (BD->getDeclContext() != CurContext) {
- auto *DD = dyn_cast_or_null<VarDecl>(BD->getDecomposedDecl());
- if (DD && DD->hasLocalStorage())
- diagnoseUncapturableValueReference(*this, Loc, BD, CurContext);
- }
- break;
- }
-
- case Decl::Function: {
- if (unsigned BID = cast<FunctionDecl>(VD)->getBuiltinID()) {
- if (!Context.BuiltinInfo.isPredefinedLibFunction(BID)) {
- type = Context.BuiltinFnTy;
- valueKind = VK_PRValue;
- break;
- }
- }
+ break;
+ }
- const FunctionType *fty = type->castAs<FunctionType>();
+ case Decl::Binding: {
+ // These are always lvalues.
+ valueKind = VK_LValue;
+ type = type.getNonReferenceType();
+ // FIXME: Support lambda-capture of BindingDecls, once CWG actually
+ // decides how that's supposed to work.
+ auto *BD = cast<BindingDecl>(VD);
+ if (BD->getDeclContext() != CurContext) {
+ auto *DD = dyn_cast_or_null<VarDecl>(BD->getDecomposedDecl());
+ if (DD && DD->hasLocalStorage())
+ diagnoseUncapturableValueReference(*this, Loc, BD);
+ }
+ break;
+ }
- // If we're referring to a function with an __unknown_anytype
- // result type, make the entire expression __unknown_anytype.
- if (fty->getReturnType() == Context.UnknownAnyTy) {
- type = Context.UnknownAnyTy;
+ case Decl::Function: {
+ if (unsigned BID = cast<FunctionDecl>(VD)->getBuiltinID()) {
+ if (!Context.BuiltinInfo.isPredefinedLibFunction(BID)) {
+ type = Context.BuiltinFnTy;
valueKind = VK_PRValue;
break;
}
+ }
- // Functions are l-values in C++.
- if (getLangOpts().CPlusPlus) {
- valueKind = VK_LValue;
- break;
- }
+ const FunctionType *fty = type->castAs<FunctionType>();
- // C99 DR 316 says that, if a function type comes from a
- // function definition (without a prototype), that type is only
- // used for checking compatibility. Therefore, when referencing
- // the function, we pretend that we don't have the full function
- // type.
- if (!cast<FunctionDecl>(VD)->hasPrototype() &&
- isa<FunctionProtoType>(fty))
- type = Context.getFunctionNoProtoType(fty->getReturnType(),
- fty->getExtInfo());
-
- // Functions are r-values in C.
+ // If we're referring to a function with an __unknown_anytype
+ // result type, make the entire expression __unknown_anytype.
+ if (fty->getReturnType() == Context.UnknownAnyTy) {
+ type = Context.UnknownAnyTy;
valueKind = VK_PRValue;
break;
}
- case Decl::CXXDeductionGuide:
- llvm_unreachable("building reference to deduction guide");
-
- case Decl::MSProperty:
- case Decl::MSGuid:
- case Decl::TemplateParamObject:
- // FIXME: Should MSGuidDecl and template parameter objects be subject to
- // capture in OpenMP, or duplicated between host and device?
+ // Functions are l-values in C++.
+ if (getLangOpts().CPlusPlus) {
valueKind = VK_LValue;
break;
+ }
- case Decl::CXXMethod:
- // If we're referring to a method with an __unknown_anytype
- // result type, make the entire expression __unknown_anytype.
- // This should only be possible with a type written directly.
- if (const FunctionProtoType *proto
- = dyn_cast<FunctionProtoType>(VD->getType()))
- if (proto->getReturnType() == Context.UnknownAnyTy) {
- type = Context.UnknownAnyTy;
- valueKind = VK_PRValue;
- break;
- }
+ // C99 DR 316 says that, if a function type comes from a
+ // function definition (without a prototype), that type is only
+ // used for checking compatibility. Therefore, when referencing
+ // the function, we pretend that we don't have the full function
+ // type.
+ if (!cast<FunctionDecl>(VD)->hasPrototype() && isa<FunctionProtoType>(fty))
+ type = Context.getFunctionNoProtoType(fty->getReturnType(),
+ fty->getExtInfo());
+
+ // Functions are r-values in C.
+ valueKind = VK_PRValue;
+ break;
+ }
- // C++ methods are l-values if static, r-values if non-static.
- if (cast<CXXMethodDecl>(VD)->isStatic()) {
- valueKind = VK_LValue;
+ case Decl::CXXDeductionGuide:
+ llvm_unreachable("building reference to deduction guide");
+
+ case Decl::MSProperty:
+ case Decl::MSGuid:
+ case Decl::TemplateParamObject:
+ // FIXME: Should MSGuidDecl and template parameter objects be subject to
+ // capture in OpenMP, or duplicated between host and device?
+ valueKind = VK_LValue;
+ break;
+
+ case Decl::CXXMethod:
+ // If we're referring to a method with an __unknown_anytype
+ // result type, make the entire expression __unknown_anytype.
+ // This should only be possible with a type written directly.
+ if (const FunctionProtoType *proto =
+ dyn_cast<FunctionProtoType>(VD->getType()))
+ if (proto->getReturnType() == Context.UnknownAnyTy) {
+ type = Context.UnknownAnyTy;
+ valueKind = VK_PRValue;
break;
}
- LLVM_FALLTHROUGH;
- case Decl::CXXConversion:
- case Decl::CXXDestructor:
- case Decl::CXXConstructor:
- valueKind = VK_PRValue;
+ // C++ methods are l-values if static, r-values if non-static.
+ if (cast<CXXMethodDecl>(VD)->isStatic()) {
+ valueKind = VK_LValue;
break;
}
+ LLVM_FALLTHROUGH;
- return BuildDeclRefExpr(VD, type, valueKind, NameInfo, &SS, FoundD,
- /*FIXME: TemplateKWLoc*/ SourceLocation(),
- TemplateArgs);
+ case Decl::CXXConversion:
+ case Decl::CXXDestructor:
+ case Decl::CXXConstructor:
+ valueKind = VK_PRValue;
+ break;
}
+
+ return BuildDeclRefExpr(VD, type, valueKind, NameInfo, &SS, FoundD,
+ /*FIXME: TemplateKWLoc*/ SourceLocation(),
+ TemplateArgs);
}
static void ConvertUTF8ToWideString(unsigned CharByteWidth, StringRef Source,
@@ -3832,7 +3813,7 @@ ExprResult Sema::ActOnNumericConstant(const Token &Tok, Scope *UDLScope) {
llvm::APInt Val(bit_width, 0, isSigned);
bool Overflowed = Literal.GetFixedPointValue(Val, scale);
- bool ValIsZero = Val.isNullValue() && !Overflowed;
+ bool ValIsZero = Val.isZero() && !Overflowed;
auto MaxVal = Context.getFixedPointMax(Ty).getValue();
if (Literal.isFract && Val == MaxVal + 1 && !ValIsZero)
@@ -3877,7 +3858,7 @@ ExprResult Sema::ActOnNumericConstant(const Token &Tok, Scope *UDLScope) {
"cl_khr_fp64", getLangOpts())) {
// Impose single-precision float type when cl_khr_fp64 is not enabled.
Diag(Tok.getLocation(), diag::warn_double_const_requires_fp64)
- << (getLangOpts().OpenCLVersion >= 300);
+ << (getLangOpts().getOpenCLCompatibleVersion() >= 300);
Res = ImpCastExprToType(Res, Context.FloatTy, CK_FloatingCast).get();
}
}
@@ -5273,7 +5254,7 @@ ExprResult Sema::ActOnOMPIteratorExpr(Scope *S, SourceLocation IteratorKwLoc,
// OpenMP 5.0, 2.1.6 Iterators, Restrictions
// If the step expression of a range-specification equals zero, the
// behavior is unspecified.
- if (Result && Result->isNullValue()) {
+ if (Result && Result->isZero()) {
Diag(Step->getExprLoc(), diag::err_omp_iterator_step_constant_zero)
<< Step << Step->getSourceRange();
IsCorrect = false;
@@ -5728,7 +5709,7 @@ Sema::VariadicCallType
Sema::getVariadicCallType(FunctionDecl *FDecl, const FunctionProtoType *Proto,
Expr *Fn) {
if (Proto && Proto->isVariadic()) {
- if (dyn_cast_or_null<CXXConstructorDecl>(FDecl))
+ if (isa_and_nonnull<CXXConstructorDecl>(FDecl))
return VariadicConstructor;
else if (Fn && Fn->getType()->isBlockPointerType())
return VariadicBlock;
@@ -6249,14 +6230,12 @@ static FunctionDecl *rewriteBuiltinFunctionDecl(Sema *Sema, ASTContext &Context,
QualType OverloadTy = Context.getFunctionType(FT->getReturnType(),
OverloadParams, EPI);
DeclContext *Parent = FDecl->getParent();
- FunctionDecl *OverloadDecl = FunctionDecl::Create(Context, Parent,
- FDecl->getLocation(),
- FDecl->getLocation(),
- FDecl->getIdentifier(),
- OverloadTy,
- /*TInfo=*/nullptr,
- SC_Extern, false,
- /*hasPrototype=*/true);
+ FunctionDecl *OverloadDecl = FunctionDecl::Create(
+ Context, Parent, FDecl->getLocation(), FDecl->getLocation(),
+ FDecl->getIdentifier(), OverloadTy,
+ /*TInfo=*/nullptr, SC_Extern, Sema->getCurFPFeatures().isFPConstrained(),
+ false,
+ /*hasPrototype=*/true);
SmallVector<ParmVarDecl*, 16> Params;
FT = cast<FunctionProtoType>(OverloadTy);
for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i) {
@@ -6476,7 +6455,8 @@ ExprResult Sema::BuildCallExpr(Scope *Scope, Expr *Fn, SourceLocation LParenLoc,
if (Fn->getType() == Context.BoundMemberTy) {
return BuildCallToMemberFunction(Scope, Fn, LParenLoc, ArgExprs,
- RParenLoc, AllowRecovery);
+ RParenLoc, ExecConfig, IsExecConfig,
+ AllowRecovery);
}
}
@@ -6495,7 +6475,8 @@ ExprResult Sema::BuildCallExpr(Scope *Scope, Expr *Fn, SourceLocation LParenLoc,
Scope, Fn, ULE, LParenLoc, ArgExprs, RParenLoc, ExecConfig,
/*AllowTypoCorrection=*/true, find.IsAddressOfOperand);
return BuildCallToMemberFunction(Scope, Fn, LParenLoc, ArgExprs,
- RParenLoc, AllowRecovery);
+ RParenLoc, ExecConfig, IsExecConfig,
+ AllowRecovery);
}
}
@@ -6543,6 +6524,57 @@ ExprResult Sema::BuildCallExpr(Scope *Scope, Expr *Fn, SourceLocation LParenLoc,
return ExprError();
checkDirectCallValidity(*this, Fn, FD, ArgExprs);
+
+ // If this expression is a call to a builtin function in HIP device
+ // compilation, allow a pointer-type argument to default address space to be
+ // passed as a pointer-type parameter to a non-default address space.
+ // If Arg is declared in the default address space and Param is declared
+ // in a non-default address space, perform an implicit address space cast to
+ // the parameter type.
+ if (getLangOpts().HIP && getLangOpts().CUDAIsDevice && FD &&
+ FD->getBuiltinID()) {
+ for (unsigned Idx = 0; Idx < FD->param_size(); ++Idx) {
+ ParmVarDecl *Param = FD->getParamDecl(Idx);
+ if (!ArgExprs[Idx] || !Param || !Param->getType()->isPointerType() ||
+ !ArgExprs[Idx]->getType()->isPointerType())
+ continue;
+
+ auto ParamAS = Param->getType()->getPointeeType().getAddressSpace();
+ auto ArgTy = ArgExprs[Idx]->getType();
+ auto ArgPtTy = ArgTy->getPointeeType();
+ auto ArgAS = ArgPtTy.getAddressSpace();
+
+ // Add address space cast if target address spaces are different
+ bool NeedImplicitASC =
+ ParamAS != LangAS::Default && // Pointer params in generic AS don't need special handling.
+ ( ArgAS == LangAS::Default || // We do allow implicit conversion from generic AS
+ // or from specific AS which has target AS matching that of Param.
+ getASTContext().getTargetAddressSpace(ArgAS) == getASTContext().getTargetAddressSpace(ParamAS));
+ if (!NeedImplicitASC)
+ continue;
+
+ // First, ensure that the Arg is an RValue.
+ if (ArgExprs[Idx]->isGLValue()) {
+ ArgExprs[Idx] = ImplicitCastExpr::Create(
+ Context, ArgExprs[Idx]->getType(), CK_NoOp, ArgExprs[Idx],
+ nullptr, VK_PRValue, FPOptionsOverride());
+ }
+
+ // Construct a new arg type with address space of Param
+ Qualifiers ArgPtQuals = ArgPtTy.getQualifiers();
+ ArgPtQuals.setAddressSpace(ParamAS);
+ auto NewArgPtTy =
+ Context.getQualifiedType(ArgPtTy.getUnqualifiedType(), ArgPtQuals);
+ auto NewArgTy =
+ Context.getQualifiedType(Context.getPointerType(NewArgPtTy),
+ ArgTy.getQualifiers());
+
+ // Finally perform an implicit address space cast
+ ArgExprs[Idx] = ImpCastExprToType(ArgExprs[Idx], NewArgTy,
+ CK_AddressSpaceConversion)
+ .get();
+ }
+ }
}
if (Context.isDependenceAllowed() &&
@@ -7706,6 +7738,9 @@ ExprResult Sema::BuildVectorLiteral(SourceLocation LParenLoc,
// initializers must be one or must match the size of the vector.
// If a single value is specified in the initializer then it will be
// replicated to all the components of the vector
+ if (CheckAltivecInitFromScalar(E->getSourceRange(), Ty,
+ VTy->getElementType()))
+ return ExprError();
if (ShouldSplatAltivecScalarInCast(VTy)) {
// The number of initializers must be one or must match the size of the
// vector. If a single value is specified in the initializer then it will
@@ -8334,8 +8369,8 @@ QualType Sema::CheckConditionalOperands(ExprResult &Cond, ExprResult &LHS,
QualType LHSTy = LHS.get()->getType();
QualType RHSTy = RHS.get()->getType();
- // Diagnose attempts to convert between __float128 and long double where
- // such conversions currently can't be handled.
+ // Diagnose attempts to convert between __ibm128, __float128 and long double
+ // where such conversions currently can't be handled.
if (unsupportedTypeConversion(*this, LHSTy, RHSTy)) {
Diag(QuestionLoc,
diag::err_typecheck_cond_incompatible_operands) << LHSTy << RHSTy
@@ -8346,7 +8381,7 @@ QualType Sema::CheckConditionalOperands(ExprResult &Cond, ExprResult &LHS,
// OpenCL v2.0 s6.12.5 - Blocks cannot be used as expressions of the ternary
// selection operator (?:).
if (getLangOpts().OpenCL &&
- (checkBlockType(*this, LHS.get()) | checkBlockType(*this, RHS.get()))) {
+ ((int)checkBlockType(*this, LHS.get()) | (int)checkBlockType(*this, RHS.get()))) {
return QualType();
}
@@ -9269,8 +9304,8 @@ Sema::CheckAssignmentConstraints(QualType LHSType, ExprResult &RHS,
return Incompatible;
}
- // Diagnose attempts to convert between __float128 and long double where
- // such conversions currently can't be handled.
+ // Diagnose attempts to convert between __ibm128, __float128 and long double
+ // where such conversions currently can't be handled.
if (unsupportedTypeConversion(*this, LHSType, RHSType))
return Incompatible;
@@ -12191,7 +12226,7 @@ QualType Sema::CheckCompareOperands(ExprResult &LHS, ExprResult &RHS,
return computeResultTy();
}
- if (getLangOpts().OpenCLVersion >= 200 || getLangOpts().OpenCLCPlusPlus) {
+ if (getLangOpts().getOpenCLCompatibleVersion() >= 200) {
if (LHSType->isClkEventT() && RHSType->isClkEventT()) {
return computeResultTy();
}
@@ -12398,8 +12433,7 @@ static void diagnoseXorMisusedAsPow(Sema &S, const ExprResult &XorLHS,
RHSStrRef.startswith("0x") || RHSStrRef.startswith("0X") ||
(LHSStrRef.size() > 1 && LHSStrRef.startswith("0")) ||
(RHSStrRef.size() > 1 && RHSStrRef.startswith("0")) ||
- LHSStrRef.find('\'') != StringRef::npos ||
- RHSStrRef.find('\'') != StringRef::npos)
+ LHSStrRef.contains('\'') || RHSStrRef.contains('\''))
return;
bool SuggestXor =
@@ -12450,8 +12484,9 @@ QualType Sema::CheckVectorLogicalOperands(ExprResult &LHS, ExprResult &RHS,
/*AllowBoolConversions*/false);
if (vType.isNull())
return InvalidOperands(Loc, LHS, RHS);
- if (getLangOpts().OpenCL && getLangOpts().OpenCLVersion < 120 &&
- !getLangOpts().OpenCLCPlusPlus && vType->hasFloatingRepresentation())
+ if (getLangOpts().OpenCL &&
+ getLangOpts().getOpenCLCompatibleVersion() < 120 &&
+ vType->hasFloatingRepresentation())
return InvalidOperands(Loc, LHS, RHS);
// FIXME: The check for C++ here is for GCC compatibility. GCC rejects the
// usage of the logical operators && and || with vectors in C. This
@@ -12913,7 +12948,7 @@ static void DiagnoseRecursiveConstFields(Sema &S, const ValueDecl *VD,
// Then we append it to the list to check next in order.
FieldTy = FieldTy.getCanonicalType();
if (const auto *FieldRecTy = FieldTy->getAs<RecordType>()) {
- if (llvm::find(RecordTypeList, FieldRecTy) == RecordTypeList.end())
+ if (!llvm::is_contained(RecordTypeList, FieldRecTy))
RecordTypeList.push_back(FieldRecTy);
}
}
@@ -13340,7 +13375,7 @@ static QualType CheckCommaOperands(Sema &S, ExprResult &LHS, ExprResult &RHS,
if (LHS.isInvalid())
return QualType();
- S.DiagnoseUnusedExprResult(LHS.get());
+ S.DiagnoseUnusedExprResult(LHS.get(), diag::warn_unused_comma_left_operand);
if (!S.getLangOpts().CPlusPlus) {
RHS = S.DefaultFunctionArrayLvalueConversion(RHS.get());
@@ -14132,6 +14167,9 @@ ExprResult Sema::CreateBuiltinBinOp(SourceLocation OpLoc,
}
}
+ checkTypeSupport(LHSExpr->getType(), OpLoc, /*ValueDecl*/ nullptr);
+ checkTypeSupport(RHSExpr->getType(), OpLoc, /*ValueDecl*/ nullptr);
+
switch (Opc) {
case BO_Assign:
ResultTy = CheckAssignmentOperands(LHS.get(), RHS, OpLoc, QualType());
@@ -14902,8 +14940,7 @@ ExprResult Sema::CreateBuiltinUnaryOp(SourceLocation OpLoc,
}
} else if (resultType->isExtVectorType()) {
if (Context.getLangOpts().OpenCL &&
- Context.getLangOpts().OpenCLVersion < 120 &&
- !Context.getLangOpts().OpenCLCPlusPlus) {
+ Context.getLangOpts().getOpenCLCompatibleVersion() < 120) {
// OpenCL v1.1 6.3.h: The logical operator not (!) does not
// operate on vector float types.
QualType T = resultType->castAs<ExtVectorType>()->getElementType();
@@ -15683,7 +15720,7 @@ ExprResult Sema::ActOnBlockStmtExpr(SourceLocation CaretLoc,
if (!Result.isInvalid()) {
Result = PerformCopyInitialization(
InitializedEntity::InitializeBlock(Var->getLocation(),
- Cap.getCaptureType(), false),
+ Cap.getCaptureType()),
Loc, Result.get());
}
@@ -16602,15 +16639,15 @@ void Sema::CheckUnusedVolatileAssignment(Expr *E) {
if (auto *BO = dyn_cast<BinaryOperator>(E->IgnoreParenImpCasts())) {
if (BO->getOpcode() == BO_Assign) {
auto &LHSs = ExprEvalContexts.back().VolatileAssignmentLHSs;
- LHSs.erase(std::remove(LHSs.begin(), LHSs.end(), BO->getLHS()),
- LHSs.end());
+ llvm::erase_value(LHSs, BO->getLHS());
}
}
}
ExprResult Sema::CheckForImmediateInvocation(ExprResult E, FunctionDecl *Decl) {
- if (!E.isUsable() || !Decl || !Decl->isConsteval() || isConstantEvaluated() ||
- RebuildingImmediateInvocation)
+ if (isUnevaluatedContext() || !E.isUsable() || !Decl ||
+ !Decl->isConsteval() || isConstantEvaluated() ||
+ RebuildingImmediateInvocation || isImmediateFunctionContext())
return E;
/// Opportunistically remove the callee from ReferencesToConsteval if we can.
@@ -16881,6 +16918,8 @@ static bool isPotentiallyConstantEvaluatedContext(Sema &SemaRef) {
// An expression or conversion is potentially constant evaluated if it is
switch (SemaRef.ExprEvalContexts.back().Context) {
case Sema::ExpressionEvaluationContext::ConstantEvaluated:
+ case Sema::ExpressionEvaluationContext::ImmediateFunctionContext:
+
// -- a manifestly constant-evaluated expression,
case Sema::ExpressionEvaluationContext::PotentiallyEvaluated:
case Sema::ExpressionEvaluationContext::PotentiallyEvaluatedIfUsed:
@@ -17003,6 +17042,7 @@ static OdrUseContext isOdrUseContext(Sema &SemaRef) {
return OdrUseContext::None;
case Sema::ExpressionEvaluationContext::ConstantEvaluated:
+ case Sema::ExpressionEvaluationContext::ImmediateFunctionContext:
case Sema::ExpressionEvaluationContext::PotentiallyEvaluated:
Result = OdrUseContext::Used;
break;
@@ -17351,9 +17391,8 @@ void Sema::MarkCaptureUsedInEnclosingContext(VarDecl *Capture,
MarkVarDeclODRUsed(Capture, Loc, *this, &CapturingScopeIndex);
}
-static void
-diagnoseUncapturableValueReference(Sema &S, SourceLocation loc,
- ValueDecl *var, DeclContext *DC) {
+static void diagnoseUncapturableValueReference(Sema &S, SourceLocation loc,
+ ValueDecl *var) {
DeclContext *VarDC = var->getDeclContext();
// If the parameter still belongs to the translation unit, then
@@ -17432,7 +17471,7 @@ static DeclContext *getParentOfCapturingContextOrNull(DeclContext *DC, VarDecl *
return getLambdaAwareParentOfDeclContext(DC);
else if (Var->hasLocalStorage()) {
if (Diagnose)
- diagnoseUncapturableValueReference(S, Loc, Var, DC);
+ diagnoseUncapturableValueReference(S, Loc, Var);
}
return nullptr;
}
@@ -17906,7 +17945,7 @@ bool Sema::tryCaptureVariable(
Diag(LSI->Lambda->getBeginLoc(), diag::note_lambda_decl);
buildLambdaCaptureFixit(*this, LSI, Var);
} else
- diagnoseUncapturableValueReference(*this, ExprLoc, Var, DC);
+ diagnoseUncapturableValueReference(*this, ExprLoc, Var);
}
return true;
}
@@ -18270,7 +18309,6 @@ static ExprResult rebuildPotentialResultsAsNonOdrUsed(Sema &S, Expr *E,
ME->getQualifierLoc(), ME->getTemplateKeywordLoc(), ME->getMemberDecl(),
ME->getFoundDecl(), ME->getMemberNameInfo(), CopiedTemplateArgs(ME),
ME->getType(), ME->getValueKind(), ME->getObjectKind(), NOUR);
- return ExprEmpty();
}
case Expr::BinaryOperatorClass: {
@@ -18726,8 +18764,8 @@ void Sema::MarkDeclRefReferenced(DeclRefExpr *E, const Expr *Base) {
OdrUse = false;
if (auto *FD = dyn_cast<FunctionDecl>(E->getDecl()))
- if (!isConstantEvaluated() && FD->isConsteval() &&
- !RebuildingImmediateInvocation)
+ if (!isUnevaluatedContext() && !isConstantEvaluated() &&
+ FD->isConsteval() && !RebuildingImmediateInvocation)
ExprEvalContexts.back().ReferenceToConsteval.insert(E);
MarkExprReferenced(*this, E->getLocation(), E->getDecl(), E, OdrUse,
RefsMinusAssignments);
@@ -18829,14 +18867,22 @@ class EvaluatedExprMarker : public UsedDeclVisitor<EvaluatedExprMarker> {
public:
typedef UsedDeclVisitor<EvaluatedExprMarker> Inherited;
bool SkipLocalVariables;
+ ArrayRef<const Expr *> StopAt;
- EvaluatedExprMarker(Sema &S, bool SkipLocalVariables)
- : Inherited(S), SkipLocalVariables(SkipLocalVariables) {}
+ EvaluatedExprMarker(Sema &S, bool SkipLocalVariables,
+ ArrayRef<const Expr *> StopAt)
+ : Inherited(S), SkipLocalVariables(SkipLocalVariables), StopAt(StopAt) {}
void visitUsedDecl(SourceLocation Loc, Decl *D) {
S.MarkFunctionReferenced(Loc, cast<FunctionDecl>(D));
}
+ void Visit(Expr *E) {
+ if (std::find(StopAt.begin(), StopAt.end(), E) != StopAt.end())
+ return;
+ Inherited::Visit(E);
+ }
+
void VisitDeclRefExpr(DeclRefExpr *E) {
// If we were asked not to visit local variables, don't.
if (SkipLocalVariables) {
@@ -18863,9 +18909,43 @@ public:
///
/// \param SkipLocalVariables If true, don't mark local variables as
/// 'referenced'.
+/// \param StopAt Subexpressions that we shouldn't recurse into.
void Sema::MarkDeclarationsReferencedInExpr(Expr *E,
- bool SkipLocalVariables) {
- EvaluatedExprMarker(*this, SkipLocalVariables).Visit(E);
+ bool SkipLocalVariables,
+ ArrayRef<const Expr*> StopAt) {
+ EvaluatedExprMarker(*this, SkipLocalVariables, StopAt).Visit(E);
+}
+
+/// Emit a diagnostic when statements are reachable.
+/// FIXME: check for reachability even in expressions for which we don't build a
+/// CFG (eg, in the initializer of a global or in a constant expression).
+/// For example,
+/// namespace { auto *p = new double[3][false ? (1, 2) : 3]; }
+bool Sema::DiagIfReachable(SourceLocation Loc, ArrayRef<const Stmt *> Stmts,
+ const PartialDiagnostic &PD) {
+ if (!Stmts.empty() && getCurFunctionOrMethodDecl()) {
+ if (!FunctionScopes.empty())
+ FunctionScopes.back()->PossiblyUnreachableDiags.push_back(
+ sema::PossiblyUnreachableDiag(PD, Loc, Stmts));
+ return true;
+ }
+
+ // The initializer of a constexpr variable or of the first declaration of a
+ // static data member is not syntactically a constant evaluated constant,
+ // but nonetheless is always required to be a constant expression, so we
+ // can skip diagnosing.
+ // FIXME: Using the mangling context here is a hack.
+ if (auto *VD = dyn_cast_or_null<VarDecl>(
+ ExprEvalContexts.back().ManglingContextDecl)) {
+ if (VD->isConstexpr() ||
+ (VD->isStaticDataMember() && VD->isFirstDecl() && !VD->isInline()))
+ return false;
+ // FIXME: For any other kind of variable, we should build a CFG for its
+ // initializer and check whether the context in question is reachable.
+ }
+
+ Diag(Loc, PD);
+ return true;
}
/// Emit a diagnostic that describes an effect on the run-time behavior
@@ -18895,33 +18975,13 @@ bool Sema::DiagRuntimeBehavior(SourceLocation Loc, ArrayRef<const Stmt*> Stmts,
break;
case ExpressionEvaluationContext::ConstantEvaluated:
+ case ExpressionEvaluationContext::ImmediateFunctionContext:
// Relevant diagnostics should be produced by constant evaluation.
break;
case ExpressionEvaluationContext::PotentiallyEvaluated:
case ExpressionEvaluationContext::PotentiallyEvaluatedIfUsed:
- if (!Stmts.empty() && getCurFunctionOrMethodDecl()) {
- FunctionScopes.back()->PossiblyUnreachableDiags.
- push_back(sema::PossiblyUnreachableDiag(PD, Loc, Stmts));
- return true;
- }
-
- // The initializer of a constexpr variable or of the first declaration of a
- // static data member is not syntactically a constant evaluated constant,
- // but nonetheless is always required to be a constant expression, so we
- // can skip diagnosing.
- // FIXME: Using the mangling context here is a hack.
- if (auto *VD = dyn_cast_or_null<VarDecl>(
- ExprEvalContexts.back().ManglingContextDecl)) {
- if (VD->isConstexpr() ||
- (VD->isStaticDataMember() && VD->isFirstDecl() && !VD->isInline()))
- break;
- // FIXME: For any other kind of variable, we should build a CFG for its
- // initializer and check whether the context in question is reachable.
- }
-
- Diag(Loc, PD);
- return true;
+ return DiagIfReachable(Loc, Stmts, PD);
}
return false;
@@ -19371,14 +19431,7 @@ ExprResult RebuildUnknownAnyExpr::VisitCallExpr(CallExpr *E) {
if (ParamTypes.empty() && Proto->isVariadic()) { // the special case
ArgTypes.reserve(E->getNumArgs());
for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
- Expr *Arg = E->getArg(i);
- QualType ArgType = Arg->getType();
- if (E->isLValue()) {
- ArgType = S.Context.getLValueReferenceType(ArgType);
- } else if (E->isXValue()) {
- ArgType = S.Context.getRValueReferenceType(ArgType);
- }
- ArgTypes.push_back(ArgType);
+ ArgTypes.push_back(S.Context.getReferenceQualifiedType(E->getArg(i)));
}
ParamTypes = ArgTypes;
}
@@ -19505,7 +19558,8 @@ ExprResult RebuildUnknownAnyExpr::resolveDecl(Expr *E, ValueDecl *VD) {
FunctionDecl *NewFD = FunctionDecl::Create(
S.Context, FD->getDeclContext(), Loc, Loc,
FD->getNameInfo().getName(), DestType, FD->getTypeSourceInfo(),
- SC_None, false /*isInlineSpecified*/, FD->hasPrototype(),
+ SC_None, S.getCurFPFeatures().isFPConstrained(),
+ false /*isInlineSpecified*/, FD->hasPrototype(),
/*ConstexprKind*/ ConstexprSpecKind::Unspecified);
if (FD->getQualifier())
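
The CheckCommaOperands hunk above now reports an unused left-hand operand of a comma expression through a dedicated diagnostic ID (warn_unused_comma_left_operand) instead of the generic unused-result path. A minimal, illustrative C++ sketch of code that would reach that path; the function and variable names are made up for the example and the exact warning text comes from clang's diagnostic tables:

    int demo(int a) {
      int total = 0;
      (total, total += a);   // left operand 'total' has no effect; with this change
                             // it is reported via the comma-specific diagnostic
      return total;
    }
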
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
index 111ffa1f04a0..891909c443cc 100644
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -468,7 +468,7 @@ ParsedType Sema::getDestructorTypeForDecltype(const DeclSpec &DS,
assert(DS.getTypeSpecType() == DeclSpec::TST_decltype &&
"unexpected type in getDestructorType");
- QualType T = BuildDecltypeType(DS.getRepAsExpr(), DS.getTypeSpecTypeLoc());
+ QualType T = BuildDecltypeType(DS.getRepAsExpr());
// If we know the type of the object, check that the correct destructor
// type was named now; we can give better diagnostics this way.
@@ -494,7 +494,7 @@ bool Sema::checkLiteralOperatorId(const CXXScopeSpec &SS,
IdentifierInfo *II = Name.Identifier;
ReservedIdentifierStatus Status = II->isReserved(PP.getLangOpts());
SourceLocation Loc = Name.getEndLoc();
- if (Status != ReservedIdentifierStatus::NotReserved &&
+ if (isReservedInAllContexts(Status) &&
!PP.getSourceManager().isInSystemHeader(Loc)) {
Diag(Loc, diag::warn_reserved_extern_symbol)
<< II << static_cast<int>(Status)
@@ -893,9 +893,8 @@ ExprResult Sema::BuildCXXThrow(SourceLocation OpLoc, Expr *Ex,
if (CheckCXXThrowOperand(OpLoc, ExceptionObjectTy, Ex))
return ExprError();
- InitializedEntity Entity = InitializedEntity::InitializeException(
- OpLoc, ExceptionObjectTy,
- /*NRVO=*/NRInfo.isCopyElidable());
+ InitializedEntity Entity =
+ InitializedEntity::InitializeException(OpLoc, ExceptionObjectTy);
ExprResult Res = PerformMoveOrCopyInitialization(Entity, NRInfo, Ex);
if (Res.isInvalid())
return ExprError();
@@ -1137,11 +1136,10 @@ static QualType adjustCVQualifiersForCXXThisWithinLambda(
}
}
- // 2) We've run out of ScopeInfos but check if CurDC is a lambda (which can
- // happen during instantiation of its nested generic lambda call operator)
- if (isLambdaCallOperator(CurDC)) {
- assert(CurLSI && "While computing 'this' capture-type for a generic "
- "lambda, we must have a corresponding LambdaScopeInfo");
+ // 2) We've run out of ScopeInfos but check 1. if CurDC is a lambda (which
+ // can happen during instantiation of its nested generic lambda call
+ // operator); 2. if we're in a lambda scope (lambda body).
+ if (CurLSI && isLambdaCallOperator(CurDC)) {
assert(isGenericLambdaCallOperatorSpecialization(CurLSI->CallOperator) &&
"While computing 'this' capture-type for a generic lambda, when we "
"run out of enclosing LSI's, yet the enclosing DC is a "
@@ -1454,7 +1452,8 @@ Sema::BuildCXXTypeConstructExpr(TypeSourceInfo *TInfo,
"List initialization must have initializer list as expression.");
SourceRange FullRange = SourceRange(TyBeginLoc, RParenOrBraceLoc);
- InitializedEntity Entity = InitializedEntity::InitializeTemporary(TInfo);
+ InitializedEntity Entity =
+ InitializedEntity::InitializeTemporary(Context, TInfo);
InitializationKind Kind =
Exprs.size()
? ListInitialization
@@ -1968,10 +1967,10 @@ Sema::BuildCXXNew(SourceRange Range, bool UseGlobal,
if (Deduced && isa<DeducedTemplateSpecializationType>(Deduced)) {
if (ArraySize)
return ExprError(
- Diag(ArraySize ? (*ArraySize)->getExprLoc() : TypeRange.getBegin(),
+ Diag(*ArraySize ? (*ArraySize)->getExprLoc() : TypeRange.getBegin(),
diag::err_deduced_class_template_compound_type)
<< /*array*/ 2
- << (ArraySize ? (*ArraySize)->getSourceRange() : TypeRange));
+ << (*ArraySize ? (*ArraySize)->getSourceRange() : TypeRange));
InitializedEntity Entity
= InitializedEntity::InitializeNew(StartLoc, AllocType);
@@ -2138,39 +2137,38 @@ Sema::BuildCXXNew(SourceRange Range, bool UseGlobal,
// Let's see if this is a constant < 0. If so, we reject it out of hand,
// per CWG1464. Otherwise, if it's not a constant, we must have an
// unparenthesized array type.
- if (!(*ArraySize)->isValueDependent()) {
- // We've already performed any required implicit conversion to integer or
- // unscoped enumeration type.
- // FIXME: Per CWG1464, we are required to check the value prior to
- // converting to size_t. This will never find a negative array size in
- // C++14 onwards, because Value is always unsigned here!
- if (Optional<llvm::APSInt> Value =
- (*ArraySize)->getIntegerConstantExpr(Context)) {
- if (Value->isSigned() && Value->isNegative()) {
- return ExprError(Diag((*ArraySize)->getBeginLoc(),
- diag::err_typecheck_negative_array_size)
- << (*ArraySize)->getSourceRange());
- }
- if (!AllocType->isDependentType()) {
- unsigned ActiveSizeBits = ConstantArrayType::getNumAddressingBits(
- Context, AllocType, *Value);
- if (ActiveSizeBits > ConstantArrayType::getMaxSizeBits(Context))
- return ExprError(
- Diag((*ArraySize)->getBeginLoc(), diag::err_array_too_large)
- << toString(*Value, 10) << (*ArraySize)->getSourceRange());
- }
-
- KnownArraySize = Value->getZExtValue();
- } else if (TypeIdParens.isValid()) {
- // Can't have dynamic array size when the type-id is in parentheses.
- Diag((*ArraySize)->getBeginLoc(), diag::ext_new_paren_array_nonconst)
- << (*ArraySize)->getSourceRange()
- << FixItHint::CreateRemoval(TypeIdParens.getBegin())
- << FixItHint::CreateRemoval(TypeIdParens.getEnd());
+ // We've already performed any required implicit conversion to integer or
+ // unscoped enumeration type.
+ // FIXME: Per CWG1464, we are required to check the value prior to
+ // converting to size_t. This will never find a negative array size in
+ // C++14 onwards, because Value is always unsigned here!
+ if (Optional<llvm::APSInt> Value =
+ (*ArraySize)->getIntegerConstantExpr(Context)) {
+ if (Value->isSigned() && Value->isNegative()) {
+ return ExprError(Diag((*ArraySize)->getBeginLoc(),
+ diag::err_typecheck_negative_array_size)
+ << (*ArraySize)->getSourceRange());
+ }
- TypeIdParens = SourceRange();
+ if (!AllocType->isDependentType()) {
+ unsigned ActiveSizeBits =
+ ConstantArrayType::getNumAddressingBits(Context, AllocType, *Value);
+ if (ActiveSizeBits > ConstantArrayType::getMaxSizeBits(Context))
+ return ExprError(
+ Diag((*ArraySize)->getBeginLoc(), diag::err_array_too_large)
+ << toString(*Value, 10) << (*ArraySize)->getSourceRange());
}
+
+ KnownArraySize = Value->getZExtValue();
+ } else if (TypeIdParens.isValid()) {
+ // Can't have dynamic array size when the type-id is in parentheses.
+ Diag((*ArraySize)->getBeginLoc(), diag::ext_new_paren_array_nonconst)
+ << (*ArraySize)->getSourceRange()
+ << FixItHint::CreateRemoval(TypeIdParens.getBegin())
+ << FixItHint::CreateRemoval(TypeIdParens.getEnd());
+
+ TypeIdParens = SourceRange();
}
// Note that we do *not* convert the argument in any way. It can
@@ -2248,8 +2246,7 @@ Sema::BuildCXXNew(SourceRange Range, bool UseGlobal,
}
IntegerLiteral AllocationSizeLiteral(
- Context,
- AllocationSize.getValueOr(llvm::APInt::getNullValue(SizeTyWidth)),
+ Context, AllocationSize.getValueOr(llvm::APInt::getZero(SizeTyWidth)),
SizeTy, SourceLocation());
// Otherwise, if we failed to constant-fold the allocation size, we'll
// just give up and pass-in something opaque, that isn't a null pointer.
@@ -2594,10 +2591,9 @@ bool Sema::FindAllocationFunctions(SourceLocation StartLoc, SourceRange Range,
// FIXME: Should the Sema create the expression and embed it in the syntax
// tree? Or should the consumer just recalculate the value?
// FIXME: Using a dummy value will interact poorly with attribute enable_if.
- IntegerLiteral Size(Context, llvm::APInt::getNullValue(
- Context.getTargetInfo().getPointerWidth(0)),
- Context.getSizeType(),
- SourceLocation());
+ IntegerLiteral Size(
+ Context, llvm::APInt::getZero(Context.getTargetInfo().getPointerWidth(0)),
+ Context.getSizeType(), SourceLocation());
AllocArgs.push_back(&Size);
QualType AlignValT = Context.VoidTy;
@@ -3049,6 +3045,9 @@ void Sema::DeclareGlobalAllocationFunction(DeclarationName Name,
EPI.ExceptionSpec.Type = EST_Dynamic;
EPI.ExceptionSpec.Exceptions = llvm::makeArrayRef(BadAllocType);
}
+ if (getLangOpts().NewInfallible) {
+ EPI.ExceptionSpec.Type = EST_DynamicNone;
+ }
} else {
EPI.ExceptionSpec =
getLangOpts().CPlusPlus11 ? EST_BasicNoexcept : EST_DynamicNone;
@@ -3057,12 +3056,17 @@ void Sema::DeclareGlobalAllocationFunction(DeclarationName Name,
auto CreateAllocationFunctionDecl = [&](Attr *ExtraAttr) {
QualType FnType = Context.getFunctionType(Return, Params, EPI);
FunctionDecl *Alloc = FunctionDecl::Create(
- Context, GlobalCtx, SourceLocation(), SourceLocation(), Name,
- FnType, /*TInfo=*/nullptr, SC_None, false, true);
+ Context, GlobalCtx, SourceLocation(), SourceLocation(), Name, FnType,
+ /*TInfo=*/nullptr, SC_None, getCurFPFeatures().isFPConstrained(), false,
+ true);
Alloc->setImplicit();
// Global allocation functions should always be visible.
Alloc->setVisibleDespiteOwningModule();
+ if (HasBadAllocExceptionSpec && getLangOpts().NewInfallible)
+ Alloc->addAttr(
+ ReturnsNonNullAttr::CreateImplicit(Context, Alloc->getLocation()));
+
Alloc->addAttr(VisibilityAttr::CreateImplicit(
Context, LangOpts.GlobalAllocationFunctionVisibilityHidden
? VisibilityAttr::Hidden
@@ -5285,7 +5289,8 @@ static bool evaluateTypeTrait(Sema &S, TypeTrait Kind, SourceLocation KWLoc,
S, Sema::ExpressionEvaluationContext::Unevaluated);
Sema::SFINAETrap SFINAE(S, /*AccessCheckingSFINAE=*/true);
Sema::ContextRAII TUContext(S, S.Context.getTranslationUnitDecl());
- InitializedEntity To(InitializedEntity::InitializeTemporary(Args[0]));
+ InitializedEntity To(
+ InitializedEntity::InitializeTemporary(S.Context, Args[0]));
InitializationKind InitKind(InitializationKind::CreateDirect(KWLoc, KWLoc,
RParenLoc));
InitializationSequence Init(S, To, InitKind, ArgExprs);
@@ -5891,10 +5896,8 @@ static bool TryClassUnification(Sema &Self, Expr *From, Expr *To,
// -- If E2 is an xvalue: E1 can be converted to match E2 if E1 can be
// implicitly converted to the type "rvalue reference to R2", subject to
// the constraint that the reference must bind directly.
- if (To->isLValue() || To->isXValue()) {
- QualType T = To->isLValue() ? Self.Context.getLValueReferenceType(ToType)
- : Self.Context.getRValueReferenceType(ToType);
-
+ if (To->isGLValue()) {
+ QualType T = Self.Context.getReferenceQualifiedType(To);
InitializedEntity Entity = InitializedEntity::InitializeTemporary(T);
InitializationSequence InitSeq(Self, Entity, Kind, From);
@@ -6670,8 +6673,15 @@ QualType Sema::FindCompositePointerType(SourceLocation Loc,
} else if (Steps.size() == 1) {
bool MaybeQ1 = Q1.isAddressSpaceSupersetOf(Q2);
bool MaybeQ2 = Q2.isAddressSpaceSupersetOf(Q1);
- if (MaybeQ1 == MaybeQ2)
- return QualType(); // No unique best address space.
+ if (MaybeQ1 == MaybeQ2) {
+ // Exception for ptr size address spaces. Should be able to choose
+ // either address space during comparison.
+ if (isPtrSizeAddressSpace(Q1.getAddressSpace()) ||
+ isPtrSizeAddressSpace(Q2.getAddressSpace()))
+ MaybeQ1 = true;
+ else
+ return QualType(); // No unique best address space.
+ }
Quals.setAddressSpace(MaybeQ1 ? Q1.getAddressSpace()
: Q2.getAddressSpace());
} else {
@@ -6700,6 +6710,36 @@ QualType Sema::FindCompositePointerType(SourceLocation Loc,
}
// FIXME: Can we unify the following with UnwrapSimilarTypes?
+
+ const ArrayType *Arr1, *Arr2;
+ if ((Arr1 = Context.getAsArrayType(Composite1)) &&
+ (Arr2 = Context.getAsArrayType(Composite2))) {
+ auto *CAT1 = dyn_cast<ConstantArrayType>(Arr1);
+ auto *CAT2 = dyn_cast<ConstantArrayType>(Arr2);
+ if (CAT1 && CAT2 && CAT1->getSize() == CAT2->getSize()) {
+ Composite1 = Arr1->getElementType();
+ Composite2 = Arr2->getElementType();
+ Steps.emplace_back(Step::Array, CAT1);
+ continue;
+ }
+ bool IAT1 = isa<IncompleteArrayType>(Arr1);
+ bool IAT2 = isa<IncompleteArrayType>(Arr2);
+ if ((IAT1 && IAT2) ||
+ (getLangOpts().CPlusPlus20 && (IAT1 != IAT2) &&
+ ((bool)CAT1 != (bool)CAT2) &&
+ (Steps.empty() || Steps.back().K != Step::Array))) {
+ // In C++20 onwards, we can unify an array of N T with an array of
+ // a different or unknown bound. But we can't form an array whose
+ // element type is an array of unknown bound by doing so.
+ Composite1 = Arr1->getElementType();
+ Composite2 = Arr2->getElementType();
+ Steps.emplace_back(Step::Array);
+ if (CAT1 || CAT2)
+ NeedConstBefore = Steps.size();
+ continue;
+ }
+ }
+
const PointerType *Ptr1, *Ptr2;
if ((Ptr1 = Composite1->getAs<PointerType>()) &&
(Ptr2 = Composite2->getAs<PointerType>())) {
@@ -6760,8 +6800,6 @@ QualType Sema::FindCompositePointerType(SourceLocation Loc,
continue;
}
- // FIXME: arrays
-
// FIXME: block pointer types?
// Cannot unwrap any more types.
@@ -7723,8 +7761,7 @@ ExprResult Sema::ActOnPseudoDestructorExpr(Scope *S, Expr *Base,
return true;
}
- QualType T = BuildDecltypeType(DS.getRepAsExpr(), DS.getTypeSpecTypeLoc(),
- false);
+ QualType T = BuildDecltypeType(DS.getRepAsExpr(), /*AsUnevaluated=*/false);
TypeLocBuilder TLB;
DecltypeTypeLoc DecltypeTL = TLB.push<DecltypeTypeLoc>(T);
@@ -8519,7 +8556,7 @@ ExprResult Sema::ActOnFinishFullExpr(Expr *FE, SourceLocation CC,
if (FullExpr.isInvalid())
return ExprError();
- DiagnoseUnusedExprResult(FullExpr.get());
+ DiagnoseUnusedExprResult(FullExpr.get(), diag::warn_unused_expr);
}
FullExpr = CorrectDelayedTyposInExpr(FullExpr.get(), /*InitDecl=*/nullptr,
@@ -8743,7 +8780,7 @@ Sema::BuildExprRequirement(
TemplateParameterList *TPL =
ReturnTypeRequirement.getTypeConstraintTemplateParameterList();
QualType MatchedType =
- getDecltypeForParenthesizedExpr(E).getCanonicalType();
+ Context.getReferenceQualifiedType(E).getCanonicalType();
llvm::SmallVector<TemplateArgument, 1> Args;
Args.push_back(TemplateArgument(MatchedType));
TemplateArgumentList TAL(TemplateArgumentList::OnStack, Args);
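
The FindCompositePointerType hunks above extend composite-pointer-type computation to array types, including the C++20 unification of arrays of known and unknown bound. A hedged sketch of the kind of conditional expression this is meant to accept (identifiers are illustrative, not taken from the patch):

    // C++20: the composite pointer type of int(*)[4] and int(*)[] is int(*)[].
    void pick(bool cond, int (*known)[4], int (*unknown)[]) {
      auto result = cond ? known : unknown;   // result has type int (*)[]
      (void)result;
    }
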
diff --git a/clang/lib/Sema/SemaExprMember.cpp b/clang/lib/Sema/SemaExprMember.cpp
index af2aa49c0103..83006f9d804a 100644
--- a/clang/lib/Sema/SemaExprMember.cpp
+++ b/clang/lib/Sema/SemaExprMember.cpp
@@ -144,6 +144,7 @@ static IMAKind ClassifyImplicitMemberAccess(Sema &SemaRef,
case Sema::ExpressionEvaluationContext::DiscardedStatement:
case Sema::ExpressionEvaluationContext::ConstantEvaluated:
+ case Sema::ExpressionEvaluationContext::ImmediateFunctionContext:
case Sema::ExpressionEvaluationContext::PotentiallyEvaluated:
case Sema::ExpressionEvaluationContext::PotentiallyEvaluatedIfUsed:
break;
@@ -340,7 +341,8 @@ CheckExtVectorComponent(Sema &S, QualType baseType, ExprValueKind &VK,
// Emit a warning if an rgba selector is used earlier than OpenCL C 3.0.
if (HasRGBA || (*compStr && IsRGBA(*compStr))) {
- if (S.getLangOpts().OpenCL && S.getLangOpts().OpenCLVersion < 300) {
+ if (S.getLangOpts().OpenCL &&
+ S.getLangOpts().getOpenCLCompatibleVersion() < 300) {
const char *DiagBegin = HasRGBA ? CompName->getNameStart() : compStr;
S.Diag(OpLoc, diag::ext_opencl_ext_vector_type_rgba_selector)
<< StringRef(DiagBegin, 1) << SourceRange(CompLoc);
@@ -564,10 +566,7 @@ bool Sema::CheckQualifiedMemberReference(Expr *BaseExpr,
return false;
// Note that we use the DC of the decl, not the underlying decl.
- DeclContext *DC = (*I)->getDeclContext();
- while (DC->isTransparentContext())
- DC = DC->getParent();
-
+ DeclContext *DC = (*I)->getDeclContext()->getNonTransparentContext();
if (!DC->isRecord())
continue;
@@ -612,11 +611,10 @@ public:
if (Record->containsDecl(ND))
return true;
- if (const CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(Record)) {
+ if (const auto *RD = dyn_cast<CXXRecordDecl>(Record)) {
// Accept candidates that occur in any of the current class' base classes.
for (const auto &BS : RD->bases()) {
- if (const RecordType *BSTy =
- dyn_cast_or_null<RecordType>(BS.getType().getTypePtrOrNull())) {
+ if (const auto *BSTy = BS.getType()->getAs<RecordType>()) {
if (BSTy->getDecl()->containsDecl(ND))
return true;
}
diff --git a/clang/lib/Sema/SemaExprObjC.cpp b/clang/lib/Sema/SemaExprObjC.cpp
index 8a9c933fc93f..bdc8e1e0b336 100644
--- a/clang/lib/Sema/SemaExprObjC.cpp
+++ b/clang/lib/Sema/SemaExprObjC.cpp
@@ -3772,7 +3772,7 @@ static void addFixitForObjCARCConversion(
SourceManager &SM = S.getSourceManager();
char PrevChar = *SM.getCharacterData(range.getBegin().getLocWithOffset(-1));
- if (Lexer::isIdentifierBodyChar(PrevChar, S.getLangOpts()))
+ if (Lexer::isAsciiIdentifierContinueChar(PrevChar, S.getLangOpts()))
BridgeCall += ' ';
BridgeCall += CFBridgeName;
@@ -3790,7 +3790,7 @@ static void addFixitForObjCARCConversion(
SourceManager &SM = S.getSourceManager();
char PrevChar = *SM.getCharacterData(range.getBegin().getLocWithOffset(-1));
- if (Lexer::isIdentifierBodyChar(PrevChar, S.getLangOpts()))
+ if (Lexer::isAsciiIdentifierContinueChar(PrevChar, S.getLangOpts()))
BridgeCall += ' ';
BridgeCall += CFBridgeName;
@@ -4015,12 +4015,11 @@ static bool CheckObjCBridgeNSCast(Sema &S, QualType castType, Expr *castExpr,
if (Parm->isStr("id"))
return true;
- NamedDecl *Target = nullptr;
// Check for an existing type with this name.
LookupResult R(S, DeclarationName(Parm), SourceLocation(),
Sema::LookupOrdinaryName);
if (S.LookupName(R, S.TUScope)) {
- Target = R.getFoundDecl();
+ NamedDecl *Target = R.getFoundDecl();
if (Target && isa<ObjCInterfaceDecl>(Target)) {
ObjCInterfaceDecl *ExprClass = cast<ObjCInterfaceDecl>(Target);
if (const ObjCObjectPointerType *InterfacePointerType =
@@ -4056,8 +4055,6 @@ static bool CheckObjCBridgeNSCast(Sema &S, QualType castType, Expr *castExpr,
diag::err_objc_cf_bridged_not_interface)
<< castExpr->getType() << Parm;
S.Diag(TDNDecl->getBeginLoc(), diag::note_declared_at);
- if (Target)
- S.Diag(Target->getBeginLoc(), diag::note_declared_at);
}
return true;
}
@@ -4453,9 +4450,14 @@ Sema::CheckObjCConversion(SourceRange castRange, QualType castType,
// Allow casts between pointers to lifetime types (e.g., __strong id*)
// and pointers to void (e.g., cv void *). Casting from void* to lifetime*
// must be explicit.
- if (exprACTC == ACTC_indirectRetainable && castACTC == ACTC_voidPtr)
+ // Allow conversions between pointers to lifetime types and coreFoundation
+ // pointers too, but only when the conversions are explicit.
+ if (exprACTC == ACTC_indirectRetainable &&
+ (castACTC == ACTC_voidPtr ||
+ (castACTC == ACTC_coreFoundation && isCast(CCK))))
return ACR_okay;
- if (castACTC == ACTC_indirectRetainable && exprACTC == ACTC_voidPtr &&
+ if (castACTC == ACTC_indirectRetainable &&
+ (exprACTC == ACTC_voidPtr || exprACTC == ACTC_coreFoundation) &&
isCast(CCK))
return ACR_okay;
diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp
index 78574e34d906..119a90deb9c2 100644
--- a/clang/lib/Sema/SemaInit.cpp
+++ b/clang/lib/Sema/SemaInit.cpp
@@ -2041,7 +2041,7 @@ void InitListChecker::CheckStructUnionTypes(
RecordDecl *structDecl = DeclType->castAs<RecordType>()->getDecl();
// If the record is invalid, some of it's members are invalid. To avoid
- // confusion, we forgo checking the intializer for the entire record.
+ // confusion, we forgo checking the initializer for the entire record.
if (structDecl->isInvalidDecl()) {
// Assume it was supposed to consume a single initializer.
++Index;
@@ -2899,7 +2899,7 @@ InitListChecker::CheckDesignatedInitializer(const InitializedEntity &Entity,
// We're modifying a string literal init; we have to decompose the string
// so we can modify the individual characters.
ASTContext &Context = SemaRef.Context;
- Expr *SubExpr = StructuredList->getInit(0)->IgnoreParens();
+ Expr *SubExpr = StructuredList->getInit(0)->IgnoreParenImpCasts();
// Compute the character type
QualType CharTy = AT->getElementType();
@@ -3501,10 +3501,10 @@ void InitializationSequence::Step::Destroy() {
bool InitializationSequence::isDirectReferenceBinding() const {
// There can be some lvalue adjustments after the SK_BindReference step.
- for (auto I = Steps.rbegin(); I != Steps.rend(); ++I) {
- if (I->Kind == SK_BindReference)
+ for (const Step &S : llvm::reverse(Steps)) {
+ if (S.Kind == SK_BindReference)
return true;
- if (I->Kind == SK_BindReferenceToTemporary)
+ if (S.Kind == SK_BindReferenceToTemporary)
return false;
}
return false;
@@ -6932,10 +6932,10 @@ static void handleGslAnnotatedTypes(IndirectLocalPath &Path, Expr *Call,
return;
// Once we initialized a value with a reference, it can no longer dangle.
if (!Value) {
- for (auto It = Path.rbegin(), End = Path.rend(); It != End; ++It) {
- if (It->Kind == IndirectLocalPathEntry::GslReferenceInit)
+ for (const IndirectLocalPathEntry &PE : llvm::reverse(Path)) {
+ if (PE.Kind == IndirectLocalPathEntry::GslReferenceInit)
continue;
- if (It->Kind == IndirectLocalPathEntry::GslPointerInit)
+ if (PE.Kind == IndirectLocalPathEntry::GslPointerInit)
return;
break;
}
@@ -8252,7 +8252,7 @@ ExprResult InitializationSequence::Perform(Sema &S,
// When this is an incomplete array type (such as when this is
// initializing an array of unknown bounds from an init list), use THAT
- // type instead so that we propogate the array bounds.
+ // type instead so that we propagate the array bounds.
if (MTETy->isIncompleteArrayType() &&
!CurInit.get()->getType()->isIncompleteArrayType() &&
S.Context.hasSameType(
@@ -8914,12 +8914,16 @@ static void emitBadConversionNotes(Sema &S, const InitializedEntity &entity,
S.EmitRelatedResultTypeNoteForReturn(destType);
}
QualType fromType = op->getType();
- auto *fromDecl = fromType.getTypePtr()->getPointeeCXXRecordDecl();
- auto *destDecl = destType.getTypePtr()->getPointeeCXXRecordDecl();
+ QualType fromPointeeType = fromType.getCanonicalType()->getPointeeType();
+ QualType destPointeeType = destType.getCanonicalType()->getPointeeType();
+ auto *fromDecl = fromType->getPointeeCXXRecordDecl();
+ auto *destDecl = destType->getPointeeCXXRecordDecl();
if (fromDecl && destDecl && fromDecl->getDeclKind() == Decl::CXXRecord &&
destDecl->getDeclKind() == Decl::CXXRecord &&
!fromDecl->isInvalidDecl() && !destDecl->isInvalidDecl() &&
- !fromDecl->hasDefinition())
+ !fromDecl->hasDefinition() &&
+ destPointeeType.getQualifiers().compatiblyIncludes(
+ fromPointeeType.getQualifiers()))
S.Diag(fromDecl->getLocation(), diag::note_forward_class_conversion)
<< S.getASTContext().getTagDeclType(fromDecl)
<< S.getASTContext().getTagDeclType(destDecl);
@@ -9911,8 +9915,7 @@ Sema::PerformCopyInitialization(const InitializedEntity &Entity,
const bool ShouldTrackCopy =
Entity.isParameterKind() && Seq.isConstructorInitialization();
if (ShouldTrackCopy) {
- if (llvm::find(CurrentParameterCopyTypes, Entity.getType()) !=
- CurrentParameterCopyTypes.end()) {
+ if (llvm::is_contained(CurrentParameterCopyTypes, Entity.getType())) {
Seq.SetOverloadFailure(
InitializationSequence::FK_ConstructorOverloadFailed,
OR_No_Viable_Function);
@@ -9969,7 +9972,7 @@ QualType Sema::DeduceTemplateSpecializationFromInitializer(
auto TemplateName = DeducedTST->getTemplateName();
if (TemplateName.isDependent())
- return SubstAutoType(TSInfo->getType(), Context.DependentTy);
+ return SubstAutoTypeDependent(TSInfo->getType());
// We can only perform deduction for class templates.
auto *Template =
@@ -9988,7 +9991,7 @@ QualType Sema::DeduceTemplateSpecializationFromInitializer(
Diag(TSInfo->getTypeLoc().getBeginLoc(),
diag::warn_cxx14_compat_class_template_argument_deduction)
<< TSInfo->getTypeLoc().getSourceRange() << 0;
- return SubstAutoType(TSInfo->getType(), Context.DependentTy);
+ return SubstAutoTypeDependent(TSInfo->getType());
}
// FIXME: Perform "exact type" matching first, per CWG discussion?
diff --git a/clang/lib/Sema/SemaLambda.cpp b/clang/lib/Sema/SemaLambda.cpp
index eb1e9c3e5f7e..b05e0b5cc0f1 100644
--- a/clang/lib/Sema/SemaLambda.cpp
+++ b/clang/lib/Sema/SemaLambda.cpp
@@ -373,7 +373,7 @@ CXXMethodDecl *Sema::startLambdaDefinition(CXXRecordDecl *Class,
const FunctionProtoType *FPT = MethodType->castAs<FunctionProtoType>();
QualType Result = FPT->getReturnType();
if (Result->isUndeducedType()) {
- Result = SubstAutoType(Result, Context.DependentTy);
+ Result = SubstAutoTypeDependent(Result);
MethodType = Context.getFunctionType(Result, FPT->getParamTypes(),
FPT->getExtProtoInfo());
}
@@ -392,7 +392,7 @@ CXXMethodDecl *Sema::startLambdaDefinition(CXXRecordDecl *Class,
Context, Class, EndLoc,
DeclarationNameInfo(MethodName, IntroducerRange.getBegin(),
MethodNameLoc),
- MethodType, MethodTypeInfo, SC_None,
+ MethodType, MethodTypeInfo, SC_None, getCurFPFeatures().isFPConstrained(),
/*isInline=*/true, ConstexprKind, EndLoc, TrailingRequiresClause);
Method->setAccess(AS_public);
if (!TemplateParams)
@@ -1245,7 +1245,7 @@ void Sema::ActOnStartOfLambdaDefinition(LambdaIntroducer &Intro,
// cleanups from the enclosing full-expression.
PushExpressionEvaluationContext(
LSI->CallOperator->isConsteval()
- ? ExpressionEvaluationContext::ConstantEvaluated
+ ? ExpressionEvaluationContext::ImmediateFunctionContext
: ExpressionEvaluationContext::PotentiallyEvaluated);
}
@@ -1447,6 +1447,7 @@ static void addFunctionPointerConversion(Sema &S, SourceRange IntroducerRange,
CXXConversionDecl *Conversion = CXXConversionDecl::Create(
S.Context, Class, Loc,
DeclarationNameInfo(ConversionName, Loc, ConvNameLoc), ConvTy, ConvTSI,
+ S.getCurFPFeatures().isFPConstrained(),
/*isInline=*/true, ExplicitSpecifier(),
S.getLangOpts().CPlusPlus17 ? ConstexprSpecKind::Constexpr
: ConstexprSpecKind::Unspecified,
@@ -1488,6 +1489,7 @@ static void addFunctionPointerConversion(Sema &S, SourceRange IntroducerRange,
CXXMethodDecl *Invoke = CXXMethodDecl::Create(
S.Context, Class, Loc, DeclarationNameInfo(InvokerName, Loc),
InvokerFunctionTy, CallOperator->getTypeSourceInfo(), SC_Static,
+ S.getCurFPFeatures().isFPConstrained(),
/*isInline=*/true, ConstexprSpecKind::Unspecified,
CallOperator->getBody()->getEndLoc());
for (unsigned I = 0, N = CallOperator->getNumParams(); I != N; ++I)
@@ -1556,6 +1558,7 @@ static void addBlockPointerConversion(Sema &S,
CXXConversionDecl *Conversion = CXXConversionDecl::Create(
S.Context, Class, Loc, DeclarationNameInfo(Name, Loc, NameLoc), ConvTy,
S.Context.getTrivialTypeSourceInfo(ConvTy, Loc),
+ S.getCurFPFeatures().isFPConstrained(),
/*isInline=*/true, ExplicitSpecifier(), ConstexprSpecKind::Unspecified,
CallOperator->getBody()->getEndLoc());
Conversion->setAccess(AS_public);
@@ -1945,6 +1948,7 @@ ExprResult Sema::BuildLambdaExpr(SourceLocation StartLoc, SourceLocation EndLoc,
// ratified, it lays out the exact set of conditions where we shouldn't
// allow a lambda-expression.
case ExpressionEvaluationContext::ConstantEvaluated:
+ case ExpressionEvaluationContext::ImmediateFunctionContext:
// We don't actually diagnose this case immediately, because we
// could be within a context where we might find out later that
// the expression is potentially evaluated (e.g., for typeid).
@@ -1975,8 +1979,7 @@ ExprResult Sema::BuildBlockForLambdaConversion(SourceLocation CurrentLocation,
CallOperator->markUsed(Context);
ExprResult Init = PerformCopyInitialization(
- InitializedEntity::InitializeLambdaToBlock(ConvLocation, Src->getType(),
- /*NRVO=*/false),
+ InitializedEntity::InitializeLambdaToBlock(ConvLocation, Src->getType()),
CurrentLocation, Src);
if (!Init.isInvalid())
Init = ActOnFinishFullExpr(Init.get(), /*DiscardedValue*/ false);
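
The ActOnStartOfLambdaDefinition change above pushes the new ImmediateFunctionContext for the body of a consteval lambda rather than a plain constant-evaluated context. A rough C++20 sketch of the pattern that context models (identifiers are illustrative):

    consteval int square(int x) { return x * x; }

    // Within an immediate function context, a call to another consteval function
    // does not itself have to be a constant expression.
    auto area = [](int side) consteval { return square(side); };
    static_assert(area(3) == 9);
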
diff --git a/clang/lib/Sema/SemaLookup.cpp b/clang/lib/Sema/SemaLookup.cpp
index 5e8c4de61e5d..05529d055621 100644
--- a/clang/lib/Sema/SemaLookup.cpp
+++ b/clang/lib/Sema/SemaLookup.cpp
@@ -859,7 +859,8 @@ static void InsertOCLBuiltinDeclarationsFromTable(Sema &S, LookupResult &LR,
for (const auto &FTy : FunctionList) {
NewOpenCLBuiltin = FunctionDecl::Create(
Context, Parent, Loc, Loc, II, FTy, /*TInfo=*/nullptr, SC_Extern,
- false, FTy->isFunctionProtoType());
+ S.getCurFPFeatures().isFPConstrained(), false,
+ FTy->isFunctionProtoType());
NewOpenCLBuiltin->setImplicit();
// Create Decl objects for each parameter, adding them to the
@@ -3845,7 +3846,7 @@ private:
: Ctx->noload_lookups(/*PreserveInternalState=*/false)) {
for (auto *D : R) {
if (auto *ND = Result.getAcceptableDecl(D)) {
- // Rather than visit immediatelly, we put ND into a vector and visit
+ // Rather than visit immediately, we put ND into a vector and visit
// all decls, in order, outside of this loop. The reason is that
// Consumer.FoundDecl() may invalidate the iterators used in the two
// loops above.
@@ -4595,9 +4596,7 @@ void TypoCorrectionConsumer::NamespaceSpecifierSet::addNameSpecifier(
dyn_cast_or_null<NamedDecl>(NamespaceDeclChain.back())) {
IdentifierInfo *Name = ND->getIdentifier();
bool SameNameSpecifier = false;
- if (std::find(CurNameSpecifierIdentifiers.begin(),
- CurNameSpecifierIdentifiers.end(),
- Name) != CurNameSpecifierIdentifiers.end()) {
+ if (llvm::is_contained(CurNameSpecifierIdentifiers, Name)) {
std::string NewNameSpecifier;
llvm::raw_string_ostream SpecifierOStream(NewNameSpecifier);
SmallVector<const IdentifierInfo *, 4> NewNameSpecifierIdentifiers;
@@ -4606,8 +4605,7 @@ void TypoCorrectionConsumer::NamespaceSpecifierSet::addNameSpecifier(
SpecifierOStream.flush();
SameNameSpecifier = NewNameSpecifier == CurNameSpecifier;
}
- if (SameNameSpecifier || llvm::find(CurContextIdentifiers, Name) !=
- CurContextIdentifiers.end()) {
+ if (SameNameSpecifier || llvm::is_contained(CurContextIdentifiers, Name)) {
// Rebuild the NestedNameSpecifier as a globally-qualified specifier.
NNS = NestedNameSpecifier::GlobalSpecifier(Context);
NumSpecifiers =
@@ -5325,11 +5323,8 @@ static NamedDecl *getDefinitionToImport(NamedDecl *D) {
return FD->getDefinition();
if (TagDecl *TD = dyn_cast<TagDecl>(D))
return TD->getDefinition();
- // The first definition for this ObjCInterfaceDecl might be in the TU
- // and not associated with any module. Use the one we know to be complete
- // and have just seen in a module.
if (ObjCInterfaceDecl *ID = dyn_cast<ObjCInterfaceDecl>(D))
- return ID;
+ return ID->getDefinition();
if (ObjCProtocolDecl *PD = dyn_cast<ObjCProtocolDecl>(D))
return PD->getDefinition();
if (TemplateDecl *TD = dyn_cast<TemplateDecl>(D))
diff --git a/clang/lib/Sema/SemaObjCProperty.cpp b/clang/lib/Sema/SemaObjCProperty.cpp
index a329d0f22b03..74c73ace3c5f 100644
--- a/clang/lib/Sema/SemaObjCProperty.cpp
+++ b/clang/lib/Sema/SemaObjCProperty.cpp
@@ -1467,8 +1467,7 @@ Decl *Sema::ActOnPropertyImplDecl(Scope *S,
LoadSelfExpr, true, true);
ExprResult Res = PerformCopyInitialization(
InitializedEntity::InitializeResult(PropertyDiagLoc,
- getterMethod->getReturnType(),
- /*NRVO=*/false),
+ getterMethod->getReturnType()),
PropertyDiagLoc, IvarRefExpr);
if (!Res.isInvalid()) {
Expr *ResExpr = Res.getAs<Expr>();
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index c0cd2bf18a77..22ae5f59d41b 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -310,6 +310,8 @@ private:
/// Vector of previously encountered target directives
SmallVector<SourceLocation, 2> TargetLocations;
SourceLocation AtomicLocation;
+ /// Vector of declare variant construct traits.
+ SmallVector<llvm::omp::TraitProperty, 8> ConstructTraits;
public:
explicit DSAStackTy(Sema &S) : SemaRef(S) {}
@@ -513,7 +515,7 @@ public:
/// Checks if the specified declaration was used in the inner scan directive.
bool isUsedInScanDirective(ValueDecl *D) const {
if (const SharingMapTy *Stack = getTopOfStackOrNull())
- return Stack->UsedInScanDirective.count(D) > 0;
+ return Stack->UsedInScanDirective.contains(D);
return false;
}
@@ -726,6 +728,22 @@ public:
OMPC_DEFAULTMAP_MODIFIER_unknown;
}
+ ArrayRef<llvm::omp::TraitProperty> getConstructTraits() {
+ return ConstructTraits;
+ }
+ void handleConstructTrait(ArrayRef<llvm::omp::TraitProperty> Traits,
+ bool ScopeEntry) {
+ if (ScopeEntry)
+ ConstructTraits.append(Traits.begin(), Traits.end());
+ else
+ for (llvm::omp::TraitProperty Trait : llvm::reverse(Traits)) {
+ llvm::omp::TraitProperty Top = ConstructTraits.pop_back_val();
+ assert(Top == Trait && "Something left a trait on the stack!");
+ (void)Trait;
+ (void)Top;
+ }
+ }
+
DefaultDataSharingAttributes getDefaultDSA(unsigned Level) const {
return getStackSize() <= Level ? DSA_unspecified
: getStackElemAtLevel(Level).DefaultAttr;
@@ -1022,7 +1040,7 @@ public:
// Return set of mapped classes types
bool isClassPreviouslyMapped(QualType QT) const {
const SharingMapTy &StackElem = getTopOfStack();
- return StackElem.MappedClassesQualTypes.count(QT) != 0;
+ return StackElem.MappedClassesQualTypes.contains(QT);
}
/// Adds global declare target to the parent target region.
@@ -1061,7 +1079,7 @@ public:
}
/// Checks if the decl is implicitly firstprivate in the task-based region.
bool isImplicitTaskFirstprivate(Decl *D) const {
- return getTopOfStack().ImplicitTaskFirstprivates.count(D) > 0;
+ return getTopOfStack().ImplicitTaskFirstprivates.contains(D);
}
/// Marks decl as used in uses_allocators clause as the allocator.
@@ -3112,16 +3130,22 @@ static bool checkPreviousOMPAllocateAttribute(
static void
applyOMPAllocateAttribute(Sema &S, VarDecl *VD,
OMPAllocateDeclAttr::AllocatorTypeTy AllocatorKind,
- Expr *Allocator, SourceRange SR) {
+ Expr *Allocator, Expr *Alignment, SourceRange SR) {
if (VD->hasAttr<OMPAllocateDeclAttr>())
return;
+ if (Alignment &&
+ (Alignment->isTypeDependent() || Alignment->isValueDependent() ||
+ Alignment->isInstantiationDependent() ||
+ Alignment->containsUnexpandedParameterPack()))
+ // Apply later when we have a usable value.
+ return;
if (Allocator &&
(Allocator->isTypeDependent() || Allocator->isValueDependent() ||
Allocator->isInstantiationDependent() ||
Allocator->containsUnexpandedParameterPack()))
return;
auto *A = OMPAllocateDeclAttr::CreateImplicit(S.Context, AllocatorKind,
- Allocator, SR);
+ Allocator, Alignment, SR);
VD->addAttr(A);
if (ASTMutationListener *ML = S.Context.getASTMutationListener())
ML->DeclarationMarkedOpenMPAllocate(VD, A);
@@ -3130,7 +3154,8 @@ applyOMPAllocateAttribute(Sema &S, VarDecl *VD,
Sema::DeclGroupPtrTy Sema::ActOnOpenMPAllocateDirective(
SourceLocation Loc, ArrayRef<Expr *> VarList,
ArrayRef<OMPClause *> Clauses, DeclContext *Owner) {
- assert(Clauses.size() <= 1 && "Expected at most one clause.");
+ assert(Clauses.size() <= 2 && "Expected at most two clauses.");
+ Expr *Alignment = nullptr;
Expr *Allocator = nullptr;
if (Clauses.empty()) {
// OpenMP 5.0, 2.11.3 allocate Directive, Restrictions.
@@ -3141,7 +3166,13 @@ Sema::DeclGroupPtrTy Sema::ActOnOpenMPAllocateDirective(
!DSAStack->hasRequiresDeclWithClause<OMPDynamicAllocatorsClause>())
targetDiag(Loc, diag::err_expected_allocator_clause);
} else {
- Allocator = cast<OMPAllocatorClause>(Clauses.back())->getAllocator();
+ for (const OMPClause *C : Clauses)
+ if (const auto *AC = dyn_cast<OMPAllocatorClause>(C))
+ Allocator = AC->getAllocator();
+ else if (const auto *AC = dyn_cast<OMPAlignClause>(C))
+ Alignment = AC->getAlignment();
+ else
+ llvm_unreachable("Unexpected clause on allocate directive");
}
OMPAllocateDeclAttr::AllocatorTypeTy AllocatorKind =
getAllocatorKind(*this, DSAStack, Allocator);
@@ -3182,7 +3213,7 @@ Sema::DeclGroupPtrTy Sema::ActOnOpenMPAllocateDirective(
}
Vars.push_back(RefExpr);
- applyOMPAllocateAttribute(*this, VD, AllocatorKind, Allocator,
+ applyOMPAllocateAttribute(*this, VD, AllocatorKind, Allocator, Alignment,
DE->getSourceRange());
}
if (Vars.empty())
@@ -3213,7 +3244,7 @@ Sema::ActOnOpenMPRequiresDirective(SourceLocation Loc,
void Sema::ActOnOpenMPAssumesDirective(SourceLocation Loc,
OpenMPDirectiveKind DKind,
- ArrayRef<StringRef> Assumptions,
+ ArrayRef<std::string> Assumptions,
bool SkippedClauses) {
if (!SkippedClauses && Assumptions.empty())
Diag(Loc, diag::err_omp_no_clause_for_directive)
@@ -3805,13 +3836,8 @@ public:
VisitSubCaptures(S);
}
- void VisitOMPTileDirective(OMPTileDirective *S) {
- // #pragma omp tile does not introduce data sharing.
- VisitStmt(S);
- }
-
- void VisitOMPUnrollDirective(OMPUnrollDirective *S) {
- // #pragma omp unroll does not introduce data sharing.
+ void VisitOMPLoopTransformationDirective(OMPLoopTransformationDirective *S) {
+ // Loop transformation directives do not introduce data sharing
VisitStmt(S);
}
@@ -3871,6 +3897,23 @@ public:
};
} // namespace
+static void handleDeclareVariantConstructTrait(DSAStackTy *Stack,
+ OpenMPDirectiveKind DKind,
+ bool ScopeEntry) {
+ SmallVector<llvm::omp::TraitProperty, 8> Traits;
+ if (isOpenMPTargetExecutionDirective(DKind))
+ Traits.emplace_back(llvm::omp::TraitProperty::construct_target_target);
+ if (isOpenMPTeamsDirective(DKind))
+ Traits.emplace_back(llvm::omp::TraitProperty::construct_teams_teams);
+ if (isOpenMPParallelDirective(DKind))
+ Traits.emplace_back(llvm::omp::TraitProperty::construct_parallel_parallel);
+ if (isOpenMPWorksharingDirective(DKind))
+ Traits.emplace_back(llvm::omp::TraitProperty::construct_for_for);
+ if (isOpenMPSimdDirective(DKind))
+ Traits.emplace_back(llvm::omp::TraitProperty::construct_simd_simd);
+ Stack->handleConstructTrait(Traits, ScopeEntry);
+}
+
void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
switch (DKind) {
case OMPD_parallel:
@@ -3983,6 +4026,9 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
case OMPD_tile:
case OMPD_unroll:
break;
+ case OMPD_loop:
+ // TODO: 'loop' may require additional parameters depending on the binding.
+ // Treat similar to OMPD_simd/OMPD_for for now.
case OMPD_simd:
case OMPD_for:
case OMPD_for_simd:
@@ -4279,12 +4325,14 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
case OMPD_declare_variant:
case OMPD_begin_declare_variant:
case OMPD_end_declare_variant:
+ case OMPD_metadirective:
llvm_unreachable("OpenMP Directive is not allowed");
case OMPD_unknown:
default:
llvm_unreachable("Unknown OpenMP directive");
}
DSAStack->setContext(CurContext);
+ handleDeclareVariantConstructTrait(DSAStack, DKind, /* ScopeEntry */ true);
}
int Sema::getNumberOfConstructScopes(unsigned Level) const {
@@ -4460,6 +4508,8 @@ static bool checkOrderedOrderSpecified(Sema &S,
StmtResult Sema::ActOnOpenMPRegionEnd(StmtResult S,
ArrayRef<OMPClause *> Clauses) {
+ handleDeclareVariantConstructTrait(DSAStack, DSAStack->getCurrentDirective(),
+ /* ScopeEntry */ false);
if (DSAStack->getCurrentDirective() == OMPD_atomic ||
DSAStack->getCurrentDirective() == OMPD_critical ||
DSAStack->getCurrentDirective() == OMPD_section ||
@@ -4650,6 +4700,7 @@ static bool checkNestingOfRegions(Sema &SemaRef, const DSAStackTy *Stack,
OpenMPDirectiveKind CurrentRegion,
const DeclarationNameInfo &CurrentName,
OpenMPDirectiveKind CancelRegion,
+ OpenMPBindClauseKind BindKind,
SourceLocation StartLoc) {
if (Stack->getCurScope()) {
OpenMPDirectiveKind ParentRegion = Stack->getParentDirective();
@@ -4754,6 +4805,7 @@ static bool checkNestingOfRegions(Sema &SemaRef, const DSAStackTy *Stack,
// A masked region may not be closely nested inside a worksharing, loop,
// atomic, task, or taskloop region.
NestingProhibited = isOpenMPWorksharingDirective(ParentRegion) ||
+ isOpenMPGenericLoopDirective(ParentRegion) ||
isOpenMPTaskingDirective(ParentRegion);
} else if (CurrentRegion == OMPD_critical && CurrentName.getName()) {
// OpenMP [2.16, Nesting of Regions]
@@ -4787,6 +4839,7 @@ static bool checkNestingOfRegions(Sema &SemaRef, const DSAStackTy *Stack,
// task, taskloop, critical, ordered, atomic, or masked region.
NestingProhibited =
isOpenMPWorksharingDirective(ParentRegion) ||
+ isOpenMPGenericLoopDirective(ParentRegion) ||
isOpenMPTaskingDirective(ParentRegion) ||
ParentRegion == OMPD_master || ParentRegion == OMPD_masked ||
ParentRegion == OMPD_parallel_master ||
@@ -4800,6 +4853,7 @@ static bool checkNestingOfRegions(Sema &SemaRef, const DSAStackTy *Stack,
// critical, ordered, atomic, or masked region.
NestingProhibited =
isOpenMPWorksharingDirective(ParentRegion) ||
+ isOpenMPGenericLoopDirective(ParentRegion) ||
isOpenMPTaskingDirective(ParentRegion) ||
ParentRegion == OMPD_master || ParentRegion == OMPD_masked ||
ParentRegion == OMPD_parallel_master ||
@@ -4845,14 +4899,28 @@ static bool checkNestingOfRegions(Sema &SemaRef, const DSAStackTy *Stack,
!isOpenMPTargetExecutionDirective(CurrentRegion) &&
!isOpenMPTargetDataManagementDirective(CurrentRegion) &&
(ParentRegion == OMPD_teams || ParentRegion == OMPD_target_teams)) {
- // OpenMP [2.16, Nesting of Regions]
- // distribute, parallel, parallel sections, parallel workshare, and the
- // parallel loop and parallel loop SIMD constructs are the only OpenMP
- // constructs that can be closely nested in the teams region.
+ // OpenMP [5.1, 2.22, Nesting of Regions]
+ // distribute, distribute simd, distribute parallel worksharing-loop,
+ // distribute parallel worksharing-loop SIMD, loop, parallel regions,
+ // including any parallel regions arising from combined constructs,
+ // omp_get_num_teams() regions, and omp_get_team_num() regions are the
+ // only OpenMP regions that may be strictly nested inside the teams
+ // region.
NestingProhibited = !isOpenMPParallelDirective(CurrentRegion) &&
- !isOpenMPDistributeDirective(CurrentRegion);
+ !isOpenMPDistributeDirective(CurrentRegion) &&
+ CurrentRegion != OMPD_loop;
Recommend = ShouldBeInParallelRegion;
}
+ if (!NestingProhibited && CurrentRegion == OMPD_loop) {
+ // OpenMP [5.1, 2.11.7, loop Construct, Restrictions]
+ // If the bind clause is present on the loop construct and binding is
+ // teams then the corresponding loop region must be strictly nested inside
+ // a teams region.
+ NestingProhibited = BindKind == OMPC_BIND_teams &&
+ ParentRegion != OMPD_teams &&
+ ParentRegion != OMPD_target_teams;
+ Recommend = ShouldBeInTeamsRegion;
+ }
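
// Illustrative sketch (assuming OpenMP 5.1 syntax) of the BindKind check
// above: with bind(teams), the loop region must be strictly nested inside a
// teams or target teams region.
void sketch_bind_teams(int n) {
  #pragma omp teams
  #pragma omp loop bind(teams)          // OK: strictly nested in teams
  for (int i = 0; i < n; ++i)
    ;
  // #pragma omp loop bind(teams)       // would be diagnosed: no enclosing teams
}
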
if (!NestingProhibited &&
isOpenMPNestingDistributeDirective(CurrentRegion)) {
// OpenMP 4.5 [2.17 Nesting of Regions]
@@ -4929,14 +4997,7 @@ static bool checkIfClauses(Sema &S, OpenMPDirectiveKind Kind,
// directive.
// At most one if clause with the particular directive-name-modifier can
// appear on the directive.
- bool MatchFound = false;
- for (auto NM : AllowedNameModifiers) {
- if (CurNM == NM) {
- MatchFound = true;
- break;
- }
- }
- if (!MatchFound) {
+ if (!llvm::is_contained(AllowedNameModifiers, CurNM)) {
S.Diag(IC->getNameModifierLoc(),
diag::err_omp_wrong_if_directive_name_modifier)
<< getOpenMPDirectiveName(CurNM) << getOpenMPDirectiveName(Kind);
@@ -5180,8 +5241,10 @@ static void checkAllocateClauses(Sema &S, DSAStackTy *Stack,
if (checkPreviousOMPAllocateAttribute(S, Stack, E, PrivateVD,
AllocatorKind, AC->getAllocator()))
continue;
+ // Placeholder until allocate clause supports align modifier.
+ Expr *Alignment = nullptr;
applyOMPAllocateAttribute(S, PrivateVD, AllocatorKind, AC->getAllocator(),
- E->getSourceRange());
+ Alignment, E->getSourceRange());
}
}
}
@@ -5320,8 +5383,9 @@ static CapturedStmt *buildDistanceFunc(Sema &Actions, QualType LogicalTy,
if (Rel == BO_LE || Rel == BO_GE) {
// Add one to the range if the relational operator is inclusive.
- Range =
- AssertSuccess(Actions.BuildUnaryOp(nullptr, {}, UO_PreInc, Range));
+ Range = AssertSuccess(Actions.BuildBinOp(
+ nullptr, {}, BO_Add, Range,
+ Actions.ActOnIntegerConstant(SourceLocation(), 1).get()));
}
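
// Illustrative arithmetic for the inclusive case handled above: for
//   for (int i = 3; i <= 10; ++i)
// the computed distance is (10 - 3) + 1 = 8 iterations, hence the explicit
// "+ 1" built with BO_Add instead of the previous pre-increment.
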
// Divide by the absolute step amount.
@@ -5573,6 +5637,19 @@ StmtResult Sema::ActOnOpenMPCanonicalLoop(Stmt *AStmt) {
LoopVarFunc, LVRef);
}
+StmtResult Sema::ActOnOpenMPLoopnest(Stmt *AStmt) {
+ // Handle a literal loop.
+ if (isa<ForStmt>(AStmt) || isa<CXXForRangeStmt>(AStmt))
+ return ActOnOpenMPCanonicalLoop(AStmt);
+
+ // If not a literal loop, it must be the result of a loop transformation.
+ OMPExecutableDirective *LoopTransform = cast<OMPExecutableDirective>(AStmt);
+ assert(
+ isOpenMPLoopTransformationDirective(LoopTransform->getDirectiveKind()) &&
+ "Loop transformation directive expected");
+ return LoopTransform;
+}
+
static ExprResult buildUserDefinedMapperRef(Sema &SemaRef, Scope *S,
CXXScopeSpec &MapperIdScopeSpec,
const DeclarationNameInfo &MapperId,
@@ -5682,7 +5759,7 @@ processImplicitMapsWithDefaultMappers(Sema &S, DSAStackTy *Stack,
SubExprs.push_back(BaseExpr);
continue;
}
- // Check for the "default" mapper for data memebers.
+ // Check for the "default" mapper for data members.
bool FirstIter = true;
for (FieldDecl *FD : RD->fields()) {
if (!FD)
@@ -5719,10 +5796,14 @@ StmtResult Sema::ActOnOpenMPExecutableDirective(
OpenMPDirectiveKind CancelRegion, ArrayRef<OMPClause *> Clauses,
Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc) {
StmtResult Res = StmtError();
+ OpenMPBindClauseKind BindKind = OMPC_BIND_unknown;
+ if (const OMPBindClause *BC =
+ OMPExecutableDirective::getSingleClause<OMPBindClause>(Clauses))
+ BindKind = BC->getBindKind();
// First check CancelRegion which is then used in checkNestingOfRegions.
if (checkCancelRegion(*this, Kind, CancelRegion, StartLoc) ||
checkNestingOfRegions(*this, DSAStack, Kind, DirName, CancelRegion,
- StartLoc))
+ BindKind, StartLoc))
return StmtError();
llvm::SmallVector<OMPClause *, 8> ClausesWithImplicit;
@@ -5812,6 +5893,31 @@ StmtResult Sema::ActOnOpenMPExecutableDirective(
ErrorFound = true;
}
}
+ // OpenMP 5.0 [2.19.7]
+ // If a list item appears in a reduction, lastprivate or linear
+ // clause on a combined target construct then it is treated as
+ // if it also appears in a map clause with a map-type of tofrom
+ if (getLangOpts().OpenMP >= 50 && Kind != OMPD_target &&
+ isOpenMPTargetExecutionDirective(Kind)) {
+ SmallVector<Expr *, 4> ImplicitExprs;
+ for (OMPClause *C : Clauses) {
+ if (auto *RC = dyn_cast<OMPReductionClause>(C))
+ for (Expr *E : RC->varlists())
+ if (!isa<DeclRefExpr>(E->IgnoreParenImpCasts()))
+ ImplicitExprs.emplace_back(E);
+ }
+ if (!ImplicitExprs.empty()) {
+ ArrayRef<Expr *> Exprs = ImplicitExprs;
+ CXXScopeSpec MapperIdScopeSpec;
+ DeclarationNameInfo MapperId;
+ if (OMPClause *Implicit = ActOnOpenMPMapClause(
+ OMPC_MAP_MODIFIER_unknown, SourceLocation(), MapperIdScopeSpec,
+ MapperId, OMPC_MAP_tofrom,
+ /*IsMapTypeImplicit=*/true, SourceLocation(), SourceLocation(),
+ Exprs, OMPVarListLocTy(), /*NoDiagnose=*/true))
+ ClausesWithImplicit.emplace_back(Implicit);
+ }
+ }
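
// Illustrative sketch of the implicit map built above (assumes OpenMP 5.0
// array-section reductions): the reduction list item behaves as if
// map(tofrom: bins[0:nbins]) had been written on the combined target construct.
void sketch_reduction_map(int *bins, int nbins, const int *data, int n) {
  #pragma omp target teams distribute parallel for \
      reduction(+: bins[0:nbins]) map(to: data[0:n])
  for (int i = 0; i < n; ++i)
    bins[data[i] % nbins] += 1;
}
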
for (unsigned I = 0, E = DefaultmapKindNum; I < E; ++I) {
int ClauseKindCnt = -1;
for (ArrayRef<Expr *> ImplicitMap : ImplicitMaps[I]) {
@@ -5940,11 +6046,9 @@ StmtResult Sema::ActOnOpenMPExecutableDirective(
Res = ActOnOpenMPBarrierDirective(StartLoc, EndLoc);
break;
case OMPD_taskwait:
- assert(ClausesWithImplicit.empty() &&
- "No clauses are allowed for 'omp taskwait' directive");
assert(AStmt == nullptr &&
"No associated statement allowed for 'omp taskwait' directive");
- Res = ActOnOpenMPTaskwaitDirective(StartLoc, EndLoc);
+ Res = ActOnOpenMPTaskwaitDirective(ClausesWithImplicit, StartLoc, EndLoc);
break;
case OMPD_taskgroup:
Res = ActOnOpenMPTaskgroupDirective(ClausesWithImplicit, AStmt, StartLoc,
@@ -6165,6 +6269,10 @@ StmtResult Sema::ActOnOpenMPExecutableDirective(
Res = ActOnOpenMPDispatchDirective(ClausesWithImplicit, AStmt, StartLoc,
EndLoc);
break;
+ case OMPD_loop:
+ Res = ActOnOpenMPGenericLoopDirective(ClausesWithImplicit, AStmt, StartLoc,
+ EndLoc, VarsWithInheritedDSA);
+ break;
case OMPD_declare_target:
case OMPD_end_declare_target:
case OMPD_threadprivate:
@@ -6272,6 +6380,7 @@ StmtResult Sema::ActOnOpenMPExecutableDirective(
case OMPC_exclusive:
case OMPC_uses_allocators:
case OMPC_affinity:
+ case OMPC_bind:
continue;
case OMPC_allocator:
case OMPC_flush:
@@ -6286,6 +6395,7 @@ StmtResult Sema::ActOnOpenMPExecutableDirective(
case OMPC_atomic_default_mem_order:
case OMPC_device_type:
case OMPC_match:
+ case OMPC_when:
default:
llvm_unreachable("Unexpected clause");
}
@@ -6619,7 +6729,7 @@ void Sema::ActOnFinishedFunctionDefinitionInOpenMPAssumeScope(Decl *D) {
FD = cast<FunctionDecl>(D);
assert(FD && "Expected a function declaration!");
- // If we are intantiating templates we do *not* apply scoped assumptions but
+ // If we are instantiating templates we do *not* apply scoped assumptions but
// only global ones. We apply scoped assumption to the template definition
// though.
if (!inTemplateInstantiation()) {
@@ -6664,9 +6774,11 @@ void Sema::ActOnStartOfFunctionDefinitionInOpenMPDeclareVariantScope(
for (auto *Candidate : Lookup) {
auto *CandidateDecl = Candidate->getUnderlyingDecl();
FunctionDecl *UDecl = nullptr;
- if (IsTemplated && isa<FunctionTemplateDecl>(CandidateDecl))
- UDecl = cast<FunctionTemplateDecl>(CandidateDecl)->getTemplatedDecl();
- else if (!IsTemplated)
+ if (IsTemplated && isa<FunctionTemplateDecl>(CandidateDecl)) {
+ auto *FTD = cast<FunctionTemplateDecl>(CandidateDecl);
+ if (FTD->getTemplateParameters()->size() == TemplateParamLists.size())
+ UDecl = FTD->getTemplatedDecl();
+ } else if (!IsTemplated)
UDecl = dyn_cast<FunctionDecl>(CandidateDecl);
if (!UDecl)
continue;
@@ -6734,7 +6846,10 @@ void Sema::ActOnFinishedFunctionDefinitionInOpenMPDeclareVariantScope(
OMPDeclareVariantScope &DVScope = OMPDeclareVariantScopes.back();
auto *OMPDeclareVariantA = OMPDeclareVariantAttr::CreateImplicit(
- Context, VariantFuncRef, DVScope.TI);
+ Context, VariantFuncRef, DVScope.TI,
+ /*NothingArgs=*/nullptr, /*NothingArgsSize=*/0,
+ /*NeedDevicePtrArgs=*/nullptr, /*NeedDevicePtrArgsSize=*/0,
+ /*AppendArgs=*/nullptr, /*AppendArgsSize=*/0);
for (FunctionDecl *BaseFD : Bases)
BaseFD->addAttr(OMPDeclareVariantA);
}
@@ -6765,7 +6880,7 @@ ExprResult Sema::ActOnOpenMPCall(ExprResult Call, Scope *Scope,
<< ISATrait;
};
TargetOMPContext OMPCtx(Context, std::move(DiagUnknownTrait),
- getCurFunctionDecl());
+ getCurFunctionDecl(), DSAStack->getConstructTraits());
QualType CalleeFnType = CalleeFnDecl->getType();
@@ -6848,6 +6963,7 @@ ExprResult Sema::ActOnOpenMPCall(ExprResult Call, Scope *Scope,
Optional<std::pair<FunctionDecl *, Expr *>>
Sema::checkOpenMPDeclareVariantFunction(Sema::DeclGroupPtrTy DG,
Expr *VariantRef, OMPTraitInfo &TI,
+ unsigned NumAppendArgs,
SourceRange SR) {
if (!DG || DG.get().isNull())
return None;
@@ -6935,6 +7051,39 @@ Sema::checkOpenMPDeclareVariantFunction(Sema::DeclGroupPtrTy DG,
if (TI.anyScoreOrCondition(HandleNonConstantScoresAndConditions))
return None;
+ QualType AdjustedFnType = FD->getType();
+ if (NumAppendArgs) {
+ if (isa<FunctionNoProtoType>(FD->getType())) {
+ Diag(FD->getLocation(), diag::err_omp_declare_variant_prototype_required)
+ << SR;
+ return None;
+ }
+ // Adjust the function type to account for an extra omp_interop_t for each
+ // interop specified in the append_args clause.
+ const TypeDecl *TD = nullptr;
+ LookupResult Result(*this, &Context.Idents.get("omp_interop_t"),
+ SR.getBegin(), Sema::LookupOrdinaryName);
+ if (LookupName(Result, getCurScope())) {
+ NamedDecl *ND = Result.getFoundDecl();
+ TD = dyn_cast_or_null<TypeDecl>(ND);
+ }
+ if (!TD) {
+ Diag(SR.getBegin(), diag::err_omp_interop_type_not_found) << SR;
+ return None;
+ }
+ QualType InteropType = QualType(TD->getTypeForDecl(), 0);
+ auto *PTy = cast<FunctionProtoType>(FD->getType());
+ if (PTy->isVariadic()) {
+ Diag(FD->getLocation(), diag::err_omp_append_args_with_varargs) << SR;
+ return None;
+ }
+ llvm::SmallVector<QualType, 8> Params;
+ Params.append(PTy->param_type_begin(), PTy->param_type_end());
+ Params.insert(Params.end(), NumAppendArgs, InteropType);
+ AdjustedFnType = Context.getFunctionType(PTy->getReturnType(), Params,
+ PTy->getExtProtoInfo());
+ }
+
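
// Illustrative sketch of the prototype adjustment modeled by AdjustedFnType
// (assumes omp_interop_t is provided by <omp.h>): each appended interop adds
// one trailing omp_interop_t parameter that the variant must accept.
#include <omp.h>
void fast_axpy(float *x, float *y, int n, omp_interop_t io);   // the variant
#pragma omp declare variant(fast_axpy) match(construct={dispatch}) \
    append_args(interop(targetsync))
void axpy(float *x, float *y, int n);                          // the base
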
// Convert VariantRef expression to the type of the original function to
// resolve possible conflicts.
ExprResult VariantRefCast = VariantRef;
@@ -6944,7 +7093,7 @@ Sema::checkOpenMPDeclareVariantFunction(Sema::DeclGroupPtrTy DG,
if (Method && !Method->isStatic()) {
const Type *ClassType =
Context.getTypeDeclType(Method->getParent()).getTypePtr();
- FnPtrType = Context.getMemberPointerType(FD->getType(), ClassType);
+ FnPtrType = Context.getMemberPointerType(AdjustedFnType, ClassType);
ExprResult ER;
{
// Build addr_of unary op to correctly handle type checks for member
@@ -6960,7 +7109,7 @@ Sema::checkOpenMPDeclareVariantFunction(Sema::DeclGroupPtrTy DG,
}
VariantRef = ER.get();
} else {
- FnPtrType = Context.getPointerType(FD->getType());
+ FnPtrType = Context.getPointerType(AdjustedFnType);
}
QualType VarianPtrType = Context.getPointerType(VariantRef->getType());
if (VarianPtrType.getUnqualifiedType() != FnPtrType.getUnqualifiedType()) {
@@ -6975,7 +7124,7 @@ Sema::checkOpenMPDeclareVariantFunction(Sema::DeclGroupPtrTy DG,
diag::err_omp_declare_variant_incompat_types)
<< VariantRef->getType()
<< ((Method && !Method->isStatic()) ? FnPtrType : FD->getType())
- << VariantRef->getSourceRange();
+ << (NumAppendArgs ? 1 : 0) << VariantRef->getSourceRange();
return None;
}
VariantRefCast = PerformImplicitConversion(
@@ -7017,11 +7166,12 @@ Sema::checkOpenMPDeclareVariantFunction(Sema::DeclGroupPtrTy DG,
// Check if function types are compatible in C.
if (!LangOpts.CPlusPlus) {
QualType NewType =
- Context.mergeFunctionTypes(FD->getType(), NewFD->getType());
+ Context.mergeFunctionTypes(AdjustedFnType, NewFD->getType());
if (NewType.isNull()) {
Diag(VariantRef->getExprLoc(),
diag::err_omp_declare_variant_incompat_types)
- << NewFD->getType() << FD->getType() << VariantRef->getSourceRange();
+ << NewFD->getType() << FD->getType() << (NumAppendArgs ? 1 : 0)
+ << VariantRef->getSourceRange();
return None;
}
if (NewType->isFunctionProtoType()) {
@@ -7107,12 +7257,74 @@ Sema::checkOpenMPDeclareVariantFunction(Sema::DeclGroupPtrTy DG,
return std::make_pair(FD, cast<Expr>(DRE));
}
-void Sema::ActOnOpenMPDeclareVariantDirective(FunctionDecl *FD,
- Expr *VariantRef,
- OMPTraitInfo &TI,
- SourceRange SR) {
- auto *NewAttr =
- OMPDeclareVariantAttr::CreateImplicit(Context, VariantRef, &TI, SR);
+void Sema::ActOnOpenMPDeclareVariantDirective(
+ FunctionDecl *FD, Expr *VariantRef, OMPTraitInfo &TI,
+ ArrayRef<Expr *> AdjustArgsNothing,
+ ArrayRef<Expr *> AdjustArgsNeedDevicePtr,
+ ArrayRef<OMPDeclareVariantAttr::InteropType> AppendArgs,
+ SourceLocation AdjustArgsLoc, SourceLocation AppendArgsLoc,
+ SourceRange SR) {
+
+ // OpenMP 5.1 [2.3.5, declare variant directive, Restrictions]
+ // An adjust_args clause or append_args clause can only be specified if the
+ // dispatch selector of the construct selector set appears in the match
+ // clause.
+
+ SmallVector<Expr *, 8> AllAdjustArgs;
+ llvm::append_range(AllAdjustArgs, AdjustArgsNothing);
+ llvm::append_range(AllAdjustArgs, AdjustArgsNeedDevicePtr);
+
+ if (!AllAdjustArgs.empty() || !AppendArgs.empty()) {
+ VariantMatchInfo VMI;
+ TI.getAsVariantMatchInfo(Context, VMI);
+ if (!llvm::is_contained(
+ VMI.ConstructTraits,
+ llvm::omp::TraitProperty::construct_dispatch_dispatch)) {
+ if (!AllAdjustArgs.empty())
+ Diag(AdjustArgsLoc, diag::err_omp_clause_requires_dispatch_construct)
+ << getOpenMPClauseName(OMPC_adjust_args);
+ if (!AppendArgs.empty())
+ Diag(AppendArgsLoc, diag::err_omp_clause_requires_dispatch_construct)
+ << getOpenMPClauseName(OMPC_append_args);
+ return;
+ }
+ }
+
+ // OpenMP 5.1 [2.3.5, declare variant directive, Restrictions]
+ // Each argument can only appear in a single adjust_args clause for each
+ // declare variant directive.
+ llvm::SmallPtrSet<const VarDecl *, 4> AdjustVars;
+
+ for (Expr *E : AllAdjustArgs) {
+ E = E->IgnoreParenImpCasts();
+ if (const auto *DRE = dyn_cast<DeclRefExpr>(E)) {
+ if (const auto *PVD = dyn_cast<ParmVarDecl>(DRE->getDecl())) {
+ const VarDecl *CanonPVD = PVD->getCanonicalDecl();
+ if (FD->getNumParams() > PVD->getFunctionScopeIndex() &&
+ FD->getParamDecl(PVD->getFunctionScopeIndex())
+ ->getCanonicalDecl() == CanonPVD) {
+ // It's a parameter of the function, check duplicates.
+ if (!AdjustVars.insert(CanonPVD).second) {
+ Diag(DRE->getLocation(), diag::err_omp_adjust_arg_multiple_clauses)
+ << PVD;
+ return;
+ }
+ continue;
+ }
+ }
+ }
+ // Anything that is not a function parameter is an error.
+ Diag(E->getExprLoc(), diag::err_omp_param_or_this_in_clause) << FD << 0;
+ return;
+ }
+
+ auto *NewAttr = OMPDeclareVariantAttr::CreateImplicit(
+ Context, VariantRef, &TI, const_cast<Expr **>(AdjustArgsNothing.data()),
+ AdjustArgsNothing.size(),
+ const_cast<Expr **>(AdjustArgsNeedDevicePtr.data()),
+ AdjustArgsNeedDevicePtr.size(),
+ const_cast<OMPDeclareVariantAttr::InteropType *>(AppendArgs.data()),
+ AppendArgs.size(), SR);
FD->addAttr(NewAttr);
}
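
// Illustrative sketch of the restrictions diagnosed above: adjust_args and
// append_args require the dispatch selector in the match clause, each adjusted
// argument must be a parameter of the base function, and a parameter may be
// named in at most one adjust_args clause.
void variant_fn(int *p, int *q);
#pragma omp declare variant(variant_fn) match(construct={dispatch}) \
    adjust_args(need_device_ptr: p) adjust_args(nothing: q)
void base_fn(int *p, int *q);
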
@@ -7342,7 +7554,7 @@ bool OpenMPIterationSpaceChecker::setLCDeclAndLB(ValueDecl *NewLCDecl,
// State consistency checking to ensure correct usage.
assert(LCDecl == nullptr && LB == nullptr && LCRef == nullptr &&
UB == nullptr && Step == nullptr && !TestIsLessOp && !TestIsStrictOp);
- if (!NewLCDecl || !NewLB)
+ if (!NewLCDecl || !NewLB || NewLB->containsErrors())
return true;
LCDecl = getCanonicalDecl(NewLCDecl);
LCRef = NewLCRefExpr;
@@ -7365,7 +7577,7 @@ bool OpenMPIterationSpaceChecker::setUB(Expr *NewUB,
// State consistency checking to ensure correct usage.
assert(LCDecl != nullptr && LB != nullptr && UB == nullptr &&
Step == nullptr && !TestIsLessOp && !TestIsStrictOp);
- if (!NewUB)
+ if (!NewUB || NewUB->containsErrors())
return true;
UB = NewUB;
if (LessOp)
@@ -7380,7 +7592,7 @@ bool OpenMPIterationSpaceChecker::setUB(Expr *NewUB,
bool OpenMPIterationSpaceChecker::setStep(Expr *NewStep, bool Subtract) {
// State consistency checking to ensure correct usage.
assert(LCDecl != nullptr && LB != nullptr && Step == nullptr);
- if (!NewStep)
+ if (!NewStep || NewStep->containsErrors())
return true;
if (!NewStep->isValueDependent()) {
// Check that the step is integer expression.
@@ -8648,6 +8860,7 @@ static bool checkOpenMPIterationSpace(
ResultIterSpaces[CurrentNestedLoopCount].NumIterations =
ISC.buildNumIterations(DSA.getCurScope(), ResultIterSpaces,
(isOpenMPWorksharingDirective(DKind) ||
+ isOpenMPGenericLoopDirective(DKind) ||
isOpenMPTaskLoopDirective(DKind) ||
isOpenMPDistributeDirective(DKind) ||
isOpenMPLoopTransformationDirective(DKind)),
@@ -8971,15 +9184,8 @@ checkOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr,
}
return false;
},
- [&SemaRef, &Captures](OMPLoopBasedDirective *Transform) {
- Stmt *DependentPreInits;
- if (auto *Dir = dyn_cast<OMPTileDirective>(Transform)) {
- DependentPreInits = Dir->getPreInits();
- } else if (auto *Dir = dyn_cast<OMPUnrollDirective>(Transform)) {
- DependentPreInits = Dir->getPreInits();
- } else {
- llvm_unreachable("Unexpected loop transformation");
- }
+ [&SemaRef, &Captures](OMPLoopTransformationDirective *Transform) {
+ Stmt *DependentPreInits = Transform->getPreInits();
if (!DependentPreInits)
return;
for (Decl *C : cast<DeclStmt>(DependentPreInits)->getDeclGroup()) {
@@ -9138,6 +9344,7 @@ checkOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr,
ExprResult LB, UB, IL, ST, EUB, CombLB, CombUB, PrevLB, PrevUB, CombEUB;
if (isOpenMPWorksharingDirective(DKind) || isOpenMPTaskLoopDirective(DKind) ||
isOpenMPDistributeDirective(DKind) ||
+ isOpenMPGenericLoopDirective(DKind) ||
isOpenMPLoopTransformationDirective(DKind)) {
// Lower bound variable, initialized with zero.
VarDecl *LBDecl = buildVarDecl(SemaRef, InitLoc, VType, ".omp.lb");
@@ -9237,6 +9444,7 @@ checkOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr,
VarDecl *IVDecl = buildVarDecl(SemaRef, InitLoc, RealVType, ".omp.iv");
IV = buildDeclRefExpr(SemaRef, IVDecl, RealVType, InitLoc);
Expr *RHS = (isOpenMPWorksharingDirective(DKind) ||
+ isOpenMPGenericLoopDirective(DKind) ||
isOpenMPTaskLoopDirective(DKind) ||
isOpenMPDistributeDirective(DKind) ||
isOpenMPLoopTransformationDirective(DKind))
@@ -9248,6 +9456,7 @@ checkOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr,
if (isOpenMPLoopBoundSharingDirective(DKind)) {
Expr *CombRHS =
(isOpenMPWorksharingDirective(DKind) ||
+ isOpenMPGenericLoopDirective(DKind) ||
isOpenMPTaskLoopDirective(DKind) ||
isOpenMPDistributeDirective(DKind))
? CombLB.get()
@@ -9279,6 +9488,7 @@ checkOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr,
}
ExprResult Cond =
(isOpenMPWorksharingDirective(DKind) ||
+ isOpenMPGenericLoopDirective(DKind) ||
isOpenMPTaskLoopDirective(DKind) || isOpenMPDistributeDirective(DKind) ||
isOpenMPLoopTransformationDirective(DKind))
? SemaRef.BuildBinOp(CurScope, CondLoc,
@@ -9328,6 +9538,7 @@ checkOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr,
// base variables for the update
ExprResult NextLB, NextUB, CombNextLB, CombNextUB;
if (isOpenMPWorksharingDirective(DKind) || isOpenMPTaskLoopDirective(DKind) ||
+ isOpenMPGenericLoopDirective(DKind) ||
isOpenMPDistributeDirective(DKind) ||
isOpenMPLoopTransformationDirective(DKind)) {
// LB + ST
@@ -9882,6 +10093,57 @@ StmtResult Sema::ActOnOpenMPDispatchDirective(ArrayRef<OMPClause *> Clauses,
TargetCallLoc);
}
+StmtResult Sema::ActOnOpenMPGenericLoopDirective(
+ ArrayRef<OMPClause *> Clauses, Stmt *AStmt, SourceLocation StartLoc,
+ SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA) {
+ if (!AStmt)
+ return StmtError();
+
+ // OpenMP 5.1 [2.11.7, loop construct]
+ // A list item may not appear in a lastprivate clause unless it is the
+ // loop iteration variable of a loop that is associated with the construct.
+ for (OMPClause *C : Clauses) {
+ if (auto *LPC = dyn_cast<OMPLastprivateClause>(C)) {
+ for (Expr *RefExpr : LPC->varlists()) {
+ SourceLocation ELoc;
+ SourceRange ERange;
+ Expr *SimpleRefExpr = RefExpr;
+ auto Res = getPrivateItem(*this, SimpleRefExpr, ELoc, ERange);
+ if (ValueDecl *D = Res.first) {
+ auto &&Info = DSAStack->isLoopControlVariable(D);
+ if (!Info.first) {
+ Diag(ELoc, diag::err_omp_lastprivate_loop_var_non_loop_iteration);
+ return StmtError();
+ }
+ }
+ }
+ }
+ }
+
+ auto *CS = cast<CapturedStmt>(AStmt);
+ // 1.2.2 OpenMP Language Terminology
+ // Structured block - An executable statement with a single entry at the
+ // top and a single exit at the bottom.
+ // The point of exit cannot be a branch out of the structured block.
+ // longjmp() and throw() must not violate the entry/exit criteria.
+ CS->getCapturedDecl()->setNothrow();
+
+ OMPLoopDirective::HelperExprs B;
+ // If a 'collapse' clause is present, it defines the number of associated nested loops.
+ unsigned NestedLoopCount = checkOpenMPLoop(
+ OMPD_loop, getCollapseNumberExpr(Clauses), getOrderedNumberExpr(Clauses),
+ AStmt, *this, *DSAStack, VarsWithImplicitDSA, B);
+ if (NestedLoopCount == 0)
+ return StmtError();
+
+ assert((CurContext->isDependentContext() || B.builtAll()) &&
+ "omp loop exprs were not built");
+
+ setFunctionHasBranchProtectedScope();
+ return OMPGenericLoopDirective::Create(Context, StartLoc, EndLoc,
+ NestedLoopCount, Clauses, AStmt, B);
+}
+
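
// Illustrative sketch of the lastprivate restriction checked above: only a
// loop iteration variable of an associated loop may appear in the clause.
void sketch_loop_lastprivate(int n) {
  int i, x = 0;
  #pragma omp loop lastprivate(i)       // OK: i is the iteration variable
  for (i = 0; i < n; ++i)
    x = i;
  // #pragma omp loop lastprivate(x)    // would be diagnosed: x is not one
}
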
StmtResult Sema::ActOnOpenMPSingleDirective(ArrayRef<OMPClause *> Clauses,
Stmt *AStmt,
SourceLocation StartLoc,
@@ -10203,9 +10465,10 @@ StmtResult Sema::ActOnOpenMPBarrierDirective(SourceLocation StartLoc,
return OMPBarrierDirective::Create(Context, StartLoc, EndLoc);
}
-StmtResult Sema::ActOnOpenMPTaskwaitDirective(SourceLocation StartLoc,
+StmtResult Sema::ActOnOpenMPTaskwaitDirective(ArrayRef<OMPClause *> Clauses,
+ SourceLocation StartLoc,
SourceLocation EndLoc) {
- return OMPTaskwaitDirective::Create(Context, StartLoc, EndLoc);
+ return OMPTaskwaitDirective::Create(Context, StartLoc, EndLoc, Clauses);
}
StmtResult Sema::ActOnOpenMPTaskgroupDirective(ArrayRef<OMPClause *> Clauses,
@@ -12852,10 +13115,12 @@ StmtResult Sema::ActOnOpenMPUnrollDirective(ArrayRef<OMPClause *> Clauses,
Body, OriginalInits))
return StmtError();
+ unsigned NumGeneratedLoops = PartialClause ? 1 : 0;
+
// Delay unrolling to when template is completely instantiated.
if (CurContext->isDependentContext())
return OMPUnrollDirective::Create(Context, StartLoc, EndLoc, Clauses, AStmt,
- nullptr, nullptr);
+ NumGeneratedLoops, nullptr, nullptr);
OMPLoopBasedDirective::HelperExprs &LoopHelper = LoopHelpers.front();
@@ -12874,9 +13139,9 @@ StmtResult Sema::ActOnOpenMPUnrollDirective(ArrayRef<OMPClause *> Clauses,
// The generated loop may only be passed to other loop-associated directive
// when a partial clause is specified. Without the requirement it is
// sufficient to generate loop unroll metadata at code-generation.
- if (!PartialClause)
+ if (NumGeneratedLoops == 0)
return OMPUnrollDirective::Create(Context, StartLoc, EndLoc, Clauses, AStmt,
- nullptr, nullptr);
+ NumGeneratedLoops, nullptr, nullptr);
// Otherwise, we need to provide a de-sugared/transformed AST that can be
// associated with another loop directive.
@@ -13097,7 +13362,8 @@ StmtResult Sema::ActOnOpenMPUnrollDirective(ArrayRef<OMPClause *> Clauses,
LoopHelper.Init->getBeginLoc(), LoopHelper.Inc->getEndLoc());
return OMPUnrollDirective::Create(Context, StartLoc, EndLoc, Clauses, AStmt,
- OuterFor, buildPreInits(Context, PreInits));
+ NumGeneratedLoops, OuterFor,
+ buildPreInits(Context, PreInits));
}
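
// Illustrative sketch of why NumGeneratedLoops matters: only a partial unroll
// generates a loop that an enclosing loop-associated directive may consume; a
// full unroll generates none and is handled purely at code generation.
void sketch_unroll(double *a, int n) {
  #pragma omp for
  #pragma omp unroll partial(4)
  for (int i = 0; i < n; ++i)
    a[i] = 0.0;
}
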
OMPClause *Sema::ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind, Expr *Expr,
@@ -13163,6 +13429,9 @@ OMPClause *Sema::ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind, Expr *Expr,
case OMPC_partial:
Res = ActOnOpenMPPartialClause(Expr, StartLoc, LParenLoc, EndLoc);
break;
+ case OMPC_align:
+ Res = ActOnOpenMPAlignClause(Expr, StartLoc, LParenLoc, EndLoc);
+ break;
case OMPC_device:
case OMPC_if:
case OMPC_default:
@@ -13223,6 +13492,8 @@ OMPClause *Sema::ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind, Expr *Expr,
case OMPC_exclusive:
case OMPC_uses_allocators:
case OMPC_affinity:
+ case OMPC_when:
+ case OMPC_bind:
default:
llvm_unreachable("Clause is not allowed.");
}
@@ -13363,6 +13634,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
case OMPD_end_declare_variant:
case OMPD_declare_target:
case OMPD_end_declare_target:
+ case OMPD_loop:
case OMPD_teams:
case OMPD_tile:
case OMPD_unroll:
@@ -13379,6 +13651,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
case OMPD_atomic:
case OMPD_teams_distribute:
case OMPD_requires:
+ case OMPD_metadirective:
llvm_unreachable("Unexpected OpenMP directive with if-clause");
case OMPD_unknown:
default:
@@ -13441,6 +13714,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
case OMPD_end_declare_variant:
case OMPD_declare_target:
case OMPD_end_declare_target:
+ case OMPD_loop:
case OMPD_teams:
case OMPD_simd:
case OMPD_tile:
@@ -13461,6 +13735,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
case OMPD_teams_distribute:
case OMPD_teams_distribute_simd:
case OMPD_requires:
+ case OMPD_metadirective:
llvm_unreachable("Unexpected OpenMP directive with num_threads-clause");
case OMPD_unknown:
default:
@@ -13524,6 +13799,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
case OMPD_end_declare_variant:
case OMPD_declare_target:
case OMPD_end_declare_target:
+ case OMPD_loop:
case OMPD_simd:
case OMPD_tile:
case OMPD_unroll:
@@ -13541,6 +13817,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
case OMPD_atomic:
case OMPD_distribute_simd:
case OMPD_requires:
+ case OMPD_metadirective:
llvm_unreachable("Unexpected OpenMP directive with num_teams-clause");
case OMPD_unknown:
default:
@@ -13604,6 +13881,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
case OMPD_end_declare_variant:
case OMPD_declare_target:
case OMPD_end_declare_target:
+ case OMPD_loop:
case OMPD_simd:
case OMPD_tile:
case OMPD_unroll:
@@ -13621,6 +13899,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
case OMPD_atomic:
case OMPD_distribute_simd:
case OMPD_requires:
+ case OMPD_metadirective:
llvm_unreachable("Unexpected OpenMP directive with thread_limit-clause");
case OMPD_unknown:
default:
@@ -13685,6 +13964,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
case OMPD_end_declare_variant:
case OMPD_declare_target:
case OMPD_end_declare_target:
+ case OMPD_loop:
case OMPD_simd:
case OMPD_tile:
case OMPD_unroll:
@@ -13701,6 +13981,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
case OMPD_distribute_simd:
case OMPD_target_teams:
case OMPD_requires:
+ case OMPD_metadirective:
llvm_unreachable("Unexpected OpenMP directive with schedule clause");
case OMPD_unknown:
default:
@@ -13765,6 +14046,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
case OMPD_end_declare_variant:
case OMPD_declare_target:
case OMPD_end_declare_target:
+ case OMPD_loop:
case OMPD_simd:
case OMPD_tile:
case OMPD_unroll:
@@ -13781,6 +14063,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
case OMPD_atomic:
case OMPD_target_teams:
case OMPD_requires:
+ case OMPD_metadirective:
llvm_unreachable("Unexpected OpenMP directive with dist_schedule clause");
case OMPD_unknown:
default:
@@ -13846,6 +14129,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
case OMPD_end_declare_variant:
case OMPD_declare_target:
case OMPD_end_declare_target:
+ case OMPD_loop:
case OMPD_simd:
case OMPD_tile:
case OMPD_unroll:
@@ -13863,6 +14147,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
case OMPD_atomic:
case OMPD_distribute_simd:
case OMPD_requires:
+ case OMPD_metadirective:
llvm_unreachable("Unexpected OpenMP directive with device-clause");
case OMPD_unknown:
default:
@@ -13928,6 +14213,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
case OMPD_end_declare_variant:
case OMPD_declare_target:
case OMPD_end_declare_target:
+ case OMPD_loop:
case OMPD_simd:
case OMPD_tile:
case OMPD_unroll:
@@ -13945,6 +14231,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
case OMPD_atomic:
case OMPD_distribute_simd:
case OMPD_requires:
+ case OMPD_metadirective:
llvm_unreachable("Unexpected OpenMP directive with grainsize-clause");
case OMPD_unknown:
default:
@@ -13964,6 +14251,15 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
case OMPC_filter:
// Do not capture filter-clause expressions.
break;
+ case OMPC_when:
+ if (DKind == OMPD_metadirective) {
+ CaptureRegion = OMPD_metadirective;
+ } else if (DKind == OMPD_unknown) {
+ llvm_unreachable("Unknown OpenMP directive");
+ } else {
+ llvm_unreachable("Unexpected OpenMP directive with when clause");
+ }
+ break;
case OMPC_firstprivate:
case OMPC_lastprivate:
case OMPC_reduction:
@@ -14028,6 +14324,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
case OMPC_exclusive:
case OMPC_uses_allocators:
case OMPC_affinity:
+ case OMPC_bind:
default:
llvm_unreachable("Unexpected OpenMP clause.");
}
@@ -14248,7 +14545,7 @@ ExprResult Sema::VerifyPositiveIntegerConstantInClause(Expr *E,
<< E->getSourceRange();
return ExprError();
}
- if (CKind == OMPC_aligned && !Result.isPowerOf2()) {
+ if ((CKind == OMPC_aligned || CKind == OMPC_align) && !Result.isPowerOf2()) {
Diag(E->getExprLoc(), diag::warn_omp_alignment_not_power_of_two)
<< E->getSourceRange();
return ExprError();
@@ -14419,6 +14716,10 @@ OMPClause *Sema::ActOnOpenMPSimpleClause(
Res = ActOnOpenMPUpdateClause(static_cast<OpenMPDependClauseKind>(Argument),
ArgumentLoc, StartLoc, LParenLoc, EndLoc);
break;
+ case OMPC_bind:
+ Res = ActOnOpenMPBindClause(static_cast<OpenMPBindClauseKind>(Argument),
+ ArgumentLoc, StartLoc, LParenLoc, EndLoc);
+ break;
case OMPC_if:
case OMPC_final:
case OMPC_num_threads:
@@ -14491,6 +14792,7 @@ OMPClause *Sema::ActOnOpenMPSimpleClause(
case OMPC_exclusive:
case OMPC_uses_allocators:
case OMPC_affinity:
+ case OMPC_when:
default:
llvm_unreachable("Clause is not allowed.");
}
@@ -14668,6 +14970,17 @@ OMPClause *Sema::ActOnOpenMPPartialClause(Expr *FactorExpr,
FactorExpr);
}
+OMPClause *Sema::ActOnOpenMPAlignClause(Expr *A, SourceLocation StartLoc,
+ SourceLocation LParenLoc,
+ SourceLocation EndLoc) {
+ ExprResult AlignVal;
+ AlignVal = VerifyPositiveIntegerConstantInClause(A, OMPC_align);
+ if (AlignVal.isInvalid())
+ return nullptr;
+ return OMPAlignClause::Create(Context, AlignVal.get(), StartLoc, LParenLoc,
+ EndLoc);
+}
+
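
// Illustrative sketch of the align clause accepted above (assumes OpenMP 5.1
// allocate-directive syntax): the alignment must be a constant power of two,
// checked via VerifyPositiveIntegerConstantInClause with OMPC_align.
void sketch_align() {
  double buf[256];
  #pragma omp allocate(buf) align(64)
  buf[0] = 1.0;
}
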
OMPClause *Sema::ActOnOpenMPSingleExprWithArgClause(
OpenMPClauseKind Kind, ArrayRef<unsigned> Argument, Expr *Expr,
SourceLocation StartLoc, SourceLocation LParenLoc,
@@ -14783,6 +15096,8 @@ OMPClause *Sema::ActOnOpenMPSingleExprWithArgClause(
case OMPC_exclusive:
case OMPC_uses_allocators:
case OMPC_affinity:
+ case OMPC_when:
+ case OMPC_bind:
default:
llvm_unreachable("Clause is not allowed.");
}
@@ -15032,6 +15347,7 @@ OMPClause *Sema::ActOnOpenMPClause(OpenMPClauseKind Kind,
case OMPC_exclusive:
case OMPC_uses_allocators:
case OMPC_affinity:
+ case OMPC_when:
default:
llvm_unreachable("Clause is not allowed.");
}
@@ -15574,6 +15890,8 @@ OMPClause *Sema::ActOnOpenMPVarListClause(
case OMPC_nocontext:
case OMPC_detach:
case OMPC_uses_allocators:
+ case OMPC_when:
+ case OMPC_bind:
default:
llvm_unreachable("Clause is not allowed.");
}
@@ -15736,29 +16054,6 @@ OMPClause *Sema::ActOnOpenMPPrivateClause(ArrayRef<Expr *> VarList,
PrivateCopies);
}
-namespace {
-class DiagsUninitializedSeveretyRAII {
-private:
- DiagnosticsEngine &Diags;
- SourceLocation SavedLoc;
- bool IsIgnored = false;
-
-public:
- DiagsUninitializedSeveretyRAII(DiagnosticsEngine &Diags, SourceLocation Loc,
- bool IsIgnored)
- : Diags(Diags), SavedLoc(Loc), IsIgnored(IsIgnored) {
- if (!IsIgnored) {
- Diags.setSeverity(/*Diag*/ diag::warn_uninit_self_reference_in_init,
- /*Map*/ diag::Severity::Ignored, Loc);
- }
- }
- ~DiagsUninitializedSeveretyRAII() {
- if (!IsIgnored)
- Diags.popMappings(SavedLoc);
- }
-};
-}
-
OMPClause *Sema::ActOnOpenMPFirstprivateClause(ArrayRef<Expr *> VarList,
SourceLocation StartLoc,
SourceLocation LParenLoc,
@@ -17107,14 +17402,13 @@ static bool actOnOMPReductionKindClause(
Type = ComplexTy->getElementType();
if (Type->isRealFloatingType()) {
llvm::APFloat InitValue = llvm::APFloat::getAllOnesValue(
- Context.getFloatTypeSemantics(Type),
- Context.getTypeSize(Type));
+ Context.getFloatTypeSemantics(Type));
Init = FloatingLiteral::Create(Context, InitValue, /*isexact=*/true,
Type, ELoc);
} else if (Type->isScalarType()) {
uint64_t Size = Context.getTypeSize(Type);
QualType IntTy = Context.getIntTypeForBitwidth(Size, /*Signed=*/0);
- llvm::APInt InitValue = llvm::APInt::getAllOnesValue(Size);
+ llvm::APInt InitValue = llvm::APInt::getAllOnes(Size);
Init = IntegerLiteral::Create(Context, InitValue, IntTy, ELoc);
}
if (Init && OrigType->isAnyComplexType()) {
@@ -18149,6 +18443,11 @@ Sema::ActOnOpenMPDependClause(Expr *DepModifier, OpenMPDependClauseKind DepKind,
<< "'source' or 'sink'" << getOpenMPClauseName(OMPC_depend);
return nullptr;
}
+ if (DSAStack->getCurrentDirective() == OMPD_taskwait &&
+ DepKind == OMPC_DEPEND_mutexinoutset) {
+ Diag(DepLoc, diag::err_omp_taskwait_depend_mutexinoutset_not_allowed);
+ return nullptr;
+ }
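
// Illustrative sketch of the new taskwait handling: depend clauses are now
// accepted on taskwait, but mutexinoutset is rejected by the check above.
void sketch_taskwait(int *x) {
  #pragma omp task depend(inout: x[0])
  x[0] += 1;
  #pragma omp taskwait depend(in: x[0])                 // OK in OpenMP 5.x
  // #pragma omp taskwait depend(mutexinoutset: x[0])   // would be diagnosed
}
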
if ((DSAStack->getCurrentDirective() != OMPD_ordered ||
DSAStack->getCurrentDirective() == OMPD_depobj) &&
(DepKind == OMPC_DEPEND_unknown || DepKind == OMPC_DEPEND_source ||
@@ -18322,7 +18621,7 @@ Sema::ActOnOpenMPDependClause(Expr *DepModifier, OpenMPDependClauseKind DepKind,
Expr::EvalResult Result;
if (Length && !Length->isValueDependent() &&
Length->EvaluateAsInt(Result, Context) &&
- Result.Val.getInt().isNullValue()) {
+ Result.Val.getInt().isZero()) {
Diag(ELoc,
diag::err_omp_depend_zero_length_array_section_not_allowed)
<< SimpleExpr->getSourceRange();
@@ -18443,11 +18742,8 @@ OMPClause *Sema::ActOnOpenMPDeviceClause(OpenMPDeviceClauseModifier Modifier,
static bool checkTypeMappable(SourceLocation SL, SourceRange SR, Sema &SemaRef,
DSAStackTy *Stack, QualType QTy,
bool FullCheck = true) {
- NamedDecl *ND;
- if (QTy->isIncompleteType(&ND)) {
- SemaRef.Diag(SL, diag::err_incomplete_type) << QTy << SR;
+ if (SemaRef.RequireCompleteType(SL, QTy, diag::err_incomplete_type))
return false;
- }
if (FullCheck && !SemaRef.CurContext->isDependentContext() &&
!QTy.isTriviallyCopyableType(SemaRef.Context))
SemaRef.Diag(SL, diag::warn_omp_non_trivial_type_mapped) << QTy << SR;
@@ -18718,7 +19014,7 @@ public:
Expr::EvalResult Result;
if (!AE->getIdx()->isValueDependent() &&
AE->getIdx()->EvaluateAsInt(Result, SemaRef.getASTContext()) &&
- !Result.Val.getInt().isNullValue()) {
+ !Result.Val.getInt().isZero()) {
SemaRef.Diag(AE->getIdx()->getExprLoc(),
diag::err_omp_invalid_map_this_expr);
SemaRef.Diag(AE->getIdx()->getExprLoc(),
@@ -18735,7 +19031,10 @@ public:
}
bool VisitOMPArraySectionExpr(OMPArraySectionExpr *OASE) {
- assert(!NoDiagnose && "Array sections cannot be implicitly mapped.");
+ // Since OpenMP 5.0, an array section in a reduction clause may be
+ // implicitly mapped.
+ assert(!(SemaRef.getLangOpts().OpenMP < 50 && NoDiagnose) &&
+ "Array sections cannot be implicitly mapped.");
Expr *E = OASE->getBase()->IgnoreParenImpCasts();
QualType CurType =
OMPArraySectionExpr::getBaseOriginalType(E).getCanonicalType();
@@ -18778,6 +19077,8 @@ public:
} else if (AllowUnitySizeArraySection && NotUnity) {
// A unity or whole array section is not allowed and that is not
// compatible with the properties of the current array section.
+ if (NoDiagnose)
+ return false;
SemaRef.Diag(
ELoc, diag::err_array_section_does_not_specify_contiguous_storage)
<< OASE->getSourceRange();
@@ -18792,7 +19093,7 @@ public:
Expr::EvalResult ResultL;
if (!OASE->getLength()->isValueDependent() &&
OASE->getLength()->EvaluateAsInt(ResultR, SemaRef.getASTContext()) &&
- !ResultR.Val.getInt().isOneValue()) {
+ !ResultR.Val.getInt().isOne()) {
SemaRef.Diag(OASE->getLength()->getExprLoc(),
diag::err_omp_invalid_map_this_expr);
SemaRef.Diag(OASE->getLength()->getExprLoc(),
@@ -18801,7 +19102,7 @@ public:
if (OASE->getLowerBound() && !OASE->getLowerBound()->isValueDependent() &&
OASE->getLowerBound()->EvaluateAsInt(ResultL,
SemaRef.getASTContext()) &&
- !ResultL.Val.getInt().isNullValue()) {
+ !ResultL.Val.getInt().isZero()) {
SemaRef.Diag(OASE->getLowerBound()->getExprLoc(),
diag::err_omp_invalid_map_this_expr);
SemaRef.Diag(OASE->getLowerBound()->getExprLoc(),
@@ -19321,7 +19622,8 @@ static void checkMappableExpressionList(
CXXScopeSpec &MapperIdScopeSpec, DeclarationNameInfo MapperId,
ArrayRef<Expr *> UnresolvedMappers,
OpenMPMapClauseKind MapType = OMPC_MAP_unknown,
- bool IsMapTypeImplicit = false) {
+ ArrayRef<OpenMPMapModifierKind> Modifiers = None,
+ bool IsMapTypeImplicit = false, bool NoDiagnose = false) {
// We only expect mappable expressions in 'to', 'from', and 'map' clauses.
assert((CKind == OMPC_map || CKind == OMPC_to || CKind == OMPC_from) &&
"Unexpected clause kind with mappable expressions!");
@@ -19342,6 +19644,9 @@ static void checkMappableExpressionList(
bool UpdateUMIt = false;
Expr *UnresolvedMapper = nullptr;
+ bool HasHoldModifier =
+ llvm::is_contained(Modifiers, OMPC_MAP_MODIFIER_ompx_hold);
+
// Keep track of the mappable components and base declarations in this clause.
// Each entry in the list is going to have a list of components associated. We
// record each set of the components so that we can build the clause later on.
@@ -19400,9 +19705,9 @@ static void checkMappableExpressionList(
// Obtain the array or member expression bases if required. Also, fill the
// components array with all the components identified in the process.
- const Expr *BE = checkMapClauseExpressionBase(
- SemaRef, SimpleExpr, CurComponents, CKind, DSAS->getCurrentDirective(),
- /*NoDiagnose=*/false);
+ const Expr *BE =
+ checkMapClauseExpressionBase(SemaRef, SimpleExpr, CurComponents, CKind,
+ DSAS->getCurrentDirective(), NoDiagnose);
if (!BE)
continue;
@@ -19448,6 +19753,8 @@ static void checkMappableExpressionList(
// OpenMP 4.5 [2.10.5, target update Construct]
// threadprivate variables cannot appear in a from clause.
if (VD && DSAS->isThreadPrivate(VD)) {
+ if (NoDiagnose)
+ continue;
DSAStackTy::DSAVarData DVar = DSAS->getTopDSA(VD, /*FromParent=*/false);
SemaRef.Diag(ELoc, diag::err_omp_threadprivate_in_clause)
<< getOpenMPClauseName(CKind);
@@ -19508,7 +19815,7 @@ static void checkMappableExpressionList(
// OpenMP 4.5 [2.15.5.1, map Clause, Restrictions, p.9]
// A list item must have a mappable type.
if (!checkTypeMappable(VE->getExprLoc(), VE->getSourceRange(), SemaRef,
- DSAS, Type))
+ DSAS, Type, /*FullCheck=*/true))
continue;
if (CKind == OMPC_map) {
@@ -19540,6 +19847,21 @@ static void checkMappableExpressionList(
continue;
}
+ // The 'ompx_hold' modifier is specifically intended to be used on a
+ // 'target' or 'target data' directive to prevent data from being unmapped
+ // during the associated statement. It is not permitted on a 'target
+ // enter data' or 'target exit data' directive, which have no associated
+ // statement.
+ if ((DKind == OMPD_target_enter_data || DKind == OMPD_target_exit_data) &&
+ HasHoldModifier) {
+ SemaRef.Diag(StartLoc,
+ diag::err_omp_invalid_map_type_modifier_for_directive)
+ << getOpenMPSimpleClauseTypeName(OMPC_map,
+ OMPC_MAP_MODIFIER_ompx_hold)
+ << getOpenMPDirectiveName(DKind);
+ continue;
+ }
+
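
// Illustrative sketch of the restriction above (ompx_hold is a Clang
// extension): the modifier is accepted on target and target data, which have
// an associated statement, but rejected on target enter/exit data.
void sketch_ompx_hold(double *a, int n) {
  #pragma omp target data map(ompx_hold, tofrom: a[0:n])
  {
    #pragma omp target
    a[0] = 1.0;
  }
  // #pragma omp target enter data map(ompx_hold, to: a[0:n])  // would be diagnosed
}
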
// target, target data
// OpenMP 5.0 [2.12.2, Restrictions, p. 163]
// OpenMP 5.0 [2.12.5, Restrictions, p. 174]
@@ -19611,17 +19933,19 @@ OMPClause *Sema::ActOnOpenMPMapClause(
CXXScopeSpec &MapperIdScopeSpec, DeclarationNameInfo &MapperId,
OpenMPMapClauseKind MapType, bool IsMapTypeImplicit, SourceLocation MapLoc,
SourceLocation ColonLoc, ArrayRef<Expr *> VarList,
- const OMPVarListLocTy &Locs, ArrayRef<Expr *> UnresolvedMappers) {
+ const OMPVarListLocTy &Locs, bool NoDiagnose,
+ ArrayRef<Expr *> UnresolvedMappers) {
OpenMPMapModifierKind Modifiers[] = {
OMPC_MAP_MODIFIER_unknown, OMPC_MAP_MODIFIER_unknown,
- OMPC_MAP_MODIFIER_unknown, OMPC_MAP_MODIFIER_unknown};
+ OMPC_MAP_MODIFIER_unknown, OMPC_MAP_MODIFIER_unknown,
+ OMPC_MAP_MODIFIER_unknown};
SourceLocation ModifiersLoc[NumberOfOMPMapClauseModifiers];
// Process map-type-modifiers, flag errors for duplicate modifiers.
unsigned Count = 0;
for (unsigned I = 0, E = MapTypeModifiers.size(); I < E; ++I) {
if (MapTypeModifiers[I] != OMPC_MAP_MODIFIER_unknown &&
- llvm::find(Modifiers, MapTypeModifiers[I]) != std::end(Modifiers)) {
+ llvm::is_contained(Modifiers, MapTypeModifiers[I])) {
Diag(MapTypeModifiersLoc[I], diag::err_omp_duplicate_map_type_modifier);
continue;
}
@@ -19635,7 +19959,8 @@ OMPClause *Sema::ActOnOpenMPMapClause(
MappableVarListInfo MVLI(VarList);
checkMappableExpressionList(*this, DSAStack, OMPC_map, MVLI, Locs.StartLoc,
MapperIdScopeSpec, MapperId, UnresolvedMappers,
- MapType, IsMapTypeImplicit);
+ MapType, Modifiers, IsMapTypeImplicit,
+ NoDiagnose);
// We need to produce a map clause even if we don't have variables so that
// other diagnostics related with non-existing map clauses are accurate.
@@ -20609,7 +20934,7 @@ OMPClause *Sema::ActOnOpenMPToClause(
unsigned Count = 0;
for (unsigned I = 0, E = MotionModifiers.size(); I < E; ++I) {
if (MotionModifiers[I] != OMPC_MOTION_MODIFIER_unknown &&
- llvm::find(Modifiers, MotionModifiers[I]) != std::end(Modifiers)) {
+ llvm::is_contained(Modifiers, MotionModifiers[I])) {
Diag(MotionModifiersLoc[I], diag::err_omp_duplicate_motion_modifier);
continue;
}
@@ -20646,7 +20971,7 @@ OMPClause *Sema::ActOnOpenMPFromClause(
unsigned Count = 0;
for (unsigned I = 0, E = MotionModifiers.size(); I < E; ++I) {
if (MotionModifiers[I] != OMPC_MOTION_MODIFIER_unknown &&
- llvm::find(Modifiers, MotionModifiers[I]) != std::end(Modifiers)) {
+ llvm::is_contained(Modifiers, MotionModifiers[I])) {
Diag(MotionModifiersLoc[I], diag::err_omp_duplicate_motion_modifier);
continue;
}
@@ -21253,3 +21578,20 @@ OMPClause *Sema::ActOnOpenMPAffinityClause(
return OMPAffinityClause::Create(Context, StartLoc, LParenLoc, ColonLoc,
EndLoc, Modifier, Vars);
}
+
+OMPClause *Sema::ActOnOpenMPBindClause(OpenMPBindClauseKind Kind,
+ SourceLocation KindLoc,
+ SourceLocation StartLoc,
+ SourceLocation LParenLoc,
+ SourceLocation EndLoc) {
+ if (Kind == OMPC_BIND_unknown) {
+ Diag(KindLoc, diag::err_omp_unexpected_clause_value)
+ << getListOfPossibleValues(OMPC_bind, /*First=*/0,
+ /*Last=*/unsigned(OMPC_BIND_unknown))
+ << getOpenMPClauseName(OMPC_bind);
+ return nullptr;
+ }
+
+ return OMPBindClause::Create(Context, Kind, KindLoc, StartLoc, LParenLoc,
+ EndLoc);
+}
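
// Illustrative sketch of the accepted values: bind(teams), bind(parallel), or
// bind(thread); anything else reaches the diagnostic above.
void sketch_bind_parallel(int n) {
  #pragma omp parallel
  {
    #pragma omp loop bind(parallel)     // bind to the current team of threads
    for (int i = 0; i < n; ++i)
      ;
  }
}
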
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index 0758fbb84107..42b1340f9a65 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -541,8 +541,8 @@ void UserDefinedConversionSequence::dump() const {
/// error. Useful for debugging overloading issues.
void ImplicitConversionSequence::dump() const {
raw_ostream &OS = llvm::errs();
- if (isStdInitializerListElement())
- OS << "Worst std::initializer_list element conversion: ";
+ if (hasInitializerListContainerType())
+ OS << "Worst list element conversion: ";
switch (ConversionKind) {
case StandardConversion:
OS << "Standard conversion: ";
@@ -1869,24 +1869,25 @@ static bool IsStandardConversion(Sema &S, Expr* From, QualType ToType,
SCS.Second = ICK_Complex_Real;
FromType = ToType.getUnqualifiedType();
} else if (FromType->isRealFloatingType() && ToType->isRealFloatingType()) {
- // FIXME: disable conversions between long double and __float128 if
- // their representation is different until there is back end support
+ // FIXME: disable conversions between long double, __ibm128 and __float128
+ // if their representation is different until there is back end support
// We of course allow this conversion if long double is really double.
// Conversions between bfloat and other floats are not permitted.
if (FromType == S.Context.BFloat16Ty || ToType == S.Context.BFloat16Ty)
return false;
- if (&S.Context.getFloatTypeSemantics(FromType) !=
- &S.Context.getFloatTypeSemantics(ToType)) {
- bool Float128AndLongDouble = ((FromType == S.Context.Float128Ty &&
- ToType == S.Context.LongDoubleTy) ||
- (FromType == S.Context.LongDoubleTy &&
- ToType == S.Context.Float128Ty));
- if (Float128AndLongDouble &&
- (&S.Context.getFloatTypeSemantics(S.Context.LongDoubleTy) ==
- &llvm::APFloat::PPCDoubleDouble()))
- return false;
- }
+
+ // Conversions between IEEE-quad and IBM-extended semantics are not
+ // permitted.
+ const llvm::fltSemantics &FromSem =
+ S.Context.getFloatTypeSemantics(FromType);
+ const llvm::fltSemantics &ToSem = S.Context.getFloatTypeSemantics(ToType);
+ if ((&FromSem == &llvm::APFloat::PPCDoubleDouble() &&
+ &ToSem == &llvm::APFloat::IEEEquad()) ||
+ (&FromSem == &llvm::APFloat::IEEEquad() &&
+ &ToSem == &llvm::APFloat::PPCDoubleDouble()))
+ return false;
+
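
// Illustrative PowerPC-specific sketch (assumes __float128 and __ibm128 are
// both available on the target): the two 128-bit formats no longer convert
// implicitly, while float and double still convert to __ibm128.
void take_quad(__float128 q);
void take_ibm(__ibm128 d);
void sketch_fp128(__ibm128 x, double d) {
  take_ibm(d);    // OK: double converts to __ibm128
  take_quad(x);   // error: IBM double-double to IEEE quad is not permitted
}
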
// Floating point conversions (C++ 4.8).
SCS.Second = ICK_Floating_Conversion;
FromType = ToType.getUnqualifiedType();
@@ -2237,7 +2238,8 @@ bool Sema::IsFloatingPointPromotion(QualType FromType, QualType ToType) {
(FromBuiltin->getKind() == BuiltinType::Float ||
FromBuiltin->getKind() == BuiltinType::Double) &&
(ToBuiltin->getKind() == BuiltinType::LongDouble ||
- ToBuiltin->getKind() == BuiltinType::Float128))
+ ToBuiltin->getKind() == BuiltinType::Float128 ||
+ ToBuiltin->getKind() == BuiltinType::Ibm128))
return true;
// Half can be promoted to float.
@@ -3244,6 +3246,19 @@ static bool isQualificationConversionStep(QualType FromType, QualType ToType,
!PreviousToQualsIncludeConst)
return false;
+ // The following wording is from C++20, where the result of the conversion
+ // is T3, not T2.
+ // -- if [...] P1,i [...] is "array of unknown bound of", P3,i is
+ // "array of unknown bound of"
+ if (FromType->isIncompleteArrayType() && !ToType->isIncompleteArrayType())
+ return false;
+
+ // -- if the resulting P3,i is different from P1,i [...], then const is
+ // added to every cv 3_k for 0 < k < i.
+ if (!CStyle && FromType->isConstantArrayType() &&
+ ToType->isIncompleteArrayType() && !PreviousToQualsIncludeConst)
+ return false;
+
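
// Illustrative sketch of the C++20 rules encoded above: an array bound may be
// dropped as part of a qualification conversion, const is then required at
// every outer pointer level, and an unknown bound can never become known.
void sketch_qual_conv() {
  int arr[3];
  int (*p)[3] = &arr;
  int (*q)[] = p;            // OK in C++20: bound dropped at the top level
  int (**pp)[3] = &p;
  int (*const *r)[] = pp;    // OK: const added at the outer level
  // int (**s)[] = pp;       // ill-formed: const missing at the outer level
  // int (*t)[3] = q;        // ill-formed: unknown bound cannot become known
}
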
// Keep track of whether all prior cv-qualifiers in the "to" type
// include const.
PreviousToQualsIncludeConst =
@@ -3775,7 +3790,9 @@ CompareImplicitConversionSequences(Sema &S, SourceLocation Loc,
if (S.getLangOpts().CPlusPlus11 && !S.getLangOpts().WritableStrings &&
hasDeprecatedStringLiteralToCharPtrConversion(ICS1) !=
- hasDeprecatedStringLiteralToCharPtrConversion(ICS2))
+ hasDeprecatedStringLiteralToCharPtrConversion(ICS2) &&
+ // Ill-formedness must not differ
+ ICS1.isBad() == ICS2.isBad())
return hasDeprecatedStringLiteralToCharPtrConversion(ICS1)
? ImplicitConversionSequence::Worse
: ImplicitConversionSequence::Better;
@@ -3801,16 +3818,45 @@ CompareImplicitConversionSequences(Sema &S, SourceLocation Loc,
// list-initialization sequence L2 if:
// - L1 converts to std::initializer_list<X> for some X and L2 does not, or,
// if not that,
- // - L1 converts to type "array of N1 T", L2 converts to type "array of N2 T",
- // and N1 is smaller than N2.,
+ // — L1 and L2 convert to arrays of the same element type, and either the
+ // number of elements n_1 initialized by L1 is less than the number of
+ // elements n_2 initialized by L2, or (C++20) n_1 = n_2 and L2 converts to
+ // an array of unknown bound and L1 does not,
// even if one of the other rules in this paragraph would otherwise apply.
if (!ICS1.isBad()) {
- if (ICS1.isStdInitializerListElement() &&
- !ICS2.isStdInitializerListElement())
- return ImplicitConversionSequence::Better;
- if (!ICS1.isStdInitializerListElement() &&
- ICS2.isStdInitializerListElement())
- return ImplicitConversionSequence::Worse;
+ bool StdInit1 = false, StdInit2 = false;
+ if (ICS1.hasInitializerListContainerType())
+ StdInit1 = S.isStdInitializerList(ICS1.getInitializerListContainerType(),
+ nullptr);
+ if (ICS2.hasInitializerListContainerType())
+ StdInit2 = S.isStdInitializerList(ICS2.getInitializerListContainerType(),
+ nullptr);
+ if (StdInit1 != StdInit2)
+ return StdInit1 ? ImplicitConversionSequence::Better
+ : ImplicitConversionSequence::Worse;
+
+ if (ICS1.hasInitializerListContainerType() &&
+ ICS2.hasInitializerListContainerType())
+ if (auto *CAT1 = S.Context.getAsConstantArrayType(
+ ICS1.getInitializerListContainerType()))
+ if (auto *CAT2 = S.Context.getAsConstantArrayType(
+ ICS2.getInitializerListContainerType())) {
+ if (S.Context.hasSameUnqualifiedType(CAT1->getElementType(),
+ CAT2->getElementType())) {
+ // Both to arrays of the same element type
+ if (CAT1->getSize() != CAT2->getSize())
+ // Different sized, the smaller wins
+ return CAT1->getSize().ult(CAT2->getSize())
+ ? ImplicitConversionSequence::Better
+ : ImplicitConversionSequence::Worse;
+ if (ICS1.isInitializerListOfIncompleteArray() !=
+ ICS2.isInitializerListOfIncompleteArray())
+ // One is incomplete, it loses
+ return ICS2.isInitializerListOfIncompleteArray()
+ ? ImplicitConversionSequence::Better
+ : ImplicitConversionSequence::Worse;
+ }
+ }
}
if (ICS1.isStandard())
@@ -4176,12 +4222,15 @@ static ImplicitConversionSequence::CompareKind
CompareQualificationConversions(Sema &S,
const StandardConversionSequence& SCS1,
const StandardConversionSequence& SCS2) {
- // C++ 13.3.3.2p3:
+ // C++ [over.ics.rank]p3:
// -- S1 and S2 differ only in their qualification conversion and
- // yield similar types T1 and T2 (C++ 4.4), respectively, and the
- // cv-qualification signature of type T1 is a proper subset of
- // the cv-qualification signature of type T2, and S1 is not the
+ // yield similar types T1 and T2 (C++ 4.4), respectively, [...]
+ // [C++98]
+ // [...] and the cv-qualification signature of type T1 is a proper subset
+ // of the cv-qualification signature of type T2, and S1 is not the
// deprecated string literal array-to-pointer conversion (4.2).
+ // [C++2a]
+ // [...] where T1 can be converted to T2 by a qualification conversion.
if (SCS1.First != SCS2.First || SCS1.Second != SCS2.Second ||
SCS1.Third != SCS2.Third || SCS1.Third != ICK_Qualification)
return ImplicitConversionSequence::Indistinguishable;
@@ -4202,79 +4251,35 @@ CompareQualificationConversions(Sema &S,
if (UnqualT1 == UnqualT2)
return ImplicitConversionSequence::Indistinguishable;
- ImplicitConversionSequence::CompareKind Result
- = ImplicitConversionSequence::Indistinguishable;
+ // Don't ever prefer a standard conversion sequence that uses the deprecated
+ // string literal array to pointer conversion.
+ bool CanPick1 = !SCS1.DeprecatedStringLiteralToCharPtr;
+ bool CanPick2 = !SCS2.DeprecatedStringLiteralToCharPtr;
// Objective-C++ ARC:
// Prefer qualification conversions not involving a change in lifetime
- // to qualification conversions that do not change lifetime.
- if (SCS1.QualificationIncludesObjCLifetime !=
- SCS2.QualificationIncludesObjCLifetime) {
- Result = SCS1.QualificationIncludesObjCLifetime
- ? ImplicitConversionSequence::Worse
- : ImplicitConversionSequence::Better;
- }
+ // to qualification conversions that do change lifetime.
+ if (SCS1.QualificationIncludesObjCLifetime &&
+ !SCS2.QualificationIncludesObjCLifetime)
+ CanPick1 = false;
+ if (SCS2.QualificationIncludesObjCLifetime &&
+ !SCS1.QualificationIncludesObjCLifetime)
+ CanPick2 = false;
- while (S.Context.UnwrapSimilarTypes(T1, T2)) {
- // Within each iteration of the loop, we check the qualifiers to
- // determine if this still looks like a qualification
- // conversion. Then, if all is well, we unwrap one more level of
- // pointers or pointers-to-members and do it all again
- // until there are no more pointers or pointers-to-members left
- // to unwrap. This essentially mimics what
- // IsQualificationConversion does, but here we're checking for a
- // strict subset of qualifiers.
- if (T1.getQualifiers().withoutObjCLifetime() ==
- T2.getQualifiers().withoutObjCLifetime())
- // The qualifiers are the same, so this doesn't tell us anything
- // about how the sequences rank.
- // ObjC ownership quals are omitted above as they interfere with
- // the ARC overload rule.
- ;
- else if (T2.isMoreQualifiedThan(T1)) {
- // T1 has fewer qualifiers, so it could be the better sequence.
- if (Result == ImplicitConversionSequence::Worse)
- // Neither has qualifiers that are a subset of the other's
- // qualifiers.
- return ImplicitConversionSequence::Indistinguishable;
-
- Result = ImplicitConversionSequence::Better;
- } else if (T1.isMoreQualifiedThan(T2)) {
- // T2 has fewer qualifiers, so it could be the better sequence.
- if (Result == ImplicitConversionSequence::Better)
- // Neither has qualifiers that are a subset of the other's
- // qualifiers.
- return ImplicitConversionSequence::Indistinguishable;
-
- Result = ImplicitConversionSequence::Worse;
- } else {
- // Qualifiers are disjoint.
- return ImplicitConversionSequence::Indistinguishable;
- }
-
- // If the types after this point are equivalent, we're done.
- if (S.Context.hasSameUnqualifiedType(T1, T2))
- break;
- }
-
- // Check that the winning standard conversion sequence isn't using
- // the deprecated string literal array to pointer conversion.
- switch (Result) {
- case ImplicitConversionSequence::Better:
- if (SCS1.DeprecatedStringLiteralToCharPtr)
- Result = ImplicitConversionSequence::Indistinguishable;
- break;
-
- case ImplicitConversionSequence::Indistinguishable:
- break;
-
- case ImplicitConversionSequence::Worse:
- if (SCS2.DeprecatedStringLiteralToCharPtr)
- Result = ImplicitConversionSequence::Indistinguishable;
- break;
- }
-
- return Result;
+ bool ObjCLifetimeConversion;
+ if (CanPick1 &&
+ !S.IsQualificationConversion(T1, T2, false, ObjCLifetimeConversion))
+ CanPick1 = false;
+ // FIXME: In Objective-C ARC, we can have qualification conversions in both
+ // directions, so we can't short-cut this second check in general.
+ if (CanPick2 &&
+ !S.IsQualificationConversion(T2, T1, false, ObjCLifetimeConversion))
+ CanPick2 = false;
+
+ if (CanPick1 != CanPick2)
+ return CanPick1 ? ImplicitConversionSequence::Better
+ : ImplicitConversionSequence::Worse;
+ return ImplicitConversionSequence::Indistinguishable;
}
/// CompareDerivedToBaseConversions - Compares two standard conversion
@@ -5009,9 +5014,15 @@ TryListConversion(Sema &S, InitListExpr *From, QualType ToType,
ImplicitConversionSequence Result;
Result.setBad(BadConversionSequence::no_conversion, From, ToType);
- // We need a complete type for what follows. Incomplete types can never be
- // initialized from init lists.
- if (!S.isCompleteType(From->getBeginLoc(), ToType))
+ // We need a complete type for what follows. With one C++20 exception,
+ // incomplete types can never be initialized from init lists.
+ QualType InitTy = ToType;
+ const ArrayType *AT = S.Context.getAsArrayType(ToType);
+ if (AT && S.getLangOpts().CPlusPlus20)
+ if (const auto *IAT = dyn_cast<IncompleteArrayType>(AT))
+ // C++20 allows list initialization of an incomplete array type.
+ InitTy = IAT->getElementType();
+ if (!S.isCompleteType(From->getBeginLoc(), InitTy))
return Result;
// Per DR1467:
@@ -5035,18 +5046,16 @@ TryListConversion(Sema &S, InitListExpr *From, QualType ToType,
AllowObjCWritebackConversion);
}
- if (const auto *AT = S.Context.getAsArrayType(ToType)) {
- if (S.IsStringInit(From->getInit(0), AT)) {
- InitializedEntity Entity =
+ if (AT && S.IsStringInit(From->getInit(0), AT)) {
+ InitializedEntity Entity =
InitializedEntity::InitializeParameter(S.Context, ToType,
/*Consumed=*/false);
- if (S.CanPerformCopyInitialization(Entity, From)) {
- Result.setStandard();
- Result.Standard.setAsIdentityConversion();
- Result.Standard.setFromType(ToType);
- Result.Standard.setAllToTypes(ToType);
- return Result;
- }
+ if (S.CanPerformCopyInitialization(Entity, From)) {
+ Result.setStandard();
+ Result.Standard.setAsIdentityConversion();
+ Result.Standard.setFromType(ToType);
+ Result.Standard.setAllToTypes(ToType);
+ return Result;
}
}
}
@@ -5064,43 +5073,89 @@ TryListConversion(Sema &S, InitListExpr *From, QualType ToType,
// default-constructible, and if all the elements of the initializer list
// can be implicitly converted to X, the implicit conversion sequence is
// the worst conversion necessary to convert an element of the list to X.
- //
- // FIXME: We're missing a lot of these checks.
- bool toStdInitializerList = false;
- QualType X;
- if (ToType->isArrayType())
- X = S.Context.getAsArrayType(ToType)->getElementType();
- else
- toStdInitializerList = S.isStdInitializerList(ToType, &X);
- if (!X.isNull()) {
- for (unsigned i = 0, e = From->getNumInits(); i < e; ++i) {
- Expr *Init = From->getInit(i);
- ImplicitConversionSequence ICS =
- TryCopyInitialization(S, Init, X, SuppressUserConversions,
- InOverloadResolution,
- AllowObjCWritebackConversion);
- // If a single element isn't convertible, fail.
- if (ICS.isBad()) {
- Result = ICS;
- break;
+ if (AT || S.isStdInitializerList(ToType, &InitTy)) {
+ unsigned e = From->getNumInits();
+ ImplicitConversionSequence DfltElt;
+ DfltElt.setBad(BadConversionSequence::no_conversion, QualType(),
+ QualType());
+ QualType ContTy = ToType;
+ bool IsUnbounded = false;
+ if (AT) {
+ InitTy = AT->getElementType();
+ if (ConstantArrayType const *CT = dyn_cast<ConstantArrayType>(AT)) {
+ if (CT->getSize().ult(e)) {
+ // Too many inits, fatally bad
+ Result.setBad(BadConversionSequence::too_many_initializers, From,
+ ToType);
+ Result.setInitializerListContainerType(ContTy, IsUnbounded);
+ return Result;
+ }
+ if (CT->getSize().ugt(e)) {
+ // Need an init from empty {}, is there one?
+ InitListExpr EmptyList(S.Context, From->getEndLoc(), None,
+ From->getEndLoc());
+ EmptyList.setType(S.Context.VoidTy);
+ DfltElt = TryListConversion(
+ S, &EmptyList, InitTy, SuppressUserConversions,
+ InOverloadResolution, AllowObjCWritebackConversion);
+ if (DfltElt.isBad()) {
+ // No {} init, fatally bad
+ Result.setBad(BadConversionSequence::too_few_initializers, From,
+ ToType);
+ Result.setInitializerListContainerType(ContTy, IsUnbounded);
+ return Result;
+ }
+ }
+ } else {
+ assert(isa<IncompleteArrayType>(AT) && "Expected incomplete array");
+ IsUnbounded = true;
+ if (!e) {
+ // Cannot convert to zero-sized.
+ Result.setBad(BadConversionSequence::too_few_initializers, From,
+ ToType);
+ Result.setInitializerListContainerType(ContTy, IsUnbounded);
+ return Result;
+ }
+ llvm::APInt Size(S.Context.getTypeSize(S.Context.getSizeType()), e);
+ ContTy = S.Context.getConstantArrayType(InitTy, Size, nullptr,
+ ArrayType::Normal, 0);
}
- // Otherwise, look for the worst conversion.
- if (Result.isBad() || CompareImplicitConversionSequences(
- S, From->getBeginLoc(), ICS, Result) ==
- ImplicitConversionSequence::Worse)
- Result = ICS;
}
- // For an empty list, we won't have computed any conversion sequence.
- // Introduce the identity conversion sequence.
- if (From->getNumInits() == 0) {
- Result.setStandard();
- Result.Standard.setAsIdentityConversion();
- Result.Standard.setFromType(ToType);
- Result.Standard.setAllToTypes(ToType);
+ Result.setStandard();
+ Result.Standard.setAsIdentityConversion();
+ Result.Standard.setFromType(InitTy);
+ Result.Standard.setAllToTypes(InitTy);
+ for (unsigned i = 0; i < e; ++i) {
+ Expr *Init = From->getInit(i);
+ ImplicitConversionSequence ICS = TryCopyInitialization(
+ S, Init, InitTy, SuppressUserConversions, InOverloadResolution,
+ AllowObjCWritebackConversion);
+
+ // Keep the worse conversion seen so far.
+ // FIXME: Sequences are not totally ordered, so 'worse' can be
+ // ambiguous. CWG has been informed.
+ if (CompareImplicitConversionSequences(S, From->getBeginLoc(), ICS,
+ Result) ==
+ ImplicitConversionSequence::Worse) {
+ Result = ICS;
+ // Bail as soon as we find something unconvertible.
+ if (Result.isBad()) {
+ Result.setInitializerListContainerType(ContTy, IsUnbounded);
+ return Result;
+ }
+ }
}
- Result.setStdInitializerListElement(toStdInitializerList);
+ // If we needed any implicit {} initialization, compare that now.
+ // over.ics.list/6 indicates we should compare that conversion. Again CWG
+ // has been informed that this might not be the best thing.
+ if (!DfltElt.isBad() && CompareImplicitConversionSequences(
+ S, From->getEndLoc(), DfltElt, Result) ==
+ ImplicitConversionSequence::Worse)
+ Result = DfltElt;
+ // Record the type being initialized so that we may compare sequences
+ Result.setInitializerListContainerType(ContTy, IsUnbounded);
return Result;
}
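The rewritten block above teaches TryListConversion about C++20 list-initialization of array types, including arrays of unknown bound (see the too_few_initializers / too_many_initializers failure kinds). A minimal sketch, assuming a C++20 compiler, of the kind of call such conversion sequences now model; the names here are illustrative only:

    // Sketch only: a braced list can bind to a reference to an array of
    // unknown bound; the temporary's bound is taken from the list.
    void take(const int (&a)[]);   // parameter of incomplete array type

    void caller() {
      take({1, 2, 3});             // materializes a temporary of type 'const int[3]'
    }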
@@ -5479,6 +5534,10 @@ Sema::PerformObjectArgumentInitialization(Expr *From,
case BadConversionSequence::no_conversion:
case BadConversionSequence::unrelated_class:
break;
+
+ case BadConversionSequence::too_few_initializers:
+ case BadConversionSequence::too_many_initializers:
+ llvm_unreachable("Lists are not objects");
}
return Diag(From->getBeginLoc(), diag::err_member_function_call_bad_type)
@@ -5635,7 +5694,7 @@ static ExprResult CheckConvertedConstantExpression(Sema &S, Expr *From,
// expression is a constant expression and the implicit conversion
// sequence contains only [... list of conversions ...].
ImplicitConversionSequence ICS =
- CCE == Sema::CCEK_ExplicitBool
+ (CCE == Sema::CCEK_ExplicitBool || CCE == Sema::CCEK_Noexcept)
? TryContextuallyConvertToBool(S, From)
: TryCopyInitialization(S, From, T,
/*SuppressUserConversions=*/false,
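The CCEK_Noexcept case added above uses contextual conversion to bool, as for explicit(bool). A hedged example of a noexcept-specifier operand that relies on exactly that conversion:

    // Sketch only: the noexcept-specifier operand is a constant expression
    // contextually converted to bool, so a non-bool constant is acceptable.
    constexpr int level = 2;
    void callee() noexcept(level);        // 'level' converted to bool (true)
    static_assert(noexcept(callee()));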
@@ -6397,7 +6456,8 @@ void Sema::AddOverloadCandidate(
// parameters is viable only if it has an ellipsis in its parameter
// list (8.3.5).
if (TooManyArguments(NumParams, Args.size(), PartialOverloading) &&
- !Proto->isVariadic()) {
+ !Proto->isVariadic() &&
+ shouldEnforceArgLimit(PartialOverloading, Function)) {
Candidate.Viable = false;
Candidate.FailureKind = ovl_fail_too_many_arguments;
return;
@@ -6887,7 +6947,8 @@ Sema::AddMethodCandidate(CXXMethodDecl *Method, DeclAccessPair FoundDecl,
// parameters is viable only if it has an ellipsis in its parameter
// list (8.3.5).
if (TooManyArguments(NumParams, Args.size(), PartialOverloading) &&
- !Proto->isVariadic()) {
+ !Proto->isVariadic() &&
+ shouldEnforceArgLimit(PartialOverloading, Method)) {
Candidate.Viable = false;
Candidate.FailureKind = ovl_fail_too_many_arguments;
return;
@@ -8173,6 +8234,8 @@ class BuiltinOperatorOverloadBuilder {
ArithmeticTypes.push_back(S.Context.LongDoubleTy);
if (S.Context.getTargetInfo().hasFloat128Type())
ArithmeticTypes.push_back(S.Context.Float128Ty);
+ if (S.Context.getTargetInfo().hasIbm128Type())
+ ArithmeticTypes.push_back(S.Context.Ibm128Ty);
// Start of integral types.
FirstIntegralType = ArithmeticTypes.size();
@@ -9525,7 +9588,8 @@ static bool haveSameParameterTypes(ASTContext &Context, const FunctionDecl *F1,
for (unsigned I = 0; I != NumParams; ++I) {
QualType T1 = NextParam(F1, I1, I == 0);
QualType T2 = NextParam(F2, I2, I == 0);
- if (!T1.isNull() && !T1.isNull() && !Context.hasSameUnqualifiedType(T1, T2))
+ assert(!T1.isNull() && !T2.isNull() && "Unexpected null param types");
+ if (!Context.hasSameUnqualifiedType(T1, T2))
return false;
}
return true;
@@ -9798,9 +9862,9 @@ bool clang::isBetterOverloadCandidate(
// F1 and F2 have the same type.
// FIXME: Implement the "all parameters have the same type" check.
bool Cand1IsInherited =
- dyn_cast_or_null<ConstructorUsingShadowDecl>(Cand1.FoundDecl.getDecl());
+ isa_and_nonnull<ConstructorUsingShadowDecl>(Cand1.FoundDecl.getDecl());
bool Cand2IsInherited =
- dyn_cast_or_null<ConstructorUsingShadowDecl>(Cand2.FoundDecl.getDecl());
+ isa_and_nonnull<ConstructorUsingShadowDecl>(Cand2.FoundDecl.getDecl());
if (Cand1IsInherited != Cand2IsInherited)
return Cand2IsInherited;
else if (Cand1IsInherited) {
@@ -10524,7 +10588,11 @@ static void DiagnoseBadConversion(Sema &S, OverloadCandidate *Cand,
S.Diag(Fn->getLocation(), diag::note_ovl_candidate_bad_list_argument)
<< (unsigned)FnKindPair.first << (unsigned)FnKindPair.second << FnDesc
<< (FromExpr ? FromExpr->getSourceRange() : SourceRange()) << FromTy
- << ToTy << (unsigned)isObjectArgument << I + 1;
+ << ToTy << (unsigned)isObjectArgument << I + 1
+ << (Conv.Bad.Kind == BadConversionSequence::too_few_initializers ? 1
+ : Conv.Bad.Kind == BadConversionSequence::too_many_initializers
+ ? 2
+ : 0);
MaybeEmitInheritedConstructorNote(S, Cand->FoundDecl);
return;
}
@@ -12607,7 +12675,7 @@ static void AddOverloadedCallCandidate(Sema &S,
return;
}
// Prevent ill-formed function decls to be added as overload candidates.
- if (!dyn_cast<FunctionProtoType>(Func->getType()->getAs<FunctionType>()))
+ if (!isa<FunctionProtoType>(Func->getType()->getAs<FunctionType>()))
return;
S.AddOverloadCandidate(Func, FoundDecl, Args, CandidateSet,
@@ -14096,7 +14164,8 @@ Sema::CreateOverloadedArraySubscriptExpr(SourceLocation LLoc,
Method->getType()->castAs<FunctionProtoType>()))
return ExprError();
- return MaybeBindToTemporary(TheCall);
+ return CheckForImmediateInvocation(MaybeBindToTemporary(TheCall),
+ FnDecl);
} else {
// We matched a built-in operator. Convert the arguments, then
// break out so that we will build the appropriate built-in
@@ -14166,6 +14235,7 @@ ExprResult Sema::BuildCallToMemberFunction(Scope *S, Expr *MemExprE,
SourceLocation LParenLoc,
MultiExprArg Args,
SourceLocation RParenLoc,
+ Expr *ExecConfig, bool IsExecConfig,
bool AllowRecovery) {
assert(MemExprE->getType() == Context.BoundMemberTy ||
MemExprE->getType() == Context.OverloadTy);
@@ -14361,8 +14431,8 @@ ExprResult Sema::BuildCallToMemberFunction(Scope *S, Expr *MemExprE,
// If overload resolution picked a static member, build a
// non-member call based on that function.
if (Method->isStatic()) {
- return BuildResolvedCallExpr(MemExprE, Method, LParenLoc, Args,
- RParenLoc);
+ return BuildResolvedCallExpr(MemExprE, Method, LParenLoc, Args, RParenLoc,
+ ExecConfig, IsExecConfig);
}
MemExpr = cast<MemberExpr>(MemExprE->IgnoreParens());
@@ -14850,7 +14920,7 @@ Sema::BuildOverloadedArrowExpr(Scope *S, Expr *Base, SourceLocation OpLoc,
Method->getType()->castAs<FunctionProtoType>()))
return ExprError();
- return MaybeBindToTemporary(TheCall);
+ return CheckForImmediateInvocation(MaybeBindToTemporary(TheCall), Method);
}
/// BuildLiteralOperatorCall - Build a UserDefinedLiteral by creating a call to
@@ -15174,3 +15244,21 @@ ExprResult Sema::FixOverloadedFunctionReference(ExprResult E,
FunctionDecl *Fn) {
return FixOverloadedFunctionReference(E.get(), Found, Fn);
}
+
+bool clang::shouldEnforceArgLimit(bool PartialOverloading,
+ FunctionDecl *Function) {
+ if (!PartialOverloading || !Function)
+ return true;
+ if (Function->isVariadic())
+ return false;
+ if (const auto *Proto =
+ dyn_cast<FunctionProtoType>(Function->getFunctionType()))
+ if (Proto->isTemplateVariadic())
+ return false;
+ if (auto *Pattern = Function->getTemplateInstantiationPattern())
+ if (const auto *Proto =
+ dyn_cast<FunctionProtoType>(Pattern->getFunctionType()))
+ if (Proto->isTemplateVariadic())
+ return false;
+ return true;
+}
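shouldEnforceArgLimit() above relaxes the too-many-arguments cutoff during partial overloading for candidates that can still absorb additional arguments. A short sketch of the declaration shapes it exempts:

    // Sketch only: candidates exempt from the argument-count cutoff.
    void c_style(int, ...);                         // C-style variadic function
    template <typename... Ts> void pack(Ts... ts);  // variadic function template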
diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp
index 3b48a53efc0d..815463307ecc 100644
--- a/clang/lib/Sema/SemaSYCL.cpp
+++ b/clang/lib/Sema/SemaSYCL.cpp
@@ -38,7 +38,7 @@ bool Sema::checkSYCLDeviceFunction(SourceLocation Loc, FunctionDecl *Callee) {
"Should only be called during SYCL compilation");
assert(Callee && "Callee may not be null.");
- // Errors in unevaluated context don't need to be generated,
+ // Errors in an unevaluated context don't need to be generated,
// so we can safely skip them.
if (isUnevaluatedContext() || isConstantEvaluated())
return true;
@@ -48,35 +48,3 @@ bool Sema::checkSYCLDeviceFunction(SourceLocation Loc, FunctionDecl *Callee) {
return DiagKind != SemaDiagnosticBuilder::K_Immediate &&
DiagKind != SemaDiagnosticBuilder::K_ImmediateWithCallStack;
}
-
-// The SYCL kernel's 'object type' used for diagnostics and naming/mangling is
-// the first parameter to a sycl_kernel labeled function template. In SYCL1.2.1,
-// this was passed by value, and in SYCL2020, it is passed by reference.
-static QualType GetSYCLKernelObjectType(const FunctionDecl *KernelCaller) {
- assert(KernelCaller->getNumParams() > 0 && "Insufficient kernel parameters");
- QualType KernelParamTy = KernelCaller->getParamDecl(0)->getType();
-
- // SYCL 2020 kernels are passed by reference.
- if (KernelParamTy->isReferenceType())
- return KernelParamTy->getPointeeType();
-
- // SYCL 1.2.1
- return KernelParamTy;
-}
-
-void Sema::AddSYCLKernelLambda(const FunctionDecl *FD) {
- auto MangleCallback = [](ASTContext &Ctx,
- const NamedDecl *ND) -> llvm::Optional<unsigned> {
- if (const auto *RD = dyn_cast<CXXRecordDecl>(ND))
- Ctx.AddSYCLKernelNamingDecl(RD);
- // We always want to go into the lambda mangling (skipping the unnamed
- // struct version), so make sure we return a value here.
- return 1;
- };
-
- QualType Ty = GetSYCLKernelObjectType(FD);
- std::unique_ptr<MangleContext> Ctx{ItaniumMangleContext::create(
- Context, Context.getDiagnostics(), MangleCallback)};
- llvm::raw_null_ostream Out;
- Ctx->mangleTypeName(Ty, Out);
-}
diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp
index 3baccec2d7bb..3c820829864d 100644
--- a/clang/lib/Sema/SemaStmt.cpp
+++ b/clang/lib/Sema/SemaStmt.cpp
@@ -216,9 +216,9 @@ static bool DiagnoseNoDiscard(Sema &S, const WarnUnusedResultAttr *A,
return S.Diag(Loc, diag::warn_unused_result_msg) << A << Msg << R1 << R2;
}
-void Sema::DiagnoseUnusedExprResult(const Stmt *S) {
+void Sema::DiagnoseUnusedExprResult(const Stmt *S, unsigned DiagID) {
if (const LabelStmt *Label = dyn_cast_or_null<LabelStmt>(S))
- return DiagnoseUnusedExprResult(Label->getSubStmt());
+ return DiagnoseUnusedExprResult(Label->getSubStmt(), DiagID);
const Expr *E = dyn_cast_or_null<Expr>(S);
if (!E)
@@ -264,7 +264,6 @@ void Sema::DiagnoseUnusedExprResult(const Stmt *S) {
// Okay, we have an unused result. Depending on what the base expression is,
// we might want to make a more specific diagnostic. Check for one of these
// cases now.
- unsigned DiagID = diag::warn_unused_expr;
if (const FullExpr *Temps = dyn_cast<FullExpr>(E))
E = Temps->getSubExpr();
if (const CXXBindTemporaryExpr *TempExpr = dyn_cast<CXXBindTemporaryExpr>(E))
@@ -339,7 +338,7 @@ void Sema::DiagnoseUnusedExprResult(const Stmt *S) {
if (LangOpts.OpenMP && isa<CallExpr>(Source) &&
POE->getNumSemanticExprs() == 1 &&
isa<CallExpr>(POE->getSemanticExpr(0)))
- return DiagnoseUnusedExprResult(POE->getSemanticExpr(0));
+ return DiagnoseUnusedExprResult(POE->getSemanticExpr(0), DiagID);
if (isa<ObjCSubscriptRefExpr>(Source))
DiagID = diag::warn_unused_container_subscript_expr;
else
@@ -379,7 +378,12 @@ void Sema::DiagnoseUnusedExprResult(const Stmt *S) {
return;
}
- DiagRuntimeBehavior(Loc, nullptr, PDiag(DiagID) << R1 << R2);
+ // Do not diagnose use of a comma operator in a SFINAE context because the
+ // type of the left operand could be used for SFINAE, so technically it is
+ // *used*.
+ if (DiagID != diag::warn_unused_comma_left_operand || !isSFINAEContext())
+ DiagIfReachable(Loc, S ? llvm::makeArrayRef(S) : llvm::None,
+ PDiag(DiagID) << R1 << R2);
}
void Sema::ActOnStartOfCompoundStmt(bool IsStmtExpr) {
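The SFINAE guard added to DiagnoseUnusedExprResult above reflects the comment in the hunk: the left operand of a comma can be deliberately discarded while its type still drives substitution. A minimal sketch, not from the patch, of such a pattern:

    // Sketch only: inside decltype, the discarded left operand of the comma
    // still determines whether substitution succeeds.
    template <typename T>
    auto has_size(T t) -> decltype(t.size(), true) { return true; }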
@@ -543,7 +547,7 @@ Sema::ActOnLabelStmt(SourceLocation IdentLoc, LabelDecl *TheDecl,
}
ReservedIdentifierStatus Status = TheDecl->isReserved(getLangOpts());
- if (Status != ReservedIdentifierStatus::NotReserved &&
+ if (isReservedInAllContexts(Status) &&
!Context.getSourceManager().isInSystemHeader(IdentLoc))
Diag(IdentLoc, diag::warn_reserved_extern_symbol)
<< TheDecl << static_cast<int>(Status);
@@ -858,7 +862,8 @@ public:
};
}
-StmtResult Sema::ActOnIfStmt(SourceLocation IfLoc, bool IsConstexpr,
+StmtResult Sema::ActOnIfStmt(SourceLocation IfLoc,
+ IfStatementKind StatementKind,
SourceLocation LParenLoc, Stmt *InitStmt,
ConditionResult Cond, SourceLocation RParenLoc,
Stmt *thenStmt, SourceLocation ElseLoc,
@@ -871,25 +876,36 @@ StmtResult Sema::ActOnIfStmt(SourceLocation IfLoc, bool IsConstexpr,
IfLoc),
false);
+ bool ConstevalOrNegatedConsteval =
+ StatementKind == IfStatementKind::ConstevalNonNegated ||
+ StatementKind == IfStatementKind::ConstevalNegated;
+
Expr *CondExpr = Cond.get().second;
+ assert((CondExpr || ConstevalOrNegatedConsteval) &&
+ "If statement: missing condition");
// Only call the CommaVisitor when not C89 due to differences in scope flags.
- if ((getLangOpts().C99 || getLangOpts().CPlusPlus) &&
+ if (CondExpr && (getLangOpts().C99 || getLangOpts().CPlusPlus) &&
!Diags.isIgnored(diag::warn_comma_operator, CondExpr->getExprLoc()))
CommaVisitor(*this).Visit(CondExpr);
- if (!elseStmt)
+ if (!ConstevalOrNegatedConsteval && !elseStmt)
DiagnoseEmptyStmtBody(CondExpr->getEndLoc(), thenStmt,
diag::warn_empty_if_body);
- if (IsConstexpr) {
+ if (ConstevalOrNegatedConsteval ||
+ StatementKind == IfStatementKind::Constexpr) {
auto DiagnoseLikelihood = [&](const Stmt *S) {
if (const Attr *A = Stmt::getLikelihoodAttr(S)) {
Diags.Report(A->getLocation(),
- diag::warn_attribute_has_no_effect_on_if_constexpr)
- << A << A->getRange();
+ diag::warn_attribute_has_no_effect_on_compile_time_if)
+ << A << ConstevalOrNegatedConsteval << A->getRange();
Diags.Report(IfLoc,
- diag::note_attribute_has_no_effect_on_if_constexpr_here)
- << SourceRange(IfLoc, LParenLoc.getLocWithOffset(-1));
+ diag::note_attribute_has_no_effect_on_compile_time_if_here)
+ << ConstevalOrNegatedConsteval
+ << SourceRange(IfLoc, (ConstevalOrNegatedConsteval
+ ? thenStmt->getBeginLoc()
+ : LParenLoc)
+ .getLocWithOffset(-1));
}
};
DiagnoseLikelihood(thenStmt);
@@ -908,11 +924,24 @@ StmtResult Sema::ActOnIfStmt(SourceLocation IfLoc, bool IsConstexpr,
}
}
- return BuildIfStmt(IfLoc, IsConstexpr, LParenLoc, InitStmt, Cond, RParenLoc,
+ if (ConstevalOrNegatedConsteval) {
+ bool Immediate = isImmediateFunctionContext();
+ if (CurContext->isFunctionOrMethod()) {
+ const auto *FD =
+ dyn_cast<FunctionDecl>(Decl::castFromDeclContext(CurContext));
+ if (FD && FD->isConsteval())
+ Immediate = true;
+ }
+ if (isUnevaluatedContext() || Immediate)
+ Diags.Report(IfLoc, diag::warn_consteval_if_always_true) << Immediate;
+ }
+
+ return BuildIfStmt(IfLoc, StatementKind, LParenLoc, InitStmt, Cond, RParenLoc,
thenStmt, ElseLoc, elseStmt);
}
-StmtResult Sema::BuildIfStmt(SourceLocation IfLoc, bool IsConstexpr,
+StmtResult Sema::BuildIfStmt(SourceLocation IfLoc,
+ IfStatementKind StatementKind,
SourceLocation LParenLoc, Stmt *InitStmt,
ConditionResult Cond, SourceLocation RParenLoc,
Stmt *thenStmt, SourceLocation ElseLoc,
@@ -920,12 +949,13 @@ StmtResult Sema::BuildIfStmt(SourceLocation IfLoc, bool IsConstexpr,
if (Cond.isInvalid())
return StmtError();
- if (IsConstexpr || isa<ObjCAvailabilityCheckExpr>(Cond.get().second))
+ if (StatementKind != IfStatementKind::Ordinary ||
+ isa<ObjCAvailabilityCheckExpr>(Cond.get().second))
setFunctionHasBranchProtectedScope();
- return IfStmt::Create(Context, IfLoc, IsConstexpr, InitStmt, Cond.get().first,
- Cond.get().second, LParenLoc, RParenLoc, thenStmt,
- ElseLoc, elseStmt);
+ return IfStmt::Create(Context, IfLoc, StatementKind, InitStmt,
+ Cond.get().first, Cond.get().second, LParenLoc,
+ RParenLoc, thenStmt, ElseLoc, elseStmt);
}
namespace {
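The ActOnIfStmt/BuildIfStmt changes above thread an IfStatementKind through so that consteval if statements can be handled; inside an immediate function the consteval branch is the one that runs, which is what warn_consteval_if_always_true reports. A minimal sketch, assuming a compiler with C++2b 'if consteval' support:

    // Sketch only: inside a consteval function, 'if consteval' is always taken.
    consteval int pick() {
      if consteval { return 1; }   // always the branch that runs here
      return 2;
    }
    static_assert(pick() == 1);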
@@ -1563,7 +1593,7 @@ Sema::ActOnFinishSwitchStmt(SourceLocation SwitchLoc, Stmt *Switch,
auto DB = Diag(CondExpr->getExprLoc(), TheDefaultStmt
? diag::warn_def_missing_case
: diag::warn_missing_case)
- << (int)UnhandledNames.size();
+ << CondExpr->getSourceRange() << (int)UnhandledNames.size();
for (size_t I = 0, E = std::min(UnhandledNames.size(), (size_t)3);
I != E; ++I)
@@ -2729,7 +2759,7 @@ StmtResult Sema::BuildCXXForRangeStmt(SourceLocation ForLoc,
if (auto *DD = dyn_cast<DecompositionDecl>(LoopVar))
for (auto *Binding : DD->bindings())
Binding->setType(Context.DependentTy);
- LoopVar->setType(SubstAutoType(LoopVar->getType(), Context.DependentTy));
+ LoopVar->setType(SubstAutoTypeDependent(LoopVar->getType()));
}
} else if (!BeginDeclStmt.get()) {
SourceLocation RangeLoc = RangeVar->getLocation();
@@ -3316,7 +3346,7 @@ Sema::ActOnBreakStmt(SourceLocation BreakLoc, Scope *CurScope) {
/// being thrown, or being co_returned from a coroutine. This expression
/// might be modified by the implementation.
///
-/// \param ForceCXX2b Overrides detection of current language mode
+/// \param Mode Overrides detection of current language mode
/// and uses the rules for C++2b.
///
/// \returns An aggregate which contains the Candidate and isMoveEligible
@@ -3453,7 +3483,7 @@ const VarDecl *Sema::getCopyElisionCandidate(NamedReturnInfo &Info,
/// Verify that the initialization sequence that was picked for the
/// first overload resolution is permissible under C++98.
///
-/// Reject (possibly converting) contructors not taking an rvalue reference,
+/// Reject (possibly converting) constructors not taking an rvalue reference,
/// or user conversion operators which are not ref-qualified.
static bool
VerifyInitializationSequenceCXX98(const Sema &S,
@@ -3481,7 +3511,8 @@ VerifyInitializationSequenceCXX98(const Sema &S,
ExprResult Sema::PerformMoveOrCopyInitialization(
const InitializedEntity &Entity, const NamedReturnInfo &NRInfo, Expr *Value,
bool SupressSimplerImplicitMoves) {
- if ((!getLangOpts().CPlusPlus2b || SupressSimplerImplicitMoves) &&
+ if (getLangOpts().CPlusPlus &&
+ (!getLangOpts().CPlusPlus2b || SupressSimplerImplicitMoves) &&
NRInfo.isMoveEligible()) {
ImplicitCastExpr AsRvalue(ImplicitCastExpr::OnStack, Value->getType(),
CK_NoOp, Value, VK_XValue, FPOptionsOverride());
@@ -3652,8 +3683,8 @@ StmtResult Sema::ActOnCapScopeReturnStmt(SourceLocation ReturnLoc,
// In C++ the return statement is handled via a copy initialization.
// the C version of which boils down to CheckSingleAssignmentConstraints.
- InitializedEntity Entity = InitializedEntity::InitializeResult(
- ReturnLoc, FnRetType, NRVOCandidate != nullptr);
+ InitializedEntity Entity =
+ InitializedEntity::InitializeResult(ReturnLoc, FnRetType);
ExprResult Res = PerformMoveOrCopyInitialization(
Entity, NRInfo, RetValExp, SupressSimplerImplicitMoves);
if (Res.isInvalid()) {
@@ -3884,7 +3915,7 @@ StmtResult Sema::BuildReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp) {
if (RetValExp && DiagnoseUnexpandedParameterPack(RetValExp))
return StmtError();
- // HACK: We supress simpler implicit move here in msvc compatibility mode
+ // HACK: We suppress simpler implicit move here in msvc compatibility mode
// just as a temporary work around, as the MSVC STL has issues with
// this change.
bool SupressSimplerImplicitMoves =
@@ -4084,8 +4115,8 @@ StmtResult Sema::BuildReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp) {
// the C version of which boils down to CheckSingleAssignmentConstraints.
if (!HasDependentReturnType && !RetValExp->isTypeDependent()) {
// we have a non-void function with an expression, continue checking
- InitializedEntity Entity = InitializedEntity::InitializeResult(
- ReturnLoc, RetType, NRVOCandidate != nullptr);
+ InitializedEntity Entity =
+ InitializedEntity::InitializeResult(ReturnLoc, RetType);
ExprResult Res = PerformMoveOrCopyInitialization(
Entity, NRInfo, RetValExp, SupressSimplerImplicitMoves);
if (Res.isInvalid()) {
@@ -4156,7 +4187,14 @@ Sema::ActOnObjCAtTryStmt(SourceLocation AtLoc, Stmt *Try,
if (!getLangOpts().ObjCExceptions)
Diag(AtLoc, diag::err_objc_exceptions_disabled) << "@try";
- setFunctionHasBranchProtectedScope();
+ // Objective-C try is incompatible with SEH __try.
+ sema::FunctionScopeInfo *FSI = getCurFunction();
+ if (FSI->FirstSEHTryLoc.isValid()) {
+ Diag(AtLoc, diag::err_mixing_cxx_try_seh_try) << 1;
+ Diag(FSI->FirstSEHTryLoc, diag::note_conflicting_try_here) << "'__try'";
+ }
+
+ FSI->setHasObjCTry(AtLoc);
unsigned NumCatchStmts = CatchStmts.size();
return ObjCAtTryStmt::Create(Context, AtLoc, Try, CatchStmts.data(),
NumCatchStmts, Finally);
@@ -4392,7 +4430,7 @@ StmtResult Sema::ActOnCXXTryBlock(SourceLocation TryLoc, Stmt *TryBlock,
// C++ try is incompatible with SEH __try.
if (!getLangOpts().Borland && FSI->FirstSEHTryLoc.isValid()) {
- Diag(TryLoc, diag::err_mixing_cxx_try_seh_try);
+ Diag(TryLoc, diag::err_mixing_cxx_try_seh_try) << 0;
Diag(FSI->FirstSEHTryLoc, diag::note_conflicting_try_here) << "'__try'";
}
@@ -4476,9 +4514,12 @@ StmtResult Sema::ActOnSEHTryBlock(bool IsCXXTry, SourceLocation TryLoc,
// SEH __try is incompatible with C++ try. Borland appears to support this,
// however.
if (!getLangOpts().Borland) {
- if (FSI->FirstCXXTryLoc.isValid()) {
- Diag(TryLoc, diag::err_mixing_cxx_try_seh_try);
- Diag(FSI->FirstCXXTryLoc, diag::note_conflicting_try_here) << "'try'";
+ if (FSI->FirstCXXOrObjCTryLoc.isValid()) {
+ Diag(TryLoc, diag::err_mixing_cxx_try_seh_try) << FSI->FirstTryType;
+ Diag(FSI->FirstCXXOrObjCTryLoc, diag::note_conflicting_try_here)
+ << (FSI->FirstTryType == sema::FunctionScopeInfo::TryLocIsCXX
+ ? "'try'"
+ : "'@try'");
}
}
diff --git a/clang/lib/Sema/SemaStmtAsm.cpp b/clang/lib/Sema/SemaStmtAsm.cpp
index 243d0b921cd7..603611b2d86b 100644
--- a/clang/lib/Sema/SemaStmtAsm.cpp
+++ b/clang/lib/Sema/SemaStmtAsm.cpp
@@ -393,30 +393,31 @@ StmtResult Sema::ActOnGCCAsmStmt(SourceLocation AsmLoc, bool IsSimple,
diag::err_asm_invalid_lvalue_in_input)
<< Info.getConstraintStr()
<< InputExpr->getSourceRange());
- } else if (Info.requiresImmediateConstant() && !Info.allowsRegister()) {
- if (!InputExpr->isValueDependent()) {
- Expr::EvalResult EVResult;
- if (InputExpr->EvaluateAsRValue(EVResult, Context, true)) {
- // For compatibility with GCC, we also allow pointers that would be
- // integral constant expressions if they were cast to int.
- llvm::APSInt IntResult;
- if (EVResult.Val.toIntegralConstant(IntResult, InputExpr->getType(),
- Context))
- if (!Info.isValidAsmImmediate(IntResult))
- return StmtError(Diag(InputExpr->getBeginLoc(),
- diag::err_invalid_asm_value_for_constraint)
- << toString(IntResult, 10)
- << Info.getConstraintStr()
- << InputExpr->getSourceRange());
- }
- }
-
} else {
ExprResult Result = DefaultFunctionArrayLvalueConversion(Exprs[i]);
if (Result.isInvalid())
return StmtError();
- Exprs[i] = Result.get();
+ InputExpr = Exprs[i] = Result.get();
+
+ if (Info.requiresImmediateConstant() && !Info.allowsRegister()) {
+ if (!InputExpr->isValueDependent()) {
+ Expr::EvalResult EVResult;
+ if (InputExpr->EvaluateAsRValue(EVResult, Context, true)) {
+ // For compatibility with GCC, we also allow pointers that would be
+ // integral constant expressions if they were cast to int.
+ llvm::APSInt IntResult;
+ if (EVResult.Val.toIntegralConstant(IntResult, InputExpr->getType(),
+ Context))
+ if (!Info.isValidAsmImmediate(IntResult))
+ return StmtError(
+ Diag(InputExpr->getBeginLoc(),
+ diag::err_invalid_asm_value_for_constraint)
+ << toString(IntResult, 10) << Info.getConstraintStr()
+ << InputExpr->getSourceRange());
+ }
+ }
+ }
}
if (Info.allowsRegister()) {
diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp
index 175388198324..f4fd2ea5aa8e 100644
--- a/clang/lib/Sema/SemaTemplate.cpp
+++ b/clang/lib/Sema/SemaTemplate.cpp
@@ -485,8 +485,7 @@ bool Sema::LookupTemplateName(LookupResult &Found,
// all language modes, and diagnose the empty lookup in ActOnCallExpr if we
// successfully form a call to an undeclared template-id.
bool AllFunctions =
- getLangOpts().CPlusPlus20 &&
- std::all_of(Found.begin(), Found.end(), [](NamedDecl *ND) {
+ getLangOpts().CPlusPlus20 && llvm::all_of(Found, [](NamedDecl *ND) {
return isa<FunctionDecl>(ND->getUnderlyingDecl());
});
if (AllFunctions || (Found.empty() && !IsDependent)) {
@@ -744,7 +743,7 @@ Sema::ActOnDependentIdExpression(const CXXScopeSpec &SS,
// Check if the nested name specifier is an enum type.
bool IsEnum = false;
if (NestedNameSpecifier *NNS = SS.getScopeRep())
- IsEnum = dyn_cast_or_null<EnumType>(NNS->getAsType());
+ IsEnum = isa_and_nonnull<EnumType>(NNS->getAsType());
if (!MightBeCxx11UnevalField && !isAddressOfOperand && !IsEnum &&
isa<CXXMethodDecl>(DC) && cast<CXXMethodDecl>(DC)->isInstance()) {
@@ -1079,7 +1078,7 @@ NamedDecl *Sema::ActOnTypeParameter(Scope *S, bool Typename,
return Param;
// Check the template argument itself.
- if (CheckTemplateArgument(Param, DefaultTInfo)) {
+ if (CheckTemplateArgument(DefaultTInfo)) {
Param->setInvalidDecl();
return Param;
}
@@ -1206,7 +1205,7 @@ static ExprResult formImmediatelyDeclaredConstraint(
}
/// Attach a type-constraint to a template parameter.
-/// \returns true if an error occured. This can happen if the
+/// \returns true if an error occurred. This can happen if the
/// immediately-declared constraint could not be formed (e.g. incorrect number
/// of arguments for the named concept).
bool Sema::AttachTypeConstraint(NestedNameSpecifierLoc NS,
@@ -1260,15 +1259,15 @@ bool Sema::AttachTypeConstraint(AutoTypeLoc TL, NonTypeTemplateParmDecl *NTTP,
BuildDeclRefExpr(NTTP, NTTP->getType(), VK_PRValue, NTTP->getLocation());
if (!Ref)
return true;
- ExprResult ImmediatelyDeclaredConstraint =
- formImmediatelyDeclaredConstraint(
- *this, TL.getNestedNameSpecifierLoc(), TL.getConceptNameInfo(),
- TL.getNamedConcept(), TL.getLAngleLoc(), TL.getRAngleLoc(),
- BuildDecltypeType(Ref, NTTP->getLocation()), NTTP->getLocation(),
- [&] (TemplateArgumentListInfo &ConstraintArgs) {
- for (unsigned I = 0, C = TL.getNumArgs(); I != C; ++I)
- ConstraintArgs.addArgument(TL.getArgLoc(I));
- }, EllipsisLoc);
+ ExprResult ImmediatelyDeclaredConstraint = formImmediatelyDeclaredConstraint(
+ *this, TL.getNestedNameSpecifierLoc(), TL.getConceptNameInfo(),
+ TL.getNamedConcept(), TL.getLAngleLoc(), TL.getRAngleLoc(),
+ BuildDecltypeType(Ref), NTTP->getLocation(),
+ [&](TemplateArgumentListInfo &ConstraintArgs) {
+ for (unsigned I = 0, C = TL.getNumArgs(); I != C; ++I)
+ ConstraintArgs.addArgument(TL.getArgLoc(I));
+ },
+ EllipsisLoc);
if (ImmediatelyDeclaredConstraint.isInvalid() ||
!ImmediatelyDeclaredConstraint.isUsable())
return true;
@@ -1290,7 +1289,7 @@ QualType Sema::CheckNonTypeTemplateParameterType(TypeSourceInfo *&TSI,
// - an identifier associated by name lookup with a non-type
// template-parameter declared with a type that contains a
// placeholder type (7.1.7.4),
- TSI = SubstAutoTypeSourceInfo(TSI, Context.DependentTy);
+ TSI = SubstAutoTypeSourceInfoDependent(TSI);
}
return CheckNonTypeTemplateParameterType(TSI->getType(), Loc);
@@ -2267,22 +2266,8 @@ private:
TTP->isParameterPack(), TTP->hasTypeConstraint(),
TTP->isExpandedParameterPack() ?
llvm::Optional<unsigned>(TTP->getNumExpansionParameters()) : None);
- if (const auto *TC = TTP->getTypeConstraint()) {
- TemplateArgumentListInfo TransformedArgs;
- const auto *ArgsAsWritten = TC->getTemplateArgsAsWritten();
- if (!ArgsAsWritten ||
- SemaRef.Subst(ArgsAsWritten->getTemplateArgs(),
- ArgsAsWritten->NumTemplateArgs, TransformedArgs,
- Args))
- SemaRef.AttachTypeConstraint(
- TC->getNestedNameSpecifierLoc(), TC->getConceptNameInfo(),
- TC->getNamedConcept(), ArgsAsWritten ? &TransformedArgs : nullptr,
- NewTTP,
- NewTTP->isParameterPack()
- ? cast<CXXFoldExpr>(TC->getImmediatelyDeclaredConstraint())
- ->getEllipsisLoc()
- : SourceLocation());
- }
+ if (const auto *TC = TTP->getTypeConstraint())
+ SemaRef.SubstTypeConstraint(NewTTP, TC, Args);
if (TTP->hasDefaultArgument()) {
TypeSourceInfo *InstantiatedDefaultArg =
SemaRef.SubstType(TTP->getDefaultArgumentInfo(), Args,
@@ -3511,8 +3496,10 @@ checkBuiltinTemplateIdType(Sema &SemaRef, BuiltinTemplateDecl *BTD,
}
/// Determine whether this alias template is "enable_if_t".
+/// libc++ >=14 uses "__enable_if_t" in C++11 mode.
static bool isEnableIfAliasTemplate(TypeAliasTemplateDecl *AliasTemplate) {
- return AliasTemplate->getName().equals("enable_if_t");
+ return AliasTemplate->getName().equals("enable_if_t") ||
+ AliasTemplate->getName().equals("__enable_if_t");
}
/// Collect all of the separable terms in the given condition, which
@@ -5042,7 +5029,7 @@ bool Sema::CheckTemplateTypeArgument(TemplateTypeParmDecl *Param,
}
}
- if (CheckTemplateArgument(Param, TSI))
+ if (CheckTemplateArgument(TSI))
return true;
// Add the converted template type argument.
@@ -5110,7 +5097,11 @@ SubstDefaultTemplateArgument(Sema &SemaRef,
for (unsigned i = 0, e = Param->getDepth(); i != e; ++i)
TemplateArgLists.addOuterTemplateArguments(None);
- Sema::ContextRAII SavedContext(SemaRef, Template->getDeclContext());
+ bool ForLambdaCallOperator = false;
+ if (const auto *Rec = dyn_cast<CXXRecordDecl>(Template->getDeclContext()))
+ ForLambdaCallOperator = Rec->isLambda();
+ Sema::ContextRAII SavedContext(SemaRef, Template->getDeclContext(),
+ !ForLambdaCallOperator);
ArgType =
SemaRef.SubstType(ArgType, TemplateArgLists,
Param->getDefaultArgumentLoc(), Param->getDeclName());
@@ -5661,7 +5652,7 @@ bool Sema::CheckTemplateArgumentList(
TemplateArgumentListInfo NewArgs = TemplateArgs;
// Make sure we get the template parameter list from the most
- // recentdeclaration, since that is the only one that has is guaranteed to
+ // recent declaration, since that is the only one that is guaranteed to
// have all the default template argument information.
TemplateParameterList *Params =
cast<TemplateDecl>(Template->getMostRecentDecl())
@@ -6208,8 +6199,7 @@ bool UnnamedLocalNoLinkageFinder::VisitNestedNameSpecifier(
///
/// This routine implements the semantics of C++ [temp.arg.type]. It
/// returns true if an error occurred, and false otherwise.
-bool Sema::CheckTemplateArgument(TemplateTypeParmDecl *Param,
- TypeSourceInfo *ArgInfo) {
+bool Sema::CheckTemplateArgument(TypeSourceInfo *ArgInfo) {
assert(ArgInfo && "invalid TypeSourceInfo");
QualType Arg = ArgInfo->getType();
SourceRange SR = ArgInfo->getTypeLoc().getSourceRange();
@@ -8703,7 +8693,7 @@ static SourceLocation DiagLocForExplicitInstantiation(
///
/// \param PrevTSK the kind of the old explicit specialization or instantiation.
///
-/// \param PrevPointOfInstantiation if valid, indicates where the previus
+/// \param PrevPointOfInstantiation if valid, indicates where the previous
/// declaration was instantiated (either implicitly or explicitly).
///
/// \param HasNoEffect will be set to true to indicate that the new
@@ -10883,7 +10873,7 @@ bool Sema::RebuildTemplateParamsInCurrentInstantiation(
// - an identifier associated by name lookup with a non-type
// template-parameter declared with a type that contains a
// placeholder type (7.1.7.4),
- NewTSI = SubstAutoTypeSourceInfo(NewTSI, Context.DependentTy);
+ NewTSI = SubstAutoTypeSourceInfoDependent(NewTSI);
}
if (NewTSI != NTTP->getTypeSourceInfo()) {
@@ -10929,9 +10919,9 @@ Sema::getTemplateArgumentBindingsText(const TemplateParameterList *Params,
}
Out << " = ";
- Args[I].print(
- getPrintingPolicy(), Out,
- TemplateParameterList::shouldIncludeTypeForArgument(Params, I));
+ Args[I].print(getPrintingPolicy(), Out,
+ TemplateParameterList::shouldIncludeTypeForArgument(
+ getPrintingPolicy(), Params, I));
}
Out << ']';
diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp
index 08e798304b0c..81edae10335d 100644
--- a/clang/lib/Sema/SemaTemplateDeduction.cpp
+++ b/clang/lib/Sema/SemaTemplateDeduction.cpp
@@ -131,30 +131,16 @@ static bool hasSameExtendedValue(llvm::APSInt X, llvm::APSInt Y) {
return X == Y;
}
-static Sema::TemplateDeductionResult
-DeduceTemplateArguments(Sema &S,
- TemplateParameterList *TemplateParams,
- const TemplateArgument &Param,
- TemplateArgument Arg,
- TemplateDeductionInfo &Info,
- SmallVectorImpl<DeducedTemplateArgument> &Deduced);
-
-static Sema::TemplateDeductionResult
-DeduceTemplateArgumentsByTypeMatch(Sema &S,
- TemplateParameterList *TemplateParams,
- QualType Param,
- QualType Arg,
- TemplateDeductionInfo &Info,
- SmallVectorImpl<DeducedTemplateArgument> &
- Deduced,
- unsigned TDF,
- bool PartialOrdering = false,
- bool DeducedFromArrayBound = false);
+static Sema::TemplateDeductionResult DeduceTemplateArgumentsByTypeMatch(
+ Sema &S, TemplateParameterList *TemplateParams, QualType Param,
+ QualType Arg, TemplateDeductionInfo &Info,
+ SmallVectorImpl<DeducedTemplateArgument> &Deduced, unsigned TDF,
+ bool PartialOrdering = false, bool DeducedFromArrayBound = false);
static Sema::TemplateDeductionResult
DeduceTemplateArguments(Sema &S, TemplateParameterList *TemplateParams,
- ArrayRef<TemplateArgument> Params,
- ArrayRef<TemplateArgument> Args,
+ ArrayRef<TemplateArgument> Ps,
+ ArrayRef<TemplateArgument> As,
TemplateDeductionInfo &Info,
SmallVectorImpl<DeducedTemplateArgument> &Deduced,
bool NumberOfArgumentsMustMatch);
@@ -559,75 +545,67 @@ DeduceTemplateArguments(Sema &S,
/// "success" result means that template argument deduction has not yet failed,
/// but it may still fail, later, for other reasons.
static Sema::TemplateDeductionResult
-DeduceTemplateArguments(Sema &S,
- TemplateParameterList *TemplateParams,
- const TemplateSpecializationType *Param,
- QualType Arg,
- TemplateDeductionInfo &Info,
- SmallVectorImpl<DeducedTemplateArgument> &Deduced) {
- assert(Arg.isCanonical() && "Argument type must be canonical");
-
+DeduceTemplateSpecArguments(Sema &S, TemplateParameterList *TemplateParams,
+ const QualType P, QualType A,
+ TemplateDeductionInfo &Info,
+ SmallVectorImpl<DeducedTemplateArgument> &Deduced) {
+ QualType UP = P;
+ if (const auto *IP = P->getAs<InjectedClassNameType>())
+ UP = IP->getInjectedSpecializationType();
+ // FIXME: Try to preserve type sugar here, which is hard
+ // because of the unresolved template arguments.
+ const auto *TP = UP.getCanonicalType()->castAs<TemplateSpecializationType>();
+ ArrayRef<TemplateArgument> PResolved = TP->template_arguments();
+
+ QualType UA = A;
// Treat an injected-class-name as its underlying template-id.
- if (auto *Injected = dyn_cast<InjectedClassNameType>(Arg))
- Arg = Injected->getInjectedSpecializationType();
+ if (const auto *Injected = A->getAs<InjectedClassNameType>())
+ UA = Injected->getInjectedSpecializationType();
// Check whether the template argument is a dependent template-id.
- if (const TemplateSpecializationType *SpecArg
- = dyn_cast<TemplateSpecializationType>(Arg)) {
+ // FIXME: Should not lose sugar here.
+ if (const auto *SA =
+ dyn_cast<TemplateSpecializationType>(UA.getCanonicalType())) {
// Perform template argument deduction for the template name.
- if (Sema::TemplateDeductionResult Result
- = DeduceTemplateArguments(S, TemplateParams,
- Param->getTemplateName(),
- SpecArg->getTemplateName(),
- Info, Deduced))
+ if (auto Result =
+ DeduceTemplateArguments(S, TemplateParams, TP->getTemplateName(),
+ SA->getTemplateName(), Info, Deduced))
return Result;
-
-
// Perform template argument deduction on each template
// argument. Ignore any missing/extra arguments, since they could be
// filled in by default arguments.
- return DeduceTemplateArguments(S, TemplateParams,
- Param->template_arguments(),
- SpecArg->template_arguments(), Info, Deduced,
+ return DeduceTemplateArguments(S, TemplateParams, PResolved,
+ SA->template_arguments(), Info, Deduced,
/*NumberOfArgumentsMustMatch=*/false);
}
// If the argument type is a class template specialization, we
// perform template argument deduction using its template
// arguments.
- const RecordType *RecordArg = dyn_cast<RecordType>(Arg);
- if (!RecordArg) {
- Info.FirstArg = TemplateArgument(QualType(Param, 0));
- Info.SecondArg = TemplateArgument(Arg);
- return Sema::TDK_NonDeducedMismatch;
- }
-
- ClassTemplateSpecializationDecl *SpecArg
- = dyn_cast<ClassTemplateSpecializationDecl>(RecordArg->getDecl());
- if (!SpecArg) {
- Info.FirstArg = TemplateArgument(QualType(Param, 0));
- Info.SecondArg = TemplateArgument(Arg);
+ const auto *RA = UA->getAs<RecordType>();
+ const auto *SA =
+ RA ? dyn_cast<ClassTemplateSpecializationDecl>(RA->getDecl()) : nullptr;
+ if (!SA) {
+ Info.FirstArg = TemplateArgument(P);
+ Info.SecondArg = TemplateArgument(A);
return Sema::TDK_NonDeducedMismatch;
}
// Perform template argument deduction for the template name.
- if (Sema::TemplateDeductionResult Result
- = DeduceTemplateArguments(S,
- TemplateParams,
- Param->getTemplateName(),
- TemplateName(SpecArg->getSpecializedTemplate()),
- Info, Deduced))
+ if (auto Result = DeduceTemplateArguments(
+ S, TemplateParams, TP->getTemplateName(),
+ TemplateName(SA->getSpecializedTemplate()), Info, Deduced))
return Result;
// Perform template argument deduction for the template arguments.
- return DeduceTemplateArguments(S, TemplateParams, Param->template_arguments(),
- SpecArg->getTemplateArgs().asArray(), Info,
- Deduced, /*NumberOfArgumentsMustMatch=*/true);
+ return DeduceTemplateArguments(S, TemplateParams, PResolved,
+ SA->getTemplateArgs().asArray(), Info, Deduced,
+ /*NumberOfArgumentsMustMatch=*/true);
}
-/// Determines whether the given type is an opaque type that
-/// might be more qualified when instantiated.
-static bool IsPossiblyOpaquelyQualifiedType(QualType T) {
+static bool IsPossiblyOpaquelyQualifiedTypeInternal(const Type *T) {
+ assert(T->isCanonicalUnqualified());
+
switch (T->getTypeClass()) {
case Type::TypeOfExpr:
case Type::TypeOf:
@@ -641,14 +619,21 @@ static bool IsPossiblyOpaquelyQualifiedType(QualType T) {
case Type::IncompleteArray:
case Type::VariableArray:
case Type::DependentSizedArray:
- return IsPossiblyOpaquelyQualifiedType(
- cast<ArrayType>(T)->getElementType());
+ return IsPossiblyOpaquelyQualifiedTypeInternal(
+ cast<ArrayType>(T)->getElementType().getTypePtr());
default:
return false;
}
}
+/// Determines whether the given type is an opaque type that
+/// might be more qualified when instantiated.
+static bool IsPossiblyOpaquelyQualifiedType(QualType T) {
+ return IsPossiblyOpaquelyQualifiedTypeInternal(
+ T->getCanonicalTypeInternal().getTypePtr());
+}
+
/// Helper function to build a TemplateParameter when we don't
/// know its type statically.
static TemplateParameter makeTemplateParameter(Decl *D) {
@@ -1047,11 +1032,12 @@ DeduceTemplateArguments(Sema &S,
return Sema::TDK_MiscellaneousDeductionFailure;
}
- if (Sema::TemplateDeductionResult Result
- = DeduceTemplateArgumentsByTypeMatch(S, TemplateParams,
- Params[ParamIdx], Args[ArgIdx],
- Info, Deduced, TDF,
- PartialOrdering))
+ if (Sema::TemplateDeductionResult Result =
+ DeduceTemplateArgumentsByTypeMatch(
+ S, TemplateParams, Params[ParamIdx].getUnqualifiedType(),
+ Args[ArgIdx].getUnqualifiedType(), Info, Deduced, TDF,
+ PartialOrdering,
+ /*DeducedFromArrayBound=*/false))
return Result;
++ArgIdx;
@@ -1073,10 +1059,11 @@ DeduceTemplateArguments(Sema &S,
if (ParamIdx + 1 == NumParams || PackScope.hasFixedArity()) {
for (; ArgIdx < NumArgs && PackScope.hasNextElement(); ++ArgIdx) {
// Deduce template arguments from the pattern.
- if (Sema::TemplateDeductionResult Result
- = DeduceTemplateArgumentsByTypeMatch(S, TemplateParams, Pattern,
- Args[ArgIdx], Info, Deduced,
- TDF, PartialOrdering))
+ if (Sema::TemplateDeductionResult Result =
+ DeduceTemplateArgumentsByTypeMatch(
+ S, TemplateParams, Pattern.getUnqualifiedType(),
+ Args[ArgIdx].getUnqualifiedType(), Info, Deduced, TDF,
+ PartialOrdering, /*DeducedFromArrayBound=*/false))
return Result;
PackScope.nextPackElement();
@@ -1155,26 +1142,25 @@ static bool hasInconsistentOrSupersetQualifiersOf(QualType ParamType,
/// function types (noreturn adjustment, implicit calling conventions). If any
/// of parameter and argument is not a function, just perform type comparison.
///
-/// \param Param the template parameter type.
+/// \param P the template parameter type.
///
-/// \param Arg the argument type.
-bool Sema::isSameOrCompatibleFunctionType(CanQualType Param,
- CanQualType Arg) {
- const FunctionType *ParamFunction = Param->getAs<FunctionType>(),
- *ArgFunction = Arg->getAs<FunctionType>();
+/// \param A the argument type.
+bool Sema::isSameOrCompatibleFunctionType(QualType P, QualType A) {
+ const FunctionType *PF = P->getAs<FunctionType>(),
+ *AF = A->getAs<FunctionType>();
// Just compare if not functions.
- if (!ParamFunction || !ArgFunction)
- return Param == Arg;
+ if (!PF || !AF)
+ return Context.hasSameType(P, A);
// Noreturn and noexcept adjustment.
QualType AdjustedParam;
- if (IsFunctionConversion(Param, Arg, AdjustedParam))
- return Arg == Context.getCanonicalType(AdjustedParam);
+ if (IsFunctionConversion(P, A, AdjustedParam))
+ return Context.hasSameType(AdjustedParam, A);
// FIXME: Compatible calling conventions.
- return Param == Arg;
+ return Context.hasSameType(P, A);
}
/// Get the index of the first template parameter that was originally from the
@@ -1203,6 +1189,11 @@ static bool isForwardingReference(QualType Param, unsigned FirstInnerIndex) {
return false;
}
+static CXXRecordDecl *getCanonicalRD(QualType T) {
+ return cast<CXXRecordDecl>(
+ T->castAs<RecordType>()->getDecl()->getCanonicalDecl());
+}
+
/// Attempt to deduce the template arguments by checking the base types
/// according to C++20 [temp.deduct.call] p4b3.
///
@@ -1221,10 +1212,11 @@ static bool isForwardingReference(QualType Param, unsigned FirstInnerIndex) {
/// \returns the result of template argument deduction with the bases. "invalid"
/// means no matches, "success" found a single item, and the
/// "MiscellaneousDeductionFailure" result happens when the match is ambiguous.
-static Sema::TemplateDeductionResult DeduceTemplateBases(
- Sema &S, const RecordType *RecordT, TemplateParameterList *TemplateParams,
- const TemplateSpecializationType *SpecParam, TemplateDeductionInfo &Info,
- SmallVectorImpl<DeducedTemplateArgument> &Deduced) {
+static Sema::TemplateDeductionResult
+DeduceTemplateBases(Sema &S, const CXXRecordDecl *RD,
+ TemplateParameterList *TemplateParams, QualType P,
+ TemplateDeductionInfo &Info,
+ SmallVectorImpl<DeducedTemplateArgument> &Deduced) {
// C++14 [temp.deduct.call] p4b3:
// If P is a class and P has the form simple-template-id, then the
// transformed A can be a derived class of the deduced A. Likewise if
@@ -1244,45 +1236,44 @@ static Sema::TemplateDeductionResult DeduceTemplateBases(
// visited, while ToVisit is our stack of records that we still need to
// visit. Matches contains a list of matches that have yet to be
// disqualified.
- llvm::SmallPtrSet<const RecordType *, 8> Visited;
- SmallVector<const RecordType *, 8> ToVisit;
+ llvm::SmallPtrSet<const CXXRecordDecl *, 8> Visited;
+ SmallVector<QualType, 8> ToVisit;
// We iterate over this later, so we have to use MapVector to ensure
// determinism.
- llvm::MapVector<const RecordType *, SmallVector<DeducedTemplateArgument, 8>>
+ llvm::MapVector<const CXXRecordDecl *,
+ SmallVector<DeducedTemplateArgument, 8>>
Matches;
- auto AddBases = [&Visited, &ToVisit](const RecordType *RT) {
- CXXRecordDecl *RD = cast<CXXRecordDecl>(RT->getDecl());
+ auto AddBases = [&Visited, &ToVisit](const CXXRecordDecl *RD) {
for (const auto &Base : RD->bases()) {
- assert(Base.getType()->isRecordType() &&
- "Base class that isn't a record?");
- const RecordType *RT = Base.getType()->getAs<RecordType>();
- if (Visited.insert(RT).second)
- ToVisit.push_back(Base.getType()->getAs<RecordType>());
+ QualType T = Base.getType();
+ assert(T->isRecordType() && "Base class that isn't a record?");
+ if (Visited.insert(::getCanonicalRD(T)).second)
+ ToVisit.push_back(T);
}
};
// Set up the loop by adding all the bases.
- AddBases(RecordT);
+ AddBases(RD);
// Search each path of bases until we either run into a successful match
// (where all bases of it are invalid), or we run out of bases.
while (!ToVisit.empty()) {
- const RecordType *NextT = ToVisit.pop_back_val();
+ QualType NextT = ToVisit.pop_back_val();
SmallVector<DeducedTemplateArgument, 8> DeducedCopy(Deduced.begin(),
Deduced.end());
TemplateDeductionInfo BaseInfo(TemplateDeductionInfo::ForBase, Info);
- Sema::TemplateDeductionResult BaseResult =
- DeduceTemplateArguments(S, TemplateParams, SpecParam,
- QualType(NextT, 0), BaseInfo, DeducedCopy);
+ Sema::TemplateDeductionResult BaseResult = DeduceTemplateSpecArguments(
+ S, TemplateParams, P, NextT, BaseInfo, DeducedCopy);
// If this was a successful deduction, add it to the list of matches,
// otherwise we need to continue searching its bases.
+ const CXXRecordDecl *RD = ::getCanonicalRD(NextT);
if (BaseResult == Sema::TDK_Success)
- Matches.insert({NextT, DeducedCopy});
+ Matches.insert({RD, DeducedCopy});
else
- AddBases(NextT);
+ AddBases(RD);
}
// At this point, 'Matches' contains a list of seemingly valid bases, however
@@ -1297,14 +1288,14 @@ static Sema::TemplateDeductionResult DeduceTemplateBases(
AddBases(Match.first);
// We can give up once we have a single item (or have run out of things to
- // search) since cyclical inheritence isn't valid.
+ // search) since cyclical inheritance isn't valid.
while (Matches.size() > 1 && !ToVisit.empty()) {
- const RecordType *NextT = ToVisit.pop_back_val();
- Matches.erase(NextT);
+ const CXXRecordDecl *RD = ::getCanonicalRD(ToVisit.pop_back_val());
+ Matches.erase(RD);
- // Always add all bases, since the inheritence tree can contain
+ // Always add all bases, since the inheritance tree can contain
// disqualifications for multiple matches.
- AddBases(NextT);
+ AddBases(RD);
}
}
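DeduceTemplateBases above now walks CXXRecordDecls rather than RecordTypes, but the language rule it implements is unchanged: the transformed argument may be a class derived from the deduced specialization. A hedged sketch, with illustrative names only:

    // Sketch only: T is deduced as int because Derived has Base<int> as a base.
    template <typename T> struct Base {};
    struct Derived : Base<int> {};

    template <typename T> void f(Base<T>);

    void use() { f(Derived{}); }   // deduces T = int via the base class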
@@ -1341,41 +1332,33 @@ static Sema::TemplateDeductionResult DeduceTemplateBases(
/// \returns the result of template argument deduction so far. Note that a
/// "success" result means that template argument deduction has not yet failed,
/// but it may still fail, later, for other reasons.
-static Sema::TemplateDeductionResult
-DeduceTemplateArgumentsByTypeMatch(Sema &S,
- TemplateParameterList *TemplateParams,
- QualType ParamIn, QualType ArgIn,
- TemplateDeductionInfo &Info,
- SmallVectorImpl<DeducedTemplateArgument> &Deduced,
- unsigned TDF,
- bool PartialOrdering,
- bool DeducedFromArrayBound) {
- // We only want to look at the canonical types, since typedefs and
- // sugar are not part of template argument deduction.
- QualType Param = S.Context.getCanonicalType(ParamIn);
- QualType Arg = S.Context.getCanonicalType(ArgIn);
+static Sema::TemplateDeductionResult DeduceTemplateArgumentsByTypeMatch(
+ Sema &S, TemplateParameterList *TemplateParams, QualType P, QualType A,
+ TemplateDeductionInfo &Info,
+ SmallVectorImpl<DeducedTemplateArgument> &Deduced, unsigned TDF,
+ bool PartialOrdering, bool DeducedFromArrayBound) {
// If the argument type is a pack expansion, look at its pattern.
// This isn't explicitly called out
- if (const PackExpansionType *ArgExpansion
- = dyn_cast<PackExpansionType>(Arg))
- Arg = ArgExpansion->getPattern();
+ if (const auto *AExp = dyn_cast<PackExpansionType>(A))
+ A = AExp->getPattern();
+ assert(!isa<PackExpansionType>(A.getCanonicalType()));
if (PartialOrdering) {
// C++11 [temp.deduct.partial]p5:
// Before the partial ordering is done, certain transformations are
// performed on the types used for partial ordering:
// - If P is a reference type, P is replaced by the type referred to.
- const ReferenceType *ParamRef = Param->getAs<ReferenceType>();
- if (ParamRef)
- Param = ParamRef->getPointeeType();
+ const ReferenceType *PRef = P->getAs<ReferenceType>();
+ if (PRef)
+ P = PRef->getPointeeType();
// - If A is a reference type, A is replaced by the type referred to.
- const ReferenceType *ArgRef = Arg->getAs<ReferenceType>();
- if (ArgRef)
- Arg = ArgRef->getPointeeType();
+ const ReferenceType *ARef = A->getAs<ReferenceType>();
+ if (ARef)
+ A = A->getPointeeType();
- if (ParamRef && ArgRef && S.Context.hasSameUnqualifiedType(Param, Arg)) {
+ if (PRef && ARef && S.Context.hasSameUnqualifiedType(P, A)) {
// C++11 [temp.deduct.partial]p9:
// If, for a given type, deduction succeeds in both directions (i.e.,
// the types are identical after the transformations above) and both
@@ -1395,29 +1378,26 @@ DeduceTemplateArgumentsByTypeMatch(Sema &S,
// succeeds, so we model this as a deduction failure. Note that
// [the first type] is P and [the other type] is A here; the standard
// gets this backwards.
- Qualifiers ParamQuals = Param.getQualifiers();
- Qualifiers ArgQuals = Arg.getQualifiers();
- if ((ParamRef->isLValueReferenceType() &&
- !ArgRef->isLValueReferenceType()) ||
- ParamQuals.isStrictSupersetOf(ArgQuals) ||
- (ParamQuals.hasNonTrivialObjCLifetime() &&
- ArgQuals.getObjCLifetime() == Qualifiers::OCL_ExplicitNone &&
- ParamQuals.withoutObjCLifetime() ==
- ArgQuals.withoutObjCLifetime())) {
- Info.FirstArg = TemplateArgument(ParamIn);
- Info.SecondArg = TemplateArgument(ArgIn);
+ Qualifiers PQuals = P.getQualifiers(), AQuals = A.getQualifiers();
+ if ((PRef->isLValueReferenceType() && !ARef->isLValueReferenceType()) ||
+ PQuals.isStrictSupersetOf(AQuals) ||
+ (PQuals.hasNonTrivialObjCLifetime() &&
+ AQuals.getObjCLifetime() == Qualifiers::OCL_ExplicitNone &&
+ PQuals.withoutObjCLifetime() == AQuals.withoutObjCLifetime())) {
+ Info.FirstArg = TemplateArgument(P);
+ Info.SecondArg = TemplateArgument(A);
return Sema::TDK_NonDeducedMismatch;
}
}
-
+ Qualifiers DiscardedQuals;
// C++11 [temp.deduct.partial]p7:
// Remove any top-level cv-qualifiers:
// - If P is a cv-qualified type, P is replaced by the cv-unqualified
// version of P.
- Param = Param.getUnqualifiedType();
+ P = S.Context.getUnqualifiedArrayType(P, DiscardedQuals);
// - If A is a cv-qualified type, A is replaced by the cv-unqualified
// version of A.
- Arg = Arg.getUnqualifiedType();
+ A = S.Context.getUnqualifiedArrayType(A, DiscardedQuals);
} else {
// C++0x [temp.deduct.call]p4 bullet 1:
// - If the original P is a reference type, the deduced A (i.e., the type
@@ -1425,13 +1405,12 @@ DeduceTemplateArgumentsByTypeMatch(Sema &S,
// transformed A.
if (TDF & TDF_ParamWithReferenceType) {
Qualifiers Quals;
- QualType UnqualParam = S.Context.getUnqualifiedArrayType(Param, Quals);
- Quals.setCVRQualifiers(Quals.getCVRQualifiers() &
- Arg.getCVRQualifiers());
- Param = S.Context.getQualifiedType(UnqualParam, Quals);
+ QualType UnqualP = S.Context.getUnqualifiedArrayType(P, Quals);
+ Quals.setCVRQualifiers(Quals.getCVRQualifiers() & A.getCVRQualifiers());
+ P = S.Context.getQualifiedType(UnqualP, Quals);
}
- if ((TDF & TDF_TopLevelParameterTypeList) && !Param->isFunctionType()) {
+ if ((TDF & TDF_TopLevelParameterTypeList) && !P->isFunctionType()) {
// C++0x [temp.deduct.type]p10:
// If P and A are function types that originated from deduction when
// taking the address of a function template (14.8.2.2) or when deducing
@@ -1444,8 +1423,9 @@ DeduceTemplateArgumentsByTypeMatch(Sema &S,
// Pi is T&& and Ai is X&, the adjusted Pi will be T, causing T to be
// deduced as X&. - end note ]
TDF &= ~TDF_TopLevelParameterTypeList;
- if (isForwardingReference(Param, 0) && Arg->isLValueReferenceType())
- Param = Param->getPointeeType();
+ if (isForwardingReference(P, /*FirstInnerIndex=*/0) &&
+ A->isLValueReferenceType())
+ P = P->getPointeeType();
}
}
@@ -1456,53 +1436,48 @@ DeduceTemplateArgumentsByTypeMatch(Sema &S,
//
// T
// cv-list T
- if (const TemplateTypeParmType *TemplateTypeParm
- = Param->getAs<TemplateTypeParmType>()) {
+ if (const auto *TTP = P->getAs<TemplateTypeParmType>()) {
// Just skip any attempts to deduce from a placeholder type or a parameter
// at a different depth.
- if (Arg->isPlaceholderType() ||
- Info.getDeducedDepth() != TemplateTypeParm->getDepth())
+ if (A->isPlaceholderType() || Info.getDeducedDepth() != TTP->getDepth())
return Sema::TDK_Success;
- unsigned Index = TemplateTypeParm->getIndex();
- bool RecanonicalizeArg = false;
+ unsigned Index = TTP->getIndex();
// If the argument type is an array type, move the qualifiers up to the
// top level, so they can be matched with the qualifiers on the parameter.
- if (isa<ArrayType>(Arg)) {
+ if (A->isArrayType()) {
Qualifiers Quals;
- Arg = S.Context.getUnqualifiedArrayType(Arg, Quals);
- if (Quals) {
- Arg = S.Context.getQualifiedType(Arg, Quals);
- RecanonicalizeArg = true;
- }
+ A = S.Context.getUnqualifiedArrayType(A, Quals);
+ if (Quals)
+ A = S.Context.getQualifiedType(A, Quals);
}
// The argument type can not be less qualified than the parameter
// type.
if (!(TDF & TDF_IgnoreQualifiers) &&
- hasInconsistentOrSupersetQualifiersOf(Param, Arg)) {
+ hasInconsistentOrSupersetQualifiersOf(P, A)) {
Info.Param = cast<TemplateTypeParmDecl>(TemplateParams->getParam(Index));
- Info.FirstArg = TemplateArgument(Param);
- Info.SecondArg = TemplateArgument(Arg);
+ Info.FirstArg = TemplateArgument(P);
+ Info.SecondArg = TemplateArgument(A);
return Sema::TDK_Underqualified;
}
// Do not match a function type with a cv-qualified type.
// http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_active.html#1584
- if (Arg->isFunctionType() && Param.hasQualifiers()) {
+ if (A->isFunctionType() && P.hasQualifiers())
return Sema::TDK_NonDeducedMismatch;
- }
- assert(TemplateTypeParm->getDepth() == Info.getDeducedDepth() &&
+ assert(TTP->getDepth() == Info.getDeducedDepth() &&
"saw template type parameter with wrong depth");
- assert(Arg != S.Context.OverloadTy && "Unresolved overloaded function");
- QualType DeducedType = Arg;
+ assert(A->getCanonicalTypeInternal() != S.Context.OverloadTy &&
+ "Unresolved overloaded function");
+ QualType DeducedType = A;
// Remove any qualifiers on the parameter from the deduced type.
// We checked the qualifiers for consistency above.
Qualifiers DeducedQs = DeducedType.getQualifiers();
- Qualifiers ParamQs = Param.getQualifiers();
+ Qualifiers ParamQs = P.getQualifiers();
DeducedQs.removeCVRQualifiers(ParamQs.getCVRQualifiers());
if (ParamQs.hasObjCGCAttr())
DeducedQs.removeObjCGCAttr();
@@ -1517,29 +1492,24 @@ DeduceTemplateArgumentsByTypeMatch(Sema &S,
if (ParamQs.hasObjCLifetime() && !DeducedType->isObjCLifetimeType() &&
!DeducedType->isDependentType()) {
Info.Param = cast<TemplateTypeParmDecl>(TemplateParams->getParam(Index));
- Info.FirstArg = TemplateArgument(Param);
- Info.SecondArg = TemplateArgument(Arg);
+ Info.FirstArg = TemplateArgument(P);
+ Info.SecondArg = TemplateArgument(A);
return Sema::TDK_Underqualified;
}
// Objective-C ARC:
// If template deduction would produce an argument type with lifetime type
// but no lifetime qualifier, the __strong lifetime qualifier is inferred.
- if (S.getLangOpts().ObjCAutoRefCount &&
- DeducedType->isObjCLifetimeType() &&
+ if (S.getLangOpts().ObjCAutoRefCount && DeducedType->isObjCLifetimeType() &&
!DeducedQs.hasObjCLifetime())
DeducedQs.setObjCLifetime(Qualifiers::OCL_Strong);
- DeducedType = S.Context.getQualifiedType(DeducedType.getUnqualifiedType(),
- DeducedQs);
-
- if (RecanonicalizeArg)
- DeducedType = S.Context.getCanonicalType(DeducedType);
+ DeducedType =
+ S.Context.getQualifiedType(DeducedType.getUnqualifiedType(), DeducedQs);
DeducedTemplateArgument NewDeduced(DeducedType, DeducedFromArrayBound);
- DeducedTemplateArgument Result = checkDeducedTemplateArguments(S.Context,
- Deduced[Index],
- NewDeduced);
+ DeducedTemplateArgument Result =
+ checkDeducedTemplateArguments(S.Context, Deduced[Index], NewDeduced);
if (Result.isNull()) {
Info.Param = cast<TemplateTypeParmDecl>(TemplateParams->getParam(Index));
Info.FirstArg = Deduced[Index];
@@ -1552,69 +1522,57 @@ DeduceTemplateArgumentsByTypeMatch(Sema &S,
}
// Set up the template argument deduction information for a failure.
- Info.FirstArg = TemplateArgument(ParamIn);
- Info.SecondArg = TemplateArgument(ArgIn);
+ Info.FirstArg = TemplateArgument(P);
+ Info.SecondArg = TemplateArgument(A);
// If the parameter is an already-substituted template parameter
// pack, do nothing: we don't know which of its arguments to look
// at, so we have to wait until all of the parameter packs in this
// expansion have arguments.
- if (isa<SubstTemplateTypeParmPackType>(Param))
+ if (P->getAs<SubstTemplateTypeParmPackType>())
return Sema::TDK_Success;
// Check the cv-qualifiers on the parameter and argument types.
- CanQualType CanParam = S.Context.getCanonicalType(Param);
- CanQualType CanArg = S.Context.getCanonicalType(Arg);
if (!(TDF & TDF_IgnoreQualifiers)) {
if (TDF & TDF_ParamWithReferenceType) {
- if (hasInconsistentOrSupersetQualifiersOf(Param, Arg))
+ if (hasInconsistentOrSupersetQualifiersOf(P, A))
return Sema::TDK_NonDeducedMismatch;
} else if (TDF & TDF_ArgWithReferenceType) {
// C++ [temp.deduct.conv]p4:
// If the original A is a reference type, A can be more cv-qualified
// than the deduced A
- if (!Arg.getQualifiers().compatiblyIncludes(Param.getQualifiers()))
+ if (!A.getQualifiers().compatiblyIncludes(P.getQualifiers()))
return Sema::TDK_NonDeducedMismatch;
// Strip out all extra qualifiers from the argument to figure out the
// type we're converting to, prior to the qualification conversion.
Qualifiers Quals;
- Arg = S.Context.getUnqualifiedArrayType(Arg, Quals);
- Arg = S.Context.getQualifiedType(Arg, Param.getQualifiers());
- } else if (!IsPossiblyOpaquelyQualifiedType(Param)) {
- if (Param.getCVRQualifiers() != Arg.getCVRQualifiers())
+ A = S.Context.getUnqualifiedArrayType(A, Quals);
+ A = S.Context.getQualifiedType(A, P.getQualifiers());
+ } else if (!IsPossiblyOpaquelyQualifiedType(P)) {
+ if (P.getCVRQualifiers() != A.getCVRQualifiers())
return Sema::TDK_NonDeducedMismatch;
}
+ }
- // If the parameter type is not dependent, there is nothing to deduce.
- if (!Param->isDependentType()) {
- if (!(TDF & TDF_SkipNonDependent)) {
- bool NonDeduced =
- (TDF & TDF_AllowCompatibleFunctionType)
- ? !S.isSameOrCompatibleFunctionType(CanParam, CanArg)
- : Param != Arg;
- if (NonDeduced) {
- return Sema::TDK_NonDeducedMismatch;
- }
- }
+ // If the parameter type is not dependent, there is nothing to deduce.
+ if (!P->isDependentType()) {
+ if (TDF & TDF_SkipNonDependent)
return Sema::TDK_Success;
- }
- } else if (!Param->isDependentType()) {
- if (!(TDF & TDF_SkipNonDependent)) {
- CanQualType ParamUnqualType = CanParam.getUnqualifiedType(),
- ArgUnqualType = CanArg.getUnqualifiedType();
- bool Success =
- (TDF & TDF_AllowCompatibleFunctionType)
- ? S.isSameOrCompatibleFunctionType(ParamUnqualType, ArgUnqualType)
- : ParamUnqualType == ArgUnqualType;
- if (Success)
- return Sema::TDK_Success;
- } else {
+ if ((TDF & TDF_IgnoreQualifiers) ? S.Context.hasSameUnqualifiedType(P, A)
+ : S.Context.hasSameType(P, A))
return Sema::TDK_Success;
- }
+ if (TDF & TDF_AllowCompatibleFunctionType &&
+ S.isSameOrCompatibleFunctionType(P, A))
+ return Sema::TDK_Success;
+ if (!(TDF & TDF_IgnoreQualifiers))
+ return Sema::TDK_NonDeducedMismatch;
+ // Otherwise, when ignoring qualifiers, the fact that P and A do not share
+ // the same unqualified type does not mean they fail to match, so in this
+ // case we must keep going and run the analysis below with a non-dependent
+ // parameter type.
}
- switch (Param->getTypeClass()) {
+ switch (P.getCanonicalType()->getTypeClass()) {
// Non-canonical types cannot appear here.
#define NON_CANONICAL_TYPE(Class, Base) \
case Type::Class: llvm_unreachable("deducing non-canonical type: " #Class);
@@ -1625,8 +1583,11 @@ DeduceTemplateArgumentsByTypeMatch(Sema &S,
case Type::SubstTemplateTypeParmPack:
llvm_unreachable("Type nodes handled above");
- // These types cannot be dependent, so simply check whether the types are
- // the same.
+ case Type::Auto:
+ // FIXME: Implement deduction in dependent case.
+ if (P->isDependentType())
+ return Sema::TDK_Success;
+ LLVM_FALLTHROUGH;
case Type::Builtin:
case Type::VariableArray:
case Type::Vector:
@@ -1637,134 +1598,115 @@ DeduceTemplateArgumentsByTypeMatch(Sema &S,
case Type::ObjCInterface:
case Type::ObjCObjectPointer:
case Type::ExtInt:
- if (TDF & TDF_SkipNonDependent)
- return Sema::TDK_Success;
-
- if (TDF & TDF_IgnoreQualifiers) {
- Param = Param.getUnqualifiedType();
- Arg = Arg.getUnqualifiedType();
- }
-
- return Param == Arg? Sema::TDK_Success : Sema::TDK_NonDeducedMismatch;
+ return (TDF & TDF_SkipNonDependent) ||
+ ((TDF & TDF_IgnoreQualifiers)
+ ? S.Context.hasSameUnqualifiedType(P, A)
+ : S.Context.hasSameType(P, A))
+ ? Sema::TDK_Success
+ : Sema::TDK_NonDeducedMismatch;
// _Complex T [placeholder extension]
- case Type::Complex:
- if (const ComplexType *ComplexArg = Arg->getAs<ComplexType>())
- return DeduceTemplateArgumentsByTypeMatch(S, TemplateParams,
- cast<ComplexType>(Param)->getElementType(),
- ComplexArg->getElementType(),
- Info, Deduced, TDF);
-
- return Sema::TDK_NonDeducedMismatch;
+ case Type::Complex: {
+ const auto *CP = P->castAs<ComplexType>(), *CA = A->getAs<ComplexType>();
+ if (!CA)
+ return Sema::TDK_NonDeducedMismatch;
+ return DeduceTemplateArgumentsByTypeMatch(
+ S, TemplateParams, CP->getElementType(), CA->getElementType(), Info,
+ Deduced, TDF);
+ }
// _Atomic T [extension]
- case Type::Atomic:
- if (const AtomicType *AtomicArg = Arg->getAs<AtomicType>())
- return DeduceTemplateArgumentsByTypeMatch(S, TemplateParams,
- cast<AtomicType>(Param)->getValueType(),
- AtomicArg->getValueType(),
- Info, Deduced, TDF);
-
- return Sema::TDK_NonDeducedMismatch;
+ case Type::Atomic: {
+ const auto *PA = P->castAs<AtomicType>(), *AA = A->getAs<AtomicType>();
+ if (!AA)
+ return Sema::TDK_NonDeducedMismatch;
+ return DeduceTemplateArgumentsByTypeMatch(
+ S, TemplateParams, PA->getValueType(), AA->getValueType(), Info,
+ Deduced, TDF);
+ }
// T *
case Type::Pointer: {
QualType PointeeType;
- if (const PointerType *PointerArg = Arg->getAs<PointerType>()) {
- PointeeType = PointerArg->getPointeeType();
- } else if (const ObjCObjectPointerType *PointerArg
- = Arg->getAs<ObjCObjectPointerType>()) {
- PointeeType = PointerArg->getPointeeType();
+ if (const auto *PA = A->getAs<PointerType>()) {
+ PointeeType = PA->getPointeeType();
+ } else if (const auto *PA = A->getAs<ObjCObjectPointerType>()) {
+ PointeeType = PA->getPointeeType();
} else {
return Sema::TDK_NonDeducedMismatch;
}
-
- unsigned SubTDF = TDF & (TDF_IgnoreQualifiers | TDF_DerivedClass);
- return DeduceTemplateArgumentsByTypeMatch(S, TemplateParams,
- cast<PointerType>(Param)->getPointeeType(),
- PointeeType,
- Info, Deduced, SubTDF);
+ return DeduceTemplateArgumentsByTypeMatch(
+ S, TemplateParams, P->castAs<PointerType>()->getPointeeType(),
+ PointeeType, Info, Deduced,
+ TDF & (TDF_IgnoreQualifiers | TDF_DerivedClass));
}
// T &
case Type::LValueReference: {
- const LValueReferenceType *ReferenceArg =
- Arg->getAs<LValueReferenceType>();
- if (!ReferenceArg)
+ const auto *RP = P->castAs<LValueReferenceType>(),
+ *RA = A->getAs<LValueReferenceType>();
+ if (!RA)
return Sema::TDK_NonDeducedMismatch;
- return DeduceTemplateArgumentsByTypeMatch(S, TemplateParams,
- cast<LValueReferenceType>(Param)->getPointeeType(),
- ReferenceArg->getPointeeType(), Info, Deduced, 0);
+ return DeduceTemplateArgumentsByTypeMatch(
+ S, TemplateParams, RP->getPointeeType(), RA->getPointeeType(), Info,
+ Deduced, 0);
}
// T && [C++0x]
case Type::RValueReference: {
- const RValueReferenceType *ReferenceArg =
- Arg->getAs<RValueReferenceType>();
- if (!ReferenceArg)
+ const auto *RP = P->castAs<RValueReferenceType>(),
+ *RA = A->getAs<RValueReferenceType>();
+ if (!RA)
return Sema::TDK_NonDeducedMismatch;
- return DeduceTemplateArgumentsByTypeMatch(S, TemplateParams,
- cast<RValueReferenceType>(Param)->getPointeeType(),
- ReferenceArg->getPointeeType(),
- Info, Deduced, 0);
+ return DeduceTemplateArgumentsByTypeMatch(
+ S, TemplateParams, RP->getPointeeType(), RA->getPointeeType(), Info,
+ Deduced, 0);
}
// T [] (implied, but not stated explicitly)
case Type::IncompleteArray: {
- const IncompleteArrayType *IncompleteArrayArg =
- S.Context.getAsIncompleteArrayType(Arg);
- if (!IncompleteArrayArg)
+ const auto *IAA = S.Context.getAsIncompleteArrayType(A);
+ if (!IAA)
return Sema::TDK_NonDeducedMismatch;
- unsigned SubTDF = TDF & TDF_IgnoreQualifiers;
- return DeduceTemplateArgumentsByTypeMatch(S, TemplateParams,
- S.Context.getAsIncompleteArrayType(Param)->getElementType(),
- IncompleteArrayArg->getElementType(),
- Info, Deduced, SubTDF);
+ return DeduceTemplateArgumentsByTypeMatch(
+ S, TemplateParams,
+ S.Context.getAsIncompleteArrayType(P)->getElementType(),
+ IAA->getElementType(), Info, Deduced, TDF & TDF_IgnoreQualifiers);
}
// T [integer-constant]
case Type::ConstantArray: {
- const ConstantArrayType *ConstantArrayArg =
- S.Context.getAsConstantArrayType(Arg);
- if (!ConstantArrayArg)
- return Sema::TDK_NonDeducedMismatch;
-
- const ConstantArrayType *ConstantArrayParm =
- S.Context.getAsConstantArrayType(Param);
- if (ConstantArrayArg->getSize() != ConstantArrayParm->getSize())
+ const auto *CAA = S.Context.getAsConstantArrayType(A),
+ *CAP = S.Context.getAsConstantArrayType(P);
+ assert(CAP);
+ if (!CAA || CAA->getSize() != CAP->getSize())
return Sema::TDK_NonDeducedMismatch;
- unsigned SubTDF = TDF & TDF_IgnoreQualifiers;
- return DeduceTemplateArgumentsByTypeMatch(S, TemplateParams,
- ConstantArrayParm->getElementType(),
- ConstantArrayArg->getElementType(),
- Info, Deduced, SubTDF);
+ return DeduceTemplateArgumentsByTypeMatch(
+ S, TemplateParams, CAP->getElementType(), CAA->getElementType(), Info,
+ Deduced, TDF & TDF_IgnoreQualifiers);
}
// type [i]
case Type::DependentSizedArray: {
- const ArrayType *ArrayArg = S.Context.getAsArrayType(Arg);
- if (!ArrayArg)
+ const auto *AA = S.Context.getAsArrayType(A);
+ if (!AA)
return Sema::TDK_NonDeducedMismatch;
- unsigned SubTDF = TDF & TDF_IgnoreQualifiers;
-
// Check the element type of the arrays
- const DependentSizedArrayType *DependentArrayParm
- = S.Context.getAsDependentSizedArrayType(Param);
- if (Sema::TemplateDeductionResult Result
- = DeduceTemplateArgumentsByTypeMatch(S, TemplateParams,
- DependentArrayParm->getElementType(),
- ArrayArg->getElementType(),
- Info, Deduced, SubTDF))
+ const auto *DAP = S.Context.getAsDependentSizedArrayType(P);
+ assert(DAP);
+ if (auto Result = DeduceTemplateArgumentsByTypeMatch(
+ S, TemplateParams, DAP->getElementType(), AA->getElementType(),
+ Info, Deduced, TDF & TDF_IgnoreQualifiers))
return Result;
// Determine whether the array bound is something we can deduce.
- const NonTypeTemplateParmDecl *NTTP
- = getDeducedParameterFromExpr(Info, DependentArrayParm->getSizeExpr());
+ const NonTypeTemplateParmDecl *NTTP =
+ getDeducedParameterFromExpr(Info, DAP->getSizeExpr());
if (!NTTP)
return Sema::TDK_Success;
@@ -1772,20 +1714,16 @@ DeduceTemplateArgumentsByTypeMatch(Sema &S,
// template parameter.
assert(NTTP->getDepth() == Info.getDeducedDepth() &&
"saw non-type template parameter with wrong depth");
- if (const ConstantArrayType *ConstantArrayArg
- = dyn_cast<ConstantArrayType>(ArrayArg)) {
- llvm::APSInt Size(ConstantArrayArg->getSize());
- return DeduceNonTypeTemplateArgument(S, TemplateParams, NTTP, Size,
- S.Context.getSizeType(),
- /*ArrayBound=*/true,
- Info, Deduced);
+ if (const auto *CAA = dyn_cast<ConstantArrayType>(AA)) {
+ llvm::APSInt Size(CAA->getSize());
+ return DeduceNonTypeTemplateArgument(
+ S, TemplateParams, NTTP, Size, S.Context.getSizeType(),
+ /*ArrayBound=*/true, Info, Deduced);
}
- if (const DependentSizedArrayType *DependentArrayArg
- = dyn_cast<DependentSizedArrayType>(ArrayArg))
- if (DependentArrayArg->getSizeExpr())
- return DeduceNonTypeTemplateArgument(S, TemplateParams, NTTP,
- DependentArrayArg->getSizeExpr(),
- Info, Deduced);
+ if (const auto *DAA = dyn_cast<DependentSizedArrayType>(AA))
+ if (DAA->getSizeExpr())
+ return DeduceNonTypeTemplateArgument(
+ S, TemplateParams, NTTP, DAA->getSizeExpr(), Info, Deduced);
// Incomplete type does not match a dependently-sized array type
return Sema::TDK_NonDeducedMismatch;
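(Illustrative note, not part of the patch: a minimal sketch of the array-bound
deduction handled by the dependent-sized-array code above.)

    template <typename T, unsigned N> void len(T (&)[N]);
    int xs[4];
    // T deduces to int from the element type; N is deduced from the constant
    // array bound of A using the "array bound" rules, so N = 4.
    len(xs);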
@@ -1795,34 +1733,29 @@ DeduceTemplateArgumentsByTypeMatch(Sema &S,
// T(*)()
// T(*)(T)
case Type::FunctionProto: {
- unsigned SubTDF = TDF & TDF_TopLevelParameterTypeList;
- const FunctionProtoType *FunctionProtoArg =
- dyn_cast<FunctionProtoType>(Arg);
- if (!FunctionProtoArg)
+ const auto *FPP = P->castAs<FunctionProtoType>(),
+ *FPA = A->getAs<FunctionProtoType>();
+ if (!FPA)
return Sema::TDK_NonDeducedMismatch;
- const FunctionProtoType *FunctionProtoParam =
- cast<FunctionProtoType>(Param);
-
- if (FunctionProtoParam->getMethodQuals()
- != FunctionProtoArg->getMethodQuals() ||
- FunctionProtoParam->getRefQualifier()
- != FunctionProtoArg->getRefQualifier() ||
- FunctionProtoParam->isVariadic() != FunctionProtoArg->isVariadic())
+ if (FPP->getMethodQuals() != FPA->getMethodQuals() ||
+ FPP->getRefQualifier() != FPA->getRefQualifier() ||
+ FPP->isVariadic() != FPA->isVariadic())
return Sema::TDK_NonDeducedMismatch;
// Check return types.
if (auto Result = DeduceTemplateArgumentsByTypeMatch(
- S, TemplateParams, FunctionProtoParam->getReturnType(),
- FunctionProtoArg->getReturnType(), Info, Deduced, 0))
+ S, TemplateParams, FPP->getReturnType(), FPA->getReturnType(),
+ Info, Deduced, 0,
+ /*PartialOrdering=*/false,
+ /*DeducedFromArrayBound=*/false))
return Result;
// Check parameter types.
if (auto Result = DeduceTemplateArguments(
- S, TemplateParams, FunctionProtoParam->param_type_begin(),
- FunctionProtoParam->getNumParams(),
- FunctionProtoArg->param_type_begin(),
- FunctionProtoArg->getNumParams(), Info, Deduced, SubTDF))
+ S, TemplateParams, FPP->param_type_begin(), FPP->getNumParams(),
+ FPA->param_type_begin(), FPA->getNumParams(), Info, Deduced,
+ TDF & TDF_TopLevelParameterTypeList))
return Result;
if (TDF & TDF_AllowCompatibleFunctionType)
@@ -1831,15 +1764,15 @@ DeduceTemplateArgumentsByTypeMatch(Sema &S,
// FIXME: Per core-2016/10/1019 (no corresponding core issue yet), permit
// deducing through the noexcept-specifier if it's part of the canonical
// type. libstdc++ relies on this.
- Expr *NoexceptExpr = FunctionProtoParam->getNoexceptExpr();
+ Expr *NoexceptExpr = FPP->getNoexceptExpr();
if (const NonTypeTemplateParmDecl *NTTP =
- NoexceptExpr ? getDeducedParameterFromExpr(Info, NoexceptExpr)
- : nullptr) {
+ NoexceptExpr ? getDeducedParameterFromExpr(Info, NoexceptExpr)
+ : nullptr) {
assert(NTTP->getDepth() == Info.getDeducedDepth() &&
"saw non-type template parameter with wrong depth");
llvm::APSInt Noexcept(1);
- switch (FunctionProtoArg->canThrow()) {
+ switch (FPA->canThrow()) {
case CT_Cannot:
Noexcept = 1;
LLVM_FALLTHROUGH;
@@ -1849,10 +1782,10 @@ DeduceTemplateArgumentsByTypeMatch(Sema &S,
// FIXME: Should we?
return DeduceNonTypeTemplateArgument(
S, TemplateParams, NTTP, Noexcept, S.Context.BoolTy,
- /*ArrayBound*/true, Info, Deduced);
+ /*DeducedFromArrayBound=*/true, Info, Deduced);
case CT_Dependent:
- if (Expr *ArgNoexceptExpr = FunctionProtoArg->getNoexceptExpr())
+ if (Expr *ArgNoexceptExpr = FPA->getNoexceptExpr())
return DeduceNonTypeTemplateArgument(
S, TemplateParams, NTTP, ArgNoexceptExpr, Info, Deduced);
// Can't deduce anything from throw(T...).
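(Illustrative note, not part of the patch: a sketch of deducing through the
noexcept-specifier, the extension described in the FIXME above that libstdc++
relies on.)

    template <bool B> void pick(void (*)() noexcept(B));
    void g() noexcept;
    void h();
    pick(&g);  // g's type cannot throw (CT_Cannot), so B deduces to true
    pick(&h);  // h's type can throw (CT_Can), so B deduces to false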
@@ -1870,11 +1803,6 @@ DeduceTemplateArgumentsByTypeMatch(Sema &S,
case Type::InjectedClassName:
// Treat a template's injected-class-name as if the template
// specialization type had been used.
- Param = cast<InjectedClassNameType>(Param)
- ->getInjectedSpecializationType();
- assert(isa<TemplateSpecializationType>(Param) &&
- "injected class name is not a template specialization type");
- LLVM_FALLTHROUGH;
// template-name<T> (where template-name refers to a class template)
// template-name<i>
@@ -1882,41 +1810,33 @@ DeduceTemplateArgumentsByTypeMatch(Sema &S,
// TT<i>
// TT<>
case Type::TemplateSpecialization: {
- const TemplateSpecializationType *SpecParam =
- cast<TemplateSpecializationType>(Param);
-
// When Arg cannot be a derived class, we can just try to deduce template
// arguments from the template-id.
- const RecordType *RecordT = Arg->getAs<RecordType>();
- if (!(TDF & TDF_DerivedClass) || !RecordT)
- return DeduceTemplateArguments(S, TemplateParams, SpecParam, Arg, Info,
- Deduced);
+ if (!(TDF & TDF_DerivedClass) || !A->isRecordType())
+ return DeduceTemplateSpecArguments(S, TemplateParams, P, A, Info,
+ Deduced);
SmallVector<DeducedTemplateArgument, 8> DeducedOrig(Deduced.begin(),
Deduced.end());
- Sema::TemplateDeductionResult Result = DeduceTemplateArguments(
- S, TemplateParams, SpecParam, Arg, Info, Deduced);
-
+ auto Result =
+ DeduceTemplateSpecArguments(S, TemplateParams, P, A, Info, Deduced);
if (Result == Sema::TDK_Success)
return Result;
// We cannot inspect base classes as part of deduction when the type
// is incomplete, so either instantiate any templates necessary to
// complete the type, or skip over it if it cannot be completed.
- if (!S.isCompleteType(Info.getLocation(), Arg))
+ if (!S.isCompleteType(Info.getLocation(), A))
return Result;
// Reset the incorrectly deduced argument from above.
Deduced = DeducedOrig;
// Check bases according to C++14 [temp.deduct.call] p4b3:
- Sema::TemplateDeductionResult BaseResult = DeduceTemplateBases(
- S, RecordT, TemplateParams, SpecParam, Info, Deduced);
-
- if (BaseResult != Sema::TDK_Invalid)
- return BaseResult;
- return Result;
+ auto BaseResult = DeduceTemplateBases(S, getCanonicalRD(A),
+ TemplateParams, P, Info, Deduced);
+ return BaseResult != Sema::TDK_Invalid ? BaseResult : Result;
}
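(Illustrative note, not part of the patch: a small example of the base-class
deduction performed above per C++14 [temp.deduct.call]p4b3.)

    template <typename T> struct Box {};
    struct IntBox : Box<int> {};
    template <typename T> void unbox(Box<T>);
    // IntBox is not itself a Box<T> specialization, so deduction from the
    // template-id fails first; its base Box<int> is then tried and T = int.
    unbox(IntBox{});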
// T type::*
@@ -1929,33 +1849,27 @@ DeduceTemplateArgumentsByTypeMatch(Sema &S,
// T (T::*)()
// T (T::*)(T)
case Type::MemberPointer: {
- const MemberPointerType *MemPtrParam = cast<MemberPointerType>(Param);
- const MemberPointerType *MemPtrArg = dyn_cast<MemberPointerType>(Arg);
- if (!MemPtrArg)
+ const auto *MPP = P->castAs<MemberPointerType>(),
+ *MPA = A->getAs<MemberPointerType>();
+ if (!MPA)
return Sema::TDK_NonDeducedMismatch;
- QualType ParamPointeeType = MemPtrParam->getPointeeType();
- if (ParamPointeeType->isFunctionType())
- S.adjustMemberFunctionCC(ParamPointeeType, /*IsStatic=*/true,
+ QualType PPT = MPP->getPointeeType();
+ if (PPT->isFunctionType())
+ S.adjustMemberFunctionCC(PPT, /*IsStatic=*/true,
/*IsCtorOrDtor=*/false, Info.getLocation());
- QualType ArgPointeeType = MemPtrArg->getPointeeType();
- if (ArgPointeeType->isFunctionType())
- S.adjustMemberFunctionCC(ArgPointeeType, /*IsStatic=*/true,
+ QualType APT = MPA->getPointeeType();
+ if (APT->isFunctionType())
+ S.adjustMemberFunctionCC(APT, /*IsStatic=*/true,
/*IsCtorOrDtor=*/false, Info.getLocation());
- if (Sema::TemplateDeductionResult Result
- = DeduceTemplateArgumentsByTypeMatch(S, TemplateParams,
- ParamPointeeType,
- ArgPointeeType,
- Info, Deduced,
- TDF & TDF_IgnoreQualifiers))
+ unsigned SubTDF = TDF & TDF_IgnoreQualifiers;
+ if (auto Result = DeduceTemplateArgumentsByTypeMatch(
+ S, TemplateParams, PPT, APT, Info, Deduced, SubTDF))
return Result;
-
- return DeduceTemplateArgumentsByTypeMatch(S, TemplateParams,
- QualType(MemPtrParam->getClass(), 0),
- QualType(MemPtrArg->getClass(), 0),
- Info, Deduced,
- TDF & TDF_IgnoreQualifiers);
+ return DeduceTemplateArgumentsByTypeMatch(
+ S, TemplateParams, QualType(MPP->getClass(), 0),
+ QualType(MPA->getClass(), 0), Info, Deduced, SubTDF);
}
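(Illustrative note, not part of the patch: a minimal sketch of the
member-pointer case above, which deduces from both the pointee and the class.)

    struct S { int m; };
    template <typename T, typename C> void mem(T C::*);
    mem(&S::m);  // pointee deduces T = int, class deduces C = S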
// (clang extension)
@@ -1964,70 +1878,58 @@ DeduceTemplateArgumentsByTypeMatch(Sema &S,
// T(^)()
// T(^)(T)
case Type::BlockPointer: {
- const BlockPointerType *BlockPtrParam = cast<BlockPointerType>(Param);
- const BlockPointerType *BlockPtrArg = dyn_cast<BlockPointerType>(Arg);
-
- if (!BlockPtrArg)
+ const auto *BPP = P->castAs<BlockPointerType>(),
+ *BPA = A->getAs<BlockPointerType>();
+ if (!BPA)
return Sema::TDK_NonDeducedMismatch;
-
- return DeduceTemplateArgumentsByTypeMatch(S, TemplateParams,
- BlockPtrParam->getPointeeType(),
- BlockPtrArg->getPointeeType(),
- Info, Deduced, 0);
+ return DeduceTemplateArgumentsByTypeMatch(
+ S, TemplateParams, BPP->getPointeeType(), BPA->getPointeeType(), Info,
+ Deduced, 0);
}
// (clang extension)
//
// T __attribute__(((ext_vector_type(<integral constant>))))
case Type::ExtVector: {
- const ExtVectorType *VectorParam = cast<ExtVectorType>(Param);
- if (const ExtVectorType *VectorArg = dyn_cast<ExtVectorType>(Arg)) {
+ const auto *VP = P->castAs<ExtVectorType>();
+ QualType ElementType;
+ if (const auto *VA = A->getAs<ExtVectorType>()) {
// Make sure that the vectors have the same number of elements.
- if (VectorParam->getNumElements() != VectorArg->getNumElements())
+ if (VP->getNumElements() != VA->getNumElements())
return Sema::TDK_NonDeducedMismatch;
-
- // Perform deduction on the element types.
- return DeduceTemplateArgumentsByTypeMatch(S, TemplateParams,
- VectorParam->getElementType(),
- VectorArg->getElementType(),
- Info, Deduced, TDF);
- }
-
- if (const DependentSizedExtVectorType *VectorArg
- = dyn_cast<DependentSizedExtVectorType>(Arg)) {
+ ElementType = VA->getElementType();
+ } else if (const auto *VA = A->getAs<DependentSizedExtVectorType>()) {
// We can't check the number of elements, since the argument has a
// dependent number of elements. This can only occur during partial
// ordering.
-
- // Perform deduction on the element types.
- return DeduceTemplateArgumentsByTypeMatch(S, TemplateParams,
- VectorParam->getElementType(),
- VectorArg->getElementType(),
- Info, Deduced, TDF);
+ ElementType = VA->getElementType();
+ } else {
+ return Sema::TDK_NonDeducedMismatch;
}
-
- return Sema::TDK_NonDeducedMismatch;
+ // Perform deduction on the element types.
+ return DeduceTemplateArgumentsByTypeMatch(
+ S, TemplateParams, VP->getElementType(), ElementType, Info, Deduced,
+ TDF);
}
case Type::DependentVector: {
- const auto *VectorParam = cast<DependentVectorType>(Param);
+ const auto *VP = P->castAs<DependentVectorType>();
- if (const auto *VectorArg = dyn_cast<VectorType>(Arg)) {
+ if (const auto *VA = A->getAs<VectorType>()) {
// Perform deduction on the element types.
- if (Sema::TemplateDeductionResult Result =
- DeduceTemplateArgumentsByTypeMatch(
- S, TemplateParams, VectorParam->getElementType(),
- VectorArg->getElementType(), Info, Deduced, TDF))
+ if (auto Result = DeduceTemplateArgumentsByTypeMatch(
+ S, TemplateParams, VP->getElementType(), VA->getElementType(),
+ Info, Deduced, TDF))
return Result;
// Perform deduction on the vector size, if we can.
const NonTypeTemplateParmDecl *NTTP =
- getDeducedParameterFromExpr(Info, VectorParam->getSizeExpr());
+ getDeducedParameterFromExpr(Info, VP->getSizeExpr());
if (!NTTP)
return Sema::TDK_Success;
llvm::APSInt ArgSize(S.Context.getTypeSize(S.Context.IntTy), false);
- ArgSize = VectorArg->getNumElements();
+ ArgSize = VA->getNumElements();
// Note that we use the "array bound" rules here; just like in that
// case, we don't have any particular type for the vector size, but
// we can provide one if necessary.
@@ -2036,22 +1938,21 @@ DeduceTemplateArgumentsByTypeMatch(Sema &S,
Info, Deduced);
}
- if (const auto *VectorArg = dyn_cast<DependentVectorType>(Arg)) {
+ if (const auto *VA = A->getAs<DependentVectorType>()) {
// Perform deduction on the element types.
- if (Sema::TemplateDeductionResult Result =
- DeduceTemplateArgumentsByTypeMatch(
- S, TemplateParams, VectorParam->getElementType(),
- VectorArg->getElementType(), Info, Deduced, TDF))
+ if (auto Result = DeduceTemplateArgumentsByTypeMatch(
+ S, TemplateParams, VP->getElementType(), VA->getElementType(),
+ Info, Deduced, TDF))
return Result;
// Perform deduction on the vector size, if we can.
- const NonTypeTemplateParmDecl *NTTP = getDeducedParameterFromExpr(
- Info, VectorParam->getSizeExpr());
+ const NonTypeTemplateParmDecl *NTTP =
+ getDeducedParameterFromExpr(Info, VP->getSizeExpr());
if (!NTTP)
return Sema::TDK_Success;
- return DeduceNonTypeTemplateArgument(
- S, TemplateParams, NTTP, VectorArg->getSizeExpr(), Info, Deduced);
+ return DeduceNonTypeTemplateArgument(S, TemplateParams, NTTP,
+ VA->getSizeExpr(), Info, Deduced);
}
return Sema::TDK_NonDeducedMismatch;
@@ -2061,26 +1962,23 @@ DeduceTemplateArgumentsByTypeMatch(Sema &S,
//
// T __attribute__(((ext_vector_type(N))))
case Type::DependentSizedExtVector: {
- const DependentSizedExtVectorType *VectorParam
- = cast<DependentSizedExtVectorType>(Param);
+ const auto *VP = P->castAs<DependentSizedExtVectorType>();
- if (const ExtVectorType *VectorArg = dyn_cast<ExtVectorType>(Arg)) {
+ if (const auto *VA = A->getAs<ExtVectorType>()) {
// Perform deduction on the element types.
- if (Sema::TemplateDeductionResult Result
- = DeduceTemplateArgumentsByTypeMatch(S, TemplateParams,
- VectorParam->getElementType(),
- VectorArg->getElementType(),
- Info, Deduced, TDF))
+ if (auto Result = DeduceTemplateArgumentsByTypeMatch(
+ S, TemplateParams, VP->getElementType(), VA->getElementType(),
+ Info, Deduced, TDF))
return Result;
// Perform deduction on the vector size, if we can.
const NonTypeTemplateParmDecl *NTTP =
- getDeducedParameterFromExpr(Info, VectorParam->getSizeExpr());
+ getDeducedParameterFromExpr(Info, VP->getSizeExpr());
if (!NTTP)
return Sema::TDK_Success;
llvm::APSInt ArgSize(S.Context.getTypeSize(S.Context.IntTy), false);
- ArgSize = VectorArg->getNumElements();
+ ArgSize = VA->getNumElements();
// Note that we use the "array bound" rules here; just like in that
// case, we don't have any particular type for the vector size, but
// we can provide one if necessary.
@@ -2089,25 +1987,21 @@ DeduceTemplateArgumentsByTypeMatch(Sema &S,
Deduced);
}
- if (const DependentSizedExtVectorType *VectorArg
- = dyn_cast<DependentSizedExtVectorType>(Arg)) {
+ if (const auto *VA = A->getAs<DependentSizedExtVectorType>()) {
// Perform deduction on the element types.
- if (Sema::TemplateDeductionResult Result
- = DeduceTemplateArgumentsByTypeMatch(S, TemplateParams,
- VectorParam->getElementType(),
- VectorArg->getElementType(),
- Info, Deduced, TDF))
+ if (auto Result = DeduceTemplateArgumentsByTypeMatch(
+ S, TemplateParams, VP->getElementType(), VA->getElementType(),
+ Info, Deduced, TDF))
return Result;
// Perform deduction on the vector size, if we can.
const NonTypeTemplateParmDecl *NTTP =
- getDeducedParameterFromExpr(Info, VectorParam->getSizeExpr());
+ getDeducedParameterFromExpr(Info, VP->getSizeExpr());
if (!NTTP)
return Sema::TDK_Success;
return DeduceNonTypeTemplateArgument(S, TemplateParams, NTTP,
- VectorArg->getSizeExpr(),
- Info, Deduced);
+ VA->getSizeExpr(), Info, Deduced);
}
return Sema::TDK_NonDeducedMismatch;
@@ -2118,62 +2012,59 @@ DeduceTemplateArgumentsByTypeMatch(Sema &S,
// T __attribute__((matrix_type(<integral constant>,
// <integral constant>)))
case Type::ConstantMatrix: {
- const ConstantMatrixType *MatrixArg = dyn_cast<ConstantMatrixType>(Arg);
- if (!MatrixArg)
+ const auto *MP = P->castAs<ConstantMatrixType>(),
+ *MA = A->getAs<ConstantMatrixType>();
+ if (!MA)
return Sema::TDK_NonDeducedMismatch;
- const ConstantMatrixType *MatrixParam = cast<ConstantMatrixType>(Param);
// Check that the dimensions are the same
- if (MatrixParam->getNumRows() != MatrixArg->getNumRows() ||
- MatrixParam->getNumColumns() != MatrixArg->getNumColumns()) {
+ if (MP->getNumRows() != MA->getNumRows() ||
+ MP->getNumColumns() != MA->getNumColumns()) {
return Sema::TDK_NonDeducedMismatch;
}
// Perform deduction on element types.
return DeduceTemplateArgumentsByTypeMatch(
- S, TemplateParams, MatrixParam->getElementType(),
- MatrixArg->getElementType(), Info, Deduced, TDF);
+ S, TemplateParams, MP->getElementType(), MA->getElementType(), Info,
+ Deduced, TDF);
}
case Type::DependentSizedMatrix: {
- const MatrixType *MatrixArg = dyn_cast<MatrixType>(Arg);
- if (!MatrixArg)
+ const auto *MP = P->castAs<DependentSizedMatrixType>();
+ const auto *MA = A->getAs<MatrixType>();
+ if (!MA)
return Sema::TDK_NonDeducedMismatch;
// Check the element type of the matrices.
- const DependentSizedMatrixType *MatrixParam =
- cast<DependentSizedMatrixType>(Param);
- if (Sema::TemplateDeductionResult Result =
- DeduceTemplateArgumentsByTypeMatch(
- S, TemplateParams, MatrixParam->getElementType(),
- MatrixArg->getElementType(), Info, Deduced, TDF))
+ if (auto Result = DeduceTemplateArgumentsByTypeMatch(
+ S, TemplateParams, MP->getElementType(), MA->getElementType(),
+ Info, Deduced, TDF))
return Result;
// Try to deduce a matrix dimension.
auto DeduceMatrixArg =
[&S, &Info, &Deduced, &TemplateParams](
- Expr *ParamExpr, const MatrixType *Arg,
+ Expr *ParamExpr, const MatrixType *A,
unsigned (ConstantMatrixType::*GetArgDimension)() const,
Expr *(DependentSizedMatrixType::*GetArgDimensionExpr)() const) {
- const auto *ArgConstMatrix = dyn_cast<ConstantMatrixType>(Arg);
- const auto *ArgDepMatrix = dyn_cast<DependentSizedMatrixType>(Arg);
+ const auto *ACM = dyn_cast<ConstantMatrixType>(A);
+ const auto *ADM = dyn_cast<DependentSizedMatrixType>(A);
if (!ParamExpr->isValueDependent()) {
Optional<llvm::APSInt> ParamConst =
ParamExpr->getIntegerConstantExpr(S.Context);
if (!ParamConst)
return Sema::TDK_NonDeducedMismatch;
- if (ArgConstMatrix) {
- if ((ArgConstMatrix->*GetArgDimension)() == *ParamConst)
+ if (ACM) {
+ if ((ACM->*GetArgDimension)() == *ParamConst)
return Sema::TDK_Success;
return Sema::TDK_NonDeducedMismatch;
}
- Expr *ArgExpr = (ArgDepMatrix->*GetArgDimensionExpr)();
- if (!ArgExpr->isValueDependent())
- if (Optional<llvm::APSInt> ArgConst =
- ArgExpr->getIntegerConstantExpr(S.Context))
- if (*ArgConst == *ParamConst)
- return Sema::TDK_Success;
+ Expr *ArgExpr = (ADM->*GetArgDimensionExpr)();
+ if (Optional<llvm::APSInt> ArgConst =
+ ArgExpr->getIntegerConstantExpr(S.Context))
+ if (*ArgConst == *ParamConst)
+ return Sema::TDK_Success;
return Sema::TDK_NonDeducedMismatch;
}
@@ -2182,27 +2073,26 @@ DeduceTemplateArgumentsByTypeMatch(Sema &S,
if (!NTTP)
return Sema::TDK_Success;
- if (ArgConstMatrix) {
+ if (ACM) {
llvm::APSInt ArgConst(
S.Context.getTypeSize(S.Context.getSizeType()));
- ArgConst = (ArgConstMatrix->*GetArgDimension)();
+ ArgConst = (ACM->*GetArgDimension)();
return DeduceNonTypeTemplateArgument(
S, TemplateParams, NTTP, ArgConst, S.Context.getSizeType(),
/*ArrayBound=*/true, Info, Deduced);
}
- return DeduceNonTypeTemplateArgument(
- S, TemplateParams, NTTP, (ArgDepMatrix->*GetArgDimensionExpr)(),
- Info, Deduced);
+ return DeduceNonTypeTemplateArgument(S, TemplateParams, NTTP,
+ (ADM->*GetArgDimensionExpr)(),
+ Info, Deduced);
};
- auto Result = DeduceMatrixArg(MatrixParam->getRowExpr(), MatrixArg,
- &ConstantMatrixType::getNumRows,
- &DependentSizedMatrixType::getRowExpr);
- if (Result)
+ if (auto Result = DeduceMatrixArg(MP->getRowExpr(), MA,
+ &ConstantMatrixType::getNumRows,
+ &DependentSizedMatrixType::getRowExpr))
return Result;
- return DeduceMatrixArg(MatrixParam->getColumnExpr(), MatrixArg,
+ return DeduceMatrixArg(MP->getColumnExpr(), MA,
&ConstantMatrixType::getNumColumns,
&DependentSizedMatrixType::getColumnExpr);
}
@@ -2211,44 +2101,39 @@ DeduceTemplateArgumentsByTypeMatch(Sema &S,
//
// T __attribute__(((address_space(N))))
case Type::DependentAddressSpace: {
- const DependentAddressSpaceType *AddressSpaceParam =
- cast<DependentAddressSpaceType>(Param);
+ const auto *ASP = P->castAs<DependentAddressSpaceType>();
- if (const DependentAddressSpaceType *AddressSpaceArg =
- dyn_cast<DependentAddressSpaceType>(Arg)) {
+ if (const auto *ASA = A->getAs<DependentAddressSpaceType>()) {
// Perform deduction on the pointer type.
- if (Sema::TemplateDeductionResult Result =
- DeduceTemplateArgumentsByTypeMatch(
- S, TemplateParams, AddressSpaceParam->getPointeeType(),
- AddressSpaceArg->getPointeeType(), Info, Deduced, TDF))
+ if (auto Result = DeduceTemplateArgumentsByTypeMatch(
+ S, TemplateParams, ASP->getPointeeType(), ASA->getPointeeType(),
+ Info, Deduced, TDF))
return Result;
// Perform deduction on the address space, if we can.
- const NonTypeTemplateParmDecl *NTTP = getDeducedParameterFromExpr(
- Info, AddressSpaceParam->getAddrSpaceExpr());
+ const NonTypeTemplateParmDecl *NTTP =
+ getDeducedParameterFromExpr(Info, ASP->getAddrSpaceExpr());
if (!NTTP)
return Sema::TDK_Success;
return DeduceNonTypeTemplateArgument(
- S, TemplateParams, NTTP, AddressSpaceArg->getAddrSpaceExpr(), Info,
- Deduced);
+ S, TemplateParams, NTTP, ASA->getAddrSpaceExpr(), Info, Deduced);
}
- if (isTargetAddressSpace(Arg.getAddressSpace())) {
+ if (isTargetAddressSpace(A.getAddressSpace())) {
llvm::APSInt ArgAddressSpace(S.Context.getTypeSize(S.Context.IntTy),
false);
- ArgAddressSpace = toTargetAddressSpace(Arg.getAddressSpace());
+ ArgAddressSpace = toTargetAddressSpace(A.getAddressSpace());
// Perform deduction on the pointer types.
- if (Sema::TemplateDeductionResult Result =
- DeduceTemplateArgumentsByTypeMatch(
- S, TemplateParams, AddressSpaceParam->getPointeeType(),
- S.Context.removeAddrSpaceQualType(Arg), Info, Deduced, TDF))
+ if (auto Result = DeduceTemplateArgumentsByTypeMatch(
+ S, TemplateParams, ASP->getPointeeType(),
+ S.Context.removeAddrSpaceQualType(A), Info, Deduced, TDF))
return Result;
// Perform deduction on the address space, if we can.
- const NonTypeTemplateParmDecl *NTTP = getDeducedParameterFromExpr(
- Info, AddressSpaceParam->getAddrSpaceExpr());
+ const NonTypeTemplateParmDecl *NTTP =
+ getDeducedParameterFromExpr(Info, ASP->getAddrSpaceExpr());
if (!NTTP)
return Sema::TDK_Success;
@@ -2260,30 +2145,31 @@ DeduceTemplateArgumentsByTypeMatch(Sema &S,
return Sema::TDK_NonDeducedMismatch;
}
case Type::DependentExtInt: {
- const auto *IntParam = cast<DependentExtIntType>(Param);
+ const auto *IP = P->castAs<DependentExtIntType>();
- if (const auto *IntArg = dyn_cast<ExtIntType>(Arg)){
- if (IntParam->isUnsigned() != IntArg->isUnsigned())
+ if (const auto *IA = A->getAs<ExtIntType>()) {
+ if (IP->isUnsigned() != IA->isUnsigned())
return Sema::TDK_NonDeducedMismatch;
const NonTypeTemplateParmDecl *NTTP =
- getDeducedParameterFromExpr(Info, IntParam->getNumBitsExpr());
+ getDeducedParameterFromExpr(Info, IP->getNumBitsExpr());
if (!NTTP)
return Sema::TDK_Success;
llvm::APSInt ArgSize(S.Context.getTypeSize(S.Context.IntTy), false);
- ArgSize = IntArg->getNumBits();
+ ArgSize = IA->getNumBits();
return DeduceNonTypeTemplateArgument(S, TemplateParams, NTTP, ArgSize,
S.Context.IntTy, true, Info,
Deduced);
}
- if (const auto *IntArg = dyn_cast<DependentExtIntType>(Arg)) {
- if (IntParam->isUnsigned() != IntArg->isUnsigned())
+ if (const auto *IA = A->getAs<DependentExtIntType>()) {
+ if (IP->isUnsigned() != IA->isUnsigned())
return Sema::TDK_NonDeducedMismatch;
return Sema::TDK_Success;
}
+
return Sema::TDK_NonDeducedMismatch;
}
@@ -2293,125 +2179,103 @@ DeduceTemplateArgumentsByTypeMatch(Sema &S,
case Type::UnresolvedUsing:
case Type::Decltype:
case Type::UnaryTransform:
- case Type::Auto:
case Type::DeducedTemplateSpecialization:
case Type::DependentTemplateSpecialization:
case Type::PackExpansion:
case Type::Pipe:
// No template argument deduction for these types
return Sema::TDK_Success;
- }
+ }
llvm_unreachable("Invalid Type Class!");
}
static Sema::TemplateDeductionResult
-DeduceTemplateArguments(Sema &S,
- TemplateParameterList *TemplateParams,
- const TemplateArgument &Param,
- TemplateArgument Arg,
+DeduceTemplateArguments(Sema &S, TemplateParameterList *TemplateParams,
+ const TemplateArgument &P, TemplateArgument A,
TemplateDeductionInfo &Info,
SmallVectorImpl<DeducedTemplateArgument> &Deduced) {
// If the template argument is a pack expansion, perform template argument
// deduction against the pattern of that expansion. This only occurs during
// partial ordering.
- if (Arg.isPackExpansion())
- Arg = Arg.getPackExpansionPattern();
+ if (A.isPackExpansion())
+ A = A.getPackExpansionPattern();
- switch (Param.getKind()) {
+ switch (P.getKind()) {
case TemplateArgument::Null:
llvm_unreachable("Null template argument in parameter list");
case TemplateArgument::Type:
- if (Arg.getKind() == TemplateArgument::Type)
- return DeduceTemplateArgumentsByTypeMatch(S, TemplateParams,
- Param.getAsType(),
- Arg.getAsType(),
- Info, Deduced, 0);
- Info.FirstArg = Param;
- Info.SecondArg = Arg;
+ if (A.getKind() == TemplateArgument::Type)
+ return DeduceTemplateArgumentsByTypeMatch(
+ S, TemplateParams, P.getAsType(), A.getAsType(), Info, Deduced, 0);
+ Info.FirstArg = P;
+ Info.SecondArg = A;
return Sema::TDK_NonDeducedMismatch;
case TemplateArgument::Template:
- if (Arg.getKind() == TemplateArgument::Template)
- return DeduceTemplateArguments(S, TemplateParams,
- Param.getAsTemplate(),
- Arg.getAsTemplate(), Info, Deduced);
- Info.FirstArg = Param;
- Info.SecondArg = Arg;
+ if (A.getKind() == TemplateArgument::Template)
+ return DeduceTemplateArguments(S, TemplateParams, P.getAsTemplate(),
+ A.getAsTemplate(), Info, Deduced);
+ Info.FirstArg = P;
+ Info.SecondArg = A;
return Sema::TDK_NonDeducedMismatch;
case TemplateArgument::TemplateExpansion:
llvm_unreachable("caller should handle pack expansions");
case TemplateArgument::Declaration:
- if (Arg.getKind() == TemplateArgument::Declaration &&
- isSameDeclaration(Param.getAsDecl(), Arg.getAsDecl()))
+ if (A.getKind() == TemplateArgument::Declaration &&
+ isSameDeclaration(P.getAsDecl(), A.getAsDecl()))
return Sema::TDK_Success;
- Info.FirstArg = Param;
- Info.SecondArg = Arg;
+ Info.FirstArg = P;
+ Info.SecondArg = A;
return Sema::TDK_NonDeducedMismatch;
case TemplateArgument::NullPtr:
- if (Arg.getKind() == TemplateArgument::NullPtr &&
- S.Context.hasSameType(Param.getNullPtrType(), Arg.getNullPtrType()))
+ if (A.getKind() == TemplateArgument::NullPtr &&
+ S.Context.hasSameType(P.getNullPtrType(), A.getNullPtrType()))
return Sema::TDK_Success;
- Info.FirstArg = Param;
- Info.SecondArg = Arg;
+ Info.FirstArg = P;
+ Info.SecondArg = A;
return Sema::TDK_NonDeducedMismatch;
case TemplateArgument::Integral:
- if (Arg.getKind() == TemplateArgument::Integral) {
- if (hasSameExtendedValue(Param.getAsIntegral(), Arg.getAsIntegral()))
+ if (A.getKind() == TemplateArgument::Integral) {
+ if (hasSameExtendedValue(P.getAsIntegral(), A.getAsIntegral()))
return Sema::TDK_Success;
-
- Info.FirstArg = Param;
- Info.SecondArg = Arg;
- return Sema::TDK_NonDeducedMismatch;
}
-
- if (Arg.getKind() == TemplateArgument::Expression) {
- Info.FirstArg = Param;
- Info.SecondArg = Arg;
- return Sema::TDK_NonDeducedMismatch;
- }
-
- Info.FirstArg = Param;
- Info.SecondArg = Arg;
+ Info.FirstArg = P;
+ Info.SecondArg = A;
return Sema::TDK_NonDeducedMismatch;
case TemplateArgument::Expression:
if (const NonTypeTemplateParmDecl *NTTP =
- getDeducedParameterFromExpr(Info, Param.getAsExpr())) {
- if (Arg.getKind() == TemplateArgument::Integral)
- return DeduceNonTypeTemplateArgument(S, TemplateParams, NTTP,
- Arg.getAsIntegral(),
- Arg.getIntegralType(),
- /*ArrayBound=*/false,
- Info, Deduced);
- if (Arg.getKind() == TemplateArgument::NullPtr)
+ getDeducedParameterFromExpr(Info, P.getAsExpr())) {
+ if (A.getKind() == TemplateArgument::Integral)
+ return DeduceNonTypeTemplateArgument(
+ S, TemplateParams, NTTP, A.getAsIntegral(), A.getIntegralType(),
+ /*ArrayBound=*/false, Info, Deduced);
+ if (A.getKind() == TemplateArgument::NullPtr)
return DeduceNullPtrTemplateArgument(S, TemplateParams, NTTP,
- Arg.getNullPtrType(),
- Info, Deduced);
- if (Arg.getKind() == TemplateArgument::Expression)
+ A.getNullPtrType(), Info, Deduced);
+ if (A.getKind() == TemplateArgument::Expression)
return DeduceNonTypeTemplateArgument(S, TemplateParams, NTTP,
- Arg.getAsExpr(), Info, Deduced);
- if (Arg.getKind() == TemplateArgument::Declaration)
- return DeduceNonTypeTemplateArgument(S, TemplateParams, NTTP,
- Arg.getAsDecl(),
- Arg.getParamTypeForDecl(),
- Info, Deduced);
+ A.getAsExpr(), Info, Deduced);
+ if (A.getKind() == TemplateArgument::Declaration)
+ return DeduceNonTypeTemplateArgument(
+ S, TemplateParams, NTTP, A.getAsDecl(), A.getParamTypeForDecl(),
+ Info, Deduced);
- Info.FirstArg = Param;
- Info.SecondArg = Arg;
+ Info.FirstArg = P;
+ Info.SecondArg = A;
return Sema::TDK_NonDeducedMismatch;
}
// Can't deduce anything, but that's okay.
return Sema::TDK_Success;
-
case TemplateArgument::Pack:
llvm_unreachable("Argument packs should be expanded by the caller!");
}
@@ -2464,8 +2328,8 @@ static bool hasPackExpansionBeforeEnd(ArrayRef<TemplateArgument> Args) {
static Sema::TemplateDeductionResult
DeduceTemplateArguments(Sema &S, TemplateParameterList *TemplateParams,
- ArrayRef<TemplateArgument> Params,
- ArrayRef<TemplateArgument> Args,
+ ArrayRef<TemplateArgument> Ps,
+ ArrayRef<TemplateArgument> As,
TemplateDeductionInfo &Info,
SmallVectorImpl<DeducedTemplateArgument> &Deduced,
bool NumberOfArgumentsMustMatch) {
@@ -2473,7 +2337,7 @@ DeduceTemplateArguments(Sema &S, TemplateParameterList *TemplateParams,
// If the template argument list of P contains a pack expansion that is not
// the last template argument, the entire template argument list is a
// non-deduced context.
- if (hasPackExpansionBeforeEnd(Params))
+ if (hasPackExpansionBeforeEnd(Ps))
return Sema::TDK_Success;
// C++0x [temp.deduct.type]p9:
@@ -2481,12 +2345,13 @@ DeduceTemplateArguments(Sema &S, TemplateParameterList *TemplateParams,
// respective template argument list P is compared with the corresponding
// argument Ai of the corresponding template argument list of A.
unsigned ArgIdx = 0, ParamIdx = 0;
- for (; hasTemplateArgumentForDeduction(Params, ParamIdx); ++ParamIdx) {
- if (!Params[ParamIdx].isPackExpansion()) {
+ for (; hasTemplateArgumentForDeduction(Ps, ParamIdx); ++ParamIdx) {
+ const TemplateArgument &P = Ps[ParamIdx];
+ if (!P.isPackExpansion()) {
// The simple case: deduce template arguments by matching Pi and Ai.
// Check whether we have enough arguments.
- if (!hasTemplateArgumentForDeduction(Args, ArgIdx))
+ if (!hasTemplateArgumentForDeduction(As, ArgIdx))
return NumberOfArgumentsMustMatch
? Sema::TDK_MiscellaneousDeductionFailure
: Sema::TDK_Success;
@@ -2494,14 +2359,12 @@ DeduceTemplateArguments(Sema &S, TemplateParameterList *TemplateParams,
// C++1z [temp.deduct.type]p9:
// During partial ordering, if Ai was originally a pack expansion [and]
// Pi is not a pack expansion, template argument deduction fails.
- if (Args[ArgIdx].isPackExpansion())
+ if (As[ArgIdx].isPackExpansion())
return Sema::TDK_MiscellaneousDeductionFailure;
// Perform deduction for this Pi/Ai pair.
- if (Sema::TemplateDeductionResult Result
- = DeduceTemplateArguments(S, TemplateParams,
- Params[ParamIdx], Args[ArgIdx],
- Info, Deduced))
+ if (auto Result = DeduceTemplateArguments(S, TemplateParams, P,
+ As[ArgIdx], Info, Deduced))
return Result;
// Move to the next argument.
@@ -2516,7 +2379,7 @@ DeduceTemplateArguments(Sema &S, TemplateParameterList *TemplateParams,
// each remaining argument in the template argument list of A. Each
// comparison deduces template arguments for subsequent positions in the
// template parameter packs expanded by Pi.
- TemplateArgument Pattern = Params[ParamIdx].getPackExpansionPattern();
+ TemplateArgument Pattern = P.getPackExpansionPattern();
// Prepare to deduce the packs within the pattern.
PackDeductionScope PackScope(S, TemplateParams, Deduced, Info, Pattern);
@@ -2524,13 +2387,12 @@ DeduceTemplateArguments(Sema &S, TemplateParameterList *TemplateParams,
// Keep track of the deduced template arguments for each parameter pack
// expanded by this pack expansion (the outer index) and for each
// template argument (the inner SmallVectors).
- for (; hasTemplateArgumentForDeduction(Args, ArgIdx) &&
+ for (; hasTemplateArgumentForDeduction(As, ArgIdx) &&
PackScope.hasNextElement();
++ArgIdx) {
// Deduce template arguments from the pattern.
- if (Sema::TemplateDeductionResult Result
- = DeduceTemplateArguments(S, TemplateParams, Pattern, Args[ArgIdx],
- Info, Deduced))
+ if (auto Result = DeduceTemplateArguments(S, TemplateParams, Pattern,
+ As[ArgIdx], Info, Deduced))
return Result;
PackScope.nextPackElement();
@@ -2546,15 +2408,14 @@ DeduceTemplateArguments(Sema &S, TemplateParameterList *TemplateParams,
}
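(Illustrative note, not part of the patch: a sketch of the Pi/Ai loop above,
where a trailing pack expansion in P absorbs the remaining arguments of A.)

    template <typename... Ts> struct List {};
    template <typename T, typename... Rest> void split(List<T, Rest...>);
    // Comparing <T, Rest...> against <int, float, double> deduces T = int and
    // then lets the pack expansion Rest... collect {float, double}.
    split(List<int, float, double>{});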
static Sema::TemplateDeductionResult
-DeduceTemplateArguments(Sema &S,
- TemplateParameterList *TemplateParams,
+DeduceTemplateArguments(Sema &S, TemplateParameterList *TemplateParams,
const TemplateArgumentList &ParamList,
const TemplateArgumentList &ArgList,
TemplateDeductionInfo &Info,
SmallVectorImpl<DeducedTemplateArgument> &Deduced) {
return DeduceTemplateArguments(S, TemplateParams, ParamList.asArray(),
ArgList.asArray(), Info, Deduced,
- /*NumberOfArgumentsMustMatch*/false);
+ /*NumberOfArgumentsMustMatch=*/false);
}
/// Determine whether two template arguments are the same.
@@ -2858,9 +2719,24 @@ static Sema::TemplateDeductionResult ConvertDeducedTemplateArguments(
return Sema::TDK_Incomplete;
}
- TemplateArgumentLoc DefArg = S.SubstDefaultTemplateArgumentIfAvailable(
- TD, TD->getLocation(), TD->getSourceRange().getEnd(), Param, Builder,
- HasDefaultArg);
+ TemplateArgumentLoc DefArg;
+ {
+ Qualifiers ThisTypeQuals;
+ CXXRecordDecl *ThisContext = nullptr;
+ if (auto *Rec = dyn_cast<CXXRecordDecl>(TD->getDeclContext()))
+ if (Rec->isLambda())
+ if (auto *Method = dyn_cast<CXXMethodDecl>(Rec->getDeclContext())) {
+ ThisContext = Method->getParent();
+ ThisTypeQuals = Method->getMethodQualifiers();
+ }
+
+ Sema::CXXThisScopeRAII ThisScope(S, ThisContext, ThisTypeQuals,
+ S.getLangOpts().CPlusPlus17);
+
+ DefArg = S.SubstDefaultTemplateArgumentIfAvailable(
+ TD, TD->getLocation(), TD->getSourceRange().getEnd(), Param, Builder,
+ HasDefaultArg);
+ }
// If there was no default argument, deduction is incomplete.
if (DefArg.getArgument().isNull()) {
@@ -2964,14 +2840,13 @@ FinishTemplateArgumentDeduction(
auto *Template = Partial->getSpecializedTemplate();
const ASTTemplateArgumentListInfo *PartialTemplArgInfo =
Partial->getTemplateArgsAsWritten();
- const TemplateArgumentLoc *PartialTemplateArgs =
- PartialTemplArgInfo->getTemplateArgs();
TemplateArgumentListInfo InstArgs(PartialTemplArgInfo->LAngleLoc,
PartialTemplArgInfo->RAngleLoc);
- if (S.Subst(PartialTemplateArgs, PartialTemplArgInfo->NumTemplateArgs,
- InstArgs, MultiLevelTemplateArgumentList(*DeducedArgumentList))) {
+ if (S.SubstTemplateArguments(
+ PartialTemplArgInfo->arguments(),
+ MultiLevelTemplateArgumentList(*DeducedArgumentList), InstArgs)) {
unsigned ArgIdx = InstArgs.size(), ParamIdx = ArgIdx;
if (ParamIdx >= Partial->getTemplateParameters()->size())
ParamIdx = Partial->getTemplateParameters()->size() - 1;
@@ -2979,7 +2854,7 @@ FinishTemplateArgumentDeduction(
Decl *Param = const_cast<NamedDecl *>(
Partial->getTemplateParameters()->getParam(ParamIdx));
Info.Param = makeTemplateParameter(Param);
- Info.FirstArg = PartialTemplateArgs[ArgIdx].getArgument();
+ Info.FirstArg = (*PartialTemplArgInfo)[ArgIdx].getArgument();
return Sema::TDK_SubstitutionFailure;
}
@@ -3879,8 +3754,9 @@ static bool AdjustFunctionParmAndArgTypesForDeduction(
// "lvalue reference to A" is used in place of A for type deduction.
if (isForwardingReference(QualType(ParamRefType, 0), FirstInnerIndex) &&
Arg->isLValue()) {
- if (S.getLangOpts().OpenCL && !ArgType.hasAddressSpace())
- ArgType = S.Context.getAddrSpaceQualType(ArgType, LangAS::opencl_generic);
+ if (S.getLangOpts().OpenCL && !ArgType.hasAddressSpace())
+ ArgType = S.Context.getAddrSpaceQualType(
+ ArgType, S.Context.getDefaultOpenCLPointeeAddrSpace());
ArgType = S.Context.getLValueReferenceType(ArgType);
}
} else {
@@ -4342,11 +4218,11 @@ Sema::TemplateDeductionResult Sema::DeduceTemplateArguments(
bool HasDeducedReturnType = false;
if (getLangOpts().CPlusPlus14 && IsAddressOfFunction &&
Function->getReturnType()->getContainedAutoType()) {
- FunctionType = SubstAutoType(FunctionType, Context.DependentTy);
+ FunctionType = SubstAutoTypeDependent(FunctionType);
HasDeducedReturnType = true;
}
- if (!ArgFunctionType.isNull()) {
+ if (!ArgFunctionType.isNull() && !FunctionType.isNull()) {
unsigned TDF =
TDF_TopLevelParameterTypeList | TDF_AllowCompatibleFunctionType;
// Deduce template arguments from the function type.
@@ -4776,12 +4652,8 @@ Sema::DeduceAutoType(TypeLoc Type, Expr *&Init, QualType &Result,
ExprResult ER = CheckPlaceholderExpr(Init);
if (ER.isInvalid())
return DAR_FailedAlreadyDiagnosed;
- Init = ER.get();
- QualType Deduced = BuildDecltypeType(Init, Init->getBeginLoc(), false);
- if (Deduced.isNull())
- return DAR_FailedAlreadyDiagnosed;
- // FIXME: Support a non-canonical deduced type for 'auto'.
- Deduced = Context.getCanonicalType(Deduced);
+ QualType Deduced = getDecltypeForExpr(ER.get());
+ assert(!Deduced.isNull());
if (AT->isConstrained() && !IgnoreConstraints) {
auto ConstraintsResult =
CheckDeducedPlaceholderConstraints(*this, *AT,
@@ -4816,7 +4688,7 @@ Sema::DeduceAutoType(TypeLoc Type, Expr *&Init, QualType &Result,
Context, Loc, Loc, TemplParamPtr, Loc, nullptr);
QualType FuncParam =
- SubstituteDeducedTypeTransform(*this, TemplArg, /*UseTypeSugar*/false)
+ SubstituteDeducedTypeTransform(*this, TemplArg, /*UseTypeSugar*/ true)
.Apply(Type);
assert(!FuncParam.isNull() &&
"substituting template parameter for 'auto' failed");
@@ -4930,27 +4802,29 @@ Sema::DeduceAutoType(TypeLoc Type, Expr *&Init, QualType &Result,
QualType Sema::SubstAutoType(QualType TypeWithAuto,
QualType TypeToReplaceAuto) {
- if (TypeToReplaceAuto->isDependentType())
- return SubstituteDeducedTypeTransform(
- *this, DependentAuto{
- TypeToReplaceAuto->containsUnexpandedParameterPack()})
- .TransformType(TypeWithAuto);
+ assert(TypeToReplaceAuto != Context.DependentTy);
return SubstituteDeducedTypeTransform(*this, TypeToReplaceAuto)
.TransformType(TypeWithAuto);
}
TypeSourceInfo *Sema::SubstAutoTypeSourceInfo(TypeSourceInfo *TypeWithAuto,
QualType TypeToReplaceAuto) {
- if (TypeToReplaceAuto->isDependentType())
- return SubstituteDeducedTypeTransform(
- *this,
- DependentAuto{
- TypeToReplaceAuto->containsUnexpandedParameterPack()})
- .TransformType(TypeWithAuto);
+ assert(TypeToReplaceAuto != Context.DependentTy);
return SubstituteDeducedTypeTransform(*this, TypeToReplaceAuto)
.TransformType(TypeWithAuto);
}
+QualType Sema::SubstAutoTypeDependent(QualType TypeWithAuto) {
+ return SubstituteDeducedTypeTransform(*this, DependentAuto{false})
+ .TransformType(TypeWithAuto);
+}
+
+TypeSourceInfo *
+Sema::SubstAutoTypeSourceInfoDependent(TypeSourceInfo *TypeWithAuto) {
+ return SubstituteDeducedTypeTransform(*this, DependentAuto{false})
+ .TransformType(TypeWithAuto);
+}
+
QualType Sema::ReplaceAutoType(QualType TypeWithAuto,
QualType TypeToReplaceAuto) {
return SubstituteDeducedTypeTransform(*this, TypeToReplaceAuto,
@@ -5145,6 +5019,7 @@ static bool isAtLeastAsSpecializedAs(Sema &S,
Args2.resize(NumComparedArguments);
if (Reversed)
std::reverse(Args2.begin(), Args2.end());
+
if (DeduceTemplateArguments(S, TemplateParams, Args2.data(), Args2.size(),
Args1.data(), Args1.size(), Info, Deduced,
TDF_None, /*PartialOrdering=*/true))
diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp
index f18f77d3442a..7d4c000e7e90 100644
--- a/clang/lib/Sema/SemaTemplateInstantiate.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp
@@ -161,10 +161,9 @@ Sema::getTemplateInstantiationArgs(NamedDecl *D,
if (isGenericLambdaCallOperatorOrStaticInvokerSpecialization(Function))
break;
- } else if (FunctionTemplateDecl *FunTmpl
- = Function->getDescribedFunctionTemplate()) {
- // Add the "injected" template arguments.
- Result.addOuterTemplateArguments(FunTmpl->getInjectedTemplateArgs());
+ } else if (Function->getDescribedFunctionTemplate()) {
+ assert(Result.getNumSubstitutedLevels() == 0 &&
+ "Outer template not instantiated?");
}
// If this is a friend declaration and it declares an entity at
@@ -180,11 +179,8 @@ Sema::getTemplateInstantiationArgs(NamedDecl *D,
}
} else if (CXXRecordDecl *Rec = dyn_cast<CXXRecordDecl>(Ctx)) {
if (ClassTemplateDecl *ClassTemplate = Rec->getDescribedClassTemplate()) {
- QualType T = ClassTemplate->getInjectedClassNameSpecialization();
- const TemplateSpecializationType *TST =
- cast<TemplateSpecializationType>(Context.getCanonicalType(T));
- Result.addOuterTemplateArguments(
- llvm::makeArrayRef(TST->getArgs(), TST->getNumArgs()));
+ assert(Result.getNumSubstitutedLevels() == 0 &&
+ "Outer template not instantiated?");
if (ClassTemplate->isMemberSpecialization())
break;
}
@@ -1934,25 +1930,23 @@ TemplateInstantiator::TransformExprRequirement(concepts::ExprRequirement *Req) {
return Req;
Sema::SFINAETrap Trap(SemaRef);
- TemplateDeductionInfo Info(Req->getExpr()->getBeginLoc());
llvm::PointerUnion<Expr *, concepts::Requirement::SubstitutionDiagnostic *>
TransExpr;
if (Req->isExprSubstitutionFailure())
TransExpr = Req->getExprSubstitutionDiagnostic();
else {
- Sema::InstantiatingTemplate ExprInst(SemaRef, Req->getExpr()->getBeginLoc(),
- Req, Info,
- Req->getExpr()->getSourceRange());
+ Expr *E = Req->getExpr();
+ TemplateDeductionInfo Info(E->getBeginLoc());
+ Sema::InstantiatingTemplate ExprInst(SemaRef, E->getBeginLoc(), Req, Info,
+ E->getSourceRange());
if (ExprInst.isInvalid())
return nullptr;
- ExprResult TransExprRes = TransformExpr(Req->getExpr());
+ ExprResult TransExprRes = TransformExpr(E);
if (TransExprRes.isInvalid() || Trap.hasErrorOccurred())
- TransExpr = createSubstDiag(SemaRef, Info,
- [&] (llvm::raw_ostream& OS) {
- Req->getExpr()->printPretty(OS, nullptr,
- SemaRef.getPrintingPolicy());
- });
+ TransExpr = createSubstDiag(SemaRef, Info, [&](llvm::raw_ostream &OS) {
+ E->printPretty(OS, nullptr, SemaRef.getPrintingPolicy());
+ });
else
TransExpr = TransExprRes.get();
}
@@ -1966,6 +1960,7 @@ TemplateInstantiator::TransformExprRequirement(concepts::ExprRequirement *Req) {
else if (RetReq.isTypeConstraint()) {
TemplateParameterList *OrigTPL =
RetReq.getTypeConstraintTemplateParameterList();
+ TemplateDeductionInfo Info(OrigTPL->getTemplateLoc());
Sema::InstantiatingTemplate TPLInst(SemaRef, OrigTPL->getTemplateLoc(),
Req, Info, OrigTPL->getSourceRange());
if (TPLInst.isInvalid())
@@ -2309,6 +2304,29 @@ namespace {
} // namespace
+bool Sema::SubstTypeConstraint(
+ TemplateTypeParmDecl *Inst, const TypeConstraint *TC,
+ const MultiLevelTemplateArgumentList &TemplateArgs) {
+ const ASTTemplateArgumentListInfo *TemplArgInfo =
+ TC->getTemplateArgsAsWritten();
+ TemplateArgumentListInfo InstArgs;
+
+ if (TemplArgInfo) {
+ InstArgs.setLAngleLoc(TemplArgInfo->LAngleLoc);
+ InstArgs.setRAngleLoc(TemplArgInfo->RAngleLoc);
+ if (SubstTemplateArguments(TemplArgInfo->arguments(), TemplateArgs,
+ InstArgs))
+ return true;
+ }
+ return AttachTypeConstraint(
+ TC->getNestedNameSpecifierLoc(), TC->getConceptNameInfo(),
+ TC->getNamedConcept(), &InstArgs, Inst,
+ Inst->isParameterPack()
+ ? cast<CXXFoldExpr>(TC->getImmediatelyDeclaredConstraint())
+ ->getEllipsisLoc()
+ : SourceLocation());
+}
+
ParmVarDecl *Sema::SubstParmVarDecl(ParmVarDecl *OldParm,
const MultiLevelTemplateArgumentList &TemplateArgs,
int indexAdjustment,
@@ -2373,24 +2391,7 @@ ParmVarDecl *Sema::SubstParmVarDecl(ParmVarDecl *OldParm,
if (Inst && !Inst->getTypeConstraint()) {
// TODO: Concepts: do not instantiate the constraint (delayed constraint
// substitution)
- const ASTTemplateArgumentListInfo *TemplArgInfo
- = TC->getTemplateArgsAsWritten();
- TemplateArgumentListInfo InstArgs;
-
- if (TemplArgInfo) {
- InstArgs.setLAngleLoc(TemplArgInfo->LAngleLoc);
- InstArgs.setRAngleLoc(TemplArgInfo->RAngleLoc);
- if (Subst(TemplArgInfo->getTemplateArgs(),
- TemplArgInfo->NumTemplateArgs, InstArgs, TemplateArgs))
- return nullptr;
- }
- if (AttachTypeConstraint(
- TC->getNestedNameSpecifierLoc(), TC->getConceptNameInfo(),
- TC->getNamedConcept(), TemplArgInfo ? &InstArgs : nullptr, Inst,
- TTP->isParameterPack()
- ? cast<CXXFoldExpr>(TC->getImmediatelyDeclaredConstraint())
- ->getEllipsisLoc()
- : SourceLocation()))
+ if (SubstTypeConstraint(Inst, TC, TemplateArgs))
return nullptr;
}
}
@@ -3538,15 +3539,6 @@ Sema::SubstTemplateName(NestedNameSpecifierLoc QualifierLoc,
return Instantiator.TransformTemplateName(SS, Name, Loc);
}
-bool Sema::Subst(const TemplateArgumentLoc *Args, unsigned NumArgs,
- TemplateArgumentListInfo &Result,
- const MultiLevelTemplateArgumentList &TemplateArgs) {
- TemplateInstantiator Instantiator(*this, TemplateArgs, SourceLocation(),
- DeclarationName());
-
- return Instantiator.TransformTemplateArguments(Args, NumArgs, Result);
-}
-
static const Decl *getCanonicalParmVarDecl(const Decl *D) {
// When storing ParmVarDecls in the local instantiation scope, we always
// want to use the ParmVarDecl from the canonical function declaration,
@@ -3666,7 +3658,7 @@ void LocalInstantiationScope::MakeInstantiatedLocalArgPack(const Decl *D) {
bool LocalInstantiationScope::isLocalPackExpansion(const Decl *D) {
for (DeclArgumentPack *Pack : ArgumentPacks)
- if (std::find(Pack->begin(), Pack->end(), D) != Pack->end())
+ if (llvm::is_contained(*Pack, D))
return true;
return false;
}
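
Several hunks in this import (here, and in SemaTemplateVariadic.cpp and ASTReader.cpp below) replace std::find/std::find_if/std::any_of over explicit begin()/end() pairs with the range helpers from llvm/ADT/STLExtras.h. A minimal standalone sketch of the idiom, using a plain std::vector rather than the clang data structures touched by the patch:

    #include "llvm/ADT/STLExtras.h"
    #include <vector>

    bool hasNull(const std::vector<int *> &Ptrs) {
      // Same as: std::find(Ptrs.begin(), Ptrs.end(), nullptr) != Ptrs.end()
      return llvm::is_contained(Ptrs, nullptr);
    }

    bool hasNegative(const std::vector<int> &Vals) {
      // Same as std::any_of over the begin()/end() pair, with the same predicate.
      return llvm::any_of(Vals, [](int V) { return V < 0; });
    }
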
diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
index be4c51930789..27ac2cd08f2a 100644
--- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -436,11 +436,12 @@ static void instantiateOMPDeclareVariantAttr(
return;
Expr *E = VariantFuncRef.get();
+
// Check function/variant ref for `omp declare variant` but not for `omp
// begin declare variant` (which use implicit attributes).
Optional<std::pair<FunctionDecl *, Expr *>> DeclVarData =
- S.checkOpenMPDeclareVariantFunction(S.ConvertDeclToDeclGroup(New),
- VariantFuncRef.get(), TI,
+ S.checkOpenMPDeclareVariantFunction(S.ConvertDeclToDeclGroup(New), E, TI,
+ Attr.appendArgs_size(),
Attr.getRange());
if (!DeclVarData)
@@ -481,7 +482,28 @@ static void instantiateOMPDeclareVariantAttr(
}
}
- S.ActOnOpenMPDeclareVariantDirective(FD, E, TI, Attr.getRange());
+ SmallVector<Expr *, 8> NothingExprs;
+ SmallVector<Expr *, 8> NeedDevicePtrExprs;
+ SmallVector<OMPDeclareVariantAttr::InteropType, 8> AppendArgs;
+
+ for (Expr *E : Attr.adjustArgsNothing()) {
+ ExprResult ER = Subst(E);
+ if (ER.isInvalid())
+ continue;
+ NothingExprs.push_back(ER.get());
+ }
+ for (Expr *E : Attr.adjustArgsNeedDevicePtr()) {
+ ExprResult ER = Subst(E);
+ if (ER.isInvalid())
+ continue;
+ NeedDevicePtrExprs.push_back(ER.get());
+ }
+ for (auto A : Attr.appendArgs())
+ AppendArgs.push_back(A);
+
+ S.ActOnOpenMPDeclareVariantDirective(
+ FD, E, TI, NothingExprs, NeedDevicePtrExprs, AppendArgs, SourceLocation(),
+ SourceLocation(), Attr.getRange());
}
static void instantiateDependentAMDGPUFlatWorkGroupSizeAttr(
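
The hunk above now substitutes the OpenMP 5.1 adjust_args and append_args data of a declare variant attribute before re-running ActOnOpenMPDeclareVariantDirective. For orientation, a rough user-level sketch of the feature being instantiated; the clause spelling follows my reading of the OpenMP 5.1 spec, not anything in this patch, so treat it as illustrative only:

    int gpu_fn(void *buf, int n);

    // When base_fn is called inside an 'omp dispatch' region, gpu_fn may be
    // invoked instead, with 'buf' translated to a device pointer first.
    #pragma omp declare variant(gpu_fn) match(construct={dispatch}) \
            adjust_args(need_device_ptr : buf)
    int base_fn(void *buf, int n);

    void run(void *buf, int n) {
      #pragma omp dispatch
      base_fn(buf, n);
    }
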
@@ -556,30 +578,10 @@ static void instantiateDependentAMDGPUWavesPerEUAttr(
static void instantiateDependentSYCLKernelAttr(
Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
const SYCLKernelAttr &Attr, Decl *New) {
- // Functions cannot be partially specialized, so if we are being instantiated,
- // we are obviously a complete specialization. Since this attribute is only
- // valid on function template declarations, we know that this is a full
- // instantiation of a kernel.
- S.AddSYCLKernelLambda(cast<FunctionDecl>(New));
-
- // Evaluate whether this would change any of the already evaluated
- // __builtin_sycl_unique_stable_name values.
- for (auto &Itr : S.Context.SYCLUniqueStableNameEvaluatedValues) {
- const std::string &CurName = Itr.first->ComputeName(S.Context);
- if (Itr.second != CurName) {
- S.Diag(New->getLocation(),
- diag::err_kernel_invalidates_sycl_unique_stable_name);
- S.Diag(Itr.first->getLocation(),
- diag::note_sycl_unique_stable_name_evaluated_here);
- // Update this so future diagnostics work correctly.
- Itr.second = CurName;
- }
- }
-
New->addAttr(Attr.clone(S.getASTContext()));
}
-/// Determine whether the attribute A might be relevent to the declaration D.
+/// Determine whether the attribute A might be relevant to the declaration D.
/// If not, we can skip instantiating it. The attribute may or may not have
/// been instantiated yet.
static bool isRelevantAttr(Sema &S, const Decl *D, const Attr *A) {
@@ -1087,7 +1089,7 @@ Decl *TemplateDeclInstantiator::VisitVarDecl(VarDecl *D,
SemaRef.BuildVariableInstantiation(Var, D, TemplateArgs, LateAttrs, Owner,
StartingScope, InstantiatingVarTemplate);
- if (D->isNRVOVariable()) {
+ if (D->isNRVOVariable() && !Var->isInvalidDecl()) {
QualType RT;
if (auto *F = dyn_cast<FunctionDecl>(DC))
RT = F->getReturnType();
@@ -1816,9 +1818,7 @@ TemplateDeclInstantiator::VisitFunctionTemplateDecl(FunctionTemplateDecl *D) {
Decl *TemplateDeclInstantiator::VisitCXXRecordDecl(CXXRecordDecl *D) {
CXXRecordDecl *PrevDecl = nullptr;
- if (D->isInjectedClassName())
- PrevDecl = cast<CXXRecordDecl>(Owner);
- else if (CXXRecordDecl *PatternPrev = getPreviousDeclForInstantiation(D)) {
+ if (CXXRecordDecl *PatternPrev = getPreviousDeclForInstantiation(D)) {
NamedDecl *Prev = SemaRef.FindInstantiatedDecl(D->getLocation(),
PatternPrev,
TemplateArgs);
@@ -1827,6 +1827,7 @@ Decl *TemplateDeclInstantiator::VisitCXXRecordDecl(CXXRecordDecl *D) {
}
CXXRecordDecl *Record = nullptr;
+ bool IsInjectedClassName = D->isInjectedClassName();
if (D->isLambda())
Record = CXXRecordDecl::CreateLambda(
SemaRef.Context, Owner, D->getLambdaTypeInfo(), D->getLocation(),
@@ -1835,7 +1836,11 @@ Decl *TemplateDeclInstantiator::VisitCXXRecordDecl(CXXRecordDecl *D) {
else
Record = CXXRecordDecl::Create(SemaRef.Context, D->getTagKind(), Owner,
D->getBeginLoc(), D->getLocation(),
- D->getIdentifier(), PrevDecl);
+ D->getIdentifier(), PrevDecl,
+ /*DelayTypeCreation=*/IsInjectedClassName);
+ // Link the type of the injected-class-name to that of the outer class.
+ if (IsInjectedClassName)
+ (void)SemaRef.Context.getTypeDeclType(Record, cast<CXXRecordDecl>(Owner));
// Substitute the nested name specifier, if any.
if (SubstQualifier(D, Record))
@@ -1850,7 +1855,7 @@ Decl *TemplateDeclInstantiator::VisitCXXRecordDecl(CXXRecordDecl *D) {
// specifier. Remove once this area of the code gets sorted out.
if (D->getAccess() != AS_none)
Record->setAccess(D->getAccess());
- if (!D->isInjectedClassName())
+ if (!IsInjectedClassName)
Record->setInstantiationOfMemberClass(D, TSK_ImplicitInstantiation);
// If the original function was part of a friend declaration,
@@ -1903,6 +1908,9 @@ Decl *TemplateDeclInstantiator::VisitCXXRecordDecl(CXXRecordDecl *D) {
SemaRef.DiagnoseUnusedNestedTypedefs(Record);
+ if (IsInjectedClassName)
+ assert(Record->isInjectedClassName() && "Broken injected-class-name");
+
return Record;
}
@@ -2051,8 +2059,8 @@ Decl *TemplateDeclInstantiator::VisitFunctionDecl(
} else {
Function = FunctionDecl::Create(
SemaRef.Context, DC, D->getInnerLocStart(), NameInfo, T, TInfo,
- D->getCanonicalDecl()->getStorageClass(), D->isInlineSpecified(),
- D->hasWrittenPrototype(), D->getConstexprKind(),
+ D->getCanonicalDecl()->getStorageClass(), D->UsesFPIntrin(),
+ D->isInlineSpecified(), D->hasWrittenPrototype(), D->getConstexprKind(),
TrailingRequiresClause);
Function->setRangeEnd(D->getSourceRange().getEnd());
}
@@ -2149,8 +2157,8 @@ Decl *TemplateDeclInstantiator::VisitFunctionDecl(
// Instantiate the explicit template arguments.
TemplateArgumentListInfo ExplicitArgs(Info->getLAngleLoc(),
Info->getRAngleLoc());
- if (SemaRef.Subst(Info->getTemplateArgs(), Info->getNumTemplateArgs(),
- ExplicitArgs, TemplateArgs))
+ if (SemaRef.SubstTemplateArguments(Info->arguments(), TemplateArgs,
+ ExplicitArgs))
return nullptr;
// Map the candidate templates to their instantiations.
@@ -2177,8 +2185,8 @@ Decl *TemplateDeclInstantiator::VisitFunctionDecl(
// Instantiate the explicit template arguments.
TemplateArgumentListInfo ExplicitArgs(Info->getLAngleLoc(),
Info->getRAngleLoc());
- if (SemaRef.Subst(Info->getTemplateArgs(), Info->getNumTemplateArgs(),
- ExplicitArgs, TemplateArgs))
+ if (SemaRef.SubstTemplateArguments(Info->arguments(), TemplateArgs,
+ ExplicitArgs))
return nullptr;
if (SemaRef.CheckFunctionTemplateSpecialization(Function,
@@ -2407,15 +2415,16 @@ Decl *TemplateDeclInstantiator::VisitCXXMethodDecl(
if (CXXConstructorDecl *Constructor = dyn_cast<CXXConstructorDecl>(D)) {
Method = CXXConstructorDecl::Create(
SemaRef.Context, Record, StartLoc, NameInfo, T, TInfo,
- InstantiatedExplicitSpecifier, Constructor->isInlineSpecified(), false,
+ InstantiatedExplicitSpecifier, Constructor->UsesFPIntrin(),
+ Constructor->isInlineSpecified(), false,
Constructor->getConstexprKind(), InheritedConstructor(),
TrailingRequiresClause);
Method->setRangeEnd(Constructor->getEndLoc());
} else if (CXXDestructorDecl *Destructor = dyn_cast<CXXDestructorDecl>(D)) {
Method = CXXDestructorDecl::Create(
SemaRef.Context, Record, StartLoc, NameInfo, T, TInfo,
- Destructor->isInlineSpecified(), false, Destructor->getConstexprKind(),
- TrailingRequiresClause);
+ Destructor->UsesFPIntrin(), Destructor->isInlineSpecified(), false,
+ Destructor->getConstexprKind(), TrailingRequiresClause);
Method->setRangeEnd(Destructor->getEndLoc());
Method->setDeclName(SemaRef.Context.DeclarationNames.getCXXDestructorName(
SemaRef.Context.getCanonicalType(
@@ -2423,15 +2432,15 @@ Decl *TemplateDeclInstantiator::VisitCXXMethodDecl(
} else if (CXXConversionDecl *Conversion = dyn_cast<CXXConversionDecl>(D)) {
Method = CXXConversionDecl::Create(
SemaRef.Context, Record, StartLoc, NameInfo, T, TInfo,
- Conversion->isInlineSpecified(), InstantiatedExplicitSpecifier,
- Conversion->getConstexprKind(), Conversion->getEndLoc(),
- TrailingRequiresClause);
+ Conversion->UsesFPIntrin(), Conversion->isInlineSpecified(),
+ InstantiatedExplicitSpecifier, Conversion->getConstexprKind(),
+ Conversion->getEndLoc(), TrailingRequiresClause);
} else {
StorageClass SC = D->isStatic() ? SC_Static : SC_None;
- Method = CXXMethodDecl::Create(SemaRef.Context, Record, StartLoc, NameInfo,
- T, TInfo, SC, D->isInlineSpecified(),
- D->getConstexprKind(), D->getEndLoc(),
- TrailingRequiresClause);
+ Method = CXXMethodDecl::Create(
+ SemaRef.Context, Record, StartLoc, NameInfo, T, TInfo, SC,
+ D->UsesFPIntrin(), D->isInlineSpecified(), D->getConstexprKind(),
+ D->getEndLoc(), TrailingRequiresClause);
}
if (D->isInlined())
@@ -2512,8 +2521,8 @@ Decl *TemplateDeclInstantiator::VisitCXXMethodDecl(
// Instantiate the explicit template arguments.
TemplateArgumentListInfo ExplicitArgs(Info->getLAngleLoc(),
Info->getRAngleLoc());
- if (SemaRef.Subst(Info->getTemplateArgs(), Info->getNumTemplateArgs(),
- ExplicitArgs, TemplateArgs))
+ if (SemaRef.SubstTemplateArguments(Info->arguments(), TemplateArgs,
+ ExplicitArgs))
return nullptr;
// Map the candidate templates to their instantiations.
@@ -2539,8 +2548,8 @@ Decl *TemplateDeclInstantiator::VisitCXXMethodDecl(
TemplateArgumentListInfo ExplicitArgs(Info->getLAngleLoc(),
Info->getRAngleLoc());
- if (SemaRef.Subst(Info->getTemplateArgs(), Info->getNumTemplateArgs(),
- ExplicitArgs, TemplateArgs))
+ if (SemaRef.SubstTemplateArguments(Info->arguments(), TemplateArgs,
+ ExplicitArgs))
return nullptr;
if (SemaRef.CheckFunctionTemplateSpecialization(Method,
@@ -2708,25 +2717,7 @@ Decl *TemplateDeclInstantiator::VisitTemplateTypeParmDecl(
// TODO: Concepts: do not instantiate the constraint (delayed constraint
// substitution)
- const ASTTemplateArgumentListInfo *TemplArgInfo
- = TC->getTemplateArgsAsWritten();
- TemplateArgumentListInfo InstArgs;
-
- if (TemplArgInfo) {
- InstArgs.setLAngleLoc(TemplArgInfo->LAngleLoc);
- InstArgs.setRAngleLoc(TemplArgInfo->RAngleLoc);
- if (SemaRef.Subst(TemplArgInfo->getTemplateArgs(),
- TemplArgInfo->NumTemplateArgs,
- InstArgs, TemplateArgs))
- return nullptr;
- }
- if (SemaRef.AttachTypeConstraint(
- TC->getNestedNameSpecifierLoc(), TC->getConceptNameInfo(),
- TC->getNamedConcept(), &InstArgs, Inst,
- D->isParameterPack()
- ? cast<CXXFoldExpr>(TC->getImmediatelyDeclaredConstraint())
- ->getEllipsisLoc()
- : SourceLocation()))
+ if (SemaRef.SubstTypeConstraint(Inst, TC, TemplateArgs))
return nullptr;
}
}
@@ -3380,12 +3371,23 @@ Decl *TemplateDeclInstantiator::VisitOMPAllocateDecl(OMPAllocateDecl *D) {
SmallVector<OMPClause *, 4> Clauses;
// Copy map clauses from the original mapper.
for (OMPClause *C : D->clauselists()) {
- auto *AC = cast<OMPAllocatorClause>(C);
- ExprResult NewE = SemaRef.SubstExpr(AC->getAllocator(), TemplateArgs);
- if (!NewE.isUsable())
- continue;
- OMPClause *IC = SemaRef.ActOnOpenMPAllocatorClause(
- NewE.get(), AC->getBeginLoc(), AC->getLParenLoc(), AC->getEndLoc());
+ OMPClause *IC = nullptr;
+ if (auto *AC = dyn_cast<OMPAllocatorClause>(C)) {
+ ExprResult NewE = SemaRef.SubstExpr(AC->getAllocator(), TemplateArgs);
+ if (!NewE.isUsable())
+ continue;
+ IC = SemaRef.ActOnOpenMPAllocatorClause(
+ NewE.get(), AC->getBeginLoc(), AC->getLParenLoc(), AC->getEndLoc());
+ } else if (auto *AC = dyn_cast<OMPAlignClause>(C)) {
+ ExprResult NewE = SemaRef.SubstExpr(AC->getAlignment(), TemplateArgs);
+ if (!NewE.isUsable())
+ continue;
+ IC = SemaRef.ActOnOpenMPAlignClause(NewE.get(), AC->getBeginLoc(),
+ AC->getLParenLoc(), AC->getEndLoc());
+ // If align clause value ends up being invalid, this can end up null.
+ if (!IC)
+ continue;
+ }
Clauses.push_back(IC);
}
@@ -3624,8 +3626,7 @@ TemplateDeclInstantiator::VisitClassTemplateSpecializationDecl(
SmallVector<TemplateArgumentLoc, 4> ArgLocs;
for (unsigned I = 0; I != Loc.getNumArgs(); ++I)
ArgLocs.push_back(Loc.getArgLoc(I));
- if (SemaRef.Subst(ArgLocs.data(), ArgLocs.size(),
- InstTemplateArgs, TemplateArgs))
+ if (SemaRef.SubstTemplateArguments(ArgLocs, TemplateArgs, InstTemplateArgs))
return nullptr;
// Check that the template argument list is well-formed for this
@@ -3750,8 +3751,8 @@ Decl *TemplateDeclInstantiator::VisitVarTemplateSpecializationDecl(
VarTemplateArgsInfo.setLAngleLoc(TemplateArgsInfo.getLAngleLoc());
VarTemplateArgsInfo.setRAngleLoc(TemplateArgsInfo.getRAngleLoc());
- if (SemaRef.Subst(TemplateArgsInfo.getArgumentArray(),
- TemplateArgsInfo.size(), VarTemplateArgsInfo, TemplateArgs))
+ if (SemaRef.SubstTemplateArguments(TemplateArgsInfo.arguments(), TemplateArgs,
+ VarTemplateArgsInfo))
return nullptr;
// Check that the template argument list is well-formed for this template.
@@ -4018,9 +4019,8 @@ TemplateDeclInstantiator::InstantiateClassTemplatePartialSpecialization(
= PartialSpec->getTemplateArgsAsWritten();
TemplateArgumentListInfo InstTemplateArgs(TemplArgInfo->LAngleLoc,
TemplArgInfo->RAngleLoc);
- if (SemaRef.Subst(TemplArgInfo->getTemplateArgs(),
- TemplArgInfo->NumTemplateArgs,
- InstTemplateArgs, TemplateArgs))
+ if (SemaRef.SubstTemplateArguments(TemplArgInfo->arguments(), TemplateArgs,
+ InstTemplateArgs))
return nullptr;
// Check that the template argument list is well-formed for this
@@ -4146,9 +4146,8 @@ TemplateDeclInstantiator::InstantiateVarTemplatePartialSpecialization(
= PartialSpec->getTemplateArgsAsWritten();
TemplateArgumentListInfo InstTemplateArgs(TemplArgInfo->LAngleLoc,
TemplArgInfo->RAngleLoc);
- if (SemaRef.Subst(TemplArgInfo->getTemplateArgs(),
- TemplArgInfo->NumTemplateArgs,
- InstTemplateArgs, TemplateArgs))
+ if (SemaRef.SubstTemplateArguments(TemplArgInfo->arguments(), TemplateArgs,
+ InstTemplateArgs))
return nullptr;
// Check that the template argument list is well-formed for this
@@ -5985,11 +5984,11 @@ NamedDecl *Sema::FindInstantiatedDecl(SourceLocation Loc, NamedDecl *D,
const MultiLevelTemplateArgumentList &TemplateArgs,
bool FindingInstantiatedContext) {
DeclContext *ParentDC = D->getDeclContext();
- // Determine whether our parent context depends on any of the tempalte
+ // Determine whether our parent context depends on any of the template
// arguments we're currently substituting.
bool ParentDependsOnArgs = isDependentContextAtLevel(
ParentDC, TemplateArgs.getNumRetainedOuterLevels());
- // FIXME: Parmeters of pointer to functions (y below) that are themselves
+ // FIXME: Parameters of pointer to functions (y below) that are themselves
// parameters (p below) can have their ParentDC set to the translation-unit
// - thus we can not consistently check if the ParentDC of such a parameter
// is Dependent or/and a FunctionOrMethod.
diff --git a/clang/lib/Sema/SemaTemplateVariadic.cpp b/clang/lib/Sema/SemaTemplateVariadic.cpp
index 1951aec3d17d..c0bb310e64fb 100644
--- a/clang/lib/Sema/SemaTemplateVariadic.cpp
+++ b/clang/lib/Sema/SemaTemplateVariadic.cpp
@@ -308,8 +308,7 @@ Sema::DiagnoseUnexpandedParameterPacks(SourceLocation Loc,
}
return declaresSameEntity(Pack.first.get<NamedDecl *>(), LocalPack);
};
- if (std::find_if(LSI->LocalPacks.begin(), LSI->LocalPacks.end(),
- DeclaresThisPack) != LSI->LocalPacks.end())
+ if (llvm::any_of(LSI->LocalPacks, DeclaresThisPack))
LambdaParamPackReferences.push_back(Pack);
}
@@ -328,8 +327,8 @@ Sema::DiagnoseUnexpandedParameterPacks(SourceLocation Loc,
bool EnclosingStmtExpr = false;
for (unsigned N = FunctionScopes.size(); N; --N) {
sema::FunctionScopeInfo *Func = FunctionScopes[N-1];
- if (std::any_of(
- Func->CompoundScopes.begin(), Func->CompoundScopes.end(),
+ if (llvm::any_of(
+ Func->CompoundScopes,
[](sema::CompoundScopeInfo &CSI) { return CSI.IsStmtExpr; })) {
EnclosingStmtExpr = true;
break;
@@ -893,6 +892,7 @@ bool Sema::containsUnexpandedParameterPacks(Declarator &D) {
case TST_Fract:
case TST_Float16:
case TST_float128:
+ case TST_ibm128:
case TST_bool:
case TST_decimal32:
case TST_decimal64:
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index b78331cdfe91..d2ee669debd0 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -1369,8 +1369,7 @@ static QualType ConvertDeclSpecToType(TypeProcessingState &state) {
// value being declared, poison it as invalid so we don't get chains of
// errors.
declarator.setInvalidType(true);
- } else if ((S.getLangOpts().OpenCLVersion >= 200 ||
- S.getLangOpts().OpenCLCPlusPlus) &&
+ } else if (S.getLangOpts().getOpenCLCompatibleVersion() >= 200 &&
DS.isTypeSpecPipe()) {
S.Diag(DeclLoc, diag::err_missing_actual_pipe_type)
<< DS.getSourceRange();
@@ -1525,18 +1524,20 @@ static QualType ConvertDeclSpecToType(TypeProcessingState &state) {
break;
case DeclSpec::TST_float: Result = Context.FloatTy; break;
case DeclSpec::TST_double:
+ if (DS.getTypeSpecWidth() == TypeSpecifierWidth::Long)
+ Result = Context.LongDoubleTy;
+ else
+ Result = Context.DoubleTy;
if (S.getLangOpts().OpenCL) {
if (!S.getOpenCLOptions().isSupported("cl_khr_fp64", S.getLangOpts()))
- S.Diag(DS.getTypeSpecTypeLoc(),
- diag::err_opencl_double_requires_extension)
- << (S.getLangOpts().OpenCLVersion >= 300);
+ S.Diag(DS.getTypeSpecTypeLoc(), diag::err_opencl_requires_extension)
+ << 0 << Result
+ << (S.getLangOpts().getOpenCLCompatibleVersion() == 300
+ ? "cl_khr_fp64 and __opencl_c_fp64"
+ : "cl_khr_fp64");
else if (!S.getOpenCLOptions().isAvailableOption("cl_khr_fp64", S.getLangOpts()))
S.Diag(DS.getTypeSpecTypeLoc(), diag::ext_opencl_double_without_pragma);
}
- if (DS.getTypeSpecWidth() == TypeSpecifierWidth::Long)
- Result = Context.LongDoubleTy;
- else
- Result = Context.DoubleTy;
break;
case DeclSpec::TST_float128:
if (!S.Context.getTargetInfo().hasFloat128Type() &&
@@ -1546,6 +1547,13 @@ static QualType ConvertDeclSpecToType(TypeProcessingState &state) {
<< "__float128";
Result = Context.Float128Ty;
break;
+ case DeclSpec::TST_ibm128:
+ if (!S.Context.getTargetInfo().hasIbm128Type() &&
+ !S.getLangOpts().SYCLIsDevice &&
+ !(S.getLangOpts().OpenMP && S.getLangOpts().OpenMPIsDevice))
+ S.Diag(DS.getTypeSpecTypeLoc(), diag::err_type_unsupported) << "__ibm128";
+ Result = Context.Ibm128Ty;
+ break;
case DeclSpec::TST_bool:
Result = Context.BoolTy; // _Bool or bool
break;
@@ -1614,7 +1622,7 @@ static QualType ConvertDeclSpecToType(TypeProcessingState &state) {
Expr *E = DS.getRepAsExpr();
assert(E && "Didn't get an expression for typeof?");
// TypeQuals handled by caller.
- Result = S.BuildTypeofExprType(E, DS.getTypeSpecTypeLoc());
+ Result = S.BuildTypeofExprType(E);
if (Result.isNull()) {
Result = Context.IntTy;
declarator.setInvalidType(true);
@@ -1625,7 +1633,7 @@ static QualType ConvertDeclSpecToType(TypeProcessingState &state) {
Expr *E = DS.getRepAsExpr();
assert(E && "Didn't get an expression for decltype?");
// TypeQuals handled by caller.
- Result = S.BuildDecltypeType(E, DS.getTypeSpecTypeLoc());
+ Result = S.BuildDecltypeType(E);
if (Result.isNull()) {
Result = Context.IntTy;
declarator.setInvalidType(true);
@@ -1724,21 +1732,29 @@ static QualType ConvertDeclSpecToType(TypeProcessingState &state) {
if (S.getLangOpts().OpenCL) {
const auto &OpenCLOptions = S.getOpenCLOptions();
- StringRef OptName;
+ bool IsOpenCLC30Compatible =
+ S.getLangOpts().getOpenCLCompatibleVersion() == 300;
// OpenCL C v3.0 s6.3.3 - OpenCL image types require __opencl_c_images
- // support
+ // support.
+ // OpenCL C v3.0 s6.2.1 - OpenCL 3d image write types requires support
+ // for OpenCL C 2.0, or OpenCL C 3.0 or newer and the
+ // __opencl_c_3d_image_writes feature. OpenCL C v3.0 API s4.2 - For devices
+ // that support OpenCL 3.0, cl_khr_3d_image_writes must be returned when and
+ // only when the optional feature is supported
if ((Result->isImageType() || Result->isSamplerT()) &&
- (S.getLangOpts().OpenCLVersion >= 300 &&
- !OpenCLOptions.isSupported("__opencl_c_images", S.getLangOpts())))
- OptName = "__opencl_c_images";
- else if (Result->isOCLImage3dWOType() &&
- !OpenCLOptions.isSupported("cl_khr_3d_image_writes",
- S.getLangOpts()))
- OptName = "cl_khr_3d_image_writes";
-
- if (!OptName.empty()) {
+ (IsOpenCLC30Compatible &&
+ !OpenCLOptions.isSupported("__opencl_c_images", S.getLangOpts()))) {
S.Diag(DS.getTypeSpecTypeLoc(), diag::err_opencl_requires_extension)
- << 0 << Result << OptName;
+ << 0 << Result << "__opencl_c_images";
+ declarator.setInvalidType();
+ } else if (Result->isOCLImage3dWOType() &&
+ !OpenCLOptions.isSupported("cl_khr_3d_image_writes",
+ S.getLangOpts())) {
+ S.Diag(DS.getTypeSpecTypeLoc(), diag::err_opencl_requires_extension)
+ << 0 << Result
+ << (IsOpenCLC30Compatible
+ ? "cl_khr_3d_image_writes and __opencl_c_3d_image_writes"
+ : "cl_khr_3d_image_writes");
declarator.setInvalidType();
}
}
@@ -2076,9 +2092,7 @@ static QualType deduceOpenCLPointeeAddrSpace(Sema &S, QualType PointeeType) {
!PointeeType->isSamplerT() &&
!PointeeType.hasAddressSpace())
PointeeType = S.getASTContext().getAddrSpaceQualType(
- PointeeType, S.getLangOpts().OpenCLGenericAddressSpace
- ? LangAS::opencl_generic
- : LangAS::opencl_private);
+ PointeeType, S.getASTContext().getDefaultOpenCLPointeeAddrSpace());
return PointeeType;
}
@@ -4245,8 +4259,8 @@ static void fixItNullability(Sema &S, DiagBuilderT &Diag,
InsertionText = InsertionText.drop_back().drop_front();
else
InsertionText = InsertionText.drop_front();
- } else if (!isIdentifierBody(NextChar[0], /*allow dollar*/true) &&
- !isIdentifierBody(NextChar[-1], /*allow dollar*/true)) {
+ } else if (!isAsciiIdentifierContinue(NextChar[0], /*allow dollar*/ true) &&
+ !isAsciiIdentifierContinue(NextChar[-1], /*allow dollar*/ true)) {
InsertionText = InsertionText.drop_back().drop_front();
}
@@ -5084,7 +5098,7 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
"__cl_clang_variadic_functions", S.getLangOpts()) &&
!(D.getIdentifier() &&
((D.getIdentifier()->getName() == "printf" &&
- (LangOpts.OpenCLCPlusPlus || LangOpts.OpenCLVersion >= 120)) ||
+ LangOpts.getOpenCLCompatibleVersion() >= 120) ||
D.getIdentifier()->getName().startswith("__")))) {
S.Diag(D.getIdentifierLoc(), diag::err_opencl_variadic_function);
D.setInvalidType(true);
@@ -5414,7 +5428,7 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
// Avoid emitting extra errors if we already errored on the scope.
D.setInvalidType(true);
} else if (S.isDependentScopeSpecifier(SS) ||
- dyn_cast_or_null<CXXRecordDecl>(S.computeDeclContext(SS))) {
+ isa_and_nonnull<CXXRecordDecl>(S.computeDeclContext(SS))) {
NestedNameSpecifier *NNS = SS.getScopeRep();
NestedNameSpecifier *NNSPrefix = NNS->getPrefix();
switch (NNS->getKind()) {
@@ -5507,7 +5521,7 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
break;
case DeclaratorChunk::Function: {
const DeclaratorChunk::FunctionTypeInfo &FTI = DeclType.Fun;
- // We supress the warning when there's no LParen location, as this
+ // We suppress the warning when there's no LParen location, as this
// indicates the declaration was an implicit declaration, which gets
// warned about separately via -Wimplicit-function-declaration.
if (FTI.NumParams == 0 && !FTI.isVariadic && FTI.getLParenLoc().isValid())
@@ -5886,13 +5900,16 @@ namespace {
void VisitQualifiedTypeLoc(QualifiedTypeLoc TL) {
Visit(TL.getUnqualifiedLoc());
}
+ // Allow to fill pointee's type locations, e.g.,
+ // int __attr * __attr * __attr *p;
+ void VisitPointerTypeLoc(PointerTypeLoc TL) { Visit(TL.getNextTypeLoc()); }
void VisitTypedefTypeLoc(TypedefTypeLoc TL) {
TL.setNameLoc(DS.getTypeSpecTypeLoc());
}
void VisitObjCInterfaceTypeLoc(ObjCInterfaceTypeLoc TL) {
TL.setNameLoc(DS.getTypeSpecTypeLoc());
// FIXME. We should have DS.getTypeSpecTypeEndLoc(). But, it requires
- // addition field. What we have is good enough for dispay of location
+ // addition field. What we have is good enough for display of location
// of 'fixit' on interface name.
TL.setNameEndLoc(DS.getEndLoc());
}
@@ -6486,6 +6503,34 @@ QualType Sema::BuildAddressSpaceAttr(QualType &T, Expr *AddrSpace,
return BuildAddressSpaceAttr(T, ASIdx, AddrSpace, AttrLoc);
}
+static void HandleBTFTypeTagAttribute(QualType &Type, const ParsedAttr &Attr,
+ TypeProcessingState &State) {
+ Sema &S = State.getSema();
+
+ // Check the number of attribute arguments.
+ if (Attr.getNumArgs() != 1) {
+ S.Diag(Attr.getLoc(), diag::err_attribute_wrong_number_arguments)
+ << Attr << 1;
+ Attr.setInvalid();
+ return;
+ }
+
+ // Ensure the argument is a string.
+ auto *StrLiteral = dyn_cast<StringLiteral>(Attr.getArgAsExpr(0));
+ if (!StrLiteral) {
+ S.Diag(Attr.getLoc(), diag::err_attribute_argument_type)
+ << Attr << AANT_ArgumentString;
+ Attr.setInvalid();
+ return;
+ }
+
+ ASTContext &Ctx = S.Context;
+ StringRef BTFTypeTag = StrLiteral->getString();
+ Type = State.getAttributedType(
+ ::new (Ctx) BTFTypeTagAttr(Ctx, Attr, BTFTypeTag), Type, Type);
+ return;
+}
+
/// HandleAddressSpaceTypeAttribute - Process an address_space attribute on the
/// specified type. The attribute contains 1 argument, the id of the address
/// space for the type.
@@ -7800,7 +7845,7 @@ static bool isPermittedNeonBaseType(QualType &Ty,
static bool verifyValidIntegerConstantExpr(Sema &S, const ParsedAttr &Attr,
llvm::APSInt &Result) {
const auto *AttrExpr = Attr.getArgAsExpr(0);
- if (!AttrExpr->isTypeDependent() && !AttrExpr->isValueDependent()) {
+ if (!AttrExpr->isTypeDependent()) {
if (Optional<llvm::APSInt> Res =
AttrExpr->getIntegerConstantExpr(S.Context)) {
Result = *Res;
@@ -7875,8 +7920,10 @@ static void HandleArmSveVectorBitsTypeAttr(QualType &CurType, ParsedAttr &Attr,
return;
}
- // Attribute is unsupported if '-msve-vector-bits=<bits>' isn't specified.
- if (!S.getLangOpts().ArmSveVectorBits) {
+ // Attribute is unsupported if '-msve-vector-bits=<bits>' isn't specified, or
+ // if <bits>+ syntax is used.
+ if (!S.getLangOpts().VScaleMin ||
+ S.getLangOpts().VScaleMin != S.getLangOpts().VScaleMax) {
S.Diag(Attr.getLoc(), diag::err_attribute_arm_feature_sve_bits_unsupported)
<< Attr;
Attr.setInvalid();
@@ -7899,9 +7946,9 @@ static void HandleArmSveVectorBitsTypeAttr(QualType &CurType, ParsedAttr &Attr,
unsigned VecSize = static_cast<unsigned>(SveVectorSizeInBits.getZExtValue());
// The attribute vector size must match -msve-vector-bits.
- if (VecSize != S.getLangOpts().ArmSveVectorBits) {
+ if (VecSize != S.getLangOpts().VScaleMin * 128) {
S.Diag(Attr.getLoc(), diag::err_attribute_bad_sve_vector_size)
- << VecSize << S.getLangOpts().ArmSveVectorBits;
+ << VecSize << S.getLangOpts().VScaleMin * 128;
Attr.setInvalid();
return;
}
@@ -8113,6 +8160,11 @@ static void processTypeAttrs(TypeProcessingState &state, QualType &type,
case ParsedAttr::IgnoredAttribute:
break;
+ case ParsedAttr::AT_BTFTypeTag:
+ HandleBTFTypeTagAttribute(type, attr, state);
+ attr.setUsedAsTypeAttr();
+ break;
+
case ParsedAttr::AT_MayAlias:
// FIXME: This attribute needs to actually be handled, but if we ignore
// it it breaks large amounts of Linux software.
@@ -8294,10 +8346,6 @@ static void processTypeAttrs(TypeProcessingState &state, QualType &type,
attr.getMacroExpansionLoc());
}
}
-
- if (!state.getSema().getLangOpts().OpenCL ||
- type.getAddressSpace() != LangAS::Default)
- return;
}
void Sema::completeExprArrayBound(Expr *E) {
@@ -8877,7 +8925,7 @@ QualType Sema::getElaboratedType(ElaboratedTypeKeyword Keyword,
return Context.getElaboratedType(Keyword, NNS, T, OwnedTagDecl);
}
-QualType Sema::BuildTypeofExprType(Expr *E, SourceLocation Loc) {
+QualType Sema::BuildTypeofExprType(Expr *E) {
assert(!E->hasPlaceholderType() && "unexpected placeholder");
if (!getLangOpts().CPlusPlus && E->refersToBitField())
@@ -8891,35 +8939,12 @@ QualType Sema::BuildTypeofExprType(Expr *E, SourceLocation Loc) {
return Context.getTypeOfExprType(E);
}
-/// getDecltypeForParenthesizedExpr - Given an expr, will return the type for
-/// that expression, as in [dcl.type.simple]p4 but without taking id-expressions
-/// and class member access into account.
-QualType Sema::getDecltypeForParenthesizedExpr(Expr *E) {
- // C++11 [dcl.type.simple]p4:
- // [...]
- QualType T = E->getType();
- switch (E->getValueKind()) {
- // - otherwise, if e is an xvalue, decltype(e) is T&&, where T is the
- // type of e;
- case VK_XValue:
- return Context.getRValueReferenceType(T);
- // - otherwise, if e is an lvalue, decltype(e) is T&, where T is the
- // type of e;
- case VK_LValue:
- return Context.getLValueReferenceType(T);
- // - otherwise, decltype(e) is the type of e.
- case VK_PRValue:
- return T;
- }
- llvm_unreachable("Unknown value kind");
-}
-
/// getDecltypeForExpr - Given an expr, will return the decltype for
/// that expression, according to the rules in C++11
/// [dcl.type.simple]p4 and C++11 [expr.lambda.prim]p18.
-static QualType getDecltypeForExpr(Sema &S, Expr *E) {
+QualType Sema::getDecltypeForExpr(Expr *E) {
if (E->isTypeDependent())
- return S.Context.DependentTy;
+ return Context.DependentTy;
Expr *IDExpr = E;
if (auto *ImplCastExpr = dyn_cast<ImplicitCastExpr>(E))
@@ -8936,7 +8961,7 @@ static QualType getDecltypeForExpr(Sema &S, Expr *E) {
// parameter object. This rule makes no difference before C++20 so we apply
// it unconditionally.
if (const auto *SNTTPE = dyn_cast<SubstNonTypeTemplateParmExpr>(IDExpr))
- return SNTTPE->getParameterType(S.Context);
+ return SNTTPE->getParameterType(Context);
// - if e is an unparenthesized id-expression or an unparenthesized class
// member access (5.2.5), decltype(e) is the type of the entity named
@@ -8944,22 +8969,21 @@ static QualType getDecltypeForExpr(Sema &S, Expr *E) {
// functions, the program is ill-formed;
//
// We apply the same rules for Objective-C ivar and property references.
- if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(IDExpr)) {
+ if (const auto *DRE = dyn_cast<DeclRefExpr>(IDExpr)) {
const ValueDecl *VD = DRE->getDecl();
- if (auto *TPO = dyn_cast<TemplateParamObjectDecl>(VD))
- return TPO->getType().getUnqualifiedType();
- return VD->getType();
- } else if (const MemberExpr *ME = dyn_cast<MemberExpr>(IDExpr)) {
- if (const ValueDecl *VD = ME->getMemberDecl())
+ QualType T = VD->getType();
+ return isa<TemplateParamObjectDecl>(VD) ? T.getUnqualifiedType() : T;
+ }
+ if (const auto *ME = dyn_cast<MemberExpr>(IDExpr)) {
+ if (const auto *VD = ME->getMemberDecl())
if (isa<FieldDecl>(VD) || isa<VarDecl>(VD))
return VD->getType();
- } else if (const ObjCIvarRefExpr *IR = dyn_cast<ObjCIvarRefExpr>(IDExpr)) {
+ } else if (const auto *IR = dyn_cast<ObjCIvarRefExpr>(IDExpr)) {
return IR->getDecl()->getType();
- } else if (const ObjCPropertyRefExpr *PR =
- dyn_cast<ObjCPropertyRefExpr>(IDExpr)) {
+ } else if (const auto *PR = dyn_cast<ObjCPropertyRefExpr>(IDExpr)) {
if (PR->isExplicitProperty())
return PR->getExplicitProperty()->getType();
- } else if (auto *PE = dyn_cast<PredefinedExpr>(IDExpr)) {
+ } else if (const auto *PE = dyn_cast<PredefinedExpr>(IDExpr)) {
return PE->getType();
}
@@ -8970,24 +8994,20 @@ static QualType getDecltypeForExpr(Sema &S, Expr *E) {
// access to a corresponding data member of the closure type that
// would have been declared if x were an odr-use of the denoted
// entity.
- using namespace sema;
- if (S.getCurLambda()) {
- if (isa<ParenExpr>(IDExpr)) {
- if (DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(IDExpr->IgnoreParens())) {
- if (VarDecl *Var = dyn_cast<VarDecl>(DRE->getDecl())) {
- QualType T = S.getCapturedDeclRefType(Var, DRE->getLocation());
- if (!T.isNull())
- return S.Context.getLValueReferenceType(T);
- }
+ if (getCurLambda() && isa<ParenExpr>(IDExpr)) {
+ if (auto *DRE = dyn_cast<DeclRefExpr>(IDExpr->IgnoreParens())) {
+ if (auto *Var = dyn_cast<VarDecl>(DRE->getDecl())) {
+ QualType T = getCapturedDeclRefType(Var, DRE->getLocation());
+ if (!T.isNull())
+ return Context.getLValueReferenceType(T);
}
}
}
- return S.getDecltypeForParenthesizedExpr(E);
+ return Context.getReferenceQualifiedType(E);
}
-QualType Sema::BuildDecltypeType(Expr *E, SourceLocation Loc,
- bool AsUnevaluated) {
+QualType Sema::BuildDecltypeType(Expr *E, bool AsUnevaluated) {
assert(!E->hasPlaceholderType() && "unexpected placeholder");
if (AsUnevaluated && CodeSynthesisContexts.empty() &&
@@ -8998,8 +9018,7 @@ QualType Sema::BuildDecltypeType(Expr *E, SourceLocation Loc,
// used to build SFINAE gadgets.
Diag(E->getExprLoc(), diag::warn_side_effects_unevaluated_context);
}
-
- return Context.getDecltypeType(E, getDecltypeForExpr(*this, E));
+ return Context.getDecltypeType(E, getDecltypeForExpr(E));
}
QualType Sema::BuildUnaryTransformType(QualType BaseType,
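
The SemaType.cpp hunks above delete Sema::getDecltypeForParenthesizedExpr and route the parenthesized/value-category case of decltype through ASTContext::getReferenceQualifiedType instead. As a reminder of the language rule involved ([dcl.type.simple]p4), a small self-contained illustration:

    #include <type_traits>
    #include <utility>

    int n = 0;

    // Unparenthesized id-expression: decltype yields the declared type.
    static_assert(std::is_same_v<decltype(n), int>);
    // Parenthesized lvalue: decltype yields T&.
    static_assert(std::is_same_v<decltype((n)), int &>);
    // Xvalue: decltype yields T&&.
    static_assert(std::is_same_v<decltype(std::move(n)), int &&>);
    // Prvalue: decltype yields T.
    static_assert(std::is_same_v<decltype(n + 1), int>);
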
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 70ba631dbfc6..7f3326c13263 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -161,7 +161,7 @@ public:
/// Wether CXXConstructExpr can be skipped when they are implicit.
/// They will be reconstructed when used if needed.
- /// This is usefull when the user that cause rebuilding of the
+ /// This is useful when the user that cause rebuilding of the
/// CXXConstructExpr is outside of the expression at which the TreeTransform
/// started.
bool AllowSkippingCXXConstructExpr() { return true; }
@@ -522,7 +522,7 @@ public:
///
/// By default, transforms the types of conversion function, constructor,
/// and destructor names and then (if needed) rebuilds the declaration name.
- /// Identifiers and selectors are returned unmodified. Sublcasses may
+ /// Identifiers and selectors are returned unmodified. Subclasses may
/// override this function to provide alternate behavior.
DeclarationNameInfo
TransformDeclarationNameInfo(const DeclarationNameInfo &NameInfo);
@@ -1320,12 +1320,12 @@ public:
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
- StmtResult RebuildIfStmt(SourceLocation IfLoc, bool IsConstexpr,
+ StmtResult RebuildIfStmt(SourceLocation IfLoc, IfStatementKind Kind,
SourceLocation LParenLoc, Sema::ConditionResult Cond,
SourceLocation RParenLoc, Stmt *Init, Stmt *Then,
SourceLocation ElseLoc, Stmt *Else) {
- return getSema().ActOnIfStmt(IfLoc, IsConstexpr, LParenLoc, Init, Cond,
- RParenLoc, Then, ElseLoc, Else);
+ return getSema().ActOnIfStmt(IfLoc, Kind, LParenLoc, Init, Cond, RParenLoc,
+ Then, ElseLoc, Else);
}
/// Start building a new switch statement.
@@ -1929,10 +1929,10 @@ public:
OpenMPMapClauseKind MapType, bool IsMapTypeImplicit,
SourceLocation MapLoc, SourceLocation ColonLoc, ArrayRef<Expr *> VarList,
const OMPVarListLocTy &Locs, ArrayRef<Expr *> UnresolvedMappers) {
- return getSema().ActOnOpenMPMapClause(MapTypeModifiers, MapTypeModifiersLoc,
- MapperIdScopeSpec, MapperId, MapType,
- IsMapTypeImplicit, MapLoc, ColonLoc,
- VarList, Locs, UnresolvedMappers);
+ return getSema().ActOnOpenMPMapClause(
+ MapTypeModifiers, MapTypeModifiersLoc, MapperIdScopeSpec, MapperId,
+ MapType, IsMapTypeImplicit, MapLoc, ColonLoc, VarList, Locs,
+ /*NoDiagnose=*/false, UnresolvedMappers);
}
/// Build a new OpenMP 'allocate' clause.
@@ -2256,6 +2256,29 @@ public:
EndLoc);
}
+ /// Build a new OpenMP 'bind' clause.
+ ///
+ /// By default, performs semantic analysis to build the new OpenMP clause.
+ /// Subclasses may override this routine to provide different behavior.
+ OMPClause *RebuildOMPBindClause(OpenMPBindClauseKind Kind,
+ SourceLocation KindLoc,
+ SourceLocation StartLoc,
+ SourceLocation LParenLoc,
+ SourceLocation EndLoc) {
+ return getSema().ActOnOpenMPBindClause(Kind, KindLoc, StartLoc, LParenLoc,
+ EndLoc);
+ }
+
+ /// Build a new OpenMP 'align' clause.
+ ///
+ /// By default, performs semantic analysis to build the new OpenMP clause.
+ /// Subclasses may override this routine to provide different behavior.
+ OMPClause *RebuildOMPAlignClause(Expr *A, SourceLocation StartLoc,
+ SourceLocation LParenLoc,
+ SourceLocation EndLoc) {
+ return getSema().ActOnOpenMPAlignClause(A, StartLoc, LParenLoc, EndLoc);
+ }
+
/// Rebuild the operand to an Objective-C \@synchronized statement.
///
/// By default, performs semantic analysis to build the new statement.
@@ -3840,8 +3863,10 @@ ExprResult TreeTransform<Derived>::TransformInitializer(Expr *Init,
if (auto *FE = dyn_cast<FullExpr>(Init))
Init = FE->getSubExpr();
- if (auto *AIL = dyn_cast<ArrayInitLoopExpr>(Init))
- Init = AIL->getCommonExpr();
+ if (auto *AIL = dyn_cast<ArrayInitLoopExpr>(Init)) {
+ OpaqueValueExpr *OVE = AIL->getCommonExpr();
+ Init = OVE->getSourceExpr();
+ }
if (MaterializeTemporaryExpr *MTE = dyn_cast<MaterializeTemporaryExpr>(Init))
Init = MTE->getSubExpr();
@@ -4745,8 +4770,8 @@ QualType TreeTransform<Derived>::RebuildQualifiedType(QualType T,
SourceLocation Loc = TL.getBeginLoc();
Qualifiers Quals = TL.getType().getLocalQualifiers();
- if (((T.getAddressSpace() != LangAS::Default &&
- Quals.getAddressSpace() != LangAS::Default)) &&
+ if ((T.getAddressSpace() != LangAS::Default &&
+ Quals.getAddressSpace() != LangAS::Default) &&
T.getAddressSpace() != Quals.getAddressSpace()) {
SemaRef.Diag(Loc, diag::err_address_space_mismatch_templ_inst)
<< TL.getType() << T;
@@ -5945,7 +5970,7 @@ bool TreeTransform<Derived>::TransformExceptionSpec(
ExceptionSpecificationType EST = ESI.Type;
NoexceptExpr =
- getSema().ActOnNoexceptSpec(Loc, NoexceptExpr.get(), EST);
+ getSema().ActOnNoexceptSpec(NoexceptExpr.get(), EST);
if (NoexceptExpr.isInvalid())
return true;
@@ -6561,7 +6586,7 @@ QualType TreeTransform<Derived>::TransformAutoType(TypeLocBuilder &TLB,
T->isDependentType() || T->isConstrained()) {
// FIXME: Maybe don't rebuild if all template arguments are the same.
llvm::SmallVector<TemplateArgument, 4> NewArgList;
- NewArgList.reserve(NewArgList.size());
+ NewArgList.reserve(NewTemplateArgs.size());
for (const auto &ArgLoc : NewTemplateArgs.arguments())
NewArgList.push_back(ArgLoc.getArgument());
Result = getDerived().RebuildAutoType(NewDeduced, T->getKeyword(), NewCD,
@@ -6578,7 +6603,7 @@ QualType TreeTransform<Derived>::TransformAutoType(TypeLocBuilder &TLB,
NewTL.setFoundDecl(TL.getFoundDecl());
NewTL.setLAngleLoc(TL.getLAngleLoc());
NewTL.setRAngleLoc(TL.getRAngleLoc());
- for (unsigned I = 0; I < TL.getNumArgs(); ++I)
+ for (unsigned I = 0; I < NewTL.getNumArgs(); ++I)
NewTL.setArgLocInfo(I, NewTemplateArgs.arguments()[I].getLocInfo());
return Result;
@@ -7371,13 +7396,16 @@ TreeTransform<Derived>::TransformIfStmt(IfStmt *S) {
if (Init.isInvalid())
return StmtError();
- // Transform the condition
- Sema::ConditionResult Cond = getDerived().TransformCondition(
- S->getIfLoc(), S->getConditionVariable(), S->getCond(),
- S->isConstexpr() ? Sema::ConditionKind::ConstexprIf
- : Sema::ConditionKind::Boolean);
- if (Cond.isInvalid())
- return StmtError();
+ Sema::ConditionResult Cond;
+ if (!S->isConsteval()) {
+ // Transform the condition
+ Cond = getDerived().TransformCondition(
+ S->getIfLoc(), S->getConditionVariable(), S->getCond(),
+ S->isConstexpr() ? Sema::ConditionKind::ConstexprIf
+ : Sema::ConditionKind::Boolean);
+ if (Cond.isInvalid())
+ return StmtError();
+ }
// If this is a constexpr if, determine which arm we should instantiate.
llvm::Optional<bool> ConstexprConditionValue;
@@ -7410,7 +7438,7 @@ TreeTransform<Derived>::TransformIfStmt(IfStmt *S) {
return S;
return getDerived().RebuildIfStmt(
- S->getIfLoc(), S->isConstexpr(), S->getLParenLoc(), Cond,
+ S->getIfLoc(), S->getStatementKind(), S->getLParenLoc(), Cond,
S->getRParenLoc(), Init.get(), Then.get(), S->getElseLoc(), Else.get());
}
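
TransformIfStmt above now carries an IfStatementKind and skips the condition entirely for a consteval if, which has no condition to transform. A minimal example of the construct (C++2b "if consteval", P1938) that this path has to reproduce:

    #include <cstdio>

    constexpr int describe(int x) {
      if consteval {
        return x;                   // taken during constant evaluation; no condition involved
      } else {
        std::printf("runtime\n");   // the runtime arm may use non-constexpr facilities
        return x;
      }
    }

    static_assert(describe(7) == 7);  // forces the compile-time arm
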
@@ -8506,6 +8534,15 @@ StmtResult TreeTransform<Derived>::TransformOMPExecutableDirective(
template <typename Derived>
StmtResult
+TreeTransform<Derived>::TransformOMPMetaDirective(OMPMetaDirective *D) {
+ // TODO: Fix This
+ SemaRef.Diag(D->getBeginLoc(), diag::err_omp_instantiation_not_supported)
+ << getOpenMPDirectiveName(D->getDirectiveKind());
+ return StmtError();
+}
+
+template <typename Derived>
+StmtResult
TreeTransform<Derived>::TransformOMPParallelDirective(OMPParallelDirective *D) {
DeclarationNameInfo DirName;
getDerived().getSema().StartOpenMPDSABlock(OMPD_parallel, DirName, nullptr,
@@ -9160,6 +9197,17 @@ TreeTransform<Derived>::TransformOMPMaskedDirective(OMPMaskedDirective *D) {
return Res;
}
+template <typename Derived>
+StmtResult TreeTransform<Derived>::TransformOMPGenericLoopDirective(
+ OMPGenericLoopDirective *D) {
+ DeclarationNameInfo DirName;
+ getDerived().getSema().StartOpenMPDSABlock(OMPD_loop, DirName, nullptr,
+ D->getBeginLoc());
+ StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
+ getDerived().getSema().EndOpenMPDSABlock(Res.get());
+ return Res;
+}
+
//===----------------------------------------------------------------------===//
// OpenMP clause transformation
//===----------------------------------------------------------------------===//
@@ -9510,6 +9558,15 @@ TreeTransform<Derived>::TransformOMPFilterClause(OMPFilterClause *C) {
}
template <typename Derived>
+OMPClause *TreeTransform<Derived>::TransformOMPAlignClause(OMPAlignClause *C) {
+ ExprResult E = getDerived().TransformExpr(C->getAlignment());
+ if (E.isInvalid())
+ return nullptr;
+ return getDerived().RebuildOMPAlignClause(E.get(), C->getBeginLoc(),
+ C->getLParenLoc(), C->getEndLoc());
+}
+
+template <typename Derived>
OMPClause *TreeTransform<Derived>::TransformOMPUnifiedAddressClause(
OMPUnifiedAddressClause *C) {
llvm_unreachable("unified_address clause cannot appear in dependent context");
@@ -10219,6 +10276,13 @@ OMPClause *TreeTransform<Derived>::TransformOMPOrderClause(OMPOrderClause *C) {
C->getEndLoc());
}
+template <typename Derived>
+OMPClause *TreeTransform<Derived>::TransformOMPBindClause(OMPBindClause *C) {
+ return getDerived().RebuildOMPBindClause(
+ C->getBindKind(), C->getBindKindLoc(), C->getBeginLoc(),
+ C->getLParenLoc(), C->getEndLoc());
+}
+
//===----------------------------------------------------------------------===//
// Expression transformation
//===----------------------------------------------------------------------===//
@@ -10944,14 +11008,10 @@ ExprResult TreeTransform<Derived>::TransformCXXRewrittenBinaryOperator(
if (RHS.isInvalid())
return ExprError();
- if (!getDerived().AlwaysRebuild() &&
- LHS.get() == Decomp.LHS &&
- RHS.get() == Decomp.RHS)
- return E;
-
// Extract the already-resolved callee declarations so that we can restrict
// ourselves to using them as the unqualified lookup results when rebuilding.
UnresolvedSet<2> UnqualLookups;
+ bool ChangedAnyLookups = false;
Expr *PossibleBinOps[] = {E->getSemanticForm(),
const_cast<Expr *>(Decomp.InnerBinOp)};
for (Expr *PossibleBinOp : PossibleBinOps) {
@@ -10968,9 +11028,23 @@ ExprResult TreeTransform<Derived>::TransformCXXRewrittenBinaryOperator(
E->getOperatorLoc(), Callee->getFoundDecl()));
if (!Found)
return ExprError();
+ if (Found != Callee->getFoundDecl())
+ ChangedAnyLookups = true;
UnqualLookups.addDecl(Found);
}
+ if (!getDerived().AlwaysRebuild() && !ChangedAnyLookups &&
+ LHS.get() == Decomp.LHS && RHS.get() == Decomp.RHS) {
+ // Mark all functions used in the rewrite as referenced. Note that when
+ // a < b is rewritten to (a <=> b) < 0, both the <=> and the < might be
+ // function calls, and/or there might be a user-defined conversion sequence
+ // applied to the operands of the <.
+ // FIXME: this is a bit instantiation-specific.
+ const Expr *StopAt[] = {Decomp.LHS, Decomp.RHS};
+ SemaRef.MarkDeclarationsReferencedInExpr(E, false, StopAt);
+ return E;
+ }
+
return getDerived().RebuildCXXRewrittenBinaryOperator(
E->getOperatorLoc(), Decomp.Opcode, UnqualLookups, LHS.get(), RHS.get());
}
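
The comment added above refers to C++20 operator rewriting: a '<' written in the source may be evaluated as '(a <=> b) < 0', so both the spaceship operator and the comparison against 0 can be user-visible calls that need to be marked as referenced. A small example of such a rewritten operator:

    #include <compare>

    struct Widget {
      int Value;
      auto operator<=>(const Widget &) const = default;
    };

    // 'A < B' has no operator< of its own; the compiler rewrites it to
    // '(A <=> B) < 0', calling the defaulted operator<=> above.
    bool lessThan(const Widget &A, const Widget &B) { return A < B; }
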
@@ -14437,10 +14511,10 @@ QualType TreeTransform<Derived>::RebuildUnresolvedUsingType(SourceLocation Loc,
return SemaRef.Context.getTypeDeclType(Ty);
}
-template<typename Derived>
+template <typename Derived>
QualType TreeTransform<Derived>::RebuildTypeOfExprType(Expr *E,
- SourceLocation Loc) {
- return SemaRef.BuildTypeofExprType(E, Loc);
+ SourceLocation) {
+ return SemaRef.BuildTypeofExprType(E);
}
template<typename Derived>
@@ -14448,10 +14522,9 @@ QualType TreeTransform<Derived>::RebuildTypeOfType(QualType Underlying) {
return SemaRef.Context.getTypeOfType(Underlying);
}
-template<typename Derived>
-QualType TreeTransform<Derived>::RebuildDecltypeType(Expr *E,
- SourceLocation Loc) {
- return SemaRef.BuildDecltypeType(E, Loc);
+template <typename Derived>
+QualType TreeTransform<Derived>::RebuildDecltypeType(Expr *E, SourceLocation) {
+ return SemaRef.BuildDecltypeType(E);
}
template<typename Derived>
diff --git a/clang/lib/Sema/UsedDeclVisitor.h b/clang/lib/Sema/UsedDeclVisitor.h
index c33d30478e2a..24b7342b3fb4 100644
--- a/clang/lib/Sema/UsedDeclVisitor.h
+++ b/clang/lib/Sema/UsedDeclVisitor.h
@@ -72,7 +72,8 @@ public:
QualType Destroyed = S.Context.getBaseElementType(DestroyedOrNull);
if (const RecordType *DestroyedRec = Destroyed->getAs<RecordType>()) {
CXXRecordDecl *Record = cast<CXXRecordDecl>(DestroyedRec->getDecl());
- asImpl().visitUsedDecl(E->getBeginLoc(), S.LookupDestructor(Record));
+ if (Record->getDefinition())
+ asImpl().visitUsedDecl(E->getBeginLoc(), S.LookupDestructor(Record));
}
}
diff --git a/clang/lib/Serialization/ASTCommon.cpp b/clang/lib/Serialization/ASTCommon.cpp
index 5fe1f96327dd..c60f87a23985 100644
--- a/clang/lib/Serialization/ASTCommon.cpp
+++ b/clang/lib/Serialization/ASTCommon.cpp
@@ -168,6 +168,9 @@ serialization::TypeIdxFromBuiltin(const BuiltinType *BT) {
case BuiltinType::Float128:
ID = PREDEF_TYPE_FLOAT128_ID;
break;
+ case BuiltinType::Ibm128:
+ ID = PREDEF_TYPE_IBM128_ID;
+ break;
case BuiltinType::NullPtr:
ID = PREDEF_TYPE_NULLPTR_ID;
break;
@@ -474,7 +477,7 @@ bool serialization::needsAnonymousDeclarationNumber(const NamedDecl *D) {
// Otherwise, we only care about anonymous class members / block-scope decls.
// FIXME: We need to handle lambdas and blocks within inline / templated
// variables too.
- if (D->getDeclName() || !isa<CXXRecordDecl>(D->getLexicalDeclContext()))
+ if (D->getDeclName() || !isa<RecordDecl>(D->getLexicalDeclContext()))
return false;
return isa<TagDecl>(D) || isa<FieldDecl>(D);
}
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 83bade9941b3..a033bccbe506 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -10,15 +10,13 @@
//
//===----------------------------------------------------------------------===//
-#include "clang/Basic/OpenMPKinds.h"
-#include "clang/Serialization/ASTRecordReader.h"
#include "ASTCommon.h"
#include "ASTReaderInternals.h"
-#include "clang/AST/AbstractTypeReader.h"
#include "clang/AST/ASTConsumer.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/ASTMutationListener.h"
#include "clang/AST/ASTUnresolvedSet.h"
+#include "clang/AST/AbstractTypeReader.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclBase.h"
#include "clang/AST/DeclCXX.h"
@@ -31,8 +29,8 @@
#include "clang/AST/ExprCXX.h"
#include "clang/AST/ExternalASTSource.h"
#include "clang/AST/NestedNameSpecifier.h"
-#include "clang/AST/OpenMPClause.h"
#include "clang/AST/ODRHash.h"
+#include "clang/AST/OpenMPClause.h"
#include "clang/AST/RawCommentList.h"
#include "clang/AST/TemplateBase.h"
#include "clang/AST/TemplateName.h"
@@ -42,6 +40,7 @@
#include "clang/AST/UnresolvedSet.h"
#include "clang/Basic/CommentOptions.h"
#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/DiagnosticError.h"
#include "clang/Basic/DiagnosticOptions.h"
#include "clang/Basic/ExceptionSpecificationType.h"
#include "clang/Basic/FileManager.h"
@@ -51,6 +50,7 @@
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/Module.h"
#include "clang/Basic/ObjCRuntime.h"
+#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/OperatorKinds.h"
#include "clang/Basic/PragmaKinds.h"
#include "clang/Basic/Sanitizers.h"
@@ -76,6 +76,7 @@
#include "clang/Sema/Weak.h"
#include "clang/Serialization/ASTBitCodes.h"
#include "clang/Serialization/ASTDeserializationListener.h"
+#include "clang/Serialization/ASTRecordReader.h"
#include "clang/Serialization/ContinuousRangeMap.h"
#include "clang/Serialization/GlobalModuleIndex.h"
#include "clang/Serialization/InMemoryModuleCache.h"
@@ -555,7 +556,8 @@ static Module *getTopImportImplicitModule(ModuleManager &ModuleMgr,
StringRef ModuleName = TopImport->ModuleName;
assert(!ModuleName.empty() && "diagnostic options read before module name");
- Module *M = PP.getHeaderSearchInfo().lookupModule(ModuleName);
+ Module *M =
+ PP.getHeaderSearchInfo().lookupModule(ModuleName, TopImport->ImportLoc);
assert(M && "missing module");
return M;
}
@@ -726,8 +728,7 @@ static bool checkPreprocessorOptions(const PreprocessorOptions &PPOpts,
if (File == ExistingPPOpts.ImplicitPCHInclude)
continue;
- if (std::find(PPOpts.Includes.begin(), PPOpts.Includes.end(), File)
- != PPOpts.Includes.end())
+ if (llvm::is_contained(PPOpts.Includes, File))
continue;
SuggestedPredefines += "#include \"";
@@ -737,9 +738,7 @@ static bool checkPreprocessorOptions(const PreprocessorOptions &PPOpts,
for (unsigned I = 0, N = ExistingPPOpts.MacroIncludes.size(); I != N; ++I) {
StringRef File = ExistingPPOpts.MacroIncludes[I];
- if (std::find(PPOpts.MacroIncludes.begin(), PPOpts.MacroIncludes.end(),
- File)
- != PPOpts.MacroIncludes.end())
+ if (llvm::is_contained(PPOpts.MacroIncludes, File))
continue;
SuggestedPredefines += "#__include_macros \"";
@@ -1263,7 +1262,29 @@ void ASTReader::Error(unsigned DiagID, StringRef Arg1, StringRef Arg2,
}
void ASTReader::Error(llvm::Error &&Err) const {
- Error(toString(std::move(Err)));
+ llvm::Error RemainingErr =
+ handleErrors(std::move(Err), [this](const DiagnosticError &E) {
+ auto Diag = E.getDiagnostic().second;
+
+ // Ideally we'd just emit it, but have to handle a possible in-flight
+ // diagnostic. Note that the location is currently ignored as well.
+ auto NumArgs = Diag.getStorage()->NumDiagArgs;
+ assert(NumArgs <= 3 && "Can only have up to 3 arguments");
+ StringRef Arg1, Arg2, Arg3;
+ switch (NumArgs) {
+ case 3:
+ Arg3 = Diag.getStringArg(2);
+ LLVM_FALLTHROUGH;
+ case 2:
+ Arg2 = Diag.getStringArg(1);
+ LLVM_FALLTHROUGH;
+ case 1:
+ Arg1 = Diag.getStringArg(0);
+ }
+ Error(Diag.getDiagID(), Arg1, Arg2, Arg3);
+ });
+ if (RemainingErr)
+ Error(toString(std::move(RemainingErr)));
}
//===----------------------------------------------------------------------===//
@@ -1271,9 +1292,7 @@ void ASTReader::Error(llvm::Error &&Err) const {
//===----------------------------------------------------------------------===//
/// Read the line table in the source manager block.
-/// \returns true if there was an error.
-bool ASTReader::ParseLineTable(ModuleFile &F,
- const RecordData &Record) {
+void ASTReader::ParseLineTable(ModuleFile &F, const RecordData &Record) {
unsigned Idx = 0;
LineTableInfo &LineTable = SourceMgr.getLineTable();
@@ -1312,12 +1331,10 @@ bool ASTReader::ParseLineTable(ModuleFile &F,
}
LineTable.AddEntry(FileID::get(FID), Entries);
}
-
- return false;
}
/// Read a source manager block
-bool ASTReader::ReadSourceManagerBlock(ModuleFile &F) {
+llvm::Error ASTReader::ReadSourceManagerBlock(ModuleFile &F) {
using namespace SrcMgr;
BitstreamCursor &SLocEntryCursor = F.SLocEntryCursor;
@@ -1329,36 +1346,29 @@ bool ASTReader::ReadSourceManagerBlock(ModuleFile &F) {
SLocEntryCursor = F.Stream;
// The stream itself is going to skip over the source manager block.
- if (llvm::Error Err = F.Stream.SkipBlock()) {
- Error(std::move(Err));
- return true;
- }
+ if (llvm::Error Err = F.Stream.SkipBlock())
+ return Err;
// Enter the source manager block.
- if (llvm::Error Err =
- SLocEntryCursor.EnterSubBlock(SOURCE_MANAGER_BLOCK_ID)) {
- Error(std::move(Err));
- return true;
- }
+ if (llvm::Error Err = SLocEntryCursor.EnterSubBlock(SOURCE_MANAGER_BLOCK_ID))
+ return Err;
F.SourceManagerBlockStartOffset = SLocEntryCursor.GetCurrentBitNo();
RecordData Record;
while (true) {
Expected<llvm::BitstreamEntry> MaybeE =
SLocEntryCursor.advanceSkippingSubblocks();
- if (!MaybeE) {
- Error(MaybeE.takeError());
- return true;
- }
+ if (!MaybeE)
+ return MaybeE.takeError();
llvm::BitstreamEntry E = MaybeE.get();
switch (E.Kind) {
case llvm::BitstreamEntry::SubBlock: // Handled for us already.
case llvm::BitstreamEntry::Error:
- Error("malformed block record in AST file");
- return true;
+ return llvm::createStringError(std::errc::illegal_byte_sequence,
+ "malformed block record in AST file");
case llvm::BitstreamEntry::EndBlock:
- return false;
+ return llvm::Error::success();
case llvm::BitstreamEntry::Record:
// The interesting case.
break;
@@ -1369,10 +1379,8 @@ bool ASTReader::ReadSourceManagerBlock(ModuleFile &F) {
StringRef Blob;
Expected<unsigned> MaybeRecord =
SLocEntryCursor.readRecord(E.ID, Record, &Blob);
- if (!MaybeRecord) {
- Error(MaybeRecord.takeError());
- return true;
- }
+ if (!MaybeRecord)
+ return MaybeRecord.takeError();
switch (MaybeRecord.get()) {
default: // Default behavior: ignore.
break;
@@ -1381,7 +1389,7 @@ bool ASTReader::ReadSourceManagerBlock(ModuleFile &F) {
case SM_SLOC_BUFFER_ENTRY:
case SM_SLOC_EXPANSION_ENTRY:
// Once we hit one of the source location entries, we're done.
- return false;
+ return llvm::Error::success();
}
}
}
@@ -1632,13 +1640,11 @@ SourceLocation ASTReader::getImportLocation(ModuleFile *F) {
/// Enter a subblock of the specified BlockID with the specified cursor. Read
/// the abbreviations that are at the top of the block and then leave the cursor
/// pointing into the block.
-bool ASTReader::ReadBlockAbbrevs(BitstreamCursor &Cursor, unsigned BlockID,
- uint64_t *StartOfBlockOffset) {
- if (llvm::Error Err = Cursor.EnterSubBlock(BlockID)) {
- // FIXME this drops errors on the floor.
- consumeError(std::move(Err));
- return true;
- }
+llvm::Error ASTReader::ReadBlockAbbrevs(BitstreamCursor &Cursor,
+ unsigned BlockID,
+ uint64_t *StartOfBlockOffset) {
+ if (llvm::Error Err = Cursor.EnterSubBlock(BlockID))
+ return Err;
if (StartOfBlockOffset)
*StartOfBlockOffset = Cursor.GetCurrentBitNo();
@@ -1646,27 +1652,18 @@ bool ASTReader::ReadBlockAbbrevs(BitstreamCursor &Cursor, unsigned BlockID,
while (true) {
uint64_t Offset = Cursor.GetCurrentBitNo();
Expected<unsigned> MaybeCode = Cursor.ReadCode();
- if (!MaybeCode) {
- // FIXME this drops errors on the floor.
- consumeError(MaybeCode.takeError());
- return true;
- }
+ if (!MaybeCode)
+ return MaybeCode.takeError();
unsigned Code = MaybeCode.get();
// We expect all abbrevs to be at the start of the block.
if (Code != llvm::bitc::DEFINE_ABBREV) {
- if (llvm::Error Err = Cursor.JumpToBit(Offset)) {
- // FIXME this drops errors on the floor.
- consumeError(std::move(Err));
- return true;
- }
- return false;
- }
- if (llvm::Error Err = Cursor.ReadAbbrevRecord()) {
- // FIXME this drops errors on the floor.
- consumeError(std::move(Err));
- return true;
+ if (llvm::Error Err = Cursor.JumpToBit(Offset))
+ return Err;
+ return llvm::Error::success();
}
+ if (llvm::Error Err = Cursor.ReadAbbrevRecord())
+ return Err;
}
}
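
The ASTReader hunks in this region convert bool/ASTReadResult returns into llvm::Error so that failures propagate to a single reporting point instead of being emitted (or silently dropped) at each call site. A generic sketch of the idiom, independent of the reader itself:

    #include "llvm/Support/Error.h"
    #include "llvm/Support/raw_ostream.h"
    #include <system_error>

    // Return llvm::Error::success() on the happy path and a rich error otherwise;
    // the caller decides whether to report, wrap, or consume it.
    static llvm::Error readBlock(bool Malformed) {
      if (Malformed)
        return llvm::createStringError(std::errc::illegal_byte_sequence,
                                       "malformed block record");
      return llvm::Error::success();
    }

    static void caller() {
      if (llvm::Error Err = readBlock(/*Malformed=*/false))
        llvm::logAllUnhandledErrors(std::move(Err), llvm::errs(), "AST reader: ");
    }
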
@@ -2380,17 +2377,24 @@ InputFile ASTReader::getInputFile(ModuleFile &F, unsigned ID, bool Complain) {
}
}
- enum ModificationType {
- Size,
- ModTime,
- Content,
- None,
+ struct Change {
+ enum ModificationKind {
+ Size,
+ ModTime,
+ Content,
+ None,
+ } Kind;
+ llvm::Optional<int64_t> Old = llvm::None;
+ llvm::Optional<int64_t> New = llvm::None;
};
auto HasInputFileChanged = [&]() {
if (StoredSize != File->getSize())
- return ModificationType::Size;
+ return Change{Change::Size, StoredSize, File->getSize()};
if (!shouldDisableValidationForFile(F) && StoredTime &&
StoredTime != File->getModificationTime()) {
+ Change MTimeChange = {Change::ModTime, StoredTime,
+ File->getModificationTime()};
+
// In case the modification time changes but not the content,
// accept the cached file as legit.
if (ValidateASTInputFilesContent &&
@@ -2398,28 +2402,30 @@ InputFile ASTReader::getInputFile(ModuleFile &F, unsigned ID, bool Complain) {
auto MemBuffOrError = FileMgr.getBufferForFile(File);
if (!MemBuffOrError) {
if (!Complain)
- return ModificationType::ModTime;
+ return MTimeChange;
std::string ErrorStr = "could not get buffer for file '";
ErrorStr += File->getName();
ErrorStr += "'";
Error(ErrorStr);
- return ModificationType::ModTime;
+ return MTimeChange;
}
+ // FIXME: hash_value is not guaranteed to be stable!
auto ContentHash = hash_value(MemBuffOrError.get()->getBuffer());
if (StoredContentHash == static_cast<uint64_t>(ContentHash))
- return ModificationType::None;
- return ModificationType::Content;
+ return Change{Change::None};
+
+ return Change{Change::Content};
}
- return ModificationType::ModTime;
+ return MTimeChange;
}
- return ModificationType::None;
+ return Change{Change::None};
};
bool IsOutOfDate = false;
auto FileChange = HasInputFileChanged();
// For an overridden file, there is nothing to validate.
- if (!Overridden && FileChange != ModificationType::None) {
+ if (!Overridden && FileChange.Kind != Change::None) {
if (Complain && !Diags.isDiagnosticInFlight()) {
// Build a list of the PCH imports that got us here (in reverse).
SmallVector<ModuleFile *, 4> ImportStack(1, &F);
@@ -2430,7 +2436,10 @@ InputFile ASTReader::getInputFile(ModuleFile &F, unsigned ID, bool Complain) {
StringRef TopLevelPCHName(ImportStack.back()->FileName);
Diag(diag::err_fe_ast_file_modified)
<< Filename << moduleKindForDiagnostic(ImportStack.back()->Kind)
- << TopLevelPCHName << FileChange;
+ << TopLevelPCHName << FileChange.Kind
+ << (FileChange.Old && FileChange.New)
+ << llvm::itostr(FileChange.Old.getValueOr(0))
+ << llvm::itostr(FileChange.New.getValueOr(0));
// Print the import stack.
if (ImportStack.size() > 1) {
@@ -2912,7 +2921,7 @@ ASTReader::ReadControlBlock(ModuleFile &F,
// If we've already loaded a module map file covering this module, we may
// have a better path for it (relative to the current build).
Module *M = PP.getHeaderSearchInfo().lookupModule(
- F.ModuleName, /*AllowSearch*/ true,
+ F.ModuleName, SourceLocation(), /*AllowSearch*/ true,
/*AllowExtraModuleMapSearch*/ true);
if (M && M->Directory) {
// If we're implicitly loading a module, the base directory can't
@@ -2954,30 +2963,27 @@ ASTReader::ReadControlBlock(ModuleFile &F,
}
}
-ASTReader::ASTReadResult
-ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
+llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
+ unsigned ClientLoadCapabilities) {
BitstreamCursor &Stream = F.Stream;
- if (llvm::Error Err = Stream.EnterSubBlock(AST_BLOCK_ID)) {
- Error(std::move(Err));
- return Failure;
- }
+ if (llvm::Error Err = Stream.EnterSubBlock(AST_BLOCK_ID))
+ return Err;
F.ASTBlockStartOffset = Stream.GetCurrentBitNo();
// Read all of the records and blocks for the AST file.
RecordData Record;
while (true) {
Expected<llvm::BitstreamEntry> MaybeEntry = Stream.advance();
- if (!MaybeEntry) {
- Error(MaybeEntry.takeError());
- return Failure;
- }
+ if (!MaybeEntry)
+ return MaybeEntry.takeError();
llvm::BitstreamEntry Entry = MaybeEntry.get();
switch (Entry.Kind) {
case llvm::BitstreamEntry::Error:
- Error("error at end of module block in AST file");
- return Failure;
+ return llvm::createStringError(
+ std::errc::illegal_byte_sequence,
+ "error at end of module block in AST file");
case llvm::BitstreamEntry::EndBlock:
// Outside of C++, we do not store a lookup map for the translation unit.
// Instead, mark it as needing a lookup map to be built if this module
@@ -2990,7 +2996,7 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
DC->setMustBuildLookupTable();
}
- return Success;
+ return llvm::Error::success();
case llvm::BitstreamEntry::SubBlock:
switch (Entry.ID) {
case DECLTYPES_BLOCK_ID:
@@ -2999,15 +3005,11 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
// cursor to it, enter the block and read the abbrevs in that block.
// With the main cursor, we just skip over it.
F.DeclsCursor = Stream;
- if (llvm::Error Err = Stream.SkipBlock()) {
- Error(std::move(Err));
- return Failure;
- }
- if (ReadBlockAbbrevs(F.DeclsCursor, DECLTYPES_BLOCK_ID,
- &F.DeclsBlockStartOffset)) {
- Error("malformed block record in AST file");
- return Failure;
- }
+ if (llvm::Error Err = Stream.SkipBlock())
+ return Err;
+ if (llvm::Error Err = ReadBlockAbbrevs(
+ F.DeclsCursor, DECLTYPES_BLOCK_ID, &F.DeclsBlockStartOffset))
+ return Err;
break;
case PREPROCESSOR_BLOCK_ID:
@@ -3015,14 +3017,11 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
if (!PP.getExternalSource())
PP.setExternalSource(this);
- if (llvm::Error Err = Stream.SkipBlock()) {
- Error(std::move(Err));
- return Failure;
- }
- if (ReadBlockAbbrevs(F.MacroCursor, PREPROCESSOR_BLOCK_ID)) {
- Error("malformed block record in AST file");
- return Failure;
- }
+ if (llvm::Error Err = Stream.SkipBlock())
+ return Err;
+ if (llvm::Error Err =
+ ReadBlockAbbrevs(F.MacroCursor, PREPROCESSOR_BLOCK_ID))
+ return Err;
F.MacroStartOffset = F.MacroCursor.GetCurrentBitNo();
break;
@@ -3030,14 +3029,11 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
F.PreprocessorDetailCursor = Stream;
if (llvm::Error Err = Stream.SkipBlock()) {
- Error(std::move(Err));
- return Failure;
- }
- if (ReadBlockAbbrevs(F.PreprocessorDetailCursor,
- PREPROCESSOR_DETAIL_BLOCK_ID)) {
- Error("malformed preprocessor detail record in AST file");
- return Failure;
+ return Err;
}
+ if (llvm::Error Err = ReadBlockAbbrevs(F.PreprocessorDetailCursor,
+ PREPROCESSOR_DETAIL_BLOCK_ID))
+ return Err;
F.PreprocessorDetailStartOffset
= F.PreprocessorDetailCursor.GetCurrentBitNo();
@@ -3048,36 +3044,29 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
break;
case SOURCE_MANAGER_BLOCK_ID:
- if (ReadSourceManagerBlock(F))
- return Failure;
+ if (llvm::Error Err = ReadSourceManagerBlock(F))
+ return Err;
break;
case SUBMODULE_BLOCK_ID:
- if (ASTReadResult Result =
- ReadSubmoduleBlock(F, ClientLoadCapabilities))
- return Result;
+ if (llvm::Error Err = ReadSubmoduleBlock(F, ClientLoadCapabilities))
+ return Err;
break;
case COMMENTS_BLOCK_ID: {
BitstreamCursor C = Stream;
- if (llvm::Error Err = Stream.SkipBlock()) {
- Error(std::move(Err));
- return Failure;
- }
- if (ReadBlockAbbrevs(C, COMMENTS_BLOCK_ID)) {
- Error("malformed comments block in AST file");
- return Failure;
- }
+ if (llvm::Error Err = Stream.SkipBlock())
+ return Err;
+ if (llvm::Error Err = ReadBlockAbbrevs(C, COMMENTS_BLOCK_ID))
+ return Err;
CommentsCursors.push_back(std::make_pair(C, &F));
break;
}
default:
- if (llvm::Error Err = Stream.SkipBlock()) {
- Error(std::move(Err));
- return Failure;
- }
+ if (llvm::Error Err = Stream.SkipBlock())
+ return Err;
break;
}
continue;
@@ -3092,10 +3081,8 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
StringRef Blob;
Expected<unsigned> MaybeRecordType =
Stream.readRecord(Entry.ID, Record, &Blob);
- if (!MaybeRecordType) {
- Error(MaybeRecordType.takeError());
- return Failure;
- }
+ if (!MaybeRecordType)
+ return MaybeRecordType.takeError();
ASTRecordTypes RecordType = (ASTRecordTypes)MaybeRecordType.get();
// If we're not loading an AST context, we don't care about most records.
@@ -3126,10 +3113,10 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
break;
case TYPE_OFFSET: {
- if (F.LocalNumTypes != 0) {
- Error("duplicate TYPE_OFFSET record in AST file");
- return Failure;
- }
+ if (F.LocalNumTypes != 0)
+ return llvm::createStringError(
+ std::errc::illegal_byte_sequence,
+ "duplicate TYPE_OFFSET record in AST file");
F.TypeOffsets = reinterpret_cast<const UnderalignedInt64 *>(Blob.data());
F.LocalNumTypes = Record[0];
unsigned LocalBaseTypeIndex = Record[1];
@@ -3150,10 +3137,10 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
}
case DECL_OFFSET: {
- if (F.LocalNumDecls != 0) {
- Error("duplicate DECL_OFFSET record in AST file");
- return Failure;
- }
+ if (F.LocalNumDecls != 0)
+ return llvm::createStringError(
+ std::errc::illegal_byte_sequence,
+ "duplicate DECL_OFFSET record in AST file");
F.DeclOffsets = (const DeclOffset *)Blob.data();
F.LocalNumDecls = Record[0];
unsigned LocalBaseDeclID = Record[1];
@@ -3218,10 +3205,10 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
break;
case IDENTIFIER_OFFSET: {
- if (F.LocalNumIdentifiers != 0) {
- Error("duplicate IDENTIFIER_OFFSET record in AST file");
- return Failure;
- }
+ if (F.LocalNumIdentifiers != 0)
+ return llvm::createStringError(
+ std::errc::illegal_byte_sequence,
+ "duplicate IDENTIFIER_OFFSET record in AST file");
F.IdentifierOffsets = (const uint32_t *)Blob.data();
F.LocalNumIdentifiers = Record[0];
unsigned LocalBaseIdentifierID = Record[1];
@@ -3272,10 +3259,9 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
break;
}
- if (SpecialTypes.size() != Record.size()) {
- Error("invalid special-types record");
- return Failure;
- }
+ if (SpecialTypes.size() != Record.size())
+ return llvm::createStringError(std::errc::illegal_byte_sequence,
+ "invalid special-types record");
for (unsigned I = 0, N = Record.size(); I != N; ++I) {
serialization::TypeID ID = getGlobalTypeID(F, Record[I]);
@@ -3304,10 +3290,9 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
break;
case WEAK_UNDECLARED_IDENTIFIERS:
- if (Record.size() % 4 != 0) {
- Error("invalid weak identifiers record");
- return Failure;
- }
+ if (Record.size() % 4 != 0)
+ return llvm::createStringError(std::errc::illegal_byte_sequence,
+ "invalid weak identifiers record");
// FIXME: Ignore weak undeclared identifiers from non-original PCH
// files. This isn't the way to do it :)
@@ -3414,10 +3399,9 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
std::tie(F.SLocEntryBaseID, F.SLocEntryBaseOffset) =
SourceMgr.AllocateLoadedSLocEntries(F.LocalNumSLocEntries,
SLocSpaceSize);
- if (!F.SLocEntryBaseID) {
- Error("ran out of source locations");
- break;
- }
+ if (!F.SLocEntryBaseID)
+ return llvm::createStringError(std::errc::invalid_argument,
+ "ran out of source locations");
// Make our entry in the range map. BaseID is negative and growing, so
// we invert it. Because we invert it, though, we need the other end of
// the range.
@@ -3448,19 +3432,16 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
break;
case SOURCE_MANAGER_LINE_TABLE:
- if (ParseLineTable(F, Record)) {
- Error("malformed SOURCE_MANAGER_LINE_TABLE in AST file");
- return Failure;
- }
+ ParseLineTable(F, Record);
break;
case SOURCE_LOCATION_PRELOADS: {
// Need to transform from the local view (1-based IDs) to the global view,
// which is based off F.SLocEntryBaseID.
- if (!F.PreloadSLocEntries.empty()) {
- Error("Multiple SOURCE_LOCATION_PRELOADS records in AST file");
- return Failure;
- }
+ if (!F.PreloadSLocEntries.empty())
+ return llvm::createStringError(
+ std::errc::illegal_byte_sequence,
+ "Multiple SOURCE_LOCATION_PRELOADS records in AST file");
F.PreloadSLocEntries.swap(Record);
break;
@@ -3472,10 +3453,9 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
break;
case VTABLE_USES:
- if (Record.size() % 3 != 0) {
- Error("Invalid VTABLE_USES record");
- return Failure;
- }
+ if (Record.size() % 3 != 0)
+ return llvm::createStringError(std::errc::illegal_byte_sequence,
+ "Invalid VTABLE_USES record");
// Later tables overwrite earlier ones.
// FIXME: Modules will have some trouble with this. This is clearly not
@@ -3491,15 +3471,15 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
break;
case PENDING_IMPLICIT_INSTANTIATIONS:
- if (PendingInstantiations.size() % 2 != 0) {
- Error("Invalid existing PendingInstantiations");
- return Failure;
- }
+ if (PendingInstantiations.size() % 2 != 0)
+ return llvm::createStringError(
+ std::errc::illegal_byte_sequence,
+ "Invalid existing PendingInstantiations");
- if (Record.size() % 2 != 0) {
- Error("Invalid PENDING_IMPLICIT_INSTANTIATIONS block");
- return Failure;
- }
+ if (Record.size() % 2 != 0)
+ return llvm::createStringError(
+ std::errc::illegal_byte_sequence,
+ "Invalid PENDING_IMPLICIT_INSTANTIATIONS block");
for (unsigned I = 0, N = Record.size(); I != N; /* in loop */) {
PendingInstantiations.push_back(getGlobalDeclID(F, Record[I++]));
@@ -3509,10 +3489,9 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
break;
case SEMA_DECL_REFS:
- if (Record.size() != 3) {
- Error("Invalid SEMA_DECL_REFS block");
- return Failure;
- }
+ if (Record.size() != 3)
+ return llvm::createStringError(std::errc::illegal_byte_sequence,
+ "Invalid SEMA_DECL_REFS block");
for (unsigned I = 0, N = Record.size(); I != N; ++I)
SemaDeclRefs.push_back(getGlobalDeclID(F, Record[I]));
break;
@@ -3568,10 +3547,10 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
}
case DECL_UPDATE_OFFSETS:
- if (Record.size() % 2 != 0) {
- Error("invalid DECL_UPDATE_OFFSETS block in AST file");
- return Failure;
- }
+ if (Record.size() % 2 != 0)
+ return llvm::createStringError(
+ std::errc::illegal_byte_sequence,
+ "invalid DECL_UPDATE_OFFSETS block in AST file");
for (unsigned I = 0, N = Record.size(); I != N; I += 2) {
GlobalDeclID ID = getGlobalDeclID(F, Record[I]);
DeclUpdateOffsets[ID].push_back(std::make_pair(&F, Record[I + 1]));
@@ -3585,10 +3564,10 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
break;
case OBJC_CATEGORIES_MAP:
- if (F.LocalNumObjCCategoriesInMap != 0) {
- Error("duplicate OBJC_CATEGORIES_MAP record in AST file");
- return Failure;
- }
+ if (F.LocalNumObjCCategoriesInMap != 0)
+ return llvm::createStringError(
+ std::errc::illegal_byte_sequence,
+ "duplicate OBJC_CATEGORIES_MAP record in AST file");
F.LocalNumObjCCategoriesInMap = Record[0];
F.ObjCCategoriesMap = (const ObjCCategoriesInfo *)Blob.data();
@@ -3653,15 +3632,13 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
break;
case UNDEFINED_BUT_USED:
- if (UndefinedButUsed.size() % 2 != 0) {
- Error("Invalid existing UndefinedButUsed");
- return Failure;
- }
+ if (UndefinedButUsed.size() % 2 != 0)
+ return llvm::createStringError(std::errc::illegal_byte_sequence,
+ "Invalid existing UndefinedButUsed");
- if (Record.size() % 2 != 0) {
- Error("invalid undefined-but-used record");
- return Failure;
- }
+ if (Record.size() % 2 != 0)
+ return llvm::createStringError(std::errc::illegal_byte_sequence,
+ "invalid undefined-but-used record");
for (unsigned I = 0, N = Record.size(); I != N; /* in loop */) {
UndefinedButUsed.push_back(getGlobalDeclID(F, Record[I++]));
UndefinedButUsed.push_back(
@@ -3700,10 +3677,10 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
break;
case MACRO_OFFSET: {
- if (F.LocalNumMacros != 0) {
- Error("duplicate MACRO_OFFSET record in AST file");
- return Failure;
- }
+ if (F.LocalNumMacros != 0)
+ return llvm::createStringError(
+ std::errc::illegal_byte_sequence,
+ "duplicate MACRO_OFFSET record in AST file");
F.MacroOffsets = (const uint32_t *)Blob.data();
F.LocalNumMacros = Record[0];
unsigned LocalBaseMacroID = Record[1];
@@ -3731,26 +3708,24 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
break;
case OPTIMIZE_PRAGMA_OPTIONS:
- if (Record.size() != 1) {
- Error("invalid pragma optimize record");
- return Failure;
- }
+ if (Record.size() != 1)
+ return llvm::createStringError(std::errc::illegal_byte_sequence,
+ "invalid pragma optimize record");
OptimizeOffPragmaLocation = ReadSourceLocation(F, Record[0]);
break;
case MSSTRUCT_PRAGMA_OPTIONS:
- if (Record.size() != 1) {
- Error("invalid pragma ms_struct record");
- return Failure;
- }
+ if (Record.size() != 1)
+ return llvm::createStringError(std::errc::illegal_byte_sequence,
+ "invalid pragma ms_struct record");
PragmaMSStructState = Record[0];
break;
case POINTERS_TO_MEMBERS_PRAGMA_OPTIONS:
- if (Record.size() != 2) {
- Error("invalid pragma ms_struct record");
- return Failure;
- }
+ if (Record.size() != 2)
+ return llvm::createStringError(
+ std::errc::illegal_byte_sequence,
+ "invalid pragma pointers to members record");
PragmaMSPointersToMembersState = Record[0];
PointersToMembersPragmaLocation = ReadSourceLocation(F, Record[1]);
break;
@@ -3762,18 +3737,16 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
break;
case CUDA_PRAGMA_FORCE_HOST_DEVICE_DEPTH:
- if (Record.size() != 1) {
- Error("invalid cuda pragma options record");
- return Failure;
- }
+ if (Record.size() != 1)
+ return llvm::createStringError(std::errc::illegal_byte_sequence,
+ "invalid cuda pragma options record");
ForceCUDAHostDeviceDepth = Record[0];
break;
case ALIGN_PACK_PRAGMA_OPTIONS: {
- if (Record.size() < 3) {
- Error("invalid pragma pack record");
- return Failure;
- }
+ if (Record.size() < 3)
+ return llvm::createStringError(std::errc::illegal_byte_sequence,
+ "invalid pragma pack record");
PragmaAlignPackCurrentValue = ReadAlignPackInfo(Record[0]);
PragmaAlignPackCurrentLocation = ReadSourceLocation(F, Record[1]);
unsigned NumStackEntries = Record[2];
@@ -3793,10 +3766,9 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
}
case FLOAT_CONTROL_PRAGMA_OPTIONS: {
- if (Record.size() < 3) {
- Error("invalid pragma pack record");
- return Failure;
- }
+ if (Record.size() < 3)
+ return llvm::createStringError(std::errc::illegal_byte_sequence,
+ "invalid pragma float control record");
FpPragmaCurrentValue = FPOptionsOverride::getFromOpaqueInt(Record[0]);
FpPragmaCurrentLocation = ReadSourceLocation(F, Record[1]);
unsigned NumStackEntries = Record[2];
@@ -3935,7 +3907,8 @@ ASTReader::ReadModuleMapFileBlock(RecordData &Record, ModuleFile &F,
if (F.Kind == MK_ImplicitModule && ModuleMgr.begin()->Kind != MK_MainFile) {
// An implicitly-loaded module file should have its module listed in some
// module map file that we've already loaded.
- Module *M = PP.getHeaderSearchInfo().lookupModule(F.ModuleName);
+ Module *M =
+ PP.getHeaderSearchInfo().lookupModule(F.ModuleName, F.ImportLoc);
auto &Map = PP.getHeaderSearchInfo().getModuleMap();
const FileEntry *ModMap = M ? Map.getModuleMapFileForUniquing(M) : nullptr;
// Don't emit module relocation error if we have -fno-validate-pch
@@ -4240,8 +4213,11 @@ ASTReader::ASTReadResult ASTReader::ReadAST(StringRef FileName,
PreviousGeneration = incrementGeneration(*ContextObj);
unsigned NumModules = ModuleMgr.size();
- auto removeModulesAndReturn = [&](ASTReadResult ReadResult) {
- assert(ReadResult && "expected to return error");
+ SmallVector<ImportedModule, 4> Loaded;
+ if (ASTReadResult ReadResult =
+ ReadASTCore(FileName, Type, ImportLoc,
+ /*ImportedBy=*/nullptr, Loaded, 0, 0, ASTFileSignature(),
+ ClientLoadCapabilities)) {
ModuleMgr.removeModules(ModuleMgr.begin() + NumModules,
PP.getLangOpts().Modules
? &PP.getHeaderSearchInfo().getModuleMap()
@@ -4252,45 +4228,38 @@ ASTReader::ASTReadResult ASTReader::ReadAST(StringRef FileName,
GlobalIndex.reset();
ModuleMgr.setGlobalIndex(nullptr);
return ReadResult;
- };
-
- SmallVector<ImportedModule, 4> Loaded;
- switch (ASTReadResult ReadResult =
- ReadASTCore(FileName, Type, ImportLoc,
- /*ImportedBy=*/nullptr, Loaded, 0, 0,
- ASTFileSignature(), ClientLoadCapabilities)) {
- case Failure:
- case Missing:
- case OutOfDate:
- case VersionMismatch:
- case ConfigurationMismatch:
- case HadErrors:
- return removeModulesAndReturn(ReadResult);
- case Success:
- break;
}
- // Here comes stuff that we only do once the entire chain is loaded.
+ // Here comes stuff that we only do once the entire chain is loaded. Do *not*
+ // remove modules from this point. Various fields are updated during reading
+ // the AST block and removing the modules would result in dangling pointers.
+ // They are generally only incidentally dereferenced, i.e. a binary search
+ // runs over `GlobalSLocEntryMap`, which could cause an invalid module to
+ // be dereferenced but it wouldn't actually be used.
- // Load the AST blocks of all of the modules that we loaded. We can still
+ // Load the AST blocks of all of the modules that we loaded. We can still
// hit errors parsing the ASTs at this point.
for (ImportedModule &M : Loaded) {
ModuleFile &F = *M.Mod;
// Read the AST block.
- if (ASTReadResult Result = ReadASTBlock(F, ClientLoadCapabilities))
- return removeModulesAndReturn(Result);
+ if (llvm::Error Err = ReadASTBlock(F, ClientLoadCapabilities)) {
+ Error(std::move(Err));
+ return Failure;
+ }
// The AST block should always have a definition for the main module.
if (F.isModule() && !F.DidReadTopLevelSubmodule) {
Error(diag::err_module_file_missing_top_level_submodule, F.FileName);
- return removeModulesAndReturn(Failure);
+ return Failure;
}
// Read the extension blocks.
while (!SkipCursorToBlock(F.Stream, EXTENSION_BLOCK_ID)) {
- if (ASTReadResult Result = ReadExtensionBlock(F))
- return removeModulesAndReturn(Result);
+ if (llvm::Error Err = ReadExtensionBlock(F)) {
+ Error(std::move(Err));
+ return Failure;
+ }
}
// Once read, set the ModuleFile bit base offset and update the size in
@@ -4754,7 +4723,9 @@ ASTReader::ASTReadResult ASTReader::readUnhashedControlBlockImpl(
// Read and process a record.
Record.clear();
- Expected<unsigned> MaybeRecordType = Stream.readRecord(Entry.ID, Record);
+ StringRef Blob;
+ Expected<unsigned> MaybeRecordType =
+ Stream.readRecord(Entry.ID, Record, &Blob);
if (!MaybeRecordType) {
// FIXME this drops the error.
return Failure;
@@ -4786,6 +4757,17 @@ ASTReader::ASTReadResult ASTReader::readUnhashedControlBlockImpl(
F->PragmaDiagMappings.insert(F->PragmaDiagMappings.end(),
Record.begin(), Record.end());
break;
+ case HEADER_SEARCH_ENTRY_USAGE:
+ if (!F)
+ break;
+ unsigned Count = Record[0];
+ const char *Byte = Blob.data();
+ F->SearchPathUsage = llvm::BitVector(Count, 0);
+ for (unsigned I = 0; I < Count; ++Byte)
+ for (unsigned Bit = 0; Bit < 8 && I < Count; ++Bit, ++I)
+ if (*Byte & (1 << Bit))
+ F->SearchPathUsage[I] = 1;
+ break;
}
}
}
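
The HEADER_SEARCH_ENTRY_USAGE record handled above packs one bit per header-search entry, eight per byte, into the record blob; the matching writer-side packing is the bytes(std::vector<bool>) helper added later in ASTWriter.cpp. The sketch below is illustrative only and simply pairs the two loops as a stand-alone round trip.

// Illustrative sketch -- not part of the patch: pack a std::vector<bool> into
// a byte blob (LSB first) and expand it back into an llvm::BitVector.
#include "llvm/ADT/BitVector.h"
#include <string>
#include <vector>

static std::string packBits(const std::vector<bool> &V) {
  std::string Str;
  for (unsigned I = 0, E = V.size(); I < E;) {
    char Byte = 0;
    for (unsigned Bit = 0; Bit < 8 && I < E; ++Bit, ++I)
      Byte |= V[I] << Bit;   // bit I of the vector -> bit I%8 of this byte
    Str += Byte;
  }
  return Str;
}

static llvm::BitVector unpackBits(const std::string &Blob, unsigned Count) {
  llvm::BitVector Bits(Count, false);
  const char *Byte = Blob.data();
  for (unsigned I = 0; I < Count; ++Byte)
    for (unsigned Bit = 0; Bit < 8 && I < Count; ++Bit, ++I)
      if (*Byte & (1 << Bit))
        Bits[I] = true;      // same order the writer used, so indices line up
  return Bits;
}
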
@@ -4811,32 +4793,26 @@ static bool parseModuleFileExtensionMetadata(
return false;
}
-ASTReader::ASTReadResult ASTReader::ReadExtensionBlock(ModuleFile &F) {
+llvm::Error ASTReader::ReadExtensionBlock(ModuleFile &F) {
BitstreamCursor &Stream = F.Stream;
RecordData Record;
while (true) {
Expected<llvm::BitstreamEntry> MaybeEntry = Stream.advance();
- if (!MaybeEntry) {
- Error(MaybeEntry.takeError());
- return Failure;
- }
+ if (!MaybeEntry)
+ return MaybeEntry.takeError();
llvm::BitstreamEntry Entry = MaybeEntry.get();
switch (Entry.Kind) {
case llvm::BitstreamEntry::SubBlock:
- if (llvm::Error Err = Stream.SkipBlock()) {
- Error(std::move(Err));
- return Failure;
- }
+ if (llvm::Error Err = Stream.SkipBlock())
+ return Err;
continue;
-
case llvm::BitstreamEntry::EndBlock:
- return Success;
-
+ return llvm::Error::success();
case llvm::BitstreamEntry::Error:
- return HadErrors;
-
+ return llvm::createStringError(std::errc::illegal_byte_sequence,
+ "malformed block record in AST file");
case llvm::BitstreamEntry::Record:
break;
}
@@ -4845,17 +4821,15 @@ ASTReader::ASTReadResult ASTReader::ReadExtensionBlock(ModuleFile &F) {
StringRef Blob;
Expected<unsigned> MaybeRecCode =
Stream.readRecord(Entry.ID, Record, &Blob);
- if (!MaybeRecCode) {
- Error(MaybeRecCode.takeError());
- return Failure;
- }
+ if (!MaybeRecCode)
+ return MaybeRecCode.takeError();
switch (MaybeRecCode.get()) {
case EXTENSION_METADATA: {
ModuleFileExtensionMetadata Metadata;
- if (parseModuleFileExtensionMetadata(Record, Blob, Metadata)) {
- Error("malformed EXTENSION_METADATA in AST file");
- return Failure;
- }
+ if (parseModuleFileExtensionMetadata(Record, Blob, Metadata))
+ return llvm::createStringError(
+ std::errc::illegal_byte_sequence,
+ "malformed EXTENSION_METADATA in AST file");
// Find a module file extension with this block name.
auto Known = ModuleFileExtensions.find(Metadata.BlockName);
@@ -4872,7 +4846,7 @@ ASTReader::ASTReadResult ASTReader::ReadExtensionBlock(ModuleFile &F) {
}
}
- return Success;
+ return llvm::Error::success();
}
void ASTReader::InitializeContext() {
@@ -5452,13 +5426,11 @@ bool ASTReader::isAcceptableASTFile(StringRef Filename, FileManager &FileMgr,
/*ValidateDiagnosticOptions=*/true);
}
-ASTReader::ASTReadResult
-ASTReader::ReadSubmoduleBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
+llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F,
+ unsigned ClientLoadCapabilities) {
// Enter the submodule block.
- if (llvm::Error Err = F.Stream.EnterSubBlock(SUBMODULE_BLOCK_ID)) {
- Error(std::move(Err));
- return Failure;
- }
+ if (llvm::Error Err = F.Stream.EnterSubBlock(SUBMODULE_BLOCK_ID))
+ return Err;
ModuleMap &ModMap = PP.getHeaderSearchInfo().getModuleMap();
bool First = true;
@@ -5467,19 +5439,17 @@ ASTReader::ReadSubmoduleBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
while (true) {
Expected<llvm::BitstreamEntry> MaybeEntry =
F.Stream.advanceSkippingSubblocks();
- if (!MaybeEntry) {
- Error(MaybeEntry.takeError());
- return Failure;
- }
+ if (!MaybeEntry)
+ return MaybeEntry.takeError();
llvm::BitstreamEntry Entry = MaybeEntry.get();
switch (Entry.Kind) {
case llvm::BitstreamEntry::SubBlock: // Handled for us already.
case llvm::BitstreamEntry::Error:
- Error("malformed block record in AST file");
- return Failure;
+ return llvm::createStringError(std::errc::illegal_byte_sequence,
+ "malformed block record in AST file");
case llvm::BitstreamEntry::EndBlock:
- return Success;
+ return llvm::Error::success();
case llvm::BitstreamEntry::Record:
// The interesting case.
break;
@@ -5489,16 +5459,14 @@ ASTReader::ReadSubmoduleBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
StringRef Blob;
Record.clear();
Expected<unsigned> MaybeKind = F.Stream.readRecord(Entry.ID, Record, &Blob);
- if (!MaybeKind) {
- Error(MaybeKind.takeError());
- return Failure;
- }
+ if (!MaybeKind)
+ return MaybeKind.takeError();
unsigned Kind = MaybeKind.get();
- if ((Kind == SUBMODULE_METADATA) != First) {
- Error("submodule metadata record should be at beginning of block");
- return Failure;
- }
+ if ((Kind == SUBMODULE_METADATA) != First)
+ return llvm::createStringError(
+ std::errc::illegal_byte_sequence,
+ "submodule metadata record should be at beginning of block");
First = false;
// Submodule information is only valid if we have a current module.
@@ -5512,10 +5480,9 @@ ASTReader::ReadSubmoduleBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
break;
case SUBMODULE_DEFINITION: {
- if (Record.size() < 12) {
- Error("malformed module definition");
- return Failure;
- }
+ if (Record.size() < 12)
+ return llvm::createStringError(std::errc::illegal_byte_sequence,
+ "malformed module definition");
StringRef Name = Blob;
unsigned Idx = 0;
@@ -5547,10 +5514,9 @@ ASTReader::ReadSubmoduleBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
SubmoduleID GlobalIndex = GlobalID - NUM_PREDEF_SUBMODULE_IDS;
if (GlobalIndex >= SubmodulesLoaded.size() ||
- SubmodulesLoaded[GlobalIndex]) {
- Error("too many submodules");
- return Failure;
- }
+ SubmodulesLoaded[GlobalIndex])
+ return llvm::createStringError(std::errc::invalid_argument,
+ "too many submodules");
if (!ParentModule) {
if (const FileEntry *CurFile = CurrentModule->getASTFile()) {
@@ -5558,10 +5524,12 @@ ASTReader::ReadSubmoduleBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
if (!bool(PP.getPreprocessorOpts().DisablePCHOrModuleValidation &
DisableValidationForModuleKind::Module) &&
CurFile != F.File) {
- Error(diag::err_module_file_conflict,
- CurrentModule->getTopLevelModuleName(), CurFile->getName(),
- F.File->getName());
- return Failure;
+ auto ConflictError =
+ PartialDiagnostic(diag::err_module_file_conflict,
+ ContextObj->DiagAllocator)
+ << CurrentModule->getTopLevelModuleName() << CurFile->getName()
+ << F.File->getName();
+ return DiagnosticError::create(CurrentImportLoc, ConflictError);
}
}
@@ -5605,17 +5573,20 @@ ASTReader::ReadSubmoduleBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
}
case SUBMODULE_UMBRELLA_HEADER: {
+ // FIXME: This doesn't work for framework modules as `Filename` is the
+ // name as written in the module file and does not include
+ // `Headers/`, so this path will never exist.
std::string Filename = std::string(Blob);
ResolveImportedPath(F, Filename);
if (auto Umbrella = PP.getFileManager().getFile(Filename)) {
- if (!CurrentModule->getUmbrellaHeader())
+ if (!CurrentModule->getUmbrellaHeader()) {
// FIXME: NameAsWritten
ModMap.setUmbrellaHeader(CurrentModule, *Umbrella, Blob, "");
- else if (CurrentModule->getUmbrellaHeader().Entry != *Umbrella) {
- if ((ClientLoadCapabilities & ARR_OutOfDate) == 0)
- Error("mismatched umbrella headers in submodule");
- return OutOfDate;
}
+ // Note that it's too late at this point to return out of date if the
+ // name from the PCM doesn't match up with the one in the module map,
+ // but such a mismatch is also quite unlikely, since we will have already
+ // checked the modification time and size of the module map file itself.
}
break;
}
@@ -5639,16 +5610,13 @@ ASTReader::ReadSubmoduleBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
break;
case SUBMODULE_UMBRELLA_DIR: {
+ // See comments in SUBMODULE_UMBRELLA_HEADER
std::string Dirname = std::string(Blob);
ResolveImportedPath(F, Dirname);
if (auto Umbrella = PP.getFileManager().getDirectory(Dirname)) {
- if (!CurrentModule->getUmbrellaDir())
+ if (!CurrentModule->getUmbrellaDir()) {
// FIXME: NameAsWritten
ModMap.setUmbrellaDir(CurrentModule, *Umbrella, Blob, "");
- else if (CurrentModule->getUmbrellaDir().Entry != *Umbrella) {
- if ((ClientLoadCapabilities & ARR_OutOfDate) == 0)
- Error("mismatched umbrella directories in submodule");
- return OutOfDate;
}
}
break;
@@ -6984,6 +6952,9 @@ QualType ASTReader::GetType(TypeID ID) {
case PREDEF_TYPE_FLOAT128_ID:
T = Context.Float128Ty;
break;
+ case PREDEF_TYPE_IBM128_ID:
+ T = Context.Ibm128Ty;
+ break;
case PREDEF_TYPE_OVERLOAD_ID:
T = Context.OverloadTy;
break;
@@ -7730,24 +7701,17 @@ void ASTReader::StartTranslationUnit(ASTConsumer *Consumer) {
void ASTReader::PrintStats() {
std::fprintf(stderr, "*** AST File Statistics:\n");
- unsigned NumTypesLoaded
- = TypesLoaded.size() - std::count(TypesLoaded.begin(), TypesLoaded.end(),
- QualType());
- unsigned NumDeclsLoaded
- = DeclsLoaded.size() - std::count(DeclsLoaded.begin(), DeclsLoaded.end(),
- (Decl *)nullptr);
- unsigned NumIdentifiersLoaded
- = IdentifiersLoaded.size() - std::count(IdentifiersLoaded.begin(),
- IdentifiersLoaded.end(),
- (IdentifierInfo *)nullptr);
- unsigned NumMacrosLoaded
- = MacrosLoaded.size() - std::count(MacrosLoaded.begin(),
- MacrosLoaded.end(),
- (MacroInfo *)nullptr);
- unsigned NumSelectorsLoaded
- = SelectorsLoaded.size() - std::count(SelectorsLoaded.begin(),
- SelectorsLoaded.end(),
- Selector());
+ unsigned NumTypesLoaded =
+ TypesLoaded.size() - llvm::count(TypesLoaded, QualType());
+ unsigned NumDeclsLoaded =
+ DeclsLoaded.size() - llvm::count(DeclsLoaded, (Decl *)nullptr);
+ unsigned NumIdentifiersLoaded =
+ IdentifiersLoaded.size() -
+ llvm::count(IdentifiersLoaded, (IdentifierInfo *)nullptr);
+ unsigned NumMacrosLoaded =
+ MacrosLoaded.size() - llvm::count(MacrosLoaded, (MacroInfo *)nullptr);
+ unsigned NumSelectorsLoaded =
+ SelectorsLoaded.size() - llvm::count(SelectorsLoaded, Selector());
if (unsigned TotalNumSLocEntries = getTotalNumSLocs())
std::fprintf(stderr, " %u/%u source location entries read (%f%%)\n",
@@ -8187,13 +8151,16 @@ namespace serialization {
if (Reader.DeserializationListener)
Reader.DeserializationListener->SelectorRead(Data.ID, Sel);
- InstanceMethods.append(Data.Instance.begin(), Data.Instance.end());
- FactoryMethods.append(Data.Factory.begin(), Data.Factory.end());
+ // Append methods in the reverse order, so that later we can process them
+ // in the order they appear in the source code by iterating through
+ // the vector in the reverse order.
+ InstanceMethods.append(Data.Instance.rbegin(), Data.Instance.rend());
+ FactoryMethods.append(Data.Factory.rbegin(), Data.Factory.rend());
InstanceBits = Data.InstanceBits;
FactoryBits = Data.FactoryBits;
InstanceHasMoreThanOneDecl = Data.InstanceHasMoreThanOneDecl;
FactoryHasMoreThanOneDecl = Data.FactoryHasMoreThanOneDecl;
- return true;
+ return false;
}
/// Retrieve the instance methods found by this visitor.
@@ -8222,9 +8189,8 @@ namespace serialization {
/// Add the given set of methods to the method list.
static void addMethodsToPool(Sema &S, ArrayRef<ObjCMethodDecl *> Methods,
ObjCMethodList &List) {
- for (unsigned I = 0, N = Methods.size(); I != N; ++I) {
- S.addMethodToGlobalList(&List, Methods[I]);
- }
+ for (auto I = Methods.rbegin(), E = Methods.rend(); I != E; ++I)
+ S.addMethodToGlobalList(&List, *I);
}
void ASTReader::ReadMethodPool(Selector Sel) {
@@ -8249,8 +8215,9 @@ void ASTReader::ReadMethodPool(Selector Sel) {
return;
Sema &S = *getSema();
- Sema::GlobalMethodPool::iterator Pos
- = S.MethodPool.insert(std::make_pair(Sel, Sema::GlobalMethods())).first;
+ Sema::GlobalMethodPool::iterator Pos =
+ S.MethodPool.insert(std::make_pair(Sel, Sema::GlobalMethodPool::Lists()))
+ .first;
Pos->second.first.setBits(Visitor.getInstanceBits());
Pos->second.first.setHasMoreThanOneDecl(Visitor.instanceHasMoreThanOneDecl());
@@ -8456,6 +8423,8 @@ void ASTReader::ReadLateParsedTemplates(
LPTMap.insert(std::make_pair(FD, std::move(LT)));
}
}
+
+ LateParsedTemplates.clear();
}
void ASTReader::LoadSelector(Selector Sel) {
@@ -12002,6 +11971,12 @@ OMPClause *OMPClauseReader::readClause() {
case llvm::omp::OMPC_filter:
C = new (Context) OMPFilterClause();
break;
+ case llvm::omp::OMPC_bind:
+ C = OMPBindClause::CreateEmpty(Context);
+ break;
+ case llvm::omp::OMPC_align:
+ C = new (Context) OMPAlignClause();
+ break;
#define OMP_CLAUSE_NO_CLASS(Enum, Str) \
case llvm::omp::Enum: \
break;
@@ -12986,6 +12961,17 @@ void OMPClauseReader::VisitOMPFilterClause(OMPFilterClause *C) {
C->setLParenLoc(Record.readSourceLocation());
}
+void OMPClauseReader::VisitOMPBindClause(OMPBindClause *C) {
+ C->setBindKind(Record.readEnum<OpenMPBindClauseKind>());
+ C->setLParenLoc(Record.readSourceLocation());
+ C->setBindKindLoc(Record.readSourceLocation());
+}
+
+void OMPClauseReader::VisitOMPAlignClause(OMPAlignClause *C) {
+ C->setAlignment(Record.readExpr());
+ C->setLParenLoc(Record.readSourceLocation());
+}
+
OMPTraitInfo *ASTRecordReader::readOMPTraitInfo() {
OMPTraitInfo &TI = getContext().getNewOMPTraitInfo();
TI.Sets.resize(readUInt32());
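
A constraint worth keeping in mind for the reader changes here and the matching writer changes further down: ASTReader and ASTWriter visitors must stay field-for-field symmetric. When the writer starts emitting an extra field (the NumClauses count for taskwait and meta directives, the IfStmt statement kind, the OMPAllocateDeclAttr alignment expression), the reader must consume it at exactly the same position, or skip it explicitly with skipInts. The toy sketch below is illustrative only; it uses a plain vector as a stand-in for the serialized record stream.

// Illustrative sketch -- not part of the patch. A record is just a sequence of
// values, so the reader must pull fields back in the order they were pushed.
#include <cassert>
#include <cstdint>
#include <vector>

struct Clause {
  uint64_t Kind;
  uint64_t LParenLoc;
  uint64_t KindLoc;
};

static void writeClause(const Clause &C, std::vector<uint64_t> &Record) {
  Record.push_back(C.Kind);       // 1. clause kind
  Record.push_back(C.LParenLoc);  // 2. '(' location
  Record.push_back(C.KindLoc);    // 3. kind location
}

static Clause readClause(const std::vector<uint64_t> &Record) {
  // Read back in exactly the order writeClause emitted the fields.
  return Clause{Record[0], Record[1], Record[2]};
}

int main() {
  std::vector<uint64_t> Record;
  writeClause({1, 10, 12}, Record);
  Clause C = readClause(Record);
  assert(C.Kind == 1 && C.LParenLoc == 10 && C.KindLoc == 12);
  return 0;
}
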
diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp
index ff79f91e5db1..62a31f299d6b 100644
--- a/clang/lib/Serialization/ASTReaderDecl.cpp
+++ b/clang/lib/Serialization/ASTReaderDecl.cpp
@@ -332,7 +332,7 @@ namespace clang {
RedeclarableResult VisitTagDecl(TagDecl *TD);
void VisitEnumDecl(EnumDecl *ED);
RedeclarableResult VisitRecordDeclImpl(RecordDecl *RD);
- void VisitRecordDecl(RecordDecl *RD) { VisitRecordDeclImpl(RD); }
+ void VisitRecordDecl(RecordDecl *RD);
RedeclarableResult VisitCXXRecordDeclImpl(CXXRecordDecl *D);
void VisitCXXRecordDecl(CXXRecordDecl *D) { VisitCXXRecordDeclImpl(D); }
RedeclarableResult VisitClassTemplateSpecializationDeclImpl(
@@ -808,6 +808,34 @@ ASTDeclReader::VisitRecordDeclImpl(RecordDecl *RD) {
return Redecl;
}
+void ASTDeclReader::VisitRecordDecl(RecordDecl *RD) {
+ VisitRecordDeclImpl(RD);
+
+ // Maintain the invariant of a redeclaration chain containing only
+ // a single definition.
+ if (RD->isCompleteDefinition()) {
+ RecordDecl *Canon = static_cast<RecordDecl *>(RD->getCanonicalDecl());
+ RecordDecl *&OldDef = Reader.RecordDefinitions[Canon];
+ if (!OldDef) {
+ // This is the first time we've seen an imported definition. Look for a
+ // local definition before deciding that we are the first definition.
+ for (auto *D : merged_redecls(Canon)) {
+ if (!D->isFromASTFile() && D->isCompleteDefinition()) {
+ OldDef = D;
+ break;
+ }
+ }
+ }
+ if (OldDef) {
+ Reader.MergedDeclContexts.insert(std::make_pair(RD, OldDef));
+ RD->setCompleteDefinition(false);
+ Reader.mergeDefinitionVisibility(OldDef, RD);
+ } else {
+ OldDef = RD;
+ }
+ }
+}
+
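
The new VisitRecordDecl keeps at most one complete definition per redeclaration chain: the first definition seen (preferring a local, non-AST-file one) is recorded against the canonical declaration, and any later imported definition is demoted and merged into it. The sketch below is illustrative only, reducing that bookkeeping to a map keyed by the canonical declaration.

// Illustrative sketch -- not part of the patch: "first definition wins"
// bookkeeping for a redeclaration chain.
#include <map>

struct Decl {
  Decl *Canonical;      // canonical declaration of the redecl chain
  bool IsDefinition;    // does this redeclaration provide the definition?
  bool Complete = true; // demoted to false if another definition already won
};

static void registerDefinition(Decl *D,
                               std::map<Decl *, Decl *> &Definitions) {
  if (!D->IsDefinition)
    return;
  Decl *&Old = Definitions[D->Canonical];
  if (Old)
    D->Complete = false; // keep the previously chosen definition
  else
    Old = D;             // this one becomes the chain's definition
}
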
void ASTDeclReader::VisitValueDecl(ValueDecl *VD) {
VisitNamedDecl(VD);
// For function declarations, defer reading the type in case the function has
@@ -1149,6 +1177,13 @@ void ASTDeclReader::ReadObjCDefinitionData(
void ASTDeclReader::MergeDefinitionData(ObjCInterfaceDecl *D,
struct ObjCInterfaceDecl::DefinitionData &&NewDD) {
+ struct ObjCInterfaceDecl::DefinitionData &DD = D->data();
+ if (DD.Definition != NewDD.Definition) {
+ Reader.MergedDeclContexts.insert(
+ std::make_pair(NewDD.Definition, DD.Definition));
+ Reader.mergeDefinitionVisibility(DD.Definition, NewDD.Definition);
+ }
+
// FIXME: odr checking?
}
@@ -1215,6 +1250,13 @@ void ASTDeclReader::ReadObjCDefinitionData(
void ASTDeclReader::MergeDefinitionData(ObjCProtocolDecl *D,
struct ObjCProtocolDecl::DefinitionData &&NewDD) {
+ struct ObjCProtocolDecl::DefinitionData &DD = D->data();
+ if (DD.Definition != NewDD.Definition) {
+ Reader.MergedDeclContexts.insert(
+ std::make_pair(NewDD.Definition, DD.Definition));
+ Reader.mergeDefinitionVisibility(DD.Definition, NewDD.Definition);
+ }
+
// FIXME: odr checking?
}
@@ -1736,7 +1778,7 @@ void ASTDeclReader::ReadCXXDefinitionData(
Data.HasODRHash = true;
if (Record.readInt()) {
- Reader.DefinitionSource[D] =
+ Reader.DefinitionSource[D] =
Loc.F->Kind == ModuleKind::MK_MainFile ||
Reader.getContext().getLangOpts().BuildingPCHWithObjectFile;
}
@@ -2645,7 +2687,7 @@ static bool allowODRLikeMergeInC(NamedDecl *ND) {
if (!ND)
return false;
// TODO: implement merge for other necessary decls.
- if (isa<EnumConstantDecl>(ND))
+ if (isa<EnumConstantDecl, FieldDecl, IndirectFieldDecl>(ND))
return true;
return false;
}
@@ -3315,10 +3357,16 @@ DeclContext *ASTDeclReader::getPrimaryContextForMerging(ASTReader &Reader,
return DD->Definition;
}
+ if (auto *RD = dyn_cast<RecordDecl>(DC))
+ return RD->getDefinition();
+
if (auto *ED = dyn_cast<EnumDecl>(DC))
return ED->getASTContext().getLangOpts().CPlusPlus? ED->getDefinition()
: nullptr;
+ if (auto *OID = dyn_cast<ObjCInterfaceDecl>(DC))
+ return OID->getDefinition();
+
// We can see the TU here only if we have no Sema object. In that case,
// there's no TU scope to look in, so using the DC alone is sufficient.
if (auto *TU = dyn_cast<TranslationUnitDecl>(DC))
@@ -3398,6 +3446,9 @@ ASTDeclReader::getPrimaryDCForAnonymousDecl(DeclContext *LexicalDC) {
if (auto *MD = dyn_cast<ObjCMethodDecl>(D))
if (MD->isThisDeclarationADefinition())
return MD;
+ if (auto *RD = dyn_cast<RecordDecl>(D))
+ if (RD->isThisDeclarationADefinition())
+ return RD;
}
// No merged definition yet.
@@ -3817,7 +3868,7 @@ Decl *ASTReader::ReadDeclRecord(DeclID ID) {
Expected<unsigned> MaybeDeclCode = Record.readRecord(DeclsCursor, Code);
if (!MaybeDeclCode)
llvm::report_fatal_error(
- "ASTReader::readDeclRecord failed reading decl code: " +
+ Twine("ASTReader::readDeclRecord failed reading decl code: ") +
toString(MaybeDeclCode.takeError()));
switch ((DeclCode)MaybeDeclCode.get()) {
case DECL_CONTEXT_LEXICAL:
@@ -4202,12 +4253,12 @@ void ASTReader::loadDeclUpdateRecords(PendingUpdateRecord &Record) {
if (llvm::Error JumpFailed = Cursor.JumpToBit(Offset))
// FIXME don't do a fatal error.
llvm::report_fatal_error(
- "ASTReader::loadDeclUpdateRecords failed jumping: " +
+ Twine("ASTReader::loadDeclUpdateRecords failed jumping: ") +
toString(std::move(JumpFailed)));
Expected<unsigned> MaybeCode = Cursor.ReadCode();
if (!MaybeCode)
llvm::report_fatal_error(
- "ASTReader::loadDeclUpdateRecords failed reading code: " +
+ Twine("ASTReader::loadDeclUpdateRecords failed reading code: ") +
toString(MaybeCode.takeError()));
unsigned Code = MaybeCode.get();
ASTRecordReader Record(*this, *F);
@@ -4216,7 +4267,7 @@ void ASTReader::loadDeclUpdateRecords(PendingUpdateRecord &Record) {
"Expected DECL_UPDATES record!");
else
llvm::report_fatal_error(
- "ASTReader::loadDeclUpdateRecords failed reading rec code: " +
+ Twine("ASTReader::loadDeclUpdateRecords failed reading rec code: ") +
toString(MaybeCode.takeError()));
ASTDeclReader Reader(*this, Record, RecordLocation(F, Offset), ID,
@@ -4283,14 +4334,14 @@ void ASTReader::loadPendingDeclChain(Decl *FirstLocal, uint64_t LocalOffset) {
SavedStreamPosition SavedPosition(Cursor);
if (llvm::Error JumpFailed = Cursor.JumpToBit(LocalOffset))
llvm::report_fatal_error(
- "ASTReader::loadPendingDeclChain failed jumping: " +
+ Twine("ASTReader::loadPendingDeclChain failed jumping: ") +
toString(std::move(JumpFailed)));
RecordData Record;
Expected<unsigned> MaybeCode = Cursor.ReadCode();
if (!MaybeCode)
llvm::report_fatal_error(
- "ASTReader::loadPendingDeclChain failed reading code: " +
+ Twine("ASTReader::loadPendingDeclChain failed reading code: ") +
toString(MaybeCode.takeError()));
unsigned Code = MaybeCode.get();
if (Expected<unsigned> MaybeRecCode = Cursor.readRecord(Code, Record))
@@ -4298,7 +4349,7 @@ void ASTReader::loadPendingDeclChain(Decl *FirstLocal, uint64_t LocalOffset) {
"expected LOCAL_REDECLARATIONS record!");
else
llvm::report_fatal_error(
- "ASTReader::loadPendingDeclChain failed reading rec code: " +
+ Twine("ASTReader::loadPendingDeclChain failed reading rec code: ") +
toString(MaybeCode.takeError()));
// FIXME: We have several different dispatches on decl kind here; maybe
@@ -4706,9 +4757,10 @@ void ASTDeclReader::UpdateDecl(Decl *D,
auto AllocatorKind =
static_cast<OMPAllocateDeclAttr::AllocatorTypeTy>(Record.readInt());
Expr *Allocator = Record.readExpr();
+ Expr *Alignment = Record.readExpr();
SourceRange SR = readSourceRange();
D->addAttr(OMPAllocateDeclAttr::CreateImplicit(
- Reader.getContext(), AllocatorKind, Allocator, SR,
+ Reader.getContext(), AllocatorKind, Allocator, Alignment, SR,
AttributeCommonInfo::AS_Pragma));
break;
}
diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp
index b100f946f558..b82a334b763c 100644
--- a/clang/lib/Serialization/ASTReaderStmt.cpp
+++ b/clang/lib/Serialization/ASTReaderStmt.cpp
@@ -213,11 +213,11 @@ void ASTStmtReader::VisitAttributedStmt(AttributedStmt *S) {
void ASTStmtReader::VisitIfStmt(IfStmt *S) {
VisitStmt(S);
- S->setConstexpr(Record.readInt());
bool HasElse = Record.readInt();
bool HasVar = Record.readInt();
bool HasInit = Record.readInt();
+ S->setStatementKind(static_cast<IfStatementKind>(Record.readInt()));
S->setCond(Record.readSubExpr());
S->setThen(Record.readSubStmt());
if (HasElse)
@@ -2307,6 +2307,13 @@ void ASTStmtReader::VisitOMPLoopDirective(OMPLoopDirective *D) {
VisitOMPLoopBasedDirective(D);
}
+void ASTStmtReader::VisitOMPMetaDirective(OMPMetaDirective *D) {
+ VisitStmt(D);
+ // The NumClauses field was read in ReadStmtFromStream.
+ Record.skipInts(1);
+ VisitOMPExecutableDirective(D);
+}
+
void ASTStmtReader::VisitOMPParallelDirective(OMPParallelDirective *D) {
VisitStmt(D);
VisitOMPExecutableDirective(D);
@@ -2317,12 +2324,18 @@ void ASTStmtReader::VisitOMPSimdDirective(OMPSimdDirective *D) {
VisitOMPLoopDirective(D);
}
-void ASTStmtReader::VisitOMPTileDirective(OMPTileDirective *D) {
+void ASTStmtReader::VisitOMPLoopTransformationDirective(
+ OMPLoopTransformationDirective *D) {
VisitOMPLoopBasedDirective(D);
+ D->setNumGeneratedLoops(Record.readUInt32());
+}
+
+void ASTStmtReader::VisitOMPTileDirective(OMPTileDirective *D) {
+ VisitOMPLoopTransformationDirective(D);
}
void ASTStmtReader::VisitOMPUnrollDirective(OMPUnrollDirective *D) {
- VisitOMPLoopBasedDirective(D);
+ VisitOMPLoopTransformationDirective(D);
}
void ASTStmtReader::VisitOMPForDirective(OMPForDirective *D) {
@@ -2403,6 +2416,8 @@ void ASTStmtReader::VisitOMPBarrierDirective(OMPBarrierDirective *D) {
void ASTStmtReader::VisitOMPTaskwaitDirective(OMPTaskwaitDirective *D) {
VisitStmt(D);
+ // The NumClauses field was read in ReadStmtFromStream.
+ Record.skipInts(1);
VisitOMPExecutableDirective(D);
}
@@ -2619,6 +2634,10 @@ void ASTStmtReader::VisitOMPMaskedDirective(OMPMaskedDirective *D) {
VisitOMPExecutableDirective(D);
}
+void ASTStmtReader::VisitOMPGenericLoopDirective(OMPGenericLoopDirective *D) {
+ VisitOMPLoopDirective(D);
+}
+
//===----------------------------------------------------------------------===//
// ASTReader Implementation
//===----------------------------------------------------------------------===//
@@ -2746,9 +2765,9 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) {
case STMT_IF:
S = IfStmt::CreateEmpty(
Context,
- /* HasElse=*/Record[ASTStmtReader::NumStmtFields + 1],
- /* HasVar=*/Record[ASTStmtReader::NumStmtFields + 2],
- /* HasInit=*/Record[ASTStmtReader::NumStmtFields + 3]);
+ /* HasElse=*/Record[ASTStmtReader::NumStmtFields],
+ /* HasVar=*/Record[ASTStmtReader::NumStmtFields + 1],
+ /* HasInit=*/Record[ASTStmtReader::NumStmtFields + 2]);
break;
case STMT_SWITCH:
@@ -3183,6 +3202,11 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) {
S = OMPCanonicalLoop::createEmpty(Context);
break;
+ case STMT_OMP_META_DIRECTIVE:
+ S = OMPMetaDirective::CreateEmpty(
+ Context, Record[ASTStmtReader::NumStmtFields], Empty);
+ break;
+
case STMT_OMP_PARALLEL_DIRECTIVE:
S =
OMPParallelDirective::CreateEmpty(Context,
@@ -3291,7 +3315,8 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) {
break;
case STMT_OMP_TASKWAIT_DIRECTIVE:
- S = OMPTaskwaitDirective::CreateEmpty(Context, Empty);
+ S = OMPTaskwaitDirective::CreateEmpty(
+ Context, Record[ASTStmtReader::NumStmtFields], Empty);
break;
case STMT_OMP_TASKGROUP_DIRECTIVE:
@@ -3560,6 +3585,14 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) {
Context, Record[ASTStmtReader::NumStmtFields], Empty);
break;
+ case STMT_OMP_GENERIC_LOOP_DIRECTIVE: {
+ unsigned CollapsedNum = Record[ASTStmtReader::NumStmtFields];
+ unsigned NumClauses = Record[ASTStmtReader::NumStmtFields + 1];
+ S = OMPGenericLoopDirective::CreateEmpty(Context, NumClauses,
+ CollapsedNum, Empty);
+ break;
+ }
+
case EXPR_CXX_OPERATOR_CALL:
S = CXXOperatorCallExpr::CreateEmpty(
Context, /*NumArgs=*/Record[ASTStmtReader::NumExprFields],
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index 66c207ad9243..a1972f5c6496 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -132,6 +132,18 @@ static StringRef bytes(const SmallVectorImpl<T> &v) {
sizeof(T) * v.size());
}
+static std::string bytes(const std::vector<bool> &V) {
+ std::string Str;
+ Str.reserve(V.size() / 8);
+ for (unsigned I = 0, E = V.size(); I < E;) {
+ char Byte = 0;
+ for (unsigned Bit = 0; Bit < 8 && I < E; ++Bit, ++I)
+ Byte |= V[I] << Bit;
+ Str += Byte;
+ }
+ return Str;
+}
+
//===----------------------------------------------------------------------===//
// Type serialization
//===----------------------------------------------------------------------===//
@@ -149,6 +161,59 @@ static TypeCode getTypeCodeForTypeClass(Type::TypeClass id) {
namespace {
+std::set<const FileEntry *> GetAllModuleMaps(const HeaderSearch &HS,
+ Module *RootModule) {
+ std::set<const FileEntry *> ModuleMaps{};
+ std::set<Module *> ProcessedModules;
+ SmallVector<Module *> ModulesToProcess{RootModule};
+
+ SmallVector<const FileEntry *, 16> FilesByUID;
+ HS.getFileMgr().GetUniqueIDMapping(FilesByUID);
+
+ if (FilesByUID.size() > HS.header_file_size())
+ FilesByUID.resize(HS.header_file_size());
+
+ for (unsigned UID = 0, LastUID = FilesByUID.size(); UID != LastUID; ++UID) {
+ const FileEntry *File = FilesByUID[UID];
+ if (!File)
+ continue;
+
+ const HeaderFileInfo *HFI =
+ HS.getExistingFileInfo(File, /*WantExternal*/ false);
+ if (!HFI || (HFI->isModuleHeader && !HFI->isCompilingModuleHeader))
+ continue;
+
+ for (const auto &KH : HS.findAllModulesForHeader(File)) {
+ if (!KH.getModule())
+ continue;
+ ModulesToProcess.push_back(KH.getModule());
+ }
+ }
+
+ while (!ModulesToProcess.empty()) {
+ auto *CurrentModule = ModulesToProcess.pop_back_val();
+ ProcessedModules.insert(CurrentModule);
+
+ auto *ModuleMapFile =
+ HS.getModuleMap().getModuleMapFileForUniquing(CurrentModule);
+ if (!ModuleMapFile) {
+ continue;
+ }
+
+ ModuleMaps.insert(ModuleMapFile);
+
+ for (auto *ImportedModule : (CurrentModule)->Imports) {
+ if (!ImportedModule ||
+ ProcessedModules.find(ImportedModule) != ProcessedModules.end()) {
+ continue;
+ }
+ ModulesToProcess.push_back(ImportedModule);
+ }
+ }
+
+ return ModuleMaps;
+}
+
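
GetAllModuleMaps above is a worklist traversal: seed it with the module being written plus every module that owns a header visible to this compilation, then repeatedly pop a module, record its uniquing module map, and push any imports that have not been processed yet. The reduced sketch below is illustrative only; Node and MapFile are made-up stand-ins for clang::Module and its module map entry.

// Illustrative sketch -- not part of the patch: the same worklist walk over a
// toy import graph.
#include <set>
#include <string>
#include <vector>

struct Node {
  std::string MapFile;          // module map that defines this module
  std::vector<Node *> Imports;  // modules this one imports
};

static std::set<std::string> collectMaps(Node *Root) {
  std::set<std::string> Maps;
  std::set<Node *> Processed;
  std::vector<Node *> Worklist{Root};
  while (!Worklist.empty()) {
    Node *Current = Worklist.back();
    Worklist.pop_back();
    Processed.insert(Current);
    if (!Current->MapFile.empty())
      Maps.insert(Current->MapFile);          // record this module's map
    for (Node *Imported : Current->Imports)
      if (Imported && !Processed.count(Imported))
        Worklist.push_back(Imported);         // visit unseen imports later
  }
  return Maps;
}
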
class ASTTypeWriter {
ASTWriter &Writer;
ASTWriter::RecordData Record;
@@ -1050,6 +1115,8 @@ ASTWriter::createSignature(StringRef AllBytes, StringRef ASTBlockBytes) {
ASTFileSignature ASTWriter::writeUnhashedControlBlock(Preprocessor &PP,
ASTContext &Context) {
+ using namespace llvm;
+
// Flush first to prepare the PCM hash (signature).
Stream.FlushToWord();
auto StartOfUnhashedControl = Stream.GetCurrentBitNo() >> 3;
@@ -1093,10 +1160,24 @@ ASTFileSignature ASTWriter::writeUnhashedControlBlock(Preprocessor &PP,
// Note: we don't serialize the log or serialization file names, because they
// are generally transient files and will almost always be overridden.
Stream.EmitRecord(DIAGNOSTIC_OPTIONS, Record);
+ Record.clear();
// Write out the diagnostic/pragma mappings.
WritePragmaDiagnosticMappings(Diags, /* isModule = */ WritingModule);
+ // Header search entry usage.
+ auto HSEntryUsage = PP.getHeaderSearchInfo().computeUserEntryUsage();
+ auto Abbrev = std::make_shared<BitCodeAbbrev>();
+ Abbrev->Add(BitCodeAbbrevOp(HEADER_SEARCH_ENTRY_USAGE));
+ Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // Number of bits.
+ Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Bit vector.
+ unsigned HSUsageAbbrevCode = Stream.EmitAbbrev(std::move(Abbrev));
+ {
+ RecordData::value_type Record[] = {HEADER_SEARCH_ENTRY_USAGE,
+ HSEntryUsage.size()};
+ Stream.EmitRecordWithBlob(HSUsageAbbrevCode, Record, bytes(HSEntryUsage));
+ }
+
// Leave the options block.
Stream.ExitBlock();
return Signature;
@@ -1396,9 +1477,15 @@ void ASTWriter::WriteControlBlock(Preprocessor &PP, ASTContext &Context,
Stream.EmitRecordWithBlob(AbbrevCode, Record, origDir);
}
+ std::set<const FileEntry *> AffectingModuleMaps;
+ if (WritingModule) {
+ AffectingModuleMaps =
+ GetAllModuleMaps(PP.getHeaderSearchInfo(), WritingModule);
+ }
+
WriteInputFiles(Context.SourceMgr,
PP.getHeaderSearchInfo().getHeaderSearchOpts(),
- PP.getLangOpts().Modules);
+ AffectingModuleMaps);
Stream.ExitBlock();
}
@@ -1416,9 +1503,9 @@ struct InputFileEntry {
} // namespace
-void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
- HeaderSearchOptions &HSOpts,
- bool Modules) {
+void ASTWriter::WriteInputFiles(
+ SourceManager &SourceMgr, HeaderSearchOptions &HSOpts,
+ std::set<const FileEntry *> &AffectingModuleMaps) {
using namespace llvm;
Stream.EnterSubblock(INPUT_FILES_BLOCK_ID, 4);
@@ -1458,6 +1545,16 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
if (!Cache->OrigEntry)
continue;
+ if (isModuleMap(File.getFileCharacteristic()) &&
+ !isSystem(File.getFileCharacteristic()) &&
+ !AffectingModuleMaps.empty() &&
+ AffectingModuleMaps.find(Cache->OrigEntry) ==
+ AffectingModuleMaps.end()) {
+ SkippedModuleMaps.insert(Cache->OrigEntry);
+ // Do not emit module maps that do not affect the current module.
+ continue;
+ }
+
InputFileEntry Entry;
Entry.File = Cache->OrigEntry;
Entry.IsSystemFile = isSystem(File.getFileCharacteristic());
@@ -1971,11 +2068,17 @@ void ASTWriter::WriteSourceManagerBlock(SourceManager &SourceMgr,
Record.push_back(SLoc->getOffset() - 2);
if (SLoc->isFile()) {
const SrcMgr::FileInfo &File = SLoc->getFile();
+ const SrcMgr::ContentCache *Content = &File.getContentCache();
+ if (Content->OrigEntry && !SkippedModuleMaps.empty() &&
+ SkippedModuleMaps.find(Content->OrigEntry) !=
+ SkippedModuleMaps.end()) {
+ // Do not emit files that were not listed as inputs.
+ continue;
+ }
AddSourceLocation(File.getIncludeLoc(), Record);
Record.push_back(File.getFileCharacteristic()); // FIXME: stable encoding
Record.push_back(File.hasLineDirectives());
- const SrcMgr::ContentCache *Content = &File.getContentCache();
bool EmitBlob = false;
if (Content->OrigEntry) {
assert(Content->OrigEntry == Content->ContentsEntry &&
@@ -3017,11 +3120,11 @@ public:
unsigned DataLen = 4 + 2 + 2; // 2 bytes for each of the method counts
for (const ObjCMethodList *Method = &Methods.Instance; Method;
Method = Method->getNext())
- if (Method->getMethod())
+ if (ShouldWriteMethodListNode(Method))
DataLen += 4;
for (const ObjCMethodList *Method = &Methods.Factory; Method;
Method = Method->getNext())
- if (Method->getMethod())
+ if (ShouldWriteMethodListNode(Method))
DataLen += 4;
return emitULEBKeyDataLength(KeyLen, DataLen, Out);
}
@@ -3052,13 +3155,13 @@ public:
unsigned NumInstanceMethods = 0;
for (const ObjCMethodList *Method = &Methods.Instance; Method;
Method = Method->getNext())
- if (Method->getMethod())
+ if (ShouldWriteMethodListNode(Method))
++NumInstanceMethods;
unsigned NumFactoryMethods = 0;
for (const ObjCMethodList *Method = &Methods.Factory; Method;
Method = Method->getNext())
- if (Method->getMethod())
+ if (ShouldWriteMethodListNode(Method))
++NumFactoryMethods;
unsigned InstanceBits = Methods.Instance.getBits();
@@ -3079,15 +3182,20 @@ public:
LE.write<uint16_t>(FullFactoryBits);
for (const ObjCMethodList *Method = &Methods.Instance; Method;
Method = Method->getNext())
- if (Method->getMethod())
+ if (ShouldWriteMethodListNode(Method))
LE.write<uint32_t>(Writer.getDeclID(Method->getMethod()));
for (const ObjCMethodList *Method = &Methods.Factory; Method;
Method = Method->getNext())
- if (Method->getMethod())
+ if (ShouldWriteMethodListNode(Method))
LE.write<uint32_t>(Writer.getDeclID(Method->getMethod()));
assert(Out.tell() - Start == DataLen && "Data length is wrong");
}
+
+private:
+ static bool ShouldWriteMethodListNode(const ObjCMethodList *Node) {
+ return (Node->getMethod() && !Node->getMethod()->isFromASTFile());
+ }
};
} // namespace
@@ -3130,15 +3238,21 @@ void ASTWriter::WriteSelectors(Sema &SemaRef) {
if (Chain && ID < FirstSelectorID) {
// Selector already exists. Did it change?
bool changed = false;
- for (ObjCMethodList *M = &Data.Instance;
- !changed && M && M->getMethod(); M = M->getNext()) {
- if (!M->getMethod()->isFromASTFile())
+ for (ObjCMethodList *M = &Data.Instance; M && M->getMethod();
+ M = M->getNext()) {
+ if (!M->getMethod()->isFromASTFile()) {
changed = true;
+ Data.Instance = *M;
+ break;
+ }
}
- for (ObjCMethodList *M = &Data.Factory; !changed && M && M->getMethod();
+ for (ObjCMethodList *M = &Data.Factory; M && M->getMethod();
M = M->getNext()) {
- if (!M->getMethod()->isFromASTFile())
+ if (!M->getMethod()->isFromASTFile()) {
changed = true;
+ Data.Factory = *M;
+ break;
+ }
}
if (!changed)
continue;
@@ -3390,11 +3504,9 @@ public:
// Only emit declarations that aren't from a chained PCH, though.
SmallVector<NamedDecl *, 16> Decls(IdResolver.begin(II),
IdResolver.end());
- for (SmallVectorImpl<NamedDecl *>::reverse_iterator D = Decls.rbegin(),
- DEnd = Decls.rend();
- D != DEnd; ++D)
+ for (NamedDecl *D : llvm::reverse(Decls))
LE.write<uint32_t>(
- Writer.getDeclID(getDeclForLocalLookup(PP.getLangOpts(), *D)));
+ Writer.getDeclID(getDeclForLocalLookup(PP.getLangOpts(), D)));
}
}
};
@@ -4987,6 +5099,7 @@ void ASTWriter::WriteDeclUpdatesBlocks(RecordDataImpl &OffsetsRecord) {
auto *A = D->getAttr<OMPAllocateDeclAttr>();
Record.push_back(A->getAllocatorType());
Record.AddStmt(A->getAllocator());
+ Record.AddStmt(A->getAlignment());
Record.AddSourceRange(A->getRange());
break;
}
@@ -6191,6 +6304,11 @@ void OMPClauseWriter::VisitOMPFilterClause(OMPFilterClause *C) {
Record.AddSourceLocation(C->getLParenLoc());
}
+void OMPClauseWriter::VisitOMPAlignClause(OMPAlignClause *C) {
+ Record.AddStmt(C->getAlignment());
+ Record.AddSourceLocation(C->getLParenLoc());
+}
+
void OMPClauseWriter::VisitOMPPrivateClause(OMPPrivateClause *C) {
Record.push_back(C->varlist_size());
Record.AddSourceLocation(C->getLParenLoc());
@@ -6693,6 +6811,12 @@ void OMPClauseWriter::VisitOMPAffinityClause(OMPAffinityClause *C) {
Record.AddStmt(E);
}
+void OMPClauseWriter::VisitOMPBindClause(OMPBindClause *C) {
+ Record.writeEnum(C->getBindKind());
+ Record.AddSourceLocation(C->getLParenLoc());
+ Record.AddSourceLocation(C->getBindKindLoc());
+}
+
void ASTRecordWriter::writeOMPTraitInfo(const OMPTraitInfo *TI) {
writeUInt32(TI->Sets.size());
for (const auto &Set : TI->Sets) {
diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp
index e9315f67d553..06cb60823db6 100644
--- a/clang/lib/Serialization/ASTWriterDecl.cpp
+++ b/clang/lib/Serialization/ASTWriterDecl.cpp
@@ -2260,7 +2260,7 @@ void ASTWriter::WriteDeclAbbrevs() {
Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Defaulted
Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // ExplicitlyDefaulted
Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // ImplicitReturnZero
- Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Constexpr
+ Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 2)); // Constexpr
Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // UsesSEHTry
Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // SkippedBody
Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // MultiVersion
diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp
index 2bb5e4f3563d..2d92dec76dc9 100644
--- a/clang/lib/Serialization/ASTWriterStmt.cpp
+++ b/clang/lib/Serialization/ASTWriterStmt.cpp
@@ -138,11 +138,10 @@ void ASTStmtWriter::VisitIfStmt(IfStmt *S) {
bool HasVar = S->getConditionVariableDeclStmt() != nullptr;
bool HasInit = S->getInit() != nullptr;
- Record.push_back(S->isConstexpr());
Record.push_back(HasElse);
Record.push_back(HasVar);
Record.push_back(HasInit);
-
+ Record.push_back(static_cast<uint64_t>(S->getStatementKind()));
Record.AddStmt(S->getCond());
Record.AddStmt(S->getThen());
if (HasElse)
@@ -1483,8 +1482,8 @@ void ASTStmtWriter::VisitObjCAtTryStmt(ObjCAtTryStmt *S) {
Record.push_back(S->getNumCatchStmts());
Record.push_back(S->getFinallyStmt() != nullptr);
Record.AddStmt(S->getTryBody());
- for (unsigned I = 0, N = S->getNumCatchStmts(); I != N; ++I)
- Record.AddStmt(S->getCatchStmt(I));
+ for (ObjCAtCatchStmt *C : S->catch_stmts())
+ Record.AddStmt(C);
if (S->getFinallyStmt())
Record.AddStmt(S->getFinallyStmt());
Record.AddSourceLocation(S->getAtTryLoc());
@@ -2205,6 +2204,13 @@ void ASTStmtWriter::VisitOMPLoopDirective(OMPLoopDirective *D) {
VisitOMPLoopBasedDirective(D);
}
+void ASTStmtWriter::VisitOMPMetaDirective(OMPMetaDirective *D) {
+ VisitStmt(D);
+ Record.push_back(D->getNumClauses());
+ VisitOMPExecutableDirective(D);
+ Code = serialization::STMT_OMP_META_DIRECTIVE;
+}
+
void ASTStmtWriter::VisitOMPParallelDirective(OMPParallelDirective *D) {
VisitStmt(D);
VisitOMPExecutableDirective(D);
@@ -2217,13 +2223,19 @@ void ASTStmtWriter::VisitOMPSimdDirective(OMPSimdDirective *D) {
Code = serialization::STMT_OMP_SIMD_DIRECTIVE;
}
-void ASTStmtWriter::VisitOMPTileDirective(OMPTileDirective *D) {
+void ASTStmtWriter::VisitOMPLoopTransformationDirective(
+ OMPLoopTransformationDirective *D) {
VisitOMPLoopBasedDirective(D);
+ Record.writeUInt32(D->getNumGeneratedLoops());
+}
+
+void ASTStmtWriter::VisitOMPTileDirective(OMPTileDirective *D) {
+ VisitOMPLoopTransformationDirective(D);
Code = serialization::STMT_OMP_TILE_DIRECTIVE;
}
void ASTStmtWriter::VisitOMPUnrollDirective(OMPUnrollDirective *D) {
- VisitOMPLoopBasedDirective(D);
+ VisitOMPLoopTransformationDirective(D);
Code = serialization::STMT_OMP_UNROLL_DIRECTIVE;
}
@@ -2368,6 +2380,7 @@ void ASTStmtWriter::VisitOMPBarrierDirective(OMPBarrierDirective *D) {
void ASTStmtWriter::VisitOMPTaskwaitDirective(OMPTaskwaitDirective *D) {
VisitStmt(D);
+ Record.push_back(D->getNumClauses());
VisitOMPExecutableDirective(D);
Code = serialization::STMT_OMP_TASKWAIT_DIRECTIVE;
}
@@ -2577,6 +2590,11 @@ void ASTStmtWriter::VisitOMPMaskedDirective(OMPMaskedDirective *D) {
Code = serialization::STMT_OMP_MASKED_DIRECTIVE;
}
+void ASTStmtWriter::VisitOMPGenericLoopDirective(OMPGenericLoopDirective *D) {
+ VisitOMPLoopDirective(D);
+ Code = serialization::STMT_OMP_GENERIC_LOOP_DIRECTIVE;
+}
+
//===----------------------------------------------------------------------===//
// ASTWriter Implementation
//===----------------------------------------------------------------------===//
diff --git a/clang/lib/Serialization/GeneratePCH.cpp b/clang/lib/Serialization/GeneratePCH.cpp
index d869796b82c1..6ec5c42e8b82 100644
--- a/clang/lib/Serialization/GeneratePCH.cpp
+++ b/clang/lib/Serialization/GeneratePCH.cpp
@@ -50,7 +50,8 @@ void PCHGenerator::HandleTranslationUnit(ASTContext &Ctx) {
Module *Module = nullptr;
if (PP.getLangOpts().isCompilingModule()) {
Module = PP.getHeaderSearchInfo().lookupModule(
- PP.getLangOpts().CurrentModule, /*AllowSearch*/ false);
+ PP.getLangOpts().CurrentModule, SourceLocation(),
+ /*AllowSearch*/ false);
if (!Module) {
assert(hasErrors && "emitting module but current module doesn't exist");
return;
diff --git a/clang/lib/Serialization/ModuleFileExtension.cpp b/clang/lib/Serialization/ModuleFileExtension.cpp
index 6b7fd1d54340..95fff41e0d7a 100644
--- a/clang/lib/Serialization/ModuleFileExtension.cpp
+++ b/clang/lib/Serialization/ModuleFileExtension.cpp
@@ -11,12 +11,10 @@ using namespace clang;
char ModuleFileExtension::ID = 0;
-ModuleFileExtension::~ModuleFileExtension() { }
+ModuleFileExtension::~ModuleFileExtension() {}
-llvm::hash_code ModuleFileExtension::hashExtension(llvm::hash_code Code) const {
- return Code;
-}
+void ModuleFileExtension::hashExtension(ExtensionHashBuilder &HBuilder) const {}
-ModuleFileExtensionWriter::~ModuleFileExtensionWriter() { }
+ModuleFileExtensionWriter::~ModuleFileExtensionWriter() {}
-ModuleFileExtensionReader::~ModuleFileExtensionReader() { }
+ModuleFileExtensionReader::~ModuleFileExtensionReader() {}
diff --git a/clang/lib/Serialization/ModuleManager.cpp b/clang/lib/Serialization/ModuleManager.cpp
index 40ffa6cfee8f..f4882c7be3f7 100644
--- a/clang/lib/Serialization/ModuleManager.cpp
+++ b/clang/lib/Serialization/ModuleManager.cpp
@@ -270,8 +270,7 @@ void ModuleManager::removeModules(ModuleIterator First, ModuleMap *modMap) {
I->Imports.remove_if(IsVictim);
I->ImportedBy.remove_if(IsVictim);
}
- Roots.erase(std::remove_if(Roots.begin(), Roots.end(), IsVictim),
- Roots.end());
+ llvm::erase_if(Roots, IsVictim);
// Remove the modules from the PCH chain.
for (auto I = First; I != Last; ++I) {
@@ -384,16 +383,14 @@ void ModuleManager::visit(llvm::function_ref<bool(ModuleFile &M)> Visitor,
// For any module that this module depends on, push it on the
// stack (if it hasn't already been marked as visited).
- for (auto M = CurrentModule->Imports.rbegin(),
- MEnd = CurrentModule->Imports.rend();
- M != MEnd; ++M) {
+ for (ModuleFile *M : llvm::reverse(CurrentModule->Imports)) {
// Remove our current module as an impediment to visiting the
// module we depend on. If we were the last unvisited module
// that depends on this particular module, push it into the
// queue to be visited.
- unsigned &NumUnusedEdges = UnusedIncomingEdges[(*M)->Index];
+ unsigned &NumUnusedEdges = UnusedIncomingEdges[M->Index];
if (NumUnusedEdges && (--NumUnusedEdges == 0))
- Queue.push_back(*M);
+ Queue.push_back(M);
}
}
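The two ModuleManager.cpp hunks above are mechanical modernizations: llvm::erase_if replaces the erase(remove_if(...)) idiom, and llvm::reverse lets a range-for walk a container backwards without spelling out rbegin()/rend(). A minimal self-contained sketch of both helpers; the container and predicate are illustrative only:

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/ADT/SmallVector.h"

    static int sketch() {
      llvm::SmallVector<int, 8> Values = {1, 2, 3, 4, 5};

      // erase_if: one call instead of Values.erase(std::remove_if(...), Values.end()).
      llvm::erase_if(Values, [](int V) { return V % 2 == 0; }); // keeps 1, 3, 5

      int Digits = 0;
      // reverse: visits 5, then 3, then 1.
      for (int V : llvm::reverse(Values))
        Digits = Digits * 10 + V;
      return Digits; // 531
    }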
diff --git a/clang/lib/StaticAnalyzer/Checkers/AnalyzerStatsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/AnalyzerStatsChecker.cpp
index c06604b6cffe..6154eeb3419c 100644
--- a/clang/lib/StaticAnalyzer/Checkers/AnalyzerStatsChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/AnalyzerStatsChecker.cpp
@@ -93,11 +93,10 @@ void AnalyzerStatsChecker::checkEndAnalysis(ExplodedGraph &G,
if (!Loc.isValid())
return;
- if (isa<FunctionDecl>(D) || isa<ObjCMethodDecl>(D)) {
+ if (isa<FunctionDecl, ObjCMethodDecl>(D)) {
const NamedDecl *ND = cast<NamedDecl>(D);
output << *ND;
- }
- else if (isa<BlockDecl>(D)) {
+ } else if (isa<BlockDecl>(D)) {
output << "block(line:" << Loc.getLine() << ":col:" << Loc.getColumn();
}
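The AnalyzerStatsChecker hunk above is one of several in this patch (see also MacOSKeychainAPIChecker, MallocChecker, MoveChecker, RetainCountDiagnostics) that fold chained isa<A>(X) || isa<B>(X) tests into the variadic isa<A, B>(X) form, with isa_and_nonnull<...> additionally tolerating a null pointer. A minimal sketch of the idiom; the FunctionDecl/VarDecl choice is illustrative only:

    #include "clang/AST/Decl.h"
    #include "llvm/Support/Casting.h"

    // isa<A, B>(V) == isa<A>(V) || isa<B>(V); the _and_nonnull variant
    // first checks that V is not null.
    static bool isFunctionOrVar(const clang::Decl *D) {
      return llvm::isa_and_nonnull<clang::FunctionDecl, clang::VarDecl>(D);
    }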
diff --git a/clang/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp b/clang/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp
index a86a410ebcbc..2c210fb6cdb9 100644
--- a/clang/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/Expr.h"
@@ -20,9 +19,11 @@
#include "clang/AST/StmtObjC.h"
#include "clang/Analysis/DomainSpecific/CocoaConventions.h"
#include "clang/Analysis/SelectorExtras.h"
+#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h"
@@ -533,10 +534,12 @@ void CFNumberChecker::checkPreStmt(const CallExpr *CE,
namespace {
class CFRetainReleaseChecker : public Checker<check::PreCall> {
mutable APIMisuse BT{this, "null passed to CF memory management function"};
- CallDescription CFRetain{"CFRetain", 1},
- CFRelease{"CFRelease", 1},
- CFMakeCollectable{"CFMakeCollectable", 1},
- CFAutorelease{"CFAutorelease", 1};
+ const CallDescriptionSet ModelledCalls = {
+ {"CFRetain", 1},
+ {"CFRelease", 1},
+ {"CFMakeCollectable", 1},
+ {"CFAutorelease", 1},
+ };
public:
void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
@@ -550,8 +553,7 @@ void CFRetainReleaseChecker::checkPreCall(const CallEvent &Call,
return;
// Check if we called CFRetain/CFRelease/CFMakeCollectable/CFAutorelease.
- if (!(Call.isCalled(CFRetain) || Call.isCalled(CFRelease) ||
- Call.isCalled(CFMakeCollectable) || Call.isCalled(CFAutorelease)))
+ if (!ModelledCalls.contains(Call))
return;
// Get the argument's value.
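The CFRetainReleaseChecker hunk above is part of a wider migration in this patch toward the CallDescription utilities, now included from PathSensitive/CallDescription.h. A rough sketch of the set-based pattern, assuming the same clang-14 API the hunk uses; the class and function names are placeholders:

    #include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
    #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"

    using namespace clang;
    using namespace ento;

    namespace {
    class SetSketch {
      // One immutable set of modelled calls instead of a chain of isCalled() tests.
      const CallDescriptionSet ModelledCalls = {
          {"CFRetain", 1},
          {"CFRelease", 1},
      };

    public:
      bool isModelled(const CallEvent &Call) const {
        return ModelledCalls.contains(Call);
      }
    };
    } // namespace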
diff --git a/clang/lib/StaticAnalyzer/Checkers/BlockInCriticalSectionChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/BlockInCriticalSectionChecker.cpp
index 2752b37f9b3f..8416ab39e194 100644
--- a/clang/lib/StaticAnalyzer/Checkers/BlockInCriticalSectionChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/BlockInCriticalSectionChecker.cpp
@@ -17,6 +17,7 @@
#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
@@ -96,14 +97,7 @@ void BlockInCriticalSectionChecker::initIdentifierInfo(ASTContext &Ctx) const {
}
bool BlockInCriticalSectionChecker::isBlockingFunction(const CallEvent &Call) const {
- if (Call.isCalled(SleepFn)
- || Call.isCalled(GetcFn)
- || Call.isCalled(FgetsFn)
- || Call.isCalled(ReadFn)
- || Call.isCalled(RecvFn)) {
- return true;
- }
- return false;
+ return matchesAny(Call, SleepFn, GetcFn, FgetsFn, ReadFn, RecvFn);
}
bool BlockInCriticalSectionChecker::isLockFunction(const CallEvent &Call) const {
@@ -113,15 +107,8 @@ bool BlockInCriticalSectionChecker::isLockFunction(const CallEvent &Call) const
return true;
}
- if (Call.isCalled(LockFn)
- || Call.isCalled(PthreadLockFn)
- || Call.isCalled(PthreadTryLockFn)
- || Call.isCalled(MtxLock)
- || Call.isCalled(MtxTimedLock)
- || Call.isCalled(MtxTryLock)) {
- return true;
- }
- return false;
+ return matchesAny(Call, LockFn, PthreadLockFn, PthreadTryLockFn, MtxLock,
+ MtxTimedLock, MtxTryLock);
}
bool BlockInCriticalSectionChecker::isUnlockFunction(const CallEvent &Call) const {
@@ -132,12 +119,7 @@ bool BlockInCriticalSectionChecker::isUnlockFunction(const CallEvent &Call) cons
return true;
}
- if (Call.isCalled(UnlockFn)
- || Call.isCalled(PthreadUnlockFn)
- || Call.isCalled(MtxUnlock)) {
- return true;
- }
- return false;
+ return matchesAny(Call, UnlockFn, PthreadUnlockFn, MtxUnlock);
}
void BlockInCriticalSectionChecker::checkPostCall(const CallEvent &Call,
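BlockInCriticalSectionChecker keeps its descriptors as individual members, so it uses the free function matchesAny rather than a CallDescriptionSet; matchesAny(Call, D1, D2, ...) is simply D1.matches(Call) || D2.matches(Call) || .... A hedged sketch with made-up descriptor names:

    #include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
    #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"

    using namespace clang;
    using namespace ento;

    namespace {
    class BlockingSketch {
      const CallDescription SleepFn{"sleep", 1}, ReadFn{"read", 3};

    public:
      bool isBlocking(const CallEvent &Call) const {
        return matchesAny(Call, SleepFn, ReadFn);
      }
    };
    } // namespace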
diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
index 69b90be9aa7e..475cee9ce04b 100644
--- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
@@ -17,6 +17,7 @@
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h"
@@ -2271,11 +2272,10 @@ CStringChecker::FnCheck CStringChecker::identifyCall(const CallEvent &Call,
if (!FD)
return nullptr;
- if (Call.isCalled(StdCopy)) {
+ if (StdCopy.matches(Call))
return &CStringChecker::evalStdCopy;
- } else if (Call.isCalled(StdCopyBackward)) {
+ if (StdCopyBackward.matches(Call))
return &CStringChecker::evalStdCopyBackward;
- }
// Pro-actively check that argument types are safe to do arithmetic upon.
// We do not want to crash if someone accidentally passes a structure
diff --git a/clang/lib/StaticAnalyzer/Checkers/CastValueChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CastValueChecker.cpp
index 131c1345af99..4235c0c13821 100644
--- a/clang/lib/StaticAnalyzer/Checkers/CastValueChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/CastValueChecker.cpp
@@ -20,6 +20,7 @@
#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicType.h"
diff --git a/clang/lib/StaticAnalyzer/Checkers/CheckObjCInstMethSignature.cpp b/clang/lib/StaticAnalyzer/Checkers/CheckObjCInstMethSignature.cpp
index 175dfcef0df4..a13de306eac8 100644
--- a/clang/lib/StaticAnalyzer/Checkers/CheckObjCInstMethSignature.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/CheckObjCInstMethSignature.cpp
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines a CheckObjCInstMethSignature, a flow-insenstive check
+// This file defines a CheckObjCInstMethSignature, a flow-insensitive check
// that determines if an Objective-C class interface incorrectly redefines
// the method signature in a subclass.
//
diff --git a/clang/lib/StaticAnalyzer/Checkers/CheckSecuritySyntaxOnly.cpp b/clang/lib/StaticAnalyzer/Checkers/CheckSecuritySyntaxOnly.cpp
index d06c87631bfb..61ff5e59f06d 100644
--- a/clang/lib/StaticAnalyzer/Checkers/CheckSecuritySyntaxOnly.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/CheckSecuritySyntaxOnly.cpp
@@ -785,9 +785,8 @@ void WalkAST::checkDeprecatedOrUnsafeBufferHandling(const CallExpr *CE,
// real flow analysis.
auto FormatString =
dyn_cast<StringLiteral>(CE->getArg(ArgIndex)->IgnoreParenImpCasts());
- if (FormatString &&
- FormatString->getString().find("%s") == StringRef::npos &&
- FormatString->getString().find("%[") == StringRef::npos)
+ if (FormatString && !FormatString->getString().contains("%s") &&
+ !FormatString->getString().contains("%["))
BoundsProvided = true;
}
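This is one of several hunks (see also DirectIvarAssignment, GenericTaintChecker, MallocChecker) that fold Str.find(X) != StringRef::npos into the equivalent Str.contains(X). A tiny sketch:

    #include "llvm/ADT/StringRef.h"

    // contains() is the readable spelling of find() != StringRef::npos.
    static bool hasNoUnboundedSpecifier(llvm::StringRef Format) {
      return !Format.contains("%s") && !Format.contains("%[");
    }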
diff --git a/clang/lib/StaticAnalyzer/Checkers/ChrootChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ChrootChecker.cpp
index fd53c04f4bbf..ce8d6c879870 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ChrootChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ChrootChecker.cpp
@@ -14,6 +14,7 @@
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
@@ -63,11 +64,11 @@ private:
} // end anonymous namespace
bool ChrootChecker::evalCall(const CallEvent &Call, CheckerContext &C) const {
- if (Call.isCalled(Chroot)) {
+ if (Chroot.matches(Call)) {
evalChroot(Call, C);
return true;
}
- if (Call.isCalled(Chdir)) {
+ if (Chdir.matches(Call)) {
evalChdir(Call, C);
return true;
}
@@ -115,7 +116,7 @@ void ChrootChecker::evalChdir(const CallEvent &Call, CheckerContext &C) const {
void ChrootChecker::checkPreCall(const CallEvent &Call,
CheckerContext &C) const {
// Ignore chroot and chdir.
- if (Call.isCalled(Chroot) || Call.isCalled(Chdir))
+ if (matchesAny(Call, Chroot, Chdir))
return;
// If jail state is ROOT_CHANGED, generate BugReport.
diff --git a/clang/lib/StaticAnalyzer/Checkers/ContainerModeling.cpp b/clang/lib/StaticAnalyzer/Checkers/ContainerModeling.cpp
index 1a7f0d5ab74c..77a3218f55fb 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ContainerModeling.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ContainerModeling.cpp
@@ -10,11 +10,12 @@
//
//===----------------------------------------------------------------------===//
-#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/Driver/DriverDiagnostic.h"
+#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicType.h"
@@ -71,42 +72,27 @@ public:
SVal) const;
CallDescriptionMap<NoItParamFn> NoIterParamFunctions = {
- {{0, "clear", 0},
- &ContainerModeling::handleClear},
- {{0, "assign", 2},
- &ContainerModeling::handleAssign},
- {{0, "push_back", 1},
- &ContainerModeling::handlePushBack},
- {{0, "emplace_back", 1},
- &ContainerModeling::handlePushBack},
- {{0, "pop_back", 0},
- &ContainerModeling::handlePopBack},
- {{0, "push_front", 1},
- &ContainerModeling::handlePushFront},
- {{0, "emplace_front", 1},
- &ContainerModeling::handlePushFront},
- {{0, "pop_front", 0},
- &ContainerModeling::handlePopFront},
+ {{"clear", 0}, &ContainerModeling::handleClear},
+ {{"assign", 2}, &ContainerModeling::handleAssign},
+ {{"push_back", 1}, &ContainerModeling::handlePushBack},
+ {{"emplace_back", 1}, &ContainerModeling::handlePushBack},
+ {{"pop_back", 0}, &ContainerModeling::handlePopBack},
+ {{"push_front", 1}, &ContainerModeling::handlePushFront},
+ {{"emplace_front", 1}, &ContainerModeling::handlePushFront},
+ {{"pop_front", 0}, &ContainerModeling::handlePopFront},
};
-
+
CallDescriptionMap<OneItParamFn> OneIterParamFunctions = {
- {{0, "insert", 2},
- &ContainerModeling::handleInsert},
- {{0, "emplace", 2},
- &ContainerModeling::handleInsert},
- {{0, "erase", 1},
- &ContainerModeling::handleErase},
- {{0, "erase_after", 1},
- &ContainerModeling::handleEraseAfter},
+ {{"insert", 2}, &ContainerModeling::handleInsert},
+ {{"emplace", 2}, &ContainerModeling::handleInsert},
+ {{"erase", 1}, &ContainerModeling::handleErase},
+ {{"erase_after", 1}, &ContainerModeling::handleEraseAfter},
};
-
+
CallDescriptionMap<TwoItParamFn> TwoIterParamFunctions = {
- {{0, "erase", 2},
- &ContainerModeling::handleErase},
- {{0, "erase_after", 2},
- &ContainerModeling::handleEraseAfter},
+ {{"erase", 2}, &ContainerModeling::handleErase},
+ {{"erase_after", 2}, &ContainerModeling::handleEraseAfter},
};
-
};
bool isBeginCall(const FunctionDecl *Func);
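The ContainerModeling maps above keep their handler lookup unchanged; only the descriptor spelling shrinks to the {"name", arg-count} form. A rough sketch of how such a CallDescriptionMap is typically consulted from a callback, assuming the clang-14 API; the class and handlers are placeholders:

    #include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
    #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
    #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"

    using namespace clang;
    using namespace ento;

    namespace {
    class MapSketch {
      using Handler =
          void (MapSketch::*)(const CallEvent &, CheckerContext &) const;

      void handleClear(const CallEvent &, CheckerContext &) const {}
      void handlePushBack(const CallEvent &, CheckerContext &) const {}

      // Descriptors written as {"name", arg-count}, as in the hunk above.
      CallDescriptionMap<Handler> Callbacks = {
          {{"clear", 0}, &MapSketch::handleClear},
          {{"push_back", 1}, &MapSketch::handlePushBack},
      };

    public:
      void checkPostCall(const CallEvent &Call, CheckerContext &C) const {
        // lookup() returns nullptr when no descriptor matches the call.
        if (const Handler *H = Callbacks.lookup(Call))
          (this->*(*H))(Call, C);
      }
    };
    } // namespace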
diff --git a/clang/lib/StaticAnalyzer/Checkers/ConversionChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ConversionChecker.cpp
index 4216a6883119..8da482a2aec9 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ConversionChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ConversionChecker.cpp
@@ -49,7 +49,8 @@ private:
bool isLossOfSign(const ImplicitCastExpr *Cast, CheckerContext &C) const;
- void reportBug(ExplodedNode *N, CheckerContext &C, const char Msg[]) const;
+ void reportBug(ExplodedNode *N, const Expr *E, CheckerContext &C,
+ const char Msg[]) const;
};
}
@@ -108,20 +109,21 @@ void ConversionChecker::checkPreStmt(const ImplicitCastExpr *Cast,
if (!N)
return;
if (LossOfSign)
- reportBug(N, C, "Loss of sign in implicit conversion");
+ reportBug(N, Cast, C, "Loss of sign in implicit conversion");
if (LossOfPrecision)
- reportBug(N, C, "Loss of precision in implicit conversion");
+ reportBug(N, Cast, C, "Loss of precision in implicit conversion");
}
}
-void ConversionChecker::reportBug(ExplodedNode *N, CheckerContext &C,
- const char Msg[]) const {
+void ConversionChecker::reportBug(ExplodedNode *N, const Expr *E,
+ CheckerContext &C, const char Msg[]) const {
if (!BT)
BT.reset(
new BuiltinBug(this, "Conversion", "Possible loss of sign/precision."));
// Generate a report for this bug.
auto R = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N);
+ bugreporter::trackExpressionValue(N, E, *R);
C.emitReport(std::move(R));
}
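Besides threading the cast expression through reportBug, the ConversionChecker hunk above attaches bugreporter::trackExpressionValue to the report, which adds the usual "Assuming/Value is ..." notes for that expression along the bug path. A hedged sketch of the reporting shape; BT, E and N stand in for whatever the checker already has at hand:

    #include "clang/AST/Expr.h"
    #include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
    #include "clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h"
    #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
    #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
    #include <memory>

    using namespace clang;
    using namespace ento;

    static void reportWithTracking(CheckerContext &C, const BugType &BT,
                                   const Expr *E, ExplodedNode *N) {
      auto R = std::make_unique<PathSensitiveBugReport>(BT, "diagnostic text", N);
      bugreporter::trackExpressionValue(N, E, *R); // note path for E's value
      C.emitReport(std::move(R));
    }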
diff --git a/clang/lib/StaticAnalyzer/Checkers/DebugContainerModeling.cpp b/clang/lib/StaticAnalyzer/Checkers/DebugContainerModeling.cpp
index 6fed999ffc80..47fd57c7db9b 100644
--- a/clang/lib/StaticAnalyzer/Checkers/DebugContainerModeling.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/DebugContainerModeling.cpp
@@ -13,6 +13,7 @@
#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
@@ -40,10 +41,10 @@ class DebugContainerModeling
CheckerContext &) const;
CallDescriptionMap<FnCheck> Callbacks = {
- {{0, "clang_analyzer_container_begin", 1},
- &DebugContainerModeling::analyzerContainerBegin},
- {{0, "clang_analyzer_container_end", 1},
- &DebugContainerModeling::analyzerContainerEnd},
+ {{"clang_analyzer_container_begin", 1},
+ &DebugContainerModeling::analyzerContainerBegin},
+ {{"clang_analyzer_container_end", 1},
+ &DebugContainerModeling::analyzerContainerEnd},
};
public:
diff --git a/clang/lib/StaticAnalyzer/Checkers/DebugIteratorModeling.cpp b/clang/lib/StaticAnalyzer/Checkers/DebugIteratorModeling.cpp
index 5833eea56da8..6add9a007a87 100644
--- a/clang/lib/StaticAnalyzer/Checkers/DebugIteratorModeling.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/DebugIteratorModeling.cpp
@@ -13,6 +13,7 @@
#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
@@ -41,12 +42,12 @@ class DebugIteratorModeling
CheckerContext &) const;
CallDescriptionMap<FnCheck> Callbacks = {
- {{0, "clang_analyzer_iterator_position", 1},
- &DebugIteratorModeling::analyzerIteratorPosition},
- {{0, "clang_analyzer_iterator_container", 1},
- &DebugIteratorModeling::analyzerIteratorContainer},
- {{0, "clang_analyzer_iterator_validity", 1},
- &DebugIteratorModeling::analyzerIteratorValidity},
+ {{"clang_analyzer_iterator_position", 1},
+ &DebugIteratorModeling::analyzerIteratorPosition},
+ {{"clang_analyzer_iterator_container", 1},
+ &DebugIteratorModeling::analyzerIteratorContainer},
+ {{"clang_analyzer_iterator_validity", 1},
+ &DebugIteratorModeling::analyzerIteratorValidity},
};
public:
diff --git a/clang/lib/StaticAnalyzer/Checkers/DirectIvarAssignment.cpp b/clang/lib/StaticAnalyzer/Checkers/DirectIvarAssignment.cpp
index df88b71ff063..49486ea796c2 100644
--- a/clang/lib/StaticAnalyzer/Checkers/DirectIvarAssignment.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/DirectIvarAssignment.cpp
@@ -44,8 +44,8 @@ static bool DefaultMethodFilter(const ObjCMethodDecl *M) {
M->getMethodFamily() == OMF_dealloc ||
M->getMethodFamily() == OMF_copy ||
M->getMethodFamily() == OMF_mutableCopy ||
- M->getSelector().getNameForSlot(0).find("init") != StringRef::npos ||
- M->getSelector().getNameForSlot(0).find("Init") != StringRef::npos;
+ M->getSelector().getNameForSlot(0).contains("init") ||
+ M->getSelector().getNameForSlot(0).contains("Init");
}
class DirectIvarAssignment :
diff --git a/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp b/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp
index 14ba5d769969..b07f59125a82 100644
--- a/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp
@@ -384,7 +384,7 @@ void DynamicTypePropagation::checkPostCall(const CallEvent &Call,
// FIXME: Instead of relying on the ParentMap, we should have the
// trigger-statement (InitListExpr in this case) available in this
// callback, ideally as part of CallEvent.
- if (dyn_cast_or_null<InitListExpr>(
+ if (isa_and_nonnull<InitListExpr>(
LCtx->getParentMap().getParent(Ctor->getOriginExpr())))
return;
diff --git a/clang/lib/StaticAnalyzer/Checkers/EnumCastOutOfRangeChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/EnumCastOutOfRangeChecker.cpp
index 0e94b915a468..e5088fb266bc 100644
--- a/clang/lib/StaticAnalyzer/Checkers/EnumCastOutOfRangeChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/EnumCastOutOfRangeChecker.cpp
@@ -94,10 +94,10 @@ void EnumCastOutOfRangeChecker::checkPreStmt(const CastExpr *CE,
// Only perform enum range check on casts where such checks are valid. For
// all other cast kinds (where enum range checks are unnecessary or invalid),
- // just return immediately. TODO: The set of casts whitelisted for enum
- // range checking may be incomplete. Better to add a missing cast kind to
- // enable a missing check than to generate false negatives and have to remove
- // those later.
+ // just return immediately. TODO: The set of casts allowed for enum range
+ // checking may be incomplete. Better to add a missing cast kind to enable a
+ // missing check than to generate false negatives and have to remove those
+ // later.
switch (CE->getCastKind()) {
case CK_IntegralCast:
break;
diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
index 42c777eb2c52..66ef781871ec 100644
--- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -260,7 +260,7 @@ private:
}
bool isDestinationArgument(unsigned ArgNum) const {
- return (llvm::find(DstArgs, ArgNum) != DstArgs.end());
+ return llvm::is_contained(DstArgs, ArgNum);
}
static bool isTaintedOrPointsToTainted(const Expr *E,
@@ -435,7 +435,6 @@ GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
.Case("getch", {{}, {ReturnValueIndex}})
.Case("getchar", {{}, {ReturnValueIndex}})
.Case("getchar_unlocked", {{}, {ReturnValueIndex}})
- .Case("getenv", {{}, {ReturnValueIndex}})
.Case("gets", {{}, {0, ReturnValueIndex}})
.Case("scanf", {{}, {}, VariadicType::Dst, 1})
.Case("socket", {{},
@@ -468,6 +467,16 @@ GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
if (!Rule.isNull())
return Rule;
+
+ // `getenv` returns taint only in untrusted environments.
+ if (FData.FullName == "getenv") {
+ if (C.getAnalysisManager()
+ .getAnalyzerOptions()
+ .ShouldAssumeControlledEnvironment)
+ return {};
+ return {{}, {ReturnValueIndex}};
+ }
+
assert(FData.FDecl);
// Check if it's one of the memory setting/copying functions.
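The getenv hunk above makes the taint rule conditional on the new ShouldAssumeControlledEnvironment analyzer option: in a controlled (trusted) environment, getenv's result is no longer treated as a taint source. A minimal sketch of reading that option from a checker callback, mirroring the call chain used above:

    #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"

    // Sketch: query the global analyzer option the hunk above consults.
    static bool assumeControlledEnvironment(clang::ento::CheckerContext &C) {
      return C.getAnalysisManager()
          .getAnalyzerOptions()
          .ShouldAssumeControlledEnvironment;
    }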
@@ -505,7 +514,7 @@ GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
if (OneOf("snprintf"))
return {{1}, {0, ReturnValueIndex}, VariadicType::Src, 3};
if (OneOf("sprintf"))
- return {{}, {0, ReturnValueIndex}, VariadicType::Src, 2};
+ return {{1}, {0, ReturnValueIndex}, VariadicType::Src, 2};
if (OneOf("strcpy", "stpcpy", "strcat"))
return {{1}, {0, ReturnValueIndex}};
if (OneOf("bcopy"))
@@ -780,7 +789,7 @@ bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
// variable named stdin with the proper type.
if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
D = D->getCanonicalDecl();
- if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) {
+ if (D->getName().contains("stdin") && D->isExternC()) {
const auto *PtrTy = dyn_cast<PointerType>(D->getType().getTypePtr());
if (PtrTy && PtrTy->getPointeeType().getCanonicalType() ==
C.getASTContext().getFILEType().getCanonicalType())
@@ -807,7 +816,7 @@ static bool getPrintfFormatArgumentNum(const CallEvent &Call,
}
// Or if a function is named setproctitle (this is a heuristic).
- if (C.getCalleeName(FDecl).find("setproctitle") != StringRef::npos) {
+ if (C.getCalleeName(FDecl).contains("setproctitle")) {
ArgNum = 0;
return true;
}
diff --git a/clang/lib/StaticAnalyzer/Checkers/InnerPointerChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/InnerPointerChecker.cpp
index bcae73378028..6f9867b9607d 100644
--- a/clang/lib/StaticAnalyzer/Checkers/InnerPointerChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/InnerPointerChecker.cpp
@@ -13,11 +13,12 @@
//===----------------------------------------------------------------------===//
#include "AllocationState.h"
-#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
#include "InterCheckerAPI.h"
+#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
@@ -125,19 +126,15 @@ bool InnerPointerChecker::isInvalidatingMemberFunction(
return true;
return false;
}
- return (isa<CXXDestructorCall>(Call) || Call.isCalled(AppendFn) ||
- Call.isCalled(AssignFn) || Call.isCalled(ClearFn) ||
- Call.isCalled(EraseFn) || Call.isCalled(InsertFn) ||
- Call.isCalled(PopBackFn) || Call.isCalled(PushBackFn) ||
- Call.isCalled(ReplaceFn) || Call.isCalled(ReserveFn) ||
- Call.isCalled(ResizeFn) || Call.isCalled(ShrinkToFitFn) ||
- Call.isCalled(SwapFn));
+ return isa<CXXDestructorCall>(Call) ||
+ matchesAny(Call, AppendFn, AssignFn, ClearFn, EraseFn, InsertFn,
+ PopBackFn, PushBackFn, ReplaceFn, ReserveFn, ResizeFn,
+ ShrinkToFitFn, SwapFn);
}
bool InnerPointerChecker::isInnerPointerAccessFunction(
const CallEvent &Call) const {
- return (Call.isCalled(CStrFn) || Call.isCalled(DataFn) ||
- Call.isCalled(DataMemberFn));
+ return matchesAny(Call, CStrFn, DataFn, DataMemberFn);
}
void InnerPointerChecker::markPtrSymbolsReleased(const CallEvent &Call,
@@ -184,7 +181,7 @@ void InnerPointerChecker::checkFunctionArguments(const CallEvent &Call,
// std::addressof function accepts a non-const reference as an argument,
// but doesn't modify it.
- if (Call.isCalled(AddressofFn))
+ if (AddressofFn.matches(Call))
continue;
markPtrSymbolsReleased(Call, State, ArgRegion, C);
diff --git a/clang/lib/StaticAnalyzer/Checkers/IteratorModeling.cpp b/clang/lib/StaticAnalyzer/Checkers/IteratorModeling.cpp
index ab5e6a1c9991..235c9010412a 100644
--- a/clang/lib/StaticAnalyzer/Checkers/IteratorModeling.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/IteratorModeling.cpp
@@ -64,10 +64,11 @@
// making an assumption e.g. `S1 + n == S2 + m` we store `S1 - S2 == m - n` as
// a constraint which we later retrieve when doing an actual comparison.
-#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
#include "clang/AST/DeclTemplate.h"
+#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicType.h"
diff --git a/clang/lib/StaticAnalyzer/Checkers/IteratorRangeChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/IteratorRangeChecker.cpp
index a47484497771..c682449921ac 100644
--- a/clang/lib/StaticAnalyzer/Checkers/IteratorRangeChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/IteratorRangeChecker.cpp
@@ -14,10 +14,10 @@
#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
-
#include "Iterator.h"
using namespace clang;
diff --git a/clang/lib/StaticAnalyzer/Checkers/LocalizationChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/LocalizationChecker.cpp
index 28d3e058fee2..b57c5dc6de56 100644
--- a/clang/lib/StaticAnalyzer/Checkers/LocalizationChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/LocalizationChecker.cpp
@@ -949,7 +949,7 @@ void NonLocalizedStringChecker::checkPostCall(const CallEvent &Call,
const IdentifierInfo *Identifier = Call.getCalleeIdentifier();
SVal sv = Call.getReturnValue();
- if (isAnnotatedAsReturningLocalized(D) || LSF.count(Identifier) != 0) {
+ if (isAnnotatedAsReturningLocalized(D) || LSF.contains(Identifier)) {
setLocalizedState(sv, C);
} else if (isNSStringType(RT, C.getASTContext()) &&
!hasLocalizedState(sv, C)) {
@@ -1339,7 +1339,10 @@ bool PluralMisuseChecker::MethodCrawler::EndVisitIfStmt(IfStmt *I) {
}
bool PluralMisuseChecker::MethodCrawler::VisitIfStmt(const IfStmt *I) {
- const Expr *Condition = I->getCond()->IgnoreParenImpCasts();
+ const Expr *Condition = I->getCond();
+ if (!Condition)
+ return true;
+ Condition = Condition->IgnoreParenImpCasts();
if (isCheckingPlurality(Condition)) {
MatchingStatements.push_back(I);
InMatchingStatement = true;
diff --git a/clang/lib/StaticAnalyzer/Checkers/MIGChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MIGChecker.cpp
index b72d72580c28..5bf96acc0462 100644
--- a/clang/lib/StaticAnalyzer/Checkers/MIGChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/MIGChecker.cpp
@@ -27,6 +27,7 @@
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
@@ -180,7 +181,7 @@ static bool isInMIGCall(CheckerContext &C) {
}
void MIGChecker::checkPostCall(const CallEvent &Call, CheckerContext &C) const {
- if (Call.isCalled(OsRefRetain)) {
+ if (OsRefRetain.matches(Call)) {
// If the code is doing reference counting over the parameter,
// it opens up an opportunity for safely calling a destructor function.
// TODO: We should still check for over-releases.
@@ -198,7 +199,7 @@ void MIGChecker::checkPostCall(const CallEvent &Call, CheckerContext &C) const {
auto I = llvm::find_if(Deallocators,
[&](const std::pair<CallDescription, unsigned> &Item) {
- return Call.isCalled(Item.first);
+ return Item.first.matches(Call);
});
if (I == Deallocators.end())
return;
diff --git a/clang/lib/StaticAnalyzer/Checkers/MacOSKeychainAPIChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MacOSKeychainAPIChecker.cpp
index a157ee2da5df..635eb00e4ca9 100644
--- a/clang/lib/StaticAnalyzer/Checkers/MacOSKeychainAPIChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/MacOSKeychainAPIChecker.cpp
@@ -160,7 +160,7 @@ static bool isEnclosingFunctionParam(const Expr *E) {
E = E->IgnoreParenCasts();
if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(E)) {
const ValueDecl *VD = DRE->getDecl();
- if (isa<ImplicitParamDecl>(VD) || isa<ParmVarDecl>(VD))
+ if (isa<ImplicitParamDecl, ParmVarDecl>(VD))
return true;
}
return false;
@@ -199,8 +199,7 @@ unsigned MacOSKeychainAPIChecker::getTrackedFunctionIndex(StringRef Name,
static bool isBadDeallocationArgument(const MemRegion *Arg) {
if (!Arg)
return false;
- return isa<AllocaRegion>(Arg) || isa<BlockDataRegion>(Arg) ||
- isa<TypedRegion>(Arg);
+ return isa<AllocaRegion, BlockDataRegion, TypedRegion>(Arg);
}
/// Given the address expression, retrieve the value it's pointing to. Assume
diff --git a/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp
index a6470da09c45..10ed6149528c 100644
--- a/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp
@@ -48,9 +48,13 @@
#include "InterCheckerAPI.h"
#include "clang/AST/Attr.h"
#include "clang/AST/DeclCXX.h"
+#include "clang/AST/DeclTemplate.h"
#include "clang/AST/Expr.h"
#include "clang/AST/ExprCXX.h"
#include "clang/AST/ParentMap.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/ASTMatchers/ASTMatchers.h"
+#include "clang/Analysis/ProgramPoint.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TargetInfo.h"
@@ -60,20 +64,26 @@
#include "clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerHelpers.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/StoreRef.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include <climits>
#include <functional>
#include <utility>
@@ -298,6 +308,8 @@ public:
/// which might free a pointer are annotated.
DefaultBool ShouldIncludeOwnershipAnnotatedFunctions;
+ DefaultBool ShouldRegisterNoOwnershipChangeVisitor;
+
/// Many checkers are essentially built into this one, so enabling them will
/// make MallocChecker perform additional modeling and reporting.
enum CheckKind {
@@ -722,11 +734,169 @@ private:
bool isArgZERO_SIZE_PTR(ProgramStateRef State, CheckerContext &C,
SVal ArgVal) const;
};
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// Definition of NoOwnershipChangeVisitor.
+//===----------------------------------------------------------------------===//
+
+namespace {
+class NoOwnershipChangeVisitor final : public NoStateChangeFuncVisitor {
+ SymbolRef Sym;
+ using OwnerSet = llvm::SmallPtrSet<const MemRegion *, 8>;
+
+ // Collect which entities point to the allocated memory, and could be
+ // responsible for deallocating it.
+ class OwnershipBindingsHandler : public StoreManager::BindingsHandler {
+ SymbolRef Sym;
+ OwnerSet &Owners;
+
+ public:
+ OwnershipBindingsHandler(SymbolRef Sym, OwnerSet &Owners)
+ : Sym(Sym), Owners(Owners) {}
+
+ bool HandleBinding(StoreManager &SMgr, Store Store, const MemRegion *Region,
+ SVal Val) override {
+ if (Val.getAsSymbol() == Sym)
+ Owners.insert(Region);
+ return true;
+ }
+
+ LLVM_DUMP_METHOD void dump() const { dumpToStream(llvm::errs()); }
+ LLVM_DUMP_METHOD void dumpToStream(llvm::raw_ostream &out) const {
+ out << "Owners: {\n";
+ for (const MemRegion *Owner : Owners) {
+ out << " ";
+ Owner->dumpToStream(out);
+ out << ",\n";
+ }
+ out << "}\n";
+ }
+ };
+
+protected:
+ OwnerSet getOwnersAtNode(const ExplodedNode *N) {
+ OwnerSet Ret;
+
+ ProgramStateRef State = N->getState();
+ OwnershipBindingsHandler Handler{Sym, Ret};
+ State->getStateManager().getStoreManager().iterBindings(State->getStore(),
+ Handler);
+ return Ret;
+ }
+
+ LLVM_DUMP_METHOD static std::string
+ getFunctionName(const ExplodedNode *CallEnterN) {
+ if (const CallExpr *CE = llvm::dyn_cast_or_null<CallExpr>(
+ CallEnterN->getLocationAs<CallEnter>()->getCallExpr()))
+ if (const FunctionDecl *FD = CE->getDirectCallee())
+ return FD->getQualifiedNameAsString();
+ return "";
+ }
+
+ bool doesFnIntendToHandleOwnership(const Decl *Callee, ASTContext &ACtx) {
+ using namespace clang::ast_matchers;
+ const FunctionDecl *FD = dyn_cast<FunctionDecl>(Callee);
+ if (!FD)
+ return false;
+ // TODO: Operator delete is hardly the only deallocator -- Can we reuse
+ // isFreeingCall() or something that's already here?
+ auto Deallocations = match(
+ stmt(hasDescendant(cxxDeleteExpr().bind("delete"))
+ ), *FD->getBody(), ACtx);
+ // TODO: Ownership may change with an attempt to store the allocated memory.
+ return !Deallocations.empty();
+ }
+
+ virtual bool
+ wasModifiedInFunction(const ExplodedNode *CallEnterN,
+ const ExplodedNode *CallExitEndN) override {
+ if (!doesFnIntendToHandleOwnership(
+ CallExitEndN->getFirstPred()->getLocationContext()->getDecl(),
+ CallExitEndN->getState()->getAnalysisManager().getASTContext()))
+ return true;
+
+ if (CallEnterN->getState()->get<RegionState>(Sym) !=
+ CallExitEndN->getState()->get<RegionState>(Sym))
+ return true;
+
+ OwnerSet CurrOwners = getOwnersAtNode(CallEnterN);
+ OwnerSet ExitOwners = getOwnersAtNode(CallExitEndN);
+
+ // Owners in the current set may be purged from the analyzer later on.
+ // If a variable is dead (is not referenced directly or indirectly after
+ // some point), it will be removed from the Store before the end of its
+ // actual lifetime.
+ // This means that if the ownership status didn't change, CurrOwners
+ // must be a superset of, but not necessarily equal to, ExitOwners.
+ return !llvm::set_is_subset(ExitOwners, CurrOwners);
+ }
+
+ static PathDiagnosticPieceRef emitNote(const ExplodedNode *N) {
+ PathDiagnosticLocation L = PathDiagnosticLocation::create(
+ N->getLocation(),
+ N->getState()->getStateManager().getContext().getSourceManager());
+ return std::make_shared<PathDiagnosticEventPiece>(
+ L, "Returning without deallocating memory or storing the pointer for "
+ "later deallocation");
+ }
+
+ virtual PathDiagnosticPieceRef
+ maybeEmitNoteForObjCSelf(PathSensitiveBugReport &R,
+ const ObjCMethodCall &Call,
+ const ExplodedNode *N) override {
+ // TODO: Implement.
+ return nullptr;
+ }
+
+ virtual PathDiagnosticPieceRef
+ maybeEmitNoteForCXXThis(PathSensitiveBugReport &R,
+ const CXXConstructorCall &Call,
+ const ExplodedNode *N) override {
+ // TODO: Implement.
+ return nullptr;
+ }
+
+ virtual PathDiagnosticPieceRef
+ maybeEmitNoteForParameters(PathSensitiveBugReport &R, const CallEvent &Call,
+ const ExplodedNode *N) override {
+ // TODO: Factor the logic of "what constitutes an entity being passed
+ // into a function call" out by reusing the code in
+ // NoStoreFuncVisitor::maybeEmitNoteForParameters, maybe by incorporating
+ // the printing technology in UninitializedObject's FieldChainInfo.
+ ArrayRef<ParmVarDecl *> Parameters = Call.parameters();
+ for (unsigned I = 0; I < Call.getNumArgs() && I < Parameters.size(); ++I) {
+ SVal V = Call.getArgSVal(I);
+ if (V.getAsSymbol() == Sym)
+ return emitNote(N);
+ }
+ return nullptr;
+ }
+
+public:
+ NoOwnershipChangeVisitor(SymbolRef Sym)
+ : NoStateChangeFuncVisitor(bugreporter::TrackingKind::Thorough),
+ Sym(Sym) {}
+
+ void Profile(llvm::FoldingSetNodeID &ID) const override {
+ static int Tag = 0;
+ ID.AddPointer(&Tag);
+ ID.AddPointer(Sym);
+ }
+
+ void *getTag() const {
+ static int Tag = 0;
+ return static_cast<void *>(&Tag);
+ }
+};
+
+} // end anonymous namespace
//===----------------------------------------------------------------------===//
// Definition of MallocBugVisitor.
//===----------------------------------------------------------------------===//
+namespace {
/// The bug visitor which allows us to print extra diagnostics along the
/// BugReport path. For example, showing the allocation site of the leaked
/// region.
@@ -767,7 +937,7 @@ public:
/// Did not track -> allocated. Other state (released) -> allocated.
static inline bool isAllocated(const RefState *RSCurr, const RefState *RSPrev,
const Stmt *Stmt) {
- return (Stmt && (isa<CallExpr>(Stmt) || isa<CXXNewExpr>(Stmt)) &&
+ return (isa_and_nonnull<CallExpr, CXXNewExpr>(Stmt) &&
(RSCurr &&
(RSCurr->isAllocated() || RSCurr->isAllocatedOfSizeZero())) &&
(!RSPrev ||
@@ -780,8 +950,7 @@ public:
const Stmt *Stmt) {
bool IsReleased =
(RSCurr && RSCurr->isReleased()) && (!RSPrev || !RSPrev->isReleased());
- assert(!IsReleased ||
- (Stmt && (isa<CallExpr>(Stmt) || isa<CXXDeleteExpr>(Stmt))) ||
+ assert(!IsReleased || (isa_and_nonnull<CallExpr, CXXDeleteExpr>(Stmt)) ||
(!Stmt && RSCurr->getAllocationFamily() == AF_InnerBuffer));
return IsReleased;
}
@@ -789,11 +958,10 @@ public:
/// Did not track -> relinquished. Other state (allocated) -> relinquished.
static inline bool isRelinquished(const RefState *RSCurr,
const RefState *RSPrev, const Stmt *Stmt) {
- return (Stmt &&
- (isa<CallExpr>(Stmt) || isa<ObjCMessageExpr>(Stmt) ||
- isa<ObjCPropertyRefExpr>(Stmt)) &&
- (RSCurr && RSCurr->isRelinquished()) &&
- (!RSPrev || !RSPrev->isRelinquished()));
+ return (
+ isa_and_nonnull<CallExpr, ObjCMessageExpr, ObjCPropertyRefExpr>(Stmt) &&
+ (RSCurr && RSCurr->isRelinquished()) &&
+ (!RSPrev || !RSPrev->isRelinquished()));
}
/// If the expression is not a call, and the state change is
@@ -803,7 +971,7 @@ public:
static inline bool hasReallocFailed(const RefState *RSCurr,
const RefState *RSPrev,
const Stmt *Stmt) {
- return ((!Stmt || !isa<CallExpr>(Stmt)) &&
+ return ((!isa_and_nonnull<CallExpr>(Stmt)) &&
(RSCurr &&
(RSCurr->isAllocated() || RSCurr->isAllocatedOfSizeZero())) &&
(RSPrev &&
@@ -851,7 +1019,6 @@ private:
}
};
};
-
} // end anonymous namespace
// A map from the freed symbol to the symbol representing the return value of
@@ -1753,7 +1920,7 @@ ProgramStateRef MallocChecker::FreeMemAux(
// Parameters, locals, statics, globals, and memory returned by
// __builtin_alloca() shouldn't be freed.
- if (!(isa<UnknownSpaceRegion>(MS) || isa<HeapSpaceRegion>(MS))) {
+ if (!isa<UnknownSpaceRegion, HeapSpaceRegion>(MS)) {
// FIXME: at the time this code was written, malloc() regions were
// represented by conjured symbols, which are all in UnknownSpaceRegion.
// This means that there isn't actually anything from HeapSpaceRegion
@@ -2303,7 +2470,8 @@ void MallocChecker::HandleUseZeroAlloc(CheckerContext &C, SourceRange Range,
categories::MemoryError));
auto R = std::make_unique<PathSensitiveBugReport>(
- *BT_UseZerroAllocated[*CheckKind], "Use of zero-allocated memory", N);
+ *BT_UseZerroAllocated[*CheckKind],
+ "Use of memory allocated with size zero", N);
R->addRange(Range);
if (Sym) {
@@ -2579,6 +2747,8 @@ void MallocChecker::HandleLeak(SymbolRef Sym, ExplodedNode *N,
AllocNode->getLocationContext()->getDecl());
R->markInteresting(Sym);
R->addVisitor<MallocBugVisitor>(Sym, true);
+ if (ShouldRegisterNoOwnershipChangeVisitor)
+ R->addVisitor<NoOwnershipChangeVisitor>(Sym);
C.emitReport(std::move(R));
}
@@ -2733,7 +2903,7 @@ void MallocChecker::checkEscapeOnReturn(const ReturnStmt *S,
// the callee could still free the memory.
// TODO: This logic should be a part of generic symbol escape callback.
if (const MemRegion *MR = RetVal.getAsRegion())
- if (isa<FieldRegion>(MR) || isa<ElementRegion>(MR))
+ if (isa<FieldRegion, ElementRegion>(MR))
if (const SymbolicRegion *BMR =
dyn_cast<SymbolicRegion>(MR->getBaseRegion()))
Sym = BMR->getSymbol();
@@ -2916,7 +3086,7 @@ bool MallocChecker::mayFreeAnyEscapedMemoryOrIsModeledExplicitly(
// TODO: If we want to be more optimistic here, we'll need to make sure that
// regions escape to C++ containers. They seem to do that even now, but for
// mysterious reasons.
- if (!(isa<SimpleFunctionCall>(Call) || isa<ObjCMethodCall>(Call)))
+ if (!isa<SimpleFunctionCall, ObjCMethodCall>(Call))
return true;
// Check Objective-C messages by selector name.
@@ -3024,7 +3194,7 @@ bool MallocChecker::mayFreeAnyEscapedMemoryOrIsModeledExplicitly(
const Expr *ArgE = Call->getArgExpr(0)->IgnoreParenCasts();
if (const DeclRefExpr *ArgDRE = dyn_cast<DeclRefExpr>(ArgE))
if (const VarDecl *D = dyn_cast<VarDecl>(ArgDRE->getDecl()))
- if (D->getCanonicalDecl()->getName().find("std") != StringRef::npos)
+ if (D->getCanonicalDecl()->getName().contains("std"))
return true;
}
}
@@ -3395,6 +3565,9 @@ void ento::registerDynamicMemoryModeling(CheckerManager &mgr) {
auto *checker = mgr.registerChecker<MallocChecker>();
checker->ShouldIncludeOwnershipAnnotatedFunctions =
mgr.getAnalyzerOptions().getCheckerBooleanOption(checker, "Optimistic");
+ checker->ShouldRegisterNoOwnershipChangeVisitor =
+ mgr.getAnalyzerOptions().getCheckerBooleanOption(
+ checker, "AddNoOwnershipChangeNotes");
}
bool ento::shouldRegisterDynamicMemoryModeling(const CheckerManager &mgr) {
diff --git a/clang/lib/StaticAnalyzer/Checkers/MallocOverflowSecurityChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MallocOverflowSecurityChecker.cpp
index e31630f63b5a..a6e8fcd425d5 100644
--- a/clang/lib/StaticAnalyzer/Checkers/MallocOverflowSecurityChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/MallocOverflowSecurityChecker.cpp
@@ -32,12 +32,14 @@ using llvm::APSInt;
namespace {
struct MallocOverflowCheck {
+ const CallExpr *call;
const BinaryOperator *mulop;
const Expr *variable;
APSInt maxVal;
- MallocOverflowCheck(const BinaryOperator *m, const Expr *v, APSInt val)
- : mulop(m), variable(v), maxVal(std::move(val)) {}
+ MallocOverflowCheck(const CallExpr *call, const BinaryOperator *m,
+ const Expr *v, APSInt val)
+ : call(call), mulop(m), variable(v), maxVal(std::move(val)) {}
};
class MallocOverflowSecurityChecker : public Checker<check::ASTCodeBody> {
@@ -46,8 +48,8 @@ public:
BugReporter &BR) const;
void CheckMallocArgument(
- SmallVectorImpl<MallocOverflowCheck> &PossibleMallocOverflows,
- const Expr *TheArgument, ASTContext &Context) const;
+ SmallVectorImpl<MallocOverflowCheck> &PossibleMallocOverflows,
+ const CallExpr *TheCall, ASTContext &Context) const;
void OutputPossibleOverflows(
SmallVectorImpl<MallocOverflowCheck> &PossibleMallocOverflows,
@@ -62,16 +64,15 @@ static inline bool EvaluatesToZero(APSInt &Val, BinaryOperatorKind op) {
}
void MallocOverflowSecurityChecker::CheckMallocArgument(
- SmallVectorImpl<MallocOverflowCheck> &PossibleMallocOverflows,
- const Expr *TheArgument,
- ASTContext &Context) const {
+ SmallVectorImpl<MallocOverflowCheck> &PossibleMallocOverflows,
+ const CallExpr *TheCall, ASTContext &Context) const {
/* Look for a linear combination with a single variable, and at least
one multiplication.
Reject anything that applies to the variable: an explicit cast,
conditional expression, an operation that could reduce the range
of the result, or anything too complicated :-). */
- const Expr *e = TheArgument;
+ const Expr *e = TheCall->getArg(0);
const BinaryOperator * mulop = nullptr;
APSInt maxVal;
@@ -101,8 +102,7 @@ void MallocOverflowSecurityChecker::CheckMallocArgument(
e = rhs;
} else
return;
- }
- else if (isa<DeclRefExpr>(e) || isa<MemberExpr>(e))
+ } else if (isa<DeclRefExpr, MemberExpr>(e))
break;
else
return;
@@ -115,9 +115,8 @@ void MallocOverflowSecurityChecker::CheckMallocArgument(
// the data so when the body of the function is completely available
// we can check for comparisons.
- // TODO: Could push this into the innermost scope where 'e' is
- // defined, rather than the whole function.
- PossibleMallocOverflows.push_back(MallocOverflowCheck(mulop, e, maxVal));
+ PossibleMallocOverflows.push_back(
+ MallocOverflowCheck(TheCall, mulop, e, maxVal));
}
namespace {
@@ -153,17 +152,19 @@ private:
return getDecl(CheckDR) == getDecl(DR) && Pred(Check);
return false;
};
- toScanFor.erase(std::remove_if(toScanFor.begin(), toScanFor.end(), P),
- toScanFor.end());
+ llvm::erase_if(toScanFor, P);
}
void CheckExpr(const Expr *E_p) {
- auto PredTrue = [](const MallocOverflowCheck &) { return true; };
const Expr *E = E_p->IgnoreParenImpCasts();
+ const auto PrecedesMalloc = [E, this](const MallocOverflowCheck &c) {
+ return Context.getSourceManager().isBeforeInTranslationUnit(
+ E->getExprLoc(), c.call->getExprLoc());
+ };
if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(E))
- Erase<DeclRefExpr>(DR, PredTrue);
+ Erase<DeclRefExpr>(DR, PrecedesMalloc);
else if (const auto *ME = dyn_cast<MemberExpr>(E)) {
- Erase<MemberExpr>(ME, PredTrue);
+ Erase<MemberExpr>(ME, PrecedesMalloc);
}
}
@@ -322,7 +323,7 @@ void MallocOverflowSecurityChecker::checkASTCodeBody(const Decl *D,
if (FnInfo->isStr ("malloc") || FnInfo->isStr ("_MALLOC")) {
if (TheCall->getNumArgs() == 1)
- CheckMallocArgument(PossibleMallocOverflows, TheCall->getArg(0),
+ CheckMallocArgument(PossibleMallocOverflows, TheCall,
mgr.getASTContext());
}
}
diff --git a/clang/lib/StaticAnalyzer/Checkers/MmapWriteExecChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MmapWriteExecChecker.cpp
index 5d63d6efd234..517a5d78271b 100644
--- a/clang/lib/StaticAnalyzer/Checkers/MmapWriteExecChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/MmapWriteExecChecker.cpp
@@ -17,6 +17,7 @@
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
@@ -46,7 +47,7 @@ int MmapWriteExecChecker::ProtRead = 0x01;
void MmapWriteExecChecker::checkPreCall(const CallEvent &Call,
CheckerContext &C) const {
- if (Call.isCalled(MmapFn) || Call.isCalled(MprotectFn)) {
+ if (matchesAny(Call, MmapFn, MprotectFn)) {
SVal ProtVal = Call.getArgSVal(2);
Optional<nonloc::ConcreteInt> ProtLoc = ProtVal.getAs<nonloc::ConcreteInt>();
int64_t Prot = ProtLoc->getValue().getSExtValue();
diff --git a/clang/lib/StaticAnalyzer/Checkers/MoveChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MoveChecker.cpp
index cbe938982000..4a232c6f4b3f 100644
--- a/clang/lib/StaticAnalyzer/Checkers/MoveChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/MoveChecker.cpp
@@ -553,8 +553,8 @@ MoveChecker::classifyObject(const MemRegion *MR,
// For the purposes of this checker, we classify move-safe STL types
// as not-"STL" types, because that's how the checker treats them.
MR = unwrapRValueReferenceIndirection(MR);
- bool IsLocal =
- MR && isa<VarRegion>(MR) && isa<StackSpaceRegion>(MR->getMemorySpace());
+ bool IsLocal = isa_and_nonnull<VarRegion>(MR) &&
+ isa<StackSpaceRegion>(MR->getMemorySpace());
if (!RD || !RD->getDeclContext()->isStdNamespace())
return { IsLocal, SK_NonStd };
@@ -712,12 +712,9 @@ ProgramStateRef MoveChecker::checkRegionChanges(
// directly, but not all of them end up being invalidated.
// But when they do, they appear in the InvalidatedRegions array as well.
for (const auto *Region : RequestedRegions) {
- if (ThisRegion != Region) {
- if (llvm::find(InvalidatedRegions, Region) !=
- std::end(InvalidatedRegions)) {
- State = removeFromState(State, Region);
- }
- }
+ if (ThisRegion != Region &&
+ llvm::is_contained(InvalidatedRegions, Region))
+ State = removeFromState(State, Region);
}
} else {
// For invalidations that aren't caused by calls, assume nothing. In
diff --git a/clang/lib/StaticAnalyzer/Checkers/NSErrorChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/NSErrorChecker.cpp
index 90c5583d8969..dcca8be55e33 100644
--- a/clang/lib/StaticAnalyzer/Checkers/NSErrorChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/NSErrorChecker.cpp
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines a CheckNSError, a flow-insenstive check
+// This file defines a CheckNSError, a flow-insensitive check
// that determines if an Objective-C class interface correctly returns
// a non-void return type.
//
diff --git a/clang/lib/StaticAnalyzer/Checkers/PthreadLockChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/PthreadLockChecker.cpp
index ee71b55a39e6..f4e9a67438e7 100644
--- a/clang/lib/StaticAnalyzer/Checkers/PthreadLockChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/PthreadLockChecker.cpp
@@ -21,6 +21,7 @@
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
@@ -681,9 +682,7 @@ ProgramStateRef PthreadLockChecker::checkRegionChanges(
// We assume that system library function wouldn't touch the mutex unless
// it takes the mutex explicitly as an argument.
// FIXME: This is a bit quadratic.
- if (IsLibraryFunction &&
- std::find(ExplicitRegions.begin(), ExplicitRegions.end(), R) ==
- ExplicitRegions.end())
+ if (IsLibraryFunction && !llvm::is_contained(ExplicitRegions, R))
continue;
State = State->remove<LockMap>(R);
diff --git a/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountChecker.cpp
index 3f3267ff9391..0bde088d0e85 100644
--- a/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountChecker.cpp
@@ -290,7 +290,7 @@ void RetainCountChecker::checkPostStmt(const ObjCIvarRefExpr *IRE,
ProgramStateRef State = C.getState();
SymbolRef Sym = State->getSVal(*IVarLoc).getAsSymbol();
- if (!Sym || !dyn_cast_or_null<ObjCIvarRegion>(Sym->getOriginRegion()))
+ if (!Sym || !isa_and_nonnull<ObjCIvarRegion>(Sym->getOriginRegion()))
return;
// Accessing an ivar directly is unusual. If we've done that, be more
@@ -1188,14 +1188,14 @@ ProgramStateRef RetainCountChecker::checkRegionChanges(
if (!invalidated)
return state;
- llvm::SmallPtrSet<SymbolRef, 8> WhitelistedSymbols;
+ llvm::SmallPtrSet<SymbolRef, 8> AllowedSymbols;
for (const MemRegion *I : ExplicitRegions)
if (const SymbolicRegion *SR = I->StripCasts()->getAs<SymbolicRegion>())
- WhitelistedSymbols.insert(SR->getSymbol());
+ AllowedSymbols.insert(SR->getSymbol());
for (SymbolRef sym : *invalidated) {
- if (WhitelistedSymbols.count(sym))
+ if (AllowedSymbols.count(sym))
continue;
// Remove any existing reference-count binding.
state = removeRefBinding(state, sym);
diff --git a/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.cpp b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.cpp
index 64ac6bc4c06b..41ef45d317cd 100644
--- a/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.cpp
@@ -73,11 +73,8 @@ RefCountBug::RefCountBug(CheckerNameRef Checker, RefCountBugKind BT)
static bool isNumericLiteralExpression(const Expr *E) {
// FIXME: This set of cases was copied from SemaExprObjC.
- return isa<IntegerLiteral>(E) ||
- isa<CharacterLiteral>(E) ||
- isa<FloatingLiteral>(E) ||
- isa<ObjCBoolLiteralExpr>(E) ||
- isa<CXXBoolLiteralExpr>(E);
+ return isa<IntegerLiteral, CharacterLiteral, FloatingLiteral,
+ ObjCBoolLiteralExpr, CXXBoolLiteralExpr>(E);
}
/// If type represents a pointer to CXXRecordDecl,
diff --git a/clang/lib/StaticAnalyzer/Checkers/ReturnPointerRangeChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ReturnPointerRangeChecker.cpp
index 885750218b9e..c4dc06d4a077 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ReturnPointerRangeChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ReturnPointerRangeChecker.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
+#include "clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
@@ -79,16 +80,44 @@ void ReturnPointerRangeChecker::checkPreStmt(const ReturnStmt *RS,
"Returned pointer value points outside the original object "
"(potential buffer overflow)"));
- // FIXME: It would be nice to eventually make this diagnostic more clear,
- // e.g., by referencing the original declaration or by saying *why* this
- // reference is outside the range.
-
// Generate a report for this bug.
- auto report =
+ auto Report =
std::make_unique<PathSensitiveBugReport>(*BT, BT->getDescription(), N);
-
- report->addRange(RetE->getSourceRange());
- C.emitReport(std::move(report));
+ Report->addRange(RetE->getSourceRange());
+
+ const auto ConcreteElementCount = ElementCount.getAs<nonloc::ConcreteInt>();
+ const auto ConcreteIdx = Idx.getAs<nonloc::ConcreteInt>();
+
+ const auto *DeclR = ER->getSuperRegion()->getAs<DeclRegion>();
+
+ if (DeclR)
+ Report->addNote("Original object declared here",
+ {DeclR->getDecl(), C.getSourceManager()});
+
+ if (ConcreteElementCount) {
+ SmallString<128> SBuf;
+ llvm::raw_svector_ostream OS(SBuf);
+ OS << "Original object ";
+ if (DeclR) {
+ OS << "'";
+ DeclR->getDecl()->printName(OS);
+ OS << "' ";
+ }
+ OS << "is an array of " << ConcreteElementCount->getValue() << " '";
+ ER->getValueType().print(OS,
+ PrintingPolicy(C.getASTContext().getLangOpts()));
+ OS << "' objects";
+ if (ConcreteIdx) {
+ OS << ", returned pointer points at index " << ConcreteIdx->getValue();
+ }
+
+ Report->addNote(SBuf,
+ {RetE, C.getSourceManager(), C.getLocationContext()});
+ }
+
+ bugreporter::trackExpressionValue(N, RetE, *Report);
+
+ C.emitReport(std::move(Report));
}
}
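For illustration, a minimal sketch (not part of the change) of the kind of code the improved diagnostic describes; the array and function names are invented:

    // Illustrative only: 'table' and 'pastTheEnd' are invented names.
    static int table[10];

    int *pastTheEnd() {
      // The returned pointer points outside the 10-element array 'table';
      // the checker's new note reports the element count and the index.
      return table + 11;
    }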
diff --git a/clang/lib/StaticAnalyzer/Checkers/ReturnValueChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ReturnValueChecker.cpp
index 14ecede17083..cd502241ef61 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ReturnValueChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ReturnValueChecker.cpp
@@ -14,6 +14,7 @@
#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include "llvm/ADT/Optional.h"
diff --git a/clang/lib/StaticAnalyzer/Checkers/STLAlgorithmModeling.cpp b/clang/lib/StaticAnalyzer/Checkers/STLAlgorithmModeling.cpp
index 933e0146ff59..ea72ebe3ed57 100644
--- a/clang/lib/StaticAnalyzer/Checkers/STLAlgorithmModeling.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/STLAlgorithmModeling.cpp
@@ -12,6 +12,7 @@
#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
diff --git a/clang/lib/StaticAnalyzer/Checkers/SimpleStreamChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/SimpleStreamChecker.cpp
index 8d380ed1b93d..1de5d7285f65 100644
--- a/clang/lib/StaticAnalyzer/Checkers/SimpleStreamChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/SimpleStreamChecker.cpp
@@ -17,6 +17,7 @@
#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include <utility>
@@ -119,7 +120,7 @@ void SimpleStreamChecker::checkPostCall(const CallEvent &Call,
if (!Call.isGlobalCFunction())
return;
- if (!Call.isCalled(OpenFn))
+ if (!OpenFn.matches(Call))
return;
// Get the symbolic value corresponding to the file handle.
@@ -138,7 +139,7 @@ void SimpleStreamChecker::checkPreCall(const CallEvent &Call,
if (!Call.isGlobalCFunction())
return;
- if (!Call.isCalled(CloseFn))
+ if (!CloseFn.matches(Call))
return;
// Get the symbolic value corresponding to the file handle.
diff --git a/clang/lib/StaticAnalyzer/Checkers/SmartPtrModeling.cpp b/clang/lib/StaticAnalyzer/Checkers/SmartPtrModeling.cpp
index 09e885e8133f..c789a8dbcca1 100644
--- a/clang/lib/StaticAnalyzer/Checkers/SmartPtrModeling.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/SmartPtrModeling.cpp
@@ -23,6 +23,7 @@
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerHelpers.h"
@@ -102,12 +103,8 @@ static bool hasStdClassWithName(const CXXRecordDecl *RD,
ArrayRef<llvm::StringLiteral> Names) {
if (!RD || !RD->getDeclContext()->isStdNamespace())
return false;
- if (RD->getDeclName().isIdentifier()) {
- StringRef Name = RD->getName();
- return llvm::any_of(Names, [&Name](StringRef GivenName) -> bool {
- return Name == GivenName;
- });
- }
+ if (RD->getDeclName().isIdentifier())
+ return llvm::is_contained(Names, RD->getName());
return false;
}
@@ -289,7 +286,7 @@ bool SmartPtrModeling::evalCall(const CallEvent &Call,
if (ModelSmartPtrDereference && isStdOstreamOperatorCall(Call))
return handleOstreamOperator(Call, C);
- if (Call.isCalled(StdSwapCall)) {
+ if (StdSwapCall.matches(Call)) {
// Check the first arg, if it is of std::unique_ptr type.
assert(Call.getNumArgs() == 2 && "std::swap should have two arguments");
const Expr *FirstArg = Call.getArgExpr(0);
@@ -298,8 +295,7 @@ bool SmartPtrModeling::evalCall(const CallEvent &Call,
return handleSwap(State, Call.getArgSVal(0), Call.getArgSVal(1), C);
}
- if (Call.isCalled(StdMakeUniqueCall) ||
- Call.isCalled(StdMakeUniqueForOverwriteCall)) {
+ if (matchesAny(Call, StdMakeUniqueCall, StdMakeUniqueForOverwriteCall)) {
if (!ModelSmartPtrDereference)
return false;
diff --git a/clang/lib/StaticAnalyzer/Checkers/StackAddrEscapeChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StackAddrEscapeChecker.cpp
index b5c9356322fc..d5e86e86424d 100644
--- a/clang/lib/StaticAnalyzer/Checkers/StackAddrEscapeChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/StackAddrEscapeChecker.cpp
@@ -11,9 +11,9 @@
//
//===----------------------------------------------------------------------===//
-#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
#include "clang/AST/ExprCXX.h"
#include "clang/Basic/SourceManager.h"
+#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
@@ -303,21 +303,53 @@ void StackAddrEscapeChecker::checkEndFunction(const ReturnStmt *RS,
class CallBack : public StoreManager::BindingsHandler {
private:
CheckerContext &Ctx;
- const StackFrameContext *CurSFC;
+ const StackFrameContext *PoppedFrame;
+
+ /// Look for stack variables referring to popped stack variables.
+ /// Returns true only if it found some dangling stack variables
+ /// referred to by another stack variable from a different stack frame.
+ bool checkForDanglingStackVariable(const MemRegion *Referrer,
+ const MemRegion *Referred) {
+ const auto *ReferrerMemSpace =
+ Referrer->getMemorySpace()->getAs<StackSpaceRegion>();
+ const auto *ReferredMemSpace =
+ Referred->getMemorySpace()->getAs<StackSpaceRegion>();
+
+ if (!ReferrerMemSpace || !ReferredMemSpace)
+ return false;
+
+ const auto *ReferrerFrame = ReferrerMemSpace->getStackFrame();
+ const auto *ReferredFrame = ReferredMemSpace->getStackFrame();
+
+ if (ReferrerMemSpace && ReferredMemSpace) {
+ if (ReferredFrame == PoppedFrame &&
+ ReferrerFrame->isParentOf(PoppedFrame)) {
+ V.emplace_back(Referrer, Referred);
+ return true;
+ }
+ }
+ return false;
+ }
public:
SmallVector<std::pair<const MemRegion *, const MemRegion *>, 10> V;
- CallBack(CheckerContext &CC) : Ctx(CC), CurSFC(CC.getStackFrame()) {}
+ CallBack(CheckerContext &CC) : Ctx(CC), PoppedFrame(CC.getStackFrame()) {}
bool HandleBinding(StoreManager &SMgr, Store S, const MemRegion *Region,
SVal Val) override {
+ const MemRegion *VR = Val.getAsRegion();
+ if (!VR)
+ return true;
+
+ if (checkForDanglingStackVariable(Region, VR))
+ return true;
+ // Check the globals for the same.
if (!isa<GlobalsSpaceRegion>(Region->getMemorySpace()))
return true;
- const MemRegion *VR = Val.getAsRegion();
- if (VR && isa<StackSpaceRegion>(VR->getMemorySpace()) &&
- !isArcManagedBlock(VR, Ctx) && !isNotInCurrentFrame(VR, Ctx))
+ if (VR && VR->hasStackStorage() && !isArcManagedBlock(VR, Ctx) &&
+ !isNotInCurrentFrame(VR, Ctx))
V.emplace_back(Region, VR);
return true;
}
@@ -344,19 +376,41 @@ void StackAddrEscapeChecker::checkEndFunction(const ReturnStmt *RS,
"invalid after returning from the function");
for (const auto &P : Cb.V) {
+ const MemRegion *Referrer = P.first;
+ const MemRegion *Referred = P.second;
+
// Generate a report for this bug.
+ const StringRef CommonSuffix =
+ "upon returning to the caller. This will be a dangling reference";
SmallString<128> Buf;
llvm::raw_svector_ostream Out(Buf);
- SourceRange Range = genName(Out, P.second, Ctx.getASTContext());
- Out << " is still referred to by the ";
- if (isa<StaticGlobalSpaceRegion>(P.first->getMemorySpace()))
- Out << "static";
- else
- Out << "global";
- Out << " variable '";
- const VarRegion *VR = cast<VarRegion>(P.first->getBaseRegion());
- Out << *VR->getDecl()
- << "' upon returning to the caller. This will be a dangling reference";
+ const SourceRange Range = genName(Out, Referred, Ctx.getASTContext());
+
+ if (isa<CXXTempObjectRegion>(Referrer)) {
+ Out << " is still referred to by a temporary object on the stack "
+ << CommonSuffix;
+ auto Report =
+ std::make_unique<PathSensitiveBugReport>(*BT_stackleak, Out.str(), N);
+ Ctx.emitReport(std::move(Report));
+ return;
+ }
+
+ const StringRef ReferrerMemorySpace = [](const MemSpaceRegion *Space) {
+ if (isa<StaticGlobalSpaceRegion>(Space))
+ return "static";
+ if (isa<GlobalsSpaceRegion>(Space))
+ return "global";
+ assert(isa<StackSpaceRegion>(Space));
+ return "stack";
+ }(Referrer->getMemorySpace());
+
+ // This cast is supposed to succeed.
+ const VarRegion *ReferrerVar = cast<VarRegion>(Referrer->getBaseRegion());
+ const std::string ReferrerVarName =
+ ReferrerVar->getDecl()->getDeclName().getAsString();
+
+ Out << " is still referred to by the " << ReferrerMemorySpace
+ << " variable '" << ReferrerVarName << "' " << CommonSuffix;
auto Report =
std::make_unique<PathSensitiveBugReport>(*BT_stackleak, Out.str(), N);
if (Range.isValid())
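For illustration, a minimal sketch of the new dangling-stack-variable situation the callback now reports; the function and variable names are invented:

    // Illustrative only: after leak() returns, the caller's stack variable
    // 'p' still refers to 'local', which no longer exists.
    void leak(int **p) {
      int local = 42;
      *p = &local;
    }

    void caller() {
      int *p = nullptr;
      leak(&p);
      // 'local' is still referred to by the stack variable 'p' here.
    }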
diff --git a/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp
index e758b465af1b..e8b963a535d8 100644
--- a/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp
@@ -568,6 +568,7 @@ public:
bool DisplayLoadedSummaries = false;
bool ModelPOSIX = false;
+ bool ShouldAssumeControlledEnvironment = false;
private:
Optional<Summary> findFunctionSummary(const FunctionDecl *FD,
@@ -1433,6 +1434,20 @@ void StdLibraryFunctionsChecker::initFunctionSummaries(
RetType{Ssize_tTy}),
GetLineSummary);
+ {
+ Summary GetenvSummary = Summary(NoEvalCall)
+ .ArgConstraint(NotNull(ArgNo(0)))
+ .Case({NotNull(Ret)});
+ // In untrusted environments the envvar might not exist.
+ if (!ShouldAssumeControlledEnvironment)
+ GetenvSummary.Case({NotNull(Ret)->negate()});
+
+ // char *getenv(const char *name);
+ addToFunctionSummaryMap(
+ "getenv", Signature(ArgTypes{ConstCharPtrTy}, RetType{CharPtrTy}),
+ std::move(GetenvSummary));
+ }
+
if (ModelPOSIX) {
// long a64l(const char *str64);
@@ -2645,11 +2660,12 @@ void StdLibraryFunctionsChecker::initFunctionSummaries(
void ento::registerStdCLibraryFunctionsChecker(CheckerManager &mgr) {
auto *Checker = mgr.registerChecker<StdLibraryFunctionsChecker>();
+ const AnalyzerOptions &Opts = mgr.getAnalyzerOptions();
Checker->DisplayLoadedSummaries =
- mgr.getAnalyzerOptions().getCheckerBooleanOption(
- Checker, "DisplayLoadedSummaries");
- Checker->ModelPOSIX =
- mgr.getAnalyzerOptions().getCheckerBooleanOption(Checker, "ModelPOSIX");
+ Opts.getCheckerBooleanOption(Checker, "DisplayLoadedSummaries");
+ Checker->ModelPOSIX = Opts.getCheckerBooleanOption(Checker, "ModelPOSIX");
+ Checker->ShouldAssumeControlledEnvironment =
+ Opts.ShouldAssumeControlledEnvironment;
}
bool ento::shouldRegisterStdCLibraryFunctionsChecker(
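For illustration, a minimal sketch of code affected by the new getenv() summary when ShouldAssumeControlledEnvironment is off; the function name is invented:

    // Illustrative only: getenv() may return NULL when the variable is not
    // set, so strlen() can be reached with a null pointer on that path.
    #include <stdlib.h>
    #include <string.h>

    size_t pathLength(void) {
      return strlen(getenv("PATH"));
    }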
diff --git a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
index dd65f8c035aa..26218b8e0454 100644
--- a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
@@ -14,6 +14,7 @@
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
@@ -1118,4 +1119,4 @@ void ento::registerStreamTesterChecker(CheckerManager &Mgr) {
bool ento::shouldRegisterStreamTesterChecker(const CheckerManager &Mgr) {
return true;
-} \ No newline at end of file
+}
diff --git a/clang/lib/StaticAnalyzer/Checkers/StringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StringChecker.cpp
new file mode 100644
index 000000000000..0d745f5d8d6f
--- /dev/null
+++ b/clang/lib/StaticAnalyzer/Checkers/StringChecker.cpp
@@ -0,0 +1,105 @@
+//=== StringChecker.cpp -------------------------------------------*- C++ -*--//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the modeling of the std::basic_string type.
+// This involves checking preconditions of the operations and applying the
+// effects of the operations, e.g. their post-conditions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
+#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+
+using namespace clang;
+using namespace ento;
+
+namespace {
+class StringChecker : public Checker<check::PreCall> {
+ BugType BT_Null{this, "Dereference of null pointer", categories::LogicError};
+ mutable const FunctionDecl *StringConstCharPtrCtor = nullptr;
+ mutable CanQualType SizeTypeTy;
+ const CallDescription TwoParamStdStringCtor = {
+ {"std", "basic_string", "basic_string"}, 2, 2};
+
+ bool isCharToStringCtor(const CallEvent &Call, const ASTContext &ACtx) const;
+
+public:
+ void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
+};
+
+bool StringChecker::isCharToStringCtor(const CallEvent &Call,
+ const ASTContext &ACtx) const {
+ if (!TwoParamStdStringCtor.matches(Call))
+ return false;
+ const auto *FD = dyn_cast<FunctionDecl>(Call.getDecl());
+ assert(FD);
+
+ // See if we already cached it.
+ if (StringConstCharPtrCtor && StringConstCharPtrCtor == FD)
+ return true;
+
+ // Verify that the parameters have the expected types:
+ // - arg 1: `const CharT *`
+ // - arg 2: some allocator - which is definitely not `size_t`.
+ const QualType Arg1Ty = Call.getArgExpr(0)->getType().getCanonicalType();
+ const QualType Arg2Ty = Call.getArgExpr(1)->getType().getCanonicalType();
+
+ if (!Arg1Ty->isPointerType())
+ return false;
+
+ // It makes sure that we don't select the `string(const char* p, size_t len)`
+ // overload accidentally.
+ if (Arg2Ty.getCanonicalType() == ACtx.getSizeType())
+ return false;
+
+ StringConstCharPtrCtor = FD; // Cache the decl of the right overload.
+ return true;
+}
+
+void StringChecker::checkPreCall(const CallEvent &Call,
+ CheckerContext &C) const {
+ if (!isCharToStringCtor(Call, C.getASTContext()))
+ return;
+ const auto Param = Call.getArgSVal(0).getAs<Loc>();
+ if (!Param.hasValue())
+ return;
+
+ // We managed to constrain the parameter to non-null.
+ ProgramStateRef NotNull, Null;
+ std::tie(NotNull, Null) = C.getState()->assume(*Param);
+
+ if (NotNull) {
+ const auto Callback = [Param](PathSensitiveBugReport &BR) -> std::string {
+ return BR.isInteresting(*Param) ? "Assuming the pointer is not null."
+ : "";
+ };
+
+ // Emit the note only if this operation actually constrained the pointer
+ // to be non-null.
+ C.addTransition(NotNull, Null ? C.getNoteTag(Callback) : nullptr);
+ return;
+ }
+
+ // We found a path on which the parameter is NULL.
+ if (ExplodedNode *N = C.generateErrorNode(C.getState())) {
+ auto R = std::make_unique<PathSensitiveBugReport>(
+ BT_Null, "The parameter must not be null", N);
+ bugreporter::trackExpressionValue(N, Call.getArgExpr(0), *R);
+ C.emitReport(std::move(R));
+ }
+}
+
+} // end anonymous namespace
+
+void ento::registerStringChecker(CheckerManager &Mgr) {
+ Mgr.registerChecker<StringChecker>();
+}
+
+bool ento::shouldRegisterStringChecker(const CheckerManager &) { return true; }
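For illustration, a minimal sketch of the construct the new checker warns about; lookup() is an invented function that may return a null pointer:

    // Illustrative only: if lookup() can return nullptr, constructing the
    // std::string below from that pointer is what the checker reports.
    #include <string>

    const char *lookup();

    std::string asString() {
      return std::string(lookup());
    }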
diff --git a/clang/lib/StaticAnalyzer/Checkers/UnixAPIChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/UnixAPIChecker.cpp
index 381334de068e..2244cdb96d4f 100644
--- a/clang/lib/StaticAnalyzer/Checkers/UnixAPIChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/UnixAPIChecker.cpp
@@ -110,7 +110,7 @@ void UnixAPIMisuseChecker::checkPreStmt(const CallExpr *CE,
// Don't treat functions in namespaces with the same name a Unix function
// as a call to the Unix function.
const DeclContext *NamespaceCtx = FD->getEnclosingNamespaceContext();
- if (NamespaceCtx && isa<NamespaceDecl>(NamespaceCtx))
+ if (isa_and_nonnull<NamespaceDecl>(NamespaceCtx))
return;
StringRef FName = C.getCalleeName(FD);
@@ -466,7 +466,7 @@ void UnixAPIPortabilityChecker::checkPreStmt(const CallExpr *CE,
// Don't treat functions in namespaces with the same name a Unix function
// as a call to the Unix function.
const DeclContext *NamespaceCtx = FD->getEnclosingNamespaceContext();
- if (NamespaceCtx && isa<NamespaceDecl>(NamespaceCtx))
+ if (isa_and_nonnull<NamespaceDecl>(NamespaceCtx))
return;
StringRef FName = C.getCalleeName(FD);
diff --git a/clang/lib/StaticAnalyzer/Checkers/ValistChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ValistChecker.cpp
index dde5912b6d6e..60da4fca12e6 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ValistChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ValistChecker.cpp
@@ -15,6 +15,7 @@
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
@@ -126,15 +127,15 @@ void ValistChecker::checkPreCall(const CallEvent &Call,
CheckerContext &C) const {
if (!Call.isGlobalCFunction())
return;
- if (Call.isCalled(VaStart))
+ if (VaStart.matches(Call))
checkVAListStartCall(Call, C, false);
- else if (Call.isCalled(VaCopy))
+ else if (VaCopy.matches(Call))
checkVAListStartCall(Call, C, true);
- else if (Call.isCalled(VaEnd))
+ else if (VaEnd.matches(Call))
checkVAListEndCall(Call, C);
else {
for (auto FuncInfo : VAListAccepters) {
- if (!Call.isCalled(FuncInfo.Func))
+ if (!FuncInfo.Func.matches(Call))
continue;
bool Symbolic;
const MemRegion *VAList =
diff --git a/clang/lib/StaticAnalyzer/Checkers/VforkChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/VforkChecker.cpp
index 8f147026ae19..04e6603b4cbe 100644
--- a/clang/lib/StaticAnalyzer/Checkers/VforkChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/VforkChecker.cpp
@@ -9,7 +9,7 @@
// This file defines vfork checker which checks for dangerous uses of vfork.
// Vforked process shares memory (including stack) with parent so it's
// range of actions is significantly limited: can't write variables,
-// can't call functions not in whitelist, etc. For more details, see
+// can't call functions not in the allowed list, etc. For more details, see
// http://man7.org/linux/man-pages/man2/vfork.2.html
//
// This checker checks for prohibited constructs in vforked process.
@@ -44,13 +44,14 @@ namespace {
class VforkChecker : public Checker<check::PreCall, check::PostCall,
check::Bind, check::PreStmt<ReturnStmt>> {
mutable std::unique_ptr<BuiltinBug> BT;
- mutable llvm::SmallSet<const IdentifierInfo *, 10> VforkWhitelist;
+ mutable llvm::SmallSet<const IdentifierInfo *, 10> VforkAllowlist;
mutable const IdentifierInfo *II_vfork;
static bool isChildProcess(const ProgramStateRef State);
bool isVforkCall(const Decl *D, CheckerContext &C) const;
- bool isCallWhitelisted(const IdentifierInfo *II, CheckerContext &C) const;
+ bool isCallExplicitelyAllowed(const IdentifierInfo *II,
+ CheckerContext &C) const;
void reportBug(const char *What, CheckerContext &C,
const char *Details = nullptr) const;
@@ -93,9 +94,9 @@ bool VforkChecker::isVforkCall(const Decl *D, CheckerContext &C) const {
}
// Returns true iff ok to call function after successful vfork.
-bool VforkChecker::isCallWhitelisted(const IdentifierInfo *II,
- CheckerContext &C) const {
- if (VforkWhitelist.empty()) {
+bool VforkChecker::isCallExplicitelyAllowed(const IdentifierInfo *II,
+ CheckerContext &C) const {
+ if (VforkAllowlist.empty()) {
// According to manpage.
const char *ids[] = {
"_Exit",
@@ -112,10 +113,10 @@ bool VforkChecker::isCallWhitelisted(const IdentifierInfo *II,
ASTContext &AC = C.getASTContext();
for (const char **id = ids; *id; ++id)
- VforkWhitelist.insert(&AC.Idents.get(*id));
+ VforkAllowlist.insert(&AC.Idents.get(*id));
}
- return VforkWhitelist.count(II);
+ return VforkAllowlist.count(II);
}
void VforkChecker::reportBug(const char *What, CheckerContext &C,
@@ -179,12 +180,13 @@ void VforkChecker::checkPostCall(const CallEvent &Call,
C.addTransition(ChildState);
}
-// Prohibit calls to non-whitelist functions in child process.
+// Prohibit calls to functions in child process which are not explicitly
+// allowed.
void VforkChecker::checkPreCall(const CallEvent &Call,
CheckerContext &C) const {
ProgramStateRef State = C.getState();
- if (isChildProcess(State)
- && !isCallWhitelisted(Call.getCalleeIdentifier(), C))
+ if (isChildProcess(State) &&
+ !isCallExplicitelyAllowed(Call.getCalleeIdentifier(), C))
reportBug("This function call", C);
}
diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.h b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.h
index ed4577755457..ec6a7144fa45 100644
--- a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.h
+++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.h
@@ -29,7 +29,7 @@ class Expr;
/// values).
///
/// For more context see Static Analyzer checkers documentation - specifically
-/// webkit.UncountedCallArgsChecker checker. Whitelist of transformations:
+/// webkit.UncountedCallArgsChecker checker. Allowed list of transformations:
/// - constructors of ref-counted types (including factory methods)
/// - getters of ref-counted types
/// - member overloaded operators
diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp
index d70bd9489d2c..e6d0948f71bb 100644
--- a/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp
@@ -68,8 +68,7 @@ public:
if (auto *F = CE->getDirectCallee()) {
// Skip the first argument for overloaded member operators (e. g. lambda
// or std::function call operator).
- unsigned ArgIdx =
- isa<CXXOperatorCallExpr>(CE) && dyn_cast_or_null<CXXMethodDecl>(F);
+ unsigned ArgIdx = isa<CXXOperatorCallExpr>(CE) && isa_and_nonnull<CXXMethodDecl>(F);
for (auto P = F->param_begin();
// FIXME: Also check variadic function parameters.
diff --git a/clang/lib/StaticAnalyzer/Checkers/cert/InvalidPtrChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/cert/InvalidPtrChecker.cpp
new file mode 100644
index 000000000000..d7d573cd2d3b
--- /dev/null
+++ b/clang/lib/StaticAnalyzer/Checkers/cert/InvalidPtrChecker.cpp
@@ -0,0 +1,280 @@
+//== InvalidPtrChecker.cpp ------------------------------------- -*- C++ -*--=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines InvalidPtrChecker, which finds uses of possibly
+// invalidated pointers.
+// CERT SEI Rules ENV31-C and ENV34-C
+// For more information see:
+// https://wiki.sei.cmu.edu/confluence/x/8tYxBQ
+// https://wiki.sei.cmu.edu/confluence/x/5NUxBQ
+//===----------------------------------------------------------------------===//
+
+#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
+#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
+#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+
+using namespace clang;
+using namespace ento;
+
+namespace {
+
+class InvalidPtrChecker
+ : public Checker<check::Location, check::BeginFunction, check::PostCall> {
+private:
+ BugType BT{this, "Use of invalidated pointer", categories::MemoryError};
+
+ void EnvpInvalidatingCall(const CallEvent &Call, CheckerContext &C) const;
+
+ using HandlerFn = void (InvalidPtrChecker::*)(const CallEvent &Call,
+ CheckerContext &C) const;
+
+ // SEI CERT ENV31-C
+ const CallDescriptionMap<HandlerFn> EnvpInvalidatingFunctions = {
+ {{"setenv", 3}, &InvalidPtrChecker::EnvpInvalidatingCall},
+ {{"unsetenv", 1}, &InvalidPtrChecker::EnvpInvalidatingCall},
+ {{"putenv", 1}, &InvalidPtrChecker::EnvpInvalidatingCall},
+ {{"_putenv_s", 2}, &InvalidPtrChecker::EnvpInvalidatingCall},
+ {{"_wputenv_s", 2}, &InvalidPtrChecker::EnvpInvalidatingCall},
+ };
+
+ void postPreviousReturnInvalidatingCall(const CallEvent &Call,
+ CheckerContext &C) const;
+
+ // SEI CERT ENV34-C
+ const CallDescriptionMap<HandlerFn> PreviousCallInvalidatingFunctions = {
+ {{"getenv", 1}, &InvalidPtrChecker::postPreviousReturnInvalidatingCall},
+ {{"setlocale", 2},
+ &InvalidPtrChecker::postPreviousReturnInvalidatingCall},
+ {{"strerror", 1}, &InvalidPtrChecker::postPreviousReturnInvalidatingCall},
+ {{"localeconv", 0},
+ &InvalidPtrChecker::postPreviousReturnInvalidatingCall},
+ {{"asctime", 1}, &InvalidPtrChecker::postPreviousReturnInvalidatingCall},
+ };
+
+public:
+ // Obtain the environment pointer from 'main()' (if present).
+ void checkBeginFunction(CheckerContext &C) const;
+
+ // Handle functions in EnvpInvalidatingFunctions that invalidate the
+ // environment pointer of 'main()', and functions in
+ // PreviousCallInvalidatingFunctions.
+ // Also, check whether an invalidated region is passed as an argument to a
+ // conservatively evaluated function call.
+ void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
+
+ // Check if invalidated region is being dereferenced.
+ void checkLocation(SVal l, bool isLoad, const Stmt *S,
+ CheckerContext &C) const;
+};
+
+} // namespace
+
+// Set of memory regions that were invalidated
+REGISTER_SET_WITH_PROGRAMSTATE(InvalidMemoryRegions, const MemRegion *)
+
+// Stores the region of the environment pointer of 'main' (if present).
+// Note: This pointer has type 'const MemRegion *'; however, the trait is only
+// specialized for 'const void*' and 'void*'.
+REGISTER_TRAIT_WITH_PROGRAMSTATE(EnvPtrRegion, const void *)
+
+// Stores key-value pairs, where the key is a function declaration and the
+// value is a pointer to the memory region returned by the previous call of
+// this function.
+REGISTER_MAP_WITH_PROGRAMSTATE(PreviousCallResultMap, const FunctionDecl *,
+ const MemRegion *)
+
+void InvalidPtrChecker::EnvpInvalidatingCall(const CallEvent &Call,
+ CheckerContext &C) const {
+ StringRef FunctionName = Call.getCalleeIdentifier()->getName();
+ ProgramStateRef State = C.getState();
+ const auto *Reg = State->get<EnvPtrRegion>();
+ if (!Reg)
+ return;
+ const auto *SymbolicEnvPtrRegion =
+ reinterpret_cast<const MemRegion *>(const_cast<const void *>(Reg));
+
+ State = State->add<InvalidMemoryRegions>(SymbolicEnvPtrRegion);
+
+ const NoteTag *Note =
+ C.getNoteTag([SymbolicEnvPtrRegion, FunctionName](
+ PathSensitiveBugReport &BR, llvm::raw_ostream &Out) {
+ if (!BR.isInteresting(SymbolicEnvPtrRegion))
+ return;
+ Out << '\'' << FunctionName
+ << "' call may invalidate the environment parameter of 'main'";
+ });
+
+ C.addTransition(State, Note);
+}
+
+void InvalidPtrChecker::postPreviousReturnInvalidatingCall(
+ const CallEvent &Call, CheckerContext &C) const {
+ ProgramStateRef State = C.getState();
+
+ const NoteTag *Note = nullptr;
+ const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Call.getDecl());
+ // Invalidate the region of the previously returned pointer - if there was
+ // one.
+ if (const MemRegion *const *Reg = State->get<PreviousCallResultMap>(FD)) {
+ const MemRegion *PrevReg = *Reg;
+ State = State->add<InvalidMemoryRegions>(PrevReg);
+ Note = C.getNoteTag([PrevReg, FD](PathSensitiveBugReport &BR,
+ llvm::raw_ostream &Out) {
+ if (!BR.isInteresting(PrevReg))
+ return;
+ Out << '\'';
+ FD->getNameForDiagnostic(Out, FD->getASTContext().getLangOpts(), true);
+ Out << "' call may invalidate the the result of the previous " << '\'';
+ FD->getNameForDiagnostic(Out, FD->getASTContext().getLangOpts(), true);
+ Out << '\'';
+ });
+ }
+
+ const LocationContext *LCtx = C.getLocationContext();
+ const auto *CE = cast<CallExpr>(Call.getOriginExpr());
+
+ // Function call will return a pointer to the new symbolic region.
+ DefinedOrUnknownSVal RetVal = C.getSValBuilder().conjureSymbolVal(
+ CE, LCtx, CE->getType(), C.blockCount());
+ State = State->BindExpr(CE, LCtx, RetVal);
+
+ // Remember this region.
+ const auto *SymRegOfRetVal = cast<SymbolicRegion>(RetVal.getAsRegion());
+ const MemRegion *MR =
+ const_cast<MemRegion *>(SymRegOfRetVal->getBaseRegion());
+ State = State->set<PreviousCallResultMap>(FD, MR);
+
+ ExplodedNode *Node = C.addTransition(State, Note);
+ const NoteTag *PreviousCallNote =
+ C.getNoteTag([MR](PathSensitiveBugReport &BR, llvm::raw_ostream &Out) {
+ if (!BR.isInteresting(MR))
+ return;
+ Out << '\'' << "'previous function call was here" << '\'';
+ });
+
+ C.addTransition(State, Node, PreviousCallNote);
+}
+
+// TODO: This seems really ugly. Simplify this.
+static const MemRegion *findInvalidatedSymbolicBase(ProgramStateRef State,
+ const MemRegion *Reg) {
+ while (Reg) {
+ if (State->contains<InvalidMemoryRegions>(Reg))
+ return Reg;
+ const auto *SymBase = Reg->getSymbolicBase();
+ if (!SymBase)
+ break;
+ const auto *SRV = dyn_cast<SymbolRegionValue>(SymBase->getSymbol());
+ if (!SRV)
+ break;
+ Reg = SRV->getRegion();
+ if (const auto *VarReg = dyn_cast<VarRegion>(SRV->getRegion()))
+ Reg = VarReg;
+ }
+ return nullptr;
+}
+
+// Handle functions in EnvpInvalidatingFunctions that invalidate the
+// environment pointer of 'main()'. Also, check whether an invalidated region
+// is passed to a function call as an argument.
+void InvalidPtrChecker::checkPostCall(const CallEvent &Call,
+ CheckerContext &C) const {
+ // Check if function invalidates 'envp' argument of 'main'
+ if (const auto *Handler = EnvpInvalidatingFunctions.lookup(Call))
+ (this->**Handler)(Call, C);
+
+ // Check if function invalidates the result of previous call
+ if (const auto *Handler = PreviousCallInvalidatingFunctions.lookup(Call))
+ (this->**Handler)(Call, C);
+
+ // Check if one of the arguments of the function call is invalidated
+
+ // If call was inlined, don't report invalidated argument
+ if (C.wasInlined)
+ return;
+
+ ProgramStateRef State = C.getState();
+
+ for (unsigned I = 0, NumArgs = Call.getNumArgs(); I < NumArgs; ++I) {
+
+ if (const auto *SR = dyn_cast_or_null<SymbolicRegion>(
+ Call.getArgSVal(I).getAsRegion())) {
+ if (const MemRegion *InvalidatedSymbolicBase =
+ findInvalidatedSymbolicBase(State, SR)) {
+ ExplodedNode *ErrorNode = C.generateNonFatalErrorNode();
+ if (!ErrorNode)
+ return;
+
+ SmallString<256> Msg;
+ llvm::raw_svector_ostream Out(Msg);
+ Out << "use of invalidated pointer '";
+ Call.getArgExpr(I)->printPretty(Out, /*Helper=*/nullptr,
+ C.getASTContext().getPrintingPolicy());
+ Out << "' in a function call";
+
+ auto Report =
+ std::make_unique<PathSensitiveBugReport>(BT, Out.str(), ErrorNode);
+ Report->markInteresting(InvalidatedSymbolicBase);
+ Report->addRange(Call.getArgSourceRange(I));
+ C.emitReport(std::move(Report));
+ }
+ }
+ }
+}
+
+// Obtain the environment pointer from 'main()', if present.
+void InvalidPtrChecker::checkBeginFunction(CheckerContext &C) const {
+ if (!C.inTopFrame())
+ return;
+
+ const auto *FD = dyn_cast<FunctionDecl>(C.getLocationContext()->getDecl());
+ if (!FD || FD->param_size() != 3 || !FD->isMain())
+ return;
+
+ ProgramStateRef State = C.getState();
+ const MemRegion *EnvpReg =
+ State->getRegion(FD->parameters()[2], C.getLocationContext());
+
+ // Save the memory region pointed by the environment pointer parameter of
+ // 'main'.
+ State = State->set<EnvPtrRegion>(
+ reinterpret_cast<void *>(const_cast<MemRegion *>(EnvpReg)));
+ C.addTransition(State);
+}
+
+// Check if invalidated region is being dereferenced.
+void InvalidPtrChecker::checkLocation(SVal Loc, bool isLoad, const Stmt *S,
+ CheckerContext &C) const {
+ ProgramStateRef State = C.getState();
+
+ // Ignore memory operations involving 'non-invalidated' locations.
+ const MemRegion *InvalidatedSymbolicBase =
+ findInvalidatedSymbolicBase(State, Loc.getAsRegion());
+ if (!InvalidatedSymbolicBase)
+ return;
+
+ ExplodedNode *ErrorNode = C.generateNonFatalErrorNode();
+ if (!ErrorNode)
+ return;
+
+ auto Report = std::make_unique<PathSensitiveBugReport>(
+ BT, "dereferencing an invalid pointer", ErrorNode);
+ Report->markInteresting(InvalidatedSymbolicBase);
+ C.emitReport(std::move(Report));
+}
+
+void ento::registerInvalidPtrChecker(CheckerManager &Mgr) {
+ Mgr.registerChecker<InvalidPtrChecker>();
+}
+
+bool ento::shouldRegisterInvalidPtrChecker(const CheckerManager &) {
+ return true;
+}
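For illustration, a minimal sketch of the CERT ENV34-C pattern the new checker reports; the environment variable names are arbitrary:

    // Illustrative only: the second getenv() call may invalidate the buffer
    // returned by the first one, so using 'tmp' afterwards is reported.
    #include <stdio.h>
    #include <stdlib.h>

    void printBoth(void) {
      char *tmp = getenv("TMPDIR");
      char *home = getenv("HOME");
      printf("%s %s\n", tmp, home);
    }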
diff --git a/clang/lib/StaticAnalyzer/Checkers/cert/PutenvWithAutoChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/cert/PutenvWithAutoChecker.cpp
index 1c67bbd77ec8..ed3bdafad084 100644
--- a/clang/lib/StaticAnalyzer/Checkers/cert/PutenvWithAutoChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/cert/PutenvWithAutoChecker.cpp
@@ -17,6 +17,7 @@
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"
@@ -38,7 +39,7 @@ public:
void PutenvWithAutoChecker::checkPostCall(const CallEvent &Call,
CheckerContext &C) const {
- if (!Call.isCalled(Putenv))
+ if (!Putenv.matches(Call))
return;
SVal ArgV = Call.getArgSVal(0);
diff --git a/clang/lib/StaticAnalyzer/Core/BugReporter.cpp b/clang/lib/StaticAnalyzer/Core/BugReporter.cpp
index d6f69ae03afe..771ed2578f6d 100644
--- a/clang/lib/StaticAnalyzer/Core/BugReporter.cpp
+++ b/clang/lib/StaticAnalyzer/Core/BugReporter.cpp
@@ -188,6 +188,9 @@ public:
PathPieces &getMutablePieces() { return PD->getMutablePieces(); }
bool shouldAddPathEdges() const { return Consumer->shouldAddPathEdges(); }
+ bool shouldAddControlNotes() const {
+ return Consumer->shouldAddControlNotes();
+ }
bool shouldGenerateDiagnostics() const {
return Consumer->shouldGenerateDiagnostics();
}
@@ -534,10 +537,10 @@ static void removeEdgesToDefaultInitializers(PathPieces &Pieces) {
if (auto *CF = dyn_cast<PathDiagnosticControlFlowPiece>(I->get())) {
const Stmt *Start = CF->getStartLocation().asStmt();
const Stmt *End = CF->getEndLocation().asStmt();
- if (Start && isa<CXXDefaultInitExpr>(Start)) {
+ if (isa_and_nonnull<CXXDefaultInitExpr>(Start)) {
I = Pieces.erase(I);
continue;
- } else if (End && isa<CXXDefaultInitExpr>(End)) {
+ } else if (isa_and_nonnull<CXXDefaultInitExpr>(End)) {
PathPieces::iterator Next = std::next(I);
if (Next != E) {
if (auto *NextCF =
@@ -1232,8 +1235,11 @@ void PathDiagnosticBuilder::generatePathDiagnosticsForNode(
} else if (auto BE = P.getAs<BlockEdge>()) {
- if (!C.shouldAddPathEdges()) {
+ if (C.shouldAddControlNotes()) {
generateMinimalDiagForBlockEdge(C, *BE);
+ }
+
+ if (!C.shouldAddPathEdges()) {
return;
}
@@ -1254,12 +1260,14 @@ void PathDiagnosticBuilder::generatePathDiagnosticsForNode(
// do-while statements are explicitly excluded here
auto p = std::make_shared<PathDiagnosticEventPiece>(
- L, "Looping back to the head "
- "of the loop");
+ L, "Looping back to the head of the loop");
p->setPrunable(true);
addEdgeToPath(C.getActivePath(), PrevLoc, p->getLocation());
- C.getActivePath().push_front(std::move(p));
+ // We might've added a very similar control node already
+ if (!C.shouldAddControlNotes()) {
+ C.getActivePath().push_front(std::move(p));
+ }
if (const auto *CS = dyn_cast_or_null<CompoundStmt>(Body)) {
addEdgeToPath(C.getActivePath(), PrevLoc,
@@ -1300,10 +1308,13 @@ void PathDiagnosticBuilder::generatePathDiagnosticsForNode(
auto PE = std::make_shared<PathDiagnosticEventPiece>(L, str);
PE->setPrunable(true);
addEdgeToPath(C.getActivePath(), PrevLoc, PE->getLocation());
- C.getActivePath().push_front(std::move(PE));
+
+ // We might've added a very similar control node already
+ if (!C.shouldAddControlNotes()) {
+ C.getActivePath().push_front(std::move(PE));
+ }
}
- } else if (isa<BreakStmt>(Term) || isa<ContinueStmt>(Term) ||
- isa<GotoStmt>(Term)) {
+ } else if (isa<BreakStmt, ContinueStmt, GotoStmt>(Term)) {
PathDiagnosticLocation L(Term, SM, C.getCurrLocationContext());
addEdgeToPath(C.getActivePath(), PrevLoc, L);
}
@@ -1342,9 +1353,7 @@ static const Stmt *getStmtParent(const Stmt *S, const ParentMap &PM) {
if (!S)
break;
- if (isa<FullExpr>(S) ||
- isa<CXXBindTemporaryExpr>(S) ||
- isa<SubstNonTypeTemplateParmExpr>(S))
+ if (isa<FullExpr, CXXBindTemporaryExpr, SubstNonTypeTemplateParmExpr>(S))
continue;
break;
@@ -1446,7 +1455,7 @@ static void addContextEdges(PathPieces &pieces, const LocationContext *LC) {
break;
// If the source is in the same context, we're already good.
- if (llvm::find(SrcContexts, DstContext) != SrcContexts.end())
+ if (llvm::is_contained(SrcContexts, DstContext))
break;
// Update the subexpression node to point to the context edge.
@@ -1540,9 +1549,8 @@ static void simplifySimpleBranches(PathPieces &pieces) {
// We only perform this transformation for specific branch kinds.
// We don't want to do this for do..while, for example.
- if (!(isa<ForStmt>(s1Start) || isa<WhileStmt>(s1Start) ||
- isa<IfStmt>(s1Start) || isa<ObjCForCollectionStmt>(s1Start) ||
- isa<CXXForRangeStmt>(s1Start)))
+ if (!isa<ForStmt, WhileStmt, IfStmt, ObjCForCollectionStmt,
+ CXXForRangeStmt>(s1Start))
continue;
// Is s1End the branch condition?
@@ -3181,7 +3189,7 @@ findExecutedLines(const SourceManager &SM, const ExplodedNode *N) {
P = N->getParentMap().getParent(RS);
}
- if (P && (isa<SwitchCase>(P) || isa<LabelStmt>(P)))
+ if (isa_and_nonnull<SwitchCase, LabelStmt>(P))
populateExecutedLinesWithStmt(P, SM, *ExecutedLines);
}
diff --git a/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp b/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp
index d06a2d493303..8774dc3323ab 100644
--- a/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp
+++ b/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp
@@ -344,45 +344,178 @@ BugReporterVisitor::getDefaultEndPath(const BugReporterContext &BRC,
}
//===----------------------------------------------------------------------===//
+// Implementation of NoStateChangeFuncVisitor.
+//===----------------------------------------------------------------------===//
+
+bool NoStateChangeFuncVisitor::isModifiedInFrame(const ExplodedNode *N) {
+ const LocationContext *Ctx = N->getLocationContext();
+ const StackFrameContext *SCtx = Ctx->getStackFrame();
+ if (!FramesModifyingCalculated.count(SCtx))
+ findModifyingFrames(N);
+ return FramesModifying.count(SCtx);
+}
+
+void NoStateChangeFuncVisitor::markFrameAsModifying(
+ const StackFrameContext *SCtx) {
+ while (!SCtx->inTopFrame()) {
+ auto p = FramesModifying.insert(SCtx);
+ if (!p.second)
+ break; // Frame and all its parents already inserted.
+
+ SCtx = SCtx->getParent()->getStackFrame();
+ }
+}
+
+static const ExplodedNode *getMatchingCallExitEnd(const ExplodedNode *N) {
+ assert(N->getLocationAs<CallEnter>());
+ // The stackframe of the callee is only found in the nodes succeeding
+ // the CallEnter node. CallEnter's stack frame refers to the caller.
+ const StackFrameContext *OrigSCtx = N->getFirstSucc()->getStackFrame();
+
+ // Similarly, the nodes preceding CallExitEnd refer to the callee's stack
+ // frame.
+ auto IsMatchingCallExitEnd = [OrigSCtx](const ExplodedNode *N) {
+ return N->getLocationAs<CallExitEnd>() &&
+ OrigSCtx == N->getFirstPred()->getStackFrame();
+ };
+ while (N && !IsMatchingCallExitEnd(N)) {
+ assert(N->succ_size() <= 1 &&
+ "This function is to be used on the trimmed ExplodedGraph!");
+ N = N->getFirstSucc();
+ }
+ return N;
+}
+
+void NoStateChangeFuncVisitor::findModifyingFrames(
+ const ExplodedNode *const CallExitBeginN) {
+
+ assert(CallExitBeginN->getLocationAs<CallExitBegin>());
+
+ const StackFrameContext *const OriginalSCtx =
+ CallExitBeginN->getLocationContext()->getStackFrame();
+
+ const ExplodedNode *CurrCallExitBeginN = CallExitBeginN;
+ const StackFrameContext *CurrentSCtx = OriginalSCtx;
+
+ for (const ExplodedNode *CurrN = CallExitBeginN; CurrN;
+ CurrN = CurrN->getFirstPred()) {
+ // Found a new inlined call.
+ if (CurrN->getLocationAs<CallExitBegin>()) {
+ CurrCallExitBeginN = CurrN;
+ CurrentSCtx = CurrN->getStackFrame();
+ FramesModifyingCalculated.insert(CurrentSCtx);
+ // We won't see a change in between two identical exploded nodes: skip.
+ continue;
+ }
+
+ if (auto CE = CurrN->getLocationAs<CallEnter>()) {
+ if (const ExplodedNode *CallExitEndN = getMatchingCallExitEnd(CurrN))
+ if (wasModifiedInFunction(CurrN, CallExitEndN))
+ markFrameAsModifying(CurrentSCtx);
+
+ // We exited this inlined call; let's update the stack frame.
+ CurrentSCtx = CurrN->getStackFrame();
+
+ // Stop calculating at the current function, but always regard it as
+ // modifying, so we can avoid notes like this:
+ // void f(Foo &F) {
+ // F.field = 0; // note: 0 assigned to 'F.field'
+ // // note: returning without writing to 'F.field'
+ // }
+ if (CE->getCalleeContext() == OriginalSCtx) {
+ markFrameAsModifying(CurrentSCtx);
+ break;
+ }
+ }
+
+ if (wasModifiedBeforeCallExit(CurrN, CurrCallExitBeginN))
+ markFrameAsModifying(CurrentSCtx);
+ }
+}
+
+PathDiagnosticPieceRef NoStateChangeFuncVisitor::VisitNode(
+ const ExplodedNode *N, BugReporterContext &BR, PathSensitiveBugReport &R) {
+
+ const LocationContext *Ctx = N->getLocationContext();
+ const StackFrameContext *SCtx = Ctx->getStackFrame();
+ ProgramStateRef State = N->getState();
+ auto CallExitLoc = N->getLocationAs<CallExitBegin>();
+
+ // No diagnostic if region was modified inside the frame.
+ if (!CallExitLoc || isModifiedInFrame(N))
+ return nullptr;
+
+ CallEventRef<> Call =
+ BR.getStateManager().getCallEventManager().getCaller(SCtx, State);
+
+ // Optimistically suppress uninitialized value bugs that result
+ // from system headers having a chance to initialize the value
+ // but failing to do so; this is unlikely to be the system header's fault.
+ // It's much more likely a situation in which the function has a failure
+ // mode that the user decided not to check. If we want to hunt such
+ // omitted checks, we should provide an explicit function-specific note
+ // describing the precondition under which the function isn't supposed to
+ // initialize its out-parameter, and additionally check that such
+ // precondition can actually be fulfilled on the current path.
+ if (Call->isInSystemHeader()) {
+ // We make an exception for system header functions that have no branches.
+ // Such functions unconditionally fail to initialize the variable.
+ // If they call other functions that have more paths within them,
+ // this suppression would still apply when we visit these inner functions.
+ // One common example of a standard function that doesn't ever initialize
+ // its out parameter is operator placement new; it's up to the follow-up
+ // constructor (if any) to initialize the memory.
+ if (!N->getStackFrame()->getCFG()->isLinear()) {
+ static int i = 0;
+ R.markInvalid(&i, nullptr);
+ }
+ return nullptr;
+ }
+
+ if (const auto *MC = dyn_cast<ObjCMethodCall>(Call)) {
+ // If we failed to construct a piece for self, we still want to check
+ // whether the entity of interest is in a parameter.
+ if (PathDiagnosticPieceRef Piece = maybeEmitNoteForObjCSelf(R, *MC, N))
+ return Piece;
+ }
+
+ if (const auto *CCall = dyn_cast<CXXConstructorCall>(Call)) {
+ // Do not generate diagnostics for not modified parameters in
+ // constructors.
+ return maybeEmitNoteForCXXThis(R, *CCall, N);
+ }
+
+ return maybeEmitNoteForParameters(R, *Call, N);
+}
+
+//===----------------------------------------------------------------------===//
// Implementation of NoStoreFuncVisitor.
//===----------------------------------------------------------------------===//
namespace {
-
/// Put a diagnostic on return statement of all inlined functions
/// for which the region of interest \p RegionOfInterest was passed into,
/// but not written inside, and it has caused an undefined read or a null
/// pointer dereference outside.
-class NoStoreFuncVisitor final : public BugReporterVisitor {
+class NoStoreFuncVisitor final : public NoStateChangeFuncVisitor {
const SubRegion *RegionOfInterest;
MemRegionManager &MmrMgr;
const SourceManager &SM;
const PrintingPolicy &PP;
- bugreporter::TrackingKind TKind;
/// Recursion limit for dereferencing fields when looking for the
/// region of interest.
/// The limit of two indicates that we will dereference fields only once.
static const unsigned DEREFERENCE_LIMIT = 2;
- /// Frames writing into \c RegionOfInterest.
- /// This visitor generates a note only if a function does not write into
- /// a region of interest. This information is not immediately available
- /// by looking at the node associated with the exit from the function
- /// (usually the return statement). To avoid recomputing the same information
- /// many times (going up the path for each node and checking whether the
- /// region was written into) we instead lazily compute the
- /// stack frames along the path which write into the region of interest.
- llvm::SmallPtrSet<const StackFrameContext *, 32> FramesModifyingRegion;
- llvm::SmallPtrSet<const StackFrameContext *, 32> FramesModifyingCalculated;
-
using RegionVector = SmallVector<const MemRegion *, 5>;
public:
NoStoreFuncVisitor(const SubRegion *R, bugreporter::TrackingKind TKind)
- : RegionOfInterest(R), MmrMgr(R->getMemRegionManager()),
+ : NoStateChangeFuncVisitor(TKind), RegionOfInterest(R),
+ MmrMgr(R->getMemRegionManager()),
SM(MmrMgr.getContext().getSourceManager()),
- PP(MmrMgr.getContext().getPrintingPolicy()), TKind(TKind) {}
+ PP(MmrMgr.getContext().getPrintingPolicy()) {}
void Profile(llvm::FoldingSetNodeID &ID) const override {
static int Tag = 0;
@@ -395,11 +528,13 @@ public:
return static_cast<void *>(&Tag);
}
- PathDiagnosticPieceRef VisitNode(const ExplodedNode *N,
- BugReporterContext &BR,
- PathSensitiveBugReport &R) override;
-
private:
+ /// \return Whether \c RegionOfInterest was modified at \p CurrN compared to
+ /// the value it holds in \p CallExitBeginN.
+ virtual bool
+ wasModifiedBeforeCallExit(const ExplodedNode *CurrN,
+ const ExplodedNode *CallExitBeginN) override;
+
/// Attempts to find the region of interest in a given record decl,
/// by either following the base classes or fields.
/// Dereferences fields up to a given recursion limit.
@@ -411,20 +546,21 @@ private:
const MemRegion *R, const RegionVector &Vec = {},
int depth = 0);
- /// Check and lazily calculate whether the region of interest is
- /// modified in the stack frame to which \p N belongs.
- /// The calculation is cached in FramesModifyingRegion.
- bool isRegionOfInterestModifiedInFrame(const ExplodedNode *N) {
- const LocationContext *Ctx = N->getLocationContext();
- const StackFrameContext *SCtx = Ctx->getStackFrame();
- if (!FramesModifyingCalculated.count(SCtx))
- findModifyingFrames(N);
- return FramesModifyingRegion.count(SCtx);
- }
+ // Region of interest corresponds to an IVar, exiting a method
+ // which could have written into that IVar, but did not.
+ virtual PathDiagnosticPieceRef
+ maybeEmitNoteForObjCSelf(PathSensitiveBugReport &R,
+ const ObjCMethodCall &Call,
+ const ExplodedNode *N) override final;
+
+ virtual PathDiagnosticPieceRef
+ maybeEmitNoteForCXXThis(PathSensitiveBugReport &R,
+ const CXXConstructorCall &Call,
+ const ExplodedNode *N) override final;
- /// Write to \c FramesModifyingRegion all stack frames along
- /// the path in the current stack frame which modify \c RegionOfInterest.
- void findModifyingFrames(const ExplodedNode *N);
+ virtual PathDiagnosticPieceRef
+ maybeEmitNoteForParameters(PathSensitiveBugReport &R, const CallEvent &Call,
+ const ExplodedNode *N) override final;
/// Consume the information on the no-store stack frame in order to
/// either emit a note or suppress the report entirely.
@@ -436,22 +572,18 @@ private:
const MemRegion *MatchedRegion, StringRef FirstElement,
bool FirstIsReferenceType, unsigned IndirectionLevel);
- /// Pretty-print region \p MatchedRegion to \p os.
- /// \return Whether printing succeeded.
- bool prettyPrintRegionName(StringRef FirstElement, bool FirstIsReferenceType,
+ bool prettyPrintRegionName(const RegionVector &FieldChain,
const MemRegion *MatchedRegion,
- const RegionVector &FieldChain,
- int IndirectionLevel,
+ StringRef FirstElement, bool FirstIsReferenceType,
+ unsigned IndirectionLevel,
llvm::raw_svector_ostream &os);
- /// Print first item in the chain, return new separator.
- static StringRef prettyPrintFirstElement(StringRef FirstElement,
- bool MoreItemsExpected,
- int IndirectionLevel,
- llvm::raw_svector_ostream &os);
+ StringRef prettyPrintFirstElement(StringRef FirstElement,
+ bool MoreItemsExpected,
+ int IndirectionLevel,
+ llvm::raw_svector_ostream &os);
};
-
-} // end of anonymous namespace
+} // namespace
/// \return Whether the method declaration \p Parent
/// syntactically has a binary operation writing into the ivar \p Ivar.
@@ -486,25 +618,6 @@ static bool potentiallyWritesIntoIvar(const Decl *Parent,
return false;
}
-/// Get parameters associated with runtime definition in order
-/// to get the correct parameter name.
-static ArrayRef<ParmVarDecl *> getCallParameters(CallEventRef<> Call) {
- // Use runtime definition, if available.
- RuntimeDefinition RD = Call->getRuntimeDefinition();
- if (const auto *FD = dyn_cast_or_null<FunctionDecl>(RD.getDecl()))
- return FD->parameters();
- if (const auto *MD = dyn_cast_or_null<ObjCMethodDecl>(RD.getDecl()))
- return MD->parameters();
-
- return Call->parameters();
-}
-
-/// \return whether \p Ty points to a const type, or is a const reference.
-static bool isPointerToConst(QualType Ty) {
- return !Ty->getPointeeType().isNull() &&
- Ty->getPointeeType().getCanonicalType().isConstQualified();
-}
-
/// Attempts to find the region of interest in a given CXX decl,
/// by either following the base classes or fields.
/// Dereferences fields up to a given recursion limit.
@@ -564,68 +677,66 @@ NoStoreFuncVisitor::findRegionOfInterestInRecord(
}
PathDiagnosticPieceRef
-NoStoreFuncVisitor::VisitNode(const ExplodedNode *N, BugReporterContext &BR,
- PathSensitiveBugReport &R) {
-
- const LocationContext *Ctx = N->getLocationContext();
- const StackFrameContext *SCtx = Ctx->getStackFrame();
- ProgramStateRef State = N->getState();
- auto CallExitLoc = N->getLocationAs<CallExitBegin>();
-
- // No diagnostic if region was modified inside the frame.
- if (!CallExitLoc || isRegionOfInterestModifiedInFrame(N))
- return nullptr;
-
- CallEventRef<> Call =
- BR.getStateManager().getCallEventManager().getCaller(SCtx, State);
-
- // Region of interest corresponds to an IVar, exiting a method
- // which could have written into that IVar, but did not.
- if (const auto *MC = dyn_cast<ObjCMethodCall>(Call)) {
- if (const auto *IvarR = dyn_cast<ObjCIvarRegion>(RegionOfInterest)) {
- const MemRegion *SelfRegion = MC->getReceiverSVal().getAsRegion();
- if (RegionOfInterest->isSubRegionOf(SelfRegion) &&
- potentiallyWritesIntoIvar(Call->getRuntimeDefinition().getDecl(),
- IvarR->getDecl()))
- return maybeEmitNote(R, *Call, N, {}, SelfRegion, "self",
- /*FirstIsReferenceType=*/false, 1);
- }
+NoStoreFuncVisitor::maybeEmitNoteForObjCSelf(PathSensitiveBugReport &R,
+ const ObjCMethodCall &Call,
+ const ExplodedNode *N) {
+ if (const auto *IvarR = dyn_cast<ObjCIvarRegion>(RegionOfInterest)) {
+ const MemRegion *SelfRegion = Call.getReceiverSVal().getAsRegion();
+ if (RegionOfInterest->isSubRegionOf(SelfRegion) &&
+ potentiallyWritesIntoIvar(Call.getRuntimeDefinition().getDecl(),
+ IvarR->getDecl()))
+ return maybeEmitNote(R, Call, N, {}, SelfRegion, "self",
+ /*FirstIsReferenceType=*/false, 1);
}
+ return nullptr;
+}
- if (const auto *CCall = dyn_cast<CXXConstructorCall>(Call)) {
- const MemRegion *ThisR = CCall->getCXXThisVal().getAsRegion();
- if (RegionOfInterest->isSubRegionOf(ThisR) &&
- !CCall->getDecl()->isImplicit())
- return maybeEmitNote(R, *Call, N, {}, ThisR, "this",
- /*FirstIsReferenceType=*/false, 1);
+PathDiagnosticPieceRef
+NoStoreFuncVisitor::maybeEmitNoteForCXXThis(PathSensitiveBugReport &R,
+ const CXXConstructorCall &Call,
+ const ExplodedNode *N) {
+ const MemRegion *ThisR = Call.getCXXThisVal().getAsRegion();
+ if (RegionOfInterest->isSubRegionOf(ThisR) && !Call.getDecl()->isImplicit())
+ return maybeEmitNote(R, Call, N, {}, ThisR, "this",
+ /*FirstIsReferenceType=*/false, 1);
+
+ // Do not generate diagnostics for not modified parameters in
+ // constructors.
+ return nullptr;
+}
- // Do not generate diagnostics for not modified parameters in
- // constructors.
- return nullptr;
- }
+/// \return whether \p Ty points to a const type, or is a const reference.
+static bool isPointerToConst(QualType Ty) {
+ return !Ty->getPointeeType().isNull() &&
+ Ty->getPointeeType().getCanonicalType().isConstQualified();
+}
- ArrayRef<ParmVarDecl *> parameters = getCallParameters(Call);
- for (unsigned I = 0; I < Call->getNumArgs() && I < parameters.size(); ++I) {
- const ParmVarDecl *PVD = parameters[I];
- SVal V = Call->getArgSVal(I);
+PathDiagnosticPieceRef NoStoreFuncVisitor::maybeEmitNoteForParameters(
+ PathSensitiveBugReport &R, const CallEvent &Call, const ExplodedNode *N) {
+ ArrayRef<ParmVarDecl *> Parameters = Call.parameters();
+ for (unsigned I = 0; I < Call.getNumArgs() && I < Parameters.size(); ++I) {
+ const ParmVarDecl *PVD = Parameters[I];
+ SVal V = Call.getArgSVal(I);
bool ParamIsReferenceType = PVD->getType()->isReferenceType();
std::string ParamName = PVD->getNameAsString();
- int IndirectionLevel = 1;
+ unsigned IndirectionLevel = 1;
QualType T = PVD->getType();
while (const MemRegion *MR = V.getAsRegion()) {
if (RegionOfInterest->isSubRegionOf(MR) && !isPointerToConst(T))
- return maybeEmitNote(R, *Call, N, {}, MR, ParamName,
+ return maybeEmitNote(R, Call, N, {}, MR, ParamName,
ParamIsReferenceType, IndirectionLevel);
QualType PT = T->getPointeeType();
if (PT.isNull() || PT->isVoidType())
break;
+ ProgramStateRef State = N->getState();
+
if (const RecordDecl *RD = PT->getAsRecordDecl())
if (Optional<RegionVector> P =
findRegionOfInterestInRecord(RD, State, MR))
- return maybeEmitNote(R, *Call, N, *P, RegionOfInterest, ParamName,
+ return maybeEmitNote(R, Call, N, *P, RegionOfInterest, ParamName,
ParamIsReferenceType, IndirectionLevel);
V = State->getSVal(MR, PT);
@@ -637,40 +748,11 @@ NoStoreFuncVisitor::VisitNode(const ExplodedNode *N, BugReporterContext &BR,
return nullptr;
}
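
For orientation, here is a hedged, self-contained sketch (not part of this patch) of the kind of code the parameter walk above is meant to annotate; the function and variable names are invented. When the out-parameter is left unwritten on the reported path, the visitor can attach a "Returning without writing to ..." note at the callee's exit.

int readConfig();            // assumed external helper

void maybeInit(int *Out) {
  if (readConfig() > 0)
    *Out = 1;                // written on this branch only...
}                            // ...and left untouched on the other one

int use() {
  int Value;
  maybeInit(&Value);
  return Value;              // possible read of an uninitialized value
}
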
-void NoStoreFuncVisitor::findModifyingFrames(const ExplodedNode *N) {
- assert(N->getLocationAs<CallExitBegin>());
- ProgramStateRef LastReturnState = N->getState();
- SVal ValueAtReturn = LastReturnState->getSVal(RegionOfInterest);
- const LocationContext *Ctx = N->getLocationContext();
- const StackFrameContext *OriginalSCtx = Ctx->getStackFrame();
-
- do {
- ProgramStateRef State = N->getState();
- auto CallExitLoc = N->getLocationAs<CallExitBegin>();
- if (CallExitLoc) {
- LastReturnState = State;
- ValueAtReturn = LastReturnState->getSVal(RegionOfInterest);
- }
-
- FramesModifyingCalculated.insert(N->getLocationContext()->getStackFrame());
-
- if (wasRegionOfInterestModifiedAt(RegionOfInterest, N, ValueAtReturn)) {
- const StackFrameContext *SCtx = N->getStackFrame();
- while (!SCtx->inTopFrame()) {
- auto p = FramesModifyingRegion.insert(SCtx);
- if (!p.second)
- break; // Frame and all its parents already inserted.
- SCtx = SCtx->getParent()->getStackFrame();
- }
- }
-
- // Stop calculation at the call to the current function.
- if (auto CE = N->getLocationAs<CallEnter>())
- if (CE->getCalleeContext() == OriginalSCtx)
- break;
-
- N = N->getFirstPred();
- } while (N);
+bool NoStoreFuncVisitor::wasModifiedBeforeCallExit(
+ const ExplodedNode *CurrN, const ExplodedNode *CallExitBeginN) {
+ return ::wasRegionOfInterestModifiedAt(
+ RegionOfInterest, CurrN,
+ CallExitBeginN->getState()->getSVal(RegionOfInterest));
}
static llvm::StringLiteral WillBeUsedForACondition =
@@ -681,27 +763,6 @@ PathDiagnosticPieceRef NoStoreFuncVisitor::maybeEmitNote(
const RegionVector &FieldChain, const MemRegion *MatchedRegion,
StringRef FirstElement, bool FirstIsReferenceType,
unsigned IndirectionLevel) {
- // Optimistically suppress uninitialized value bugs that result
- // from system headers having a chance to initialize the value
- // but failing to do so. It's too unlikely a system header's fault.
- // It's much more likely a situation in which the function has a failure
- // mode that the user decided not to check. If we want to hunt such
- // omitted checks, we should provide an explicit function-specific note
- // describing the precondition under which the function isn't supposed to
- // initialize its out-parameter, and additionally check that such
- // precondition can actually be fulfilled on the current path.
- if (Call.isInSystemHeader()) {
- // We make an exception for system header functions that have no branches.
- // Such functions unconditionally fail to initialize the variable.
- // If they call other functions that have more paths within them,
- // this suppression would still apply when we visit these inner functions.
- // One common example of a standard function that doesn't ever initialize
- // its out parameter is operator placement new; it's up to the follow-up
- // constructor (if any) to initialize the memory.
- if (!N->getStackFrame()->getCFG()->isLinear())
- R.markInvalid(getTag(), nullptr);
- return nullptr;
- }
PathDiagnosticLocation L =
PathDiagnosticLocation::create(N->getLocation(), SM);
@@ -717,8 +778,8 @@ PathDiagnosticPieceRef NoStoreFuncVisitor::maybeEmitNote(
os << "Returning without writing to '";
// Do not generate the note if failed to pretty-print.
- if (!prettyPrintRegionName(FirstElement, FirstIsReferenceType, MatchedRegion,
- FieldChain, IndirectionLevel, os))
+ if (!prettyPrintRegionName(FieldChain, MatchedRegion, FirstElement,
+ FirstIsReferenceType, IndirectionLevel, os))
return nullptr;
os << "'";
@@ -727,11 +788,11 @@ PathDiagnosticPieceRef NoStoreFuncVisitor::maybeEmitNote(
return std::make_shared<PathDiagnosticEventPiece>(L, os.str());
}
-bool NoStoreFuncVisitor::prettyPrintRegionName(StringRef FirstElement,
- bool FirstIsReferenceType,
+bool NoStoreFuncVisitor::prettyPrintRegionName(const RegionVector &FieldChain,
const MemRegion *MatchedRegion,
- const RegionVector &FieldChain,
- int IndirectionLevel,
+ StringRef FirstElement,
+ bool FirstIsReferenceType,
+ unsigned IndirectionLevel,
llvm::raw_svector_ostream &os) {
if (FirstIsReferenceType)
@@ -754,7 +815,7 @@ bool NoStoreFuncVisitor::prettyPrintRegionName(StringRef FirstElement,
// Just keep going up to the base region.
// Element regions may appear due to casts.
- if (isa<CXXBaseObjectRegion>(R) || isa<CXXTempObjectRegion>(R))
+ if (isa<CXXBaseObjectRegion, CXXTempObjectRegion>(R))
continue;
if (Sep.empty())
@@ -1153,7 +1214,7 @@ class StoreSiteFinder final : public TrackingBugReporterVisitor {
public:
/// \param V We're searching for the store where \c R received this value.
/// \param R The region we're tracking.
- /// \param TKind May limit the amount of notes added to the bug report.
+ /// \param Options Tracking behavior options.
/// \param OriginSFC Only adds notes when the last store happened in a
/// different stackframe to this one. Disregarded if the tracking kind
/// is thorough.
@@ -2674,9 +2735,8 @@ bool ConditionBRVisitor::patternMatch(const Expr *Ex,
const Expr *OriginalExpr = Ex;
Ex = Ex->IgnoreParenCasts();
- if (isa<GNUNullExpr>(Ex) || isa<ObjCBoolLiteralExpr>(Ex) ||
- isa<CXXBoolLiteralExpr>(Ex) || isa<IntegerLiteral>(Ex) ||
- isa<FloatingLiteral>(Ex)) {
+ if (isa<GNUNullExpr, ObjCBoolLiteralExpr, CXXBoolLiteralExpr, IntegerLiteral,
+ FloatingLiteral>(Ex)) {
// Use heuristics to determine if the expression is a macro
// expanding to a literal and if so, use the macro's name.
SourceLocation BeginLoc = OriginalExpr->getBeginLoc();
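
Many hunks in this file (and in the files below) replace chained isa<>() calls with the variadic form. A minimal hedged sketch of the two spellings, using arbitrary AST node types purely for illustration:

#include "clang/AST/Expr.h"
#include "clang/AST/ExprCXX.h"
#include "llvm/Support/Casting.h"

using namespace clang;
using namespace llvm;

// Before: one isa<> call per candidate type, plus an explicit null check.
static bool isLiteralOld(const Stmt *S) {
  return S && (isa<IntegerLiteral>(S) || isa<FloatingLiteral>(S) ||
               isa<CXXBoolLiteralExpr>(S));
}

// After: one variadic isa<>; isa_and_nonnull<> also folds away the null check.
static bool isLiteralNew(const Stmt *S) {
  return isa_and_nonnull<IntegerLiteral, FloatingLiteral, CXXBoolLiteralExpr>(S);
}
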
diff --git a/clang/lib/StaticAnalyzer/Core/CallDescription.cpp b/clang/lib/StaticAnalyzer/Core/CallDescription.cpp
new file mode 100644
index 000000000000..810fe365d021
--- /dev/null
+++ b/clang/lib/StaticAnalyzer/Core/CallDescription.cpp
@@ -0,0 +1,146 @@
+//===- CallDescription.cpp - function/method call matching --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file defines a generic mechanism for matching function and
+/// method calls in C, C++, and Objective-C. Instances of these
+/// classes are frequently used together with the CallEvent classes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Optional.h"
+#include <iterator>
+
+using namespace llvm;
+using namespace clang;
+
+using MaybeCount = Optional<unsigned>;
+
+// A constructor helper.
+static MaybeCount readRequiredParams(MaybeCount RequiredArgs,
+ MaybeCount RequiredParams) {
+ if (RequiredParams)
+ return RequiredParams;
+ if (RequiredArgs)
+ return RequiredArgs;
+ return None;
+}
+
+ento::CallDescription::CallDescription(CallDescriptionFlags Flags,
+ ArrayRef<const char *> QualifiedName,
+ MaybeCount RequiredArgs /*= None*/,
+ MaybeCount RequiredParams /*= None*/)
+ : RequiredArgs(RequiredArgs),
+ RequiredParams(readRequiredParams(RequiredArgs, RequiredParams)),
+ Flags(Flags) {
+ assert(!QualifiedName.empty());
+ this->QualifiedName.reserve(QualifiedName.size());
+ llvm::copy(QualifiedName, std::back_inserter(this->QualifiedName));
+}
+
+/// Construct a CallDescription with default flags.
+ento::CallDescription::CallDescription(ArrayRef<const char *> QualifiedName,
+ MaybeCount RequiredArgs /*= None*/,
+ MaybeCount RequiredParams /*= None*/)
+ : CallDescription(CDF_None, QualifiedName, RequiredArgs, RequiredParams) {}
+
+bool ento::CallDescription::matches(const CallEvent &Call) const {
+ // FIXME: Add ObjC Message support.
+ if (Call.getKind() == CE_ObjCMessage)
+ return false;
+
+ const auto *FD = dyn_cast_or_null<FunctionDecl>(Call.getDecl());
+ if (!FD)
+ return false;
+
+ if (Flags & CDF_MaybeBuiltin) {
+ return CheckerContext::isCLibraryFunction(FD, getFunctionName()) &&
+ (!RequiredArgs || *RequiredArgs <= Call.getNumArgs()) &&
+ (!RequiredParams || *RequiredParams <= Call.parameters().size());
+ }
+
+ if (!II.hasValue()) {
+ II = &Call.getState()->getStateManager().getContext().Idents.get(
+ getFunctionName());
+ }
+
+ const auto MatchNameOnly = [](const CallDescription &CD,
+ const NamedDecl *ND) -> bool {
+ DeclarationName Name = ND->getDeclName();
+ if (const auto *II = Name.getAsIdentifierInfo())
+ return II == CD.II.getValue(); // Fast case.
+
+ // Fallback to the slow stringification and comparison for:
+ // C++ overloaded operators, constructors, destructors, etc.
+ // FIXME This comparison is way SLOWER than comparing pointers.
+ // At some point in the future, we should compare FunctionDecl pointers.
+ return Name.getAsString() == CD.getFunctionName();
+ };
+
+ const auto ExactMatchArgAndParamCounts =
+ [](const CallEvent &Call, const CallDescription &CD) -> bool {
+ const bool ArgsMatch =
+ !CD.RequiredArgs || *CD.RequiredArgs == Call.getNumArgs();
+ const bool ParamsMatch =
+ !CD.RequiredParams || *CD.RequiredParams == Call.parameters().size();
+ return ArgsMatch && ParamsMatch;
+ };
+
+ const auto MatchQualifiedNameParts = [](const CallDescription &CD,
+ const Decl *D) -> bool {
+ const auto FindNextNamespaceOrRecord =
+ [](const DeclContext *Ctx) -> const DeclContext * {
+ while (Ctx && !isa<NamespaceDecl, RecordDecl>(Ctx))
+ Ctx = Ctx->getParent();
+ return Ctx;
+ };
+
+ auto QualifierPartsIt = CD.begin_qualified_name_parts();
+ const auto QualifierPartsEndIt = CD.end_qualified_name_parts();
+
+ // Match namespace and record names. Skip unrelated names if they don't
+ // match.
+ const DeclContext *Ctx = FindNextNamespaceOrRecord(D->getDeclContext());
+ for (; Ctx && QualifierPartsIt != QualifierPartsEndIt;
+ Ctx = FindNextNamespaceOrRecord(Ctx->getParent())) {
+ // If not matched just continue and try matching for the next one.
+ if (cast<NamedDecl>(Ctx)->getName() != *QualifierPartsIt)
+ continue;
+ ++QualifierPartsIt;
+ }
+
+ // We matched if we consumed all expected qualifier segments.
+ return QualifierPartsIt == QualifierPartsEndIt;
+ };
+
+ // Let's start matching...
+ if (!ExactMatchArgAndParamCounts(Call, *this))
+ return false;
+
+ if (!MatchNameOnly(*this, FD))
+ return false;
+
+ if (!hasQualifiedNameParts())
+ return true;
+
+ return MatchQualifiedNameParts(*this, FD);
+}
+
+ento::CallDescriptionSet::CallDescriptionSet(
+ std::initializer_list<CallDescription> &&List) {
+ Impl.LinearMap.reserve(List.size());
+ for (const CallDescription &CD : List)
+ Impl.LinearMap.push_back({CD, /*unused*/ true});
+}
+
+bool ento::CallDescriptionSet::contains(const CallEvent &Call) const {
+ return static_cast<bool>(Impl.lookup(Call));
+}
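
To give a feel for the interface this new file implements, here is a hedged sketch of how a checker might set up and query these descriptions. The checker fragment and the specific function names are illustrative assumptions; CallDescription, CallDescriptionSet, CDF_MaybeBuiltin, matches() and contains() are the entities defined above.

#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"

using namespace clang;
using namespace ento;

namespace {
class ExampleCheckerFragment {
  // Matches a call to ::fopen taking exactly two arguments.
  const CallDescription FOpenFn{{"fopen"}, 2};
  // Matches std::basic_string::c_str by its qualified name parts.
  const CallDescription CStrFn{{"std", "basic_string", "c_str"}};
  // Matches a small family of allocation functions, tolerating builtin aliases.
  const CallDescriptionSet AllocFns{{CDF_MaybeBuiltin, {"malloc"}, 1},
                                    {CDF_MaybeBuiltin, {"calloc"}, 2}};

public:
  void examine(const CallEvent &Call) const {
    if (FOpenFn.matches(Call) || CStrFn.matches(Call)) {
      // ... model the matched call ...
    } else if (AllocFns.contains(Call)) {
      // ... model an allocation ...
    }
  }
};
} // namespace
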
diff --git a/clang/lib/StaticAnalyzer/Core/CallEvent.cpp b/clang/lib/StaticAnalyzer/Core/CallEvent.cpp
index 3785f498414f..764dad3e7ab4 100644
--- a/clang/lib/StaticAnalyzer/Core/CallEvent.cpp
+++ b/clang/lib/StaticAnalyzer/Core/CallEvent.cpp
@@ -36,6 +36,7 @@
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/Specifiers.h"
#include "clang/CrossTU/CrossTranslationUnit.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicType.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicTypeInfo.h"
@@ -73,26 +74,7 @@ QualType CallEvent::getResultType() const {
const Expr *E = getOriginExpr();
if (!E)
return Ctx.VoidTy;
- assert(E);
-
- QualType ResultTy = E->getType();
-
- // A function that returns a reference to 'int' will have a result type
- // of simply 'int'. Check the origin expr's value kind to recover the
- // proper type.
- switch (E->getValueKind()) {
- case VK_LValue:
- ResultTy = Ctx.getLValueReferenceType(ResultTy);
- break;
- case VK_XValue:
- ResultTy = Ctx.getRValueReferenceType(ResultTy);
- break;
- case VK_PRValue:
- // No adjustment is necessary.
- break;
- }
-
- return ResultTy;
+ return Ctx.getReferenceQualifiedType(E);
}
static bool isCallback(QualType T) {
@@ -321,64 +303,6 @@ ProgramPoint CallEvent::getProgramPoint(bool IsPreVisit,
return PostImplicitCall(D, Loc, getLocationContext(), Tag);
}
-bool CallEvent::isCalled(const CallDescription &CD) const {
- // FIXME: Add ObjC Message support.
- if (getKind() == CE_ObjCMessage)
- return false;
-
- const IdentifierInfo *II = getCalleeIdentifier();
- if (!II)
- return false;
- const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(getDecl());
- if (!FD)
- return false;
-
- if (CD.Flags & CDF_MaybeBuiltin) {
- return CheckerContext::isCLibraryFunction(FD, CD.getFunctionName()) &&
- (!CD.RequiredArgs || CD.RequiredArgs <= getNumArgs()) &&
- (!CD.RequiredParams || CD.RequiredParams <= parameters().size());
- }
-
- if (!CD.IsLookupDone) {
- CD.IsLookupDone = true;
- CD.II = &getState()->getStateManager().getContext().Idents.get(
- CD.getFunctionName());
- }
-
- if (II != CD.II)
- return false;
-
- // If CallDescription provides prefix names, use them to improve matching
- // accuracy.
- if (CD.QualifiedName.size() > 1 && FD) {
- const DeclContext *Ctx = FD->getDeclContext();
- // See if we'll be able to match them all.
- size_t NumUnmatched = CD.QualifiedName.size() - 1;
- for (; Ctx && isa<NamedDecl>(Ctx); Ctx = Ctx->getParent()) {
- if (NumUnmatched == 0)
- break;
-
- if (const auto *ND = dyn_cast<NamespaceDecl>(Ctx)) {
- if (ND->getName() == CD.QualifiedName[NumUnmatched - 1])
- --NumUnmatched;
- continue;
- }
-
- if (const auto *RD = dyn_cast<RecordDecl>(Ctx)) {
- if (RD->getName() == CD.QualifiedName[NumUnmatched - 1])
- --NumUnmatched;
- continue;
- }
- }
-
- if (NumUnmatched > 0)
- return false;
- }
-
- return (!CD.RequiredArgs || CD.RequiredArgs == getNumArgs()) &&
- (!CD.RequiredParams || CD.RequiredParams == parameters().size());
-}
-
SVal CallEvent::getArgSVal(unsigned Index) const {
const Expr *ArgE = getArgExpr(Index);
if (!ArgE)
@@ -406,7 +330,6 @@ void CallEvent::dump(raw_ostream &Out) const {
ASTContext &Ctx = getState()->getStateManager().getContext();
if (const Expr *E = getOriginExpr()) {
E->printPretty(Out, nullptr, Ctx.getPrintingPolicy());
- Out << "\n";
return;
}
@@ -420,9 +343,7 @@ void CallEvent::dump(raw_ostream &Out) const {
}
bool CallEvent::isCallStmt(const Stmt *S) {
- return isa<CallExpr>(S) || isa<ObjCMessageExpr>(S)
- || isa<CXXConstructExpr>(S)
- || isa<CXXNewExpr>(S);
+ return isa<CallExpr, ObjCMessageExpr, CXXConstructExpr, CXXNewExpr>(S);
}
QualType CallEvent::getDeclaredResultType(const Decl *D) {
@@ -676,7 +597,7 @@ bool AnyFunctionCall::argumentsMayEscape() const {
// - NSXXInsertXX, for example NSMapInsertIfAbsent, since they can
// be deallocated by NSMapRemove.
- if (FName.startswith("NS") && (FName.find("Insert") != StringRef::npos))
+ if (FName.startswith("NS") && FName.contains("Insert"))
return true;
// - Many CF containers allow objects to escape through custom
@@ -1058,12 +979,12 @@ const PseudoObjectExpr *ObjCMethodCall::getContainingPseudoObjectExpr() const {
static const Expr *
getSyntacticFromForPseudoObjectExpr(const PseudoObjectExpr *POE) {
- const Expr *Syntactic = POE->getSyntacticForm();
+ const Expr *Syntactic = POE->getSyntacticForm()->IgnoreParens();
// This handles the funny case of assigning to the result of a getter.
// This can happen if the getter returns a non-const reference.
if (const auto *BO = dyn_cast<BinaryOperator>(Syntactic))
- Syntactic = BO->getLHS();
+ Syntactic = BO->getLHS()->IgnoreParens();
return Syntactic;
}
diff --git a/clang/lib/StaticAnalyzer/Core/CheckerContext.cpp b/clang/lib/StaticAnalyzer/Core/CheckerContext.cpp
index 3d64ce453479..4c684c3ffd9b 100644
--- a/clang/lib/StaticAnalyzer/Core/CheckerContext.cpp
+++ b/clang/lib/StaticAnalyzer/Core/CheckerContext.cpp
@@ -38,7 +38,7 @@ StringRef CheckerContext::getCalleeName(const FunctionDecl *FunDecl) const {
}
StringRef CheckerContext::getDeclDescription(const Decl *D) {
- if (isa<ObjCMethodDecl>(D) || isa<CXXMethodDecl>(D))
+ if (isa<ObjCMethodDecl, CXXMethodDecl>(D))
return "method";
if (isa<BlockDecl>(D))
return "anonymous block";
@@ -55,7 +55,7 @@ bool CheckerContext::isCLibraryFunction(const FunctionDecl *FD,
if (Name.empty())
return true;
StringRef BName = FD->getASTContext().BuiltinInfo.getName(BId);
- if (BName.find(Name) != StringRef::npos)
+ if (BName.contains(Name))
return true;
}
@@ -83,11 +83,10 @@ bool CheckerContext::isCLibraryFunction(const FunctionDecl *FD,
if (FName.equals(Name))
return true;
- if (FName.startswith("__inline") && (FName.find(Name) != StringRef::npos))
+ if (FName.startswith("__inline") && FName.contains(Name))
return true;
- if (FName.startswith("__") && FName.endswith("_chk") &&
- FName.find(Name) != StringRef::npos)
+ if (FName.startswith("__") && FName.endswith("_chk") && FName.contains(Name))
return true;
return false;
diff --git a/clang/lib/StaticAnalyzer/Core/CheckerManager.cpp b/clang/lib/StaticAnalyzer/Core/CheckerManager.cpp
index e09399a83589..94287b7992dd 100644
--- a/clang/lib/StaticAnalyzer/Core/CheckerManager.cpp
+++ b/clang/lib/StaticAnalyzer/Core/CheckerManager.cpp
@@ -26,6 +26,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormatVariadic.h"
#include <cassert>
#include <vector>
@@ -655,7 +656,7 @@ void CheckerManager::runCheckersForEvalCall(ExplodedNodeSet &Dst,
ExprEngine &Eng,
const EvalCallOptions &CallOpts) {
for (auto *const Pred : Src) {
- bool anyEvaluated = false;
+ Optional<CheckerNameRef> evaluatorChecker;
ExplodedNodeSet checkDst;
NodeBuilder B(Pred, checkDst, Eng.getBuilderContext());
@@ -674,10 +675,26 @@ void CheckerManager::runCheckersForEvalCall(ExplodedNodeSet &Dst,
CheckerContext C(B, Eng, Pred, L);
evaluated = EvalCallChecker(Call, C);
}
- assert(!(evaluated && anyEvaluated)
- && "There are more than one checkers evaluating the call");
+#ifndef NDEBUG
+ if (evaluated && evaluatorChecker) {
+ const auto toString = [](const CallEvent &Call) -> std::string {
+ std::string Buf;
+ llvm::raw_string_ostream OS(Buf);
+ Call.dump(OS);
+ OS.flush();
+ return Buf;
+ };
+ std::string AssertionMessage = llvm::formatv(
+ "The '{0}' call has been already evaluated by the {1} checker, "
+ "while the {2} checker also tried to evaluate the same call. At "
+ "most one checker supposed to evaluate a call.",
+ toString(Call), evaluatorChecker->getName(),
+ EvalCallChecker.Checker->getCheckerName());
+ llvm_unreachable(AssertionMessage.c_str());
+ }
+#endif
if (evaluated) {
- anyEvaluated = true;
+ evaluatorChecker = EvalCallChecker.Checker->getCheckerName();
Dst.insert(checkDst);
#ifdef NDEBUG
break; // on release don't check that no other checker also evals.
@@ -686,7 +703,7 @@ void CheckerManager::runCheckersForEvalCall(ExplodedNodeSet &Dst,
}
// If none of the checkers evaluated the call, ask ExprEngine to handle it.
- if (!anyEvaluated) {
+ if (!evaluatorChecker) {
NodeBuilder B(Pred, Dst, Eng.getBuilderContext());
Eng.defaultEvalCall(B, Pred, Call, CallOpts);
}
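
The improved assertion above builds its message with llvm::formatv. A small hedged sketch of that placeholder syntax, using invented strings:

#include "llvm/Support/FormatVariadic.h"
#include <string>

int main() {
  // "{N}" placeholders are substituted by the N-th trailing argument.
  std::string Msg =
      llvm::formatv("The '{0}' call has already been evaluated by the {1} "
                    "checker.",
                    "malloc(10)", "unix.Malloc");
  // Msg: "The 'malloc(10)' call has already been evaluated by the
  //       unix.Malloc checker."
  return 0;
}
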
diff --git a/clang/lib/StaticAnalyzer/Core/Environment.cpp b/clang/lib/StaticAnalyzer/Core/Environment.cpp
index ee7474592528..64e915a09ceb 100644
--- a/clang/lib/StaticAnalyzer/Core/Environment.cpp
+++ b/clang/lib/StaticAnalyzer/Core/Environment.cpp
@@ -88,7 +88,7 @@ SVal Environment::getSVal(const EnvironmentEntry &Entry,
const Stmt *S = Entry.getStmt();
assert(!isa<ObjCForCollectionStmt>(S) &&
"Use ExprEngine::hasMoreIteration()!");
- assert((isa<Expr>(S) || isa<ReturnStmt>(S)) &&
+ assert((isa<Expr, ReturnStmt>(S)) &&
"Environment can only argue about Exprs, since only they express "
"a value! Any non-expression statement stored in Environment is a "
"result of a hack!");
diff --git a/clang/lib/StaticAnalyzer/Core/ExplodedGraph.cpp b/clang/lib/StaticAnalyzer/Core/ExplodedGraph.cpp
index 635495e9bf60..294572b7dbe4 100644
--- a/clang/lib/StaticAnalyzer/Core/ExplodedGraph.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExplodedGraph.cpp
@@ -50,8 +50,7 @@ ExplodedGraph::~ExplodedGraph() = default;
bool ExplodedGraph::isInterestingLValueExpr(const Expr *Ex) {
if (!Ex->isLValue())
return false;
- return isa<DeclRefExpr>(Ex) || isa<MemberExpr>(Ex) ||
- isa<ObjCIvarRefExpr>(Ex) || isa<ArraySubscriptExpr>(Ex);
+ return isa<DeclRefExpr, MemberExpr, ObjCIvarRefExpr, ArraySubscriptExpr>(Ex);
}
bool ExplodedGraph::shouldCollect(const ExplodedNode *node) {
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index 66332d3b848c..12b005d43c55 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -393,8 +393,7 @@ ProgramStateRef ExprEngine::createTemporaryRegionIfNeeded(
SVal BaseReg = Reg;
// Make the necessary adjustments to obtain the sub-object.
- for (auto I = Adjustments.rbegin(), E = Adjustments.rend(); I != E; ++I) {
- const SubobjectAdjustment &Adj = *I;
+ for (const SubobjectAdjustment &Adj : llvm::reverse(Adjustments)) {
switch (Adj.Kind) {
case SubobjectAdjustment::DerivedToBaseAdjustment:
Reg = StoreMgr.evalDerivedToBase(Reg, Adj.DerivedToBase.BasePath);
@@ -1297,8 +1296,10 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
case Stmt::OMPInteropDirectiveClass:
case Stmt::OMPDispatchDirectiveClass:
case Stmt::OMPMaskedDirectiveClass:
+ case Stmt::OMPGenericLoopDirectiveClass:
case Stmt::CapturedStmtClass:
- case Stmt::OMPUnrollDirectiveClass: {
+ case Stmt::OMPUnrollDirectiveClass:
+ case Stmt::OMPMetaDirectiveClass: {
const ExplodedNode *node = Bldr.generateSink(S, Pred, Pred->getState());
Engine.addAbortedBlock(node, currBldrCtx->getBlock());
break;
@@ -1988,8 +1989,7 @@ void ExprEngine::processCFGBlockEntrance(const BlockEdge &L,
if (BlockCount == AMgr.options.maxBlockVisitOnPath - 1 &&
AMgr.options.ShouldWidenLoops) {
const Stmt *Term = nodeBuilder.getContext().getBlock()->getTerminatorStmt();
- if (!(Term &&
- (isa<ForStmt>(Term) || isa<WhileStmt>(Term) || isa<DoStmt>(Term))))
+ if (!isa_and_nonnull<ForStmt, WhileStmt, DoStmt>(Term))
return;
// Widen.
const LocationContext *LCtx = Pred->getLocationContext();
@@ -2265,7 +2265,7 @@ void ExprEngine::processBranch(const Stmt *Condition,
continue;
}
if (StTrue && StFalse)
- assert(!isa<ObjCForCollectionStmt>(Condition));;
+ assert(!isa<ObjCForCollectionStmt>(Condition));
// Process the true branch.
if (builder.isFeasible(true)) {
@@ -2593,7 +2593,7 @@ void ExprEngine::VisitCommonDeclRefExpr(const Expr *Ex, const NamedDecl *D,
ProgramPoint::PostLValueKind);
return;
}
- if (isa<FieldDecl>(D) || isa<IndirectFieldDecl>(D)) {
+ if (isa<FieldDecl, IndirectFieldDecl>(D)) {
// Delegate all work related to pointer to members to the surrounding
// operator&.
return;
@@ -2670,7 +2670,7 @@ void ExprEngine::VisitMemberExpr(const MemberExpr *M, ExplodedNode *Pred,
// Handle static member variables and enum constants accessed via
// member syntax.
- if (isa<VarDecl>(Member) || isa<EnumConstantDecl>(Member)) {
+ if (isa<VarDecl, EnumConstantDecl>(Member)) {
for (const auto I : CheckedSet)
VisitCommonDeclRefExpr(M, Member, I, EvalSet);
} else {
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp
index 7ad3dca831ac..69d67cf9b465 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp
@@ -550,7 +550,7 @@ void ExprEngine::VisitCompoundLiteralExpr(const CompoundLiteralExpr *CL,
const Expr *Init = CL->getInitializer();
SVal V = State->getSVal(CL->getInitializer(), LCtx);
- if (isa<CXXConstructExpr>(Init) || isa<CXXStdInitializerListExpr>(Init)) {
+ if (isa<CXXConstructExpr, CXXStdInitializerListExpr>(Init)) {
// No work needed. Just pass the value up to this expression.
} else {
assert(isa<InitListExpr>(Init));
@@ -757,9 +757,8 @@ void ExprEngine::VisitInitListExpr(const InitListExpr *IE,
return;
}
- for (InitListExpr::const_reverse_iterator it = IE->rbegin(),
- ei = IE->rend(); it != ei; ++it) {
- SVal V = state->getSVal(cast<Expr>(*it), LCtx);
+ for (const Stmt *S : llvm::reverse(*IE)) {
+ SVal V = state->getSVal(cast<Expr>(S), LCtx);
vals = getBasicVals().prependSVal(V, vals);
}
@@ -984,8 +983,7 @@ void ExprEngine::VisitUnaryOperator(const UnaryOperator* U, ExplodedNode *Pred,
if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(Ex)) {
const ValueDecl *VD = DRE->getDecl();
- if (isa<CXXMethodDecl>(VD) || isa<FieldDecl>(VD) ||
- isa<IndirectFieldDecl>(VD)) {
+ if (isa<CXXMethodDecl, FieldDecl, IndirectFieldDecl>(VD)) {
ProgramStateRef State = (*I)->getState();
const LocationContext *LCtx = (*I)->getLocationContext();
SVal SV = svalBuilder.getMemberPointer(cast<NamedDecl>(VD));
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp
index cab65687444b..ba105f34a915 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp
@@ -531,7 +531,7 @@ void ExprEngine::handleConstructor(const Expr *E,
// FIXME: Instead of relying on the ParentMap, we should have the
// trigger-statement (InitListExpr in this case) passed down from CFG or
// otherwise always available during construction.
- if (dyn_cast_or_null<InitListExpr>(LCtx->getParentMap().getParent(E))) {
+ if (isa_and_nonnull<InitListExpr>(LCtx->getParentMap().getParent(E))) {
MemRegionManager &MRMgr = getSValBuilder().getRegionManager();
Target = loc::MemRegionVal(MRMgr.getCXXTempObjectRegion(E, LCtx));
CallOpts.IsCtorOrDtorWithImproperlyModeledTargetRegion = true;
diff --git a/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp b/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp
index 64fc32ea7554..3b847d6f0d87 100644
--- a/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp
+++ b/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp
@@ -27,6 +27,8 @@
#include "clang/Rewrite/Core/Rewriter.h"
#include "clang/StaticAnalyzer/Core/PathDiagnosticConsumers.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
@@ -57,6 +59,8 @@ using namespace ento;
namespace {
+class ArrowMap;
+
class HTMLDiagnostics : public PathDiagnosticConsumer {
PathDiagnosticConsumerOptions DiagOpts;
std::string Directory;
@@ -77,60 +81,93 @@ public:
void FlushDiagnosticsImpl(std::vector<const PathDiagnostic *> &Diags,
FilesMade *filesMade) override;
- StringRef getName() const override {
- return "HTMLDiagnostics";
- }
+ StringRef getName() const override { return "HTMLDiagnostics"; }
bool supportsCrossFileDiagnostics() const override {
return SupportsCrossFileDiagnostics;
}
- unsigned ProcessMacroPiece(raw_ostream &os,
- const PathDiagnosticMacroPiece& P,
+ unsigned ProcessMacroPiece(raw_ostream &os, const PathDiagnosticMacroPiece &P,
unsigned num);
+ unsigned ProcessControlFlowPiece(Rewriter &R, FileID BugFileID,
+ const PathDiagnosticControlFlowPiece &P,
+ unsigned Number);
+
void HandlePiece(Rewriter &R, FileID BugFileID, const PathDiagnosticPiece &P,
const std::vector<SourceRange> &PopUpRanges, unsigned num,
unsigned max);
- void HighlightRange(Rewriter& R, FileID BugFileID, SourceRange Range,
+ void HighlightRange(Rewriter &R, FileID BugFileID, SourceRange Range,
const char *HighlightStart = "<span class=\"mrange\">",
const char *HighlightEnd = "</span>");
- void ReportDiag(const PathDiagnostic& D,
- FilesMade *filesMade);
+ void ReportDiag(const PathDiagnostic &D, FilesMade *filesMade);
// Generate the full HTML report
- std::string GenerateHTML(const PathDiagnostic& D, Rewriter &R,
- const SourceManager& SMgr, const PathPieces& path,
+ std::string GenerateHTML(const PathDiagnostic &D, Rewriter &R,
+ const SourceManager &SMgr, const PathPieces &path,
const char *declName);
// Add HTML header/footers to file specified by FID
- void FinalizeHTML(const PathDiagnostic& D, Rewriter &R,
- const SourceManager& SMgr, const PathPieces& path,
+ void FinalizeHTML(const PathDiagnostic &D, Rewriter &R,
+ const SourceManager &SMgr, const PathPieces &path,
FileID FID, const FileEntry *Entry, const char *declName);
// Rewrite the file specified by FID with HTML formatting.
- void RewriteFile(Rewriter &R, const PathPieces& path, FileID FID);
+ void RewriteFile(Rewriter &R, const PathPieces &path, FileID FID);
+ PathGenerationScheme getGenerationScheme() const override {
+ return Everything;
+ }
private:
+ void addArrowSVGs(Rewriter &R, FileID BugFileID,
+ const ArrowMap &ArrowIndices);
+
/// \return Javascript for displaying shortcuts help;
StringRef showHelpJavascript();
/// \return Javascript for navigating the HTML report using j/k keys.
StringRef generateKeyboardNavigationJavascript();
+ /// \return Javascript for drawing control-flow arrows.
+ StringRef generateArrowDrawingJavascript();
+
/// \return JavaScript for an option to only show relevant lines.
- std::string showRelevantLinesJavascript(
- const PathDiagnostic &D, const PathPieces &path);
+ std::string showRelevantLinesJavascript(const PathDiagnostic &D,
+ const PathPieces &path);
/// Write executed lines from \p D in JSON format into \p os.
- void dumpCoverageData(const PathDiagnostic &D,
- const PathPieces &path,
+ void dumpCoverageData(const PathDiagnostic &D, const PathPieces &path,
llvm::raw_string_ostream &os);
};
+bool isArrowPiece(const PathDiagnosticPiece &P) {
+ return isa<PathDiagnosticControlFlowPiece>(P) && P.getString().empty();
+}
+
+unsigned getPathSizeWithoutArrows(const PathPieces &Path) {
+ unsigned TotalPieces = Path.size();
+ unsigned TotalArrowPieces = llvm::count_if(
+ Path, [](const PathDiagnosticPieceRef &P) { return isArrowPiece(*P); });
+ return TotalPieces - TotalArrowPieces;
+}
+
+class ArrowMap : public std::vector<unsigned> {
+ using Base = std::vector<unsigned>;
+
+public:
+ ArrowMap(unsigned Size) : Base(Size, 0) {}
+ unsigned getTotalNumberOfArrows() const { return at(0); }
+};
+
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const ArrowMap &Indices) {
+ OS << "[ ";
+ llvm::interleave(Indices, OS, ",");
+ return OS << " ]";
+}
+
} // namespace
void ento::createHTMLDiagnosticConsumer(
@@ -208,6 +245,18 @@ void HTMLDiagnostics::FlushDiagnosticsImpl(
ReportDiag(*Diag, filesMade);
}
+static llvm::SmallString<32> getIssueHash(const PathDiagnostic &D,
+ const Preprocessor &PP) {
+ SourceManager &SMgr = PP.getSourceManager();
+ PathDiagnosticLocation UPDLoc = D.getUniqueingLoc();
+ FullSourceLoc L(SMgr.getExpansionLoc(UPDLoc.isValid()
+ ? UPDLoc.asLocation()
+ : D.getLocation().asLocation()),
+ SMgr);
+ return getIssueHash(L, D.getCheckerName(), D.getBugType(),
+ D.getDeclWithIssue(), PP.getLangOpts());
+}
+
void HTMLDiagnostics::ReportDiag(const PathDiagnostic& D,
FilesMade *filesMade) {
// Create the HTML directory if it is missing.
@@ -234,11 +283,6 @@ void HTMLDiagnostics::ReportDiag(const PathDiagnostic& D,
// Create a new rewriter to generate HTML.
Rewriter R(const_cast<SourceManager&>(SMgr), PP.getLangOpts());
- // The file for the first path element is considered the main report file, it
- // will usually be equivalent to SMgr.getMainFileID(); however, it might be a
- // header when -analyzer-opt-analyze-headers is used.
- FileID ReportFile = path.front()->getLocation().asLocation().getExpansionLoc().getFileID();
-
// Get the function/method name
SmallString<128> declName("unknown");
int offsetDecl = 0;
@@ -265,46 +309,52 @@ void HTMLDiagnostics::ReportDiag(const PathDiagnostic& D,
// Create a path for the target HTML file.
int FD;
- SmallString<128> Model, ResultPath;
-
- if (!DiagOpts.ShouldWriteStableReportFilename) {
- llvm::sys::path::append(Model, Directory, "report-%%%%%%.html");
- if (std::error_code EC =
- llvm::sys::fs::make_absolute(Model)) {
- llvm::errs() << "warning: could not make '" << Model
- << "' absolute: " << EC.message() << '\n';
- return;
- }
- if (std::error_code EC = llvm::sys::fs::createUniqueFile(
- Model, FD, ResultPath, llvm::sys::fs::OF_Text)) {
- llvm::errs() << "warning: could not create file in '" << Directory
- << "': " << EC.message() << '\n';
- return;
- }
- } else {
- int i = 1;
- std::error_code EC;
- do {
- // Find a filename which is not already used
- const FileEntry* Entry = SMgr.getFileEntryForID(ReportFile);
- std::stringstream filename;
- Model = "";
- filename << "report-"
- << llvm::sys::path::filename(Entry->getName()).str()
- << "-" << declName.c_str()
- << "-" << offsetDecl
- << "-" << i << ".html";
- llvm::sys::path::append(Model, Directory,
- filename.str());
- EC = llvm::sys::fs::openFileForReadWrite(
- Model, FD, llvm::sys::fs::CD_CreateNew, llvm::sys::fs::OF_None);
- if (EC && EC != llvm::errc::file_exists) {
- llvm::errs() << "warning: could not create file '" << Model
- << "': " << EC.message() << '\n';
- return;
- }
- i++;
- } while (EC);
+
+ SmallString<128> FileNameStr;
+ llvm::raw_svector_ostream FileName(FileNameStr);
+ FileName << "report-";
+
+ // Historically, neither the stable report filename nor the unstable report
+ // filename were actually stable. That said, the stable report filename
+ // was more stable because it was mostly composed of information
+ // about the bug report instead of being completely random.
+ // Now both stable and unstable report filenames are in fact stable
+ // but the stable report filename is still more verbose.
+ if (DiagOpts.ShouldWriteVerboseReportFilename) {
+ // FIXME: This code relies on knowing what constitutes the issue hash.
+ // Otherwise deduplication won't work correctly.
+ FileID ReportFile =
+ path.back()->getLocation().asLocation().getExpansionLoc().getFileID();
+
+ const FileEntry *Entry = SMgr.getFileEntryForID(ReportFile);
+
+ FileName << llvm::sys::path::filename(Entry->getName()).str() << "-"
+ << declName.c_str() << "-" << offsetDecl << "-";
+ }
+
+ FileName << StringRef(getIssueHash(D, PP)).substr(0, 6).str() << ".html";
+
+ SmallString<128> ResultPath;
+ llvm::sys::path::append(ResultPath, Directory, FileName.str());
+ if (std::error_code EC = llvm::sys::fs::make_absolute(ResultPath)) {
+ llvm::errs() << "warning: could not make '" << ResultPath
+ << "' absolute: " << EC.message() << '\n';
+ return;
+ }
+
+ if (std::error_code EC = llvm::sys::fs::openFileForReadWrite(
+ ResultPath, FD, llvm::sys::fs::CD_CreateNew,
+ llvm::sys::fs::OF_Text)) {
+ // Existence of the file corresponds to the situation where a different
+ // Clang instance has emitted a bug report with the same issue hash.
+ // This is an entirely normal situation that does not deserve a warning,
+ // as apart from hash collisions this can happen because the reports
+ // are in fact similar enough to be considered duplicates of each other.
+ if (EC != llvm::errc::file_exists) {
+ llvm::errs() << "warning: could not create file in '" << Directory
+ << "': " << EC.message() << '\n';
+ }
+ return;
}
llvm::raw_fd_ostream os(FD, true);
@@ -452,10 +502,11 @@ window.addEventListener("keydown", function (event) {
if (event.defaultPrevented) {
return;
}
- if (event.key == "S") {
+ // SHIFT + S
+ if (event.shiftKey && event.keyCode == 83) {
var checked = document.getElementsByName("showCounterexample")[0].checked;
filterCounterexample(!checked);
- document.getElementsByName("showCounterexample")[0].checked = !checked;
+ document.getElementsByName("showCounterexample")[0].click();
} else {
return;
}
@@ -475,6 +526,11 @@ document.addEventListener("DOMContentLoaded", function() {
<label for="showCounterexample">
Show only relevant lines
</label>
+ <input type="checkbox" name="showArrows"
+ id="showArrows" style="margin-left: 10px" />
+ <label for="showArrows">
+ Show control flow arrows
+ </label>
</form>
)<<<";
@@ -503,6 +559,9 @@ void HTMLDiagnostics::FinalizeHTML(const PathDiagnostic& D, Rewriter &R,
R.InsertTextBefore(SMgr.getLocForStartOfFile(FID),
generateKeyboardNavigationJavascript());
+ R.InsertTextBefore(SMgr.getLocForStartOfFile(FID),
+ generateArrowDrawingJavascript());
+
// Checkbox and javascript for filtering the output to the counterexample.
R.InsertTextBefore(SMgr.getLocForStartOfFile(FID),
showRelevantLinesJavascript(D, path));
@@ -570,6 +629,7 @@ void HTMLDiagnostics::FinalizeHTML(const PathDiagnostic& D, Rewriter &R,
<a href="#" onclick="toggleHelp(); return false;">Close</a>
</div>
)<<<";
+
R.InsertTextBefore(SMgr.getLocForStartOfFile(FID), os.str());
}
@@ -591,7 +651,6 @@ void HTMLDiagnostics::FinalizeHTML(const PathDiagnostic& D, Rewriter &R,
? UPDLoc.asLocation()
: D.getLocation().asLocation()),
SMgr);
- const Decl *DeclWithIssue = D.getDeclWithIssue();
StringRef BugCategory = D.getCategory();
if (!BugCategory.empty())
@@ -603,9 +662,7 @@ void HTMLDiagnostics::FinalizeHTML(const PathDiagnostic& D, Rewriter &R,
os << "\n<!-- FUNCTIONNAME " << declName << " -->\n";
- os << "\n<!-- ISSUEHASHCONTENTOFLINEINCONTEXT "
- << getIssueHash(L, D.getCheckerName(), D.getBugType(), DeclWithIssue,
- PP.getLangOpts())
+ os << "\n<!-- ISSUEHASHCONTENTOFLINEINCONTEXT " << getIssueHash(D, PP)
<< " -->\n";
os << "\n<!-- BUGLINE "
@@ -616,7 +673,7 @@ void HTMLDiagnostics::FinalizeHTML(const PathDiagnostic& D, Rewriter &R,
<< ColumnNumber
<< " -->\n";
- os << "\n<!-- BUGPATHLENGTH " << path.size() << " -->\n";
+ os << "\n<!-- BUGPATHLENGTH " << getPathSizeWithoutArrows(path) << " -->\n";
// Mark the end of the tags.
os << "\n<!-- BUGMETAEND -->\n";
@@ -695,8 +752,7 @@ static void HandlePopUpPieceEndTag(Rewriter &R,
Out << "</div></td><td>" << Piece.getString() << "</td></tr>";
// If no report made at this range mark the variable and add the end tags.
- if (std::find(PopUpRanges.begin(), PopUpRanges.end(), Range) ==
- PopUpRanges.end()) {
+ if (!llvm::is_contained(PopUpRanges, Range)) {
// Store that we create a report at this range.
PopUpRanges.push_back(Range);
@@ -711,30 +767,33 @@ static void HandlePopUpPieceEndTag(Rewriter &R,
}
}
-void HTMLDiagnostics::RewriteFile(Rewriter &R,
- const PathPieces& path, FileID FID) {
+void HTMLDiagnostics::RewriteFile(Rewriter &R, const PathPieces &path,
+ FileID FID) {
+
// Process the path.
// Maintain the counts of extra note pieces separately.
- unsigned TotalPieces = path.size();
- unsigned TotalNotePieces = std::count_if(
- path.begin(), path.end(), [](const PathDiagnosticPieceRef &p) {
+ unsigned TotalPieces = getPathSizeWithoutArrows(path);
+ unsigned TotalNotePieces =
+ llvm::count_if(path, [](const PathDiagnosticPieceRef &p) {
return isa<PathDiagnosticNotePiece>(*p);
});
- unsigned PopUpPieceCount = std::count_if(
- path.begin(), path.end(), [](const PathDiagnosticPieceRef &p) {
+ unsigned PopUpPieceCount =
+ llvm::count_if(path, [](const PathDiagnosticPieceRef &p) {
return isa<PathDiagnosticPopUpPiece>(*p);
});
unsigned TotalRegularPieces = TotalPieces - TotalNotePieces - PopUpPieceCount;
unsigned NumRegularPieces = TotalRegularPieces;
unsigned NumNotePieces = TotalNotePieces;
+ unsigned NumberOfArrows = 0;
// Stores the count of the regular piece indices.
std::map<int, int> IndexMap;
+ ArrowMap ArrowIndices(TotalRegularPieces + 1);
// Stores the different ranges where we have reported something.
std::vector<SourceRange> PopUpRanges;
- for (auto I = path.rbegin(), E = path.rend(); I != E; ++I) {
- const auto &Piece = *I->get();
+ for (const PathDiagnosticPieceRef &I : llvm::reverse(path)) {
+ const auto &Piece = *I.get();
if (isa<PathDiagnosticPopUpPiece>(Piece)) {
++IndexMap[NumRegularPieces];
@@ -744,18 +803,40 @@ void HTMLDiagnostics::RewriteFile(Rewriter &R,
// as a separate pass through the piece list.
HandlePiece(R, FID, Piece, PopUpRanges, NumNotePieces, TotalNotePieces);
--NumNotePieces;
+
+ } else if (isArrowPiece(Piece)) {
+ NumberOfArrows = ProcessControlFlowPiece(
+ R, FID, cast<PathDiagnosticControlFlowPiece>(Piece), NumberOfArrows);
+ ArrowIndices[NumRegularPieces] = NumberOfArrows;
+
} else {
HandlePiece(R, FID, Piece, PopUpRanges, NumRegularPieces,
TotalRegularPieces);
--NumRegularPieces;
+ ArrowIndices[NumRegularPieces] = ArrowIndices[NumRegularPieces + 1];
}
}
+ ArrowIndices[0] = NumberOfArrows;
+
+ // At this point ArrowIndices represent the following data structure:
+ // [a_0, a_1, ..., a_N]
+ // where N is the number of events in the path.
+ //
+ // Then for every event with index i \in [0, N - 1], we can say that
+ // arrows with indices \in [a_(i+1), a_i) correspond to that event.
+ // We can say that because arrows with these indices appeared in the
+ // path in between the i-th and the (i+1)-th events.
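+ //
+ // For instance, with 3 events and 5 arrows in total, ArrowIndices could be
+ // [5, 5, 3, 0]: arrows 3 and 4 correspond to event 1, arrows 0-2 to
+ // event 2, and no arrows to event 0.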
+ assert(ArrowIndices.back() == 0 &&
+ "No arrows should be after the last event");
+ // This assertion also guarantees that all indices in it are <= NumberOfArrows.
+ assert(llvm::is_sorted(ArrowIndices, std::greater<unsigned>()) &&
+ "Incorrect arrow indices map");
// Secondary indexing if we are having multiple pop-ups between two notes.
// (e.g. [(13) 'a' is 'true']; [(13.1) 'b' is 'false']; [(13.2) 'c' is...)
NumRegularPieces = TotalRegularPieces;
- for (auto I = path.rbegin(), E = path.rend(); I != E; ++I) {
- const auto &Piece = *I->get();
+ for (const PathDiagnosticPieceRef &I : llvm::reverse(path)) {
+ const auto &Piece = *I.get();
if (const auto *PopUpP = dyn_cast<PathDiagnosticPopUpPiece>(&Piece)) {
int PopUpPieceIndex = IndexMap[NumRegularPieces];
@@ -771,7 +852,7 @@ void HTMLDiagnostics::RewriteFile(Rewriter &R,
if (PopUpPieceIndex > 0)
--IndexMap[NumRegularPieces];
- } else if (!isa<PathDiagnosticNotePiece>(Piece)) {
+ } else if (!isa<PathDiagnosticNotePiece>(Piece) && !isArrowPiece(Piece)) {
--NumRegularPieces;
}
}
@@ -783,6 +864,8 @@ void HTMLDiagnostics::RewriteFile(Rewriter &R,
html::EscapeText(R, FID);
html::AddLineNumbers(R, FID);
+ addArrowSVGs(R, FID, ArrowIndices);
+
// If we have a preprocessor, relex the file and syntax highlight.
// We might not have a preprocessor if we come from a deserialized AST file,
// for example.
@@ -1007,8 +1090,7 @@ void HTMLDiagnostics::HandlePiece(Rewriter &R, FileID BugFileID,
ArrayRef<SourceRange> Ranges = P.getRanges();
for (const auto &Range : Ranges) {
// If we have already highlighted the range as a pop-up there is no work.
- if (std::find(PopUpRanges.begin(), PopUpRanges.end(), Range) !=
- PopUpRanges.end())
+ if (llvm::is_contained(PopUpRanges, Range))
continue;
HighlightRange(R, LPosInfo.first, Range);
@@ -1049,6 +1131,104 @@ unsigned HTMLDiagnostics::ProcessMacroPiece(raw_ostream &os,
return num;
}
+void HTMLDiagnostics::addArrowSVGs(Rewriter &R, FileID BugFileID,
+ const ArrowMap &ArrowIndices) {
+ std::string S;
+ llvm::raw_string_ostream OS(S);
+
+ OS << R"<<<(
+<style type="text/css">
+ svg {
+ position:absolute;
+ top:0;
+ left:0;
+ height:100%;
+ width:100%;
+ pointer-events: none;
+ overflow: visible
+ }
+ .arrow {
+ stroke-opacity: 0.2;
+ stroke-width: 1;
+ marker-end: url(#arrowhead);
+ }
+
+ .arrow.selected {
+ stroke-opacity: 0.6;
+ stroke-width: 2;
+ marker-end: url(#arrowheadSelected);
+ }
+
+ .arrowhead {
+ orient: auto;
+ stroke: none;
+ opacity: 0.6;
+ fill: blue;
+ }
+</style>
+<svg xmlns="http://www.w3.org/2000/svg">
+ <defs>
+ <marker id="arrowheadSelected" class="arrowhead" opacity="0.6"
+ viewBox="0 0 10 10" refX="3" refY="5"
+ markerWidth="4" markerHeight="4">
+ <path d="M 0 0 L 10 5 L 0 10 z" />
+ </marker>
+ <marker id="arrowhead" class="arrowhead" opacity="0.2"
+ viewBox="0 0 10 10" refX="3" refY="5"
+ markerWidth="4" markerHeight="4">
+ <path d="M 0 0 L 10 5 L 0 10 z" />
+ </marker>
+ </defs>
+ <g id="arrows" fill="none" stroke="blue" visibility="hidden">
+)<<<";
+
+ for (unsigned Index : llvm::seq(0u, ArrowIndices.getTotalNumberOfArrows())) {
+ OS << " <path class=\"arrow\" id=\"arrow" << Index << "\"/>\n";
+ }
+
+ OS << R"<<<(
+ </g>
+</svg>
+<script type='text/javascript'>
+const arrowIndices = )<<<";
+
+ OS << ArrowIndices << "\n</script>\n";
+
+ R.InsertTextBefore(R.getSourceMgr().getLocForStartOfFile(BugFileID),
+ OS.str());
+}
+
+std::string getSpanBeginForControl(const char *ClassName, unsigned Index) {
+ std::string Result;
+ llvm::raw_string_ostream OS(Result);
+ OS << "<span id=\"" << ClassName << Index << "\">";
+ return OS.str();
+}
+
+std::string getSpanBeginForControlStart(unsigned Index) {
+ return getSpanBeginForControl("start", Index);
+}
+
+std::string getSpanBeginForControlEnd(unsigned Index) {
+ return getSpanBeginForControl("end", Index);
+}
+
+unsigned HTMLDiagnostics::ProcessControlFlowPiece(
+ Rewriter &R, FileID BugFileID, const PathDiagnosticControlFlowPiece &P,
+ unsigned Number) {
+ for (const PathDiagnosticLocationPair &LPair : P) {
+ std::string Start = getSpanBeginForControlStart(Number),
+ End = getSpanBeginForControlEnd(Number++);
+
+ HighlightRange(R, BugFileID, LPair.getStart().asRange().getBegin(),
+ Start.c_str());
+ HighlightRange(R, BugFileID, LPair.getEnd().asRange().getBegin(),
+ End.c_str());
+ }
+
+ return Number;
+}
+
void HTMLDiagnostics::HighlightRange(Rewriter& R, FileID BugFileID,
SourceRange Range,
const char *HighlightStart,
@@ -1109,7 +1289,7 @@ document.addEventListener("DOMContentLoaded", function() {
});
var findNum = function() {
- var s = document.querySelector(".selected");
+ var s = document.querySelector(".msg.selected");
if (!s || s.id == "EndPath") {
return 0;
}
@@ -1117,14 +1297,32 @@ var findNum = function() {
return out;
};
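+// SVG elements expose className as an SVGAnimatedString (with a .baseVal
+// field) rather than a plain string, so the two helpers below update
+// whichever form is present instead of relying on classList.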
+var classListAdd = function(el, theClass) {
+ if(!el.className.baseVal)
+ el.className += " " + theClass;
+ else
+ el.className.baseVal += " " + theClass;
+};
+
+var classListRemove = function(el, theClass) {
+ var className = (!el.className.baseVal) ?
+ el.className : el.className.baseVal;
+ className = className.replace(" " + theClass, "");
+ if(!el.className.baseVal)
+ el.className = className;
+ else
+ el.className.baseVal = className;
+};
+
var scrollTo = function(el) {
querySelectorAllArray(".selected").forEach(function(s) {
- s.classList.remove("selected");
+ classListRemove(s, "selected");
});
- el.classList.add("selected");
+ classListAdd(el, "selected");
window.scrollBy(0, el.getBoundingClientRect().top -
(window.innerHeight / 2));
-}
+ highlightArrowsForSelectedEvent();
+};
var move = function(num, up, numItems) {
if (num == 1 && up || num == numItems - 1 && !up) {
@@ -1159,9 +1357,11 @@ window.addEventListener("keydown", function (event) {
if (event.defaultPrevented) {
return;
}
- if (event.key == "j") {
+ // key 'j'
+ if (event.keyCode == 74) {
navigateTo(/*up=*/false);
- } else if (event.key == "k") {
+ // key 'k'
+ } else if (event.keyCode == 75) {
navigateTo(/*up=*/true);
} else {
return;
@@ -1171,3 +1371,258 @@ window.addEventListener("keydown", function (event) {
</script>
)<<<";
}
+
+StringRef HTMLDiagnostics::generateArrowDrawingJavascript() {
+ return R"<<<(
+<script type='text/javascript'>
+// Return range of numbers from a range [lower, upper).
+function range(lower, upper) {
+ var array = [];
+ for (var i = lower; i <= upper; ++i) {
+ array.push(i);
+ }
+ return array;
+}
+
+var getRelatedArrowIndices = function(pathId) {
+ // HTML numeration of events is a bit different than it is in the path.
+ // Everything is rotated one step to the right, so the last element
+ // (error diagnostic) has index 0.
+ if (pathId == 0) {
+ // arrowIndices has at least 2 elements
+ pathId = arrowIndices.length - 1;
+ }
+
+ return range(arrowIndices[pathId], arrowIndices[pathId - 1]);
+}
+
+var highlightArrowsForSelectedEvent = function() {
+ const selectedNum = findNum();
+ const arrowIndicesToHighlight = getRelatedArrowIndices(selectedNum);
+ arrowIndicesToHighlight.forEach((index) => {
+ var arrow = document.querySelector("#arrow" + index);
+ if(arrow) {
+ classListAdd(arrow, "selected")
+ }
+ });
+}
+
+var getAbsoluteBoundingRect = function(element) {
+ const relative = element.getBoundingClientRect();
+ return {
+ left: relative.left + window.pageXOffset,
+ right: relative.right + window.pageXOffset,
+ top: relative.top + window.pageYOffset,
+ bottom: relative.bottom + window.pageYOffset,
+ height: relative.height,
+ width: relative.width
+ };
+}
+
+var drawArrow = function(index) {
+ // This function is based on the great answer from SO:
+ // https://stackoverflow.com/a/39575674/11582326
+ var start = document.querySelector("#start" + index);
+ var end = document.querySelector("#end" + index);
+ var arrow = document.querySelector("#arrow" + index);
+
+ var startRect = getAbsoluteBoundingRect(start);
+ var endRect = getAbsoluteBoundingRect(end);
+
+ // It is an arrow from a token to itself, no need to visualize it.
+ if (startRect.top == endRect.top &&
+ startRect.left == endRect.left)
+ return;
+
+ // Each arrow is a very simple Bézier curve, with two nodes and
+ // two handles. So, we need to calculate four points in the window:
+ // * start node
+ var posStart = { x: 0, y: 0 };
+ // * end node
+ var posEnd = { x: 0, y: 0 };
+ // * handle for the start node
+ var startHandle = { x: 0, y: 0 };
+ // * handle for the end node
+ var endHandle = { x: 0, y: 0 };
+ // One can visualize it as follows:
+ //
+ // start handle
+ // /
+ // X"""_.-""""X
+ // .' \
+ // / start node
+ // |
+ // |
+ // | end node
+ // \ /
+ // `->X
+ // X-'
+ // \
+ // end handle
+ //
+ // NOTE: (0, 0) is the top left corner of the window.
+
+ // We have 3 similar, but still different scenarios to cover:
+ //
+ // 1. Two tokens on different lines.
+ // -xxx
+ // /
+ // \
+ // -> xxx
+ // In this situation, we draw arrow on the left curving to the left.
+ // 2. Two tokens on the same line, and the destination is on the right.
+ // ____
+ // / \
+ // / V
+ // xxx xxx
+ // In this situation, we draw arrow above curving upwards.
+ // 3. Two tokens on the same line, and the destination is on the left.
+ // xxx xxx
+ // ^ /
+ // \____/
+ // In this situation, we draw arrow below curving downwards.
+ const onDifferentLines = startRect.top <= endRect.top - 5 ||
+ startRect.top >= endRect.top + 5;
+ const leftToRight = startRect.left < endRect.left;
+
+ // NOTE: various magic constants are chosen empirically for
+ // better positioning and look
+ if (onDifferentLines) {
+ // Case #1
+ const topToBottom = startRect.top < endRect.top;
+ posStart.x = startRect.left - 1;
+ // We don't want to start it at the top left corner of the token,
+ // it doesn't feel like this is where the arrow comes from.
+ // For this reason, we start it in the middle of the left side
+ // of the token.
+ posStart.y = startRect.top + startRect.height / 2;
+
+ // End node has arrow head and we give it a bit more space.
+ posEnd.x = endRect.left - 4;
+ posEnd.y = endRect.top;
+
+ // Utility object with x and y offsets for handles.
+ var curvature = {
+ // We want bottom-to-top arrow to curve a bit more, so it doesn't
+ // overlap much with top-to-bottom curves (much more frequent).
+ x: topToBottom ? 15 : 25,
+ y: Math.min((posEnd.y - posStart.y) / 3, 10)
+ }
+
+ // When destination is on the different line, we can make a
+ // curvier arrow because we have space for it.
+ // So, instead of using
+ //
+ // startHandle.x = posStart.x - curvature.x
+ // endHandle.x = posEnd.x - curvature.x
+ //
+ // We use the leftmost of these two values for both handles.
+ startHandle.x = Math.min(posStart.x, posEnd.x) - curvature.x;
+ endHandle.x = startHandle.x;
+
+ // Curving downwards from the start node...
+ startHandle.y = posStart.y + curvature.y;
+ // ... and upwards from the end node.
+ endHandle.y = posEnd.y - curvature.y;
+
+ } else if (leftToRight) {
+ // Case #2
+ // Starting from the top right corner...
+ posStart.x = startRect.right - 1;
+ posStart.y = startRect.top;
+
+ // ...and ending at the top left corner of the end token.
+ posEnd.x = endRect.left + 1;
+ posEnd.y = endRect.top - 1;
+
+ // Utility object with x and y offsets for handles.
+ var curvature = {
+ x: Math.min((posEnd.x - posStart.x) / 3, 15),
+ y: 5
+ }
+
+ // Curving to the right...
+ startHandle.x = posStart.x + curvature.x;
+ // ... and upwards from the start node.
+ startHandle.y = posStart.y - curvature.y;
+
+ // And to the left...
+ endHandle.x = posEnd.x - curvature.x;
+ // ... and upwards from the end node.
+ endHandle.y = posEnd.y - curvature.y;
+
+ } else {
+ // Case #3
+ // Starting from the bottom right corner...
+ posStart.x = startRect.right;
+ posStart.y = startRect.bottom;
+
+ // ...and ending also at the bottom right corner, but of the end token.
+ posEnd.x = endRect.right - 1;
+ posEnd.y = endRect.bottom + 1;
+
+ // Utility object with x and y offsets for handles.
+ var curvature = {
+ x: Math.min((posStart.x - posEnd.x) / 3, 15),
+ y: 5
+ }
+
+ // Curving to the left...
+ startHandle.x = posStart.x - curvature.x;
+ // ... and downwards from the start node.
+ startHandle.y = posStart.y + curvature.y;
+
+ // And to the right...
+ endHandle.x = posEnd.x + curvature.x;
+ // ... and downwards from the end node.
+ endHandle.y = posEnd.y + curvature.y;
+ }
+
+ // Put it all together into a path.
+ // More information on the format:
+ // https://developer.mozilla.org/en-US/docs/Web/SVG/Tutorial/Paths
+ var pathStr = "M" + posStart.x + "," + posStart.y + " " +
+ "C" + startHandle.x + "," + startHandle.y + " " +
+ endHandle.x + "," + endHandle.y + " " +
+ posEnd.x + "," + posEnd.y;
+
+ arrow.setAttribute("d", pathStr);
+};
+
+var drawArrows = function() {
+ const numOfArrows = document.querySelectorAll("path[id^=arrow]").length;
+ for (var i = 0; i < numOfArrows; ++i) {
+ drawArrow(i);
+ }
+}
+
+var toggleArrows = function(event) {
+ const arrows = document.querySelector("#arrows");
+ if (event.target.checked) {
+ arrows.setAttribute("visibility", "visible");
+ } else {
+ arrows.setAttribute("visibility", "hidden");
+ }
+}
+
+window.addEventListener("resize", drawArrows);
+document.addEventListener("DOMContentLoaded", function() {
+ // Whenever we show the invocation, locations change, so we
+ // need to redraw the arrows.
+ document
+ .querySelector('input[id="showinvocation"]')
+ .addEventListener("click", drawArrows);
+ // Hiding irrelevant lines should also cause the arrows to be redrawn.
+ document
+ .querySelector('input[name="showCounterexample"]')
+ .addEventListener("change", drawArrows);
+ document
+ .querySelector('input[name="showArrows"]')
+ .addEventListener("change", toggleArrows);
+ drawArrows();
+ // Default highlighting for the last event.
+ highlightArrowsForSelectedEvent();
+});
+</script>
+ )<<<";
+}
diff --git a/clang/lib/StaticAnalyzer/Core/LoopUnrolling.cpp b/clang/lib/StaticAnalyzer/Core/LoopUnrolling.cpp
index e5f4e9ea30c9..8bf6fc085c6a 100644
--- a/clang/lib/StaticAnalyzer/Core/LoopUnrolling.cpp
+++ b/clang/lib/StaticAnalyzer/Core/LoopUnrolling.cpp
@@ -69,7 +69,7 @@ namespace clang {
namespace ento {
static bool isLoopStmt(const Stmt *S) {
- return S && (isa<ForStmt>(S) || isa<WhileStmt>(S) || isa<DoStmt>(S));
+ return isa_and_nonnull<ForStmt, WhileStmt, DoStmt>(S);
}
ProgramStateRef processLoopEnd(const Stmt *LoopStmt, ProgramStateRef State) {
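Aside (illustration, not part of the imported patch): the hunk above folds three chained isa<> calls into the variadic isa_and_nonnull<ForStmt, WhileStmt, DoStmt>(S). A minimal self-contained sketch of the same "non-null and one of these types" idea, using dynamic_cast instead of LLVM's classof machinery; all names below are invented for the example.

#include <iostream>

struct Stmt { virtual ~Stmt() = default; };
struct ForStmt : Stmt {};
struct WhileStmt : Stmt {};
struct ReturnStmt : Stmt {};

// Hypothetical stand-in for llvm::isa_and_nonnull<Ts...>: true when S is
// non-null and dynamically one of the listed types.
template <typename... Ts> bool isAnyOfNonnull(const Stmt *S) {
  return S && ((dynamic_cast<const Ts *>(S) != nullptr) || ...);
}

int main() {
  WhileStmt W;
  ReturnStmt R;
  std::cout << isAnyOfNonnull<ForStmt, WhileStmt>(&W) << '\n';      // 1
  std::cout << isAnyOfNonnull<ForStmt, WhileStmt>(&R) << '\n';      // 0
  std::cout << isAnyOfNonnull<ForStmt, WhileStmt>(nullptr) << '\n'; // 0
}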
diff --git a/clang/lib/StaticAnalyzer/Core/LoopWidening.cpp b/clang/lib/StaticAnalyzer/Core/LoopWidening.cpp
index 47e34dd84b9a..748c65f578a8 100644
--- a/clang/lib/StaticAnalyzer/Core/LoopWidening.cpp
+++ b/clang/lib/StaticAnalyzer/Core/LoopWidening.cpp
@@ -45,8 +45,7 @@ ProgramStateRef getWidenedLoopState(ProgramStateRef PrevState,
const LocationContext *LCtx,
unsigned BlockCount, const Stmt *LoopStmt) {
- assert(isa<ForStmt>(LoopStmt) || isa<WhileStmt>(LoopStmt) ||
- isa<DoStmt>(LoopStmt));
+ assert((isa<ForStmt, WhileStmt, DoStmt>(LoopStmt)));
// Invalidate values in the current state.
// TODO Make this more conservative by only invalidating values that might
diff --git a/clang/lib/StaticAnalyzer/Core/MemRegion.cpp b/clang/lib/StaticAnalyzer/Core/MemRegion.cpp
index bd725ee9eaa3..f77fcb030a15 100644
--- a/clang/lib/StaticAnalyzer/Core/MemRegion.cpp
+++ b/clang/lib/StaticAnalyzer/Core/MemRegion.cpp
@@ -28,6 +28,7 @@
#include "clang/Basic/IdentifierTable.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/SourceManager.h"
+#include "clang/StaticAnalyzer/Core/AnalyzerOptions.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h"
@@ -768,14 +769,39 @@ DefinedOrUnknownSVal MemRegionManager::getStaticSize(const MemRegion *MR,
return UnknownVal();
QualType Ty = cast<TypedValueRegion>(SR)->getDesugaredValueType(Ctx);
- DefinedOrUnknownSVal Size = getElementExtent(Ty, SVB);
+ const DefinedOrUnknownSVal Size = getElementExtent(Ty, SVB);
+
+ // We currently don't model flexible array members (FAMs), which are:
+ // - int array[]; of IncompleteArrayType
+ // - int array[0]; of ConstantArrayType with size 0
+ // - int array[1]; of ConstantArrayType with size 1 (*)
+ // (*): Consider single element array object members as FAM candidates only
+ // if the consider-single-element-arrays-as-flexible-array-members
+ // analyzer option is true.
+ // https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html
+ const auto isFlexibleArrayMemberCandidate = [this,
+ &SVB](QualType Ty) -> bool {
+ const ArrayType *AT = Ctx.getAsArrayType(Ty);
+ if (!AT)
+ return false;
+ if (isa<IncompleteArrayType>(AT))
+ return true;
+
+ if (const auto *CAT = dyn_cast<ConstantArrayType>(AT)) {
+ const llvm::APInt &Size = CAT->getSize();
+ if (Size.isZero())
+ return true;
+
+ const AnalyzerOptions &Opts = SVB.getAnalyzerOptions();
+ if (Opts.ShouldConsiderSingleElementArraysAsFlexibleArrayMembers &&
+ Size.isOne())
+ return true;
+ }
+ return false;
+ };
- // A zero-length array at the end of a struct often stands for dynamically
- // allocated extra memory.
- if (Size.isZeroConstant()) {
- if (isa<ConstantArrayType>(Ty))
- return UnknownVal();
- }
+ if (isFlexibleArrayMemberCandidate(Ty))
+ return UnknownVal();
return Size;
}
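Aside (illustration, not part of the imported patch): the three array shapes treated as flexible-array-member candidates above look like this in source. The first two forms rely on the C99 flexible-array-member feature / GCC zero-length-array extension rather than standard C++, so this sketch assumes a GCC- or Clang-compatible compiler; the option name is taken from the comment above.

// Flexible-array-member candidates, as enumerated in the comment above.
struct FamIncomplete { int len; int data[]; };  // IncompleteArrayType
struct FamZeroLength { int len; int data[0]; }; // ConstantArrayType, size 0
struct FamSingle     { int len; int data[1]; }; // size 1: a candidate only when
// consider-single-element-arrays-as-flexible-array-members is enabled

int main() { return 0; }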
@@ -948,9 +974,9 @@ const VarRegion *MemRegionManager::getVarRegion(const VarDecl *D,
// First handle the globals defined in system headers.
if (Ctx.getSourceManager().isInSystemHeader(D->getLocation())) {
- // Whitelist the system globals which often DO GET modified, assume the
+ // Allow the system globals which often DO GET modified; assume the
// rest are immutable.
- if (D->getName().find("errno") != StringRef::npos)
+ if (D->getName().contains("errno"))
sReg = getGlobalsRegion(MemRegion::GlobalSystemSpaceRegionKind);
else
sReg = getGlobalsRegion(MemRegion::GlobalImmutableSpaceRegionKind);
@@ -986,14 +1012,15 @@ const VarRegion *MemRegionManager::getVarRegion(const VarDecl *D,
sReg = getUnknownRegion();
} else {
if (D->hasLocalStorage()) {
- sReg = isa<ParmVarDecl>(D) || isa<ImplicitParamDecl>(D)
- ? static_cast<const MemRegion*>(getStackArgumentsRegion(STC))
- : static_cast<const MemRegion*>(getStackLocalsRegion(STC));
+ sReg =
+ isa<ParmVarDecl, ImplicitParamDecl>(D)
+ ? static_cast<const MemRegion *>(getStackArgumentsRegion(STC))
+ : static_cast<const MemRegion *>(getStackLocalsRegion(STC));
}
else {
assert(D->isStaticLocal());
const Decl *STCD = STC->getDecl();
- if (isa<FunctionDecl>(STCD) || isa<ObjCMethodDecl>(STCD))
+ if (isa<FunctionDecl, ObjCMethodDecl>(STCD))
sReg = getGlobalsRegion(MemRegion::StaticGlobalSpaceRegionKind,
getFunctionCodeRegion(cast<NamedDecl>(STCD)));
else if (const auto *BD = dyn_cast<BlockDecl>(STCD)) {
@@ -1257,9 +1284,7 @@ bool MemRegion::hasStackParametersStorage() const {
}
bool MemRegion::hasGlobalsOrParametersStorage() const {
- const MemSpaceRegion *MS = getMemorySpace();
- return isa<StackArgumentsSpaceRegion>(MS) ||
- isa<GlobalsSpaceRegion>(MS);
+ return isa<StackArgumentsSpaceRegion, GlobalsSpaceRegion>(getMemorySpace());
}
// getBaseRegion strips away all elements and fields, and get the base region
diff --git a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
index 69554576bdb2..74403a160b8e 100644
--- a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
+++ b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
@@ -494,15 +494,17 @@ RangeSet RangeSet::Factory::deletePoint(RangeSet From,
return intersect(From, Upper, Lower);
}
-void Range::dump(raw_ostream &OS) const {
+LLVM_DUMP_METHOD void Range::dump(raw_ostream &OS) const {
OS << '[' << toString(From(), 10) << ", " << toString(To(), 10) << ']';
}
+LLVM_DUMP_METHOD void Range::dump() const { dump(llvm::errs()); }
-void RangeSet::dump(raw_ostream &OS) const {
+LLVM_DUMP_METHOD void RangeSet::dump(raw_ostream &OS) const {
OS << "{ ";
llvm::interleaveComma(*this, OS, [&OS](const Range &R) { R.dump(OS); });
OS << " }";
}
+LLVM_DUMP_METHOD void RangeSet::dump() const { dump(llvm::errs()); }
REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(SymbolSet, SymbolRef)
@@ -599,6 +601,10 @@ public:
LLVM_NODISCARD static inline Optional<bool>
areEqual(ProgramStateRef State, SymbolRef First, SymbolRef Second);
+ /// Remove one member from the class.
+ LLVM_NODISCARD ProgramStateRef removeMember(ProgramStateRef State,
+ const SymbolRef Old);
+
/// Iterate over all symbols and try to simplify them.
LLVM_NODISCARD static inline ProgramStateRef simplify(SValBuilder &SVB,
RangeSet::Factory &F,
@@ -655,6 +661,7 @@ private:
inline ProgramStateRef mergeImpl(RangeSet::Factory &F, ProgramStateRef State,
SymbolSet Members, EquivalenceClass Other,
SymbolSet OtherMembers);
+
static inline bool
addToDisequalityInfo(DisequalityMapTy &Info, ConstraintRangeTy &Constraints,
RangeSet::Factory &F, ProgramStateRef State,
@@ -1112,7 +1119,7 @@ private:
if (!SSE)
return llvm::None;
- BinaryOperatorKind CurrentOP = SSE->getOpcode();
+ const BinaryOperatorKind CurrentOP = SSE->getOpcode();
// We currently do not support <=> (C++20).
if (!BinaryOperator::isComparisonOp(CurrentOP) || (CurrentOP == BO_Cmp))
@@ -1126,7 +1133,12 @@ private:
SymbolManager &SymMgr = State->getSymbolManager();
- int UnknownStates = 0;
+ // We use this variable to store the last queried operator (`QueriedOP`)
+ // for which `getCmpOpState` returned `Unknown`. If two different OPs
+ // returned `Unknown`, then we have to query the special `UnknownX2`
+ // column. We assume that `getCmpOpState(CurrentOP, CurrentOP)` never
+ // returns `Unknown`, so `CurrentOP` is a good initial value.
+ BinaryOperatorKind LastQueriedOpToUnknown = CurrentOP;
    // Loop goes through all of the columns except the last one ('UnknownX2').
// We treat `UnknownX2` column separately at the end of the loop body.
@@ -1163,15 +1175,18 @@ private:
CmpOpTable.getCmpOpState(CurrentOP, QueriedOP);
if (BranchState == OperatorRelationsTable::Unknown) {
- if (++UnknownStates == 2)
- // If we met both Unknown states.
+ if (LastQueriedOpToUnknown != CurrentOP &&
+ LastQueriedOpToUnknown != QueriedOP) {
+ // If we got the Unknown state for two different operators.
// if (x <= y) // assume true
// if (x != y) // assume true
// if (x < y) // would be also true
// Get a state from `UnknownX2` column.
BranchState = CmpOpTable.getCmpOpStateForUnknownX2(CurrentOP);
- else
+ } else {
+ LastQueriedOpToUnknown = QueriedOP;
continue;
+ }
}
return (BranchState == OperatorRelationsTable::True) ? getTrueRange(T)
@@ -1382,6 +1397,113 @@ RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_Rem>(Range LHS,
}
//===----------------------------------------------------------------------===//
+// Constraint manager implementation details
+//===----------------------------------------------------------------------===//
+
+class RangeConstraintManager : public RangedConstraintManager {
+public:
+ RangeConstraintManager(ExprEngine *EE, SValBuilder &SVB)
+ : RangedConstraintManager(EE, SVB), F(getBasicVals()) {}
+
+ //===------------------------------------------------------------------===//
+ // Implementation for interface from ConstraintManager.
+ //===------------------------------------------------------------------===//
+
+ bool haveEqualConstraints(ProgramStateRef S1,
+ ProgramStateRef S2) const override {
+ // NOTE: ClassMembers are as simple as back pointers for ClassMap,
+ // so comparing constraint ranges and class maps should be
+ // sufficient.
+ return S1->get<ConstraintRange>() == S2->get<ConstraintRange>() &&
+ S1->get<ClassMap>() == S2->get<ClassMap>();
+ }
+
+ bool canReasonAbout(SVal X) const override;
+
+ ConditionTruthVal checkNull(ProgramStateRef State, SymbolRef Sym) override;
+
+ const llvm::APSInt *getSymVal(ProgramStateRef State,
+ SymbolRef Sym) const override;
+
+ ProgramStateRef removeDeadBindings(ProgramStateRef State,
+ SymbolReaper &SymReaper) override;
+
+ void printJson(raw_ostream &Out, ProgramStateRef State, const char *NL = "\n",
+ unsigned int Space = 0, bool IsDot = false) const override;
+ void printConstraints(raw_ostream &Out, ProgramStateRef State,
+ const char *NL = "\n", unsigned int Space = 0,
+ bool IsDot = false) const;
+ void printEquivalenceClasses(raw_ostream &Out, ProgramStateRef State,
+ const char *NL = "\n", unsigned int Space = 0,
+ bool IsDot = false) const;
+ void printDisequalities(raw_ostream &Out, ProgramStateRef State,
+ const char *NL = "\n", unsigned int Space = 0,
+ bool IsDot = false) const;
+
+ //===------------------------------------------------------------------===//
+ // Implementation for interface from RangedConstraintManager.
+ //===------------------------------------------------------------------===//
+
+ ProgramStateRef assumeSymNE(ProgramStateRef State, SymbolRef Sym,
+ const llvm::APSInt &V,
+ const llvm::APSInt &Adjustment) override;
+
+ ProgramStateRef assumeSymEQ(ProgramStateRef State, SymbolRef Sym,
+ const llvm::APSInt &V,
+ const llvm::APSInt &Adjustment) override;
+
+ ProgramStateRef assumeSymLT(ProgramStateRef State, SymbolRef Sym,
+ const llvm::APSInt &V,
+ const llvm::APSInt &Adjustment) override;
+
+ ProgramStateRef assumeSymGT(ProgramStateRef State, SymbolRef Sym,
+ const llvm::APSInt &V,
+ const llvm::APSInt &Adjustment) override;
+
+ ProgramStateRef assumeSymLE(ProgramStateRef State, SymbolRef Sym,
+ const llvm::APSInt &V,
+ const llvm::APSInt &Adjustment) override;
+
+ ProgramStateRef assumeSymGE(ProgramStateRef State, SymbolRef Sym,
+ const llvm::APSInt &V,
+ const llvm::APSInt &Adjustment) override;
+
+ ProgramStateRef assumeSymWithinInclusiveRange(
+ ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
+ const llvm::APSInt &To, const llvm::APSInt &Adjustment) override;
+
+ ProgramStateRef assumeSymOutsideInclusiveRange(
+ ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
+ const llvm::APSInt &To, const llvm::APSInt &Adjustment) override;
+
+private:
+ RangeSet::Factory F;
+
+ RangeSet getRange(ProgramStateRef State, SymbolRef Sym);
+ RangeSet getRange(ProgramStateRef State, EquivalenceClass Class);
+ ProgramStateRef setRange(ProgramStateRef State, SymbolRef Sym,
+ RangeSet Range);
+ ProgramStateRef setRange(ProgramStateRef State, EquivalenceClass Class,
+ RangeSet Range);
+
+ RangeSet getSymLTRange(ProgramStateRef St, SymbolRef Sym,
+ const llvm::APSInt &Int,
+ const llvm::APSInt &Adjustment);
+ RangeSet getSymGTRange(ProgramStateRef St, SymbolRef Sym,
+ const llvm::APSInt &Int,
+ const llvm::APSInt &Adjustment);
+ RangeSet getSymLERange(ProgramStateRef St, SymbolRef Sym,
+ const llvm::APSInt &Int,
+ const llvm::APSInt &Adjustment);
+ RangeSet getSymLERange(llvm::function_ref<RangeSet()> RS,
+ const llvm::APSInt &Int,
+ const llvm::APSInt &Adjustment);
+ RangeSet getSymGERange(ProgramStateRef St, SymbolRef Sym,
+ const llvm::APSInt &Int,
+ const llvm::APSInt &Adjustment);
+};
+
+//===----------------------------------------------------------------------===//
// Constraint assignment logic
//===----------------------------------------------------------------------===//
@@ -1492,7 +1614,28 @@ public:
return Assignor.assign(CoS, NewConstraint);
}
+ /// Handle expressions like: a % b != 0.
+ template <typename SymT>
+ bool handleRemainderOp(const SymT *Sym, RangeSet Constraint) {
+ if (Sym->getOpcode() != BO_Rem)
+ return true;
+ // a % b != 0 implies that a != 0.
+ if (!Constraint.containsZero()) {
+ SVal SymSVal = Builder.makeSymbolVal(Sym->getLHS());
+ if (auto NonLocSymSVal = SymSVal.getAs<nonloc::SymbolVal>()) {
+ State = State->assume(*NonLocSymSVal, true);
+ if (!State)
+ return false;
+ }
+ }
+ return true;
+ }
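Aside (illustration, not part of the imported patch): the deduction encoded by handleRemainderOp is purely arithmetic. A tiny self-contained check of the implication "a % b != 0 implies a != 0":

#include <cassert>

int main() {
  // If a were 0, then a % b would be 0 for every non-zero b; therefore
  // observing a % b != 0 is enough to conclude a != 0.
  for (int a = -5; a <= 5; ++a)
    for (int b = 1; b <= 5; ++b)
      if (a % b != 0)
        assert(a != 0);
  return 0;
}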
+
inline bool assignSymExprToConst(const SymExpr *Sym, Const Constraint);
+ inline bool assignSymIntExprToRangeSet(const SymIntExpr *Sym,
+ RangeSet Constraint) {
+ return handleRemainderOp(Sym, Constraint);
+ }
inline bool assignSymSymExprToRangeSet(const SymSymExpr *Sym,
RangeSet Constraint);
@@ -1568,11 +1711,9 @@ private:
assert(!Constraint.isEmpty() && "Empty ranges shouldn't get here");
if (Constraint.getConcreteValue())
- return !Constraint.getConcreteValue()->isNullValue();
+ return !Constraint.getConcreteValue()->isZero();
- APSIntType T{Constraint.getMinValue()};
- Const Zero = T.getZeroValue();
- if (!Constraint.contains(Zero))
+ if (!Constraint.containsZero())
return true;
return llvm::None;
@@ -1583,112 +1724,6 @@ private:
RangeSet::Factory &RangeFactory;
};
-//===----------------------------------------------------------------------===//
-// Constraint manager implementation details
-//===----------------------------------------------------------------------===//
-
-class RangeConstraintManager : public RangedConstraintManager {
-public:
- RangeConstraintManager(ExprEngine *EE, SValBuilder &SVB)
- : RangedConstraintManager(EE, SVB), F(getBasicVals()) {}
-
- //===------------------------------------------------------------------===//
- // Implementation for interface from ConstraintManager.
- //===------------------------------------------------------------------===//
-
- bool haveEqualConstraints(ProgramStateRef S1,
- ProgramStateRef S2) const override {
- // NOTE: ClassMembers are as simple as back pointers for ClassMap,
- // so comparing constraint ranges and class maps should be
- // sufficient.
- return S1->get<ConstraintRange>() == S2->get<ConstraintRange>() &&
- S1->get<ClassMap>() == S2->get<ClassMap>();
- }
-
- bool canReasonAbout(SVal X) const override;
-
- ConditionTruthVal checkNull(ProgramStateRef State, SymbolRef Sym) override;
-
- const llvm::APSInt *getSymVal(ProgramStateRef State,
- SymbolRef Sym) const override;
-
- ProgramStateRef removeDeadBindings(ProgramStateRef State,
- SymbolReaper &SymReaper) override;
-
- void printJson(raw_ostream &Out, ProgramStateRef State, const char *NL = "\n",
- unsigned int Space = 0, bool IsDot = false) const override;
- void printConstraints(raw_ostream &Out, ProgramStateRef State,
- const char *NL = "\n", unsigned int Space = 0,
- bool IsDot = false) const;
- void printEquivalenceClasses(raw_ostream &Out, ProgramStateRef State,
- const char *NL = "\n", unsigned int Space = 0,
- bool IsDot = false) const;
- void printDisequalities(raw_ostream &Out, ProgramStateRef State,
- const char *NL = "\n", unsigned int Space = 0,
- bool IsDot = false) const;
-
- //===------------------------------------------------------------------===//
- // Implementation for interface from RangedConstraintManager.
- //===------------------------------------------------------------------===//
-
- ProgramStateRef assumeSymNE(ProgramStateRef State, SymbolRef Sym,
- const llvm::APSInt &V,
- const llvm::APSInt &Adjustment) override;
-
- ProgramStateRef assumeSymEQ(ProgramStateRef State, SymbolRef Sym,
- const llvm::APSInt &V,
- const llvm::APSInt &Adjustment) override;
-
- ProgramStateRef assumeSymLT(ProgramStateRef State, SymbolRef Sym,
- const llvm::APSInt &V,
- const llvm::APSInt &Adjustment) override;
-
- ProgramStateRef assumeSymGT(ProgramStateRef State, SymbolRef Sym,
- const llvm::APSInt &V,
- const llvm::APSInt &Adjustment) override;
-
- ProgramStateRef assumeSymLE(ProgramStateRef State, SymbolRef Sym,
- const llvm::APSInt &V,
- const llvm::APSInt &Adjustment) override;
-
- ProgramStateRef assumeSymGE(ProgramStateRef State, SymbolRef Sym,
- const llvm::APSInt &V,
- const llvm::APSInt &Adjustment) override;
-
- ProgramStateRef assumeSymWithinInclusiveRange(
- ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
- const llvm::APSInt &To, const llvm::APSInt &Adjustment) override;
-
- ProgramStateRef assumeSymOutsideInclusiveRange(
- ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
- const llvm::APSInt &To, const llvm::APSInt &Adjustment) override;
-
-private:
- RangeSet::Factory F;
-
- RangeSet getRange(ProgramStateRef State, SymbolRef Sym);
- RangeSet getRange(ProgramStateRef State, EquivalenceClass Class);
- ProgramStateRef setRange(ProgramStateRef State, SymbolRef Sym,
- RangeSet Range);
- ProgramStateRef setRange(ProgramStateRef State, EquivalenceClass Class,
- RangeSet Range);
-
- RangeSet getSymLTRange(ProgramStateRef St, SymbolRef Sym,
- const llvm::APSInt &Int,
- const llvm::APSInt &Adjustment);
- RangeSet getSymGTRange(ProgramStateRef St, SymbolRef Sym,
- const llvm::APSInt &Int,
- const llvm::APSInt &Adjustment);
- RangeSet getSymLERange(ProgramStateRef St, SymbolRef Sym,
- const llvm::APSInt &Int,
- const llvm::APSInt &Adjustment);
- RangeSet getSymLERange(llvm::function_ref<RangeSet()> RS,
- const llvm::APSInt &Int,
- const llvm::APSInt &Adjustment);
- RangeSet getSymGERange(ProgramStateRef St, SymbolRef Sym,
- const llvm::APSInt &Int,
- const llvm::APSInt &Adjustment);
-};
bool ConstraintAssignor::assignSymExprToConst(const SymExpr *Sym,
const llvm::APSInt &Constraint) {
@@ -1716,11 +1751,26 @@ bool ConstraintAssignor::assignSymExprToConst(const SymExpr *Sym,
return false;
}
+ // We may have trivial equivalence classes in the disequality info as
+ // well, and we need to simplify them.
+ DisequalityMapTy DisequalityInfo = State->get<DisequalityMap>();
+ for (std::pair<EquivalenceClass, ClassSet> DisequalityEntry :
+ DisequalityInfo) {
+ EquivalenceClass Class = DisequalityEntry.first;
+ ClassSet DisequalClasses = DisequalityEntry.second;
+ State = EquivalenceClass::simplify(Builder, RangeFactory, State, Class);
+ if (!State)
+ return false;
+ }
+
return true;
}
bool ConstraintAssignor::assignSymSymExprToRangeSet(const SymSymExpr *Sym,
RangeSet Constraint) {
+ if (!handleRemainderOp(Sym, Constraint))
+ return false;
+
Optional<bool> ConstraintAsBool = interpreteAsBool(Constraint);
if (!ConstraintAsBool)
@@ -2086,6 +2136,61 @@ inline Optional<bool> EquivalenceClass::areEqual(ProgramStateRef State,
return llvm::None;
}
+LLVM_NODISCARD ProgramStateRef
+EquivalenceClass::removeMember(ProgramStateRef State, const SymbolRef Old) {
+
+ SymbolSet ClsMembers = getClassMembers(State);
+ assert(ClsMembers.contains(Old));
+
+ // We don't remove `Old`'s Sym->Class relation for two reasons:
+ // 1) This way constraints for the old symbol can still be found via its
+ // equivalence class, i.e. the class that it used to be a member of.
+ // 2) Performance and resource reasons. We can spare one removal and thus one
+ // additional tree in the forest of `ClassMap`.
+
+ // Remove `Old`'s Class->Sym relation.
+ SymbolSet::Factory &F = getMembersFactory(State);
+ ClassMembersTy::Factory &EMFactory = State->get_context<ClassMembers>();
+ ClsMembers = F.remove(ClsMembers, Old);
+ // Ensure another precondition of the removeMember function (we can check
+ // this only with isEmpty, thus we have to do the remove first).
+ assert(!ClsMembers.isEmpty() &&
+ "Class should have had at least two members before member removal");
+ // Overwrite the existing members assigned to this class.
+ ClassMembersTy ClassMembersMap = State->get<ClassMembers>();
+ ClassMembersMap = EMFactory.add(ClassMembersMap, *this, ClsMembers);
+ State = State->set<ClassMembers>(ClassMembersMap);
+
+ return State;
+}
+
+// Re-evaluate an SVal with top-level `State->assume` logic.
+LLVM_NODISCARD ProgramStateRef reAssume(ProgramStateRef State,
+ const RangeSet *Constraint,
+ SVal TheValue) {
+ if (!Constraint)
+ return State;
+
+ const auto DefinedVal = TheValue.castAs<DefinedSVal>();
+
+ // If the SVal is 0, we can simply interpret that as `false`.
+ if (Constraint->encodesFalseRange())
+ return State->assume(DefinedVal, false);
+
+ // If the constraint does not encode 0 then we can interpret that as `true`
+ // AND as a Range(Set).
+ if (Constraint->encodesTrueRange()) {
+ State = State->assume(DefinedVal, true);
+ if (!State)
+ return nullptr;
+ // Fall through, re-assume based on the range values as well.
+ }
+ // Overestimate the individual Ranges with the RangeSet's lowest and
+ // highest values.
+ return State->assumeInclusiveRange(DefinedVal, Constraint->getMinValue(),
+ Constraint->getMaxValue(), true);
+}
+
// Iterate over all symbols and try to simplify them. Once a symbol is
// simplified then we check if we can merge the simplified symbol's equivalence
// class to this class. This way, we simplify not just the symbols but the
@@ -2096,14 +2201,62 @@ EquivalenceClass::simplify(SValBuilder &SVB, RangeSet::Factory &F,
ProgramStateRef State, EquivalenceClass Class) {
SymbolSet ClassMembers = Class.getClassMembers(State);
for (const SymbolRef &MemberSym : ClassMembers) {
- SymbolRef SimplifiedMemberSym = ento::simplify(State, MemberSym);
+
+ const SVal SimplifiedMemberVal = simplifyToSVal(State, MemberSym);
+ const SymbolRef SimplifiedMemberSym = SimplifiedMemberVal.getAsSymbol();
+
+ // The symbol has collapsed to a constant; check if the current State is
+ // still feasible.
+ if (const auto CI = SimplifiedMemberVal.getAs<nonloc::ConcreteInt>()) {
+ const llvm::APSInt &SV = CI->getValue();
+ const RangeSet *ClassConstraint = getConstraint(State, Class);
+ // We have found a contradiction.
+ if (ClassConstraint && !ClassConstraint->contains(SV))
+ return nullptr;
+ }
+
if (SimplifiedMemberSym && MemberSym != SimplifiedMemberSym) {
// The simplified symbol should be the member of the original Class,
// however, it might be in another existing class at the moment. We
// have to merge these classes.
+ ProgramStateRef OldState = State;
State = merge(F, State, MemberSym, SimplifiedMemberSym);
if (!State)
return nullptr;
+ // No state change, so no merge actually happened.
+ if (OldState == State)
+ continue;
+
+ assert(find(State, MemberSym) == find(State, SimplifiedMemberSym));
+ // Remove the old and more complex symbol.
+ State = find(State, MemberSym).removeMember(State, MemberSym);
+
+ // Query the class constraint again b/c that may have changed during the
+ // merge above.
+ const RangeSet *ClassConstraint = getConstraint(State, Class);
+
+ // Re-evaluate an SVal with top-level `State->assume`; this ignites
+ // a RECURSIVE algorithm that will reach a FIXPOINT.
+ //
+ // About performance and complexity: Let us assume that in a State we
+ // have N non-trivial equivalence classes and that all constraints and
+ // disequality info is related to non-trivial classes. In the worst case,
+ // we can simplify only one symbol of one class in each iteration. The
+ // number of symbols in one class cannot grow b/c we replace the old
+ // symbol with the simplified one. Also, the number of the equivalence
+ // classes can decrease only, b/c the algorithm does a merge operation
+ // optionally. We need N iterations in this case to reach the fixpoint.
+ // Thus, the steps needed to be done in the worst case is proportional to
+ // N*N.
+ //
+ // This worst case scenario can be extended to that case when we have
+ // trivial classes in the constraints and in the disequality map. This
+ // case can be reduced to the case with a State where there are only
+ // non-trivial classes. This is because a merge operation on two trivial
+ // classes results in one non-trivial class.
+ State = reAssume(State, ClassConstraint, SimplifiedMemberVal);
+ if (!State)
+ return nullptr;
}
}
return State;
diff --git a/clang/lib/StaticAnalyzer/Core/RangedConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/RangedConstraintManager.cpp
index d227c025fb20..892d64ea4e4e 100644
--- a/clang/lib/StaticAnalyzer/Core/RangedConstraintManager.cpp
+++ b/clang/lib/StaticAnalyzer/Core/RangedConstraintManager.cpp
@@ -41,7 +41,12 @@ ProgramStateRef RangedConstraintManager::assumeSym(ProgramStateRef State,
return assumeSymRel(State, SIE->getLHS(), op, SIE->getRHS());
}
- } else if (const SymSymExpr *SSE = dyn_cast<SymSymExpr>(Sym)) {
+ // Handle adjustment with non-comparison ops.
+ const llvm::APSInt &Zero = getBasicVals().getValue(0, SIE->getType());
+ return assumeSymRel(State, SIE, (Assumption ? BO_NE : BO_EQ), Zero);
+ }
+
+ if (const auto *SSE = dyn_cast<SymSymExpr>(Sym)) {
BinaryOperator::Opcode Op = SSE->getOpcode();
assert(BinaryOperator::isComparisonOp(Op));
@@ -226,9 +231,13 @@ void RangedConstraintManager::computeAdjustment(SymbolRef &Sym,
}
}
-SymbolRef simplify(ProgramStateRef State, SymbolRef Sym) {
+SVal simplifyToSVal(ProgramStateRef State, SymbolRef Sym) {
SValBuilder &SVB = State->getStateManager().getSValBuilder();
- SVal SimplifiedVal = SVB.simplifySVal(State, SVB.makeSymbolVal(Sym));
+ return SVB.simplifySVal(State, SVB.makeSymbolVal(Sym));
+}
+
+SymbolRef simplify(ProgramStateRef State, SymbolRef Sym) {
+ SVal SimplifiedVal = simplifyToSVal(State, Sym);
if (SymbolRef SimplifiedSym = SimplifiedVal.getAsSymbol())
return SimplifiedSym;
return Sym;
diff --git a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
index 4ffa1aacb41f..135130b35ba7 100644
--- a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
+++ b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
@@ -62,8 +62,8 @@ private:
: P(r, k), Data(offset) {
assert(r && "Must have known regions.");
assert(getOffset() == offset && "Failed to store offset");
- assert((r == r->getBaseRegion() || isa<ObjCIvarRegion>(r) ||
- isa <CXXDerivedObjectRegion>(r)) &&
+ assert((r == r->getBaseRegion() ||
+ isa<ObjCIvarRegion, CXXDerivedObjectRegion>(r)) &&
"Not a base");
}
public:
@@ -437,6 +437,15 @@ public:
RegionBindingsRef removeSubRegionBindings(RegionBindingsConstRef B,
const SubRegion *R);
+ Optional<SVal>
+ getConstantValFromConstArrayInitializer(RegionBindingsConstRef B,
+ const ElementRegion *R);
+ Optional<SVal>
+ getSValFromInitListExpr(const InitListExpr *ILE,
+ const SmallVector<uint64_t, 2> &ConcreteOffsets,
+ QualType ElemT);
+ SVal getSValFromStringLiteral(const StringLiteral *SL, uint64_t Offset,
+ QualType ElemT);
public: // Part of public interface to class.
@@ -1135,7 +1144,7 @@ void InvalidateRegionsWorker::VisitCluster(const MemRegion *baseR,
if (Regions)
Regions->push_back(baseR);
- if (isa<AllocaRegion>(baseR) || isa<SymbolicRegion>(baseR)) {
+ if (isa<AllocaRegion, SymbolicRegion>(baseR)) {
// Invalidate the region by setting its default value to
// conjured symbol. The type of the symbol is irrelevant.
DefinedOrUnknownSVal V =
@@ -1224,7 +1233,7 @@ void InvalidateRegionsWorker::VisitCluster(const MemRegion *baseR,
// detection.
SVal V = I.getData();
const MemRegion *R = V.getAsRegion();
- if (R && isa<SymbolicRegion>(R))
+ if (isa_and_nonnull<SymbolicRegion>(R))
VisitBinding(V);
}
}
@@ -1625,6 +1634,280 @@ RegionStoreManager::findLazyBinding(RegionBindingsConstRef B,
return Result;
}
+/// This is a helper function for `getConstantValFromConstArrayInitializer`.
+///
+/// Return an array of extents of the declared array type.
+///
+/// E.g. for `int x[1][2][3];` returns { 1, 2, 3 }.
+static SmallVector<uint64_t, 2>
+getConstantArrayExtents(const ConstantArrayType *CAT) {
+ assert(CAT && "ConstantArrayType should not be null");
+ CAT = cast<ConstantArrayType>(CAT->getCanonicalTypeInternal());
+ SmallVector<uint64_t, 2> Extents;
+ do {
+ Extents.push_back(CAT->getSize().getZExtValue());
+ } while ((CAT = dyn_cast<ConstantArrayType>(CAT->getElementType())));
+ return Extents;
+}
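Aside (illustration, not part of the imported patch): what getConstantArrayExtents collects from nested ConstantArrayTypes corresponds to the per-dimension extents of the declared type. A standalone sketch using the standard type traits for the `int x[1][2][3]` example from the comment above:

#include <iostream>
#include <type_traits>

int main() {
  using T = int[1][2][3];
  static_assert(std::rank_v<T> == 3, "three nested array dimensions");
  std::cout << std::extent_v<T, 0> << ' '   // 1
            << std::extent_v<T, 1> << ' '   // 2
            << std::extent_v<T, 2> << '\n'; // 3
}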
+
+/// This is a helper function for `getConstantValFromConstArrayInitializer`.
+///
+/// Return an array of offsets from nested ElementRegions and a root base
+/// region. The array is never empty and a base region is never null.
+///
+/// E.g. for `Element{Element{Element{VarRegion},1},2},3}` returns { 3, 2, 1 }.
+/// This represents an access through indirection: `arr[1][2][3];`
+///
+/// \param ER The given (possibly nested) ElementRegion.
+///
+/// \note The result array is in the reverse order of the indirection
+/// expression: arr[1][2][3] -> { 3, 2, 1 }. This keeps the complexity at
+/// O(n), where n is the number of indirections. It may not affect
+/// performance in real-life code, though.
+static std::pair<SmallVector<SVal, 2>, const MemRegion *>
+getElementRegionOffsetsWithBase(const ElementRegion *ER) {
+ assert(ER && "ElementRegion should not be null");
+ const MemRegion *Base;
+ SmallVector<SVal, 2> SValOffsets;
+ do {
+ SValOffsets.push_back(ER->getIndex());
+ Base = ER->getSuperRegion();
+ ER = dyn_cast<ElementRegion>(Base);
+ } while (ER);
+ return {SValOffsets, Base};
+}
+
+/// This is a helper function for `getConstantValFromConstArrayInitializer`.
+///
+/// Convert array of offsets from `SVal` to `uint64_t` in consideration of
+/// respective array extents.
+/// \param SrcOffsets [in] The array of offsets of type `SVal` in reversed
+/// order (expectedly received from `getElementRegionOffsetsWithBase`).
+/// \param ArrayExtents [in] The array of extents.
+/// \param DstOffsets [out] The array of offsets of type `uint64_t`.
+/// \returns:
+/// - `None` for a successful conversion.
+/// - `UndefinedVal` or `UnknownVal` otherwise. It's expected that this SVal
+/// will be returned as a suitable value of the access operation.
+///
+/// \example:
+/// const int arr[10][20][30] = {}; // ArrayExtents { 10, 20, 30 }
+/// int x1 = arr[4][5][6]; // SrcOffsets { NonLoc(6), NonLoc(5), NonLoc(4) }
+/// // DstOffsets { 4, 5, 6 }
+/// // returns None
+/// int x2 = arr[42][5][-6]; // returns UndefinedVal
+/// int x3 = arr[4][5][x2]; // returns UnknownVal
+static Optional<SVal>
+convertOffsetsFromSvalToUnsigneds(const SmallVector<SVal, 2> &SrcOffsets,
+ const SmallVector<uint64_t, 2> ArrayExtents,
+ SmallVector<uint64_t, 2> &DstOffsets) {
+ // Check offsets for being out of bounds.
+ // C++20 [expr.add] 7.6.6.4 (excerpt):
+ // If P points to an array element i of an array object x with n
+ // elements, where i < 0 or i > n, the behavior is undefined.
+ // Dereferencing is not allowed on the "one past the last
+ // element", when i == n.
+ // Example:
+ // const int arr[3][2] = {{1, 2}, {3, 4}};
+ // arr[0][0]; // 1
+ // arr[0][1]; // 2
+ // arr[0][2]; // UB
+ // arr[1][0]; // 3
+ // arr[1][1]; // 4
+ // arr[1][-1]; // UB
+ // arr[2][0]; // 0
+ // arr[2][1]; // 0
+ // arr[-2][0]; // UB
+ DstOffsets.resize(SrcOffsets.size());
+ auto ExtentIt = ArrayExtents.begin();
+ auto OffsetIt = DstOffsets.begin();
+ // Reverse `SValOffsets` to make it consistent with `ArrayExtents`.
+ for (SVal V : llvm::reverse(SrcOffsets)) {
+ if (auto CI = V.getAs<nonloc::ConcreteInt>()) {
+ // When offset is out of array's bounds, result is UB.
+ const llvm::APSInt &Offset = CI->getValue();
+ if (Offset.isNegative() || Offset.uge(*(ExtentIt++)))
+ return UndefinedVal();
+ // Store the index, reversed back into the original order.
+ *(OffsetIt++) = Offset.getZExtValue();
+ continue;
+ }
+ // A symbolic index is present. Return an Unknown value.
+ // FIXME: We also need to take ElementRegions with symbolic indexes into
+ // account.
+ return UnknownVal();
+ }
+ return None;
+}
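Aside (illustration, not part of the imported patch): a simplified standalone model of the conversion above, assuming reversed source offsets where a missing value stands for a symbolic index; the enum and function names are invented for the sketch.

#include <cstdint>
#include <iostream>
#include <optional>
#include <vector>

enum class Conv { Ok, Undefined, Unknown };

Conv convertOffsets(const std::vector<std::optional<int64_t>> &SrcRev,
                    const std::vector<uint64_t> &Extents,
                    std::vector<uint64_t> &Dst) {
  Dst.resize(SrcRev.size());
  auto ExtentIt = Extents.begin();
  auto DstIt = Dst.begin();
  // Walk the reversed offsets back in declaration order, checking bounds.
  for (auto It = SrcRev.rbegin(); It != SrcRev.rend(); ++It) {
    if (!*It)
      return Conv::Unknown; // symbolic index
    const int64_t Off = **It;
    if (Off < 0 || static_cast<uint64_t>(Off) >= *ExtentIt++)
      return Conv::Undefined; // out-of-bounds access is UB
    *DstIt++ = static_cast<uint64_t>(Off);
  }
  return Conv::Ok;
}

int main() {
  // const int arr[10][20][30]; access arr[4][5][6] -> reversed offsets {6,5,4}.
  std::vector<uint64_t> Dst;
  Conv R = convertOffsets({6, 5, 4}, {10, 20, 30}, Dst);
  std::cout << static_cast<int>(R) << ": "
            << Dst[0] << ',' << Dst[1] << ',' << Dst[2] << '\n'; // 0: 4,5,6
}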
+
+Optional<SVal> RegionStoreManager::getConstantValFromConstArrayInitializer(
+ RegionBindingsConstRef B, const ElementRegion *R) {
+ assert(R && "ElementRegion should not be null");
+
+ // Treat an n-dimensional array.
+ SmallVector<SVal, 2> SValOffsets;
+ const MemRegion *Base;
+ std::tie(SValOffsets, Base) = getElementRegionOffsetsWithBase(R);
+ const VarRegion *VR = dyn_cast<VarRegion>(Base);
+ if (!VR)
+ return None;
+
+ assert(!SValOffsets.empty() && "getElementRegionOffsets guarantees the "
+ "offsets vector is not empty.");
+
+ // Check if the containing array has an initialized value that we can trust.
+ // We can trust a const value or a value of a global initializer in main().
+ const VarDecl *VD = VR->getDecl();
+ if (!VD->getType().isConstQualified() &&
+ !R->getElementType().isConstQualified() &&
+ (!B.isMainAnalysis() || !VD->hasGlobalStorage()))
+ return None;
+
+ // The array's declaration should have a `ConstantArrayType`, because only
+ // this type contains the array extent. It may happen that the array's type
+ // is an `IncompleteArrayType`; to get the declaration with the
+ // `ConstantArrayType`, we should find the declaration in the redeclaration
+ // chain that has the initialization expression.
+ // NOTE: `getAnyInitializer` has an out-parameter, which returns a new `VD`
+ // from which the initializer is obtained. We replace the current `VD` with
+ // the new `VD`. If the return value of the function is null, then `VD` won't
+ // be replaced.
+ const Expr *Init = VD->getAnyInitializer(VD);
+ // NOTE: If `Init` is non-null, then the new `VD` is non-null for sure. So
+ // check `Init` for null only, and don't worry about the replaced `VD`.
+ if (!Init)
+ return None;
+
+ // The declaration must have a ConstantArrayType, since only that type
+ // carries the array extents we need below.
+ const ConstantArrayType *CAT = Ctx.getAsConstantArrayType(VD->getType());
+ if (!CAT)
+ return None;
+
+ // Get array extents.
+ SmallVector<uint64_t, 2> Extents = getConstantArrayExtents(CAT);
+
+ // The number of offsets should be equal to the number of extents;
+ // otherwise, wrong type punning occurred. For instance:
+ // int arr[1][2][3];
+ // auto ptr = (int(*)[42])arr;
+ // auto x = ptr[4][2]; // UB
+ // FIXME: Should return UndefinedVal.
+ if (SValOffsets.size() != Extents.size())
+ return None;
+
+ SmallVector<uint64_t, 2> ConcreteOffsets;
+ if (Optional<SVal> V = convertOffsetsFromSvalToUnsigneds(SValOffsets, Extents,
+ ConcreteOffsets))
+ return *V;
+
+ // Handle InitListExpr.
+ // Example:
+ // const char arr[4][2] = { { 1, 2 }, { 3 }, 4, 5 };
+ if (const auto *ILE = dyn_cast<InitListExpr>(Init))
+ return getSValFromInitListExpr(ILE, ConcreteOffsets, R->getElementType());
+
+ // Handle StringLiteral.
+ // Example:
+ // const char arr[] = "abc";
+ if (const auto *SL = dyn_cast<StringLiteral>(Init))
+ return getSValFromStringLiteral(SL, ConcreteOffsets.front(),
+ R->getElementType());
+
+ // FIXME: Handle CompoundLiteralExpr.
+
+ return None;
+}
+
+/// Returns an SVal, if possible, for the specified position of an
+/// initialization list.
+///
+/// \param ILE The given initialization list.
+/// \param Offsets The array of unsigned offsets. E.g. for the expression
+/// `int x = arr[1][2][3];` an array should be { 1, 2, 3 }.
+/// \param ElemT The type of the result SVal expression.
+/// \return Optional SVal for the particular position in the initialization
+/// list. E.g. for the list `{{1, 2}, {3, 4}, {5, 6}, {}}` offsets:
+/// - {1, 1} returns SVal{4}, because it's the second position in the second
+/// sublist;
+/// - {3, 0} returns SVal{0}, because there's no explicit value at this
+/// position in the sublist.
+///
+/// NOTE: In order to get a valid SVal, the caller shall guarantee valid
+/// offsets for the given initialization list. Otherwise the SVal can be
+/// equivalent to 0 or lead to an assertion.
+Optional<SVal> RegionStoreManager::getSValFromInitListExpr(
+ const InitListExpr *ILE, const SmallVector<uint64_t, 2> &Offsets,
+ QualType ElemT) {
+ assert(ILE && "InitListExpr should not be null");
+
+ for (uint64_t Offset : Offsets) {
+ // C++20 [dcl.init.string] 9.4.2.1:
+ // An array of ordinary character type [...] can be initialized by [...]
+ // an appropriately-typed string-literal enclosed in braces.
+ // Example:
+ // const char arr[] = { "abc" };
+ if (ILE->isStringLiteralInit())
+ if (const auto *SL = dyn_cast<StringLiteral>(ILE->getInit(0)))
+ return getSValFromStringLiteral(SL, Offset, ElemT);
+
+ // C++20 [expr.add] 9.4.17.5 (excerpt):
+ // i-th array element is value-initialized for each k < i ≤ n,
+ // where k is an expression-list size and n is an array extent.
+ if (Offset >= ILE->getNumInits())
+ return svalBuilder.makeZeroVal(ElemT);
+
+ const Expr *E = ILE->getInit(Offset);
+ const auto *IL = dyn_cast<InitListExpr>(E);
+ if (!IL)
+ // Return a constant value, if one is present.
+ // FIXME: Support other SVals.
+ return svalBuilder.getConstantVal(E);
+
+ // Go to the nested initializer list.
+ ILE = IL;
+ }
+ llvm_unreachable(
+ "Unhandled InitListExpr sub-expressions or invalid offsets.");
+}
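Aside (illustration, not part of the imported patch): a runnable version of the doc-comment example, showing that offsets {1, 1} hit the explicit initializer 4, while offsets {3, 0} fall into the value-initialized tail and read 0:

#include <iostream>

const int arr[4][2] = {{1, 2}, {3, 4}, {5, 6}, {}};

int main() {
  std::cout << arr[1][1] << '\n'; // 4  (offsets {1, 1})
  std::cout << arr[3][0] << '\n'; // 0  (offsets {3, 0}, value-initialized)
}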
+
+/// Returns an SVal, if possible, for the specified position in a string
+/// literal.
+///
+/// \param SL The given string literal.
+/// \param Offset The unsigned offset. E.g. for the expression
+/// `char x = str[42];` the offset should be 42.
+/// E.g. for the string "abc" offset:
+/// - 1 returns SVal{b}, because it's the second position in the string.
+/// - 42 returns SVal{0}, because there's no explicit value at this
+/// position in the string.
+/// \param ElemT The type of the result SVal expression.
+///
+/// NOTE: We return `0` for every offset >= the literal length for array
+/// declarations, like:
+/// const char str[42] = "123"; // Literal length is 4.
+/// char c = str[41]; // Offset is 41.
+/// FIXME: Nevertheless, we can't do the same for pointer declarations, like:
+/// const char * const str = "123"; // Literal length is 4.
+/// char c = str[41]; // Offset is 41. Returns `0`, but Undef
+/// // expected.
+/// It should be properly handled before reaching this point.
+/// The main problem is that we can't distinguish between these declarations,
+/// because in the case of an array we can get the Decl from the VarRegion, but
+/// in the case of a pointer the region is a StringRegion, which doesn't
+/// contain a Decl. A possible solution could be to pass the array extent along
+/// with the offset.
+SVal RegionStoreManager::getSValFromStringLiteral(const StringLiteral *SL,
+ uint64_t Offset,
+ QualType ElemT) {
+ assert(SL && "StringLiteral should not be null");
+ // C++20 [dcl.init.string] 9.4.2.3:
+ // If there are fewer initializers than there are array elements, each
+ // element not explicitly initialized shall be zero-initialized [dcl.init].
+ uint32_t Code = (Offset >= SL->getLength()) ? 0 : SL->getCodeUnit(Offset);
+ return svalBuilder.makeIntVal(Code, ElemT);
+}
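Aside (illustration, not part of the imported patch): the zero-fill behaviour described in the NOTE above for array declarations, shown directly in C++:

#include <iostream>

int main() {
  const char str[42] = "123"; // the literal occupies 4 chars including '\0'
  std::cout << static_cast<int>(str[1]) << '\n';  // 50, i.e. '2'
  std::cout << static_cast<int>(str[41]) << '\n'; // 0, zero-initialized tail
}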
+
SVal RegionStoreManager::getBindingForElement(RegionBindingsConstRef B,
const ElementRegion* R) {
// Check if the region has a binding.
@@ -1636,59 +1919,21 @@ SVal RegionStoreManager::getBindingForElement(RegionBindingsConstRef B,
// Check if the region is an element region of a string literal.
if (const StringRegion *StrR = dyn_cast<StringRegion>(superR)) {
// FIXME: Handle loads from strings where the literal is treated as
- // an integer, e.g., *((unsigned int*)"hello")
+ // an integer, e.g., *((unsigned int*)"hello"). Such loads are UB according
+ // to C++20 7.2.1.11 [basic.lval].
QualType T = Ctx.getAsArrayType(StrR->getValueType())->getElementType();
if (!Ctx.hasSameUnqualifiedType(T, R->getElementType()))
return UnknownVal();
-
- const StringLiteral *Str = StrR->getStringLiteral();
- SVal Idx = R->getIndex();
- if (Optional<nonloc::ConcreteInt> CI = Idx.getAs<nonloc::ConcreteInt>()) {
- int64_t i = CI->getValue().getSExtValue();
- // Abort on string underrun. This can be possible by arbitrary
- // clients of getBindingForElement().
- if (i < 0)
+ if (const auto CI = R->getIndex().getAs<nonloc::ConcreteInt>()) {
+ const llvm::APSInt &Idx = CI->getValue();
+ if (Idx < 0)
return UndefinedVal();
- int64_t length = Str->getLength();
- // Technically, only i == length is guaranteed to be null.
- // However, such overflows should be caught before reaching this point;
- // the only time such an access would be made is if a string literal was
- // used to initialize a larger array.
- char c = (i >= length) ? '\0' : Str->getCodeUnit(i);
- return svalBuilder.makeIntVal(c, T);
- }
- } else if (const VarRegion *VR = dyn_cast<VarRegion>(superR)) {
- // Check if the containing array has an initialized value that we can trust.
- // We can trust a const value or a value of a global initializer in main().
- const VarDecl *VD = VR->getDecl();
- if (VD->getType().isConstQualified() ||
- R->getElementType().isConstQualified() ||
- (B.isMainAnalysis() && VD->hasGlobalStorage())) {
- if (const Expr *Init = VD->getAnyInitializer()) {
- if (const auto *InitList = dyn_cast<InitListExpr>(Init)) {
- // The array index has to be known.
- if (auto CI = R->getIndex().getAs<nonloc::ConcreteInt>()) {
- int64_t i = CI->getValue().getSExtValue();
- // If it is known that the index is out of bounds, we can return
- // an undefined value.
- if (i < 0)
- return UndefinedVal();
-
- if (auto CAT = Ctx.getAsConstantArrayType(VD->getType()))
- if (CAT->getSize().sle(i))
- return UndefinedVal();
-
- // If there is a list, but no init, it must be zero.
- if (i >= InitList->getNumInits())
- return svalBuilder.makeZeroVal(R->getElementType());
-
- if (const Expr *ElemInit = InitList->getInit(i))
- if (Optional<SVal> V = svalBuilder.getConstantVal(ElemInit))
- return *V;
- }
- }
- }
+ const StringLiteral *SL = StrR->getStringLiteral();
+ return getSValFromStringLiteral(SL, Idx.getZExtValue(), T);
}
+ } else if (isa<ElementRegion, VarRegion>(superR)) {
+ if (Optional<SVal> V = getConstantValFromConstArrayInitializer(B, R))
+ return *V;
}
// Check for loads from a code text region. For such loads, just give up.
diff --git a/clang/lib/StaticAnalyzer/Core/SMTConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/SMTConstraintManager.cpp
index 7395622a659c..04165a443fff 100644
--- a/clang/lib/StaticAnalyzer/Core/SMTConstraintManager.cpp
+++ b/clang/lib/StaticAnalyzer/Core/SMTConstraintManager.cpp
@@ -1,9 +1,8 @@
//== SMTConstraintManager.cpp -----------------------------------*- C++ -*--==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp b/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp
index b459b5adb511..71bfc86ab8f7 100644
--- a/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp
+++ b/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp
@@ -49,6 +49,16 @@ using namespace ento;
void SValBuilder::anchor() {}
+SValBuilder::SValBuilder(llvm::BumpPtrAllocator &alloc, ASTContext &context,
+ ProgramStateManager &stateMgr)
+ : Context(context), BasicVals(context, alloc),
+ SymMgr(context, BasicVals, alloc), MemMgr(context, alloc),
+ StateMgr(stateMgr),
+ AnOpts(
+ stateMgr.getOwningEngine().getAnalysisManager().getAnalyzerOptions()),
+ ArrayIndexTy(context.LongLongTy),
+ ArrayIndexWidth(context.getTypeSize(ArrayIndexTy)) {}
+
DefinedOrUnknownSVal SValBuilder::makeZeroVal(QualType type) {
if (Loc::isLocType(type))
return makeNull();
@@ -244,8 +254,7 @@ SValBuilder::getDerivedRegionValueSymbolVal(SymbolRef parentSymbol,
}
DefinedSVal SValBuilder::getMemberPointer(const NamedDecl *ND) {
- assert(!ND || isa<CXXMethodDecl>(ND) || isa<FieldDecl>(ND) ||
- isa<IndirectFieldDecl>(ND));
+ assert(!ND || (isa<CXXMethodDecl, FieldDecl, IndirectFieldDecl>(ND)));
if (const auto *MD = dyn_cast_or_null<CXXMethodDecl>(ND)) {
    // Sema treats pointers to static member functions as having function pointer
@@ -405,9 +414,7 @@ SVal SValBuilder::makeSymExprValNN(BinaryOperator::Opcode Op,
// TODO: When the Max Complexity is reached, we should conjure a symbol
// instead of generating an Unknown value and propagate the taint info to it.
- const unsigned MaxComp = StateMgr.getOwningEngine()
- .getAnalysisManager()
- .options.MaxSymbolComplexity;
+ const unsigned MaxComp = AnOpts.MaxSymbolComplexity;
if (symLHS && symRHS &&
(symLHS->computeComplexity() + symRHS->computeComplexity()) < MaxComp)
@@ -725,16 +732,12 @@ SVal SValBuilder::evalCastSubKind(loc::MemRegionVal V, QualType CastTy,
// This change is needed for architectures with varying
// pointer widths. See the amdgcn opencl reproducer with
// this change as an example: solver-sym-simplification-ptr-bool.cl
- // FIXME: We could encounter a reference here,
- // try returning a concrete 'true' since it might
- // be easier on the solver.
// FIXME: Cleanup remainder of `getZeroWithPtrWidth ()`
// and `getIntWithPtrWidth()` functions to prevent future
// confusion
- const llvm::APSInt &Zero = Ty->isReferenceType()
- ? BasicVals.getZeroWithPtrWidth()
- : BasicVals.getZeroWithTypeSize(Ty);
- return makeNonLoc(Sym, BO_NE, Zero, CastTy);
+ if (!Ty->isReferenceType())
+ return makeNonLoc(Sym, BO_NE, BasicVals.getZeroWithTypeSize(Ty),
+ CastTy);
}
// Non-symbolic memory regions are always true.
return makeTruthVal(true, CastTy);
diff --git a/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp b/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp
index e57d92fbcebb..681a1f64eadc 100644
--- a/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp
+++ b/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp
@@ -11,7 +11,6 @@
//===----------------------------------------------------------------------===//
#include "clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h"
-#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
@@ -25,7 +24,7 @@ class SimpleSValBuilder : public SValBuilder {
public:
SimpleSValBuilder(llvm::BumpPtrAllocator &alloc, ASTContext &context,
ProgramStateManager &stateMgr)
- : SValBuilder(alloc, context, stateMgr) {}
+ : SValBuilder(alloc, context, stateMgr) {}
~SimpleSValBuilder() override {}
SVal evalMinus(NonLoc val) override;
@@ -129,14 +128,14 @@ SVal SimpleSValBuilder::MakeSymIntVal(const SymExpr *LHS,
// a&0 and a&(~0)
if (RHS == 0)
return makeIntVal(0, resultTy);
- else if (RHS.isAllOnesValue())
+ else if (RHS.isAllOnes())
isIdempotent = true;
break;
case BO_Or:
// a|0 and a|(~0)
if (RHS == 0)
isIdempotent = true;
- else if (RHS.isAllOnesValue()) {
+ else if (RHS.isAllOnes()) {
const llvm::APSInt &Result = BasicVals.Convert(resultTy, RHS);
return nonloc::ConcreteInt(Result);
}
@@ -320,13 +319,10 @@ static Optional<NonLoc> tryRearrange(ProgramStateRef State,
// We expect everything to be of the same type - this type.
QualType SingleTy;
- auto &Opts =
- StateMgr.getOwningEngine().getAnalysisManager().getAnalyzerOptions();
-
// FIXME: After putting complexity threshold to the symbols we can always
// rearrange additive operations but rearrange comparisons only if
// option is set.
- if(!Opts.ShouldAggressivelySimplifyBinaryOperation)
+ if (!SVB.getAnalyzerOptions().ShouldAggressivelySimplifyBinaryOperation)
return None;
SymbolRef LSym = Lhs.getAsSymbol();
@@ -513,7 +509,7 @@ SVal SimpleSValBuilder::evalBinOpNN(ProgramStateRef state,
continue;
case BO_Shr:
// (~0)>>a
- if (LHSValue.isAllOnesValue() && LHSValue.isSigned())
+ if (LHSValue.isAllOnes() && LHSValue.isSigned())
return evalCast(lhs, resultTy, QualType{});
LLVM_FALLTHROUGH;
case BO_Shl:
diff --git a/clang/lib/StaticAnalyzer/Core/Store.cpp b/clang/lib/StaticAnalyzer/Core/Store.cpp
index b867b0746f90..3cc0cd224d7a 100644
--- a/clang/lib/StaticAnalyzer/Core/Store.cpp
+++ b/clang/lib/StaticAnalyzer/Core/Store.cpp
@@ -84,7 +84,7 @@ Optional<const MemRegion *> StoreManager::castRegion(const MemRegion *R,
// involved. Blocks can be casted to/from 'id', as they can be treated
// as Objective-C objects. This could possibly be handled by enhancing
// our reasoning of downcasts of symbolic objects.
- if (isa<CodeTextRegion>(R) || isa<SymbolicRegion>(R))
+ if (isa<CodeTextRegion, SymbolicRegion>(R))
return R;
// We don't know what to make of it. Return a NULL region, which
@@ -96,18 +96,24 @@ Optional<const MemRegion *> StoreManager::castRegion(const MemRegion *R,
// already be handled.
QualType PointeeTy = CastToTy->getPointeeType();
QualType CanonPointeeTy = Ctx.getCanonicalType(PointeeTy);
+ CanonPointeeTy = CanonPointeeTy.getLocalUnqualifiedType();
// Handle casts to void*. We just pass the region through.
- if (CanonPointeeTy.getLocalUnqualifiedType() == Ctx.VoidTy)
+ if (CanonPointeeTy == Ctx.VoidTy)
return R;
- // Handle casts from compatible types.
- if (R->isBoundable())
+ const auto IsSameRegionType = [&Ctx](const MemRegion *R, QualType OtherTy) {
if (const auto *TR = dyn_cast<TypedValueRegion>(R)) {
QualType ObjTy = Ctx.getCanonicalType(TR->getValueType());
- if (CanonPointeeTy == ObjTy)
- return R;
+ if (OtherTy == ObjTy.getLocalUnqualifiedType())
+ return true;
}
+ return false;
+ };
+
+ // Handle casts from compatible types.
+ if (R->isBoundable() && IsSameRegionType(R, CanonPointeeTy))
+ return R;
// Process region cast according to the kind of the region being cast.
switch (R->getKind()) {
@@ -174,16 +180,11 @@ Optional<const MemRegion *> StoreManager::castRegion(const MemRegion *R,
CharUnits off = rawOff.getOffset();
if (off.isZero()) {
- // Edge case: we are at 0 bytes off the beginning of baseR. We
- // check to see if type we are casting to is the same as the base
- // region. If so, just return the base region.
- if (const auto *TR = dyn_cast<TypedValueRegion>(baseR)) {
- QualType ObjTy = Ctx.getCanonicalType(TR->getValueType());
- QualType CanonPointeeTy = Ctx.getCanonicalType(PointeeTy);
- if (CanonPointeeTy == ObjTy)
- return baseR;
- }
-
+ // Edge case: we are at 0 bytes off the beginning of baseR. We check to
+ // see if the type we are casting to is the same as the type of the base
+ // region. If so, just return the base region.
+ if (IsSameRegionType(baseR, CanonPointeeTy))
+ return baseR;
// Otherwise, create a new ElementRegion at offset 0.
return MakeElementRegion(cast<SubRegion>(baseR), PointeeTy);
}
@@ -442,6 +443,19 @@ SVal StoreManager::getLValueIvar(const ObjCIvarDecl *decl, SVal base) {
SVal StoreManager::getLValueElement(QualType elementType, NonLoc Offset,
SVal Base) {
+
+ // Special case, if index is 0, return the same type as if
+ // this was not an array dereference.
+ if (Offset.isZeroConstant()) {
+ QualType BT = Base.getType(this->Ctx);
+ if (!BT.isNull() && !elementType.isNull()) {
+ QualType PointeeTy = BT->getPointeeType();
+ if (!PointeeTy.isNull() &&
+ PointeeTy.getCanonicalType() == elementType.getCanonicalType())
+ return Base;
+ }
+ }
+
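Aside (illustration, not part of the imported patch): the special case above mirrors the language-level fact that the element at offset 0 is the same object the base pointer already designates:

#include <cassert>

int main() {
  int arr[3] = {1, 2, 3};
  int *p = arr; // decays to &arr[0]
  // The zero-index element and the base pointee are the same object, which is
  // why getLValueElement can return Base unchanged for a zero offset.
  assert(static_cast<void *>(p) == static_cast<void *>(&arr[0]));
  assert(p == &arr[0]);
  return 0;
}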
// If the base is an unknown or undefined value, just return it back.
// FIXME: For absolute pointer addresses, we just return that value back as
// well, although in reality we should return the offset added to that
diff --git a/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp b/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp
index 79a8eef30576..1ae1f97efd2e 100644
--- a/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp
+++ b/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp
@@ -425,19 +425,7 @@ bool SymbolReaper::isLiveRegion(const MemRegion *MR) {
// tell if anything still refers to this region. Unlike SymbolicRegions,
// AllocaRegions don't have associated symbols, though, so we don't actually
// have a way to track their liveness.
- if (isa<AllocaRegion>(MR))
- return true;
-
- if (isa<CXXThisRegion>(MR))
- return true;
-
- if (isa<MemSpaceRegion>(MR))
- return true;
-
- if (isa<CodeTextRegion>(MR))
- return true;
-
- return false;
+ return isa<AllocaRegion, CXXThisRegion, MemSpaceRegion, CodeTextRegion>(MR);
}
bool SymbolReaper::isLive(SymbolRef sym) {
diff --git a/clang/lib/Tooling/CommonOptionsParser.cpp b/clang/lib/Tooling/CommonOptionsParser.cpp
index 6301544dbb28..7d48dd505464 100644
--- a/clang/lib/Tooling/CommonOptionsParser.cpp
+++ b/clang/lib/Tooling/CommonOptionsParser.cpp
@@ -170,7 +170,7 @@ CommonOptionsParser::CommonOptionsParser(
llvm::Error Err = init(argc, argv, Category, OccurrencesFlag, Overview);
if (Err) {
llvm::report_fatal_error(
- "CommonOptionsParser: failed to parse command-line arguments. " +
+ Twine("CommonOptionsParser: failed to parse command-line arguments. ") +
llvm::toString(std::move(Err)));
}
}
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningService.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningService.cpp
index 4f3e574719d2..4b6c87aba62f 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningService.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningService.cpp
@@ -15,9 +15,9 @@ using namespace dependencies;
DependencyScanningService::DependencyScanningService(
ScanningMode Mode, ScanningOutputFormat Format, bool ReuseFileManager,
- bool SkipExcludedPPRanges)
+ bool SkipExcludedPPRanges, bool OptimizeArgs)
: Mode(Mode), Format(Format), ReuseFileManager(ReuseFileManager),
- SkipExcludedPPRanges(SkipExcludedPPRanges) {
+ SkipExcludedPPRanges(SkipExcludedPPRanges), OptimizeArgs(OptimizeArgs) {
// Initialize targets for object file support.
llvm::InitializeAllTargets();
llvm::InitializeAllTargetMCs();
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningTool.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningTool.cpp
index 2fd12f7e12b1..739712baadd0 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningTool.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningTool.cpp
@@ -24,8 +24,6 @@ std::vector<std::string> FullDependencies::getAdditionalArgs(
ClangModuleDeps, LookupPCMPath, LookupModuleDeps, PCMPaths, ModMapPaths);
for (const std::string &PCMPath : PCMPaths)
Ret.push_back("-fmodule-file=" + PCMPath);
- for (const std::string &ModMapPath : ModMapPaths)
- Ret.push_back("-fmodule-map-file=" + ModMapPath);
return Ret;
}
@@ -37,10 +35,8 @@ FullDependencies::getAdditionalArgsWithoutModulePaths() const {
"-fno-implicit-module-maps",
};
- for (const PrebuiltModuleDep &PMD : PrebuiltModuleDeps) {
- Args.push_back("-fmodule-file=" + PMD.ModuleName + "=" + PMD.PCMFile);
- Args.push_back("-fmodule-map-file=" + PMD.ModuleMapFile);
- }
+ for (const PrebuiltModuleDep &PMD : PrebuiltModuleDeps)
+ Args.push_back("-fmodule-file=" + PMD.PCMFile);
return Args;
}
@@ -50,7 +46,8 @@ DependencyScanningTool::DependencyScanningTool(
: Worker(Service) {}
llvm::Expected<std::string> DependencyScanningTool::getDependencyFile(
- const tooling::CompilationDatabase &Compilations, StringRef CWD) {
+ const std::vector<std::string> &CommandLine, StringRef CWD,
+ llvm::Optional<StringRef> ModuleName) {
/// Prints out all of the gathered dependencies into a string.
class MakeDependencyPrinterConsumer : public DependencyConsumer {
public:
@@ -102,17 +99,9 @@ llvm::Expected<std::string> DependencyScanningTool::getDependencyFile(
std::vector<std::string> Dependencies;
};
- // We expect a single command here because if a source file occurs multiple
- // times in the original CDB, then `computeDependencies` would run the
- // `DependencyScanningAction` once for every time the input occured in the
- // CDB. Instead we split up the CDB into single command chunks to avoid this
- // behavior.
- assert(Compilations.getAllCompileCommands().size() == 1 &&
- "Expected a compilation database with a single command!");
- std::string Input = Compilations.getAllCompileCommands().front().Filename;
-
MakeDependencyPrinterConsumer Consumer;
- auto Result = Worker.computeDependencies(Input, CWD, Compilations, Consumer);
+ auto Result =
+ Worker.computeDependencies(CWD, CommandLine, Consumer, ModuleName);
if (Result)
return std::move(Result);
std::string Output;
@@ -122,8 +111,9 @@ llvm::Expected<std::string> DependencyScanningTool::getDependencyFile(
llvm::Expected<FullDependenciesResult>
DependencyScanningTool::getFullDependencies(
- const tooling::CompilationDatabase &Compilations, StringRef CWD,
- const llvm::StringSet<> &AlreadySeen) {
+ const std::vector<std::string> &CommandLine, StringRef CWD,
+ const llvm::StringSet<> &AlreadySeen,
+ llvm::Optional<StringRef> ModuleName) {
class FullDependencyPrinterConsumer : public DependencyConsumer {
public:
FullDependencyPrinterConsumer(const llvm::StringSet<> &AlreadySeen)
@@ -180,24 +170,15 @@ DependencyScanningTool::getFullDependencies(
private:
std::vector<std::string> Dependencies;
std::vector<PrebuiltModuleDep> PrebuiltModuleDeps;
- std::unordered_map<std::string, ModuleDeps> ClangModuleDeps;
+ std::map<std::string, ModuleDeps> ClangModuleDeps;
std::string ContextHash;
std::vector<std::string> OutputPaths;
const llvm::StringSet<> &AlreadySeen;
};
- // We expect a single command here because if a source file occurs multiple
- // times in the original CDB, then `computeDependencies` would run the
- // `DependencyScanningAction` once for every time the input occured in the
- // CDB. Instead we split up the CDB into single command chunks to avoid this
- // behavior.
- assert(Compilations.getAllCompileCommands().size() == 1 &&
- "Expected a compilation database with a single command!");
- std::string Input = Compilations.getAllCompileCommands().front().Filename;
-
FullDependencyPrinterConsumer Consumer(AlreadySeen);
llvm::Error Result =
- Worker.computeDependencies(Input, CWD, Compilations, Consumer);
+ Worker.computeDependencies(CWD, CommandLine, Consumer, ModuleName);
if (Result)
return std::move(Result);
return Consumer.getFullDependencies();
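With this change the scanning tool is driven by a raw command line plus a working directory instead of a CompilationDatabase, and can optionally scan a named module. A minimal caller-side sketch under stated assumptions: the ScanningMode enumerator, the header paths, and the need to spell out every DependencyScanningService constructor argument are assumptions here; only ScanningOutputFormat::Make and the new getDependencyFile() parameter list come from this import.

    #include "clang/Tooling/DependencyScanning/DependencyScanningService.h"
    #include "clang/Tooling/DependencyScanning/DependencyScanningTool.h"
    #include "llvm/ADT/Optional.h"
    #include <string>
    #include <vector>

    using namespace clang::tooling::dependencies;

    // Scan one translation unit by handing the worker a plain argument vector.
    llvm::Expected<std::string> scanOne() {
      DependencyScanningService Service(
          ScanningMode::MinimizedSourcePreprocessing, ScanningOutputFormat::Make,
          /*ReuseFileManager=*/false, /*SkipExcludedPPRanges=*/true,
          /*OptimizeArgs=*/false);
      DependencyScanningTool Tool(Service);
      std::vector<std::string> CommandLine = {"clang", "-c", "foo.c", "-o", "foo.o"};
      // ModuleName is only supplied when scanning the dependencies of a named module.
      return Tool.getDependencyFile(CommandLine, /*CWD=*/"/tmp", llvm::None);
    }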
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
index d651ff23b387..7fdc49271791 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
@@ -133,6 +133,16 @@ deduceDepTarget(const std::string &OutputFile,
return makeObjFileName(InputFiles.front().getFile());
}
+/// Sanitize diagnostic options for dependency scan.
+static void sanitizeDiagOpts(DiagnosticOptions &DiagOpts) {
+ // Don't print 'X warnings and Y errors generated'.
+ DiagOpts.ShowCarets = false;
+ // Don't write out diagnostic file.
+ DiagOpts.DiagnosticSerializationFile.clear();
+ // Don't treat warnings as errors.
+ DiagOpts.Warnings.push_back("no-error");
+}
+
/// A clang tool that runs the preprocessor in a mode that's optimized for
/// dependency scanning for the given compiler invocation.
class DependencyScanningAction : public tooling::ToolAction {
@@ -141,10 +151,11 @@ public:
StringRef WorkingDirectory, DependencyConsumer &Consumer,
llvm::IntrusiveRefCntPtr<DependencyScanningWorkerFilesystem> DepFS,
ExcludedPreprocessorDirectiveSkipMapping *PPSkipMappings,
- ScanningOutputFormat Format)
+ ScanningOutputFormat Format, bool OptimizeArgs,
+ llvm::Optional<StringRef> ModuleName = None)
: WorkingDirectory(WorkingDirectory), Consumer(Consumer),
- DepFS(std::move(DepFS)), PPSkipMappings(PPSkipMappings),
- Format(Format) {}
+ DepFS(std::move(DepFS)), PPSkipMappings(PPSkipMappings), Format(Format),
+ OptimizeArgs(OptimizeArgs), ModuleName(ModuleName) {}
bool runInvocation(std::shared_ptr<CompilerInvocation> Invocation,
FileManager *FileMgr,
@@ -154,39 +165,34 @@ public:
CompilerInvocation OriginalInvocation(*Invocation);
// Create a compiler instance to handle the actual work.
- CompilerInstance Compiler(std::move(PCHContainerOps));
- Compiler.setInvocation(std::move(Invocation));
-
- // Don't print 'X warnings and Y errors generated'.
- Compiler.getDiagnosticOpts().ShowCarets = false;
- // Don't write out diagnostic file.
- Compiler.getDiagnosticOpts().DiagnosticSerializationFile.clear();
- // Don't treat warnings as errors.
- Compiler.getDiagnosticOpts().Warnings.push_back("no-error");
+ CompilerInstance ScanInstance(std::move(PCHContainerOps));
+ ScanInstance.setInvocation(std::move(Invocation));
+
// Create the compiler's actual diagnostics engine.
- Compiler.createDiagnostics(DiagConsumer, /*ShouldOwnClient=*/false);
- if (!Compiler.hasDiagnostics())
+ sanitizeDiagOpts(ScanInstance.getDiagnosticOpts());
+ ScanInstance.createDiagnostics(DiagConsumer, /*ShouldOwnClient=*/false);
+ if (!ScanInstance.hasDiagnostics())
return false;
- Compiler.getPreprocessorOpts().AllowPCHWithDifferentModulesCachePath = true;
+ ScanInstance.getPreprocessorOpts().AllowPCHWithDifferentModulesCachePath =
+ true;
FileMgr->getFileSystemOpts().WorkingDir = std::string(WorkingDirectory);
- Compiler.setFileManager(FileMgr);
- Compiler.createSourceManager(*FileMgr);
+ ScanInstance.setFileManager(FileMgr);
+ ScanInstance.createSourceManager(*FileMgr);
llvm::StringSet<> PrebuiltModulesInputFiles;
// Store the list of prebuilt module files into header search options. This
// will prevent the implicit build to create duplicate modules and will
// force reuse of the existing prebuilt module files instead.
- if (!Compiler.getPreprocessorOpts().ImplicitPCHInclude.empty())
+ if (!ScanInstance.getPreprocessorOpts().ImplicitPCHInclude.empty())
visitPrebuiltModule(
- Compiler.getPreprocessorOpts().ImplicitPCHInclude, Compiler,
- Compiler.getHeaderSearchOpts().PrebuiltModuleFiles,
+ ScanInstance.getPreprocessorOpts().ImplicitPCHInclude, ScanInstance,
+ ScanInstance.getHeaderSearchOpts().PrebuiltModuleFiles,
PrebuiltModulesInputFiles, /*VisitInputFiles=*/DepFS != nullptr);
// Use the dependency scanning optimized file system if requested to do so.
if (DepFS) {
- const CompilerInvocation &CI = Compiler.getInvocation();
DepFS->clearIgnoredFiles();
// Ignore any files that contributed to prebuilt modules. The implicit
// build validates the modules by comparing the reported sizes of their
@@ -197,20 +203,20 @@ public:
// Add any filenames that were explicity passed in the build settings and
// that might be opened, as we want to ensure we don't run source
// minimization on them.
- for (const auto &Entry : CI.getHeaderSearchOpts().UserEntries)
- DepFS->ignoreFile(Entry.Path);
- for (const auto &Entry : CI.getHeaderSearchOpts().VFSOverlayFiles)
- DepFS->ignoreFile(Entry);
+ for (const auto &E : ScanInstance.getHeaderSearchOpts().UserEntries)
+ DepFS->ignoreFile(E.Path);
+ for (const auto &F : ScanInstance.getHeaderSearchOpts().VFSOverlayFiles)
+ DepFS->ignoreFile(F);
// Support for virtual file system overlays on top of the caching
// filesystem.
FileMgr->setVirtualFileSystem(createVFSFromCompilerInvocation(
- CI, Compiler.getDiagnostics(), DepFS));
+ ScanInstance.getInvocation(), ScanInstance.getDiagnostics(), DepFS));
// Pass the skip mappings which should speed up excluded conditional block
// skipping in the preprocessor.
if (PPSkipMappings)
- Compiler.getPreprocessorOpts()
+ ScanInstance.getPreprocessorOpts()
.ExcludedConditionalDirectiveSkipMappings = PPSkipMappings;
}
@@ -222,35 +228,43 @@ public:
// which ensures that the compiler won't create new dependency collectors,
// and thus won't write out the extra '.d' files to disk.
auto Opts = std::make_unique<DependencyOutputOptions>();
- std::swap(*Opts, Compiler.getInvocation().getDependencyOutputOpts());
+ std::swap(*Opts, ScanInstance.getInvocation().getDependencyOutputOpts());
// We need at least one -MT equivalent for the generator of make dependency
// files to work.
if (Opts->Targets.empty())
- Opts->Targets = {deduceDepTarget(Compiler.getFrontendOpts().OutputFile,
- Compiler.getFrontendOpts().Inputs)};
+ Opts->Targets = {
+ deduceDepTarget(ScanInstance.getFrontendOpts().OutputFile,
+ ScanInstance.getFrontendOpts().Inputs)};
Opts->IncludeSystemHeaders = true;
switch (Format) {
case ScanningOutputFormat::Make:
- Compiler.addDependencyCollector(
+ ScanInstance.addDependencyCollector(
std::make_shared<DependencyConsumerForwarder>(std::move(Opts),
Consumer));
break;
case ScanningOutputFormat::Full:
- Compiler.addDependencyCollector(std::make_shared<ModuleDepCollector>(
- std::move(Opts), Compiler, Consumer, std::move(OriginalInvocation)));
+ ScanInstance.addDependencyCollector(std::make_shared<ModuleDepCollector>(
+ std::move(Opts), ScanInstance, Consumer,
+ std::move(OriginalInvocation), OptimizeArgs));
break;
}
// Consider different header search and diagnostic options to create
// different modules. This avoids the unsound aliasing of module PCMs.
//
- // TODO: Implement diagnostic bucketing and header search pruning to reduce
- // the impact of strict context hashing.
- Compiler.getHeaderSearchOpts().ModulesStrictContextHash = true;
+ // TODO: Implement diagnostic bucketing to reduce the impact of strict
+ // context hashing.
+ ScanInstance.getHeaderSearchOpts().ModulesStrictContextHash = true;
- auto Action = std::make_unique<ReadPCHAndPreprocessAction>();
- const bool Result = Compiler.ExecuteAction(*Action);
+ std::unique_ptr<FrontendAction> Action;
+
+ if (ModuleName.hasValue())
+ Action = std::make_unique<GetDependenciesByModuleNameAction>(*ModuleName);
+ else
+ Action = std::make_unique<ReadPCHAndPreprocessAction>();
+
+ const bool Result = ScanInstance.ExecuteAction(*Action);
if (!DepFS)
FileMgr->clearStatCache();
return Result;
@@ -262,15 +276,15 @@ private:
llvm::IntrusiveRefCntPtr<DependencyScanningWorkerFilesystem> DepFS;
ExcludedPreprocessorDirectiveSkipMapping *PPSkipMappings;
ScanningOutputFormat Format;
+ bool OptimizeArgs;
+ llvm::Optional<StringRef> ModuleName;
};
} // end anonymous namespace
DependencyScanningWorker::DependencyScanningWorker(
DependencyScanningService &Service)
- : Format(Service.getFormat()) {
- DiagOpts = new DiagnosticOptions();
-
+ : Format(Service.getFormat()), OptimizeArgs(Service.canOptimizeArgs()) {
PCHContainerOps = std::make_shared<PCHContainerOperations>();
PCHContainerOps->registerReader(
std::make_unique<ObjectFilePCHContainerReader>());
@@ -279,7 +293,12 @@ DependencyScanningWorker::DependencyScanningWorker(
PCHContainerOps->registerWriter(
std::make_unique<ObjectFilePCHContainerWriter>());
- RealFS = llvm::vfs::createPhysicalFileSystem();
+ auto OverlayFS = llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(
+ llvm::vfs::createPhysicalFileSystem());
+ InMemoryFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
+ OverlayFS->pushOverlay(InMemoryFS);
+ RealFS = OverlayFS;
+
if (Service.canSkipExcludedPPRanges())
PPSkipMappings =
std::make_unique<ExcludedPreprocessorDirectiveSkipMapping>();
@@ -290,36 +309,64 @@ DependencyScanningWorker::DependencyScanningWorker(
Files = new FileManager(FileSystemOptions(), RealFS);
}
-static llvm::Error runWithDiags(
- DiagnosticOptions *DiagOpts,
- llvm::function_ref<bool(DiagnosticConsumer &DC)> BodyShouldSucceed) {
+static llvm::Error
+runWithDiags(DiagnosticOptions *DiagOpts,
+ llvm::function_ref<bool(DiagnosticConsumer &, DiagnosticOptions &)>
+ BodyShouldSucceed) {
+ sanitizeDiagOpts(*DiagOpts);
+
// Capture the emitted diagnostics and report them to the client
// in the case of a failure.
std::string DiagnosticOutput;
llvm::raw_string_ostream DiagnosticsOS(DiagnosticOutput);
TextDiagnosticPrinter DiagPrinter(DiagnosticsOS, DiagOpts);
- if (BodyShouldSucceed(DiagPrinter))
+ if (BodyShouldSucceed(DiagPrinter, *DiagOpts))
return llvm::Error::success();
return llvm::make_error<llvm::StringError>(DiagnosticsOS.str(),
llvm::inconvertibleErrorCode());
}
llvm::Error DependencyScanningWorker::computeDependencies(
- const std::string &Input, StringRef WorkingDirectory,
- const CompilationDatabase &CDB, DependencyConsumer &Consumer) {
+ StringRef WorkingDirectory, const std::vector<std::string> &CommandLine,
+ DependencyConsumer &Consumer, llvm::Optional<StringRef> ModuleName) {
+ // Reset what might have been modified in the previous worker invocation.
RealFS->setCurrentWorkingDirectory(WorkingDirectory);
- return runWithDiags(DiagOpts.get(), [&](DiagnosticConsumer &DC) {
- /// Create the tool that uses the underlying file system to ensure that any
- /// file system requests that are made by the driver do not go through the
- /// dependency scanning filesystem.
- tooling::ClangTool Tool(CDB, Input, PCHContainerOps, RealFS, Files);
- Tool.clearArgumentsAdjusters();
- Tool.setRestoreWorkingDir(false);
- Tool.setPrintErrorMessage(false);
- Tool.setDiagnosticConsumer(&DC);
- DependencyScanningAction Action(WorkingDirectory, Consumer, DepFS,
- PPSkipMappings.get(), Format);
- return !Tool.run(&Action);
- });
+ if (Files)
+ Files->setVirtualFileSystem(RealFS);
+
+ llvm::IntrusiveRefCntPtr<FileManager> CurrentFiles =
+ Files ? Files : new FileManager(FileSystemOptions(), RealFS);
+
+ Optional<std::vector<std::string>> ModifiedCommandLine;
+ if (ModuleName.hasValue()) {
+ ModifiedCommandLine = CommandLine;
+ InMemoryFS->addFile(*ModuleName, 0, llvm::MemoryBuffer::getMemBuffer(""));
+ ModifiedCommandLine->emplace_back(*ModuleName);
+ }
+
+ const std::vector<std::string> &FinalCommandLine =
+ ModifiedCommandLine ? *ModifiedCommandLine : CommandLine;
+
+ std::vector<const char *> FinalCCommandLine(CommandLine.size(), nullptr);
+ llvm::transform(CommandLine, FinalCCommandLine.begin(),
+ [](const std::string &Str) { return Str.c_str(); });
+
+ return runWithDiags(CreateAndPopulateDiagOpts(FinalCCommandLine).release(),
+ [&](DiagnosticConsumer &DC, DiagnosticOptions &DiagOpts) {
+ DependencyScanningAction Action(
+ WorkingDirectory, Consumer, DepFS,
+ PPSkipMappings.get(), Format, OptimizeArgs,
+ ModuleName);
+ // Create an invocation that uses the underlying file
+ // system to ensure that any file system requests that
+ // are made by the driver do not go through the
+ // dependency scanning filesystem.
+ ToolInvocation Invocation(FinalCommandLine, &Action,
+ CurrentFiles.get(),
+ PCHContainerOps);
+ Invocation.setDiagnosticConsumer(&DC);
+ Invocation.setDiagnosticOptions(&DiagOpts);
+ return Invocation.run();
+ });
}
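The scan-by-module-name path above works by registering a placeholder input: an empty file named after the module is added to the in-memory overlay and appended to the command line, so the driver has a file to open while GetDependenciesByModuleNameAction resolves the module itself. A stripped-down sketch of that overlay setup, using only calls that appear in this hunk (the helper name is illustrative):

    #include "llvm/Support/MemoryBuffer.h"
    #include "llvm/Support/VirtualFileSystem.h"

    // Physical FS underneath, writable in-memory FS on top for placeholder inputs.
    static llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem>
    makeScanFS(llvm::StringRef ModuleName) {
      auto InMemoryFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
      auto OverlayFS = llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(
          llvm::vfs::createPhysicalFileSystem());
      OverlayFS->pushOverlay(InMemoryFS);
      // The named module gets an empty placeholder file the driver can "open".
      InMemoryFS->addFile(ModuleName, /*ModificationTime=*/0,
                          llvm::MemoryBuffer::getMemBuffer(""));
      return OverlayFS;
    }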
diff --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
index 88cee63c98aa..383a850301a1 100644
--- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
+++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
@@ -1,9 +1,8 @@
//===- ModuleDepCollector.cpp - Callbacks to collect deps -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -18,11 +17,26 @@ using namespace clang;
using namespace tooling;
using namespace dependencies;
+static void optimizeHeaderSearchOpts(HeaderSearchOptions &Opts,
+ ASTReader &Reader,
+ const serialization::ModuleFile &MF) {
+ // Only preserve search paths that were used during the dependency scan.
+ std::vector<HeaderSearchOptions::Entry> Entries = Opts.UserEntries;
+ Opts.UserEntries.clear();
+ for (unsigned I = 0; I < Entries.size(); ++I)
+ if (MF.SearchPathUsage[I])
+ Opts.UserEntries.push_back(Entries[I]);
+}
+
CompilerInvocation ModuleDepCollector::makeInvocationForModuleBuildWithoutPaths(
- const ModuleDeps &Deps) const {
+ const ModuleDeps &Deps,
+ llvm::function_ref<void(CompilerInvocation &)> Optimize) const {
// Make a deep copy of the original Clang invocation.
CompilerInvocation CI(OriginalInvocation);
+ CI.getLangOpts()->resetNonModularOptions();
+ CI.getPreprocessorOpts().resetNonModularOptions();
+
// Remove options incompatible with explicit module build.
CI.getFrontendOpts().Inputs.clear();
CI.getFrontendOpts().OutputFile.clear();
@@ -34,12 +48,18 @@ CompilerInvocation ModuleDepCollector::makeInvocationForModuleBuildWithoutPaths(
CI.getLangOpts()->ImplicitModules = false;
// Report the prebuilt modules this module uses.
- for (const auto &PrebuiltModule : Deps.PrebuiltModuleDeps) {
+ for (const auto &PrebuiltModule : Deps.PrebuiltModuleDeps)
CI.getFrontendOpts().ModuleFiles.push_back(PrebuiltModule.PCMFile);
- CI.getFrontendOpts().ModuleMapFiles.push_back(PrebuiltModule.ModuleMapFile);
- }
- CI.getPreprocessorOpts().ImplicitPCHInclude.clear();
+ Optimize(CI);
+
+ // The original invocation probably didn't have strict context hash enabled.
+ // We will use the context hash of this invocation to distinguish between
+ // multiple incompatible versions of the same module and will use it when
+ // reporting dependencies to the clients. Let's make sure we're using
+ // **strict** context hash in order to prevent accidental sharing of
+ // incompatible modules (e.g. with differences in search paths).
+ CI.getHeaderSearchOpts().ModulesStrictContextHash = true;
return CI;
}
@@ -62,7 +82,7 @@ serializeCompilerInvocation(const CompilerInvocation &CI) {
std::vector<std::string> ModuleDeps::getCanonicalCommandLine(
std::function<StringRef(ModuleID)> LookupPCMPath,
std::function<const ModuleDeps &(ModuleID)> LookupModuleDeps) const {
- CompilerInvocation CI(Invocation);
+ CompilerInvocation CI(BuildInvocation);
FrontendOptions &FrontendOpts = CI.getFrontendOpts();
InputKind ModuleMapInputKind(FrontendOpts.DashX.getLanguage(),
@@ -79,7 +99,7 @@ std::vector<std::string> ModuleDeps::getCanonicalCommandLine(
std::vector<std::string>
ModuleDeps::getCanonicalCommandLineWithoutModulePaths() const {
- return serializeCompilerInvocation(Invocation);
+ return serializeCompilerInvocation(BuildInvocation);
}
void dependencies::detail::collectPCMAndModuleMapPaths(
@@ -112,15 +132,15 @@ void ModuleDepCollectorPP::FileChanged(SourceLocation Loc,
FileID PrevFID) {
if (Reason != PPCallbacks::EnterFile)
return;
-
+
// This has to be delayed as the context hash can change at the start of
// `CompilerInstance::ExecuteAction`.
if (MDC.ContextHash.empty()) {
- MDC.ContextHash = Instance.getInvocation().getModuleHash();
+ MDC.ContextHash = MDC.ScanInstance.getInvocation().getModuleHash();
MDC.Consumer.handleContextHash(MDC.ContextHash);
}
- SourceManager &SM = Instance.getSourceManager();
+ SourceManager &SM = MDC.ScanInstance.getSourceManager();
// Dependency generation really does want to go all the way to the
// file entry for a source location to find out what is depended on.
@@ -163,12 +183,14 @@ void ModuleDepCollectorPP::handleImport(const Module *Imported) {
}
void ModuleDepCollectorPP::EndOfMainFile() {
- FileID MainFileID = Instance.getSourceManager().getMainFileID();
- MDC.MainFile = std::string(
- Instance.getSourceManager().getFileEntryForID(MainFileID)->getName());
+ FileID MainFileID = MDC.ScanInstance.getSourceManager().getMainFileID();
+ MDC.MainFile = std::string(MDC.ScanInstance.getSourceManager()
+ .getFileEntryForID(MainFileID)
+ ->getName());
- if (!Instance.getPreprocessorOpts().ImplicitPCHInclude.empty())
- MDC.FileDeps.push_back(Instance.getPreprocessorOpts().ImplicitPCHInclude);
+ if (!MDC.ScanInstance.getPreprocessorOpts().ImplicitPCHInclude.empty())
+ MDC.FileDeps.push_back(
+ MDC.ScanInstance.getPreprocessorOpts().ImplicitPCHInclude);
for (const Module *M : DirectModularDeps) {
// A top-level module might not be actually imported as a module when
@@ -207,15 +229,16 @@ ModuleID ModuleDepCollectorPP::handleTopLevelModule(const Module *M) {
MD.ImplicitModulePCMPath = std::string(M->getASTFile()->getName());
MD.IsSystem = M->IsSystem;
- const FileEntry *ModuleMap = Instance.getPreprocessor()
+ const FileEntry *ModuleMap = MDC.ScanInstance.getPreprocessor()
.getHeaderSearchInfo()
.getModuleMap()
.getModuleMapFileForUniquing(M);
MD.ClangModuleMapFile = std::string(ModuleMap ? ModuleMap->getName() : "");
serialization::ModuleFile *MF =
- MDC.Instance.getASTReader()->getModuleManager().lookup(M->getASTFile());
- MDC.Instance.getASTReader()->visitInputFiles(
+ MDC.ScanInstance.getASTReader()->getModuleManager().lookup(
+ M->getASTFile());
+ MDC.ScanInstance.getASTReader()->visitInputFiles(
*MF, true, true, [&](const serialization::InputFile &IF, bool isSystem) {
// __inferred_module.map is the result of the way in which an implicit
// module build handles inferred modules. It adds an overlay VFS with
@@ -232,10 +255,16 @@ ModuleID ModuleDepCollectorPP::handleTopLevelModule(const Module *M) {
// Add direct prebuilt module dependencies now, so that we can use them when
// creating a CompilerInvocation and computing context hash for this
// ModuleDeps instance.
- addDirectPrebuiltModuleDeps(M, MD);
-
- MD.Invocation = MDC.makeInvocationForModuleBuildWithoutPaths(MD);
- MD.ID.ContextHash = MD.Invocation.getModuleHash();
+ llvm::DenseSet<const Module *> SeenModules;
+ addAllSubmodulePrebuiltDeps(M, MD, SeenModules);
+
+ MD.BuildInvocation = MDC.makeInvocationForModuleBuildWithoutPaths(
+ MD, [&](CompilerInvocation &BuildInvocation) {
+ if (MDC.OptimizeArgs)
+ optimizeHeaderSearchOpts(BuildInvocation.getHeaderSearchOpts(),
+ *MDC.ScanInstance.getASTReader(), *MF);
+ });
+ MD.ID.ContextHash = MD.BuildInvocation.getModuleHash();
llvm::DenseSet<const Module *> AddedModules;
addAllSubmoduleDeps(M, MD, AddedModules);
@@ -243,12 +272,23 @@ ModuleID ModuleDepCollectorPP::handleTopLevelModule(const Module *M) {
return MD.ID;
}
-void ModuleDepCollectorPP::addDirectPrebuiltModuleDeps(const Module *M,
- ModuleDeps &MD) {
+void ModuleDepCollectorPP::addAllSubmodulePrebuiltDeps(
+ const Module *M, ModuleDeps &MD,
+ llvm::DenseSet<const Module *> &SeenSubmodules) {
+ addModulePrebuiltDeps(M, MD, SeenSubmodules);
+
+ for (const Module *SubM : M->submodules())
+ addAllSubmodulePrebuiltDeps(SubM, MD, SeenSubmodules);
+}
+
+void ModuleDepCollectorPP::addModulePrebuiltDeps(
+ const Module *M, ModuleDeps &MD,
+ llvm::DenseSet<const Module *> &SeenSubmodules) {
for (const Module *Import : M->Imports)
if (Import->getTopLevelModule() != M->getTopLevelModule())
- if (MDC.isPrebuiltModule(Import))
- MD.PrebuiltModuleDeps.emplace_back(Import);
+ if (MDC.isPrebuiltModule(Import->getTopLevelModule()))
+ if (SeenSubmodules.insert(Import->getTopLevelModule()).second)
+ MD.PrebuiltModuleDeps.emplace_back(Import->getTopLevelModule());
}
void ModuleDepCollectorPP::addAllSubmoduleDeps(
@@ -274,13 +314,14 @@ void ModuleDepCollectorPP::addModuleDep(
}
ModuleDepCollector::ModuleDepCollector(
- std::unique_ptr<DependencyOutputOptions> Opts, CompilerInstance &I,
- DependencyConsumer &C, CompilerInvocation &&OriginalCI)
- : Instance(I), Consumer(C), Opts(std::move(Opts)),
- OriginalInvocation(std::move(OriginalCI)) {}
+ std::unique_ptr<DependencyOutputOptions> Opts,
+ CompilerInstance &ScanInstance, DependencyConsumer &C,
+ CompilerInvocation &&OriginalCI, bool OptimizeArgs)
+ : ScanInstance(ScanInstance), Consumer(C), Opts(std::move(Opts)),
+ OriginalInvocation(std::move(OriginalCI)), OptimizeArgs(OptimizeArgs) {}
void ModuleDepCollector::attachToPreprocessor(Preprocessor &PP) {
- PP.addPPCallbacks(std::make_unique<ModuleDepCollectorPP>(Instance, *this));
+ PP.addPPCallbacks(std::make_unique<ModuleDepCollectorPP>(*this));
}
void ModuleDepCollector::attachToASTReader(ASTReader &R) {}
@@ -288,7 +329,7 @@ void ModuleDepCollector::attachToASTReader(ASTReader &R) {}
bool ModuleDepCollector::isPrebuiltModule(const Module *M) {
std::string Name(M->getTopLevelModuleName());
const auto &PrebuiltModuleFiles =
- Instance.getHeaderSearchOpts().PrebuiltModuleFiles;
+ ScanInstance.getHeaderSearchOpts().PrebuiltModuleFiles;
auto PrebuiltModuleFileIt = PrebuiltModuleFiles.find(Name);
if (PrebuiltModuleFileIt == PrebuiltModuleFiles.end())
return false;
diff --git a/clang/lib/Tooling/DumpTool/ClangSrcLocDump.cpp b/clang/lib/Tooling/DumpTool/ClangSrcLocDump.cpp
index 8091a467d056..9c825428f2ea 100644
--- a/clang/lib/Tooling/DumpTool/ClangSrcLocDump.cpp
+++ b/clang/lib/Tooling/DumpTool/ClangSrcLocDump.cpp
@@ -91,12 +91,8 @@ int main(int argc, const char **argv) {
llvm::transform(Args, Argv.begin(),
[](const std::string &Arg) { return Arg.c_str(); });
- IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions();
- unsigned MissingArgIndex, MissingArgCount;
- auto Opts = driver::getDriverOptTable();
- auto ParsedArgs = Opts.ParseArgs(llvm::makeArrayRef(Argv).slice(1),
- MissingArgIndex, MissingArgCount);
- ParseDiagnosticArgs(*DiagOpts, ParsedArgs);
+ IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts =
+ CreateAndPopulateDiagOpts(Argv);
// Don't output diagnostics, because common scenarios such as
// cross-compiling fail with diagnostics. This is not fatal, but
diff --git a/clang/lib/Tooling/JSONCompilationDatabase.cpp b/clang/lib/Tooling/JSONCompilationDatabase.cpp
index 97ba7e411fbb..5e18d7a576c0 100644
--- a/clang/lib/Tooling/JSONCompilationDatabase.cpp
+++ b/clang/lib/Tooling/JSONCompilationDatabase.cpp
@@ -135,15 +135,12 @@ class CommandLineArgumentParser {
std::vector<std::string> unescapeCommandLine(JSONCommandLineSyntax Syntax,
StringRef EscapedCommandLine) {
if (Syntax == JSONCommandLineSyntax::AutoDetect) {
+#ifdef _WIN32
+ // Assume Windows command line parsing on Win32
+ Syntax = JSONCommandLineSyntax::Windows;
+#else
Syntax = JSONCommandLineSyntax::Gnu;
- llvm::Triple Triple(llvm::sys::getProcessTriple());
- if (Triple.getOS() == llvm::Triple::OSType::Win32) {
- // Assume Windows command line parsing on Win32 unless the triple
- // explicitly tells us otherwise.
- if (!Triple.hasEnvironment() ||
- Triple.getEnvironment() == llvm::Triple::EnvironmentType::MSVC)
- Syntax = JSONCommandLineSyntax::Windows;
- }
+#endif
}
if (Syntax == JSONCommandLineSyntax::Windows) {
diff --git a/clang/lib/Tooling/Syntax/BuildTree.cpp b/clang/lib/Tooling/Syntax/BuildTree.cpp
index 07888b5c32fa..fcac2250dd96 100644
--- a/clang/lib/Tooling/Syntax/BuildTree.cpp
+++ b/clang/lib/Tooling/Syntax/BuildTree.cpp
@@ -155,9 +155,8 @@ private:
} // namespace
static CallExpr::arg_range dropDefaultArgs(CallExpr::arg_range Args) {
- auto FirstDefaultArg = std::find_if(Args.begin(), Args.end(), [](auto It) {
- return isa<CXXDefaultArgExpr>(It);
- });
+ auto FirstDefaultArg =
+ llvm::find_if(Args, [](auto It) { return isa<CXXDefaultArgExpr>(It); });
return llvm::make_range(Args.begin(), FirstDefaultArg);
}
diff --git a/clang/lib/Tooling/Tooling.cpp b/clang/lib/Tooling/Tooling.cpp
index 5242134097da..6314615f83c8 100644
--- a/clang/lib/Tooling/Tooling.cpp
+++ b/clang/lib/Tooling/Tooling.cpp
@@ -343,23 +343,27 @@ bool ToolInvocation::run() {
for (const std::string &Str : CommandLine)
Argv.push_back(Str.c_str());
const char *const BinaryName = Argv[0];
- IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions();
- unsigned MissingArgIndex, MissingArgCount;
- llvm::opt::InputArgList ParsedArgs = driver::getDriverOptTable().ParseArgs(
- ArrayRef<const char *>(Argv).slice(1), MissingArgIndex, MissingArgCount);
- ParseDiagnosticArgs(*DiagOpts, ParsedArgs);
- TextDiagnosticPrinter DiagnosticPrinter(
- llvm::errs(), &*DiagOpts);
- DiagnosticsEngine Diagnostics(
- IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs()), &*DiagOpts,
- DiagConsumer ? DiagConsumer : &DiagnosticPrinter, false);
+
+ // Parse diagnostic options from the driver command-line only if none were
+ // explicitly set.
+ IntrusiveRefCntPtr<DiagnosticOptions> ParsedDiagOpts;
+ DiagnosticOptions *DiagOpts = this->DiagOpts;
+ if (!DiagOpts) {
+ ParsedDiagOpts = CreateAndPopulateDiagOpts(Argv);
+ DiagOpts = &*ParsedDiagOpts;
+ }
+
+ TextDiagnosticPrinter DiagnosticPrinter(llvm::errs(), DiagOpts);
+ IntrusiveRefCntPtr<DiagnosticsEngine> Diagnostics =
+ CompilerInstance::createDiagnostics(
+ &*DiagOpts, DiagConsumer ? DiagConsumer : &DiagnosticPrinter, false);
// Although `Diagnostics` are used only for command-line parsing, the custom
// `DiagConsumer` might expect a `SourceManager` to be present.
- SourceManager SrcMgr(Diagnostics, *Files);
- Diagnostics.setSourceManager(&SrcMgr);
+ SourceManager SrcMgr(*Diagnostics, *Files);
+ Diagnostics->setSourceManager(&SrcMgr);
const std::unique_ptr<driver::Driver> Driver(
- newDriver(&Diagnostics, BinaryName, &Files->getVirtualFileSystem()));
+ newDriver(&*Diagnostics, BinaryName, &Files->getVirtualFileSystem()));
// The "input file not found" diagnostics from the driver are useful.
// The driver is only aware of the VFS working directory, but some clients
// change this at the FileManager level instead.
@@ -371,11 +375,11 @@ bool ToolInvocation::run() {
if (!Compilation)
return false;
const llvm::opt::ArgStringList *const CC1Args = getCC1Arguments(
- &Diagnostics, Compilation.get());
+ &*Diagnostics, Compilation.get());
if (!CC1Args)
return false;
std::unique_ptr<CompilerInvocation> Invocation(
- newInvocation(&Diagnostics, *CC1Args, BinaryName));
+ newInvocation(&*Diagnostics, *CC1Args, BinaryName));
return runInvocation(BinaryName, Compilation.get(), std::move(Invocation),
std::move(PCHContainerOps));
}
diff --git a/clang/lib/Tooling/Transformer/Parsing.cpp b/clang/lib/Tooling/Transformer/Parsing.cpp
index 66fa04a15594..242db2a16b43 100644
--- a/clang/lib/Tooling/Transformer/Parsing.cpp
+++ b/clang/lib/Tooling/Transformer/Parsing.cpp
@@ -165,7 +165,7 @@ static ExpectedProgress<llvm::NoneType> parseChar(char c, ParseState State) {
static ExpectedProgress<std::string> parseId(ParseState State) {
State.Input = consumeWhitespace(State.Input);
auto Id = State.Input.take_while(
- [](char c) { return isASCII(c) && isIdentifierBody(c); });
+ [](char c) { return isASCII(c) && isAsciiIdentifierContinue(c); });
if (Id.empty())
return makeParseError(State, "failed to parse name");
return makeParseProgress(advance(State, Id.size()), Id.str());
diff --git a/clang/lib/Tooling/Transformer/Stencil.cpp b/clang/lib/Tooling/Transformer/Stencil.cpp
index 4dc3544bb06d..8b20ef34c3ff 100644
--- a/clang/lib/Tooling/Transformer/Stencil.cpp
+++ b/clang/lib/Tooling/Transformer/Stencil.cpp
@@ -27,14 +27,15 @@
using namespace clang;
using namespace transformer;
+using ast_matchers::BoundNodes;
using ast_matchers::MatchFinder;
using llvm::errc;
using llvm::Error;
using llvm::Expected;
using llvm::StringError;
-static llvm::Expected<DynTypedNode>
-getNode(const ast_matchers::BoundNodes &Nodes, StringRef Id) {
+static llvm::Expected<DynTypedNode> getNode(const BoundNodes &Nodes,
+ StringRef Id) {
auto &NodesMap = Nodes.getMap();
auto It = NodesMap.find(Id);
if (It == NodesMap.end())
@@ -366,6 +367,73 @@ public:
}
};
+class SelectBoundStencil : public clang::transformer::StencilInterface {
+ static bool containsNoNullStencils(
+ const std::vector<std::pair<std::string, Stencil>> &Cases) {
+ for (const auto &S : Cases)
+ if (S.second == nullptr)
+ return false;
+ return true;
+ }
+
+public:
+ SelectBoundStencil(std::vector<std::pair<std::string, Stencil>> Cases,
+ Stencil Default)
+ : CaseStencils(std::move(Cases)), DefaultStencil(std::move(Default)) {
+ assert(containsNoNullStencils(CaseStencils) &&
+ "cases of selectBound may not be null");
+ }
+  ~SelectBoundStencil() override {}
+
+ llvm::Error eval(const MatchFinder::MatchResult &match,
+ std::string *result) const override {
+ const BoundNodes::IDToNodeMap &NodeMap = match.Nodes.getMap();
+ for (const auto &S : CaseStencils) {
+ if (NodeMap.count(S.first) > 0) {
+ return S.second->eval(match, result);
+ }
+ }
+
+ if (DefaultStencil != nullptr) {
+ return DefaultStencil->eval(match, result);
+ }
+
+ llvm::SmallVector<llvm::StringRef, 2> CaseIDs;
+ CaseIDs.reserve(CaseStencils.size());
+ for (const auto &S : CaseStencils)
+ CaseIDs.emplace_back(S.first);
+
+ return llvm::createStringError(
+ errc::result_out_of_range,
+ llvm::Twine("selectBound failed: no cases bound and no default: {") +
+ llvm::join(CaseIDs, ", ") + "}");
+ }
+
+ std::string toString() const override {
+ std::string Buffer;
+ llvm::raw_string_ostream Stream(Buffer);
+ Stream << "selectBound({";
+ bool First = true;
+ for (const auto &S : CaseStencils) {
+ if (First)
+ First = false;
+ else
+ Stream << "}, ";
+ Stream << "{\"" << S.first << "\", " << S.second->toString();
+ }
+ Stream << "}}";
+ if (DefaultStencil != nullptr) {
+ Stream << ", " << DefaultStencil->toString();
+ }
+ Stream << ")";
+ return Stream.str();
+ }
+
+private:
+ std::vector<std::pair<std::string, Stencil>> CaseStencils;
+ Stencil DefaultStencil;
+};
+
class SequenceStencil : public StencilInterface {
std::vector<Stencil> Stencils;
@@ -462,6 +530,13 @@ Stencil transformer::ifBound(StringRef Id, Stencil TrueStencil,
std::move(FalseStencil));
}
+Stencil transformer::selectBound(
+ std::vector<std::pair<std::string, Stencil>> CaseStencils,
+ Stencil DefaultStencil) {
+ return std::make_shared<SelectBoundStencil>(std::move(CaseStencils),
+ std::move(DefaultStencil));
+}
+
Stencil transformer::run(MatchConsumer<std::string> Fn) {
return std::make_shared<RunStencil>(std::move(Fn));
}
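A short usage sketch for the new selectBound combinator added above: it evaluates the stencil of the first case whose binding id was actually bound in the match, otherwise the default stencil if one was given. The binding ids, the cat() texts, and the assumption that selectBound is declared next to the other stencil factories in clang/Tooling/Transformer/Stencil.h are illustrative, not taken from this hunk.

    #include "clang/Tooling/Transformer/Stencil.h"

    using clang::transformer::cat;
    using clang::transformer::selectBound;
    using clang::transformer::Stencil;

    // Emits "then-branch" if "if" bound, "loop-body" if "for" bound, else "other".
    Stencil S = selectBound({{"if", cat("then-branch")}, {"for", cat("loop-body")}},
                            /*DefaultStencil=*/cat("other"));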
diff --git a/clang/tools/clang-format/ClangFormat.cpp b/clang/tools/clang-format/ClangFormat.cpp
index 144e87f78c64..56dc628869a4 100644
--- a/clang/tools/clang-format/ClangFormat.cpp
+++ b/clang/tools/clang-format/ClangFormat.cpp
@@ -19,10 +19,12 @@
#include "clang/Basic/Version.h"
#include "clang/Format/Format.h"
#include "clang/Rewrite/Core/Rewriter.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/Process.h"
+#include <fstream>
using namespace llvm;
using clang::tooling::Replacements;
@@ -104,6 +106,17 @@ static cl::opt<bool> SortIncludes(
"SortIncludes style flag"),
cl::cat(ClangFormatCategory));
+static cl::opt<std::string> QualifierAlignment(
+ "qualifier-alignment",
+ cl::desc(
+ "If set, overrides the qualifier alignment style determined by the "
+ "QualifierAlignment style flag"),
+ cl::init(""), cl::cat(ClangFormatCategory));
+
+static cl::opt<std::string>
+ Files("files", cl::desc("Provide a list of files to run clang-format"),
+ cl::init(""), cl::cat(ClangFormatCategory));
+
static cl::opt<bool>
Verbose("verbose", cl::desc("If set, shows the list of processed files"),
cl::cat(ClangFormatCategory));
@@ -353,6 +366,18 @@ static void outputXML(const Replacements &Replaces,
outs() << "</replacements>\n";
}
+class ClangFormatDiagConsumer : public DiagnosticConsumer {
+ virtual void anchor() {}
+
+ void HandleDiagnostic(DiagnosticsEngine::Level DiagLevel,
+ const Diagnostic &Info) override {
+
+ SmallVector<char, 16> vec;
+ Info.FormatDiagnostic(vec);
+ errs() << "clang-format error:" << vec << "\n";
+ }
+};
+
// Returns true on error.
static bool format(StringRef FileName) {
if (!OutputXML && Inplace && FileName == "-") {
@@ -402,6 +427,27 @@ static bool format(StringRef FileName) {
return true;
}
+ StringRef QualifierAlignmentOrder = QualifierAlignment;
+
+ FormatStyle->QualifierAlignment =
+ StringSwitch<FormatStyle::QualifierAlignmentStyle>(
+ QualifierAlignmentOrder.lower())
+ .Case("right", FormatStyle::QAS_Right)
+ .Case("left", FormatStyle::QAS_Left)
+ .Default(FormatStyle->QualifierAlignment);
+
+ if (FormatStyle->QualifierAlignment == FormatStyle::QAS_Left)
+ FormatStyle->QualifierOrder = {"const", "volatile", "type"};
+ else if (FormatStyle->QualifierAlignment == FormatStyle::QAS_Right)
+ FormatStyle->QualifierOrder = {"type", "const", "volatile"};
+ else if (QualifierAlignmentOrder.contains("type")) {
+ FormatStyle->QualifierAlignment = FormatStyle::QAS_Custom;
+ SmallVector<StringRef> Qualifiers;
+ QualifierAlignmentOrder.split(Qualifiers, " ", /*MaxSplit=*/-1,
+ /*KeepEmpty=*/false);
+ FormatStyle->QualifierOrder = {Qualifiers.begin(), Qualifiers.end()};
+ }
+
if (SortIncludes.getNumOccurrences() != 0) {
if (SortIncludes)
FormatStyle->SortIncludes = FormatStyle::SI_CaseSensitive;
@@ -443,9 +489,12 @@ static bool format(StringRef FileName) {
IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem(
new llvm::vfs::InMemoryFileSystem);
FileManager Files(FileSystemOptions(), InMemoryFileSystem);
+
+ IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts(new DiagnosticOptions());
+ ClangFormatDiagConsumer IgnoreDiagnostics;
DiagnosticsEngine Diagnostics(
- IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
- new DiagnosticOptions);
+ IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs), &*DiagOpts,
+ &IgnoreDiagnostics, false);
SourceManager Sources(Diagnostics, Files);
FileID ID = createInMemoryFile(AssumedFileName, *Code, Sources, Files,
InMemoryFileSystem.get());
@@ -534,6 +583,17 @@ int main(int argc, const char **argv) {
return dumpConfig();
}
+ if (!Files.empty()) {
+ std::ifstream ExternalFileOfFiles{std::string(Files)};
+ std::string Line;
+    unsigned LineNo = 0;
+ while (std::getline(ExternalFileOfFiles, Line)) {
+ FileNames.push_back(Line);
+ LineNo++;
+ }
+    errs() << "Clang-formatting " << LineNo << " files\n";
+ }
+
bool Error = false;
if (FileNames.empty()) {
Error = clang::format::format("-");
@@ -545,9 +605,12 @@ int main(int argc, const char **argv) {
"single file.\n";
return 1;
}
+
+ unsigned FileNo = 1;
for (const auto &FileName : FileNames) {
if (Verbose)
- errs() << "Formatting " << FileName << "\n";
+ errs() << "Formatting [" << FileNo++ << "/" << FileNames.size() << "] "
+ << FileName << "\n";
Error |= clang::format::format(FileName);
}
return Error ? 1 : 0;
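Taken together, the two new clang-format flags can be combined in a single run; an illustrative (hypothetical) invocation is "clang-format --qualifier-alignment=left --files=build/format-list.txt -i", which reads the file list from build/format-list.txt, rewrites each listed file in place, and moves const/volatile qualifiers to the left of the type. Passing a space-separated order containing "type" (for example "const volatile type") selects the custom QAS_Custom ordering instead.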
diff --git a/clang/tools/clang-nvlink-wrapper/ClangNvlinkWrapper.cpp b/clang/tools/clang-nvlink-wrapper/ClangNvlinkWrapper.cpp
new file mode 100644
index 000000000000..bc5b9a9f1fde
--- /dev/null
+++ b/clang/tools/clang-nvlink-wrapper/ClangNvlinkWrapper.cpp
@@ -0,0 +1,196 @@
+//===-- clang-nvlink-wrapper/ClangNvlinkWrapper.cpp - wrapper over nvlink-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This tool works as a wrapper over the nvlink program. It transparently
+/// passes every input option and object to nvlink except archive files. It
+/// reads each input archive file to extract the archived cubin files as
+/// temporary files. These temporary (*.cubin) files are passed to nvlink,
+/// because nvlink does not support linking archive files implicitly.
+///
+/// During linking of heterogeneous device archive libraries, the
+/// clang-offload-bundler creates a device-specific archive of cubin files.
+/// Such an archive is then passed to this tool, which extracts the cubin
+/// files before handing them to nvlink.
+///
+/// Example:
+/// clang-nvlink-wrapper -o a.out-openmp-nvptx64 /tmp/libTest-nvptx-sm_50.a
+///
+/// 1. Extract (libTest-nvptx-sm_50.a) => /tmp/a.cubin /tmp/b.cubin
+/// 2. nvlink -o a.out-openmp-nvptx64 /tmp/a.cubin /tmp/b.cubin
+//===---------------------------------------------------------------------===//
+
+#include "clang/Basic/Version.h"
+#include "llvm/Object/Archive.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/StringSaver.h"
+#include "llvm/Support/WithColor.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static cl::opt<bool> Help("h", cl::desc("Alias for -help"), cl::Hidden);
+
+// Mark all our options with this category, everything else (except for -help)
+// will be hidden.
+static cl::OptionCategory
+ ClangNvlinkWrapperCategory("clang-nvlink-wrapper options");
+
+static cl::opt<std::string> NvlinkUserPath("nvlink-path",
+ cl::desc("Path of nvlink binary"),
+ cl::cat(ClangNvlinkWrapperCategory));
+
+// Do not parse nvlink options
+static cl::list<std::string>
+ NVArgs(cl::Sink, cl::desc("<options to be passed to nvlink>..."));
+
+static Error runNVLink(std::string NVLinkPath,
+ SmallVectorImpl<std::string> &Args) {
+ std::vector<StringRef> NVLArgs;
+ NVLArgs.push_back(NVLinkPath);
+ for (auto &Arg : Args) {
+ NVLArgs.push_back(Arg);
+ }
+
+ if (sys::ExecuteAndWait(NVLinkPath.c_str(), NVLArgs))
+ return createStringError(inconvertibleErrorCode(), "'nvlink' failed");
+ return Error::success();
+}
+
+static Error extractArchiveFiles(StringRef Filename,
+ SmallVectorImpl<std::string> &Args,
+ SmallVectorImpl<std::string> &TmpFiles) {
+ std::vector<std::unique_ptr<MemoryBuffer>> ArchiveBuffers;
+
+ ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
+ MemoryBuffer::getFileOrSTDIN(Filename, false, false);
+ if (std::error_code EC = BufOrErr.getError())
+ return createFileError(Filename, EC);
+
+ ArchiveBuffers.push_back(std::move(*BufOrErr));
+ Expected<std::unique_ptr<llvm::object::Archive>> LibOrErr =
+ object::Archive::create(ArchiveBuffers.back()->getMemBufferRef());
+ if (!LibOrErr)
+ return LibOrErr.takeError();
+
+ auto Archive = std::move(*LibOrErr);
+
+ Error Err = Error::success();
+ auto ChildEnd = Archive->child_end();
+ for (auto ChildIter = Archive->child_begin(Err); ChildIter != ChildEnd;
+ ++ChildIter) {
+ if (Err)
+ return Err;
+ auto ChildNameOrErr = (*ChildIter).getName();
+ if (!ChildNameOrErr)
+ return ChildNameOrErr.takeError();
+
+ StringRef ChildName = sys::path::filename(ChildNameOrErr.get());
+
+ auto ChildBufferRefOrErr = (*ChildIter).getMemoryBufferRef();
+ if (!ChildBufferRefOrErr)
+ return ChildBufferRefOrErr.takeError();
+
+ auto ChildBuffer =
+ MemoryBuffer::getMemBuffer(ChildBufferRefOrErr.get(), false);
+ auto ChildNameSplit = ChildName.split('.');
+
+ SmallString<16> Path;
+ int FileDesc;
+ if (std::error_code EC = sys::fs::createTemporaryFile(
+ (ChildNameSplit.first), (ChildNameSplit.second), FileDesc, Path))
+ return createFileError(ChildName, EC);
+
+ std::string TmpFileName(Path.str());
+ Args.push_back(TmpFileName);
+ TmpFiles.push_back(TmpFileName);
+ std::error_code EC;
+ raw_fd_ostream OS(Path.c_str(), EC, sys::fs::OF_None);
+ if (EC)
+ return createFileError(TmpFileName, errc::io_error);
+ OS << ChildBuffer->getBuffer();
+ OS.close();
+ }
+ return Err;
+}
+
+static Error cleanupTmpFiles(SmallVectorImpl<std::string> &TmpFiles) {
+ for (auto &TmpFile : TmpFiles) {
+ if (std::error_code EC = sys::fs::remove(TmpFile))
+ return createFileError(TmpFile, errc::no_such_file_or_directory);
+ }
+ return Error::success();
+}
+
+static void PrintVersion(raw_ostream &OS) {
+ OS << clang::getClangToolFullVersion("clang-nvlink-wrapper") << '\n';
+}
+
+int main(int argc, const char **argv) {
+ sys::PrintStackTraceOnErrorSignal(argv[0]);
+ cl::SetVersionPrinter(PrintVersion);
+ cl::HideUnrelatedOptions(ClangNvlinkWrapperCategory);
+ cl::ParseCommandLineOptions(
+ argc, argv,
+      "A wrapper tool over the nvlink program. It transparently passes every \n"
+      "input option and object to nvlink except archive files and the path of \n"
+      "the nvlink binary. It reads each input archive file to extract archived \n"
+      "cubin files as temporary files.\n");
+
+ if (Help) {
+ cl::PrintHelpMessage();
+ return 0;
+ }
+
+ auto reportError = [argv](Error E) {
+ logAllUnhandledErrors(std::move(E), WithColor::error(errs(), argv[0]));
+ exit(1);
+ };
+
+ std::string NvlinkPath;
+ SmallVector<const char *, 0> Argv(argv, argv + argc);
+ SmallVector<std::string, 0> ArgvSubst;
+ SmallVector<std::string, 0> TmpFiles;
+ BumpPtrAllocator Alloc;
+ StringSaver Saver(Alloc);
+ cl::ExpandResponseFiles(Saver, cl::TokenizeGNUCommandLine, Argv);
+
+ for (const std::string &Arg : NVArgs) {
+ if (sys::path::extension(Arg) == ".a") {
+ if (Error Err = extractArchiveFiles(Arg, ArgvSubst, TmpFiles))
+ reportError(std::move(Err));
+ } else {
+ ArgvSubst.push_back(Arg);
+ }
+ }
+
+ NvlinkPath = NvlinkUserPath;
+
+  // If the user hasn't specified an nvlink binary, search for it in PATH.
+ if (NvlinkPath.empty()) {
+ ErrorOr<std::string> NvlinkPathErr = sys::findProgramByName("nvlink");
+ if (!NvlinkPathErr) {
+ reportError(createStringError(NvlinkPathErr.getError(),
+ "unable to find 'nvlink' in path"));
+ }
+ NvlinkPath = NvlinkPathErr.get();
+ }
+
+ if (Error Err = runNVLink(NvlinkPath, ArgvSubst))
+ reportError(std::move(Err));
+ if (Error Err = cleanupTmpFiles(TmpFiles))
+ reportError(std::move(Err));
+
+ return 0;
+}
diff --git a/clang/tools/clang-repl/ClangRepl.cpp b/clang/tools/clang-repl/ClangRepl.cpp
index ba6bb11abc86..4240b9d425df 100644
--- a/clang/tools/clang-repl/ClangRepl.cpp
+++ b/clang/tools/clang-repl/ClangRepl.cpp
@@ -32,7 +32,7 @@ static llvm::cl::list<std::string> OptInputs(llvm::cl::Positional,
llvm::cl::ZeroOrMore,
llvm::cl::desc("[code to run]"));
-static void LLVMErrorHandler(void *UserData, const std::string &Message,
+static void LLVMErrorHandler(void *UserData, const char *Message,
bool GenCrashDiag) {
auto &Diags = *static_cast<clang::DiagnosticsEngine *>(UserData);
@@ -80,6 +80,9 @@ int main(int argc, const char **argv) {
llvm::install_fatal_error_handler(LLVMErrorHandler,
static_cast<void *>(&CI->getDiagnostics()));
+ // Load any requested plugins.
+ CI->LoadRequestedPlugins();
+
auto Interp = ExitOnErr(clang::Interpreter::create(std::move(CI)));
for (const std::string &input : OptInputs) {
if (auto Err = Interp->ParseAndExecute(input))
diff --git a/clang/tools/driver/cc1_main.cpp b/clang/tools/driver/cc1_main.cpp
index 396d6ff529f3..fd3b25ccb3cb 100644
--- a/clang/tools/driver/cc1_main.cpp
+++ b/clang/tools/driver/cc1_main.cpp
@@ -28,6 +28,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/LinkAllPasses.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Option/OptTable.h"
@@ -38,7 +39,6 @@
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/Signals.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/TimeProfiler.h"
#include "llvm/Support/Timer.h"
@@ -57,7 +57,7 @@ using namespace llvm::opt;
// Main driver
//===----------------------------------------------------------------------===//
-static void LLVMErrorHandler(void *UserData, const std::string &Message,
+static void LLVMErrorHandler(void *UserData, const char *Message,
bool GenCrashDiag) {
DiagnosticsEngine &Diags = *static_cast<DiagnosticsEngine*>(UserData);
diff --git a/clang/tools/driver/cc1as_main.cpp b/clang/tools/driver/cc1as_main.cpp
index 086ce0ea7787..db3288d75281 100644
--- a/clang/tools/driver/cc1as_main.cpp
+++ b/clang/tools/driver/cc1as_main.cpp
@@ -36,6 +36,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Option/OptTable.h"
@@ -49,7 +50,6 @@
#include "llvm/Support/Process.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
@@ -490,7 +490,7 @@ static bool ExecuteAssemblerImpl(AssemblerInvocation &Opts,
T, Ctx, std::move(MAB), std::move(OW), std::move(CE), *STI,
Opts.RelaxAll, Opts.IncrementalLinkerCompatible,
/*DWARFMustBeAtTheEnd*/ true));
- Str.get()->InitSections(Opts.NoExecStack);
+ Str.get()->initSections(Opts.NoExecStack, *STI);
}
// When -fembed-bitcode is passed to clang_as, a 1-byte marker
@@ -550,7 +550,7 @@ static bool ExecuteAssembler(AssemblerInvocation &Opts,
return Failed;
}
-static void LLVMErrorHandler(void *UserData, const std::string &Message,
+static void LLVMErrorHandler(void *UserData, const char *Message,
bool GenCrashDiag) {
DiagnosticsEngine &Diags = *static_cast<DiagnosticsEngine*>(UserData);
diff --git a/clang/tools/driver/driver.cpp b/clang/tools/driver/driver.cpp
index 5a453429e79b..c9129ee9e502 100644
--- a/clang/tools/driver/driver.cpp
+++ b/clang/tools/driver/driver.cpp
@@ -278,27 +278,6 @@ static void FixupDiagPrefixExeName(TextDiagnosticPrinter *DiagClient,
DiagClient->setPrefix(std::string(ExeBasename));
}
-// This lets us create the DiagnosticsEngine with a properly-filled-out
-// DiagnosticOptions instance.
-static DiagnosticOptions *
-CreateAndPopulateDiagOpts(ArrayRef<const char *> argv, bool &UseNewCC1Process) {
- auto *DiagOpts = new DiagnosticOptions;
- unsigned MissingArgIndex, MissingArgCount;
- InputArgList Args = getDriverOptTable().ParseArgs(
- argv.slice(1), MissingArgIndex, MissingArgCount);
- // We ignore MissingArgCount and the return value of ParseDiagnosticArgs.
- // Any errors that would be diagnosed here will also be diagnosed later,
- // when the DiagnosticsEngine actually exists.
- (void)ParseDiagnosticArgs(*DiagOpts, Args);
-
- UseNewCC1Process =
- Args.hasFlag(clang::driver::options::OPT_fno_integrated_cc1,
- clang::driver::options::OPT_fintegrated_cc1,
- /*Default=*/CLANG_SPAWN_CC1);
-
- return DiagOpts;
-}
-
static void SetInstallDir(SmallVectorImpl<const char *> &argv,
Driver &TheDriver, bool CanonicalPrefixes) {
// Attempt to find the original path used to invoke the driver, to determine
@@ -398,8 +377,8 @@ int main(int Argc, const char **Argv) {
// Handle -cc1 integrated tools, even if -cc1 was expanded from a response
// file.
- auto FirstArg = std::find_if(Args.begin() + 1, Args.end(),
- [](const char *A) { return A != nullptr; });
+ auto FirstArg = llvm::find_if(llvm::drop_begin(Args),
+ [](const char *A) { return A != nullptr; });
if (FirstArg != Args.end() && StringRef(*FirstArg).startswith("-cc1")) {
// If -cc1 came from a response file, remove the EOL sentinels.
if (MarkEOLs) {
@@ -416,10 +395,10 @@ int main(int Argc, const char **Argv) {
// Skip end-of-line response file markers
if (Args[i] == nullptr)
continue;
- if (StringRef(Args[i]) == "-no-canonical-prefixes") {
+ if (StringRef(Args[i]) == "-canonical-prefixes")
+ CanonicalPrefixes = true;
+ else if (StringRef(Args[i]) == "-no-canonical-prefixes")
CanonicalPrefixes = false;
- break;
- }
}
// Handle CL and _CL_ which permits additional command line options to be
@@ -459,10 +438,15 @@ int main(int Argc, const char **Argv) {
// should spawn a new clang subprocess (old behavior).
// Not having an additional process saves some execution time of Windows,
// and makes debugging and profiling easier.
- bool UseNewCC1Process;
+ bool UseNewCC1Process = CLANG_SPAWN_CC1;
+ for (const char *Arg : Args)
+ UseNewCC1Process = llvm::StringSwitch<bool>(Arg)
+ .Case("-fno-integrated-cc1", true)
+ .Case("-fintegrated-cc1", false)
+ .Default(UseNewCC1Process);
IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts =
- CreateAndPopulateDiagOpts(Args, UseNewCC1Process);
+ CreateAndPopulateDiagOpts(Args);
TextDiagnosticPrinter *DiagClient
= new TextDiagnosticPrinter(llvm::errs(), &*DiagOpts);
diff --git a/clang/utils/TableGen/ClangASTPropertiesEmitter.cpp b/clang/utils/TableGen/ClangASTPropertiesEmitter.cpp
index caced02e1e11..c3735f70136d 100644
--- a/clang/utils/TableGen/ClangASTPropertiesEmitter.cpp
+++ b/clang/utils/TableGen/ClangASTPropertiesEmitter.cpp
@@ -455,7 +455,7 @@ void ASTPropsEmitter::emitPropertiedReaderWriterBody(HasProperties node,
// Emit code to read all the properties.
visitAllProperties(node, nodeInfo, [&](Property prop) {
// Verify that the creation code refers to this property.
- if (info.IsReader && creationCode.find(prop.getName()) == StringRef::npos)
+ if (info.IsReader && !creationCode.contains(prop.getName()))
PrintFatalError(nodeInfo.Creator.getLoc(),
"creation code for " + node.getName()
+ " doesn't refer to property \""
diff --git a/clang/utils/TableGen/ClangAttrEmitter.cpp b/clang/utils/TableGen/ClangAttrEmitter.cpp
index d679d58aaef1..fe05a3466af1 100644
--- a/clang/utils/TableGen/ClangAttrEmitter.cpp
+++ b/clang/utils/TableGen/ClangAttrEmitter.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
@@ -2043,7 +2044,7 @@ void PragmaClangAttributeSupport::generateParsingHelpers(raw_ostream &OS) {
OS << " return None;\n";
OS << "}\n\n";
- std::map<const Record *, std::vector<AttributeSubjectMatchRule>>
+ llvm::MapVector<const Record *, std::vector<AttributeSubjectMatchRule>>
SubMatchRules;
for (const auto &Rule : Rules) {
if (!Rule.isSubRule())
@@ -2676,9 +2677,9 @@ static void emitAttrList(raw_ostream &OS, StringRef Class,
// Determines if an attribute has a Pragma spelling.
static bool AttrHasPragmaSpelling(const Record *R) {
std::vector<FlattenedSpelling> Spellings = GetFlattenedSpellings(*R);
- return llvm::find_if(Spellings, [](const FlattenedSpelling &S) {
- return S.variety() == "Pragma";
- }) != Spellings.end();
+ return llvm::any_of(Spellings, [](const FlattenedSpelling &S) {
+ return S.variety() == "Pragma";
+ });
}
namespace {
@@ -3803,14 +3804,8 @@ static void GenerateLangOptRequirements(const Record &R,
if (LangOpts.empty())
return;
- OS << "bool diagLangOpts(Sema &S, const ParsedAttr &Attr) ";
- OS << "const override {\n";
- OS << " auto &LangOpts = S.LangOpts;\n";
- OS << " if (" << GenerateTestExpression(LangOpts) << ")\n";
- OS << " return true;\n\n";
- OS << " S.Diag(Attr.getLoc(), diag::warn_attribute_ignored) ";
- OS << "<< Attr;\n";
- OS << " return false;\n";
+ OS << "bool acceptsLangOpts(const LangOptions &LangOpts) const override {\n";
+ OS << " return " << GenerateTestExpression(LangOpts) << ";\n";
OS << "}\n\n";
}
@@ -3965,6 +3960,27 @@ void EmitClangAttrParsedAttrImpl(RecordKeeper &Records, raw_ostream &OS) {
}
OS << "};\n";
}
+
+ std::vector<std::string> ArgNames;
+ for (const auto &Arg : Attr.getValueAsListOfDefs("Args")) {
+ bool UnusedUnset;
+ if (Arg->getValueAsBitOrUnset("Fake", UnusedUnset))
+ continue;
+ ArgNames.push_back(Arg->getValueAsString("Name").str());
+ for (const auto &Class : Arg->getSuperClasses()) {
+ if (Class.first->getName().startswith("Variadic")) {
+ ArgNames.back().append("...");
+ break;
+ }
+ }
+ }
+ if (!ArgNames.empty()) {
+ OS << "static constexpr const char *" << I->first << "ArgNames[] = {\n";
+ for (const auto &N : ArgNames)
+ OS << '"' << N << "\",";
+ OS << "};\n";
+ }
+
OS << "struct ParsedAttrInfo" << I->first
<< " final : public ParsedAttrInfo {\n";
OS << " ParsedAttrInfo" << I->first << "() {\n";
@@ -3986,6 +4002,8 @@ void EmitClangAttrParsedAttrImpl(RecordKeeper &Records, raw_ostream &OS) {
OS << PragmaAttributeSupport.isAttributedSupported(*I->second) << ";\n";
if (!Spellings.empty())
OS << " Spellings = " << I->first << "Spellings;\n";
+ if (!ArgNames.empty())
+ OS << " ArgNames = " << I->first << "ArgNames;\n";
OS << " }\n";
GenerateAppertainsTo(Attr, OS);
GenerateMutualExclusionsChecks(Attr, Records, OS, MergeDeclOS, MergeStmtOS);
@@ -4210,6 +4228,24 @@ void EmitClangAttrSubjectMatchRulesParserStringSwitches(RecordKeeper &Records,
getPragmaAttributeSupport(Records).generateParsingHelpers(OS);
}
+void EmitClangAttrDocTable(RecordKeeper &Records, raw_ostream &OS) {
+ emitSourceFileHeader("Clang attribute documentation", OS);
+
+ std::vector<Record *> Attrs = Records.getAllDerivedDefinitions("Attr");
+ for (const auto *A : Attrs) {
+ if (!A->getValueAsBit("ASTNode"))
+ continue;
+ std::vector<Record *> Docs = A->getValueAsListOfDefs("Documentation");
+ assert(!Docs.empty());
+ // Only look at the first documentation if there are several.
+ // (Currently there's only one such attr, revisit if this becomes common).
+ StringRef Text =
+ Docs.front()->getValueAsOptionalString("Content").getValueOr("");
+ OS << "\nstatic const char AttrDoc_" << A->getName() << "[] = "
+ << "R\"reST(" << Text.trim() << ")reST\";\n";
+ }
+}
+
enum class SpellingKind {
GNU,
CXX11,
@@ -4397,7 +4433,13 @@ void EmitClangAttrDocs(RecordKeeper &Records, raw_ostream &OS) {
// Gather the Documentation lists from each of the attributes, based on the
// category provided.
std::vector<Record *> Attrs = Records.getAllDerivedDefinitions("Attr");
- std::map<const Record *, std::vector<DocumentationData>> SplitDocs;
+ struct CategoryLess {
+ bool operator()(const Record *L, const Record *R) const {
+ return L->getValueAsString("Name") < R->getValueAsString("Name");
+ }
+ };
+ std::map<const Record *, std::vector<DocumentationData>, CategoryLess>
+ SplitDocs;
for (const auto *A : Attrs) {
const Record &Attr = *A;
std::vector<Record *> Docs = Attr.getValueAsListOfDefs("Documentation");
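The new EmitClangAttrDocTable backend added above emits one raw-string constant per attribute, taken from the first Documentation entry (or an empty string when the entry has no Content). The generated table is therefore a list of lines shaped roughly like the following, where the attribute name and text are placeholders rather than values from Attr.td:

static const char AttrDoc_Example[] = R"reST(Documentation text for the hypothetical Example attribute.)reST";
static const char AttrDoc_Undocumented[] = R"reST()reST";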
diff --git a/clang/utils/TableGen/ClangCommentCommandInfoEmitter.cpp b/clang/utils/TableGen/ClangCommentCommandInfoEmitter.cpp
index eb2f23191c55..a988a5631aca 100644
--- a/clang/utils/TableGen/ClangCommentCommandInfoEmitter.cpp
+++ b/clang/utils/TableGen/ClangCommentCommandInfoEmitter.cpp
@@ -83,6 +83,12 @@ static std::string MangleName(StringRef Str) {
default:
Mangled += Str[i];
break;
+ case '(':
+ Mangled += "lparen";
+ break;
+ case ')':
+ Mangled += "rparen";
+ break;
case '[':
Mangled += "lsquare";
break;
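The two new cases above let comment command names containing parentheses be mangled into valid identifiers. A small standalone sketch of the mangling, where the input "f(" is only an illustrative command name:

#include <iostream>
#include <string>

static std::string mangleName(const std::string &Str) {
  std::string Mangled;
  for (char C : Str) {
    switch (C) {
    default:  Mangled += C;         break;
    case '(': Mangled += "lparen";  break;
    case ')': Mangled += "rparen";  break;
    case '[': Mangled += "lsquare"; break; // other punctuation is handled
    }                                      // similarly in the real emitter
  }
  return Mangled;
}

int main() { std::cout << mangleName("f(") << "\n"; } // prints "flparen"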
diff --git a/clang/utils/TableGen/ClangDiagnosticsEmitter.cpp b/clang/utils/TableGen/ClangDiagnosticsEmitter.cpp
index 014c1adcd809..547ec2c82cb3 100644
--- a/clang/utils/TableGen/ClangDiagnosticsEmitter.cpp
+++ b/clang/utils/TableGen/ClangDiagnosticsEmitter.cpp
@@ -129,6 +129,7 @@ namespace {
};
struct GroupInfo {
+ llvm::StringRef GroupName;
std::vector<const Record*> DiagsInGroup;
std::vector<std::string> SubGroups;
unsigned IDNo;
@@ -174,6 +175,7 @@ static void groupDiagnostics(const std::vector<Record*> &Diags,
Record *Group = DiagGroups[i];
GroupInfo &GI =
DiagsInGroup[std::string(Group->getValueAsString("GroupName"))];
+ GI.GroupName = Group->getName();
GI.Defs.push_back(Group);
std::vector<Record*> SubGroups = Group->getValueAsListOfDefs("SubGroups");
@@ -614,7 +616,7 @@ struct DiagnosticTextBuilder {
return It->second.Root;
}
- LLVM_ATTRIBUTE_NORETURN void PrintFatalError(llvm::Twine const &Msg) const {
+ [[noreturn]] void PrintFatalError(llvm::Twine const &Msg) const {
assert(EvaluatingRecord && "not evaluating a record?");
llvm::PrintFatalError(EvaluatingRecord->getLoc(), Msg);
}
@@ -1279,8 +1281,8 @@ void clang::EmitClangDiagsDefs(RecordKeeper &Records, raw_ostream &OS,
OS << ", \"";
OS.write_escaped(DiagTextBuilder.buildForDefinition(&R)) << '"';
- // Warning associated with the diagnostic. This is stored as an index into
- // the alphabetically sorted warning table.
+ // Warning group associated with the diagnostic. This is stored as an index
+ // into the alphabetically sorted warning group table.
if (DefInit *DI = dyn_cast<DefInit>(R.getValueInit("Group"))) {
std::map<std::string, GroupInfo>::iterator I = DiagsInGroup.find(
std::string(DI->getDef()->getValueAsString("GroupName")));
@@ -1487,18 +1489,20 @@ static void emitDiagTable(std::map<std::string, GroupInfo> &DiagsInGroup,
for (auto const &I: DiagsInGroup)
MaxLen = std::max(MaxLen, (unsigned)I.first.size());
- OS << "\n#ifdef GET_DIAG_TABLE\n";
+ OS << "\n#ifdef DIAG_ENTRY\n";
unsigned SubGroupIndex = 1, DiagArrayIndex = 1;
for (auto const &I: DiagsInGroup) {
// Group option string.
- OS << " { /* ";
+ OS << "DIAG_ENTRY(";
+ OS << I.second.GroupName << " /* ";
+
if (I.first.find_first_not_of("abcdefghijklmnopqrstuvwxyz"
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"0123456789!@#$%^*-+=:?") !=
std::string::npos)
PrintFatalError("Invalid character in diagnostic group '" + I.first +
"'");
- OS << I.first << " */ " << std::string(MaxLen - I.first.size(), ' ');
+ OS << I.first << " */, ";
// Store a pascal-style length byte at the beginning of the string.
std::string Name = char(I.first.size()) + I.first;
OS << GroupNames.GetOrAddStringOffset(Name, false) << ", ";
@@ -1517,7 +1521,7 @@ static void emitDiagTable(std::map<std::string, GroupInfo> &DiagsInGroup,
DiagArrayIndex += DiagsInPedantic.size();
DiagArrayIndex += V.size() + 1;
} else {
- OS << "/* Empty */ 0, ";
+ OS << "0, ";
}
// Subgroups.
@@ -1530,12 +1534,12 @@ static void emitDiagTable(std::map<std::string, GroupInfo> &DiagsInGroup,
SubGroupIndex += GroupsInPedantic.size();
SubGroupIndex += SubGroups.size() + 1;
} else {
- OS << "/* Empty */ 0";
+ OS << "0";
}
- OS << " },\n";
+ OS << ")\n";
}
- OS << "#endif // GET_DIAG_TABLE\n\n";
+ OS << "#endif // DIAG_ENTRY\n\n";
}
/// Emit the table of diagnostic categories.
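With this change the generated warning-group table is no longer a bare initializer list guarded by GET_DIAG_TABLE; each row becomes a DIAG_ENTRY(...) invocation carrying the group's record name, the offset of its flag string, and the indices of its member diagnostics and subgroups (0 when empty). A consumer would define the macro before including the generated file, roughly as sketched below; the struct and macro parameter names here are illustrative, not necessarily the ones clang uses:

struct WarningOption {
  unsigned NameOffset;
  unsigned Members;
  unsigned SubGroups;
};

static const WarningOption OptionTable[] = {
#define DIAG_ENTRY(GroupName, FlagNameOffset, Members, SubGroups)             \
  {FlagNameOffset, Members, SubGroups},
#include "clang/Basic/DiagnosticGroups.inc" // build-generated by this emitter
#undef DIAG_ENTRY
};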
diff --git a/clang/utils/TableGen/ClangOpcodesEmitter.cpp b/clang/utils/TableGen/ClangOpcodesEmitter.cpp
index ffeedcdf0ee2..8081096633d9 100644
--- a/clang/utils/TableGen/ClangOpcodesEmitter.cpp
+++ b/clang/utils/TableGen/ClangOpcodesEmitter.cpp
@@ -124,7 +124,7 @@ void ClangOpcodesEmitter::EmitInterp(raw_ostream &OS, StringRef N, Record *R) {
for (size_t I = 0, N = Args.size(); I < N; ++I) {
OS << " auto V" << I;
OS << " = ";
- OS << "PC.read<" << Args[I]->getValueAsString("Name") << ">();\n";
+ OS << "ReadArg<" << Args[I]->getValueAsString("Name") << ">(S, PC);\n";
}
// Emit a call to the template method and pass arguments.
@@ -161,8 +161,10 @@ void ClangOpcodesEmitter::EmitDisasm(raw_ostream &OS, StringRef N, Record *R) {
OS << " PrintName(\"" << ID << "\");\n";
OS << " OS << \"\\t\"";
- for (auto *Arg : R->getValueAsListOfDefs("Args"))
- OS << " << PC.read<" << Arg->getValueAsString("Name") << ">() << \" \"";
+ for (auto *Arg : R->getValueAsListOfDefs("Args")) {
+ OS << " << ReadArg<" << Arg->getValueAsString("Name") << ">(P, PC)";
+ OS << " << \" \"";
+ }
OS << " << \"\\n\";\n";
OS << " continue;\n";
diff --git a/clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp b/clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp
index a4cb5b7cacd9..4795b008dda3 100644
--- a/clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp
+++ b/clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp
@@ -233,19 +233,18 @@ private:
MapVector<BuiltinIndexListTy *, BuiltinTableEntries> SignatureListMap;
};
-// OpenCL builtin test generator. This class processes the same TableGen input
-// as BuiltinNameEmitter, but generates a .cl file that contains a call to each
-// builtin function described in the .td input.
-class OpenCLBuiltinTestEmitter {
+/// Base class for emitting a file (e.g. header or test) from OpenCLBuiltins.td
+class OpenCLBuiltinFileEmitterBase {
public:
- OpenCLBuiltinTestEmitter(RecordKeeper &Records, raw_ostream &OS)
+ OpenCLBuiltinFileEmitterBase(RecordKeeper &Records, raw_ostream &OS)
: Records(Records), OS(OS) {}
+ virtual ~OpenCLBuiltinFileEmitterBase() = default;
// Entrypoint to generate the functions for testing all OpenCL builtin
// functions.
- void emit();
+ virtual void emit() = 0;
-private:
+protected:
struct TypeFlags {
TypeFlags() : IsConst(false), IsVolatile(false), IsPointer(false) {}
bool IsConst : 1;
@@ -282,6 +281,18 @@ private:
expandTypesInSignature(const std::vector<Record *> &Signature,
SmallVectorImpl<SmallVector<std::string, 2>> &Types);
+ // Emit extension enabling pragmas.
+ void emitExtensionSetup();
+
+ // Emit an #if guard for a Builtin's extension. Return the corresponding
+ // closing #endif, or an empty string if no extension #if guard was emitted.
+ std::string emitExtensionGuard(const Record *Builtin);
+
+ // Emit an #if guard for a Builtin's language version. Return the
+ // corresponding closing #endif, or an empty string if no version #if guard
+ // was emitted.
+ std::string emitVersionGuard(const Record *Builtin);
+
// Contains OpenCL builtin functions and related information, stored as
// Record instances. They are coming from the associated TableGen file.
RecordKeeper &Records;
@@ -290,6 +301,19 @@ private:
raw_ostream &OS;
};
+// OpenCL builtin test generator. This class processes the same TableGen input
+// as BuiltinNameEmitter, but generates a .cl file that contains a call to each
+// builtin function described in the .td input.
+class OpenCLBuiltinTestEmitter : public OpenCLBuiltinFileEmitterBase {
+public:
+ OpenCLBuiltinTestEmitter(RecordKeeper &Records, raw_ostream &OS)
+ : OpenCLBuiltinFileEmitterBase(Records, OS) {}
+
+ // Entrypoint to generate the functions for testing all OpenCL builtin
+ // functions.
+ void emit() override;
+};
+
} // namespace
void BuiltinNameEmitter::Emit() {
@@ -472,10 +496,10 @@ void BuiltinNameEmitter::GetOverloads() {
auto Signature = B->getValueAsListOfDefs("Signature");
// Reuse signatures to avoid unnecessary duplicates.
auto it =
- std::find_if(SignaturesList.begin(), SignaturesList.end(),
- [&](const std::pair<std::vector<Record *>, unsigned> &a) {
- return a.first == Signature;
- });
+ llvm::find_if(SignaturesList,
+ [&](const std::pair<std::vector<Record *>, unsigned> &a) {
+ return a.first == Signature;
+ });
unsigned SignIndex;
if (it == SignaturesList.end()) {
VerifySignature(Signature, B);
@@ -923,9 +947,9 @@ static void OCL2Qual(Sema &S, const OpenCLTypeStruct &Ty,
OS << "\n} // OCL2Qual\n";
}
-std::string OpenCLBuiltinTestEmitter::getTypeString(const Record *Type,
- TypeFlags Flags,
- int VectorSize) const {
+std::string OpenCLBuiltinFileEmitterBase::getTypeString(const Record *Type,
+ TypeFlags Flags,
+ int VectorSize) const {
std::string S;
if (Type->getValueAsBit("IsConst") || Flags.IsConst) {
S += "const ";
@@ -970,7 +994,7 @@ std::string OpenCLBuiltinTestEmitter::getTypeString(const Record *Type,
return S;
}
-void OpenCLBuiltinTestEmitter::getTypeLists(
+void OpenCLBuiltinFileEmitterBase::getTypeLists(
Record *Type, TypeFlags &Flags, std::vector<Record *> &TypeList,
std::vector<int64_t> &VectorList) const {
bool isGenType = Type->isSubClassOf("GenericType");
@@ -1003,7 +1027,7 @@ void OpenCLBuiltinTestEmitter::getTypeLists(
VectorList.push_back(Type->getValueAsInt("VecWidth"));
}
-void OpenCLBuiltinTestEmitter::expandTypesInSignature(
+void OpenCLBuiltinFileEmitterBase::expandTypesInSignature(
const std::vector<Record *> &Signature,
SmallVectorImpl<SmallVector<std::string, 2>> &Types) {
// Find out if there are any GenTypes in this signature, and if so, calculate
@@ -1044,10 +1068,7 @@ void OpenCLBuiltinTestEmitter::expandTypesInSignature(
}
}
-void OpenCLBuiltinTestEmitter::emit() {
- emitSourceFileHeader("OpenCL Builtin exhaustive testing", OS);
-
- // Enable some extensions for testing.
+void OpenCLBuiltinFileEmitterBase::emitExtensionSetup() {
OS << R"(
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
@@ -1058,6 +1079,60 @@ void OpenCLBuiltinTestEmitter::emit() {
#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable
)";
+}
+
+std::string
+OpenCLBuiltinFileEmitterBase::emitExtensionGuard(const Record *Builtin) {
+ StringRef Extensions =
+ Builtin->getValueAsDef("Extension")->getValueAsString("ExtName");
+ if (Extensions.empty())
+ return "";
+
+ OS << "#if";
+
+ SmallVector<StringRef, 2> ExtVec;
+ Extensions.split(ExtVec, " ");
+ bool isFirst = true;
+ for (StringRef Ext : ExtVec) {
+ if (!isFirst) {
+ OS << " &&";
+ }
+ OS << " defined(" << Ext << ")";
+ isFirst = false;
+ }
+ OS << "\n";
+
+ return "#endif // Extension\n";
+}
+
+std::string
+OpenCLBuiltinFileEmitterBase::emitVersionGuard(const Record *Builtin) {
+ std::string OptionalEndif;
+ auto PrintOpenCLVersion = [this](int Version) {
+ OS << "CL_VERSION_" << (Version / 100) << "_" << ((Version % 100) / 10);
+ };
+ int MinVersion = Builtin->getValueAsDef("MinVersion")->getValueAsInt("ID");
+ if (MinVersion != 100) {
+ // OpenCL 1.0 is the default minimum version.
+ OS << "#if __OPENCL_C_VERSION__ >= ";
+ PrintOpenCLVersion(MinVersion);
+ OS << "\n";
+ OptionalEndif = "#endif // MinVersion\n" + OptionalEndif;
+ }
+ int MaxVersion = Builtin->getValueAsDef("MaxVersion")->getValueAsInt("ID");
+ if (MaxVersion) {
+ OS << "#if __OPENCL_C_VERSION__ < ";
+ PrintOpenCLVersion(MaxVersion);
+ OS << "\n";
+ OptionalEndif = "#endif // MaxVersion\n" + OptionalEndif;
+ }
+ return OptionalEndif;
+}
+
+void OpenCLBuiltinTestEmitter::emit() {
+ emitSourceFileHeader("OpenCL Builtin exhaustive testing", OS);
+
+ emitExtensionSetup();
// Ensure each test has a unique name by numbering them.
unsigned TestID = 0;
@@ -1071,43 +1146,10 @@ void OpenCLBuiltinTestEmitter::emit() {
expandTypesInSignature(B->getValueAsListOfDefs("Signature"), FTypes);
OS << "// Test " << Name << "\n";
- std::string OptionalEndif;
- StringRef Extensions =
- B->getValueAsDef("Extension")->getValueAsString("ExtName");
- if (!Extensions.empty()) {
- OS << "#if";
- OptionalEndif = "#endif // Extension\n";
-
- SmallVector<StringRef, 2> ExtVec;
- Extensions.split(ExtVec, " ");
- bool isFirst = true;
- for (StringRef Ext : ExtVec) {
- if (!isFirst) {
- OS << " &&";
- }
- OS << " defined(" << Ext << ")";
- isFirst = false;
- }
- OS << "\n";
- }
- auto PrintOpenCLVersion = [this](int Version) {
- OS << "CL_VERSION_" << (Version / 100) << "_" << ((Version % 100) / 10);
- };
- int MinVersion = B->getValueAsDef("MinVersion")->getValueAsInt("ID");
- if (MinVersion != 100) {
- // OpenCL 1.0 is the default minimum version.
- OS << "#if __OPENCL_C_VERSION__ >= ";
- PrintOpenCLVersion(MinVersion);
- OS << "\n";
- OptionalEndif = "#endif // MinVersion\n" + OptionalEndif;
- }
- int MaxVersion = B->getValueAsDef("MaxVersion")->getValueAsInt("ID");
- if (MaxVersion) {
- OS << "#if __OPENCL_C_VERSION__ < ";
- PrintOpenCLVersion(MaxVersion);
- OS << "\n";
- OptionalEndif = "#endif // MaxVersion\n" + OptionalEndif;
- }
+
+ std::string OptionalExtensionEndif = emitExtensionGuard(B);
+ std::string OptionalVersionEndif = emitVersionGuard(B);
+
for (const auto &Signature : FTypes) {
// Emit function declaration.
OS << Signature[0] << " test" << TestID++ << "_" << Name << "(";
@@ -1136,7 +1178,9 @@ void OpenCLBuiltinTestEmitter::emit() {
// End of function body.
OS << "}\n";
}
- OS << OptionalEndif << "\n";
+
+ OS << OptionalVersionEndif;
+ OS << OptionalExtensionEndif;
}
}
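Factoring emitExtensionGuard and emitVersionGuard out of emit() above does not change the generated .cl test file: each builtin's tests are still wrapped first in an extension guard and then in the version guards, with the matching #endif comments printed in reverse order. For a hypothetical builtin tied to one extension and limited to OpenCL C versions between 1.2 and 2.0, the emitted wrapper would look roughly like:

#if defined(cl_khr_fp64)
#if __OPENCL_C_VERSION__ >= CL_VERSION_1_2
#if __OPENCL_C_VERSION__ < CL_VERSION_2_0
/* generated test functions for the builtin go here */
#endif // MaxVersion
#endif // MinVersion
#endif // Extension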
diff --git a/clang/utils/TableGen/MveEmitter.cpp b/clang/utils/TableGen/MveEmitter.cpp
index 091af2dc52a1..f5b6f4f01688 100644
--- a/clang/utils/TableGen/MveEmitter.cpp
+++ b/clang/utils/TableGen/MveEmitter.cpp
@@ -1941,8 +1941,8 @@ void MveEmitter::EmitHeader(raw_ostream &OS) {
void MveEmitter::EmitBuiltinDef(raw_ostream &OS) {
for (const auto &kv : ACLEIntrinsics) {
const ACLEIntrinsic &Int = *kv.second;
- OS << "TARGET_HEADER_BUILTIN(__builtin_arm_mve_" << Int.fullName()
- << ", \"\", \"n\", \"arm_mve.h\", ALL_LANGUAGES, \"\")\n";
+ OS << "BUILTIN(__builtin_arm_mve_" << Int.fullName()
+ << ", \"\", \"n\")\n";
}
std::set<std::string> ShortNamesSeen;
@@ -2151,8 +2151,8 @@ void CdeEmitter::EmitBuiltinDef(raw_ostream &OS) {
if (kv.second->headerOnly())
continue;
const ACLEIntrinsic &Int = *kv.second;
- OS << "TARGET_HEADER_BUILTIN(__builtin_arm_cde_" << Int.fullName()
- << ", \"\", \"ncU\", \"arm_cde.h\", ALL_LANGUAGES, \"\")\n";
+ OS << "BUILTIN(__builtin_arm_cde_" << Int.fullName()
+ << ", \"\", \"ncU\")\n";
}
}
diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index f0da1a7d2f4e..ff552b66c0e2 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -382,7 +382,7 @@ public:
StringRef Mods = getNextModifiers(Proto, Pos);
while (!Mods.empty()) {
Types.emplace_back(InTS, Mods);
- if (Mods.find('!') != StringRef::npos)
+ if (Mods.contains('!'))
PolymorphicKeyType = Types.size() - 1;
Mods = getNextModifiers(Proto, Pos);
@@ -417,8 +417,7 @@ public:
/// Return true if the intrinsic takes an immediate operand.
bool hasImmediate() const {
- return std::any_of(Types.begin(), Types.end(),
- [](const Type &T) { return T.isImmediate(); });
+ return llvm::any_of(Types, [](const Type &T) { return T.isImmediate(); });
}
/// Return the parameter index of the immediate operand.
@@ -1271,9 +1270,8 @@ void Intrinsic::emitShadowedArgs() {
}
bool Intrinsic::protoHasScalar() const {
- return std::any_of(Types.begin(), Types.end(), [](const Type &T) {
- return T.isScalar() && !T.isImmediate();
- });
+ return llvm::any_of(
+ Types, [](const Type &T) { return T.isScalar() && !T.isImmediate(); });
}
void Intrinsic::emitBodyAsBuiltinCall() {
@@ -1916,10 +1914,9 @@ Intrinsic &NeonEmitter::getIntrinsic(StringRef Name, ArrayRef<Type> Types,
continue;
unsigned ArgNum = 0;
- bool MatchingArgumentTypes =
- std::all_of(Types.begin(), Types.end(), [&](const auto &Type) {
- return Type == I.getParamType(ArgNum++);
- });
+ bool MatchingArgumentTypes = llvm::all_of(Types, [&](const auto &Type) {
+ return Type == I.getParamType(ArgNum++);
+ });
if (MatchingArgumentTypes)
GoodVec.push_back(&I);
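Several hunks in this commit, including the two above, replace iterator-pair calls to std::any_of/std::all_of/std::find_if with the range-based wrappers from llvm/ADT/STLExtras.h. The transformation is mechanical; a minimal sketch of the pattern:

#include "llvm/ADT/STLExtras.h"
#include <vector>

// llvm::any_of(Range, Pred) is equivalent to
// std::any_of(Range.begin(), Range.end(), Pred).
static bool hasNegative(const std::vector<int> &Values) {
  return llvm::any_of(Values, [](int V) { return V < 0; });
}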
diff --git a/clang/utils/TableGen/RISCVVEmitter.cpp b/clang/utils/TableGen/RISCVVEmitter.cpp
index 24f2250c9ae0..62eef830318f 100644
--- a/clang/utils/TableGen/RISCVVEmitter.cpp
+++ b/clang/utils/TableGen/RISCVVEmitter.cpp
@@ -80,7 +80,7 @@ public:
// passing to the BUILTIN() macro in Builtins.def.
const std::string &getBuiltinStr() const { return BuiltinStr; }
- // Return the clang buitlin type for RVV vector type which are used in the
+ // Return the clang builtin type for RVV vector type which are used in the
// riscv_vector.h header file.
const std::string &getClangBuiltinStr() const { return ClangBuiltinStr; }
@@ -140,8 +140,7 @@ enum RISCVExtension : uint8_t {
F = 1 << 1,
D = 1 << 2,
Zfh = 1 << 3,
- Zvamo = 1 << 4,
- Zvlsseg = 1 << 5,
+ Zvlsseg = 1 << 4,
};
// TODO refactor RVVIntrinsic class design after support all intrinsic
@@ -150,13 +149,13 @@ enum RISCVExtension : uint8_t {
class RVVIntrinsic {
private:
- std::string Name; // Builtin name
+ std::string BuiltinName; // Builtin name
+ std::string Name; // C intrinsic name.
std::string MangledName;
std::string IRName;
- bool HasSideEffects;
bool IsMask;
- bool HasMaskedOffOperand;
bool HasVL;
+ bool HasPolicy;
bool HasNoMaskedOverloaded;
bool HasAutoDef; // There is automatic definition in header
std::string ManualCodegen;
@@ -170,19 +169,19 @@ private:
public:
RVVIntrinsic(StringRef Name, StringRef Suffix, StringRef MangledName,
- StringRef MangledSuffix, StringRef IRName, bool HasSideEffects,
- bool IsMask, bool HasMaskedOffOperand, bool HasVL,
+ StringRef MangledSuffix, StringRef IRName, bool IsMask,
+ bool HasMaskedOffOperand, bool HasVL, bool HasPolicy,
bool HasNoMaskedOverloaded, bool HasAutoDef,
StringRef ManualCodegen, const RVVTypes &Types,
const std::vector<int64_t> &IntrinsicTypes,
StringRef RequiredExtension, unsigned NF);
~RVVIntrinsic() = default;
+ StringRef getBuiltinName() const { return BuiltinName; }
StringRef getName() const { return Name; }
StringRef getMangledName() const { return MangledName; }
- bool hasSideEffects() const { return HasSideEffects; }
- bool hasMaskedOffOperand() const { return HasMaskedOffOperand; }
bool hasVL() const { return HasVL; }
+ bool hasPolicy() const { return HasPolicy; }
bool hasNoMaskedOverloaded() const { return HasNoMaskedOverloaded; }
bool hasManualCodegen() const { return !ManualCodegen.empty(); }
bool hasAutoDef() const { return HasAutoDef; }
@@ -191,6 +190,9 @@ public:
StringRef getManualCodegen() const { return ManualCodegen; }
uint8_t getRISCVExtensions() const { return RISCVExtensions; }
unsigned getNF() const { return NF; }
+ const std::vector<int64_t> &getIntrinsicTypes() const {
+ return IntrinsicTypes;
+ }
// Return the type string for a BUILTIN() macro in Builtins.def.
std::string getBuiltinTypeStr() const;
@@ -200,7 +202,7 @@ public:
void emitCodeGenSwitchBody(raw_ostream &o) const;
// Emit the macros for mapping C/C++ intrinsic function to builtin functions.
- void emitIntrinsicMacro(raw_ostream &o) const;
+ void emitIntrinsicFuncDef(raw_ostream &o) const;
// Emit the mangled function definition.
void emitMangledFuncDef(raw_ostream &o) const;
@@ -231,6 +233,8 @@ public:
private:
/// Create all intrinsics and add them to \p Out
void createRVVIntrinsics(std::vector<std::unique_ptr<RVVIntrinsic>> &Out);
+ /// Emit header code from RVVHeader records to \p OS
+ void createRVVHeaders(raw_ostream &OS);
/// Compute output and input types by applying different config (basic type
/// and LMUL with type transformers). It also records the result type in a legal
/// or illegal set to avoid computing the same config again. The result maybe
@@ -652,7 +656,7 @@ void RVVType::applyModifier(StringRef Transformer) {
assert(Idx != StringRef::npos);
StringRef ComplexType = Transformer.slice(1, Idx);
Transformer = Transformer.drop_front(Idx + 1);
- assert(Transformer.find('(') == StringRef::npos &&
+ assert(!Transformer.contains('(') &&
"Only allow one complex type transformer");
auto UpdateAndCheckComplexProto = [&]() {
@@ -755,19 +759,19 @@ void RVVType::applyModifier(StringRef Transformer) {
//===----------------------------------------------------------------------===//
RVVIntrinsic::RVVIntrinsic(StringRef NewName, StringRef Suffix,
StringRef NewMangledName, StringRef MangledSuffix,
- StringRef IRName, bool HasSideEffects, bool IsMask,
- bool HasMaskedOffOperand, bool HasVL,
+ StringRef IRName, bool IsMask,
+ bool HasMaskedOffOperand, bool HasVL, bool HasPolicy,
bool HasNoMaskedOverloaded, bool HasAutoDef,
StringRef ManualCodegen, const RVVTypes &OutInTypes,
const std::vector<int64_t> &NewIntrinsicTypes,
StringRef RequiredExtension, unsigned NF)
- : IRName(IRName), HasSideEffects(HasSideEffects), IsMask(IsMask),
- HasMaskedOffOperand(HasMaskedOffOperand), HasVL(HasVL),
+ : IRName(IRName), IsMask(IsMask), HasVL(HasVL), HasPolicy(HasPolicy),
HasNoMaskedOverloaded(HasNoMaskedOverloaded), HasAutoDef(HasAutoDef),
ManualCodegen(ManualCodegen.str()), NF(NF) {
- // Init Name and MangledName
- Name = NewName.str();
+ // Init BuiltinName, Name and MangledName
+ BuiltinName = NewName.str();
+ Name = BuiltinName;
if (NewMangledName.empty())
MangledName = NewName.split("_").first.str();
else
@@ -777,8 +781,10 @@ RVVIntrinsic::RVVIntrinsic(StringRef NewName, StringRef Suffix,
if (!MangledSuffix.empty())
MangledName += "_" + MangledSuffix.str();
if (IsMask) {
+ BuiltinName += "_m";
Name += "_m";
}
+
// Init RISC-V extensions
for (const auto &T : OutInTypes) {
if (T->isFloatVector(16) || T->isFloat(16))
@@ -788,8 +794,6 @@ RVVIntrinsic::RVVIntrinsic(StringRef NewName, StringRef Suffix,
else if (T->isFloatVector(64) || T->isFloat(64))
RISCVExtensions |= RISCVExtension::D;
}
- if (RequiredExtension == "Zvamo")
- RISCVExtensions |= RISCVExtension::Zvamo;
if (RequiredExtension == "Zvlsseg")
RISCVExtensions |= RISCVExtension::Zvlsseg;
@@ -831,6 +835,9 @@ void RVVIntrinsic::emitCodeGenSwitchBody(raw_ostream &OS) const {
if (isMask()) {
if (hasVL()) {
OS << " std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end() - 1);\n";
+ if (hasPolicy())
+ OS << " Ops.push_back(ConstantInt::get(Ops.back()->getType(),"
+ " TAIL_UNDISTURBED));\n";
} else {
OS << " std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());\n";
}
@@ -853,34 +860,30 @@ void RVVIntrinsic::emitCodeGenSwitchBody(raw_ostream &OS) const {
OS << " break;\n";
}
-void RVVIntrinsic::emitIntrinsicMacro(raw_ostream &OS) const {
- OS << "#define " << getName() << "(";
- if (!InputTypes.empty()) {
- ListSeparator LS;
- for (unsigned i = 0, e = InputTypes.size(); i != e; ++i)
- OS << LS << "op" << i;
- }
- OS << ") \\\n";
- OS << "__builtin_rvv_" << getName() << "(";
+void RVVIntrinsic::emitIntrinsicFuncDef(raw_ostream &OS) const {
+ OS << "__attribute__((__clang_builtin_alias__(";
+ OS << "__builtin_rvv_" << getBuiltinName() << ")))\n";
+ OS << OutputType->getTypeStr() << " " << getName() << "(";
+ // Emit function arguments
if (!InputTypes.empty()) {
ListSeparator LS;
- for (unsigned i = 0, e = InputTypes.size(); i != e; ++i)
- OS << LS << "(" << InputTypes[i]->getTypeStr() << ")(op" << i << ")";
+ for (unsigned i = 0; i < InputTypes.size(); ++i)
+ OS << LS << InputTypes[i]->getTypeStr();
}
- OS << ")\n";
+ OS << ");\n";
}
void RVVIntrinsic::emitMangledFuncDef(raw_ostream &OS) const {
- OS << "__attribute__((clang_builtin_alias(";
- OS << "__builtin_rvv_" << getName() << ")))\n";
+ OS << "__attribute__((__clang_builtin_alias__(";
+ OS << "__builtin_rvv_" << getBuiltinName() << ")))\n";
OS << OutputType->getTypeStr() << " " << getMangledName() << "(";
// Emit function arguments
if (!InputTypes.empty()) {
ListSeparator LS;
for (unsigned i = 0; i < InputTypes.size(); ++i)
- OS << LS << InputTypes[i]->getTypeStr() << " op" << i;
+ OS << LS << InputTypes[i]->getTypeStr();
}
- OS << ");\n\n";
+ OS << ");\n";
}
//===----------------------------------------------------------------------===//
@@ -915,6 +918,8 @@ void RVVEmitter::createHeader(raw_ostream &OS) {
OS << "extern \"C\" {\n";
OS << "#endif\n\n";
+ createRVVHeaders(OS);
+
std::vector<std::unique_ptr<RVVIntrinsic>> Defs;
createRVVIntrinsics(Defs);
@@ -971,33 +976,39 @@ void RVVEmitter::createHeader(raw_ostream &OS) {
OS << "#endif\n\n";
// Intrinsics that require the same extension are emitted under the same arch guard macro.
- std::stable_sort(Defs.begin(), Defs.end(),
- [](const std::unique_ptr<RVVIntrinsic> &A,
- const std::unique_ptr<RVVIntrinsic> &B) {
- return A->getRISCVExtensions() < B->getRISCVExtensions();
- });
+ llvm::stable_sort(Defs, [](const std::unique_ptr<RVVIntrinsic> &A,
+ const std::unique_ptr<RVVIntrinsic> &B) {
+ return A->getRISCVExtensions() < B->getRISCVExtensions();
+ });
+
+ OS << "#define __rvv_ai static __inline__\n";
// Print intrinsic functions with macro
emitArchMacroAndBody(Defs, OS, [](raw_ostream &OS, const RVVIntrinsic &Inst) {
- Inst.emitIntrinsicMacro(OS);
+ OS << "__rvv_ai ";
+ Inst.emitIntrinsicFuncDef(OS);
});
+ OS << "#undef __rvv_ai\n\n";
+
OS << "#define __riscv_v_intrinsic_overloading 1\n";
// Print Overloaded APIs
- OS << "#define __rvv_overloaded static inline "
- "__attribute__((__always_inline__, __nodebug__, __overloadable__))\n";
+ OS << "#define __rvv_aio static __inline__ "
+ "__attribute__((__overloadable__))\n";
emitArchMacroAndBody(Defs, OS, [](raw_ostream &OS, const RVVIntrinsic &Inst) {
if (!Inst.isMask() && !Inst.hasNoMaskedOverloaded())
return;
- OS << "__rvv_overloaded ";
+ OS << "__rvv_aio ";
Inst.emitMangledFuncDef(OS);
});
+ OS << "#undef __rvv_aio\n";
+
OS << "\n#ifdef __cplusplus\n";
OS << "}\n";
- OS << "#endif // __riscv_vector\n";
+ OS << "#endif // __cplusplus\n";
OS << "#endif // __RISCV_VECTOR_H\n";
}
@@ -1005,17 +1016,31 @@ void RVVEmitter::createBuiltins(raw_ostream &OS) {
std::vector<std::unique_ptr<RVVIntrinsic>> Defs;
createRVVIntrinsics(Defs);
+ // Map to keep track of which builtin names have already been emitted.
+ StringMap<RVVIntrinsic *> BuiltinMap;
+
OS << "#if defined(TARGET_BUILTIN) && !defined(RISCVV_BUILTIN)\n";
OS << "#define RISCVV_BUILTIN(ID, TYPE, ATTRS) TARGET_BUILTIN(ID, TYPE, "
"ATTRS, \"experimental-v\")\n";
OS << "#endif\n";
for (auto &Def : Defs) {
- OS << "RISCVV_BUILTIN(__builtin_rvv_" << Def->getName() << ",\""
- << Def->getBuiltinTypeStr() << "\", ";
- if (!Def->hasSideEffects())
- OS << "\"n\")\n";
- else
- OS << "\"\")\n";
+ auto P =
+ BuiltinMap.insert(std::make_pair(Def->getBuiltinName(), Def.get()));
+ if (!P.second) {
+ // Verify that this would have produced the same builtin definition.
+ if (P.first->second->hasAutoDef() != Def->hasAutoDef()) {
+ PrintFatalError("Builtin with same name has different hasAutoDef");
+ } else if (!Def->hasAutoDef() && P.first->second->getBuiltinTypeStr() !=
+ Def->getBuiltinTypeStr()) {
+ PrintFatalError("Builtin with same name has different type string");
+ }
+ continue;
+ }
+
+ OS << "RISCVV_BUILTIN(__builtin_rvv_" << Def->getBuiltinName() << ",\"";
+ if (!Def->hasAutoDef())
+ OS << Def->getBuiltinTypeStr();
+ OS << "\", \"n\")\n";
}
OS << "#undef RISCVV_BUILTIN\n";
}
@@ -1024,11 +1049,14 @@ void RVVEmitter::createCodeGen(raw_ostream &OS) {
std::vector<std::unique_ptr<RVVIntrinsic>> Defs;
createRVVIntrinsics(Defs);
// The IR name could be empty; use a stable sort so the relative order is preserved.
- std::stable_sort(Defs.begin(), Defs.end(),
- [](const std::unique_ptr<RVVIntrinsic> &A,
- const std::unique_ptr<RVVIntrinsic> &B) {
- return A->getIRName() < B->getIRName();
- });
+ llvm::stable_sort(Defs, [](const std::unique_ptr<RVVIntrinsic> &A,
+ const std::unique_ptr<RVVIntrinsic> &B) {
+ return A->getIRName() < B->getIRName();
+ });
+
+ // Map to keep track of which builtin names have already been emitted.
+ StringMap<RVVIntrinsic *> BuiltinMap;
+
// Print switch body when the ir name or ManualCodegen changes from previous
// iteration.
RVVIntrinsic *PrevDef = Defs.begin()->get();
@@ -1039,7 +1067,29 @@ void RVVEmitter::createCodeGen(raw_ostream &OS) {
PrevDef->emitCodeGenSwitchBody(OS);
}
PrevDef = Def.get();
- OS << "case RISCV::BI__builtin_rvv_" << Def->getName() << ":\n";
+
+ auto P =
+ BuiltinMap.insert(std::make_pair(Def->getBuiltinName(), Def.get()));
+ if (P.second) {
+ OS << "case RISCVVector::BI__builtin_rvv_" << Def->getBuiltinName()
+ << ":\n";
+ continue;
+ }
+
+ if (P.first->second->getIRName() != Def->getIRName())
+ PrintFatalError("Builtin with same name has different IRName");
+ else if (P.first->second->getManualCodegen() != Def->getManualCodegen())
+ PrintFatalError("Builtin with same name has different ManualCodegen");
+ else if (P.first->second->getNF() != Def->getNF())
+ PrintFatalError("Builtin with same name has different NF");
+ else if (P.first->second->isMask() != Def->isMask())
+ PrintFatalError("Builtin with same name has different isMask");
+ else if (P.first->second->hasVL() != Def->hasVL())
+ PrintFatalError("Builtin with same name has different HasVL");
+ else if (P.first->second->hasPolicy() != Def->hasPolicy())
+ PrintFatalError("Builtin with same name has different HasPolicy");
+ else if (P.first->second->getIntrinsicTypes() != Def->getIntrinsicTypes())
+ PrintFatalError("Builtin with same name has different IntrinsicTypes");
}
Defs.back()->emitCodeGenSwitchBody(OS);
OS << "\n";
@@ -1084,8 +1134,8 @@ void RVVEmitter::createRVVIntrinsics(
bool HasMask = R->getValueAsBit("HasMask");
bool HasMaskedOffOperand = R->getValueAsBit("HasMaskedOffOperand");
bool HasVL = R->getValueAsBit("HasVL");
+ bool HasPolicy = R->getValueAsBit("HasPolicy");
bool HasNoMaskedOverloaded = R->getValueAsBit("HasNoMaskedOverloaded");
- bool HasSideEffects = R->getValueAsBit("HasSideEffects");
std::vector<int64_t> Log2LMULList = R->getValueAsListOfInts("Log2LMUL");
StringRef ManualCodegen = R->getValueAsString("ManualCodegen");
StringRef ManualCodegenMask = R->getValueAsString("ManualCodegenMask");
@@ -1157,16 +1207,16 @@ void RVVEmitter::createRVVIntrinsics(
// Create a non-mask intrinsic
Out.push_back(std::make_unique<RVVIntrinsic>(
Name, SuffixStr, MangledName, MangledSuffixStr, IRName,
- HasSideEffects, /*IsMask=*/false, /*HasMaskedOffOperand=*/false,
- HasVL, HasNoMaskedOverloaded, HasAutoDef, ManualCodegen,
- Types.getValue(), IntrinsicTypes, RequiredExtension, NF));
+ /*IsMask=*/false, /*HasMaskedOffOperand=*/false, HasVL, HasPolicy,
+ HasNoMaskedOverloaded, HasAutoDef, ManualCodegen, Types.getValue(),
+ IntrinsicTypes, RequiredExtension, NF));
if (HasMask) {
// Create a mask intrinsic
Optional<RVVTypes> MaskTypes =
computeTypes(I, Log2LMUL, NF, ProtoMaskSeq);
Out.push_back(std::make_unique<RVVIntrinsic>(
Name, SuffixStr, MangledName, MangledSuffixStr, IRNameMask,
- HasSideEffects, /*IsMask=*/true, HasMaskedOffOperand, HasVL,
+ /*IsMask=*/true, HasMaskedOffOperand, HasVL, HasPolicy,
HasNoMaskedOverloaded, HasAutoDef, ManualCodegenMask,
MaskTypes.getValue(), IntrinsicTypes, RequiredExtension, NF));
}
@@ -1175,6 +1225,15 @@ void RVVEmitter::createRVVIntrinsics(
}
}
+void RVVEmitter::createRVVHeaders(raw_ostream &OS) {
+ std::vector<Record *> RVVHeaders =
+ Records.getAllDerivedDefinitions("RVVHeader");
+ for (auto *R : RVVHeaders) {
+ StringRef HeaderCodeStr = R->getValueAsString("HeaderCode");
+ OS << HeaderCodeStr.str();
+ }
+}
+
Optional<RVVTypes>
RVVEmitter::computeTypes(BasicType BT, int Log2LMUL, unsigned NF,
ArrayRef<std::string> PrototypeSeq) {
@@ -1245,8 +1304,6 @@ bool RVVEmitter::emitExtDefStr(uint8_t Extents, raw_ostream &OS) {
OS << LS << "defined(__riscv_d)";
if (Extents & RISCVExtension::Zfh)
OS << LS << "defined(__riscv_zfh)";
- if (Extents & RISCVExtension::Zvamo)
- OS << LS << "defined(__riscv_zvamo)";
if (Extents & RISCVExtension::Zvlsseg)
OS << LS << "defined(__riscv_zvlsseg)";
OS << "\n";
diff --git a/clang/utils/TableGen/TableGen.cpp b/clang/utils/TableGen/TableGen.cpp
index 7fb5d0acc6f3..bb9366e2b7fc 100644
--- a/clang/utils/TableGen/TableGen.cpp
+++ b/clang/utils/TableGen/TableGen.cpp
@@ -30,6 +30,7 @@ enum ActionType {
GenClangAttrSubjectMatchRulesParserStringSwitches,
GenClangAttrImpl,
GenClangAttrList,
+ GenClangAttrDocTable,
GenClangAttrSubjectMatchRuleList,
GenClangAttrPCHRead,
GenClangAttrPCHWrite,
@@ -115,6 +116,8 @@ cl::opt<ActionType> Action(
"Generate clang attribute implementations"),
clEnumValN(GenClangAttrList, "gen-clang-attr-list",
"Generate a clang attribute list"),
+ clEnumValN(GenClangAttrDocTable, "gen-clang-attr-doc-table",
+ "Generate a table of attribute documentation"),
clEnumValN(GenClangAttrSubjectMatchRuleList,
"gen-clang-attr-subject-match-rule-list",
"Generate a clang attribute subject match rule list"),
@@ -280,6 +283,9 @@ bool ClangTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
case GenClangAttrList:
EmitClangAttrList(Records, OS);
break;
+ case GenClangAttrDocTable:
+ EmitClangAttrDocTable(Records, OS);
+ break;
case GenClangAttrSubjectMatchRuleList:
EmitClangAttrSubjectMatchRuleList(Records, OS);
break;
diff --git a/clang/utils/TableGen/TableGenBackends.h b/clang/utils/TableGen/TableGenBackends.h
index bf40c7b1d18f..fd8b9fcda20f 100644
--- a/clang/utils/TableGen/TableGenBackends.h
+++ b/clang/utils/TableGen/TableGenBackends.h
@@ -61,6 +61,7 @@ void EmitClangAttrTextNodeDump(llvm::RecordKeeper &Records,
llvm::raw_ostream &OS);
void EmitClangAttrNodeTraverse(llvm::RecordKeeper &Records,
llvm::raw_ostream &OS);
+void EmitClangAttrDocTable(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
void EmitClangDiagsDefs(llvm::RecordKeeper &Records, llvm::raw_ostream &OS,
const std::string &Component);
diff --git a/compiler-rt/include/profile/InstrProfData.inc b/compiler-rt/include/profile/InstrProfData.inc
index 08a642469627..008b8dde5820 100644
--- a/compiler-rt/include/profile/InstrProfData.inc
+++ b/compiler-rt/include/profile/InstrProfData.inc
@@ -75,9 +75,7 @@ INSTR_PROF_DATA(const uint64_t, llvm::Type::getInt64Ty(Ctx), NameRef, \
INSTR_PROF_DATA(const uint64_t, llvm::Type::getInt64Ty(Ctx), FuncHash, \
ConstantInt::get(llvm::Type::getInt64Ty(Ctx), \
Inc->getHash()->getZExtValue()))
-INSTR_PROF_DATA(const IntPtrT, llvm::Type::getInt64PtrTy(Ctx), CounterPtr, \
- ConstantExpr::getBitCast(CounterPtr, \
- llvm::Type::getInt64PtrTy(Ctx)))
+INSTR_PROF_DATA(const IntPtrT, IntPtrTy, CounterPtr, RelativeCounterPtr)
/* This is used to map function pointers for the indirect call targets to
* function name hashes during the conversion from raw to merged profile
* data.
@@ -129,15 +127,16 @@ INSTR_PROF_VALUE_NODE(PtrToNodeT, llvm::Type::getInt8PtrTy(Ctx), Next, \
#endif
INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic())
INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version())
+INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL))
INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize)
INSTR_PROF_RAW_HEADER(uint64_t, PaddingBytesBeforeCounters, PaddingBytesBeforeCounters)
INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize)
INSTR_PROF_RAW_HEADER(uint64_t, PaddingBytesAfterCounters, PaddingBytesAfterCounters)
INSTR_PROF_RAW_HEADER(uint64_t, NamesSize, NamesSize)
-INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta, (uintptr_t)CountersBegin)
+INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta,
+ (uintptr_t)CountersBegin - (uintptr_t)DataBegin)
INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin)
INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last)
-INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL))
#undef INSTR_PROF_RAW_HEADER
/* INSTR_PROF_RAW_HEADER end */
@@ -646,7 +645,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
(uint64_t)'f' << 16 | (uint64_t)'R' << 8 | (uint64_t)129
/* Raw profile format version (start from 1). */
-#define INSTR_PROF_RAW_VERSION 6
+#define INSTR_PROF_RAW_VERSION 8
/* Indexed profile format version (start from 1). */
#define INSTR_PROF_INDEX_VERSION 7
/* Coverage mapping format version (start from 0). */
diff --git a/compiler-rt/include/sanitizer/asan_interface.h b/compiler-rt/include/sanitizer/asan_interface.h
index 792ef9cfaa32..9bff21c117b3 100644
--- a/compiler-rt/include/sanitizer/asan_interface.h
+++ b/compiler-rt/include/sanitizer/asan_interface.h
@@ -316,7 +316,7 @@ void *__asan_addr_is_in_fake_stack(void *fake_stack, void *addr, void **beg,
void __asan_handle_no_return(void);
/// Update allocation stack trace for the given allocation to the current stack
-/// trace. Returns 1 if successfull, 0 if not.
+/// trace. Returns 1 if successful, 0 if not.
int __asan_update_allocation_context(void* addr);
#ifdef __cplusplus
diff --git a/compiler-rt/include/sanitizer/common_interface_defs.h b/compiler-rt/include/sanitizer/common_interface_defs.h
index cd69285b8d4a..692b8f70c969 100644
--- a/compiler-rt/include/sanitizer/common_interface_defs.h
+++ b/compiler-rt/include/sanitizer/common_interface_defs.h
@@ -28,7 +28,7 @@ typedef struct {
// Enable sandbox support in sanitizer coverage.
int coverage_sandboxed;
// File descriptor to write coverage data to. If -1 is passed, a file will
- // be pre-opened by __sanitizer_sandobx_on_notify(). This field has no
+ // be pre-opened by __sanitizer_sandbox_on_notify(). This field has no
// effect if coverage_sandboxed == 0.
intptr_t coverage_fd;
// If non-zero, split the coverage data into well-formed blocks. This is
diff --git a/compiler-rt/include/sanitizer/dfsan_interface.h b/compiler-rt/include/sanitizer/dfsan_interface.h
index cd3b6d6e2b16..d6209a3ea2b2 100644
--- a/compiler-rt/include/sanitizer/dfsan_interface.h
+++ b/compiler-rt/include/sanitizer/dfsan_interface.h
@@ -150,8 +150,7 @@ int dfsan_get_track_origins(void);
#ifdef __cplusplus
} // extern "C"
-template <typename T>
-void dfsan_set_label(dfsan_label label, T &data) { // NOLINT
+template <typename T> void dfsan_set_label(dfsan_label label, T &data) {
dfsan_set_label(label, (void *)&data, sizeof(T));
}
diff --git a/compiler-rt/include/sanitizer/linux_syscall_hooks.h b/compiler-rt/include/sanitizer/linux_syscall_hooks.h
index 56eae3d40f96..3f3f1e78dfb8 100644
--- a/compiler-rt/include/sanitizer/linux_syscall_hooks.h
+++ b/compiler-rt/include/sanitizer/linux_syscall_hooks.h
@@ -20,1493 +20,1502 @@
#ifndef SANITIZER_LINUX_SYSCALL_HOOKS_H
#define SANITIZER_LINUX_SYSCALL_HOOKS_H
-#define __sanitizer_syscall_pre_time(tloc) \
+#define __sanitizer_syscall_pre_time(tloc) \
__sanitizer_syscall_pre_impl_time((long)(tloc))
-#define __sanitizer_syscall_post_time(res, tloc) \
+#define __sanitizer_syscall_post_time(res, tloc) \
__sanitizer_syscall_post_impl_time(res, (long)(tloc))
-#define __sanitizer_syscall_pre_stime(tptr) \
+#define __sanitizer_syscall_pre_stime(tptr) \
__sanitizer_syscall_pre_impl_stime((long)(tptr))
-#define __sanitizer_syscall_post_stime(res, tptr) \
+#define __sanitizer_syscall_post_stime(res, tptr) \
__sanitizer_syscall_post_impl_stime(res, (long)(tptr))
-#define __sanitizer_syscall_pre_gettimeofday(tv, tz) \
+#define __sanitizer_syscall_pre_gettimeofday(tv, tz) \
__sanitizer_syscall_pre_impl_gettimeofday((long)(tv), (long)(tz))
-#define __sanitizer_syscall_post_gettimeofday(res, tv, tz) \
+#define __sanitizer_syscall_post_gettimeofday(res, tv, tz) \
__sanitizer_syscall_post_impl_gettimeofday(res, (long)(tv), (long)(tz))
-#define __sanitizer_syscall_pre_settimeofday(tv, tz) \
+#define __sanitizer_syscall_pre_settimeofday(tv, tz) \
__sanitizer_syscall_pre_impl_settimeofday((long)(tv), (long)(tz))
-#define __sanitizer_syscall_post_settimeofday(res, tv, tz) \
+#define __sanitizer_syscall_post_settimeofday(res, tv, tz) \
__sanitizer_syscall_post_impl_settimeofday(res, (long)(tv), (long)(tz))
-#define __sanitizer_syscall_pre_adjtimex(txc_p) \
+#define __sanitizer_syscall_pre_adjtimex(txc_p) \
__sanitizer_syscall_pre_impl_adjtimex((long)(txc_p))
-#define __sanitizer_syscall_post_adjtimex(res, txc_p) \
+#define __sanitizer_syscall_post_adjtimex(res, txc_p) \
__sanitizer_syscall_post_impl_adjtimex(res, (long)(txc_p))
-#define __sanitizer_syscall_pre_times(tbuf) \
+#define __sanitizer_syscall_pre_times(tbuf) \
__sanitizer_syscall_pre_impl_times((long)(tbuf))
-#define __sanitizer_syscall_post_times(res, tbuf) \
+#define __sanitizer_syscall_post_times(res, tbuf) \
__sanitizer_syscall_post_impl_times(res, (long)(tbuf))
#define __sanitizer_syscall_pre_gettid() __sanitizer_syscall_pre_impl_gettid()
-#define __sanitizer_syscall_post_gettid(res) \
+#define __sanitizer_syscall_post_gettid(res) \
__sanitizer_syscall_post_impl_gettid(res)
-#define __sanitizer_syscall_pre_nanosleep(rqtp, rmtp) \
+#define __sanitizer_syscall_pre_nanosleep(rqtp, rmtp) \
__sanitizer_syscall_pre_impl_nanosleep((long)(rqtp), (long)(rmtp))
-#define __sanitizer_syscall_post_nanosleep(res, rqtp, rmtp) \
+#define __sanitizer_syscall_post_nanosleep(res, rqtp, rmtp) \
__sanitizer_syscall_post_impl_nanosleep(res, (long)(rqtp), (long)(rmtp))
-#define __sanitizer_syscall_pre_alarm(seconds) \
+#define __sanitizer_syscall_pre_alarm(seconds) \
__sanitizer_syscall_pre_impl_alarm((long)(seconds))
-#define __sanitizer_syscall_post_alarm(res, seconds) \
+#define __sanitizer_syscall_post_alarm(res, seconds) \
__sanitizer_syscall_post_impl_alarm(res, (long)(seconds))
#define __sanitizer_syscall_pre_getpid() __sanitizer_syscall_pre_impl_getpid()
-#define __sanitizer_syscall_post_getpid(res) \
+#define __sanitizer_syscall_post_getpid(res) \
__sanitizer_syscall_post_impl_getpid(res)
#define __sanitizer_syscall_pre_getppid() __sanitizer_syscall_pre_impl_getppid()
-#define __sanitizer_syscall_post_getppid(res) \
+#define __sanitizer_syscall_post_getppid(res) \
__sanitizer_syscall_post_impl_getppid(res)
#define __sanitizer_syscall_pre_getuid() __sanitizer_syscall_pre_impl_getuid()
-#define __sanitizer_syscall_post_getuid(res) \
+#define __sanitizer_syscall_post_getuid(res) \
__sanitizer_syscall_post_impl_getuid(res)
#define __sanitizer_syscall_pre_geteuid() __sanitizer_syscall_pre_impl_geteuid()
-#define __sanitizer_syscall_post_geteuid(res) \
+#define __sanitizer_syscall_post_geteuid(res) \
__sanitizer_syscall_post_impl_geteuid(res)
#define __sanitizer_syscall_pre_getgid() __sanitizer_syscall_pre_impl_getgid()
-#define __sanitizer_syscall_post_getgid(res) \
+#define __sanitizer_syscall_post_getgid(res) \
__sanitizer_syscall_post_impl_getgid(res)
#define __sanitizer_syscall_pre_getegid() __sanitizer_syscall_pre_impl_getegid()
-#define __sanitizer_syscall_post_getegid(res) \
+#define __sanitizer_syscall_post_getegid(res) \
__sanitizer_syscall_post_impl_getegid(res)
-#define __sanitizer_syscall_pre_getresuid(ruid, euid, suid) \
- __sanitizer_syscall_pre_impl_getresuid((long)(ruid), (long)(euid), \
+#define __sanitizer_syscall_pre_getresuid(ruid, euid, suid) \
+ __sanitizer_syscall_pre_impl_getresuid((long)(ruid), (long)(euid), \
(long)(suid))
-#define __sanitizer_syscall_post_getresuid(res, ruid, euid, suid) \
- __sanitizer_syscall_post_impl_getresuid(res, (long)(ruid), (long)(euid), \
+#define __sanitizer_syscall_post_getresuid(res, ruid, euid, suid) \
+ __sanitizer_syscall_post_impl_getresuid(res, (long)(ruid), (long)(euid), \
(long)(suid))
-#define __sanitizer_syscall_pre_getresgid(rgid, egid, sgid) \
- __sanitizer_syscall_pre_impl_getresgid((long)(rgid), (long)(egid), \
+#define __sanitizer_syscall_pre_getresgid(rgid, egid, sgid) \
+ __sanitizer_syscall_pre_impl_getresgid((long)(rgid), (long)(egid), \
(long)(sgid))
-#define __sanitizer_syscall_post_getresgid(res, rgid, egid, sgid) \
- __sanitizer_syscall_post_impl_getresgid(res, (long)(rgid), (long)(egid), \
+#define __sanitizer_syscall_post_getresgid(res, rgid, egid, sgid) \
+ __sanitizer_syscall_post_impl_getresgid(res, (long)(rgid), (long)(egid), \
(long)(sgid))
-#define __sanitizer_syscall_pre_getpgid(pid) \
+#define __sanitizer_syscall_pre_getpgid(pid) \
__sanitizer_syscall_pre_impl_getpgid((long)(pid))
-#define __sanitizer_syscall_post_getpgid(res, pid) \
+#define __sanitizer_syscall_post_getpgid(res, pid) \
__sanitizer_syscall_post_impl_getpgid(res, (long)(pid))
#define __sanitizer_syscall_pre_getpgrp() __sanitizer_syscall_pre_impl_getpgrp()
-#define __sanitizer_syscall_post_getpgrp(res) \
+#define __sanitizer_syscall_post_getpgrp(res) \
__sanitizer_syscall_post_impl_getpgrp(res)
-#define __sanitizer_syscall_pre_getsid(pid) \
+#define __sanitizer_syscall_pre_getsid(pid) \
__sanitizer_syscall_pre_impl_getsid((long)(pid))
-#define __sanitizer_syscall_post_getsid(res, pid) \
+#define __sanitizer_syscall_post_getsid(res, pid) \
__sanitizer_syscall_post_impl_getsid(res, (long)(pid))
-#define __sanitizer_syscall_pre_getgroups(gidsetsize, grouplist) \
+#define __sanitizer_syscall_pre_getgroups(gidsetsize, grouplist) \
__sanitizer_syscall_pre_impl_getgroups((long)(gidsetsize), (long)(grouplist))
-#define __sanitizer_syscall_post_getgroups(res, gidsetsize, grouplist) \
- __sanitizer_syscall_post_impl_getgroups(res, (long)(gidsetsize), \
+#define __sanitizer_syscall_post_getgroups(res, gidsetsize, grouplist) \
+ __sanitizer_syscall_post_impl_getgroups(res, (long)(gidsetsize), \
(long)(grouplist))
-#define __sanitizer_syscall_pre_setregid(rgid, egid) \
+#define __sanitizer_syscall_pre_setregid(rgid, egid) \
__sanitizer_syscall_pre_impl_setregid((long)(rgid), (long)(egid))
-#define __sanitizer_syscall_post_setregid(res, rgid, egid) \
+#define __sanitizer_syscall_post_setregid(res, rgid, egid) \
__sanitizer_syscall_post_impl_setregid(res, (long)(rgid), (long)(egid))
-#define __sanitizer_syscall_pre_setgid(gid) \
+#define __sanitizer_syscall_pre_setgid(gid) \
__sanitizer_syscall_pre_impl_setgid((long)(gid))
-#define __sanitizer_syscall_post_setgid(res, gid) \
+#define __sanitizer_syscall_post_setgid(res, gid) \
__sanitizer_syscall_post_impl_setgid(res, (long)(gid))
-#define __sanitizer_syscall_pre_setreuid(ruid, euid) \
+#define __sanitizer_syscall_pre_setreuid(ruid, euid) \
__sanitizer_syscall_pre_impl_setreuid((long)(ruid), (long)(euid))
-#define __sanitizer_syscall_post_setreuid(res, ruid, euid) \
+#define __sanitizer_syscall_post_setreuid(res, ruid, euid) \
__sanitizer_syscall_post_impl_setreuid(res, (long)(ruid), (long)(euid))
-#define __sanitizer_syscall_pre_setuid(uid) \
+#define __sanitizer_syscall_pre_setuid(uid) \
__sanitizer_syscall_pre_impl_setuid((long)(uid))
-#define __sanitizer_syscall_post_setuid(res, uid) \
+#define __sanitizer_syscall_post_setuid(res, uid) \
__sanitizer_syscall_post_impl_setuid(res, (long)(uid))
-#define __sanitizer_syscall_pre_setresuid(ruid, euid, suid) \
- __sanitizer_syscall_pre_impl_setresuid((long)(ruid), (long)(euid), \
+#define __sanitizer_syscall_pre_setresuid(ruid, euid, suid) \
+ __sanitizer_syscall_pre_impl_setresuid((long)(ruid), (long)(euid), \
(long)(suid))
-#define __sanitizer_syscall_post_setresuid(res, ruid, euid, suid) \
- __sanitizer_syscall_post_impl_setresuid(res, (long)(ruid), (long)(euid), \
+#define __sanitizer_syscall_post_setresuid(res, ruid, euid, suid) \
+ __sanitizer_syscall_post_impl_setresuid(res, (long)(ruid), (long)(euid), \
(long)(suid))
-#define __sanitizer_syscall_pre_setresgid(rgid, egid, sgid) \
- __sanitizer_syscall_pre_impl_setresgid((long)(rgid), (long)(egid), \
+#define __sanitizer_syscall_pre_setresgid(rgid, egid, sgid) \
+ __sanitizer_syscall_pre_impl_setresgid((long)(rgid), (long)(egid), \
(long)(sgid))
-#define __sanitizer_syscall_post_setresgid(res, rgid, egid, sgid) \
- __sanitizer_syscall_post_impl_setresgid(res, (long)(rgid), (long)(egid), \
+#define __sanitizer_syscall_post_setresgid(res, rgid, egid, sgid) \
+ __sanitizer_syscall_post_impl_setresgid(res, (long)(rgid), (long)(egid), \
(long)(sgid))
-#define __sanitizer_syscall_pre_setfsuid(uid) \
+#define __sanitizer_syscall_pre_setfsuid(uid) \
__sanitizer_syscall_pre_impl_setfsuid((long)(uid))
-#define __sanitizer_syscall_post_setfsuid(res, uid) \
+#define __sanitizer_syscall_post_setfsuid(res, uid) \
__sanitizer_syscall_post_impl_setfsuid(res, (long)(uid))
-#define __sanitizer_syscall_pre_setfsgid(gid) \
+#define __sanitizer_syscall_pre_setfsgid(gid) \
__sanitizer_syscall_pre_impl_setfsgid((long)(gid))
-#define __sanitizer_syscall_post_setfsgid(res, gid) \
+#define __sanitizer_syscall_post_setfsgid(res, gid) \
__sanitizer_syscall_post_impl_setfsgid(res, (long)(gid))
-#define __sanitizer_syscall_pre_setpgid(pid, pgid) \
+#define __sanitizer_syscall_pre_setpgid(pid, pgid) \
__sanitizer_syscall_pre_impl_setpgid((long)(pid), (long)(pgid))
-#define __sanitizer_syscall_post_setpgid(res, pid, pgid) \
+#define __sanitizer_syscall_post_setpgid(res, pid, pgid) \
__sanitizer_syscall_post_impl_setpgid(res, (long)(pid), (long)(pgid))
#define __sanitizer_syscall_pre_setsid() __sanitizer_syscall_pre_impl_setsid()
-#define __sanitizer_syscall_post_setsid(res) \
+#define __sanitizer_syscall_post_setsid(res) \
__sanitizer_syscall_post_impl_setsid(res)
-#define __sanitizer_syscall_pre_setgroups(gidsetsize, grouplist) \
+#define __sanitizer_syscall_pre_setgroups(gidsetsize, grouplist) \
__sanitizer_syscall_pre_impl_setgroups((long)(gidsetsize), (long)(grouplist))
-#define __sanitizer_syscall_post_setgroups(res, gidsetsize, grouplist) \
- __sanitizer_syscall_post_impl_setgroups(res, (long)(gidsetsize), \
+#define __sanitizer_syscall_post_setgroups(res, gidsetsize, grouplist) \
+ __sanitizer_syscall_post_impl_setgroups(res, (long)(gidsetsize), \
(long)(grouplist))
-#define __sanitizer_syscall_pre_acct(name) \
+#define __sanitizer_syscall_pre_acct(name) \
__sanitizer_syscall_pre_impl_acct((long)(name))
-#define __sanitizer_syscall_post_acct(res, name) \
+#define __sanitizer_syscall_post_acct(res, name) \
__sanitizer_syscall_post_impl_acct(res, (long)(name))
-#define __sanitizer_syscall_pre_capget(header, dataptr) \
+#define __sanitizer_syscall_pre_capget(header, dataptr) \
__sanitizer_syscall_pre_impl_capget((long)(header), (long)(dataptr))
-#define __sanitizer_syscall_post_capget(res, header, dataptr) \
+#define __sanitizer_syscall_post_capget(res, header, dataptr) \
__sanitizer_syscall_post_impl_capget(res, (long)(header), (long)(dataptr))
-#define __sanitizer_syscall_pre_capset(header, data) \
+#define __sanitizer_syscall_pre_capset(header, data) \
__sanitizer_syscall_pre_impl_capset((long)(header), (long)(data))
-#define __sanitizer_syscall_post_capset(res, header, data) \
+#define __sanitizer_syscall_post_capset(res, header, data) \
__sanitizer_syscall_post_impl_capset(res, (long)(header), (long)(data))
-#define __sanitizer_syscall_pre_personality(personality) \
+#define __sanitizer_syscall_pre_personality(personality) \
__sanitizer_syscall_pre_impl_personality((long)(personality))
-#define __sanitizer_syscall_post_personality(res, personality) \
+#define __sanitizer_syscall_post_personality(res, personality) \
__sanitizer_syscall_post_impl_personality(res, (long)(personality))
-#define __sanitizer_syscall_pre_sigpending(set) \
+#define __sanitizer_syscall_pre_sigpending(set) \
__sanitizer_syscall_pre_impl_sigpending((long)(set))
-#define __sanitizer_syscall_post_sigpending(res, set) \
+#define __sanitizer_syscall_post_sigpending(res, set) \
__sanitizer_syscall_post_impl_sigpending(res, (long)(set))
-#define __sanitizer_syscall_pre_sigprocmask(how, set, oset) \
- __sanitizer_syscall_pre_impl_sigprocmask((long)(how), (long)(set), \
+#define __sanitizer_syscall_pre_sigprocmask(how, set, oset) \
+ __sanitizer_syscall_pre_impl_sigprocmask((long)(how), (long)(set), \
(long)(oset))
-#define __sanitizer_syscall_post_sigprocmask(res, how, set, oset) \
- __sanitizer_syscall_post_impl_sigprocmask(res, (long)(how), (long)(set), \
+#define __sanitizer_syscall_post_sigprocmask(res, how, set, oset) \
+ __sanitizer_syscall_post_impl_sigprocmask(res, (long)(how), (long)(set), \
(long)(oset))
-#define __sanitizer_syscall_pre_getitimer(which, value) \
+#define __sanitizer_syscall_pre_getitimer(which, value) \
__sanitizer_syscall_pre_impl_getitimer((long)(which), (long)(value))
-#define __sanitizer_syscall_post_getitimer(res, which, value) \
+#define __sanitizer_syscall_post_getitimer(res, which, value) \
__sanitizer_syscall_post_impl_getitimer(res, (long)(which), (long)(value))
-#define __sanitizer_syscall_pre_setitimer(which, value, ovalue) \
- __sanitizer_syscall_pre_impl_setitimer((long)(which), (long)(value), \
+#define __sanitizer_syscall_pre_setitimer(which, value, ovalue) \
+ __sanitizer_syscall_pre_impl_setitimer((long)(which), (long)(value), \
(long)(ovalue))
-#define __sanitizer_syscall_post_setitimer(res, which, value, ovalue) \
- __sanitizer_syscall_post_impl_setitimer(res, (long)(which), (long)(value), \
+#define __sanitizer_syscall_post_setitimer(res, which, value, ovalue) \
+ __sanitizer_syscall_post_impl_setitimer(res, (long)(which), (long)(value), \
(long)(ovalue))
-#define __sanitizer_syscall_pre_timer_create(which_clock, timer_event_spec, \
- created_timer_id) \
- __sanitizer_syscall_pre_impl_timer_create( \
+#define __sanitizer_syscall_pre_timer_create(which_clock, timer_event_spec, \
+ created_timer_id) \
+ __sanitizer_syscall_pre_impl_timer_create( \
(long)(which_clock), (long)(timer_event_spec), (long)(created_timer_id))
-#define __sanitizer_syscall_post_timer_create( \
- res, which_clock, timer_event_spec, created_timer_id) \
- __sanitizer_syscall_post_impl_timer_create(res, (long)(which_clock), \
- (long)(timer_event_spec), \
+#define __sanitizer_syscall_post_timer_create( \
+ res, which_clock, timer_event_spec, created_timer_id) \
+ __sanitizer_syscall_post_impl_timer_create(res, (long)(which_clock), \
+ (long)(timer_event_spec), \
(long)(created_timer_id))
-#define __sanitizer_syscall_pre_timer_gettime(timer_id, setting) \
+#define __sanitizer_syscall_pre_timer_gettime(timer_id, setting) \
__sanitizer_syscall_pre_impl_timer_gettime((long)(timer_id), (long)(setting))
-#define __sanitizer_syscall_post_timer_gettime(res, timer_id, setting) \
- __sanitizer_syscall_post_impl_timer_gettime(res, (long)(timer_id), \
+#define __sanitizer_syscall_post_timer_gettime(res, timer_id, setting) \
+ __sanitizer_syscall_post_impl_timer_gettime(res, (long)(timer_id), \
(long)(setting))
-#define __sanitizer_syscall_pre_timer_getoverrun(timer_id) \
+#define __sanitizer_syscall_pre_timer_getoverrun(timer_id) \
__sanitizer_syscall_pre_impl_timer_getoverrun((long)(timer_id))
-#define __sanitizer_syscall_post_timer_getoverrun(res, timer_id) \
+#define __sanitizer_syscall_post_timer_getoverrun(res, timer_id) \
__sanitizer_syscall_post_impl_timer_getoverrun(res, (long)(timer_id))
-#define __sanitizer_syscall_pre_timer_settime(timer_id, flags, new_setting, \
- old_setting) \
- __sanitizer_syscall_pre_impl_timer_settime((long)(timer_id), (long)(flags), \
- (long)(new_setting), \
+#define __sanitizer_syscall_pre_timer_settime(timer_id, flags, new_setting, \
+ old_setting) \
+ __sanitizer_syscall_pre_impl_timer_settime((long)(timer_id), (long)(flags), \
+ (long)(new_setting), \
(long)(old_setting))
-#define __sanitizer_syscall_post_timer_settime(res, timer_id, flags, \
- new_setting, old_setting) \
- __sanitizer_syscall_post_impl_timer_settime( \
- res, (long)(timer_id), (long)(flags), (long)(new_setting), \
+#define __sanitizer_syscall_post_timer_settime(res, timer_id, flags, \
+ new_setting, old_setting) \
+ __sanitizer_syscall_post_impl_timer_settime( \
+ res, (long)(timer_id), (long)(flags), (long)(new_setting), \
(long)(old_setting))
-#define __sanitizer_syscall_pre_timer_delete(timer_id) \
+#define __sanitizer_syscall_pre_timer_delete(timer_id) \
__sanitizer_syscall_pre_impl_timer_delete((long)(timer_id))
-#define __sanitizer_syscall_post_timer_delete(res, timer_id) \
+#define __sanitizer_syscall_post_timer_delete(res, timer_id) \
__sanitizer_syscall_post_impl_timer_delete(res, (long)(timer_id))
-#define __sanitizer_syscall_pre_clock_settime(which_clock, tp) \
+#define __sanitizer_syscall_pre_clock_settime(which_clock, tp) \
__sanitizer_syscall_pre_impl_clock_settime((long)(which_clock), (long)(tp))
-#define __sanitizer_syscall_post_clock_settime(res, which_clock, tp) \
- __sanitizer_syscall_post_impl_clock_settime(res, (long)(which_clock), \
+#define __sanitizer_syscall_post_clock_settime(res, which_clock, tp) \
+ __sanitizer_syscall_post_impl_clock_settime(res, (long)(which_clock), \
(long)(tp))
-#define __sanitizer_syscall_pre_clock_gettime(which_clock, tp) \
+#define __sanitizer_syscall_pre_clock_gettime(which_clock, tp) \
__sanitizer_syscall_pre_impl_clock_gettime((long)(which_clock), (long)(tp))
-#define __sanitizer_syscall_post_clock_gettime(res, which_clock, tp) \
- __sanitizer_syscall_post_impl_clock_gettime(res, (long)(which_clock), \
+#define __sanitizer_syscall_post_clock_gettime(res, which_clock, tp) \
+ __sanitizer_syscall_post_impl_clock_gettime(res, (long)(which_clock), \
(long)(tp))
-#define __sanitizer_syscall_pre_clock_adjtime(which_clock, tx) \
+#define __sanitizer_syscall_pre_clock_adjtime(which_clock, tx) \
__sanitizer_syscall_pre_impl_clock_adjtime((long)(which_clock), (long)(tx))
-#define __sanitizer_syscall_post_clock_adjtime(res, which_clock, tx) \
- __sanitizer_syscall_post_impl_clock_adjtime(res, (long)(which_clock), \
+#define __sanitizer_syscall_post_clock_adjtime(res, which_clock, tx) \
+ __sanitizer_syscall_post_impl_clock_adjtime(res, (long)(which_clock), \
(long)(tx))
-#define __sanitizer_syscall_pre_clock_getres(which_clock, tp) \
+#define __sanitizer_syscall_pre_clock_getres(which_clock, tp) \
__sanitizer_syscall_pre_impl_clock_getres((long)(which_clock), (long)(tp))
-#define __sanitizer_syscall_post_clock_getres(res, which_clock, tp) \
- __sanitizer_syscall_post_impl_clock_getres(res, (long)(which_clock), \
+#define __sanitizer_syscall_post_clock_getres(res, which_clock, tp) \
+ __sanitizer_syscall_post_impl_clock_getres(res, (long)(which_clock), \
(long)(tp))
-#define __sanitizer_syscall_pre_clock_nanosleep(which_clock, flags, rqtp, \
- rmtp) \
- __sanitizer_syscall_pre_impl_clock_nanosleep( \
+#define __sanitizer_syscall_pre_clock_nanosleep(which_clock, flags, rqtp, \
+ rmtp) \
+ __sanitizer_syscall_pre_impl_clock_nanosleep( \
(long)(which_clock), (long)(flags), (long)(rqtp), (long)(rmtp))
-#define __sanitizer_syscall_post_clock_nanosleep(res, which_clock, flags, \
- rqtp, rmtp) \
- __sanitizer_syscall_post_impl_clock_nanosleep( \
+#define __sanitizer_syscall_post_clock_nanosleep(res, which_clock, flags, \
+ rqtp, rmtp) \
+ __sanitizer_syscall_post_impl_clock_nanosleep( \
res, (long)(which_clock), (long)(flags), (long)(rqtp), (long)(rmtp))
-#define __sanitizer_syscall_pre_nice(increment) \
+#define __sanitizer_syscall_pre_nice(increment) \
__sanitizer_syscall_pre_impl_nice((long)(increment))
-#define __sanitizer_syscall_post_nice(res, increment) \
+#define __sanitizer_syscall_post_nice(res, increment) \
__sanitizer_syscall_post_impl_nice(res, (long)(increment))
#define __sanitizer_syscall_pre_sched_setscheduler(pid, policy, param) \
__sanitizer_syscall_pre_impl_sched_setscheduler((long)(pid), (long)(policy), \
(long)(param))
-#define __sanitizer_syscall_post_sched_setscheduler(res, pid, policy, param) \
- __sanitizer_syscall_post_impl_sched_setscheduler( \
+#define __sanitizer_syscall_post_sched_setscheduler(res, pid, policy, param) \
+ __sanitizer_syscall_post_impl_sched_setscheduler( \
res, (long)(pid), (long)(policy), (long)(param))
-#define __sanitizer_syscall_pre_sched_setparam(pid, param) \
+#define __sanitizer_syscall_pre_sched_setparam(pid, param) \
__sanitizer_syscall_pre_impl_sched_setparam((long)(pid), (long)(param))
-#define __sanitizer_syscall_post_sched_setparam(res, pid, param) \
+#define __sanitizer_syscall_post_sched_setparam(res, pid, param) \
__sanitizer_syscall_post_impl_sched_setparam(res, (long)(pid), (long)(param))
-#define __sanitizer_syscall_pre_sched_getscheduler(pid) \
+#define __sanitizer_syscall_pre_sched_getscheduler(pid) \
__sanitizer_syscall_pre_impl_sched_getscheduler((long)(pid))
-#define __sanitizer_syscall_post_sched_getscheduler(res, pid) \
+#define __sanitizer_syscall_post_sched_getscheduler(res, pid) \
__sanitizer_syscall_post_impl_sched_getscheduler(res, (long)(pid))
-#define __sanitizer_syscall_pre_sched_getparam(pid, param) \
+#define __sanitizer_syscall_pre_sched_getparam(pid, param) \
__sanitizer_syscall_pre_impl_sched_getparam((long)(pid), (long)(param))
-#define __sanitizer_syscall_post_sched_getparam(res, pid, param) \
+#define __sanitizer_syscall_post_sched_getparam(res, pid, param) \
__sanitizer_syscall_post_impl_sched_getparam(res, (long)(pid), (long)(param))
-#define __sanitizer_syscall_pre_sched_setaffinity(pid, len, user_mask_ptr) \
- __sanitizer_syscall_pre_impl_sched_setaffinity((long)(pid), (long)(len), \
+#define __sanitizer_syscall_pre_sched_setaffinity(pid, len, user_mask_ptr) \
+ __sanitizer_syscall_pre_impl_sched_setaffinity((long)(pid), (long)(len), \
(long)(user_mask_ptr))
-#define __sanitizer_syscall_post_sched_setaffinity(res, pid, len, \
- user_mask_ptr) \
- __sanitizer_syscall_post_impl_sched_setaffinity( \
+#define __sanitizer_syscall_post_sched_setaffinity(res, pid, len, \
+ user_mask_ptr) \
+ __sanitizer_syscall_post_impl_sched_setaffinity( \
res, (long)(pid), (long)(len), (long)(user_mask_ptr))
-#define __sanitizer_syscall_pre_sched_getaffinity(pid, len, user_mask_ptr) \
- __sanitizer_syscall_pre_impl_sched_getaffinity((long)(pid), (long)(len), \
+#define __sanitizer_syscall_pre_sched_getaffinity(pid, len, user_mask_ptr) \
+ __sanitizer_syscall_pre_impl_sched_getaffinity((long)(pid), (long)(len), \
(long)(user_mask_ptr))
-#define __sanitizer_syscall_post_sched_getaffinity(res, pid, len, \
- user_mask_ptr) \
- __sanitizer_syscall_post_impl_sched_getaffinity( \
+#define __sanitizer_syscall_post_sched_getaffinity(res, pid, len, \
+ user_mask_ptr) \
+ __sanitizer_syscall_post_impl_sched_getaffinity( \
res, (long)(pid), (long)(len), (long)(user_mask_ptr))
-#define __sanitizer_syscall_pre_sched_yield() \
+#define __sanitizer_syscall_pre_sched_yield() \
__sanitizer_syscall_pre_impl_sched_yield()
-#define __sanitizer_syscall_post_sched_yield(res) \
+#define __sanitizer_syscall_post_sched_yield(res) \
__sanitizer_syscall_post_impl_sched_yield(res)
-#define __sanitizer_syscall_pre_sched_get_priority_max(policy) \
+#define __sanitizer_syscall_pre_sched_get_priority_max(policy) \
__sanitizer_syscall_pre_impl_sched_get_priority_max((long)(policy))
-#define __sanitizer_syscall_post_sched_get_priority_max(res, policy) \
+#define __sanitizer_syscall_post_sched_get_priority_max(res, policy) \
__sanitizer_syscall_post_impl_sched_get_priority_max(res, (long)(policy))
-#define __sanitizer_syscall_pre_sched_get_priority_min(policy) \
+#define __sanitizer_syscall_pre_sched_get_priority_min(policy) \
__sanitizer_syscall_pre_impl_sched_get_priority_min((long)(policy))
-#define __sanitizer_syscall_post_sched_get_priority_min(res, policy) \
+#define __sanitizer_syscall_post_sched_get_priority_min(res, policy) \
__sanitizer_syscall_post_impl_sched_get_priority_min(res, (long)(policy))
-#define __sanitizer_syscall_pre_sched_rr_get_interval(pid, interval) \
- __sanitizer_syscall_pre_impl_sched_rr_get_interval((long)(pid), \
+#define __sanitizer_syscall_pre_sched_rr_get_interval(pid, interval) \
+ __sanitizer_syscall_pre_impl_sched_rr_get_interval((long)(pid), \
(long)(interval))
-#define __sanitizer_syscall_post_sched_rr_get_interval(res, pid, interval) \
- __sanitizer_syscall_post_impl_sched_rr_get_interval(res, (long)(pid), \
+#define __sanitizer_syscall_post_sched_rr_get_interval(res, pid, interval) \
+ __sanitizer_syscall_post_impl_sched_rr_get_interval(res, (long)(pid), \
(long)(interval))
-#define __sanitizer_syscall_pre_setpriority(which, who, niceval) \
- __sanitizer_syscall_pre_impl_setpriority((long)(which), (long)(who), \
+#define __sanitizer_syscall_pre_setpriority(which, who, niceval) \
+ __sanitizer_syscall_pre_impl_setpriority((long)(which), (long)(who), \
(long)(niceval))
-#define __sanitizer_syscall_post_setpriority(res, which, who, niceval) \
- __sanitizer_syscall_post_impl_setpriority(res, (long)(which), (long)(who), \
+#define __sanitizer_syscall_post_setpriority(res, which, who, niceval) \
+ __sanitizer_syscall_post_impl_setpriority(res, (long)(which), (long)(who), \
(long)(niceval))
-#define __sanitizer_syscall_pre_getpriority(which, who) \
+#define __sanitizer_syscall_pre_getpriority(which, who) \
__sanitizer_syscall_pre_impl_getpriority((long)(which), (long)(who))
-#define __sanitizer_syscall_post_getpriority(res, which, who) \
+#define __sanitizer_syscall_post_getpriority(res, which, who) \
__sanitizer_syscall_post_impl_getpriority(res, (long)(which), (long)(who))
-#define __sanitizer_syscall_pre_shutdown(arg0, arg1) \
+#define __sanitizer_syscall_pre_shutdown(arg0, arg1) \
__sanitizer_syscall_pre_impl_shutdown((long)(arg0), (long)(arg1))
-#define __sanitizer_syscall_post_shutdown(res, arg0, arg1) \
+#define __sanitizer_syscall_post_shutdown(res, arg0, arg1) \
__sanitizer_syscall_post_impl_shutdown(res, (long)(arg0), (long)(arg1))
-#define __sanitizer_syscall_pre_reboot(magic1, magic2, cmd, arg) \
- __sanitizer_syscall_pre_impl_reboot((long)(magic1), (long)(magic2), \
+#define __sanitizer_syscall_pre_reboot(magic1, magic2, cmd, arg) \
+ __sanitizer_syscall_pre_impl_reboot((long)(magic1), (long)(magic2), \
(long)(cmd), (long)(arg))
-#define __sanitizer_syscall_post_reboot(res, magic1, magic2, cmd, arg) \
- __sanitizer_syscall_post_impl_reboot(res, (long)(magic1), (long)(magic2), \
+#define __sanitizer_syscall_post_reboot(res, magic1, magic2, cmd, arg) \
+ __sanitizer_syscall_post_impl_reboot(res, (long)(magic1), (long)(magic2), \
(long)(cmd), (long)(arg))
-#define __sanitizer_syscall_pre_restart_syscall() \
+#define __sanitizer_syscall_pre_restart_syscall() \
__sanitizer_syscall_pre_impl_restart_syscall()
-#define __sanitizer_syscall_post_restart_syscall(res) \
+#define __sanitizer_syscall_post_restart_syscall(res) \
__sanitizer_syscall_post_impl_restart_syscall(res)
-#define __sanitizer_syscall_pre_kexec_load(entry, nr_segments, segments, \
- flags) \
- __sanitizer_syscall_pre_impl_kexec_load((long)(entry), (long)(nr_segments), \
+#define __sanitizer_syscall_pre_kexec_load(entry, nr_segments, segments, \
+ flags) \
+ __sanitizer_syscall_pre_impl_kexec_load((long)(entry), (long)(nr_segments), \
(long)(segments), (long)(flags))
#define __sanitizer_syscall_post_kexec_load(res, entry, nr_segments, segments, \
flags) \
__sanitizer_syscall_post_impl_kexec_load(res, (long)(entry), \
(long)(nr_segments), \
(long)(segments), (long)(flags))
-#define __sanitizer_syscall_pre_exit(error_code) \
+#define __sanitizer_syscall_pre_exit(error_code) \
__sanitizer_syscall_pre_impl_exit((long)(error_code))
-#define __sanitizer_syscall_post_exit(res, error_code) \
+#define __sanitizer_syscall_post_exit(res, error_code) \
__sanitizer_syscall_post_impl_exit(res, (long)(error_code))
-#define __sanitizer_syscall_pre_exit_group(error_code) \
+#define __sanitizer_syscall_pre_exit_group(error_code) \
__sanitizer_syscall_pre_impl_exit_group((long)(error_code))
-#define __sanitizer_syscall_post_exit_group(res, error_code) \
+#define __sanitizer_syscall_post_exit_group(res, error_code) \
__sanitizer_syscall_post_impl_exit_group(res, (long)(error_code))
-#define __sanitizer_syscall_pre_wait4(pid, stat_addr, options, ru) \
- __sanitizer_syscall_pre_impl_wait4((long)(pid), (long)(stat_addr), \
+#define __sanitizer_syscall_pre_wait4(pid, stat_addr, options, ru) \
+ __sanitizer_syscall_pre_impl_wait4((long)(pid), (long)(stat_addr), \
(long)(options), (long)(ru))
-#define __sanitizer_syscall_post_wait4(res, pid, stat_addr, options, ru) \
- __sanitizer_syscall_post_impl_wait4(res, (long)(pid), (long)(stat_addr), \
+#define __sanitizer_syscall_post_wait4(res, pid, stat_addr, options, ru) \
+ __sanitizer_syscall_post_impl_wait4(res, (long)(pid), (long)(stat_addr), \
(long)(options), (long)(ru))
-#define __sanitizer_syscall_pre_waitid(which, pid, infop, options, ru) \
- __sanitizer_syscall_pre_impl_waitid( \
+#define __sanitizer_syscall_pre_waitid(which, pid, infop, options, ru) \
+ __sanitizer_syscall_pre_impl_waitid( \
(long)(which), (long)(pid), (long)(infop), (long)(options), (long)(ru))
-#define __sanitizer_syscall_post_waitid(res, which, pid, infop, options, ru) \
- __sanitizer_syscall_post_impl_waitid(res, (long)(which), (long)(pid), \
- (long)(infop), (long)(options), \
+#define __sanitizer_syscall_post_waitid(res, which, pid, infop, options, ru) \
+ __sanitizer_syscall_post_impl_waitid(res, (long)(which), (long)(pid), \
+ (long)(infop), (long)(options), \
(long)(ru))
-#define __sanitizer_syscall_pre_waitpid(pid, stat_addr, options) \
- __sanitizer_syscall_pre_impl_waitpid((long)(pid), (long)(stat_addr), \
+#define __sanitizer_syscall_pre_waitpid(pid, stat_addr, options) \
+ __sanitizer_syscall_pre_impl_waitpid((long)(pid), (long)(stat_addr), \
(long)(options))
-#define __sanitizer_syscall_post_waitpid(res, pid, stat_addr, options) \
- __sanitizer_syscall_post_impl_waitpid(res, (long)(pid), (long)(stat_addr), \
+#define __sanitizer_syscall_post_waitpid(res, pid, stat_addr, options) \
+ __sanitizer_syscall_post_impl_waitpid(res, (long)(pid), (long)(stat_addr), \
(long)(options))
-#define __sanitizer_syscall_pre_set_tid_address(tidptr) \
+#define __sanitizer_syscall_pre_set_tid_address(tidptr) \
__sanitizer_syscall_pre_impl_set_tid_address((long)(tidptr))
-#define __sanitizer_syscall_post_set_tid_address(res, tidptr) \
+#define __sanitizer_syscall_post_set_tid_address(res, tidptr) \
__sanitizer_syscall_post_impl_set_tid_address(res, (long)(tidptr))
-#define __sanitizer_syscall_pre_init_module(umod, len, uargs) \
- __sanitizer_syscall_pre_impl_init_module((long)(umod), (long)(len), \
+#define __sanitizer_syscall_pre_init_module(umod, len, uargs) \
+ __sanitizer_syscall_pre_impl_init_module((long)(umod), (long)(len), \
(long)(uargs))
-#define __sanitizer_syscall_post_init_module(res, umod, len, uargs) \
- __sanitizer_syscall_post_impl_init_module(res, (long)(umod), (long)(len), \
+#define __sanitizer_syscall_post_init_module(res, umod, len, uargs) \
+ __sanitizer_syscall_post_impl_init_module(res, (long)(umod), (long)(len), \
(long)(uargs))
-#define __sanitizer_syscall_pre_delete_module(name_user, flags) \
+#define __sanitizer_syscall_pre_delete_module(name_user, flags) \
__sanitizer_syscall_pre_impl_delete_module((long)(name_user), (long)(flags))
-#define __sanitizer_syscall_post_delete_module(res, name_user, flags) \
- __sanitizer_syscall_post_impl_delete_module(res, (long)(name_user), \
+#define __sanitizer_syscall_post_delete_module(res, name_user, flags) \
+ __sanitizer_syscall_post_impl_delete_module(res, (long)(name_user), \
(long)(flags))
-#define __sanitizer_syscall_pre_rt_sigprocmask(how, set, oset, sigsetsize) \
- __sanitizer_syscall_pre_impl_rt_sigprocmask( \
+#define __sanitizer_syscall_pre_rt_sigprocmask(how, set, oset, sigsetsize) \
+ __sanitizer_syscall_pre_impl_rt_sigprocmask( \
(long)(how), (long)(set), (long)(oset), (long)(sigsetsize))
-#define __sanitizer_syscall_post_rt_sigprocmask(res, how, set, oset, \
- sigsetsize) \
- __sanitizer_syscall_post_impl_rt_sigprocmask( \
+#define __sanitizer_syscall_post_rt_sigprocmask(res, how, set, oset, \
+ sigsetsize) \
+ __sanitizer_syscall_post_impl_rt_sigprocmask( \
res, (long)(how), (long)(set), (long)(oset), (long)(sigsetsize))
-#define __sanitizer_syscall_pre_rt_sigpending(set, sigsetsize) \
+#define __sanitizer_syscall_pre_rt_sigpending(set, sigsetsize) \
__sanitizer_syscall_pre_impl_rt_sigpending((long)(set), (long)(sigsetsize))
-#define __sanitizer_syscall_post_rt_sigpending(res, set, sigsetsize) \
- __sanitizer_syscall_post_impl_rt_sigpending(res, (long)(set), \
+#define __sanitizer_syscall_post_rt_sigpending(res, set, sigsetsize) \
+ __sanitizer_syscall_post_impl_rt_sigpending(res, (long)(set), \
(long)(sigsetsize))
-#define __sanitizer_syscall_pre_rt_sigtimedwait(uthese, uinfo, uts, \
- sigsetsize) \
- __sanitizer_syscall_pre_impl_rt_sigtimedwait( \
+#define __sanitizer_syscall_pre_rt_sigtimedwait(uthese, uinfo, uts, \
+ sigsetsize) \
+ __sanitizer_syscall_pre_impl_rt_sigtimedwait( \
(long)(uthese), (long)(uinfo), (long)(uts), (long)(sigsetsize))
-#define __sanitizer_syscall_post_rt_sigtimedwait(res, uthese, uinfo, uts, \
- sigsetsize) \
- __sanitizer_syscall_post_impl_rt_sigtimedwait( \
+#define __sanitizer_syscall_post_rt_sigtimedwait(res, uthese, uinfo, uts, \
+ sigsetsize) \
+ __sanitizer_syscall_post_impl_rt_sigtimedwait( \
res, (long)(uthese), (long)(uinfo), (long)(uts), (long)(sigsetsize))
-#define __sanitizer_syscall_pre_rt_tgsigqueueinfo(tgid, pid, sig, uinfo) \
- __sanitizer_syscall_pre_impl_rt_tgsigqueueinfo((long)(tgid), (long)(pid), \
+#define __sanitizer_syscall_pre_rt_tgsigqueueinfo(tgid, pid, sig, uinfo) \
+ __sanitizer_syscall_pre_impl_rt_tgsigqueueinfo((long)(tgid), (long)(pid), \
(long)(sig), (long)(uinfo))
#define __sanitizer_syscall_post_rt_tgsigqueueinfo(res, tgid, pid, sig, uinfo) \
__sanitizer_syscall_post_impl_rt_tgsigqueueinfo( \
res, (long)(tgid), (long)(pid), (long)(sig), (long)(uinfo))
-#define __sanitizer_syscall_pre_kill(pid, sig) \
+#define __sanitizer_syscall_pre_kill(pid, sig) \
__sanitizer_syscall_pre_impl_kill((long)(pid), (long)(sig))
-#define __sanitizer_syscall_post_kill(res, pid, sig) \
+#define __sanitizer_syscall_post_kill(res, pid, sig) \
__sanitizer_syscall_post_impl_kill(res, (long)(pid), (long)(sig))
-#define __sanitizer_syscall_pre_tgkill(tgid, pid, sig) \
+#define __sanitizer_syscall_pre_tgkill(tgid, pid, sig) \
__sanitizer_syscall_pre_impl_tgkill((long)(tgid), (long)(pid), (long)(sig))
-#define __sanitizer_syscall_post_tgkill(res, tgid, pid, sig) \
- __sanitizer_syscall_post_impl_tgkill(res, (long)(tgid), (long)(pid), \
+#define __sanitizer_syscall_post_tgkill(res, tgid, pid, sig) \
+ __sanitizer_syscall_post_impl_tgkill(res, (long)(tgid), (long)(pid), \
(long)(sig))
-#define __sanitizer_syscall_pre_tkill(pid, sig) \
+#define __sanitizer_syscall_pre_tkill(pid, sig) \
__sanitizer_syscall_pre_impl_tkill((long)(pid), (long)(sig))
-#define __sanitizer_syscall_post_tkill(res, pid, sig) \
+#define __sanitizer_syscall_post_tkill(res, pid, sig) \
__sanitizer_syscall_post_impl_tkill(res, (long)(pid), (long)(sig))
-#define __sanitizer_syscall_pre_rt_sigqueueinfo(pid, sig, uinfo) \
- __sanitizer_syscall_pre_impl_rt_sigqueueinfo((long)(pid), (long)(sig), \
+#define __sanitizer_syscall_pre_rt_sigqueueinfo(pid, sig, uinfo) \
+ __sanitizer_syscall_pre_impl_rt_sigqueueinfo((long)(pid), (long)(sig), \
(long)(uinfo))
#define __sanitizer_syscall_post_rt_sigqueueinfo(res, pid, sig, uinfo) \
__sanitizer_syscall_post_impl_rt_sigqueueinfo(res, (long)(pid), (long)(sig), \
(long)(uinfo))
-#define __sanitizer_syscall_pre_sgetmask() \
+#define __sanitizer_syscall_pre_sgetmask() \
__sanitizer_syscall_pre_impl_sgetmask()
-#define __sanitizer_syscall_post_sgetmask(res) \
+#define __sanitizer_syscall_post_sgetmask(res) \
__sanitizer_syscall_post_impl_sgetmask(res)
-#define __sanitizer_syscall_pre_ssetmask(newmask) \
+#define __sanitizer_syscall_pre_ssetmask(newmask) \
__sanitizer_syscall_pre_impl_ssetmask((long)(newmask))
-#define __sanitizer_syscall_post_ssetmask(res, newmask) \
+#define __sanitizer_syscall_post_ssetmask(res, newmask) \
__sanitizer_syscall_post_impl_ssetmask(res, (long)(newmask))
-#define __sanitizer_syscall_pre_signal(sig, handler) \
+#define __sanitizer_syscall_pre_signal(sig, handler) \
__sanitizer_syscall_pre_impl_signal((long)(sig), (long)(handler))
-#define __sanitizer_syscall_post_signal(res, sig, handler) \
+#define __sanitizer_syscall_post_signal(res, sig, handler) \
__sanitizer_syscall_post_impl_signal(res, (long)(sig), (long)(handler))
#define __sanitizer_syscall_pre_pause() __sanitizer_syscall_pre_impl_pause()
-#define __sanitizer_syscall_post_pause(res) \
+#define __sanitizer_syscall_post_pause(res) \
__sanitizer_syscall_post_impl_pause(res)
#define __sanitizer_syscall_pre_sync() __sanitizer_syscall_pre_impl_sync()
-#define __sanitizer_syscall_post_sync(res) \
+#define __sanitizer_syscall_post_sync(res) \
__sanitizer_syscall_post_impl_sync(res)
-#define __sanitizer_syscall_pre_fsync(fd) \
+#define __sanitizer_syscall_pre_fsync(fd) \
__sanitizer_syscall_pre_impl_fsync((long)(fd))
-#define __sanitizer_syscall_post_fsync(res, fd) \
+#define __sanitizer_syscall_post_fsync(res, fd) \
__sanitizer_syscall_post_impl_fsync(res, (long)(fd))
-#define __sanitizer_syscall_pre_fdatasync(fd) \
+#define __sanitizer_syscall_pre_fdatasync(fd) \
__sanitizer_syscall_pre_impl_fdatasync((long)(fd))
-#define __sanitizer_syscall_post_fdatasync(res, fd) \
+#define __sanitizer_syscall_post_fdatasync(res, fd) \
__sanitizer_syscall_post_impl_fdatasync(res, (long)(fd))
-#define __sanitizer_syscall_pre_bdflush(func, data) \
+#define __sanitizer_syscall_pre_bdflush(func, data) \
__sanitizer_syscall_pre_impl_bdflush((long)(func), (long)(data))
-#define __sanitizer_syscall_post_bdflush(res, func, data) \
+#define __sanitizer_syscall_post_bdflush(res, func, data) \
__sanitizer_syscall_post_impl_bdflush(res, (long)(func), (long)(data))
-#define __sanitizer_syscall_pre_mount(dev_name, dir_name, type, flags, data) \
- __sanitizer_syscall_pre_impl_mount((long)(dev_name), (long)(dir_name), \
- (long)(type), (long)(flags), \
+#define __sanitizer_syscall_pre_mount(dev_name, dir_name, type, flags, data) \
+ __sanitizer_syscall_pre_impl_mount((long)(dev_name), (long)(dir_name), \
+ (long)(type), (long)(flags), \
(long)(data))
#define __sanitizer_syscall_post_mount(res, dev_name, dir_name, type, flags, \
data) \
__sanitizer_syscall_post_impl_mount(res, (long)(dev_name), (long)(dir_name), \
(long)(type), (long)(flags), \
(long)(data))
-#define __sanitizer_syscall_pre_umount(name, flags) \
+#define __sanitizer_syscall_pre_umount(name, flags) \
__sanitizer_syscall_pre_impl_umount((long)(name), (long)(flags))
-#define __sanitizer_syscall_post_umount(res, name, flags) \
+#define __sanitizer_syscall_post_umount(res, name, flags) \
__sanitizer_syscall_post_impl_umount(res, (long)(name), (long)(flags))
-#define __sanitizer_syscall_pre_oldumount(name) \
+#define __sanitizer_syscall_pre_oldumount(name) \
__sanitizer_syscall_pre_impl_oldumount((long)(name))
-#define __sanitizer_syscall_post_oldumount(res, name) \
+#define __sanitizer_syscall_post_oldumount(res, name) \
__sanitizer_syscall_post_impl_oldumount(res, (long)(name))
-#define __sanitizer_syscall_pre_truncate(path, length) \
+#define __sanitizer_syscall_pre_truncate(path, length) \
__sanitizer_syscall_pre_impl_truncate((long)(path), (long)(length))
-#define __sanitizer_syscall_post_truncate(res, path, length) \
+#define __sanitizer_syscall_post_truncate(res, path, length) \
__sanitizer_syscall_post_impl_truncate(res, (long)(path), (long)(length))
-#define __sanitizer_syscall_pre_ftruncate(fd, length) \
+#define __sanitizer_syscall_pre_ftruncate(fd, length) \
__sanitizer_syscall_pre_impl_ftruncate((long)(fd), (long)(length))
-#define __sanitizer_syscall_post_ftruncate(res, fd, length) \
+#define __sanitizer_syscall_post_ftruncate(res, fd, length) \
__sanitizer_syscall_post_impl_ftruncate(res, (long)(fd), (long)(length))
-#define __sanitizer_syscall_pre_stat(filename, statbuf) \
+#define __sanitizer_syscall_pre_stat(filename, statbuf) \
__sanitizer_syscall_pre_impl_stat((long)(filename), (long)(statbuf))
-#define __sanitizer_syscall_post_stat(res, filename, statbuf) \
+#define __sanitizer_syscall_post_stat(res, filename, statbuf) \
__sanitizer_syscall_post_impl_stat(res, (long)(filename), (long)(statbuf))
-#define __sanitizer_syscall_pre_statfs(path, buf) \
+#define __sanitizer_syscall_pre_statfs(path, buf) \
__sanitizer_syscall_pre_impl_statfs((long)(path), (long)(buf))
-#define __sanitizer_syscall_post_statfs(res, path, buf) \
+#define __sanitizer_syscall_post_statfs(res, path, buf) \
__sanitizer_syscall_post_impl_statfs(res, (long)(path), (long)(buf))
-#define __sanitizer_syscall_pre_statfs64(path, sz, buf) \
+#define __sanitizer_syscall_pre_statfs64(path, sz, buf) \
__sanitizer_syscall_pre_impl_statfs64((long)(path), (long)(sz), (long)(buf))
-#define __sanitizer_syscall_post_statfs64(res, path, sz, buf) \
- __sanitizer_syscall_post_impl_statfs64(res, (long)(path), (long)(sz), \
+#define __sanitizer_syscall_post_statfs64(res, path, sz, buf) \
+ __sanitizer_syscall_post_impl_statfs64(res, (long)(path), (long)(sz), \
(long)(buf))
-#define __sanitizer_syscall_pre_fstatfs(fd, buf) \
+#define __sanitizer_syscall_pre_fstatfs(fd, buf) \
__sanitizer_syscall_pre_impl_fstatfs((long)(fd), (long)(buf))
-#define __sanitizer_syscall_post_fstatfs(res, fd, buf) \
+#define __sanitizer_syscall_post_fstatfs(res, fd, buf) \
__sanitizer_syscall_post_impl_fstatfs(res, (long)(fd), (long)(buf))
-#define __sanitizer_syscall_pre_fstatfs64(fd, sz, buf) \
+#define __sanitizer_syscall_pre_fstatfs64(fd, sz, buf) \
__sanitizer_syscall_pre_impl_fstatfs64((long)(fd), (long)(sz), (long)(buf))
-#define __sanitizer_syscall_post_fstatfs64(res, fd, sz, buf) \
- __sanitizer_syscall_post_impl_fstatfs64(res, (long)(fd), (long)(sz), \
+#define __sanitizer_syscall_post_fstatfs64(res, fd, sz, buf) \
+ __sanitizer_syscall_post_impl_fstatfs64(res, (long)(fd), (long)(sz), \
(long)(buf))
-#define __sanitizer_syscall_pre_lstat(filename, statbuf) \
+#define __sanitizer_syscall_pre_lstat(filename, statbuf) \
__sanitizer_syscall_pre_impl_lstat((long)(filename), (long)(statbuf))
-#define __sanitizer_syscall_post_lstat(res, filename, statbuf) \
+#define __sanitizer_syscall_post_lstat(res, filename, statbuf) \
__sanitizer_syscall_post_impl_lstat(res, (long)(filename), (long)(statbuf))
-#define __sanitizer_syscall_pre_fstat(fd, statbuf) \
+#define __sanitizer_syscall_pre_fstat(fd, statbuf) \
__sanitizer_syscall_pre_impl_fstat((long)(fd), (long)(statbuf))
-#define __sanitizer_syscall_post_fstat(res, fd, statbuf) \
+#define __sanitizer_syscall_post_fstat(res, fd, statbuf) \
__sanitizer_syscall_post_impl_fstat(res, (long)(fd), (long)(statbuf))
-#define __sanitizer_syscall_pre_newstat(filename, statbuf) \
+#define __sanitizer_syscall_pre_newstat(filename, statbuf) \
__sanitizer_syscall_pre_impl_newstat((long)(filename), (long)(statbuf))
-#define __sanitizer_syscall_post_newstat(res, filename, statbuf) \
+#define __sanitizer_syscall_post_newstat(res, filename, statbuf) \
__sanitizer_syscall_post_impl_newstat(res, (long)(filename), (long)(statbuf))
-#define __sanitizer_syscall_pre_newlstat(filename, statbuf) \
+#define __sanitizer_syscall_pre_newlstat(filename, statbuf) \
__sanitizer_syscall_pre_impl_newlstat((long)(filename), (long)(statbuf))
-#define __sanitizer_syscall_post_newlstat(res, filename, statbuf) \
+#define __sanitizer_syscall_post_newlstat(res, filename, statbuf) \
__sanitizer_syscall_post_impl_newlstat(res, (long)(filename), (long)(statbuf))
-#define __sanitizer_syscall_pre_newfstat(fd, statbuf) \
+#define __sanitizer_syscall_pre_newfstat(fd, statbuf) \
__sanitizer_syscall_pre_impl_newfstat((long)(fd), (long)(statbuf))
-#define __sanitizer_syscall_post_newfstat(res, fd, statbuf) \
+#define __sanitizer_syscall_post_newfstat(res, fd, statbuf) \
__sanitizer_syscall_post_impl_newfstat(res, (long)(fd), (long)(statbuf))
-#define __sanitizer_syscall_pre_ustat(dev, ubuf) \
+#define __sanitizer_syscall_pre_ustat(dev, ubuf) \
__sanitizer_syscall_pre_impl_ustat((long)(dev), (long)(ubuf))
-#define __sanitizer_syscall_post_ustat(res, dev, ubuf) \
+#define __sanitizer_syscall_post_ustat(res, dev, ubuf) \
__sanitizer_syscall_post_impl_ustat(res, (long)(dev), (long)(ubuf))
-#define __sanitizer_syscall_pre_stat64(filename, statbuf) \
+#define __sanitizer_syscall_pre_stat64(filename, statbuf) \
__sanitizer_syscall_pre_impl_stat64((long)(filename), (long)(statbuf))
-#define __sanitizer_syscall_post_stat64(res, filename, statbuf) \
+#define __sanitizer_syscall_post_stat64(res, filename, statbuf) \
__sanitizer_syscall_post_impl_stat64(res, (long)(filename), (long)(statbuf))
-#define __sanitizer_syscall_pre_fstat64(fd, statbuf) \
+#define __sanitizer_syscall_pre_fstat64(fd, statbuf) \
__sanitizer_syscall_pre_impl_fstat64((long)(fd), (long)(statbuf))
-#define __sanitizer_syscall_post_fstat64(res, fd, statbuf) \
+#define __sanitizer_syscall_post_fstat64(res, fd, statbuf) \
__sanitizer_syscall_post_impl_fstat64(res, (long)(fd), (long)(statbuf))
-#define __sanitizer_syscall_pre_lstat64(filename, statbuf) \
+#define __sanitizer_syscall_pre_lstat64(filename, statbuf) \
__sanitizer_syscall_pre_impl_lstat64((long)(filename), (long)(statbuf))
-#define __sanitizer_syscall_post_lstat64(res, filename, statbuf) \
+#define __sanitizer_syscall_post_lstat64(res, filename, statbuf) \
__sanitizer_syscall_post_impl_lstat64(res, (long)(filename), (long)(statbuf))
-#define __sanitizer_syscall_pre_setxattr(path, name, value, size, flags) \
- __sanitizer_syscall_pre_impl_setxattr( \
+#define __sanitizer_syscall_pre_setxattr(path, name, value, size, flags) \
+ __sanitizer_syscall_pre_impl_setxattr( \
(long)(path), (long)(name), (long)(value), (long)(size), (long)(flags))
#define __sanitizer_syscall_post_setxattr(res, path, name, value, size, flags) \
__sanitizer_syscall_post_impl_setxattr(res, (long)(path), (long)(name), \
(long)(value), (long)(size), \
(long)(flags))
-#define __sanitizer_syscall_pre_lsetxattr(path, name, value, size, flags) \
- __sanitizer_syscall_pre_impl_lsetxattr( \
+#define __sanitizer_syscall_pre_lsetxattr(path, name, value, size, flags) \
+ __sanitizer_syscall_pre_impl_lsetxattr( \
(long)(path), (long)(name), (long)(value), (long)(size), (long)(flags))
-#define __sanitizer_syscall_post_lsetxattr(res, path, name, value, size, \
- flags) \
- __sanitizer_syscall_post_impl_lsetxattr(res, (long)(path), (long)(name), \
- (long)(value), (long)(size), \
+#define __sanitizer_syscall_post_lsetxattr(res, path, name, value, size, \
+ flags) \
+ __sanitizer_syscall_post_impl_lsetxattr(res, (long)(path), (long)(name), \
+ (long)(value), (long)(size), \
(long)(flags))
-#define __sanitizer_syscall_pre_fsetxattr(fd, name, value, size, flags) \
- __sanitizer_syscall_pre_impl_fsetxattr( \
+#define __sanitizer_syscall_pre_fsetxattr(fd, name, value, size, flags) \
+ __sanitizer_syscall_pre_impl_fsetxattr( \
(long)(fd), (long)(name), (long)(value), (long)(size), (long)(flags))
-#define __sanitizer_syscall_post_fsetxattr(res, fd, name, value, size, flags) \
- __sanitizer_syscall_post_impl_fsetxattr(res, (long)(fd), (long)(name), \
- (long)(value), (long)(size), \
+#define __sanitizer_syscall_post_fsetxattr(res, fd, name, value, size, flags) \
+ __sanitizer_syscall_post_impl_fsetxattr(res, (long)(fd), (long)(name), \
+ (long)(value), (long)(size), \
(long)(flags))
-#define __sanitizer_syscall_pre_getxattr(path, name, value, size) \
- __sanitizer_syscall_pre_impl_getxattr((long)(path), (long)(name), \
+#define __sanitizer_syscall_pre_getxattr(path, name, value, size) \
+ __sanitizer_syscall_pre_impl_getxattr((long)(path), (long)(name), \
(long)(value), (long)(size))
-#define __sanitizer_syscall_post_getxattr(res, path, name, value, size) \
- __sanitizer_syscall_post_impl_getxattr(res, (long)(path), (long)(name), \
+#define __sanitizer_syscall_post_getxattr(res, path, name, value, size) \
+ __sanitizer_syscall_post_impl_getxattr(res, (long)(path), (long)(name), \
(long)(value), (long)(size))
-#define __sanitizer_syscall_pre_lgetxattr(path, name, value, size) \
- __sanitizer_syscall_pre_impl_lgetxattr((long)(path), (long)(name), \
+#define __sanitizer_syscall_pre_lgetxattr(path, name, value, size) \
+ __sanitizer_syscall_pre_impl_lgetxattr((long)(path), (long)(name), \
(long)(value), (long)(size))
-#define __sanitizer_syscall_post_lgetxattr(res, path, name, value, size) \
- __sanitizer_syscall_post_impl_lgetxattr(res, (long)(path), (long)(name), \
+#define __sanitizer_syscall_post_lgetxattr(res, path, name, value, size) \
+ __sanitizer_syscall_post_impl_lgetxattr(res, (long)(path), (long)(name), \
(long)(value), (long)(size))
-#define __sanitizer_syscall_pre_fgetxattr(fd, name, value, size) \
- __sanitizer_syscall_pre_impl_fgetxattr((long)(fd), (long)(name), \
+#define __sanitizer_syscall_pre_fgetxattr(fd, name, value, size) \
+ __sanitizer_syscall_pre_impl_fgetxattr((long)(fd), (long)(name), \
(long)(value), (long)(size))
-#define __sanitizer_syscall_post_fgetxattr(res, fd, name, value, size) \
- __sanitizer_syscall_post_impl_fgetxattr(res, (long)(fd), (long)(name), \
+#define __sanitizer_syscall_post_fgetxattr(res, fd, name, value, size) \
+ __sanitizer_syscall_post_impl_fgetxattr(res, (long)(fd), (long)(name), \
(long)(value), (long)(size))
-#define __sanitizer_syscall_pre_listxattr(path, list, size) \
- __sanitizer_syscall_pre_impl_listxattr((long)(path), (long)(list), \
+#define __sanitizer_syscall_pre_listxattr(path, list, size) \
+ __sanitizer_syscall_pre_impl_listxattr((long)(path), (long)(list), \
(long)(size))
-#define __sanitizer_syscall_post_listxattr(res, path, list, size) \
- __sanitizer_syscall_post_impl_listxattr(res, (long)(path), (long)(list), \
+#define __sanitizer_syscall_post_listxattr(res, path, list, size) \
+ __sanitizer_syscall_post_impl_listxattr(res, (long)(path), (long)(list), \
(long)(size))
-#define __sanitizer_syscall_pre_llistxattr(path, list, size) \
- __sanitizer_syscall_pre_impl_llistxattr((long)(path), (long)(list), \
+#define __sanitizer_syscall_pre_llistxattr(path, list, size) \
+ __sanitizer_syscall_pre_impl_llistxattr((long)(path), (long)(list), \
(long)(size))
-#define __sanitizer_syscall_post_llistxattr(res, path, list, size) \
- __sanitizer_syscall_post_impl_llistxattr(res, (long)(path), (long)(list), \
+#define __sanitizer_syscall_post_llistxattr(res, path, list, size) \
+ __sanitizer_syscall_post_impl_llistxattr(res, (long)(path), (long)(list), \
(long)(size))
-#define __sanitizer_syscall_pre_flistxattr(fd, list, size) \
- __sanitizer_syscall_pre_impl_flistxattr((long)(fd), (long)(list), \
+#define __sanitizer_syscall_pre_flistxattr(fd, list, size) \
+ __sanitizer_syscall_pre_impl_flistxattr((long)(fd), (long)(list), \
(long)(size))
-#define __sanitizer_syscall_post_flistxattr(res, fd, list, size) \
- __sanitizer_syscall_post_impl_flistxattr(res, (long)(fd), (long)(list), \
+#define __sanitizer_syscall_post_flistxattr(res, fd, list, size) \
+ __sanitizer_syscall_post_impl_flistxattr(res, (long)(fd), (long)(list), \
(long)(size))
-#define __sanitizer_syscall_pre_removexattr(path, name) \
+#define __sanitizer_syscall_pre_removexattr(path, name) \
__sanitizer_syscall_pre_impl_removexattr((long)(path), (long)(name))
-#define __sanitizer_syscall_post_removexattr(res, path, name) \
+#define __sanitizer_syscall_post_removexattr(res, path, name) \
__sanitizer_syscall_post_impl_removexattr(res, (long)(path), (long)(name))
-#define __sanitizer_syscall_pre_lremovexattr(path, name) \
+#define __sanitizer_syscall_pre_lremovexattr(path, name) \
__sanitizer_syscall_pre_impl_lremovexattr((long)(path), (long)(name))
-#define __sanitizer_syscall_post_lremovexattr(res, path, name) \
+#define __sanitizer_syscall_post_lremovexattr(res, path, name) \
__sanitizer_syscall_post_impl_lremovexattr(res, (long)(path), (long)(name))
-#define __sanitizer_syscall_pre_fremovexattr(fd, name) \
+#define __sanitizer_syscall_pre_fremovexattr(fd, name) \
__sanitizer_syscall_pre_impl_fremovexattr((long)(fd), (long)(name))
-#define __sanitizer_syscall_post_fremovexattr(res, fd, name) \
+#define __sanitizer_syscall_post_fremovexattr(res, fd, name) \
__sanitizer_syscall_post_impl_fremovexattr(res, (long)(fd), (long)(name))
-#define __sanitizer_syscall_pre_brk(brk) \
+#define __sanitizer_syscall_pre_brk(brk) \
__sanitizer_syscall_pre_impl_brk((long)(brk))
-#define __sanitizer_syscall_post_brk(res, brk) \
+#define __sanitizer_syscall_post_brk(res, brk) \
__sanitizer_syscall_post_impl_brk(res, (long)(brk))
-#define __sanitizer_syscall_pre_mprotect(start, len, prot) \
- __sanitizer_syscall_pre_impl_mprotect((long)(start), (long)(len), \
+#define __sanitizer_syscall_pre_mprotect(start, len, prot) \
+ __sanitizer_syscall_pre_impl_mprotect((long)(start), (long)(len), \
(long)(prot))
-#define __sanitizer_syscall_post_mprotect(res, start, len, prot) \
- __sanitizer_syscall_post_impl_mprotect(res, (long)(start), (long)(len), \
+#define __sanitizer_syscall_post_mprotect(res, start, len, prot) \
+ __sanitizer_syscall_post_impl_mprotect(res, (long)(start), (long)(len), \
(long)(prot))
-#define __sanitizer_syscall_pre_mremap(addr, old_len, new_len, flags, \
- new_addr) \
- __sanitizer_syscall_pre_impl_mremap((long)(addr), (long)(old_len), \
- (long)(new_len), (long)(flags), \
+#define __sanitizer_syscall_pre_mremap(addr, old_len, new_len, flags, \
+ new_addr) \
+ __sanitizer_syscall_pre_impl_mremap((long)(addr), (long)(old_len), \
+ (long)(new_len), (long)(flags), \
(long)(new_addr))
-#define __sanitizer_syscall_post_mremap(res, addr, old_len, new_len, flags, \
- new_addr) \
- __sanitizer_syscall_post_impl_mremap(res, (long)(addr), (long)(old_len), \
- (long)(new_len), (long)(flags), \
+#define __sanitizer_syscall_post_mremap(res, addr, old_len, new_len, flags, \
+ new_addr) \
+ __sanitizer_syscall_post_impl_mremap(res, (long)(addr), (long)(old_len), \
+ (long)(new_len), (long)(flags), \
(long)(new_addr))
-#define __sanitizer_syscall_pre_remap_file_pages(start, size, prot, pgoff, \
- flags) \
- __sanitizer_syscall_pre_impl_remap_file_pages( \
+#define __sanitizer_syscall_pre_remap_file_pages(start, size, prot, pgoff, \
+ flags) \
+ __sanitizer_syscall_pre_impl_remap_file_pages( \
(long)(start), (long)(size), (long)(prot), (long)(pgoff), (long)(flags))
-#define __sanitizer_syscall_post_remap_file_pages(res, start, size, prot, \
- pgoff, flags) \
- __sanitizer_syscall_post_impl_remap_file_pages(res, (long)(start), \
- (long)(size), (long)(prot), \
+#define __sanitizer_syscall_post_remap_file_pages(res, start, size, prot, \
+ pgoff, flags) \
+ __sanitizer_syscall_post_impl_remap_file_pages(res, (long)(start), \
+ (long)(size), (long)(prot), \
(long)(pgoff), (long)(flags))
-#define __sanitizer_syscall_pre_msync(start, len, flags) \
+#define __sanitizer_syscall_pre_msync(start, len, flags) \
__sanitizer_syscall_pre_impl_msync((long)(start), (long)(len), (long)(flags))
-#define __sanitizer_syscall_post_msync(res, start, len, flags) \
- __sanitizer_syscall_post_impl_msync(res, (long)(start), (long)(len), \
+#define __sanitizer_syscall_post_msync(res, start, len, flags) \
+ __sanitizer_syscall_post_impl_msync(res, (long)(start), (long)(len), \
(long)(flags))
-#define __sanitizer_syscall_pre_munmap(addr, len) \
+#define __sanitizer_syscall_pre_munmap(addr, len) \
__sanitizer_syscall_pre_impl_munmap((long)(addr), (long)(len))
-#define __sanitizer_syscall_post_munmap(res, addr, len) \
+#define __sanitizer_syscall_post_munmap(res, addr, len) \
__sanitizer_syscall_post_impl_munmap(res, (long)(addr), (long)(len))
-#define __sanitizer_syscall_pre_mlock(start, len) \
+#define __sanitizer_syscall_pre_mlock(start, len) \
__sanitizer_syscall_pre_impl_mlock((long)(start), (long)(len))
-#define __sanitizer_syscall_post_mlock(res, start, len) \
+#define __sanitizer_syscall_post_mlock(res, start, len) \
__sanitizer_syscall_post_impl_mlock(res, (long)(start), (long)(len))
-#define __sanitizer_syscall_pre_munlock(start, len) \
+#define __sanitizer_syscall_pre_munlock(start, len) \
__sanitizer_syscall_pre_impl_munlock((long)(start), (long)(len))
-#define __sanitizer_syscall_post_munlock(res, start, len) \
+#define __sanitizer_syscall_post_munlock(res, start, len) \
__sanitizer_syscall_post_impl_munlock(res, (long)(start), (long)(len))
-#define __sanitizer_syscall_pre_mlockall(flags) \
+#define __sanitizer_syscall_pre_mlockall(flags) \
__sanitizer_syscall_pre_impl_mlockall((long)(flags))
-#define __sanitizer_syscall_post_mlockall(res, flags) \
+#define __sanitizer_syscall_post_mlockall(res, flags) \
__sanitizer_syscall_post_impl_mlockall(res, (long)(flags))
-#define __sanitizer_syscall_pre_munlockall() \
+#define __sanitizer_syscall_pre_munlockall() \
__sanitizer_syscall_pre_impl_munlockall()
-#define __sanitizer_syscall_post_munlockall(res) \
+#define __sanitizer_syscall_post_munlockall(res) \
__sanitizer_syscall_post_impl_munlockall(res)
-#define __sanitizer_syscall_pre_madvise(start, len, behavior) \
- __sanitizer_syscall_pre_impl_madvise((long)(start), (long)(len), \
+#define __sanitizer_syscall_pre_madvise(start, len, behavior) \
+ __sanitizer_syscall_pre_impl_madvise((long)(start), (long)(len), \
(long)(behavior))
-#define __sanitizer_syscall_post_madvise(res, start, len, behavior) \
- __sanitizer_syscall_post_impl_madvise(res, (long)(start), (long)(len), \
+#define __sanitizer_syscall_post_madvise(res, start, len, behavior) \
+ __sanitizer_syscall_post_impl_madvise(res, (long)(start), (long)(len), \
(long)(behavior))
-#define __sanitizer_syscall_pre_mincore(start, len, vec) \
+#define __sanitizer_syscall_pre_mincore(start, len, vec) \
__sanitizer_syscall_pre_impl_mincore((long)(start), (long)(len), (long)(vec))
-#define __sanitizer_syscall_post_mincore(res, start, len, vec) \
- __sanitizer_syscall_post_impl_mincore(res, (long)(start), (long)(len), \
+#define __sanitizer_syscall_post_mincore(res, start, len, vec) \
+ __sanitizer_syscall_post_impl_mincore(res, (long)(start), (long)(len), \
(long)(vec))
-#define __sanitizer_syscall_pre_pivot_root(new_root, put_old) \
+#define __sanitizer_syscall_pre_pivot_root(new_root, put_old) \
__sanitizer_syscall_pre_impl_pivot_root((long)(new_root), (long)(put_old))
-#define __sanitizer_syscall_post_pivot_root(res, new_root, put_old) \
- __sanitizer_syscall_post_impl_pivot_root(res, (long)(new_root), \
+#define __sanitizer_syscall_post_pivot_root(res, new_root, put_old) \
+ __sanitizer_syscall_post_impl_pivot_root(res, (long)(new_root), \
(long)(put_old))
-#define __sanitizer_syscall_pre_chroot(filename) \
+#define __sanitizer_syscall_pre_chroot(filename) \
__sanitizer_syscall_pre_impl_chroot((long)(filename))
-#define __sanitizer_syscall_post_chroot(res, filename) \
+#define __sanitizer_syscall_post_chroot(res, filename) \
__sanitizer_syscall_post_impl_chroot(res, (long)(filename))
-#define __sanitizer_syscall_pre_mknod(filename, mode, dev) \
- __sanitizer_syscall_pre_impl_mknod((long)(filename), (long)(mode), \
+#define __sanitizer_syscall_pre_mknod(filename, mode, dev) \
+ __sanitizer_syscall_pre_impl_mknod((long)(filename), (long)(mode), \
(long)(dev))
-#define __sanitizer_syscall_post_mknod(res, filename, mode, dev) \
- __sanitizer_syscall_post_impl_mknod(res, (long)(filename), (long)(mode), \
+#define __sanitizer_syscall_post_mknod(res, filename, mode, dev) \
+ __sanitizer_syscall_post_impl_mknod(res, (long)(filename), (long)(mode), \
(long)(dev))
-#define __sanitizer_syscall_pre_link(oldname, newname) \
+#define __sanitizer_syscall_pre_link(oldname, newname) \
__sanitizer_syscall_pre_impl_link((long)(oldname), (long)(newname))
-#define __sanitizer_syscall_post_link(res, oldname, newname) \
+#define __sanitizer_syscall_post_link(res, oldname, newname) \
__sanitizer_syscall_post_impl_link(res, (long)(oldname), (long)(newname))
-#define __sanitizer_syscall_pre_symlink(old, new_) \
+#define __sanitizer_syscall_pre_symlink(old, new_) \
__sanitizer_syscall_pre_impl_symlink((long)(old), (long)(new_))
-#define __sanitizer_syscall_post_symlink(res, old, new_) \
+#define __sanitizer_syscall_post_symlink(res, old, new_) \
__sanitizer_syscall_post_impl_symlink(res, (long)(old), (long)(new_))
-#define __sanitizer_syscall_pre_unlink(pathname) \
+#define __sanitizer_syscall_pre_unlink(pathname) \
__sanitizer_syscall_pre_impl_unlink((long)(pathname))
-#define __sanitizer_syscall_post_unlink(res, pathname) \
+#define __sanitizer_syscall_post_unlink(res, pathname) \
__sanitizer_syscall_post_impl_unlink(res, (long)(pathname))
-#define __sanitizer_syscall_pre_rename(oldname, newname) \
+#define __sanitizer_syscall_pre_rename(oldname, newname) \
__sanitizer_syscall_pre_impl_rename((long)(oldname), (long)(newname))
-#define __sanitizer_syscall_post_rename(res, oldname, newname) \
+#define __sanitizer_syscall_post_rename(res, oldname, newname) \
__sanitizer_syscall_post_impl_rename(res, (long)(oldname), (long)(newname))
-#define __sanitizer_syscall_pre_chmod(filename, mode) \
+#define __sanitizer_syscall_pre_chmod(filename, mode) \
__sanitizer_syscall_pre_impl_chmod((long)(filename), (long)(mode))
-#define __sanitizer_syscall_post_chmod(res, filename, mode) \
+#define __sanitizer_syscall_post_chmod(res, filename, mode) \
__sanitizer_syscall_post_impl_chmod(res, (long)(filename), (long)(mode))
-#define __sanitizer_syscall_pre_fchmod(fd, mode) \
+#define __sanitizer_syscall_pre_fchmod(fd, mode) \
__sanitizer_syscall_pre_impl_fchmod((long)(fd), (long)(mode))
-#define __sanitizer_syscall_post_fchmod(res, fd, mode) \
+#define __sanitizer_syscall_post_fchmod(res, fd, mode) \
__sanitizer_syscall_post_impl_fchmod(res, (long)(fd), (long)(mode))
-#define __sanitizer_syscall_pre_fcntl(fd, cmd, arg) \
+#define __sanitizer_syscall_pre_fcntl(fd, cmd, arg) \
__sanitizer_syscall_pre_impl_fcntl((long)(fd), (long)(cmd), (long)(arg))
-#define __sanitizer_syscall_post_fcntl(res, fd, cmd, arg) \
+#define __sanitizer_syscall_post_fcntl(res, fd, cmd, arg) \
__sanitizer_syscall_post_impl_fcntl(res, (long)(fd), (long)(cmd), (long)(arg))
-#define __sanitizer_syscall_pre_fcntl64(fd, cmd, arg) \
+#define __sanitizer_syscall_pre_fcntl64(fd, cmd, arg) \
__sanitizer_syscall_pre_impl_fcntl64((long)(fd), (long)(cmd), (long)(arg))
-#define __sanitizer_syscall_post_fcntl64(res, fd, cmd, arg) \
- __sanitizer_syscall_post_impl_fcntl64(res, (long)(fd), (long)(cmd), \
+#define __sanitizer_syscall_post_fcntl64(res, fd, cmd, arg) \
+ __sanitizer_syscall_post_impl_fcntl64(res, (long)(fd), (long)(cmd), \
(long)(arg))
-#define __sanitizer_syscall_pre_pipe(fildes) \
+#define __sanitizer_syscall_pre_pipe(fildes) \
__sanitizer_syscall_pre_impl_pipe((long)(fildes))
-#define __sanitizer_syscall_post_pipe(res, fildes) \
+#define __sanitizer_syscall_post_pipe(res, fildes) \
__sanitizer_syscall_post_impl_pipe(res, (long)(fildes))
-#define __sanitizer_syscall_pre_pipe2(fildes, flags) \
+#define __sanitizer_syscall_pre_pipe2(fildes, flags) \
__sanitizer_syscall_pre_impl_pipe2((long)(fildes), (long)(flags))
-#define __sanitizer_syscall_post_pipe2(res, fildes, flags) \
+#define __sanitizer_syscall_post_pipe2(res, fildes, flags) \
__sanitizer_syscall_post_impl_pipe2(res, (long)(fildes), (long)(flags))
-#define __sanitizer_syscall_pre_dup(fildes) \
+#define __sanitizer_syscall_pre_dup(fildes) \
__sanitizer_syscall_pre_impl_dup((long)(fildes))
-#define __sanitizer_syscall_post_dup(res, fildes) \
+#define __sanitizer_syscall_post_dup(res, fildes) \
__sanitizer_syscall_post_impl_dup(res, (long)(fildes))
-#define __sanitizer_syscall_pre_dup2(oldfd, newfd) \
+#define __sanitizer_syscall_pre_dup2(oldfd, newfd) \
__sanitizer_syscall_pre_impl_dup2((long)(oldfd), (long)(newfd))
-#define __sanitizer_syscall_post_dup2(res, oldfd, newfd) \
+#define __sanitizer_syscall_post_dup2(res, oldfd, newfd) \
__sanitizer_syscall_post_impl_dup2(res, (long)(oldfd), (long)(newfd))
-#define __sanitizer_syscall_pre_dup3(oldfd, newfd, flags) \
+#define __sanitizer_syscall_pre_dup3(oldfd, newfd, flags) \
__sanitizer_syscall_pre_impl_dup3((long)(oldfd), (long)(newfd), (long)(flags))
-#define __sanitizer_syscall_post_dup3(res, oldfd, newfd, flags) \
- __sanitizer_syscall_post_impl_dup3(res, (long)(oldfd), (long)(newfd), \
+#define __sanitizer_syscall_post_dup3(res, oldfd, newfd, flags) \
+ __sanitizer_syscall_post_impl_dup3(res, (long)(oldfd), (long)(newfd), \
(long)(flags))
-#define __sanitizer_syscall_pre_ioperm(from, num, on) \
+#define __sanitizer_syscall_pre_ioperm(from, num, on) \
__sanitizer_syscall_pre_impl_ioperm((long)(from), (long)(num), (long)(on))
-#define __sanitizer_syscall_post_ioperm(res, from, num, on) \
- __sanitizer_syscall_post_impl_ioperm(res, (long)(from), (long)(num), \
+#define __sanitizer_syscall_post_ioperm(res, from, num, on) \
+ __sanitizer_syscall_post_impl_ioperm(res, (long)(from), (long)(num), \
(long)(on))
-#define __sanitizer_syscall_pre_ioctl(fd, cmd, arg) \
+#define __sanitizer_syscall_pre_ioctl(fd, cmd, arg) \
__sanitizer_syscall_pre_impl_ioctl((long)(fd), (long)(cmd), (long)(arg))
-#define __sanitizer_syscall_post_ioctl(res, fd, cmd, arg) \
+#define __sanitizer_syscall_post_ioctl(res, fd, cmd, arg) \
__sanitizer_syscall_post_impl_ioctl(res, (long)(fd), (long)(cmd), (long)(arg))
-#define __sanitizer_syscall_pre_flock(fd, cmd) \
+#define __sanitizer_syscall_pre_flock(fd, cmd) \
__sanitizer_syscall_pre_impl_flock((long)(fd), (long)(cmd))
-#define __sanitizer_syscall_post_flock(res, fd, cmd) \
+#define __sanitizer_syscall_post_flock(res, fd, cmd) \
__sanitizer_syscall_post_impl_flock(res, (long)(fd), (long)(cmd))
-#define __sanitizer_syscall_pre_io_setup(nr_reqs, ctx) \
+#define __sanitizer_syscall_pre_io_setup(nr_reqs, ctx) \
__sanitizer_syscall_pre_impl_io_setup((long)(nr_reqs), (long)(ctx))
-#define __sanitizer_syscall_post_io_setup(res, nr_reqs, ctx) \
+#define __sanitizer_syscall_post_io_setup(res, nr_reqs, ctx) \
__sanitizer_syscall_post_impl_io_setup(res, (long)(nr_reqs), (long)(ctx))
-#define __sanitizer_syscall_pre_io_destroy(ctx) \
+#define __sanitizer_syscall_pre_io_destroy(ctx) \
__sanitizer_syscall_pre_impl_io_destroy((long)(ctx))
-#define __sanitizer_syscall_post_io_destroy(res, ctx) \
+#define __sanitizer_syscall_post_io_destroy(res, ctx) \
__sanitizer_syscall_post_impl_io_destroy(res, (long)(ctx))
-#define __sanitizer_syscall_pre_io_getevents(ctx_id, min_nr, nr, events, \
- timeout) \
- __sanitizer_syscall_pre_impl_io_getevents((long)(ctx_id), (long)(min_nr), \
- (long)(nr), (long)(events), \
+#define __sanitizer_syscall_pre_io_getevents(ctx_id, min_nr, nr, events, \
+ timeout) \
+ __sanitizer_syscall_pre_impl_io_getevents((long)(ctx_id), (long)(min_nr), \
+ (long)(nr), (long)(events), \
(long)(timeout))
#define __sanitizer_syscall_post_io_getevents(res, ctx_id, min_nr, nr, events, \
timeout) \
__sanitizer_syscall_post_impl_io_getevents(res, (long)(ctx_id), \
(long)(min_nr), (long)(nr), \
(long)(events), (long)(timeout))
-#define __sanitizer_syscall_pre_io_submit(ctx_id, arg1, arg2) \
- __sanitizer_syscall_pre_impl_io_submit((long)(ctx_id), (long)(arg1), \
+#define __sanitizer_syscall_pre_io_submit(ctx_id, arg1, arg2) \
+ __sanitizer_syscall_pre_impl_io_submit((long)(ctx_id), (long)(arg1), \
(long)(arg2))
-#define __sanitizer_syscall_post_io_submit(res, ctx_id, arg1, arg2) \
- __sanitizer_syscall_post_impl_io_submit(res, (long)(ctx_id), (long)(arg1), \
+#define __sanitizer_syscall_post_io_submit(res, ctx_id, arg1, arg2) \
+ __sanitizer_syscall_post_impl_io_submit(res, (long)(ctx_id), (long)(arg1), \
(long)(arg2))
-#define __sanitizer_syscall_pre_io_cancel(ctx_id, iocb, result) \
- __sanitizer_syscall_pre_impl_io_cancel((long)(ctx_id), (long)(iocb), \
+#define __sanitizer_syscall_pre_io_cancel(ctx_id, iocb, result) \
+ __sanitizer_syscall_pre_impl_io_cancel((long)(ctx_id), (long)(iocb), \
(long)(result))
-#define __sanitizer_syscall_post_io_cancel(res, ctx_id, iocb, result) \
- __sanitizer_syscall_post_impl_io_cancel(res, (long)(ctx_id), (long)(iocb), \
+#define __sanitizer_syscall_post_io_cancel(res, ctx_id, iocb, result) \
+ __sanitizer_syscall_post_impl_io_cancel(res, (long)(ctx_id), (long)(iocb), \
(long)(result))
-#define __sanitizer_syscall_pre_sendfile(out_fd, in_fd, offset, count) \
- __sanitizer_syscall_pre_impl_sendfile((long)(out_fd), (long)(in_fd), \
+#define __sanitizer_syscall_pre_sendfile(out_fd, in_fd, offset, count) \
+ __sanitizer_syscall_pre_impl_sendfile((long)(out_fd), (long)(in_fd), \
(long)(offset), (long)(count))
-#define __sanitizer_syscall_post_sendfile(res, out_fd, in_fd, offset, count) \
- __sanitizer_syscall_post_impl_sendfile(res, (long)(out_fd), (long)(in_fd), \
+#define __sanitizer_syscall_post_sendfile(res, out_fd, in_fd, offset, count) \
+ __sanitizer_syscall_post_impl_sendfile(res, (long)(out_fd), (long)(in_fd), \
(long)(offset), (long)(count))
-#define __sanitizer_syscall_pre_sendfile64(out_fd, in_fd, offset, count) \
- __sanitizer_syscall_pre_impl_sendfile64((long)(out_fd), (long)(in_fd), \
+#define __sanitizer_syscall_pre_sendfile64(out_fd, in_fd, offset, count) \
+ __sanitizer_syscall_pre_impl_sendfile64((long)(out_fd), (long)(in_fd), \
(long)(offset), (long)(count))
#define __sanitizer_syscall_post_sendfile64(res, out_fd, in_fd, offset, count) \
__sanitizer_syscall_post_impl_sendfile64(res, (long)(out_fd), (long)(in_fd), \
(long)(offset), (long)(count))
-#define __sanitizer_syscall_pre_readlink(path, buf, bufsiz) \
- __sanitizer_syscall_pre_impl_readlink((long)(path), (long)(buf), \
+#define __sanitizer_syscall_pre_readlink(path, buf, bufsiz) \
+ __sanitizer_syscall_pre_impl_readlink((long)(path), (long)(buf), \
(long)(bufsiz))
-#define __sanitizer_syscall_post_readlink(res, path, buf, bufsiz) \
- __sanitizer_syscall_post_impl_readlink(res, (long)(path), (long)(buf), \
+#define __sanitizer_syscall_post_readlink(res, path, buf, bufsiz) \
+ __sanitizer_syscall_post_impl_readlink(res, (long)(path), (long)(buf), \
(long)(bufsiz))
-#define __sanitizer_syscall_pre_creat(pathname, mode) \
+#define __sanitizer_syscall_pre_creat(pathname, mode) \
__sanitizer_syscall_pre_impl_creat((long)(pathname), (long)(mode))
-#define __sanitizer_syscall_post_creat(res, pathname, mode) \
+#define __sanitizer_syscall_post_creat(res, pathname, mode) \
__sanitizer_syscall_post_impl_creat(res, (long)(pathname), (long)(mode))
-#define __sanitizer_syscall_pre_open(filename, flags, mode) \
- __sanitizer_syscall_pre_impl_open((long)(filename), (long)(flags), \
+#define __sanitizer_syscall_pre_open(filename, flags, mode) \
+ __sanitizer_syscall_pre_impl_open((long)(filename), (long)(flags), \
(long)(mode))
-#define __sanitizer_syscall_post_open(res, filename, flags, mode) \
- __sanitizer_syscall_post_impl_open(res, (long)(filename), (long)(flags), \
+#define __sanitizer_syscall_post_open(res, filename, flags, mode) \
+ __sanitizer_syscall_post_impl_open(res, (long)(filename), (long)(flags), \
(long)(mode))
-#define __sanitizer_syscall_pre_close(fd) \
+#define __sanitizer_syscall_pre_close(fd) \
__sanitizer_syscall_pre_impl_close((long)(fd))
-#define __sanitizer_syscall_post_close(res, fd) \
+#define __sanitizer_syscall_post_close(res, fd) \
__sanitizer_syscall_post_impl_close(res, (long)(fd))
-#define __sanitizer_syscall_pre_access(filename, mode) \
+#define __sanitizer_syscall_pre_access(filename, mode) \
__sanitizer_syscall_pre_impl_access((long)(filename), (long)(mode))
-#define __sanitizer_syscall_post_access(res, filename, mode) \
+#define __sanitizer_syscall_post_access(res, filename, mode) \
__sanitizer_syscall_post_impl_access(res, (long)(filename), (long)(mode))
#define __sanitizer_syscall_pre_vhangup() __sanitizer_syscall_pre_impl_vhangup()
-#define __sanitizer_syscall_post_vhangup(res) \
+#define __sanitizer_syscall_post_vhangup(res) \
__sanitizer_syscall_post_impl_vhangup(res)
-#define __sanitizer_syscall_pre_chown(filename, user, group) \
- __sanitizer_syscall_pre_impl_chown((long)(filename), (long)(user), \
+#define __sanitizer_syscall_pre_chown(filename, user, group) \
+ __sanitizer_syscall_pre_impl_chown((long)(filename), (long)(user), \
(long)(group))
-#define __sanitizer_syscall_post_chown(res, filename, user, group) \
- __sanitizer_syscall_post_impl_chown(res, (long)(filename), (long)(user), \
+#define __sanitizer_syscall_post_chown(res, filename, user, group) \
+ __sanitizer_syscall_post_impl_chown(res, (long)(filename), (long)(user), \
(long)(group))
-#define __sanitizer_syscall_pre_lchown(filename, user, group) \
- __sanitizer_syscall_pre_impl_lchown((long)(filename), (long)(user), \
+#define __sanitizer_syscall_pre_lchown(filename, user, group) \
+ __sanitizer_syscall_pre_impl_lchown((long)(filename), (long)(user), \
(long)(group))
-#define __sanitizer_syscall_post_lchown(res, filename, user, group) \
- __sanitizer_syscall_post_impl_lchown(res, (long)(filename), (long)(user), \
+#define __sanitizer_syscall_post_lchown(res, filename, user, group) \
+ __sanitizer_syscall_post_impl_lchown(res, (long)(filename), (long)(user), \
(long)(group))
-#define __sanitizer_syscall_pre_fchown(fd, user, group) \
+#define __sanitizer_syscall_pre_fchown(fd, user, group) \
__sanitizer_syscall_pre_impl_fchown((long)(fd), (long)(user), (long)(group))
-#define __sanitizer_syscall_post_fchown(res, fd, user, group) \
- __sanitizer_syscall_post_impl_fchown(res, (long)(fd), (long)(user), \
+#define __sanitizer_syscall_post_fchown(res, fd, user, group) \
+ __sanitizer_syscall_post_impl_fchown(res, (long)(fd), (long)(user), \
(long)(group))
-#define __sanitizer_syscall_pre_chown16(filename, user, group) \
- __sanitizer_syscall_pre_impl_chown16((long)(filename), (long)user, \
+#define __sanitizer_syscall_pre_chown16(filename, user, group) \
+ __sanitizer_syscall_pre_impl_chown16((long)(filename), (long)user, \
(long)group)
-#define __sanitizer_syscall_post_chown16(res, filename, user, group) \
- __sanitizer_syscall_post_impl_chown16(res, (long)(filename), (long)user, \
+#define __sanitizer_syscall_post_chown16(res, filename, user, group) \
+ __sanitizer_syscall_post_impl_chown16(res, (long)(filename), (long)user, \
(long)group)
-#define __sanitizer_syscall_pre_lchown16(filename, user, group) \
- __sanitizer_syscall_pre_impl_lchown16((long)(filename), (long)user, \
+#define __sanitizer_syscall_pre_lchown16(filename, user, group) \
+ __sanitizer_syscall_pre_impl_lchown16((long)(filename), (long)user, \
(long)group)
-#define __sanitizer_syscall_post_lchown16(res, filename, user, group) \
- __sanitizer_syscall_post_impl_lchown16(res, (long)(filename), (long)user, \
+#define __sanitizer_syscall_post_lchown16(res, filename, user, group) \
+ __sanitizer_syscall_post_impl_lchown16(res, (long)(filename), (long)user, \
(long)group)
-#define __sanitizer_syscall_pre_fchown16(fd, user, group) \
+#define __sanitizer_syscall_pre_fchown16(fd, user, group) \
__sanitizer_syscall_pre_impl_fchown16((long)(fd), (long)user, (long)group)
-#define __sanitizer_syscall_post_fchown16(res, fd, user, group) \
- __sanitizer_syscall_post_impl_fchown16(res, (long)(fd), (long)user, \
+#define __sanitizer_syscall_post_fchown16(res, fd, user, group) \
+ __sanitizer_syscall_post_impl_fchown16(res, (long)(fd), (long)user, \
(long)group)
-#define __sanitizer_syscall_pre_setregid16(rgid, egid) \
+#define __sanitizer_syscall_pre_setregid16(rgid, egid) \
__sanitizer_syscall_pre_impl_setregid16((long)rgid, (long)egid)
-#define __sanitizer_syscall_post_setregid16(res, rgid, egid) \
+#define __sanitizer_syscall_post_setregid16(res, rgid, egid) \
__sanitizer_syscall_post_impl_setregid16(res, (long)rgid, (long)egid)
-#define __sanitizer_syscall_pre_setgid16(gid) \
+#define __sanitizer_syscall_pre_setgid16(gid) \
__sanitizer_syscall_pre_impl_setgid16((long)gid)
-#define __sanitizer_syscall_post_setgid16(res, gid) \
+#define __sanitizer_syscall_post_setgid16(res, gid) \
__sanitizer_syscall_post_impl_setgid16(res, (long)gid)
-#define __sanitizer_syscall_pre_setreuid16(ruid, euid) \
+#define __sanitizer_syscall_pre_setreuid16(ruid, euid) \
__sanitizer_syscall_pre_impl_setreuid16((long)ruid, (long)euid)
-#define __sanitizer_syscall_post_setreuid16(res, ruid, euid) \
+#define __sanitizer_syscall_post_setreuid16(res, ruid, euid) \
__sanitizer_syscall_post_impl_setreuid16(res, (long)ruid, (long)euid)
-#define __sanitizer_syscall_pre_setuid16(uid) \
+#define __sanitizer_syscall_pre_setuid16(uid) \
__sanitizer_syscall_pre_impl_setuid16((long)uid)
-#define __sanitizer_syscall_post_setuid16(res, uid) \
+#define __sanitizer_syscall_post_setuid16(res, uid) \
__sanitizer_syscall_post_impl_setuid16(res, (long)uid)
-#define __sanitizer_syscall_pre_setresuid16(ruid, euid, suid) \
+#define __sanitizer_syscall_pre_setresuid16(ruid, euid, suid) \
__sanitizer_syscall_pre_impl_setresuid16((long)ruid, (long)euid, (long)suid)
-#define __sanitizer_syscall_post_setresuid16(res, ruid, euid, suid) \
- __sanitizer_syscall_post_impl_setresuid16(res, (long)ruid, (long)euid, \
+#define __sanitizer_syscall_post_setresuid16(res, ruid, euid, suid) \
+ __sanitizer_syscall_post_impl_setresuid16(res, (long)ruid, (long)euid, \
(long)suid)
-#define __sanitizer_syscall_pre_getresuid16(ruid, euid, suid) \
- __sanitizer_syscall_pre_impl_getresuid16((long)(ruid), (long)(euid), \
+#define __sanitizer_syscall_pre_getresuid16(ruid, euid, suid) \
+ __sanitizer_syscall_pre_impl_getresuid16((long)(ruid), (long)(euid), \
(long)(suid))
-#define __sanitizer_syscall_post_getresuid16(res, ruid, euid, suid) \
- __sanitizer_syscall_post_impl_getresuid16(res, (long)(ruid), (long)(euid), \
+#define __sanitizer_syscall_post_getresuid16(res, ruid, euid, suid) \
+ __sanitizer_syscall_post_impl_getresuid16(res, (long)(ruid), (long)(euid), \
(long)(suid))
-#define __sanitizer_syscall_pre_setresgid16(rgid, egid, sgid) \
+#define __sanitizer_syscall_pre_setresgid16(rgid, egid, sgid) \
__sanitizer_syscall_pre_impl_setresgid16((long)rgid, (long)egid, (long)sgid)
-#define __sanitizer_syscall_post_setresgid16(res, rgid, egid, sgid) \
- __sanitizer_syscall_post_impl_setresgid16(res, (long)rgid, (long)egid, \
+#define __sanitizer_syscall_post_setresgid16(res, rgid, egid, sgid) \
+ __sanitizer_syscall_post_impl_setresgid16(res, (long)rgid, (long)egid, \
(long)sgid)
-#define __sanitizer_syscall_pre_getresgid16(rgid, egid, sgid) \
- __sanitizer_syscall_pre_impl_getresgid16((long)(rgid), (long)(egid), \
+#define __sanitizer_syscall_pre_getresgid16(rgid, egid, sgid) \
+ __sanitizer_syscall_pre_impl_getresgid16((long)(rgid), (long)(egid), \
(long)(sgid))
-#define __sanitizer_syscall_post_getresgid16(res, rgid, egid, sgid) \
- __sanitizer_syscall_post_impl_getresgid16(res, (long)(rgid), (long)(egid), \
+#define __sanitizer_syscall_post_getresgid16(res, rgid, egid, sgid) \
+ __sanitizer_syscall_post_impl_getresgid16(res, (long)(rgid), (long)(egid), \
(long)(sgid))
-#define __sanitizer_syscall_pre_setfsuid16(uid) \
+#define __sanitizer_syscall_pre_setfsuid16(uid) \
__sanitizer_syscall_pre_impl_setfsuid16((long)uid)
-#define __sanitizer_syscall_post_setfsuid16(res, uid) \
+#define __sanitizer_syscall_post_setfsuid16(res, uid) \
__sanitizer_syscall_post_impl_setfsuid16(res, (long)uid)
-#define __sanitizer_syscall_pre_setfsgid16(gid) \
+#define __sanitizer_syscall_pre_setfsgid16(gid) \
__sanitizer_syscall_pre_impl_setfsgid16((long)gid)
-#define __sanitizer_syscall_post_setfsgid16(res, gid) \
+#define __sanitizer_syscall_post_setfsgid16(res, gid) \
__sanitizer_syscall_post_impl_setfsgid16(res, (long)gid)
-#define __sanitizer_syscall_pre_getgroups16(gidsetsize, grouplist) \
- __sanitizer_syscall_pre_impl_getgroups16((long)(gidsetsize), \
+#define __sanitizer_syscall_pre_getgroups16(gidsetsize, grouplist) \
+ __sanitizer_syscall_pre_impl_getgroups16((long)(gidsetsize), \
(long)(grouplist))
-#define __sanitizer_syscall_post_getgroups16(res, gidsetsize, grouplist) \
- __sanitizer_syscall_post_impl_getgroups16(res, (long)(gidsetsize), \
+#define __sanitizer_syscall_post_getgroups16(res, gidsetsize, grouplist) \
+ __sanitizer_syscall_post_impl_getgroups16(res, (long)(gidsetsize), \
(long)(grouplist))
-#define __sanitizer_syscall_pre_setgroups16(gidsetsize, grouplist) \
- __sanitizer_syscall_pre_impl_setgroups16((long)(gidsetsize), \
+#define __sanitizer_syscall_pre_setgroups16(gidsetsize, grouplist) \
+ __sanitizer_syscall_pre_impl_setgroups16((long)(gidsetsize), \
(long)(grouplist))
-#define __sanitizer_syscall_post_setgroups16(res, gidsetsize, grouplist) \
- __sanitizer_syscall_post_impl_setgroups16(res, (long)(gidsetsize), \
+#define __sanitizer_syscall_post_setgroups16(res, gidsetsize, grouplist) \
+ __sanitizer_syscall_post_impl_setgroups16(res, (long)(gidsetsize), \
(long)(grouplist))
-#define __sanitizer_syscall_pre_getuid16() \
+#define __sanitizer_syscall_pre_getuid16() \
__sanitizer_syscall_pre_impl_getuid16()
-#define __sanitizer_syscall_post_getuid16(res) \
+#define __sanitizer_syscall_post_getuid16(res) \
__sanitizer_syscall_post_impl_getuid16(res)
-#define __sanitizer_syscall_pre_geteuid16() \
+#define __sanitizer_syscall_pre_geteuid16() \
__sanitizer_syscall_pre_impl_geteuid16()
-#define __sanitizer_syscall_post_geteuid16(res) \
+#define __sanitizer_syscall_post_geteuid16(res) \
__sanitizer_syscall_post_impl_geteuid16(res)
-#define __sanitizer_syscall_pre_getgid16() \
+#define __sanitizer_syscall_pre_getgid16() \
__sanitizer_syscall_pre_impl_getgid16()
-#define __sanitizer_syscall_post_getgid16(res) \
+#define __sanitizer_syscall_post_getgid16(res) \
__sanitizer_syscall_post_impl_getgid16(res)
-#define __sanitizer_syscall_pre_getegid16() \
+#define __sanitizer_syscall_pre_getegid16() \
__sanitizer_syscall_pre_impl_getegid16()
-#define __sanitizer_syscall_post_getegid16(res) \
+#define __sanitizer_syscall_post_getegid16(res) \
__sanitizer_syscall_post_impl_getegid16(res)
-#define __sanitizer_syscall_pre_utime(filename, times) \
+#define __sanitizer_syscall_pre_utime(filename, times) \
__sanitizer_syscall_pre_impl_utime((long)(filename), (long)(times))
-#define __sanitizer_syscall_post_utime(res, filename, times) \
+#define __sanitizer_syscall_post_utime(res, filename, times) \
__sanitizer_syscall_post_impl_utime(res, (long)(filename), (long)(times))
-#define __sanitizer_syscall_pre_utimes(filename, utimes) \
+#define __sanitizer_syscall_pre_utimes(filename, utimes) \
__sanitizer_syscall_pre_impl_utimes((long)(filename), (long)(utimes))
-#define __sanitizer_syscall_post_utimes(res, filename, utimes) \
+#define __sanitizer_syscall_post_utimes(res, filename, utimes) \
__sanitizer_syscall_post_impl_utimes(res, (long)(filename), (long)(utimes))
-#define __sanitizer_syscall_pre_lseek(fd, offset, origin) \
+#define __sanitizer_syscall_pre_lseek(fd, offset, origin) \
__sanitizer_syscall_pre_impl_lseek((long)(fd), (long)(offset), (long)(origin))
-#define __sanitizer_syscall_post_lseek(res, fd, offset, origin) \
- __sanitizer_syscall_post_impl_lseek(res, (long)(fd), (long)(offset), \
+#define __sanitizer_syscall_post_lseek(res, fd, offset, origin) \
+ __sanitizer_syscall_post_impl_lseek(res, (long)(fd), (long)(offset), \
(long)(origin))
-#define __sanitizer_syscall_pre_llseek(fd, offset_high, offset_low, result, \
- origin) \
- __sanitizer_syscall_pre_impl_llseek((long)(fd), (long)(offset_high), \
- (long)(offset_low), (long)(result), \
+#define __sanitizer_syscall_pre_llseek(fd, offset_high, offset_low, result, \
+ origin) \
+ __sanitizer_syscall_pre_impl_llseek((long)(fd), (long)(offset_high), \
+ (long)(offset_low), (long)(result), \
(long)(origin))
-#define __sanitizer_syscall_post_llseek(res, fd, offset_high, offset_low, \
- result, origin) \
- __sanitizer_syscall_post_impl_llseek(res, (long)(fd), (long)(offset_high), \
- (long)(offset_low), (long)(result), \
+#define __sanitizer_syscall_post_llseek(res, fd, offset_high, offset_low, \
+ result, origin) \
+ __sanitizer_syscall_post_impl_llseek(res, (long)(fd), (long)(offset_high), \
+ (long)(offset_low), (long)(result), \
(long)(origin))
-#define __sanitizer_syscall_pre_read(fd, buf, count) \
+#define __sanitizer_syscall_pre_read(fd, buf, count) \
__sanitizer_syscall_pre_impl_read((long)(fd), (long)(buf), (long)(count))
-#define __sanitizer_syscall_post_read(res, fd, buf, count) \
- __sanitizer_syscall_post_impl_read(res, (long)(fd), (long)(buf), \
+#define __sanitizer_syscall_post_read(res, fd, buf, count) \
+ __sanitizer_syscall_post_impl_read(res, (long)(fd), (long)(buf), \
(long)(count))
-#define __sanitizer_syscall_pre_readv(fd, vec, vlen) \
+#define __sanitizer_syscall_pre_readv(fd, vec, vlen) \
__sanitizer_syscall_pre_impl_readv((long)(fd), (long)(vec), (long)(vlen))
-#define __sanitizer_syscall_post_readv(res, fd, vec, vlen) \
- __sanitizer_syscall_post_impl_readv(res, (long)(fd), (long)(vec), \
+#define __sanitizer_syscall_post_readv(res, fd, vec, vlen) \
+ __sanitizer_syscall_post_impl_readv(res, (long)(fd), (long)(vec), \
(long)(vlen))
-#define __sanitizer_syscall_pre_write(fd, buf, count) \
+#define __sanitizer_syscall_pre_write(fd, buf, count) \
__sanitizer_syscall_pre_impl_write((long)(fd), (long)(buf), (long)(count))
-#define __sanitizer_syscall_post_write(res, fd, buf, count) \
- __sanitizer_syscall_post_impl_write(res, (long)(fd), (long)(buf), \
+#define __sanitizer_syscall_post_write(res, fd, buf, count) \
+ __sanitizer_syscall_post_impl_write(res, (long)(fd), (long)(buf), \
(long)(count))
-#define __sanitizer_syscall_pre_writev(fd, vec, vlen) \
+#define __sanitizer_syscall_pre_writev(fd, vec, vlen) \
__sanitizer_syscall_pre_impl_writev((long)(fd), (long)(vec), (long)(vlen))
-#define __sanitizer_syscall_post_writev(res, fd, vec, vlen) \
- __sanitizer_syscall_post_impl_writev(res, (long)(fd), (long)(vec), \
+#define __sanitizer_syscall_post_writev(res, fd, vec, vlen) \
+ __sanitizer_syscall_post_impl_writev(res, (long)(fd), (long)(vec), \
(long)(vlen))
#ifdef _LP64
#define __sanitizer_syscall_pre_pread64(fd, buf, count, pos) \
__sanitizer_syscall_pre_impl_pread64((long)(fd), (long)(buf), (long)(count), \
(long)(pos))
-#define __sanitizer_syscall_post_pread64(res, fd, buf, count, pos) \
- __sanitizer_syscall_post_impl_pread64(res, (long)(fd), (long)(buf), \
+#define __sanitizer_syscall_post_pread64(res, fd, buf, count, pos) \
+ __sanitizer_syscall_post_impl_pread64(res, (long)(fd), (long)(buf), \
(long)(count), (long)(pos))
-#define __sanitizer_syscall_pre_pwrite64(fd, buf, count, pos) \
- __sanitizer_syscall_pre_impl_pwrite64((long)(fd), (long)(buf), \
+#define __sanitizer_syscall_pre_pwrite64(fd, buf, count, pos) \
+ __sanitizer_syscall_pre_impl_pwrite64((long)(fd), (long)(buf), \
(long)(count), (long)(pos))
-#define __sanitizer_syscall_post_pwrite64(res, fd, buf, count, pos) \
- __sanitizer_syscall_post_impl_pwrite64(res, (long)(fd), (long)(buf), \
+#define __sanitizer_syscall_post_pwrite64(res, fd, buf, count, pos) \
+ __sanitizer_syscall_post_impl_pwrite64(res, (long)(fd), (long)(buf), \
(long)(count), (long)(pos))
#else
#define __sanitizer_syscall_pre_pread64(fd, buf, count, pos0, pos1) \
__sanitizer_syscall_pre_impl_pread64((long)(fd), (long)(buf), (long)(count), \
(long)(pos0), (long)(pos1))
-#define __sanitizer_syscall_post_pread64(res, fd, buf, count, pos0, pos1) \
- __sanitizer_syscall_post_impl_pread64(res, (long)(fd), (long)(buf), \
- (long)(count), (long)(pos0), \
- (long)(pos1))
-#define __sanitizer_syscall_pre_pwrite64(fd, buf, count, pos0, pos1) \
- __sanitizer_syscall_pre_impl_pwrite64( \
+#define __sanitizer_syscall_post_pread64(res, fd, buf, count, pos0, pos1) \
+ __sanitizer_syscall_post_impl_pread64( \
+ res, (long)(fd), (long)(buf), (long)(count), (long)(pos0), (long)(pos1))
+#define __sanitizer_syscall_pre_pwrite64(fd, buf, count, pos0, pos1) \
+ __sanitizer_syscall_pre_impl_pwrite64( \
(long)(fd), (long)(buf), (long)(count), (long)(pos0), (long)(pos1))
-#define __sanitizer_syscall_post_pwrite64(res, fd, buf, count, pos0, pos1) \
- __sanitizer_syscall_post_impl_pwrite64( \
+#define __sanitizer_syscall_post_pwrite64(res, fd, buf, count, pos0, pos1) \
+ __sanitizer_syscall_post_impl_pwrite64( \
res, (long)(fd), (long)(buf), (long)(count), (long)(pos0), (long)(pos1))
#endif
-#define __sanitizer_syscall_pre_preadv(fd, vec, vlen, pos_l, pos_h) \
- __sanitizer_syscall_pre_impl_preadv((long)(fd), (long)(vec), (long)(vlen), \
+#define __sanitizer_syscall_pre_preadv(fd, vec, vlen, pos_l, pos_h) \
+ __sanitizer_syscall_pre_impl_preadv((long)(fd), (long)(vec), (long)(vlen), \
(long)(pos_l), (long)(pos_h))
-#define __sanitizer_syscall_post_preadv(res, fd, vec, vlen, pos_l, pos_h) \
- __sanitizer_syscall_post_impl_preadv(res, (long)(fd), (long)(vec), \
- (long)(vlen), (long)(pos_l), \
+#define __sanitizer_syscall_post_preadv(res, fd, vec, vlen, pos_l, pos_h) \
+ __sanitizer_syscall_post_impl_preadv(res, (long)(fd), (long)(vec), \
+ (long)(vlen), (long)(pos_l), \
(long)(pos_h))
-#define __sanitizer_syscall_pre_pwritev(fd, vec, vlen, pos_l, pos_h) \
- __sanitizer_syscall_pre_impl_pwritev((long)(fd), (long)(vec), (long)(vlen), \
+#define __sanitizer_syscall_pre_pwritev(fd, vec, vlen, pos_l, pos_h) \
+ __sanitizer_syscall_pre_impl_pwritev((long)(fd), (long)(vec), (long)(vlen), \
(long)(pos_l), (long)(pos_h))
-#define __sanitizer_syscall_post_pwritev(res, fd, vec, vlen, pos_l, pos_h) \
- __sanitizer_syscall_post_impl_pwritev(res, (long)(fd), (long)(vec), \
- (long)(vlen), (long)(pos_l), \
+#define __sanitizer_syscall_post_pwritev(res, fd, vec, vlen, pos_l, pos_h) \
+ __sanitizer_syscall_post_impl_pwritev(res, (long)(fd), (long)(vec), \
+ (long)(vlen), (long)(pos_l), \
(long)(pos_h))
-#define __sanitizer_syscall_pre_getcwd(buf, size) \
+#define __sanitizer_syscall_pre_getcwd(buf, size) \
__sanitizer_syscall_pre_impl_getcwd((long)(buf), (long)(size))
-#define __sanitizer_syscall_post_getcwd(res, buf, size) \
+#define __sanitizer_syscall_post_getcwd(res, buf, size) \
__sanitizer_syscall_post_impl_getcwd(res, (long)(buf), (long)(size))
-#define __sanitizer_syscall_pre_mkdir(pathname, mode) \
+#define __sanitizer_syscall_pre_mkdir(pathname, mode) \
__sanitizer_syscall_pre_impl_mkdir((long)(pathname), (long)(mode))
-#define __sanitizer_syscall_post_mkdir(res, pathname, mode) \
+#define __sanitizer_syscall_post_mkdir(res, pathname, mode) \
__sanitizer_syscall_post_impl_mkdir(res, (long)(pathname), (long)(mode))
-#define __sanitizer_syscall_pre_chdir(filename) \
+#define __sanitizer_syscall_pre_chdir(filename) \
__sanitizer_syscall_pre_impl_chdir((long)(filename))
-#define __sanitizer_syscall_post_chdir(res, filename) \
+#define __sanitizer_syscall_post_chdir(res, filename) \
__sanitizer_syscall_post_impl_chdir(res, (long)(filename))
-#define __sanitizer_syscall_pre_fchdir(fd) \
+#define __sanitizer_syscall_pre_fchdir(fd) \
__sanitizer_syscall_pre_impl_fchdir((long)(fd))
-#define __sanitizer_syscall_post_fchdir(res, fd) \
+#define __sanitizer_syscall_post_fchdir(res, fd) \
__sanitizer_syscall_post_impl_fchdir(res, (long)(fd))
-#define __sanitizer_syscall_pre_rmdir(pathname) \
+#define __sanitizer_syscall_pre_rmdir(pathname) \
__sanitizer_syscall_pre_impl_rmdir((long)(pathname))
-#define __sanitizer_syscall_post_rmdir(res, pathname) \
+#define __sanitizer_syscall_post_rmdir(res, pathname) \
__sanitizer_syscall_post_impl_rmdir(res, (long)(pathname))
-#define __sanitizer_syscall_pre_lookup_dcookie(cookie64, buf, len) \
- __sanitizer_syscall_pre_impl_lookup_dcookie((long)(cookie64), (long)(buf), \
+#define __sanitizer_syscall_pre_lookup_dcookie(cookie64, buf, len) \
+ __sanitizer_syscall_pre_impl_lookup_dcookie((long)(cookie64), (long)(buf), \
(long)(len))
-#define __sanitizer_syscall_post_lookup_dcookie(res, cookie64, buf, len) \
- __sanitizer_syscall_post_impl_lookup_dcookie(res, (long)(cookie64), \
+#define __sanitizer_syscall_post_lookup_dcookie(res, cookie64, buf, len) \
+ __sanitizer_syscall_post_impl_lookup_dcookie(res, (long)(cookie64), \
(long)(buf), (long)(len))
-#define __sanitizer_syscall_pre_quotactl(cmd, special, id, addr) \
- __sanitizer_syscall_pre_impl_quotactl((long)(cmd), (long)(special), \
+#define __sanitizer_syscall_pre_quotactl(cmd, special, id, addr) \
+ __sanitizer_syscall_pre_impl_quotactl((long)(cmd), (long)(special), \
(long)(id), (long)(addr))
-#define __sanitizer_syscall_post_quotactl(res, cmd, special, id, addr) \
- __sanitizer_syscall_post_impl_quotactl(res, (long)(cmd), (long)(special), \
+#define __sanitizer_syscall_post_quotactl(res, cmd, special, id, addr) \
+ __sanitizer_syscall_post_impl_quotactl(res, (long)(cmd), (long)(special), \
(long)(id), (long)(addr))
-#define __sanitizer_syscall_pre_getdents(fd, dirent, count) \
- __sanitizer_syscall_pre_impl_getdents((long)(fd), (long)(dirent), \
+#define __sanitizer_syscall_pre_getdents(fd, dirent, count) \
+ __sanitizer_syscall_pre_impl_getdents((long)(fd), (long)(dirent), \
(long)(count))
-#define __sanitizer_syscall_post_getdents(res, fd, dirent, count) \
- __sanitizer_syscall_post_impl_getdents(res, (long)(fd), (long)(dirent), \
+#define __sanitizer_syscall_post_getdents(res, fd, dirent, count) \
+ __sanitizer_syscall_post_impl_getdents(res, (long)(fd), (long)(dirent), \
(long)(count))
-#define __sanitizer_syscall_pre_getdents64(fd, dirent, count) \
- __sanitizer_syscall_pre_impl_getdents64((long)(fd), (long)(dirent), \
+#define __sanitizer_syscall_pre_getdents64(fd, dirent, count) \
+ __sanitizer_syscall_pre_impl_getdents64((long)(fd), (long)(dirent), \
(long)(count))
-#define __sanitizer_syscall_post_getdents64(res, fd, dirent, count) \
- __sanitizer_syscall_post_impl_getdents64(res, (long)(fd), (long)(dirent), \
+#define __sanitizer_syscall_post_getdents64(res, fd, dirent, count) \
+ __sanitizer_syscall_post_impl_getdents64(res, (long)(fd), (long)(dirent), \
(long)(count))
#define __sanitizer_syscall_pre_setsockopt(fd, level, optname, optval, optlen) \
__sanitizer_syscall_pre_impl_setsockopt((long)(fd), (long)(level), \
(long)(optname), (long)(optval), \
(long)(optlen))
-#define __sanitizer_syscall_post_setsockopt(res, fd, level, optname, optval, \
- optlen) \
- __sanitizer_syscall_post_impl_setsockopt(res, (long)(fd), (long)(level), \
- (long)(optname), (long)(optval), \
+#define __sanitizer_syscall_post_setsockopt(res, fd, level, optname, optval, \
+ optlen) \
+ __sanitizer_syscall_post_impl_setsockopt(res, (long)(fd), (long)(level), \
+ (long)(optname), (long)(optval), \
(long)(optlen))
#define __sanitizer_syscall_pre_getsockopt(fd, level, optname, optval, optlen) \
__sanitizer_syscall_pre_impl_getsockopt((long)(fd), (long)(level), \
(long)(optname), (long)(optval), \
(long)(optlen))
-#define __sanitizer_syscall_post_getsockopt(res, fd, level, optname, optval, \
- optlen) \
- __sanitizer_syscall_post_impl_getsockopt(res, (long)(fd), (long)(level), \
- (long)(optname), (long)(optval), \
+#define __sanitizer_syscall_post_getsockopt(res, fd, level, optname, optval, \
+ optlen) \
+ __sanitizer_syscall_post_impl_getsockopt(res, (long)(fd), (long)(level), \
+ (long)(optname), (long)(optval), \
(long)(optlen))
-#define __sanitizer_syscall_pre_bind(arg0, arg1, arg2) \
+#define __sanitizer_syscall_pre_bind(arg0, arg1, arg2) \
__sanitizer_syscall_pre_impl_bind((long)(arg0), (long)(arg1), (long)(arg2))
-#define __sanitizer_syscall_post_bind(res, arg0, arg1, arg2) \
- __sanitizer_syscall_post_impl_bind(res, (long)(arg0), (long)(arg1), \
+#define __sanitizer_syscall_post_bind(res, arg0, arg1, arg2) \
+ __sanitizer_syscall_post_impl_bind(res, (long)(arg0), (long)(arg1), \
(long)(arg2))
-#define __sanitizer_syscall_pre_connect(arg0, arg1, arg2) \
+#define __sanitizer_syscall_pre_connect(arg0, arg1, arg2) \
__sanitizer_syscall_pre_impl_connect((long)(arg0), (long)(arg1), (long)(arg2))
-#define __sanitizer_syscall_post_connect(res, arg0, arg1, arg2) \
- __sanitizer_syscall_post_impl_connect(res, (long)(arg0), (long)(arg1), \
+#define __sanitizer_syscall_post_connect(res, arg0, arg1, arg2) \
+ __sanitizer_syscall_post_impl_connect(res, (long)(arg0), (long)(arg1), \
(long)(arg2))
-#define __sanitizer_syscall_pre_accept(arg0, arg1, arg2) \
+#define __sanitizer_syscall_pre_accept(arg0, arg1, arg2) \
__sanitizer_syscall_pre_impl_accept((long)(arg0), (long)(arg1), (long)(arg2))
-#define __sanitizer_syscall_post_accept(res, arg0, arg1, arg2) \
- __sanitizer_syscall_post_impl_accept(res, (long)(arg0), (long)(arg1), \
+#define __sanitizer_syscall_post_accept(res, arg0, arg1, arg2) \
+ __sanitizer_syscall_post_impl_accept(res, (long)(arg0), (long)(arg1), \
(long)(arg2))
-#define __sanitizer_syscall_pre_accept4(arg0, arg1, arg2, arg3) \
- __sanitizer_syscall_pre_impl_accept4((long)(arg0), (long)(arg1), \
+#define __sanitizer_syscall_pre_accept4(arg0, arg1, arg2, arg3) \
+ __sanitizer_syscall_pre_impl_accept4((long)(arg0), (long)(arg1), \
(long)(arg2), (long)(arg3))
-#define __sanitizer_syscall_post_accept4(res, arg0, arg1, arg2, arg3) \
- __sanitizer_syscall_post_impl_accept4(res, (long)(arg0), (long)(arg1), \
+#define __sanitizer_syscall_post_accept4(res, arg0, arg1, arg2, arg3) \
+ __sanitizer_syscall_post_impl_accept4(res, (long)(arg0), (long)(arg1), \
(long)(arg2), (long)(arg3))
-#define __sanitizer_syscall_pre_getsockname(arg0, arg1, arg2) \
- __sanitizer_syscall_pre_impl_getsockname((long)(arg0), (long)(arg1), \
+#define __sanitizer_syscall_pre_getsockname(arg0, arg1, arg2) \
+ __sanitizer_syscall_pre_impl_getsockname((long)(arg0), (long)(arg1), \
(long)(arg2))
-#define __sanitizer_syscall_post_getsockname(res, arg0, arg1, arg2) \
- __sanitizer_syscall_post_impl_getsockname(res, (long)(arg0), (long)(arg1), \
+#define __sanitizer_syscall_post_getsockname(res, arg0, arg1, arg2) \
+ __sanitizer_syscall_post_impl_getsockname(res, (long)(arg0), (long)(arg1), \
(long)(arg2))
-#define __sanitizer_syscall_pre_getpeername(arg0, arg1, arg2) \
- __sanitizer_syscall_pre_impl_getpeername((long)(arg0), (long)(arg1), \
+#define __sanitizer_syscall_pre_getpeername(arg0, arg1, arg2) \
+ __sanitizer_syscall_pre_impl_getpeername((long)(arg0), (long)(arg1), \
(long)(arg2))
-#define __sanitizer_syscall_post_getpeername(res, arg0, arg1, arg2) \
- __sanitizer_syscall_post_impl_getpeername(res, (long)(arg0), (long)(arg1), \
+#define __sanitizer_syscall_post_getpeername(res, arg0, arg1, arg2) \
+ __sanitizer_syscall_post_impl_getpeername(res, (long)(arg0), (long)(arg1), \
(long)(arg2))
-#define __sanitizer_syscall_pre_send(arg0, arg1, arg2, arg3) \
- __sanitizer_syscall_pre_impl_send((long)(arg0), (long)(arg1), (long)(arg2), \
+#define __sanitizer_syscall_pre_send(arg0, arg1, arg2, arg3) \
+ __sanitizer_syscall_pre_impl_send((long)(arg0), (long)(arg1), (long)(arg2), \
(long)(arg3))
-#define __sanitizer_syscall_post_send(res, arg0, arg1, arg2, arg3) \
- __sanitizer_syscall_post_impl_send(res, (long)(arg0), (long)(arg1), \
+#define __sanitizer_syscall_post_send(res, arg0, arg1, arg2, arg3) \
+ __sanitizer_syscall_post_impl_send(res, (long)(arg0), (long)(arg1), \
(long)(arg2), (long)(arg3))
-#define __sanitizer_syscall_pre_sendto(arg0, arg1, arg2, arg3, arg4, arg5) \
- __sanitizer_syscall_pre_impl_sendto((long)(arg0), (long)(arg1), \
- (long)(arg2), (long)(arg3), \
+#define __sanitizer_syscall_pre_sendto(arg0, arg1, arg2, arg3, arg4, arg5) \
+ __sanitizer_syscall_pre_impl_sendto((long)(arg0), (long)(arg1), \
+ (long)(arg2), (long)(arg3), \
(long)(arg4), (long)(arg5))
-#define __sanitizer_syscall_post_sendto(res, arg0, arg1, arg2, arg3, arg4, \
- arg5) \
- __sanitizer_syscall_post_impl_sendto(res, (long)(arg0), (long)(arg1), \
- (long)(arg2), (long)(arg3), \
+#define __sanitizer_syscall_post_sendto(res, arg0, arg1, arg2, arg3, arg4, \
+ arg5) \
+ __sanitizer_syscall_post_impl_sendto(res, (long)(arg0), (long)(arg1), \
+ (long)(arg2), (long)(arg3), \
(long)(arg4), (long)(arg5))
-#define __sanitizer_syscall_pre_sendmsg(fd, msg, flags) \
+#define __sanitizer_syscall_pre_sendmsg(fd, msg, flags) \
__sanitizer_syscall_pre_impl_sendmsg((long)(fd), (long)(msg), (long)(flags))
-#define __sanitizer_syscall_post_sendmsg(res, fd, msg, flags) \
- __sanitizer_syscall_post_impl_sendmsg(res, (long)(fd), (long)(msg), \
+#define __sanitizer_syscall_post_sendmsg(res, fd, msg, flags) \
+ __sanitizer_syscall_post_impl_sendmsg(res, (long)(fd), (long)(msg), \
(long)(flags))
#define __sanitizer_syscall_pre_sendmmsg(fd, msg, vlen, flags) \
__sanitizer_syscall_pre_impl_sendmmsg((long)(fd), (long)(msg), (long)(vlen), \
(long)(flags))
-#define __sanitizer_syscall_post_sendmmsg(res, fd, msg, vlen, flags) \
- __sanitizer_syscall_post_impl_sendmmsg(res, (long)(fd), (long)(msg), \
+#define __sanitizer_syscall_post_sendmmsg(res, fd, msg, vlen, flags) \
+ __sanitizer_syscall_post_impl_sendmmsg(res, (long)(fd), (long)(msg), \
(long)(vlen), (long)(flags))
-#define __sanitizer_syscall_pre_recv(arg0, arg1, arg2, arg3) \
- __sanitizer_syscall_pre_impl_recv((long)(arg0), (long)(arg1), (long)(arg2), \
+#define __sanitizer_syscall_pre_recv(arg0, arg1, arg2, arg3) \
+ __sanitizer_syscall_pre_impl_recv((long)(arg0), (long)(arg1), (long)(arg2), \
(long)(arg3))
-#define __sanitizer_syscall_post_recv(res, arg0, arg1, arg2, arg3) \
- __sanitizer_syscall_post_impl_recv(res, (long)(arg0), (long)(arg1), \
+#define __sanitizer_syscall_post_recv(res, arg0, arg1, arg2, arg3) \
+ __sanitizer_syscall_post_impl_recv(res, (long)(arg0), (long)(arg1), \
(long)(arg2), (long)(arg3))
-#define __sanitizer_syscall_pre_recvfrom(arg0, arg1, arg2, arg3, arg4, arg5) \
- __sanitizer_syscall_pre_impl_recvfrom((long)(arg0), (long)(arg1), \
- (long)(arg2), (long)(arg3), \
+#define __sanitizer_syscall_pre_recvfrom(arg0, arg1, arg2, arg3, arg4, arg5) \
+ __sanitizer_syscall_pre_impl_recvfrom((long)(arg0), (long)(arg1), \
+ (long)(arg2), (long)(arg3), \
(long)(arg4), (long)(arg5))
-#define __sanitizer_syscall_post_recvfrom(res, arg0, arg1, arg2, arg3, arg4, \
- arg5) \
- __sanitizer_syscall_post_impl_recvfrom(res, (long)(arg0), (long)(arg1), \
- (long)(arg2), (long)(arg3), \
+#define __sanitizer_syscall_post_recvfrom(res, arg0, arg1, arg2, arg3, arg4, \
+ arg5) \
+ __sanitizer_syscall_post_impl_recvfrom(res, (long)(arg0), (long)(arg1), \
+ (long)(arg2), (long)(arg3), \
(long)(arg4), (long)(arg5))
-#define __sanitizer_syscall_pre_recvmsg(fd, msg, flags) \
+#define __sanitizer_syscall_pre_recvmsg(fd, msg, flags) \
__sanitizer_syscall_pre_impl_recvmsg((long)(fd), (long)(msg), (long)(flags))
-#define __sanitizer_syscall_post_recvmsg(res, fd, msg, flags) \
- __sanitizer_syscall_post_impl_recvmsg(res, (long)(fd), (long)(msg), \
+#define __sanitizer_syscall_post_recvmsg(res, fd, msg, flags) \
+ __sanitizer_syscall_post_impl_recvmsg(res, (long)(fd), (long)(msg), \
(long)(flags))
#define __sanitizer_syscall_pre_recvmmsg(fd, msg, vlen, flags, timeout) \
__sanitizer_syscall_pre_impl_recvmmsg((long)(fd), (long)(msg), (long)(vlen), \
(long)(flags), (long)(timeout))
-#define __sanitizer_syscall_post_recvmmsg(res, fd, msg, vlen, flags, timeout) \
- __sanitizer_syscall_post_impl_recvmmsg(res, (long)(fd), (long)(msg), \
- (long)(vlen), (long)(flags), \
+#define __sanitizer_syscall_post_recvmmsg(res, fd, msg, vlen, flags, timeout) \
+ __sanitizer_syscall_post_impl_recvmmsg(res, (long)(fd), (long)(msg), \
+ (long)(vlen), (long)(flags), \
(long)(timeout))
-#define __sanitizer_syscall_pre_socket(arg0, arg1, arg2) \
+#define __sanitizer_syscall_pre_socket(arg0, arg1, arg2) \
__sanitizer_syscall_pre_impl_socket((long)(arg0), (long)(arg1), (long)(arg2))
-#define __sanitizer_syscall_post_socket(res, arg0, arg1, arg2) \
- __sanitizer_syscall_post_impl_socket(res, (long)(arg0), (long)(arg1), \
+#define __sanitizer_syscall_post_socket(res, arg0, arg1, arg2) \
+ __sanitizer_syscall_post_impl_socket(res, (long)(arg0), (long)(arg1), \
(long)(arg2))
-#define __sanitizer_syscall_pre_socketpair(arg0, arg1, arg2, arg3) \
- __sanitizer_syscall_pre_impl_socketpair((long)(arg0), (long)(arg1), \
+#define __sanitizer_syscall_pre_socketpair(arg0, arg1, arg2, arg3) \
+ __sanitizer_syscall_pre_impl_socketpair((long)(arg0), (long)(arg1), \
(long)(arg2), (long)(arg3))
-#define __sanitizer_syscall_post_socketpair(res, arg0, arg1, arg2, arg3) \
- __sanitizer_syscall_post_impl_socketpair(res, (long)(arg0), (long)(arg1), \
+#define __sanitizer_syscall_post_socketpair(res, arg0, arg1, arg2, arg3) \
+ __sanitizer_syscall_post_impl_socketpair(res, (long)(arg0), (long)(arg1), \
(long)(arg2), (long)(arg3))
-#define __sanitizer_syscall_pre_socketcall(call, args) \
+#define __sanitizer_syscall_pre_socketcall(call, args) \
__sanitizer_syscall_pre_impl_socketcall((long)(call), (long)(args))
-#define __sanitizer_syscall_post_socketcall(res, call, args) \
+#define __sanitizer_syscall_post_socketcall(res, call, args) \
__sanitizer_syscall_post_impl_socketcall(res, (long)(call), (long)(args))
-#define __sanitizer_syscall_pre_listen(arg0, arg1) \
+#define __sanitizer_syscall_pre_listen(arg0, arg1) \
__sanitizer_syscall_pre_impl_listen((long)(arg0), (long)(arg1))
-#define __sanitizer_syscall_post_listen(res, arg0, arg1) \
+#define __sanitizer_syscall_post_listen(res, arg0, arg1) \
__sanitizer_syscall_post_impl_listen(res, (long)(arg0), (long)(arg1))
-#define __sanitizer_syscall_pre_poll(ufds, nfds, timeout) \
+#define __sanitizer_syscall_pre_poll(ufds, nfds, timeout) \
__sanitizer_syscall_pre_impl_poll((long)(ufds), (long)(nfds), (long)(timeout))
-#define __sanitizer_syscall_post_poll(res, ufds, nfds, timeout) \
- __sanitizer_syscall_post_impl_poll(res, (long)(ufds), (long)(nfds), \
+#define __sanitizer_syscall_post_poll(res, ufds, nfds, timeout) \
+ __sanitizer_syscall_post_impl_poll(res, (long)(ufds), (long)(nfds), \
(long)(timeout))
-#define __sanitizer_syscall_pre_select(n, inp, outp, exp, tvp) \
- __sanitizer_syscall_pre_impl_select((long)(n), (long)(inp), (long)(outp), \
+#define __sanitizer_syscall_pre_select(n, inp, outp, exp, tvp) \
+ __sanitizer_syscall_pre_impl_select((long)(n), (long)(inp), (long)(outp), \
(long)(exp), (long)(tvp))
-#define __sanitizer_syscall_post_select(res, n, inp, outp, exp, tvp) \
- __sanitizer_syscall_post_impl_select(res, (long)(n), (long)(inp), \
+#define __sanitizer_syscall_post_select(res, n, inp, outp, exp, tvp) \
+ __sanitizer_syscall_post_impl_select(res, (long)(n), (long)(inp), \
(long)(outp), (long)(exp), (long)(tvp))
-#define __sanitizer_syscall_pre_old_select(arg) \
+#define __sanitizer_syscall_pre_old_select(arg) \
__sanitizer_syscall_pre_impl_old_select((long)(arg))
-#define __sanitizer_syscall_post_old_select(res, arg) \
+#define __sanitizer_syscall_post_old_select(res, arg) \
__sanitizer_syscall_post_impl_old_select(res, (long)(arg))
-#define __sanitizer_syscall_pre_epoll_create(size) \
+#define __sanitizer_syscall_pre_epoll_create(size) \
__sanitizer_syscall_pre_impl_epoll_create((long)(size))
-#define __sanitizer_syscall_post_epoll_create(res, size) \
+#define __sanitizer_syscall_post_epoll_create(res, size) \
__sanitizer_syscall_post_impl_epoll_create(res, (long)(size))
-#define __sanitizer_syscall_pre_epoll_create1(flags) \
+#define __sanitizer_syscall_pre_epoll_create1(flags) \
__sanitizer_syscall_pre_impl_epoll_create1((long)(flags))
-#define __sanitizer_syscall_post_epoll_create1(res, flags) \
+#define __sanitizer_syscall_post_epoll_create1(res, flags) \
__sanitizer_syscall_post_impl_epoll_create1(res, (long)(flags))
#define __sanitizer_syscall_pre_epoll_ctl(epfd, op, fd, event) \
__sanitizer_syscall_pre_impl_epoll_ctl((long)(epfd), (long)(op), (long)(fd), \
(long)(event))
-#define __sanitizer_syscall_post_epoll_ctl(res, epfd, op, fd, event) \
- __sanitizer_syscall_post_impl_epoll_ctl(res, (long)(epfd), (long)(op), \
+#define __sanitizer_syscall_post_epoll_ctl(res, epfd, op, fd, event) \
+ __sanitizer_syscall_post_impl_epoll_ctl(res, (long)(epfd), (long)(op), \
(long)(fd), (long)(event))
-#define __sanitizer_syscall_pre_epoll_wait(epfd, events, maxevents, timeout) \
- __sanitizer_syscall_pre_impl_epoll_wait((long)(epfd), (long)(events), \
+#define __sanitizer_syscall_pre_epoll_wait(epfd, events, maxevents, timeout) \
+ __sanitizer_syscall_pre_impl_epoll_wait((long)(epfd), (long)(events), \
(long)(maxevents), (long)(timeout))
-#define __sanitizer_syscall_post_epoll_wait(res, epfd, events, maxevents, \
- timeout) \
- __sanitizer_syscall_post_impl_epoll_wait(res, (long)(epfd), (long)(events), \
+#define __sanitizer_syscall_post_epoll_wait(res, epfd, events, maxevents, \
+ timeout) \
+ __sanitizer_syscall_post_impl_epoll_wait(res, (long)(epfd), (long)(events), \
(long)(maxevents), (long)(timeout))
-#define __sanitizer_syscall_pre_epoll_pwait(epfd, events, maxevents, timeout, \
- sigmask, sigsetsize) \
- __sanitizer_syscall_pre_impl_epoll_pwait( \
- (long)(epfd), (long)(events), (long)(maxevents), (long)(timeout), \
+#define __sanitizer_syscall_pre_epoll_pwait(epfd, events, maxevents, timeout, \
+ sigmask, sigsetsize) \
+ __sanitizer_syscall_pre_impl_epoll_pwait( \
+ (long)(epfd), (long)(events), (long)(maxevents), (long)(timeout), \
+ (long)(sigmask), (long)(sigsetsize))
+#define __sanitizer_syscall_post_epoll_pwait(res, epfd, events, maxevents, \
+ timeout, sigmask, sigsetsize) \
+ __sanitizer_syscall_post_impl_epoll_pwait( \
+ res, (long)(epfd), (long)(events), (long)(maxevents), (long)(timeout), \
(long)(sigmask), (long)(sigsetsize))
-#define __sanitizer_syscall_post_epoll_pwait(res, epfd, events, maxevents, \
- timeout, sigmask, sigsetsize) \
- __sanitizer_syscall_post_impl_epoll_pwait( \
- res, (long)(epfd), (long)(events), (long)(maxevents), (long)(timeout), \
+#define __sanitizer_syscall_pre_epoll_pwait2(epfd, events, maxevents, timeout, \
+ sigmask, sigsetsize) \
+ __sanitizer_syscall_pre_impl_epoll_pwait2( \
+ (long)(epfd), (long)(events), (long)(maxevents), (long)(timeout), \
(long)(sigmask), (long)(sigsetsize))
-#define __sanitizer_syscall_pre_gethostname(name, len) \
+#define __sanitizer_syscall_post_epoll_pwait2(res, epfd, events, maxevents, \
+ timeout, sigmask, sigsetsize) \
+ __sanitizer_syscall_post_impl_epoll_pwait2( \
+ res, (long)(epfd), (long)(events), (long)(maxevents), (long)(timeout), \
+ (long)(sigmask), (long)(sigsetsize))
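/* Minimal usage sketch for the epoll_pwait2 hooks added just above (not part
 * of the vendor diff). It only shows the pattern already visible in this
 * header: call the _pre_ macro before issuing the raw syscall and the _post_
 * macro with the result afterwards, so the sanitizer runtime can inspect the
 * kernel-touched memory. raw_epoll_pwait2() is a hypothetical helper standing
 * in for however the caller actually issues the syscall.
 */
#include <sanitizer/linux_syscall_hooks.h>

long my_epoll_pwait2(long epfd, long events, long maxevents, long timeout,
                     long sigmask, long sigsetsize) {
  __sanitizer_syscall_pre_epoll_pwait2(epfd, events, maxevents, timeout,
                                       sigmask, sigsetsize);
  /* hypothetical raw syscall invocation */
  long res = raw_epoll_pwait2(epfd, events, maxevents, timeout, sigmask,
                              sigsetsize);
  __sanitizer_syscall_post_epoll_pwait2(res, epfd, events, maxevents, timeout,
                                        sigmask, sigsetsize);
  return res;
}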
+#define __sanitizer_syscall_pre_gethostname(name, len) \
__sanitizer_syscall_pre_impl_gethostname((long)(name), (long)(len))
-#define __sanitizer_syscall_post_gethostname(res, name, len) \
+#define __sanitizer_syscall_post_gethostname(res, name, len) \
__sanitizer_syscall_post_impl_gethostname(res, (long)(name), (long)(len))
-#define __sanitizer_syscall_pre_sethostname(name, len) \
+#define __sanitizer_syscall_pre_sethostname(name, len) \
__sanitizer_syscall_pre_impl_sethostname((long)(name), (long)(len))
-#define __sanitizer_syscall_post_sethostname(res, name, len) \
+#define __sanitizer_syscall_post_sethostname(res, name, len) \
__sanitizer_syscall_post_impl_sethostname(res, (long)(name), (long)(len))
-#define __sanitizer_syscall_pre_setdomainname(name, len) \
+#define __sanitizer_syscall_pre_setdomainname(name, len) \
__sanitizer_syscall_pre_impl_setdomainname((long)(name), (long)(len))
-#define __sanitizer_syscall_post_setdomainname(res, name, len) \
+#define __sanitizer_syscall_post_setdomainname(res, name, len) \
__sanitizer_syscall_post_impl_setdomainname(res, (long)(name), (long)(len))
-#define __sanitizer_syscall_pre_newuname(name) \
+#define __sanitizer_syscall_pre_newuname(name) \
__sanitizer_syscall_pre_impl_newuname((long)(name))
-#define __sanitizer_syscall_post_newuname(res, name) \
+#define __sanitizer_syscall_post_newuname(res, name) \
__sanitizer_syscall_post_impl_newuname(res, (long)(name))
-#define __sanitizer_syscall_pre_uname(arg0) \
+#define __sanitizer_syscall_pre_uname(arg0) \
__sanitizer_syscall_pre_impl_uname((long)(arg0))
-#define __sanitizer_syscall_post_uname(res, arg0) \
+#define __sanitizer_syscall_post_uname(res, arg0) \
__sanitizer_syscall_post_impl_uname(res, (long)(arg0))
-#define __sanitizer_syscall_pre_olduname(arg0) \
+#define __sanitizer_syscall_pre_olduname(arg0) \
__sanitizer_syscall_pre_impl_olduname((long)(arg0))
-#define __sanitizer_syscall_post_olduname(res, arg0) \
+#define __sanitizer_syscall_post_olduname(res, arg0) \
__sanitizer_syscall_post_impl_olduname(res, (long)(arg0))
-#define __sanitizer_syscall_pre_getrlimit(resource, rlim) \
+#define __sanitizer_syscall_pre_getrlimit(resource, rlim) \
__sanitizer_syscall_pre_impl_getrlimit((long)(resource), (long)(rlim))
-#define __sanitizer_syscall_post_getrlimit(res, resource, rlim) \
+#define __sanitizer_syscall_post_getrlimit(res, resource, rlim) \
__sanitizer_syscall_post_impl_getrlimit(res, (long)(resource), (long)(rlim))
-#define __sanitizer_syscall_pre_old_getrlimit(resource, rlim) \
+#define __sanitizer_syscall_pre_old_getrlimit(resource, rlim) \
__sanitizer_syscall_pre_impl_old_getrlimit((long)(resource), (long)(rlim))
-#define __sanitizer_syscall_post_old_getrlimit(res, resource, rlim) \
- __sanitizer_syscall_post_impl_old_getrlimit(res, (long)(resource), \
+#define __sanitizer_syscall_post_old_getrlimit(res, resource, rlim) \
+ __sanitizer_syscall_post_impl_old_getrlimit(res, (long)(resource), \
(long)(rlim))
-#define __sanitizer_syscall_pre_setrlimit(resource, rlim) \
+#define __sanitizer_syscall_pre_setrlimit(resource, rlim) \
__sanitizer_syscall_pre_impl_setrlimit((long)(resource), (long)(rlim))
-#define __sanitizer_syscall_post_setrlimit(res, resource, rlim) \
+#define __sanitizer_syscall_post_setrlimit(res, resource, rlim) \
__sanitizer_syscall_post_impl_setrlimit(res, (long)(resource), (long)(rlim))
-#define __sanitizer_syscall_pre_prlimit64(pid, resource, new_rlim, old_rlim) \
- __sanitizer_syscall_pre_impl_prlimit64((long)(pid), (long)(resource), \
+#define __sanitizer_syscall_pre_prlimit64(pid, resource, new_rlim, old_rlim) \
+ __sanitizer_syscall_pre_impl_prlimit64((long)(pid), (long)(resource), \
(long)(new_rlim), (long)(old_rlim))
-#define __sanitizer_syscall_post_prlimit64(res, pid, resource, new_rlim, \
- old_rlim) \
- __sanitizer_syscall_post_impl_prlimit64(res, (long)(pid), (long)(resource), \
+#define __sanitizer_syscall_post_prlimit64(res, pid, resource, new_rlim, \
+ old_rlim) \
+ __sanitizer_syscall_post_impl_prlimit64(res, (long)(pid), (long)(resource), \
(long)(new_rlim), (long)(old_rlim))
-#define __sanitizer_syscall_pre_getrusage(who, ru) \
+#define __sanitizer_syscall_pre_getrusage(who, ru) \
__sanitizer_syscall_pre_impl_getrusage((long)(who), (long)(ru))
-#define __sanitizer_syscall_post_getrusage(res, who, ru) \
+#define __sanitizer_syscall_post_getrusage(res, who, ru) \
__sanitizer_syscall_post_impl_getrusage(res, (long)(who), (long)(ru))
-#define __sanitizer_syscall_pre_umask(mask) \
+#define __sanitizer_syscall_pre_umask(mask) \
__sanitizer_syscall_pre_impl_umask((long)(mask))
-#define __sanitizer_syscall_post_umask(res, mask) \
+#define __sanitizer_syscall_post_umask(res, mask) \
__sanitizer_syscall_post_impl_umask(res, (long)(mask))
-#define __sanitizer_syscall_pre_msgget(key, msgflg) \
+#define __sanitizer_syscall_pre_msgget(key, msgflg) \
__sanitizer_syscall_pre_impl_msgget((long)(key), (long)(msgflg))
-#define __sanitizer_syscall_post_msgget(res, key, msgflg) \
+#define __sanitizer_syscall_post_msgget(res, key, msgflg) \
__sanitizer_syscall_post_impl_msgget(res, (long)(key), (long)(msgflg))
-#define __sanitizer_syscall_pre_msgsnd(msqid, msgp, msgsz, msgflg) \
- __sanitizer_syscall_pre_impl_msgsnd((long)(msqid), (long)(msgp), \
+#define __sanitizer_syscall_pre_msgsnd(msqid, msgp, msgsz, msgflg) \
+ __sanitizer_syscall_pre_impl_msgsnd((long)(msqid), (long)(msgp), \
(long)(msgsz), (long)(msgflg))
-#define __sanitizer_syscall_post_msgsnd(res, msqid, msgp, msgsz, msgflg) \
- __sanitizer_syscall_post_impl_msgsnd(res, (long)(msqid), (long)(msgp), \
+#define __sanitizer_syscall_post_msgsnd(res, msqid, msgp, msgsz, msgflg) \
+ __sanitizer_syscall_post_impl_msgsnd(res, (long)(msqid), (long)(msgp), \
(long)(msgsz), (long)(msgflg))
-#define __sanitizer_syscall_pre_msgrcv(msqid, msgp, msgsz, msgtyp, msgflg) \
- __sanitizer_syscall_pre_impl_msgrcv((long)(msqid), (long)(msgp), \
- (long)(msgsz), (long)(msgtyp), \
+#define __sanitizer_syscall_pre_msgrcv(msqid, msgp, msgsz, msgtyp, msgflg) \
+ __sanitizer_syscall_pre_impl_msgrcv((long)(msqid), (long)(msgp), \
+ (long)(msgsz), (long)(msgtyp), \
(long)(msgflg))
-#define __sanitizer_syscall_post_msgrcv(res, msqid, msgp, msgsz, msgtyp, \
- msgflg) \
- __sanitizer_syscall_post_impl_msgrcv(res, (long)(msqid), (long)(msgp), \
- (long)(msgsz), (long)(msgtyp), \
+#define __sanitizer_syscall_post_msgrcv(res, msqid, msgp, msgsz, msgtyp, \
+ msgflg) \
+ __sanitizer_syscall_post_impl_msgrcv(res, (long)(msqid), (long)(msgp), \
+ (long)(msgsz), (long)(msgtyp), \
(long)(msgflg))
-#define __sanitizer_syscall_pre_msgctl(msqid, cmd, buf) \
+#define __sanitizer_syscall_pre_msgctl(msqid, cmd, buf) \
__sanitizer_syscall_pre_impl_msgctl((long)(msqid), (long)(cmd), (long)(buf))
-#define __sanitizer_syscall_post_msgctl(res, msqid, cmd, buf) \
- __sanitizer_syscall_post_impl_msgctl(res, (long)(msqid), (long)(cmd), \
+#define __sanitizer_syscall_post_msgctl(res, msqid, cmd, buf) \
+ __sanitizer_syscall_post_impl_msgctl(res, (long)(msqid), (long)(cmd), \
(long)(buf))
-#define __sanitizer_syscall_pre_semget(key, nsems, semflg) \
- __sanitizer_syscall_pre_impl_semget((long)(key), (long)(nsems), \
+#define __sanitizer_syscall_pre_semget(key, nsems, semflg) \
+ __sanitizer_syscall_pre_impl_semget((long)(key), (long)(nsems), \
(long)(semflg))
-#define __sanitizer_syscall_post_semget(res, key, nsems, semflg) \
- __sanitizer_syscall_post_impl_semget(res, (long)(key), (long)(nsems), \
+#define __sanitizer_syscall_post_semget(res, key, nsems, semflg) \
+ __sanitizer_syscall_post_impl_semget(res, (long)(key), (long)(nsems), \
(long)(semflg))
-#define __sanitizer_syscall_pre_semop(semid, sops, nsops) \
+#define __sanitizer_syscall_pre_semop(semid, sops, nsops) \
__sanitizer_syscall_pre_impl_semop((long)(semid), (long)(sops), (long)(nsops))
-#define __sanitizer_syscall_post_semop(res, semid, sops, nsops) \
- __sanitizer_syscall_post_impl_semop(res, (long)(semid), (long)(sops), \
+#define __sanitizer_syscall_post_semop(res, semid, sops, nsops) \
+ __sanitizer_syscall_post_impl_semop(res, (long)(semid), (long)(sops), \
(long)(nsops))
-#define __sanitizer_syscall_pre_semctl(semid, semnum, cmd, arg) \
- __sanitizer_syscall_pre_impl_semctl((long)(semid), (long)(semnum), \
+#define __sanitizer_syscall_pre_semctl(semid, semnum, cmd, arg) \
+ __sanitizer_syscall_pre_impl_semctl((long)(semid), (long)(semnum), \
(long)(cmd), (long)(arg))
-#define __sanitizer_syscall_post_semctl(res, semid, semnum, cmd, arg) \
- __sanitizer_syscall_post_impl_semctl(res, (long)(semid), (long)(semnum), \
+#define __sanitizer_syscall_post_semctl(res, semid, semnum, cmd, arg) \
+ __sanitizer_syscall_post_impl_semctl(res, (long)(semid), (long)(semnum), \
(long)(cmd), (long)(arg))
-#define __sanitizer_syscall_pre_semtimedop(semid, sops, nsops, timeout) \
- __sanitizer_syscall_pre_impl_semtimedop((long)(semid), (long)(sops), \
+#define __sanitizer_syscall_pre_semtimedop(semid, sops, nsops, timeout) \
+ __sanitizer_syscall_pre_impl_semtimedop((long)(semid), (long)(sops), \
(long)(nsops), (long)(timeout))
-#define __sanitizer_syscall_post_semtimedop(res, semid, sops, nsops, timeout) \
- __sanitizer_syscall_post_impl_semtimedop(res, (long)(semid), (long)(sops), \
+#define __sanitizer_syscall_post_semtimedop(res, semid, sops, nsops, timeout) \
+ __sanitizer_syscall_post_impl_semtimedop(res, (long)(semid), (long)(sops), \
(long)(nsops), (long)(timeout))
-#define __sanitizer_syscall_pre_shmat(shmid, shmaddr, shmflg) \
- __sanitizer_syscall_pre_impl_shmat((long)(shmid), (long)(shmaddr), \
+#define __sanitizer_syscall_pre_shmat(shmid, shmaddr, shmflg) \
+ __sanitizer_syscall_pre_impl_shmat((long)(shmid), (long)(shmaddr), \
(long)(shmflg))
-#define __sanitizer_syscall_post_shmat(res, shmid, shmaddr, shmflg) \
- __sanitizer_syscall_post_impl_shmat(res, (long)(shmid), (long)(shmaddr), \
+#define __sanitizer_syscall_post_shmat(res, shmid, shmaddr, shmflg) \
+ __sanitizer_syscall_post_impl_shmat(res, (long)(shmid), (long)(shmaddr), \
(long)(shmflg))
-#define __sanitizer_syscall_pre_shmget(key, size, flag) \
+#define __sanitizer_syscall_pre_shmget(key, size, flag) \
__sanitizer_syscall_pre_impl_shmget((long)(key), (long)(size), (long)(flag))
-#define __sanitizer_syscall_post_shmget(res, key, size, flag) \
- __sanitizer_syscall_post_impl_shmget(res, (long)(key), (long)(size), \
+#define __sanitizer_syscall_post_shmget(res, key, size, flag) \
+ __sanitizer_syscall_post_impl_shmget(res, (long)(key), (long)(size), \
(long)(flag))
-#define __sanitizer_syscall_pre_shmdt(shmaddr) \
+#define __sanitizer_syscall_pre_shmdt(shmaddr) \
__sanitizer_syscall_pre_impl_shmdt((long)(shmaddr))
-#define __sanitizer_syscall_post_shmdt(res, shmaddr) \
+#define __sanitizer_syscall_post_shmdt(res, shmaddr) \
__sanitizer_syscall_post_impl_shmdt(res, (long)(shmaddr))
-#define __sanitizer_syscall_pre_shmctl(shmid, cmd, buf) \
+#define __sanitizer_syscall_pre_shmctl(shmid, cmd, buf) \
__sanitizer_syscall_pre_impl_shmctl((long)(shmid), (long)(cmd), (long)(buf))
-#define __sanitizer_syscall_post_shmctl(res, shmid, cmd, buf) \
- __sanitizer_syscall_post_impl_shmctl(res, (long)(shmid), (long)(cmd), \
+#define __sanitizer_syscall_post_shmctl(res, shmid, cmd, buf) \
+ __sanitizer_syscall_post_impl_shmctl(res, (long)(shmid), (long)(cmd), \
(long)(buf))
#define __sanitizer_syscall_pre_ipc(call, first, second, third, ptr, fifth) \
__sanitizer_syscall_pre_impl_ipc((long)(call), (long)(first), \
(long)(second), (long)(third), (long)(ptr), \
(long)(fifth))
-#define __sanitizer_syscall_post_ipc(res, call, first, second, third, ptr, \
- fifth) \
- __sanitizer_syscall_post_impl_ipc(res, (long)(call), (long)(first), \
- (long)(second), (long)(third), \
+#define __sanitizer_syscall_post_ipc(res, call, first, second, third, ptr, \
+ fifth) \
+ __sanitizer_syscall_post_impl_ipc(res, (long)(call), (long)(first), \
+ (long)(second), (long)(third), \
(long)(ptr), (long)(fifth))
-#define __sanitizer_syscall_pre_mq_open(name, oflag, mode, attr) \
- __sanitizer_syscall_pre_impl_mq_open((long)(name), (long)(oflag), \
+#define __sanitizer_syscall_pre_mq_open(name, oflag, mode, attr) \
+ __sanitizer_syscall_pre_impl_mq_open((long)(name), (long)(oflag), \
(long)(mode), (long)(attr))
-#define __sanitizer_syscall_post_mq_open(res, name, oflag, mode, attr) \
- __sanitizer_syscall_post_impl_mq_open(res, (long)(name), (long)(oflag), \
+#define __sanitizer_syscall_post_mq_open(res, name, oflag, mode, attr) \
+ __sanitizer_syscall_post_impl_mq_open(res, (long)(name), (long)(oflag), \
(long)(mode), (long)(attr))
-#define __sanitizer_syscall_pre_mq_unlink(name) \
+#define __sanitizer_syscall_pre_mq_unlink(name) \
__sanitizer_syscall_pre_impl_mq_unlink((long)(name))
-#define __sanitizer_syscall_post_mq_unlink(res, name) \
+#define __sanitizer_syscall_post_mq_unlink(res, name) \
__sanitizer_syscall_post_impl_mq_unlink(res, (long)(name))
#define __sanitizer_syscall_pre_mq_timedsend(mqdes, msg_ptr, msg_len, \
msg_prio, abs_timeout) \
__sanitizer_syscall_pre_impl_mq_timedsend((long)(mqdes), (long)(msg_ptr), \
(long)(msg_len), (long)(msg_prio), \
(long)(abs_timeout))
-#define __sanitizer_syscall_post_mq_timedsend(res, mqdes, msg_ptr, msg_len, \
- msg_prio, abs_timeout) \
- __sanitizer_syscall_post_impl_mq_timedsend( \
- res, (long)(mqdes), (long)(msg_ptr), (long)(msg_len), (long)(msg_prio), \
+#define __sanitizer_syscall_post_mq_timedsend(res, mqdes, msg_ptr, msg_len, \
+ msg_prio, abs_timeout) \
+ __sanitizer_syscall_post_impl_mq_timedsend( \
+ res, (long)(mqdes), (long)(msg_ptr), (long)(msg_len), (long)(msg_prio), \
(long)(abs_timeout))
-#define __sanitizer_syscall_pre_mq_timedreceive(mqdes, msg_ptr, msg_len, \
- msg_prio, abs_timeout) \
- __sanitizer_syscall_pre_impl_mq_timedreceive( \
- (long)(mqdes), (long)(msg_ptr), (long)(msg_len), (long)(msg_prio), \
+#define __sanitizer_syscall_pre_mq_timedreceive(mqdes, msg_ptr, msg_len, \
+ msg_prio, abs_timeout) \
+ __sanitizer_syscall_pre_impl_mq_timedreceive( \
+ (long)(mqdes), (long)(msg_ptr), (long)(msg_len), (long)(msg_prio), \
(long)(abs_timeout))
#define __sanitizer_syscall_post_mq_timedreceive(res, mqdes, msg_ptr, msg_len, \
msg_prio, abs_timeout) \
__sanitizer_syscall_post_impl_mq_timedreceive( \
res, (long)(mqdes), (long)(msg_ptr), (long)(msg_len), (long)(msg_prio), \
(long)(abs_timeout))
-#define __sanitizer_syscall_pre_mq_notify(mqdes, notification) \
+#define __sanitizer_syscall_pre_mq_notify(mqdes, notification) \
__sanitizer_syscall_pre_impl_mq_notify((long)(mqdes), (long)(notification))
-#define __sanitizer_syscall_post_mq_notify(res, mqdes, notification) \
- __sanitizer_syscall_post_impl_mq_notify(res, (long)(mqdes), \
+#define __sanitizer_syscall_post_mq_notify(res, mqdes, notification) \
+ __sanitizer_syscall_post_impl_mq_notify(res, (long)(mqdes), \
(long)(notification))
-#define __sanitizer_syscall_pre_mq_getsetattr(mqdes, mqstat, omqstat) \
- __sanitizer_syscall_pre_impl_mq_getsetattr((long)(mqdes), (long)(mqstat), \
+#define __sanitizer_syscall_pre_mq_getsetattr(mqdes, mqstat, omqstat) \
+ __sanitizer_syscall_pre_impl_mq_getsetattr((long)(mqdes), (long)(mqstat), \
(long)(omqstat))
-#define __sanitizer_syscall_post_mq_getsetattr(res, mqdes, mqstat, omqstat) \
- __sanitizer_syscall_post_impl_mq_getsetattr(res, (long)(mqdes), \
+#define __sanitizer_syscall_post_mq_getsetattr(res, mqdes, mqstat, omqstat) \
+ __sanitizer_syscall_post_impl_mq_getsetattr(res, (long)(mqdes), \
(long)(mqstat), (long)(omqstat))
-#define __sanitizer_syscall_pre_pciconfig_iobase(which, bus, devfn) \
- __sanitizer_syscall_pre_impl_pciconfig_iobase((long)(which), (long)(bus), \
+#define __sanitizer_syscall_pre_pciconfig_iobase(which, bus, devfn) \
+ __sanitizer_syscall_pre_impl_pciconfig_iobase((long)(which), (long)(bus), \
(long)(devfn))
-#define __sanitizer_syscall_post_pciconfig_iobase(res, which, bus, devfn) \
- __sanitizer_syscall_post_impl_pciconfig_iobase(res, (long)(which), \
+#define __sanitizer_syscall_post_pciconfig_iobase(res, which, bus, devfn) \
+ __sanitizer_syscall_post_impl_pciconfig_iobase(res, (long)(which), \
(long)(bus), (long)(devfn))
-#define __sanitizer_syscall_pre_pciconfig_read(bus, dfn, off, len, buf) \
- __sanitizer_syscall_pre_impl_pciconfig_read( \
+#define __sanitizer_syscall_pre_pciconfig_read(bus, dfn, off, len, buf) \
+ __sanitizer_syscall_pre_impl_pciconfig_read( \
(long)(bus), (long)(dfn), (long)(off), (long)(len), (long)(buf))
-#define __sanitizer_syscall_post_pciconfig_read(res, bus, dfn, off, len, buf) \
- __sanitizer_syscall_post_impl_pciconfig_read( \
+#define __sanitizer_syscall_post_pciconfig_read(res, bus, dfn, off, len, buf) \
+ __sanitizer_syscall_post_impl_pciconfig_read( \
res, (long)(bus), (long)(dfn), (long)(off), (long)(len), (long)(buf))
-#define __sanitizer_syscall_pre_pciconfig_write(bus, dfn, off, len, buf) \
- __sanitizer_syscall_pre_impl_pciconfig_write( \
+#define __sanitizer_syscall_pre_pciconfig_write(bus, dfn, off, len, buf) \
+ __sanitizer_syscall_pre_impl_pciconfig_write( \
(long)(bus), (long)(dfn), (long)(off), (long)(len), (long)(buf))
#define __sanitizer_syscall_post_pciconfig_write(res, bus, dfn, off, len, buf) \
__sanitizer_syscall_post_impl_pciconfig_write( \
res, (long)(bus), (long)(dfn), (long)(off), (long)(len), (long)(buf))
-#define __sanitizer_syscall_pre_swapon(specialfile, swap_flags) \
+#define __sanitizer_syscall_pre_swapon(specialfile, swap_flags) \
__sanitizer_syscall_pre_impl_swapon((long)(specialfile), (long)(swap_flags))
-#define __sanitizer_syscall_post_swapon(res, specialfile, swap_flags) \
- __sanitizer_syscall_post_impl_swapon(res, (long)(specialfile), \
+#define __sanitizer_syscall_post_swapon(res, specialfile, swap_flags) \
+ __sanitizer_syscall_post_impl_swapon(res, (long)(specialfile), \
(long)(swap_flags))
-#define __sanitizer_syscall_pre_swapoff(specialfile) \
+#define __sanitizer_syscall_pre_swapoff(specialfile) \
__sanitizer_syscall_pre_impl_swapoff((long)(specialfile))
-#define __sanitizer_syscall_post_swapoff(res, specialfile) \
+#define __sanitizer_syscall_post_swapoff(res, specialfile) \
__sanitizer_syscall_post_impl_swapoff(res, (long)(specialfile))
-#define __sanitizer_syscall_pre_sysctl(args) \
+#define __sanitizer_syscall_pre_sysctl(args) \
__sanitizer_syscall_pre_impl_sysctl((long)(args))
-#define __sanitizer_syscall_post_sysctl(res, args) \
+#define __sanitizer_syscall_post_sysctl(res, args) \
__sanitizer_syscall_post_impl_sysctl(res, (long)(args))
-#define __sanitizer_syscall_pre_sysinfo(info) \
+#define __sanitizer_syscall_pre_sysinfo(info) \
__sanitizer_syscall_pre_impl_sysinfo((long)(info))
-#define __sanitizer_syscall_post_sysinfo(res, info) \
+#define __sanitizer_syscall_post_sysinfo(res, info) \
__sanitizer_syscall_post_impl_sysinfo(res, (long)(info))
-#define __sanitizer_syscall_pre_sysfs(option, arg1, arg2) \
+#define __sanitizer_syscall_pre_sysfs(option, arg1, arg2) \
__sanitizer_syscall_pre_impl_sysfs((long)(option), (long)(arg1), (long)(arg2))
-#define __sanitizer_syscall_post_sysfs(res, option, arg1, arg2) \
- __sanitizer_syscall_post_impl_sysfs(res, (long)(option), (long)(arg1), \
+#define __sanitizer_syscall_post_sysfs(res, option, arg1, arg2) \
+ __sanitizer_syscall_post_impl_sysfs(res, (long)(option), (long)(arg1), \
(long)(arg2))
-#define __sanitizer_syscall_pre_syslog(type, buf, len) \
+#define __sanitizer_syscall_pre_syslog(type, buf, len) \
__sanitizer_syscall_pre_impl_syslog((long)(type), (long)(buf), (long)(len))
-#define __sanitizer_syscall_post_syslog(res, type, buf, len) \
- __sanitizer_syscall_post_impl_syslog(res, (long)(type), (long)(buf), \
+#define __sanitizer_syscall_post_syslog(res, type, buf, len) \
+ __sanitizer_syscall_post_impl_syslog(res, (long)(type), (long)(buf), \
(long)(len))
-#define __sanitizer_syscall_pre_uselib(library) \
+#define __sanitizer_syscall_pre_uselib(library) \
__sanitizer_syscall_pre_impl_uselib((long)(library))
-#define __sanitizer_syscall_post_uselib(res, library) \
+#define __sanitizer_syscall_post_uselib(res, library) \
__sanitizer_syscall_post_impl_uselib(res, (long)(library))
-#define __sanitizer_syscall_pre_ni_syscall() \
+#define __sanitizer_syscall_pre_ni_syscall() \
__sanitizer_syscall_pre_impl_ni_syscall()
-#define __sanitizer_syscall_post_ni_syscall(res) \
+#define __sanitizer_syscall_post_ni_syscall(res) \
__sanitizer_syscall_post_impl_ni_syscall(res)
-#define __sanitizer_syscall_pre_ptrace(request, pid, addr, data) \
- __sanitizer_syscall_pre_impl_ptrace((long)(request), (long)(pid), \
+#define __sanitizer_syscall_pre_ptrace(request, pid, addr, data) \
+ __sanitizer_syscall_pre_impl_ptrace((long)(request), (long)(pid), \
(long)(addr), (long)(data))
-#define __sanitizer_syscall_post_ptrace(res, request, pid, addr, data) \
- __sanitizer_syscall_post_impl_ptrace(res, (long)(request), (long)(pid), \
+#define __sanitizer_syscall_post_ptrace(res, request, pid, addr, data) \
+ __sanitizer_syscall_post_impl_ptrace(res, (long)(request), (long)(pid), \
(long)(addr), (long)(data))
-#define __sanitizer_syscall_pre_add_key(_type, _description, _payload, plen, \
- destringid) \
- __sanitizer_syscall_pre_impl_add_key((long)(_type), (long)(_description), \
- (long)(_payload), (long)(plen), \
+#define __sanitizer_syscall_pre_add_key(_type, _description, _payload, plen, \
+ destringid) \
+ __sanitizer_syscall_pre_impl_add_key((long)(_type), (long)(_description), \
+ (long)(_payload), (long)(plen), \
(long)(destringid))
-#define __sanitizer_syscall_post_add_key(res, _type, _description, _payload, \
- plen, destringid) \
- __sanitizer_syscall_post_impl_add_key( \
- res, (long)(_type), (long)(_description), (long)(_payload), \
+#define __sanitizer_syscall_post_add_key(res, _type, _description, _payload, \
+ plen, destringid) \
+ __sanitizer_syscall_post_impl_add_key( \
+ res, (long)(_type), (long)(_description), (long)(_payload), \
(long)(plen), (long)(destringid))
-#define __sanitizer_syscall_pre_request_key(_type, _description, \
- _callout_info, destringid) \
- __sanitizer_syscall_pre_impl_request_key( \
- (long)(_type), (long)(_description), (long)(_callout_info), \
+#define __sanitizer_syscall_pre_request_key(_type, _description, \
+ _callout_info, destringid) \
+ __sanitizer_syscall_pre_impl_request_key( \
+ (long)(_type), (long)(_description), (long)(_callout_info), \
(long)(destringid))
-#define __sanitizer_syscall_post_request_key(res, _type, _description, \
- _callout_info, destringid) \
- __sanitizer_syscall_post_impl_request_key( \
- res, (long)(_type), (long)(_description), (long)(_callout_info), \
+#define __sanitizer_syscall_post_request_key(res, _type, _description, \
+ _callout_info, destringid) \
+ __sanitizer_syscall_post_impl_request_key( \
+ res, (long)(_type), (long)(_description), (long)(_callout_info), \
(long)(destringid))
#define __sanitizer_syscall_pre_keyctl(cmd, arg2, arg3, arg4, arg5) \
__sanitizer_syscall_pre_impl_keyctl((long)(cmd), (long)(arg2), (long)(arg3), \
(long)(arg4), (long)(arg5))
-#define __sanitizer_syscall_post_keyctl(res, cmd, arg2, arg3, arg4, arg5) \
- __sanitizer_syscall_post_impl_keyctl(res, (long)(cmd), (long)(arg2), \
- (long)(arg3), (long)(arg4), \
+#define __sanitizer_syscall_post_keyctl(res, cmd, arg2, arg3, arg4, arg5) \
+ __sanitizer_syscall_post_impl_keyctl(res, (long)(cmd), (long)(arg2), \
+ (long)(arg3), (long)(arg4), \
(long)(arg5))
-#define __sanitizer_syscall_pre_ioprio_set(which, who, ioprio) \
- __sanitizer_syscall_pre_impl_ioprio_set((long)(which), (long)(who), \
+#define __sanitizer_syscall_pre_ioprio_set(which, who, ioprio) \
+ __sanitizer_syscall_pre_impl_ioprio_set((long)(which), (long)(who), \
(long)(ioprio))
-#define __sanitizer_syscall_post_ioprio_set(res, which, who, ioprio) \
- __sanitizer_syscall_post_impl_ioprio_set(res, (long)(which), (long)(who), \
+#define __sanitizer_syscall_post_ioprio_set(res, which, who, ioprio) \
+ __sanitizer_syscall_post_impl_ioprio_set(res, (long)(which), (long)(who), \
(long)(ioprio))
-#define __sanitizer_syscall_pre_ioprio_get(which, who) \
+#define __sanitizer_syscall_pre_ioprio_get(which, who) \
__sanitizer_syscall_pre_impl_ioprio_get((long)(which), (long)(who))
-#define __sanitizer_syscall_post_ioprio_get(res, which, who) \
+#define __sanitizer_syscall_post_ioprio_get(res, which, who) \
__sanitizer_syscall_post_impl_ioprio_get(res, (long)(which), (long)(who))
-#define __sanitizer_syscall_pre_set_mempolicy(mode, nmask, maxnode) \
- __sanitizer_syscall_pre_impl_set_mempolicy((long)(mode), (long)(nmask), \
+#define __sanitizer_syscall_pre_set_mempolicy(mode, nmask, maxnode) \
+ __sanitizer_syscall_pre_impl_set_mempolicy((long)(mode), (long)(nmask), \
(long)(maxnode))
-#define __sanitizer_syscall_post_set_mempolicy(res, mode, nmask, maxnode) \
- __sanitizer_syscall_post_impl_set_mempolicy(res, (long)(mode), \
+#define __sanitizer_syscall_post_set_mempolicy(res, mode, nmask, maxnode) \
+ __sanitizer_syscall_post_impl_set_mempolicy(res, (long)(mode), \
(long)(nmask), (long)(maxnode))
-#define __sanitizer_syscall_pre_migrate_pages(pid, maxnode, from, to) \
- __sanitizer_syscall_pre_impl_migrate_pages((long)(pid), (long)(maxnode), \
+#define __sanitizer_syscall_pre_migrate_pages(pid, maxnode, from, to) \
+ __sanitizer_syscall_pre_impl_migrate_pages((long)(pid), (long)(maxnode), \
(long)(from), (long)(to))
-#define __sanitizer_syscall_post_migrate_pages(res, pid, maxnode, from, to) \
- __sanitizer_syscall_post_impl_migrate_pages( \
+#define __sanitizer_syscall_post_migrate_pages(res, pid, maxnode, from, to) \
+ __sanitizer_syscall_post_impl_migrate_pages( \
res, (long)(pid), (long)(maxnode), (long)(from), (long)(to))
-#define __sanitizer_syscall_pre_move_pages(pid, nr_pages, pages, nodes, \
- status, flags) \
- __sanitizer_syscall_pre_impl_move_pages((long)(pid), (long)(nr_pages), \
- (long)(pages), (long)(nodes), \
+#define __sanitizer_syscall_pre_move_pages(pid, nr_pages, pages, nodes, \
+ status, flags) \
+ __sanitizer_syscall_pre_impl_move_pages((long)(pid), (long)(nr_pages), \
+ (long)(pages), (long)(nodes), \
(long)(status), (long)(flags))
#define __sanitizer_syscall_post_move_pages(res, pid, nr_pages, pages, nodes, \
status, flags) \
@@ -1517,322 +1526,320 @@
__sanitizer_syscall_pre_impl_mbind((long)(start), (long)(len), (long)(mode), \
(long)(nmask), (long)(maxnode), \
(long)(flags))
-#define __sanitizer_syscall_post_mbind(res, start, len, mode, nmask, maxnode, \
- flags) \
- __sanitizer_syscall_post_impl_mbind(res, (long)(start), (long)(len), \
- (long)(mode), (long)(nmask), \
+#define __sanitizer_syscall_post_mbind(res, start, len, mode, nmask, maxnode, \
+ flags) \
+ __sanitizer_syscall_post_impl_mbind(res, (long)(start), (long)(len), \
+ (long)(mode), (long)(nmask), \
(long)(maxnode), (long)(flags))
-#define __sanitizer_syscall_pre_get_mempolicy(policy, nmask, maxnode, addr, \
- flags) \
- __sanitizer_syscall_pre_impl_get_mempolicy((long)(policy), (long)(nmask), \
- (long)(maxnode), (long)(addr), \
+#define __sanitizer_syscall_pre_get_mempolicy(policy, nmask, maxnode, addr, \
+ flags) \
+ __sanitizer_syscall_pre_impl_get_mempolicy((long)(policy), (long)(nmask), \
+ (long)(maxnode), (long)(addr), \
(long)(flags))
-#define __sanitizer_syscall_post_get_mempolicy(res, policy, nmask, maxnode, \
- addr, flags) \
- __sanitizer_syscall_post_impl_get_mempolicy(res, (long)(policy), \
- (long)(nmask), (long)(maxnode), \
+#define __sanitizer_syscall_post_get_mempolicy(res, policy, nmask, maxnode, \
+ addr, flags) \
+ __sanitizer_syscall_post_impl_get_mempolicy(res, (long)(policy), \
+ (long)(nmask), (long)(maxnode), \
(long)(addr), (long)(flags))
-#define __sanitizer_syscall_pre_inotify_init() \
+#define __sanitizer_syscall_pre_inotify_init() \
__sanitizer_syscall_pre_impl_inotify_init()
-#define __sanitizer_syscall_post_inotify_init(res) \
+#define __sanitizer_syscall_post_inotify_init(res) \
__sanitizer_syscall_post_impl_inotify_init(res)
-#define __sanitizer_syscall_pre_inotify_init1(flags) \
+#define __sanitizer_syscall_pre_inotify_init1(flags) \
__sanitizer_syscall_pre_impl_inotify_init1((long)(flags))
-#define __sanitizer_syscall_post_inotify_init1(res, flags) \
+#define __sanitizer_syscall_post_inotify_init1(res, flags) \
__sanitizer_syscall_post_impl_inotify_init1(res, (long)(flags))
-#define __sanitizer_syscall_pre_inotify_add_watch(fd, path, mask) \
- __sanitizer_syscall_pre_impl_inotify_add_watch((long)(fd), (long)(path), \
+#define __sanitizer_syscall_pre_inotify_add_watch(fd, path, mask) \
+ __sanitizer_syscall_pre_impl_inotify_add_watch((long)(fd), (long)(path), \
(long)(mask))
-#define __sanitizer_syscall_post_inotify_add_watch(res, fd, path, mask) \
- __sanitizer_syscall_post_impl_inotify_add_watch(res, (long)(fd), \
+#define __sanitizer_syscall_post_inotify_add_watch(res, fd, path, mask) \
+ __sanitizer_syscall_post_impl_inotify_add_watch(res, (long)(fd), \
(long)(path), (long)(mask))
-#define __sanitizer_syscall_pre_inotify_rm_watch(fd, wd) \
+#define __sanitizer_syscall_pre_inotify_rm_watch(fd, wd) \
__sanitizer_syscall_pre_impl_inotify_rm_watch((long)(fd), (long)(wd))
-#define __sanitizer_syscall_post_inotify_rm_watch(res, fd, wd) \
+#define __sanitizer_syscall_post_inotify_rm_watch(res, fd, wd) \
__sanitizer_syscall_post_impl_inotify_rm_watch(res, (long)(fd), (long)(wd))
-#define __sanitizer_syscall_pre_spu_run(fd, unpc, ustatus) \
- __sanitizer_syscall_pre_impl_spu_run((long)(fd), (long)(unpc), \
+#define __sanitizer_syscall_pre_spu_run(fd, unpc, ustatus) \
+ __sanitizer_syscall_pre_impl_spu_run((long)(fd), (long)(unpc), \
(long)(ustatus))
-#define __sanitizer_syscall_post_spu_run(res, fd, unpc, ustatus) \
- __sanitizer_syscall_post_impl_spu_run(res, (long)(fd), (long)(unpc), \
+#define __sanitizer_syscall_post_spu_run(res, fd, unpc, ustatus) \
+ __sanitizer_syscall_post_impl_spu_run(res, (long)(fd), (long)(unpc), \
(long)(ustatus))
-#define __sanitizer_syscall_pre_spu_create(name, flags, mode, fd) \
- __sanitizer_syscall_pre_impl_spu_create((long)(name), (long)(flags), \
+#define __sanitizer_syscall_pre_spu_create(name, flags, mode, fd) \
+ __sanitizer_syscall_pre_impl_spu_create((long)(name), (long)(flags), \
(long)(mode), (long)(fd))
-#define __sanitizer_syscall_post_spu_create(res, name, flags, mode, fd) \
- __sanitizer_syscall_post_impl_spu_create(res, (long)(name), (long)(flags), \
+#define __sanitizer_syscall_post_spu_create(res, name, flags, mode, fd) \
+ __sanitizer_syscall_post_impl_spu_create(res, (long)(name), (long)(flags), \
(long)(mode), (long)(fd))
-#define __sanitizer_syscall_pre_mknodat(dfd, filename, mode, dev) \
- __sanitizer_syscall_pre_impl_mknodat((long)(dfd), (long)(filename), \
+#define __sanitizer_syscall_pre_mknodat(dfd, filename, mode, dev) \
+ __sanitizer_syscall_pre_impl_mknodat((long)(dfd), (long)(filename), \
(long)(mode), (long)(dev))
-#define __sanitizer_syscall_post_mknodat(res, dfd, filename, mode, dev) \
- __sanitizer_syscall_post_impl_mknodat(res, (long)(dfd), (long)(filename), \
+#define __sanitizer_syscall_post_mknodat(res, dfd, filename, mode, dev) \
+ __sanitizer_syscall_post_impl_mknodat(res, (long)(dfd), (long)(filename), \
(long)(mode), (long)(dev))
-#define __sanitizer_syscall_pre_mkdirat(dfd, pathname, mode) \
- __sanitizer_syscall_pre_impl_mkdirat((long)(dfd), (long)(pathname), \
+#define __sanitizer_syscall_pre_mkdirat(dfd, pathname, mode) \
+ __sanitizer_syscall_pre_impl_mkdirat((long)(dfd), (long)(pathname), \
(long)(mode))
-#define __sanitizer_syscall_post_mkdirat(res, dfd, pathname, mode) \
- __sanitizer_syscall_post_impl_mkdirat(res, (long)(dfd), (long)(pathname), \
+#define __sanitizer_syscall_post_mkdirat(res, dfd, pathname, mode) \
+ __sanitizer_syscall_post_impl_mkdirat(res, (long)(dfd), (long)(pathname), \
(long)(mode))
-#define __sanitizer_syscall_pre_unlinkat(dfd, pathname, flag) \
- __sanitizer_syscall_pre_impl_unlinkat((long)(dfd), (long)(pathname), \
+#define __sanitizer_syscall_pre_unlinkat(dfd, pathname, flag) \
+ __sanitizer_syscall_pre_impl_unlinkat((long)(dfd), (long)(pathname), \
(long)(flag))
-#define __sanitizer_syscall_post_unlinkat(res, dfd, pathname, flag) \
- __sanitizer_syscall_post_impl_unlinkat(res, (long)(dfd), (long)(pathname), \
+#define __sanitizer_syscall_post_unlinkat(res, dfd, pathname, flag) \
+ __sanitizer_syscall_post_impl_unlinkat(res, (long)(dfd), (long)(pathname), \
(long)(flag))
-#define __sanitizer_syscall_pre_symlinkat(oldname, newdfd, newname) \
- __sanitizer_syscall_pre_impl_symlinkat((long)(oldname), (long)(newdfd), \
+#define __sanitizer_syscall_pre_symlinkat(oldname, newdfd, newname) \
+ __sanitizer_syscall_pre_impl_symlinkat((long)(oldname), (long)(newdfd), \
(long)(newname))
-#define __sanitizer_syscall_post_symlinkat(res, oldname, newdfd, newname) \
- __sanitizer_syscall_post_impl_symlinkat(res, (long)(oldname), \
+#define __sanitizer_syscall_post_symlinkat(res, oldname, newdfd, newname) \
+ __sanitizer_syscall_post_impl_symlinkat(res, (long)(oldname), \
(long)(newdfd), (long)(newname))
-#define __sanitizer_syscall_pre_linkat(olddfd, oldname, newdfd, newname, \
- flags) \
- __sanitizer_syscall_pre_impl_linkat((long)(olddfd), (long)(oldname), \
- (long)(newdfd), (long)(newname), \
+#define __sanitizer_syscall_pre_linkat(olddfd, oldname, newdfd, newname, \
+ flags) \
+ __sanitizer_syscall_pre_impl_linkat((long)(olddfd), (long)(oldname), \
+ (long)(newdfd), (long)(newname), \
(long)(flags))
#define __sanitizer_syscall_post_linkat(res, olddfd, oldname, newdfd, newname, \
flags) \
__sanitizer_syscall_post_impl_linkat(res, (long)(olddfd), (long)(oldname), \
(long)(newdfd), (long)(newname), \
(long)(flags))
-#define __sanitizer_syscall_pre_renameat(olddfd, oldname, newdfd, newname) \
- __sanitizer_syscall_pre_impl_renameat((long)(olddfd), (long)(oldname), \
+#define __sanitizer_syscall_pre_renameat(olddfd, oldname, newdfd, newname) \
+ __sanitizer_syscall_pre_impl_renameat((long)(olddfd), (long)(oldname), \
(long)(newdfd), (long)(newname))
#define __sanitizer_syscall_post_renameat(res, olddfd, oldname, newdfd, \
newname) \
__sanitizer_syscall_post_impl_renameat(res, (long)(olddfd), (long)(oldname), \
(long)(newdfd), (long)(newname))
-#define __sanitizer_syscall_pre_futimesat(dfd, filename, utimes) \
- __sanitizer_syscall_pre_impl_futimesat((long)(dfd), (long)(filename), \
+#define __sanitizer_syscall_pre_futimesat(dfd, filename, utimes) \
+ __sanitizer_syscall_pre_impl_futimesat((long)(dfd), (long)(filename), \
(long)(utimes))
-#define __sanitizer_syscall_post_futimesat(res, dfd, filename, utimes) \
- __sanitizer_syscall_post_impl_futimesat(res, (long)(dfd), (long)(filename), \
+#define __sanitizer_syscall_post_futimesat(res, dfd, filename, utimes) \
+ __sanitizer_syscall_post_impl_futimesat(res, (long)(dfd), (long)(filename), \
(long)(utimes))
-#define __sanitizer_syscall_pre_faccessat(dfd, filename, mode) \
- __sanitizer_syscall_pre_impl_faccessat((long)(dfd), (long)(filename), \
+#define __sanitizer_syscall_pre_faccessat(dfd, filename, mode) \
+ __sanitizer_syscall_pre_impl_faccessat((long)(dfd), (long)(filename), \
(long)(mode))
-#define __sanitizer_syscall_post_faccessat(res, dfd, filename, mode) \
- __sanitizer_syscall_post_impl_faccessat(res, (long)(dfd), (long)(filename), \
+#define __sanitizer_syscall_post_faccessat(res, dfd, filename, mode) \
+ __sanitizer_syscall_post_impl_faccessat(res, (long)(dfd), (long)(filename), \
(long)(mode))
-#define __sanitizer_syscall_pre_fchmodat(dfd, filename, mode) \
- __sanitizer_syscall_pre_impl_fchmodat((long)(dfd), (long)(filename), \
+#define __sanitizer_syscall_pre_fchmodat(dfd, filename, mode) \
+ __sanitizer_syscall_pre_impl_fchmodat((long)(dfd), (long)(filename), \
(long)(mode))
-#define __sanitizer_syscall_post_fchmodat(res, dfd, filename, mode) \
- __sanitizer_syscall_post_impl_fchmodat(res, (long)(dfd), (long)(filename), \
+#define __sanitizer_syscall_post_fchmodat(res, dfd, filename, mode) \
+ __sanitizer_syscall_post_impl_fchmodat(res, (long)(dfd), (long)(filename), \
(long)(mode))
-#define __sanitizer_syscall_pre_fchownat(dfd, filename, user, group, flag) \
- __sanitizer_syscall_pre_impl_fchownat((long)(dfd), (long)(filename), \
- (long)(user), (long)(group), \
+#define __sanitizer_syscall_pre_fchownat(dfd, filename, user, group, flag) \
+ __sanitizer_syscall_pre_impl_fchownat((long)(dfd), (long)(filename), \
+ (long)(user), (long)(group), \
(long)(flag))
-#define __sanitizer_syscall_post_fchownat(res, dfd, filename, user, group, \
- flag) \
- __sanitizer_syscall_post_impl_fchownat(res, (long)(dfd), (long)(filename), \
- (long)(user), (long)(group), \
+#define __sanitizer_syscall_post_fchownat(res, dfd, filename, user, group, \
+ flag) \
+ __sanitizer_syscall_post_impl_fchownat(res, (long)(dfd), (long)(filename), \
+ (long)(user), (long)(group), \
(long)(flag))
-#define __sanitizer_syscall_pre_openat(dfd, filename, flags, mode) \
- __sanitizer_syscall_pre_impl_openat((long)(dfd), (long)(filename), \
+#define __sanitizer_syscall_pre_openat(dfd, filename, flags, mode) \
+ __sanitizer_syscall_pre_impl_openat((long)(dfd), (long)(filename), \
(long)(flags), (long)(mode))
-#define __sanitizer_syscall_post_openat(res, dfd, filename, flags, mode) \
- __sanitizer_syscall_post_impl_openat(res, (long)(dfd), (long)(filename), \
+#define __sanitizer_syscall_post_openat(res, dfd, filename, flags, mode) \
+ __sanitizer_syscall_post_impl_openat(res, (long)(dfd), (long)(filename), \
(long)(flags), (long)(mode))
-#define __sanitizer_syscall_pre_newfstatat(dfd, filename, statbuf, flag) \
- __sanitizer_syscall_pre_impl_newfstatat((long)(dfd), (long)(filename), \
+#define __sanitizer_syscall_pre_newfstatat(dfd, filename, statbuf, flag) \
+ __sanitizer_syscall_pre_impl_newfstatat((long)(dfd), (long)(filename), \
(long)(statbuf), (long)(flag))
#define __sanitizer_syscall_post_newfstatat(res, dfd, filename, statbuf, flag) \
__sanitizer_syscall_post_impl_newfstatat(res, (long)(dfd), (long)(filename), \
(long)(statbuf), (long)(flag))
-#define __sanitizer_syscall_pre_fstatat64(dfd, filename, statbuf, flag) \
- __sanitizer_syscall_pre_impl_fstatat64((long)(dfd), (long)(filename), \
+#define __sanitizer_syscall_pre_fstatat64(dfd, filename, statbuf, flag) \
+ __sanitizer_syscall_pre_impl_fstatat64((long)(dfd), (long)(filename), \
(long)(statbuf), (long)(flag))
-#define __sanitizer_syscall_post_fstatat64(res, dfd, filename, statbuf, flag) \
- __sanitizer_syscall_post_impl_fstatat64(res, (long)(dfd), (long)(filename), \
+#define __sanitizer_syscall_post_fstatat64(res, dfd, filename, statbuf, flag) \
+ __sanitizer_syscall_post_impl_fstatat64(res, (long)(dfd), (long)(filename), \
(long)(statbuf), (long)(flag))
-#define __sanitizer_syscall_pre_readlinkat(dfd, path, buf, bufsiz) \
- __sanitizer_syscall_pre_impl_readlinkat((long)(dfd), (long)(path), \
+#define __sanitizer_syscall_pre_readlinkat(dfd, path, buf, bufsiz) \
+ __sanitizer_syscall_pre_impl_readlinkat((long)(dfd), (long)(path), \
(long)(buf), (long)(bufsiz))
-#define __sanitizer_syscall_post_readlinkat(res, dfd, path, buf, bufsiz) \
- __sanitizer_syscall_post_impl_readlinkat(res, (long)(dfd), (long)(path), \
+#define __sanitizer_syscall_post_readlinkat(res, dfd, path, buf, bufsiz) \
+ __sanitizer_syscall_post_impl_readlinkat(res, (long)(dfd), (long)(path), \
(long)(buf), (long)(bufsiz))
-#define __sanitizer_syscall_pre_utimensat(dfd, filename, utimes, flags) \
- __sanitizer_syscall_pre_impl_utimensat((long)(dfd), (long)(filename), \
+#define __sanitizer_syscall_pre_utimensat(dfd, filename, utimes, flags) \
+ __sanitizer_syscall_pre_impl_utimensat((long)(dfd), (long)(filename), \
(long)(utimes), (long)(flags))
-#define __sanitizer_syscall_post_utimensat(res, dfd, filename, utimes, flags) \
- __sanitizer_syscall_post_impl_utimensat(res, (long)(dfd), (long)(filename), \
+#define __sanitizer_syscall_post_utimensat(res, dfd, filename, utimes, flags) \
+ __sanitizer_syscall_post_impl_utimensat(res, (long)(dfd), (long)(filename), \
(long)(utimes), (long)(flags))
-#define __sanitizer_syscall_pre_unshare(unshare_flags) \
+#define __sanitizer_syscall_pre_unshare(unshare_flags) \
__sanitizer_syscall_pre_impl_unshare((long)(unshare_flags))
-#define __sanitizer_syscall_post_unshare(res, unshare_flags) \
+#define __sanitizer_syscall_post_unshare(res, unshare_flags) \
__sanitizer_syscall_post_impl_unshare(res, (long)(unshare_flags))
-#define __sanitizer_syscall_pre_splice(fd_in, off_in, fd_out, off_out, len, \
- flags) \
- __sanitizer_syscall_pre_impl_splice((long)(fd_in), (long)(off_in), \
- (long)(fd_out), (long)(off_out), \
+#define __sanitizer_syscall_pre_splice(fd_in, off_in, fd_out, off_out, len, \
+ flags) \
+ __sanitizer_syscall_pre_impl_splice((long)(fd_in), (long)(off_in), \
+ (long)(fd_out), (long)(off_out), \
(long)(len), (long)(flags))
-#define __sanitizer_syscall_post_splice(res, fd_in, off_in, fd_out, off_out, \
- len, flags) \
- __sanitizer_syscall_post_impl_splice(res, (long)(fd_in), (long)(off_in), \
- (long)(fd_out), (long)(off_out), \
+#define __sanitizer_syscall_post_splice(res, fd_in, off_in, fd_out, off_out, \
+ len, flags) \
+ __sanitizer_syscall_post_impl_splice(res, (long)(fd_in), (long)(off_in), \
+ (long)(fd_out), (long)(off_out), \
(long)(len), (long)(flags))
-#define __sanitizer_syscall_pre_vmsplice(fd, iov, nr_segs, flags) \
- __sanitizer_syscall_pre_impl_vmsplice((long)(fd), (long)(iov), \
+#define __sanitizer_syscall_pre_vmsplice(fd, iov, nr_segs, flags) \
+ __sanitizer_syscall_pre_impl_vmsplice((long)(fd), (long)(iov), \
(long)(nr_segs), (long)(flags))
-#define __sanitizer_syscall_post_vmsplice(res, fd, iov, nr_segs, flags) \
- __sanitizer_syscall_post_impl_vmsplice(res, (long)(fd), (long)(iov), \
+#define __sanitizer_syscall_post_vmsplice(res, fd, iov, nr_segs, flags) \
+ __sanitizer_syscall_post_impl_vmsplice(res, (long)(fd), (long)(iov), \
(long)(nr_segs), (long)(flags))
-#define __sanitizer_syscall_pre_tee(fdin, fdout, len, flags) \
- __sanitizer_syscall_pre_impl_tee((long)(fdin), (long)(fdout), (long)(len), \
+#define __sanitizer_syscall_pre_tee(fdin, fdout, len, flags) \
+ __sanitizer_syscall_pre_impl_tee((long)(fdin), (long)(fdout), (long)(len), \
(long)(flags))
-#define __sanitizer_syscall_post_tee(res, fdin, fdout, len, flags) \
- __sanitizer_syscall_post_impl_tee(res, (long)(fdin), (long)(fdout), \
+#define __sanitizer_syscall_post_tee(res, fdin, fdout, len, flags) \
+ __sanitizer_syscall_post_impl_tee(res, (long)(fdin), (long)(fdout), \
(long)(len), (long)(flags))
-#define __sanitizer_syscall_pre_get_robust_list(pid, head_ptr, len_ptr) \
- __sanitizer_syscall_pre_impl_get_robust_list((long)(pid), (long)(head_ptr), \
+#define __sanitizer_syscall_pre_get_robust_list(pid, head_ptr, len_ptr) \
+ __sanitizer_syscall_pre_impl_get_robust_list((long)(pid), (long)(head_ptr), \
(long)(len_ptr))
-#define __sanitizer_syscall_post_get_robust_list(res, pid, head_ptr, len_ptr) \
- __sanitizer_syscall_post_impl_get_robust_list( \
+#define __sanitizer_syscall_post_get_robust_list(res, pid, head_ptr, len_ptr) \
+ __sanitizer_syscall_post_impl_get_robust_list( \
res, (long)(pid), (long)(head_ptr), (long)(len_ptr))
-#define __sanitizer_syscall_pre_set_robust_list(head, len) \
+#define __sanitizer_syscall_pre_set_robust_list(head, len) \
__sanitizer_syscall_pre_impl_set_robust_list((long)(head), (long)(len))
-#define __sanitizer_syscall_post_set_robust_list(res, head, len) \
+#define __sanitizer_syscall_post_set_robust_list(res, head, len) \
__sanitizer_syscall_post_impl_set_robust_list(res, (long)(head), (long)(len))
-#define __sanitizer_syscall_pre_getcpu(cpu, node, cache) \
+#define __sanitizer_syscall_pre_getcpu(cpu, node, cache) \
__sanitizer_syscall_pre_impl_getcpu((long)(cpu), (long)(node), (long)(cache))
-#define __sanitizer_syscall_post_getcpu(res, cpu, node, cache) \
- __sanitizer_syscall_post_impl_getcpu(res, (long)(cpu), (long)(node), \
+#define __sanitizer_syscall_post_getcpu(res, cpu, node, cache) \
+ __sanitizer_syscall_post_impl_getcpu(res, (long)(cpu), (long)(node), \
(long)(cache))
-#define __sanitizer_syscall_pre_signalfd(ufd, user_mask, sizemask) \
- __sanitizer_syscall_pre_impl_signalfd((long)(ufd), (long)(user_mask), \
+#define __sanitizer_syscall_pre_signalfd(ufd, user_mask, sizemask) \
+ __sanitizer_syscall_pre_impl_signalfd((long)(ufd), (long)(user_mask), \
(long)(sizemask))
-#define __sanitizer_syscall_post_signalfd(res, ufd, user_mask, sizemask) \
- __sanitizer_syscall_post_impl_signalfd(res, (long)(ufd), (long)(user_mask), \
+#define __sanitizer_syscall_post_signalfd(res, ufd, user_mask, sizemask) \
+ __sanitizer_syscall_post_impl_signalfd(res, (long)(ufd), (long)(user_mask), \
(long)(sizemask))
-#define __sanitizer_syscall_pre_signalfd4(ufd, user_mask, sizemask, flags) \
- __sanitizer_syscall_pre_impl_signalfd4((long)(ufd), (long)(user_mask), \
+#define __sanitizer_syscall_pre_signalfd4(ufd, user_mask, sizemask, flags) \
+ __sanitizer_syscall_pre_impl_signalfd4((long)(ufd), (long)(user_mask), \
(long)(sizemask), (long)(flags))
#define __sanitizer_syscall_post_signalfd4(res, ufd, user_mask, sizemask, \
flags) \
__sanitizer_syscall_post_impl_signalfd4(res, (long)(ufd), (long)(user_mask), \
(long)(sizemask), (long)(flags))
-#define __sanitizer_syscall_pre_timerfd_create(clockid, flags) \
+#define __sanitizer_syscall_pre_timerfd_create(clockid, flags) \
__sanitizer_syscall_pre_impl_timerfd_create((long)(clockid), (long)(flags))
-#define __sanitizer_syscall_post_timerfd_create(res, clockid, flags) \
- __sanitizer_syscall_post_impl_timerfd_create(res, (long)(clockid), \
+#define __sanitizer_syscall_post_timerfd_create(res, clockid, flags) \
+ __sanitizer_syscall_post_impl_timerfd_create(res, (long)(clockid), \
(long)(flags))
-#define __sanitizer_syscall_pre_timerfd_settime(ufd, flags, utmr, otmr) \
- __sanitizer_syscall_pre_impl_timerfd_settime((long)(ufd), (long)(flags), \
+#define __sanitizer_syscall_pre_timerfd_settime(ufd, flags, utmr, otmr) \
+ __sanitizer_syscall_pre_impl_timerfd_settime((long)(ufd), (long)(flags), \
(long)(utmr), (long)(otmr))
-#define __sanitizer_syscall_post_timerfd_settime(res, ufd, flags, utmr, otmr) \
- __sanitizer_syscall_post_impl_timerfd_settime( \
+#define __sanitizer_syscall_post_timerfd_settime(res, ufd, flags, utmr, otmr) \
+ __sanitizer_syscall_post_impl_timerfd_settime( \
res, (long)(ufd), (long)(flags), (long)(utmr), (long)(otmr))
-#define __sanitizer_syscall_pre_timerfd_gettime(ufd, otmr) \
+#define __sanitizer_syscall_pre_timerfd_gettime(ufd, otmr) \
__sanitizer_syscall_pre_impl_timerfd_gettime((long)(ufd), (long)(otmr))
-#define __sanitizer_syscall_post_timerfd_gettime(res, ufd, otmr) \
+#define __sanitizer_syscall_post_timerfd_gettime(res, ufd, otmr) \
__sanitizer_syscall_post_impl_timerfd_gettime(res, (long)(ufd), (long)(otmr))
-#define __sanitizer_syscall_pre_eventfd(count) \
+#define __sanitizer_syscall_pre_eventfd(count) \
__sanitizer_syscall_pre_impl_eventfd((long)(count))
-#define __sanitizer_syscall_post_eventfd(res, count) \
+#define __sanitizer_syscall_post_eventfd(res, count) \
__sanitizer_syscall_post_impl_eventfd(res, (long)(count))
-#define __sanitizer_syscall_pre_eventfd2(count, flags) \
+#define __sanitizer_syscall_pre_eventfd2(count, flags) \
__sanitizer_syscall_pre_impl_eventfd2((long)(count), (long)(flags))
-#define __sanitizer_syscall_post_eventfd2(res, count, flags) \
+#define __sanitizer_syscall_post_eventfd2(res, count, flags) \
__sanitizer_syscall_post_impl_eventfd2(res, (long)(count), (long)(flags))
-#define __sanitizer_syscall_pre_old_readdir(arg0, arg1, arg2) \
- __sanitizer_syscall_pre_impl_old_readdir((long)(arg0), (long)(arg1), \
+#define __sanitizer_syscall_pre_old_readdir(arg0, arg1, arg2) \
+ __sanitizer_syscall_pre_impl_old_readdir((long)(arg0), (long)(arg1), \
(long)(arg2))
-#define __sanitizer_syscall_post_old_readdir(res, arg0, arg1, arg2) \
- __sanitizer_syscall_post_impl_old_readdir(res, (long)(arg0), (long)(arg1), \
+#define __sanitizer_syscall_post_old_readdir(res, arg0, arg1, arg2) \
+ __sanitizer_syscall_post_impl_old_readdir(res, (long)(arg0), (long)(arg1), \
(long)(arg2))
-#define __sanitizer_syscall_pre_pselect6(arg0, arg1, arg2, arg3, arg4, arg5) \
- __sanitizer_syscall_pre_impl_pselect6((long)(arg0), (long)(arg1), \
- (long)(arg2), (long)(arg3), \
+#define __sanitizer_syscall_pre_pselect6(arg0, arg1, arg2, arg3, arg4, arg5) \
+ __sanitizer_syscall_pre_impl_pselect6((long)(arg0), (long)(arg1), \
+ (long)(arg2), (long)(arg3), \
(long)(arg4), (long)(arg5))
-#define __sanitizer_syscall_post_pselect6(res, arg0, arg1, arg2, arg3, arg4, \
- arg5) \
- __sanitizer_syscall_post_impl_pselect6(res, (long)(arg0), (long)(arg1), \
- (long)(arg2), (long)(arg3), \
+#define __sanitizer_syscall_post_pselect6(res, arg0, arg1, arg2, arg3, arg4, \
+ arg5) \
+ __sanitizer_syscall_post_impl_pselect6(res, (long)(arg0), (long)(arg1), \
+ (long)(arg2), (long)(arg3), \
(long)(arg4), (long)(arg5))
#define __sanitizer_syscall_pre_ppoll(arg0, arg1, arg2, arg3, arg4) \
__sanitizer_syscall_pre_impl_ppoll((long)(arg0), (long)(arg1), (long)(arg2), \
(long)(arg3), (long)(arg4))
-#define __sanitizer_syscall_post_ppoll(res, arg0, arg1, arg2, arg3, arg4) \
- __sanitizer_syscall_post_impl_ppoll(res, (long)(arg0), (long)(arg1), \
- (long)(arg2), (long)(arg3), \
+#define __sanitizer_syscall_post_ppoll(res, arg0, arg1, arg2, arg3, arg4) \
+ __sanitizer_syscall_post_impl_ppoll(res, (long)(arg0), (long)(arg1), \
+ (long)(arg2), (long)(arg3), \
(long)(arg4))
-#define __sanitizer_syscall_pre_syncfs(fd) \
+#define __sanitizer_syscall_pre_syncfs(fd) \
__sanitizer_syscall_pre_impl_syncfs((long)(fd))
-#define __sanitizer_syscall_post_syncfs(res, fd) \
+#define __sanitizer_syscall_post_syncfs(res, fd) \
__sanitizer_syscall_post_impl_syncfs(res, (long)(fd))
#define __sanitizer_syscall_pre_perf_event_open(attr_uptr, pid, cpu, group_fd, \
flags) \
__sanitizer_syscall_pre_impl_perf_event_open((long)(attr_uptr), (long)(pid), \
(long)(cpu), (long)(group_fd), \
(long)(flags))
-#define __sanitizer_syscall_post_perf_event_open(res, attr_uptr, pid, cpu, \
- group_fd, flags) \
- __sanitizer_syscall_post_impl_perf_event_open( \
- res, (long)(attr_uptr), (long)(pid), (long)(cpu), (long)(group_fd), \
+#define __sanitizer_syscall_post_perf_event_open(res, attr_uptr, pid, cpu, \
+ group_fd, flags) \
+ __sanitizer_syscall_post_impl_perf_event_open( \
+ res, (long)(attr_uptr), (long)(pid), (long)(cpu), (long)(group_fd), \
(long)(flags))
-#define __sanitizer_syscall_pre_mmap_pgoff(addr, len, prot, flags, fd, pgoff) \
- __sanitizer_syscall_pre_impl_mmap_pgoff((long)(addr), (long)(len), \
- (long)(prot), (long)(flags), \
+#define __sanitizer_syscall_pre_mmap_pgoff(addr, len, prot, flags, fd, pgoff) \
+ __sanitizer_syscall_pre_impl_mmap_pgoff((long)(addr), (long)(len), \
+ (long)(prot), (long)(flags), \
(long)(fd), (long)(pgoff))
-#define __sanitizer_syscall_post_mmap_pgoff(res, addr, len, prot, flags, fd, \
- pgoff) \
- __sanitizer_syscall_post_impl_mmap_pgoff(res, (long)(addr), (long)(len), \
- (long)(prot), (long)(flags), \
+#define __sanitizer_syscall_post_mmap_pgoff(res, addr, len, prot, flags, fd, \
+ pgoff) \
+ __sanitizer_syscall_post_impl_mmap_pgoff(res, (long)(addr), (long)(len), \
+ (long)(prot), (long)(flags), \
(long)(fd), (long)(pgoff))
-#define __sanitizer_syscall_pre_old_mmap(arg) \
+#define __sanitizer_syscall_pre_old_mmap(arg) \
__sanitizer_syscall_pre_impl_old_mmap((long)(arg))
-#define __sanitizer_syscall_post_old_mmap(res, arg) \
+#define __sanitizer_syscall_post_old_mmap(res, arg) \
__sanitizer_syscall_post_impl_old_mmap(res, (long)(arg))
-#define __sanitizer_syscall_pre_name_to_handle_at(dfd, name, handle, mnt_id, \
- flag) \
- __sanitizer_syscall_pre_impl_name_to_handle_at( \
+#define __sanitizer_syscall_pre_name_to_handle_at(dfd, name, handle, mnt_id, \
+ flag) \
+ __sanitizer_syscall_pre_impl_name_to_handle_at( \
(long)(dfd), (long)(name), (long)(handle), (long)(mnt_id), (long)(flag))
-#define __sanitizer_syscall_post_name_to_handle_at(res, dfd, name, handle, \
- mnt_id, flag) \
- __sanitizer_syscall_post_impl_name_to_handle_at( \
- res, (long)(dfd), (long)(name), (long)(handle), (long)(mnt_id), \
+#define __sanitizer_syscall_post_name_to_handle_at(res, dfd, name, handle, \
+ mnt_id, flag) \
+ __sanitizer_syscall_post_impl_name_to_handle_at( \
+ res, (long)(dfd), (long)(name), (long)(handle), (long)(mnt_id), \
(long)(flag))
-#define __sanitizer_syscall_pre_open_by_handle_at(mountdirfd, handle, flags) \
- __sanitizer_syscall_pre_impl_open_by_handle_at( \
+#define __sanitizer_syscall_pre_open_by_handle_at(mountdirfd, handle, flags) \
+ __sanitizer_syscall_pre_impl_open_by_handle_at( \
(long)(mountdirfd), (long)(handle), (long)(flags))
-#define __sanitizer_syscall_post_open_by_handle_at(res, mountdirfd, handle, \
- flags) \
- __sanitizer_syscall_post_impl_open_by_handle_at( \
+#define __sanitizer_syscall_post_open_by_handle_at(res, mountdirfd, handle, \
+ flags) \
+ __sanitizer_syscall_post_impl_open_by_handle_at( \
res, (long)(mountdirfd), (long)(handle), (long)(flags))
-#define __sanitizer_syscall_pre_setns(fd, nstype) \
+#define __sanitizer_syscall_pre_setns(fd, nstype) \
__sanitizer_syscall_pre_impl_setns((long)(fd), (long)(nstype))
-#define __sanitizer_syscall_post_setns(res, fd, nstype) \
+#define __sanitizer_syscall_post_setns(res, fd, nstype) \
__sanitizer_syscall_post_impl_setns(res, (long)(fd), (long)(nstype))
-#define __sanitizer_syscall_pre_process_vm_readv(pid, lvec, liovcnt, rvec, \
- riovcnt, flags) \
- __sanitizer_syscall_pre_impl_process_vm_readv( \
- (long)(pid), (long)(lvec), (long)(liovcnt), (long)(rvec), \
+#define __sanitizer_syscall_pre_process_vm_readv(pid, lvec, liovcnt, rvec, \
+ riovcnt, flags) \
+ __sanitizer_syscall_pre_impl_process_vm_readv( \
+ (long)(pid), (long)(lvec), (long)(liovcnt), (long)(rvec), \
(long)(riovcnt), (long)(flags))
-#define __sanitizer_syscall_post_process_vm_readv(res, pid, lvec, liovcnt, \
- rvec, riovcnt, flags) \
- __sanitizer_syscall_post_impl_process_vm_readv( \
- res, (long)(pid), (long)(lvec), (long)(liovcnt), (long)(rvec), \
+#define __sanitizer_syscall_post_process_vm_readv(res, pid, lvec, liovcnt, \
+ rvec, riovcnt, flags) \
+ __sanitizer_syscall_post_impl_process_vm_readv( \
+ res, (long)(pid), (long)(lvec), (long)(liovcnt), (long)(rvec), \
(long)(riovcnt), (long)(flags))
-#define __sanitizer_syscall_pre_process_vm_writev(pid, lvec, liovcnt, rvec, \
- riovcnt, flags) \
- __sanitizer_syscall_pre_impl_process_vm_writev( \
- (long)(pid), (long)(lvec), (long)(liovcnt), (long)(rvec), \
+#define __sanitizer_syscall_pre_process_vm_writev(pid, lvec, liovcnt, rvec, \
+ riovcnt, flags) \
+ __sanitizer_syscall_pre_impl_process_vm_writev( \
+ (long)(pid), (long)(lvec), (long)(liovcnt), (long)(rvec), \
(long)(riovcnt), (long)(flags))
-#define __sanitizer_syscall_post_process_vm_writev(res, pid, lvec, liovcnt, \
- rvec, riovcnt, flags) \
- __sanitizer_syscall_post_impl_process_vm_writev( \
- res, (long)(pid), (long)(lvec), (long)(liovcnt), (long)(rvec), \
+#define __sanitizer_syscall_post_process_vm_writev(res, pid, lvec, liovcnt, \
+ rvec, riovcnt, flags) \
+ __sanitizer_syscall_post_impl_process_vm_writev( \
+ res, (long)(pid), (long)(lvec), (long)(liovcnt), (long)(rvec), \
(long)(riovcnt), (long)(flags))
-#define __sanitizer_syscall_pre_fork() \
- __sanitizer_syscall_pre_impl_fork()
-#define __sanitizer_syscall_post_fork(res) \
+#define __sanitizer_syscall_pre_fork() __sanitizer_syscall_pre_impl_fork()
+#define __sanitizer_syscall_post_fork(res) \
__sanitizer_syscall_post_impl_fork(res)
-#define __sanitizer_syscall_pre_vfork() \
- __sanitizer_syscall_pre_impl_vfork()
-#define __sanitizer_syscall_post_vfork(res) \
+#define __sanitizer_syscall_pre_vfork() __sanitizer_syscall_pre_impl_vfork()
+#define __sanitizer_syscall_post_vfork(res) \
__sanitizer_syscall_post_impl_vfork(res)
#define __sanitizer_syscall_pre_sigaction(signum, act, oldact) \
__sanitizer_syscall_pre_impl_sigaction((long)signum, (long)act, (long)oldact)
@@ -2699,6 +2706,13 @@ void __sanitizer_syscall_pre_impl_epoll_pwait(long epfd, long events,
void __sanitizer_syscall_post_impl_epoll_pwait(long res, long epfd, long events,
long maxevents, long timeout,
long sigmask, long sigsetsize);
+void __sanitizer_syscall_pre_impl_epoll_pwait2(long epfd, long events,
+ long maxevents, long timeout,
+ long sigmask, long sigsetsize);
+void __sanitizer_syscall_post_impl_epoll_pwait2(long res, long epfd,
+ long events, long maxevents,
+ long timeout, long sigmask,
+ long sigsetsize);
void __sanitizer_syscall_pre_impl_gethostname(long name, long len);
void __sanitizer_syscall_post_impl_gethostname(long res, long name, long len);
void __sanitizer_syscall_pre_impl_sethostname(long name, long len);
@@ -3080,7 +3094,7 @@ void __sanitizer_syscall_post_impl_rt_sigaction(long res, long signum, long act,
void __sanitizer_syscall_pre_impl_sigaltstack(long ss, long oss);
void __sanitizer_syscall_post_impl_sigaltstack(long res, long ss, long oss);
#ifdef __cplusplus
-} // extern "C"
+} // extern "C"
#endif
-#endif // SANITIZER_LINUX_SYSCALL_HOOKS_H
+#endif // SANITIZER_LINUX_SYSCALL_HOOKS_H
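
Most of the hunk above is clang-format realignment of the macro continuation backslashes; the only functional addition visible here is the pair of epoll_pwait2 pre/post hook declarations. The pre_/post_ macros are the public surface of sanitizer_linux_syscall_hooks.h: code that issues raw syscalls invokes the pre hook before the syscall and the post hook with the result afterwards. A minimal usage sketch, built on one of the hooks shown above (the wrapper function itself is illustrative, not part of the header):

    #include <sanitizer/linux_syscall_hooks.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    // Illustrative wrapper: tell the sanitizer about a raw readlinkat(2)
    // call so it can check the buffer the kernel writes into.
    long checked_readlinkat(int dfd, const char *path, char *buf, size_t bufsiz) {
      __sanitizer_syscall_pre_readlinkat(dfd, path, buf, bufsiz);
      long res = syscall(SYS_readlinkat, dfd, path, buf, bufsiz);
      __sanitizer_syscall_post_readlinkat(res, dfd, path, buf, bufsiz);
      return res;
    }
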
diff --git a/compiler-rt/include/sanitizer/tsan_interface.h b/compiler-rt/include/sanitizer/tsan_interface.h
index 565aa391a9fa..2782e61fb8c7 100644
--- a/compiler-rt/include/sanitizer/tsan_interface.h
+++ b/compiler-rt/include/sanitizer/tsan_interface.h
@@ -169,6 +169,9 @@ void __tsan_on_initialize();
// if TSan should exit as if issues were detected.
int __tsan_on_finalize(int failed);
+// Release TSan internal memory in a best-effort manner.
+void __tsan_flush_memory();
+
#ifdef __cplusplus
} // extern "C"
#endif
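
__tsan_flush_memory() is the only addition to this header; per its comment it releases TSan's internal memory on a best-effort basis. A hedged sketch of how a long-running process might use it (the trigger point is illustrative; the function takes no arguments and may release nothing):

    #include <sanitizer/tsan_interface.h>

    // Illustrative: after a memory-heavy phase, ask TSan to return whatever
    // internal caches it can. The call is a no-op if nothing can be freed.
    void after_big_batch() {
      __tsan_flush_memory();
    }
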
diff --git a/compiler-rt/lib/asan/asan_allocator.cpp b/compiler-rt/lib/asan/asan_allocator.cpp
index 414fba3b427d..3fa36742060b 100644
--- a/compiler-rt/lib/asan/asan_allocator.cpp
+++ b/compiler-rt/lib/asan/asan_allocator.cpp
@@ -102,19 +102,18 @@ class ChunkHeader {
public:
uptr UsedSize() const {
- uptr R = user_requested_size_lo;
- if (sizeof(uptr) > sizeof(user_requested_size_lo))
- R += (uptr)user_requested_size_hi << (8 * sizeof(user_requested_size_lo));
- return R;
+ static_assert(sizeof(user_requested_size_lo) == 4,
+ "Expression below requires this");
+ return FIRST_32_SECOND_64(0, ((uptr)user_requested_size_hi << 32)) +
+ user_requested_size_lo;
}
void SetUsedSize(uptr size) {
user_requested_size_lo = size;
- if (sizeof(uptr) > sizeof(user_requested_size_lo)) {
- size >>= (8 * sizeof(user_requested_size_lo));
- user_requested_size_hi = size;
- CHECK_EQ(user_requested_size_hi, size);
- }
+ static_assert(sizeof(user_requested_size_lo) == 4,
+ "Expression below requires this");
+ user_requested_size_hi = FIRST_32_SECOND_64(0, size >> 32);
+ CHECK_EQ(UsedSize(), size);
}
void SetAllocContext(u32 tid, u32 stack) {
@@ -522,7 +521,7 @@ struct Allocator {
size > max_user_defined_malloc_size) {
if (AllocatorMayReturnNull()) {
Report("WARNING: AddressSanitizer failed to allocate 0x%zx bytes\n",
- (void*)size);
+ size);
return nullptr;
}
uptr malloc_limit =
@@ -908,13 +907,6 @@ AllocType AsanChunkView::GetAllocType() const {
return (AllocType)chunk_->alloc_type;
}
-static StackTrace GetStackTraceFromId(u32 id) {
- CHECK(id);
- StackTrace res = StackDepotGet(id);
- CHECK(res.trace);
- return res;
-}
-
u32 AsanChunkView::GetAllocStackId() const {
u32 tid = 0;
u32 stack = 0;
@@ -931,14 +923,6 @@ u32 AsanChunkView::GetFreeStackId() const {
return stack;
}
-StackTrace AsanChunkView::GetAllocStack() const {
- return GetStackTraceFromId(GetAllocStackId());
-}
-
-StackTrace AsanChunkView::GetFreeStack() const {
- return GetStackTraceFromId(GetFreeStackId());
-}
-
void InitializeAllocator(const AllocatorOptions &options) {
instance.InitLinkerInitialized(options);
}
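
The UsedSize()/SetUsedSize() rewrite above leans on ChunkHeader storing the requested size as a 32-bit low word plus a high word that only matters on 64-bit targets; FIRST_32_SECOND_64 selects 0 on 32-bit and the shifted high word on 64-bit, and the new CHECK_EQ(UsedSize(), size) verifies the round trip. A standalone sketch of the same packing idea (the field names and the assert mirror the diff; this is not the actual ChunkHeader layout):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    // Illustrative 32/64-bit split of a size field: the low 32 bits are
    // always stored, the high bits are only non-zero on 64-bit targets.
    struct PackedSize {
      uint32_t lo;
      uint32_t hi;

      void set(size_t size) {
        lo = static_cast<uint32_t>(size);
        hi = static_cast<uint32_t>(static_cast<uint64_t>(size) >> 32);
        assert(get() == size);  // same round-trip check as CHECK_EQ in the diff
      }
      size_t get() const {
        return static_cast<size_t>((static_cast<uint64_t>(hi) << 32) | lo);
      }
    };
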
diff --git a/compiler-rt/lib/asan/asan_allocator.h b/compiler-rt/lib/asan/asan_allocator.h
index 2963e979b55c..27d826fb613a 100644
--- a/compiler-rt/lib/asan/asan_allocator.h
+++ b/compiler-rt/lib/asan/asan_allocator.h
@@ -64,8 +64,6 @@ class AsanChunkView {
bool Eq(const AsanChunkView &c) const { return chunk_ == c.chunk_; }
u32 GetAllocStackId() const;
u32 GetFreeStackId() const;
- StackTrace GetAllocStack() const;
- StackTrace GetFreeStack() const;
AllocType GetAllocType() const;
bool AddrIsInside(uptr addr, uptr access_size, sptr *offset) const {
if (addr >= Beg() && (addr + access_size) <= End()) {
diff --git a/compiler-rt/lib/asan/asan_debugging.cpp b/compiler-rt/lib/asan/asan_debugging.cpp
index c01360b52fc9..0b4bf52f2490 100644
--- a/compiler-rt/lib/asan/asan_debugging.cpp
+++ b/compiler-rt/lib/asan/asan_debugging.cpp
@@ -19,6 +19,7 @@
#include "asan_mapping.h"
#include "asan_report.h"
#include "asan_thread.h"
+#include "sanitizer_common/sanitizer_stackdepot.h"
namespace {
using namespace __asan;
@@ -54,11 +55,11 @@ uptr AsanGetStack(uptr addr, uptr *trace, u32 size, u32 *thread_id,
StackTrace stack(nullptr, 0);
if (alloc_stack) {
if (chunk.AllocTid() == kInvalidTid) return 0;
- stack = chunk.GetAllocStack();
+ stack = StackDepotGet(chunk.GetAllocStackId());
if (thread_id) *thread_id = chunk.AllocTid();
} else {
if (chunk.FreeTid() == kInvalidTid) return 0;
- stack = chunk.GetFreeStack();
+ stack = StackDepotGet(chunk.GetFreeStackId());
if (thread_id) *thread_id = chunk.FreeTid();
}
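
With GetStackTraceFromId(), GetAllocStack() and GetFreeStack() removed from the allocator, callers such as AsanGetStack now go straight from the stored depot id to a StackTrace via StackDepotGet(). A sketch of that lookup with the validity checks the deleted helper used to perform, mirroring the code removed from asan_allocator.cpp (assumes the same compiler-rt internal headers and CHECK macros; the helper name is illustrative):

    #include "sanitizer_common/sanitizer_stackdepot.h"

    // Illustrative: recover a StackTrace from a depot id stored in a chunk,
    // keeping the checks the old GetStackTraceFromId() helper did.
    static StackTrace GetStackTraceFromIdChecked(u32 id) {
      CHECK(id);
      StackTrace res = StackDepotGet(id);
      CHECK(res.trace);
      return res;
    }
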
diff --git a/compiler-rt/lib/asan/asan_descriptions.cpp b/compiler-rt/lib/asan/asan_descriptions.cpp
index 2ba8a02f8410..d7d961685793 100644
--- a/compiler-rt/lib/asan/asan_descriptions.cpp
+++ b/compiler-rt/lib/asan/asan_descriptions.cpp
@@ -251,7 +251,7 @@ static void PrintAccessAndVarIntersection(const StackVarDescr &var, uptr addr,
}
str.append("'");
if (var.line > 0) {
- str.append(" (line %d)", var.line);
+ str.append(" (line %zd)", var.line);
}
if (pos_descr) {
Decorator d;
@@ -318,7 +318,8 @@ bool DescribeAddressIfGlobal(uptr addr, uptr access_size,
}
void ShadowAddressDescription::Print() const {
- Printf("Address %p is located in the %s area.\n", addr, ShadowNames[kind]);
+ Printf("Address %p is located in the %s area.\n", (void *)addr,
+ ShadowNames[kind]);
}
void GlobalAddressDescription::Print(const char *bug_type) const {
@@ -356,7 +357,7 @@ bool GlobalAddressDescription::PointsInsideTheSameVariable(
void StackAddressDescription::Print() const {
Decorator d;
Printf("%s", d.Location());
- Printf("Address %p is located in stack of thread %s", addr,
+ Printf("Address %p is located in stack of thread %s", (void *)addr,
AsanThreadIdAndName(tid).c_str());
if (!frame_descr) {
@@ -469,7 +470,7 @@ AddressDescription::AddressDescription(uptr addr, uptr access_size,
void WildAddressDescription::Print() const {
Printf("Address %p is a wild pointer inside of access range of size %p.\n",
- addr, access_size);
+ (void *)addr, (void *)access_size);
}
void PrintAddressDescription(uptr addr, uptr access_size,
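
Nearly all of the edits in asan_descriptions.cpp (and in asan_errors.cpp below) apply one rule: uptr values handed to %p are now explicitly cast to void *, and sizes move to %zu/%zd, so the arguments line up with what the format directives (and format-string checking) expect. A minimal sketch of the rule using plain printf (illustrative; the sanitizer's internal Printf has the same expectation for %p):

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    using uptr = uintptr_t;  // stand-in for the sanitizer's uptr

    void report_range(uptr addr, uptr size) {
      // Mismatch: %p expects a pointer, not an integer.
      //   printf("Address %p of size %p\n", addr, size);
      // Fixed, as in the diff: cast to void * for %p, use %zu for sizes.
      printf("Address %p of size %zu\n", (void *)addr, (size_t)size);
    }
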
diff --git a/compiler-rt/lib/asan/asan_errors.cpp b/compiler-rt/lib/asan/asan_errors.cpp
index 45166c064877..7cd9fe911afa 100644
--- a/compiler-rt/lib/asan/asan_errors.cpp
+++ b/compiler-rt/lib/asan/asan_errors.cpp
@@ -46,10 +46,9 @@ void ErrorDeadlySignal::Print() {
void ErrorDoubleFree::Print() {
Decorator d;
Printf("%s", d.Error());
- Report(
- "ERROR: AddressSanitizer: attempting %s on %p in thread %s:\n",
- scariness.GetDescription(), addr_description.addr,
- AsanThreadIdAndName(tid).c_str());
+ Report("ERROR: AddressSanitizer: attempting %s on %p in thread %s:\n",
+ scariness.GetDescription(), (void *)addr_description.addr,
+ AsanThreadIdAndName(tid).c_str());
Printf("%s", d.Default());
scariness.Print();
GET_STACK_TRACE_FATAL(second_free_stack->trace[0],
@@ -62,10 +61,9 @@ void ErrorDoubleFree::Print() {
void ErrorNewDeleteTypeMismatch::Print() {
Decorator d;
Printf("%s", d.Error());
- Report(
- "ERROR: AddressSanitizer: %s on %p in thread %s:\n",
- scariness.GetDescription(), addr_description.addr,
- AsanThreadIdAndName(tid).c_str());
+ Report("ERROR: AddressSanitizer: %s on %p in thread %s:\n",
+ scariness.GetDescription(), (void *)addr_description.addr,
+ AsanThreadIdAndName(tid).c_str());
Printf("%s object passed to delete has wrong type:\n", d.Default());
if (delete_size != 0) {
Printf(
@@ -106,7 +104,7 @@ void ErrorFreeNotMalloced::Print() {
Report(
"ERROR: AddressSanitizer: attempting free on address "
"which was not malloc()-ed: %p in thread %s\n",
- addr_description.Address(), AsanThreadIdAndName(tid).c_str());
+ (void *)addr_description.Address(), AsanThreadIdAndName(tid).c_str());
Printf("%s", d.Default());
CHECK_GT(free_stack->size, 0);
scariness.Print();
@@ -126,7 +124,7 @@ void ErrorAllocTypeMismatch::Print() {
Printf("%s", d.Error());
Report("ERROR: AddressSanitizer: %s (%s vs %s) on %p\n",
scariness.GetDescription(), alloc_names[alloc_type],
- dealloc_names[dealloc_type], addr_description.Address());
+ dealloc_names[dealloc_type], (void *)addr_description.Address());
Printf("%s", d.Default());
CHECK_GT(dealloc_stack->size, 0);
scariness.Print();
@@ -145,7 +143,7 @@ void ErrorMallocUsableSizeNotOwned::Print() {
Report(
"ERROR: AddressSanitizer: attempting to call malloc_usable_size() for "
"pointer which is not owned: %p\n",
- addr_description.Address());
+ (void *)addr_description.Address());
Printf("%s", d.Default());
stack->Print();
addr_description.Print();
@@ -158,7 +156,7 @@ void ErrorSanitizerGetAllocatedSizeNotOwned::Print() {
Report(
"ERROR: AddressSanitizer: attempting to call "
"__sanitizer_get_allocated_size() for pointer which is not owned: %p\n",
- addr_description.Address());
+ (void *)addr_description.Address());
Printf("%s", d.Default());
stack->Print();
addr_description.Print();
@@ -298,9 +296,10 @@ void ErrorStringFunctionMemoryRangesOverlap::Print() {
Report(
"ERROR: AddressSanitizer: %s: memory ranges [%p,%p) and [%p, %p) "
"overlap\n",
- bug_type, addr1_description.Address(),
- addr1_description.Address() + length1, addr2_description.Address(),
- addr2_description.Address() + length2);
+ bug_type, (void *)addr1_description.Address(),
+ (void *)(addr1_description.Address() + length1),
+ (void *)addr2_description.Address(),
+ (void *)(addr2_description.Address() + length2));
Printf("%s", d.Default());
scariness.Print();
stack->Print();
@@ -329,10 +328,10 @@ void ErrorBadParamsToAnnotateContiguousContainer::Print() {
" end : %p\n"
" old_mid : %p\n"
" new_mid : %p\n",
- beg, end, old_mid, new_mid);
+ (void *)beg, (void *)end, (void *)old_mid, (void *)new_mid);
uptr granularity = SHADOW_GRANULARITY;
if (!IsAligned(beg, granularity))
- Report("ERROR: beg is not aligned by %d\n", granularity);
+ Report("ERROR: beg is not aligned by %zu\n", granularity);
stack->Print();
ReportErrorSummary(scariness.GetDescription(), stack);
}
@@ -341,7 +340,7 @@ void ErrorODRViolation::Print() {
Decorator d;
Printf("%s", d.Error());
Report("ERROR: AddressSanitizer: %s (%p):\n", scariness.GetDescription(),
- global1.beg);
+ (void *)global1.beg);
Printf("%s", d.Default());
InternalScopedString g1_loc;
InternalScopedString g2_loc;
@@ -371,7 +370,8 @@ void ErrorInvalidPointerPair::Print() {
Decorator d;
Printf("%s", d.Error());
Report("ERROR: AddressSanitizer: %s: %p %p\n", scariness.GetDescription(),
- addr1_description.Address(), addr2_description.Address());
+ (void *)addr1_description.Address(),
+ (void *)addr2_description.Address());
Printf("%s", d.Default());
GET_STACK_TRACE_FATAL(pc, bp);
stack.Print();
@@ -538,7 +538,8 @@ static void PrintLegend(InternalScopedString *str) {
static void PrintShadowBytes(InternalScopedString *str, const char *before,
u8 *bytes, u8 *guilty, uptr n) {
Decorator d;
- if (before) str->append("%s%p:", before, bytes);
+ if (before)
+ str->append("%s%p:", before, (void *)bytes);
for (uptr i = 0; i < n; i++) {
u8 *p = bytes + i;
const char *before =
@@ -575,7 +576,7 @@ void ErrorGeneric::Print() {
Printf("%s", d.Error());
uptr addr = addr_description.Address();
Report("ERROR: AddressSanitizer: %s on address %p at pc %p bp %p sp %p\n",
- bug_descr, (void *)addr, pc, bp, sp);
+ bug_descr, (void *)addr, (void *)pc, (void *)bp, (void *)sp);
Printf("%s", d.Default());
Printf("%s%s of size %zu at %p thread %s%s\n", d.Access(),
diff --git a/compiler-rt/lib/asan/asan_fake_stack.cpp b/compiler-rt/lib/asan/asan_fake_stack.cpp
index bf5c342ee59d..07681c10de91 100644
--- a/compiler-rt/lib/asan/asan_fake_stack.cpp
+++ b/compiler-rt/lib/asan/asan_fake_stack.cpp
@@ -54,10 +54,11 @@ FakeStack *FakeStack::Create(uptr stack_size_log) {
: MmapOrDie(size, "FakeStack"));
res->stack_size_log_ = stack_size_log;
u8 *p = reinterpret_cast<u8 *>(res);
- VReport(1, "T%d: FakeStack created: %p -- %p stack_size_log: %zd; "
+ VReport(1,
+ "T%d: FakeStack created: %p -- %p stack_size_log: %zd; "
"mmapped %zdK, noreserve=%d \n",
- GetCurrentTidOrInvalid(), p,
- p + FakeStack::RequiredSize(stack_size_log), stack_size_log,
+ GetCurrentTidOrInvalid(), (void *)p,
+ (void *)(p + FakeStack::RequiredSize(stack_size_log)), stack_size_log,
size >> 10, flags()->uar_noreserve);
return res;
}
diff --git a/compiler-rt/lib/asan/asan_fuchsia.cpp b/compiler-rt/lib/asan/asan_fuchsia.cpp
index b0c7255144ac..15381d5bd0e5 100644
--- a/compiler-rt/lib/asan/asan_fuchsia.cpp
+++ b/compiler-rt/lib/asan/asan_fuchsia.cpp
@@ -31,7 +31,8 @@ namespace __asan {
// AsanInitInternal->InitializeHighMemEnd (asan_rtl.cpp).
// Just do some additional sanity checks here.
void InitializeShadowMemory() {
- if (Verbosity()) PrintAddressSpaceLayout();
+ if (Verbosity())
+ PrintAddressSpaceLayout();
// Make sure SHADOW_OFFSET doesn't use __asan_shadow_memory_dynamic_address.
__asan_shadow_memory_dynamic_address = kDefaultShadowSentinel;
@@ -62,7 +63,34 @@ void AsanOnDeadlySignal(int signo, void *siginfo, void *context) {
UNIMPLEMENTED();
}
-bool PlatformUnpoisonStacks() { return false; }
+bool PlatformUnpoisonStacks() {
+ // The current sp might not point to the default stack. This
+ // could be because we are in a crash stack from fuzzing for example.
+ // Unpoison the default stack and the current stack page.
+ AsanThread *curr_thread = GetCurrentThread();
+ CHECK(curr_thread != nullptr);
+ uptr top = curr_thread->stack_top();
+ uptr bottom = curr_thread->stack_bottom();
+ // The default stack grows from top to bottom. (bottom < top).
+
+ uptr local_stack = reinterpret_cast<uptr>(__builtin_frame_address(0));
+ if (local_stack >= bottom && local_stack <= top) {
+ // The current stack is the default stack.
+ // We only need to unpoison from where we are using until the end.
+ bottom = RoundDownTo(local_stack, GetPageSize());
+ UnpoisonStack(bottom, top, "default");
+ } else {
+ // The current stack is not the default stack.
+ // Unpoison the entire default stack and the current stack page.
+ UnpoisonStack(bottom, top, "default");
+ bottom = RoundDownTo(local_stack, GetPageSize());
+ top = bottom + GetPageSize();
+ UnpoisonStack(bottom, top, "unknown");
+ return true;
+ }
+
+ return false;
+}
// We can use a plain thread_local variable for TSD.
static thread_local void *per_thread;
@@ -90,14 +118,12 @@ struct AsanThread::InitOptions {
// Shared setup between thread creation and startup for the initial thread.
static AsanThread *CreateAsanThread(StackTrace *stack, u32 parent_tid,
- uptr user_id, bool detached,
- const char *name) {
+ bool detached, const char *name) {
// In lieu of AsanThread::Create.
AsanThread *thread = (AsanThread *)MmapOrDie(AsanThreadMmapSize(), __func__);
AsanThreadContext::CreateThreadContextArgs args = {thread, stack};
- u32 tid =
- asanThreadRegistry().CreateThread(user_id, detached, parent_tid, &args);
+ u32 tid = asanThreadRegistry().CreateThread(0, detached, parent_tid, &args);
asanThreadRegistry().SetThreadName(tid, name);
return thread;
@@ -124,7 +150,7 @@ AsanThread *CreateMainThread() {
CHECK_NE(__sanitizer::MainThreadStackBase, 0);
CHECK_GT(__sanitizer::MainThreadStackSize, 0);
AsanThread *t = CreateAsanThread(
- nullptr, 0, reinterpret_cast<uptr>(self), true,
+ nullptr, 0, true,
_zx_object_get_property(thrd_get_zx_handle(self), ZX_PROP_NAME, name,
sizeof(name)) == ZX_OK
? name
@@ -148,13 +174,13 @@ static void *BeforeThreadCreateHook(uptr user_id, bool detached,
uptr stack_size) {
EnsureMainThreadIDIsCorrect();
// Strict init-order checking is thread-hostile.
- if (flags()->strict_init_order) StopInitOrderChecking();
+ if (flags()->strict_init_order)
+ StopInitOrderChecking();
GET_STACK_TRACE_THREAD;
u32 parent_tid = GetCurrentTidOrInvalid();
- AsanThread *thread =
- CreateAsanThread(&stack, parent_tid, user_id, detached, name);
+ AsanThread *thread = CreateAsanThread(&stack, parent_tid, detached, name);
// On other systems, AsanThread::Init() is called from the new
// thread itself. But on Fuchsia we already know the stack address
diff --git a/compiler-rt/lib/asan/asan_globals.cpp b/compiler-rt/lib/asan/asan_globals.cpp
index 9d7dbc6f264c..5f56fe6f457d 100644
--- a/compiler-rt/lib/asan/asan_globals.cpp
+++ b/compiler-rt/lib/asan/asan_globals.cpp
@@ -35,7 +35,7 @@ struct ListOfGlobals {
ListOfGlobals *next;
};
-static BlockingMutex mu_for_globals(LINKER_INITIALIZED);
+static Mutex mu_for_globals;
static LowLevelAllocator allocator_for_globals;
static ListOfGlobals *list_of_all_globals;
@@ -85,12 +85,12 @@ static void ReportGlobal(const Global &g, const char *prefix) {
Report(
"%s Global[%p]: beg=%p size=%zu/%zu name=%s module=%s dyn_init=%zu "
"odr_indicator=%p\n",
- prefix, &g, (void *)g.beg, g.size, g.size_with_redzone, g.name,
+ prefix, (void *)&g, (void *)g.beg, g.size, g.size_with_redzone, g.name,
g.module_name, g.has_dynamic_init, (void *)g.odr_indicator);
if (g.location) {
- Report(" location (%p): name=%s[%p], %d %d\n", g.location,
- g.location->filename, g.location->filename, g.location->line_no,
- g.location->column_no);
+ Report(" location (%p): name=%s[%p], %d %d\n", (void *)g.location,
+ g.location->filename, (void *)g.location->filename,
+ g.location->line_no, g.location->column_no);
}
}
@@ -108,7 +108,7 @@ static u32 FindRegistrationSite(const Global *g) {
int GetGlobalsForAddress(uptr addr, Global *globals, u32 *reg_sites,
int max_globals) {
if (!flags()->report_globals) return 0;
- BlockingMutexLock lock(&mu_for_globals);
+ Lock lock(&mu_for_globals);
int res = 0;
for (ListOfGlobals *l = list_of_all_globals; l; l = l->next) {
const Global &g = *l->g;
@@ -257,7 +257,7 @@ static void UnregisterGlobal(const Global *g) {
}
void StopInitOrderChecking() {
- BlockingMutexLock lock(&mu_for_globals);
+ Lock lock(&mu_for_globals);
if (!flags()->check_initialization_order || !dynamic_init_globals)
return;
flags()->check_initialization_order = false;
@@ -359,7 +359,7 @@ void __asan_register_globals(__asan_global *globals, uptr n) {
if (!flags()->report_globals) return;
GET_STACK_TRACE_MALLOC;
u32 stack_id = StackDepotPut(stack);
- BlockingMutexLock lock(&mu_for_globals);
+ Lock lock(&mu_for_globals);
if (!global_registration_site_vector) {
global_registration_site_vector =
new (allocator_for_globals) GlobalRegistrationSiteVector;
@@ -369,7 +369,8 @@ void __asan_register_globals(__asan_global *globals, uptr n) {
global_registration_site_vector->push_back(site);
if (flags()->report_globals >= 2) {
PRINT_CURRENT_STACK();
- Printf("=== ID %d; %p %p\n", stack_id, &globals[0], &globals[n - 1]);
+ Printf("=== ID %d; %p %p\n", stack_id, (void *)&globals[0],
+ (void *)&globals[n - 1]);
}
for (uptr i = 0; i < n; i++) {
if (SANITIZER_WINDOWS && globals[i].beg == 0) {
@@ -398,7 +399,7 @@ void __asan_register_globals(__asan_global *globals, uptr n) {
// We must do this when a shared objects gets dlclosed.
void __asan_unregister_globals(__asan_global *globals, uptr n) {
if (!flags()->report_globals) return;
- BlockingMutexLock lock(&mu_for_globals);
+ Lock lock(&mu_for_globals);
for (uptr i = 0; i < n; i++) {
if (SANITIZER_WINDOWS && globals[i].beg == 0) {
// Skip globals that look like padding from the MSVC incremental linker.
@@ -424,7 +425,7 @@ void __asan_before_dynamic_init(const char *module_name) {
bool strict_init_order = flags()->strict_init_order;
CHECK(module_name);
CHECK(asan_inited);
- BlockingMutexLock lock(&mu_for_globals);
+ Lock lock(&mu_for_globals);
if (flags()->report_globals >= 3)
Printf("DynInitPoison module: %s\n", module_name);
for (uptr i = 0, n = dynamic_init_globals->size(); i < n; ++i) {
@@ -448,7 +449,7 @@ void __asan_after_dynamic_init() {
!dynamic_init_globals)
return;
CHECK(asan_inited);
- BlockingMutexLock lock(&mu_for_globals);
+ Lock lock(&mu_for_globals);
// FIXME: Optionally report that we're unpoisoning globals from a module.
for (uptr i = 0, n = dynamic_init_globals->size(); i < n; ++i) {
DynInitGlobal &dyn_g = (*dynamic_init_globals)[i];
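
asan_globals.cpp swaps the old BlockingMutex (which required the LINKER_INITIALIZED constructor argument) for the newer sanitizer_common Mutex, and every BlockingMutexLock guard becomes the generic Lock RAII guard. A sketch of the pattern in isolation (assumes the compiler-rt sanitizer_mutex.h definitions; the guarded counter is illustrative):

    #include "sanitizer_common/sanitizer_mutex.h"

    namespace __asan {
    // Illustrative: the new Mutex needs no LINKER_INITIALIZED argument and
    // is taken with the generic Lock scoped guard.
    static __sanitizer::Mutex demo_mu;
    static int demo_counter;

    void BumpCounter() {
      __sanitizer::Lock lock(&demo_mu);  // was: BlockingMutexLock lock(&demo_mu);
      ++demo_counter;
    }
    }  // namespace __asan
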
diff --git a/compiler-rt/lib/asan/asan_interceptors.cpp b/compiler-rt/lib/asan/asan_interceptors.cpp
index d0a6dd48a748..b28909152e20 100644
--- a/compiler-rt/lib/asan/asan_interceptors.cpp
+++ b/compiler-rt/lib/asan/asan_interceptors.cpp
@@ -49,8 +49,8 @@ namespace __asan {
ASAN_READ_RANGE((ctx), (s), \
common_flags()->strict_string_checks ? (len) + 1 : (n))
-#define ASAN_READ_STRING(ctx, s, n) \
- ASAN_READ_STRING_OF_LEN((ctx), (s), REAL(strlen)(s), (n))
+# define ASAN_READ_STRING(ctx, s, n) \
+ ASAN_READ_STRING_OF_LEN((ctx), (s), internal_strlen(s), (n))
static inline uptr MaybeRealStrnlen(const char *s, uptr maxlen) {
#if SANITIZER_INTERCEPT_STRNLEN
@@ -370,9 +370,9 @@ DEFINE_REAL(char*, index, const char *string, int c)
ASAN_INTERCEPTOR_ENTER(ctx, strcat);
ENSURE_ASAN_INITED();
if (flags()->replace_str) {
- uptr from_length = REAL(strlen)(from);
+ uptr from_length = internal_strlen(from);
ASAN_READ_RANGE(ctx, from, from_length + 1);
- uptr to_length = REAL(strlen)(to);
+ uptr to_length = internal_strlen(to);
ASAN_READ_STRING_OF_LEN(ctx, to, to_length, to_length);
ASAN_WRITE_RANGE(ctx, to + to_length, from_length + 1);
// If the copying actually happens, the |from| string should not overlap
@@ -394,7 +394,7 @@ INTERCEPTOR(char*, strncat, char *to, const char *from, uptr size) {
uptr from_length = MaybeRealStrnlen(from, size);
uptr copy_length = Min(size, from_length + 1);
ASAN_READ_RANGE(ctx, from, copy_length);
- uptr to_length = REAL(strlen)(to);
+ uptr to_length = internal_strlen(to);
ASAN_READ_STRING_OF_LEN(ctx, to, to_length, to_length);
ASAN_WRITE_RANGE(ctx, to + to_length, from_length + 1);
if (from_length > 0) {
@@ -419,7 +419,7 @@ INTERCEPTOR(char *, strcpy, char *to, const char *from) {
}
ENSURE_ASAN_INITED();
if (flags()->replace_str) {
- uptr from_size = REAL(strlen)(from) + 1;
+ uptr from_size = internal_strlen(from) + 1;
CHECK_RANGES_OVERLAP("strcpy", to, from_size, from, from_size);
ASAN_READ_RANGE(ctx, from, from_size);
ASAN_WRITE_RANGE(ctx, to, from_size);
@@ -432,7 +432,7 @@ INTERCEPTOR(char*, strdup, const char *s) {
ASAN_INTERCEPTOR_ENTER(ctx, strdup);
if (UNLIKELY(!asan_inited)) return internal_strdup(s);
ENSURE_ASAN_INITED();
- uptr length = REAL(strlen)(s);
+ uptr length = internal_strlen(s);
if (flags()->replace_str) {
ASAN_READ_RANGE(ctx, s, length + 1);
}
@@ -448,7 +448,7 @@ INTERCEPTOR(char*, __strdup, const char *s) {
ASAN_INTERCEPTOR_ENTER(ctx, strdup);
if (UNLIKELY(!asan_inited)) return internal_strdup(s);
ENSURE_ASAN_INITED();
- uptr length = REAL(strlen)(s);
+ uptr length = internal_strlen(s);
if (flags()->replace_str) {
ASAN_READ_RANGE(ctx, s, length + 1);
}
@@ -581,7 +581,7 @@ INTERCEPTOR(int, atexit, void (*func)()) {
#if CAN_SANITIZE_LEAKS
__lsan::ScopedInterceptorDisabler disabler;
#endif
- // Avoid calling real atexit as it is unrechable on at least on Linux.
+  // Avoid calling real atexit as it is unreachable at least on Linux.
int res = REAL(__cxa_atexit)((void (*)(void *a))func, nullptr, nullptr);
REAL(__cxa_atexit)(AtCxaAtexit, nullptr, nullptr);
return res;
diff --git a/compiler-rt/lib/asan/asan_interceptors.h b/compiler-rt/lib/asan/asan_interceptors.h
index a9249dea45b9..047b044c8bf4 100644
--- a/compiler-rt/lib/asan/asan_interceptors.h
+++ b/compiler-rt/lib/asan/asan_interceptors.h
@@ -133,29 +133,30 @@ DECLARE_REAL(char*, strncpy, char *to, const char *from, uptr size)
DECLARE_REAL(uptr, strnlen, const char *s, uptr maxlen)
DECLARE_REAL(char*, strstr, const char *s1, const char *s2)
-#if !SANITIZER_MAC
-#define ASAN_INTERCEPT_FUNC(name) \
- do { \
- if (!INTERCEPT_FUNCTION(name)) \
- VReport(1, "AddressSanitizer: failed to intercept '%s'\n", #name); \
- } while (0)
-#define ASAN_INTERCEPT_FUNC_VER(name, ver) \
- do { \
- if (!INTERCEPT_FUNCTION_VER(name, ver)) \
- VReport(1, "AddressSanitizer: failed to intercept '%s@@%s'\n", #name, \
- #ver); \
- } while (0)
-#define ASAN_INTERCEPT_FUNC_VER_UNVERSIONED_FALLBACK(name, ver) \
- do { \
- if (!INTERCEPT_FUNCTION_VER(name, ver) && !INTERCEPT_FUNCTION(name)) \
- VReport(1, "AddressSanitizer: failed to intercept '%s@@%s' or '%s'\n", \
- #name, #ver, #name); \
- } while (0)
-
-#else
+# if !SANITIZER_MAC
+# define ASAN_INTERCEPT_FUNC(name) \
+ do { \
+ if (!INTERCEPT_FUNCTION(name)) \
+ VReport(1, "AddressSanitizer: failed to intercept '%s'\n", #name); \
+ } while (0)
+# define ASAN_INTERCEPT_FUNC_VER(name, ver) \
+ do { \
+ if (!INTERCEPT_FUNCTION_VER(name, ver)) \
+ VReport(1, "AddressSanitizer: failed to intercept '%s@@%s'\n", \
+ #name, ver); \
+ } while (0)
+# define ASAN_INTERCEPT_FUNC_VER_UNVERSIONED_FALLBACK(name, ver) \
+ do { \
+ if (!INTERCEPT_FUNCTION_VER(name, ver) && !INTERCEPT_FUNCTION(name)) \
+ VReport(1, \
+ "AddressSanitizer: failed to intercept '%s@@%s' or '%s'\n", \
+ #name, ver, #name); \
+ } while (0)
+
+# else
// OS X interceptors don't need to be initialized with INTERCEPT_FUNCTION.
-#define ASAN_INTERCEPT_FUNC(name)
-#endif // SANITIZER_MAC
+# define ASAN_INTERCEPT_FUNC(name)
+# endif // SANITIZER_MAC
#endif // !SANITIZER_FUCHSIA
diff --git a/compiler-rt/lib/asan/asan_linux.cpp b/compiler-rt/lib/asan/asan_linux.cpp
index 4bcbe5d02e33..ad3693d5e6a2 100644
--- a/compiler-rt/lib/asan/asan_linux.cpp
+++ b/compiler-rt/lib/asan/asan_linux.cpp
@@ -128,8 +128,8 @@ void AsanCheckIncompatibleRT() {}
#else
static int FindFirstDSOCallback(struct dl_phdr_info *info, size_t size,
void *data) {
- VReport(2, "info->dlpi_name = %s\tinfo->dlpi_addr = %p\n",
- info->dlpi_name, info->dlpi_addr);
+ VReport(2, "info->dlpi_name = %s\tinfo->dlpi_addr = %p\n", info->dlpi_name,
+ (void *)info->dlpi_addr);
// Continue until the first dynamic library is found
if (!info->dlpi_name || info->dlpi_name[0] == 0)
diff --git a/compiler-rt/lib/asan/asan_malloc_linux.cpp b/compiler-rt/lib/asan/asan_malloc_linux.cpp
index c6bec8551bc5..bab80b96f584 100644
--- a/compiler-rt/lib/asan/asan_malloc_linux.cpp
+++ b/compiler-rt/lib/asan/asan_malloc_linux.cpp
@@ -21,129 +21,66 @@
# include "asan_interceptors.h"
# include "asan_internal.h"
# include "asan_stack.h"
+# include "lsan/lsan_common.h"
# include "sanitizer_common/sanitizer_allocator_checks.h"
+# include "sanitizer_common/sanitizer_allocator_dlsym.h"
# include "sanitizer_common/sanitizer_errno.h"
# include "sanitizer_common/sanitizer_tls_get_addr.h"
// ---------------------- Replacement functions ---------------- {{{1
using namespace __asan;
-static uptr allocated_for_dlsym;
-static uptr last_dlsym_alloc_size_in_words;
-static const uptr kDlsymAllocPoolSize = 1024;
-static uptr alloc_memory_for_dlsym[kDlsymAllocPoolSize];
-
-static inline bool IsInDlsymAllocPool(const void *ptr) {
- uptr off = (uptr)ptr - (uptr)alloc_memory_for_dlsym;
- return off < allocated_for_dlsym * sizeof(alloc_memory_for_dlsym[0]);
-}
-
-static void *AllocateFromLocalPool(uptr size_in_bytes) {
- uptr size_in_words = RoundUpTo(size_in_bytes, kWordSize) / kWordSize;
- void *mem = (void*)&alloc_memory_for_dlsym[allocated_for_dlsym];
- last_dlsym_alloc_size_in_words = size_in_words;
- allocated_for_dlsym += size_in_words;
- CHECK_LT(allocated_for_dlsym, kDlsymAllocPoolSize);
- return mem;
-}
-
-static void DeallocateFromLocalPool(const void *ptr) {
- // Hack: since glibc 2.27 dlsym no longer uses stack-allocated memory to store
- // error messages and instead uses malloc followed by free. To avoid pool
- // exhaustion due to long object filenames, handle that special case here.
- uptr prev_offset = allocated_for_dlsym - last_dlsym_alloc_size_in_words;
- void *prev_mem = (void*)&alloc_memory_for_dlsym[prev_offset];
- if (prev_mem == ptr) {
- REAL(memset)(prev_mem, 0, last_dlsym_alloc_size_in_words * kWordSize);
- allocated_for_dlsym = prev_offset;
- last_dlsym_alloc_size_in_words = 0;
+struct DlsymAlloc : public DlSymAllocator<DlsymAlloc> {
+ static bool UseImpl() { return asan_init_is_running; }
+ static void OnAllocate(const void *ptr, uptr size) {
+# if CAN_SANITIZE_LEAKS
+ // Suppress leaks from dlerror(). Previously dlsym hack on global array was
+ // used by leak sanitizer as a root region.
+ __lsan_register_root_region(ptr, size);
+# endif
}
-}
-
-static int PosixMemalignFromLocalPool(void **memptr, uptr alignment,
- uptr size_in_bytes) {
- if (UNLIKELY(!CheckPosixMemalignAlignment(alignment)))
- return errno_EINVAL;
-
- CHECK(alignment >= kWordSize);
-
- uptr addr = (uptr)&alloc_memory_for_dlsym[allocated_for_dlsym];
- uptr aligned_addr = RoundUpTo(addr, alignment);
- uptr aligned_size = RoundUpTo(size_in_bytes, kWordSize);
-
- uptr *end_mem = (uptr*)(aligned_addr + aligned_size);
- uptr allocated = end_mem - alloc_memory_for_dlsym;
- if (allocated >= kDlsymAllocPoolSize)
- return errno_ENOMEM;
-
- allocated_for_dlsym = allocated;
- *memptr = (void*)aligned_addr;
- return 0;
-}
-
-static inline bool MaybeInDlsym() {
- // Fuchsia doesn't use dlsym-based interceptors.
- return !SANITIZER_FUCHSIA && asan_init_is_running;
-}
-
-static inline bool UseLocalPool() { return MaybeInDlsym(); }
-
-static void *ReallocFromLocalPool(void *ptr, uptr size) {
- const uptr offset = (uptr)ptr - (uptr)alloc_memory_for_dlsym;
- const uptr copy_size = Min(size, kDlsymAllocPoolSize - offset);
- void *new_ptr;
- if (UNLIKELY(UseLocalPool())) {
- new_ptr = AllocateFromLocalPool(size);
- } else {
- ENSURE_ASAN_INITED();
- GET_STACK_TRACE_MALLOC;
- new_ptr = asan_malloc(size, &stack);
+ static void OnFree(const void *ptr, uptr size) {
+# if CAN_SANITIZE_LEAKS
+ __lsan_unregister_root_region(ptr, size);
+# endif
}
- internal_memcpy(new_ptr, ptr, copy_size);
- return new_ptr;
-}
+};
INTERCEPTOR(void, free, void *ptr) {
- if (UNLIKELY(IsInDlsymAllocPool(ptr))) {
- DeallocateFromLocalPool(ptr);
- return;
- }
+ if (DlsymAlloc::PointerIsMine(ptr))
+ return DlsymAlloc::Free(ptr);
GET_STACK_TRACE_FREE;
asan_free(ptr, &stack, FROM_MALLOC);
}
#if SANITIZER_INTERCEPT_CFREE
INTERCEPTOR(void, cfree, void *ptr) {
- if (UNLIKELY(IsInDlsymAllocPool(ptr)))
- return;
+ if (DlsymAlloc::PointerIsMine(ptr))
+ return DlsymAlloc::Free(ptr);
GET_STACK_TRACE_FREE;
asan_free(ptr, &stack, FROM_MALLOC);
}
#endif // SANITIZER_INTERCEPT_CFREE
INTERCEPTOR(void*, malloc, uptr size) {
- if (UNLIKELY(UseLocalPool()))
- // Hack: dlsym calls malloc before REAL(malloc) is retrieved from dlsym.
- return AllocateFromLocalPool(size);
+ if (DlsymAlloc::Use())
+ return DlsymAlloc::Allocate(size);
ENSURE_ASAN_INITED();
GET_STACK_TRACE_MALLOC;
return asan_malloc(size, &stack);
}
INTERCEPTOR(void*, calloc, uptr nmemb, uptr size) {
- if (UNLIKELY(UseLocalPool()))
- // Hack: dlsym calls calloc before REAL(calloc) is retrieved from dlsym.
- return AllocateFromLocalPool(nmemb * size);
+ if (DlsymAlloc::Use())
+ return DlsymAlloc::Callocate(nmemb, size);
ENSURE_ASAN_INITED();
GET_STACK_TRACE_MALLOC;
return asan_calloc(nmemb, size, &stack);
}
INTERCEPTOR(void*, realloc, void *ptr, uptr size) {
- if (UNLIKELY(IsInDlsymAllocPool(ptr)))
- return ReallocFromLocalPool(ptr, size);
- if (UNLIKELY(UseLocalPool()))
- return AllocateFromLocalPool(size);
+ if (DlsymAlloc::Use() || DlsymAlloc::PointerIsMine(ptr))
+ return DlsymAlloc::Realloc(ptr, size);
ENSURE_ASAN_INITED();
GET_STACK_TRACE_MALLOC;
return asan_realloc(ptr, size, &stack);
@@ -205,8 +142,6 @@ INTERCEPTOR(int, mallopt, int cmd, int value) {
#endif // SANITIZER_INTERCEPT_MALLOPT_AND_MALLINFO
INTERCEPTOR(int, posix_memalign, void **memptr, uptr alignment, uptr size) {
- if (UNLIKELY(UseLocalPool()))
- return PosixMemalignFromLocalPool(memptr, alignment, size);
GET_STACK_TRACE_MALLOC;
return asan_posix_memalign(memptr, alignment, size, &stack);
}
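The rewrite above drops the hand-rolled static pool that served allocations made while dlsym() is still resolving the real malloc/free, and routes them through the shared DlSymAllocator helper instead; the derived class only supplies UseImpl() plus optional OnAllocate/OnFree hooks. A simplified sketch of that CRTP hook pattern follows (illustrative only; the real sanitizer_allocator_dlsym.h keeps its own internal pool and exposes further entry points such as Callocate and Realloc):

    #include <cstddef>
    #include <cstdlib>

    // Simplified CRTP base: the derived class decides when the fallback is in
    // use and can observe allocations, while the base owns the mechanics.
    template <class Derived>
    struct DlSymAllocatorSketch {
      static bool Use() { return Derived::UseImpl(); }
      static void *Allocate(std::size_t size) {
        void *p = std::malloc(size);  // the real helper uses an internal pool
        Derived::OnAllocate(p, size);
        return p;
      }
      static void Free(void *p) {
        Derived::OnFree(p, 0);
        std::free(p);
      }
      // Default no-op hooks; a derived class may shadow them.
      static void OnAllocate(const void *, std::size_t) {}
      static void OnFree(const void *, std::size_t) {}
    };

    struct DlsymAllocSketch : DlSymAllocatorSketch<DlsymAllocSketch> {
      static bool UseImpl() { return false; }  // e.g. asan_init_is_running
    };

With that in place the interceptors reduce to the shape visible in the hunk: check DlsymAlloc::Use() or PointerIsMine() first, otherwise fall through to the regular ASan allocator.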
diff --git a/compiler-rt/lib/asan/asan_poisoning.cpp b/compiler-rt/lib/asan/asan_poisoning.cpp
index 5f215fe0f9bb..d97af91e692d 100644
--- a/compiler-rt/lib/asan/asan_poisoning.cpp
+++ b/compiler-rt/lib/asan/asan_poisoning.cpp
@@ -66,7 +66,7 @@ void AsanPoisonOrUnpoisonIntraObjectRedzone(uptr ptr, uptr size, bool poison) {
uptr end = ptr + size;
if (Verbosity()) {
Printf("__asan_%spoison_intra_object_redzone [%p,%p) %zd\n",
- poison ? "" : "un", ptr, end, size);
+ poison ? "" : "un", (void *)ptr, (void *)end, size);
if (Verbosity() >= 2)
PRINT_CURRENT_STACK();
}
diff --git a/compiler-rt/lib/asan/asan_report.cpp b/compiler-rt/lib/asan/asan_report.cpp
index 03f1ed2b0186..1f266334b311 100644
--- a/compiler-rt/lib/asan/asan_report.cpp
+++ b/compiler-rt/lib/asan/asan_report.cpp
@@ -32,12 +32,12 @@ namespace __asan {
static void (*error_report_callback)(const char*);
static char *error_message_buffer = nullptr;
static uptr error_message_buffer_pos = 0;
-static BlockingMutex error_message_buf_mutex(LINKER_INITIALIZED);
+static Mutex error_message_buf_mutex;
static const unsigned kAsanBuggyPcPoolSize = 25;
static __sanitizer::atomic_uintptr_t AsanBuggyPcPool[kAsanBuggyPcPoolSize];
void AppendToErrorMessageBuffer(const char *buffer) {
- BlockingMutexLock l(&error_message_buf_mutex);
+ Lock l(&error_message_buf_mutex);
if (!error_message_buffer) {
error_message_buffer =
(char*)MmapOrDieQuietly(kErrorMessageBufferSize, __func__);
@@ -67,14 +67,14 @@ static void PrintZoneForPointer(uptr ptr, uptr zone_ptr,
const char *zone_name) {
if (zone_ptr) {
if (zone_name) {
- Printf("malloc_zone_from_ptr(%p) = %p, which is %s\n",
- ptr, zone_ptr, zone_name);
+ Printf("malloc_zone_from_ptr(%p) = %p, which is %s\n", (void *)ptr,
+ (void *)zone_ptr, zone_name);
} else {
Printf("malloc_zone_from_ptr(%p) = %p, which doesn't have a name\n",
- ptr, zone_ptr);
+ (void *)ptr, (void *)zone_ptr);
}
} else {
- Printf("malloc_zone_from_ptr(%p) = 0\n", ptr);
+ Printf("malloc_zone_from_ptr(%p) = 0\n", (void *)ptr);
}
}
@@ -155,10 +155,10 @@ class ScopedInErrorReport {
DumpProcessMap();
// Copy the message buffer so that we could start logging without holding a
- // lock that gets aquired during printing.
+ // lock that gets acquired during printing.
InternalMmapVector<char> buffer_copy(kErrorMessageBufferSize);
{
- BlockingMutexLock l(&error_message_buf_mutex);
+ Lock l(&error_message_buf_mutex);
internal_memcpy(buffer_copy.data(),
error_message_buffer, kErrorMessageBufferSize);
// Clear error_message_buffer so that if we find other errors
@@ -435,9 +435,10 @@ static inline void CheckForInvalidPointerPair(void *p1, void *p2) {
void ReportMacMzReallocUnknown(uptr addr, uptr zone_ptr, const char *zone_name,
BufferedStackTrace *stack) {
ScopedInErrorReport in_report;
- Printf("mz_realloc(%p) -- attempting to realloc unallocated memory.\n"
- "This is an unrecoverable problem, exiting now.\n",
- addr);
+ Printf(
+ "mz_realloc(%p) -- attempting to realloc unallocated memory.\n"
+ "This is an unrecoverable problem, exiting now.\n",
+ (void *)addr);
PrintZoneForPointer(addr, zone_ptr, zone_name);
stack->Print();
DescribeAddressIfHeap(addr);
@@ -490,7 +491,7 @@ void __asan_report_error(uptr pc, uptr bp, uptr sp, uptr addr, int is_write,
}
void NOINLINE __asan_set_error_report_callback(void (*callback)(const char*)) {
- BlockingMutexLock l(&error_message_buf_mutex);
+ Lock l(&error_message_buf_mutex);
error_report_callback = callback;
}
diff --git a/compiler-rt/lib/asan/asan_rtl.cpp b/compiler-rt/lib/asan/asan_rtl.cpp
index bfaa3bc27027..1b150b393cfe 100644
--- a/compiler-rt/lib/asan/asan_rtl.cpp
+++ b/compiler-rt/lib/asan/asan_rtl.cpp
@@ -557,7 +557,8 @@ void UnpoisonStack(uptr bottom, uptr top, const char *type) {
"False positive error reports may follow\n"
"For details see "
"https://github.com/google/sanitizers/issues/189\n",
- type, top, bottom, top - bottom, top - bottom);
+ type, (void *)top, (void *)bottom, (void *)(top - bottom),
+ top - bottom);
return;
}
PoisonShadow(bottom, RoundUpTo(top - bottom, SHADOW_GRANULARITY), 0);
diff --git a/compiler-rt/lib/asan/asan_shadow_setup.cpp b/compiler-rt/lib/asan/asan_shadow_setup.cpp
index 6e6260d3413f..fc6de39622b5 100644
--- a/compiler-rt/lib/asan/asan_shadow_setup.cpp
+++ b/compiler-rt/lib/asan/asan_shadow_setup.cpp
@@ -33,7 +33,7 @@ static void ProtectGap(uptr addr, uptr size) {
"protect_shadow_gap=0:"
" not protecting shadow gap, allocating gap's shadow\n"
"|| `[%p, %p]` || ShadowGap's shadow ||\n",
- GapShadowBeg, GapShadowEnd);
+ (void*)GapShadowBeg, (void*)GapShadowEnd);
ReserveShadowMemoryRange(GapShadowBeg, GapShadowEnd,
"unprotected gap shadow");
return;
@@ -113,7 +113,7 @@ void InitializeShadowMemory() {
"Shadow memory range interleaves with an existing memory mapping. "
"ASan cannot proceed correctly. ABORTING.\n");
Report("ASan shadow was supposed to be located in the [%p-%p] range.\n",
- shadow_start, kHighShadowEnd);
+ (void*)shadow_start, (void*)kHighShadowEnd);
MaybeReportLinuxPIEBug();
DumpProcessMap();
Die();
diff --git a/compiler-rt/lib/asan/asan_stats.cpp b/compiler-rt/lib/asan/asan_stats.cpp
index 00ded8f5ef50..9a715ea76fee 100644
--- a/compiler-rt/lib/asan/asan_stats.cpp
+++ b/compiler-rt/lib/asan/asan_stats.cpp
@@ -62,11 +62,11 @@ void AsanStats::MergeFrom(const AsanStats *stats) {
dst_ptr[i] += src_ptr[i];
}
-static BlockingMutex print_lock(LINKER_INITIALIZED);
+static Mutex print_lock;
static AsanStats unknown_thread_stats(LINKER_INITIALIZED);
static AsanStats dead_threads_stats(LINKER_INITIALIZED);
-static BlockingMutex dead_threads_stats_lock(LINKER_INITIALIZED);
+static Mutex dead_threads_stats_lock;
// Required for malloc_zone_statistics() on OS X. This can't be stored in
// per-thread AsanStats.
static uptr max_malloced_memory;
@@ -87,7 +87,7 @@ static void GetAccumulatedStats(AsanStats *stats) {
}
stats->MergeFrom(&unknown_thread_stats);
{
- BlockingMutexLock lock(&dead_threads_stats_lock);
+ Lock lock(&dead_threads_stats_lock);
stats->MergeFrom(&dead_threads_stats);
}
// This is not very accurate: we may miss allocation peaks that happen
@@ -99,7 +99,7 @@ static void GetAccumulatedStats(AsanStats *stats) {
}
void FlushToDeadThreadStats(AsanStats *stats) {
- BlockingMutexLock lock(&dead_threads_stats_lock);
+ Lock lock(&dead_threads_stats_lock);
dead_threads_stats.MergeFrom(stats);
stats->Clear();
}
@@ -122,11 +122,11 @@ static void PrintAccumulatedStats() {
AsanStats stats;
GetAccumulatedStats(&stats);
// Use lock to keep reports from mixing up.
- BlockingMutexLock lock(&print_lock);
+ Lock lock(&print_lock);
stats.Print();
- StackDepotStats *stack_depot_stats = StackDepotGetStats();
+ StackDepotStats stack_depot_stats = StackDepotGetStats();
Printf("Stats: StackDepot: %zd ids; %zdM allocated\n",
- stack_depot_stats->n_uniq_ids, stack_depot_stats->allocated >> 20);
+ stack_depot_stats.n_uniq_ids, stack_depot_stats.allocated >> 20);
PrintInternalAllocatorStats();
}
diff --git a/compiler-rt/lib/asan/asan_thread.cpp b/compiler-rt/lib/asan/asan_thread.cpp
index 35d4467e7b53..930139968ec3 100644
--- a/compiler-rt/lib/asan/asan_thread.cpp
+++ b/compiler-rt/lib/asan/asan_thread.cpp
@@ -43,11 +43,11 @@ void AsanThreadContext::OnFinished() {
static ALIGNED(16) char thread_registry_placeholder[sizeof(ThreadRegistry)];
static ThreadRegistry *asan_thread_registry;
-static BlockingMutex mu_for_thread_context(LINKER_INITIALIZED);
+static Mutex mu_for_thread_context;
static LowLevelAllocator allocator_for_thread_context;
static ThreadContextBase *GetAsanThreadContext(u32 tid) {
- BlockingMutexLock lock(&mu_for_thread_context);
+ Lock lock(&mu_for_thread_context);
return new(allocator_for_thread_context) AsanThreadContext(tid);
}
@@ -83,8 +83,7 @@ AsanThread *AsanThread::Create(thread_callback_t start_routine, void *arg,
thread->start_routine_ = start_routine;
thread->arg_ = arg;
AsanThreadContext::CreateThreadContextArgs args = {thread, stack};
- asanThreadRegistry().CreateThread(*reinterpret_cast<uptr *>(thread), detached,
- parent_tid, &args);
+ asanThreadRegistry().CreateThread(0, detached, parent_tid, &args);
return thread;
}
@@ -254,7 +253,7 @@ void AsanThread::Init(const InitOptions *options) {
int local = 0;
VReport(1, "T%d: stack [%p,%p) size 0x%zx; local=%p\n", tid(),
(void *)stack_bottom_, (void *)stack_top_, stack_top_ - stack_bottom_,
- &local);
+ (void *)&local);
}
// Fuchsia doesn't use ThreadStart.
@@ -443,7 +442,7 @@ AsanThread *GetCurrentThread() {
void SetCurrentThread(AsanThread *t) {
CHECK(t->context());
- VReport(2, "SetCurrentThread: %p for thread %p\n", t->context(),
+ VReport(2, "SetCurrentThread: %p for thread %p\n", (void *)t->context(),
(void *)GetThreadSelf());
// Make sure we do not reset the current AsanThread.
CHECK_EQ(0, AsanTSDGet());
diff --git a/compiler-rt/lib/builtins/README.txt b/compiler-rt/lib/builtins/README.txt
index d66d725e7ab5..53d656d5086d 100644
--- a/compiler-rt/lib/builtins/README.txt
+++ b/compiler-rt/lib/builtins/README.txt
@@ -271,8 +271,8 @@ switchu8
// There is no C interface to the *_vfp_d8_d15_regs functions. There are
// called in the prolog and epilog of Thumb1 functions. When the C++ ABI use
-// SJLJ for exceptions, each function with a catch clause or destuctors needs
-// to save and restore all registers in it prolog and epliog. But there is
+// SJLJ for exceptions, each function with a catch clause or destructors needs
+// to save and restore all registers in it prolog and epilog. But there is
// no way to access vector and high float registers from thumb1 code, so the
// compiler must add call outs to these helper functions in the prolog and
// epilog.
@@ -311,9 +311,9 @@ double __floatsidfvfp(int a); // Appears to convert from
float __floatsisfvfp(int a); // Appears to convert from
// int to float.
double __floatunssidfvfp(unsigned int a); // Appears to convert from
- // unisgned int to double.
+ // unsigned int to double.
float __floatunssisfvfp(unsigned int a); // Appears to convert from
- // unisgned int to float.
+ // unsigned int to float.
int __gedf2vfp(double a, double b); // Appears to return __gedf2
// (a >= b)
int __gesf2vfp(float a, float b); // Appears to return __gesf2
diff --git a/compiler-rt/lib/builtins/arm/truncdfsf2vfp.S b/compiler-rt/lib/builtins/arm/truncdfsf2vfp.S
index a3c0a73466e9..e1c171262a78 100644
--- a/compiler-rt/lib/builtins/arm/truncdfsf2vfp.S
+++ b/compiler-rt/lib/builtins/arm/truncdfsf2vfp.S
@@ -11,9 +11,9 @@
//
// extern float __truncdfsf2vfp(double a);
//
-// Converts double precision float to signle precision result.
+// Converts double precision float to single precision result.
// Uses Darwin calling convention where a double precision parameter is
-// passed in a R0/R1 pair and a signle precision result is returned in R0.
+// passed in a R0/R1 pair and a single precision result is returned in R0.
//
.syntax unified
.p2align 2
diff --git a/compiler-rt/lib/builtins/atomic.c b/compiler-rt/lib/builtins/atomic.c
index 64bf72dfa345..4c3ebb99a513 100644
--- a/compiler-rt/lib/builtins/atomic.c
+++ b/compiler-rt/lib/builtins/atomic.c
@@ -336,6 +336,18 @@ OPTIMISED_CASES
return tmp; \
}
+#define ATOMIC_RMW_NAND(n, lockfree, type) \
+ type __atomic_fetch_nand_##n(type *ptr, type val, int model) { \
+ if (lockfree(ptr)) \
+ return __c11_atomic_fetch_nand((_Atomic(type) *)ptr, val, model); \
+ Lock *l = lock_for_pointer(ptr); \
+ lock(l); \
+ type tmp = *ptr; \
+ *ptr = ~(tmp & val); \
+ unlock(l); \
+ return tmp; \
+ }
+
#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, add, +)
OPTIMISED_CASES
#undef OPTIMISED_CASE
@@ -351,3 +363,6 @@ OPTIMISED_CASES
#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, xor, ^)
OPTIMISED_CASES
#undef OPTIMISED_CASE
+#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW_NAND(n, lockfree, type)
+OPTIMISED_CASES
+#undef OPTIMISED_CASE
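The new ATOMIC_RMW_NAND cases back the __atomic_fetch_nand_N builtins for sizes that are not lock-free: the previous value is returned and ~(old & val) is stored. A small worked example of that arithmetic with assumed input values (plain, non-atomic code, just to show the result):

    #include <cstdint>
    #include <cstdio>

    int main() {
      std::uint8_t obj = 0xF0;  // assumed current value of the atomic object
      std::uint8_t val = 0x3C;  // assumed operand
      std::uint8_t old = obj;                         // fetch the old value...
      obj = static_cast<std::uint8_t>(~(old & val));  // ...store ~(0xF0 & 0x3C) == 0xCF
      std::printf("old=0x%02X new=0x%02X\n", (unsigned)old, (unsigned)obj);
      return 0;  // prints old=0xF0 new=0xCF
    }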
diff --git a/compiler-rt/lib/builtins/clear_cache.c b/compiler-rt/lib/builtins/clear_cache.c
index 3c12b74e8fa6..da0715914b41 100644
--- a/compiler-rt/lib/builtins/clear_cache.c
+++ b/compiler-rt/lib/builtins/clear_cache.c
@@ -35,7 +35,7 @@ uintptr_t GetCurrentProcess(void);
#include <machine/sysarch.h>
#endif
-#if defined(__OpenBSD__) && (defined(__arm__) || defined(__mips__))
+#if defined(__OpenBSD__) && (defined(__arm__) || defined(__mips__) || defined(__riscv))
// clang-format off
#include <sys/types.h>
#include <machine/sysarch.h>
@@ -166,6 +166,13 @@ void __clear_cache(void *start, void *end) {
: "=r"(start_reg)
: "r"(start_reg), "r"(end_reg), "r"(flags), "r"(syscall_nr));
assert(start_reg == 0 && "Cache flush syscall failed.");
+#elif defined(__riscv) && defined(__OpenBSD__)
+ struct riscv_sync_icache_args arg;
+
+ arg.addr = (uintptr_t)start;
+ arg.len = (uintptr_t)end - (uintptr_t)start;
+
+ sysarch(RISCV_SYNC_ICACHE, &arg);
#else
#if __APPLE__
// On Darwin, sys_icache_invalidate() provides this functionality
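For all of these platform paths, user code normally reaches __clear_cache through the compiler builtin, which has to be called after writing instructions into an executable buffer and before jumping to them. A minimal usage sketch (the executable mapping is assumed to have been set up elsewhere):

    #include <cstring>

    // Copy freshly generated code into an executable buffer and make sure the
    // instruction cache no longer holds stale bytes for that range.
    void PublishCode(void *dst, const void *src, std::size_t len) {
      std::memcpy(dst, src, len);
      __builtin___clear_cache(static_cast<char *>(dst),
                              static_cast<char *>(dst) + len);
    }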
diff --git a/compiler-rt/lib/builtins/cpu_model.c b/compiler-rt/lib/builtins/cpu_model.c
index 6ee42911b204..b8d807ed651c 100644
--- a/compiler-rt/lib/builtins/cpu_model.c
+++ b/compiler-rt/lib/builtins/cpu_model.c
@@ -422,6 +422,22 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
*Subtype = INTEL_COREI7_ICELAKE_CLIENT;
break;
+ // Tigerlake:
+ case 0x8c:
+ case 0x8d:
+ CPU = "tigerlake";
+ *Type = INTEL_COREI7;
+ *Subtype = INTEL_COREI7_TIGERLAKE;
+ break;
+
+ // Alderlake:
+ case 0x97:
+ case 0x9a:
+ CPU = "alderlake";
+ *Type = INTEL_COREI7;
+ *Subtype = INTEL_COREI7_ALDERLAKE;
+ break;
+
// Icelake Xeon:
case 0x6a:
case 0x6c:
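cpu_model.c backs the x86 CPU detection builtins, so adding the Tiger Lake and Alder Lake model numbers is what lets dispatch code recognize those parts at run time. A minimal usage sketch; whether a particular compiler accepts the "tigerlake" and "alderlake" strings depends on its version:

    #include <cstdio>

    int main() {
      __builtin_cpu_init();  // populate the detection data once
      if (__builtin_cpu_is("tigerlake"))
        std::puts("running on Tiger Lake");
      else if (__builtin_cpu_is("alderlake"))
        std::puts("running on Alder Lake");
      else
        std::puts("some other x86 CPU");
      return 0;
    }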
diff --git a/compiler-rt/lib/builtins/emutls.c b/compiler-rt/lib/builtins/emutls.c
index 98cabd917d6c..e112fdf51440 100644
--- a/compiler-rt/lib/builtins/emutls.c
+++ b/compiler-rt/lib/builtins/emutls.c
@@ -150,7 +150,7 @@ static void win_error(DWORD last_err, const char *hint) {
NULL, last_err, 0, (LPSTR)&buffer, 1, NULL)) {
fprintf(stderr, "Windows error: %s\n", buffer);
} else {
- fprintf(stderr, "Unkown Windows error: %s\n", hint);
+ fprintf(stderr, "Unknown Windows error: %s\n", hint);
}
LocalFree(buffer);
}
@@ -374,6 +374,21 @@ emutls_get_address_array(uintptr_t index) {
return array;
}
+#ifndef _WIN32
+// Our emulated TLS implementation relies on local state (e.g. for the pthread
+// key), and if we duplicate this state across different shared libraries,
+// accesses to the same TLS variable from different shared libraries will yield
+// different results (see https://github.com/android/ndk/issues/1551 for an
+// example). __emutls_get_address is the only external entry point for emulated
+// TLS, and by making it default visibility and weak, we can rely on the dynamic
+// linker to coalesce multiple copies at runtime and ensure a single unique copy
+// of TLS state. This is a best effort; it won't work if the user is linking
+// with -Bsymbolic or -Bsymbolic-functions, and it also won't work on Windows,
+// where the dynamic linker has no notion of coalescing weak symbols at runtime.
+// A more robust solution would be to create a separate shared library for
+// emulated TLS, to ensure a single copy of its state.
+__attribute__((visibility("default"), weak))
+#endif
void *__emutls_get_address(__emutls_control *control) {
uintptr_t index = emutls_get_index(control);
emutls_address_array *array = emutls_get_address_array(index--);
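The attribute added above relies on a property of ELF dynamic linking: when a weak, default-visibility symbol is defined in several shared objects, the dynamic linker binds every reference to the copy it resolves first, so all DSOs end up sharing one set of emulated-TLS state. A minimal illustration of the attribute spelling on a made-up symbol (not part of the patch):

    // If several shared objects each contain this definition, references from
    // all of them resolve to a single copy at load time, so there is one
    // counter instead of one per DSO.
    __attribute__((visibility("default"), weak))
    int SharedCounterNextSketch() {  // hypothetical example symbol
      static int counter;
      return ++counter;
    }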
diff --git a/compiler-rt/lib/builtins/fixdfdi.c b/compiler-rt/lib/builtins/fixdfdi.c
index 511568fc12fd..a48facb68598 100644
--- a/compiler-rt/lib/builtins/fixdfdi.c
+++ b/compiler-rt/lib/builtins/fixdfdi.c
@@ -42,3 +42,7 @@ AEABI_RTABI di_int __aeabi_d2lz(fp_t a) { return __fixdfdi(a); }
COMPILER_RT_ALIAS(__fixdfdi, __aeabi_d2lz)
#endif
#endif
+
+#if defined(__MINGW32__) && defined(__arm__)
+COMPILER_RT_ALIAS(__fixdfdi, __dtoi64)
+#endif
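This and the following conversion files replace the deleted mingw_fixfloat.c wrappers: instead of an out-of-line forwarding function, each MinGW/ARM name is exported as an additional name for the generic builtin via the COMPILER_RT_ALIAS macro. On ELF-style targets the effect is roughly the alias attribute shown in this sketch (stand-in body, illustrative only):

    // __dtoi64 becomes a second name for __fixdfdi; no wrapper body, no extra call.
    extern "C" long long __fixdfdi(double a) { return (long long)a; }  // stand-in body
    extern "C" long long __dtoi64(double) __attribute__((alias("__fixdfdi")));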
diff --git a/compiler-rt/lib/builtins/fixsfdi.c b/compiler-rt/lib/builtins/fixsfdi.c
index 0cf71c30311a..3a66fb9e2f06 100644
--- a/compiler-rt/lib/builtins/fixsfdi.c
+++ b/compiler-rt/lib/builtins/fixsfdi.c
@@ -42,3 +42,7 @@ AEABI_RTABI di_int __aeabi_f2lz(fp_t a) { return __fixsfdi(a); }
COMPILER_RT_ALIAS(__fixsfdi, __aeabi_f2lz)
#endif
#endif
+
+#if defined(__MINGW32__) && defined(__arm__)
+COMPILER_RT_ALIAS(__fixsfdi, __stoi64)
+#endif
diff --git a/compiler-rt/lib/builtins/fixunsdfdi.c b/compiler-rt/lib/builtins/fixunsdfdi.c
index ccb256d2c7e0..f15f86788e85 100644
--- a/compiler-rt/lib/builtins/fixunsdfdi.c
+++ b/compiler-rt/lib/builtins/fixunsdfdi.c
@@ -40,3 +40,7 @@ AEABI_RTABI du_int __aeabi_d2ulz(fp_t a) { return __fixunsdfdi(a); }
COMPILER_RT_ALIAS(__fixunsdfdi, __aeabi_d2ulz)
#endif
#endif
+
+#if defined(__MINGW32__) && defined(__arm__)
+COMPILER_RT_ALIAS(__fixunsdfdi, __dtou64)
+#endif
diff --git a/compiler-rt/lib/builtins/fixunssfdi.c b/compiler-rt/lib/builtins/fixunssfdi.c
index 647185fbabf1..e8f600df9766 100644
--- a/compiler-rt/lib/builtins/fixunssfdi.c
+++ b/compiler-rt/lib/builtins/fixunssfdi.c
@@ -41,3 +41,7 @@ AEABI_RTABI du_int __aeabi_f2ulz(fp_t a) { return __fixunssfdi(a); }
COMPILER_RT_ALIAS(__fixunssfdi, __aeabi_f2ulz)
#endif
#endif
+
+#if defined(__MINGW32__) && defined(__arm__)
+COMPILER_RT_ALIAS(__fixunssfdi, __stou64)
+#endif
diff --git a/compiler-rt/lib/builtins/fixunsxfdi.c b/compiler-rt/lib/builtins/fixunsxfdi.c
index 097a4e55e931..c8a8061b2cf0 100644
--- a/compiler-rt/lib/builtins/fixunsxfdi.c
+++ b/compiler-rt/lib/builtins/fixunsxfdi.c
@@ -26,7 +26,7 @@
// mmmm mmmm mmmm
#if defined(_MSC_VER) && !defined(__clang__)
-// MSVC throws a warning about 'unitialized variable use' here,
+// MSVC throws a warning about 'uninitialized variable use' here,
// disable it for builds that warn-as-error
#pragma warning(push)
#pragma warning(disable : 4700)
diff --git a/compiler-rt/lib/builtins/fixunsxfsi.c b/compiler-rt/lib/builtins/fixunsxfsi.c
index 3bc1288d38a1..154abcbd35e7 100644
--- a/compiler-rt/lib/builtins/fixunsxfsi.c
+++ b/compiler-rt/lib/builtins/fixunsxfsi.c
@@ -26,7 +26,7 @@
// mmmm mmmm mmmm
#if defined(_MSC_VER) && !defined(__clang__)
-// MSVC throws a warning about 'unitialized variable use' here,
+// MSVC throws a warning about 'uninitialized variable use' here,
// disable it for builds that warn-as-error
#pragma warning(push)
#pragma warning(disable : 4700)
diff --git a/compiler-rt/lib/builtins/fixxfdi.c b/compiler-rt/lib/builtins/fixxfdi.c
index a7a0464feb9d..86cf3767b75d 100644
--- a/compiler-rt/lib/builtins/fixxfdi.c
+++ b/compiler-rt/lib/builtins/fixxfdi.c
@@ -25,7 +25,7 @@
// mmmm mmmm mmmm
#if defined(_MSC_VER) && !defined(__clang__)
-// MSVC throws a warning about 'unitialized variable use' here,
+// MSVC throws a warning about 'uninitialized variable use' here,
// disable it for builds that warn-as-error
#pragma warning(push)
#pragma warning(disable : 4700)
diff --git a/compiler-rt/lib/builtins/floatdidf.c b/compiler-rt/lib/builtins/floatdidf.c
index 7ecb30bca71e..d37c43b1f2f9 100644
--- a/compiler-rt/lib/builtins/floatdidf.c
+++ b/compiler-rt/lib/builtins/floatdidf.c
@@ -101,3 +101,7 @@ AEABI_RTABI double __aeabi_l2d(di_int a) { return __floatdidf(a); }
COMPILER_RT_ALIAS(__floatdidf, __aeabi_l2d)
#endif
#endif
+
+#if defined(__MINGW32__) && defined(__arm__)
+COMPILER_RT_ALIAS(__floatdidf, __i64tod)
+#endif
diff --git a/compiler-rt/lib/builtins/floatdisf.c b/compiler-rt/lib/builtins/floatdisf.c
index faaa1bcb3c8e..5c6316431e39 100644
--- a/compiler-rt/lib/builtins/floatdisf.c
+++ b/compiler-rt/lib/builtins/floatdisf.c
@@ -73,3 +73,7 @@ AEABI_RTABI float __aeabi_l2f(di_int a) { return __floatdisf(a); }
COMPILER_RT_ALIAS(__floatdisf, __aeabi_l2f)
#endif
#endif
+
+#if defined(__MINGW32__) && defined(__arm__)
+COMPILER_RT_ALIAS(__floatdisf, __i64tos)
+#endif
diff --git a/compiler-rt/lib/builtins/floatundidf.c b/compiler-rt/lib/builtins/floatundidf.c
index e5e533042a34..2ec802cdc134 100644
--- a/compiler-rt/lib/builtins/floatundidf.c
+++ b/compiler-rt/lib/builtins/floatundidf.c
@@ -104,3 +104,7 @@ AEABI_RTABI double __aeabi_ul2d(du_int a) { return __floatundidf(a); }
COMPILER_RT_ALIAS(__floatundidf, __aeabi_ul2d)
#endif
#endif
+
+#if defined(__MINGW32__) && defined(__arm__)
+COMPILER_RT_ALIAS(__floatundidf, __u64tod)
+#endif
diff --git a/compiler-rt/lib/builtins/floatundisf.c b/compiler-rt/lib/builtins/floatundisf.c
index 00d61b0c6310..2a4157dc5e4b 100644
--- a/compiler-rt/lib/builtins/floatundisf.c
+++ b/compiler-rt/lib/builtins/floatundisf.c
@@ -70,3 +70,7 @@ AEABI_RTABI float __aeabi_ul2f(du_int a) { return __floatundisf(a); }
COMPILER_RT_ALIAS(__floatundisf, __aeabi_ul2f)
#endif
#endif
+
+#if defined(__MINGW32__) && defined(__arm__)
+COMPILER_RT_ALIAS(__floatundisf, __u64tos)
+#endif
diff --git a/compiler-rt/lib/builtins/mingw_fixfloat.c b/compiler-rt/lib/builtins/mingw_fixfloat.c
deleted file mode 100644
index 945be9d4344a..000000000000
--- a/compiler-rt/lib/builtins/mingw_fixfloat.c
+++ /dev/null
@@ -1,34 +0,0 @@
-//===-- mingw_fixfloat.c - Wrap int/float conversions for arm/windows -----===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "int_lib.h"
-
-COMPILER_RT_ABI di_int __fixdfdi(double a);
-COMPILER_RT_ABI di_int __fixsfdi(float a);
-COMPILER_RT_ABI du_int __fixunsdfdi(double a);
-COMPILER_RT_ABI du_int __fixunssfdi(float a);
-COMPILER_RT_ABI double __floatdidf(di_int a);
-COMPILER_RT_ABI float __floatdisf(di_int a);
-COMPILER_RT_ABI double __floatundidf(du_int a);
-COMPILER_RT_ABI float __floatundisf(du_int a);
-
-COMPILER_RT_ABI di_int __dtoi64(double a) { return __fixdfdi(a); }
-
-COMPILER_RT_ABI di_int __stoi64(float a) { return __fixsfdi(a); }
-
-COMPILER_RT_ABI du_int __dtou64(double a) { return __fixunsdfdi(a); }
-
-COMPILER_RT_ABI du_int __stou64(float a) { return __fixunssfdi(a); }
-
-COMPILER_RT_ABI double __i64tod(di_int a) { return __floatdidf(a); }
-
-COMPILER_RT_ABI float __i64tos(di_int a) { return __floatdisf(a); }
-
-COMPILER_RT_ABI double __u64tod(du_int a) { return __floatundidf(a); }
-
-COMPILER_RT_ABI float __u64tos(du_int a) { return __floatundisf(a); }
diff --git a/compiler-rt/lib/builtins/riscv/restore.S b/compiler-rt/lib/builtins/riscv/restore.S
index 12f0d3365655..73f64a920d66 100644
--- a/compiler-rt/lib/builtins/riscv/restore.S
+++ b/compiler-rt/lib/builtins/riscv/restore.S
@@ -93,7 +93,7 @@ __riscv_restore_0:
__riscv_restore_12:
ld s11, 8(sp)
addi sp, sp, 16
- // fallthrough into __riscv_restore_11/10/9/8
+ // fallthrough into __riscv_restore_11/10
.globl __riscv_restore_11
.type __riscv_restore_11,@function
@@ -143,10 +143,6 @@ __riscv_restore_4:
.type __riscv_restore_3,@function
.globl __riscv_restore_2
.type __riscv_restore_2,@function
- .globl __riscv_restore_1
- .type __riscv_restore_1,@function
- .globl __riscv_restore_0
- .type __riscv_restore_0,@function
__riscv_restore_3:
__riscv_restore_2:
ld s2, 0(sp)
@@ -154,6 +150,10 @@ __riscv_restore_2:
addi sp, sp, 16
// fallthrough into __riscv_restore_1/0
+ .globl __riscv_restore_1
+ .type __riscv_restore_1,@function
+ .globl __riscv_restore_0
+ .type __riscv_restore_0,@function
__riscv_restore_1:
__riscv_restore_0:
ld s0, 0(sp)
diff --git a/compiler-rt/lib/builtins/riscv/save.S b/compiler-rt/lib/builtins/riscv/save.S
index d811bf584fc3..85501aeb4c2e 100644
--- a/compiler-rt/lib/builtins/riscv/save.S
+++ b/compiler-rt/lib/builtins/riscv/save.S
@@ -174,6 +174,8 @@ __riscv_save_2:
.type __riscv_save_1,@function
.globl __riscv_save_0
.type __riscv_save_0,@function
+__riscv_save_1:
+__riscv_save_0:
addi sp, sp, -16
sd s0, 0(sp)
sd ra, 8(sp)
diff --git a/compiler-rt/lib/cfi/cfi.cpp b/compiler-rt/lib/cfi/cfi.cpp
index f691cfb94cfc..95853208f951 100644
--- a/compiler-rt/lib/cfi/cfi.cpp
+++ b/compiler-rt/lib/cfi/cfi.cpp
@@ -320,7 +320,7 @@ void InitShadow() {
}
THREADLOCAL int in_loader;
-BlockingMutex shadow_update_lock(LINKER_INITIALIZED);
+Mutex shadow_update_lock;
void EnterLoader() NO_THREAD_SAFETY_ANALYSIS {
if (in_loader == 0) {
@@ -359,7 +359,7 @@ ALWAYS_INLINE void CfiSlowPathCommon(u64 CallSiteTypeId, void *Ptr,
return;
}
CFICheckFn cfi_check = sv.get_cfi_check();
- VReport(2, "__cfi_check at %p\n", cfi_check);
+ VReport(2, "__cfi_check at %p\n", (void *)cfi_check);
cfi_check(CallSiteTypeId, Ptr, DiagData);
}
@@ -436,11 +436,11 @@ INTERCEPTOR(int, dlclose, void *handle) {
return res;
}
-static BlockingMutex interceptor_init_lock(LINKER_INITIALIZED);
+static Mutex interceptor_init_lock;
static bool interceptors_inited = false;
static void EnsureInterceptorsInitialized() {
- BlockingMutexLock lock(&interceptor_init_lock);
+ Lock lock(&interceptor_init_lock);
if (interceptors_inited)
return;
diff --git a/compiler-rt/lib/dfsan/dfsan.cpp b/compiler-rt/lib/dfsan/dfsan.cpp
index 6f9ae141d7ab..ce2c04df83a8 100644
--- a/compiler-rt/lib/dfsan/dfsan.cpp
+++ b/compiler-rt/lib/dfsan/dfsan.cpp
@@ -369,37 +369,6 @@ static void SetOrigin(const void *dst, uptr size, u32 origin) {
*(u32 *)(end - kOriginAlign) = origin;
}
-static void WriteShadowInRange(dfsan_label label, uptr beg_shadow_addr,
- uptr end_shadow_addr) {
- // TODO: After changing dfsan_label to 8bit, use internal_memset when label
- // is not 0.
- dfsan_label *labelp = (dfsan_label *)beg_shadow_addr;
- if (label) {
- for (; (uptr)labelp < end_shadow_addr; ++labelp) *labelp = label;
- return;
- }
-
- for (; (uptr)labelp < end_shadow_addr; ++labelp) {
- // Don't write the label if it is already the value we need it to be.
- // In a program where most addresses are not labeled, it is common that
- // a page of shadow memory is entirely zeroed. The Linux copy-on-write
- // implementation will share all of the zeroed pages, making a copy of a
- // page when any value is written. The un-sharing will happen even if
- // the value written does not change the value in memory. Avoiding the
- // write when both |label| and |*labelp| are zero dramatically reduces
- // the amount of real memory used by large programs.
- if (!*labelp)
- continue;
-
- *labelp = 0;
- }
-}
-
-static void WriteShadowWithSize(dfsan_label label, uptr shadow_addr,
- uptr size) {
- WriteShadowInRange(label, shadow_addr, shadow_addr + size * sizeof(label));
-}
-
#define RET_CHAIN_ORIGIN(id) \
GET_CALLER_PC_BP_SP; \
(void)sp; \
@@ -451,21 +420,6 @@ void dfsan_copy_memory(void *dst, const void *src, uptr size) {
dfsan_mem_origin_transfer(dst, src, size);
}
-} // namespace __dfsan
-
-// If the label s is tainted, set the size bytes from the address p to be a new
-// origin chain with the previous ID o and the current stack trace. This is
-// used by instrumentation to reduce code size when too much code is inserted.
-extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __dfsan_maybe_store_origin(
- dfsan_label s, void *p, uptr size, dfsan_origin o) {
- if (UNLIKELY(s)) {
- GET_CALLER_PC_BP_SP;
- (void)sp;
- GET_STORE_STACK_TRACE_PC_BP(pc, bp);
- SetOrigin(p, size, ChainOrigin(o, &stack));
- }
-}
-
// Releases the pages within the origin address range.
static void ReleaseOrigins(void *addr, uptr size) {
const uptr beg_origin_addr = (uptr)__dfsan::origin_for(addr);
@@ -484,6 +438,19 @@ static void ReleaseOrigins(void *addr, uptr size) {
Die();
}
+static void WriteZeroShadowInRange(uptr beg, uptr end) {
+ // Don't write the label if it is already the value we need it to be.
+ // In a program where most addresses are not labeled, it is common that
+ // a page of shadow memory is entirely zeroed. The Linux copy-on-write
+ // implementation will share all of the zeroed pages, making a copy of a
+ // page when any value is written. The un-sharing will happen even if
+ // the value written does not change the value in memory. Avoiding the
+ // write when both |label| and |*labelp| are zero dramatically reduces
+ // the amount of real memory used by large programs.
+ if (!mem_is_zero((const char *)beg, end - beg))
+ internal_memset((void *)beg, 0, end - beg);
+}
+
// Releases the pages within the shadow address range, and sets
// the shadow addresses not on the pages to be 0.
static void ReleaseOrClearShadows(void *addr, uptr size) {
@@ -492,20 +459,22 @@ static void ReleaseOrClearShadows(void *addr, uptr size) {
const uptr end_shadow_addr = (uptr)__dfsan::shadow_for(end_addr);
if (end_shadow_addr - beg_shadow_addr <
- common_flags()->clear_shadow_mmap_threshold)
- return WriteShadowWithSize(0, beg_shadow_addr, size);
+ common_flags()->clear_shadow_mmap_threshold) {
+ WriteZeroShadowInRange(beg_shadow_addr, end_shadow_addr);
+ return;
+ }
const uptr page_size = GetPageSizeCached();
const uptr beg_aligned = RoundUpTo(beg_shadow_addr, page_size);
const uptr end_aligned = RoundDownTo(end_shadow_addr, page_size);
if (beg_aligned >= end_aligned) {
- WriteShadowWithSize(0, beg_shadow_addr, size);
+ WriteZeroShadowInRange(beg_shadow_addr, end_shadow_addr);
} else {
if (beg_aligned != beg_shadow_addr)
- WriteShadowInRange(0, beg_shadow_addr, beg_aligned);
+ WriteZeroShadowInRange(beg_shadow_addr, beg_aligned);
if (end_aligned != end_shadow_addr)
- WriteShadowInRange(0, end_aligned, end_shadow_addr);
+ WriteZeroShadowInRange(end_aligned, end_shadow_addr);
if (!MmapFixedSuperNoReserve(beg_aligned, end_aligned - beg_aligned))
Die();
}
@@ -514,7 +483,7 @@ static void ReleaseOrClearShadows(void *addr, uptr size) {
void SetShadow(dfsan_label label, void *addr, uptr size, dfsan_origin origin) {
if (0 != label) {
const uptr beg_shadow_addr = (uptr)__dfsan::shadow_for(addr);
- WriteShadowWithSize(label, beg_shadow_addr, size);
+ internal_memset((void *)beg_shadow_addr, label, size);
if (dfsan_get_track_origins())
SetOrigin(addr, size, origin);
return;
@@ -526,9 +495,24 @@ void SetShadow(dfsan_label label, void *addr, uptr size, dfsan_origin origin) {
ReleaseOrClearShadows(addr, size);
}
+} // namespace __dfsan
+
+// If the label s is tainted, set the size bytes from the address p to be a new
+// origin chain with the previous ID o and the current stack trace. This is
+// used by instrumentation to reduce code size when too much code is inserted.
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __dfsan_maybe_store_origin(
+ dfsan_label s, void *p, uptr size, dfsan_origin o) {
+ if (UNLIKELY(s)) {
+ GET_CALLER_PC_BP_SP;
+ (void)sp;
+ GET_STORE_STACK_TRACE_PC_BP(pc, bp);
+ SetOrigin(p, size, ChainOrigin(o, &stack));
+ }
+}
+
extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __dfsan_set_label(
dfsan_label label, dfsan_origin origin, void *addr, uptr size) {
- SetShadow(label, addr, size, origin);
+ __dfsan::SetShadow(label, addr, size, origin);
}
SANITIZER_INTERFACE_ATTRIBUTE
@@ -539,7 +523,7 @@ void dfsan_set_label(dfsan_label label, void *addr, uptr size) {
GET_STORE_STACK_TRACE_PC_BP(pc, bp);
init_origin = ChainOrigin(0, &stack, true);
}
- SetShadow(label, addr, size, init_origin);
+ __dfsan::SetShadow(label, addr, size, init_origin);
}
SANITIZER_INTERFACE_ATTRIBUTE
@@ -709,9 +693,9 @@ extern "C" SANITIZER_INTERFACE_ATTRIBUTE void dfsan_print_origin_trace(
PrintInvalidOriginWarning(label, addr);
}
-extern "C" SANITIZER_INTERFACE_ATTRIBUTE size_t
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE uptr
dfsan_sprint_origin_trace(const void *addr, const char *description,
- char *out_buf, size_t out_buf_size) {
+ char *out_buf, uptr out_buf_size) {
CHECK(out_buf);
if (!dfsan_get_track_origins()) {
@@ -780,8 +764,8 @@ extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_print_stack_trace() {
stack.Print();
}
-extern "C" SANITIZER_INTERFACE_ATTRIBUTE size_t
-dfsan_sprint_stack_trace(char *out_buf, size_t out_buf_size) {
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE uptr
+dfsan_sprint_stack_trace(char *out_buf, uptr out_buf_size) {
CHECK(out_buf);
GET_CALLER_PC_BP;
GET_STORE_STACK_TRACE_PC_BP(pc, bp);
@@ -932,7 +916,7 @@ static bool ProtectMemoryRange(uptr beg, uptr size, const char *name) {
// Consider refactoring these into a shared implementation.
bool InitShadow(bool init_origins) {
// Let user know mapping parameters first.
- VPrintf(1, "dfsan_init %p\n", &__dfsan::dfsan_init);
+ VPrintf(1, "dfsan_init %p\n", (void *)&__dfsan::dfsan_init);
for (unsigned i = 0; i < kMemoryLayoutSize; ++i)
VPrintf(1, "%s: %zx - %zx\n", kMemoryLayout[i].name, kMemoryLayout[i].start,
kMemoryLayout[i].end - 1);
@@ -1007,7 +991,7 @@ static void DFsanInit(int argc, char **argv, char **envp) {
DFsanThread *main_thread = DFsanThread::Create(nullptr, nullptr, nullptr);
SetCurrentThread(main_thread);
- main_thread->ThreadStart();
+ main_thread->Init();
dfsan_init_is_running = false;
dfsan_inited = true;
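Further up in this file, the WriteZeroShadowInRange helper replaces the old per-label store loop with a check-then-clear: the memset is skipped when the shadow range is already zero, so untouched copy-on-write shadow pages stay shared instead of being dirtied by a no-op store. A standalone sketch of the same idea (mem_is_zero is the sanitizer helper; a trivial loop stands in for it here):

    #include <cstddef>
    #include <cstring>

    static bool RangeIsZero(const unsigned char *p, std::size_t n) {
      for (std::size_t i = 0; i < n; i++)
        if (p[i]) return false;
      return true;
    }

    // Clear [beg, beg + n) but skip the store when the range is already zero,
    // so shared zero pages are not unshared by writing the same value back.
    static void WriteZeroIfNeeded(unsigned char *beg, std::size_t n) {
      if (!RangeIsZero(beg, n))
        std::memset(beg, 0, n);
    }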
diff --git a/compiler-rt/lib/dfsan/dfsan.h b/compiler-rt/lib/dfsan/dfsan.h
index b212298157eb..b529e008f300 100644
--- a/compiler-rt/lib/dfsan/dfsan.h
+++ b/compiler-rt/lib/dfsan/dfsan.h
@@ -49,7 +49,7 @@ void dfsan_mem_origin_transfer(const void *dst, const void *src, uptr len);
} // extern "C"
template <typename T>
-void dfsan_set_label(dfsan_label label, T &data) { // NOLINT
+void dfsan_set_label(dfsan_label label, T &data) {
dfsan_set_label(label, (void *)&data, sizeof(T));
}
diff --git a/compiler-rt/lib/dfsan/dfsan_custom.cpp b/compiler-rt/lib/dfsan/dfsan_custom.cpp
index 3185184f29c8..217bd35c1c54 100644
--- a/compiler-rt/lib/dfsan/dfsan_custom.cpp
+++ b/compiler-rt/lib/dfsan/dfsan_custom.cpp
@@ -583,7 +583,7 @@ SANITIZER_INTERFACE_ATTRIBUTE char *__dfsw_strcat(char *dest, const char *src,
dfsan_label src_label,
dfsan_label *ret_label) {
size_t dest_len = strlen(dest);
- char *ret = strcat(dest, src); // NOLINT
+ char *ret = strcat(dest, src);
dfsan_label *sdest = shadow_for(dest + dest_len);
const dfsan_label *ssrc = shadow_for(src);
internal_memcpy((void *)sdest, (const void *)ssrc,
@@ -597,7 +597,7 @@ SANITIZER_INTERFACE_ATTRIBUTE char *__dfso_strcat(
dfsan_label *ret_label, dfsan_origin dest_origin, dfsan_origin src_origin,
dfsan_origin *ret_origin) {
size_t dest_len = strlen(dest);
- char *ret = strcat(dest, src); // NOLINT
+ char *ret = strcat(dest, src);
dfsan_label *sdest = shadow_for(dest + dest_len);
const dfsan_label *ssrc = shadow_for(src);
size_t src_len = strlen(src);
@@ -755,6 +755,8 @@ SANITIZER_INTERFACE_ATTRIBUTE void *__dfso_dlopen(
static void *DFsanThreadStartFunc(void *arg) {
DFsanThread *t = (DFsanThread *)arg;
SetCurrentThread(t);
+ t->Init();
+ SetSigProcMask(&t->starting_sigset_, nullptr);
return t->ThreadStart();
}
@@ -775,6 +777,7 @@ static int dfsan_pthread_create(pthread_t *thread, const pthread_attr_t *attr,
DFsanThread *t =
DFsanThread::Create(start_routine_trampoline,
(thread_callback_t)start_routine, arg, track_origins);
+ ScopedBlockSignals block(&t->starting_sigset_);
int res = pthread_create(thread, attr, DFsanThreadStartFunc, t);
if (attr == &myattr)
@@ -1026,6 +1029,33 @@ char *__dfso_get_current_dir_name(dfsan_label *ret_label,
return __dfsw_get_current_dir_name(ret_label);
}
+// This function is only available for glibc 2.25 or newer. Mark it weak so
+// linking succeeds with older glibcs.
+SANITIZER_WEAK_ATTRIBUTE int getentropy(void *buffer, size_t length);
+
+SANITIZER_INTERFACE_ATTRIBUTE int __dfsw_getentropy(void *buffer, size_t length,
+ dfsan_label buffer_label,
+ dfsan_label length_label,
+ dfsan_label *ret_label) {
+ int ret = getentropy(buffer, length);
+ if (ret == 0) {
+ dfsan_set_label(0, buffer, length);
+ }
+ *ret_label = 0;
+ return ret;
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE int __dfso_getentropy(void *buffer, size_t length,
+ dfsan_label buffer_label,
+ dfsan_label length_label,
+ dfsan_label *ret_label,
+ dfsan_origin buffer_origin,
+ dfsan_origin length_origin,
+ dfsan_origin *ret_origin) {
+ return __dfsw_getentropy(buffer, length, buffer_label, length_label,
+ ret_label);
+}
+
SANITIZER_INTERFACE_ATTRIBUTE
int __dfsw_gethostname(char *name, size_t len, dfsan_label name_label,
dfsan_label len_label, dfsan_label *ret_label) {
@@ -1088,7 +1118,7 @@ int __dfso_getrusage(int who, struct rusage *usage, dfsan_label who_label,
SANITIZER_INTERFACE_ATTRIBUTE
char *__dfsw_strcpy(char *dest, const char *src, dfsan_label dst_label,
dfsan_label src_label, dfsan_label *ret_label) {
- char *ret = strcpy(dest, src); // NOLINT
+ char *ret = strcpy(dest, src);
if (ret) {
internal_memcpy(shadow_for(dest), shadow_for(src),
sizeof(dfsan_label) * (strlen(src) + 1));
@@ -1102,7 +1132,7 @@ char *__dfso_strcpy(char *dest, const char *src, dfsan_label dst_label,
dfsan_label src_label, dfsan_label *ret_label,
dfsan_origin dst_origin, dfsan_origin src_origin,
dfsan_origin *ret_origin) {
- char *ret = strcpy(dest, src); // NOLINT
+ char *ret = strcpy(dest, src);
if (ret) {
size_t str_len = strlen(src) + 1;
dfsan_mem_origin_transfer(dest, src, str_len);
@@ -2489,7 +2519,8 @@ pid_t __dfso_fork(dfsan_label *ret_label, dfsan_origin *ret_origin) {
SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_pc_guard, u32 *) {}
SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_pc_guard_init, u32 *,
u32 *) {}
-SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_pcs_init, void) {}
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_pcs_init, const uptr *beg,
+ const uptr *end) {}
SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_pc_indir, void) {}
SANITIZER_INTERFACE_WEAK_DEF(void, __dfsw___sanitizer_cov_trace_cmp, void) {}
diff --git a/compiler-rt/lib/dfsan/dfsan_interceptors.cpp b/compiler-rt/lib/dfsan/dfsan_interceptors.cpp
index 92be4fc87d49..d8fb9ea86618 100644
--- a/compiler-rt/lib/dfsan/dfsan_interceptors.cpp
+++ b/compiler-rt/lib/dfsan/dfsan_interceptors.cpp
@@ -17,6 +17,7 @@
#include "dfsan/dfsan.h"
#include "dfsan/dfsan_thread.h"
#include "interception/interception.h"
+#include "sanitizer_common/sanitizer_allocator_dlsym.h"
#include "sanitizer_common/sanitizer_allocator_interface.h"
#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_errno.h"
@@ -26,11 +27,11 @@
using namespace __sanitizer;
-namespace {
+static bool interceptors_initialized;
-bool interceptors_initialized;
-
-} // namespace
+struct DlsymAlloc : public DlSymAllocator<DlsymAlloc> {
+ static bool UseImpl() { return !__dfsan::dfsan_inited; }
+};
INTERCEPTOR(void *, reallocarray, void *ptr, SIZE_T nmemb, SIZE_T size) {
return __dfsan::dfsan_reallocarray(ptr, nmemb, size);
@@ -47,63 +48,37 @@ INTERCEPTOR(void *, aligned_alloc, SIZE_T alignment, SIZE_T size) {
return __dfsan::dfsan_aligned_alloc(alignment, size);
}
-static uptr allocated_for_dlsym;
-static const uptr kDlsymAllocPoolSize = 1024;
-static uptr alloc_memory_for_dlsym[kDlsymAllocPoolSize];
-
-static bool IsInDlsymAllocPool(const void *ptr) {
- uptr off = (uptr)ptr - (uptr)alloc_memory_for_dlsym;
- return off < sizeof(alloc_memory_for_dlsym);
-}
-
-static void *AllocateFromLocalPool(uptr size_in_bytes) {
- uptr size_in_words = RoundUpTo(size_in_bytes, kWordSize) / kWordSize;
- void *mem = (void *)&alloc_memory_for_dlsym[allocated_for_dlsym];
- allocated_for_dlsym += size_in_words;
- CHECK_LT(allocated_for_dlsym, kDlsymAllocPoolSize);
- return mem;
-}
-
INTERCEPTOR(void *, calloc, SIZE_T nmemb, SIZE_T size) {
- if (UNLIKELY(!__dfsan::dfsan_inited))
- // Hack: dlsym calls calloc before REAL(calloc) is retrieved from dlsym.
- return AllocateFromLocalPool(nmemb * size);
+ if (DlsymAlloc::Use())
+ return DlsymAlloc::Callocate(nmemb, size);
return __dfsan::dfsan_calloc(nmemb, size);
}
INTERCEPTOR(void *, realloc, void *ptr, SIZE_T size) {
- if (UNLIKELY(IsInDlsymAllocPool(ptr))) {
- uptr offset = (uptr)ptr - (uptr)alloc_memory_for_dlsym;
- uptr copy_size = Min(size, kDlsymAllocPoolSize - offset);
- void *new_ptr;
- if (UNLIKELY(!__dfsan::dfsan_inited)) {
- new_ptr = AllocateFromLocalPool(copy_size);
- } else {
- copy_size = size;
- new_ptr = __dfsan::dfsan_malloc(copy_size);
- }
- internal_memcpy(new_ptr, ptr, copy_size);
- return new_ptr;
- }
+ if (DlsymAlloc::Use() || DlsymAlloc::PointerIsMine(ptr))
+ return DlsymAlloc::Realloc(ptr, size);
return __dfsan::dfsan_realloc(ptr, size);
}
INTERCEPTOR(void *, malloc, SIZE_T size) {
- if (UNLIKELY(!__dfsan::dfsan_inited))
- // Hack: dlsym calls malloc before REAL(malloc) is retrieved from dlsym.
- return AllocateFromLocalPool(size);
+ if (DlsymAlloc::Use())
+ return DlsymAlloc::Allocate(size);
return __dfsan::dfsan_malloc(size);
}
INTERCEPTOR(void, free, void *ptr) {
- if (!ptr || UNLIKELY(IsInDlsymAllocPool(ptr)))
+ if (!ptr)
return;
+ if (DlsymAlloc::PointerIsMine(ptr))
+ return DlsymAlloc::Free(ptr);
return __dfsan::dfsan_deallocate(ptr);
}
INTERCEPTOR(void, cfree, void *ptr) {
- if (!ptr || UNLIKELY(IsInDlsymAllocPool(ptr)))
+ if (!ptr)
return;
+ if (DlsymAlloc::PointerIsMine(ptr))
+ return DlsymAlloc::Free(ptr);
return __dfsan::dfsan_deallocate(ptr);
}
@@ -152,12 +127,12 @@ INTERCEPTOR(uptr, malloc_usable_size, void *ptr) {
if (__dfsan::dfsan_init_is_running) \
return REAL(func)(__VA_ARGS__); \
ENSURE_DFSAN_INITED(); \
- dfsan_set_label(0, __errno_location(), sizeof(int)); /* NOLINT */
+ dfsan_set_label(0, __errno_location(), sizeof(int));
INTERCEPTOR(void *, mmap, void *addr, SIZE_T length, int prot, int flags,
int fd, OFF_T offset) {
if (common_flags()->detect_write_exec)
- ReportMmapWriteExec(prot);
+ ReportMmapWriteExec(prot, flags);
if (!__dfsan::dfsan_inited)
return (void *)internal_mmap(addr, length, prot, flags, fd, offset);
COMMON_INTERCEPTOR_ENTER(mmap, addr, length, prot, flags, fd, offset);
@@ -171,7 +146,7 @@ INTERCEPTOR(void *, mmap, void *addr, SIZE_T length, int prot, int flags,
INTERCEPTOR(void *, mmap64, void *addr, SIZE_T length, int prot, int flags,
int fd, OFF64_T offset) {
if (common_flags()->detect_write_exec)
- ReportMmapWriteExec(prot);
+ ReportMmapWriteExec(prot, flags);
if (!__dfsan::dfsan_inited)
return (void *)internal_mmap(addr, length, prot, flags, fd, offset);
COMMON_INTERCEPTOR_ENTER(mmap64, addr, length, prot, flags, fd, offset);
diff --git a/compiler-rt/lib/dfsan/dfsan_thread.cpp b/compiler-rt/lib/dfsan/dfsan_thread.cpp
index 6869cf231587..df7e4d9b7421 100644
--- a/compiler-rt/lib/dfsan/dfsan_thread.cpp
+++ b/compiler-rt/lib/dfsan/dfsan_thread.cpp
@@ -67,8 +67,6 @@ void DFsanThread::Destroy() {
}
thread_return_t DFsanThread::ThreadStart() {
- Init();
-
if (!start_routine_) {
// start_routine_ == 0 if we're on the main thread or on one of the
// OS X libdispatch worker threads. But nobody is supposed to call
diff --git a/compiler-rt/lib/dfsan/dfsan_thread.h b/compiler-rt/lib/dfsan/dfsan_thread.h
index 8dde626f5569..1c33a1854997 100644
--- a/compiler-rt/lib/dfsan/dfsan_thread.h
+++ b/compiler-rt/lib/dfsan/dfsan_thread.h
@@ -1,5 +1,4 @@
-//===-- dfsan_thread.h -------------------------------------------*- C++
-//-*-===//
+//===-- dfsan_thread.h ------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -16,6 +15,7 @@
#include "dfsan_allocator.h"
#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_posix.h"
namespace __dfsan {
@@ -46,6 +46,7 @@ class DFsanThread {
DFsanThreadLocalMallocStorage &malloc_storage() { return malloc_storage_; }
int destructor_iterations_;
+ __sanitizer_sigset_t starting_sigset_;
private:
void SetThreadStackAndTls();
diff --git a/compiler-rt/lib/dfsan/done_abilist.txt b/compiler-rt/lib/dfsan/done_abilist.txt
index 3c2670e04c29..eef7c48948cc 100644
--- a/compiler-rt/lib/dfsan/done_abilist.txt
+++ b/compiler-rt/lib/dfsan/done_abilist.txt
@@ -218,6 +218,7 @@ fun:fgets=custom
fun:fstat=custom
fun:getcwd=custom
fun:get_current_dir_name=custom
+fun:getentropy=custom
fun:gethostname=custom
fun:getpeername=custom
fun:getrlimit=custom
@@ -268,7 +269,7 @@ fun:strrchr=custom
fun:strstr=custom
# Functions which take action based on global state, such as running a callback
-# set by a sepperate function.
+# set by a separate function.
fun:write=custom
# Functions that take a callback (wrap the callback manually).
diff --git a/compiler-rt/lib/dfsan/libc_ubuntu1404_abilist.txt b/compiler-rt/lib/dfsan/libc_ubuntu1404_abilist.txt
index a1ea0a06b537..433092e2b27b 100644
--- a/compiler-rt/lib/dfsan/libc_ubuntu1404_abilist.txt
+++ b/compiler-rt/lib/dfsan/libc_ubuntu1404_abilist.txt
@@ -1852,6 +1852,7 @@ fun:getdirentries64=uninstrumented
fun:getdomainname=uninstrumented
fun:getdtablesize=uninstrumented
fun:getegid=uninstrumented
+fun:getentropy=uninstrumented
fun:getenv=uninstrumented
fun:geteuid=uninstrumented
fun:getfsent=uninstrumented
diff --git a/compiler-rt/lib/fuzzer/FuzzerBuiltinsMsvc.h b/compiler-rt/lib/fuzzer/FuzzerBuiltinsMsvc.h
index ab191b60ef6e..421dee7f6603 100644
--- a/compiler-rt/lib/fuzzer/FuzzerBuiltinsMsvc.h
+++ b/compiler-rt/lib/fuzzer/FuzzerBuiltinsMsvc.h
@@ -41,7 +41,8 @@ inline uint32_t Clzll(uint64_t X) {
#if !defined(_M_ARM) && !defined(_M_X64)
// Scan the high 32 bits.
if (_BitScanReverse(&LeadZeroIdx, static_cast<unsigned long>(X >> 32)))
- return static_cast<int>(63 - (LeadZeroIdx + 32)); // Create a bit offset from the MSB.
+ return static_cast<int>(
+ 63 - (LeadZeroIdx + 32)); // Create a bit offset from the MSB.
// Scan the low 32 bits.
if (_BitScanReverse(&LeadZeroIdx, static_cast<unsigned long>(X)))
return static_cast<int>(63 - LeadZeroIdx);
diff --git a/compiler-rt/lib/fuzzer/FuzzerCommand.h b/compiler-rt/lib/fuzzer/FuzzerCommand.h
index 87308864af53..f653fe358768 100644
--- a/compiler-rt/lib/fuzzer/FuzzerCommand.h
+++ b/compiler-rt/lib/fuzzer/FuzzerCommand.h
@@ -33,7 +33,7 @@ public:
Command() : CombinedOutAndErr(false) {}
- explicit Command(const Vector<std::string> &ArgsToAdd)
+ explicit Command(const std::vector<std::string> &ArgsToAdd)
: Args(ArgsToAdd), CombinedOutAndErr(false) {}
explicit Command(const Command &Other)
@@ -58,7 +58,7 @@ public:
// Gets all of the current command line arguments, **including** those after
// "-ignore-remaining-args=1".
- const Vector<std::string> &getArguments() const { return Args; }
+ const std::vector<std::string> &getArguments() const { return Args; }
// Adds the given argument before "-ignore_remaining_args=1", or at the end
// if that flag isn't present.
@@ -68,7 +68,7 @@ public:
// Adds all given arguments before "-ignore_remaining_args=1", or at the end
// if that flag isn't present.
- void addArguments(const Vector<std::string> &ArgsToAdd) {
+ void addArguments(const std::vector<std::string> &ArgsToAdd) {
Args.insert(endMutableArgs(), ArgsToAdd.begin(), ArgsToAdd.end());
}
@@ -155,16 +155,16 @@ private:
Command(Command &&Other) = delete;
Command &operator=(Command &&Other) = delete;
- Vector<std::string>::iterator endMutableArgs() {
+ std::vector<std::string>::iterator endMutableArgs() {
return std::find(Args.begin(), Args.end(), ignoreRemainingArgs());
}
- Vector<std::string>::const_iterator endMutableArgs() const {
+ std::vector<std::string>::const_iterator endMutableArgs() const {
return std::find(Args.begin(), Args.end(), ignoreRemainingArgs());
}
// The command arguments. Args[0] is the command name.
- Vector<std::string> Args;
+ std::vector<std::string> Args;
// True indicates stderr is redirected to stdout.
bool CombinedOutAndErr;
diff --git a/compiler-rt/lib/fuzzer/FuzzerCorpus.h b/compiler-rt/lib/fuzzer/FuzzerCorpus.h
index f8c126072c96..e01891e18fe3 100644
--- a/compiler-rt/lib/fuzzer/FuzzerCorpus.h
+++ b/compiler-rt/lib/fuzzer/FuzzerCorpus.h
@@ -39,13 +39,13 @@ struct InputInfo {
bool MayDeleteFile = false;
bool Reduced = false;
bool HasFocusFunction = false;
- Vector<uint32_t> UniqFeatureSet;
- Vector<uint8_t> DataFlowTraceForFocusFunction;
+ std::vector<uint32_t> UniqFeatureSet;
+ std::vector<uint8_t> DataFlowTraceForFocusFunction;
// Power schedule.
bool NeedsEnergyUpdate = false;
double Energy = 0.0;
double SumIncidence = 0.0;
- Vector<std::pair<uint32_t, uint16_t>> FeatureFreqs;
+ std::vector<std::pair<uint32_t, uint16_t>> FeatureFreqs;
// Delete feature Idx and its frequency from FeatureFreqs.
bool DeleteFeatureFreq(uint32_t Idx) {
@@ -209,7 +209,7 @@ public:
InputInfo *AddToCorpus(const Unit &U, size_t NumFeatures, bool MayDeleteFile,
bool HasFocusFunction, bool NeverReduce,
std::chrono::microseconds TimeOfUnit,
- const Vector<uint32_t> &FeatureSet,
+ const std::vector<uint32_t> &FeatureSet,
const DataFlowTrace &DFT, const InputInfo *BaseII) {
assert(!U.empty());
if (FeatureDebug)
@@ -258,7 +258,7 @@ public:
}
// Debug-only
- void PrintFeatureSet(const Vector<uint32_t> &FeatureSet) {
+ void PrintFeatureSet(const std::vector<uint32_t> &FeatureSet) {
if (!FeatureDebug) return;
Printf("{");
for (uint32_t Feature: FeatureSet)
@@ -284,7 +284,8 @@ public:
}
}
- void Replace(InputInfo *II, const Unit &U) {
+ void Replace(InputInfo *II, const Unit &U,
+ std::chrono::microseconds TimeOfUnit) {
assert(II->U.size() > U.size());
Hashes.erase(Sha1ToString(II->Sha1));
DeleteFile(*II);
@@ -292,6 +293,7 @@ public:
Hashes.insert(Sha1ToString(II->Sha1));
II->U = U;
II->Reduced = true;
+ II->TimeOfUnit = TimeOfUnit;
DistributionNeedsUpdate = true;
}
@@ -325,7 +327,8 @@ public:
const auto &II = *Inputs[i];
Printf(" [% 3zd %s] sz: % 5zd runs: % 5zd succ: % 5zd focus: %d\n", i,
Sha1ToString(II.Sha1).c_str(), II.U.size(),
- II.NumExecutedMutations, II.NumSuccessfullMutations, II.HasFocusFunction);
+ II.NumExecutedMutations, II.NumSuccessfullMutations,
+ II.HasFocusFunction);
}
}
@@ -563,11 +566,11 @@ private:
}
std::piecewise_constant_distribution<double> CorpusDistribution;
- Vector<double> Intervals;
- Vector<double> Weights;
+ std::vector<double> Intervals;
+ std::vector<double> Weights;
std::unordered_set<std::string> Hashes;
- Vector<InputInfo*> Inputs;
+ std::vector<InputInfo *> Inputs;
size_t NumAddedFeatures = 0;
size_t NumUpdatedFeatures = 0;
@@ -577,7 +580,7 @@ private:
bool DistributionNeedsUpdate = true;
uint16_t FreqOfMostAbundantRareFeature = 0;
uint16_t GlobalFeatureFreqs[kFeatureSetSize] = {};
- Vector<uint32_t> RareFeatures;
+ std::vector<uint32_t> RareFeatures;
std::string OutputCorpus;
};
diff --git a/compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.cpp b/compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.cpp
index 23d422590d19..2f9a4d2d7adc 100644
--- a/compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.cpp
@@ -37,7 +37,7 @@ bool BlockCoverage::AppendCoverage(const std::string &S) {
// Coverage lines have this form:
// CN X Y Z T
// where N is the number of the function, T is the total number of instrumented
-// BBs, and X,Y,Z, if present, are the indecies of covered BB.
+// BBs, and X,Y,Z, if present, are the indices of covered BB.
// BB #0, which is the entry block, is not explicitly listed.
bool BlockCoverage::AppendCoverage(std::istream &IN) {
std::string L;
@@ -52,7 +52,7 @@ bool BlockCoverage::AppendCoverage(std::istream &IN) {
continue;
}
if (L[0] != 'C') continue;
- Vector<uint32_t> CoveredBlocks;
+ std::vector<uint32_t> CoveredBlocks;
while (true) {
uint32_t BB = 0;
SS >> BB;
@@ -68,7 +68,7 @@ bool BlockCoverage::AppendCoverage(std::istream &IN) {
auto It = Functions.find(FunctionId);
auto &Counters =
It == Functions.end()
- ? Functions.insert({FunctionId, Vector<uint32_t>(NumBlocks)})
+ ? Functions.insert({FunctionId, std::vector<uint32_t>(NumBlocks)})
.first->second
: It->second;
@@ -86,8 +86,8 @@ bool BlockCoverage::AppendCoverage(std::istream &IN) {
// * any uncovered function gets weight 0.
// * a function with lots of uncovered blocks gets bigger weight.
// * a function with a less frequently executed code gets bigger weight.
-Vector<double> BlockCoverage::FunctionWeights(size_t NumFunctions) const {
- Vector<double> Res(NumFunctions);
+std::vector<double> BlockCoverage::FunctionWeights(size_t NumFunctions) const {
+ std::vector<double> Res(NumFunctions);
for (auto It : Functions) {
auto FunctionID = It.first;
auto Counters = It.second;
@@ -104,7 +104,7 @@ Vector<double> BlockCoverage::FunctionWeights(size_t NumFunctions) const {
}
void DataFlowTrace::ReadCoverage(const std::string &DirPath) {
- Vector<SizedFile> Files;
+ std::vector<SizedFile> Files;
GetSizedFilesFromDir(DirPath, &Files);
for (auto &SF : Files) {
auto Name = Basename(SF.File);
@@ -115,16 +115,16 @@ void DataFlowTrace::ReadCoverage(const std::string &DirPath) {
}
}
-static void DFTStringAppendToVector(Vector<uint8_t> *DFT,
+static void DFTStringAppendToVector(std::vector<uint8_t> *DFT,
const std::string &DFTString) {
assert(DFT->size() == DFTString.size());
for (size_t I = 0, Len = DFT->size(); I < Len; I++)
(*DFT)[I] = DFTString[I] == '1';
}
-// converts a string of '0' and '1' into a Vector<uint8_t>
-static Vector<uint8_t> DFTStringToVector(const std::string &DFTString) {
- Vector<uint8_t> DFT(DFTString.size());
+// converts a string of '0' and '1' into a std::vector<uint8_t>
+static std::vector<uint8_t> DFTStringToVector(const std::string &DFTString) {
+ std::vector<uint8_t> DFT(DFTString.size());
DFTStringAppendToVector(&DFT, DFTString);
return DFT;
}
@@ -159,14 +159,14 @@ static bool ParseDFTLine(const std::string &Line, size_t *FunctionNum,
}
bool DataFlowTrace::Init(const std::string &DirPath, std::string *FocusFunction,
- Vector<SizedFile> &CorporaFiles, Random &Rand) {
+ std::vector<SizedFile> &CorporaFiles, Random &Rand) {
if (DirPath.empty()) return false;
Printf("INFO: DataFlowTrace: reading from '%s'\n", DirPath.c_str());
- Vector<SizedFile> Files;
+ std::vector<SizedFile> Files;
GetSizedFilesFromDir(DirPath, &Files);
std::string L;
size_t FocusFuncIdx = SIZE_MAX;
- Vector<std::string> FunctionNames;
+ std::vector<std::string> FunctionNames;
// Collect the hashes of the corpus files.
for (auto &SF : CorporaFiles)
@@ -191,7 +191,7 @@ bool DataFlowTrace::Init(const std::string &DirPath, std::string *FocusFunction,
// * chooses a random function according to the weights.
ReadCoverage(DirPath);
auto Weights = Coverage.FunctionWeights(NumFunctions);
- Vector<double> Intervals(NumFunctions + 1);
+ std::vector<double> Intervals(NumFunctions + 1);
std::iota(Intervals.begin(), Intervals.end(), 0);
auto Distribution = std::piecewise_constant_distribution<double>(
Intervals.begin(), Intervals.end(), Weights.begin());
@@ -247,7 +247,7 @@ bool DataFlowTrace::Init(const std::string &DirPath, std::string *FocusFunction,
}
int CollectDataFlow(const std::string &DFTBinary, const std::string &DirPath,
- const Vector<SizedFile> &CorporaFiles) {
+ const std::vector<SizedFile> &CorporaFiles) {
Printf("INFO: collecting data flow: bin: %s dir: %s files: %zd\n",
DFTBinary.c_str(), DirPath.c_str(), CorporaFiles.size());
if (CorporaFiles.empty()) {
@@ -265,7 +265,7 @@ int CollectDataFlow(const std::string &DFTBinary, const std::string &DirPath,
// we then request tags in [0,Size/2) and [Size/2, Size), and so on.
// Function number => DFT.
auto OutPath = DirPlusFile(DirPath, Hash(FileToVector(F.File)));
- std::unordered_map<size_t, Vector<uint8_t>> DFTMap;
+ std::unordered_map<size_t, std::vector<uint8_t>> DFTMap;
std::unordered_set<std::string> Cov;
Command Cmd;
Cmd.addArgument(DFTBinary);
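For reference, the "CN X Y Z T" coverage-line format described in the comment above can be parsed with a few lines of standalone code; the sample line and identifiers below are illustrative only and are not part of libFuzzer.

#include <cstdint>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

int main() {
  // "C5 1 3 7 12": function #5, covered BBs 1, 3 and 7, 12 instrumented BBs in total.
  std::string Line = "C5 1 3 7 12";
  std::istringstream SS(Line.substr(1));
  size_t FunctionId = 0;
  SS >> FunctionId;
  std::vector<uint32_t> Numbers;
  for (uint32_t V; SS >> V;)
    Numbers.push_back(V);
  uint32_t NumBlocks = Numbers.back(); // trailing T: total instrumented BBs
  Numbers.pop_back();                  // the rest are indices of covered BBs
  std::cout << "function " << FunctionId << ": " << Numbers.size() << " of "
            << NumBlocks << " blocks covered\n";
}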
diff --git a/compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.h b/compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.h
index 07c03bb25651..054dce1bdcb6 100644
--- a/compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.h
+++ b/compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.h
@@ -39,7 +39,7 @@
namespace fuzzer {
int CollectDataFlow(const std::string &DFTBinary, const std::string &DirPath,
- const Vector<SizedFile> &CorporaFiles);
+ const std::vector<SizedFile> &CorporaFiles);
class BlockCoverage {
public:
@@ -77,11 +77,11 @@ public:
return Result;
}
- Vector<double> FunctionWeights(size_t NumFunctions) const;
+ std::vector<double> FunctionWeights(size_t NumFunctions) const;
void clear() { Functions.clear(); }
private:
- typedef Vector<uint32_t> CoverageVector;
+ typedef std::vector<uint32_t> CoverageVector;
uint32_t NumberOfCoveredBlocks(const CoverageVector &Counters) const {
uint32_t Res = 0;
@@ -117,9 +117,9 @@ class DataFlowTrace {
public:
void ReadCoverage(const std::string &DirPath);
bool Init(const std::string &DirPath, std::string *FocusFunction,
- Vector<SizedFile> &CorporaFiles, Random &Rand);
+ std::vector<SizedFile> &CorporaFiles, Random &Rand);
void Clear() { Traces.clear(); }
- const Vector<uint8_t> *Get(const std::string &InputSha1) const {
+ const std::vector<uint8_t> *Get(const std::string &InputSha1) const {
auto It = Traces.find(InputSha1);
if (It != Traces.end())
return &It->second;
@@ -128,9 +128,9 @@ class DataFlowTrace {
private:
// Input's sha1 => DFT for the FocusFunction.
- std::unordered_map<std::string, Vector<uint8_t> > Traces;
- BlockCoverage Coverage;
- std::unordered_set<std::string> CorporaHashes;
+ std::unordered_map<std::string, std::vector<uint8_t>> Traces;
+ BlockCoverage Coverage;
+ std::unordered_set<std::string> CorporaHashes;
};
} // namespace fuzzer
diff --git a/compiler-rt/lib/fuzzer/FuzzerDefs.h b/compiler-rt/lib/fuzzer/FuzzerDefs.h
index 1a2752af2f4d..db1f74a545e3 100644
--- a/compiler-rt/lib/fuzzer/FuzzerDefs.h
+++ b/compiler-rt/lib/fuzzer/FuzzerDefs.h
@@ -38,28 +38,8 @@ struct ExternalFunctions;
// Global interface to functions that may or may not be available.
extern ExternalFunctions *EF;
-// We are using a custom allocator to give a different symbol name to STL
-// containers in order to avoid ODR violations.
-template<typename T>
- class fuzzer_allocator: public std::allocator<T> {
- public:
- fuzzer_allocator() = default;
-
- template<class U>
- fuzzer_allocator(const fuzzer_allocator<U>&) {}
-
- template<class Other>
- struct rebind { typedef fuzzer_allocator<Other> other; };
- };
-
-template<typename T>
-using Vector = std::vector<T, fuzzer_allocator<T>>;
-
-template<typename T>
-using Set = std::set<T, std::less<T>, fuzzer_allocator<T>>;
-
-typedef Vector<uint8_t> Unit;
-typedef Vector<Unit> UnitVector;
+typedef std::vector<uint8_t> Unit;
+typedef std::vector<Unit> UnitVector;
typedef int (*UserCallback)(const uint8_t *Data, size_t Size);
int FuzzerDriver(int *argc, char ***argv, UserCallback Callback);
diff --git a/compiler-rt/lib/fuzzer/FuzzerDictionary.h b/compiler-rt/lib/fuzzer/FuzzerDictionary.h
index db55907d9363..48f063c7ee4e 100644
--- a/compiler-rt/lib/fuzzer/FuzzerDictionary.h
+++ b/compiler-rt/lib/fuzzer/FuzzerDictionary.h
@@ -52,10 +52,13 @@ class DictionaryEntry {
public:
DictionaryEntry() {}
DictionaryEntry(Word W) : W(W) {}
- DictionaryEntry(Word W, size_t PositionHint) : W(W), PositionHint(PositionHint) {}
+ DictionaryEntry(Word W, size_t PositionHint)
+ : W(W), PositionHint(PositionHint) {}
const Word &GetW() const { return W; }
- bool HasPositionHint() const { return PositionHint != std::numeric_limits<size_t>::max(); }
+ bool HasPositionHint() const {
+ return PositionHint != std::numeric_limits<size_t>::max();
+ }
size_t GetPositionHint() const {
assert(HasPositionHint());
return PositionHint;
@@ -108,12 +111,12 @@ private:
};
// Parses one dictionary entry.
-// If successful, write the enty to Unit and returns true,
+// If successful, writes the entry to Unit and returns true,
// otherwise returns false.
bool ParseOneDictionaryEntry(const std::string &Str, Unit *U);
// Parses the dictionary file, fills Units, returns true iff all lines
// were parsed successfully.
-bool ParseDictionaryFile(const std::string &Text, Vector<Unit> *Units);
+bool ParseDictionaryFile(const std::string &Text, std::vector<Unit> *Units);
} // namespace fuzzer
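As a usage illustration, ParseDictionaryFile accepts the AFL-style dictionary format, one entry per line with an optional name, a double-quoted value and \xNN escapes; a sketch of such a file (the file name and entries are placeholders) passed to the fuzzer with -dict=sample.dict:

# sample.dict
kw1="GET "
kw2="Content-Length:"
magic="\x7fELF"

If any line fails to parse, ParseDictionaryFile returns false and, as the driver code below shows, the fuzzer exits with an error.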
diff --git a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp
index ceaa9070512f..6b007f2ad45c 100644
--- a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp
@@ -86,7 +86,7 @@ static const FlagDescription FlagDescriptions [] {
static const size_t kNumFlags =
sizeof(FlagDescriptions) / sizeof(FlagDescriptions[0]);
-static Vector<std::string> *Inputs;
+static std::vector<std::string> *Inputs;
static std::string *ProgName;
static void PrintHelp() {
@@ -187,7 +187,7 @@ static bool ParseOneFlag(const char *Param) {
}
// We don't use any library to minimize dependencies.
-static void ParseFlags(const Vector<std::string> &Args,
+static void ParseFlags(const std::vector<std::string> &Args,
const ExternalFunctions *EF) {
for (size_t F = 0; F < kNumFlags; F++) {
if (FlagDescriptions[F].IntFlag)
@@ -206,7 +206,7 @@ static void ParseFlags(const Vector<std::string> &Args,
"Disabling -len_control by default.\n", EF->LLVMFuzzerCustomMutator);
}
- Inputs = new Vector<std::string>;
+ Inputs = new std::vector<std::string>;
for (size_t A = 1; A < Args.size(); A++) {
if (ParseOneFlag(Args[A].c_str())) {
if (Flags.ignore_remaining_args)
@@ -272,7 +272,7 @@ static void ValidateDirectoryExists(const std::string &Path,
exit(1);
}
-std::string CloneArgsWithoutX(const Vector<std::string> &Args,
+std::string CloneArgsWithoutX(const std::vector<std::string> &Args,
const char *X1, const char *X2) {
std::string Cmd;
for (auto &S : Args) {
@@ -283,18 +283,19 @@ std::string CloneArgsWithoutX(const Vector<std::string> &Args,
return Cmd;
}
-static int RunInMultipleProcesses(const Vector<std::string> &Args,
+static int RunInMultipleProcesses(const std::vector<std::string> &Args,
unsigned NumWorkers, unsigned NumJobs) {
std::atomic<unsigned> Counter(0);
std::atomic<bool> HasErrors(false);
Command Cmd(Args);
Cmd.removeFlag("jobs");
Cmd.removeFlag("workers");
- Vector<std::thread> V;
+ std::vector<std::thread> V;
std::thread Pulse(PulseThread);
Pulse.detach();
for (unsigned i = 0; i < NumWorkers; i++)
- V.push_back(std::thread(WorkerThread, std::ref(Cmd), &Counter, NumJobs, &HasErrors));
+ V.push_back(std::thread(WorkerThread, std::ref(Cmd), &Counter, NumJobs,
+ &HasErrors));
for (auto &T : V)
T.join();
return HasErrors ? 1 : 0;
@@ -348,8 +349,8 @@ static std::string GetDedupTokenFromCmdOutput(const std::string &S) {
return S.substr(Beg, End - Beg);
}
-int CleanseCrashInput(const Vector<std::string> &Args,
- const FuzzingOptions &Options) {
+int CleanseCrashInput(const std::vector<std::string> &Args,
+ const FuzzingOptions &Options) {
if (Inputs->size() != 1 || !Flags.exact_artifact_path) {
Printf("ERROR: -cleanse_crash should be given one input file and"
" -exact_artifact_path\n");
@@ -372,7 +373,7 @@ int CleanseCrashInput(const Vector<std::string> &Args,
auto U = FileToVector(CurrentFilePath);
size_t Size = U.size();
- const Vector<uint8_t> ReplacementBytes = {' ', 0xff};
+ const std::vector<uint8_t> ReplacementBytes = {' ', 0xff};
for (int NumAttempts = 0; NumAttempts < 5; NumAttempts++) {
bool Changed = false;
for (size_t Idx = 0; Idx < Size; Idx++) {
@@ -403,7 +404,7 @@ int CleanseCrashInput(const Vector<std::string> &Args,
return 0;
}
-int MinimizeCrashInput(const Vector<std::string> &Args,
+int MinimizeCrashInput(const std::vector<std::string> &Args,
const FuzzingOptions &Options) {
if (Inputs->size() != 1) {
Printf("ERROR: -minimize_crash should be given one input file\n");
@@ -503,14 +504,15 @@ int MinimizeCrashInputInternalStep(Fuzzer *F, InputCorpus *Corpus) {
return 0;
}
-void Merge(Fuzzer *F, FuzzingOptions &Options, const Vector<std::string> &Args,
- const Vector<std::string> &Corpora, const char *CFPathOrNull) {
+void Merge(Fuzzer *F, FuzzingOptions &Options,
+ const std::vector<std::string> &Args,
+ const std::vector<std::string> &Corpora, const char *CFPathOrNull) {
if (Corpora.size() < 2) {
Printf("INFO: Merge requires two or more corpus dirs\n");
exit(0);
}
- Vector<SizedFile> OldCorpus, NewCorpus;
+ std::vector<SizedFile> OldCorpus, NewCorpus;
GetSizedFilesFromDir(Corpora[0], &OldCorpus);
for (size_t i = 1; i < Corpora.size(); i++)
GetSizedFilesFromDir(Corpora[i], &NewCorpus);
@@ -518,10 +520,10 @@ void Merge(Fuzzer *F, FuzzingOptions &Options, const Vector<std::string> &Args,
std::sort(NewCorpus.begin(), NewCorpus.end());
std::string CFPath = CFPathOrNull ? CFPathOrNull : TempPath("Merge", ".txt");
- Vector<std::string> NewFiles;
- Set<uint32_t> NewFeatures, NewCov;
+ std::vector<std::string> NewFiles;
+ std::set<uint32_t> NewFeatures, NewCov;
CrashResistantMerge(Args, OldCorpus, NewCorpus, &NewFiles, {}, &NewFeatures,
- {}, &NewCov, CFPath, true);
+ {}, &NewCov, CFPath, true, Flags.set_cover_merge);
for (auto &Path : NewFiles)
F->WriteToOutputCorpus(FileToVector(Path, Options.MaxLen));
// We are done, delete the control file if it was a temporary one.
@@ -531,17 +533,17 @@ void Merge(Fuzzer *F, FuzzingOptions &Options, const Vector<std::string> &Args,
exit(0);
}
-int AnalyzeDictionary(Fuzzer *F, const Vector<Unit>& Dict,
- UnitVector& Corpus) {
+int AnalyzeDictionary(Fuzzer *F, const std::vector<Unit> &Dict,
+ UnitVector &Corpus) {
Printf("Started dictionary minimization (up to %d tests)\n",
Dict.size() * Corpus.size() * 2);
// Scores and usage count for each dictionary unit.
- Vector<int> Scores(Dict.size());
- Vector<int> Usages(Dict.size());
+ std::vector<int> Scores(Dict.size());
+ std::vector<int> Usages(Dict.size());
- Vector<size_t> InitialFeatures;
- Vector<size_t> ModifiedFeatures;
+ std::vector<size_t> InitialFeatures;
+ std::vector<size_t> ModifiedFeatures;
for (auto &C : Corpus) {
// Get coverage for the testcase without modifications.
F->ExecuteCallback(C.data(), C.size());
@@ -551,7 +553,7 @@ int AnalyzeDictionary(Fuzzer *F, const Vector<Unit>& Dict,
});
for (size_t i = 0; i < Dict.size(); ++i) {
- Vector<uint8_t> Data = C;
+ std::vector<uint8_t> Data = C;
auto StartPos = std::search(Data.begin(), Data.end(),
Dict[i].begin(), Dict[i].end());
// Skip dictionary unit, if the testcase does not contain it.
@@ -597,9 +599,9 @@ int AnalyzeDictionary(Fuzzer *F, const Vector<Unit>& Dict,
return 0;
}
-Vector<std::string> ParseSeedInuts(const char *seed_inputs) {
+std::vector<std::string> ParseSeedInuts(const char *seed_inputs) {
// Parse -seed_inputs=file1,file2,... or -seed_inputs=@seed_inputs_file
- Vector<std::string> Files;
+ std::vector<std::string> Files;
if (!seed_inputs) return Files;
std::string SeedInputs;
if (Flags.seed_inputs[0] == '@')
@@ -620,9 +622,10 @@ Vector<std::string> ParseSeedInuts(const char *seed_inputs) {
return Files;
}
-static Vector<SizedFile> ReadCorpora(const Vector<std::string> &CorpusDirs,
- const Vector<std::string> &ExtraSeedFiles) {
- Vector<SizedFile> SizedFiles;
+static std::vector<SizedFile>
+ReadCorpora(const std::vector<std::string> &CorpusDirs,
+ const std::vector<std::string> &ExtraSeedFiles) {
+ std::vector<SizedFile> SizedFiles;
size_t LastNumFiles = 0;
for (auto &Dir : CorpusDirs) {
GetSizedFilesFromDir(Dir, &SizedFiles);
@@ -645,7 +648,7 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) {
EF->LLVMFuzzerInitialize(argc, argv);
if (EF->__msan_scoped_disable_interceptor_checks)
EF->__msan_scoped_disable_interceptor_checks();
- const Vector<std::string> Args(*argv, *argv + *argc);
+ const std::vector<std::string> Args(*argv, *argv + *argc);
assert(!Args.empty());
ProgName = new std::string(Args[0]);
if (Argv0 != *ProgName) {
@@ -734,7 +737,7 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) {
ValidateDirectoryExists(DirName(Options.ExactArtifactPath),
Flags.create_missing_dirs);
}
- Vector<Unit> Dictionary;
+ std::vector<Unit> Dictionary;
if (Flags.dict)
if (!ParseDictionaryFile(FileToString(Flags.dict), &Dictionary))
return 1;
@@ -794,7 +797,8 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) {
if (Flags.verbosity)
Printf("INFO: Seed: %u\n", Seed);
- if (Flags.collect_data_flow && !Flags.fork && !Flags.merge) {
+ if (Flags.collect_data_flow && !Flags.fork &&
+ !(Flags.merge || Flags.set_cover_merge)) {
if (RunIndividualFiles)
return CollectDataFlow(Flags.collect_data_flow, Flags.data_flow_trace,
ReadCorpora({}, *Inputs));
@@ -866,10 +870,11 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) {
exit(0);
}
+ Options.ForkCorpusGroups = Flags.fork_corpus_groups;
if (Flags.fork)
FuzzWithFork(F->GetMD().GetRand(), Options, Args, *Inputs, Flags.fork);
- if (Flags.merge)
+ if (Flags.merge || Flags.set_cover_merge)
Merge(F, Options, Args, *Inputs, Flags.merge_control_file);
if (Flags.merge_inner) {
@@ -877,7 +882,8 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) {
if (Options.MaxLen == 0)
F->SetMaxInputLen(kDefaultMaxMergeLen);
assert(Flags.merge_control_file);
- F->CrashResistantMergeInternalStep(Flags.merge_control_file);
+ F->CrashResistantMergeInternalStep(Flags.merge_control_file,
+ !strncmp(Flags.merge_inner, "2", 1));
exit(0);
}
diff --git a/compiler-rt/lib/fuzzer/FuzzerExtraCounters.cpp b/compiler-rt/lib/fuzzer/FuzzerExtraCounters.cpp
index 04f569a1a879..54ecbf7c62f1 100644
--- a/compiler-rt/lib/fuzzer/FuzzerExtraCounters.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerExtraCounters.cpp
@@ -31,12 +31,4 @@ void ClearExtraCounters() { // hand-written memset, don't asan-ify.
} // namespace fuzzer
-#else
-// TODO: implement for other platforms.
-namespace fuzzer {
-uint8_t *ExtraCountersBegin() { return nullptr; }
-uint8_t *ExtraCountersEnd() { return nullptr; }
-void ClearExtraCounters() {}
-} // namespace fuzzer
-
#endif
diff --git a/compiler-rt/lib/fuzzer/FuzzerExtraCountersDarwin.cpp b/compiler-rt/lib/fuzzer/FuzzerExtraCountersDarwin.cpp
new file mode 100644
index 000000000000..2321ba8a3d40
--- /dev/null
+++ b/compiler-rt/lib/fuzzer/FuzzerExtraCountersDarwin.cpp
@@ -0,0 +1,22 @@
+//===- FuzzerExtraCountersDarwin.cpp - Extra coverage counters for Darwin -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Extra coverage counters defined by user code for Darwin.
+//===----------------------------------------------------------------------===//
+
+#include "FuzzerPlatform.h"
+#include <cstdint>
+
+#if LIBFUZZER_APPLE
+
+namespace fuzzer {
+uint8_t *ExtraCountersBegin() { return nullptr; }
+uint8_t *ExtraCountersEnd() { return nullptr; }
+void ClearExtraCounters() {}
+} // namespace fuzzer
+
+#endif
diff --git a/compiler-rt/lib/fuzzer/FuzzerExtraCountersWindows.cpp b/compiler-rt/lib/fuzzer/FuzzerExtraCountersWindows.cpp
new file mode 100644
index 000000000000..102f5febdaec
--- /dev/null
+++ b/compiler-rt/lib/fuzzer/FuzzerExtraCountersWindows.cpp
@@ -0,0 +1,80 @@
+//===- FuzzerExtraCountersWindows.cpp - Extra coverage counters for Win32 -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Extra coverage counters defined by user code for Windows.
+//===----------------------------------------------------------------------===//
+
+#include "FuzzerPlatform.h"
+#include <cstdint>
+
+#if LIBFUZZER_WINDOWS
+#include <windows.h>
+
+namespace fuzzer {
+
+//
+// The __start___libfuzzer_extra_counters variable is align 16, size 16 to
+// ensure the padding between it and the next variable in this section (either
+// __libfuzzer_extra_counters or __stop___libfuzzer_extra_counters) will be
+// located at (__start___libfuzzer_extra_counters +
+// sizeof(__start___libfuzzer_extra_counters)). Otherwise, the calculation of
+// (stop - (start + sizeof(start))) might be skewed.
+//
+// The section name, __libfuzzer_extra_countaaa ends with "aaa", so it sorts
+// before __libfuzzer_extra_counters alphabetically. We want the start symbol to
+// be placed in the section just before the user supplied counters (if present).
+//
+#pragma section(".data$__libfuzzer_extra_countaaa")
+ATTRIBUTE_ALIGNED(16)
+__declspec(allocate(".data$__libfuzzer_extra_countaaa")) uint8_t
+ __start___libfuzzer_extra_counters[16] = {0};
+
+//
+// Example of what the user-supplied counters should look like. First, the
+// pragma to create the section name. It will fall alphabetically between
+// ".data$__libfuzzer_extra_countaaa" and ".data$__libfuzzer_extra_countzzz".
+// Next, the declspec to allocate the variable inside the specified section.
+// Finally, an array, struct, or other object used to track the counter data.
+// The size of this variable is computed at runtime by finding the difference of
+// __stop___libfuzzer_extra_counters and __start___libfuzzer_extra_counters +
+// sizeof(__start___libfuzzer_extra_counters).
+//
+
+//
+// #pragma section(".data$__libfuzzer_extra_counters")
+// __declspec(allocate(".data$__libfuzzer_extra_counters"))
+// uint8_t any_name_variable[64 * 1024];
+//
+
+//
+// Here, the section name, __libfuzzer_extra_countzzz ends with "zzz", so it
+// sorts after __libfuzzer_extra_counters alphabetically. We want the stop
+// symbol to be placed in the section just after the user supplied counters (if
+// present). Align to 1 so there isn't any padding placed between this and the
+// previous variable.
+//
+#pragma section(".data$__libfuzzer_extra_countzzz")
+ATTRIBUTE_ALIGNED(1)
+__declspec(allocate(".data$__libfuzzer_extra_countzzz")) uint8_t
+ __stop___libfuzzer_extra_counters = 0;
+
+uint8_t *ExtraCountersBegin() {
+ return __start___libfuzzer_extra_counters +
+ sizeof(__start___libfuzzer_extra_counters);
+}
+
+uint8_t *ExtraCountersEnd() { return &__stop___libfuzzer_extra_counters; }
+
+ATTRIBUTE_NO_SANITIZE_ALL
+void ClearExtraCounters() {
+ uint8_t *Beg = ExtraCountersBegin();
+ SecureZeroMemory(Beg, ExtraCountersEnd() - Beg);
+}
+
+} // namespace fuzzer
+
+#endif
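To make the commented example above concrete, a Windows fuzz target could declare its own counters roughly as follows; this is a minimal sketch mirroring the comment, and the array name and update logic are illustrative only.

#include <cstddef>
#include <cstdint>

#pragma section(".data$__libfuzzer_extra_counters")
__declspec(allocate(".data$__libfuzzer_extra_counters"))
static uint8_t ExtraCounters[64 * 1024]; // sized by the user, zeroed by libFuzzer

extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
  if (Size > 0)
    ExtraCounters[Data[0]]++; // mark a user-defined state as reached
  return 0;
}

At run time libFuzzer sizes the region as ExtraCountersEnd() - ExtraCountersBegin(), i.e. the bytes between the "aaa" and "zzz" bracketing symbols defined above.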
diff --git a/compiler-rt/lib/fuzzer/FuzzerFlags.def b/compiler-rt/lib/fuzzer/FuzzerFlags.def
index ab31da0ae5d6..11815349b014 100644
--- a/compiler-rt/lib/fuzzer/FuzzerFlags.def
+++ b/compiler-rt/lib/fuzzer/FuzzerFlags.def
@@ -58,12 +58,21 @@ FUZZER_FLAG_INT(max_total_time, 0, "If positive, indicates the maximal total "
FUZZER_FLAG_INT(help, 0, "Print help.")
FUZZER_FLAG_INT(fork, 0, "Experimental mode where fuzzing happens "
"in a subprocess")
+FUZZER_FLAG_INT(fork_corpus_groups, 0, "For fork mode, enable the corpus-group "
+    "strategy. The main corpus will be grouped according to size, "
+ "and each sub-process will randomly select seeds from different "
+ "groups as the sub-corpus.")
FUZZER_FLAG_INT(ignore_timeouts, 1, "Ignore timeouts in fork mode")
FUZZER_FLAG_INT(ignore_ooms, 1, "Ignore OOMs in fork mode")
FUZZER_FLAG_INT(ignore_crashes, 0, "Ignore crashes in fork mode")
FUZZER_FLAG_INT(merge, 0, "If 1, the 2-nd, 3-rd, etc corpora will be "
"merged into the 1-st corpus. Only interesting units will be taken. "
"This flag can be used to minimize a corpus.")
+FUZZER_FLAG_INT(set_cover_merge, 0, "If 1, the 2-nd, 3-rd, etc corpora will be "
+ "merged into the 1-st corpus. Same as the 'merge' flag, but uses the "
+ "standard greedy algorithm for the set cover problem to "
+ "compute an approximation of the minimum set of testcases that "
+    "provide the same coverage as the initial corpora.")
FUZZER_FLAG_STRING(stop_file, "Stop fuzzing ASAP if this file exists")
FUZZER_FLAG_STRING(merge_inner, "internal flag")
FUZZER_FLAG_STRING(merge_control_file,
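For orientation, the new flags are used from the command line roughly as follows (binary and directory names are placeholders): -set_cover_merge takes the same corpus arguments as -merge, and -fork_corpus_groups only takes effect together with -fork.

./my_fuzzer -set_cover_merge=1 minimized_corpus/ corpus1/ corpus2/
./my_fuzzer -fork=4 -fork_corpus_groups=1 corpus/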
diff --git a/compiler-rt/lib/fuzzer/FuzzerFork.cpp b/compiler-rt/lib/fuzzer/FuzzerFork.cpp
index 5134a5d979e6..d59d51384201 100644
--- a/compiler-rt/lib/fuzzer/FuzzerFork.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerFork.cpp
@@ -86,18 +86,21 @@ struct FuzzJob {
};
struct GlobalEnv {
- Vector<std::string> Args;
- Vector<std::string> CorpusDirs;
+ std::vector<std::string> Args;
+ std::vector<std::string> CorpusDirs;
std::string MainCorpusDir;
std::string TempDir;
std::string DFTDir;
std::string DataFlowBinary;
- Set<uint32_t> Features, Cov;
- Set<std::string> FilesWithDFT;
- Vector<std::string> Files;
+ std::set<uint32_t> Features, Cov;
+ std::set<std::string> FilesWithDFT;
+ std::vector<std::string> Files;
+ std::vector<std::size_t> FilesSizes;
Random *Rand;
std::chrono::system_clock::time_point ProcessStartTime;
int Verbosity = 0;
+ int Group = 0;
+ int NumCorpuses = 8;
size_t NumTimeouts = 0;
size_t NumOOMs = 0;
@@ -136,10 +139,24 @@ struct GlobalEnv {
if (size_t CorpusSubsetSize =
std::min(Files.size(), (size_t)sqrt(Files.size() + 2))) {
auto Time1 = std::chrono::system_clock::now();
- for (size_t i = 0; i < CorpusSubsetSize; i++) {
- auto &SF = Files[Rand->SkewTowardsLast(Files.size())];
- Seeds += (Seeds.empty() ? "" : ",") + SF;
- CollectDFT(SF);
+      if (Group) { // Corpus grouping is enabled.
+ size_t AverageCorpusSize = Files.size() / NumCorpuses + 1;
+ size_t StartIndex = ((JobId - 1) % NumCorpuses) * AverageCorpusSize;
+ for (size_t i = 0; i < CorpusSubsetSize; i++) {
+ size_t RandNum = (*Rand)(AverageCorpusSize);
+ size_t Index = RandNum + StartIndex;
+ Index = Index < Files.size() ? Index
+ : Rand->SkewTowardsLast(Files.size());
+ auto &SF = Files[Index];
+ Seeds += (Seeds.empty() ? "" : ",") + SF;
+ CollectDFT(SF);
+ }
+ } else {
+ for (size_t i = 0; i < CorpusSubsetSize; i++) {
+ auto &SF = Files[Rand->SkewTowardsLast(Files.size())];
+ Seeds += (Seeds.empty() ? "" : ",") + SF;
+ CollectDFT(SF);
+ }
}
auto Time2 = std::chrono::system_clock::now();
auto DftTimeInSeconds = duration_cast<seconds>(Time2 - Time1).count();
@@ -183,7 +200,7 @@ struct GlobalEnv {
auto Stats = ParseFinalStatsFromLog(Job->LogPath);
NumRuns += Stats.number_of_executed_units;
- Vector<SizedFile> TempFiles, MergeCandidates;
+ std::vector<SizedFile> TempFiles, MergeCandidates;
// Read all newly created inputs and their feature sets.
// Choose only those inputs that have new features.
GetSizedFilesFromDir(Job->CorpusDir, &TempFiles);
@@ -193,7 +210,7 @@ struct GlobalEnv {
FeatureFile.replace(0, Job->CorpusDir.size(), Job->FeaturesDir);
auto FeatureBytes = FileToVector(FeatureFile, 0, false);
assert((FeatureBytes.size() % sizeof(uint32_t)) == 0);
- Vector<uint32_t> NewFeatures(FeatureBytes.size() / sizeof(uint32_t));
+ std::vector<uint32_t> NewFeatures(FeatureBytes.size() / sizeof(uint32_t));
memcpy(NewFeatures.data(), FeatureBytes.data(), FeatureBytes.size());
for (auto Ft : NewFeatures) {
if (!Features.count(Ft)) {
@@ -211,15 +228,27 @@ struct GlobalEnv {
if (MergeCandidates.empty()) return;
- Vector<std::string> FilesToAdd;
- Set<uint32_t> NewFeatures, NewCov;
+ std::vector<std::string> FilesToAdd;
+ std::set<uint32_t> NewFeatures, NewCov;
+ bool IsSetCoverMerge =
+ !Job->Cmd.getFlagValue("set_cover_merge").compare("1");
CrashResistantMerge(Args, {}, MergeCandidates, &FilesToAdd, Features,
- &NewFeatures, Cov, &NewCov, Job->CFPath, false);
+ &NewFeatures, Cov, &NewCov, Job->CFPath, false,
+ IsSetCoverMerge);
for (auto &Path : FilesToAdd) {
auto U = FileToVector(Path);
auto NewPath = DirPlusFile(MainCorpusDir, Hash(U));
WriteToFile(U, NewPath);
- Files.push_back(NewPath);
+        if (Group) { // Insert into Files, keeping it sorted by seed size.
+ size_t UnitSize = U.size();
+ auto Idx =
+ std::upper_bound(FilesSizes.begin(), FilesSizes.end(), UnitSize) -
+ FilesSizes.begin();
+ FilesSizes.insert(FilesSizes.begin() + Idx, UnitSize);
+ Files.insert(Files.begin() + Idx, NewPath);
+ } else {
+ Files.push_back(NewPath);
+ }
}
Features.insert(NewFeatures.begin(), NewFeatures.end());
Cov.insert(NewCov.begin(), NewCov.end());
@@ -228,10 +257,8 @@ struct GlobalEnv {
if (TPC.PcIsFuncEntry(TE))
PrintPC(" NEW_FUNC: %p %F %L\n", "",
TPC.GetNextInstructionPc(TE->PC));
-
}
-
void CollectDFT(const std::string &InputPath) {
if (DataFlowBinary.empty()) return;
if (!FilesWithDFT.insert(InputPath).second) return;
@@ -283,8 +310,8 @@ void WorkerThread(JobQueue *FuzzQ, JobQueue *MergeQ) {
// This is just a skeleton of an experimental -fork=1 feature.
void FuzzWithFork(Random &Rand, const FuzzingOptions &Options,
- const Vector<std::string> &Args,
- const Vector<std::string> &CorpusDirs, int NumJobs) {
+ const std::vector<std::string> &Args,
+ const std::vector<std::string> &CorpusDirs, int NumJobs) {
Printf("INFO: -fork=%d: fuzzing in separate process(s)\n", NumJobs);
GlobalEnv Env;
@@ -294,8 +321,9 @@ void FuzzWithFork(Random &Rand, const FuzzingOptions &Options,
Env.Verbosity = Options.Verbosity;
Env.ProcessStartTime = std::chrono::system_clock::now();
Env.DataFlowBinary = Options.CollectDataFlow;
+ Env.Group = Options.ForkCorpusGroups;
- Vector<SizedFile> SeedFiles;
+ std::vector<SizedFile> SeedFiles;
for (auto &Dir : CorpusDirs)
GetSizedFilesFromDir(Dir, &SeedFiles);
std::sort(SeedFiles.begin(), SeedFiles.end());
@@ -316,13 +344,20 @@ void FuzzWithFork(Random &Rand, const FuzzingOptions &Options,
Env.Files.push_back(File.File);
} else {
auto CFPath = DirPlusFile(Env.TempDir, "merge.txt");
- Set<uint32_t> NewFeatures, NewCov;
+ std::set<uint32_t> NewFeatures, NewCov;
CrashResistantMerge(Env.Args, {}, SeedFiles, &Env.Files, Env.Features,
- &NewFeatures, Env.Cov, &NewCov, CFPath, false);
+ &NewFeatures, Env.Cov, &NewCov, CFPath,
+ /*Verbose=*/false, /*IsSetCoverMerge=*/false);
Env.Features.insert(NewFeatures.begin(), NewFeatures.end());
Env.Cov.insert(NewFeatures.begin(), NewFeatures.end());
RemoveFile(CFPath);
}
+
+ if (Env.Group) {
+ for (auto &path : Env.Files)
+ Env.FilesSizes.push_back(FileSize(path));
+ }
+
Printf("INFO: -fork=%d: %zd seed inputs, starting to fuzz in %s\n", NumJobs,
Env.Files.size(), Env.TempDir.c_str());
@@ -337,8 +372,10 @@ void FuzzWithFork(Random &Rand, const FuzzingOptions &Options,
WriteToFile(Unit({1}), Env.StopFile());
};
+ size_t MergeCycle = 20;
+ size_t JobExecuted = 0;
size_t JobId = 1;
- Vector<std::thread> Threads;
+ std::vector<std::thread> Threads;
for (int t = 0; t < NumJobs; t++) {
Threads.push_back(std::thread(WorkerThread, &FuzzQ, &MergeQ));
FuzzQ.Push(Env.CreateNewJob(JobId++));
@@ -358,7 +395,46 @@ void FuzzWithFork(Random &Rand, const FuzzingOptions &Options,
Env.RunOneMergeJob(Job.get());
- // Continue if our crash is one of the ignorred ones.
+      // Periodically re-merge the corpus when corpus grouping is enabled.
+ JobExecuted++;
+ if (Env.Group && JobExecuted >= MergeCycle) {
+ std::vector<SizedFile> CurrentSeedFiles;
+ for (auto &Dir : CorpusDirs)
+ GetSizedFilesFromDir(Dir, &CurrentSeedFiles);
+ std::sort(CurrentSeedFiles.begin(), CurrentSeedFiles.end());
+
+ auto CFPath = DirPlusFile(Env.TempDir, "merge.txt");
+ std::set<uint32_t> TmpNewFeatures, TmpNewCov;
+ std::set<uint32_t> TmpFeatures, TmpCov;
+ Env.Files.clear();
+ Env.FilesSizes.clear();
+ CrashResistantMerge(Env.Args, {}, CurrentSeedFiles, &Env.Files,
+ TmpFeatures, &TmpNewFeatures, TmpCov, &TmpNewCov,
+ CFPath, /*Verbose=*/false, /*IsSetCoverMerge=*/false);
+ for (auto &path : Env.Files)
+ Env.FilesSizes.push_back(FileSize(path));
+ RemoveFile(CFPath);
+ JobExecuted = 0;
+ MergeCycle += 5;
+ }
+
+      // The number of corpus seeds grows over time, so the number of groups is
+      // adjusted dynamically to keep each group at roughly three times the
+      // number of seeds selected per job.
+ if (Env.Files.size() < 2000)
+ Env.NumCorpuses = 12;
+ else if (Env.Files.size() < 6000)
+ Env.NumCorpuses = 20;
+ else if (Env.Files.size() < 12000)
+ Env.NumCorpuses = 32;
+ else if (Env.Files.size() < 16000)
+ Env.NumCorpuses = 40;
+ else if (Env.Files.size() < 24000)
+ Env.NumCorpuses = 60;
+ else
+ Env.NumCorpuses = 80;
+
+ // Continue if our crash is one of the ignored ones.
if (Options.IgnoreTimeouts && ExitCode == Options.TimeoutExitCode)
Env.NumTimeouts++;
else if (Options.IgnoreOOMs && ExitCode == Options.OOMExitCode)
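The group selection in CreateNewJob above boils down to a small index computation. A distilled sketch follows; the helper name and parameters are illustrative, and Files is assumed to be sorted by size, with RandInGroup drawn from [0, AverageCorpusSize) and FallbackIndex a skew-towards-last random pick.

#include <cstddef>

size_t PickSeedIndex(size_t JobId, size_t NumCorpuses, size_t NumFiles,
                     size_t RandInGroup, size_t FallbackIndex) {
  size_t AverageCorpusSize = NumFiles / NumCorpuses + 1;
  size_t StartIndex = ((JobId - 1) % NumCorpuses) * AverageCorpusSize;
  size_t Index = StartIndex + RandInGroup; // stay inside this job's group
  return Index < NumFiles ? Index : FallbackIndex;
}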
diff --git a/compiler-rt/lib/fuzzer/FuzzerFork.h b/compiler-rt/lib/fuzzer/FuzzerFork.h
index b29a43e13fbc..fc3e9d636cbc 100644
--- a/compiler-rt/lib/fuzzer/FuzzerFork.h
+++ b/compiler-rt/lib/fuzzer/FuzzerFork.h
@@ -17,8 +17,8 @@
namespace fuzzer {
void FuzzWithFork(Random &Rand, const FuzzingOptions &Options,
- const Vector<std::string> &Args,
- const Vector<std::string> &CorpusDirs, int NumJobs);
+ const std::vector<std::string> &Args,
+ const std::vector<std::string> &CorpusDirs, int NumJobs);
} // namespace fuzzer
#endif // LLVM_FUZZER_FORK_H
diff --git a/compiler-rt/lib/fuzzer/FuzzerIO.cpp b/compiler-rt/lib/fuzzer/FuzzerIO.cpp
index 7f149ac6c485..0a58c5377b34 100644
--- a/compiler-rt/lib/fuzzer/FuzzerIO.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerIO.cpp
@@ -23,6 +23,14 @@ namespace fuzzer {
static FILE *OutputFile = stderr;
+FILE *GetOutputFile() {
+ return OutputFile;
+}
+
+void SetOutputFile(FILE *NewOutputFile) {
+ OutputFile = NewOutputFile;
+}
+
long GetEpoch(const std::string &Path) {
struct stat St;
if (stat(Path.c_str(), &St))
@@ -90,11 +98,11 @@ void AppendToFile(const uint8_t *Data, size_t Size, const std::string &Path) {
fclose(Out);
}
-void ReadDirToVectorOfUnits(const char *Path, Vector<Unit> *V, long *Epoch,
+void ReadDirToVectorOfUnits(const char *Path, std::vector<Unit> *V, long *Epoch,
size_t MaxSize, bool ExitOnError,
- Vector<std::string> *VPaths) {
+ std::vector<std::string> *VPaths) {
long E = Epoch ? *Epoch : 0;
- Vector<std::string> Files;
+ std::vector<std::string> Files;
ListFilesInDirRecursive(Path, Epoch, &Files, /*TopDir*/true);
size_t NumLoaded = 0;
for (size_t i = 0; i < Files.size(); i++) {
@@ -112,8 +120,8 @@ void ReadDirToVectorOfUnits(const char *Path, Vector<Unit> *V, long *Epoch,
}
}
-void GetSizedFilesFromDir(const std::string &Dir, Vector<SizedFile> *V) {
- Vector<std::string> Files;
+void GetSizedFilesFromDir(const std::string &Dir, std::vector<SizedFile> *V) {
+ std::vector<std::string> Files;
ListFilesInDirRecursive(Dir, 0, &Files, /*TopDir*/true);
for (auto &File : Files)
if (size_t Size = FileSize(File))
diff --git a/compiler-rt/lib/fuzzer/FuzzerIO.h b/compiler-rt/lib/fuzzer/FuzzerIO.h
index bde18267ea36..401afa0b4477 100644
--- a/compiler-rt/lib/fuzzer/FuzzerIO.h
+++ b/compiler-rt/lib/fuzzer/FuzzerIO.h
@@ -32,9 +32,9 @@ void WriteToFile(const Unit &U, const std::string &Path);
void AppendToFile(const uint8_t *Data, size_t Size, const std::string &Path);
void AppendToFile(const std::string &Data, const std::string &Path);
-void ReadDirToVectorOfUnits(const char *Path, Vector<Unit> *V, long *Epoch,
+void ReadDirToVectorOfUnits(const char *Path, std::vector<Unit> *V, long *Epoch,
size_t MaxSize, bool ExitOnError,
- Vector<std::string> *VPaths = 0);
+ std::vector<std::string> *VPaths = 0);
// Returns "Dir/FileName" or equivalent for the current OS.
std::string DirPlusFile(const std::string &DirPath,
@@ -54,6 +54,10 @@ void DupAndCloseStderr();
void CloseStdout();
+// For testing.
+FILE *GetOutputFile();
+void SetOutputFile(FILE *NewOutputFile);
+
void Printf(const char *Fmt, ...);
void VPrintf(bool Verbose, const char *Fmt, ...);
@@ -66,7 +70,7 @@ bool IsDirectory(const std::string &Path);
size_t FileSize(const std::string &Path);
void ListFilesInDirRecursive(const std::string &Dir, long *Epoch,
- Vector<std::string> *V, bool TopDir);
+ std::vector<std::string> *V, bool TopDir);
bool MkDirRecursive(const std::string &Dir);
void RmDirRecursive(const std::string &Dir);
@@ -85,7 +89,7 @@ struct SizedFile {
bool operator<(const SizedFile &B) const { return Size < B.Size; }
};
-void GetSizedFilesFromDir(const std::string &Dir, Vector<SizedFile> *V);
+void GetSizedFilesFromDir(const std::string &Dir, std::vector<SizedFile> *V);
char GetSeparator();
bool IsSeparator(char C);
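A unit test could exercise the new hooks roughly like this; the fragment assumes the libFuzzer sources are on the include path, and the temporary-file handling is illustrative.

#include <cstdio>
#include "FuzzerIO.h"

void RunWithCapturedOutput() {
  FILE *Old = fuzzer::GetOutputFile();
  FILE *Tmp = std::tmpfile();
  fuzzer::SetOutputFile(Tmp);   // redirect Printf() output
  fuzzer::Printf("hello from the test\n");
  fuzzer::SetOutputFile(Old);   // restore before inspecting Tmp
  std::fclose(Tmp);
}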
diff --git a/compiler-rt/lib/fuzzer/FuzzerIOPosix.cpp b/compiler-rt/lib/fuzzer/FuzzerIOPosix.cpp
index 4706a40959be..3700fb098e55 100644
--- a/compiler-rt/lib/fuzzer/FuzzerIOPosix.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerIOPosix.cpp
@@ -53,7 +53,7 @@ std::string Basename(const std::string &Path) {
}
void ListFilesInDirRecursive(const std::string &Dir, long *Epoch,
- Vector<std::string> *V, bool TopDir) {
+ std::vector<std::string> *V, bool TopDir) {
auto E = GetEpoch(Dir);
if (Epoch)
if (E && *Epoch >= E) return;
@@ -78,7 +78,6 @@ void ListFilesInDirRecursive(const std::string &Dir, long *Epoch,
*Epoch = E;
}
-
void IterateDirRecursive(const std::string &Dir,
void (*DirPreCallback)(const std::string &Dir),
void (*DirPostCallback)(const std::string &Dir),
diff --git a/compiler-rt/lib/fuzzer/FuzzerIOWindows.cpp b/compiler-rt/lib/fuzzer/FuzzerIOWindows.cpp
index 61ad35e281f5..6771fc173c91 100644
--- a/compiler-rt/lib/fuzzer/FuzzerIOWindows.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerIOWindows.cpp
@@ -111,7 +111,7 @@ size_t FileSize(const std::string &Path) {
}
void ListFilesInDirRecursive(const std::string &Dir, long *Epoch,
- Vector<std::string> *V, bool TopDir) {
+ std::vector<std::string> *V, bool TopDir) {
auto E = GetEpoch(Dir);
if (Epoch)
if (E && *Epoch >= E) return;
@@ -159,7 +159,6 @@ void ListFilesInDirRecursive(const std::string &Dir, long *Epoch,
*Epoch = E;
}
-
void IterateDirRecursive(const std::string &Dir,
void (*DirPreCallback)(const std::string &Dir),
void (*DirPostCallback)(const std::string &Dir),
@@ -297,9 +296,8 @@ static size_t ParseServerAndShare(const std::string &FileName,
return Pos - Offset;
}
-// Parse the given Ref string from the position Offset, to exactly match the given
-// string Patt.
-// Returns number of characters considered if successful.
+// Parse the given Ref string from the position Offset, to exactly match the
+// given string Patt. Returns number of characters considered if successful.
static size_t ParseCustomString(const std::string &Ref, size_t Offset,
const char *Patt) {
size_t Len = strlen(Patt);
diff --git a/compiler-rt/lib/fuzzer/FuzzerInternal.h b/compiler-rt/lib/fuzzer/FuzzerInternal.h
index 37c8a01dc3c6..6637b0034e55 100644
--- a/compiler-rt/lib/fuzzer/FuzzerInternal.h
+++ b/compiler-rt/lib/fuzzer/FuzzerInternal.h
@@ -35,8 +35,8 @@ public:
Fuzzer(UserCallback CB, InputCorpus &Corpus, MutationDispatcher &MD,
FuzzingOptions Options);
~Fuzzer();
- void Loop(Vector<SizedFile> &CorporaFiles);
- void ReadAndExecuteSeedCorpora(Vector<SizedFile> &CorporaFiles);
+ void Loop(std::vector<SizedFile> &CorporaFiles);
+ void ReadAndExecuteSeedCorpora(std::vector<SizedFile> &CorporaFiles);
void MinimizeCrashLoop(const Unit &U);
void RereadOutputCorpus(size_t MaxSize);
@@ -72,8 +72,9 @@ public:
void TPCUpdateObservedPCs();
// Merge Corpora[1:] into Corpora[0].
- void Merge(const Vector<std::string> &Corpora);
- void CrashResistantMergeInternalStep(const std::string &ControlFilePath);
+ void Merge(const std::vector<std::string> &Corpora);
+ void CrashResistantMergeInternalStep(const std::string &ControlFilePath,
+ bool IsSetCoverMerge);
MutationDispatcher &GetMD() { return MD; }
void PrintFinalStats();
void SetMaxInputLen(size_t MaxInputLen);
@@ -141,7 +142,7 @@ private:
size_t MaxMutationLen = 0;
size_t TmpMaxMutationLen = 0;
- Vector<uint32_t> UniqFeatureSetTmp;
+ std::vector<uint32_t> UniqFeatureSetTmp;
// Need to know our own thread.
static thread_local bool IsMyThread;
diff --git a/compiler-rt/lib/fuzzer/FuzzerLoop.cpp b/compiler-rt/lib/fuzzer/FuzzerLoop.cpp
index 86a78ab75174..3205942f6d84 100644
--- a/compiler-rt/lib/fuzzer/FuzzerLoop.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerLoop.cpp
@@ -388,7 +388,7 @@ void Fuzzer::SetMaxMutationLen(size_t MaxMutationLen) {
void Fuzzer::CheckExitOnSrcPosOrItem() {
if (!Options.ExitOnSrcPos.empty()) {
- static auto *PCsSet = new Set<uintptr_t>;
+ static auto *PCsSet = new std::set<uintptr_t>;
auto HandlePC = [&](const TracePC::PCTableEntry *TE) {
if (!PCsSet->insert(TE->PC).second)
return;
@@ -413,8 +413,8 @@ void Fuzzer::CheckExitOnSrcPosOrItem() {
void Fuzzer::RereadOutputCorpus(size_t MaxSize) {
if (Options.OutputCorpus.empty() || !Options.ReloadIntervalSec)
return;
- Vector<Unit> AdditionalCorpus;
- Vector<std::string> AdditionalCorpusPaths;
+ std::vector<Unit> AdditionalCorpus;
+ std::vector<std::string> AdditionalCorpusPaths;
ReadDirToVectorOfUnits(
Options.OutputCorpus.c_str(), &AdditionalCorpus,
&EpochOfLastReadOfOutputCorpus, MaxSize,
@@ -457,7 +457,7 @@ void Fuzzer::PrintPulseAndReportSlowInput(const uint8_t *Data, size_t Size) {
static void WriteFeatureSetToFile(const std::string &FeaturesDir,
const std::string &FileName,
- const Vector<uint32_t> &FeatureSet) {
+ const std::vector<uint32_t> &FeatureSet) {
if (FeaturesDir.empty() || FeatureSet.empty()) return;
WriteToFile(reinterpret_cast<const uint8_t *>(FeatureSet.data()),
FeatureSet.size() * sizeof(FeatureSet[0]),
@@ -548,7 +548,7 @@ bool Fuzzer::RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile,
FoundUniqFeaturesOfII == II->UniqFeatureSet.size() &&
II->U.size() > Size) {
auto OldFeaturesFile = Sha1ToString(II->Sha1);
- Corpus.Replace(II, {Data, Data + Size});
+ Corpus.Replace(II, {Data, Data + Size}, TimeOfUnit);
RenameFeatureSetFile(Options.FeaturesDir, OldFeaturesFile,
Sha1ToString(II->Sha1));
return true;
@@ -784,7 +784,7 @@ void Fuzzer::PurgeAllocator() {
LastAllocatorPurgeAttemptTime = system_clock::now();
}
-void Fuzzer::ReadAndExecuteSeedCorpora(Vector<SizedFile> &CorporaFiles) {
+void Fuzzer::ReadAndExecuteSeedCorpora(std::vector<SizedFile> &CorporaFiles) {
const size_t kMaxSaneLen = 1 << 20;
const size_t kMinDefaultLen = 4096;
size_t MaxSize = 0;
@@ -849,7 +849,7 @@ void Fuzzer::ReadAndExecuteSeedCorpora(Vector<SizedFile> &CorporaFiles) {
}
}
-void Fuzzer::Loop(Vector<SizedFile> &CorporaFiles) {
+void Fuzzer::Loop(std::vector<SizedFile> &CorporaFiles) {
auto FocusFunctionOrAuto = Options.FocusFunction;
DFT.Init(Options.DataFlowTrace, &FocusFunctionOrAuto, CorporaFiles,
MD.GetRand());
diff --git a/compiler-rt/lib/fuzzer/FuzzerMerge.cpp b/compiler-rt/lib/fuzzer/FuzzerMerge.cpp
index 162453ceae2c..24bd11958e80 100644
--- a/compiler-rt/lib/fuzzer/FuzzerMerge.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerMerge.cpp
@@ -77,8 +77,8 @@ bool Merger::Parse(std::istream &IS, bool ParseCoverage) {
size_t ExpectedStartMarker = 0;
const size_t kInvalidStartMarker = -1;
size_t LastSeenStartMarker = kInvalidStartMarker;
- Vector<uint32_t> TmpFeatures;
- Set<uint32_t> PCs;
+ std::vector<uint32_t> TmpFeatures;
+ std::set<uint32_t> PCs;
while (std::getline(IS, Line, '\n')) {
std::istringstream ISS1(Line);
std::string Marker;
@@ -132,15 +132,16 @@ size_t Merger::ApproximateMemoryConsumption() const {
// Decides which files need to be merged (add those to NewFiles).
// Returns the number of new features added.
-size_t Merger::Merge(const Set<uint32_t> &InitialFeatures,
- Set<uint32_t> *NewFeatures,
- const Set<uint32_t> &InitialCov, Set<uint32_t> *NewCov,
- Vector<std::string> *NewFiles) {
+size_t Merger::Merge(const std::set<uint32_t> &InitialFeatures,
+ std::set<uint32_t> *NewFeatures,
+ const std::set<uint32_t> &InitialCov,
+ std::set<uint32_t> *NewCov,
+ std::vector<std::string> *NewFiles) {
NewFiles->clear();
NewFeatures->clear();
NewCov->clear();
assert(NumFilesInFirstCorpus <= Files.size());
- Set<uint32_t> AllFeatures = InitialFeatures;
+ std::set<uint32_t> AllFeatures = InitialFeatures;
// What features are in the initial corpus?
for (size_t i = 0; i < NumFilesInFirstCorpus; i++) {
@@ -150,7 +151,7 @@ size_t Merger::Merge(const Set<uint32_t> &InitialFeatures,
// Remove all features that we already know from all other inputs.
for (size_t i = NumFilesInFirstCorpus; i < Files.size(); i++) {
auto &Cur = Files[i].Features;
- Vector<uint32_t> Tmp;
+ std::vector<uint32_t> Tmp;
std::set_difference(Cur.begin(), Cur.end(), AllFeatures.begin(),
AllFeatures.end(), std::inserter(Tmp, Tmp.begin()));
Cur.swap(Tmp);
@@ -188,15 +189,16 @@ size_t Merger::Merge(const Set<uint32_t> &InitialFeatures,
return NewFeatures->size();
}
-Set<uint32_t> Merger::AllFeatures() const {
- Set<uint32_t> S;
+std::set<uint32_t> Merger::AllFeatures() const {
+ std::set<uint32_t> S;
for (auto &File : Files)
S.insert(File.Features.begin(), File.Features.end());
return S;
}
// Inner process. May crash if the target crashes.
-void Fuzzer::CrashResistantMergeInternalStep(const std::string &CFPath) {
+void Fuzzer::CrashResistantMergeInternalStep(const std::string &CFPath,
+ bool IsSetCoverMerge) {
Printf("MERGE-INNER: using the control file '%s'\n", CFPath.c_str());
Merger M;
std::ifstream IF(CFPath);
@@ -212,11 +214,11 @@ void Fuzzer::CrashResistantMergeInternalStep(const std::string &CFPath) {
M.Files.size() - M.FirstNotProcessedFile);
std::ofstream OF(CFPath, std::ofstream::out | std::ofstream::app);
- Set<size_t> AllFeatures;
+ std::set<size_t> AllFeatures;
auto PrintStatsWrapper = [this, &AllFeatures](const char* Where) {
this->PrintStats(Where, "\n", 0, AllFeatures.size());
};
- Set<const TracePC::PCTableEntry *> AllPCs;
+ std::set<const TracePC::PCTableEntry *> AllPCs;
for (size_t i = M.FirstNotProcessedFile; i < M.Files.size(); i++) {
Fuzzer::MaybeExitGracefully();
auto U = FileToVector(M.Files[i].Name);
@@ -234,13 +236,14 @@ void Fuzzer::CrashResistantMergeInternalStep(const std::string &CFPath) {
// Collect coverage. We are iterating over the files in this order:
// * First, files in the initial corpus ordered by size, smallest first.
// * Then, all other files, smallest first.
- // So it makes no sense to record all features for all files, instead we
- // only record features that were not seen before.
- Set<size_t> UniqFeatures;
- TPC.CollectFeatures([&](size_t Feature) {
- if (AllFeatures.insert(Feature).second)
- UniqFeatures.insert(Feature);
- });
+ std::set<size_t> Features;
+ if (IsSetCoverMerge)
+ TPC.CollectFeatures([&](size_t Feature) { Features.insert(Feature); });
+ else
+ TPC.CollectFeatures([&](size_t Feature) {
+ if (AllFeatures.insert(Feature).second)
+ Features.insert(Feature);
+ });
TPC.UpdateObservedPCs();
// Show stats.
if (!(TotalNumberOfRuns & (TotalNumberOfRuns - 1)))
@@ -249,7 +252,7 @@ void Fuzzer::CrashResistantMergeInternalStep(const std::string &CFPath) {
PrintStatsWrapper("LOADED");
// Write the post-run marker and the coverage.
OF << "FT " << i;
- for (size_t F : UniqFeatures)
+ for (size_t F : Features)
OF << " " << F;
OF << "\n";
OF << "COV " << i;
@@ -263,15 +266,137 @@ void Fuzzer::CrashResistantMergeInternalStep(const std::string &CFPath) {
PrintStatsWrapper("DONE ");
}
-static size_t WriteNewControlFile(const std::string &CFPath,
- const Vector<SizedFile> &OldCorpus,
- const Vector<SizedFile> &NewCorpus,
- const Vector<MergeFileInfo> &KnownFiles) {
+// Merges all corpora into the first corpus. A file is added into
+// the first corpus only if it adds new features. Unlike `Merger::Merge`,
+// this implementation calculates an approximation of the minimum set
+// of corpus files that cover all known features (set cover problem).
+// Generally, this means that files with more features are preferred for
+// merge into the first corpus. When two files have the same number of
+// features, the smaller one is preferred.
+size_t Merger::SetCoverMerge(const std::set<uint32_t> &InitialFeatures,
+ std::set<uint32_t> *NewFeatures,
+ const std::set<uint32_t> &InitialCov,
+ std::set<uint32_t> *NewCov,
+ std::vector<std::string> *NewFiles) {
+ assert(NumFilesInFirstCorpus <= Files.size());
+ NewFiles->clear();
+ NewFeatures->clear();
+ NewCov->clear();
+ std::set<uint32_t> AllFeatures;
+ // 1 << 21 - 1 is the maximum feature index.
+ // See 'kFeatureSetSize' in 'FuzzerCorpus.h'.
+ const uint32_t kFeatureSetSize = 1 << 21;
+ std::vector<bool> Covered(kFeatureSetSize, false);
+ size_t NumCovered = 0;
+
+ std::set<uint32_t> ExistingFeatures = InitialFeatures;
+ for (size_t i = 0; i < NumFilesInFirstCorpus; ++i)
+ ExistingFeatures.insert(Files[i].Features.begin(), Files[i].Features.end());
+
+ // Mark the existing features as covered.
+ for (const auto &F : ExistingFeatures) {
+ if (!Covered[F % kFeatureSetSize]) {
+ ++NumCovered;
+ Covered[F % kFeatureSetSize] = true;
+ }
+ // Calculate an underestimation of the set of covered features
+ // since the `Covered` bitvector is smaller than the feature range.
+ AllFeatures.insert(F % kFeatureSetSize);
+ }
+
+ std::set<size_t> RemainingFiles;
+ for (size_t i = NumFilesInFirstCorpus; i < Files.size(); ++i) {
+    // Construct an incremental sequence that represents the
+    // indices of all files (excluding those in the initial corpus).
+ // RemainingFiles = range(NumFilesInFirstCorpus..Files.size()).
+ RemainingFiles.insert(i);
+    // Insert this file's features into AllFeatures.
+ for (const auto &F : Files[i].Features)
+ AllFeatures.insert(F % kFeatureSetSize);
+ }
+
+ // Integrate files into Covered until set is complete.
+ while (NumCovered != AllFeatures.size()) {
+ // Index to file with largest number of unique features.
+ size_t MaxFeaturesIndex = NumFilesInFirstCorpus;
+ // Indices to remove from RemainingFiles.
+ std::set<size_t> RemoveIndices;
+ // Running max unique feature count.
+ // Updated upon finding a file with more features.
+ size_t MaxNumFeatures = 0;
+
+ // Iterate over all files not yet integrated into Covered,
+ // to find the file which has the largest number of
+ // features that are not already in Covered.
+ for (const auto &i : RemainingFiles) {
+ const auto &File = Files[i];
+ size_t CurrentUnique = 0;
+ // Count number of features in this file
+ // which are not yet in Covered.
+ for (const auto &F : File.Features)
+ if (!Covered[F % kFeatureSetSize])
+ ++CurrentUnique;
+
+ if (CurrentUnique == 0) {
+ // All features in this file are already in Covered: skip next time.
+ RemoveIndices.insert(i);
+ } else if (CurrentUnique > MaxNumFeatures ||
+ (CurrentUnique == MaxNumFeatures &&
+ File.Size < Files[MaxFeaturesIndex].Size)) {
+ // Update the max features file based on unique features
+ // Break ties by selecting smaller files.
+ MaxNumFeatures = CurrentUnique;
+ MaxFeaturesIndex = i;
+ }
+ }
+    // Must be a valid index.
+ assert(MaxFeaturesIndex < Files.size());
+ // Remove any feature-less files found.
+ for (const auto &i : RemoveIndices)
+ RemainingFiles.erase(i);
+ if (MaxNumFeatures == 0) {
+ // Did not find a file that adds unique features.
+ // This means that we should have no remaining files.
+ assert(RemainingFiles.size() == 0);
+ assert(NumCovered == AllFeatures.size());
+ break;
+ }
+
+ // MaxFeaturesIndex must be an element of Remaining.
+ assert(RemainingFiles.find(MaxFeaturesIndex) != RemainingFiles.end());
+ // Remove the file with the most features from Remaining.
+ RemainingFiles.erase(MaxFeaturesIndex);
+ const auto &MaxFeatureFile = Files[MaxFeaturesIndex];
+ // Add the features of the max feature file to Covered.
+ for (const auto &F : MaxFeatureFile.Features) {
+ if (!Covered[F % kFeatureSetSize]) {
+ ++NumCovered;
+ Covered[F % kFeatureSetSize] = true;
+ NewFeatures->insert(F);
+ }
+ }
+ // Add the index to this file to the result.
+ NewFiles->push_back(MaxFeatureFile.Name);
+ // Update NewCov with the additional coverage
+ // that MaxFeatureFile provides.
+ for (const auto &C : MaxFeatureFile.Cov)
+ if (InitialCov.find(C) == InitialCov.end())
+ NewCov->insert(C);
+ }
+
+ return NewFeatures->size();
+}
+
+static size_t
+WriteNewControlFile(const std::string &CFPath,
+ const std::vector<SizedFile> &OldCorpus,
+ const std::vector<SizedFile> &NewCorpus,
+ const std::vector<MergeFileInfo> &KnownFiles) {
std::unordered_set<std::string> FilesToSkip;
for (auto &SF: KnownFiles)
FilesToSkip.insert(SF.Name);
- Vector<std::string> FilesToUse;
+ std::vector<std::string> FilesToUse;
auto MaybeUseFile = [=, &FilesToUse](std::string Name) {
if (FilesToSkip.find(Name) == FilesToSkip.end())
FilesToUse.push_back(Name);
@@ -299,19 +424,19 @@ static size_t WriteNewControlFile(const std::string &CFPath,
}
// Outer process. Does not call the target code and thus should not fail.
-void CrashResistantMerge(const Vector<std::string> &Args,
- const Vector<SizedFile> &OldCorpus,
- const Vector<SizedFile> &NewCorpus,
- Vector<std::string> *NewFiles,
- const Set<uint32_t> &InitialFeatures,
- Set<uint32_t> *NewFeatures,
- const Set<uint32_t> &InitialCov,
- Set<uint32_t> *NewCov,
- const std::string &CFPath,
- bool V /*Verbose*/) {
+void CrashResistantMerge(const std::vector<std::string> &Args,
+ const std::vector<SizedFile> &OldCorpus,
+ const std::vector<SizedFile> &NewCorpus,
+ std::vector<std::string> *NewFiles,
+ const std::set<uint32_t> &InitialFeatures,
+ std::set<uint32_t> *NewFeatures,
+ const std::set<uint32_t> &InitialCov,
+ std::set<uint32_t> *NewCov, const std::string &CFPath,
+ bool V, /*Verbose*/
+ bool IsSetCoverMerge) {
if (NewCorpus.empty() && OldCorpus.empty()) return; // Nothing to merge.
size_t NumAttempts = 0;
- Vector<MergeFileInfo> KnownFiles;
+ std::vector<MergeFileInfo> KnownFiles;
if (FileSize(CFPath)) {
VPrintf(V, "MERGE-OUTER: non-empty control file provided: '%s'\n",
CFPath.c_str());
@@ -363,6 +488,7 @@ void CrashResistantMerge(const Vector<std::string> &Args,
// Every inner process should execute at least one input.
Command BaseCmd(Args);
BaseCmd.removeFlag("merge");
+ BaseCmd.removeFlag("set_cover_merge");
BaseCmd.removeFlag("fork");
BaseCmd.removeFlag("collect_data_flow");
for (size_t Attempt = 1; Attempt <= NumAttempts; Attempt++) {
@@ -370,14 +496,16 @@ void CrashResistantMerge(const Vector<std::string> &Args,
VPrintf(V, "MERGE-OUTER: attempt %zd\n", Attempt);
Command Cmd(BaseCmd);
Cmd.addFlag("merge_control_file", CFPath);
- Cmd.addFlag("merge_inner", "1");
+ // If we are going to use the set cover implementation for
+ // minimization, add the merge_inner=2 internal flag.
+ Cmd.addFlag("merge_inner", IsSetCoverMerge ? "2" : "1");
if (!V) {
Cmd.setOutputFile(getDevNull());
Cmd.combineOutAndErr();
}
auto ExitCode = ExecuteCommand(Cmd);
if (!ExitCode) {
- VPrintf(V, "MERGE-OUTER: succesfull in %zd attempt(s)\n", Attempt);
+ VPrintf(V, "MERGE-OUTER: successful in %zd attempt(s)\n", Attempt);
break;
}
}
@@ -395,7 +523,10 @@ void CrashResistantMerge(const Vector<std::string> &Args,
M.ApproximateMemoryConsumption() >> 20, GetPeakRSSMb());
M.Files.insert(M.Files.end(), KnownFiles.begin(), KnownFiles.end());
- M.Merge(InitialFeatures, NewFeatures, InitialCov, NewCov, NewFiles);
+ if (IsSetCoverMerge)
+ M.SetCoverMerge(InitialFeatures, NewFeatures, InitialCov, NewCov, NewFiles);
+ else
+ M.Merge(InitialFeatures, NewFeatures, InitialCov, NewCov, NewFiles);
VPrintf(V, "MERGE-OUTER: %zd new files with %zd new features added; "
"%zd new coverage edges\n",
NewFiles->size(), NewFeatures->size(), NewCov->size());
diff --git a/compiler-rt/lib/fuzzer/FuzzerMerge.h b/compiler-rt/lib/fuzzer/FuzzerMerge.h
index e0c6bc539bdb..42f798e1da18 100644
--- a/compiler-rt/lib/fuzzer/FuzzerMerge.h
+++ b/compiler-rt/lib/fuzzer/FuzzerMerge.h
@@ -41,6 +41,7 @@
#define LLVM_FUZZER_MERGE_H
#include "FuzzerDefs.h"
+#include "FuzzerIO.h"
#include <istream>
#include <ostream>
@@ -52,11 +53,11 @@ namespace fuzzer {
struct MergeFileInfo {
std::string Name;
size_t Size = 0;
- Vector<uint32_t> Features, Cov;
+ std::vector<uint32_t> Features, Cov;
};
struct Merger {
- Vector<MergeFileInfo> Files;
+ std::vector<MergeFileInfo> Files;
size_t NumFilesInFirstCorpus = 0;
size_t FirstNotProcessedFile = 0;
std::string LastFailure;
@@ -64,23 +65,28 @@ struct Merger {
bool Parse(std::istream &IS, bool ParseCoverage);
bool Parse(const std::string &Str, bool ParseCoverage);
void ParseOrExit(std::istream &IS, bool ParseCoverage);
- size_t Merge(const Set<uint32_t> &InitialFeatures, Set<uint32_t> *NewFeatures,
- const Set<uint32_t> &InitialCov, Set<uint32_t> *NewCov,
- Vector<std::string> *NewFiles);
+ size_t Merge(const std::set<uint32_t> &InitialFeatures,
+ std::set<uint32_t> *NewFeatures,
+ const std::set<uint32_t> &InitialCov, std::set<uint32_t> *NewCov,
+ std::vector<std::string> *NewFiles);
+ size_t SetCoverMerge(const std::set<uint32_t> &InitialFeatures,
+ std::set<uint32_t> *NewFeatures,
+ const std::set<uint32_t> &InitialCov,
+ std::set<uint32_t> *NewCov,
+ std::vector<std::string> *NewFiles);
size_t ApproximateMemoryConsumption() const;
- Set<uint32_t> AllFeatures() const;
+ std::set<uint32_t> AllFeatures() const;
};
-void CrashResistantMerge(const Vector<std::string> &Args,
- const Vector<SizedFile> &OldCorpus,
- const Vector<SizedFile> &NewCorpus,
- Vector<std::string> *NewFiles,
- const Set<uint32_t> &InitialFeatures,
- Set<uint32_t> *NewFeatures,
- const Set<uint32_t> &InitialCov,
- Set<uint32_t> *NewCov,
- const std::string &CFPath,
- bool Verbose);
+void CrashResistantMerge(const std::vector<std::string> &Args,
+ const std::vector<SizedFile> &OldCorpus,
+ const std::vector<SizedFile> &NewCorpus,
+ std::vector<std::string> *NewFiles,
+ const std::set<uint32_t> &InitialFeatures,
+ std::set<uint32_t> *NewFeatures,
+ const std::set<uint32_t> &InitialCov,
+ std::set<uint32_t> *NewCov, const std::string &CFPath,
+ bool Verbose, bool IsSetCoverMerge);
} // namespace fuzzer
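Given the declarations above, an outer process that has already produced a merge control file could drive the new entry point roughly as follows. This is a hedged sketch: the function name, file handling, and empty initial sets are illustrative rather than part of libFuzzer's driver.

#include <fstream>
#include <set>
#include <string>
#include <vector>
#include "FuzzerMerge.h"

// Hypothetical driver: CFPath names an existing merge control file.
void RunSetCoverMerge(const std::string &CFPath) {
  std::ifstream IF(CFPath);
  fuzzer::Merger M;
  M.ParseOrExit(IF, /*ParseCoverage=*/true);
  std::set<uint32_t> InitialFeatures, InitialCov;  // empty: nothing pre-covered
  std::set<uint32_t> NewFeatures, NewCov;
  std::vector<std::string> NewFiles;
  size_t NumNewFeatures = M.SetCoverMerge(InitialFeatures, &NewFeatures,
                                          InitialCov, &NewCov, &NewFiles);
  // NewFiles now holds a greedily chosen subset that still covers every
  // feature seen in the parsed corpus; NumNewFeatures is the count of
  // newly covered features.
  (void)NumNewFeatures;
}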
diff --git a/compiler-rt/lib/fuzzer/FuzzerMutate.cpp b/compiler-rt/lib/fuzzer/FuzzerMutate.cpp
index 4650f1beceac..d663900fdc3a 100644
--- a/compiler-rt/lib/fuzzer/FuzzerMutate.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerMutate.cpp
@@ -485,7 +485,7 @@ void MutationDispatcher::RecordSuccessfulMutationSequence() {
}
void MutationDispatcher::PrintRecommendedDictionary() {
- Vector<DictionaryEntry> V;
+ std::vector<DictionaryEntry> V;
for (auto &DE : PersistentAutoDictionary)
if (!ManualDictionary.ContainsWord(DE.GetW()))
V.push_back(DE);
@@ -540,7 +540,7 @@ size_t MutationDispatcher::DefaultMutate(uint8_t *Data, size_t Size,
// Mutates Data in place, returns new size.
size_t MutationDispatcher::MutateImpl(uint8_t *Data, size_t Size,
size_t MaxSize,
- Vector<Mutator> &Mutators) {
+ std::vector<Mutator> &Mutators) {
assert(MaxSize > 0);
// Some mutations may fail (e.g. can't insert more bytes if Size == MaxSize),
// in which case they will return 0.
@@ -562,7 +562,7 @@ size_t MutationDispatcher::MutateImpl(uint8_t *Data, size_t Size,
// Mask represents the set of Data bytes that are worth mutating.
size_t MutationDispatcher::MutateWithMask(uint8_t *Data, size_t Size,
size_t MaxSize,
- const Vector<uint8_t> &Mask) {
+ const std::vector<uint8_t> &Mask) {
size_t MaskedSize = std::min(Size, Mask.size());
// * Copy the worthy bytes into a temporary array T
// * Mutate T
diff --git a/compiler-rt/lib/fuzzer/FuzzerMutate.h b/compiler-rt/lib/fuzzer/FuzzerMutate.h
index fd37191156d3..97704e2160aa 100644
--- a/compiler-rt/lib/fuzzer/FuzzerMutate.h
+++ b/compiler-rt/lib/fuzzer/FuzzerMutate.h
@@ -77,7 +77,7 @@ public:
/// that have '1' in Mask.
/// Mask.size() should be >= Size.
size_t MutateWithMask(uint8_t *Data, size_t Size, size_t MaxSize,
- const Vector<uint8_t> &Mask);
+ const std::vector<uint8_t> &Mask);
/// Applies one of the default mutations. Provided as a service
/// to mutation authors.
@@ -104,7 +104,7 @@ public:
size_t AddWordFromDictionary(Dictionary &D, uint8_t *Data, size_t Size,
size_t MaxSize);
size_t MutateImpl(uint8_t *Data, size_t Size, size_t MaxSize,
- Vector<Mutator> &Mutators);
+ std::vector<Mutator> &Mutators);
size_t InsertPartOf(const uint8_t *From, size_t FromSize, uint8_t *To,
size_t ToSize, size_t MaxToSize);
@@ -133,22 +133,22 @@ public:
// entries that led to successful discoveries in the past mutations.
Dictionary PersistentAutoDictionary;
- Vector<DictionaryEntry *> CurrentDictionaryEntrySequence;
+ std::vector<DictionaryEntry *> CurrentDictionaryEntrySequence;
static const size_t kCmpDictionaryEntriesDequeSize = 16;
DictionaryEntry CmpDictionaryEntriesDeque[kCmpDictionaryEntriesDequeSize];
size_t CmpDictionaryEntriesDequeIdx = 0;
const Unit *CrossOverWith = nullptr;
- Vector<uint8_t> MutateInPlaceHere;
- Vector<uint8_t> MutateWithMaskTemp;
+ std::vector<uint8_t> MutateInPlaceHere;
+ std::vector<uint8_t> MutateWithMaskTemp;
// CustomCrossOver needs its own buffer as a custom implementation may call
// LLVMFuzzerMutate, which in turn may resize MutateInPlaceHere.
- Vector<uint8_t> CustomCrossOverInPlaceHere;
+ std::vector<uint8_t> CustomCrossOverInPlaceHere;
- Vector<Mutator> Mutators;
- Vector<Mutator> DefaultMutators;
- Vector<Mutator> CurrentMutatorSequence;
+ std::vector<Mutator> Mutators;
+ std::vector<Mutator> DefaultMutators;
+ std::vector<Mutator> CurrentMutatorSequence;
};
} // namespace fuzzer
diff --git a/compiler-rt/lib/fuzzer/FuzzerOptions.h b/compiler-rt/lib/fuzzer/FuzzerOptions.h
index d0c285a6821d..72e256106194 100644
--- a/compiler-rt/lib/fuzzer/FuzzerOptions.h
+++ b/compiler-rt/lib/fuzzer/FuzzerOptions.h
@@ -47,6 +47,7 @@ struct FuzzingOptions {
int ReportSlowUnits = 10;
bool OnlyASCII = false;
bool Entropic = true;
+ bool ForkCorpusGroups = false;
size_t EntropicFeatureFrequencyThreshold = 0xFF;
size_t EntropicNumberOfRarestFeatures = 100;
bool EntropicScalePerExecTime = false;
diff --git a/compiler-rt/lib/fuzzer/FuzzerTracePC.cpp b/compiler-rt/lib/fuzzer/FuzzerTracePC.cpp
index d808b9b00fa3..af8d1ce50f3f 100644
--- a/compiler-rt/lib/fuzzer/FuzzerTracePC.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerTracePC.cpp
@@ -157,7 +157,7 @@ ALWAYS_INLINE uintptr_t TracePC::GetNextInstructionPc(uintptr_t PC) {
}
void TracePC::UpdateObservedPCs() {
- Vector<uintptr_t> CoveredFuncs;
+ std::vector<uintptr_t> CoveredFuncs;
auto ObservePC = [&](const PCTableEntry *TE) {
if (ObservedPCs.insert(TE).second && DoPrintNewPCs) {
PrintPC("\tNEW_PC: %p %F %L", "\tNEW_PC: %p",
@@ -300,8 +300,8 @@ void TracePC::PrintCoverage(bool PrintAllCounters) {
FunctionStr = FunctionStr.substr(3);
std::string LineStr = DescribePC("%l", VisualizePC);
size_t NumEdges = Last - First;
- Vector<uintptr_t> UncoveredPCs;
- Vector<uintptr_t> CoveredPCs;
+ std::vector<uintptr_t> UncoveredPCs;
+ std::vector<uintptr_t> CoveredPCs;
for (auto TE = First; TE < Last; TE++)
if (!ObservedPCs.count(TE))
UncoveredPCs.push_back(TE->PC);
@@ -391,6 +391,7 @@ void TracePC::HandleCmp(uintptr_t PC, T Arg1, T Arg2) {
ValueProfileMap.AddValue(PC * 128 + 64 + AbsoluteDistance);
}
+ATTRIBUTE_NO_SANITIZE_MEMORY
static size_t InternalStrnlen(const char *S, size_t MaxLen) {
size_t Len = 0;
for (; Len < MaxLen && S[Len]; Len++) {}
@@ -398,7 +399,8 @@ static size_t InternalStrnlen(const char *S, size_t MaxLen) {
}
// Finds min of (strlen(S1), strlen(S2)).
-// Needed bacause one of these strings may actually be non-zero terminated.
+// Needed because one of these strings may actually be non-zero terminated.
+ATTRIBUTE_NO_SANITIZE_MEMORY
static size_t InternalStrnlen2(const char *S1, const char *S2) {
size_t Len = 0;
for (; S1[Len] && S2[Len]; Len++) {}
diff --git a/compiler-rt/lib/fuzzer/FuzzerTracePC.h b/compiler-rt/lib/fuzzer/FuzzerTracePC.h
index a93732972f7d..af1f9d81e950 100644
--- a/compiler-rt/lib/fuzzer/FuzzerTracePC.h
+++ b/compiler-rt/lib/fuzzer/FuzzerTracePC.h
@@ -169,7 +169,7 @@ private:
size_t NumPCTables;
size_t NumPCsInPCTables;
- Set<const PCTableEntry*> ObservedPCs;
+ std::set<const PCTableEntry *> ObservedPCs;
std::unordered_map<uintptr_t, uintptr_t> ObservedFuncs; // PC => Counter.
uint8_t *FocusFunctionCounterPtr = nullptr;
diff --git a/compiler-rt/lib/fuzzer/FuzzerUtil.cpp b/compiler-rt/lib/fuzzer/FuzzerUtil.cpp
index 05185499bdd1..aeab70f20c28 100644
--- a/compiler-rt/lib/fuzzer/FuzzerUtil.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerUtil.cpp
@@ -43,7 +43,7 @@ void PrintASCIIByte(uint8_t Byte) {
else if (Byte >= 32 && Byte < 127)
Printf("%c", Byte);
else
- Printf("\\x%02x", Byte);
+ Printf("\\%03o", Byte);
}
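The escape-format change above swaps hex for octal, presumably to keep the output unambiguous when it is later read back as C-style string data: inside a single literal, a \x escape keeps consuming hex digits ("\x0a" directly followed by the character 'b' reads back as the single escape \x0ab), while an octal escape stops after three digits. A small illustrative sketch (the function name is made up):

#include <cstdint>
#include <cstdio>

// Prints one non-printable byte in both styles, e.g. 0x0a -> "\x0a" vs "\012".
// Only the octal form stays unambiguous if a hex-digit character follows it
// inside the same C string literal.
void PrintEscapedBothWays(uint8_t Byte) {
  std::printf("hex:   \\x%02x\n", Byte);
  std::printf("octal: \\%03o\n", Byte);
}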
void PrintASCII(const uint8_t *Data, size_t Size, const char *PrintAfter) {
@@ -124,7 +124,7 @@ bool ParseOneDictionaryEntry(const std::string &Str, Unit *U) {
return true;
}
-bool ParseDictionaryFile(const std::string &Text, Vector<Unit> *Units) {
+bool ParseDictionaryFile(const std::string &Text, std::vector<Unit> *Units) {
if (Text.empty()) {
Printf("ParseDictionaryFile: file does not exist or is empty\n");
return false;
diff --git a/compiler-rt/lib/fuzzer/FuzzerUtil.h b/compiler-rt/lib/fuzzer/FuzzerUtil.h
index a188a7be32a5..71d49097e559 100644
--- a/compiler-rt/lib/fuzzer/FuzzerUtil.h
+++ b/compiler-rt/lib/fuzzer/FuzzerUtil.h
@@ -66,10 +66,10 @@ int CloseProcessPipe(FILE *F);
const void *SearchMemory(const void *haystack, size_t haystacklen,
const void *needle, size_t needlelen);
-std::string CloneArgsWithoutX(const Vector<std::string> &Args,
+std::string CloneArgsWithoutX(const std::vector<std::string> &Args,
const char *X1, const char *X2);
-inline std::string CloneArgsWithoutX(const Vector<std::string> &Args,
+inline std::string CloneArgsWithoutX(const std::vector<std::string> &Args,
const char *X) {
return CloneArgsWithoutX(Args, X, X);
}
diff --git a/compiler-rt/lib/fuzzer/FuzzerUtilFuchsia.cpp b/compiler-rt/lib/fuzzer/FuzzerUtilFuchsia.cpp
index 5034b4a28d3f..d80b80cccb80 100644
--- a/compiler-rt/lib/fuzzer/FuzzerUtilFuchsia.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerUtilFuchsia.cpp
@@ -52,6 +52,12 @@ void CrashTrampolineAsm() __asm__("CrashTrampolineAsm");
namespace {
+// The signal handler thread uses Zircon exceptions to resume crashed threads
+// into libFuzzer's POSIX signal handlers. The associated event is used to
+// signal when the thread is running, and when it should stop.
+std::thread SignalHandler;
+zx_handle_t SignalHandlerEvent = ZX_HANDLE_INVALID;
+
// Helper function to handle Zircon syscall failures.
void ExitOnErr(zx_status_t Status, const char *Syscall) {
if (Status != ZX_OK) {
@@ -68,23 +74,6 @@ void AlarmHandler(int Seconds) {
}
}
-// CFAOffset is used to reference the stack pointer before entering the
-// trampoline (Stack Pointer + CFAOffset = prev Stack Pointer). Before jumping
-// to the trampoline we copy all the registers onto the stack. We need to make
-// sure that the new stack has enough space to store all the registers.
-//
-// The trampoline holds CFI information regarding the registers stored in the
-// stack, which is then used by the unwinder to restore them.
-#if defined(__x86_64__)
-// In x86_64 the crashing function might also be using the red zone (128 bytes
-// on top of their rsp).
-constexpr size_t CFAOffset = 128 + sizeof(zx_thread_state_general_regs_t);
-#elif defined(__aarch64__)
-// In aarch64 we need to always have the stack pointer aligned to 16 bytes, so we
-// make sure that we are keeping that same alignment.
-constexpr size_t CFAOffset = (sizeof(zx_thread_state_general_regs_t) + 15) & -(uintptr_t)16;
-#endif
-
// For the crash handler, we need to call Fuzzer::StaticCrashSignalCallback
// without POSIX signal handlers. To achieve this, we use an assembly function
// to add the necessary CFI unwinding information and a C function to bridge
@@ -163,10 +152,10 @@ constexpr size_t CFAOffset = (sizeof(zx_thread_state_general_regs_t) + 15) & -(u
// Produces an assembler immediate operand for the named or numbered register.
// This operand contains the offset of the register relative to the CFA.
-#define ASM_OPERAND_REG(reg) \
- [reg] "i"(offsetof(zx_thread_state_general_regs_t, reg) - CFAOffset),
-#define ASM_OPERAND_NUM(num) \
- [x##num] "i"(offsetof(zx_thread_state_general_regs_t, r[num]) - CFAOffset),
+#define ASM_OPERAND_REG(reg) \
+ [reg] "i"(offsetof(zx_thread_state_general_regs_t, reg)),
+#define ASM_OPERAND_NUM(num) \
+ [x##num] "i"(offsetof(zx_thread_state_general_regs_t, r[num])),
// Trampoline to bridge from the assembly below to the static C++ crash
// callback.
@@ -178,62 +167,57 @@ static void StaticCrashHandler() {
}
}
-// Creates the trampoline with the necessary CFI information to unwind through
-// to the crashing call stack:
-// * Defining the CFA so that it points to the stack pointer at the point
-// of crash.
-// * Storing all registers at the point of crash in the stack and refer to them
-// via CFI information (relative to the CFA).
-// * Setting the return column so the unwinder knows how to continue unwinding.
-// * (x86_64) making sure rsp is aligned before calling StaticCrashHandler.
-// * Calling StaticCrashHandler that will trigger the unwinder.
+// This trampoline function has the necessary CFI information to unwind
+// and get a backtrace:
+// * The stack contains a copy of all the registers at the point of crash;
+// the code has CFI directives specifying how to restore them.
+// * A call to StaticCrashHandler, which will print the stacktrace and exit
+// the fuzzer, generating a crash artifact.
//
// The __attribute__((used)) is necessary because the function
// is never called; it's just a container around the assembly to allow it to
// use operands for compile-time computed constants.
__attribute__((used))
void MakeTrampoline() {
- __asm__(".cfi_endproc\n"
- ".pushsection .text.CrashTrampolineAsm\n"
- ".type CrashTrampolineAsm,STT_FUNC\n"
-"CrashTrampolineAsm:\n"
- ".cfi_startproc simple\n"
- ".cfi_signal_frame\n"
+ __asm__(
+ ".cfi_endproc\n"
+ ".pushsection .text.CrashTrampolineAsm\n"
+ ".type CrashTrampolineAsm,STT_FUNC\n"
+ "CrashTrampolineAsm:\n"
+ ".cfi_startproc simple\n"
+ ".cfi_signal_frame\n"
#if defined(__x86_64__)
- ".cfi_return_column rip\n"
- ".cfi_def_cfa rsp, %c[CFAOffset]\n"
- FOREACH_REGISTER(CFI_OFFSET_REG, CFI_OFFSET_NUM)
- "mov %%rsp, %%rbp\n"
- ".cfi_def_cfa_register rbp\n"
- "andq $-16, %%rsp\n"
- "call %c[StaticCrashHandler]\n"
- "ud2\n"
+ ".cfi_return_column rip\n"
+ ".cfi_def_cfa rsp, 0\n"
+ FOREACH_REGISTER(CFI_OFFSET_REG, CFI_OFFSET_NUM)
+ "call %c[StaticCrashHandler]\n"
+ "ud2\n"
#elif defined(__aarch64__)
- ".cfi_return_column 33\n"
- ".cfi_def_cfa sp, %c[CFAOffset]\n"
- FOREACH_REGISTER(CFI_OFFSET_REG, CFI_OFFSET_NUM)
- ".cfi_offset 33, %c[pc]\n"
- ".cfi_offset 30, %c[lr]\n"
- "bl %c[StaticCrashHandler]\n"
- "brk 1\n"
+ ".cfi_return_column 33\n"
+ ".cfi_def_cfa sp, 0\n"
+ FOREACH_REGISTER(CFI_OFFSET_REG, CFI_OFFSET_NUM)
+ ".cfi_offset 33, %c[pc]\n"
+ ".cfi_offset 30, %c[lr]\n"
+ "bl %c[StaticCrashHandler]\n"
+ "brk 1\n"
#else
#error "Unsupported architecture for fuzzing on Fuchsia"
#endif
- ".cfi_endproc\n"
- ".size CrashTrampolineAsm, . - CrashTrampolineAsm\n"
- ".popsection\n"
- ".cfi_startproc\n"
- : // No outputs
- : FOREACH_REGISTER(ASM_OPERAND_REG, ASM_OPERAND_NUM)
+ ".cfi_endproc\n"
+ ".size CrashTrampolineAsm, . - CrashTrampolineAsm\n"
+ ".popsection\n"
+ ".cfi_startproc\n"
+ : // No outputs
+ : FOREACH_REGISTER(ASM_OPERAND_REG, ASM_OPERAND_NUM)
#if defined(__aarch64__)
- ASM_OPERAND_REG(pc)
- ASM_OPERAND_REG(lr)
+ ASM_OPERAND_REG(pc) ASM_OPERAND_REG(lr)
#endif
- [StaticCrashHandler] "i" (StaticCrashHandler),
- [CFAOffset] "i" (CFAOffset));
+ [StaticCrashHandler] "i"(StaticCrashHandler));
}
-void CrashHandler(zx_handle_t *Event) {
+void CrashHandler() {
+ assert(SignalHandlerEvent != ZX_HANDLE_INVALID);
+
// This structure is used to ensure we close handles to objects we create in
// this handler.
struct ScopedHandle {
@@ -251,16 +235,30 @@ void CrashHandler(zx_handle_t *Event) {
Self, ZX_EXCEPTION_CHANNEL_DEBUGGER, &Channel.Handle),
"_zx_task_create_exception_channel");
- ExitOnErr(_zx_object_signal(*Event, 0, ZX_USER_SIGNAL_0),
+ ExitOnErr(_zx_object_signal(SignalHandlerEvent, 0, ZX_USER_SIGNAL_0),
"_zx_object_signal");
// This thread lives as long as the process in order to keep handling
// crashes. In practice, the first crashed thread to reach the end of the
// StaticCrashHandler will end the process.
while (true) {
- ExitOnErr(_zx_object_wait_one(Channel.Handle, ZX_CHANNEL_READABLE,
- ZX_TIME_INFINITE, nullptr),
- "_zx_object_wait_one");
+ zx_wait_item_t WaitItems[] = {
+ {
+ .handle = SignalHandlerEvent,
+ .waitfor = ZX_SIGNAL_HANDLE_CLOSED,
+ .pending = 0,
+ },
+ {
+ .handle = Channel.Handle,
+ .waitfor = ZX_CHANNEL_READABLE | ZX_CHANNEL_PEER_CLOSED,
+ .pending = 0,
+ },
+ };
+ auto Status = _zx_object_wait_many(
+ WaitItems, sizeof(WaitItems) / sizeof(WaitItems[0]), ZX_TIME_INFINITE);
+ if (Status != ZX_OK || (WaitItems[1].pending & ZX_CHANNEL_READABLE) == 0) {
+ break;
+ }
zx_exception_info_t ExceptionInfo;
ScopedHandle Exception;
@@ -296,14 +294,17 @@ void CrashHandler(zx_handle_t *Event) {
// onto the stack and jump into a trampoline with CFI instructions on how
// to restore it.
#if defined(__x86_64__)
- uintptr_t StackPtr = GeneralRegisters.rsp - CFAOffset;
+ uintptr_t StackPtr =
+ (GeneralRegisters.rsp - (128 + sizeof(GeneralRegisters))) &
+ -(uintptr_t)16;
__unsanitized_memcpy(reinterpret_cast<void *>(StackPtr), &GeneralRegisters,
sizeof(GeneralRegisters));
GeneralRegisters.rsp = StackPtr;
GeneralRegisters.rip = reinterpret_cast<zx_vaddr_t>(CrashTrampolineAsm);
#elif defined(__aarch64__)
- uintptr_t StackPtr = GeneralRegisters.sp - CFAOffset;
+ uintptr_t StackPtr =
+ (GeneralRegisters.sp - sizeof(GeneralRegisters)) & -(uintptr_t)16;
__unsanitized_memcpy(reinterpret_cast<void *>(StackPtr), &GeneralRegisters,
sizeof(GeneralRegisters));
GeneralRegisters.sp = StackPtr;
@@ -327,6 +328,13 @@ void CrashHandler(zx_handle_t *Event) {
}
}
+void StopSignalHandler() {
+ _zx_handle_close(SignalHandlerEvent);
+ if (SignalHandler.joinable()) {
+ SignalHandler.join();
+ }
+}
+
} // namespace
// Platform specific functions.
@@ -356,16 +364,14 @@ void SetSignalHandler(const FuzzingOptions &Options) {
return;
// Set up the crash handler and wait until it is ready before proceeding.
- zx_handle_t Event;
- ExitOnErr(_zx_event_create(0, &Event), "_zx_event_create");
+ ExitOnErr(_zx_event_create(0, &SignalHandlerEvent), "_zx_event_create");
- std::thread T(CrashHandler, &Event);
- zx_status_t Status =
- _zx_object_wait_one(Event, ZX_USER_SIGNAL_0, ZX_TIME_INFINITE, nullptr);
- _zx_handle_close(Event);
+ SignalHandler = std::thread(CrashHandler);
+ zx_status_t Status = _zx_object_wait_one(SignalHandlerEvent, ZX_USER_SIGNAL_0,
+ ZX_TIME_INFINITE, nullptr);
ExitOnErr(Status, "_zx_object_wait_one");
- T.detach();
+ std::atexit(StopSignalHandler);
}
void SleepSeconds(int Seconds) {
diff --git a/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp b/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp
index 1a54bb569eca..3598758dbb4f 100644
--- a/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerUtilWindows.cpp
@@ -204,7 +204,7 @@ const void *SearchMemory(const void *Data, size_t DataLen, const void *Patt,
}
std::string DisassembleCmd(const std::string &FileName) {
- Vector<std::string> command_vector;
+ std::vector<std::string> command_vector;
command_vector.push_back("dumpbin /summary > nul");
if (ExecuteCommand(Command(command_vector)) == 0)
return "dumpbin /disasm " + FileName;
diff --git a/compiler-rt/lib/gwp_asan/common.h b/compiler-rt/lib/gwp_asan/common.h
index 7ce367e3ffe9..6b238ad9ecbd 100644
--- a/compiler-rt/lib/gwp_asan/common.h
+++ b/compiler-rt/lib/gwp_asan/common.h
@@ -19,7 +19,28 @@
#include <stdint.h>
namespace gwp_asan {
-enum class Error {
+
+// Magic header that resides in the AllocatorState so that GWP-ASan bugreports
+// can be understood by tools at different versions. Out-of-process crash
+// handlers, like crashpad on Fuchsia, take the raw contents of the
+// AllocationMetadata array and the AllocatorState, and shove them into the
+// minidump. Online unpacking of these structs needs to know from which version
+// of GWP-ASan it's extracting the information, as the structures are not
+// stable.
+struct AllocatorVersionMagic {
+ // The values are copied into the structure at runtime, during
+ // `GuardedPoolAllocator::init()` so that GWP-ASan remains completely in the
+ // `.bss` segment.
+ static constexpr uint8_t kAllocatorVersionMagic[4] = {'A', 'S', 'A', 'N'};
+ uint8_t Magic[4] = {};
+ // Update the version number when the AllocatorState or AllocationMetadata
+ // change.
+ static constexpr uint16_t kAllocatorVersion = 1;
+ uint16_t Version = 0;
+ uint16_t Reserved = 0;
+};
+
+enum class Error : uint8_t {
UNKNOWN,
USE_AFTER_FREE,
DOUBLE_FREE,
@@ -84,6 +105,7 @@ struct AllocationMetadata {
// set of information required for understanding a GWP-ASan crash.
struct AllocatorState {
constexpr AllocatorState() {}
+ AllocatorVersionMagic VersionMagic{};
// Returns whether the provided pointer is a current sampled allocation that
// is owned by this pool.
@@ -123,5 +145,38 @@ struct AllocatorState {
uintptr_t FailureAddress = 0;
};
+// Below are various compile-time checks that the layout of the internal
+// GWP-ASan structures is undisturbed. If it is disturbed, the version magic
+// number needs to be increased by one, and the asserts need to be updated.
+// Out-of-process crash handlers, like breakpad/crashpad, may copy the internal
+// GWP-ASan structures into a minidump for offline reconstruction of the crash.
+// In order to accomplish this, the offline reconstructor needs to know the
+// version of GWP-ASan internal structures that it's unpacking (along with the
+// architecture-specific layout info, which is left as an exercise for the crash
+// handler).
+static_assert(offsetof(AllocatorState, VersionMagic) == 0, "");
+static_assert(sizeof(AllocatorVersionMagic) == 8, "");
+#if defined(__x86_64__)
+static_assert(sizeof(AllocatorState) == 56, "");
+static_assert(offsetof(AllocatorState, FailureAddress) == 48, "");
+static_assert(sizeof(AllocationMetadata) == 568, "");
+static_assert(offsetof(AllocationMetadata, IsDeallocated) == 560, "");
+#elif defined(__aarch64__)
+static_assert(sizeof(AllocatorState) == 56, "");
+static_assert(offsetof(AllocatorState, FailureAddress) == 48, "");
+static_assert(sizeof(AllocationMetadata) == 568, "");
+static_assert(offsetof(AllocationMetadata, IsDeallocated) == 560, "");
+#elif defined(__i386__)
+static_assert(sizeof(AllocatorState) == 32, "");
+static_assert(offsetof(AllocatorState, FailureAddress) == 28, "");
+static_assert(sizeof(AllocationMetadata) == 548, "");
+static_assert(offsetof(AllocationMetadata, IsDeallocated) == 544, "");
+#elif defined(__arm__)
+static_assert(sizeof(AllocatorState) == 32, "");
+static_assert(offsetof(AllocatorState, FailureAddress) == 28, "");
+static_assert(sizeof(AllocationMetadata) == 560, "");
+static_assert(offsetof(AllocationMetadata, IsDeallocated) == 552, "");
+#endif // defined($ARCHITECTURE)
+
} // namespace gwp_asan
#endif // GWP_ASAN_COMMON_H_
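With the magic header in place, an out-of-process consumer can sanity-check a copied AllocatorState blob before trying to interpret it. This is a hedged, minidump-reader-side sketch; the function name and buffer handling are hypothetical, and only the constants come from the struct above.

#include <cstddef>
#include <cstdint>
#include <cstring>
// Plus gwp_asan/common.h for gwp_asan::AllocatorVersionMagic.

// Reject blobs whose magic or version does not match the layout this tool
// was built to parse.
bool LooksLikeSupportedAllocatorState(const void *Blob, size_t Size) {
  gwp_asan::AllocatorVersionMagic Header;
  if (Size < sizeof(Header))
    return false;
  std::memcpy(&Header, Blob, sizeof(Header));
  return std::memcmp(Header.Magic,
                     gwp_asan::AllocatorVersionMagic::kAllocatorVersionMagic,
                     sizeof(Header.Magic)) == 0 &&
         Header.Version == gwp_asan::AllocatorVersionMagic::kAllocatorVersion;
}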
diff --git a/compiler-rt/lib/gwp_asan/guarded_pool_allocator.cpp b/compiler-rt/lib/gwp_asan/guarded_pool_allocator.cpp
index 8ce5fc9c4dfc..7096b428764c 100644
--- a/compiler-rt/lib/gwp_asan/guarded_pool_allocator.cpp
+++ b/compiler-rt/lib/gwp_asan/guarded_pool_allocator.cpp
@@ -59,6 +59,13 @@ void GuardedPoolAllocator::init(const options::Options &Opts) {
SingletonPtr = this;
Backtrace = Opts.Backtrace;
+ State.VersionMagic = {{AllocatorVersionMagic::kAllocatorVersionMagic[0],
+ AllocatorVersionMagic::kAllocatorVersionMagic[1],
+ AllocatorVersionMagic::kAllocatorVersionMagic[2],
+ AllocatorVersionMagic::kAllocatorVersionMagic[3]},
+ AllocatorVersionMagic::kAllocatorVersion,
+ 0};
+
State.MaxSimultaneousAllocations = Opts.MaxSimultaneousAllocations;
const size_t PageSize = getPlatformPageSize();
diff --git a/compiler-rt/lib/hwasan/hwasan.cpp b/compiler-rt/lib/hwasan/hwasan.cpp
index cbe0dee66dcd..6f0ea64472c6 100644
--- a/compiler-rt/lib/hwasan/hwasan.cpp
+++ b/compiler-rt/lib/hwasan/hwasan.cpp
@@ -16,6 +16,7 @@
#include "hwasan_checks.h"
#include "hwasan_dynamic_shadow.h"
#include "hwasan_globals.h"
+#include "hwasan_mapping.h"
#include "hwasan_poisoning.h"
#include "hwasan_report.h"
#include "hwasan_thread.h"
@@ -141,7 +142,7 @@ static void CheckUnwind() {
static void HwasanFormatMemoryUsage(InternalScopedString &s) {
HwasanThreadList &thread_list = hwasanThreadList();
auto thread_stats = thread_list.GetThreadStats();
- auto *sds = StackDepotGetStats();
+ auto sds = StackDepotGetStats();
AllocatorStatCounters asc;
GetAllocatorStats(asc);
s.append(
@@ -151,7 +152,7 @@ static void HwasanFormatMemoryUsage(InternalScopedString &s) {
internal_getpid(), GetRSS(), thread_stats.n_live_threads,
thread_stats.total_stack_size,
thread_stats.n_live_threads * thread_list.MemoryUsedPerThread(),
- sds->allocated, sds->n_uniq_ids, asc[AllocatorStatMapped]);
+ sds.allocated, sds.n_uniq_ids, asc[AllocatorStatMapped]);
}
#if SANITIZER_ANDROID
@@ -319,7 +320,7 @@ void __hwasan_init_static() {
InitializeSingleGlobal(global);
}
-void __hwasan_init() {
+__attribute__((constructor(0))) void __hwasan_init() {
CHECK(!hwasan_init_is_running);
if (hwasan_inited) return;
hwasan_init_is_running = 1;
@@ -344,7 +345,7 @@ void __hwasan_init() {
// Needs to be called here because flags()->random_tags might not have been
// initialized when InitInstrumentation() was called.
- GetCurrentThread()->InitRandomState();
+ GetCurrentThread()->EnsureRandomStateInited();
SetPrintfAndReportCallback(AppendToErrorMessageBuffer);
// This may call libc -> needs initialized shadow.
@@ -360,6 +361,7 @@ void __hwasan_init() {
HwasanTSDThreadInit();
HwasanAllocatorInit();
+ HwasanInstallAtForkHandler();
#if HWASAN_CONTAINS_UBSAN
__ubsan::InitAsPlugin();
@@ -390,8 +392,15 @@ void __hwasan_print_shadow(const void *p, uptr sz) {
uptr shadow_last = MemToShadow(ptr_raw + sz - 1);
Printf("HWASan shadow map for %zx .. %zx (pointer tag %x)\n", ptr_raw,
ptr_raw + sz, GetTagFromPointer((uptr)p));
- for (uptr s = shadow_first; s <= shadow_last; ++s)
- Printf(" %zx: %x\n", ShadowToMem(s), *(tag_t *)s);
+ for (uptr s = shadow_first; s <= shadow_last; ++s) {
+ tag_t mem_tag = *reinterpret_cast<tag_t *>(s);
+ uptr granule_addr = ShadowToMem(s);
+ if (mem_tag && mem_tag < kShadowAlignment)
+ Printf(" %zx: %02x(%02x)\n", granule_addr, mem_tag,
+ *reinterpret_cast<tag_t *>(granule_addr + kShadowAlignment - 1));
+ else
+ Printf(" %zx: %02x\n", granule_addr, mem_tag);
+ }
}
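The new loop prints short granules specially: a shadow value in the range 1..kShadowAlignment-1 means only that many leading bytes of the granule are addressable, and the granule's real tag is stored in its last byte (the value shown in parentheses). Below is a simplified sketch of resolving the effective tag under that convention; the real check in hwasan_checks.h additionally verifies that the access stays within the short granule's valid bytes.

#include <cstdint>

using tag_t = uint8_t;
constexpr unsigned kShadowAlignment = 16;  // HWASan granule size (assumed here)

// Returns the tag a pointer into this granule must carry: the shadow value
// itself for a full granule, or the tag stashed in the granule's last byte
// for a short granule (shadow value 1..15).
tag_t EffectiveGranuleTag(tag_t ShadowValue, const uint8_t *GranuleStart) {
  if (ShadowValue && ShadowValue < kShadowAlignment)
    return GranuleStart[kShadowAlignment - 1];
  return ShadowValue;
}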
sptr __hwasan_test_shadow(const void *p, uptr sz) {
diff --git a/compiler-rt/lib/hwasan/hwasan.h b/compiler-rt/lib/hwasan/hwasan.h
index 7338b696ad34..371c43f3cbde 100644
--- a/compiler-rt/lib/hwasan/hwasan.h
+++ b/compiler-rt/lib/hwasan/hwasan.h
@@ -107,6 +107,8 @@ void InitThreads();
void InitializeInterceptors();
void HwasanAllocatorInit();
+void HwasanAllocatorLock();
+void HwasanAllocatorUnlock();
void *hwasan_malloc(uptr size, StackTrace *stack);
void *hwasan_calloc(uptr nmemb, uptr size, StackTrace *stack);
@@ -140,6 +142,8 @@ void HwasanAtExit();
void HwasanOnDeadlySignal(int signo, void *info, void *context);
+void HwasanInstallAtForkHandler();
+
void UpdateMemoryUsage();
void AppendToErrorMessageBuffer(const char *buffer);
@@ -183,25 +187,34 @@ void HwasanTagMismatch(uptr addr, uptr access_info, uptr *registers_frame,
RunFreeHooks(ptr); \
} while (false)
-#if HWASAN_WITH_INTERCEPTORS && defined(__aarch64__)
+#if HWASAN_WITH_INTERCEPTORS
// For both bionic and glibc __sigset_t is an unsigned long.
typedef unsigned long __hw_sigset_t;
// Setjmp and longjmp implementations are platform specific, and hence the
-// interception code is platform specific too. As yet we've only implemented
-// the interception for AArch64.
-typedef unsigned long long __hw_register_buf[22];
+// interception code is platform specific too.
+# if defined(__aarch64__)
+constexpr size_t kHwRegisterBufSize = 22;
+# elif defined(__x86_64__)
+constexpr size_t kHwRegisterBufSize = 8;
+# endif
+typedef unsigned long long __hw_register_buf[kHwRegisterBufSize];
struct __hw_jmp_buf_struct {
// NOTE: The machine-dependent definition of `__sigsetjmp'
// assumes that a `__hw_jmp_buf' begins with a `__hw_register_buf' and that
// `__mask_was_saved' follows it. Do not move these members or add others
// before it.
+ //
+ // We add a __magic field to our struct to catch cases where libc's setjmp
+ // populated the jmp_buf instead of our interceptor.
__hw_register_buf __jmpbuf; // Calling environment.
- int __mask_was_saved; // Saved the signal mask?
+ unsigned __mask_was_saved : 1; // Saved the signal mask?
+ unsigned __magic : 31; // Used to distinguish __hw_jmp_buf from jmp_buf.
__hw_sigset_t __saved_mask; // Saved signal mask.
};
typedef struct __hw_jmp_buf_struct __hw_jmp_buf[1];
typedef struct __hw_jmp_buf_struct __hw_sigjmp_buf[1];
-#endif // HWASAN_WITH_INTERCEPTORS && __aarch64__
+constexpr unsigned kHwJmpBufMagic = 0x248ACE77;
+#endif // HWASAN_WITH_INTERCEPTORS
#define ENSURE_HWASAN_INITED() \
do { \
diff --git a/compiler-rt/lib/hwasan/hwasan_allocation_functions.cpp b/compiler-rt/lib/hwasan/hwasan_allocation_functions.cpp
index 6c2a6077866f..9cd82dbabd19 100644
--- a/compiler-rt/lib/hwasan/hwasan_allocation_functions.cpp
+++ b/compiler-rt/lib/hwasan/hwasan_allocation_functions.cpp
@@ -14,28 +14,21 @@
#include "hwasan.h"
#include "interception/interception.h"
+#include "sanitizer_common/sanitizer_allocator_dlsym.h"
#include "sanitizer_common/sanitizer_allocator_interface.h"
#include "sanitizer_common/sanitizer_tls_get_addr.h"
-using namespace __hwasan;
+#if !SANITIZER_FUCHSIA
-static uptr allocated_for_dlsym;
-static const uptr kDlsymAllocPoolSize = 1024;
-static uptr alloc_memory_for_dlsym[kDlsymAllocPoolSize];
+using namespace __hwasan;
-static bool IsInDlsymAllocPool(const void *ptr) {
- uptr off = (uptr)ptr - (uptr)alloc_memory_for_dlsym;
- return off < sizeof(alloc_memory_for_dlsym);
-}
+struct DlsymAlloc : public DlSymAllocator<DlsymAlloc> {
+ static bool UseImpl() { return !hwasan_inited; }
+};
-static void *AllocateFromLocalPool(uptr size_in_bytes) {
- uptr size_in_words = RoundUpTo(size_in_bytes, kWordSize) / kWordSize;
- void *mem = (void *)&alloc_memory_for_dlsym[allocated_for_dlsym];
- allocated_for_dlsym += size_in_words;
- CHECK_LT(allocated_for_dlsym, kDlsymAllocPoolSize);
- return mem;
-}
+extern "C" {
+SANITIZER_INTERFACE_ATTRIBUTE
int __sanitizer_posix_memalign(void **memptr, uptr alignment, uptr size) {
GET_MALLOC_STACK_TRACE;
CHECK_NE(memptr, 0);
@@ -43,16 +36,19 @@ int __sanitizer_posix_memalign(void **memptr, uptr alignment, uptr size) {
return res;
}
+SANITIZER_INTERFACE_ATTRIBUTE
void *__sanitizer_memalign(uptr alignment, uptr size) {
GET_MALLOC_STACK_TRACE;
return hwasan_memalign(alignment, size, &stack);
}
+SANITIZER_INTERFACE_ATTRIBUTE
void *__sanitizer_aligned_alloc(uptr alignment, uptr size) {
GET_MALLOC_STACK_TRACE;
return hwasan_aligned_alloc(alignment, size, &stack);
}
+SANITIZER_INTERFACE_ATTRIBUTE
void *__sanitizer___libc_memalign(uptr alignment, uptr size) {
GET_MALLOC_STACK_TRACE;
void *ptr = hwasan_memalign(alignment, size, &stack);
@@ -61,87 +57,92 @@ void *__sanitizer___libc_memalign(uptr alignment, uptr size) {
return ptr;
}
+SANITIZER_INTERFACE_ATTRIBUTE
void *__sanitizer_valloc(uptr size) {
GET_MALLOC_STACK_TRACE;
return hwasan_valloc(size, &stack);
}
+SANITIZER_INTERFACE_ATTRIBUTE
void *__sanitizer_pvalloc(uptr size) {
GET_MALLOC_STACK_TRACE;
return hwasan_pvalloc(size, &stack);
}
+SANITIZER_INTERFACE_ATTRIBUTE
void __sanitizer_free(void *ptr) {
- GET_MALLOC_STACK_TRACE;
- if (!ptr || UNLIKELY(IsInDlsymAllocPool(ptr)))
+ if (!ptr)
return;
+ if (DlsymAlloc::PointerIsMine(ptr))
+ return DlsymAlloc::Free(ptr);
+ GET_MALLOC_STACK_TRACE;
hwasan_free(ptr, &stack);
}
+SANITIZER_INTERFACE_ATTRIBUTE
void __sanitizer_cfree(void *ptr) {
- GET_MALLOC_STACK_TRACE;
- if (!ptr || UNLIKELY(IsInDlsymAllocPool(ptr)))
+ if (!ptr)
return;
+ if (DlsymAlloc::PointerIsMine(ptr))
+ return DlsymAlloc::Free(ptr);
+ GET_MALLOC_STACK_TRACE;
hwasan_free(ptr, &stack);
}
+SANITIZER_INTERFACE_ATTRIBUTE
uptr __sanitizer_malloc_usable_size(const void *ptr) {
return __sanitizer_get_allocated_size(ptr);
}
+SANITIZER_INTERFACE_ATTRIBUTE
struct __sanitizer_struct_mallinfo __sanitizer_mallinfo() {
__sanitizer_struct_mallinfo sret;
internal_memset(&sret, 0, sizeof(sret));
return sret;
}
+SANITIZER_INTERFACE_ATTRIBUTE
int __sanitizer_mallopt(int cmd, int value) { return 0; }
+SANITIZER_INTERFACE_ATTRIBUTE
void __sanitizer_malloc_stats(void) {
// FIXME: implement, but don't call REAL(malloc_stats)!
}
+SANITIZER_INTERFACE_ATTRIBUTE
void *__sanitizer_calloc(uptr nmemb, uptr size) {
+ if (DlsymAlloc::Use())
+ return DlsymAlloc::Callocate(nmemb, size);
GET_MALLOC_STACK_TRACE;
- if (UNLIKELY(!hwasan_inited))
- // Hack: dlsym calls calloc before REAL(calloc) is retrieved from dlsym.
- return AllocateFromLocalPool(nmemb * size);
return hwasan_calloc(nmemb, size, &stack);
}
+SANITIZER_INTERFACE_ATTRIBUTE
void *__sanitizer_realloc(void *ptr, uptr size) {
+ if (DlsymAlloc::Use() || DlsymAlloc::PointerIsMine(ptr))
+ return DlsymAlloc::Realloc(ptr, size);
GET_MALLOC_STACK_TRACE;
- if (UNLIKELY(IsInDlsymAllocPool(ptr))) {
- uptr offset = (uptr)ptr - (uptr)alloc_memory_for_dlsym;
- uptr copy_size = Min(size, kDlsymAllocPoolSize - offset);
- void *new_ptr;
- if (UNLIKELY(!hwasan_inited)) {
- new_ptr = AllocateFromLocalPool(copy_size);
- } else {
- copy_size = size;
- new_ptr = hwasan_malloc(copy_size, &stack);
- }
- internal_memcpy(new_ptr, ptr, copy_size);
- return new_ptr;
- }
return hwasan_realloc(ptr, size, &stack);
}
+SANITIZER_INTERFACE_ATTRIBUTE
void *__sanitizer_reallocarray(void *ptr, uptr nmemb, uptr size) {
GET_MALLOC_STACK_TRACE;
return hwasan_reallocarray(ptr, nmemb, size, &stack);
}
+SANITIZER_INTERFACE_ATTRIBUTE
void *__sanitizer_malloc(uptr size) {
- GET_MALLOC_STACK_TRACE;
if (UNLIKELY(!hwasan_init_is_running))
ENSURE_HWASAN_INITED();
- if (UNLIKELY(!hwasan_inited))
- // Hack: dlsym calls malloc before REAL(malloc) is retrieved from dlsym.
- return AllocateFromLocalPool(size);
+ if (DlsymAlloc::Use())
+ return DlsymAlloc::Allocate(size);
+ GET_MALLOC_STACK_TRACE;
return hwasan_malloc(size, &stack);
}
+} // extern "C"
+
#if HWASAN_WITH_INTERCEPTORS
# define INTERCEPTOR_ALIAS(RET, FN, ARGS...) \
extern "C" SANITIZER_INTERFACE_ATTRIBUTE RET WRAP(FN)(ARGS) \
@@ -170,3 +171,5 @@ INTERCEPTOR_ALIAS(int, mallopt, int cmd, int value);
INTERCEPTOR_ALIAS(void, malloc_stats, void);
# endif
#endif // #if HWASAN_WITH_INTERCEPTORS
+
+#endif // SANITIZER_FUCHSIA
diff --git a/compiler-rt/lib/hwasan/hwasan_allocator.cpp b/compiler-rt/lib/hwasan/hwasan_allocator.cpp
index ef6d4d6c7678..9e1729964e27 100644
--- a/compiler-rt/lib/hwasan/hwasan_allocator.cpp
+++ b/compiler-rt/lib/hwasan/hwasan_allocator.cpp
@@ -107,6 +107,10 @@ void HwasanAllocatorInit() {
tail_magic[i] = GetCurrentThread()->GenerateRandomTag();
}
+void HwasanAllocatorLock() { allocator.ForceLock(); }
+
+void HwasanAllocatorUnlock() { allocator.ForceUnlock(); }
+
void AllocatorSwallowThreadLocalCache(AllocatorCache *cache) {
allocator.SwallowCache(cache);
}
@@ -158,8 +162,11 @@ static void *HwasanAllocate(StackTrace *stack, uptr orig_size, uptr alignment,
internal_memset(allocated, flags()->malloc_fill_byte, fill_size);
}
if (size != orig_size) {
- internal_memcpy(reinterpret_cast<u8 *>(allocated) + orig_size, tail_magic,
- size - orig_size - 1);
+ u8 *tail = reinterpret_cast<u8 *>(allocated) + orig_size;
+ uptr tail_length = size - orig_size;
+ internal_memcpy(tail, tail_magic, tail_length - 1);
+ // Short granule is excluded from magic tail, so we explicitly untag.
+ tail[tail_length - 1] = 0;
}
void *user_ptr = allocated;
@@ -201,21 +208,37 @@ static bool PointerAndMemoryTagsMatch(void *tagged_ptr) {
return PossiblyShortTagMatches(mem_tag, tagged_uptr, 1);
}
+static bool CheckInvalidFree(StackTrace *stack, void *untagged_ptr,
+ void *tagged_ptr) {
+ // This function can return true if halt_on_error is false.
+ if (!MemIsApp(reinterpret_cast<uptr>(untagged_ptr)) ||
+ !PointerAndMemoryTagsMatch(tagged_ptr)) {
+ ReportInvalidFree(stack, reinterpret_cast<uptr>(tagged_ptr));
+ return true;
+ }
+ return false;
+}
+
static void HwasanDeallocate(StackTrace *stack, void *tagged_ptr) {
CHECK(tagged_ptr);
HWASAN_FREE_HOOK(tagged_ptr);
- if (!PointerAndMemoryTagsMatch(tagged_ptr))
- ReportInvalidFree(stack, reinterpret_cast<uptr>(tagged_ptr));
+ bool in_taggable_region =
+ InTaggableRegion(reinterpret_cast<uptr>(tagged_ptr));
+ void *untagged_ptr = in_taggable_region ? UntagPtr(tagged_ptr) : tagged_ptr;
+
+ if (CheckInvalidFree(stack, untagged_ptr, tagged_ptr))
+ return;
- void *untagged_ptr = InTaggableRegion(reinterpret_cast<uptr>(tagged_ptr))
- ? UntagPtr(tagged_ptr)
- : tagged_ptr;
void *aligned_ptr = reinterpret_cast<void *>(
RoundDownTo(reinterpret_cast<uptr>(untagged_ptr), kShadowAlignment));
tag_t pointer_tag = GetTagFromPointer(reinterpret_cast<uptr>(tagged_ptr));
Metadata *meta =
reinterpret_cast<Metadata *>(allocator.GetMetaData(aligned_ptr));
+ if (!meta) {
+ ReportInvalidFree(stack, reinterpret_cast<uptr>(tagged_ptr));
+ return;
+ }
uptr orig_size = meta->get_requested_size();
u32 free_context_id = StackDepotPut(*stack);
u32 alloc_context_id = meta->alloc_context_id;
@@ -228,7 +251,11 @@ static void HwasanDeallocate(StackTrace *stack, void *tagged_ptr) {
CHECK_LT(tail_size, kShadowAlignment);
void *tail_beg = reinterpret_cast<void *>(
reinterpret_cast<uptr>(aligned_ptr) + orig_size);
- if (tail_size && internal_memcmp(tail_beg, tail_magic, tail_size))
+ tag_t short_granule_memtag = *(reinterpret_cast<tag_t *>(
+ reinterpret_cast<uptr>(tail_beg) + tail_size));
+ if (tail_size &&
+ (internal_memcmp(tail_beg, tail_magic, tail_size) ||
+ (in_taggable_region && pointer_tag != short_granule_memtag)))
ReportTailOverwritten(stack, reinterpret_cast<uptr>(tagged_ptr),
orig_size, tail_magic);
}
@@ -243,8 +270,7 @@ static void HwasanDeallocate(StackTrace *stack, void *tagged_ptr) {
Min(TaggedSize(orig_size), (uptr)flags()->max_free_fill_size);
internal_memset(aligned_ptr, flags()->free_fill_byte, fill_size);
}
- if (InTaggableRegion(reinterpret_cast<uptr>(tagged_ptr)) &&
- flags()->tag_in_free && malloc_bisect(stack, 0) &&
+ if (in_taggable_region && flags()->tag_in_free && malloc_bisect(stack, 0) &&
atomic_load_relaxed(&hwasan_allocator_tagging_enabled)) {
// Always store full 8-bit tags on free to maximize UAF detection.
tag_t tag;
@@ -278,13 +304,15 @@ static void HwasanDeallocate(StackTrace *stack, void *tagged_ptr) {
static void *HwasanReallocate(StackTrace *stack, void *tagged_ptr_old,
uptr new_size, uptr alignment) {
- if (!PointerAndMemoryTagsMatch(tagged_ptr_old))
- ReportInvalidFree(stack, reinterpret_cast<uptr>(tagged_ptr_old));
-
+ void *untagged_ptr_old =
+ InTaggableRegion(reinterpret_cast<uptr>(tagged_ptr_old))
+ ? UntagPtr(tagged_ptr_old)
+ : tagged_ptr_old;
+ if (CheckInvalidFree(stack, untagged_ptr_old, tagged_ptr_old))
+ return nullptr;
void *tagged_ptr_new =
HwasanAllocate(stack, new_size, alignment, false /*zeroise*/);
if (tagged_ptr_old && tagged_ptr_new) {
- void *untagged_ptr_old = UntagPtr(tagged_ptr_old);
Metadata *meta =
reinterpret_cast<Metadata *>(allocator.GetMetaData(untagged_ptr_old));
internal_memcpy(
@@ -305,6 +333,8 @@ static void *HwasanCalloc(StackTrace *stack, uptr nmemb, uptr size) {
}
HwasanChunkView FindHeapChunkByAddress(uptr address) {
+ if (!allocator.PointerIsMine(reinterpret_cast<void *>(address)))
+ return HwasanChunkView();
void *block = allocator.GetBlockBegin(reinterpret_cast<void*>(address));
if (!block)
return HwasanChunkView();
diff --git a/compiler-rt/lib/hwasan/hwasan_exceptions.cpp b/compiler-rt/lib/hwasan/hwasan_exceptions.cpp
index 169e7876cb58..6ed1da335428 100644
--- a/compiler-rt/lib/hwasan/hwasan_exceptions.cpp
+++ b/compiler-rt/lib/hwasan/hwasan_exceptions.cpp
@@ -29,8 +29,8 @@ typedef _Unwind_Reason_Code PersonalityFn(int version, _Unwind_Action actions,
// is statically linked and the sanitizer runtime and the program are linked
// against different unwinders. The _Unwind_Context data structure is opaque so
// it may be incompatible between unwinders.
-typedef _Unwind_Word GetGRFn(_Unwind_Context* context, int index);
-typedef _Unwind_Word GetCFAFn(_Unwind_Context* context);
+typedef uintptr_t GetGRFn(_Unwind_Context* context, int index);
+typedef uintptr_t GetCFAFn(_Unwind_Context* context);
extern "C" SANITIZER_INTERFACE_ATTRIBUTE _Unwind_Reason_Code
__hwasan_personality_wrapper(int version, _Unwind_Action actions,
diff --git a/compiler-rt/lib/hwasan/hwasan_fuchsia.cpp b/compiler-rt/lib/hwasan/hwasan_fuchsia.cpp
index e299a7e862eb..94e5c5fb69c7 100644
--- a/compiler-rt/lib/hwasan/hwasan_fuchsia.cpp
+++ b/compiler-rt/lib/hwasan/hwasan_fuchsia.cpp
@@ -130,7 +130,7 @@ static void ThreadCreateHook(void *hook, bool aborted) {
static void ThreadStartHook(void *hook, thrd_t self) {
Thread *thread = static_cast<Thread *>(hook);
FinishThreadInitialization(thread);
- thread->InitRandomState();
+ thread->EnsureRandomStateInited();
}
// This is the function that sets up the stack ring buffer and enables us to use
@@ -180,6 +180,8 @@ void HwasanTSDThreadInit() {}
// function is unneeded.
void InstallAtExitHandler() {}
+void HwasanInstallAtForkHandler() {}
+
// TODO(fxbug.dev/81499): Once we finalize the tagged pointer ABI in zircon, we should come back
// here and implement the appropriate check that TBI is enabled.
void InitializeOsSupport() {}
diff --git a/compiler-rt/lib/hwasan/hwasan_interceptors.cpp b/compiler-rt/lib/hwasan/hwasan_interceptors.cpp
index 68f8adec0776..f96ed8804102 100644
--- a/compiler-rt/lib/hwasan/hwasan_interceptors.cpp
+++ b/compiler-rt/lib/hwasan/hwasan_interceptors.cpp
@@ -49,15 +49,14 @@ INTERCEPTOR(int, pthread_create, void *th, void *attr, void *(*callback)(void*),
DEFINE_REAL(int, vfork)
DECLARE_EXTERN_INTERCEPTOR_AND_WRAPPER(int, vfork)
-#endif // HWASAN_WITH_INTERCEPTORS
-#if HWASAN_WITH_INTERCEPTORS && defined(__aarch64__)
// Get and/or change the set of blocked signals.
extern "C" int sigprocmask(int __how, const __hw_sigset_t *__restrict __set,
__hw_sigset_t *__restrict __oset);
#define SIG_BLOCK 0
#define SIG_SETMASK 2
extern "C" int __sigjmp_save(__hw_sigjmp_buf env, int savemask) {
+ env[0].__magic = kHwJmpBufMagic;
env[0].__mask_was_saved =
(savemask && sigprocmask(SIG_BLOCK, (__hw_sigset_t *)0,
&env[0].__saved_mask) == 0);
@@ -66,8 +65,14 @@ extern "C" int __sigjmp_save(__hw_sigjmp_buf env, int savemask) {
static void __attribute__((always_inline))
InternalLongjmp(__hw_register_buf env, int retval) {
+# if defined(__aarch64__)
+ constexpr size_t kSpIndex = 13;
+# elif defined(__x86_64__)
+ constexpr size_t kSpIndex = 6;
+# endif
+
// Clear all memory tags on the stack between here and where we're going.
- unsigned long long stack_pointer = env[13];
+ unsigned long long stack_pointer = env[kSpIndex];
// The stack pointer should never be tagged, so we don't need to clear the
// tag for this function call.
__hwasan_handle_longjmp((void *)stack_pointer);
@@ -78,6 +83,7 @@ InternalLongjmp(__hw_register_buf env, int retval) {
// Must implement this ourselves, since we don't know the order of registers
// in different libc implementations and many implementations mangle the
// stack pointer so we can't use it without knowing the demangling scheme.
+# if defined(__aarch64__)
register long int retval_tmp asm("x1") = retval;
register void *env_address asm("x0") = &env[0];
asm volatile("ldp x19, x20, [%0, #0<<3];"
@@ -100,9 +106,36 @@ InternalLongjmp(__hw_register_buf env, int retval) {
"br x30;"
: "+r"(env_address)
: "r"(retval_tmp));
+# elif defined(__x86_64__)
+ register long int retval_tmp asm("%rsi") = retval;
+ register void *env_address asm("%rdi") = &env[0];
+ asm volatile(
+ // Restore registers.
+ "mov (0*8)(%0),%%rbx;"
+ "mov (1*8)(%0),%%rbp;"
+ "mov (2*8)(%0),%%r12;"
+ "mov (3*8)(%0),%%r13;"
+ "mov (4*8)(%0),%%r14;"
+ "mov (5*8)(%0),%%r15;"
+ "mov (6*8)(%0),%%rsp;"
+ "mov (7*8)(%0),%%rdx;"
+ // Return 1 if retval is 0.
+ "mov $1,%%rax;"
+ "test %1,%1;"
+ "cmovnz %1,%%rax;"
+ "jmp *%%rdx;" ::"r"(env_address),
+ "r"(retval_tmp));
+# endif
}
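The hard-coded offsets in the x86_64 branch above (and kSpIndex = 6 earlier) imply a fixed slot order for the register buffer, which is populated by HWASan's own setjmp path rather than by libc, so no pointer mangling needs to be undone. The enum below only documents the order implied by the loads above; it is not an API and the names are illustrative.

// Slot layout implied by "mov (N*8)(%0), ..." in InternalLongjmp.
enum HwJmpBufSlotX86_64 : int {
  kSlotRbx = 0,
  kSlotRbp = 1,
  kSlotR12 = 2,
  kSlotR13 = 3,
  kSlotR14 = 4,
  kSlotR15 = 5,
  kSlotRsp = 6,  // kSpIndex: used to clear stack tags before jumping
  kSlotRip = 7,  // loaded into rdx, then "jmp *%rdx"
};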
INTERCEPTOR(void, siglongjmp, __hw_sigjmp_buf env, int val) {
+ if (env[0].__magic != kHwJmpBufMagic) {
+ Printf(
+ "WARNING: Unexpected bad jmp_buf. Either setjmp was not called or "
+ "there is a bug in HWASan.\n");
+ return REAL(siglongjmp)(env, val);
+ }
+
if (env[0].__mask_was_saved)
// Restore the saved signal mask.
(void)sigprocmask(SIG_SETMASK, &env[0].__saved_mask,
@@ -114,32 +147,24 @@ INTERCEPTOR(void, siglongjmp, __hw_sigjmp_buf env, int val) {
// _setjmp on start_thread. Hence we have to intercept the longjmp on
// pthread_exit so the __hw_jmp_buf order matches.
INTERCEPTOR(void, __libc_longjmp, __hw_jmp_buf env, int val) {
+ if (env[0].__magic != kHwJmpBufMagic)
+ return REAL(__libc_longjmp)(env, val);
InternalLongjmp(env[0].__jmpbuf, val);
}
INTERCEPTOR(void, longjmp, __hw_jmp_buf env, int val) {
+ if (env[0].__magic != kHwJmpBufMagic) {
+ Printf(
+ "WARNING: Unexpected bad jmp_buf. Either setjmp was not called or "
+ "there is a bug in HWASan.\n");
+ return REAL(longjmp)(env, val);
+ }
InternalLongjmp(env[0].__jmpbuf, val);
}
#undef SIG_BLOCK
#undef SIG_SETMASK
-#endif // HWASAN_WITH_INTERCEPTORS && __aarch64__
-
-static void BeforeFork() {
- StackDepotLockAll();
-}
-
-static void AfterFork() {
- StackDepotUnlockAll();
-}
-
-INTERCEPTOR(int, fork, void) {
- ENSURE_HWASAN_INITED();
- BeforeFork();
- int pid = REAL(fork)();
- AfterFork();
- return pid;
-}
+# endif // HWASAN_WITH_INTERCEPTORS
namespace __hwasan {
@@ -156,10 +181,11 @@ void InitializeInterceptors() {
static int inited = 0;
CHECK_EQ(inited, 0);
- INTERCEPT_FUNCTION(fork);
-
#if HWASAN_WITH_INTERCEPTORS
#if defined(__linux__)
+ INTERCEPT_FUNCTION(__libc_longjmp);
+ INTERCEPT_FUNCTION(longjmp);
+ INTERCEPT_FUNCTION(siglongjmp);
INTERCEPT_FUNCTION(vfork);
#endif // __linux__
INTERCEPT_FUNCTION(pthread_create);
diff --git a/compiler-rt/lib/hwasan/hwasan_interface_internal.h b/compiler-rt/lib/hwasan/hwasan_interface_internal.h
index 25c0f94fe51f..ef771add411c 100644
--- a/compiler-rt/lib/hwasan/hwasan_interface_internal.h
+++ b/compiler-rt/lib/hwasan/hwasan_interface_internal.h
@@ -169,54 +169,6 @@ SANITIZER_INTERFACE_ATTRIBUTE
void __hwasan_print_memory_usage();
SANITIZER_INTERFACE_ATTRIBUTE
-int __sanitizer_posix_memalign(void **memptr, uptr alignment, uptr size);
-
-SANITIZER_INTERFACE_ATTRIBUTE
-void * __sanitizer_memalign(uptr alignment, uptr size);
-
-SANITIZER_INTERFACE_ATTRIBUTE
-void * __sanitizer_aligned_alloc(uptr alignment, uptr size);
-
-SANITIZER_INTERFACE_ATTRIBUTE
-void * __sanitizer___libc_memalign(uptr alignment, uptr size);
-
-SANITIZER_INTERFACE_ATTRIBUTE
-void * __sanitizer_valloc(uptr size);
-
-SANITIZER_INTERFACE_ATTRIBUTE
-void * __sanitizer_pvalloc(uptr size);
-
-SANITIZER_INTERFACE_ATTRIBUTE
-void __sanitizer_free(void *ptr);
-
-SANITIZER_INTERFACE_ATTRIBUTE
-void __sanitizer_cfree(void *ptr);
-
-SANITIZER_INTERFACE_ATTRIBUTE
-uptr __sanitizer_malloc_usable_size(const void *ptr);
-
-SANITIZER_INTERFACE_ATTRIBUTE
-__hwasan::__sanitizer_struct_mallinfo __sanitizer_mallinfo();
-
-SANITIZER_INTERFACE_ATTRIBUTE
-int __sanitizer_mallopt(int cmd, int value);
-
-SANITIZER_INTERFACE_ATTRIBUTE
-void __sanitizer_malloc_stats(void);
-
-SANITIZER_INTERFACE_ATTRIBUTE
-void * __sanitizer_calloc(uptr nmemb, uptr size);
-
-SANITIZER_INTERFACE_ATTRIBUTE
-void * __sanitizer_realloc(void *ptr, uptr size);
-
-SANITIZER_INTERFACE_ATTRIBUTE
-void * __sanitizer_reallocarray(void *ptr, uptr nmemb, uptr size);
-
-SANITIZER_INTERFACE_ATTRIBUTE
-void * __sanitizer_malloc(uptr size);
-
-SANITIZER_INTERFACE_ATTRIBUTE
void *__hwasan_memcpy(void *dst, const void *src, uptr size);
SANITIZER_INTERFACE_ATTRIBUTE
void *__hwasan_memset(void *s, int c, uptr n);
diff --git a/compiler-rt/lib/hwasan/hwasan_linux.cpp b/compiler-rt/lib/hwasan/hwasan_linux.cpp
index e22723529f44..ba9e23621cc2 100644
--- a/compiler-rt/lib/hwasan/hwasan_linux.cpp
+++ b/compiler-rt/lib/hwasan/hwasan_linux.cpp
@@ -15,30 +15,30 @@
#include "sanitizer_common/sanitizer_platform.h"
#if SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_NETBSD
-#include "hwasan.h"
-#include "hwasan_dynamic_shadow.h"
-#include "hwasan_interface_internal.h"
-#include "hwasan_mapping.h"
-#include "hwasan_report.h"
-#include "hwasan_thread.h"
-#include "hwasan_thread_list.h"
-
-#include <dlfcn.h>
-#include <elf.h>
-#include <link.h>
-#include <pthread.h>
-#include <signal.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/resource.h>
-#include <sys/time.h>
-#include <unistd.h>
-#include <unwind.h>
-#include <sys/prctl.h>
-#include <errno.h>
-
-#include "sanitizer_common/sanitizer_common.h"
-#include "sanitizer_common/sanitizer_procmaps.h"
+# include <dlfcn.h>
+# include <elf.h>
+# include <errno.h>
+# include <link.h>
+# include <pthread.h>
+# include <signal.h>
+# include <stdio.h>
+# include <stdlib.h>
+# include <sys/prctl.h>
+# include <sys/resource.h>
+# include <sys/time.h>
+# include <unistd.h>
+# include <unwind.h>
+
+# include "hwasan.h"
+# include "hwasan_dynamic_shadow.h"
+# include "hwasan_interface_internal.h"
+# include "hwasan_mapping.h"
+# include "hwasan_report.h"
+# include "hwasan_thread.h"
+# include "hwasan_thread_list.h"
+# include "sanitizer_common/sanitizer_common.h"
+# include "sanitizer_common/sanitizer_procmaps.h"
+# include "sanitizer_common/sanitizer_stackdepot.h"
// Configurations of HWASAN_WITH_INTERCEPTORS and SANITIZER_ANDROID.
//
@@ -50,10 +50,10 @@
// Tested with check-hwasan on x86_64-linux.
// HWASAN_WITH_INTERCEPTORS=ON, SANITIZER_ANDROID=ON
// Tested with check-hwasan on aarch64-linux-android.
-#if !SANITIZER_ANDROID
+# if !SANITIZER_ANDROID
SANITIZER_INTERFACE_ATTRIBUTE
THREADLOCAL uptr __hwasan_tls;
-#endif
+# endif
namespace __hwasan {
@@ -111,9 +111,9 @@ static void InitializeShadowBaseAddress(uptr shadow_size_bytes) {
}
void InitializeOsSupport() {
-#define PR_SET_TAGGED_ADDR_CTRL 55
-#define PR_GET_TAGGED_ADDR_CTRL 56
-#define PR_TAGGED_ADDR_ENABLE (1UL << 0)
+# define PR_SET_TAGGED_ADDR_CTRL 55
+# define PR_GET_TAGGED_ADDR_CTRL 56
+# define PR_TAGGED_ADDR_ENABLE (1UL << 0)
// Check we're running on a kernel that can use the tagged address ABI.
int local_errno = 0;
if (internal_iserror(internal_prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0),
@@ -164,9 +164,9 @@ void InitializeOsSupport() {
Die();
}
}
-#undef PR_SET_TAGGED_ADDR_CTRL
-#undef PR_GET_TAGGED_ADDR_CTRL
-#undef PR_TAGGED_ADDR_ENABLE
+# undef PR_SET_TAGGED_ADDR_CTRL
+# undef PR_GET_TAGGED_ADDR_CTRL
+# undef PR_TAGGED_ADDR_ENABLE
}
bool InitShadow() {
@@ -241,17 +241,16 @@ bool MemIsApp(uptr p) {
CHECK(GetTagFromPointer(p) == 0);
# endif
- return p >= kHighMemStart || (p >= kLowMemStart && p <= kLowMemEnd);
+ return (p >= kHighMemStart && p <= kHighMemEnd) ||
+ (p >= kLowMemStart && p <= kLowMemEnd);
}
-void InstallAtExitHandler() {
- atexit(HwasanAtExit);
-}
+void InstallAtExitHandler() { atexit(HwasanAtExit); }
// ---------------------- TSD ---------------- {{{1
extern "C" void __hwasan_thread_enter() {
- hwasanThreadList().CreateCurrentThread()->InitRandomState();
+ hwasanThreadList().CreateCurrentThread()->EnsureRandomStateInited();
}
extern "C" void __hwasan_thread_exit() {
@@ -262,7 +261,7 @@ extern "C" void __hwasan_thread_exit() {
hwasanThreadList().ReleaseThread(t);
}
-#if HWASAN_WITH_INTERCEPTORS
+# if HWASAN_WITH_INTERCEPTORS
static pthread_key_t tsd_key;
static bool tsd_key_inited = false;
@@ -286,22 +285,18 @@ void HwasanTSDInit() {
tsd_key_inited = true;
CHECK_EQ(0, pthread_key_create(&tsd_key, HwasanTSDDtor));
}
-#else
+# else
void HwasanTSDInit() {}
void HwasanTSDThreadInit() {}
-#endif
+# endif
-#if SANITIZER_ANDROID
-uptr *GetCurrentThreadLongPtr() {
- return (uptr *)get_android_tls_ptr();
-}
-#else
-uptr *GetCurrentThreadLongPtr() {
- return &__hwasan_tls;
-}
-#endif
+# if SANITIZER_ANDROID
+uptr *GetCurrentThreadLongPtr() { return (uptr *)get_android_tls_ptr(); }
+# else
+uptr *GetCurrentThreadLongPtr() { return &__hwasan_tls; }
+# endif
-#if SANITIZER_ANDROID
+# if SANITIZER_ANDROID
void AndroidTestTlsSlot() {
uptr kMagicValue = 0x010203040A0B0C0D;
uptr *tls_ptr = GetCurrentThreadLongPtr();
@@ -316,9 +311,9 @@ void AndroidTestTlsSlot() {
}
*tls_ptr = old_value;
}
-#else
+# else
void AndroidTestTlsSlot() {}
-#endif
+# endif
static AccessInfo GetAccessInfo(siginfo_t *info, ucontext_t *uc) {
// Access type is passed in a platform dependent way (see below) and encoded
@@ -326,32 +321,32 @@ static AccessInfo GetAccessInfo(siginfo_t *info, ucontext_t *uc) {
// recoverable. Valid values of Y are 0 to 4, which are interpreted as
// log2(access_size), and 0xF, which means that access size is passed via
// platform dependent register (see below).
-#if defined(__aarch64__)
+# if defined(__aarch64__)
// Access type is encoded in BRK immediate as 0x900 + 0xXY. For Y == 0xF,
// access size is stored in X1 register. Access address is always in X0
// register.
uptr pc = (uptr)info->si_addr;
const unsigned code = ((*(u32 *)pc) >> 5) & 0xffff;
if ((code & 0xff00) != 0x900)
- return AccessInfo{}; // Not ours.
+ return AccessInfo{}; // Not ours.
const bool is_store = code & 0x10;
const bool recover = code & 0x20;
const uptr addr = uc->uc_mcontext.regs[0];
const unsigned size_log = code & 0xf;
if (size_log > 4 && size_log != 0xf)
- return AccessInfo{}; // Not ours.
+ return AccessInfo{}; // Not ours.
const uptr size = size_log == 0xf ? uc->uc_mcontext.regs[1] : 1U << size_log;
-#elif defined(__x86_64__)
+# elif defined(__x86_64__)
// Access type is encoded in the instruction following INT3 as
// NOP DWORD ptr [EAX + 0x40 + 0xXY]. For Y == 0xF, access size is stored in
// RSI register. Access address is always in RDI register.
uptr pc = (uptr)uc->uc_mcontext.gregs[REG_RIP];
- uint8_t *nop = (uint8_t*)pc;
- if (*nop != 0x0f || *(nop + 1) != 0x1f || *(nop + 2) != 0x40 ||
+ uint8_t *nop = (uint8_t *)pc;
+ if (*nop != 0x0f || *(nop + 1) != 0x1f || *(nop + 2) != 0x40 ||
*(nop + 3) < 0x40)
- return AccessInfo{}; // Not ours.
+ return AccessInfo{}; // Not ours.
const unsigned code = *(nop + 3);
const bool is_store = code & 0x10;
@@ -359,13 +354,13 @@ static AccessInfo GetAccessInfo(siginfo_t *info, ucontext_t *uc) {
const uptr addr = uc->uc_mcontext.gregs[REG_RDI];
const unsigned size_log = code & 0xf;
if (size_log > 4 && size_log != 0xf)
- return AccessInfo{}; // Not ours.
+ return AccessInfo{}; // Not ours.
const uptr size =
size_log == 0xf ? uc->uc_mcontext.gregs[REG_RSI] : 1U << size_log;
-#else
-# error Unsupported architecture
-#endif
+# else
+# error Unsupported architecture
+# endif
return AccessInfo{addr, size, is_store, !is_store, recover};
}
@@ -378,12 +373,12 @@ static bool HwasanOnSIGTRAP(int signo, siginfo_t *info, ucontext_t *uc) {
SignalContext sig{info, uc};
HandleTagMismatch(ai, StackTrace::GetNextInstructionPc(sig.pc), sig.bp, uc);
-#if defined(__aarch64__)
+# if defined(__aarch64__)
uc->uc_mcontext.pc += 4;
-#elif defined(__x86_64__)
-#else
-# error Unsupported architecture
-#endif
+# elif defined(__x86_64__)
+# else
+# error Unsupported architecture
+# endif
return true;
}
@@ -396,7 +391,7 @@ static void OnStackUnwind(const SignalContext &sig, const void *,
void HwasanOnDeadlySignal(int signo, void *info, void *context) {
// Probably a tag mismatch.
if (signo == SIGTRAP)
- if (HwasanOnSIGTRAP(signo, (siginfo_t *)info, (ucontext_t*)context))
+ if (HwasanOnSIGTRAP(signo, (siginfo_t *)info, (ucontext_t *)context))
return;
HandleDeadlySignal(info, context, GetTid(), &OnStackUnwind, nullptr);
@@ -435,6 +430,18 @@ uptr TagMemoryAligned(uptr p, uptr size, tag_t tag) {
return AddTagToPointer(p, tag);
}
-} // namespace __hwasan
+void HwasanInstallAtForkHandler() {
+ auto before = []() {
+ HwasanAllocatorLock();
+ StackDepotLockAll();
+ };
+ auto after = []() {
+ StackDepotUnlockAll();
+ HwasanAllocatorUnlock();
+ };
+ pthread_atfork(before, after, after);
+}
+
+} // namespace __hwasan
-#endif // SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_NETBSD
+#endif // SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_NETBSD
diff --git a/compiler-rt/lib/hwasan/hwasan_report.cpp b/compiler-rt/lib/hwasan/hwasan_report.cpp
index 44047c9fdaf8..66d3d155d409 100644
--- a/compiler-rt/lib/hwasan/hwasan_report.cpp
+++ b/compiler-rt/lib/hwasan/hwasan_report.cpp
@@ -37,7 +37,7 @@ namespace __hwasan {
class ScopedReport {
public:
ScopedReport(bool fatal = false) : error_message_(1), fatal(fatal) {
- BlockingMutexLock lock(&error_message_lock_);
+ Lock lock(&error_message_lock_);
error_message_ptr_ = fatal ? &error_message_ : nullptr;
++hwasan_report_count;
}
@@ -45,7 +45,7 @@ class ScopedReport {
~ScopedReport() {
void (*report_cb)(const char *);
{
- BlockingMutexLock lock(&error_message_lock_);
+ Lock lock(&error_message_lock_);
report_cb = error_report_callback_;
error_message_ptr_ = nullptr;
}
@@ -61,7 +61,7 @@ class ScopedReport {
}
static void MaybeAppendToErrorMessage(const char *msg) {
- BlockingMutexLock lock(&error_message_lock_);
+ Lock lock(&error_message_lock_);
if (!error_message_ptr_)
return;
uptr len = internal_strlen(msg);
@@ -72,7 +72,7 @@ class ScopedReport {
}
static void SetErrorReportCallback(void (*callback)(const char *)) {
- BlockingMutexLock lock(&error_message_lock_);
+ Lock lock(&error_message_lock_);
error_report_callback_ = callback;
}
@@ -82,12 +82,12 @@ class ScopedReport {
bool fatal;
static InternalMmapVector<char> *error_message_ptr_;
- static BlockingMutex error_message_lock_;
+ static Mutex error_message_lock_;
static void (*error_report_callback_)(const char *);
};
InternalMmapVector<char> *ScopedReport::error_message_ptr_;
-BlockingMutex ScopedReport::error_message_lock_;
+Mutex ScopedReport::error_message_lock_;
void (*ScopedReport::error_report_callback_)(const char *);
// If there is an active ScopedReport, append to its error message.
@@ -351,14 +351,16 @@ static void ShowHeapOrGlobalCandidate(uptr untagged_addr, tag_t *candidate,
uptr size = GetGlobalSizeFromDescriptor(mem);
if (size == 0)
// We couldn't find the size of the global from the descriptors.
- Printf("%p is located to the %s of a global variable in (%s+0x%x)\n",
- untagged_addr, candidate == left ? "right" : "left", module_name,
- module_address);
+ Printf(
+ "%p is located to the %s of a global variable in "
+ "\n #0 0x%x (%s+0x%x)\n",
+ untagged_addr, candidate == left ? "right" : "left", mem,
+ module_name, module_address);
else
Printf(
"%p is located to the %s of a %zd-byte global variable in "
- "(%s+0x%x)\n",
- untagged_addr, candidate == left ? "right" : "left", size,
+ "\n #0 0x%x (%s+0x%x)\n",
+ untagged_addr, candidate == left ? "right" : "left", size, mem,
module_name, module_address);
}
Printf("%s", d.Default());
@@ -372,6 +374,12 @@ void PrintAddressDescription(
int num_descriptions_printed = 0;
uptr untagged_addr = UntagAddr(tagged_addr);
+ if (MemIsShadow(untagged_addr)) {
+ Printf("%s%p is HWAsan shadow memory.\n%s", d.Location(), untagged_addr,
+ d.Default());
+ return;
+ }
+
// Print some very basic information about the address, if it's a heap.
HwasanChunkView chunk = FindHeapChunkByAddress(untagged_addr);
if (uptr beg = chunk.Beg()) {
@@ -510,7 +518,7 @@ static void PrintTagInfoAroundAddr(tag_t *tag_ptr, uptr num_rows,
InternalScopedString s;
for (tag_t *row = beg_row; row < end_row; row += row_len) {
s.append("%s", row == center_row_beg ? "=>" : " ");
- s.append("%p:", row);
+ s.append("%p:", (void *)row);
for (uptr i = 0; i < row_len; i++) {
s.append("%s", row + i == tag_ptr ? "[" : " ");
print_tag(s, &row[i]);
@@ -549,28 +557,48 @@ static void PrintTagsAroundAddr(tag_t *tag_ptr) {
"description of short granule tags\n");
}
+uptr GetTopPc(StackTrace *stack) {
+ return stack->size ? StackTrace::GetPreviousInstructionPc(stack->trace[0])
+ : 0;
+}
+
void ReportInvalidFree(StackTrace *stack, uptr tagged_addr) {
ScopedReport R(flags()->halt_on_error);
uptr untagged_addr = UntagAddr(tagged_addr);
tag_t ptr_tag = GetTagFromPointer(tagged_addr);
- tag_t *tag_ptr = reinterpret_cast<tag_t*>(MemToShadow(untagged_addr));
- tag_t mem_tag = *tag_ptr;
+ tag_t *tag_ptr = nullptr;
+ tag_t mem_tag = 0;
+ if (MemIsApp(untagged_addr)) {
+ tag_ptr = reinterpret_cast<tag_t *>(MemToShadow(untagged_addr));
+ if (MemIsShadow(reinterpret_cast<uptr>(tag_ptr)))
+ mem_tag = *tag_ptr;
+ else
+ tag_ptr = nullptr;
+ }
Decorator d;
Printf("%s", d.Error());
- uptr pc = stack->size ? stack->trace[0] : 0;
+ uptr pc = GetTopPc(stack);
const char *bug_type = "invalid-free";
- Report("ERROR: %s: %s on address %p at pc %p\n", SanitizerToolName, bug_type,
- untagged_addr, pc);
+ const Thread *thread = GetCurrentThread();
+ if (thread) {
+ Report("ERROR: %s: %s on address %p at pc %p on thread T%zd\n",
+ SanitizerToolName, bug_type, untagged_addr, pc, thread->unique_id());
+ } else {
+ Report("ERROR: %s: %s on address %p at pc %p on unknown thread\n",
+ SanitizerToolName, bug_type, untagged_addr, pc);
+ }
Printf("%s", d.Access());
- Printf("tags: %02x/%02x (ptr/mem)\n", ptr_tag, mem_tag);
+ if (tag_ptr)
+ Printf("tags: %02x/%02x (ptr/mem)\n", ptr_tag, mem_tag);
Printf("%s", d.Default());
stack->Print();
PrintAddressDescription(tagged_addr, 0, nullptr);
- PrintTagsAroundAddr(tag_ptr);
+ if (tag_ptr)
+ PrintTagsAroundAddr(tag_ptr);
ReportErrorSummary(bug_type, stack);
}
@@ -578,6 +606,15 @@ void ReportInvalidFree(StackTrace *stack, uptr tagged_addr) {
void ReportTailOverwritten(StackTrace *stack, uptr tagged_addr, uptr orig_size,
const u8 *expected) {
uptr tail_size = kShadowAlignment - (orig_size % kShadowAlignment);
+ u8 actual_expected[kShadowAlignment];
+ internal_memcpy(actual_expected, expected, tail_size);
+ tag_t ptr_tag = GetTagFromPointer(tagged_addr);
+ // Short granule is stashed in the last byte of the magic string. To avoid
+ // confusion, make the expected magic string contain the short granule tag.
+ if (orig_size % kShadowAlignment != 0) {
+ actual_expected[tail_size - 1] = ptr_tag;
+ }
+
ScopedReport R(flags()->halt_on_error);
Decorator d;
uptr untagged_addr = UntagAddr(tagged_addr);
@@ -614,14 +651,13 @@ void ReportTailOverwritten(StackTrace *stack, uptr tagged_addr, uptr orig_size,
s.append("Expected: ");
for (uptr i = 0; i < kShadowAlignment - tail_size; i++)
s.append(".. ");
- for (uptr i = 0; i < tail_size; i++)
- s.append("%02x ", expected[i]);
+ for (uptr i = 0; i < tail_size; i++) s.append("%02x ", actual_expected[i]);
s.append("\n");
s.append(" ");
for (uptr i = 0; i < kShadowAlignment - tail_size; i++)
s.append(" ");
for (uptr i = 0; i < tail_size; i++)
- s.append("%s ", expected[i] != tail[i] ? "^^" : " ");
+ s.append("%s ", actual_expected[i] != tail[i] ? "^^" : " ");
s.append("\nThis error occurs when a buffer overflow overwrites memory\n"
"to the right of a heap object, but within the %zd-byte granule, e.g.\n"
@@ -647,11 +683,11 @@ void ReportTagMismatch(StackTrace *stack, uptr tagged_addr, uptr access_size,
GetCurrentThread()->stack_allocations());
Decorator d;
- Printf("%s", d.Error());
uptr untagged_addr = UntagAddr(tagged_addr);
// TODO: when possible, try to print heap-use-after-free, etc.
const char *bug_type = "tag-mismatch";
- uptr pc = stack->size ? stack->trace[0] : 0;
+ uptr pc = GetTopPc(stack);
+ Printf("%s", d.Error());
Report("ERROR: %s: %s on address %p at pc %p\n", SanitizerToolName, bug_type,
untagged_addr, pc);
@@ -666,12 +702,33 @@ void ReportTagMismatch(StackTrace *stack, uptr tagged_addr, uptr access_size,
tag_t mem_tag = *tag_ptr;
Printf("%s", d.Access());
- Printf("%s of size %zu at %p tags: %02x/%02x (ptr/mem) in thread T%zd\n",
- is_store ? "WRITE" : "READ", access_size, untagged_addr, ptr_tag,
- mem_tag, t->unique_id());
+ if (mem_tag && mem_tag < kShadowAlignment) {
+ tag_t *granule_ptr = reinterpret_cast<tag_t *>((untagged_addr + offset) &
+ ~(kShadowAlignment - 1));
+ // If offset is 0, (untagged_addr + offset) is not aligned to granules.
+ // This is the offset of the leftmost accessed byte within the bad granule.
+ u8 in_granule_offset = (untagged_addr + offset) & (kShadowAlignment - 1);
+ tag_t short_tag = granule_ptr[kShadowAlignment - 1];
+ // The first mismatch was a short granule that matched the ptr_tag.
+ if (short_tag == ptr_tag) {
+ // If the access starts after the end of the short granule, then the first
+ // bad byte is the first byte of the access; otherwise it is the first
+ // byte past the end of the short granule
+ if (mem_tag > in_granule_offset) {
+ offset += mem_tag - in_granule_offset;
+ }
+ }
+ Printf(
+ "%s of size %zu at %p tags: %02x/%02x(%02x) (ptr/mem) in thread T%zd\n",
+ is_store ? "WRITE" : "READ", access_size, untagged_addr, ptr_tag,
+ mem_tag, short_tag, t->unique_id());
+ } else {
+ Printf("%s of size %zu at %p tags: %02x/%02x (ptr/mem) in thread T%zd\n",
+ is_store ? "WRITE" : "READ", access_size, untagged_addr, ptr_tag,
+ mem_tag, t->unique_id());
+ }
if (offset != 0)
- Printf("Invalid access starting at offset [%zu, %zu)\n", offset,
- Min(access_size, static_cast<uptr>(offset) + (1 << kShadowScale)));
+ Printf("Invalid access starting at offset %zu\n", offset);
Printf("%s", d.Default());
stack->Print();
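
A worked example of the short-granule offset adjustment added above, with invented numbers: 16-byte granules are assumed, mem_tag of 5 means five valid bytes in the granule, and the access starts two bytes into it.

    // Sketch only: the arithmetic ReportTagMismatch performs when the mismatch
    // hit a short granule whose tag matches the pointer tag. All values invented.
    #include <cstdio>

    int main() {
      const unsigned long kShadowAlignment = 16;  // granule size assumed here
      unsigned long untagged_addr = 0x1000;
      unsigned long offset = 2;                   // access begins 2 bytes into the granule
      unsigned mem_tag = 5;                       // short granule: bytes 0..4 are valid
      unsigned in_granule_offset =
          (untagged_addr + offset) & (kShadowAlignment - 1);
      if (mem_tag > in_granule_offset)
        offset += mem_tag - in_granule_offset;    // first bad byte is just past the valid bytes
      std::printf("invalid access starts at offset %lu\n", offset);  // prints 5
      return 0;
    }
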
diff --git a/compiler-rt/lib/hwasan/hwasan_setjmp.S b/compiler-rt/lib/hwasan/hwasan_setjmp_aarch64.S
index 381af63363cc..744748a5101f 100644
--- a/compiler-rt/lib/hwasan/hwasan_setjmp.S
+++ b/compiler-rt/lib/hwasan/hwasan_setjmp_aarch64.S
@@ -1,4 +1,4 @@
-//===-- hwasan_setjmp.S --------------------------------------------------------===//
+//===-- hwasan_setjmp_aarch64.S -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -29,7 +29,7 @@
// Hence we have to write this function in assembly.
.section .text
-.file "hwasan_setjmp.S"
+.file "hwasan_setjmp_aarch64.S"
.global __interceptor_setjmp
ASM_TYPE_FUNCTION(__interceptor_setjmp)
@@ -80,24 +80,19 @@ __interceptor_sigsetjmp:
ASM_SIZE(__interceptor_sigsetjmp)
-.macro ALIAS first second
- .globl \second
+.macro WEAK_ALIAS first second
+ .weak \second
.equ \second\(), \first
.endm
#if SANITIZER_ANDROID
-ALIAS __interceptor_sigsetjmp, sigsetjmp
-.weak sigsetjmp
-
-ALIAS __interceptor_setjmp_bionic, setjmp
-.weak setjmp
+WEAK_ALIAS __interceptor_sigsetjmp, sigsetjmp
+WEAK_ALIAS __interceptor_setjmp_bionic, setjmp
#else
-ALIAS __interceptor_sigsetjmp, __sigsetjmp
-.weak __sigsetjmp
+WEAK_ALIAS __interceptor_sigsetjmp, __sigsetjmp
#endif
-ALIAS __interceptor_setjmp, _setjmp
-.weak _setjmp
+WEAK_ALIAS __interceptor_setjmp, _setjmp
#endif
// We do not need executable stack.
diff --git a/compiler-rt/lib/hwasan/hwasan_setjmp_x86_64.S b/compiler-rt/lib/hwasan/hwasan_setjmp_x86_64.S
new file mode 100644
index 000000000000..7566c1ea0a57
--- /dev/null
+++ b/compiler-rt/lib/hwasan/hwasan_setjmp_x86_64.S
@@ -0,0 +1,82 @@
+//===-- hwasan_setjmp_x86_64.S --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// setjmp interceptor for x86_64.
+//
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_asm.h"
+
+#if HWASAN_WITH_INTERCEPTORS && defined(__x86_64__)
+#include "sanitizer_common/sanitizer_platform.h"
+
+// We want to save the context of the calling function.
+// That requires
+// 1) No modification of the return address by this function.
+// 2) No modification of the stack pointer by this function.
+// 3) (no modification of any other saved register, but that's not really going
+// to occur, and hence isn't as much of a worry).
+//
+// There's essentially no way to ensure that the compiler will not modify the
+// stack pointer when compiling a C function.
+// Hence we have to write this function in assembly.
+//
+// TODO: Handle Intel CET.
+
+.section .text
+.file "hwasan_setjmp_x86_64.S"
+
+.global __interceptor_setjmp
+ASM_TYPE_FUNCTION(__interceptor_setjmp)
+__interceptor_setjmp:
+ CFI_STARTPROC
+ _CET_ENDBR
+ xorl %esi, %esi
+ jmp __interceptor_sigsetjmp
+ CFI_ENDPROC
+ASM_SIZE(__interceptor_setjmp)
+
+.global __interceptor_sigsetjmp
+ASM_TYPE_FUNCTION(__interceptor_sigsetjmp)
+__interceptor_sigsetjmp:
+ CFI_STARTPROC
+ _CET_ENDBR
+
+ // Save callee save registers.
+ mov %rbx, (0*8)(%rdi)
+ mov %rbp, (1*8)(%rdi)
+ mov %r12, (2*8)(%rdi)
+ mov %r13, (3*8)(%rdi)
+ mov %r14, (4*8)(%rdi)
+ mov %r15, (5*8)(%rdi)
+
+ // Save SP as it was in caller's frame.
+ lea 8(%rsp), %rdx
+ mov %rdx, (6*8)(%rdi)
+
+ // Save return address.
+ mov (%rsp), %rax
+ mov %rax, (7*8)(%rdi)
+
+ jmp __sigjmp_save
+
+ CFI_ENDPROC
+ASM_SIZE(__interceptor_sigsetjmp)
+
+
+.macro WEAK_ALIAS first second
+ .weak \second
+ .equ \second\(), \first
+.endm
+
+WEAK_ALIAS __interceptor_sigsetjmp, __sigsetjmp
+WEAK_ALIAS __interceptor_setjmp, _setjmp
+#endif
+
+// We do not need executable stack.
+NO_EXEC_STACK_DIRECTIVE
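
The new interceptor has to be indistinguishable from the real setjmp for its callers. A minimal caller-side sketch follows; there is nothing HWASan-specific in it, it is simply the pattern whose saved context the assembly above must preserve, and on a glibc target it may resolve to the weak-aliased interceptor when HWASAN_WITH_INTERCEPTORS is enabled.

    // Sketch only: ordinary setjmp/longjmp use exercising the saved-context path.
    #include <csetjmp>
    #include <cstdio>

    static std::jmp_buf env;

    static void fail() { std::longjmp(env, 1); }

    int main() {
      if (setjmp(env) == 0) {
        fail();                           // jumps back through the saved context
      } else {
        std::puts("returned via longjmp");
      }
      return 0;
    }
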
diff --git a/compiler-rt/lib/hwasan/hwasan_thread.cpp b/compiler-rt/lib/hwasan/hwasan_thread.cpp
index ee747a3beea5..c776ae179cec 100644
--- a/compiler-rt/lib/hwasan/hwasan_thread.cpp
+++ b/compiler-rt/lib/hwasan/hwasan_thread.cpp
@@ -1,15 +1,15 @@
+#include "hwasan_thread.h"
+
#include "hwasan.h"
+#include "hwasan_interface_internal.h"
#include "hwasan_mapping.h"
-#include "hwasan_thread.h"
#include "hwasan_poisoning.h"
-#include "hwasan_interface_internal.h"
-
+#include "sanitizer_common/sanitizer_atomic.h"
#include "sanitizer_common/sanitizer_file.h"
#include "sanitizer_common/sanitizer_placement_new.h"
#include "sanitizer_common/sanitizer_tls_get_addr.h"
-
namespace __hwasan {
static u32 RandomSeed() {
@@ -27,6 +27,7 @@ static u32 RandomSeed() {
void Thread::InitRandomState() {
random_state_ = flags()->random_tags ? RandomSeed() : unique_id_;
+ random_state_inited_ = true;
// Push a random number of zeros onto the ring buffer so that the first stack
// tag base will be random.
@@ -40,18 +41,19 @@ void Thread::Init(uptr stack_buffer_start, uptr stack_buffer_size,
CHECK_EQ(0, stack_top_);
CHECK_EQ(0, stack_bottom_);
- static u64 unique_id;
- unique_id_ = unique_id++;
+ static atomic_uint64_t unique_id;
+ unique_id_ = atomic_fetch_add(&unique_id, 1, memory_order_relaxed);
+
if (auto sz = flags()->heap_history_size)
heap_allocations_ = HeapAllocationsRingBuffer::New(sz);
- InitStackAndTls(state);
#if !SANITIZER_FUCHSIA
// Do not initialize the stack ring buffer just yet on Fuchsia. Threads will
// be initialized before we enter the thread itself, so we will instead call
// this later.
InitStackRingBuffer(stack_buffer_start, stack_buffer_size);
#endif
+ InitStackAndTls(state);
}
void Thread::InitStackRingBuffer(uptr stack_buffer_start,
@@ -108,10 +110,9 @@ void Thread::Destroy() {
}
void Thread::Print(const char *Prefix) {
- Printf("%sT%zd %p stack: [%p,%p) sz: %zd tls: [%p,%p)\n", Prefix,
- unique_id_, this, stack_bottom(), stack_top(),
- stack_top() - stack_bottom(),
- tls_begin(), tls_end());
+ Printf("%sT%zd %p stack: [%p,%p) sz: %zd tls: [%p,%p)\n", Prefix, unique_id_,
+ (void *)this, stack_bottom(), stack_top(),
+ stack_top() - stack_bottom(), tls_begin(), tls_end());
}
static u32 xorshift(u32 state) {
@@ -124,17 +125,21 @@ static u32 xorshift(u32 state) {
// Generate a (pseudo-)random non-zero tag.
tag_t Thread::GenerateRandomTag(uptr num_bits) {
DCHECK_GT(num_bits, 0);
- if (tagging_disabled_) return 0;
+ if (tagging_disabled_)
+ return 0;
tag_t tag;
const uptr tag_mask = (1ULL << num_bits) - 1;
do {
if (flags()->random_tags) {
- if (!random_buffer_)
+ if (!random_buffer_) {
+ EnsureRandomStateInited();
random_buffer_ = random_state_ = xorshift(random_state_);
+ }
CHECK(random_buffer_);
tag = random_buffer_ & tag_mask;
random_buffer_ >>= num_bits;
} else {
+ EnsureRandomStateInited();
random_state_ += 1;
tag = random_state_ & tag_mask;
}
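
A rough sketch of the tag draw that GenerateRandomTag performs in the random_tags case. The xorshift body is not shown in this hunk, so the usual xorshift32 shift constants and num_bits == 8 are assumptions here; the refill-per-iteration simplification is also illustrative only.

    // Sketch only: refill a small random buffer with xorshift32 (constants assumed)
    // and mask off num_bits; the loop retries until the tag is non-zero.
    #include <cstdio>

    static unsigned xorshift(unsigned state) {
      state ^= state << 13;
      state ^= state >> 17;
      state ^= state << 5;
      return state;
    }

    int main() {
      unsigned random_state = 42;               // seed invented for illustration
      unsigned num_bits = 8;                    // assumed tag width
      unsigned tag_mask = (1u << num_bits) - 1;
      unsigned tag;
      do {
        unsigned random_buffer = random_state = xorshift(random_state);
        tag = random_buffer & tag_mask;
      } while (!tag);
      std::printf("tag = 0x%02x\n", tag);
      return 0;
    }
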
diff --git a/compiler-rt/lib/hwasan/hwasan_thread.h b/compiler-rt/lib/hwasan/hwasan_thread.h
index 9f20afe1dc76..3db7c1a9454f 100644
--- a/compiler-rt/lib/hwasan/hwasan_thread.h
+++ b/compiler-rt/lib/hwasan/hwasan_thread.h
@@ -28,12 +28,17 @@ class Thread {
void Init(uptr stack_buffer_start, uptr stack_buffer_size,
const InitState *state = nullptr);
- void InitRandomState();
+
void InitStackAndTls(const InitState *state = nullptr);
// Must be called from the thread itself.
void InitStackRingBuffer(uptr stack_buffer_start, uptr stack_buffer_size);
+ inline void EnsureRandomStateInited() {
+ if (UNLIKELY(!random_state_inited_))
+ InitRandomState();
+ }
+
void Destroy();
uptr stack_top() { return stack_top_; }
@@ -70,6 +75,7 @@ class Thread {
// via mmap() and *must* be valid in zero-initialized state.
void ClearShadowForThreadStackAndTLS();
void Print(const char *prefix);
+ void InitRandomState();
uptr vfork_spill_;
uptr stack_top_;
uptr stack_bottom_;
@@ -89,6 +95,8 @@ class Thread {
bool announced_;
+ bool random_state_inited_; // Whether InitRandomState() has been called.
+
friend struct ThreadListHead;
};
diff --git a/compiler-rt/lib/hwasan/hwasan_type_test.cpp b/compiler-rt/lib/hwasan/hwasan_type_test.cpp
index 8cff495bae15..5307073fb40b 100644
--- a/compiler-rt/lib/hwasan/hwasan_type_test.cpp
+++ b/compiler-rt/lib/hwasan/hwasan_type_test.cpp
@@ -19,7 +19,7 @@
#define CHECK_TYPE_SIZE_FITS(TYPE) \
COMPILER_CHECK(sizeof(__hw_##TYPE) <= sizeof(TYPE))
-#if HWASAN_WITH_INTERCEPTORS && defined(__aarch64__)
+#if HWASAN_WITH_INTERCEPTORS
CHECK_TYPE_SIZE_FITS(jmp_buf);
CHECK_TYPE_SIZE_FITS(sigjmp_buf);
#endif
diff --git a/compiler-rt/lib/interception/interception_win.cpp b/compiler-rt/lib/interception/interception_win.cpp
index 98bc756ae53a..38b8c058246a 100644
--- a/compiler-rt/lib/interception/interception_win.cpp
+++ b/compiler-rt/lib/interception/interception_win.cpp
@@ -56,7 +56,7 @@
// tramp: jmp QWORD [addr]
// addr: .bytes <hook>
//
-// Note: <real> is equilavent to <label>.
+// Note: <real> is equivalent to <label>.
//
// 3) HotPatch
//
@@ -398,8 +398,42 @@ static uptr AllocateMemoryForTrampoline(uptr image_address, size_t size) {
return allocated_space;
}
+// The following prologues cannot be patched because they end with a short
+// backward jump into the very bytes the patch would overwrite.
+
+// ntdll!wcslen in Win11
+// 488bc1 mov rax,rcx
+// 0fb710 movzx edx,word ptr [rax]
+// 4883c002 add rax,2
+// 6685d2 test dx,dx
+// 75f4 jne -12
+static const u8 kPrologueWithShortJump1[] = {
+ 0x48, 0x8b, 0xc1, 0x0f, 0xb7, 0x10, 0x48, 0x83,
+ 0xc0, 0x02, 0x66, 0x85, 0xd2, 0x75, 0xf4,
+};
+
+// ntdll!strrchr in Win11
+// 4c8bc1 mov r8,rcx
+// 8a01 mov al,byte ptr [rcx]
+// 48ffc1 inc rcx
+// 84c0 test al,al
+// 75f7 jne -9
+static const u8 kPrologueWithShortJump2[] = {
+ 0x4c, 0x8b, 0xc1, 0x8a, 0x01, 0x48, 0xff, 0xc1,
+ 0x84, 0xc0, 0x75, 0xf7,
+};
+
// Returns 0 on error.
static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
+#if SANITIZER_WINDOWS64
+ if (memcmp((u8*)address, kPrologueWithShortJump1,
+ sizeof(kPrologueWithShortJump1)) == 0 ||
+ memcmp((u8*)address, kPrologueWithShortJump2,
+ sizeof(kPrologueWithShortJump2)) == 0) {
+ return 0;
+ }
+#endif
+
switch (*(u64*)address) {
case 0x90909090909006EB: // stub: jmp over 6 x nop.
return 8;
@@ -477,6 +511,14 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
case 0xA1: // A1 XX XX XX XX XX XX XX XX :
// movabs eax, dword ptr ds:[XXXXXXXX]
return 9;
+
+ case 0x83:
+ const u8 next_byte = *(u8*)(address + 1);
+ const u8 mod = next_byte >> 6;
+ const u8 rm = next_byte & 7;
+ if (mod == 1 && rm == 4)
+ return 5; // 83 ModR/M SIB Disp8 Imm8
+ // add|or|adc|sbb|and|sub|xor|cmp [r+disp8], imm8
}
switch (*(u16*)address) {
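
A worked decode of the new 0x83 case on an invented byte sequence (83 64 24 28 00, i.e. and dword ptr [rsp+0x28], 0), showing why mod == 1 and rm == 4 imply a 5-byte instruction.

    // Sketch only: opcode 0x83 + ModR/M with mod=01 (disp8 follows) and rm=100
    // (SIB follows) gives opcode + ModR/M + SIB + disp8 + imm8 = 5 bytes.
    #include <cstdio>

    int main() {
      const unsigned char insn[] = {0x83, 0x64, 0x24, 0x28, 0x00};
      unsigned char next_byte = insn[1];        // ModR/M byte
      unsigned char mod = next_byte >> 6;       // 0b01: 8-bit displacement follows
      unsigned char rm = next_byte & 7;         // 0b100: a SIB byte follows
      if (mod == 1 && rm == 4)
        std::printf("instruction length = %d\n", 5);
      return 0;
    }
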
@@ -493,6 +535,8 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
case 0x5641: // push r14
case 0x5741: // push r15
case 0x9066: // Two-byte NOP
+ case 0xc084: // test al, al
+ case 0x018a: // mov al, byte ptr [rcx]
return 2;
case 0x058B: // 8B 05 XX XX XX XX : mov eax, dword ptr [XX XX XX XX]
@@ -509,6 +553,7 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
case 0xd12b48: // 48 2b d1 : sub rdx, rcx
case 0x07c1f6: // f6 c1 07 : test cl, 0x7
case 0xc98548: // 48 85 C9 : test rcx, rcx
+ case 0xd28548: // 48 85 d2 : test rdx, rdx
case 0xc0854d: // 4d 85 c0 : test r8, r8
case 0xc2b60f: // 0f b6 c2 : movzx eax, dl
case 0xc03345: // 45 33 c0 : xor r8d, r8d
@@ -522,6 +567,7 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
case 0xca2b48: // 48 2b ca : sub rcx, rdx
case 0x10b70f: // 0f b7 10 : movzx edx, WORD PTR [rax]
    case 0xc00b4d: // 4d 0b c0 : or r8, r8
+ case 0xc08b41: // 41 8b c0 : mov eax, r8d
case 0xd18b48: // 48 8b d1 : mov rdx, rcx
case 0xdc8b4c: // 4c 8b dc : mov r11, rsp
case 0xd18b4c: // 4c 8b d1 : mov r10, rcx
diff --git a/compiler-rt/lib/lsan/lsan_allocator.h b/compiler-rt/lib/lsan/lsan_allocator.h
index 9d763789154f..45c6ac406f8a 100644
--- a/compiler-rt/lib/lsan/lsan_allocator.h
+++ b/compiler-rt/lib/lsan/lsan_allocator.h
@@ -50,7 +50,7 @@ struct ChunkMetadata {
};
#if defined(__mips64) || defined(__aarch64__) || defined(__i386__) || \
- defined(__arm__) || SANITIZER_RISCV64
+ defined(__arm__) || SANITIZER_RISCV64 || defined(__hexagon__)
template <typename AddressSpaceViewTy>
struct AP32 {
static const uptr kSpaceBeg = 0;
diff --git a/compiler-rt/lib/lsan/lsan_common.cpp b/compiler-rt/lib/lsan/lsan_common.cpp
index 74400d2e8426..308dbb3e41da 100644
--- a/compiler-rt/lib/lsan/lsan_common.cpp
+++ b/compiler-rt/lib/lsan/lsan_common.cpp
@@ -30,7 +30,7 @@ namespace __lsan {
// This mutex is used to prevent races between DoLeakCheck and IgnoreObject, and
// also to protect the global list of root regions.
-BlockingMutex global_mutex(LINKER_INITIALIZED);
+Mutex global_mutex;
Flags lsan_flags;
@@ -79,7 +79,8 @@ class LeakSuppressionContext {
int suppression_types_num)
: context(supprression_types, suppression_types_num) {}
- Suppression *GetSuppressionForStack(u32 stack_trace_id);
+ Suppression *GetSuppressionForStack(u32 stack_trace_id,
+ const StackTrace &stack);
const InternalMmapVector<u32> &GetSortedSuppressedStacks() {
if (!suppressed_stacks_sorted) {
@@ -130,18 +131,13 @@ static LeakSuppressionContext *GetSuppressionContext() {
return suppression_ctx;
}
-static InternalMmapVector<RootRegion> *root_regions;
+static InternalMmapVectorNoCtor<RootRegion> root_regions;
-InternalMmapVector<RootRegion> const *GetRootRegions() { return root_regions; }
-
-void InitializeRootRegions() {
- CHECK(!root_regions);
- ALIGNED(64) static char placeholder[sizeof(InternalMmapVector<RootRegion>)];
- root_regions = new (placeholder) InternalMmapVector<RootRegion>();
+InternalMmapVectorNoCtor<RootRegion> const *GetRootRegions() {
+ return &root_regions;
}
void InitCommonLsan() {
- InitializeRootRegions();
if (common_flags()->detect_leaks) {
// Initialization which can fail or print warnings should only be done if
// LSan is actually enabled.
@@ -187,7 +183,8 @@ void ScanRangeForPointers(uptr begin, uptr end,
const char *region_type, ChunkTag tag) {
CHECK(tag == kReachable || tag == kIndirectlyLeaked);
const uptr alignment = flags()->pointer_alignment();
- LOG_POINTERS("Scanning %s range %p-%p.\n", region_type, begin, end);
+ LOG_POINTERS("Scanning %s range %p-%p.\n", region_type, (void *)begin,
+ (void *)end);
uptr pp = begin;
if (pp % alignment)
pp = pp + alignment - pp % alignment;
@@ -206,13 +203,15 @@ void ScanRangeForPointers(uptr begin, uptr end,
LOG_POINTERS(
"%p is poisoned: ignoring %p pointing into chunk %p-%p of size "
"%zu.\n",
- pp, p, chunk, chunk + m.requested_size(), m.requested_size());
+ (void *)pp, p, (void *)chunk, (void *)(chunk + m.requested_size()),
+ m.requested_size());
continue;
}
m.set_tag(tag);
- LOG_POINTERS("%p: found %p pointing into chunk %p-%p of size %zu.\n", pp, p,
- chunk, chunk + m.requested_size(), m.requested_size());
+ LOG_POINTERS("%p: found %p pointing into chunk %p-%p of size %zu.\n",
+ (void *)pp, p, (void *)chunk,
+ (void *)(chunk + m.requested_size()), m.requested_size());
if (frontier)
frontier->push_back(chunk);
}
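
The round-up at the top of ScanRangeForPointers (the `pp % alignment` context lines above) in isolation, with invented values and pointer_alignment() assumed to be the word size.

    // Sketch only: advance the scan cursor to the first aligned address in the range.
    #include <cstdio>

    int main() {
      unsigned long begin = 0x1003;
      unsigned long alignment = 8;              // assumed pointer_alignment()
      unsigned long pp = begin;
      if (pp % alignment)
        pp = pp + alignment - pp % alignment;
      std::printf("scanning starts at 0x%lx\n", pp);  // 0x1008
      return 0;
    }
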
@@ -280,7 +279,7 @@ static void ProcessThreads(SuspendedThreadsList const &suspended_threads,
InternalMmapVector<uptr> registers;
for (uptr i = 0; i < suspended_threads.ThreadCount(); i++) {
tid_t os_id = static_cast<tid_t>(suspended_threads.GetThreadID(i));
- LOG_THREADS("Processing thread %d.\n", os_id);
+ LOG_THREADS("Processing thread %llu.\n", os_id);
uptr stack_begin, stack_end, tls_begin, tls_end, cache_begin, cache_end;
DTLS *dtls;
bool thread_found = GetThreadRangesLocked(os_id, &stack_begin, &stack_end,
@@ -289,14 +288,14 @@ static void ProcessThreads(SuspendedThreadsList const &suspended_threads,
if (!thread_found) {
// If a thread can't be found in the thread registry, it's probably in the
// process of destruction. Log this event and move on.
- LOG_THREADS("Thread %d not found in registry.\n", os_id);
+ LOG_THREADS("Thread %llu not found in registry.\n", os_id);
continue;
}
uptr sp;
PtraceRegistersStatus have_registers =
suspended_threads.GetRegistersAndSP(i, &registers, &sp);
if (have_registers != REGISTERS_AVAILABLE) {
- Report("Unable to get registers from thread %d.\n", os_id);
+ Report("Unable to get registers from thread %llu.\n", os_id);
// If unable to get SP, consider the entire stack to be reachable unless
// GetRegistersAndSP failed with ESRCH.
if (have_registers == REGISTERS_UNAVAILABLE_FATAL) continue;
@@ -312,7 +311,8 @@ static void ProcessThreads(SuspendedThreadsList const &suspended_threads,
}
if (flags()->use_stacks) {
- LOG_THREADS("Stack at %p-%p (SP = %p).\n", stack_begin, stack_end, sp);
+ LOG_THREADS("Stack at %p-%p (SP = %p).\n", (void *)stack_begin,
+ (void *)stack_end, (void *)sp);
if (sp < stack_begin || sp >= stack_end) {
// SP is outside the recorded stack range (e.g. the thread is running a
// signal handler on alternate stack, or swapcontext was used).
@@ -326,7 +326,7 @@ static void ProcessThreads(SuspendedThreadsList const &suspended_threads,
stack_begin += page_size;
}
LOG_THREADS("Skipped %d guard page(s) to obtain stack %p-%p.\n",
- skipped, stack_begin, stack_end);
+ skipped, (void *)stack_begin, (void *)stack_end);
} else {
// Shrink the stack range to ignore out-of-scope values.
stack_begin = sp;
@@ -338,7 +338,7 @@ static void ProcessThreads(SuspendedThreadsList const &suspended_threads,
if (flags()->use_tls) {
if (tls_begin) {
- LOG_THREADS("TLS at %p-%p.\n", tls_begin, tls_end);
+ LOG_THREADS("TLS at %p-%p.\n", (void *)tls_begin, (void *)tls_end);
// If the tls and cache ranges don't overlap, scan full tls range,
// otherwise, only scan the non-overlapping portions
if (cache_begin == cache_end || tls_end < cache_begin ||
@@ -372,7 +372,8 @@ static void ProcessThreads(SuspendedThreadsList const &suspended_threads,
uptr dtls_beg = dtv.beg;
uptr dtls_end = dtls_beg + dtv.size;
if (dtls_beg < dtls_end) {
- LOG_THREADS("DTLS %zu at %p-%p.\n", id, dtls_beg, dtls_end);
+ LOG_THREADS("DTLS %d at %p-%p.\n", id, (void *)dtls_beg,
+ (void *)dtls_end);
ScanRangeForPointers(dtls_beg, dtls_end, frontier, "DTLS",
kReachable);
}
@@ -380,7 +381,7 @@ static void ProcessThreads(SuspendedThreadsList const &suspended_threads,
} else {
// We are handling a thread with DTLS under destruction. Log about
// this and continue.
- LOG_THREADS("Thread %d has DTLS under destruction.\n", os_id);
+ LOG_THREADS("Thread %llu has DTLS under destruction.\n", os_id);
}
#endif
}
@@ -398,8 +399,9 @@ void ScanRootRegion(Frontier *frontier, const RootRegion &root_region,
uptr intersection_end = Min(region_end, root_region.begin + root_region.size);
if (intersection_begin >= intersection_end) return;
LOG_POINTERS("Root region %p-%p intersects with mapped region %p-%p (%s)\n",
- root_region.begin, root_region.begin + root_region.size,
- region_begin, region_end,
+ (void *)root_region.begin,
+ (void *)(root_region.begin + root_region.size),
+ (void *)region_begin, (void *)region_end,
is_readable ? "readable" : "unreadable");
if (is_readable)
ScanRangeForPointers(intersection_begin, intersection_end, frontier, "ROOT",
@@ -419,10 +421,8 @@ static void ProcessRootRegion(Frontier *frontier,
// Scans root regions for heap pointers.
static void ProcessRootRegions(Frontier *frontier) {
if (!flags()->use_root_regions) return;
- CHECK(root_regions);
- for (uptr i = 0; i < root_regions->size(); i++) {
- ProcessRootRegion(frontier, (*root_regions)[i]);
- }
+ for (uptr i = 0; i < root_regions.size(); i++)
+ ProcessRootRegion(frontier, root_regions[i]);
}
static void FloodFillTag(Frontier *frontier, ChunkTag tag) {
@@ -459,8 +459,8 @@ static void IgnoredSuppressedCb(uptr chunk, void *arg) {
if (idx >= suppressed.size() || m.stack_trace_id() != suppressed[idx])
return;
- LOG_POINTERS("Suppressed: chunk %p-%p of size %zu.\n", chunk,
- chunk + m.requested_size(), m.requested_size());
+ LOG_POINTERS("Suppressed: chunk %p-%p of size %zu.\n", (void *)chunk,
+ (void *)(chunk + m.requested_size()), m.requested_size());
m.set_tag(kIgnored);
}
@@ -471,15 +471,13 @@ static void CollectIgnoredCb(uptr chunk, void *arg) {
chunk = GetUserBegin(chunk);
LsanMetadata m(chunk);
if (m.allocated() && m.tag() == kIgnored) {
- LOG_POINTERS("Ignored: chunk %p-%p of size %zu.\n",
- chunk, chunk + m.requested_size(), m.requested_size());
+ LOG_POINTERS("Ignored: chunk %p-%p of size %zu.\n", (void *)chunk,
+ (void *)(chunk + m.requested_size()), m.requested_size());
reinterpret_cast<Frontier *>(arg)->push_back(chunk);
}
}
-static uptr GetCallerPC(u32 stack_id, StackDepotReverseMap *map) {
- CHECK(stack_id);
- StackTrace stack = map->Get(stack_id);
+static uptr GetCallerPC(const StackTrace &stack) {
// The top frame is our malloc/calloc/etc. The next frame is the caller.
if (stack.size >= 2)
return stack.trace[1];
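
GetCallerPC's frame convention, spelled out with invented addresses: trace[0] is the allocator entry point itself, so the user's caller is trace[1] whenever the stack has at least two frames.

    // Sketch only: the same index choice GetCallerPC makes, on a fake trace.
    #include <cstdio>

    int main() {
      unsigned long trace[] = {0x401000 /* malloc interceptor */,
                               0x402345 /* user code */};
      unsigned size = 2;
      unsigned long caller_pc = size >= 2 ? trace[1] : 0;
      std::printf("caller pc = 0x%lx\n", caller_pc);
      return 0;
    }
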
@@ -488,7 +486,6 @@ static uptr GetCallerPC(u32 stack_id, StackDepotReverseMap *map) {
struct InvalidPCParam {
Frontier *frontier;
- StackDepotReverseMap *stack_depot_reverse_map;
bool skip_linker_allocations;
};
@@ -503,7 +500,7 @@ static void MarkInvalidPCCb(uptr chunk, void *arg) {
u32 stack_id = m.stack_trace_id();
uptr caller_pc = 0;
if (stack_id > 0)
- caller_pc = GetCallerPC(stack_id, param->stack_depot_reverse_map);
+ caller_pc = GetCallerPC(StackDepotGet(stack_id));
// If caller_pc is unknown, this chunk may be allocated in a coroutine. Mark
// it as reachable, as we can't properly report its allocation stack anyway.
if (caller_pc == 0 || (param->skip_linker_allocations &&
@@ -534,11 +531,9 @@ static void MarkInvalidPCCb(uptr chunk, void *arg) {
// which we don't care about).
// On all other platforms, this simply checks to ensure that the caller pc is
// valid before reporting chunks as leaked.
-void ProcessPC(Frontier *frontier) {
- StackDepotReverseMap stack_depot_reverse_map;
+static void ProcessPC(Frontier *frontier) {
InvalidPCParam arg;
arg.frontier = frontier;
- arg.stack_depot_reverse_map = &stack_depot_reverse_map;
arg.skip_linker_allocations =
flags()->use_tls && flags()->use_ld_allocations && GetLinker() != nullptr;
ForEachChunk(MarkInvalidPCCb, &arg);
@@ -584,11 +579,6 @@ static void ResetTagsCb(uptr chunk, void *arg) {
m.set_tag(kDirectlyLeaked);
}
-static void PrintStackTraceById(u32 stack_trace_id) {
- CHECK(stack_trace_id);
- StackDepotGet(stack_trace_id).Print();
-}
-
// ForEachChunk callback. Aggregates information about unreachable chunks into
// a LeakReport.
static void CollectLeaksCb(uptr chunk, void *arg) {
@@ -598,16 +588,7 @@ static void CollectLeaksCb(uptr chunk, void *arg) {
LsanMetadata m(chunk);
if (!m.allocated()) return;
if (m.tag() == kDirectlyLeaked || m.tag() == kIndirectlyLeaked) {
- u32 resolution = flags()->resolution;
- u32 stack_trace_id = 0;
- if (resolution > 0) {
- StackTrace stack = StackDepotGet(m.stack_trace_id());
- stack.size = Min(stack.size, resolution);
- stack_trace_id = StackDepotPut(stack);
- } else {
- stack_trace_id = m.stack_trace_id();
- }
- leak_report->AddLeakedChunk(chunk, stack_trace_id, m.requested_size(),
+ leak_report->AddLeakedChunk(chunk, m.stack_trace_id(), m.requested_size(),
m.tag());
}
}
@@ -635,8 +616,9 @@ static void ReportIfNotSuspended(ThreadContextBase *tctx, void *arg) {
if (tctx->status == ThreadStatusRunning) {
uptr i = InternalLowerBound(suspended_threads, tctx->os_id);
if (i >= suspended_threads.size() || suspended_threads[i] != tctx->os_id)
- Report("Running thread %d was not suspended. False leaks are possible.\n",
- tctx->os_id);
+ Report(
+ "Running thread %llu was not suspended. False leaks are possible.\n",
+ tctx->os_id);
}
}
@@ -742,7 +724,7 @@ static bool has_reported_leaks = false;
bool HasReportedLeaks() { return has_reported_leaks; }
void DoLeakCheck() {
- BlockingMutexLock l(&global_mutex);
+ Lock l(&global_mutex);
static bool already_done;
if (already_done) return;
already_done = true;
@@ -751,7 +733,7 @@ void DoLeakCheck() {
}
static int DoRecoverableLeakCheck() {
- BlockingMutexLock l(&global_mutex);
+ Lock l(&global_mutex);
bool have_leaks = CheckForLeaks();
return have_leaks ? 1 : 0;
}
@@ -780,9 +762,8 @@ Suppression *LeakSuppressionContext::GetSuppressionForAddr(uptr addr) {
}
Suppression *LeakSuppressionContext::GetSuppressionForStack(
- u32 stack_trace_id) {
+ u32 stack_trace_id, const StackTrace &stack) {
LazyInit();
- StackTrace stack = StackDepotGet(stack_trace_id);
for (uptr i = 0; i < stack.size; i++) {
Suppression *s = GetSuppressionForAddr(
StackTrace::GetPreviousInstructionPc(stack.trace[i]));
@@ -807,6 +788,13 @@ const uptr kMaxLeaksConsidered = 5000;
void LeakReport::AddLeakedChunk(uptr chunk, u32 stack_trace_id,
uptr leaked_size, ChunkTag tag) {
CHECK(tag == kDirectlyLeaked || tag == kIndirectlyLeaked);
+
+ if (u32 resolution = flags()->resolution) {
+ StackTrace stack = StackDepotGet(stack_trace_id);
+ stack.size = Min(stack.size, resolution);
+ stack_trace_id = StackDepotPut(stack);
+ }
+
bool is_directly_leaked = (tag == kDirectlyLeaked);
uptr i;
for (i = 0; i < leaks_.size(); i++) {
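
The effect of the resolution handling just moved into AddLeakedChunk, sketched without the real stack depot: truncating two different allocation stacks to their top `resolution` frames makes them compare equal, so their leaks are aggregated under one id. StackDepotGet/StackDepotPut are replaced by plain arrays here, which is purely illustrative.

    // Sketch only: why a small `resolution` merges leak reports; only the
    // truncation logic mirrors the patch.
    #include <algorithm>
    #include <cstdio>

    int main() {
      unsigned long a[] = {0x1001, 0x2002, 0x3003};   // allocation stack 1
      unsigned long b[] = {0x1001, 0x2002, 0x4004};   // allocation stack 2
      unsigned resolution = 2;                        // flags()->resolution
      unsigned size_a = std::min(3u, resolution);
      unsigned size_b = std::min(3u, resolution);
      bool merged = size_a == size_b && std::equal(a, a + size_a, b);
      std::printf("aggregated after truncation: %s\n", merged ? "yes" : "no");
      return 0;
    }
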
@@ -869,7 +857,8 @@ void LeakReport::PrintReportForLeak(uptr index) {
leaks_[index].total_size, leaks_[index].hit_count);
Printf("%s", d.Default());
- PrintStackTraceById(leaks_[index].stack_trace_id);
+ CHECK(leaks_[index].stack_trace_id);
+ StackDepotGet(leaks_[index].stack_trace_id).Print();
if (flags()->report_objects) {
Printf("Objects leaked above:\n");
@@ -882,7 +871,7 @@ void LeakReport::PrintLeakedObjectsForLeak(uptr index) {
u32 leak_id = leaks_[index].id;
for (uptr j = 0; j < leaked_objects_.size(); j++) {
if (leaked_objects_[j].leak_id == leak_id)
- Printf("%p (%zu bytes)\n", leaked_objects_[j].addr,
+ Printf("%p (%zu bytes)\n", (void *)leaked_objects_[j].addr,
leaked_objects_[j].size);
}
}
@@ -905,8 +894,8 @@ uptr LeakReport::ApplySuppressions() {
LeakSuppressionContext *suppressions = GetSuppressionContext();
uptr new_suppressions = false;
for (uptr i = 0; i < leaks_.size(); i++) {
- Suppression *s =
- suppressions->GetSuppressionForStack(leaks_[i].stack_trace_id);
+ Suppression *s = suppressions->GetSuppressionForStack(
+ leaks_[i].stack_trace_id, StackDepotGet(leaks_[i].stack_trace_id));
if (s) {
s->weight += leaks_[i].total_size;
atomic_store_relaxed(&s->hit_count, atomic_load_relaxed(&s->hit_count) +
@@ -954,7 +943,7 @@ void __lsan_ignore_object(const void *p) {
return;
// Cannot use PointsIntoChunk or LsanMetadata here, since the allocator is not
// locked.
- BlockingMutexLock l(&global_mutex);
+ Lock l(&global_mutex);
IgnoreObjectResult res = IgnoreObjectLocked(p);
if (res == kIgnoreObjectInvalid)
VReport(1, "__lsan_ignore_object(): no heap object found at %p", p);
@@ -969,34 +958,32 @@ void __lsan_ignore_object(const void *p) {
SANITIZER_INTERFACE_ATTRIBUTE
void __lsan_register_root_region(const void *begin, uptr size) {
#if CAN_SANITIZE_LEAKS
- BlockingMutexLock l(&global_mutex);
- CHECK(root_regions);
+ Lock l(&global_mutex);
RootRegion region = {reinterpret_cast<uptr>(begin), size};
- root_regions->push_back(region);
- VReport(1, "Registered root region at %p of size %llu\n", begin, size);
+ root_regions.push_back(region);
+ VReport(1, "Registered root region at %p of size %zu\n", begin, size);
#endif // CAN_SANITIZE_LEAKS
}
SANITIZER_INTERFACE_ATTRIBUTE
void __lsan_unregister_root_region(const void *begin, uptr size) {
#if CAN_SANITIZE_LEAKS
- BlockingMutexLock l(&global_mutex);
- CHECK(root_regions);
+ Lock l(&global_mutex);
bool removed = false;
- for (uptr i = 0; i < root_regions->size(); i++) {
- RootRegion region = (*root_regions)[i];
+ for (uptr i = 0; i < root_regions.size(); i++) {
+ RootRegion region = root_regions[i];
if (region.begin == reinterpret_cast<uptr>(begin) && region.size == size) {
removed = true;
- uptr last_index = root_regions->size() - 1;
- (*root_regions)[i] = (*root_regions)[last_index];
- root_regions->pop_back();
- VReport(1, "Unregistered root region at %p of size %llu\n", begin, size);
+ uptr last_index = root_regions.size() - 1;
+ root_regions[i] = root_regions[last_index];
+ root_regions.pop_back();
+ VReport(1, "Unregistered root region at %p of size %zu\n", begin, size);
break;
}
}
if (!removed) {
Report(
- "__lsan_unregister_root_region(): region at %p of size %llu has not "
+ "__lsan_unregister_root_region(): region at %p of size %zu has not "
"been registered.\n",
begin, size);
Die();
diff --git a/compiler-rt/lib/lsan/lsan_common.h b/compiler-rt/lib/lsan/lsan_common.h
index 776ca60b1e97..f9b55e4e8006 100644
--- a/compiler-rt/lib/lsan/lsan_common.h
+++ b/compiler-rt/lib/lsan/lsan_common.h
@@ -18,6 +18,7 @@
#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_internal_defs.h"
#include "sanitizer_common/sanitizer_platform.h"
+#include "sanitizer_common/sanitizer_stackdepot.h"
#include "sanitizer_common/sanitizer_stoptheworld.h"
#include "sanitizer_common/sanitizer_symbolizer.h"
@@ -139,7 +140,7 @@ struct CheckForLeaksParam {
bool success = false;
};
-InternalMmapVector<RootRegion> const *GetRootRegions();
+InternalMmapVectorNoCtor<RootRegion> const *GetRootRegions();
void ScanRootRegion(Frontier *frontier, RootRegion const &region,
uptr region_begin, uptr region_end, bool is_readable);
void ForEachExtraStackRangeCb(uptr begin, uptr end, void* arg);
@@ -279,6 +280,13 @@ int __lsan_is_turned_off();
SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
const char *__lsan_default_suppressions();
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void __lsan_register_root_region(const void *p, __lsan::uptr size);
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void __lsan_unregister_root_region(const void *p, __lsan::uptr size);
+
} // extern "C"
#endif // LSAN_COMMON_H
diff --git a/compiler-rt/lib/lsan/lsan_common_mac.cpp b/compiler-rt/lib/lsan/lsan_common_mac.cpp
index 8516a176eb46..4301dcc615d7 100644
--- a/compiler-rt/lib/lsan/lsan_common_mac.cpp
+++ b/compiler-rt/lib/lsan/lsan_common_mac.cpp
@@ -149,7 +149,7 @@ void ProcessPlatformSpecificAllocations(Frontier *frontier) {
kern_return_t err = KERN_SUCCESS;
mach_msg_type_number_t count = VM_REGION_SUBMAP_INFO_COUNT_64;
- InternalMmapVector<RootRegion> const *root_regions = GetRootRegions();
+ InternalMmapVectorNoCtor<RootRegion> const *root_regions = GetRootRegions();
while (err == KERN_SUCCESS) {
struct vm_region_submap_info_64 info;
diff --git a/compiler-rt/lib/lsan/lsan_fuchsia.cpp b/compiler-rt/lib/lsan/lsan_fuchsia.cpp
index 40e65c6fb729..cb9f62a0341c 100644
--- a/compiler-rt/lib/lsan/lsan_fuchsia.cpp
+++ b/compiler-rt/lib/lsan/lsan_fuchsia.cpp
@@ -62,7 +62,7 @@ void InitializeMainThread() {
OnCreatedArgs args;
__sanitizer::GetThreadStackTopAndBottom(true, &args.stack_end,
&args.stack_begin);
- u32 tid = ThreadCreate(0, GetThreadSelf(), true, &args);
+ u32 tid = ThreadCreate(kMainTid, true, &args);
CHECK_EQ(tid, 0);
ThreadStart(tid);
}
@@ -86,14 +86,13 @@ void GetAllThreadAllocatorCachesLocked(InternalMmapVector<uptr> *caches) {
void *__sanitizer_before_thread_create_hook(thrd_t thread, bool detached,
const char *name, void *stack_base,
size_t stack_size) {
- uptr user_id = reinterpret_cast<uptr>(thread);
ENSURE_LSAN_INITED;
EnsureMainThreadIDIsCorrect();
OnCreatedArgs args;
args.stack_begin = reinterpret_cast<uptr>(stack_base);
args.stack_end = args.stack_begin + stack_size;
u32 parent_tid = GetCurrentThread();
- u32 tid = ThreadCreate(parent_tid, user_id, detached, &args);
+ u32 tid = ThreadCreate(parent_tid, detached, &args);
return reinterpret_cast<void *>(static_cast<uptr>(tid));
}
diff --git a/compiler-rt/lib/lsan/lsan_interceptors.cpp b/compiler-rt/lib/lsan/lsan_interceptors.cpp
index 90a90a56c54c..ee723f210c9d 100644
--- a/compiler-rt/lib/lsan/lsan_interceptors.cpp
+++ b/compiler-rt/lib/lsan/lsan_interceptors.cpp
@@ -13,6 +13,7 @@
#include "interception/interception.h"
#include "sanitizer_common/sanitizer_allocator.h"
+#include "sanitizer_common/sanitizer_allocator_dlsym.h"
#include "sanitizer_common/sanitizer_allocator_report.h"
#include "sanitizer_common/sanitizer_atomic.h"
#include "sanitizer_common/sanitizer_common.h"
@@ -43,6 +44,22 @@ int pthread_key_create(unsigned *key, void (*destructor)(void* v));
int pthread_setspecific(unsigned key, const void *v);
}
+struct DlsymAlloc : DlSymAllocator<DlsymAlloc> {
+ static bool UseImpl() { return lsan_init_is_running; }
+ static void OnAllocate(const void *ptr, uptr size) {
+#if CAN_SANITIZE_LEAKS
+    // Suppress leaks from dlerror(). The old dlsym hack handed out memory from
+    // a static global array, which the leak sanitizer already scanned as part
+    // of its root regions; this mmap-based pool must be registered explicitly.
+ __lsan_register_root_region(ptr, size);
+#endif
+ }
+ static void OnFree(const void *ptr, uptr size) {
+#if CAN_SANITIZE_LEAKS
+ __lsan_unregister_root_region(ptr, size);
+#endif
+ }
+};
+
///// Malloc/free interceptors. /////
namespace std {
@@ -52,41 +69,34 @@ namespace std {
#if !SANITIZER_MAC
INTERCEPTOR(void*, malloc, uptr size) {
+ if (DlsymAlloc::Use())
+ return DlsymAlloc::Allocate(size);
ENSURE_LSAN_INITED;
GET_STACK_TRACE_MALLOC;
return lsan_malloc(size, stack);
}
INTERCEPTOR(void, free, void *p) {
+ if (DlsymAlloc::PointerIsMine(p))
+ return DlsymAlloc::Free(p);
ENSURE_LSAN_INITED;
lsan_free(p);
}
INTERCEPTOR(void*, calloc, uptr nmemb, uptr size) {
- // This hack is not required for Fuchsia because there are no dlsym calls
- // involved in setting up interceptors.
-#if !SANITIZER_FUCHSIA
- if (lsan_init_is_running) {
- // Hack: dlsym calls calloc before REAL(calloc) is retrieved from dlsym.
- const uptr kCallocPoolSize = 1024;
- static uptr calloc_memory_for_dlsym[kCallocPoolSize];
- static uptr allocated;
- uptr size_in_words = ((nmemb * size) + kWordSize - 1) / kWordSize;
- void *mem = (void*)&calloc_memory_for_dlsym[allocated];
- allocated += size_in_words;
- CHECK(allocated < kCallocPoolSize);
- return mem;
- }
-#endif // !SANITIZER_FUCHSIA
+ if (DlsymAlloc::Use())
+ return DlsymAlloc::Callocate(nmemb, size);
ENSURE_LSAN_INITED;
GET_STACK_TRACE_MALLOC;
return lsan_calloc(nmemb, size, stack);
}
-INTERCEPTOR(void*, realloc, void *q, uptr size) {
+INTERCEPTOR(void *, realloc, void *ptr, uptr size) {
+ if (DlsymAlloc::Use() || DlsymAlloc::PointerIsMine(ptr))
+ return DlsymAlloc::Realloc(ptr, size);
ENSURE_LSAN_INITED;
GET_STACK_TRACE_MALLOC;
- return lsan_realloc(q, size, stack);
+ return lsan_realloc(ptr, size, stack);
}
INTERCEPTOR(void*, reallocarray, void *q, uptr nmemb, uptr size) {
@@ -458,8 +468,7 @@ INTERCEPTOR(int, pthread_create, void *th, void *attr,
res = REAL(pthread_create)(th, attr, __lsan_thread_start_func, &p);
}
if (res == 0) {
- int tid = ThreadCreate(GetCurrentThread(), *(uptr *)th,
- IsStateDetached(detached));
+ int tid = ThreadCreate(GetCurrentThread(), IsStateDetached(detached));
CHECK_NE(tid, kMainTid);
atomic_store(&p.tid, tid, memory_order_release);
while (atomic_load(&p.tid, memory_order_acquire) != 0)
@@ -470,24 +479,6 @@ INTERCEPTOR(int, pthread_create, void *th, void *attr,
return res;
}
-INTERCEPTOR(int, pthread_join, void *th, void **ret) {
- ENSURE_LSAN_INITED;
- int tid = ThreadTid((uptr)th);
- int res = REAL(pthread_join)(th, ret);
- if (res == 0)
- ThreadJoin(tid);
- return res;
-}
-
-INTERCEPTOR(int, pthread_detach, void *th) {
- ENSURE_LSAN_INITED;
- int tid = ThreadTid((uptr)th);
- int res = REAL(pthread_detach)(th);
- if (res == 0)
- ThreadDetach(tid);
- return res;
-}
-
INTERCEPTOR(void, _exit, int status) {
if (status == 0 && HasReportedLeaks()) status = common_flags()->exitcode;
REAL(_exit)(status);
@@ -520,8 +511,6 @@ void InitializeInterceptors() {
LSAN_MAYBE_INTERCEPT_MALLINFO;
LSAN_MAYBE_INTERCEPT_MALLOPT;
INTERCEPT_FUNCTION(pthread_create);
- INTERCEPT_FUNCTION(pthread_detach);
- INTERCEPT_FUNCTION(pthread_join);
INTERCEPT_FUNCTION(_exit);
LSAN_MAYBE_INTERCEPT__LWP_EXIT;
diff --git a/compiler-rt/lib/lsan/lsan_mac.cpp b/compiler-rt/lib/lsan/lsan_mac.cpp
index b96893e2801b..10a73f8fa93d 100644
--- a/compiler-rt/lib/lsan/lsan_mac.cpp
+++ b/compiler-rt/lib/lsan/lsan_mac.cpp
@@ -68,7 +68,7 @@ typedef struct {
ALWAYS_INLINE
void lsan_register_worker_thread(int parent_tid) {
if (GetCurrentThread() == kInvalidTid) {
- u32 tid = ThreadCreate(parent_tid, 0, true);
+ u32 tid = ThreadCreate(parent_tid, true);
ThreadStart(tid, GetTid());
SetCurrentThread(tid);
}
diff --git a/compiler-rt/lib/lsan/lsan_posix.cpp b/compiler-rt/lib/lsan/lsan_posix.cpp
index 5d1c3f6260dd..77118a29f2ea 100644
--- a/compiler-rt/lib/lsan/lsan_posix.cpp
+++ b/compiler-rt/lib/lsan/lsan_posix.cpp
@@ -75,7 +75,7 @@ bool GetThreadRangesLocked(tid_t os_id, uptr *stack_begin, uptr *stack_end,
}
void InitializeMainThread() {
- u32 tid = ThreadCreate(kMainTid, 0, true);
+ u32 tid = ThreadCreate(kMainTid, true);
CHECK_EQ(tid, kMainTid);
ThreadStart(tid, GetTid());
}
diff --git a/compiler-rt/lib/lsan/lsan_thread.cpp b/compiler-rt/lib/lsan/lsan_thread.cpp
index 1d224ebca693..ca3dfd03b109 100644
--- a/compiler-rt/lib/lsan/lsan_thread.cpp
+++ b/compiler-rt/lib/lsan/lsan_thread.cpp
@@ -44,8 +44,8 @@ void ThreadContextLsanBase::OnFinished() {
DTLS_Destroy();
}
-u32 ThreadCreate(u32 parent_tid, uptr user_id, bool detached, void *arg) {
- return thread_registry->CreateThread(user_id, detached, parent_tid, arg);
+u32 ThreadCreate(u32 parent_tid, bool detached, void *arg) {
+ return thread_registry->CreateThread(0, detached, parent_tid, arg);
}
void ThreadContextLsanBase::ThreadStart(u32 tid, tid_t os_id,
@@ -68,28 +68,6 @@ ThreadContext *CurrentThreadContext() {
return (ThreadContext *)thread_registry->GetThreadLocked(GetCurrentThread());
}
-static bool FindThreadByUid(ThreadContextBase *tctx, void *arg) {
- uptr uid = (uptr)arg;
- if (tctx->user_id == uid && tctx->status != ThreadStatusInvalid) {
- return true;
- }
- return false;
-}
-
-u32 ThreadTid(uptr uid) {
- return thread_registry->FindThread(FindThreadByUid, (void *)uid);
-}
-
-void ThreadDetach(u32 tid) {
- CHECK_NE(tid, kInvalidTid);
- thread_registry->DetachThread(tid, /* arg */ nullptr);
-}
-
-void ThreadJoin(u32 tid) {
- CHECK_NE(tid, kInvalidTid);
- thread_registry->JoinThread(tid, /* arg */ nullptr);
-}
-
void EnsureMainThreadIDIsCorrect() {
if (GetCurrentThread() == kMainTid)
CurrentThreadContext()->os_id = GetTid();
diff --git a/compiler-rt/lib/lsan/lsan_thread.h b/compiler-rt/lib/lsan/lsan_thread.h
index 36643753d019..6ab4172092ae 100644
--- a/compiler-rt/lib/lsan/lsan_thread.h
+++ b/compiler-rt/lib/lsan/lsan_thread.h
@@ -45,11 +45,8 @@ class ThreadContext;
void InitializeThreadRegistry();
void InitializeMainThread();
-u32 ThreadCreate(u32 tid, uptr uid, bool detached, void *arg = nullptr);
+u32 ThreadCreate(u32 tid, bool detached, void *arg = nullptr);
void ThreadFinish();
-void ThreadDetach(u32 tid);
-void ThreadJoin(u32 tid);
-u32 ThreadTid(uptr uid);
u32 GetCurrentThread();
void SetCurrentThread(u32 tid);
diff --git a/compiler-rt/lib/memprof/memprof_allocator.cpp b/compiler-rt/lib/memprof/memprof_allocator.cpp
index 6f01d4dfcb84..696f64d8c324 100644
--- a/compiler-rt/lib/memprof/memprof_allocator.cpp
+++ b/compiler-rt/lib/memprof/memprof_allocator.cpp
@@ -15,6 +15,9 @@
#include "memprof_allocator.h"
#include "memprof_mapping.h"
+#include "memprof_meminfoblock.h"
+#include "memprof_mibmap.h"
+#include "memprof_rawprofile.h"
#include "memprof_stack.h"
#include "memprof_thread.h"
#include "sanitizer_common/sanitizer_allocator_checks.h"
@@ -25,10 +28,11 @@
#include "sanitizer_common/sanitizer_flags.h"
#include "sanitizer_common/sanitizer_internal_defs.h"
#include "sanitizer_common/sanitizer_list.h"
+#include "sanitizer_common/sanitizer_procmaps.h"
#include "sanitizer_common/sanitizer_stackdepot.h"
+#include "sanitizer_common/sanitizer_vector.h"
#include <sched.h>
-#include <stdlib.h>
#include <time.h>
namespace __memprof {
@@ -166,244 +170,6 @@ AllocatorCache *GetAllocatorCache(MemprofThreadLocalMallocStorage *ms) {
return &ms->allocator_cache;
}
-struct MemInfoBlock {
- u32 alloc_count;
- u64 total_access_count, min_access_count, max_access_count;
- u64 total_size;
- u32 min_size, max_size;
- u32 alloc_timestamp, dealloc_timestamp;
- u64 total_lifetime;
- u32 min_lifetime, max_lifetime;
- u32 alloc_cpu_id, dealloc_cpu_id;
- u32 num_migrated_cpu;
-
- // Only compared to prior deallocated object currently.
- u32 num_lifetime_overlaps;
- u32 num_same_alloc_cpu;
- u32 num_same_dealloc_cpu;
-
- u64 data_type_id; // TODO: hash of type name
-
- MemInfoBlock() : alloc_count(0) {}
-
- MemInfoBlock(u32 size, u64 access_count, u32 alloc_timestamp,
- u32 dealloc_timestamp, u32 alloc_cpu, u32 dealloc_cpu)
- : alloc_count(1), total_access_count(access_count),
- min_access_count(access_count), max_access_count(access_count),
- total_size(size), min_size(size), max_size(size),
- alloc_timestamp(alloc_timestamp), dealloc_timestamp(dealloc_timestamp),
- total_lifetime(dealloc_timestamp - alloc_timestamp),
- min_lifetime(total_lifetime), max_lifetime(total_lifetime),
- alloc_cpu_id(alloc_cpu), dealloc_cpu_id(dealloc_cpu),
- num_lifetime_overlaps(0), num_same_alloc_cpu(0),
- num_same_dealloc_cpu(0) {
- num_migrated_cpu = alloc_cpu_id != dealloc_cpu_id;
- }
-
- void Print(u64 id) {
- u64 p;
- if (flags()->print_terse) {
- p = total_size * 100 / alloc_count;
- Printf("MIB:%llu/%u/%d.%02d/%u/%u/", id, alloc_count, p / 100, p % 100,
- min_size, max_size);
- p = total_access_count * 100 / alloc_count;
- Printf("%d.%02d/%u/%u/", p / 100, p % 100, min_access_count,
- max_access_count);
- p = total_lifetime * 100 / alloc_count;
- Printf("%d.%02d/%u/%u/", p / 100, p % 100, min_lifetime, max_lifetime);
- Printf("%u/%u/%u/%u\n", num_migrated_cpu, num_lifetime_overlaps,
- num_same_alloc_cpu, num_same_dealloc_cpu);
- } else {
- p = total_size * 100 / alloc_count;
- Printf("Memory allocation stack id = %llu\n", id);
- Printf("\talloc_count %u, size (ave/min/max) %d.%02d / %u / %u\n",
- alloc_count, p / 100, p % 100, min_size, max_size);
- p = total_access_count * 100 / alloc_count;
- Printf("\taccess_count (ave/min/max): %d.%02d / %u / %u\n", p / 100,
- p % 100, min_access_count, max_access_count);
- p = total_lifetime * 100 / alloc_count;
- Printf("\tlifetime (ave/min/max): %d.%02d / %u / %u\n", p / 100, p % 100,
- min_lifetime, max_lifetime);
- Printf("\tnum migrated: %u, num lifetime overlaps: %u, num same alloc "
- "cpu: %u, num same dealloc_cpu: %u\n",
- num_migrated_cpu, num_lifetime_overlaps, num_same_alloc_cpu,
- num_same_dealloc_cpu);
- }
- }
-
- static void printHeader() {
- CHECK(flags()->print_terse);
- Printf("MIB:StackID/AllocCount/AveSize/MinSize/MaxSize/AveAccessCount/"
- "MinAccessCount/MaxAccessCount/AveLifetime/MinLifetime/MaxLifetime/"
- "NumMigratedCpu/NumLifetimeOverlaps/NumSameAllocCpu/"
- "NumSameDeallocCpu\n");
- }
-
- void Merge(MemInfoBlock &newMIB) {
- alloc_count += newMIB.alloc_count;
-
- total_access_count += newMIB.total_access_count;
- min_access_count = Min(min_access_count, newMIB.min_access_count);
- max_access_count = Max(max_access_count, newMIB.max_access_count);
-
- total_size += newMIB.total_size;
- min_size = Min(min_size, newMIB.min_size);
- max_size = Max(max_size, newMIB.max_size);
-
- total_lifetime += newMIB.total_lifetime;
- min_lifetime = Min(min_lifetime, newMIB.min_lifetime);
- max_lifetime = Max(max_lifetime, newMIB.max_lifetime);
-
- // We know newMIB was deallocated later, so just need to check if it was
- // allocated before last one deallocated.
- num_lifetime_overlaps += newMIB.alloc_timestamp < dealloc_timestamp;
- alloc_timestamp = newMIB.alloc_timestamp;
- dealloc_timestamp = newMIB.dealloc_timestamp;
-
- num_same_alloc_cpu += alloc_cpu_id == newMIB.alloc_cpu_id;
- num_same_dealloc_cpu += dealloc_cpu_id == newMIB.dealloc_cpu_id;
- alloc_cpu_id = newMIB.alloc_cpu_id;
- dealloc_cpu_id = newMIB.dealloc_cpu_id;
- }
-};
-
-static u32 AccessCount = 0;
-static u32 MissCount = 0;
-
-struct SetEntry {
- SetEntry() : id(0), MIB() {}
- bool Empty() { return id == 0; }
- void Print() {
- CHECK(!Empty());
- MIB.Print(id);
- }
- // The stack id
- u64 id;
- MemInfoBlock MIB;
-};
-
-struct CacheSet {
- enum { kSetSize = 4 };
-
- void PrintAll() {
- for (int i = 0; i < kSetSize; i++) {
- if (Entries[i].Empty())
- continue;
- Entries[i].Print();
- }
- }
- void insertOrMerge(u64 new_id, MemInfoBlock &newMIB) {
- AccessCount++;
- SetAccessCount++;
-
- for (int i = 0; i < kSetSize; i++) {
- auto id = Entries[i].id;
- // Check if this is a hit or an empty entry. Since we always move any
- // filled locations to the front of the array (see below), we don't need
- // to look after finding the first empty entry.
- if (id == new_id || !id) {
- if (id == 0) {
- Entries[i].id = new_id;
- Entries[i].MIB = newMIB;
- } else {
- Entries[i].MIB.Merge(newMIB);
- }
- // Assuming some id locality, we try to swap the matching entry
- // into the first set position.
- if (i != 0) {
- auto tmp = Entries[0];
- Entries[0] = Entries[i];
- Entries[i] = tmp;
- }
- return;
- }
- }
-
- // Miss
- MissCount++;
- SetMissCount++;
-
- // We try to find the entries with the lowest alloc count to be evicted:
- int min_idx = 0;
- u64 min_count = Entries[0].MIB.alloc_count;
- for (int i = 1; i < kSetSize; i++) {
- CHECK(!Entries[i].Empty());
- if (Entries[i].MIB.alloc_count < min_count) {
- min_idx = i;
- min_count = Entries[i].MIB.alloc_count;
- }
- }
-
- // Print the evicted entry profile information
- if (!flags()->print_terse)
- Printf("Evicted:\n");
- Entries[min_idx].Print();
-
- // Similar to the hit case, put new MIB in first set position.
- if (min_idx != 0)
- Entries[min_idx] = Entries[0];
- Entries[0].id = new_id;
- Entries[0].MIB = newMIB;
- }
-
- void PrintMissRate(int i) {
- u64 p = SetAccessCount ? SetMissCount * 10000ULL / SetAccessCount : 0;
- Printf("Set %d miss rate: %d / %d = %5d.%02d%%\n", i, SetMissCount,
- SetAccessCount, p / 100, p % 100);
- }
-
- SetEntry Entries[kSetSize];
- u32 SetAccessCount = 0;
- u32 SetMissCount = 0;
-};
-
-struct MemInfoBlockCache {
- MemInfoBlockCache() {
- if (common_flags()->print_module_map)
- DumpProcessMap();
- if (flags()->print_terse)
- MemInfoBlock::printHeader();
- Sets =
- (CacheSet *)malloc(sizeof(CacheSet) * flags()->mem_info_cache_entries);
- Constructed = true;
- }
-
- ~MemInfoBlockCache() { free(Sets); }
-
- void insertOrMerge(u64 new_id, MemInfoBlock &newMIB) {
- u64 hv = new_id;
-
- // Use mod method where number of entries should be a prime close to power
- // of 2.
- hv %= flags()->mem_info_cache_entries;
-
- return Sets[hv].insertOrMerge(new_id, newMIB);
- }
-
- void PrintAll() {
- for (int i = 0; i < flags()->mem_info_cache_entries; i++) {
- Sets[i].PrintAll();
- }
- }
-
- void PrintMissRate() {
- if (!flags()->print_mem_info_cache_miss_rate)
- return;
- u64 p = AccessCount ? MissCount * 10000ULL / AccessCount : 0;
- Printf("Overall miss rate: %d / %d = %5d.%02d%%\n", MissCount, AccessCount,
- p / 100, p % 100);
- if (flags()->print_mem_info_cache_miss_rate_details)
- for (int i = 0; i < flags()->mem_info_cache_entries; i++)
- Sets[i].PrintMissRate(i);
- }
-
- CacheSet *Sets;
- // Flag to indicate whether the Sets have been allocated, in case a
- // deallocation is called very early, before the static init of the Allocator
- // and therefore of this table has completed.
- bool Constructed = false;
-};
-
// Accumulates the access count from the shadow for the given pointer and size.
u64 GetShadowCount(uptr p, u32 size) {
u64 *shadow = (u64 *)MEM_TO_SHADOW(p);
@@ -454,24 +220,66 @@ struct Allocator {
uptr max_user_defined_malloc_size;
atomic_uint8_t rss_limit_exceeded;
- MemInfoBlockCache MemInfoBlockTable;
- bool destructing;
+ // Holds the mapping of stack ids to MemInfoBlocks.
+ MIBMapTy MIBMap;
+
+ atomic_uint8_t destructing;
+ atomic_uint8_t constructed;
+ bool print_text;
// ------------------- Initialization ------------------------
- explicit Allocator(LinkerInitialized) : destructing(false) {}
+ explicit Allocator(LinkerInitialized) : print_text(flags()->print_text) {
+ atomic_store_relaxed(&destructing, 0);
+ atomic_store_relaxed(&constructed, 1);
+ }
- ~Allocator() { FinishAndPrint(); }
+ ~Allocator() {
+ atomic_store_relaxed(&destructing, 1);
+ FinishAndWrite();
+ }
+
+ static void PrintCallback(const uptr Key, LockedMemInfoBlock *const &Value,
+ void *Arg) {
+ SpinMutexLock l(&Value->mutex);
+ Value->mib.Print(Key, bool(Arg));
+ }
+
+ void FinishAndWrite() {
+ if (print_text && common_flags()->print_module_map)
+ DumpProcessMap();
- void FinishAndPrint() {
- if (!flags()->print_terse)
- Printf("Live on exit:\n");
allocator.ForceLock();
+
+ InsertLiveBlocks();
+ if (print_text) {
+ MIBMap.ForEach(PrintCallback,
+ reinterpret_cast<void *>(flags()->print_terse));
+ StackDepotPrintAll();
+ } else {
+ // Serialize the contents to a raw profile. Format documented in
+ // memprof_rawprofile.h.
+ char *Buffer = nullptr;
+
+ MemoryMappingLayout Layout(/*cache_enabled=*/true);
+ u64 BytesSerialized = SerializeToRawProfile(MIBMap, Layout, Buffer);
+ CHECK(Buffer && BytesSerialized && "could not serialize to buffer");
+ report_file.Write(Buffer, BytesSerialized);
+ }
+
+ allocator.ForceUnlock();
+ }
+
+ // Inserts any blocks which have been allocated but not yet deallocated.
+ void InsertLiveBlocks() {
+ if (print_text && !flags()->print_terse)
+ Printf("Live on exit:\n");
+
allocator.ForEachChunk(
[](uptr chunk, void *alloc) {
u64 user_requested_size;
+ Allocator *A = (Allocator *)alloc;
MemprofChunk *m =
- ((Allocator *)alloc)
- ->GetMemprofChunk((void *)chunk, user_requested_size);
+ A->GetMemprofChunk((void *)chunk, user_requested_size);
if (!m)
return;
uptr user_beg = ((uptr)m) + kChunkHeaderSize;
@@ -479,16 +287,9 @@ struct Allocator {
long curtime = GetTimestamp();
MemInfoBlock newMIB(user_requested_size, c, m->timestamp_ms, curtime,
m->cpu_id, GetCpuId());
- ((Allocator *)alloc)
- ->MemInfoBlockTable.insertOrMerge(m->alloc_context_id, newMIB);
+ InsertOrMerge(m->alloc_context_id, newMIB, A->MIBMap);
},
this);
- allocator.ForceUnlock();
-
- destructing = true;
- MemInfoBlockTable.PrintMissRate();
- MemInfoBlockTable.PrintAll();
- StackDepotPrintAll();
}
void InitLinkerInitialized() {
@@ -541,8 +342,7 @@ struct Allocator {
if (size > kMaxAllowedMallocSize || needed_size > kMaxAllowedMallocSize ||
size > max_user_defined_malloc_size) {
if (AllocatorMayReturnNull()) {
- Report("WARNING: MemProfiler failed to allocate 0x%zx bytes\n",
- (void *)size);
+ Report("WARNING: MemProfiler failed to allocate 0x%zx bytes\n", size);
return nullptr;
}
uptr malloc_limit =
@@ -621,17 +421,15 @@ struct Allocator {
u64 user_requested_size =
atomic_exchange(&m->user_requested_size, 0, memory_order_acquire);
- if (memprof_inited && memprof_init_done && !destructing &&
- MemInfoBlockTable.Constructed) {
+ if (memprof_inited && memprof_init_done &&
+ atomic_load_relaxed(&constructed) &&
+ !atomic_load_relaxed(&destructing)) {
u64 c = GetShadowCount(p, user_requested_size);
long curtime = GetTimestamp();
MemInfoBlock newMIB(user_requested_size, c, m->timestamp_ms, curtime,
m->cpu_id, GetCpuId());
- {
- SpinMutexLock l(&fallback_mutex);
- MemInfoBlockTable.insertOrMerge(m->alloc_context_id, newMIB);
- }
+ InsertOrMerge(m->alloc_context_id, newMIB, MIBMap);
}
MemprofStats &thread_stats = GetCurrentThreadStats();
@@ -898,7 +696,7 @@ uptr __sanitizer_get_allocated_size(const void *p) {
}
int __memprof_profile_dump() {
- instance.FinishAndPrint();
+ instance.FinishAndWrite();
// In the future we may want to return non-zero if there are any errors
// detected during the dumping process.
return 0;
diff --git a/compiler-rt/lib/memprof/memprof_flags.inc b/compiler-rt/lib/memprof/memprof_flags.inc
index 035fd15b9288..ee0760ddc302 100644
--- a/compiler-rt/lib/memprof/memprof_flags.inc
+++ b/compiler-rt/lib/memprof/memprof_flags.inc
@@ -35,15 +35,7 @@ MEMPROF_FLAG(bool, allocator_frees_and_returns_null_on_realloc_zero, true,
"realloc(p, 0) is equivalent to free(p) by default (Same as the "
"POSIX standard). If set to false, realloc(p, 0) will return a "
"pointer to an allocated space which can not be used.")
+MEMPROF_FLAG(bool, print_text, false,
+ "If set, prints the heap profile in text format. Else use the raw binary serialization format.")
MEMPROF_FLAG(bool, print_terse, false,
- "If set, prints memory profile in a terse format.")
-
-MEMPROF_FLAG(
- int, mem_info_cache_entries, 16381,
- "Size in entries of the mem info block cache, should be closest prime"
- " number to a power of two for best hashing.")
-MEMPROF_FLAG(bool, print_mem_info_cache_miss_rate, false,
- "If set, prints the miss rate of the mem info block cache.")
-MEMPROF_FLAG(
- bool, print_mem_info_cache_miss_rate_details, false,
- "If set, prints detailed miss rates of the mem info block cache sets.")
+ "If set, prints memory profile in a terse format. Only applicable if print_text = true.")
diff --git a/compiler-rt/lib/memprof/memprof_interceptors.cpp b/compiler-rt/lib/memprof/memprof_interceptors.cpp
index e22768061e70..5575ae2fe444 100644
--- a/compiler-rt/lib/memprof/memprof_interceptors.cpp
+++ b/compiler-rt/lib/memprof/memprof_interceptors.cpp
@@ -204,9 +204,9 @@ INTERCEPTOR(char *, strcat, char *to, const char *from) {
void *ctx;
MEMPROF_INTERCEPTOR_ENTER(ctx, strcat);
ENSURE_MEMPROF_INITED();
- uptr from_length = REAL(strlen)(from);
+ uptr from_length = internal_strlen(from);
MEMPROF_READ_RANGE(from, from_length + 1);
- uptr to_length = REAL(strlen)(to);
+ uptr to_length = internal_strlen(to);
MEMPROF_READ_STRING(to, to_length);
MEMPROF_WRITE_RANGE(to + to_length, from_length + 1);
return REAL(strcat)(to, from);
@@ -219,7 +219,7 @@ INTERCEPTOR(char *, strncat, char *to, const char *from, uptr size) {
uptr from_length = MaybeRealStrnlen(from, size);
uptr copy_length = Min(size, from_length + 1);
MEMPROF_READ_RANGE(from, copy_length);
- uptr to_length = REAL(strlen)(to);
+ uptr to_length = internal_strlen(to);
MEMPROF_READ_STRING(to, to_length);
MEMPROF_WRITE_RANGE(to + to_length, from_length + 1);
return REAL(strncat)(to, from, size);
@@ -232,7 +232,7 @@ INTERCEPTOR(char *, strcpy, char *to, const char *from) {
return REAL(strcpy)(to, from);
}
ENSURE_MEMPROF_INITED();
- uptr from_size = REAL(strlen)(from) + 1;
+ uptr from_size = internal_strlen(from) + 1;
MEMPROF_READ_RANGE(from, from_size);
MEMPROF_WRITE_RANGE(to, from_size);
return REAL(strcpy)(to, from);
@@ -244,7 +244,7 @@ INTERCEPTOR(char *, strdup, const char *s) {
if (UNLIKELY(!memprof_inited))
return internal_strdup(s);
ENSURE_MEMPROF_INITED();
- uptr length = REAL(strlen)(s);
+ uptr length = internal_strlen(s);
MEMPROF_READ_RANGE(s, length + 1);
GET_STACK_TRACE_MALLOC;
void *new_mem = memprof_malloc(length + 1, &stack);
@@ -258,7 +258,7 @@ INTERCEPTOR(char *, __strdup, const char *s) {
if (UNLIKELY(!memprof_inited))
return internal_strdup(s);
ENSURE_MEMPROF_INITED();
- uptr length = REAL(strlen)(s);
+ uptr length = internal_strlen(s);
MEMPROF_READ_RANGE(s, length + 1);
GET_STACK_TRACE_MALLOC;
void *new_mem = memprof_malloc(length + 1, &stack);
diff --git a/compiler-rt/lib/memprof/memprof_interceptors.h b/compiler-rt/lib/memprof/memprof_interceptors.h
index ca5f3690430a..879a1e1061e5 100644
--- a/compiler-rt/lib/memprof/memprof_interceptors.h
+++ b/compiler-rt/lib/memprof/memprof_interceptors.h
@@ -48,13 +48,13 @@ DECLARE_REAL(char *, strstr, const char *s1, const char *s2)
#define MEMPROF_INTERCEPT_FUNC_VER(name, ver) \
do { \
if (!INTERCEPT_FUNCTION_VER(name, ver)) \
- VReport(1, "MemProfiler: failed to intercept '%s@@%s'\n", #name, #ver); \
+ VReport(1, "MemProfiler: failed to intercept '%s@@%s'\n", #name, ver); \
} while (0)
#define MEMPROF_INTERCEPT_FUNC_VER_UNVERSIONED_FALLBACK(name, ver) \
do { \
if (!INTERCEPT_FUNCTION_VER(name, ver) && !INTERCEPT_FUNCTION(name)) \
VReport(1, "MemProfiler: failed to intercept '%s@@%s' or '%s'\n", #name, \
- #ver, #name); \
+ ver, #name); \
} while (0)
#endif // MEMPROF_INTERCEPTORS_H
diff --git a/compiler-rt/lib/memprof/memprof_malloc_linux.cpp b/compiler-rt/lib/memprof/memprof_malloc_linux.cpp
index c7330f4619a1..ef753fcaa4ad 100644
--- a/compiler-rt/lib/memprof/memprof_malloc_linux.cpp
+++ b/compiler-rt/lib/memprof/memprof_malloc_linux.cpp
@@ -23,125 +23,52 @@
#include "memprof_internal.h"
#include "memprof_stack.h"
#include "sanitizer_common/sanitizer_allocator_checks.h"
+#include "sanitizer_common/sanitizer_allocator_dlsym.h"
#include "sanitizer_common/sanitizer_errno.h"
#include "sanitizer_common/sanitizer_tls_get_addr.h"
// ---------------------- Replacement functions ---------------- {{{1
using namespace __memprof;
-static uptr allocated_for_dlsym;
-static uptr last_dlsym_alloc_size_in_words;
-static const uptr kDlsymAllocPoolSize = 1024;
-static uptr alloc_memory_for_dlsym[kDlsymAllocPoolSize];
-
-static inline bool IsInDlsymAllocPool(const void *ptr) {
- uptr off = (uptr)ptr - (uptr)alloc_memory_for_dlsym;
- return off < allocated_for_dlsym * sizeof(alloc_memory_for_dlsym[0]);
-}
-
-static void *AllocateFromLocalPool(uptr size_in_bytes) {
- uptr size_in_words = RoundUpTo(size_in_bytes, kWordSize) / kWordSize;
- void *mem = (void *)&alloc_memory_for_dlsym[allocated_for_dlsym];
- last_dlsym_alloc_size_in_words = size_in_words;
- allocated_for_dlsym += size_in_words;
- CHECK_LT(allocated_for_dlsym, kDlsymAllocPoolSize);
- return mem;
-}
-
-static void DeallocateFromLocalPool(const void *ptr) {
- // Hack: since glibc 2.27 dlsym no longer uses stack-allocated memory to store
- // error messages and instead uses malloc followed by free. To avoid pool
- // exhaustion due to long object filenames, handle that special case here.
- uptr prev_offset = allocated_for_dlsym - last_dlsym_alloc_size_in_words;
- void *prev_mem = (void *)&alloc_memory_for_dlsym[prev_offset];
- if (prev_mem == ptr) {
- REAL(memset)(prev_mem, 0, last_dlsym_alloc_size_in_words * kWordSize);
- allocated_for_dlsym = prev_offset;
- last_dlsym_alloc_size_in_words = 0;
- }
-}
-
-static int PosixMemalignFromLocalPool(void **memptr, uptr alignment,
- uptr size_in_bytes) {
- if (UNLIKELY(!CheckPosixMemalignAlignment(alignment)))
- return errno_EINVAL;
-
- CHECK(alignment >= kWordSize);
-
- uptr addr = (uptr)&alloc_memory_for_dlsym[allocated_for_dlsym];
- uptr aligned_addr = RoundUpTo(addr, alignment);
- uptr aligned_size = RoundUpTo(size_in_bytes, kWordSize);
-
- uptr *end_mem = (uptr *)(aligned_addr + aligned_size);
- uptr allocated = end_mem - alloc_memory_for_dlsym;
- if (allocated >= kDlsymAllocPoolSize)
- return errno_ENOMEM;
-
- allocated_for_dlsym = allocated;
- *memptr = (void *)aligned_addr;
- return 0;
-}
-
-static inline bool MaybeInDlsym() { return memprof_init_is_running; }
-
-static inline bool UseLocalPool() { return MaybeInDlsym(); }
-
-static void *ReallocFromLocalPool(void *ptr, uptr size) {
- const uptr offset = (uptr)ptr - (uptr)alloc_memory_for_dlsym;
- const uptr copy_size = Min(size, kDlsymAllocPoolSize - offset);
- void *new_ptr;
- if (UNLIKELY(UseLocalPool())) {
- new_ptr = AllocateFromLocalPool(size);
- } else {
- ENSURE_MEMPROF_INITED();
- GET_STACK_TRACE_MALLOC;
- new_ptr = memprof_malloc(size, &stack);
- }
- internal_memcpy(new_ptr, ptr, copy_size);
- return new_ptr;
-}
+struct DlsymAlloc : public DlSymAllocator<DlsymAlloc> {
+ static bool UseImpl() { return memprof_init_is_running; }
+};
INTERCEPTOR(void, free, void *ptr) {
+ if (DlsymAlloc::PointerIsMine(ptr))
+ return DlsymAlloc::Free(ptr);
GET_STACK_TRACE_FREE;
- if (UNLIKELY(IsInDlsymAllocPool(ptr))) {
- DeallocateFromLocalPool(ptr);
- return;
- }
memprof_free(ptr, &stack, FROM_MALLOC);
}
#if SANITIZER_INTERCEPT_CFREE
INTERCEPTOR(void, cfree, void *ptr) {
+ if (DlsymAlloc::PointerIsMine(ptr))
+ return DlsymAlloc::Free(ptr);
GET_STACK_TRACE_FREE;
- if (UNLIKELY(IsInDlsymAllocPool(ptr)))
- return;
memprof_free(ptr, &stack, FROM_MALLOC);
}
#endif // SANITIZER_INTERCEPT_CFREE
INTERCEPTOR(void *, malloc, uptr size) {
- if (UNLIKELY(UseLocalPool()))
- // Hack: dlsym calls malloc before REAL(malloc) is retrieved from dlsym.
- return AllocateFromLocalPool(size);
+ if (DlsymAlloc::Use())
+ return DlsymAlloc::Allocate(size);
ENSURE_MEMPROF_INITED();
GET_STACK_TRACE_MALLOC;
return memprof_malloc(size, &stack);
}
INTERCEPTOR(void *, calloc, uptr nmemb, uptr size) {
- if (UNLIKELY(UseLocalPool()))
- // Hack: dlsym calls calloc before REAL(calloc) is retrieved from dlsym.
- return AllocateFromLocalPool(nmemb * size);
+ if (DlsymAlloc::Use())
+ return DlsymAlloc::Callocate(nmemb, size);
ENSURE_MEMPROF_INITED();
GET_STACK_TRACE_MALLOC;
return memprof_calloc(nmemb, size, &stack);
}
INTERCEPTOR(void *, realloc, void *ptr, uptr size) {
- if (UNLIKELY(IsInDlsymAllocPool(ptr)))
- return ReallocFromLocalPool(ptr, size);
- if (UNLIKELY(UseLocalPool()))
- return AllocateFromLocalPool(size);
+ if (DlsymAlloc::Use() || DlsymAlloc::PointerIsMine(ptr))
+ return DlsymAlloc::Realloc(ptr, size);
ENSURE_MEMPROF_INITED();
GET_STACK_TRACE_MALLOC;
return memprof_realloc(ptr, size, &stack);
@@ -201,8 +128,6 @@ INTERCEPTOR(int, mallopt, int cmd, int value) { return 0; }
#endif // SANITIZER_INTERCEPT_MALLOPT_AND_MALLINFO
INTERCEPTOR(int, posix_memalign, void **memptr, uptr alignment, uptr size) {
- if (UNLIKELY(UseLocalPool()))
- return PosixMemalignFromLocalPool(memptr, alignment, size);
GET_STACK_TRACE_MALLOC;
return memprof_posix_memalign(memptr, alignment, size, &stack);
}
diff --git a/compiler-rt/lib/memprof/memprof_meminfoblock.h b/compiler-rt/lib/memprof/memprof_meminfoblock.h
new file mode 100644
index 000000000000..19e424435e79
--- /dev/null
+++ b/compiler-rt/lib/memprof/memprof_meminfoblock.h
@@ -0,0 +1,116 @@
+#ifndef MEMPROF_MEMINFOBLOCK_H_
+#define MEMPROF_MEMINFOBLOCK_H_
+
+#include "memprof_interface_internal.h" // For u32, u64 TODO: Move these out of the internal header.
+#include "sanitizer_common/sanitizer_common.h"
+
+namespace __memprof {
+
+using __sanitizer::Printf;
+
+struct MemInfoBlock {
+ u32 alloc_count;
+ u64 total_access_count, min_access_count, max_access_count;
+ u64 total_size;
+ u32 min_size, max_size;
+ u32 alloc_timestamp, dealloc_timestamp;
+ u64 total_lifetime;
+ u32 min_lifetime, max_lifetime;
+ u32 alloc_cpu_id, dealloc_cpu_id;
+ u32 num_migrated_cpu;
+
+ // Currently only compared against the previously deallocated object.
+ u32 num_lifetime_overlaps;
+ u32 num_same_alloc_cpu;
+ u32 num_same_dealloc_cpu;
+
+ u64 data_type_id; // TODO: hash of type name
+
+ MemInfoBlock() : alloc_count(0) {}
+
+ MemInfoBlock(u32 size, u64 access_count, u32 alloc_timestamp,
+ u32 dealloc_timestamp, u32 alloc_cpu, u32 dealloc_cpu)
+ : alloc_count(1), total_access_count(access_count),
+ min_access_count(access_count), max_access_count(access_count),
+ total_size(size), min_size(size), max_size(size),
+ alloc_timestamp(alloc_timestamp), dealloc_timestamp(dealloc_timestamp),
+ total_lifetime(dealloc_timestamp - alloc_timestamp),
+ min_lifetime(total_lifetime), max_lifetime(total_lifetime),
+ alloc_cpu_id(alloc_cpu), dealloc_cpu_id(dealloc_cpu),
+ num_lifetime_overlaps(0), num_same_alloc_cpu(0),
+ num_same_dealloc_cpu(0) {
+ num_migrated_cpu = alloc_cpu_id != dealloc_cpu_id;
+ }
+
+ void Print(u64 id, bool print_terse) const {
+ u64 p;
+
+ if (print_terse) {
+ p = total_size * 100 / alloc_count;
+ Printf("MIB:%llu/%u/%llu.%02llu/%u/%u/", id, alloc_count, p / 100,
+ p % 100, min_size, max_size);
+ p = total_access_count * 100 / alloc_count;
+ Printf("%llu.%02llu/%llu/%llu/", p / 100, p % 100, min_access_count,
+ max_access_count);
+ p = total_lifetime * 100 / alloc_count;
+ Printf("%llu.%02llu/%u/%u/", p / 100, p % 100, min_lifetime,
+ max_lifetime);
+ Printf("%u/%u/%u/%u\n", num_migrated_cpu, num_lifetime_overlaps,
+ num_same_alloc_cpu, num_same_dealloc_cpu);
+ } else {
+ p = total_size * 100 / alloc_count;
+ Printf("Memory allocation stack id = %llu\n", id);
+ Printf("\talloc_count %u, size (ave/min/max) %llu.%02llu / %u / %u\n",
+ alloc_count, p / 100, p % 100, min_size, max_size);
+ p = total_access_count * 100 / alloc_count;
+ Printf("\taccess_count (ave/min/max): %llu.%02llu / %llu / %llu\n",
+ p / 100, p % 100, min_access_count, max_access_count);
+ p = total_lifetime * 100 / alloc_count;
+ Printf("\tlifetime (ave/min/max): %llu.%02llu / %u / %u\n", p / 100,
+ p % 100, min_lifetime, max_lifetime);
+ Printf("\tnum migrated: %u, num lifetime overlaps: %u, num same alloc "
+ "cpu: %u, num same dealloc_cpu: %u\n",
+ num_migrated_cpu, num_lifetime_overlaps, num_same_alloc_cpu,
+ num_same_dealloc_cpu);
+ }
+ }
+
+ static void printHeader() {
+ Printf("MIB:StackID/AllocCount/AveSize/MinSize/MaxSize/AveAccessCount/"
+ "MinAccessCount/MaxAccessCount/AveLifetime/MinLifetime/MaxLifetime/"
+ "NumMigratedCpu/NumLifetimeOverlaps/NumSameAllocCpu/"
+ "NumSameDeallocCpu\n");
+ }
+
+ void Merge(const MemInfoBlock &newMIB) {
+ alloc_count += newMIB.alloc_count;
+
+ total_access_count += newMIB.total_access_count;
+ min_access_count = Min(min_access_count, newMIB.min_access_count);
+ max_access_count = Max(max_access_count, newMIB.max_access_count);
+
+ total_size += newMIB.total_size;
+ min_size = Min(min_size, newMIB.min_size);
+ max_size = Max(max_size, newMIB.max_size);
+
+ total_lifetime += newMIB.total_lifetime;
+ min_lifetime = Min(min_lifetime, newMIB.min_lifetime);
+ max_lifetime = Max(max_lifetime, newMIB.max_lifetime);
+
+ // We know newMIB was deallocated later, so we just need to check whether it
+ // was allocated before the last one was deallocated.
+ num_lifetime_overlaps += newMIB.alloc_timestamp < dealloc_timestamp;
+ alloc_timestamp = newMIB.alloc_timestamp;
+ dealloc_timestamp = newMIB.dealloc_timestamp;
+
+ num_same_alloc_cpu += alloc_cpu_id == newMIB.alloc_cpu_id;
+ num_same_dealloc_cpu += dealloc_cpu_id == newMIB.dealloc_cpu_id;
+ alloc_cpu_id = newMIB.alloc_cpu_id;
+ dealloc_cpu_id = newMIB.dealloc_cpu_id;
+ }
+
+} __attribute__((packed));
+
+} // namespace __memprof
+
+#endif // MEMPROF_MEMINFOBLOCK_H_
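For reference, a minimal sketch (not part of this patch) of how two blocks for the same stack id combine through Merge(), using only the constructor and fields defined above; the sizes, counts, timestamps and cpu ids are made-up values:

#include "memprof_meminfoblock.h"
using __memprof::MemInfoBlock;

void MergeExample() {
  // Two allocations from the same call stack: 32 bytes accessed 4 times,
  // then 64 bytes accessed 10 times.
  MemInfoBlock A(/*size=*/32, /*access_count=*/4, /*alloc_timestamp=*/100,
                 /*dealloc_timestamp=*/150, /*alloc_cpu=*/0, /*dealloc_cpu=*/0);
  MemInfoBlock B(/*size=*/64, /*access_count=*/10, /*alloc_timestamp=*/120,
                 /*dealloc_timestamp=*/200, /*alloc_cpu=*/0, /*dealloc_cpu=*/1);
  A.Merge(B);
  // Now A.alloc_count == 2, A.total_size == 96, A.total_access_count == 14,
  // A.num_lifetime_overlaps == 1 (B was allocated at 120, before A was
  // deallocated at 150), A.num_same_alloc_cpu == 1, A.num_same_dealloc_cpu == 0.
}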
diff --git a/compiler-rt/lib/memprof/memprof_mibmap.cpp b/compiler-rt/lib/memprof/memprof_mibmap.cpp
new file mode 100644
index 000000000000..47449cf9612b
--- /dev/null
+++ b/compiler-rt/lib/memprof/memprof_mibmap.cpp
@@ -0,0 +1,35 @@
+//===-- memprof_mibmap.cpp -----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemProfiler, a memory profiler.
+//
+//===----------------------------------------------------------------------===//
+
+#include "memprof_mibmap.h"
+#include "sanitizer_common/sanitizer_allocator_internal.h"
+#include "sanitizer_common/sanitizer_mutex.h"
+
+namespace __memprof {
+
+void InsertOrMerge(const uptr Id, const MemInfoBlock &Block, MIBMapTy &Map) {
+ MIBMapTy::Handle h(&Map, static_cast<uptr>(Id), /*remove=*/false,
+ /*create=*/true);
+ if (h.created()) {
+ LockedMemInfoBlock *lmib =
+ (LockedMemInfoBlock *)InternalAlloc(sizeof(LockedMemInfoBlock));
+ lmib->mutex.Init();
+ lmib->mib = Block;
+ *h = lmib;
+ } else {
+ LockedMemInfoBlock *lmib = *h;
+ SpinMutexLock lock(&lmib->mutex);
+ lmib->mib.Merge(Block);
+ }
+}
+
+} // namespace __memprof
diff --git a/compiler-rt/lib/memprof/memprof_mibmap.h b/compiler-rt/lib/memprof/memprof_mibmap.h
new file mode 100644
index 000000000000..ed5dda174fe5
--- /dev/null
+++ b/compiler-rt/lib/memprof/memprof_mibmap.h
@@ -0,0 +1,24 @@
+#ifndef MEMPROF_MIBMAP_H_
+#define MEMPROF_MIBMAP_H_
+
+#include "memprof_meminfoblock.h"
+#include "sanitizer_common/sanitizer_addrhashmap.h"
+#include "sanitizer_common/sanitizer_mutex.h"
+
+namespace __memprof {
+
+struct LockedMemInfoBlock {
+ __sanitizer::StaticSpinMutex mutex;
+ MemInfoBlock mib;
+};
+
+// The MIB map stores a mapping from stack ids to MemInfoBlocks.
+typedef __sanitizer::AddrHashMap<LockedMemInfoBlock *, 200003> MIBMapTy;
+
+// Insert a new MemInfoBlock or merge with an existing block identified by the
+// stack id.
+void InsertOrMerge(const uptr Id, const MemInfoBlock &Block, MIBMapTy &Map);
+
+} // namespace __memprof
+
+#endif // MEMPROF_MIBMAP_H_
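A minimal usage sketch (not part of this patch) tying InsertOrMerge and the map handle together; the stack id 42 is a placeholder for a real StackDepotPut() id:

#include "memprof_mibmap.h"
using namespace __memprof;

void MapExample() {
  MIBMapTy Map;
  MemInfoBlock MIB(/*size=*/16, /*access_count=*/1, /*alloc_timestamp=*/0,
                   /*dealloc_timestamp=*/1, /*alloc_cpu=*/0, /*dealloc_cpu=*/0);
  const uptr StackId = 42;
  InsertOrMerge(StackId, MIB, Map); // creates the locked entry
  InsertOrMerge(StackId, MIB, Map); // merges into the existing entry
  MIBMapTy::Handle h(&Map, StackId, /*remove=*/false, /*create=*/false);
  // h.exists() is true and (*h)->mib.alloc_count == 2.
}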
diff --git a/compiler-rt/lib/memprof/memprof_rawprofile.cpp b/compiler-rt/lib/memprof/memprof_rawprofile.cpp
new file mode 100644
index 000000000000..96f315f95b24
--- /dev/null
+++ b/compiler-rt/lib/memprof/memprof_rawprofile.cpp
@@ -0,0 +1,250 @@
+#include "memprof_rawprofile.h"
+#include "memprof_meminfoblock.h"
+#include "sanitizer_common/sanitizer_allocator_internal.h"
+#include "sanitizer_common/sanitizer_linux.h"
+#include "sanitizer_common/sanitizer_procmaps.h"
+#include "sanitizer_common/sanitizer_stackdepot.h"
+#include "sanitizer_common/sanitizer_stackdepotbase.h"
+#include "sanitizer_common/sanitizer_stacktrace.h"
+#include "sanitizer_common/sanitizer_vector.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+namespace __memprof {
+using ::__sanitizer::Vector;
+
+namespace {
+typedef struct __attribute__((__packed__)) {
+ u64 start;
+ u64 end;
+ u64 offset;
+ u8 buildId[32];
+} SegmentEntry;
+
+typedef struct __attribute__((__packed__)) {
+ u64 magic;
+ u64 version;
+ u64 total_size;
+ u64 segment_offset;
+ u64 mib_offset;
+ u64 stack_offset;
+} Header;
+
+template <class T> char *WriteBytes(T Pod, char *&Buffer) {
+ *(T *)Buffer = Pod;
+ return Buffer + sizeof(T);
+}
+
+void RecordStackId(const uptr Key, UNUSED LockedMemInfoBlock *const &MIB,
+ void *Arg) {
+ // No need to touch the MIB value here since we are only recording the key.
+ auto *StackIds = reinterpret_cast<Vector<u64> *>(Arg);
+ StackIds->PushBack(Key);
+}
+} // namespace
+
+u64 SegmentSizeBytes(MemoryMappingLayoutBase &Layout) {
+ u64 NumSegmentsToRecord = 0;
+ MemoryMappedSegment segment;
+ for (Layout.Reset(); Layout.Next(&segment);)
+ if (segment.IsReadable() && segment.IsExecutable())
+ NumSegmentsToRecord++;
+
+ return sizeof(u64) // A header which stores the number of records.
+ + sizeof(SegmentEntry) * NumSegmentsToRecord;
+}
+
+// The segment section uses the following format:
+// ---------- Segment Info
+// Num Entries
+// ---------- Segment Entry
+// Start
+// End
+// Offset
+// BuildID 32B
+// ----------
+// ...
+void SerializeSegmentsToBuffer(MemoryMappingLayoutBase &Layout,
+ const u64 ExpectedNumBytes, char *&Buffer) {
+ char *Ptr = Buffer;
+ // Reserve space for the final count.
+ Ptr += sizeof(u64);
+
+ u64 NumSegmentsRecorded = 0;
+ MemoryMappedSegment segment;
+
+ for (Layout.Reset(); Layout.Next(&segment);) {
+ if (segment.IsReadable() && segment.IsExecutable()) {
+ SegmentEntry entry{};
+ entry.start = segment.start;
+ entry.end = segment.end;
+ entry.offset = segment.offset;
+ memcpy(entry.buildId, segment.uuid, sizeof(segment.uuid));
+ memcpy(Ptr, &entry, sizeof(SegmentEntry));
+ Ptr += sizeof(SegmentEntry);
+ NumSegmentsRecorded++;
+ }
+ }
+
+ // Store the number of segments we recorded in the space we reserved.
+ *((u64 *)Buffer) = NumSegmentsRecorded;
+ CHECK(ExpectedNumBytes == static_cast<u64>(Ptr - Buffer) &&
+ "Expected num bytes != actual bytes written");
+}
+
+u64 StackSizeBytes(const Vector<u64> &StackIds) {
+ u64 NumBytesToWrite = sizeof(u64);
+
+ const u64 NumIds = StackIds.Size();
+ for (unsigned k = 0; k < NumIds; ++k) {
+ const u64 Id = StackIds[k];
+ // One entry for the id and then one more for the number of stack pcs.
+ NumBytesToWrite += 2 * sizeof(u64);
+ const StackTrace St = StackDepotGet(Id);
+
+ CHECK(St.trace != nullptr && St.size > 0 && "Empty stack trace");
+ for (uptr i = 0; i < St.size && St.trace[i] != 0; i++) {
+ NumBytesToWrite += sizeof(u64);
+ }
+ }
+ return NumBytesToWrite;
+}
+
+// The stack info section uses the following format:
+//
+// ---------- Stack Info
+// Num Entries
+// ---------- Stack Entry
+// Num Stacks
+// PC1
+// PC2
+// ...
+// ----------
+void SerializeStackToBuffer(const Vector<u64> &StackIds,
+ const u64 ExpectedNumBytes, char *&Buffer) {
+ const u64 NumIds = StackIds.Size();
+ char *Ptr = Buffer;
+ Ptr = WriteBytes(static_cast<u64>(NumIds), Ptr);
+
+ for (unsigned k = 0; k < NumIds; ++k) {
+ const u64 Id = StackIds[k];
+ Ptr = WriteBytes(Id, Ptr);
+ Ptr += sizeof(u64); // Reserve a u64 for the PC count; we fill it in below.
+ u64 Count = 0;
+ const StackTrace St = StackDepotGet(Id);
+ for (uptr i = 0; i < St.size && St.trace[i] != 0; i++) {
+ // PCs in stack traces are actually the return addresses, that is,
+ // addresses of the next instructions after the call.
+ uptr pc = StackTrace::GetPreviousInstructionPc(St.trace[i]);
+ Ptr = WriteBytes(static_cast<u64>(pc), Ptr);
+ ++Count;
+ }
+ // Store the count in the space we reserved earlier.
+ *(u64 *)(Ptr - (Count + 1) * sizeof(u64)) = Count;
+ }
+
+ CHECK(ExpectedNumBytes == static_cast<u64>(Ptr - Buffer) &&
+ "Expected num bytes != actual bytes written");
+}
+
+// The MIB section has the following format:
+// ---------- MIB Info
+// Num Entries
+// ---------- MIB Entry 0
+// Alloc Count
+// ...
+// ---------- MIB Entry 1
+// Alloc Count
+// ...
+// ----------
+void SerializeMIBInfoToBuffer(MIBMapTy &MIBMap, const Vector<u64> &StackIds,
+ const u64 ExpectedNumBytes, char *&Buffer) {
+ char *Ptr = Buffer;
+ const u64 NumEntries = StackIds.Size();
+ Ptr = WriteBytes(NumEntries, Ptr);
+
+ for (u64 i = 0; i < NumEntries; i++) {
+ const u64 Key = StackIds[i];
+ MIBMapTy::Handle h(&MIBMap, Key, /*remove=*/true, /*create=*/false);
+ CHECK(h.exists());
+ Ptr = WriteBytes(Key, Ptr);
+ Ptr = WriteBytes((*h)->mib, Ptr);
+ }
+
+ CHECK(ExpectedNumBytes == static_cast<u64>(Ptr - Buffer) &&
+ "Expected num bytes != actual bytes written");
+}
+
+// Format
+// ---------- Header
+// Magic
+// Version
+// Total Size
+// Segment Offset
+// MIB Info Offset
+// Stack Offset
+// ---------- Segment Info
+// Num Entries
+// ---------- Segment Entry
+// Start
+// End
+// Offset
+// BuildID 32B
+// ----------
+// ...
+// ---------- MIB Info
+// Num Entries
+// ---------- MIB Entry
+// Alloc Count
+// ...
+// ---------- Stack Info
+// Num Entries
+// ---------- Stack Entry
+// Num Stacks
+// PC1
+// PC2
+// ...
+// ----------
+// ...
+u64 SerializeToRawProfile(MIBMapTy &MIBMap, MemoryMappingLayoutBase &Layout,
+ char *&Buffer) {
+ const u64 NumSegmentBytes = SegmentSizeBytes(Layout);
+
+ Vector<u64> StackIds;
+ MIBMap.ForEach(RecordStackId, reinterpret_cast<void *>(&StackIds));
+ // The first 8b are for the total number of MIB records. Each MIB record is
+ // preceded by an 8b stack id which is associated with stack frames in the next
+ // section.
+ const u64 NumMIBInfoBytes =
+ sizeof(u64) + StackIds.Size() * (sizeof(u64) + sizeof(MemInfoBlock));
+
+ const u64 NumStackBytes = StackSizeBytes(StackIds);
+
+ const u64 TotalSizeBytes =
+ sizeof(Header) + NumSegmentBytes + NumStackBytes + NumMIBInfoBytes;
+
+ // Allocate the memory for the entire buffer incl. info blocks.
+ Buffer = (char *)InternalAlloc(TotalSizeBytes);
+ char *Ptr = Buffer;
+
+ Header header{MEMPROF_RAW_MAGIC_64,
+ MEMPROF_RAW_VERSION,
+ static_cast<u64>(TotalSizeBytes),
+ sizeof(Header),
+ sizeof(Header) + NumSegmentBytes,
+ sizeof(Header) + NumSegmentBytes + NumMIBInfoBytes};
+ Ptr = WriteBytes(header, Ptr);
+
+ SerializeSegmentsToBuffer(Layout, NumSegmentBytes, Ptr);
+ Ptr += NumSegmentBytes;
+
+ SerializeMIBInfoToBuffer(MIBMap, StackIds, NumMIBInfoBytes, Ptr);
+ Ptr += NumMIBInfoBytes;
+
+ SerializeStackToBuffer(StackIds, NumStackBytes, Ptr);
+
+ return TotalSizeBytes;
+}
+
+} // namespace __memprof
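As a concrete check of this layout, take the case exercised by the unit test added later in this patch: one readable and executable segment and two MIB entries, each with a 5-frame stack. The header is 6 * 8 = 48 bytes, so the segment section starts at offset 48; the segment section is 8 (count) + 1 * 56 (one packed SegmentEntry) = 64 bytes, so the MIB section starts at 112; with the packed MemInfoBlock from memprof_meminfoblock.h occupying 100 bytes, the MIB section is 8 + 2 * (8 + 100) = 224 bytes, so the stack section starts at 336; and the stack section is 8 + 2 * (8 + 8 + 5 * 8) = 136 bytes, for a total profile size of 472 bytes.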
diff --git a/compiler-rt/lib/memprof/memprof_rawprofile.h b/compiler-rt/lib/memprof/memprof_rawprofile.h
new file mode 100644
index 000000000000..052bac3267f1
--- /dev/null
+++ b/compiler-rt/lib/memprof/memprof_rawprofile.h
@@ -0,0 +1,21 @@
+#ifndef MEMPROF_RAWPROFILE_H_
+#define MEMPROF_RAWPROFILE_H_
+
+#include "memprof_mibmap.h"
+#include "sanitizer_common/sanitizer_procmaps.h"
+
+namespace __memprof {
+
+// TODO: pull these in from MemProfData.inc
+#define MEMPROF_RAW_MAGIC_64 \
+ (u64)255 << 56 | (u64)'m' << 48 | (u64)'p' << 40 | (u64)'r' << 32 | \
+ (u64)'o' << 24 | (u64)'f' << 16 | (u64)'r' << 8 | (u64)129
+
+#define MEMPROF_RAW_VERSION 1ULL
+
+u64 SerializeToRawProfile(MIBMapTy &BlockCache, MemoryMappingLayoutBase &Layout,
+ char *&Buffer);
+
+} // namespace __memprof
+
+#endif // MEMPROF_RAWPROFILE_H_
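A minimal, standalone sketch (not part of this patch and not using the sanitizer runtime) of validating the header of a raw profile produced by SerializeToRawProfile, assuming a little-endian host so the packed fields can be read back directly:

#include <cstdint>
#include <cstdio>

// Mirrors the packed Header emitted in memprof_rawprofile.cpp.
struct __attribute__((__packed__)) RawHeader {
  uint64_t magic, version, total_size, segment_offset, mib_offset, stack_offset;
};

bool ReadRawProfileHeader(const char *path, RawHeader &H) {
  // Same value as MEMPROF_RAW_MAGIC_64 above.
  const uint64_t kMagic = (uint64_t)255 << 56 | (uint64_t)'m' << 48 |
                          (uint64_t)'p' << 40 | (uint64_t)'r' << 32 |
                          (uint64_t)'o' << 24 | (uint64_t)'f' << 16 |
                          (uint64_t)'r' << 8 | (uint64_t)129;
  FILE *F = fopen(path, "rb");
  if (!F)
    return false;
  bool Ok = fread(&H, sizeof(H), 1, F) == 1 && H.magic == kMagic &&
            H.version == 1 /* MEMPROF_RAW_VERSION */;
  fclose(F);
  return Ok;
}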
diff --git a/compiler-rt/lib/memprof/memprof_rtl.cpp b/compiler-rt/lib/memprof/memprof_rtl.cpp
index fee2912d64d4..fb2ef37e51a2 100644
--- a/compiler-rt/lib/memprof/memprof_rtl.cpp
+++ b/compiler-rt/lib/memprof/memprof_rtl.cpp
@@ -264,14 +264,9 @@ void __memprof_record_access(void const volatile *addr) {
__memprof::RecordAccess((uptr)addr);
}
-// We only record the access on the first location in the range,
-// since we will later accumulate the access counts across the
-// full allocation, and we don't want to inflate the hotness from
-// a memory intrinsic on a large range of memory.
-// TODO: Should we do something else so we can better track utilization?
-void __memprof_record_access_range(void const volatile *addr,
- UNUSED uptr size) {
- __memprof::RecordAccess((uptr)addr);
+void __memprof_record_access_range(void const volatile *addr, uptr size) {
+ for (uptr a = (uptr)addr; a < (uptr)addr + size; a += kWordSize)
+ __memprof::RecordAccess(a);
}
extern "C" SANITIZER_INTERFACE_ATTRIBUTE u16
diff --git a/compiler-rt/lib/memprof/memprof_stats.cpp b/compiler-rt/lib/memprof/memprof_stats.cpp
index 8a50d270dc6a..c8faebfa12de 100644
--- a/compiler-rt/lib/memprof/memprof_stats.cpp
+++ b/compiler-rt/lib/memprof/memprof_stats.cpp
@@ -62,11 +62,11 @@ void MemprofStats::MergeFrom(const MemprofStats *stats) {
dst_ptr[i] += src_ptr[i];
}
-static BlockingMutex print_lock(LINKER_INITIALIZED);
+static Mutex print_lock;
static MemprofStats unknown_thread_stats(LINKER_INITIALIZED);
static MemprofStats dead_threads_stats(LINKER_INITIALIZED);
-static BlockingMutex dead_threads_stats_lock(LINKER_INITIALIZED);
+static Mutex dead_threads_stats_lock;
// Required for malloc_zone_statistics() on OS X. This can't be stored in
// per-thread MemprofStats.
static uptr max_malloced_memory;
@@ -87,7 +87,7 @@ static void GetAccumulatedStats(MemprofStats *stats) {
}
stats->MergeFrom(&unknown_thread_stats);
{
- BlockingMutexLock lock(&dead_threads_stats_lock);
+ Lock lock(&dead_threads_stats_lock);
stats->MergeFrom(&dead_threads_stats);
}
// This is not very accurate: we may miss allocation peaks that happen
@@ -99,7 +99,7 @@ static void GetAccumulatedStats(MemprofStats *stats) {
}
void FlushToDeadThreadStats(MemprofStats *stats) {
- BlockingMutexLock lock(&dead_threads_stats_lock);
+ Lock lock(&dead_threads_stats_lock);
dead_threads_stats.MergeFrom(stats);
stats->Clear();
}
@@ -113,11 +113,11 @@ static void PrintAccumulatedStats() {
MemprofStats stats;
GetAccumulatedStats(&stats);
// Use lock to keep reports from mixing up.
- BlockingMutexLock lock(&print_lock);
+ Lock lock(&print_lock);
stats.Print();
- StackDepotStats *stack_depot_stats = StackDepotGetStats();
+ StackDepotStats stack_depot_stats = StackDepotGetStats();
Printf("Stats: StackDepot: %zd ids; %zdM allocated\n",
- stack_depot_stats->n_uniq_ids, stack_depot_stats->allocated >> 20);
+ stack_depot_stats.n_uniq_ids, stack_depot_stats.allocated >> 20);
PrintInternalAllocatorStats();
}
diff --git a/compiler-rt/lib/memprof/memprof_thread.cpp b/compiler-rt/lib/memprof/memprof_thread.cpp
index 5ae7a2ee85b9..9512a87cf98e 100644
--- a/compiler-rt/lib/memprof/memprof_thread.cpp
+++ b/compiler-rt/lib/memprof/memprof_thread.cpp
@@ -40,11 +40,11 @@ void MemprofThreadContext::OnFinished() {
static ALIGNED(16) char thread_registry_placeholder[sizeof(ThreadRegistry)];
static ThreadRegistry *memprof_thread_registry;
-static BlockingMutex mu_for_thread_context(LINKER_INITIALIZED);
+static Mutex mu_for_thread_context;
static LowLevelAllocator allocator_for_thread_context;
static ThreadContextBase *GetMemprofThreadContext(u32 tid) {
- BlockingMutexLock lock(&mu_for_thread_context);
+ Lock lock(&mu_for_thread_context);
return new (allocator_for_thread_context) MemprofThreadContext(tid);
}
@@ -80,8 +80,7 @@ MemprofThread *MemprofThread::Create(thread_callback_t start_routine, void *arg,
thread->start_routine_ = start_routine;
thread->arg_ = arg;
MemprofThreadContext::CreateThreadContextArgs args = {thread, stack};
- memprofThreadRegistry().CreateThread(*reinterpret_cast<uptr *>(thread),
- detached, parent_tid, &args);
+ memprofThreadRegistry().CreateThread(0, detached, parent_tid, &args);
return thread;
}
@@ -131,7 +130,7 @@ void MemprofThread::Init(const InitOptions *options) {
int local = 0;
VReport(1, "T%d: stack [%p,%p) size 0x%zx; local=%p\n", tid(),
(void *)stack_bottom_, (void *)stack_top_, stack_top_ - stack_bottom_,
- &local);
+ (void *)&local);
}
thread_return_t
@@ -198,7 +197,7 @@ MemprofThread *GetCurrentThread() {
void SetCurrentThread(MemprofThread *t) {
CHECK(t->context());
- VReport(2, "SetCurrentThread: %p for thread %p\n", t->context(),
+ VReport(2, "SetCurrentThread: %p for thread %p\n", (void *)t->context(),
(void *)GetThreadSelf());
// Make sure we do not reset the current MemprofThread.
CHECK_EQ(0, TSDGet());
diff --git a/compiler-rt/lib/memprof/tests/driver.cpp b/compiler-rt/lib/memprof/tests/driver.cpp
new file mode 100644
index 000000000000..b402cec1126b
--- /dev/null
+++ b/compiler-rt/lib/memprof/tests/driver.cpp
@@ -0,0 +1,14 @@
+//===-- driver.cpp ----------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "gtest/gtest.h"
+
+int main(int argc, char **argv) {
+ testing::InitGoogleTest(&argc, argv);
+ return RUN_ALL_TESTS();
+}
diff --git a/compiler-rt/lib/memprof/tests/rawprofile.cpp b/compiler-rt/lib/memprof/tests/rawprofile.cpp
new file mode 100644
index 000000000000..4404ab86092e
--- /dev/null
+++ b/compiler-rt/lib/memprof/tests/rawprofile.cpp
@@ -0,0 +1,188 @@
+#include "memprof/memprof_rawprofile.h"
+
+#include "memprof/memprof_meminfoblock.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_procmaps.h"
+#include "sanitizer_common/sanitizer_stackdepot.h"
+#include "sanitizer_common/sanitizer_stacktrace.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+#include <memory>
+
+namespace {
+
+using ::__memprof::MemInfoBlock;
+using ::__memprof::MIBMapTy;
+using ::__memprof::SerializeToRawProfile;
+using ::__sanitizer::MemoryMappedSegment;
+using ::__sanitizer::MemoryMappingLayoutBase;
+using ::__sanitizer::StackDepotPut;
+using ::__sanitizer::StackTrace;
+using ::testing::_;
+using ::testing::Action;
+using ::testing::DoAll;
+using ::testing::Return;
+using ::testing::SetArgPointee;
+
+class MockMemoryMappingLayout final : public MemoryMappingLayoutBase {
+public:
+ MOCK_METHOD(bool, Next, (MemoryMappedSegment *), (override));
+ MOCK_METHOD(void, Reset, (), (override));
+};
+
+u64 PopulateFakeMap(const MemInfoBlock &FakeMIB, uptr StackPCBegin,
+ MIBMapTy &FakeMap) {
+ constexpr int kSize = 5;
+ uptr array[kSize];
+ for (int i = 0; i < kSize; i++) {
+ array[i] = StackPCBegin + i;
+ }
+ StackTrace St(array, kSize);
+ u32 Id = StackDepotPut(St);
+
+ InsertOrMerge(Id, FakeMIB, FakeMap);
+ return Id;
+}
+
+template <class T = u64> T Read(char *&Buffer) {
+ static_assert(std::is_pod<T>::value, "Must be a POD type.");
+ T t = *reinterpret_cast<T *>(Buffer);
+ Buffer += sizeof(T);
+ return t;
+}
+
+TEST(MemProf, Basic) {
+ MockMemoryMappingLayout Layout;
+ MemoryMappedSegment FakeSegment;
+ memset(&FakeSegment, 0, sizeof(FakeSegment));
+ FakeSegment.start = 0x10;
+ FakeSegment.end = 0x20;
+ FakeSegment.offset = 0x10;
+ uint8_t uuid[__sanitizer::kModuleUUIDSize] = {0xC, 0x0, 0xF, 0xF, 0xE, 0xE};
+ memcpy(FakeSegment.uuid, uuid, __sanitizer::kModuleUUIDSize);
+ FakeSegment.protection =
+ __sanitizer::kProtectionExecute | __sanitizer::kProtectionRead;
+
+ const Action<bool(MemoryMappedSegment *)> SetSegment =
+ DoAll(SetArgPointee<0>(FakeSegment), Return(true));
+ EXPECT_CALL(Layout, Next(_))
+ .WillOnce(SetSegment)
+ .WillOnce(Return(false))
+ .WillOnce(SetSegment)
+ .WillRepeatedly(Return(false));
+
+ EXPECT_CALL(Layout, Reset).Times(2);
+
+ MIBMapTy FakeMap;
+ MemInfoBlock FakeMIB;
+ // Bypass the constructor and set the values directly to make the expected
+ // bytes easier to verify in the test.
+ memset(&FakeMIB, 0, sizeof(MemInfoBlock));
+ FakeMIB.alloc_count = 0x1;
+ FakeMIB.total_access_count = 0x2;
+
+ u64 FakeIds[2];
+ FakeIds[0] = PopulateFakeMap(FakeMIB, /*StackPCBegin=*/2, FakeMap);
+ FakeIds[1] = PopulateFakeMap(FakeMIB, /*StackPCBegin=*/3, FakeMap);
+
+ char *Ptr = nullptr;
+ u64 NumBytes = SerializeToRawProfile(FakeMap, Layout, Ptr);
+ const char *Buffer = Ptr;
+
+ ASSERT_GT(NumBytes, 0ULL);
+ ASSERT_TRUE(Ptr);
+
+ // Check the header.
+ EXPECT_THAT(Read(Ptr), MEMPROF_RAW_MAGIC_64);
+ EXPECT_THAT(Read(Ptr), MEMPROF_RAW_VERSION);
+ const u64 TotalSize = Read(Ptr);
+ const u64 SegmentOffset = Read(Ptr);
+ const u64 MIBOffset = Read(Ptr);
+ const u64 StackOffset = Read(Ptr);
+
+ // ============= Check sizes.
+ EXPECT_EQ(TotalSize, NumBytes);
+
+ // Should be equal to the size of the raw profile header.
+ EXPECT_EQ(SegmentOffset, 48ULL);
+
+ // We expect only 1 segment entry, 8b for the count and 56b for SegmentEntry
+ // in memprof_rawprofile.cpp.
+ EXPECT_EQ(MIBOffset - SegmentOffset, 64ULL);
+
+ EXPECT_EQ(MIBOffset, 112ULL);
+ // We expect 2 MIB entries: 8b for the count, then for each entry sizeof(u64)
+ // for the stack id plus sizeof(MemInfoBlock) for the MemInfoBlock itself.
+ EXPECT_EQ(StackOffset - MIBOffset, 8 + 2 * (8 + sizeof(MemInfoBlock)));
+
+ EXPECT_EQ(StackOffset, 336ULL);
+ // We expect 2 stack entries, with 5 frames - 8b for total count,
+ // 2 * (8b for id, 8b for frame count and 5*8b for fake frames)
+ EXPECT_EQ(TotalSize - StackOffset, 8ULL + 2 * (8 + 8 + 5 * 8));
+
+ // ============= Check contents.
+ unsigned char ExpectedSegmentBytes[64] = {
+ 0x01, 0, 0, 0, 0, 0, 0, 0, // Number of entries
+ 0x10, 0, 0, 0, 0, 0, 0, 0, // Start
+ 0x20, 0, 0, 0, 0, 0, 0, 0, // End
+ 0x10, 0, 0, 0, 0, 0, 0, 0, // Offset
+ 0x0C, 0x0, 0xF, 0xF, 0xE, 0xE, // Uuid
+ };
+ EXPECT_EQ(memcmp(Buffer + SegmentOffset, ExpectedSegmentBytes, 64), 0);
+
+ // Check that the number of entries is 2.
+ EXPECT_EQ(*reinterpret_cast<const u64 *>(Buffer + MIBOffset), 2ULL);
+ // Check that stack id is set.
+ EXPECT_EQ(*reinterpret_cast<const u64 *>(Buffer + MIBOffset + 8), FakeIds[0]);
+
+ // Only check a few fields of the first MemInfoBlock.
+ unsigned char ExpectedMIBBytes[sizeof(MemInfoBlock)] = {
+ 0x01, 0, 0, 0, // Alloc count
+ 0x02, 0, 0, 0, // Total access count
+ };
+ // Compare contents of 1st MIB after skipping count and stack id.
+ EXPECT_EQ(
+ memcmp(Buffer + MIBOffset + 16, ExpectedMIBBytes, sizeof(MemInfoBlock)),
+ 0);
+ // Compare contents of 2nd MIB after skipping count and stack id for the first
+ // and only the id for the second.
+ EXPECT_EQ(memcmp(Buffer + MIBOffset + 16 + sizeof(MemInfoBlock) + 8,
+ ExpectedMIBBytes, sizeof(MemInfoBlock)),
+ 0);
+
+ // Check that the number of entries is 2.
+ EXPECT_EQ(*reinterpret_cast<const u64 *>(Buffer + StackOffset), 2ULL);
+ // Check that the 1st stack id is set.
+ EXPECT_EQ(*reinterpret_cast<const u64 *>(Buffer + StackOffset + 8),
+ FakeIds[0]);
+ // Contents are the number of PCs, followed by each PC value minus 1.
+ unsigned char ExpectedStackBytes[2][6 * 8] = {
+ {
+ 0x5, 0, 0, 0, 0, 0, 0, 0, // Number of PCs
+ 0x1, 0, 0, 0, 0, 0, 0, 0, // PC ...
+ 0x2, 0, 0, 0, 0, 0, 0, 0, 0x3, 0, 0, 0, 0, 0, 0, 0,
+ 0x4, 0, 0, 0, 0, 0, 0, 0, 0x5, 0, 0, 0, 0, 0, 0, 0,
+ },
+ {
+ 0x5, 0, 0, 0, 0, 0, 0, 0, // Number of PCs
+ 0x2, 0, 0, 0, 0, 0, 0, 0, // PC ...
+ 0x3, 0, 0, 0, 0, 0, 0, 0, 0x4, 0, 0, 0, 0, 0, 0, 0,
+ 0x5, 0, 0, 0, 0, 0, 0, 0, 0x6, 0, 0, 0, 0, 0, 0, 0,
+ },
+ };
+ EXPECT_EQ(memcmp(Buffer + StackOffset + 16, ExpectedStackBytes[0],
+ sizeof(ExpectedStackBytes[0])),
+ 0);
+
+ // Check that the 2nd stack id is set.
+ EXPECT_EQ(
+ *reinterpret_cast<const u64 *>(Buffer + StackOffset + 8 + 6 * 8 + 8),
+ FakeIds[1]);
+
+ EXPECT_EQ(memcmp(Buffer + StackOffset + 16 + 6 * 8 + 8, ExpectedStackBytes[1],
+ sizeof(ExpectedStackBytes[1])),
+ 0);
+}
+
+} // namespace
diff --git a/compiler-rt/lib/msan/msan.cpp b/compiler-rt/lib/msan/msan.cpp
index 4fa772fdcb6e..c554a830e755 100644
--- a/compiler-rt/lib/msan/msan.cpp
+++ b/compiler-rt/lib/msan/msan.cpp
@@ -470,7 +470,7 @@ void __msan_init() {
MsanThread *main_thread = MsanThread::Create(nullptr, nullptr);
SetCurrentThread(main_thread);
- main_thread->ThreadStart();
+ main_thread->Init();
#if MSAN_CONTAINS_UBSAN
__ubsan::InitAsPlugin();
@@ -515,6 +515,7 @@ void __msan_dump_shadow(const void *x, uptr size) {
}
unsigned char *s = (unsigned char*)MEM_TO_SHADOW(x);
+ Printf("%p[%p] ", (void *)s, x);
for (uptr i = 0; i < size; i++)
Printf("%x%x ", s[i] >> 4, s[i] & 0xf);
Printf("\n");
@@ -604,7 +605,7 @@ void __msan_set_alloca_origin4(void *a, uptr size, char *descr, uptr pc) {
id = Origin::CreateStackOrigin(idx).raw_id();
*id_ptr = id;
if (print)
- Printf("First time: idx=%d id=%d %s %p \n", idx, id, descr + 4, pc);
+ Printf("First time: idx=%d id=%d %s 0x%zx \n", idx, id, descr + 4, pc);
}
if (print)
Printf("__msan_set_alloca_origin: descr=%s id=%x\n", descr + 4, id);
diff --git a/compiler-rt/lib/msan/msan.h b/compiler-rt/lib/msan/msan.h
index 963b94a54087..4b2cec31756a 100644
--- a/compiler-rt/lib/msan/msan.h
+++ b/compiler-rt/lib/msan/msan.h
@@ -121,7 +121,7 @@ const MappingDesc kMemoryLayout[] = {
// The mappings below are used only for 48-bits VMA.
// TODO(unknown): 48-bit mapping only covers the usual PIE, non-PIE
// segments and some more segments totaling 262144GB of VMA (which cover
- // only 0.32% of all 48-bit VMA). Memory avaliability can be increase by
+ // only 0.32% of all 48-bit VMA). Memory availability can be increased by
// adding multiple application segments like 39 and 42 mapping.
{0x0040000000000ULL, 0x0041000000000ULL, MappingDesc::INVALID, "invalid"},
{0x0041000000000ULL, 0x0042000000000ULL, MappingDesc::APP, "app-10"},
@@ -219,7 +219,7 @@ const MappingDesc kMemoryLayout[] = {
#elif SANITIZER_NETBSD || (SANITIZER_LINUX && SANITIZER_WORDSIZE == 64)
#ifdef MSAN_LINUX_X86_64_OLD_MAPPING
-// Requries PIE binary and ASLR enabled.
+// Requires PIE binary and ASLR enabled.
// Main thread stack and DSOs at 0x7f0000000000 (sometimes 0x7e0000000000).
// Heap at 0x600000000000.
const MappingDesc kMemoryLayout[] = {
diff --git a/compiler-rt/lib/msan/msan_chained_origin_depot.cpp b/compiler-rt/lib/msan/msan_chained_origin_depot.cpp
index 5dee80fd4692..49b14131a89b 100644
--- a/compiler-rt/lib/msan/msan_chained_origin_depot.cpp
+++ b/compiler-rt/lib/msan/msan_chained_origin_depot.cpp
@@ -19,7 +19,7 @@ namespace __msan {
static ChainedOriginDepot chainedOriginDepot;
-StackDepotStats *ChainedOriginDepotGetStats() {
+StackDepotStats ChainedOriginDepotGetStats() {
return chainedOriginDepot.GetStats();
}
diff --git a/compiler-rt/lib/msan/msan_chained_origin_depot.h b/compiler-rt/lib/msan/msan_chained_origin_depot.h
index 60ab182fa4c8..ea51c77a905b 100644
--- a/compiler-rt/lib/msan/msan_chained_origin_depot.h
+++ b/compiler-rt/lib/msan/msan_chained_origin_depot.h
@@ -19,7 +19,7 @@
namespace __msan {
// Gets the statistic of the origin chain storage.
-StackDepotStats *ChainedOriginDepotGetStats();
+StackDepotStats ChainedOriginDepotGetStats();
// Stores a chain with StackDepot ID here_id and previous chain ID prev_id.
// If successful, returns true and the new chain id new_id.
diff --git a/compiler-rt/lib/msan/msan_interceptors.cpp b/compiler-rt/lib/msan/msan_interceptors.cpp
index 760f74e927d0..eaa3b3ae9404 100644
--- a/compiler-rt/lib/msan/msan_interceptors.cpp
+++ b/compiler-rt/lib/msan/msan_interceptors.cpp
@@ -18,21 +18,22 @@
#include "msan.h"
#include "msan_chained_origin_depot.h"
#include "msan_origin.h"
+#include "msan_poisoning.h"
#include "msan_report.h"
#include "msan_thread.h"
-#include "msan_poisoning.h"
-#include "sanitizer_common/sanitizer_errno_codes.h"
-#include "sanitizer_common/sanitizer_platform_limits_posix.h"
-#include "sanitizer_common/sanitizer_platform_limits_netbsd.h"
#include "sanitizer_common/sanitizer_allocator.h"
+#include "sanitizer_common/sanitizer_allocator_dlsym.h"
#include "sanitizer_common/sanitizer_allocator_interface.h"
-#include "sanitizer_common/sanitizer_allocator_internal.h"
#include "sanitizer_common/sanitizer_atomic.h"
#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_errno.h"
-#include "sanitizer_common/sanitizer_stackdepot.h"
+#include "sanitizer_common/sanitizer_errno_codes.h"
+#include "sanitizer_common/sanitizer_glibc_version.h"
#include "sanitizer_common/sanitizer_libc.h"
#include "sanitizer_common/sanitizer_linux.h"
+#include "sanitizer_common/sanitizer_platform_limits_netbsd.h"
+#include "sanitizer_common/sanitizer_platform_limits_posix.h"
+#include "sanitizer_common/sanitizer_stackdepot.h"
#include "sanitizer_common/sanitizer_tls_get_addr.h"
#include "sanitizer_common/sanitizer_vector.h"
@@ -74,22 +75,9 @@ bool IsInInterceptorScope() {
return in_interceptor_scope;
}
-static uptr allocated_for_dlsym;
-static const uptr kDlsymAllocPoolSize = 1024;
-static uptr alloc_memory_for_dlsym[kDlsymAllocPoolSize];
-
-static bool IsInDlsymAllocPool(const void *ptr) {
- uptr off = (uptr)ptr - (uptr)alloc_memory_for_dlsym;
- return off < sizeof(alloc_memory_for_dlsym);
-}
-
-static void *AllocateFromLocalPool(uptr size_in_bytes) {
- uptr size_in_words = RoundUpTo(size_in_bytes, kWordSize) / kWordSize;
- void *mem = (void *)&alloc_memory_for_dlsym[allocated_for_dlsym];
- allocated_for_dlsym += size_in_words;
- CHECK_LT(allocated_for_dlsym, kDlsymAllocPoolSize);
- return mem;
-}
+struct DlsymAlloc : public DlSymAllocator<DlsymAlloc> {
+ static bool UseImpl() { return !msan_inited; }
+};
#define ENSURE_MSAN_INITED() do { \
CHECK(!msan_init_is_running); \
@@ -220,18 +208,24 @@ INTERCEPTOR(void *, pvalloc, SIZE_T size) {
#endif
INTERCEPTOR(void, free, void *ptr) {
+ if (UNLIKELY(!ptr))
+ return;
+ if (DlsymAlloc::PointerIsMine(ptr))
+ return DlsymAlloc::Free(ptr);
GET_MALLOC_STACK_TRACE;
- if (!ptr || UNLIKELY(IsInDlsymAllocPool(ptr))) return;
MsanDeallocate(&stack, ptr);
}
#if !SANITIZER_FREEBSD && !SANITIZER_NETBSD
INTERCEPTOR(void, cfree, void *ptr) {
+ if (UNLIKELY(!ptr))
+ return;
+ if (DlsymAlloc::PointerIsMine(ptr))
+ return DlsymAlloc::Free(ptr);
GET_MALLOC_STACK_TRACE;
- if (!ptr || UNLIKELY(IsInDlsymAllocPool(ptr))) return;
MsanDeallocate(&stack, ptr);
}
-#define MSAN_MAYBE_INTERCEPT_CFREE INTERCEPT_FUNCTION(cfree)
+# define MSAN_MAYBE_INTERCEPT_CFREE INTERCEPT_FUNCTION(cfree)
#else
#define MSAN_MAYBE_INTERCEPT_CFREE
#endif
@@ -286,7 +280,7 @@ INTERCEPTOR(void, malloc_stats, void) {
INTERCEPTOR(char *, strcpy, char *dest, const char *src) {
ENSURE_MSAN_INITED();
GET_STORE_STACK_TRACE;
- SIZE_T n = REAL(strlen)(src);
+ SIZE_T n = internal_strlen(src);
CHECK_UNPOISONED_STRING(src + n, 0);
char *res = REAL(strcpy)(dest, src);
CopyShadowAndOrigin(dest, src, n + 1, &stack);
@@ -296,7 +290,7 @@ INTERCEPTOR(char *, strcpy, char *dest, const char *src) {
INTERCEPTOR(char *, strncpy, char *dest, const char *src, SIZE_T n) {
ENSURE_MSAN_INITED();
GET_STORE_STACK_TRACE;
- SIZE_T copy_size = REAL(strnlen)(src, n);
+ SIZE_T copy_size = internal_strnlen(src, n);
if (copy_size < n)
copy_size++; // trailing \0
char *res = REAL(strncpy)(dest, src, n);
@@ -309,7 +303,7 @@ INTERCEPTOR(char *, strncpy, char *dest, const char *src, SIZE_T n) {
INTERCEPTOR(char *, stpcpy, char *dest, const char *src) {
ENSURE_MSAN_INITED();
GET_STORE_STACK_TRACE;
- SIZE_T n = REAL(strlen)(src);
+ SIZE_T n = internal_strlen(src);
CHECK_UNPOISONED_STRING(src + n, 0);
char *res = REAL(stpcpy)(dest, src);
CopyShadowAndOrigin(dest, src, n + 1, &stack);
@@ -325,7 +319,7 @@ INTERCEPTOR(char *, strdup, char *src) {
GET_STORE_STACK_TRACE;
// On FreeBSD strdup() leverages strlen().
InterceptorScope interceptor_scope;
- SIZE_T n = REAL(strlen)(src);
+ SIZE_T n = internal_strlen(src);
CHECK_UNPOISONED_STRING(src + n, 0);
char *res = REAL(strdup)(src);
CopyShadowAndOrigin(res, src, n + 1, &stack);
@@ -336,7 +330,7 @@ INTERCEPTOR(char *, strdup, char *src) {
INTERCEPTOR(char *, __strdup, char *src) {
ENSURE_MSAN_INITED();
GET_STORE_STACK_TRACE;
- SIZE_T n = REAL(strlen)(src);
+ SIZE_T n = internal_strlen(src);
CHECK_UNPOISONED_STRING(src + n, 0);
char *res = REAL(__strdup)(src);
CopyShadowAndOrigin(res, src, n + 1, &stack);
@@ -351,7 +345,7 @@ INTERCEPTOR(char *, __strdup, char *src) {
INTERCEPTOR(char *, gcvt, double number, SIZE_T ndigit, char *buf) {
ENSURE_MSAN_INITED();
char *res = REAL(gcvt)(number, ndigit, buf);
- SIZE_T n = REAL(strlen)(buf);
+ SIZE_T n = internal_strlen(buf);
__msan_unpoison(buf, n + 1);
return res;
}
@@ -363,8 +357,8 @@ INTERCEPTOR(char *, gcvt, double number, SIZE_T ndigit, char *buf) {
INTERCEPTOR(char *, strcat, char *dest, const char *src) {
ENSURE_MSAN_INITED();
GET_STORE_STACK_TRACE;
- SIZE_T src_size = REAL(strlen)(src);
- SIZE_T dest_size = REAL(strlen)(dest);
+ SIZE_T src_size = internal_strlen(src);
+ SIZE_T dest_size = internal_strlen(dest);
CHECK_UNPOISONED_STRING(src + src_size, 0);
CHECK_UNPOISONED_STRING(dest + dest_size, 0);
char *res = REAL(strcat)(dest, src);
@@ -375,8 +369,8 @@ INTERCEPTOR(char *, strcat, char *dest, const char *src) {
INTERCEPTOR(char *, strncat, char *dest, const char *src, SIZE_T n) {
ENSURE_MSAN_INITED();
GET_STORE_STACK_TRACE;
- SIZE_T dest_size = REAL(strlen)(dest);
- SIZE_T copy_size = REAL(strnlen)(src, n);
+ SIZE_T dest_size = internal_strlen(dest);
+ SIZE_T copy_size = internal_strnlen(src, n);
CHECK_UNPOISONED_STRING(dest + dest_size, 0);
char *res = REAL(strncat)(dest, src, n);
CopyShadowAndOrigin(dest + dest_size, src, copy_size, &stack);
@@ -612,7 +606,8 @@ INTERCEPTOR(char *, fcvt, double x, int a, int *b, int *c) {
char *res = REAL(fcvt)(x, a, b, c);
__msan_unpoison(b, sizeof(*b));
__msan_unpoison(c, sizeof(*c));
- if (res) __msan_unpoison(res, REAL(strlen)(res) + 1);
+ if (res)
+ __msan_unpoison(res, internal_strlen(res) + 1);
return res;
}
#define MSAN_MAYBE_INTERCEPT_FCVT INTERCEPT_FUNCTION(fcvt)
@@ -625,7 +620,8 @@ INTERCEPTOR(char *, getenv, char *name) {
return REAL(getenv)(name);
ENSURE_MSAN_INITED();
char *res = REAL(getenv)(name);
- if (res) __msan_unpoison(res, REAL(strlen)(res) + 1);
+ if (res)
+ __msan_unpoison(res, internal_strlen(res) + 1);
return res;
}
@@ -635,7 +631,7 @@ static void UnpoisonEnviron() {
char **envp = environ;
for (; *envp; ++envp) {
__msan_unpoison(envp, sizeof(*envp));
- __msan_unpoison(*envp, REAL(strlen)(*envp) + 1);
+ __msan_unpoison(*envp, internal_strlen(*envp) + 1);
}
// Trailing NULL pointer.
__msan_unpoison(envp, sizeof(*envp));
@@ -656,7 +652,8 @@ INTERCEPTOR(int, putenv, char *string) {
return res;
}
-#if SANITIZER_FREEBSD || SANITIZER_NETBSD
+#define SANITIZER_STAT_LINUX (SANITIZER_LINUX && __GLIBC_PREREQ(2, 33))
+#if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_STAT_LINUX
INTERCEPTOR(int, fstat, int fd, void *buf) {
ENSURE_MSAN_INITED();
int res = REAL(fstat)(fd, buf);
@@ -664,7 +661,7 @@ INTERCEPTOR(int, fstat, int fd, void *buf) {
__msan_unpoison(buf, __sanitizer::struct_stat_sz);
return res;
}
-#define MSAN_MAYBE_INTERCEPT_FSTAT INTERCEPT_FUNCTION(fstat)
+# define MSAN_MAYBE_INTERCEPT_FSTAT MSAN_INTERCEPT_FUNC(fstat)
#else
#define MSAN_MAYBE_INTERCEPT_FSTAT
#endif
@@ -677,7 +674,7 @@ INTERCEPTOR(int, __fxstat, int magic, int fd, void *buf) {
__msan_unpoison(buf, __sanitizer::struct_stat_sz);
return res;
}
-#define MSAN_MAYBE_INTERCEPT___FXSTAT INTERCEPT_FUNCTION(__fxstat)
+# define MSAN_MAYBE_INTERCEPT___FXSTAT MSAN_INTERCEPT_FUNC(__fxstat)
#else
#define MSAN_MAYBE_INTERCEPT___FXSTAT
#endif
@@ -690,20 +687,24 @@ INTERCEPTOR(int, __fxstat64, int magic, int fd, void *buf) {
__msan_unpoison(buf, __sanitizer::struct_stat64_sz);
return res;
}
-#define MSAN_MAYBE_INTERCEPT___FXSTAT64 INTERCEPT_FUNCTION(__fxstat64)
+# define MSAN_MAYBE_INTERCEPT___FXSTAT64 MSAN_INTERCEPT_FUNC(__fxstat64)
#else
-#define MSAN_MAYBE_INTERCEPT___FXSTAT64
+# define MSAN_MAYBE_INTERCEPT___FXSTAT64
#endif
-#if SANITIZER_FREEBSD || SANITIZER_NETBSD
+#if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_STAT_LINUX
INTERCEPTOR(int, fstatat, int fd, char *pathname, void *buf, int flags) {
ENSURE_MSAN_INITED();
int res = REAL(fstatat)(fd, pathname, buf, flags);
if (!res) __msan_unpoison(buf, __sanitizer::struct_stat_sz);
return res;
}
-# define MSAN_INTERCEPT_FSTATAT INTERCEPT_FUNCTION(fstatat)
+# define MSAN_MAYBE_INTERCEPT_FSTATAT MSAN_INTERCEPT_FUNC(fstatat)
#else
+# define MSAN_MAYBE_INTERCEPT_FSTATAT
+#endif
+
+#if !SANITIZER_FREEBSD && !SANITIZER_NETBSD
INTERCEPTOR(int, __fxstatat, int magic, int fd, char *pathname, void *buf,
int flags) {
ENSURE_MSAN_INITED();
@@ -711,7 +712,9 @@ INTERCEPTOR(int, __fxstatat, int magic, int fd, char *pathname, void *buf,
if (!res) __msan_unpoison(buf, __sanitizer::struct_stat_sz);
return res;
}
-# define MSAN_INTERCEPT_FSTATAT INTERCEPT_FUNCTION(__fxstatat)
+# define MSAN_MAYBE_INTERCEPT___FXSTATAT MSAN_INTERCEPT_FUNC(__fxstatat)
+#else
+# define MSAN_MAYBE_INTERCEPT___FXSTATAT
#endif
#if !SANITIZER_FREEBSD && !SANITIZER_NETBSD
@@ -722,9 +725,9 @@ INTERCEPTOR(int, __fxstatat64, int magic, int fd, char *pathname, void *buf,
if (!res) __msan_unpoison(buf, __sanitizer::struct_stat64_sz);
return res;
}
-#define MSAN_MAYBE_INTERCEPT___FXSTATAT64 INTERCEPT_FUNCTION(__fxstatat64)
+# define MSAN_MAYBE_INTERCEPT___FXSTATAT64 MSAN_INTERCEPT_FUNC(__fxstatat64)
#else
-#define MSAN_MAYBE_INTERCEPT___FXSTATAT64
+# define MSAN_MAYBE_INTERCEPT___FXSTATAT64
#endif
INTERCEPTOR(int, pipe, int pipefd[2]) {
@@ -758,7 +761,7 @@ INTERCEPTOR(char *, fgets_unlocked, char *s, int size, void *stream) {
ENSURE_MSAN_INITED();
char *res = REAL(fgets_unlocked)(s, size, stream);
if (res)
- __msan_unpoison(s, REAL(strlen)(s) + 1);
+ __msan_unpoison(s, internal_strlen(s) + 1);
return res;
}
#define MSAN_MAYBE_INTERCEPT_FGETS_UNLOCKED INTERCEPT_FUNCTION(fgets_unlocked)
@@ -829,7 +832,7 @@ INTERCEPTOR(int, gethostname, char *name, SIZE_T len) {
ENSURE_MSAN_INITED();
int res = REAL(gethostname)(name, len);
if (!res || (res == -1 && errno == errno_ENAMETOOLONG)) {
- SIZE_T real_len = REAL(strnlen)(name, len);
+ SIZE_T real_len = internal_strnlen(name, len);
if (real_len < len)
++real_len;
__msan_unpoison(name, real_len);
@@ -869,27 +872,15 @@ INTERCEPTOR(int, epoll_pwait, int epfd, void *events, int maxevents,
INTERCEPTOR(void *, calloc, SIZE_T nmemb, SIZE_T size) {
GET_MALLOC_STACK_TRACE;
- if (UNLIKELY(!msan_inited))
- // Hack: dlsym calls calloc before REAL(calloc) is retrieved from dlsym.
- return AllocateFromLocalPool(nmemb * size);
+ if (DlsymAlloc::Use())
+ return DlsymAlloc::Callocate(nmemb, size);
return msan_calloc(nmemb, size, &stack);
}
INTERCEPTOR(void *, realloc, void *ptr, SIZE_T size) {
+ if (DlsymAlloc::Use() || DlsymAlloc::PointerIsMine(ptr))
+ return DlsymAlloc::Realloc(ptr, size);
GET_MALLOC_STACK_TRACE;
- if (UNLIKELY(IsInDlsymAllocPool(ptr))) {
- uptr offset = (uptr)ptr - (uptr)alloc_memory_for_dlsym;
- uptr copy_size = Min(size, kDlsymAllocPoolSize - offset);
- void *new_ptr;
- if (UNLIKELY(!msan_inited)) {
- new_ptr = AllocateFromLocalPool(copy_size);
- } else {
- copy_size = size;
- new_ptr = msan_malloc(copy_size, &stack);
- }
- internal_memcpy(new_ptr, ptr, copy_size);
- return new_ptr;
- }
return msan_realloc(ptr, size, &stack);
}
@@ -899,16 +890,15 @@ INTERCEPTOR(void *, reallocarray, void *ptr, SIZE_T nmemb, SIZE_T size) {
}
INTERCEPTOR(void *, malloc, SIZE_T size) {
+ if (DlsymAlloc::Use())
+ return DlsymAlloc::Allocate(size);
GET_MALLOC_STACK_TRACE;
- if (UNLIKELY(!msan_inited))
- // Hack: dlsym calls malloc before REAL(malloc) is retrieved from dlsym.
- return AllocateFromLocalPool(size);
return msan_malloc(size, &stack);
}
void __msan_allocated_memory(const void *data, uptr size) {
- GET_MALLOC_STACK_TRACE;
if (flags()->poison_in_malloc) {
+ GET_MALLOC_STACK_TRACE;
stack.tag = STACK_TRACE_TAG_POISON;
PoisonMemory(data, size, &stack);
}
@@ -920,8 +910,8 @@ void __msan_copy_shadow(void *dest, const void *src, uptr n) {
}
void __sanitizer_dtor_callback(const void *data, uptr size) {
- GET_MALLOC_STACK_TRACE;
if (flags()->poison_in_dtor) {
+ GET_MALLOC_STACK_TRACE;
stack.tag = STACK_TRACE_TAG_POISON;
PoisonMemory(data, size, &stack);
}
@@ -1023,6 +1013,8 @@ extern "C" int pthread_attr_destroy(void *attr);
static void *MsanThreadStartFunc(void *arg) {
MsanThread *t = (MsanThread *)arg;
SetCurrentThread(t);
+ t->Init();
+ SetSigProcMask(&t->starting_sigset_, nullptr);
return t->ThreadStart();
}
@@ -1038,7 +1030,7 @@ INTERCEPTOR(int, pthread_create, void *th, void *attr, void *(*callback)(void*),
AdjustStackSize(attr);
MsanThread *t = MsanThread::Create(callback, param);
-
+ ScopedBlockSignals block(&t->starting_sigset_);
int res = REAL(pthread_create)(th, attr, MsanThreadStartFunc, t);
if (attr == &myattr)
@@ -1080,9 +1072,9 @@ INTERCEPTOR(void, tzset, int fake) {
InterceptorScope interceptor_scope;
REAL(tzset)(fake);
if (tzname[0])
- __msan_unpoison(tzname[0], REAL(strlen)(tzname[0]) + 1);
+ __msan_unpoison(tzname[0], internal_strlen(tzname[0]) + 1);
if (tzname[1])
- __msan_unpoison(tzname[1], REAL(strlen)(tzname[1]) + 1);
+ __msan_unpoison(tzname[1], internal_strlen(tzname[1]) + 1);
return;
}
@@ -1092,7 +1084,7 @@ struct MSanAtExitRecord {
};
struct InterceptorContext {
- BlockingMutex atexit_mu;
+ Mutex atexit_mu;
Vector<struct MSanAtExitRecord *> AtExitStack;
InterceptorContext()
@@ -1108,7 +1100,7 @@ InterceptorContext *interceptor_ctx() {
void MSanAtExitWrapper() {
MSanAtExitRecord *r;
{
- BlockingMutexLock l(&interceptor_ctx()->atexit_mu);
+ Lock l(&interceptor_ctx()->atexit_mu);
uptr element = interceptor_ctx()->AtExitStack.Size() - 1;
r = interceptor_ctx()->AtExitStack[element];
@@ -1142,7 +1134,7 @@ INTERCEPTOR(int, __cxa_atexit, void (*func)(void *), void *arg,
// Unpoison argument shadow for C++ module destructors.
INTERCEPTOR(int, atexit, void (*func)()) {
- // Avoid calling real atexit as it is unrechable on at least on Linux.
+ // Avoid calling real atexit as it is unreachable, at least on Linux.
if (msan_init_is_running)
return REAL(__cxa_atexit)((void (*)(void *a))func, 0, 0);
return setup_at_exit_wrapper((void(*)())func, 0, 0);
@@ -1159,7 +1151,7 @@ static int setup_at_exit_wrapper(void(*f)(), void *arg, void *dso) {
// NetBSD does not preserve the 2nd argument if dso is equal to 0
// Store ctx in a local stack-like structure
- BlockingMutexLock l(&interceptor_ctx()->atexit_mu);
+ Lock l(&interceptor_ctx()->atexit_mu);
res = REAL(__cxa_atexit)((void (*)(void *a))MSanAtExitWrapper, 0, 0);
if (!res) {
@@ -1256,13 +1248,13 @@ int OnExit() {
do { \
if (!INTERCEPT_FUNCTION_VER(name, ver)) \
VReport(1, "MemorySanitizer: failed to intercept '%s@@%s'\n", #name, \
- #ver); \
+ ver); \
} while (0)
#define MSAN_INTERCEPT_FUNC_VER_UNVERSIONED_FALLBACK(name, ver) \
do { \
if (!INTERCEPT_FUNCTION_VER(name, ver) && !INTERCEPT_FUNCTION(name)) \
VReport(1, "MemorySanitizer: failed to intercept '%s@@%s' or '%s'\n", \
- #name, #ver, #name); \
+ #name, ver, #name); \
} while (0)
#define COMMON_INTERCEPT_FUNCTION(name) MSAN_INTERCEPT_FUNC(name)
@@ -1278,14 +1270,15 @@ int OnExit() {
CHECK_UNPOISONED_CTX(ctx, ptr, size)
#define COMMON_INTERCEPTOR_INITIALIZE_RANGE(ptr, size) \
__msan_unpoison(ptr, size)
-#define COMMON_INTERCEPTOR_ENTER(ctx, func, ...) \
- if (msan_init_is_running) return REAL(func)(__VA_ARGS__); \
- ENSURE_MSAN_INITED(); \
- MSanInterceptorContext msan_ctx = {IsInInterceptorScope()}; \
- ctx = (void *)&msan_ctx; \
- (void)ctx; \
- InterceptorScope interceptor_scope; \
- __msan_unpoison(__errno_location(), sizeof(int)); /* NOLINT */
+#define COMMON_INTERCEPTOR_ENTER(ctx, func, ...) \
+ if (msan_init_is_running) \
+ return REAL(func)(__VA_ARGS__); \
+ ENSURE_MSAN_INITED(); \
+ MSanInterceptorContext msan_ctx = {IsInInterceptorScope()}; \
+ ctx = (void *)&msan_ctx; \
+ (void)ctx; \
+ InterceptorScope interceptor_scope; \
+ __msan_unpoison(__errno_location(), sizeof(int));
#define COMMON_INTERCEPTOR_DIR_ACQUIRE(ctx, path) \
do { \
} while (false)
@@ -1454,9 +1447,9 @@ INTERCEPTOR(int, dladdr, void *addr, dlinfo *info) {
if (res != 0) {
__msan_unpoison(info, sizeof(*info));
if (info->dli_fname)
- __msan_unpoison(info->dli_fname, REAL(strlen)(info->dli_fname) + 1);
+ __msan_unpoison(info->dli_fname, internal_strlen(info->dli_fname) + 1);
if (info->dli_sname)
- __msan_unpoison(info->dli_sname, REAL(strlen)(info->dli_sname) + 1);
+ __msan_unpoison(info->dli_sname, internal_strlen(info->dli_sname) + 1);
}
return res;
}
@@ -1465,7 +1458,8 @@ INTERCEPTOR(char *, dlerror, int fake) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, dlerror, fake);
char *res = REAL(dlerror)(fake);
- if (res) __msan_unpoison(res, REAL(strlen)(res) + 1);
+ if (res)
+ __msan_unpoison(res, internal_strlen(res) + 1);
return res;
}
@@ -1483,7 +1477,7 @@ static int msan_dl_iterate_phdr_cb(__sanitizer_dl_phdr_info *info, SIZE_T size,
if (info->dlpi_phdr && info->dlpi_phnum)
__msan_unpoison(info->dlpi_phdr, struct_ElfW_Phdr_sz * info->dlpi_phnum);
if (info->dlpi_name)
- __msan_unpoison(info->dlpi_name, REAL(strlen)(info->dlpi_name) + 1);
+ __msan_unpoison(info->dlpi_name, internal_strlen(info->dlpi_name) + 1);
}
dl_iterate_phdr_data *cbdata = (dl_iterate_phdr_data *)data;
UnpoisonParam(3);
@@ -1525,7 +1519,7 @@ INTERCEPTOR(wchar_t *, wcscpy, wchar_t *dest, const wchar_t *src) {
ENSURE_MSAN_INITED();
GET_STORE_STACK_TRACE;
wchar_t *res = REAL(wcscpy)(dest, src);
- CopyShadowAndOrigin(dest, src, sizeof(wchar_t) * (REAL(wcslen)(src) + 1),
+ CopyShadowAndOrigin(dest, src, sizeof(wchar_t) * (internal_wcslen(src) + 1),
&stack);
return res;
}
@@ -1533,7 +1527,7 @@ INTERCEPTOR(wchar_t *, wcscpy, wchar_t *dest, const wchar_t *src) {
INTERCEPTOR(wchar_t *, wcsncpy, wchar_t *dest, const wchar_t *src, SIZE_T n) {
ENSURE_MSAN_INITED();
GET_STORE_STACK_TRACE;
- SIZE_T copy_size = REAL(wcsnlen)(src, n);
+ SIZE_T copy_size = internal_wcsnlen(src, n);
if (copy_size < n) copy_size++; // trailing \0
wchar_t *res = REAL(wcsncpy)(dest, src, n);
CopyShadowAndOrigin(dest, src, copy_size * sizeof(wchar_t), &stack);
@@ -1597,7 +1591,7 @@ void *__msan_memmove(void *dest, const void *src, SIZE_T n) {
void __msan_unpoison_string(const char* s) {
if (!MEM_IS_APP(s)) return;
- __msan_unpoison(s, REAL(strlen)(s) + 1);
+ __msan_unpoison(s, internal_strlen(s) + 1);
}
namespace __msan {
@@ -1686,7 +1680,8 @@ void InitializeInterceptors() {
MSAN_MAYBE_INTERCEPT_FCVT;
MSAN_MAYBE_INTERCEPT_FSTAT;
MSAN_MAYBE_INTERCEPT___FXSTAT;
- MSAN_INTERCEPT_FSTATAT;
+ MSAN_MAYBE_INTERCEPT_FSTATAT;
+ MSAN_MAYBE_INTERCEPT___FXSTATAT;
MSAN_MAYBE_INTERCEPT___FXSTAT64;
MSAN_MAYBE_INTERCEPT___FXSTATAT64;
INTERCEPT_FUNCTION(pipe);
diff --git a/compiler-rt/lib/msan/msan_interface_internal.h b/compiler-rt/lib/msan/msan_interface_internal.h
index 1edacbc7504f..c72c91c3c160 100644
--- a/compiler-rt/lib/msan/msan_interface_internal.h
+++ b/compiler-rt/lib/msan/msan_interface_internal.h
@@ -31,7 +31,7 @@ SANITIZER_INTERFACE_ATTRIBUTE
void __msan_warning();
// Print a warning and die.
-// Intrumentation inserts calls to this function when building in "fast" mode
+// Instrumentation inserts calls to this function when building in "fast" mode
// (i.e. -mllvm -msan-keep-going)
SANITIZER_INTERFACE_ATTRIBUTE __attribute__((noreturn))
void __msan_warning_noreturn();
diff --git a/compiler-rt/lib/msan/msan_linux.cpp b/compiler-rt/lib/msan/msan_linux.cpp
index d5baee38e710..bced00ba2428 100644
--- a/compiler-rt/lib/msan/msan_linux.cpp
+++ b/compiler-rt/lib/msan/msan_linux.cpp
@@ -37,7 +37,7 @@ namespace __msan {
void ReportMapRange(const char *descr, uptr beg, uptr size) {
if (size > 0) {
uptr end = beg + size - 1;
- VPrintf(1, "%s : %p - %p\n", descr, beg, end);
+ VPrintf(1, "%s : 0x%zx - 0x%zx\n", descr, beg, end);
}
}
@@ -45,7 +45,7 @@ static bool CheckMemoryRangeAvailability(uptr beg, uptr size) {
if (size > 0) {
uptr end = beg + size - 1;
if (!MemoryRangeIsAvailable(beg, end)) {
- Printf("FATAL: Memory range %p - %p is not available.\n", beg, end);
+ Printf("FATAL: Memory range 0x%zx - 0x%zx is not available.\n", beg, end);
return false;
}
}
@@ -65,8 +65,8 @@ static bool ProtectMemoryRange(uptr beg, uptr size, const char *name) {
}
if ((uptr)addr != beg) {
uptr end = beg + size - 1;
- Printf("FATAL: Cannot protect memory range %p - %p (%s).\n", beg, end,
- name);
+ Printf("FATAL: Cannot protect memory range 0x%zx - 0x%zx (%s).\n", beg,
+ end, name);
return false;
}
}
@@ -106,7 +106,7 @@ static void CheckMemoryLayoutSanity() {
bool InitShadow(bool init_origins) {
// Let user know mapping parameters first.
- VPrintf(1, "__msan_init %p\n", &__msan_init);
+ VPrintf(1, "__msan_init %p\n", reinterpret_cast<void *>(&__msan_init));
for (unsigned i = 0; i < kMemoryLayoutSize; ++i)
VPrintf(1, "%s: %zx - %zx\n", kMemoryLayout[i].name, kMemoryLayout[i].start,
kMemoryLayout[i].end - 1);
@@ -115,7 +115,7 @@ bool InitShadow(bool init_origins) {
if (!MEM_IS_APP(&__msan_init)) {
Printf("FATAL: Code %p is out of application range. Non-PIE build?\n",
- (uptr)&__msan_init);
+ reinterpret_cast<void *>(&__msan_init));
return false;
}
diff --git a/compiler-rt/lib/msan/msan_poisoning.cpp b/compiler-rt/lib/msan/msan_poisoning.cpp
index 15892392f74a..af01aa69f78f 100644
--- a/compiler-rt/lib/msan/msan_poisoning.cpp
+++ b/compiler-rt/lib/msan/msan_poisoning.cpp
@@ -14,6 +14,7 @@
#include "interception/interception.h"
#include "msan_origin.h"
+#include "msan_thread.h"
#include "sanitizer_common/sanitizer_common.h"
DECLARE_REAL(void *, memset, void *dest, int c, uptr n)
@@ -241,6 +242,9 @@ void PoisonMemory(const void *dst, uptr size, StackTrace *stack) {
SetShadow(dst, size, (u8)-1);
if (__msan_get_track_origins()) {
+ MsanThread *t = GetCurrentThread();
+ if (t && t->InSignalHandler())
+ return;
Origin o = Origin::CreateHeapOrigin(stack);
SetOrigin(dst, size, o.raw_id());
}
diff --git a/compiler-rt/lib/msan/msan_report.cpp b/compiler-rt/lib/msan/msan_report.cpp
index e10d9eb62231..ff3e38c7db9e 100644
--- a/compiler-rt/lib/msan/msan_report.cpp
+++ b/compiler-rt/lib/msan/msan_report.cpp
@@ -122,17 +122,17 @@ void ReportStats() {
ScopedErrorReportLock l;
if (__msan_get_track_origins() > 0) {
- StackDepotStats *stack_depot_stats = StackDepotGetStats();
+ StackDepotStats stack_depot_stats = StackDepotGetStats();
// FIXME: we want this at normal exit, too!
// FIXME: but only with verbosity=1 or something
- Printf("Unique heap origins: %zu\n", stack_depot_stats->n_uniq_ids);
- Printf("Stack depot allocated bytes: %zu\n", stack_depot_stats->allocated);
+ Printf("Unique heap origins: %zu\n", stack_depot_stats.n_uniq_ids);
+ Printf("Stack depot allocated bytes: %zu\n", stack_depot_stats.allocated);
- StackDepotStats *chained_origin_depot_stats = ChainedOriginDepotGetStats();
+ StackDepotStats chained_origin_depot_stats = ChainedOriginDepotGetStats();
Printf("Unique origin histories: %zu\n",
- chained_origin_depot_stats->n_uniq_ids);
+ chained_origin_depot_stats.n_uniq_ids);
Printf("History depot allocated bytes: %zu\n",
- chained_origin_depot_stats->allocated);
+ chained_origin_depot_stats.allocated);
}
}
@@ -201,13 +201,18 @@ void DescribeMemoryRange(const void *x, uptr size) {
Decorator d;
Printf("%s", d.Warning());
- Printf("Shadow map of [%p, %p), %zu bytes:\n", start, end, end - start);
+ uptr start_x = reinterpret_cast<uptr>(x);
+ Printf("Shadow map [%p, %p) of [%p, %p), %zu bytes:\n",
+ reinterpret_cast<void *>(start), reinterpret_cast<void *>(end),
+ reinterpret_cast<void *>(start_x),
+ reinterpret_cast<void *>(start_x + end - start), end - start);
Printf("%s", d.Default());
while (s < e) {
// Line start.
if (pos % 16 == 0) {
for (int i = 0; i < 4; ++i) origin_ids[i] = -1;
- Printf("%p:", s);
+ Printf("%p[%p]:", reinterpret_cast<void *>(s),
+ reinterpret_cast<void *>(start_x - start + s));
}
// Group start.
if (pos % 4 == 0) {
diff --git a/compiler-rt/lib/msan/msan_thread.cpp b/compiler-rt/lib/msan/msan_thread.cpp
index 6ae012acd9a2..40ad6a5019c4 100644
--- a/compiler-rt/lib/msan/msan_thread.cpp
+++ b/compiler-rt/lib/msan/msan_thread.cpp
@@ -66,8 +66,6 @@ void MsanThread::Destroy() {
}
thread_return_t MsanThread::ThreadStart() {
- Init();
-
if (!start_routine_) {
// start_routine_ == 0 if we're on the main thread or on one of the
// OS X libdispatch worker threads. But nobody is supposed to call
diff --git a/compiler-rt/lib/msan/msan_thread.h b/compiler-rt/lib/msan/msan_thread.h
index fe795e3a547a..f6ed1534cccd 100644
--- a/compiler-rt/lib/msan/msan_thread.h
+++ b/compiler-rt/lib/msan/msan_thread.h
@@ -15,7 +15,7 @@
#include "msan_allocator.h"
#include "sanitizer_common/sanitizer_common.h"
-
+#include "sanitizer_common/sanitizer_posix.h"
namespace __msan {
class MsanThread {
@@ -45,6 +45,7 @@ class MsanThread {
MsanThreadLocalMallocStorage &malloc_storage() { return malloc_storage_; }
int destructor_iterations_;
+ __sanitizer_sigset_t starting_sigset_;
private:
// NOTE: There is no MsanThread constructor. It is allocated
diff --git a/compiler-rt/lib/orc/c_api.h b/compiler-rt/lib/orc/c_api.h
index 6677da06ede5..47f46b891d96 100644
--- a/compiler-rt/lib/orc/c_api.h
+++ b/compiler-rt/lib/orc/c_api.h
@@ -50,7 +50,7 @@ ORC_RT_C_EXTERN_C_BEGIN
typedef union {
char *ValuePtr;
- char Value[sizeof(ValuePtr)];
+ char Value[sizeof(char *)];
} __orc_rt_CWrapperFunctionResultDataUnion;
/**
@@ -91,15 +91,15 @@ __orc_rt_CWrapperFunctionResultInit(__orc_rt_CWrapperFunctionResult *R) {
* Create an __orc_rt_CWrapperFunctionResult with an uninitialized buffer of
* size Size. The buffer is returned via the DataPtr argument.
*/
-static inline char *
-__orc_rt_CWrapperFunctionResultAllocate(__orc_rt_CWrapperFunctionResult *R,
- size_t Size) {
- R->Size = Size;
- if (Size <= sizeof(R->Data.Value))
- return R->Data.Value;
-
- R->Data.ValuePtr = (char *)malloc(Size);
- return R->Data.ValuePtr;
+static inline __orc_rt_CWrapperFunctionResult
+__orc_rt_CWrapperFunctionResultAllocate(size_t Size) {
+ __orc_rt_CWrapperFunctionResult R;
+ R.Size = Size;
+ // If Size is 0 ValuePtr must be 0 or it is considered an out-of-band error.
+ R.Data.ValuePtr = 0;
+ if (Size > sizeof(R.Data.Value))
+ R.Data.ValuePtr = (char *)malloc(Size);
+ return R;
}
/**
@@ -135,8 +135,8 @@ __orc_rt_CreateCWrapperFunctionResultFromString(const char *Source) {
* Create an __orc_rt_CWrapperFunctionResult representing an out-of-band
* error.
*
- * This function takes ownership of the string argument which must have been
- * allocated with malloc.
+ * This function copies the input string. The client is responsible for freeing
+ * the ErrMsg arg.
*/
static inline __orc_rt_CWrapperFunctionResult
__orc_rt_CreateCWrapperFunctionResultFromOutOfBandError(const char *ErrMsg) {
@@ -163,9 +163,9 @@ __orc_rt_DisposeCWrapperFunctionResult(__orc_rt_CWrapperFunctionResult *R) {
* Get a pointer to the data contained in the given
* __orc_rt_CWrapperFunctionResult.
*/
-static inline const char *
-__orc_rt_CWrapperFunctionResultData(const __orc_rt_CWrapperFunctionResult *R) {
- assert((R->Size != 0 || R->Data.ValuePtr == nullptr) &&
+static inline char *
+__orc_rt_CWrapperFunctionResultData(__orc_rt_CWrapperFunctionResult *R) {
+ assert((R->Size != 0 || R->Data.ValuePtr == NULL) &&
"Cannot get data for out-of-band error value");
return R->Size > sizeof(R->Data.Value) ? R->Data.ValuePtr : R->Data.Value;
}
@@ -177,7 +177,7 @@ __orc_rt_CWrapperFunctionResultData(const __orc_rt_CWrapperFunctionResult *R) {
*/
static inline size_t
__orc_rt_CWrapperFunctionResultSize(const __orc_rt_CWrapperFunctionResult *R) {
- assert((R->Size != 0 || R->Data.ValuePtr == nullptr) &&
+ assert((R->Size != 0 || R->Data.ValuePtr == NULL) &&
"Cannot get size for out-of-band error value");
return R->Size;
}
diff --git a/compiler-rt/lib/orc/elfnix_platform.cpp b/compiler-rt/lib/orc/elfnix_platform.cpp
new file mode 100644
index 000000000000..0352f6c4e853
--- /dev/null
+++ b/compiler-rt/lib/orc/elfnix_platform.cpp
@@ -0,0 +1,584 @@
+//===- elfnix_platform.cpp ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code required to load the rest of the ELF-on-*IX runtime.
+//
+//===----------------------------------------------------------------------===//
+
+#include "elfnix_platform.h"
+#include "common.h"
+#include "error.h"
+#include "wrapper_function_utils.h"
+
+#include <map>
+#include <mutex>
+#include <sstream>
+#include <unordered_map>
+#include <vector>
+
+using namespace __orc_rt;
+using namespace __orc_rt::elfnix;
+
+// Declare function tags for functions in the JIT process.
+ORC_RT_JIT_DISPATCH_TAG(__orc_rt_elfnix_get_initializers_tag)
+ORC_RT_JIT_DISPATCH_TAG(__orc_rt_elfnix_get_deinitializers_tag)
+ORC_RT_JIT_DISPATCH_TAG(__orc_rt_elfnix_symbol_lookup_tag)
+
+// eh-frame registration functions.
+// We expect these to be available for all processes.
+extern "C" void __register_frame(const void *);
+extern "C" void __deregister_frame(const void *);
+
+namespace {
+
+Error validatePointerSectionExtent(const char *SectionName,
+ const ExecutorAddrRange &SE) {
+ if (SE.size().getValue() % sizeof(uintptr_t)) {
+ std::ostringstream ErrMsg;
+ ErrMsg << std::hex << "Size of " << SectionName << " 0x"
+ << SE.Start.getValue() << " -- 0x" << SE.End.getValue()
+ << " is not a pointer multiple";
+ return make_error<StringError>(ErrMsg.str());
+ }
+ return Error::success();
+}
+
+Error runInitArray(const std::vector<ExecutorAddrRange> &InitArraySections,
+ const ELFNixJITDylibInitializers &MOJDIs) {
+
+ for (const auto &ModInits : InitArraySections) {
+ if (auto Err = validatePointerSectionExtent(".init_array", ModInits))
+ return Err;
+
+ using InitFunc = void (*)();
+ for (auto *Init : ModInits.toSpan<InitFunc>())
+ (*Init)();
+ }
+
+ return Error::success();
+}
+struct TLSInfoEntry {
+ unsigned long Key = 0;
+ unsigned long DataAddress = 0;
+};
+
+class ELFNixPlatformRuntimeState {
+private:
+ struct AtExitEntry {
+ void (*Func)(void *);
+ void *Arg;
+ };
+
+ using AtExitsVector = std::vector<AtExitEntry>;
+
+ struct PerJITDylibState {
+ void *Header = nullptr;
+ size_t RefCount = 0;
+ bool AllowReinitialization = false;
+ AtExitsVector AtExits;
+ };
+
+public:
+ static void initialize(void *DSOHandle);
+ static ELFNixPlatformRuntimeState &get();
+ static void destroy();
+
+ ELFNixPlatformRuntimeState(void *DSOHandle)
+ : PlatformJDDSOHandle(DSOHandle) {}
+
+ // Delete copy and move constructors.
+ ELFNixPlatformRuntimeState(const ELFNixPlatformRuntimeState &) = delete;
+ ELFNixPlatformRuntimeState &
+ operator=(const ELFNixPlatformRuntimeState &) = delete;
+ ELFNixPlatformRuntimeState(ELFNixPlatformRuntimeState &&) = delete;
+ ELFNixPlatformRuntimeState &operator=(ELFNixPlatformRuntimeState &&) = delete;
+
+ Error registerObjectSections(ELFNixPerObjectSectionsToRegister POSR);
+ Error deregisterObjectSections(ELFNixPerObjectSectionsToRegister POSR);
+
+ const char *dlerror();
+ void *dlopen(string_view Name, int Mode);
+ int dlclose(void *DSOHandle);
+ void *dlsym(void *DSOHandle, string_view Symbol);
+
+ int registerAtExit(void (*F)(void *), void *Arg, void *DSOHandle);
+ void runAtExits(void *DSOHandle);
+
+ /// Returns the base address of the section containing ThreadData.
+ Expected<std::pair<const char *, size_t>>
+ getThreadDataSectionFor(const char *ThreadData);
+
+ void *getPlatformJDDSOHandle() { return PlatformJDDSOHandle; }
+
+private:
+ PerJITDylibState *getJITDylibStateByHeaderAddr(void *DSOHandle);
+ PerJITDylibState *getJITDylibStateByName(string_view Path);
+ PerJITDylibState &
+ getOrCreateJITDylibState(ELFNixJITDylibInitializers &MOJDIs);
+
+ Error registerThreadDataSection(span<const char> ThreadDataSection);
+
+ Expected<ExecutorAddr> lookupSymbolInJITDylib(void *DSOHandle,
+ string_view Symbol);
+
+ Expected<ELFNixJITDylibInitializerSequence>
+ getJITDylibInitializersByName(string_view Path);
+ Expected<void *> dlopenInitialize(string_view Path, int Mode);
+ Error initializeJITDylib(ELFNixJITDylibInitializers &MOJDIs);
+
+ static ELFNixPlatformRuntimeState *MOPS;
+
+ using InitSectionHandler =
+ Error (*)(const std::vector<ExecutorAddrRange> &Sections,
+ const ELFNixJITDylibInitializers &MOJDIs);
+ const std::vector<std::pair<const char *, InitSectionHandler>> InitSections =
+ {{".init_array", runInitArray}};
+
+ void *PlatformJDDSOHandle;
+
+ // FIXME: Move to thread-state.
+ std::string DLFcnError;
+
+ std::recursive_mutex JDStatesMutex;
+ std::unordered_map<void *, PerJITDylibState> JDStates;
+ std::unordered_map<std::string, void *> JDNameToHeader;
+
+ std::mutex ThreadDataSectionsMutex;
+ std::map<const char *, size_t> ThreadDataSections;
+};
+
+ELFNixPlatformRuntimeState *ELFNixPlatformRuntimeState::MOPS = nullptr;
+
+void ELFNixPlatformRuntimeState::initialize(void *DSOHandle) {
+ assert(!MOPS && "ELFNixPlatformRuntimeState should be null");
+ MOPS = new ELFNixPlatformRuntimeState(DSOHandle);
+}
+
+ELFNixPlatformRuntimeState &ELFNixPlatformRuntimeState::get() {
+ assert(MOPS && "ELFNixPlatformRuntimeState not initialized");
+ return *MOPS;
+}
+
+void ELFNixPlatformRuntimeState::destroy() {
+ assert(MOPS && "ELFNixPlatformRuntimeState not initialized");
+ delete MOPS;
+}
+
+Error ELFNixPlatformRuntimeState::registerObjectSections(
+ ELFNixPerObjectSectionsToRegister POSR) {
+ if (POSR.EHFrameSection.Start)
+ __register_frame(POSR.EHFrameSection.Start.toPtr<const char *>());
+
+ if (POSR.ThreadDataSection.Start) {
+ if (auto Err = registerThreadDataSection(
+ POSR.ThreadDataSection.toSpan<const char>()))
+ return Err;
+ }
+
+ return Error::success();
+}
+
+Error ELFNixPlatformRuntimeState::deregisterObjectSections(
+ ELFNixPerObjectSectionsToRegister POSR) {
+ if (POSR.EHFrameSection.Start)
+ __deregister_frame(POSR.EHFrameSection.Start.toPtr<const char *>());
+
+ return Error::success();
+}
+
+const char *ELFNixPlatformRuntimeState::dlerror() { return DLFcnError.c_str(); }
+
+void *ELFNixPlatformRuntimeState::dlopen(string_view Path, int Mode) {
+ std::lock_guard<std::recursive_mutex> Lock(JDStatesMutex);
+
+ // Use fast path if all JITDylibs are already loaded and don't require
+ // re-running initializers.
+ if (auto *JDS = getJITDylibStateByName(Path)) {
+ if (!JDS->AllowReinitialization) {
+ ++JDS->RefCount;
+ return JDS->Header;
+ }
+ }
+
+ auto H = dlopenInitialize(Path, Mode);
+ if (!H) {
+ DLFcnError = toString(H.takeError());
+ return nullptr;
+ }
+
+ return *H;
+}
+
+int ELFNixPlatformRuntimeState::dlclose(void *DSOHandle) {
+ runAtExits(DSOHandle);
+ return 0;
+}
+
+void *ELFNixPlatformRuntimeState::dlsym(void *DSOHandle, string_view Symbol) {
+ auto Addr = lookupSymbolInJITDylib(DSOHandle, Symbol);
+ if (!Addr) {
+ DLFcnError = toString(Addr.takeError());
+ return 0;
+ }
+
+ return Addr->toPtr<void *>();
+}
+
+int ELFNixPlatformRuntimeState::registerAtExit(void (*F)(void *), void *Arg,
+ void *DSOHandle) {
+ // FIXME: Handle out-of-memory errors, returning -1 if OOM.
+ std::lock_guard<std::recursive_mutex> Lock(JDStatesMutex);
+ auto *JDS = getJITDylibStateByHeaderAddr(DSOHandle);
+ assert(JDS && "JITDylib state not initialized");
+ JDS->AtExits.push_back({F, Arg});
+ return 0;
+}
+
+void ELFNixPlatformRuntimeState::runAtExits(void *DSOHandle) {
+ // FIXME: Should atexits be allowed to run concurrently with access to
+ // JDState?
+ AtExitsVector V;
+ {
+ std::lock_guard<std::recursive_mutex> Lock(JDStatesMutex);
+ auto *JDS = getJITDylibStateByHeaderAddr(DSOHandle);
+ assert(JDS && "JITDylib state not initialized");
+ std::swap(V, JDS->AtExits);
+ }
+
+ while (!V.empty()) {
+ auto &AE = V.back();
+ AE.Func(AE.Arg);
+ V.pop_back();
+ }
+}
+
+Expected<std::pair<const char *, size_t>>
+ELFNixPlatformRuntimeState::getThreadDataSectionFor(const char *ThreadData) {
+ std::lock_guard<std::mutex> Lock(ThreadDataSectionsMutex);
+ auto I = ThreadDataSections.upper_bound(ThreadData);
+ // Check that we have a valid entry covering this address.
+ if (I == ThreadDataSections.begin())
+ return make_error<StringError>("No thread local data section for key");
+ I = std::prev(I);
+ if (ThreadData >= I->first + I->second)
+ return make_error<StringError>("No thread local data section for key");
+ return *I;
+}
+
+ELFNixPlatformRuntimeState::PerJITDylibState *
+ELFNixPlatformRuntimeState::getJITDylibStateByHeaderAddr(void *DSOHandle) {
+ auto I = JDStates.find(DSOHandle);
+ if (I == JDStates.end())
+ return nullptr;
+ return &I->second;
+}
+
+ELFNixPlatformRuntimeState::PerJITDylibState *
+ELFNixPlatformRuntimeState::getJITDylibStateByName(string_view Name) {
+ // FIXME: Avoid creating string copy here.
+ auto I = JDNameToHeader.find(std::string(Name.data(), Name.size()));
+ if (I == JDNameToHeader.end())
+ return nullptr;
+ void *H = I->second;
+ auto J = JDStates.find(H);
+ assert(J != JDStates.end() &&
+ "JITDylib has name map entry but no header map entry");
+ return &J->second;
+}
+
+ELFNixPlatformRuntimeState::PerJITDylibState &
+ELFNixPlatformRuntimeState::getOrCreateJITDylibState(
+ ELFNixJITDylibInitializers &MOJDIs) {
+ void *Header = MOJDIs.DSOHandleAddress.toPtr<void *>();
+
+ auto &JDS = JDStates[Header];
+
+ // If this entry hasn't been created yet.
+ if (!JDS.Header) {
+ assert(!JDNameToHeader.count(MOJDIs.Name) &&
+ "JITDylib has header map entry but no name map entry");
+ JDNameToHeader[MOJDIs.Name] = Header;
+ JDS.Header = Header;
+ }
+
+ return JDS;
+}
+
+Error ELFNixPlatformRuntimeState::registerThreadDataSection(
+ span<const char> ThreadDataSection) {
+ std::lock_guard<std::mutex> Lock(ThreadDataSectionsMutex);
+ auto I = ThreadDataSections.upper_bound(ThreadDataSection.data());
+ if (I != ThreadDataSections.begin()) {
+ auto J = std::prev(I);
+ if (J->first + J->second > ThreadDataSection.data())
+ return make_error<StringError>("Overlapping .tdata sections");
+ }
+ ThreadDataSections.insert(
+ I, std::make_pair(ThreadDataSection.data(), ThreadDataSection.size()));
+ return Error::success();
+}
+
+Expected<ExecutorAddr>
+ELFNixPlatformRuntimeState::lookupSymbolInJITDylib(void *DSOHandle,
+ string_view Sym) {
+ Expected<ExecutorAddr> Result((ExecutorAddr()));
+ if (auto Err = WrapperFunction<SPSExpected<SPSExecutorAddr>(
+ SPSExecutorAddr, SPSString)>::call(&__orc_rt_elfnix_symbol_lookup_tag,
+ Result,
+ ExecutorAddr::fromPtr(DSOHandle),
+ Sym))
+ return std::move(Err);
+ return Result;
+}
+
+Expected<ELFNixJITDylibInitializerSequence>
+ELFNixPlatformRuntimeState::getJITDylibInitializersByName(string_view Path) {
+ Expected<ELFNixJITDylibInitializerSequence> Result(
+ (ELFNixJITDylibInitializerSequence()));
+ std::string PathStr(Path.data(), Path.size());
+ if (auto Err =
+ WrapperFunction<SPSExpected<SPSELFNixJITDylibInitializerSequence>(
+ SPSString)>::call(&__orc_rt_elfnix_get_initializers_tag, Result,
+ Path))
+ return std::move(Err);
+ return Result;
+}
+
+Expected<void *> ELFNixPlatformRuntimeState::dlopenInitialize(string_view Path,
+ int Mode) {
+ // Either our JITDylib wasn't loaded, or it or one of its dependencies allows
+ // reinitialization. We need to call in to the JIT to see if there's any new
+ // work pending.
+ auto InitSeq = getJITDylibInitializersByName(Path);
+ if (!InitSeq)
+ return InitSeq.takeError();
+
+ // Init sequences should be non-empty.
+ if (InitSeq->empty())
+ return make_error<StringError>(
+ "__orc_rt_elfnix_get_initializers returned an "
+ "empty init sequence");
+
+ // Otherwise register and run initializers for each JITDylib.
+ for (auto &MOJDIs : *InitSeq)
+ if (auto Err = initializeJITDylib(MOJDIs))
+ return std::move(Err);
+
+ // Return the header for the last item in the list.
+ auto *JDS = getJITDylibStateByHeaderAddr(
+ InitSeq->back().DSOHandleAddress.toPtr<void *>());
+ assert(JDS && "Missing state entry for JD");
+ return JDS->Header;
+}
+
+Error ELFNixPlatformRuntimeState::initializeJITDylib(
+ ELFNixJITDylibInitializers &MOJDIs) {
+
+ auto &JDS = getOrCreateJITDylibState(MOJDIs);
+ ++JDS.RefCount;
+
+ for (auto &KV : InitSections) {
+ const auto &Name = KV.first;
+ const auto &Handler = KV.second;
+ auto I = MOJDIs.InitSections.find(Name);
+ if (I != MOJDIs.InitSections.end()) {
+ if (auto Err = Handler(I->second, MOJDIs))
+ return Err;
+ }
+ }
+
+ return Error::success();
+}
+class ELFNixPlatformRuntimeTLVManager {
+public:
+ void *getInstance(const char *ThreadData);
+
+private:
+ std::unordered_map<const char *, char *> Instances;
+ std::unordered_map<const char *, std::unique_ptr<char[]>> AllocatedSections;
+};
+
+void *ELFNixPlatformRuntimeTLVManager::getInstance(const char *ThreadData) {
+ auto I = Instances.find(ThreadData);
+ if (I != Instances.end())
+ return I->second;
+ auto TDS =
+ ELFNixPlatformRuntimeState::get().getThreadDataSectionFor(ThreadData);
+ if (!TDS) {
+ __orc_rt_log_error(toString(TDS.takeError()).c_str());
+ return nullptr;
+ }
+
+ auto &Allocated = AllocatedSections[TDS->first];
+ if (!Allocated) {
+ Allocated = std::make_unique<char[]>(TDS->second);
+ memcpy(Allocated.get(), TDS->first, TDS->second);
+ }
+ size_t ThreadDataDelta = ThreadData - TDS->first;
+ assert(ThreadDataDelta <= TDS->second && "ThreadData outside section bounds");
+
+ char *Instance = Allocated.get() + ThreadDataDelta;
+ Instances[ThreadData] = Instance;
+ return Instance;
+}
+
+void destroyELFNixTLVMgr(void *ELFNixTLVMgr) {
+ delete static_cast<ELFNixPlatformRuntimeTLVManager *>(ELFNixTLVMgr);
+}
+
+} // end anonymous namespace
+
+//------------------------------------------------------------------------------
+// JIT entry points
+//------------------------------------------------------------------------------
+
+ORC_RT_INTERFACE __orc_rt_CWrapperFunctionResult
+__orc_rt_elfnix_platform_bootstrap(char *ArgData, size_t ArgSize) {
+ return WrapperFunction<void(uint64_t)>::handle(
+ ArgData, ArgSize,
+ [](uint64_t &DSOHandle) {
+ ELFNixPlatformRuntimeState::initialize(
+ reinterpret_cast<void *>(DSOHandle));
+ })
+ .release();
+}
+
+ORC_RT_INTERFACE __orc_rt_CWrapperFunctionResult
+__orc_rt_elfnix_platform_shutdown(char *ArgData, size_t ArgSize) {
+ ELFNixPlatformRuntimeState::destroy();
+ return WrapperFunctionResult().release();
+}
+
+/// Wrapper function for registering metadata on a per-object basis.
+ORC_RT_INTERFACE __orc_rt_CWrapperFunctionResult
+__orc_rt_elfnix_register_object_sections(char *ArgData, size_t ArgSize) {
+ return WrapperFunction<SPSError(SPSELFNixPerObjectSectionsToRegister)>::
+ handle(ArgData, ArgSize,
+ [](ELFNixPerObjectSectionsToRegister &POSR) {
+ return ELFNixPlatformRuntimeState::get().registerObjectSections(
+ std::move(POSR));
+ })
+ .release();
+}
+
+/// Wrapper for releasing per-object metadata.
+ORC_RT_INTERFACE __orc_rt_CWrapperFunctionResult
+__orc_rt_elfnix_deregister_object_sections(char *ArgData, size_t ArgSize) {
+ return WrapperFunction<SPSError(SPSELFNixPerObjectSectionsToRegister)>::
+ handle(ArgData, ArgSize,
+ [](ELFNixPerObjectSectionsToRegister &POSR) {
+ return ELFNixPlatformRuntimeState::get()
+ .deregisterObjectSections(std::move(POSR));
+ })
+ .release();
+}
+
+//------------------------------------------------------------------------------
+// TLV support
+//------------------------------------------------------------------------------
+
+ORC_RT_INTERFACE void *__orc_rt_elfnix_tls_get_addr_impl(TLSInfoEntry *D) {
+ auto *TLVMgr = static_cast<ELFNixPlatformRuntimeTLVManager *>(
+ pthread_getspecific(D->Key));
+ if (!TLVMgr)
+ TLVMgr = new ELFNixPlatformRuntimeTLVManager();
+ if (pthread_setspecific(D->Key, TLVMgr)) {
+ __orc_rt_log_error("Call to pthread_setspecific failed");
+ return nullptr;
+ }
+
+ return TLVMgr->getInstance(
+ reinterpret_cast<char *>(static_cast<uintptr_t>(D->DataAddress)));
+}
+
+ORC_RT_INTERFACE __orc_rt_CWrapperFunctionResult
+__orc_rt_elfnix_create_pthread_key(char *ArgData, size_t ArgSize) {
+ return WrapperFunction<SPSExpected<uint64_t>(void)>::handle(
+ ArgData, ArgSize,
+ []() -> Expected<uint64_t> {
+ pthread_key_t Key;
+ if (int Err = pthread_key_create(&Key, destroyELFNixTLVMgr)) {
+ __orc_rt_log_error("Call to pthread_key_create failed");
+ return make_error<StringError>(strerror(Err));
+ }
+ return static_cast<uint64_t>(Key);
+ })
+ .release();
+}
+
+//------------------------------------------------------------------------------
+// cxa_atexit support
+//------------------------------------------------------------------------------
+
+int __orc_rt_elfnix_cxa_atexit(void (*func)(void *), void *arg,
+ void *dso_handle) {
+ return ELFNixPlatformRuntimeState::get().registerAtExit(func, arg,
+ dso_handle);
+}
+
+int __orc_rt_elfnix_atexit(void (*func)(void *)) {
+ auto &PlatformRTState = ELFNixPlatformRuntimeState::get();
+ return ELFNixPlatformRuntimeState::get().registerAtExit(
+ func, NULL, PlatformRTState.getPlatformJDDSOHandle());
+}
+
+void __orc_rt_elfnix_cxa_finalize(void *dso_handle) {
+ ELFNixPlatformRuntimeState::get().runAtExits(dso_handle);
+}
+
+//------------------------------------------------------------------------------
+// JIT'd dlfcn alternatives.
+//------------------------------------------------------------------------------
+
+const char *__orc_rt_elfnix_jit_dlerror() {
+ return ELFNixPlatformRuntimeState::get().dlerror();
+}
+
+void *__orc_rt_elfnix_jit_dlopen(const char *path, int mode) {
+ return ELFNixPlatformRuntimeState::get().dlopen(path, mode);
+}
+
+int __orc_rt_elfnix_jit_dlclose(void *dso_handle) {
+ return ELFNixPlatformRuntimeState::get().dlclose(dso_handle);
+}
+
+void *__orc_rt_elfnix_jit_dlsym(void *dso_handle, const char *symbol) {
+ return ELFNixPlatformRuntimeState::get().dlsym(dso_handle, symbol);
+}
+
+//------------------------------------------------------------------------------
+// ELFNix Run Program
+//------------------------------------------------------------------------------
+
+ORC_RT_INTERFACE int64_t __orc_rt_elfnix_run_program(
+ const char *JITDylibName, const char *EntrySymbolName, int argc,
+ char *argv[]) {
+ using MainTy = int (*)(int, char *[]);
+
+ void *H = __orc_rt_elfnix_jit_dlopen(JITDylibName,
+ __orc_rt::elfnix::ORC_RT_RTLD_LAZY);
+ if (!H) {
+ __orc_rt_log_error(__orc_rt_elfnix_jit_dlerror());
+ return -1;
+ }
+
+ auto *Main =
+ reinterpret_cast<MainTy>(__orc_rt_elfnix_jit_dlsym(H, EntrySymbolName));
+
+ if (!Main) {
+ __orc_rt_log_error(__orc_rt_elfnix_jit_dlerror());
+ return -1;
+ }
+
+ int Result = Main(argc, argv);
+
+ if (__orc_rt_elfnix_jit_dlclose(H) == -1)
+ __orc_rt_log_error(__orc_rt_elfnix_jit_dlerror());
+
+ return Result;
+}
diff --git a/compiler-rt/lib/orc/elfnix_platform.h b/compiler-rt/lib/orc/elfnix_platform.h
new file mode 100644
index 000000000000..12b9591979b7
--- /dev/null
+++ b/compiler-rt/lib/orc/elfnix_platform.h
@@ -0,0 +1,131 @@
+//===- elfnix_platform.h ----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// ORC Runtime support for dynamic loading features on ELF-based platforms.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ORC_RT_ELFNIX_PLATFORM_H
+#define ORC_RT_ELFNIX_PLATFORM_H
+
+#include "common.h"
+#include "executor_address.h"
+
+// Atexit functions.
+ORC_RT_INTERFACE int __orc_rt_elfnix_cxa_atexit(void (*func)(void *), void *arg,
+ void *dso_handle);
+ORC_RT_INTERFACE int __orc_rt_elfnix_atexit(void (*func)(void *));
+ORC_RT_INTERFACE void __orc_rt_elfnix_cxa_finalize(void *dso_handle);
+
+// dlfcn functions.
+ORC_RT_INTERFACE const char *__orc_rt_elfnix_jit_dlerror();
+ORC_RT_INTERFACE void *__orc_rt_elfnix_jit_dlopen(const char *path, int mode);
+ORC_RT_INTERFACE int __orc_rt_elfnix_jit_dlclose(void *dso_handle);
+ORC_RT_INTERFACE void *__orc_rt_elfnix_jit_dlsym(void *dso_handle,
+ const char *symbol);
+
+namespace __orc_rt {
+namespace elfnix {
+
+struct ELFNixPerObjectSectionsToRegister {
+ ExecutorAddrRange EHFrameSection;
+ ExecutorAddrRange ThreadDataSection;
+};
+
+struct ELFNixJITDylibInitializers {
+ using SectionList = std::vector<ExecutorAddrRange>;
+
+ ELFNixJITDylibInitializers() = default;
+ ELFNixJITDylibInitializers(std::string Name, ExecutorAddr DSOHandleAddress)
+ : Name(std::move(Name)), DSOHandleAddress(std::move(DSOHandleAddress)) {}
+
+ std::string Name;
+ ExecutorAddr DSOHandleAddress;
+
+ std::unordered_map<std::string, SectionList> InitSections;
+};
+
+class ELFNixJITDylibDeinitializers {};
+
+using ELFNixJITDylibInitializerSequence =
+ std::vector<ELFNixJITDylibInitializers>;
+
+using ELFNixJITDylibDeinitializerSequence =
+ std::vector<ELFNixJITDylibDeinitializers>;
+
+enum dlopen_mode : int {
+ ORC_RT_RTLD_LAZY = 0x1,
+ ORC_RT_RTLD_NOW = 0x2,
+ ORC_RT_RTLD_LOCAL = 0x4,
+ ORC_RT_RTLD_GLOBAL = 0x8
+};
+
+} // end namespace elfnix
+
+using SPSELFNixPerObjectSectionsToRegister =
+ SPSTuple<SPSExecutorAddrRange, SPSExecutorAddrRange>;
+
+template <>
+class SPSSerializationTraits<SPSELFNixPerObjectSectionsToRegister,
+ elfnix::ELFNixPerObjectSectionsToRegister> {
+
+public:
+ static size_t size(const elfnix::ELFNixPerObjectSectionsToRegister &MOPOSR) {
+ return SPSELFNixPerObjectSectionsToRegister::AsArgList::size(
+ MOPOSR.EHFrameSection, MOPOSR.ThreadDataSection);
+ }
+
+ static bool
+ serialize(SPSOutputBuffer &OB,
+ const elfnix::ELFNixPerObjectSectionsToRegister &MOPOSR) {
+ return SPSELFNixPerObjectSectionsToRegister::AsArgList::serialize(
+ OB, MOPOSR.EHFrameSection, MOPOSR.ThreadDataSection);
+ }
+
+ static bool deserialize(SPSInputBuffer &IB,
+ elfnix::ELFNixPerObjectSectionsToRegister &MOPOSR) {
+ return SPSELFNixPerObjectSectionsToRegister::AsArgList::deserialize(
+ IB, MOPOSR.EHFrameSection, MOPOSR.ThreadDataSection);
+ }
+};
+
+using SPSNamedExecutorAddrRangeSequenceMap =
+ SPSSequence<SPSTuple<SPSString, SPSExecutorAddrRangeSequence>>;
+
+using SPSELFNixJITDylibInitializers =
+ SPSTuple<SPSString, SPSExecutorAddr, SPSNamedExecutorAddrRangeSequenceMap>;
+
+using SPSELFNixJITDylibInitializerSequence =
+ SPSSequence<SPSELFNixJITDylibInitializers>;
+
+/// Serialization traits for ELFNixJITDylibInitializers.
+template <>
+class SPSSerializationTraits<SPSELFNixJITDylibInitializers,
+ elfnix::ELFNixJITDylibInitializers> {
+public:
+ static size_t size(const elfnix::ELFNixJITDylibInitializers &MOJDIs) {
+ return SPSELFNixJITDylibInitializers::AsArgList::size(
+ MOJDIs.Name, MOJDIs.DSOHandleAddress, MOJDIs.InitSections);
+ }
+
+ static bool serialize(SPSOutputBuffer &OB,
+ const elfnix::ELFNixJITDylibInitializers &MOJDIs) {
+ return SPSELFNixJITDylibInitializers::AsArgList::serialize(
+ OB, MOJDIs.Name, MOJDIs.DSOHandleAddress, MOJDIs.InitSections);
+ }
+
+ static bool deserialize(SPSInputBuffer &IB,
+ elfnix::ELFNixJITDylibInitializers &MOJDIs) {
+ return SPSELFNixJITDylibInitializers::AsArgList::deserialize(
+ IB, MOJDIs.Name, MOJDIs.DSOHandleAddress, MOJDIs.InitSections);
+ }
+};
+
+} // end namespace __orc_rt
+
+#endif // ORC_RT_ELFNIX_PLATFORM_H
diff --git a/compiler-rt/lib/orc/elfnix_tls.x86-64.S b/compiler-rt/lib/orc/elfnix_tls.x86-64.S
new file mode 100644
index 000000000000..b3e0bef00867
--- /dev/null
+++ b/compiler-rt/lib/orc/elfnix_tls.x86-64.S
@@ -0,0 +1,64 @@
+
+//===-- orc_rt_elfnix_tls_x86-64.s -------------------------------*- ASM -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of the ORC runtime support library.
+//
+//===----------------------------------------------------------------------===//
+
+// The content of this file is x86_64-only
+#if defined(__x86_64__)
+
+#define REGISTER_SAVE_SPACE_SIZE 512
+
+ .text
+
+ // returns address of TLV in %rax, all other registers preserved
+ .globl ___orc_rt_elfnix_tls_get_addr
+___orc_rt_elfnix_tls_get_addr:
+ pushq %rbp
+ movq %rsp, %rbp
+ subq $REGISTER_SAVE_SPACE_SIZE, %rsp
+ movq %rcx, -16(%rbp)
+ movq %rdx, -24(%rbp)
+ movq %rsi, -32(%rbp)
+ movq %rdi, -40(%rbp)
+ movq %r8, -48(%rbp)
+ movq %r9, -56(%rbp)
+ movq %r10, -64(%rbp)
+ movq %r11, -72(%rbp)
+ movdqa %xmm0, -128(%rbp)
+ movdqa %xmm1, -144(%rbp)
+ movdqa %xmm2, -160(%rbp)
+ movdqa %xmm3, -176(%rbp)
+ movdqa %xmm4, -192(%rbp)
+ movdqa %xmm5, -208(%rbp)
+ movdqa %xmm6, -224(%rbp)
+ movdqa %xmm7, -240(%rbp)
+ call __orc_rt_elfnix_tls_get_addr_impl
+ movq -16(%rbp), %rcx
+ movq -24(%rbp), %rdx
+ movq -32(%rbp), %rsi
+ movq -40(%rbp), %rdi
+ movq -48(%rbp), %r8
+ movq -56(%rbp), %r9
+ movq -64(%rbp), %r10
+ movq -72(%rbp), %r11
+ movdqa -128(%rbp), %xmm0
+ movdqa -144(%rbp), %xmm1
+ movdqa -160(%rbp), %xmm2
+ movdqa -176(%rbp), %xmm3
+ movdqa -192(%rbp), %xmm4
+ movdqa -208(%rbp), %xmm5
+ movdqa -224(%rbp), %xmm6
+ movdqa -240(%rbp), %xmm7
+ addq $REGISTER_SAVE_SPACE_SIZE, %rsp
+ popq %rbp
+ ret
+
+#endif // defined(__x86_64__)
diff --git a/compiler-rt/lib/orc/executor_address.h b/compiler-rt/lib/orc/executor_address.h
index cfe985bdb60f..79ad9b7f1409 100644
--- a/compiler-rt/lib/orc/executor_address.h
+++ b/compiler-rt/lib/orc/executor_address.h
@@ -37,19 +37,19 @@ private:
};
/// Represents an address in the executor process.
-class ExecutorAddress {
+class ExecutorAddr {
public:
- ExecutorAddress() = default;
- explicit ExecutorAddress(uint64_t Addr) : Addr(Addr) {}
+ ExecutorAddr() = default;
+ explicit ExecutorAddr(uint64_t Addr) : Addr(Addr) {}
- /// Create an ExecutorAddress from the given pointer.
+ /// Create an ExecutorAddr from the given pointer.
/// Warning: This should only be used when JITing in-process.
- template <typename T> static ExecutorAddress fromPtr(T *Value) {
- return ExecutorAddress(
+ template <typename T> static ExecutorAddr fromPtr(T *Value) {
+ return ExecutorAddr(
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(Value)));
}
- /// Cast this ExecutorAddress to a pointer of the given type.
+ /// Cast this ExecutorAddr to a pointer of the given type.
/// Warning: This should only be used when JITing in-process.
template <typename T> T toPtr() const {
static_assert(std::is_pointer<T>::value, "T must be a pointer type");
@@ -65,53 +65,47 @@ public:
explicit operator bool() const { return Addr != 0; }
- friend bool operator==(const ExecutorAddress &LHS,
- const ExecutorAddress &RHS) {
+ friend bool operator==(const ExecutorAddr &LHS, const ExecutorAddr &RHS) {
return LHS.Addr == RHS.Addr;
}
- friend bool operator!=(const ExecutorAddress &LHS,
- const ExecutorAddress &RHS) {
+ friend bool operator!=(const ExecutorAddr &LHS, const ExecutorAddr &RHS) {
return LHS.Addr != RHS.Addr;
}
- friend bool operator<(const ExecutorAddress &LHS,
- const ExecutorAddress &RHS) {
+ friend bool operator<(const ExecutorAddr &LHS, const ExecutorAddr &RHS) {
return LHS.Addr < RHS.Addr;
}
- friend bool operator<=(const ExecutorAddress &LHS,
- const ExecutorAddress &RHS) {
+ friend bool operator<=(const ExecutorAddr &LHS, const ExecutorAddr &RHS) {
return LHS.Addr <= RHS.Addr;
}
- friend bool operator>(const ExecutorAddress &LHS,
- const ExecutorAddress &RHS) {
+ friend bool operator>(const ExecutorAddr &LHS, const ExecutorAddr &RHS) {
return LHS.Addr > RHS.Addr;
}
- friend bool operator>=(const ExecutorAddress &LHS,
- const ExecutorAddress &RHS) {
+ friend bool operator>=(const ExecutorAddr &LHS, const ExecutorAddr &RHS) {
return LHS.Addr >= RHS.Addr;
}
- ExecutorAddress &operator++() {
+ ExecutorAddr &operator++() {
++Addr;
return *this;
}
- ExecutorAddress &operator--() {
+ ExecutorAddr &operator--() {
--Addr;
return *this;
}
- ExecutorAddress operator++(int) { return ExecutorAddress(Addr++); }
- ExecutorAddress operator--(int) { return ExecutorAddress(Addr++); }
+ ExecutorAddr operator++(int) { return ExecutorAddr(Addr++); }
+ ExecutorAddr operator--(int) { return ExecutorAddr(Addr++); }
- ExecutorAddress &operator+=(const ExecutorAddrDiff Delta) {
+ ExecutorAddr &operator+=(const ExecutorAddrDiff Delta) {
Addr += Delta.getValue();
return *this;
}
- ExecutorAddress &operator-=(const ExecutorAddrDiff Delta) {
+ ExecutorAddr &operator-=(const ExecutorAddrDiff Delta) {
Addr -= Delta.getValue();
return *this;
}
@@ -121,87 +115,100 @@ private:
};
/// Subtracting two addresses yields an offset.
-inline ExecutorAddrDiff operator-(const ExecutorAddress &LHS,
- const ExecutorAddress &RHS) {
+inline ExecutorAddrDiff operator-(const ExecutorAddr &LHS,
+ const ExecutorAddr &RHS) {
return ExecutorAddrDiff(LHS.getValue() - RHS.getValue());
}
/// Adding an offset and an address yields an address.
-inline ExecutorAddress operator+(const ExecutorAddress &LHS,
- const ExecutorAddrDiff &RHS) {
- return ExecutorAddress(LHS.getValue() + RHS.getValue());
+inline ExecutorAddr operator+(const ExecutorAddr &LHS,
+ const ExecutorAddrDiff &RHS) {
+ return ExecutorAddr(LHS.getValue() + RHS.getValue());
}
/// Adding an address and an offset yields an address.
-inline ExecutorAddress operator+(const ExecutorAddrDiff &LHS,
- const ExecutorAddress &RHS) {
- return ExecutorAddress(LHS.getValue() + RHS.getValue());
+inline ExecutorAddr operator+(const ExecutorAddrDiff &LHS,
+ const ExecutorAddr &RHS) {
+ return ExecutorAddr(LHS.getValue() + RHS.getValue());
}
/// Represents an address range in the executor process.
-struct ExecutorAddressRange {
- ExecutorAddressRange() = default;
- ExecutorAddressRange(ExecutorAddress StartAddress, ExecutorAddress EndAddress)
- : StartAddress(StartAddress), EndAddress(EndAddress) {}
+struct ExecutorAddrRange {
+ ExecutorAddrRange() = default;
+ ExecutorAddrRange(ExecutorAddr Start, ExecutorAddr End)
+ : Start(Start), End(End) {}
+ ExecutorAddrRange(ExecutorAddr Start, ExecutorAddrDiff Size)
+ : Start(Start), End(Start + Size) {}
- bool empty() const { return StartAddress == EndAddress; }
- ExecutorAddrDiff size() const { return EndAddress - StartAddress; }
+ bool empty() const { return Start == End; }
+ ExecutorAddrDiff size() const { return End - Start; }
+
+ friend bool operator==(const ExecutorAddrRange &LHS,
+ const ExecutorAddrRange &RHS) {
+ return LHS.Start == RHS.Start && LHS.End == RHS.End;
+ }
+ friend bool operator!=(const ExecutorAddrRange &LHS,
+ const ExecutorAddrRange &RHS) {
+ return !(LHS == RHS);
+ }
+ bool contains(ExecutorAddr Addr) const { return Start <= Addr && Addr < End; }
+ bool overlaps(const ExecutorAddrRange &Other) {
+ return !(Other.End <= Start || End <= Other.Start);
+ }
template <typename T> span<T> toSpan() const {
assert(size().getValue() % sizeof(T) == 0 &&
"AddressRange is not a multiple of sizeof(T)");
- return span<T>(StartAddress.toPtr<T *>(), size().getValue() / sizeof(T));
+ return span<T>(Start.toPtr<T *>(), size().getValue() / sizeof(T));
}
- ExecutorAddress StartAddress;
- ExecutorAddress EndAddress;
+ ExecutorAddr Start;
+ ExecutorAddr End;
};
-/// SPS serializatior for ExecutorAddress.
-template <> class SPSSerializationTraits<SPSExecutorAddress, ExecutorAddress> {
+/// SPS serializer for ExecutorAddr.
+template <> class SPSSerializationTraits<SPSExecutorAddr, ExecutorAddr> {
public:
- static size_t size(const ExecutorAddress &EA) {
+ static size_t size(const ExecutorAddr &EA) {
return SPSArgList<uint64_t>::size(EA.getValue());
}
- static bool serialize(SPSOutputBuffer &BOB, const ExecutorAddress &EA) {
+ static bool serialize(SPSOutputBuffer &BOB, const ExecutorAddr &EA) {
return SPSArgList<uint64_t>::serialize(BOB, EA.getValue());
}
- static bool deserialize(SPSInputBuffer &BIB, ExecutorAddress &EA) {
+ static bool deserialize(SPSInputBuffer &BIB, ExecutorAddr &EA) {
uint64_t Tmp;
if (!SPSArgList<uint64_t>::deserialize(BIB, Tmp))
return false;
- EA = ExecutorAddress(Tmp);
+ EA = ExecutorAddr(Tmp);
return true;
}
};
-using SPSExecutorAddressRange =
- SPSTuple<SPSExecutorAddress, SPSExecutorAddress>;
+using SPSExecutorAddrRange = SPSTuple<SPSExecutorAddr, SPSExecutorAddr>;
/// Serialization traits for address ranges.
template <>
-class SPSSerializationTraits<SPSExecutorAddressRange, ExecutorAddressRange> {
+class SPSSerializationTraits<SPSExecutorAddrRange, ExecutorAddrRange> {
public:
- static size_t size(const ExecutorAddressRange &Value) {
- return SPSArgList<SPSExecutorAddress, SPSExecutorAddress>::size(
- Value.StartAddress, Value.EndAddress);
+ static size_t size(const ExecutorAddrRange &Value) {
+ return SPSArgList<SPSExecutorAddr, SPSExecutorAddr>::size(Value.Start,
+ Value.End);
}
- static bool serialize(SPSOutputBuffer &BOB,
- const ExecutorAddressRange &Value) {
- return SPSArgList<SPSExecutorAddress, SPSExecutorAddress>::serialize(
- BOB, Value.StartAddress, Value.EndAddress);
+ static bool serialize(SPSOutputBuffer &BOB, const ExecutorAddrRange &Value) {
+ return SPSArgList<SPSExecutorAddr, SPSExecutorAddr>::serialize(
+ BOB, Value.Start, Value.End);
}
- static bool deserialize(SPSInputBuffer &BIB, ExecutorAddressRange &Value) {
- return SPSArgList<SPSExecutorAddress, SPSExecutorAddress>::deserialize(
- BIB, Value.StartAddress, Value.EndAddress);
+ static bool deserialize(SPSInputBuffer &BIB, ExecutorAddrRange &Value) {
+ return SPSArgList<SPSExecutorAddr, SPSExecutorAddr>::deserialize(
+ BIB, Value.Start, Value.End);
}
};
-using SPSExecutorAddressRangeSequence = SPSSequence<SPSExecutorAddressRange>;
+using SPSExecutorAddrRangeSequence = SPSSequence<SPSExecutorAddrRange>;
} // End namespace __orc_rt
diff --git a/compiler-rt/lib/orc/macho_ehframe_registration.cpp b/compiler-rt/lib/orc/macho_ehframe_registration.cpp
new file mode 100644
index 000000000000..d0ea7e70201c
--- /dev/null
+++ b/compiler-rt/lib/orc/macho_ehframe_registration.cpp
@@ -0,0 +1,68 @@
+//===- ehframe_registration.cpp -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code required to load the rest of the MachO runtime.
+//
+//===----------------------------------------------------------------------===//
+
+#include "adt.h"
+#include "c_api.h"
+#include "common.h"
+
+using namespace __orc_rt;
+
+// eh-frame registration functions.
+// We expect these to be available for all processes.
+extern "C" void __register_frame(const void *);
+extern "C" void __deregister_frame(const void *);
+
+namespace {
+
+template <typename HandleFDEFn>
+void walkEHFrameSection(span<const char> EHFrameSection,
+ HandleFDEFn HandleFDE) {
+ const char *CurCFIRecord = EHFrameSection.data();
+ uint64_t Size = *reinterpret_cast<const uint32_t *>(CurCFIRecord);
+
+ while (CurCFIRecord != EHFrameSection.end() && Size != 0) {
+ const char *OffsetField = CurCFIRecord + (Size == 0xffffffff ? 12 : 4);
+ if (Size == 0xffffffff)
+ Size = *reinterpret_cast<const uint64_t *>(CurCFIRecord + 4) + 12;
+ else
+ Size += 4;
+ uint32_t Offset = *reinterpret_cast<const uint32_t *>(OffsetField);
+
+ if (Offset != 0)
+ HandleFDE(CurCFIRecord);
+
+ CurCFIRecord += Size;
+ Size = *reinterpret_cast<const uint32_t *>(CurCFIRecord);
+ }
+}
+
+} // end anonymous namespace
+
+ORC_RT_INTERFACE __orc_rt_CWrapperFunctionResult
+__orc_rt_macho_register_ehframe_section(char *ArgData, size_t ArgSize) {
+ // NOTE: Does not use SPS to deserialize arg buffer, instead the arg buffer
+ // is taken to be the range of the eh-frame section.
+ bool HasError = false;
+ walkEHFrameSection(span<const char>(ArgData, ArgSize), __register_frame);
+ return __orc_rt_CreateCWrapperFunctionResultFromRange((char*)&HasError,
+ sizeof(HasError));
+}
+
+ORC_RT_INTERFACE __orc_rt_CWrapperFunctionResult
+__orc_rt_macho_deregister_ehframe_section(char *ArgData, size_t ArgSize) {
+ // NOTE: Does not use SPS to deserialize arg buffer, instead the arg buffer
+ // is taken to be the range of the eh-frame section.
+ bool HasError = false;
+ walkEHFrameSection(span<const char>(ArgData, ArgSize), __deregister_frame);
+ return __orc_rt_CreateCWrapperFunctionResultFromRange((char*)&HasError,
+ sizeof(HasError));
+}
diff --git a/compiler-rt/lib/orc/macho_platform.cpp b/compiler-rt/lib/orc/macho_platform.cpp
index 2a960fb548fa..32770219e65c 100644
--- a/compiler-rt/lib/orc/macho_platform.cpp
+++ b/compiler-rt/lib/orc/macho_platform.cpp
@@ -29,11 +29,6 @@ ORC_RT_JIT_DISPATCH_TAG(__orc_rt_macho_get_initializers_tag)
ORC_RT_JIT_DISPATCH_TAG(__orc_rt_macho_get_deinitializers_tag)
ORC_RT_JIT_DISPATCH_TAG(__orc_rt_macho_symbol_lookup_tag)
-// eh-frame registration functions.
-// We expect these to be available for all processes.
-extern "C" void __register_frame(const void *);
-extern "C" void __deregister_frame(const void *);
-
// Objective-C types.
struct objc_class;
struct objc_image_info;
@@ -55,6 +50,7 @@ extern "C" SEL sel_registerName(const char *) ORC_RT_WEAK_IMPORT;
// Swift types.
class ProtocolRecord;
class ProtocolConformanceRecord;
+class TypeMetadataRecord;
extern "C" void
swift_registerProtocols(const ProtocolRecord *begin,
@@ -64,36 +60,18 @@ extern "C" void swift_registerProtocolConformances(
const ProtocolConformanceRecord *begin,
const ProtocolConformanceRecord *end) ORC_RT_WEAK_IMPORT;
-namespace {
+extern "C" void swift_registerTypeMetadataRecords(
+ const TypeMetadataRecord *begin,
+ const TypeMetadataRecord *end) ORC_RT_WEAK_IMPORT;
-template <typename HandleFDEFn>
-void walkEHFrameSection(span<const char> EHFrameSection,
- HandleFDEFn HandleFDE) {
- const char *CurCFIRecord = EHFrameSection.data();
- uint64_t Size = *reinterpret_cast<const uint32_t *>(CurCFIRecord);
-
- while (CurCFIRecord != EHFrameSection.end() && Size != 0) {
- const char *OffsetField = CurCFIRecord + (Size == 0xffffffff ? 12 : 4);
- if (Size == 0xffffffff)
- Size = *reinterpret_cast<const uint64_t *>(CurCFIRecord + 4) + 12;
- else
- Size += 4;
- uint32_t Offset = *reinterpret_cast<const uint32_t *>(OffsetField);
-
- if (Offset != 0)
- HandleFDE(CurCFIRecord);
-
- CurCFIRecord += Size;
- Size = *reinterpret_cast<const uint32_t *>(CurCFIRecord);
- }
-}
+namespace {
Error validatePointerSectionExtent(const char *SectionName,
- const ExecutorAddressRange &SE) {
+ const ExecutorAddrRange &SE) {
if (SE.size().getValue() % sizeof(uintptr_t)) {
std::ostringstream ErrMsg;
ErrMsg << std::hex << "Size of " << SectionName << " 0x"
- << SE.StartAddress.getValue() << " -- 0x" << SE.EndAddress.getValue()
+ << SE.Start.getValue() << " -- 0x" << SE.End.getValue()
<< " is not a pointer multiple";
return make_error<StringError>(ErrMsg.str());
}
@@ -101,7 +79,7 @@ Error validatePointerSectionExtent(const char *SectionName,
}
Error registerObjCSelectors(
- const std::vector<ExecutorAddressRange> &ObjCSelRefsSections,
+ const std::vector<ExecutorAddrRange> &ObjCSelRefsSections,
const MachOJITDylibInitializers &MOJDIs) {
if (ORC_RT_UNLIKELY(!sel_registerName))
@@ -112,13 +90,10 @@ Error registerObjCSelectors(
if (auto Err = validatePointerSectionExtent("__objc_selrefs", ObjCSelRefs))
return Err;
- fprintf(stderr, "Processing selrefs section at 0x%llx\n",
- ObjCSelRefs.StartAddress.getValue());
- for (uintptr_t SelEntry : ObjCSelRefs.toSpan<uintptr_t>()) {
+ for (uintptr_t &SelEntry : ObjCSelRefs.toSpan<uintptr_t>()) {
const char *SelName = reinterpret_cast<const char *>(SelEntry);
- fprintf(stderr, "Registering selector \"%s\"\n", SelName);
auto Sel = sel_registerName(SelName);
- *reinterpret_cast<SEL *>(SelEntry) = Sel;
+ *reinterpret_cast<SEL *>(&SelEntry) = Sel;
}
}
@@ -126,7 +101,7 @@ Error registerObjCSelectors(
}
Error registerObjCClasses(
- const std::vector<ExecutorAddressRange> &ObjCClassListSections,
+ const std::vector<ExecutorAddrRange> &ObjCClassListSections,
const MachOJITDylibInitializers &MOJDIs) {
if (ObjCClassListSections.empty())
@@ -170,7 +145,7 @@ Error registerObjCClasses(
}
Error registerSwift5Protocols(
- const std::vector<ExecutorAddressRange> &Swift5ProtocolSections,
+ const std::vector<ExecutorAddrRange> &Swift5ProtocolSections,
const MachOJITDylibInitializers &MOJDIs) {
if (ORC_RT_UNLIKELY(!Swift5ProtocolSections.empty() &&
@@ -179,14 +154,14 @@ Error registerSwift5Protocols(
for (const auto &Swift5Protocols : Swift5ProtocolSections)
swift_registerProtocols(
- Swift5Protocols.StartAddress.toPtr<const ProtocolRecord *>(),
- Swift5Protocols.EndAddress.toPtr<const ProtocolRecord *>());
+ Swift5Protocols.Start.toPtr<const ProtocolRecord *>(),
+ Swift5Protocols.End.toPtr<const ProtocolRecord *>());
return Error::success();
}
Error registerSwift5ProtocolConformances(
- const std::vector<ExecutorAddressRange> &Swift5ProtocolConformanceSections,
+ const std::vector<ExecutorAddrRange> &Swift5ProtocolConformanceSections,
const MachOJITDylibInitializers &MOJDIs) {
if (ORC_RT_UNLIKELY(!Swift5ProtocolConformanceSections.empty() &&
@@ -196,13 +171,28 @@ Error registerSwift5ProtocolConformances(
for (const auto &ProtoConfSec : Swift5ProtocolConformanceSections)
swift_registerProtocolConformances(
- ProtoConfSec.StartAddress.toPtr<const ProtocolConformanceRecord *>(),
- ProtoConfSec.EndAddress.toPtr<const ProtocolConformanceRecord *>());
+ ProtoConfSec.Start.toPtr<const ProtocolConformanceRecord *>(),
+ ProtoConfSec.End.toPtr<const ProtocolConformanceRecord *>());
+
+ return Error::success();
+}
+
+Error registerSwift5Types(const std::vector<ExecutorAddrRange> &Sections,
+ const MachOJITDylibInitializers &MOJDIs) {
+
+ if (ORC_RT_UNLIKELY(!Sections.empty() && !swift_registerTypeMetadataRecords))
+ return make_error<StringError>(
+ "swift_registerTypeMetadataRecords is not available");
+
+ for (const auto &Section : Sections)
+ swift_registerTypeMetadataRecords(
+ Section.Start.toPtr<const TypeMetadataRecord *>(),
+ Section.End.toPtr<const TypeMetadataRecord *>());
return Error::success();
}
-Error runModInits(const std::vector<ExecutorAddressRange> &ModInitsSections,
+Error runModInits(const std::vector<ExecutorAddrRange> &ModInitsSections,
const MachOJITDylibInitializers &MOJDIs) {
for (const auto &ModInits : ModInitsSections) {
@@ -253,8 +243,8 @@ public:
MachOPlatformRuntimeState(MachOPlatformRuntimeState &&) = delete;
MachOPlatformRuntimeState &operator=(MachOPlatformRuntimeState &&) = delete;
- Error registerObjectSections(MachOPerObjectSectionsToRegister POSR);
- Error deregisterObjectSections(MachOPerObjectSectionsToRegister POSR);
+ Error registerThreadDataSection(span<const char> ThreadDataSec);
+ Error deregisterThreadDataSection(span<const char> ThreadDataSec);
const char *dlerror();
void *dlopen(string_view Name, int Mode);
@@ -273,10 +263,8 @@ private:
PerJITDylibState *getJITDylibStateByName(string_view Path);
PerJITDylibState &getOrCreateJITDylibState(MachOJITDylibInitializers &MOJDIs);
- Error registerThreadDataSection(span<const char> ThreadDataSec);
-
- Expected<ExecutorAddress> lookupSymbolInJITDylib(void *DSOHandle,
- string_view Symbol);
+ Expected<ExecutorAddr> lookupSymbolInJITDylib(void *DSOHandle,
+ string_view Symbol);
Expected<MachOJITDylibInitializerSequence>
getJITDylibInitializersByName(string_view Path);
@@ -286,13 +274,14 @@ private:
static MachOPlatformRuntimeState *MOPS;
using InitSectionHandler =
- Error (*)(const std::vector<ExecutorAddressRange> &Sections,
+ Error (*)(const std::vector<ExecutorAddrRange> &Sections,
const MachOJITDylibInitializers &MOJDIs);
const std::vector<std::pair<const char *, InitSectionHandler>> InitSections =
{{"__DATA,__objc_selrefs", registerObjCSelectors},
{"__DATA,__objc_classlist", registerObjCClasses},
{"__TEXT,__swift5_protos", registerSwift5Protocols},
{"__TEXT,__swift5_proto", registerSwift5ProtocolConformances},
+ {"__TEXT,__swift5_types", registerSwift5Types},
{"__DATA,__mod_init_func", runModInits}};
// FIXME: Move to thread-state.
@@ -323,27 +312,28 @@ void MachOPlatformRuntimeState::destroy() {
delete MOPS;
}
-Error MachOPlatformRuntimeState::registerObjectSections(
- MachOPerObjectSectionsToRegister POSR) {
- if (POSR.EHFrameSection.StartAddress)
- walkEHFrameSection(POSR.EHFrameSection.toSpan<const char>(),
- __register_frame);
-
- if (POSR.ThreadDataSection.StartAddress) {
- if (auto Err = registerThreadDataSection(
- POSR.ThreadDataSection.toSpan<const char>()))
- return Err;
+Error MachOPlatformRuntimeState::registerThreadDataSection(
+ span<const char> ThreadDataSection) {
+ std::lock_guard<std::mutex> Lock(ThreadDataSectionsMutex);
+ auto I = ThreadDataSections.upper_bound(ThreadDataSection.data());
+ if (I != ThreadDataSections.begin()) {
+ auto J = std::prev(I);
+ if (J->first + J->second > ThreadDataSection.data())
+ return make_error<StringError>("Overlapping __thread_data sections");
}
-
+ ThreadDataSections.insert(
+ I, std::make_pair(ThreadDataSection.data(), ThreadDataSection.size()));
return Error::success();
}
-Error MachOPlatformRuntimeState::deregisterObjectSections(
- MachOPerObjectSectionsToRegister POSR) {
- if (POSR.EHFrameSection.StartAddress)
- walkEHFrameSection(POSR.EHFrameSection.toSpan<const char>(),
- __deregister_frame);
-
+Error MachOPlatformRuntimeState::deregisterThreadDataSection(
+ span<const char> ThreadDataSection) {
+ std::lock_guard<std::mutex> Lock(ThreadDataSectionsMutex);
+ auto I = ThreadDataSections.find(ThreadDataSection.data());
+ if (I == ThreadDataSections.end())
+ return make_error<StringError>("Attempt to deregister unknown thread data "
+ "section");
+ ThreadDataSections.erase(I);
return Error::success();
}
@@ -465,28 +455,15 @@ MachOPlatformRuntimeState::getOrCreateJITDylibState(
return JDS;
}
-Error MachOPlatformRuntimeState::registerThreadDataSection(
- span<const char> ThreadDataSection) {
- std::lock_guard<std::mutex> Lock(ThreadDataSectionsMutex);
- auto I = ThreadDataSections.upper_bound(ThreadDataSection.data());
- if (I != ThreadDataSections.begin()) {
- auto J = std::prev(I);
- if (J->first + J->second > ThreadDataSection.data())
- return make_error<StringError>("Overlapping __thread_data sections");
- }
- ThreadDataSections.insert(
- I, std::make_pair(ThreadDataSection.data(), ThreadDataSection.size()));
- return Error::success();
-}
-
-Expected<ExecutorAddress>
+Expected<ExecutorAddr>
MachOPlatformRuntimeState::lookupSymbolInJITDylib(void *DSOHandle,
string_view Sym) {
- Expected<ExecutorAddress> Result((ExecutorAddress()));
- if (auto Err = WrapperFunction<SPSExpected<SPSExecutorAddress>(
- SPSExecutorAddress,
- SPSString)>::call(&__orc_rt_macho_symbol_lookup_tag, Result,
- ExecutorAddress::fromPtr(DSOHandle), Sym))
+ Expected<ExecutorAddr> Result((ExecutorAddr()));
+ if (auto Err = WrapperFunction<SPSExpected<SPSExecutorAddr>(
+ SPSExecutorAddr, SPSString)>::call(&__orc_rt_macho_symbol_lookup_tag,
+ Result,
+ ExecutorAddr::fromPtr(DSOHandle),
+ Sym))
return std::move(Err);
return Result;
}
@@ -589,6 +566,13 @@ void destroyMachOTLVMgr(void *MachOTLVMgr) {
delete static_cast<MachOPlatformRuntimeTLVManager *>(MachOTLVMgr);
}
+Error runWrapperFunctionCalls(std::vector<WrapperFunctionCall> WFCs) {
+ for (auto &WFC : WFCs)
+ if (auto Err = WFC.runWithSPSRet())
+ return Err;
+ return Error::success();
+}
+
} // end anonymous namespace
//------------------------------------------------------------------------------
@@ -607,30 +591,41 @@ __orc_rt_macho_platform_shutdown(char *ArgData, size_t ArgSize) {
return WrapperFunctionResult().release();
}
-/// Wrapper function for registering metadata on a per-object basis.
ORC_RT_INTERFACE __orc_rt_CWrapperFunctionResult
-__orc_rt_macho_register_object_sections(char *ArgData, size_t ArgSize) {
- return WrapperFunction<SPSError(SPSMachOPerObjectSectionsToRegister)>::handle(
- ArgData, ArgSize,
- [](MachOPerObjectSectionsToRegister &POSR) {
- return MachOPlatformRuntimeState::get().registerObjectSections(
- std::move(POSR));
+__orc_rt_macho_register_thread_data_section(char *ArgData, size_t ArgSize) {
+  // NOTE: Does not use SPS to deserialize the arg buffer; instead, the arg
+  // buffer is taken to be the range of the thread data section.
+ return WrapperFunction<SPSError()>::handle(
+ nullptr, 0,
+ [&]() {
+ return MachOPlatformRuntimeState::get()
+ .registerThreadDataSection(
+ span<const char>(ArgData, ArgSize));
})
.release();
}
-/// Wrapper for releasing per-object metadat.
ORC_RT_INTERFACE __orc_rt_CWrapperFunctionResult
-__orc_rt_macho_deregister_object_sections(char *ArgData, size_t ArgSize) {
- return WrapperFunction<SPSError(SPSMachOPerObjectSectionsToRegister)>::handle(
- ArgData, ArgSize,
- [](MachOPerObjectSectionsToRegister &POSR) {
- return MachOPlatformRuntimeState::get().deregisterObjectSections(
- std::move(POSR));
+__orc_rt_macho_deregister_thread_data_section(char *ArgData, size_t ArgSize) {
+  // NOTE: Does not use SPS to deserialize the arg buffer; instead, the arg
+  // buffer is taken to be the range of the thread data section.
+ return WrapperFunction<SPSError()>::handle(
+ nullptr, 0,
+ [&]() {
+ return MachOPlatformRuntimeState::get()
+ .deregisterThreadDataSection(
+ span<const char>(ArgData, ArgSize));
})
.release();
}
+ORC_RT_INTERFACE __orc_rt_CWrapperFunctionResult
+__orc_rt_macho_run_wrapper_function_calls(char *ArgData, size_t ArgSize) {
+ return WrapperFunction<SPSError(SPSSequence<SPSWrapperFunctionCall>)>::handle(
+ ArgData, ArgSize, runWrapperFunctionCalls)
+ .release();
+}
+
//------------------------------------------------------------------------------
// TLV support
//------------------------------------------------------------------------------
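A minimal stand-alone sketch of the overlap check performed by registerThreadDataSection above (assumptions: a bare std::map keyed by section start; the actual runtime additionally serializes access with ThreadDataSectionsMutex):

#include <cstddef>
#include <iterator>
#include <map>
#include <utility>

static std::map<const char *, size_t> ThreadDataSections;

// Mirrors the runtime's check: only the nearest preceding entry is tested for
// overlap with the candidate range before the hinted insert.
bool addThreadDataSection(const char *Start, size_t Size) {
  auto I = ThreadDataSections.upper_bound(Start);
  if (I != ThreadDataSections.begin()) {
    auto J = std::prev(I);
    if (J->first + J->second > Start)
      return false; // overlaps an already-registered __thread_data section
  }
  ThreadDataSections.insert(I, std::make_pair(Start, Size));
  return true;
}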
diff --git a/compiler-rt/lib/orc/macho_platform.h b/compiler-rt/lib/orc/macho_platform.h
index 6c05e844b0cd..5b9820a0d1f9 100644
--- a/compiler-rt/lib/orc/macho_platform.h
+++ b/compiler-rt/lib/orc/macho_platform.h
@@ -31,23 +31,17 @@ ORC_RT_INTERFACE void *__orc_rt_macho_jit_dlsym(void *dso_handle,
namespace __orc_rt {
namespace macho {
-struct MachOPerObjectSectionsToRegister {
- ExecutorAddressRange EHFrameSection;
- ExecutorAddressRange ThreadDataSection;
-};
-
struct MachOJITDylibInitializers {
- using SectionList = std::vector<ExecutorAddressRange>;
+ using SectionList = std::vector<ExecutorAddrRange>;
MachOJITDylibInitializers() = default;
- MachOJITDylibInitializers(std::string Name,
- ExecutorAddress MachOHeaderAddress)
+ MachOJITDylibInitializers(std::string Name, ExecutorAddr MachOHeaderAddress)
: Name(std::move(Name)),
MachOHeaderAddress(std::move(MachOHeaderAddress)) {}
std::string Name;
- ExecutorAddress MachOHeaderAddress;
- ExecutorAddress ObjCImageInfoAddress;
+ ExecutorAddr MachOHeaderAddress;
+ ExecutorAddr ObjCImageInfoAddress;
std::unordered_map<std::string, SectionList> InitSections;
};
@@ -68,38 +62,12 @@ enum dlopen_mode : int {
} // end namespace macho
-using SPSMachOPerObjectSectionsToRegister =
- SPSTuple<SPSExecutorAddressRange, SPSExecutorAddressRange>;
-
-template <>
-class SPSSerializationTraits<SPSMachOPerObjectSectionsToRegister,
- macho::MachOPerObjectSectionsToRegister> {
-
-public:
- static size_t size(const macho::MachOPerObjectSectionsToRegister &MOPOSR) {
- return SPSMachOPerObjectSectionsToRegister::AsArgList::size(
- MOPOSR.EHFrameSection, MOPOSR.ThreadDataSection);
- }
-
- static bool serialize(SPSOutputBuffer &OB,
- const macho::MachOPerObjectSectionsToRegister &MOPOSR) {
- return SPSMachOPerObjectSectionsToRegister::AsArgList::serialize(
- OB, MOPOSR.EHFrameSection, MOPOSR.ThreadDataSection);
- }
-
- static bool deserialize(SPSInputBuffer &IB,
- macho::MachOPerObjectSectionsToRegister &MOPOSR) {
- return SPSMachOPerObjectSectionsToRegister::AsArgList::deserialize(
- IB, MOPOSR.EHFrameSection, MOPOSR.ThreadDataSection);
- }
-};
-
-using SPSNamedExecutorAddressRangeSequenceMap =
- SPSSequence<SPSTuple<SPSString, SPSExecutorAddressRangeSequence>>;
+using SPSNamedExecutorAddrRangeSequenceMap =
+ SPSSequence<SPSTuple<SPSString, SPSExecutorAddrRangeSequence>>;
using SPSMachOJITDylibInitializers =
- SPSTuple<SPSString, SPSExecutorAddress, SPSExecutorAddress,
- SPSNamedExecutorAddressRangeSequenceMap>;
+ SPSTuple<SPSString, SPSExecutorAddr, SPSExecutorAddr,
+ SPSNamedExecutorAddrRangeSequenceMap>;
using SPSMachOJITDylibInitializerSequence =
SPSSequence<SPSMachOJITDylibInitializers>;
diff --git a/compiler-rt/lib/orc/macho_tlv.arm64.S b/compiler-rt/lib/orc/macho_tlv.arm64.S
new file mode 100644
index 000000000000..f6eb9fc4da39
--- /dev/null
+++ b/compiler-rt/lib/orc/macho_tlv.arm64.S
@@ -0,0 +1,92 @@
+//===-- macho_tlv.arm64.s ---------------------------------------*- ASM -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of the ORC runtime support library.
+//
+//===----------------------------------------------------------------------===//
+
+// The content of this file is arm64-only
+#if defined(__arm64__) || defined(__aarch64__)
+
+#define REGISTER_SAVE_SPACE_SIZE 32 * 24
+
+ .text
+
+ // returns address of TLV in x0, all other registers preserved
+ .globl ___orc_rt_macho_tlv_get_addr
+___orc_rt_macho_tlv_get_addr:
+ sub sp, sp, #REGISTER_SAVE_SPACE_SIZE
+ stp x29, x30, [sp, #16 * 1]
+ stp x27, x28, [sp, #16 * 2]
+ stp x25, x26, [sp, #16 * 3]
+ stp x23, x24, [sp, #16 * 4]
+ stp x21, x22, [sp, #16 * 5]
+ stp x19, x20, [sp, #16 * 6]
+ stp x17, x18, [sp, #16 * 7]
+ stp x15, x16, [sp, #16 * 8]
+ stp x13, x14, [sp, #16 * 9]
+ stp x11, x12, [sp, #16 * 10]
+ stp x9, x10, [sp, #16 * 11]
+ stp x7, x8, [sp, #16 * 12]
+ stp x5, x6, [sp, #16 * 13]
+ stp x3, x4, [sp, #16 * 14]
+ stp x1, x2, [sp, #16 * 15]
+ stp q30, q31, [sp, #32 * 8]
+ stp q28, q29, [sp, #32 * 9]
+ stp q26, q27, [sp, #32 * 10]
+ stp q24, q25, [sp, #32 * 11]
+ stp q22, q23, [sp, #32 * 12]
+ stp q20, q21, [sp, #32 * 13]
+ stp q18, q19, [sp, #32 * 14]
+ stp q16, q17, [sp, #32 * 15]
+ stp q14, q15, [sp, #32 * 16]
+ stp q12, q13, [sp, #32 * 17]
+ stp q10, q11, [sp, #32 * 18]
+ stp q8, q9, [sp, #32 * 19]
+ stp q6, q7, [sp, #32 * 20]
+ stp q4, q5, [sp, #32 * 21]
+ stp q2, q3, [sp, #32 * 22]
+ stp q0, q1, [sp, #32 * 23]
+
+ bl ___orc_rt_macho_tlv_get_addr_impl
+
+ ldp q0, q1, [sp, #32 * 23]
+ ldp q2, q3, [sp, #32 * 22]
+ ldp q4, q5, [sp, #32 * 21]
+ ldp q6, q7, [sp, #32 * 20]
+ ldp q8, q9, [sp, #32 * 19]
+ ldp q10, q11, [sp, #32 * 18]
+ ldp q12, q13, [sp, #32 * 17]
+ ldp q14, q15, [sp, #32 * 16]
+ ldp q16, q17, [sp, #32 * 15]
+ ldp q18, q19, [sp, #32 * 14]
+ ldp q20, q21, [sp, #32 * 13]
+ ldp q22, q23, [sp, #32 * 12]
+ ldp q24, q25, [sp, #32 * 11]
+ ldp q26, q27, [sp, #32 * 10]
+ ldp q28, q29, [sp, #32 * 9]
+ ldp q30, q31, [sp, #32 * 8]
+ ldp x1, x2, [sp, #16 * 15]
+ ldp x3, x4, [sp, #16 * 14]
+ ldp x5, x6, [sp, #16 * 13]
+ ldp x7, x8, [sp, #16 * 12]
+ ldp x9, x10, [sp, #16 * 11]
+ ldp x11, x12, [sp, #16 * 10]
+ ldp x13, x14, [sp, #16 * 9]
+ ldp x15, x16, [sp, #16 * 8]
+ ldp x17, x18, [sp, #16 * 7]
+ ldp x19, x20, [sp, #16 * 6]
+ ldp x21, x22, [sp, #16 * 5]
+ ldp x23, x24, [sp, #16 * 4]
+ ldp x25, x26, [sp, #16 * 3]
+ ldp x27, x28, [sp, #16 * 2]
+ ldp x29, x30, [sp, #16 * 1]
+ add sp, sp, #REGISTER_SAVE_SPACE_SIZE
+ ret
+
+#endif // defined(__arm64__) || defined(__aarch64__)
diff --git a/compiler-rt/lib/orc/macho_tlv.x86-64.S b/compiler-rt/lib/orc/macho_tlv.x86-64.S
index 0affe403eec2..e3daf23e3029 100644
--- a/compiler-rt/lib/orc/macho_tlv.x86-64.S
+++ b/compiler-rt/lib/orc/macho_tlv.x86-64.S
@@ -10,6 +10,9 @@
//
//===----------------------------------------------------------------------===//
+// The content of this file is x86_64-only
+#if defined(__x86_64__)
+
#define REGISTER_SAVE_SPACE_SIZE 512
.text
@@ -66,3 +69,5 @@ ___orc_rt_macho_tlv_get_addr:
addq $REGISTER_SAVE_SPACE_SIZE, %rsp
popq %rbp
ret
+
+#endif // defined(__x86_64__)
diff --git a/compiler-rt/lib/orc/simple_packed_serialization.h b/compiler-rt/lib/orc/simple_packed_serialization.h
index b561a19d8f04..ec43130a2ef5 100644
--- a/compiler-rt/lib/orc/simple_packed_serialization.h
+++ b/compiler-rt/lib/orc/simple_packed_serialization.h
@@ -176,7 +176,7 @@ public:
class SPSEmpty {};
/// Represents an address in the executor.
-class SPSExecutorAddress {};
+class SPSExecutorAddr {};
/// SPS tag type for tuples.
///
@@ -354,6 +354,27 @@ public:
}
};
+/// Trivial serialization / deserialization for span<const char>
+template <> class SPSSerializationTraits<SPSSequence<char>, span<const char>> {
+public:
+ static size_t size(const span<const char> &S) {
+ return SPSArgList<uint64_t>::size(static_cast<uint64_t>(S.size())) +
+ S.size();
+ }
+ static bool serialize(SPSOutputBuffer &OB, const span<const char> &S) {
+ if (!SPSArgList<uint64_t>::serialize(OB, static_cast<uint64_t>(S.size())))
+ return false;
+ return OB.write(S.data(), S.size());
+ }
+ static bool deserialize(SPSInputBuffer &IB, span<const char> &S) {
+ uint64_t Size;
+ if (!SPSArgList<uint64_t>::deserialize(IB, Size))
+ return false;
+ S = span<const char>(IB.data(), Size);
+ return IB.skip(Size);
+ }
+};
+
/// SPSTuple serialization for std::pair.
template <typename SPSTagT1, typename SPSTagT2, typename T1, typename T2>
class SPSSerializationTraits<SPSTuple<SPSTagT1, SPSTagT2>, std::pair<T1, T2>> {
@@ -396,10 +417,12 @@ public:
uint64_t Size;
if (!SPSArgList<uint64_t>::deserialize(IB, Size))
return false;
+ if (Size > std::numeric_limits<size_t>::max())
+ return false;
Data = IB.data();
if (!IB.skip(Size))
return false;
- S = {Data, Size};
+ S = {Data, static_cast<size_t>(Size)};
return true;
}
};
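A rough sketch of the framing the new span<const char> trait uses (hypothetical helper; the real code writes through SPSOutputBuffer and reads through SPSInputBuffer, which also fix the integer encoding): an 8-byte element count followed by the raw bytes, so the serialized size is sizeof(uint64_t) + S.size().

#include <cstdint>
#include <cstring>
#include <vector>

// Length-prefixed layout only; the count is written in host byte order here,
// whereas SPS defines the actual encoding.
std::vector<char> encodeCharSpan(const char *Data, uint64_t Size) {
  std::vector<char> Out(sizeof(uint64_t) + Size);
  std::memcpy(Out.data(), &Size, sizeof(uint64_t));
  std::memcpy(Out.data() + sizeof(uint64_t), Data, Size);
  return Out;
}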
diff --git a/compiler-rt/lib/orc/wrapper_function_utils.h b/compiler-rt/lib/orc/wrapper_function_utils.h
index 49faa03e5eb8..23385e1bd794 100644
--- a/compiler-rt/lib/orc/wrapper_function_utils.h
+++ b/compiler-rt/lib/orc/wrapper_function_utils.h
@@ -16,6 +16,7 @@
#include "c_api.h"
#include "common.h"
#include "error.h"
+#include "executor_address.h"
#include "simple_packed_serialization.h"
#include <type_traits>
@@ -61,7 +62,7 @@ public:
}
/// Get a pointer to the data contained in this instance.
- const char *data() const { return __orc_rt_CWrapperFunctionResultData(&R); }
+ char *data() { return __orc_rt_CWrapperFunctionResultData(&R); }
/// Returns the size of the data contained in this instance.
size_t size() const { return __orc_rt_CWrapperFunctionResultSize(&R); }
@@ -72,10 +73,10 @@ public:
/// Create a WrapperFunctionResult with the given size and return a pointer
/// to the underlying memory.
- static char *allocate(WrapperFunctionResult &R, size_t Size) {
- __orc_rt_DisposeCWrapperFunctionResult(&R.R);
- __orc_rt_CWrapperFunctionResultInit(&R.R);
- return __orc_rt_CWrapperFunctionResultAllocate(&R.R, Size);
+ static WrapperFunctionResult allocate(size_t Size) {
+ WrapperFunctionResult R;
+ R.R = __orc_rt_CWrapperFunctionResultAllocate(Size);
+ return R;
}
/// Copy from the given char range.
@@ -103,6 +104,16 @@ public:
return createOutOfBandError(Msg.c_str());
}
+ template <typename SPSArgListT, typename... ArgTs>
+ static WrapperFunctionResult fromSPSArgs(const ArgTs &...Args) {
+ auto Result = allocate(SPSArgListT::size(Args...));
+ SPSOutputBuffer OB(Result.data(), Result.size());
+ if (!SPSArgListT::serialize(OB, Args...))
+ return createOutOfBandError(
+ "Error serializing arguments to blob in call");
+ return Result;
+ }
+
/// If this value is an out-of-band error then this returns the error message,
/// otherwise returns nullptr.
const char *getOutOfBandError() const {
@@ -115,19 +126,6 @@ private:
namespace detail {
-template <typename SPSArgListT, typename... ArgTs>
-Expected<WrapperFunctionResult>
-serializeViaSPSToWrapperFunctionResult(const ArgTs &...Args) {
- WrapperFunctionResult Result;
- char *DataPtr =
- WrapperFunctionResult::allocate(Result, SPSArgListT::size(Args...));
- SPSOutputBuffer OB(DataPtr, Result.size());
- if (!SPSArgListT::serialize(OB, Args...))
- return make_error<StringError>(
- "Error serializing arguments to blob in call");
- return std::move(Result);
-}
-
template <typename RetT> class WrapperFunctionHandlerCaller {
public:
template <typename HandlerT, typename ArgTupleT, std::size_t... I>
@@ -173,12 +171,8 @@ public:
auto HandlerResult = WrapperFunctionHandlerCaller<RetT>::call(
std::forward<HandlerT>(H), Args, ArgIndices{});
- if (auto Result = ResultSerializer<decltype(HandlerResult)>::serialize(
- std::move(HandlerResult)))
- return std::move(*Result);
- else
- return WrapperFunctionResult::createOutOfBandError(
- toString(Result.takeError()));
+ return ResultSerializer<decltype(HandlerResult)>::serialize(
+ std::move(HandlerResult));
}
private:
@@ -188,13 +182,12 @@ private:
SPSInputBuffer IB(ArgData, ArgSize);
return SPSArgList<SPSTagTs...>::deserialize(IB, std::get<I>(Args)...);
}
-
};
-// Map function references to function types.
+// Map function pointers to function types.
template <typename RetT, typename... ArgTs,
template <typename> class ResultSerializer, typename... SPSTagTs>
-class WrapperFunctionHandlerHelper<RetT (&)(ArgTs...), ResultSerializer,
+class WrapperFunctionHandlerHelper<RetT (*)(ArgTs...), ResultSerializer,
SPSTagTs...>
: public WrapperFunctionHandlerHelper<RetT(ArgTs...), ResultSerializer,
SPSTagTs...> {};
@@ -217,16 +210,15 @@ class WrapperFunctionHandlerHelper<RetT (ClassT::*)(ArgTs...) const,
template <typename SPSRetTagT, typename RetT> class ResultSerializer {
public:
- static Expected<WrapperFunctionResult> serialize(RetT Result) {
- return serializeViaSPSToWrapperFunctionResult<SPSArgList<SPSRetTagT>>(
- Result);
+ static WrapperFunctionResult serialize(RetT Result) {
+ return WrapperFunctionResult::fromSPSArgs<SPSArgList<SPSRetTagT>>(Result);
}
};
template <typename SPSRetTagT> class ResultSerializer<SPSRetTagT, Error> {
public:
- static Expected<WrapperFunctionResult> serialize(Error Err) {
- return serializeViaSPSToWrapperFunctionResult<SPSArgList<SPSRetTagT>>(
+ static WrapperFunctionResult serialize(Error Err) {
+ return WrapperFunctionResult::fromSPSArgs<SPSArgList<SPSRetTagT>>(
toSPSSerializable(std::move(Err)));
}
};
@@ -234,8 +226,8 @@ public:
template <typename SPSRetTagT, typename T>
class ResultSerializer<SPSRetTagT, Expected<T>> {
public:
- static Expected<WrapperFunctionResult> serialize(Expected<T> E) {
- return serializeViaSPSToWrapperFunctionResult<SPSArgList<SPSRetTagT>>(
+ static WrapperFunctionResult serialize(Expected<T> E) {
+ return WrapperFunctionResult::fromSPSArgs<SPSArgList<SPSRetTagT>>(
toSPSSerializable(std::move(E)));
}
};
@@ -310,14 +302,12 @@ public:
return make_error<StringError>("__orc_rt_jit_dispatch not set");
auto ArgBuffer =
- detail::serializeViaSPSToWrapperFunctionResult<SPSArgList<SPSTagTs...>>(
- Args...);
- if (!ArgBuffer)
- return ArgBuffer.takeError();
-
- WrapperFunctionResult ResultBuffer =
- __orc_rt_jit_dispatch(&__orc_rt_jit_dispatch_ctx, FnTag,
- ArgBuffer->data(), ArgBuffer->size());
+ WrapperFunctionResult::fromSPSArgs<SPSArgList<SPSTagTs...>>(Args...);
+ if (const char *ErrMsg = ArgBuffer.getOutOfBandError())
+ return make_error<StringError>(ErrMsg);
+
+ WrapperFunctionResult ResultBuffer = __orc_rt_jit_dispatch(
+ &__orc_rt_jit_dispatch_ctx, FnTag, ArgBuffer.data(), ArgBuffer.size());
if (auto ErrMsg = ResultBuffer.getOutOfBandError())
return make_error<StringError>(ErrMsg);
@@ -329,8 +319,8 @@ public:
static WrapperFunctionResult handle(const char *ArgData, size_t ArgSize,
HandlerT &&Handler) {
using WFHH =
- detail::WrapperFunctionHandlerHelper<HandlerT, ResultSerializer,
- SPSTagTs...>;
+ detail::WrapperFunctionHandlerHelper<std::remove_reference_t<HandlerT>,
+ ResultSerializer, SPSTagTs...>;
return WFHH::apply(std::forward<HandlerT>(Handler), ArgData, ArgSize);
}
@@ -362,6 +352,106 @@ public:
using WrapperFunction<SPSEmpty(SPSTagTs...)>::handle;
};
+/// A function object that takes an ExecutorAddr as its first argument,
+/// casts that address to a ClassT*, then calls the given method on that
+/// pointer passing in the remaining function arguments. This utility
+/// removes some of the boilerplate from writing wrappers for method calls.
+///
+/// @code{.cpp}
+/// class MyClass {
+/// public:
+/// void myMethod(uint32_t, bool) { ... }
+/// };
+///
+/// // SPS Method signature -- note MyClass object address as first argument.
+/// using SPSMyMethodWrapperSignature =
+/// SPSTuple<SPSExecutorAddr, uint32_t, bool>;
+///
+/// WrapperFunctionResult
+/// myMethodCallWrapper(const char *ArgData, size_t ArgSize) {
+/// return WrapperFunction<SPSMyMethodWrapperSignature>::handle(
+/// ArgData, ArgSize, makeMethodWrapperHandler(&MyClass::myMethod));
+/// }
+/// @endcode
+///
+template <typename RetT, typename ClassT, typename... ArgTs>
+class MethodWrapperHandler {
+public:
+ using MethodT = RetT (ClassT::*)(ArgTs...);
+ MethodWrapperHandler(MethodT M) : M(M) {}
+ RetT operator()(ExecutorAddr ObjAddr, ArgTs &...Args) {
+ return (ObjAddr.toPtr<ClassT *>()->*M)(std::forward<ArgTs>(Args)...);
+ }
+
+private:
+ MethodT M;
+};
+
+/// Create a MethodWrapperHandler object from the given method pointer.
+template <typename RetT, typename ClassT, typename... ArgTs>
+MethodWrapperHandler<RetT, ClassT, ArgTs...>
+makeMethodWrapperHandler(RetT (ClassT::*Method)(ArgTs...)) {
+ return MethodWrapperHandler<RetT, ClassT, ArgTs...>(Method);
+}
+
+/// Represents a call to a wrapper function.
+struct WrapperFunctionCall {
+ ExecutorAddr Func;
+ ExecutorAddrRange ArgData;
+
+ WrapperFunctionCall() = default;
+ WrapperFunctionCall(ExecutorAddr Func, ExecutorAddrRange ArgData)
+ : Func(Func), ArgData(ArgData) {}
+
+ /// Run and return result as WrapperFunctionResult.
+ WrapperFunctionResult run() {
+ WrapperFunctionResult WFR(
+ Func.toPtr<__orc_rt_CWrapperFunctionResult (*)(const char *, size_t)>()(
+ ArgData.Start.toPtr<const char *>(),
+ static_cast<size_t>(ArgData.size().getValue())));
+ return WFR;
+ }
+
+ /// Run call and deserialize result using SPS.
+ template <typename SPSRetT, typename RetT> Error runWithSPSRet(RetT &RetVal) {
+ auto WFR = run();
+ if (const char *ErrMsg = WFR.getOutOfBandError())
+ return make_error<StringError>(ErrMsg);
+ SPSInputBuffer IB(WFR.data(), WFR.size());
+ if (!SPSSerializationTraits<SPSRetT, RetT>::deserialize(IB, RetVal))
+ return make_error<StringError>("Could not deserialize result from "
+ "serialized wrapper function call");
+ return Error::success();
+ }
+
+ /// Overload for SPS functions returning void.
+ Error runWithSPSRet() {
+ SPSEmpty E;
+ return runWithSPSRet<SPSEmpty>(E);
+ }
+};
+
+class SPSWrapperFunctionCall {};
+
+template <>
+class SPSSerializationTraits<SPSWrapperFunctionCall, WrapperFunctionCall> {
+public:
+ static size_t size(const WrapperFunctionCall &WFC) {
+ return SPSArgList<SPSExecutorAddr, SPSExecutorAddrRange>::size(WFC.Func,
+ WFC.ArgData);
+ }
+
+ static bool serialize(SPSOutputBuffer &OB, const WrapperFunctionCall &WFC) {
+ return SPSArgList<SPSExecutorAddr, SPSExecutorAddrRange>::serialize(
+ OB, WFC.Func, WFC.ArgData);
+ }
+
+ static bool deserialize(SPSInputBuffer &IB, WrapperFunctionCall &WFC) {
+ return SPSArgList<SPSExecutorAddr, SPSExecutorAddrRange>::deserialize(
+ IB, WFC.Func, WFC.ArgData);
+ }
+};
+
} // end namespace __orc_rt
#endif // ORC_RT_WRAPPER_FUNCTION_UTILS_H
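Usage sketch for the new WrapperFunctionCall type: the controller can ship a batch of pre-serialized calls and the runtime runs them in order, mirroring runWrapperFunctionCalls in macho_platform.cpp (the no-argument runWithSPSRet() surfaces out-of-band errors and failures to deserialize the empty SPS result).

#include <vector>
#include "wrapper_function_utils.h"

using namespace __orc_rt;

Error runAll(std::vector<WrapperFunctionCall> Calls) {
  for (auto &C : Calls)
    if (auto Err = C.runWithSPSRet()) // invokes the function at C.Func
      return Err;
  return Error::success();
}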
diff --git a/compiler-rt/lib/profile/InstrProfiling.h b/compiler-rt/lib/profile/InstrProfiling.h
index 237acb33ffa1..5b88d7178012 100644
--- a/compiler-rt/lib/profile/InstrProfiling.h
+++ b/compiler-rt/lib/profile/InstrProfiling.h
@@ -150,7 +150,7 @@ int __llvm_profile_write_file(void);
int __llvm_orderfile_write_file(void);
/*!
* \brief this is a wrapper interface to \c __llvm_profile_write_file.
- * After this interface is invoked, a arleady dumped flag will be set
+ * After this interface is invoked, an already dumped flag will be set
* so that profile won't be dumped again during program exit.
* Invocation of interface __llvm_profile_reset_counters will clear
* the flag. This interface is designed to be used to collect profile
@@ -194,7 +194,8 @@ int __llvm_orderfile_dump(void);
void __llvm_profile_set_filename(const char *Name);
/*!
- * \brief Set the FILE object for writing instrumentation data.
+ * \brief Set the FILE object for writing instrumentation data. Returns 0 if
+ * set successfully, or 1 on failure.
*
* Sets the FILE object to be used for subsequent calls to
* \a __llvm_profile_write_file(). The profile file name set by environment
@@ -213,13 +214,12 @@ void __llvm_profile_set_filename(const char *Name);
* instrumented image/DSO). This API only modifies the file object within the
* copy of the runtime available to the calling image.
*
- * Warning: This is a no-op if continuous mode (\ref
- * __llvm_profile_is_continuous_mode_enabled) is on. The reason for this is
- * that in continuous mode, profile counters are mmap()'d to the profile at
- * program initialization time. Support for transferring the mmap'd profile
- * counts to a new file has not been implemented.
+ * Warning: This is a no-op if EnableMerge is 0 in continuous mode (\ref
+ * __llvm_profile_is_continuous_mode_enabled), because disabling merging
+ * requires copying the old profile file to the new one, and this function is
+ * usually used when the process doesn't have permission to open the file.
*/
-void __llvm_profile_set_file_object(FILE *File, int EnableMerge);
+int __llvm_profile_set_file_object(FILE *File, int EnableMerge);
/*! \brief Register to write instrumentation data to file at exit. */
int __llvm_profile_register_write_file_atexit(void);
@@ -301,14 +301,12 @@ void __llvm_profile_set_dumped();
COMPILER_RT_VISIBILITY extern int INSTR_PROF_PROFILE_RUNTIME_VAR;
/*!
- * This variable is defined in InstrProfiling.c. Its main purpose is to
- * encode the raw profile version value and other format related information
- * such as whether the profile is from IR based instrumentation. The variable
- * is defined as weak so that compiler can emit an overriding definition
- * depending on user option. Since we don't support mixing FE and IR based
- * data in the same raw profile data file (in other words, shared libs and
- * main program are expected to be instrumented in the same way), there is
- * no need for this variable to be hidden.
+ * This variable is defined in InstrProfilingVersionVar.c as a hidden symbol
+ * (except on Apple platforms where this symbol is checked by TAPI). Its main
+ * purpose is to encode the raw profile version value and other format related
+ * information such as whether the profile is from IR based instrumentation. The
+ * variable is defined as weak so that compiler can emit an overriding
+ * definition depending on user option.
*/
extern uint64_t INSTR_PROF_RAW_VERSION_VAR; /* __llvm_profile_raw_version */
diff --git a/compiler-rt/lib/profile/InstrProfilingBuffer.c b/compiler-rt/lib/profile/InstrProfilingBuffer.c
index 21fa7ba1ddd6..68b4f5cd6f52 100644
--- a/compiler-rt/lib/profile/InstrProfilingBuffer.c
+++ b/compiler-rt/lib/profile/InstrProfilingBuffer.c
@@ -116,7 +116,7 @@ uint64_t __llvm_profile_get_size_for_buffer_internal(
DataSize, CountersSize, NamesSize, &PaddingBytesBeforeCounters,
&PaddingBytesAfterCounters, &PaddingBytesAfterNames);
- return sizeof(__llvm_profile_header) +
+ return sizeof(__llvm_profile_header) + __llvm_write_binary_ids(NULL) +
(DataSize * sizeof(__llvm_profile_data)) + PaddingBytesBeforeCounters +
(CountersSize * sizeof(uint64_t)) + PaddingBytesAfterCounters +
NamesSize + PaddingBytesAfterNames;
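A hedged sketch of the raw-profile layout this size computation reflects, on the assumption that __llvm_write_binary_ids(NULL) returns, without writing anything, the number of bytes the binary-id block occupies between the header and the data section:

#include <cstdint>

// File order: header | binary ids | data | pad | counters | pad | names | pad.
uint64_t rawProfileSize(uint64_t HeaderSize, uint64_t BinaryIdsSize,
                        uint64_t DataBytes, uint64_t PadBeforeCounters,
                        uint64_t CounterBytes, uint64_t PadAfterCounters,
                        uint64_t NamesBytes, uint64_t PadAfterNames) {
  return HeaderSize + BinaryIdsSize + DataBytes + PadBeforeCounters +
         CounterBytes + PadAfterCounters + NamesBytes + PadAfterNames;
}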
diff --git a/compiler-rt/lib/profile/InstrProfilingFile.c b/compiler-rt/lib/profile/InstrProfilingFile.c
index 518447e3e422..efd9f06ac6ee 100644
--- a/compiler-rt/lib/profile/InstrProfilingFile.c
+++ b/compiler-rt/lib/profile/InstrProfilingFile.c
@@ -92,6 +92,146 @@ static lprofFilename lprofCurFilename = {0, 0, 0, {0}, NULL,
{0}, 0, 0, 0, PNS_unknown};
static int ProfileMergeRequested = 0;
+static int getProfileFileSizeForMerging(FILE *ProfileFile,
+ uint64_t *ProfileFileSize);
+
+#if defined(__APPLE__)
+static const int ContinuousModeSupported = 1;
+static const int UseBiasVar = 0;
+static const char *FileOpenMode = "a+b";
+static void *BiasAddr = NULL;
+static void *BiasDefaultAddr = NULL;
+static int mmapForContinuousMode(uint64_t CurrentFileOffset, FILE *File) {
+ /* Get the sizes of various profile data sections. Taken from
+ * __llvm_profile_get_size_for_buffer(). */
+ const __llvm_profile_data *DataBegin = __llvm_profile_begin_data();
+ const __llvm_profile_data *DataEnd = __llvm_profile_end_data();
+ const uint64_t *CountersBegin = __llvm_profile_begin_counters();
+ const uint64_t *CountersEnd = __llvm_profile_end_counters();
+ const char *NamesBegin = __llvm_profile_begin_names();
+ const char *NamesEnd = __llvm_profile_end_names();
+ const uint64_t NamesSize = (NamesEnd - NamesBegin) * sizeof(char);
+ uint64_t DataSize = __llvm_profile_get_data_size(DataBegin, DataEnd);
+ uint64_t CountersSize = CountersEnd - CountersBegin;
+
+ /* Check that the counter and data sections in this image are
+ * page-aligned. */
+ unsigned PageSize = getpagesize();
+ if ((intptr_t)CountersBegin % PageSize != 0) {
+ PROF_ERR("Counters section not page-aligned (start = %p, pagesz = %u).\n",
+ CountersBegin, PageSize);
+ return 1;
+ }
+ if ((intptr_t)DataBegin % PageSize != 0) {
+ PROF_ERR("Data section not page-aligned (start = %p, pagesz = %u).\n",
+ DataBegin, PageSize);
+ return 1;
+ }
+ int Fileno = fileno(File);
+ /* Determine how much padding is needed before/after the counters and
+ * after the names. */
+ uint64_t PaddingBytesBeforeCounters, PaddingBytesAfterCounters,
+ PaddingBytesAfterNames;
+ __llvm_profile_get_padding_sizes_for_counters(
+ DataSize, CountersSize, NamesSize, &PaddingBytesBeforeCounters,
+ &PaddingBytesAfterCounters, &PaddingBytesAfterNames);
+
+ uint64_t PageAlignedCountersLength =
+ (CountersSize * sizeof(uint64_t)) + PaddingBytesAfterCounters;
+ uint64_t FileOffsetToCounters =
+ CurrentFileOffset + sizeof(__llvm_profile_header) +
+ (DataSize * sizeof(__llvm_profile_data)) + PaddingBytesBeforeCounters;
+ uint64_t *CounterMmap = (uint64_t *)mmap(
+ (void *)CountersBegin, PageAlignedCountersLength, PROT_READ | PROT_WRITE,
+ MAP_FIXED | MAP_SHARED, Fileno, FileOffsetToCounters);
+ if (CounterMmap != CountersBegin) {
+ PROF_ERR(
+ "Continuous counter sync mode is enabled, but mmap() failed (%s).\n"
+ " - CountersBegin: %p\n"
+ " - PageAlignedCountersLength: %" PRIu64 "\n"
+ " - Fileno: %d\n"
+ " - FileOffsetToCounters: %" PRIu64 "\n",
+ strerror(errno), CountersBegin, PageAlignedCountersLength, Fileno,
+ FileOffsetToCounters);
+ return 1;
+ }
+ return 0;
+}
+#elif defined(__ELF__) || defined(_WIN32)
+
+#define INSTR_PROF_PROFILE_COUNTER_BIAS_DEFAULT_VAR \
+ INSTR_PROF_CONCAT(INSTR_PROF_PROFILE_COUNTER_BIAS_VAR, _default)
+intptr_t INSTR_PROF_PROFILE_COUNTER_BIAS_DEFAULT_VAR = 0;
+
+/* This variable is a weak external reference which could be used to detect
+ * whether or not the compiler defined this symbol. */
+#if defined(_MSC_VER)
+COMPILER_RT_VISIBILITY extern intptr_t INSTR_PROF_PROFILE_COUNTER_BIAS_VAR;
+#if defined(_M_IX86) || defined(__i386__)
+#define WIN_SYM_PREFIX "_"
+#else
+#define WIN_SYM_PREFIX
+#endif
+#pragma comment( \
+ linker, "/alternatename:" WIN_SYM_PREFIX INSTR_PROF_QUOTE( \
+ INSTR_PROF_PROFILE_COUNTER_BIAS_VAR) "=" WIN_SYM_PREFIX \
+ INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_COUNTER_BIAS_DEFAULT_VAR))
+#else
+COMPILER_RT_VISIBILITY extern intptr_t INSTR_PROF_PROFILE_COUNTER_BIAS_VAR
+ __attribute__((weak, alias(INSTR_PROF_QUOTE(
+ INSTR_PROF_PROFILE_COUNTER_BIAS_DEFAULT_VAR))));
+#endif
+static const int ContinuousModeSupported = 1;
+static const int UseBiasVar = 1;
+/* TODO: If there are two DSOs, the second DSO initialization will truncate the
+ * first profile file. */
+static const char *FileOpenMode = "w+b";
+/* This symbol is defined by the compiler when runtime counter relocation is
+ * used and runtime provides a weak alias so we can check if it's defined. */
+static void *BiasAddr = &INSTR_PROF_PROFILE_COUNTER_BIAS_VAR;
+static void *BiasDefaultAddr = &INSTR_PROF_PROFILE_COUNTER_BIAS_DEFAULT_VAR;
+static int mmapForContinuousMode(uint64_t CurrentFileOffset, FILE *File) {
+ /* Get the sizes of various profile data sections. Taken from
+ * __llvm_profile_get_size_for_buffer(). */
+ const __llvm_profile_data *DataBegin = __llvm_profile_begin_data();
+ const __llvm_profile_data *DataEnd = __llvm_profile_end_data();
+ const uint64_t *CountersBegin = __llvm_profile_begin_counters();
+ const uint64_t *CountersEnd = __llvm_profile_end_counters();
+ uint64_t DataSize = __llvm_profile_get_data_size(DataBegin, DataEnd);
+ /* Get the file size. */
+ uint64_t FileSize = 0;
+ if (getProfileFileSizeForMerging(File, &FileSize))
+ return 1;
+
+ /* Map the profile. */
+ char *Profile = (char *)mmap(NULL, FileSize, PROT_READ | PROT_WRITE,
+ MAP_SHARED, fileno(File), 0);
+ if (Profile == MAP_FAILED) {
+ PROF_ERR("Unable to mmap profile: %s\n", strerror(errno));
+ return 1;
+ }
+ const uint64_t CountersOffsetInBiasMode =
+ sizeof(__llvm_profile_header) + __llvm_write_binary_ids(NULL) +
+ (DataSize * sizeof(__llvm_profile_data));
+ /* Update the profile fields based on the current mapping. */
+ INSTR_PROF_PROFILE_COUNTER_BIAS_VAR =
+ (intptr_t)Profile - (uintptr_t)CountersBegin + CountersOffsetInBiasMode;
+
+ /* Return the memory allocated for counters to OS. */
+ lprofReleaseMemoryPagesToOS((uintptr_t)CountersBegin, (uintptr_t)CountersEnd);
+ return 0;
+}
+#else
+static const int ContinuousModeSupported = 0;
+static const int UseBiasVar = 0;
+static const char *FileOpenMode = "a+b";
+static void *BiasAddr = NULL;
+static void *BiasDefaultAddr = NULL;
+static int mmapForContinuousMode(uint64_t CurrentFileOffset, FILE *File) {
+ return 0;
+}
+#endif
+
static int isProfileMergeRequested() { return ProfileMergeRequested; }
static void setProfileMergeRequested(int EnableMerge) {
ProfileMergeRequested = EnableMerge;
@@ -101,18 +241,6 @@ static FILE *ProfileFile = NULL;
static FILE *getProfileFile() { return ProfileFile; }
static void setProfileFile(FILE *File) { ProfileFile = File; }
-COMPILER_RT_VISIBILITY void __llvm_profile_set_file_object(FILE *File,
- int EnableMerge) {
- if (__llvm_profile_is_continuous_mode_enabled()) {
- PROF_WARN("__llvm_profile_set_file_object(fd=%d) not supported, because "
- "continuous sync mode (%%c) is enabled",
- fileno(File));
- return;
- }
- setProfileFile(File);
- setProfileMergeRequested(EnableMerge);
-}
-
static int getCurFilenameLength();
static const char *getCurFilename(char *FilenameBuf, int ForceUseBuf);
static unsigned doMerging() {
@@ -426,13 +554,6 @@ static void truncateCurrentFile(void) {
fclose(File);
}
-// TODO: Move these functions into InstrProfilingPlatform* files.
-#if defined(__APPLE__)
-static void assertIsZero(int *i) {
- if (*i)
- PROF_WARN("Expected flag to be 0, but got: %d\n", *i);
-}
-
/* Write a partial profile to \p Filename, which is required to be backed by
* the open file object \p File. */
static int writeProfileWithFileObject(const char *Filename, FILE *File) {
@@ -444,215 +565,22 @@ static int writeProfileWithFileObject(const char *Filename, FILE *File) {
return rc;
}
-/* Unlock the profile \p File and clear the unlock flag. */
-static void unlockProfile(int *ProfileRequiresUnlock, FILE *File) {
- if (!*ProfileRequiresUnlock) {
- PROF_WARN("%s", "Expected to require profile unlock\n");
- }
-
- lprofUnlockFileHandle(File);
- *ProfileRequiresUnlock = 0;
-}
-
static void initializeProfileForContinuousMode(void) {
if (!__llvm_profile_is_continuous_mode_enabled())
return;
-
- /* Get the sizes of various profile data sections. Taken from
- * __llvm_profile_get_size_for_buffer(). */
- const __llvm_profile_data *DataBegin = __llvm_profile_begin_data();
- const __llvm_profile_data *DataEnd = __llvm_profile_end_data();
- const uint64_t *CountersBegin = __llvm_profile_begin_counters();
- const uint64_t *CountersEnd = __llvm_profile_end_counters();
- const char *NamesBegin = __llvm_profile_begin_names();
- const char *NamesEnd = __llvm_profile_end_names();
- const uint64_t NamesSize = (NamesEnd - NamesBegin) * sizeof(char);
- uint64_t DataSize = __llvm_profile_get_data_size(DataBegin, DataEnd);
- uint64_t CountersSize = CountersEnd - CountersBegin;
-
- /* Check that the counter and data sections in this image are page-aligned. */
- unsigned PageSize = getpagesize();
- if ((intptr_t)CountersBegin % PageSize != 0) {
- PROF_ERR("Counters section not page-aligned (start = %p, pagesz = %u).\n",
- CountersBegin, PageSize);
- return;
- }
- if ((intptr_t)DataBegin % PageSize != 0) {
- PROF_ERR("Data section not page-aligned (start = %p, pagesz = %u).\n",
- DataBegin, PageSize);
+ if (!ContinuousModeSupported) {
+ PROF_ERR("%s\n", "continuous mode is unsupported on this platform");
return;
}
-
- int Length = getCurFilenameLength();
- char *FilenameBuf = (char *)COMPILER_RT_ALLOCA(Length + 1);
- const char *Filename = getCurFilename(FilenameBuf, 0);
- if (!Filename)
- return;
-
- FILE *File = NULL;
- off_t CurrentFileOffset = 0;
- off_t OffsetModPage = 0;
-
- /* Whether an exclusive lock on the profile must be dropped after init.
- * Use a cleanup to warn if the unlock does not occur. */
- COMPILER_RT_CLEANUP(assertIsZero) int ProfileRequiresUnlock = 0;
-
- if (!doMerging()) {
- /* We are not merging profiles, so open the raw profile in append mode. */
- File = fopen(Filename, "a+b");
- if (!File)
- return;
-
- /* Check that the offset within the file is page-aligned. */
- CurrentFileOffset = ftello(File);
- OffsetModPage = CurrentFileOffset % PageSize;
- if (OffsetModPage != 0) {
- PROF_ERR("Continuous counter sync mode is enabled, but raw profile is not"
- "page-aligned. CurrentFileOffset = %" PRIu64 ", pagesz = %u.\n",
- (uint64_t)CurrentFileOffset, PageSize);
- return;
- }
-
- /* Grow the profile so that mmap() can succeed. Leak the file handle, as
- * the file should stay open. */
- if (writeProfileWithFileObject(Filename, File) != 0)
- return;
- } else {
- /* We are merging profiles. Map the counter section as shared memory into
- * the profile, i.e. into each participating process. An increment in one
- * process should be visible to every other process with the same counter
- * section mapped. */
- File = lprofOpenFileEx(Filename);
- if (!File)
- return;
-
- ProfileRequiresUnlock = 1;
-
- uint64_t ProfileFileSize;
- if (getProfileFileSizeForMerging(File, &ProfileFileSize) == -1)
- return unlockProfile(&ProfileRequiresUnlock, File);
-
- if (ProfileFileSize == 0) {
- /* Grow the profile so that mmap() can succeed. Leak the file handle, as
- * the file should stay open. */
- if (writeProfileWithFileObject(Filename, File) != 0)
- return unlockProfile(&ProfileRequiresUnlock, File);
- } else {
- /* The merged profile has a non-zero length. Check that it is compatible
- * with the data in this process. */
- char *ProfileBuffer;
- if (mmapProfileForMerging(File, ProfileFileSize, &ProfileBuffer) == -1 ||
- munmap(ProfileBuffer, ProfileFileSize) == -1)
- return unlockProfile(&ProfileRequiresUnlock, File);
- }
- }
-
- /* mmap() the profile counters so long as there is at least one counter.
- * If there aren't any counters, mmap() would fail with EINVAL. */
- if (CountersSize > 0) {
- int Fileno = fileno(File);
-
- /* Determine how much padding is needed before/after the counters and after
- * the names. */
- uint64_t PaddingBytesBeforeCounters, PaddingBytesAfterCounters,
- PaddingBytesAfterNames;
- __llvm_profile_get_padding_sizes_for_counters(
- DataSize, CountersSize, NamesSize, &PaddingBytesBeforeCounters,
- &PaddingBytesAfterCounters, &PaddingBytesAfterNames);
-
- uint64_t PageAlignedCountersLength =
- (CountersSize * sizeof(uint64_t)) + PaddingBytesAfterCounters;
- uint64_t FileOffsetToCounters =
- CurrentFileOffset + sizeof(__llvm_profile_header) +
- (DataSize * sizeof(__llvm_profile_data)) + PaddingBytesBeforeCounters;
-
- uint64_t *CounterMmap = (uint64_t *)mmap(
- (void *)CountersBegin, PageAlignedCountersLength, PROT_READ | PROT_WRITE,
- MAP_FIXED | MAP_SHARED, Fileno, FileOffsetToCounters);
- if (CounterMmap != CountersBegin) {
- PROF_ERR(
- "Continuous counter sync mode is enabled, but mmap() failed (%s).\n"
- " - CountersBegin: %p\n"
- " - PageAlignedCountersLength: %" PRIu64 "\n"
- " - Fileno: %d\n"
- " - FileOffsetToCounters: %" PRIu64 "\n",
- strerror(errno), CountersBegin, PageAlignedCountersLength, Fileno,
- FileOffsetToCounters);
- }
- }
-
- if (ProfileRequiresUnlock)
- unlockProfile(&ProfileRequiresUnlock, File);
-}
-#elif defined(__ELF__) || defined(_WIN32)
-
-#define INSTR_PROF_PROFILE_COUNTER_BIAS_DEFAULT_VAR \
- INSTR_PROF_CONCAT(INSTR_PROF_PROFILE_COUNTER_BIAS_VAR, _default)
-intptr_t INSTR_PROF_PROFILE_COUNTER_BIAS_DEFAULT_VAR = 0;
-
-/* This variable is a weak external reference which could be used to detect
- * whether or not the compiler defined this symbol. */
-#if defined(_WIN32)
-COMPILER_RT_VISIBILITY extern intptr_t INSTR_PROF_PROFILE_COUNTER_BIAS_VAR;
-#pragma comment(linker, "/alternatename:" \
- INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_COUNTER_BIAS_VAR) "=" \
- INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_COUNTER_BIAS_DEFAULT_VAR))
-#else
-COMPILER_RT_VISIBILITY extern intptr_t INSTR_PROF_PROFILE_COUNTER_BIAS_VAR
- __attribute__((weak, alias(INSTR_PROF_QUOTE(
- INSTR_PROF_PROFILE_COUNTER_BIAS_DEFAULT_VAR))));
-#endif
-
-static int writeMMappedFile(FILE *OutputFile, char **Profile) {
- if (!OutputFile)
- return -1;
-
- /* Write the data into a file. */
- setupIOBuffer();
- ProfDataWriter fileWriter;
- initFileWriter(&fileWriter, OutputFile);
- if (lprofWriteData(&fileWriter, NULL, 0)) {
- PROF_ERR("Failed to write profile: %s\n", strerror(errno));
- return -1;
- }
- fflush(OutputFile);
-
- /* Get the file size. */
- uint64_t FileSize = ftell(OutputFile);
-
- /* Map the profile. */
- *Profile = (char *)mmap(
- NULL, FileSize, PROT_READ | PROT_WRITE, MAP_SHARED, fileno(OutputFile), 0);
- if (*Profile == MAP_FAILED) {
- PROF_ERR("Unable to mmap profile: %s\n", strerror(errno));
- return -1;
- }
-
- return 0;
-}
-
-static void initializeProfileForContinuousMode(void) {
- if (!__llvm_profile_is_continuous_mode_enabled())
- return;
-
- /* This symbol is defined by the compiler when runtime counter relocation is
- * used and runtime provides a weak alias so we can check if it's defined. */
- void *BiasAddr = &INSTR_PROF_PROFILE_COUNTER_BIAS_VAR;
- void *BiasDefaultAddr = &INSTR_PROF_PROFILE_COUNTER_BIAS_DEFAULT_VAR;
- if (BiasAddr == BiasDefaultAddr) {
+ if (UseBiasVar && BiasAddr == BiasDefaultAddr) {
PROF_ERR("%s\n", "__llvm_profile_counter_bias is undefined");
return;
}
- /* Get the sizes of various profile data sections. Taken from
- * __llvm_profile_get_size_for_buffer(). */
- const __llvm_profile_data *DataBegin = __llvm_profile_begin_data();
- const __llvm_profile_data *DataEnd = __llvm_profile_end_data();
+ /* Get the sizes of counter section. */
const uint64_t *CountersBegin = __llvm_profile_begin_counters();
const uint64_t *CountersEnd = __llvm_profile_end_counters();
- uint64_t DataSize = __llvm_profile_get_data_size(DataBegin, DataEnd);
- const uint64_t CountersOffset =
- sizeof(__llvm_profile_header) + (DataSize * sizeof(__llvm_profile_data));
+ uint64_t CountersSize = CountersEnd - CountersBegin;
int Length = getCurFilenameLength();
char *FilenameBuf = (char *)COMPILER_RT_ALLOCA(Length + 1);
@@ -661,18 +589,12 @@ static void initializeProfileForContinuousMode(void) {
return;
FILE *File = NULL;
- char *Profile = NULL;
-
- if (!doMerging()) {
- File = fopen(Filename, "w+b");
- if (!File)
- return;
-
- if (writeMMappedFile(File, &Profile) == -1) {
- fclose(File);
- return;
- }
- } else {
+ uint64_t CurrentFileOffset = 0;
+ if (doMerging()) {
+ /* We are merging profiles. Map the counter section as shared memory into
+ * the profile, i.e. into each participating process. An increment in one
+ * process should be visible to every other process with the same counter
+ * section mapped. */
File = lprofOpenFileEx(Filename);
if (!File)
return;
@@ -683,37 +605,54 @@ static void initializeProfileForContinuousMode(void) {
fclose(File);
return;
}
-
- if (!ProfileFileSize) {
- if (writeMMappedFile(File, &Profile) == -1) {
+ if (ProfileFileSize == 0) {
+ /* Grow the profile so that mmap() can succeed. Leak the file handle, as
+ * the file should stay open. */
+ if (writeProfileWithFileObject(Filename, File) != 0) {
+ lprofUnlockFileHandle(File);
fclose(File);
return;
}
} else {
/* The merged profile has a non-zero length. Check that it is compatible
* with the data in this process. */
- if (mmapProfileForMerging(File, ProfileFileSize, &Profile) == -1) {
+ char *ProfileBuffer;
+ if (mmapProfileForMerging(File, ProfileFileSize, &ProfileBuffer) == -1) {
+ lprofUnlockFileHandle(File);
fclose(File);
return;
}
+ (void)munmap(ProfileBuffer, ProfileFileSize);
+ }
+ } else {
+ File = fopen(Filename, FileOpenMode);
+ if (!File)
+ return;
+ /* Check that the offset within the file is page-aligned. */
+ CurrentFileOffset = ftell(File);
+ unsigned PageSize = getpagesize();
+ if (CurrentFileOffset % PageSize != 0) {
+ PROF_ERR("Continuous counter sync mode is enabled, but raw profile is not"
+ "page-aligned. CurrentFileOffset = %" PRIu64 ", pagesz = %u.\n",
+ (uint64_t)CurrentFileOffset, PageSize);
+ return;
+ }
+ if (writeProfileWithFileObject(Filename, File) != 0) {
+ fclose(File);
+ return;
}
-
- lprofUnlockFileHandle(File);
}
- /* Update the profile fields based on the current mapping. */
- INSTR_PROF_PROFILE_COUNTER_BIAS_VAR =
- (intptr_t)Profile - (uintptr_t)CountersBegin +
- CountersOffset;
+ /* mmap() the profile counters so long as there is at least one counter.
+ * If there aren't any counters, mmap() would fail with EINVAL. */
+ if (CountersSize > 0)
+ mmapForContinuousMode(CurrentFileOffset, File);
- /* Return the memory allocated for counters to OS. */
- lprofReleaseMemoryPagesToOS((uintptr_t)CountersBegin, (uintptr_t)CountersEnd);
-}
-#else
-static void initializeProfileForContinuousMode(void) {
- PROF_ERR("%s\n", "continuous mode is unsupported on this platform");
+ if (doMerging()) {
+ lprofUnlockFileHandle(File);
+ fclose(File);
+ }
}
-#endif
static const char *DefaultProfileName = "default.profraw";
static void resetFilenameToDefault(void) {
@@ -1205,4 +1144,53 @@ int __llvm_profile_register_write_file_atexit(void) {
return atexit(writeFileWithoutReturn);
}
+COMPILER_RT_VISIBILITY int __llvm_profile_set_file_object(FILE *File,
+ int EnableMerge) {
+ if (__llvm_profile_is_continuous_mode_enabled()) {
+ if (!EnableMerge) {
+ PROF_WARN("__llvm_profile_set_file_object(fd=%d) not supported in "
+ "continuous sync mode when merging is disabled\n",
+ fileno(File));
+ return 1;
+ }
+ if (lprofLockFileHandle(File) != 0) {
+ PROF_WARN("Data may be corrupted during profile merging : %s\n",
+ "Fail to obtain file lock due to system limit.");
+ }
+ uint64_t ProfileFileSize = 0;
+ if (getProfileFileSizeForMerging(File, &ProfileFileSize) == -1) {
+ lprofUnlockFileHandle(File);
+ return 1;
+ }
+ if (ProfileFileSize == 0) {
+ FreeHook = &free;
+ setupIOBuffer();
+ ProfDataWriter fileWriter;
+ initFileWriter(&fileWriter, File);
+ if (lprofWriteData(&fileWriter, 0, 0)) {
+ lprofUnlockFileHandle(File);
+ PROF_ERR("Failed to write file \"%d\": %s\n", fileno(File),
+ strerror(errno));
+ return 1;
+ }
+ fflush(File);
+ } else {
+ /* The merged profile has a non-zero length. Check that it is compatible
+ * with the data in this process. */
+ char *ProfileBuffer;
+ if (mmapProfileForMerging(File, ProfileFileSize, &ProfileBuffer) == -1) {
+ lprofUnlockFileHandle(File);
+ return 1;
+ }
+ (void)munmap(ProfileBuffer, ProfileFileSize);
+ }
+ mmapForContinuousMode(0, File);
+ lprofUnlockFileHandle(File);
+ } else {
+ setProfileFile(File);
+ setProfileMergeRequested(EnableMerge);
+ }
+ return 0;
+}
+
#endif
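For the bias-variable (ELF/Windows) path above, a minimal sketch of the relocation arithmetic, assuming Profile points at the start of the mmap'd raw profile and CountersOffset is the header + binary-ids + data-section size: compiler-emitted counter updates add this bias to an in-image counter address and land on the matching slot in the mapped file.

#include <cstdint>

intptr_t computeCounterBias(const char *Profile, uint64_t CountersOffset,
                            const uint64_t *CountersBegin) {
  // (in-image counter address) + bias == (mapped-file address of that counter)
  return (intptr_t)Profile - (intptr_t)(uintptr_t)CountersBegin +
         (intptr_t)CountersOffset;
}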
diff --git a/compiler-rt/lib/profile/InstrProfilingInternal.h b/compiler-rt/lib/profile/InstrProfilingInternal.h
index ffa790a4cb66..1394ea8c42f8 100644
--- a/compiler-rt/lib/profile/InstrProfilingInternal.h
+++ b/compiler-rt/lib/profile/InstrProfilingInternal.h
@@ -145,8 +145,8 @@ typedef struct VPDataReaderType {
uint32_t N);
} VPDataReaderType;
-/* Write profile data to destinitation. If SkipNameDataWrite is set to 1,
- the name data is already in destintation, we just skip over it. */
+/* Write profile data to destination. If SkipNameDataWrite is set to 1,
+ the name data is already in destination, we just skip over it. */
int lprofWriteData(ProfDataWriter *Writer, VPDataReaderType *VPDataReader,
int SkipNameDataWrite);
int lprofWriteDataImpl(ProfDataWriter *Writer,
diff --git a/compiler-rt/lib/profile/InstrProfilingMerge.c b/compiler-rt/lib/profile/InstrProfilingMerge.c
index 913228513259..674b1898b046 100644
--- a/compiler-rt/lib/profile/InstrProfilingMerge.c
+++ b/compiler-rt/lib/profile/InstrProfilingMerge.c
@@ -22,6 +22,7 @@ void (*VPMergeHook)(ValueProfData *, __llvm_profile_data *);
COMPILER_RT_VISIBILITY
uint64_t lprofGetLoadModuleSignature() {
/* A very fast way to compute a module signature. */
+ uint64_t Version = __llvm_profile_get_version();
uint64_t CounterSize = (uint64_t)(__llvm_profile_end_counters() -
__llvm_profile_begin_counters());
uint64_t DataSize = __llvm_profile_get_data_size(__llvm_profile_begin_data(),
@@ -33,7 +34,7 @@ uint64_t lprofGetLoadModuleSignature() {
const __llvm_profile_data *FirstD = __llvm_profile_begin_data();
return (NamesSize << 40) + (CounterSize << 30) + (DataSize << 20) +
- (NumVnodes << 10) + (DataSize > 0 ? FirstD->NameRef : 0);
+ (NumVnodes << 10) + (DataSize > 0 ? FirstD->NameRef : 0) + Version;
}
/* Returns 1 if profile is not structurally compatible. */
@@ -44,7 +45,8 @@ int __llvm_profile_check_compatibility(const char *ProfileData,
__llvm_profile_header *Header = (__llvm_profile_header *)ProfileData;
__llvm_profile_data *SrcDataStart, *SrcDataEnd, *SrcData, *DstData;
SrcDataStart =
- (__llvm_profile_data *)(ProfileData + sizeof(__llvm_profile_header));
+ (__llvm_profile_data *)(ProfileData + sizeof(__llvm_profile_header) +
+ Header->BinaryIdsSize);
SrcDataEnd = SrcDataStart + Header->DataSize;
if (ProfileSize < sizeof(__llvm_profile_header))
@@ -63,7 +65,7 @@ int __llvm_profile_check_compatibility(const char *ProfileData,
Header->ValueKindLast != IPVK_Last)
return 1;
- if (ProfileSize < sizeof(__llvm_profile_header) +
+ if (ProfileSize < sizeof(__llvm_profile_header) + Header->BinaryIdsSize +
Header->DataSize * sizeof(__llvm_profile_data) +
Header->NamesSize + Header->CountersSize)
return 1;
@@ -81,6 +83,14 @@ int __llvm_profile_check_compatibility(const char *ProfileData,
return 0;
}
+static uintptr_t signextIfWin64(void *V) {
+#ifdef _WIN64
+ return (uintptr_t)(int32_t)(uintptr_t)V;
+#else
+ return (uintptr_t)V;
+#endif
+}
+
COMPILER_RT_VISIBILITY
int __llvm_profile_merge_from_buffer(const char *ProfileData,
uint64_t ProfileSize) {
@@ -89,9 +99,11 @@ int __llvm_profile_merge_from_buffer(const char *ProfileData,
uint64_t *SrcCountersStart;
const char *SrcNameStart;
const char *SrcValueProfDataStart, *SrcValueProfData;
+ uintptr_t CountersDelta = Header->CountersDelta;
SrcDataStart =
- (__llvm_profile_data *)(ProfileData + sizeof(__llvm_profile_header));
+ (__llvm_profile_data *)(ProfileData + sizeof(__llvm_profile_header) +
+ Header->BinaryIdsSize);
SrcDataEnd = SrcDataStart + Header->DataSize;
SrcCountersStart = (uint64_t *)SrcDataEnd;
SrcNameStart = (const char *)(SrcCountersStart + Header->CountersSize);
@@ -105,15 +117,30 @@ int __llvm_profile_merge_from_buffer(const char *ProfileData,
DstData = (__llvm_profile_data *)__llvm_profile_begin_data(),
SrcValueProfData = SrcValueProfDataStart;
SrcData < SrcDataEnd; ++SrcData, ++DstData) {
- uint64_t *DstCounters = (uint64_t *)DstData->CounterPtr;
+ // For the in-memory destination, CounterPtr is the distance from the start
+ // address of the data to the start address of the counter. On WIN64,
+ // CounterPtr is a truncated 32-bit value due to COFF limitation. Sign
+ // extend CounterPtr to get the original value.
+ uint64_t *DstCounters =
+ (uint64_t *)((uintptr_t)DstData + signextIfWin64(DstData->CounterPtr));
unsigned NVK = 0;
+ // SrcData is a serialized representation of the memory image. We need to
+ // compute the in-buffer counter offset from the in-memory address distance.
+ // The initial CountersDelta is the in-memory address difference
+ // start(__llvm_prf_cnts)-start(__llvm_prf_data), so SrcData->CounterPtr -
+ // CountersDelta computes the offset into the in-buffer counter section.
+ //
+ // On WIN64, CountersDelta is truncated as well, so no need for signext.
+ uint64_t *SrcCounters =
+ SrcCountersStart +
+ ((uintptr_t)SrcData->CounterPtr - CountersDelta) / sizeof(uint64_t);
+ // CountersDelta needs to be decreased as we advance to the next data
+ // record.
+ CountersDelta -= sizeof(*SrcData);
unsigned NC = SrcData->NumCounters;
if (NC == 0)
return 1;
- uint64_t *SrcCounters = SrcCountersStart + ((size_t)SrcData->CounterPtr -
- Header->CountersDelta) /
- sizeof(uint64_t);
if (SrcCounters < SrcCountersStart ||
(const char *)SrcCounters >= SrcNameStart ||
(const char *)(SrcCounters + NC) > SrcNameStart)
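To make the offset comments above concrete, a small sketch: by the time a given record is processed, CountersDelta equals start(__llvm_prf_cnts) minus that record's address, and the serialized CounterPtr is the record's counter address minus the record address, so the subtraction leaves the byte offset of the record's counters from the start of the in-buffer counter section.

#include <cstdint>

uint64_t srcCounterIndex(uintptr_t CounterPtr, uintptr_t CountersDelta) {
  // (counter - record) - (cnts_start - record) == counter - cnts_start
  return (uint64_t)(CounterPtr - CountersDelta) / sizeof(uint64_t);
}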
diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformFuchsia.c b/compiler-rt/lib/profile/InstrProfilingPlatformFuchsia.c
index 0146b14c193f..9bea795e8e3a 100644
--- a/compiler-rt/lib/profile/InstrProfilingPlatformFuchsia.c
+++ b/compiler-rt/lib/profile/InstrProfilingPlatformFuchsia.c
@@ -52,7 +52,7 @@ static inline void lprofWrite(const char *fmt, ...) {
int ret = vsnprintf(s, sizeof(s), fmt, ap);
va_end(ap);
- __sanitizer_log_write(s, ret + 1);
+ __sanitizer_log_write(s, ret);
}
struct lprofVMOWriterCtx {
@@ -120,7 +120,8 @@ void __llvm_profile_initialize(void) {
const uint64_t *CountersEnd = __llvm_profile_end_counters();
const uint64_t DataSize = __llvm_profile_get_data_size(DataBegin, DataEnd);
const uint64_t CountersOffset =
- sizeof(__llvm_profile_header) + (DataSize * sizeof(__llvm_profile_data));
+ sizeof(__llvm_profile_header) + __llvm_write_binary_ids(NULL) +
+ (DataSize * sizeof(__llvm_profile_data));
uint64_t CountersSize = CountersEnd - CountersBegin;
/* Don't publish a VMO if there are no counters. */
@@ -178,9 +179,6 @@ void __llvm_profile_initialize(void) {
* also consumes the VMO handle. */
__sanitizer_publish_data(ProfileSinkName, Vmo);
- /* Use the dumpfile symbolizer markup element to write the name of VMO. */
- lprofWrite("LLVM Profile: {{{dumpfile:%s:%s}}}\n", ProfileSinkName, VmoName);
-
/* Update the profile fields based on the current mapping. */
INSTR_PROF_PROFILE_COUNTER_BIAS_VAR =
(intptr_t)Mapping - (uintptr_t)CountersBegin + CountersOffset;
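The bias set above is chosen so that an in-image counter address plus the bias lands on the matching slot inside the freshly mapped VMO, now offset by the binary-id block as well. A self-contained sketch of that arithmetic with invented numbers (the mapping address, section sizes and binary-id block size are all hypothetical):

    #include <assert.h>
    #include <stdint.h>

    int main(void) {
      uintptr_t Mapping = 0x200000;       /* hypothetical VMO mapping address */
      uintptr_t CountersBegin = 0x10f000; /* hypothetical in-image __llvm_prf_cnts */
      uint64_t HeaderSize = 0x88;         /* stand-in for sizeof(__llvm_profile_header) */
      uint64_t BinaryIdsSize = 0x28;      /* whatever __llvm_write_binary_ids(NULL) reports */
      uint64_t DataSize = 100, DataRecordSize = 0x30;

      uint64_t CountersOffset = HeaderSize + BinaryIdsSize + DataSize * DataRecordSize;
      intptr_t Bias = (intptr_t)Mapping - (intptr_t)CountersBegin + CountersOffset;
      /* A biased counter address points into the counter section of the mapping. */
      uintptr_t k = 8; /* some byte offset within the counter section */
      assert(CountersBegin + k + Bias == Mapping + CountersOffset + k);
      return 0;
    }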
diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c
index 508624a80cd6..e61f90b2cef9 100644
--- a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c
+++ b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c
@@ -17,6 +17,15 @@
#include "InstrProfiling.h"
#include "InstrProfilingInternal.h"
+#if defined(__FreeBSD__) && !defined(ElfW)
+/*
+ * FreeBSD's elf.h and link.h headers do not define the ElfW(type) macro yet.
+ * If this is added to all supported FreeBSD versions in the future, this
+ * compatibility macro can be removed.
+ */
+#define ElfW(type) __ElfN(type)
+#endif
+
#define PROF_DATA_START INSTR_PROF_SECT_START(INSTR_PROF_DATA_COMMON)
#define PROF_DATA_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_DATA_COMMON)
#define PROF_NAME_START INSTR_PROF_SECT_START(INSTR_PROF_NAME_COMMON)
@@ -76,6 +85,7 @@ COMPILER_RT_VISIBILITY ValueProfNode *__llvm_profile_end_vnodes(void) {
COMPILER_RT_VISIBILITY ValueProfNode *CurrentVNode = &PROF_VNODES_START;
COMPILER_RT_VISIBILITY ValueProfNode *EndVNode = &PROF_VNODES_STOP;
+#ifdef NT_GNU_BUILD_ID
static size_t RoundUp(size_t size, size_t align) {
return (size + align - 1) & ~(align - 1);
}
@@ -84,11 +94,14 @@ static size_t RoundUp(size_t size, size_t align) {
* Write binary id length and then its data, because binary id does not
* have a fixed length.
*/
-int WriteOneBinaryId(ProfDataWriter *Writer, uint64_t BinaryIdLen,
- const uint8_t *BinaryIdData) {
+static int WriteOneBinaryId(ProfDataWriter *Writer, uint64_t BinaryIdLen,
+ const uint8_t *BinaryIdData,
+ uint64_t BinaryIdPadding) {
ProfDataIOVec BinaryIdIOVec[] = {
{&BinaryIdLen, sizeof(uint64_t), 1, 0},
- {BinaryIdData, sizeof(uint8_t), BinaryIdLen, 0}};
+ {BinaryIdData, sizeof(uint8_t), BinaryIdLen, 0},
+ {NULL, sizeof(uint8_t), BinaryIdPadding, 1},
+ };
if (Writer->Write(Writer, BinaryIdIOVec,
sizeof(BinaryIdIOVec) / sizeof(*BinaryIdIOVec)))
return -1;
@@ -109,7 +122,8 @@ int WriteOneBinaryId(ProfDataWriter *Writer, uint64_t BinaryIdLen,
* Note sections like .note.ABI-tag and .note.gnu.build-id are aligned
* to 4 bytes, so round n_namesz and n_descsz to the nearest 4 bytes.
*/
-int WriteBinaryIdForNote(ProfDataWriter *Writer, const ElfW(Nhdr) * Note) {
+static int WriteBinaryIdForNote(ProfDataWriter *Writer,
+ const ElfW(Nhdr) * Note) {
int BinaryIdSize = 0;
const char *NoteName = (const char *)Note + sizeof(ElfW(Nhdr));
@@ -119,11 +133,12 @@ int WriteBinaryIdForNote(ProfDataWriter *Writer, const ElfW(Nhdr) * Note) {
uint64_t BinaryIdLen = Note->n_descsz;
const uint8_t *BinaryIdData =
(const uint8_t *)(NoteName + RoundUp(Note->n_namesz, 4));
- if (Writer != NULL &&
- WriteOneBinaryId(Writer, BinaryIdLen, BinaryIdData) == -1)
+ uint8_t BinaryIdPadding = __llvm_profile_get_num_padding_bytes(BinaryIdLen);
+ if (Writer != NULL && WriteOneBinaryId(Writer, BinaryIdLen, BinaryIdData,
+ BinaryIdPadding) == -1)
return -1;
- BinaryIdSize = sizeof(BinaryIdLen) + BinaryIdLen;
+ BinaryIdSize = sizeof(BinaryIdLen) + BinaryIdLen + BinaryIdPadding;
}
return BinaryIdSize;
@@ -134,8 +149,8 @@ int WriteBinaryIdForNote(ProfDataWriter *Writer, const ElfW(Nhdr) * Note) {
* If writer is given, write binary ids into profiles.
* If an error happens while writing, return -1.
*/
-int WriteBinaryIds(ProfDataWriter *Writer, const ElfW(Nhdr) * Note,
- const ElfW(Nhdr) * NotesEnd) {
+static int WriteBinaryIds(ProfDataWriter *Writer, const ElfW(Nhdr) * Note,
+ const ElfW(Nhdr) * NotesEnd) {
int TotalBinaryIdsSize = 0;
while (Note < NotesEnd) {
int Result = WriteBinaryIdForNote(Writer, Note);
@@ -179,5 +194,14 @@ COMPILER_RT_VISIBILITY int __llvm_write_binary_ids(ProfDataWriter *Writer) {
return 0;
}
+#else /* !NT_GNU_BUILD_ID */
+/*
+ * Fallback implementation for targets that don't support the GNU
+ * extensions NT_GNU_BUILD_ID and __ehdr_start.
+ */
+COMPILER_RT_VISIBILITY int __llvm_write_binary_ids(ProfDataWriter *Writer) {
+ return 0;
+}
+#endif
#endif
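The extra IOVec entry above pads every binary id record to an 8-byte boundary, and the padded length is what flows into BinaryIdSize. A standalone sketch of that size computation, assuming __llvm_profile_get_num_padding_bytes follows the usual pad-to-sizeof(uint64_t) rule:

    #include <assert.h>
    #include <stdint.h>

    /* Assumed behaviour: bytes needed to reach the next multiple of 8, 0 if
       the length is already aligned. */
    static uint8_t num_padding_bytes(uint64_t len) {
      return (uint8_t)(7 & (sizeof(uint64_t) - len % sizeof(uint64_t)));
    }

    int main(void) {
      uint64_t BinaryIdLen = 20; /* a GNU build id is typically a 20-byte SHA-1 */
      uint8_t BinaryIdPadding = num_padding_bytes(BinaryIdLen);
      uint64_t BinaryIdSize = sizeof(uint64_t) + BinaryIdLen + BinaryIdPadding;
      assert(BinaryIdPadding == 4);
      assert(BinaryIdSize % sizeof(uint64_t) == 0); /* record stays 8-byte aligned */
      return 0;
    }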
diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformOther.c b/compiler-rt/lib/profile/InstrProfilingPlatformOther.c
index 0e59148e2044..48946ce94253 100644
--- a/compiler-rt/lib/profile/InstrProfilingPlatformOther.c
+++ b/compiler-rt/lib/profile/InstrProfilingPlatformOther.c
@@ -46,17 +46,19 @@ void __llvm_profile_register_function(void *Data_) {
if (!DataFirst) {
DataFirst = Data;
DataLast = Data + 1;
- CountersFirst = Data->CounterPtr;
- CountersLast = (uint64_t *)Data->CounterPtr + Data->NumCounters;
+ CountersFirst = (uint64_t *)((uintptr_t)Data_ + Data->CounterPtr);
+ CountersLast = CountersFirst + Data->NumCounters;
return;
}
DataFirst = (const __llvm_profile_data *)getMinAddr(DataFirst, Data);
- CountersFirst = (uint64_t *)getMinAddr(CountersFirst, Data->CounterPtr);
+ CountersFirst = (uint64_t *)getMinAddr(
+ CountersFirst, (uint64_t *)((uintptr_t)Data_ + Data->CounterPtr));
DataLast = (const __llvm_profile_data *)getMaxAddr(DataLast, Data + 1);
CountersLast = (uint64_t *)getMaxAddr(
- CountersLast, (uint64_t *)Data->CounterPtr + Data->NumCounters);
+ CountersLast,
+ (uint64_t *)((uintptr_t)Data_ + Data->CounterPtr) + Data->NumCounters);
}
COMPILER_RT_VISIBILITY
diff --git a/compiler-rt/lib/profile/InstrProfilingUtil.c b/compiler-rt/lib/profile/InstrProfilingUtil.c
index 4fa792b72eac..d563e333aca8 100644
--- a/compiler-rt/lib/profile/InstrProfilingUtil.c
+++ b/compiler-rt/lib/profile/InstrProfilingUtil.c
@@ -34,9 +34,15 @@
#endif
#if defined(__Fuchsia__)
+#include <zircon/process.h>
#include <zircon/syscalls.h>
#endif
+#if defined(__FreeBSD__)
+#include <signal.h>
+#include <sys/procctl.h>
+#endif
+
#include "InstrProfiling.h"
#include "InstrProfilingUtil.h"
@@ -325,6 +331,12 @@ COMPILER_RT_VISIBILITY int lprofSuspendSigKill() {
if (prctl(PR_GET_PDEATHSIG, &PDeachSig) == 0 && PDeachSig == SIGKILL)
prctl(PR_SET_PDEATHSIG, 0);
return (PDeachSig == SIGKILL);
+#elif defined(__FreeBSD__)
+ int PDeachSig = 0, PDisableSig = 0;
+ if (procctl(P_PID, 0, PROC_PDEATHSIG_STATUS, &PDeachSig) == 0 &&
+ PDeachSig == SIGKILL)
+ procctl(P_PID, 0, PROC_PDEATHSIG_CTL, &PDisableSig);
+ return (PDeachSig == SIGKILL);
#else
return 0;
#endif
@@ -333,11 +345,18 @@ COMPILER_RT_VISIBILITY int lprofSuspendSigKill() {
COMPILER_RT_VISIBILITY void lprofRestoreSigKill() {
#if defined(__linux__)
prctl(PR_SET_PDEATHSIG, SIGKILL);
+#elif defined(__FreeBSD__)
+ int PEnableSig = SIGKILL;
+ procctl(P_PID, 0, PROC_PDEATHSIG_CTL, &PEnableSig);
#endif
}
COMPILER_RT_VISIBILITY int lprofReleaseMemoryPagesToOS(uintptr_t Begin,
uintptr_t End) {
+#if defined(__ve__)
+ // VE doesn't support madvise.
+ return 0;
+#else
size_t PageSize = getpagesize();
uintptr_t BeginAligned = lprofRoundUpTo((uintptr_t)Begin, PageSize);
uintptr_t EndAligned = lprofRoundDownTo((uintptr_t)End, PageSize);
@@ -352,4 +371,5 @@ COMPILER_RT_VISIBILITY int lprofReleaseMemoryPagesToOS(uintptr_t Begin,
#endif
}
return 0;
+#endif
}
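The FreeBSD branches above mirror the Linux prctl(PR_GET/SET_PDEATHSIG) logic using procctl. A FreeBSD-only sketch of the same two calls in isolation, with plain error handling instead of the runtime's conventions:

    #if defined(__FreeBSD__)
    #include <signal.h>
    #include <stdio.h>
    #include <sys/procctl.h>
    #include <sys/types.h>
    #include <sys/wait.h> /* P_PID on some versions */

    int main(void) {
      int Enable = SIGKILL, Current = 0;
      /* Request SIGKILL on parent death, then read the setting back. */
      if (procctl(P_PID, 0, PROC_PDEATHSIG_CTL, &Enable) != 0)
        return 1;
      if (procctl(P_PID, 0, PROC_PDEATHSIG_STATUS, &Current) != 0)
        return 1;
      printf("parent-death signal: %d\n", Current); /* expect SIGKILL */
      return 0;
    }
    #else
    int main(void) { return 0; }
    #endif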
diff --git a/compiler-rt/lib/profile/InstrProfilingValue.c b/compiler-rt/lib/profile/InstrProfilingValue.c
index 7f368b9f8d4e..08197fdd9ea2 100644
--- a/compiler-rt/lib/profile/InstrProfilingValue.c
+++ b/compiler-rt/lib/profile/InstrProfilingValue.c
@@ -253,7 +253,7 @@ __llvm_profile_instrument_memop(uint64_t TargetValue, void *Data,
/*
* A wrapper struct that represents value profile runtime data.
* Like InstrProfRecord class which is used by profiling host tools,
- * ValueProfRuntimeRecord also implements the abstract intefaces defined in
+ * ValueProfRuntimeRecord also implements the abstract interfaces defined in
* ValueProfRecordClosure so that the runtime data can be serialized using
* shared C implementation.
*/
diff --git a/compiler-rt/lib/profile/InstrProfilingVersionVar.c b/compiler-rt/lib/profile/InstrProfilingVersionVar.c
index a6f222150794..e49d171cce41 100644
--- a/compiler-rt/lib/profile/InstrProfilingVersionVar.c
+++ b/compiler-rt/lib/profile/InstrProfilingVersionVar.c
@@ -13,5 +13,14 @@
* The runtime should only provide its own definition of this symbol when the
* user has not specified one. Set this up by moving the runtime's copy of this
* symbol to an object file within the archive.
+ *
+ * Hide this symbol everywhere except Apple platforms, where its presence is
+ * checked by the TAPI tool.
*/
-COMPILER_RT_WEAK uint64_t INSTR_PROF_RAW_VERSION_VAR = INSTR_PROF_RAW_VERSION;
+#if !defined(__APPLE__)
+#define VERSION_VAR_VISIBILITY COMPILER_RT_VISIBILITY
+#else
+#define VERSION_VAR_VISIBILITY
+#endif
+VERSION_VAR_VISIBILITY COMPILER_RT_WEAK uint64_t INSTR_PROF_RAW_VERSION_VAR =
+ INSTR_PROF_RAW_VERSION;
diff --git a/compiler-rt/lib/profile/InstrProfilingWriter.c b/compiler-rt/lib/profile/InstrProfilingWriter.c
index 25f630293227..9cb05570989d 100644
--- a/compiler-rt/lib/profile/InstrProfilingWriter.c
+++ b/compiler-rt/lib/profile/InstrProfilingWriter.c
@@ -32,7 +32,7 @@ static uint32_t VPDataArraySize = sizeof(VPDataArray) / sizeof(*VPDataArray);
COMPILER_RT_VISIBILITY uint8_t *DynamicBufferIOBuffer = 0;
COMPILER_RT_VISIBILITY uint32_t VPBufferSize = 0;
-/* The buffer writer is reponsponsible in keeping writer state
+/* The buffer writer is responsible for keeping writer state
* across the call.
*/
COMPILER_RT_VISIBILITY uint32_t lprofBufferWriter(ProfDataWriter *This,
@@ -283,6 +283,12 @@ lprofWriteDataImpl(ProfDataWriter *Writer, const __llvm_profile_data *DataBegin,
#define INSTR_PROF_RAW_HEADER(Type, Name, Init) Header.Name = Init;
#include "profile/InstrProfData.inc"
+ /* On WIN64, label differences are truncated 32-bit values. Truncate
+ * CountersDelta to match. */
+#ifdef _WIN64
+ Header.CountersDelta = (uint32_t)Header.CountersDelta;
+#endif
+
/* Write the profile header. */
ProfDataIOVec IOVec[] = {{&Header, sizeof(__llvm_profile_header), 1, 0}};
if (Writer->Write(Writer, IOVec, sizeof(IOVec) / sizeof(*IOVec)))
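The truncation above keeps the serialized CountersDelta consistent with the 32-bit label differences COFF produces on WIN64, while signextIfWin64 in InstrProfilingMerge.c widens the in-memory CounterPtr back. A tiny sketch of why truncation plus sign extension round-trips any delta whose value fits in a signed 32-bit range:

    #include <assert.h>
    #include <stdint.h>

    int main(void) {
      /* Hypothetical label difference: negative, but representable in 32 bits. */
      uint64_t Delta = (uint64_t)-0x8000;             /* 0xFFFFFFFFFFFF8000 */
      uint32_t Stored = (uint32_t)Delta;              /* what the writer emits */
      uint64_t Recovered = (uint64_t)(int32_t)Stored; /* sign-extended read */
      assert(Recovered == Delta);
      return 0;
    }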
diff --git a/compiler-rt/lib/sanitizer_common/sancov_flags.inc b/compiler-rt/lib/sanitizer_common/sancov_flags.inc
index cca33fc359f4..de9ede217fc3 100644
--- a/compiler-rt/lib/sanitizer_common/sancov_flags.inc
+++ b/compiler-rt/lib/sanitizer_common/sancov_flags.inc
@@ -14,7 +14,7 @@
#endif
SANCOV_FLAG(bool, symbolize, true,
- "If set, converage information will be symbolized by sancov tool "
+ "If set, coverage information will be symbolized by sancov tool "
"after dumping.")
SANCOV_FLAG(bool, help, false, "Print flags help.")
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_addrhashmap.h b/compiler-rt/lib/sanitizer_common/sanitizer_addrhashmap.h
index 15f81a04350f..7e2fa91089f1 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_addrhashmap.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_addrhashmap.h
@@ -39,6 +39,11 @@ namespace __sanitizer {
// the current thread has exclusive access to the data
// if !h.exists() then the element never existed
// }
+// {
+// Map::Handle h(&m, addr, false, true);
+// this will create a new element or return a handle to an existing element
+// if !h.created() this thread does *not* have exclusive access to the data
+// }
template<typename T, uptr kSize>
class AddrHashMap {
private:
@@ -56,7 +61,7 @@ class AddrHashMap {
static const uptr kBucketSize = 3;
struct Bucket {
- RWMutex mtx;
+ Mutex mtx;
atomic_uintptr_t add;
Cell cells[kBucketSize];
};
@@ -89,6 +94,12 @@ class AddrHashMap {
bool create_;
};
+ typedef void (*ForEachCallback)(const uptr key, const T &val, void *arg);
+ // ForEach acquires a lock on each bucket while iterating over
+ // elements. Note that this only ensures that the structure of the hashmap is
+ // unchanged; there may be a data race to the element itself.
+ void ForEach(ForEachCallback cb, void *arg);
+
private:
friend class Handle;
Bucket *table_;
@@ -98,6 +109,33 @@ class AddrHashMap {
uptr calcHash(uptr addr);
};
+template <typename T, uptr kSize>
+void AddrHashMap<T, kSize>::ForEach(ForEachCallback cb, void *arg) {
+ for (uptr n = 0; n < kSize; n++) {
+ Bucket *bucket = &table_[n];
+
+ ReadLock lock(&bucket->mtx);
+
+ for (uptr i = 0; i < kBucketSize; i++) {
+ Cell *c = &bucket->cells[i];
+ uptr addr1 = atomic_load(&c->addr, memory_order_acquire);
+ if (addr1 != 0)
+ cb(addr1, c->val, arg);
+ }
+
+ // Iterate over any additional cells.
+ if (AddBucket *add =
+ (AddBucket *)atomic_load(&bucket->add, memory_order_acquire)) {
+ for (uptr i = 0; i < add->size; i++) {
+ Cell *c = &add->cells[i];
+ uptr addr1 = atomic_load(&c->addr, memory_order_acquire);
+ if (addr1 != 0)
+ cb(addr1, c->val, arg);
+ }
+ }
+ }
+}
+
template<typename T, uptr kSize>
AddrHashMap<T, kSize>::Handle::Handle(AddrHashMap<T, kSize> *map, uptr addr) {
map_ = map;
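The new ForEach takes a read lock per bucket, so callbacks observe a stable table structure even though element payloads can still race, as the header comment notes. A self-contained sketch of the same per-bucket pattern, using std::shared_mutex in place of the sanitizer's Mutex/ReadLock and an invented cell layout:

    #include <array>
    #include <atomic>
    #include <cstdint>
    #include <cstdio>
    #include <shared_mutex>

    struct Cell { std::atomic<uintptr_t> addr{0}; int val = 0; };
    struct Bucket { std::shared_mutex mtx; std::array<Cell, 3> cells; };

    template <size_t kSize>
    void ForEach(std::array<Bucket, kSize> &table,
                 void (*cb)(uintptr_t key, const int &val, void *arg), void *arg) {
      for (Bucket &bucket : table) {
        // Shared (read) lock: the bucket's structure cannot change under us.
        std::shared_lock<std::shared_mutex> lock(bucket.mtx);
        for (Cell &c : bucket.cells) {
          uintptr_t a = c.addr.load(std::memory_order_acquire);
          if (a != 0)
            cb(a, c.val, arg);
        }
      }
    }

    int main() {
      std::array<Bucket, 4> table;
      table[1].cells[0].val = 42;
      table[1].cells[0].addr.store(0x1000, std::memory_order_release);
      ForEach(table, [](uintptr_t key, const int &val, void *) {
        std::printf("key=0x%zx val=%d\n", (size_t)key, val);
      }, nullptr);
      return 0;
    }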
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator.h
index 5ec47416fe0c..ec23465d9584 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator.h
@@ -14,6 +14,7 @@
#define SANITIZER_ALLOCATOR_H
#include "sanitizer_common.h"
+#include "sanitizer_flat_map.h"
#include "sanitizer_internal_defs.h"
#include "sanitizer_lfstack.h"
#include "sanitizer_libc.h"
@@ -43,12 +44,6 @@ void SetAllocatorOutOfMemory();
void PrintHintAllocatorCannotReturnNull();
-// Allocators call these callbacks on mmap/munmap.
-struct NoOpMapUnmapCallback {
- void OnMap(uptr p, uptr size) const { }
- void OnUnmap(uptr p, uptr size) const { }
-};
-
// Callback type for iterating over chunks.
typedef void (*ForEachChunkCallback)(uptr chunk, void *arg);
@@ -70,7 +65,6 @@ inline void RandomShuffle(T *a, u32 n, u32 *rand_state) {
#include "sanitizer_allocator_size_class_map.h"
#include "sanitizer_allocator_stats.h"
#include "sanitizer_allocator_primary64.h"
-#include "sanitizer_allocator_bytemap.h"
#include "sanitizer_allocator_primary32.h"
#include "sanitizer_allocator_local_cache.h"
#include "sanitizer_allocator_secondary.h"
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_bytemap.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_bytemap.h
deleted file mode 100644
index 0084bb62c83c..000000000000
--- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_bytemap.h
+++ /dev/null
@@ -1,107 +0,0 @@
-//===-- sanitizer_allocator_bytemap.h ---------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Part of the Sanitizer Allocator.
-//
-//===----------------------------------------------------------------------===//
-#ifndef SANITIZER_ALLOCATOR_H
-#error This file must be included inside sanitizer_allocator.h
-#endif
-
-// Maps integers in rage [0, kSize) to u8 values.
-template <u64 kSize, typename AddressSpaceViewTy = LocalAddressSpaceView>
-class FlatByteMap {
- public:
- using AddressSpaceView = AddressSpaceViewTy;
- void Init() {
- internal_memset(map_, 0, sizeof(map_));
- }
-
- void set(uptr idx, u8 val) {
- CHECK_LT(idx, kSize);
- CHECK_EQ(0U, map_[idx]);
- map_[idx] = val;
- }
- u8 operator[] (uptr idx) {
- CHECK_LT(idx, kSize);
- // FIXME: CHECK may be too expensive here.
- return map_[idx];
- }
- private:
- u8 map_[kSize];
-};
-
-// TwoLevelByteMap maps integers in range [0, kSize1*kSize2) to u8 values.
-// It is implemented as a two-dimensional array: array of kSize1 pointers
-// to kSize2-byte arrays. The secondary arrays are mmaped on demand.
-// Each value is initially zero and can be set to something else only once.
-// Setting and getting values from multiple threads is safe w/o extra locking.
-template <u64 kSize1, u64 kSize2,
- typename AddressSpaceViewTy = LocalAddressSpaceView,
- class MapUnmapCallback = NoOpMapUnmapCallback>
-class TwoLevelByteMap {
- public:
- using AddressSpaceView = AddressSpaceViewTy;
- void Init() {
- internal_memset(map1_, 0, sizeof(map1_));
- mu_.Init();
- }
-
- void TestOnlyUnmap() {
- for (uptr i = 0; i < kSize1; i++) {
- u8 *p = Get(i);
- if (!p) continue;
- MapUnmapCallback().OnUnmap(reinterpret_cast<uptr>(p), kSize2);
- UnmapOrDie(p, kSize2);
- }
- }
-
- uptr size() const { return kSize1 * kSize2; }
- uptr size1() const { return kSize1; }
- uptr size2() const { return kSize2; }
-
- void set(uptr idx, u8 val) {
- CHECK_LT(idx, kSize1 * kSize2);
- u8 *map2 = GetOrCreate(idx / kSize2);
- CHECK_EQ(0U, map2[idx % kSize2]);
- map2[idx % kSize2] = val;
- }
-
- u8 operator[] (uptr idx) const {
- CHECK_LT(idx, kSize1 * kSize2);
- u8 *map2 = Get(idx / kSize2);
- if (!map2) return 0;
- auto value_ptr = AddressSpaceView::Load(&map2[idx % kSize2]);
- return *value_ptr;
- }
-
- private:
- u8 *Get(uptr idx) const {
- CHECK_LT(idx, kSize1);
- return reinterpret_cast<u8 *>(
- atomic_load(&map1_[idx], memory_order_acquire));
- }
-
- u8 *GetOrCreate(uptr idx) {
- u8 *res = Get(idx);
- if (!res) {
- SpinMutexLock l(&mu_);
- if (!(res = Get(idx))) {
- res = (u8*)MmapOrDie(kSize2, "TwoLevelByteMap");
- MapUnmapCallback().OnMap(reinterpret_cast<uptr>(res), kSize2);
- atomic_store(&map1_[idx], reinterpret_cast<uptr>(res),
- memory_order_release);
- }
- }
- return res;
- }
-
- atomic_uintptr_t map1_[kSize1];
- StaticSpinMutex mu_;
-};
-
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_combined.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_combined.h
index 0e81e6764f9a..9a3602f730b3 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_combined.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_combined.h
@@ -112,15 +112,13 @@ class CombinedAllocator {
return new_p;
}
- bool PointerIsMine(void *p) {
+ bool PointerIsMine(const void *p) const {
if (primary_.PointerIsMine(p))
return true;
return secondary_.PointerIsMine(p);
}
- bool FromPrimary(void *p) {
- return primary_.PointerIsMine(p);
- }
+ bool FromPrimary(const void *p) const { return primary_.PointerIsMine(p); }
void *GetMetaData(const void *p) {
if (primary_.PointerIsMine(p))
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_dlsym.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_dlsym.h
new file mode 100644
index 000000000000..92b1373ef84d
--- /dev/null
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_dlsym.h
@@ -0,0 +1,79 @@
+//===-- sanitizer_allocator_dlsym.h -----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Hack: Sanitizer initializer calls dlsym which may need to allocate and call
+// back into uninitialized sanitizer.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SANITIZER_ALLOCATOR_DLSYM_H
+#define SANITIZER_ALLOCATOR_DLSYM_H
+
+#include "sanitizer_allocator_internal.h"
+
+namespace __sanitizer {
+
+template <typename Details>
+struct DlSymAllocator {
+ static bool Use() {
+ // Fuchsia doesn't use dlsym-based interceptors.
+ return !SANITIZER_FUCHSIA && UNLIKELY(Details::UseImpl());
+ }
+
+ static bool PointerIsMine(const void *ptr) {
+ // Fuchsia doesn't use dlsym-based interceptors.
+ return !SANITIZER_FUCHSIA &&
+ UNLIKELY(internal_allocator()->FromPrimary(ptr));
+ }
+
+ static void *Allocate(uptr size_in_bytes) {
+ void *ptr = InternalAlloc(size_in_bytes, nullptr, kWordSize);
+ CHECK(internal_allocator()->FromPrimary(ptr));
+ Details::OnAllocate(ptr,
+ internal_allocator()->GetActuallyAllocatedSize(ptr));
+ return ptr;
+ }
+
+ static void *Callocate(SIZE_T nmemb, SIZE_T size) {
+ void *ptr = InternalCalloc(nmemb, size);
+ CHECK(internal_allocator()->FromPrimary(ptr));
+ Details::OnAllocate(ptr,
+ internal_allocator()->GetActuallyAllocatedSize(ptr));
+ return ptr;
+ }
+
+ static void Free(void *ptr) {
+ uptr size = internal_allocator()->GetActuallyAllocatedSize(ptr);
+ Details::OnFree(ptr, size);
+ InternalFree(ptr);
+ }
+
+ static void *Realloc(void *ptr, uptr new_size) {
+ if (!ptr)
+ return Allocate(new_size);
+ CHECK(internal_allocator()->FromPrimary(ptr));
+ if (!new_size) {
+ Free(ptr);
+ return nullptr;
+ }
+ uptr size = internal_allocator()->GetActuallyAllocatedSize(ptr);
+ uptr memcpy_size = Min(new_size, size);
+ void *new_ptr = Allocate(new_size);
+ if (new_ptr)
+ internal_memcpy(new_ptr, ptr, memcpy_size);
+ Free(ptr);
+ return new_ptr;
+ }
+
+ static void OnAllocate(const void *ptr, uptr size) {}
+ static void OnFree(const void *ptr, uptr size) {}
+};
+
+} // namespace __sanitizer
+
+#endif // SANITIZER_ALLOCATOR_DLSYM_H
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary32.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary32.h
index 38d2a7d117fb..ae1b7e0d5f1c 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary32.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary32.h
@@ -189,7 +189,7 @@ class SizeClassAllocator32 {
sci->free_list.push_front(b);
}
- bool PointerIsMine(const void *p) {
+ bool PointerIsMine(const void *p) const {
uptr mem = reinterpret_cast<uptr>(p);
if (SANITIZER_SIGN_EXTENDED_ADDRESSES)
mem &= (kSpaceSize - 1);
@@ -198,8 +198,9 @@ class SizeClassAllocator32 {
return GetSizeClass(p) != 0;
}
- uptr GetSizeClass(const void *p) {
- return possible_regions[ComputeRegionId(reinterpret_cast<uptr>(p))];
+ uptr GetSizeClass(const void *p) const {
+ uptr id = ComputeRegionId(reinterpret_cast<uptr>(p));
+ return possible_regions.contains(id) ? possible_regions[id] : 0;
}
void *GetBlockBegin(const void *p) {
@@ -251,9 +252,9 @@ class SizeClassAllocator32 {
// Iterate over all existing chunks.
// The allocator must be locked when calling this function.
- void ForEachChunk(ForEachChunkCallback callback, void *arg) {
+ void ForEachChunk(ForEachChunkCallback callback, void *arg) const {
for (uptr region = 0; region < kNumPossibleRegions; region++)
- if (possible_regions[region]) {
+ if (possible_regions.contains(region) && possible_regions[region]) {
uptr chunk_size = ClassIdToSize(possible_regions[region]);
uptr max_chunks_in_region = kRegionSize / (chunk_size + kMetadataSize);
uptr region_beg = region * kRegionSize;
@@ -292,9 +293,7 @@ class SizeClassAllocator32 {
return res;
}
- uptr ComputeRegionBeg(uptr mem) {
- return mem & ~(kRegionSize - 1);
- }
+ uptr ComputeRegionBeg(uptr mem) const { return mem & ~(kRegionSize - 1); }
uptr AllocateRegion(AllocatorStats *stat, uptr class_id) {
DCHECK_LT(class_id, kNumClasses);
@@ -305,7 +304,7 @@ class SizeClassAllocator32 {
MapUnmapCallback().OnMap(res, kRegionSize);
stat->Add(AllocatorStatMapped, kRegionSize);
CHECK(IsAligned(res, kRegionSize));
- possible_regions.set(ComputeRegionId(res), static_cast<u8>(class_id));
+ possible_regions[ComputeRegionId(res)] = class_id;
return res;
}
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary64.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary64.h
index b142ee0131b2..f917310cfebb 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary64.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary64.h
@@ -161,7 +161,7 @@ class SizeClassAllocator64 {
void ForceReleaseToOS() {
MemoryMapperT memory_mapper(*this);
for (uptr class_id = 1; class_id < kNumClasses; class_id++) {
- BlockingMutexLock l(&GetRegionInfo(class_id)->mutex);
+ Lock l(&GetRegionInfo(class_id)->mutex);
MaybeReleaseToOS(&memory_mapper, class_id, true /*force*/);
}
}
@@ -178,7 +178,7 @@ class SizeClassAllocator64 {
uptr region_beg = GetRegionBeginBySizeClass(class_id);
CompactPtrT *free_array = GetFreeArray(region_beg);
- BlockingMutexLock l(&region->mutex);
+ Lock l(&region->mutex);
uptr old_num_chunks = region->num_freed_chunks;
uptr new_num_freed_chunks = old_num_chunks + n_chunks;
// Failure to allocate free array space while releasing memory is non
@@ -204,7 +204,7 @@ class SizeClassAllocator64 {
uptr region_beg = GetRegionBeginBySizeClass(class_id);
CompactPtrT *free_array = GetFreeArray(region_beg);
- BlockingMutexLock l(&region->mutex);
+ Lock l(&region->mutex);
#if SANITIZER_WINDOWS
/* On Windows unmapping of memory during __sanitizer_purge_allocator is
explicit and immediate, so unmapped regions must be explicitly mapped back
@@ -282,6 +282,8 @@ class SizeClassAllocator64 {
CHECK(kMetadataSize);
uptr class_id = GetSizeClass(p);
uptr size = ClassIdToSize(class_id);
+ if (!size)
+ return nullptr;
uptr chunk_idx = GetChunkIdx(reinterpret_cast<uptr>(p), size);
uptr region_beg = GetRegionBeginBySizeClass(class_id);
return reinterpret_cast<void *>(GetMetadataEnd(region_beg) -
@@ -300,9 +302,8 @@ class SizeClassAllocator64 {
UnmapWithCallbackOrDie((uptr)address_range.base(), address_range.size());
}
- static void FillMemoryProfile(uptr start, uptr rss, bool file, uptr *stats,
- uptr stats_size) {
- for (uptr class_id = 0; class_id < stats_size; class_id++)
+ static void FillMemoryProfile(uptr start, uptr rss, bool file, uptr *stats) {
+ for (uptr class_id = 0; class_id < kNumClasses; class_id++)
if (stats[class_id] == start)
stats[class_id] = rss;
}
@@ -315,7 +316,7 @@ class SizeClassAllocator64 {
Printf(
"%s %02zd (%6zd): mapped: %6zdK allocs: %7zd frees: %7zd inuse: %6zd "
"num_freed_chunks %7zd avail: %6zd rss: %6zdK releases: %6zd "
- "last released: %6zdK region: 0x%zx\n",
+ "last released: %6lldK region: 0x%zx\n",
region->exhausted ? "F" : " ", class_id, ClassIdToSize(class_id),
region->mapped_user >> 10, region->stats.n_allocated,
region->stats.n_freed, in_use, region->num_freed_chunks, avail_chunks,
@@ -328,7 +329,7 @@ class SizeClassAllocator64 {
uptr rss_stats[kNumClasses];
for (uptr class_id = 0; class_id < kNumClasses; class_id++)
rss_stats[class_id] = SpaceBeg() + kRegionSize * class_id;
- GetMemoryProfile(FillMemoryProfile, rss_stats, kNumClasses);
+ GetMemoryProfile(FillMemoryProfile, rss_stats);
uptr total_mapped = 0;
uptr total_rss = 0;
@@ -623,7 +624,7 @@ class SizeClassAllocator64 {
static const uptr kRegionSize = kSpaceSize / kNumClassesRounded;
// FreeArray is the array of free-d chunks (stored as 4-byte offsets).
- // In the worst case it may reguire kRegionSize/SizeClassMap::kMinSize
+ // In the worst case it may require kRegionSize/SizeClassMap::kMinSize
// elements, but in reality this will not happen. For simplicity we
// dedicate 1/8 of the region's virtual space to FreeArray.
static const uptr kFreeArraySize = kRegionSize / 8;
@@ -665,7 +666,7 @@ class SizeClassAllocator64 {
};
struct ALIGNED(SANITIZER_CACHE_LINE_SIZE) RegionInfo {
- BlockingMutex mutex;
+ Mutex mutex;
uptr num_freed_chunks; // Number of elements in the freearray.
uptr mapped_free_array; // Bytes mapped for freearray.
uptr allocated_user; // Bytes allocated for user memory.
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_secondary.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_secondary.h
index dd34fe85cc3a..c24354cb5b2a 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_secondary.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_secondary.h
@@ -161,7 +161,7 @@ class LargeMmapAllocator {
return res;
}
- bool PointerIsMine(const void *p) {
+ bool PointerIsMine(const void *p) const {
return GetBlockBegin(p) != nullptr;
}
@@ -179,7 +179,7 @@ class LargeMmapAllocator {
return GetHeader(p) + 1;
}
- void *GetBlockBegin(const void *ptr) {
+ void *GetBlockBegin(const void *ptr) const {
uptr p = reinterpret_cast<uptr>(ptr);
SpinMutexLock l(&mutex_);
uptr nearest_chunk = 0;
@@ -301,7 +301,7 @@ class LargeMmapAllocator {
return GetHeader(reinterpret_cast<uptr>(p));
}
- void *GetUser(const Header *h) {
+ void *GetUser(const Header *h) const {
CHECK(IsAligned((uptr)h, page_size_));
return reinterpret_cast<void*>(reinterpret_cast<uptr>(h) + page_size_);
}
@@ -318,5 +318,5 @@ class LargeMmapAllocator {
struct Stats {
uptr n_allocs, n_frees, currently_allocated, max_allocated, by_size_log[64];
} stats;
- StaticSpinMutex mutex_;
+ mutable StaticSpinMutex mutex_;
};
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_size_class_map.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_size_class_map.h
index c50d13303ede..361793f2490a 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_size_class_map.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_size_class_map.h
@@ -193,13 +193,13 @@ class SizeClassMap {
uptr cached = MaxCachedHint(s) * s;
if (i == kBatchClassID)
d = p = l = 0;
- Printf("c%02zd => s: %zd diff: +%zd %02zd%% l %zd "
- "cached: %zd %zd; id %zd\n",
- i, Size(i), d, p, l, MaxCachedHint(s), cached, ClassID(s));
+ Printf(
+ "c%02zu => s: %zu diff: +%zu %02zu%% l %zu cached: %zu %zu; id %zu\n",
+ i, Size(i), d, p, l, MaxCachedHint(s), cached, ClassID(s));
total_cached += cached;
prev_s = s;
}
- Printf("Total cached: %zd\n", total_cached);
+ Printf("Total cached: %zu\n", total_cached);
}
static void Validate() {
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_asm.h b/compiler-rt/lib/sanitizer_common/sanitizer_asm.h
index 803af3285e18..9ebba91da73f 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_asm.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_asm.h
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// Various support for assemebler.
+// Various support for assembler.
//
//===----------------------------------------------------------------------===//
@@ -61,8 +61,15 @@
#if defined(__ELF__) && (defined(__GNU__) || defined(__FreeBSD__) || \
defined(__Fuchsia__) || defined(__linux__))
// clang-format off
-#define NO_EXEC_STACK_DIRECTIVE .section .note.GNU-stack,"",%progbits // NOLINT
+#define NO_EXEC_STACK_DIRECTIVE .section .note.GNU-stack,"",%progbits
// clang-format on
#else
#define NO_EXEC_STACK_DIRECTIVE
#endif
+
+#if (defined(__x86_64__) || defined(__i386__)) && defined(__has_include) && __has_include(<cet.h>)
+#include <cet.h>
+#endif
+#ifndef _CET_ENDBR
+#define _CET_ENDBR
+#endif
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_atomic_clang_mips.h b/compiler-rt/lib/sanitizer_common/sanitizer_atomic_clang_mips.h
index 2b39097112d4..f3d3052e5b7c 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_atomic_clang_mips.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_atomic_clang_mips.h
@@ -18,7 +18,7 @@ namespace __sanitizer {
// MIPS32 does not support atomics > 4 bytes. To address this lack of
// functionality, the sanitizer library provides helper methods which use an
-// internal spin lock mechanism to emulate atomic oprations when the size is
+// internal spin lock mechanism to emulate atomic operations when the size is
// 8 bytes.
static void __spin_lock(volatile int *lock) {
while (__sync_lock_test_and_set(lock, 1))
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_chained_origin_depot.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_chained_origin_depot.cpp
index 250ac39e1301..472b83d63a08 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_chained_origin_depot.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_chained_origin_depot.cpp
@@ -11,16 +11,57 @@
#include "sanitizer_chained_origin_depot.h"
+#include "sanitizer_stackdepotbase.h"
+
namespace __sanitizer {
-bool ChainedOriginDepot::ChainedOriginDepotNode::eq(
- u32 hash, const args_type &args) const {
- return here_id == args.here_id && prev_id == args.prev_id;
-}
+namespace {
+struct ChainedOriginDepotDesc {
+ u32 here_id;
+ u32 prev_id;
+};
-uptr ChainedOriginDepot::ChainedOriginDepotNode::storage_size(
- const args_type &args) {
- return sizeof(ChainedOriginDepotNode);
+struct ChainedOriginDepotNode {
+ using hash_type = u32;
+ u32 link;
+ u32 here_id;
+ u32 prev_id;
+
+ typedef ChainedOriginDepotDesc args_type;
+
+ bool eq(hash_type hash, const args_type &args) const;
+
+ static uptr allocated() { return 0; }
+
+ static hash_type hash(const args_type &args);
+
+ static bool is_valid(const args_type &args);
+
+ void store(u32 id, const args_type &args, hash_type other_hash);
+
+ args_type load(u32 id) const;
+
+ struct Handle {
+ const ChainedOriginDepotNode *node_ = nullptr;
+ u32 id_ = 0;
+ Handle(const ChainedOriginDepotNode *node, u32 id) : node_(node), id_(id) {}
+ bool valid() const { return node_; }
+ u32 id() const { return id_; }
+ int here_id() const { return node_->here_id; }
+ int prev_id() const { return node_->prev_id; }
+ };
+
+ static Handle get_handle(u32 id);
+
+ typedef Handle handle_type;
+};
+
+} // namespace
+
+static StackDepotBase<ChainedOriginDepotNode, 4, 20> depot;
+
+bool ChainedOriginDepotNode::eq(hash_type hash, const args_type &args) const {
+ return here_id == args.here_id && prev_id == args.prev_id;
}
/* This is murmur2 hash for the 64->32 bit case.
@@ -36,7 +77,8 @@ uptr ChainedOriginDepot::ChainedOriginDepotNode::storage_size(
split, or one of two reserved values (-1) or (-2). Either case can
dominate depending on the workload.
*/
-u32 ChainedOriginDepot::ChainedOriginDepotNode::hash(const args_type &args) {
+ChainedOriginDepotNode::hash_type ChainedOriginDepotNode::hash(
+ const args_type &args) {
const u32 m = 0x5bd1e995;
const u32 seed = 0x9747b28c;
const u32 r = 24;
@@ -61,37 +103,33 @@ u32 ChainedOriginDepot::ChainedOriginDepotNode::hash(const args_type &args) {
return h;
}
-bool ChainedOriginDepot::ChainedOriginDepotNode::is_valid(
- const args_type &args) {
- return true;
-}
+bool ChainedOriginDepotNode::is_valid(const args_type &args) { return true; }
-void ChainedOriginDepot::ChainedOriginDepotNode::store(const args_type &args,
- u32 other_hash) {
+void ChainedOriginDepotNode::store(u32 id, const args_type &args,
+ hash_type other_hash) {
here_id = args.here_id;
prev_id = args.prev_id;
}
-ChainedOriginDepot::ChainedOriginDepotNode::args_type
-ChainedOriginDepot::ChainedOriginDepotNode::load() const {
+ChainedOriginDepotNode::args_type ChainedOriginDepotNode::load(u32 id) const {
args_type ret = {here_id, prev_id};
return ret;
}
-ChainedOriginDepot::ChainedOriginDepotNode::Handle
-ChainedOriginDepot::ChainedOriginDepotNode::get_handle() {
- return Handle(this);
+ChainedOriginDepotNode::Handle ChainedOriginDepotNode::get_handle(u32 id) {
+ return Handle(&depot.nodes[id], id);
}
ChainedOriginDepot::ChainedOriginDepot() {}
-StackDepotStats *ChainedOriginDepot::GetStats() { return depot.GetStats(); }
+StackDepotStats ChainedOriginDepot::GetStats() const {
+ return depot.GetStats();
+}
bool ChainedOriginDepot::Put(u32 here_id, u32 prev_id, u32 *new_id) {
ChainedOriginDepotDesc desc = {here_id, prev_id};
bool inserted;
- ChainedOriginDepotNode::Handle h = depot.Put(desc, &inserted);
- *new_id = h.valid() ? h.id() : 0;
+ *new_id = depot.Put(desc, &inserted);
return inserted;
}
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_chained_origin_depot.h b/compiler-rt/lib/sanitizer_common/sanitizer_chained_origin_depot.h
index 453cdf6b5449..2e800964a45d 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_chained_origin_depot.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_chained_origin_depot.h
@@ -13,7 +13,6 @@
#define SANITIZER_CHAINED_ORIGIN_DEPOT_H
#include "sanitizer_common.h"
-#include "sanitizer_stackdepotbase.h"
namespace __sanitizer {
@@ -22,7 +21,7 @@ class ChainedOriginDepot {
ChainedOriginDepot();
// Gets the statistic of the origin chain storage.
- StackDepotStats *GetStats();
+ StackDepotStats GetStats() const;
// Stores a chain with StackDepot ID here_id and previous chain ID prev_id.
// If successful, returns true and the new chain id new_id.
@@ -37,48 +36,6 @@ class ChainedOriginDepot {
void UnlockAll();
private:
- struct ChainedOriginDepotDesc {
- u32 here_id;
- u32 prev_id;
- };
-
- struct ChainedOriginDepotNode {
- ChainedOriginDepotNode *link;
- u32 id;
- u32 here_id;
- u32 prev_id;
-
- typedef ChainedOriginDepotDesc args_type;
-
- bool eq(u32 hash, const args_type &args) const;
-
- static uptr storage_size(const args_type &args);
-
- static u32 hash(const args_type &args);
-
- static bool is_valid(const args_type &args);
-
- void store(const args_type &args, u32 other_hash);
-
- args_type load() const;
-
- struct Handle {
- ChainedOriginDepotNode *node_;
- Handle() : node_(nullptr) {}
- explicit Handle(ChainedOriginDepotNode *node) : node_(node) {}
- bool valid() { return node_; }
- u32 id() { return node_->id; }
- int here_id() { return node_->here_id; }
- int prev_id() { return node_->prev_id; }
- };
-
- Handle get_handle();
-
- typedef Handle handle_type;
- };
-
- StackDepotBase<ChainedOriginDepotNode, 4, 20> depot;
-
ChainedOriginDepot(const ChainedOriginDepot &) = delete;
void operator=(const ChainedOriginDepot &) = delete;
};
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common.h b/compiler-rt/lib/sanitizer_common/sanitizer_common.h
index cbdbb0c4c4bd..065154496eb5 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_common.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_common.h
@@ -192,12 +192,13 @@ class ReservedAddressRange {
};
typedef void (*fill_profile_f)(uptr start, uptr rss, bool file,
- /*out*/uptr *stats, uptr stats_size);
+ /*out*/ uptr *stats);
// Parse the contents of /proc/self/smaps and generate a memory profile.
-// |cb| is a tool-specific callback that fills the |stats| array containing
-// |stats_size| elements.
-void GetMemoryProfile(fill_profile_f cb, uptr *stats, uptr stats_size);
+// |cb| is a tool-specific callback that fills the |stats| array.
+void GetMemoryProfile(fill_profile_f cb, uptr *stats);
+void ParseUnixMemoryProfile(fill_profile_f cb, uptr *stats, char *smaps,
+ uptr smaps_len);
// Simple low-level (mmap-based) allocator for internal use. Doesn't have
// constructor, so all instances of LowLevelAllocator should be
@@ -222,8 +223,8 @@ void CatastrophicErrorWrite(const char *buffer, uptr length);
void RawWrite(const char *buffer);
bool ColorizeReports();
void RemoveANSIEscapeSequencesFromString(char *buffer);
-void Printf(const char *format, ...);
-void Report(const char *format, ...);
+void Printf(const char *format, ...) FORMAT(1, 2);
+void Report(const char *format, ...) FORMAT(1, 2);
void SetPrintfAndReportCallback(void (*callback)(const char *));
#define VReport(level, ...) \
do { \
@@ -371,7 +372,7 @@ void ReportErrorSummary(const char *error_type, const AddressInfo &info,
void ReportErrorSummary(const char *error_type, const StackTrace *trace,
const char *alt_tool_name = nullptr);
-void ReportMmapWriteExec(int prot);
+void ReportMmapWriteExec(int prot, int mflags);
// Math
#if SANITIZER_WINDOWS && !defined(__clang__) && !defined(__GNUC__)
@@ -419,9 +420,7 @@ inline uptr LeastSignificantSetBitIndex(uptr x) {
return up;
}
-inline bool IsPowerOfTwo(uptr x) {
- return (x & (x - 1)) == 0;
-}
+inline constexpr bool IsPowerOfTwo(uptr x) { return (x & (x - 1)) == 0; }
inline uptr RoundUpToPowerOfTwo(uptr size) {
CHECK(size);
@@ -433,16 +432,16 @@ inline uptr RoundUpToPowerOfTwo(uptr size) {
return 1ULL << (up + 1);
}
-inline uptr RoundUpTo(uptr size, uptr boundary) {
+inline constexpr uptr RoundUpTo(uptr size, uptr boundary) {
RAW_CHECK(IsPowerOfTwo(boundary));
return (size + boundary - 1) & ~(boundary - 1);
}
-inline uptr RoundDownTo(uptr x, uptr boundary) {
+inline constexpr uptr RoundDownTo(uptr x, uptr boundary) {
return x & ~(boundary - 1);
}
-inline bool IsAligned(uptr a, uptr alignment) {
+inline constexpr bool IsAligned(uptr a, uptr alignment) {
return (a & (alignment - 1)) == 0;
}
@@ -618,7 +617,7 @@ class InternalScopedString {
buffer_.resize(1);
buffer_[0] = '\0';
}
- void append(const char *format, ...);
+ void append(const char *format, ...) FORMAT(2, 3);
const char *data() const { return buffer_.data(); }
char *data() { return buffer_.data(); }
@@ -697,7 +696,8 @@ enum ModuleArch {
kModuleArchARMV7S,
kModuleArchARMV7K,
kModuleArchARM64,
- kModuleArchRISCV64
+ kModuleArchRISCV64,
+ kModuleArchHexagon
};
// Sorts and removes duplicates from the container.
@@ -721,12 +721,15 @@ void SortAndDedup(Container &v, Compare comp = {}) {
v.resize(last + 1);
}
+constexpr uptr kDefaultFileMaxSize = FIRST_32_SECOND_64(1 << 26, 1 << 28);
+
// Opens the file 'file_name" and reads up to 'max_len' bytes.
// The resulting buffer is mmaped and stored in '*buff'.
// Returns true if file was successfully opened and read.
bool ReadFileToVector(const char *file_name,
InternalMmapVectorNoCtor<char> *buff,
- uptr max_len = 1 << 26, error_t *errno_p = nullptr);
+ uptr max_len = kDefaultFileMaxSize,
+ error_t *errno_p = nullptr);
// Opens the file 'file_name" and reads up to 'max_len' bytes.
// This function is less I/O efficient than ReadFileToVector as it may reread
@@ -737,7 +740,7 @@ bool ReadFileToVector(const char *file_name,
// The total number of read bytes is stored in '*read_len'.
// Returns true if file was successfully opened and read.
bool ReadFileToBuffer(const char *file_name, char **buff, uptr *buff_size,
- uptr *read_len, uptr max_len = 1 << 26,
+ uptr *read_len, uptr max_len = kDefaultFileMaxSize,
error_t *errno_p = nullptr);
// When adding a new architecture, don't forget to also update
@@ -764,6 +767,8 @@ inline const char *ModuleArchToString(ModuleArch arch) {
return "arm64";
case kModuleArchRISCV64:
return "riscv64";
+ case kModuleArchHexagon:
+ return "hexagon";
}
CHECK(0 && "Invalid module arch");
return "";
@@ -1063,17 +1068,10 @@ class ArrayRef {
T *end_ = nullptr;
};
-#define PRINTF_128(v) \
- (*((u8 *)&v + 0)), (*((u8 *)&v + 1)), (*((u8 *)&v + 2)), (*((u8 *)&v + 3)), \
- (*((u8 *)&v + 4)), (*((u8 *)&v + 5)), (*((u8 *)&v + 6)), \
- (*((u8 *)&v + 7)), (*((u8 *)&v + 8)), (*((u8 *)&v + 9)), \
- (*((u8 *)&v + 10)), (*((u8 *)&v + 11)), (*((u8 *)&v + 12)), \
- (*((u8 *)&v + 13)), (*((u8 *)&v + 14)), (*((u8 *)&v + 15))
-
} // namespace __sanitizer
inline void *operator new(__sanitizer::operator_new_size_type size,
- __sanitizer::LowLevelAllocator &alloc) { // NOLINT
+ __sanitizer::LowLevelAllocator &alloc) {
return alloc.Allocate(size);
}
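With IsPowerOfTwo, RoundUpTo, RoundDownTo and IsAligned now constexpr in the hunks above, callers can use them in constant expressions. A standalone restatement, minus the RAW_CHECK, purely to illustrate compile-time use (uptr is assumed to be a pointer-sized unsigned integer):

    #include <cstdint>

    using uptr = uintptr_t; // assumption: matches the runtime's uptr

    constexpr bool IsPowerOfTwo(uptr x) { return (x & (x - 1)) == 0; }
    constexpr uptr RoundUpTo(uptr size, uptr boundary) {
      return (size + boundary - 1) & ~(boundary - 1);
    }
    constexpr uptr RoundDownTo(uptr x, uptr boundary) { return x & ~(boundary - 1); }
    constexpr bool IsAligned(uptr a, uptr alignment) {
      return (a & (alignment - 1)) == 0;
    }

    static_assert(IsPowerOfTwo(4096), "power of two");
    static_assert(RoundUpTo(4097, 4096) == 8192, "round up");
    static_assert(RoundDownTo(4097, 4096) == 4096, "round down");
    static_assert(IsAligned(8192, 4096), "aligned");

    int main() { return 0; }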
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
index 6205d853a4c9..abb38ccfa15d 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
@@ -204,7 +204,7 @@ extern const short *_tolower_tab_;
#define COMMON_INTERCEPTOR_READ_STRING(ctx, s, n) \
COMMON_INTERCEPTOR_READ_RANGE((ctx), (s), \
- common_flags()->strict_string_checks ? (REAL(strlen)(s)) + 1 : (n) )
+ common_flags()->strict_string_checks ? (internal_strlen(s)) + 1 : (n) )
#ifndef COMMON_INTERCEPTOR_ON_DLOPEN
#define COMMON_INTERCEPTOR_ON_DLOPEN(filename, flag) \
@@ -435,7 +435,7 @@ INTERCEPTOR(char*, textdomain, const char *domainname) {
if (domainname) COMMON_INTERCEPTOR_READ_STRING(ctx, domainname, 0);
char *domain = REAL(textdomain)(domainname);
if (domain) {
- COMMON_INTERCEPTOR_INITIALIZE_RANGE(domain, REAL(strlen)(domain) + 1);
+ COMMON_INTERCEPTOR_INITIALIZE_RANGE(domain, internal_strlen(domain) + 1);
}
return domain;
}
@@ -575,8 +575,8 @@ INTERCEPTOR(int, strncasecmp, const char *s1, const char *s2, SIZE_T size) {
#if SANITIZER_INTERCEPT_STRSTR || SANITIZER_INTERCEPT_STRCASESTR
static inline void StrstrCheck(void *ctx, char *r, const char *s1,
const char *s2) {
- uptr len1 = REAL(strlen)(s1);
- uptr len2 = REAL(strlen)(s2);
+ uptr len1 = internal_strlen(s1);
+ uptr len2 = internal_strlen(s2);
COMMON_INTERCEPTOR_READ_STRING(ctx, s1, r ? r - s1 + len2 : len1 + 1);
COMMON_INTERCEPTOR_READ_RANGE(ctx, s2, len2 + 1);
}
@@ -640,10 +640,10 @@ INTERCEPTOR(char*, strtok, char *str, const char *delimiters) {
// for subsequent calls). We do not need to check strtok's result.
// As the delimiters can change, we check them every call.
if (str != nullptr) {
- COMMON_INTERCEPTOR_READ_RANGE(ctx, str, REAL(strlen)(str) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, str, internal_strlen(str) + 1);
}
COMMON_INTERCEPTOR_READ_RANGE(ctx, delimiters,
- REAL(strlen)(delimiters) + 1);
+ internal_strlen(delimiters) + 1);
return REAL(strtok)(str, delimiters);
} else {
// However, when strict_string_checks is disabled we cannot check the
@@ -657,11 +657,11 @@ INTERCEPTOR(char*, strtok, char *str, const char *delimiters) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, delimiters, 1);
char *result = REAL(strtok)(str, delimiters);
if (result != nullptr) {
- COMMON_INTERCEPTOR_READ_RANGE(ctx, result, REAL(strlen)(result) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, result, internal_strlen(result) + 1);
} else if (str != nullptr) {
// No delimiter were found, it's safe to assume that the entire str was
// scanned.
- COMMON_INTERCEPTOR_READ_RANGE(ctx, str, REAL(strlen)(str) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, str, internal_strlen(str) + 1);
}
return result;
}
@@ -706,7 +706,7 @@ INTERCEPTOR(char*, strchr, const char *s, int c) {
if (common_flags()->intercept_strchr) {
// Keep strlen as macro argument, as macro may ignore it.
COMMON_INTERCEPTOR_READ_STRING(ctx, s,
- (result ? result - s : REAL(strlen)(s)) + 1);
+ (result ? result - s : internal_strlen(s)) + 1);
}
return result;
}
@@ -737,7 +737,7 @@ INTERCEPTOR(char*, strrchr, const char *s, int c) {
return internal_strrchr(s, c);
COMMON_INTERCEPTOR_ENTER(ctx, strrchr, s, c);
if (common_flags()->intercept_strchr)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, s, REAL(strlen)(s) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, s, internal_strlen(s) + 1);
return REAL(strrchr)(s, c);
}
#define INIT_STRRCHR COMMON_INTERCEPT_FUNCTION(strrchr)
@@ -751,7 +751,7 @@ INTERCEPTOR(SIZE_T, strspn, const char *s1, const char *s2) {
COMMON_INTERCEPTOR_ENTER(ctx, strspn, s1, s2);
SIZE_T r = REAL(strspn)(s1, s2);
if (common_flags()->intercept_strspn) {
- COMMON_INTERCEPTOR_READ_RANGE(ctx, s2, REAL(strlen)(s2) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, s2, internal_strlen(s2) + 1);
COMMON_INTERCEPTOR_READ_STRING(ctx, s1, r + 1);
}
return r;
@@ -762,7 +762,7 @@ INTERCEPTOR(SIZE_T, strcspn, const char *s1, const char *s2) {
COMMON_INTERCEPTOR_ENTER(ctx, strcspn, s1, s2);
SIZE_T r = REAL(strcspn)(s1, s2);
if (common_flags()->intercept_strspn) {
- COMMON_INTERCEPTOR_READ_RANGE(ctx, s2, REAL(strlen)(s2) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, s2, internal_strlen(s2) + 1);
COMMON_INTERCEPTOR_READ_STRING(ctx, s1, r + 1);
}
return r;
@@ -781,9 +781,9 @@ INTERCEPTOR(char *, strpbrk, const char *s1, const char *s2) {
COMMON_INTERCEPTOR_ENTER(ctx, strpbrk, s1, s2);
char *r = REAL(strpbrk)(s1, s2);
if (common_flags()->intercept_strpbrk) {
- COMMON_INTERCEPTOR_READ_RANGE(ctx, s2, REAL(strlen)(s2) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, s2, internal_strlen(s2) + 1);
COMMON_INTERCEPTOR_READ_STRING(ctx, s1,
- r ? r - s1 + 1 : REAL(strlen)(s1) + 1);
+ r ? r - s1 + 1 : internal_strlen(s1) + 1);
}
return r;
}
@@ -1251,7 +1251,7 @@ INTERCEPTOR(char *, fgets, char *s, SIZE_T size, void *file) {
// https://github.com/google/sanitizers/issues/321.
char *res = REAL(fgets)(s, size, file);
if (res)
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, s, REAL(strlen)(s) + 1);
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, s, internal_strlen(s) + 1);
return res;
}
#define INIT_FGETS COMMON_INTERCEPT_FUNCTION(fgets)
@@ -1265,7 +1265,7 @@ INTERCEPTOR_WITH_SUFFIX(int, fputs, char *s, void *file) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fputs, s, file);
if (!SANITIZER_MAC || s) { // `fputs(NULL, file)` is supported on Darwin.
- COMMON_INTERCEPTOR_READ_RANGE(ctx, s, REAL(strlen)(s) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, s, internal_strlen(s) + 1);
}
return REAL(fputs)(s, file);
}
@@ -1280,7 +1280,7 @@ INTERCEPTOR(int, puts, char *s) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, puts, s);
if (!SANITIZER_MAC || s) { // `puts(NULL)` is supported on Darwin.
- COMMON_INTERCEPTOR_READ_RANGE(ctx, s, REAL(strlen)(s) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, s, internal_strlen(s) + 1);
}
return REAL(puts)(s);
}
@@ -1334,7 +1334,7 @@ static void unpoison_tm(void *ctx, __sanitizer_tm *tm) {
// Can not use COMMON_INTERCEPTOR_WRITE_RANGE here, because tm->tm_zone
// can point to shared memory and tsan would report a data race.
COMMON_INTERCEPTOR_INITIALIZE_RANGE(tm->tm_zone,
- REAL(strlen(tm->tm_zone)) + 1);
+ internal_strlen(tm->tm_zone) + 1);
}
#endif
}
@@ -1387,7 +1387,7 @@ INTERCEPTOR(char *, ctime, unsigned long *timep) {
char *res = REAL(ctime)(timep);
if (res) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, timep, sizeof(*timep));
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, REAL(strlen)(res) + 1);
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, internal_strlen(res) + 1);
}
return res;
}
@@ -1400,7 +1400,7 @@ INTERCEPTOR(char *, ctime_r, unsigned long *timep, char *result) {
char *res = REAL(ctime_r)(timep, result);
if (res) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, timep, sizeof(*timep));
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, REAL(strlen)(res) + 1);
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, internal_strlen(res) + 1);
}
return res;
}
@@ -1413,7 +1413,7 @@ INTERCEPTOR(char *, asctime, __sanitizer_tm *tm) {
char *res = REAL(asctime)(tm);
if (res) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, tm, sizeof(*tm));
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, REAL(strlen)(res) + 1);
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, internal_strlen(res) + 1);
}
return res;
}
@@ -1426,7 +1426,7 @@ INTERCEPTOR(char *, asctime_r, __sanitizer_tm *tm, char *result) {
char *res = REAL(asctime_r)(tm, result);
if (res) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, tm, sizeof(*tm));
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, REAL(strlen)(res) + 1);
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, internal_strlen(res) + 1);
}
return res;
}
@@ -1463,7 +1463,7 @@ INTERCEPTOR(char *, strptime, char *s, char *format, __sanitizer_tm *tm) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strptime, s, format, tm);
if (format)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, format, REAL(strlen)(format) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
@@ -1843,9 +1843,9 @@ INTERCEPTOR(int, ioctl, int d, unsigned long request, ...) {
const ioctl_desc *desc = ioctl_lookup(request);
ioctl_desc decoded_desc;
if (!desc) {
- VPrintf(2, "Decoding unknown ioctl 0x%x\n", request);
+ VPrintf(2, "Decoding unknown ioctl 0x%lx\n", request);
if (!ioctl_decode(request, &decoded_desc))
- Printf("WARNING: failed decoding unknown ioctl 0x%x\n", request);
+ Printf("WARNING: failed decoding unknown ioctl 0x%lx\n", request);
else
desc = &decoded_desc;
}
@@ -1869,26 +1869,26 @@ UNUSED static void unpoison_passwd(void *ctx, __sanitizer_passwd *pwd) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, pwd, sizeof(*pwd));
if (pwd->pw_name)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, pwd->pw_name,
- REAL(strlen)(pwd->pw_name) + 1);
+ internal_strlen(pwd->pw_name) + 1);
if (pwd->pw_passwd)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, pwd->pw_passwd,
- REAL(strlen)(pwd->pw_passwd) + 1);
+ internal_strlen(pwd->pw_passwd) + 1);
#if !SANITIZER_ANDROID
if (pwd->pw_gecos)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, pwd->pw_gecos,
- REAL(strlen)(pwd->pw_gecos) + 1);
+ internal_strlen(pwd->pw_gecos) + 1);
#endif
#if SANITIZER_MAC || SANITIZER_FREEBSD || SANITIZER_NETBSD
if (pwd->pw_class)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, pwd->pw_class,
- REAL(strlen)(pwd->pw_class) + 1);
+ internal_strlen(pwd->pw_class) + 1);
#endif
if (pwd->pw_dir)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, pwd->pw_dir,
- REAL(strlen)(pwd->pw_dir) + 1);
+ internal_strlen(pwd->pw_dir) + 1);
if (pwd->pw_shell)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, pwd->pw_shell,
- REAL(strlen)(pwd->pw_shell) + 1);
+ internal_strlen(pwd->pw_shell) + 1);
}
}
@@ -1897,13 +1897,13 @@ UNUSED static void unpoison_group(void *ctx, __sanitizer_group *grp) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, grp, sizeof(*grp));
if (grp->gr_name)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, grp->gr_name,
- REAL(strlen)(grp->gr_name) + 1);
+ internal_strlen(grp->gr_name) + 1);
if (grp->gr_passwd)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, grp->gr_passwd,
- REAL(strlen)(grp->gr_passwd) + 1);
+ internal_strlen(grp->gr_passwd) + 1);
char **p = grp->gr_mem;
for (; *p; ++p) {
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *p, REAL(strlen)(*p) + 1);
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *p, internal_strlen(*p) + 1);
}
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, grp->gr_mem,
(p - grp->gr_mem + 1) * sizeof(*p));
@@ -1916,7 +1916,7 @@ INTERCEPTOR(__sanitizer_passwd *, getpwnam, const char *name) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getpwnam, name);
if (name)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, name, REAL(strlen)(name) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
__sanitizer_passwd *res = REAL(getpwnam)(name);
unpoison_passwd(ctx, res);
return res;
@@ -1931,7 +1931,7 @@ INTERCEPTOR(__sanitizer_passwd *, getpwuid, u32 uid) {
INTERCEPTOR(__sanitizer_group *, getgrnam, const char *name) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getgrnam, name);
- COMMON_INTERCEPTOR_READ_RANGE(ctx, name, REAL(strlen)(name) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
__sanitizer_group *res = REAL(getgrnam)(name);
unpoison_group(ctx, res);
return res;
@@ -1957,7 +1957,7 @@ INTERCEPTOR(int, getpwnam_r, const char *name, __sanitizer_passwd *pwd,
char *buf, SIZE_T buflen, __sanitizer_passwd **result) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getpwnam_r, name, pwd, buf, buflen, result);
- COMMON_INTERCEPTOR_READ_RANGE(ctx, name, REAL(strlen)(name) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
@@ -1984,7 +1984,7 @@ INTERCEPTOR(int, getgrnam_r, const char *name, __sanitizer_group *grp,
char *buf, SIZE_T buflen, __sanitizer_group **result) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getgrnam_r, name, grp, buf, buflen, result);
- COMMON_INTERCEPTOR_READ_RANGE(ctx, name, REAL(strlen)(name) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
@@ -2229,8 +2229,20 @@ INTERCEPTOR(int, clock_getcpuclockid, pid_t pid,
return res;
}
-#define INIT_CLOCK_GETCPUCLOCKID \
- COMMON_INTERCEPT_FUNCTION(clock_getcpuclockid);
+INTERCEPTOR(int, pthread_getcpuclockid, uptr thread,
+ __sanitizer_clockid_t *clockid) {
+ void *ctx;
+ COMMON_INTERCEPTOR_ENTER(ctx, pthread_getcpuclockid, thread, clockid);
+ int res = REAL(pthread_getcpuclockid)(thread, clockid);
+ if (!res && clockid) {
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, clockid, sizeof *clockid);
+ }
+ return res;
+}
+
+#define INIT_CLOCK_GETCPUCLOCKID \
+ COMMON_INTERCEPT_FUNCTION(clock_getcpuclockid); \
+ COMMON_INTERCEPT_FUNCTION(pthread_getcpuclockid);
#else
#define INIT_CLOCK_GETCPUCLOCKID
#endif
@@ -2289,7 +2301,7 @@ static void unpoison_glob_t(void *ctx, __sanitizer_glob_t *pglob) {
ctx, pglob->gl_pathv, (pglob->gl_pathc + 1) * sizeof(*pglob->gl_pathv));
for (SIZE_T i = 0; i < pglob->gl_pathc; ++i) {
char *p = pglob->gl_pathv[i];
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p, REAL(strlen)(p) + 1);
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p, internal_strlen(p) + 1);
}
}
@@ -2319,19 +2331,19 @@ static void *wrapped_gl_readdir(void *dir) {
static void *wrapped_gl_opendir(const char *s) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(1);
- COMMON_INTERCEPTOR_INITIALIZE_RANGE(s, REAL(strlen)(s) + 1);
+ COMMON_INTERCEPTOR_INITIALIZE_RANGE(s, internal_strlen(s) + 1);
return pglob_copy->gl_opendir(s);
}
static int wrapped_gl_lstat(const char *s, void *st) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(2);
- COMMON_INTERCEPTOR_INITIALIZE_RANGE(s, REAL(strlen)(s) + 1);
+ COMMON_INTERCEPTOR_INITIALIZE_RANGE(s, internal_strlen(s) + 1);
return pglob_copy->gl_lstat(s, st);
}
static int wrapped_gl_stat(const char *s, void *st) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(2);
- COMMON_INTERCEPTOR_INITIALIZE_RANGE(s, REAL(strlen)(s) + 1);
+ COMMON_INTERCEPTOR_INITIALIZE_RANGE(s, internal_strlen(s) + 1);
return pglob_copy->gl_stat(s, st);
}
@@ -2410,6 +2422,60 @@ INTERCEPTOR(int, glob64, const char *pattern, int flags,
#define INIT_GLOB64
#endif // SANITIZER_INTERCEPT_GLOB64
+#if SANITIZER_INTERCEPT_POSIX_SPAWN
+
+template <class RealSpawnPtr>
+static int PosixSpawnImpl(void *ctx, RealSpawnPtr *real_posix_spawn, pid_t *pid,
+ const char *file_or_path, const void *file_actions,
+ const void *attrp, char *const argv[],
+ char *const envp[]) {
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, file_or_path,
+ internal_strlen(file_or_path) + 1);
+ if (argv) {
+ for (char *const *s = argv; ; ++s) {
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, s, sizeof(*s));
+ if (!*s) break;
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, *s, internal_strlen(*s) + 1);
+ }
+ }
+ if (envp) {
+ for (char *const *s = envp; ; ++s) {
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, s, sizeof(*s));
+ if (!*s) break;
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, *s, internal_strlen(*s) + 1);
+ }
+ }
+ int res =
+ real_posix_spawn(pid, file_or_path, file_actions, attrp, argv, envp);
+ if (res == 0)
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, pid, sizeof(*pid));
+ return res;
+}
+INTERCEPTOR(int, posix_spawn, pid_t *pid, const char *path,
+ const void *file_actions, const void *attrp, char *const argv[],
+ char *const envp[]) {
+ void *ctx;
+ COMMON_INTERCEPTOR_ENTER(ctx, posix_spawn, pid, path, file_actions, attrp,
+ argv, envp);
+ return PosixSpawnImpl(ctx, REAL(posix_spawn), pid, path, file_actions, attrp,
+ argv, envp);
+}
+INTERCEPTOR(int, posix_spawnp, pid_t *pid, const char *file,
+ const void *file_actions, const void *attrp, char *const argv[],
+ char *const envp[]) {
+ void *ctx;
+ COMMON_INTERCEPTOR_ENTER(ctx, posix_spawnp, pid, file, file_actions, attrp,
+ argv, envp);
+ return PosixSpawnImpl(ctx, REAL(posix_spawnp), pid, file, file_actions, attrp,
+ argv, envp);
+}
+# define INIT_POSIX_SPAWN \
+ COMMON_INTERCEPT_FUNCTION(posix_spawn); \
+ COMMON_INTERCEPT_FUNCTION(posix_spawnp);
+#else // SANITIZER_INTERCEPT_POSIX_SPAWN
+# define INIT_POSIX_SPAWN
+#endif // SANITIZER_INTERCEPT_POSIX_SPAWN
+
#if SANITIZER_INTERCEPT_WAIT
// According to sys/wait.h, wait(), waitid(), waitpid() may have symbol version
// suffixes on Darwin. See the declaration of INTERCEPTOR_WITH_SUFFIX for
@@ -2519,7 +2585,7 @@ INTERCEPTOR(char *, inet_ntop, int af, const void *src, char *dst, u32 size) {
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
char *res = REAL(inet_ntop)(af, src, dst, size);
- if (res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, REAL(strlen)(res) + 1);
+ if (res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, internal_strlen(res) + 1);
return res;
}
INTERCEPTOR(int, inet_pton, int af, const char *src, void *dst) {
@@ -2548,7 +2614,7 @@ INTERCEPTOR(int, inet_pton, int af, const char *src, void *dst) {
INTERCEPTOR(int, inet_aton, const char *cp, void *dst) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, inet_aton, cp, dst);
- if (cp) COMMON_INTERCEPTOR_READ_RANGE(ctx, cp, REAL(strlen)(cp) + 1);
+ if (cp) COMMON_INTERCEPTOR_READ_RANGE(ctx, cp, internal_strlen(cp) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
@@ -2590,9 +2656,9 @@ INTERCEPTOR(int, getaddrinfo, char *node, char *service,
struct __sanitizer_addrinfo **out) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getaddrinfo, node, service, hints, out);
- if (node) COMMON_INTERCEPTOR_READ_RANGE(ctx, node, REAL(strlen)(node) + 1);
+ if (node) COMMON_INTERCEPTOR_READ_RANGE(ctx, node, internal_strlen(node) + 1);
if (service)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, service, REAL(strlen)(service) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, service, internal_strlen(service) + 1);
if (hints)
COMMON_INTERCEPTOR_READ_RANGE(ctx, hints, sizeof(__sanitizer_addrinfo));
// FIXME: under ASan the call below may write to freed memory and corrupt
@@ -2608,7 +2674,7 @@ INTERCEPTOR(int, getaddrinfo, char *node, char *service,
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p->ai_addr, p->ai_addrlen);
if (p->ai_canonname)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p->ai_canonname,
- REAL(strlen)(p->ai_canonname) + 1);
+ internal_strlen(p->ai_canonname) + 1);
p = p->ai_next;
}
}
@@ -2634,9 +2700,9 @@ INTERCEPTOR(int, getnameinfo, void *sockaddr, unsigned salen, char *host,
REAL(getnameinfo)(sockaddr, salen, host, hostlen, serv, servlen, flags);
if (res == 0) {
if (host && hostlen)
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, host, REAL(strlen)(host) + 1);
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, host, internal_strlen(host) + 1);
if (serv && servlen)
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, serv, REAL(strlen)(serv) + 1);
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, serv, internal_strlen(serv) + 1);
}
return res;
}
@@ -2646,17 +2712,20 @@ INTERCEPTOR(int, getnameinfo, void *sockaddr, unsigned salen, char *host,
#endif
#if SANITIZER_INTERCEPT_GETSOCKNAME
-INTERCEPTOR(int, getsockname, int sock_fd, void *addr, int *addrlen) {
+INTERCEPTOR(int, getsockname, int sock_fd, void *addr, unsigned *addrlen) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getsockname, sock_fd, addr, addrlen);
- COMMON_INTERCEPTOR_READ_RANGE(ctx, addrlen, sizeof(*addrlen));
- int addrlen_in = *addrlen;
+ unsigned addr_sz;
+ if (addrlen) {
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, addrlen, sizeof(*addrlen));
+ addr_sz = *addrlen;
+ }
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(getsockname)(sock_fd, addr, addrlen);
- if (res == 0) {
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, addr, Min(addrlen_in, *addrlen));
+ if (!res && addr && addrlen) {
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, addr, Min(addr_sz, *addrlen));
}
return res;
}
@@ -2669,10 +2738,10 @@ INTERCEPTOR(int, getsockname, int sock_fd, void *addr, int *addrlen) {
static void write_hostent(void *ctx, struct __sanitizer_hostent *h) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, h, sizeof(__sanitizer_hostent));
if (h->h_name)
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, h->h_name, REAL(strlen)(h->h_name) + 1);
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, h->h_name, internal_strlen(h->h_name) + 1);
char **p = h->h_aliases;
while (*p) {
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *p, REAL(strlen)(*p) + 1);
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *p, internal_strlen(*p) + 1);
++p;
}
COMMON_INTERCEPTOR_WRITE_RANGE(
@@ -3161,13 +3230,17 @@ INTERCEPTOR(int, getpeername, int sockfd, void *addr, unsigned *addrlen) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getpeername, sockfd, addr, addrlen);
unsigned addr_sz;
- if (addrlen) addr_sz = *addrlen;
+ if (addrlen) {
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, addrlen, sizeof(*addrlen));
+ addr_sz = *addrlen;
+ }
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(getpeername)(sockfd, addr, addrlen);
- if (!res && addr && addrlen)
+ if (!res && addr && addrlen) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, addr, Min(addr_sz, *addrlen));
+ }
return res;
}
#define INIT_GETPEERNAME COMMON_INTERCEPT_FUNCTION(getpeername);
@@ -3196,7 +3269,7 @@ INTERCEPTOR(int, sysinfo, void *info) {
INTERCEPTOR(__sanitizer_dirent *, opendir, const char *path) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, opendir, path);
- COMMON_INTERCEPTOR_READ_RANGE(ctx, path, REAL(strlen)(path) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
__sanitizer_dirent *res = REAL(opendir)(path);
if (res)
COMMON_INTERCEPTOR_DIR_ACQUIRE(ctx, path);
@@ -3351,10 +3424,10 @@ INTERCEPTOR(char *, setlocale, int category, char *locale) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, setlocale, category, locale);
if (locale)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, locale, REAL(strlen)(locale) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, locale, internal_strlen(locale) + 1);
char *res = REAL(setlocale)(category, locale);
if (res) {
- COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, REAL(strlen)(res) + 1);
+ COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, internal_strlen(res) + 1);
unpoison_ctype_arrays(ctx);
}
return res;
@@ -3373,7 +3446,7 @@ INTERCEPTOR(char *, getcwd, char *buf, SIZE_T size) {
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
char *res = REAL(getcwd)(buf, size);
- if (res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, REAL(strlen)(res) + 1);
+ if (res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, internal_strlen(res) + 1);
return res;
}
#define INIT_GETCWD COMMON_INTERCEPT_FUNCTION(getcwd);
@@ -3389,7 +3462,7 @@ INTERCEPTOR(char *, get_current_dir_name, int fake) {
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
char *res = REAL(get_current_dir_name)(fake);
- if (res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, REAL(strlen)(res) + 1);
+ if (res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, internal_strlen(res) + 1);
return res;
}
@@ -3663,7 +3736,7 @@ INTERCEPTOR(int, tcgetattr, int fd, void *termios_p) {
INTERCEPTOR(char *, realpath, const char *path, char *resolved_path) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, realpath, path, resolved_path);
- if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, REAL(strlen)(path) + 1);
+ if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
// Workaround a bug in glibc where dlsym(RTLD_NEXT, ...) returns the oldest
// version of a versioned symbol. For realpath(), this gives us something
@@ -3674,11 +3747,12 @@ INTERCEPTOR(char *, realpath, const char *path, char *resolved_path) {
allocated_path = resolved_path = (char *)WRAP(malloc)(path_max + 1);
char *res = REAL(realpath)(path, resolved_path);
- if (allocated_path && !res) WRAP(free)(allocated_path);
- if (res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, REAL(strlen)(res) + 1);
+ if (allocated_path && !res)
+ WRAP(free)(allocated_path);
+ if (res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, internal_strlen(res) + 1);
return res;
}
-#define INIT_REALPATH COMMON_INTERCEPT_FUNCTION(realpath);
+# define INIT_REALPATH COMMON_INTERCEPT_FUNCTION(realpath);
#else
#define INIT_REALPATH
#endif
@@ -3687,9 +3761,9 @@ INTERCEPTOR(char *, realpath, const char *path, char *resolved_path) {
INTERCEPTOR(char *, canonicalize_file_name, const char *path) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, canonicalize_file_name, path);
- if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, REAL(strlen)(path) + 1);
+ if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
char *res = REAL(canonicalize_file_name)(path);
- if (res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, REAL(strlen)(res) + 1);
+ if (res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, internal_strlen(res) + 1);
return res;
}
#define INIT_CANONICALIZE_FILE_NAME \
@@ -3750,7 +3824,7 @@ INTERCEPTOR(char *, strerror, int errnum) {
COMMON_INTERCEPTOR_ENTER(ctx, strerror, errnum);
COMMON_INTERCEPTOR_STRERROR();
char *res = REAL(strerror)(errnum);
- if (res) COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, REAL(strlen)(res) + 1);
+ if (res) COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, internal_strlen(res) + 1);
return res;
}
#define INIT_STRERROR COMMON_INTERCEPT_FUNCTION(strerror);
@@ -3792,9 +3866,9 @@ INTERCEPTOR(char *, strerror_r, int errnum, char *buf, SIZE_T buflen) {
// https://github.com/google/sanitizers/issues/321.
char *res = REAL(strerror_r)(errnum, buf, buflen);
if (res == buf)
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, REAL(strlen)(res) + 1);
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, internal_strlen(res) + 1);
else
- COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, REAL(strlen)(res) + 1);
+ COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, internal_strlen(res) + 1);
return res;
}
#endif //(_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600) && !_GNU_SOURCE ||
@@ -3814,7 +3888,7 @@ INTERCEPTOR(int, __xpg_strerror_r, int errnum, char *buf, SIZE_T buflen) {
int res = REAL(__xpg_strerror_r)(errnum, buf, buflen);
// This version always returns a null-terminated string.
if (buf && buflen)
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, REAL(strlen)(buf) + 1);
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, internal_strlen(buf) + 1);
return res;
}
#define INIT_XPG_STRERROR_R COMMON_INTERCEPT_FUNCTION(__xpg_strerror_r);
@@ -3850,7 +3924,7 @@ INTERCEPTOR(int, scandir, char *dirp, __sanitizer_dirent ***namelist,
scandir_filter_f filter, scandir_compar_f compar) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, scandir, dirp, namelist, filter, compar);
- if (dirp) COMMON_INTERCEPTOR_READ_RANGE(ctx, dirp, REAL(strlen)(dirp) + 1);
+ if (dirp) COMMON_INTERCEPTOR_READ_RANGE(ctx, dirp, internal_strlen(dirp) + 1);
scandir_filter = filter;
scandir_compar = compar;
// FIXME: under ASan the call below may write to freed memory and corrupt
@@ -3903,7 +3977,7 @@ INTERCEPTOR(int, scandir64, char *dirp, __sanitizer_dirent64 ***namelist,
scandir64_filter_f filter, scandir64_compar_f compar) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, scandir64, dirp, namelist, filter, compar);
- if (dirp) COMMON_INTERCEPTOR_READ_RANGE(ctx, dirp, REAL(strlen)(dirp) + 1);
+ if (dirp) COMMON_INTERCEPTOR_READ_RANGE(ctx, dirp, internal_strlen(dirp) + 1);
scandir64_filter = filter;
scandir64_compar = compar;
// FIXME: under ASan the call below may write to freed memory and corrupt
@@ -3999,19 +4073,20 @@ INTERCEPTOR(int, ppoll, __sanitizer_pollfd *fds, __sanitizer_nfds_t nfds,
INTERCEPTOR(int, wordexp, char *s, __sanitizer_wordexp_t *p, int flags) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, wordexp, s, p, flags);
- if (s) COMMON_INTERCEPTOR_READ_RANGE(ctx, s, REAL(strlen)(s) + 1);
+ if (s) COMMON_INTERCEPTOR_READ_RANGE(ctx, s, internal_strlen(s) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(wordexp)(s, p, flags);
if (!res && p) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p, sizeof(*p));
- if (p->we_wordc)
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p->we_wordv,
- sizeof(*p->we_wordv) * p->we_wordc);
- for (uptr i = 0; i < p->we_wordc; ++i) {
+ uptr we_wordc =
+ ((flags & wordexp_wrde_dooffs) ? p->we_offs : 0) + p->we_wordc;
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p->we_wordv,
+ sizeof(*p->we_wordv) * (we_wordc + 1));
+ for (uptr i = 0; i < we_wordc; ++i) {
char *w = p->we_wordv[i];
- if (w) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, w, REAL(strlen)(w) + 1);
+ if (w) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, w, internal_strlen(w) + 1);
}
}
return res;
@@ -4217,7 +4292,7 @@ INTERCEPTOR(char **, backtrace_symbols, void **buffer, int size) {
if (res && size) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, size * sizeof(*res));
for (int i = 0; i < size; ++i)
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res[i], REAL(strlen(res[i])) + 1);
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res[i], internal_strlen(res[i]) + 1);
}
return res;
}
@@ -4335,16 +4410,16 @@ static void write_mntent(void *ctx, __sanitizer_mntent *mnt) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, mnt, sizeof(*mnt));
if (mnt->mnt_fsname)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, mnt->mnt_fsname,
- REAL(strlen)(mnt->mnt_fsname) + 1);
+ internal_strlen(mnt->mnt_fsname) + 1);
if (mnt->mnt_dir)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, mnt->mnt_dir,
- REAL(strlen)(mnt->mnt_dir) + 1);
+ internal_strlen(mnt->mnt_dir) + 1);
if (mnt->mnt_type)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, mnt->mnt_type,
- REAL(strlen)(mnt->mnt_type) + 1);
+ internal_strlen(mnt->mnt_type) + 1);
if (mnt->mnt_opts)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, mnt->mnt_opts,
- REAL(strlen)(mnt->mnt_opts) + 1);
+ internal_strlen(mnt->mnt_opts) + 1);
}
#endif
@@ -4379,7 +4454,7 @@ INTERCEPTOR(__sanitizer_mntent *, getmntent_r, void *fp,
INTERCEPTOR(int, statfs, char *path, void *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, statfs, path, buf);
- if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, REAL(strlen)(path) + 1);
+ if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
@@ -4408,7 +4483,7 @@ INTERCEPTOR(int, fstatfs, int fd, void *buf) {
INTERCEPTOR(int, statfs64, char *path, void *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, statfs64, path, buf);
- if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, REAL(strlen)(path) + 1);
+ if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
@@ -4437,7 +4512,7 @@ INTERCEPTOR(int, fstatfs64, int fd, void *buf) {
INTERCEPTOR(int, statvfs, char *path, void *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, statvfs, path, buf);
- if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, REAL(strlen)(path) + 1);
+ if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
@@ -4471,7 +4546,7 @@ INTERCEPTOR(int, fstatvfs, int fd, void *buf) {
INTERCEPTOR(int, statvfs64, char *path, void *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, statvfs64, path, buf);
- if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, REAL(strlen)(path) + 1);
+ if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
@@ -4500,7 +4575,7 @@ INTERCEPTOR(int, fstatvfs64, int fd, void *buf) {
INTERCEPTOR(int, initgroups, char *user, u32 group) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, initgroups, user, group);
- if (user) COMMON_INTERCEPTOR_READ_RANGE(ctx, user, REAL(strlen)(user) + 1);
+ if (user) COMMON_INTERCEPTOR_READ_RANGE(ctx, user, internal_strlen(user) + 1);
int res = REAL(initgroups)(user, group);
return res;
}
@@ -4515,13 +4590,13 @@ INTERCEPTOR(char *, ether_ntoa, __sanitizer_ether_addr *addr) {
COMMON_INTERCEPTOR_ENTER(ctx, ether_ntoa, addr);
if (addr) COMMON_INTERCEPTOR_READ_RANGE(ctx, addr, sizeof(*addr));
char *res = REAL(ether_ntoa)(addr);
- if (res) COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, REAL(strlen)(res) + 1);
+ if (res) COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, internal_strlen(res) + 1);
return res;
}
INTERCEPTOR(__sanitizer_ether_addr *, ether_aton, char *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, ether_aton, buf);
- if (buf) COMMON_INTERCEPTOR_READ_RANGE(ctx, buf, REAL(strlen)(buf) + 1);
+ if (buf) COMMON_INTERCEPTOR_READ_RANGE(ctx, buf, internal_strlen(buf) + 1);
__sanitizer_ether_addr *res = REAL(ether_aton)(buf);
if (res) COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, sizeof(*res));
return res;
@@ -4543,14 +4618,14 @@ INTERCEPTOR(int, ether_ntohost, char *hostname, __sanitizer_ether_addr *addr) {
// https://github.com/google/sanitizers/issues/321.
int res = REAL(ether_ntohost)(hostname, addr);
if (!res && hostname)
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, hostname, REAL(strlen)(hostname) + 1);
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, hostname, internal_strlen(hostname) + 1);
return res;
}
INTERCEPTOR(int, ether_hostton, char *hostname, __sanitizer_ether_addr *addr) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, ether_hostton, hostname, addr);
if (hostname)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, hostname, REAL(strlen)(hostname) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, hostname, internal_strlen(hostname) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
@@ -4562,7 +4637,7 @@ INTERCEPTOR(int, ether_line, char *line, __sanitizer_ether_addr *addr,
char *hostname) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, ether_line, line, addr, hostname);
- if (line) COMMON_INTERCEPTOR_READ_RANGE(ctx, line, REAL(strlen)(line) + 1);
+ if (line) COMMON_INTERCEPTOR_READ_RANGE(ctx, line, internal_strlen(line) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
@@ -4570,7 +4645,7 @@ INTERCEPTOR(int, ether_line, char *line, __sanitizer_ether_addr *addr,
if (!res) {
if (addr) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, addr, sizeof(*addr));
if (hostname)
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, hostname, REAL(strlen)(hostname) + 1);
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, hostname, internal_strlen(hostname) + 1);
}
return res;
}
@@ -4591,14 +4666,14 @@ INTERCEPTOR(char *, ether_ntoa_r, __sanitizer_ether_addr *addr, char *buf) {
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
char *res = REAL(ether_ntoa_r)(addr, buf);
- if (res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, REAL(strlen)(res) + 1);
+ if (res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, internal_strlen(res) + 1);
return res;
}
INTERCEPTOR(__sanitizer_ether_addr *, ether_aton_r, char *buf,
__sanitizer_ether_addr *addr) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, ether_aton_r, buf, addr);
- if (buf) COMMON_INTERCEPTOR_READ_RANGE(ctx, buf, REAL(strlen)(buf) + 1);
+ if (buf) COMMON_INTERCEPTOR_READ_RANGE(ctx, buf, internal_strlen(buf) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
@@ -4864,9 +4939,9 @@ INTERCEPTOR(char *, tmpnam, char *s) {
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, s, REAL(strlen)(s) + 1);
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, s, internal_strlen(s) + 1);
else
- COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, REAL(strlen)(res) + 1);
+ COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, internal_strlen(res) + 1);
}
return res;
}
@@ -4883,7 +4958,7 @@ INTERCEPTOR(char *, tmpnam_r, char *s) {
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
char *res = REAL(tmpnam_r)(s);
- if (res && s) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, s, REAL(strlen)(s) + 1);
+ if (res && s) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, s, internal_strlen(s) + 1);
return res;
}
#define INIT_TMPNAM_R COMMON_INTERCEPT_FUNCTION(tmpnam_r);
@@ -4897,7 +4972,7 @@ INTERCEPTOR(char *, ptsname, int fd) {
COMMON_INTERCEPTOR_ENTER(ctx, ptsname, fd);
char *res = REAL(ptsname)(fd);
if (res != nullptr)
- COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, REAL(strlen)(res) + 1);
+ COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, internal_strlen(res) + 1);
return res;
}
#define INIT_PTSNAME COMMON_INTERCEPT_FUNCTION(ptsname);
@@ -4911,7 +4986,7 @@ INTERCEPTOR(int, ptsname_r, int fd, char *name, SIZE_T namesize) {
COMMON_INTERCEPTOR_ENTER(ctx, ptsname_r, fd, name, namesize);
int res = REAL(ptsname_r)(fd, name, namesize);
if (res == 0)
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, name, REAL(strlen)(name) + 1);
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, name, internal_strlen(name) + 1);
return res;
}
#define INIT_PTSNAME_R COMMON_INTERCEPT_FUNCTION(ptsname_r);
@@ -4925,7 +5000,7 @@ INTERCEPTOR(char *, ttyname, int fd) {
COMMON_INTERCEPTOR_ENTER(ctx, ttyname, fd);
char *res = REAL(ttyname)(fd);
if (res != nullptr)
- COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, REAL(strlen)(res) + 1);
+ COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, internal_strlen(res) + 1);
return res;
}
#define INIT_TTYNAME COMMON_INTERCEPT_FUNCTION(ttyname);
@@ -4939,7 +5014,7 @@ INTERCEPTOR(int, ttyname_r, int fd, char *name, SIZE_T namesize) {
COMMON_INTERCEPTOR_ENTER(ctx, ttyname_r, fd, name, namesize);
int res = REAL(ttyname_r)(fd, name, namesize);
if (res == 0)
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, name, REAL(strlen)(name) + 1);
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, name, internal_strlen(name) + 1);
return res;
}
#define INIT_TTYNAME_R COMMON_INTERCEPT_FUNCTION(ttyname_r);
@@ -4951,10 +5026,10 @@ INTERCEPTOR(int, ttyname_r, int fd, char *name, SIZE_T namesize) {
INTERCEPTOR(char *, tempnam, char *dir, char *pfx) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, tempnam, dir, pfx);
- if (dir) COMMON_INTERCEPTOR_READ_RANGE(ctx, dir, REAL(strlen)(dir) + 1);
- if (pfx) COMMON_INTERCEPTOR_READ_RANGE(ctx, pfx, REAL(strlen)(pfx) + 1);
+ if (dir) COMMON_INTERCEPTOR_READ_RANGE(ctx, dir, internal_strlen(dir) + 1);
+ if (pfx) COMMON_INTERCEPTOR_READ_RANGE(ctx, pfx, internal_strlen(pfx) + 1);
char *res = REAL(tempnam)(dir, pfx);
- if (res) COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, REAL(strlen)(res) + 1);
+ if (res) COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, internal_strlen(res) + 1);
return res;
}
#define INIT_TEMPNAM COMMON_INTERCEPT_FUNCTION(tempnam);
@@ -5414,7 +5489,7 @@ asm(
INTERCEPTOR(SSIZE_T, listxattr, const char *path, char *list, SIZE_T size) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, listxattr, path, list, size);
- if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, REAL(strlen)(path) + 1);
+ if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
@@ -5427,7 +5502,7 @@ INTERCEPTOR(SSIZE_T, listxattr, const char *path, char *list, SIZE_T size) {
INTERCEPTOR(SSIZE_T, llistxattr, const char *path, char *list, SIZE_T size) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, llistxattr, path, list, size);
- if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, REAL(strlen)(path) + 1);
+ if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
@@ -5458,8 +5533,8 @@ INTERCEPTOR(SSIZE_T, getxattr, const char *path, const char *name, char *value,
SIZE_T size) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getxattr, path, name, value, size);
- if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, REAL(strlen)(path) + 1);
- if (name) COMMON_INTERCEPTOR_READ_RANGE(ctx, name, REAL(strlen)(name) + 1);
+ if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
+ if (name) COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
@@ -5471,8 +5546,8 @@ INTERCEPTOR(SSIZE_T, lgetxattr, const char *path, const char *name, char *value,
SIZE_T size) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, lgetxattr, path, name, value, size);
- if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, REAL(strlen)(path) + 1);
- if (name) COMMON_INTERCEPTOR_READ_RANGE(ctx, name, REAL(strlen)(name) + 1);
+ if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
+ if (name) COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
@@ -5484,7 +5559,7 @@ INTERCEPTOR(SSIZE_T, fgetxattr, int fd, const char *name, char *value,
SIZE_T size) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fgetxattr, fd, name, value, size);
- if (name) COMMON_INTERCEPTOR_READ_RANGE(ctx, name, REAL(strlen)(name) + 1);
+ if (name) COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
@@ -5554,7 +5629,7 @@ INTERCEPTOR(int, getifaddrs, __sanitizer_ifaddrs **ifap) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p, sizeof(__sanitizer_ifaddrs));
if (p->ifa_name)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p->ifa_name,
- REAL(strlen)(p->ifa_name) + 1);
+ internal_strlen(p->ifa_name) + 1);
if (p->ifa_addr)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p->ifa_addr, struct_sockaddr_sz);
if (p->ifa_netmask)
@@ -5584,14 +5659,14 @@ INTERCEPTOR(char *, if_indextoname, unsigned int ifindex, char* ifname) {
// https://github.com/google/sanitizers/issues/321.
char *res = REAL(if_indextoname)(ifindex, ifname);
if (res && ifname)
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ifname, REAL(strlen)(ifname) + 1);
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ifname, internal_strlen(ifname) + 1);
return res;
}
INTERCEPTOR(unsigned int, if_nametoindex, const char* ifname) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, if_nametoindex, ifname);
if (ifname)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, ifname, REAL(strlen)(ifname) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, ifname, internal_strlen(ifname) + 1);
return REAL(if_nametoindex)(ifname);
}
#define INIT_IF_INDEXTONAME \
@@ -5849,7 +5924,7 @@ INTERCEPTOR(int, xdr_string, __sanitizer_XDR *xdrs, char **p,
COMMON_INTERCEPTOR_ENTER(ctx, xdr_string, xdrs, p, maxsize);
if (p && xdrs->x_op == __sanitizer_XDR_ENCODE) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, p, sizeof(*p));
- COMMON_INTERCEPTOR_READ_RANGE(ctx, *p, REAL(strlen)(*p) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, *p, internal_strlen(*p) + 1);
}
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
@@ -5858,7 +5933,7 @@ INTERCEPTOR(int, xdr_string, __sanitizer_XDR *xdrs, char **p,
if (p && xdrs->x_op == __sanitizer_XDR_DECODE) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p, sizeof(*p));
if (res && *p)
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *p, REAL(strlen)(*p) + 1);
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *p, internal_strlen(*p) + 1);
}
return res;
}
@@ -6069,8 +6144,8 @@ INTERCEPTOR(int, __woverflow, __sanitizer_FILE *fp, int ch) {
INTERCEPTOR(__sanitizer_FILE *, fopen, const char *path, const char *mode) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fopen, path, mode);
- if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, REAL(strlen)(path) + 1);
- COMMON_INTERCEPTOR_READ_RANGE(ctx, mode, REAL(strlen)(mode) + 1);
+ if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, mode, internal_strlen(mode) + 1);
__sanitizer_FILE *res = REAL(fopen)(path, mode);
COMMON_INTERCEPTOR_FILE_OPEN(ctx, res, path);
if (res) unpoison_file(res);
@@ -6079,7 +6154,7 @@ INTERCEPTOR(__sanitizer_FILE *, fopen, const char *path, const char *mode) {
INTERCEPTOR(__sanitizer_FILE *, fdopen, int fd, const char *mode) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fdopen, fd, mode);
- COMMON_INTERCEPTOR_READ_RANGE(ctx, mode, REAL(strlen)(mode) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, mode, internal_strlen(mode) + 1);
__sanitizer_FILE *res = REAL(fdopen)(fd, mode);
if (res) unpoison_file(res);
return res;
@@ -6088,8 +6163,8 @@ INTERCEPTOR(__sanitizer_FILE *, freopen, const char *path, const char *mode,
__sanitizer_FILE *fp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, freopen, path, mode, fp);
- if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, REAL(strlen)(path) + 1);
- COMMON_INTERCEPTOR_READ_RANGE(ctx, mode, REAL(strlen)(mode) + 1);
+ if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, mode, internal_strlen(mode) + 1);
COMMON_INTERCEPTOR_FILE_CLOSE(ctx, fp);
__sanitizer_FILE *res = REAL(freopen)(path, mode, fp);
COMMON_INTERCEPTOR_FILE_OPEN(ctx, res, path);
@@ -6113,7 +6188,7 @@ INTERCEPTOR(int, flopen, const char *path, int flags, ...) {
va_end(ap);
COMMON_INTERCEPTOR_ENTER(ctx, flopen, path, flags, mode);
if (path) {
- COMMON_INTERCEPTOR_READ_RANGE(ctx, path, REAL(strlen)(path) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
}
return REAL(flopen)(path, flags, mode);
}
@@ -6126,7 +6201,7 @@ INTERCEPTOR(int, flopenat, int dirfd, const char *path, int flags, ...) {
va_end(ap);
COMMON_INTERCEPTOR_ENTER(ctx, flopen, path, flags, mode);
if (path) {
- COMMON_INTERCEPTOR_READ_RANGE(ctx, path, REAL(strlen)(path) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
}
return REAL(flopenat)(dirfd, path, flags, mode);
}
@@ -6142,8 +6217,8 @@ INTERCEPTOR(int, flopenat, int dirfd, const char *path, int flags, ...) {
INTERCEPTOR(__sanitizer_FILE *, fopen64, const char *path, const char *mode) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fopen64, path, mode);
- COMMON_INTERCEPTOR_READ_RANGE(ctx, path, REAL(strlen)(path) + 1);
- COMMON_INTERCEPTOR_READ_RANGE(ctx, mode, REAL(strlen)(mode) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, mode, internal_strlen(mode) + 1);
__sanitizer_FILE *res = REAL(fopen64)(path, mode);
COMMON_INTERCEPTOR_FILE_OPEN(ctx, res, path);
if (res) unpoison_file(res);
@@ -6153,8 +6228,8 @@ INTERCEPTOR(__sanitizer_FILE *, freopen64, const char *path, const char *mode,
__sanitizer_FILE *fp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, freopen64, path, mode, fp);
- if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, REAL(strlen)(path) + 1);
- COMMON_INTERCEPTOR_READ_RANGE(ctx, mode, REAL(strlen)(mode) + 1);
+ if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, mode, internal_strlen(mode) + 1);
COMMON_INTERCEPTOR_FILE_CLOSE(ctx, fp);
__sanitizer_FILE *res = REAL(freopen64)(path, mode, fp);
COMMON_INTERCEPTOR_FILE_OPEN(ctx, res, path);
@@ -6332,9 +6407,9 @@ INTERCEPTOR(char *, getpass, const char *prompt) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getpass, prompt);
if (prompt)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, prompt, REAL(strlen)(prompt)+1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, prompt, internal_strlen(prompt)+1);
char *res = REAL(getpass)(prompt);
- if (res) COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, REAL(strlen)(res)+1);
+ if (res) COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, internal_strlen(res)+1);
return res;
}
@@ -6538,17 +6613,42 @@ INTERCEPTOR(int, sem_getvalue, __sanitizer_sem_t *s, int *sval) {
}
return res;
}
-#define INIT_SEM \
- COMMON_INTERCEPT_FUNCTION(sem_init); \
- COMMON_INTERCEPT_FUNCTION(sem_destroy); \
- COMMON_INTERCEPT_FUNCTION(sem_wait); \
- COMMON_INTERCEPT_FUNCTION(sem_trywait); \
- COMMON_INTERCEPT_FUNCTION(sem_timedwait); \
- COMMON_INTERCEPT_FUNCTION(sem_post); \
- COMMON_INTERCEPT_FUNCTION(sem_getvalue);
+
+INTERCEPTOR(__sanitizer_sem_t *, sem_open, const char *name, int oflag, ...) {
+ void *ctx;
+ va_list ap;
+ va_start(ap, oflag);
+ u32 mode = va_arg(ap, u32);
+ u32 value = va_arg(ap, u32);
+ COMMON_INTERCEPTOR_ENTER(ctx, sem_open, name, oflag, mode, value);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
+ __sanitizer_sem_t *s = REAL(sem_open)(name, oflag, mode, value);
+ if (s)
+ COMMON_INTERCEPTOR_INITIALIZE_RANGE(s, sizeof(*s));
+ va_end(ap);
+ return s;
+}
+
+INTERCEPTOR(int, sem_unlink, const char *name) {
+ void *ctx;
+ COMMON_INTERCEPTOR_ENTER(ctx, sem_unlink, name);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
+ return REAL(sem_unlink)(name);
+}
+
+# define INIT_SEM \
+ COMMON_INTERCEPT_FUNCTION(sem_init); \
+ COMMON_INTERCEPT_FUNCTION(sem_destroy); \
+ COMMON_INTERCEPT_FUNCTION(sem_wait); \
+ COMMON_INTERCEPT_FUNCTION(sem_trywait); \
+ COMMON_INTERCEPT_FUNCTION(sem_timedwait); \
+ COMMON_INTERCEPT_FUNCTION(sem_post); \
+ COMMON_INTERCEPT_FUNCTION(sem_getvalue); \
+ COMMON_INTERCEPT_FUNCTION(sem_open); \
+ COMMON_INTERCEPT_FUNCTION(sem_unlink);
#else
-#define INIT_SEM
-#endif // SANITIZER_INTERCEPT_SEM
+# define INIT_SEM
+#endif // SANITIZER_INTERCEPT_SEM
#if SANITIZER_INTERCEPT_PTHREAD_SETCANCEL
INTERCEPTOR(int, pthread_setcancelstate, int state, int *oldstate) {
@@ -6631,7 +6731,7 @@ INTERCEPTOR(char *, ctermid, char *s) {
COMMON_INTERCEPTOR_ENTER(ctx, ctermid, s);
char *res = REAL(ctermid)(s);
if (res) {
- COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, REAL(strlen)(res) + 1);
+ COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, internal_strlen(res) + 1);
}
return res;
}
@@ -6646,7 +6746,7 @@ INTERCEPTOR(char *, ctermid_r, char *s) {
COMMON_INTERCEPTOR_ENTER(ctx, ctermid_r, s);
char *res = REAL(ctermid_r)(s);
if (res) {
- COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, REAL(strlen)(res) + 1);
+ COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, internal_strlen(res) + 1);
}
return res;
}
@@ -6983,8 +7083,8 @@ INTERCEPTOR(SIZE_T, wcsnlen, const wchar_t *s, SIZE_T n) {
INTERCEPTOR(wchar_t *, wcscat, wchar_t *dst, const wchar_t *src) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, wcscat, dst, src);
- SIZE_T src_size = REAL(wcslen)(src);
- SIZE_T dst_size = REAL(wcslen)(dst);
+ SIZE_T src_size = internal_wcslen(src);
+ SIZE_T dst_size = internal_wcslen(dst);
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, (src_size + 1) * sizeof(wchar_t));
COMMON_INTERCEPTOR_READ_RANGE(ctx, dst, (dst_size + 1) * sizeof(wchar_t));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst + dst_size,
@@ -6995,8 +7095,8 @@ INTERCEPTOR(wchar_t *, wcscat, wchar_t *dst, const wchar_t *src) {
INTERCEPTOR(wchar_t *, wcsncat, wchar_t *dst, const wchar_t *src, SIZE_T n) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, wcsncat, dst, src, n);
- SIZE_T src_size = REAL(wcsnlen)(src, n);
- SIZE_T dst_size = REAL(wcslen)(dst);
+ SIZE_T src_size = internal_wcsnlen(src, n);
+ SIZE_T dst_size = internal_wcslen(dst);
COMMON_INTERCEPTOR_READ_RANGE(ctx, src,
Min(src_size + 1, n) * sizeof(wchar_t));
COMMON_INTERCEPTOR_READ_RANGE(ctx, dst, (dst_size + 1) * sizeof(wchar_t));
@@ -7015,7 +7115,7 @@ INTERCEPTOR(wchar_t *, wcsncat, wchar_t *dst, const wchar_t *src, SIZE_T n) {
INTERCEPTOR(wchar_t *, wcsdup, wchar_t *s) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, wcsdup, s);
- SIZE_T len = REAL(wcslen)(s);
+ SIZE_T len = internal_wcslen(s);
COMMON_INTERCEPTOR_READ_RANGE(ctx, s, sizeof(wchar_t) * (len + 1));
wchar_t *result = REAL(wcsdup)(s);
if (result)
@@ -7029,9 +7129,9 @@ INTERCEPTOR(wchar_t *, wcsdup, wchar_t *s) {
#endif
#if SANITIZER_INTERCEPT_STRXFRM
-static SIZE_T RealStrLen(const char *str) { return REAL(strlen)(str); }
+static SIZE_T RealStrLen(const char *str) { return internal_strlen(str); }
-static SIZE_T RealStrLen(const wchar_t *str) { return REAL(wcslen)(str); }
+static SIZE_T RealStrLen(const wchar_t *str) { return internal_wcslen(str); }
#define STRXFRM_INTERCEPTOR_IMPL(strxfrm, dest, src, len, ...) \
{ \
@@ -7105,7 +7205,7 @@ INTERCEPTOR(int, acct, const char *file) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, acct, file);
if (file)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, file, REAL(strlen)(file) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, file, internal_strlen(file) + 1);
return REAL(acct)(file);
}
#define INIT_ACCT COMMON_INTERCEPT_FUNCTION(acct)
@@ -7120,7 +7220,7 @@ INTERCEPTOR(const char *, user_from_uid, u32 uid, int nouser) {
COMMON_INTERCEPTOR_ENTER(ctx, user_from_uid, uid, nouser);
user = REAL(user_from_uid)(uid, nouser);
if (user)
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, user, REAL(strlen)(user) + 1);
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, user, internal_strlen(user) + 1);
return user;
}
#define INIT_USER_FROM_UID COMMON_INTERCEPT_FUNCTION(user_from_uid)
@@ -7134,7 +7234,7 @@ INTERCEPTOR(int, uid_from_user, const char *name, u32 *uid) {
int res;
COMMON_INTERCEPTOR_ENTER(ctx, uid_from_user, name, uid);
if (name)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, name, REAL(strlen)(name) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
res = REAL(uid_from_user)(name, uid);
if (uid)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, uid, sizeof(*uid));
@@ -7152,7 +7252,7 @@ INTERCEPTOR(const char *, group_from_gid, u32 gid, int nogroup) {
COMMON_INTERCEPTOR_ENTER(ctx, group_from_gid, gid, nogroup);
group = REAL(group_from_gid)(gid, nogroup);
if (group)
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, group, REAL(strlen)(group) + 1);
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, group, internal_strlen(group) + 1);
return group;
}
#define INIT_GROUP_FROM_GID COMMON_INTERCEPT_FUNCTION(group_from_gid)
@@ -7166,7 +7266,7 @@ INTERCEPTOR(int, gid_from_group, const char *group, u32 *gid) {
int res;
COMMON_INTERCEPTOR_ENTER(ctx, gid_from_group, group, gid);
if (group)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, group, REAL(strlen)(group) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, group, internal_strlen(group) + 1);
res = REAL(gid_from_group)(group, gid);
if (gid)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, gid, sizeof(*gid));
@@ -7182,7 +7282,7 @@ INTERCEPTOR(int, access, const char *path, int mode) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, access, path, mode);
if (path)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, path, REAL(strlen)(path) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
return REAL(access)(path, mode);
}
#define INIT_ACCESS COMMON_INTERCEPT_FUNCTION(access)
@@ -7195,7 +7295,7 @@ INTERCEPTOR(int, faccessat, int fd, const char *path, int mode, int flags) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, faccessat, fd, path, mode, flags);
if (path)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, path, REAL(strlen)(path) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
return REAL(faccessat)(fd, path, mode, flags);
}
#define INIT_FACCESSAT COMMON_INTERCEPT_FUNCTION(faccessat)
@@ -7210,7 +7310,7 @@ INTERCEPTOR(int, getgrouplist, const char *name, u32 basegid, u32 *groups,
int res;
COMMON_INTERCEPTOR_ENTER(ctx, getgrouplist, name, basegid, groups, ngroups);
if (name)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, name, REAL(strlen)(name) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
if (ngroups)
COMMON_INTERCEPTOR_READ_RANGE(ctx, ngroups, sizeof(*ngroups));
res = REAL(getgrouplist)(name, basegid, groups, ngroups);
@@ -7234,7 +7334,7 @@ INTERCEPTOR(int, getgroupmembership, const char *name, u32 basegid, u32 *groups,
COMMON_INTERCEPTOR_ENTER(ctx, getgroupmembership, name, basegid, groups,
maxgrp, ngroups);
if (name)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, name, REAL(strlen)(name) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
res = REAL(getgroupmembership)(name, basegid, groups, maxgrp, ngroups);
if (!res && groups && ngroups) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, groups, sizeof(*groups) * (*ngroups));
@@ -7252,7 +7352,7 @@ INTERCEPTOR(int, getgroupmembership, const char *name, u32 basegid, u32 *groups,
INTERCEPTOR(SSIZE_T, readlink, const char *path, char *buf, SIZE_T bufsiz) {
void* ctx;
COMMON_INTERCEPTOR_ENTER(ctx, readlink, path, buf, bufsiz);
- COMMON_INTERCEPTOR_READ_RANGE(ctx, path, REAL(strlen)(path) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
SSIZE_T res = REAL(readlink)(path, buf, bufsiz);
if (res > 0)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, res);
@@ -7269,7 +7369,7 @@ INTERCEPTOR(SSIZE_T, readlinkat, int dirfd, const char *path, char *buf,
SIZE_T bufsiz) {
void* ctx;
COMMON_INTERCEPTOR_ENTER(ctx, readlinkat, dirfd, path, buf, bufsiz);
- COMMON_INTERCEPTOR_READ_RANGE(ctx, path, REAL(strlen)(path) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
SSIZE_T res = REAL(readlinkat)(dirfd, path, buf, bufsiz);
if (res > 0)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, res);
@@ -7287,7 +7387,7 @@ INTERCEPTOR(int, name_to_handle_at, int dirfd, const char *pathname,
void* ctx;
COMMON_INTERCEPTOR_ENTER(ctx, name_to_handle_at, dirfd, pathname, handle,
mount_id, flags);
- COMMON_INTERCEPTOR_READ_RANGE(ctx, pathname, REAL(strlen)(pathname) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, pathname, internal_strlen(pathname) + 1);
__sanitizer_file_handle *sanitizer_handle =
reinterpret_cast<__sanitizer_file_handle*>(handle);
@@ -7351,7 +7451,7 @@ INTERCEPTOR(SIZE_T, strlcpy, char *dst, char *src, SIZE_T size) {
ctx, src, Min(internal_strnlen(src, size), size - 1) + 1);
}
res = REAL(strlcpy)(dst, src, size);
- COMMON_INTERCEPTOR_COPY_STRING(ctx, dst, src, REAL(strlen)(dst) + 1);
+ COMMON_INTERCEPTOR_COPY_STRING(ctx, dst, src, internal_strlen(dst) + 1);
return res;
}
@@ -7379,7 +7479,7 @@ INTERCEPTOR(void *, mmap, void *addr, SIZE_T sz, int prot, int flags, int fd,
OFF_T off) {
void *ctx;
if (common_flags()->detect_write_exec)
- ReportMmapWriteExec(prot);
+ ReportMmapWriteExec(prot, flags);
if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)
return (void *)internal_mmap(addr, sz, prot, flags, fd, off);
COMMON_INTERCEPTOR_ENTER(ctx, mmap, addr, sz, prot, flags, fd, off);
@@ -7389,7 +7489,7 @@ INTERCEPTOR(void *, mmap, void *addr, SIZE_T sz, int prot, int flags, int fd,
INTERCEPTOR(int, mprotect, void *addr, SIZE_T sz, int prot) {
void *ctx;
if (common_flags()->detect_write_exec)
- ReportMmapWriteExec(prot);
+ ReportMmapWriteExec(prot, 0);
if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)
return (int)internal_mprotect(addr, sz, prot);
COMMON_INTERCEPTOR_ENTER(ctx, mprotect, addr, sz, prot);
@@ -7408,7 +7508,7 @@ INTERCEPTOR(void *, mmap64, void *addr, SIZE_T sz, int prot, int flags, int fd,
OFF64_T off) {
void *ctx;
if (common_flags()->detect_write_exec)
- ReportMmapWriteExec(prot);
+ ReportMmapWriteExec(prot, flags);
if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)
return (void *)internal_mmap(addr, sz, prot, flags, fd, off);
COMMON_INTERCEPTOR_ENTER(ctx, mmap64, addr, sz, prot, flags, fd, off);
@@ -7426,7 +7526,7 @@ INTERCEPTOR(char *, devname, u64 dev, u32 type) {
COMMON_INTERCEPTOR_ENTER(ctx, devname, dev, type);
name = REAL(devname)(dev, type);
if (name)
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, name, REAL(strlen)(name) + 1);
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, name, internal_strlen(name) + 1);
return name;
}
#define INIT_DEVNAME COMMON_INTERCEPT_FUNCTION(devname);
@@ -7448,7 +7548,7 @@ INTERCEPTOR(DEVNAME_R_RETTYPE, devname_r, u64 dev, u32 type, char *path,
COMMON_INTERCEPTOR_ENTER(ctx, devname_r, dev, type, path, len);
DEVNAME_R_RETTYPE res = REAL(devname_r)(dev, type, path, len);
if (DEVNAME_R_SUCCESS(res))
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, path, REAL(strlen)(path) + 1);
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, path, internal_strlen(path) + 1);
return res;
}
#define INIT_DEVNAME_R COMMON_INTERCEPT_FUNCTION(devname_r);
@@ -7478,7 +7578,7 @@ INTERCEPTOR(void, strmode, u32 mode, char *bp) {
COMMON_INTERCEPTOR_ENTER(ctx, strmode, mode, bp);
REAL(strmode)(mode, bp);
if (bp)
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, bp, REAL(strlen)(bp) + 1);
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, bp, internal_strlen(bp) + 1);
}
#define INIT_STRMODE COMMON_INTERCEPT_FUNCTION(strmode)
#else
@@ -7498,37 +7598,42 @@ INTERCEPTOR(struct __sanitizer_ttyent *, getttynam, char *name) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getttynam, name);
if (name)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, name, REAL(strlen)(name) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
struct __sanitizer_ttyent *ttyent = REAL(getttynam)(name);
if (ttyent)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ttyent, struct_ttyent_sz);
return ttyent;
}
+#define INIT_TTYENT \
+ COMMON_INTERCEPT_FUNCTION(getttyent); \
+ COMMON_INTERCEPT_FUNCTION(getttynam);
+#else
+#define INIT_TTYENT
+#endif
+
+#if SANITIZER_INTERCEPT_TTYENTPATH
INTERCEPTOR(int, setttyentpath, char *path) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, setttyentpath, path);
if (path)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, path, REAL(strlen)(path) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
return REAL(setttyentpath)(path);
}
-#define INIT_TTYENT \
- COMMON_INTERCEPT_FUNCTION(getttyent); \
- COMMON_INTERCEPT_FUNCTION(getttynam); \
- COMMON_INTERCEPT_FUNCTION(setttyentpath)
+#define INIT_TTYENTPATH COMMON_INTERCEPT_FUNCTION(setttyentpath);
#else
-#define INIT_TTYENT
+#define INIT_TTYENTPATH
#endif
#if SANITIZER_INTERCEPT_PROTOENT
static void write_protoent(void *ctx, struct __sanitizer_protoent *p) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p, sizeof(*p));
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p->p_name, REAL(strlen)(p->p_name) + 1);
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p->p_name, internal_strlen(p->p_name) + 1);
SIZE_T pp_size = 1; // One handles the trailing \0
for (char **pp = p->p_aliases; *pp; ++pp, ++pp_size)
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *pp, REAL(strlen)(*pp) + 1);
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *pp, internal_strlen(*pp) + 1);
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p->p_aliases,
pp_size * sizeof(char **));
@@ -7547,7 +7652,7 @@ INTERCEPTOR(struct __sanitizer_protoent *, getprotobyname, const char *name) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getprotobyname, name);
if (name)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, name, REAL(strlen)(name) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
struct __sanitizer_protoent *p = REAL(getprotobyname)(name);
if (p)
write_protoent(ctx, p);
@@ -7591,7 +7696,7 @@ INTERCEPTOR(int, getprotobyname_r, const char *name,
COMMON_INTERCEPTOR_ENTER(ctx, getprotobyname_r, name, result_buf, buf,
buflen, result);
if (name)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, name, REAL(strlen)(name) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
int res = REAL(getprotobyname_r)(name, result_buf, buf, buflen, result);
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, result, sizeof *result);
@@ -7630,12 +7735,12 @@ INTERCEPTOR(struct __sanitizer_netent *, getnetent) {
if (n) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, n, sizeof(*n));
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, n->n_name, REAL(strlen)(n->n_name) + 1);
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, n->n_name, internal_strlen(n->n_name) + 1);
SIZE_T nn_size = 1; // One handles the trailing \0
for (char **nn = n->n_aliases; *nn; ++nn, ++nn_size)
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *nn, REAL(strlen)(*nn) + 1);
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *nn, internal_strlen(*nn) + 1);
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, n->n_aliases,
nn_size * sizeof(char **));
@@ -7647,17 +7752,17 @@ INTERCEPTOR(struct __sanitizer_netent *, getnetbyname, const char *name) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getnetbyname, name);
if (name)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, name, REAL(strlen)(name) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
struct __sanitizer_netent *n = REAL(getnetbyname)(name);
if (n) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, n, sizeof(*n));
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, n->n_name, REAL(strlen)(n->n_name) + 1);
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, n->n_name, internal_strlen(n->n_name) + 1);
SIZE_T nn_size = 1; // One handles the trailing \0
for (char **nn = n->n_aliases; *nn; ++nn, ++nn_size)
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *nn, REAL(strlen)(*nn) + 1);
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *nn, internal_strlen(*nn) + 1);
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, n->n_aliases,
nn_size * sizeof(char **));
@@ -7672,12 +7777,12 @@ INTERCEPTOR(struct __sanitizer_netent *, getnetbyaddr, u32 net, int type) {
if (n) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, n, sizeof(*n));
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, n->n_name, REAL(strlen)(n->n_name) + 1);
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, n->n_name, internal_strlen(n->n_name) + 1);
SIZE_T nn_size = 1; // One handles the trailing \0
for (char **nn = n->n_aliases; *nn; ++nn, ++nn_size)
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *nn, REAL(strlen)(*nn) + 1);
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *nn, internal_strlen(*nn) + 1);
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, n->n_aliases,
nn_size * sizeof(char **));
@@ -7798,7 +7903,7 @@ INTERCEPTOR(int, regcomp, void *preg, const char *pattern, int cflags) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, regcomp, preg, pattern, cflags);
if (pattern)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, pattern, REAL(strlen)(pattern) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, pattern, internal_strlen(pattern) + 1);
int res = REAL(regcomp)(preg, pattern, cflags);
if (!res)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, preg, struct_regex_sz);
@@ -7811,7 +7916,7 @@ INTERCEPTOR(int, regexec, const void *preg, const char *string, SIZE_T nmatch,
if (preg)
COMMON_INTERCEPTOR_READ_RANGE(ctx, preg, struct_regex_sz);
if (string)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, string, REAL(strlen)(string) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, string, internal_strlen(string) + 1);
int res = REAL(regexec)(preg, string, nmatch, pmatch, eflags);
if (!res && pmatch)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, pmatch, nmatch * struct_regmatch_sz);
@@ -7825,7 +7930,7 @@ INTERCEPTOR(SIZE_T, regerror, int errcode, const void *preg, char *errbuf,
COMMON_INTERCEPTOR_READ_RANGE(ctx, preg, struct_regex_sz);
SIZE_T res = REAL(regerror)(errcode, preg, errbuf, errbuf_size);
if (errbuf)
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, errbuf, REAL(strlen)(errbuf) + 1);
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, errbuf, internal_strlen(errbuf) + 1);
return res;
}
INTERCEPTOR(void, regfree, const void *preg) {
@@ -7850,15 +7955,15 @@ INTERCEPTOR(SSIZE_T, regnsub, char *buf, SIZE_T bufsiz, const char *sub,
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, regnsub, buf, bufsiz, sub, rm, str);
if (sub)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, sub, REAL(strlen)(sub) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, sub, internal_strlen(sub) + 1);
// The implementation demands and hardcodes 10 elements
if (rm)
COMMON_INTERCEPTOR_READ_RANGE(ctx, rm, 10 * struct_regmatch_sz);
if (str)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, str, REAL(strlen)(str) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, str, internal_strlen(str) + 1);
SSIZE_T res = REAL(regnsub)(buf, bufsiz, sub, rm, str);
if (res > 0 && buf)
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, REAL(strlen)(buf) + 1);
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, internal_strlen(buf) + 1);
return res;
}
INTERCEPTOR(SSIZE_T, regasub, char **buf, const char *sub,
@@ -7866,16 +7971,16 @@ INTERCEPTOR(SSIZE_T, regasub, char **buf, const char *sub,
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, regasub, buf, sub, rm, sstr);
if (sub)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, sub, REAL(strlen)(sub) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, sub, internal_strlen(sub) + 1);
// Hardcode 10 elements as this is hardcoded size
if (rm)
COMMON_INTERCEPTOR_READ_RANGE(ctx, rm, 10 * struct_regmatch_sz);
if (sstr)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, sstr, REAL(strlen)(sstr) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, sstr, internal_strlen(sstr) + 1);
SSIZE_T res = REAL(regasub)(buf, sub, rm, sstr);
if (res > 0 && buf) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, sizeof(char *));
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *buf, REAL(strlen)(*buf) + 1);
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *buf, internal_strlen(*buf) + 1);
}
return res;
}
@@ -7897,7 +8002,7 @@ INTERCEPTOR(void *, fts_open, char *const *path_argv, int options,
COMMON_INTERCEPTOR_READ_RANGE(ctx, pa, sizeof(char **));
if (!*pa)
break;
- COMMON_INTERCEPTOR_READ_RANGE(ctx, *pa, REAL(strlen)(*pa) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, *pa, internal_strlen(*pa) + 1);
}
}
// TODO(kamil): handle compar callback
@@ -7989,7 +8094,7 @@ INTERCEPTOR(int, sysctlbyname, char *sname, void *oldp, SIZE_T *oldlenp,
COMMON_INTERCEPTOR_ENTER(ctx, sysctlbyname, sname, oldp, oldlenp, newp,
newlen);
if (sname)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, sname, REAL(strlen)(sname) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, sname, internal_strlen(sname) + 1);
if (oldlenp)
COMMON_INTERCEPTOR_READ_RANGE(ctx, oldlenp, sizeof(*oldlenp));
if (newp && newlen)
@@ -8010,7 +8115,7 @@ INTERCEPTOR(int, sysctlnametomib, const char *sname, int *name,
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sysctlnametomib, sname, name, namelenp);
if (sname)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, sname, REAL(strlen)(sname) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, sname, internal_strlen(sname) + 1);
if (namelenp)
COMMON_INTERCEPTOR_READ_RANGE(ctx, namelenp, sizeof(*namelenp));
int res = REAL(sysctlnametomib)(sname, name, namelenp);
@@ -8050,7 +8155,7 @@ INTERCEPTOR(void *, asysctlbyname, const char *sname, SIZE_T *len) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, asysctlbyname, sname, len);
if (sname)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, sname, REAL(strlen)(sname) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, sname, internal_strlen(sname) + 1);
void *res = REAL(asysctlbyname)(sname, len);
if (res && len) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, len, sizeof(*len));
@@ -8073,7 +8178,7 @@ INTERCEPTOR(int, sysctlgetmibinfo, char *sname, int *name,
COMMON_INTERCEPTOR_ENTER(ctx, sysctlgetmibinfo, sname, name, namelenp, cname,
csz, rnode, v);
if (sname)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, sname, REAL(strlen)(sname) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, sname, internal_strlen(sname) + 1);
if (namelenp)
COMMON_INTERCEPTOR_READ_RANGE(ctx, namelenp, sizeof(*namelenp));
if (csz)
@@ -8107,7 +8212,7 @@ INTERCEPTOR(char *, nl_langinfo, long item) {
COMMON_INTERCEPTOR_ENTER(ctx, nl_langinfo, item);
char *ret = REAL(nl_langinfo)(item);
if (ret)
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, REAL(strlen)(ret) + 1);
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, internal_strlen(ret) + 1);
return ret;
}
#define INIT_NL_LANGINFO COMMON_INTERCEPT_FUNCTION(nl_langinfo)
@@ -8127,7 +8232,7 @@ INTERCEPTOR(int, modctl, int operation, void *argp) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, ml, sizeof(*ml));
if (ml->ml_filename)
COMMON_INTERCEPTOR_READ_RANGE(ctx, ml->ml_filename,
- REAL(strlen)(ml->ml_filename) + 1);
+ internal_strlen(ml->ml_filename) + 1);
if (ml->ml_props)
COMMON_INTERCEPTOR_READ_RANGE(ctx, ml->ml_props, ml->ml_propslen);
}
@@ -8135,7 +8240,7 @@ INTERCEPTOR(int, modctl, int operation, void *argp) {
} else if (operation == modctl_unload) {
if (argp) {
const char *name = (const char *)argp;
- COMMON_INTERCEPTOR_READ_RANGE(ctx, name, REAL(strlen)(name) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
}
ret = REAL(modctl)(operation, argp);
} else if (operation == modctl_stat) {
@@ -8177,7 +8282,7 @@ INTERCEPTOR(long long, strtonum, const char *nptr, long long minval,
if (errstr) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, errstr, sizeof(const char *));
if (*errstr)
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *errstr, REAL(strlen)(*errstr) + 1);
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *errstr, internal_strlen(*errstr) + 1);
}
return ret;
}
@@ -8197,7 +8302,7 @@ INTERCEPTOR(char *, fparseln, __sanitizer_FILE *stream, SIZE_T *len,
COMMON_INTERCEPTOR_READ_RANGE(ctx, delim, sizeof(delim[0]) * 3);
char *ret = REAL(fparseln)(stream, len, lineno, delim, flags);
if (ret) {
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, REAL(strlen)(ret) + 1);
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, internal_strlen(ret) + 1);
if (len)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, len, sizeof(*len));
if (lineno)
@@ -8214,7 +8319,7 @@ INTERCEPTOR(char *, fparseln, __sanitizer_FILE *stream, SIZE_T *len,
INTERCEPTOR(int, statvfs1, const char *path, void *buf, int flags) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, statvfs1, path, buf, flags);
- if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, REAL(strlen)(path) + 1);
+ if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
int res = REAL(statvfs1)(path, buf, flags);
if (!res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, struct_statvfs_sz);
return res;
@@ -8495,7 +8600,7 @@ INTERCEPTOR(char *, SHA1File, char *filename, char *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, SHA1File, filename, buf);
if (filename)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, filename, REAL(strlen)(filename) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, filename, internal_strlen(filename) + 1);
char *ret = REAL(SHA1File)(filename, buf);
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, SHA1_return_length);
@@ -8506,7 +8611,7 @@ INTERCEPTOR(char *, SHA1FileChunk, char *filename, char *buf, OFF_T offset,
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, SHA1FileChunk, filename, buf, offset, length);
if (filename)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, filename, REAL(strlen)(filename) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, filename, internal_strlen(filename) + 1);
char *ret = REAL(SHA1FileChunk)(filename, buf, offset, length);
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, SHA1_return_length);
@@ -8582,7 +8687,7 @@ INTERCEPTOR(char *, MD4File, const char *filename, char *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, MD4File, filename, buf);
if (filename)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, filename, REAL(strlen)(filename) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, filename, internal_strlen(filename) + 1);
char *ret = REAL(MD4File)(filename, buf);
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, MD4_return_length);
@@ -8665,7 +8770,7 @@ INTERCEPTOR(char *, RMD160File, char *filename, char *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, RMD160File, filename, buf);
if (filename)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, filename, REAL(strlen)(filename) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, filename, internal_strlen(filename) + 1);
char *ret = REAL(RMD160File)(filename, buf);
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, RMD160_return_length);
@@ -8676,7 +8781,7 @@ INTERCEPTOR(char *, RMD160FileChunk, char *filename, char *buf, OFF_T offset,
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, RMD160FileChunk, filename, buf, offset, length);
if (filename)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, filename, REAL(strlen)(filename) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, filename, internal_strlen(filename) + 1);
char *ret = REAL(RMD160FileChunk)(filename, buf, offset, length);
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, RMD160_return_length);
@@ -8752,7 +8857,7 @@ INTERCEPTOR(char *, MD5File, const char *filename, char *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, MD5File, filename, buf);
if (filename)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, filename, REAL(strlen)(filename) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, filename, internal_strlen(filename) + 1);
char *ret = REAL(MD5File)(filename, buf);
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, MD5_return_length);
@@ -8882,7 +8987,7 @@ INTERCEPTOR(char *, MD2File, const char *filename, char *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, MD2File, filename, buf);
if (filename)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, filename, REAL(strlen)(filename) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, filename, internal_strlen(filename) + 1);
char *ret = REAL(MD2File)(filename, buf);
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, MD2_return_length);
@@ -8960,7 +9065,7 @@ INTERCEPTOR(char *, MD2Data, const unsigned char *data, unsigned int len,
void *ctx; \
COMMON_INTERCEPTOR_ENTER(ctx, SHA##LEN##_File, filename, buf); \
if (filename) \
- COMMON_INTERCEPTOR_READ_RANGE(ctx, filename, REAL(strlen)(filename) + 1);\
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, filename, internal_strlen(filename) + 1);\
char *ret = REAL(SHA##LEN##_File)(filename, buf); \
if (ret) \
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, SHA##LEN##_return_length); \
@@ -8972,7 +9077,7 @@ INTERCEPTOR(char *, MD2Data, const unsigned char *data, unsigned int len,
COMMON_INTERCEPTOR_ENTER(ctx, SHA##LEN##_FileChunk, filename, buf, offset, \
length); \
if (filename) \
- COMMON_INTERCEPTOR_READ_RANGE(ctx, filename, REAL(strlen)(filename) + 1);\
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, filename, internal_strlen(filename) + 1);\
char *ret = REAL(SHA##LEN##_FileChunk)(filename, buf, offset, length); \
if (ret) \
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, SHA##LEN##_return_length); \
@@ -8989,10 +9094,10 @@ INTERCEPTOR(char *, MD2Data, const unsigned char *data, unsigned int len,
return ret; \
}
-SHA2_INTERCEPTORS(224, u32);
-SHA2_INTERCEPTORS(256, u32);
-SHA2_INTERCEPTORS(384, u64);
-SHA2_INTERCEPTORS(512, u64);
+SHA2_INTERCEPTORS(224, u32)
+SHA2_INTERCEPTORS(256, u32)
+SHA2_INTERCEPTORS(384, u64)
+SHA2_INTERCEPTORS(512, u64)
#define INIT_SHA2_INTECEPTORS(LEN) \
COMMON_INTERCEPT_FUNCTION(SHA##LEN##_Init); \
@@ -9036,7 +9141,7 @@ INTERCEPTOR(int, strvis, char *dst, const char *src, int flag) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strvis, dst, src, flag);
if (src)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, src, REAL(strlen)(src) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, src, internal_strlen(src) + 1);
int len = REAL(strvis)(dst, src, flag);
if (dst)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, len + 1);
@@ -9046,7 +9151,7 @@ INTERCEPTOR(int, stravis, char **dst, const char *src, int flag) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, stravis, dst, src, flag);
if (src)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, src, REAL(strlen)(src) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, src, internal_strlen(src) + 1);
int len = REAL(stravis)(dst, src, flag);
if (dst) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, sizeof(char *));
@@ -9059,7 +9164,7 @@ INTERCEPTOR(int, strnvis, char *dst, SIZE_T dlen, const char *src, int flag) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strnvis, dst, dlen, src, flag);
if (src)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, src, REAL(strlen)(src) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, src, internal_strlen(src) + 1);
int len = REAL(strnvis)(dst, dlen, src, flag);
// The interface will be valid even if there is no space for NULL char
if (dst && len > 0)
@@ -9109,7 +9214,7 @@ INTERCEPTOR(char *, svis, char *dst, int c, int flag, int nextc,
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, svis, dst, c, flag, nextc, extra);
if (extra)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, extra, REAL(strlen)(extra) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, extra, internal_strlen(extra) + 1);
char *end = REAL(svis)(dst, c, flag, nextc, extra);
if (dst && end)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, end - dst + 1);
@@ -9120,7 +9225,7 @@ INTERCEPTOR(char *, snvis, char *dst, SIZE_T dlen, int c, int flag, int nextc,
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, snvis, dst, dlen, c, flag, nextc, extra);
if (extra)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, extra, REAL(strlen)(extra) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, extra, internal_strlen(extra) + 1);
char *end = REAL(snvis)(dst, dlen, c, flag, nextc, extra);
if (dst && end)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst,
@@ -9132,9 +9237,9 @@ INTERCEPTOR(int, strsvis, char *dst, const char *src, int flag,
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strsvis, dst, src, flag, extra);
if (src)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, src, REAL(strlen)(src) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, src, internal_strlen(src) + 1);
if (extra)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, extra, REAL(strlen)(extra) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, extra, internal_strlen(extra) + 1);
int len = REAL(strsvis)(dst, src, flag, extra);
if (dst)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, len + 1);
@@ -9145,9 +9250,9 @@ INTERCEPTOR(int, strsnvis, char *dst, SIZE_T dlen, const char *src, int flag,
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strsnvis, dst, dlen, src, flag, extra);
if (src)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, src, REAL(strlen)(src) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, src, internal_strlen(src) + 1);
if (extra)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, extra, REAL(strlen)(extra) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, extra, internal_strlen(extra) + 1);
int len = REAL(strsnvis)(dst, dlen, src, flag, extra);
// The interface will be valid even if there is no space for NULL char
if (dst && len >= 0)
@@ -9161,7 +9266,7 @@ INTERCEPTOR(int, strsvisx, char *dst, const char *src, SIZE_T len, int flag,
if (src)
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, len);
if (extra)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, extra, REAL(strlen)(extra) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, extra, internal_strlen(extra) + 1);
int ret = REAL(strsvisx)(dst, src, len, flag, extra);
if (dst)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, ret + 1);
@@ -9174,7 +9279,7 @@ INTERCEPTOR(int, strsnvisx, char *dst, SIZE_T dlen, const char *src, SIZE_T len,
if (src)
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, len);
if (extra)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, extra, REAL(strlen)(extra) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, extra, internal_strlen(extra) + 1);
int ret = REAL(strsnvisx)(dst, dlen, src, len, flag, extra);
if (dst && ret >= 0)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, ret + 1);
@@ -9188,7 +9293,7 @@ INTERCEPTOR(int, strsenvisx, char *dst, SIZE_T dlen, const char *src,
if (src)
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, len);
if (extra)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, extra, REAL(strlen)(extra) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, extra, internal_strlen(extra) + 1);
// FIXME: only need to be checked when "flag | VIS_NOLOCALE" doesn't hold
// according to the implementation
if (cerr_ptr)
@@ -9215,7 +9320,7 @@ INTERCEPTOR(int, strunvis, char *dst, const char *src) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strunvis, dst, src);
if (src)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, src, REAL(strlen)(src) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, src, internal_strlen(src) + 1);
int ret = REAL(strunvis)(dst, src);
if (ret != -1)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, ret + 1);
@@ -9225,7 +9330,7 @@ INTERCEPTOR(int, strnunvis, char *dst, SIZE_T dlen, const char *src) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strnunvis, dst, dlen, src);
if (src)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, src, REAL(strlen)(src) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, src, internal_strlen(src) + 1);
int ret = REAL(strnunvis)(dst, dlen, src);
if (ret != -1)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, ret + 1);
@@ -9235,7 +9340,7 @@ INTERCEPTOR(int, strunvisx, char *dst, const char *src, int flag) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strunvisx, dst, src, flag);
if (src)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, src, REAL(strlen)(src) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, src, internal_strlen(src) + 1);
int ret = REAL(strunvisx)(dst, src, flag);
if (ret != -1)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, ret + 1);
@@ -9246,7 +9351,7 @@ INTERCEPTOR(int, strnunvisx, char *dst, SIZE_T dlen, const char *src,
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strnunvisx, dst, dlen, src, flag);
if (src)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, src, REAL(strlen)(src) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, src, internal_strlen(src) + 1);
int ret = REAL(strnunvisx)(dst, dlen, src, flag);
if (ret != -1)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, ret + 1);
@@ -9282,7 +9387,7 @@ INTERCEPTOR(struct __sanitizer_cdbr *, cdbr_open, const char *path, int flags) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, cdbr_open, path, flags);
if (path)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, path, REAL(strlen)(path) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
struct __sanitizer_cdbr *cdbr = REAL(cdbr_open)(path, flags);
if (cdbr)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, cdbr, sizeof(*cdbr));
@@ -9474,7 +9579,7 @@ INTERCEPTOR(void *, getfsspec, const char *spec) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getfsspec, spec);
if (spec)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, spec, REAL(strlen)(spec) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, spec, internal_strlen(spec) + 1);
void *ret = REAL(getfsspec)(spec);
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, struct_fstab_sz);
@@ -9485,7 +9590,7 @@ INTERCEPTOR(void *, getfsfile, const char *file) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getfsfile, file);
if (file)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, file, REAL(strlen)(file) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, file, internal_strlen(file) + 1);
void *ret = REAL(getfsfile)(file);
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, struct_fstab_sz);
@@ -9529,9 +9634,9 @@ INTERCEPTOR(__sanitizer_FILE *, popen, const char *command, const char *type) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, popen, command, type);
if (command)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, command, REAL(strlen)(command) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, command, internal_strlen(command) + 1);
if (type)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, type, REAL(strlen)(type) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, type, internal_strlen(type) + 1);
__sanitizer_FILE *res = REAL(popen)(command, type);
COMMON_INTERCEPTOR_FILE_OPEN(ctx, res, nullptr);
if (res) unpoison_file(res);
@@ -9548,13 +9653,13 @@ INTERCEPTOR(__sanitizer_FILE *, popenve, const char *path,
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, popenve, path, argv, envp, type);
if (path)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, path, REAL(strlen)(path) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
if (argv) {
for (char *const *pa = argv; ; ++pa) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, pa, sizeof(char **));
if (!*pa)
break;
- COMMON_INTERCEPTOR_READ_RANGE(ctx, *pa, REAL(strlen)(*pa) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, *pa, internal_strlen(*pa) + 1);
}
}
if (envp) {
@@ -9562,11 +9667,11 @@ INTERCEPTOR(__sanitizer_FILE *, popenve, const char *path,
COMMON_INTERCEPTOR_READ_RANGE(ctx, pa, sizeof(char **));
if (!*pa)
break;
- COMMON_INTERCEPTOR_READ_RANGE(ctx, *pa, REAL(strlen)(*pa) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, *pa, internal_strlen(*pa) + 1);
}
}
if (type)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, type, REAL(strlen)(type) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, type, internal_strlen(type) + 1);
__sanitizer_FILE *res = REAL(popenve)(path, argv, envp, type);
COMMON_INTERCEPTOR_FILE_OPEN(ctx, res, nullptr);
if (res) unpoison_file(res);
@@ -9762,7 +9867,7 @@ INTERCEPTOR(char *, fdevname, int fd) {
COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
char *name = REAL(fdevname)(fd);
if (name) {
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, name, REAL(strlen)(name) + 1);
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, name, internal_strlen(name) + 1);
if (fd > 0)
COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd);
}
@@ -9775,7 +9880,7 @@ INTERCEPTOR(char *, fdevname_r, int fd, char *buf, SIZE_T len) {
COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
char *name = REAL(fdevname_r)(fd, buf, len);
if (name && buf && len > 0) {
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, REAL(strlen)(buf) + 1);
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, internal_strlen(buf) + 1);
if (fd > 0)
COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd);
}
@@ -9795,7 +9900,7 @@ INTERCEPTOR(char *, getusershell) {
COMMON_INTERCEPTOR_ENTER(ctx, getusershell);
char *res = REAL(getusershell)();
if (res)
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, REAL(strlen)(res) + 1);
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, internal_strlen(res) + 1);
return res;
}
@@ -9820,7 +9925,7 @@ INTERCEPTOR(int, sl_add, void *sl, char *item) {
if (sl)
COMMON_INTERCEPTOR_READ_RANGE(ctx, sl, __sanitizer::struct_StringList_sz);
if (item)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, item, REAL(strlen)(item) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, item, internal_strlen(item) + 1);
int res = REAL(sl_add)(sl, item);
if (!res)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, sl, __sanitizer::struct_StringList_sz);
@@ -9833,10 +9938,10 @@ INTERCEPTOR(char *, sl_find, void *sl, const char *item) {
if (sl)
COMMON_INTERCEPTOR_READ_RANGE(ctx, sl, __sanitizer::struct_StringList_sz);
if (item)
- COMMON_INTERCEPTOR_READ_RANGE(ctx, item, REAL(strlen)(item) + 1);
+ COMMON_INTERCEPTOR_READ_RANGE(ctx, item, internal_strlen(item) + 1);
char *res = REAL(sl_find)(sl, item);
if (res)
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, REAL(strlen)(res) + 1);
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, internal_strlen(res) + 1);
return res;
}
@@ -9922,7 +10027,52 @@ INTERCEPTOR(int, getentropy, void *buf, SIZE_T buflen) {
#define INIT_GETENTROPY
#endif
-#if SANITIZER_INTERCEPT_QSORT
+#if SANITIZER_INTERCEPT_QSORT_R
+typedef int (*qsort_r_compar_f)(const void *, const void *, void *);
+struct qsort_r_compar_params {
+ SIZE_T size;
+ qsort_r_compar_f compar;
+ void *arg;
+};
+static int wrapped_qsort_r_compar(const void *a, const void *b, void *arg) {
+ qsort_r_compar_params *params = (qsort_r_compar_params *)arg;
+ COMMON_INTERCEPTOR_UNPOISON_PARAM(3);
+ COMMON_INTERCEPTOR_INITIALIZE_RANGE(a, params->size);
+ COMMON_INTERCEPTOR_INITIALIZE_RANGE(b, params->size);
+ return params->compar(a, b, params->arg);
+}
+
+INTERCEPTOR(void, qsort_r, void *base, SIZE_T nmemb, SIZE_T size,
+ qsort_r_compar_f compar, void *arg) {
+ void *ctx;
+ COMMON_INTERCEPTOR_ENTER(ctx, qsort_r, base, nmemb, size, compar, arg);
+ // Run the comparator over all array elements to detect any memory issues.
+ if (nmemb > 1) {
+ for (SIZE_T i = 0; i < nmemb - 1; ++i) {
+ void *p = (void *)((char *)base + i * size);
+ void *q = (void *)((char *)base + (i + 1) * size);
+ COMMON_INTERCEPTOR_UNPOISON_PARAM(3);
+ compar(p, q, arg);
+ }
+ }
+ qsort_r_compar_params params = {size, compar, arg};
+ REAL(qsort_r)(base, nmemb, size, wrapped_qsort_r_compar, &params);
+ COMMON_INTERCEPTOR_WRITE_RANGE(ctx, base, nmemb * size);
+}
+# define INIT_QSORT_R COMMON_INTERCEPT_FUNCTION(qsort_r)
+#else
+# define INIT_QSORT_R
+#endif
+
+#if SANITIZER_INTERCEPT_QSORT && SANITIZER_INTERCEPT_QSORT_R
+INTERCEPTOR(void, qsort, void *base, SIZE_T nmemb, SIZE_T size,
+ qsort_r_compar_f compar) {
+ void *ctx;
+ COMMON_INTERCEPTOR_ENTER(ctx, qsort, base, nmemb, size, compar);
+ WRAP(qsort_r)(base, nmemb, size, compar, nullptr);
+}
+# define INIT_QSORT COMMON_INTERCEPT_FUNCTION(qsort)
+#elif SANITIZER_INTERCEPT_QSORT && !SANITIZER_INTERCEPT_QSORT_R
// Glibc qsort uses a temporary buffer allocated either on stack or on heap.
// Poisoned memory from there may get copied into the comparator arguments,
// where it needs to be dealt with. But even that is not enough - the results of
@@ -9937,7 +10087,7 @@ INTERCEPTOR(int, getentropy, void *buf, SIZE_T buflen) {
typedef int (*qsort_compar_f)(const void *, const void *);
static THREADLOCAL qsort_compar_f qsort_compar;
static THREADLOCAL SIZE_T qsort_size;
-int wrapped_qsort_compar(const void *a, const void *b) {
+static int wrapped_qsort_compar(const void *a, const void *b) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(2);
COMMON_INTERCEPTOR_INITIALIZE_RANGE(a, qsort_size);
COMMON_INTERCEPTOR_INITIALIZE_RANGE(b, qsort_size);
@@ -9979,60 +10129,34 @@ INTERCEPTOR(void, qsort, void *base, SIZE_T nmemb, SIZE_T size,
}
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, base, nmemb * size);
}
-#define INIT_QSORT COMMON_INTERCEPT_FUNCTION(qsort)
+# define INIT_QSORT COMMON_INTERCEPT_FUNCTION(qsort)
#else
-#define INIT_QSORT
+# define INIT_QSORT
#endif
-#if SANITIZER_INTERCEPT_QSORT_R
-typedef int (*qsort_r_compar_f)(const void *, const void *, void *);
-static THREADLOCAL qsort_r_compar_f qsort_r_compar;
-static THREADLOCAL SIZE_T qsort_r_size;
-int wrapped_qsort_r_compar(const void *a, const void *b, void *arg) {
- COMMON_INTERCEPTOR_UNPOISON_PARAM(3);
- COMMON_INTERCEPTOR_INITIALIZE_RANGE(a, qsort_r_size);
- COMMON_INTERCEPTOR_INITIALIZE_RANGE(b, qsort_r_size);
- return qsort_r_compar(a, b, arg);
+#if SANITIZER_INTERCEPT_BSEARCH
+typedef int (*bsearch_compar_f)(const void *, const void *);
+struct bsearch_compar_params {
+ const void *key;
+ bsearch_compar_f compar;
+};
+
+static int wrapped_bsearch_compar(const void *key, const void *b) {
+ const bsearch_compar_params *params = (const bsearch_compar_params *)key;
+ COMMON_INTERCEPTOR_UNPOISON_PARAM(2);
+ return params->compar(params->key, b);
}
-INTERCEPTOR(void, qsort_r, void *base, SIZE_T nmemb, SIZE_T size,
- qsort_r_compar_f compar, void *arg) {
+INTERCEPTOR(void *, bsearch, const void *key, const void *base, SIZE_T nmemb,
+ SIZE_T size, bsearch_compar_f compar) {
void *ctx;
- COMMON_INTERCEPTOR_ENTER(ctx, qsort_r, base, nmemb, size, compar, arg);
- // Run the comparator over all array elements to detect any memory issues.
- if (nmemb > 1) {
- for (SIZE_T i = 0; i < nmemb - 1; ++i) {
- void *p = (void *)((char *)base + i * size);
- void *q = (void *)((char *)base + (i + 1) * size);
- COMMON_INTERCEPTOR_UNPOISON_PARAM(3);
- compar(p, q, arg);
- }
- }
- qsort_r_compar_f old_compar = qsort_r_compar;
- SIZE_T old_size = qsort_r_size;
- // Handle qsort_r() implementations that recurse using an
- // interposable function call:
- bool already_wrapped = compar == wrapped_qsort_r_compar;
- if (already_wrapped) {
- // This case should only happen if the qsort() implementation calls itself
- // using a preemptible function call (e.g. the FreeBSD libc version).
- // Check that the size and comparator arguments are as expected.
- CHECK_NE(compar, qsort_r_compar);
- CHECK_EQ(qsort_r_size, size);
- } else {
- qsort_r_compar = compar;
- qsort_r_size = size;
- }
- REAL(qsort_r)(base, nmemb, size, wrapped_qsort_r_compar, arg);
- if (!already_wrapped) {
- qsort_r_compar = old_compar;
- qsort_r_size = old_size;
- }
- COMMON_INTERCEPTOR_WRITE_RANGE(ctx, base, nmemb * size);
+ COMMON_INTERCEPTOR_ENTER(ctx, bsearch, key, base, nmemb, size, compar);
+ bsearch_compar_params params = {key, compar};
+ return REAL(bsearch)(&params, base, nmemb, size, wrapped_bsearch_compar);
}
-#define INIT_QSORT_R COMMON_INTERCEPT_FUNCTION(qsort_r)
+# define INIT_BSEARCH COMMON_INTERCEPT_FUNCTION(bsearch)
#else
-#define INIT_QSORT_R
+# define INIT_BSEARCH
#endif
#if SANITIZER_INTERCEPT_SIGALTSTACK
@@ -10166,6 +10290,7 @@ static void InitializeCommonInterceptors() {
INIT_TIME;
INIT_GLOB;
INIT_GLOB64;
+ INIT_POSIX_SPAWN;
INIT_WAIT;
INIT_WAIT4;
INIT_INET;
@@ -10401,6 +10526,7 @@ static void InitializeCommonInterceptors() {
INIT_GETENTROPY;
INIT_QSORT;
INIT_QSORT_R;
+ INIT_BSEARCH;
INIT_SIGALTSTACK;
INIT_UNAME;
INIT___XUNAME;
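
Note: the rewritten qsort_r and new bsearch interceptors above both rely on the same trick — tunnel the real comparator (and, for bsearch, the real key) through an opaque pointer so a wrapper can run sanitizer checks before forwarding the call. A minimal stand-alone sketch of the bsearch variant, with the sanitizer checks omitted and all names (bsearch_params, wrapped_compar, intercepted_bsearch) purely illustrative:

    #include <cstdio>
    #include <cstdlib>

    using compar_fn = int (*)(const void *, const void *);

    struct bsearch_params {
      const void *key;   // the caller's real key
      compar_fn compar;  // the caller's real comparator
    };

    // libc hands our params struct back as "key"; unwrap it and forward.
    static int wrapped_compar(const void *key, const void *elem) {
      const bsearch_params *p = static_cast<const bsearch_params *>(key);
      return p->compar(p->key, elem);
    }

    static void *intercepted_bsearch(const void *key, const void *base,
                                     std::size_t nmemb, std::size_t size,
                                     compar_fn compar) {
      bsearch_params params = {key, compar};
      // The real interceptor would also run COMMON_INTERCEPTOR_* checks here.
      return std::bsearch(&params, base, nmemb, size, wrapped_compar);
    }

    static int int_cmp(const void *a, const void *b) {
      return *static_cast<const int *>(a) - *static_cast<const int *>(b);
    }

    int main() {
      int v[] = {1, 3, 5, 7};
      int k = 5;
      int *hit = static_cast<int *>(
          intercepted_bsearch(&k, v, 4, sizeof(int), int_cmp));
      std::printf("%d\n", hit ? *hit : -1);  // prints 5
    }
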
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_format.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_format.inc
index 082398ba960a..220abb89c3be 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_format.inc
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_format.inc
@@ -324,8 +324,8 @@ static void scanf_common(void *ctx, int n_inputs, bool allowGnuMalloc,
continue;
int size = scanf_get_value_size(&dir);
if (size == FSS_INVALID) {
- Report("%s: WARNING: unexpected format specifier in scanf interceptor: ",
- SanitizerToolName, "%.*s\n", dir.end - dir.begin, dir.begin);
+ Report("%s: WARNING: unexpected format specifier in scanf interceptor: %.*s\n",
+ SanitizerToolName, static_cast<int>(dir.end - dir.begin), dir.begin);
break;
}
void *argp = va_arg(aq, void *);
@@ -469,7 +469,7 @@ static int printf_get_value_size(PrintfDirective *dir) {
break; \
default: \
Report("WARNING: unexpected floating-point arg size" \
- " in printf interceptor: %d\n", size); \
+ " in printf interceptor: %zu\n", static_cast<uptr>(size)); \
return; \
} \
} else { \
@@ -484,7 +484,7 @@ static int printf_get_value_size(PrintfDirective *dir) {
break; \
default: \
Report("WARNING: unexpected arg size" \
- " in printf interceptor: %d\n", size); \
+ " in printf interceptor: %zu\n", static_cast<uptr>(size)); \
return; \
} \
} \
@@ -530,7 +530,7 @@ static void printf_common(void *ctx, const char *format, va_list aq) {
Report(
"%s: WARNING: unexpected format specifier in printf "
"interceptor: %.*s (reported once per process)\n",
- SanitizerToolName, dir.end - dir.begin, dir.begin);
+ SanitizerToolName, static_cast<int>(dir.end - dir.begin), dir.begin);
break;
}
if (dir.convSpecifier == 'n') {
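
Note: the format-string fixes above all address the same issue — the precision consumed by "%.*s" is read from the va_list as an int, while dir.end - dir.begin is a pointer difference. A small sketch of the corrected pattern, assuming nothing beyond standard printf:

    #include <cstdio>
    #include <cstddef>

    int main() {
      const char *begin = "unexpected-spec";
      const char *end = begin + 10;
      std::ptrdiff_t n = end - begin;  // 64-bit on LP64 targets
      // The '*' precision must be passed as an int, hence the explicit cast.
      std::printf("%.*s\n", static_cast<int>(n), begin);
    }
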
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_netbsd_compat.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_netbsd_compat.inc
index 6aa73ec8c6a2..f6ac3fa5af18 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_netbsd_compat.inc
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_netbsd_compat.inc
@@ -33,7 +33,7 @@
INTERCEPTOR(int, statvfs, char *path, void *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, statvfs, path, buf);
- if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, REAL(strlen)(path) + 1);
+ if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
@@ -99,7 +99,7 @@ INTERCEPTOR(int, getvfsstat, void *buf, SIZE_T bufsize, int flags) {
INTERCEPTOR(int, statvfs1, const char *path, void *buf, int flags) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, statvfs1, path, buf, flags);
- if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, REAL(strlen)(path) + 1);
+ if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
int res = REAL(statvfs1)(path, buf, flags);
if (!res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, struct_statvfs90_sz);
return res;
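
Note: the REAL(strlen) -> internal_strlen substitutions throughout this commit switch the length computation to the sanitizer's own string routine instead of calling through the intercepted libc symbol. A minimal strlen of that style, illustrative only (the real internal_strlen lives in sanitizer_libc):

    #include <cstddef>

    // Plain byte scan; no libc call and no interceptor involved.
    static std::size_t internal_strlen_sketch(const char *s) {
      std::size_t i = 0;
      while (s[i]) ++i;
      return i;
    }
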
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_i386.inc.S b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_i386.inc.S
index ed693819c6d4..f60b05d157bb 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_i386.inc.S
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_i386.inc.S
@@ -6,6 +6,7 @@
.globl ASM_WRAPPER_NAME(vfork)
ASM_TYPE_FUNCTION(ASM_WRAPPER_NAME(vfork))
ASM_WRAPPER_NAME(vfork):
+ _CET_ENDBR
// Store return address in the spill area and tear down the stack frame.
sub $12, %esp
call COMMON_INTERCEPTOR_SPILL_AREA
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_x86_64.inc.S b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_x86_64.inc.S
index 8147cdd09247..8fd18ea67ffd 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_x86_64.inc.S
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_x86_64.inc.S
@@ -6,6 +6,7 @@
.globl ASM_WRAPPER_NAME(vfork)
ASM_TYPE_FUNCTION(ASM_WRAPPER_NAME(vfork))
ASM_WRAPPER_NAME(vfork):
+ _CET_ENDBR
// Store return address in the spill area and tear down the stack frame.
push %rcx
call COMMON_INTERCEPTOR_SPILL_AREA
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_common_libcdep.cpp
index 01ccacc6f320..bc4b477e350f 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_common_libcdep.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_libcdep.cpp
@@ -26,9 +26,7 @@ void SetSoftRssLimitExceededCallback(void (*Callback)(bool exceeded)) {
#if (SANITIZER_LINUX || SANITIZER_NETBSD) && !SANITIZER_GO
// Weak default implementation for when sanitizer_stackdepot is not linked in.
-SANITIZER_WEAK_ATTRIBUTE StackDepotStats *StackDepotGetStats() {
- return nullptr;
-}
+SANITIZER_WEAK_ATTRIBUTE StackDepotStats StackDepotGetStats() { return {}; }
void *BackgroundThread(void *arg) {
const uptr hard_rss_limit_mb = common_flags()->hard_rss_limit_mb;
@@ -48,16 +46,12 @@ void *BackgroundThread(void *arg) {
prev_reported_rss = current_rss_mb;
}
// If stack depot has grown 10% since last time, print it too.
- StackDepotStats *stack_depot_stats = StackDepotGetStats();
- if (stack_depot_stats) {
- if (prev_reported_stack_depot_size * 11 / 10 <
- stack_depot_stats->allocated) {
- Printf("%s: StackDepot: %zd ids; %zdM allocated\n",
- SanitizerToolName,
- stack_depot_stats->n_uniq_ids,
- stack_depot_stats->allocated >> 20);
- prev_reported_stack_depot_size = stack_depot_stats->allocated;
- }
+ StackDepotStats stack_depot_stats = StackDepotGetStats();
+ if (prev_reported_stack_depot_size * 11 / 10 <
+ stack_depot_stats.allocated) {
+ Printf("%s: StackDepot: %zd ids; %zdM allocated\n", SanitizerToolName,
+ stack_depot_stats.n_uniq_ids, stack_depot_stats.allocated >> 20);
+ prev_reported_stack_depot_size = stack_depot_stats.allocated;
}
}
// Check RSS against the limit.
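
Note: the StackDepotGetStats change above returns the stats struct by value, with the weak default returning a zeroed object, so the caller no longer needs a null check before reading the counters. A simplified sketch of that weak-default-by-value pattern, with hypothetical names (Stats, GetStats):

    #include <cstdio>
    #include <cstddef>

    struct Stats {
      std::size_t n_uniq_ids = 0;
      std::size_t allocated = 0;
    };

    // Weak default stand-in; a strong definition elsewhere would override it.
    __attribute__((weak)) Stats GetStats() { return {}; }

    int main() {
      Stats s = GetStats();  // always a valid object, possibly all zeros
      std::printf("%zu ids, %zu bytes\n", s.n_uniq_ids, s.allocated);
    }
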
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_nolibc.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_common_nolibc.cpp
index 9a4e5388f24d..a20602d8b95a 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_common_nolibc.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_nolibc.cpp
@@ -25,6 +25,7 @@ void LogMessageOnPrintf(const char *str) {}
#endif
void WriteToSyslog(const char *buffer) {}
void Abort() { internal__exit(1); }
+bool CreateDir(const char *pathname) { return false; }
#endif // !SANITIZER_WINDOWS
#if !SANITIZER_WINDOWS && !SANITIZER_MAC
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_syscalls.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_syscalls.inc
index 1b89d6e17684..a38b134085aa 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_common_syscalls.inc
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_syscalls.inc
@@ -43,45 +43,47 @@
#include "sanitizer_platform.h"
#if SANITIZER_LINUX
-#include "sanitizer_libc.h"
+# include "sanitizer_libc.h"
-#define PRE_SYSCALL(name) \
- SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_syscall_pre_impl_##name
-#define PRE_READ(p, s) COMMON_SYSCALL_PRE_READ_RANGE(p, s)
-#define PRE_WRITE(p, s) COMMON_SYSCALL_PRE_WRITE_RANGE(p, s)
+# define PRE_SYSCALL(name) \
+ SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_syscall_pre_impl_##name
+# define PRE_READ(p, s) COMMON_SYSCALL_PRE_READ_RANGE(p, s)
+# define PRE_WRITE(p, s) COMMON_SYSCALL_PRE_WRITE_RANGE(p, s)
-#define POST_SYSCALL(name) \
- SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_syscall_post_impl_##name
-#define POST_READ(p, s) COMMON_SYSCALL_POST_READ_RANGE(p, s)
-#define POST_WRITE(p, s) COMMON_SYSCALL_POST_WRITE_RANGE(p, s)
+# define POST_SYSCALL(name) \
+ SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_syscall_post_impl_##name
+# define POST_READ(p, s) COMMON_SYSCALL_POST_READ_RANGE(p, s)
+# define POST_WRITE(p, s) COMMON_SYSCALL_POST_WRITE_RANGE(p, s)
-#ifndef COMMON_SYSCALL_ACQUIRE
-# define COMMON_SYSCALL_ACQUIRE(addr) ((void)(addr))
-#endif
+# ifndef COMMON_SYSCALL_ACQUIRE
+# define COMMON_SYSCALL_ACQUIRE(addr) ((void)(addr))
+# endif
-#ifndef COMMON_SYSCALL_RELEASE
-# define COMMON_SYSCALL_RELEASE(addr) ((void)(addr))
-#endif
+# ifndef COMMON_SYSCALL_RELEASE
+# define COMMON_SYSCALL_RELEASE(addr) ((void)(addr))
+# endif
-#ifndef COMMON_SYSCALL_FD_CLOSE
-# define COMMON_SYSCALL_FD_CLOSE(fd) ((void)(fd))
-#endif
+# ifndef COMMON_SYSCALL_FD_CLOSE
+# define COMMON_SYSCALL_FD_CLOSE(fd) ((void)(fd))
+# endif
-#ifndef COMMON_SYSCALL_FD_ACQUIRE
-# define COMMON_SYSCALL_FD_ACQUIRE(fd) ((void)(fd))
-#endif
+# ifndef COMMON_SYSCALL_FD_ACQUIRE
+# define COMMON_SYSCALL_FD_ACQUIRE(fd) ((void)(fd))
+# endif
-#ifndef COMMON_SYSCALL_FD_RELEASE
-# define COMMON_SYSCALL_FD_RELEASE(fd) ((void)(fd))
-#endif
+# ifndef COMMON_SYSCALL_FD_RELEASE
+# define COMMON_SYSCALL_FD_RELEASE(fd) ((void)(fd))
+# endif
-#ifndef COMMON_SYSCALL_PRE_FORK
-# define COMMON_SYSCALL_PRE_FORK() {}
-#endif
+# ifndef COMMON_SYSCALL_PRE_FORK
+# define COMMON_SYSCALL_PRE_FORK() \
+ {}
+# endif
-#ifndef COMMON_SYSCALL_POST_FORK
-# define COMMON_SYSCALL_POST_FORK(res) {}
-#endif
+# ifndef COMMON_SYSCALL_POST_FORK
+# define COMMON_SYSCALL_POST_FORK(res) \
+ {}
+# endif
// FIXME: do some kind of PRE_READ for all syscall arguments (int(s) and such).
@@ -130,8 +132,8 @@ struct sanitizer_kernel_sockaddr {
// Declare it "void" to catch sizeof(kernel_sigset_t).
typedef void kernel_sigset_t;
-static void kernel_write_iovec(const __sanitizer_iovec *iovec,
- SIZE_T iovlen, SIZE_T maxlen) {
+static void kernel_write_iovec(const __sanitizer_iovec *iovec, SIZE_T iovlen,
+ SIZE_T maxlen) {
for (SIZE_T i = 0; i < iovlen && maxlen; ++i) {
SSIZE_T sz = Min(iovec[i].iov_len, maxlen);
POST_WRITE(iovec[i].iov_base, sz);
@@ -141,8 +143,8 @@ static void kernel_write_iovec(const __sanitizer_iovec *iovec,
// This functions uses POST_READ, because it needs to run after syscall to know
// the real read range.
-static void kernel_read_iovec(const __sanitizer_iovec *iovec,
- SIZE_T iovlen, SIZE_T maxlen) {
+static void kernel_read_iovec(const __sanitizer_iovec *iovec, SIZE_T iovlen,
+ SIZE_T maxlen) {
POST_READ(iovec, sizeof(*iovec) * iovlen);
for (SIZE_T i = 0; i < iovlen && maxlen; ++i) {
SSIZE_T sz = Min(iovec[i].iov_len, maxlen);
@@ -155,8 +157,8 @@ PRE_SYSCALL(recvmsg)(long sockfd, sanitizer_kernel_msghdr *msg, long flags) {
PRE_READ(msg, sizeof(*msg));
}
-POST_SYSCALL(recvmsg)(long res, long sockfd, sanitizer_kernel_msghdr *msg,
- long flags) {
+POST_SYSCALL(recvmsg)
+(long res, long sockfd, sanitizer_kernel_msghdr *msg, long flags) {
if (res >= 0) {
if (msg) {
for (unsigned long i = 0; i < msg->msg_iovlen; ++i) {
@@ -167,13 +169,14 @@ POST_SYSCALL(recvmsg)(long res, long sockfd, sanitizer_kernel_msghdr *msg,
}
}
-PRE_SYSCALL(recvmmsg)(long fd, sanitizer_kernel_mmsghdr *msg, long vlen,
- long flags, void *timeout) {
+PRE_SYSCALL(recvmmsg)
+(long fd, sanitizer_kernel_mmsghdr *msg, long vlen, long flags, void *timeout) {
PRE_READ(msg, vlen * sizeof(*msg));
}
-POST_SYSCALL(recvmmsg)(long res, long fd, sanitizer_kernel_mmsghdr *msg,
- long vlen, long flags, void *timeout) {
+POST_SYSCALL(recvmmsg)
+(long res, long fd, sanitizer_kernel_mmsghdr *msg, long vlen, long flags,
+ void *timeout) {
if (res >= 0) {
if (msg) {
for (unsigned long i = 0; i < msg->msg_hdr.msg_iovlen; ++i) {
@@ -183,7 +186,8 @@ POST_SYSCALL(recvmmsg)(long res, long fd, sanitizer_kernel_mmsghdr *msg,
POST_WRITE(msg->msg_hdr.msg_control, msg->msg_hdr.msg_controllen);
POST_WRITE(&msg->msg_len, sizeof(msg->msg_len));
}
- if (timeout) POST_WRITE(timeout, struct_timespec_sz);
+ if (timeout)
+ POST_WRITE(timeout, struct_timespec_sz);
}
}
@@ -203,7 +207,8 @@ PRE_SYSCALL(time)(void *tloc) {}
POST_SYSCALL(time)(long res, void *tloc) {
if (res >= 0) {
- if (tloc) POST_WRITE(tloc, sizeof(long));
+ if (tloc)
+ POST_WRITE(tloc, sizeof(long));
}
}
@@ -211,7 +216,8 @@ PRE_SYSCALL(stime)(void *tptr) {}
POST_SYSCALL(stime)(long res, void *tptr) {
if (res >= 0) {
- if (tptr) POST_WRITE(tptr, sizeof(long));
+ if (tptr)
+ POST_WRITE(tptr, sizeof(long));
}
}
@@ -219,8 +225,10 @@ PRE_SYSCALL(gettimeofday)(void *tv, void *tz) {}
POST_SYSCALL(gettimeofday)(long res, void *tv, void *tz) {
if (res >= 0) {
- if (tv) POST_WRITE(tv, timeval_sz);
- if (tz) POST_WRITE(tz, struct_timezone_sz);
+ if (tv)
+ POST_WRITE(tv, timeval_sz);
+ if (tz)
+ POST_WRITE(tz, struct_timezone_sz);
}
}
@@ -228,26 +236,30 @@ PRE_SYSCALL(settimeofday)(void *tv, void *tz) {}
POST_SYSCALL(settimeofday)(long res, void *tv, void *tz) {
if (res >= 0) {
- if (tv) POST_WRITE(tv, timeval_sz);
- if (tz) POST_WRITE(tz, struct_timezone_sz);
+ if (tv)
+ POST_WRITE(tv, timeval_sz);
+ if (tz)
+ POST_WRITE(tz, struct_timezone_sz);
}
}
-#if !SANITIZER_ANDROID
+# if !SANITIZER_ANDROID
PRE_SYSCALL(adjtimex)(void *txc_p) {}
POST_SYSCALL(adjtimex)(long res, void *txc_p) {
if (res >= 0) {
- if (txc_p) POST_WRITE(txc_p, struct_timex_sz);
+ if (txc_p)
+ POST_WRITE(txc_p, struct_timex_sz);
}
}
-#endif
+# endif
PRE_SYSCALL(times)(void *tbuf) {}
POST_SYSCALL(times)(long res, void *tbuf) {
if (res >= 0) {
- if (tbuf) POST_WRITE(tbuf, struct_tms_sz);
+ if (tbuf)
+ POST_WRITE(tbuf, struct_tms_sz);
}
}
@@ -259,8 +271,10 @@ PRE_SYSCALL(nanosleep)(void *rqtp, void *rmtp) {}
POST_SYSCALL(nanosleep)(long res, void *rqtp, void *rmtp) {
if (res >= 0) {
- if (rqtp) POST_WRITE(rqtp, struct_timespec_sz);
- if (rmtp) POST_WRITE(rmtp, struct_timespec_sz);
+ if (rqtp)
+ POST_WRITE(rqtp, struct_timespec_sz);
+ if (rmtp)
+ POST_WRITE(rmtp, struct_timespec_sz);
}
}
@@ -296,9 +310,12 @@ PRE_SYSCALL(getresuid)(void *ruid, void *euid, void *suid) {}
POST_SYSCALL(getresuid)(long res, void *ruid, void *euid, void *suid) {
if (res >= 0) {
- if (ruid) POST_WRITE(ruid, sizeof(unsigned));
- if (euid) POST_WRITE(euid, sizeof(unsigned));
- if (suid) POST_WRITE(suid, sizeof(unsigned));
+ if (ruid)
+ POST_WRITE(ruid, sizeof(unsigned));
+ if (euid)
+ POST_WRITE(euid, sizeof(unsigned));
+ if (suid)
+ POST_WRITE(suid, sizeof(unsigned));
}
}
@@ -306,9 +323,12 @@ PRE_SYSCALL(getresgid)(void *rgid, void *egid, void *sgid) {}
POST_SYSCALL(getresgid)(long res, void *rgid, void *egid, void *sgid) {
if (res >= 0) {
- if (rgid) POST_WRITE(rgid, sizeof(unsigned));
- if (egid) POST_WRITE(egid, sizeof(unsigned));
- if (sgid) POST_WRITE(sgid, sizeof(unsigned));
+ if (rgid)
+ POST_WRITE(rgid, sizeof(unsigned));
+ if (egid)
+ POST_WRITE(egid, sizeof(unsigned));
+ if (sgid)
+ POST_WRITE(sgid, sizeof(unsigned));
}
}
@@ -326,10 +346,11 @@ POST_SYSCALL(getsid)(long res, long pid) {}
PRE_SYSCALL(getgroups)(long gidsetsize, void *grouplist) {}
-POST_SYSCALL(getgroups)(long res, long gidsetsize,
- __sanitizer___kernel_gid_t *grouplist) {
+POST_SYSCALL(getgroups)
+(long res, long gidsetsize, __sanitizer___kernel_gid_t *grouplist) {
if (res >= 0) {
- if (grouplist) POST_WRITE(grouplist, res * sizeof(*grouplist));
+ if (grouplist)
+ POST_WRITE(grouplist, res * sizeof(*grouplist));
}
}
@@ -374,11 +395,12 @@ PRE_SYSCALL(setsid)() {}
POST_SYSCALL(setsid)(long res) {}
PRE_SYSCALL(setgroups)(long gidsetsize, __sanitizer___kernel_gid_t *grouplist) {
- if (grouplist) POST_WRITE(grouplist, gidsetsize * sizeof(*grouplist));
+ if (grouplist)
+ POST_WRITE(grouplist, gidsetsize * sizeof(*grouplist));
}
-POST_SYSCALL(setgroups)(long res, long gidsetsize,
- __sanitizer___kernel_gid_t *grouplist) {}
+POST_SYSCALL(setgroups)
+(long res, long gidsetsize, __sanitizer___kernel_gid_t *grouplist) {}
PRE_SYSCALL(acct)(const void *name) {
if (name)
@@ -388,17 +410,21 @@ PRE_SYSCALL(acct)(const void *name) {
POST_SYSCALL(acct)(long res, const void *name) {}
PRE_SYSCALL(capget)(void *header, void *dataptr) {
- if (header) PRE_READ(header, __user_cap_header_struct_sz);
+ if (header)
+ PRE_READ(header, __user_cap_header_struct_sz);
}
POST_SYSCALL(capget)(long res, void *header, void *dataptr) {
if (res >= 0)
- if (dataptr) POST_WRITE(dataptr, __user_cap_data_struct_sz);
+ if (dataptr)
+ POST_WRITE(dataptr, __user_cap_data_struct_sz);
}
PRE_SYSCALL(capset)(void *header, const void *data) {
- if (header) PRE_READ(header, __user_cap_header_struct_sz);
- if (data) PRE_READ(data, __user_cap_data_struct_sz);
+ if (header)
+ PRE_READ(header, __user_cap_header_struct_sz);
+ if (data)
+ PRE_READ(data, __user_cap_data_struct_sz);
}
POST_SYSCALL(capset)(long res, void *header, const void *data) {}
@@ -411,7 +437,8 @@ PRE_SYSCALL(sigpending)(void *set) {}
POST_SYSCALL(sigpending)(long res, void *set) {
if (res >= 0) {
- if (set) POST_WRITE(set, old_sigset_t_sz);
+ if (set)
+ POST_WRITE(set, old_sigset_t_sz);
}
}
@@ -419,8 +446,10 @@ PRE_SYSCALL(sigprocmask)(long how, void *set, void *oset) {}
POST_SYSCALL(sigprocmask)(long res, long how, void *set, void *oset) {
if (res >= 0) {
- if (set) POST_WRITE(set, old_sigset_t_sz);
- if (oset) POST_WRITE(oset, old_sigset_t_sz);
+ if (set)
+ POST_WRITE(set, old_sigset_t_sz);
+ if (oset)
+ POST_WRITE(oset, old_sigset_t_sz);
}
}
@@ -428,7 +457,8 @@ PRE_SYSCALL(getitimer)(long which, void *value) {}
POST_SYSCALL(getitimer)(long res, long which, void *value) {
if (res >= 0) {
- if (value) POST_WRITE(value, struct_itimerval_sz);
+ if (value)
+ POST_WRITE(value, struct_itimerval_sz);
}
}
@@ -436,19 +466,23 @@ PRE_SYSCALL(setitimer)(long which, void *value, void *ovalue) {}
POST_SYSCALL(setitimer)(long res, long which, void *value, void *ovalue) {
if (res >= 0) {
- if (value) POST_WRITE(value, struct_itimerval_sz);
- if (ovalue) POST_WRITE(ovalue, struct_itimerval_sz);
+ if (value)
+ POST_WRITE(value, struct_itimerval_sz);
+ if (ovalue)
+ POST_WRITE(ovalue, struct_itimerval_sz);
}
}
-PRE_SYSCALL(timer_create)(long which_clock, void *timer_event_spec,
- void *created_timer_id) {}
+PRE_SYSCALL(timer_create)
+(long which_clock, void *timer_event_spec, void *created_timer_id) {}
-POST_SYSCALL(timer_create)(long res, long which_clock, void *timer_event_spec,
- void *created_timer_id) {
+POST_SYSCALL(timer_create)
+(long res, long which_clock, void *timer_event_spec, void *created_timer_id) {
if (res >= 0) {
- if (timer_event_spec) POST_WRITE(timer_event_spec, struct_sigevent_sz);
- if (created_timer_id) POST_WRITE(created_timer_id, sizeof(long));
+ if (timer_event_spec)
+ POST_WRITE(timer_event_spec, struct_sigevent_sz);
+ if (created_timer_id)
+ POST_WRITE(created_timer_id, sizeof(long));
}
}
@@ -456,7 +490,8 @@ PRE_SYSCALL(timer_gettime)(long timer_id, void *setting) {}
POST_SYSCALL(timer_gettime)(long res, long timer_id, void *setting) {
if (res >= 0) {
- if (setting) POST_WRITE(setting, struct_itimerspec_sz);
+ if (setting)
+ POST_WRITE(setting, struct_itimerspec_sz);
}
}
@@ -464,15 +499,18 @@ PRE_SYSCALL(timer_getoverrun)(long timer_id) {}
POST_SYSCALL(timer_getoverrun)(long res, long timer_id) {}
-PRE_SYSCALL(timer_settime)(long timer_id, long flags, const void *new_setting,
- void *old_setting) {
- if (new_setting) PRE_READ(new_setting, struct_itimerspec_sz);
+PRE_SYSCALL(timer_settime)
+(long timer_id, long flags, const void *new_setting, void *old_setting) {
+ if (new_setting)
+ PRE_READ(new_setting, struct_itimerspec_sz);
}
-POST_SYSCALL(timer_settime)(long res, long timer_id, long flags,
- const void *new_setting, void *old_setting) {
+POST_SYSCALL(timer_settime)
+(long res, long timer_id, long flags, const void *new_setting,
+ void *old_setting) {
if (res >= 0) {
- if (old_setting) POST_WRITE(old_setting, struct_itimerspec_sz);
+ if (old_setting)
+ POST_WRITE(old_setting, struct_itimerspec_sz);
}
}
@@ -481,7 +519,8 @@ PRE_SYSCALL(timer_delete)(long timer_id) {}
POST_SYSCALL(timer_delete)(long res, long timer_id) {}
PRE_SYSCALL(clock_settime)(long which_clock, const void *tp) {
- if (tp) PRE_READ(tp, struct_timespec_sz);
+ if (tp)
+ PRE_READ(tp, struct_timespec_sz);
}
POST_SYSCALL(clock_settime)(long res, long which_clock, const void *tp) {}
@@ -490,37 +529,42 @@ PRE_SYSCALL(clock_gettime)(long which_clock, void *tp) {}
POST_SYSCALL(clock_gettime)(long res, long which_clock, void *tp) {
if (res >= 0) {
- if (tp) POST_WRITE(tp, struct_timespec_sz);
+ if (tp)
+ POST_WRITE(tp, struct_timespec_sz);
}
}
-#if !SANITIZER_ANDROID
+# if !SANITIZER_ANDROID
PRE_SYSCALL(clock_adjtime)(long which_clock, void *tx) {}
POST_SYSCALL(clock_adjtime)(long res, long which_clock, void *tx) {
if (res >= 0) {
- if (tx) POST_WRITE(tx, struct_timex_sz);
+ if (tx)
+ POST_WRITE(tx, struct_timex_sz);
}
}
-#endif
+# endif
PRE_SYSCALL(clock_getres)(long which_clock, void *tp) {}
POST_SYSCALL(clock_getres)(long res, long which_clock, void *tp) {
if (res >= 0) {
- if (tp) POST_WRITE(tp, struct_timespec_sz);
+ if (tp)
+ POST_WRITE(tp, struct_timespec_sz);
}
}
-PRE_SYSCALL(clock_nanosleep)(long which_clock, long flags, const void *rqtp,
- void *rmtp) {
- if (rqtp) PRE_READ(rqtp, struct_timespec_sz);
+PRE_SYSCALL(clock_nanosleep)
+(long which_clock, long flags, const void *rqtp, void *rmtp) {
+ if (rqtp)
+ PRE_READ(rqtp, struct_timespec_sz);
}
-POST_SYSCALL(clock_nanosleep)(long res, long which_clock, long flags,
- const void *rqtp, void *rmtp) {
+POST_SYSCALL(clock_nanosleep)
+(long res, long which_clock, long flags, const void *rqtp, void *rmtp) {
if (res >= 0) {
- if (rmtp) POST_WRITE(rmtp, struct_timespec_sz);
+ if (rmtp)
+ POST_WRITE(rmtp, struct_timespec_sz);
}
}
@@ -532,12 +576,14 @@ PRE_SYSCALL(sched_setscheduler)(long pid, long policy, void *param) {}
POST_SYSCALL(sched_setscheduler)(long res, long pid, long policy, void *param) {
if (res >= 0) {
- if (param) POST_WRITE(param, struct_sched_param_sz);
+ if (param)
+ POST_WRITE(param, struct_sched_param_sz);
}
}
PRE_SYSCALL(sched_setparam)(long pid, void *param) {
- if (param) PRE_READ(param, struct_sched_param_sz);
+ if (param)
+ PRE_READ(param, struct_sched_param_sz);
}
POST_SYSCALL(sched_setparam)(long res, long pid, void *param) {}
@@ -550,23 +596,26 @@ PRE_SYSCALL(sched_getparam)(long pid, void *param) {}
POST_SYSCALL(sched_getparam)(long res, long pid, void *param) {
if (res >= 0) {
- if (param) POST_WRITE(param, struct_sched_param_sz);
+ if (param)
+ POST_WRITE(param, struct_sched_param_sz);
}
}
PRE_SYSCALL(sched_setaffinity)(long pid, long len, void *user_mask_ptr) {
- if (user_mask_ptr) PRE_READ(user_mask_ptr, len);
+ if (user_mask_ptr)
+ PRE_READ(user_mask_ptr, len);
}
-POST_SYSCALL(sched_setaffinity)(long res, long pid, long len,
- void *user_mask_ptr) {}
+POST_SYSCALL(sched_setaffinity)
+(long res, long pid, long len, void *user_mask_ptr) {}
PRE_SYSCALL(sched_getaffinity)(long pid, long len, void *user_mask_ptr) {}
-POST_SYSCALL(sched_getaffinity)(long res, long pid, long len,
- void *user_mask_ptr) {
+POST_SYSCALL(sched_getaffinity)
+(long res, long pid, long len, void *user_mask_ptr) {
if (res >= 0) {
- if (user_mask_ptr) POST_WRITE(user_mask_ptr, len);
+ if (user_mask_ptr)
+ POST_WRITE(user_mask_ptr, len);
}
}
@@ -586,7 +635,8 @@ PRE_SYSCALL(sched_rr_get_interval)(long pid, void *interval) {}
POST_SYSCALL(sched_rr_get_interval)(long res, long pid, void *interval) {
if (res >= 0) {
- if (interval) POST_WRITE(interval, struct_timespec_sz);
+ if (interval)
+ POST_WRITE(interval, struct_timespec_sz);
}
}
@@ -610,13 +660,14 @@ PRE_SYSCALL(restart_syscall)() {}
POST_SYSCALL(restart_syscall)(long res) {}
-PRE_SYSCALL(kexec_load)(long entry, long nr_segments, void *segments,
- long flags) {}
+PRE_SYSCALL(kexec_load)
+(long entry, long nr_segments, void *segments, long flags) {}
-POST_SYSCALL(kexec_load)(long res, long entry, long nr_segments, void *segments,
- long flags) {
+POST_SYSCALL(kexec_load)
+(long res, long entry, long nr_segments, void *segments, long flags) {
if (res >= 0) {
- if (segments) POST_WRITE(segments, struct_kexec_segment_sz);
+ if (segments)
+ POST_WRITE(segments, struct_kexec_segment_sz);
}
}
@@ -630,22 +681,26 @@ POST_SYSCALL(exit_group)(long res, long error_code) {}
PRE_SYSCALL(wait4)(long pid, void *stat_addr, long options, void *ru) {}
-POST_SYSCALL(wait4)(long res, long pid, void *stat_addr, long options,
- void *ru) {
+POST_SYSCALL(wait4)
+(long res, long pid, void *stat_addr, long options, void *ru) {
if (res >= 0) {
- if (stat_addr) POST_WRITE(stat_addr, sizeof(int));
- if (ru) POST_WRITE(ru, struct_rusage_sz);
+ if (stat_addr)
+ POST_WRITE(stat_addr, sizeof(int));
+ if (ru)
+ POST_WRITE(ru, struct_rusage_sz);
}
}
-PRE_SYSCALL(waitid)(long which, long pid, void *infop, long options, void *ru) {
-}
+PRE_SYSCALL(waitid)
+(long which, long pid, void *infop, long options, void *ru) {}
-POST_SYSCALL(waitid)(long res, long which, long pid, void *infop, long options,
- void *ru) {
+POST_SYSCALL(waitid)
+(long res, long which, long pid, void *infop, long options, void *ru) {
if (res >= 0) {
- if (infop) POST_WRITE(infop, siginfo_t_sz);
- if (ru) POST_WRITE(ru, struct_rusage_sz);
+ if (infop)
+ POST_WRITE(infop, siginfo_t_sz);
+ if (ru)
+ POST_WRITE(ru, struct_rusage_sz);
}
}
@@ -653,7 +708,8 @@ PRE_SYSCALL(waitpid)(long pid, void *stat_addr, long options) {}
POST_SYSCALL(waitpid)(long res, long pid, void *stat_addr, long options) {
if (res >= 0) {
- if (stat_addr) POST_WRITE(stat_addr, sizeof(int));
+ if (stat_addr)
+ POST_WRITE(stat_addr, sizeof(int));
}
}
@@ -661,7 +717,8 @@ PRE_SYSCALL(set_tid_address)(void *tidptr) {}
POST_SYSCALL(set_tid_address)(long res, void *tidptr) {
if (res >= 0) {
- if (tidptr) POST_WRITE(tidptr, sizeof(int));
+ if (tidptr)
+ POST_WRITE(tidptr, sizeof(int));
}
}
@@ -682,11 +739,14 @@ POST_SYSCALL(delete_module)(long res, const void *name_user, long flags) {}
PRE_SYSCALL(rt_sigprocmask)(long how, void *set, void *oset, long sigsetsize) {}
-POST_SYSCALL(rt_sigprocmask)(long res, long how, kernel_sigset_t *set,
- kernel_sigset_t *oset, long sigsetsize) {
+POST_SYSCALL(rt_sigprocmask)
+(long res, long how, kernel_sigset_t *set, kernel_sigset_t *oset,
+ long sigsetsize) {
if (res >= 0) {
- if (set) POST_WRITE(set, sigsetsize);
- if (oset) POST_WRITE(oset, sigsetsize);
+ if (set)
+ POST_WRITE(set, sigsetsize);
+ if (oset)
+ POST_WRITE(oset, sigsetsize);
}
}
@@ -694,29 +754,34 @@ PRE_SYSCALL(rt_sigpending)(void *set, long sigsetsize) {}
POST_SYSCALL(rt_sigpending)(long res, kernel_sigset_t *set, long sigsetsize) {
if (res >= 0) {
- if (set) POST_WRITE(set, sigsetsize);
+ if (set)
+ POST_WRITE(set, sigsetsize);
}
}
-PRE_SYSCALL(rt_sigtimedwait)(const kernel_sigset_t *uthese, void *uinfo,
- const void *uts, long sigsetsize) {
- if (uthese) PRE_READ(uthese, sigsetsize);
- if (uts) PRE_READ(uts, struct_timespec_sz);
+PRE_SYSCALL(rt_sigtimedwait)
+(const kernel_sigset_t *uthese, void *uinfo, const void *uts, long sigsetsize) {
+ if (uthese)
+ PRE_READ(uthese, sigsetsize);
+ if (uts)
+ PRE_READ(uts, struct_timespec_sz);
}
-POST_SYSCALL(rt_sigtimedwait)(long res, const void *uthese, void *uinfo,
- const void *uts, long sigsetsize) {
+POST_SYSCALL(rt_sigtimedwait)
+(long res, const void *uthese, void *uinfo, const void *uts, long sigsetsize) {
if (res >= 0) {
- if (uinfo) POST_WRITE(uinfo, siginfo_t_sz);
+ if (uinfo)
+ POST_WRITE(uinfo, siginfo_t_sz);
}
}
PRE_SYSCALL(rt_tgsigqueueinfo)(long tgid, long pid, long sig, void *uinfo) {}
-POST_SYSCALL(rt_tgsigqueueinfo)(long res, long tgid, long pid, long sig,
- void *uinfo) {
+POST_SYSCALL(rt_tgsigqueueinfo)
+(long res, long tgid, long pid, long sig, void *uinfo) {
if (res >= 0) {
- if (uinfo) POST_WRITE(uinfo, siginfo_t_sz);
+ if (uinfo)
+ POST_WRITE(uinfo, siginfo_t_sz);
}
}
@@ -736,7 +801,8 @@ PRE_SYSCALL(rt_sigqueueinfo)(long pid, long sig, void *uinfo) {}
POST_SYSCALL(rt_sigqueueinfo)(long res, long pid, long sig, void *uinfo) {
if (res >= 0) {
- if (uinfo) POST_WRITE(uinfo, siginfo_t_sz);
+ if (uinfo)
+ POST_WRITE(uinfo, siginfo_t_sz);
}
}
@@ -772,11 +838,11 @@ PRE_SYSCALL(bdflush)(long func, long data) {}
POST_SYSCALL(bdflush)(long res, long func, long data) {}
-PRE_SYSCALL(mount)(void *dev_name, void *dir_name, void *type, long flags,
- void *data) {}
+PRE_SYSCALL(mount)
+(void *dev_name, void *dir_name, void *type, long flags, void *data) {}
-POST_SYSCALL(mount)(long res, void *dev_name, void *dir_name, void *type,
- long flags, void *data) {
+POST_SYSCALL(mount)
+(long res, void *dev_name, void *dir_name, void *type, long flags, void *data) {
if (res >= 0) {
if (dev_name)
POST_WRITE(dev_name,
@@ -826,11 +892,12 @@ PRE_SYSCALL(stat)(const void *filename, void *statbuf) {
POST_SYSCALL(stat)(long res, const void *filename, void *statbuf) {
if (res >= 0) {
- if (statbuf) POST_WRITE(statbuf, struct___old_kernel_stat_sz);
+ if (statbuf)
+ POST_WRITE(statbuf, struct___old_kernel_stat_sz);
}
}
-#if !SANITIZER_ANDROID
+# if !SANITIZER_ANDROID
PRE_SYSCALL(statfs)(const void *path, void *buf) {
if (path)
PRE_READ(path, __sanitizer::internal_strlen((const char *)path) + 1);
@@ -838,7 +905,8 @@ PRE_SYSCALL(statfs)(const void *path, void *buf) {
POST_SYSCALL(statfs)(long res, const void *path, void *buf) {
if (res >= 0) {
- if (buf) POST_WRITE(buf, struct_statfs_sz);
+ if (buf)
+ POST_WRITE(buf, struct_statfs_sz);
}
}
@@ -849,7 +917,8 @@ PRE_SYSCALL(statfs64)(const void *path, long sz, void *buf) {
POST_SYSCALL(statfs64)(long res, const void *path, long sz, void *buf) {
if (res >= 0) {
- if (buf) POST_WRITE(buf, struct_statfs64_sz);
+ if (buf)
+ POST_WRITE(buf, struct_statfs64_sz);
}
}
@@ -857,7 +926,8 @@ PRE_SYSCALL(fstatfs)(long fd, void *buf) {}
POST_SYSCALL(fstatfs)(long res, long fd, void *buf) {
if (res >= 0) {
- if (buf) POST_WRITE(buf, struct_statfs_sz);
+ if (buf)
+ POST_WRITE(buf, struct_statfs_sz);
}
}
@@ -865,10 +935,11 @@ PRE_SYSCALL(fstatfs64)(long fd, long sz, void *buf) {}
POST_SYSCALL(fstatfs64)(long res, long fd, long sz, void *buf) {
if (res >= 0) {
- if (buf) POST_WRITE(buf, struct_statfs64_sz);
+ if (buf)
+ POST_WRITE(buf, struct_statfs64_sz);
}
}
-#endif // !SANITIZER_ANDROID
+# endif // !SANITIZER_ANDROID
PRE_SYSCALL(lstat)(const void *filename, void *statbuf) {
if (filename)
@@ -878,7 +949,8 @@ PRE_SYSCALL(lstat)(const void *filename, void *statbuf) {
POST_SYSCALL(lstat)(long res, const void *filename, void *statbuf) {
if (res >= 0) {
- if (statbuf) POST_WRITE(statbuf, struct___old_kernel_stat_sz);
+ if (statbuf)
+ POST_WRITE(statbuf, struct___old_kernel_stat_sz);
}
}
@@ -886,7 +958,8 @@ PRE_SYSCALL(fstat)(long fd, void *statbuf) {}
POST_SYSCALL(fstat)(long res, long fd, void *statbuf) {
if (res >= 0) {
- if (statbuf) POST_WRITE(statbuf, struct___old_kernel_stat_sz);
+ if (statbuf)
+ POST_WRITE(statbuf, struct___old_kernel_stat_sz);
}
}
@@ -898,7 +971,8 @@ PRE_SYSCALL(newstat)(const void *filename, void *statbuf) {
POST_SYSCALL(newstat)(long res, const void *filename, void *statbuf) {
if (res >= 0) {
- if (statbuf) POST_WRITE(statbuf, struct_kernel_stat_sz);
+ if (statbuf)
+ POST_WRITE(statbuf, struct_kernel_stat_sz);
}
}
@@ -910,7 +984,8 @@ PRE_SYSCALL(newlstat)(const void *filename, void *statbuf) {
POST_SYSCALL(newlstat)(long res, const void *filename, void *statbuf) {
if (res >= 0) {
- if (statbuf) POST_WRITE(statbuf, struct_kernel_stat_sz);
+ if (statbuf)
+ POST_WRITE(statbuf, struct_kernel_stat_sz);
}
}
@@ -918,19 +993,21 @@ PRE_SYSCALL(newfstat)(long fd, void *statbuf) {}
POST_SYSCALL(newfstat)(long res, long fd, void *statbuf) {
if (res >= 0) {
- if (statbuf) POST_WRITE(statbuf, struct_kernel_stat_sz);
+ if (statbuf)
+ POST_WRITE(statbuf, struct_kernel_stat_sz);
}
}
-#if !SANITIZER_ANDROID
+# if !SANITIZER_ANDROID
PRE_SYSCALL(ustat)(long dev, void *ubuf) {}
POST_SYSCALL(ustat)(long res, long dev, void *ubuf) {
if (res >= 0) {
- if (ubuf) POST_WRITE(ubuf, struct_ustat_sz);
+ if (ubuf)
+ POST_WRITE(ubuf, struct_ustat_sz);
}
}
-#endif // !SANITIZER_ANDROID
+# endif // !SANITIZER_ANDROID
PRE_SYSCALL(stat64)(const void *filename, void *statbuf) {
if (filename)
@@ -940,7 +1017,8 @@ PRE_SYSCALL(stat64)(const void *filename, void *statbuf) {
POST_SYSCALL(stat64)(long res, const void *filename, void *statbuf) {
if (res >= 0) {
- if (statbuf) POST_WRITE(statbuf, struct_kernel_stat64_sz);
+ if (statbuf)
+ POST_WRITE(statbuf, struct_kernel_stat64_sz);
}
}
@@ -948,7 +1026,8 @@ PRE_SYSCALL(fstat64)(long fd, void *statbuf) {}
POST_SYSCALL(fstat64)(long res, long fd, void *statbuf) {
if (res >= 0) {
- if (statbuf) POST_WRITE(statbuf, struct_kernel_stat64_sz);
+ if (statbuf)
+ POST_WRITE(statbuf, struct_kernel_stat64_sz);
}
}
@@ -960,71 +1039,80 @@ PRE_SYSCALL(lstat64)(const void *filename, void *statbuf) {
POST_SYSCALL(lstat64)(long res, const void *filename, void *statbuf) {
if (res >= 0) {
- if (statbuf) POST_WRITE(statbuf, struct_kernel_stat64_sz);
+ if (statbuf)
+ POST_WRITE(statbuf, struct_kernel_stat64_sz);
}
}
-PRE_SYSCALL(setxattr)(const void *path, const void *name, const void *value,
- long size, long flags) {
+PRE_SYSCALL(setxattr)
+(const void *path, const void *name, const void *value, long size, long flags) {
if (path)
PRE_READ(path, __sanitizer::internal_strlen((const char *)path) + 1);
if (name)
PRE_READ(name, __sanitizer::internal_strlen((const char *)name) + 1);
- if (value) PRE_READ(value, size);
+ if (value)
+ PRE_READ(value, size);
}
-POST_SYSCALL(setxattr)(long res, const void *path, const void *name,
- const void *value, long size, long flags) {}
+POST_SYSCALL(setxattr)
+(long res, const void *path, const void *name, const void *value, long size,
+ long flags) {}
-PRE_SYSCALL(lsetxattr)(const void *path, const void *name, const void *value,
- long size, long flags) {
+PRE_SYSCALL(lsetxattr)
+(const void *path, const void *name, const void *value, long size, long flags) {
if (path)
PRE_READ(path, __sanitizer::internal_strlen((const char *)path) + 1);
if (name)
PRE_READ(name, __sanitizer::internal_strlen((const char *)name) + 1);
- if (value) PRE_READ(value, size);
+ if (value)
+ PRE_READ(value, size);
}
-POST_SYSCALL(lsetxattr)(long res, const void *path, const void *name,
- const void *value, long size, long flags) {}
+POST_SYSCALL(lsetxattr)
+(long res, const void *path, const void *name, const void *value, long size,
+ long flags) {}
-PRE_SYSCALL(fsetxattr)(long fd, const void *name, const void *value, long size,
- long flags) {
+PRE_SYSCALL(fsetxattr)
+(long fd, const void *name, const void *value, long size, long flags) {
if (name)
PRE_READ(name, __sanitizer::internal_strlen((const char *)name) + 1);
- if (value) PRE_READ(value, size);
+ if (value)
+ PRE_READ(value, size);
}
-POST_SYSCALL(fsetxattr)(long res, long fd, const void *name, const void *value,
- long size, long flags) {}
+POST_SYSCALL(fsetxattr)
+(long res, long fd, const void *name, const void *value, long size,
+ long flags) {}
-PRE_SYSCALL(getxattr)(const void *path, const void *name, void *value,
- long size) {
+PRE_SYSCALL(getxattr)
+(const void *path, const void *name, void *value, long size) {
if (path)
PRE_READ(path, __sanitizer::internal_strlen((const char *)path) + 1);
if (name)
PRE_READ(name, __sanitizer::internal_strlen((const char *)name) + 1);
}
-POST_SYSCALL(getxattr)(long res, const void *path, const void *name,
- void *value, long size) {
+POST_SYSCALL(getxattr)
+(long res, const void *path, const void *name, void *value, long size) {
if (size && res > 0) {
- if (value) POST_WRITE(value, res);
+ if (value)
+ POST_WRITE(value, res);
}
}
-PRE_SYSCALL(lgetxattr)(const void *path, const void *name, void *value,
- long size) {
+PRE_SYSCALL(lgetxattr)
+(const void *path, const void *name, void *value, long size) {
if (path)
PRE_READ(path, __sanitizer::internal_strlen((const char *)path) + 1);
if (name)
PRE_READ(name, __sanitizer::internal_strlen((const char *)name) + 1);
}
-POST_SYSCALL(lgetxattr)(long res, const void *path, const void *name,
- void *value, long size) {
+POST_SYSCALL(lgetxattr)
+(long res, const void *path, const void *name, void *value, long size) {
if (size && res > 0) {
- if (value) POST_WRITE(value, res);
+ if (value)
+ POST_WRITE(value, res);
}
}
@@ -1033,10 +1121,11 @@ PRE_SYSCALL(fgetxattr)(long fd, const void *name, void *value, long size) {
PRE_READ(name, __sanitizer::internal_strlen((const char *)name) + 1);
}
-POST_SYSCALL(fgetxattr)(long res, long fd, const void *name, void *value,
- long size) {
+POST_SYSCALL(fgetxattr)
+(long res, long fd, const void *name, void *value, long size) {
if (size && res > 0) {
- if (value) POST_WRITE(value, res);
+ if (value)
+ POST_WRITE(value, res);
}
}
@@ -1047,7 +1136,8 @@ PRE_SYSCALL(listxattr)(const void *path, void *list, long size) {
POST_SYSCALL(listxattr)(long res, const void *path, void *list, long size) {
if (size && res > 0) {
- if (list) POST_WRITE(list, res);
+ if (list)
+ POST_WRITE(list, res);
}
}
@@ -1058,7 +1148,8 @@ PRE_SYSCALL(llistxattr)(const void *path, void *list, long size) {
POST_SYSCALL(llistxattr)(long res, const void *path, void *list, long size) {
if (size && res > 0) {
- if (list) POST_WRITE(list, res);
+ if (list)
+ POST_WRITE(list, res);
}
}
@@ -1066,7 +1157,8 @@ PRE_SYSCALL(flistxattr)(long fd, void *list, long size) {}
POST_SYSCALL(flistxattr)(long res, long fd, void *list, long size) {
if (size && res > 0) {
- if (list) POST_WRITE(list, res);
+ if (list)
+ POST_WRITE(list, res);
}
}
@@ -1103,17 +1195,17 @@ PRE_SYSCALL(mprotect)(long start, long len, long prot) {}
POST_SYSCALL(mprotect)(long res, long start, long len, long prot) {}
-PRE_SYSCALL(mremap)(long addr, long old_len, long new_len, long flags,
- long new_addr) {}
+PRE_SYSCALL(mremap)
+(long addr, long old_len, long new_len, long flags, long new_addr) {}
-POST_SYSCALL(mremap)(long res, long addr, long old_len, long new_len,
- long flags, long new_addr) {}
+POST_SYSCALL(mremap)
+(long res, long addr, long old_len, long new_len, long flags, long new_addr) {}
-PRE_SYSCALL(remap_file_pages)(long start, long size, long prot, long pgoff,
- long flags) {}
+PRE_SYSCALL(remap_file_pages)
+(long start, long size, long prot, long pgoff, long flags) {}
-POST_SYSCALL(remap_file_pages)(long res, long start, long size, long prot,
- long pgoff, long flags) {}
+POST_SYSCALL(remap_file_pages)
+(long res, long start, long size, long prot, long pgoff, long flags) {}
PRE_SYSCALL(msync)(long start, long len, long flags) {}
@@ -1189,7 +1281,8 @@ PRE_SYSCALL(link)(const void *oldname, const void *newname) {
POST_SYSCALL(link)(long res, const void *oldname, const void *newname) {}
PRE_SYSCALL(symlink)(const void *old, const void *new_) {
- if (old) PRE_READ(old, __sanitizer::internal_strlen((const char *)old) + 1);
+ if (old)
+ PRE_READ(old, __sanitizer::internal_strlen((const char *)old) + 1);
if (new_)
PRE_READ(new_, __sanitizer::internal_strlen((const char *)new_) + 1);
}
@@ -1237,14 +1330,16 @@ PRE_SYSCALL(pipe)(void *fildes) {}
POST_SYSCALL(pipe)(long res, void *fildes) {
if (res >= 0)
- if (fildes) POST_WRITE(fildes, sizeof(int) * 2);
+ if (fildes)
+ POST_WRITE(fildes, sizeof(int) * 2);
}
PRE_SYSCALL(pipe2)(void *fildes, long flags) {}
POST_SYSCALL(pipe2)(long res, void *fildes, long flags) {
if (res >= 0)
- if (fildes) POST_WRITE(fildes, sizeof(int) * 2);
+ if (fildes)
+ POST_WRITE(fildes, sizeof(int) * 2);
}
PRE_SYSCALL(dup)(long fildes) {}
@@ -1272,16 +1367,19 @@ PRE_SYSCALL(flock)(long fd, long cmd) {}
POST_SYSCALL(flock)(long res, long fd, long cmd) {}
PRE_SYSCALL(io_setup)(long nr_reqs, void **ctx) {
- if (ctx) PRE_WRITE(ctx, sizeof(*ctx));
+ if (ctx)
+ PRE_WRITE(ctx, sizeof(*ctx));
}
POST_SYSCALL(io_setup)(long res, long nr_reqs, void **ctx) {
if (res >= 0) {
- if (ctx) POST_WRITE(ctx, sizeof(*ctx));
+ if (ctx)
+ POST_WRITE(ctx, sizeof(*ctx));
// (*ctx) is actually a pointer to a kernel mapped page, and there are
// people out there who are crazy enough to peek into that page's 32-byte
// header.
- if (*ctx) POST_WRITE(*ctx, 32);
+ if (*ctx)
+ POST_WRITE(*ctx, 32);
}
}
@@ -1289,16 +1387,21 @@ PRE_SYSCALL(io_destroy)(long ctx) {}
POST_SYSCALL(io_destroy)(long res, long ctx) {}
-PRE_SYSCALL(io_getevents)(long ctx_id, long min_nr, long nr,
- __sanitizer_io_event *ioevpp, void *timeout) {
- if (timeout) PRE_READ(timeout, struct_timespec_sz);
+PRE_SYSCALL(io_getevents)
+(long ctx_id, long min_nr, long nr, __sanitizer_io_event *ioevpp,
+ void *timeout) {
+ if (timeout)
+ PRE_READ(timeout, struct_timespec_sz);
}
-POST_SYSCALL(io_getevents)(long res, long ctx_id, long min_nr, long nr,
- __sanitizer_io_event *ioevpp, void *timeout) {
+POST_SYSCALL(io_getevents)
+(long res, long ctx_id, long min_nr, long nr, __sanitizer_io_event *ioevpp,
+ void *timeout) {
if (res >= 0) {
- if (ioevpp) POST_WRITE(ioevpp, res * sizeof(*ioevpp));
- if (timeout) POST_WRITE(timeout, struct_timespec_sz);
+ if (ioevpp)
+ POST_WRITE(ioevpp, res * sizeof(*ioevpp));
+ if (timeout)
+ POST_WRITE(timeout, struct_timespec_sz);
}
for (long i = 0; i < res; i++) {
// We synchronize io_submit -> io_getevents/io_cancel using the
@@ -1308,26 +1411,26 @@ POST_SYSCALL(io_getevents)(long res, long ctx_id, long min_nr, long nr,
// synchronize on 0. But there does not seem to be a better solution
// (except wrapping all operations in own context, which is unreliable).
// We can not reliably extract fildes in io_getevents.
- COMMON_SYSCALL_ACQUIRE((void*)ioevpp[i].data);
+ COMMON_SYSCALL_ACQUIRE((void *)ioevpp[i].data);
}
}
PRE_SYSCALL(io_submit)(long ctx_id, long nr, __sanitizer_iocb **iocbpp) {
for (long i = 0; i < nr; ++i) {
uptr op = iocbpp[i]->aio_lio_opcode;
- void *data = (void*)iocbpp[i]->aio_data;
- void *buf = (void*)iocbpp[i]->aio_buf;
+ void *data = (void *)iocbpp[i]->aio_data;
+ void *buf = (void *)iocbpp[i]->aio_buf;
uptr len = (uptr)iocbpp[i]->aio_nbytes;
if (op == iocb_cmd_pwrite && buf && len) {
PRE_READ(buf, len);
} else if (op == iocb_cmd_pread && buf && len) {
POST_WRITE(buf, len);
} else if (op == iocb_cmd_pwritev) {
- __sanitizer_iovec *iovec = (__sanitizer_iovec*)buf;
+ __sanitizer_iovec *iovec = (__sanitizer_iovec *)buf;
for (uptr v = 0; v < len; v++)
PRE_READ(iovec[v].iov_base, iovec[v].iov_len);
} else if (op == iocb_cmd_preadv) {
- __sanitizer_iovec *iovec = (__sanitizer_iovec*)buf;
+ __sanitizer_iovec *iovec = (__sanitizer_iovec *)buf;
for (uptr v = 0; v < len; v++)
POST_WRITE(iovec[v].iov_base, iovec[v].iov_len);
}
@@ -1336,19 +1439,18 @@ PRE_SYSCALL(io_submit)(long ctx_id, long nr, __sanitizer_iocb **iocbpp) {
}
}
-POST_SYSCALL(io_submit)(long res, long ctx_id, long nr,
- __sanitizer_iocb **iocbpp) {}
+POST_SYSCALL(io_submit)
+(long res, long ctx_id, long nr, __sanitizer_iocb **iocbpp) {}
-PRE_SYSCALL(io_cancel)(long ctx_id, __sanitizer_iocb *iocb,
- __sanitizer_io_event *result) {
-}
+PRE_SYSCALL(io_cancel)
+(long ctx_id, __sanitizer_iocb *iocb, __sanitizer_io_event *result) {}
-POST_SYSCALL(io_cancel)(long res, long ctx_id, __sanitizer_iocb *iocb,
- __sanitizer_io_event *result) {
+POST_SYSCALL(io_cancel)
+(long res, long ctx_id, __sanitizer_iocb *iocb, __sanitizer_io_event *result) {
if (res == 0) {
if (result) {
// See comment in io_getevents.
- COMMON_SYSCALL_ACQUIRE((void*)result->data);
+ COMMON_SYSCALL_ACQUIRE((void *)result->data);
POST_WRITE(result, sizeof(*result));
}
if (iocb)
@@ -1358,19 +1460,23 @@ POST_SYSCALL(io_cancel)(long res, long ctx_id, __sanitizer_iocb *iocb,
PRE_SYSCALL(sendfile)(long out_fd, long in_fd, void *offset, long count) {}
-POST_SYSCALL(sendfile)(long res, long out_fd, long in_fd,
- __sanitizer___kernel_off_t *offset, long count) {
+POST_SYSCALL(sendfile)
+(long res, long out_fd, long in_fd, __sanitizer___kernel_off_t *offset,
+ long count) {
if (res >= 0) {
- if (offset) POST_WRITE(offset, sizeof(*offset));
+ if (offset)
+ POST_WRITE(offset, sizeof(*offset));
}
}
PRE_SYSCALL(sendfile64)(long out_fd, long in_fd, void *offset, long count) {}
-POST_SYSCALL(sendfile64)(long res, long out_fd, long in_fd,
- __sanitizer___kernel_loff_t *offset, long count) {
+POST_SYSCALL(sendfile64)
+(long res, long out_fd, long in_fd, __sanitizer___kernel_loff_t *offset,
+ long count) {
if (res >= 0) {
- if (offset) POST_WRITE(offset, sizeof(*offset));
+ if (offset)
+ POST_WRITE(offset, sizeof(*offset));
}
}
@@ -1402,9 +1508,7 @@ PRE_SYSCALL(open)(const void *filename, long flags, long mode) {
POST_SYSCALL(open)(long res, const void *filename, long flags, long mode) {}
-PRE_SYSCALL(close)(long fd) {
- COMMON_SYSCALL_FD_CLOSE((int)fd);
-}
+PRE_SYSCALL(close)(long fd) { COMMON_SYSCALL_FD_CLOSE((int)fd); }
POST_SYSCALL(close)(long res, long fd) {}
@@ -1440,7 +1544,7 @@ PRE_SYSCALL(fchown)(long fd, long user, long group) {}
POST_SYSCALL(fchown)(long res, long fd, long user, long group) {}
-#if SANITIZER_USES_UID16_SYSCALLS
+# if SANITIZER_USES_UID16_SYSCALLS
PRE_SYSCALL(chown16)(const void *filename, long user, long group) {
if (filename)
PRE_READ(filename,
@@ -1483,13 +1587,16 @@ POST_SYSCALL(setresuid16)(long res, long ruid, long euid, long suid) {}
PRE_SYSCALL(getresuid16)(void *ruid, void *euid, void *suid) {}
-POST_SYSCALL(getresuid16)(long res, __sanitizer___kernel_old_uid_t *ruid,
- __sanitizer___kernel_old_uid_t *euid,
- __sanitizer___kernel_old_uid_t *suid) {
+POST_SYSCALL(getresuid16)
+(long res, __sanitizer___kernel_old_uid_t *ruid,
+ __sanitizer___kernel_old_uid_t *euid, __sanitizer___kernel_old_uid_t *suid) {
if (res >= 0) {
- if (ruid) POST_WRITE(ruid, sizeof(*ruid));
- if (euid) POST_WRITE(euid, sizeof(*euid));
- if (suid) POST_WRITE(suid, sizeof(*suid));
+ if (ruid)
+ POST_WRITE(ruid, sizeof(*ruid));
+ if (euid)
+ POST_WRITE(euid, sizeof(*euid));
+ if (suid)
+ POST_WRITE(suid, sizeof(*suid));
}
}
@@ -1499,13 +1606,16 @@ POST_SYSCALL(setresgid16)(long res, long rgid, long egid, long sgid) {}
PRE_SYSCALL(getresgid16)(void *rgid, void *egid, void *sgid) {}
-POST_SYSCALL(getresgid16)(long res, __sanitizer___kernel_old_gid_t *rgid,
- __sanitizer___kernel_old_gid_t *egid,
- __sanitizer___kernel_old_gid_t *sgid) {
+POST_SYSCALL(getresgid16)
+(long res, __sanitizer___kernel_old_gid_t *rgid,
+ __sanitizer___kernel_old_gid_t *egid, __sanitizer___kernel_old_gid_t *sgid) {
if (res >= 0) {
- if (rgid) POST_WRITE(rgid, sizeof(*rgid));
- if (egid) POST_WRITE(egid, sizeof(*egid));
- if (sgid) POST_WRITE(sgid, sizeof(*sgid));
+ if (rgid)
+ POST_WRITE(rgid, sizeof(*rgid));
+ if (egid)
+ POST_WRITE(egid, sizeof(*egid));
+ if (sgid)
+ POST_WRITE(sgid, sizeof(*sgid));
}
}
@@ -1517,23 +1627,25 @@ PRE_SYSCALL(setfsgid16)(long gid) {}
POST_SYSCALL(setfsgid16)(long res, long gid) {}
-PRE_SYSCALL(getgroups16)(long gidsetsize,
- __sanitizer___kernel_old_gid_t *grouplist) {}
+PRE_SYSCALL(getgroups16)
+(long gidsetsize, __sanitizer___kernel_old_gid_t *grouplist) {}
-POST_SYSCALL(getgroups16)(long res, long gidsetsize,
- __sanitizer___kernel_old_gid_t *grouplist) {
+POST_SYSCALL(getgroups16)
+(long res, long gidsetsize, __sanitizer___kernel_old_gid_t *grouplist) {
if (res >= 0) {
- if (grouplist) POST_WRITE(grouplist, res * sizeof(*grouplist));
+ if (grouplist)
+ POST_WRITE(grouplist, res * sizeof(*grouplist));
}
}
-PRE_SYSCALL(setgroups16)(long gidsetsize,
- __sanitizer___kernel_old_gid_t *grouplist) {
- if (grouplist) POST_WRITE(grouplist, gidsetsize * sizeof(*grouplist));
+PRE_SYSCALL(setgroups16)
+(long gidsetsize, __sanitizer___kernel_old_gid_t *grouplist) {
+ if (grouplist)
+ POST_WRITE(grouplist, gidsetsize * sizeof(*grouplist));
}
-POST_SYSCALL(setgroups16)(long res, long gidsetsize,
- __sanitizer___kernel_old_gid_t *grouplist) {}
+POST_SYSCALL(setgroups16)
+(long res, long gidsetsize, __sanitizer___kernel_old_gid_t *grouplist) {}
PRE_SYSCALL(getuid16)() {}
@@ -1550,7 +1662,7 @@ POST_SYSCALL(getgid16)(long res) {}
PRE_SYSCALL(getegid16)() {}
POST_SYSCALL(getegid16)(long res) {}
-#endif // SANITIZER_USES_UID16_SYSCALLS
+# endif // SANITIZER_USES_UID16_SYSCALLS
PRE_SYSCALL(utime)(void *filename, void *times) {}
@@ -1559,7 +1671,8 @@ POST_SYSCALL(utime)(long res, void *filename, void *times) {
if (filename)
POST_WRITE(filename,
__sanitizer::internal_strlen((const char *)filename) + 1);
- if (times) POST_WRITE(times, struct_utimbuf_sz);
+ if (times)
+ POST_WRITE(times, struct_utimbuf_sz);
}
}
@@ -1570,7 +1683,8 @@ POST_SYSCALL(utimes)(long res, void *filename, void *utimes) {
if (filename)
POST_WRITE(filename,
__sanitizer::internal_strlen((const char *)filename) + 1);
- if (utimes) POST_WRITE(utimes, timeval_sz);
+ if (utimes)
+ POST_WRITE(utimes, timeval_sz);
}
}
@@ -1578,91 +1692,104 @@ PRE_SYSCALL(lseek)(long fd, long offset, long origin) {}
POST_SYSCALL(lseek)(long res, long fd, long offset, long origin) {}
-PRE_SYSCALL(llseek)(long fd, long offset_high, long offset_low, void *result,
- long origin) {}
+PRE_SYSCALL(llseek)
+(long fd, long offset_high, long offset_low, void *result, long origin) {}
-POST_SYSCALL(llseek)(long res, long fd, long offset_high, long offset_low,
- void *result, long origin) {
+POST_SYSCALL(llseek)
+(long res, long fd, long offset_high, long offset_low, void *result,
+ long origin) {
if (res >= 0) {
- if (result) POST_WRITE(result, sizeof(long long));
+ if (result)
+ POST_WRITE(result, sizeof(long long));
}
}
PRE_SYSCALL(readv)(long fd, const __sanitizer_iovec *vec, long vlen) {}
-POST_SYSCALL(readv)(long res, long fd, const __sanitizer_iovec *vec,
- long vlen) {
+POST_SYSCALL(readv)
+(long res, long fd, const __sanitizer_iovec *vec, long vlen) {
if (res >= 0) {
- if (vec) kernel_write_iovec(vec, vlen, res);
+ if (vec)
+ kernel_write_iovec(vec, vlen, res);
}
}
PRE_SYSCALL(write)(long fd, const void *buf, long count) {
- if (buf) PRE_READ(buf, count);
+ if (buf)
+ PRE_READ(buf, count);
}
POST_SYSCALL(write)(long res, long fd, const void *buf, long count) {}
PRE_SYSCALL(writev)(long fd, const __sanitizer_iovec *vec, long vlen) {}
-POST_SYSCALL(writev)(long res, long fd, const __sanitizer_iovec *vec,
- long vlen) {
+POST_SYSCALL(writev)
+(long res, long fd, const __sanitizer_iovec *vec, long vlen) {
if (res >= 0) {
- if (vec) kernel_read_iovec(vec, vlen, res);
+ if (vec)
+ kernel_read_iovec(vec, vlen, res);
}
}
-#ifdef _LP64
+# ifdef _LP64
PRE_SYSCALL(pread64)(long fd, void *buf, long count, long pos) {}
POST_SYSCALL(pread64)(long res, long fd, void *buf, long count, long pos) {
if (res >= 0) {
- if (buf) POST_WRITE(buf, res);
+ if (buf)
+ POST_WRITE(buf, res);
}
}
PRE_SYSCALL(pwrite64)(long fd, const void *buf, long count, long pos) {
- if (buf) PRE_READ(buf, count);
+ if (buf)
+ PRE_READ(buf, count);
}
-POST_SYSCALL(pwrite64)(long res, long fd, const void *buf, long count,
- long pos) {}
-#else
+POST_SYSCALL(pwrite64)
+(long res, long fd, const void *buf, long count, long pos) {}
+# else
PRE_SYSCALL(pread64)(long fd, void *buf, long count, long pos0, long pos1) {}
-POST_SYSCALL(pread64)(long res, long fd, void *buf, long count, long pos0,
- long pos1) {
+POST_SYSCALL(pread64)
+(long res, long fd, void *buf, long count, long pos0, long pos1) {
if (res >= 0) {
- if (buf) POST_WRITE(buf, res);
+ if (buf)
+ POST_WRITE(buf, res);
}
}
-PRE_SYSCALL(pwrite64)(long fd, const void *buf, long count, long pos0,
- long pos1) {
- if (buf) PRE_READ(buf, count);
+PRE_SYSCALL(pwrite64)
+(long fd, const void *buf, long count, long pos0, long pos1) {
+ if (buf)
+ PRE_READ(buf, count);
}
-POST_SYSCALL(pwrite64)(long res, long fd, const void *buf, long count,
- long pos0, long pos1) {}
-#endif
+POST_SYSCALL(pwrite64)
+(long res, long fd, const void *buf, long count, long pos0, long pos1) {}
+# endif
-PRE_SYSCALL(preadv)(long fd, const __sanitizer_iovec *vec, long vlen,
- long pos_l, long pos_h) {}
+PRE_SYSCALL(preadv)
+(long fd, const __sanitizer_iovec *vec, long vlen, long pos_l, long pos_h) {}
-POST_SYSCALL(preadv)(long res, long fd, const __sanitizer_iovec *vec, long vlen,
- long pos_l, long pos_h) {
+POST_SYSCALL(preadv)
+(long res, long fd, const __sanitizer_iovec *vec, long vlen, long pos_l,
+ long pos_h) {
if (res >= 0) {
- if (vec) kernel_write_iovec(vec, vlen, res);
+ if (vec)
+ kernel_write_iovec(vec, vlen, res);
}
}
-PRE_SYSCALL(pwritev)(long fd, const __sanitizer_iovec *vec, long vlen,
- long pos_l, long pos_h) {}
+PRE_SYSCALL(pwritev)
+(long fd, const __sanitizer_iovec *vec, long vlen, long pos_l, long pos_h) {}
-POST_SYSCALL(pwritev)(long res, long fd, const __sanitizer_iovec *vec,
- long vlen, long pos_l, long pos_h) {
+POST_SYSCALL(pwritev)
+(long res, long fd, const __sanitizer_iovec *vec, long vlen, long pos_l,
+ long pos_h) {
if (res >= 0) {
- if (vec) kernel_read_iovec(vec, vlen, res);
+ if (vec)
+ kernel_read_iovec(vec, vlen, res);
}
}
@@ -1717,14 +1844,15 @@ PRE_SYSCALL(quotactl)(long cmd, const void *special, long id, void *addr) {
PRE_READ(special, __sanitizer::internal_strlen((const char *)special) + 1);
}
-POST_SYSCALL(quotactl)(long res, long cmd, const void *special, long id,
- void *addr) {}
+POST_SYSCALL(quotactl)
+(long res, long cmd, const void *special, long id, void *addr) {}
PRE_SYSCALL(getdents)(long fd, void *dirent, long count) {}
POST_SYSCALL(getdents)(long res, long fd, void *dirent, long count) {
if (res >= 0) {
- if (dirent) POST_WRITE(dirent, res);
+ if (dirent)
+ POST_WRITE(dirent, res);
}
}
@@ -1732,15 +1860,16 @@ PRE_SYSCALL(getdents64)(long fd, void *dirent, long count) {}
POST_SYSCALL(getdents64)(long res, long fd, void *dirent, long count) {
if (res >= 0) {
- if (dirent) POST_WRITE(dirent, res);
+ if (dirent)
+ POST_WRITE(dirent, res);
}
}
-PRE_SYSCALL(setsockopt)(long fd, long level, long optname, void *optval,
- long optlen) {}
+PRE_SYSCALL(setsockopt)
+(long fd, long level, long optname, void *optval, long optlen) {}
-POST_SYSCALL(setsockopt)(long res, long fd, long level, long optname,
- void *optval, long optlen) {
+POST_SYSCALL(setsockopt)
+(long res, long fd, long level, long optname, void *optval, long optlen) {
if (res >= 0) {
if (optval)
POST_WRITE(optval,
@@ -1748,77 +1877,88 @@ POST_SYSCALL(setsockopt)(long res, long fd, long level, long optname,
}
}
-PRE_SYSCALL(getsockopt)(long fd, long level, long optname, void *optval,
- void *optlen) {}
+PRE_SYSCALL(getsockopt)
+(long fd, long level, long optname, void *optval, void *optlen) {}
-POST_SYSCALL(getsockopt)(long res, long fd, long level, long optname,
- void *optval, void *optlen) {
+POST_SYSCALL(getsockopt)
+(long res, long fd, long level, long optname, void *optval, void *optlen) {
if (res >= 0) {
if (optval)
POST_WRITE(optval,
__sanitizer::internal_strlen((const char *)optval) + 1);
- if (optlen) POST_WRITE(optlen, sizeof(int));
+ if (optlen)
+ POST_WRITE(optlen, sizeof(int));
}
}
PRE_SYSCALL(bind)(long arg0, sanitizer_kernel_sockaddr *arg1, long arg2) {}
-POST_SYSCALL(bind)(long res, long arg0, sanitizer_kernel_sockaddr *arg1,
- long arg2) {
+POST_SYSCALL(bind)
+(long res, long arg0, sanitizer_kernel_sockaddr *arg1, long arg2) {
if (res >= 0) {
- if (arg1) POST_WRITE(arg1, sizeof(*arg1));
+ if (arg1)
+ POST_WRITE(arg1, sizeof(*arg1));
}
}
PRE_SYSCALL(connect)(long arg0, sanitizer_kernel_sockaddr *arg1, long arg2) {}
-POST_SYSCALL(connect)(long res, long arg0, sanitizer_kernel_sockaddr *arg1,
- long arg2) {
+POST_SYSCALL(connect)
+(long res, long arg0, sanitizer_kernel_sockaddr *arg1, long arg2) {
if (res >= 0) {
- if (arg1) POST_WRITE(arg1, sizeof(*arg1));
+ if (arg1)
+ POST_WRITE(arg1, sizeof(*arg1));
}
}
PRE_SYSCALL(accept)(long arg0, sanitizer_kernel_sockaddr *arg1, void *arg2) {}
-POST_SYSCALL(accept)(long res, long arg0, sanitizer_kernel_sockaddr *arg1,
- void *arg2) {
+POST_SYSCALL(accept)
+(long res, long arg0, sanitizer_kernel_sockaddr *arg1, void *arg2) {
if (res >= 0) {
- if (arg1) POST_WRITE(arg1, sizeof(*arg1));
- if (arg2) POST_WRITE(arg2, sizeof(unsigned));
+ if (arg1)
+ POST_WRITE(arg1, sizeof(*arg1));
+ if (arg2)
+ POST_WRITE(arg2, sizeof(unsigned));
}
}
-PRE_SYSCALL(accept4)(long arg0, sanitizer_kernel_sockaddr *arg1, void *arg2,
- long arg3) {}
+PRE_SYSCALL(accept4)
+(long arg0, sanitizer_kernel_sockaddr *arg1, void *arg2, long arg3) {}
-POST_SYSCALL(accept4)(long res, long arg0, sanitizer_kernel_sockaddr *arg1,
- void *arg2, long arg3) {
+POST_SYSCALL(accept4)
+(long res, long arg0, sanitizer_kernel_sockaddr *arg1, void *arg2, long arg3) {
if (res >= 0) {
- if (arg1) POST_WRITE(arg1, sizeof(*arg1));
- if (arg2) POST_WRITE(arg2, sizeof(unsigned));
+ if (arg1)
+ POST_WRITE(arg1, sizeof(*arg1));
+ if (arg2)
+ POST_WRITE(arg2, sizeof(unsigned));
}
}
-PRE_SYSCALL(getsockname)(long arg0, sanitizer_kernel_sockaddr *arg1,
- void *arg2) {}
+PRE_SYSCALL(getsockname)
+(long arg0, sanitizer_kernel_sockaddr *arg1, void *arg2) {}
-POST_SYSCALL(getsockname)(long res, long arg0, sanitizer_kernel_sockaddr *arg1,
- void *arg2) {
+POST_SYSCALL(getsockname)
+(long res, long arg0, sanitizer_kernel_sockaddr *arg1, void *arg2) {
if (res >= 0) {
- if (arg1) POST_WRITE(arg1, sizeof(*arg1));
- if (arg2) POST_WRITE(arg2, sizeof(unsigned));
+ if (arg1)
+ POST_WRITE(arg1, sizeof(*arg1));
+ if (arg2)
+ POST_WRITE(arg2, sizeof(unsigned));
}
}
-PRE_SYSCALL(getpeername)(long arg0, sanitizer_kernel_sockaddr *arg1,
- void *arg2) {}
+PRE_SYSCALL(getpeername)
+(long arg0, sanitizer_kernel_sockaddr *arg1, void *arg2) {}
-POST_SYSCALL(getpeername)(long res, long arg0, sanitizer_kernel_sockaddr *arg1,
- void *arg2) {
+POST_SYSCALL(getpeername)
+(long res, long arg0, sanitizer_kernel_sockaddr *arg1, void *arg2) {
if (res >= 0) {
- if (arg1) POST_WRITE(arg1, sizeof(*arg1));
- if (arg2) POST_WRITE(arg2, sizeof(unsigned));
+ if (arg1)
+ POST_WRITE(arg1, sizeof(*arg1));
+ if (arg2)
+ POST_WRITE(arg2, sizeof(unsigned));
}
}
@@ -1826,18 +1966,23 @@ PRE_SYSCALL(send)(long arg0, void *arg1, long arg2, long arg3) {}
POST_SYSCALL(send)(long res, long arg0, void *arg1, long arg2, long arg3) {
if (res) {
- if (arg1) POST_READ(arg1, res);
+ if (arg1)
+ POST_READ(arg1, res);
}
}
-PRE_SYSCALL(sendto)(long arg0, void *arg1, long arg2, long arg3,
- sanitizer_kernel_sockaddr *arg4, long arg5) {}
+PRE_SYSCALL(sendto)
+(long arg0, void *arg1, long arg2, long arg3, sanitizer_kernel_sockaddr *arg4,
+ long arg5) {}
-POST_SYSCALL(sendto)(long res, long arg0, void *arg1, long arg2, long arg3,
- sanitizer_kernel_sockaddr *arg4, long arg5) {
+POST_SYSCALL(sendto)
+(long res, long arg0, void *arg1, long arg2, long arg3,
+ sanitizer_kernel_sockaddr *arg4, long arg5) {
if (res >= 0) {
- if (arg1) POST_READ(arg1, res);
- if (arg4) POST_WRITE(arg4, sizeof(*arg4));
+ if (arg1)
+ POST_READ(arg1, res);
+ if (arg4)
+ POST_WRITE(arg4, sizeof(*arg4));
}
}
@@ -1857,19 +2002,25 @@ PRE_SYSCALL(recv)(long arg0, void *buf, long len, long flags) {}
POST_SYSCALL(recv)(long res, void *buf, long len, long flags) {
if (res >= 0) {
- if (buf) POST_WRITE(buf, res);
+ if (buf)
+ POST_WRITE(buf, res);
}
}
-PRE_SYSCALL(recvfrom)(long arg0, void *buf, long len, long flags,
- sanitizer_kernel_sockaddr *arg4, void *arg5) {}
+PRE_SYSCALL(recvfrom)
+(long arg0, void *buf, long len, long flags, sanitizer_kernel_sockaddr *arg4,
+ void *arg5) {}
-POST_SYSCALL(recvfrom)(long res, long arg0, void *buf, long len, long flags,
- sanitizer_kernel_sockaddr *arg4, void *arg5) {
+POST_SYSCALL(recvfrom)
+(long res, long arg0, void *buf, long len, long flags,
+ sanitizer_kernel_sockaddr *arg4, void *arg5) {
if (res >= 0) {
- if (buf) POST_WRITE(buf, res);
- if (arg4) POST_WRITE(arg4, sizeof(*arg4));
- if (arg5) POST_WRITE(arg5, sizeof(int));
+ if (buf)
+ POST_WRITE(buf, res);
+ if (arg4)
+ POST_WRITE(arg4, sizeof(*arg4));
+ if (arg5)
+ POST_WRITE(arg5, sizeof(int));
}
}
@@ -1881,14 +2032,16 @@ PRE_SYSCALL(socketpair)(long arg0, long arg1, long arg2, int *sv) {}
POST_SYSCALL(socketpair)(long res, long arg0, long arg1, long arg2, int *sv) {
if (res >= 0)
- if (sv) POST_WRITE(sv, sizeof(int) * 2);
+ if (sv)
+ POST_WRITE(sv, sizeof(int) * 2);
}
PRE_SYSCALL(socketcall)(long call, void *args) {}
POST_SYSCALL(socketcall)(long res, long call, void *args) {
if (res >= 0) {
- if (args) POST_WRITE(args, sizeof(long));
+ if (args)
+ POST_WRITE(args, sizeof(long));
}
}
@@ -1898,25 +2051,31 @@ POST_SYSCALL(listen)(long res, long arg0, long arg1) {}
PRE_SYSCALL(poll)(void *ufds, long nfds, long timeout) {}
-POST_SYSCALL(poll)(long res, __sanitizer_pollfd *ufds, long nfds,
- long timeout) {
+POST_SYSCALL(poll)
+(long res, __sanitizer_pollfd *ufds, long nfds, long timeout) {
if (res >= 0) {
- if (ufds) POST_WRITE(ufds, nfds * sizeof(*ufds));
+ if (ufds)
+ POST_WRITE(ufds, nfds * sizeof(*ufds));
}
}
-PRE_SYSCALL(select)(long n, __sanitizer___kernel_fd_set *inp,
- __sanitizer___kernel_fd_set *outp,
- __sanitizer___kernel_fd_set *exp, void *tvp) {}
+PRE_SYSCALL(select)
+(long n, __sanitizer___kernel_fd_set *inp, __sanitizer___kernel_fd_set *outp,
+ __sanitizer___kernel_fd_set *exp, void *tvp) {}
-POST_SYSCALL(select)(long res, long n, __sanitizer___kernel_fd_set *inp,
- __sanitizer___kernel_fd_set *outp,
- __sanitizer___kernel_fd_set *exp, void *tvp) {
+POST_SYSCALL(select)
+(long res, long n, __sanitizer___kernel_fd_set *inp,
+ __sanitizer___kernel_fd_set *outp, __sanitizer___kernel_fd_set *exp,
+ void *tvp) {
if (res >= 0) {
- if (inp) POST_WRITE(inp, sizeof(*inp));
- if (outp) POST_WRITE(outp, sizeof(*outp));
- if (exp) POST_WRITE(exp, sizeof(*exp));
- if (tvp) POST_WRITE(tvp, timeval_sz);
+ if (inp)
+ POST_WRITE(inp, sizeof(*inp));
+ if (outp)
+ POST_WRITE(outp, sizeof(*outp));
+ if (exp)
+ POST_WRITE(exp, sizeof(*exp));
+ if (tvp)
+ POST_WRITE(tvp, timeval_sz);
}
}
@@ -1936,29 +2095,55 @@ PRE_SYSCALL(epoll_ctl)(long epfd, long op, long fd, void *event) {}
POST_SYSCALL(epoll_ctl)(long res, long epfd, long op, long fd, void *event) {
if (res >= 0) {
- if (event) POST_WRITE(event, struct_epoll_event_sz);
+ if (event)
+ POST_WRITE(event, struct_epoll_event_sz);
}
}
-PRE_SYSCALL(epoll_wait)(long epfd, void *events, long maxevents, long timeout) {
+PRE_SYSCALL(epoll_wait)
+(long epfd, void *events, long maxevents, long timeout) {}
+
+POST_SYSCALL(epoll_wait)
+(long res, long epfd, void *events, long maxevents, long timeout) {
+ if (res >= 0) {
+ if (events)
+ POST_WRITE(events, res * struct_epoll_event_sz);
+ }
+}
+
+PRE_SYSCALL(epoll_pwait)
+(long epfd, void *events, long maxevents, long timeout,
+ const kernel_sigset_t *sigmask, long sigsetsize) {
+ if (sigmask)
+ PRE_READ(sigmask, sigsetsize);
}
-POST_SYSCALL(epoll_wait)(long res, long epfd, void *events, long maxevents,
- long timeout) {
+POST_SYSCALL(epoll_pwait)
+(long res, long epfd, void *events, long maxevents, long timeout,
+ const void *sigmask, long sigsetsize) {
if (res >= 0) {
- if (events) POST_WRITE(events, struct_epoll_event_sz);
+ if (events)
+ POST_WRITE(events, res * struct_epoll_event_sz);
}
}
-PRE_SYSCALL(epoll_pwait)(long epfd, void *events, long maxevents, long timeout,
- const kernel_sigset_t *sigmask, long sigsetsize) {
- if (sigmask) PRE_READ(sigmask, sigsetsize);
+PRE_SYSCALL(epoll_pwait2)
+(long epfd, void *events, long maxevents,
+ const sanitizer_kernel_timespec *timeout, const kernel_sigset_t *sigmask,
+ long sigsetsize) {
+ if (timeout)
+ PRE_READ(timeout, sizeof(timeout));
+ if (sigmask)
+ PRE_READ(sigmask, sigsetsize);
}
-POST_SYSCALL(epoll_pwait)(long res, long epfd, void *events, long maxevents,
- long timeout, const void *sigmask, long sigsetsize) {
+POST_SYSCALL(epoll_pwait2)
+(long res, long epfd, void *events, long maxevents,
+ const sanitizer_kernel_timespec *timeout, const void *sigmask,
+ long sigsetsize) {
if (res >= 0) {
- if (events) POST_WRITE(events, struct_epoll_event_sz);
+ if (events)
+ POST_WRITE(events, res * struct_epoll_event_sz);
}
}
@@ -1993,7 +2178,8 @@ PRE_SYSCALL(newuname)(void *name) {}
POST_SYSCALL(newuname)(long res, void *name) {
if (res >= 0) {
- if (name) POST_WRITE(name, struct_new_utsname_sz);
+ if (name)
+ POST_WRITE(name, struct_new_utsname_sz);
}
}
@@ -2001,7 +2187,8 @@ PRE_SYSCALL(uname)(void *arg0) {}
POST_SYSCALL(uname)(long res, void *arg0) {
if (res >= 0) {
- if (arg0) POST_WRITE(arg0, struct_old_utsname_sz);
+ if (arg0)
+ POST_WRITE(arg0, struct_old_utsname_sz);
}
}
@@ -2009,7 +2196,8 @@ PRE_SYSCALL(olduname)(void *arg0) {}
POST_SYSCALL(olduname)(long res, void *arg0) {
if (res >= 0) {
- if (arg0) POST_WRITE(arg0, struct_oldold_utsname_sz);
+ if (arg0)
+ POST_WRITE(arg0, struct_oldold_utsname_sz);
}
}
@@ -2017,7 +2205,8 @@ PRE_SYSCALL(getrlimit)(long resource, void *rlim) {}
POST_SYSCALL(getrlimit)(long res, long resource, void *rlim) {
if (res >= 0) {
- if (rlim) POST_WRITE(rlim, struct_rlimit_sz);
+ if (rlim)
+ POST_WRITE(rlim, struct_rlimit_sz);
}
}
@@ -2025,7 +2214,8 @@ PRE_SYSCALL(old_getrlimit)(long resource, void *rlim) {}
POST_SYSCALL(old_getrlimit)(long res, long resource, void *rlim) {
if (res >= 0) {
- if (rlim) POST_WRITE(rlim, struct_rlimit_sz);
+ if (rlim)
+ POST_WRITE(rlim, struct_rlimit_sz);
}
}
@@ -2033,29 +2223,33 @@ PRE_SYSCALL(setrlimit)(long resource, void *rlim) {}
POST_SYSCALL(setrlimit)(long res, long resource, void *rlim) {
if (res >= 0) {
- if (rlim) POST_WRITE(rlim, struct_rlimit_sz);
+ if (rlim)
+ POST_WRITE(rlim, struct_rlimit_sz);
}
}
-#if !SANITIZER_ANDROID
-PRE_SYSCALL(prlimit64)(long pid, long resource, const void *new_rlim,
- void *old_rlim) {
- if (new_rlim) PRE_READ(new_rlim, struct_rlimit64_sz);
+# if !SANITIZER_ANDROID
+PRE_SYSCALL(prlimit64)
+(long pid, long resource, const void *new_rlim, void *old_rlim) {
+ if (new_rlim)
+ PRE_READ(new_rlim, struct_rlimit64_sz);
}
-POST_SYSCALL(prlimit64)(long res, long pid, long resource, const void *new_rlim,
- void *old_rlim) {
+POST_SYSCALL(prlimit64)
+(long res, long pid, long resource, const void *new_rlim, void *old_rlim) {
if (res >= 0) {
- if (old_rlim) POST_WRITE(old_rlim, struct_rlimit64_sz);
+ if (old_rlim)
+ POST_WRITE(old_rlim, struct_rlimit64_sz);
}
}
-#endif
+# endif
PRE_SYSCALL(getrusage)(long who, void *ru) {}
POST_SYSCALL(getrusage)(long res, long who, void *ru) {
if (res >= 0) {
- if (ru) POST_WRITE(ru, struct_rusage_sz);
+ if (ru)
+ POST_WRITE(ru, struct_rusage_sz);
}
}
@@ -2068,31 +2262,34 @@ PRE_SYSCALL(msgget)(long key, long msgflg) {}
POST_SYSCALL(msgget)(long res, long key, long msgflg) {}
PRE_SYSCALL(msgsnd)(long msqid, void *msgp, long msgsz, long msgflg) {
- if (msgp) PRE_READ(msgp, msgsz);
+ if (msgp)
+ PRE_READ(msgp, msgsz);
}
-POST_SYSCALL(msgsnd)(long res, long msqid, void *msgp, long msgsz,
- long msgflg) {}
+POST_SYSCALL(msgsnd)
+(long res, long msqid, void *msgp, long msgsz, long msgflg) {}
-PRE_SYSCALL(msgrcv)(long msqid, void *msgp, long msgsz, long msgtyp,
- long msgflg) {}
+PRE_SYSCALL(msgrcv)
+(long msqid, void *msgp, long msgsz, long msgtyp, long msgflg) {}
-POST_SYSCALL(msgrcv)(long res, long msqid, void *msgp, long msgsz, long msgtyp,
- long msgflg) {
+POST_SYSCALL(msgrcv)
+(long res, long msqid, void *msgp, long msgsz, long msgtyp, long msgflg) {
if (res >= 0) {
- if (msgp) POST_WRITE(msgp, res);
+ if (msgp)
+ POST_WRITE(msgp, res);
}
}
-#if !SANITIZER_ANDROID
+# if !SANITIZER_ANDROID
PRE_SYSCALL(msgctl)(long msqid, long cmd, void *buf) {}
POST_SYSCALL(msgctl)(long res, long msqid, long cmd, void *buf) {
if (res >= 0) {
- if (buf) POST_WRITE(buf, struct_msqid_ds_sz);
+ if (buf)
+ POST_WRITE(buf, struct_msqid_ds_sz);
}
}
-#endif
+# endif
PRE_SYSCALL(semget)(long key, long nsems, long semflg) {}
@@ -2106,13 +2303,14 @@ PRE_SYSCALL(semctl)(long semid, long semnum, long cmd, void *arg) {}
POST_SYSCALL(semctl)(long res, long semid, long semnum, long cmd, void *arg) {}
-PRE_SYSCALL(semtimedop)(long semid, void *sops, long nsops,
- const void *timeout) {
- if (timeout) PRE_READ(timeout, struct_timespec_sz);
+PRE_SYSCALL(semtimedop)
+(long semid, void *sops, long nsops, const void *timeout) {
+ if (timeout)
+ PRE_READ(timeout, struct_timespec_sz);
}
-POST_SYSCALL(semtimedop)(long res, long semid, void *sops, long nsops,
- const void *timeout) {}
+POST_SYSCALL(semtimedop)
+(long res, long semid, void *sops, long nsops, const void *timeout) {}
PRE_SYSCALL(shmat)(long shmid, void *shmaddr, long shmflg) {}
@@ -2138,18 +2336,20 @@ POST_SYSCALL(shmdt)(long res, void *shmaddr) {
}
}
-PRE_SYSCALL(ipc)(long call, long first, long second, long third, void *ptr,
- long fifth) {}
+PRE_SYSCALL(ipc)
+(long call, long first, long second, long third, void *ptr, long fifth) {}
-POST_SYSCALL(ipc)(long res, long call, long first, long second, long third,
- void *ptr, long fifth) {}
+POST_SYSCALL(ipc)
+(long res, long call, long first, long second, long third, void *ptr,
+ long fifth) {}
-#if !SANITIZER_ANDROID
+# if !SANITIZER_ANDROID
PRE_SYSCALL(shmctl)(long shmid, long cmd, void *buf) {}
POST_SYSCALL(shmctl)(long res, long shmid, long cmd, void *buf) {
if (res >= 0) {
- if (buf) POST_WRITE(buf, sizeof(__sanitizer_shmid_ds));
+ if (buf)
+ POST_WRITE(buf, sizeof(__sanitizer_shmid_ds));
}
}
@@ -2158,10 +2358,11 @@ PRE_SYSCALL(mq_open)(const void *name, long oflag, long mode, void *attr) {
PRE_READ(name, __sanitizer::internal_strlen((const char *)name) + 1);
}
-POST_SYSCALL(mq_open)(long res, const void *name, long oflag, long mode,
- void *attr) {
+POST_SYSCALL(mq_open)
+(long res, const void *name, long oflag, long mode, void *attr) {
if (res >= 0) {
- if (attr) POST_WRITE(attr, struct_mq_attr_sz);
+ if (attr)
+ POST_WRITE(attr, struct_mq_attr_sz);
}
}
@@ -2172,62 +2373,73 @@ PRE_SYSCALL(mq_unlink)(const void *name) {
POST_SYSCALL(mq_unlink)(long res, const void *name) {}
-PRE_SYSCALL(mq_timedsend)(long mqdes, const void *msg_ptr, long msg_len,
- long msg_prio, const void *abs_timeout) {
- if (msg_ptr) PRE_READ(msg_ptr, msg_len);
- if (abs_timeout) PRE_READ(abs_timeout, struct_timespec_sz);
+PRE_SYSCALL(mq_timedsend)
+(long mqdes, const void *msg_ptr, long msg_len, long msg_prio,
+ const void *abs_timeout) {
+ if (msg_ptr)
+ PRE_READ(msg_ptr, msg_len);
+ if (abs_timeout)
+ PRE_READ(abs_timeout, struct_timespec_sz);
}
-POST_SYSCALL(mq_timedsend)(long res, long mqdes, const void *msg_ptr,
- long msg_len, long msg_prio,
- const void *abs_timeout) {}
+POST_SYSCALL(mq_timedsend)
+(long res, long mqdes, const void *msg_ptr, long msg_len, long msg_prio,
+ const void *abs_timeout) {}
-PRE_SYSCALL(mq_timedreceive)(long mqdes, void *msg_ptr, long msg_len,
- void *msg_prio, const void *abs_timeout) {
- if (abs_timeout) PRE_READ(abs_timeout, struct_timespec_sz);
+PRE_SYSCALL(mq_timedreceive)
+(long mqdes, void *msg_ptr, long msg_len, void *msg_prio,
+ const void *abs_timeout) {
+ if (abs_timeout)
+ PRE_READ(abs_timeout, struct_timespec_sz);
}
-POST_SYSCALL(mq_timedreceive)(long res, long mqdes, void *msg_ptr, long msg_len,
- int *msg_prio, const void *abs_timeout) {
+POST_SYSCALL(mq_timedreceive)
+(long res, long mqdes, void *msg_ptr, long msg_len, int *msg_prio,
+ const void *abs_timeout) {
if (res >= 0) {
- if (msg_ptr) POST_WRITE(msg_ptr, res);
- if (msg_prio) POST_WRITE(msg_prio, sizeof(*msg_prio));
+ if (msg_ptr)
+ POST_WRITE(msg_ptr, res);
+ if (msg_prio)
+ POST_WRITE(msg_prio, sizeof(*msg_prio));
}
}
PRE_SYSCALL(mq_notify)(long mqdes, const void *notification) {
- if (notification) PRE_READ(notification, struct_sigevent_sz);
+ if (notification)
+ PRE_READ(notification, struct_sigevent_sz);
}
POST_SYSCALL(mq_notify)(long res, long mqdes, const void *notification) {}
PRE_SYSCALL(mq_getsetattr)(long mqdes, const void *mqstat, void *omqstat) {
- if (mqstat) PRE_READ(mqstat, struct_mq_attr_sz);
+ if (mqstat)
+ PRE_READ(mqstat, struct_mq_attr_sz);
}
-POST_SYSCALL(mq_getsetattr)(long res, long mqdes, const void *mqstat,
- void *omqstat) {
+POST_SYSCALL(mq_getsetattr)
+(long res, long mqdes, const void *mqstat, void *omqstat) {
if (res >= 0) {
- if (omqstat) POST_WRITE(omqstat, struct_mq_attr_sz);
+ if (omqstat)
+ POST_WRITE(omqstat, struct_mq_attr_sz);
}
}
-#endif // SANITIZER_ANDROID
+# endif // SANITIZER_ANDROID
PRE_SYSCALL(pciconfig_iobase)(long which, long bus, long devfn) {}
POST_SYSCALL(pciconfig_iobase)(long res, long which, long bus, long devfn) {}
-PRE_SYSCALL(pciconfig_read)(long bus, long dfn, long off, long len, void *buf) {
-}
+PRE_SYSCALL(pciconfig_read)
+(long bus, long dfn, long off, long len, void *buf) {}
-POST_SYSCALL(pciconfig_read)(long res, long bus, long dfn, long off, long len,
- void *buf) {}
+POST_SYSCALL(pciconfig_read)
+(long res, long bus, long dfn, long off, long len, void *buf) {}
-PRE_SYSCALL(pciconfig_write)(long bus, long dfn, long off, long len,
- void *buf) {}
+PRE_SYSCALL(pciconfig_write)
+(long bus, long dfn, long off, long len, void *buf) {}
-POST_SYSCALL(pciconfig_write)(long res, long bus, long dfn, long off, long len,
- void *buf) {}
+POST_SYSCALL(pciconfig_write)
+(long res, long bus, long dfn, long off, long len, void *buf) {}
PRE_SYSCALL(swapon)(const void *specialfile, long swap_flags) {
if (specialfile)
@@ -2247,8 +2459,10 @@ POST_SYSCALL(swapoff)(long res, const void *specialfile) {}
PRE_SYSCALL(sysctl)(__sanitizer___sysctl_args *args) {
if (args) {
- if (args->name) PRE_READ(args->name, args->nlen * sizeof(*args->name));
- if (args->newval) PRE_READ(args->name, args->newlen);
+ if (args->name)
+ PRE_READ(args->name, args->nlen * sizeof(*args->name));
+ if (args->newval)
+ PRE_READ(args->name, args->newlen);
}
}
@@ -2265,7 +2479,8 @@ PRE_SYSCALL(sysinfo)(void *info) {}
POST_SYSCALL(sysinfo)(long res, void *info) {
if (res >= 0) {
- if (info) POST_WRITE(info, struct_sysinfo_sz);
+ if (info)
+ POST_WRITE(info, struct_sysinfo_sz);
}
}
@@ -2294,10 +2509,10 @@ PRE_SYSCALL(ni_syscall)() {}
POST_SYSCALL(ni_syscall)(long res) {}
PRE_SYSCALL(ptrace)(long request, long pid, long addr, long data) {
-#if !SANITIZER_ANDROID && \
- (defined(__i386) || defined(__x86_64) || defined(__mips64) || \
- defined(__powerpc64__) || defined(__aarch64__) || defined(__s390__) || \
- SANITIZER_RISCV64)
+# if !SANITIZER_ANDROID && \
+ (defined(__i386) || defined(__x86_64) || defined(__mips64) || \
+ defined(__powerpc64__) || defined(__aarch64__) || defined(__s390__) || \
+ SANITIZER_RISCV64)
if (data) {
if (request == ptrace_setregs) {
PRE_READ((void *)data, struct_user_regs_struct_sz);
@@ -2312,14 +2527,14 @@ PRE_SYSCALL(ptrace)(long request, long pid, long addr, long data) {
PRE_READ(iov->iov_base, iov->iov_len);
}
}
-#endif
+# endif
}
POST_SYSCALL(ptrace)(long res, long request, long pid, long addr, long data) {
-#if !SANITIZER_ANDROID && \
- (defined(__i386) || defined(__x86_64) || defined(__mips64) || \
- defined(__powerpc64__) || defined(__aarch64__) || defined(__s390__) || \
- SANITIZER_RISCV64)
+# if !SANITIZER_ANDROID && \
+ (defined(__i386) || defined(__x86_64) || defined(__mips64) || \
+ defined(__powerpc64__) || defined(__aarch64__) || defined(__s390__) || \
+ SANITIZER_RISCV64)
if (res >= 0 && data) {
// Note that this is different from the interceptor in
// sanitizer_common_interceptors.inc.
@@ -2340,11 +2555,12 @@ POST_SYSCALL(ptrace)(long res, long request, long pid, long addr, long data) {
POST_WRITE((void *)data, sizeof(void *));
}
}
-#endif
+# endif
}
-PRE_SYSCALL(add_key)(const void *_type, const void *_description,
- const void *_payload, long plen, long destringid) {
+PRE_SYSCALL(add_key)
+(const void *_type, const void *_description, const void *_payload, long plen,
+ long destringid) {
if (_type)
PRE_READ(_type, __sanitizer::internal_strlen((const char *)_type) + 1);
if (_description)
@@ -2352,11 +2568,13 @@ PRE_SYSCALL(add_key)(const void *_type, const void *_description,
__sanitizer::internal_strlen((const char *)_description) + 1);
}
-POST_SYSCALL(add_key)(long res, const void *_type, const void *_description,
- const void *_payload, long plen, long destringid) {}
+POST_SYSCALL(add_key)
+(long res, const void *_type, const void *_description, const void *_payload,
+ long plen, long destringid) {}
-PRE_SYSCALL(request_key)(const void *_type, const void *_description,
- const void *_callout_info, long destringid) {
+PRE_SYSCALL(request_key)
+(const void *_type, const void *_description, const void *_callout_info,
+ long destringid) {
if (_type)
PRE_READ(_type, __sanitizer::internal_strlen((const char *)_type) + 1);
if (_description)
@@ -2367,13 +2585,14 @@ PRE_SYSCALL(request_key)(const void *_type, const void *_description,
__sanitizer::internal_strlen((const char *)_callout_info) + 1);
}
-POST_SYSCALL(request_key)(long res, const void *_type, const void *_description,
- const void *_callout_info, long destringid) {}
+POST_SYSCALL(request_key)
+(long res, const void *_type, const void *_description,
+ const void *_callout_info, long destringid) {}
PRE_SYSCALL(keyctl)(long cmd, long arg2, long arg3, long arg4, long arg5) {}
-POST_SYSCALL(keyctl)(long res, long cmd, long arg2, long arg3, long arg4,
- long arg5) {}
+POST_SYSCALL(keyctl)
+(long res, long cmd, long arg2, long arg3, long arg4, long arg5) {}
PRE_SYSCALL(ioprio_set)(long which, long who, long ioprio) {}
@@ -2387,50 +2606,62 @@ PRE_SYSCALL(set_mempolicy)(long mode, void *nmask, long maxnode) {}
POST_SYSCALL(set_mempolicy)(long res, long mode, void *nmask, long maxnode) {
if (res >= 0) {
- if (nmask) POST_WRITE(nmask, sizeof(long));
+ if (nmask)
+ POST_WRITE(nmask, sizeof(long));
}
}
-PRE_SYSCALL(migrate_pages)(long pid, long maxnode, const void *from,
- const void *to) {
- if (from) PRE_READ(from, sizeof(long));
- if (to) PRE_READ(to, sizeof(long));
+PRE_SYSCALL(migrate_pages)
+(long pid, long maxnode, const void *from, const void *to) {
+ if (from)
+ PRE_READ(from, sizeof(long));
+ if (to)
+ PRE_READ(to, sizeof(long));
}
-POST_SYSCALL(migrate_pages)(long res, long pid, long maxnode, const void *from,
- const void *to) {}
+POST_SYSCALL(migrate_pages)
+(long res, long pid, long maxnode, const void *from, const void *to) {}
-PRE_SYSCALL(move_pages)(long pid, long nr_pages, const void **pages,
- const int *nodes, int *status, long flags) {
- if (pages) PRE_READ(pages, nr_pages * sizeof(*pages));
- if (nodes) PRE_READ(nodes, nr_pages * sizeof(*nodes));
+PRE_SYSCALL(move_pages)
+(long pid, long nr_pages, const void **pages, const int *nodes, int *status,
+ long flags) {
+ if (pages)
+ PRE_READ(pages, nr_pages * sizeof(*pages));
+ if (nodes)
+ PRE_READ(nodes, nr_pages * sizeof(*nodes));
}
-POST_SYSCALL(move_pages)(long res, long pid, long nr_pages, const void **pages,
- const int *nodes, int *status, long flags) {
+POST_SYSCALL(move_pages)
+(long res, long pid, long nr_pages, const void **pages, const int *nodes,
+ int *status, long flags) {
if (res >= 0) {
- if (status) POST_WRITE(status, nr_pages * sizeof(*status));
+ if (status)
+ POST_WRITE(status, nr_pages * sizeof(*status));
}
}
-PRE_SYSCALL(mbind)(long start, long len, long mode, void *nmask, long maxnode,
- long flags) {}
+PRE_SYSCALL(mbind)
+(long start, long len, long mode, void *nmask, long maxnode, long flags) {}
-POST_SYSCALL(mbind)(long res, long start, long len, long mode, void *nmask,
- long maxnode, long flags) {
+POST_SYSCALL(mbind)
+(long res, long start, long len, long mode, void *nmask, long maxnode,
+ long flags) {
if (res >= 0) {
- if (nmask) POST_WRITE(nmask, sizeof(long));
+ if (nmask)
+ POST_WRITE(nmask, sizeof(long));
}
}
-PRE_SYSCALL(get_mempolicy)(void *policy, void *nmask, long maxnode, long addr,
- long flags) {}
+PRE_SYSCALL(get_mempolicy)
+(void *policy, void *nmask, long maxnode, long addr, long flags) {}
-POST_SYSCALL(get_mempolicy)(long res, void *policy, void *nmask, long maxnode,
- long addr, long flags) {
+POST_SYSCALL(get_mempolicy)
+(long res, void *policy, void *nmask, long maxnode, long addr, long flags) {
if (res >= 0) {
- if (policy) POST_WRITE(policy, sizeof(int));
- if (nmask) POST_WRITE(nmask, sizeof(long));
+ if (policy)
+ POST_WRITE(policy, sizeof(int));
+ if (nmask)
+ POST_WRITE(nmask, sizeof(long));
}
}
@@ -2447,8 +2678,8 @@ PRE_SYSCALL(inotify_add_watch)(long fd, const void *path, long mask) {
PRE_READ(path, __sanitizer::internal_strlen((const char *)path) + 1);
}
-POST_SYSCALL(inotify_add_watch)(long res, long fd, const void *path,
- long mask) {}
+POST_SYSCALL(inotify_add_watch)
+(long res, long fd, const void *path, long mask) {}
PRE_SYSCALL(inotify_rm_watch)(long fd, long wd) {}
@@ -2458,8 +2689,10 @@ PRE_SYSCALL(spu_run)(long fd, void *unpc, void *ustatus) {}
POST_SYSCALL(spu_run)(long res, long fd, unsigned *unpc, unsigned *ustatus) {
if (res >= 0) {
- if (unpc) POST_WRITE(unpc, sizeof(*unpc));
- if (ustatus) POST_WRITE(ustatus, sizeof(*ustatus));
+ if (unpc)
+ POST_WRITE(unpc, sizeof(*unpc));
+ if (ustatus)
+ POST_WRITE(ustatus, sizeof(*ustatus));
}
}
@@ -2468,8 +2701,8 @@ PRE_SYSCALL(spu_create)(const void *name, long flags, long mode, long fd) {
PRE_READ(name, __sanitizer::internal_strlen((const char *)name) + 1);
}
-POST_SYSCALL(spu_create)(long res, const void *name, long flags, long mode,
- long fd) {}
+POST_SYSCALL(spu_create)
+(long res, const void *name, long flags, long mode, long fd) {}
PRE_SYSCALL(mknodat)(long dfd, const void *filename, long mode, long dev) {
if (filename)
@@ -2477,8 +2710,8 @@ PRE_SYSCALL(mknodat)(long dfd, const void *filename, long mode, long dev) {
__sanitizer::internal_strlen((const char *)filename) + 1);
}
-POST_SYSCALL(mknodat)(long res, long dfd, const void *filename, long mode,
- long dev) {}
+POST_SYSCALL(mknodat)
+(long res, long dfd, const void *filename, long mode, long dev) {}
PRE_SYSCALL(mkdirat)(long dfd, const void *pathname, long mode) {
if (pathname)
@@ -2503,30 +2736,33 @@ PRE_SYSCALL(symlinkat)(const void *oldname, long newdfd, const void *newname) {
PRE_READ(newname, __sanitizer::internal_strlen((const char *)newname) + 1);
}
-POST_SYSCALL(symlinkat)(long res, const void *oldname, long newdfd,
- const void *newname) {}
+POST_SYSCALL(symlinkat)
+(long res, const void *oldname, long newdfd, const void *newname) {}
-PRE_SYSCALL(linkat)(long olddfd, const void *oldname, long newdfd,
- const void *newname, long flags) {
+PRE_SYSCALL(linkat)
+(long olddfd, const void *oldname, long newdfd, const void *newname,
+ long flags) {
if (oldname)
PRE_READ(oldname, __sanitizer::internal_strlen((const char *)oldname) + 1);
if (newname)
PRE_READ(newname, __sanitizer::internal_strlen((const char *)newname) + 1);
}
-POST_SYSCALL(linkat)(long res, long olddfd, const void *oldname, long newdfd,
- const void *newname, long flags) {}
+POST_SYSCALL(linkat)
+(long res, long olddfd, const void *oldname, long newdfd, const void *newname,
+ long flags) {}
-PRE_SYSCALL(renameat)(long olddfd, const void *oldname, long newdfd,
- const void *newname) {
+PRE_SYSCALL(renameat)
+(long olddfd, const void *oldname, long newdfd, const void *newname) {
if (oldname)
PRE_READ(oldname, __sanitizer::internal_strlen((const char *)oldname) + 1);
if (newname)
PRE_READ(newname, __sanitizer::internal_strlen((const char *)newname) + 1);
}
-POST_SYSCALL(renameat)(long res, long olddfd, const void *oldname, long newdfd,
- const void *newname) {}
+POST_SYSCALL(renameat)
+(long res, long olddfd, const void *oldname, long newdfd, const void *newname) {
+}
PRE_SYSCALL(futimesat)(long dfd, const void *filename, void *utimes) {
if (filename)
@@ -2534,10 +2770,11 @@ PRE_SYSCALL(futimesat)(long dfd, const void *filename, void *utimes) {
__sanitizer::internal_strlen((const char *)filename) + 1);
}
-POST_SYSCALL(futimesat)(long res, long dfd, const void *filename,
- void *utimes) {
+POST_SYSCALL(futimesat)
+(long res, long dfd, const void *filename, void *utimes) {
if (res >= 0) {
- if (utimes) POST_WRITE(utimes, timeval_sz);
+ if (utimes)
+ POST_WRITE(utimes, timeval_sz);
}
}
@@ -2557,15 +2794,15 @@ PRE_SYSCALL(fchmodat)(long dfd, const void *filename, long mode) {
POST_SYSCALL(fchmodat)(long res, long dfd, const void *filename, long mode) {}
-PRE_SYSCALL(fchownat)(long dfd, const void *filename, long user, long group,
- long flag) {
+PRE_SYSCALL(fchownat)
+(long dfd, const void *filename, long user, long group, long flag) {
if (filename)
PRE_READ(filename,
__sanitizer::internal_strlen((const char *)filename) + 1);
}
-POST_SYSCALL(fchownat)(long res, long dfd, const void *filename, long user,
- long group, long flag) {}
+POST_SYSCALL(fchownat)
+(long res, long dfd, const void *filename, long user, long group, long flag) {}
PRE_SYSCALL(openat)(long dfd, const void *filename, long flags, long mode) {
if (filename)
@@ -2573,34 +2810,36 @@ PRE_SYSCALL(openat)(long dfd, const void *filename, long flags, long mode) {
__sanitizer::internal_strlen((const char *)filename) + 1);
}
-POST_SYSCALL(openat)(long res, long dfd, const void *filename, long flags,
- long mode) {}
+POST_SYSCALL(openat)
+(long res, long dfd, const void *filename, long flags, long mode) {}
-PRE_SYSCALL(newfstatat)(long dfd, const void *filename, void *statbuf,
- long flag) {
+PRE_SYSCALL(newfstatat)
+(long dfd, const void *filename, void *statbuf, long flag) {
if (filename)
PRE_READ(filename,
__sanitizer::internal_strlen((const char *)filename) + 1);
}
-POST_SYSCALL(newfstatat)(long res, long dfd, const void *filename,
- void *statbuf, long flag) {
+POST_SYSCALL(newfstatat)
+(long res, long dfd, const void *filename, void *statbuf, long flag) {
if (res >= 0) {
- if (statbuf) POST_WRITE(statbuf, struct_kernel_stat_sz);
+ if (statbuf)
+ POST_WRITE(statbuf, struct_kernel_stat_sz);
}
}
-PRE_SYSCALL(fstatat64)(long dfd, const void *filename, void *statbuf,
- long flag) {
+PRE_SYSCALL(fstatat64)
+(long dfd, const void *filename, void *statbuf, long flag) {
if (filename)
PRE_READ(filename,
__sanitizer::internal_strlen((const char *)filename) + 1);
}
-POST_SYSCALL(fstatat64)(long res, long dfd, const void *filename, void *statbuf,
- long flag) {
+POST_SYSCALL(fstatat64)
+(long res, long dfd, const void *filename, void *statbuf, long flag) {
if (res >= 0) {
- if (statbuf) POST_WRITE(statbuf, struct_kernel_stat64_sz);
+ if (statbuf)
+ POST_WRITE(statbuf, struct_kernel_stat64_sz);
}
}
@@ -2609,25 +2848,26 @@ PRE_SYSCALL(readlinkat)(long dfd, const void *path, void *buf, long bufsiz) {
PRE_READ(path, __sanitizer::internal_strlen((const char *)path) + 1);
}
-POST_SYSCALL(readlinkat)(long res, long dfd, const void *path, void *buf,
- long bufsiz) {
+POST_SYSCALL(readlinkat)
+(long res, long dfd, const void *path, void *buf, long bufsiz) {
if (res >= 0) {
if (buf)
POST_WRITE(buf, __sanitizer::internal_strlen((const char *)buf) + 1);
}
}
-PRE_SYSCALL(utimensat)(long dfd, const void *filename, void *utimes,
- long flags) {
+PRE_SYSCALL(utimensat)
+(long dfd, const void *filename, void *utimes, long flags) {
if (filename)
PRE_READ(filename,
__sanitizer::internal_strlen((const char *)filename) + 1);
}
-POST_SYSCALL(utimensat)(long res, long dfd, const void *filename, void *utimes,
- long flags) {
+POST_SYSCALL(utimensat)
+(long res, long dfd, const void *filename, void *utimes, long flags) {
if (res >= 0) {
- if (utimes) POST_WRITE(utimes, struct_timespec_sz);
+ if (utimes)
+ POST_WRITE(utimes, struct_timespec_sz);
}
}
@@ -2635,24 +2875,28 @@ PRE_SYSCALL(unshare)(long unshare_flags) {}
POST_SYSCALL(unshare)(long res, long unshare_flags) {}
-PRE_SYSCALL(splice)(long fd_in, void *off_in, long fd_out, void *off_out,
- long len, long flags) {}
+PRE_SYSCALL(splice)
+(long fd_in, void *off_in, long fd_out, void *off_out, long len, long flags) {}
-POST_SYSCALL(splice)(long res, long fd_in, void *off_in, long fd_out,
- void *off_out, long len, long flags) {
+POST_SYSCALL(splice)
+(long res, long fd_in, void *off_in, long fd_out, void *off_out, long len,
+ long flags) {
if (res >= 0) {
- if (off_in) POST_WRITE(off_in, sizeof(long long));
- if (off_out) POST_WRITE(off_out, sizeof(long long));
+ if (off_in)
+ POST_WRITE(off_in, sizeof(long long));
+ if (off_out)
+ POST_WRITE(off_out, sizeof(long long));
}
}
-PRE_SYSCALL(vmsplice)(long fd, const __sanitizer_iovec *iov, long nr_segs,
- long flags) {}
+PRE_SYSCALL(vmsplice)
+(long fd, const __sanitizer_iovec *iov, long nr_segs, long flags) {}
-POST_SYSCALL(vmsplice)(long res, long fd, const __sanitizer_iovec *iov,
- long nr_segs, long flags) {
+POST_SYSCALL(vmsplice)
+(long res, long fd, const __sanitizer_iovec *iov, long nr_segs, long flags) {
if (res >= 0) {
- if (iov) kernel_read_iovec(iov, nr_segs, res);
+ if (iov)
+ kernel_read_iovec(iov, nr_segs, res);
}
}
@@ -2662,8 +2906,8 @@ POST_SYSCALL(tee)(long res, long fdin, long fdout, long len, long flags) {}
PRE_SYSCALL(get_robust_list)(long pid, void *head_ptr, void *len_ptr) {}
-POST_SYSCALL(get_robust_list)(long res, long pid, void *head_ptr,
- void *len_ptr) {}
+POST_SYSCALL(get_robust_list)
+(long res, long pid, void *head_ptr, void *len_ptr) {}
PRE_SYSCALL(set_robust_list)(void *head, long len) {}
@@ -2673,27 +2917,31 @@ PRE_SYSCALL(getcpu)(void *cpu, void *node, void *cache) {}
POST_SYSCALL(getcpu)(long res, void *cpu, void *node, void *cache) {
if (res >= 0) {
- if (cpu) POST_WRITE(cpu, sizeof(unsigned));
- if (node) POST_WRITE(node, sizeof(unsigned));
+ if (cpu)
+ POST_WRITE(cpu, sizeof(unsigned));
+ if (node)
+ POST_WRITE(node, sizeof(unsigned));
// The third argument to this system call is nowadays unused.
}
}
PRE_SYSCALL(signalfd)(long ufd, void *user_mask, long sizemask) {}
-POST_SYSCALL(signalfd)(long res, long ufd, kernel_sigset_t *user_mask,
- long sizemask) {
+POST_SYSCALL(signalfd)
+(long res, long ufd, kernel_sigset_t *user_mask, long sizemask) {
if (res >= 0) {
- if (user_mask) POST_WRITE(user_mask, sizemask);
+ if (user_mask)
+ POST_WRITE(user_mask, sizemask);
}
}
PRE_SYSCALL(signalfd4)(long ufd, void *user_mask, long sizemask, long flags) {}
-POST_SYSCALL(signalfd4)(long res, long ufd, kernel_sigset_t *user_mask,
- long sizemask, long flags) {
+POST_SYSCALL(signalfd4)
+(long res, long ufd, kernel_sigset_t *user_mask, long sizemask, long flags) {
if (res >= 0) {
- if (user_mask) POST_WRITE(user_mask, sizemask);
+ if (user_mask)
+ POST_WRITE(user_mask, sizemask);
}
}
@@ -2701,15 +2949,17 @@ PRE_SYSCALL(timerfd_create)(long clockid, long flags) {}
POST_SYSCALL(timerfd_create)(long res, long clockid, long flags) {}
-PRE_SYSCALL(timerfd_settime)(long ufd, long flags, const void *utmr,
- void *otmr) {
- if (utmr) PRE_READ(utmr, struct_itimerspec_sz);
+PRE_SYSCALL(timerfd_settime)
+(long ufd, long flags, const void *utmr, void *otmr) {
+ if (utmr)
+ PRE_READ(utmr, struct_itimerspec_sz);
}
-POST_SYSCALL(timerfd_settime)(long res, long ufd, long flags, const void *utmr,
- void *otmr) {
+POST_SYSCALL(timerfd_settime)
+(long res, long ufd, long flags, const void *utmr, void *otmr) {
if (res >= 0) {
- if (otmr) POST_WRITE(otmr, struct_itimerspec_sz);
+ if (otmr)
+ POST_WRITE(otmr, struct_itimerspec_sz);
}
}
@@ -2717,7 +2967,8 @@ PRE_SYSCALL(timerfd_gettime)(long ufd, void *otmr) {}
POST_SYSCALL(timerfd_gettime)(long res, long ufd, void *otmr) {
if (res >= 0) {
- if (otmr) POST_WRITE(otmr, struct_itimerspec_sz);
+ if (otmr)
+ POST_WRITE(otmr, struct_itimerspec_sz);
}
}
@@ -2735,33 +2986,42 @@ POST_SYSCALL(old_readdir)(long res, long arg0, void *arg1, long arg2) {
// Missing definition of 'struct old_linux_dirent'.
}
-PRE_SYSCALL(pselect6)(long arg0, __sanitizer___kernel_fd_set *arg1,
- __sanitizer___kernel_fd_set *arg2,
- __sanitizer___kernel_fd_set *arg3, void *arg4,
- void *arg5) {}
+PRE_SYSCALL(pselect6)
+(long arg0, __sanitizer___kernel_fd_set *arg1,
+ __sanitizer___kernel_fd_set *arg2, __sanitizer___kernel_fd_set *arg3,
+ void *arg4, void *arg5) {}
-POST_SYSCALL(pselect6)(long res, long arg0, __sanitizer___kernel_fd_set *arg1,
- __sanitizer___kernel_fd_set *arg2,
- __sanitizer___kernel_fd_set *arg3, void *arg4,
- void *arg5) {
+POST_SYSCALL(pselect6)
+(long res, long arg0, __sanitizer___kernel_fd_set *arg1,
+ __sanitizer___kernel_fd_set *arg2, __sanitizer___kernel_fd_set *arg3,
+ void *arg4, void *arg5) {
if (res >= 0) {
- if (arg1) POST_WRITE(arg1, sizeof(*arg1));
- if (arg2) POST_WRITE(arg2, sizeof(*arg2));
- if (arg3) POST_WRITE(arg3, sizeof(*arg3));
- if (arg4) POST_WRITE(arg4, struct_timespec_sz);
+ if (arg1)
+ POST_WRITE(arg1, sizeof(*arg1));
+ if (arg2)
+ POST_WRITE(arg2, sizeof(*arg2));
+ if (arg3)
+ POST_WRITE(arg3, sizeof(*arg3));
+ if (arg4)
+ POST_WRITE(arg4, struct_timespec_sz);
}
}
-PRE_SYSCALL(ppoll)(__sanitizer_pollfd *arg0, long arg1, void *arg2,
- const kernel_sigset_t *arg3, long arg4) {
- if (arg3) PRE_READ(arg3, arg4);
+PRE_SYSCALL(ppoll)
+(__sanitizer_pollfd *arg0, long arg1, void *arg2, const kernel_sigset_t *arg3,
+ long arg4) {
+ if (arg3)
+ PRE_READ(arg3, arg4);
}
-POST_SYSCALL(ppoll)(long res, __sanitizer_pollfd *arg0, long arg1, void *arg2,
- const void *arg3, long arg4) {
+POST_SYSCALL(ppoll)
+(long res, __sanitizer_pollfd *arg0, long arg1, void *arg2, const void *arg3,
+ long arg4) {
if (res >= 0) {
- if (arg0) POST_WRITE(arg0, sizeof(*arg0));
- if (arg2) POST_WRITE(arg2, struct_timespec_sz);
+ if (arg0)
+ POST_WRITE(arg0, sizeof(*arg0));
+ if (arg2)
+ POST_WRITE(arg2, struct_timespec_sz);
}
}
@@ -2769,81 +3029,79 @@ PRE_SYSCALL(syncfs)(long fd) {}
POST_SYSCALL(syncfs)(long res, long fd) {}
-PRE_SYSCALL(perf_event_open)(__sanitizer_perf_event_attr *attr_uptr, long pid,
- long cpu, long group_fd, long flags) {
- if (attr_uptr) PRE_READ(attr_uptr, attr_uptr->size);
+PRE_SYSCALL(perf_event_open)
+(__sanitizer_perf_event_attr *attr_uptr, long pid, long cpu, long group_fd,
+ long flags) {
+ if (attr_uptr)
+ PRE_READ(attr_uptr, attr_uptr->size);
}
-POST_SYSCALL(perf_event_open)(long res, __sanitizer_perf_event_attr *attr_uptr,
- long pid, long cpu, long group_fd, long flags) {}
+POST_SYSCALL(perf_event_open)
+(long res, __sanitizer_perf_event_attr *attr_uptr, long pid, long cpu,
+ long group_fd, long flags) {}
-PRE_SYSCALL(mmap_pgoff)(long addr, long len, long prot, long flags, long fd,
- long pgoff) {}
+PRE_SYSCALL(mmap_pgoff)
+(long addr, long len, long prot, long flags, long fd, long pgoff) {}
-POST_SYSCALL(mmap_pgoff)(long res, long addr, long len, long prot, long flags,
- long fd, long pgoff) {}
+POST_SYSCALL(mmap_pgoff)
+(long res, long addr, long len, long prot, long flags, long fd, long pgoff) {}
PRE_SYSCALL(old_mmap)(void *arg) {}
POST_SYSCALL(old_mmap)(long res, void *arg) {}
-PRE_SYSCALL(name_to_handle_at)(long dfd, const void *name, void *handle,
- void *mnt_id, long flag) {}
+PRE_SYSCALL(name_to_handle_at)
+(long dfd, const void *name, void *handle, void *mnt_id, long flag) {}
-POST_SYSCALL(name_to_handle_at)(long res, long dfd, const void *name,
- void *handle, void *mnt_id, long flag) {}
+POST_SYSCALL(name_to_handle_at)
+(long res, long dfd, const void *name, void *handle, void *mnt_id, long flag) {}
PRE_SYSCALL(open_by_handle_at)(long mountdirfd, void *handle, long flags) {}
-POST_SYSCALL(open_by_handle_at)(long res, long mountdirfd, void *handle,
- long flags) {}
+POST_SYSCALL(open_by_handle_at)
+(long res, long mountdirfd, void *handle, long flags) {}
PRE_SYSCALL(setns)(long fd, long nstype) {}
POST_SYSCALL(setns)(long res, long fd, long nstype) {}
-PRE_SYSCALL(process_vm_readv)(long pid, const __sanitizer_iovec *lvec,
- long liovcnt, const void *rvec, long riovcnt,
- long flags) {}
+PRE_SYSCALL(process_vm_readv)
+(long pid, const __sanitizer_iovec *lvec, long liovcnt, const void *rvec,
+ long riovcnt, long flags) {}
-POST_SYSCALL(process_vm_readv)(long res, long pid,
- const __sanitizer_iovec *lvec, long liovcnt,
- const void *rvec, long riovcnt, long flags) {
+POST_SYSCALL(process_vm_readv)
+(long res, long pid, const __sanitizer_iovec *lvec, long liovcnt,
+ const void *rvec, long riovcnt, long flags) {
if (res >= 0) {
- if (lvec) kernel_write_iovec(lvec, liovcnt, res);
+ if (lvec)
+ kernel_write_iovec(lvec, liovcnt, res);
}
}
-PRE_SYSCALL(process_vm_writev)(long pid, const __sanitizer_iovec *lvec,
- long liovcnt, const void *rvec, long riovcnt,
- long flags) {}
+PRE_SYSCALL(process_vm_writev)
+(long pid, const __sanitizer_iovec *lvec, long liovcnt, const void *rvec,
+ long riovcnt, long flags) {}
-POST_SYSCALL(process_vm_writev)(long res, long pid,
- const __sanitizer_iovec *lvec, long liovcnt,
- const void *rvec, long riovcnt, long flags) {
+POST_SYSCALL(process_vm_writev)
+(long res, long pid, const __sanitizer_iovec *lvec, long liovcnt,
+ const void *rvec, long riovcnt, long flags) {
if (res >= 0) {
- if (lvec) kernel_read_iovec(lvec, liovcnt, res);
+ if (lvec)
+ kernel_read_iovec(lvec, liovcnt, res);
}
}
-PRE_SYSCALL(fork)() {
- COMMON_SYSCALL_PRE_FORK();
-}
+PRE_SYSCALL(fork)() { COMMON_SYSCALL_PRE_FORK(); }
-POST_SYSCALL(fork)(long res) {
- COMMON_SYSCALL_POST_FORK(res);
-}
+POST_SYSCALL(fork)(long res) { COMMON_SYSCALL_POST_FORK(res); }
-PRE_SYSCALL(vfork)() {
- COMMON_SYSCALL_PRE_FORK();
-}
+PRE_SYSCALL(vfork)() { COMMON_SYSCALL_PRE_FORK(); }
-POST_SYSCALL(vfork)(long res) {
- COMMON_SYSCALL_POST_FORK(res);
-}
+POST_SYSCALL(vfork)(long res) { COMMON_SYSCALL_POST_FORK(res); }
-PRE_SYSCALL(sigaction)(long signum, const __sanitizer_kernel_sigaction_t *act,
- __sanitizer_kernel_sigaction_t *oldact) {
+PRE_SYSCALL(sigaction)
+(long signum, const __sanitizer_kernel_sigaction_t *act,
+ __sanitizer_kernel_sigaction_t *oldact) {
if (act) {
PRE_READ(&act->sigaction, sizeof(act->sigaction));
PRE_READ(&act->sa_flags, sizeof(act->sa_flags));
@@ -2851,15 +3109,16 @@ PRE_SYSCALL(sigaction)(long signum, const __sanitizer_kernel_sigaction_t *act,
}
}
-POST_SYSCALL(sigaction)(long res, long signum,
- const __sanitizer_kernel_sigaction_t *act,
- __sanitizer_kernel_sigaction_t *oldact) {
- if (res >= 0 && oldact) POST_WRITE(oldact, sizeof(*oldact));
+POST_SYSCALL(sigaction)
+(long res, long signum, const __sanitizer_kernel_sigaction_t *act,
+ __sanitizer_kernel_sigaction_t *oldact) {
+ if (res >= 0 && oldact)
+ POST_WRITE(oldact, sizeof(*oldact));
}
-PRE_SYSCALL(rt_sigaction)(long signum,
- const __sanitizer_kernel_sigaction_t *act,
- __sanitizer_kernel_sigaction_t *oldact, SIZE_T sz) {
+PRE_SYSCALL(rt_sigaction)
+(long signum, const __sanitizer_kernel_sigaction_t *act,
+ __sanitizer_kernel_sigaction_t *oldact, SIZE_T sz) {
if (act) {
PRE_READ(&act->sigaction, sizeof(act->sigaction));
PRE_READ(&act->sa_flags, sizeof(act->sa_flags));
@@ -2867,9 +3126,9 @@ PRE_SYSCALL(rt_sigaction)(long signum,
}
}
-POST_SYSCALL(rt_sigaction)(long res, long signum,
- const __sanitizer_kernel_sigaction_t *act,
- __sanitizer_kernel_sigaction_t *oldact, SIZE_T sz) {
+POST_SYSCALL(rt_sigaction)
+(long res, long signum, const __sanitizer_kernel_sigaction_t *act,
+ __sanitizer_kernel_sigaction_t *oldact, SIZE_T sz) {
if (res >= 0 && oldact) {
SIZE_T oldact_sz = ((char *)&oldact->sa_mask) - ((char *)oldact) + sz;
POST_WRITE(oldact, oldact_sz);
@@ -2906,11 +3165,11 @@ POST_SYSCALL(sigaltstack)(long res, void *ss, void *oss) {
}
} // extern "C"
-#undef PRE_SYSCALL
-#undef PRE_READ
-#undef PRE_WRITE
-#undef POST_SYSCALL
-#undef POST_READ
-#undef POST_WRITE
+# undef PRE_SYSCALL
+# undef PRE_READ
+# undef PRE_WRITE
+# undef POST_SYSCALL
+# undef POST_READ
+# undef POST_WRITE
#endif // SANITIZER_LINUX
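The hunks above in sanitizer_common_syscalls.inc are mostly mechanical restyling: the PRE_SYSCALL/POST_SYSCALL macro invocation now sits on its own line with the parameter list wrapped underneath, single-statement if bodies move to their own line, and the trailing #undefs pick up the indented "# undef" spelling. As a rough sketch only (the syscall name below is hypothetical and not part of this change), a hook written in the updated style looks like:

PRE_SYSCALL(frobnicate)
(long fd, const void *inbuf, long inlen, void *outbuf) {
  if (inbuf)
    PRE_READ(inbuf, inlen);  // the kernel reads inlen bytes from inbuf
}

POST_SYSCALL(frobnicate)
(long res, long fd, const void *inbuf, long inlen, void *outbuf) {
  if (res >= 0) {
    if (outbuf)
      POST_WRITE(outbuf, res);  // the kernel wrote res bytes back to outbuf
  }
}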
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_coverage_fuchsia.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_coverage_fuchsia.cpp
index a52db08433e3..1d0dbe592b93 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_coverage_fuchsia.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_coverage_fuchsia.cpp
@@ -51,6 +51,8 @@ constexpr const char kSancovSinkName[] = "sancov";
// This class relies on zero-initialization.
class TracePcGuardController final {
public:
+ constexpr TracePcGuardController() {}
+
// For each PC location being tracked, there is a u32 reserved in global
// data called the "guard". At startup, we assign each guard slot a
// unique index into the big results array. Later during runtime, the
@@ -87,7 +89,7 @@ class TracePcGuardController final {
}
void Dump() {
- BlockingMutexLock locked(&setup_lock_);
+ Lock locked(&setup_lock_);
if (array_) {
CHECK_NE(vmo_, ZX_HANDLE_INVALID);
@@ -114,7 +116,7 @@ class TracePcGuardController final {
// We can always spare the 32G of address space.
static constexpr size_t MappingSize = sizeof(uptr) << 32;
- BlockingMutex setup_lock_ = BlockingMutex(LINKER_INITIALIZED);
+ Mutex setup_lock_;
uptr *array_ = nullptr;
u32 next_index_ = 0;
zx_handle_t vmo_ = {};
@@ -123,7 +125,7 @@ class TracePcGuardController final {
size_t DataSize() const { return next_index_ * sizeof(uintptr_t); }
u32 Setup(u32 num_guards) {
- BlockingMutexLock locked(&setup_lock_);
+ Lock locked(&setup_lock_);
DCHECK(common_flags()->coverage);
if (next_index_ == 0) {
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_coverage_libcdep_new.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_coverage_libcdep_new.cpp
index 73ebeb5fa14a..56220df2ac18 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_coverage_libcdep_new.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_coverage_libcdep_new.cpp
@@ -73,7 +73,7 @@ static void SanitizerDumpCoverage(const uptr* unsorted_pcs, uptr len) {
if (!pc) continue;
if (!__sanitizer_get_module_and_offset_for_pc(pc, nullptr, 0, &pcs[i])) {
- Printf("ERROR: unknown pc 0x%x (may happen if dlclose is used)\n", pc);
+ Printf("ERROR: unknown pc 0x%zx (may happen if dlclose is used)\n", pc);
continue;
}
uptr module_base = pc - pcs[i];
@@ -151,6 +151,55 @@ class TracePcGuardController {
static TracePcGuardController pc_guard_controller;
+// A basic default implementation of callbacks for
+// -fsanitize-coverage=inline-8bit-counters,pc-table.
+// Use TOOL_OPTIONS (UBSAN_OPTIONS, etc) to dump the coverage data:
+// * cov_8bit_counters_out=PATH to dump the 8bit counters.
+// * cov_pcs_out=PATH to dump the pc table.
+//
+// Most users will still need to define their own callbacks for greater
+// flexibility.
+namespace SingletonCounterCoverage {
+
+static char *counters_beg, *counters_end;
+static const uptr *pcs_beg, *pcs_end;
+
+static void DumpCoverage() {
+ const char* file_path = common_flags()->cov_8bit_counters_out;
+ if (file_path && internal_strlen(file_path)) {
+ fd_t fd = OpenFile(file_path);
+ FileCloser file_closer(fd);
+ uptr size = counters_end - counters_beg;
+ WriteToFile(fd, counters_beg, size);
+ if (common_flags()->verbosity)
+ __sanitizer::Printf("cov_8bit_counters_out: written %zd bytes to %s\n",
+ size, file_path);
+ }
+ file_path = common_flags()->cov_pcs_out;
+ if (file_path && internal_strlen(file_path)) {
+ fd_t fd = OpenFile(file_path);
+ FileCloser file_closer(fd);
+ uptr size = (pcs_end - pcs_beg) * sizeof(uptr);
+ WriteToFile(fd, pcs_beg, size);
+ if (common_flags()->verbosity)
+ __sanitizer::Printf("cov_pcs_out: written %zd bytes to %s\n", size,
+ file_path);
+ }
+}
+
+static void Cov8bitCountersInit(char* beg, char* end) {
+ counters_beg = beg;
+ counters_end = end;
+ Atexit(DumpCoverage);
+}
+
+static void CovPcsInit(const uptr* beg, const uptr* end) {
+ pcs_beg = beg;
+ pcs_end = end;
+}
+
+} // namespace SingletonCounterCoverage
+
} // namespace
} // namespace __sancov
@@ -191,7 +240,9 @@ SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_cov_dump() {
SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_cov_reset() {
__sancov::pc_guard_controller.Reset();
}
-// Default empty implementations (weak). Users should redefine them.
+// Default implementations (weak).
+// Either empty or very simple.
+// Most users should redefine them.
SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_cmp, void) {}
SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_cmp1, void) {}
SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_cmp2, void) {}
@@ -206,9 +257,15 @@ SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_div4, void) {}
SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_div8, void) {}
SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_gep, void) {}
SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_trace_pc_indir, void) {}
-SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_8bit_counters_init, void) {}
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_8bit_counters_init,
+ char* start, char* end) {
+ __sancov::SingletonCounterCoverage::Cov8bitCountersInit(start, end);
+}
SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_bool_flag_init, void) {}
-SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_pcs_init, void) {}
+SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_pcs_init, const uptr* beg,
+ const uptr* end) {
+ __sancov::SingletonCounterCoverage::CovPcsInit(beg, end);
+}
} // extern "C"
// Weak definition for code instrumented with -fsanitize-coverage=stack-depth
// and later linked with code containing a strong definition.
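The SingletonCounterCoverage block above gives -fsanitize-coverage=inline-8bit-counters,pc-table a usable default: set cov_8bit_counters_out=PATH and/or cov_pcs_out=PATH in the tool's options (ASAN_OPTIONS, UBSAN_OPTIONS, ...) and the raw counter region and PC table are dumped at exit. Programs that want their own bookkeeping can still supply strong definitions of the two callbacks; a minimal sketch, with the stored pointers and their later use left illustrative:

#include <stdint.h>

static char *g_counters_beg, *g_counters_end;
static const uintptr_t *g_pcs_beg, *g_pcs_end;

// Strong definitions take precedence over the weak defaults added above.
extern "C" void __sanitizer_cov_8bit_counters_init(char *beg, char *end) {
  g_counters_beg = beg;  // one counter byte per instrumented edge
  g_counters_end = end;
}

extern "C" void __sanitizer_cov_pcs_init(const uintptr_t *beg,
                                         const uintptr_t *end) {
  g_pcs_beg = beg;  // PC table emitted for -fsanitize-coverage=pc-table
  g_pcs_end = end;
}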
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_deadlock_detector.h b/compiler-rt/lib/sanitizer_common/sanitizer_deadlock_detector.h
index b80cff460eda..0749f633b4bc 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_deadlock_detector.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_deadlock_detector.h
@@ -293,7 +293,7 @@ class DeadlockDetector {
}
// Returns true iff dtls is empty (no locks are currently held) and we can
- // add the node to the currently held locks w/o chanding the global state.
+ // add the node to the currently held locks w/o changing the global state.
// This operation is thread-safe as it only touches the dtls.
bool onFirstLock(DeadlockDetectorTLS<BV> *dtls, uptr node, u32 stk = 0) {
if (!dtls->empty()) return false;
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_dense_map.h b/compiler-rt/lib/sanitizer_common/sanitizer_dense_map.h
new file mode 100644
index 000000000000..3fa6af76ce29
--- /dev/null
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_dense_map.h
@@ -0,0 +1,678 @@
+//===- sanitizer_dense_map.h - Dense probed hash table ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a fork of llvm/ADT/DenseMap.h class with the following changes:
+// * Use mmap to allocate.
+// * No iterators.
+// * Does not shrink.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SANITIZER_DENSE_MAP_H
+#define SANITIZER_DENSE_MAP_H
+
+#include "sanitizer_common.h"
+#include "sanitizer_dense_map_info.h"
+#include "sanitizer_internal_defs.h"
+#include "sanitizer_type_traits.h"
+
+namespace __sanitizer {
+
+template <typename DerivedT, typename KeyT, typename ValueT, typename KeyInfoT,
+ typename BucketT>
+class DenseMapBase {
+ public:
+ using size_type = unsigned;
+ using key_type = KeyT;
+ using mapped_type = ValueT;
+ using value_type = BucketT;
+
+ WARN_UNUSED_RESULT bool empty() const { return getNumEntries() == 0; }
+ unsigned size() const { return getNumEntries(); }
+
+ /// Grow the densemap so that it can contain at least \p NumEntries items
+ /// before resizing again.
+ void reserve(size_type NumEntries) {
+ auto NumBuckets = getMinBucketToReserveForEntries(NumEntries);
+ if (NumBuckets > getNumBuckets())
+ grow(NumBuckets);
+ }
+
+ void clear() {
+ if (getNumEntries() == 0 && getNumTombstones() == 0)
+ return;
+
+ const KeyT EmptyKey = getEmptyKey(), TombstoneKey = getTombstoneKey();
+ if (__sanitizer::is_trivially_destructible<ValueT>::value) {
+ // Use a simpler loop when values don't need destruction.
+ for (BucketT *P = getBuckets(), *E = getBucketsEnd(); P != E; ++P)
+ P->getFirst() = EmptyKey;
+ } else {
+ unsigned NumEntries = getNumEntries();
+ for (BucketT *P = getBuckets(), *E = getBucketsEnd(); P != E; ++P) {
+ if (!KeyInfoT::isEqual(P->getFirst(), EmptyKey)) {
+ if (!KeyInfoT::isEqual(P->getFirst(), TombstoneKey)) {
+ P->getSecond().~ValueT();
+ --NumEntries;
+ }
+ P->getFirst() = EmptyKey;
+ }
+ }
+ CHECK_EQ(NumEntries, 0);
+ }
+ setNumEntries(0);
+ setNumTombstones(0);
+ }
+
+ /// Return 1 if the specified key is in the map, 0 otherwise.
+ size_type count(const KeyT &Key) const {
+ const BucketT *TheBucket;
+ return LookupBucketFor(Key, TheBucket) ? 1 : 0;
+ }
+
+ value_type *find(const KeyT &Key) {
+ BucketT *TheBucket;
+ if (LookupBucketFor(Key, TheBucket))
+ return TheBucket;
+ return nullptr;
+ }
+ const value_type *find(const KeyT &Key) const {
+ const BucketT *TheBucket;
+ if (LookupBucketFor(Key, TheBucket))
+ return TheBucket;
+ return nullptr;
+ }
+
+ /// Alternate version of find() which allows a different, and possibly
+ /// less expensive, key type.
+ /// The DenseMapInfo is responsible for supplying methods
+ /// getHashValue(LookupKeyT) and isEqual(LookupKeyT, KeyT) for each key
+ /// type used.
+ template <class LookupKeyT>
+ value_type *find_as(const LookupKeyT &Key) {
+ BucketT *TheBucket;
+ if (LookupBucketFor(Key, TheBucket))
+ return TheBucket;
+ return nullptr;
+ }
+ template <class LookupKeyT>
+ const value_type *find_as(const LookupKeyT &Key) const {
+ const BucketT *TheBucket;
+ if (LookupBucketFor(Key, TheBucket))
+ return TheBucket;
+ return nullptr;
+ }
+
+ /// lookup - Return the entry for the specified key, or a default
+ /// constructed value if no such entry exists.
+ ValueT lookup(const KeyT &Key) const {
+ const BucketT *TheBucket;
+ if (LookupBucketFor(Key, TheBucket))
+ return TheBucket->getSecond();
+ return ValueT();
+ }
+
+ // Inserts key,value pair into the map if the key isn't already in the map.
+ // If the key is already in the map, it returns false and doesn't update the
+ // value.
+ detail::DenseMapPair<value_type *, bool> insert(const value_type &KV) {
+ return try_emplace(KV.first, KV.second);
+ }
+
+ // Inserts key,value pair into the map if the key isn't already in the map.
+ // If the key is already in the map, it returns false and doesn't update the
+ // value.
+ detail::DenseMapPair<value_type *, bool> insert(value_type &&KV) {
+ return try_emplace(__sanitizer::move(KV.first),
+ __sanitizer::move(KV.second));
+ }
+
+ // Inserts key,value pair into the map if the key isn't already in the map.
+ // The value is constructed in-place if the key is not in the map, otherwise
+ // it is not moved.
+ template <typename... Ts>
+ detail::DenseMapPair<value_type *, bool> try_emplace(KeyT &&Key,
+ Ts &&...Args) {
+ BucketT *TheBucket;
+ if (LookupBucketFor(Key, TheBucket))
+ return {TheBucket, false}; // Already in map.
+
+ // Otherwise, insert the new element.
+ TheBucket = InsertIntoBucket(TheBucket, __sanitizer::move(Key),
+ __sanitizer::forward<Ts>(Args)...);
+ return {TheBucket, true};
+ }
+
+ // Inserts key,value pair into the map if the key isn't already in the map.
+ // The value is constructed in-place if the key is not in the map, otherwise
+ // it is not moved.
+ template <typename... Ts>
+ detail::DenseMapPair<value_type *, bool> try_emplace(const KeyT &Key,
+ Ts &&...Args) {
+ BucketT *TheBucket;
+ if (LookupBucketFor(Key, TheBucket))
+ return {TheBucket, false}; // Already in map.
+
+ // Otherwise, insert the new element.
+ TheBucket =
+ InsertIntoBucket(TheBucket, Key, __sanitizer::forward<Ts>(Args)...);
+ return {TheBucket, true};
+ }
+
+ /// Alternate version of insert() which allows a different, and possibly
+ /// less expensive, key type.
+ /// The DenseMapInfo is responsible for supplying methods
+ /// getHashValue(LookupKeyT) and isEqual(LookupKeyT, KeyT) for each key
+ /// type used.
+ template <typename LookupKeyT>
+ detail::DenseMapPair<value_type *, bool> insert_as(value_type &&KV,
+ const LookupKeyT &Val) {
+ BucketT *TheBucket;
+ if (LookupBucketFor(Val, TheBucket))
+ return {TheBucket, false}; // Already in map.
+
+ // Otherwise, insert the new element.
+ TheBucket =
+ InsertIntoBucketWithLookup(TheBucket, __sanitizer::move(KV.first),
+ __sanitizer::move(KV.second), Val);
+ return {TheBucket, true};
+ }
+
+ bool erase(const KeyT &Val) {
+ BucketT *TheBucket;
+ if (!LookupBucketFor(Val, TheBucket))
+ return false; // not in map.
+
+ TheBucket->getSecond().~ValueT();
+ TheBucket->getFirst() = getTombstoneKey();
+ decrementNumEntries();
+ incrementNumTombstones();
+ return true;
+ }
+
+ void erase(value_type *I) {
+ CHECK_NE(I, nullptr);
+ BucketT *TheBucket = &*I;
+ TheBucket->getSecond().~ValueT();
+ TheBucket->getFirst() = getTombstoneKey();
+ decrementNumEntries();
+ incrementNumTombstones();
+ }
+
+ value_type &FindAndConstruct(const KeyT &Key) {
+ BucketT *TheBucket;
+ if (LookupBucketFor(Key, TheBucket))
+ return *TheBucket;
+
+ return *InsertIntoBucket(TheBucket, Key);
+ }
+
+ ValueT &operator[](const KeyT &Key) { return FindAndConstruct(Key).second; }
+
+ value_type &FindAndConstruct(KeyT &&Key) {
+ BucketT *TheBucket;
+ if (LookupBucketFor(Key, TheBucket))
+ return *TheBucket;
+
+ return *InsertIntoBucket(TheBucket, __sanitizer::move(Key));
+ }
+
+ ValueT &operator[](KeyT &&Key) {
+ return FindAndConstruct(__sanitizer::move(Key)).second;
+ }
+
+ /// Equality comparison for DenseMap.
+ ///
+ /// Iterates over elements of LHS confirming that each (key, value) pair in
+ /// LHS is also in RHS, and that no additional pairs are in RHS. Equivalent to
+ /// N calls to RHS.find and N value comparisons. Amortized complexity is
+ /// linear, worst case is O(N^2) (if every hash collides).
+ bool operator==(const DenseMapBase &RHS) const {
+ if (size() != RHS.size())
+ return false;
+
+ const KeyT EmptyKey = getEmptyKey(), TombstoneKey = getTombstoneKey();
+ for (auto *P = getBuckets(), *E = getBucketsEnd(); P != E; ++P) {
+ const KeyT K = P->getFirst();
+ if (!KeyInfoT::isEqual(K, EmptyKey) &&
+ !KeyInfoT::isEqual(K, TombstoneKey)) {
+ const auto *I = RHS.find(K);
+ if (!I || P->getSecond() != I->getSecond())
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ protected:
+ DenseMapBase() = default;
+
+ void destroyAll() {
+ if (getNumBuckets() == 0) // Nothing to do.
+ return;
+
+ const KeyT EmptyKey = getEmptyKey(), TombstoneKey = getTombstoneKey();
+ for (BucketT *P = getBuckets(), *E = getBucketsEnd(); P != E; ++P) {
+ if (!KeyInfoT::isEqual(P->getFirst(), EmptyKey) &&
+ !KeyInfoT::isEqual(P->getFirst(), TombstoneKey))
+ P->getSecond().~ValueT();
+ P->getFirst().~KeyT();
+ }
+ }
+
+ void initEmpty() {
+ setNumEntries(0);
+ setNumTombstones(0);
+
+ CHECK_EQ((getNumBuckets() & (getNumBuckets() - 1)), 0);
+ const KeyT EmptyKey = getEmptyKey();
+ for (BucketT *B = getBuckets(), *E = getBucketsEnd(); B != E; ++B)
+ ::new (&B->getFirst()) KeyT(EmptyKey);
+ }
+
+ /// Returns the number of buckets to allocate to ensure that the DenseMap can
+ /// accommodate \p NumEntries without need to grow().
+ unsigned getMinBucketToReserveForEntries(unsigned NumEntries) {
+ // Ensure that "NumEntries * 4 < NumBuckets * 3"
+ if (NumEntries == 0)
+ return 0;
+ // +1 is required because of the strict equality.
+ // For example if NumEntries is 48, we need to return 401.
+ return RoundUpToPowerOfTwo((NumEntries * 4 / 3 + 1) + /* NextPowerOf2 */ 1);
+ }
+
+ void moveFromOldBuckets(BucketT *OldBucketsBegin, BucketT *OldBucketsEnd) {
+ initEmpty();
+
+ // Insert all the old elements.
+ const KeyT EmptyKey = getEmptyKey();
+ const KeyT TombstoneKey = getTombstoneKey();
+ for (BucketT *B = OldBucketsBegin, *E = OldBucketsEnd; B != E; ++B) {
+ if (!KeyInfoT::isEqual(B->getFirst(), EmptyKey) &&
+ !KeyInfoT::isEqual(B->getFirst(), TombstoneKey)) {
+ // Insert the key/value into the new table.
+ BucketT *DestBucket;
+ bool FoundVal = LookupBucketFor(B->getFirst(), DestBucket);
+ (void)FoundVal; // silence warning.
+ CHECK(!FoundVal);
+ DestBucket->getFirst() = __sanitizer::move(B->getFirst());
+ ::new (&DestBucket->getSecond())
+ ValueT(__sanitizer::move(B->getSecond()));
+ incrementNumEntries();
+
+ // Free the value.
+ B->getSecond().~ValueT();
+ }
+ B->getFirst().~KeyT();
+ }
+ }
+
+ template <typename OtherBaseT>
+ void copyFrom(
+ const DenseMapBase<OtherBaseT, KeyT, ValueT, KeyInfoT, BucketT> &other) {
+ CHECK_NE(&other, this);
+ CHECK_EQ(getNumBuckets(), other.getNumBuckets());
+
+ setNumEntries(other.getNumEntries());
+ setNumTombstones(other.getNumTombstones());
+
+ if (__sanitizer::is_trivially_copyable<KeyT>::value &&
+ __sanitizer::is_trivially_copyable<ValueT>::value)
+ internal_memcpy(reinterpret_cast<void *>(getBuckets()),
+ other.getBuckets(), getNumBuckets() * sizeof(BucketT));
+ else
+ for (uptr i = 0; i < getNumBuckets(); ++i) {
+ ::new (&getBuckets()[i].getFirst())
+ KeyT(other.getBuckets()[i].getFirst());
+ if (!KeyInfoT::isEqual(getBuckets()[i].getFirst(), getEmptyKey()) &&
+ !KeyInfoT::isEqual(getBuckets()[i].getFirst(), getTombstoneKey()))
+ ::new (&getBuckets()[i].getSecond())
+ ValueT(other.getBuckets()[i].getSecond());
+ }
+ }
+
+ static unsigned getHashValue(const KeyT &Val) {
+ return KeyInfoT::getHashValue(Val);
+ }
+
+ template <typename LookupKeyT>
+ static unsigned getHashValue(const LookupKeyT &Val) {
+ return KeyInfoT::getHashValue(Val);
+ }
+
+ static const KeyT getEmptyKey() { return KeyInfoT::getEmptyKey(); }
+
+ static const KeyT getTombstoneKey() { return KeyInfoT::getTombstoneKey(); }
+
+ private:
+ unsigned getNumEntries() const {
+ return static_cast<const DerivedT *>(this)->getNumEntries();
+ }
+
+ void setNumEntries(unsigned Num) {
+ static_cast<DerivedT *>(this)->setNumEntries(Num);
+ }
+
+ void incrementNumEntries() { setNumEntries(getNumEntries() + 1); }
+
+ void decrementNumEntries() { setNumEntries(getNumEntries() - 1); }
+
+ unsigned getNumTombstones() const {
+ return static_cast<const DerivedT *>(this)->getNumTombstones();
+ }
+
+ void setNumTombstones(unsigned Num) {
+ static_cast<DerivedT *>(this)->setNumTombstones(Num);
+ }
+
+ void incrementNumTombstones() { setNumTombstones(getNumTombstones() + 1); }
+
+ void decrementNumTombstones() { setNumTombstones(getNumTombstones() - 1); }
+
+ const BucketT *getBuckets() const {
+ return static_cast<const DerivedT *>(this)->getBuckets();
+ }
+
+ BucketT *getBuckets() { return static_cast<DerivedT *>(this)->getBuckets(); }
+
+ unsigned getNumBuckets() const {
+ return static_cast<const DerivedT *>(this)->getNumBuckets();
+ }
+
+ BucketT *getBucketsEnd() { return getBuckets() + getNumBuckets(); }
+
+ const BucketT *getBucketsEnd() const {
+ return getBuckets() + getNumBuckets();
+ }
+
+ void grow(unsigned AtLeast) { static_cast<DerivedT *>(this)->grow(AtLeast); }
+
+ template <typename KeyArg, typename... ValueArgs>
+ BucketT *InsertIntoBucket(BucketT *TheBucket, KeyArg &&Key,
+ ValueArgs &&...Values) {
+ TheBucket = InsertIntoBucketImpl(Key, Key, TheBucket);
+
+ TheBucket->getFirst() = __sanitizer::forward<KeyArg>(Key);
+ ::new (&TheBucket->getSecond())
+ ValueT(__sanitizer::forward<ValueArgs>(Values)...);
+ return TheBucket;
+ }
+
+ template <typename LookupKeyT>
+ BucketT *InsertIntoBucketWithLookup(BucketT *TheBucket, KeyT &&Key,
+ ValueT &&Value, LookupKeyT &Lookup) {
+ TheBucket = InsertIntoBucketImpl(Key, Lookup, TheBucket);
+
+ TheBucket->getFirst() = __sanitizer::move(Key);
+ ::new (&TheBucket->getSecond()) ValueT(__sanitizer::move(Value));
+ return TheBucket;
+ }
+
+ template <typename LookupKeyT>
+ BucketT *InsertIntoBucketImpl(const KeyT &Key, const LookupKeyT &Lookup,
+ BucketT *TheBucket) {
+ // If the load of the hash table is more than 3/4, or if fewer than 1/8 of
+ // the buckets are empty (meaning that many are filled with tombstones),
+ // grow the table.
+ //
+    // The latter case is tricky. For example, if we had one empty bucket with
+ // tons of tombstones, failing lookups (e.g. for insertion) would have to
+ // probe almost the entire table until it found the empty bucket. If the
+    // table is completely filled with tombstones, no lookup would ever succeed,
+ // causing infinite loops in lookup.
+ unsigned NewNumEntries = getNumEntries() + 1;
+ unsigned NumBuckets = getNumBuckets();
+ if (UNLIKELY(NewNumEntries * 4 >= NumBuckets * 3)) {
+ this->grow(NumBuckets * 2);
+ LookupBucketFor(Lookup, TheBucket);
+ NumBuckets = getNumBuckets();
+ } else if (UNLIKELY(NumBuckets - (NewNumEntries + getNumTombstones()) <=
+ NumBuckets / 8)) {
+ this->grow(NumBuckets);
+ LookupBucketFor(Lookup, TheBucket);
+ }
+ CHECK(TheBucket);
+
+ // Only update the state after we've grown our bucket space appropriately
+ // so that when growing buckets we have self-consistent entry count.
+ incrementNumEntries();
+
+ // If we are writing over a tombstone, remember this.
+ const KeyT EmptyKey = getEmptyKey();
+ if (!KeyInfoT::isEqual(TheBucket->getFirst(), EmptyKey))
+ decrementNumTombstones();
+
+ return TheBucket;
+ }
+
+ /// LookupBucketFor - Lookup the appropriate bucket for Val, returning it in
+ /// FoundBucket. If the bucket contains the key and a value, this returns
+ /// true, otherwise it returns a bucket with an empty marker or tombstone and
+ /// returns false.
+ template <typename LookupKeyT>
+ bool LookupBucketFor(const LookupKeyT &Val,
+ const BucketT *&FoundBucket) const {
+ const BucketT *BucketsPtr = getBuckets();
+ const unsigned NumBuckets = getNumBuckets();
+
+ if (NumBuckets == 0) {
+ FoundBucket = nullptr;
+ return false;
+ }
+
+ // FoundTombstone - Keep track of whether we find a tombstone while probing.
+ const BucketT *FoundTombstone = nullptr;
+ const KeyT EmptyKey = getEmptyKey();
+ const KeyT TombstoneKey = getTombstoneKey();
+ CHECK(!KeyInfoT::isEqual(Val, EmptyKey));
+ CHECK(!KeyInfoT::isEqual(Val, TombstoneKey));
+
+ unsigned BucketNo = getHashValue(Val) & (NumBuckets - 1);
+ unsigned ProbeAmt = 1;
+ while (true) {
+ const BucketT *ThisBucket = BucketsPtr + BucketNo;
+ // Found Val's bucket? If so, return it.
+ if (LIKELY(KeyInfoT::isEqual(Val, ThisBucket->getFirst()))) {
+ FoundBucket = ThisBucket;
+ return true;
+ }
+
+ // If we found an empty bucket, the key doesn't exist in the set.
+ // Insert it and return the default value.
+ if (LIKELY(KeyInfoT::isEqual(ThisBucket->getFirst(), EmptyKey))) {
+ // If we've already seen a tombstone while probing, fill it in instead
+ // of the empty bucket we eventually probed to.
+ FoundBucket = FoundTombstone ? FoundTombstone : ThisBucket;
+ return false;
+ }
+
+ // If this is a tombstone, remember it. If Val ends up not in the map, we
+      // prefer to return it rather than something that would require more probing.
+ if (KeyInfoT::isEqual(ThisBucket->getFirst(), TombstoneKey) &&
+ !FoundTombstone)
+ FoundTombstone = ThisBucket; // Remember the first tombstone found.
+
+ // Otherwise, it's a hash collision or a tombstone, continue quadratic
+ // probing.
+ BucketNo += ProbeAmt++;
+ BucketNo &= (NumBuckets - 1);
+ }
+ }
+
+ template <typename LookupKeyT>
+ bool LookupBucketFor(const LookupKeyT &Val, BucketT *&FoundBucket) {
+ const BucketT *ConstFoundBucket;
+ bool Result = const_cast<const DenseMapBase *>(this)->LookupBucketFor(
+ Val, ConstFoundBucket);
+ FoundBucket = const_cast<BucketT *>(ConstFoundBucket);
+ return Result;
+ }
+
+ public:
+ /// Return the approximate size (in bytes) of the actual map.
+ /// This is just the raw memory used by DenseMap.
+ /// If entries are pointers to objects, the size of the referenced objects
+ /// are not included.
+ uptr getMemorySize() const {
+ return RoundUpTo(getNumBuckets() * sizeof(BucketT), GetPageSizeCached());
+ }
+};
+
+/// Inequality comparison for DenseMap.
+///
+/// Equivalent to !(LHS == RHS). See operator== for performance notes.
+template <typename DerivedT, typename KeyT, typename ValueT, typename KeyInfoT,
+ typename BucketT>
+bool operator!=(
+ const DenseMapBase<DerivedT, KeyT, ValueT, KeyInfoT, BucketT> &LHS,
+ const DenseMapBase<DerivedT, KeyT, ValueT, KeyInfoT, BucketT> &RHS) {
+ return !(LHS == RHS);
+}
+
+template <typename KeyT, typename ValueT,
+ typename KeyInfoT = DenseMapInfo<KeyT>,
+ typename BucketT = detail::DenseMapPair<KeyT, ValueT>>
+class DenseMap : public DenseMapBase<DenseMap<KeyT, ValueT, KeyInfoT, BucketT>,
+ KeyT, ValueT, KeyInfoT, BucketT> {
+ friend class DenseMapBase<DenseMap, KeyT, ValueT, KeyInfoT, BucketT>;
+
+ // Lift some types from the dependent base class into this class for
+ // simplicity of referring to them.
+ using BaseT = DenseMapBase<DenseMap, KeyT, ValueT, KeyInfoT, BucketT>;
+
+ BucketT *Buckets = nullptr;
+ unsigned NumEntries = 0;
+ unsigned NumTombstones = 0;
+ unsigned NumBuckets = 0;
+
+ public:
+  /// Create a DenseMap with an optional \p InitialReserve that guarantees that
+ /// this number of elements can be inserted in the map without grow()
+ explicit DenseMap(unsigned InitialReserve) { init(InitialReserve); }
+ constexpr DenseMap() = default;
+
+ DenseMap(const DenseMap &other) : BaseT() {
+ init(0);
+ copyFrom(other);
+ }
+
+ DenseMap(DenseMap &&other) : BaseT() {
+ init(0);
+ swap(other);
+ }
+
+ ~DenseMap() {
+ this->destroyAll();
+ deallocate_buffer(Buckets, sizeof(BucketT) * NumBuckets);
+ }
+
+ void swap(DenseMap &RHS) {
+ Swap(Buckets, RHS.Buckets);
+ Swap(NumEntries, RHS.NumEntries);
+ Swap(NumTombstones, RHS.NumTombstones);
+ Swap(NumBuckets, RHS.NumBuckets);
+ }
+
+ DenseMap &operator=(const DenseMap &other) {
+ if (&other != this)
+ copyFrom(other);
+ return *this;
+ }
+
+ DenseMap &operator=(DenseMap &&other) {
+ this->destroyAll();
+ deallocate_buffer(Buckets, sizeof(BucketT) * NumBuckets, alignof(BucketT));
+ init(0);
+ swap(other);
+ return *this;
+ }
+
+ void copyFrom(const DenseMap &other) {
+ this->destroyAll();
+ deallocate_buffer(Buckets, sizeof(BucketT) * NumBuckets);
+ if (allocateBuckets(other.NumBuckets)) {
+ this->BaseT::copyFrom(other);
+ } else {
+ NumEntries = 0;
+ NumTombstones = 0;
+ }
+ }
+
+ void init(unsigned InitNumEntries) {
+ auto InitBuckets = BaseT::getMinBucketToReserveForEntries(InitNumEntries);
+ if (allocateBuckets(InitBuckets)) {
+ this->BaseT::initEmpty();
+ } else {
+ NumEntries = 0;
+ NumTombstones = 0;
+ }
+ }
+
+ void grow(unsigned AtLeast) {
+ unsigned OldNumBuckets = NumBuckets;
+ BucketT *OldBuckets = Buckets;
+
+ allocateBuckets(RoundUpToPowerOfTwo(Max<unsigned>(64, AtLeast)));
+ CHECK(Buckets);
+ if (!OldBuckets) {
+ this->BaseT::initEmpty();
+ return;
+ }
+
+ this->moveFromOldBuckets(OldBuckets, OldBuckets + OldNumBuckets);
+
+ // Free the old table.
+ deallocate_buffer(OldBuckets, sizeof(BucketT) * OldNumBuckets);
+ }
+
+ private:
+ unsigned getNumEntries() const { return NumEntries; }
+
+ void setNumEntries(unsigned Num) { NumEntries = Num; }
+
+ unsigned getNumTombstones() const { return NumTombstones; }
+
+ void setNumTombstones(unsigned Num) { NumTombstones = Num; }
+
+ BucketT *getBuckets() const { return Buckets; }
+
+ unsigned getNumBuckets() const { return NumBuckets; }
+
+ bool allocateBuckets(unsigned Num) {
+ NumBuckets = Num;
+ if (NumBuckets == 0) {
+ Buckets = nullptr;
+ return false;
+ }
+
+ uptr Size = sizeof(BucketT) * NumBuckets;
+ if (Size * 2 <= GetPageSizeCached()) {
+ // We always allocate at least a page, so use entire space.
+ unsigned Log2 = MostSignificantSetBitIndex(GetPageSizeCached() / Size);
+ Size <<= Log2;
+ NumBuckets <<= Log2;
+ CHECK_EQ(Size, sizeof(BucketT) * NumBuckets);
+ CHECK_GT(Size * 2, GetPageSizeCached());
+ }
+ Buckets = static_cast<BucketT *>(allocate_buffer(Size));
+ return true;
+ }
+
+ static void *allocate_buffer(uptr Size) {
+ return MmapOrDie(RoundUpTo(Size, GetPageSizeCached()), "DenseMap");
+ }
+
+ static void deallocate_buffer(void *Ptr, uptr Size) {
+ UnmapOrDie(Ptr, RoundUpTo(Size, GetPageSizeCached()));
+ }
+};
+
+} // namespace __sanitizer
+
+#endif // SANITIZER_DENSE_MAP_H
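For context, a rough usage sketch of the new container (illustrative only; the keys and values are arbitrary, and the code assumes it lives inside the sanitizer sources where this header and the __sanitizer types are available):

#include "sanitizer_dense_map.h"

namespace __sanitizer {

void DenseMapUsageSketch() {
  DenseMap<uptr, u32> counts;            // picks up DenseMapInfo for the key type
  counts[0x1000] = 1;                    // FindAndConstruct, then assign
  auto res = counts.try_emplace(0x2000, 2u);
  if (!res.second)                       // second is false if the key already existed
    res.first->getSecond() += 1;         // first points at the bucket
  u32 missing = counts.lookup(0x3000);   // default-constructed value (0) if absent
  (void)missing;
  counts.erase(0x1000);                  // leaves a tombstone; the map never shrinks
}

}  // namespace __sanitizer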
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_dense_map_info.h b/compiler-rt/lib/sanitizer_common/sanitizer_dense_map_info.h
new file mode 100644
index 000000000000..85c6427906c1
--- /dev/null
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_dense_map_info.h
@@ -0,0 +1,260 @@
+//===- sanitizer_dense_map_info.h - Type traits for DenseMap ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SANITIZER_DENSE_MAP_INFO_H
+#define SANITIZER_DENSE_MAP_INFO_H
+
+#include "sanitizer_common.h"
+#include "sanitizer_internal_defs.h"
+#include "sanitizer_type_traits.h"
+
+namespace __sanitizer {
+
+namespace detail {
+
+/// Simplistic combination of 32-bit hash values into 32-bit hash values.
+static inline unsigned combineHashValue(unsigned a, unsigned b) {
+ u64 key = (u64)a << 32 | (u64)b;
+ key += ~(key << 32);
+ key ^= (key >> 22);
+ key += ~(key << 13);
+ key ^= (key >> 8);
+ key += (key << 3);
+ key ^= (key >> 15);
+ key += ~(key << 27);
+ key ^= (key >> 31);
+ return (unsigned)key;
+}
+
+// We extend a pair to allow users to override the bucket type with their own
+// implementation without requiring two members.
+template <typename KeyT, typename ValueT>
+struct DenseMapPair {
+ KeyT first = {};
+ ValueT second = {};
+ DenseMapPair() = default;
+ DenseMapPair(const KeyT &f, const ValueT &s) : first(f), second(s) {}
+
+ template <typename KeyT2, typename ValueT2>
+ DenseMapPair(KeyT2 &&f, ValueT2 &&s)
+ : first(__sanitizer::forward<KeyT2>(f)),
+ second(__sanitizer::forward<ValueT2>(s)) {}
+
+ DenseMapPair(const DenseMapPair &other) = default;
+ DenseMapPair &operator=(const DenseMapPair &other) = default;
+ DenseMapPair(DenseMapPair &&other) = default;
+ DenseMapPair &operator=(DenseMapPair &&other) = default;
+
+ KeyT &getFirst() { return first; }
+ const KeyT &getFirst() const { return first; }
+ ValueT &getSecond() { return second; }
+ const ValueT &getSecond() const { return second; }
+};
+
+} // end namespace detail
+
+template <typename T>
+struct DenseMapInfo {
+ // static inline T getEmptyKey();
+ // static inline T getTombstoneKey();
+ // static unsigned getHashValue(const T &Val);
+ // static bool isEqual(const T &LHS, const T &RHS);
+};
+
+// Provide DenseMapInfo for all pointers. Come up with sentinel pointer values
+// that are aligned to alignof(T) bytes, but try to avoid requiring T to be
+// complete. This allows clients to instantiate DenseMap<T*, ...> with forward
+// declared key types. Assume that no pointer key type requires more than 4096
+// bytes of alignment.
+template <typename T>
+struct DenseMapInfo<T *> {
+ // The following should hold, but it would require T to be complete:
+ // static_assert(alignof(T) <= (1 << Log2MaxAlign),
+ // "DenseMap does not support pointer keys requiring more than "
+ // "Log2MaxAlign bits of alignment");
+ static constexpr uptr Log2MaxAlign = 12;
+
+ static inline T *getEmptyKey() {
+ uptr Val = static_cast<uptr>(-1);
+ Val <<= Log2MaxAlign;
+ return reinterpret_cast<T *>(Val);
+ }
+
+ static inline T *getTombstoneKey() {
+ uptr Val = static_cast<uptr>(-2);
+ Val <<= Log2MaxAlign;
+ return reinterpret_cast<T *>(Val);
+ }
+
+ static unsigned getHashValue(const T *PtrVal) {
+ return (unsigned((uptr)PtrVal) >> 4) ^ (unsigned((uptr)PtrVal) >> 9);
+ }
+
+ static bool isEqual(const T *LHS, const T *RHS) { return LHS == RHS; }
+};
+
+// Provide DenseMapInfo for chars.
+template <>
+struct DenseMapInfo<char> {
+ static inline char getEmptyKey() { return ~0; }
+ static inline char getTombstoneKey() { return ~0 - 1; }
+ static unsigned getHashValue(const char &Val) { return Val * 37U; }
+
+ static bool isEqual(const char &LHS, const char &RHS) { return LHS == RHS; }
+};
+
+// Provide DenseMapInfo for unsigned chars.
+template <>
+struct DenseMapInfo<unsigned char> {
+ static inline unsigned char getEmptyKey() { return ~0; }
+ static inline unsigned char getTombstoneKey() { return ~0 - 1; }
+ static unsigned getHashValue(const unsigned char &Val) { return Val * 37U; }
+
+ static bool isEqual(const unsigned char &LHS, const unsigned char &RHS) {
+ return LHS == RHS;
+ }
+};
+
+// Provide DenseMapInfo for unsigned shorts.
+template <>
+struct DenseMapInfo<unsigned short> {
+ static inline unsigned short getEmptyKey() { return 0xFFFF; }
+ static inline unsigned short getTombstoneKey() { return 0xFFFF - 1; }
+ static unsigned getHashValue(const unsigned short &Val) { return Val * 37U; }
+
+ static bool isEqual(const unsigned short &LHS, const unsigned short &RHS) {
+ return LHS == RHS;
+ }
+};
+
+// Provide DenseMapInfo for unsigned ints.
+template <>
+struct DenseMapInfo<unsigned> {
+ static inline unsigned getEmptyKey() { return ~0U; }
+ static inline unsigned getTombstoneKey() { return ~0U - 1; }
+ static unsigned getHashValue(const unsigned &Val) { return Val * 37U; }
+
+ static bool isEqual(const unsigned &LHS, const unsigned &RHS) {
+ return LHS == RHS;
+ }
+};
+
+// Provide DenseMapInfo for unsigned longs.
+template <>
+struct DenseMapInfo<unsigned long> {
+ static inline unsigned long getEmptyKey() { return ~0UL; }
+ static inline unsigned long getTombstoneKey() { return ~0UL - 1L; }
+
+ static unsigned getHashValue(const unsigned long &Val) {
+ return (unsigned)(Val * 37UL);
+ }
+
+ static bool isEqual(const unsigned long &LHS, const unsigned long &RHS) {
+ return LHS == RHS;
+ }
+};
+
+// Provide DenseMapInfo for unsigned long longs.
+template <>
+struct DenseMapInfo<unsigned long long> {
+ static inline unsigned long long getEmptyKey() { return ~0ULL; }
+ static inline unsigned long long getTombstoneKey() { return ~0ULL - 1ULL; }
+
+ static unsigned getHashValue(const unsigned long long &Val) {
+ return (unsigned)(Val * 37ULL);
+ }
+
+ static bool isEqual(const unsigned long long &LHS,
+ const unsigned long long &RHS) {
+ return LHS == RHS;
+ }
+};
+
+// Provide DenseMapInfo for shorts.
+template <>
+struct DenseMapInfo<short> {
+ static inline short getEmptyKey() { return 0x7FFF; }
+ static inline short getTombstoneKey() { return -0x7FFF - 1; }
+ static unsigned getHashValue(const short &Val) { return Val * 37U; }
+ static bool isEqual(const short &LHS, const short &RHS) { return LHS == RHS; }
+};
+
+// Provide DenseMapInfo for ints.
+template <>
+struct DenseMapInfo<int> {
+ static inline int getEmptyKey() { return 0x7fffffff; }
+ static inline int getTombstoneKey() { return -0x7fffffff - 1; }
+ static unsigned getHashValue(const int &Val) { return (unsigned)(Val * 37U); }
+
+ static bool isEqual(const int &LHS, const int &RHS) { return LHS == RHS; }
+};
+
+// Provide DenseMapInfo for longs.
+template <>
+struct DenseMapInfo<long> {
+ static inline long getEmptyKey() {
+ return (1UL << (sizeof(long) * 8 - 1)) - 1UL;
+ }
+
+ static inline long getTombstoneKey() { return getEmptyKey() - 1L; }
+
+ static unsigned getHashValue(const long &Val) {
+ return (unsigned)(Val * 37UL);
+ }
+
+ static bool isEqual(const long &LHS, const long &RHS) { return LHS == RHS; }
+};
+
+// Provide DenseMapInfo for long longs.
+template <>
+struct DenseMapInfo<long long> {
+ static inline long long getEmptyKey() { return 0x7fffffffffffffffLL; }
+ static inline long long getTombstoneKey() {
+ return -0x7fffffffffffffffLL - 1;
+ }
+
+ static unsigned getHashValue(const long long &Val) {
+ return (unsigned)(Val * 37ULL);
+ }
+
+ static bool isEqual(const long long &LHS, const long long &RHS) {
+ return LHS == RHS;
+ }
+};
+
+// Provide DenseMapInfo for all pairs whose members have info.
+template <typename T, typename U>
+struct DenseMapInfo<detail::DenseMapPair<T, U>> {
+ using Pair = detail::DenseMapPair<T, U>;
+ using FirstInfo = DenseMapInfo<T>;
+ using SecondInfo = DenseMapInfo<U>;
+
+ static inline Pair getEmptyKey() {
+ return detail::DenseMapPair<T, U>(FirstInfo::getEmptyKey(),
+ SecondInfo::getEmptyKey());
+ }
+
+ static inline Pair getTombstoneKey() {
+ return detail::DenseMapPair<T, U>(FirstInfo::getTombstoneKey(),
+ SecondInfo::getTombstoneKey());
+ }
+
+ static unsigned getHashValue(const Pair &PairVal) {
+ return detail::combineHashValue(FirstInfo::getHashValue(PairVal.first),
+ SecondInfo::getHashValue(PairVal.second));
+ }
+
+ static bool isEqual(const Pair &LHS, const Pair &RHS) {
+ return FirstInfo::isEqual(LHS.first, RHS.first) &&
+ SecondInfo::isEqual(LHS.second, RHS.second);
+ }
+};
+
+} // namespace __sanitizer
+
+#endif // SANITIZER_DENSE_MAP_INFO_H
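The primary DenseMapInfo template above only documents the required interface; each key type needs sentinel "empty" and "tombstone" values that can never appear as real keys, plus a hash function and an equality predicate. A sketch of a specialization for a hypothetical two-field key (not part of this change):

namespace __sanitizer {

struct PackedLoc {  // hypothetical key type
  uptr pc;
  u32 tag;
};

template <>
struct DenseMapInfo<PackedLoc> {
  // The sentinels must never collide with a real key.
  static inline PackedLoc getEmptyKey() { return {static_cast<uptr>(-1), 0}; }
  static inline PackedLoc getTombstoneKey() { return {static_cast<uptr>(-2), 0}; }
  static unsigned getHashValue(const PackedLoc &v) {
    return detail::combineHashValue(
        DenseMapInfo<unsigned long long>::getHashValue(v.pc),
        DenseMapInfo<unsigned>::getHashValue(v.tag));
  }
  static bool isEqual(const PackedLoc &a, const PackedLoc &b) {
    return a.pc == b.pc && a.tag == b.tag;
  }
};

}  // namespace __sanitizer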
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_file.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_file.cpp
index 0b92dccde4a1..5492560df914 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_file.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_file.cpp
@@ -75,6 +75,20 @@ void ReportFile::ReopenIfNecessary() {
fd_pid = pid;
}
+static void RecursiveCreateParentDirs(char *path) {
+ if (path[0] == '\0')
+ return;
+ for (int i = 1; path[i] != '\0'; ++i) {
+ char save = path[i];
+ if (!IsPathSeparator(path[i]))
+ continue;
+ path[i] = '\0';
+ /* Some of these will fail, because the directory exists, ignore it. */
+ CreateDir(path);
+ path[i] = save;
+ }
+}
+
void ReportFile::SetReportPath(const char *path) {
if (path) {
uptr len = internal_strlen(path);
@@ -95,6 +109,7 @@ void ReportFile::SetReportPath(const char *path) {
fd = kStdoutFd;
} else {
internal_snprintf(path_prefix, kMaxPathLength, "%s", path);
+ RecursiveCreateParentDirs(path_prefix);
}
}
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_file.h b/compiler-rt/lib/sanitizer_common/sanitizer_file.h
index 08671ab67d0f..3d7916171c1e 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_file.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_file.h
@@ -81,6 +81,8 @@ bool FileExists(const char *filename);
char *FindPathToBinary(const char *name);
bool IsPathSeparator(const char c);
bool IsAbsolutePath(const char *path);
+// Returns true on success, false on failure.
+bool CreateDir(const char *pathname);
// Starts a subprocess and returns its pid.
// If *_fd parameters are not kInvalidFd their corresponding input/output
// streams will be redirected to the file. The files will always be closed
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_flag_parser.h b/compiler-rt/lib/sanitizer_common/sanitizer_flag_parser.h
index acc71ccd89ee..3ccc6a6fa537 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_flag_parser.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_flag_parser.h
@@ -138,7 +138,7 @@ inline bool FlagHandler<uptr>::Parse(const char *value) {
template <>
inline bool FlagHandler<uptr>::Format(char *buffer, uptr size) {
- uptr num_symbols_should_write = internal_snprintf(buffer, size, "%p", *t_);
+ uptr num_symbols_should_write = internal_snprintf(buffer, size, "0x%zx", *t_);
return num_symbols_should_write < size;
}
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc b/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc
index 3bc44c6b1eb1..95da82b1a1da 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc
@@ -160,6 +160,10 @@ COMMON_FLAG(
COMMON_FLAG(const char *, coverage_dir, ".",
"Target directory for coverage dumps. Defaults to the current "
"directory.")
+COMMON_FLAG(const char *, cov_8bit_counters_out, "",
+ "If non-empty, write 8bit counters to this file. ")
+COMMON_FLAG(const char *, cov_pcs_out, "",
+ "If non-empty, write the coverage pc table to this file. ")
COMMON_FLAG(bool, full_address_space, false,
"Sanitize complete address space; "
"by default kernel area on 32-bit platforms will not be sanitized")
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_flat_map.h b/compiler-rt/lib/sanitizer_common/sanitizer_flat_map.h
new file mode 100644
index 000000000000..05fb554d20c1
--- /dev/null
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_flat_map.h
@@ -0,0 +1,173 @@
+//===-- sanitizer_flat_map.h ------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Part of the Sanitizer Allocator.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SANITIZER_FLAT_MAP_H
+#define SANITIZER_FLAT_MAP_H
+
+#include "sanitizer_atomic.h"
+#include "sanitizer_common.h"
+#include "sanitizer_internal_defs.h"
+#include "sanitizer_local_address_space_view.h"
+#include "sanitizer_mutex.h"
+
+namespace __sanitizer {
+
+// Call these callbacks on mmap/munmap.
+struct NoOpMapUnmapCallback {
+ void OnMap(uptr p, uptr size) const {}
+ void OnUnmap(uptr p, uptr size) const {}
+};
+
+// Maps integers in range [0, kSize) to values.
+template <typename T, u64 kSize,
+ typename AddressSpaceViewTy = LocalAddressSpaceView>
+class FlatMap {
+ public:
+ using AddressSpaceView = AddressSpaceViewTy;
+ void Init() { internal_memset(map_, 0, sizeof(map_)); }
+
+ constexpr uptr size() const { return kSize; }
+
+ bool contains(uptr idx) const {
+ CHECK_LT(idx, kSize);
+ return true;
+ }
+
+ T &operator[](uptr idx) {
+ DCHECK_LT(idx, kSize);
+ return map_[idx];
+ }
+
+ const T &operator[](uptr idx) const {
+ DCHECK_LT(idx, kSize);
+ return map_[idx];
+ }
+
+ private:
+ T map_[kSize];
+};
+
+// TwoLevelMap maps integers in range [0, kSize1*kSize2) to values.
+// It is implemented as a two-dimensional array: array of kSize1 pointers
+// to kSize2-byte arrays. The secondary arrays are mmaped on demand.
+// Each value is initially zero and can be set to something else only once.
+// Setting and getting values from multiple threads is safe w/o extra locking.
+template <typename T, u64 kSize1, u64 kSize2,
+ typename AddressSpaceViewTy = LocalAddressSpaceView,
+ class MapUnmapCallback = NoOpMapUnmapCallback>
+class TwoLevelMap {
+ static_assert(IsPowerOfTwo(kSize2), "Use a power of two for performance.");
+
+ public:
+ using AddressSpaceView = AddressSpaceViewTy;
+ void Init() {
+ mu_.Init();
+ internal_memset(map1_, 0, sizeof(map1_));
+ }
+
+ void TestOnlyUnmap() {
+ for (uptr i = 0; i < kSize1; i++) {
+ T *p = Get(i);
+ if (!p)
+ continue;
+ MapUnmapCallback().OnUnmap(reinterpret_cast<uptr>(p), MmapSize());
+ UnmapOrDie(p, kSize2);
+ }
+ Init();
+ }
+
+ uptr MemoryUsage() const {
+ uptr res = 0;
+ for (uptr i = 0; i < kSize1; i++) {
+ T *p = Get(i);
+ if (!p)
+ continue;
+ res += MmapSize();
+ }
+ return res;
+ }
+
+ constexpr uptr size() const { return kSize1 * kSize2; }
+ constexpr uptr size1() const { return kSize1; }
+ constexpr uptr size2() const { return kSize2; }
+
+ bool contains(uptr idx) const {
+ CHECK_LT(idx, kSize1 * kSize2);
+ return Get(idx / kSize2);
+ }
+
+ const T &operator[](uptr idx) const {
+ DCHECK_LT(idx, kSize1 * kSize2);
+ T *map2 = GetOrCreate(idx / kSize2);
+ return *AddressSpaceView::Load(&map2[idx % kSize2]);
+ }
+
+ T &operator[](uptr idx) {
+ DCHECK_LT(idx, kSize1 * kSize2);
+ T *map2 = GetOrCreate(idx / kSize2);
+ return *AddressSpaceView::LoadWritable(&map2[idx % kSize2]);
+ }
+
+ private:
+ constexpr uptr MmapSize() const {
+ return RoundUpTo(kSize2 * sizeof(T), GetPageSizeCached());
+ }
+
+ T *Get(uptr idx) const {
+ DCHECK_LT(idx, kSize1);
+ return reinterpret_cast<T *>(
+ atomic_load(&map1_[idx], memory_order_acquire));
+ }
+
+ T *GetOrCreate(uptr idx) const {
+ DCHECK_LT(idx, kSize1);
+ // This code needs to use memory_order_acquire/consume, but we use
+ // memory_order_relaxed for performance reasons (matters for arm64). We
+ // expect memory_order_relaxed to be effectively equivalent to
+ // memory_order_consume in this case for all relevant architectures: all
+ // dependent data is reachable only by dereferencing the resulting pointer.
+ // If relaxed load fails to see stored ptr, the code will fall back to
+ // Create() and reload the value again with locked mutex as a memory
+ // barrier.
+ T *res = reinterpret_cast<T *>(atomic_load_relaxed(&map1_[idx]));
+ if (LIKELY(res))
+ return res;
+ return Create(idx);
+ }
+
+ NOINLINE T *Create(uptr idx) const {
+ SpinMutexLock l(&mu_);
+ T *res = Get(idx);
+ if (!res) {
+ res = reinterpret_cast<T *>(MmapOrDie(MmapSize(), "TwoLevelMap"));
+ MapUnmapCallback().OnMap(reinterpret_cast<uptr>(res), kSize2);
+ atomic_store(&map1_[idx], reinterpret_cast<uptr>(res),
+ memory_order_release);
+ }
+ return res;
+ }
+
+ mutable StaticSpinMutex mu_;
+ mutable atomic_uintptr_t map1_[kSize1];
+};
+
+template <u64 kSize, typename AddressSpaceViewTy = LocalAddressSpaceView>
+using FlatByteMap = FlatMap<u8, kSize, AddressSpaceViewTy>;
+
+template <u64 kSize1, u64 kSize2,
+ typename AddressSpaceViewTy = LocalAddressSpaceView,
+ class MapUnmapCallback = NoOpMapUnmapCallback>
+using TwoLevelByteMap =
+ TwoLevelMap<u8, kSize1, kSize2, AddressSpaceViewTy, MapUnmapCallback>;
+} // namespace __sanitizer
+
+#endif
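As a rough sketch of how these containers are meant to be used (the sizes and stored values below are illustrative):

#include "sanitizer_flat_map.h"

namespace __sanitizer {

// 2^24 byte-sized slots, split into 2^12 lazily mmaped second-level arrays.
static TwoLevelByteMap<(1 << 12), (1 << 12)> chunk_class_map;

void FlatMapUsageSketch() {
  chunk_class_map.Init();           // zeroes the first level, inits the mutex
  chunk_class_map[12345] = 7;       // first touch maps the needed second level
  u8 cls = chunk_class_map[12345];  // later reads see the stored value
  (void)cls;
}

}  // namespace __sanitizer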
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_fuchsia.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_fuchsia.cpp
index 65bc398656c9..c7b30d988365 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_fuchsia.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_fuchsia.cpp
@@ -112,47 +112,6 @@ void FutexWake(atomic_uint32_t *p, u32 count) {
CHECK_EQ(status, ZX_OK);
}
-enum MutexState : int { MtxUnlocked = 0, MtxLocked = 1, MtxSleeping = 2 };
-
-BlockingMutex::BlockingMutex() {
- // NOTE! It's important that this use internal_memset, because plain
- // memset might be intercepted (e.g., actually be __asan_memset).
- // Defining this so the compiler initializes each field, e.g.:
- // BlockingMutex::BlockingMutex() : BlockingMutex(LINKER_INITIALIZED) {}
- // might result in the compiler generating a call to memset, which would
- // have the same problem.
- internal_memset(this, 0, sizeof(*this));
-}
-
-void BlockingMutex::Lock() {
- CHECK_EQ(owner_, 0);
- atomic_uint32_t *m = reinterpret_cast<atomic_uint32_t *>(&opaque_storage_);
- if (atomic_exchange(m, MtxLocked, memory_order_acquire) == MtxUnlocked)
- return;
- while (atomic_exchange(m, MtxSleeping, memory_order_acquire) != MtxUnlocked) {
- zx_status_t status =
- _zx_futex_wait(reinterpret_cast<zx_futex_t *>(m), MtxSleeping,
- ZX_HANDLE_INVALID, ZX_TIME_INFINITE);
- if (status != ZX_ERR_BAD_STATE) // Normal race.
- CHECK_EQ(status, ZX_OK);
- }
-}
-
-void BlockingMutex::Unlock() {
- atomic_uint32_t *m = reinterpret_cast<atomic_uint32_t *>(&opaque_storage_);
- u32 v = atomic_exchange(m, MtxUnlocked, memory_order_release);
- CHECK_NE(v, MtxUnlocked);
- if (v == MtxSleeping) {
- zx_status_t status = _zx_futex_wake(reinterpret_cast<zx_futex_t *>(m), 1);
- CHECK_EQ(status, ZX_OK);
- }
-}
-
-void BlockingMutex::CheckLocked() const {
- auto m = reinterpret_cast<atomic_uint32_t const *>(&opaque_storage_);
- CHECK_NE(MtxUnlocked, atomic_load(m, memory_order_relaxed));
-}
-
uptr GetPageSize() { return _zx_system_get_page_size(); }
uptr GetMmapGranularity() { return _zx_system_get_page_size(); }
@@ -413,7 +372,7 @@ bool IsAccessibleMemoryRange(uptr beg, uptr size) {
}
// FIXME implement on this platform.
-void GetMemoryProfile(fill_profile_f cb, uptr *stats, uptr stats_size) {}
+void GetMemoryProfile(fill_profile_f cb, uptr *stats) {}
bool ReadFileToBuffer(const char *file_name, char **buff, uptr *buff_size,
uptr *read_len, uptr max_len, error_t *errno_p) {
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_hash.h b/compiler-rt/lib/sanitizer_common/sanitizer_hash.h
index 3d97dcc5d280..f7cf9f234e6f 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_hash.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_hash.h
@@ -38,6 +38,30 @@ class MurMur2HashBuilder {
return x;
}
};
+
+class MurMur2Hash64Builder {
+ static const u64 m = 0xc6a4a7935bd1e995ull;
+ static const u64 seed = 0x9747b28c9747b28cull;
+ static const u64 r = 47;
+ u64 h;
+
+ public:
+ explicit MurMur2Hash64Builder(u64 init = 0) { h = seed ^ (init * m); }
+ void add(u64 k) {
+ k *= m;
+ k ^= k >> r;
+ k *= m;
+ h ^= k;
+ h *= m;
+ }
+ u64 get() {
+ u64 x = h;
+ x ^= x >> r;
+ x *= m;
+ x ^= x >> r;
+ return x;
+ }
+};
} //namespace __sanitizer
#endif // SANITIZER_HASH_H
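A small sketch of the new 64-bit builder; it follows the same add/get pattern as the existing 32-bit MurMur2HashBuilder (the helper below is illustrative, not part of this change):

#include "sanitizer_hash.h"

namespace __sanitizer {

u64 HashValues64(const uptr *vals, uptr n) {
  MurMur2Hash64Builder h(n);  // seed with the element count
  for (uptr i = 0; i < n; i++)
    h.add(vals[i]);           // mix in each value
  return h.get();             // finalize and return the 64-bit hash
}

}  // namespace __sanitizer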
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_interceptors_ioctl_netbsd.inc b/compiler-rt/lib/sanitizer_common/sanitizer_interceptors_ioctl_netbsd.inc
index 576807ea3a6a..9683b97ab91d 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_interceptors_ioctl_netbsd.inc
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_interceptors_ioctl_netbsd.inc
@@ -1406,7 +1406,7 @@ static void ioctl_table_fill() {
_(URIO_SEND_COMMAND, READWRITE, struct_urio_command_sz);
_(URIO_RECV_COMMAND, READWRITE, struct_urio_command_sz);
#undef _
-} // NOLINT
+}
static bool ioctl_initialized = false;
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_interface_internal.h b/compiler-rt/lib/sanitizer_common/sanitizer_interface_internal.h
index 0b001c1c4830..1600d31c30c0 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_interface_internal.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_interface_internal.h
@@ -111,12 +111,13 @@ extern "C" {
SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
void __sanitizer_cov_trace_pc_guard_init(__sanitizer::u32*,
__sanitizer::u32*);
- SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
- void __sanitizer_cov_8bit_counters_init();
+ SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE void
+ __sanitizer_cov_8bit_counters_init(char *, char *);
SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE void
__sanitizer_cov_bool_flag_init();
SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE void
- __sanitizer_cov_pcs_init();
+ __sanitizer_cov_pcs_init(const __sanitizer::uptr *,
+ const __sanitizer::uptr *);
} // extern "C"
#endif // SANITIZER_INTERFACE_INTERNAL_H
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_internal_defs.h b/compiler-rt/lib/sanitizer_common/sanitizer_internal_defs.h
index 84053fec2649..d0db0129d4af 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_internal_defs.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_internal_defs.h
@@ -125,6 +125,10 @@
# define __has_attribute(x) 0
#endif
+#if !defined(__has_cpp_attribute)
+# define __has_cpp_attribute(x) 0
+#endif
+
// For portability reasons we do not include stddef.h, stdint.h or any other
// system header, but we do need some basic types that are not defined
// in a portable way by the language itself.
@@ -135,8 +139,13 @@ namespace __sanitizer {
typedef unsigned long long uptr;
typedef signed long long sptr;
#else
+# if (SANITIZER_WORDSIZE == 64) || SANITIZER_MAC || SANITIZER_WINDOWS
typedef unsigned long uptr;
typedef signed long sptr;
+# else
+typedef unsigned int uptr;
+typedef signed int sptr;
+# endif
#endif // defined(_WIN64)
#if defined(__x86_64__)
// Since x32 uses ILP32 data model in 64-bit hardware mode, we must use
@@ -168,10 +177,9 @@ typedef long pid_t;
typedef int pid_t;
#endif
-#if SANITIZER_FREEBSD || SANITIZER_NETBSD || \
- SANITIZER_MAC || \
+#if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_MAC || \
(SANITIZER_SOLARIS && (defined(_LP64) || _FILE_OFFSET_BITS == 64)) || \
- (SANITIZER_LINUX && defined(__x86_64__))
+ (SANITIZER_LINUX && (defined(__x86_64__) || defined(__hexagon__)))
typedef u64 OFF_T;
#else
typedef uptr OFF_T;
@@ -250,6 +258,12 @@ typedef u64 tid_t;
# define NOEXCEPT throw()
#endif
+#if __has_cpp_attribute(clang::fallthrough)
+# define FALLTHROUGH [[clang::fallthrough]]
+#else
+# define FALLTHROUGH
+#endif
+
// Unaligned versions of basic types.
typedef ALIGNED(1) u16 uu16;
typedef ALIGNED(1) u32 uu32;
@@ -277,14 +291,17 @@ void NORETURN CheckFailed(const char *file, int line, const char *cond,
u64 v1, u64 v2);
// Check macro
-#define RAW_CHECK_MSG(expr, msg) do { \
- if (UNLIKELY(!(expr))) { \
- RawWrite(msg); \
- Die(); \
- } \
-} while (0)
+#define RAW_CHECK_MSG(expr, msg, ...) \
+ do { \
+ if (UNLIKELY(!(expr))) { \
+ const char* msgs[] = {msg, __VA_ARGS__}; \
+ for (const char* m : msgs) RawWrite(m); \
+ Die(); \
+ } \
+ } while (0)
-#define RAW_CHECK(expr) RAW_CHECK_MSG(expr, #expr)
+#define RAW_CHECK(expr) RAW_CHECK_MSG(expr, #expr "\n", )
+#define RAW_CHECK_VA(expr, ...) RAW_CHECK_MSG(expr, #expr "\n", __VA_ARGS__)
#define CHECK_IMPL(c1, op, c2) \
do { \
@@ -409,8 +426,14 @@ inline void Trap() {
(void)enable_fp; \
} while (0)
-constexpr u32 kInvalidTid = -1;
-constexpr u32 kMainTid = 0;
+// Internal thread identifier allocated by ThreadRegistry.
+typedef u32 Tid;
+constexpr Tid kInvalidTid = -1;
+constexpr Tid kMainTid = 0;
+
+// Stack depot stack identifier.
+typedef u32 StackID;
+const StackID kInvalidStackID = 0;
} // namespace __sanitizer
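[editor's note] The RAW_CHECK_MSG rewrite above turns the macro variadic: every extra argument becomes another string written via RawWrite before Die(). A minimal sketch of the intended call pattern, assuming a hypothetical caller (CheckMapped and name are illustrative, not from the patch):

// Hedged sketch of RAW_CHECK_VA usage: on failure the condition text,
// then each extra string, is written before the process dies.
void CheckMapped(void *p, const char *name) {
  RAW_CHECK_VA(p != nullptr, "while mapping ", name, "\n");
}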
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_libc.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_libc.cpp
index 4bc04b486870..d3076f0da489 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_libc.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_libc.cpp
@@ -258,6 +258,18 @@ s64 internal_simple_strtoll(const char *nptr, const char **endptr, int base) {
}
}
+uptr internal_wcslen(const wchar_t *s) {
+ uptr i = 0;
+ while (s[i]) i++;
+ return i;
+}
+
+uptr internal_wcsnlen(const wchar_t *s, uptr maxlen) {
+ uptr i = 0;
+ while (i < maxlen && s[i]) i++;
+ return i;
+}
+
bool mem_is_zero(const char *beg, uptr size) {
CHECK_LE(size, 1ULL << FIRST_32_SECOND_64(30, 40)); // Sanity check.
const char *end = beg + size;
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_libc.h b/compiler-rt/lib/sanitizer_common/sanitizer_libc.h
index bcb81ebbc803..39a212665d0a 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_libc.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_libc.h
@@ -49,7 +49,10 @@ char *internal_strrchr(const char *s, int c);
char *internal_strstr(const char *haystack, const char *needle);
// Works only for base=10 and doesn't set errno.
s64 internal_simple_strtoll(const char *nptr, const char **endptr, int base);
-int internal_snprintf(char *buffer, uptr length, const char *format, ...);
+int internal_snprintf(char *buffer, uptr length, const char *format, ...)
+ FORMAT(3, 4);
+uptr internal_wcslen(const wchar_t *s);
+uptr internal_wcsnlen(const wchar_t *s, uptr maxlen);
// Return true if all bytes in [mem, mem+size) are zero.
// Optimized for the case when the result is true.
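[editor's note] The new internal_wcslen/internal_wcsnlen helpers follow the usual wcslen/wcsnlen semantics but avoid calling into (possibly intercepted) libc. A small illustrative check of their behavior; Demo is a hypothetical function, not part of the patch:

// Hedged illustration of the wide-string helpers' semantics.
void Demo() {
  const wchar_t *ws = L"abcd";
  CHECK_EQ(internal_wcslen(ws), 4);      // counts up to the terminating L'\0'
  CHECK_EQ(internal_wcsnlen(ws, 2), 2);  // capped at maxlen
}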
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_libignore.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_libignore.cpp
index a65d3d896e33..caaba3155a7b 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_libignore.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_libignore.cpp
@@ -22,9 +22,9 @@ LibIgnore::LibIgnore(LinkerInitialized) {
}
void LibIgnore::AddIgnoredLibrary(const char *name_templ) {
- BlockingMutexLock lock(&mutex_);
+ Lock lock(&mutex_);
if (count_ >= kMaxLibs) {
- Report("%s: too many ignored libraries (max: %d)\n", SanitizerToolName,
+ Report("%s: too many ignored libraries (max: %zu)\n", SanitizerToolName,
kMaxLibs);
Die();
}
@@ -36,7 +36,7 @@ void LibIgnore::AddIgnoredLibrary(const char *name_templ) {
}
void LibIgnore::OnLibraryLoaded(const char *name) {
- BlockingMutexLock lock(&mutex_);
+ Lock lock(&mutex_);
// Try to match suppressions with symlink target.
InternalMmapVector<char> buf(kMaxPathLength);
if (name && internal_readlink(name, buf.data(), buf.size() - 1) > 0 &&
@@ -105,7 +105,7 @@ void LibIgnore::OnLibraryLoaded(const char *name) {
continue;
if (IsPcInstrumented(range.beg) && IsPcInstrumented(range.end - 1))
continue;
- VReport(1, "Adding instrumented range %p-%p from library '%s'\n",
+ VReport(1, "Adding instrumented range 0x%zx-0x%zx from library '%s'\n",
range.beg, range.end, mod.full_name());
const uptr idx =
atomic_load(&instrumented_ranges_count_, memory_order_relaxed);
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_libignore.h b/compiler-rt/lib/sanitizer_common/sanitizer_libignore.h
index 256f685979f4..18e4d83ed77f 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_libignore.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_libignore.h
@@ -77,7 +77,7 @@ class LibIgnore {
LibCodeRange instrumented_code_ranges_[kMaxInstrumentedRanges];
// Cold part:
- BlockingMutex mutex_;
+ Mutex mutex_;
uptr count_;
Lib libs_[kMaxLibs];
bool track_instrumented_libs_;
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
index 9b7d87eb85e1..596037d77222 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
@@ -150,17 +150,39 @@ const int FUTEX_WAKE_PRIVATE = FUTEX_WAKE | FUTEX_PRIVATE_FLAG;
namespace __sanitizer {
-#if SANITIZER_LINUX && defined(__x86_64__)
-#include "sanitizer_syscall_linux_x86_64.inc"
-#elif SANITIZER_LINUX && SANITIZER_RISCV64
-#include "sanitizer_syscall_linux_riscv64.inc"
-#elif SANITIZER_LINUX && defined(__aarch64__)
-#include "sanitizer_syscall_linux_aarch64.inc"
-#elif SANITIZER_LINUX && defined(__arm__)
-#include "sanitizer_syscall_linux_arm.inc"
-#else
-#include "sanitizer_syscall_generic.inc"
-#endif
+void SetSigProcMask(__sanitizer_sigset_t *set, __sanitizer_sigset_t *old) {
+ CHECK_EQ(0, internal_sigprocmask(SIG_SETMASK, set, old));
+}
+
+ScopedBlockSignals::ScopedBlockSignals(__sanitizer_sigset_t *copy) {
+ __sanitizer_sigset_t set;
+ internal_sigfillset(&set);
+# if SANITIZER_LINUX && !SANITIZER_ANDROID
+ // Glibc uses SIGSETXID signal during setuid call. If this signal is blocked
+ // on any thread, setuid call hangs.
+ // See test/sanitizer_common/TestCases/Linux/setuid.c.
+ internal_sigdelset(&set, 33);
+# endif
+ SetSigProcMask(&set, &saved_);
+ if (copy)
+ internal_memcpy(copy, &saved_, sizeof(saved_));
+}
+
+ScopedBlockSignals::~ScopedBlockSignals() { SetSigProcMask(&saved_, nullptr); }
+
+# if SANITIZER_LINUX && defined(__x86_64__)
+# include "sanitizer_syscall_linux_x86_64.inc"
+# elif SANITIZER_LINUX && SANITIZER_RISCV64
+# include "sanitizer_syscall_linux_riscv64.inc"
+# elif SANITIZER_LINUX && defined(__aarch64__)
+# include "sanitizer_syscall_linux_aarch64.inc"
+# elif SANITIZER_LINUX && defined(__arm__)
+# include "sanitizer_syscall_linux_arm.inc"
+# elif SANITIZER_LINUX && defined(__hexagon__)
+# include "sanitizer_syscall_linux_hexagon.inc"
+# else
+# include "sanitizer_syscall_generic.inc"
+# endif
// --------------- sanitizer_libc.h
#if !SANITIZER_SOLARIS && !SANITIZER_NETBSD
@@ -415,7 +437,7 @@ uptr internal_unlink(const char *path) {
}
uptr internal_rename(const char *oldpath, const char *newpath) {
-#if defined(__riscv)
+#if defined(__riscv) && defined(__linux__)
return internal_syscall(SYSCALL(renameat2), AT_FDCWD, (uptr)oldpath, AT_FDCWD,
(uptr)newpath, 0);
#elif SANITIZER_USES_CANONICAL_LINUX_SYSCALLS
@@ -659,48 +681,6 @@ void FutexWake(atomic_uint32_t *p, u32 count) {
# endif
}
-enum { MtxUnlocked = 0, MtxLocked = 1, MtxSleeping = 2 };
-
-BlockingMutex::BlockingMutex() {
- internal_memset(this, 0, sizeof(*this));
-}
-
-void BlockingMutex::Lock() {
- CHECK_EQ(owner_, 0);
- atomic_uint32_t *m = reinterpret_cast<atomic_uint32_t *>(&opaque_storage_);
- if (atomic_exchange(m, MtxLocked, memory_order_acquire) == MtxUnlocked)
- return;
- while (atomic_exchange(m, MtxSleeping, memory_order_acquire) != MtxUnlocked) {
-#if SANITIZER_FREEBSD
- _umtx_op(m, UMTX_OP_WAIT_UINT, MtxSleeping, 0, 0);
-#elif SANITIZER_NETBSD
- sched_yield(); /* No userspace futex-like synchronization */
-#else
- internal_syscall(SYSCALL(futex), (uptr)m, FUTEX_WAIT_PRIVATE, MtxSleeping,
- 0, 0, 0);
-#endif
- }
-}
-
-void BlockingMutex::Unlock() {
- atomic_uint32_t *m = reinterpret_cast<atomic_uint32_t *>(&opaque_storage_);
- u32 v = atomic_exchange(m, MtxUnlocked, memory_order_release);
- CHECK_NE(v, MtxUnlocked);
- if (v == MtxSleeping) {
-#if SANITIZER_FREEBSD
- _umtx_op(m, UMTX_OP_WAKE, 1, 0, 0);
-#elif SANITIZER_NETBSD
- /* No userspace futex-like synchronization */
-#else
- internal_syscall(SYSCALL(futex), (uptr)m, FUTEX_WAKE_PRIVATE, 1, 0, 0, 0);
-#endif
- }
-}
-
-void BlockingMutex::CheckLocked() const {
- auto m = reinterpret_cast<atomic_uint32_t const *>(&opaque_storage_);
- CHECK_NE(MtxUnlocked, atomic_load(m, memory_order_relaxed));
-}
# endif // !SANITIZER_SOLARIS
// ----------------- sanitizer_linux.h
@@ -1217,7 +1197,8 @@ void ForEachMappedRegion(link_map *map, void (*cb)(const void *, uptr)) {
}
#endif
-#if defined(__x86_64__) && SANITIZER_LINUX
+#if SANITIZER_LINUX
+#if defined(__x86_64__)
// We cannot use glibc's clone wrapper, because it messes with the child
// task's TLS. It writes the PID and TID of the child task to its thread
// descriptor, but in our case the child task shares the thread descriptor with
@@ -1399,7 +1380,7 @@ uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg,
#elif defined(__aarch64__)
uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg,
int *parent_tidptr, void *newtls, int *child_tidptr) {
- long long res;
+ register long long res __asm__("x0");
if (!fn || !child_stack)
return -EINVAL;
CHECK_EQ(0, (uptr)child_stack % 16);
@@ -1556,7 +1537,7 @@ uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg,
: "cr0", "cr1", "memory", "ctr", "r0", "r27", "r28", "r29");
return res;
}
-#elif defined(__i386__) && SANITIZER_LINUX
+#elif defined(__i386__)
uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg,
int *parent_tidptr, void *newtls, int *child_tidptr) {
int res;
@@ -1621,7 +1602,7 @@ uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg,
: "memory");
return res;
}
-#elif defined(__arm__) && SANITIZER_LINUX
+#elif defined(__arm__)
uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg,
int *parent_tidptr, void *newtls, int *child_tidptr) {
unsigned int res;
@@ -1687,7 +1668,8 @@ uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg,
: "memory");
return res;
}
-#endif // defined(__x86_64__) && SANITIZER_LINUX
+#endif
+#endif // SANITIZER_LINUX
#if SANITIZER_LINUX
int internal_uname(struct utsname *buf) {
@@ -1779,17 +1761,9 @@ HandleSignalMode GetHandleSignalMode(int signum) {
#if !SANITIZER_GO
void *internal_start_thread(void *(*func)(void *arg), void *arg) {
// Start the thread with signals blocked, otherwise it can steal user signals.
- __sanitizer_sigset_t set, old;
- internal_sigfillset(&set);
-#if SANITIZER_LINUX && !SANITIZER_ANDROID
- // Glibc uses SIGSETXID signal during setuid call. If this signal is blocked
- // on any thread, setuid call hangs (see test/tsan/setuid.c).
- internal_sigdelset(&set, 33);
-#endif
- internal_sigprocmask(SIG_SETMASK, &set, &old);
+ ScopedBlockSignals block(nullptr);
void *th;
real_pthread_create(&th, nullptr, func, arg);
- internal_sigprocmask(SIG_SETMASK, &old, nullptr);
return th;
}
@@ -1811,7 +1785,7 @@ struct __sanitizer_esr_context {
static bool Aarch64GetESR(ucontext_t *ucontext, u64 *esr) {
static const u32 kEsrMagic = 0x45535201;
- u8 *aux = ucontext->uc_mcontext.__reserved;
+ u8 *aux = reinterpret_cast<u8 *>(ucontext->uc_mcontext.__reserved);
while (true) {
_aarch64_ctx *ctx = (_aarch64_ctx *)aux;
if (ctx->size == 0) break;
@@ -1917,7 +1891,11 @@ SignalContext::WriteFlag SignalContext::GetWriteFlag() const {
u32 instr = *(u32 *)pc;
return (instr >> 21) & 1 ? WRITE: READ;
#elif defined(__riscv)
+#if SANITIZER_FREEBSD
+ unsigned long pc = ucontext->uc_mcontext.mc_gpregs.gp_sepc;
+#else
unsigned long pc = ucontext->uc_mcontext.__gregs[REG_PC];
+#endif
unsigned faulty_instruction = *(uint16_t *)pc;
#if defined(__riscv_compressed)
@@ -2136,12 +2114,23 @@ static void GetPcSpBp(void *context, uptr *pc, uptr *sp, uptr *bp) {
*sp = ucontext->uc_mcontext.gregs[15];
#elif defined(__riscv)
ucontext_t *ucontext = (ucontext_t*)context;
+# if SANITIZER_FREEBSD
+ *pc = ucontext->uc_mcontext.mc_gpregs.gp_sepc;
+ *bp = ucontext->uc_mcontext.mc_gpregs.gp_s[0];
+ *sp = ucontext->uc_mcontext.mc_gpregs.gp_sp;
+# else
*pc = ucontext->uc_mcontext.__gregs[REG_PC];
*bp = ucontext->uc_mcontext.__gregs[REG_S0];
*sp = ucontext->uc_mcontext.__gregs[REG_SP];
-#else
-# error "Unsupported arch"
-#endif
+# endif
+# elif defined(__hexagon__)
+ ucontext_t *ucontext = (ucontext_t *)context;
+ *pc = ucontext->uc_mcontext.pc;
+ *bp = ucontext->uc_mcontext.r30;
+ *sp = ucontext->uc_mcontext.r29;
+# else
+# error "Unsupported arch"
+# endif
}
void SignalContext::InitPcSpBp() { GetPcSpBp(context, &pc, &sp, &bp); }
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.h b/compiler-rt/lib/sanitizer_common/sanitizer_linux.h
index 9a23fcfb3b93..6a235db0ee2e 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.h
@@ -49,7 +49,17 @@ uptr internal_getdents(fd_t fd, struct linux_dirent *dirp, unsigned int count);
uptr internal_sigaltstack(const void* ss, void* oss);
uptr internal_sigprocmask(int how, __sanitizer_sigset_t *set,
__sanitizer_sigset_t *oldset);
-#if SANITIZER_GLIBC
+
+void SetSigProcMask(__sanitizer_sigset_t *set, __sanitizer_sigset_t *oldset);
+struct ScopedBlockSignals {
+ explicit ScopedBlockSignals(__sanitizer_sigset_t *copy);
+ ~ScopedBlockSignals();
+
+ private:
+ __sanitizer_sigset_t saved_;
+};
+
+# if SANITIZER_GLIBC
uptr internal_clock_gettime(__sanitizer_clockid_t clk_id, void *tp);
#endif
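[editor's note] ScopedBlockSignals packages the block-all-signals-then-restore dance that internal_start_thread used to spell out by hand. A minimal sketch of the intended use, assuming a hypothetical caller (SpawnHelperThread is illustrative only):

// Hedged sketch: block all signals for the scope; the optional 'copy'
// argument receives the previous mask (pass nullptr if it is not needed).
void SpawnHelperThread(void *(*fn)(void *), void *arg) {
  __sanitizer_sigset_t saved;
  ScopedBlockSignals block(&saved);   // blocks signals, hands back the old mask
  void *th;
  real_pthread_create(&th, nullptr, fn, arg);
}                                     // destructor restores the pre-block mask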
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux_s390.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux_s390.cpp
index bb2f5b5f9f7d..74db831b0aad 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_linux_s390.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux_s390.cpp
@@ -57,8 +57,10 @@ uptr internal_mmap(void *addr, uptr length, int prot, int flags, int fd,
uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg,
int *parent_tidptr, void *newtls, int *child_tidptr) {
- if (!fn || !child_stack)
- return -EINVAL;
+ if (!fn || !child_stack) {
+ errno = EINVAL;
+ return -1;
+ }
CHECK_EQ(0, (uptr)child_stack % 16);
// Minimum frame size.
#ifdef __s390x__
@@ -71,9 +73,9 @@ uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg,
// And pass parameters.
((unsigned long *)child_stack)[1] = (uptr)fn;
((unsigned long *)child_stack)[2] = (uptr)arg;
- register long res __asm__("r2");
+ register uptr res __asm__("r2");
register void *__cstack __asm__("r2") = child_stack;
- register int __flags __asm__("r3") = flags;
+ register long __flags __asm__("r3") = flags;
register int * __ptidptr __asm__("r4") = parent_tidptr;
register int * __ctidptr __asm__("r5") = child_tidptr;
register void * __newtls __asm__("r6") = newtls;
@@ -113,6 +115,10 @@ uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg,
"r"(__ctidptr),
"r"(__newtls)
: "memory", "cc");
+ if (res >= (uptr)-4095) {
+ errno = -res;
+ return -1;
+ }
return res;
}
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_local_address_space_view.h b/compiler-rt/lib/sanitizer_common/sanitizer_local_address_space_view.h
index 0e19c4d4a801..a47cfc945cd8 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_local_address_space_view.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_local_address_space_view.h
@@ -17,7 +17,7 @@
// instantiated with the `LocalAddressSpaceView` type. This type is used to
// load any pointers in instance methods. This implementation is effectively
// a no-op. When an object is to be used in an out-of-process manner it is
-// instansiated with the `RemoteAddressSpaceView` type.
+// instantiated with the `RemoteAddressSpaceView` type.
//
// By making `AddressSpaceView` a template parameter of an object, it can
// change its implementation at compile time which has no run time overhead.
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp
index 083595d1505f..b67203d4c10e 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp
@@ -516,25 +516,6 @@ void FutexWait(atomic_uint32_t *p, u32 cmp) {
void FutexWake(atomic_uint32_t *p, u32 count) {}
-BlockingMutex::BlockingMutex() {
- internal_memset(this, 0, sizeof(*this));
-}
-
-void BlockingMutex::Lock() {
- CHECK(sizeof(OSSpinLock) <= sizeof(opaque_storage_));
- CHECK_EQ(OS_SPINLOCK_INIT, 0);
- CHECK_EQ(owner_, 0);
- OSSpinLockLock((OSSpinLock*)&opaque_storage_);
-}
-
-void BlockingMutex::Unlock() {
- OSSpinLockUnlock((OSSpinLock*)&opaque_storage_);
-}
-
-void BlockingMutex::CheckLocked() const {
- CHECK_NE(*(const OSSpinLock*)&opaque_storage_, 0);
-}
-
u64 NanoTime() {
timeval tv;
internal_memset(&tv, 0, sizeof(tv));
@@ -562,6 +543,9 @@ uptr TlsBaseAddr() {
asm("movq %%gs:0,%0" : "=r"(segbase));
#elif defined(__i386__)
asm("movl %%gs:0,%0" : "=r"(segbase));
+#elif defined(__aarch64__)
+ asm("mrs %x0, tpidrro_el0" : "=r"(segbase));
+ segbase &= 0x07ul; // clearing lower bits, cpu id stored there
#endif
return segbase;
}
@@ -784,8 +768,8 @@ void *internal_start_thread(void *(*func)(void *arg), void *arg) {
void internal_join_thread(void *th) { pthread_join((pthread_t)th, 0); }
#if !SANITIZER_GO
-static BlockingMutex syslog_lock(LINKER_INITIALIZED);
-#endif
+static Mutex syslog_lock;
+# endif
void WriteOneLineToSyslog(const char *s) {
#if !SANITIZER_GO
@@ -800,7 +784,7 @@ void WriteOneLineToSyslog(const char *s) {
// buffer to store crash report application information
static char crashreporter_info_buff[__sanitizer::kErrorMessageBufferSize] = {};
-static BlockingMutex crashreporter_info_mutex(LINKER_INITIALIZED);
+static Mutex crashreporter_info_mutex;
extern "C" {
// Integrate with crash reporter libraries.
@@ -830,7 +814,7 @@ asm(".desc ___crashreporter_info__, 0x10");
} // extern "C"
static void CRAppendCrashLogMessage(const char *msg) {
- BlockingMutexLock l(&crashreporter_info_mutex);
+ Lock l(&crashreporter_info_mutex);
internal_strlcat(crashreporter_info_buff, msg,
sizeof(crashreporter_info_buff));
#if HAVE_CRASHREPORTERCLIENT_H
@@ -874,7 +858,7 @@ void LogFullErrorReport(const char *buffer) {
// the reporting thread holds the thread registry mutex, and asl_log waits
// for GCD to dispatch a new thread, the process will deadlock, because the
// pthread_create wrapper needs to acquire the lock as well.
- BlockingMutexLock l(&syslog_lock);
+ Lock l(&syslog_lock);
if (common_flags()->log_to_syslog)
WriteToSyslog(buffer);
@@ -1330,7 +1314,7 @@ uptr FindAvailableMemoryRange(uptr size, uptr alignment, uptr left_padding,
}
// FIXME implement on this platform.
-void GetMemoryProfile(fill_profile_f cb, uptr *stats, uptr stats_size) { }
+void GetMemoryProfile(fill_profile_f cb, uptr *stats) {}
void SignalContext::DumpAllRegisters(void *context) {
Report("Register values:\n");
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_malloc_mac.inc b/compiler-rt/lib/sanitizer_common/sanitizer_malloc_mac.inc
index e3b664f68b61..764e2cef5e74 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_malloc_mac.inc
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_malloc_mac.inc
@@ -23,6 +23,7 @@
#include <sys/mman.h>
#include "interception/interception.h"
+#include "sanitizer_common/sanitizer_allocator_dlsym.h"
#include "sanitizer_common/sanitizer_mac.h"
// Similar code is used in Google Perftools,
@@ -192,20 +193,15 @@ void *__sanitizer_mz_malloc(malloc_zone_t *zone, uptr size) {
return p;
}
+struct DlsymAlloc : public DlSymAllocator<DlsymAlloc> {
+ static bool UseImpl() { return !COMMON_MALLOC_SANITIZER_INITIALIZED; }
+};
+
extern "C"
SANITIZER_INTERFACE_ATTRIBUTE
void *__sanitizer_mz_calloc(malloc_zone_t *zone, size_t nmemb, size_t size) {
- if (UNLIKELY(!COMMON_MALLOC_SANITIZER_INITIALIZED)) {
- // Hack: dlsym calls calloc before REAL(calloc) is retrieved from dlsym.
- const size_t kCallocPoolSize = 1024;
- static uptr calloc_memory_for_dlsym[kCallocPoolSize];
- static size_t allocated;
- size_t size_in_words = ((nmemb * size) + kWordSize - 1) / kWordSize;
- void *mem = (void*)&calloc_memory_for_dlsym[allocated];
- allocated += size_in_words;
- CHECK(allocated < kCallocPoolSize);
- return mem;
- }
+ if (DlsymAlloc::Use())
+ return DlsymAlloc::Callocate(nmemb, size);
COMMON_MALLOC_CALLOC(nmemb, size);
return p;
}
@@ -223,6 +219,8 @@ extern "C"
SANITIZER_INTERFACE_ATTRIBUTE
void __sanitizer_mz_free(malloc_zone_t *zone, void *ptr) {
if (!ptr) return;
+ if (DlsymAlloc::PointerIsMine(ptr))
+ return DlsymAlloc::Free(ptr);
COMMON_MALLOC_FREE(ptr);
}
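[editor's note] The hand-rolled calloc pool is replaced by the shared DlSymAllocator CRTP helper. A brief sketch of the dispatch pattern it enables; the interface names (Use, Callocate, PointerIsMine, Free) are the ones appearing in this hunk, and the wrapper functions are hypothetical:

// Illustrative only: generic DlsymAlloc dispatch around the tool's allocator.
void *calloc_wrapper(size_t nmemb, size_t size) {
  if (DlsymAlloc::Use())                        // too early: dlsym() fallback
    return DlsymAlloc::Callocate(nmemb, size);
  COMMON_MALLOC_CALLOC(nmemb, size);            // normal sanitizer path
  return p;                                     // COMMON_MALLOC_CALLOC defines p
}

void free_wrapper(void *ptr) {
  if (!ptr) return;
  if (DlsymAlloc::PointerIsMine(ptr))           // came from the fallback pool
    return DlsymAlloc::Free(ptr);
  COMMON_MALLOC_FREE(ptr);
}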
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_mutex.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_mutex.cpp
index 46f1d0279ca1..40fe56661250 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_mutex.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_mutex.cpp
@@ -73,7 +73,7 @@ void DebugMutexInit() {
// Build adjacency matrix.
bool leaf[kMutexTypeMax];
internal_memset(&leaf, 0, sizeof(leaf));
- int cnt[kMutexTypeMax] = {};
+ int cnt[kMutexTypeMax];
internal_memset(&cnt, 0, sizeof(cnt));
for (int t = 0; t < kMutexTypeMax; t++) {
mutex_type_count = t;
@@ -174,7 +174,7 @@ struct InternalDeadlockDetector {
if (max_idx != MutexInvalid && !mutex_can_lock[max_idx][type]) {
Printf("%s: internal deadlock: can't lock %s under %s mutex\n", SanitizerToolName,
mutex_meta[type].name, mutex_meta[max_idx].name);
- PrintMutexPC(pc);
+ PrintMutexPC(locked[max_idx].pc);
CHECK(0);
}
locked[type].seq = ++sequence;
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_mutex.h b/compiler-rt/lib/sanitizer_common/sanitizer_mutex.h
index cbd1c25eb69f..5ec6efaa6490 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_mutex.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_mutex.h
@@ -95,7 +95,11 @@ enum {
// Go linker does not support THREADLOCAL variables,
// so we can't use per-thread state.
-#define SANITIZER_CHECK_DEADLOCKS (SANITIZER_DEBUG && !SANITIZER_GO)
+// Disable checked locks on Darwin. Although Darwin platforms support
+// THREADLOCAL variables they are not usable early on during process init when
+// `__sanitizer::Mutex` is used.
+#define SANITIZER_CHECK_DEADLOCKS \
+ (SANITIZER_DEBUG && !SANITIZER_GO && SANITIZER_SUPPORTS_THREADLOCAL && !SANITIZER_MAC)
#if SANITIZER_CHECK_DEADLOCKS
struct MutexMeta {
@@ -111,7 +115,7 @@ struct MutexMeta {
class CheckedMutex {
public:
- constexpr CheckedMutex(MutexType type)
+ explicit constexpr CheckedMutex(MutexType type)
#if SANITIZER_CHECK_DEADLOCKS
: type_(type)
#endif
@@ -154,13 +158,13 @@ class CheckedMutex {
// but this attribute is not supported by some older compilers.
class MUTEX Mutex : CheckedMutex {
public:
- constexpr Mutex(MutexType type = MutexUnchecked) : CheckedMutex(type) {}
+ explicit constexpr Mutex(MutexType type = MutexUnchecked)
+ : CheckedMutex(type) {}
void Lock() ACQUIRE() {
CheckedMutex::Lock();
u64 reset_mask = ~0ull;
u64 state = atomic_load_relaxed(&state_);
- const uptr kMaxSpinIters = 1500;
for (uptr spin_iters = 0;; spin_iters++) {
u64 new_state;
bool locked = (state & (kWriterLock | kReaderLockMask)) != 0;
@@ -189,8 +193,6 @@ class MUTEX Mutex : CheckedMutex {
// We've incremented waiting writers, so now block.
writers_.Wait();
spin_iters = 0;
- state = atomic_load(&state_, memory_order_relaxed);
- DCHECK_NE(state & kWriterSpinWait, 0);
} else {
// We've set kWriterSpinWait, but we are still in active spinning.
}
@@ -199,6 +201,8 @@ class MUTEX Mutex : CheckedMutex {
// Either way we need to reset kWriterSpinWait
// next time we take the lock or block again.
reset_mask = ~kWriterSpinWait;
+ state = atomic_load(&state_, memory_order_relaxed);
+ DCHECK_NE(state & kWriterSpinWait, 0);
}
}
@@ -212,17 +216,16 @@ class MUTEX Mutex : CheckedMutex {
DCHECK_NE(state & kWriterLock, 0);
DCHECK_EQ(state & kReaderLockMask, 0);
new_state = state & ~kWriterLock;
- wake_writer =
- (state & kWriterSpinWait) == 0 && (state & kWaitingWriterMask) != 0;
+ wake_writer = (state & (kWriterSpinWait | kReaderSpinWait)) == 0 &&
+ (state & kWaitingWriterMask) != 0;
if (wake_writer)
new_state = (new_state - kWaitingWriterInc) | kWriterSpinWait;
wake_readers =
- (state & (kWriterSpinWait | kWaitingWriterMask)) != 0
+ wake_writer || (state & kWriterSpinWait) != 0
? 0
: ((state & kWaitingReaderMask) >> kWaitingReaderShift);
if (wake_readers)
- new_state = (new_state & ~kWaitingReaderMask) +
- (wake_readers << kReaderLockShift);
+ new_state = (new_state & ~kWaitingReaderMask) | kReaderSpinWait;
} while (UNLIKELY(!atomic_compare_exchange_weak(&state_, &state, new_state,
memory_order_release)));
if (UNLIKELY(wake_writer))
@@ -233,23 +236,39 @@ class MUTEX Mutex : CheckedMutex {
void ReadLock() ACQUIRE_SHARED() {
CheckedMutex::Lock();
- bool locked;
- u64 new_state;
+ u64 reset_mask = ~0ull;
u64 state = atomic_load_relaxed(&state_);
- do {
- locked =
- (state & kReaderLockMask) == 0 &&
- (state & (kWriterLock | kWriterSpinWait | kWaitingWriterMask)) != 0;
+ for (uptr spin_iters = 0;; spin_iters++) {
+ bool locked = (state & kWriterLock) != 0;
+ u64 new_state;
+ if (LIKELY(!locked)) {
+ new_state = (state + kReaderLockInc) & reset_mask;
+ } else if (spin_iters > kMaxSpinIters) {
+ new_state = (state + kWaitingReaderInc) & reset_mask;
+ } else if ((state & kReaderSpinWait) == 0) {
+ // Active spinning, but denote our presence so that unlocking
+ // thread does not wake up other threads.
+ new_state = state | kReaderSpinWait;
+ } else {
+ // Active spinning.
+ state = atomic_load(&state_, memory_order_relaxed);
+ continue;
+ }
+ if (UNLIKELY(!atomic_compare_exchange_weak(&state_, &state, new_state,
+ memory_order_acquire)))
+ continue;
if (LIKELY(!locked))
- new_state = state + kReaderLockInc;
- else
- new_state = state + kWaitingReaderInc;
- } while (UNLIKELY(!atomic_compare_exchange_weak(&state_, &state, new_state,
- memory_order_acquire)));
- if (UNLIKELY(locked))
- readers_.Wait();
- DCHECK_EQ(atomic_load_relaxed(&state_) & kWriterLock, 0);
- DCHECK_NE(atomic_load_relaxed(&state_) & kReaderLockMask, 0);
+ return; // We've locked the mutex.
+ if (spin_iters > kMaxSpinIters) {
+ // We've incremented waiting readers, so now block.
+ readers_.Wait();
+ spin_iters = 0;
+ } else {
+ // We've set kReaderSpinWait, but we are still in active spinning.
+ }
+ reset_mask = ~kReaderSpinWait;
+ state = atomic_load(&state_, memory_order_relaxed);
+ }
}
void ReadUnlock() RELEASE_SHARED() {
@@ -259,9 +278,10 @@ class MUTEX Mutex : CheckedMutex {
u64 state = atomic_load_relaxed(&state_);
do {
DCHECK_NE(state & kReaderLockMask, 0);
- DCHECK_EQ(state & (kWaitingReaderMask | kWriterLock), 0);
+ DCHECK_EQ(state & kWriterLock, 0);
new_state = state - kReaderLockInc;
- wake = (new_state & (kReaderLockMask | kWriterSpinWait)) == 0 &&
+ wake = (new_state &
+ (kReaderLockMask | kWriterSpinWait | kReaderSpinWait)) == 0 &&
(new_state & kWaitingWriterMask) != 0;
if (wake)
new_state = (new_state - kWaitingWriterInc) | kWriterSpinWait;
@@ -305,16 +325,14 @@ class MUTEX Mutex : CheckedMutex {
// - a writer is awake and spin-waiting
// the flag is used to prevent thundering herd problem
// (new writers are not woken if this flag is set)
+ // - a reader is awake and spin-waiting
//
- // Writer support active spinning, readers does not.
+ // Both writers and readers use active spinning before blocking.
// But readers are more aggressive and always take the mutex
// if there are any other readers.
- // Writers hand off the mutex to readers: after wake up readers
- // already assume ownership of the mutex (don't need to do any
- // state updates). But the mutex is not handed off to writers,
- // after wake up writers compete to lock the mutex again.
- // This is needed to allow repeated write locks even in presence
- // of other blocked writers.
+ // After wake up both writers and readers compete to lock the
+ // mutex again. This is needed to allow repeated locks even in presence
+ // of other blocked threads.
static constexpr u64 kCounterWidth = 20;
static constexpr u64 kReaderLockShift = 0;
static constexpr u64 kReaderLockInc = 1ull << kReaderLockShift;
@@ -330,7 +348,11 @@ class MUTEX Mutex : CheckedMutex {
<< kWaitingWriterShift;
static constexpr u64 kWriterLock = 1ull << (3 * kCounterWidth);
static constexpr u64 kWriterSpinWait = 1ull << (3 * kCounterWidth + 1);
+ static constexpr u64 kReaderSpinWait = 1ull << (3 * kCounterWidth + 2);
+ static constexpr uptr kMaxSpinIters = 1500;
+
+ Mutex(LinkerInitialized) = delete;
Mutex(const Mutex &) = delete;
void operator=(const Mutex &) = delete;
};
@@ -338,111 +360,6 @@ class MUTEX Mutex : CheckedMutex {
void FutexWait(atomic_uint32_t *p, u32 cmp);
void FutexWake(atomic_uint32_t *p, u32 count);
-class MUTEX BlockingMutex {
- public:
- explicit constexpr BlockingMutex(LinkerInitialized)
- : opaque_storage_ {0, }, owner_ {0} {}
- BlockingMutex();
- void Lock() ACQUIRE();
- void Unlock() RELEASE();
-
- // This function does not guarantee an explicit check that the calling thread
- // is the thread which owns the mutex. This behavior, while more strictly
- // correct, causes problems in cases like StopTheWorld, where a parent thread
- // owns the mutex but a child checks that it is locked. Rather than
- // maintaining complex state to work around those situations, the check only
- // checks that the mutex is owned, and assumes callers to be generally
- // well-behaved.
- void CheckLocked() const CHECK_LOCKED();
-
- private:
- // Solaris mutex_t has a member that requires 64-bit alignment.
- ALIGNED(8) uptr opaque_storage_[10];
- uptr owner_; // for debugging
-};
-
-// Reader-writer spin mutex.
-class MUTEX RWMutex {
- public:
- RWMutex() {
- atomic_store(&state_, kUnlocked, memory_order_relaxed);
- }
-
- ~RWMutex() {
- CHECK_EQ(atomic_load(&state_, memory_order_relaxed), kUnlocked);
- }
-
- void Lock() ACQUIRE() {
- u32 cmp = kUnlocked;
- if (atomic_compare_exchange_strong(&state_, &cmp, kWriteLock,
- memory_order_acquire))
- return;
- LockSlow();
- }
-
- void Unlock() RELEASE() {
- u32 prev = atomic_fetch_sub(&state_, kWriteLock, memory_order_release);
- DCHECK_NE(prev & kWriteLock, 0);
- (void)prev;
- }
-
- void ReadLock() ACQUIRE_SHARED() {
- u32 prev = atomic_fetch_add(&state_, kReadLock, memory_order_acquire);
- if ((prev & kWriteLock) == 0)
- return;
- ReadLockSlow();
- }
-
- void ReadUnlock() RELEASE_SHARED() {
- u32 prev = atomic_fetch_sub(&state_, kReadLock, memory_order_release);
- DCHECK_EQ(prev & kWriteLock, 0);
- DCHECK_GT(prev & ~kWriteLock, 0);
- (void)prev;
- }
-
- void CheckLocked() const CHECK_LOCKED() {
- CHECK_NE(atomic_load(&state_, memory_order_relaxed), kUnlocked);
- }
-
- private:
- atomic_uint32_t state_;
-
- enum {
- kUnlocked = 0,
- kWriteLock = 1,
- kReadLock = 2
- };
-
- void NOINLINE LockSlow() {
- for (int i = 0;; i++) {
- if (i < 10)
- proc_yield(10);
- else
- internal_sched_yield();
- u32 cmp = atomic_load(&state_, memory_order_relaxed);
- if (cmp == kUnlocked &&
- atomic_compare_exchange_weak(&state_, &cmp, kWriteLock,
- memory_order_acquire))
- return;
- }
- }
-
- void NOINLINE ReadLockSlow() {
- for (int i = 0;; i++) {
- if (i < 10)
- proc_yield(10);
- else
- internal_sched_yield();
- u32 prev = atomic_load(&state_, memory_order_acquire);
- if ((prev & kWriteLock) == 0)
- return;
- }
- }
-
- RWMutex(const RWMutex &) = delete;
- void operator=(const RWMutex &) = delete;
-};
-
template <typename MutexType>
class SCOPED_LOCK GenericScopedLock {
public:
@@ -475,12 +392,37 @@ class SCOPED_LOCK GenericScopedReadLock {
void operator=(const GenericScopedReadLock &) = delete;
};
+template <typename MutexType>
+class SCOPED_LOCK GenericScopedRWLock {
+ public:
+ ALWAYS_INLINE explicit GenericScopedRWLock(MutexType *mu, bool write)
+ ACQUIRE(mu)
+ : mu_(mu), write_(write) {
+ if (write_)
+ mu_->Lock();
+ else
+ mu_->ReadLock();
+ }
+
+ ALWAYS_INLINE ~GenericScopedRWLock() RELEASE() {
+ if (write_)
+ mu_->Unlock();
+ else
+ mu_->ReadUnlock();
+ }
+
+ private:
+ MutexType *mu_;
+ bool write_;
+
+ GenericScopedRWLock(const GenericScopedRWLock &) = delete;
+ void operator=(const GenericScopedRWLock &) = delete;
+};
+
typedef GenericScopedLock<StaticSpinMutex> SpinMutexLock;
-typedef GenericScopedLock<BlockingMutex> BlockingMutexLock;
-typedef GenericScopedLock<RWMutex> RWMutexLock;
-typedef GenericScopedReadLock<RWMutex> RWMutexReadLock;
typedef GenericScopedLock<Mutex> Lock;
typedef GenericScopedReadLock<Mutex> ReadLock;
+typedef GenericScopedRWLock<Mutex> RWLock;
} // namespace __sanitizer
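[editor's note] With BlockingMutex and RWMutex gone, the new GenericScopedRWLock (typedef'd to RWLock for Mutex) covers the read/write case. A minimal usage sketch; Update and modify are hypothetical names for illustration:

// Hedged sketch: the boolean picks Lock()/Unlock() vs ReadLock()/ReadUnlock().
Mutex m;
void Update(bool modify) {
  RWLock l(&m, modify);   // write lock if modify, shared read lock otherwise
  // ... access the state protected by m ...
}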
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_persistent_allocator.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_persistent_allocator.cpp
deleted file mode 100644
index 1ca0375b8a54..000000000000
--- a/compiler-rt/lib/sanitizer_common/sanitizer_persistent_allocator.cpp
+++ /dev/null
@@ -1,18 +0,0 @@
-//===-- sanitizer_persistent_allocator.cpp ----------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file is shared between AddressSanitizer and ThreadSanitizer
-// run-time libraries.
-//===----------------------------------------------------------------------===//
-#include "sanitizer_persistent_allocator.h"
-
-namespace __sanitizer {
-
-PersistentAllocator thePersistentAllocator;
-
-} // namespace __sanitizer
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_persistent_allocator.h b/compiler-rt/lib/sanitizer_common/sanitizer_persistent_allocator.h
deleted file mode 100644
index de4fb6ebc3cf..000000000000
--- a/compiler-rt/lib/sanitizer_common/sanitizer_persistent_allocator.h
+++ /dev/null
@@ -1,71 +0,0 @@
-//===-- sanitizer_persistent_allocator.h ------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// A fast memory allocator that does not support free() nor realloc().
-// All allocations are forever.
-//===----------------------------------------------------------------------===//
-
-#ifndef SANITIZER_PERSISTENT_ALLOCATOR_H
-#define SANITIZER_PERSISTENT_ALLOCATOR_H
-
-#include "sanitizer_internal_defs.h"
-#include "sanitizer_mutex.h"
-#include "sanitizer_atomic.h"
-#include "sanitizer_common.h"
-
-namespace __sanitizer {
-
-class PersistentAllocator {
- public:
- void *alloc(uptr size);
-
- private:
- void *tryAlloc(uptr size);
- StaticSpinMutex mtx; // Protects alloc of new blocks for region allocator.
- atomic_uintptr_t region_pos; // Region allocator for Node's.
- atomic_uintptr_t region_end;
-};
-
-inline void *PersistentAllocator::tryAlloc(uptr size) {
- // Optimisic lock-free allocation, essentially try to bump the region ptr.
- for (;;) {
- uptr cmp = atomic_load(&region_pos, memory_order_acquire);
- uptr end = atomic_load(&region_end, memory_order_acquire);
- if (cmp == 0 || cmp + size > end) return nullptr;
- if (atomic_compare_exchange_weak(&region_pos, &cmp, cmp + size,
- memory_order_acquire))
- return (void *)cmp;
- }
-}
-
-inline void *PersistentAllocator::alloc(uptr size) {
- // First, try to allocate optimisitically.
- void *s = tryAlloc(size);
- if (s) return s;
- // If failed, lock, retry and alloc new superblock.
- SpinMutexLock l(&mtx);
- for (;;) {
- s = tryAlloc(size);
- if (s) return s;
- atomic_store(&region_pos, 0, memory_order_relaxed);
- uptr allocsz = 64 * 1024;
- if (allocsz < size) allocsz = size;
- uptr mem = (uptr)MmapOrDie(allocsz, "stack depot");
- atomic_store(&region_end, mem + allocsz, memory_order_release);
- atomic_store(&region_pos, mem, memory_order_release);
- }
-}
-
-extern PersistentAllocator thePersistentAllocator;
-inline void *PersistentAlloc(uptr sz) {
- return thePersistentAllocator.alloc(sz);
-}
-
-} // namespace __sanitizer
-
-#endif // SANITIZER_PERSISTENT_ALLOCATOR_H
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform.h
index 4d3c08893c11..3153de34e5a3 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_platform.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform.h
@@ -281,11 +281,12 @@
// mandated by the upstream linux community for all new ports. Other ports
// may still use legacy syscalls.
#ifndef SANITIZER_USES_CANONICAL_LINUX_SYSCALLS
-# if (defined(__aarch64__) || defined(__riscv)) && SANITIZER_LINUX
-# define SANITIZER_USES_CANONICAL_LINUX_SYSCALLS 1
-# else
-# define SANITIZER_USES_CANONICAL_LINUX_SYSCALLS 0
-# endif
+# if (defined(__aarch64__) || defined(__riscv) || defined(__hexagon__)) && \
+ SANITIZER_LINUX
+# define SANITIZER_USES_CANONICAL_LINUX_SYSCALLS 1
+# else
+# define SANITIZER_USES_CANONICAL_LINUX_SYSCALLS 0
+# endif
#endif
// udi16 syscalls can only be used when the following conditions are
@@ -377,4 +378,18 @@
#define SANITIZER_SUPPORTS_INIT_FOR_DLOPEN 0
#endif
+// SANITIZER_SUPPORTS_THREADLOCAL
+// 1 - THREADLOCAL macro is supported by target
+// 0 - THREADLOCAL macro is not supported by target
+#ifndef __has_feature
+// TODO: Support other compilers here
+# define SANITIZER_SUPPORTS_THREADLOCAL 1
+#else
+# if __has_feature(tls)
+# define SANITIZER_SUPPORTS_THREADLOCAL 1
+# else
+# define SANITIZER_SUPPORTS_THREADLOCAL 0
+# endif
+#endif
+
#endif // SANITIZER_PLATFORM_H
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h
index 5b710c23fd00..14610f2df78d 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h
@@ -229,11 +229,13 @@
(SI_MAC || SI_LINUX_NOT_ANDROID || SI_SOLARIS)
#define SANITIZER_INTERCEPT_CLOCK_GETTIME \
(SI_FREEBSD || SI_NETBSD || SI_LINUX || SI_SOLARIS)
-#define SANITIZER_INTERCEPT_CLOCK_GETCPUCLOCKID SI_LINUX
+#define SANITIZER_INTERCEPT_CLOCK_GETCPUCLOCKID \
+ (SI_LINUX || SI_FREEBSD || SI_NETBSD)
#define SANITIZER_INTERCEPT_GETITIMER SI_POSIX
#define SANITIZER_INTERCEPT_TIME SI_POSIX
#define SANITIZER_INTERCEPT_GLOB (SI_GLIBC || SI_SOLARIS)
#define SANITIZER_INTERCEPT_GLOB64 SI_GLIBC
+#define SANITIZER_INTERCEPT_POSIX_SPAWN SI_POSIX
#define SANITIZER_INTERCEPT_WAIT SI_POSIX
#define SANITIZER_INTERCEPT_INET SI_POSIX
#define SANITIZER_INTERCEPT_PTHREAD_GETSCHEDPARAM SI_POSIX
@@ -251,7 +253,8 @@
#define SANITIZER_INTERCEPT_GETHOSTENT_R (SI_FREEBSD || SI_GLIBC || SI_SOLARIS)
#define SANITIZER_INTERCEPT_GETSOCKOPT SI_POSIX
#define SANITIZER_INTERCEPT_ACCEPT SI_POSIX
-#define SANITIZER_INTERCEPT_ACCEPT4 (SI_LINUX_NOT_ANDROID || SI_NETBSD)
+#define SANITIZER_INTERCEPT_ACCEPT4 \
+ (SI_LINUX_NOT_ANDROID || SI_NETBSD || SI_FREEBSD)
#define SANITIZER_INTERCEPT_PACCEPT SI_NETBSD
#define SANITIZER_INTERCEPT_MODF SI_POSIX
#define SANITIZER_INTERCEPT_RECVMSG SI_POSIX
@@ -309,7 +312,7 @@
#define SANITIZER_INTERCEPT_PPOLL SI_LINUX_NOT_ANDROID || SI_SOLARIS
#define SANITIZER_INTERCEPT_WORDEXP \
(SI_FREEBSD || SI_NETBSD || (SI_MAC && !SI_IOS) || SI_LINUX_NOT_ANDROID || \
- SI_SOLARIS) // NOLINT
+ SI_SOLARIS)
#define SANITIZER_INTERCEPT_SIGWAIT SI_POSIX
#define SANITIZER_INTERCEPT_SIGWAITINFO SI_LINUX_NOT_ANDROID || SI_SOLARIS
#define SANITIZER_INTERCEPT_SIGTIMEDWAIT SI_LINUX_NOT_ANDROID || SI_SOLARIS
@@ -337,7 +340,7 @@
#define SANITIZER_INTERCEPT_ETHER_R (SI_FREEBSD || SI_LINUX_NOT_ANDROID)
#define SANITIZER_INTERCEPT_SHMCTL \
(((SI_FREEBSD || SI_LINUX_NOT_ANDROID) && SANITIZER_WORDSIZE == 64) || \
- SI_NETBSD || SI_SOLARIS) // NOLINT
+ SI_NETBSD || SI_SOLARIS)
#define SANITIZER_INTERCEPT_RANDOM_R SI_GLIBC
#define SANITIZER_INTERCEPT_PTHREAD_ATTR_GET SI_POSIX
#define SANITIZER_INTERCEPT_PTHREAD_ATTR_GETINHERITSCHED \
@@ -445,7 +448,8 @@
#define SANITIZER_INTERCEPT_SEM \
(SI_LINUX || SI_FREEBSD || SI_NETBSD || SI_SOLARIS)
#define SANITIZER_INTERCEPT_PTHREAD_SETCANCEL SI_POSIX
-#define SANITIZER_INTERCEPT_MINCORE (SI_LINUX || SI_NETBSD || SI_SOLARIS)
+#define SANITIZER_INTERCEPT_MINCORE \
+ (SI_LINUX || SI_NETBSD || SI_FREEBSD || SI_SOLARIS)
#define SANITIZER_INTERCEPT_PROCESS_VM_READV SI_LINUX
#define SANITIZER_INTERCEPT_CTERMID \
(SI_LINUX || SI_MAC || SI_FREEBSD || SI_NETBSD || SI_SOLARIS)
@@ -457,10 +461,13 @@
#define SANITIZER_INTERCEPT_SEND_SENDTO SI_POSIX
#define SANITIZER_INTERCEPT_EVENTFD_READ_WRITE SI_LINUX
-#define SANITIZER_INTERCEPT_STAT \
- (SI_FREEBSD || SI_MAC || SI_ANDROID || SI_NETBSD || SI_SOLARIS)
-#define SANITIZER_INTERCEPT_LSTAT (SI_NETBSD || SI_FREEBSD)
-#define SANITIZER_INTERCEPT___XSTAT (!SANITIZER_INTERCEPT_STAT && SI_POSIX)
+#define SI_STAT_LINUX (SI_LINUX && __GLIBC_PREREQ(2, 33))
+#define SANITIZER_INTERCEPT_STAT \
+ (SI_FREEBSD || SI_MAC || SI_ANDROID || SI_NETBSD || SI_SOLARIS || \
+ SI_STAT_LINUX)
+#define SANITIZER_INTERCEPT_LSTAT (SI_NETBSD || SI_FREEBSD || SI_STAT_LINUX)
+#define SANITIZER_INTERCEPT___XSTAT \
+ ((!SANITIZER_INTERCEPT_STAT && SI_POSIX) || SI_STAT_LINUX)
#define SANITIZER_INTERCEPT___XSTAT64 SI_LINUX_NOT_ANDROID
#define SANITIZER_INTERCEPT___LXSTAT SANITIZER_INTERCEPT___XSTAT
#define SANITIZER_INTERCEPT___LXSTAT64 SI_LINUX_NOT_ANDROID
@@ -474,7 +481,7 @@
(SI_LINUX_NOT_ANDROID || SI_MAC || SI_FREEBSD || SI_NETBSD)
#define SANITIZER_INTERCEPT_MMAP SI_POSIX
-#define SANITIZER_INTERCEPT_MMAP64 SI_LINUX_NOT_ANDROID
+#define SANITIZER_INTERCEPT_MMAP64 SI_LINUX_NOT_ANDROID || SI_SOLARIS
#define SANITIZER_INTERCEPT_MALLOPT_AND_MALLINFO (SI_GLIBC || SI_ANDROID)
#define SANITIZER_INTERCEPT_MEMALIGN (!SI_FREEBSD && !SI_MAC && !SI_NETBSD)
#define SANITIZER_INTERCEPT___LIBC_MEMALIGN SI_GLIBC
@@ -496,7 +503,8 @@
#define SANITIZER_INTERCEPT_GID_FROM_GROUP SI_NETBSD
#define SANITIZER_INTERCEPT_ACCESS (SI_NETBSD || SI_FREEBSD)
#define SANITIZER_INTERCEPT_FACCESSAT (SI_NETBSD || SI_FREEBSD)
-#define SANITIZER_INTERCEPT_GETGROUPLIST SI_NETBSD
+#define SANITIZER_INTERCEPT_GETGROUPLIST \
+ (SI_NETBSD || SI_FREEBSD || SI_LINUX)
#define SANITIZER_INTERCEPT_STRLCPY \
(SI_NETBSD || SI_FREEBSD || SI_MAC || SI_ANDROID)
@@ -517,10 +525,11 @@
#define SANITIZER_INTERCEPT_DEVNAME_R (SI_NETBSD || SI_FREEBSD)
#define SANITIZER_INTERCEPT_FGETLN (SI_NETBSD || SI_FREEBSD)
#define SANITIZER_INTERCEPT_STRMODE (SI_NETBSD || SI_FREEBSD)
-#define SANITIZER_INTERCEPT_TTYENT SI_NETBSD
-#define SANITIZER_INTERCEPT_PROTOENT (SI_NETBSD || SI_LINUX)
+#define SANITIZER_INTERCEPT_TTYENT (SI_NETBSD || SI_FREEBSD)
+#define SANITIZER_INTERCEPT_TTYENTPATH SI_NETBSD
+#define SANITIZER_INTERCEPT_PROTOENT (SI_LINUX || SI_NETBSD || SI_FREEBSD)
#define SANITIZER_INTERCEPT_PROTOENT_R SI_GLIBC
-#define SANITIZER_INTERCEPT_NETENT SI_NETBSD
+#define SANITIZER_INTERCEPT_NETENT (SI_LINUX || SI_NETBSD || SI_FREEBSD)
#define SANITIZER_INTERCEPT_SETVBUF \
(SI_NETBSD || SI_FREEBSD || SI_LINUX || SI_MAC)
#define SANITIZER_INTERCEPT_GETMNTINFO (SI_NETBSD || SI_FREEBSD || SI_MAC)
@@ -536,17 +545,17 @@
#define SANITIZER_INTERCEPT_MODCTL SI_NETBSD
#define SANITIZER_INTERCEPT_CAPSICUM SI_FREEBSD
#define SANITIZER_INTERCEPT_STRTONUM (SI_NETBSD || SI_FREEBSD)
-#define SANITIZER_INTERCEPT_FPARSELN SI_NETBSD
+#define SANITIZER_INTERCEPT_FPARSELN (SI_NETBSD || SI_FREEBSD)
#define SANITIZER_INTERCEPT_STATVFS1 SI_NETBSD
#define SANITIZER_INTERCEPT_STRTOI SI_NETBSD
#define SANITIZER_INTERCEPT_CAPSICUM SI_FREEBSD
#define SANITIZER_INTERCEPT_SHA1 SI_NETBSD
#define SANITIZER_INTERCEPT_MD4 SI_NETBSD
#define SANITIZER_INTERCEPT_RMD160 SI_NETBSD
-#define SANITIZER_INTERCEPT_MD5 SI_NETBSD
+#define SANITIZER_INTERCEPT_MD5 (SI_NETBSD || SI_FREEBSD)
#define SANITIZER_INTERCEPT_FSEEK (SI_NETBSD || SI_FREEBSD)
#define SANITIZER_INTERCEPT_MD2 SI_NETBSD
-#define SANITIZER_INTERCEPT_SHA2 SI_NETBSD
+#define SANITIZER_INTERCEPT_SHA2 (SI_NETBSD || SI_FREEBSD)
#define SANITIZER_INTERCEPT_CDB SI_NETBSD
#define SANITIZER_INTERCEPT_VIS (SI_NETBSD || SI_FREEBSD)
#define SANITIZER_INTERCEPT_POPEN SI_POSIX
@@ -571,6 +580,8 @@
#define SANITIZER_INTERCEPT_QSORT \
(SI_POSIX && !SI_IOSSIM && !SI_WATCHOS && !SI_TVOS && !SI_ANDROID)
#define SANITIZER_INTERCEPT_QSORT_R SI_GLIBC
+#define SANITIZER_INTERCEPT_BSEARCH \
+ (SI_POSIX && !SI_IOSSIM && !SI_WATCHOS && !SI_TVOS && !SI_ANDROID)
// sigaltstack on i386 macOS cannot be intercepted due to setjmp()
// calling it and assuming that it does not clobber registers.
#define SANITIZER_INTERCEPT_SIGALTSTACK \
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_freebsd.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_freebsd.cpp
index b5a45ae72cd9..64535805e40d 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_freebsd.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_freebsd.cpp
@@ -69,11 +69,17 @@
#include <semaphore.h>
#include <signal.h>
#include <stddef.h>
+#include <md5.h>
+#include <sha224.h>
+#include <sha256.h>
+#include <sha384.h>
+#include <sha512.h>
#include <stdio.h>
#include <stringlist.h>
#include <term.h>
#include <termios.h>
#include <time.h>
+#include <ttyent.h>
#include <utime.h>
#include <utmpx.h>
#include <vis.h>
@@ -170,9 +176,12 @@ uptr __sanitizer_in_addr_sz(int af) {
unsigned struct_ElfW_Phdr_sz = sizeof(Elf_Phdr);
int glob_nomatch = GLOB_NOMATCH;
int glob_altdirfunc = GLOB_ALTDIRFUNC;
+const int wordexp_wrde_dooffs = WRDE_DOOFFS;
unsigned path_max = PATH_MAX;
+int struct_ttyent_sz = sizeof(struct ttyent);
+
// ioctl arguments
unsigned struct_ifreq_sz = sizeof(struct ifreq);
unsigned struct_termios_sz = sizeof(struct termios);
@@ -357,6 +366,22 @@ const int si_SEGV_MAPERR = SEGV_MAPERR;
const int si_SEGV_ACCERR = SEGV_ACCERR;
const int unvis_valid = UNVIS_VALID;
const int unvis_validpush = UNVIS_VALIDPUSH;
+
+const unsigned MD5_CTX_sz = sizeof(MD5_CTX);
+const unsigned MD5_return_length = MD5_DIGEST_STRING_LENGTH;
+
+#define SHA2_CONST(LEN) \
+ const unsigned SHA##LEN##_CTX_sz = sizeof(SHA##LEN##_CTX); \
+ const unsigned SHA##LEN##_return_length = SHA##LEN##_DIGEST_STRING_LENGTH; \
+ const unsigned SHA##LEN##_block_length = SHA##LEN##_BLOCK_LENGTH; \
+ const unsigned SHA##LEN##_digest_length = SHA##LEN##_DIGEST_LENGTH
+
+SHA2_CONST(224);
+SHA2_CONST(256);
+SHA2_CONST(384);
+SHA2_CONST(512);
+
+#undef SHA2_CONST
} // namespace __sanitizer
using namespace __sanitizer;
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_freebsd.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_freebsd.h
index 5e0ca9c7d782..649e64fd1a32 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_freebsd.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_freebsd.h
@@ -16,26 +16,26 @@
#if SANITIZER_FREEBSD
-#include "sanitizer_internal_defs.h"
-#include "sanitizer_platform.h"
-#include "sanitizer_platform_limits_posix.h"
+# include "sanitizer_internal_defs.h"
+# include "sanitizer_platform.h"
+# include "sanitizer_platform_limits_posix.h"
// Get sys/_types.h, because that tells us whether 64-bit inodes are
// used in struct dirent below.
-#include <sys/_types.h>
+# include <sys/_types.h>
namespace __sanitizer {
void *__sanitizer_get_link_map_by_dlopen_handle(void *handle);
-#define GET_LINK_MAP_BY_DLOPEN_HANDLE(handle) \
- (link_map *)__sanitizer_get_link_map_by_dlopen_handle(handle)
+# define GET_LINK_MAP_BY_DLOPEN_HANDLE(handle) \
+ (link_map *)__sanitizer_get_link_map_by_dlopen_handle(handle)
extern unsigned struct_utsname_sz;
extern unsigned struct_stat_sz;
-#if defined(__powerpc64__)
+# if defined(__powerpc64__)
const unsigned struct___old_kernel_stat_sz = 0;
-#else
+# else
const unsigned struct___old_kernel_stat_sz = 32;
-#endif
+# endif
extern unsigned struct_rusage_sz;
extern unsigned siginfo_t_sz;
extern unsigned struct_itimerval_sz;
@@ -114,11 +114,24 @@ struct __sanitizer_ipc_perm {
long key;
};
-#if !defined(__i386__)
+struct __sanitizer_protoent {
+ char *p_name;
+ char **p_aliases;
+ int p_proto;
+};
+
+struct __sanitizer_netent {
+ char *n_name;
+ char **n_aliases;
+ int n_addrtype;
+ u32 n_net;
+};
+
+# if !defined(__i386__)
typedef long long __sanitizer_time_t;
-#else
+# else
typedef long __sanitizer_time_t;
-#endif
+# endif
struct __sanitizer_shmid_ds {
__sanitizer_ipc_perm shm_perm;
@@ -147,7 +160,7 @@ struct __sanitizer_ifaddrs {
unsigned int ifa_flags;
void *ifa_addr; // (struct sockaddr *)
void *ifa_netmask; // (struct sockaddr *)
-#undef ifa_dstaddr
+# undef ifa_dstaddr
void *ifa_dstaddr; // (struct sockaddr *)
void *ifa_data;
};
@@ -229,12 +242,12 @@ struct __sanitizer_cmsghdr {
};
struct __sanitizer_dirent {
-#if defined(__INO64)
+# if defined(__INO64)
unsigned long long d_fileno;
unsigned long long d_off;
-#else
+# else
unsigned int d_fileno;
-#endif
+# endif
unsigned short d_reclen;
// more fields that we don't care about
};
@@ -243,23 +256,23 @@ struct __sanitizer_dirent {
typedef int __sanitizer_clock_t;
typedef int __sanitizer_clockid_t;
-#if defined(_LP64) || defined(__x86_64__) || defined(__powerpc__) || \
- defined(__mips__)
+# if defined(_LP64) || defined(__x86_64__) || defined(__powerpc__) || \
+ defined(__mips__)
typedef unsigned __sanitizer___kernel_uid_t;
typedef unsigned __sanitizer___kernel_gid_t;
-#else
+# else
typedef unsigned short __sanitizer___kernel_uid_t;
typedef unsigned short __sanitizer___kernel_gid_t;
-#endif
+# endif
typedef long long __sanitizer___kernel_off_t;
-#if defined(__powerpc__) || defined(__mips__)
+# if defined(__powerpc__) || defined(__mips__)
typedef unsigned int __sanitizer___kernel_old_uid_t;
typedef unsigned int __sanitizer___kernel_old_gid_t;
-#else
+# else
typedef unsigned short __sanitizer___kernel_old_uid_t;
typedef unsigned short __sanitizer___kernel_old_gid_t;
-#endif
+# endif
typedef long long __sanitizer___kernel_loff_t;
typedef struct {
@@ -366,9 +379,12 @@ struct __sanitizer_glob_t {
extern int glob_nomatch;
extern int glob_altdirfunc;
+extern const int wordexp_wrde_dooffs;
extern unsigned path_max;
+extern int struct_ttyent_sz;
+
struct __sanitizer_wordexp_t {
uptr we_wordc;
char **we_wordv;
@@ -398,39 +414,49 @@ struct __sanitizer_ifconf {
} ifc_ifcu;
};
-#define IOC_NRBITS 8
-#define IOC_TYPEBITS 8
-#if defined(__powerpc__) || defined(__powerpc64__) || defined(__mips__)
-#define IOC_SIZEBITS 13
-#define IOC_DIRBITS 3
-#define IOC_NONE 1U
-#define IOC_WRITE 4U
-#define IOC_READ 2U
-#else
-#define IOC_SIZEBITS 14
-#define IOC_DIRBITS 2
-#define IOC_NONE 0U
-#define IOC_WRITE 1U
-#define IOC_READ 2U
-#endif
-#define IOC_NRMASK ((1 << IOC_NRBITS) - 1)
-#define IOC_TYPEMASK ((1 << IOC_TYPEBITS) - 1)
-#define IOC_SIZEMASK ((1 << IOC_SIZEBITS) - 1)
-#if defined(IOC_DIRMASK)
-#undef IOC_DIRMASK
-#endif
-#define IOC_DIRMASK ((1 << IOC_DIRBITS) - 1)
-#define IOC_NRSHIFT 0
-#define IOC_TYPESHIFT (IOC_NRSHIFT + IOC_NRBITS)
-#define IOC_SIZESHIFT (IOC_TYPESHIFT + IOC_TYPEBITS)
-#define IOC_DIRSHIFT (IOC_SIZESHIFT + IOC_SIZEBITS)
-#define EVIOC_EV_MAX 0x1f
-#define EVIOC_ABS_MAX 0x3f
-
-#define IOC_DIR(nr) (((nr) >> IOC_DIRSHIFT) & IOC_DIRMASK)
-#define IOC_TYPE(nr) (((nr) >> IOC_TYPESHIFT) & IOC_TYPEMASK)
-#define IOC_NR(nr) (((nr) >> IOC_NRSHIFT) & IOC_NRMASK)
-#define IOC_SIZE(nr) (((nr) >> IOC_SIZESHIFT) & IOC_SIZEMASK)
+struct __sanitizer__ttyent {
+ char *ty_name;
+ char *ty_getty;
+ char *ty_type;
+ int ty_status;
+ char *ty_window;
+ char *ty_comment;
+ char *ty_group;
+};
+
+# define IOC_NRBITS 8
+# define IOC_TYPEBITS 8
+# if defined(__powerpc__) || defined(__powerpc64__) || defined(__mips__)
+# define IOC_SIZEBITS 13
+# define IOC_DIRBITS 3
+# define IOC_NONE 1U
+# define IOC_WRITE 4U
+# define IOC_READ 2U
+# else
+# define IOC_SIZEBITS 14
+# define IOC_DIRBITS 2
+# define IOC_NONE 0U
+# define IOC_WRITE 1U
+# define IOC_READ 2U
+# endif
+# define IOC_NRMASK ((1 << IOC_NRBITS) - 1)
+# define IOC_TYPEMASK ((1 << IOC_TYPEBITS) - 1)
+# define IOC_SIZEMASK ((1 << IOC_SIZEBITS) - 1)
+# if defined(IOC_DIRMASK)
+# undef IOC_DIRMASK
+# endif
+# define IOC_DIRMASK ((1 << IOC_DIRBITS) - 1)
+# define IOC_NRSHIFT 0
+# define IOC_TYPESHIFT (IOC_NRSHIFT + IOC_NRBITS)
+# define IOC_SIZESHIFT (IOC_TYPESHIFT + IOC_TYPEBITS)
+# define IOC_DIRSHIFT (IOC_SIZESHIFT + IOC_SIZEBITS)
+# define EVIOC_EV_MAX 0x1f
+# define EVIOC_ABS_MAX 0x3f
+
+# define IOC_DIR(nr) (((nr) >> IOC_DIRSHIFT) & IOC_DIRMASK)
+# define IOC_TYPE(nr) (((nr) >> IOC_TYPESHIFT) & IOC_TYPEMASK)
+# define IOC_NR(nr) (((nr) >> IOC_NRSHIFT) & IOC_NRMASK)
+# define IOC_SIZE(nr) (((nr) >> IOC_SIZESHIFT) & IOC_SIZEMASK)
extern unsigned struct_ifreq_sz;
extern unsigned struct_termios_sz;
@@ -621,6 +647,22 @@ extern unsigned IOCTL_KDSKBMODE;
extern const int si_SEGV_MAPERR;
extern const int si_SEGV_ACCERR;
+extern const unsigned MD5_CTX_sz;
+extern const unsigned MD5_return_length;
+
+#define SHA2_EXTERN(LEN) \
+ extern const unsigned SHA##LEN##_CTX_sz; \
+ extern const unsigned SHA##LEN##_return_length; \
+ extern const unsigned SHA##LEN##_block_length; \
+ extern const unsigned SHA##LEN##_digest_length
+
+SHA2_EXTERN(224);
+SHA2_EXTERN(256);
+SHA2_EXTERN(384);
+SHA2_EXTERN(512);
+
+#undef SHA2_EXTERN
+
struct __sanitizer_cap_rights {
u64 cr_rights[2];
};
@@ -632,24 +674,24 @@ extern unsigned struct_fstab_sz;
extern unsigned struct_StringList_sz;
} // namespace __sanitizer
-#define CHECK_TYPE_SIZE(TYPE) \
- COMPILER_CHECK(sizeof(__sanitizer_##TYPE) == sizeof(TYPE))
+# define CHECK_TYPE_SIZE(TYPE) \
+ COMPILER_CHECK(sizeof(__sanitizer_##TYPE) == sizeof(TYPE))
-#define CHECK_SIZE_AND_OFFSET(CLASS, MEMBER) \
- COMPILER_CHECK(sizeof(((__sanitizer_##CLASS *)NULL)->MEMBER) == \
- sizeof(((CLASS *)NULL)->MEMBER)); \
- COMPILER_CHECK(offsetof(__sanitizer_##CLASS, MEMBER) == \
- offsetof(CLASS, MEMBER))
+# define CHECK_SIZE_AND_OFFSET(CLASS, MEMBER) \
+ COMPILER_CHECK(sizeof(((__sanitizer_##CLASS *)NULL)->MEMBER) == \
+ sizeof(((CLASS *)NULL)->MEMBER)); \
+ COMPILER_CHECK(offsetof(__sanitizer_##CLASS, MEMBER) == \
+ offsetof(CLASS, MEMBER))
// For sigaction, which is a function and struct at the same time,
// and thus requires explicit "struct" in sizeof() expression.
-#define CHECK_STRUCT_SIZE_AND_OFFSET(CLASS, MEMBER) \
- COMPILER_CHECK(sizeof(((struct __sanitizer_##CLASS *)NULL)->MEMBER) == \
- sizeof(((struct CLASS *)NULL)->MEMBER)); \
- COMPILER_CHECK(offsetof(struct __sanitizer_##CLASS, MEMBER) == \
- offsetof(struct CLASS, MEMBER))
+# define CHECK_STRUCT_SIZE_AND_OFFSET(CLASS, MEMBER) \
+ COMPILER_CHECK(sizeof(((struct __sanitizer_##CLASS *)NULL)->MEMBER) == \
+ sizeof(((struct CLASS *)NULL)->MEMBER)); \
+ COMPILER_CHECK(offsetof(struct __sanitizer_##CLASS, MEMBER) == \
+ offsetof(struct CLASS, MEMBER))
-#define SIGACTION_SYMNAME sigaction
+# define SIGACTION_SYMNAME sigaction
#endif
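
For readers unfamiliar with the ioctl encoding mirrored by the IOC_* helpers above, here is a standalone sketch (not part of the patch) decoding a hypothetical request number on the common non-PowerPC/MIPS layout, where the 32-bit value packs nr:8 | type:8 | size:14 | dir:2 from the least significant bits upward. All constants below are made up for illustration.

// Illustrative only: how the IOC_* shift/mask helpers decode a request number.
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t dir = 2u;     // IOC_READ
  const uint32_t type = 0x45;  // hypothetical ioctl group ('E')
  const uint32_t nr = 0x20;    // hypothetical command number
  const uint32_t size = 24;    // hypothetical argument size
  const uint32_t req = (dir << 30) | (size << 16) | (type << 8) | nr;
  assert(((req >> 30) & 0x3) == dir);      // IOC_DIR(req)
  assert(((req >> 8) & 0xff) == type);     // IOC_TYPE(req)
  assert(((req >> 0) & 0xff) == nr);       // IOC_NR(req)
  assert(((req >> 16) & 0x3fff) == size);  // IOC_SIZE(req)
  return 0;
}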
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_linux.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_linux.cpp
index c51327e1269e..9d577570ea1e 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_linux.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_linux.cpp
@@ -28,32 +28,32 @@
// are not defined anywhere in userspace headers. Fake them. This seems to work
// fine with newer headers, too.
#include <linux/posix_types.h>
-#if defined(__x86_64__) || defined(__mips__)
-#include <sys/stat.h>
-#else
-#define ino_t __kernel_ino_t
-#define mode_t __kernel_mode_t
-#define nlink_t __kernel_nlink_t
-#define uid_t __kernel_uid_t
-#define gid_t __kernel_gid_t
-#define off_t __kernel_off_t
-#define time_t __kernel_time_t
+# if defined(__x86_64__) || defined(__mips__) || defined(__hexagon__)
+# include <sys/stat.h>
+# else
+# define ino_t __kernel_ino_t
+# define mode_t __kernel_mode_t
+# define nlink_t __kernel_nlink_t
+# define uid_t __kernel_uid_t
+# define gid_t __kernel_gid_t
+# define off_t __kernel_off_t
+# define time_t __kernel_time_t
// This header seems to contain the definitions of _kernel_ stat* structs.
-#include <asm/stat.h>
-#undef ino_t
-#undef mode_t
-#undef nlink_t
-#undef uid_t
-#undef gid_t
-#undef off_t
-#endif
-
-#include <linux/aio_abi.h>
-
-#if !SANITIZER_ANDROID
-#include <sys/statfs.h>
-#include <linux/perf_event.h>
-#endif
+# include <asm/stat.h>
+# undef ino_t
+# undef mode_t
+# undef nlink_t
+# undef uid_t
+# undef gid_t
+# undef off_t
+# endif
+
+# include <linux/aio_abi.h>
+
+# if !SANITIZER_ANDROID
+# include <sys/statfs.h>
+# include <linux/perf_event.h>
+# endif
using namespace __sanitizer;
@@ -63,9 +63,9 @@ namespace __sanitizer {
#endif
} // namespace __sanitizer
-#if !defined(__powerpc64__) && !defined(__x86_64__) && !defined(__aarch64__)\
- && !defined(__mips__) && !defined(__s390__)\
- && !defined(__sparc__) && !defined(__riscv)
+# if !defined(__powerpc64__) && !defined(__x86_64__) && \
+ !defined(__aarch64__) && !defined(__mips__) && !defined(__s390__) && \
+ !defined(__sparc__) && !defined(__riscv) && !defined(__hexagon__)
COMPILER_CHECK(struct___old_kernel_stat_sz == sizeof(struct __old_kernel_stat));
#endif
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.cpp
index c8f2aa5dba4a..531e07f2d4c5 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.cpp
@@ -666,6 +666,7 @@ unsigned struct_ElfW_Phdr_sz = sizeof(Elf_Phdr);
int glob_nomatch = GLOB_NOMATCH;
int glob_altdirfunc = GLOB_ALTDIRFUNC;
+const int wordexp_wrde_dooffs = WRDE_DOOFFS;
unsigned path_max = PATH_MAX;
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.h
index 9e28dcfef041..9407803fc9c3 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.h
@@ -394,6 +394,7 @@ struct __sanitizer_glob_t {
extern int glob_nomatch;
extern int glob_altdirfunc;
+extern const int wordexp_wrde_dooffs;
extern unsigned path_max;
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp
index 6e5c330b98ef..a1c452855ae7 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp
@@ -91,10 +91,10 @@
#if SANITIZER_LINUX
# include <utime.h>
# include <sys/ptrace.h>
-#if defined(__mips64) || defined(__aarch64__) || defined(__arm__) || \
- SANITIZER_RISCV64
-# include <asm/ptrace.h>
-# ifdef __arm__
+# if defined(__mips64) || defined(__aarch64__) || defined(__arm__) || \
+ defined(__hexagon__) || SANITIZER_RISCV64
+# include <asm/ptrace.h>
+# ifdef __arm__
typedef struct user_fpregs elf_fpregset_t;
# define ARM_VFPREGS_SIZE_ASAN (32 * 8 /*fpregs*/ + 4 /*fpscr*/)
# if !defined(ARM_VFPREGS_SIZE)
@@ -242,12 +242,13 @@ namespace __sanitizer {
defined(__powerpc64__) || defined(__arch64__) || defined(__sparcv9) || \
defined(__x86_64__) || SANITIZER_RISCV64
#define SIZEOF_STRUCT_USTAT 32
-#elif defined(__arm__) || defined(__i386__) || defined(__mips__) \
- || defined(__powerpc__) || defined(__s390__) || defined(__sparc__)
-#define SIZEOF_STRUCT_USTAT 20
-#else
-#error Unknown size of struct ustat
-#endif
+# elif defined(__arm__) || defined(__i386__) || defined(__mips__) || \
+ defined(__powerpc__) || defined(__s390__) || defined(__sparc__) || \
+ defined(__hexagon__)
+# define SIZEOF_STRUCT_USTAT 20
+# else
+# error Unknown size of struct ustat
+# endif
unsigned struct_ustat_sz = SIZEOF_STRUCT_USTAT;
unsigned struct_rlimit64_sz = sizeof(struct rlimit64);
unsigned struct_statvfs64_sz = sizeof(struct statvfs64);
@@ -312,6 +313,10 @@ unsigned struct_ElfW_Phdr_sz = sizeof(Elf_Phdr);
int glob_altdirfunc = GLOB_ALTDIRFUNC;
#endif
+# if !SANITIZER_ANDROID
+ const int wordexp_wrde_dooffs = WRDE_DOOFFS;
+# endif // !SANITIZER_ANDROID
+
#if SANITIZER_LINUX && !SANITIZER_ANDROID && \
(defined(__i386) || defined(__x86_64) || defined(__mips64) || \
defined(__powerpc64__) || defined(__aarch64__) || defined(__arm__) || \
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h
index 4dd27644ed11..d69b344dd613 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h
@@ -102,7 +102,10 @@ const unsigned struct_kernel_stat64_sz = 104;
#elif SANITIZER_RISCV64
const unsigned struct_kernel_stat_sz = 128;
const unsigned struct_kernel_stat64_sz = 0; // RISCV64 does not use stat64
-#endif
+# elif defined(__hexagon__)
+const unsigned struct_kernel_stat_sz = 128;
+const unsigned struct_kernel_stat64_sz = 0;
+# endif
struct __sanitizer_perf_event_attr {
unsigned type;
unsigned size;
@@ -367,7 +370,7 @@ struct __sanitizer_group {
char **gr_mem;
};
-#if defined(__x86_64__) && !defined(_LP64)
+# if (defined(__x86_64__) && !defined(_LP64)) || defined(__hexagon__)
typedef long long __sanitizer_time_t;
#else
typedef long __sanitizer_time_t;
@@ -475,23 +478,23 @@ struct __sanitizer_dirent {
unsigned short d_reclen;
// more fields that we don't care about
};
-#elif SANITIZER_ANDROID || defined(__x86_64__)
+# elif SANITIZER_ANDROID || defined(__x86_64__) || defined(__hexagon__)
struct __sanitizer_dirent {
unsigned long long d_ino;
unsigned long long d_off;
unsigned short d_reclen;
// more fields that we don't care about
};
-#else
+# else
struct __sanitizer_dirent {
uptr d_ino;
uptr d_off;
unsigned short d_reclen;
// more fields that we don't care about
};
-#endif
+# endif
-#if SANITIZER_LINUX && !SANITIZER_ANDROID
+# if SANITIZER_LINUX && !SANITIZER_ANDROID
struct __sanitizer_dirent64 {
unsigned long long d_ino;
unsigned long long d_off;
@@ -511,8 +514,8 @@ typedef int __sanitizer_clockid_t;
#endif
#if SANITIZER_LINUX
-#if defined(_LP64) || defined(__x86_64__) || defined(__powerpc__) || \
- defined(__mips__)
+# if defined(_LP64) || defined(__x86_64__) || defined(__powerpc__) || \
+ defined(__mips__) || defined(__hexagon__)
typedef unsigned __sanitizer___kernel_uid_t;
typedef unsigned __sanitizer___kernel_gid_t;
#else
@@ -712,6 +715,13 @@ struct __sanitizer_protoent {
int p_proto;
};
+struct __sanitizer_netent {
+ char *n_name;
+ char **n_aliases;
+ int n_addrtype;
+ u32 n_net;
+};
+
struct __sanitizer_addrinfo {
int ai_flags;
int ai_family;
@@ -773,6 +783,10 @@ extern int glob_altdirfunc;
extern unsigned path_max;
+# if !SANITIZER_ANDROID
+extern const int wordexp_wrde_dooffs;
+# endif // !SANITIZER_ANDROID
+
struct __sanitizer_wordexp_t {
uptr we_wordc;
char **we_wordv;
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_solaris.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_solaris.cpp
index 565b31f68aae..a113cb0d3490 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_solaris.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_solaris.cpp
@@ -123,6 +123,7 @@ namespace __sanitizer {
unsigned struct_ElfW_Phdr_sz = sizeof(ElfW(Phdr));
int glob_nomatch = GLOB_NOMATCH;
+ const int wordexp_wrde_dooffs = WRDE_DOOFFS;
unsigned path_max = PATH_MAX;
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_solaris.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_solaris.h
index 85995e79792d..cbab577bcf26 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_solaris.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_solaris.h
@@ -341,6 +341,7 @@ struct __sanitizer_glob_t {
extern int glob_nomatch;
extern int glob_altdirfunc;
+extern const int wordexp_wrde_dooffs;
extern unsigned path_max;
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_posix.h b/compiler-rt/lib/sanitizer_common/sanitizer_posix.h
index b65dae644767..f91e26e74b87 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_posix.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_posix.h
@@ -20,10 +20,7 @@
#include "sanitizer_platform_limits_posix.h"
#include "sanitizer_platform_limits_solaris.h"
-#if !SANITIZER_POSIX
-// Make it hard to accidentally use any of functions declared in this file:
-#error This file should only be included on POSIX
-#endif
+#if SANITIZER_POSIX
namespace __sanitizer {
@@ -126,4 +123,6 @@ void DecorateMapping(uptr addr, uptr size, const char *name);
} // namespace __sanitizer
+#endif // SANITIZER_POSIX
+
#endif // SANITIZER_POSIX_H
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_posix_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_posix_libcdep.cpp
index ddf6844bed13..eed02ce4f6aa 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_posix_libcdep.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_posix_libcdep.cpp
@@ -151,6 +151,8 @@ int Atexit(void (*function)(void)) {
#endif
}
+bool CreateDir(const char *pathname) { return mkdir(pathname, 0755) == 0; }
+
bool SupportsColoredOutput(fd_t fd) {
return isatty(fd) != 0;
}
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_printf.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_printf.cpp
index b913c92e16f1..3a9e366d2df9 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_printf.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_printf.cpp
@@ -20,10 +20,6 @@
#include <stdio.h>
#include <stdarg.h>
-#if defined(__x86_64__)
-# include <emmintrin.h>
-#endif
-
#if SANITIZER_WINDOWS && defined(_MSC_VER) && _MSC_VER < 1800 && \
!defined(va_copy)
# define va_copy(dst, src) ((dst) = (src))
@@ -132,8 +128,8 @@ static int AppendPointer(char **buff, const char *buff_end, u64 ptr_value) {
int VSNPrintf(char *buff, int buff_length,
const char *format, va_list args) {
static const char *kPrintfFormatsHelp =
- "Supported Printf formats: %([0-9]*)?(z|ll)?{d,u,x,X,V}; %p; "
- "%[-]([0-9]*)?(\\.\\*)?s; %c\n";
+ "Supported Printf formats: %([0-9]*)?(z|l|ll)?{d,u,x,X}; %p; "
+ "%[-]([0-9]*)?(\\.\\*)?s; %c\nProvided format: ";
RAW_CHECK(format);
RAW_CHECK(buff_length > 0);
const char *buff_end = &buff[buff_length - 1];
@@ -164,9 +160,11 @@ int VSNPrintf(char *buff, int buff_length,
}
bool have_z = (*cur == 'z');
cur += have_z;
- bool have_ll = !have_z && (cur[0] == 'l' && cur[1] == 'l');
+ bool have_l = cur[0] == 'l' && cur[1] != 'l';
+ cur += have_l;
+ bool have_ll = cur[0] == 'l' && cur[1] == 'l';
cur += have_ll * 2;
- const bool have_length = have_z || have_ll;
+ const bool have_length = have_z || have_l || have_ll;
const bool have_flags = have_width || have_length;
// At the moment only %s supports precision and left-justification.
CHECK(!((precision >= 0 || left_justified) && *cur != 's'));
@@ -174,6 +172,7 @@ int VSNPrintf(char *buff, int buff_length,
case 'd': {
s64 dval = have_ll ? va_arg(args, s64)
: have_z ? va_arg(args, sptr)
+ : have_l ? va_arg(args, long)
: va_arg(args, int);
result += AppendSignedDecimal(&buff, buff_end, dval, width,
pad_with_zero);
@@ -184,26 +183,20 @@ int VSNPrintf(char *buff, int buff_length,
case 'X': {
u64 uval = have_ll ? va_arg(args, u64)
: have_z ? va_arg(args, uptr)
+ : have_l ? va_arg(args, unsigned long)
: va_arg(args, unsigned);
bool uppercase = (*cur == 'X');
result += AppendUnsigned(&buff, buff_end, uval, (*cur == 'u') ? 10 : 16,
width, pad_with_zero, uppercase);
break;
}
- case 'V': {
- for (uptr i = 0; i < 16; i++) {
- unsigned x = va_arg(args, unsigned);
- result += AppendUnsigned(&buff, buff_end, x, 16, 2, true, false);
- }
- break;
- }
case 'p': {
- RAW_CHECK_MSG(!have_flags, kPrintfFormatsHelp);
+ RAW_CHECK_VA(!have_flags, kPrintfFormatsHelp, format);
result += AppendPointer(&buff, buff_end, va_arg(args, uptr));
break;
}
case 's': {
- RAW_CHECK_MSG(!have_length, kPrintfFormatsHelp);
+ RAW_CHECK_VA(!have_length, kPrintfFormatsHelp, format);
// Only left-justified width is supported.
CHECK(!have_width || left_justified);
result += AppendString(&buff, buff_end, left_justified ? -width : width,
@@ -211,17 +204,17 @@ int VSNPrintf(char *buff, int buff_length,
break;
}
case 'c': {
- RAW_CHECK_MSG(!have_flags, kPrintfFormatsHelp);
+ RAW_CHECK_VA(!have_flags, kPrintfFormatsHelp, format);
result += AppendChar(&buff, buff_end, va_arg(args, int));
break;
}
case '%' : {
- RAW_CHECK_MSG(!have_flags, kPrintfFormatsHelp);
+ RAW_CHECK_VA(!have_flags, kPrintfFormatsHelp, format);
result += AppendChar(&buff, buff_end, '%');
break;
}
default: {
- RAW_CHECK_MSG(false, kPrintfFormatsHelp);
+ RAW_CHECK_VA(false, kPrintfFormatsHelp, format);
}
}
}
@@ -317,7 +310,6 @@ static void NOINLINE SharedPrintfCode(bool append_pid, const char *format,
format, args);
}
-FORMAT(1, 2)
void Printf(const char *format, ...) {
va_list args;
va_start(args, format);
@@ -326,7 +318,6 @@ void Printf(const char *format, ...) {
}
// Like Printf, but prints the current PID before the output string.
-FORMAT(1, 2)
void Report(const char *format, ...) {
va_list args;
va_start(args, format);
@@ -338,7 +329,6 @@ void Report(const char *format, ...) {
// Returns the number of symbols that should have been written to buffer
// (not including trailing '\0'). Thus, the string is truncated
// iff return value is not less than "length".
-FORMAT(3, 4)
int internal_snprintf(char *buffer, uptr length, const char *format, ...) {
va_list args;
va_start(args, format);
@@ -347,7 +337,6 @@ int internal_snprintf(char *buffer, uptr length, const char *format, ...) {
return needed_length;
}
-FORMAT(2, 3)
void InternalScopedString::append(const char *format, ...) {
uptr prev_len = length();
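
The new have_l handling above extends VSNPrintf's small format scanner. As a rough illustration (not the sanitizer code itself), the sketch below isolates the length-modifier scan: consume an optional 'z', a single 'l', or "ll", then use the flags to pick the matching va_arg type later. Names are invented for the example.

// Minimal sketch of the length-modifier scan; assumes cur points past the width.
#include <cassert>

struct LengthMods { bool have_z, have_l, have_ll; };

static LengthMods ScanLengthMods(const char *&cur) {
  LengthMods m = {};
  m.have_z = (*cur == 'z');
  cur += m.have_z;
  m.have_l = cur[0] == 'l' && cur[1] != 'l';   // plain %l...
  cur += m.have_l;
  m.have_ll = cur[0] == 'l' && cur[1] == 'l';  // %ll...
  cur += m.have_ll * 2;
  return m;
}

int main() {
  const char *p = "lld";
  LengthMods m = ScanLengthMods(p);
  assert(!m.have_z && !m.have_l && m.have_ll && *p == 'd');
  p = "lu";
  m = ScanLengthMods(p);
  assert(m.have_l && !m.have_ll && *p == 'u');
  return 0;
}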
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_procmaps.h b/compiler-rt/lib/sanitizer_common/sanitizer_procmaps.h
index a56640db43e8..055af366ef06 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_procmaps.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_procmaps.h
@@ -65,13 +65,23 @@ class MemoryMappedSegment {
MemoryMappedSegmentData *data_;
};
-class MemoryMappingLayout {
+class MemoryMappingLayoutBase {
+ public:
+ virtual bool Next(MemoryMappedSegment *segment) { UNIMPLEMENTED(); }
+ virtual bool Error() const { UNIMPLEMENTED(); };
+ virtual void Reset() { UNIMPLEMENTED(); }
+
+ protected:
+ ~MemoryMappingLayoutBase() {}
+};
+
+class MemoryMappingLayout final : public MemoryMappingLayoutBase {
public:
explicit MemoryMappingLayout(bool cache_enabled);
~MemoryMappingLayout();
- bool Next(MemoryMappedSegment *segment);
- bool Error() const;
- void Reset();
+ virtual bool Next(MemoryMappedSegment *segment) override;
+ virtual bool Error() const override;
+ virtual void Reset() override;
// In some cases, e.g. when running under a sandbox on Linux, ASan is unable
// to obtain the memory mappings. It should fall back to pre-cached data
// instead of aborting.
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_common.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_common.cpp
index 1b7dd46d8de4..eb351b0f06fd 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_common.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_common.cpp
@@ -145,28 +145,44 @@ void MemoryMappingLayout::DumpListOfModules(
}
}
-void GetMemoryProfile(fill_profile_f cb, uptr *stats, uptr stats_size) {
+void GetMemoryProfile(fill_profile_f cb, uptr *stats) {
char *smaps = nullptr;
uptr smaps_cap = 0;
uptr smaps_len = 0;
if (!ReadFileToBuffer("/proc/self/smaps", &smaps, &smaps_cap, &smaps_len))
return;
+ ParseUnixMemoryProfile(cb, stats, smaps, smaps_len);
+ UnmapOrDie(smaps, smaps_cap);
+}
+
+void ParseUnixMemoryProfile(fill_profile_f cb, uptr *stats, char *smaps,
+ uptr smaps_len) {
uptr start = 0;
bool file = false;
const char *pos = smaps;
- while (pos < smaps + smaps_len) {
+ char *end = smaps + smaps_len;
+ if (smaps_len < 2)
+ return;
+  // The following parsing can crash on almost every line
+  // if the input is malformed or truncated.
+  // Fixing that is hard because e.g. ParseDecimal does not
+  // accept an end-of-buffer pointer and assumes well-formed input.
+  // So instead we patch the end of the input slightly;
+  // this does not affect well-formed, complete inputs.
+ *--end = 0;
+ *--end = '\n';
+ while (pos < end) {
if (IsHex(pos[0])) {
start = ParseHex(&pos);
for (; *pos != '/' && *pos > '\n'; pos++) {}
file = *pos == '/';
} else if (internal_strncmp(pos, "Rss:", 4) == 0) {
- while (!IsDecimal(*pos)) pos++;
+ while (pos < end && !IsDecimal(*pos)) pos++;
uptr rss = ParseDecimal(&pos) * 1024;
- cb(start, rss, file, stats, stats_size);
+ cb(start, rss, file, stats);
}
while (*pos++ != '\n') {}
}
- UnmapOrDie(smaps, smaps_cap);
}
} // namespace __sanitizer
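
A minimal sketch of the buffer-termination trick ParseUnixMemoryProfile now relies on, assuming a writable copy of the smaps text: the last two bytes are overwritten with a newline and a NUL so the line-oriented loops cannot run past the end, at the cost of losing only the tail of a malformed dump. This is an illustration, not the sanitizer implementation.

#include <cstddef>
#include <cstdio>

static void PatchEnd(char *buf, size_t len) {
  if (len < 2)
    return;
  buf[len - 1] = '\0';  // hard stop for any scan past the last newline
  buf[len - 2] = '\n';  // guarantee the final line is newline-terminated
}

int main() {
  char smaps[] = "55a0 r-xp /bin/true\nRss: 4 kB";  // hypothetical, truncated dump
  PatchEnd(smaps, sizeof(smaps) - 1);
  printf("%s", smaps);
  return 0;
}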
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_solaris.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_solaris.cpp
index bf813f235bb7..e16c4e938cb2 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_solaris.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_solaris.cpp
@@ -55,7 +55,15 @@ bool MemoryMappingLayout::Next(MemoryMappedSegment *segment) {
internal_snprintf(proc_path, sizeof(proc_path), "/proc/self/path/%s",
xmapentry->pr_mapname);
- internal_readlink(proc_path, segment->filename, segment->filename_size);
+ ssize_t sz = internal_readlink(proc_path, segment->filename,
+ segment->filename_size - 1);
+
+ // If readlink failed, the map is anonymous.
+ if (sz == -1) {
+ segment->filename[0] = '\0';
+ } else if ((size_t)sz < segment->filename_size)
+ // readlink doesn't NUL-terminate.
+ segment->filename[sz] = '\0';
}
data_.current += sizeof(prxmap_t);
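
The Solaris change above works around the fact that readlink(2) does not NUL-terminate its output. A hedged sketch of the same idiom in isolation, with a hypothetical helper name (not sanitizer API):

#include <unistd.h>
#include <cstddef>

static void ReadLinkOrEmpty(const char *path, char *buf, size_t size) {
  if (size == 0)
    return;
  ssize_t sz = readlink(path, buf, size - 1);
  if (sz == -1)
    buf[0] = '\0';   // unresolved link: treat as anonymous / unnamed
  else
    buf[sz] = '\0';  // readlink doesn't NUL-terminate
}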
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_signal_interceptors.inc b/compiler-rt/lib/sanitizer_common/sanitizer_signal_interceptors.inc
index cefb870f7e25..475e577d9982 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_signal_interceptors.inc
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_signal_interceptors.inc
@@ -29,8 +29,16 @@ using namespace __sanitizer;
#endif
#ifndef SIGNAL_INTERCEPTOR_SIGACTION_IMPL
-#define SIGNAL_INTERCEPTOR_SIGACTION_IMPL(signum, act, oldact) \
- { return REAL(sigaction_symname)(signum, act, oldact); }
+# define SIGNAL_INTERCEPTOR_SIGACTION_IMPL(signum, act, oldact) \
+ { \
+ if (!REAL(sigaction_symname)) { \
+ Printf( \
+ "Warning: REAL(sigaction_symname) == nullptr. This may happen " \
+ "if you link with ubsan statically. Sigaction will not work.\n"); \
+ return -1; \
+ } \
+ return REAL(sigaction_symname)(signum, act, oldact); \
+ }
#endif
#if SANITIZER_INTERCEPT_BSD_SIGNAL
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_solaris.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_solaris.cpp
index cb53eab8da15..62c40affc9ac 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_solaris.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_solaris.cpp
@@ -225,28 +225,6 @@ void FutexWait(atomic_uint32_t *p, u32 cmp) {
void FutexWake(atomic_uint32_t *p, u32 count) {}
-BlockingMutex::BlockingMutex() {
- CHECK(sizeof(mutex_t) <= sizeof(opaque_storage_));
- internal_memset(this, 0, sizeof(*this));
- CHECK_EQ(mutex_init((mutex_t *)&opaque_storage_, USYNC_THREAD, NULL), 0);
-}
-
-void BlockingMutex::Lock() {
- CHECK(sizeof(mutex_t) <= sizeof(opaque_storage_));
- CHECK_NE(owner_, (uptr)thr_self());
- CHECK_EQ(mutex_lock((mutex_t *)&opaque_storage_), 0);
- CHECK(!owner_);
- owner_ = (uptr)thr_self();
-}
-
-void BlockingMutex::Unlock() {
- CHECK(owner_ == (uptr)thr_self());
- owner_ = 0;
- CHECK_EQ(mutex_unlock((mutex_t *)&opaque_storage_), 0);
-}
-
-void BlockingMutex::CheckLocked() const { CHECK_EQ((uptr)thr_self(), owner_); }
-
} // namespace __sanitizer
#endif // SANITIZER_SOLARIS
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stack_store.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_stack_store.cpp
new file mode 100644
index 000000000000..ad88e2bbbefc
--- /dev/null
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_stack_store.cpp
@@ -0,0 +1,91 @@
+//===-- sanitizer_stack_store.cpp -------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_stack_store.h"
+
+#include "sanitizer_atomic.h"
+#include "sanitizer_common.h"
+#include "sanitizer_stacktrace.h"
+
+namespace __sanitizer {
+
+static constexpr u32 kStackSizeBits = 16;
+
+StackStore::Id StackStore::Store(const StackTrace &trace) {
+ uptr *stack_trace = Alloc(trace.size + 1);
+ CHECK_LT(trace.size, 1 << kStackSizeBits);
+ *stack_trace = trace.size + (trace.tag << kStackSizeBits);
+ internal_memcpy(stack_trace + 1, trace.trace, trace.size * sizeof(uptr));
+ return reinterpret_cast<StackStore::Id>(stack_trace);
+}
+
+StackTrace StackStore::Load(Id id) {
+ const uptr *stack_trace = reinterpret_cast<const uptr *>(id);
+ uptr size = *stack_trace & ((1 << kStackSizeBits) - 1);
+ uptr tag = *stack_trace >> kStackSizeBits;
+ return StackTrace(stack_trace + 1, size, tag);
+}
+
+uptr *StackStore::TryAlloc(uptr count) {
+  // Optimistic lock-free allocation, essentially try to bump the region ptr.
+ for (;;) {
+ uptr cmp = atomic_load(&region_pos_, memory_order_acquire);
+ uptr end = atomic_load(&region_end_, memory_order_acquire);
+ uptr size = count * sizeof(uptr);
+ if (cmp == 0 || cmp + size > end)
+ return nullptr;
+ if (atomic_compare_exchange_weak(&region_pos_, &cmp, cmp + size,
+ memory_order_acquire))
+ return reinterpret_cast<uptr *>(cmp);
+ }
+}
+
+uptr *StackStore::Alloc(uptr count) {
+  // First, try to allocate optimistically.
+ uptr *s = TryAlloc(count);
+ if (LIKELY(s))
+ return s;
+ return RefillAndAlloc(count);
+}
+
+uptr *StackStore::RefillAndAlloc(uptr count) {
+ // If failed, lock, retry and alloc new superblock.
+ SpinMutexLock l(&mtx_);
+ for (;;) {
+ uptr *s = TryAlloc(count);
+ if (s)
+ return s;
+ atomic_store(&region_pos_, 0, memory_order_relaxed);
+ uptr size = count * sizeof(uptr) + sizeof(BlockInfo);
+ uptr allocsz = RoundUpTo(Max<uptr>(size, 64u * 1024u), GetPageSizeCached());
+ uptr mem = (uptr)MmapOrDie(allocsz, "stack depot");
+ BlockInfo *new_block = (BlockInfo *)(mem + allocsz) - 1;
+ new_block->next = curr_;
+ new_block->ptr = mem;
+ new_block->size = allocsz;
+ curr_ = new_block;
+
+ atomic_fetch_add(&mapped_size_, allocsz, memory_order_relaxed);
+
+ allocsz -= sizeof(BlockInfo);
+ atomic_store(&region_end_, mem + allocsz, memory_order_release);
+ atomic_store(&region_pos_, mem, memory_order_release);
+ }
+}
+
+void StackStore::TestOnlyUnmap() {
+ while (curr_) {
+ uptr mem = curr_->ptr;
+ uptr allocsz = curr_->size;
+ curr_ = curr_->next;
+ UnmapOrDie((void *)mem, allocsz);
+ }
+ internal_memset(this, 0, sizeof(*this));
+}
+
+} // namespace __sanitizer
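
Store packs the trace size into the low kStackSizeBits (16) bits of the header word and the tag into the bits above it, which Load then unpacks. A small standalone check of that packing, using made-up values (illustrative only):

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t kStackSizeBits = 16;
  const uint64_t size = 42, tag = 3;  // hypothetical trace size and tag
  const uint64_t header = size + (tag << kStackSizeBits);
  assert((header & ((1ull << kStackSizeBits) - 1)) == size);
  assert((header >> kStackSizeBits) == tag);
  return 0;
}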
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stack_store.h b/compiler-rt/lib/sanitizer_common/sanitizer_stack_store.h
new file mode 100644
index 000000000000..b5bbdccc20b1
--- /dev/null
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_stack_store.h
@@ -0,0 +1,50 @@
+//===-- sanitizer_stack_store.h ---------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SANITIZER_STACK_STORE_H
+#define SANITIZER_STACK_STORE_H
+
+#include "sanitizer_atomic.h"
+#include "sanitizer_internal_defs.h"
+#include "sanitizer_mutex.h"
+#include "sanitizer_stacktrace.h"
+
+namespace __sanitizer {
+
+class StackStore {
+ public:
+ constexpr StackStore() = default;
+
+ using Id = uptr;
+
+ Id Store(const StackTrace &trace);
+ StackTrace Load(Id id);
+ uptr Allocated() const { return atomic_load_relaxed(&mapped_size_); }
+
+ void TestOnlyUnmap();
+
+ private:
+ uptr *Alloc(uptr count = 1);
+ uptr *TryAlloc(uptr count);
+ uptr *RefillAndAlloc(uptr count);
+ mutable StaticSpinMutex mtx_ = {}; // Protects alloc of new blocks.
+ atomic_uintptr_t region_pos_ = {}; // Region allocator for Node's.
+ atomic_uintptr_t region_end_ = {};
+ atomic_uintptr_t mapped_size_ = {};
+
+ struct BlockInfo {
+ const BlockInfo *next;
+ uptr ptr;
+ uptr size;
+ };
+ const BlockInfo *curr_ = nullptr;
+};
+
+} // namespace __sanitizer
+
+#endif // SANITIZER_STACK_STORE_H
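
TryAlloc in the new StackStore is a lock-free bump allocator: it advances region_pos_ with a compare-and-swap and leaves refilling to the mutex-protected slow path. The sketch below restates that fast path with std::atomic standing in for the sanitizer atomics; it is an illustration under those assumptions, not the actual class.

#include <atomic>
#include <cstdint>

static std::atomic<uintptr_t> region_pos{0}, region_end{0};

static uintptr_t *TryAllocWords(uintptr_t count) {
  for (;;) {
    uintptr_t cmp = region_pos.load(std::memory_order_acquire);
    uintptr_t end = region_end.load(std::memory_order_acquire);
    uintptr_t size = count * sizeof(uintptr_t);
    if (cmp == 0 || cmp + size > end)
      return nullptr;  // region exhausted: caller must refill under a lock
    if (region_pos.compare_exchange_weak(cmp, cmp + size,
                                         std::memory_order_acquire))
      return reinterpret_cast<uintptr_t *>(cmp);
  }
}

int main() {
  return TryAllocWords(4) == nullptr ? 0 : 1;  // empty region -> refill needed
}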
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stackdepot.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_stackdepot.cpp
index 44a95214e38b..e203b2cc4c89 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_stackdepot.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_stackdepot.cpp
@@ -14,93 +14,87 @@
#include "sanitizer_common.h"
#include "sanitizer_hash.h"
+#include "sanitizer_stack_store.h"
#include "sanitizer_stackdepotbase.h"
namespace __sanitizer {
struct StackDepotNode {
- StackDepotNode *link;
- u32 id;
- atomic_uint32_t hash_and_use_count; // hash_bits : 12; use_count : 20;
- u32 size;
- u32 tag;
- uptr stack[1]; // [size]
+ using hash_type = u64;
+ hash_type stack_hash;
+ u32 link;
static const u32 kTabSizeLog = SANITIZER_ANDROID ? 16 : 20;
- // Lower kTabSizeLog bits are equal for all items in one bucket.
- // We use these bits to store the per-stack use counter.
- static const u32 kUseCountBits = kTabSizeLog;
- static const u32 kMaxUseCount = 1 << kUseCountBits;
- static const u32 kUseCountMask = (1 << kUseCountBits) - 1;
- static const u32 kHashMask = ~kUseCountMask;
typedef StackTrace args_type;
- bool eq(u32 hash, const args_type &args) const {
- u32 hash_bits =
- atomic_load(&hash_and_use_count, memory_order_relaxed) & kHashMask;
- if ((hash & kHashMask) != hash_bits || args.size != size || args.tag != tag)
- return false;
- uptr i = 0;
- for (; i < size; i++) {
- if (stack[i] != args.trace[i]) return false;
- }
- return true;
+ bool eq(hash_type hash, const args_type &args) const {
+ return hash == stack_hash;
}
- static uptr storage_size(const args_type &args) {
- return sizeof(StackDepotNode) + (args.size - 1) * sizeof(uptr);
- }
- static u32 hash(const args_type &args) {
- MurMur2HashBuilder H(args.size * sizeof(uptr));
+ static uptr allocated();
+ static hash_type hash(const args_type &args) {
+ MurMur2Hash64Builder H(args.size * sizeof(uptr));
for (uptr i = 0; i < args.size; i++) H.add(args.trace[i]);
+ H.add(args.tag);
return H.get();
}
static bool is_valid(const args_type &args) {
return args.size > 0 && args.trace;
}
- void store(const args_type &args, u32 hash) {
- atomic_store(&hash_and_use_count, hash & kHashMask, memory_order_relaxed);
- size = args.size;
- tag = args.tag;
- internal_memcpy(stack, args.trace, size * sizeof(uptr));
- }
- args_type load() const {
- return args_type(&stack[0], size, tag);
- }
- StackDepotHandle get_handle() { return StackDepotHandle(this); }
+ void store(u32 id, const args_type &args, hash_type hash);
+ args_type load(u32 id) const;
+ static StackDepotHandle get_handle(u32 id);
typedef StackDepotHandle handle_type;
};
-COMPILER_CHECK(StackDepotNode::kMaxUseCount == (u32)kStackDepotMaxUseCount);
-
-u32 StackDepotHandle::id() { return node_->id; }
-int StackDepotHandle::use_count() {
- return atomic_load(&node_->hash_and_use_count, memory_order_relaxed) &
- StackDepotNode::kUseCountMask;
-}
-void StackDepotHandle::inc_use_count_unsafe() {
- u32 prev =
- atomic_fetch_add(&node_->hash_and_use_count, 1, memory_order_relaxed) &
- StackDepotNode::kUseCountMask;
- CHECK_LT(prev + 1, StackDepotNode::kMaxUseCount);
-}
+static StackStore stackStore;
// FIXME(dvyukov): this single reserved bit is used in TSan.
typedef StackDepotBase<StackDepotNode, 1, StackDepotNode::kTabSizeLog>
StackDepot;
static StackDepot theDepot;
+// Keep rarely accessed stack traces out of frequently accessed nodes to
+// improve caching efficiency.
+static TwoLevelMap<StackStore::Id, StackDepot::kNodesSize1,
+ StackDepot::kNodesSize2>
+ storeIds;
+// Keep mutable data out of frequently accessed nodes to improve caching
+// efficiency.
+static TwoLevelMap<atomic_uint32_t, StackDepot::kNodesSize1,
+ StackDepot::kNodesSize2>
+ useCounts;
+
+int StackDepotHandle::use_count() const {
+ return atomic_load_relaxed(&useCounts[id_]);
+}
+
+void StackDepotHandle::inc_use_count_unsafe() {
+ atomic_fetch_add(&useCounts[id_], 1, memory_order_relaxed);
+}
-StackDepotStats *StackDepotGetStats() {
- return theDepot.GetStats();
+uptr StackDepotNode::allocated() {
+ return stackStore.Allocated() + storeIds.MemoryUsage() +
+ useCounts.MemoryUsage();
}
-u32 StackDepotPut(StackTrace stack) {
- StackDepotHandle h = theDepot.Put(stack);
- return h.valid() ? h.id() : 0;
+void StackDepotNode::store(u32 id, const args_type &args, hash_type hash) {
+ stack_hash = hash;
+ storeIds[id] = stackStore.Store(args);
}
+StackDepotNode::args_type StackDepotNode::load(u32 id) const {
+ StackStore::Id store_id = storeIds[id];
+ if (!store_id)
+ return {};
+ return stackStore.Load(store_id);
+}
+
+StackDepotStats StackDepotGetStats() { return theDepot.GetStats(); }
+
+u32 StackDepotPut(StackTrace stack) { return theDepot.Put(stack); }
+
StackDepotHandle StackDepotPut_WithHandle(StackTrace stack) {
- return theDepot.Put(stack);
+ return StackDepotNode::get_handle(theDepot.Put(stack));
}
StackTrace StackDepotGet(u32 id) {
@@ -121,34 +115,14 @@ void StackDepotPrintAll() {
#endif
}
-bool StackDepotReverseMap::IdDescPair::IdComparator(
- const StackDepotReverseMap::IdDescPair &a,
- const StackDepotReverseMap::IdDescPair &b) {
- return a.id < b.id;
-}
-
-StackDepotReverseMap::StackDepotReverseMap() {
- map_.reserve(StackDepotGetStats()->n_uniq_ids + 100);
- for (int idx = 0; idx < StackDepot::kTabSize; idx++) {
- atomic_uintptr_t *p = &theDepot.tab[idx];
- uptr v = atomic_load(p, memory_order_consume);
- StackDepotNode *s = (StackDepotNode*)(v & ~1);
- for (; s; s = s->link) {
- IdDescPair pair = {s->id, s};
- map_.push_back(pair);
- }
- }
- Sort(map_.data(), map_.size(), &IdDescPair::IdComparator);
+StackDepotHandle StackDepotNode::get_handle(u32 id) {
+ return StackDepotHandle(&theDepot.nodes[id], id);
}
-StackTrace StackDepotReverseMap::Get(u32 id) {
- if (!map_.size())
- return StackTrace();
- IdDescPair pair = {id, nullptr};
- uptr idx = InternalLowerBound(map_, pair, IdDescPair::IdComparator);
- if (idx > map_.size() || map_[idx].id != id)
- return StackTrace();
- return map_[idx].desc->load();
+void StackDepotTestOnlyUnmap() {
+ theDepot.TestOnlyUnmap();
+ storeIds.TestOnlyUnmap();
+ stackStore.TestOnlyUnmap();
}
} // namespace __sanitizer
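
The reworked depot keeps only {hash, link} in the nodes scanned during lookup, while trace handles and use counters live in separate id-indexed tables (TwoLevelMap in the real code). The toy sketch below shows the same hot/cold split with ordinary containers and invented names; it is not the sanitizer data structure.

#include <cstdint>
#include <unordered_map>

struct HotNode { uint64_t stack_hash; uint32_t link; };  // touched on every lookup

static std::unordered_map<uint32_t, HotNode> nodes;        // hot
static std::unordered_map<uint32_t, uint64_t> store_ids;   // cold: trace handles
static std::unordered_map<uint32_t, uint32_t> use_counts;  // cold: counters

static void Record(uint32_t id, uint64_t hash, uint64_t store_id) {
  nodes[id] = {hash, 0};
  store_ids[id] = store_id;  // only read when the trace itself is needed
}

int main() {
  Record(1, 0x1234abcdULL, 42);
  return use_counts[1];  // 0; counters are bumped separately from lookups
}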
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stackdepot.h b/compiler-rt/lib/sanitizer_common/sanitizer_stackdepot.h
index 0e26c1fc37c4..56d655d9404c 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_stackdepot.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_stackdepot.h
@@ -22,18 +22,18 @@ namespace __sanitizer {
// StackDepot efficiently stores huge amounts of stack traces.
struct StackDepotNode;
struct StackDepotHandle {
- StackDepotNode *node_;
- StackDepotHandle() : node_(nullptr) {}
- explicit StackDepotHandle(StackDepotNode *node) : node_(node) {}
- bool valid() { return node_; }
- u32 id();
- int use_count();
+ StackDepotNode *node_ = nullptr;
+ u32 id_ = 0;
+ StackDepotHandle(StackDepotNode *node, u32 id) : node_(node), id_(id) {}
+ bool valid() const { return node_; }
+ u32 id() const { return id_; }
+ int use_count() const;
void inc_use_count_unsafe();
};
const int kStackDepotMaxUseCount = 1U << (SANITIZER_ANDROID ? 16 : 20);
-StackDepotStats *StackDepotGetStats();
+StackDepotStats StackDepotGetStats();
u32 StackDepotPut(StackTrace stack);
StackDepotHandle StackDepotPut_WithHandle(StackTrace stack);
// Retrieves a stored stack trace by the id.
@@ -43,29 +43,7 @@ void StackDepotLockAll();
void StackDepotUnlockAll();
void StackDepotPrintAll();
-// Instantiating this class creates a snapshot of StackDepot which can be
-// efficiently queried with StackDepotGet(). You can use it concurrently with
-// StackDepot, but the snapshot is only guaranteed to contain those stack traces
-// which were stored before it was instantiated.
-class StackDepotReverseMap {
- public:
- StackDepotReverseMap();
- StackTrace Get(u32 id);
-
- private:
- struct IdDescPair {
- u32 id;
- StackDepotNode *desc;
-
- static bool IdComparator(const IdDescPair &a, const IdDescPair &b);
- };
-
- InternalMmapVector<IdDescPair> map_;
-
- // Disallow evil constructors.
- StackDepotReverseMap(const StackDepotReverseMap&);
- void operator=(const StackDepotReverseMap&);
-};
+void StackDepotTestOnlyUnmap();
} // namespace __sanitizer
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stackdepotbase.h b/compiler-rt/lib/sanitizer_common/sanitizer_stackdepotbase.h
index 1af2c1892eff..96d1ddc87fd0 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_stackdepotbase.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_stackdepotbase.h
@@ -16,71 +16,87 @@
#include <stdio.h>
#include "sanitizer_atomic.h"
+#include "sanitizer_flat_map.h"
#include "sanitizer_internal_defs.h"
#include "sanitizer_mutex.h"
-#include "sanitizer_persistent_allocator.h"
namespace __sanitizer {
template <class Node, int kReservedBits, int kTabSizeLog>
class StackDepotBase {
+ static constexpr u32 kIdSizeLog =
+ sizeof(u32) * 8 - Max(kReservedBits, 1 /* At least 1 bit for locking. */);
+ static constexpr u32 kNodesSize1Log = kIdSizeLog / 2;
+ static constexpr u32 kNodesSize2Log = kIdSizeLog - kNodesSize1Log;
+ static constexpr int kTabSize = 1 << kTabSizeLog; // Hash table size.
+ static constexpr u32 kUnlockMask = (1ull << kIdSizeLog) - 1;
+ static constexpr u32 kLockMask = ~kUnlockMask;
+
public:
typedef typename Node::args_type args_type;
typedef typename Node::handle_type handle_type;
+ typedef typename Node::hash_type hash_type;
+
+ static constexpr u64 kNodesSize1 = 1ull << kNodesSize1Log;
+ static constexpr u64 kNodesSize2 = 1ull << kNodesSize2Log;
+
// Maps stack trace to a unique id.
- handle_type Put(args_type args, bool *inserted = nullptr);
+ u32 Put(args_type args, bool *inserted = nullptr);
// Retrieves a stored stack trace by the id.
args_type Get(u32 id);
- StackDepotStats *GetStats() { return &stats; }
+ StackDepotStats GetStats() const {
+ return {
+ atomic_load_relaxed(&n_uniq_ids),
+ nodes.MemoryUsage() + Node::allocated(),
+ };
+ }
void LockAll();
void UnlockAll();
void PrintAll();
- private:
- static Node *find(Node *s, args_type args, u32 hash);
- static Node *lock(atomic_uintptr_t *p);
- static void unlock(atomic_uintptr_t *p, Node *s);
+ void TestOnlyUnmap() {
+ nodes.TestOnlyUnmap();
+ internal_memset(this, 0, sizeof(*this));
+ }
- static const int kTabSize = 1 << kTabSizeLog; // Hash table size.
- static const int kPartBits = 8;
- static const int kPartShift = sizeof(u32) * 8 - kPartBits - kReservedBits;
- static const int kPartCount =
- 1 << kPartBits; // Number of subparts in the table.
- static const int kPartSize = kTabSize / kPartCount;
- static const int kMaxId = 1 << kPartShift;
+ private:
+ friend Node;
+ u32 find(u32 s, args_type args, hash_type hash) const;
+ static u32 lock(atomic_uint32_t *p);
+ static void unlock(atomic_uint32_t *p, u32 s);
+ atomic_uint32_t tab[kTabSize]; // Hash table of Node's.
- atomic_uintptr_t tab[kTabSize]; // Hash table of Node's.
- atomic_uint32_t seq[kPartCount]; // Unique id generators.
+ atomic_uint32_t n_uniq_ids;
- StackDepotStats stats;
+ TwoLevelMap<Node, kNodesSize1, kNodesSize2> nodes;
friend class StackDepotReverseMap;
};
template <class Node, int kReservedBits, int kTabSizeLog>
-Node *StackDepotBase<Node, kReservedBits, kTabSizeLog>::find(Node *s,
- args_type args,
- u32 hash) {
+u32 StackDepotBase<Node, kReservedBits, kTabSizeLog>::find(
+ u32 s, args_type args, hash_type hash) const {
// Searches linked list s for the stack, returns its id.
- for (; s; s = s->link) {
- if (s->eq(hash, args)) {
+ for (; s;) {
+ const Node &node = nodes[s];
+ if (node.eq(hash, args))
return s;
- }
+ s = node.link;
}
- return nullptr;
+ return 0;
}
template <class Node, int kReservedBits, int kTabSizeLog>
-Node *StackDepotBase<Node, kReservedBits, kTabSizeLog>::lock(
- atomic_uintptr_t *p) {
+u32 StackDepotBase<Node, kReservedBits, kTabSizeLog>::lock(atomic_uint32_t *p) {
  // Uses the reserved high bit(s) of the bucket value as a mutex.
for (int i = 0;; i++) {
- uptr cmp = atomic_load(p, memory_order_relaxed);
- if ((cmp & 1) == 0 &&
- atomic_compare_exchange_weak(p, &cmp, cmp | 1, memory_order_acquire))
- return (Node *)cmp;
+ u32 cmp = atomic_load(p, memory_order_relaxed);
+ if ((cmp & kLockMask) == 0 &&
+ atomic_compare_exchange_weak(p, &cmp, cmp | kLockMask,
+ memory_order_acquire))
+ return cmp;
if (i < 10)
proc_yield(10);
else
@@ -90,73 +106,57 @@ Node *StackDepotBase<Node, kReservedBits, kTabSizeLog>::lock(
template <class Node, int kReservedBits, int kTabSizeLog>
void StackDepotBase<Node, kReservedBits, kTabSizeLog>::unlock(
- atomic_uintptr_t *p, Node *s) {
- DCHECK_EQ((uptr)s & 1, 0);
- atomic_store(p, (uptr)s, memory_order_release);
+ atomic_uint32_t *p, u32 s) {
+ DCHECK_EQ(s & kLockMask, 0);
+ atomic_store(p, s, memory_order_release);
}
template <class Node, int kReservedBits, int kTabSizeLog>
-typename StackDepotBase<Node, kReservedBits, kTabSizeLog>::handle_type
-StackDepotBase<Node, kReservedBits, kTabSizeLog>::Put(args_type args,
- bool *inserted) {
- if (inserted) *inserted = false;
- if (!Node::is_valid(args)) return handle_type();
- uptr h = Node::hash(args);
- atomic_uintptr_t *p = &tab[h % kTabSize];
- uptr v = atomic_load(p, memory_order_consume);
- Node *s = (Node *)(v & ~1);
+u32 StackDepotBase<Node, kReservedBits, kTabSizeLog>::Put(args_type args,
+ bool *inserted) {
+ if (inserted)
+ *inserted = false;
+ if (!LIKELY(Node::is_valid(args)))
+ return 0;
+ hash_type h = Node::hash(args);
+ atomic_uint32_t *p = &tab[h % kTabSize];
+ u32 v = atomic_load(p, memory_order_consume);
+ u32 s = v & kUnlockMask;
// First, try to find the existing stack.
- Node *node = find(s, args, h);
- if (node) return node->get_handle();
+ u32 node = find(s, args, h);
+ if (LIKELY(node))
+ return node;
+
// If failed, lock, retry and insert new.
- Node *s2 = lock(p);
+ u32 s2 = lock(p);
if (s2 != s) {
node = find(s2, args, h);
if (node) {
unlock(p, s2);
- return node->get_handle();
+ return node;
}
}
- uptr part = (h % kTabSize) / kPartSize;
- u32 id = atomic_fetch_add(&seq[part], 1, memory_order_relaxed) + 1;
- stats.n_uniq_ids++;
- CHECK_LT(id, kMaxId);
- id |= part << kPartShift;
- CHECK_NE(id, 0);
- CHECK_EQ(id & (((u32)-1) >> kReservedBits), id);
- uptr memsz = Node::storage_size(args);
- s = (Node *)PersistentAlloc(memsz);
- stats.allocated += memsz;
- s->id = id;
- s->store(args, h);
- s->link = s2;
+ s = atomic_fetch_add(&n_uniq_ids, 1, memory_order_relaxed) + 1;
+ CHECK_EQ(s & kUnlockMask, s);
+ CHECK_EQ(s & (((u32)-1) >> kReservedBits), s);
+ Node &new_node = nodes[s];
+ new_node.store(s, args, h);
+ new_node.link = s2;
unlock(p, s);
if (inserted) *inserted = true;
- return s->get_handle();
+ return s;
}
template <class Node, int kReservedBits, int kTabSizeLog>
typename StackDepotBase<Node, kReservedBits, kTabSizeLog>::args_type
StackDepotBase<Node, kReservedBits, kTabSizeLog>::Get(u32 id) {
- if (id == 0) {
+ if (id == 0)
return args_type();
- }
CHECK_EQ(id & (((u32)-1) >> kReservedBits), id);
- // High kPartBits contain part id, so we need to scan at most kPartSize lists.
- uptr part = id >> kPartShift;
- for (int i = 0; i != kPartSize; i++) {
- uptr idx = part * kPartSize + i;
- CHECK_LT(idx, kTabSize);
- atomic_uintptr_t *p = &tab[idx];
- uptr v = atomic_load(p, memory_order_consume);
- Node *s = (Node *)(v & ~1);
- for (; s; s = s->link) {
- if (s->id == id) {
- return s->load();
- }
- }
- }
- return args_type();
+ if (!nodes.contains(id))
+ return args_type();
+ const Node &node = nodes[id];
+ return node.load(id);
}
template <class Node, int kReservedBits, int kTabSizeLog>
@@ -169,24 +169,23 @@ void StackDepotBase<Node, kReservedBits, kTabSizeLog>::LockAll() {
template <class Node, int kReservedBits, int kTabSizeLog>
void StackDepotBase<Node, kReservedBits, kTabSizeLog>::UnlockAll() {
for (int i = 0; i < kTabSize; ++i) {
- atomic_uintptr_t *p = &tab[i];
+ atomic_uint32_t *p = &tab[i];
uptr s = atomic_load(p, memory_order_relaxed);
- unlock(p, (Node *)(s & ~1UL));
+ unlock(p, s & kUnlockMask);
}
}
template <class Node, int kReservedBits, int kTabSizeLog>
void StackDepotBase<Node, kReservedBits, kTabSizeLog>::PrintAll() {
for (int i = 0; i < kTabSize; ++i) {
- atomic_uintptr_t *p = &tab[i];
- lock(p);
- uptr v = atomic_load(p, memory_order_relaxed);
- Node *s = (Node *)(v & ~1UL);
- for (; s; s = s->link) {
- Printf("Stack for id %u:\n", s->id);
- s->load().Print();
+ atomic_uint32_t *p = &tab[i];
+ u32 s = atomic_load(p, memory_order_consume) & kUnlockMask;
+ for (; s;) {
+ const Node &node = nodes[s];
+ Printf("Stack for id %u:\n", s);
+ node.load(s).Print();
+ s = node.link;
}
- unlock(p, s);
}
}
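
Put/lock/unlock above treat each hash bucket as a single atomic u32: the low kIdSizeLog bits hold the id of the first node in the bucket's chain and the reserved top bit(s) act as a per-bucket spinlock. A simplified sketch with one reserved bit, using std::atomic rather than the sanitizer atomics (no backoff, illustrative only):

#include <atomic>
#include <cstdint>

static constexpr uint32_t kLockMask = 1u << 31;  // one reserved bit
static constexpr uint32_t kUnlockMask = ~kLockMask;

static uint32_t LockBucket(std::atomic<uint32_t> &bucket) {
  for (;;) {
    uint32_t cmp = bucket.load(std::memory_order_relaxed);
    if ((cmp & kLockMask) == 0 &&
        bucket.compare_exchange_weak(cmp, cmp | kLockMask,
                                      std::memory_order_acquire))
      return cmp;  // previous head id; lock bit is now set
  }
}

static void UnlockBucket(std::atomic<uint32_t> &bucket, uint32_t head) {
  bucket.store(head & kUnlockMask, std::memory_order_release);
}

static std::atomic<uint32_t> bucket{0};

int main() {
  uint32_t head = LockBucket(bucket);  // head == 0, bucket now locked
  UnlockBucket(bucket, head);          // publish (unchanged) head, drop lock
  return 0;
}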
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.cpp
index 07e4409f4a5d..37e9e6dd08d7 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.cpp
@@ -22,8 +22,9 @@ namespace __sanitizer {
uptr StackTrace::GetNextInstructionPc(uptr pc) {
#if defined(__sparc__) || defined(__mips__)
return pc + 8;
-#elif defined(__powerpc__) || defined(__arm__) || defined(__aarch64__)
- return pc + 4;
+#elif defined(__powerpc__) || defined(__arm__) || defined(__aarch64__) || \
+ defined(__hexagon__)
+ return STRIP_PAC_PC((void *)pc) + 4;
#elif SANITIZER_RISCV64
// Current check order is 4 -> 2 -> 6 -> 8
u8 InsnByte = *(u8 *)(pc);
@@ -64,7 +65,7 @@ void BufferedStackTrace::Init(const uptr *pcs, uptr cnt, uptr extra_top_pc) {
top_frame_bp = 0;
}
-// Sparc implemention is in its own file.
+// Sparc implementation is in its own file.
#if !defined(__sparc__)
// In GCC on ARM bp points to saved lr, not fp, so we should check the next
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.h b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.h
index ea330f36f7d7..11c6154b09ea 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.h
@@ -209,11 +209,11 @@ static inline bool IsValidFrame(uptr frame, uptr stack_top, uptr stack_bottom) {
// StackTrace::GetCurrentPc() faster.
#if defined(__x86_64__)
# define GET_CURRENT_PC() \
- ({ \
+ (__extension__({ \
uptr pc; \
asm("lea 0(%%rip), %0" : "=r"(pc)); \
pc; \
- })
+ }))
#else
# define GET_CURRENT_PC() StackTrace::GetCurrentPc()
#endif
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_libcdep.cpp
index f60ea7731748..2d1c03f73221 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_libcdep.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_libcdep.cpp
@@ -64,7 +64,7 @@ class StackTraceTextPrinter {
if (dedup_token_->length())
dedup_token_->append("--");
if (stack->info.function != nullptr)
- dedup_token_->append(stack->info.function);
+ dedup_token_->append("%s", stack->info.function);
}
}
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_printer.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_printer.cpp
index c998322d3944..c6356dae23c1 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_printer.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_printer.cpp
@@ -129,7 +129,7 @@ void RenderFrame(InternalScopedString *buffer, const char *format, int frame_no,
break;
// Frame number and all fields of AddressInfo structure.
case 'n':
- buffer->append("%zu", frame_no);
+ buffer->append("%u", frame_no);
break;
case 'p':
buffer->append("0x%zx", address);
@@ -198,8 +198,8 @@ void RenderFrame(InternalScopedString *buffer, const char *format, int frame_no,
}
break;
default:
- Report("Unsupported specifier in stack frame format: %c (0x%zx)!\n", *p,
- *p);
+ Report("Unsupported specifier in stack frame format: %c (%p)!\n", *p,
+ (void *)p);
Die();
}
}
@@ -244,14 +244,14 @@ void RenderData(InternalScopedString *buffer, const char *format,
buffer->append("%s", StripPathPrefix(DI->file, strip_path_prefix));
break;
case 'l':
- buffer->append("%d", DI->line);
+ buffer->append("%zu", DI->line);
break;
case 'g':
buffer->append("%s", DI->name);
break;
default:
- Report("Unsupported specifier in stack frame format: %c (0x%zx)!\n", *p,
- *p);
+ Report("Unsupported specifier in stack frame format: %c (%p)!\n", *p,
+ (void *)p);
Die();
}
}
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_sparc.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_sparc.cpp
index 34190fb1bbb2..1e635a66978f 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_sparc.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_sparc.cpp
@@ -9,7 +9,7 @@
// This file is shared between AddressSanitizer and ThreadSanitizer
// run-time libraries.
//
-// Implemention of fast stack unwinding for Sparc.
+// Implementation of fast stack unwinding for Sparc.
//===----------------------------------------------------------------------===//
#if defined(__sparc__)
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp
index 53cfddcfbe0b..403bda1174cc 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp
@@ -108,7 +108,7 @@ struct TracerThreadArgument {
void *callback_argument;
// The tracer thread waits on this mutex while the parent finishes its
// preparations.
- BlockingMutex mutex;
+ Mutex mutex;
// Tracer thread signals its completion by setting done.
atomic_uintptr_t done;
uptr parent_pid;
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_netbsd_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_netbsd_libcdep.cpp
index 9c7cd64255e5..701db72619a3 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_netbsd_libcdep.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_netbsd_libcdep.cpp
@@ -68,7 +68,7 @@ class SuspendedThreadsListNetBSD final : public SuspendedThreadsList {
struct TracerThreadArgument {
StopTheWorldCallback callback;
void *callback_argument;
- BlockingMutex mutex;
+ Mutex mutex;
atomic_uintptr_t done;
uptr parent_pid;
};
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer.h b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer.h
index 2476b0ea7bf7..42bd157fa627 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer.h
@@ -158,7 +158,7 @@ class Symbolizer final {
// its method should be protected by |mu_|.
class ModuleNameOwner {
public:
- explicit ModuleNameOwner(BlockingMutex *synchronized_by)
+ explicit ModuleNameOwner(Mutex *synchronized_by)
: last_match_(nullptr), mu_(synchronized_by) {
storage_.reserve(kInitialCapacity);
}
@@ -169,7 +169,7 @@ class Symbolizer final {
InternalMmapVector<const char*> storage_;
const char *last_match_;
- BlockingMutex *mu_;
+ Mutex *mu_;
} module_names_;
/// Platform-specific function for creating a Symbolizer object.
@@ -192,7 +192,7 @@ class Symbolizer final {
// Mutex locked from public methods of |Symbolizer|, so that the internals
// (including individual symbolizer tools and platform-specific methods) are
// always synchronized.
- BlockingMutex mu_;
+ Mutex mu_;
IntrusiveList<SymbolizerTool> tools_;
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_internal.h b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_internal.h
index 71de1758b3e9..b8670941a05e 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_internal.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_internal.h
@@ -21,7 +21,7 @@ namespace __sanitizer {
// Parsing helpers, 'str' is searched for delimiter(s) and a string or uptr
// is extracted. When extracting a string, a newly allocated (using
-// InternalAlloc) and null-terminataed buffer is returned. They return a pointer
+// InternalAlloc) and null-terminated buffer is returned. They return a pointer
// to the next character after the found delimiter.
const char *ExtractToken(const char *str, const char *delims, char **result);
const char *ExtractInt(const char *str, const char *delims, int *result);
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp
index 98418b426c37..3fc994fd3deb 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp
@@ -83,7 +83,7 @@ const char *ExtractTokenUpToDelimiter(const char *str, const char *delimiter,
}
SymbolizedStack *Symbolizer::SymbolizePC(uptr addr) {
- BlockingMutexLock l(&mu_);
+ Lock l(&mu_);
const char *module_name = nullptr;
uptr module_offset;
ModuleArch arch;
@@ -103,7 +103,7 @@ SymbolizedStack *Symbolizer::SymbolizePC(uptr addr) {
}
bool Symbolizer::SymbolizeData(uptr addr, DataInfo *info) {
- BlockingMutexLock l(&mu_);
+ Lock l(&mu_);
const char *module_name = nullptr;
uptr module_offset;
ModuleArch arch;
@@ -124,7 +124,7 @@ bool Symbolizer::SymbolizeData(uptr addr, DataInfo *info) {
}
bool Symbolizer::SymbolizeFrame(uptr addr, FrameInfo *info) {
- BlockingMutexLock l(&mu_);
+ Lock l(&mu_);
const char *module_name = nullptr;
if (!FindModuleNameAndOffsetForAddress(
addr, &module_name, &info->module_offset, &info->module_arch))
@@ -141,7 +141,7 @@ bool Symbolizer::SymbolizeFrame(uptr addr, FrameInfo *info) {
bool Symbolizer::GetModuleNameAndOffsetForPC(uptr pc, const char **module_name,
uptr *module_address) {
- BlockingMutexLock l(&mu_);
+ Lock l(&mu_);
const char *internal_module_name = nullptr;
ModuleArch arch;
if (!FindModuleNameAndOffsetForAddress(pc, &internal_module_name,
@@ -154,7 +154,7 @@ bool Symbolizer::GetModuleNameAndOffsetForPC(uptr pc, const char **module_name,
}
void Symbolizer::Flush() {
- BlockingMutexLock l(&mu_);
+ Lock l(&mu_);
for (auto &tool : tools_) {
SymbolizerScope sym_scope(this);
tool.Flush();
@@ -162,7 +162,7 @@ void Symbolizer::Flush() {
}
const char *Symbolizer::Demangle(const char *name) {
- BlockingMutexLock l(&mu_);
+ Lock l(&mu_);
for (auto &tool : tools_) {
SymbolizerScope sym_scope(this);
if (const char *demangled = tool.Demangle(name))
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_report.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_report.cpp
index f330ed36640a..869c8935330d 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_report.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_report.cpp
@@ -88,11 +88,17 @@ void ReportErrorSummary(const char *error_type, const StackTrace *stack,
#endif
}
-void ReportMmapWriteExec(int prot) {
+void ReportMmapWriteExec(int prot, int flags) {
#if SANITIZER_POSIX && (!SANITIZER_GO && !SANITIZER_ANDROID)
- if ((prot & (PROT_WRITE | PROT_EXEC)) != (PROT_WRITE | PROT_EXEC))
+ int pflags = (PROT_WRITE | PROT_EXEC);
+ if ((prot & pflags) != pflags)
return;
+# if SANITIZER_MAC && defined(MAP_JIT)
+ if ((flags & MAP_JIT) == MAP_JIT)
+ return;
+# endif
+
ScopedErrorReportLock l;
SanitizerCommonDecorator d;
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_syscall_linux_hexagon.inc b/compiler-rt/lib/sanitizer_common/sanitizer_syscall_linux_hexagon.inc
new file mode 100644
index 000000000000..553bff7503b4
--- /dev/null
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_syscall_linux_hexagon.inc
@@ -0,0 +1,131 @@
+//===-- sanitizer_syscall_linux_hexagon.inc ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementations of internal_syscall and internal_iserror for Linux/hexagon.
+//
+//===----------------------------------------------------------------------===//
+
+#define SYSCALL(name) __NR_##name
+
+#define __internal_syscall_LL_E(x) \
+ ((union { \
+ long long ll; \
+ long l[2]; \
+ }){.ll = x}) \
+ .l[0], \
+ ((union { \
+ long long ll; \
+ long l[2]; \
+ }){.ll = x}) \
+ .l[1]
+#define __internal_syscall_LL_O(x) 0, __internal_syscall_LL_E((x))
+
+#define __asm_syscall(...) \
+ do { \
+ __asm__ __volatile__("trap0(#1)" : "=r"(r0) : __VA_ARGS__ : "memory"); \
+ return r0; \
+ } while (0)
+
+#define __internal_syscall0(n) (__internal_syscall)(n)
+
+static uptr __internal_syscall(long n) {
+ register u32 r6 __asm__("r6") = n;
+ register u32 r0 __asm__("r0");
+ __asm_syscall("r"(r6));
+}
+
+#define __internal_syscall1(n, a1) (__internal_syscall)(n, (long)(a1))
+
+static uptr __internal_syscall(long n, long a) {
+ register u32 r6 __asm__("r6") = n;
+ register u32 r0 __asm__("r0") = a;
+ __asm_syscall("r"(r6), "0"(r0));
+}
+
+#define __internal_syscall2(n, a1, a2) \
+ (__internal_syscall)(n, (long)(a1), (long)(a2))
+
+static uptr __internal_syscall(long n, long a, long b) {
+ register u32 r6 __asm__("r6") = n;
+ register u32 r0 __asm__("r0") = a;
+ register u32 r1 __asm__("r1") = b;
+ __asm_syscall("r"(r6), "0"(r0), "r"(r1));
+}
+
+#define __internal_syscall3(n, a1, a2, a3) \
+ (__internal_syscall)(n, (long)(a1), (long)(a2), (long)(a3))
+
+static uptr __internal_syscall(long n, long a, long b, long c) {
+ register u32 r6 __asm__("r6") = n;
+ register u32 r0 __asm__("r0") = a;
+ register u32 r1 __asm__("r1") = b;
+ register u32 r2 __asm__("r2") = c;
+ __asm_syscall("r"(r6), "0"(r0), "r"(r1), "r"(r2));
+}
+
+#define __internal_syscall4(n, a1, a2, a3, a4) \
+ (__internal_syscall)(n, (long)(a1), (long)(a2), (long)(a3), (long)(a4))
+
+static uptr __internal_syscall(long n, long a, long b, long c, long d) {
+ register u32 r6 __asm__("r6") = n;
+ register u32 r0 __asm__("r0") = a;
+ register u32 r1 __asm__("r1") = b;
+ register u32 r2 __asm__("r2") = c;
+ register u32 r3 __asm__("r3") = d;
+ __asm_syscall("r"(r6), "0"(r0), "r"(r1), "r"(r2), "r"(r3));
+}
+
+#define __internal_syscall5(n, a1, a2, a3, a4, a5) \
+ (__internal_syscall)(n, (long)(a1), (long)(a2), (long)(a3), (long)(a4), \
+ (long)(a5))
+
+static uptr __internal_syscall(long n, long a, long b, long c, long d, long e) {
+ register u32 r6 __asm__("r6") = n;
+ register u32 r0 __asm__("r0") = a;
+ register u32 r1 __asm__("r1") = b;
+ register u32 r2 __asm__("r2") = c;
+ register u32 r3 __asm__("r3") = d;
+ register u32 r4 __asm__("r4") = e;
+ __asm_syscall("r"(r6), "0"(r0), "r"(r1), "r"(r2), "r"(r3), "r"(r4));
+}
+
+#define __internal_syscall6(n, a1, a2, a3, a4, a5, a6) \
+ (__internal_syscall)(n, (long)(a1), (long)(a2), (long)(a3), (long)(a4), \
+ (long)(a5), (long)(a6))
+
+static uptr __internal_syscall(long n, long a, long b, long c, long d, long e,
+ long f) {
+ register u32 r6 __asm__("r6") = n;
+ register u32 r0 __asm__("r0") = a;
+ register u32 r1 __asm__("r1") = b;
+ register u32 r2 __asm__("r2") = c;
+ register u32 r3 __asm__("r3") = d;
+ register u32 r4 __asm__("r4") = e;
+ register u32 r5 __asm__("r5") = f;
+ __asm_syscall("r"(r6), "0"(r0), "r"(r1), "r"(r2), "r"(r3), "r"(r4), "r"(r5));
+}
+
+#define __SYSCALL_NARGS_X(a1, a2, a3, a4, a5, a6, a7, a8, n, ...) n
+#define __SYSCALL_NARGS(...) \
+ __SYSCALL_NARGS_X(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1, 0, )
+#define __SYSCALL_CONCAT_X(a, b) a##b
+#define __SYSCALL_CONCAT(a, b) __SYSCALL_CONCAT_X(a, b)
+#define __SYSCALL_DISP(b, ...) \
+ __SYSCALL_CONCAT(b, __SYSCALL_NARGS(__VA_ARGS__))(__VA_ARGS__)
+
+#define internal_syscall(...) __SYSCALL_DISP(__internal_syscall, __VA_ARGS__)
+
+// Helper function used to avoid clobbering of errno.
+bool internal_iserror(uptr retval, int *rverrno) {
+ if (retval >= (uptr)-4095) {
+ if (rverrno)
+ *rverrno = -retval;
+ return true;
+ }
+ return false;
+}
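
The new Hexagon port dispatches internal_syscall(nr, ...) to one of __internal_syscall0..6 by counting the arguments after the syscall number with __SYSCALL_NARGS. For readers unfamiliar with the trick, here is a minimal standalone sketch of the same argument-counting dispatch; my_call, DISP and the other helper names are illustrative and not taken from the file above:

    #include <cstdio>

    // NARGS(num, a1, ..., ak) expands to k: padding the argument list with
    // 7..0 makes the 9th argument equal to the number of arguments after the
    // first one (the "syscall number" slot is always present and not counted).
    #define NARGS_X(a1, a2, a3, a4, a5, a6, a7, a8, n, ...) n
    #define NARGS(...) NARGS_X(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1, 0, )
    #define CONCAT_X(a, b) a##b
    #define CONCAT(a, b) CONCAT_X(a, b)
    // Pastes the counted value onto a base name: DISP(f, n, a) -> f1(n, a).
    #define DISP(base, ...) CONCAT(base, NARGS(__VA_ARGS__))(__VA_ARGS__)
    #define my_call(...) DISP(my_call, __VA_ARGS__)

    static long my_call0(long num) { return num; }
    static long my_call1(long num, long a) { return num + a; }
    static long my_call2(long num, long a, long b) { return num + a + b; }

    int main() {
      std::printf("%ld %ld %ld\n", my_call(10), my_call(10, 1), my_call(10, 1, 2));
      // prints: 10 11 13
      return 0;
    }
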
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_thread_registry.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_thread_registry.cpp
index 745fbf76b01f..2e1cd0238812 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_thread_registry.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_thread_registry.cpp
@@ -13,6 +13,8 @@
#include "sanitizer_thread_registry.h"
+#include "sanitizer_placement_new.h"
+
namespace __sanitizer {
ThreadContextBase::ThreadContextBase(u32 tid)
@@ -119,7 +121,7 @@ ThreadRegistry::ThreadRegistry(ThreadContextFactory factory, u32 max_threads,
void ThreadRegistry::GetNumberOfThreads(uptr *total, uptr *running,
uptr *alive) {
- BlockingMutexLock l(&mtx_);
+ ThreadRegistryLock l(this);
if (total)
*total = threads_.size();
if (running) *running = running_threads_;
@@ -127,13 +129,13 @@ void ThreadRegistry::GetNumberOfThreads(uptr *total, uptr *running,
}
uptr ThreadRegistry::GetMaxAliveThreads() {
- BlockingMutexLock l(&mtx_);
+ ThreadRegistryLock l(this);
return max_alive_threads_;
}
u32 ThreadRegistry::CreateThread(uptr user_id, bool detached, u32 parent_tid,
void *arg) {
- BlockingMutexLock l(&mtx_);
+ ThreadRegistryLock l(this);
u32 tid = kInvalidTid;
ThreadContextBase *tctx = QuarantinePop();
if (tctx) {
@@ -162,6 +164,12 @@ u32 ThreadRegistry::CreateThread(uptr user_id, bool detached, u32 parent_tid,
max_alive_threads_++;
CHECK_EQ(alive_threads_, max_alive_threads_);
}
+ if (user_id) {
+ // Ensure that user_id is unique. If it is not, we are in trouble:
+ // ignoring a duplicate can lead to very hard-to-debug false
+ // positives later (e.g. joining the wrong thread).
+ CHECK(live_.try_emplace(user_id, tid).second);
+ }
tctx->SetCreated(user_id, total_threads_++, detached,
parent_tid, arg);
return tid;
@@ -179,7 +187,7 @@ void ThreadRegistry::RunCallbackForEachThreadLocked(ThreadCallback cb,
}
u32 ThreadRegistry::FindThread(FindThreadCallback cb, void *arg) {
- BlockingMutexLock l(&mtx_);
+ ThreadRegistryLock l(this);
for (u32 tid = 0; tid < threads_.size(); tid++) {
ThreadContextBase *tctx = threads_[tid];
if (tctx != 0 && cb(tctx, arg))
@@ -211,7 +219,7 @@ ThreadContextBase *ThreadRegistry::FindThreadContextByOsIDLocked(tid_t os_id) {
}
void ThreadRegistry::SetThreadName(u32 tid, const char *name) {
- BlockingMutexLock l(&mtx_);
+ ThreadRegistryLock l(this);
ThreadContextBase *tctx = threads_[tid];
CHECK_NE(tctx, 0);
CHECK_EQ(SANITIZER_FUCHSIA ? ThreadStatusCreated : ThreadStatusRunning,
@@ -220,19 +228,13 @@ void ThreadRegistry::SetThreadName(u32 tid, const char *name) {
}
void ThreadRegistry::SetThreadNameByUserId(uptr user_id, const char *name) {
- BlockingMutexLock l(&mtx_);
- for (u32 tid = 0; tid < threads_.size(); tid++) {
- ThreadContextBase *tctx = threads_[tid];
- if (tctx != 0 && tctx->user_id == user_id &&
- tctx->status != ThreadStatusInvalid) {
- tctx->SetName(name);
- return;
- }
- }
+ ThreadRegistryLock l(this);
+ if (const auto *tid = live_.find(user_id))
+ threads_[tid->second]->SetName(name);
}
void ThreadRegistry::DetachThread(u32 tid, void *arg) {
- BlockingMutexLock l(&mtx_);
+ ThreadRegistryLock l(this);
ThreadContextBase *tctx = threads_[tid];
CHECK_NE(tctx, 0);
if (tctx->status == ThreadStatusInvalid) {
@@ -241,6 +243,8 @@ void ThreadRegistry::DetachThread(u32 tid, void *arg) {
}
tctx->OnDetached(arg);
if (tctx->status == ThreadStatusFinished) {
+ if (tctx->user_id)
+ live_.erase(tctx->user_id);
tctx->SetDead();
QuarantinePush(tctx);
} else {
@@ -252,7 +256,7 @@ void ThreadRegistry::JoinThread(u32 tid, void *arg) {
bool destroyed = false;
do {
{
- BlockingMutexLock l(&mtx_);
+ ThreadRegistryLock l(this);
ThreadContextBase *tctx = threads_[tid];
CHECK_NE(tctx, 0);
if (tctx->status == ThreadStatusInvalid) {
@@ -260,6 +264,8 @@ void ThreadRegistry::JoinThread(u32 tid, void *arg) {
return;
}
if ((destroyed = tctx->GetDestroyed())) {
+ if (tctx->user_id)
+ live_.erase(tctx->user_id);
tctx->SetJoined(arg);
QuarantinePush(tctx);
}
@@ -275,7 +281,7 @@ void ThreadRegistry::JoinThread(u32 tid, void *arg) {
// thread before trying to create it, and then failed to actually
// create it, and so never called StartThread.
ThreadStatus ThreadRegistry::FinishThread(u32 tid) {
- BlockingMutexLock l(&mtx_);
+ ThreadRegistryLock l(this);
CHECK_GT(alive_threads_, 0);
alive_threads_--;
ThreadContextBase *tctx = threads_[tid];
@@ -292,6 +298,8 @@ ThreadStatus ThreadRegistry::FinishThread(u32 tid) {
}
tctx->SetFinished();
if (dead) {
+ if (tctx->user_id)
+ live_.erase(tctx->user_id);
tctx->SetDead();
QuarantinePush(tctx);
}
@@ -301,7 +309,7 @@ ThreadStatus ThreadRegistry::FinishThread(u32 tid) {
void ThreadRegistry::StartThread(u32 tid, tid_t os_id, ThreadType thread_type,
void *arg) {
- BlockingMutexLock l(&mtx_);
+ ThreadRegistryLock l(this);
running_threads_++;
ThreadContextBase *tctx = threads_[tid];
CHECK_NE(tctx, 0);
@@ -333,14 +341,28 @@ ThreadContextBase *ThreadRegistry::QuarantinePop() {
return tctx;
}
+u32 ThreadRegistry::ConsumeThreadUserId(uptr user_id) {
+ ThreadRegistryLock l(this);
+ u32 tid;
+ auto *t = live_.find(user_id);
+ CHECK(t);
+ tid = t->second;
+ live_.erase(t);
+ auto *tctx = threads_[tid];
+ CHECK_EQ(tctx->user_id, user_id);
+ tctx->user_id = 0;
+ return tid;
+}
+
void ThreadRegistry::SetThreadUserId(u32 tid, uptr user_id) {
- BlockingMutexLock l(&mtx_);
+ ThreadRegistryLock l(this);
ThreadContextBase *tctx = threads_[tid];
CHECK_NE(tctx, 0);
CHECK_NE(tctx->status, ThreadStatusInvalid);
CHECK_NE(tctx->status, ThreadStatusDead);
CHECK_EQ(tctx->user_id, 0);
tctx->user_id = user_id;
+ CHECK(live_.try_emplace(user_id, tctx->tid).second);
}
} // namespace __sanitizer
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_thread_registry.h b/compiler-rt/lib/sanitizer_common/sanitizer_thread_registry.h
index 0b28bbe6ddf6..a259b324220f 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_thread_registry.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_thread_registry.h
@@ -15,6 +15,7 @@
#define SANITIZER_THREAD_REGISTRY_H
#include "sanitizer_common.h"
+#include "sanitizer_dense_map.h"
#include "sanitizer_list.h"
#include "sanitizer_mutex.h"
@@ -127,6 +128,7 @@ class MUTEX ThreadRegistry {
// Finishes thread and returns previous status.
ThreadStatus FinishThread(u32 tid);
void StartThread(u32 tid, tid_t os_id, ThreadType thread_type, void *arg);
+ u32 ConsumeThreadUserId(uptr user_id);
void SetThreadUserId(u32 tid, uptr user_id);
private:
@@ -135,7 +137,7 @@ class MUTEX ThreadRegistry {
const u32 thread_quarantine_size_;
const u32 max_reuse_;
- BlockingMutex mtx_;
+ Mutex mtx_;
u64 total_threads_; // Total number of created threads. May be greater than
// max_threads_ if contexts were reused.
@@ -146,6 +148,7 @@ class MUTEX ThreadRegistry {
InternalMmapVector<ThreadContextBase *> threads_;
IntrusiveList<ThreadContextBase> dead_threads_;
IntrusiveList<ThreadContextBase> invalid_threads_;
+ DenseMap<uptr, Tid> live_;
void QuarantinePush(ThreadContextBase *tctx);
ThreadContextBase *QuarantinePop();
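
The DenseMap<uptr, Tid> live_ added here keys each running thread's user-supplied id (typically the pthread_t) to its internal tid, which is what lets SetThreadNameByUserId and the new ConsumeThreadUserId above do a single lookup instead of scanning threads_. A rough standalone analogy of that bookkeeping, using std::unordered_map in place of the sanitizer DenseMap and illustrative names throughout:

    #include <cassert>
    #include <cstdint>
    #include <cstdio>
    #include <string>
    #include <unordered_map>
    #include <vector>

    // Toy registry mirroring the user_id -> tid map maintained above.
    struct ToyRegistry {
      struct Ctx { uintptr_t user_id; std::string name; };
      std::vector<Ctx> threads;                      // indexed by internal tid
      std::unordered_map<uintptr_t, uint32_t> live;  // user_id -> tid

      uint32_t CreateThread(uintptr_t user_id) {
        uint32_t tid = static_cast<uint32_t>(threads.size());
        threads.push_back({user_id, ""});
        if (user_id) {
          // A user_id must map to at most one live thread; a duplicate would
          // later make lookups act on the wrong thread.
          bool inserted = live.emplace(user_id, tid).second;
          assert(inserted);
        }
        return tid;
      }

      void SetThreadNameByUserId(uintptr_t user_id, const char *name) {
        auto it = live.find(user_id);  // O(1) instead of a scan over threads
        if (it != live.end())
          threads[it->second].name = name;
      }

      // Detach user_id from its thread and hand back the internal tid,
      // mirroring ConsumeThreadUserId above.
      uint32_t ConsumeThreadUserId(uintptr_t user_id) {
        auto it = live.find(user_id);
        assert(it != live.end());
        uint32_t tid = it->second;
        live.erase(it);
        threads[tid].user_id = 0;
        return tid;
      }
    };

    int main() {
      ToyRegistry r;
      uint32_t tid = r.CreateThread(0x1234);
      r.SetThreadNameByUserId(0x1234, "worker");
      uint32_t same = r.ConsumeThreadUserId(0x1234);
      std::printf("tid=%u consumed=%u name=%s\n", tid, same,
                  r.threads[tid].name.c_str());
      return 0;
    }
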
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.cpp
index 1f664b6cf5b8..b13e2dc9e332 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.cpp
@@ -44,7 +44,7 @@ static atomic_uintptr_t number_of_live_dtls;
static const uptr kDestroyedThread = -1;
static void DTLS_Deallocate(DTLS::DTVBlock *block) {
- VReport(2, "__tls_get_addr: DTLS_Deallocate %p %zd\n", block);
+ VReport(2, "__tls_get_addr: DTLS_Deallocate %p\n", (void *)block);
UnmapOrDie(block, sizeof(DTLS::DTVBlock));
atomic_fetch_sub(&number_of_live_dtls, 1, memory_order_relaxed);
}
@@ -66,12 +66,13 @@ static DTLS::DTVBlock *DTLS_NextBlock(atomic_uintptr_t *cur) {
}
uptr num_live_dtls =
atomic_fetch_add(&number_of_live_dtls, 1, memory_order_relaxed);
- VReport(2, "__tls_get_addr: DTLS_NextBlock %p %zd\n", &dtls, num_live_dtls);
+ VReport(2, "__tls_get_addr: DTLS_NextBlock %p %zd\n", (void *)&dtls,
+ num_live_dtls);
return new_dtv;
}
static DTLS::DTV *DTLS_Find(uptr id) {
- VReport(2, "__tls_get_addr: DTLS_Find %p %zd\n", &dtls, id);
+ VReport(2, "__tls_get_addr: DTLS_Find %p %zd\n", (void *)&dtls, id);
static constexpr uptr kPerBlock = ARRAY_SIZE(DTLS::DTVBlock::dtvs);
DTLS::DTVBlock *cur = DTLS_NextBlock(&dtls.dtv_block);
if (!cur)
@@ -82,7 +83,7 @@ static DTLS::DTV *DTLS_Find(uptr id) {
void DTLS_Destroy() {
if (!common_flags()->intercept_tls_get_addr) return;
- VReport(2, "__tls_get_addr: DTLS_Destroy %p\n", &dtls);
+ VReport(2, "__tls_get_addr: DTLS_Destroy %p\n", (void *)&dtls);
DTLS::DTVBlock *block = (DTLS::DTVBlock *)atomic_exchange(
&dtls.dtv_block, kDestroyedThread, memory_order_release);
while (block) {
@@ -117,26 +118,27 @@ DTLS::DTV *DTLS_on_tls_get_addr(void *arg_void, void *res,
return 0;
uptr tls_size = 0;
uptr tls_beg = reinterpret_cast<uptr>(res) - arg->offset - kDtvOffset;
- VReport(2, "__tls_get_addr: %p {%p,%p} => %p; tls_beg: %p; sp: %p "
- "num_live_dtls %zd\n",
- arg, arg->dso_id, arg->offset, res, tls_beg, &tls_beg,
+ VReport(2,
+ "__tls_get_addr: %p {0x%zx,0x%zx} => %p; tls_beg: 0x%zx; sp: %p "
+ "num_live_dtls %zd\n",
+ (void *)arg, arg->dso_id, arg->offset, res, tls_beg, (void *)&tls_beg,
atomic_load(&number_of_live_dtls, memory_order_relaxed));
if (dtls.last_memalign_ptr == tls_beg) {
tls_size = dtls.last_memalign_size;
- VReport(2, "__tls_get_addr: glibc <=2.18 suspected; tls={%p,%p}\n",
- tls_beg, tls_size);
+ VReport(2, "__tls_get_addr: glibc <=2.18 suspected; tls={0x%zx,0x%zx}\n",
+ tls_beg, tls_size);
} else if (tls_beg >= static_tls_begin && tls_beg < static_tls_end) {
// This is the static TLS block which was initialized / unpoisoned at thread
// creation.
- VReport(2, "__tls_get_addr: static tls: %p\n", tls_beg);
+ VReport(2, "__tls_get_addr: static tls: 0x%zx\n", tls_beg);
tls_size = 0;
} else if ((tls_beg % 4096) == sizeof(Glibc_2_19_tls_header)) {
// We may want to check gnu_get_libc_version().
Glibc_2_19_tls_header *header = (Glibc_2_19_tls_header *)tls_beg - 1;
tls_size = header->size;
tls_beg = header->start;
- VReport(2, "__tls_get_addr: glibc >=2.19 suspected; tls={%p %p}\n",
- tls_beg, tls_size);
+ VReport(2, "__tls_get_addr: glibc >=2.19 suspected; tls={0x%zx 0x%zx}\n",
+ tls_beg, tls_size);
} else {
VReport(2, "__tls_get_addr: Can't guess glibc version\n");
// This may happen inside the DTOR of main thread, so just ignore it.
@@ -149,7 +151,7 @@ DTLS::DTV *DTLS_on_tls_get_addr(void *arg_void, void *res,
void DTLS_on_libc_memalign(void *ptr, uptr size) {
if (!common_flags()->intercept_tls_get_addr) return;
- VReport(2, "DTLS_on_libc_memalign: %p %p\n", ptr, size);
+ VReport(2, "DTLS_on_libc_memalign: %p 0x%zx\n", ptr, size);
dtls.last_memalign_ptr = reinterpret_cast<uptr>(ptr);
dtls.last_memalign_size = size;
}
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_type_traits.h b/compiler-rt/lib/sanitizer_common/sanitizer_type_traits.h
index 2a58d9874d2c..06a44d1b5c7a 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_type_traits.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_type_traits.h
@@ -13,6 +13,8 @@
#ifndef SANITIZER_TYPE_TRAITS_H
#define SANITIZER_TYPE_TRAITS_H
+#include "sanitizer_common/sanitizer_internal_defs.h"
+
namespace __sanitizer {
struct true_type {
@@ -57,6 +59,83 @@ struct conditional<false, T, F> {
using type = F;
};
+template <class T>
+struct remove_reference {
+ using type = T;
+};
+template <class T>
+struct remove_reference<T&> {
+ using type = T;
+};
+template <class T>
+struct remove_reference<T&&> {
+ using type = T;
+};
+
+template <class T>
+WARN_UNUSED_RESULT inline typename remove_reference<T>::type&& move(T&& t) {
+ return static_cast<typename remove_reference<T>::type&&>(t);
+}
+
+template <class T>
+WARN_UNUSED_RESULT inline constexpr T&& forward(
+ typename remove_reference<T>::type& t) {
+ return static_cast<T&&>(t);
+}
+
+template <class T>
+WARN_UNUSED_RESULT inline constexpr T&& forward(
+ typename remove_reference<T>::type&& t) {
+ return static_cast<T&&>(t);
+}
+
+template <class T, T v>
+struct integral_constant {
+ static constexpr const T value = v;
+ typedef T value_type;
+ typedef integral_constant type;
+ constexpr operator value_type() const { return value; }
+ constexpr value_type operator()() const { return value; }
+};
+
+#ifndef __has_builtin
+# define __has_builtin(x) 0
+#endif
+
+#if __has_builtin(__is_trivially_destructible)
+
+template <class T>
+struct is_trivially_destructible
+ : public integral_constant<bool, __is_trivially_destructible(T)> {};
+
+#elif __has_builtin(__has_trivial_destructor)
+
+template <class T>
+struct is_trivially_destructible
+ : public integral_constant<bool, __has_trivial_destructor(T)> {};
+
+#else
+
+template <class T>
+struct is_trivially_destructible
+ : public integral_constant<bool, /* less efficient fallback */ false> {};
+
+#endif
+
+#if __has_builtin(__is_trivially_copyable)
+
+template <class T>
+struct is_trivially_copyable
+ : public integral_constant<bool, __is_trivially_copyable(T)> {};
+
+#else
+
+template <class T>
+struct is_trivially_copyable
+ : public integral_constant<bool, /* less efficient fallback */ false> {};
+
+#endif
+
} // namespace __sanitizer
#endif
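
The additions above are minimal stand-ins for std::move, std::forward, std::integral_constant and the trivially-destructible/copyable traits, needed because the sanitizer runtime cannot pull in libc++ headers (presumably in support of the new DenseMap used by ThreadRegistry above). A small standalone sketch of how such hand-rolled move/forward behave, with illustrative names in a throwaway namespace:

    #include <cstdio>

    namespace mini {

    // Same shape as the remove_reference / move / forward added above.
    template <class T> struct remove_reference      { using type = T; };
    template <class T> struct remove_reference<T&>  { using type = T; };
    template <class T> struct remove_reference<T&&> { using type = T; };

    template <class T>
    typename remove_reference<T>::type &&move(T &&t) {
      return static_cast<typename remove_reference<T>::type &&>(t);
    }

    template <class T>
    constexpr T &&forward(typename remove_reference<T>::type &t) {
      return static_cast<T &&>(t);
    }
    template <class T>
    constexpr T &&forward(typename remove_reference<T>::type &&t) {
      return static_cast<T &&>(t);
    }

    }  // namespace mini

    struct Buffer {
      bool moved_from = false;
      Buffer() = default;
      Buffer(Buffer &&other) { other.moved_from = true; }
    };

    // Perfect forwarding: rvalue arguments keep their rvalue-ness, so the
    // move constructor above is actually selected.
    template <class T, class Arg>
    T construct(Arg &&arg) {
      return T(mini::forward<Arg>(arg));
    }

    int main() {
      Buffer a;
      Buffer b = construct<Buffer>(mini::move(a));
      std::printf("a.moved_from=%d\n", a.moved_from);  // prints 1
      (void)b;
      return 0;
    }
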
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp
index dddd885a45dd..c3607dbed23e 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp
@@ -16,6 +16,7 @@
#define WIN32_LEAN_AND_MEAN
#define NOGDI
+#include <direct.h>
#include <windows.h>
#include <io.h>
#include <psapi.h>
@@ -565,6 +566,8 @@ void Abort() {
internal__exit(3);
}
+bool CreateDir(const char *pathname) { return _mkdir(pathname) == 0; }
+
#if !SANITIZER_GO
// Read the file to extract the ImageBase field from the PE header. If ASLR is
// disabled and this virtual address is available, the loader will typically
@@ -827,27 +830,6 @@ void FutexWake(atomic_uint32_t *p, u32 count) {
WakeByAddressAll(p);
}
-// ---------------------- BlockingMutex ---------------- {{{1
-
-BlockingMutex::BlockingMutex() {
- CHECK(sizeof(SRWLOCK) <= sizeof(opaque_storage_));
- internal_memset(this, 0, sizeof(*this));
-}
-
-void BlockingMutex::Lock() {
- AcquireSRWLockExclusive((PSRWLOCK)opaque_storage_);
- CHECK_EQ(owner_, 0);
- owner_ = GetThreadSelf();
-}
-
-void BlockingMutex::Unlock() {
- CheckLocked();
- owner_ = 0;
- ReleaseSRWLockExclusive((PSRWLOCK)opaque_storage_);
-}
-
-void BlockingMutex::CheckLocked() const { CHECK_EQ(owner_, GetThreadSelf()); }
-
uptr GetTlsSize() {
return 0;
}
@@ -1131,7 +1113,7 @@ bool IsProcessRunning(pid_t pid) {
int WaitForProcess(pid_t pid) { return -1; }
// FIXME implement on this platform.
-void GetMemoryProfile(fill_profile_f cb, uptr *stats, uptr stats_size) { }
+void GetMemoryProfile(fill_profile_f cb, uptr *stats) {}
void CheckNoDeepBind(const char *filename, int flag) {
// Do nothing.
diff --git a/compiler-rt/lib/sanitizer_common/symbolizer/scripts/ar_to_bc.sh b/compiler-rt/lib/sanitizer_common/symbolizer/scripts/ar_to_bc.sh
index 5c77bea83294..fa05d61a7c6d 100755
--- a/compiler-rt/lib/sanitizer_common/symbolizer/scripts/ar_to_bc.sh
+++ b/compiler-rt/lib/sanitizer_common/symbolizer/scripts/ar_to_bc.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
function usage() {
echo "Usage: $0 INPUT... OUTPUT"
diff --git a/compiler-rt/lib/sanitizer_common/symbolizer/scripts/build_symbolizer.sh b/compiler-rt/lib/sanitizer_common/symbolizer/scripts/build_symbolizer.sh
index c793875db099..d1d61fb7ab2a 100755
--- a/compiler-rt/lib/sanitizer_common/symbolizer/scripts/build_symbolizer.sh
+++ b/compiler-rt/lib/sanitizer_common/symbolizer/scripts/build_symbolizer.sh
@@ -1,4 +1,4 @@
-#!/bin/bash -eu
+#!/usr/bin/env bash
#
# Run as: CLANG=bin/clang ZLIB_SRC=src/zlib \
# build_symbolizer.sh runtime_build/lib/clang/4.0.0/lib/linux/
@@ -71,6 +71,7 @@ TBLGEN=$CLANG_DIR/llvm-tblgen
OPT=$CLANG_DIR/opt
export AR=$CLANG_DIR/llvm-ar
export LINK=$CLANG_DIR/llvm-link
+TARGET_TRIPLE=$($CC -print-target-triple)
for F in $CC $CXX $TBLGEN $LINK $OPT $AR; do
if [[ ! -x "$F" ]]; then
@@ -123,7 +124,7 @@ cd ${LIBCXX_BUILD}
ninja cxx cxxabi
FLAGS="${FLAGS} -fno-rtti -fno-exceptions"
-LLVM_FLAGS="${FLAGS} -nostdinc++ -I${ZLIB_BUILD} -I${LIBCXX_BUILD}/include/c++/v1 -Wno-error=global-constructors"
+LLVM_FLAGS="${FLAGS} -nostdinc++ -I${ZLIB_BUILD} -isystem ${LIBCXX_BUILD}/include/${TARGET_TRIPLE}/c++/v1 -isystem ${LIBCXX_BUILD}/include/c++/v1 -Wno-error=global-constructors"
# Build LLVM.
if [[ ! -d ${LLVM_BUILD} ]]; then
@@ -156,9 +157,11 @@ $AR rc symbolizer.a sanitizer_symbolize.o sanitizer_wrappers.o
SYMBOLIZER_API_LIST=__sanitizer_symbolize_code,__sanitizer_symbolize_data,__sanitizer_symbolize_flush,__sanitizer_symbolize_demangle
+LIBCXX_ARCHIVE_DIR=$(dirname $(find $LIBCXX_BUILD -name libc++.a | head -n1))
+
# Merge all the object files together and copy the resulting library back.
-$SCRIPT_DIR/ar_to_bc.sh $LIBCXX_BUILD/lib/libc++.a \
- $LIBCXX_BUILD/lib/libc++abi.a \
+$SCRIPT_DIR/ar_to_bc.sh $LIBCXX_ARCHIVE_DIR/libc++.a \
+ $LIBCXX_ARCHIVE_DIR/libc++abi.a \
$LLVM_BUILD/lib/libLLVMSymbolize.a \
$LLVM_BUILD/lib/libLLVMObject.a \
$LLVM_BUILD/lib/libLLVMBinaryFormat.a \
diff --git a/compiler-rt/lib/scudo/scudo_utils.cpp b/compiler-rt/lib/scudo/scudo_utils.cpp
index b7ce8f915817..b0aef752c679 100644
--- a/compiler-rt/lib/scudo/scudo_utils.cpp
+++ b/compiler-rt/lib/scudo/scudo_utils.cpp
@@ -39,7 +39,7 @@ extern int VSNPrintf(char *buff, int buff_length, const char *format,
namespace __scudo {
-FORMAT(1, 2) void NORETURN dieWithMessage(const char *Format, ...) {
+void dieWithMessage(const char *Format, ...) {
static const char ScudoError[] = "Scudo ERROR: ";
static constexpr uptr PrefixSize = sizeof(ScudoError) - 1;
// Our messages are tiny, 256 characters is more than enough.
diff --git a/compiler-rt/lib/scudo/scudo_utils.h b/compiler-rt/lib/scudo/scudo_utils.h
index b657c69d9baf..5a9b32f0b234 100644
--- a/compiler-rt/lib/scudo/scudo_utils.h
+++ b/compiler-rt/lib/scudo/scudo_utils.h
@@ -27,7 +27,7 @@ inline Dest bit_cast(const Source& source) {
return dest;
}
-void NORETURN dieWithMessage(const char *Format, ...);
+void dieWithMessage(const char *Format, ...) NORETURN FORMAT(1, 2);
bool hasHardwareCRC32();
diff --git a/compiler-rt/lib/scudo/standalone/combined.h b/compiler-rt/lib/scudo/standalone/combined.h
index fd5360ce0f55..371fb783a06e 100644
--- a/compiler-rt/lib/scudo/standalone/combined.h
+++ b/compiler-rt/lib/scudo/standalone/combined.h
@@ -205,6 +205,16 @@ public:
#endif // GWP_ASAN_HOOKS
}
+#ifdef GWP_ASAN_HOOKS
+ const gwp_asan::AllocationMetadata *getGwpAsanAllocationMetadata() {
+ return GuardedAlloc.getMetadataRegion();
+ }
+
+ const gwp_asan::AllocatorState *getGwpAsanAllocatorState() {
+ return GuardedAlloc.getAllocatorState();
+ }
+#endif // GWP_ASAN_HOOKS
+
ALWAYS_INLINE void initThreadMaybe(bool MinimalInit = false) {
TSDRegistry.initThreadMaybe(this, MinimalInit);
}
@@ -910,7 +920,7 @@ public:
if (!Depot->find(Hash, &RingPos, &Size))
return;
for (unsigned I = 0; I != Size && I != MaxTraceSize; ++I)
- Trace[I] = (*Depot)[RingPos + I];
+ Trace[I] = static_cast<uintptr_t>((*Depot)[RingPos + I]);
}
static void getErrorInfo(struct scudo_error_info *ErrorInfo,
diff --git a/compiler-rt/lib/scudo/standalone/internal_defs.h b/compiler-rt/lib/scudo/standalone/internal_defs.h
index c9ffad136b78..621fc9c45e95 100644
--- a/compiler-rt/lib/scudo/standalone/internal_defs.h
+++ b/compiler-rt/lib/scudo/standalone/internal_defs.h
@@ -78,16 +78,16 @@
namespace scudo {
-typedef unsigned long uptr;
-typedef unsigned char u8;
-typedef unsigned short u16;
-typedef unsigned int u32;
-typedef unsigned long long u64;
-typedef signed long sptr;
-typedef signed char s8;
-typedef signed short s16;
-typedef signed int s32;
-typedef signed long long s64;
+typedef uintptr_t uptr;
+typedef uint8_t u8;
+typedef uint16_t u16;
+typedef uint32_t u32;
+typedef uint64_t u64;
+typedef intptr_t sptr;
+typedef int8_t s8;
+typedef int16_t s16;
+typedef int32_t s32;
+typedef int64_t s64;
// The following two functions have platform specific implementations.
void outputRaw(const char *Buffer);
diff --git a/compiler-rt/lib/scudo/standalone/memtag.h b/compiler-rt/lib/scudo/standalone/memtag.h
index c48e228fbe44..df346bce1bd4 100644
--- a/compiler-rt/lib/scudo/standalone/memtag.h
+++ b/compiler-rt/lib/scudo/standalone/memtag.h
@@ -23,7 +23,9 @@ namespace scudo {
// We assume that Top-Byte Ignore is enabled if the architecture supports memory
// tagging. Not all operating systems enable TBI, so we only claim architectural
// support for memory tagging if the operating system enables TBI.
-#if SCUDO_LINUX && !defined(SCUDO_DISABLE_TBI)
+// HWASan uses the top byte for its own purpose and Scudo should not touch it.
+#if SCUDO_LINUX && !defined(SCUDO_DISABLE_TBI) && \
+ !__has_feature(hwaddress_sanitizer)
inline constexpr bool archSupportsMemoryTagging() { return true; }
#else
inline constexpr bool archSupportsMemoryTagging() { return false; }
@@ -91,9 +93,10 @@ inline bool systemDetectsMemoryTagFaultsTestOnly() {
#ifndef PR_MTE_TCF_MASK
#define PR_MTE_TCF_MASK (3UL << PR_MTE_TCF_SHIFT)
#endif
- return (static_cast<unsigned long>(
- prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0)) &
- PR_MTE_TCF_MASK) != PR_MTE_TCF_NONE;
+ int res = prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0);
+ if (res == -1)
+ return false;
+ return (static_cast<unsigned long>(res) & PR_MTE_TCF_MASK) != PR_MTE_TCF_NONE;
}
inline void enableSystemMemoryTaggingTestOnly() {
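
The added res == -1 check matters because prctl() returns -1 when PR_GET_TAGGED_ADDR_CTRL is unsupported, and masking the all-ones error value with PR_MTE_TCF_MASK yields a nonzero result, so the old code could misreport that tag faults are detected. A tiny self-contained illustration of the failure mode (the mask mirrors the fallback definition above; the PR_MTE_TCF_SHIFT value of 1 is taken from the Linux uapi headers):

    #include <cstdio>

    #define PR_MTE_TCF_SHIFT 1
    #define PR_MTE_TCF_NONE (0UL << PR_MTE_TCF_SHIFT)
    #define PR_MTE_TCF_MASK (3UL << PR_MTE_TCF_SHIFT)

    int main() {
      long res = -1;  // what prctl() returns when the option is unsupported
      // Old logic: masks the error value, sees a nonzero TCF field -> "true".
      bool buggy =
          (static_cast<unsigned long>(res) & PR_MTE_TCF_MASK) != PR_MTE_TCF_NONE;
      // Fixed logic: an error means tag faults are not detected.
      bool fixed =
          res != -1 &&
          (static_cast<unsigned long>(res) & PR_MTE_TCF_MASK) != PR_MTE_TCF_NONE;
      std::printf("buggy=%d fixed=%d\n", buggy, fixed);  // buggy=1 fixed=0
      return 0;
    }
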
diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h
index 13420bf3d222..6c1785512c65 100644
--- a/compiler-rt/lib/scudo/standalone/primary64.h
+++ b/compiler-rt/lib/scudo/standalone/primary64.h
@@ -164,9 +164,9 @@ public:
PoppedBlocks += Region->Stats.PoppedBlocks;
PushedBlocks += Region->Stats.PushedBlocks;
}
- Str->append("Stats: SizeClassAllocator64: %zuM mapped (%zuM rss) in %zu "
+ Str->append("Stats: SizeClassAllocator64: %zuM mapped (%uM rss) in %zu "
"allocations; remains %zu\n",
- TotalMapped >> 20, 0, PoppedBlocks,
+ TotalMapped >> 20, 0U, PoppedBlocks,
PoppedBlocks - PushedBlocks);
for (uptr I = 0; I < NumClasses; I++)
diff --git a/compiler-rt/lib/scudo/standalone/secondary.h b/compiler-rt/lib/scudo/standalone/secondary.h
index 630e64d46edf..abb58a2882af 100644
--- a/compiler-rt/lib/scudo/standalone/secondary.h
+++ b/compiler-rt/lib/scudo/standalone/secondary.h
@@ -485,7 +485,7 @@ void *MapAllocator<Config>::allocate(Options Options, uptr Size, uptr Alignment,
FillContentsMode FillContents) {
if (Options.get(OptionBit::AddLargeAllocationSlack))
Size += 1UL << SCUDO_MIN_ALIGNMENT_LOG;
- Alignment = Max(Alignment, 1UL << SCUDO_MIN_ALIGNMENT_LOG);
+ Alignment = Max(Alignment, uptr(1U) << SCUDO_MIN_ALIGNMENT_LOG);
const uptr PageSize = getPageSizeCached();
uptr RoundedSize =
roundUpTo(roundUpTo(Size, Alignment) + LargeBlock::getHeaderSize() +
@@ -602,12 +602,11 @@ void MapAllocator<Config>::deallocate(Options Options, void *Ptr) {
template <typename Config>
void MapAllocator<Config>::getStats(ScopedString *Str) const {
- Str->append(
- "Stats: MapAllocator: allocated %zu times (%zuK), freed %zu times "
- "(%zuK), remains %zu (%zuK) max %zuM\n",
- NumberOfAllocs, AllocatedBytes >> 10, NumberOfFrees, FreedBytes >> 10,
- NumberOfAllocs - NumberOfFrees, (AllocatedBytes - FreedBytes) >> 10,
- LargestSize >> 20);
+ Str->append("Stats: MapAllocator: allocated %u times (%zuK), freed %u times "
+ "(%zuK), remains %u (%zuK) max %zuM\n",
+ NumberOfAllocs, AllocatedBytes >> 10, NumberOfFrees,
+ FreedBytes >> 10, NumberOfAllocs - NumberOfFrees,
+ (AllocatedBytes - FreedBytes) >> 10, LargestSize >> 20);
}
} // namespace scudo
diff --git a/compiler-rt/lib/scudo/standalone/size_class_map.h b/compiler-rt/lib/scudo/standalone/size_class_map.h
index ba0f78453bcb..28b16d976e5e 100644
--- a/compiler-rt/lib/scudo/standalone/size_class_map.h
+++ b/compiler-rt/lib/scudo/standalone/size_class_map.h
@@ -335,8 +335,8 @@ template <typename SCMap> inline void printMap() {
const uptr L = S ? getMostSignificantSetBitIndex(S) : 0;
const uptr Cached = SCMap::getMaxCachedHint(S) * S;
Buffer.append(
- "C%02zu => S: %zu diff: +%zu %02zu%% L %zu Cached: %zu %zu; id %zu\n",
- I, S, D, P, L, SCMap::getMaxCachedHint(S), Cached,
+ "C%02zu => S: %zu diff: +%zu %02zu%% L %zu Cached: %u %zu; id %zu\n", I,
+ S, D, P, L, SCMap::getMaxCachedHint(S), Cached,
SCMap::getClassIdBySize(S));
TotalCached += Cached;
PrevS = S;
diff --git a/compiler-rt/lib/scudo/standalone/string_utils.cpp b/compiler-rt/lib/scudo/standalone/string_utils.cpp
index acf85889fcff..13fdb9c6ca6c 100644
--- a/compiler-rt/lib/scudo/standalone/string_utils.cpp
+++ b/compiler-rt/lib/scudo/standalone/string_utils.cpp
@@ -236,7 +236,6 @@ void ScopedString::append(const char *Format, va_list Args) {
va_end(ArgsCopy);
}
-FORMAT(2, 3)
void ScopedString::append(const char *Format, ...) {
va_list Args;
va_start(Args, Format);
@@ -244,7 +243,6 @@ void ScopedString::append(const char *Format, ...) {
va_end(Args);
}
-FORMAT(1, 2)
void Printf(const char *Format, ...) {
va_list Args;
va_start(Args, Format);
diff --git a/compiler-rt/lib/scudo/standalone/string_utils.h b/compiler-rt/lib/scudo/standalone/string_utils.h
index 06d23d42246d..dd6ff7893b83 100644
--- a/compiler-rt/lib/scudo/standalone/string_utils.h
+++ b/compiler-rt/lib/scudo/standalone/string_utils.h
@@ -26,15 +26,16 @@ public:
String.push_back('\0');
}
void append(const char *Format, va_list Args);
- void append(const char *Format, ...);
+ void append(const char *Format, ...) FORMAT(2, 3);
void output() const { outputRaw(String.data()); }
private:
Vector<char> String;
};
-int formatString(char *Buffer, uptr BufferLength, const char *Format, ...);
-void Printf(const char *Format, ...);
+int formatString(char *Buffer, uptr BufferLength, const char *Format, ...)
+ FORMAT(3, 4);
+void Printf(const char *Format, ...) FORMAT(1, 2);
} // namespace scudo
diff --git a/compiler-rt/lib/scudo/standalone/vector.h b/compiler-rt/lib/scudo/standalone/vector.h
index 2c9a6e2aa655..eae774b56e28 100644
--- a/compiler-rt/lib/scudo/standalone/vector.h
+++ b/compiler-rt/lib/scudo/standalone/vector.h
@@ -19,13 +19,14 @@ namespace scudo {
// small vectors. The current implementation supports only POD types.
template <typename T> class VectorNoCtor {
public:
- void init(uptr InitialCapacity = 0) {
- Data = reinterpret_cast<T *>(&LocalData[0]);
+ constexpr void init(uptr InitialCapacity = 0) {
+ Data = &LocalData[0];
CapacityBytes = sizeof(LocalData);
- reserve(InitialCapacity);
+ if (InitialCapacity > capacity())
+ reserve(InitialCapacity);
}
void destroy() {
- if (Data != reinterpret_cast<T *>(&LocalData[0]))
+ if (Data != &LocalData[0])
unmap(Data, CapacityBytes);
}
T &operator[](uptr I) {
@@ -55,7 +56,7 @@ public:
uptr size() const { return Size; }
const T *data() const { return Data; }
T *data() { return Data; }
- uptr capacity() const { return CapacityBytes / sizeof(T); }
+ constexpr uptr capacity() const { return CapacityBytes / sizeof(T); }
void reserve(uptr NewSize) {
// Never downsize internal buffer.
if (NewSize > capacity())
@@ -91,14 +92,14 @@ private:
}
T *Data = nullptr;
- u8 LocalData[256] = {};
+ T LocalData[256 / sizeof(T)] = {};
uptr CapacityBytes = 0;
uptr Size = 0;
};
template <typename T> class Vector : public VectorNoCtor<T> {
public:
- Vector() { VectorNoCtor<T>::init(); }
+ constexpr Vector() { VectorNoCtor<T>::init(); }
explicit Vector(uptr Count) {
VectorNoCtor<T>::init(Count);
this->resize(Count);
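
Typing LocalData as T[] and making init()/Vector() constexpr removes the reinterpret_cast from the constructor path, which (presumably the point of the change) lets a global Vector be constant-initialized instead of needing a dynamic initializer. A reduced sketch of the pattern, with an illustrative class name:

    #include <cstddef>

    // Reduced model of VectorNoCtor: typed inline storage plus a constexpr
    // constructor, so a namespace-scope instance gets static (constant)
    // initialization rather than a runtime constructor call.
    template <typename T> class SmallBuf {
     public:
      constexpr SmallBuf() {
        Data = &Local[0];  // no reinterpret_cast needed on typed storage
        Capacity = sizeof(Local) / sizeof(T);
      }
      constexpr size_t capacity() const { return Capacity; }
      T *data() { return Data; }

     private:
      T *Data = nullptr;
      T Local[256 / sizeof(T)] = {};
      size_t Capacity = 0;
    };

    // Qualifies for constant initialization because the constructor call is a
    // constant expression; a reinterpret_cast inside it would not be.
    SmallBuf<char> gBuf;

    int main() { return gBuf.capacity() == 256 ? 0 : 1; }
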
diff --git a/compiler-rt/lib/scudo/standalone/wrappers_c.h b/compiler-rt/lib/scudo/standalone/wrappers_c.h
index 6d0cecdc4b41..5f7f51f3cedf 100644
--- a/compiler-rt/lib/scudo/standalone/wrappers_c.h
+++ b/compiler-rt/lib/scudo/standalone/wrappers_c.h
@@ -32,6 +32,19 @@ struct __scudo_mallinfo {
__scudo_mallinfo_data_t keepcost;
};
+struct __scudo_mallinfo2 {
+ size_t arena;
+ size_t ordblks;
+ size_t smblks;
+ size_t hblks;
+ size_t hblkhd;
+ size_t usmblks;
+ size_t fsmblks;
+ size_t uordblks;
+ size_t fordblks;
+ size_t keepcost;
+};
+
// Android sometimes includes malloc.h no matter what, which leads to
// conflicting return types for mallinfo() if we use our own structure. So if
// struct mallinfo is declared (#define courtesy of malloc.h), use it directly.
diff --git a/compiler-rt/lib/scudo/standalone/wrappers_c.inc b/compiler-rt/lib/scudo/standalone/wrappers_c.inc
index 43efb02cb860..bbe3617dd0d6 100644
--- a/compiler-rt/lib/scudo/standalone/wrappers_c.inc
+++ b/compiler-rt/lib/scudo/standalone/wrappers_c.inc
@@ -54,6 +54,23 @@ INTERFACE WEAK struct SCUDO_MALLINFO SCUDO_PREFIX(mallinfo)(void) {
return Info;
}
+INTERFACE WEAK struct __scudo_mallinfo2 SCUDO_PREFIX(mallinfo2)(void) {
+ struct __scudo_mallinfo2 Info = {};
+ scudo::StatCounters Stats;
+ SCUDO_ALLOCATOR.getStats(Stats);
+ // Space allocated in mmapped regions (bytes)
+ Info.hblkhd = Stats[scudo::StatMapped];
+ // Maximum total allocated space (bytes)
+ Info.usmblks = Info.hblkhd;
+ // Space in freed fastbin blocks (bytes)
+ Info.fsmblks = Stats[scudo::StatFree];
+ // Total allocated space (bytes)
+ Info.uordblks = Stats[scudo::StatAllocated];
+ // Total free space (bytes)
+ Info.fordblks = Info.fsmblks;
+ return Info;
+}
+
INTERFACE WEAK void *SCUDO_PREFIX(malloc)(size_t size) {
return scudo::setErrnoOnNull(SCUDO_ALLOCATOR.allocate(
size, scudo::Chunk::Origin::Malloc, SCUDO_MALLOC_ALIGNMENT));
@@ -226,7 +243,7 @@ INTERFACE WEAK int SCUDO_PREFIX(malloc_info)(UNUSED int options, FILE *stream) {
fputs("<malloc version=\"scudo-1\">\n", stream);
for (scudo::uptr i = 0; i != max_size; ++i)
if (sizes[i])
- fprintf(stream, "<alloc size=\"%lu\" count=\"%lu\"/>\n", i, sizes[i]);
+ fprintf(stream, "<alloc size=\"%zu\" count=\"%zu\"/>\n", i, sizes[i]);
fputs("</malloc>\n", stream);
SCUDO_PREFIX(free)(sizes);
return 0;
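
mallinfo2 here mirrors glibc 2.33's interface of the same name: the layout matches mallinfo, but every field is size_t, so the statistics no longer wrap once the allocator has handed out more than fits in the legacy int fields. A minimal caller sketch against the plain (unprefixed) glibc-style symbol; it assumes a glibc >= 2.33 toolchain and is not tied to the scudo-prefixed wrapper above:

    #include <cstdio>
    #include <cstdlib>
    #include <malloc.h>  // struct mallinfo2 / mallinfo2() on glibc >= 2.33

    int main() {
      void *p = std::malloc(1 << 20);
      struct mallinfo2 mi = mallinfo2();
      // uordblks is the total allocated space in bytes; as a size_t it does
      // not overflow at 2 GiB the way the int-based mallinfo fields do.
      std::printf("allocated: %zu bytes, mmapped: %zu bytes\n",
                  static_cast<size_t>(mi.uordblks),
                  static_cast<size_t>(mi.hblkhd));
      std::free(p);
      return 0;
    }
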
diff --git a/compiler-rt/lib/scudo/standalone/wrappers_c_checks.h b/compiler-rt/lib/scudo/standalone/wrappers_c_checks.h
index 7fc1a9600e53..ec9c1a104e83 100644
--- a/compiler-rt/lib/scudo/standalone/wrappers_c_checks.h
+++ b/compiler-rt/lib/scudo/standalone/wrappers_c_checks.h
@@ -46,8 +46,10 @@ inline bool checkPosixMemalignAlignment(uptr Alignment) {
// builtin supported by recent clang & GCC if it exists, otherwise fallback to a
// costly division.
inline bool checkForCallocOverflow(uptr Size, uptr N, uptr *Product) {
-#if __has_builtin(__builtin_umull_overflow)
+#if __has_builtin(__builtin_umull_overflow) && (SCUDO_WORDSIZE == 64U)
return __builtin_umull_overflow(Size, N, Product);
+#elif __has_builtin(__builtin_umul_overflow) && (SCUDO_WORDSIZE == 32U)
+ return __builtin_umul_overflow(Size, N, Product);
#else
*Product = Size * N;
if (!Size)
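
The new SCUDO_WORDSIZE guards make the overflow builtin's operand type match uptr: __builtin_umull_overflow operates on unsigned long while __builtin_umul_overflow operates on unsigned int, so each branch is used only where that type has uptr's width. A standalone sketch of the same checked-multiply idea for calloc(n, size), using the type-generic builtin and an illustrative function name:

    #include <cstddef>
    #include <cstdio>

    #ifndef __has_builtin
    #  define __has_builtin(x) 0
    #endif

    // Returns true if n * size overflows size_t; stores the product otherwise.
    // __builtin_mul_overflow is type-generic, so it always matches the width
    // of its operands regardless of the target word size.
    static bool callocWouldOverflow(size_t n, size_t size, size_t *product) {
    #if __has_builtin(__builtin_mul_overflow) || defined(__GNUC__)
      return __builtin_mul_overflow(n, size, product);
    #else
      *product = n * size;
      if (!size)
        return false;
      return (*product / size) != n;  // the costly-division fallback
    #endif
    }

    int main() {
      size_t p;
      std::printf("%d\n", callocWouldOverflow(16, 16, &p));                // 0
      std::printf("%d\n", callocWouldOverflow(static_cast<size_t>(-1), 2, &p));  // 1
      return 0;
    }
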
diff --git a/compiler-rt/lib/tsan/dd/dd_interceptors.cpp b/compiler-rt/lib/tsan/dd/dd_interceptors.cpp
index f78ef2d44279..2c36f691ec5b 100644
--- a/compiler-rt/lib/tsan/dd/dd_interceptors.cpp
+++ b/compiler-rt/lib/tsan/dd/dd_interceptors.cpp
@@ -285,7 +285,8 @@ static void InitDataSeg() {
if (is_bss) g_data_end = segment.end;
prev_is_data = is_data;
}
- VPrintf(1, "guessed data_start=%p data_end=%p\n", g_data_start, g_data_end);
+ VPrintf(1, "guessed data_start=0x%zx data_end=0x%zx\n", g_data_start,
+ g_data_end);
CHECK_LT(g_data_start, g_data_end);
CHECK_GE((uptr)&g_data_start, g_data_start);
CHECK_LT((uptr)&g_data_start, g_data_end);
diff --git a/compiler-rt/lib/tsan/dd/dd_rtl.cpp b/compiler-rt/lib/tsan/dd/dd_rtl.cpp
index 2095217586a8..35b367c0cecb 100644
--- a/compiler-rt/lib/tsan/dd/dd_rtl.cpp
+++ b/compiler-rt/lib/tsan/dd/dd_rtl.cpp
@@ -38,12 +38,12 @@ static void PrintStackTrace(Thread *thr, u32 stk) {
static void ReportDeadlock(Thread *thr, DDReport *rep) {
if (rep == 0)
return;
- BlockingMutexLock lock(&ctx->report_mutex);
+ Lock lock(&ctx->report_mutex);
Printf("==============================\n");
Printf("WARNING: lock-order-inversion (potential deadlock)\n");
for (int i = 0; i < rep->n; i++) {
- Printf("Thread %d locks mutex %llu while holding mutex %llu:\n",
- rep->loop[i].thr_ctx, rep->loop[i].mtx_ctx1, rep->loop[i].mtx_ctx0);
+ Printf("Thread %lld locks mutex %llu while holding mutex %llu:\n",
+ rep->loop[i].thr_ctx, rep->loop[i].mtx_ctx1, rep->loop[i].mtx_ctx0);
PrintStackTrace(thr, rep->loop[i].stk[1]);
if (rep->loop[i].stk[0]) {
Printf("Mutex %llu was acquired here:\n",
diff --git a/compiler-rt/lib/tsan/dd/dd_rtl.h b/compiler-rt/lib/tsan/dd/dd_rtl.h
index b1e19be57d3f..c812ffbd1393 100644
--- a/compiler-rt/lib/tsan/dd/dd_rtl.h
+++ b/compiler-rt/lib/tsan/dd/dd_rtl.h
@@ -19,7 +19,7 @@ namespace __dsan {
typedef DDFlags Flags;
-struct Mutex {
+struct UserMutex {
DDMutex dd;
};
@@ -37,12 +37,12 @@ struct Callback final : public DDCallback {
u32 Unwind() override;
};
-typedef AddrHashMap<Mutex, 31051> MutexHashMap;
+typedef AddrHashMap<UserMutex, 31051> MutexHashMap;
struct Context {
DDetector *dd;
- BlockingMutex report_mutex;
+ Mutex report_mutex;
MutexHashMap mutex_map;
};
diff --git a/compiler-rt/lib/tsan/go/tsan_go.cpp b/compiler-rt/lib/tsan/go/tsan_go.cpp
index 77987f43bf54..104c5b325aee 100644
--- a/compiler-rt/lib/tsan/go/tsan_go.cpp
+++ b/compiler-rt/lib/tsan/go/tsan_go.cpp
@@ -27,13 +27,9 @@ bool IsExpectedReport(uptr addr, uptr size) {
return false;
}
-void *internal_alloc(MBlockType typ, uptr sz) {
- return InternalAlloc(sz);
-}
+void *Alloc(uptr sz) { return InternalAlloc(sz); }
-void internal_free(void *p) {
- InternalFree(p);
-}
+void FreeImpl(void *p) { InternalFree(p); }
// Callback into Go.
static void (*go_runtime_cb)(uptr cmd, void *ctx);
@@ -103,14 +99,16 @@ ReportLocation *SymbolizeData(uptr addr) {
MBlock *b = ctx->metamap.GetBlock(cbctx.start);
if (!b)
return 0;
- ReportLocation *loc = ReportLocation::New(ReportLocationHeap);
+ auto *loc = New<ReportLocation>();
+ loc->type = ReportLocationHeap;
loc->heap_chunk_start = cbctx.start;
loc->heap_chunk_size = b->siz;
loc->tid = b->tid;
loc->stack = SymbolizeStackId(b->stk);
return loc;
} else {
- ReportLocation *loc = ReportLocation::New(ReportLocationGlobal);
+ auto *loc = New<ReportLocation>();
+ loc->type = ReportLocationGlobal;
loc->global.name = internal_strdup(cbctx.name ? cbctx.name : "??");
loc->global.file = internal_strdup(cbctx.file ? cbctx.file : "??");
loc->global.line = cbctx.line;
@@ -142,8 +140,7 @@ Processor *ThreadState::proc() {
extern "C" {
static ThreadState *AllocGoroutine() {
- ThreadState *thr = (ThreadState*)internal_alloc(MBlockThreadContex,
- sizeof(ThreadState));
+ auto *thr = (ThreadState *)Alloc(sizeof(ThreadState));
internal_memset(thr, 0, sizeof(*thr));
return thr;
}
@@ -170,25 +167,25 @@ void __tsan_map_shadow(uptr addr, uptr size) {
}
void __tsan_read(ThreadState *thr, void *addr, void *pc) {
- MemoryRead(thr, (uptr)pc, (uptr)addr, kSizeLog1);
+ MemoryAccess(thr, (uptr)pc, (uptr)addr, 1, kAccessRead);
}
void __tsan_read_pc(ThreadState *thr, void *addr, uptr callpc, uptr pc) {
if (callpc != 0)
FuncEntry(thr, callpc);
- MemoryRead(thr, (uptr)pc, (uptr)addr, kSizeLog1);
+ MemoryAccess(thr, (uptr)pc, (uptr)addr, 1, kAccessRead);
if (callpc != 0)
FuncExit(thr);
}
void __tsan_write(ThreadState *thr, void *addr, void *pc) {
- MemoryWrite(thr, (uptr)pc, (uptr)addr, kSizeLog1);
+ MemoryAccess(thr, (uptr)pc, (uptr)addr, 1, kAccessWrite);
}
void __tsan_write_pc(ThreadState *thr, void *addr, uptr callpc, uptr pc) {
if (callpc != 0)
FuncEntry(thr, callpc);
- MemoryWrite(thr, (uptr)pc, (uptr)addr, kSizeLog1);
+ MemoryAccess(thr, (uptr)pc, (uptr)addr, 1, kAccessWrite);
if (callpc != 0)
FuncExit(thr);
}
@@ -213,7 +210,7 @@ void __tsan_malloc(ThreadState *thr, uptr pc, uptr p, uptr sz) {
CHECK(inited);
if (thr && pc)
ctx->metamap.AllocBlock(thr, pc, p, sz);
- MemoryResetRange(0, 0, (uptr)p, sz);
+ MemoryResetRange(thr, pc, (uptr)p, sz);
}
void __tsan_free(uptr p, uptr sz) {
@@ -223,13 +220,13 @@ void __tsan_free(uptr p, uptr sz) {
void __tsan_go_start(ThreadState *parent, ThreadState **pthr, void *pc) {
ThreadState *thr = AllocGoroutine();
*pthr = thr;
- int goid = ThreadCreate(parent, (uptr)pc, 0, true);
+ Tid goid = ThreadCreate(parent, (uptr)pc, 0, true);
ThreadStart(thr, goid, 0, ThreadType::Regular);
}
void __tsan_go_end(ThreadState *thr) {
ThreadFinish(thr);
- internal_free(thr);
+ Free(thr);
}
void __tsan_proc_create(Processor **pproc) {
@@ -256,9 +253,7 @@ void __tsan_release_merge(ThreadState *thr, void *addr) {
Release(thr, 0, (uptr)addr);
}
-void __tsan_finalizer_goroutine(ThreadState *thr) {
- AcquireGlobal(thr, 0);
-}
+void __tsan_finalizer_goroutine(ThreadState *thr) { AcquireGlobal(thr); }
void __tsan_mutex_before_lock(ThreadState *thr, uptr addr, uptr write) {
if (write)
@@ -285,9 +280,7 @@ void __tsan_go_ignore_sync_begin(ThreadState *thr) {
ThreadIgnoreSyncBegin(thr, 0);
}
-void __tsan_go_ignore_sync_end(ThreadState *thr) {
- ThreadIgnoreSyncEnd(thr, 0);
-}
+void __tsan_go_ignore_sync_end(ThreadState *thr) { ThreadIgnoreSyncEnd(thr); }
void __tsan_report_count(u64 *pn) {
Lock lock(&ctx->report_mtx);
diff --git a/compiler-rt/lib/tsan/rtl/tsan_clock.cpp b/compiler-rt/lib/tsan/rtl/tsan_clock.cpp
index 61848c21d162..d122b67c0aaa 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_clock.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_clock.cpp
@@ -72,9 +72,9 @@
// clk_ - variable size vector clock, low kClkBits hold timestamp,
// the remaining bits hold "acquired" flag (the actual value is thread's
// reused counter);
-// if acquried == thr->reused_, then the respective thread has already
+// if acquired == thr->reused_, then the respective thread has already
// acquired this clock (except possibly for dirty elements).
-// dirty_ - holds up to two indeces in the vector clock that other threads
+// dirty_ - holds up to two indices in the vector clock that other threads
// need to acquire regardless of "acquired" flag value;
// release_store_tid_ - denotes that the clock state is a result of
// release-store operation by the thread with release_store_tid_ index.
@@ -272,7 +272,7 @@ void ThreadClock::ReleaseStore(ClockCache *c, SyncClock *dst) {
// we could update the existing clock and cache it, or replace it with the
// currently cached clock and release the old one. And for a shared
// existing clock, we could replace it with the currently cached;
- // or unshare, update and cache. But, for simplicity, we currnetly reuse
+ // or unshare, update and cache. But, for simplicity, we currently reuse
// cached clock only when the target clock is empty.
dst->tab_ = ctx->clock_alloc.Map(cached_idx_);
dst->tab_idx_ = cached_idx_;
@@ -285,7 +285,7 @@ void ThreadClock::ReleaseStore(ClockCache *c, SyncClock *dst) {
dst->dirty_[0].epoch = clk_[tid_];
dst->release_store_tid_ = tid_;
dst->release_store_reused_ = reused_;
- // Rememeber that we don't need to acquire it in future.
+ // Remember that we don't need to acquire it in future.
dst->elem(tid_).reused = reused_;
// Grab a reference.
atomic_fetch_add(ref_ptr(dst->tab_), 1, memory_order_relaxed);
@@ -316,7 +316,7 @@ void ThreadClock::ReleaseStore(ClockCache *c, SyncClock *dst) {
for (uptr i = 0; i < kDirtyTids; i++) dst->dirty_[i].set_tid(kInvalidTid);
dst->release_store_tid_ = tid_;
dst->release_store_reused_ = reused_;
- // Rememeber that we don't need to acquire it in future.
+ // Remember that we don't need to acquire it in future.
dst->elem(tid_).reused = reused_;
// If the resulting clock is cachable, cache it for future release operations.
diff --git a/compiler-rt/lib/tsan/rtl/tsan_clock.h b/compiler-rt/lib/tsan/rtl/tsan_clock.h
index 31376a1bc9e2..11cbc0c0b86b 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_clock.h
+++ b/compiler-rt/lib/tsan/rtl/tsan_clock.h
@@ -213,7 +213,7 @@ class ThreadClock {
// We reuse it for subsequent store-release operations without intervening
// acquire operations. Since it is shared (and thus constant), clock value
// for the current thread is then stored in dirty entries in the SyncClock.
- // We host a refernece to the table while it is cached here.
+ // We host a reference to the table while it is cached here.
u32 cached_idx_;
u16 cached_size_;
u16 cached_blocks_;
diff --git a/compiler-rt/lib/tsan/rtl/tsan_debugging.cpp b/compiler-rt/lib/tsan/rtl/tsan_debugging.cpp
index d3d6255090b7..1d3c3849a446 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_debugging.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_debugging.cpp
@@ -195,9 +195,9 @@ const char *__tsan_locate_address(uptr addr, char *name, uptr name_size,
const char *region_kind = nullptr;
if (name && name_size > 0) name[0] = 0;
- if (IsMetaMem(addr)) {
+ if (IsMetaMem(reinterpret_cast<u32 *>(addr))) {
region_kind = "meta shadow";
- } else if (IsShadowMem(addr)) {
+ } else if (IsShadowMem(reinterpret_cast<RawShadow *>(addr))) {
region_kind = "shadow";
} else {
bool is_stack = false;
@@ -215,9 +215,9 @@ const char *__tsan_locate_address(uptr addr, char *name, uptr name_size,
} else {
// TODO(kuba.brecka): We should not lock. This is supposed to be called
// from within the debugger when other threads are stopped.
- ctx->thread_registry->Lock();
+ ctx->thread_registry.Lock();
ThreadContext *tctx = IsThreadStackOrTls(addr, &is_stack);
- ctx->thread_registry->Unlock();
+ ctx->thread_registry.Unlock();
if (tctx) {
region_kind = is_stack ? "stack" : "tls";
} else {
@@ -252,7 +252,7 @@ int __tsan_get_alloc_stack(uptr addr, uptr *trace, uptr size, int *thread_id,
*thread_id = b->tid;
// No locking. This is supposed to be called from within the debugger when
// other threads are stopped.
- ThreadContextBase *tctx = ctx->thread_registry->GetThreadLocked(b->tid);
+ ThreadContextBase *tctx = ctx->thread_registry.GetThreadLocked(b->tid);
*os_id = tctx->os_id;
StackTrace stack = StackDepotGet(b->stk);
diff --git a/compiler-rt/lib/tsan/rtl/tsan_defs.h b/compiler-rt/lib/tsan/rtl/tsan_defs.h
index f2fb7b1a213f..fe0c1da31599 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_defs.h
+++ b/compiler-rt/lib/tsan/rtl/tsan_defs.h
@@ -18,6 +18,24 @@
#include "sanitizer_common/sanitizer_mutex.h"
#include "ubsan/ubsan_platform.h"
+#ifndef TSAN_VECTORIZE
+# define TSAN_VECTORIZE __SSE4_2__
+#endif
+
+#if TSAN_VECTORIZE
+// <emmintrin.h> transitively includes <stdlib.h>,
+// and including std headers in the tsan runtime is prohibited.
+// So we do this dirty trick.
+# define _MM_MALLOC_H_INCLUDED
+# define __MM_MALLOC_H
+# include <emmintrin.h>
+# include <smmintrin.h>
+# define VECTOR_ALIGNED ALIGNED(16)
+typedef __m128i m128;
+#else
+# define VECTOR_ALIGNED
+#endif
+
// Setup defaults for compile definitions.
#ifndef TSAN_NO_HISTORY
# define TSAN_NO_HISTORY 0
@@ -33,6 +51,19 @@
namespace __tsan {
+constexpr uptr kByteBits = 8;
+
+// Thread slot ID.
+enum class Sid : u8 {};
+constexpr uptr kThreadSlotCount = 256;
+constexpr Sid kFreeSid = static_cast<Sid>(255);
+
+// Abstract time unit, vector clock element.
+enum class Epoch : u16 {};
+constexpr uptr kEpochBits = 14;
+constexpr Epoch kEpochZero = static_cast<Epoch>(0);
+constexpr Epoch kEpochOver = static_cast<Epoch>(1 << kEpochBits);
+
const int kClkBits = 42;
const unsigned kMaxTidReuse = (1 << (64 - kClkBits)) - 1;
@@ -75,8 +106,9 @@ const uptr kShadowCnt = 4;
// That many user bytes are mapped onto a single shadow cell.
const uptr kShadowCell = 8;
-// Size of a single shadow value (u64).
-const uptr kShadowSize = 8;
+// Single shadow value.
+typedef u64 RawShadow;
+const uptr kShadowSize = sizeof(RawShadow);
// Shadow memory is kShadowMultiplier times larger than user memory.
const uptr kShadowMultiplier = kShadowSize * kShadowCnt / kShadowCell;
@@ -88,6 +120,9 @@ const uptr kMetaShadowCell = 8;
// Size of a single meta shadow value (u32).
const uptr kMetaShadowSize = 4;
+// All addresses and PCs are assumed to be compressible to that many bits.
+const uptr kCompressedAddrBits = 44;
+
#if TSAN_NO_HISTORY
const bool kCollectHistory = false;
#else
@@ -154,12 +189,23 @@ struct ReportStack;
class ReportDesc;
class RegionAlloc;
+typedef uptr AccessType;
+
+enum : AccessType {
+ kAccessWrite = 0,
+ kAccessRead = 1 << 0,
+ kAccessAtomic = 1 << 1,
+ kAccessVptr = 1 << 2, // read or write of an object virtual table pointer
+ kAccessFree = 1 << 3, // synthetic memory access during memory freeing
+ kAccessExternalPC = 1 << 4, // access PC can have kExternalPCBit set
+};
+
// Descriptor of user's memory block.
struct MBlock {
u64 siz : 48;
u64 tag : 16;
- u32 stk;
- u16 tid;
+ StackID stk;
+ Tid tid;
};
COMPILER_CHECK(sizeof(MBlock) == 16);
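
Because kAccessWrite is the zero value, a plain write passes no flag bits, and reads, atomics, vptr and free accesses are OR-ed in as needed; elsewhere in this import (e.g. tsan_go.cpp and tsan_fd.cpp) the old MemoryRead/MemoryWrite(..., kSizeLogN) calls are rewritten as MemoryAccess(thr, pc, addr, size, typ) built from these flags. A small standalone sketch of decoding such a flag word (the kAccess* names are the ones defined above; the decoder itself is illustrative):

    #include <cstdio>

    typedef unsigned long AccessType;  // uptr in the real code

    enum : AccessType {
      kAccessWrite = 0,      // default: no bits set
      kAccessRead = 1 << 0,
      kAccessAtomic = 1 << 1,
      kAccessVptr = 1 << 2,  // access to an object's vtable pointer
      kAccessFree = 1 << 3,  // synthetic access while freeing memory
    };

    // Illustrative consumer: picks an operation name much as a report path
    // might, treating "write" as the absence of kAccessRead.
    static const char *AccessName(AccessType typ) {
      if (typ & kAccessFree) return "free";
      if (typ & kAccessVptr)
        return (typ & kAccessRead) ? "vptr read" : "vptr write";
      if (typ & kAccessAtomic)
        return (typ & kAccessRead) ? "atomic read" : "atomic write";
      return (typ & kAccessRead) ? "read" : "write";
    }

    int main() {
      std::printf("%s\n", AccessName(kAccessWrite));                 // write
      std::printf("%s\n", AccessName(kAccessRead | kAccessAtomic));  // atomic read
      std::printf("%s\n", AccessName(kAccessVptr));                  // vptr write
      return 0;
    }
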
diff --git a/compiler-rt/lib/tsan/rtl/tsan_dense_alloc.h b/compiler-rt/lib/tsan/rtl/tsan_dense_alloc.h
index 68ded43c4f6b..9e15f74a0615 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_dense_alloc.h
+++ b/compiler-rt/lib/tsan/rtl/tsan_dense_alloc.h
@@ -49,11 +49,7 @@ class DenseSlabAlloc {
static_assert(sizeof(T) > sizeof(IndexT),
"it doesn't make sense to use dense alloc");
- explicit DenseSlabAlloc(LinkerInitialized, const char *name) {
- freelist_ = 0;
- fillpos_ = 0;
- name_ = name;
- }
+ DenseSlabAlloc(LinkerInitialized, const char *name) : name_(name) {}
explicit DenseSlabAlloc(const char *name)
: DenseSlabAlloc(LINKER_INITIALIZED, name) {
@@ -89,6 +85,8 @@ class DenseSlabAlloc {
}
void FlushCache(Cache *c) {
+ if (!c->pos)
+ return;
SpinMutexLock lock(&mtx_);
while (c->pos) {
IndexT idx = c->cache[--c->pos];
@@ -102,33 +100,39 @@ class DenseSlabAlloc {
internal_memset(c->cache, 0, sizeof(c->cache));
}
+ uptr AllocatedMemory() const {
+ return atomic_load_relaxed(&fillpos_) * kL2Size * sizeof(T);
+ }
+
private:
T *map_[kL1Size];
SpinMutex mtx_;
- IndexT freelist_;
- uptr fillpos_;
- const char *name_;
+ IndexT freelist_ = {0};
+ atomic_uintptr_t fillpos_ = {0};
+ const char *const name_;
void Refill(Cache *c) {
SpinMutexLock lock(&mtx_);
if (freelist_ == 0) {
- if (fillpos_ == kL1Size) {
+ uptr fillpos = atomic_load_relaxed(&fillpos_);
+ if (fillpos == kL1Size) {
Printf("ThreadSanitizer: %s overflow (%zu*%zu). Dying.\n",
name_, kL1Size, kL2Size);
Die();
}
- VPrintf(2, "ThreadSanitizer: growing %s: %zu out of %zu*%zu\n",
- name_, fillpos_, kL1Size, kL2Size);
+ VPrintf(2, "ThreadSanitizer: growing %s: %zu out of %zu*%zu\n", name_,
+ fillpos, kL1Size, kL2Size);
T *batch = (T*)MmapOrDie(kL2Size * sizeof(T), name_);
// Reserve 0 as invalid index.
- IndexT start = fillpos_ == 0 ? 1 : 0;
+ IndexT start = fillpos == 0 ? 1 : 0;
for (IndexT i = start; i < kL2Size; i++) {
new(batch + i) T;
- *(IndexT*)(batch + i) = i + 1 + fillpos_ * kL2Size;
+ *(IndexT *)(batch + i) = i + 1 + fillpos * kL2Size;
}
*(IndexT*)(batch + kL2Size - 1) = 0;
- freelist_ = fillpos_ * kL2Size + start;
- map_[fillpos_++] = batch;
+ freelist_ = fillpos * kL2Size + start;
+ map_[fillpos] = batch;
+ atomic_store_relaxed(&fillpos_, fillpos + 1);
}
for (uptr i = 0; i < Cache::kSize / 2 && freelist_ != 0; i++) {
IndexT idx = freelist_;
diff --git a/compiler-rt/lib/tsan/rtl/tsan_external.cpp b/compiler-rt/lib/tsan/rtl/tsan_external.cpp
index a87e12f2936f..19ae174f20a5 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_external.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_external.cpp
@@ -10,9 +10,12 @@
//
//===----------------------------------------------------------------------===//
#include "tsan_rtl.h"
-#include "tsan_interceptors.h"
#include "sanitizer_common/sanitizer_ptrauth.h"
+#if !SANITIZER_GO
+# include "tsan_interceptors.h"
+#endif
+
namespace __tsan {
#define CALLERPC ((uptr)__builtin_return_address(0))
@@ -57,16 +60,14 @@ uptr TagFromShadowStackFrame(uptr pc) {
#if !SANITIZER_GO
-typedef void(*AccessFunc)(ThreadState *, uptr, uptr, int);
-void ExternalAccess(void *addr, uptr caller_pc, void *tag, AccessFunc access) {
+void ExternalAccess(void *addr, uptr caller_pc, void *tag, AccessType typ) {
CHECK_LT(tag, atomic_load(&used_tags, memory_order_relaxed));
ThreadState *thr = cur_thread();
if (caller_pc) FuncEntry(thr, caller_pc);
InsertShadowStackFrameForTag(thr, (uptr)tag);
bool in_ignored_lib;
- if (!caller_pc || !libignore()->IsIgnored(caller_pc, &in_ignored_lib)) {
- access(thr, CALLERPC, (uptr)addr, kSizeLog1);
- }
+ if (!caller_pc || !libignore()->IsIgnored(caller_pc, &in_ignored_lib))
+ MemoryAccess(thr, CALLERPC, (uptr)addr, 1, typ);
FuncExit(thr);
if (caller_pc) FuncExit(thr);
}
@@ -92,7 +93,7 @@ void __tsan_external_register_header(void *tag, const char *header) {
header = internal_strdup(header);
char *old_header =
(char *)atomic_exchange(header_ptr, (uptr)header, memory_order_seq_cst);
- if (old_header) internal_free(old_header);
+ Free(old_header);
}
SANITIZER_INTERFACE_ATTRIBUTE
@@ -111,12 +112,12 @@ void __tsan_external_assign_tag(void *addr, void *tag) {
SANITIZER_INTERFACE_ATTRIBUTE
void __tsan_external_read(void *addr, void *caller_pc, void *tag) {
- ExternalAccess(addr, STRIP_PAC_PC(caller_pc), tag, MemoryRead);
+ ExternalAccess(addr, STRIP_PAC_PC(caller_pc), tag, kAccessRead);
}
SANITIZER_INTERFACE_ATTRIBUTE
void __tsan_external_write(void *addr, void *caller_pc, void *tag) {
- ExternalAccess(addr, STRIP_PAC_PC(caller_pc), tag, MemoryWrite);
+ ExternalAccess(addr, STRIP_PAC_PC(caller_pc), tag, kAccessWrite);
}
} // extern "C"
diff --git a/compiler-rt/lib/tsan/rtl/tsan_fd.cpp b/compiler-rt/lib/tsan/rtl/tsan_fd.cpp
index 50a6b56916aa..255ffa8daf76 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_fd.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_fd.cpp
@@ -26,8 +26,8 @@ struct FdSync {
struct FdDesc {
FdSync *sync;
- int creation_tid;
- u32 creation_stack;
+ Tid creation_tid;
+ StackID creation_stack;
};
struct FdContext {
@@ -115,7 +115,7 @@ static void init(ThreadState *thr, uptr pc, int fd, FdSync *s,
MemoryRangeImitateWrite(thr, pc, (uptr)d, 8);
} else {
// See the dup-related comment in FdClose.
- MemoryRead(thr, pc, (uptr)d, kSizeLog8);
+ MemoryAccess(thr, pc, (uptr)d, 8, kAccessRead);
}
}
@@ -140,7 +140,7 @@ void FdOnFork(ThreadState *thr, uptr pc) {
}
}
-bool FdLocation(uptr addr, int *fd, int *tid, u32 *stack) {
+bool FdLocation(uptr addr, int *fd, Tid *tid, StackID *stack) {
for (int l1 = 0; l1 < kTableSizeL1; l1++) {
FdDesc *tab = (FdDesc*)atomic_load(&fdctx.tab[l1], memory_order_relaxed);
if (tab == 0)
@@ -163,7 +163,7 @@ void FdAcquire(ThreadState *thr, uptr pc, int fd) {
FdDesc *d = fddesc(thr, pc, fd);
FdSync *s = d->sync;
DPrintf("#%d: FdAcquire(%d) -> %p\n", thr->tid, fd, s);
- MemoryRead(thr, pc, (uptr)d, kSizeLog8);
+ MemoryAccess(thr, pc, (uptr)d, 8, kAccessRead);
if (s)
Acquire(thr, pc, (uptr)s);
}
@@ -174,7 +174,7 @@ void FdRelease(ThreadState *thr, uptr pc, int fd) {
FdDesc *d = fddesc(thr, pc, fd);
FdSync *s = d->sync;
DPrintf("#%d: FdRelease(%d) -> %p\n", thr->tid, fd, s);
- MemoryRead(thr, pc, (uptr)d, kSizeLog8);
+ MemoryAccess(thr, pc, (uptr)d, 8, kAccessRead);
if (s)
Release(thr, pc, (uptr)s);
}
@@ -184,7 +184,7 @@ void FdAccess(ThreadState *thr, uptr pc, int fd) {
if (bogusfd(fd))
return;
FdDesc *d = fddesc(thr, pc, fd);
- MemoryRead(thr, pc, (uptr)d, kSizeLog8);
+ MemoryAccess(thr, pc, (uptr)d, 8, kAccessRead);
}
void FdClose(ThreadState *thr, uptr pc, int fd, bool write) {
@@ -194,7 +194,7 @@ void FdClose(ThreadState *thr, uptr pc, int fd, bool write) {
FdDesc *d = fddesc(thr, pc, fd);
if (write) {
// To catch races between fd usage and close.
- MemoryWrite(thr, pc, (uptr)d, kSizeLog8);
+ MemoryAccess(thr, pc, (uptr)d, 8, kAccessWrite);
} else {
// This path is used only by dup2/dup3 calls.
// We do read instead of write because there is a number of legitimate
@@ -204,15 +204,15 @@ void FdClose(ThreadState *thr, uptr pc, int fd, bool write) {
// 2. Some daemons dup /dev/null in place of stdin/stdout.
// On the other hand we have not seen cases when write here catches real
// bugs.
- MemoryRead(thr, pc, (uptr)d, kSizeLog8);
+ MemoryAccess(thr, pc, (uptr)d, 8, kAccessRead);
}
// We need to clear it, because if we do not intercept any call out there
// that creates fd, we will hit false positives.
MemoryResetRange(thr, pc, (uptr)d, 8);
unref(thr, pc, d->sync);
d->sync = 0;
- d->creation_tid = 0;
- d->creation_stack = 0;
+ d->creation_tid = kInvalidTid;
+ d->creation_stack = kInvalidStackID;
}
void FdFileCreate(ThreadState *thr, uptr pc, int fd) {
@@ -228,7 +228,7 @@ void FdDup(ThreadState *thr, uptr pc, int oldfd, int newfd, bool write) {
return;
// Ignore the case when user dups not yet connected socket.
FdDesc *od = fddesc(thr, pc, oldfd);
- MemoryRead(thr, pc, (uptr)od, kSizeLog8);
+ MemoryAccess(thr, pc, (uptr)od, 8, kAccessRead);
FdClose(thr, pc, newfd, write);
init(thr, pc, newfd, ref(od->sync), write);
}
diff --git a/compiler-rt/lib/tsan/rtl/tsan_fd.h b/compiler-rt/lib/tsan/rtl/tsan_fd.h
index ce4f2f73bac6..d9648178481c 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_fd.h
+++ b/compiler-rt/lib/tsan/rtl/tsan_fd.h
@@ -53,7 +53,7 @@ void FdSocketCreate(ThreadState *thr, uptr pc, int fd);
void FdSocketAccept(ThreadState *thr, uptr pc, int fd, int newfd);
void FdSocketConnecting(ThreadState *thr, uptr pc, int fd);
void FdSocketConnect(ThreadState *thr, uptr pc, int fd);
-bool FdLocation(uptr addr, int *fd, int *tid, u32 *stack);
+bool FdLocation(uptr addr, int *fd, Tid *tid, StackID *stack);
void FdOnFork(ThreadState *thr, uptr pc);
uptr File2addr(const char *path);
diff --git a/compiler-rt/lib/tsan/rtl/tsan_flags.cpp b/compiler-rt/lib/tsan/rtl/tsan_flags.cpp
index 49e4a9c21da9..ee89862d17bd 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_flags.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_flags.cpp
@@ -55,6 +55,7 @@ void InitializeFlags(Flags *f, const char *env, const char *env_option_name) {
// Override some common flags defaults.
CommonFlags cf;
cf.CopyFrom(*common_flags());
+ cf.external_symbolizer_path = GetEnv("TSAN_SYMBOLIZER_PATH");
cf.allow_addr2line = true;
if (SANITIZER_GO) {
// Does not work as expected for Go: runtime handles SIGABRT and crashes.
diff --git a/compiler-rt/lib/tsan/rtl/tsan_flags.inc b/compiler-rt/lib/tsan/rtl/tsan_flags.inc
index 2105c754486f..7954a4307fa1 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_flags.inc
+++ b/compiler-rt/lib/tsan/rtl/tsan_flags.inc
@@ -43,7 +43,6 @@ TSAN_FLAG(
bool, force_seq_cst_atomics, false,
"If set, all atomics are effectively sequentially consistent (seq_cst), "
"regardless of what user actually specified.")
-TSAN_FLAG(bool, print_benign, false, "Print matched \"benign\" races at exit.")
TSAN_FLAG(bool, halt_on_error, false, "Exit after first reported error.")
TSAN_FLAG(int, atexit_sleep_ms, 1000,
"Sleep in main thread before exiting for that many ms "
diff --git a/compiler-rt/lib/tsan/rtl/tsan_ignoreset.cpp b/compiler-rt/lib/tsan/rtl/tsan_ignoreset.cpp
index f6e41f668618..1fca1cf4f9fc 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_ignoreset.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_ignoreset.cpp
@@ -19,7 +19,7 @@ IgnoreSet::IgnoreSet()
: size_() {
}
-void IgnoreSet::Add(u32 stack_id) {
+void IgnoreSet::Add(StackID stack_id) {
if (size_ == kMaxSize)
return;
for (uptr i = 0; i < size_; i++) {
@@ -29,15 +29,7 @@ void IgnoreSet::Add(u32 stack_id) {
stacks_[size_++] = stack_id;
}
-void IgnoreSet::Reset() {
- size_ = 0;
-}
-
-uptr IgnoreSet::Size() const {
- return size_;
-}
-
-u32 IgnoreSet::At(uptr i) const {
+StackID IgnoreSet::At(uptr i) const {
CHECK_LT(i, size_);
CHECK_LE(size_, kMaxSize);
return stacks_[i];
diff --git a/compiler-rt/lib/tsan/rtl/tsan_ignoreset.h b/compiler-rt/lib/tsan/rtl/tsan_ignoreset.h
index 3e318bd674d9..4e2511291ce4 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_ignoreset.h
+++ b/compiler-rt/lib/tsan/rtl/tsan_ignoreset.h
@@ -19,17 +19,16 @@ namespace __tsan {
class IgnoreSet {
public:
- static const uptr kMaxSize = 16;
-
IgnoreSet();
- void Add(u32 stack_id);
- void Reset();
- uptr Size() const;
- u32 At(uptr i) const;
+ void Add(StackID stack_id);
+ void Reset() { size_ = 0; }
+ uptr Size() const { return size_; }
+ StackID At(uptr i) const;
private:
+ static constexpr uptr kMaxSize = 16;
uptr size_;
- u32 stacks_[kMaxSize];
+ StackID stacks_[kMaxSize];
};
} // namespace __tsan
diff --git a/compiler-rt/lib/tsan/rtl/tsan_ilist.h b/compiler-rt/lib/tsan/rtl/tsan_ilist.h
new file mode 100644
index 000000000000..d7d8be219dbe
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl/tsan_ilist.h
@@ -0,0 +1,189 @@
+//===-- tsan_ilist.h --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#ifndef TSAN_ILIST_H
+#define TSAN_ILIST_H
+
+#include "sanitizer_common/sanitizer_internal_defs.h"
+
+namespace __tsan {
+
+class INode {
+ public:
+ INode() = default;
+
+ private:
+ INode* next_ = nullptr;
+ INode* prev_ = nullptr;
+
+ template <typename Base, INode Base::*Node, typename Elem>
+ friend class IList;
+ INode(const INode&) = delete;
+ void operator=(const INode&) = delete;
+};
+
+// Intrusive doubly-linked list.
+//
+// The node class (MyNode) needs to include an "INode foo" field,
+// then the list can be declared as IList<MyNode, &MyNode::foo>.
+// This design allows linking MyNode into multiple lists using
+// different INode fields.
+// The optional Elem template argument allows specifying the node MDT
+// (most derived type) if it's different from MyNode.
+template <typename Base, INode Base::*Node, typename Elem = Base>
+class IList {
+ public:
+ IList();
+
+ void PushFront(Elem* e);
+ void PushBack(Elem* e);
+ void Remove(Elem* e);
+
+ Elem* PopFront();
+ Elem* PopBack();
+ Elem* Front();
+ Elem* Back();
+
+ // Prev links point towards front of the queue.
+ Elem* Prev(Elem* e);
+ // Next links point towards back of the queue.
+ Elem* Next(Elem* e);
+
+ uptr Size() const;
+ bool Empty() const;
+ bool Queued(Elem* e) const;
+
+ private:
+ INode node_;
+ uptr size_ = 0;
+
+ void Push(Elem* e, INode* after);
+ static INode* ToNode(Elem* e);
+ static Elem* ToElem(INode* n);
+
+ IList(const IList&) = delete;
+ void operator=(const IList&) = delete;
+};
+
+template <typename Base, INode Base::*Node, typename Elem>
+IList<Base, Node, Elem>::IList() {
+ node_.next_ = node_.prev_ = &node_;
+}
+
+template <typename Base, INode Base::*Node, typename Elem>
+void IList<Base, Node, Elem>::PushFront(Elem* e) {
+ Push(e, &node_);
+}
+
+template <typename Base, INode Base::*Node, typename Elem>
+void IList<Base, Node, Elem>::PushBack(Elem* e) {
+ Push(e, node_.prev_);
+}
+
+template <typename Base, INode Base::*Node, typename Elem>
+void IList<Base, Node, Elem>::Push(Elem* e, INode* after) {
+ INode* n = ToNode(e);
+ DCHECK_EQ(n->next_, nullptr);
+ DCHECK_EQ(n->prev_, nullptr);
+ INode* next = after->next_;
+ n->next_ = next;
+ n->prev_ = after;
+ next->prev_ = n;
+ after->next_ = n;
+ size_++;
+}
+
+template <typename Base, INode Base::*Node, typename Elem>
+void IList<Base, Node, Elem>::Remove(Elem* e) {
+ INode* n = ToNode(e);
+ INode* next = n->next_;
+ INode* prev = n->prev_;
+ DCHECK(next);
+ DCHECK(prev);
+ DCHECK(size_);
+ next->prev_ = prev;
+ prev->next_ = next;
+ n->prev_ = n->next_ = nullptr;
+ size_--;
+}
+
+template <typename Base, INode Base::*Node, typename Elem>
+Elem* IList<Base, Node, Elem>::PopFront() {
+ Elem* e = Front();
+ if (e)
+ Remove(e);
+ return e;
+}
+
+template <typename Base, INode Base::*Node, typename Elem>
+Elem* IList<Base, Node, Elem>::PopBack() {
+ Elem* e = Back();
+ if (e)
+ Remove(e);
+ return e;
+}
+
+template <typename Base, INode Base::*Node, typename Elem>
+Elem* IList<Base, Node, Elem>::Front() {
+ return size_ ? ToElem(node_.next_) : nullptr;
+}
+
+template <typename Base, INode Base::*Node, typename Elem>
+Elem* IList<Base, Node, Elem>::Back() {
+ return size_ ? ToElem(node_.prev_) : nullptr;
+}
+
+template <typename Base, INode Base::*Node, typename Elem>
+Elem* IList<Base, Node, Elem>::Prev(Elem* e) {
+ INode* n = ToNode(e);
+ DCHECK(n->prev_);
+ return n->prev_ != &node_ ? ToElem(n->prev_) : nullptr;
+}
+
+template <typename Base, INode Base::*Node, typename Elem>
+Elem* IList<Base, Node, Elem>::Next(Elem* e) {
+ INode* n = ToNode(e);
+ DCHECK(n->next_);
+ return n->next_ != &node_ ? ToElem(n->next_) : nullptr;
+}
+
+template <typename Base, INode Base::*Node, typename Elem>
+uptr IList<Base, Node, Elem>::Size() const {
+ return size_;
+}
+
+template <typename Base, INode Base::*Node, typename Elem>
+bool IList<Base, Node, Elem>::Empty() const {
+ return size_ == 0;
+}
+
+template <typename Base, INode Base::*Node, typename Elem>
+bool IList<Base, Node, Elem>::Queued(Elem* e) const {
+ INode* n = ToNode(e);
+ DCHECK_EQ(!n->next_, !n->prev_);
+ return n->next_;
+}
+
+template <typename Base, INode Base::*Node, typename Elem>
+INode* IList<Base, Node, Elem>::ToNode(Elem* e) {
+ return &(e->*Node);
+}
+
+template <typename Base, INode Base::*Node, typename Elem>
+Elem* IList<Base, Node, Elem>::ToElem(INode* n) {
+ return static_cast<Elem*>(reinterpret_cast<Base*>(
+ reinterpret_cast<uptr>(n) -
+ reinterpret_cast<uptr>(&(reinterpret_cast<Elem*>(0)->*Node))));
+}
+
+} // namespace __tsan
+
+#endif
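
The comments at the top of the new tsan_ilist.h describe the member-pointer trick that lets one object sit on several intrusive lists at once. The following is a minimal, self-contained sketch of that pattern in plain C++; the names Node, List and Task are illustrative and not part of the TSan sources, and the offset arithmetic mirrors IList::ToElem while omitting the size tracking and debug checks.

#include <cstdint>
#include <cstdio>

struct Node {
  Node* next = nullptr;
  Node* prev = nullptr;
};

// Parameterized by the linking member, so one element type can be threaded
// onto several independent lists through different Node members.
template <typename T, Node T::*Member>
class List {
 public:
  List() { head_.next = head_.prev = &head_; }

  void PushBack(T* e) {
    Node* n = &(e->*Member);
    n->prev = head_.prev;
    n->next = &head_;
    head_.prev->next = n;
    head_.prev = n;
  }

  T* Front() { return head_.next == &head_ ? nullptr : ToElem(head_.next); }

 private:
  // Recover the element from its embedded node (same idea as IList::ToElem).
  static T* ToElem(Node* n) {
    auto off =
        reinterpret_cast<std::uintptr_t>(&(reinterpret_cast<T*>(0)->*Member));
    return reinterpret_cast<T*>(reinterpret_cast<std::uintptr_t>(n) - off);
  }
  Node head_;
};

struct Task {
  int id = 0;
  Node ready;    // links the Task into a "ready" list
  Node blocked;  // links the same Task into an unrelated "blocked" list
};

int main() {
  Task t1{1}, t2{2};
  List<Task, &Task::ready> ready;
  List<Task, &Task::blocked> blocked;
  ready.PushBack(&t1);
  ready.PushBack(&t2);
  blocked.PushBack(&t1);  // t1 is on both lists at the same time
  std::printf("front of ready list: task %d\n", ready.Front()->id);
  return 0;
}
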
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interceptors.h b/compiler-rt/lib/tsan/rtl/tsan_interceptors.h
index c5716f53a323..61dbb81ffec4 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_interceptors.h
+++ b/compiler-rt/lib/tsan/rtl/tsan_interceptors.h
@@ -10,44 +10,61 @@ class ScopedInterceptor {
public:
ScopedInterceptor(ThreadState *thr, const char *fname, uptr pc);
~ScopedInterceptor();
- void DisableIgnores();
- void EnableIgnores();
+ void DisableIgnores() {
+ if (UNLIKELY(ignoring_))
+ DisableIgnoresImpl();
+ }
+ void EnableIgnores() {
+ if (UNLIKELY(ignoring_))
+ EnableIgnoresImpl();
+ }
+
private:
ThreadState *const thr_;
- const uptr pc_;
bool in_ignored_lib_;
bool ignoring_;
+
+ void DisableIgnoresImpl();
+ void EnableIgnoresImpl();
};
LibIgnore *libignore();
#if !SANITIZER_GO
inline bool in_symbolizer() {
- cur_thread_init();
- return UNLIKELY(cur_thread()->in_symbolizer);
+ return UNLIKELY(cur_thread_init()->in_symbolizer);
}
#endif
} // namespace __tsan
-#define SCOPED_INTERCEPTOR_RAW(func, ...) \
- cur_thread_init(); \
- ThreadState *thr = cur_thread(); \
- const uptr caller_pc = GET_CALLER_PC(); \
- ScopedInterceptor si(thr, #func, caller_pc); \
- const uptr pc = GET_CURRENT_PC(); \
- (void)pc; \
- /**/
-
-#define SCOPED_TSAN_INTERCEPTOR(func, ...) \
- SCOPED_INTERCEPTOR_RAW(func, __VA_ARGS__); \
- if (REAL(func) == 0) { \
+#define SCOPED_INTERCEPTOR_RAW(func, ...) \
+ ThreadState *thr = cur_thread_init(); \
+ ScopedInterceptor si(thr, #func, GET_CALLER_PC()); \
+ UNUSED const uptr pc = GET_CURRENT_PC();
+
+#ifdef __powerpc64__
+// Debugging of crashes on powerpc after commit:
+// c80604f7a3 ("tsan: remove real func check from interceptors")
+// Somehow replacing if with DCHECK leads to strange failures in:
+// SanitizerCommon-tsan-powerpc64le-Linux :: Linux/ptrace.cpp
+// https://lab.llvm.org/buildbot/#/builders/105
+// https://lab.llvm.org/buildbot/#/builders/121
+// https://lab.llvm.org/buildbot/#/builders/57
+# define CHECK_REAL_FUNC(func) \
+ if (REAL(func) == 0) { \
Report("FATAL: ThreadSanitizer: failed to intercept %s\n", #func); \
- Die(); \
- } \
- if (!thr->is_inited || thr->ignore_interceptors || thr->in_ignored_lib) \
- return REAL(func)(__VA_ARGS__); \
-/**/
+ Die(); \
+ }
+#else
+# define CHECK_REAL_FUNC(func) DCHECK(REAL(func))
+#endif
+
+#define SCOPED_TSAN_INTERCEPTOR(func, ...) \
+ SCOPED_INTERCEPTOR_RAW(func, __VA_ARGS__); \
+ CHECK_REAL_FUNC(func); \
+ if (!thr->is_inited || thr->ignore_interceptors || thr->in_ignored_lib) \
+ return REAL(func)(__VA_ARGS__);
#define SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_START() \
si.DisableIgnores();
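
The reworked DisableIgnores/EnableIgnores above keep only an UNLIKELY-guarded branch inline and move the actual work into out-of-line Impl functions. Below is a minimal sketch of that inline fast path / NOINLINE slow path split, using portable stand-ins for the sanitizer's UNLIKELY and NOINLINE macros; the class and function names are illustrative, not TSan's.

#include <cstdio>

#if defined(__GNUC__) || defined(__clang__)
#  define MY_UNLIKELY(x) __builtin_expect(!!(x), 0)
#  define MY_NOINLINE __attribute__((noinline))
#else
#  define MY_UNLIKELY(x) (x)
#  define MY_NOINLINE
#endif

class ScopedGuard {
 public:
  explicit ScopedGuard(bool ignoring) : ignoring_(ignoring) {}

  // Stays inline at every call site: in the common (non-ignoring) case this
  // is a single predicted-untaken branch and no call.
  void DisableIgnores() {
    if (MY_UNLIKELY(ignoring_))
      DisableIgnoresImpl();
  }

 private:
  bool ignoring_;

  // The rare, heavier path is kept out of line so it does not bloat the
  // prologue of every function that uses the guard.
  MY_NOINLINE void DisableIgnoresImpl() {
    std::printf("doing the expensive ignore bookkeeping\n");
  }
};

int main() {
  ScopedGuard g(/*ignoring=*/false);
  g.DisableIgnores();  // fast path: nothing printed
  ScopedGuard h(/*ignoring=*/true);
  h.DisableIgnores();  // slow path taken
  return 0;
}
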
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interceptors_mac.cpp b/compiler-rt/lib/tsan/rtl/tsan_interceptors_mac.cpp
index 2d400c7e7098..ed064150d005 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_interceptors_mac.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_interceptors_mac.cpp
@@ -365,7 +365,7 @@ static uptr GetOrCreateSyncAddress(uptr addr, ThreadState *thr, uptr pc) {
if (h.created()) {
ThreadIgnoreBegin(thr, pc);
*h = (uptr) user_alloc(thr, pc, /*size=*/1);
- ThreadIgnoreEnd(thr, pc);
+ ThreadIgnoreEnd(thr);
}
return *h;
}
@@ -405,8 +405,8 @@ TSAN_INTERCEPTOR(int, swapcontext, ucontext_t *oucp, const ucontext_t *ucp) {
{
SCOPED_INTERCEPTOR_RAW(swapcontext, oucp, ucp);
}
- // Bacause of swapcontext() semantics we have no option but to copy its
- // impementation here
+ // Because of swapcontext() semantics we have no option but to copy its
+ // implementation here
if (!oucp || !ucp) {
errno = EINVAL;
return -1;
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
index dd2442842795..25dbe487b280 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
@@ -90,15 +90,13 @@ DECLARE_REAL(int, pthread_mutexattr_gettype, void *, void *)
DECLARE_REAL(int, fflush, __sanitizer_FILE *fp)
DECLARE_REAL_AND_INTERCEPTOR(void *, malloc, uptr size)
DECLARE_REAL_AND_INTERCEPTOR(void, free, void *ptr)
+extern "C" int pthread_equal(void *t1, void *t2);
extern "C" void *pthread_self();
extern "C" void _exit(int status);
#if !SANITIZER_NETBSD
extern "C" int fileno_unlocked(void *stream);
extern "C" int dirfd(void *dirp);
#endif
-#if SANITIZER_GLIBC
-extern "C" int mallopt(int param, int value);
-#endif
#if SANITIZER_NETBSD
extern __sanitizer_FILE __sF[];
#else
@@ -156,12 +154,11 @@ const int SIG_SETMASK = 2;
#endif
#define COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED \
- (cur_thread_init(), !cur_thread()->is_inited)
+ (!cur_thread_init()->is_inited)
namespace __tsan {
struct SignalDesc {
bool armed;
- bool sigaction;
__sanitizer_siginfo siginfo;
ucontext_t ctx;
};
@@ -169,7 +166,6 @@ struct SignalDesc {
struct ThreadSignalContext {
int int_signal_send;
atomic_uintptr_t in_blocking_func;
- atomic_uintptr_t have_pending_signals;
SignalDesc pending_signals[kSigCount];
// emptyset and oldset are too big for stack.
__sanitizer_sigset_t emptyset;
@@ -248,8 +244,8 @@ static ThreadSignalContext *SigCtx(ThreadState *thr) {
ScopedInterceptor::ScopedInterceptor(ThreadState *thr, const char *fname,
uptr pc)
- : thr_(thr), pc_(pc), in_ignored_lib_(false), ignoring_(false) {
- Initialize(thr);
+ : thr_(thr), in_ignored_lib_(false), ignoring_(false) {
+ LazyInitialize(thr);
if (!thr_->is_inited) return;
if (!thr_->ignore_interceptors) FuncEntry(thr, pc);
DPrintf("#%d: intercept %s()\n", thr_->tid, fname);
@@ -269,25 +265,25 @@ ScopedInterceptor::~ScopedInterceptor() {
}
}
-void ScopedInterceptor::EnableIgnores() {
- if (ignoring_) {
- ThreadIgnoreBegin(thr_, pc_, /*save_stack=*/false);
- if (flags()->ignore_noninstrumented_modules) thr_->suppress_reports++;
- if (in_ignored_lib_) {
- DCHECK(!thr_->in_ignored_lib);
- thr_->in_ignored_lib = true;
- }
+NOINLINE
+void ScopedInterceptor::EnableIgnoresImpl() {
+ ThreadIgnoreBegin(thr_, 0);
+ if (flags()->ignore_noninstrumented_modules)
+ thr_->suppress_reports++;
+ if (in_ignored_lib_) {
+ DCHECK(!thr_->in_ignored_lib);
+ thr_->in_ignored_lib = true;
}
}
-void ScopedInterceptor::DisableIgnores() {
- if (ignoring_) {
- ThreadIgnoreEnd(thr_, pc_);
- if (flags()->ignore_noninstrumented_modules) thr_->suppress_reports--;
- if (in_ignored_lib_) {
- DCHECK(thr_->in_ignored_lib);
- thr_->in_ignored_lib = false;
- }
+NOINLINE
+void ScopedInterceptor::DisableIgnoresImpl() {
+ ThreadIgnoreEnd(thr_);
+ if (flags()->ignore_noninstrumented_modules)
+ thr_->suppress_reports--;
+ if (in_ignored_lib_) {
+ DCHECK(thr_->in_ignored_lib);
+ thr_->in_ignored_lib = false;
}
}
@@ -323,7 +319,7 @@ struct BlockingCall {
, ctx(SigCtx(thr)) {
for (;;) {
atomic_store(&ctx->in_blocking_func, 1, memory_order_relaxed);
- if (atomic_load(&ctx->have_pending_signals, memory_order_relaxed) == 0)
+ if (atomic_load(&thr->pending_signals, memory_order_relaxed) == 0)
break;
atomic_store(&ctx->in_blocking_func, 0, memory_order_relaxed);
ProcessPendingSignals(thr);
@@ -385,14 +381,14 @@ static void at_exit_wrapper() {
Acquire(cur_thread(), (uptr)0, (uptr)ctx);
((void(*)())ctx->f)();
- InternalFree(ctx);
+ Free(ctx);
}
static void cxa_at_exit_wrapper(void *arg) {
Acquire(cur_thread(), 0, (uptr)arg);
AtExitCtx *ctx = (AtExitCtx*)arg;
((void(*)(void *arg))ctx->f)(ctx->arg);
- InternalFree(ctx);
+ Free(ctx);
}
static int setup_at_exit_wrapper(ThreadState *thr, uptr pc, void(*f)(),
@@ -418,7 +414,7 @@ TSAN_INTERCEPTOR(int, __cxa_atexit, void (*f)(void *a), void *arg, void *dso) {
static int setup_at_exit_wrapper(ThreadState *thr, uptr pc, void(*f)(),
void *arg, void *dso) {
- AtExitCtx *ctx = (AtExitCtx*)InternalAlloc(sizeof(AtExitCtx));
+ auto *ctx = New<AtExitCtx>();
ctx->f = f;
ctx->arg = arg;
Release(thr, pc, (uptr)ctx);
@@ -444,7 +440,7 @@ static int setup_at_exit_wrapper(ThreadState *thr, uptr pc, void(*f)(),
} else {
res = REAL(__cxa_atexit)(cxa_at_exit_wrapper, ctx, dso);
}
- ThreadIgnoreEnd(thr, pc);
+ ThreadIgnoreEnd(thr);
return res;
}
@@ -455,14 +451,14 @@ static void on_exit_wrapper(int status, void *arg) {
Acquire(thr, pc, (uptr)arg);
AtExitCtx *ctx = (AtExitCtx*)arg;
((void(*)(int status, void *arg))ctx->f)(status, ctx->arg);
- InternalFree(ctx);
+ Free(ctx);
}
TSAN_INTERCEPTOR(int, on_exit, void(*f)(int, void*), void *arg) {
if (in_symbolizer())
return 0;
SCOPED_TSAN_INTERCEPTOR(on_exit, f, arg);
- AtExitCtx *ctx = (AtExitCtx*)InternalAlloc(sizeof(AtExitCtx));
+ auto *ctx = New<AtExitCtx>();
ctx->f = (void(*)())f;
ctx->arg = arg;
Release(thr, pc, (uptr)ctx);
@@ -470,7 +466,7 @@ TSAN_INTERCEPTOR(int, on_exit, void(*f)(int, void*), void *arg) {
// because we do not see synchronization around atexit callback list.
ThreadIgnoreBegin(thr, pc);
int res = REAL(on_exit)(on_exit_wrapper, ctx);
- ThreadIgnoreEnd(thr, pc);
+ ThreadIgnoreEnd(thr);
return res;
}
#define TSAN_MAYBE_INTERCEPT_ON_EXIT TSAN_INTERCEPT(on_exit)
@@ -536,10 +532,7 @@ static void LongJmp(ThreadState *thr, uptr *env) {
}
// FIXME: put everything below into a common extern "C" block?
-extern "C" void __tsan_setjmp(uptr sp) {
- cur_thread_init();
- SetJmp(cur_thread(), sp);
-}
+extern "C" void __tsan_setjmp(uptr sp) { SetJmp(cur_thread_init(), sp); }
#if SANITIZER_MAC
TSAN_INTERCEPTOR(int, setjmp, void *env);
@@ -849,6 +842,54 @@ TSAN_INTERCEPTOR(int, posix_memalign, void **memptr, uptr align, uptr sz) {
}
#endif
+// Both __cxa_guard_acquire and pthread_once 0-initialize
+// the object initially. pthread_once does not have any
+// other ABI requirements. __cxa_guard_acquire assumes
+// that any non-0 value in the first byte means that
+// initialization is completed. Contents of the remaining
+// bytes are up to us.
+constexpr u32 kGuardInit = 0;
+constexpr u32 kGuardDone = 1;
+constexpr u32 kGuardRunning = 1 << 16;
+constexpr u32 kGuardWaiter = 1 << 17;
+
+static int guard_acquire(ThreadState *thr, uptr pc, atomic_uint32_t *g,
+ bool blocking_hooks = true) {
+ if (blocking_hooks)
+ OnPotentiallyBlockingRegionBegin();
+ auto on_exit = at_scope_exit([blocking_hooks] {
+ if (blocking_hooks)
+ OnPotentiallyBlockingRegionEnd();
+ });
+
+ for (;;) {
+ u32 cmp = atomic_load(g, memory_order_acquire);
+ if (cmp == kGuardInit) {
+ if (atomic_compare_exchange_strong(g, &cmp, kGuardRunning,
+ memory_order_relaxed))
+ return 1;
+ } else if (cmp == kGuardDone) {
+ if (!thr->in_ignored_lib)
+ Acquire(thr, pc, (uptr)g);
+ return 0;
+ } else {
+ if ((cmp & kGuardWaiter) ||
+ atomic_compare_exchange_strong(g, &cmp, cmp | kGuardWaiter,
+ memory_order_relaxed))
+ FutexWait(g, cmp | kGuardWaiter);
+ }
+ }
+}
+
+static void guard_release(ThreadState *thr, uptr pc, atomic_uint32_t *g,
+ u32 v) {
+ if (!thr->in_ignored_lib)
+ Release(thr, pc, (uptr)g);
+ u32 old = atomic_exchange(g, v, memory_order_release);
+ if (old & kGuardWaiter)
+ FutexWake(g, 1 << 30);
+}
+
// __cxa_guard_acquire and friends need to be intercepted in a special way -
// regular interceptors will break statically-linked libstdc++. Linux
// interceptors are especially defined as weak functions (so that they don't
@@ -869,31 +910,17 @@ TSAN_INTERCEPTOR(int, posix_memalign, void **memptr, uptr align, uptr sz) {
// Used in thread-safe function static initialization.
STDCXX_INTERCEPTOR(int, __cxa_guard_acquire, atomic_uint32_t *g) {
SCOPED_INTERCEPTOR_RAW(__cxa_guard_acquire, g);
- OnPotentiallyBlockingRegionBegin();
- auto on_exit = at_scope_exit(&OnPotentiallyBlockingRegionEnd);
- for (;;) {
- u32 cmp = atomic_load(g, memory_order_acquire);
- if (cmp == 0) {
- if (atomic_compare_exchange_strong(g, &cmp, 1<<16, memory_order_relaxed))
- return 1;
- } else if (cmp == 1) {
- Acquire(thr, pc, (uptr)g);
- return 0;
- } else {
- internal_sched_yield();
- }
- }
+ return guard_acquire(thr, pc, g);
}
STDCXX_INTERCEPTOR(void, __cxa_guard_release, atomic_uint32_t *g) {
SCOPED_INTERCEPTOR_RAW(__cxa_guard_release, g);
- Release(thr, pc, (uptr)g);
- atomic_store(g, 1, memory_order_release);
+ guard_release(thr, pc, g, kGuardDone);
}
STDCXX_INTERCEPTOR(void, __cxa_guard_abort, atomic_uint32_t *g) {
SCOPED_INTERCEPTOR_RAW(__cxa_guard_abort, g);
- atomic_store(g, 0, memory_order_relaxed);
+ guard_release(thr, pc, g, kGuardInit);
}
namespace __tsan {
@@ -935,17 +962,17 @@ static void thread_finalize(void *v) {
struct ThreadParam {
void* (*callback)(void *arg);
void *param;
- atomic_uintptr_t tid;
+ Tid tid;
+ Semaphore created;
+ Semaphore started;
};
extern "C" void *__tsan_thread_start_func(void *arg) {
ThreadParam *p = (ThreadParam*)arg;
void* (*callback)(void *arg) = p->callback;
void *param = p->param;
- int tid = 0;
{
- cur_thread_init();
- ThreadState *thr = cur_thread();
+ ThreadState *thr = cur_thread_init();
// Thread-local state is not initialized yet.
ScopedIgnoreInterceptors ignore;
#if !SANITIZER_MAC && !SANITIZER_NETBSD && !SANITIZER_FREEBSD
@@ -955,14 +982,13 @@ extern "C" void *__tsan_thread_start_func(void *arg) {
Printf("ThreadSanitizer: failed to set thread key\n");
Die();
}
- ThreadIgnoreEnd(thr, 0);
+ ThreadIgnoreEnd(thr);
#endif
- while ((tid = atomic_load(&p->tid, memory_order_acquire)) == 0)
- internal_sched_yield();
+ p->created.Wait();
Processor *proc = ProcCreate();
ProcWire(proc, thr);
- ThreadStart(thr, tid, GetTid(), ThreadType::Regular);
- atomic_store(&p->tid, 0, memory_order_release);
+ ThreadStart(thr, p->tid, GetTid(), ThreadType::Regular);
+ p->started.Post();
}
void *res = callback(param);
// Prevent the callback from being tail called,
@@ -984,9 +1010,11 @@ TSAN_INTERCEPTOR(int, pthread_create,
"fork is not supported. Dying (set die_after_fork=0 to override)\n");
Die();
} else {
- VPrintf(1, "ThreadSanitizer: starting new threads after multi-threaded "
- "fork is not supported (pid %d). Continuing because of "
- "die_after_fork=0, but you are on your own\n", internal_getpid());
+ VPrintf(1,
+ "ThreadSanitizer: starting new threads after multi-threaded "
+ "fork is not supported (pid %lu). Continuing because of "
+ "die_after_fork=0, but you are on your own\n",
+ internal_getpid());
}
}
__sanitizer_pthread_attr_t myattr;
@@ -1001,18 +1029,18 @@ TSAN_INTERCEPTOR(int, pthread_create,
ThreadParam p;
p.callback = callback;
p.param = param;
- atomic_store(&p.tid, 0, memory_order_relaxed);
+ p.tid = kMainTid;
int res = -1;
{
// Otherwise we see false positives in pthread stack manipulation.
ScopedIgnoreInterceptors ignore;
ThreadIgnoreBegin(thr, pc);
res = REAL(pthread_create)(th, attr, __tsan_thread_start_func, &p);
- ThreadIgnoreEnd(thr, pc);
+ ThreadIgnoreEnd(thr);
}
if (res == 0) {
- int tid = ThreadCreate(thr, pc, *(uptr*)th, IsStateDetached(detached));
- CHECK_NE(tid, 0);
+ p.tid = ThreadCreate(thr, pc, *(uptr *)th, IsStateDetached(detached));
+ CHECK_NE(p.tid, kMainTid);
// Synchronization on p.tid serves two purposes:
// 1. ThreadCreate must finish before the new thread starts.
// Otherwise the new thread can call pthread_detach, but the pthread_t
@@ -1020,9 +1048,8 @@ TSAN_INTERCEPTOR(int, pthread_create,
// 2. ThreadStart must finish before this thread continues.
// Otherwise, this thread can call pthread_detach and reset thr->sync
// before the new thread got a chance to acquire from it in ThreadStart.
- atomic_store(&p.tid, tid, memory_order_release);
- while (atomic_load(&p.tid, memory_order_acquire) != 0)
- internal_sched_yield();
+ p.created.Post();
+ p.started.Wait();
}
if (attr == &myattr)
pthread_attr_destroy(&myattr);
@@ -1031,10 +1058,10 @@ TSAN_INTERCEPTOR(int, pthread_create,
TSAN_INTERCEPTOR(int, pthread_join, void *th, void **ret) {
SCOPED_INTERCEPTOR_RAW(pthread_join, th, ret);
- int tid = ThreadConsumeTid(thr, pc, (uptr)th);
+ Tid tid = ThreadConsumeTid(thr, pc, (uptr)th);
ThreadIgnoreBegin(thr, pc);
int res = BLOCK_REAL(pthread_join)(th, ret);
- ThreadIgnoreEnd(thr, pc);
+ ThreadIgnoreEnd(thr);
if (res == 0) {
ThreadJoin(thr, pc, tid);
}
@@ -1045,7 +1072,7 @@ DEFINE_REAL_PTHREAD_FUNCTIONS
TSAN_INTERCEPTOR(int, pthread_detach, void *th) {
SCOPED_INTERCEPTOR_RAW(pthread_detach, th);
- int tid = ThreadConsumeTid(thr, pc, (uptr)th);
+ Tid tid = ThreadConsumeTid(thr, pc, (uptr)th);
int res = REAL(pthread_detach)(th);
if (res == 0) {
ThreadDetach(thr, pc, tid);
@@ -1066,10 +1093,10 @@ TSAN_INTERCEPTOR(void, pthread_exit, void *retval) {
#if SANITIZER_LINUX
TSAN_INTERCEPTOR(int, pthread_tryjoin_np, void *th, void **ret) {
SCOPED_INTERCEPTOR_RAW(pthread_tryjoin_np, th, ret);
- int tid = ThreadConsumeTid(thr, pc, (uptr)th);
+ Tid tid = ThreadConsumeTid(thr, pc, (uptr)th);
ThreadIgnoreBegin(thr, pc);
int res = REAL(pthread_tryjoin_np)(th, ret);
- ThreadIgnoreEnd(thr, pc);
+ ThreadIgnoreEnd(thr);
if (res == 0)
ThreadJoin(thr, pc, tid);
else
@@ -1080,10 +1107,10 @@ TSAN_INTERCEPTOR(int, pthread_tryjoin_np, void *th, void **ret) {
TSAN_INTERCEPTOR(int, pthread_timedjoin_np, void *th, void **ret,
const struct timespec *abstime) {
SCOPED_INTERCEPTOR_RAW(pthread_timedjoin_np, th, ret, abstime);
- int tid = ThreadConsumeTid(thr, pc, (uptr)th);
+ Tid tid = ThreadConsumeTid(thr, pc, (uptr)th);
ThreadIgnoreBegin(thr, pc);
int res = BLOCK_REAL(pthread_timedjoin_np)(th, ret, abstime);
- ThreadIgnoreEnd(thr, pc);
+ ThreadIgnoreEnd(thr);
if (res == 0)
ThreadJoin(thr, pc, tid);
else
@@ -1447,14 +1474,14 @@ TSAN_INTERCEPTOR(int, pthread_rwlock_unlock, void *m) {
#if !SANITIZER_MAC
TSAN_INTERCEPTOR(int, pthread_barrier_init, void *b, void *a, unsigned count) {
SCOPED_TSAN_INTERCEPTOR(pthread_barrier_init, b, a, count);
- MemoryWrite(thr, pc, (uptr)b, kSizeLog1);
+ MemoryAccess(thr, pc, (uptr)b, 1, kAccessWrite);
int res = REAL(pthread_barrier_init)(b, a, count);
return res;
}
TSAN_INTERCEPTOR(int, pthread_barrier_destroy, void *b) {
SCOPED_TSAN_INTERCEPTOR(pthread_barrier_destroy, b);
- MemoryWrite(thr, pc, (uptr)b, kSizeLog1);
+ MemoryAccess(thr, pc, (uptr)b, 1, kAccessWrite);
int res = REAL(pthread_barrier_destroy)(b);
return res;
}
@@ -1462,9 +1489,9 @@ TSAN_INTERCEPTOR(int, pthread_barrier_destroy, void *b) {
TSAN_INTERCEPTOR(int, pthread_barrier_wait, void *b) {
SCOPED_TSAN_INTERCEPTOR(pthread_barrier_wait, b);
Release(thr, pc, (uptr)b);
- MemoryRead(thr, pc, (uptr)b, kSizeLog1);
+ MemoryAccess(thr, pc, (uptr)b, 1, kAccessRead);
int res = REAL(pthread_barrier_wait)(b);
- MemoryRead(thr, pc, (uptr)b, kSizeLog1);
+ MemoryAccess(thr, pc, (uptr)b, 1, kAccessRead);
if (res == 0 || res == PTHREAD_BARRIER_SERIAL_THREAD) {
Acquire(thr, pc, (uptr)b);
}
@@ -1486,20 +1513,11 @@ TSAN_INTERCEPTOR(int, pthread_once, void *o, void (*f)()) {
else
a = static_cast<atomic_uint32_t*>(o);
- u32 v = atomic_load(a, memory_order_acquire);
- if (v == 0 && atomic_compare_exchange_strong(a, &v, 1,
- memory_order_relaxed)) {
+ // Mac OS X appears to use pthread_once() where calling BlockingRegion hooks
+  // results in crashes due to too little stack space.
+ if (guard_acquire(thr, pc, a, !SANITIZER_MAC)) {
(*f)();
- if (!thr->in_ignored_lib)
- Release(thr, pc, (uptr)o);
- atomic_store(a, 2, memory_order_release);
- } else {
- while (v != 2) {
- internal_sched_yield();
- v = atomic_load(a, memory_order_acquire);
- }
- if (!thr->in_ignored_lib)
- Acquire(thr, pc, (uptr)o);
+ guard_release(thr, pc, a, kGuardDone);
}
return 0;
}
@@ -1933,24 +1951,45 @@ TSAN_INTERCEPTOR(int, pthread_sigmask, int how, const __sanitizer_sigset_t *set,
namespace __tsan {
+static void ReportErrnoSpoiling(ThreadState *thr, uptr pc) {
+ VarSizeStackTrace stack;
+  // StackTrace::GetNextInstructionPc(pc) is used because a return address is
+  // expected; OutputReport() will undo this.
+ ObtainCurrentStack(thr, StackTrace::GetNextInstructionPc(pc), &stack);
+ ThreadRegistryLock l(&ctx->thread_registry);
+ ScopedReport rep(ReportTypeErrnoInSignal);
+ if (!IsFiredSuppression(ctx, ReportTypeErrnoInSignal, stack)) {
+ rep.AddStack(stack, true);
+ OutputReport(thr, rep);
+ }
+}
+
static void CallUserSignalHandler(ThreadState *thr, bool sync, bool acquire,
- bool sigact, int sig,
- __sanitizer_siginfo *info, void *uctx) {
+ int sig, __sanitizer_siginfo *info,
+ void *uctx) {
__sanitizer_sigaction *sigactions = interceptor_ctx()->sigactions;
if (acquire)
Acquire(thr, 0, (uptr)&sigactions[sig]);
// Signals are generally asynchronous, so if we receive a signals when
// ignores are enabled we should disable ignores. This is critical for sync
- // and interceptors, because otherwise we can miss syncronization and report
+ // and interceptors, because otherwise we can miss synchronization and report
// false races.
int ignore_reads_and_writes = thr->ignore_reads_and_writes;
int ignore_interceptors = thr->ignore_interceptors;
int ignore_sync = thr->ignore_sync;
+ // For symbolizer we only process SIGSEGVs synchronously
+ // (bug in symbolizer or in tsan). But we want to reset
+ // in_symbolizer to fail gracefully. Symbolizer and user code
+ // use different memory allocators, so if we don't reset
+  // in_symbolizer we can get memory allocated with one allocator
+  // being freed with the other, which can cause more crashes.
+ int in_symbolizer = thr->in_symbolizer;
if (!ctx->after_multithreaded_fork) {
thr->ignore_reads_and_writes = 0;
thr->fast_state.ClearIgnoreBit();
thr->ignore_interceptors = 0;
thr->ignore_sync = 0;
+ thr->in_symbolizer = 0;
}
// Ensure that the handler does not spoil errno.
const int saved_errno = errno;
@@ -1958,13 +1997,14 @@ static void CallUserSignalHandler(ThreadState *thr, bool sync, bool acquire,
// This code races with sigaction. Be careful to not read sa_sigaction twice.
// Also need to remember pc for reporting before the call,
// because the handler can reset it.
- volatile uptr pc =
- sigact ? (uptr)sigactions[sig].sigaction : (uptr)sigactions[sig].handler;
+ volatile uptr pc = (sigactions[sig].sa_flags & SA_SIGINFO)
+ ? (uptr)sigactions[sig].sigaction
+ : (uptr)sigactions[sig].handler;
if (pc != sig_dfl && pc != sig_ign) {
- if (sigact)
- ((__sanitizer_sigactionhandler_ptr)pc)(sig, info, uctx);
- else
- ((__sanitizer_sighandler_ptr)pc)(sig);
+ // The callback can be either sa_handler or sa_sigaction.
+ // They have different signatures, but we assume that passing
+ // additional arguments to sa_handler works and is harmless.
+ ((__sanitizer_sigactionhandler_ptr)pc)(sig, info, uctx);
}
if (!ctx->after_multithreaded_fork) {
thr->ignore_reads_and_writes = ignore_reads_and_writes;
@@ -1972,6 +2012,7 @@ static void CallUserSignalHandler(ThreadState *thr, bool sync, bool acquire,
thr->fast_state.SetIgnoreBit();
thr->ignore_interceptors = ignore_interceptors;
thr->ignore_sync = ignore_sync;
+ thr->in_symbolizer = in_symbolizer;
}
// We do not detect errno spoiling for SIGTERM,
// because some SIGTERM handlers do spoil errno but reraise SIGTERM,
@@ -1981,27 +2022,16 @@ static void CallUserSignalHandler(ThreadState *thr, bool sync, bool acquire,
// from rtl_generic_sighandler) we have not yet received the reraised
// signal; and it looks too fragile to intercept all ways to reraise a signal.
if (ShouldReport(thr, ReportTypeErrnoInSignal) && !sync && sig != SIGTERM &&
- errno != 99) {
- VarSizeStackTrace stack;
- // StackTrace::GetNestInstructionPc(pc) is used because return address is
- // expected, OutputReport() will undo this.
- ObtainCurrentStack(thr, StackTrace::GetNextInstructionPc(pc), &stack);
- ThreadRegistryLock l(ctx->thread_registry);
- ScopedReport rep(ReportTypeErrnoInSignal);
- if (!IsFiredSuppression(ctx, ReportTypeErrnoInSignal, stack)) {
- rep.AddStack(stack, true);
- OutputReport(thr, rep);
- }
- }
+ errno != 99)
+ ReportErrnoSpoiling(thr, pc);
errno = saved_errno;
}
-void ProcessPendingSignals(ThreadState *thr) {
+void ProcessPendingSignalsImpl(ThreadState *thr) {
+ atomic_store(&thr->pending_signals, 0, memory_order_relaxed);
ThreadSignalContext *sctx = SigCtx(thr);
- if (sctx == 0 ||
- atomic_load(&sctx->have_pending_signals, memory_order_relaxed) == 0)
+ if (sctx == 0)
return;
- atomic_store(&sctx->have_pending_signals, 0, memory_order_relaxed);
atomic_fetch_add(&thr->in_signal_handler, 1, memory_order_relaxed);
internal_sigfillset(&sctx->emptyset);
int res = REAL(pthread_sigmask)(SIG_SETMASK, &sctx->emptyset, &sctx->oldset);
@@ -2010,8 +2040,8 @@ void ProcessPendingSignals(ThreadState *thr) {
SignalDesc *signal = &sctx->pending_signals[sig];
if (signal->armed) {
signal->armed = false;
- CallUserSignalHandler(thr, false, true, signal->sigaction, sig,
- &signal->siginfo, &signal->ctx);
+ CallUserSignalHandler(thr, false, true, sig, &signal->siginfo,
+ &signal->ctx);
}
}
res = REAL(pthread_sigmask)(SIG_SETMASK, &sctx->oldset, 0);
@@ -2028,11 +2058,8 @@ static bool is_sync_signal(ThreadSignalContext *sctx, int sig) {
(sctx && sig == sctx->int_signal_send);
}
-void ALWAYS_INLINE rtl_generic_sighandler(bool sigact, int sig,
- __sanitizer_siginfo *info,
- void *ctx) {
- cur_thread_init();
- ThreadState *thr = cur_thread();
+void sighandler(int sig, __sanitizer_siginfo *info, void *ctx) {
+ ThreadState *thr = cur_thread_init();
ThreadSignalContext *sctx = SigCtx(thr);
if (sig < 0 || sig >= kSigCount) {
VPrintf(1, "ThreadSanitizer: ignoring signal %d\n", sig);
@@ -2048,7 +2075,7 @@ void ALWAYS_INLINE rtl_generic_sighandler(bool sigact, int sig,
atomic_fetch_add(&thr->in_signal_handler, 1, memory_order_relaxed);
if (sctx && atomic_load(&sctx->in_blocking_func, memory_order_relaxed)) {
atomic_store(&sctx->in_blocking_func, 0, memory_order_relaxed);
- CallUserSignalHandler(thr, sync, true, sigact, sig, info, ctx);
+ CallUserSignalHandler(thr, sync, true, sig, info, ctx);
atomic_store(&sctx->in_blocking_func, 1, memory_order_relaxed);
} else {
// Be very conservative with when we do acquire in this case.
@@ -2057,7 +2084,7 @@ void ALWAYS_INLINE rtl_generic_sighandler(bool sigact, int sig,
// SIGSYS looks relatively safe -- it's synchronous and can actually
// need some global state.
bool acq = (sig == SIGSYS);
- CallUserSignalHandler(thr, sync, acq, sigact, sig, info, ctx);
+ CallUserSignalHandler(thr, sync, acq, sig, info, ctx);
}
atomic_fetch_add(&thr->in_signal_handler, -1, memory_order_relaxed);
return;
@@ -2068,23 +2095,12 @@ void ALWAYS_INLINE rtl_generic_sighandler(bool sigact, int sig,
SignalDesc *signal = &sctx->pending_signals[sig];
if (signal->armed == false) {
signal->armed = true;
- signal->sigaction = sigact;
- if (info)
- internal_memcpy(&signal->siginfo, info, sizeof(*info));
- if (ctx)
- internal_memcpy(&signal->ctx, ctx, sizeof(signal->ctx));
- atomic_store(&sctx->have_pending_signals, 1, memory_order_relaxed);
+ internal_memcpy(&signal->siginfo, info, sizeof(*info));
+ internal_memcpy(&signal->ctx, ctx, sizeof(signal->ctx));
+ atomic_store(&thr->pending_signals, 1, memory_order_relaxed);
}
}
-static void rtl_sighandler(int sig) {
- rtl_generic_sighandler(false, sig, 0, 0);
-}
-
-static void rtl_sigaction(int sig, __sanitizer_siginfo *info, void *ctx) {
- rtl_generic_sighandler(true, sig, info, ctx);
-}
-
TSAN_INTERCEPTOR(int, raise, int sig) {
SCOPED_TSAN_INTERCEPTOR(raise, sig);
ThreadSignalContext *sctx = SigCtx(thr);
@@ -2118,11 +2134,11 @@ TSAN_INTERCEPTOR(int, pthread_kill, void *tid, int sig) {
ThreadSignalContext *sctx = SigCtx(thr);
CHECK_NE(sctx, 0);
int prev = sctx->int_signal_send;
- if (tid == pthread_self()) {
+ bool self = pthread_equal(tid, pthread_self());
+ if (self)
sctx->int_signal_send = sig;
- }
int res = REAL(pthread_kill)(tid, sig);
- if (tid == pthread_self()) {
+ if (self) {
CHECK_EQ(sctx->int_signal_send, sig);
sctx->int_signal_send = prev;
}
@@ -2143,7 +2159,7 @@ TSAN_INTERCEPTOR(int, getaddrinfo, void *node, void *service,
// inside of getaddrinfo. So ignore memory accesses.
ThreadIgnoreBegin(thr, pc);
int res = REAL(getaddrinfo)(node, service, hints, rv);
- ThreadIgnoreEnd(thr, pc);
+ ThreadIgnoreEnd(thr);
return res;
}
@@ -2175,7 +2191,7 @@ void atfork_child() {
return;
ThreadState *thr = cur_thread();
const uptr pc = StackTrace::GetCurrentPc();
- ForkChildAfter(thr, pc);
+ ForkChildAfter(thr, pc, true);
FdOnFork(thr, pc);
}
@@ -2196,6 +2212,37 @@ TSAN_INTERCEPTOR(int, vfork, int fake) {
return WRAP(fork)(fake);
}
+#if SANITIZER_LINUX
+TSAN_INTERCEPTOR(int, clone, int (*fn)(void *), void *stack, int flags,
+ void *arg, int *parent_tid, void *tls, pid_t *child_tid) {
+ SCOPED_INTERCEPTOR_RAW(clone, fn, stack, flags, arg, parent_tid, tls,
+ child_tid);
+ struct Arg {
+ int (*fn)(void *);
+ void *arg;
+ };
+ auto wrapper = +[](void *p) -> int {
+ auto *thr = cur_thread();
+ uptr pc = GET_CURRENT_PC();
+ // Start the background thread for fork, but not for clone.
+    // For fork we have always done this and it's known to work (or user code
+    // has adapted). But if we do this for the new clone interceptor, some code
+    // (sandbox2) fails. So keep modeling what we have done for years and don't
+    // start the background thread after clone.
+ ForkChildAfter(thr, pc, false);
+ FdOnFork(thr, pc);
+ auto *arg = static_cast<Arg *>(p);
+ return arg->fn(arg->arg);
+ };
+ ForkBefore(thr, pc);
+ Arg arg_wrapper = {fn, arg};
+ int pid = REAL(clone)(wrapper, stack, flags, &arg_wrapper, parent_tid, tls,
+ child_tid);
+ ForkParentAfter(thr, pc);
+ return pid;
+}
+#endif
+
#if !SANITIZER_MAC && !SANITIZER_ANDROID
typedef int (*dl_iterate_phdr_cb_t)(__sanitizer_dl_phdr_info *info, SIZE_T size,
void *data);
@@ -2207,7 +2254,7 @@ struct dl_iterate_phdr_data {
};
static bool IsAppNotRodata(uptr addr) {
- return IsAppMem(addr) && *(u64*)MemToShadow(addr) != kShadowRodata;
+ return IsAppMem(addr) && *MemToShadow(addr) != kShadowRodata;
}
static int dl_iterate_phdr_cb(__sanitizer_dl_phdr_info *info, SIZE_T size,
@@ -2250,7 +2297,6 @@ static int OnExit(ThreadState *thr) {
struct TsanInterceptorContext {
ThreadState *thr;
- const uptr caller_pc;
const uptr pc;
};
@@ -2291,17 +2337,17 @@ static void HandleRecvmsg(ThreadState *thr, uptr pc,
((TsanInterceptorContext *) ctx)->pc, (uptr) ptr, size, \
false)
-#define COMMON_INTERCEPTOR_ENTER(ctx, func, ...) \
- SCOPED_TSAN_INTERCEPTOR(func, __VA_ARGS__); \
- TsanInterceptorContext _ctx = {thr, caller_pc, pc}; \
- ctx = (void *)&_ctx; \
- (void) ctx;
+#define COMMON_INTERCEPTOR_ENTER(ctx, func, ...) \
+ SCOPED_TSAN_INTERCEPTOR(func, __VA_ARGS__); \
+ TsanInterceptorContext _ctx = {thr, pc}; \
+ ctx = (void *)&_ctx; \
+ (void)ctx;
#define COMMON_INTERCEPTOR_ENTER_NOIGNORE(ctx, func, ...) \
SCOPED_INTERCEPTOR_RAW(func, __VA_ARGS__); \
- TsanInterceptorContext _ctx = {thr, caller_pc, pc}; \
+ TsanInterceptorContext _ctx = {thr, pc}; \
ctx = (void *)&_ctx; \
- (void) ctx;
+ (void)ctx;
#define COMMON_INTERCEPTOR_FILE_OPEN(ctx, file, path) \
if (path) \
@@ -2347,8 +2393,11 @@ static void HandleRecvmsg(ThreadState *thr, uptr pc,
#define COMMON_INTERCEPTOR_SET_THREAD_NAME(ctx, name) \
ThreadSetName(((TsanInterceptorContext *) ctx)->thr, name)
-#define COMMON_INTERCEPTOR_SET_PTHREAD_NAME(ctx, thread, name) \
- __tsan::ctx->thread_registry->SetThreadNameByUserId(thread, name)
+#define COMMON_INTERCEPTOR_SET_PTHREAD_NAME(ctx, thread, name) \
+ if (pthread_equal(pthread_self(), reinterpret_cast<void *>(thread))) \
+ COMMON_INTERCEPTOR_SET_THREAD_NAME(ctx, name); \
+ else \
+ __tsan::ctx->thread_registry.SetThreadNameByUserId(thread, name)
#define COMMON_INTERCEPTOR_BLOCK_REAL(name) BLOCK_REAL(name)
@@ -2420,7 +2469,7 @@ static __sanitizer_sighandler_ptr signal_impl(int sig,
int sigaction_impl(int sig, const __sanitizer_sigaction *act,
__sanitizer_sigaction *old) {
// Note: if we call REAL(sigaction) directly for any reason without proxying
- // the signal handler through rtl_sigaction, very bad things will happen.
+ // the signal handler through sighandler, very bad things will happen.
// The handler will run synchronously and corrupt tsan per-thread state.
SCOPED_INTERCEPTOR_RAW(sigaction, sig, act, old);
if (sig <= 0 || sig >= kSigCount) {
@@ -2448,22 +2497,17 @@ int sigaction_impl(int sig, const __sanitizer_sigaction *act,
#endif
internal_memcpy(&newact, act, sizeof(newact));
internal_sigfillset(&newact.sa_mask);
- if ((uptr)act->handler != sig_ign && (uptr)act->handler != sig_dfl) {
- if (newact.sa_flags & SA_SIGINFO)
- newact.sigaction = rtl_sigaction;
- else
- newact.handler = rtl_sighandler;
+ if ((act->sa_flags & SA_SIGINFO) ||
+ ((uptr)act->handler != sig_ign && (uptr)act->handler != sig_dfl)) {
+ newact.sa_flags |= SA_SIGINFO;
+ newact.sigaction = sighandler;
}
ReleaseStore(thr, pc, (uptr)&sigactions[sig]);
act = &newact;
}
int res = REAL(sigaction)(sig, act, old);
- if (res == 0 && old) {
- uptr cb = (uptr)old->sigaction;
- if (cb == (uptr)rtl_sigaction || cb == (uptr)rtl_sighandler) {
- internal_memcpy(old, &old_stored, sizeof(*old));
- }
- }
+ if (res == 0 && old && old->sigaction == sighandler)
+ internal_memcpy(old, &old_stored, sizeof(*old));
return res;
}
@@ -2479,20 +2523,16 @@ static __sanitizer_sighandler_ptr signal_impl(int sig,
return old.handler;
}
-#define TSAN_SYSCALL() \
+#define TSAN_SYSCALL() \
ThreadState *thr = cur_thread(); \
- if (thr->ignore_interceptors) \
- return; \
- ScopedSyscall scoped_syscall(thr) \
-/**/
+ if (thr->ignore_interceptors) \
+ return; \
+ ScopedSyscall scoped_syscall(thr)
struct ScopedSyscall {
ThreadState *thr;
- explicit ScopedSyscall(ThreadState *thr)
- : thr(thr) {
- Initialize(thr);
- }
+ explicit ScopedSyscall(ThreadState *thr) : thr(thr) { LazyInitialize(thr); }
~ScopedSyscall() {
ProcessPendingSignals(thr);
@@ -2508,12 +2548,12 @@ static void syscall_access_range(uptr pc, uptr p, uptr s, bool write) {
static USED void syscall_acquire(uptr pc, uptr addr) {
TSAN_SYSCALL();
Acquire(thr, pc, addr);
- DPrintf("syscall_acquire(%p)\n", addr);
+ DPrintf("syscall_acquire(0x%zx))\n", addr);
}
static USED void syscall_release(uptr pc, uptr addr) {
TSAN_SYSCALL();
- DPrintf("syscall_release(%p)\n", addr);
+ DPrintf("syscall_release(0x%zx)\n", addr);
Release(thr, pc, addr);
}
@@ -2525,12 +2565,12 @@ static void syscall_fd_close(uptr pc, int fd) {
static USED void syscall_fd_acquire(uptr pc, int fd) {
TSAN_SYSCALL();
FdAcquire(thr, pc, fd);
- DPrintf("syscall_fd_acquire(%p)\n", fd);
+ DPrintf("syscall_fd_acquire(%d)\n", fd);
}
static USED void syscall_fd_release(uptr pc, int fd) {
TSAN_SYSCALL();
- DPrintf("syscall_fd_release(%p)\n", fd);
+ DPrintf("syscall_fd_release(%d)\n", fd);
FdRelease(thr, pc, fd);
}
@@ -2540,7 +2580,7 @@ static void syscall_post_fork(uptr pc, int pid) {
ThreadState *thr = cur_thread();
if (pid == 0) {
// child
- ForkChildAfter(thr, pc);
+ ForkChildAfter(thr, pc, true);
FdOnFork(thr, pc);
} else if (pid > 0) {
// parent
@@ -2700,12 +2740,6 @@ void InitializeInterceptors() {
REAL(memcpy) = internal_memcpy;
#endif
- // Instruct libc malloc to consume less memory.
-#if SANITIZER_GLIBC
- mallopt(1, 0); // M_MXFAST
- mallopt(-3, 32*1024); // M_MMAP_THRESHOLD
-#endif
-
new(interceptor_ctx()) InterceptorContext();
InitializeCommonInterceptors();
@@ -2843,6 +2877,9 @@ void InitializeInterceptors() {
TSAN_INTERCEPT(fork);
TSAN_INTERCEPT(vfork);
+#if SANITIZER_LINUX
+ TSAN_INTERCEPT(clone);
+#endif
#if !SANITIZER_ANDROID
TSAN_INTERCEPT(dl_iterate_phdr);
#endif
@@ -2920,25 +2957,36 @@ void InitializeInterceptors() {
// Note that no_sanitize_thread attribute does not turn off atomic interception
// so attaching it to the function defined in user code does not help.
// That's why we now have what we have.
-extern "C" SANITIZER_INTERFACE_ATTRIBUTE
-void __tsan_testonly_barrier_init(u64 *barrier, u32 count) {
- if (count >= (1 << 8)) {
- Printf("barrier_init: count is too large (%d)\n", count);
- Die();
+constexpr u32 kBarrierThreadBits = 10;
+constexpr u32 kBarrierThreads = 1 << kBarrierThreadBits;
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __tsan_testonly_barrier_init(
+ atomic_uint32_t *barrier, u32 num_threads) {
+ if (num_threads >= kBarrierThreads) {
+ Printf("barrier_init: count is too large (%d)\n", num_threads);
+ Die();
}
- // 8 lsb is thread count, the remaining are count of entered threads.
- *barrier = count;
+  // The low kBarrierThreadBits bits hold the thread count;
+  // the remaining bits count the threads that have entered.
+ atomic_store(barrier, num_threads, memory_order_relaxed);
}
-extern "C" SANITIZER_INTERFACE_ATTRIBUTE
-void __tsan_testonly_barrier_wait(u64 *barrier) {
- unsigned old = __atomic_fetch_add(barrier, 1 << 8, __ATOMIC_RELAXED);
- unsigned old_epoch = (old >> 8) / (old & 0xff);
+static u32 barrier_epoch(u32 value) {
+ return (value >> kBarrierThreadBits) / (value & (kBarrierThreads - 1));
+}
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __tsan_testonly_barrier_wait(
+ atomic_uint32_t *barrier) {
+ u32 old = atomic_fetch_add(barrier, kBarrierThreads, memory_order_relaxed);
+ u32 old_epoch = barrier_epoch(old);
+ if (barrier_epoch(old + kBarrierThreads) != old_epoch) {
+ FutexWake(barrier, (1 << 30));
+ return;
+ }
for (;;) {
- unsigned cur = __atomic_load_n(barrier, __ATOMIC_RELAXED);
- unsigned cur_epoch = (cur >> 8) / (cur & 0xff);
- if (cur_epoch != old_epoch)
+ u32 cur = atomic_load(barrier, memory_order_relaxed);
+ if (barrier_epoch(cur) != old_epoch)
return;
- internal_sched_yield();
+ FutexWait(barrier, cur);
}
}
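
The guard_acquire/guard_release pair added above replaces the old sched_yield spin with a small four-state protocol (init, running, done, plus a waiter bit) that keeps the first byte consistent with the __cxa_guard ABI. Below is a minimal, self-contained sketch of the same state machine; std::this_thread::yield() stands in for FutexWait/FutexWake and the race-detector Acquire/Release hooks are omitted, so it illustrates only the state transitions, not the real runtime's blocking behavior.

#include <atomic>
#include <cstdint>
#include <thread>

constexpr uint32_t kGuardInit    = 0;        // not initialized yet
constexpr uint32_t kGuardDone    = 1;        // first byte != 0: init completed
constexpr uint32_t kGuardRunning = 1 << 16;  // a thread runs the initializer
constexpr uint32_t kGuardWaiter  = 1 << 17;  // at least one thread is waiting

// Returns 1 if the caller won the race and must run the initializer,
// 0 if initialization has already completed.
int guard_acquire(std::atomic<uint32_t>* g) {
  for (;;) {
    uint32_t cmp = g->load(std::memory_order_acquire);
    if (cmp == kGuardInit) {
      if (g->compare_exchange_strong(cmp, kGuardRunning,
                                     std::memory_order_relaxed))
        return 1;
    } else if (cmp == kGuardDone) {
      return 0;
    } else {
      // Someone else is initializing: make sure the waiter bit is set so the
      // releaser knows to wake us, then wait (the real code futex-waits here).
      if ((cmp & kGuardWaiter) ||
          g->compare_exchange_strong(cmp, cmp | kGuardWaiter,
                                     std::memory_order_relaxed))
        std::this_thread::yield();
    }
  }
}

// v is kGuardDone on success (__cxa_guard_release) or kGuardInit on failure
// (__cxa_guard_abort), which lets another thread retry the initialization.
void guard_release(std::atomic<uint32_t>* g, uint32_t v) {
  uint32_t old = g->exchange(v, std::memory_order_release);
  if (old & kGuardWaiter) {
    // The real code calls FutexWake here; with yield-based waiting there is
    // nothing to do.
  }
}

int main() {
  std::atomic<uint32_t> guard{kGuardInit};
  if (guard_acquire(&guard)) {
    // ... run the one-time initializer ...
    guard_release(&guard, kGuardDone);
  }
  return 0;
}
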
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interface.cpp b/compiler-rt/lib/tsan/rtl/tsan_interface.cpp
index 9bd0e8580b17..048715185151 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_interface.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_interface.cpp
@@ -20,109 +20,58 @@
using namespace __tsan;
-void __tsan_init() {
- cur_thread_init();
- Initialize(cur_thread());
-}
+void __tsan_init() { Initialize(cur_thread_init()); }
void __tsan_flush_memory() {
FlushShadowMemory();
}
void __tsan_read16(void *addr) {
- MemoryRead(cur_thread(), CALLERPC, (uptr)addr, kSizeLog8);
- MemoryRead(cur_thread(), CALLERPC, (uptr)addr + 8, kSizeLog8);
+ uptr pc = CALLERPC;
+ ThreadState *thr = cur_thread();
+ MemoryAccess(thr, pc, (uptr)addr, 8, kAccessRead);
+ MemoryAccess(thr, pc, (uptr)addr + 8, 8, kAccessRead);
}
void __tsan_write16(void *addr) {
- MemoryWrite(cur_thread(), CALLERPC, (uptr)addr, kSizeLog8);
- MemoryWrite(cur_thread(), CALLERPC, (uptr)addr + 8, kSizeLog8);
+ uptr pc = CALLERPC;
+ ThreadState *thr = cur_thread();
+ MemoryAccess(thr, pc, (uptr)addr, 8, kAccessWrite);
+ MemoryAccess(thr, pc, (uptr)addr + 8, 8, kAccessWrite);
}
void __tsan_read16_pc(void *addr, void *pc) {
- MemoryRead(cur_thread(), STRIP_PAC_PC(pc), (uptr)addr, kSizeLog8);
- MemoryRead(cur_thread(), STRIP_PAC_PC(pc), (uptr)addr + 8, kSizeLog8);
+ uptr pc_no_pac = STRIP_PAC_PC(pc);
+ ThreadState *thr = cur_thread();
+ MemoryAccess(thr, pc_no_pac, (uptr)addr, 8, kAccessRead);
+ MemoryAccess(thr, pc_no_pac, (uptr)addr + 8, 8, kAccessRead);
}
void __tsan_write16_pc(void *addr, void *pc) {
- MemoryWrite(cur_thread(), STRIP_PAC_PC(pc), (uptr)addr, kSizeLog8);
- MemoryWrite(cur_thread(), STRIP_PAC_PC(pc), (uptr)addr + 8, kSizeLog8);
+ uptr pc_no_pac = STRIP_PAC_PC(pc);
+ ThreadState *thr = cur_thread();
+ MemoryAccess(thr, pc_no_pac, (uptr)addr, 8, kAccessWrite);
+ MemoryAccess(thr, pc_no_pac, (uptr)addr + 8, 8, kAccessWrite);
}
// __tsan_unaligned_read/write calls are emitted by compiler.
-void __tsan_unaligned_read2(const void *addr) {
- UnalignedMemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 2, false, false);
-}
-
-void __tsan_unaligned_read4(const void *addr) {
- UnalignedMemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 4, false, false);
-}
-
-void __tsan_unaligned_read8(const void *addr) {
- UnalignedMemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 8, false, false);
-}
-
void __tsan_unaligned_read16(const void *addr) {
- UnalignedMemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 16, false, false);
-}
-
-void __tsan_unaligned_write2(void *addr) {
- UnalignedMemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 2, true, false);
-}
-
-void __tsan_unaligned_write4(void *addr) {
- UnalignedMemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 4, true, false);
-}
-
-void __tsan_unaligned_write8(void *addr) {
- UnalignedMemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 8, true, false);
+ uptr pc = CALLERPC;
+ ThreadState *thr = cur_thread();
+ UnalignedMemoryAccess(thr, pc, (uptr)addr, 8, kAccessRead);
+ UnalignedMemoryAccess(thr, pc, (uptr)addr + 8, 8, kAccessRead);
}
void __tsan_unaligned_write16(void *addr) {
- UnalignedMemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 16, true, false);
+ uptr pc = CALLERPC;
+ ThreadState *thr = cur_thread();
+ UnalignedMemoryAccess(thr, pc, (uptr)addr, 8, kAccessWrite);
+ UnalignedMemoryAccess(thr, pc, (uptr)addr + 8, 8, kAccessWrite);
}
-// __sanitizer_unaligned_load/store are for user instrumentation.
-
extern "C" {
SANITIZER_INTERFACE_ATTRIBUTE
-u16 __sanitizer_unaligned_load16(const uu16 *addr) {
- __tsan_unaligned_read2(addr);
- return *addr;
-}
-
-SANITIZER_INTERFACE_ATTRIBUTE
-u32 __sanitizer_unaligned_load32(const uu32 *addr) {
- __tsan_unaligned_read4(addr);
- return *addr;
-}
-
-SANITIZER_INTERFACE_ATTRIBUTE
-u64 __sanitizer_unaligned_load64(const uu64 *addr) {
- __tsan_unaligned_read8(addr);
- return *addr;
-}
-
-SANITIZER_INTERFACE_ATTRIBUTE
-void __sanitizer_unaligned_store16(uu16 *addr, u16 v) {
- __tsan_unaligned_write2(addr);
- *addr = v;
-}
-
-SANITIZER_INTERFACE_ATTRIBUTE
-void __sanitizer_unaligned_store32(uu32 *addr, u32 v) {
- __tsan_unaligned_write4(addr);
- *addr = v;
-}
-
-SANITIZER_INTERFACE_ATTRIBUTE
-void __sanitizer_unaligned_store64(uu64 *addr, u64 v) {
- __tsan_unaligned_write8(addr);
- *addr = v;
-}
-
-SANITIZER_INTERFACE_ATTRIBUTE
void *__tsan_get_current_fiber() {
return cur_thread();
}
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interface.h b/compiler-rt/lib/tsan/rtl/tsan_interface.h
index 124aa2fd2143..711f064174c2 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_interface.h
+++ b/compiler-rt/lib/tsan/rtl/tsan_interface.h
@@ -95,9 +95,9 @@ SANITIZER_INTERFACE_ATTRIBUTE
void __tsan_write_range(void *addr, unsigned long size);
SANITIZER_INTERFACE_ATTRIBUTE
-void __tsan_read_range_pc(void *addr, unsigned long size, void *pc); // NOLINT
+void __tsan_read_range_pc(void *addr, unsigned long size, void *pc);
SANITIZER_INTERFACE_ATTRIBUTE
-void __tsan_write_range_pc(void *addr, unsigned long size, void *pc); // NOLINT
+void __tsan_write_range_pc(void *addr, unsigned long size, void *pc);
// User may provide function that would be called right when TSan detects
// an error. The argument 'report' is an opaque pointer that can be used to
@@ -417,12 +417,6 @@ SANITIZER_INTERFACE_ATTRIBUTE
void __tsan_go_atomic64_compare_exchange(ThreadState *thr, uptr cpc, uptr pc,
u8 *a);
-SANITIZER_INTERFACE_ATTRIBUTE
-void __tsan_on_initialize();
-
-SANITIZER_INTERFACE_ATTRIBUTE
-int __tsan_on_finalize(int failed);
-
} // extern "C"
} // namespace __tsan
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interface.inc b/compiler-rt/lib/tsan/rtl/tsan_interface.inc
new file mode 100644
index 000000000000..0031800e851f
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl/tsan_interface.inc
@@ -0,0 +1,182 @@
+//===-- tsan_interface.inc --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_ptrauth.h"
+#include "tsan_interface.h"
+#include "tsan_rtl.h"
+
+#define CALLERPC ((uptr)__builtin_return_address(0))
+
+using namespace __tsan;
+
+void __tsan_read1(void *addr) {
+ MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 1, kAccessRead);
+}
+
+void __tsan_read2(void *addr) {
+ MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 2, kAccessRead);
+}
+
+void __tsan_read4(void *addr) {
+ MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 4, kAccessRead);
+}
+
+void __tsan_read8(void *addr) {
+ MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 8, kAccessRead);
+}
+
+void __tsan_write1(void *addr) {
+ MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 1, kAccessWrite);
+}
+
+void __tsan_write2(void *addr) {
+ MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 2, kAccessWrite);
+}
+
+void __tsan_write4(void *addr) {
+ MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 4, kAccessWrite);
+}
+
+void __tsan_write8(void *addr) {
+ MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 8, kAccessWrite);
+}
+
+void __tsan_read1_pc(void *addr, void *pc) {
+ MemoryAccess(cur_thread(), STRIP_PAC_PC(pc), (uptr)addr, 1, kAccessRead | kAccessExternalPC);
+}
+
+void __tsan_read2_pc(void *addr, void *pc) {
+ MemoryAccess(cur_thread(), STRIP_PAC_PC(pc), (uptr)addr, 2, kAccessRead | kAccessExternalPC);
+}
+
+void __tsan_read4_pc(void *addr, void *pc) {
+ MemoryAccess(cur_thread(), STRIP_PAC_PC(pc), (uptr)addr, 4, kAccessRead | kAccessExternalPC);
+}
+
+void __tsan_read8_pc(void *addr, void *pc) {
+ MemoryAccess(cur_thread(), STRIP_PAC_PC(pc), (uptr)addr, 8, kAccessRead | kAccessExternalPC);
+}
+
+void __tsan_write1_pc(void *addr, void *pc) {
+ MemoryAccess(cur_thread(), STRIP_PAC_PC(pc), (uptr)addr, 1, kAccessWrite | kAccessExternalPC);
+}
+
+void __tsan_write2_pc(void *addr, void *pc) {
+ MemoryAccess(cur_thread(), STRIP_PAC_PC(pc), (uptr)addr, 2, kAccessWrite | kAccessExternalPC);
+}
+
+void __tsan_write4_pc(void *addr, void *pc) {
+ MemoryAccess(cur_thread(), STRIP_PAC_PC(pc), (uptr)addr, 4, kAccessWrite | kAccessExternalPC);
+}
+
+void __tsan_write8_pc(void *addr, void *pc) {
+ MemoryAccess(cur_thread(), STRIP_PAC_PC(pc), (uptr)addr, 8, kAccessWrite | kAccessExternalPC);
+}
+
+ALWAYS_INLINE USED void __tsan_unaligned_read2(const void *addr) {
+ UnalignedMemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 2, kAccessRead);
+}
+
+ALWAYS_INLINE USED void __tsan_unaligned_read4(const void *addr) {
+ UnalignedMemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 4, kAccessRead);
+}
+
+ALWAYS_INLINE USED void __tsan_unaligned_read8(const void *addr) {
+ UnalignedMemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 8, kAccessRead);
+}
+
+ALWAYS_INLINE USED void __tsan_unaligned_write2(void *addr) {
+ UnalignedMemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 2, kAccessWrite);
+}
+
+ALWAYS_INLINE USED void __tsan_unaligned_write4(void *addr) {
+ UnalignedMemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 4, kAccessWrite);
+}
+
+ALWAYS_INLINE USED void __tsan_unaligned_write8(void *addr) {
+ UnalignedMemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 8, kAccessWrite);
+}
+
+extern "C" {
+// __sanitizer_unaligned_load/store are for user instrumentation.
+SANITIZER_INTERFACE_ATTRIBUTE
+u16 __sanitizer_unaligned_load16(const uu16 *addr) {
+ __tsan_unaligned_read2(addr);
+ return *addr;
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+u32 __sanitizer_unaligned_load32(const uu32 *addr) {
+ __tsan_unaligned_read4(addr);
+ return *addr;
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+u64 __sanitizer_unaligned_load64(const uu64 *addr) {
+ __tsan_unaligned_read8(addr);
+ return *addr;
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void __sanitizer_unaligned_store16(uu16 *addr, u16 v) {
+ *addr = v;
+ __tsan_unaligned_write2(addr);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void __sanitizer_unaligned_store32(uu32 *addr, u32 v) {
+ *addr = v;
+ __tsan_unaligned_write4(addr);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void __sanitizer_unaligned_store64(uu64 *addr, u64 v) {
+ *addr = v;
+ __tsan_unaligned_write8(addr);
+}
+}
+
+void __tsan_vptr_update(void **vptr_p, void *new_val) {
+ if (*vptr_p == new_val)
+ return;
+ MemoryAccess(cur_thread(), CALLERPC, (uptr)vptr_p, sizeof(*vptr_p),
+ kAccessWrite | kAccessVptr);
+}
+
+void __tsan_vptr_read(void **vptr_p) {
+ MemoryAccess(cur_thread(), CALLERPC, (uptr)vptr_p, sizeof(*vptr_p),
+ kAccessRead | kAccessVptr);
+}
+
+void __tsan_func_entry(void *pc) { FuncEntry(cur_thread(), STRIP_PAC_PC(pc)); }
+
+void __tsan_func_exit() { FuncExit(cur_thread()); }
+
+void __tsan_ignore_thread_begin() { ThreadIgnoreBegin(cur_thread(), CALLERPC); }
+
+void __tsan_ignore_thread_end() { ThreadIgnoreEnd(cur_thread()); }
+
+void __tsan_read_range(void *addr, uptr size) {
+ MemoryAccessRange(cur_thread(), CALLERPC, (uptr)addr, size, false);
+}
+
+void __tsan_write_range(void *addr, uptr size) {
+ MemoryAccessRange(cur_thread(), CALLERPC, (uptr)addr, size, true);
+}
+
+void __tsan_read_range_pc(void *addr, uptr size, void *pc) {
+ MemoryAccessRange(cur_thread(), STRIP_PAC_PC(pc), (uptr)addr, size, false);
+}
+
+void __tsan_write_range_pc(void *addr, uptr size, void *pc) {
+ MemoryAccessRange(cur_thread(), STRIP_PAC_PC(pc), (uptr)addr, size, true);
+}
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interface_ann.cpp b/compiler-rt/lib/tsan/rtl/tsan_interface_ann.cpp
index 47314f5ad812..6bd72e18d942 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_interface_ann.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_interface_ann.cpp
@@ -43,15 +43,14 @@ class ScopedAnnotation {
ThreadState *const thr_;
};
-#define SCOPED_ANNOTATION_RET(typ, ret) \
- if (!flags()->enable_annotations) \
- return ret; \
- ThreadState *thr = cur_thread(); \
- const uptr caller_pc = (uptr)__builtin_return_address(0); \
- ScopedAnnotation sa(thr, __func__, caller_pc); \
- const uptr pc = StackTrace::GetCurrentPc(); \
- (void)pc; \
-/**/
+#define SCOPED_ANNOTATION_RET(typ, ret) \
+ if (!flags()->enable_annotations) \
+ return ret; \
+ ThreadState *thr = cur_thread(); \
+ const uptr caller_pc = (uptr)__builtin_return_address(0); \
+ ScopedAnnotation sa(thr, __func__, caller_pc); \
+ const uptr pc = StackTrace::GetCurrentPc(); \
+ (void)pc;
#define SCOPED_ANNOTATION(typ) SCOPED_ANNOTATION_RET(typ, )
@@ -71,7 +70,6 @@ struct ExpectRace {
struct DynamicAnnContext {
Mutex mtx;
- ExpectRace expect;
ExpectRace benign;
DynamicAnnContext() : mtx(MutexTypeAnnotations) {}
@@ -90,7 +88,7 @@ static void AddExpectRace(ExpectRace *list,
return;
}
}
- race = (ExpectRace*)internal_alloc(MBlockExpectRace, sizeof(ExpectRace));
+ race = static_cast<ExpectRace *>(Alloc(sizeof(ExpectRace)));
race->addr = addr;
race->size = size;
race->file = f;
@@ -137,81 +135,12 @@ static void InitList(ExpectRace *list) {
void InitializeDynamicAnnotations() {
dyn_ann_ctx = new(dyn_ann_ctx_placeholder) DynamicAnnContext;
- InitList(&dyn_ann_ctx->expect);
InitList(&dyn_ann_ctx->benign);
}
bool IsExpectedReport(uptr addr, uptr size) {
ReadLock lock(&dyn_ann_ctx->mtx);
- if (CheckContains(&dyn_ann_ctx->expect, addr, size))
- return true;
- if (CheckContains(&dyn_ann_ctx->benign, addr, size))
- return true;
- return false;
-}
-
-static void CollectMatchedBenignRaces(Vector<ExpectRace> *matched,
- int *unique_count, int *hit_count, atomic_uintptr_t ExpectRace::*counter) {
- ExpectRace *list = &dyn_ann_ctx->benign;
- for (ExpectRace *race = list->next; race != list; race = race->next) {
- (*unique_count)++;
- const uptr cnt = atomic_load_relaxed(&(race->*counter));
- if (cnt == 0)
- continue;
- *hit_count += cnt;
- uptr i = 0;
- for (; i < matched->Size(); i++) {
- ExpectRace *race0 = &(*matched)[i];
- if (race->line == race0->line
- && internal_strcmp(race->file, race0->file) == 0
- && internal_strcmp(race->desc, race0->desc) == 0) {
- atomic_fetch_add(&(race0->*counter), cnt, memory_order_relaxed);
- break;
- }
- }
- if (i == matched->Size())
- matched->PushBack(*race);
- }
-}
-
-void PrintMatchedBenignRaces() {
- Lock lock(&dyn_ann_ctx->mtx);
- int unique_count = 0;
- int hit_count = 0;
- int add_count = 0;
- Vector<ExpectRace> hit_matched;
- CollectMatchedBenignRaces(&hit_matched, &unique_count, &hit_count,
- &ExpectRace::hitcount);
- Vector<ExpectRace> add_matched;
- CollectMatchedBenignRaces(&add_matched, &unique_count, &add_count,
- &ExpectRace::addcount);
- if (hit_matched.Size()) {
- Printf("ThreadSanitizer: Matched %d \"benign\" races (pid=%d):\n",
- hit_count, (int)internal_getpid());
- for (uptr i = 0; i < hit_matched.Size(); i++) {
- Printf("%d %s:%d %s\n",
- atomic_load_relaxed(&hit_matched[i].hitcount),
- hit_matched[i].file, hit_matched[i].line, hit_matched[i].desc);
- }
- }
- if (hit_matched.Size()) {
- Printf("ThreadSanitizer: Annotated %d \"benign\" races, %d unique"
- " (pid=%d):\n",
- add_count, unique_count, (int)internal_getpid());
- for (uptr i = 0; i < add_matched.Size(); i++) {
- Printf("%d %s:%d %s\n",
- atomic_load_relaxed(&add_matched[i].addcount),
- add_matched[i].file, add_matched[i].line, add_matched[i].desc);
- }
- }
-}
-
-static void ReportMissedExpectedRace(ExpectRace *race) {
- Printf("==================\n");
- Printf("WARNING: ThreadSanitizer: missed expected data race\n");
- Printf(" %s addr=%zx %s:%d\n",
- race->desc, race->addr, race->file, race->line);
- Printf("==================\n");
+ return CheckContains(&dyn_ann_ctx->benign, addr, size);
}
} // namespace __tsan
@@ -229,20 +158,16 @@ void INTERFACE_ATTRIBUTE AnnotateHappensAfter(char *f, int l, uptr addr) {
}
void INTERFACE_ATTRIBUTE AnnotateCondVarSignal(char *f, int l, uptr cv) {
- SCOPED_ANNOTATION(AnnotateCondVarSignal);
}
void INTERFACE_ATTRIBUTE AnnotateCondVarSignalAll(char *f, int l, uptr cv) {
- SCOPED_ANNOTATION(AnnotateCondVarSignalAll);
}
void INTERFACE_ATTRIBUTE AnnotateMutexIsNotPHB(char *f, int l, uptr mu) {
- SCOPED_ANNOTATION(AnnotateMutexIsNotPHB);
}
void INTERFACE_ATTRIBUTE AnnotateCondVarWait(char *f, int l, uptr cv,
uptr lock) {
- SCOPED_ANNOTATION(AnnotateCondVarWait);
}
void INTERFACE_ATTRIBUTE AnnotateRWLockCreate(char *f, int l, uptr m) {
@@ -279,86 +204,56 @@ void INTERFACE_ATTRIBUTE AnnotateRWLockReleased(char *f, int l, uptr m,
}
void INTERFACE_ATTRIBUTE AnnotateTraceMemory(char *f, int l, uptr mem) {
- SCOPED_ANNOTATION(AnnotateTraceMemory);
}
void INTERFACE_ATTRIBUTE AnnotateFlushState(char *f, int l) {
- SCOPED_ANNOTATION(AnnotateFlushState);
}
void INTERFACE_ATTRIBUTE AnnotateNewMemory(char *f, int l, uptr mem,
uptr size) {
- SCOPED_ANNOTATION(AnnotateNewMemory);
}
void INTERFACE_ATTRIBUTE AnnotateNoOp(char *f, int l, uptr mem) {
- SCOPED_ANNOTATION(AnnotateNoOp);
}
void INTERFACE_ATTRIBUTE AnnotateFlushExpectedRaces(char *f, int l) {
- SCOPED_ANNOTATION(AnnotateFlushExpectedRaces);
- Lock lock(&dyn_ann_ctx->mtx);
- while (dyn_ann_ctx->expect.next != &dyn_ann_ctx->expect) {
- ExpectRace *race = dyn_ann_ctx->expect.next;
- if (atomic_load_relaxed(&race->hitcount) == 0) {
- ctx->nmissed_expected++;
- ReportMissedExpectedRace(race);
- }
- race->prev->next = race->next;
- race->next->prev = race->prev;
- internal_free(race);
- }
}
void INTERFACE_ATTRIBUTE AnnotateEnableRaceDetection(
char *f, int l, int enable) {
- SCOPED_ANNOTATION(AnnotateEnableRaceDetection);
- // FIXME: Reconsider this functionality later. It may be irrelevant.
}
void INTERFACE_ATTRIBUTE AnnotateMutexIsUsedAsCondVar(
char *f, int l, uptr mu) {
- SCOPED_ANNOTATION(AnnotateMutexIsUsedAsCondVar);
}
void INTERFACE_ATTRIBUTE AnnotatePCQGet(
char *f, int l, uptr pcq) {
- SCOPED_ANNOTATION(AnnotatePCQGet);
}
void INTERFACE_ATTRIBUTE AnnotatePCQPut(
char *f, int l, uptr pcq) {
- SCOPED_ANNOTATION(AnnotatePCQPut);
}
void INTERFACE_ATTRIBUTE AnnotatePCQDestroy(
char *f, int l, uptr pcq) {
- SCOPED_ANNOTATION(AnnotatePCQDestroy);
}
void INTERFACE_ATTRIBUTE AnnotatePCQCreate(
char *f, int l, uptr pcq) {
- SCOPED_ANNOTATION(AnnotatePCQCreate);
}
void INTERFACE_ATTRIBUTE AnnotateExpectRace(
char *f, int l, uptr mem, char *desc) {
- SCOPED_ANNOTATION(AnnotateExpectRace);
- Lock lock(&dyn_ann_ctx->mtx);
- AddExpectRace(&dyn_ann_ctx->expect,
- f, l, mem, 1, desc);
- DPrintf("Add expected race: %s addr=%zx %s:%d\n", desc, mem, f, l);
}
-static void BenignRaceImpl(
- char *f, int l, uptr mem, uptr size, char *desc) {
+static void BenignRaceImpl(char *f, int l, uptr mem, uptr size, char *desc) {
Lock lock(&dyn_ann_ctx->mtx);
AddExpectRace(&dyn_ann_ctx->benign,
f, l, mem, size, desc);
DPrintf("Add benign race: %s addr=%zx %s:%d\n", desc, mem, f, l);
}
-// FIXME: Turn it off later. WTF is benign race?1?? Go talk to Hans Boehm.
void INTERFACE_ATTRIBUTE AnnotateBenignRaceSized(
char *f, int l, uptr mem, uptr size, char *desc) {
SCOPED_ANNOTATION(AnnotateBenignRaceSized);
@@ -378,7 +273,7 @@ void INTERFACE_ATTRIBUTE AnnotateIgnoreReadsBegin(char *f, int l) {
void INTERFACE_ATTRIBUTE AnnotateIgnoreReadsEnd(char *f, int l) {
SCOPED_ANNOTATION(AnnotateIgnoreReadsEnd);
- ThreadIgnoreEnd(thr, pc);
+ ThreadIgnoreEnd(thr);
}
void INTERFACE_ATTRIBUTE AnnotateIgnoreWritesBegin(char *f, int l) {
@@ -388,7 +283,7 @@ void INTERFACE_ATTRIBUTE AnnotateIgnoreWritesBegin(char *f, int l) {
void INTERFACE_ATTRIBUTE AnnotateIgnoreWritesEnd(char *f, int l) {
SCOPED_ANNOTATION(AnnotateIgnoreWritesEnd);
- ThreadIgnoreEnd(thr, pc);
+ ThreadIgnoreEnd(thr);
}
void INTERFACE_ATTRIBUTE AnnotateIgnoreSyncBegin(char *f, int l) {
@@ -398,17 +293,15 @@ void INTERFACE_ATTRIBUTE AnnotateIgnoreSyncBegin(char *f, int l) {
void INTERFACE_ATTRIBUTE AnnotateIgnoreSyncEnd(char *f, int l) {
SCOPED_ANNOTATION(AnnotateIgnoreSyncEnd);
- ThreadIgnoreSyncEnd(thr, pc);
+ ThreadIgnoreSyncEnd(thr);
}
void INTERFACE_ATTRIBUTE AnnotatePublishMemoryRange(
char *f, int l, uptr addr, uptr size) {
- SCOPED_ANNOTATION(AnnotatePublishMemoryRange);
}
void INTERFACE_ATTRIBUTE AnnotateUnpublishMemoryRange(
char *f, int l, uptr addr, uptr size) {
- SCOPED_ANNOTATION(AnnotateUnpublishMemoryRange);
}
void INTERFACE_ATTRIBUTE AnnotateThreadName(
@@ -421,11 +314,9 @@ void INTERFACE_ATTRIBUTE AnnotateThreadName(
// WTFAnnotateHappensAfter(). Those are being used by Webkit to annotate
// atomic operations, which should be handled by ThreadSanitizer correctly.
void INTERFACE_ATTRIBUTE WTFAnnotateHappensBefore(char *f, int l, uptr addr) {
- SCOPED_ANNOTATION(AnnotateHappensBefore);
}
void INTERFACE_ATTRIBUTE WTFAnnotateHappensAfter(char *f, int l, uptr addr) {
- SCOPED_ANNOTATION(AnnotateHappensAfter);
}
void INTERFACE_ATTRIBUTE WTFAnnotateBenignRaceSized(
@@ -477,15 +368,15 @@ void __tsan_mutex_pre_lock(void *m, unsigned flagz) {
else
MutexPreLock(thr, pc, (uptr)m);
}
- ThreadIgnoreBegin(thr, pc, /*save_stack=*/false);
- ThreadIgnoreSyncBegin(thr, pc, /*save_stack=*/false);
+ ThreadIgnoreBegin(thr, 0);
+ ThreadIgnoreSyncBegin(thr, 0);
}
INTERFACE_ATTRIBUTE
void __tsan_mutex_post_lock(void *m, unsigned flagz, int rec) {
SCOPED_ANNOTATION(__tsan_mutex_post_lock);
- ThreadIgnoreSyncEnd(thr, pc);
- ThreadIgnoreEnd(thr, pc);
+ ThreadIgnoreSyncEnd(thr);
+ ThreadIgnoreEnd(thr);
if (!(flagz & MutexFlagTryLockFailed)) {
if (flagz & MutexFlagReadLock)
MutexPostReadLock(thr, pc, (uptr)m, flagz);
@@ -504,44 +395,44 @@ int __tsan_mutex_pre_unlock(void *m, unsigned flagz) {
} else {
ret = MutexUnlock(thr, pc, (uptr)m, flagz);
}
- ThreadIgnoreBegin(thr, pc, /*save_stack=*/false);
- ThreadIgnoreSyncBegin(thr, pc, /*save_stack=*/false);
+ ThreadIgnoreBegin(thr, 0);
+ ThreadIgnoreSyncBegin(thr, 0);
return ret;
}
INTERFACE_ATTRIBUTE
void __tsan_mutex_post_unlock(void *m, unsigned flagz) {
SCOPED_ANNOTATION(__tsan_mutex_post_unlock);
- ThreadIgnoreSyncEnd(thr, pc);
- ThreadIgnoreEnd(thr, pc);
+ ThreadIgnoreSyncEnd(thr);
+ ThreadIgnoreEnd(thr);
}
INTERFACE_ATTRIBUTE
void __tsan_mutex_pre_signal(void *addr, unsigned flagz) {
SCOPED_ANNOTATION(__tsan_mutex_pre_signal);
- ThreadIgnoreBegin(thr, pc, /*save_stack=*/false);
- ThreadIgnoreSyncBegin(thr, pc, /*save_stack=*/false);
+ ThreadIgnoreBegin(thr, 0);
+ ThreadIgnoreSyncBegin(thr, 0);
}
INTERFACE_ATTRIBUTE
void __tsan_mutex_post_signal(void *addr, unsigned flagz) {
SCOPED_ANNOTATION(__tsan_mutex_post_signal);
- ThreadIgnoreSyncEnd(thr, pc);
- ThreadIgnoreEnd(thr, pc);
+ ThreadIgnoreSyncEnd(thr);
+ ThreadIgnoreEnd(thr);
}
INTERFACE_ATTRIBUTE
void __tsan_mutex_pre_divert(void *addr, unsigned flagz) {
SCOPED_ANNOTATION(__tsan_mutex_pre_divert);
// Exit from ignore region started in __tsan_mutex_pre_lock/unlock/signal.
- ThreadIgnoreSyncEnd(thr, pc);
- ThreadIgnoreEnd(thr, pc);
+ ThreadIgnoreSyncEnd(thr);
+ ThreadIgnoreEnd(thr);
}
INTERFACE_ATTRIBUTE
void __tsan_mutex_post_divert(void *addr, unsigned flagz) {
SCOPED_ANNOTATION(__tsan_mutex_post_divert);
- ThreadIgnoreBegin(thr, pc, /*save_stack=*/false);
- ThreadIgnoreSyncBegin(thr, pc, /*save_stack=*/false);
+ ThreadIgnoreBegin(thr, 0);
+ ThreadIgnoreSyncBegin(thr, 0);
}
} // extern "C"
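
The __tsan_mutex_pre/post hooks above bracket a user mutex implementation with ignore regions so the mutex's own internal accesses are not reported; a minimal sketch of the intended calling pattern, assuming the declarations from the public tsan interface (the SpinLock type itself is hypothetical):

#include <atomic>

extern "C" {
void __tsan_mutex_pre_lock(void *addr, unsigned flags);
void __tsan_mutex_post_lock(void *addr, unsigned flags, int recursion);
int __tsan_mutex_pre_unlock(void *addr, unsigned flags);
void __tsan_mutex_post_unlock(void *addr, unsigned flags);
}

// Hypothetical user-level spinlock annotated for TSan.
struct SpinLock {
  std::atomic_flag held = ATOMIC_FLAG_INIT;
  void lock() {
    __tsan_mutex_pre_lock(this, 0);
    while (held.test_and_set(std::memory_order_acquire)) {}
    __tsan_mutex_post_lock(this, 0, 0);
  }
  void unlock() {
    __tsan_mutex_pre_unlock(this, 0);
    held.clear(std::memory_order_release);
    __tsan_mutex_post_unlock(this, 0);
  }
};
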
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interface_atomic.cpp b/compiler-rt/lib/tsan/rtl/tsan_interface_atomic.cpp
index 89bb75394553..24ba3bb1f65d 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_interface_atomic.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_interface_atomic.cpp
@@ -32,6 +32,7 @@ using namespace __tsan;
static StaticSpinMutex mutex128;
#endif
+#if SANITIZER_DEBUG
static bool IsLoadOrder(morder mo) {
return mo == mo_relaxed || mo == mo_consume
|| mo == mo_acquire || mo == mo_seq_cst;
@@ -40,6 +41,7 @@ static bool IsLoadOrder(morder mo) {
static bool IsStoreOrder(morder mo) {
return mo == mo_relaxed || mo == mo_release || mo == mo_seq_cst;
}
+#endif
static bool IsReleaseOrder(morder mo) {
return mo == mo_release || mo == mo_acq_rel || mo == mo_seq_cst;
@@ -161,16 +163,16 @@ a128 func_cas(volatile a128 *v, a128 cmp, a128 xch) {
}
#endif
-template<typename T>
-static int SizeLog() {
+template <typename T>
+static int AccessSize() {
if (sizeof(T) <= 1)
- return kSizeLog1;
+ return 1;
else if (sizeof(T) <= 2)
- return kSizeLog2;
+ return 2;
else if (sizeof(T) <= 4)
- return kSizeLog4;
+ return 4;
else
- return kSizeLog8;
+ return 8;
// For 16-byte atomics we also use 8-byte memory access,
// this leads to false negatives only in very obscure cases.
}
@@ -202,7 +204,7 @@ static memory_order to_mo(morder mo) {
case mo_acq_rel: return memory_order_acq_rel;
case mo_seq_cst: return memory_order_seq_cst;
}
- CHECK(0);
+ DCHECK(0);
return memory_order_seq_cst;
}
@@ -219,27 +221,27 @@ static a128 NoTsanAtomicLoad(const volatile a128 *a, morder mo) {
#endif
template <typename T>
-static T AtomicLoad(ThreadState *thr, uptr pc, const volatile T *a,
- morder mo) NO_THREAD_SAFETY_ANALYSIS {
- CHECK(IsLoadOrder(mo));
+static T AtomicLoad(ThreadState *thr, uptr pc, const volatile T *a, morder mo) {
+ DCHECK(IsLoadOrder(mo));
// This fast-path is critical for performance.
// Assume the access is atomic.
if (!IsAcquireOrder(mo)) {
- MemoryReadAtomic(thr, pc, (uptr)a, SizeLog<T>());
+ MemoryAccess(thr, pc, (uptr)a, AccessSize<T>(),
+ kAccessRead | kAccessAtomic);
return NoTsanAtomicLoad(a, mo);
}
// Don't create sync object if it does not exist yet. For example, an atomic
// pointer is initialized to nullptr and then periodically acquire-loaded.
T v = NoTsanAtomicLoad(a, mo);
- SyncVar *s = ctx->metamap.GetIfExistsAndLock((uptr)a, false);
+ SyncVar *s = ctx->metamap.GetSyncIfExists((uptr)a);
if (s) {
+ ReadLock l(&s->mtx);
AcquireImpl(thr, pc, &s->clock);
// Re-read under sync mutex because we need a consistent snapshot
// of the value and the clock we acquire.
v = NoTsanAtomicLoad(a, mo);
- s->mtx.ReadUnlock();
}
- MemoryReadAtomic(thr, pc, (uptr)a, SizeLog<T>());
+ MemoryAccess(thr, pc, (uptr)a, AccessSize<T>(), kAccessRead | kAccessAtomic);
return v;
}
@@ -257,9 +259,9 @@ static void NoTsanAtomicStore(volatile a128 *a, a128 v, morder mo) {
template <typename T>
static void AtomicStore(ThreadState *thr, uptr pc, volatile T *a, T v,
- morder mo) NO_THREAD_SAFETY_ANALYSIS {
- CHECK(IsStoreOrder(mo));
- MemoryWriteAtomic(thr, pc, (uptr)a, SizeLog<T>());
+ morder mo) {
+ DCHECK(IsStoreOrder(mo));
+ MemoryAccess(thr, pc, (uptr)a, AccessSize<T>(), kAccessWrite | kAccessAtomic);
// This fast-path is critical for performance.
// Assume the access is atomic.
// Strictly saying even relaxed store cuts off release sequence,
@@ -269,36 +271,32 @@ static void AtomicStore(ThreadState *thr, uptr pc, volatile T *a, T v,
return;
}
__sync_synchronize();
- SyncVar *s = ctx->metamap.GetOrCreateAndLock(thr, pc, (uptr)a, true);
+ SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, (uptr)a, false);
+ Lock l(&s->mtx);
thr->fast_state.IncrementEpoch();
// Can't increment epoch w/o writing to the trace as well.
TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0);
ReleaseStoreImpl(thr, pc, &s->clock);
NoTsanAtomicStore(a, v, mo);
- s->mtx.Unlock();
}
template <typename T, T (*F)(volatile T *v, T op)>
-static T AtomicRMW(ThreadState *thr, uptr pc, volatile T *a, T v,
- morder mo) NO_THREAD_SAFETY_ANALYSIS {
- MemoryWriteAtomic(thr, pc, (uptr)a, SizeLog<T>());
- SyncVar *s = 0;
- if (mo != mo_relaxed) {
- s = ctx->metamap.GetOrCreateAndLock(thr, pc, (uptr)a, true);
- thr->fast_state.IncrementEpoch();
- // Can't increment epoch w/o writing to the trace as well.
- TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0);
- if (IsAcqRelOrder(mo))
- AcquireReleaseImpl(thr, pc, &s->clock);
- else if (IsReleaseOrder(mo))
- ReleaseImpl(thr, pc, &s->clock);
- else if (IsAcquireOrder(mo))
- AcquireImpl(thr, pc, &s->clock);
- }
- v = F(a, v);
- if (s)
- s->mtx.Unlock();
- return v;
+static T AtomicRMW(ThreadState *thr, uptr pc, volatile T *a, T v, morder mo) {
+ MemoryAccess(thr, pc, (uptr)a, AccessSize<T>(), kAccessWrite | kAccessAtomic);
+ if (LIKELY(mo == mo_relaxed))
+ return F(a, v);
+ SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, (uptr)a, false);
+ Lock l(&s->mtx);
+ thr->fast_state.IncrementEpoch();
+ // Can't increment epoch w/o writing to the trace as well.
+ TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0);
+ if (IsAcqRelOrder(mo))
+ AcquireReleaseImpl(thr, pc, &s->clock);
+ else if (IsReleaseOrder(mo))
+ ReleaseImpl(thr, pc, &s->clock);
+ else if (IsAcquireOrder(mo))
+ AcquireImpl(thr, pc, &s->clock);
+ return F(a, v);
}
template<typename T>
@@ -402,20 +400,26 @@ static T NoTsanAtomicCAS(volatile T *a, T c, T v, morder mo, morder fmo) {
}
template <typename T>
-static bool AtomicCAS(ThreadState *thr, uptr pc, volatile T *a, T *c, T v, morder mo,
- morder fmo) NO_THREAD_SAFETY_ANALYSIS {
+static bool AtomicCAS(ThreadState *thr, uptr pc, volatile T *a, T *c, T v,
+ morder mo, morder fmo) {
// 31.7.2.18: "The failure argument shall not be memory_order_release
// nor memory_order_acq_rel". LLVM (2021-05) fallbacks to Monotonic
// (mo_relaxed) when those are used.
- CHECK(IsLoadOrder(fmo));
-
- MemoryWriteAtomic(thr, pc, (uptr)a, SizeLog<T>());
- SyncVar *s = 0;
- bool write_lock = IsReleaseOrder(mo);
-
- if (mo != mo_relaxed || fmo != mo_relaxed)
- s = ctx->metamap.GetOrCreateAndLock(thr, pc, (uptr)a, write_lock);
+ DCHECK(IsLoadOrder(fmo));
+
+ MemoryAccess(thr, pc, (uptr)a, AccessSize<T>(), kAccessWrite | kAccessAtomic);
+ if (LIKELY(mo == mo_relaxed && fmo == mo_relaxed)) {
+ T cc = *c;
+ T pr = func_cas(a, cc, v);
+ if (pr == cc)
+ return true;
+ *c = pr;
+ return false;
+ }
+ bool release = IsReleaseOrder(mo);
+ SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, (uptr)a, false);
+ RWLock l(&s->mtx, release);
T cc = *c;
T pr = func_cas(a, cc, v);
bool success = pr == cc;
@@ -423,25 +427,16 @@ static bool AtomicCAS(ThreadState *thr, uptr pc, volatile T *a, T *c, T v, morde
*c = pr;
mo = fmo;
}
+ thr->fast_state.IncrementEpoch();
+ // Can't increment epoch w/o writing to the trace as well.
+ TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0);
- if (s) {
- thr->fast_state.IncrementEpoch();
- // Can't increment epoch w/o writing to the trace as well.
- TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0);
-
- if (success && IsAcqRelOrder(mo))
- AcquireReleaseImpl(thr, pc, &s->clock);
- else if (success && IsReleaseOrder(mo))
- ReleaseImpl(thr, pc, &s->clock);
- else if (IsAcquireOrder(mo))
- AcquireImpl(thr, pc, &s->clock);
-
- if (write_lock)
- s->mtx.Unlock();
- else
- s->mtx.ReadUnlock();
- }
-
+ if (success && IsAcqRelOrder(mo))
+ AcquireReleaseImpl(thr, pc, &s->clock);
+ else if (success && IsReleaseOrder(mo))
+ ReleaseImpl(thr, pc, &s->clock);
+ else if (IsAcquireOrder(mo))
+ AcquireImpl(thr, pc, &s->clock);
return success;
}
@@ -485,380 +480,356 @@ static morder convert_morder(morder mo) {
return (morder)(mo & 0x7fff);
}
-#define SCOPED_ATOMIC(func, ...) \
- ThreadState *const thr = cur_thread(); \
- if (UNLIKELY(thr->ignore_sync || thr->ignore_interceptors)) { \
- ProcessPendingSignals(thr); \
- return NoTsanAtomic##func(__VA_ARGS__); \
- } \
- const uptr callpc = (uptr)__builtin_return_address(0); \
- uptr pc = StackTrace::GetCurrentPc(); \
- mo = convert_morder(mo); \
- ScopedAtomic sa(thr, callpc, a, mo, __func__); \
- return Atomic##func(thr, pc, __VA_ARGS__); \
-/**/
-
-class ScopedAtomic {
- public:
- ScopedAtomic(ThreadState *thr, uptr pc, const volatile void *a,
- morder mo, const char *func)
- : thr_(thr) {
- FuncEntry(thr_, pc);
- DPrintf("#%d: %s(%p, %d)\n", thr_->tid, func, a, mo);
- }
- ~ScopedAtomic() {
- ProcessPendingSignals(thr_);
- FuncExit(thr_);
- }
- private:
- ThreadState *thr_;
-};
+# define ATOMIC_IMPL(func, ...) \
+ ThreadState *const thr = cur_thread(); \
+ ProcessPendingSignals(thr); \
+ if (UNLIKELY(thr->ignore_sync || thr->ignore_interceptors)) \
+ return NoTsanAtomic##func(__VA_ARGS__); \
+ mo = convert_morder(mo); \
+ return Atomic##func(thr, GET_CALLER_PC(), __VA_ARGS__);
extern "C" {
SANITIZER_INTERFACE_ATTRIBUTE
a8 __tsan_atomic8_load(const volatile a8 *a, morder mo) {
- SCOPED_ATOMIC(Load, a, mo);
+ ATOMIC_IMPL(Load, a, mo);
}
SANITIZER_INTERFACE_ATTRIBUTE
a16 __tsan_atomic16_load(const volatile a16 *a, morder mo) {
- SCOPED_ATOMIC(Load, a, mo);
+ ATOMIC_IMPL(Load, a, mo);
}
SANITIZER_INTERFACE_ATTRIBUTE
a32 __tsan_atomic32_load(const volatile a32 *a, morder mo) {
- SCOPED_ATOMIC(Load, a, mo);
+ ATOMIC_IMPL(Load, a, mo);
}
SANITIZER_INTERFACE_ATTRIBUTE
a64 __tsan_atomic64_load(const volatile a64 *a, morder mo) {
- SCOPED_ATOMIC(Load, a, mo);
+ ATOMIC_IMPL(Load, a, mo);
}
#if __TSAN_HAS_INT128
SANITIZER_INTERFACE_ATTRIBUTE
a128 __tsan_atomic128_load(const volatile a128 *a, morder mo) {
- SCOPED_ATOMIC(Load, a, mo);
+ ATOMIC_IMPL(Load, a, mo);
}
#endif
SANITIZER_INTERFACE_ATTRIBUTE
void __tsan_atomic8_store(volatile a8 *a, a8 v, morder mo) {
- SCOPED_ATOMIC(Store, a, v, mo);
+ ATOMIC_IMPL(Store, a, v, mo);
}
SANITIZER_INTERFACE_ATTRIBUTE
void __tsan_atomic16_store(volatile a16 *a, a16 v, morder mo) {
- SCOPED_ATOMIC(Store, a, v, mo);
+ ATOMIC_IMPL(Store, a, v, mo);
}
SANITIZER_INTERFACE_ATTRIBUTE
void __tsan_atomic32_store(volatile a32 *a, a32 v, morder mo) {
- SCOPED_ATOMIC(Store, a, v, mo);
+ ATOMIC_IMPL(Store, a, v, mo);
}
SANITIZER_INTERFACE_ATTRIBUTE
void __tsan_atomic64_store(volatile a64 *a, a64 v, morder mo) {
- SCOPED_ATOMIC(Store, a, v, mo);
+ ATOMIC_IMPL(Store, a, v, mo);
}
#if __TSAN_HAS_INT128
SANITIZER_INTERFACE_ATTRIBUTE
void __tsan_atomic128_store(volatile a128 *a, a128 v, morder mo) {
- SCOPED_ATOMIC(Store, a, v, mo);
+ ATOMIC_IMPL(Store, a, v, mo);
}
#endif
SANITIZER_INTERFACE_ATTRIBUTE
a8 __tsan_atomic8_exchange(volatile a8 *a, a8 v, morder mo) {
- SCOPED_ATOMIC(Exchange, a, v, mo);
+ ATOMIC_IMPL(Exchange, a, v, mo);
}
SANITIZER_INTERFACE_ATTRIBUTE
a16 __tsan_atomic16_exchange(volatile a16 *a, a16 v, morder mo) {
- SCOPED_ATOMIC(Exchange, a, v, mo);
+ ATOMIC_IMPL(Exchange, a, v, mo);
}
SANITIZER_INTERFACE_ATTRIBUTE
a32 __tsan_atomic32_exchange(volatile a32 *a, a32 v, morder mo) {
- SCOPED_ATOMIC(Exchange, a, v, mo);
+ ATOMIC_IMPL(Exchange, a, v, mo);
}
SANITIZER_INTERFACE_ATTRIBUTE
a64 __tsan_atomic64_exchange(volatile a64 *a, a64 v, morder mo) {
- SCOPED_ATOMIC(Exchange, a, v, mo);
+ ATOMIC_IMPL(Exchange, a, v, mo);
}
#if __TSAN_HAS_INT128
SANITIZER_INTERFACE_ATTRIBUTE
a128 __tsan_atomic128_exchange(volatile a128 *a, a128 v, morder mo) {
- SCOPED_ATOMIC(Exchange, a, v, mo);
+ ATOMIC_IMPL(Exchange, a, v, mo);
}
#endif
SANITIZER_INTERFACE_ATTRIBUTE
a8 __tsan_atomic8_fetch_add(volatile a8 *a, a8 v, morder mo) {
- SCOPED_ATOMIC(FetchAdd, a, v, mo);
+ ATOMIC_IMPL(FetchAdd, a, v, mo);
}
SANITIZER_INTERFACE_ATTRIBUTE
a16 __tsan_atomic16_fetch_add(volatile a16 *a, a16 v, morder mo) {
- SCOPED_ATOMIC(FetchAdd, a, v, mo);
+ ATOMIC_IMPL(FetchAdd, a, v, mo);
}
SANITIZER_INTERFACE_ATTRIBUTE
a32 __tsan_atomic32_fetch_add(volatile a32 *a, a32 v, morder mo) {
- SCOPED_ATOMIC(FetchAdd, a, v, mo);
+ ATOMIC_IMPL(FetchAdd, a, v, mo);
}
SANITIZER_INTERFACE_ATTRIBUTE
a64 __tsan_atomic64_fetch_add(volatile a64 *a, a64 v, morder mo) {
- SCOPED_ATOMIC(FetchAdd, a, v, mo);
+ ATOMIC_IMPL(FetchAdd, a, v, mo);
}
#if __TSAN_HAS_INT128
SANITIZER_INTERFACE_ATTRIBUTE
a128 __tsan_atomic128_fetch_add(volatile a128 *a, a128 v, morder mo) {
- SCOPED_ATOMIC(FetchAdd, a, v, mo);
+ ATOMIC_IMPL(FetchAdd, a, v, mo);
}
#endif
SANITIZER_INTERFACE_ATTRIBUTE
a8 __tsan_atomic8_fetch_sub(volatile a8 *a, a8 v, morder mo) {
- SCOPED_ATOMIC(FetchSub, a, v, mo);
+ ATOMIC_IMPL(FetchSub, a, v, mo);
}
SANITIZER_INTERFACE_ATTRIBUTE
a16 __tsan_atomic16_fetch_sub(volatile a16 *a, a16 v, morder mo) {
- SCOPED_ATOMIC(FetchSub, a, v, mo);
+ ATOMIC_IMPL(FetchSub, a, v, mo);
}
SANITIZER_INTERFACE_ATTRIBUTE
a32 __tsan_atomic32_fetch_sub(volatile a32 *a, a32 v, morder mo) {
- SCOPED_ATOMIC(FetchSub, a, v, mo);
+ ATOMIC_IMPL(FetchSub, a, v, mo);
}
SANITIZER_INTERFACE_ATTRIBUTE
a64 __tsan_atomic64_fetch_sub(volatile a64 *a, a64 v, morder mo) {
- SCOPED_ATOMIC(FetchSub, a, v, mo);
+ ATOMIC_IMPL(FetchSub, a, v, mo);
}
#if __TSAN_HAS_INT128
SANITIZER_INTERFACE_ATTRIBUTE
a128 __tsan_atomic128_fetch_sub(volatile a128 *a, a128 v, morder mo) {
- SCOPED_ATOMIC(FetchSub, a, v, mo);
+ ATOMIC_IMPL(FetchSub, a, v, mo);
}
#endif
SANITIZER_INTERFACE_ATTRIBUTE
a8 __tsan_atomic8_fetch_and(volatile a8 *a, a8 v, morder mo) {
- SCOPED_ATOMIC(FetchAnd, a, v, mo);
+ ATOMIC_IMPL(FetchAnd, a, v, mo);
}
SANITIZER_INTERFACE_ATTRIBUTE
a16 __tsan_atomic16_fetch_and(volatile a16 *a, a16 v, morder mo) {
- SCOPED_ATOMIC(FetchAnd, a, v, mo);
+ ATOMIC_IMPL(FetchAnd, a, v, mo);
}
SANITIZER_INTERFACE_ATTRIBUTE
a32 __tsan_atomic32_fetch_and(volatile a32 *a, a32 v, morder mo) {
- SCOPED_ATOMIC(FetchAnd, a, v, mo);
+ ATOMIC_IMPL(FetchAnd, a, v, mo);
}
SANITIZER_INTERFACE_ATTRIBUTE
a64 __tsan_atomic64_fetch_and(volatile a64 *a, a64 v, morder mo) {
- SCOPED_ATOMIC(FetchAnd, a, v, mo);
+ ATOMIC_IMPL(FetchAnd, a, v, mo);
}
#if __TSAN_HAS_INT128
SANITIZER_INTERFACE_ATTRIBUTE
a128 __tsan_atomic128_fetch_and(volatile a128 *a, a128 v, morder mo) {
- SCOPED_ATOMIC(FetchAnd, a, v, mo);
+ ATOMIC_IMPL(FetchAnd, a, v, mo);
}
#endif
SANITIZER_INTERFACE_ATTRIBUTE
a8 __tsan_atomic8_fetch_or(volatile a8 *a, a8 v, morder mo) {
- SCOPED_ATOMIC(FetchOr, a, v, mo);
+ ATOMIC_IMPL(FetchOr, a, v, mo);
}
SANITIZER_INTERFACE_ATTRIBUTE
a16 __tsan_atomic16_fetch_or(volatile a16 *a, a16 v, morder mo) {
- SCOPED_ATOMIC(FetchOr, a, v, mo);
+ ATOMIC_IMPL(FetchOr, a, v, mo);
}
SANITIZER_INTERFACE_ATTRIBUTE
a32 __tsan_atomic32_fetch_or(volatile a32 *a, a32 v, morder mo) {
- SCOPED_ATOMIC(FetchOr, a, v, mo);
+ ATOMIC_IMPL(FetchOr, a, v, mo);
}
SANITIZER_INTERFACE_ATTRIBUTE
a64 __tsan_atomic64_fetch_or(volatile a64 *a, a64 v, morder mo) {
- SCOPED_ATOMIC(FetchOr, a, v, mo);
+ ATOMIC_IMPL(FetchOr, a, v, mo);
}
#if __TSAN_HAS_INT128
SANITIZER_INTERFACE_ATTRIBUTE
a128 __tsan_atomic128_fetch_or(volatile a128 *a, a128 v, morder mo) {
- SCOPED_ATOMIC(FetchOr, a, v, mo);
+ ATOMIC_IMPL(FetchOr, a, v, mo);
}
#endif
SANITIZER_INTERFACE_ATTRIBUTE
a8 __tsan_atomic8_fetch_xor(volatile a8 *a, a8 v, morder mo) {
- SCOPED_ATOMIC(FetchXor, a, v, mo);
+ ATOMIC_IMPL(FetchXor, a, v, mo);
}
SANITIZER_INTERFACE_ATTRIBUTE
a16 __tsan_atomic16_fetch_xor(volatile a16 *a, a16 v, morder mo) {
- SCOPED_ATOMIC(FetchXor, a, v, mo);
+ ATOMIC_IMPL(FetchXor, a, v, mo);
}
SANITIZER_INTERFACE_ATTRIBUTE
a32 __tsan_atomic32_fetch_xor(volatile a32 *a, a32 v, morder mo) {
- SCOPED_ATOMIC(FetchXor, a, v, mo);
+ ATOMIC_IMPL(FetchXor, a, v, mo);
}
SANITIZER_INTERFACE_ATTRIBUTE
a64 __tsan_atomic64_fetch_xor(volatile a64 *a, a64 v, morder mo) {
- SCOPED_ATOMIC(FetchXor, a, v, mo);
+ ATOMIC_IMPL(FetchXor, a, v, mo);
}
#if __TSAN_HAS_INT128
SANITIZER_INTERFACE_ATTRIBUTE
a128 __tsan_atomic128_fetch_xor(volatile a128 *a, a128 v, morder mo) {
- SCOPED_ATOMIC(FetchXor, a, v, mo);
+ ATOMIC_IMPL(FetchXor, a, v, mo);
}
#endif
SANITIZER_INTERFACE_ATTRIBUTE
a8 __tsan_atomic8_fetch_nand(volatile a8 *a, a8 v, morder mo) {
- SCOPED_ATOMIC(FetchNand, a, v, mo);
+ ATOMIC_IMPL(FetchNand, a, v, mo);
}
SANITIZER_INTERFACE_ATTRIBUTE
a16 __tsan_atomic16_fetch_nand(volatile a16 *a, a16 v, morder mo) {
- SCOPED_ATOMIC(FetchNand, a, v, mo);
+ ATOMIC_IMPL(FetchNand, a, v, mo);
}
SANITIZER_INTERFACE_ATTRIBUTE
a32 __tsan_atomic32_fetch_nand(volatile a32 *a, a32 v, morder mo) {
- SCOPED_ATOMIC(FetchNand, a, v, mo);
+ ATOMIC_IMPL(FetchNand, a, v, mo);
}
SANITIZER_INTERFACE_ATTRIBUTE
a64 __tsan_atomic64_fetch_nand(volatile a64 *a, a64 v, morder mo) {
- SCOPED_ATOMIC(FetchNand, a, v, mo);
+ ATOMIC_IMPL(FetchNand, a, v, mo);
}
#if __TSAN_HAS_INT128
SANITIZER_INTERFACE_ATTRIBUTE
a128 __tsan_atomic128_fetch_nand(volatile a128 *a, a128 v, morder mo) {
- SCOPED_ATOMIC(FetchNand, a, v, mo);
+ ATOMIC_IMPL(FetchNand, a, v, mo);
}
#endif
SANITIZER_INTERFACE_ATTRIBUTE
int __tsan_atomic8_compare_exchange_strong(volatile a8 *a, a8 *c, a8 v,
morder mo, morder fmo) {
- SCOPED_ATOMIC(CAS, a, c, v, mo, fmo);
+ ATOMIC_IMPL(CAS, a, c, v, mo, fmo);
}
SANITIZER_INTERFACE_ATTRIBUTE
int __tsan_atomic16_compare_exchange_strong(volatile a16 *a, a16 *c, a16 v,
morder mo, morder fmo) {
- SCOPED_ATOMIC(CAS, a, c, v, mo, fmo);
+ ATOMIC_IMPL(CAS, a, c, v, mo, fmo);
}
SANITIZER_INTERFACE_ATTRIBUTE
int __tsan_atomic32_compare_exchange_strong(volatile a32 *a, a32 *c, a32 v,
morder mo, morder fmo) {
- SCOPED_ATOMIC(CAS, a, c, v, mo, fmo);
+ ATOMIC_IMPL(CAS, a, c, v, mo, fmo);
}
SANITIZER_INTERFACE_ATTRIBUTE
int __tsan_atomic64_compare_exchange_strong(volatile a64 *a, a64 *c, a64 v,
morder mo, morder fmo) {
- SCOPED_ATOMIC(CAS, a, c, v, mo, fmo);
+ ATOMIC_IMPL(CAS, a, c, v, mo, fmo);
}
#if __TSAN_HAS_INT128
SANITIZER_INTERFACE_ATTRIBUTE
int __tsan_atomic128_compare_exchange_strong(volatile a128 *a, a128 *c, a128 v,
morder mo, morder fmo) {
- SCOPED_ATOMIC(CAS, a, c, v, mo, fmo);
+ ATOMIC_IMPL(CAS, a, c, v, mo, fmo);
}
#endif
SANITIZER_INTERFACE_ATTRIBUTE
int __tsan_atomic8_compare_exchange_weak(volatile a8 *a, a8 *c, a8 v,
morder mo, morder fmo) {
- SCOPED_ATOMIC(CAS, a, c, v, mo, fmo);
+ ATOMIC_IMPL(CAS, a, c, v, mo, fmo);
}
SANITIZER_INTERFACE_ATTRIBUTE
int __tsan_atomic16_compare_exchange_weak(volatile a16 *a, a16 *c, a16 v,
morder mo, morder fmo) {
- SCOPED_ATOMIC(CAS, a, c, v, mo, fmo);
+ ATOMIC_IMPL(CAS, a, c, v, mo, fmo);
}
SANITIZER_INTERFACE_ATTRIBUTE
int __tsan_atomic32_compare_exchange_weak(volatile a32 *a, a32 *c, a32 v,
morder mo, morder fmo) {
- SCOPED_ATOMIC(CAS, a, c, v, mo, fmo);
+ ATOMIC_IMPL(CAS, a, c, v, mo, fmo);
}
SANITIZER_INTERFACE_ATTRIBUTE
int __tsan_atomic64_compare_exchange_weak(volatile a64 *a, a64 *c, a64 v,
morder mo, morder fmo) {
- SCOPED_ATOMIC(CAS, a, c, v, mo, fmo);
+ ATOMIC_IMPL(CAS, a, c, v, mo, fmo);
}
#if __TSAN_HAS_INT128
SANITIZER_INTERFACE_ATTRIBUTE
int __tsan_atomic128_compare_exchange_weak(volatile a128 *a, a128 *c, a128 v,
morder mo, morder fmo) {
- SCOPED_ATOMIC(CAS, a, c, v, mo, fmo);
+ ATOMIC_IMPL(CAS, a, c, v, mo, fmo);
}
#endif
SANITIZER_INTERFACE_ATTRIBUTE
a8 __tsan_atomic8_compare_exchange_val(volatile a8 *a, a8 c, a8 v,
morder mo, morder fmo) {
- SCOPED_ATOMIC(CAS, a, c, v, mo, fmo);
+ ATOMIC_IMPL(CAS, a, c, v, mo, fmo);
}
SANITIZER_INTERFACE_ATTRIBUTE
a16 __tsan_atomic16_compare_exchange_val(volatile a16 *a, a16 c, a16 v,
morder mo, morder fmo) {
- SCOPED_ATOMIC(CAS, a, c, v, mo, fmo);
+ ATOMIC_IMPL(CAS, a, c, v, mo, fmo);
}
SANITIZER_INTERFACE_ATTRIBUTE
a32 __tsan_atomic32_compare_exchange_val(volatile a32 *a, a32 c, a32 v,
morder mo, morder fmo) {
- SCOPED_ATOMIC(CAS, a, c, v, mo, fmo);
+ ATOMIC_IMPL(CAS, a, c, v, mo, fmo);
}
SANITIZER_INTERFACE_ATTRIBUTE
a64 __tsan_atomic64_compare_exchange_val(volatile a64 *a, a64 c, a64 v,
morder mo, morder fmo) {
- SCOPED_ATOMIC(CAS, a, c, v, mo, fmo);
+ ATOMIC_IMPL(CAS, a, c, v, mo, fmo);
}
#if __TSAN_HAS_INT128
SANITIZER_INTERFACE_ATTRIBUTE
a128 __tsan_atomic128_compare_exchange_val(volatile a128 *a, a128 c, a128 v,
morder mo, morder fmo) {
- SCOPED_ATOMIC(CAS, a, c, v, mo, fmo);
+ ATOMIC_IMPL(CAS, a, c, v, mo, fmo);
}
#endif
SANITIZER_INTERFACE_ATTRIBUTE
-void __tsan_atomic_thread_fence(morder mo) {
- char* a = 0;
- SCOPED_ATOMIC(Fence, mo);
-}
+void __tsan_atomic_thread_fence(morder mo) { ATOMIC_IMPL(Fence, mo); }
SANITIZER_INTERFACE_ATTRIBUTE
void __tsan_atomic_signal_fence(morder mo) {
@@ -869,25 +840,23 @@ void __tsan_atomic_signal_fence(morder mo) {
// Go
-#define ATOMIC(func, ...) \
- if (thr->ignore_sync) { \
- NoTsanAtomic##func(__VA_ARGS__); \
- } else { \
- FuncEntry(thr, cpc); \
+# define ATOMIC(func, ...) \
+ if (thr->ignore_sync) { \
+ NoTsanAtomic##func(__VA_ARGS__); \
+ } else { \
+ FuncEntry(thr, cpc); \
Atomic##func(thr, pc, __VA_ARGS__); \
- FuncExit(thr); \
- } \
-/**/
-
-#define ATOMIC_RET(func, ret, ...) \
- if (thr->ignore_sync) { \
- (ret) = NoTsanAtomic##func(__VA_ARGS__); \
- } else { \
- FuncEntry(thr, cpc); \
+ FuncExit(thr); \
+ }
+
+# define ATOMIC_RET(func, ret, ...) \
+ if (thr->ignore_sync) { \
+ (ret) = NoTsanAtomic##func(__VA_ARGS__); \
+ } else { \
+ FuncEntry(thr, cpc); \
(ret) = Atomic##func(thr, pc, __VA_ARGS__); \
- FuncExit(thr); \
- } \
-/**/
+ FuncExit(thr); \
+ }
extern "C" {
SANITIZER_INTERFACE_ATTRIBUTE
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interface_inl.h b/compiler-rt/lib/tsan/rtl/tsan_interface_inl.h
deleted file mode 100644
index 5e77d4d3d288..000000000000
--- a/compiler-rt/lib/tsan/rtl/tsan_interface_inl.h
+++ /dev/null
@@ -1,133 +0,0 @@
-//===-- tsan_interface_inl.h ------------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file is a part of ThreadSanitizer (TSan), a race detector.
-//
-//===----------------------------------------------------------------------===//
-
-#include "tsan_interface.h"
-#include "tsan_rtl.h"
-#include "sanitizer_common/sanitizer_ptrauth.h"
-
-#define CALLERPC ((uptr)__builtin_return_address(0))
-
-using namespace __tsan;
-
-void __tsan_read1(void *addr) {
- MemoryRead(cur_thread(), CALLERPC, (uptr)addr, kSizeLog1);
-}
-
-void __tsan_read2(void *addr) {
- MemoryRead(cur_thread(), CALLERPC, (uptr)addr, kSizeLog2);
-}
-
-void __tsan_read4(void *addr) {
- MemoryRead(cur_thread(), CALLERPC, (uptr)addr, kSizeLog4);
-}
-
-void __tsan_read8(void *addr) {
- MemoryRead(cur_thread(), CALLERPC, (uptr)addr, kSizeLog8);
-}
-
-void __tsan_write1(void *addr) {
- MemoryWrite(cur_thread(), CALLERPC, (uptr)addr, kSizeLog1);
-}
-
-void __tsan_write2(void *addr) {
- MemoryWrite(cur_thread(), CALLERPC, (uptr)addr, kSizeLog2);
-}
-
-void __tsan_write4(void *addr) {
- MemoryWrite(cur_thread(), CALLERPC, (uptr)addr, kSizeLog4);
-}
-
-void __tsan_write8(void *addr) {
- MemoryWrite(cur_thread(), CALLERPC, (uptr)addr, kSizeLog8);
-}
-
-void __tsan_read1_pc(void *addr, void *pc) {
- MemoryRead(cur_thread(), STRIP_PAC_PC(pc), (uptr)addr, kSizeLog1);
-}
-
-void __tsan_read2_pc(void *addr, void *pc) {
- MemoryRead(cur_thread(), STRIP_PAC_PC(pc), (uptr)addr, kSizeLog2);
-}
-
-void __tsan_read4_pc(void *addr, void *pc) {
- MemoryRead(cur_thread(), STRIP_PAC_PC(pc), (uptr)addr, kSizeLog4);
-}
-
-void __tsan_read8_pc(void *addr, void *pc) {
- MemoryRead(cur_thread(), STRIP_PAC_PC(pc), (uptr)addr, kSizeLog8);
-}
-
-void __tsan_write1_pc(void *addr, void *pc) {
- MemoryWrite(cur_thread(), STRIP_PAC_PC(pc), (uptr)addr, kSizeLog1);
-}
-
-void __tsan_write2_pc(void *addr, void *pc) {
- MemoryWrite(cur_thread(), STRIP_PAC_PC(pc), (uptr)addr, kSizeLog2);
-}
-
-void __tsan_write4_pc(void *addr, void *pc) {
- MemoryWrite(cur_thread(), STRIP_PAC_PC(pc), (uptr)addr, kSizeLog4);
-}
-
-void __tsan_write8_pc(void *addr, void *pc) {
- MemoryWrite(cur_thread(), STRIP_PAC_PC(pc), (uptr)addr, kSizeLog8);
-}
-
-void __tsan_vptr_update(void **vptr_p, void *new_val) {
- CHECK_EQ(sizeof(vptr_p), 8);
- if (*vptr_p != new_val) {
- ThreadState *thr = cur_thread();
- thr->is_vptr_access = true;
- MemoryWrite(thr, CALLERPC, (uptr)vptr_p, kSizeLog8);
- thr->is_vptr_access = false;
- }
-}
-
-void __tsan_vptr_read(void **vptr_p) {
- CHECK_EQ(sizeof(vptr_p), 8);
- ThreadState *thr = cur_thread();
- thr->is_vptr_access = true;
- MemoryRead(thr, CALLERPC, (uptr)vptr_p, kSizeLog8);
- thr->is_vptr_access = false;
-}
-
-void __tsan_func_entry(void *pc) {
- FuncEntry(cur_thread(), STRIP_PAC_PC(pc));
-}
-
-void __tsan_func_exit() {
- FuncExit(cur_thread());
-}
-
-void __tsan_ignore_thread_begin() {
- ThreadIgnoreBegin(cur_thread(), CALLERPC);
-}
-
-void __tsan_ignore_thread_end() {
- ThreadIgnoreEnd(cur_thread(), CALLERPC);
-}
-
-void __tsan_read_range(void *addr, uptr size) {
- MemoryAccessRange(cur_thread(), CALLERPC, (uptr)addr, size, false);
-}
-
-void __tsan_write_range(void *addr, uptr size) {
- MemoryAccessRange(cur_thread(), CALLERPC, (uptr)addr, size, true);
-}
-
-void __tsan_read_range_pc(void *addr, uptr size, void *pc) {
- MemoryAccessRange(cur_thread(), STRIP_PAC_PC(pc), (uptr)addr, size, false);
-}
-
-void __tsan_write_range_pc(void *addr, uptr size, void *pc) {
- MemoryAccessRange(cur_thread(), STRIP_PAC_PC(pc), (uptr)addr, size, true);
-}
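
The deleted header expressed the fixed-size entry points through the old MemoryRead/MemoryWrite + kSizeLogN interface; in this import they are rebuilt on the unified MemoryAccess(thr, pc, addr, size, flags) call seen in the atomic diff above. A rough sketch of the new shape (runtime-internal code, not a verbatim copy of the replacement file):

// Sketch only: mirrors the MemoryAccess interface used elsewhere in this
// import; requires tsan_rtl.h and the CALLERPC macro inside the runtime.
void __tsan_read4(void *addr) {
  MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 4, kAccessRead);
}

void __tsan_write8(void *addr) {
  MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 8, kAccessWrite);
}
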
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interface_java.cpp b/compiler-rt/lib/tsan/rtl/tsan_interface_java.cpp
index 6aa8a7b1d6a7..c090c1f08cbe 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_interface_java.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_interface_java.cpp
@@ -34,52 +34,49 @@ struct JavaContext {
}
};
-class ScopedJavaFunc {
- public:
- ScopedJavaFunc(ThreadState *thr, uptr pc)
- : thr_(thr) {
- Initialize(thr_);
- FuncEntry(thr, pc);
- }
-
- ~ScopedJavaFunc() {
- FuncExit(thr_);
- // FIXME(dvyukov): process pending signals.
- }
-
- private:
- ThreadState *thr_;
-};
-
static u64 jctx_buf[sizeof(JavaContext) / sizeof(u64) + 1];
static JavaContext *jctx;
+MBlock *JavaHeapBlock(uptr addr, uptr *start) {
+ if (!jctx || addr < jctx->heap_begin ||
+ addr >= jctx->heap_begin + jctx->heap_size)
+ return nullptr;
+ for (uptr p = RoundDown(addr, kMetaShadowCell); p >= jctx->heap_begin;
+ p -= kMetaShadowCell) {
+ MBlock *b = ctx->metamap.GetBlock(p);
+ if (!b)
+ continue;
+ if (p + b->siz <= addr)
+ return nullptr;
+ *start = p;
+ return b;
+ }
+ return nullptr;
+}
+
} // namespace __tsan
-#define SCOPED_JAVA_FUNC(func) \
+#define JAVA_FUNC_ENTER(func) \
ThreadState *thr = cur_thread(); \
- const uptr caller_pc = GET_CALLER_PC(); \
- const uptr pc = StackTrace::GetCurrentPc(); \
- (void)pc; \
- ScopedJavaFunc scoped(thr, caller_pc); \
-/**/
+ (void)thr;
void __tsan_java_init(jptr heap_begin, jptr heap_size) {
- SCOPED_JAVA_FUNC(__tsan_java_init);
- DPrintf("#%d: java_init(%p, %p)\n", thr->tid, heap_begin, heap_size);
- CHECK_EQ(jctx, 0);
- CHECK_GT(heap_begin, 0);
- CHECK_GT(heap_size, 0);
- CHECK_EQ(heap_begin % kHeapAlignment, 0);
- CHECK_EQ(heap_size % kHeapAlignment, 0);
- CHECK_LT(heap_begin, heap_begin + heap_size);
+ JAVA_FUNC_ENTER(__tsan_java_init);
+ Initialize(thr);
+ DPrintf("#%d: java_init(0x%zx, 0x%zx)\n", thr->tid, heap_begin, heap_size);
+ DCHECK_EQ(jctx, 0);
+ DCHECK_GT(heap_begin, 0);
+ DCHECK_GT(heap_size, 0);
+ DCHECK_EQ(heap_begin % kHeapAlignment, 0);
+ DCHECK_EQ(heap_size % kHeapAlignment, 0);
+ DCHECK_LT(heap_begin, heap_begin + heap_size);
jctx = new(jctx_buf) JavaContext(heap_begin, heap_size);
}
int __tsan_java_fini() {
- SCOPED_JAVA_FUNC(__tsan_java_fini);
+ JAVA_FUNC_ENTER(__tsan_java_fini);
DPrintf("#%d: java_fini()\n", thr->tid);
- CHECK_NE(jctx, 0);
+ DCHECK_NE(jctx, 0);
// FIXME(dvyukov): this does not call atexit() callbacks.
int status = Finalize(thr);
DPrintf("#%d: java_fini() = %d\n", thr->tid, status);
@@ -87,74 +84,65 @@ int __tsan_java_fini() {
}
void __tsan_java_alloc(jptr ptr, jptr size) {
- SCOPED_JAVA_FUNC(__tsan_java_alloc);
- DPrintf("#%d: java_alloc(%p, %p)\n", thr->tid, ptr, size);
- CHECK_NE(jctx, 0);
- CHECK_NE(size, 0);
- CHECK_EQ(ptr % kHeapAlignment, 0);
- CHECK_EQ(size % kHeapAlignment, 0);
- CHECK_GE(ptr, jctx->heap_begin);
- CHECK_LE(ptr + size, jctx->heap_begin + jctx->heap_size);
-
- OnUserAlloc(thr, pc, ptr, size, false);
+ JAVA_FUNC_ENTER(__tsan_java_alloc);
+ DPrintf("#%d: java_alloc(0x%zx, 0x%zx)\n", thr->tid, ptr, size);
+ DCHECK_NE(jctx, 0);
+ DCHECK_NE(size, 0);
+ DCHECK_EQ(ptr % kHeapAlignment, 0);
+ DCHECK_EQ(size % kHeapAlignment, 0);
+ DCHECK_GE(ptr, jctx->heap_begin);
+ DCHECK_LE(ptr + size, jctx->heap_begin + jctx->heap_size);
+
+ OnUserAlloc(thr, 0, ptr, size, false);
}
void __tsan_java_free(jptr ptr, jptr size) {
- SCOPED_JAVA_FUNC(__tsan_java_free);
- DPrintf("#%d: java_free(%p, %p)\n", thr->tid, ptr, size);
- CHECK_NE(jctx, 0);
- CHECK_NE(size, 0);
- CHECK_EQ(ptr % kHeapAlignment, 0);
- CHECK_EQ(size % kHeapAlignment, 0);
- CHECK_GE(ptr, jctx->heap_begin);
- CHECK_LE(ptr + size, jctx->heap_begin + jctx->heap_size);
+ JAVA_FUNC_ENTER(__tsan_java_free);
+ DPrintf("#%d: java_free(0x%zx, 0x%zx)\n", thr->tid, ptr, size);
+ DCHECK_NE(jctx, 0);
+ DCHECK_NE(size, 0);
+ DCHECK_EQ(ptr % kHeapAlignment, 0);
+ DCHECK_EQ(size % kHeapAlignment, 0);
+ DCHECK_GE(ptr, jctx->heap_begin);
+ DCHECK_LE(ptr + size, jctx->heap_begin + jctx->heap_size);
ctx->metamap.FreeRange(thr->proc(), ptr, size);
}
void __tsan_java_move(jptr src, jptr dst, jptr size) {
- SCOPED_JAVA_FUNC(__tsan_java_move);
- DPrintf("#%d: java_move(%p, %p, %p)\n", thr->tid, src, dst, size);
- CHECK_NE(jctx, 0);
- CHECK_NE(size, 0);
- CHECK_EQ(src % kHeapAlignment, 0);
- CHECK_EQ(dst % kHeapAlignment, 0);
- CHECK_EQ(size % kHeapAlignment, 0);
- CHECK_GE(src, jctx->heap_begin);
- CHECK_LE(src + size, jctx->heap_begin + jctx->heap_size);
- CHECK_GE(dst, jctx->heap_begin);
- CHECK_LE(dst + size, jctx->heap_begin + jctx->heap_size);
- CHECK_NE(dst, src);
- CHECK_NE(size, 0);
+ JAVA_FUNC_ENTER(__tsan_java_move);
+ DPrintf("#%d: java_move(0x%zx, 0x%zx, 0x%zx)\n", thr->tid, src, dst, size);
+ DCHECK_NE(jctx, 0);
+ DCHECK_NE(size, 0);
+ DCHECK_EQ(src % kHeapAlignment, 0);
+ DCHECK_EQ(dst % kHeapAlignment, 0);
+ DCHECK_EQ(size % kHeapAlignment, 0);
+ DCHECK_GE(src, jctx->heap_begin);
+ DCHECK_LE(src + size, jctx->heap_begin + jctx->heap_size);
+ DCHECK_GE(dst, jctx->heap_begin);
+ DCHECK_LE(dst + size, jctx->heap_begin + jctx->heap_size);
+ DCHECK_NE(dst, src);
+ DCHECK_NE(size, 0);
// Assuming it's not running concurrently with threads that do
// memory accesses and mutex operations (stop-the-world phase).
ctx->metamap.MoveMemory(src, dst, size);
- // Move shadow.
- u64 *s = (u64*)MemToShadow(src);
- u64 *d = (u64*)MemToShadow(dst);
- u64 *send = (u64*)MemToShadow(src + size);
- uptr inc = 1;
- if (dst > src) {
- s = (u64*)MemToShadow(src + size) - 1;
- d = (u64*)MemToShadow(dst + size) - 1;
- send = (u64*)MemToShadow(src) - 1;
- inc = -1;
- }
- for (; s != send; s += inc, d += inc) {
- *d = *s;
- *s = 0;
- }
+ // Clear the destination shadow range.
+ // We used to move shadow from src to dst, but the trace format does not
+ // support that anymore as it contains addresses of accesses.
+ RawShadow *d = MemToShadow(dst);
+ RawShadow *dend = MemToShadow(dst + size);
+ internal_memset(d, 0, (dend - d) * sizeof(*d));
}
jptr __tsan_java_find(jptr *from_ptr, jptr to) {
- SCOPED_JAVA_FUNC(__tsan_java_find);
- DPrintf("#%d: java_find(&%p, %p)\n", *from_ptr, to);
- CHECK_EQ((*from_ptr) % kHeapAlignment, 0);
- CHECK_EQ(to % kHeapAlignment, 0);
- CHECK_GE(*from_ptr, jctx->heap_begin);
- CHECK_LE(to, jctx->heap_begin + jctx->heap_size);
+ JAVA_FUNC_ENTER(__tsan_java_find);
+ DPrintf("#%d: java_find(&0x%zx, 0x%zx)\n", thr->tid, *from_ptr, to);
+ DCHECK_EQ((*from_ptr) % kHeapAlignment, 0);
+ DCHECK_EQ(to % kHeapAlignment, 0);
+ DCHECK_GE(*from_ptr, jctx->heap_begin);
+ DCHECK_LE(to, jctx->heap_begin + jctx->heap_size);
for (uptr from = *from_ptr; from < to; from += kHeapAlignment) {
MBlock *b = ctx->metamap.GetBlock(from);
if (b) {
@@ -166,101 +154,105 @@ jptr __tsan_java_find(jptr *from_ptr, jptr to) {
}
void __tsan_java_finalize() {
- SCOPED_JAVA_FUNC(__tsan_java_finalize);
- DPrintf("#%d: java_mutex_finalize()\n", thr->tid);
- AcquireGlobal(thr, 0);
+ JAVA_FUNC_ENTER(__tsan_java_finalize);
+ DPrintf("#%d: java_finalize()\n", thr->tid);
+ AcquireGlobal(thr);
}
void __tsan_java_mutex_lock(jptr addr) {
- SCOPED_JAVA_FUNC(__tsan_java_mutex_lock);
- DPrintf("#%d: java_mutex_lock(%p)\n", thr->tid, addr);
- CHECK_NE(jctx, 0);
- CHECK_GE(addr, jctx->heap_begin);
- CHECK_LT(addr, jctx->heap_begin + jctx->heap_size);
-
- MutexPostLock(thr, pc, addr, MutexFlagLinkerInit | MutexFlagWriteReentrant |
- MutexFlagDoPreLockOnPostLock);
+ JAVA_FUNC_ENTER(__tsan_java_mutex_lock);
+ DPrintf("#%d: java_mutex_lock(0x%zx)\n", thr->tid, addr);
+ DCHECK_NE(jctx, 0);
+ DCHECK_GE(addr, jctx->heap_begin);
+ DCHECK_LT(addr, jctx->heap_begin + jctx->heap_size);
+
+ MutexPostLock(thr, 0, addr,
+ MutexFlagLinkerInit | MutexFlagWriteReentrant |
+ MutexFlagDoPreLockOnPostLock);
}
void __tsan_java_mutex_unlock(jptr addr) {
- SCOPED_JAVA_FUNC(__tsan_java_mutex_unlock);
- DPrintf("#%d: java_mutex_unlock(%p)\n", thr->tid, addr);
- CHECK_NE(jctx, 0);
- CHECK_GE(addr, jctx->heap_begin);
- CHECK_LT(addr, jctx->heap_begin + jctx->heap_size);
+ JAVA_FUNC_ENTER(__tsan_java_mutex_unlock);
+ DPrintf("#%d: java_mutex_unlock(0x%zx)\n", thr->tid, addr);
+ DCHECK_NE(jctx, 0);
+ DCHECK_GE(addr, jctx->heap_begin);
+ DCHECK_LT(addr, jctx->heap_begin + jctx->heap_size);
- MutexUnlock(thr, pc, addr);
+ MutexUnlock(thr, 0, addr);
}
void __tsan_java_mutex_read_lock(jptr addr) {
- SCOPED_JAVA_FUNC(__tsan_java_mutex_read_lock);
- DPrintf("#%d: java_mutex_read_lock(%p)\n", thr->tid, addr);
- CHECK_NE(jctx, 0);
- CHECK_GE(addr, jctx->heap_begin);
- CHECK_LT(addr, jctx->heap_begin + jctx->heap_size);
-
- MutexPostReadLock(thr, pc, addr, MutexFlagLinkerInit |
- MutexFlagWriteReentrant | MutexFlagDoPreLockOnPostLock);
+ JAVA_FUNC_ENTER(__tsan_java_mutex_read_lock);
+ DPrintf("#%d: java_mutex_read_lock(0x%zx)\n", thr->tid, addr);
+ DCHECK_NE(jctx, 0);
+ DCHECK_GE(addr, jctx->heap_begin);
+ DCHECK_LT(addr, jctx->heap_begin + jctx->heap_size);
+
+ MutexPostReadLock(thr, 0, addr,
+ MutexFlagLinkerInit | MutexFlagWriteReentrant |
+ MutexFlagDoPreLockOnPostLock);
}
void __tsan_java_mutex_read_unlock(jptr addr) {
- SCOPED_JAVA_FUNC(__tsan_java_mutex_read_unlock);
- DPrintf("#%d: java_mutex_read_unlock(%p)\n", thr->tid, addr);
- CHECK_NE(jctx, 0);
- CHECK_GE(addr, jctx->heap_begin);
- CHECK_LT(addr, jctx->heap_begin + jctx->heap_size);
+ JAVA_FUNC_ENTER(__tsan_java_mutex_read_unlock);
+ DPrintf("#%d: java_mutex_read_unlock(0x%zx)\n", thr->tid, addr);
+ DCHECK_NE(jctx, 0);
+ DCHECK_GE(addr, jctx->heap_begin);
+ DCHECK_LT(addr, jctx->heap_begin + jctx->heap_size);
- MutexReadUnlock(thr, pc, addr);
+ MutexReadUnlock(thr, 0, addr);
}
void __tsan_java_mutex_lock_rec(jptr addr, int rec) {
- SCOPED_JAVA_FUNC(__tsan_java_mutex_lock_rec);
- DPrintf("#%d: java_mutex_lock_rec(%p, %d)\n", thr->tid, addr, rec);
- CHECK_NE(jctx, 0);
- CHECK_GE(addr, jctx->heap_begin);
- CHECK_LT(addr, jctx->heap_begin + jctx->heap_size);
- CHECK_GT(rec, 0);
-
- MutexPostLock(thr, pc, addr, MutexFlagLinkerInit | MutexFlagWriteReentrant |
- MutexFlagDoPreLockOnPostLock | MutexFlagRecursiveLock, rec);
+ JAVA_FUNC_ENTER(__tsan_java_mutex_lock_rec);
+ DPrintf("#%d: java_mutex_lock_rec(0x%zx, %d)\n", thr->tid, addr, rec);
+ DCHECK_NE(jctx, 0);
+ DCHECK_GE(addr, jctx->heap_begin);
+ DCHECK_LT(addr, jctx->heap_begin + jctx->heap_size);
+ DCHECK_GT(rec, 0);
+
+ MutexPostLock(thr, 0, addr,
+ MutexFlagLinkerInit | MutexFlagWriteReentrant |
+ MutexFlagDoPreLockOnPostLock | MutexFlagRecursiveLock,
+ rec);
}
int __tsan_java_mutex_unlock_rec(jptr addr) {
- SCOPED_JAVA_FUNC(__tsan_java_mutex_unlock_rec);
- DPrintf("#%d: java_mutex_unlock_rec(%p)\n", thr->tid, addr);
- CHECK_NE(jctx, 0);
- CHECK_GE(addr, jctx->heap_begin);
- CHECK_LT(addr, jctx->heap_begin + jctx->heap_size);
+ JAVA_FUNC_ENTER(__tsan_java_mutex_unlock_rec);
+ DPrintf("#%d: java_mutex_unlock_rec(0x%zx)\n", thr->tid, addr);
+ DCHECK_NE(jctx, 0);
+ DCHECK_GE(addr, jctx->heap_begin);
+ DCHECK_LT(addr, jctx->heap_begin + jctx->heap_size);
- return MutexUnlock(thr, pc, addr, MutexFlagRecursiveUnlock);
+ return MutexUnlock(thr, 0, addr, MutexFlagRecursiveUnlock);
}
void __tsan_java_acquire(jptr addr) {
- SCOPED_JAVA_FUNC(__tsan_java_acquire);
- DPrintf("#%d: java_acquire(%p)\n", thr->tid, addr);
- CHECK_NE(jctx, 0);
- CHECK_GE(addr, jctx->heap_begin);
- CHECK_LT(addr, jctx->heap_begin + jctx->heap_size);
+ JAVA_FUNC_ENTER(__tsan_java_acquire);
+ DPrintf("#%d: java_acquire(0x%zx)\n", thr->tid, addr);
+ DCHECK_NE(jctx, 0);
+ DCHECK_GE(addr, jctx->heap_begin);
+ DCHECK_LT(addr, jctx->heap_begin + jctx->heap_size);
- Acquire(thr, caller_pc, addr);
+ Acquire(thr, 0, addr);
}
void __tsan_java_release(jptr addr) {
- SCOPED_JAVA_FUNC(__tsan_java_release);
- DPrintf("#%d: java_release(%p)\n", thr->tid, addr);
- CHECK_NE(jctx, 0);
- CHECK_GE(addr, jctx->heap_begin);
- CHECK_LT(addr, jctx->heap_begin + jctx->heap_size);
+ JAVA_FUNC_ENTER(__tsan_java_release);
+ DPrintf("#%d: java_release(0x%zx)\n", thr->tid, addr);
+ DCHECK_NE(jctx, 0);
+ DCHECK_GE(addr, jctx->heap_begin);
+ DCHECK_LT(addr, jctx->heap_begin + jctx->heap_size);
- Release(thr, caller_pc, addr);
+ Release(thr, 0, addr);
}
void __tsan_java_release_store(jptr addr) {
- SCOPED_JAVA_FUNC(__tsan_java_release);
- DPrintf("#%d: java_release_store(%p)\n", thr->tid, addr);
- CHECK_NE(jctx, 0);
- CHECK_GE(addr, jctx->heap_begin);
- CHECK_LT(addr, jctx->heap_begin + jctx->heap_size);
+ JAVA_FUNC_ENTER(__tsan_java_release);
+ DPrintf("#%d: java_release_store(0x%zx)\n", thr->tid, addr);
+ DCHECK_NE(jctx, 0);
+ DCHECK_GE(addr, jctx->heap_begin);
+ DCHECK_LT(addr, jctx->heap_begin + jctx->heap_size);
- ReleaseStore(thr, caller_pc, addr);
+ ReleaseStore(thr, 0, addr);
}
diff --git a/compiler-rt/lib/tsan/rtl/tsan_mman.cpp b/compiler-rt/lib/tsan/rtl/tsan_mman.cpp
index 7765bc070522..ef97ad0bc94e 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_mman.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_mman.cpp
@@ -148,7 +148,7 @@ static void SignalUnsafeCall(ThreadState *thr, uptr pc) {
ObtainCurrentStack(thr, pc, &stack);
if (IsFiredSuppression(ctx, ReportTypeSignalUnsafe, stack))
return;
- ThreadRegistryLock l(ctx->thread_registry);
+ ThreadRegistryLock l(&ctx->thread_registry);
ScopedReport rep(ReportTypeSignalUnsafe);
rep.AddStack(stack, true);
OutputReport(thr, rep);
@@ -218,9 +218,9 @@ void *user_reallocarray(ThreadState *thr, uptr pc, void *p, uptr size, uptr n) {
}
void OnUserAlloc(ThreadState *thr, uptr pc, uptr p, uptr sz, bool write) {
- DPrintf("#%d: alloc(%zu) = %p\n", thr->tid, sz, p);
+ DPrintf("#%d: alloc(%zu) = 0x%zx\n", thr->tid, sz, p);
ctx->metamap.AllocBlock(thr, pc, p, sz);
- if (write && thr->ignore_reads_and_writes == 0)
+ if (write && thr->ignore_reads_and_writes == 0 && thr->is_inited)
MemoryRangeImitateWrite(thr, pc, (uptr)p, sz);
else
MemoryResetRange(thr, pc, (uptr)p, sz);
@@ -229,8 +229,8 @@ void OnUserAlloc(ThreadState *thr, uptr pc, uptr p, uptr sz, bool write) {
void OnUserFree(ThreadState *thr, uptr pc, uptr p, bool write) {
CHECK_NE(p, (void*)0);
uptr sz = ctx->metamap.FreeBlock(thr->proc(), p);
- DPrintf("#%d: free(%p, %zu)\n", thr->tid, p, sz);
- if (write && thr->ignore_reads_and_writes == 0)
+ DPrintf("#%d: free(0x%zx, %zu)\n", thr->tid, p, sz);
+ if (write && thr->ignore_reads_and_writes == 0 && thr->is_inited)
MemoryRangeFreed(thr, pc, (uptr)p, sz);
}
@@ -336,7 +336,7 @@ void invoke_free_hook(void *ptr) {
RunFreeHooks(ptr);
}
-void *internal_alloc(MBlockType typ, uptr sz) {
+void *Alloc(uptr sz) {
ThreadState *thr = cur_thread();
if (thr->nomalloc) {
thr->nomalloc = 0; // CHECK calls internal_malloc().
@@ -345,7 +345,7 @@ void *internal_alloc(MBlockType typ, uptr sz) {
return InternalAlloc(sz, &thr->proc()->internal_alloc_cache);
}
-void internal_free(void *p) {
+void FreeImpl(void *p) {
ThreadState *thr = cur_thread();
if (thr->nomalloc) {
thr->nomalloc = 0; // CHECK calls internal_malloc().
diff --git a/compiler-rt/lib/tsan/rtl/tsan_mman.h b/compiler-rt/lib/tsan/rtl/tsan_mman.h
index a5280d4472c9..efea5e5abdec 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_mman.h
+++ b/compiler-rt/lib/tsan/rtl/tsan_mman.h
@@ -47,42 +47,29 @@ uptr user_alloc_usable_size(const void *p);
void invoke_malloc_hook(void *ptr, uptr size);
void invoke_free_hook(void *ptr);
-enum MBlockType {
- MBlockScopedBuf,
- MBlockString,
- MBlockStackTrace,
- MBlockShadowStack,
- MBlockSync,
- MBlockClock,
- MBlockThreadContex,
- MBlockDeadInfo,
- MBlockRacyStacks,
- MBlockRacyAddresses,
- MBlockAtExit,
- MBlockFlag,
- MBlockReport,
- MBlockReportMop,
- MBlockReportThread,
- MBlockReportMutex,
- MBlockReportLoc,
- MBlockReportStack,
- MBlockSuppression,
- MBlockExpectRace,
- MBlockSignal,
- MBlockJmpBuf,
+// For internal data structures.
+void *Alloc(uptr sz);
+void FreeImpl(void *p);
- // This must be the last.
- MBlockTypeCount
-};
+template <typename T, typename... Args>
+T *New(Args &&...args) {
+ return new (Alloc(sizeof(T))) T(static_cast<Args &&>(args)...);
+}
-// For internal data structures.
-void *internal_alloc(MBlockType typ, uptr sz);
-void internal_free(void *p);
+template <typename T>
+void Free(T *&p) {
+ if (p == nullptr)
+ return;
+ FreeImpl(p);
+ p = nullptr;
+}
template <typename T>
-void DestroyAndFree(T *p) {
+void DestroyAndFree(T *&p) {
+ if (p == nullptr)
+ return;
p->~T();
- internal_free(p);
+ Free(p);
}
} // namespace __tsan
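
The new Alloc/Free/New helpers above replace the typed internal_alloc/internal_free pair; a minimal usage sketch of their contract (FooState is a hypothetical internal type, and the code assumes tsan_mman.h):

struct FooState {
  int x;
  explicit FooState(int v) : x(v) {}
};

void Example() {
  // New constructs in storage obtained from Alloc(sizeof(FooState)).
  FooState *s = New<FooState>(42);
  // ... use s ...
  // DestroyAndFree runs ~FooState, releases the storage, and nulls s.
  DestroyAndFree(s);
  // s == nullptr here.
}
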
diff --git a/compiler-rt/lib/tsan/rtl/tsan_mutexset.cpp b/compiler-rt/lib/tsan/rtl/tsan_mutexset.cpp
index 813fa3bca936..735179686ba9 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_mutexset.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_mutexset.cpp
@@ -10,15 +10,13 @@
//
//===----------------------------------------------------------------------===//
#include "tsan_mutexset.h"
+
+#include "sanitizer_common/sanitizer_placement_new.h"
#include "tsan_rtl.h"
namespace __tsan {
-const uptr MutexSet::kMaxSize;
-
MutexSet::MutexSet() {
- size_ = 0;
- internal_memset(&descs_, 0, sizeof(descs_));
}
void MutexSet::Add(u64 id, bool write, u64 epoch) {
@@ -44,9 +42,12 @@ void MutexSet::Add(u64 id, bool write, u64 epoch) {
CHECK_EQ(size_, kMaxSize - 1);
}
// Add new mutex descriptor.
+ descs_[size_].addr = 0;
+ descs_[size_].stack_id = kInvalidStackID;
descs_[size_].id = id;
descs_[size_].write = write;
descs_[size_].epoch = epoch;
+ descs_[size_].seq = seq_++;
descs_[size_].count = 1;
size_++;
}
@@ -70,6 +71,46 @@ void MutexSet::Remove(u64 id) {
}
}
+void MutexSet::AddAddr(uptr addr, StackID stack_id, bool write) {
+  // Look up an existing mutex with the same address.
+ for (uptr i = 0; i < size_; i++) {
+ if (descs_[i].addr == addr) {
+ descs_[i].count++;
+ descs_[i].seq = seq_++;
+ return;
+ }
+ }
+ // On overflow, find the oldest mutex and drop it.
+ if (size_ == kMaxSize) {
+ uptr min = 0;
+ for (uptr i = 0; i < size_; i++) {
+ if (descs_[i].seq < descs_[min].seq)
+ min = i;
+ }
+ RemovePos(min);
+ CHECK_EQ(size_, kMaxSize - 1);
+ }
+ // Add new mutex descriptor.
+ descs_[size_].addr = addr;
+ descs_[size_].stack_id = stack_id;
+ descs_[size_].id = 0;
+ descs_[size_].write = write;
+ descs_[size_].epoch = 0;
+ descs_[size_].seq = seq_++;
+ descs_[size_].count = 1;
+ size_++;
+}
+
+void MutexSet::DelAddr(uptr addr, bool destroy) {
+ for (uptr i = 0; i < size_; i++) {
+ if (descs_[i].addr == addr) {
+ if (destroy || --descs_[i].count == 0)
+ RemovePos(i);
+ return;
+ }
+ }
+}
+
void MutexSet::RemovePos(uptr i) {
CHECK_LT(i, size_);
descs_[i] = descs_[size_ - 1];
@@ -85,4 +126,7 @@ MutexSet::Desc MutexSet::Get(uptr i) const {
return descs_[i];
}
+DynamicMutexSet::DynamicMutexSet() : ptr_(New<MutexSet>()) {}
+DynamicMutexSet::~DynamicMutexSet() { DestroyAndFree(ptr_); }
+
} // namespace __tsan
diff --git a/compiler-rt/lib/tsan/rtl/tsan_mutexset.h b/compiler-rt/lib/tsan/rtl/tsan_mutexset.h
index d63881f40290..93776a664135 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_mutexset.h
+++ b/compiler-rt/lib/tsan/rtl/tsan_mutexset.h
@@ -21,12 +21,22 @@ class MutexSet {
public:
// Holds limited number of mutexes.
// The oldest mutexes are discarded on overflow.
- static const uptr kMaxSize = 16;
+ static constexpr uptr kMaxSize = 16;
struct Desc {
+ uptr addr;
+ StackID stack_id;
u64 id;
u64 epoch;
- int count;
+ u32 seq;
+ u32 count;
bool write;
+
+ Desc() { internal_memset(this, 0, sizeof(*this)); }
+ Desc(const Desc& other) { *this = other; }
+ Desc& operator=(const MutexSet::Desc& other) {
+ internal_memcpy(this, &other, sizeof(*this));
+ return *this;
+ }
};
MutexSet();
@@ -34,21 +44,37 @@ class MutexSet {
void Add(u64 id, bool write, u64 epoch);
void Del(u64 id, bool write);
void Remove(u64 id); // Removes the mutex completely (if it's destroyed).
+ void AddAddr(uptr addr, StackID stack_id, bool write);
+ void DelAddr(uptr addr, bool destroy = false);
uptr Size() const;
Desc Get(uptr i) const;
- void operator=(const MutexSet &other) {
- internal_memcpy(this, &other, sizeof(*this));
- }
-
private:
#if !SANITIZER_GO
- uptr size_;
+ u32 seq_ = 0;
+ uptr size_ = 0;
Desc descs_[kMaxSize];
-#endif
void RemovePos(uptr i);
- MutexSet(const MutexSet&);
+#endif
+};
+
+// MutexSet is too large to live on stack.
+// DynamicMutexSet can be used to create local MutexSets.
+class DynamicMutexSet {
+ public:
+ DynamicMutexSet();
+ ~DynamicMutexSet();
+ MutexSet* operator->() { return ptr_; }
+ operator MutexSet*() { return ptr_; }
+ DynamicMutexSet(const DynamicMutexSet&) = delete;
+ DynamicMutexSet& operator=(const DynamicMutexSet&) = delete;
+
+ private:
+ MutexSet* ptr_;
+#if SANITIZER_GO
+ MutexSet set_;
+#endif
};
// Go does not have mutexes, so do not spend memory and time.
@@ -59,9 +85,12 @@ MutexSet::MutexSet() {}
void MutexSet::Add(u64 id, bool write, u64 epoch) {}
void MutexSet::Del(u64 id, bool write) {}
void MutexSet::Remove(u64 id) {}
-void MutexSet::RemovePos(uptr i) {}
+void MutexSet::AddAddr(uptr addr, StackID stack_id, bool write) {}
+void MutexSet::DelAddr(uptr addr, bool destroy) {}
uptr MutexSet::Size() const { return 0; }
MutexSet::Desc MutexSet::Get(uptr i) const { return Desc(); }
+DynamicMutexSet::DynamicMutexSet() : ptr_(&set_) {}
+DynamicMutexSet::~DynamicMutexSet() {}
#endif
} // namespace __tsan
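
DynamicMutexSet exists because MutexSet (16 descriptors plus bookkeeping) is considered too large for a stack frame; it heap-allocates the set in C++ mode and embeds it for Go. A minimal usage sketch (runtime-internal code; ReportSomething is hypothetical):

void ReportSomething(uptr mutex_addr, StackID stack_id) {
  DynamicMutexSet mset;  // allocates via New<MutexSet>() in C++ builds
  mset->AddAddr(mutex_addr, stack_id, /*write=*/true);
  // mset converts implicitly to MutexSet* for report helpers.
  // The destructor frees the set via DestroyAndFree.
}
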
diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform.h b/compiler-rt/lib/tsan/rtl/tsan_platform.h
index 8bd218e25fd6..7ff0acace8f6 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_platform.h
+++ b/compiler-rt/lib/tsan/rtl/tsan_platform.h
@@ -23,21 +23,19 @@
namespace __tsan {
-#if defined(__x86_64__)
-#define HAS_48_BIT_ADDRESS_SPACE 1
-#elif SANITIZER_IOSSIM // arm64 iOS simulators (order of #if matters)
-#define HAS_48_BIT_ADDRESS_SPACE 1
-#elif SANITIZER_IOS // arm64 iOS devices (order of #if matters)
-#define HAS_48_BIT_ADDRESS_SPACE 0
-#elif SANITIZER_MAC // arm64 macOS (order of #if matters)
-#define HAS_48_BIT_ADDRESS_SPACE 1
-#else
-#define HAS_48_BIT_ADDRESS_SPACE 0
-#endif
-
-#if !SANITIZER_GO
+enum {
+ // App memory is not mapped onto shadow memory range.
+ kBrokenMapping = 1 << 0,
+ // Mapping app memory and back does not produce the same address,
+ // this can lead to wrong addresses in reports and potentially
+ // other bad consequences.
+ kBrokenReverseMapping = 1 << 1,
+ // Mapping is non-linear for linear user range.
+  // This is bad and can lead to unpredictable memory corruptions, etc.,
+  // because range access functions assume linearity.
+ kBrokenLinearity = 1 << 2,
+};
-#if HAS_48_BIT_ADDRESS_SPACE
/*
C/C++ on linux/x86_64 and freebsd/x86_64
0000 0000 1000 - 0080 0000 0000: main binary and/or MAP_32BIT mappings (512GB)
@@ -65,9 +63,8 @@ C/C++ on netbsd/amd64 can reuse the same mapping:
* Stack on NetBSD/amd64 has prereserved 128MB.
* Heap grows downwards (top-down).
* ASLR must be disabled per-process or globally.
-
*/
-struct Mapping {
+struct Mapping48AddressSpace {
static const uptr kMetaShadowBeg = 0x300000000000ull;
static const uptr kMetaShadowEnd = 0x340000000000ull;
static const uptr kTraceMemBeg = 0x600000000000ull;
@@ -82,13 +79,12 @@ struct Mapping {
static const uptr kMidAppMemEnd = 0x568000000000ull;
static const uptr kHiAppMemBeg = 0x7e8000000000ull;
static const uptr kHiAppMemEnd = 0x800000000000ull;
- static const uptr kAppMemMsk = 0x780000000000ull;
- static const uptr kAppMemXor = 0x040000000000ull;
+ static const uptr kShadowMsk = 0x780000000000ull;
+ static const uptr kShadowXor = 0x040000000000ull;
+ static const uptr kShadowAdd = 0x000000000000ull;
static const uptr kVdsoBeg = 0xf000000000000000ull;
};
-#define TSAN_MID_APP_RANGE 1
-#elif defined(__mips64)
/*
C/C++ on linux/mips64 (40-bit VMA)
0000 0000 00 - 0100 0000 00: - (4 GB)
@@ -105,7 +101,7 @@ fe00 0000 00 - ff00 0000 00: heap (4 GB)
ff00 0000 00 - ff80 0000 00: - (2 GB)
ff80 0000 00 - ffff ffff ff: modules and main thread stack (<2 GB)
*/
-struct Mapping40 {
+struct MappingMips64_40 {
static const uptr kMetaShadowBeg = 0x4000000000ull;
static const uptr kMetaShadowEnd = 0x5000000000ull;
static const uptr kTraceMemBeg = 0xb000000000ull;
@@ -120,14 +116,12 @@ struct Mapping40 {
static const uptr kMidAppMemEnd = 0xab00000000ull;
static const uptr kHiAppMemBeg = 0xff80000000ull;
static const uptr kHiAppMemEnd = 0xffffffffffull;
- static const uptr kAppMemMsk = 0xf800000000ull;
- static const uptr kAppMemXor = 0x0800000000ull;
+ static const uptr kShadowMsk = 0xf800000000ull;
+ static const uptr kShadowXor = 0x0800000000ull;
+ static const uptr kShadowAdd = 0x0000000000ull;
static const uptr kVdsoBeg = 0xfffff00000ull;
};
-#define TSAN_MID_APP_RANGE 1
-#define TSAN_RUNTIME_VMA 1
-#elif defined(__aarch64__) && defined(__APPLE__)
/*
C/C++ on Darwin/iOS/ARM64 (36-bit VMA, 64 GB VM)
0000 0000 00 - 0100 0000 00: - (4 GB)
@@ -141,7 +135,7 @@ C/C++ on Darwin/iOS/ARM64 (36-bit VMA, 64 GB VM)
0f00 0000 00 - 0fc0 0000 00: traces (3 GB)
0fc0 0000 00 - 1000 0000 00: -
*/
-struct Mapping {
+struct MappingAppleAarch64 {
static const uptr kLoAppMemBeg = 0x0100000000ull;
static const uptr kLoAppMemEnd = 0x0200000000ull;
static const uptr kHeapMemBeg = 0x0200000000ull;
@@ -154,18 +148,14 @@ struct Mapping {
static const uptr kTraceMemEnd = 0x0fc0000000ull;
static const uptr kHiAppMemBeg = 0x0fc0000000ull;
static const uptr kHiAppMemEnd = 0x0fc0000000ull;
- static const uptr kAppMemMsk = 0x0ull;
- static const uptr kAppMemXor = 0x0ull;
+ static const uptr kShadowMsk = 0x0ull;
+ static const uptr kShadowXor = 0x0ull;
+ static const uptr kShadowAdd = 0x0ull;
static const uptr kVdsoBeg = 0x7000000000000000ull;
+ static const uptr kMidAppMemBeg = 0;
+ static const uptr kMidAppMemEnd = 0;
};
-#elif defined(__aarch64__) && !defined(__APPLE__)
-// AArch64 supports multiple VMA which leads to multiple address transformation
-// functions. To support these multiple VMAS transformations and mappings TSAN
-// runtime for AArch64 uses an external memory read (vmaSize) to select which
-// mapping to use. Although slower, it make a same instrumented binary run on
-// multiple kernels.
-
/*
C/C++ on linux/aarch64 (39-bit VMA)
0000 0010 00 - 0100 0000 00: main binary
@@ -181,7 +171,7 @@ C/C++ on linux/aarch64 (39-bit VMA)
7c00 0000 00 - 7d00 0000 00: heap
7d00 0000 00 - 7fff ffff ff: modules and main thread stack
*/
-struct Mapping39 {
+struct MappingAarch64_39 {
static const uptr kLoAppMemBeg = 0x0000001000ull;
static const uptr kLoAppMemEnd = 0x0100000000ull;
static const uptr kShadowBeg = 0x0800000000ull;
@@ -196,8 +186,9 @@ struct Mapping39 {
static const uptr kHeapMemEnd = 0x7d00000000ull;
static const uptr kHiAppMemBeg = 0x7e00000000ull;
static const uptr kHiAppMemEnd = 0x7fffffffffull;
- static const uptr kAppMemMsk = 0x7800000000ull;
- static const uptr kAppMemXor = 0x0200000000ull;
+ static const uptr kShadowMsk = 0x7800000000ull;
+ static const uptr kShadowXor = 0x0200000000ull;
+ static const uptr kShadowAdd = 0x0000000000ull;
static const uptr kVdsoBeg = 0x7f00000000ull;
};
@@ -216,7 +207,8 @@ C/C++ on linux/aarch64 (42-bit VMA)
3e000 0000 00 - 3f000 0000 00: heap
3f000 0000 00 - 3ffff ffff ff: modules and main thread stack
*/
-struct Mapping42 {
+struct MappingAarch64_42 {
+ static const uptr kBroken = kBrokenReverseMapping;
static const uptr kLoAppMemBeg = 0x00000001000ull;
static const uptr kLoAppMemEnd = 0x01000000000ull;
static const uptr kShadowBeg = 0x10000000000ull;
@@ -231,12 +223,13 @@ struct Mapping42 {
static const uptr kHeapMemEnd = 0x3f000000000ull;
static const uptr kHiAppMemBeg = 0x3f000000000ull;
static const uptr kHiAppMemEnd = 0x3ffffffffffull;
- static const uptr kAppMemMsk = 0x3c000000000ull;
- static const uptr kAppMemXor = 0x04000000000ull;
+ static const uptr kShadowMsk = 0x3c000000000ull;
+ static const uptr kShadowXor = 0x04000000000ull;
+ static const uptr kShadowAdd = 0x00000000000ull;
static const uptr kVdsoBeg = 0x37f00000000ull;
};
-struct Mapping48 {
+struct MappingAarch64_48 {
static const uptr kLoAppMemBeg = 0x0000000001000ull;
static const uptr kLoAppMemEnd = 0x0000200000000ull;
static const uptr kShadowBeg = 0x0002000000000ull;
@@ -251,22 +244,12 @@ struct Mapping48 {
static const uptr kHeapMemEnd = 0x0ffff00000000ull;
static const uptr kHiAppMemBeg = 0x0ffff00000000ull;
static const uptr kHiAppMemEnd = 0x1000000000000ull;
- static const uptr kAppMemMsk = 0x0fff800000000ull;
- static const uptr kAppMemXor = 0x0000800000000ull;
+ static const uptr kShadowMsk = 0x0fff800000000ull;
+ static const uptr kShadowXor = 0x0000800000000ull;
+ static const uptr kShadowAdd = 0x0000000000000ull;
static const uptr kVdsoBeg = 0xffff000000000ull;
};
-// Indicates the runtime will define the memory regions at runtime.
-#define TSAN_RUNTIME_VMA 1
-// Indicates that mapping defines a mid range memory segment.
-#define TSAN_MID_APP_RANGE 1
-#elif defined(__powerpc64__)
-// PPC64 supports multiple VMA which leads to multiple address transformation
-// functions. To support these multiple VMAS transformations and mappings TSAN
-// runtime for PPC64 uses an external memory read (vmaSize) to select which
-// mapping to use. Although slower, it make a same instrumented binary run on
-// multiple kernels.
-
/*
C/C++ on linux/powerpc64 (44-bit VMA)
0000 0000 0100 - 0001 0000 0000: main binary
@@ -281,7 +264,9 @@ C/C++ on linux/powerpc64 (44-bit VMA)
0f50 0000 0000 - 0f60 0000 0000: -
0f60 0000 0000 - 1000 0000 0000: modules and main thread stack
*/
-struct Mapping44 {
+struct MappingPPC64_44 {
+ static const uptr kBroken =
+ kBrokenMapping | kBrokenReverseMapping | kBrokenLinearity;
static const uptr kMetaShadowBeg = 0x0b0000000000ull;
static const uptr kMetaShadowEnd = 0x0d0000000000ull;
static const uptr kTraceMemBeg = 0x0d0000000000ull;
@@ -294,9 +279,12 @@ struct Mapping44 {
static const uptr kHeapMemEnd = 0x0f5000000000ull;
static const uptr kHiAppMemBeg = 0x0f6000000000ull;
static const uptr kHiAppMemEnd = 0x100000000000ull; // 44 bits
- static const uptr kAppMemMsk = 0x0f0000000000ull;
- static const uptr kAppMemXor = 0x002100000000ull;
+ static const uptr kShadowMsk = 0x0f0000000000ull;
+ static const uptr kShadowXor = 0x002100000000ull;
+ static const uptr kShadowAdd = 0x000000000000ull;
static const uptr kVdsoBeg = 0x3c0000000000000ull;
+ static const uptr kMidAppMemBeg = 0;
+ static const uptr kMidAppMemEnd = 0;
};
/*
@@ -313,7 +301,7 @@ C/C++ on linux/powerpc64 (46-bit VMA)
3e00 0000 0000 - 3e80 0000 0000: -
3e80 0000 0000 - 4000 0000 0000: modules and main thread stack
*/
-struct Mapping46 {
+struct MappingPPC64_46 {
static const uptr kMetaShadowBeg = 0x100000000000ull;
static const uptr kMetaShadowEnd = 0x200000000000ull;
static const uptr kTraceMemBeg = 0x200000000000ull;
@@ -326,9 +314,12 @@ struct Mapping46 {
static const uptr kLoAppMemEnd = 0x010000000000ull;
static const uptr kHiAppMemBeg = 0x3e8000000000ull;
static const uptr kHiAppMemEnd = 0x400000000000ull; // 46 bits
- static const uptr kAppMemMsk = 0x3c0000000000ull;
- static const uptr kAppMemXor = 0x020000000000ull;
+ static const uptr kShadowMsk = 0x3c0000000000ull;
+ static const uptr kShadowXor = 0x020000000000ull;
+ static const uptr kShadowAdd = 0x000000000000ull;
static const uptr kVdsoBeg = 0x7800000000000000ull;
+ static const uptr kMidAppMemBeg = 0;
+ static const uptr kMidAppMemEnd = 0;
};
/*
@@ -345,7 +336,7 @@ C/C++ on linux/powerpc64 (47-bit VMA)
7e00 0000 0000 - 7e80 0000 0000: -
7e80 0000 0000 - 8000 0000 0000: modules and main thread stack
*/
-struct Mapping47 {
+struct MappingPPC64_47 {
static const uptr kMetaShadowBeg = 0x100000000000ull;
static const uptr kMetaShadowEnd = 0x200000000000ull;
static const uptr kTraceMemBeg = 0x200000000000ull;
@@ -358,14 +349,14 @@ struct Mapping47 {
static const uptr kLoAppMemEnd = 0x010000000000ull;
static const uptr kHiAppMemBeg = 0x7e8000000000ull;
static const uptr kHiAppMemEnd = 0x800000000000ull; // 47 bits
- static const uptr kAppMemMsk = 0x7c0000000000ull;
- static const uptr kAppMemXor = 0x020000000000ull;
+ static const uptr kShadowMsk = 0x7c0000000000ull;
+ static const uptr kShadowXor = 0x020000000000ull;
+ static const uptr kShadowAdd = 0x000000000000ull;
static const uptr kVdsoBeg = 0x7800000000000000ull;
+ static const uptr kMidAppMemBeg = 0;
+ static const uptr kMidAppMemEnd = 0;
};
-// Indicates the runtime will define the memory regions at runtime.
-#define TSAN_RUNTIME_VMA 1
-#elif defined(__s390x__)
/*
C/C++ on linux/s390x
While the kernel provides a 64-bit address space, we have to restrict ourselves
@@ -380,7 +371,7 @@ a000 0000 0000 - b000 0000 0000: traces - 16TiB (max history * 128k threads)
b000 0000 0000 - be00 0000 0000: -
be00 0000 0000 - c000 0000 0000: heap - 2TiB (max supported by the allocator)
*/
-struct Mapping {
+struct MappingS390x {
static const uptr kMetaShadowBeg = 0x900000000000ull;
static const uptr kMetaShadowEnd = 0x980000000000ull;
static const uptr kTraceMemBeg = 0xa00000000000ull;
@@ -393,13 +384,13 @@ struct Mapping {
static const uptr kLoAppMemEnd = 0x0e0000000000ull;
static const uptr kHiAppMemBeg = 0xc00000004000ull;
static const uptr kHiAppMemEnd = 0xc00000004000ull;
- static const uptr kAppMemMsk = 0xb00000000000ull;
- static const uptr kAppMemXor = 0x100000000000ull;
+ static const uptr kShadowMsk = 0xb00000000000ull;
+ static const uptr kShadowXor = 0x100000000000ull;
+ static const uptr kShadowAdd = 0x000000000000ull;
static const uptr kVdsoBeg = 0xfffffffff000ull;
+ static const uptr kMidAppMemBeg = 0;
+ static const uptr kMidAppMemEnd = 0;
};
-#endif
-
-#elif SANITIZER_GO && !SANITIZER_WINDOWS && HAS_48_BIT_ADDRESS_SPACE
/* Go on linux, darwin and freebsd on x86_64
0000 0000 1000 - 0000 1000 0000: executable
@@ -414,46 +405,59 @@ struct Mapping {
6200 0000 0000 - 8000 0000 0000: -
*/
-struct Mapping {
+struct MappingGo48 {
static const uptr kMetaShadowBeg = 0x300000000000ull;
static const uptr kMetaShadowEnd = 0x400000000000ull;
static const uptr kTraceMemBeg = 0x600000000000ull;
static const uptr kTraceMemEnd = 0x620000000000ull;
static const uptr kShadowBeg = 0x200000000000ull;
static const uptr kShadowEnd = 0x238000000000ull;
- static const uptr kAppMemBeg = 0x000000001000ull;
- static const uptr kAppMemEnd = 0x00e000000000ull;
+ static const uptr kLoAppMemBeg = 0x000000001000ull;
+ static const uptr kLoAppMemEnd = 0x00e000000000ull;
+ static const uptr kMidAppMemBeg = 0;
+ static const uptr kMidAppMemEnd = 0;
+ static const uptr kHiAppMemBeg = 0;
+ static const uptr kHiAppMemEnd = 0;
+ static const uptr kHeapMemBeg = 0;
+ static const uptr kHeapMemEnd = 0;
+ static const uptr kVdsoBeg = 0;
+ static const uptr kShadowMsk = 0;
+ static const uptr kShadowXor = 0;
+ static const uptr kShadowAdd = 0x200000000000ull;
};
-#elif SANITIZER_GO && SANITIZER_WINDOWS
-
/* Go on windows
0000 0000 1000 - 0000 1000 0000: executable
0000 1000 0000 - 00f8 0000 0000: -
00c0 0000 0000 - 00e0 0000 0000: heap
00e0 0000 0000 - 0100 0000 0000: -
0100 0000 0000 - 0500 0000 0000: shadow
-0500 0000 0000 - 0560 0000 0000: -
-0560 0000 0000 - 0760 0000 0000: traces
-0760 0000 0000 - 07d0 0000 0000: metainfo (memory blocks and sync objects)
+0500 0000 0000 - 0700 0000 0000: traces
+0700 0000 0000 - 0770 0000 0000: metainfo (memory blocks and sync objects)
07d0 0000 0000 - 8000 0000 0000: -
*/
-struct Mapping {
- static const uptr kMetaShadowBeg = 0x076000000000ull;
- static const uptr kMetaShadowEnd = 0x07d000000000ull;
- static const uptr kTraceMemBeg = 0x056000000000ull;
- static const uptr kTraceMemEnd = 0x076000000000ull;
+struct MappingGoWindows {
+ static const uptr kMetaShadowBeg = 0x070000000000ull;
+ static const uptr kMetaShadowEnd = 0x077000000000ull;
+ static const uptr kTraceMemBeg = 0x050000000000ull;
+ static const uptr kTraceMemEnd = 0x070000000000ull;
static const uptr kShadowBeg = 0x010000000000ull;
static const uptr kShadowEnd = 0x050000000000ull;
- static const uptr kAppMemBeg = 0x000000001000ull;
- static const uptr kAppMemEnd = 0x00e000000000ull;
+ static const uptr kLoAppMemBeg = 0x000000001000ull;
+ static const uptr kLoAppMemEnd = 0x00e000000000ull;
+ static const uptr kMidAppMemBeg = 0;
+ static const uptr kMidAppMemEnd = 0;
+ static const uptr kHiAppMemBeg = 0;
+ static const uptr kHiAppMemEnd = 0;
+ static const uptr kHeapMemBeg = 0;
+ static const uptr kHeapMemEnd = 0;
+ static const uptr kVdsoBeg = 0;
+ static const uptr kShadowMsk = 0;
+ static const uptr kShadowXor = 0;
+ static const uptr kShadowAdd = 0x010000000000ull;
};
-#elif SANITIZER_GO && defined(__powerpc64__)
-
-/* Only Mapping46 and Mapping47 are currently supported for powercp64 on Go. */
-
/* Go on linux/powerpc64 (46-bit VMA)
0000 0000 1000 - 0000 1000 0000: executable
0000 1000 0000 - 00c0 0000 0000: -
@@ -467,15 +471,25 @@ struct Mapping {
3800 0000 0000 - 4000 0000 0000: -
*/
-struct Mapping46 {
+struct MappingGoPPC64_46 {
static const uptr kMetaShadowBeg = 0x240000000000ull;
static const uptr kMetaShadowEnd = 0x340000000000ull;
static const uptr kTraceMemBeg = 0x360000000000ull;
static const uptr kTraceMemEnd = 0x380000000000ull;
static const uptr kShadowBeg = 0x200000000000ull;
static const uptr kShadowEnd = 0x238000000000ull;
- static const uptr kAppMemBeg = 0x000000001000ull;
- static const uptr kAppMemEnd = 0x00e000000000ull;
+ static const uptr kLoAppMemBeg = 0x000000001000ull;
+ static const uptr kLoAppMemEnd = 0x00e000000000ull;
+ static const uptr kMidAppMemBeg = 0;
+ static const uptr kMidAppMemEnd = 0;
+ static const uptr kHiAppMemBeg = 0;
+ static const uptr kHiAppMemEnd = 0;
+ static const uptr kHeapMemBeg = 0;
+ static const uptr kHeapMemEnd = 0;
+ static const uptr kVdsoBeg = 0;
+ static const uptr kShadowMsk = 0;
+ static const uptr kShadowXor = 0;
+ static const uptr kShadowAdd = 0x200000000000ull;
};
/* Go on linux/powerpc64 (47-bit VMA)
@@ -491,21 +505,27 @@ struct Mapping46 {
6200 0000 0000 - 8000 0000 0000: -
*/
-struct Mapping47 {
+struct MappingGoPPC64_47 {
static const uptr kMetaShadowBeg = 0x300000000000ull;
static const uptr kMetaShadowEnd = 0x400000000000ull;
static const uptr kTraceMemBeg = 0x600000000000ull;
static const uptr kTraceMemEnd = 0x620000000000ull;
static const uptr kShadowBeg = 0x200000000000ull;
static const uptr kShadowEnd = 0x300000000000ull;
- static const uptr kAppMemBeg = 0x000000001000ull;
- static const uptr kAppMemEnd = 0x00e000000000ull;
+ static const uptr kLoAppMemBeg = 0x000000001000ull;
+ static const uptr kLoAppMemEnd = 0x00e000000000ull;
+ static const uptr kMidAppMemBeg = 0;
+ static const uptr kMidAppMemEnd = 0;
+ static const uptr kHiAppMemBeg = 0;
+ static const uptr kHiAppMemEnd = 0;
+ static const uptr kHeapMemBeg = 0;
+ static const uptr kHeapMemEnd = 0;
+ static const uptr kVdsoBeg = 0;
+ static const uptr kShadowMsk = 0;
+ static const uptr kShadowXor = 0;
+ static const uptr kShadowAdd = 0x200000000000ull;
};
-#define TSAN_RUNTIME_VMA 1
-
-#elif SANITIZER_GO && defined(__aarch64__)
-
/* Go on linux/aarch64 (48-bit VMA) and darwin/aarch64 (47-bit VMA)
0000 0000 1000 - 0000 1000 0000: executable
0000 1000 0000 - 00c0 0000 0000: -
@@ -518,22 +538,27 @@ struct Mapping47 {
6000 0000 0000 - 6200 0000 0000: traces
6200 0000 0000 - 8000 0000 0000: -
*/
-
-struct Mapping {
+struct MappingGoAarch64 {
static const uptr kMetaShadowBeg = 0x300000000000ull;
static const uptr kMetaShadowEnd = 0x400000000000ull;
static const uptr kTraceMemBeg = 0x600000000000ull;
static const uptr kTraceMemEnd = 0x620000000000ull;
static const uptr kShadowBeg = 0x200000000000ull;
static const uptr kShadowEnd = 0x300000000000ull;
- static const uptr kAppMemBeg = 0x000000001000ull;
- static const uptr kAppMemEnd = 0x00e000000000ull;
+ static const uptr kLoAppMemBeg = 0x000000001000ull;
+ static const uptr kLoAppMemEnd = 0x00e000000000ull;
+ static const uptr kMidAppMemBeg = 0;
+ static const uptr kMidAppMemEnd = 0;
+ static const uptr kHiAppMemBeg = 0;
+ static const uptr kHiAppMemEnd = 0;
+ static const uptr kHeapMemBeg = 0;
+ static const uptr kHeapMemEnd = 0;
+ static const uptr kVdsoBeg = 0;
+ static const uptr kShadowMsk = 0;
+ static const uptr kShadowXor = 0;
+ static const uptr kShadowAdd = 0x200000000000ull;
};
-// Indicates the runtime will define the memory regions at runtime.
-#define TSAN_RUNTIME_VMA 1
-
-#elif SANITIZER_GO && defined(__mips64)
/*
Go on linux/mips64 (47-bit VMA)
0000 0000 1000 - 0000 1000 0000: executable
@@ -547,20 +572,27 @@ Go on linux/mips64 (47-bit VMA)
6000 0000 0000 - 6200 0000 0000: traces
6200 0000 0000 - 8000 0000 0000: -
*/
-struct Mapping47 {
+struct MappingGoMips64_47 {
static const uptr kMetaShadowBeg = 0x300000000000ull;
static const uptr kMetaShadowEnd = 0x400000000000ull;
static const uptr kTraceMemBeg = 0x600000000000ull;
static const uptr kTraceMemEnd = 0x620000000000ull;
static const uptr kShadowBeg = 0x200000000000ull;
static const uptr kShadowEnd = 0x300000000000ull;
- static const uptr kAppMemBeg = 0x000000001000ull;
- static const uptr kAppMemEnd = 0x00e000000000ull;
+ static const uptr kLoAppMemBeg = 0x000000001000ull;
+ static const uptr kLoAppMemEnd = 0x00e000000000ull;
+ static const uptr kMidAppMemBeg = 0;
+ static const uptr kMidAppMemEnd = 0;
+ static const uptr kHiAppMemBeg = 0;
+ static const uptr kHiAppMemEnd = 0;
+ static const uptr kHeapMemBeg = 0;
+ static const uptr kHeapMemEnd = 0;
+ static const uptr kVdsoBeg = 0;
+ static const uptr kShadowMsk = 0;
+ static const uptr kShadowXor = 0;
+ static const uptr kShadowAdd = 0x200000000000ull;
};
-#define TSAN_RUNTIME_VMA 1
-
-#elif SANITIZER_GO && defined(__s390x__)
/*
Go on linux/s390x
0000 0000 1000 - 1000 0000 0000: executable and heap - 16 TiB
@@ -571,622 +603,367 @@ Go on linux/s390x
9800 0000 0000 - a000 0000 0000: -
a000 0000 0000 - b000 0000 0000: traces - 16TiB (max history * 128k threads)
*/
-struct Mapping {
+struct MappingGoS390x {
static const uptr kMetaShadowBeg = 0x900000000000ull;
static const uptr kMetaShadowEnd = 0x980000000000ull;
static const uptr kTraceMemBeg = 0xa00000000000ull;
static const uptr kTraceMemEnd = 0xb00000000000ull;
static const uptr kShadowBeg = 0x400000000000ull;
static const uptr kShadowEnd = 0x800000000000ull;
- static const uptr kAppMemBeg = 0x000000001000ull;
- static const uptr kAppMemEnd = 0x100000000000ull;
+ static const uptr kLoAppMemBeg = 0x000000001000ull;
+ static const uptr kLoAppMemEnd = 0x100000000000ull;
+ static const uptr kMidAppMemBeg = 0;
+ static const uptr kMidAppMemEnd = 0;
+ static const uptr kHiAppMemBeg = 0;
+ static const uptr kHiAppMemEnd = 0;
+ static const uptr kHeapMemBeg = 0;
+ static const uptr kHeapMemEnd = 0;
+ static const uptr kVdsoBeg = 0;
+ static const uptr kShadowMsk = 0;
+ static const uptr kShadowXor = 0;
+ static const uptr kShadowAdd = 0x400000000000ull;
};
-#else
-# error "Unknown platform"
-#endif
-
-
-#ifdef TSAN_RUNTIME_VMA
extern uptr vmaSize;
-#endif
-
-
-enum MappingType {
- MAPPING_LO_APP_BEG,
- MAPPING_LO_APP_END,
- MAPPING_HI_APP_BEG,
- MAPPING_HI_APP_END,
-#ifdef TSAN_MID_APP_RANGE
- MAPPING_MID_APP_BEG,
- MAPPING_MID_APP_END,
-#endif
- MAPPING_HEAP_BEG,
- MAPPING_HEAP_END,
- MAPPING_APP_BEG,
- MAPPING_APP_END,
- MAPPING_SHADOW_BEG,
- MAPPING_SHADOW_END,
- MAPPING_META_SHADOW_BEG,
- MAPPING_META_SHADOW_END,
- MAPPING_TRACE_BEG,
- MAPPING_TRACE_END,
- MAPPING_VDSO_BEG,
-};
-
-template<typename Mapping, int Type>
-uptr MappingImpl(void) {
- switch (Type) {
-#if !SANITIZER_GO
- case MAPPING_LO_APP_BEG: return Mapping::kLoAppMemBeg;
- case MAPPING_LO_APP_END: return Mapping::kLoAppMemEnd;
-# ifdef TSAN_MID_APP_RANGE
- case MAPPING_MID_APP_BEG: return Mapping::kMidAppMemBeg;
- case MAPPING_MID_APP_END: return Mapping::kMidAppMemEnd;
-# endif
- case MAPPING_HI_APP_BEG: return Mapping::kHiAppMemBeg;
- case MAPPING_HI_APP_END: return Mapping::kHiAppMemEnd;
- case MAPPING_HEAP_BEG: return Mapping::kHeapMemBeg;
- case MAPPING_HEAP_END: return Mapping::kHeapMemEnd;
- case MAPPING_VDSO_BEG: return Mapping::kVdsoBeg;
-#else
- case MAPPING_APP_BEG: return Mapping::kAppMemBeg;
- case MAPPING_APP_END: return Mapping::kAppMemEnd;
-#endif
- case MAPPING_SHADOW_BEG: return Mapping::kShadowBeg;
- case MAPPING_SHADOW_END: return Mapping::kShadowEnd;
- case MAPPING_META_SHADOW_BEG: return Mapping::kMetaShadowBeg;
- case MAPPING_META_SHADOW_END: return Mapping::kMetaShadowEnd;
- case MAPPING_TRACE_BEG: return Mapping::kTraceMemBeg;
- case MAPPING_TRACE_END: return Mapping::kTraceMemEnd;
- }
-}
-template<int Type>
-uptr MappingArchImpl(void) {
-#if defined(__aarch64__) && !defined(__APPLE__) && !SANITIZER_GO
+template <typename Func, typename Arg>
+ALWAYS_INLINE auto SelectMapping(Arg arg) {
+#if SANITIZER_GO
+# if defined(__powerpc64__)
switch (vmaSize) {
- case 39: return MappingImpl<Mapping39, Type>();
- case 42: return MappingImpl<Mapping42, Type>();
- case 48: return MappingImpl<Mapping48, Type>();
+ case 46:
+ return Func::template Apply<MappingGoPPC64_46>(arg);
+ case 47:
+ return Func::template Apply<MappingGoPPC64_47>(arg);
}
- DCHECK(0);
- return 0;
-#elif defined(__powerpc64__)
+# elif defined(__mips64)
+ return Func::template Apply<MappingGoMips64_47>(arg);
+# elif defined(__s390x__)
+ return Func::template Apply<MappingGoS390x>(arg);
+# elif defined(__aarch64__)
+ return Func::template Apply<MappingGoAarch64>(arg);
+# elif SANITIZER_WINDOWS
+ return Func::template Apply<MappingGoWindows>(arg);
+# else
+ return Func::template Apply<MappingGo48>(arg);
+# endif
+#else // SANITIZER_GO
+# if defined(__x86_64__) || SANITIZER_IOSSIM || SANITIZER_MAC && !SANITIZER_IOS
+ return Func::template Apply<Mapping48AddressSpace>(arg);
+# elif defined(__aarch64__) && defined(__APPLE__)
+ return Func::template Apply<MappingAppleAarch64>(arg);
+# elif defined(__aarch64__) && !defined(__APPLE__)
switch (vmaSize) {
-#if !SANITIZER_GO
- case 44: return MappingImpl<Mapping44, Type>();
-#endif
- case 46: return MappingImpl<Mapping46, Type>();
- case 47: return MappingImpl<Mapping47, Type>();
+ case 39:
+ return Func::template Apply<MappingAarch64_39>(arg);
+ case 42:
+ return Func::template Apply<MappingAarch64_42>(arg);
+ case 48:
+ return Func::template Apply<MappingAarch64_48>(arg);
}
- DCHECK(0);
- return 0;
-#elif defined(__mips64)
+# elif defined(__powerpc64__)
switch (vmaSize) {
-#if !SANITIZER_GO
- case 40: return MappingImpl<Mapping40, Type>();
-#else
- case 47: return MappingImpl<Mapping47, Type>();
-#endif
+ case 44:
+ return Func::template Apply<MappingPPC64_44>(arg);
+ case 46:
+ return Func::template Apply<MappingPPC64_46>(arg);
+ case 47:
+ return Func::template Apply<MappingPPC64_47>(arg);
}
- DCHECK(0);
- return 0;
-#else
- return MappingImpl<Mapping, Type>();
-#endif
+# elif defined(__mips64)
+ return Func::template Apply<MappingMips64_40>(arg);
+# elif defined(__s390x__)
+ return Func::template Apply<MappingS390x>(arg);
+# else
+# error "unsupported platform"
+# endif
+#endif
+ Die();
+}
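SelectMapping replaces the old per-field, per-architecture switch ladders: the caller passes a functor type whose static, templated Apply<Mapping>(arg) is instantiated for whichever mapping matches the platform (and the runtime-detected vmaSize where several layouts are possible). The real consumers are MappingField, IsAppMemImpl, MemToShadowImpl and friends below; a minimal sketch of such a functor (hypothetical name, relying only on the k* constants every mapping in this header defines):

    // Hypothetical visitor: size of the shadow range for the active mapping.
    struct ShadowRangeSizeImpl {
      template <typename Mapping>
      static uptr Apply(uptr) {
        return Mapping::kShadowEnd - Mapping::kShadowBeg;
      }
    };
    // Usage: uptr shadow_size = SelectMapping<ShadowRangeSizeImpl>(0);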
+
+template <typename Func>
+void ForEachMapping() {
+ Func::template Apply<Mapping48AddressSpace>();
+ Func::template Apply<MappingMips64_40>();
+ Func::template Apply<MappingAppleAarch64>();
+ Func::template Apply<MappingAarch64_39>();
+ Func::template Apply<MappingAarch64_42>();
+ Func::template Apply<MappingAarch64_48>();
+ Func::template Apply<MappingPPC64_44>();
+ Func::template Apply<MappingPPC64_46>();
+ Func::template Apply<MappingPPC64_47>();
+ Func::template Apply<MappingS390x>();
+ Func::template Apply<MappingGo48>();
+ Func::template Apply<MappingGoWindows>();
+ Func::template Apply<MappingGoPPC64_46>();
+ Func::template Apply<MappingGoPPC64_47>();
+ Func::template Apply<MappingGoAarch64>();
+ Func::template Apply<MappingGoMips64_47>();
+ Func::template Apply<MappingGoS390x>();
}
-#if !SANITIZER_GO
-ALWAYS_INLINE
-uptr LoAppMemBeg(void) {
- return MappingArchImpl<MAPPING_LO_APP_BEG>();
-}
-ALWAYS_INLINE
-uptr LoAppMemEnd(void) {
- return MappingArchImpl<MAPPING_LO_APP_END>();
-}
+enum MappingType {
+ kLoAppMemBeg,
+ kLoAppMemEnd,
+ kHiAppMemBeg,
+ kHiAppMemEnd,
+ kMidAppMemBeg,
+ kMidAppMemEnd,
+ kHeapMemBeg,
+ kHeapMemEnd,
+ kShadowBeg,
+ kShadowEnd,
+ kMetaShadowBeg,
+ kMetaShadowEnd,
+ kTraceMemBeg,
+ kTraceMemEnd,
+ kVdsoBeg,
+};
-#ifdef TSAN_MID_APP_RANGE
-ALWAYS_INLINE
-uptr MidAppMemBeg(void) {
- return MappingArchImpl<MAPPING_MID_APP_BEG>();
-}
-ALWAYS_INLINE
-uptr MidAppMemEnd(void) {
- return MappingArchImpl<MAPPING_MID_APP_END>();
-}
-#endif
+struct MappingField {
+ template <typename Mapping>
+ static uptr Apply(MappingType type) {
+ switch (type) {
+ case kLoAppMemBeg:
+ return Mapping::kLoAppMemBeg;
+ case kLoAppMemEnd:
+ return Mapping::kLoAppMemEnd;
+ case kMidAppMemBeg:
+ return Mapping::kMidAppMemBeg;
+ case kMidAppMemEnd:
+ return Mapping::kMidAppMemEnd;
+ case kHiAppMemBeg:
+ return Mapping::kHiAppMemBeg;
+ case kHiAppMemEnd:
+ return Mapping::kHiAppMemEnd;
+ case kHeapMemBeg:
+ return Mapping::kHeapMemBeg;
+ case kHeapMemEnd:
+ return Mapping::kHeapMemEnd;
+ case kVdsoBeg:
+ return Mapping::kVdsoBeg;
+ case kShadowBeg:
+ return Mapping::kShadowBeg;
+ case kShadowEnd:
+ return Mapping::kShadowEnd;
+ case kMetaShadowBeg:
+ return Mapping::kMetaShadowBeg;
+ case kMetaShadowEnd:
+ return Mapping::kMetaShadowEnd;
+ case kTraceMemBeg:
+ return Mapping::kTraceMemBeg;
+ case kTraceMemEnd:
+ return Mapping::kTraceMemEnd;
+ }
+ Die();
+ }
+};
ALWAYS_INLINE
-uptr HeapMemBeg(void) {
- return MappingArchImpl<MAPPING_HEAP_BEG>();
-}
+uptr LoAppMemBeg(void) { return SelectMapping<MappingField>(kLoAppMemBeg); }
ALWAYS_INLINE
-uptr HeapMemEnd(void) {
- return MappingArchImpl<MAPPING_HEAP_END>();
-}
+uptr LoAppMemEnd(void) { return SelectMapping<MappingField>(kLoAppMemEnd); }
ALWAYS_INLINE
-uptr HiAppMemBeg(void) {
- return MappingArchImpl<MAPPING_HI_APP_BEG>();
-}
+uptr MidAppMemBeg(void) { return SelectMapping<MappingField>(kMidAppMemBeg); }
ALWAYS_INLINE
-uptr HiAppMemEnd(void) {
- return MappingArchImpl<MAPPING_HI_APP_END>();
-}
+uptr MidAppMemEnd(void) { return SelectMapping<MappingField>(kMidAppMemEnd); }
ALWAYS_INLINE
-uptr VdsoBeg(void) {
- return MappingArchImpl<MAPPING_VDSO_BEG>();
-}
-
-#else
+uptr HeapMemBeg(void) { return SelectMapping<MappingField>(kHeapMemBeg); }
+ALWAYS_INLINE
+uptr HeapMemEnd(void) { return SelectMapping<MappingField>(kHeapMemEnd); }
ALWAYS_INLINE
-uptr AppMemBeg(void) {
- return MappingArchImpl<MAPPING_APP_BEG>();
-}
+uptr HiAppMemBeg(void) { return SelectMapping<MappingField>(kHiAppMemBeg); }
ALWAYS_INLINE
-uptr AppMemEnd(void) {
- return MappingArchImpl<MAPPING_APP_END>();
-}
-
-#endif
+uptr HiAppMemEnd(void) { return SelectMapping<MappingField>(kHiAppMemEnd); }
-static inline
-bool GetUserRegion(int i, uptr *start, uptr *end) {
- switch (i) {
- default:
- return false;
-#if !SANITIZER_GO
- case 0:
- *start = LoAppMemBeg();
- *end = LoAppMemEnd();
- return true;
- case 1:
- *start = HiAppMemBeg();
- *end = HiAppMemEnd();
- return true;
- case 2:
- *start = HeapMemBeg();
- *end = HeapMemEnd();
- return true;
-# ifdef TSAN_MID_APP_RANGE
- case 3:
- *start = MidAppMemBeg();
- *end = MidAppMemEnd();
- return true;
-# endif
-#else
- case 0:
- *start = AppMemBeg();
- *end = AppMemEnd();
- return true;
-#endif
- }
-}
+ALWAYS_INLINE
+uptr VdsoBeg(void) { return SelectMapping<MappingField>(kVdsoBeg); }
ALWAYS_INLINE
-uptr ShadowBeg(void) {
- return MappingArchImpl<MAPPING_SHADOW_BEG>();
-}
+uptr ShadowBeg(void) { return SelectMapping<MappingField>(kShadowBeg); }
ALWAYS_INLINE
-uptr ShadowEnd(void) {
- return MappingArchImpl<MAPPING_SHADOW_END>();
-}
+uptr ShadowEnd(void) { return SelectMapping<MappingField>(kShadowEnd); }
ALWAYS_INLINE
-uptr MetaShadowBeg(void) {
- return MappingArchImpl<MAPPING_META_SHADOW_BEG>();
-}
+uptr MetaShadowBeg(void) { return SelectMapping<MappingField>(kMetaShadowBeg); }
ALWAYS_INLINE
-uptr MetaShadowEnd(void) {
- return MappingArchImpl<MAPPING_META_SHADOW_END>();
-}
+uptr MetaShadowEnd(void) { return SelectMapping<MappingField>(kMetaShadowEnd); }
ALWAYS_INLINE
-uptr TraceMemBeg(void) {
- return MappingArchImpl<MAPPING_TRACE_BEG>();
-}
+uptr TraceMemBeg(void) { return SelectMapping<MappingField>(kTraceMemBeg); }
ALWAYS_INLINE
-uptr TraceMemEnd(void) {
- return MappingArchImpl<MAPPING_TRACE_END>();
-}
-
+uptr TraceMemEnd(void) { return SelectMapping<MappingField>(kTraceMemEnd); }
-template<typename Mapping>
-bool IsAppMemImpl(uptr mem) {
-#if !SANITIZER_GO
+struct IsAppMemImpl {
+ template <typename Mapping>
+ static bool Apply(uptr mem) {
return (mem >= Mapping::kHeapMemBeg && mem < Mapping::kHeapMemEnd) ||
-# ifdef TSAN_MID_APP_RANGE
(mem >= Mapping::kMidAppMemBeg && mem < Mapping::kMidAppMemEnd) ||
-# endif
(mem >= Mapping::kLoAppMemBeg && mem < Mapping::kLoAppMemEnd) ||
(mem >= Mapping::kHiAppMemBeg && mem < Mapping::kHiAppMemEnd);
-#else
- return mem >= Mapping::kAppMemBeg && mem < Mapping::kAppMemEnd;
-#endif
-}
-
-ALWAYS_INLINE
-bool IsAppMem(uptr mem) {
-#if defined(__aarch64__) && !defined(__APPLE__) && !SANITIZER_GO
- switch (vmaSize) {
- case 39: return IsAppMemImpl<Mapping39>(mem);
- case 42: return IsAppMemImpl<Mapping42>(mem);
- case 48: return IsAppMemImpl<Mapping48>(mem);
- }
- DCHECK(0);
- return false;
-#elif defined(__powerpc64__)
- switch (vmaSize) {
-#if !SANITIZER_GO
- case 44: return IsAppMemImpl<Mapping44>(mem);
-#endif
- case 46: return IsAppMemImpl<Mapping46>(mem);
- case 47: return IsAppMemImpl<Mapping47>(mem);
}
- DCHECK(0);
- return false;
-#elif defined(__mips64)
- switch (vmaSize) {
-#if !SANITIZER_GO
- case 40: return IsAppMemImpl<Mapping40>(mem);
-#else
- case 47: return IsAppMemImpl<Mapping47>(mem);
-#endif
- }
- DCHECK(0);
- return false;
-#else
- return IsAppMemImpl<Mapping>(mem);
-#endif
-}
+};
+ALWAYS_INLINE
+bool IsAppMem(uptr mem) { return SelectMapping<IsAppMemImpl>(mem); }
-template<typename Mapping>
-bool IsShadowMemImpl(uptr mem) {
- return mem >= Mapping::kShadowBeg && mem <= Mapping::kShadowEnd;
-}
+struct IsShadowMemImpl {
+ template <typename Mapping>
+ static bool Apply(uptr mem) {
+ return mem >= Mapping::kShadowBeg && mem <= Mapping::kShadowEnd;
+ }
+};
ALWAYS_INLINE
-bool IsShadowMem(uptr mem) {
-#if defined(__aarch64__) && !defined(__APPLE__) && !SANITIZER_GO
- switch (vmaSize) {
- case 39: return IsShadowMemImpl<Mapping39>(mem);
- case 42: return IsShadowMemImpl<Mapping42>(mem);
- case 48: return IsShadowMemImpl<Mapping48>(mem);
- }
- DCHECK(0);
- return false;
-#elif defined(__powerpc64__)
- switch (vmaSize) {
-#if !SANITIZER_GO
- case 44: return IsShadowMemImpl<Mapping44>(mem);
-#endif
- case 46: return IsShadowMemImpl<Mapping46>(mem);
- case 47: return IsShadowMemImpl<Mapping47>(mem);
- }
- DCHECK(0);
- return false;
-#elif defined(__mips64)
- switch (vmaSize) {
-#if !SANITIZER_GO
- case 40: return IsShadowMemImpl<Mapping40>(mem);
-#else
- case 47: return IsShadowMemImpl<Mapping47>(mem);
-#endif
- }
- DCHECK(0);
- return false;
-#else
- return IsShadowMemImpl<Mapping>(mem);
-#endif
+bool IsShadowMem(RawShadow *p) {
+ return SelectMapping<IsShadowMemImpl>(reinterpret_cast<uptr>(p));
}
-
-template<typename Mapping>
-bool IsMetaMemImpl(uptr mem) {
- return mem >= Mapping::kMetaShadowBeg && mem <= Mapping::kMetaShadowEnd;
-}
+struct IsMetaMemImpl {
+ template <typename Mapping>
+ static bool Apply(uptr mem) {
+ return mem >= Mapping::kMetaShadowBeg && mem <= Mapping::kMetaShadowEnd;
+ }
+};
ALWAYS_INLINE
-bool IsMetaMem(uptr mem) {
-#if defined(__aarch64__) && !defined(__APPLE__) && !SANITIZER_GO
- switch (vmaSize) {
- case 39: return IsMetaMemImpl<Mapping39>(mem);
- case 42: return IsMetaMemImpl<Mapping42>(mem);
- case 48: return IsMetaMemImpl<Mapping48>(mem);
- }
- DCHECK(0);
- return false;
-#elif defined(__powerpc64__)
- switch (vmaSize) {
-#if !SANITIZER_GO
- case 44: return IsMetaMemImpl<Mapping44>(mem);
-#endif
- case 46: return IsMetaMemImpl<Mapping46>(mem);
- case 47: return IsMetaMemImpl<Mapping47>(mem);
+bool IsMetaMem(const u32 *p) {
+ return SelectMapping<IsMetaMemImpl>(reinterpret_cast<uptr>(p));
+}
+
+struct MemToShadowImpl {
+ template <typename Mapping>
+ static uptr Apply(uptr x) {
+ DCHECK(IsAppMemImpl::Apply<Mapping>(x));
+ return (((x) & ~(Mapping::kShadowMsk | (kShadowCell - 1))) ^
+ Mapping::kShadowXor) *
+ kShadowMultiplier +
+ Mapping::kShadowAdd;
}
- DCHECK(0);
- return false;
-#elif defined(__mips64)
- switch (vmaSize) {
-#if !SANITIZER_GO
- case 40: return IsMetaMemImpl<Mapping40>(mem);
-#else
- case 47: return IsMetaMemImpl<Mapping47>(mem);
-#endif
- }
- DCHECK(0);
- return false;
-#else
- return IsMetaMemImpl<Mapping>(mem);
-#endif
-}
-
-
-template<typename Mapping>
-uptr MemToShadowImpl(uptr x) {
- DCHECK(IsAppMem(x));
-#if !SANITIZER_GO
- return (((x) & ~(Mapping::kAppMemMsk | (kShadowCell - 1)))
- ^ Mapping::kAppMemXor) * kShadowCnt;
-#else
-# ifndef SANITIZER_WINDOWS
- return ((x & ~(kShadowCell - 1)) * kShadowCnt) | Mapping::kShadowBeg;
-# else
- return ((x & ~(kShadowCell - 1)) * kShadowCnt) + Mapping::kShadowBeg;
-# endif
-#endif
-}
+};
ALWAYS_INLINE
-uptr MemToShadow(uptr x) {
-#if defined(__aarch64__) && !defined(__APPLE__) && !SANITIZER_GO
- switch (vmaSize) {
- case 39: return MemToShadowImpl<Mapping39>(x);
- case 42: return MemToShadowImpl<Mapping42>(x);
- case 48: return MemToShadowImpl<Mapping48>(x);
- }
- DCHECK(0);
- return 0;
-#elif defined(__powerpc64__)
- switch (vmaSize) {
-#if !SANITIZER_GO
- case 44: return MemToShadowImpl<Mapping44>(x);
-#endif
- case 46: return MemToShadowImpl<Mapping46>(x);
- case 47: return MemToShadowImpl<Mapping47>(x);
- }
- DCHECK(0);
- return 0;
-#elif defined(__mips64)
- switch (vmaSize) {
-#if !SANITIZER_GO
- case 40: return MemToShadowImpl<Mapping40>(x);
-#else
- case 47: return MemToShadowImpl<Mapping47>(x);
-#endif
- }
- DCHECK(0);
- return 0;
-#else
- return MemToShadowImpl<Mapping>(x);
-#endif
+RawShadow *MemToShadow(uptr x) {
+ return reinterpret_cast<RawShadow *>(SelectMapping<MemToShadowImpl>(x));
}
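The formula in MemToShadowImpl is the old computation with the per-mapping constants renamed (kAppMemMsk/kAppMemXor become kShadowMsk/kShadowXor, the multiplier switches from kShadowCnt to kShadowMultiplier) plus an explicit kShadowAdd term, which is zero for the C/C++ mappings and carries the shadow base for the Go mappings. A standalone worked sketch using Mapping48AddressSpace's constants from this diff; kShadowCell and kShadowMultiplier are defined elsewhere in the runtime, so the values used here (8-byte cells, multiplier 4) are assumptions purely for illustration:

    #include <cstdint>
    #include <cstdio>

    int main() {
      using uptr = uint64_t;
      // Constants taken from Mapping48AddressSpace in this diff.
      const uptr kShadowMsk = 0x780000000000ull;
      const uptr kShadowXor = 0x040000000000ull;
      const uptr kShadowAdd = 0x000000000000ull;
      // Assumed values, for the sake of the example only.
      const uptr kShadowCell = 8;
      const uptr kShadowMultiplier = 4;
      uptr app = 0x7e8000001234ull;  // inside the high app range of that mapping
      uptr shadow = ((app & ~(kShadowMsk | (kShadowCell - 1))) ^ kShadowXor) *
                        kShadowMultiplier +
                    kShadowAdd;
      printf("app 0x%llx -> shadow 0x%llx\n", (unsigned long long)app,
             (unsigned long long)shadow);
      return 0;
    }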
-
-template<typename Mapping>
-u32 *MemToMetaImpl(uptr x) {
- DCHECK(IsAppMem(x));
-#if !SANITIZER_GO
- return (u32*)(((((x) & ~(Mapping::kAppMemMsk | (kMetaShadowCell - 1)))) /
- kMetaShadowCell * kMetaShadowSize) | Mapping::kMetaShadowBeg);
-#else
-# ifndef SANITIZER_WINDOWS
- return (u32*)(((x & ~(kMetaShadowCell - 1)) / \
- kMetaShadowCell * kMetaShadowSize) | Mapping::kMetaShadowBeg);
-# else
- return (u32*)(((x & ~(kMetaShadowCell - 1)) / \
- kMetaShadowCell * kMetaShadowSize) + Mapping::kMetaShadowBeg);
-# endif
-#endif
-}
+struct MemToMetaImpl {
+ template <typename Mapping>
+ static u32 *Apply(uptr x) {
+ DCHECK(IsAppMemImpl::Apply<Mapping>(x));
+ return (u32 *)(((((x) & ~(Mapping::kShadowMsk | (kMetaShadowCell - 1)))) /
+ kMetaShadowCell * kMetaShadowSize) |
+ Mapping::kMetaShadowBeg);
+ }
+};
ALWAYS_INLINE
-u32 *MemToMeta(uptr x) {
-#if defined(__aarch64__) && !defined(__APPLE__) && !SANITIZER_GO
- switch (vmaSize) {
- case 39: return MemToMetaImpl<Mapping39>(x);
- case 42: return MemToMetaImpl<Mapping42>(x);
- case 48: return MemToMetaImpl<Mapping48>(x);
- }
- DCHECK(0);
- return 0;
-#elif defined(__powerpc64__)
- switch (vmaSize) {
-#if !SANITIZER_GO
- case 44: return MemToMetaImpl<Mapping44>(x);
-#endif
- case 46: return MemToMetaImpl<Mapping46>(x);
- case 47: return MemToMetaImpl<Mapping47>(x);
+u32 *MemToMeta(uptr x) { return SelectMapping<MemToMetaImpl>(x); }
+
+struct ShadowToMemImpl {
+ template <typename Mapping>
+ static uptr Apply(uptr sp) {
+ if (!IsShadowMemImpl::Apply<Mapping>(sp))
+ return 0;
+ // The shadow mapping is non-linear and we've lost some bits, so we don't
+ // have an easy way to restore the original app address. But the mapping is
+ // a bijection, so we try to restore the address as belonging to
+ // low/mid/high range consecutively and see if shadow->app->shadow mapping
+ // gives us the same address.
+ uptr p =
+ ((sp - Mapping::kShadowAdd) / kShadowMultiplier) ^ Mapping::kShadowXor;
+ if (p >= Mapping::kLoAppMemBeg && p < Mapping::kLoAppMemEnd &&
+ MemToShadowImpl::Apply<Mapping>(p) == sp)
+ return p;
+ if (Mapping::kMidAppMemBeg) {
+ uptr p_mid = p + (Mapping::kMidAppMemBeg & Mapping::kShadowMsk);
+ if (p_mid >= Mapping::kMidAppMemBeg && p_mid < Mapping::kMidAppMemEnd &&
+ MemToShadowImpl::Apply<Mapping>(p_mid) == sp)
+ return p_mid;
+ }
+ return p | Mapping::kShadowMsk;
}
- DCHECK(0);
- return 0;
-#elif defined(__mips64)
- switch (vmaSize) {
-#if !SANITIZER_GO
- case 40: return MemToMetaImpl<Mapping40>(x);
-#else
- case 47: return MemToMetaImpl<Mapping47>(x);
-#endif
- }
- DCHECK(0);
- return 0;
-#else
- return MemToMetaImpl<Mapping>(x);
-#endif
-}
-
-
-template<typename Mapping>
-uptr ShadowToMemImpl(uptr s) {
- DCHECK(IsShadowMem(s));
-#if !SANITIZER_GO
- // The shadow mapping is non-linear and we've lost some bits, so we don't have
- // an easy way to restore the original app address. But the mapping is a
- // bijection, so we try to restore the address as belonging to low/mid/high
- // range consecutively and see if shadow->app->shadow mapping gives us the
- // same address.
- uptr p = (s / kShadowCnt) ^ Mapping::kAppMemXor;
- if (p >= Mapping::kLoAppMemBeg && p < Mapping::kLoAppMemEnd &&
- MemToShadow(p) == s)
- return p;
-# ifdef TSAN_MID_APP_RANGE
- p = ((s / kShadowCnt) ^ Mapping::kAppMemXor) +
- (Mapping::kMidAppMemBeg & Mapping::kAppMemMsk);
- if (p >= Mapping::kMidAppMemBeg && p < Mapping::kMidAppMemEnd &&
- MemToShadow(p) == s)
- return p;
-# endif
- return ((s / kShadowCnt) ^ Mapping::kAppMemXor) | Mapping::kAppMemMsk;
-#else // #if !SANITIZER_GO
-# ifndef SANITIZER_WINDOWS
- return (s & ~Mapping::kShadowBeg) / kShadowCnt;
-# else
- return (s - Mapping::kShadowBeg) / kShadowCnt;
-# endif // SANITIZER_WINDOWS
-#endif
-}
+};
ALWAYS_INLINE
-uptr ShadowToMem(uptr s) {
-#if defined(__aarch64__) && !defined(__APPLE__) && !SANITIZER_GO
- switch (vmaSize) {
- case 39: return ShadowToMemImpl<Mapping39>(s);
- case 42: return ShadowToMemImpl<Mapping42>(s);
- case 48: return ShadowToMemImpl<Mapping48>(s);
- }
- DCHECK(0);
- return 0;
-#elif defined(__powerpc64__)
- switch (vmaSize) {
-#if !SANITIZER_GO
- case 44: return ShadowToMemImpl<Mapping44>(s);
-#endif
- case 46: return ShadowToMemImpl<Mapping46>(s);
- case 47: return ShadowToMemImpl<Mapping47>(s);
+uptr ShadowToMem(RawShadow *s) {
+ return SelectMapping<ShadowToMemImpl>(reinterpret_cast<uptr>(s));
+}
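As the comment in ShadowToMemImpl explains, the reverse direction is recovered by candidate testing: undo the add, scale and XOR, then accept a candidate only if mapping it forward again reproduces the shadow address. The invariant callers can rely on, sketched with the helpers declared in this header (kShadowCell and RoundDown come from elsewhere in the runtime):

    // Sketch: the app->shadow->app round trip returns the original address
    // rounded down to the start of its shadow cell.
    inline bool CheckShadowRoundTrip(uptr a) {  // a must satisfy IsAppMem(a)
      RawShadow *s = MemToShadow(a);
      return ShadowToMem(s) == RoundDown(a, kShadowCell);
    }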
+
+// Compresses addr to kCompressedAddrBits stored in least significant bits.
+ALWAYS_INLINE uptr CompressAddr(uptr addr) {
+ return addr & ((1ull << kCompressedAddrBits) - 1);
+}
+
+struct RestoreAddrImpl {
+ typedef uptr Result;
+ template <typename Mapping>
+ static Result Apply(uptr addr) {
+ // To restore the address we go over all app memory ranges and check if top
+ // 3 bits of the compressed addr match those of the app range. If yes, we
+ // assume that the compressed address comes from that range and restore the
+ // missing top bits to match the app range address.
+ const uptr ranges[] = {
+ Mapping::kLoAppMemBeg, Mapping::kLoAppMemEnd, Mapping::kMidAppMemBeg,
+ Mapping::kMidAppMemEnd, Mapping::kHiAppMemBeg, Mapping::kHiAppMemEnd,
+ Mapping::kHeapMemBeg, Mapping::kHeapMemEnd,
+ };
+ const uptr indicator = 0x0e0000000000ull;
+ const uptr ind_lsb = 1ull << LeastSignificantSetBitIndex(indicator);
+ for (uptr i = 0; i < ARRAY_SIZE(ranges); i += 2) {
+ uptr beg = ranges[i];
+ uptr end = ranges[i + 1];
+ if (beg == end)
+ continue;
+ for (uptr p = beg; p < end; p = RoundDown(p + ind_lsb, ind_lsb)) {
+ if ((addr & indicator) == (p & indicator))
+ return addr | (p & ~(ind_lsb - 1));
+ }
+ }
+ Printf("ThreadSanitizer: failed to restore address 0x%zx\n", addr);
+ Die();
}
- DCHECK(0);
- return 0;
-#elif defined(__mips64)
- switch (vmaSize) {
-#if !SANITIZER_GO
- case 40: return ShadowToMemImpl<Mapping40>(s);
-#else
- case 47: return ShadowToMemImpl<Mapping47>(s);
-#endif
- }
- DCHECK(0);
- return 0;
-#else
- return ShadowToMemImpl<Mapping>(s);
-#endif
-}
-
+};
+// Restores compressed addr from kCompressedAddrBits to full representation.
+// This is called only during reporting and is not performance-critical.
+inline uptr RestoreAddr(uptr addr) {
+ return SelectMapping<RestoreAddrImpl>(addr);
+}
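RestoreAddr undoes CompressAddr by probing every app range and matching the indicator bits (the top bits that survive compression) against candidate positions inside the range. A worked example for Mapping48AddressSpace's high app range, assuming kCompressedAddrBits is 44 (its value is defined elsewhere in the runtime, so this is an assumption for illustration):

    indicator          = 0x0e0000000000,  ind_lsb = 1ull << 41
    full address       = 0x7e8000001234   (inside [kHiAppMemBeg, kHiAppMemEnd))
    CompressAddr(addr) = 0x0e8000001234   (top bits dropped)
    candidate p        = 0x7e8000000000   (= kHiAppMemBeg, first probe in that range)
    p & indicator      = 0x0e0000000000 == addr & indicator  -> match
    restored           = addr | (p & ~(ind_lsb - 1)) = 0x7e8000001234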
// The additional page is to catch shadow stack overflow as paging fault.
// Windows wants 64K alignment for mmaps.
const uptr kTotalTraceSize = (kTraceSize * sizeof(Event) + sizeof(Trace)
+ (64 << 10) + (64 << 10) - 1) & ~((64 << 10) - 1);
-template<typename Mapping>
-uptr GetThreadTraceImpl(int tid) {
- uptr p = Mapping::kTraceMemBeg + (uptr)tid * kTotalTraceSize;
- DCHECK_LT(p, Mapping::kTraceMemEnd);
- return p;
-}
+struct GetThreadTraceImpl {
+ template <typename Mapping>
+ static uptr Apply(uptr tid) {
+ uptr p = Mapping::kTraceMemBeg + tid * kTotalTraceSize;
+ DCHECK_LT(p, Mapping::kTraceMemEnd);
+ return p;
+ }
+};
ALWAYS_INLINE
-uptr GetThreadTrace(int tid) {
-#if defined(__aarch64__) && !defined(__APPLE__) && !SANITIZER_GO
- switch (vmaSize) {
- case 39: return GetThreadTraceImpl<Mapping39>(tid);
- case 42: return GetThreadTraceImpl<Mapping42>(tid);
- case 48: return GetThreadTraceImpl<Mapping48>(tid);
- }
- DCHECK(0);
- return 0;
-#elif defined(__powerpc64__)
- switch (vmaSize) {
-#if !SANITIZER_GO
- case 44: return GetThreadTraceImpl<Mapping44>(tid);
-#endif
- case 46: return GetThreadTraceImpl<Mapping46>(tid);
- case 47: return GetThreadTraceImpl<Mapping47>(tid);
- }
- DCHECK(0);
- return 0;
-#elif defined(__mips64)
- switch (vmaSize) {
-#if !SANITIZER_GO
- case 40: return GetThreadTraceImpl<Mapping40>(tid);
-#else
- case 47: return GetThreadTraceImpl<Mapping47>(tid);
-#endif
+uptr GetThreadTrace(int tid) { return SelectMapping<GetThreadTraceImpl>(tid); }
+
+struct GetThreadTraceHeaderImpl {
+ template <typename Mapping>
+ static uptr Apply(uptr tid) {
+ uptr p = Mapping::kTraceMemBeg + tid * kTotalTraceSize +
+ kTraceSize * sizeof(Event);
+ DCHECK_LT(p, Mapping::kTraceMemEnd);
+ return p;
}
- DCHECK(0);
- return 0;
-#else
- return GetThreadTraceImpl<Mapping>(tid);
-#endif
-}
-
-
-template<typename Mapping>
-uptr GetThreadTraceHeaderImpl(int tid) {
- uptr p = Mapping::kTraceMemBeg + (uptr)tid * kTotalTraceSize
- + kTraceSize * sizeof(Event);
- DCHECK_LT(p, Mapping::kTraceMemEnd);
- return p;
-}
+};
ALWAYS_INLINE
uptr GetThreadTraceHeader(int tid) {
-#if defined(__aarch64__) && !defined(__APPLE__) && !SANITIZER_GO
- switch (vmaSize) {
- case 39: return GetThreadTraceHeaderImpl<Mapping39>(tid);
- case 42: return GetThreadTraceHeaderImpl<Mapping42>(tid);
- case 48: return GetThreadTraceHeaderImpl<Mapping48>(tid);
- }
- DCHECK(0);
- return 0;
-#elif defined(__powerpc64__)
- switch (vmaSize) {
-#if !SANITIZER_GO
- case 44: return GetThreadTraceHeaderImpl<Mapping44>(tid);
-#endif
- case 46: return GetThreadTraceHeaderImpl<Mapping46>(tid);
- case 47: return GetThreadTraceHeaderImpl<Mapping47>(tid);
- }
- DCHECK(0);
- return 0;
-#elif defined(__mips64)
- switch (vmaSize) {
-#if !SANITIZER_GO
- case 40: return GetThreadTraceHeaderImpl<Mapping40>(tid);
-#else
- case 47: return GetThreadTraceHeaderImpl<Mapping47>(tid);
-#endif
- }
- DCHECK(0);
- return 0;
-#else
- return GetThreadTraceHeaderImpl<Mapping>(tid);
-#endif
+ return SelectMapping<GetThreadTraceHeaderImpl>(tid);
}
void InitializePlatform();
@@ -1194,7 +971,7 @@ void InitializePlatformEarly();
void CheckAndProtect();
void InitializeShadowMemoryPlatform();
void FlushShadowMemory();
-void WriteMemoryProfile(char *buf, uptr buf_size, uptr nthread, uptr nlive);
+void WriteMemoryProfile(char *buf, uptr buf_size, u64 uptime_ns);
int ExtractResolvFDs(void *state, int *fds, int nfd);
int ExtractRecvmsgFDs(void *msg, int *fds, int nfd);
uptr ExtractLongJmpSp(uptr *env);
diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp
index cfe597e5380e..73ec14892d28 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp
@@ -85,63 +85,68 @@ static void InitializeLongjmpXorKey();
static uptr longjmp_xor_key;
#endif
-#ifdef TSAN_RUNTIME_VMA
// Runtime detected VMA size.
uptr vmaSize;
-#endif
enum {
- MemTotal = 0,
- MemShadow = 1,
- MemMeta = 2,
- MemFile = 3,
- MemMmap = 4,
- MemTrace = 5,
- MemHeap = 6,
- MemOther = 7,
- MemCount = 8,
+ MemTotal,
+ MemShadow,
+ MemMeta,
+ MemFile,
+ MemMmap,
+ MemTrace,
+ MemHeap,
+ MemOther,
+ MemCount,
};
-void FillProfileCallback(uptr p, uptr rss, bool file,
- uptr *mem, uptr stats_size) {
+void FillProfileCallback(uptr p, uptr rss, bool file, uptr *mem) {
mem[MemTotal] += rss;
if (p >= ShadowBeg() && p < ShadowEnd())
mem[MemShadow] += rss;
else if (p >= MetaShadowBeg() && p < MetaShadowEnd())
mem[MemMeta] += rss;
-#if !SANITIZER_GO
+ else if ((p >= LoAppMemBeg() && p < LoAppMemEnd()) ||
+ (p >= MidAppMemBeg() && p < MidAppMemEnd()) ||
+ (p >= HiAppMemBeg() && p < HiAppMemEnd()))
+ mem[file ? MemFile : MemMmap] += rss;
else if (p >= HeapMemBeg() && p < HeapMemEnd())
mem[MemHeap] += rss;
- else if (p >= LoAppMemBeg() && p < LoAppMemEnd())
- mem[file ? MemFile : MemMmap] += rss;
- else if (p >= HiAppMemBeg() && p < HiAppMemEnd())
- mem[file ? MemFile : MemMmap] += rss;
-#else
- else if (p >= AppMemBeg() && p < AppMemEnd())
- mem[file ? MemFile : MemMmap] += rss;
-#endif
else if (p >= TraceMemBeg() && p < TraceMemEnd())
mem[MemTrace] += rss;
else
mem[MemOther] += rss;
}
-void WriteMemoryProfile(char *buf, uptr buf_size, uptr nthread, uptr nlive) {
+void WriteMemoryProfile(char *buf, uptr buf_size, u64 uptime_ns) {
uptr mem[MemCount];
- internal_memset(mem, 0, sizeof(mem[0]) * MemCount);
- __sanitizer::GetMemoryProfile(FillProfileCallback, mem, 7);
- StackDepotStats *stacks = StackDepotGetStats();
- internal_snprintf(buf, buf_size,
- "RSS %zd MB: shadow:%zd meta:%zd file:%zd mmap:%zd"
- " trace:%zd heap:%zd other:%zd stacks=%zd[%zd] nthr=%zd/%zd\n",
- mem[MemTotal] >> 20, mem[MemShadow] >> 20, mem[MemMeta] >> 20,
- mem[MemFile] >> 20, mem[MemMmap] >> 20, mem[MemTrace] >> 20,
- mem[MemHeap] >> 20, mem[MemOther] >> 20,
- stacks->allocated >> 20, stacks->n_uniq_ids,
- nlive, nthread);
+ internal_memset(mem, 0, sizeof(mem));
+ GetMemoryProfile(FillProfileCallback, mem);
+ auto meta = ctx->metamap.GetMemoryStats();
+ StackDepotStats stacks = StackDepotGetStats();
+ uptr nthread, nlive;
+ ctx->thread_registry.GetNumberOfThreads(&nthread, &nlive);
+ uptr internal_stats[AllocatorStatCount];
+ internal_allocator()->GetStats(internal_stats);
+ // All these are allocated from the common mmap region.
+ mem[MemMmap] -= meta.mem_block + meta.sync_obj + stacks.allocated +
+ internal_stats[AllocatorStatMapped];
+ if (s64(mem[MemMmap]) < 0)
+ mem[MemMmap] = 0;
+ internal_snprintf(
+ buf, buf_size,
+ "%llus: RSS %zd MB: shadow:%zd meta:%zd file:%zd mmap:%zd"
+ " trace:%zd heap:%zd other:%zd intalloc:%zd memblocks:%zd syncobj:%zu"
+ " stacks=%zd[%zd] nthr=%zd/%zd\n",
+ uptime_ns / (1000 * 1000 * 1000), mem[MemTotal] >> 20,
+ mem[MemShadow] >> 20, mem[MemMeta] >> 20, mem[MemFile] >> 20,
+ mem[MemMmap] >> 20, mem[MemTrace] >> 20, mem[MemHeap] >> 20,
+ mem[MemOther] >> 20, internal_stats[AllocatorStatMapped] >> 20,
+ meta.mem_block >> 20, meta.sync_obj >> 20, stacks.allocated >> 20,
+ stacks.n_uniq_ids, nlive, nthread);
}
-#if SANITIZER_LINUX
+# if SANITIZER_LINUX
void FlushShadowMemoryCallback(
const SuspendedThreadsList &suspended_threads_list,
void *argument) {
@@ -178,12 +183,13 @@ static void MapRodata() {
internal_unlink(name); // Unlink it now, so that we can reuse the buffer.
fd_t fd = openrv;
// Fill the file with kShadowRodata.
- const uptr kMarkerSize = 512 * 1024 / sizeof(u64);
- InternalMmapVector<u64> marker(kMarkerSize);
+ const uptr kMarkerSize = 512 * 1024 / sizeof(RawShadow);
+ InternalMmapVector<RawShadow> marker(kMarkerSize);
// volatile to prevent insertion of memset
- for (volatile u64 *p = marker.data(); p < marker.data() + kMarkerSize; p++)
+ for (volatile RawShadow *p = marker.data(); p < marker.data() + kMarkerSize;
+ p++)
*p = kShadowRodata;
- internal_write(fd, marker.data(), marker.size() * sizeof(u64));
+ internal_write(fd, marker.data(), marker.size() * sizeof(RawShadow));
// Map the file into memory.
uptr page = internal_mmap(0, GetPageSizeCached(), PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, fd, 0);
@@ -203,9 +209,10 @@ static void MapRodata() {
char *shadow_start = (char *)MemToShadow(segment.start);
char *shadow_end = (char *)MemToShadow(segment.end);
for (char *p = shadow_start; p < shadow_end;
- p += marker.size() * sizeof(u64)) {
- internal_mmap(p, Min<uptr>(marker.size() * sizeof(u64), shadow_end - p),
- PROT_READ, MAP_PRIVATE | MAP_FIXED, fd, 0);
+ p += marker.size() * sizeof(RawShadow)) {
+ internal_mmap(
+ p, Min<uptr>(marker.size() * sizeof(RawShadow), shadow_end - p),
+ PROT_READ, MAP_PRIVATE | MAP_FIXED, fd, 0);
}
}
}
@@ -219,7 +226,6 @@ void InitializeShadowMemoryPlatform() {
#endif // #if !SANITIZER_GO
void InitializePlatformEarly() {
-#ifdef TSAN_RUNTIME_VMA
vmaSize =
(MostSignificantSetBitIndex(GET_CURRENT_FRAME()) + 1);
#if defined(__aarch64__)
@@ -265,7 +271,6 @@ void InitializePlatformEarly() {
}
# endif
#endif
-#endif
}
void InitializePlatform() {
@@ -341,7 +346,7 @@ int ExtractResolvFDs(void *state, int *fds, int nfd) {
}
// Extract file descriptors passed via UNIX domain sockets.
-// This is requried to properly handle "open" of these fds.
+// This is required to properly handle "open" of these fds.
// see 'man recvmsg' and 'man 3 cmsg'.
int ExtractRecvmsgFDs(void *msgp, int *fds, int nfd) {
int res = 0;
@@ -447,6 +452,8 @@ static void InitializeLongjmpXorKey() {
}
#endif
+extern "C" void __tsan_tls_initialization() {}
+
void ImitateTlsWrite(ThreadState *thr, uptr tls_addr, uptr tls_size) {
// Check that the thr object is in tls;
const uptr thr_beg = (uptr)thr;
@@ -456,9 +463,10 @@ void ImitateTlsWrite(ThreadState *thr, uptr tls_addr, uptr tls_size) {
CHECK_GE(thr_end, tls_addr);
CHECK_LE(thr_end, tls_addr + tls_size);
// Since the thr object is huge, skip it.
- MemoryRangeImitateWrite(thr, /*pc=*/2, tls_addr, thr_beg - tls_addr);
- MemoryRangeImitateWrite(thr, /*pc=*/2, thr_end,
- tls_addr + tls_size - thr_end);
+ const uptr pc = StackTrace::GetNextInstructionPc(
+ reinterpret_cast<uptr>(__tsan_tls_initialization));
+ MemoryRangeImitateWrite(thr, pc, tls_addr, thr_beg - tls_addr);
+ MemoryRangeImitateWrite(thr, pc, thr_end, tls_addr + tls_size - thr_end);
}
// Note: this function runs with async signals enabled,
diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform_mac.cpp b/compiler-rt/lib/tsan/rtl/tsan_platform_mac.cpp
index d9719a136b21..3faa2d0c6192 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_platform_mac.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_platform_mac.cpp
@@ -139,7 +139,7 @@ static void RegionMemUsage(uptr start, uptr end, uptr *res, uptr *dirty) {
*dirty = dirty_pages * GetPageSizeCached();
}
-void WriteMemoryProfile(char *buf, uptr buf_size, uptr nthread, uptr nlive) {
+void WriteMemoryProfile(char *buf, uptr buf_size, u64 uptime_ns) {
uptr shadow_res, shadow_dirty;
uptr meta_res, meta_dirty;
uptr trace_res, trace_dirty;
@@ -156,39 +156,41 @@ void WriteMemoryProfile(char *buf, uptr buf_size, uptr nthread, uptr nlive) {
RegionMemUsage(HeapMemBeg(), HeapMemEnd(), &heap_res, &heap_dirty);
#else // !SANITIZER_GO
uptr app_res, app_dirty;
- RegionMemUsage(AppMemBeg(), AppMemEnd(), &app_res, &app_dirty);
+ RegionMemUsage(LoAppMemBeg(), LoAppMemEnd(), &app_res, &app_dirty);
#endif
- StackDepotStats *stacks = StackDepotGetStats();
- internal_snprintf(buf, buf_size,
- "shadow (0x%016zx-0x%016zx): resident %zd kB, dirty %zd kB\n"
- "meta (0x%016zx-0x%016zx): resident %zd kB, dirty %zd kB\n"
- "traces (0x%016zx-0x%016zx): resident %zd kB, dirty %zd kB\n"
-#if !SANITIZER_GO
- "low app (0x%016zx-0x%016zx): resident %zd kB, dirty %zd kB\n"
- "high app (0x%016zx-0x%016zx): resident %zd kB, dirty %zd kB\n"
- "heap (0x%016zx-0x%016zx): resident %zd kB, dirty %zd kB\n"
-#else // !SANITIZER_GO
- "app (0x%016zx-0x%016zx): resident %zd kB, dirty %zd kB\n"
-#endif
- "stacks: %zd unique IDs, %zd kB allocated\n"
- "threads: %zd total, %zd live\n"
- "------------------------------\n",
- ShadowBeg(), ShadowEnd(), shadow_res / 1024, shadow_dirty / 1024,
- MetaShadowBeg(), MetaShadowEnd(), meta_res / 1024, meta_dirty / 1024,
- TraceMemBeg(), TraceMemEnd(), trace_res / 1024, trace_dirty / 1024,
-#if !SANITIZER_GO
- LoAppMemBeg(), LoAppMemEnd(), low_res / 1024, low_dirty / 1024,
- HiAppMemBeg(), HiAppMemEnd(), high_res / 1024, high_dirty / 1024,
- HeapMemBeg(), HeapMemEnd(), heap_res / 1024, heap_dirty / 1024,
-#else // !SANITIZER_GO
- AppMemBeg(), AppMemEnd(), app_res / 1024, app_dirty / 1024,
-#endif
- stacks->n_uniq_ids, stacks->allocated / 1024,
- nthread, nlive);
+ StackDepotStats stacks = StackDepotGetStats();
+ uptr nthread, nlive;
+ ctx->thread_registry.GetNumberOfThreads(&nthread, &nlive);
+ internal_snprintf(
+ buf, buf_size,
+ "shadow (0x%016zx-0x%016zx): resident %zd kB, dirty %zd kB\n"
+ "meta (0x%016zx-0x%016zx): resident %zd kB, dirty %zd kB\n"
+ "traces (0x%016zx-0x%016zx): resident %zd kB, dirty %zd kB\n"
+# if !SANITIZER_GO
+ "low app (0x%016zx-0x%016zx): resident %zd kB, dirty %zd kB\n"
+ "high app (0x%016zx-0x%016zx): resident %zd kB, dirty %zd kB\n"
+ "heap (0x%016zx-0x%016zx): resident %zd kB, dirty %zd kB\n"
+# else // !SANITIZER_GO
+ "app (0x%016zx-0x%016zx): resident %zd kB, dirty %zd kB\n"
+# endif
+ "stacks: %zd unique IDs, %zd kB allocated\n"
+ "threads: %zd total, %zd live\n"
+ "------------------------------\n",
+ ShadowBeg(), ShadowEnd(), shadow_res / 1024, shadow_dirty / 1024,
+ MetaShadowBeg(), MetaShadowEnd(), meta_res / 1024, meta_dirty / 1024,
+ TraceMemBeg(), TraceMemEnd(), trace_res / 1024, trace_dirty / 1024,
+# if !SANITIZER_GO
+ LoAppMemBeg(), LoAppMemEnd(), low_res / 1024, low_dirty / 1024,
+ HiAppMemBeg(), HiAppMemEnd(), high_res / 1024, high_dirty / 1024,
+ HeapMemBeg(), HeapMemEnd(), heap_res / 1024, heap_dirty / 1024,
+# else // !SANITIZER_GO
+ LoAppMemBeg(), LoAppMemEnd(), app_res / 1024, app_dirty / 1024,
+# endif
+ stacks.n_uniq_ids, stacks.allocated / 1024, nthread, nlive);
}
-#if !SANITIZER_GO
+# if !SANITIZER_GO
void InitializeShadowMemoryPlatform() { }
// On OS X, GCD worker threads are created without a call to pthread_create. We
@@ -215,8 +217,8 @@ static void my_pthread_introspection_hook(unsigned int event, pthread_t thread,
Processor *proc = ProcCreate();
ProcWire(proc, thr);
ThreadState *parent_thread_state = nullptr; // No parent.
- int tid = ThreadCreate(parent_thread_state, 0, (uptr)thread, true);
- CHECK_NE(tid, 0);
+ Tid tid = ThreadCreate(parent_thread_state, 0, (uptr)thread, true);
+ CHECK_NE(tid, kMainTid);
ThreadStart(thr, tid, GetTid(), ThreadType::Worker);
}
} else if (event == PTHREAD_INTROSPECTION_THREAD_TERMINATE) {
@@ -234,11 +236,11 @@ static void my_pthread_introspection_hook(unsigned int event, pthread_t thread,
#endif
void InitializePlatformEarly() {
-#if !SANITIZER_GO && !HAS_48_BIT_ADDRESS_SPACE
+# if !SANITIZER_GO && SANITIZER_IOS
uptr max_vm = GetMaxUserVirtualAddress() + 1;
- if (max_vm != Mapping::kHiAppMemEnd) {
+ if (max_vm != HiAppMemEnd()) {
Printf("ThreadSanitizer: unsupported vm address limit %p, expected %p.\n",
- max_vm, Mapping::kHiAppMemEnd);
+ (void *)max_vm, (void *)HiAppMemEnd());
Die();
}
#endif
@@ -281,13 +283,17 @@ uptr ExtractLongJmpSp(uptr *env) {
}
#if !SANITIZER_GO
+extern "C" void __tsan_tls_initialization() {}
+
void ImitateTlsWrite(ThreadState *thr, uptr tls_addr, uptr tls_size) {
// The pointer to the ThreadState object is stored in the shadow memory
// of the tls.
uptr tls_end = tls_addr + tls_size;
uptr thread_identity = (uptr)pthread_self();
+ const uptr pc = StackTrace::GetNextInstructionPc(
+ reinterpret_cast<uptr>(__tsan_tls_initialization));
if (thread_identity == main_thread_identity) {
- MemoryRangeImitateWrite(thr, /*pc=*/2, tls_addr, tls_size);
+ MemoryRangeImitateWrite(thr, pc, tls_addr, tls_size);
} else {
uptr thr_state_start = thread_identity;
uptr thr_state_end = thr_state_start + sizeof(uptr);
@@ -295,10 +301,8 @@ void ImitateTlsWrite(ThreadState *thr, uptr tls_addr, uptr tls_size) {
CHECK_LE(thr_state_start, tls_addr + tls_size);
CHECK_GE(thr_state_end, tls_addr);
CHECK_LE(thr_state_end, tls_addr + tls_size);
- MemoryRangeImitateWrite(thr, /*pc=*/2, tls_addr,
- thr_state_start - tls_addr);
- MemoryRangeImitateWrite(thr, /*pc=*/2, thr_state_end,
- tls_end - thr_state_end);
+ MemoryRangeImitateWrite(thr, pc, tls_addr, thr_state_start - tls_addr);
+ MemoryRangeImitateWrite(thr, pc, thr_state_end, tls_end - thr_state_end);
}
}
#endif
diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform_posix.cpp b/compiler-rt/lib/tsan/rtl/tsan_platform_posix.cpp
index 1c6198cefcd7..763ac444377e 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_platform_posix.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_platform_posix.cpp
@@ -14,12 +14,14 @@
#include "sanitizer_common/sanitizer_platform.h"
#if SANITIZER_POSIX
-#include "sanitizer_common/sanitizer_common.h"
-#include "sanitizer_common/sanitizer_errno.h"
-#include "sanitizer_common/sanitizer_libc.h"
-#include "sanitizer_common/sanitizer_procmaps.h"
-#include "tsan_platform.h"
-#include "tsan_rtl.h"
+# include <dlfcn.h>
+
+# include "sanitizer_common/sanitizer_common.h"
+# include "sanitizer_common/sanitizer_errno.h"
+# include "sanitizer_common/sanitizer_libc.h"
+# include "sanitizer_common/sanitizer_procmaps.h"
+# include "tsan_platform.h"
+# include "tsan_rtl.h"
namespace __tsan {
@@ -29,6 +31,7 @@ static const char kShadowMemoryMappingHint[] =
"HINT: if %s is not supported in your environment, you may set "
"TSAN_OPTIONS=%s=0\n";
+# if !SANITIZER_GO
static void DontDumpShadow(uptr addr, uptr size) {
if (common_flags()->use_madv_dontdump)
if (!DontDumpShadowMemory(addr, size)) {
@@ -39,7 +42,6 @@ static void DontDumpShadow(uptr addr, uptr size) {
}
}
-#if !SANITIZER_GO
void InitializeShadowMemory() {
// Map memory shadow.
if (!MmapFixedSuperNoReserve(ShadowBeg(), ShadowEnd() - ShadowBeg(),
@@ -70,6 +72,11 @@ void InitializeShadowMemory() {
meta, meta + meta_size, meta_size >> 30);
InitializeShadowMemoryPlatform();
+
+ on_initialize = reinterpret_cast<void (*)(void)>(
+ dlsym(RTLD_DEFAULT, "__tsan_on_initialize"));
+ on_finalize =
+ reinterpret_cast<int (*)(int)>(dlsym(RTLD_DEFAULT, "__tsan_on_finalize"));
}
static bool TryProtectRange(uptr beg, uptr end) {
@@ -98,24 +105,24 @@ void CheckAndProtect() {
continue;
if (segment.start >= VdsoBeg()) // vdso
break;
- Printf("FATAL: ThreadSanitizer: unexpected memory mapping %p-%p\n",
+ Printf("FATAL: ThreadSanitizer: unexpected memory mapping 0x%zx-0x%zx\n",
segment.start, segment.end);
Die();
}
-#if defined(__aarch64__) && defined(__APPLE__) && !HAS_48_BIT_ADDRESS_SPACE
+# if defined(__aarch64__) && defined(__APPLE__) && SANITIZER_IOS
ProtectRange(HeapMemEnd(), ShadowBeg());
ProtectRange(ShadowEnd(), MetaShadowBeg());
ProtectRange(MetaShadowEnd(), TraceMemBeg());
#else
ProtectRange(LoAppMemEnd(), ShadowBeg());
ProtectRange(ShadowEnd(), MetaShadowBeg());
-#ifdef TSAN_MID_APP_RANGE
- ProtectRange(MetaShadowEnd(), MidAppMemBeg());
- ProtectRange(MidAppMemEnd(), TraceMemBeg());
-#else
- ProtectRange(MetaShadowEnd(), TraceMemBeg());
-#endif
+ if (MidAppMemBeg()) {
+ ProtectRange(MetaShadowEnd(), MidAppMemBeg());
+ ProtectRange(MidAppMemEnd(), TraceMemBeg());
+ } else {
+ ProtectRange(MetaShadowEnd(), TraceMemBeg());
+ }
// Memory for traces is mapped lazily in MapThreadTrace.
// Protect the whole range for now, so that the user does not map something here.
ProtectRange(TraceMemBeg(), TraceMemEnd());
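The CheckAndProtect hunk above trades the compile-time TSAN_MID_APP_RANGE #ifdef for a runtime test of MidAppMemBeg(), so a single binary handles layouts with and without a middle application range. A rough sketch of the pattern follows; the addresses and the stand-in functions are made up for illustration.

    #include <cstdint>
    #include <cstdio>

    // Stand-ins for the layout query functions; returning 0 from the "mid"
    // accessors means the selected layout has no middle application range.
    uint64_t MidBeg() { return 0; }
    uint64_t MidEnd() { return 0; }

    void Protect(uint64_t beg, uint64_t end) {
      std::printf("protect 0x%llx-0x%llx\n", (unsigned long long)beg,
                  (unsigned long long)end);
    }

    int main() {
      // A runtime check replaces the old #ifdef: the same code path covers
      // both layouts by asking whether a mid range exists at all.
      const uint64_t meta_end = 0x300000000000ull, trace_beg = 0x600000000000ull;
      if (MidBeg()) {
        Protect(meta_end, MidBeg());
        Protect(MidEnd(), trace_beg);
      } else {
        Protect(meta_end, trace_beg);
      }
      return 0;
    }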
diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform_windows.cpp b/compiler-rt/lib/tsan/rtl/tsan_platform_windows.cpp
index 19437879a41c..fea893768c79 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_platform_windows.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_platform_windows.cpp
@@ -23,8 +23,7 @@ namespace __tsan {
void FlushShadowMemory() {
}
-void WriteMemoryProfile(char *buf, uptr buf_size, uptr nthread, uptr nlive) {
-}
+void WriteMemoryProfile(char *buf, uptr buf_size, u64 uptime_ns) {}
void InitializePlatformEarly() {
}
diff --git a/compiler-rt/lib/tsan/rtl/tsan_report.cpp b/compiler-rt/lib/tsan/rtl/tsan_report.cpp
index 8ef9f0cd4fe8..a926c3761ccf 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_report.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_report.cpp
@@ -19,22 +19,6 @@
namespace __tsan {
-ReportStack::ReportStack() : frames(nullptr), suppressable(false) {}
-
-ReportStack *ReportStack::New() {
- void *mem = internal_alloc(MBlockReportStack, sizeof(ReportStack));
- return new(mem) ReportStack();
-}
-
-ReportLocation::ReportLocation(ReportLocationType type)
- : type(type), global(), heap_chunk_start(0), heap_chunk_size(0), tid(0),
- fd(0), suppressable(false), stack(nullptr) {}
-
-ReportLocation *ReportLocation::New(ReportLocationType type) {
- void *mem = internal_alloc(MBlockReportStack, sizeof(ReportLocation));
- return new(mem) ReportLocation(type);
-}
-
class Decorator: public __sanitizer::SanitizerCommonDecorator {
public:
Decorator() : SanitizerCommonDecorator() { }
@@ -68,7 +52,7 @@ ReportDesc::~ReportDesc() {
#if !SANITIZER_GO
const int kThreadBufSize = 32;
-const char *thread_name(char *buf, int tid) {
+const char *thread_name(char *buf, Tid tid) {
if (tid == kMainTid)
return "main thread";
internal_snprintf(buf, kThreadBufSize, "thread T%d", tid);
@@ -189,23 +173,25 @@ static void PrintLocation(const ReportLocation *loc) {
if (loc->type == ReportLocationGlobal) {
const DataInfo &global = loc->global;
if (global.size != 0)
- Printf(" Location is global '%s' of size %zu at %p (%s+%p)\n\n",
- global.name, global.size, global.start,
+ Printf(" Location is global '%s' of size %zu at %p (%s+0x%zx)\n\n",
+ global.name, global.size, reinterpret_cast<void *>(global.start),
StripModuleName(global.module), global.module_offset);
else
- Printf(" Location is global '%s' at %p (%s+%p)\n\n", global.name,
- global.start, StripModuleName(global.module),
- global.module_offset);
+ Printf(" Location is global '%s' at %p (%s+0x%zx)\n\n", global.name,
+ reinterpret_cast<void *>(global.start),
+ StripModuleName(global.module), global.module_offset);
} else if (loc->type == ReportLocationHeap) {
char thrbuf[kThreadBufSize];
const char *object_type = GetObjectTypeFromTag(loc->external_tag);
if (!object_type) {
Printf(" Location is heap block of size %zu at %p allocated by %s:\n",
- loc->heap_chunk_size, loc->heap_chunk_start,
+ loc->heap_chunk_size,
+ reinterpret_cast<void *>(loc->heap_chunk_start),
thread_name(thrbuf, loc->tid));
} else {
Printf(" Location is %s of size %zu at %p allocated by %s:\n",
- object_type, loc->heap_chunk_size, loc->heap_chunk_start,
+ object_type, loc->heap_chunk_size,
+ reinterpret_cast<void *>(loc->heap_chunk_start),
thread_name(thrbuf, loc->tid));
}
print_stack = true;
@@ -225,13 +211,14 @@ static void PrintLocation(const ReportLocation *loc) {
static void PrintMutexShort(const ReportMutex *rm, const char *after) {
Decorator d;
- Printf("%sM%zd%s%s", d.Mutex(), rm->id, d.Default(), after);
+ Printf("%sM%lld%s%s", d.Mutex(), rm->id, d.Default(), after);
}
static void PrintMutexShortWithAddress(const ReportMutex *rm,
const char *after) {
Decorator d;
- Printf("%sM%zd (%p)%s%s", d.Mutex(), rm->id, rm->addr, d.Default(), after);
+ Printf("%sM%lld (%p)%s%s", d.Mutex(), rm->id,
+ reinterpret_cast<void *>(rm->addr), d.Default(), after);
}
static void PrintMutex(const ReportMutex *rm) {
@@ -242,7 +229,8 @@ static void PrintMutex(const ReportMutex *rm) {
Printf("%s", d.Default());
} else {
Printf("%s", d.Mutex());
- Printf(" Mutex M%llu (%p) created at:\n", rm->id, rm->addr);
+ Printf(" Mutex M%llu (%p) created at:\n", rm->id,
+ reinterpret_cast<void *>(rm->addr));
Printf("%s", d.Default());
PrintStack(rm->stack);
}
@@ -259,12 +247,13 @@ static void PrintThread(const ReportThread *rt) {
char thrbuf[kThreadBufSize];
const char *thread_status = rt->running ? "running" : "finished";
if (rt->thread_type == ThreadType::Worker) {
- Printf(" (tid=%zu, %s) is a GCD worker thread\n", rt->os_id, thread_status);
+ Printf(" (tid=%llu, %s) is a GCD worker thread\n", rt->os_id,
+ thread_status);
Printf("\n");
Printf("%s", d.Default());
return;
}
- Printf(" (tid=%zu, %s) created by %s", rt->os_id, thread_status,
+ Printf(" (tid=%llu, %s) created by %s", rt->os_id, thread_status,
thread_name(thrbuf, rt->parent_tid));
if (rt->stack)
Printf(" at:");
@@ -394,7 +383,7 @@ void PrintReport(const ReportDesc *rep) {
#else // #if !SANITIZER_GO
-const u32 kMainGoroutineId = 1;
+const Tid kMainGoroutineId = 1;
void PrintStack(const ReportStack *ent) {
if (ent == 0 || ent->frames == 0) {
@@ -405,16 +394,17 @@ void PrintStack(const ReportStack *ent) {
for (int i = 0; frame; frame = frame->next, i++) {
const AddressInfo &info = frame->info;
Printf(" %s()\n %s:%d +0x%zx\n", info.function,
- StripPathPrefix(info.file, common_flags()->strip_path_prefix),
- info.line, (void *)info.module_offset);
+ StripPathPrefix(info.file, common_flags()->strip_path_prefix),
+ info.line, info.module_offset);
}
}
static void PrintMop(const ReportMop *mop, bool first) {
Printf("\n");
Printf("%s at %p by ",
- (first ? (mop->write ? "Write" : "Read")
- : (mop->write ? "Previous write" : "Previous read")), mop->addr);
+ (first ? (mop->write ? "Write" : "Read")
+ : (mop->write ? "Previous write" : "Previous read")),
+ reinterpret_cast<void *>(mop->addr));
if (mop->tid == kMainGoroutineId)
Printf("main goroutine:\n");
else
@@ -426,8 +416,8 @@ static void PrintLocation(const ReportLocation *loc) {
switch (loc->type) {
case ReportLocationHeap: {
Printf("\n");
- Printf("Heap block of size %zu at %p allocated by ",
- loc->heap_chunk_size, loc->heap_chunk_start);
+ Printf("Heap block of size %zu at %p allocated by ", loc->heap_chunk_size,
+ reinterpret_cast<void *>(loc->heap_chunk_start));
if (loc->tid == kMainGoroutineId)
Printf("main goroutine:\n");
else
@@ -438,8 +428,9 @@ static void PrintLocation(const ReportLocation *loc) {
case ReportLocationGlobal: {
Printf("\n");
Printf("Global var %s of size %zu at %p declared at %s:%zu\n",
- loc->global.name, loc->global.size, loc->global.start,
- loc->global.file, loc->global.line);
+ loc->global.name, loc->global.size,
+ reinterpret_cast<void *>(loc->global.start), loc->global.file,
+ loc->global.line);
break;
}
default:
@@ -469,13 +460,13 @@ void PrintReport(const ReportDesc *rep) {
} else if (rep->typ == ReportTypeDeadlock) {
Printf("WARNING: DEADLOCK\n");
for (uptr i = 0; i < rep->mutexes.Size(); i++) {
- Printf("Goroutine %d lock mutex %d while holding mutex %d:\n",
- 999, rep->mutexes[i]->id,
- rep->mutexes[(i+1) % rep->mutexes.Size()]->id);
+ Printf("Goroutine %d lock mutex %llu while holding mutex %llu:\n", 999,
+ rep->mutexes[i]->id,
+ rep->mutexes[(i + 1) % rep->mutexes.Size()]->id);
PrintStack(rep->stacks[2*i]);
Printf("\n");
- Printf("Mutex %d was previously locked here:\n",
- rep->mutexes[(i+1) % rep->mutexes.Size()]->id);
+ Printf("Mutex %llu was previously locked here:\n",
+ rep->mutexes[(i + 1) % rep->mutexes.Size()]->id);
PrintStack(rep->stacks[2*i + 1]);
Printf("\n");
}
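Most of the tsan_report.cpp hunks above are format-string corrections: %p now receives a real void* (via reinterpret_cast), sizes use %zu, and 64-bit ids and os_ids use %llu, so every argument matches what the runtime's Printf expects. A tiny standalone illustration of the same convention with the standard printf; the values are placeholders.

    #include <cstdint>
    #include <cstdio>

    int main() {
      uintptr_t addr = 0x1000;     // addresses are stored as integers
      unsigned long long id = 42;  // 64-bit mutex id
      // %p needs an actual pointer argument; %llu needs a 64-bit unsigned value.
      std::printf("M%llu (%p)\n", id, reinterpret_cast<void *>(addr));
      return 0;
    }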
diff --git a/compiler-rt/lib/tsan/rtl/tsan_report.h b/compiler-rt/lib/tsan/rtl/tsan_report.h
index b4e4d8989379..d68c2db88828 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_report.h
+++ b/compiler-rt/lib/tsan/rtl/tsan_report.h
@@ -38,12 +38,8 @@ enum ReportType {
};
struct ReportStack {
- SymbolizedStack *frames;
- bool suppressable;
- static ReportStack *New();
-
- private:
- ReportStack();
+ SymbolizedStack *frames = nullptr;
+ bool suppressable = false;
};
struct ReportMopMutex {
@@ -73,28 +69,24 @@ enum ReportLocationType {
};
struct ReportLocation {
- ReportLocationType type;
- DataInfo global;
- uptr heap_chunk_start;
- uptr heap_chunk_size;
- uptr external_tag;
- int tid;
- int fd;
- bool suppressable;
- ReportStack *stack;
-
- static ReportLocation *New(ReportLocationType type);
- private:
- explicit ReportLocation(ReportLocationType type);
+ ReportLocationType type = ReportLocationGlobal;
+ DataInfo global = {};
+ uptr heap_chunk_start = 0;
+ uptr heap_chunk_size = 0;
+ uptr external_tag = 0;
+ Tid tid = kInvalidTid;
+ int fd = 0;
+ bool suppressable = false;
+ ReportStack *stack = nullptr;
};
struct ReportThread {
- int id;
+ Tid id;
tid_t os_id;
bool running;
ThreadType thread_type;
char *name;
- u32 parent_tid;
+ Tid parent_tid;
ReportStack *stack;
};
@@ -114,7 +106,7 @@ class ReportDesc {
Vector<ReportLocation*> locs;
Vector<ReportMutex*> mutexes;
Vector<ReportThread*> threads;
- Vector<int> unique_tids;
+ Vector<Tid> unique_tids;
ReportStack *sleep;
int count;
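The tsan_report.h changes above replace private constructors plus static New() factories with plain structs that use in-class default member initializers, so New<T>() (or any placement-new) yields a fully initialized object without hand-written constructor code. A small illustrative equivalent with invented field names:

    #include <cstdint>

    // Default member initializers make the default state part of the type,
    // so there is no separate constructor or factory to keep in sync.
    struct ExampleStack {
      const char *frames = nullptr;
      bool suppressable = false;
    };

    struct ExampleLocation {
      int type = 0;
      uintptr_t heap_chunk_start = 0;
      uintptr_t heap_chunk_size = 0;
      bool suppressable = false;
      ExampleStack *stack = nullptr;
    };

    int main() {
      ExampleLocation loc;  // every member takes its declared default
      return (loc.stack == nullptr && loc.heap_chunk_size == 0) ? 0 : 1;
    }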
diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp
index a21da9c81c6f..ff7726ef0608 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp
@@ -28,16 +28,6 @@
#include "tsan_symbolize.h"
#include "ubsan/ubsan_init.h"
-#ifdef __SSE3__
-// <emmintrin.h> transitively includes <stdlib.h>,
-// and it's prohibited to include std headers into tsan runtime.
-// So we do this dirty trick.
-#define _MM_MALLOC_H_INCLUDED
-#define __MM_MALLOC_H
-#include <emmintrin.h>
-typedef __m128i m128;
-#endif
-
volatile int __tsan_resumed = 0;
extern "C" void __tsan_resume() {
@@ -46,11 +36,17 @@ extern "C" void __tsan_resume() {
namespace __tsan {
+#if !SANITIZER_GO
+void (*on_initialize)(void);
+int (*on_finalize)(int);
+#endif
+
#if !SANITIZER_GO && !SANITIZER_MAC
__attribute__((tls_model("initial-exec")))
-THREADLOCAL char cur_thread_placeholder[sizeof(ThreadState)] ALIGNED(64);
+THREADLOCAL char cur_thread_placeholder[sizeof(ThreadState)] ALIGNED(
+ SANITIZER_CACHE_LINE_SIZE);
#endif
-static char ctx_placeholder[sizeof(Context)] ALIGNED(64);
+static char ctx_placeholder[sizeof(Context)] ALIGNED(SANITIZER_CACHE_LINE_SIZE);
Context *ctx;
// Can be overriden by a front-end.
@@ -62,24 +58,21 @@ void OnInitialize();
SANITIZER_WEAK_CXX_DEFAULT_IMPL
bool OnFinalize(bool failed) {
#if !SANITIZER_GO
- if (auto *ptr = dlsym(RTLD_DEFAULT, "__tsan_on_finalize"))
- return reinterpret_cast<decltype(&__tsan_on_finalize)>(ptr)(failed);
+ if (on_finalize)
+ return on_finalize(failed);
#endif
return failed;
}
SANITIZER_WEAK_CXX_DEFAULT_IMPL
void OnInitialize() {
#if !SANITIZER_GO
- if (auto *ptr = dlsym(RTLD_DEFAULT, "__tsan_on_initialize")) {
- return reinterpret_cast<decltype(&__tsan_on_initialize)>(ptr)();
- }
+ if (on_initialize)
+ on_initialize();
#endif
}
#endif
-static ALIGNED(64) char thread_registry_placeholder[sizeof(ThreadRegistry)];
-
-static ThreadContextBase *CreateThreadContext(u32 tid) {
+static ThreadContextBase *CreateThreadContext(Tid tid) {
// Map thread trace when context is created.
char name[50];
internal_snprintf(name, sizeof(name), "trace %u", tid);
@@ -98,13 +91,12 @@ static ThreadContextBase *CreateThreadContext(u32 tid) {
ReleaseMemoryPagesToOS(hdr_end, hdr + sizeof(Trace));
uptr unused = hdr + sizeof(Trace) - hdr_end;
if (hdr_end != (uptr)MmapFixedNoAccess(hdr_end, unused)) {
- Report("ThreadSanitizer: failed to mprotect(%p, %p)\n",
- hdr_end, unused);
+ Report("ThreadSanitizer: failed to mprotect [0x%zx-0x%zx) \n", hdr_end,
+ unused);
CHECK("unable to mprotect" && 0);
}
}
- void *mem = internal_alloc(MBlockThreadContex, sizeof(ThreadContext));
- return new(mem) ThreadContext(tid);
+ return New<ThreadContext>(tid);
}
#if !SANITIZER_GO
@@ -117,9 +109,8 @@ Context::Context()
: initialized(),
report_mtx(MutexTypeReport),
nreported(),
- nmissed_expected(),
- thread_registry(new (thread_registry_placeholder) ThreadRegistry(
- CreateThreadContext, kMaxTid, kThreadQuarantineSize, kMaxTidReuse)),
+ thread_registry(CreateThreadContext, kMaxTid, kThreadQuarantineSize,
+ kMaxTidReuse),
racy_mtx(MutexTypeRacy),
racy_stacks(),
racy_addresses(),
@@ -129,7 +120,7 @@ Context::Context()
}
// The objects are allocated in TLS, so one may rely on zero-initialization.
-ThreadState::ThreadState(Context *ctx, u32 tid, int unique_id, u64 epoch,
+ThreadState::ThreadState(Context *ctx, Tid tid, int unique_id, u64 epoch,
unsigned reuse_count, uptr stk_addr, uptr stk_size,
uptr tls_addr, uptr tls_size)
: fast_state(tid, epoch)
@@ -155,16 +146,53 @@ ThreadState::ThreadState(Context *ctx, u32 tid, int unique_id, u64 epoch,
last_sleep_clock(tid)
#endif
{
+ CHECK_EQ(reinterpret_cast<uptr>(this) % SANITIZER_CACHE_LINE_SIZE, 0);
+#if !SANITIZER_GO
+ // C/C++ uses fixed size shadow stack.
+ const int kInitStackSize = kShadowStackSize;
+ shadow_stack = static_cast<uptr *>(
+ MmapNoReserveOrDie(kInitStackSize * sizeof(uptr), "shadow stack"));
+ SetShadowRegionHugePageMode(reinterpret_cast<uptr>(shadow_stack),
+ kInitStackSize * sizeof(uptr));
+#else
+ // Go uses malloc-allocated shadow stack with dynamic size.
+ const int kInitStackSize = 8;
+ shadow_stack = static_cast<uptr *>(Alloc(kInitStackSize * sizeof(uptr)));
+#endif
+ shadow_stack_pos = shadow_stack;
+ shadow_stack_end = shadow_stack + kInitStackSize;
}
#if !SANITIZER_GO
-static void MemoryProfiler(Context *ctx, fd_t fd, int i) {
- uptr n_threads;
- uptr n_running_threads;
- ctx->thread_registry->GetNumberOfThreads(&n_threads, &n_running_threads);
+void MemoryProfiler(u64 uptime) {
+ if (ctx->memprof_fd == kInvalidFd)
+ return;
InternalMmapVector<char> buf(4096);
- WriteMemoryProfile(buf.data(), buf.size(), n_threads, n_running_threads);
- WriteToFile(fd, buf.data(), internal_strlen(buf.data()));
+ WriteMemoryProfile(buf.data(), buf.size(), uptime);
+ WriteToFile(ctx->memprof_fd, buf.data(), internal_strlen(buf.data()));
+}
+
+void InitializeMemoryProfiler() {
+ ctx->memprof_fd = kInvalidFd;
+ const char *fname = flags()->profile_memory;
+ if (!fname || !fname[0])
+ return;
+ if (internal_strcmp(fname, "stdout") == 0) {
+ ctx->memprof_fd = 1;
+ } else if (internal_strcmp(fname, "stderr") == 0) {
+ ctx->memprof_fd = 2;
+ } else {
+ InternalScopedString filename;
+ filename.append("%s.%d", fname, (int)internal_getpid());
+ ctx->memprof_fd = OpenFile(filename.data(), WrOnly);
+ if (ctx->memprof_fd == kInvalidFd) {
+ Printf("ThreadSanitizer: failed to open memory profile file '%s'\n",
+ filename.data());
+ return;
+ }
+ }
+ MemoryProfiler(0);
+ MaybeSpawnBackgroundThread();
}
static void *BackgroundThread(void *arg) {
@@ -172,28 +200,9 @@ static void *BackgroundThread(void *arg) {
// We don't use ScopedIgnoreInterceptors, because we want ignores to be
// enabled even when the thread function exits (e.g. during pthread thread
// shutdown code).
- cur_thread_init();
- cur_thread()->ignore_interceptors++;
+ cur_thread_init()->ignore_interceptors++;
const u64 kMs2Ns = 1000 * 1000;
-
- fd_t mprof_fd = kInvalidFd;
- if (flags()->profile_memory && flags()->profile_memory[0]) {
- if (internal_strcmp(flags()->profile_memory, "stdout") == 0) {
- mprof_fd = 1;
- } else if (internal_strcmp(flags()->profile_memory, "stderr") == 0) {
- mprof_fd = 2;
- } else {
- InternalScopedString filename;
- filename.append("%s.%d", flags()->profile_memory, (int)internal_getpid());
- fd_t fd = OpenFile(filename.data(), WrOnly);
- if (fd == kInvalidFd) {
- Printf("ThreadSanitizer: failed to open memory profile file '%s'\n",
- filename.data());
- } else {
- mprof_fd = fd;
- }
- }
- }
+ const u64 start = NanoTime();
u64 last_flush = NanoTime();
uptr last_rss = 0;
@@ -211,7 +220,6 @@ static void *BackgroundThread(void *arg) {
last_flush = NanoTime();
}
}
- // GetRSS can be expensive on huge programs, so don't do it every 100ms.
if (flags()->memory_limit_mb > 0) {
uptr rss = GetRSS();
uptr limit = uptr(flags()->memory_limit_mb) << 20;
@@ -227,9 +235,7 @@ static void *BackgroundThread(void *arg) {
last_rss = rss;
}
- // Write memory profile if requested.
- if (mprof_fd != kInvalidFd)
- MemoryProfiler(ctx, mprof_fd, i);
+ MemoryProfiler(now - start);
// Flush symbolizer cache if requested.
if (flags()->flush_symbolizer_ms > 0) {
@@ -260,7 +266,8 @@ static void StopBackgroundThread() {
#endif
void DontNeedShadowFor(uptr addr, uptr size) {
- ReleaseMemoryPagesToOS(MemToShadow(addr), MemToShadow(addr + size));
+ ReleaseMemoryPagesToOS(reinterpret_cast<uptr>(MemToShadow(addr)),
+ reinterpret_cast<uptr>(MemToShadow(addr + size)));
}
#if !SANITIZER_GO
@@ -297,7 +304,7 @@ void MapShadow(uptr addr, uptr size) {
"meta shadow"))
Die();
} else {
- // Mapping continous heap.
+ // Mapping continuous heap.
// Windows wants 64K alignment.
meta_begin = RoundDownTo(meta_begin, 64 << 10);
meta_end = RoundUpTo(meta_end, 64 << 10);
@@ -310,58 +317,22 @@ void MapShadow(uptr addr, uptr size) {
Die();
mapped_meta_end = meta_end;
}
- VPrintf(2, "mapped meta shadow for (%p-%p) at (%p-%p)\n",
- addr, addr+size, meta_begin, meta_end);
+ VPrintf(2, "mapped meta shadow for (0x%zx-0x%zx) at (0x%zx-0x%zx)\n", addr,
+ addr + size, meta_begin, meta_end);
}
void MapThreadTrace(uptr addr, uptr size, const char *name) {
- DPrintf("#0: Mapping trace at %p-%p(0x%zx)\n", addr, addr + size, size);
+ DPrintf("#0: Mapping trace at 0x%zx-0x%zx(0x%zx)\n", addr, addr + size, size);
CHECK_GE(addr, TraceMemBeg());
CHECK_LE(addr + size, TraceMemEnd());
CHECK_EQ(addr, addr & ~((64 << 10) - 1)); // windows wants 64K alignment
if (!MmapFixedSuperNoReserve(addr, size, name)) {
- Printf("FATAL: ThreadSanitizer can not mmap thread trace (%p/%p)\n",
- addr, size);
+ Printf("FATAL: ThreadSanitizer can not mmap thread trace (0x%zx/0x%zx)\n",
+ addr, size);
Die();
}
}
-static void CheckShadowMapping() {
- uptr beg, end;
- for (int i = 0; GetUserRegion(i, &beg, &end); i++) {
- // Skip cases for empty regions (heap definition for architectures that
- // do not use 64-bit allocator).
- if (beg == end)
- continue;
- VPrintf(3, "checking shadow region %p-%p\n", beg, end);
- uptr prev = 0;
- for (uptr p0 = beg; p0 <= end; p0 += (end - beg) / 4) {
- for (int x = -(int)kShadowCell; x <= (int)kShadowCell; x += kShadowCell) {
- const uptr p = RoundDown(p0 + x, kShadowCell);
- if (p < beg || p >= end)
- continue;
- const uptr s = MemToShadow(p);
- const uptr m = (uptr)MemToMeta(p);
- VPrintf(3, " checking pointer %p: shadow=%p meta=%p\n", p, s, m);
- CHECK(IsAppMem(p));
- CHECK(IsShadowMem(s));
- CHECK_EQ(p, ShadowToMem(s));
- CHECK(IsMetaMem(m));
- if (prev) {
- // Ensure that shadow and meta mappings are linear within a single
- // user range. Lots of code that processes memory ranges assumes it.
- const uptr prev_s = MemToShadow(prev);
- const uptr prev_m = (uptr)MemToMeta(prev);
- CHECK_EQ(s - prev_s, (p - prev) * kShadowMultiplier);
- CHECK_EQ((m - prev_m) / kMetaShadowSize,
- (p - prev) / kMetaShadowCell);
- }
- prev = p;
- }
- }
- }
-}
-
#if !SANITIZER_GO
static void OnStackUnwind(const SignalContext &sig, const void *,
BufferedStackTrace *stack) {
@@ -386,9 +357,10 @@ void CheckUnwind() {
PrintCurrentStackSlow(StackTrace::GetCurrentPc());
}
+bool is_initialized;
+
void Initialize(ThreadState *thr) {
// Thread safe because done before all threads exist.
- static bool is_initialized = false;
if (is_initialized)
return;
is_initialized = true;
@@ -420,7 +392,6 @@ void Initialize(ThreadState *thr) {
Processor *proc = ProcCreate();
ProcWire(proc, thr);
InitializeInterceptors();
- CheckShadowMapping();
InitializePlatform();
InitializeDynamicAnnotations();
#if !SANITIZER_GO
@@ -440,8 +411,8 @@ void Initialize(ThreadState *thr) {
(int)internal_getpid());
// Initialize thread 0.
- int tid = ThreadCreate(thr, 0, 0, true);
- CHECK_EQ(tid, 0);
+ Tid tid = ThreadCreate(thr, 0, 0, true);
+ CHECK_EQ(tid, kMainTid);
ThreadStart(thr, tid, GetTid(), ThreadType::Regular);
#if TSAN_CONTAINS_UBSAN
__ubsan::InitAsPlugin();
@@ -450,6 +421,7 @@ void Initialize(ThreadState *thr) {
#if !SANITIZER_GO
Symbolizer::LateInitialize();
+ InitializeMemoryProfiler();
#endif
if (flags()->stop_on_start) {
@@ -506,18 +478,8 @@ int Finalize(ThreadState *thr) {
#endif
}
- if (ctx->nmissed_expected) {
- failed = true;
- Printf("ThreadSanitizer: missed %d expected races\n",
- ctx->nmissed_expected);
- }
-
if (common_flags()->print_suppressions)
PrintMatchedSuppressions();
-#if !SANITIZER_GO
- if (flags()->print_benign)
- PrintMatchedBenignRaces();
-#endif
failed = OnFinalize(failed);
@@ -526,7 +488,7 @@ int Finalize(ThreadState *thr) {
#if !SANITIZER_GO
void ForkBefore(ThreadState *thr, uptr pc) NO_THREAD_SAFETY_ANALYSIS {
- ctx->thread_registry->Lock();
+ ctx->thread_registry.Lock();
ctx->report_mtx.Lock();
ScopedErrorReportLock::Lock();
// Suppress all reports in the pthread_atfork callbacks.
@@ -545,22 +507,24 @@ void ForkParentAfter(ThreadState *thr, uptr pc) NO_THREAD_SAFETY_ANALYSIS {
thr->ignore_interceptors--;
ScopedErrorReportLock::Unlock();
ctx->report_mtx.Unlock();
- ctx->thread_registry->Unlock();
+ ctx->thread_registry.Unlock();
}
-void ForkChildAfter(ThreadState *thr, uptr pc) NO_THREAD_SAFETY_ANALYSIS {
+void ForkChildAfter(ThreadState *thr, uptr pc,
+ bool start_thread) NO_THREAD_SAFETY_ANALYSIS {
thr->suppress_reports--; // Enabled in ForkBefore.
thr->ignore_interceptors--;
ScopedErrorReportLock::Unlock();
ctx->report_mtx.Unlock();
- ctx->thread_registry->Unlock();
+ ctx->thread_registry.Unlock();
uptr nthread = 0;
- ctx->thread_registry->GetNumberOfThreads(0, 0, &nthread /* alive threads */);
+ ctx->thread_registry.GetNumberOfThreads(0, 0, &nthread /* alive threads */);
VPrintf(1, "ThreadSanitizer: forked new process with pid %d,"
" parent had %d threads\n", (int)internal_getpid(), (int)nthread);
if (nthread == 1) {
- StartBackgroundThread();
+ if (start_thread)
+ StartBackgroundThread();
} else {
// We've just forked a multi-threaded process. We cannot reasonably function
// after that (some mutexes may be locked before fork). So just enable
@@ -578,19 +542,18 @@ NOINLINE
void GrowShadowStack(ThreadState *thr) {
const int sz = thr->shadow_stack_end - thr->shadow_stack;
const int newsz = 2 * sz;
- uptr *newstack = (uptr*)internal_alloc(MBlockShadowStack,
- newsz * sizeof(uptr));
+ auto *newstack = (uptr *)Alloc(newsz * sizeof(uptr));
internal_memcpy(newstack, thr->shadow_stack, sz * sizeof(uptr));
- internal_free(thr->shadow_stack);
+ Free(thr->shadow_stack);
thr->shadow_stack = newstack;
thr->shadow_stack_pos = newstack + sz;
thr->shadow_stack_end = newstack + newsz;
}
#endif
-u32 CurrentStackId(ThreadState *thr, uptr pc) {
+StackID CurrentStackId(ThreadState *thr, uptr pc) {
if (!thr->is_inited) // May happen during bootstrap.
- return 0;
+ return kInvalidStackID;
if (pc != 0) {
#if !SANITIZER_GO
DCHECK_LT(thr->shadow_stack_pos, thr->shadow_stack_end);
@@ -601,13 +564,78 @@ u32 CurrentStackId(ThreadState *thr, uptr pc) {
thr->shadow_stack_pos[0] = pc;
thr->shadow_stack_pos++;
}
- u32 id = StackDepotPut(
+ StackID id = StackDepotPut(
StackTrace(thr->shadow_stack, thr->shadow_stack_pos - thr->shadow_stack));
if (pc != 0)
thr->shadow_stack_pos--;
return id;
}
+namespace v3 {
+
+NOINLINE
+void TraceSwitchPart(ThreadState *thr) {
+ Trace *trace = &thr->tctx->trace;
+ Event *pos = reinterpret_cast<Event *>(atomic_load_relaxed(&thr->trace_pos));
+ DCHECK_EQ(reinterpret_cast<uptr>(pos + 1) & TracePart::kAlignment, 0);
+ auto *part = trace->parts.Back();
+ DPrintf("TraceSwitchPart part=%p pos=%p\n", part, pos);
+ if (part) {
+ // We can get here when we still have space in the current trace part.
+ // The fast-path check in TraceAcquire has false positives in the middle of
+ // the part. Check if we are indeed at the end of the current part or not,
+ // and fill any gaps with NopEvent's.
+ Event *end = &part->events[TracePart::kSize];
+ DCHECK_GE(pos, &part->events[0]);
+ DCHECK_LE(pos, end);
+ if (pos + 1 < end) {
+ if ((reinterpret_cast<uptr>(pos) & TracePart::kAlignment) ==
+ TracePart::kAlignment)
+ *pos++ = NopEvent;
+ *pos++ = NopEvent;
+ DCHECK_LE(pos + 2, end);
+ atomic_store_relaxed(&thr->trace_pos, reinterpret_cast<uptr>(pos));
+ // Ensure we set up the trace so that the next TraceAcquire
+ // won't detect trace part end.
+ Event *ev;
+ CHECK(TraceAcquire(thr, &ev));
+ return;
+ }
+ // We are indeed at the end.
+ for (; pos < end; pos++) *pos = NopEvent;
+ }
+#if !SANITIZER_GO
+ if (ctx->after_multithreaded_fork) {
+ // We just need to survive till exec.
+ CHECK(part);
+ atomic_store_relaxed(&thr->trace_pos,
+ reinterpret_cast<uptr>(&part->events[0]));
+ return;
+ }
+#endif
+ part = new (MmapOrDie(sizeof(TracePart), "TracePart")) TracePart();
+ part->trace = trace;
+ thr->trace_prev_pc = 0;
+ {
+ Lock lock(&trace->mtx);
+ trace->parts.PushBack(part);
+ atomic_store_relaxed(&thr->trace_pos,
+ reinterpret_cast<uptr>(&part->events[0]));
+ }
+ // Make this part self-sufficient by restoring the current stack
+ // and mutex set in the beginning of the trace.
+ TraceTime(thr);
+ for (uptr *pos = &thr->shadow_stack[0]; pos < thr->shadow_stack_pos; pos++)
+ CHECK(TryTraceFunc(thr, *pos));
+ for (uptr i = 0; i < thr->mset.Size(); i++) {
+ MutexSet::Desc d = thr->mset.Get(i);
+ TraceMutexLock(thr, d.write ? EventType::kLock : EventType::kRLock, 0,
+ d.addr, d.stack_id);
+ }
+}
+
+} // namespace v3
+
void TraceSwitch(ThreadState *thr) {
#if !SANITIZER_GO
if (ctx->after_multithreaded_fork)
@@ -624,9 +652,7 @@ void TraceSwitch(ThreadState *thr) {
thr->nomalloc--;
}
-Trace *ThreadTrace(int tid) {
- return (Trace*)GetThreadTraceHeader(tid);
-}
+Trace *ThreadTrace(Tid tid) { return (Trace *)GetThreadTraceHeader(tid); }
uptr TraceTopPC(ThreadState *thr) {
Event *events = (Event*)GetThreadTrace(thr->tid);
@@ -652,435 +678,18 @@ extern "C" void __tsan_report_race() {
}
#endif
-ALWAYS_INLINE
-Shadow LoadShadow(u64 *p) {
- u64 raw = atomic_load((atomic_uint64_t*)p, memory_order_relaxed);
- return Shadow(raw);
-}
-
-ALWAYS_INLINE
-void StoreShadow(u64 *sp, u64 s) {
- atomic_store((atomic_uint64_t*)sp, s, memory_order_relaxed);
-}
-
-ALWAYS_INLINE
-void StoreIfNotYetStored(u64 *sp, u64 *s) {
- StoreShadow(sp, *s);
- *s = 0;
-}
-
-ALWAYS_INLINE
-void HandleRace(ThreadState *thr, u64 *shadow_mem,
- Shadow cur, Shadow old) {
- thr->racy_state[0] = cur.raw();
- thr->racy_state[1] = old.raw();
- thr->racy_shadow_addr = shadow_mem;
-#if !SANITIZER_GO
- HACKY_CALL(__tsan_report_race);
-#else
- ReportRace(thr);
-#endif
-}
-
-static inline bool HappensBefore(Shadow old, ThreadState *thr) {
- return thr->clock.get(old.TidWithIgnore()) >= old.epoch();
-}
-
-ALWAYS_INLINE
-void MemoryAccessImpl1(ThreadState *thr, uptr addr,
- int kAccessSizeLog, bool kAccessIsWrite, bool kIsAtomic,
- u64 *shadow_mem, Shadow cur) {
-
- // This potentially can live in an MMX/SSE scratch register.
- // The required intrinsics are:
- // __m128i _mm_move_epi64(__m128i*);
- // _mm_storel_epi64(u64*, __m128i);
- u64 store_word = cur.raw();
- bool stored = false;
-
- // scan all the shadow values and dispatch to 4 categories:
- // same, replace, candidate and race (see comments below).
- // we consider only 3 cases regarding access sizes:
- // equal, intersect and not intersect. initially I considered
- // larger and smaller as well, which allowed replacing some
- // 'candidates' with 'same' or 'replace', but I think
- // it's just not worth it (performance- and complexity-wise).
-
- Shadow old(0);
-
- // In release mode we manually unroll the loop,
- // because empirically gcc generates better code this way.
- // However, we can't afford unrolling in debug mode, because the function
- // consumes almost 4K of stack. Gtest gives only 4K of stack to death test
- // threads, which is not enough for the unrolled loop.
-#if SANITIZER_DEBUG
- for (int idx = 0; idx < 4; idx++) {
-#include "tsan_update_shadow_word_inl.h"
- }
-#else
- int idx = 0;
-#include "tsan_update_shadow_word_inl.h"
- idx = 1;
- if (stored) {
-#include "tsan_update_shadow_word_inl.h"
- } else {
-#include "tsan_update_shadow_word_inl.h"
- }
- idx = 2;
- if (stored) {
-#include "tsan_update_shadow_word_inl.h"
- } else {
-#include "tsan_update_shadow_word_inl.h"
- }
- idx = 3;
- if (stored) {
-#include "tsan_update_shadow_word_inl.h"
- } else {
-#include "tsan_update_shadow_word_inl.h"
- }
-#endif
-
- // we did not find any races and had already stored
- // the current access info, so we are done
- if (LIKELY(stored))
- return;
- // choose a random candidate slot and replace it
- StoreShadow(shadow_mem + (cur.epoch() % kShadowCnt), store_word);
- return;
- RACE:
- HandleRace(thr, shadow_mem, cur, old);
- return;
-}
-
-void UnalignedMemoryAccess(ThreadState *thr, uptr pc, uptr addr,
- int size, bool kAccessIsWrite, bool kIsAtomic) {
- while (size) {
- int size1 = 1;
- int kAccessSizeLog = kSizeLog1;
- if (size >= 8 && (addr & ~7) == ((addr + 7) & ~7)) {
- size1 = 8;
- kAccessSizeLog = kSizeLog8;
- } else if (size >= 4 && (addr & ~7) == ((addr + 3) & ~7)) {
- size1 = 4;
- kAccessSizeLog = kSizeLog4;
- } else if (size >= 2 && (addr & ~7) == ((addr + 1) & ~7)) {
- size1 = 2;
- kAccessSizeLog = kSizeLog2;
- }
- MemoryAccess(thr, pc, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic);
- addr += size1;
- size -= size1;
- }
-}
-
-ALWAYS_INLINE
-bool ContainsSameAccessSlow(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
- Shadow cur(a);
- for (uptr i = 0; i < kShadowCnt; i++) {
- Shadow old(LoadShadow(&s[i]));
- if (Shadow::Addr0AndSizeAreEqual(cur, old) &&
- old.TidWithIgnore() == cur.TidWithIgnore() &&
- old.epoch() > sync_epoch &&
- old.IsAtomic() == cur.IsAtomic() &&
- old.IsRead() <= cur.IsRead())
- return true;
- }
- return false;
-}
-
-#if defined(__SSE3__)
-#define SHUF(v0, v1, i0, i1, i2, i3) _mm_castps_si128(_mm_shuffle_ps( \
- _mm_castsi128_ps(v0), _mm_castsi128_ps(v1), \
- (i0)*1 + (i1)*4 + (i2)*16 + (i3)*64))
-ALWAYS_INLINE
-bool ContainsSameAccessFast(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
- // This is an optimized version of ContainsSameAccessSlow.
- // load current access into access[0:63]
- const m128 access = _mm_cvtsi64_si128(a);
- // duplicate high part of access in addr0:
- // addr0[0:31] = access[32:63]
- // addr0[32:63] = access[32:63]
- // addr0[64:95] = access[32:63]
- // addr0[96:127] = access[32:63]
- const m128 addr0 = SHUF(access, access, 1, 1, 1, 1);
- // load 4 shadow slots
- const m128 shadow0 = _mm_load_si128((__m128i*)s);
- const m128 shadow1 = _mm_load_si128((__m128i*)s + 1);
- // load high parts of 4 shadow slots into addr_vect:
- // addr_vect[0:31] = shadow0[32:63]
- // addr_vect[32:63] = shadow0[96:127]
- // addr_vect[64:95] = shadow1[32:63]
- // addr_vect[96:127] = shadow1[96:127]
- m128 addr_vect = SHUF(shadow0, shadow1, 1, 3, 1, 3);
- if (!is_write) {
- // set IsRead bit in addr_vect
- const m128 rw_mask1 = _mm_cvtsi64_si128(1<<15);
- const m128 rw_mask = SHUF(rw_mask1, rw_mask1, 0, 0, 0, 0);
- addr_vect = _mm_or_si128(addr_vect, rw_mask);
- }
- // addr0 == addr_vect?
- const m128 addr_res = _mm_cmpeq_epi32(addr0, addr_vect);
- // epoch1[0:63] = sync_epoch
- const m128 epoch1 = _mm_cvtsi64_si128(sync_epoch);
- // epoch[0:31] = sync_epoch[0:31]
- // epoch[32:63] = sync_epoch[0:31]
- // epoch[64:95] = sync_epoch[0:31]
- // epoch[96:127] = sync_epoch[0:31]
- const m128 epoch = SHUF(epoch1, epoch1, 0, 0, 0, 0);
- // load low parts of shadow cell epochs into epoch_vect:
- // epoch_vect[0:31] = shadow0[0:31]
- // epoch_vect[32:63] = shadow0[64:95]
- // epoch_vect[64:95] = shadow1[0:31]
- // epoch_vect[96:127] = shadow1[64:95]
- const m128 epoch_vect = SHUF(shadow0, shadow1, 0, 2, 0, 2);
- // epoch_vect >= sync_epoch?
- const m128 epoch_res = _mm_cmpgt_epi32(epoch_vect, epoch);
- // addr_res & epoch_res
- const m128 res = _mm_and_si128(addr_res, epoch_res);
- // mask[0] = res[7]
- // mask[1] = res[15]
- // ...
- // mask[15] = res[127]
- const int mask = _mm_movemask_epi8(res);
- return mask != 0;
-}
-#endif
-
-ALWAYS_INLINE
-bool ContainsSameAccess(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
-#if defined(__SSE3__)
- bool res = ContainsSameAccessFast(s, a, sync_epoch, is_write);
- // NOTE: this check can fail if the shadow is concurrently mutated
- // by other threads. But it still can be useful if you modify
- // ContainsSameAccessFast and want to ensure that it's not completely broken.
- // DCHECK_EQ(res, ContainsSameAccessSlow(s, a, sync_epoch, is_write));
- return res;
-#else
- return ContainsSameAccessSlow(s, a, sync_epoch, is_write);
-#endif
-}
-
-ALWAYS_INLINE USED
-void MemoryAccess(ThreadState *thr, uptr pc, uptr addr,
- int kAccessSizeLog, bool kAccessIsWrite, bool kIsAtomic) {
- u64 *shadow_mem = (u64*)MemToShadow(addr);
- DPrintf2("#%d: MemoryAccess: @%p %p size=%d"
- " is_write=%d shadow_mem=%p {%zx, %zx, %zx, %zx}\n",
- (int)thr->fast_state.tid(), (void*)pc, (void*)addr,
- (int)(1 << kAccessSizeLog), kAccessIsWrite, shadow_mem,
- (uptr)shadow_mem[0], (uptr)shadow_mem[1],
- (uptr)shadow_mem[2], (uptr)shadow_mem[3]);
-#if SANITIZER_DEBUG
- if (!IsAppMem(addr)) {
- Printf("Access to non app mem %zx\n", addr);
- DCHECK(IsAppMem(addr));
- }
- if (!IsShadowMem((uptr)shadow_mem)) {
- Printf("Bad shadow addr %p (%zx)\n", shadow_mem, addr);
- DCHECK(IsShadowMem((uptr)shadow_mem));
- }
-#endif
-
- if (!SANITIZER_GO && !kAccessIsWrite && *shadow_mem == kShadowRodata) {
- // Access to .rodata section, no races here.
- // Measurements show that it can be 10-20% of all memory accesses.
- return;
- }
-
- FastState fast_state = thr->fast_state;
- if (UNLIKELY(fast_state.GetIgnoreBit())) {
- return;
- }
-
- Shadow cur(fast_state);
- cur.SetAddr0AndSizeLog(addr & 7, kAccessSizeLog);
- cur.SetWrite(kAccessIsWrite);
- cur.SetAtomic(kIsAtomic);
-
- if (LIKELY(ContainsSameAccess(shadow_mem, cur.raw(),
- thr->fast_synch_epoch, kAccessIsWrite))) {
- return;
- }
-
- if (kCollectHistory) {
- fast_state.IncrementEpoch();
- thr->fast_state = fast_state;
- TraceAddEvent(thr, fast_state, EventTypeMop, pc);
- cur.IncrementEpoch();
- }
-
- MemoryAccessImpl1(thr, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic,
- shadow_mem, cur);
-}
-
-// Called by MemoryAccessRange in tsan_rtl_thread.cpp
-ALWAYS_INLINE USED
-void MemoryAccessImpl(ThreadState *thr, uptr addr,
- int kAccessSizeLog, bool kAccessIsWrite, bool kIsAtomic,
- u64 *shadow_mem, Shadow cur) {
- if (LIKELY(ContainsSameAccess(shadow_mem, cur.raw(),
- thr->fast_synch_epoch, kAccessIsWrite))) {
- return;
- }
-
- MemoryAccessImpl1(thr, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic,
- shadow_mem, cur);
-}
-
-static void MemoryRangeSet(ThreadState *thr, uptr pc, uptr addr, uptr size,
- u64 val) {
- (void)thr;
- (void)pc;
- if (size == 0)
- return;
- // FIXME: fix me.
- uptr offset = addr % kShadowCell;
- if (offset) {
- offset = kShadowCell - offset;
- if (size <= offset)
- return;
- addr += offset;
- size -= offset;
- }
- DCHECK_EQ(addr % 8, 0);
- // If a user passes some insane arguments (memset(0)),
- // let it just crash as usual.
- if (!IsAppMem(addr) || !IsAppMem(addr + size - 1))
- return;
- // Don't want to touch lots of shadow memory.
- // If a program maps 10MB stack, there is no need to reset the whole range.
- size = (size + (kShadowCell - 1)) & ~(kShadowCell - 1);
- // UnmapOrDie/MmapFixedNoReserve does not work on Windows.
- if (SANITIZER_WINDOWS || size < common_flags()->clear_shadow_mmap_threshold) {
- u64 *p = (u64*)MemToShadow(addr);
- CHECK(IsShadowMem((uptr)p));
- CHECK(IsShadowMem((uptr)(p + size * kShadowCnt / kShadowCell - 1)));
- // FIXME: may overwrite a part outside the region
- for (uptr i = 0; i < size / kShadowCell * kShadowCnt;) {
- p[i++] = val;
- for (uptr j = 1; j < kShadowCnt; j++)
- p[i++] = 0;
- }
- } else {
- // The region is big, reset only beginning and end.
- const uptr kPageSize = GetPageSizeCached();
- u64 *begin = (u64*)MemToShadow(addr);
- u64 *end = begin + size / kShadowCell * kShadowCnt;
- u64 *p = begin;
- // Set at least the first kPageSize/2 bytes of shadow, continuing to a page boundary.
- while ((p < begin + kPageSize / kShadowSize / 2) || ((uptr)p % kPageSize)) {
- *p++ = val;
- for (uptr j = 1; j < kShadowCnt; j++)
- *p++ = 0;
- }
- // Reset middle part.
- u64 *p1 = p;
- p = RoundDown(end, kPageSize);
- if (!MmapFixedSuperNoReserve((uptr)p1, (uptr)p - (uptr)p1))
- Die();
- // Set the ending.
- while (p < end) {
- *p++ = val;
- for (uptr j = 1; j < kShadowCnt; j++)
- *p++ = 0;
- }
- }
-}
-
-void MemoryResetRange(ThreadState *thr, uptr pc, uptr addr, uptr size) {
- MemoryRangeSet(thr, pc, addr, size, 0);
-}
-
-void MemoryRangeFreed(ThreadState *thr, uptr pc, uptr addr, uptr size) {
- // Processing more than 1k (4k of shadow) is expensive,
- // can cause excessive memory consumption (the user does not necessarily touch
- // the whole range), and is most likely unnecessary.
- if (size > 1024)
- size = 1024;
- CHECK_EQ(thr->is_freeing, false);
- thr->is_freeing = true;
- MemoryAccessRange(thr, pc, addr, size, true);
- thr->is_freeing = false;
- if (kCollectHistory) {
- thr->fast_state.IncrementEpoch();
- TraceAddEvent(thr, thr->fast_state, EventTypeMop, pc);
- }
- Shadow s(thr->fast_state);
- s.ClearIgnoreBit();
- s.MarkAsFreed();
- s.SetWrite(true);
- s.SetAddr0AndSizeLog(0, 3);
- MemoryRangeSet(thr, pc, addr, size, s.raw());
-}
-
-void MemoryRangeImitateWrite(ThreadState *thr, uptr pc, uptr addr, uptr size) {
- if (kCollectHistory) {
- thr->fast_state.IncrementEpoch();
- TraceAddEvent(thr, thr->fast_state, EventTypeMop, pc);
- }
- Shadow s(thr->fast_state);
- s.ClearIgnoreBit();
- s.SetWrite(true);
- s.SetAddr0AndSizeLog(0, 3);
- MemoryRangeSet(thr, pc, addr, size, s.raw());
-}
-
-void MemoryRangeImitateWriteOrResetRange(ThreadState *thr, uptr pc, uptr addr,
- uptr size) {
- if (thr->ignore_reads_and_writes == 0)
- MemoryRangeImitateWrite(thr, pc, addr, size);
- else
- MemoryResetRange(thr, pc, addr, size);
-}
-
-ALWAYS_INLINE USED
-void FuncEntry(ThreadState *thr, uptr pc) {
- DPrintf2("#%d: FuncEntry %p\n", (int)thr->fast_state.tid(), (void*)pc);
- if (kCollectHistory) {
- thr->fast_state.IncrementEpoch();
- TraceAddEvent(thr, thr->fast_state, EventTypeFuncEnter, pc);
- }
-
- // Shadow stack maintenance can be replaced with
- // stack unwinding during trace switch (which presumably must be faster).
- DCHECK_GE(thr->shadow_stack_pos, thr->shadow_stack);
-#if !SANITIZER_GO
- DCHECK_LT(thr->shadow_stack_pos, thr->shadow_stack_end);
-#else
- if (thr->shadow_stack_pos == thr->shadow_stack_end)
- GrowShadowStack(thr);
-#endif
- thr->shadow_stack_pos[0] = pc;
- thr->shadow_stack_pos++;
-}
-
-ALWAYS_INLINE USED
-void FuncExit(ThreadState *thr) {
- DPrintf2("#%d: FuncExit\n", (int)thr->fast_state.tid());
- if (kCollectHistory) {
- thr->fast_state.IncrementEpoch();
- TraceAddEvent(thr, thr->fast_state, EventTypeFuncExit, 0);
- }
-
- DCHECK_GT(thr->shadow_stack_pos, thr->shadow_stack);
-#if !SANITIZER_GO
- DCHECK_LT(thr->shadow_stack_pos, thr->shadow_stack_end);
-#endif
- thr->shadow_stack_pos--;
-}
-
-void ThreadIgnoreBegin(ThreadState *thr, uptr pc, bool save_stack) {
+void ThreadIgnoreBegin(ThreadState *thr, uptr pc) {
DPrintf("#%d: ThreadIgnoreBegin\n", thr->tid);
thr->ignore_reads_and_writes++;
CHECK_GT(thr->ignore_reads_and_writes, 0);
thr->fast_state.SetIgnoreBit();
#if !SANITIZER_GO
- if (save_stack && !ctx->after_multithreaded_fork)
+ if (pc && !ctx->after_multithreaded_fork)
thr->mop_ignore_set.Add(CurrentStackId(thr, pc));
#endif
}
-void ThreadIgnoreEnd(ThreadState *thr, uptr pc) {
+void ThreadIgnoreEnd(ThreadState *thr) {
DPrintf("#%d: ThreadIgnoreEnd\n", thr->tid);
CHECK_GT(thr->ignore_reads_and_writes, 0);
thr->ignore_reads_and_writes--;
@@ -1100,17 +709,17 @@ uptr __tsan_testonly_shadow_stack_current_size() {
}
#endif
-void ThreadIgnoreSyncBegin(ThreadState *thr, uptr pc, bool save_stack) {
+void ThreadIgnoreSyncBegin(ThreadState *thr, uptr pc) {
DPrintf("#%d: ThreadIgnoreSyncBegin\n", thr->tid);
thr->ignore_sync++;
CHECK_GT(thr->ignore_sync, 0);
#if !SANITIZER_GO
- if (save_stack && !ctx->after_multithreaded_fork)
+ if (pc && !ctx->after_multithreaded_fork)
thr->sync_ignore_set.Add(CurrentStackId(thr, pc));
#endif
}
-void ThreadIgnoreSyncEnd(ThreadState *thr, uptr pc) {
+void ThreadIgnoreSyncEnd(ThreadState *thr) {
DPrintf("#%d: ThreadIgnoreSyncEnd\n", thr->tid);
CHECK_GT(thr->ignore_sync, 0);
thr->ignore_sync--;
@@ -1152,8 +761,3 @@ MutexMeta mutex_meta[] = {
void PrintMutexPC(uptr pc) { StackTrace(&pc, 1).Print(); }
} // namespace __sanitizer
#endif
-
-#if !SANITIZER_GO
-// Must be included in this file to make sure everything is inlined.
-# include "tsan_interface_inl.h"
-#endif
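Among the tsan_rtl.cpp changes above, the optional __tsan_on_initialize and __tsan_on_finalize hooks are now resolved once with dlsym() and cached in the on_initialize/on_finalize function pointers rather than being looked up on every call. A hedged sketch of that caching pattern outside the runtime; the symbol names are placeholders.

    #include <dlfcn.h>
    #include <cstdio>

    // Hooks a client may or may not define; resolved once, then reused.
    static void (*g_on_init)();
    static int (*g_on_fini)(int);

    static void ResolveOptionalHooks() {
      g_on_init = reinterpret_cast<void (*)()>(
          dlsym(RTLD_DEFAULT, "example_on_initialize"));
      g_on_fini = reinterpret_cast<int (*)(int)>(
          dlsym(RTLD_DEFAULT, "example_on_finalize"));
    }

    int main() {
      ResolveOptionalHooks();
      if (g_on_init)  // call only the hooks the client actually provides
        g_on_init();
      int failed = 0;
      if (g_on_fini)
        failed = g_on_fini(failed);
      std::printf("failed=%d\n", failed);
      return failed;
    }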
diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl.h b/compiler-rt/lib/tsan/rtl/tsan_rtl.h
index 8567d0ade877..c71b27e1cbf5 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_rtl.h
+++ b/compiler-rt/lib/tsan/rtl/tsan_rtl.h
@@ -37,14 +37,15 @@
#include "tsan_clock.h"
#include "tsan_defs.h"
#include "tsan_flags.h"
+#include "tsan_ignoreset.h"
#include "tsan_mman.h"
-#include "tsan_sync.h"
-#include "tsan_trace.h"
-#include "tsan_report.h"
-#include "tsan_platform.h"
#include "tsan_mutexset.h"
-#include "tsan_ignoreset.h"
+#include "tsan_platform.h"
+#include "tsan_report.h"
+#include "tsan_shadow.h"
#include "tsan_stack_trace.h"
+#include "tsan_sync.h"
+#include "tsan_trace.h"
#if SANITIZER_WORDSIZE != 64
# error "ThreadSanitizer is supported only on 64-bit platforms"
@@ -69,6 +70,11 @@ struct AP32 {
typedef SizeClassAllocator32<AP32> PrimaryAllocator;
#else
struct AP64 { // Allocator64 parameters. Deliberately using a short name.
+# if defined(__s390x__)
+ typedef MappingS390x Mapping;
+# else
+ typedef Mapping48AddressSpace Mapping;
+# endif
static const uptr kSpaceBeg = Mapping::kHeapMemBeg;
static const uptr kSpaceSize = Mapping::kHeapMemEnd - Mapping::kHeapMemBeg;
static const uptr kMetadataSize = 0;
@@ -84,240 +90,6 @@ typedef Allocator::AllocatorCache AllocatorCache;
Allocator *allocator();
#endif
-const u64 kShadowRodata = (u64)-1; // .rodata shadow marker
-
-// FastState (from most significant bit):
-// ignore : 1
-// tid : kTidBits
-// unused : -
-// history_size : 3
-// epoch : kClkBits
-class FastState {
- public:
- FastState(u64 tid, u64 epoch) {
- x_ = tid << kTidShift;
- x_ |= epoch;
- DCHECK_EQ(tid, this->tid());
- DCHECK_EQ(epoch, this->epoch());
- DCHECK_EQ(GetIgnoreBit(), false);
- }
-
- explicit FastState(u64 x)
- : x_(x) {
- }
-
- u64 raw() const {
- return x_;
- }
-
- u64 tid() const {
- u64 res = (x_ & ~kIgnoreBit) >> kTidShift;
- return res;
- }
-
- u64 TidWithIgnore() const {
- u64 res = x_ >> kTidShift;
- return res;
- }
-
- u64 epoch() const {
- u64 res = x_ & ((1ull << kClkBits) - 1);
- return res;
- }
-
- void IncrementEpoch() {
- u64 old_epoch = epoch();
- x_ += 1;
- DCHECK_EQ(old_epoch + 1, epoch());
- (void)old_epoch;
- }
-
- void SetIgnoreBit() { x_ |= kIgnoreBit; }
- void ClearIgnoreBit() { x_ &= ~kIgnoreBit; }
- bool GetIgnoreBit() const { return (s64)x_ < 0; }
-
- void SetHistorySize(int hs) {
- CHECK_GE(hs, 0);
- CHECK_LE(hs, 7);
- x_ = (x_ & ~(kHistoryMask << kHistoryShift)) | (u64(hs) << kHistoryShift);
- }
-
- ALWAYS_INLINE
- int GetHistorySize() const {
- return (int)((x_ >> kHistoryShift) & kHistoryMask);
- }
-
- void ClearHistorySize() {
- SetHistorySize(0);
- }
-
- ALWAYS_INLINE
- u64 GetTracePos() const {
- const int hs = GetHistorySize();
- // When hs == 0, the trace consists of 2 parts.
- const u64 mask = (1ull << (kTracePartSizeBits + hs + 1)) - 1;
- return epoch() & mask;
- }
-
- private:
- friend class Shadow;
- static const int kTidShift = 64 - kTidBits - 1;
- static const u64 kIgnoreBit = 1ull << 63;
- static const u64 kFreedBit = 1ull << 63;
- static const u64 kHistoryShift = kClkBits;
- static const u64 kHistoryMask = 7;
- u64 x_;
-};
-
-// Shadow (from most significant bit):
-// freed : 1
-// tid : kTidBits
-// is_atomic : 1
-// is_read : 1
-// size_log : 2
-// addr0 : 3
-// epoch : kClkBits
-class Shadow : public FastState {
- public:
- explicit Shadow(u64 x)
- : FastState(x) {
- }
-
- explicit Shadow(const FastState &s)
- : FastState(s.x_) {
- ClearHistorySize();
- }
-
- void SetAddr0AndSizeLog(u64 addr0, unsigned kAccessSizeLog) {
- DCHECK_EQ((x_ >> kClkBits) & 31, 0);
- DCHECK_LE(addr0, 7);
- DCHECK_LE(kAccessSizeLog, 3);
- x_ |= ((kAccessSizeLog << 3) | addr0) << kClkBits;
- DCHECK_EQ(kAccessSizeLog, size_log());
- DCHECK_EQ(addr0, this->addr0());
- }
-
- void SetWrite(unsigned kAccessIsWrite) {
- DCHECK_EQ(x_ & kReadBit, 0);
- if (!kAccessIsWrite)
- x_ |= kReadBit;
- DCHECK_EQ(kAccessIsWrite, IsWrite());
- }
-
- void SetAtomic(bool kIsAtomic) {
- DCHECK(!IsAtomic());
- if (kIsAtomic)
- x_ |= kAtomicBit;
- DCHECK_EQ(IsAtomic(), kIsAtomic);
- }
-
- bool IsAtomic() const {
- return x_ & kAtomicBit;
- }
-
- bool IsZero() const {
- return x_ == 0;
- }
-
- static inline bool TidsAreEqual(const Shadow s1, const Shadow s2) {
- u64 shifted_xor = (s1.x_ ^ s2.x_) >> kTidShift;
- DCHECK_EQ(shifted_xor == 0, s1.TidWithIgnore() == s2.TidWithIgnore());
- return shifted_xor == 0;
- }
-
- static ALWAYS_INLINE
- bool Addr0AndSizeAreEqual(const Shadow s1, const Shadow s2) {
- u64 masked_xor = ((s1.x_ ^ s2.x_) >> kClkBits) & 31;
- return masked_xor == 0;
- }
-
- static ALWAYS_INLINE bool TwoRangesIntersect(Shadow s1, Shadow s2,
- unsigned kS2AccessSize) {
- bool res = false;
- u64 diff = s1.addr0() - s2.addr0();
- if ((s64)diff < 0) { // s1.addr0 < s2.addr0
- // if (s1.addr0() + size1) > s2.addr0()) return true;
- if (s1.size() > -diff)
- res = true;
- } else {
- // if (s2.addr0() + kS2AccessSize > s1.addr0()) return true;
- if (kS2AccessSize > diff)
- res = true;
- }
- DCHECK_EQ(res, TwoRangesIntersectSlow(s1, s2));
- DCHECK_EQ(res, TwoRangesIntersectSlow(s2, s1));
- return res;
- }
-
- u64 ALWAYS_INLINE addr0() const { return (x_ >> kClkBits) & 7; }
- u64 ALWAYS_INLINE size() const { return 1ull << size_log(); }
- bool ALWAYS_INLINE IsWrite() const { return !IsRead(); }
- bool ALWAYS_INLINE IsRead() const { return x_ & kReadBit; }
-
- // The idea behind the freed bit is as follows.
- // When the memory is freed (or otherwise inaccessible) we write to the shadow
- // values with tid/epoch related to the free and the freed bit set.
- // During memory access processing, the freed bit is considered
- // as the msb of the tid, so any access races with a shadow value that has the
- // freed bit set (as if it were a write from a thread we never synchronized with).
- // This allows us to detect accesses to freed memory w/o additional
- // overheads in memory access processing and at the same time restore
- // tid/epoch of free.
- void MarkAsFreed() {
- x_ |= kFreedBit;
- }
-
- bool IsFreed() const {
- return x_ & kFreedBit;
- }
-
- bool GetFreedAndReset() {
- bool res = x_ & kFreedBit;
- x_ &= ~kFreedBit;
- return res;
- }
-
- bool ALWAYS_INLINE IsBothReadsOrAtomic(bool kIsWrite, bool kIsAtomic) const {
- bool v = x_ & ((u64(kIsWrite ^ 1) << kReadShift)
- | (u64(kIsAtomic) << kAtomicShift));
- DCHECK_EQ(v, (!IsWrite() && !kIsWrite) || (IsAtomic() && kIsAtomic));
- return v;
- }
-
- bool ALWAYS_INLINE IsRWNotWeaker(bool kIsWrite, bool kIsAtomic) const {
- bool v = ((x_ >> kReadShift) & 3)
- <= u64((kIsWrite ^ 1) | (kIsAtomic << 1));
- DCHECK_EQ(v, (IsAtomic() < kIsAtomic) ||
- (IsAtomic() == kIsAtomic && !IsWrite() <= !kIsWrite));
- return v;
- }
-
- bool ALWAYS_INLINE IsRWWeakerOrEqual(bool kIsWrite, bool kIsAtomic) const {
- bool v = ((x_ >> kReadShift) & 3)
- >= u64((kIsWrite ^ 1) | (kIsAtomic << 1));
- DCHECK_EQ(v, (IsAtomic() > kIsAtomic) ||
- (IsAtomic() == kIsAtomic && !IsWrite() >= !kIsWrite));
- return v;
- }
-
- private:
- static const u64 kReadShift = 5 + kClkBits;
- static const u64 kReadBit = 1ull << kReadShift;
- static const u64 kAtomicShift = 6 + kClkBits;
- static const u64 kAtomicBit = 1ull << kAtomicShift;
-
- u64 size_log() const { return (x_ >> (3 + kClkBits)) & 3; }
-
- static bool TwoRangesIntersectSlow(const Shadow s1, const Shadow s2) {
- if (s1.addr0() == s2.addr0()) return true;
- if (s1.addr0() < s2.addr0() && s1.addr0() + s1.size() > s2.addr0())
- return true;
- if (s2.addr0() < s1.addr0() && s2.addr0() + s2.size() > s1.addr0())
- return true;
- return false;
- }
-};
-
struct ThreadSignalContext;
struct JmpBuf {
@@ -380,6 +152,7 @@ struct ThreadState {
// We do not distinguish between ignoring reads and writes
// for better performance.
int ignore_reads_and_writes;
+ atomic_sint32_t pending_signals;
int ignore_sync;
int suppress_reports;
// Go does not support ignores.
@@ -387,20 +160,18 @@ struct ThreadState {
IgnoreSet mop_ignore_set;
IgnoreSet sync_ignore_set;
#endif
- // C/C++ uses a fixed-size shadow stack embedded into Trace.
- // Go uses malloc-allocated shadow stack with dynamic size.
uptr *shadow_stack;
uptr *shadow_stack_end;
uptr *shadow_stack_pos;
- u64 *racy_shadow_addr;
- u64 racy_state[2];
+ RawShadow *racy_shadow_addr;
+ RawShadow racy_state[2];
MutexSet mset;
ThreadClock clock;
#if !SANITIZER_GO
Vector<JmpBuf> jmp_bufs;
int ignore_interceptors;
#endif
- const u32 tid;
+ const Tid tid;
const int unique_id;
bool in_symbolizer;
bool in_ignored_lib;
@@ -428,7 +199,7 @@ struct ThreadState {
ThreadSignalContext *signal_ctx;
#if !SANITIZER_GO
- u32 last_sleep_stack_id;
+ StackID last_sleep_stack_id;
ThreadClock last_sleep_clock;
#endif
@@ -438,41 +209,49 @@ struct ThreadState {
const ReportDesc *current_report;
- explicit ThreadState(Context *ctx, u32 tid, int unique_id, u64 epoch,
+ // Current position in tctx->trace.Back()->events (Event*).
+ atomic_uintptr_t trace_pos;
+ // PC of the last memory access, used to compute PC deltas in the trace.
+ uptr trace_prev_pc;
+ Sid sid;
+ Epoch epoch;
+
+ explicit ThreadState(Context *ctx, Tid tid, int unique_id, u64 epoch,
unsigned reuse_count, uptr stk_addr, uptr stk_size,
uptr tls_addr, uptr tls_size);
-};
+} ALIGNED(SANITIZER_CACHE_LINE_SIZE);
#if !SANITIZER_GO
#if SANITIZER_MAC || SANITIZER_ANDROID
ThreadState *cur_thread();
void set_cur_thread(ThreadState *thr);
void cur_thread_finalize();
-inline void cur_thread_init() { }
-#else
+inline ThreadState *cur_thread_init() { return cur_thread(); }
+# else
__attribute__((tls_model("initial-exec")))
extern THREADLOCAL char cur_thread_placeholder[];
inline ThreadState *cur_thread() {
return reinterpret_cast<ThreadState *>(cur_thread_placeholder)->current;
}
-inline void cur_thread_init() {
+inline ThreadState *cur_thread_init() {
ThreadState *thr = reinterpret_cast<ThreadState *>(cur_thread_placeholder);
if (UNLIKELY(!thr->current))
thr->current = thr;
+ return thr->current;
}
inline void set_cur_thread(ThreadState *thr) {
reinterpret_cast<ThreadState *>(cur_thread_placeholder)->current = thr;
}
inline void cur_thread_finalize() { }
-#endif // SANITIZER_MAC || SANITIZER_ANDROID
+# endif // SANITIZER_MAC || SANITIZER_ANDROID
#endif // SANITIZER_GO
class ThreadContext final : public ThreadContextBase {
public:
- explicit ThreadContext(int tid);
+ explicit ThreadContext(Tid tid);
~ThreadContext();
ThreadState *thr;
- u32 creation_stack_id;
+ StackID creation_stack_id;
SyncClock sync;
// Epoch at which the thread had started.
// If we see an event from the thread stamped by an older epoch,
@@ -480,6 +259,8 @@ class ThreadContext final : public ThreadContextBase {
u64 epoch0;
u64 epoch1;
+ v3::Trace trace;
+
// Override superclass callbacks.
void OnDead() override;
void OnJoined(void *arg) override;
@@ -492,13 +273,7 @@ class ThreadContext final : public ThreadContextBase {
struct RacyStacks {
MD5Hash hash[2];
- bool operator==(const RacyStacks &other) const {
- if (hash[0] == other.hash[0] && hash[1] == other.hash[1])
- return true;
- if (hash[0] == other.hash[1] && hash[1] == other.hash[0])
- return true;
- return false;
- }
+ bool operator==(const RacyStacks &other) const;
};
struct RacyAddress {
@@ -524,13 +299,12 @@ struct Context {
Mutex report_mtx;
int nreported;
- int nmissed_expected;
atomic_uint64_t last_symbolize_time_ns;
void *background_thread;
atomic_uint32_t stop_background_thread;
- ThreadRegistry *thread_registry;
+ ThreadRegistry thread_registry;
Mutex racy_mtx;
Vector<RacyStacks> racy_stacks;
@@ -543,9 +317,9 @@ struct Context {
ClockAlloc clock_alloc;
Flags flags;
+ fd_t memprof_fd;
- u64 int_alloc_cnt[MBlockTypeCount];
- u64 int_alloc_siz[MBlockTypeCount];
+ Mutex slot_mtx;
};
extern Context *ctx; // The one and the only global runtime context.
@@ -578,12 +352,12 @@ class ScopedReportBase {
const MutexSet *mset);
void AddStack(StackTrace stack, bool suppressable = false);
void AddThread(const ThreadContext *tctx, bool suppressable = false);
- void AddThread(int unique_tid, bool suppressable = false);
- void AddUniqueTid(int unique_tid);
+ void AddThread(Tid unique_tid, bool suppressable = false);
+ void AddUniqueTid(Tid unique_tid);
void AddMutex(const SyncVar *s);
u64 AddMutex(u64 id);
void AddLocation(uptr addr, uptr size);
- void AddSleep(u32 stack_id);
+ void AddSleep(StackID stack_id);
void SetCount(int count);
const ReportDesc *GetReport() const;
@@ -615,7 +389,7 @@ class ScopedReport : public ScopedReportBase {
bool ShouldReport(ThreadState *thr, ReportType typ);
ThreadContext *IsThreadStackOrTls(uptr addr, bool *is_stack);
-void RestoreStack(int tid, const u64 epoch, VarSizeStackTrace *stk,
+void RestoreStack(Tid tid, const u64 epoch, VarSizeStackTrace *stk,
MutexSet *mset, uptr *tag = nullptr);
// The stack could look like:
@@ -662,13 +436,12 @@ void InitializeDynamicAnnotations();
void ForkBefore(ThreadState *thr, uptr pc);
void ForkParentAfter(ThreadState *thr, uptr pc);
-void ForkChildAfter(ThreadState *thr, uptr pc);
+void ForkChildAfter(ThreadState *thr, uptr pc, bool start_thread);
void ReportRace(ThreadState *thr);
bool OutputReport(ThreadState *thr, const ScopedReport &srep);
bool IsFiredSuppression(Context *ctx, ReportType type, StackTrace trace);
bool IsExpectedReport(uptr addr, uptr size);
-void PrintMatchedBenignRaces();
#if defined(TSAN_DEBUG_OUTPUT) && TSAN_DEBUG_OUTPUT >= 1
# define DPrintf Printf
@@ -682,10 +455,11 @@ void PrintMatchedBenignRaces();
# define DPrintf2(...)
#endif
-u32 CurrentStackId(ThreadState *thr, uptr pc);
-ReportStack *SymbolizeStackId(u32 stack_id);
+StackID CurrentStackId(ThreadState *thr, uptr pc);
+ReportStack *SymbolizeStackId(StackID stack_id);
void PrintCurrentStack(ThreadState *thr, uptr pc);
void PrintCurrentStackSlow(uptr pc); // uses libunwind
+MBlock *JavaHeapBlock(uptr addr, uptr *start);
void Initialize(ThreadState *thr);
void MaybeSpawnBackgroundThread();
@@ -701,34 +475,44 @@ void MemoryAccessImpl(ThreadState *thr, uptr addr,
u64 *shadow_mem, Shadow cur);
void MemoryAccessRange(ThreadState *thr, uptr pc, uptr addr,
uptr size, bool is_write);
-void MemoryAccessRangeStep(ThreadState *thr, uptr pc, uptr addr,
- uptr size, uptr step, bool is_write);
-void UnalignedMemoryAccess(ThreadState *thr, uptr pc, uptr addr,
- int size, bool kAccessIsWrite, bool kIsAtomic);
+void UnalignedMemoryAccess(ThreadState *thr, uptr pc, uptr addr, uptr size,
+ AccessType typ);
const int kSizeLog1 = 0;
const int kSizeLog2 = 1;
const int kSizeLog4 = 2;
const int kSizeLog8 = 3;
-void ALWAYS_INLINE MemoryRead(ThreadState *thr, uptr pc,
- uptr addr, int kAccessSizeLog) {
- MemoryAccess(thr, pc, addr, kAccessSizeLog, false, false);
-}
-
-void ALWAYS_INLINE MemoryWrite(ThreadState *thr, uptr pc,
- uptr addr, int kAccessSizeLog) {
- MemoryAccess(thr, pc, addr, kAccessSizeLog, true, false);
-}
-
-void ALWAYS_INLINE MemoryReadAtomic(ThreadState *thr, uptr pc,
- uptr addr, int kAccessSizeLog) {
- MemoryAccess(thr, pc, addr, kAccessSizeLog, false, true);
-}
-
-void ALWAYS_INLINE MemoryWriteAtomic(ThreadState *thr, uptr pc,
- uptr addr, int kAccessSizeLog) {
- MemoryAccess(thr, pc, addr, kAccessSizeLog, true, true);
+ALWAYS_INLINE
+void MemoryAccess(ThreadState *thr, uptr pc, uptr addr, uptr size,
+ AccessType typ) {
+ int size_log;
+ switch (size) {
+ case 1:
+ size_log = kSizeLog1;
+ break;
+ case 2:
+ size_log = kSizeLog2;
+ break;
+ case 4:
+ size_log = kSizeLog4;
+ break;
+ default:
+ DCHECK_EQ(size, 8);
+ size_log = kSizeLog8;
+ break;
+ }
+ bool is_write = !(typ & kAccessRead);
+ bool is_atomic = typ & kAccessAtomic;
+ if (typ & kAccessVptr)
+ thr->is_vptr_access = true;
+ if (typ & kAccessFree)
+ thr->is_freeing = true;
+ MemoryAccess(thr, pc, addr, size_log, is_write, is_atomic);
+ if (typ & kAccessVptr)
+ thr->is_vptr_access = false;
+ if (typ & kAccessFree)
+ thr->is_freeing = false;
}
void MemoryResetRange(ThreadState *thr, uptr pc, uptr addr, uptr size);
@@ -737,26 +521,26 @@ void MemoryRangeImitateWrite(ThreadState *thr, uptr pc, uptr addr, uptr size);
void MemoryRangeImitateWriteOrResetRange(ThreadState *thr, uptr pc, uptr addr,
uptr size);
-void ThreadIgnoreBegin(ThreadState *thr, uptr pc, bool save_stack = true);
-void ThreadIgnoreEnd(ThreadState *thr, uptr pc);
-void ThreadIgnoreSyncBegin(ThreadState *thr, uptr pc, bool save_stack = true);
-void ThreadIgnoreSyncEnd(ThreadState *thr, uptr pc);
+void ThreadIgnoreBegin(ThreadState *thr, uptr pc);
+void ThreadIgnoreEnd(ThreadState *thr);
+void ThreadIgnoreSyncBegin(ThreadState *thr, uptr pc);
+void ThreadIgnoreSyncEnd(ThreadState *thr);
void FuncEntry(ThreadState *thr, uptr pc);
void FuncExit(ThreadState *thr);
-int ThreadCreate(ThreadState *thr, uptr pc, uptr uid, bool detached);
-void ThreadStart(ThreadState *thr, int tid, tid_t os_id,
+Tid ThreadCreate(ThreadState *thr, uptr pc, uptr uid, bool detached);
+void ThreadStart(ThreadState *thr, Tid tid, tid_t os_id,
ThreadType thread_type);
void ThreadFinish(ThreadState *thr);
-int ThreadConsumeTid(ThreadState *thr, uptr pc, uptr uid);
-void ThreadJoin(ThreadState *thr, uptr pc, int tid);
-void ThreadDetach(ThreadState *thr, uptr pc, int tid);
+Tid ThreadConsumeTid(ThreadState *thr, uptr pc, uptr uid);
+void ThreadJoin(ThreadState *thr, uptr pc, Tid tid);
+void ThreadDetach(ThreadState *thr, uptr pc, Tid tid);
void ThreadFinalize(ThreadState *thr);
void ThreadSetName(ThreadState *thr, const char *name);
int ThreadCount(ThreadState *thr);
-void ProcessPendingSignals(ThreadState *thr);
-void ThreadNotJoined(ThreadState *thr, uptr pc, int tid, uptr uid);
+void ProcessPendingSignalsImpl(ThreadState *thr);
+void ThreadNotJoined(ThreadState *thr, uptr pc, Tid tid, uptr uid);
Processor *ProcCreate();
void ProcDestroy(Processor *proc);
@@ -785,7 +569,7 @@ void Acquire(ThreadState *thr, uptr pc, uptr addr);
// handle Go finalizers. Namely, finalizer goroutine executes AcquireGlobal
// right before executing finalizers. This provides a coarse, but simple
// approximation of the actual required synchronization.
-void AcquireGlobal(ThreadState *thr, uptr pc);
+void AcquireGlobal(ThreadState *thr);
void Release(ThreadState *thr, uptr pc, uptr addr);
void ReleaseStoreAcquire(ThreadState *thr, uptr pc, uptr addr);
void ReleaseStore(ThreadState *thr, uptr pc, uptr addr);
@@ -821,13 +605,16 @@ void TraceSwitch(ThreadState *thr);
uptr TraceTopPC(ThreadState *thr);
uptr TraceSize();
uptr TraceParts();
-Trace *ThreadTrace(int tid);
+Trace *ThreadTrace(Tid tid);
extern "C" void __tsan_trace_switch();
void ALWAYS_INLINE TraceAddEvent(ThreadState *thr, FastState fs,
EventType typ, u64 addr) {
if (!kCollectHistory)
return;
+ // TraceSwitch accesses shadow_stack, but it's called infrequently,
+ // so we check it here proactively.
+ DCHECK(thr->shadow_stack);
DCHECK_GE((int)typ, 0);
DCHECK_LE((int)typ, 7);
DCHECK_EQ(GetLsb(addr, kEventPCBits), addr);
@@ -861,6 +648,149 @@ enum FiberSwitchFlags {
FiberSwitchFlagNoSync = 1 << 0, // __tsan_switch_to_fiber_no_sync
};
+ALWAYS_INLINE void ProcessPendingSignals(ThreadState *thr) {
+ if (UNLIKELY(atomic_load_relaxed(&thr->pending_signals)))
+ ProcessPendingSignalsImpl(thr);
+}
+
+extern bool is_initialized;
+
+ALWAYS_INLINE
+void LazyInitialize(ThreadState *thr) {
+ // If we can use .preinit_array, assume that __tsan_init
+ // called from .preinit_array initializes runtime before
+ // any instrumented code.
+#if !SANITIZER_CAN_USE_PREINIT_ARRAY
+ if (UNLIKELY(!is_initialized))
+ Initialize(thr);
+#endif
+}
+
+namespace v3 {
+
+void TraceSwitchPart(ThreadState *thr);
+bool RestoreStack(Tid tid, EventType type, Sid sid, Epoch epoch, uptr addr,
+ uptr size, AccessType typ, VarSizeStackTrace *pstk,
+ MutexSet *pmset, uptr *ptag);
+
+template <typename EventT>
+ALWAYS_INLINE WARN_UNUSED_RESULT bool TraceAcquire(ThreadState *thr,
+ EventT **ev) {
+ Event *pos = reinterpret_cast<Event *>(atomic_load_relaxed(&thr->trace_pos));
+#if SANITIZER_DEBUG
+ // TraceSwitch acquires these mutexes,
+ // so we lock them here to detect deadlocks more reliably.
+ { Lock lock(&ctx->slot_mtx); }
+ { Lock lock(&thr->tctx->trace.mtx); }
+ TracePart *current = thr->tctx->trace.parts.Back();
+ if (current) {
+ DCHECK_GE(pos, &current->events[0]);
+ DCHECK_LE(pos, &current->events[TracePart::kSize]);
+ } else {
+ DCHECK_EQ(pos, nullptr);
+ }
+#endif
+ // TracePart is allocated with mmap and is at least 4K aligned.
+ // So the following check is a faster way to check for part end.
+ // It may have false positives in the middle of the trace,
+ // they are filtered out in TraceSwitch.
+ if (UNLIKELY(((uptr)(pos + 1) & TracePart::kAlignment) == 0))
+ return false;
+ *ev = reinterpret_cast<EventT *>(pos);
+ return true;
+}
+
+template <typename EventT>
+ALWAYS_INLINE void TraceRelease(ThreadState *thr, EventT *evp) {
+ DCHECK_LE(evp + 1, &thr->tctx->trace.parts.Back()->events[TracePart::kSize]);
+ atomic_store_relaxed(&thr->trace_pos, (uptr)(evp + 1));
+}
+
+template <typename EventT>
+void TraceEvent(ThreadState *thr, EventT ev) {
+ EventT *evp;
+ if (!TraceAcquire(thr, &evp)) {
+ TraceSwitchPart(thr);
+ UNUSED bool res = TraceAcquire(thr, &evp);
+ DCHECK(res);
+ }
+ *evp = ev;
+ TraceRelease(thr, evp);
+}
+
+ALWAYS_INLINE WARN_UNUSED_RESULT bool TryTraceFunc(ThreadState *thr,
+ uptr pc = 0) {
+ if (!kCollectHistory)
+ return true;
+ EventFunc *ev;
+ if (UNLIKELY(!TraceAcquire(thr, &ev)))
+ return false;
+ ev->is_access = 0;
+ ev->is_func = 1;
+ ev->pc = pc;
+ TraceRelease(thr, ev);
+ return true;
+}
+
+WARN_UNUSED_RESULT
+bool TryTraceMemoryAccess(ThreadState *thr, uptr pc, uptr addr, uptr size,
+ AccessType typ);
+WARN_UNUSED_RESULT
+bool TryTraceMemoryAccessRange(ThreadState *thr, uptr pc, uptr addr, uptr size,
+ AccessType typ);
+void TraceMemoryAccessRange(ThreadState *thr, uptr pc, uptr addr, uptr size,
+ AccessType typ);
+void TraceFunc(ThreadState *thr, uptr pc = 0);
+void TraceMutexLock(ThreadState *thr, EventType type, uptr pc, uptr addr,
+ StackID stk);
+void TraceMutexUnlock(ThreadState *thr, uptr addr);
+void TraceTime(ThreadState *thr);
+
+} // namespace v3
+
+void GrowShadowStack(ThreadState *thr);
+
+ALWAYS_INLINE
+void FuncEntry(ThreadState *thr, uptr pc) {
+ DPrintf2("#%d: FuncEntry %p\n", (int)thr->fast_state.tid(), (void *)pc);
+ if (kCollectHistory) {
+ thr->fast_state.IncrementEpoch();
+ TraceAddEvent(thr, thr->fast_state, EventTypeFuncEnter, pc);
+ }
+
+ // Shadow stack maintenance can be replaced with
+ // stack unwinding during trace switch (which presumably must be faster).
+ DCHECK_GE(thr->shadow_stack_pos, thr->shadow_stack);
+#if !SANITIZER_GO
+ DCHECK_LT(thr->shadow_stack_pos, thr->shadow_stack_end);
+#else
+ if (thr->shadow_stack_pos == thr->shadow_stack_end)
+ GrowShadowStack(thr);
+#endif
+ thr->shadow_stack_pos[0] = pc;
+ thr->shadow_stack_pos++;
+}
+
+ALWAYS_INLINE
+void FuncExit(ThreadState *thr) {
+ DPrintf2("#%d: FuncExit\n", (int)thr->fast_state.tid());
+ if (kCollectHistory) {
+ thr->fast_state.IncrementEpoch();
+ TraceAddEvent(thr, thr->fast_state, EventTypeFuncExit, 0);
+ }
+
+ DCHECK_GT(thr->shadow_stack_pos, thr->shadow_stack);
+#if !SANITIZER_GO
+ DCHECK_LT(thr->shadow_stack_pos, thr->shadow_stack_end);
+#endif
+ thr->shadow_stack_pos--;
+}
+
+#if !SANITIZER_GO
+extern void (*on_initialize)(void);
+extern int (*on_finalize)(int);
+#endif
+
} // namespace __tsan
#endif // TSAN_RTL_H
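For orientation: the hunks above collapse the old MemoryRead/MemoryWrite/MemoryReadAtomic/MemoryWriteAtomic helpers into a single MemoryAccess overload taking the access size in bytes plus an AccessType bitmask, with the inline wrapper translating the flags back into the legacy (size_log, is_write, is_atomic) call. A minimal sketch of how call sites look after this change, using only names that appear elsewhere in this diff (the enclosing function is hypothetical):

    // Hypothetical helper; thr, pc and addr would come from the instrumented caller.
    void ExampleTypedAccesses(ThreadState *thr, uptr pc, uptr addr) {
      // Plain 1-byte write (was MemoryWrite(thr, pc, addr, kSizeLog1)).
      MemoryAccess(thr, pc, addr, 1, kAccessWrite);
      // Atomic 8-byte read (was MemoryReadAtomic(thr, pc, addr, kSizeLog8)).
      MemoryAccess(thr, pc, addr, 8, kAccessRead | kAccessAtomic);
      // Write issued while freeing; the wrapper toggles thr->is_freeing around it.
      MemoryAccess(thr, pc, addr, 1, kAccessWrite | kAccessFree);
    }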
diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_access.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl_access.cpp
new file mode 100644
index 000000000000..7365fdaa3038
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_access.cpp
@@ -0,0 +1,604 @@
+//===-- tsan_rtl_access.cpp -----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// Definitions of memory access and function entry/exit entry points.
+//===----------------------------------------------------------------------===//
+
+#include "tsan_rtl.h"
+
+namespace __tsan {
+
+namespace v3 {
+
+ALWAYS_INLINE USED bool TryTraceMemoryAccess(ThreadState *thr, uptr pc,
+ uptr addr, uptr size,
+ AccessType typ) {
+ DCHECK(size == 1 || size == 2 || size == 4 || size == 8);
+ if (!kCollectHistory)
+ return true;
+ EventAccess *ev;
+ if (UNLIKELY(!TraceAcquire(thr, &ev)))
+ return false;
+ u64 size_log = size == 1 ? 0 : size == 2 ? 1 : size == 4 ? 2 : 3;
+ uptr pc_delta = pc - thr->trace_prev_pc + (1 << (EventAccess::kPCBits - 1));
+ thr->trace_prev_pc = pc;
+ if (LIKELY(pc_delta < (1 << EventAccess::kPCBits))) {
+ ev->is_access = 1;
+ ev->is_read = !!(typ & kAccessRead);
+ ev->is_atomic = !!(typ & kAccessAtomic);
+ ev->size_log = size_log;
+ ev->pc_delta = pc_delta;
+ DCHECK_EQ(ev->pc_delta, pc_delta);
+ ev->addr = CompressAddr(addr);
+ TraceRelease(thr, ev);
+ return true;
+ }
+ auto *evex = reinterpret_cast<EventAccessExt *>(ev);
+ evex->is_access = 0;
+ evex->is_func = 0;
+ evex->type = EventType::kAccessExt;
+ evex->is_read = !!(typ & kAccessRead);
+ evex->is_atomic = !!(typ & kAccessAtomic);
+ evex->size_log = size_log;
+ evex->addr = CompressAddr(addr);
+ evex->pc = pc;
+ TraceRelease(thr, evex);
+ return true;
+}
+
+ALWAYS_INLINE USED bool TryTraceMemoryAccessRange(ThreadState *thr, uptr pc,
+ uptr addr, uptr size,
+ AccessType typ) {
+ if (!kCollectHistory)
+ return true;
+ EventAccessRange *ev;
+ if (UNLIKELY(!TraceAcquire(thr, &ev)))
+ return false;
+ thr->trace_prev_pc = pc;
+ ev->is_access = 0;
+ ev->is_func = 0;
+ ev->type = EventType::kAccessRange;
+ ev->is_read = !!(typ & kAccessRead);
+ ev->is_free = !!(typ & kAccessFree);
+ ev->size_lo = size;
+ ev->pc = CompressAddr(pc);
+ ev->addr = CompressAddr(addr);
+ ev->size_hi = size >> EventAccessRange::kSizeLoBits;
+ TraceRelease(thr, ev);
+ return true;
+}
+
+void TraceMemoryAccessRange(ThreadState *thr, uptr pc, uptr addr, uptr size,
+ AccessType typ) {
+ if (LIKELY(TryTraceMemoryAccessRange(thr, pc, addr, size, typ)))
+ return;
+ TraceSwitchPart(thr);
+ UNUSED bool res = TryTraceMemoryAccessRange(thr, pc, addr, size, typ);
+ DCHECK(res);
+}
+
+void TraceFunc(ThreadState *thr, uptr pc) {
+ if (LIKELY(TryTraceFunc(thr, pc)))
+ return;
+ TraceSwitchPart(thr);
+ UNUSED bool res = TryTraceFunc(thr, pc);
+ DCHECK(res);
+}
+
+void TraceMutexLock(ThreadState *thr, EventType type, uptr pc, uptr addr,
+ StackID stk) {
+ DCHECK(type == EventType::kLock || type == EventType::kRLock);
+ if (!kCollectHistory)
+ return;
+ EventLock ev;
+ ev.is_access = 0;
+ ev.is_func = 0;
+ ev.type = type;
+ ev.pc = CompressAddr(pc);
+ ev.stack_lo = stk;
+ ev.stack_hi = stk >> EventLock::kStackIDLoBits;
+ ev._ = 0;
+ ev.addr = CompressAddr(addr);
+ TraceEvent(thr, ev);
+}
+
+void TraceMutexUnlock(ThreadState *thr, uptr addr) {
+ if (!kCollectHistory)
+ return;
+ EventUnlock ev;
+ ev.is_access = 0;
+ ev.is_func = 0;
+ ev.type = EventType::kUnlock;
+ ev._ = 0;
+ ev.addr = CompressAddr(addr);
+ TraceEvent(thr, ev);
+}
+
+void TraceTime(ThreadState *thr) {
+ if (!kCollectHistory)
+ return;
+ EventTime ev;
+ ev.is_access = 0;
+ ev.is_func = 0;
+ ev.type = EventType::kTime;
+ ev.sid = static_cast<u64>(thr->sid);
+ ev.epoch = static_cast<u64>(thr->epoch);
+ ev._ = 0;
+ TraceEvent(thr, ev);
+}
+
+} // namespace v3
+
+ALWAYS_INLINE
+Shadow LoadShadow(u64 *p) {
+ u64 raw = atomic_load((atomic_uint64_t *)p, memory_order_relaxed);
+ return Shadow(raw);
+}
+
+ALWAYS_INLINE
+void StoreShadow(u64 *sp, u64 s) {
+ atomic_store((atomic_uint64_t *)sp, s, memory_order_relaxed);
+}
+
+ALWAYS_INLINE
+void StoreIfNotYetStored(u64 *sp, u64 *s) {
+ StoreShadow(sp, *s);
+ *s = 0;
+}
+
+extern "C" void __tsan_report_race();
+
+ALWAYS_INLINE
+void HandleRace(ThreadState *thr, u64 *shadow_mem, Shadow cur, Shadow old) {
+ thr->racy_state[0] = cur.raw();
+ thr->racy_state[1] = old.raw();
+ thr->racy_shadow_addr = shadow_mem;
+#if !SANITIZER_GO
+ HACKY_CALL(__tsan_report_race);
+#else
+ ReportRace(thr);
+#endif
+}
+
+static inline bool HappensBefore(Shadow old, ThreadState *thr) {
+ return thr->clock.get(old.TidWithIgnore()) >= old.epoch();
+}
+
+ALWAYS_INLINE
+void MemoryAccessImpl1(ThreadState *thr, uptr addr, int kAccessSizeLog,
+ bool kAccessIsWrite, bool kIsAtomic, u64 *shadow_mem,
+ Shadow cur) {
+ // This potentially can live in an MMX/SSE scratch register.
+ // The required intrinsics are:
+ // __m128i _mm_move_epi64(__m128i*);
+ // _mm_storel_epi64(u64*, __m128i);
+ u64 store_word = cur.raw();
+ bool stored = false;
+
+ // scan all the shadow values and dispatch to 4 categories:
+ // same, replace, candidate and race (see comments below).
+ // we consider only 3 cases regarding access sizes:
+ // equal, intersect and not intersect. initially I considered
+  // larger and smaller as well, which allowed replacing some
+ // 'candidates' with 'same' or 'replace', but I think
+ // it's just not worth it (performance- and complexity-wise).
+
+ Shadow old(0);
+
+  // In release mode we manually unroll the loop,
+ // because empirically gcc generates better code this way.
+ // However, we can't afford unrolling in debug mode, because the function
+ // consumes almost 4K of stack. Gtest gives only 4K of stack to death test
+ // threads, which is not enough for the unrolled loop.
+#if SANITIZER_DEBUG
+ for (int idx = 0; idx < 4; idx++) {
+# include "tsan_update_shadow_word.inc"
+ }
+#else
+ int idx = 0;
+# include "tsan_update_shadow_word.inc"
+ idx = 1;
+ if (stored) {
+# include "tsan_update_shadow_word.inc"
+ } else {
+# include "tsan_update_shadow_word.inc"
+ }
+ idx = 2;
+ if (stored) {
+# include "tsan_update_shadow_word.inc"
+ } else {
+# include "tsan_update_shadow_word.inc"
+ }
+ idx = 3;
+ if (stored) {
+# include "tsan_update_shadow_word.inc"
+ } else {
+# include "tsan_update_shadow_word.inc"
+ }
+#endif
+
+ // we did not find any races and had already stored
+ // the current access info, so we are done
+ if (LIKELY(stored))
+ return;
+ // choose a random candidate slot and replace it
+ StoreShadow(shadow_mem + (cur.epoch() % kShadowCnt), store_word);
+ return;
+RACE:
+ HandleRace(thr, shadow_mem, cur, old);
+ return;
+}
+
+void UnalignedMemoryAccess(ThreadState *thr, uptr pc, uptr addr, uptr size,
+ AccessType typ) {
+ DCHECK(!(typ & kAccessAtomic));
+ const bool kAccessIsWrite = !(typ & kAccessRead);
+ const bool kIsAtomic = false;
+ while (size) {
+ int size1 = 1;
+ int kAccessSizeLog = kSizeLog1;
+ if (size >= 8 && (addr & ~7) == ((addr + 7) & ~7)) {
+ size1 = 8;
+ kAccessSizeLog = kSizeLog8;
+ } else if (size >= 4 && (addr & ~7) == ((addr + 3) & ~7)) {
+ size1 = 4;
+ kAccessSizeLog = kSizeLog4;
+ } else if (size >= 2 && (addr & ~7) == ((addr + 1) & ~7)) {
+ size1 = 2;
+ kAccessSizeLog = kSizeLog2;
+ }
+ MemoryAccess(thr, pc, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic);
+ addr += size1;
+ size -= size1;
+ }
+}
+
+ALWAYS_INLINE
+bool ContainsSameAccessSlow(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
+ Shadow cur(a);
+ for (uptr i = 0; i < kShadowCnt; i++) {
+ Shadow old(LoadShadow(&s[i]));
+ if (Shadow::Addr0AndSizeAreEqual(cur, old) &&
+ old.TidWithIgnore() == cur.TidWithIgnore() &&
+ old.epoch() > sync_epoch && old.IsAtomic() == cur.IsAtomic() &&
+ old.IsRead() <= cur.IsRead())
+ return true;
+ }
+ return false;
+}
+
+#if TSAN_VECTORIZE
+# define SHUF(v0, v1, i0, i1, i2, i3) \
+ _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(v0), \
+ _mm_castsi128_ps(v1), \
+ (i0)*1 + (i1)*4 + (i2)*16 + (i3)*64))
+ALWAYS_INLINE
+bool ContainsSameAccessFast(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
+ // This is an optimized version of ContainsSameAccessSlow.
+ // load current access into access[0:63]
+ const m128 access = _mm_cvtsi64_si128(a);
+ // duplicate high part of access in addr0:
+ // addr0[0:31] = access[32:63]
+ // addr0[32:63] = access[32:63]
+ // addr0[64:95] = access[32:63]
+ // addr0[96:127] = access[32:63]
+ const m128 addr0 = SHUF(access, access, 1, 1, 1, 1);
+ // load 4 shadow slots
+ const m128 shadow0 = _mm_load_si128((__m128i *)s);
+ const m128 shadow1 = _mm_load_si128((__m128i *)s + 1);
+ // load high parts of 4 shadow slots into addr_vect:
+ // addr_vect[0:31] = shadow0[32:63]
+ // addr_vect[32:63] = shadow0[96:127]
+ // addr_vect[64:95] = shadow1[32:63]
+ // addr_vect[96:127] = shadow1[96:127]
+ m128 addr_vect = SHUF(shadow0, shadow1, 1, 3, 1, 3);
+ if (!is_write) {
+ // set IsRead bit in addr_vect
+ const m128 rw_mask1 = _mm_cvtsi64_si128(1 << 15);
+ const m128 rw_mask = SHUF(rw_mask1, rw_mask1, 0, 0, 0, 0);
+ addr_vect = _mm_or_si128(addr_vect, rw_mask);
+ }
+ // addr0 == addr_vect?
+ const m128 addr_res = _mm_cmpeq_epi32(addr0, addr_vect);
+ // epoch1[0:63] = sync_epoch
+ const m128 epoch1 = _mm_cvtsi64_si128(sync_epoch);
+ // epoch[0:31] = sync_epoch[0:31]
+ // epoch[32:63] = sync_epoch[0:31]
+ // epoch[64:95] = sync_epoch[0:31]
+ // epoch[96:127] = sync_epoch[0:31]
+ const m128 epoch = SHUF(epoch1, epoch1, 0, 0, 0, 0);
+ // load low parts of shadow cell epochs into epoch_vect:
+ // epoch_vect[0:31] = shadow0[0:31]
+ // epoch_vect[32:63] = shadow0[64:95]
+ // epoch_vect[64:95] = shadow1[0:31]
+ // epoch_vect[96:127] = shadow1[64:95]
+ const m128 epoch_vect = SHUF(shadow0, shadow1, 0, 2, 0, 2);
+ // epoch_vect >= sync_epoch?
+ const m128 epoch_res = _mm_cmpgt_epi32(epoch_vect, epoch);
+ // addr_res & epoch_res
+ const m128 res = _mm_and_si128(addr_res, epoch_res);
+ // mask[0] = res[7]
+ // mask[1] = res[15]
+ // ...
+ // mask[15] = res[127]
+ const int mask = _mm_movemask_epi8(res);
+ return mask != 0;
+}
+#endif
+
+ALWAYS_INLINE
+bool ContainsSameAccess(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
+#if TSAN_VECTORIZE
+ bool res = ContainsSameAccessFast(s, a, sync_epoch, is_write);
+ // NOTE: this check can fail if the shadow is concurrently mutated
+ // by other threads. But it still can be useful if you modify
+ // ContainsSameAccessFast and want to ensure that it's not completely broken.
+ // DCHECK_EQ(res, ContainsSameAccessSlow(s, a, sync_epoch, is_write));
+ return res;
+#else
+ return ContainsSameAccessSlow(s, a, sync_epoch, is_write);
+#endif
+}
+
+ALWAYS_INLINE USED void MemoryAccess(ThreadState *thr, uptr pc, uptr addr,
+ int kAccessSizeLog, bool kAccessIsWrite,
+ bool kIsAtomic) {
+ RawShadow *shadow_mem = MemToShadow(addr);
+ DPrintf2(
+ "#%d: MemoryAccess: @%p %p size=%d"
+ " is_write=%d shadow_mem=%p {%zx, %zx, %zx, %zx}\n",
+ (int)thr->fast_state.tid(), (void *)pc, (void *)addr,
+ (int)(1 << kAccessSizeLog), kAccessIsWrite, shadow_mem,
+ (uptr)shadow_mem[0], (uptr)shadow_mem[1], (uptr)shadow_mem[2],
+ (uptr)shadow_mem[3]);
+#if SANITIZER_DEBUG
+ if (!IsAppMem(addr)) {
+ Printf("Access to non app mem %zx\n", addr);
+ DCHECK(IsAppMem(addr));
+ }
+ if (!IsShadowMem(shadow_mem)) {
+ Printf("Bad shadow addr %p (%zx)\n", shadow_mem, addr);
+ DCHECK(IsShadowMem(shadow_mem));
+ }
+#endif
+
+ if (!SANITIZER_GO && !kAccessIsWrite && *shadow_mem == kShadowRodata) {
+ // Access to .rodata section, no races here.
+ // Measurements show that it can be 10-20% of all memory accesses.
+ return;
+ }
+
+ FastState fast_state = thr->fast_state;
+ if (UNLIKELY(fast_state.GetIgnoreBit())) {
+ return;
+ }
+
+ Shadow cur(fast_state);
+ cur.SetAddr0AndSizeLog(addr & 7, kAccessSizeLog);
+ cur.SetWrite(kAccessIsWrite);
+ cur.SetAtomic(kIsAtomic);
+
+ if (LIKELY(ContainsSameAccess(shadow_mem, cur.raw(), thr->fast_synch_epoch,
+ kAccessIsWrite))) {
+ return;
+ }
+
+ if (kCollectHistory) {
+ fast_state.IncrementEpoch();
+ thr->fast_state = fast_state;
+ TraceAddEvent(thr, fast_state, EventTypeMop, pc);
+ cur.IncrementEpoch();
+ }
+
+ MemoryAccessImpl1(thr, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic,
+ shadow_mem, cur);
+}
+
+// Called by MemoryAccessRange in tsan_rtl_thread.cpp
+ALWAYS_INLINE USED void MemoryAccessImpl(ThreadState *thr, uptr addr,
+ int kAccessSizeLog,
+ bool kAccessIsWrite, bool kIsAtomic,
+ u64 *shadow_mem, Shadow cur) {
+ if (LIKELY(ContainsSameAccess(shadow_mem, cur.raw(), thr->fast_synch_epoch,
+ kAccessIsWrite))) {
+ return;
+ }
+
+ MemoryAccessImpl1(thr, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic,
+ shadow_mem, cur);
+}
+
+static void MemoryRangeSet(ThreadState *thr, uptr pc, uptr addr, uptr size,
+ u64 val) {
+ (void)thr;
+ (void)pc;
+ if (size == 0)
+ return;
+ // FIXME: fix me.
+ uptr offset = addr % kShadowCell;
+ if (offset) {
+ offset = kShadowCell - offset;
+ if (size <= offset)
+ return;
+ addr += offset;
+ size -= offset;
+ }
+ DCHECK_EQ(addr % 8, 0);
+ // If a user passes some insane arguments (memset(0)),
+ // let it just crash as usual.
+ if (!IsAppMem(addr) || !IsAppMem(addr + size - 1))
+ return;
+ // Don't want to touch lots of shadow memory.
+  // If a program maps a 10MB stack, there is no need to reset the whole range.
+ size = (size + (kShadowCell - 1)) & ~(kShadowCell - 1);
+ // UnmapOrDie/MmapFixedNoReserve does not work on Windows.
+ if (SANITIZER_WINDOWS || size < common_flags()->clear_shadow_mmap_threshold) {
+ RawShadow *p = MemToShadow(addr);
+ CHECK(IsShadowMem(p));
+ CHECK(IsShadowMem(p + size * kShadowCnt / kShadowCell - 1));
+ // FIXME: may overwrite a part outside the region
+ for (uptr i = 0; i < size / kShadowCell * kShadowCnt;) {
+ p[i++] = val;
+ for (uptr j = 1; j < kShadowCnt; j++) p[i++] = 0;
+ }
+ } else {
+ // The region is big, reset only beginning and end.
+ const uptr kPageSize = GetPageSizeCached();
+ RawShadow *begin = MemToShadow(addr);
+ RawShadow *end = begin + size / kShadowCell * kShadowCnt;
+ RawShadow *p = begin;
+ // Set at least first kPageSize/2 to page boundary.
+ while ((p < begin + kPageSize / kShadowSize / 2) || ((uptr)p % kPageSize)) {
+ *p++ = val;
+ for (uptr j = 1; j < kShadowCnt; j++) *p++ = 0;
+ }
+ // Reset middle part.
+ RawShadow *p1 = p;
+ p = RoundDown(end, kPageSize);
+ if (!MmapFixedSuperNoReserve((uptr)p1, (uptr)p - (uptr)p1))
+ Die();
+ // Set the ending.
+ while (p < end) {
+ *p++ = val;
+ for (uptr j = 1; j < kShadowCnt; j++) *p++ = 0;
+ }
+ }
+}
+
+void MemoryResetRange(ThreadState *thr, uptr pc, uptr addr, uptr size) {
+ MemoryRangeSet(thr, pc, addr, size, 0);
+}
+
+void MemoryRangeFreed(ThreadState *thr, uptr pc, uptr addr, uptr size) {
+ // Processing more than 1k (4k of shadow) is expensive,
+  // can cause excessive memory consumption (the user does not necessarily touch
+  // the whole range) and is most likely unnecessary.
+ if (size > 1024)
+ size = 1024;
+ CHECK_EQ(thr->is_freeing, false);
+ thr->is_freeing = true;
+ MemoryAccessRange(thr, pc, addr, size, true);
+ thr->is_freeing = false;
+ if (kCollectHistory) {
+ thr->fast_state.IncrementEpoch();
+ TraceAddEvent(thr, thr->fast_state, EventTypeMop, pc);
+ }
+ Shadow s(thr->fast_state);
+ s.ClearIgnoreBit();
+ s.MarkAsFreed();
+ s.SetWrite(true);
+ s.SetAddr0AndSizeLog(0, 3);
+ MemoryRangeSet(thr, pc, addr, size, s.raw());
+}
+
+void MemoryRangeImitateWrite(ThreadState *thr, uptr pc, uptr addr, uptr size) {
+ if (kCollectHistory) {
+ thr->fast_state.IncrementEpoch();
+ TraceAddEvent(thr, thr->fast_state, EventTypeMop, pc);
+ }
+ Shadow s(thr->fast_state);
+ s.ClearIgnoreBit();
+ s.SetWrite(true);
+ s.SetAddr0AndSizeLog(0, 3);
+ MemoryRangeSet(thr, pc, addr, size, s.raw());
+}
+
+void MemoryRangeImitateWriteOrResetRange(ThreadState *thr, uptr pc, uptr addr,
+ uptr size) {
+ if (thr->ignore_reads_and_writes == 0)
+ MemoryRangeImitateWrite(thr, pc, addr, size);
+ else
+ MemoryResetRange(thr, pc, addr, size);
+}
+
+void MemoryAccessRange(ThreadState *thr, uptr pc, uptr addr, uptr size,
+ bool is_write) {
+ if (size == 0)
+ return;
+
+ RawShadow *shadow_mem = MemToShadow(addr);
+ DPrintf2("#%d: MemoryAccessRange: @%p %p size=%d is_write=%d\n", thr->tid,
+ (void *)pc, (void *)addr, (int)size, is_write);
+
+#if SANITIZER_DEBUG
+ if (!IsAppMem(addr)) {
+ Printf("Access to non app mem %zx\n", addr);
+ DCHECK(IsAppMem(addr));
+ }
+ if (!IsAppMem(addr + size - 1)) {
+ Printf("Access to non app mem %zx\n", addr + size - 1);
+ DCHECK(IsAppMem(addr + size - 1));
+ }
+ if (!IsShadowMem(shadow_mem)) {
+ Printf("Bad shadow addr %p (%zx)\n", shadow_mem, addr);
+ DCHECK(IsShadowMem(shadow_mem));
+ }
+ if (!IsShadowMem(shadow_mem + size * kShadowCnt / 8 - 1)) {
+ Printf("Bad shadow addr %p (%zx)\n", shadow_mem + size * kShadowCnt / 8 - 1,
+ addr + size - 1);
+ DCHECK(IsShadowMem(shadow_mem + size * kShadowCnt / 8 - 1));
+ }
+#endif
+
+ if (*shadow_mem == kShadowRodata) {
+ DCHECK(!is_write);
+ // Access to .rodata section, no races here.
+ // Measurements show that it can be 10-20% of all memory accesses.
+ return;
+ }
+
+ FastState fast_state = thr->fast_state;
+ if (fast_state.GetIgnoreBit())
+ return;
+
+ fast_state.IncrementEpoch();
+ thr->fast_state = fast_state;
+ TraceAddEvent(thr, fast_state, EventTypeMop, pc);
+
+ bool unaligned = (addr % kShadowCell) != 0;
+
+ // Handle unaligned beginning, if any.
+ for (; addr % kShadowCell && size; addr++, size--) {
+ int const kAccessSizeLog = 0;
+ Shadow cur(fast_state);
+ cur.SetWrite(is_write);
+ cur.SetAddr0AndSizeLog(addr & (kShadowCell - 1), kAccessSizeLog);
+ MemoryAccessImpl(thr, addr, kAccessSizeLog, is_write, false, shadow_mem,
+ cur);
+ }
+ if (unaligned)
+ shadow_mem += kShadowCnt;
+ // Handle middle part, if any.
+ for (; size >= kShadowCell; addr += kShadowCell, size -= kShadowCell) {
+ int const kAccessSizeLog = 3;
+ Shadow cur(fast_state);
+ cur.SetWrite(is_write);
+ cur.SetAddr0AndSizeLog(0, kAccessSizeLog);
+ MemoryAccessImpl(thr, addr, kAccessSizeLog, is_write, false, shadow_mem,
+ cur);
+ shadow_mem += kShadowCnt;
+ }
+ // Handle ending, if any.
+ for (; size; addr++, size--) {
+ int const kAccessSizeLog = 0;
+ Shadow cur(fast_state);
+ cur.SetWrite(is_write);
+ cur.SetAddr0AndSizeLog(addr & (kShadowCell - 1), kAccessSizeLog);
+ MemoryAccessImpl(thr, addr, kAccessSizeLog, is_write, false, shadow_mem,
+ cur);
+ }
+}
+
+} // namespace __tsan
+
+#if !SANITIZER_GO
+// Must be included in this file to make sure everything is inlined.
+# include "tsan_interface.inc"
+#endif
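The v3 tracing entry points above share one idiom: try a lock-free append into the current TracePart via a Try* function and, only if the part is full, call TraceSwitchPart and retry once (the retry cannot fail because a fresh part was just installed). A sketch of the same idiom for a single access, mirroring TraceMemoryAccessRange above (illustrative only, not part of the imported sources):

    void TraceAccessChecked(ThreadState *thr, uptr pc, uptr addr, uptr size,
                            AccessType typ) {
      if (LIKELY(TryTraceMemoryAccess(thr, pc, addr, size, typ)))
        return;              // fast path: the event fit into the current part
      TraceSwitchPart(thr);  // switch to (and publish) a new trace part
      UNUSED bool res = TryTraceMemoryAccess(thr, pc, addr, size, typ);
      DCHECK(res);           // must succeed on a freshly switched part
    }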
diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_amd64.S b/compiler-rt/lib/tsan/rtl/tsan_rtl_amd64.S
index 5913aa360c5d..632b19d18158 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_rtl_amd64.S
+++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_amd64.S
@@ -13,6 +13,7 @@ ASM_HIDDEN(__tsan_trace_switch)
.globl ASM_SYMBOL(__tsan_trace_switch_thunk)
ASM_SYMBOL(__tsan_trace_switch_thunk):
CFI_STARTPROC
+ _CET_ENDBR
# Save scratch registers.
push %rax
CFI_ADJUST_CFA_OFFSET(8)
@@ -41,6 +42,25 @@ ASM_SYMBOL(__tsan_trace_switch_thunk):
push %r11
CFI_ADJUST_CFA_OFFSET(8)
CFI_REL_OFFSET(%r11, 0)
+ # All XMM registers are caller-saved.
+ sub $0x100, %rsp
+ CFI_ADJUST_CFA_OFFSET(0x100)
+ vmovdqu %xmm0, 0x0(%rsp)
+ vmovdqu %xmm1, 0x10(%rsp)
+ vmovdqu %xmm2, 0x20(%rsp)
+ vmovdqu %xmm3, 0x30(%rsp)
+ vmovdqu %xmm4, 0x40(%rsp)
+ vmovdqu %xmm5, 0x50(%rsp)
+ vmovdqu %xmm6, 0x60(%rsp)
+ vmovdqu %xmm7, 0x70(%rsp)
+ vmovdqu %xmm8, 0x80(%rsp)
+ vmovdqu %xmm9, 0x90(%rsp)
+ vmovdqu %xmm10, 0xa0(%rsp)
+ vmovdqu %xmm11, 0xb0(%rsp)
+ vmovdqu %xmm12, 0xc0(%rsp)
+ vmovdqu %xmm13, 0xd0(%rsp)
+ vmovdqu %xmm14, 0xe0(%rsp)
+ vmovdqu %xmm15, 0xf0(%rsp)
# Align stack frame.
push %rbx # non-scratch
CFI_ADJUST_CFA_OFFSET(8)
@@ -58,6 +78,24 @@ ASM_SYMBOL(__tsan_trace_switch_thunk):
pop %rbx
CFI_ADJUST_CFA_OFFSET(-8)
# Restore scratch registers.
+ vmovdqu 0x0(%rsp), %xmm0
+ vmovdqu 0x10(%rsp), %xmm1
+ vmovdqu 0x20(%rsp), %xmm2
+ vmovdqu 0x30(%rsp), %xmm3
+ vmovdqu 0x40(%rsp), %xmm4
+ vmovdqu 0x50(%rsp), %xmm5
+ vmovdqu 0x60(%rsp), %xmm6
+ vmovdqu 0x70(%rsp), %xmm7
+ vmovdqu 0x80(%rsp), %xmm8
+ vmovdqu 0x90(%rsp), %xmm9
+ vmovdqu 0xa0(%rsp), %xmm10
+ vmovdqu 0xb0(%rsp), %xmm11
+ vmovdqu 0xc0(%rsp), %xmm12
+ vmovdqu 0xd0(%rsp), %xmm13
+ vmovdqu 0xe0(%rsp), %xmm14
+ vmovdqu 0xf0(%rsp), %xmm15
+ add $0x100, %rsp
+ CFI_ADJUST_CFA_OFFSET(-0x100)
pop %r11
CFI_ADJUST_CFA_OFFSET(-8)
pop %r10
@@ -93,6 +131,7 @@ ASM_HIDDEN(__tsan_report_race)
.globl ASM_SYMBOL(__tsan_report_race_thunk)
ASM_SYMBOL(__tsan_report_race_thunk):
CFI_STARTPROC
+ _CET_ENDBR
# Save scratch registers.
push %rax
CFI_ADJUST_CFA_OFFSET(8)
@@ -121,6 +160,25 @@ ASM_SYMBOL(__tsan_report_race_thunk):
push %r11
CFI_ADJUST_CFA_OFFSET(8)
CFI_REL_OFFSET(%r11, 0)
+ # All XMM registers are caller-saved.
+ sub $0x100, %rsp
+ CFI_ADJUST_CFA_OFFSET(0x100)
+ vmovdqu %xmm0, 0x0(%rsp)
+ vmovdqu %xmm1, 0x10(%rsp)
+ vmovdqu %xmm2, 0x20(%rsp)
+ vmovdqu %xmm3, 0x30(%rsp)
+ vmovdqu %xmm4, 0x40(%rsp)
+ vmovdqu %xmm5, 0x50(%rsp)
+ vmovdqu %xmm6, 0x60(%rsp)
+ vmovdqu %xmm7, 0x70(%rsp)
+ vmovdqu %xmm8, 0x80(%rsp)
+ vmovdqu %xmm9, 0x90(%rsp)
+ vmovdqu %xmm10, 0xa0(%rsp)
+ vmovdqu %xmm11, 0xb0(%rsp)
+ vmovdqu %xmm12, 0xc0(%rsp)
+ vmovdqu %xmm13, 0xd0(%rsp)
+ vmovdqu %xmm14, 0xe0(%rsp)
+ vmovdqu %xmm15, 0xf0(%rsp)
# Align stack frame.
push %rbx # non-scratch
CFI_ADJUST_CFA_OFFSET(8)
@@ -138,6 +196,24 @@ ASM_SYMBOL(__tsan_report_race_thunk):
pop %rbx
CFI_ADJUST_CFA_OFFSET(-8)
# Restore scratch registers.
+ vmovdqu 0x0(%rsp), %xmm0
+ vmovdqu 0x10(%rsp), %xmm1
+ vmovdqu 0x20(%rsp), %xmm2
+ vmovdqu 0x30(%rsp), %xmm3
+ vmovdqu 0x40(%rsp), %xmm4
+ vmovdqu 0x50(%rsp), %xmm5
+ vmovdqu 0x60(%rsp), %xmm6
+ vmovdqu 0x70(%rsp), %xmm7
+ vmovdqu 0x80(%rsp), %xmm8
+ vmovdqu 0x90(%rsp), %xmm9
+ vmovdqu 0xa0(%rsp), %xmm10
+ vmovdqu 0xb0(%rsp), %xmm11
+ vmovdqu 0xc0(%rsp), %xmm12
+ vmovdqu 0xd0(%rsp), %xmm13
+ vmovdqu 0xe0(%rsp), %xmm14
+ vmovdqu 0xf0(%rsp), %xmm15
+ add $0x100, %rsp
+ CFI_ADJUST_CFA_OFFSET(-0x100)
pop %r11
CFI_ADJUST_CFA_OFFSET(-8)
pop %r10
@@ -185,6 +261,7 @@ ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(setjmp))
ASM_SYMBOL_INTERCEPTOR(setjmp):
#endif
CFI_STARTPROC
+ _CET_ENDBR
// save env parameter
push %rdi
CFI_ADJUST_CFA_OFFSET(8)
@@ -226,6 +303,7 @@ ASM_SIZE(ASM_SYMBOL_INTERCEPTOR(setjmp))
ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(_setjmp))
ASM_SYMBOL_INTERCEPTOR(_setjmp):
CFI_STARTPROC
+ _CET_ENDBR
// save env parameter
push %rdi
CFI_ADJUST_CFA_OFFSET(8)
@@ -267,6 +345,7 @@ ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(sigsetjmp))
ASM_SYMBOL_INTERCEPTOR(sigsetjmp):
#endif
CFI_STARTPROC
+ _CET_ENDBR
// save env parameter
push %rdi
CFI_ADJUST_CFA_OFFSET(8)
@@ -323,6 +402,7 @@ ASM_SIZE(ASM_SYMBOL_INTERCEPTOR(sigsetjmp))
ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(__sigsetjmp))
ASM_SYMBOL_INTERCEPTOR(__sigsetjmp):
CFI_STARTPROC
+ _CET_ENDBR
// save env parameter
push %rdi
CFI_ADJUST_CFA_OFFSET(8)
diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_mutex.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl_mutex.cpp
index 27ae279d6304..7d6b41116aa6 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_rtl_mutex.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_mutex.cpp
@@ -35,7 +35,7 @@ struct Callback final : public DDCallback {
DDCallback::lt = thr->dd_lt;
}
- u32 Unwind() override { return CurrentStackId(thr, pc); }
+ StackID Unwind() override { return CurrentStackId(thr, pc); }
int UniqueTid() override { return thr->unique_id; }
};
@@ -53,7 +53,7 @@ static void ReportMutexMisuse(ThreadState *thr, uptr pc, ReportType typ,
return;
if (!ShouldReport(thr, typ))
return;
- ThreadRegistryLock l(ctx->thread_registry);
+ ThreadRegistryLock l(&ctx->thread_registry);
ScopedReport rep(typ);
rep.AddMutex(mid);
VarSizeStackTrace trace;
@@ -63,51 +63,54 @@ static void ReportMutexMisuse(ThreadState *thr, uptr pc, ReportType typ,
OutputReport(thr, rep);
}
-void MutexCreate(ThreadState *thr, uptr pc, uptr addr, u32 flagz) NO_THREAD_SAFETY_ANALYSIS {
+void MutexCreate(ThreadState *thr, uptr pc, uptr addr, u32 flagz) {
DPrintf("#%d: MutexCreate %zx flagz=0x%x\n", thr->tid, addr, flagz);
if (!(flagz & MutexFlagLinkerInit) && IsAppMem(addr)) {
CHECK(!thr->is_freeing);
thr->is_freeing = true;
- MemoryWrite(thr, pc, addr, kSizeLog1);
+ MemoryAccess(thr, pc, addr, 1, kAccessWrite);
thr->is_freeing = false;
}
- SyncVar *s = ctx->metamap.GetOrCreateAndLock(thr, pc, addr, true);
+ SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true);
+ Lock l(&s->mtx);
s->SetFlags(flagz & MutexCreationFlagMask);
+  // Save the stack in case the sync object was created earlier as an atomic.
if (!SANITIZER_GO && s->creation_stack_id == 0)
s->creation_stack_id = CurrentStackId(thr, pc);
- s->mtx.Unlock();
}
-void MutexDestroy(ThreadState *thr, uptr pc, uptr addr, u32 flagz) NO_THREAD_SAFETY_ANALYSIS {
+void MutexDestroy(ThreadState *thr, uptr pc, uptr addr, u32 flagz) {
DPrintf("#%d: MutexDestroy %zx\n", thr->tid, addr);
- SyncVar *s = ctx->metamap.GetIfExistsAndLock(addr, true);
- if (s == 0)
- return;
- if ((flagz & MutexFlagLinkerInit)
- || s->IsFlagSet(MutexFlagLinkerInit)
- || ((flagz & MutexFlagNotStatic) && !s->IsFlagSet(MutexFlagNotStatic))) {
- // Destroy is no-op for linker-initialized mutexes.
- s->mtx.Unlock();
- return;
- }
- if (common_flags()->detect_deadlocks) {
- Callback cb(thr, pc);
- ctx->dd->MutexDestroy(&cb, &s->dd);
- ctx->dd->MutexInit(&cb, &s->dd);
- }
bool unlock_locked = false;
- if (flags()->report_destroy_locked && s->owner_tid != kInvalidTid &&
- !s->IsFlagSet(MutexFlagBroken)) {
- s->SetFlags(MutexFlagBroken);
- unlock_locked = true;
+ u64 mid = 0;
+ u64 last_lock = 0;
+ {
+ SyncVar *s = ctx->metamap.GetSyncIfExists(addr);
+ if (s == 0)
+ return;
+ Lock l(&s->mtx);
+ if ((flagz & MutexFlagLinkerInit) || s->IsFlagSet(MutexFlagLinkerInit) ||
+ ((flagz & MutexFlagNotStatic) && !s->IsFlagSet(MutexFlagNotStatic))) {
+ // Destroy is no-op for linker-initialized mutexes.
+ return;
+ }
+ if (common_flags()->detect_deadlocks) {
+ Callback cb(thr, pc);
+ ctx->dd->MutexDestroy(&cb, &s->dd);
+ ctx->dd->MutexInit(&cb, &s->dd);
+ }
+ if (flags()->report_destroy_locked && s->owner_tid != kInvalidTid &&
+ !s->IsFlagSet(MutexFlagBroken)) {
+ s->SetFlags(MutexFlagBroken);
+ unlock_locked = true;
+ }
+ mid = s->GetId();
+ last_lock = s->last_lock;
+ if (!unlock_locked)
+ s->Reset(thr->proc()); // must not reset it before the report is printed
}
- u64 mid = s->GetId();
- u64 last_lock = s->last_lock;
- if (!unlock_locked)
- s->Reset(thr->proc()); // must not reset it before the report is printed
- s->mtx.Unlock();
if (unlock_locked && ShouldReport(thr, ReportTypeMutexDestroyLocked)) {
- ThreadRegistryLock l(ctx->thread_registry);
+ ThreadRegistryLock l(&ctx->thread_registry);
ScopedReport rep(ReportTypeMutexDestroyLocked);
rep.AddMutex(mid);
VarSizeStackTrace trace;
@@ -119,43 +122,39 @@ void MutexDestroy(ThreadState *thr, uptr pc, uptr addr, u32 flagz) NO_THREAD_SAF
rep.AddLocation(addr, 1);
OutputReport(thr, rep);
- SyncVar *s = ctx->metamap.GetIfExistsAndLock(addr, true);
+ SyncVar *s = ctx->metamap.GetSyncIfExists(addr);
if (s != 0) {
+ Lock l(&s->mtx);
s->Reset(thr->proc());
- s->mtx.Unlock();
}
}
thr->mset.Remove(mid);
// Imitate a memory write to catch unlock-destroy races.
// Do this outside of sync mutex, because it can report a race which locks
// sync mutexes.
- if (IsAppMem(addr)) {
- CHECK(!thr->is_freeing);
- thr->is_freeing = true;
- MemoryWrite(thr, pc, addr, kSizeLog1);
- thr->is_freeing = false;
- }
+ if (IsAppMem(addr))
+ MemoryAccess(thr, pc, addr, 1, kAccessWrite | kAccessFree);
// s will be destroyed and freed in MetaMap::FreeBlock.
}
-void MutexPreLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz) NO_THREAD_SAFETY_ANALYSIS {
+void MutexPreLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz) {
DPrintf("#%d: MutexPreLock %zx flagz=0x%x\n", thr->tid, addr, flagz);
if (!(flagz & MutexFlagTryLock) && common_flags()->detect_deadlocks) {
- SyncVar *s = ctx->metamap.GetOrCreateAndLock(thr, pc, addr, false);
- s->UpdateFlags(flagz);
- if (s->owner_tid != thr->tid) {
- Callback cb(thr, pc);
- ctx->dd->MutexBeforeLock(&cb, &s->dd, true);
- s->mtx.ReadUnlock();
- ReportDeadlock(thr, pc, ctx->dd->GetReport(&cb));
- } else {
- s->mtx.ReadUnlock();
+ SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true);
+ {
+ ReadLock l(&s->mtx);
+ s->UpdateFlags(flagz);
+ if (s->owner_tid != thr->tid) {
+ Callback cb(thr, pc);
+ ctx->dd->MutexBeforeLock(&cb, &s->dd, true);
+ }
}
+ Callback cb(thr, pc);
+ ReportDeadlock(thr, pc, ctx->dd->GetReport(&cb));
}
}
-void MutexPostLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz,
- int rec) NO_THREAD_SAFETY_ANALYSIS {
+void MutexPostLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz, int rec) {
DPrintf("#%d: MutexPostLock %zx flag=0x%x rec=%d\n",
thr->tid, addr, flagz, rec);
if (flagz & MutexFlagRecursiveLock)
@@ -163,43 +162,45 @@ void MutexPostLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz,
else
rec = 1;
if (IsAppMem(addr))
- MemoryReadAtomic(thr, pc, addr, kSizeLog1);
- SyncVar *s = ctx->metamap.GetOrCreateAndLock(thr, pc, addr, true);
- s->UpdateFlags(flagz);
- thr->fast_state.IncrementEpoch();
- TraceAddEvent(thr, thr->fast_state, EventTypeLock, s->GetId());
- bool report_double_lock = false;
- if (s->owner_tid == kInvalidTid) {
- CHECK_EQ(s->recursion, 0);
- s->owner_tid = thr->tid;
- s->last_lock = thr->fast_state.raw();
- } else if (s->owner_tid == thr->tid) {
- CHECK_GT(s->recursion, 0);
- } else if (flags()->report_mutex_bugs && !s->IsFlagSet(MutexFlagBroken)) {
- s->SetFlags(MutexFlagBroken);
- report_double_lock = true;
- }
- const bool first = s->recursion == 0;
- s->recursion += rec;
- if (first) {
- AcquireImpl(thr, pc, &s->clock);
- AcquireImpl(thr, pc, &s->read_clock);
- } else if (!s->IsFlagSet(MutexFlagWriteReentrant)) {
- }
- thr->mset.Add(s->GetId(), true, thr->fast_state.epoch());
+ MemoryAccess(thr, pc, addr, 1, kAccessRead | kAccessAtomic);
+ u64 mid = 0;
bool pre_lock = false;
- if (first && common_flags()->detect_deadlocks) {
- pre_lock = (flagz & MutexFlagDoPreLockOnPostLock) &&
- !(flagz & MutexFlagTryLock);
- Callback cb(thr, pc);
- if (pre_lock)
- ctx->dd->MutexBeforeLock(&cb, &s->dd, true);
- ctx->dd->MutexAfterLock(&cb, &s->dd, true, flagz & MutexFlagTryLock);
+ bool first = false;
+ bool report_double_lock = false;
+ {
+ SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true);
+ Lock l(&s->mtx);
+ s->UpdateFlags(flagz);
+ thr->fast_state.IncrementEpoch();
+ TraceAddEvent(thr, thr->fast_state, EventTypeLock, s->GetId());
+ if (s->owner_tid == kInvalidTid) {
+ CHECK_EQ(s->recursion, 0);
+ s->owner_tid = thr->tid;
+ s->last_lock = thr->fast_state.raw();
+ } else if (s->owner_tid == thr->tid) {
+ CHECK_GT(s->recursion, 0);
+ } else if (flags()->report_mutex_bugs && !s->IsFlagSet(MutexFlagBroken)) {
+ s->SetFlags(MutexFlagBroken);
+ report_double_lock = true;
+ }
+ first = s->recursion == 0;
+ s->recursion += rec;
+ if (first) {
+ AcquireImpl(thr, pc, &s->clock);
+ AcquireImpl(thr, pc, &s->read_clock);
+ } else if (!s->IsFlagSet(MutexFlagWriteReentrant)) {
+ }
+ thr->mset.Add(s->GetId(), true, thr->fast_state.epoch());
+ if (first && common_flags()->detect_deadlocks) {
+ pre_lock =
+ (flagz & MutexFlagDoPreLockOnPostLock) && !(flagz & MutexFlagTryLock);
+ Callback cb(thr, pc);
+ if (pre_lock)
+ ctx->dd->MutexBeforeLock(&cb, &s->dd, true);
+ ctx->dd->MutexAfterLock(&cb, &s->dd, true, flagz & MutexFlagTryLock);
+ }
+ mid = s->GetId();
}
- u64 mid = s->GetId();
- s->mtx.Unlock();
- // Can't touch s after this point.
- s = 0;
if (report_double_lock)
ReportMutexMisuse(thr, pc, ReportTypeMutexDoubleLock, addr, mid);
if (first && pre_lock && common_flags()->detect_deadlocks) {
@@ -208,38 +209,40 @@ void MutexPostLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz,
}
}
-int MutexUnlock(ThreadState *thr, uptr pc, uptr addr, u32 flagz) NO_THREAD_SAFETY_ANALYSIS {
+int MutexUnlock(ThreadState *thr, uptr pc, uptr addr, u32 flagz) {
DPrintf("#%d: MutexUnlock %zx flagz=0x%x\n", thr->tid, addr, flagz);
if (IsAppMem(addr))
- MemoryReadAtomic(thr, pc, addr, kSizeLog1);
- SyncVar *s = ctx->metamap.GetOrCreateAndLock(thr, pc, addr, true);
- thr->fast_state.IncrementEpoch();
- TraceAddEvent(thr, thr->fast_state, EventTypeUnlock, s->GetId());
- int rec = 0;
+ MemoryAccess(thr, pc, addr, 1, kAccessRead | kAccessAtomic);
+ u64 mid = 0;
bool report_bad_unlock = false;
- if (!SANITIZER_GO && (s->recursion == 0 || s->owner_tid != thr->tid)) {
- if (flags()->report_mutex_bugs && !s->IsFlagSet(MutexFlagBroken)) {
- s->SetFlags(MutexFlagBroken);
- report_bad_unlock = true;
- }
- } else {
- rec = (flagz & MutexFlagRecursiveUnlock) ? s->recursion : 1;
- s->recursion -= rec;
- if (s->recursion == 0) {
- s->owner_tid = kInvalidTid;
- ReleaseStoreImpl(thr, pc, &s->clock);
+ int rec = 0;
+ {
+ SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true);
+ Lock l(&s->mtx);
+ thr->fast_state.IncrementEpoch();
+ TraceAddEvent(thr, thr->fast_state, EventTypeUnlock, s->GetId());
+ if (!SANITIZER_GO && (s->recursion == 0 || s->owner_tid != thr->tid)) {
+ if (flags()->report_mutex_bugs && !s->IsFlagSet(MutexFlagBroken)) {
+ s->SetFlags(MutexFlagBroken);
+ report_bad_unlock = true;
+ }
} else {
+ rec = (flagz & MutexFlagRecursiveUnlock) ? s->recursion : 1;
+ s->recursion -= rec;
+ if (s->recursion == 0) {
+ s->owner_tid = kInvalidTid;
+ ReleaseStoreImpl(thr, pc, &s->clock);
+ } else {
+ }
}
+ thr->mset.Del(s->GetId(), true);
+ if (common_flags()->detect_deadlocks && s->recursion == 0 &&
+ !report_bad_unlock) {
+ Callback cb(thr, pc);
+ ctx->dd->MutexBeforeUnlock(&cb, &s->dd, true);
+ }
+ mid = s->GetId();
}
- thr->mset.Del(s->GetId(), true);
- if (common_flags()->detect_deadlocks && s->recursion == 0 &&
- !report_bad_unlock) {
- Callback cb(thr, pc);
- ctx->dd->MutexBeforeUnlock(&cb, &s->dd, true);
- }
- u64 mid = s->GetId();
- s->mtx.Unlock();
- // Can't touch s after this point.
if (report_bad_unlock)
ReportMutexMisuse(thr, pc, ReportTypeMutexBadUnlock, addr, mid);
if (common_flags()->detect_deadlocks && !report_bad_unlock) {
@@ -249,49 +252,53 @@ int MutexUnlock(ThreadState *thr, uptr pc, uptr addr, u32 flagz) NO_THREAD_SAFET
return rec;
}
-void MutexPreReadLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz) NO_THREAD_SAFETY_ANALYSIS {
+void MutexPreReadLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz) {
DPrintf("#%d: MutexPreReadLock %zx flagz=0x%x\n", thr->tid, addr, flagz);
if (!(flagz & MutexFlagTryLock) && common_flags()->detect_deadlocks) {
- SyncVar *s = ctx->metamap.GetOrCreateAndLock(thr, pc, addr, false);
- s->UpdateFlags(flagz);
+ {
+ SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true);
+ ReadLock l(&s->mtx);
+ s->UpdateFlags(flagz);
+ Callback cb(thr, pc);
+ ctx->dd->MutexBeforeLock(&cb, &s->dd, false);
+ }
Callback cb(thr, pc);
- ctx->dd->MutexBeforeLock(&cb, &s->dd, false);
- s->mtx.ReadUnlock();
ReportDeadlock(thr, pc, ctx->dd->GetReport(&cb));
}
}
-void MutexPostReadLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz) NO_THREAD_SAFETY_ANALYSIS {
+void MutexPostReadLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz) {
DPrintf("#%d: MutexPostReadLock %zx flagz=0x%x\n", thr->tid, addr, flagz);
if (IsAppMem(addr))
- MemoryReadAtomic(thr, pc, addr, kSizeLog1);
- SyncVar *s = ctx->metamap.GetOrCreateAndLock(thr, pc, addr, false);
- s->UpdateFlags(flagz);
- thr->fast_state.IncrementEpoch();
- TraceAddEvent(thr, thr->fast_state, EventTypeRLock, s->GetId());
+ MemoryAccess(thr, pc, addr, 1, kAccessRead | kAccessAtomic);
+ u64 mid = 0;
bool report_bad_lock = false;
- if (s->owner_tid != kInvalidTid) {
- if (flags()->report_mutex_bugs && !s->IsFlagSet(MutexFlagBroken)) {
- s->SetFlags(MutexFlagBroken);
- report_bad_lock = true;
- }
- }
- AcquireImpl(thr, pc, &s->clock);
- s->last_lock = thr->fast_state.raw();
- thr->mset.Add(s->GetId(), false, thr->fast_state.epoch());
bool pre_lock = false;
- if (common_flags()->detect_deadlocks) {
- pre_lock = (flagz & MutexFlagDoPreLockOnPostLock) &&
- !(flagz & MutexFlagTryLock);
- Callback cb(thr, pc);
- if (pre_lock)
- ctx->dd->MutexBeforeLock(&cb, &s->dd, false);
- ctx->dd->MutexAfterLock(&cb, &s->dd, false, flagz & MutexFlagTryLock);
+ {
+ SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true);
+ ReadLock l(&s->mtx);
+ s->UpdateFlags(flagz);
+ thr->fast_state.IncrementEpoch();
+ TraceAddEvent(thr, thr->fast_state, EventTypeRLock, s->GetId());
+ if (s->owner_tid != kInvalidTid) {
+ if (flags()->report_mutex_bugs && !s->IsFlagSet(MutexFlagBroken)) {
+ s->SetFlags(MutexFlagBroken);
+ report_bad_lock = true;
+ }
+ }
+ AcquireImpl(thr, pc, &s->clock);
+ s->last_lock = thr->fast_state.raw();
+ thr->mset.Add(s->GetId(), false, thr->fast_state.epoch());
+ if (common_flags()->detect_deadlocks) {
+ pre_lock =
+ (flagz & MutexFlagDoPreLockOnPostLock) && !(flagz & MutexFlagTryLock);
+ Callback cb(thr, pc);
+ if (pre_lock)
+ ctx->dd->MutexBeforeLock(&cb, &s->dd, false);
+ ctx->dd->MutexAfterLock(&cb, &s->dd, false, flagz & MutexFlagTryLock);
+ }
+ mid = s->GetId();
}
- u64 mid = s->GetId();
- s->mtx.ReadUnlock();
- // Can't touch s after this point.
- s = 0;
if (report_bad_lock)
ReportMutexMisuse(thr, pc, ReportTypeMutexBadReadLock, addr, mid);
if (pre_lock && common_flags()->detect_deadlocks) {
@@ -300,28 +307,30 @@ void MutexPostReadLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz) NO_THREA
}
}
-void MutexReadUnlock(ThreadState *thr, uptr pc, uptr addr) NO_THREAD_SAFETY_ANALYSIS {
+void MutexReadUnlock(ThreadState *thr, uptr pc, uptr addr) {
DPrintf("#%d: MutexReadUnlock %zx\n", thr->tid, addr);
if (IsAppMem(addr))
- MemoryReadAtomic(thr, pc, addr, kSizeLog1);
- SyncVar *s = ctx->metamap.GetOrCreateAndLock(thr, pc, addr, true);
- thr->fast_state.IncrementEpoch();
- TraceAddEvent(thr, thr->fast_state, EventTypeRUnlock, s->GetId());
+ MemoryAccess(thr, pc, addr, 1, kAccessRead | kAccessAtomic);
+ u64 mid = 0;
bool report_bad_unlock = false;
- if (s->owner_tid != kInvalidTid) {
- if (flags()->report_mutex_bugs && !s->IsFlagSet(MutexFlagBroken)) {
- s->SetFlags(MutexFlagBroken);
- report_bad_unlock = true;
+ {
+ SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true);
+ Lock l(&s->mtx);
+ thr->fast_state.IncrementEpoch();
+ TraceAddEvent(thr, thr->fast_state, EventTypeRUnlock, s->GetId());
+ if (s->owner_tid != kInvalidTid) {
+ if (flags()->report_mutex_bugs && !s->IsFlagSet(MutexFlagBroken)) {
+ s->SetFlags(MutexFlagBroken);
+ report_bad_unlock = true;
+ }
}
+ ReleaseImpl(thr, pc, &s->read_clock);
+ if (common_flags()->detect_deadlocks && s->recursion == 0) {
+ Callback cb(thr, pc);
+ ctx->dd->MutexBeforeUnlock(&cb, &s->dd, false);
+ }
+ mid = s->GetId();
}
- ReleaseImpl(thr, pc, &s->read_clock);
- if (common_flags()->detect_deadlocks && s->recursion == 0) {
- Callback cb(thr, pc);
- ctx->dd->MutexBeforeUnlock(&cb, &s->dd, false);
- }
- u64 mid = s->GetId();
- s->mtx.Unlock();
- // Can't touch s after this point.
thr->mset.Del(mid, false);
if (report_bad_unlock)
ReportMutexMisuse(thr, pc, ReportTypeMutexBadReadUnlock, addr, mid);
@@ -331,42 +340,44 @@ void MutexReadUnlock(ThreadState *thr, uptr pc, uptr addr) NO_THREAD_SAFETY_ANAL
}
}
-void MutexReadOrWriteUnlock(ThreadState *thr, uptr pc, uptr addr) NO_THREAD_SAFETY_ANALYSIS {
+void MutexReadOrWriteUnlock(ThreadState *thr, uptr pc, uptr addr) {
DPrintf("#%d: MutexReadOrWriteUnlock %zx\n", thr->tid, addr);
if (IsAppMem(addr))
- MemoryReadAtomic(thr, pc, addr, kSizeLog1);
- SyncVar *s = ctx->metamap.GetOrCreateAndLock(thr, pc, addr, true);
- bool write = true;
+ MemoryAccess(thr, pc, addr, 1, kAccessRead | kAccessAtomic);
+ u64 mid = 0;
bool report_bad_unlock = false;
- if (s->owner_tid == kInvalidTid) {
- // Seems to be read unlock.
- write = false;
- thr->fast_state.IncrementEpoch();
- TraceAddEvent(thr, thr->fast_state, EventTypeRUnlock, s->GetId());
- ReleaseImpl(thr, pc, &s->read_clock);
- } else if (s->owner_tid == thr->tid) {
- // Seems to be write unlock.
- thr->fast_state.IncrementEpoch();
- TraceAddEvent(thr, thr->fast_state, EventTypeUnlock, s->GetId());
- CHECK_GT(s->recursion, 0);
- s->recursion--;
- if (s->recursion == 0) {
- s->owner_tid = kInvalidTid;
- ReleaseStoreImpl(thr, pc, &s->clock);
- } else {
+ {
+ SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true);
+ Lock l(&s->mtx);
+ bool write = true;
+ if (s->owner_tid == kInvalidTid) {
+ // Seems to be read unlock.
+ write = false;
+ thr->fast_state.IncrementEpoch();
+ TraceAddEvent(thr, thr->fast_state, EventTypeRUnlock, s->GetId());
+ ReleaseImpl(thr, pc, &s->read_clock);
+ } else if (s->owner_tid == thr->tid) {
+ // Seems to be write unlock.
+ thr->fast_state.IncrementEpoch();
+ TraceAddEvent(thr, thr->fast_state, EventTypeUnlock, s->GetId());
+ CHECK_GT(s->recursion, 0);
+ s->recursion--;
+ if (s->recursion == 0) {
+ s->owner_tid = kInvalidTid;
+ ReleaseStoreImpl(thr, pc, &s->clock);
+ } else {
+ }
+ } else if (!s->IsFlagSet(MutexFlagBroken)) {
+ s->SetFlags(MutexFlagBroken);
+ report_bad_unlock = true;
}
- } else if (!s->IsFlagSet(MutexFlagBroken)) {
- s->SetFlags(MutexFlagBroken);
- report_bad_unlock = true;
- }
- thr->mset.Del(s->GetId(), write);
- if (common_flags()->detect_deadlocks && s->recursion == 0) {
- Callback cb(thr, pc);
- ctx->dd->MutexBeforeUnlock(&cb, &s->dd, write);
+ thr->mset.Del(s->GetId(), write);
+ if (common_flags()->detect_deadlocks && s->recursion == 0) {
+ Callback cb(thr, pc);
+ ctx->dd->MutexBeforeUnlock(&cb, &s->dd, write);
+ }
+ mid = s->GetId();
}
- u64 mid = s->GetId();
- s->mtx.Unlock();
- // Can't touch s after this point.
if (report_bad_unlock)
ReportMutexMisuse(thr, pc, ReportTypeMutexBadUnlock, addr, mid);
if (common_flags()->detect_deadlocks) {
@@ -375,31 +386,29 @@ void MutexReadOrWriteUnlock(ThreadState *thr, uptr pc, uptr addr) NO_THREAD_SAFE
}
}
-void MutexRepair(ThreadState *thr, uptr pc, uptr addr) NO_THREAD_SAFETY_ANALYSIS {
+void MutexRepair(ThreadState *thr, uptr pc, uptr addr) {
DPrintf("#%d: MutexRepair %zx\n", thr->tid, addr);
- SyncVar *s = ctx->metamap.GetOrCreateAndLock(thr, pc, addr, true);
+ SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true);
+ Lock l(&s->mtx);
s->owner_tid = kInvalidTid;
s->recursion = 0;
- s->mtx.Unlock();
}
-void MutexInvalidAccess(ThreadState *thr, uptr pc, uptr addr) NO_THREAD_SAFETY_ANALYSIS {
+void MutexInvalidAccess(ThreadState *thr, uptr pc, uptr addr) {
DPrintf("#%d: MutexInvalidAccess %zx\n", thr->tid, addr);
- SyncVar *s = ctx->metamap.GetOrCreateAndLock(thr, pc, addr, true);
- u64 mid = s->GetId();
- s->mtx.Unlock();
- ReportMutexMisuse(thr, pc, ReportTypeMutexInvalidAccess, addr, mid);
+ SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true);
+ ReportMutexMisuse(thr, pc, ReportTypeMutexInvalidAccess, addr, s->GetId());
}
-void Acquire(ThreadState *thr, uptr pc, uptr addr) NO_THREAD_SAFETY_ANALYSIS {
+void Acquire(ThreadState *thr, uptr pc, uptr addr) {
DPrintf("#%d: Acquire %zx\n", thr->tid, addr);
if (thr->ignore_sync)
return;
- SyncVar *s = ctx->metamap.GetIfExistsAndLock(addr, false);
+ SyncVar *s = ctx->metamap.GetSyncIfExists(addr);
if (!s)
return;
+ ReadLock l(&s->mtx);
AcquireImpl(thr, pc, &s->clock);
- s->mtx.ReadUnlock();
}
static void UpdateClockCallback(ThreadContextBase *tctx_base, void *arg) {
@@ -413,49 +422,48 @@ static void UpdateClockCallback(ThreadContextBase *tctx_base, void *arg) {
thr->clock.set(&thr->proc()->clock_cache, tctx->tid, epoch);
}
-void AcquireGlobal(ThreadState *thr, uptr pc) {
+void AcquireGlobal(ThreadState *thr) {
DPrintf("#%d: AcquireGlobal\n", thr->tid);
if (thr->ignore_sync)
return;
- ThreadRegistryLock l(ctx->thread_registry);
- ctx->thread_registry->RunCallbackForEachThreadLocked(
- UpdateClockCallback, thr);
+ ThreadRegistryLock l(&ctx->thread_registry);
+ ctx->thread_registry.RunCallbackForEachThreadLocked(UpdateClockCallback, thr);
}
-void ReleaseStoreAcquire(ThreadState *thr, uptr pc, uptr addr) NO_THREAD_SAFETY_ANALYSIS {
+void ReleaseStoreAcquire(ThreadState *thr, uptr pc, uptr addr) {
DPrintf("#%d: ReleaseStoreAcquire %zx\n", thr->tid, addr);
if (thr->ignore_sync)
return;
- SyncVar *s = ctx->metamap.GetOrCreateAndLock(thr, pc, addr, true);
+ SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, false);
+ Lock l(&s->mtx);
thr->fast_state.IncrementEpoch();
// Can't increment epoch w/o writing to the trace as well.
TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0);
ReleaseStoreAcquireImpl(thr, pc, &s->clock);
- s->mtx.Unlock();
}
-void Release(ThreadState *thr, uptr pc, uptr addr) NO_THREAD_SAFETY_ANALYSIS {
+void Release(ThreadState *thr, uptr pc, uptr addr) {
DPrintf("#%d: Release %zx\n", thr->tid, addr);
if (thr->ignore_sync)
return;
- SyncVar *s = ctx->metamap.GetOrCreateAndLock(thr, pc, addr, true);
+ SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, false);
+ Lock l(&s->mtx);
thr->fast_state.IncrementEpoch();
// Can't increment epoch w/o writing to the trace as well.
TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0);
ReleaseImpl(thr, pc, &s->clock);
- s->mtx.Unlock();
}
-void ReleaseStore(ThreadState *thr, uptr pc, uptr addr) NO_THREAD_SAFETY_ANALYSIS {
+void ReleaseStore(ThreadState *thr, uptr pc, uptr addr) {
DPrintf("#%d: ReleaseStore %zx\n", thr->tid, addr);
if (thr->ignore_sync)
return;
- SyncVar *s = ctx->metamap.GetOrCreateAndLock(thr, pc, addr, true);
+ SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, false);
+ Lock l(&s->mtx);
thr->fast_state.IncrementEpoch();
// Can't increment epoch w/o writing to the trace as well.
TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0);
ReleaseStoreImpl(thr, pc, &s->clock);
- s->mtx.Unlock();
}
#if !SANITIZER_GO
@@ -469,13 +477,13 @@ static void UpdateSleepClockCallback(ThreadContextBase *tctx_base, void *arg) {
}
void AfterSleep(ThreadState *thr, uptr pc) {
- DPrintf("#%d: AfterSleep %zx\n", thr->tid);
+ DPrintf("#%d: AfterSleep\n", thr->tid);
if (thr->ignore_sync)
return;
thr->last_sleep_stack_id = CurrentStackId(thr, pc);
- ThreadRegistryLock l(ctx->thread_registry);
- ctx->thread_registry->RunCallbackForEachThreadLocked(
- UpdateSleepClockCallback, thr);
+ ThreadRegistryLock l(&ctx->thread_registry);
+ ctx->thread_registry.RunCallbackForEachThreadLocked(UpdateSleepClockCallback,
+ thr);
}
#endif
@@ -521,7 +529,7 @@ void AcquireReleaseImpl(ThreadState *thr, uptr pc, SyncClock *c) {
void ReportDeadlock(ThreadState *thr, uptr pc, DDReport *r) {
if (r == 0 || !ShouldReport(thr, ReportTypeDeadlock))
return;
- ThreadRegistryLock l(ctx->thread_registry);
+ ThreadRegistryLock l(&ctx->thread_registry);
ScopedReport rep(ReportTypeDeadlock);
for (int i = 0; i < r->n; i++) {
rep.AddMutex(r->loop[i].mtx_ctx0);
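Across this file the rewrite follows one pattern: manual SyncVar::mtx lock/unlock pairs (which forced the NO_THREAD_SAFETY_ANALYSIS annotations) are replaced by scoped Lock/ReadLock guards, and anything that may emit a report is moved outside the guarded block, with only the mutex id carried out. Reduced to its essentials (schematic; the bookkeeping inside the block is elided):

    u64 mid = 0;
    {
      SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true);
      Lock l(&s->mtx);   // released automatically at the end of this block
      // ... inspect and update *s, record what needs to be reported ...
      mid = s->GetId();
    }                    // s->mtx is unlocked here; s must not be touched anymore
    // Reporting takes the thread registry lock, so it happens outside s->mtx.
    ReportMutexMisuse(thr, pc, ReportTypeMutexBadUnlock, addr, mid);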
diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp
index 3e809e653c70..811695d144c5 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp
@@ -68,8 +68,10 @@ static void StackStripMain(SymbolizedStack *frames) {
} else if (last && 0 == internal_strcmp(last, "__tsan_thread_start_func")) {
last_frame->ClearAll();
last_frame2->next = nullptr;
- // Strip global ctors init.
- } else if (last && 0 == internal_strcmp(last, "__do_global_ctors_aux")) {
+ // Strip global ctors init, .preinit_array and main caller.
+ } else if (last && (0 == internal_strcmp(last, "__do_global_ctors_aux") ||
+ 0 == internal_strcmp(last, "__libc_csu_init") ||
+ 0 == internal_strcmp(last, "__libc_start_main"))) {
last_frame->ClearAll();
last_frame2->next = nullptr;
// If both are 0, then we probably just failed to symbolize.
@@ -120,7 +122,7 @@ static ReportStack *SymbolizeStack(StackTrace trace) {
}
StackStripMain(top);
- ReportStack *stack = ReportStack::New();
+ auto *stack = New<ReportStack>();
stack->frames = top;
return stack;
}
@@ -132,7 +134,7 @@ bool ShouldReport(ThreadState *thr, ReportType typ) {
CheckedMutex::CheckNoLocks();
// For the same reason check we didn't lock thread_registry yet.
if (SANITIZER_DEBUG)
- ThreadRegistryLock l(ctx->thread_registry);
+ ThreadRegistryLock l(&ctx->thread_registry);
if (!flags()->report_bugs || thr->suppress_reports)
return false;
switch (typ) {
@@ -154,9 +156,8 @@ bool ShouldReport(ThreadState *thr, ReportType typ) {
}
ScopedReportBase::ScopedReportBase(ReportType typ, uptr tag) {
- ctx->thread_registry->CheckLocked();
- void *mem = internal_alloc(MBlockReport, sizeof(ReportDesc));
- rep_ = new(mem) ReportDesc;
+ ctx->thread_registry.CheckLocked();
+ rep_ = New<ReportDesc>();
rep_->typ = typ;
rep_->tag = tag;
ctx->report_mtx.Lock();
@@ -165,7 +166,6 @@ ScopedReportBase::ScopedReportBase(ReportType typ, uptr tag) {
ScopedReportBase::~ScopedReportBase() {
ctx->report_mtx.Unlock();
DestroyAndFree(rep_);
- rep_ = nullptr;
}
void ScopedReportBase::AddStack(StackTrace stack, bool suppressable) {
@@ -176,8 +176,7 @@ void ScopedReportBase::AddStack(StackTrace stack, bool suppressable) {
void ScopedReportBase::AddMemoryAccess(uptr addr, uptr external_tag, Shadow s,
StackTrace stack, const MutexSet *mset) {
- void *mem = internal_alloc(MBlockReportMop, sizeof(ReportMop));
- ReportMop *mop = new(mem) ReportMop;
+ auto *mop = New<ReportMop>();
rep_->mops.PushBack(mop);
mop->tid = s.tid();
mop->addr = addr + s.addr0();
@@ -196,7 +195,7 @@ void ScopedReportBase::AddMemoryAccess(uptr addr, uptr external_tag, Shadow s,
}
}
-void ScopedReportBase::AddUniqueTid(int unique_tid) {
+void ScopedReportBase::AddUniqueTid(Tid unique_tid) {
rep_->unique_tids.PushBack(unique_tid);
}
@@ -205,8 +204,7 @@ void ScopedReportBase::AddThread(const ThreadContext *tctx, bool suppressable) {
if ((u32)rep_->threads[i]->id == tctx->tid)
return;
}
- void *mem = internal_alloc(MBlockReportThread, sizeof(ReportThread));
- ReportThread *rt = new(mem) ReportThread;
+ auto *rt = New<ReportThread>();
rep_->threads.PushBack(rt);
rt->id = tctx->tid;
rt->os_id = tctx->os_id;
@@ -226,17 +224,17 @@ static bool FindThreadByUidLockedCallback(ThreadContextBase *tctx, void *arg) {
return tctx->unique_id == (u32)unique_id;
}
-static ThreadContext *FindThreadByUidLocked(int unique_id) {
- ctx->thread_registry->CheckLocked();
+static ThreadContext *FindThreadByUidLocked(Tid unique_id) {
+ ctx->thread_registry.CheckLocked();
return static_cast<ThreadContext *>(
- ctx->thread_registry->FindThreadContextLocked(
+ ctx->thread_registry.FindThreadContextLocked(
FindThreadByUidLockedCallback, &unique_id));
}
-static ThreadContext *FindThreadByTidLocked(int tid) {
- ctx->thread_registry->CheckLocked();
- return static_cast<ThreadContext*>(
- ctx->thread_registry->GetThreadLocked(tid));
+static ThreadContext *FindThreadByTidLocked(Tid tid) {
+ ctx->thread_registry.CheckLocked();
+ return static_cast<ThreadContext *>(
+ ctx->thread_registry.GetThreadLocked(tid));
}
static bool IsInStackOrTls(ThreadContextBase *tctx_base, void *arg) {
@@ -251,10 +249,10 @@ static bool IsInStackOrTls(ThreadContextBase *tctx_base, void *arg) {
}
ThreadContext *IsThreadStackOrTls(uptr addr, bool *is_stack) {
- ctx->thread_registry->CheckLocked();
- ThreadContext *tctx = static_cast<ThreadContext*>(
- ctx->thread_registry->FindThreadContextLocked(IsInStackOrTls,
- (void*)addr));
+ ctx->thread_registry.CheckLocked();
+ ThreadContext *tctx =
+ static_cast<ThreadContext *>(ctx->thread_registry.FindThreadContextLocked(
+ IsInStackOrTls, (void *)addr));
if (!tctx)
return 0;
ThreadState *thr = tctx->thr;
@@ -264,7 +262,7 @@ ThreadContext *IsThreadStackOrTls(uptr addr, bool *is_stack) {
}
#endif
-void ScopedReportBase::AddThread(int unique_tid, bool suppressable) {
+void ScopedReportBase::AddThread(Tid unique_tid, bool suppressable) {
#if !SANITIZER_GO
if (const ThreadContext *tctx = FindThreadByUidLocked(unique_tid))
AddThread(tctx, suppressable);
@@ -276,8 +274,7 @@ void ScopedReportBase::AddMutex(const SyncVar *s) {
if (rep_->mutexes[i]->id == s->uid)
return;
}
- void *mem = internal_alloc(MBlockReportMutex, sizeof(ReportMutex));
- ReportMutex *rm = new(mem) ReportMutex;
+ auto *rm = New<ReportMutex>();
rep_->mutexes.PushBack(rm);
rm->id = s->uid;
rm->addr = s->addr;
@@ -285,22 +282,21 @@ void ScopedReportBase::AddMutex(const SyncVar *s) {
rm->stack = SymbolizeStackId(s->creation_stack_id);
}
-u64 ScopedReportBase::AddMutex(u64 id) NO_THREAD_SAFETY_ANALYSIS {
+u64 ScopedReportBase::AddMutex(u64 id) {
u64 uid = 0;
u64 mid = id;
uptr addr = SyncVar::SplitId(id, &uid);
- SyncVar *s = ctx->metamap.GetIfExistsAndLock(addr, true);
+ SyncVar *s = ctx->metamap.GetSyncIfExists(addr);
// Check that the mutex is still alive.
// Another mutex can be created at the same address,
// so check uid as well.
if (s && s->CheckId(uid)) {
+ Lock l(&s->mtx);
mid = s->uid;
AddMutex(s);
} else {
AddDeadMutex(id);
}
- if (s)
- s->mtx.Unlock();
return mid;
}
@@ -309,8 +305,7 @@ void ScopedReportBase::AddDeadMutex(u64 id) {
if (rep_->mutexes[i]->id == id)
return;
}
- void *mem = internal_alloc(MBlockReportMutex, sizeof(ReportMutex));
- ReportMutex *rm = new(mem) ReportMutex;
+ auto *rm = New<ReportMutex>();
rep_->mutexes.PushBack(rm);
rm->id = id;
rm->addr = 0;
@@ -323,10 +318,11 @@ void ScopedReportBase::AddLocation(uptr addr, uptr size) {
return;
#if !SANITIZER_GO
int fd = -1;
- int creat_tid = kInvalidTid;
- u32 creat_stack = 0;
+ Tid creat_tid = kInvalidTid;
+ StackID creat_stack = 0;
if (FdLocation(addr, &fd, &creat_tid, &creat_stack)) {
- ReportLocation *loc = ReportLocation::New(ReportLocationFD);
+ auto *loc = New<ReportLocation>();
+ loc->type = ReportLocationFD;
loc->fd = fd;
loc->tid = creat_tid;
loc->stack = SymbolizeStackId(creat_stack);
@@ -337,15 +333,19 @@ void ScopedReportBase::AddLocation(uptr addr, uptr size) {
return;
}
MBlock *b = 0;
+ uptr block_begin = 0;
Allocator *a = allocator();
if (a->PointerIsMine((void*)addr)) {
- void *block_begin = a->GetBlockBegin((void*)addr);
+ block_begin = (uptr)a->GetBlockBegin((void *)addr);
if (block_begin)
- b = ctx->metamap.GetBlock((uptr)block_begin);
+ b = ctx->metamap.GetBlock(block_begin);
}
+ if (!b)
+ b = JavaHeapBlock(addr, &block_begin);
if (b != 0) {
ThreadContext *tctx = FindThreadByTidLocked(b->tid);
- ReportLocation *loc = ReportLocation::New(ReportLocationHeap);
+ auto *loc = New<ReportLocation>();
+ loc->type = ReportLocationHeap;
loc->heap_chunk_start = (uptr)allocator()->GetBlockBegin((void *)addr);
loc->heap_chunk_size = b->siz;
loc->external_tag = b->tag;
@@ -358,8 +358,8 @@ void ScopedReportBase::AddLocation(uptr addr, uptr size) {
}
bool is_stack = false;
if (ThreadContext *tctx = IsThreadStackOrTls(addr, &is_stack)) {
- ReportLocation *loc =
- ReportLocation::New(is_stack ? ReportLocationStack : ReportLocationTLS);
+ auto *loc = New<ReportLocation>();
+ loc->type = is_stack ? ReportLocationStack : ReportLocationTLS;
loc->tid = tctx->tid;
rep_->locs.PushBack(loc);
AddThread(tctx);
@@ -373,7 +373,7 @@ void ScopedReportBase::AddLocation(uptr addr, uptr size) {
}
#if !SANITIZER_GO
-void ScopedReportBase::AddSleep(u32 stack_id) {
+void ScopedReportBase::AddSleep(StackID stack_id) {
rep_->sleep = SymbolizeStackId(stack_id);
}
#endif
@@ -387,7 +387,7 @@ ScopedReport::ScopedReport(ReportType typ, uptr tag)
ScopedReport::~ScopedReport() {}
-void RestoreStack(int tid, const u64 epoch, VarSizeStackTrace *stk,
+void RestoreStack(Tid tid, const u64 epoch, VarSizeStackTrace *stk,
MutexSet *mset, uptr *tag) {
// This function restores stack trace and mutex set for the thread/epoch.
// It does so by getting stack trace and mutex set at the beginning of
@@ -450,6 +450,232 @@ void RestoreStack(int tid, const u64 epoch, VarSizeStackTrace *stk,
ExtractTagFromStack(stk, tag);
}
+namespace v3 {
+
+// Replays the trace up to last_pos position in the last part
+// or up to the provided epoch/sid (whichever is earlier)
+// and calls the provided function f for each event.
+template <typename Func>
+void TraceReplay(Trace *trace, TracePart *last, Event *last_pos, Sid sid,
+ Epoch epoch, Func f) {
+ TracePart *part = trace->parts.Front();
+ Sid ev_sid = kFreeSid;
+ Epoch ev_epoch = kEpochOver;
+ for (;;) {
+ DCHECK_EQ(part->trace, trace);
+ // Note: an event can't start in the last element.
+ // Since an event can take up to 2 elements,
+ // we ensure we have at least 2 before adding an event.
+ Event *end = &part->events[TracePart::kSize - 1];
+ if (part == last)
+ end = last_pos;
+ for (Event *evp = &part->events[0]; evp < end; evp++) {
+ Event *evp0 = evp;
+ if (!evp->is_access && !evp->is_func) {
+ switch (evp->type) {
+ case EventType::kTime: {
+ auto *ev = reinterpret_cast<EventTime *>(evp);
+ ev_sid = static_cast<Sid>(ev->sid);
+ ev_epoch = static_cast<Epoch>(ev->epoch);
+ if (ev_sid == sid && ev_epoch > epoch)
+ return;
+ break;
+ }
+ case EventType::kAccessExt:
+ FALLTHROUGH;
+ case EventType::kAccessRange:
+ FALLTHROUGH;
+ case EventType::kLock:
+ FALLTHROUGH;
+ case EventType::kRLock:
+ // These take 2 Event elements.
+ evp++;
+ break;
+ case EventType::kUnlock:
+ // This takes 1 Event element.
+ break;
+ }
+ }
+ CHECK_NE(ev_sid, kFreeSid);
+ CHECK_NE(ev_epoch, kEpochOver);
+ f(ev_sid, ev_epoch, evp0);
+ }
+ if (part == last)
+ return;
+ part = trace->parts.Next(part);
+ CHECK(part);
+ }
+ CHECK(0);
+}
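As its comment says, TraceReplay walks the trace parts in order and invokes the callback once per decoded event. A minimal usage sketch (the lambda body is illustrative only, not taken from the patch):

  // Count the events recorded by this thread up to (sid, epoch).
  uptr n_events = 0;
  TraceReplay(trace, last_part, last_pos, sid, epoch,
              [&](Sid ev_sid, Epoch ev_epoch, Event *evp) {
                (void)ev_sid;
                (void)ev_epoch;
                (void)evp;
                n_events++;
              });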
+
+static void RestoreStackMatch(VarSizeStackTrace *pstk, MutexSet *pmset,
+ Vector<uptr> *stack, MutexSet *mset, uptr pc,
+ bool *found) {
+ DPrintf2(" MATCHED\n");
+ *pmset = *mset;
+ stack->PushBack(pc);
+ pstk->Init(&(*stack)[0], stack->Size());
+ stack->PopBack();
+ *found = true;
+}
+
+// Checks if addr1|size1 is fully contained in addr2|size2.
+// We check for fully contained instead of just overlapping
+// because a memory access is always traced once, but can be
+// split into multiple accesses in the shadow.
+static constexpr bool IsWithinAccess(uptr addr1, uptr size1, uptr addr2,
+ uptr size2) {
+ return addr1 >= addr2 && addr1 + size1 <= addr2 + size2;
+}
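Since IsWithinAccess is constexpr, the containment rule can be checked at compile time; for example, an 8-byte traced access may show up in the shadow as two 4-byte accesses, both fully contained in it (addresses below are illustrative):

  static_assert(IsWithinAccess(0x1000, 4, 0x1000, 8), "low half contained");
  static_assert(IsWithinAccess(0x1004, 4, 0x1000, 8), "high half contained");
  static_assert(!IsWithinAccess(0x0ffc, 8, 0x1000, 8), "overlap is not enough");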
+
+// Replays the trace of thread tid up to the target event identified
+// by sid/epoch/addr/size/typ and restores and returns stack, mutex set
+// and tag for that event. If there are multiple such events, it returns
+// the last one. Returns false if the event is not present in the trace.
+bool RestoreStack(Tid tid, EventType type, Sid sid, Epoch epoch, uptr addr,
+ uptr size, AccessType typ, VarSizeStackTrace *pstk,
+ MutexSet *pmset, uptr *ptag) {
+ // This function restores stack trace and mutex set for the thread/epoch.
+ // It does so by getting stack trace and mutex set at the beginning of
+ // trace part, and then replaying the trace till the given epoch.
+ DPrintf2("RestoreStack: tid=%u sid=%u@%u addr=0x%zx/%zu typ=%x\n", tid,
+ static_cast<int>(sid), static_cast<int>(epoch), addr, size,
+ static_cast<int>(typ));
+ ctx->slot_mtx.CheckLocked(); // needed to prevent trace part recycling
+ ctx->thread_registry.CheckLocked();
+ ThreadContext *tctx =
+ static_cast<ThreadContext *>(ctx->thread_registry.GetThreadLocked(tid));
+ Trace *trace = &tctx->trace;
+ // Snapshot first/last parts and the current position in the last part.
+ TracePart *first_part;
+ TracePart *last_part;
+ Event *last_pos;
+ {
+ Lock lock(&trace->mtx);
+ first_part = trace->parts.Front();
+ if (!first_part)
+ return false;
+ last_part = trace->parts.Back();
+ last_pos = trace->final_pos;
+ if (tctx->thr)
+ last_pos = (Event *)atomic_load_relaxed(&tctx->thr->trace_pos);
+ }
+ DynamicMutexSet mset;
+ Vector<uptr> stack;
+ uptr prev_pc = 0;
+ bool found = false;
+ bool is_read = typ & kAccessRead;
+ bool is_atomic = typ & kAccessAtomic;
+ bool is_free = typ & kAccessFree;
+ TraceReplay(
+ trace, last_part, last_pos, sid, epoch,
+ [&](Sid ev_sid, Epoch ev_epoch, Event *evp) {
+ bool match = ev_sid == sid && ev_epoch == epoch;
+ if (evp->is_access) {
+ if (evp->is_func == 0 && evp->type == EventType::kAccessExt &&
+ evp->_ == 0) // NopEvent
+ return;
+ auto *ev = reinterpret_cast<EventAccess *>(evp);
+ uptr ev_addr = RestoreAddr(ev->addr);
+ uptr ev_size = 1 << ev->size_log;
+ uptr ev_pc =
+ prev_pc + ev->pc_delta - (1 << (EventAccess::kPCBits - 1));
+ prev_pc = ev_pc;
+ DPrintf2(" Access: pc=0x%zx addr=0x%zx/%zu type=%u/%u\n", ev_pc,
+ ev_addr, ev_size, ev->is_read, ev->is_atomic);
+ if (match && type == EventType::kAccessExt &&
+ IsWithinAccess(addr, size, ev_addr, ev_size) &&
+ is_read == ev->is_read && is_atomic == ev->is_atomic && !is_free)
+ RestoreStackMatch(pstk, pmset, &stack, mset, ev_pc, &found);
+ return;
+ }
+ if (evp->is_func) {
+ auto *ev = reinterpret_cast<EventFunc *>(evp);
+ if (ev->pc) {
+ DPrintf2(" FuncEnter: pc=0x%llx\n", ev->pc);
+ stack.PushBack(ev->pc);
+ } else {
+ DPrintf2(" FuncExit\n");
+ CHECK(stack.Size());
+ stack.PopBack();
+ }
+ return;
+ }
+ switch (evp->type) {
+ case EventType::kAccessExt: {
+ auto *ev = reinterpret_cast<EventAccessExt *>(evp);
+ uptr ev_addr = RestoreAddr(ev->addr);
+ uptr ev_size = 1 << ev->size_log;
+ prev_pc = ev->pc;
+ DPrintf2(" AccessExt: pc=0x%llx addr=0x%zx/%zu type=%u/%u\n",
+ ev->pc, ev_addr, ev_size, ev->is_read, ev->is_atomic);
+ if (match && type == EventType::kAccessExt &&
+ IsWithinAccess(addr, size, ev_addr, ev_size) &&
+ is_read == ev->is_read && is_atomic == ev->is_atomic &&
+ !is_free)
+ RestoreStackMatch(pstk, pmset, &stack, mset, ev->pc, &found);
+ break;
+ }
+ case EventType::kAccessRange: {
+ auto *ev = reinterpret_cast<EventAccessRange *>(evp);
+ uptr ev_addr = RestoreAddr(ev->addr);
+ uptr ev_size =
+ (ev->size_hi << EventAccessRange::kSizeLoBits) + ev->size_lo;
+ uptr ev_pc = RestoreAddr(ev->pc);
+ prev_pc = ev_pc;
+ DPrintf2(" Range: pc=0x%zx addr=0x%zx/%zu type=%u/%u\n", ev_pc,
+ ev_addr, ev_size, ev->is_read, ev->is_free);
+ if (match && type == EventType::kAccessExt &&
+ IsWithinAccess(addr, size, ev_addr, ev_size) &&
+ is_read == ev->is_read && !is_atomic && is_free == ev->is_free)
+ RestoreStackMatch(pstk, pmset, &stack, mset, ev_pc, &found);
+ break;
+ }
+ case EventType::kLock:
+ FALLTHROUGH;
+ case EventType::kRLock: {
+ auto *ev = reinterpret_cast<EventLock *>(evp);
+ bool is_write = ev->type == EventType::kLock;
+ uptr ev_addr = RestoreAddr(ev->addr);
+ uptr ev_pc = RestoreAddr(ev->pc);
+ StackID stack_id =
+ (ev->stack_hi << EventLock::kStackIDLoBits) + ev->stack_lo;
+ DPrintf2(" Lock: pc=0x%zx addr=0x%zx stack=%u write=%d\n", ev_pc,
+ ev_addr, stack_id, is_write);
+ mset->AddAddr(ev_addr, stack_id, is_write);
+ // Events with ev_pc == 0 are written to the beginning of trace
+ // part as initial mutex set (are not real).
+ if (match && type == EventType::kLock && addr == ev_addr && ev_pc)
+ RestoreStackMatch(pstk, pmset, &stack, mset, ev_pc, &found);
+ break;
+ }
+ case EventType::kUnlock: {
+ auto *ev = reinterpret_cast<EventUnlock *>(evp);
+ uptr ev_addr = RestoreAddr(ev->addr);
+ DPrintf2(" Unlock: addr=0x%zx\n", ev_addr);
+ mset->DelAddr(ev_addr);
+ break;
+ }
+ case EventType::kTime:
+ // TraceReplay already extracted sid/epoch from it,
+ // nothing else to do here.
+ break;
+ }
+ });
+ ExtractTagFromStack(pstk, ptag);
+ return found;
+}
+
+} // namespace v3
+
+bool RacyStacks::operator==(const RacyStacks &other) const {
+ if (hash[0] == other.hash[0] && hash[1] == other.hash[1])
+ return true;
+ if (hash[0] == other.hash[1] && hash[1] == other.hash[0])
+ return true;
+ return false;
+}
+
static bool FindRacyStacks(const RacyStacks &hash) {
for (uptr i = 0; i < ctx->racy_stacks.Size(); i++) {
if (hash == ctx->racy_stacks[i]) {
@@ -614,7 +840,7 @@ void ReportRace(ThreadState *thr) {
thr->racy_state[1] = s.raw();
}
- uptr addr = ShadowToMem((uptr)thr->racy_shadow_addr);
+ uptr addr = ShadowToMem(thr->racy_shadow_addr);
uptr addr_min = 0;
uptr addr_max = 0;
{
@@ -669,11 +895,7 @@ void ReportRace(ThreadState *thr) {
if (IsFiredSuppression(ctx, typ, traces[0]))
return;
- // MutexSet is too large to live on stack.
- Vector<u64> mset_buffer;
- mset_buffer.Resize(sizeof(MutexSet) / sizeof(u64) + 1);
- MutexSet *mset2 = new(&mset_buffer[0]) MutexSet();
-
+ DynamicMutexSet mset2;
Shadow s2(thr->racy_state[1]);
RestoreStack(s2.tid(), s2.epoch(), &traces[1], mset2, &tags[1]);
if (IsFiredSuppression(ctx, typ, traces[1]))
@@ -692,7 +914,7 @@ void ReportRace(ThreadState *thr) {
}
}
- ThreadRegistryLock l0(ctx->thread_registry);
+ ThreadRegistryLock l0(&ctx->thread_registry);
ScopedReport rep(typ, tag);
for (uptr i = 0; i < kMop; i++) {
Shadow s(thr->racy_state[i]);
@@ -702,8 +924,8 @@ void ReportRace(ThreadState *thr) {
for (uptr i = 0; i < kMop; i++) {
FastState s(thr->racy_state[i]);
- ThreadContext *tctx = static_cast<ThreadContext*>(
- ctx->thread_registry->GetThreadLocked(s.tid()));
+ ThreadContext *tctx = static_cast<ThreadContext *>(
+ ctx->thread_registry.GetThreadLocked(s.tid()));
if (s.epoch() < tctx->epoch0 || s.epoch() > tctx->epoch1)
continue;
rep.AddThread(tctx);
@@ -738,9 +960,7 @@ void PrintCurrentStack(ThreadState *thr, uptr pc) {
ALWAYS_INLINE USED void PrintCurrentStackSlow(uptr pc) {
#if !SANITIZER_GO
uptr bp = GET_CURRENT_FRAME();
- BufferedStackTrace *ptrace =
- new(internal_alloc(MBlockStackTrace, sizeof(BufferedStackTrace)))
- BufferedStackTrace();
+ auto *ptrace = New<BufferedStackTrace>();
ptrace->Unwind(pc, bp, nullptr, false);
for (uptr i = 0; i < ptrace->size / 2; i++) {
diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp
index cdb6e60ebbd0..c8f7124c009d 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp
@@ -21,48 +21,14 @@ namespace __tsan {
// ThreadContext implementation.
-ThreadContext::ThreadContext(int tid)
- : ThreadContextBase(tid)
- , thr()
- , sync()
- , epoch0()
- , epoch1() {
-}
+ThreadContext::ThreadContext(Tid tid)
+ : ThreadContextBase(tid), thr(), sync(), epoch0(), epoch1() {}
#if !SANITIZER_GO
ThreadContext::~ThreadContext() {
}
#endif
-void ThreadContext::OnDead() {
- CHECK_EQ(sync.size(), 0);
-}
-
-void ThreadContext::OnJoined(void *arg) {
- ThreadState *caller_thr = static_cast<ThreadState *>(arg);
- AcquireImpl(caller_thr, 0, &sync);
- sync.Reset(&caller_thr->proc()->clock_cache);
-}
-
-struct OnCreatedArgs {
- ThreadState *thr;
- uptr pc;
-};
-
-void ThreadContext::OnCreated(void *arg) {
- thr = 0;
- if (tid == kMainTid)
- return;
- OnCreatedArgs *args = static_cast<OnCreatedArgs *>(arg);
- if (!args->thr) // GCD workers don't have a parent thread.
- return;
- args->thr->fast_state.IncrementEpoch();
- // Can't increment epoch w/o writing to the trace as well.
- TraceAddEvent(args->thr, args->thr->fast_state, EventTypeMop, 0);
- ReleaseImpl(args->thr, 0, &sync);
- creation_stack_id = CurrentStackId(args->thr, args->pc);
-}
-
void ThreadContext::OnReset() {
CHECK_EQ(sync.size(), 0);
uptr trace_p = GetThreadTrace(tid);
@@ -70,94 +36,15 @@ void ThreadContext::OnReset() {
//!!! ReleaseMemoryToOS(GetThreadTraceHeader(tid), sizeof(Trace));
}
-void ThreadContext::OnDetached(void *arg) {
- ThreadState *thr1 = static_cast<ThreadState*>(arg);
- sync.Reset(&thr1->proc()->clock_cache);
-}
-
-struct OnStartedArgs {
- ThreadState *thr;
- uptr stk_addr;
- uptr stk_size;
- uptr tls_addr;
- uptr tls_size;
-};
-
-void ThreadContext::OnStarted(void *arg) {
- OnStartedArgs *args = static_cast<OnStartedArgs*>(arg);
- thr = args->thr;
- // RoundUp so that one trace part does not contain events
- // from different threads.
- epoch0 = RoundUp(epoch1 + 1, kTracePartSize);
- epoch1 = (u64)-1;
- new(thr) ThreadState(ctx, tid, unique_id, epoch0, reuse_count,
- args->stk_addr, args->stk_size, args->tls_addr, args->tls_size);
-#if !SANITIZER_GO
- thr->shadow_stack = &ThreadTrace(thr->tid)->shadow_stack[0];
- thr->shadow_stack_pos = thr->shadow_stack;
- thr->shadow_stack_end = thr->shadow_stack + kShadowStackSize;
-#else
- // Setup dynamic shadow stack.
- const int kInitStackSize = 8;
- thr->shadow_stack = (uptr*)internal_alloc(MBlockShadowStack,
- kInitStackSize * sizeof(uptr));
- thr->shadow_stack_pos = thr->shadow_stack;
- thr->shadow_stack_end = thr->shadow_stack + kInitStackSize;
-#endif
- if (common_flags()->detect_deadlocks)
- thr->dd_lt = ctx->dd->CreateLogicalThread(unique_id);
- thr->fast_state.SetHistorySize(flags()->history_size);
- // Commit switch to the new part of the trace.
- // TraceAddEvent will reset stack0/mset0 in the new part for us.
- TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0);
-
- thr->fast_synch_epoch = epoch0;
- AcquireImpl(thr, 0, &sync);
- sync.Reset(&thr->proc()->clock_cache);
- thr->is_inited = true;
- DPrintf("#%d: ThreadStart epoch=%zu stk_addr=%zx stk_size=%zx "
- "tls_addr=%zx tls_size=%zx\n",
- tid, (uptr)epoch0, args->stk_addr, args->stk_size,
- args->tls_addr, args->tls_size);
-}
-
-void ThreadContext::OnFinished() {
-#if SANITIZER_GO
- internal_free(thr->shadow_stack);
- thr->shadow_stack = nullptr;
- thr->shadow_stack_pos = nullptr;
- thr->shadow_stack_end = nullptr;
-#endif
- if (!detached) {
- thr->fast_state.IncrementEpoch();
- // Can't increment epoch w/o writing to the trace as well.
- TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0);
- ReleaseImpl(thr, 0, &sync);
- }
- epoch1 = thr->fast_state.epoch();
-
- if (common_flags()->detect_deadlocks)
- ctx->dd->DestroyLogicalThread(thr->dd_lt);
- thr->clock.ResetCached(&thr->proc()->clock_cache);
-#if !SANITIZER_GO
- thr->last_sleep_clock.ResetCached(&thr->proc()->clock_cache);
-#endif
-#if !SANITIZER_GO
- PlatformCleanUpThreadState(thr);
-#endif
- thr->~ThreadState();
- thr = 0;
-}
-
#if !SANITIZER_GO
struct ThreadLeak {
ThreadContext *tctx;
int count;
};
-static void MaybeReportThreadLeak(ThreadContextBase *tctx_base, void *arg) {
- Vector<ThreadLeak> &leaks = *(Vector<ThreadLeak>*)arg;
- ThreadContext *tctx = static_cast<ThreadContext*>(tctx_base);
+static void CollectThreadLeaks(ThreadContextBase *tctx_base, void *arg) {
+ auto &leaks = *static_cast<Vector<ThreadLeak> *>(arg);
+ auto *tctx = static_cast<ThreadContext *>(tctx_base);
if (tctx->detached || tctx->status != ThreadStatusFinished)
return;
for (uptr i = 0; i < leaks.Size(); i++) {
@@ -166,8 +53,7 @@ static void MaybeReportThreadLeak(ThreadContextBase *tctx_base, void *arg) {
return;
}
}
- ThreadLeak leak = {tctx, 1};
- leaks.PushBack(leak);
+ leaks.PushBack({tctx, 1});
}
#endif
@@ -206,10 +92,10 @@ void ThreadFinalize(ThreadState *thr) {
#if !SANITIZER_GO
if (!ShouldReport(thr, ReportTypeThreadLeak))
return;
- ThreadRegistryLock l(ctx->thread_registry);
+ ThreadRegistryLock l(&ctx->thread_registry);
Vector<ThreadLeak> leaks;
- ctx->thread_registry->RunCallbackForEachThreadLocked(
- MaybeReportThreadLeak, &leaks);
+ ctx->thread_registry.RunCallbackForEachThreadLocked(CollectThreadLeaks,
+ &leaks);
for (uptr i = 0; i < leaks.Size(); i++) {
ScopedReport rep(ReportTypeThreadLeak);
rep.AddThread(leaks[i].tctx, true);
@@ -221,20 +107,48 @@ void ThreadFinalize(ThreadState *thr) {
int ThreadCount(ThreadState *thr) {
uptr result;
- ctx->thread_registry->GetNumberOfThreads(0, 0, &result);
+ ctx->thread_registry.GetNumberOfThreads(0, 0, &result);
return (int)result;
}
-int ThreadCreate(ThreadState *thr, uptr pc, uptr uid, bool detached) {
+struct OnCreatedArgs {
+ ThreadState *thr;
+ uptr pc;
+};
+
+Tid ThreadCreate(ThreadState *thr, uptr pc, uptr uid, bool detached) {
OnCreatedArgs args = { thr, pc };
u32 parent_tid = thr ? thr->tid : kInvalidTid; // No parent for GCD workers.
- int tid =
- ctx->thread_registry->CreateThread(uid, detached, parent_tid, &args);
+ Tid tid = ctx->thread_registry.CreateThread(uid, detached, parent_tid, &args);
DPrintf("#%d: ThreadCreate tid=%d uid=%zu\n", parent_tid, tid, uid);
return tid;
}
-void ThreadStart(ThreadState *thr, int tid, tid_t os_id,
+void ThreadContext::OnCreated(void *arg) {
+ thr = 0;
+ if (tid == kMainTid)
+ return;
+ OnCreatedArgs *args = static_cast<OnCreatedArgs *>(arg);
+ if (!args->thr) // GCD workers don't have a parent thread.
+ return;
+ args->thr->fast_state.IncrementEpoch();
+ // Can't increment epoch w/o writing to the trace as well.
+ TraceAddEvent(args->thr, args->thr->fast_state, EventTypeMop, 0);
+ ReleaseImpl(args->thr, 0, &sync);
+ creation_stack_id = CurrentStackId(args->thr, args->pc);
+}
+
+extern "C" void __tsan_stack_initialization() {}
+
+struct OnStartedArgs {
+ ThreadState *thr;
+ uptr stk_addr;
+ uptr stk_size;
+ uptr tls_addr;
+ uptr tls_size;
+};
+
+void ThreadStart(ThreadState *thr, Tid tid, tid_t os_id,
ThreadType thread_type) {
uptr stk_addr = 0;
uptr stk_size = 0;
@@ -244,22 +158,13 @@ void ThreadStart(ThreadState *thr, int tid, tid_t os_id,
if (thread_type != ThreadType::Fiber)
GetThreadStackAndTls(tid == kMainTid, &stk_addr, &stk_size, &tls_addr,
&tls_size);
-
- if (tid != kMainTid) {
- if (stk_addr && stk_size)
- MemoryRangeImitateWrite(thr, /*pc=*/ 1, stk_addr, stk_size);
-
- if (tls_addr && tls_size) ImitateTlsWrite(thr, tls_addr, tls_size);
- }
#endif
- ThreadRegistry *tr = ctx->thread_registry;
+ ThreadRegistry *tr = &ctx->thread_registry;
OnStartedArgs args = { thr, stk_addr, stk_size, tls_addr, tls_size };
tr->StartThread(tid, os_id, thread_type, &args);
- tr->Lock();
- thr->tctx = (ThreadContext*)tr->GetThreadLocked(tid);
- tr->Unlock();
+ while (!thr->tctx->trace.parts.Empty()) thr->tctx->trace.parts.PopBack();
#if !SANITIZER_GO
if (ctx->after_multithreaded_fork) {
@@ -268,6 +173,51 @@ void ThreadStart(ThreadState *thr, int tid, tid_t os_id,
ThreadIgnoreSyncBegin(thr, 0);
}
#endif
+
+#if !SANITIZER_GO
+ // Don't imitate stack/TLS writes for the main thread,
+ // because its initialization is synchronized with all
+ // subsequent threads anyway.
+ if (tid != kMainTid) {
+ if (stk_addr && stk_size) {
+ const uptr pc = StackTrace::GetNextInstructionPc(
+ reinterpret_cast<uptr>(__tsan_stack_initialization));
+ MemoryRangeImitateWrite(thr, pc, stk_addr, stk_size);
+ }
+
+ if (tls_addr && tls_size)
+ ImitateTlsWrite(thr, tls_addr, tls_size);
+ }
+#endif
+}
+
+void ThreadContext::OnStarted(void *arg) {
+ OnStartedArgs *args = static_cast<OnStartedArgs *>(arg);
+ thr = args->thr;
+ // RoundUp so that one trace part does not contain events
+ // from different threads.
+ epoch0 = RoundUp(epoch1 + 1, kTracePartSize);
+ epoch1 = (u64)-1;
+ new (thr)
+ ThreadState(ctx, tid, unique_id, epoch0, reuse_count, args->stk_addr,
+ args->stk_size, args->tls_addr, args->tls_size);
+ if (common_flags()->detect_deadlocks)
+ thr->dd_lt = ctx->dd->CreateLogicalThread(unique_id);
+ thr->fast_state.SetHistorySize(flags()->history_size);
+ // Commit switch to the new part of the trace.
+ // TraceAddEvent will reset stack0/mset0 in the new part for us.
+ TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0);
+
+ thr->fast_synch_epoch = epoch0;
+ AcquireImpl(thr, 0, &sync);
+ sync.Reset(&thr->proc()->clock_cache);
+ thr->tctx = this;
+ thr->is_inited = true;
+ DPrintf(
+ "#%d: ThreadStart epoch=%zu stk_addr=%zx stk_size=%zx "
+ "tls_addr=%zx tls_size=%zx\n",
+ tid, (uptr)epoch0, args->stk_addr, args->stk_size, args->tls_addr,
+ args->tls_size);
}
void ThreadFinish(ThreadState *thr) {
@@ -277,7 +227,42 @@ void ThreadFinish(ThreadState *thr) {
if (thr->tls_addr && thr->tls_size)
DontNeedShadowFor(thr->tls_addr, thr->tls_size);
thr->is_dead = true;
- ctx->thread_registry->FinishThread(thr->tid);
+ thr->is_inited = false;
+#if !SANITIZER_GO
+ thr->ignore_interceptors++;
+#endif
+ ctx->thread_registry.FinishThread(thr->tid);
+}
+
+void ThreadContext::OnFinished() {
+ if (!detached) {
+ thr->fast_state.IncrementEpoch();
+ // Can't increment epoch w/o writing to the trace as well.
+ TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0);
+ ReleaseImpl(thr, 0, &sync);
+ }
+ epoch1 = thr->fast_state.epoch();
+
+#if !SANITIZER_GO
+ UnmapOrDie(thr->shadow_stack, kShadowStackSize * sizeof(uptr));
+#else
+ Free(thr->shadow_stack);
+#endif
+ thr->shadow_stack = nullptr;
+ thr->shadow_stack_pos = nullptr;
+ thr->shadow_stack_end = nullptr;
+
+ if (common_flags()->detect_deadlocks)
+ ctx->dd->DestroyLogicalThread(thr->dd_lt);
+ thr->clock.ResetCached(&thr->proc()->clock_cache);
+#if !SANITIZER_GO
+ thr->last_sleep_clock.ResetCached(&thr->proc()->clock_cache);
+#endif
+#if !SANITIZER_GO
+ PlatformCleanUpThreadState(thr);
+#endif
+ thr->~ThreadState();
+ thr = 0;
}
struct ConsumeThreadContext {
@@ -285,131 +270,44 @@ struct ConsumeThreadContext {
ThreadContextBase *tctx;
};
-static bool ConsumeThreadByUid(ThreadContextBase *tctx, void *arg) {
- ConsumeThreadContext *findCtx = (ConsumeThreadContext *)arg;
- if (tctx->user_id == findCtx->uid && tctx->status != ThreadStatusInvalid) {
- if (findCtx->tctx) {
- // Ensure that user_id is unique. If it's not the case we are screwed.
- // Something went wrong before, but now there is no way to recover.
- // Returning a wrong thread is not an option, it may lead to very hard
- // to debug false positives (e.g. if we join a wrong thread).
- Report("ThreadSanitizer: dup thread with used id 0x%zx\n", findCtx->uid);
- Die();
- }
- findCtx->tctx = tctx;
- tctx->user_id = 0;
- }
- return false;
-}
-
-int ThreadConsumeTid(ThreadState *thr, uptr pc, uptr uid) {
- ConsumeThreadContext findCtx = {uid, nullptr};
- ctx->thread_registry->FindThread(ConsumeThreadByUid, &findCtx);
- int tid = findCtx.tctx ? findCtx.tctx->tid : kInvalidTid;
- DPrintf("#%d: ThreadTid uid=%zu tid=%d\n", thr->tid, uid, tid);
- return tid;
+Tid ThreadConsumeTid(ThreadState *thr, uptr pc, uptr uid) {
+ return ctx->thread_registry.ConsumeThreadUserId(uid);
}
-void ThreadJoin(ThreadState *thr, uptr pc, int tid) {
+void ThreadJoin(ThreadState *thr, uptr pc, Tid tid) {
CHECK_GT(tid, 0);
CHECK_LT(tid, kMaxTid);
DPrintf("#%d: ThreadJoin tid=%d\n", thr->tid, tid);
- ctx->thread_registry->JoinThread(tid, thr);
+ ctx->thread_registry.JoinThread(tid, thr);
}
-void ThreadDetach(ThreadState *thr, uptr pc, int tid) {
- CHECK_GT(tid, 0);
- CHECK_LT(tid, kMaxTid);
- ctx->thread_registry->DetachThread(tid, thr);
+void ThreadContext::OnJoined(void *arg) {
+ ThreadState *caller_thr = static_cast<ThreadState *>(arg);
+ AcquireImpl(caller_thr, 0, &sync);
+ sync.Reset(&caller_thr->proc()->clock_cache);
}
-void ThreadNotJoined(ThreadState *thr, uptr pc, int tid, uptr uid) {
+void ThreadContext::OnDead() { CHECK_EQ(sync.size(), 0); }
+
+void ThreadDetach(ThreadState *thr, uptr pc, Tid tid) {
CHECK_GT(tid, 0);
CHECK_LT(tid, kMaxTid);
- ctx->thread_registry->SetThreadUserId(tid, uid);
+ ctx->thread_registry.DetachThread(tid, thr);
}
-void ThreadSetName(ThreadState *thr, const char *name) {
- ctx->thread_registry->SetThreadName(thr->tid, name);
+void ThreadContext::OnDetached(void *arg) {
+ ThreadState *thr1 = static_cast<ThreadState *>(arg);
+ sync.Reset(&thr1->proc()->clock_cache);
}
-void MemoryAccessRange(ThreadState *thr, uptr pc, uptr addr,
- uptr size, bool is_write) {
- if (size == 0)
- return;
-
- u64 *shadow_mem = (u64*)MemToShadow(addr);
- DPrintf2("#%d: MemoryAccessRange: @%p %p size=%d is_write=%d\n",
- thr->tid, (void*)pc, (void*)addr,
- (int)size, is_write);
-
-#if SANITIZER_DEBUG
- if (!IsAppMem(addr)) {
- Printf("Access to non app mem %zx\n", addr);
- DCHECK(IsAppMem(addr));
- }
- if (!IsAppMem(addr + size - 1)) {
- Printf("Access to non app mem %zx\n", addr + size - 1);
- DCHECK(IsAppMem(addr + size - 1));
- }
- if (!IsShadowMem((uptr)shadow_mem)) {
- Printf("Bad shadow addr %p (%zx)\n", shadow_mem, addr);
- DCHECK(IsShadowMem((uptr)shadow_mem));
- }
- if (!IsShadowMem((uptr)(shadow_mem + size * kShadowCnt / 8 - 1))) {
- Printf("Bad shadow addr %p (%zx)\n",
- shadow_mem + size * kShadowCnt / 8 - 1, addr + size - 1);
- DCHECK(IsShadowMem((uptr)(shadow_mem + size * kShadowCnt / 8 - 1)));
- }
-#endif
-
- if (*shadow_mem == kShadowRodata) {
- DCHECK(!is_write);
- // Access to .rodata section, no races here.
- // Measurements show that it can be 10-20% of all memory accesses.
- return;
- }
-
- FastState fast_state = thr->fast_state;
- if (fast_state.GetIgnoreBit())
- return;
-
- fast_state.IncrementEpoch();
- thr->fast_state = fast_state;
- TraceAddEvent(thr, fast_state, EventTypeMop, pc);
-
- bool unaligned = (addr % kShadowCell) != 0;
+void ThreadNotJoined(ThreadState *thr, uptr pc, Tid tid, uptr uid) {
+ CHECK_GT(tid, 0);
+ CHECK_LT(tid, kMaxTid);
+ ctx->thread_registry.SetThreadUserId(tid, uid);
+}
- // Handle unaligned beginning, if any.
- for (; addr % kShadowCell && size; addr++, size--) {
- int const kAccessSizeLog = 0;
- Shadow cur(fast_state);
- cur.SetWrite(is_write);
- cur.SetAddr0AndSizeLog(addr & (kShadowCell - 1), kAccessSizeLog);
- MemoryAccessImpl(thr, addr, kAccessSizeLog, is_write, false,
- shadow_mem, cur);
- }
- if (unaligned)
- shadow_mem += kShadowCnt;
- // Handle middle part, if any.
- for (; size >= kShadowCell; addr += kShadowCell, size -= kShadowCell) {
- int const kAccessSizeLog = 3;
- Shadow cur(fast_state);
- cur.SetWrite(is_write);
- cur.SetAddr0AndSizeLog(0, kAccessSizeLog);
- MemoryAccessImpl(thr, addr, kAccessSizeLog, is_write, false,
- shadow_mem, cur);
- shadow_mem += kShadowCnt;
- }
- // Handle ending, if any.
- for (; size; addr++, size--) {
- int const kAccessSizeLog = 0;
- Shadow cur(fast_state);
- cur.SetWrite(is_write);
- cur.SetAddr0AndSizeLog(addr & (kShadowCell - 1), kAccessSizeLog);
- MemoryAccessImpl(thr, addr, kAccessSizeLog, is_write, false,
- shadow_mem, cur);
- }
+void ThreadSetName(ThreadState *thr, const char *name) {
+ ctx->thread_registry.SetThreadName(thr->tid, name);
}
#if !SANITIZER_GO
@@ -421,10 +319,10 @@ void FiberSwitchImpl(ThreadState *from, ThreadState *to) {
}
ThreadState *FiberCreate(ThreadState *thr, uptr pc, unsigned flags) {
- void *mem = internal_alloc(MBlockThreadContex, sizeof(ThreadState));
+ void *mem = Alloc(sizeof(ThreadState));
ThreadState *fiber = static_cast<ThreadState *>(mem);
internal_memset(fiber, 0, sizeof(*fiber));
- int tid = ThreadCreate(thr, pc, 0, true);
+ Tid tid = ThreadCreate(thr, pc, 0, true);
FiberSwitchImpl(thr, fiber);
ThreadStart(fiber, tid, 0, ThreadType::Fiber);
FiberSwitchImpl(fiber, thr);
@@ -435,7 +333,7 @@ void FiberDestroy(ThreadState *thr, uptr pc, ThreadState *fiber) {
FiberSwitchImpl(thr, fiber);
ThreadFinish(fiber);
FiberSwitchImpl(fiber, thr);
- internal_free(fiber);
+ Free(fiber);
}
void FiberSwitch(ThreadState *thr, uptr pc,
diff --git a/compiler-rt/lib/tsan/rtl/tsan_shadow.h b/compiler-rt/lib/tsan/rtl/tsan_shadow.h
new file mode 100644
index 000000000000..8b7bc341713e
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl/tsan_shadow.h
@@ -0,0 +1,233 @@
+//===-- tsan_shadow.h -------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TSAN_SHADOW_H
+#define TSAN_SHADOW_H
+
+#include "tsan_defs.h"
+#include "tsan_trace.h"
+
+namespace __tsan {
+
+// FastState (from most significant bit):
+// ignore : 1
+// tid : kTidBits
+// unused : -
+// history_size : 3
+// epoch : kClkBits
+class FastState {
+ public:
+ FastState(u64 tid, u64 epoch) {
+ x_ = tid << kTidShift;
+ x_ |= epoch;
+ DCHECK_EQ(tid, this->tid());
+ DCHECK_EQ(epoch, this->epoch());
+ DCHECK_EQ(GetIgnoreBit(), false);
+ }
+
+ explicit FastState(u64 x) : x_(x) {}
+
+ u64 raw() const { return x_; }
+
+ u64 tid() const {
+ u64 res = (x_ & ~kIgnoreBit) >> kTidShift;
+ return res;
+ }
+
+ u64 TidWithIgnore() const {
+ u64 res = x_ >> kTidShift;
+ return res;
+ }
+
+ u64 epoch() const {
+ u64 res = x_ & ((1ull << kClkBits) - 1);
+ return res;
+ }
+
+ void IncrementEpoch() {
+ u64 old_epoch = epoch();
+ x_ += 1;
+ DCHECK_EQ(old_epoch + 1, epoch());
+ (void)old_epoch;
+ }
+
+ void SetIgnoreBit() { x_ |= kIgnoreBit; }
+ void ClearIgnoreBit() { x_ &= ~kIgnoreBit; }
+ bool GetIgnoreBit() const { return (s64)x_ < 0; }
+
+ void SetHistorySize(int hs) {
+ CHECK_GE(hs, 0);
+ CHECK_LE(hs, 7);
+ x_ = (x_ & ~(kHistoryMask << kHistoryShift)) | (u64(hs) << kHistoryShift);
+ }
+
+ ALWAYS_INLINE
+ int GetHistorySize() const {
+ return (int)((x_ >> kHistoryShift) & kHistoryMask);
+ }
+
+ void ClearHistorySize() { SetHistorySize(0); }
+
+ ALWAYS_INLINE
+ u64 GetTracePos() const {
+ const int hs = GetHistorySize();
+ // When hs == 0, the trace consists of 2 parts.
+ const u64 mask = (1ull << (kTracePartSizeBits + hs + 1)) - 1;
+ return epoch() & mask;
+ }
+
+ private:
+ friend class Shadow;
+ static const int kTidShift = 64 - kTidBits - 1;
+ static const u64 kIgnoreBit = 1ull << 63;
+ static const u64 kFreedBit = 1ull << 63;
+ static const u64 kHistoryShift = kClkBits;
+ static const u64 kHistoryMask = 7;
+ u64 x_;
+};
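A short round-trip through the packed representation makes the layout comment above concrete (the values are arbitrary and only need to fit in kTidBits/kClkBits; this is an illustration, not a test from the patch):

  FastState s(/*tid=*/5, /*epoch=*/100);
  CHECK_EQ(s.tid(), 5);
  CHECK_EQ(s.epoch(), 100);
  s.IncrementEpoch();  // only the low kClkBits change
  CHECK_EQ(s.epoch(), 101);
  s.SetIgnoreBit();    // uses the top bit; tid() masks it back out
  CHECK_EQ(s.tid(), 5);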
+
+// Shadow (from most significant bit):
+// freed : 1
+// tid : kTidBits
+// is_atomic : 1
+// is_read : 1
+// size_log : 2
+// addr0 : 3
+// epoch : kClkBits
+class Shadow : public FastState {
+ public:
+ explicit Shadow(u64 x) : FastState(x) {}
+
+ explicit Shadow(const FastState &s) : FastState(s.x_) { ClearHistorySize(); }
+
+ void SetAddr0AndSizeLog(u64 addr0, unsigned kAccessSizeLog) {
+ DCHECK_EQ((x_ >> kClkBits) & 31, 0);
+ DCHECK_LE(addr0, 7);
+ DCHECK_LE(kAccessSizeLog, 3);
+ x_ |= ((kAccessSizeLog << 3) | addr0) << kClkBits;
+ DCHECK_EQ(kAccessSizeLog, size_log());
+ DCHECK_EQ(addr0, this->addr0());
+ }
+
+ void SetWrite(unsigned kAccessIsWrite) {
+ DCHECK_EQ(x_ & kReadBit, 0);
+ if (!kAccessIsWrite)
+ x_ |= kReadBit;
+ DCHECK_EQ(kAccessIsWrite, IsWrite());
+ }
+
+ void SetAtomic(bool kIsAtomic) {
+ DCHECK(!IsAtomic());
+ if (kIsAtomic)
+ x_ |= kAtomicBit;
+ DCHECK_EQ(IsAtomic(), kIsAtomic);
+ }
+
+ bool IsAtomic() const { return x_ & kAtomicBit; }
+
+ bool IsZero() const { return x_ == 0; }
+
+ static inline bool TidsAreEqual(const Shadow s1, const Shadow s2) {
+ u64 shifted_xor = (s1.x_ ^ s2.x_) >> kTidShift;
+ DCHECK_EQ(shifted_xor == 0, s1.TidWithIgnore() == s2.TidWithIgnore());
+ return shifted_xor == 0;
+ }
+
+ static ALWAYS_INLINE bool Addr0AndSizeAreEqual(const Shadow s1,
+ const Shadow s2) {
+ u64 masked_xor = ((s1.x_ ^ s2.x_) >> kClkBits) & 31;
+ return masked_xor == 0;
+ }
+
+ static ALWAYS_INLINE bool TwoRangesIntersect(Shadow s1, Shadow s2,
+ unsigned kS2AccessSize) {
+ bool res = false;
+ u64 diff = s1.addr0() - s2.addr0();
+ if ((s64)diff < 0) { // s1.addr0 < s2.addr0
+ // if (s1.addr0() + size1) > s2.addr0()) return true;
+ if (s1.size() > -diff)
+ res = true;
+ } else {
+ // if (s2.addr0() + kS2AccessSize > s1.addr0()) return true;
+ if (kS2AccessSize > diff)
+ res = true;
+ }
+ DCHECK_EQ(res, TwoRangesIntersectSlow(s1, s2));
+ DCHECK_EQ(res, TwoRangesIntersectSlow(s2, s1));
+ return res;
+ }
+
+ u64 ALWAYS_INLINE addr0() const { return (x_ >> kClkBits) & 7; }
+ u64 ALWAYS_INLINE size() const { return 1ull << size_log(); }
+ bool ALWAYS_INLINE IsWrite() const { return !IsRead(); }
+ bool ALWAYS_INLINE IsRead() const { return x_ & kReadBit; }
+
+ // The idea behind the freed bit is as follows.
+  // When the memory is freed (or otherwise inaccessible) we write to the shadow
+ // values with tid/epoch related to the free and the freed bit set.
+ // During memory accesses processing the freed bit is considered
+ // as msb of tid. So any access races with shadow with freed bit set
+ // (it is as if write from a thread with which we never synchronized before).
+ // This allows us to detect accesses to freed memory w/o additional
+ // overheads in memory access processing and at the same time restore
+ // tid/epoch of free.
+ void MarkAsFreed() { x_ |= kFreedBit; }
+
+ bool IsFreed() const { return x_ & kFreedBit; }
+
+ bool GetFreedAndReset() {
+ bool res = x_ & kFreedBit;
+ x_ &= ~kFreedBit;
+ return res;
+ }
+
+ bool ALWAYS_INLINE IsBothReadsOrAtomic(bool kIsWrite, bool kIsAtomic) const {
+ bool v = x_ & ((u64(kIsWrite ^ 1) << kReadShift) |
+ (u64(kIsAtomic) << kAtomicShift));
+ DCHECK_EQ(v, (!IsWrite() && !kIsWrite) || (IsAtomic() && kIsAtomic));
+ return v;
+ }
+
+ bool ALWAYS_INLINE IsRWNotWeaker(bool kIsWrite, bool kIsAtomic) const {
+ bool v = ((x_ >> kReadShift) & 3) <= u64((kIsWrite ^ 1) | (kIsAtomic << 1));
+ DCHECK_EQ(v, (IsAtomic() < kIsAtomic) ||
+ (IsAtomic() == kIsAtomic && !IsWrite() <= !kIsWrite));
+ return v;
+ }
+
+ bool ALWAYS_INLINE IsRWWeakerOrEqual(bool kIsWrite, bool kIsAtomic) const {
+ bool v = ((x_ >> kReadShift) & 3) >= u64((kIsWrite ^ 1) | (kIsAtomic << 1));
+ DCHECK_EQ(v, (IsAtomic() > kIsAtomic) ||
+ (IsAtomic() == kIsAtomic && !IsWrite() >= !kIsWrite));
+ return v;
+ }
+
+ private:
+ static const u64 kReadShift = 5 + kClkBits;
+ static const u64 kReadBit = 1ull << kReadShift;
+ static const u64 kAtomicShift = 6 + kClkBits;
+ static const u64 kAtomicBit = 1ull << kAtomicShift;
+
+ u64 size_log() const { return (x_ >> (3 + kClkBits)) & 3; }
+
+ static bool TwoRangesIntersectSlow(const Shadow s1, const Shadow s2) {
+ if (s1.addr0() == s2.addr0())
+ return true;
+ if (s1.addr0() < s2.addr0() && s1.addr0() + s1.size() > s2.addr0())
+ return true;
+ if (s2.addr0() < s1.addr0() && s2.addr0() + s2.size() > s1.addr0())
+ return true;
+ return false;
+ }
+};
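An illustrative round-trip through the freed bit described above (not code from the patch):

  Shadow s(FastState(/*tid=*/7, /*epoch=*/42));
  s.MarkAsFreed();
  CHECK(s.IsFreed());
  CHECK(s.GetFreedAndReset());  // reports the bit and clears it
  CHECK(!s.IsFreed());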
+
+const RawShadow kShadowRodata = (RawShadow)-1; // .rodata shadow marker
+
+} // namespace __tsan
+
+#endif
diff --git a/compiler-rt/lib/tsan/rtl/tsan_stack_trace.cpp b/compiler-rt/lib/tsan/rtl/tsan_stack_trace.cpp
index 6c703d7f2b10..9bbaafb3a85f 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_stack_trace.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_stack_trace.cpp
@@ -23,14 +23,10 @@ VarSizeStackTrace::~VarSizeStackTrace() {
}
void VarSizeStackTrace::ResizeBuffer(uptr new_size) {
- if (trace_buffer) {
- internal_free(trace_buffer);
- }
- trace_buffer =
- (new_size > 0)
- ? (uptr *)internal_alloc(MBlockStackTrace,
- new_size * sizeof(trace_buffer[0]))
- : nullptr;
+ Free(trace_buffer);
+ trace_buffer = (new_size > 0)
+ ? (uptr *)Alloc(new_size * sizeof(trace_buffer[0]))
+ : nullptr;
trace = trace_buffer;
size = new_size;
}
diff --git a/compiler-rt/lib/tsan/rtl/tsan_symbolize.cpp b/compiler-rt/lib/tsan/rtl/tsan_symbolize.cpp
index 6478f3a754ac..2e2744d2eae7 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_symbolize.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_symbolize.cpp
@@ -110,7 +110,8 @@ ReportLocation *SymbolizeData(uptr addr) {
DataInfo info;
if (!Symbolizer::GetOrInit()->SymbolizeData(addr, &info))
return 0;
- ReportLocation *ent = ReportLocation::New(ReportLocationGlobal);
+ auto *ent = New<ReportLocation>();
+ ent->type = ReportLocationGlobal;
internal_memcpy(&ent->global, &info, sizeof(info));
return ent;
}
diff --git a/compiler-rt/lib/tsan/rtl/tsan_sync.cpp b/compiler-rt/lib/tsan/rtl/tsan_sync.cpp
index 5e226b2d12b1..f042abab74e5 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_sync.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_sync.cpp
@@ -20,13 +20,14 @@ void DDMutexInit(ThreadState *thr, uptr pc, SyncVar *s);
SyncVar::SyncVar() : mtx(MutexTypeSyncVar) { Reset(0); }
-void SyncVar::Init(ThreadState *thr, uptr pc, uptr addr, u64 uid) {
+void SyncVar::Init(ThreadState *thr, uptr pc, uptr addr, u64 uid,
+ bool save_stack) {
this->addr = addr;
this->uid = uid;
this->next = 0;
- creation_stack_id = 0;
- if (!SANITIZER_GO) // Go does not use them
+ creation_stack_id = kInvalidStackID;
+ if (save_stack && !SANITIZER_GO) // Go does not use them
creation_stack_id = CurrentStackId(thr, pc);
if (common_flags()->detect_deadlocks)
DDMutexInit(thr, pc, this);
@@ -34,7 +35,7 @@ void SyncVar::Init(ThreadState *thr, uptr pc, uptr addr, u64 uid) {
void SyncVar::Reset(Processor *proc) {
uid = 0;
- creation_stack_id = 0;
+ creation_stack_id = kInvalidStackID;
owner_tid = kInvalidTid;
last_lock = 0;
recursion = 0;
@@ -190,63 +191,41 @@ MBlock* MetaMap::GetBlock(uptr p) {
}
}
-SyncVar* MetaMap::GetOrCreateAndLock(ThreadState *thr, uptr pc,
- uptr addr, bool write_lock) {
- return GetAndLock(thr, pc, addr, write_lock, true);
-}
-
-SyncVar* MetaMap::GetIfExistsAndLock(uptr addr, bool write_lock) {
- return GetAndLock(0, 0, addr, write_lock, false);
-}
-
-SyncVar *MetaMap::GetAndLock(ThreadState *thr, uptr pc, uptr addr, bool write_lock,
- bool create) NO_THREAD_SAFETY_ANALYSIS {
+SyncVar *MetaMap::GetSync(ThreadState *thr, uptr pc, uptr addr, bool create,
+ bool save_stack) {
u32 *meta = MemToMeta(addr);
u32 idx0 = *meta;
u32 myidx = 0;
- SyncVar *mys = 0;
+ SyncVar *mys = nullptr;
for (;;) {
- u32 idx = idx0;
- for (;;) {
- if (idx == 0)
- break;
- if (idx & kFlagBlock)
- break;
+ for (u32 idx = idx0; idx && !(idx & kFlagBlock);) {
DCHECK(idx & kFlagSync);
SyncVar * s = sync_alloc_.Map(idx & ~kFlagMask);
- if (s->addr == addr) {
- if (myidx != 0) {
+ if (LIKELY(s->addr == addr)) {
+ if (UNLIKELY(myidx != 0)) {
mys->Reset(thr->proc());
sync_alloc_.Free(&thr->proc()->sync_cache, myidx);
}
- if (write_lock)
- s->mtx.Lock();
- else
- s->mtx.ReadLock();
return s;
}
idx = s->next;
}
if (!create)
- return 0;
- if (*meta != idx0) {
+ return nullptr;
+ if (UNLIKELY(*meta != idx0)) {
idx0 = *meta;
continue;
}
- if (myidx == 0) {
+ if (LIKELY(myidx == 0)) {
const u64 uid = atomic_fetch_add(&uid_gen_, 1, memory_order_relaxed);
myidx = sync_alloc_.Alloc(&thr->proc()->sync_cache);
mys = sync_alloc_.Map(myidx);
- mys->Init(thr, pc, addr, uid);
+ mys->Init(thr, pc, addr, uid, save_stack);
}
mys->next = idx0;
if (atomic_compare_exchange_strong((atomic_uint32_t*)meta, &idx0,
myidx | kFlagSync, memory_order_release)) {
- if (write_lock)
- mys->mtx.Lock();
- else
- mys->mtx.ReadLock();
return mys;
}
}
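The rewritten GetSync keeps the lock-free insertion scheme: a freshly initialized SyncVar is linked at the head of the per-cell list and published with a compare-and-swap, and the loop restarts if another thread published a different head in the meantime. A self-contained sketch of that pattern on a plain pointer list (the real code stores 32-bit indices with flag bits rather than pointers):

#include <atomic>

struct Node {
  Node *next = nullptr;
};

// Publish n as the new list head; retry if the head changed under us.
void PushHead(std::atomic<Node *> *head, Node *n) {
  Node *old_head = head->load(std::memory_order_relaxed);
  do {
    n->next = old_head;  // re-link against the head we just observed
  } while (!head->compare_exchange_weak(old_head, n,
                                        std::memory_order_release,
                                        std::memory_order_relaxed));
}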
@@ -290,4 +269,11 @@ void MetaMap::OnProcIdle(Processor *proc) {
sync_alloc_.FlushCache(&proc->sync_cache);
}
+MetaMap::MemoryStats MetaMap::GetMemoryStats() const {
+ MemoryStats stats;
+ stats.mem_block = block_alloc_.AllocatedMemory();
+ stats.sync_obj = sync_alloc_.AllocatedMemory();
+ return stats;
+}
+
} // namespace __tsan
diff --git a/compiler-rt/lib/tsan/rtl/tsan_sync.h b/compiler-rt/lib/tsan/rtl/tsan_sync.h
index 324aa1b0cea1..fc8fa288a841 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_sync.h
+++ b/compiler-rt/lib/tsan/rtl/tsan_sync.h
@@ -46,14 +46,16 @@ enum MutexFlags {
MutexFlagNotStatic,
};
+// SyncVar is a descriptor of a user synchronization object
+// (mutex or an atomic variable).
struct SyncVar {
SyncVar();
uptr addr; // overwritten by DenseSlabAlloc freelist
Mutex mtx;
u64 uid; // Globally unique id.
- u32 creation_stack_id;
- u32 owner_tid; // Set only by exclusive owners.
+ StackID creation_stack_id;
+ Tid owner_tid; // Set only by exclusive owners.
u64 last_lock;
int recursion;
atomic_uint32_t flags;
@@ -64,7 +66,7 @@ struct SyncVar {
// with the mtx. This reduces contention for hot sync objects.
SyncClock clock;
- void Init(ThreadState *thr, uptr pc, uptr addr, u64 uid);
+ void Init(ThreadState *thr, uptr pc, uptr addr, u64 uid, bool save_stack);
void Reset(Processor *proc);
u64 GetId() const {
@@ -101,10 +103,8 @@ struct SyncVar {
}
};
-/* MetaMap allows to map arbitrary user pointers onto various descriptors.
- Currently it maps pointers to heap block descriptors and sync var descs.
- It uses 1/2 direct shadow, see tsan_platform.h.
-*/
+// MetaMap maps app addresses to heap block (MBlock) and sync var (SyncVar)
+// descriptors. It uses 1/2 direct shadow, see tsan_platform.h for the mapping.
class MetaMap {
public:
MetaMap();
@@ -115,14 +115,25 @@ class MetaMap {
void ResetRange(Processor *proc, uptr p, uptr sz);
MBlock* GetBlock(uptr p);
- SyncVar* GetOrCreateAndLock(ThreadState *thr, uptr pc,
- uptr addr, bool write_lock);
- SyncVar* GetIfExistsAndLock(uptr addr, bool write_lock);
+ SyncVar *GetSyncOrCreate(ThreadState *thr, uptr pc, uptr addr,
+ bool save_stack) {
+ return GetSync(thr, pc, addr, true, save_stack);
+ }
+ SyncVar *GetSyncIfExists(uptr addr) {
+ return GetSync(nullptr, 0, addr, false, false);
+ }
void MoveMemory(uptr src, uptr dst, uptr sz);
void OnProcIdle(Processor *proc);
+ struct MemoryStats {
+ uptr mem_block;
+ uptr sync_obj;
+ };
+
+ MemoryStats GetMemoryStats() const;
+
private:
static const u32 kFlagMask = 3u << 30;
static const u32 kFlagBlock = 1u << 30;
@@ -133,8 +144,8 @@ class MetaMap {
SyncAlloc sync_alloc_;
atomic_uint64_t uid_gen_;
- SyncVar* GetAndLock(ThreadState *thr, uptr pc, uptr addr, bool write_lock,
- bool create);
+ SyncVar *GetSync(ThreadState *thr, uptr pc, uptr addr, bool create,
+ bool save_stack);
};
} // namespace __tsan
diff --git a/compiler-rt/lib/tsan/rtl/tsan_trace.h b/compiler-rt/lib/tsan/rtl/tsan_trace.h
index f5e0c407cda8..ffc8c991ece0 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_trace.h
+++ b/compiler-rt/lib/tsan/rtl/tsan_trace.h
@@ -13,8 +13,9 @@
#define TSAN_TRACE_H
#include "tsan_defs.h"
-#include "tsan_stack_trace.h"
+#include "tsan_ilist.h"
#include "tsan_mutexset.h"
+#include "tsan_stack_trace.h"
namespace __tsan {
@@ -67,6 +68,185 @@ struct Trace {
Trace() : mtx(MutexTypeTrace) {}
};
+namespace v3 {
+
+enum class EventType : u64 {
+ kAccessExt,
+ kAccessRange,
+ kLock,
+ kRLock,
+ kUnlock,
+ kTime,
+};
+
+// "Base" type for all events for type dispatch.
+struct Event {
+ // We use variable-length type encoding to give more bits to some event
+ // types that need them. If is_access is set, this is EventAccess.
+ // Otherwise, if is_func is set, this is EventFunc.
+ // Otherwise type denotes the type.
+ u64 is_access : 1;
+ u64 is_func : 1;
+ EventType type : 3;
+ u64 _ : 59;
+};
+static_assert(sizeof(Event) == 8, "bad Event size");
+
+// Nop event used as padding and does not affect state during replay.
+static constexpr Event NopEvent = {1, 0, EventType::kAccessExt, 0};
+
+// Compressed memory access can represent only some events with PCs
+// close enough to each other. Otherwise we fall back to EventAccessExt.
+struct EventAccess {
+ static constexpr uptr kPCBits = 15;
+ static_assert(kPCBits + kCompressedAddrBits + 5 == 64,
+ "unused bits in EventAccess");
+
+ u64 is_access : 1; // = 1
+ u64 is_read : 1;
+ u64 is_atomic : 1;
+ u64 size_log : 2;
+ u64 pc_delta : kPCBits; // signed delta from the previous memory access PC
+ u64 addr : kCompressedAddrBits;
+};
+static_assert(sizeof(EventAccess) == 8, "bad EventAccess size");
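pc_delta holds a signed offset from the previous access PC, biased into an unsigned kPCBits-wide field; accesses whose PCs are too far apart fall back to EventAccessExt. The decode side appears in RestoreStack earlier in this patch; a matching encoder would look roughly like this (a sketch under that assumption, not the trace writer from the patch):

  // Bias the signed delta so it fits the unsigned bitfield.
  u64 EncodePcDelta(uptr prev_pc, uptr pc) {
    return (pc - prev_pc + (1 << (EventAccess::kPCBits - 1))) &
           ((1 << EventAccess::kPCBits) - 1);
  }
  // Inverse, as used when replaying the trace.
  uptr DecodePc(uptr prev_pc, u64 pc_delta) {
    return prev_pc + pc_delta - (1 << (EventAccess::kPCBits - 1));
  }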
+
+// Function entry (pc != 0) or exit (pc == 0).
+struct EventFunc {
+ u64 is_access : 1; // = 0
+ u64 is_func : 1; // = 1
+ u64 pc : 62;
+};
+static_assert(sizeof(EventFunc) == 8, "bad EventFunc size");
+
+// Extended memory access with full PC.
+struct EventAccessExt {
+ // Note: precisely specifying the unused parts of the bitfield is critical for
+ // performance. If we don't specify them, compiler will generate code to load
+ // the old value and shuffle it to extract the unused bits to apply to the new
+ // value. If we specify the unused part and store 0 in there, all that
+ // unnecessary code goes away (store of the 0 const is combined with other
+ // constant parts).
+ static constexpr uptr kUnusedBits = 11;
+ static_assert(kCompressedAddrBits + kUnusedBits + 9 == 64,
+ "unused bits in EventAccessExt");
+
+ u64 is_access : 1; // = 0
+ u64 is_func : 1; // = 0
+ EventType type : 3; // = EventType::kAccessExt
+ u64 is_read : 1;
+ u64 is_atomic : 1;
+ u64 size_log : 2;
+ u64 _ : kUnusedBits;
+ u64 addr : kCompressedAddrBits;
+ u64 pc;
+};
+static_assert(sizeof(EventAccessExt) == 16, "bad EventAccessExt size");
+
+// Access to a memory range.
+struct EventAccessRange {
+ static constexpr uptr kSizeLoBits = 13;
+ static_assert(kCompressedAddrBits + kSizeLoBits + 7 == 64,
+ "unused bits in EventAccessRange");
+
+ u64 is_access : 1; // = 0
+ u64 is_func : 1; // = 0
+ EventType type : 3; // = EventType::kAccessRange
+ u64 is_read : 1;
+ u64 is_free : 1;
+ u64 size_lo : kSizeLoBits;
+ u64 pc : kCompressedAddrBits;
+ u64 addr : kCompressedAddrBits;
+ u64 size_hi : 64 - kCompressedAddrBits;
+};
+static_assert(sizeof(EventAccessRange) == 16, "bad EventAccessRange size");
+
+// Mutex lock.
+struct EventLock {
+ static constexpr uptr kStackIDLoBits = 15;
+ static constexpr uptr kStackIDHiBits =
+ sizeof(StackID) * kByteBits - kStackIDLoBits;
+ static constexpr uptr kUnusedBits = 3;
+ static_assert(kCompressedAddrBits + kStackIDLoBits + 5 == 64,
+ "unused bits in EventLock");
+ static_assert(kCompressedAddrBits + kStackIDHiBits + kUnusedBits == 64,
+ "unused bits in EventLock");
+
+ u64 is_access : 1; // = 0
+ u64 is_func : 1; // = 0
+ EventType type : 3; // = EventType::kLock or EventType::kRLock
+ u64 pc : kCompressedAddrBits;
+ u64 stack_lo : kStackIDLoBits;
+ u64 stack_hi : sizeof(StackID) * kByteBits - kStackIDLoBits;
+ u64 _ : kUnusedBits;
+ u64 addr : kCompressedAddrBits;
+};
+static_assert(sizeof(EventLock) == 16, "bad EventLock size");
+
+// Mutex unlock.
+struct EventUnlock {
+ static constexpr uptr kUnusedBits = 15;
+ static_assert(kCompressedAddrBits + kUnusedBits + 5 == 64,
+ "unused bits in EventUnlock");
+
+ u64 is_access : 1; // = 0
+ u64 is_func : 1; // = 0
+ EventType type : 3; // = EventType::kUnlock
+ u64 _ : kUnusedBits;
+ u64 addr : kCompressedAddrBits;
+};
+static_assert(sizeof(EventUnlock) == 8, "bad EventUnlock size");
+
+// Time change event.
+struct EventTime {
+ static constexpr uptr kUnusedBits = 37;
+ static_assert(kUnusedBits + sizeof(Sid) * kByteBits + kEpochBits + 5 == 64,
+ "unused bits in EventTime");
+
+ u64 is_access : 1; // = 0
+ u64 is_func : 1; // = 0
+ EventType type : 3; // = EventType::kTime
+ u64 sid : sizeof(Sid) * kByteBits;
+ u64 epoch : kEpochBits;
+ u64 _ : kUnusedBits;
+};
+static_assert(sizeof(EventTime) == 8, "bad EventTime size");
+
+struct Trace;
+
+struct TraceHeader {
+ Trace* trace = nullptr; // back-pointer to Trace containing this part
+ INode trace_parts; // in Trace::parts
+};
+
+struct TracePart : TraceHeader {
+ // There are a lot of goroutines in Go, so we use smaller parts.
+ static constexpr uptr kByteSize = (SANITIZER_GO ? 128 : 256) << 10;
+ static constexpr uptr kSize =
+ (kByteSize - sizeof(TraceHeader)) / sizeof(Event);
+ // TraceAcquire does a fast event pointer overflow check by comparing
+ // pointer into TracePart::events with kAlignment mask. Since TracePart's
+ // are allocated page-aligned, this check detects end of the array
+  // (it also has false positives in the middle that are filtered separately).
+ // This also requires events to be the last field.
+ static constexpr uptr kAlignment = 0xff0;
+ Event events[kSize];
+
+ TracePart() {}
+};
+static_assert(sizeof(TracePart) == TracePart::kByteSize, "bad TracePart size");
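The kAlignment comment above describes a fast end-of-part test; the TraceAcquire that performs it is not shown in this hunk, but under the stated page-alignment assumption the check has roughly this shape (a sketch, not the actual implementation):

  // With page-aligned TracePart allocations and events[] as the last field,
  // the end of the array lands on a page boundary, so a position whose bits
  // under kAlignment are all zero is either the real end or one of the rare
  // in-the-middle false positives that the slow path filters out.
  bool MaybeAtPartEnd(const Event *pos) {
    return (reinterpret_cast<uptr>(pos) & TracePart::kAlignment) == 0;
  }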
+
+struct Trace {
+ Mutex mtx;
+ IList<TraceHeader, &TraceHeader::trace_parts, TracePart> parts;
+ Event* final_pos =
+ nullptr; // final position in the last part for finished threads
+
+ Trace() : mtx(MutexTypeTrace) {}
+};
+
+} // namespace v3
+
} // namespace __tsan
#endif // TSAN_TRACE_H
diff --git a/compiler-rt/lib/tsan/rtl/tsan_update_shadow_word_inl.h b/compiler-rt/lib/tsan/rtl/tsan_update_shadow_word.inc
index d23dfb0ba061..a58ef0f17efa 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_update_shadow_word_inl.h
+++ b/compiler-rt/lib/tsan/rtl/tsan_update_shadow_word.inc
@@ -1,4 +1,4 @@
-//===-- tsan_update_shadow_word_inl.h ---------------------------*- C++ -*-===//
+//===-- tsan_update_shadow_word.inc -----------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/compiler-rt/lib/tsan/rtl/tsan_vector_clock.cpp b/compiler-rt/lib/tsan/rtl/tsan_vector_clock.cpp
new file mode 100644
index 000000000000..278298565d3f
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl/tsan_vector_clock.cpp
@@ -0,0 +1,126 @@
+//===-- tsan_vector_clock.cpp ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#include "tsan_vector_clock.h"
+
+#include "sanitizer_common/sanitizer_placement_new.h"
+#include "tsan_mman.h"
+
+namespace __tsan {
+
+#if TSAN_VECTORIZE
+const uptr kVectorClockSize = kThreadSlotCount * sizeof(Epoch) / sizeof(m128);
+#endif
+
+VectorClock::VectorClock() { Reset(); }
+
+void VectorClock::Reset() {
+#if !TSAN_VECTORIZE
+ for (uptr i = 0; i < kThreadSlotCount; i++)
+ clk_[i] = kEpochZero;
+#else
+ m128 z = _mm_setzero_si128();
+ m128* vclk = reinterpret_cast<m128*>(clk_);
+ for (uptr i = 0; i < kVectorClockSize; i++) _mm_store_si128(&vclk[i], z);
+#endif
+}
+
+void VectorClock::Acquire(const VectorClock* src) {
+ if (!src)
+ return;
+#if !TSAN_VECTORIZE
+ for (uptr i = 0; i < kThreadSlotCount; i++)
+ clk_[i] = max(clk_[i], src->clk_[i]);
+#else
+ m128* __restrict vdst = reinterpret_cast<m128*>(clk_);
+ m128 const* __restrict vsrc = reinterpret_cast<m128 const*>(src->clk_);
+ for (uptr i = 0; i < kVectorClockSize; i++) {
+ m128 s = _mm_load_si128(&vsrc[i]);
+ m128 d = _mm_load_si128(&vdst[i]);
+ m128 m = _mm_max_epu16(s, d);
+ _mm_store_si128(&vdst[i], m);
+ }
+#endif
+}
+
+static VectorClock* AllocClock(VectorClock** dstp) {
+ if (UNLIKELY(!*dstp))
+ *dstp = New<VectorClock>();
+ return *dstp;
+}
+
+void VectorClock::Release(VectorClock** dstp) const {
+ VectorClock* dst = AllocClock(dstp);
+ dst->Acquire(this);
+}
+
+void VectorClock::ReleaseStore(VectorClock** dstp) const {
+ VectorClock* dst = AllocClock(dstp);
+ *dst = *this;
+}
+
+VectorClock& VectorClock::operator=(const VectorClock& other) {
+#if !TSAN_VECTORIZE
+ for (uptr i = 0; i < kThreadSlotCount; i++)
+ clk_[i] = other.clk_[i];
+#else
+ m128* __restrict vdst = reinterpret_cast<m128*>(clk_);
+ m128 const* __restrict vsrc = reinterpret_cast<m128 const*>(other.clk_);
+ for (uptr i = 0; i < kVectorClockSize; i++) {
+ m128 s = _mm_load_si128(&vsrc[i]);
+ _mm_store_si128(&vdst[i], s);
+ }
+#endif
+ return *this;
+}
+
+void VectorClock::ReleaseStoreAcquire(VectorClock** dstp) {
+ VectorClock* dst = AllocClock(dstp);
+#if !TSAN_VECTORIZE
+ for (uptr i = 0; i < kThreadSlotCount; i++) {
+ Epoch tmp = dst->clk_[i];
+ dst->clk_[i] = clk_[i];
+ clk_[i] = max(clk_[i], tmp);
+ }
+#else
+ m128* __restrict vdst = reinterpret_cast<m128*>(dst->clk_);
+ m128* __restrict vclk = reinterpret_cast<m128*>(clk_);
+ for (uptr i = 0; i < kVectorClockSize; i++) {
+ m128 t = _mm_load_si128(&vdst[i]);
+ m128 c = _mm_load_si128(&vclk[i]);
+ m128 m = _mm_max_epu16(c, t);
+ _mm_store_si128(&vdst[i], c);
+ _mm_store_si128(&vclk[i], m);
+ }
+#endif
+}
+
+void VectorClock::ReleaseAcquire(VectorClock** dstp) {
+ VectorClock* dst = AllocClock(dstp);
+#if !TSAN_VECTORIZE
+ for (uptr i = 0; i < kThreadSlotCount; i++) {
+ dst->clk_[i] = max(dst->clk_[i], clk_[i]);
+ clk_[i] = dst->clk_[i];
+ }
+#else
+ m128* __restrict vdst = reinterpret_cast<m128*>(dst->clk_);
+ m128* __restrict vclk = reinterpret_cast<m128*>(clk_);
+ for (uptr i = 0; i < kVectorClockSize; i++) {
+ m128 c = _mm_load_si128(&vclk[i]);
+ m128 d = _mm_load_si128(&vdst[i]);
+ m128 m = _mm_max_epu16(c, d);
+ _mm_store_si128(&vdst[i], m);
+ _mm_store_si128(&vclk[i], m);
+ }
+#endif
+}
+
+} // namespace __tsan
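+
For orientation, the functions above implement the standard vector-clock join: Acquire takes the per-slot maximum of two clocks, Release joins this clock into a sync object's clock (allocating it lazily via AllocClock), and ReleaseStore overwrites it. A small usage sketch (hypothetical, not part of the patch), using only the interface declared in tsan_vector_clock.h below:

// Hypothetical illustration of release/acquire through a sync object's clock.
void SyncExample(VectorClock* releaser, VectorClock* acquirer,
                 VectorClock** sync_clk) {
  // Releasing thread: join its clock into the sync object's clock,
  // allocating the destination clock on first use.
  releaser->Release(sync_clk);
  // Acquiring thread: join the sync clock into its own clock, i.e.
  // take max(acquirer clock, sync clock) for every slot.
  acquirer->Acquire(*sync_clk);
}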
diff --git a/compiler-rt/lib/tsan/rtl/tsan_vector_clock.h b/compiler-rt/lib/tsan/rtl/tsan_vector_clock.h
new file mode 100644
index 000000000000..63b206302190
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl/tsan_vector_clock.h
@@ -0,0 +1,51 @@
+//===-- tsan_vector_clock.h -------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#ifndef TSAN_VECTOR_CLOCK_H
+#define TSAN_VECTOR_CLOCK_H
+
+#include "tsan_defs.h"
+
+namespace __tsan {
+
+// Fixed-size vector clock, used both for threads and sync objects.
+class VectorClock {
+ public:
+ VectorClock();
+
+ Epoch Get(Sid sid) const;
+ void Set(Sid sid, Epoch v);
+
+ void Reset();
+ void Acquire(const VectorClock* src);
+ void Release(VectorClock** dstp) const;
+ void ReleaseStore(VectorClock** dstp) const;
+ void ReleaseStoreAcquire(VectorClock** dstp);
+ void ReleaseAcquire(VectorClock** dstp);
+
+ VectorClock& operator=(const VectorClock& other);
+
+ private:
+ Epoch clk_[kThreadSlotCount] VECTOR_ALIGNED;
+};
+
+ALWAYS_INLINE Epoch VectorClock::Get(Sid sid) const {
+ return clk_[static_cast<u8>(sid)];
+}
+
+ALWAYS_INLINE void VectorClock::Set(Sid sid, Epoch v) {
+ DCHECK_GE(v, clk_[static_cast<u8>(sid)]);
+ clk_[static_cast<u8>(sid)] = v;
+}
+
+} // namespace __tsan
+
+#endif // TSAN_VECTOR_CLOCK_H
diff --git a/compiler-rt/lib/ubsan/ubsan_diag.cpp b/compiler-rt/lib/ubsan/ubsan_diag.cpp
index ef2e495cac8e..8de51bc18770 100644
--- a/compiler-rt/lib/ubsan/ubsan_diag.cpp
+++ b/compiler-rt/lib/ubsan/ubsan_diag.cpp
@@ -157,7 +157,7 @@ static void RenderLocation(InternalScopedString *Buffer, Location Loc) {
return;
}
case Location::LK_Memory:
- Buffer->append("%p", Loc.getMemoryLocation());
+ Buffer->append("%p", reinterpret_cast<void *>(Loc.getMemoryLocation()));
return;
case Location::LK_Symbolized: {
const AddressInfo &Info = Loc.getSymbolizedStack()->info;
@@ -169,7 +169,7 @@ static void RenderLocation(InternalScopedString *Buffer, Location Loc) {
RenderModuleLocation(Buffer, Info.module, Info.module_offset,
Info.module_arch, common_flags()->strip_path_prefix);
else
- Buffer->append("%p", Info.address);
+ Buffer->append("%p", reinterpret_cast<void *>(Info.address));
return;
}
case Location::LK_Null:
@@ -286,7 +286,7 @@ static void PrintMemorySnippet(const Decorator &Decor, MemoryLocation Loc,
Buffer.append("\n");
// Emit highlights.
- Buffer.append(Decor.Highlight());
+ Buffer.append("%s", Decor.Highlight());
Range *InRange = upperBound(Min, Ranges, NumRanges);
for (uptr P = Min; P != Max; ++P) {
char Pad = ' ', Byte = ' ';
@@ -355,7 +355,7 @@ Diag::~Diag() {
Buffer.clear();
}
- Buffer.append(Decor.Bold());
+ Buffer.append("%s", Decor.Bold());
RenderLocation(&Buffer, Loc);
Buffer.append(":");
diff --git a/compiler-rt/lib/xray/xray_basic_flags.h b/compiler-rt/lib/xray/xray_basic_flags.h
index 2459effa8bae..b846c1233e8a 100644
--- a/compiler-rt/lib/xray/xray_basic_flags.h
+++ b/compiler-rt/lib/xray/xray_basic_flags.h
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file is a part of XRay, a dynamic runtime instruementation system.
+// This file is a part of XRay, a dynamic runtime instrumentation system.
//
// XRay Basic Mode runtime flags.
//===----------------------------------------------------------------------===//
diff --git a/compiler-rt/lib/xray/xray_buffer_queue.cpp b/compiler-rt/lib/xray/xray_buffer_queue.cpp
index bad91e036cef..748708ccd0f4 100644
--- a/compiler-rt/lib/xray/xray_buffer_queue.cpp
+++ b/compiler-rt/lib/xray/xray_buffer_queue.cpp
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file is a part of XRay, a dynamic runtime instruementation system.
+// This file is a part of XRay, a dynamic runtime instrumentation system.
//
// Defines the interface for a buffer queue implementation.
//
diff --git a/compiler-rt/lib/xray/xray_flags.h b/compiler-rt/lib/xray/xray_flags.h
index edb5a5119f86..cce6fe9d62f9 100644
--- a/compiler-rt/lib/xray/xray_flags.h
+++ b/compiler-rt/lib/xray/xray_flags.h
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file is a part of XRay, a dynamic runtime instruementation system.
+// This file is a part of XRay, a dynamic runtime instrumentation system.
//
// XRay runtime flags.
//===----------------------------------------------------------------------===//
diff --git a/compiler-rt/lib/xray/xray_interface.cpp b/compiler-rt/lib/xray/xray_interface.cpp
index 7669b9ab82be..ddf184c9b857 100644
--- a/compiler-rt/lib/xray/xray_interface.cpp
+++ b/compiler-rt/lib/xray/xray_interface.cpp
@@ -360,7 +360,7 @@ XRayPatchingStatus mprotectAndPatchFunction(int32_t FuncId,
return XRayPatchingStatus::FAILED;
}
- // Here we compute the minumum sled and maximum sled associated with a
+ // Here we compute the minimum sled and maximum sled associated with a
// particular function ID.
auto SledRange = InstrMap.SledsIndex ? InstrMap.SledsIndex[FuncId - 1]
: findFunctionSleds(FuncId, InstrMap);
diff --git a/compiler-rt/lib/xray/xray_profiling.cpp b/compiler-rt/lib/xray/xray_profiling.cpp
index ef16691562cc..81c33fae88c1 100644
--- a/compiler-rt/lib/xray/xray_profiling.cpp
+++ b/compiler-rt/lib/xray/xray_profiling.cpp
@@ -402,7 +402,7 @@ profilingLoggingInit(size_t, size_t, void *Options,
return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
}
- // If we've succeded, set the global pointer to the initialised storage.
+ // If we've succeeded, set the global pointer to the initialised storage.
BQ = reinterpret_cast<BufferQueue *>(&BufferQueueStorage);
} else {
BQ->finalize();
diff --git a/compiler-rt/lib/xray/xray_x86_64.cpp b/compiler-rt/lib/xray/xray_x86_64.cpp
index c58584b3a14b..669d2e85bede 100644
--- a/compiler-rt/lib/xray/xray_x86_64.cpp
+++ b/compiler-rt/lib/xray/xray_x86_64.cpp
@@ -148,7 +148,8 @@ bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
int64_t TrampolineOffset = reinterpret_cast<int64_t>(Trampoline) -
(static_cast<int64_t>(Address) + 11);
if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
- Report("XRay Entry trampoline (%p) too far from sled (%p)\n", Trampoline,
+ Report("XRay Entry trampoline (%p) too far from sled (%p)\n",
+ reinterpret_cast<void *>(Trampoline),
reinterpret_cast<void *>(Address));
return false;
}
@@ -195,7 +196,8 @@ bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
(static_cast<int64_t>(Address) + 11);
if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
Report("XRay Exit trampoline (%p) too far from sled (%p)\n",
- __xray_FunctionExit, reinterpret_cast<void *>(Address));
+ reinterpret_cast<void *>(__xray_FunctionExit),
+ reinterpret_cast<void *>(Address));
return false;
}
if (Enable) {
@@ -224,7 +226,8 @@ bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
(static_cast<int64_t>(Address) + 11);
if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
Report("XRay Tail Exit trampoline (%p) too far from sled (%p)\n",
- __xray_FunctionTailExit, reinterpret_cast<void *>(Address));
+ reinterpret_cast<void *>(__xray_FunctionTailExit),
+ reinterpret_cast<void *>(Address));
return false;
}
if (Enable) {
diff --git a/libcxx/CREDITS.TXT b/libcxx/CREDITS.TXT
index 49f095d29645..597c5fcb7cf4 100644
--- a/libcxx/CREDITS.TXT
+++ b/libcxx/CREDITS.TXT
@@ -12,6 +12,10 @@ N: Saleem Abdulrasool
E: compnerd@compnerd.org
D: Minor patches and Linux fixes.
+N: Muiez Ahmed
+E: muiez@ibm.com
+D: z/OS port.
+
N: Dan Albert
E: danalbert@google.com
D: Android support and test runner improvements.
@@ -24,10 +28,6 @@ N: Holger Arnold
E: holgerar@gmail.com
D: Minor fix.
-N: Ruben Van Boxem
-E: vanboxem dot ruben at gmail dot com
-D: Initial Windows patches.
-
N: David Chisnall
E: theraven at theravensnest dot org
D: FreeBSD and Solaris ports, libcxxrt support, some atomics work.
@@ -41,6 +41,10 @@ N: Jonathan B Coe
E: jbcoe@me.com
D: Implementation of propagate_const.
+N: Matthew Dempsky
+E: matthew@dempsky.org
+D: Minor patches and bug fixes.
+
N: Christopher Di Bella
E: cjdb@google.com
E: cjdb.ns@gmail.com
@@ -58,10 +62,6 @@ N: Bill Fisher
E: william.w.fisher@gmail.com
D: Regex bug fixes.
-N: Matthew Dempsky
-E: matthew@dempsky.org
-D: Minor patches and bug fixes.
-
N: Google Inc.
D: Copyright owner and contributor of the CityHash algorithm
@@ -113,6 +113,10 @@ N: Jon Roelofs
E: jroelofS@jroelofs.com
D: Remote testing, Newlib port, baremetal/single-threaded support.
+N: Kent Ross
+E: k@mad.cash
+D: Patches for operator<=> support
+
N: Jonathan Sauer
D: Minor patches, mostly related to constexpr
@@ -131,6 +135,10 @@ N: Stephan Tolksdorf
E: st@quanttec.com
D: Minor <atomic> fix
+N: Ruben Van Boxem
+E: vanboxem dot ruben at gmail dot com
+D: Initial Windows patches.
+
N: Michael van der Westhuizen
E: r1mikey at gmail dot com
@@ -149,11 +157,11 @@ N: Xing Xue
E: xingxue@ca.ibm.com
D: AIX port
-N: Zhihao Yuan
-E: lichray@gmail.com
-D: Standard compatibility fixes.
-
N: Jeffrey Yasskin
E: jyasskin@gmail.com
E: jyasskin@google.com
D: Linux fixes.
+
+N: Zhihao Yuan
+E: lichray@gmail.com
+D: Standard compatibility fixes.
diff --git a/libcxx/include/__algorithm/adjacent_find.h b/libcxx/include/__algorithm/adjacent_find.h
index 0a2aa055c100..621ef5f20f82 100644
--- a/libcxx/include/__algorithm/adjacent_find.h
+++ b/libcxx/include/__algorithm/adjacent_find.h
@@ -18,9 +18,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _ForwardIterator, class _BinaryPredicate>
@@ -46,6 +43,4 @@ adjacent_find(_ForwardIterator __first, _ForwardIterator __last) {
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_ADJACENT_FIND_H
diff --git a/libcxx/include/__algorithm/all_of.h b/libcxx/include/__algorithm/all_of.h
index 7d6ed5085963..817a4bc89ca0 100644
--- a/libcxx/include/__algorithm/all_of.h
+++ b/libcxx/include/__algorithm/all_of.h
@@ -16,9 +16,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _InputIterator, class _Predicate>
@@ -32,6 +29,4 @@ all_of(_InputIterator __first, _InputIterator __last, _Predicate __pred) {
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_ALL_OF_H
diff --git a/libcxx/include/__algorithm/any_of.h b/libcxx/include/__algorithm/any_of.h
index d5a6c094a673..f4116d913059 100644
--- a/libcxx/include/__algorithm/any_of.h
+++ b/libcxx/include/__algorithm/any_of.h
@@ -16,9 +16,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _InputIterator, class _Predicate>
@@ -32,6 +29,4 @@ any_of(_InputIterator __first, _InputIterator __last, _Predicate __pred) {
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_ANY_OF_H
diff --git a/libcxx/include/__algorithm/binary_search.h b/libcxx/include/__algorithm/binary_search.h
index 766f5da16296..cd1d7b1a752d 100644
--- a/libcxx/include/__algorithm/binary_search.h
+++ b/libcxx/include/__algorithm/binary_search.h
@@ -19,9 +19,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Compare, class _ForwardIterator, class _Tp>
@@ -53,9 +50,6 @@ binary_search(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __va
__less<typename iterator_traits<_ForwardIterator>::value_type, _Tp>());
}
-
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_BINARY_SEARCH_H
diff --git a/libcxx/include/__algorithm/clamp.h b/libcxx/include/__algorithm/clamp.h
index f8eba03cf890..db28735e97a3 100644
--- a/libcxx/include/__algorithm/clamp.h
+++ b/libcxx/include/__algorithm/clamp.h
@@ -17,13 +17,9 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER > 14
-// clamp
template<class _Tp, class _Compare>
_LIBCPP_NODISCARD_EXT inline
_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR
@@ -47,6 +43,4 @@ clamp(const _Tp& __v, const _Tp& __lo, const _Tp& __hi)
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_CLAMP_H
diff --git a/libcxx/include/__algorithm/comp.h b/libcxx/include/__algorithm/comp.h
index 2039cf7b3955..b3f971e4f052 100644
--- a/libcxx/include/__algorithm/comp.h
+++ b/libcxx/include/__algorithm/comp.h
@@ -15,9 +15,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
// I'd like to replace these with _VSTD::equal_to<void>, but can't because:
@@ -92,6 +89,4 @@ struct __less<_T1, const _T1>
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_COMP_H
diff --git a/libcxx/include/__algorithm/comp_ref_type.h b/libcxx/include/__algorithm/comp_ref_type.h
index b3bca82c0953..6cc6405686f5 100644
--- a/libcxx/include/__algorithm/comp_ref_type.h
+++ b/libcxx/include/__algorithm/comp_ref_type.h
@@ -10,15 +10,16 @@
#define _LIBCPP___ALGORITHM_COMP_REF_TYPE_H
#include <__config>
-#include <type_traits>
+
+#ifdef _LIBCPP_DEBUG
+# include <__debug>
+# include <__utility/declval.h>
+#endif
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
#ifdef _LIBCPP_DEBUG
@@ -73,15 +74,12 @@ struct __comp_ref_type {
// Pass the comparator by lvalue reference. Or in debug mode, using a
// debugging wrapper that stores a reference.
#ifndef _LIBCPP_DEBUG
- typedef typename add_lvalue_reference<_Comp>::type type;
+ typedef _Comp& type;
#else
typedef __debug_less<_Comp> type;
#endif
};
-
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_COMP_REF_TYPE_H
diff --git a/libcxx/include/__algorithm/copy.h b/libcxx/include/__algorithm/copy.h
index 9db7434c043d..e7e8b9e51a3e 100644
--- a/libcxx/include/__algorithm/copy.h
+++ b/libcxx/include/__algorithm/copy.h
@@ -19,9 +19,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
// copy
@@ -77,6 +74,4 @@ copy(_InputIterator __first, _InputIterator __last, _OutputIterator __result)
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_COPY_H
diff --git a/libcxx/include/__algorithm/copy_backward.h b/libcxx/include/__algorithm/copy_backward.h
index 03a9c5f2d001..4a2f8c0c49cd 100644
--- a/libcxx/include/__algorithm/copy_backward.h
+++ b/libcxx/include/__algorithm/copy_backward.h
@@ -19,9 +19,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _BidirectionalIterator, class _OutputIterator>
@@ -79,6 +76,4 @@ copy_backward(_BidirectionalIterator1 __first, _BidirectionalIterator1 __last,
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_COPY_BACKWARD_H
diff --git a/libcxx/include/__algorithm/copy_if.h b/libcxx/include/__algorithm/copy_if.h
index 153304c7a765..230826f63af4 100644
--- a/libcxx/include/__algorithm/copy_if.h
+++ b/libcxx/include/__algorithm/copy_if.h
@@ -19,9 +19,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template<class _InputIterator, class _OutputIterator, class _Predicate>
@@ -43,6 +40,4 @@ copy_if(_InputIterator __first, _InputIterator __last,
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_COPY_IF_H
diff --git a/libcxx/include/__algorithm/copy_n.h b/libcxx/include/__algorithm/copy_n.h
index bbfeb8686fbf..38a84a4105a4 100644
--- a/libcxx/include/__algorithm/copy_n.h
+++ b/libcxx/include/__algorithm/copy_n.h
@@ -20,9 +20,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template<class _InputIterator, class _Size, class _OutputIterator>
@@ -60,13 +57,12 @@ typename enable_if
>::type
copy_n(_InputIterator __first, _Size __orig_n, _OutputIterator __result)
{
+ typedef typename iterator_traits<_InputIterator>::difference_type difference_type;
typedef decltype(_VSTD::__convert_to_integral(__orig_n)) _IntegralSize;
_IntegralSize __n = __orig_n;
- return _VSTD::copy(__first, __first + __n, __result);
+ return _VSTD::copy(__first, __first + difference_type(__n), __result);
}
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_COPY_N_H
diff --git a/libcxx/include/__algorithm/count.h b/libcxx/include/__algorithm/count.h
index 7f2d1954cd6b..81a2c186f83b 100644
--- a/libcxx/include/__algorithm/count.h
+++ b/libcxx/include/__algorithm/count.h
@@ -17,9 +17,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _InputIterator, class _Tp>
@@ -35,6 +32,4 @@ _LIBCPP_NODISCARD_EXT inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_C
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_COUNT_H
diff --git a/libcxx/include/__algorithm/count_if.h b/libcxx/include/__algorithm/count_if.h
index a5efffb92603..00f5d671da57 100644
--- a/libcxx/include/__algorithm/count_if.h
+++ b/libcxx/include/__algorithm/count_if.h
@@ -17,9 +17,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _InputIterator, class _Predicate>
@@ -35,6 +32,4 @@ _LIBCPP_NODISCARD_EXT inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_C
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_COUNT_IF_H
diff --git a/libcxx/include/__algorithm/equal.h b/libcxx/include/__algorithm/equal.h
index bc6755981963..0fe1a21fe526 100644
--- a/libcxx/include/__algorithm/equal.h
+++ b/libcxx/include/__algorithm/equal.h
@@ -12,16 +12,13 @@
#include <__config>
#include <__algorithm/comp.h>
+#include <__iterator/distance.h>
#include <__iterator/iterator_traits.h>
-#include <iterator> // FIXME: replace with <__iterator/distance.h> when it lands
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _InputIterator1, class _InputIterator2, class _BinaryPredicate>
@@ -60,14 +57,14 @@ __equal(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _Random
if (_VSTD::distance(__first1, __last1) != _VSTD::distance(__first2, __last2))
return false;
return _VSTD::equal<_RandomAccessIterator1, _RandomAccessIterator2,
- typename add_lvalue_reference<_BinaryPredicate>::type>(__first1, __last1, __first2, __pred);
+ _BinaryPredicate&>(__first1, __last1, __first2, __pred);
}
template <class _InputIterator1, class _InputIterator2, class _BinaryPredicate>
_LIBCPP_NODISCARD_EXT inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 bool
equal(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _InputIterator2 __last2,
_BinaryPredicate __pred) {
- return _VSTD::__equal<typename add_lvalue_reference<_BinaryPredicate>::type>(
+ return _VSTD::__equal<_BinaryPredicate&>(
__first1, __last1, __first2, __last2, __pred, typename iterator_traits<_InputIterator1>::iterator_category(),
typename iterator_traits<_InputIterator2>::iterator_category());
}
@@ -85,6 +82,4 @@ equal(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_EQUAL_H
diff --git a/libcxx/include/__algorithm/equal_range.h b/libcxx/include/__algorithm/equal_range.h
index 9694dae95003..679456e27b43 100644
--- a/libcxx/include/__algorithm/equal_range.h
+++ b/libcxx/include/__algorithm/equal_range.h
@@ -21,9 +21,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Compare, class _ForwardIterator, class _Tp>
@@ -82,6 +79,4 @@ equal_range(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __valu
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_EQUAL_RANGE_H
diff --git a/libcxx/include/__algorithm/fill.h b/libcxx/include/__algorithm/fill.h
index 4fefe86536d3..1fad1de993bf 100644
--- a/libcxx/include/__algorithm/fill.h
+++ b/libcxx/include/__algorithm/fill.h
@@ -18,9 +18,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _ForwardIterator, class _Tp>
@@ -50,6 +47,4 @@ fill(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value_)
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_FILL_H
diff --git a/libcxx/include/__algorithm/fill_n.h b/libcxx/include/__algorithm/fill_n.h
index 34a245edb51c..857ac1415731 100644
--- a/libcxx/include/__algorithm/fill_n.h
+++ b/libcxx/include/__algorithm/fill_n.h
@@ -17,9 +17,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _OutputIterator, class _Size, class _Tp>
@@ -42,6 +39,4 @@ fill_n(_OutputIterator __first, _Size __n, const _Tp& __value_)
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_FILL_N_H
diff --git a/libcxx/include/__algorithm/find.h b/libcxx/include/__algorithm/find.h
index bc593dc54254..2a6dfbe41a94 100644
--- a/libcxx/include/__algorithm/find.h
+++ b/libcxx/include/__algorithm/find.h
@@ -16,9 +16,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _InputIterator, class _Tp>
@@ -32,6 +29,4 @@ find(_InputIterator __first, _InputIterator __last, const _Tp& __value_) {
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_FIND_H
diff --git a/libcxx/include/__algorithm/find_end.h b/libcxx/include/__algorithm/find_end.h
index f4277f003aa8..5d971c57a4e0 100644
--- a/libcxx/include/__algorithm/find_end.h
+++ b/libcxx/include/__algorithm/find_end.h
@@ -13,15 +13,11 @@
#include <__config>
#include <__algorithm/comp.h>
#include <__iterator/iterator_traits.h>
-#include <type_traits>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _BinaryPredicate, class _ForwardIterator1, class _ForwardIterator2>
@@ -99,14 +95,16 @@ template <class _BinaryPredicate, class _RandomAccessIterator1, class _RandomAcc
_LIBCPP_CONSTEXPR_AFTER_CXX11 _RandomAccessIterator1 __find_end(
_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2,
_RandomAccessIterator2 __last2, _BinaryPredicate __pred, random_access_iterator_tag, random_access_iterator_tag) {
+ typedef typename iterator_traits<_RandomAccessIterator1>::difference_type _D1;
+ typedef typename iterator_traits<_RandomAccessIterator2>::difference_type _D2;
// Take advantage of knowing source and pattern lengths. Stop short when source is smaller than pattern
- typename iterator_traits<_RandomAccessIterator2>::difference_type __len2 = __last2 - __first2;
+ _D2 __len2 = __last2 - __first2;
if (__len2 == 0)
return __last1;
- typename iterator_traits<_RandomAccessIterator1>::difference_type __len1 = __last1 - __first1;
+ _D1 __len1 = __last1 - __first1;
if (__len1 < __len2)
return __last1;
- const _RandomAccessIterator1 __s = __first1 + (__len2 - 1); // End of pattern match can't go before here
+ const _RandomAccessIterator1 __s = __first1 + _D1(__len2 - 1); // End of pattern match can't go before here
_RandomAccessIterator1 __l1 = __last1;
_RandomAccessIterator2 __l2 = __last2;
--__l2;
@@ -134,7 +132,7 @@ template <class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredica
_LIBCPP_NODISCARD_EXT inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 _ForwardIterator1
find_end(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2,
_BinaryPredicate __pred) {
- return _VSTD::__find_end<typename add_lvalue_reference<_BinaryPredicate>::type>(
+ return _VSTD::__find_end<_BinaryPredicate&>(
__first1, __last1, __first2, __last2, __pred, typename iterator_traits<_ForwardIterator1>::iterator_category(),
typename iterator_traits<_ForwardIterator2>::iterator_category());
}
@@ -149,6 +147,4 @@ find_end(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_FIND_END_OF_H
diff --git a/libcxx/include/__algorithm/find_first_of.h b/libcxx/include/__algorithm/find_first_of.h
index d956c8dda2f2..79a00acb9ee6 100644
--- a/libcxx/include/__algorithm/find_first_of.h
+++ b/libcxx/include/__algorithm/find_first_of.h
@@ -18,9 +18,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
@@ -52,6 +49,4 @@ _LIBCPP_NODISCARD_EXT inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_C
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_FIND_FIRST_OF_H
diff --git a/libcxx/include/__algorithm/find_if.h b/libcxx/include/__algorithm/find_if.h
index 456cc5b0a34e..a94196a16ac5 100644
--- a/libcxx/include/__algorithm/find_if.h
+++ b/libcxx/include/__algorithm/find_if.h
@@ -16,9 +16,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _InputIterator, class _Predicate>
@@ -32,6 +29,4 @@ find_if(_InputIterator __first, _InputIterator __last, _Predicate __pred) {
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_FIND_IF_H
diff --git a/libcxx/include/__algorithm/find_if_not.h b/libcxx/include/__algorithm/find_if_not.h
index d7d25160cc7d..e057db5efa49 100644
--- a/libcxx/include/__algorithm/find_if_not.h
+++ b/libcxx/include/__algorithm/find_if_not.h
@@ -16,9 +16,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _InputIterator, class _Predicate>
@@ -32,6 +29,4 @@ find_if_not(_InputIterator __first, _InputIterator __last, _Predicate __pred) {
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_FIND_IF_NOT_H
diff --git a/libcxx/include/__algorithm/for_each.h b/libcxx/include/__algorithm/for_each.h
index e71a36a1de40..1612ffa5c002 100644
--- a/libcxx/include/__algorithm/for_each.h
+++ b/libcxx/include/__algorithm/for_each.h
@@ -16,9 +16,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _InputIterator, class _Function>
@@ -32,6 +29,4 @@ inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 _Function for_eac
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_FOR_EACH_H
diff --git a/libcxx/include/__algorithm/for_each_n.h b/libcxx/include/__algorithm/for_each_n.h
index 77f6c862145f..00e3fb9c1db8 100644
--- a/libcxx/include/__algorithm/for_each_n.h
+++ b/libcxx/include/__algorithm/for_each_n.h
@@ -17,9 +17,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER > 14
@@ -42,6 +39,4 @@ inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 _InputIterator fo
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_FOR_EACH_N_H
diff --git a/libcxx/include/__algorithm/generate.h b/libcxx/include/__algorithm/generate.h
index d3e1133e2bc1..10834cdb7438 100644
--- a/libcxx/include/__algorithm/generate.h
+++ b/libcxx/include/__algorithm/generate.h
@@ -15,9 +15,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _ForwardIterator, class _Generator>
@@ -31,6 +28,4 @@ generate(_ForwardIterator __first, _ForwardIterator __last, _Generator __gen)
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_GENERATE_H
diff --git a/libcxx/include/__algorithm/generate_n.h b/libcxx/include/__algorithm/generate_n.h
index c31259858613..595007cdd34b 100644
--- a/libcxx/include/__algorithm/generate_n.h
+++ b/libcxx/include/__algorithm/generate_n.h
@@ -16,9 +16,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _OutputIterator, class _Size, class _Generator>
@@ -35,6 +32,4 @@ generate_n(_OutputIterator __first, _Size __orig_n, _Generator __gen)
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_GENERATE_N_H
diff --git a/libcxx/include/__algorithm/half_positive.h b/libcxx/include/__algorithm/half_positive.h
index b03efc495b1a..5d36ff5da985 100644
--- a/libcxx/include/__algorithm/half_positive.h
+++ b/libcxx/include/__algorithm/half_positive.h
@@ -16,9 +16,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
// Perform division by two quickly for positive integers (llvm.org/PR39129)
@@ -49,6 +46,4 @@ __half_positive(_Tp __value)
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_HALF_POSITIVE_H
diff --git a/libcxx/include/__algorithm/includes.h b/libcxx/include/__algorithm/includes.h
index ff298a51118e..9cc54d938dcf 100644
--- a/libcxx/include/__algorithm/includes.h
+++ b/libcxx/include/__algorithm/includes.h
@@ -18,9 +18,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Compare, class _InputIterator1, class _InputIterator2>
@@ -62,6 +59,4 @@ includes(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __fi
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_INCLUDES_H
diff --git a/libcxx/include/__algorithm/is_heap.h b/libcxx/include/__algorithm/is_heap.h
index bc3682d471aa..7fd5d6ff9a00 100644
--- a/libcxx/include/__algorithm/is_heap.h
+++ b/libcxx/include/__algorithm/is_heap.h
@@ -18,9 +18,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _RandomAccessIterator, class _Compare>
@@ -43,6 +40,4 @@ is_heap(_RandomAccessIterator __first, _RandomAccessIterator __last)
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_IS_HEAP_H
diff --git a/libcxx/include/__algorithm/is_heap_until.h b/libcxx/include/__algorithm/is_heap_until.h
index 8c52edb7d498..99291c102bfc 100644
--- a/libcxx/include/__algorithm/is_heap_until.h
+++ b/libcxx/include/__algorithm/is_heap_until.h
@@ -17,9 +17,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _RandomAccessIterator, class _Compare>
@@ -60,6 +57,4 @@ is_heap_until(_RandomAccessIterator __first, _RandomAccessIterator __last)
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_IS_HEAP_UNTIL_H
diff --git a/libcxx/include/__algorithm/is_partitioned.h b/libcxx/include/__algorithm/is_partitioned.h
index 43de6650cbd4..e5b2214aa069 100644
--- a/libcxx/include/__algorithm/is_partitioned.h
+++ b/libcxx/include/__algorithm/is_partitioned.h
@@ -15,9 +15,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _InputIterator, class _Predicate>
@@ -38,6 +35,4 @@ is_partitioned(_InputIterator __first, _InputIterator __last, _Predicate __pred)
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_IS_PARTITIONED_H
diff --git a/libcxx/include/__algorithm/is_permutation.h b/libcxx/include/__algorithm/is_permutation.h
index 0545eb76370a..344aa763ad0e 100644
--- a/libcxx/include/__algorithm/is_permutation.h
+++ b/libcxx/include/__algorithm/is_permutation.h
@@ -12,17 +12,14 @@
#include <__algorithm/comp.h>
#include <__config>
+#include <__iterator/distance.h>
#include <__iterator/iterator_traits.h>
#include <__iterator/next.h>
-#include <iterator> // FIXME: replace with <__iterator/distance.h> when it lands
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
@@ -136,15 +133,14 @@ _LIBCPP_CONSTEXPR_AFTER_CXX17 bool __is_permutation(_RandomAccessIterator1 __fir
if (_VSTD::distance(__first1, __last1) != _VSTD::distance(__first2, __last2))
return false;
return _VSTD::is_permutation<_RandomAccessIterator1, _RandomAccessIterator2,
- typename add_lvalue_reference<_BinaryPredicate>::type>(__first1, __last1, __first2,
- __pred);
+ _BinaryPredicate&>(__first1, __last1, __first2, __pred);
}
template <class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
_LIBCPP_NODISCARD_EXT inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 bool
is_permutation(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
_ForwardIterator2 __last2, _BinaryPredicate __pred) {
- return _VSTD::__is_permutation<typename add_lvalue_reference<_BinaryPredicate>::type>(
+ return _VSTD::__is_permutation<_BinaryPredicate&>(
__first1, __last1, __first2, __last2, __pred, typename iterator_traits<_ForwardIterator1>::iterator_category(),
typename iterator_traits<_ForwardIterator2>::iterator_category());
}
@@ -163,6 +159,4 @@ is_permutation(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIt
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_IS_PERMUTATION_H
diff --git a/libcxx/include/__algorithm/is_sorted.h b/libcxx/include/__algorithm/is_sorted.h
index 30d8da0499e4..c6954934e8c0 100644
--- a/libcxx/include/__algorithm/is_sorted.h
+++ b/libcxx/include/__algorithm/is_sorted.h
@@ -18,9 +18,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _ForwardIterator, class _Compare>
@@ -43,6 +40,4 @@ is_sorted(_ForwardIterator __first, _ForwardIterator __last)
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_IS_SORTED_H
diff --git a/libcxx/include/__algorithm/is_sorted_until.h b/libcxx/include/__algorithm/is_sorted_until.h
index a914b5a566b6..5b6385991a74 100644
--- a/libcxx/include/__algorithm/is_sorted_until.h
+++ b/libcxx/include/__algorithm/is_sorted_until.h
@@ -17,9 +17,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _ForwardIterator, class _Compare>
@@ -50,6 +47,4 @@ is_sorted_until(_ForwardIterator __first, _ForwardIterator __last)
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_IS_SORTED_UNTIL_H
diff --git a/libcxx/include/__algorithm/iter_swap.h b/libcxx/include/__algorithm/iter_swap.h
index b63bce62cc0c..9f7d0d77630c 100644
--- a/libcxx/include/__algorithm/iter_swap.h
+++ b/libcxx/include/__algorithm/iter_swap.h
@@ -17,9 +17,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _ForwardIterator1, class _ForwardIterator2>
@@ -32,6 +29,4 @@ inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 void iter_swap(_F
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_ITER_SWAP_H
diff --git a/libcxx/include/__algorithm/lexicographical_compare.h b/libcxx/include/__algorithm/lexicographical_compare.h
index 6e4a90bdc03d..a110a58c01c1 100644
--- a/libcxx/include/__algorithm/lexicographical_compare.h
+++ b/libcxx/include/__algorithm/lexicographical_compare.h
@@ -18,9 +18,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Compare, class _InputIterator1, class _InputIterator2>
@@ -63,6 +60,4 @@ lexicographical_compare(_InputIterator1 __first1, _InputIterator1 __last1,
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_LEXICOGRAPHICAL_COMPARE_H
diff --git a/libcxx/include/__algorithm/lower_bound.h b/libcxx/include/__algorithm/lower_bound.h
index 1448c8963327..ddaecb045b3e 100644
--- a/libcxx/include/__algorithm/lower_bound.h
+++ b/libcxx/include/__algorithm/lower_bound.h
@@ -18,9 +18,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Compare, class _ForwardIterator, class _Tp>
@@ -51,8 +48,7 @@ _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
_ForwardIterator
lower_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value_, _Compare __comp)
{
- typedef typename add_lvalue_reference<_Compare>::type _Comp_ref;
- return _VSTD::__lower_bound<_Comp_ref>(__first, __last, __value_, __comp);
+ return _VSTD::__lower_bound<_Compare&>(__first, __last, __value_, __comp);
}
template <class _ForwardIterator, class _Tp>
@@ -67,6 +63,4 @@ lower_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __valu
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_LOWER_BOUND_H
diff --git a/libcxx/include/__algorithm/make_heap.h b/libcxx/include/__algorithm/make_heap.h
index eca4013cbd10..b3defd4de072 100644
--- a/libcxx/include/__algorithm/make_heap.h
+++ b/libcxx/include/__algorithm/make_heap.h
@@ -19,9 +19,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Compare, class _RandomAccessIterator>
@@ -59,6 +56,4 @@ make_heap(_RandomAccessIterator __first, _RandomAccessIterator __last)
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_MAKE_HEAP_H
diff --git a/libcxx/include/__algorithm/max_element.h b/libcxx/include/__algorithm/max_element.h
index b93b67eacf57..c51519605af9 100644
--- a/libcxx/include/__algorithm/max_element.h
+++ b/libcxx/include/__algorithm/max_element.h
@@ -17,9 +17,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _ForwardIterator, class _Compare>
@@ -53,6 +50,4 @@ max_element(_ForwardIterator __first, _ForwardIterator __last)
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_MAX_ELEMENT_H
diff --git a/libcxx/include/__algorithm/merge.h b/libcxx/include/__algorithm/merge.h
index ea53ad68f7f7..480380db6caa 100644
--- a/libcxx/include/__algorithm/merge.h
+++ b/libcxx/include/__algorithm/merge.h
@@ -19,9 +19,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Compare, class _InputIterator1, class _InputIterator2, class _OutputIterator>
@@ -71,6 +68,4 @@ merge(_InputIterator1 __first1, _InputIterator1 __last1,
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_MERGE_H
diff --git a/libcxx/include/__algorithm/min_element.h b/libcxx/include/__algorithm/min_element.h
index 6bff140e5d92..9bfd0793c69d 100644
--- a/libcxx/include/__algorithm/min_element.h
+++ b/libcxx/include/__algorithm/min_element.h
@@ -17,9 +17,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _ForwardIterator, class _Compare>
@@ -52,6 +49,4 @@ min_element(_ForwardIterator __first, _ForwardIterator __last)
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_MIN_ELEMENT_H
diff --git a/libcxx/include/__algorithm/minmax.h b/libcxx/include/__algorithm/minmax.h
index 63753f2fecdc..a96a5b252c09 100644
--- a/libcxx/include/__algorithm/minmax.h
+++ b/libcxx/include/__algorithm/minmax.h
@@ -19,9 +19,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template<class _Tp, class _Compare>
@@ -96,6 +93,4 @@ minmax(initializer_list<_Tp> __t)
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_MINMAX_H
diff --git a/libcxx/include/__algorithm/minmax_element.h b/libcxx/include/__algorithm/minmax_element.h
index 1eba006de9a1..d21ff6f8dc5a 100644
--- a/libcxx/include/__algorithm/minmax_element.h
+++ b/libcxx/include/__algorithm/minmax_element.h
@@ -18,9 +18,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _ForwardIterator, class _Compare>
@@ -85,6 +82,4 @@ minmax_element(_ForwardIterator __first, _ForwardIterator __last)
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_MINMAX_ELEMENT_H
diff --git a/libcxx/include/__algorithm/mismatch.h b/libcxx/include/__algorithm/mismatch.h
index fdd2bc8c09c1..7a01a985934a 100644
--- a/libcxx/include/__algorithm/mismatch.h
+++ b/libcxx/include/__algorithm/mismatch.h
@@ -19,9 +19,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _InputIterator1, class _InputIterator2, class _BinaryPredicate>
@@ -67,6 +64,4 @@ _LIBCPP_NODISCARD_EXT inline _LIBCPP_INLINE_VISIBILITY
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_MISMATCH_H
diff --git a/libcxx/include/__algorithm/move.h b/libcxx/include/__algorithm/move.h
index f5fc74854f05..7430bf087438 100644
--- a/libcxx/include/__algorithm/move.h
+++ b/libcxx/include/__algorithm/move.h
@@ -20,9 +20,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
// move
@@ -78,6 +75,4 @@ move(_InputIterator __first, _InputIterator __last, _OutputIterator __result)
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_MOVE_H
diff --git a/libcxx/include/__algorithm/move_backward.h b/libcxx/include/__algorithm/move_backward.h
index 1c93b9819732..ee72d39764ca 100644
--- a/libcxx/include/__algorithm/move_backward.h
+++ b/libcxx/include/__algorithm/move_backward.h
@@ -19,9 +19,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _InputIterator, class _OutputIterator>
@@ -79,6 +76,4 @@ move_backward(_BidirectionalIterator1 __first, _BidirectionalIterator1 __last,
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_MOVE_BACKWARD_H
diff --git a/libcxx/include/__algorithm/next_permutation.h b/libcxx/include/__algorithm/next_permutation.h
index a337e5efa098..1d71354eb375 100644
--- a/libcxx/include/__algorithm/next_permutation.h
+++ b/libcxx/include/__algorithm/next_permutation.h
@@ -20,9 +20,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Compare, class _BidirectionalIterator>
@@ -72,6 +69,4 @@ next_permutation(_BidirectionalIterator __first, _BidirectionalIterator __last)
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_NEXT_PERMUTATION_H
diff --git a/libcxx/include/__algorithm/none_of.h b/libcxx/include/__algorithm/none_of.h
index 285691520c4e..10339e2418c8 100644
--- a/libcxx/include/__algorithm/none_of.h
+++ b/libcxx/include/__algorithm/none_of.h
@@ -16,9 +16,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _InputIterator, class _Predicate>
@@ -32,6 +29,4 @@ none_of(_InputIterator __first, _InputIterator __last, _Predicate __pred) {
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_NONE_OF_H
diff --git a/libcxx/include/__algorithm/nth_element.h b/libcxx/include/__algorithm/nth_element.h
index 67a03cfc37b0..63feba1ea616 100644
--- a/libcxx/include/__algorithm/nth_element.h
+++ b/libcxx/include/__algorithm/nth_element.h
@@ -16,13 +16,14 @@
#include <__iterator/iterator_traits.h>
#include <__utility/swap.h>
+#if defined(_LIBCPP_DEBUG_RANDOMIZE_UNSPECIFIED_STABILITY)
+# include <__algorithm/shuffle.h>
+#endif
+
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template<class _Compare, class _RandomAccessIterator>
@@ -225,8 +226,13 @@ inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
void
nth_element(_RandomAccessIterator __first, _RandomAccessIterator __nth, _RandomAccessIterator __last, _Compare __comp)
{
- typedef typename __comp_ref_type<_Compare>::type _Comp_ref;
- _VSTD::__nth_element<_Comp_ref>(__first, __nth, __last, __comp);
+ _LIBCPP_DEBUG_RANDOMIZE_RANGE(__first, __last);
+ typedef typename __comp_ref_type<_Compare>::type _Comp_ref;
+ _VSTD::__nth_element<_Comp_ref>(__first, __nth, __last, __comp);
+ _LIBCPP_DEBUG_RANDOMIZE_RANGE(__first, __nth);
+ if (__nth != __last) {
+ _LIBCPP_DEBUG_RANDOMIZE_RANGE(++__nth, __last);
+ }
}
template <class _RandomAccessIterator>
@@ -239,6 +245,4 @@ nth_element(_RandomAccessIterator __first, _RandomAccessIterator __nth, _RandomA
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_NTH_ELEMENT_H
diff --git a/libcxx/include/__algorithm/partial_sort.h b/libcxx/include/__algorithm/partial_sort.h
index 4f9872c4922b..622624ec4f42 100644
--- a/libcxx/include/__algorithm/partial_sort.h
+++ b/libcxx/include/__algorithm/partial_sort.h
@@ -18,13 +18,14 @@
#include <__iterator/iterator_traits.h>
#include <__utility/swap.h>
+#if defined(_LIBCPP_DEBUG_RANDOMIZE_UNSPECIFIED_STABILITY)
+# include <__algorithm/shuffle.h>
+#endif
+
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Compare, class _RandomAccessIterator>
@@ -51,8 +52,10 @@ void
partial_sort(_RandomAccessIterator __first, _RandomAccessIterator __middle, _RandomAccessIterator __last,
_Compare __comp)
{
- typedef typename __comp_ref_type<_Compare>::type _Comp_ref;
- _VSTD::__partial_sort<_Comp_ref>(__first, __middle, __last, __comp);
+ _LIBCPP_DEBUG_RANDOMIZE_RANGE(__first, __last);
+ typedef typename __comp_ref_type<_Compare>::type _Comp_ref;
+ _VSTD::__partial_sort<_Comp_ref>(__first, __middle, __last, __comp);
+ _LIBCPP_DEBUG_RANDOMIZE_RANGE(__middle, __last);
}
template <class _RandomAccessIterator>
@@ -66,6 +69,4 @@ partial_sort(_RandomAccessIterator __first, _RandomAccessIterator __middle, _Ran
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_PARTIAL_SORT_H
diff --git a/libcxx/include/__algorithm/partial_sort_copy.h b/libcxx/include/__algorithm/partial_sort_copy.h
index 31a12615fa5b..4c0c9f5ad04a 100644
--- a/libcxx/include/__algorithm/partial_sort_copy.h
+++ b/libcxx/include/__algorithm/partial_sort_copy.h
@@ -22,9 +22,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Compare, class _InputIterator, class _RandomAccessIterator>
@@ -72,6 +69,4 @@ partial_sort_copy(_InputIterator __first, _InputIterator __last,
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_PARTIAL_SORT_COPY_H
diff --git a/libcxx/include/__algorithm/partition.h b/libcxx/include/__algorithm/partition.h
index c859eaca28a3..2614520ccbcf 100644
--- a/libcxx/include/__algorithm/partition.h
+++ b/libcxx/include/__algorithm/partition.h
@@ -13,15 +13,11 @@
#include <__iterator/iterator_traits.h>
#include <__utility/swap.h>
#include <utility> // pair
-#include <type_traits>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Predicate, class _ForwardIterator>
@@ -77,12 +73,10 @@ inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
_ForwardIterator
partition(_ForwardIterator __first, _ForwardIterator __last, _Predicate __pred)
{
- return _VSTD::__partition<typename add_lvalue_reference<_Predicate>::type>
- (__first, __last, __pred, typename iterator_traits<_ForwardIterator>::iterator_category());
+ return _VSTD::__partition<_Predicate&>(
+ __first, __last, __pred, typename iterator_traits<_ForwardIterator>::iterator_category());
}
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_PARTITION_H
diff --git a/libcxx/include/__algorithm/partition_copy.h b/libcxx/include/__algorithm/partition_copy.h
index 445eacdfbe9a..d34944589b9e 100644
--- a/libcxx/include/__algorithm/partition_copy.h
+++ b/libcxx/include/__algorithm/partition_copy.h
@@ -17,9 +17,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _InputIterator, class _OutputIterator1,
@@ -47,6 +44,4 @@ partition_copy(_InputIterator __first, _InputIterator __last,
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_PARTITION_COPY_H
diff --git a/libcxx/include/__algorithm/partition_point.h b/libcxx/include/__algorithm/partition_point.h
index 12ddacf9d756..33aaf33d938c 100644
--- a/libcxx/include/__algorithm/partition_point.h
+++ b/libcxx/include/__algorithm/partition_point.h
@@ -17,9 +17,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template<class _ForwardIterator, class _Predicate>
@@ -46,6 +43,4 @@ partition_point(_ForwardIterator __first, _ForwardIterator __last, _Predicate __
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_PARTITION_POINT_H
diff --git a/libcxx/include/__algorithm/pop_heap.h b/libcxx/include/__algorithm/pop_heap.h
index 7ebbef25e0ba..e8c801a5c81f 100644
--- a/libcxx/include/__algorithm/pop_heap.h
+++ b/libcxx/include/__algorithm/pop_heap.h
@@ -20,9 +20,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Compare, class _RandomAccessIterator>
@@ -57,6 +54,4 @@ pop_heap(_RandomAccessIterator __first, _RandomAccessIterator __last)
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_POP_HEAP_H
diff --git a/libcxx/include/__algorithm/prev_permutation.h b/libcxx/include/__algorithm/prev_permutation.h
index d6daa73ada00..12c1816da37e 100644
--- a/libcxx/include/__algorithm/prev_permutation.h
+++ b/libcxx/include/__algorithm/prev_permutation.h
@@ -20,9 +20,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Compare, class _BidirectionalIterator>
@@ -72,6 +69,4 @@ prev_permutation(_BidirectionalIterator __first, _BidirectionalIterator __last)
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_PREV_PERMUTATION_H
diff --git a/libcxx/include/__algorithm/push_heap.h b/libcxx/include/__algorithm/push_heap.h
index 82a7c123d834..9327fe05b51d 100644
--- a/libcxx/include/__algorithm/push_heap.h
+++ b/libcxx/include/__algorithm/push_heap.h
@@ -19,9 +19,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Compare, class _RandomAccessIterator>
@@ -70,6 +67,4 @@ push_heap(_RandomAccessIterator __first, _RandomAccessIterator __last)
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_PUSH_HEAP_H
diff --git a/libcxx/include/__algorithm/remove.h b/libcxx/include/__algorithm/remove.h
index 4717d7d97310..171d83284a2e 100644
--- a/libcxx/include/__algorithm/remove.h
+++ b/libcxx/include/__algorithm/remove.h
@@ -18,9 +18,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _ForwardIterator, class _Tp>
@@ -45,6 +42,4 @@ remove(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value_)
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_REMOVE_H
diff --git a/libcxx/include/__algorithm/remove_copy.h b/libcxx/include/__algorithm/remove_copy.h
index 5d2b6403438a..338ca94300bb 100644
--- a/libcxx/include/__algorithm/remove_copy.h
+++ b/libcxx/include/__algorithm/remove_copy.h
@@ -15,9 +15,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _InputIterator, class _OutputIterator, class _Tp>
@@ -38,6 +35,4 @@ remove_copy(_InputIterator __first, _InputIterator __last, _OutputIterator __res
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_REMOVE_COPY_H
diff --git a/libcxx/include/__algorithm/remove_copy_if.h b/libcxx/include/__algorithm/remove_copy_if.h
index 44822564a5cb..a55638722074 100644
--- a/libcxx/include/__algorithm/remove_copy_if.h
+++ b/libcxx/include/__algorithm/remove_copy_if.h
@@ -15,9 +15,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _InputIterator, class _OutputIterator, class _Predicate>
@@ -38,6 +35,4 @@ remove_copy_if(_InputIterator __first, _InputIterator __last, _OutputIterator __
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_REMOVE_COPY_IF_H
diff --git a/libcxx/include/__algorithm/remove_if.h b/libcxx/include/__algorithm/remove_if.h
index e506b4c67fba..4df36896afd5 100644
--- a/libcxx/include/__algorithm/remove_if.h
+++ b/libcxx/include/__algorithm/remove_if.h
@@ -12,23 +12,18 @@
#include <__config>
#include <__algorithm/find_if.h>
#include <utility>
-#include <type_traits>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _ForwardIterator, class _Predicate>
_LIBCPP_NODISCARD_EXT _LIBCPP_CONSTEXPR_AFTER_CXX17 _ForwardIterator
remove_if(_ForwardIterator __first, _ForwardIterator __last, _Predicate __pred)
{
- __first = _VSTD::find_if<_ForwardIterator, typename add_lvalue_reference<_Predicate>::type>
- (__first, __last, __pred);
+ __first = _VSTD::find_if<_ForwardIterator, _Predicate&>(__first, __last, __pred);
if (__first != __last)
{
_ForwardIterator __i = __first;
@@ -46,6 +41,4 @@ remove_if(_ForwardIterator __first, _ForwardIterator __last, _Predicate __pred)
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_REMOVE_IF_H
diff --git a/libcxx/include/__algorithm/replace.h b/libcxx/include/__algorithm/replace.h
index b723ffeeace4..2bc96ffc87dc 100644
--- a/libcxx/include/__algorithm/replace.h
+++ b/libcxx/include/__algorithm/replace.h
@@ -15,9 +15,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _ForwardIterator, class _Tp>
@@ -32,6 +29,4 @@ replace(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __old_valu
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_REPLACE_H
diff --git a/libcxx/include/__algorithm/replace_copy.h b/libcxx/include/__algorithm/replace_copy.h
index 1923a57e427c..c6c5fe32e81c 100644
--- a/libcxx/include/__algorithm/replace_copy.h
+++ b/libcxx/include/__algorithm/replace_copy.h
@@ -15,9 +15,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _InputIterator, class _OutputIterator, class _Tp>
@@ -36,6 +33,4 @@ replace_copy(_InputIterator __first, _InputIterator __last, _OutputIterator __re
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_REPLACE_COPY_H
diff --git a/libcxx/include/__algorithm/replace_copy_if.h b/libcxx/include/__algorithm/replace_copy_if.h
index 72b6f736970c..274d8e630ef1 100644
--- a/libcxx/include/__algorithm/replace_copy_if.h
+++ b/libcxx/include/__algorithm/replace_copy_if.h
@@ -15,9 +15,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _InputIterator, class _OutputIterator, class _Predicate, class _Tp>
@@ -36,6 +33,4 @@ replace_copy_if(_InputIterator __first, _InputIterator __last, _OutputIterator _
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_REPLACE_COPY_IF_H
diff --git a/libcxx/include/__algorithm/replace_if.h b/libcxx/include/__algorithm/replace_if.h
index 49101a5ce16d..bcc3feb2f507 100644
--- a/libcxx/include/__algorithm/replace_if.h
+++ b/libcxx/include/__algorithm/replace_if.h
@@ -15,9 +15,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _ForwardIterator, class _Predicate, class _Tp>
@@ -32,6 +29,4 @@ replace_if(_ForwardIterator __first, _ForwardIterator __last, _Predicate __pred,
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_REPLACE_IF_H
diff --git a/libcxx/include/__algorithm/reverse.h b/libcxx/include/__algorithm/reverse.h
index e538de11472e..28bd2e84c8ae 100644
--- a/libcxx/include/__algorithm/reverse.h
+++ b/libcxx/include/__algorithm/reverse.h
@@ -17,9 +17,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _BidirectionalIterator>
@@ -56,6 +53,4 @@ reverse(_BidirectionalIterator __first, _BidirectionalIterator __last)
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_REVERSE_H
diff --git a/libcxx/include/__algorithm/reverse_copy.h b/libcxx/include/__algorithm/reverse_copy.h
index 48ce60cf8811..002c0344a794 100644
--- a/libcxx/include/__algorithm/reverse_copy.h
+++ b/libcxx/include/__algorithm/reverse_copy.h
@@ -15,9 +15,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _BidirectionalIterator, class _OutputIterator>
@@ -32,6 +29,4 @@ reverse_copy(_BidirectionalIterator __first, _BidirectionalIterator __last, _Out
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_REVERSE_COPY_H
diff --git a/libcxx/include/__algorithm/rotate.h b/libcxx/include/__algorithm/rotate.h
index 0c9ccd7bf7b0..fd6d3e9c114f 100644
--- a/libcxx/include/__algorithm/rotate.h
+++ b/libcxx/include/__algorithm/rotate.h
@@ -23,9 +23,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _ForwardIterator>
@@ -200,6 +197,4 @@ rotate(_ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator __l
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_ROTATE_H
diff --git a/libcxx/include/__algorithm/rotate_copy.h b/libcxx/include/__algorithm/rotate_copy.h
index d5ab7d3b515d..4c682ef93d5a 100644
--- a/libcxx/include/__algorithm/rotate_copy.h
+++ b/libcxx/include/__algorithm/rotate_copy.h
@@ -18,9 +18,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _ForwardIterator, class _OutputIterator>
@@ -33,6 +30,4 @@ rotate_copy(_ForwardIterator __first, _ForwardIterator __middle, _ForwardIterato
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_ROTATE_COPY_H
diff --git a/libcxx/include/__algorithm/sample.h b/libcxx/include/__algorithm/sample.h
index 2aac6ffa8ceb..33264c4ea3a7 100644
--- a/libcxx/include/__algorithm/sample.h
+++ b/libcxx/include/__algorithm/sample.h
@@ -9,8 +9,9 @@
#ifndef _LIBCPP___ALGORITHM_SAMPLE_H
#define _LIBCPP___ALGORITHM_SAMPLE_H
-#include <__config>
#include <__algorithm/min.h>
+#include <__config>
+#include <__debug>
#include <__random/uniform_int_distribution.h>
#include <iterator>
diff --git a/libcxx/include/__algorithm/search.h b/libcxx/include/__algorithm/search.h
index 008b8ebb04ad..cfaec0ed1e17 100644
--- a/libcxx/include/__algorithm/search.h
+++ b/libcxx/include/__algorithm/search.h
@@ -13,16 +13,12 @@
#include <__algorithm/comp.h>
#include <__config>
#include <__iterator/iterator_traits.h>
-#include <type_traits>
#include <utility>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _BinaryPredicate, class _ForwardIterator1, class _ForwardIterator2>
@@ -72,7 +68,7 @@ __search(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _Rando
const _D1 __len1 = __last1 - __first1;
if (__len1 < __len2)
return _VSTD::make_pair(__last1, __last1);
- const _RandomAccessIterator1 __s = __last1 - (__len2 - 1); // Start of pattern match can't go beyond here
+ const _RandomAccessIterator1 __s = __last1 - _D1(__len2 - 1); // Start of pattern match can't go beyond here
while (true) {
while (true) {
@@ -87,7 +83,7 @@ __search(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _Rando
_RandomAccessIterator2 __m2 = __first2;
while (true) {
if (++__m2 == __last2)
- return _VSTD::make_pair(__first1, __first1 + __len2);
+ return _VSTD::make_pair(__first1, __first1 + _D1(__len2));
++__m1; // no need to check range on __m1 because __s guarantees we have enough source
if (!__pred(*__m1, *__m2)) {
++__first1;
@@ -101,7 +97,7 @@ template <class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredica
_LIBCPP_NODISCARD_EXT inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 _ForwardIterator1
search(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2,
_BinaryPredicate __pred) {
- return _VSTD::__search<typename add_lvalue_reference<_BinaryPredicate>::type>(
+ return _VSTD::__search<_BinaryPredicate&>(
__first1, __last1, __first2, __last2, __pred,
typename iterator_traits<_ForwardIterator1>::iterator_category(),
typename iterator_traits<_ForwardIterator2>::iterator_category()).first;
@@ -126,6 +122,4 @@ search(_ForwardIterator __f, _ForwardIterator __l, const _Searcher& __s) {
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_SEARCH_H
diff --git a/libcxx/include/__algorithm/search_n.h b/libcxx/include/__algorithm/search_n.h
index 1584e8e613ce..67d066aa43d5 100644
--- a/libcxx/include/__algorithm/search_n.h
+++ b/libcxx/include/__algorithm/search_n.h
@@ -13,15 +13,12 @@
#include <__config>
#include <__algorithm/comp.h>
#include <__iterator/iterator_traits.h>
-#include <type_traits>
+#include <type_traits> // __convert_to_integral
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _BinaryPredicate, class _ForwardIterator, class _Size, class _Tp>
@@ -62,12 +59,13 @@ _LIBCPP_CONSTEXPR_AFTER_CXX17 _RandomAccessIterator __search_n(_RandomAccessIter
_RandomAccessIterator __last, _Size __count,
const _Tp& __value_, _BinaryPredicate __pred,
random_access_iterator_tag) {
+ typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
if (__count <= 0)
return __first;
_Size __len = static_cast<_Size>(__last - __first);
if (__len < __count)
return __last;
- const _RandomAccessIterator __s = __last - (__count - 1); // Start of pattern match can't go beyond here
+ const _RandomAccessIterator __s = __last - difference_type(__count - 1); // Start of pattern match can't go beyond here
while (true) {
    // Find first element in sequence that matches __value_, with a minimum of loop checks
while (true) {
@@ -97,7 +95,7 @@ _LIBCPP_CONSTEXPR_AFTER_CXX17 _RandomAccessIterator __search_n(_RandomAccessIter
template <class _ForwardIterator, class _Size, class _Tp, class _BinaryPredicate>
_LIBCPP_NODISCARD_EXT inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 _ForwardIterator search_n(
_ForwardIterator __first, _ForwardIterator __last, _Size __count, const _Tp& __value_, _BinaryPredicate __pred) {
- return _VSTD::__search_n<typename add_lvalue_reference<_BinaryPredicate>::type>(
+ return _VSTD::__search_n<_BinaryPredicate&>(
__first, __last, _VSTD::__convert_to_integral(__count), __value_, __pred,
typename iterator_traits<_ForwardIterator>::iterator_category());
}
@@ -111,6 +109,4 @@ search_n(_ForwardIterator __first, _ForwardIterator __last, _Size __count, const
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_SEARCH_N_H
diff --git a/libcxx/include/__algorithm/set_difference.h b/libcxx/include/__algorithm/set_difference.h
index f4c985d978cd..d4a9750d6dd7 100644
--- a/libcxx/include/__algorithm/set_difference.h
+++ b/libcxx/include/__algorithm/set_difference.h
@@ -19,9 +19,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Compare, class _InputIterator1, class _InputIterator2, class _OutputIterator>
@@ -72,6 +69,4 @@ set_difference(_InputIterator1 __first1, _InputIterator1 __last1,
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_SET_DIFFERENCE_H
diff --git a/libcxx/include/__algorithm/set_intersection.h b/libcxx/include/__algorithm/set_intersection.h
index 9d34b66c23e3..518e5e68b39d 100644
--- a/libcxx/include/__algorithm/set_intersection.h
+++ b/libcxx/include/__algorithm/set_intersection.h
@@ -18,9 +18,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Compare, class _InputIterator1, class _InputIterator2, class _OutputIterator>
@@ -69,6 +66,4 @@ set_intersection(_InputIterator1 __first1, _InputIterator1 __last1,
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_SET_INTERSECTION_H
diff --git a/libcxx/include/__algorithm/set_symmetric_difference.h b/libcxx/include/__algorithm/set_symmetric_difference.h
index 5650b836a611..efdf62725709 100644
--- a/libcxx/include/__algorithm/set_symmetric_difference.h
+++ b/libcxx/include/__algorithm/set_symmetric_difference.h
@@ -19,9 +19,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Compare, class _InputIterator1, class _InputIterator2, class _OutputIterator>
@@ -77,6 +74,4 @@ set_symmetric_difference(_InputIterator1 __first1, _InputIterator1 __last1,
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_SET_SYMMETRIC_DIFFERENCE_H
diff --git a/libcxx/include/__algorithm/set_union.h b/libcxx/include/__algorithm/set_union.h
index c0874e957461..388f037a73a4 100644
--- a/libcxx/include/__algorithm/set_union.h
+++ b/libcxx/include/__algorithm/set_union.h
@@ -19,9 +19,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Compare, class _InputIterator1, class _InputIterator2, class _OutputIterator>
@@ -72,6 +69,4 @@ set_union(_InputIterator1 __first1, _InputIterator1 __last1,
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_SET_UNION_H
diff --git a/libcxx/include/__algorithm/shift_left.h b/libcxx/include/__algorithm/shift_left.h
index 961b89cb00ba..8d9bc07d2e48 100644
--- a/libcxx/include/__algorithm/shift_left.h
+++ b/libcxx/include/__algorithm/shift_left.h
@@ -18,9 +18,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER > 17
@@ -56,6 +53,4 @@ shift_left(_ForwardIterator __first, _ForwardIterator __last,
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_SHIFT_LEFT_H
diff --git a/libcxx/include/__algorithm/shift_right.h b/libcxx/include/__algorithm/shift_right.h
index 5cb41950642c..cee17733a6a2 100644
--- a/libcxx/include/__algorithm/shift_right.h
+++ b/libcxx/include/__algorithm/shift_right.h
@@ -20,9 +20,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER > 17
@@ -101,6 +98,4 @@ shift_right(_ForwardIterator __first, _ForwardIterator __last,
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_SHIFT_RIGHT_H
diff --git a/libcxx/include/__algorithm/shuffle.h b/libcxx/include/__algorithm/shuffle.h
index 637fca538857..7f6ad50e241e 100644
--- a/libcxx/include/__algorithm/shuffle.h
+++ b/libcxx/include/__algorithm/shuffle.h
@@ -25,6 +25,39 @@ _LIBCPP_PUSH_MACROS
_LIBCPP_BEGIN_NAMESPACE_STD
+class _LIBCPP_TYPE_VIS __libcpp_debug_randomizer {
+public:
+ __libcpp_debug_randomizer() {
+ __state = __seed();
+ __inc = __state + 0xda3e39cb94b95bdbULL;
+ __inc = (__inc << 1) | 1;
+ }
+ typedef uint_fast32_t result_type;
+
+ static const result_type _Min = 0;
+ static const result_type _Max = 0xFFFFFFFF;
+
+ _LIBCPP_HIDE_FROM_ABI result_type operator()() {
+ uint_fast64_t __oldstate = __state;
+ __state = __oldstate * 6364136223846793005ULL + __inc;
+ return __oldstate >> 32;
+ }
+
+ static _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR result_type min() { return _Min; }
+ static _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR result_type max() { return _Max; }
+
+private:
+ uint_fast64_t __state;
+ uint_fast64_t __inc;
+ _LIBCPP_HIDE_FROM_ABI static uint_fast64_t __seed() {
+#ifdef _LIBCPP_DEBUG_RANDOMIZE_UNSPECIFIED_STABILITY_SEED
+ return _LIBCPP_DEBUG_RANDOMIZE_UNSPECIFIED_STABILITY_SEED;
+#else
+ static char __x;
+ return reinterpret_cast<uintptr_t>(&__x);
+#endif
+ }
+};
#if _LIBCPP_STD_VER <= 14 || defined(_LIBCPP_ENABLE_CXX17_REMOVED_RANDOM_SHUFFLE) \
|| defined(_LIBCPP_BUILDING_LIBRARY)
diff --git a/libcxx/include/__algorithm/sift_down.h b/libcxx/include/__algorithm/sift_down.h
index dd4b54ed8e5f..4d99ff237c96 100644
--- a/libcxx/include/__algorithm/sift_down.h
+++ b/libcxx/include/__algorithm/sift_down.h
@@ -17,9 +17,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Compare, class _RandomAccessIterator>
@@ -41,7 +38,7 @@ __sift_down(_RandomAccessIterator __first, _RandomAccessIterator /*__last*/,
__child = 2 * __child + 1;
_RandomAccessIterator __child_i = __first + __child;
- if ((__child + 1) < __len && __comp(*__child_i, *(__child_i + 1))) {
+ if ((__child + 1) < __len && __comp(*__child_i, *(__child_i + difference_type(1)))) {
// right-child exists and is greater than left-child
++__child_i;
++__child;
@@ -66,7 +63,7 @@ __sift_down(_RandomAccessIterator __first, _RandomAccessIterator /*__last*/,
__child = 2 * __child + 1;
__child_i = __first + __child;
- if ((__child + 1) < __len && __comp(*__child_i, *(__child_i + 1))) {
+ if ((__child + 1) < __len && __comp(*__child_i, *(__child_i + difference_type(1)))) {
// right-child exists and is greater than left-child
++__child_i;
++__child;
@@ -79,6 +76,4 @@ __sift_down(_RandomAccessIterator __first, _RandomAccessIterator /*__last*/,
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_SIFT_DOWN_H
diff --git a/libcxx/include/__algorithm/sort.h b/libcxx/include/__algorithm/sort.h
index 39ec21302d20..bc127689a674 100644
--- a/libcxx/include/__algorithm/sort.h
+++ b/libcxx/include/__algorithm/sort.h
@@ -17,15 +17,15 @@
#include <__algorithm/unwrap_iter.h>
#include <__utility/swap.h>
#include <memory>
-#include <type_traits> // swap
+
+#if defined(_LIBCPP_DEBUG_RANDOMIZE_UNSPECIFIED_STABILITY)
+# include <__algorithm/shuffle.h>
+#endif
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
// stable, 2-3 compares, 0-2 swaps
@@ -131,9 +131,7 @@ __selection_sort(_BidirectionalIterator __first, _BidirectionalIterator __last,
_BidirectionalIterator __lm1 = __last;
for (--__lm1; __first != __lm1; ++__first)
{
- _BidirectionalIterator __i = _VSTD::min_element<_BidirectionalIterator,
- typename add_lvalue_reference<_Compare>::type>
- (__first, __last, __comp);
+ _BidirectionalIterator __i = _VSTD::min_element(__first, __last, __comp);
if (__i != __first)
swap(*__first, *__i);
}
@@ -162,10 +160,11 @@ template <class _Compare, class _RandomAccessIterator>
void
__insertion_sort_3(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp)
{
+ typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type;
- _RandomAccessIterator __j = __first+2;
- _VSTD::__sort3<_Compare>(__first, __first+1, __j, __comp);
- for (_RandomAccessIterator __i = __j+1; __i != __last; ++__i)
+ _RandomAccessIterator __j = __first+difference_type(2);
+ _VSTD::__sort3<_Compare>(__first, __first+difference_type(1), __j, __comp);
+ for (_RandomAccessIterator __i = __j+difference_type(1); __i != __last; ++__i)
{
if (__comp(*__i, *__j))
{
@@ -187,6 +186,7 @@ template <class _Compare, class _RandomAccessIterator>
bool
__insertion_sort_incomplete(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp)
{
+ typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
switch (__last - __first)
{
case 0:
@@ -197,21 +197,21 @@ __insertion_sort_incomplete(_RandomAccessIterator __first, _RandomAccessIterator
swap(*__first, *__last);
return true;
case 3:
- _VSTD::__sort3<_Compare>(__first, __first+1, --__last, __comp);
+ _VSTD::__sort3<_Compare>(__first, __first+difference_type(1), --__last, __comp);
return true;
case 4:
- _VSTD::__sort4<_Compare>(__first, __first+1, __first+2, --__last, __comp);
+ _VSTD::__sort4<_Compare>(__first, __first+difference_type(1), __first+difference_type(2), --__last, __comp);
return true;
case 5:
- _VSTD::__sort5<_Compare>(__first, __first+1, __first+2, __first+3, --__last, __comp);
+ _VSTD::__sort5<_Compare>(__first, __first+difference_type(1), __first+difference_type(2), __first+difference_type(3), --__last, __comp);
return true;
}
typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type;
- _RandomAccessIterator __j = __first+2;
- _VSTD::__sort3<_Compare>(__first, __first+1, __j, __comp);
+ _RandomAccessIterator __j = __first+difference_type(2);
+ _VSTD::__sort3<_Compare>(__first, __first+difference_type(1), __j, __comp);
const unsigned __limit = 8;
unsigned __count = 0;
- for (_RandomAccessIterator __i = __j+1; __i != __last; ++__i)
+ for (_RandomAccessIterator __i = __j+difference_type(1); __i != __last; ++__i)
{
if (__comp(*__i, *__j))
{
@@ -269,7 +269,8 @@ __insertion_sort_move(_BidirectionalIterator __first1, _BidirectionalIterator __
template <class _Compare, class _RandomAccessIterator>
void
-__sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp)
+__introsort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp,
+ typename iterator_traits<_RandomAccessIterator>::difference_type __depth)
{
typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type;
@@ -289,13 +290,13 @@ __sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __c
swap(*__first, *__last);
return;
case 3:
- _VSTD::__sort3<_Compare>(__first, __first+1, --__last, __comp);
+ _VSTD::__sort3<_Compare>(__first, __first+difference_type(1), --__last, __comp);
return;
case 4:
- _VSTD::__sort4<_Compare>(__first, __first+1, __first+2, --__last, __comp);
+ _VSTD::__sort4<_Compare>(__first, __first+difference_type(1), __first+difference_type(2), --__last, __comp);
return;
case 5:
- _VSTD::__sort5<_Compare>(__first, __first+1, __first+2, __first+3, --__last, __comp);
+ _VSTD::__sort5<_Compare>(__first, __first+difference_type(1), __first+difference_type(2), __first+difference_type(3), --__last, __comp);
return;
}
if (__len <= __limit)
@@ -304,6 +305,13 @@ __sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __c
return;
}
// __len > 5
+ if (__depth == 0)
+ {
+ // Fallback to heap sort as Introsort suggests.
+ _VSTD::__partial_sort<_Compare>(__first, __last, __last, __comp);
+ return;
+ }
+ --__depth;
_RandomAccessIterator __m = __first;
_RandomAccessIterator __lm1 = __last;
--__lm1;
@@ -427,7 +435,7 @@ __sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __c
if (__n_swaps == 0)
{
bool __fs = _VSTD::__insertion_sort_incomplete<_Compare>(__first, __i, __comp);
- if (_VSTD::__insertion_sort_incomplete<_Compare>(__i+1, __last, __comp))
+ if (_VSTD::__insertion_sort_incomplete<_Compare>(__i+difference_type(1), __last, __comp))
{
if (__fs)
return;
@@ -446,19 +454,34 @@ __sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __c
// sort smaller range with recursive call and larger with tail recursion elimination
if (__i - __first < __last - __i)
{
- _VSTD::__sort<_Compare>(__first, __i, __comp);
- // _VSTD::__sort<_Compare>(__i+1, __last, __comp);
- __first = ++__i;
+ _VSTD::__introsort<_Compare>(__first, __i, __comp, __depth);
+ __first = ++__i;
}
else
{
- _VSTD::__sort<_Compare>(__i+1, __last, __comp);
- // _VSTD::__sort<_Compare>(__first, __i, __comp);
- __last = __i;
+ _VSTD::__introsort<_Compare>(__i + difference_type(1), __last, __comp, __depth);
+ __last = __i;
}
}
}
+template <typename _Number>
+inline _LIBCPP_HIDE_FROM_ABI _Number __log2i(_Number __n) {
+ _Number __log2 = 0;
+ while (__n > 1) {
+ __log2++;
+ __n >>= 1;
+ }
+ return __log2;
+}
+
+template <class _Compare, class _RandomAccessIterator>
+void __sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) {
+ typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
+ difference_type __depth_limit = 2 * __log2i(__last - __first);
+ _VSTD::__introsort<_Compare>(__first, __last, __comp, __depth_limit);
+}
+
template <class _Compare, class _Tp>
inline _LIBCPP_INLINE_VISIBILITY
void
@@ -469,7 +492,9 @@ __sort(_Tp** __first, _Tp** __last, __less<_Tp*>&)
}
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS void __sort<__less<char>&, char*>(char*, char*, __less<char>&))
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS void __sort<__less<wchar_t>&, wchar_t*>(wchar_t*, wchar_t*, __less<wchar_t>&))
+#endif
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS void __sort<__less<signed char>&, signed char*>(signed char*, signed char*, __less<signed char>&))
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS void __sort<__less<unsigned char>&, unsigned char*>(unsigned char*, unsigned char*, __less<unsigned char>&))
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS void __sort<__less<short>&, short*>(short*, short*, __less<short>&))
@@ -485,7 +510,9 @@ _LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS void __sort<__less<double>&, double*>(d
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS void __sort<__less<long double>&, long double*>(long double*, long double*, __less<long double>&))
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS bool __insertion_sort_incomplete<__less<char>&, char*>(char*, char*, __less<char>&))
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS bool __insertion_sort_incomplete<__less<wchar_t>&, wchar_t*>(wchar_t*, wchar_t*, __less<wchar_t>&))
+#endif
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS bool __insertion_sort_incomplete<__less<signed char>&, signed char*>(signed char*, signed char*, __less<signed char>&))
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS bool __insertion_sort_incomplete<__less<unsigned char>&, unsigned char*>(unsigned char*, unsigned char*, __less<unsigned char>&))
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS bool __insertion_sort_incomplete<__less<short>&, short*>(short*, short*, __less<short>&))
@@ -507,12 +534,13 @@ inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
void
sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp)
{
- typedef typename __comp_ref_type<_Compare>::type _Comp_ref;
- if (__libcpp_is_constant_evaluated()) {
- _VSTD::__partial_sort<_Comp_ref>(__first, __last, __last, _Comp_ref(__comp));
- } else {
- _VSTD::__sort<_Comp_ref>(_VSTD::__unwrap_iter(__first), _VSTD::__unwrap_iter(__last), _Comp_ref(__comp));
- }
+ _LIBCPP_DEBUG_RANDOMIZE_RANGE(__first, __last);
+ typedef typename __comp_ref_type<_Compare>::type _Comp_ref;
+ if (__libcpp_is_constant_evaluated()) {
+ _VSTD::__partial_sort<_Comp_ref>(__first, __last, __last, _Comp_ref(__comp));
+ } else {
+ _VSTD::__sort<_Comp_ref>(_VSTD::__unwrap_iter(__first), _VSTD::__unwrap_iter(__last), _Comp_ref(__comp));
+ }
}
template <class _RandomAccessIterator>
@@ -525,6 +553,4 @@ sort(_RandomAccessIterator __first, _RandomAccessIterator __last)
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_SORT_H
diff --git a/libcxx/include/__algorithm/sort_heap.h b/libcxx/include/__algorithm/sort_heap.h
index aa8ef762ab13..bf6200c2a08d 100644
--- a/libcxx/include/__algorithm/sort_heap.h
+++ b/libcxx/include/__algorithm/sort_heap.h
@@ -20,9 +20,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Compare, class _RandomAccessIterator>
@@ -53,6 +50,4 @@ sort_heap(_RandomAccessIterator __first, _RandomAccessIterator __last)
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_SORT_HEAP_H
diff --git a/libcxx/include/__algorithm/stable_partition.h b/libcxx/include/__algorithm/stable_partition.h
index 931335f44474..323b323c53dd 100644
--- a/libcxx/include/__algorithm/stable_partition.h
+++ b/libcxx/include/__algorithm/stable_partition.h
@@ -19,9 +19,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Predicate, class _ForwardIterator, class _Distance, class _Pair>
@@ -85,8 +82,7 @@ __stable_partition(_ForwardIterator __first, _ForwardIterator __last, _Predicate
    // recurse on [__first, __m), *__first known to be false
// F?????????????????
// f m l
- typedef typename add_lvalue_reference<_Predicate>::type _PredRef;
- _ForwardIterator __first_false = _VSTD::__stable_partition<_PredRef>(__first, __m, __pred, __len2, __p, __fit);
+ _ForwardIterator __first_false = _VSTD::__stable_partition<_Predicate&>(__first, __m, __pred, __len2, __p, __fit);
// TTTFFFFF??????????
// f ff m l
    // recurse on [__m, __last], except increase __m until *(__m) is false, *__last known to be true
@@ -101,7 +97,7 @@ __stable_partition(_ForwardIterator __first, _ForwardIterator __last, _Predicate
}
// TTTFFFFFTTTF??????
// f ff m m1 l
- __second_false = _VSTD::__stable_partition<_PredRef>(__m1, __last, __pred, __len_half, __p, __fit);
+ __second_false = _VSTD::__stable_partition<_Predicate&>(__m1, __last, __pred, __len_half, __p, __fit);
__second_half_done:
// TTTFFFFFTTTTTFFFFF
// f ff m sf l
@@ -137,8 +133,7 @@ __stable_partition(_ForwardIterator __first, _ForwardIterator __last, _Predicate
__p = _VSTD::get_temporary_buffer<value_type>(__len);
__h.reset(__p.first);
}
- return _VSTD::__stable_partition<typename add_lvalue_reference<_Predicate>::type>
- (__first, __last, __pred, __len, __p, forward_iterator_tag());
+ return _VSTD::__stable_partition<_Predicate&>(__first, __last, __pred, __len, __p, forward_iterator_tag());
}
template <class _Predicate, class _BidirectionalIterator, class _Distance, class _Pair>
@@ -222,8 +217,7 @@ __stable_partition(_BidirectionalIterator __first, _BidirectionalIterator __last
}
// F???TFFF?????????T
// f m1 m l
- typedef typename add_lvalue_reference<_Predicate>::type _PredRef;
- __first_false = _VSTD::__stable_partition<_PredRef>(__first, __m1, __pred, __len_half, __p, __bit);
+ __first_false = _VSTD::__stable_partition<_Predicate&>(__first, __m1, __pred, __len_half, __p, __bit);
__first_half_done:
// TTTFFFFF?????????T
// f ff m l
@@ -240,7 +234,7 @@ __first_half_done:
}
// TTTFFFFFTTTF?????T
// f ff m m1 l
- __second_false = _VSTD::__stable_partition<_PredRef>(__m1, __last, __pred, __len_half, __p, __bit);
+ __second_false = _VSTD::__stable_partition<_Predicate&>(__m1, __last, __pred, __len_half, __p, __bit);
__second_half_done:
// TTTFFFFFTTTTTFFFFF
// f ff m sf l
@@ -285,8 +279,7 @@ __stable_partition(_BidirectionalIterator __first, _BidirectionalIterator __last
__p = _VSTD::get_temporary_buffer<value_type>(__len);
__h.reset(__p.first);
}
- return _VSTD::__stable_partition<typename add_lvalue_reference<_Predicate>::type>
- (__first, __last, __pred, __len, __p, bidirectional_iterator_tag());
+ return _VSTD::__stable_partition<_Predicate&>(__first, __last, __pred, __len, __p, bidirectional_iterator_tag());
}
template <class _ForwardIterator, class _Predicate>
@@ -294,12 +287,9 @@ inline _LIBCPP_INLINE_VISIBILITY
_ForwardIterator
stable_partition(_ForwardIterator __first, _ForwardIterator __last, _Predicate __pred)
{
- return _VSTD::__stable_partition<typename add_lvalue_reference<_Predicate>::type>
- (__first, __last, __pred, typename iterator_traits<_ForwardIterator>::iterator_category());
+ return _VSTD::__stable_partition<_Predicate&>(__first, __last, __pred, typename iterator_traits<_ForwardIterator>::iterator_category());
}
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_STABLE_PARTITION_H
diff --git a/libcxx/include/__algorithm/stable_sort.h b/libcxx/include/__algorithm/stable_sort.h
index 32b239a0d72f..41e17bde99ef 100644
--- a/libcxx/include/__algorithm/stable_sort.h
+++ b/libcxx/include/__algorithm/stable_sort.h
@@ -23,9 +23,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Compare, class _InputIterator1, class _InputIterator2>
@@ -41,14 +38,14 @@ __merge_move_construct(_InputIterator1 __first1, _InputIterator1 __last1,
{
if (__first1 == __last1)
{
- for (; __first2 != __last2; ++__first2, ++__result, (void)__d.template __incr<value_type>())
+ for (; __first2 != __last2; ++__first2, (void) ++__result, __d.template __incr<value_type>())
::new ((void*)__result) value_type(_VSTD::move(*__first2));
__h.release();
return;
}
if (__first2 == __last2)
{
- for (; __first1 != __last1; ++__first1, ++__result, (void)__d.template __incr<value_type>())
+ for (; __first1 != __last1; ++__first1, (void) ++__result, __d.template __incr<value_type>())
::new ((void*)__result) value_type(_VSTD::move(*__first1));
__h.release();
return;
@@ -230,6 +227,4 @@ stable_sort(_RandomAccessIterator __first, _RandomAccessIterator __last)
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_STABLE_SORT_H
diff --git a/libcxx/include/__algorithm/swap_ranges.h b/libcxx/include/__algorithm/swap_ranges.h
index 3c72dbd24c63..2b099c7361f1 100644
--- a/libcxx/include/__algorithm/swap_ranges.h
+++ b/libcxx/include/__algorithm/swap_ranges.h
@@ -17,9 +17,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _ForwardIterator1, class _ForwardIterator2>
@@ -32,6 +29,4 @@ swap_ranges(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardItera
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_SWAP_RANGES_H
diff --git a/libcxx/include/__algorithm/transform.h b/libcxx/include/__algorithm/transform.h
index 218f0f12eaca..494cb7128d29 100644
--- a/libcxx/include/__algorithm/transform.h
+++ b/libcxx/include/__algorithm/transform.h
@@ -15,9 +15,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _InputIterator, class _OutputIterator, class _UnaryOperation>
@@ -43,6 +40,4 @@ transform(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __f
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_TRANSFORM_H
diff --git a/libcxx/include/__algorithm/unique.h b/libcxx/include/__algorithm/unique.h
index fb6251a39a82..62f0490b6d63 100644
--- a/libcxx/include/__algorithm/unique.h
+++ b/libcxx/include/__algorithm/unique.h
@@ -14,15 +14,11 @@
#include <__algorithm/adjacent_find.h>
#include <__iterator/iterator_traits.h>
#include <__utility/move.h>
-#include <type_traits>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
// unique
@@ -31,8 +27,7 @@ template <class _ForwardIterator, class _BinaryPredicate>
_LIBCPP_NODISCARD_EXT _LIBCPP_CONSTEXPR_AFTER_CXX17 _ForwardIterator
unique(_ForwardIterator __first, _ForwardIterator __last, _BinaryPredicate __pred)
{
- __first = _VSTD::adjacent_find<_ForwardIterator, typename add_lvalue_reference<_BinaryPredicate>::type>
- (__first, __last, __pred);
+ __first = _VSTD::adjacent_find<_ForwardIterator, _BinaryPredicate&>(__first, __last, __pred);
if (__first != __last)
{
// ... a a ? ...
@@ -58,6 +53,4 @@ unique(_ForwardIterator __first, _ForwardIterator __last)
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_UNIQUE_H
diff --git a/libcxx/include/__algorithm/unique_copy.h b/libcxx/include/__algorithm/unique_copy.h
index 974a7c4df2d4..4c916dc3ada2 100644
--- a/libcxx/include/__algorithm/unique_copy.h
+++ b/libcxx/include/__algorithm/unique_copy.h
@@ -13,15 +13,11 @@
#include <__algorithm/comp.h>
#include <__iterator/iterator_traits.h>
#include <utility>
-#include <type_traits>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _BinaryPredicate, class _InputIterator, class _OutputIterator>
@@ -91,8 +87,7 @@ inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
_OutputIterator
unique_copy(_InputIterator __first, _InputIterator __last, _OutputIterator __result, _BinaryPredicate __pred)
{
- return _VSTD::__unique_copy<typename add_lvalue_reference<_BinaryPredicate>::type>
- (__first, __last, __result, __pred,
+ return _VSTD::__unique_copy<_BinaryPredicate&>(__first, __last, __result, __pred,
typename iterator_traits<_InputIterator>::iterator_category(),
typename iterator_traits<_OutputIterator>::iterator_category());
}
@@ -109,6 +104,4 @@ unique_copy(_InputIterator __first, _InputIterator __last, _OutputIterator __res
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_UNIQUE_COPY_H
diff --git a/libcxx/include/__algorithm/unwrap_iter.h b/libcxx/include/__algorithm/unwrap_iter.h
index a45d45cdd864..f77ecca6eee6 100644
--- a/libcxx/include/__algorithm/unwrap_iter.h
+++ b/libcxx/include/__algorithm/unwrap_iter.h
@@ -18,9 +18,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
// The job of __unwrap_iter is to lower contiguous iterators (such as
@@ -67,12 +64,14 @@ __unwrap_iter(_Iter __i) _NOEXCEPT
}
template<class _OrigIter>
+_LIBCPP_HIDE_FROM_ABI
_OrigIter __rewrap_iter(_OrigIter, _OrigIter __result)
{
return __result;
}
template<class _OrigIter, class _UnwrappedIter>
+_LIBCPP_HIDE_FROM_ABI
_OrigIter __rewrap_iter(_OrigIter __first, _UnwrappedIter __result)
{
// Precondition: __result is reachable from __first
@@ -82,6 +81,4 @@ _OrigIter __rewrap_iter(_OrigIter __first, _UnwrappedIter __result)
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_UNWRAP_ITER_H
diff --git a/libcxx/include/__algorithm/upper_bound.h b/libcxx/include/__algorithm/upper_bound.h
index 7be607f82538..4ae7b8f9be1f 100644
--- a/libcxx/include/__algorithm/upper_bound.h
+++ b/libcxx/include/__algorithm/upper_bound.h
@@ -18,9 +18,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Compare, class _ForwardIterator, class _Tp>
@@ -51,8 +48,7 @@ _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
_ForwardIterator
upper_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value_, _Compare __comp)
{
- typedef typename add_lvalue_reference<_Compare>::type _Comp_ref;
- return _VSTD::__upper_bound<_Comp_ref>(__first, __last, __value_, __comp);
+ return _VSTD::__upper_bound<_Compare&>(__first, __last, __value_, __comp);
}
template <class _ForwardIterator, class _Tp>
@@ -67,6 +63,4 @@ upper_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __valu
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ALGORITHM_UPPER_BOUND_H
diff --git a/libcxx/include/__availability b/libcxx/include/__availability
index 13d11950fd67..87d43ed414bf 100644
--- a/libcxx/include/__availability
+++ b/libcxx/include/__availability
@@ -139,9 +139,9 @@
// # define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_semaphore
// This controls the availability of the C++20 format library.
- // The library is in development and not ABI stable yet. Currently
- // P2216 is aiming to be retroactively accepted in C++20. This paper
- // contains ABI breaking changes.
+ // The library is in development and not ABI stable yet. P2216 is
+ // retroactively accepted in C++20. This paper contains ABI breaking
+ // changes.
# define _LIBCPP_AVAILABILITY_FORMAT
// # define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_format
@@ -238,9 +238,9 @@
# endif
// This controls the availability of the C++20 format library.
- // The library is in development and not ABI stable yet. Currently
- // P2216 is aiming to be retroactively accepted in C++20. This paper
- // contains ABI breaking changes.
+ // The library is in development and not ABI stable yet. P2216 is
+ // retroactively accepted in C++20. This paper contains ABI breaking
+ // changes.
# define _LIBCPP_AVAILABILITY_FORMAT \
__attribute__((unavailable))
# define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_format
diff --git a/libcxx/include/__bit/bit_cast.h b/libcxx/include/__bit/bit_cast.h
new file mode 100644
index 000000000000..6cfe4d799490
--- /dev/null
+++ b/libcxx/include/__bit/bit_cast.h
@@ -0,0 +1,38 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___BIT_BIT_CAST_H
+#define _LIBCPP___BIT_BIT_CAST_H
+
+#include <__config>
+#include <type_traits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17
+
+template<class _ToType, class _FromType, class = enable_if_t<
+ sizeof(_ToType) == sizeof(_FromType) &&
+ is_trivially_copyable_v<_ToType> &&
+ is_trivially_copyable_v<_FromType>
+>>
+_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI
+constexpr _ToType bit_cast(_FromType const& __from) noexcept {
+ return __builtin_bit_cast(_ToType, __from);
+}
+
+#endif // _LIBCPP_STD_VER > 17
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___BIT_BIT_CAST_H
diff --git a/libcxx/include/__bsd_locale_defaults.h b/libcxx/include/__bsd_locale_defaults.h
index 2ace2a21cb01..0d6506c62576 100644
--- a/libcxx/include/__bsd_locale_defaults.h
+++ b/libcxx/include/__bsd_locale_defaults.h
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===---------------------- __bsd_locale_defaults.h -----------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/__bsd_locale_fallbacks.h b/libcxx/include/__bsd_locale_fallbacks.h
index ed0eabf60ece..2d5c2eca4679 100644
--- a/libcxx/include/__bsd_locale_fallbacks.h
+++ b/libcxx/include/__bsd_locale_fallbacks.h
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===---------------------- __bsd_locale_fallbacks.h ----------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -30,6 +30,7 @@ decltype(MB_CUR_MAX) __libcpp_mb_cur_max_l(locale_t __l)
return MB_CUR_MAX;
}
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
inline _LIBCPP_INLINE_VISIBILITY
wint_t __libcpp_btowc_l(int __c, locale_t __l)
{
@@ -88,6 +89,7 @@ size_t __libcpp_mbrlen_l(const char *__s, size_t __n, mbstate_t *__ps, locale_t
__libcpp_locale_guard __current(__l);
return mbrlen(__s, __n, __ps);
}
+#endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS
inline _LIBCPP_INLINE_VISIBILITY
lconv *__libcpp_localeconv_l(locale_t __l)
@@ -96,6 +98,7 @@ lconv *__libcpp_localeconv_l(locale_t __l)
return localeconv();
}
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
inline _LIBCPP_INLINE_VISIBILITY
size_t __libcpp_mbsrtowcs_l(wchar_t *__dest, const char **__src, size_t __len,
mbstate_t *__ps, locale_t __l)
@@ -103,6 +106,7 @@ size_t __libcpp_mbsrtowcs_l(wchar_t *__dest, const char **__src, size_t __len,
__libcpp_locale_guard __current(__l);
return mbsrtowcs(__dest, __src, __len, __ps);
}
+#endif
inline
int __libcpp_snprintf_l(char *__s, size_t __n, locale_t __l, const char *__format, ...) {
diff --git a/libcxx/include/__charconv/chars_format.h b/libcxx/include/__charconv/chars_format.h
new file mode 100644
index 000000000000..22e70b56fb8c
--- /dev/null
+++ b/libcxx/include/__charconv/chars_format.h
@@ -0,0 +1,77 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___CHARCONV_CHARS_FORMAT_H
+#define _LIBCPP___CHARCONV_CHARS_FORMAT_H
+
+#include <__config>
+#include <__utility/to_underlying.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#ifndef _LIBCPP_CXX03_LANG
+
+enum class _LIBCPP_ENUM_VIS chars_format
+{
+ scientific = 0x1,
+ fixed = 0x2,
+ hex = 0x4,
+ general = fixed | scientific
+};
+
+inline _LIBCPP_INLINE_VISIBILITY constexpr chars_format
+operator~(chars_format __x) {
+ return chars_format(~_VSTD::__to_underlying(__x));
+}
+
+inline _LIBCPP_INLINE_VISIBILITY constexpr chars_format
+operator&(chars_format __x, chars_format __y) {
+ return chars_format(_VSTD::__to_underlying(__x) &
+ _VSTD::__to_underlying(__y));
+}
+
+inline _LIBCPP_INLINE_VISIBILITY constexpr chars_format
+operator|(chars_format __x, chars_format __y) {
+ return chars_format(_VSTD::__to_underlying(__x) |
+ _VSTD::__to_underlying(__y));
+}
+
+inline _LIBCPP_INLINE_VISIBILITY constexpr chars_format
+operator^(chars_format __x, chars_format __y) {
+ return chars_format(_VSTD::__to_underlying(__x) ^
+ _VSTD::__to_underlying(__y));
+}
+
+inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11 chars_format&
+operator&=(chars_format& __x, chars_format __y) {
+ __x = __x & __y;
+ return __x;
+}
+
+inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11 chars_format&
+operator|=(chars_format& __x, chars_format __y) {
+ __x = __x | __y;
+ return __x;
+}
+
+inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11 chars_format&
+operator^=(chars_format& __x, chars_format __y) {
+ __x = __x ^ __y;
+ return __x;
+}
+
+#endif // _LIBCPP_CXX03_LANG
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___CHARCONV_CHARS_FORMAT_H
diff --git a/libcxx/include/__charconv/from_chars_result.h b/libcxx/include/__charconv/from_chars_result.h
new file mode 100644
index 000000000000..fbd7d508115f
--- /dev/null
+++ b/libcxx/include/__charconv/from_chars_result.h
@@ -0,0 +1,37 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___CHARCONV_FROM_CHARS_RESULT_H
+#define _LIBCPP___CHARCONV_FROM_CHARS_RESULT_H
+
+#include <__config>
+#include <__errc>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#ifndef _LIBCPP_CXX03_LANG
+
+struct _LIBCPP_TYPE_VIS from_chars_result
+{
+ const char* ptr;
+ errc ec;
+# if _LIBCPP_STD_VER > 17
+ _LIBCPP_HIDE_FROM_ABI friend bool operator==(const from_chars_result&, const from_chars_result&) = default;
+# endif
+};
+
+#endif // _LIBCPP_CXX03_LANG
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___CHARCONV_FROM_CHARS_RESULT_H
diff --git a/libcxx/include/__charconv/to_chars_result.h b/libcxx/include/__charconv/to_chars_result.h
new file mode 100644
index 000000000000..f515ee3122c1
--- /dev/null
+++ b/libcxx/include/__charconv/to_chars_result.h
@@ -0,0 +1,37 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___CHARCONV_TO_CHARS_RESULT_H
+#define _LIBCPP___CHARCONV_TO_CHARS_RESULT_H
+
+#include <__config>
+#include <__errc>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#ifndef _LIBCPP_CXX03_LANG
+
+struct _LIBCPP_TYPE_VIS to_chars_result
+{
+ char* ptr;
+ errc ec;
+# if _LIBCPP_STD_VER > 17
+ _LIBCPP_HIDE_FROM_ABI friend bool operator==(const to_chars_result&, const to_chars_result&) = default;
+# endif
+};
+
+#endif // _LIBCPP_CXX03_LANG
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___CHARCONV_TO_CHARS_RESULT_H
diff --git a/libcxx/include/__compare/common_comparison_category.h b/libcxx/include/__compare/common_comparison_category.h
new file mode 100644
index 000000000000..37a28db1d650
--- /dev/null
+++ b/libcxx/include/__compare/common_comparison_category.h
@@ -0,0 +1,94 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___COMPARE_COMMON_COMPARISON_CATEGORY_H
+#define _LIBCPP___COMPARE_COMMON_COMPARISON_CATEGORY_H
+
+#include <__compare/ordering.h>
+#include <__config>
+#include <type_traits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17
+
+namespace __comp_detail {
+
+enum _ClassifyCompCategory : unsigned {
+ _None,
+ _PartialOrd,
+ _WeakOrd,
+ _StrongOrd,
+ _CCC_Size
+};
+
+template <class _Tp>
+_LIBCPP_HIDE_FROM_ABI
+constexpr _ClassifyCompCategory __type_to_enum() noexcept {
+ if (is_same_v<_Tp, partial_ordering>)
+ return _PartialOrd;
+ if (is_same_v<_Tp, weak_ordering>)
+ return _WeakOrd;
+ if (is_same_v<_Tp, strong_ordering>)
+ return _StrongOrd;
+ return _None;
+}
+
+template <size_t _Size>
+_LIBCPP_HIDE_FROM_ABI
+constexpr _ClassifyCompCategory
+__compute_comp_type(const _ClassifyCompCategory (&__types)[_Size]) {
+ int __seen[_CCC_Size] = {};
+ for (auto __type : __types)
+ ++__seen[__type];
+ if (__seen[_None])
+ return _None;
+ if (__seen[_PartialOrd])
+ return _PartialOrd;
+ if (__seen[_WeakOrd])
+ return _WeakOrd;
+ return _StrongOrd;
+}
+
+template <class ..._Ts, bool _False = false>
+_LIBCPP_HIDE_FROM_ABI
+constexpr auto __get_comp_type() {
+ using _CCC = _ClassifyCompCategory;
+ constexpr _CCC __type_kinds[] = {_StrongOrd, __type_to_enum<_Ts>()...};
+ constexpr _CCC _Cat = __compute_comp_type(__type_kinds);
+ if constexpr (_Cat == _None)
+ return void();
+ else if constexpr (_Cat == _PartialOrd)
+ return partial_ordering::equivalent;
+ else if constexpr (_Cat == _WeakOrd)
+ return weak_ordering::equivalent;
+ else if constexpr (_Cat == _StrongOrd)
+ return strong_ordering::equivalent;
+ else
+ static_assert(_False, "unhandled case");
+}
+} // namespace __comp_detail
+
+// [cmp.common], common comparison category type
+template<class... _Ts>
+struct _LIBCPP_TEMPLATE_VIS common_comparison_category {
+ using type = decltype(__comp_detail::__get_comp_type<_Ts...>());
+};
+
+template<class... _Ts>
+using common_comparison_category_t = typename common_comparison_category<_Ts...>::type;
+
+#endif // _LIBCPP_STD_VER > 17
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___COMPARE_COMMON_COMPARISON_CATEGORY_H
diff --git a/libcxx/include/__compare/compare_three_way.h b/libcxx/include/__compare/compare_three_way.h
new file mode 100644
index 000000000000..3edddf1a1c94
--- /dev/null
+++ b/libcxx/include/__compare/compare_three_way.h
@@ -0,0 +1,41 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___COMPARE_COMPARE_THREE_WAY_H
+#define _LIBCPP___COMPARE_COMPARE_THREE_WAY_H
+
+#include <__config>
+#include <__compare/three_way_comparable.h>
+#include <__utility/forward.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+struct _LIBCPP_TEMPLATE_VIS compare_three_way
+{
+ template<class _T1, class _T2>
+ requires three_way_comparable_with<_T1, _T2>
+ constexpr _LIBCPP_HIDE_FROM_ABI
+ auto operator()(_T1&& __t, _T2&& __u) const
+ noexcept(noexcept(_VSTD::forward<_T1>(__t) <=> _VSTD::forward<_T2>(__u)))
+ { return _VSTD::forward<_T1>(__t) <=> _VSTD::forward<_T2>(__u); }
+
+ using is_transparent = void;
+};
+
+#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___COMPARE_COMPARE_THREE_WAY_H
diff --git a/libcxx/include/__compare/compare_three_way_result.h b/libcxx/include/__compare/compare_three_way_result.h
new file mode 100644
index 000000000000..14908c6bbc1b
--- /dev/null
+++ b/libcxx/include/__compare/compare_three_way_result.h
@@ -0,0 +1,43 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___COMPARE_COMPARE_THREE_WAY_RESULT_H
+#define _LIBCPP___COMPARE_COMPARE_THREE_WAY_RESULT_H
+
+#include <__config>
+#include <type_traits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17
+
+template<class, class, class>
+struct _LIBCPP_HIDE_FROM_ABI __compare_three_way_result { };
+
+template<class _Tp, class _Up>
+struct _LIBCPP_HIDE_FROM_ABI __compare_three_way_result<_Tp, _Up, decltype(
+ declval<__make_const_lvalue_ref<_Tp>>() <=> declval<__make_const_lvalue_ref<_Up>>(), void()
+)> {
+ using type = decltype(declval<__make_const_lvalue_ref<_Tp>>() <=> declval<__make_const_lvalue_ref<_Up>>());
+};
+
+template<class _Tp, class _Up = _Tp>
+struct _LIBCPP_TEMPLATE_VIS compare_three_way_result : __compare_three_way_result<_Tp, _Up, void> { };
+
+template<class _Tp, class _Up = _Tp>
+using compare_three_way_result_t = typename compare_three_way_result<_Tp, _Up>::type;
+
+#endif // _LIBCPP_STD_VER > 17
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___COMPARE_COMPARE_THREE_WAY_RESULT_H
diff --git a/libcxx/include/__compare/is_eq.h b/libcxx/include/__compare/is_eq.h
new file mode 100644
index 000000000000..906cb0709e77
--- /dev/null
+++ b/libcxx/include/__compare/is_eq.h
@@ -0,0 +1,34 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___COMPARE_IS_EQ_H
+#define _LIBCPP___COMPARE_IS_EQ_H
+
+#include <__compare/ordering.h>
+#include <__config>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17
+
+_LIBCPP_HIDE_FROM_ABI inline constexpr bool is_eq(partial_ordering __c) noexcept { return __c == 0; }
+_LIBCPP_HIDE_FROM_ABI inline constexpr bool is_neq(partial_ordering __c) noexcept { return __c != 0; }
+_LIBCPP_HIDE_FROM_ABI inline constexpr bool is_lt(partial_ordering __c) noexcept { return __c < 0; }
+_LIBCPP_HIDE_FROM_ABI inline constexpr bool is_lteq(partial_ordering __c) noexcept { return __c <= 0; }
+_LIBCPP_HIDE_FROM_ABI inline constexpr bool is_gt(partial_ordering __c) noexcept { return __c > 0; }
+_LIBCPP_HIDE_FROM_ABI inline constexpr bool is_gteq(partial_ordering __c) noexcept { return __c >= 0; }
+
+#endif // _LIBCPP_STD_VER > 17
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___COMPARE_IS_EQ_H
diff --git a/libcxx/include/__compare/ordering.h b/libcxx/include/__compare/ordering.h
new file mode 100644
index 000000000000..d4087bc85ba1
--- /dev/null
+++ b/libcxx/include/__compare/ordering.h
@@ -0,0 +1,319 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___COMPARE_ORDERING_H
+#define _LIBCPP___COMPARE_ORDERING_H
+
+#include <__config>
+#include <type_traits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17
+
+// exposition only
+enum class _LIBCPP_ENUM_VIS _OrdResult : signed char {
+ __less = -1,
+ __equiv = 0,
+ __greater = 1
+};
+
+enum class _LIBCPP_ENUM_VIS _NCmpResult : signed char {
+ __unordered = -127
+};
+
+class partial_ordering;
+class weak_ordering;
+class strong_ordering;
+
+template<class _Tp, class... _Args>
+inline constexpr bool __one_of_v = (is_same_v<_Tp, _Args> || ...);
+
+struct _CmpUnspecifiedParam {
+ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEVAL
+ _CmpUnspecifiedParam(int _CmpUnspecifiedParam::*) noexcept {}
+
+ template<class _Tp, class = enable_if_t<!__one_of_v<_Tp, int, partial_ordering, weak_ordering, strong_ordering>>>
+ _CmpUnspecifiedParam(_Tp) = delete;
+};
+
+class partial_ordering {
+ using _ValueT = signed char;
+
+ _LIBCPP_HIDE_FROM_ABI
+ explicit constexpr partial_ordering(_OrdResult __v) noexcept
+ : __value_(_ValueT(__v)) {}
+
+ _LIBCPP_HIDE_FROM_ABI
+ explicit constexpr partial_ordering(_NCmpResult __v) noexcept
+ : __value_(_ValueT(__v)) {}
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr bool __is_ordered() const noexcept {
+ return __value_ != _ValueT(_NCmpResult::__unordered);
+ }
+public:
+ // valid values
+ static const partial_ordering less;
+ static const partial_ordering equivalent;
+ static const partial_ordering greater;
+ static const partial_ordering unordered;
+
+ // comparisons
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr bool operator==(partial_ordering, partial_ordering) noexcept = default;
+
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr bool operator==(partial_ordering __v, _CmpUnspecifiedParam) noexcept {
+ return __v.__is_ordered() && __v.__value_ == 0;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr bool operator< (partial_ordering __v, _CmpUnspecifiedParam) noexcept {
+ return __v.__is_ordered() && __v.__value_ < 0;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr bool operator<=(partial_ordering __v, _CmpUnspecifiedParam) noexcept {
+ return __v.__is_ordered() && __v.__value_ <= 0;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr bool operator> (partial_ordering __v, _CmpUnspecifiedParam) noexcept {
+ return __v.__is_ordered() && __v.__value_ > 0;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr bool operator>=(partial_ordering __v, _CmpUnspecifiedParam) noexcept {
+ return __v.__is_ordered() && __v.__value_ >= 0;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr bool operator< (_CmpUnspecifiedParam, partial_ordering __v) noexcept {
+ return __v.__is_ordered() && 0 < __v.__value_;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr bool operator<=(_CmpUnspecifiedParam, partial_ordering __v) noexcept {
+ return __v.__is_ordered() && 0 <= __v.__value_;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr bool operator> (_CmpUnspecifiedParam, partial_ordering __v) noexcept {
+ return __v.__is_ordered() && 0 > __v.__value_;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr bool operator>=(_CmpUnspecifiedParam, partial_ordering __v) noexcept {
+ return __v.__is_ordered() && 0 >= __v.__value_;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr partial_ordering operator<=>(partial_ordering __v, _CmpUnspecifiedParam) noexcept {
+ return __v;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr partial_ordering operator<=>(_CmpUnspecifiedParam, partial_ordering __v) noexcept {
+ return __v < 0 ? partial_ordering::greater : (__v > 0 ? partial_ordering::less : __v);
+ }
+private:
+ _ValueT __value_;
+};
+
+inline constexpr partial_ordering partial_ordering::less(_OrdResult::__less);
+inline constexpr partial_ordering partial_ordering::equivalent(_OrdResult::__equiv);
+inline constexpr partial_ordering partial_ordering::greater(_OrdResult::__greater);
+inline constexpr partial_ordering partial_ordering::unordered(_NCmpResult::__unordered);
+
+class weak_ordering {
+ using _ValueT = signed char;
+
+ _LIBCPP_HIDE_FROM_ABI
+ explicit constexpr weak_ordering(_OrdResult __v) noexcept : __value_(_ValueT(__v)) {}
+
+public:
+ static const weak_ordering less;
+ static const weak_ordering equivalent;
+ static const weak_ordering greater;
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr operator partial_ordering() const noexcept {
+ return __value_ == 0 ? partial_ordering::equivalent
+ : (__value_ < 0 ? partial_ordering::less : partial_ordering::greater);
+ }
+
+ // comparisons
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr bool operator==(weak_ordering, weak_ordering) noexcept = default;
+
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr bool operator==(weak_ordering __v, _CmpUnspecifiedParam) noexcept {
+ return __v.__value_ == 0;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr bool operator< (weak_ordering __v, _CmpUnspecifiedParam) noexcept {
+ return __v.__value_ < 0;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr bool operator<=(weak_ordering __v, _CmpUnspecifiedParam) noexcept {
+ return __v.__value_ <= 0;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr bool operator> (weak_ordering __v, _CmpUnspecifiedParam) noexcept {
+ return __v.__value_ > 0;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr bool operator>=(weak_ordering __v, _CmpUnspecifiedParam) noexcept {
+ return __v.__value_ >= 0;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr bool operator< (_CmpUnspecifiedParam, weak_ordering __v) noexcept {
+ return 0 < __v.__value_;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr bool operator<=(_CmpUnspecifiedParam, weak_ordering __v) noexcept {
+ return 0 <= __v.__value_;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr bool operator> (_CmpUnspecifiedParam, weak_ordering __v) noexcept {
+ return 0 > __v.__value_;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr bool operator>=(_CmpUnspecifiedParam, weak_ordering __v) noexcept {
+ return 0 >= __v.__value_;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr weak_ordering operator<=>(weak_ordering __v, _CmpUnspecifiedParam) noexcept {
+ return __v;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr weak_ordering operator<=>(_CmpUnspecifiedParam, weak_ordering __v) noexcept {
+ return __v < 0 ? weak_ordering::greater : (__v > 0 ? weak_ordering::less : __v);
+ }
+
+private:
+ _ValueT __value_;
+};
+
+inline constexpr weak_ordering weak_ordering::less(_OrdResult::__less);
+inline constexpr weak_ordering weak_ordering::equivalent(_OrdResult::__equiv);
+inline constexpr weak_ordering weak_ordering::greater(_OrdResult::__greater);
+
+class strong_ordering {
+ using _ValueT = signed char;
+
+ _LIBCPP_HIDE_FROM_ABI
+ explicit constexpr strong_ordering(_OrdResult __v) noexcept : __value_(_ValueT(__v)) {}
+
+public:
+ static const strong_ordering less;
+ static const strong_ordering equal;
+ static const strong_ordering equivalent;
+ static const strong_ordering greater;
+
+ // conversions
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr operator partial_ordering() const noexcept {
+ return __value_ == 0 ? partial_ordering::equivalent
+ : (__value_ < 0 ? partial_ordering::less : partial_ordering::greater);
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr operator weak_ordering() const noexcept {
+ return __value_ == 0 ? weak_ordering::equivalent
+ : (__value_ < 0 ? weak_ordering::less : weak_ordering::greater);
+ }
+
+ // comparisons
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr bool operator==(strong_ordering, strong_ordering) noexcept = default;
+
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr bool operator==(strong_ordering __v, _CmpUnspecifiedParam) noexcept {
+ return __v.__value_ == 0;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr bool operator< (strong_ordering __v, _CmpUnspecifiedParam) noexcept {
+ return __v.__value_ < 0;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr bool operator<=(strong_ordering __v, _CmpUnspecifiedParam) noexcept {
+ return __v.__value_ <= 0;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr bool operator> (strong_ordering __v, _CmpUnspecifiedParam) noexcept {
+ return __v.__value_ > 0;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr bool operator>=(strong_ordering __v, _CmpUnspecifiedParam) noexcept {
+ return __v.__value_ >= 0;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr bool operator< (_CmpUnspecifiedParam, strong_ordering __v) noexcept {
+ return 0 < __v.__value_;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr bool operator<=(_CmpUnspecifiedParam, strong_ordering __v) noexcept {
+ return 0 <= __v.__value_;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr bool operator> (_CmpUnspecifiedParam, strong_ordering __v) noexcept {
+ return 0 > __v.__value_;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr bool operator>=(_CmpUnspecifiedParam, strong_ordering __v) noexcept {
+ return 0 >= __v.__value_;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr strong_ordering operator<=>(strong_ordering __v, _CmpUnspecifiedParam) noexcept {
+ return __v;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr strong_ordering operator<=>(_CmpUnspecifiedParam, strong_ordering __v) noexcept {
+ return __v < 0 ? strong_ordering::greater : (__v > 0 ? strong_ordering::less : __v);
+ }
+
+private:
+ _ValueT __value_;
+};
+
+inline constexpr strong_ordering strong_ordering::less(_OrdResult::__less);
+inline constexpr strong_ordering strong_ordering::equal(_OrdResult::__equiv);
+inline constexpr strong_ordering strong_ordering::equivalent(_OrdResult::__equiv);
+inline constexpr strong_ordering strong_ordering::greater(_OrdResult::__greater);
+
+#endif // _LIBCPP_STD_VER > 17
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___COMPARE_ORDERING_H
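
The `_CmpUnspecifiedParam` machinery above is what restricts comparisons of an ordering value to the literal `0`: any other integer (or another ordering type) fails to convert. A hedged usage sketch with a hypothetical `CaseInsensitive` type (illustrative only; the ASCII lower-casing trick is just for the example):

    // Illustrative sketch, not part of the vendored libc++ sources.
    #include <compare>

    struct CaseInsensitive {
      char c;
      // weak_ordering: distinct values ('a' and 'A') can compare equivalent.
      friend constexpr std::weak_ordering operator<=>(CaseInsensitive a, CaseInsensitive b) {
        int x = a.c | 0x20, y = b.c | 0x20;    // crude ASCII lower-casing, example only
        return x <=> y;                        // strong_ordering converts to weak_ordering
      }
      friend constexpr bool operator==(CaseInsensitive a, CaseInsensitive b) {
        return (a <=> b) == 0;                 // only literal 0 is accepted on the right
      }
    };

    static_assert((CaseInsensitive{'a'} <=> CaseInsensitive{'B'}) < 0);
    static_assert(CaseInsensitive{'a'} == CaseInsensitive{'A'});
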
diff --git a/libcxx/include/__compare/synth_three_way.h b/libcxx/include/__compare/synth_three_way.h
new file mode 100644
index 000000000000..3d8e738816dd
--- /dev/null
+++ b/libcxx/include/__compare/synth_three_way.h
@@ -0,0 +1,51 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___COMPARE_SYNTH_THREE_WAY_H
+#define _LIBCPP___COMPARE_SYNTH_THREE_WAY_H
+
+#include <__config>
+#include <__compare/ordering.h>
+#include <__compare/three_way_comparable.h>
+#include <__concepts/boolean_testable.h>
+#include <__utility/declval.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+// [expos.only.func]
+
+_LIBCPP_HIDE_FROM_ABI inline constexpr auto __synth_three_way =
+ []<class _Tp, class _Up>(const _Tp& __t, const _Up& __u)
+ requires requires {
+ { __t < __u } -> __boolean_testable;
+ { __u < __t } -> __boolean_testable;
+ }
+ {
+ if constexpr (three_way_comparable_with<_Tp, _Up>) {
+ return __t <=> __u;
+ } else {
+ if (__t < __u) return weak_ordering::less;
+ if (__u < __t) return weak_ordering::greater;
+ return weak_ordering::equivalent;
+ }
+ };
+
+template <class _Tp, class _Up = _Tp>
+using __synth_three_way_result = decltype(__synth_three_way(declval<_Tp&>(), declval<_Up&>()));
+
+#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___COMPARE_SYNTH_THREE_WAY_H
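
`__synth_three_way` is the exposition-only helper ([expos.only.func]) behind the standard's synth-three-way, which lets pair, tuple, and the containers provide `operator<=>` even when element types only offer `operator<`. The same idea at user level, as a hedged sketch (the `Legacy` type and `synth_compare` helper are hypothetical names, not library entities):

    // Illustrative sketch, not part of the vendored libc++ sources.
    #include <compare>

    struct Legacy {                           // pre-C++20 style type: operator< only
      int v;
      friend constexpr bool operator<(Legacy a, Legacy b) { return a.v < b.v; }
    };

    template <class T>
    constexpr auto synth_compare(const T& a, const T& b) {
      if constexpr (std::three_way_comparable<T>) {
        return a <=> b;                       // keep the real comparison category
      } else {
        if (a < b) return std::weak_ordering::less;
        if (b < a) return std::weak_ordering::greater;
        return std::weak_ordering::equivalent;
      }
    }

    static_assert(synth_compare(1, 2) < 0);                                          // uses <=> directly
    static_assert(synth_compare(Legacy{1}, Legacy{2}) == std::weak_ordering::less);  // synthesized from <
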
diff --git a/libcxx/include/__compare/three_way_comparable.h b/libcxx/include/__compare/three_way_comparable.h
new file mode 100644
index 000000000000..c4794949007b
--- /dev/null
+++ b/libcxx/include/__compare/three_way_comparable.h
@@ -0,0 +1,58 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___COMPARE_THREE_WAY_COMPARABLE_H
+#define _LIBCPP___COMPARE_THREE_WAY_COMPARABLE_H
+
+#include <__compare/common_comparison_category.h>
+#include <__compare/ordering.h>
+#include <__concepts/common_reference_with.h>
+#include <__concepts/equality_comparable.h>
+#include <__concepts/same_as.h>
+#include <__concepts/totally_ordered.h>
+#include <__config>
+#include <type_traits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+template<class _Tp, class _Cat>
+concept __compares_as =
+ same_as<common_comparison_category_t<_Tp, _Cat>, _Cat>;
+
+template<class _Tp, class _Cat = partial_ordering>
+concept three_way_comparable =
+ __weakly_equality_comparable_with<_Tp, _Tp> &&
+ __partially_ordered_with<_Tp, _Tp> &&
+ requires(__make_const_lvalue_ref<_Tp> __a, __make_const_lvalue_ref<_Tp> __b) {
+ { __a <=> __b } -> __compares_as<_Cat>;
+ };
+
+template<class _Tp, class _Up, class _Cat = partial_ordering>
+concept three_way_comparable_with =
+ three_way_comparable<_Tp, _Cat> &&
+ three_way_comparable<_Up, _Cat> &&
+ common_reference_with<__make_const_lvalue_ref<_Tp>, __make_const_lvalue_ref<_Up>> &&
+ three_way_comparable<common_reference_t<__make_const_lvalue_ref<_Tp>, __make_const_lvalue_ref<_Up>>, _Cat> &&
+ __weakly_equality_comparable_with<_Tp, _Up> &&
+ __partially_ordered_with<_Tp, _Up> &&
+ requires(__make_const_lvalue_ref<_Tp> __t, __make_const_lvalue_ref<_Up> __u) {
+ { __t <=> __u } -> __compares_as<_Cat>;
+ { __u <=> __t } -> __compares_as<_Cat>;
+ };
+
+#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___COMPARE_THREE_WAY_COMPARABLE_H
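
In short, `three_way_comparable<T, Cat>` demands that `<=>` work on const lvalues of `T` and yield a category at least as strong as `Cat`; `three_way_comparable_with` additionally requires a usable common reference. A few illustrative checks (not part of the import):

    // Illustrative sketch, not part of the vendored libc++ sources.
    #include <compare>

    struct NoSpaceship { int v; bool operator==(const NoSpaceship&) const = default; };

    static_assert(std::three_way_comparable<int, std::strong_ordering>);
    static_assert(std::three_way_comparable<double>);                        // partial_ordering by default
    static_assert(!std::three_way_comparable<double, std::strong_ordering>); // floating-point <=> is only partial
    static_assert(!std::three_way_comparable<NoSpaceship>);                  // == alone is not enough
    static_assert(std::three_way_comparable_with<int, long>);
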
diff --git a/libcxx/include/__concepts/arithmetic.h b/libcxx/include/__concepts/arithmetic.h
new file mode 100644
index 000000000000..9a1383904db6
--- /dev/null
+++ b/libcxx/include/__concepts/arithmetic.h
@@ -0,0 +1,48 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___CONCEPTS_ARITHMETIC_H
+#define _LIBCPP___CONCEPTS_ARITHMETIC_H
+
+#include <__config>
+#include <type_traits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+// [concepts.arithmetic], arithmetic concepts
+
+template<class _Tp>
+concept integral = is_integral_v<_Tp>;
+
+template<class _Tp>
+concept signed_integral = integral<_Tp> && is_signed_v<_Tp>;
+
+template<class _Tp>
+concept unsigned_integral = integral<_Tp> && !signed_integral<_Tp>;
+
+template<class _Tp>
+concept floating_point = is_floating_point_v<_Tp>;
+
+// Concept helpers for the internal type traits for the fundamental types.
+
+template <class _Tp>
+concept __libcpp_unsigned_integer = __libcpp_is_unsigned_integer<_Tp>::value;
+template <class _Tp>
+concept __libcpp_signed_integer = __libcpp_is_signed_integer<_Tp>::value;
+
+#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___CONCEPTS_ARITHMETIC_H
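
These map directly onto the corresponding type traits; note that `bool` and the character types count as `integral`, and that `unsigned_integral` is defined as integral-but-not-signed rather than via `is_unsigned_v`. Illustrative checks plus one constrained function (not part of the import; `twice` is a hypothetical name):

    // Illustrative sketch, not part of the vendored libc++ sources.
    #include <concepts>

    static_assert(std::integral<char> && std::integral<bool>);
    static_assert(std::signed_integral<int> && !std::signed_integral<unsigned>);
    static_assert(std::unsigned_integral<unsigned> && !std::unsigned_integral<float>);
    static_assert(std::floating_point<double> && !std::floating_point<int>);

    template <std::integral T>
    constexpr T twice(T x) { return static_cast<T>(x + x); }

    static_assert(twice(21) == 42);
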
diff --git a/libcxx/include/__concepts/assignable.h b/libcxx/include/__concepts/assignable.h
new file mode 100644
index 000000000000..9cfc7c0e8318
--- /dev/null
+++ b/libcxx/include/__concepts/assignable.h
@@ -0,0 +1,40 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___CONCEPTS_ASSIGNABLE_H
+#define _LIBCPP___CONCEPTS_ASSIGNABLE_H
+
+#include <__concepts/common_reference_with.h>
+#include <__concepts/same_as.h>
+#include <__config>
+#include <__utility/forward.h>
+#include <type_traits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+// [concept.assignable]
+
+template<class _Lhs, class _Rhs>
+concept assignable_from =
+ is_lvalue_reference_v<_Lhs> &&
+ common_reference_with<__make_const_lvalue_ref<_Lhs>, __make_const_lvalue_ref<_Rhs>> &&
+ requires (_Lhs __lhs, _Rhs&& __rhs) {
+ { __lhs = _VSTD::forward<_Rhs>(__rhs) } -> same_as<_Lhs>;
+ };
+
+#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___CONCEPTS_ASSIGNABLE_H
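
`assignable_from<LHS, RHS>` only holds when `LHS` is an lvalue reference and assignment yields exactly `LHS`; a common-reference relationship between the two types is also required. Illustrative checks (not part of the import):

    // Illustrative sketch, not part of the vendored libc++ sources.
    #include <concepts>
    #include <string>

    static_assert(std::assignable_from<std::string&, const char*>);  // s = "literal" is well-formed
    static_assert(!std::assignable_from<std::string, std::string>);  // LHS must be an lvalue reference
    static_assert(!std::assignable_from<const int&, int>);           // cannot assign through const
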
diff --git a/libcxx/include/__concepts/boolean_testable.h b/libcxx/include/__concepts/boolean_testable.h
new file mode 100644
index 000000000000..638fc3b20330
--- /dev/null
+++ b/libcxx/include/__concepts/boolean_testable.h
@@ -0,0 +1,38 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___CONCEPTS_BOOLEAN_TESTABLE_H
+#define _LIBCPP___CONCEPTS_BOOLEAN_TESTABLE_H
+
+#include <__concepts/convertible_to.h>
+#include <__config>
+#include <__utility/forward.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+// [concepts.booleantestable]
+
+template<class _Tp>
+concept __boolean_testable_impl = convertible_to<_Tp, bool>;
+
+template<class _Tp>
+concept __boolean_testable = __boolean_testable_impl<_Tp> && requires(_Tp&& __t) {
+ { !_VSTD::forward<_Tp>(__t) } -> __boolean_testable_impl;
+};
+
+#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___CONCEPTS_BOOLEAN_TESTABLE_H
diff --git a/libcxx/include/__concepts/class_or_enum.h b/libcxx/include/__concepts/class_or_enum.h
new file mode 100644
index 000000000000..43c7636d9c81
--- /dev/null
+++ b/libcxx/include/__concepts/class_or_enum.h
@@ -0,0 +1,32 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___CONCEPTS_CLASS_OR_ENUM_H
+#define _LIBCPP___CONCEPTS_CLASS_OR_ENUM_H
+
+#include <__config>
+#include <type_traits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+// Whether a type is a class type or enumeration type according to the Core wording.
+
+template<class _Tp>
+concept __class_or_enum = is_class_v<_Tp> || is_union_v<_Tp> || is_enum_v<_Tp>;
+
+#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___CONCEPTS_CLASS_OR_ENUM_H
diff --git a/libcxx/include/__concepts/common_reference_with.h b/libcxx/include/__concepts/common_reference_with.h
new file mode 100644
index 000000000000..3269e3ae89fe
--- /dev/null
+++ b/libcxx/include/__concepts/common_reference_with.h
@@ -0,0 +1,37 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___CONCEPTS_COMMON_REFERENCE_WITH_H
+#define _LIBCPP___CONCEPTS_COMMON_REFERENCE_WITH_H
+
+#include <__concepts/convertible_to.h>
+#include <__concepts/same_as.h>
+#include <__config>
+#include <type_traits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+// [concept.commonref]
+
+template<class _Tp, class _Up>
+concept common_reference_with =
+ same_as<common_reference_t<_Tp, _Up>, common_reference_t<_Up, _Tp>> &&
+ convertible_to<_Tp, common_reference_t<_Tp, _Up>> &&
+ convertible_to<_Up, common_reference_t<_Tp, _Up>>;
+
+#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___CONCEPTS_COMMON_REFERENCE_WITH_H
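
A couple of illustrative checks (not part of the import): the concept holds whenever both types convert to their shared `common_reference_t`, computed consistently in either order:

    // Illustrative sketch, not part of the vendored libc++ sources.
    #include <concepts>
    #include <string>
    #include <string_view>

    static_assert(std::common_reference_with<int&, const int&>);               // common reference: const int&
    static_assert(std::common_reference_with<std::string&, std::string_view>); // common reference: string_view
    static_assert(!std::common_reference_with<int*, double*>);                 // no common reference exists
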
diff --git a/libcxx/include/__concepts/common_with.h b/libcxx/include/__concepts/common_with.h
new file mode 100644
index 000000000000..b575aea5f77f
--- /dev/null
+++ b/libcxx/include/__concepts/common_with.h
@@ -0,0 +1,47 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___CONCEPTS_COMMON_WITH_H
+#define _LIBCPP___CONCEPTS_COMMON_WITH_H
+
+#include <__concepts/common_reference_with.h>
+#include <__concepts/same_as.h>
+#include <__config>
+#include <type_traits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+// [concept.common]
+
+template<class _Tp, class _Up>
+concept common_with =
+ same_as<common_type_t<_Tp, _Up>, common_type_t<_Up, _Tp>> &&
+ requires {
+ static_cast<common_type_t<_Tp, _Up>>(declval<_Tp>());
+ static_cast<common_type_t<_Tp, _Up>>(declval<_Up>());
+ } &&
+ common_reference_with<
+ add_lvalue_reference_t<const _Tp>,
+ add_lvalue_reference_t<const _Up>> &&
+ common_reference_with<
+ add_lvalue_reference_t<common_type_t<_Tp, _Up>>,
+ common_reference_t<
+ add_lvalue_reference_t<const _Tp>,
+ add_lvalue_reference_t<const _Up>>>;
+
+#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___CONCEPTS_COMMON_WITH_H
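
`common_with` strengthens `common_type` by also demanding explicit convertibility of both types to the common type and a common reference for the const lvalue forms. Illustrative checks (not part of the import):

    // Illustrative sketch, not part of the vendored libc++ sources.
    #include <concepts>
    #include <type_traits>

    static_assert(std::common_with<int, long>);                           // common type: long
    static_assert(std::is_same_v<std::common_type_t<int, long>, long>);
    static_assert(std::common_with<int*, const int*>);                    // common type: const int*
    static_assert(!std::common_with<int, void>);
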
diff --git a/libcxx/include/__concepts/constructible.h b/libcxx/include/__concepts/constructible.h
new file mode 100644
index 000000000000..9bba8118b899
--- /dev/null
+++ b/libcxx/include/__concepts/constructible.h
@@ -0,0 +1,56 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___CONCEPTS_CONSTRUCTIBLE_H
+#define _LIBCPP___CONCEPTS_CONSTRUCTIBLE_H
+
+#include <__concepts/convertible_to.h>
+#include <__concepts/destructible.h>
+#include <__config>
+#include <type_traits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+// [concept.constructible]
+template<class _Tp, class... _Args>
+concept constructible_from =
+ destructible<_Tp> && is_constructible_v<_Tp, _Args...>;
+
+// [concept.default.init]
+
+template<class _Tp>
+concept __default_initializable = requires { ::new _Tp; };
+
+template<class _Tp>
+concept default_initializable = constructible_from<_Tp> &&
+ requires { _Tp{}; } && __default_initializable<_Tp>;
+
+// [concept.moveconstructible]
+template<class _Tp>
+concept move_constructible =
+ constructible_from<_Tp, _Tp> && convertible_to<_Tp, _Tp>;
+
+// [concept.copyconstructible]
+template<class _Tp>
+concept copy_constructible =
+ move_constructible<_Tp> &&
+ constructible_from<_Tp, _Tp&> && convertible_to<_Tp&, _Tp> &&
+ constructible_from<_Tp, const _Tp&> && convertible_to<const _Tp&, _Tp> &&
+ constructible_from<_Tp, const _Tp> && convertible_to<const _Tp, _Tp>;
+
+#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___CONCEPTS_CONSTRUCTIBLE_H
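
Note the shape of `default_initializable`: besides `constructible_from<T>` it also checks that both `T{}` and `::new T` are well-formed. Illustrative checks (not part of the import):

    // Illustrative sketch, not part of the vendored libc++ sources.
    #include <concepts>
    #include <memory>
    #include <string>

    static_assert(std::constructible_from<std::string, const char*>);
    static_assert(std::default_initializable<std::string>);
    static_assert(!std::default_initializable<int&>);               // references cannot be default-initialized
    static_assert(std::move_constructible<std::unique_ptr<int>>);
    static_assert(!std::copy_constructible<std::unique_ptr<int>>);  // move-only
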
diff --git a/libcxx/include/__concepts/convertible_to.h b/libcxx/include/__concepts/convertible_to.h
new file mode 100644
index 000000000000..ec68967106d5
--- /dev/null
+++ b/libcxx/include/__concepts/convertible_to.h
@@ -0,0 +1,36 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___CONCEPTS_CONVERTIBLE_TO_H
+#define _LIBCPP___CONCEPTS_CONVERTIBLE_TO_H
+
+#include <__config>
+#include <type_traits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+// [concept.convertible]
+
+template<class _From, class _To>
+concept convertible_to =
+ is_convertible_v<_From, _To> &&
+ requires (add_rvalue_reference_t<_From> (&__f)()) {
+ static_cast<_To>(__f());
+ };
+
+#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___CONCEPTS_CONVERTIBLE_TO_H
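
`convertible_to` requires both the implicit conversion (via `is_convertible_v`) and a well-formed `static_cast`, so explicit-only conversions do not qualify. Illustrative checks (not part of the import; `ExplicitOnly` is a hypothetical type):

    // Illustrative sketch, not part of the vendored libc++ sources.
    #include <concepts>
    #include <string>
    #include <string_view>

    struct ExplicitOnly { explicit ExplicitOnly(int) {} };

    static_assert(std::convertible_to<const char*, std::string>);
    static_assert(std::convertible_to<std::string, std::string_view>);
    static_assert(!std::convertible_to<int, ExplicitOnly>);             // explicit-only conversions are rejected
    static_assert(!std::convertible_to<double*, int*>);
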
diff --git a/libcxx/include/__concepts/copyable.h b/libcxx/include/__concepts/copyable.h
new file mode 100644
index 000000000000..cfeeec86917e
--- /dev/null
+++ b/libcxx/include/__concepts/copyable.h
@@ -0,0 +1,39 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___CONCEPTS_COPYABLE_H
+#define _LIBCPP___CONCEPTS_COPYABLE_H
+
+#include <__concepts/assignable.h>
+#include <__concepts/constructible.h>
+#include <__concepts/movable.h>
+#include <__config>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+// [concepts.object]
+
+template<class _Tp>
+concept copyable =
+ copy_constructible<_Tp> &&
+ movable<_Tp> &&
+ assignable_from<_Tp&, _Tp&> &&
+ assignable_from<_Tp&, const _Tp&> &&
+ assignable_from<_Tp&, const _Tp>;
+
+#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___CONCEPTS_COPYABLE_H
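
`copyable` layers copy construction and the copy/move assignment forms on top of `movable`. Illustrative checks (not part of the import; `ConstMember` is a hypothetical type):

    // Illustrative sketch, not part of the vendored libc++ sources.
    #include <concepts>
    #include <memory>
    #include <vector>

    struct ConstMember { const int v; };                   // copy-constructible, but assignment is deleted

    static_assert(std::copyable<std::vector<int>>);
    static_assert(!std::copyable<std::unique_ptr<int>>);   // move-only
    static_assert(!std::copyable<ConstMember>);            // not assignable, so not even movable
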
diff --git a/libcxx/include/__concepts/derived_from.h b/libcxx/include/__concepts/derived_from.h
new file mode 100644
index 000000000000..f7c83bf31fba
--- /dev/null
+++ b/libcxx/include/__concepts/derived_from.h
@@ -0,0 +1,34 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___CONCEPTS_DERIVED_FROM_H
+#define _LIBCPP___CONCEPTS_DERIVED_FROM_H
+
+#include <__config>
+#include <type_traits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+// [concept.derived]
+
+template<class _Dp, class _Bp>
+concept derived_from =
+ is_base_of_v<_Bp, _Dp> &&
+ is_convertible_v<const volatile _Dp*, const volatile _Bp*>;
+
+#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___CONCEPTS_DERIVED_FROM_H
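
Unlike `is_base_of_v`, `derived_from` also requires the pointer conversion to be accessible and unambiguous, and it never holds for non-class types. Illustrative checks (not part of the import; the class names are hypothetical):

    // Illustrative sketch, not part of the vendored libc++ sources.
    #include <concepts>

    struct Base {};
    struct PublicDerived : Base {};
    struct PrivateDerived : private Base {};

    static_assert(std::derived_from<PublicDerived, Base>);
    static_assert(!std::derived_from<PrivateDerived, Base>);   // inaccessible base
    static_assert(std::derived_from<Base, Base>);              // a class counts as derived from itself here
    static_assert(!std::derived_from<int, int>);               // non-class types never qualify
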
diff --git a/libcxx/include/__concepts/destructible.h b/libcxx/include/__concepts/destructible.h
new file mode 100644
index 000000000000..800ee2d56f04
--- /dev/null
+++ b/libcxx/include/__concepts/destructible.h
@@ -0,0 +1,32 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___CONCEPTS_DESTRUCTIBLE_H
+#define _LIBCPP___CONCEPTS_DESTRUCTIBLE_H
+
+#include <__config>
+#include <type_traits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+// [concept.destructible]
+
+template<class _Tp>
+concept destructible = is_nothrow_destructible_v<_Tp>;
+
+#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___CONCEPTS_DESTRUCTIBLE_H
diff --git a/libcxx/include/__concepts/different_from.h b/libcxx/include/__concepts/different_from.h
new file mode 100644
index 000000000000..5def31e652a5
--- /dev/null
+++ b/libcxx/include/__concepts/different_from.h
@@ -0,0 +1,31 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___CONCEPTS_DIFFERENT_FROM_H
+#define _LIBCPP___CONCEPTS_DIFFERENT_FROM_H
+
+#include <__concepts/same_as.h>
+#include <__config>
+#include <type_traits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+template<class _Tp, class _Up>
+concept __different_from = !same_as<remove_cvref_t<_Tp>, remove_cvref_t<_Up>>;
+
+#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___CONCEPTS_DIFFERENT_FROM_H
diff --git a/libcxx/include/__concepts/equality_comparable.h b/libcxx/include/__concepts/equality_comparable.h
new file mode 100644
index 000000000000..5df812c2600d
--- /dev/null
+++ b/libcxx/include/__concepts/equality_comparable.h
@@ -0,0 +1,53 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___CONCEPTS_EQUALITY_COMPARABLE_H
+#define _LIBCPP___CONCEPTS_EQUALITY_COMPARABLE_H
+
+#include <__concepts/boolean_testable.h>
+#include <__concepts/common_reference_with.h>
+#include <__config>
+#include <type_traits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+// [concept.equalitycomparable]
+
+template<class _Tp, class _Up>
+concept __weakly_equality_comparable_with =
+ requires(__make_const_lvalue_ref<_Tp> __t, __make_const_lvalue_ref<_Up> __u) {
+ { __t == __u } -> __boolean_testable;
+ { __t != __u } -> __boolean_testable;
+ { __u == __t } -> __boolean_testable;
+ { __u != __t } -> __boolean_testable;
+ };
+
+template<class _Tp>
+concept equality_comparable = __weakly_equality_comparable_with<_Tp, _Tp>;
+
+template<class _Tp, class _Up>
+concept equality_comparable_with =
+ equality_comparable<_Tp> && equality_comparable<_Up> &&
+ common_reference_with<__make_const_lvalue_ref<_Tp>, __make_const_lvalue_ref<_Up>> &&
+ equality_comparable<
+ common_reference_t<
+ __make_const_lvalue_ref<_Tp>,
+ __make_const_lvalue_ref<_Up>>> &&
+ __weakly_equality_comparable_with<_Tp, _Up>;
+
+#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___CONCEPTS_EQUALITY_COMPARABLE_H
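
`equality_comparable_with` requires each type to be comparable on its own, a common reference tying the two together, and the mixed `==`/`!=` expressions in both orders. Illustrative checks (not part of the import; `OnlyLess` is a hypothetical type):

    // Illustrative sketch, not part of the vendored libc++ sources.
    #include <concepts>
    #include <string>

    struct OnlyLess { friend bool operator<(OnlyLess, OnlyLess) { return false; } };

    static_assert(std::equality_comparable<std::string>);
    static_assert(!std::equality_comparable<OnlyLess>);                      // no operator==
    static_assert(std::equality_comparable_with<int, double>);
    static_assert(std::equality_comparable_with<std::string, const char*>);  // common type: std::string
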
diff --git a/libcxx/include/__concepts/invocable.h b/libcxx/include/__concepts/invocable.h
new file mode 100644
index 000000000000..0a8d9b7255ab
--- /dev/null
+++ b/libcxx/include/__concepts/invocable.h
@@ -0,0 +1,41 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___CONCEPTS_INVOCABLE_H
+#define _LIBCPP___CONCEPTS_INVOCABLE_H
+
+#include <__config>
+#include <__functional/invoke.h>
+#include <__utility/forward.h>
+#include <type_traits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+// [concept.invocable]
+
+template<class _Fn, class... _Args>
+concept invocable = requires(_Fn&& __fn, _Args&&... __args) {
+ _VSTD::invoke(_VSTD::forward<_Fn>(__fn), _VSTD::forward<_Args>(__args)...); // not required to be equality preserving
+};
+
+// [concept.regular.invocable]
+
+template<class _Fn, class... _Args>
+concept regular_invocable = invocable<_Fn, _Args...>;
+
+#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___CONCEPTS_INVOCABLE_H
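
`regular_invocable` is syntactically identical to `invocable`; the difference is purely semantic (equality preservation), which the compiler cannot check. Illustrative checks (not part of the import):

    // Illustrative sketch, not part of the vendored libc++ sources.
    #include <concepts>
    #include <random>

    static_assert(std::invocable<int (*)(int), int>);
    static_assert(!std::invocable<int (*)(int), int*>);

    // A PRNG satisfies both concepts syntactically, even though successive calls
    // return different values and so violate regular_invocable's semantic intent.
    static_assert(std::invocable<std::mt19937&>);
    static_assert(std::regular_invocable<std::mt19937&>);
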
diff --git a/libcxx/include/__concepts/movable.h b/libcxx/include/__concepts/movable.h
new file mode 100644
index 000000000000..dd0b8fb56d5b
--- /dev/null
+++ b/libcxx/include/__concepts/movable.h
@@ -0,0 +1,39 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___CONCEPTS_MOVABLE_H
+#define _LIBCPP___CONCEPTS_MOVABLE_H
+
+#include <__concepts/assignable.h>
+#include <__concepts/constructible.h>
+#include <__concepts/swappable.h>
+#include <__config>
+#include <type_traits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+// [concepts.object]
+
+template<class _Tp>
+concept movable =
+ is_object_v<_Tp> &&
+ move_constructible<_Tp> &&
+ assignable_from<_Tp&, _Tp> &&
+ swappable<_Tp>;
+
+#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___CONCEPTS_MOVABLE_H
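
`movable` is the object-type core of the `copyable`/`semiregular`/`regular` hierarchy: move construction, move assignment, and swappability, with no copying required. Illustrative checks (not part of the import):

    // Illustrative sketch, not part of the vendored libc++ sources.
    #include <concepts>
    #include <memory>
    #include <mutex>

    static_assert(std::movable<std::unique_ptr<int>>);    // move-only types model movable
    static_assert(!std::movable<std::mutex>);             // neither movable nor copyable
    static_assert(!std::movable<int&>);                   // must be an object type
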
diff --git a/libcxx/include/__concepts/predicate.h b/libcxx/include/__concepts/predicate.h
new file mode 100644
index 000000000000..8e885406316d
--- /dev/null
+++ b/libcxx/include/__concepts/predicate.h
@@ -0,0 +1,35 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___CONCEPTS_PREDICATE_H
+#define _LIBCPP___CONCEPTS_PREDICATE_H
+
+#include <__concepts/boolean_testable.h>
+#include <__concepts/invocable.h>
+#include <__config>
+#include <type_traits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+// [concept.predicate]
+
+template<class _Fn, class... _Args>
+concept predicate =
+ regular_invocable<_Fn, _Args...> && __boolean_testable<invoke_result_t<_Fn, _Args...>>;
+
+#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___CONCEPTS_PREDICATE_H
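
A predicate is any regular-invocable whose result is boolean-testable; in particular a callable returning `void` does not qualify. Illustrative checks (not part of the import; `is_empty_str` is a hypothetical function):

    // Illustrative sketch, not part of the vendored libc++ sources.
    #include <concepts>
    #include <functional>
    #include <string>

    bool is_empty_str(const std::string& s) { return s.empty(); }

    static_assert(std::predicate<decltype(&is_empty_str), std::string>);
    static_assert(!std::predicate<decltype(&is_empty_str), int>);    // not invocable with int
    static_assert(std::predicate<std::less<int>, int, int>);         // returns bool
    static_assert(!std::predicate<void (*)(int), int>);              // void is not boolean-testable
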
diff --git a/libcxx/include/__concepts/regular.h b/libcxx/include/__concepts/regular.h
new file mode 100644
index 000000000000..d292e8d72dbe
--- /dev/null
+++ b/libcxx/include/__concepts/regular.h
@@ -0,0 +1,33 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___CONCEPTS_REGULAR_H
+#define _LIBCPP___CONCEPTS_REGULAR_H
+
+#include <__concepts/equality_comparable.h>
+#include <__concepts/semiregular.h>
+#include <__config>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+// [concept.object]
+
+template<class _Tp>
+concept regular = semiregular<_Tp> && equality_comparable<_Tp>;
+
+#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___CONCEPTS_REGULAR_H
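
`regular` is the "behaves like a built-in value" bundle: default-initializable, copyable, and equality-comparable. Illustrative checks (not part of the import; `Point` and `NoEq` are hypothetical types):

    // Illustrative sketch, not part of the vendored libc++ sources.
    #include <concepts>
    #include <memory>
    #include <string>

    struct Point {
      int x = 0, y = 0;
      friend bool operator==(const Point&, const Point&) = default;
    };
    struct NoEq { int v = 0; };

    static_assert(std::regular<Point>);
    static_assert(std::regular<std::string>);
    static_assert(std::semiregular<NoEq> && !std::regular<NoEq>);   // lacks operator==
    static_assert(!std::regular<std::unique_ptr<int>>);             // move-only, so not even semiregular
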
diff --git a/libcxx/include/__concepts/relation.h b/libcxx/include/__concepts/relation.h
new file mode 100644
index 000000000000..c6ff20d15195
--- /dev/null
+++ b/libcxx/include/__concepts/relation.h
@@ -0,0 +1,44 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___CONCEPTS_RELATION_H
+#define _LIBCPP___CONCEPTS_RELATION_H
+
+#include <__concepts/predicate.h>
+#include <__config>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+// [concept.relation]
+
+template<class _Rp, class _Tp, class _Up>
+concept relation =
+ predicate<_Rp, _Tp, _Tp> && predicate<_Rp, _Up, _Up> &&
+ predicate<_Rp, _Tp, _Up> && predicate<_Rp, _Up, _Tp>;
+
+// [concept.equiv]
+
+template<class _Rp, class _Tp, class _Up>
+concept equivalence_relation = relation<_Rp, _Tp, _Up>;
+
+// [concept.strictweakorder]
+
+template<class _Rp, class _Tp, class _Up>
+concept strict_weak_order = relation<_Rp, _Tp, _Up>;
+
+#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___CONCEPTS_RELATION_H
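
`equivalence_relation` and `strict_weak_order` are syntactic aliases of `relation`; the extra mathematical requirements are semantic only. Illustrative checks (not part of the import):

    // Illustrative sketch, not part of the vendored libc++ sources.
    #include <concepts>
    #include <cstddef>
    #include <functional>

    static_assert(std::relation<std::less<int>, int, int>);
    static_assert(std::strict_weak_order<std::less<int>, int, long>);     // mixed argument types are fine
    static_assert(!std::relation<std::less<int>, int, std::nullptr_t>);   // not callable with nullptr_t
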
diff --git a/libcxx/include/__concepts/same_as.h b/libcxx/include/__concepts/same_as.h
new file mode 100644
index 000000000000..5a912b6f41c8
--- /dev/null
+++ b/libcxx/include/__concepts/same_as.h
@@ -0,0 +1,35 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___CONCEPTS_SAME_AS_H
+#define _LIBCPP___CONCEPTS_SAME_AS_H
+
+#include <__config>
+#include <type_traits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+// [concept.same]
+
+template<class _Tp, class _Up>
+concept __same_as_impl = _IsSame<_Tp, _Up>::value;
+
+template<class _Tp, class _Up>
+concept same_as = __same_as_impl<_Tp, _Up> && __same_as_impl<_Up, _Tp>;
+
+#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___CONCEPTS_SAME_AS_H
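
The two-way `__same_as_impl` dance exists so that `same_as<T, U>` and `same_as<U, T>` subsume each other, letting constrained overloads written in either argument order be partially ordered. A sketch of that effect (not part of the import; `which` is a hypothetical function):

    // Illustrative sketch, not part of the vendored libc++ sources.
    #include <concepts>

    static_assert(std::same_as<int, int>);
    static_assert(!std::same_as<int, const int>);   // cv-qualifiers and references matter
    static_assert(!std::same_as<int, int&>);

    template <class T> requires std::same_as<T, int>
    constexpr int which(T) { return 1; }

    template <class T> requires std::same_as<int, T> && std::integral<T>
    constexpr int which(T) { return 2; }

    static_assert(which(0) == 2);   // the more constrained overload wins instead of being ambiguous
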
diff --git a/libcxx/include/__concepts/semiregular.h b/libcxx/include/__concepts/semiregular.h
new file mode 100644
index 000000000000..4b96fe6dfba6
--- /dev/null
+++ b/libcxx/include/__concepts/semiregular.h
@@ -0,0 +1,33 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___CONCEPTS_SEMIREGULAR_H
+#define _LIBCPP___CONCEPTS_SEMIREGULAR_H
+
+#include <__concepts/constructible.h>
+#include <__concepts/copyable.h>
+#include <__config>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+// [concept.object]
+
+template<class _Tp>
+concept semiregular = copyable<_Tp> && default_initializable<_Tp>;
+
+#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___CONCEPTS_SEMIREGULAR_H
diff --git a/libcxx/include/__concepts/swappable.h b/libcxx/include/__concepts/swappable.h
new file mode 100644
index 000000000000..423b3a89fa40
--- /dev/null
+++ b/libcxx/include/__concepts/swappable.h
@@ -0,0 +1,115 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___CONCEPTS_SWAPPABLE_H
+#define _LIBCPP___CONCEPTS_SWAPPABLE_H
+
+#include <__concepts/assignable.h>
+#include <__concepts/class_or_enum.h>
+#include <__concepts/common_reference_with.h>
+#include <__concepts/constructible.h>
+#include <__config>
+#include <__utility/exchange.h>
+#include <__utility/forward.h>
+#include <__utility/move.h>
+#include <type_traits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+// [concept.swappable]
+namespace ranges::__swap {
+ // Deleted to inhibit ADL
+ template<class _Tp>
+ void swap(_Tp&, _Tp&) = delete;
+
+
+ // [1]
+ template<class _Tp, class _Up>
+ concept __unqualified_swappable_with =
+ (__class_or_enum<remove_cvref_t<_Tp>> || __class_or_enum<remove_cvref_t<_Up>>) &&
+ requires(_Tp&& __t, _Up&& __u) {
+ swap(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u));
+ };
+
+ struct __fn;
+
+ template<class _Tp, class _Up, size_t _Size>
+ concept __swappable_arrays =
+ !__unqualified_swappable_with<_Tp(&)[_Size], _Up(&)[_Size]> &&
+ extent_v<_Tp> == extent_v<_Up> &&
+ requires(_Tp(& __t)[_Size], _Up(& __u)[_Size], const __fn& __swap) {
+ __swap(__t[0], __u[0]);
+ };
+
+ template<class _Tp>
+ concept __exchangeable =
+ !__unqualified_swappable_with<_Tp&, _Tp&> &&
+ move_constructible<_Tp> &&
+ assignable_from<_Tp&, _Tp>;
+
+ struct __fn {
+ // 2.1 `S` is `(void)swap(E1, E2)`* if `E1` or `E2` has class or enumeration type and...
+ // *The name `swap` is used here unqualified.
+ template<class _Tp, class _Up>
+ requires __unqualified_swappable_with<_Tp, _Up>
+ constexpr void operator()(_Tp&& __t, _Up&& __u) const
+ noexcept(noexcept(swap(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u))))
+ {
+ swap(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u));
+ }
+
+ // 2.2 Otherwise, if `E1` and `E2` are lvalues of array types with equal extent and...
+ template<class _Tp, class _Up, size_t _Size>
+ requires __swappable_arrays<_Tp, _Up, _Size>
+ constexpr void operator()(_Tp(& __t)[_Size], _Up(& __u)[_Size]) const
+ noexcept(noexcept((*this)(*__t, *__u)))
+ {
+ // TODO(cjdb): replace with `ranges::swap_ranges`.
+ for (size_t __i = 0; __i < _Size; ++__i) {
+ (*this)(__t[__i], __u[__i]);
+ }
+ }
+
+ // 2.3 Otherwise, if `E1` and `E2` are lvalues of the same type `T` that models...
+ template<__exchangeable _Tp>
+ constexpr void operator()(_Tp& __x, _Tp& __y) const
+ noexcept(is_nothrow_move_constructible_v<_Tp> && is_nothrow_move_assignable_v<_Tp>)
+ {
+ __y = _VSTD::exchange(__x, _VSTD::move(__y));
+ }
+ };
+} // namespace ranges::__swap
+
+namespace ranges::inline __cpo {
+ inline constexpr auto swap = __swap::__fn{};
+} // namespace ranges::__cpo
+
+template<class _Tp>
+concept swappable = requires(_Tp& __a, _Tp& __b) { ranges::swap(__a, __b); };
+
+template<class _Tp, class _Up>
+concept swappable_with =
+ common_reference_with<_Tp, _Up> &&
+ requires(_Tp&& __t, _Up&& __u) {
+ ranges::swap(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Tp>(__t));
+ ranges::swap(_VSTD::forward<_Up>(__u), _VSTD::forward<_Up>(__u));
+ ranges::swap(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u));
+ ranges::swap(_VSTD::forward<_Up>(__u), _VSTD::forward<_Tp>(__t));
+ };
+
+#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___CONCEPTS_SWAPPABLE_H
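
The deleted `swap` plus the `__fn` object above form the `ranges::swap` customization point: it prefers an ADL-found `swap`, falls back to element-wise swapping for arrays, and finally to the move/exchange form. A hedged usage sketch (not part of the import; `lib::Handle` is a hypothetical type):

    // Illustrative sketch, not part of the vendored libc++ sources.
    #include <cassert>
    #include <concepts>
    #include <utility>

    namespace lib {
      struct Handle {
        int fd = -1;
        // ADL customization; ranges::swap finds and prefers this (case 2.1).
        friend void swap(Handle& a, Handle& b) noexcept { std::swap(a.fd, b.fd); }
      };
    }

    static_assert(std::swappable<lib::Handle>);
    static_assert(std::swappable<int[4]>);          // arrays swap element-wise (case 2.2)

    int main() {
      lib::Handle a{1}, b{2};
      std::ranges::swap(a, b);                      // dispatches to the ADL overload
      assert(a.fd == 2 && b.fd == 1);
    }
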
diff --git a/libcxx/include/__concepts/totally_ordered.h b/libcxx/include/__concepts/totally_ordered.h
new file mode 100644
index 000000000000..d8dd4a4944d0
--- /dev/null
+++ b/libcxx/include/__concepts/totally_ordered.h
@@ -0,0 +1,57 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___CONCEPTS_TOTALLY_ORDERED_H
+#define _LIBCPP___CONCEPTS_TOTALLY_ORDERED_H
+
+#include <__concepts/boolean_testable.h>
+#include <__concepts/equality_comparable.h>
+#include <__config>
+#include <type_traits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+// [concept.totallyordered]
+
+template<class _Tp, class _Up>
+concept __partially_ordered_with =
+ requires(__make_const_lvalue_ref<_Tp> __t, __make_const_lvalue_ref<_Up> __u) {
+ { __t < __u } -> __boolean_testable;
+ { __t > __u } -> __boolean_testable;
+ { __t <= __u } -> __boolean_testable;
+ { __t >= __u } -> __boolean_testable;
+ { __u < __t } -> __boolean_testable;
+ { __u > __t } -> __boolean_testable;
+ { __u <= __t } -> __boolean_testable;
+ { __u >= __t } -> __boolean_testable;
+ };
+
+template<class _Tp>
+concept totally_ordered = equality_comparable<_Tp> && __partially_ordered_with<_Tp, _Tp>;
+
+template<class _Tp, class _Up>
+concept totally_ordered_with =
+ totally_ordered<_Tp> && totally_ordered<_Up> &&
+ equality_comparable_with<_Tp, _Up> &&
+ totally_ordered<
+ common_reference_t<
+ __make_const_lvalue_ref<_Tp>,
+ __make_const_lvalue_ref<_Up>>> &&
+ __partially_ordered_with<_Tp, _Up>;
+
+#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___CONCEPTS_TOTALLY_ORDERED_H
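
`totally_ordered` only checks that all six comparison operators are usable and boolean-testable; whether the ordering is genuinely total is a semantic requirement. Illustrative checks (not part of the import):

    // Illustrative sketch, not part of the vendored libc++ sources.
    #include <complex>
    #include <concepts>
    #include <string>

    static_assert(std::totally_ordered<int>);
    static_assert(std::totally_ordered<std::string>);
    static_assert(!std::totally_ordered<std::complex<double>>);            // no relational operators
    static_assert(std::totally_ordered_with<std::string, const char*>);

    // double satisfies the syntax even though NaN breaks the semantics.
    static_assert(std::totally_ordered<double>);
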
diff --git a/libcxx/include/__config b/libcxx/include/__config
index 3cf23694f878..dbf4383cd6e3 100644
--- a/libcxx/include/__config
+++ b/libcxx/include/__config
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- __config ---------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -24,17 +24,7 @@
#ifdef __cplusplus
-#ifdef __GNUC__
-# define _GNUC_VER (__GNUC__ * 100 + __GNUC_MINOR__)
-// The _GNUC_VER_NEW macro better represents the new GCC versioning scheme
-// introduced in GCC 5.0.
-# define _GNUC_VER_NEW (_GNUC_VER * 10 + __GNUC_PATCHLEVEL__)
-#else
-# define _GNUC_VER 0
-# define _GNUC_VER_NEW 0
-#endif
-
-#define _LIBCPP_VERSION 13000
+#define _LIBCPP_VERSION 14000
#ifndef _LIBCPP_ABI_VERSION
# define _LIBCPP_ABI_VERSION 1
@@ -92,6 +82,11 @@
// its vtable and typeinfo to libc++ rather than having all other libraries
// using that class define their own copies.
# define _LIBCPP_ABI_BAD_FUNCTION_CALL_KEY_FUNCTION
+// Override the default return value of exception::what() for
+// bad_function_call::what() with a string that is specific to
+// bad_function_call (see http://wg21.link/LWG2233). This is an ABI break
+// because it changes the vtable layout of bad_function_call.
+# define _LIBCPP_ABI_BAD_FUNCTION_CALL_GOOD_WHAT_MESSAGE
// Enable optimized version of __do_get_(un)signed which avoids redundant copies.
# define _LIBCPP_ABI_OPTIMIZED_LOCALE_NUM_GET
// In C++20 and later, don't derive std::plus from std::binary_function,
@@ -137,6 +132,11 @@
// reduces the number of weak definitions generated in programs that use
// iostreams by providing a single strong definition in the shared library.
# define _LIBCPP_ABI_ENABLE_ADDITIONAL_IOSTREAM_EXPLICIT_INSTANTIATIONS_1
+
+// Define a key function for `bad_function_call` in the library, to centralize
+// its vtable and typeinfo to libc++ rather than having all other libraries
+// using that class define their own copies.
+# define _LIBCPP_ABI_BAD_FUNCTION_CALL_KEY_FUNCTION
#endif
#define _LIBCPP_CONCAT1(_LIBCPP_X,_LIBCPP_Y) _LIBCPP_X##_LIBCPP_Y
@@ -265,9 +265,6 @@
# else // _BYTE_ORDER == _LITTLE_ENDIAN
# define _LIBCPP_BIG_ENDIAN
# endif // _BYTE_ORDER == _LITTLE_ENDIAN
-# ifndef __LONG_LONG_SUPPORTED
-# define _LIBCPP_HAS_NO_LONG_LONG
-# endif // __LONG_LONG_SUPPORTED
#endif // __FreeBSD__
#if defined(__NetBSD__) || defined(__OpenBSD__)
@@ -318,7 +315,12 @@
# endif
#endif // __sun__
-#if defined(__OpenBSD__) || defined(__CloudABI__)
+#if defined(_AIX) && !defined(__64BIT__)
+    // The size of wchar_t is 2 bytes in 32-bit mode on AIX.
+# define _LIBCPP_SHORT_WCHAR 1
+#endif
+
+#if defined(__OpenBSD__)
// Certain architectures provide arc4random(). Prefer using
// arc4random() over /dev/{u,}random to make it possible to obtain
// random data even when using sandboxing mechanisms such as chroots,
@@ -354,6 +356,16 @@
# define _LIBCPP_NO_CFI
#endif
+// If the compiler supports using_if_exists, pretend we have those functions and they'll
+// be picked up if the C library provides them.
+//
+// TODO: Once we drop support for Clang 12, we can assume the compiler supports using_if_exists
+// for platforms that don't have a conforming C11 library, so we can drop this whole thing.
+#if __has_attribute(using_if_exists)
+# define _LIBCPP_HAS_TIMESPEC_GET
+# define _LIBCPP_HAS_QUICK_EXIT
+# define _LIBCPP_HAS_ALIGNED_ALLOC
+#else
#if (defined(__ISO_C_VISIBLE) && (__ISO_C_VISIBLE >= 2011)) || __cplusplus >= 201103L
# if defined(__FreeBSD__)
# define _LIBCPP_HAS_ALIGNED_ALLOC
@@ -408,6 +420,7 @@
# endif
# endif // __APPLE__
#endif
+#endif // __has_attribute(using_if_exists)
#ifndef _LIBCPP_CXX03_LANG
# define _LIBCPP_ALIGNOF(_Tp) alignof(_Tp)
@@ -481,14 +494,6 @@ typedef __char32_t char32_t;
# define _LIBCPP_HAS_BLOCKS_RUNTIME
#endif
-#if !(__has_feature(cxx_relaxed_constexpr))
-#define _LIBCPP_HAS_NO_CXX14_CONSTEXPR
-#endif
-
-#if !(__has_feature(cxx_variable_templates))
-#define _LIBCPP_HAS_NO_VARIABLE_TEMPLATES
-#endif
-
#if !(__has_feature(cxx_noexcept))
#define _LIBCPP_HAS_NO_NOEXCEPT
#endif
@@ -502,26 +507,8 @@ typedef __char32_t char32_t;
#define _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK __attribute__((__no_sanitize__("unsigned-integer-overflow")))
#endif
-#if __has_builtin(__builtin_launder)
-#define _LIBCPP_COMPILER_HAS_BUILTIN_LAUNDER
-#endif
-
-#if __has_builtin(__builtin_constant_p)
-#define _LIBCPP_COMPILER_HAS_BUILTIN_CONSTANT_P
-#endif
-
-#if !__is_identifier(__has_unique_object_representations)
-#define _LIBCPP_HAS_UNIQUE_OBJECT_REPRESENTATIONS
-#endif
-
#define _LIBCPP_ALWAYS_INLINE __attribute__ ((__always_inline__))
-// Literal operators ""d and ""y are supported starting with LLVM Clang 8 and AppleClang 10.0.1
-#if (defined(_LIBCPP_CLANG_VER) && _LIBCPP_CLANG_VER < 800) || \
- (defined(_LIBCPP_APPLE_CLANG_VER) && _LIBCPP_APPLE_CLANG_VER < 1001)
-#define _LIBCPP_HAS_NO_CXX20_CHRONO_LITERALS
-#endif
-
#define _LIBCPP_DISABLE_EXTENSION_WARNING __extension__
#elif defined(_LIBCPP_COMPILER_GCC)
@@ -535,26 +522,10 @@ typedef __char32_t char32_t;
# define _LIBCPP_NO_EXCEPTIONS
#endif
-// Determine if GCC supports relaxed constexpr
-#if !defined(__cpp_constexpr) || __cpp_constexpr < 201304L
-#define _LIBCPP_HAS_NO_CXX14_CONSTEXPR
-#endif
-
-// GCC 5 supports variable templates
-#if !defined(__cpp_variable_templates) || __cpp_variable_templates < 201304L
-#define _LIBCPP_HAS_NO_VARIABLE_TEMPLATES
-#endif
-
#if !defined(__SANITIZE_ADDRESS__)
#define _LIBCPP_HAS_NO_ASAN
#endif
-#if _GNUC_VER >= 700
-#define _LIBCPP_COMPILER_HAS_BUILTIN_LAUNDER
-#define _LIBCPP_COMPILER_HAS_BUILTIN_CONSTANT_P
-#define _LIBCPP_HAS_UNIQUE_OBJECT_REPRESENTATIONS
-#endif
-
#define _LIBCPP_ALWAYS_INLINE __attribute__ ((__always_inline__))
#define _LIBCPP_DISABLE_EXTENSION_WARNING __extension__
@@ -569,8 +540,6 @@ typedef __char32_t char32_t;
#error "MSVC versions prior to Visual Studio 2015 are not supported"
#endif
-#define _LIBCPP_HAS_NO_CXX14_CONSTEXPR
-#define _LIBCPP_HAS_NO_VARIABLE_TEMPLATES
#define __alignof__ __alignof
#define _LIBCPP_NORETURN __declspec(noreturn)
#define _ALIGNAS(x) __declspec(align(x))
@@ -594,7 +563,6 @@ typedef __char32_t char32_t;
#define _LIBCPP_NORETURN __attribute__((noreturn))
#define _LIBCPP_HAS_NO_UNICODE_CHARS
-#define _LIBCPP_HAS_NO_VARIABLE_TEMPLATES
#if defined(_AIX)
#define __MULTILOCALE_API
@@ -825,7 +793,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD _LIBCPP_END_NAMESPACE_STD
#ifdef _LIBCPP_HAS_NO_UNICODE_CHARS
typedef unsigned short char16_t;
typedef unsigned int char32_t;
-#endif // _LIBCPP_HAS_NO_UNICODE_CHARS
+#endif
#ifndef __SIZEOF_INT128__
#define _LIBCPP_HAS_NO_INT128
@@ -862,12 +830,6 @@ typedef unsigned int char32_t;
# define _LIBCPP_DEFAULT = default;
#endif
-#ifdef _LIBCPP_CXX03_LANG
-# define _LIBCPP_EQUAL_DELETE
-#else
-# define _LIBCPP_EQUAL_DELETE = delete
-#endif
-
#ifdef __GNUC__
# define _LIBCPP_NOALIAS __attribute__((__malloc__))
#else
@@ -896,16 +858,35 @@ typedef unsigned int char32_t;
// _LIBCPP_DEBUG potential values:
// - undefined: No assertions. This is the default.
// - 0: Basic assertions
-// - 1: Basic assertions + iterator validity checks.
-#if !defined(_LIBCPP_DEBUG)
-# define _LIBCPP_DEBUG_LEVEL 0
-#elif _LIBCPP_DEBUG == 0
-# define _LIBCPP_DEBUG_LEVEL 1
-#elif _LIBCPP_DEBUG == 1
-# define _LIBCPP_DEBUG_LEVEL 2
-#else
-# error Supported values for _LIBCPP_DEBUG are 0 and 1
-#endif
+// - 1: Basic assertions + iterator validity checks + unspecified behavior randomization.
+# if !defined(_LIBCPP_DEBUG)
+# define _LIBCPP_DEBUG_LEVEL 0
+# elif _LIBCPP_DEBUG == 0
+# define _LIBCPP_DEBUG_LEVEL 1
+# elif _LIBCPP_DEBUG == 1
+# define _LIBCPP_DEBUG_LEVEL 2
+# else
+# error Supported values for _LIBCPP_DEBUG are 0 and 1
+# endif
+
+# if _LIBCPP_DEBUG_LEVEL >= 2 && !defined(_LIBCPP_CXX03_LANG)
+# define _LIBCPP_DEBUG_RANDOMIZE_UNSPECIFIED_STABILITY
+# endif
+
+# if defined(_LIBCPP_DEBUG_RANDOMIZE_UNSPECIFIED_STABILITY)
+# if defined(_LIBCPP_CXX03_LANG)
+# error Support for unspecified stability is only for C++11 and higher
+# endif
+# define _LIBCPP_DEBUG_RANDOMIZE_RANGE(__first, __last) \
+ do { \
+ if (!__builtin_is_constant_evaluated()) \
+ _VSTD::shuffle(__first, __last, __libcpp_debug_randomizer()); \
+ } while (false)
+# else
+# define _LIBCPP_DEBUG_RANDOMIZE_RANGE(__first, __last) \
+ do { \
+ } while (false)
+# endif
// Libc++ allows disabling extern template instantiation declarations by
// means of users defining _LIBCPP_DISABLE_EXTERN_TEMPLATE.
@@ -929,7 +910,7 @@ typedef unsigned int char32_t;
#endif
#if defined(__APPLE__) || defined(__FreeBSD__) || defined(_LIBCPP_MSVCRT_LIKE) || \
- defined(__sun__) || defined(__NetBSD__) || defined(__CloudABI__)
+ defined(__sun__) || defined(__NetBSD__)
#define _LIBCPP_LOCALE__L_EXTENSIONS 1
#endif
@@ -949,13 +930,6 @@ typedef unsigned int char32_t;
# define _LIBCPP_HAS_NO_LIBRARY_ALIGNED_ALLOCATION
#endif
-#if defined(__APPLE__)
-# if !defined(__MAC_OS_X_VERSION_MIN_REQUIRED) && \
- defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__)
-# define __MAC_OS_X_VERSION_MIN_REQUIRED __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__
-# endif
-#endif // defined(__APPLE__)
-
#if defined(_LIBCPP_HAS_NO_LIBRARY_ALIGNED_ALLOCATION) || \
(!defined(__cpp_aligned_new) || __cpp_aligned_new < 201606)
# define _LIBCPP_HAS_NO_ALIGNED_ALLOCATION
@@ -1038,62 +1012,54 @@ typedef unsigned int char32_t;
# define _LIBCPP_EXPLICIT_AFTER_CXX11 explicit
#endif
-#if _LIBCPP_STD_VER > 11 && !defined(_LIBCPP_HAS_NO_CXX14_CONSTEXPR)
+#if _LIBCPP_STD_VER > 11
# define _LIBCPP_CONSTEXPR_AFTER_CXX11 constexpr
#else
# define _LIBCPP_CONSTEXPR_AFTER_CXX11
#endif
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_CXX14_CONSTEXPR)
+#if _LIBCPP_STD_VER > 14
# define _LIBCPP_CONSTEXPR_AFTER_CXX14 constexpr
#else
# define _LIBCPP_CONSTEXPR_AFTER_CXX14
#endif
-#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CXX14_CONSTEXPR)
+#if _LIBCPP_STD_VER > 17
# define _LIBCPP_CONSTEXPR_AFTER_CXX17 constexpr
#else
# define _LIBCPP_CONSTEXPR_AFTER_CXX17
#endif
-// The _LIBCPP_NODISCARD_ATTRIBUTE should only be used to define other
-// NODISCARD macros to the correct attribute.
#if __has_cpp_attribute(nodiscard) || defined(_LIBCPP_COMPILER_MSVC)
-# define _LIBCPP_NODISCARD_ATTRIBUTE [[nodiscard]]
+# define _LIBCPP_NODISCARD [[nodiscard]]
#elif defined(_LIBCPP_COMPILER_CLANG_BASED) && !defined(_LIBCPP_CXX03_LANG)
-# define _LIBCPP_NODISCARD_ATTRIBUTE [[clang::warn_unused_result]]
+# define _LIBCPP_NODISCARD [[clang::warn_unused_result]]
#else
// We can't use GCC's [[gnu::warn_unused_result]] and
// __attribute__((warn_unused_result)), because GCC does not silence them via
// (void) cast.
-# define _LIBCPP_NODISCARD_ATTRIBUTE
+# define _LIBCPP_NODISCARD
#endif
// _LIBCPP_NODISCARD_EXT may be used to apply [[nodiscard]] to entities not
// specified as such as an extension.
#if defined(_LIBCPP_ENABLE_NODISCARD) && !defined(_LIBCPP_DISABLE_NODISCARD_EXT)
-# define _LIBCPP_NODISCARD_EXT _LIBCPP_NODISCARD_ATTRIBUTE
+# define _LIBCPP_NODISCARD_EXT _LIBCPP_NODISCARD
#else
# define _LIBCPP_NODISCARD_EXT
#endif
#if !defined(_LIBCPP_DISABLE_NODISCARD_AFTER_CXX17) && \
(_LIBCPP_STD_VER > 17 || defined(_LIBCPP_ENABLE_NODISCARD))
-# define _LIBCPP_NODISCARD_AFTER_CXX17 _LIBCPP_NODISCARD_ATTRIBUTE
+# define _LIBCPP_NODISCARD_AFTER_CXX17 _LIBCPP_NODISCARD
#else
# define _LIBCPP_NODISCARD_AFTER_CXX17
#endif
-#if _LIBCPP_STD_VER > 14 && defined(__cpp_inline_variables) && (__cpp_inline_variables >= 201606L)
-# define _LIBCPP_INLINE_VAR inline
+#if !defined(_LIBCPP_DEBUG) && _LIBCPP_STD_VER > 11
+# define _LIBCPP_CONSTEXPR_IF_NODEBUG constexpr
#else
-# define _LIBCPP_INLINE_VAR
-#endif
-
-#if defined(_LIBCPP_DEBUG) || defined(_LIBCPP_HAS_NO_CXX14_CONSTEXPR)
# define _LIBCPP_CONSTEXPR_IF_NODEBUG
-#else
-# define _LIBCPP_CONSTEXPR_IF_NODEBUG constexpr
#endif
#if __has_attribute(no_destroy)
@@ -1133,11 +1099,9 @@ extern "C" _LIBCPP_FUNC_VIS void __sanitizer_annotate_contiguous_container(
defined(__linux__) || \
defined(__GNU__) || \
defined(__APPLE__) || \
- defined(__CloudABI__) || \
defined(__sun__) || \
defined(__MVS__) || \
- defined(_AIX) || \
- (defined(__MINGW32__) && __has_include(<pthread.h>))
+ defined(_AIX)
# define _LIBCPP_HAS_THREAD_API_PTHREAD
# elif defined(__Fuchsia__)
// TODO(44575): Switch to C11 thread API when possible.
@@ -1207,21 +1171,6 @@ extern "C" _LIBCPP_FUNC_VIS void __sanitizer_annotate_contiguous_container(
# define _LIBCPP_HAS_TRIVIAL_CONDVAR_DESTRUCTION
#endif
-// Systems that use capability-based security (FreeBSD with Capsicum,
-// Nuxi CloudABI) may only provide local filesystem access (using *at()).
-// Functions like open(), rename(), unlink() and stat() should not be
-// used, as they attempt to access the global filesystem namespace.
-#ifdef __CloudABI__
-#define _LIBCPP_HAS_NO_GLOBAL_FILESYSTEM_NAMESPACE
-#endif
-
-// CloudABI is intended for running networked services. Processes do not
-// have standard input and output channels.
-#ifdef __CloudABI__
-#define _LIBCPP_HAS_NO_STDIN
-#define _LIBCPP_HAS_NO_STDOUT
-#endif
-
// Some systems do not provide gets() in their C library, for security reasons.
#if defined(_LIBCPP_MSVCRT) || \
(defined(__FreeBSD_version) && __FreeBSD_version >= 1300043) || \
@@ -1229,28 +1178,21 @@ extern "C" _LIBCPP_FUNC_VIS void __sanitizer_annotate_contiguous_container(
# define _LIBCPP_C_HAS_NO_GETS
#endif
-#if defined(__BIONIC__) || defined(__CloudABI__) || defined(__NuttX__) || \
+#if defined(__BIONIC__) || defined(__NuttX__) || \
defined(__Fuchsia__) || defined(__wasi__) || defined(_LIBCPP_HAS_MUSL_LIBC) || \
defined(__MVS__) || defined(__OpenBSD__)
#define _LIBCPP_PROVIDES_DEFAULT_RUNE_TABLE
#endif
-// Thread-unsafe functions such as strtok() and localtime()
-// are not available.
-#ifdef __CloudABI__
-#define _LIBCPP_HAS_NO_THREAD_UNSAFE_C_FUNCTIONS
-#endif
-
#if __has_feature(cxx_atomic) || __has_extension(c_atomic) || __has_keyword(_Atomic)
# define _LIBCPP_HAS_C_ATOMIC_IMP
#elif defined(_LIBCPP_COMPILER_GCC)
# define _LIBCPP_HAS_GCC_ATOMIC_IMP
#endif
-#if (!defined(_LIBCPP_HAS_C_ATOMIC_IMP) && \
- !defined(_LIBCPP_HAS_GCC_ATOMIC_IMP) && \
- !defined(_LIBCPP_HAS_EXTERNAL_ATOMIC_IMP)) \
- || defined(_LIBCPP_HAS_NO_THREADS)
+#if !defined(_LIBCPP_HAS_C_ATOMIC_IMP) && \
+ !defined(_LIBCPP_HAS_GCC_ATOMIC_IMP) && \
+ !defined(_LIBCPP_HAS_EXTERNAL_ATOMIC_IMP)
# define _LIBCPP_HAS_NO_ATOMIC_HEADER
#else
# ifndef _LIBCPP_ATOMIC_FLAG_TYPE
@@ -1288,14 +1230,6 @@ extern "C" _LIBCPP_FUNC_VIS void __sanitizer_annotate_contiguous_container(
# define _LIBCPP_SAFE_STATIC
#endif
-#if !__has_builtin(__builtin_addressof) && _GNUC_VER < 700
-#define _LIBCPP_HAS_NO_BUILTIN_ADDRESSOF
-#endif
-
-#if !__has_builtin(__builtin_is_constant_evaluated) && _GNUC_VER < 900
-#define _LIBCPP_HAS_NO_BUILTIN_IS_CONSTANT_EVALUATED
-#endif
-
#if __has_attribute(diagnose_if) && !defined(_LIBCPP_DISABLE_ADDITIONAL_DIAGNOSTICS)
# define _LIBCPP_DIAGNOSE_WARNING(...) \
__attribute__((diagnose_if(__VA_ARGS__, "warning")))
@@ -1311,7 +1245,7 @@ extern "C" _LIBCPP_FUNC_VIS void __sanitizer_annotate_contiguous_container(
# define _LIBCPP_FALLTHROUGH() [[fallthrough]]
#elif __has_cpp_attribute(clang::fallthrough)
# define _LIBCPP_FALLTHROUGH() [[clang::fallthrough]]
-#elif __has_attribute(fallthrough) || _GNUC_VER >= 700
+#elif __has_attribute(__fallthrough__)
# define _LIBCPP_FALLTHROUGH() __attribute__((__fallthrough__))
#else
# define _LIBCPP_FALLTHROUGH() ((void)0)
@@ -1323,12 +1257,6 @@ extern "C" _LIBCPP_FUNC_VIS void __sanitizer_annotate_contiguous_container(
#define _LIBCPP_NODEBUG
#endif
-#if __has_attribute(__nodebug__) && (defined(_LIBCPP_CLANG_VER) && _LIBCPP_CLANG_VER >= 900)
-# define _LIBCPP_NODEBUG_TYPE __attribute__((nodebug))
-#else
-# define _LIBCPP_NODEBUG_TYPE
-#endif
-
#if __has_attribute(__standalone_debug__)
#define _LIBCPP_STANDALONE_DEBUG __attribute__((__standalone_debug__))
#else
@@ -1341,6 +1269,16 @@ extern "C" _LIBCPP_FUNC_VIS void __sanitizer_annotate_contiguous_container(
#define _LIBCPP_PREFERRED_NAME(x)
#endif
+// We often repeat things just for handling wide characters in the library.
+// When wide characters are disabled, it can be useful to have a quick way of
+// disabling that repeated code without having to resort to #if-#endif, which has a larger
+// impact on readability.
+#if defined(_LIBCPP_HAS_NO_WIDE_CHARACTERS)
+# define _LIBCPP_IF_WIDE_CHARACTERS(...)
+#else
+# define _LIBCPP_IF_WIDE_CHARACTERS(...) __VA_ARGS__
+#endif
+
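A small illustration of how the macro above is meant to be used (the declarations are hypothetical): the wchar_t overload disappears entirely when wide characters are disabled, without a separate #if/#endif block.

    void __log(const char* __msg);
    _LIBCPP_IF_WIDE_CHARACTERS(void __log(const wchar_t* __msg);)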
#if defined(_LIBCPP_ABI_MICROSOFT) && \
(defined(_LIBCPP_COMPILER_MSVC) || __has_declspec_attribute(empty_bases))
# define _LIBCPP_DECLSPEC_EMPTY_BASES __declspec(empty_bases)
@@ -1363,20 +1301,8 @@ extern "C" _LIBCPP_FUNC_VIS void __sanitizer_annotate_contiguous_container(
#define _LIBCPP_ENABLE_CXX20_REMOVED_TYPE_TRAITS
#endif // _LIBCPP_ENABLE_CXX20_REMOVED_FEATURES
-#if !defined(__cpp_deduction_guides) || __cpp_deduction_guides < 201611
-#define _LIBCPP_HAS_NO_DEDUCTION_GUIDES
-#endif
-
-#if !__has_keyword(__is_aggregate) && (_GNUC_VER_NEW < 7001)
-#define _LIBCPP_HAS_NO_IS_AGGREGATE
-#endif
-
-#if !defined(__cpp_coroutines) || __cpp_coroutines < 201703L
-#define _LIBCPP_HAS_NO_COROUTINES
-#endif
-
-#if !defined(__cpp_impl_three_way_comparison) || __cpp_impl_three_way_comparison < 201907L
-#define _LIBCPP_HAS_NO_SPACESHIP_OPERATOR
+#if !defined(__cpp_impl_coroutine) || __cpp_impl_coroutine < 201902L
+#define _LIBCPP_HAS_NO_CXX20_COROUTINES
#endif
#if defined(_LIBCPP_COMPILER_IBM)
@@ -1430,12 +1356,6 @@ extern "C" _LIBCPP_FUNC_VIS void __sanitizer_annotate_contiguous_container(
# define _LIBCPP_FOPEN_CLOEXEC_MODE
#endif
-#ifdef _LIBCPP_COMPILER_HAS_BUILTIN_CONSTANT_P
-#define _LIBCPP_BUILTIN_CONSTANT_P(x) __builtin_constant_p(x)
-#else
-#define _LIBCPP_BUILTIN_CONSTANT_P(x) false
-#endif
-
// Support for _FILE_OFFSET_BITS=64 landed gradually in Android, so the full set
// of functions used in cstdio may not be available for low API levels when
// using 64-bit file offsets on LP32.
@@ -1444,7 +1364,13 @@ extern "C" _LIBCPP_FUNC_VIS void __sanitizer_annotate_contiguous_container(
#endif
#if __has_attribute(init_priority)
-# define _LIBCPP_INIT_PRIORITY_MAX __attribute__((init_priority(101)))
+ // TODO: Remove this once we drop support for building libc++ with old Clangs
+# if (defined(_LIBCPP_CLANG_VER) && _LIBCPP_CLANG_VER < 1200) || \
+ (defined(__apple_build_version__) && __apple_build_version__ < 13000000)
+# define _LIBCPP_INIT_PRIORITY_MAX __attribute__((init_priority(101)))
+# else
+# define _LIBCPP_INIT_PRIORITY_MAX __attribute__((init_priority(100)))
+# endif
#else
# define _LIBCPP_INIT_PRIORITY_MAX
#endif
diff --git a/libcxx/include/__coroutine/coroutine_handle.h b/libcxx/include/__coroutine/coroutine_handle.h
new file mode 100644
index 000000000000..64657c0585b9
--- /dev/null
+++ b/libcxx/include/__coroutine/coroutine_handle.h
@@ -0,0 +1,202 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___COROUTINE_COROUTINE_HANDLE_H
+#define _LIBCPP___COROUTINE_COROUTINE_HANDLE_H
+
+#include <__config>
+#include <__debug>
+#include <__functional/hash.h>
+#include <__memory/addressof.h>
+#include <compare>
+#include <type_traits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CXX20_COROUTINES)
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+// [coroutine.handle]
+template <class _Promise = void>
+struct _LIBCPP_TEMPLATE_VIS coroutine_handle;
+
+template <>
+struct _LIBCPP_TEMPLATE_VIS coroutine_handle<void> {
+public:
+ // [coroutine.handle.con], construct/reset
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr coroutine_handle() noexcept = default;
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr coroutine_handle(nullptr_t) noexcept {}
+
+ _LIBCPP_HIDE_FROM_ABI
+ coroutine_handle& operator=(nullptr_t) noexcept {
+ __handle_ = nullptr;
+ return *this;
+ }
+
+ // [coroutine.handle.export.import], export/import
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr void* address() const noexcept { return __handle_; }
+
+ _LIBCPP_HIDE_FROM_ABI
+ static constexpr coroutine_handle from_address(void* __addr) noexcept {
+ coroutine_handle __tmp;
+ __tmp.__handle_ = __addr;
+ return __tmp;
+ }
+
+ // [coroutine.handle.observers], observers
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr explicit operator bool() const noexcept {
+ return __handle_ != nullptr;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ bool done() const {
+ _LIBCPP_ASSERT(__is_suspended(), "done() can be called only on suspended coroutines");
+ return __builtin_coro_done(__handle_);
+ }
+
+ // [coroutine.handle.resumption], resumption
+ _LIBCPP_HIDE_FROM_ABI
+ void operator()() const { resume(); }
+
+ _LIBCPP_HIDE_FROM_ABI
+ void resume() const {
+ _LIBCPP_ASSERT(__is_suspended(), "resume() can be called only on suspended coroutines");
+ _LIBCPP_ASSERT(!done(), "resume() has undefined behavior when the coroutine is done");
+ __builtin_coro_resume(__handle_);
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ void destroy() const {
+ _LIBCPP_ASSERT(__is_suspended(), "destroy() can be called only on suspended coroutines");
+ __builtin_coro_destroy(__handle_);
+ }
+
+private:
+ bool __is_suspended() const {
+ // FIXME: actually implement a check for whether the coroutine is suspended.
+ return __handle_ != nullptr;
+ }
+
+ void* __handle_ = nullptr;
+};
+
+// [coroutine.handle.compare]
+inline _LIBCPP_HIDE_FROM_ABI
+constexpr bool operator==(coroutine_handle<> __x, coroutine_handle<> __y) noexcept {
+ return __x.address() == __y.address();
+}
+inline _LIBCPP_HIDE_FROM_ABI
+constexpr strong_ordering operator<=>(coroutine_handle<> __x, coroutine_handle<> __y) noexcept {
+ return compare_three_way()(__x.address(), __y.address());
+}
+
+template <class _Promise>
+struct _LIBCPP_TEMPLATE_VIS coroutine_handle {
+public:
+ // [coroutine.handle.con], construct/reset
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr coroutine_handle() noexcept = default;
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr coroutine_handle(nullptr_t) noexcept {}
+
+ _LIBCPP_HIDE_FROM_ABI
+ static coroutine_handle from_promise(_Promise& __promise) {
+ using _RawPromise = typename remove_cv<_Promise>::type;
+ coroutine_handle __tmp;
+ __tmp.__handle_ =
+ __builtin_coro_promise(_VSTD::addressof(const_cast<_RawPromise&>(__promise)), alignof(_Promise), true);
+ return __tmp;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ coroutine_handle& operator=(nullptr_t) noexcept {
+ __handle_ = nullptr;
+ return *this;
+ }
+
+ // [coroutine.handle.export.import], export/import
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr void* address() const noexcept { return __handle_; }
+
+ _LIBCPP_HIDE_FROM_ABI
+ static constexpr coroutine_handle from_address(void* __addr) noexcept {
+ coroutine_handle __tmp;
+ __tmp.__handle_ = __addr;
+ return __tmp;
+ }
+
+ // [coroutine.handle.conv], conversion
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr operator coroutine_handle<>() const noexcept {
+ return coroutine_handle<>::from_address(address());
+ }
+
+ // [coroutine.handle.observers], observers
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr explicit operator bool() const noexcept {
+ return __handle_ != nullptr;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ bool done() const {
+ _LIBCPP_ASSERT(__is_suspended(), "done() can be called only on suspended coroutines");
+ return __builtin_coro_done(__handle_);
+ }
+
+ // [coroutine.handle.resumption], resumption
+ _LIBCPP_HIDE_FROM_ABI
+ void operator()() const { resume(); }
+
+ _LIBCPP_HIDE_FROM_ABI
+ void resume() const {
+ _LIBCPP_ASSERT(__is_suspended(), "resume() can be called only on suspended coroutines");
+ _LIBCPP_ASSERT(!done(), "resume() has undefined behavior when the coroutine is done");
+ __builtin_coro_resume(__handle_);
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ void destroy() const {
+ _LIBCPP_ASSERT(__is_suspended(), "destroy() can be called only on suspended coroutines");
+ __builtin_coro_destroy(__handle_);
+ }
+
+ // [coroutine.handle.promise], promise access
+ _LIBCPP_HIDE_FROM_ABI
+ _Promise& promise() const {
+ return *static_cast<_Promise*>(__builtin_coro_promise(this->__handle_, alignof(_Promise), false));
+ }
+
+private:
+ bool __is_suspended() const {
+ // FIXME: actually implement a check for whether the coroutine is suspended.
+ return __handle_ != nullptr;
+ }
+ void* __handle_ = nullptr;
+};
+
+// [coroutine.handle.hash]
+template <class _Tp>
+struct hash<coroutine_handle<_Tp>> {
+ _LIBCPP_HIDE_FROM_ABI
+ size_t operator()(const coroutine_handle<_Tp>& __v) const noexcept { return hash<void*>()(__v.address()); }
+};
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // __LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CXX20_COROUTINES)
+
+#endif // _LIBCPP___COROUTINE_COROUTINE_HANDLE_H
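A brief usage sketch, not part of the patch, of the type-erased handle defined above; the address()/from_address() round-trip and the boolean test need no live coroutine:

    #include <cassert>
    #include <coroutine>

    void handle_demo() {
      std::coroutine_handle<> h;     // default-constructed, empty
      assert(!h);                    // contextually false while empty
      void* p = h.address();         // nullptr for an empty handle
      auto h2 = std::coroutine_handle<>::from_address(p);
      assert(!h2);                   // still empty after the round-trip
    }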
diff --git a/libcxx/include/__coroutine/coroutine_traits.h b/libcxx/include/__coroutine/coroutine_traits.h
new file mode 100644
index 000000000000..bfa69552bd79
--- /dev/null
+++ b/libcxx/include/__coroutine/coroutine_traits.h
@@ -0,0 +1,53 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___COROUTINE_COROUTINE_TRAITS_H
+#define _LIBCPP___COROUTINE_COROUTINE_TRAITS_H
+
+#include <__config>
+#include <type_traits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CXX20_COROUTINES)
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+// [coroutine.traits]
+// [coroutine.traits.primary]
+// The header <coroutine> defines the primary template coroutine_traits such that
+// if ArgTypes is a parameter pack of types and if the qualified-id R::promise_type
+// is valid and denotes a type ([temp.deduct]), then coroutine_traits<R, ArgTypes...>
+// has the following publicly accessible member:
+//
+// using promise_type = typename R::promise_type;
+//
+// Otherwise, coroutine_traits<R, ArgTypes...> has no members.
+template <class _Tp, class = void>
+struct __coroutine_traits_sfinae {};
+
+template <class _Tp>
+struct __coroutine_traits_sfinae<
+ _Tp, typename __void_t<typename _Tp::promise_type>::type>
+{
+ using promise_type = typename _Tp::promise_type;
+};
+
+template <class _Ret, class... _Args>
+struct coroutine_traits
+ : public __coroutine_traits_sfinae<_Ret>
+{
+};
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // __LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CXX20_COROUTINES)
+
+#endif // _LIBCPP___COROUTINE_COROUTINE_TRAITS_H
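The comment block above mirrors [coroutine.traits.primary]: coroutine_traits<R, ArgTypes...> exposes promise_type exactly when R::promise_type is valid. A short sketch with a hypothetical return type:

    #include <coroutine>
    #include <type_traits>

    struct task {                        // hypothetical coroutine return type
      struct promise_type { /* ... */ };
    };
    static_assert(std::is_same_v<std::coroutine_traits<task>::promise_type,
                                 task::promise_type>);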
diff --git a/libcxx/include/__coroutine/noop_coroutine_handle.h b/libcxx/include/__coroutine/noop_coroutine_handle.h
new file mode 100644
index 000000000000..9dbf21aac5e6
--- /dev/null
+++ b/libcxx/include/__coroutine/noop_coroutine_handle.h
@@ -0,0 +1,86 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___COROUTINE_NOOP_COROUTINE_HANDLE_H
+#define _LIBCPP___COROUTINE_NOOP_COROUTINE_HANDLE_H
+
+#include <__config>
+#include <__coroutine/coroutine_handle.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CXX20_COROUTINES)
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if __has_builtin(__builtin_coro_noop)
+// [coroutine.noop]
+// [coroutine.promise.noop]
+struct noop_coroutine_promise {};
+
+// [coroutine.handle.noop]
+template <>
+struct _LIBCPP_TEMPLATE_VIS coroutine_handle<noop_coroutine_promise> {
+public:
+ // [coroutine.handle.noop.conv], conversion
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr operator coroutine_handle<>() const noexcept {
+ return coroutine_handle<>::from_address(address());
+ }
+
+ // [coroutine.handle.noop.observers], observers
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr explicit operator bool() const noexcept { return true; }
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr bool done() const noexcept { return false; }
+
+ // [coroutine.handle.noop.resumption], resumption
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr void operator()() const noexcept {}
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr void resume() const noexcept {}
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr void destroy() const noexcept {}
+
+ // [coroutine.handle.noop.promise], promise access
+ _LIBCPP_HIDE_FROM_ABI
+ noop_coroutine_promise& promise() const noexcept {
+ return *static_cast<noop_coroutine_promise*>(
+ __builtin_coro_promise(this->__handle_, alignof(noop_coroutine_promise), false));
+ }
+
+ // [coroutine.handle.noop.address], address
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr void* address() const noexcept { return __handle_; }
+
+private:
+ _LIBCPP_HIDE_FROM_ABI
+ friend coroutine_handle<noop_coroutine_promise> noop_coroutine() noexcept;
+
+ _LIBCPP_HIDE_FROM_ABI coroutine_handle() noexcept {
+ this->__handle_ = __builtin_coro_noop();
+ }
+
+ void* __handle_ = nullptr;
+};
+
+using noop_coroutine_handle = coroutine_handle<noop_coroutine_promise>;
+
+// [coroutine.noop.coroutine]
+inline _LIBCPP_HIDE_FROM_ABI
+noop_coroutine_handle noop_coroutine() noexcept { return noop_coroutine_handle(); }
+
+#endif // __has_builtin(__builtin_coro_noop)
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // __LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CXX20_COROUTINES)
+
+#endif // _LIBCPP___COROUTINE_NOOP_COROUTINE_HANDLE_H
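A short sketch, not part of the patch, of what the no-op handle guarantees: it always reports a valid, never-done coroutine, and resuming or destroying it is harmless.

    #include <cassert>
    #include <coroutine>

    void noop_demo() {
      std::noop_coroutine_handle h = std::noop_coroutine();
      assert(h && !h.done());   // always valid, never done
      h.resume();               // a no-op rather than undefined behavior
      h.destroy();              // likewise a no-op
    }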
diff --git a/libcxx/include/__coroutine/trivial_awaitables.h b/libcxx/include/__coroutine/trivial_awaitables.h
new file mode 100644
index 000000000000..c434f83b78bb
--- /dev/null
+++ b/libcxx/include/__coroutine/trivial_awaitables.h
@@ -0,0 +1,46 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LIBCPP___COROUTINE_TRIVIAL_AWAITABLES_H
+#define __LIBCPP___COROUTINE_TRIVIAL_AWAITABLES_H
+
+#include <__config>
+#include <__coroutine/coroutine_handle.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CXX20_COROUTINES)
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+// [coroutine.trivial.awaitables]
+struct suspend_never {
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr bool await_ready() const noexcept { return true; }
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr void await_suspend(coroutine_handle<>) const noexcept {}
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr void await_resume() const noexcept {}
+};
+
+struct suspend_always {
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr bool await_ready() const noexcept { return false; }
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr void await_suspend(coroutine_handle<>) const noexcept {}
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr void await_resume() const noexcept {}
+};
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // __LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CXX20_COROUTINES)
+
+#endif // __LIBCPP___COROUTINE_TRIVIAL_AWAITABLES_H
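A minimal sketch of where these awaitables are typically used: as the initial and final suspend points of a promise type. The fire-and-forget coroutine below is hypothetical, not part of the patch.

    #include <coroutine>

    struct fire_and_forget {
      struct promise_type {
        fire_and_forget get_return_object() { return {}; }
        std::suspend_never initial_suspend() noexcept { return {}; }  // start eagerly
        std::suspend_never final_suspend() noexcept { return {}; }    // clean up automatically
        void return_void() {}
        void unhandled_exception() {}
      };
    };

    fire_and_forget fire() { co_return; }  // runs to completion when called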
diff --git a/libcxx/include/__debug b/libcxx/include/__debug
index 771e4316320b..e25039c088c6 100644
--- a/libcxx/include/__debug
+++ b/libcxx/include/__debug
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- __debug ----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/__errc b/libcxx/include/__errc
index 81da2e1970c8..68d5fa320150 100644
--- a/libcxx/include/__errc
+++ b/libcxx/include/__errc
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===---------------------------- __errc ----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/__format/format_arg.h b/libcxx/include/__format/format_arg.h
new file mode 100644
index 000000000000..a9a8c1f0da03
--- /dev/null
+++ b/libcxx/include/__format/format_arg.h
@@ -0,0 +1,256 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___FORMAT_FORMAT_ARG_H
+#define _LIBCPP___FORMAT_FORMAT_ARG_H
+
+#include <__concepts/arithmetic.h>
+#include <__config>
+#include <__format/format_error.h>
+#include <__format/format_fwd.h>
+#include <__functional_base>
+#include <__variant/monostate.h>
+#include <string>
+#include <string_view>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17
+
+// TODO FMT Remove this once we require compilers with proper C++20 support.
+// If the compiler has no concepts support, the format header will be disabled.
+// Without concepts support enable_if needs to be used and that is too much effort
+// to support compilers with partial C++20 support.
+#if !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+namespace __format {
+/** The type stored in @ref basic_format_arg. */
+enum class _LIBCPP_ENUM_VIS __arg_t : uint8_t {
+ __none,
+ __boolean,
+ __char_type,
+ __int,
+ __long_long,
+#ifndef _LIBCPP_HAS_NO_INT128
+ __i128,
+#endif
+ __unsigned,
+ __unsigned_long_long,
+#ifndef _LIBCPP_HAS_NO_INT128
+ __u128,
+#endif
+ __float,
+ __double,
+ __long_double,
+ __const_char_type_ptr,
+ __string_view,
+ __ptr
+};
+} // namespace __format
+
+template <class _Visitor, class _Context>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT decltype(auto)
+visit_format_arg(_Visitor&& __vis, basic_format_arg<_Context> __arg) {
+ switch (__arg.__type_) {
+ case __format::__arg_t::__none:
+ return _VSTD::invoke(_VSTD::forward<_Visitor>(__vis), monostate{});
+ case __format::__arg_t::__boolean:
+ return _VSTD::invoke(_VSTD::forward<_Visitor>(__vis), __arg.__boolean);
+ case __format::__arg_t::__char_type:
+ return _VSTD::invoke(_VSTD::forward<_Visitor>(__vis), __arg.__char_type);
+ case __format::__arg_t::__int:
+ return _VSTD::invoke(_VSTD::forward<_Visitor>(__vis), __arg.__int);
+ case __format::__arg_t::__long_long:
+ return _VSTD::invoke(_VSTD::forward<_Visitor>(__vis), __arg.__long_long);
+#ifndef _LIBCPP_HAS_NO_INT128
+ case __format::__arg_t::__i128:
+ return _VSTD::invoke(_VSTD::forward<_Visitor>(__vis), __arg.__i128);
+#endif
+ case __format::__arg_t::__unsigned:
+ return _VSTD::invoke(_VSTD::forward<_Visitor>(__vis), __arg.__unsigned);
+ case __format::__arg_t::__unsigned_long_long:
+ return _VSTD::invoke(_VSTD::forward<_Visitor>(__vis),
+ __arg.__unsigned_long_long);
+#ifndef _LIBCPP_HAS_NO_INT128
+ case __format::__arg_t::__u128:
+ return _VSTD::invoke(_VSTD::forward<_Visitor>(__vis), __arg.__u128);
+#endif
+ case __format::__arg_t::__float:
+ return _VSTD::invoke(_VSTD::forward<_Visitor>(__vis), __arg.__float);
+ case __format::__arg_t::__double:
+ return _VSTD::invoke(_VSTD::forward<_Visitor>(__vis), __arg.__double);
+ case __format::__arg_t::__long_double:
+ return _VSTD::invoke(_VSTD::forward<_Visitor>(__vis), __arg.__long_double);
+ case __format::__arg_t::__const_char_type_ptr:
+ return _VSTD::invoke(_VSTD::forward<_Visitor>(__vis),
+ __arg.__const_char_type_ptr);
+ case __format::__arg_t::__string_view:
+ return _VSTD::invoke(_VSTD::forward<_Visitor>(__vis), __arg.__string_view);
+ case __format::__arg_t::__ptr:
+ return _VSTD::invoke(_VSTD::forward<_Visitor>(__vis), __arg.__ptr);
+ }
+ _LIBCPP_UNREACHABLE();
+}
+
+template <class _Context>
+class _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT basic_format_arg {
+public:
+ // TODO FMT Define the handle class.
+ class handle;
+
+ _LIBCPP_HIDE_FROM_ABI basic_format_arg() noexcept
+ : __type_{__format::__arg_t::__none} {}
+
+ _LIBCPP_HIDE_FROM_ABI explicit operator bool() const noexcept {
+ return __type_ != __format::__arg_t::__none;
+ }
+
+private:
+ using char_type = typename _Context::char_type;
+
+ // TODO FMT Implement constraint [format.arg]/4
+ // Constraints: The template specialization
+ // typename Context::template formatter_type<T>
+ // meets the Formatter requirements ([formatter.requirements]). The extent
+ // to which an implementation determines that the specialization meets the
+ // Formatter requirements is unspecified, except that as a minimum the
+ // expression
+ // typename Context::template formatter_type<T>()
+ // .format(declval<const T&>(), declval<Context&>())
+ // shall be well-formed when treated as an unevaluated operand.
+
+ template <class _Ctx, class... _Args>
+ _LIBCPP_HIDE_FROM_ABI
+ _LIBCPP_AVAILABILITY_FORMAT friend __format_arg_store<_Ctx, _Args...>
+ _VSTD::make_format_args(const _Args&...);
+
+ template <class _Visitor, class _Ctx>
+ _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT friend decltype(auto)
+ _VSTD::visit_format_arg(_Visitor&& __vis, basic_format_arg<_Ctx> __arg);
+
+ union {
+ bool __boolean;
+ char_type __char_type;
+ int __int;
+ unsigned __unsigned;
+ long long __long_long;
+ unsigned long long __unsigned_long_long;
+#ifndef _LIBCPP_HAS_NO_INT128
+ __int128_t __i128;
+ __uint128_t __u128;
+#endif
+ float __float;
+ double __double;
+ long double __long_double;
+ const char_type* __const_char_type_ptr;
+ basic_string_view<char_type> __string_view;
+ const void* __ptr;
+ // TODO FMT Add the handle.
+ };
+ __format::__arg_t __type_;
+
+ _LIBCPP_HIDE_FROM_ABI explicit basic_format_arg(bool __v) noexcept
+ : __boolean(__v), __type_(__format::__arg_t::__boolean) {}
+
+ template <class _Tp>
+ _LIBCPP_HIDE_FROM_ABI explicit basic_format_arg(_Tp __v) noexcept
+ requires(same_as<_Tp, char_type> ||
+ (same_as<_Tp, char> && same_as<char_type, wchar_t>))
+ : __char_type(__v), __type_(__format::__arg_t::__char_type) {}
+
+ template <__libcpp_signed_integer _Tp>
+ _LIBCPP_HIDE_FROM_ABI explicit basic_format_arg(_Tp __v) noexcept {
+ if constexpr (sizeof(_Tp) <= sizeof(int)) {
+ __int = static_cast<int>(__v);
+ __type_ = __format::__arg_t::__int;
+ } else if constexpr (sizeof(_Tp) <= sizeof(long long)) {
+ __long_long = static_cast<long long>(__v);
+ __type_ = __format::__arg_t::__long_long;
+ }
+#ifndef _LIBCPP_HAS_NO_INT128
+ else if constexpr (sizeof(_Tp) == sizeof(__int128_t)) {
+ __i128 = __v;
+ __type_ = __format::__arg_t::__i128;
+ }
+#endif
+ else
+ static_assert(sizeof(_Tp) == 0, "An unsupported signed integer was used");
+ }
+
+ template <__libcpp_unsigned_integer _Tp>
+ _LIBCPP_HIDE_FROM_ABI explicit basic_format_arg(_Tp __v) noexcept {
+ if constexpr (sizeof(_Tp) <= sizeof(unsigned)) {
+ __unsigned = static_cast<unsigned>(__v);
+ __type_ = __format::__arg_t::__unsigned;
+ } else if constexpr (sizeof(_Tp) <= sizeof(unsigned long long)) {
+ __unsigned_long_long = static_cast<unsigned long long>(__v);
+ __type_ = __format::__arg_t::__unsigned_long_long;
+ }
+#ifndef _LIBCPP_HAS_NO_INT128
+ else if constexpr (sizeof(_Tp) == sizeof(__int128_t)) {
+ __u128 = __v;
+ __type_ = __format::__arg_t::__u128;
+ }
+#endif
+ else
+ static_assert(sizeof(_Tp) == 0,
+ "An unsupported unsigned integer was used");
+ }
+
+ _LIBCPP_HIDE_FROM_ABI explicit basic_format_arg(float __v) noexcept
+ : __float(__v), __type_(__format::__arg_t::__float) {}
+
+ _LIBCPP_HIDE_FROM_ABI explicit basic_format_arg(double __v) noexcept
+ : __double(__v), __type_(__format::__arg_t::__double) {}
+
+ _LIBCPP_HIDE_FROM_ABI explicit basic_format_arg(long double __v) noexcept
+ : __long_double(__v), __type_(__format::__arg_t::__long_double) {}
+
+ // Note: this is not a 'noexcept' function.
+ _LIBCPP_HIDE_FROM_ABI explicit basic_format_arg(const char_type* __s)
+ : __const_char_type_ptr(__s),
+ __type_(__format::__arg_t::__const_char_type_ptr) {
+ _LIBCPP_ASSERT(__s, "Used a nullptr argument to initialize a C-string");
+ }
+
+ template <class _Traits>
+ _LIBCPP_HIDE_FROM_ABI explicit basic_format_arg(
+ basic_string_view<char_type, _Traits> __s) noexcept
+ : __string_view{__s.data(), __s.size()},
+ __type_(__format::__arg_t::__string_view) {}
+
+ template <class _Traits, class _Allocator>
+ _LIBCPP_HIDE_FROM_ABI explicit basic_format_arg(
+ const basic_string<char_type, _Traits, _Allocator>& __s) noexcept
+ : __string_view{__s.data(), __s.size()},
+ __type_(__format::__arg_t::__string_view) {}
+
+ _LIBCPP_HIDE_FROM_ABI
+ explicit basic_format_arg(nullptr_t) noexcept
+ : __ptr(nullptr), __type_(__format::__arg_t::__ptr) {}
+
+ // TODO FMT Implement the _Tp* constructor.
+};
+
+#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+#endif //_LIBCPP_STD_VER > 17
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___FORMAT_FORMAT_ARG_H
diff --git a/libcxx/include/__format/format_args.h b/libcxx/include/__format/format_args.h
new file mode 100644
index 000000000000..0a26b95d1b47
--- /dev/null
+++ b/libcxx/include/__format/format_args.h
@@ -0,0 +1,71 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___FORMAT_FORMAT_ARGS_H
+#define _LIBCPP___FORMAT_FORMAT_ARGS_H
+
+#include <__availability>
+#include <__config>
+#include <__format/format_fwd.h>
+#include <cstddef>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17
+
+// TODO FMT Remove this once we require compilers with proper C++20 support.
+// If the compiler has no concepts support, the format header will be disabled.
+// Without concepts support enable_if needs to be used and that is too much effort
+// to support compilers with partial C++20 support.
+#if !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+template <class _Context>
+class _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT basic_format_args {
+public:
+ // TODO FMT Implement [format.args]/5
+ // [Note 1: Implementations are encouraged to optimize the representation of
+ // basic_format_args for small number of formatting arguments by storing
+ // indices of type alternatives separately from values and packing the
+ // former. - end note]
+ // Note: Change __format_arg_store to use a built-in array.
+ _LIBCPP_HIDE_FROM_ABI basic_format_args() noexcept = default;
+
+ template <class... _Args>
+ _LIBCPP_HIDE_FROM_ABI basic_format_args(
+ const __format_arg_store<_Context, _Args...>& __store) noexcept
+ : __size_(sizeof...(_Args)), __data_(__store.__args.data()) {}
+
+ _LIBCPP_HIDE_FROM_ABI
+ basic_format_arg<_Context> get(size_t __id) const noexcept {
+ return __id < __size_ ? __data_[__id] : basic_format_arg<_Context>{};
+ }
+
+ _LIBCPP_HIDE_FROM_ABI size_t __size() const noexcept { return __size_; }
+
+private:
+ size_t __size_{0};
+ const basic_format_arg<_Context>* __data_{nullptr};
+};
+
+#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+#endif //_LIBCPP_STD_VER > 17
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___FORMAT_FORMAT_ARGS_H
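A hedged sketch of how basic_format_args and visit_format_arg fit together once the remaining <format> pieces land: the generic visitor receives the stored alternative (an int here), and out-of-range ids yield an empty argument.

    #include <cassert>
    #include <format>

    void args_demo() {
      auto store = std::make_format_args(42);
      std::format_args args{store};
      std::visit_format_arg([](auto value) { /* value is an int for id 0 */ }, args.get(0));
      assert(!args.get(1));   // ids past the stored arguments are empty
    }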
diff --git a/libcxx/include/__format/format_context.h b/libcxx/include/__format/format_context.h
new file mode 100644
index 000000000000..b4fe5cc7b12c
--- /dev/null
+++ b/libcxx/include/__format/format_context.h
@@ -0,0 +1,168 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___FORMAT_FORMAT_CONTEXT_H
+#define _LIBCPP___FORMAT_FORMAT_CONTEXT_H
+
+#include <__availability>
+#include <__config>
+#include <__format/format_args.h>
+#include <__format/format_fwd.h>
+#include <__iterator/concepts.h>
+#include <concepts>
+#include <iterator>
+#include <string>
+
+#ifndef _LIBCPP_HAS_NO_LOCALIZATION
+#include <locale>
+#include <optional>
+#endif
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17
+
+// TODO FMT Remove this once we require compilers with proper C++20 support.
+// If the compiler has no concepts support, the format header will be disabled.
+// Without concepts support enable_if needs to be used and that is too much effort
+// to support compilers with partial C++20 support.
+#if !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+template <class _OutIt, class _CharT>
+requires output_iterator<_OutIt, const _CharT&>
+class _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT basic_format_context;
+
+#ifndef _LIBCPP_HAS_NO_LOCALIZATION
+/**
+ * Helper to create a basic_format_context.
+ *
+ * This is needed since the constructor is private.
+ */
+template <class _OutIt, class _CharT>
+_LIBCPP_HIDE_FROM_ABI basic_format_context<_OutIt, _CharT>
+__format_context_create(
+ _OutIt __out_it,
+ basic_format_args<basic_format_context<_OutIt, _CharT>> __args,
+ optional<_VSTD::locale>&& __loc = nullopt) {
+ return _VSTD::basic_format_context(_VSTD::move(__out_it), __args,
+ _VSTD::move(__loc));
+}
+#else
+template <class _OutIt, class _CharT>
+_LIBCPP_HIDE_FROM_ABI basic_format_context<_OutIt, _CharT>
+__format_context_create(
+ _OutIt __out_it,
+ basic_format_args<basic_format_context<_OutIt, _CharT>> __args) {
+ return _VSTD::basic_format_context(_VSTD::move(__out_it), __args);
+}
+#endif
+
+// TODO FMT Implement [format.context]/4
+// [Note 1: For a given type charT, implementations are encouraged to provide a
+// single instantiation of basic_format_context for appending to
+// basic_string<charT>, vector<charT>, or any other container with contiguous
+// storage by wrapping those in temporary objects with a uniform interface
+// (such as a span<charT>) and polymorphic reallocation. - end note]
+
+using format_context = basic_format_context<back_insert_iterator<string>, char>;
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
+using wformat_context = basic_format_context<back_insert_iterator<wstring>, wchar_t>;
+#endif
+
+template <class _OutIt, class _CharT>
+requires output_iterator<_OutIt, const _CharT&>
+class
+ // clang-format off
+ _LIBCPP_TEMPLATE_VIS
+ _LIBCPP_AVAILABILITY_FORMAT
+ _LIBCPP_PREFERRED_NAME(format_context)
+ _LIBCPP_IF_WIDE_CHARACTERS(_LIBCPP_PREFERRED_NAME(wformat_context))
+ // clang-format on
+ basic_format_context {
+public:
+ using iterator = _OutIt;
+ using char_type = _CharT;
+ template <class _Tp>
+ using formatter_type = formatter<_Tp, _CharT>;
+
+ basic_format_context(const basic_format_context&) = delete;
+ basic_format_context& operator=(const basic_format_context&) = delete;
+
+ _LIBCPP_HIDE_FROM_ABI basic_format_arg<basic_format_context>
+ arg(size_t __id) const {
+ return __args_.get(__id);
+ }
+#ifndef _LIBCPP_HAS_NO_LOCALIZATION
+ _LIBCPP_HIDE_FROM_ABI _VSTD::locale locale() {
+ if (!__loc_)
+ __loc_ = _VSTD::locale{};
+ return *__loc_;
+ }
+#endif
+ _LIBCPP_HIDE_FROM_ABI iterator out() { return __out_it_; }
+ _LIBCPP_HIDE_FROM_ABI void advance_to(iterator __it) { __out_it_ = __it; }
+
+private:
+ iterator __out_it_;
+ basic_format_args<basic_format_context> __args_;
+#ifndef _LIBCPP_HAS_NO_LOCALIZATION
+
+ // The Standard doesn't specify how the locale is stored.
+ // [format.context]/6
+ // std::locale locale();
+ // Returns: The locale passed to the formatting function if the latter
+ // takes one, and std::locale() otherwise.
+ // This is done by storing the locale passed to the constructor in this optional. If
+ // locale() is called and the optional has no value, the value will be created.
+ // This allows the implementation to lazily create the locale.
+ // TODO FMT Validate whether lazy creation is the best solution.
+ optional<_VSTD::locale> __loc_;
+
+ template <class __OutIt, class __CharT>
+ friend _LIBCPP_HIDE_FROM_ABI basic_format_context<__OutIt, __CharT>
+ _VSTD::__format_context_create(
+ __OutIt, basic_format_args<basic_format_context<__OutIt, __CharT>>,
+ optional<_VSTD::locale>&&);
+
+ // Note: the Standard doesn't specify the required constructors.
+ _LIBCPP_HIDE_FROM_ABI
+ explicit basic_format_context(_OutIt __out_it,
+ basic_format_args<basic_format_context> __args,
+ optional<_VSTD::locale>&& __loc)
+ : __out_it_(_VSTD::move(__out_it)), __args_(__args),
+ __loc_(_VSTD::move(__loc)) {}
+#else
+ template <class __OutIt, class __CharT>
+ friend _LIBCPP_HIDE_FROM_ABI basic_format_context<__OutIt, __CharT>
+ _VSTD::__format_context_create(
+ __OutIt, basic_format_args<basic_format_context<__OutIt, __CharT>>);
+
+ _LIBCPP_HIDE_FROM_ABI
+ explicit basic_format_context(_OutIt __out_it,
+ basic_format_args<basic_format_context> __args)
+ : __out_it_(_VSTD::move(__out_it)), __args_(__args) {}
+#endif
+};
+
+#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+#endif //_LIBCPP_STD_VER > 17
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___FORMAT_FORMAT_CONTEXT_H
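A sketch of how a user-provided formatter consumes the context defined above: it writes through out() and returns the advanced iterator. The enumeration and its textual names are hypothetical.

    #include <algorithm>
    #include <format>
    #include <string_view>

    enum class color { red, green };

    template <>
    struct std::formatter<color, char> {
      constexpr auto parse(std::format_parse_context& ctx) { return ctx.begin(); }
      auto format(color c, std::format_context& ctx) {
        std::string_view name = c == color::red ? "red" : "green";
        return std::copy(name.begin(), name.end(), ctx.out());
      }
    };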
diff --git a/libcxx/include/__format/format_error.h b/libcxx/include/__format/format_error.h
index f983d0ce4ac4..ac1d708030d6 100644
--- a/libcxx/include/__format/format_error.h
+++ b/libcxx/include/__format/format_error.h
@@ -21,9 +21,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER > 17
@@ -51,6 +48,4 @@ __throw_format_error(const char* __s) {
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___FORMAT_FORMAT_ERROR_H
diff --git a/libcxx/include/__format/format_fwd.h b/libcxx/include/__format/format_fwd.h
new file mode 100644
index 000000000000..7da30aec5188
--- /dev/null
+++ b/libcxx/include/__format/format_fwd.h
@@ -0,0 +1,56 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___FORMAT_FORMAT_FWD_H
+#define _LIBCPP___FORMAT_FORMAT_FWD_H
+
+#include <__availability>
+#include <__config>
+#include <__iterator/concepts.h>
+#include <__utility/forward.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17
+
+// TODO FMT Remove this once we require compilers with proper C++20 support.
+// If the compiler has no concepts support, the format header will be disabled.
+// Without concepts support enable_if needs to be used and that is too much effort
+// to support compilers with partial C++20 support.
+#if !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+template <class _Context>
+class _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT basic_format_arg;
+
+template <class _Context, class... _Args>
+struct _LIBCPP_TEMPLATE_VIS __format_arg_store;
+
+template <class _Ctx, class... _Args>
+_LIBCPP_HIDE_FROM_ABI __format_arg_store<_Ctx, _Args...>
+make_format_args(const _Args&...);
+
+template <class _Tp, class _CharT = char>
+struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter;
+
+#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+#endif //_LIBCPP_STD_VER > 17
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___FORMAT_FORMAT_FWD_H
diff --git a/libcxx/include/__format/format_parse_context.h b/libcxx/include/__format/format_parse_context.h
index db39c1b54830..289cab9f0ee4 100644
--- a/libcxx/include/__format/format_parse_context.h
+++ b/libcxx/include/__format/format_parse_context.h
@@ -18,9 +18,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER > 17
@@ -29,8 +26,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD
// If the compiler has no concepts support, the format header will be disabled.
// Without concepts support enable_if needs to be used and that too much effort
// to support compilers with partial C++20 support.
-#if !defined(_LIBCPP_HAS_NO_CONCEPTS) && \
- !defined(_LIBCPP_HAS_NO_BUILTIN_IS_CONSTANT_EVALUATED)
+#if !defined(_LIBCPP_HAS_NO_CONCEPTS)
template <class _CharT>
class _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT basic_format_parse_context {
@@ -100,14 +96,14 @@ private:
};
using format_parse_context = basic_format_parse_context<char>;
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
using wformat_parse_context = basic_format_parse_context<wchar_t>;
+#endif
-#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) && !defined(_LIBCPP_HAS_NO_BUILTIN_IS_CONSTANT_EVALUATED)
+#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS)
#endif //_LIBCPP_STD_VER > 17
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___FORMAT_FORMAT_PARSE_CONTEXT_H
diff --git a/libcxx/include/__format/format_string.h b/libcxx/include/__format/format_string.h
new file mode 100644
index 000000000000..885e572fc068
--- /dev/null
+++ b/libcxx/include/__format/format_string.h
@@ -0,0 +1,169 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___FORMAT_FORMAT_STRING_H
+#define _LIBCPP___FORMAT_FORMAT_STRING_H
+
+#include <__config>
+#include <__debug>
+#include <__format/format_error.h>
+#include <cstddef>
+#include <cstdint>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17
+
+// TODO FMT Remove this once we require compilers with proper C++20 support.
+// If the compiler has no concepts support, the format header will be disabled.
+// Without concepts support enable_if needs to be used and that is too much effort
+// to support compilers with partial C++20 support.
+#if !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+namespace __format {
+
+template <class _CharT>
+struct _LIBCPP_TEMPLATE_VIS __parse_number_result {
+ const _CharT* __ptr;
+ uint32_t __value;
+};
+
+template <class _CharT>
+_LIBCPP_HIDE_FROM_ABI constexpr __parse_number_result<_CharT>
+__parse_number(const _CharT* __begin, const _CharT* __end);
+
+/**
+ * The maximum value of a numeric argument.
+ *
+ * This is used for:
+ * * arg-id
+ * * width as value or arg-id.
+ * * precision as value or arg-id.
+ *
+ * The value is compatible with the maximum formatting width and precision
+ * using the `%*` syntax on a 32-bit system.
+ */
+inline constexpr uint32_t __number_max = INT32_MAX;
+
+namespace __detail {
+template <class _CharT>
+_LIBCPP_HIDE_FROM_ABI constexpr __parse_number_result<_CharT>
+__parse_zero(const _CharT* __begin, const _CharT*, auto& __parse_ctx) {
+ __parse_ctx.check_arg_id(0);
+ return {++__begin, 0}; // can never be larger than the maximum.
+}
+
+template <class _CharT>
+_LIBCPP_HIDE_FROM_ABI constexpr __parse_number_result<_CharT>
+__parse_automatic(const _CharT* __begin, const _CharT*, auto& __parse_ctx) {
+ size_t __value = __parse_ctx.next_arg_id();
+ _LIBCPP_ASSERT(__value <= __number_max,
+ "Compilers don't support this number of arguments");
+
+ return {__begin, uint32_t(__value)};
+}
+
+template <class _CharT>
+_LIBCPP_HIDE_FROM_ABI constexpr __parse_number_result<_CharT>
+__parse_manual(const _CharT* __begin, const _CharT* __end, auto& __parse_ctx) {
+ __parse_number_result<_CharT> __r = __parse_number(__begin, __end);
+ __parse_ctx.check_arg_id(__r.__value);
+ return __r;
+}
+
+} // namespace __detail
+
+/**
+ * Parses a number.
+ *
+ * The number is used for the 31-bit values @em width and @em precision. This
+ * allows a maximum value of 2147483647.
+ */
+template <class _CharT>
+_LIBCPP_HIDE_FROM_ABI constexpr __parse_number_result<_CharT>
+__parse_number(const _CharT* __begin, const _CharT* __end_input) {
+ static_assert(__format::__number_max == INT32_MAX,
+ "The algorithm is implemented based on this value.");
+ /*
+ * Limit the input to 9 digits, otherwise we need two checks during every
+ * iteration:
+ * - Are we at the end of the input?
+ * - Does the value exceed the width of a uint32_t? (Switching to uint64_t would
+ * have the same issue, but with a higher maximum.)
+ */
+ const _CharT* __end = __end_input - __begin > 9 ? __begin + 9 : __end_input;
+ uint32_t __value = *__begin - _CharT('0');
+ while (++__begin != __end) {
+ if (*__begin < _CharT('0') || *__begin > _CharT('9'))
+ return {__begin, __value};
+
+ __value = __value * 10 + *__begin - _CharT('0');
+ }
+
+ if (__begin != __end_input && *__begin >= _CharT('0') &&
+ *__begin <= _CharT('9')) {
+
+ /*
+ * There are more than 9 digits, do additional validations:
+ * - Does the 10th digit exceed the maximum allowed value?
+ * - Are there more than 10 digits?
+ * (More than 10 digits always overflows the maximum.)
+ */
+ uint64_t __v = uint64_t(__value) * 10 + *__begin++ - _CharT('0');
+ if (__v > __number_max ||
+ (__begin != __end_input && *__begin >= _CharT('0') &&
+ *__begin <= _CharT('9')))
+ __throw_format_error("The numeric value of the format-spec is too large");
+
+ __value = __v;
+ }
+
+ return {__begin, __value};
+}
+
+/**
+ * Multiplexer for all parse functions.
+ *
+ * The parser will return a pointer beyond the last consumed character. This
+ * should be the closing '}' of the arg-id.
+ */
+template <class _CharT>
+_LIBCPP_HIDE_FROM_ABI constexpr __parse_number_result<_CharT>
+__parse_arg_id(const _CharT* __begin, const _CharT* __end, auto& __parse_ctx) {
+ switch (*__begin) {
+ case _CharT('0'):
+ return __detail::__parse_zero(__begin, __end, __parse_ctx);
+
+ case _CharT(':'):
+ // This case is conditionally valid. It's allowed in an arg-id in the
+ // replacement-field, but not in the std-format-spec. The caller can
+ // provide a better diagnostic, so accept it here unconditionally.
+ case _CharT('}'):
+ return __detail::__parse_automatic(__begin, __end, __parse_ctx);
+ }
+ if (*__begin < _CharT('0') || *__begin > _CharT('9'))
+ __throw_format_error(
+ "The arg-id of the format-spec starts with an invalid character");
+
+ return __detail::__parse_manual(__begin, __end, __parse_ctx);
+}
+
+} // namespace __format
+
+#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+#endif //_LIBCPP_STD_VER > 17
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___FORMAT_FORMAT_STRING_H
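A worked sketch of the number parser above, on a hypothetical input: digits are consumed until the first non-digit, and values beyond INT32_MAX are rejected with a format_error.

    const char* fmt = "123:>8}";   // hypothetical format-spec fragment
    auto r = std::__format::__parse_number(fmt, fmt + 7);
    // r.__value == 123 and r.__ptr points at the ':' that ended the number.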
diff --git a/libcxx/include/__format/format_to_n_result.h b/libcxx/include/__format/format_to_n_result.h
new file mode 100644
index 000000000000..b973dc5c1dfe
--- /dev/null
+++ b/libcxx/include/__format/format_to_n_result.h
@@ -0,0 +1,41 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___FORMAT_FORMAT_TO_N_RESULT_H
+#define _LIBCPP___FORMAT_FORMAT_TO_N_RESULT_H
+
+#include <__config>
+#include <__iterator/incrementable_traits.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+# pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17
+
+// TODO FMT Remove this once we require compilers with proper C++20 support.
+// If the compiler has no concepts support, the format header will be disabled.
+// Without concepts support enable_if needs to be used and that is too much effort
+// to support compilers with partial C++20 support.
+#if !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+template <class _OutIt>
+struct _LIBCPP_TEMPLATE_VIS format_to_n_result {
+ _OutIt out;
+ iter_difference_t<_OutIt> size;
+};
+
+#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS)
+#endif //_LIBCPP_STD_VER > 17
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___FORMAT_FORMAT_TO_N_RESULT_H
diff --git a/libcxx/include/__format/formatter.h b/libcxx/include/__format/formatter.h
new file mode 100644
index 000000000000..1adce75a8611
--- /dev/null
+++ b/libcxx/include/__format/formatter.h
@@ -0,0 +1,268 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___FORMAT_FORMATTER_H
+#define _LIBCPP___FORMAT_FORMATTER_H
+
+#include <__algorithm/copy.h>
+#include <__algorithm/fill_n.h>
+#include <__availability>
+#include <__config>
+#include <__format/format_error.h>
+#include <__format/format_fwd.h>
+#include <__format/format_string.h>
+#include <__format/parser_std_format_spec.h>
+#include <concepts>
+#include <string_view>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17
+
+// TODO FMT Remove this once we require compilers with proper C++20 support.
+// If the compiler has no concepts support, the format header will be disabled.
+// Without concepts support enable_if needs to be used and that is too much effort
+// to support compilers with partial C++20 support.
+#if !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+// Specializations that have not been implemented yet throw an exception when
+// used. This does not conform to the Standard. However, not all Standard-defined
+// formatters have been implemented yet; until then, the current behavior is intended.
+// TODO FMT Disable the default template.
+template <class _Tp, class _CharT>
+struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter {
+ _LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI auto parse(auto& __parse_ctx)
+ -> decltype(__parse_ctx.begin()) {
+ __throw();
+ }
+
+ _LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI auto format(_Tp, auto& __ctx)
+ -> decltype(__ctx.out()) {
+ __throw();
+ }
+
+private:
+ _LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI void __throw() {
+ __throw_format_error("Argument type not implemented yet");
+ }
+};
+
+namespace __format_spec {
+
+_LIBCPP_HIDE_FROM_ABI inline char* __insert_sign(char* __buf, bool __negative,
+ _Flags::_Sign __sign) {
+ if (__negative)
+ *__buf++ = '-';
+ else
+ switch (__sign) {
+ case _Flags::_Sign::__default:
+ case _Flags::_Sign::__minus:
+ // No sign added.
+ break;
+ case _Flags::_Sign::__plus:
+ *__buf++ = '+';
+ break;
+ case _Flags::_Sign::__space:
+ *__buf++ = ' ';
+ break;
+ }
+
+ return __buf;
+}
+
+_LIBCPP_HIDE_FROM_ABI constexpr char __hex_to_upper(char c) {
+ switch (c) {
+ case 'a':
+ return 'A';
+ case 'b':
+ return 'B';
+ case 'c':
+ return 'C';
+ case 'd':
+ return 'D';
+ case 'e':
+ return 'E';
+ case 'f':
+ return 'F';
+ }
+ return c;
+}
+
+} // namespace __format_spec
+
+namespace __formatter {
+
+/** The character types that formatters are specialized for. */
+template <class _CharT>
+concept __char_type = same_as<_CharT, char> || same_as<_CharT, wchar_t>;
+
+struct _LIBCPP_TEMPLATE_VIS __padding_size_result {
+ size_t __before;
+ size_t __after;
+};
+
+_LIBCPP_HIDE_FROM_ABI constexpr __padding_size_result
+__padding_size(size_t __size, size_t __width,
+ __format_spec::_Flags::_Alignment __align) {
+ _LIBCPP_ASSERT(__width > __size,
+ "Don't call this function when no padding is required");
+ _LIBCPP_ASSERT(
+ __align != __format_spec::_Flags::_Alignment::__default,
+ "Caller should adjust the default to the value required by the type");
+
+ size_t __fill = __width - __size;
+ switch (__align) {
+ case __format_spec::_Flags::_Alignment::__default:
+ _LIBCPP_UNREACHABLE();
+
+ case __format_spec::_Flags::_Alignment::__left:
+ return {0, __fill};
+
+ case __format_spec::_Flags::_Alignment::__center: {
+ // The extra padding is divided per [format.string.std]/3
+ // __before = floor(__fill, 2);
+ // __after = ceil(__fill, 2);
+ size_t __before = __fill / 2;
+ size_t __after = __fill - __before;
+ return {__before, __after};
+ }
+ case __format_spec::_Flags::_Alignment::__right:
+ return {__fill, 0};
+ }
+ _LIBCPP_UNREACHABLE();
+}
+
+/**
+ * Writes the input to the output with the required padding.
+ *
+ * Since the output column width is specified the function can be used for
+ * ASCII and Unicode input.
+ *
+ * @pre [@a __first, @a __last) is a valid range.
+ * @pre @a __size < @a __width. Using this function when this pre-condition
+ * doesn't hold incurs an unwanted overhead.
+ *
+ * @param __out_it The output iterator to write to.
+ * @param __first Pointer to the first element to write.
+ * @param __last Pointer beyond the last element to write.
+ * @param __size The (estimated) output column width. When the elements
+ * to be written are ASCII the following condition holds
+ * @a __size == @a __last - @a __first.
+ * @param __width The number of output columns to write.
+ * @param __fill The character used for the alignment of the output.
+ * TODO FMT Will probably change to support Unicode grapheme
+ * cluster.
+ * @param __alignment The requested alignment.
+ *
+ * @returns An iterator pointing beyond the last element written.
+ *
+ * @note The type of the elements in range [@a __first, @a __last) can differ
+ * from the type of @a __fill. Integer output uses @c std::to_chars for its
+ * conversion, which means the range [@a __first, @a __last) always contains
+ * elements of the type @c char.
+ */
+template <class _CharT, class _Fill>
+_LIBCPP_HIDE_FROM_ABI auto
+__write(output_iterator<const _CharT&> auto __out_it, const _CharT* __first,
+ const _CharT* __last, size_t __size, size_t __width, _Fill __fill,
+ __format_spec::_Flags::_Alignment __alignment) -> decltype(__out_it) {
+
+ _LIBCPP_ASSERT(__first <= __last, "Not a valid range");
+ _LIBCPP_ASSERT(__size < __width, "Precondition failure");
+
+ __padding_size_result __padding =
+ __padding_size(__size, __width, __alignment);
+ __out_it = _VSTD::fill_n(_VSTD::move(__out_it), __padding.__before, __fill);
+ __out_it = _VSTD::copy(__first, __last, _VSTD::move(__out_it));
+ return _VSTD::fill_n(_VSTD::move(__out_it), __padding.__after, __fill);
+}
+
+/**
+ * @overload
+ *
+ * Uses a transformation operation before writing an element.
+ *
+ * TODO FMT Fill will probably change to support Unicode grapheme cluster.
+ */
+template <class _CharT, class _UnaryOperation, class _Fill>
+_LIBCPP_HIDE_FROM_ABI auto
+__write(output_iterator<const _CharT&> auto __out_it, const _CharT* __first,
+ const _CharT* __last, size_t __size, _UnaryOperation __op,
+ size_t __width, _Fill __fill,
+ __format_spec::_Flags::_Alignment __alignment) -> decltype(__out_it) {
+
+ _LIBCPP_ASSERT(__first <= __last, "Not a valid range");
+ _LIBCPP_ASSERT(__size < __width, "Precondition failure");
+
+ __padding_size_result __padding =
+ __padding_size(__size, __width, __alignment);
+ __out_it = _VSTD::fill_n(_VSTD::move(__out_it), __padding.__before, __fill);
+ __out_it = _VSTD::transform(__first, __last, _VSTD::move(__out_it), __op);
+ return _VSTD::fill_n(_VSTD::move(__out_it), __padding.__after, __fill);
+}
+
+/**
+ * Writes Unicode input to the output with the required padding.
+ *
+ * This function does almost the same as the @ref __write function, but handles
+ * the width estimation of the Unicode input.
+ *
+ * @param __str The range [@a __first, @a __last).
+ * @param __precision The width to truncate the input string to, use @c -1 for
+ * no limit.
+ */
+template <class _CharT, class _Fill>
+_LIBCPP_HIDE_FROM_ABI auto
+__write_unicode(output_iterator<const _CharT&> auto __out_it,
+ basic_string_view<_CharT> __str, ptrdiff_t __width,
+ ptrdiff_t __precision, _Fill __fill,
+ __format_spec::_Flags::_Alignment __alignment)
+ -> decltype(__out_it) {
+
+ // This value changes when the Unicode column width limits the output
+ // size.
+ auto __last = __str.end();
+ if (__width != 0 || __precision != -1) {
+ __format_spec::__string_alignment<_CharT> __format_traits =
+ __format_spec::__get_string_alignment(__str.begin(), __str.end(),
+ __width, __precision);
+
+ if (__format_traits.__align)
+ return __write(_VSTD::move(__out_it), __str.begin(),
+ __format_traits.__last, __format_traits.__size, __width,
+ __fill, __alignment);
+
+ // No alignment is required; update the output based on the precision.
+ // This might be the same as __str.end().
+ __last = __format_traits.__last;
+ }
+
+ // Copy the input to the output. The output size might be limited by the
+ // precision.
+ return _VSTD::copy(__str.begin(), __last, _VSTD::move(__out_it));
+}
+
+} // namespace __formatter
+
+#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+#endif //_LIBCPP_STD_VER > 17
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___FORMAT_FORMATTER_H
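Editorial sketch, not part of the imported diff: the centre-alignment case in __padding_size above divides the fill per [format.string.std]/3; a minimal standalone illustration of that arithmetic, using a hypothetical width of 10 and value size of 4.

#include <cassert>
#include <cstddef>

int main() {
  std::size_t size = 4;               // estimated column width of the value
  std::size_t width = 10;             // requested field width
  std::size_t fill = width - size;    // 6 columns of padding to distribute
  std::size_t before = fill / 2;      // floor(fill / 2) == 3, written first
  std::size_t after = fill - before;  // ceil(fill / 2)  == 3, written last
  assert(before + size + after == width);
}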
diff --git a/libcxx/include/__format/formatter_bool.h b/libcxx/include/__format/formatter_bool.h
new file mode 100644
index 000000000000..fdd1d75355d2
--- /dev/null
+++ b/libcxx/include/__format/formatter_bool.h
@@ -0,0 +1,147 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___FORMAT_FORMATTER_BOOL_H
+#define _LIBCPP___FORMAT_FORMATTER_BOOL_H
+
+#include <__availability>
+#include <__config>
+#include <__format/format_error.h>
+#include <__format/format_fwd.h>
+#include <__format/formatter.h>
+#include <__format/formatter_integral.h>
+#include <__format/parser_std_format_spec.h>
+#include <string_view>
+
+#ifndef _LIBCPP_HAS_NO_LOCALIZATION
+#include <locale>
+#endif
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17
+
+// TODO FMT Remove this once we require compilers with proper C++20 support.
+// If the compiler has no concepts support, the format header will be disabled.
+// Without concepts support, enable_if needs to be used and that is too much
+// effort to support compilers with partial C++20 support.
+#if !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+namespace __format_spec {
+
+template <class _CharT>
+class _LIBCPP_TEMPLATE_VIS __parser_bool : public __parser_integral<_CharT> {
+public:
+ _LIBCPP_HIDE_FROM_ABI constexpr auto parse(auto& __parse_ctx)
+ -> decltype(__parse_ctx.begin()) {
+ auto __it = __parser_integral<_CharT>::__parse(__parse_ctx);
+
+ switch (this->__type) {
+ case _Flags::_Type::__default:
+ this->__type = _Flags::_Type::__string;
+ [[fallthrough]];
+ case _Flags::_Type::__string:
+ this->__handle_bool();
+ break;
+
+ case _Flags::_Type::__char:
+ this->__handle_char();
+ break;
+
+ case _Flags::_Type::__binary_lower_case:
+ case _Flags::_Type::__binary_upper_case:
+ case _Flags::_Type::__octal:
+ case _Flags::_Type::__decimal:
+ case _Flags::_Type::__hexadecimal_lower_case:
+ case _Flags::_Type::__hexadecimal_upper_case:
+ this->__handle_integer();
+ break;
+
+ default:
+ __throw_format_error(
+ "The format-spec type has a type not supported for a bool argument");
+ }
+
+ return __it;
+ }
+};
+
+template <class _CharT>
+struct _LIBCPP_TEMPLATE_VIS __bool_strings;
+
+template <>
+struct _LIBCPP_TEMPLATE_VIS __bool_strings<char> {
+ static constexpr string_view __true{"true"};
+ static constexpr string_view __false{"false"};
+};
+
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
+template <>
+struct _LIBCPP_TEMPLATE_VIS __bool_strings<wchar_t> {
+ static constexpr wstring_view __true{L"true"};
+ static constexpr wstring_view __false{L"false"};
+};
+#endif
+
+template <class _CharT>
+using __formatter_bool = __formatter_integral<__parser_bool<_CharT>>;
+
+} //namespace __format_spec
+
+// [format.formatter.spec]/2.3
+// For each charT, for each cv-unqualified arithmetic type ArithmeticT other
+// than char, wchar_t, char8_t, char16_t, or char32_t, a specialization
+
+template <class _CharT>
+struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter<bool, _CharT>
+ : public __format_spec::__formatter_bool<_CharT> {
+ using _Base = __format_spec::__formatter_bool<_CharT>;
+
+ _LIBCPP_HIDE_FROM_ABI auto format(bool __value, auto& __ctx)
+ -> decltype(__ctx.out()) {
+ if (this->__type != __format_spec::_Flags::_Type::__string)
+ return _Base::format(static_cast<unsigned char>(__value), __ctx);
+
+ if (this->__width_needs_substitution())
+ this->__substitute_width_arg_id(__ctx.arg(this->__width));
+
+#ifndef _LIBCPP_HAS_NO_LOCALIZATION
+ if (this->__locale_specific_form) {
+ const auto& __np = use_facet<numpunct<_CharT>>(__ctx.locale());
+ basic_string<_CharT> __str = __value ? __np.truename() : __np.falsename();
+ return __formatter::__write_unicode(
+ __ctx.out(), basic_string_view<_CharT>{__str}, this->__width, -1,
+ this->__fill, this->__alignment);
+ }
+#endif
+ basic_string_view<_CharT> __str =
+ __value ? __format_spec::__bool_strings<_CharT>::__true
+ : __format_spec::__bool_strings<_CharT>::__false;
+
+ // The output only uses ASCII so every character is one column.
+ unsigned __size = __str.size();
+ if (__size >= this->__width)
+ return _VSTD::copy(__str.begin(), __str.end(), __ctx.out());
+
+ return __formatter::__write(__ctx.out(), __str.begin(), __str.end(), __size,
+ this->__width, this->__fill, this->__alignment);
+ }
+};
+
+#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+#endif //_LIBCPP_STD_VER > 17
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___FORMAT_FORMATTER_BOOL_H
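Editorial sketch, not part of the imported diff: expected behaviour of the bool formatter above, assuming a toolchain whose <format> implementation is complete enough to call std::format; the comments show the results prescribed by [format.string.std].

#include <format>
#include <string>

int main() {
  std::string a = std::format("{}", true);     // "true"    (default: the string form)
  std::string b = std::format("{:d}", false);  // "0"       (integer form via unsigned char)
  std::string c = std::format("{:>7}", true);  // "   true" (right aligned, width 7)
}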
diff --git a/libcxx/include/__format/formatter_char.h b/libcxx/include/__format/formatter_char.h
new file mode 100644
index 000000000000..2131de0778f2
--- /dev/null
+++ b/libcxx/include/__format/formatter_char.h
@@ -0,0 +1,104 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___FORMAT_FORMATTER_CHAR_H
+#define _LIBCPP___FORMAT_FORMATTER_CHAR_H
+
+#include <__availability>
+#include <__config>
+#include <__format/format_error.h>
+#include <__format/format_fwd.h>
+#include <__format/formatter.h>
+#include <__format/formatter_integral.h>
+#include <__format/parser_std_format_spec.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17
+
+// TODO FMT Remove this once we require compilers with proper C++20 support.
+// If the compiler has no concepts support, the format header will be disabled.
+// Without concepts support, enable_if needs to be used and that is too much
+// effort to support compilers with partial C++20 support.
+#if !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+namespace __format_spec {
+
+template <class _CharT>
+class _LIBCPP_TEMPLATE_VIS __parser_char : public __parser_integral<_CharT> {
+public:
+ _LIBCPP_HIDE_FROM_ABI constexpr auto parse(auto& __parse_ctx)
+ -> decltype(__parse_ctx.begin()) {
+ auto __it = __parser_integral<_CharT>::__parse(__parse_ctx);
+
+ switch (this->__type) {
+ case _Flags::_Type::__default:
+ this->__type = _Flags::_Type::__char;
+ [[fallthrough]];
+ case _Flags::_Type::__char:
+ this->__handle_char();
+ break;
+
+ case _Flags::_Type::__binary_lower_case:
+ case _Flags::_Type::__binary_upper_case:
+ case _Flags::_Type::__octal:
+ case _Flags::_Type::__decimal:
+ case _Flags::_Type::__hexadecimal_lower_case:
+ case _Flags::_Type::__hexadecimal_upper_case:
+ this->__handle_integer();
+ break;
+
+ default:
+ __throw_format_error(
+ "The format-spec type has a type not supported for a char argument");
+ }
+
+ return __it;
+ }
+};
+
+template <class _CharT>
+using __formatter_char = __formatter_integral<__parser_char<_CharT>>;
+
+} // namespace __format_spec
+
+// [format.formatter.spec]/2.1 The specializations
+
+template <>
+struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter<char, char>
+ : public __format_spec::__formatter_char<char> {};
+
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
+template <>
+struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter<char, wchar_t>
+ : public __format_spec::__formatter_char<wchar_t> {
+ using _Base = __format_spec::__formatter_char<wchar_t>;
+
+ _LIBCPP_HIDE_FROM_ABI auto format(char __value, auto& __ctx)
+ -> decltype(__ctx.out()) {
+ return _Base::format(static_cast<wchar_t>(__value), __ctx);
+ }
+};
+
+template <>
+struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT
+ formatter<wchar_t, wchar_t>
+ : public __format_spec::__formatter_char<wchar_t> {};
+#endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS
+#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+#endif //_LIBCPP_STD_VER > 17
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___FORMAT_FORMATTER_CHAR_H
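Editorial sketch, not part of the imported diff: expected behaviour of the char formatters above, under the same assumption of a usable <format>.

#include <format>
#include <string>

int main() {
  std::string a = std::format("{}", 'x');    // "x"   (default: the character form)
  std::string b = std::format("{:d}", 'x');  // "120" on ASCII platforms (integer form)
  std::wstring c = std::format(L"{}", 'x');  // L"x"  via the formatter<char, wchar_t> specialization
}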
diff --git a/libcxx/include/__format/formatter_integer.h b/libcxx/include/__format/formatter_integer.h
new file mode 100644
index 000000000000..767df36e61eb
--- /dev/null
+++ b/libcxx/include/__format/formatter_integer.h
@@ -0,0 +1,170 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___FORMAT_FORMATTER_INTEGER_H
+#define _LIBCPP___FORMAT_FORMATTER_INTEGER_H
+
+#include <__availability>
+#include <__config>
+#include <__format/format_error.h>
+#include <__format/format_fwd.h>
+#include <__format/formatter.h>
+#include <__format/formatter_integral.h>
+#include <__format/parser_std_format_spec.h>
+#include <limits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17
+
+// TODO FMT Remove this once we require compilers with proper C++20 support.
+// If the compiler has no concepts support, the format header will be disabled.
+// Without concepts support, enable_if needs to be used and that is too much
+// effort to support compilers with partial C++20 support.
+#if !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+namespace __format_spec {
+
+template <class _CharT>
+class _LIBCPP_TEMPLATE_VIS __parser_integer : public __parser_integral<_CharT> {
+public:
+ _LIBCPP_HIDE_FROM_ABI constexpr auto parse(auto& __parse_ctx)
+ -> decltype(__parse_ctx.begin()) {
+ auto __it = __parser_integral<_CharT>::__parse(__parse_ctx);
+
+ switch (this->__type) {
+ case _Flags::_Type::__default:
+ this->__type = _Flags::_Type::__decimal;
+ [[fallthrough]];
+
+ case _Flags::_Type::__binary_lower_case:
+ case _Flags::_Type::__binary_upper_case:
+ case _Flags::_Type::__octal:
+ case _Flags::_Type::__decimal:
+ case _Flags::_Type::__hexadecimal_lower_case:
+ case _Flags::_Type::__hexadecimal_upper_case:
+ this->__handle_integer();
+ break;
+
+ case _Flags::_Type::__char:
+ this->__handle_char();
+ break;
+
+ default:
+ __throw_format_error("The format-spec type has a type not supported for "
+ "an integer argument");
+ }
+ return __it;
+ }
+};
+
+template <class _CharT>
+using __formatter_integer = __formatter_integral<__parser_integer<_CharT>>;
+
+} // namespace __format_spec
+
+// [format.formatter.spec]/2.3
+// For each charT, for each cv-unqualified arithmetic type ArithmeticT other
+// than char, wchar_t, char8_t, char16_t, or char32_t, a specialization
+
+// Signed integral types.
+template <class _CharT>
+struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT
+ formatter<signed char, _CharT>
+ : public __format_spec::__formatter_integer<_CharT> {};
+template <class _CharT>
+struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter<short, _CharT>
+ : public __format_spec::__formatter_integer<_CharT> {};
+template <class _CharT>
+struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter<int, _CharT>
+ : public __format_spec::__formatter_integer<_CharT> {};
+template <class _CharT>
+struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter<long, _CharT>
+ : public __format_spec::__formatter_integer<_CharT> {};
+template <class _CharT>
+struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT
+ formatter<long long, _CharT>
+ : public __format_spec::__formatter_integer<_CharT> {};
+#ifndef _LIBCPP_HAS_NO_INT128
+template <class _CharT>
+struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT
+ formatter<__int128_t, _CharT>
+ : public __format_spec::__formatter_integer<_CharT> {
+ using _Base = __format_spec::__formatter_integer<_CharT>;
+
+ _LIBCPP_HIDE_FROM_ABI auto format(__int128_t __value, auto& __ctx)
+ -> decltype(__ctx.out()) {
+ // TODO FMT Implement full 128 bit support.
+ using _To = long long;
+ if (__value < numeric_limits<_To>::min() ||
+ __value > numeric_limits<_To>::max())
+ __throw_format_error("128-bit value is outside of implemented range");
+
+ return _Base::format(static_cast<_To>(__value), __ctx);
+ }
+};
+#endif
+
+// Unsigned integral types.
+template <class _CharT>
+struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT
+ formatter<unsigned char, _CharT>
+ : public __format_spec::__formatter_integer<_CharT> {};
+template <class _CharT>
+struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT
+ formatter<unsigned short, _CharT>
+ : public __format_spec::__formatter_integer<_CharT> {};
+template <class _CharT>
+struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT
+ formatter<unsigned, _CharT>
+ : public __format_spec::__formatter_integer<_CharT> {};
+template <class _CharT>
+struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT
+ formatter<unsigned long, _CharT>
+ : public __format_spec::__formatter_integer<_CharT> {};
+template <class _CharT>
+struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT
+ formatter<unsigned long long, _CharT>
+ : public __format_spec::__formatter_integer<_CharT> {};
+#ifndef _LIBCPP_HAS_NO_INT128
+template <class _CharT>
+struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT
+ formatter<__uint128_t, _CharT>
+ : public __format_spec::__formatter_integer<_CharT> {
+ using _Base = __format_spec::__formatter_integer<_CharT>;
+
+ _LIBCPP_HIDE_FROM_ABI auto format(__uint128_t __value, auto& __ctx)
+ -> decltype(__ctx.out()) {
+ // TODO FMT Implement full 128 bit support.
+ using _To = unsigned long long;
+ if (__value < numeric_limits<_To>::min() ||
+ __value > numeric_limits<_To>::max())
+ __throw_format_error("128-bit value is outside of implemented range");
+
+ return _Base::format(static_cast<_To>(__value), __ctx);
+ }
+};
+#endif
+
+#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+#endif //_LIBCPP_STD_VER > 17
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___FORMAT_FORMATTER_INTEGER_H
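Editorial sketch, not part of the imported diff: expected behaviour of the integer formatters above, exercising the alternate form, sign, and zero-padding options handled by __formatter_integral.

#include <format>
#include <string>

int main() {
  std::string a = std::format("{:#x}", 255);   // "0xff"     (alternate form, lowercase hex)
  std::string b = std::format("{:#B}", 5);     // "0B101"    (alternate form, uppercase binary)
  std::string c = std::format("{:+d}", 42);    // "+42"      (explicit plus sign)
  std::string d = std::format("{:08d}", -42);  // "-0000042" (sign written first, then zero padding)
}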
diff --git a/libcxx/include/__format/formatter_integral.h b/libcxx/include/__format/formatter_integral.h
new file mode 100644
index 000000000000..5f1353effd77
--- /dev/null
+++ b/libcxx/include/__format/formatter_integral.h
@@ -0,0 +1,463 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___FORMAT_FORMATTER_INTEGRAL_H
+#define _LIBCPP___FORMAT_FORMATTER_INTEGRAL_H
+
+#include <__config>
+#include <__format/format_error.h>
+#include <__format/format_fwd.h>
+#include <__format/formatter.h>
+#include <__format/parser_std_format_spec.h>
+#include <__algorithm/copy.h>
+#include <__algorithm/copy_n.h>
+#include <__algorithm/fill_n.h>
+#include <__algorithm/transform.h>
+#include <array>
+#include <charconv>
+#include <concepts>
+#include <limits>
+#include <string>
+
+#ifndef _LIBCPP_HAS_NO_LOCALIZATION
+#include <locale>
+#endif
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17
+
+// TODO FMT Remove this once we require compilers with proper C++20 support.
+// If the compiler has no concepts support, the format header will be disabled.
+// Without concepts support, enable_if needs to be used and that is too much
+// effort to support compilers with partial C++20 support.
+#if !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+/**
+ * Integral formatting classes.
+ *
+ * There are two types used here:
+ * * C++-type, the type as used in C++.
+ * * format-type, the output type specified in the std-format-spec.
+ *
+ * Design of the integral formatters consists of several layers.
+ * * @ref __parser_integral The basic std-format-spec parser for all integral
+ * classes. This parser does the basic sanity checks. It also contains some
+ * helper functions that are nice to have available for all parsers.
+ * * A C++-type specific parser. These parsers must derive from
+ * @ref __parser_integral. Their task is to validate whether the parsed
+ * std-format-spec is valid for the C++-type and selected format-type. After
+ * validation they need to make sure all members are properly set. For
+ * example, when the alignment hasn't changed they need to set the proper
+ * default alignment for the format-type. The following parsers are available:
+ * - @ref __parser_integer
+ * - @ref __parser_char
+ * - @ref __parser_bool
+ * * A general formatter for all integral types @ref __formatter_integral. This
+ * formatter can handle all formatting of integers and characters. The class
+ * derives from the proper parser.
+ * Note the boolean string format-type isn't supported in this class.
+ * * A typedef C++-type group combining the @ref __formatter_integral with a
+ * parser:
+ * * @ref __formatter_integer
+ * * @ref __formatter_char
+ * * @ref __formatter_bool
+ * * Then every C++-type has its own formatter specializations. They inherit
+ * from the C++-type group typedef. Most specializations need nothing else.
+ * Others need some additional specializations in this class.
+ */
+namespace __format_spec {
+
+/** Wrapper around @ref to_chars, returning the output pointer. */
+template <class _Tp>
+_LIBCPP_HIDE_FROM_ABI char* __to_buffer(char* __first, char* __last,
+ _Tp __value, int __base) {
+ // TODO FMT Evaluate code overhead due to not calling the internal function
+ // directly. (Should be zero overhead.)
+ to_chars_result __r = _VSTD::to_chars(__first, __last, __value, __base);
+ _LIBCPP_ASSERT(__r.ec == errc(0), "Internal buffer too small");
+ return __r.ptr;
+}
+
+/**
+ * Helper to determine the buffer size to output an integer in base @em x.
+ *
+ * There are several overloads for the supported bases. The function uses the
+ * base as template argument so it can be used in a constant expression.
+ */
+template <unsigned_integral _Tp, size_t _Base>
+_LIBCPP_HIDE_FROM_ABI constexpr size_t __buffer_size() noexcept
+ requires(_Base == 2) {
+ return numeric_limits<_Tp>::digits // The number of binary digits.
+ + 2 // Reserve space for the '0[Bb]' prefix.
+ + 1; // Reserve space for the sign.
+}
+
+template <unsigned_integral _Tp, size_t _Base>
+_LIBCPP_HIDE_FROM_ABI constexpr size_t __buffer_size() noexcept
+ requires(_Base == 8) {
+ return numeric_limits<_Tp>::digits // The number of binary digits.
+ / 3 // Adjust to octal.
+ + 1 // Turn floor to ceil.
+ + 1 // Reserve space for the '0' prefix.
+ + 1; // Reserve space for the sign.
+}
+
+template <unsigned_integral _Tp, size_t _Base>
+_LIBCPP_HIDE_FROM_ABI constexpr size_t __buffer_size() noexcept
+ requires(_Base == 10) {
+ return numeric_limits<_Tp>::digits10 // The floored value.
+ + 1 // Turn floor to ceil.
+ + 1; // Reserve space for the sign.
+}
+
+template <unsigned_integral _Tp, size_t _Base>
+_LIBCPP_HIDE_FROM_ABI constexpr size_t __buffer_size() noexcept
+ requires(_Base == 16) {
+ return numeric_limits<_Tp>::digits // The number of binary digits.
+ / 4 // Adjust to hexadecimal.
+ + 2 // Reserve space for the '0[Xx]' prefix.
+ + 1; // Reserve space for the sign.
+}
+
+/**
+ * Determines the required grouping based on the size of the input.
+ *
+ * The grouping's last element will be repeated. For simplicity this repeating
+ * is unwrapped based on the length of the input. (When the input is short some
+ * groups are not processed.)
+ *
+ * @returns The size of the groups to write. This means the number of
+ * separator characters written is size() - 1.
+ *
+ * @note Since zero-sized groups cause issues they are silently ignored.
+ *
+ * @note The grouping field of the locale is always a @c std::string,
+ * regardless of whether the @c std::numpunct's type is @c char or @c wchar_t.
+ */
+_LIBCPP_HIDE_FROM_ABI inline string
+__determine_grouping(ptrdiff_t __size, const string& __grouping) {
+ _LIBCPP_ASSERT(!__grouping.empty() && __size > __grouping[0],
+ "The slow grouping formatting is used while there will be no "
+ "separators written");
+ string __r;
+ auto __end = __grouping.end() - 1;
+ auto __ptr = __grouping.begin();
+
+ while (true) {
+ __size -= *__ptr;
+ if (__size > 0)
+ __r.push_back(*__ptr);
+ else {
+ // __size <= 0 so the value pushed will be <= *__ptr.
+ __r.push_back(*__ptr + __size);
+ return __r;
+ }
+
+ // Proceed to the next group.
+ if (__ptr != __end) {
+ do {
+ ++__ptr;
+ // Skip grouping with a width of 0.
+ } while (*__ptr == 0 && __ptr != __end);
+ }
+ }
+
+ _LIBCPP_UNREACHABLE();
+}
+
+template <class _Parser>
+requires __formatter::__char_type<typename _Parser::char_type>
+class _LIBCPP_TEMPLATE_VIS __formatter_integral : public _Parser {
+public:
+ using _CharT = typename _Parser::char_type;
+
+ template <integral _Tp>
+ _LIBCPP_HIDE_FROM_ABI auto format(_Tp __value, auto& __ctx)
+ -> decltype(__ctx.out()) {
+ if (this->__width_needs_substitution())
+ this->__substitute_width_arg_id(__ctx.arg(this->__width));
+
+ if (this->__type == _Flags::_Type::__char)
+ return __format_as_char(__value, __ctx);
+
+ if constexpr (unsigned_integral<_Tp>)
+ return __format_unsigned_integral(__value, false, __ctx);
+ else {
+ // Depending on the std-format-spec string the sign and the value
+ // might not be outputted together:
+ // - alternate form may insert a prefix string.
+ // - zero-padding may insert additional '0' characters.
+ // Therefore the value is processed as a positive unsigned value.
+ // The function @ref __insert_sign will add a '-' when the value was negative.
+ auto __r = __to_unsigned_like(__value);
+ bool __negative = __value < 0;
+ if (__negative)
+ __r = __complement(__r);
+
+ return __format_unsigned_integral(__r, __negative, __ctx);
+ }
+ }
+
+private:
+ /** Generic formatting for format-type c. */
+ _LIBCPP_HIDE_FROM_ABI auto __format_as_char(integral auto __value,
+ auto& __ctx)
+ -> decltype(__ctx.out()) {
+ if (this->__alignment == _Flags::_Alignment::__default)
+ this->__alignment = _Flags::_Alignment::__right;
+
+ using _Tp = decltype(__value);
+ if constexpr (!same_as<_CharT, _Tp>) {
+ // cmp_less and cmp_greater can't be used for character types.
+ if constexpr (signed_integral<_CharT> == signed_integral<_Tp>) {
+ if (__value < numeric_limits<_CharT>::min() ||
+ __value > numeric_limits<_CharT>::max())
+ __throw_format_error(
+ "Integral value outside the range of the char type");
+ } else if constexpr (signed_integral<_CharT>) {
+ // _CharT is signed _Tp is unsigned
+ if (__value >
+ static_cast<make_unsigned_t<_CharT>>(numeric_limits<_CharT>::max()))
+ __throw_format_error(
+ "Integral value outside the range of the char type");
+ } else {
+ // _CharT is unsigned _Tp is signed
+ if (__value < 0 || static_cast<make_unsigned_t<_Tp>>(__value) >
+ numeric_limits<_CharT>::max())
+ __throw_format_error(
+ "Integral value outside the range of the char type");
+ }
+ }
+
+ const auto __c = static_cast<_CharT>(__value);
+ return __write(_VSTD::addressof(__c), _VSTD::addressof(__c) + 1,
+ __ctx.out());
+ }
+
+ /**
+ * Generic formatting for format-type bBdoxX.
+ *
+ * This small wrapper allocates a buffer with the required size. Then calls
+ * the real formatter with the buffer and the prefix for the base.
+ */
+ _LIBCPP_HIDE_FROM_ABI auto
+ __format_unsigned_integral(unsigned_integral auto __value, bool __negative,
+ auto& __ctx) -> decltype(__ctx.out()) {
+ switch (this->__type) {
+ case _Flags::_Type::__binary_lower_case: {
+ array<char, __buffer_size<decltype(__value), 2>()> __array;
+ return __format_unsigned_integral(__array.begin(), __array.end(), __value,
+ __negative, 2, __ctx, "0b");
+ }
+ case _Flags::_Type::__binary_upper_case: {
+ array<char, __buffer_size<decltype(__value), 2>()> __array;
+ return __format_unsigned_integral(__array.begin(), __array.end(), __value,
+ __negative, 2, __ctx, "0B");
+ }
+ case _Flags::_Type::__octal: {
+ // Octal is special; if __value == 0 there's no prefix.
+ array<char, __buffer_size<decltype(__value), 8>()> __array;
+ return __format_unsigned_integral(__array.begin(), __array.end(), __value,
+ __negative, 8, __ctx,
+ __value != 0 ? "0" : nullptr);
+ }
+ case _Flags::_Type::__decimal: {
+ array<char, __buffer_size<decltype(__value), 10>()> __array;
+ return __format_unsigned_integral(__array.begin(), __array.end(), __value,
+ __negative, 10, __ctx, nullptr);
+ }
+ case _Flags::_Type::__hexadecimal_lower_case: {
+ array<char, __buffer_size<decltype(__value), 16>()> __array;
+ return __format_unsigned_integral(__array.begin(), __array.end(), __value,
+ __negative, 16, __ctx, "0x");
+ }
+ case _Flags::_Type::__hexadecimal_upper_case: {
+ array<char, __buffer_size<decltype(__value), 16>()> __array;
+ return __format_unsigned_integral(__array.begin(), __array.end(), __value,
+ __negative, 16, __ctx, "0X");
+ }
+ default:
+ _LIBCPP_ASSERT(false, "The parser should have validated the type");
+ _LIBCPP_UNREACHABLE();
+ }
+ }
+
+ template <class _Tp>
+ requires(same_as<char, _Tp> || same_as<wchar_t, _Tp>) _LIBCPP_HIDE_FROM_ABI
+ auto __write(const _Tp* __first, const _Tp* __last, auto __out_it)
+ -> decltype(__out_it) {
+
+ unsigned __size = __last - __first;
+ if (this->__type != _Flags::_Type::__hexadecimal_upper_case) [[likely]] {
+ if (__size >= this->__width)
+ return _VSTD::copy(__first, __last, _VSTD::move(__out_it));
+
+ return __formatter::__write(_VSTD::move(__out_it), __first, __last,
+ __size, this->__width, this->__fill,
+ this->__alignment);
+ }
+
+ // this->__type == _Flags::_Type::__hexadecimal_upper_case
+ // This means all characters in the range [a-f] need to be changed to their
+ // uppercase representation. The transformation is done as part of the
+ // output routine instead of beforehand. This avoids another pass over
+ // the data.
+ // TODO FMT See whether it's possible to do this transformation during the
+ // conversion. (This probably requires changing std::to_chars' alphabet.)
+ if (__size >= this->__width)
+ return _VSTD::transform(__first, __last, _VSTD::move(__out_it),
+ __hex_to_upper);
+
+ return __formatter::__write(_VSTD::move(__out_it), __first, __last, __size,
+ __hex_to_upper, this->__width, this->__fill,
+ this->__alignment);
+ }
+
+ _LIBCPP_HIDE_FROM_ABI auto
+ __format_unsigned_integral(char* __begin, char* __end,
+ unsigned_integral auto __value, bool __negative,
+ int __base, auto& __ctx, const char* __prefix)
+ -> decltype(__ctx.out()) {
+ char* __first = __insert_sign(__begin, __negative, this->__sign);
+ if (this->__alternate_form && __prefix)
+ while (*__prefix)
+ *__first++ = *__prefix++;
+
+ char* __last = __to_buffer(__first, __end, __value, __base);
+#ifndef _LIBCPP_HAS_NO_LOCALIZATION
+ if (this->__locale_specific_form) {
+ const auto& __np = use_facet<numpunct<_CharT>>(__ctx.locale());
+ string __grouping = __np.grouping();
+ ptrdiff_t __size = __last - __first;
+ // Writing the grouped form has more overhead than the normal output
+ // routines. If there will be no separators written the locale-specific
+ // form is identical to the normal routine. Test whether to grouped form
+ // is required.
+ if (!__grouping.empty() && __size > __grouping[0])
+ return __format_grouping(__ctx.out(), __begin, __first, __last,
+ __determine_grouping(__size, __grouping),
+ __np.thousands_sep());
+ }
+#endif
+ auto __out_it = __ctx.out();
+ if (this->__alignment != _Flags::_Alignment::__default)
+ __first = __begin;
+ else {
+ // __buf contains [sign][prefix]data
+ // ^ location of __first
+ // The zero padding is done like:
+ // - Write [sign][prefix]
+ // - Write data right aligned with '0' as fill character.
+ __out_it = _VSTD::copy(__begin, __first, _VSTD::move(__out_it));
+ this->__alignment = _Flags::_Alignment::__right;
+ this->__fill = _CharT('0');
+ uint32_t __size = __first - __begin;
+ this->__width -= _VSTD::min(__size, this->__width);
+ }
+
+ return __write(__first, __last, _VSTD::move(__out_it));
+ }
+
+#ifndef _LIBCPP_HAS_NO_LOCALIZATION
+ /** Formats the locale-specific form's groupings. */
+ template <class _OutIt, class _CharT>
+ _LIBCPP_HIDE_FROM_ABI _OutIt
+ __format_grouping(_OutIt __out_it, const char* __begin, const char* __first,
+ const char* __last, string&& __grouping, _CharT __sep) {
+
+ // TODO FMT This function duplicates some functionality of the normal
+ // output routines. Evaluate whether these parts can be efficiently
+ // combined with the existing routines.
+
+ unsigned __size = (__first - __begin) + // [sign][prefix]
+ (__last - __first) + // data
+ (__grouping.size() - 1); // number of separator characters
+
+ __formatter::__padding_size_result __padding = {0, 0};
+ if (this->__alignment == _Flags::_Alignment::__default) {
+ // Write [sign][prefix].
+ __out_it = _VSTD::copy(__begin, __first, _VSTD::move(__out_it));
+
+ if (this->__width > __size) {
+ // Write zero padding.
+ __padding.__before = this->__width - __size;
+ __out_it = _VSTD::fill_n(_VSTD::move(__out_it), this->__width - __size,
+ _CharT('0'));
+ }
+ } else {
+ if (this->__width > __size) {
+ // Determine padding and write padding.
+ __padding = __formatter::__padding_size(__size, this->__width,
+ this->__alignment);
+
+ __out_it = _VSTD::fill_n(_VSTD::move(__out_it), __padding.__before,
+ this->__fill);
+ }
+ // Write [sign][prefix].
+ __out_it = _VSTD::copy(__begin, __first, _VSTD::move(__out_it));
+ }
+
+ auto __r = __grouping.rbegin();
+ auto __e = __grouping.rend() - 1;
+ _LIBCPP_ASSERT(__r != __e, "The slow grouping formatting is used while "
+ "there will be no separators written.");
+ // The output is divided in small groups of numbers to write:
+ // - A group before the first separator.
+ // - A separator and a group, repeated for the number of separators.
+ // - A group after the last separator.
+ // This loop achieves that process by testing the termination condition
+ // midway in the loop.
+ //
+ // TODO FMT This loop evaluates the loop invariant `this->__type !=
+ // _Flags::_Type::__hexadecimal_upper_case` for every iteration. (This test
+ // happens in the __write call.) Benchmark whether making two loops and
+ // hoisting the invariant is worth the effort.
+ while (true) {
+ if (this->__type == _Flags::_Type::__hexadecimal_upper_case) {
+ __last = __first + *__r;
+ __out_it = _VSTD::transform(__first, __last, _VSTD::move(__out_it),
+ __hex_to_upper);
+ __first = __last;
+ } else {
+ __out_it = _VSTD::copy_n(__first, *__r, _VSTD::move(__out_it));
+ __first += *__r;
+ }
+
+ if (__r == __e)
+ break;
+
+ ++__r;
+ *__out_it++ = __sep;
+ }
+
+ return _VSTD::fill_n(_VSTD::move(__out_it), __padding.__after,
+ this->__fill);
+ }
+#endif // _LIBCPP_HAS_NO_LOCALIZATION
+};
+
+} // namespace __format_spec
+
+#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+#endif //_LIBCPP_STD_VER > 17
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___FORMAT_FORMATTER_INTEGRAL_H
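Editorial sketch, not part of the imported diff: the base-16 buffer sizing computed by __buffer_size above, checked for a hypothetical 32-bit unsigned value.

#include <cstddef>
#include <cstdint>
#include <limits>

// 32 binary digits / 4 bits per hex digit = 8 digits, plus 2 for the "0x"
// prefix and 1 for a possible sign character.
constexpr std::size_t hex_chars =
    std::numeric_limits<std::uint32_t>::digits / 4 + 2 + 1;
static_assert(hex_chars == 11, "8 digits + 2 prefix + 1 sign");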
diff --git a/libcxx/include/__format/formatter_string.h b/libcxx/include/__format/formatter_string.h
new file mode 100644
index 000000000000..2be36a1ba947
--- /dev/null
+++ b/libcxx/include/__format/formatter_string.h
@@ -0,0 +1,163 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___FORMAT_FORMATTER_STRING_H
+#define _LIBCPP___FORMAT_FORMATTER_STRING_H
+
+#include <__config>
+#include <__format/format_error.h>
+#include <__format/format_fwd.h>
+#include <__format/format_string.h>
+#include <__format/formatter.h>
+#include <__format/parser_std_format_spec.h>
+#include <algorithm>
+#include <string_view>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17
+
+// TODO FMT Remove this once we require compilers with proper C++20 support.
+// If the compiler has no concepts support, the format header will be disabled.
+// Without concepts support, enable_if needs to be used and that is too much
+// effort to support compilers with partial C++20 support.
+#if !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+namespace __format_spec {
+
+template <__formatter::__char_type _CharT>
+class _LIBCPP_TEMPLATE_VIS __formatter_string : public __parser_string<_CharT> {
+public:
+ _LIBCPP_HIDE_FROM_ABI auto format(basic_string_view<_CharT> __str,
+ auto& __ctx) -> decltype(__ctx.out()) {
+
+ _LIBCPP_ASSERT(this->__alignment != _Flags::_Alignment::__default,
+ "The parser should not use these defaults");
+
+ if (this->__width_needs_substitution())
+ this->__substitute_width_arg_id(__ctx.arg(this->__width));
+
+ if (this->__precision_needs_substitution())
+ this->__substitute_precision_arg_id(__ctx.arg(this->__precision));
+
+ return __formatter::__write_unicode(
+ __ctx.out(), __str, this->__width,
+ this->__has_precision_field() ? this->__precision : -1, this->__fill,
+ this->__alignment);
+ }
+};
+
+} //namespace __format_spec
+
+// [format.formatter.spec]/2.2 For each charT, the string type specializations
+
+// Formatter const char*.
+template <class _CharT>
+struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT
+ formatter<const _CharT*, _CharT>
+ : public __format_spec::__formatter_string<_CharT> {
+ using _Base = __format_spec::__formatter_string<_CharT>;
+
+ _LIBCPP_HIDE_FROM_ABI auto format(const _CharT* __str, auto& __ctx)
+ -> decltype(__ctx.out()) {
+ _LIBCPP_ASSERT(__str, "The basic_format_arg constructor should have "
+ "prevented an invalid pointer.");
+
+ // When using a center or right alignment and the width option the length
+ // of __str must be known to add the padding upfront. This case is handled
+ // by the base class by converting the argument to a basic_string_view.
+ //
+ // When using left alignment and the width option the padding is added
+ // after outputting __str so the length can be determined while outputting
+ // __str. The same holds true for the precision, during outputting __str it
+ // can be validated whether the precision threshold has been reached. For
+ // now these optimizations aren't implemented. Instead the base class
+ // handles these options.
+ // TODO FMT Implement these improvements.
+ if (this->__has_width_field() || this->__has_precision_field())
+ return _Base::format(__str, __ctx);
+
+ // No formatting required, copy the string to the output.
+ auto __out_it = __ctx.out();
+ while (*__str)
+ *__out_it++ = *__str++;
+ return __out_it;
+ }
+};
+
+// Formatter char*.
+template <class _CharT>
+struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT
+ formatter<_CharT*, _CharT> : public formatter<const _CharT*, _CharT> {
+ using _Base = formatter<const _CharT*, _CharT>;
+
+ _LIBCPP_HIDE_FROM_ABI auto format(_CharT* __str, auto& __ctx)
+ -> decltype(__ctx.out()) {
+ return _Base::format(__str, __ctx);
+ }
+};
+
+// Formatter const char[].
+template <class _CharT, size_t _Size>
+struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT
+ formatter<const _CharT[_Size], _CharT>
+ : public __format_spec::__formatter_string<_CharT> {
+ using _Base = __format_spec::__formatter_string<_CharT>;
+
+ _LIBCPP_HIDE_FROM_ABI auto format(const _CharT __str[_Size], auto& __ctx)
+ -> decltype(__ctx.out()) {
+ return _Base::format(basic_string_view<_CharT>(__str, _Size), __ctx);
+ }
+};
+
+// Formatter std::string.
+template <class _CharT, class _Traits, class _Allocator>
+struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT
+ formatter<basic_string<_CharT, _Traits, _Allocator>, _CharT>
+ : public __format_spec::__formatter_string<_CharT> {
+ using _Base = __format_spec::__formatter_string<_CharT>;
+
+ _LIBCPP_HIDE_FROM_ABI auto
+ format(const basic_string<_CharT, _Traits, _Allocator>& __str, auto& __ctx)
+ -> decltype(__ctx.out()) {
+ // drop _Traits and _Allocator
+ return _Base::format(basic_string_view<_CharT>(__str.data(), __str.size()), __ctx);
+ }
+};
+
+// Formatter std::string_view.
+template <class _CharT, class _Traits>
+struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter<basic_string_view<_CharT, _Traits>, _CharT>
+ : public __format_spec::__formatter_string<_CharT> {
+ using _Base = __format_spec::__formatter_string<_CharT>;
+
+ _LIBCPP_HIDE_FROM_ABI auto
+ format(basic_string_view<_CharT, _Traits> __str, auto& __ctx)
+ -> decltype(__ctx.out()) {
+ // drop _Traits
+ return _Base::format(basic_string_view<_CharT>(__str.data(), __str.size()), __ctx);
+ }
+};
+
+#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+#endif //_LIBCPP_STD_VER > 17
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___FORMAT_FORMATTER_STRING_H
diff --git a/libcxx/include/__format/parser_std_format_spec.h b/libcxx/include/__format/parser_std_format_spec.h
new file mode 100644
index 000000000000..9b713b811484
--- /dev/null
+++ b/libcxx/include/__format/parser_std_format_spec.h
@@ -0,0 +1,1184 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H
+#define _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H
+
+#include <__algorithm/find_if.h>
+#include <__algorithm/min.h>
+#include <__config>
+#include <__debug>
+#include <__format/format_arg.h>
+#include <__format/format_error.h>
+#include <__format/format_string.h>
+#include <__variant/monostate.h>
+#include <bit>
+#include <concepts>
+#include <cstdint>
+#include <type_traits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+# pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17
+
+// TODO FMT Remove this once we require compilers with proper C++20 support.
+// If the compiler has no concepts support, the format header will be disabled.
+// Without concepts support, enable_if needs to be used and that is too much
+// effort to support compilers with partial C++20 support.
+# if !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+namespace __format_spec {
+
+/**
+ * Contains the flags for the std-format-spec.
+ *
+ * Some format-options can only be used for specific C++-types and may depend
+ * on the selected format-type.
+ * * The C++-type filtering can be done using the proper policies for
+ * @ref __parser_std.
+ * * The format-type filtering needs to be done post parsing in the parser
+ * derived from @ref __parser_std.
+ */
+class _LIBCPP_TYPE_VIS _Flags {
+public:
+ enum class _LIBCPP_ENUM_VIS _Alignment : uint8_t {
+ /**
+ * No alignment is set in the format string.
+ *
+ * Zero-padding is ignored when an alignment is selected.
+ * The default alignment depends on the selected format-type.
+ */
+ __default,
+ __left,
+ __center,
+ __right
+ };
+ enum class _LIBCPP_ENUM_VIS _Sign : uint8_t {
+ /**
+ * No sign is set in the format string.
+ *
+ * The sign isn't allowed for certain format-types. By using this value
+ * it's possible to detect whether or not the user explicitly set the sign
+ * flag. For formatting purposes it behaves the same as @ref __minus.
+ */
+ __default,
+ __minus,
+ __plus,
+ __space
+ };
+
+ _Alignment __alignment : 2 {_Alignment::__default};
+ _Sign __sign : 2 {_Sign::__default};
+ uint8_t __alternate_form : 1 {false};
+ uint8_t __zero_padding : 1 {false};
+ uint8_t __locale_specific_form : 1 {false};
+
+ enum class _LIBCPP_ENUM_VIS _Type : uint8_t {
+ __default,
+ __string,
+ __binary_lower_case,
+ __binary_upper_case,
+ __octal,
+ __decimal,
+ __hexadecimal_lower_case,
+ __hexadecimal_upper_case,
+ __pointer,
+ __char,
+ __float_hexadecimal_lower_case,
+ __float_hexadecimal_upper_case,
+ __scientific_lower_case,
+ __scientific_upper_case,
+ __fixed_lower_case,
+ __fixed_upper_case,
+ __general_lower_case,
+ __general_upper_case
+ };
+
+ _Type __type{_Type::__default};
+};
+
+namespace __detail {
+template <class _CharT>
+_LIBCPP_HIDE_FROM_ABI constexpr bool
+__parse_alignment(_CharT __c, _Flags& __flags) noexcept {
+ switch (__c) {
+ case _CharT('<'):
+ __flags.__alignment = _Flags::_Alignment::__left;
+ return true;
+
+ case _CharT('^'):
+ __flags.__alignment = _Flags::_Alignment::__center;
+ return true;
+
+ case _CharT('>'):
+ __flags.__alignment = _Flags::_Alignment::__right;
+ return true;
+ }
+ return false;
+}
+} // namespace __detail
+
+template <class _CharT>
+class _LIBCPP_TEMPLATE_VIS __parser_fill_align {
+public:
+ // TODO FMT The standard doesn't specify this character is a Unicode
+ // character. Validate what fmt and MSVC have implemented.
+ _CharT __fill{_CharT(' ')};
+
+protected:
+ _LIBCPP_HIDE_FROM_ABI constexpr const _CharT*
+ __parse(const _CharT* __begin, const _CharT* __end, _Flags& __flags) {
+ _LIBCPP_ASSERT(__begin != __end,
+ "When called with an empty input the function will cause "
+ "undefined behavior by evaluating data not in the input");
+ if (__begin + 1 != __end) {
+ if (__detail::__parse_alignment(*(__begin + 1), __flags)) {
+ if (*__begin == _CharT('{') || *__begin == _CharT('}'))
+ __throw_format_error(
+ "The format-spec fill field contains an invalid character");
+ __fill = *__begin;
+ return __begin + 2;
+ }
+ }
+
+ if (__detail::__parse_alignment(*__begin, __flags))
+ return __begin + 1;
+
+ return __begin;
+ }
+};
+
+template <class _CharT>
+_LIBCPP_HIDE_FROM_ABI constexpr const _CharT*
+__parse_sign(const _CharT* __begin, _Flags& __flags) noexcept {
+ switch (*__begin) {
+ case _CharT('-'):
+ __flags.__sign = _Flags::_Sign::__minus;
+ break;
+ case _CharT('+'):
+ __flags.__sign = _Flags::_Sign::__plus;
+ break;
+ case _CharT(' '):
+ __flags.__sign = _Flags::_Sign::__space;
+ break;
+ default:
+ return __begin;
+ }
+ return __begin + 1;
+}
+
+template <class _CharT>
+_LIBCPP_HIDE_FROM_ABI constexpr const _CharT*
+__parse_alternate_form(const _CharT* __begin, _Flags& __flags) noexcept {
+ if (*__begin == _CharT('#')) {
+ __flags.__alternate_form = true;
+ ++__begin;
+ }
+
+ return __begin;
+}
+
+template <class _CharT>
+_LIBCPP_HIDE_FROM_ABI constexpr const _CharT*
+__parse_zero_padding(const _CharT* __begin, _Flags& __flags) noexcept {
+ if (*__begin == _CharT('0')) {
+ __flags.__zero_padding = true;
+ ++__begin;
+ }
+
+ return __begin;
+}
+
+template <class _CharT>
+_LIBCPP_HIDE_FROM_ABI constexpr __format::__parse_number_result< _CharT>
+__parse_arg_id(const _CharT* __begin, const _CharT* __end, auto& __parse_ctx) {
+ // This function is a wrapper around the real parser; it also validates
+ // the pre-conditions and post-conditions.
+ if (__begin == __end)
+ __throw_format_error("End of input while parsing format-spec arg-id");
+
+ __format::__parse_number_result __r =
+ __format::__parse_arg_id(__begin, __end, __parse_ctx);
+
+ if (__r.__ptr == __end || *__r.__ptr != _CharT('}'))
+ __throw_format_error("A format-spec arg-id should terminate at a '}'");
+
+ ++__r.__ptr;
+ return __r;
+}
+
+template <class _Context>
+_LIBCPP_HIDE_FROM_ABI constexpr uint32_t
+__substitute_arg_id(basic_format_arg<_Context> __arg) {
+ return visit_format_arg(
+ [](auto __arg) -> uint32_t {
+ using _Type = decltype(__arg);
+ if constexpr (integral<_Type>) {
+ if constexpr (signed_integral<_Type>) {
+ if (__arg < 0)
+ __throw_format_error("A format-spec arg-id replacement shouldn't "
+ "have a negative value");
+ }
+
+ using _CT = common_type_t<_Type, decltype(__format::__number_max)>;
+ if (static_cast<_CT>(__arg) >
+ static_cast<_CT>(__format::__number_max))
+ __throw_format_error("A format-spec arg-id replacement exceeds "
+ "the maximum supported value");
+
+ return __arg;
+ } else if constexpr (same_as<_Type, monostate>)
+ __throw_format_error("Argument index out of bounds");
+ else
+ __throw_format_error("A format-spec arg-id replacement argument "
+ "isn't an integral type");
+ },
+ __arg);
+}
+
+class _LIBCPP_TYPE_VIS __parser_width {
+public:
+ /** Contains a width or an arg-id. */
+ uint32_t __width : 31 {0};
+ /** Determines whether the value stored is a width or an arg-id. */
+ uint32_t __width_as_arg : 1 {0};
+
+protected:
+ /**
+ * Does the supplied std-format-spec contain a width field?
+ *
+ * When the field isn't present there's no padding required. This can be used
+ * to optimize the formatting.
+ */
+ constexpr bool __has_width_field() const noexcept {
+ return __width_as_arg || __width;
+ }
+
+ /**
+ * Does the supplied width field contain an arg-id?
+ *
+ * If @c true the formatter needs to call @ref __substitute_width_arg_id.
+ */
+ constexpr bool __width_needs_substitution() const noexcept {
+ return __width_as_arg;
+ }
+
+ template <class _CharT>
+ _LIBCPP_HIDE_FROM_ABI constexpr const _CharT*
+ __parse(const _CharT* __begin, const _CharT* __end, auto& __parse_ctx) {
+ if (*__begin == _CharT('0'))
+ __throw_format_error(
+ "A format-spec width field shouldn't have a leading zero");
+
+ if (*__begin == _CharT('{')) {
+ __format::__parse_number_result __r =
+ __parse_arg_id(++__begin, __end, __parse_ctx);
+ __width = __r.__value;
+ __width_as_arg = 1;
+ return __r.__ptr;
+ }
+
+ if (*__begin < _CharT('0') || *__begin > _CharT('9'))
+ return __begin;
+
+ __format::__parse_number_result __r =
+ __format::__parse_number(__begin, __end);
+ __width = __r.__value;
+ _LIBCPP_ASSERT(__width != 0,
+ "A zero value isn't allowed and should be impossible, "
+ "due to validations in this function");
+ return __r.__ptr;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI constexpr void __substitute_width_arg_id(auto __arg) {
+ _LIBCPP_ASSERT(__width_as_arg == 1,
+ "Substitute width called when no substitution is required");
+
+ // The clearing of the flag isn't required but looks better when debugging
+ // the code.
+ __width_as_arg = 0;
+ __width = __substitute_arg_id(__arg);
+ if (__width == 0)
+ __throw_format_error(
+ "A format-spec width field replacement should have a positive value");
+ }
+};
+
+class _LIBCPP_TYPE_VIS __parser_precision {
+public:
+ /** Contains a precision or an arg-id. */
+ uint32_t __precision : 31 {__format::__number_max};
+ /**
+ * Determines whether the value stored is a precision or an arg-id.
+ *
+ * @note Since @ref __precision == @ref __format::__number_max is a valid
+ * value, the default value contains an arg-id of INT32_MAX. (This number of
+ * arguments isn't supported by compilers.) This is used to detect whether
+ * the std-format-spec contains a precision field.
+ */
+ uint32_t __precision_as_arg : 1 {1};
+
+protected:
+ /**
+ * Does the supplied std-format-spec contain a precision field?
+ *
+ * When the field isn't present there's no truncation required. This can be
+ * used to optimize the formatting.
+ */
+ constexpr bool __has_precision_field() const noexcept {
+
+ return __precision_as_arg == 0 || // Contains a value?
+ __precision != __format::__number_max; // The arg-id is valid?
+ }
+
+ /**
+ * Does the supplied precision field contain an arg-id?
+ *
+ * If @c true the formatter needs to call @ref __substitute_precision_arg_id.
+ */
+ constexpr bool __precision_needs_substitution() const noexcept {
+ return __precision_as_arg && __precision != __format::__number_max;
+ }
+
+ template <class _CharT>
+ _LIBCPP_HIDE_FROM_ABI constexpr const _CharT*
+ __parse(const _CharT* __begin, const _CharT* __end, auto& __parse_ctx) {
+ if (*__begin != _CharT('.'))
+ return __begin;
+
+ ++__begin;
+ if (__begin == __end)
+ __throw_format_error("End of input while parsing format-spec precision");
+
+ if (*__begin == _CharT('0')) {
+ ++__begin;
+ if (__begin != __end && *__begin >= '0' && *__begin <= '9')
+ __throw_format_error(
+ "A format-spec precision field shouldn't have a leading zero");
+
+ __precision = 0;
+ __precision_as_arg = 0;
+ return __begin;
+ }
+
+ if (*__begin == _CharT('{')) {
+ __format::__parse_number_result __arg_id =
+ __parse_arg_id(++__begin, __end, __parse_ctx);
+ _LIBCPP_ASSERT(__arg_id.__value != __format::__number_max,
+ "Unsupported number of arguments, since this number of "
+ "arguments is used a special value");
+ __precision = __arg_id.__value;
+ return __arg_id.__ptr;
+ }
+
+ if (*__begin < _CharT('0') || *__begin > _CharT('9'))
+ __throw_format_error(
+ "The format-spec precision field doesn't contain a value or arg-id");
+
+ __format::__parse_number_result __r =
+ __format::__parse_number(__begin, __end);
+ __precision = __r.__value;
+ __precision_as_arg = 0;
+ return __r.__ptr;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI constexpr void __substitute_precision_arg_id(
+ auto __arg) {
+ _LIBCPP_ASSERT(
+ __precision_as_arg == 1 && __precision != __format::__number_max,
+ "Substitute precision called when no substitution is required");
+
+ // The clearing of the flag isn't required but looks better when debugging
+ // the code.
+ __precision_as_arg = 0;
+ __precision = __substitute_arg_id(__arg);
+ }
+};
+
+template <class _CharT>
+_LIBCPP_HIDE_FROM_ABI constexpr const _CharT*
+__parse_locale_specific_form(const _CharT* __begin, _Flags& __flags) noexcept {
+ if (*__begin == _CharT('L')) {
+ __flags.__locale_specific_form = true;
+ ++__begin;
+ }
+
+ return __begin;
+}
+
+template <class _CharT>
+_LIBCPP_HIDE_FROM_ABI constexpr const _CharT*
+__parse_type(const _CharT* __begin, _Flags& __flags) {
+
+ // Determines the type. It does not validate whether the selected type is
+ // valid. Most formatters have optional fields that are only allowed for
+ // certain types. These parsers need to do validation after the type has
+ // been parsed. So it's easier to implement the validation for all types in
+ // the specific parse function.
+ switch (*__begin) {
+ case 'A':
+ __flags.__type = _Flags::_Type::__float_hexadecimal_upper_case;
+ break;
+ case 'B':
+ __flags.__type = _Flags::_Type::__binary_upper_case;
+ break;
+ case 'E':
+ __flags.__type = _Flags::_Type::__scientific_upper_case;
+ break;
+ case 'F':
+ __flags.__type = _Flags::_Type::__fixed_upper_case;
+ break;
+ case 'G':
+ __flags.__type = _Flags::_Type::__general_upper_case;
+ break;
+ case 'X':
+ __flags.__type = _Flags::_Type::__hexadecimal_upper_case;
+ break;
+ case 'a':
+ __flags.__type = _Flags::_Type::__float_hexadecimal_lower_case;
+ break;
+ case 'b':
+ __flags.__type = _Flags::_Type::__binary_lower_case;
+ break;
+ case 'c':
+ __flags.__type = _Flags::_Type::__char;
+ break;
+ case 'd':
+ __flags.__type = _Flags::_Type::__decimal;
+ break;
+ case 'e':
+ __flags.__type = _Flags::_Type::__scientific_lower_case;
+ break;
+ case 'f':
+ __flags.__type = _Flags::_Type::__fixed_lower_case;
+ break;
+ case 'g':
+ __flags.__type = _Flags::_Type::__general_lower_case;
+ break;
+ case 'o':
+ __flags.__type = _Flags::_Type::__octal;
+ break;
+ case 'p':
+ __flags.__type = _Flags::_Type::__pointer;
+ break;
+ case 's':
+ __flags.__type = _Flags::_Type::__string;
+ break;
+ case 'x':
+ __flags.__type = _Flags::_Type::__hexadecimal_lower_case;
+ break;
+ default:
+ return __begin;
+ }
+ return ++__begin;
+}
+
+/**
+ * The parser for the std-format-spec.
+ *
+ * [format.string.std]/1 specifies the std-format-spec:
+ * fill-and-align sign # 0 width precision L type
+ *
+ * All these fields are optional. Whether these fields can be used depend on:
+ * - The type supplied to the format string.
+ * E.g. A string never uses the sign field so the field may not be set.
+ * This constrain is validated by the parsers in this file.
+ * - The supplied value for the optional type field.
+ * E.g. A int formatted as decimal uses the sign field.
+ * When formatted as a char the sign field may no longer be set.
+ *   This constraint isn't validated by the parsers in this file.
+ *
+ * The base classes are ordered to minimize the amount of padding.
+ *
+ * This implements the parser for the string types.
+ */
+template <class _CharT>
+class _LIBCPP_TEMPLATE_VIS __parser_string
+ : public __parser_width, // provides __width(|as_arg)
+ public __parser_precision, // provides __precision(|as_arg)
+ public __parser_fill_align<_CharT>, // provides __fill and uses __flags
+ public _Flags // provides __flags
+{
+public:
+ using char_type = _CharT;
+
+ _LIBCPP_HIDE_FROM_ABI constexpr __parser_string() {
+ this->__alignment = _Flags::_Alignment::__left;
+ }
+
+ /**
+ * The low-level std-format-spec parse function.
+ *
+ * @pre __begin points at the beginning of the std-format-spec. This means
+ * directly after the ':'.
+ * @pre The std-format-spec parses the entire input, or the first unmatched
+ * character is a '}'.
+ *
+ * @returns The iterator pointing at the last parsed character.
+ */
+ _LIBCPP_HIDE_FROM_ABI constexpr auto parse(auto& __parse_ctx)
+ -> decltype(__parse_ctx.begin()) {
+ auto __it = __parse(__parse_ctx);
+ __process_display_type();
+ return __it;
+ }
+
+private:
+ /**
+ * Parses the std-format-spec.
+ *
+ * @throws __throw_format_error When @a __parse_ctx contains an ill-formed
+ * std-format-spec.
+ *
+   * @returns An iterator to the end of the input or pointing at the closing '}'.
+ */
+ _LIBCPP_HIDE_FROM_ABI constexpr auto __parse(auto& __parse_ctx)
+ -> decltype(__parse_ctx.begin()) {
+
+ auto __begin = __parse_ctx.begin();
+ auto __end = __parse_ctx.end();
+ if (__begin == __end)
+ return __begin;
+
+ __begin = __parser_fill_align<_CharT>::__parse(__begin, __end,
+ static_cast<_Flags&>(*this));
+ if (__begin == __end)
+ return __begin;
+
+ __begin = __parser_width::__parse(__begin, __end, __parse_ctx);
+ if (__begin == __end)
+ return __begin;
+
+ __begin = __parser_precision::__parse(__begin, __end, __parse_ctx);
+ if (__begin == __end)
+ return __begin;
+
+ __begin = __parse_type(__begin, static_cast<_Flags&>(*this));
+
+ if (__begin != __end && *__begin != _CharT('}'))
+ __throw_format_error(
+ "The format-spec should consume the input or end with a '}'");
+
+ return __begin;
+ }
+
+ /** Processes the parsed std-format-spec based on the parsed display type. */
+ _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type() {
+ switch (this->__type) {
+ case _Flags::_Type::__default:
+ case _Flags::_Type::__string:
+ break;
+
+ default:
+ __throw_format_error("The format-spec type has a type not supported for "
+ "a string argument");
+ }
+ }
+};
+
+/**
+ * The parser for the std-format-spec.
+ *
+ * This implements the parser for the integral types. This includes the
+ * character type and boolean type.
+ *
+ * See @ref __parser_string.
+ */
+template <class _CharT>
+class _LIBCPP_TEMPLATE_VIS __parser_integral
+ : public __parser_width, // provides __width(|as_arg)
+ public __parser_fill_align<_CharT>, // provides __fill and uses __flags
+ public _Flags // provides __flags
+{
+public:
+ using char_type = _CharT;
+
+protected:
+ /**
+ * The low-level std-format-spec parse function.
+ *
+ * @pre __begin points at the beginning of the std-format-spec. This means
+ * directly after the ':'.
+ * @pre The std-format-spec parses the entire input, or the first unmatched
+ * character is a '}'.
+ *
+ * @returns The iterator pointing at the last parsed character.
+ */
+ _LIBCPP_HIDE_FROM_ABI constexpr auto __parse(auto& __parse_ctx)
+ -> decltype(__parse_ctx.begin()) {
+ auto __begin = __parse_ctx.begin();
+ auto __end = __parse_ctx.end();
+ if (__begin == __end)
+ return __begin;
+
+ __begin = __parser_fill_align<_CharT>::__parse(__begin, __end,
+ static_cast<_Flags&>(*this));
+ if (__begin == __end)
+ return __begin;
+
+ __begin = __parse_sign(__begin, static_cast<_Flags&>(*this));
+ if (__begin == __end)
+ return __begin;
+
+ __begin = __parse_alternate_form(__begin, static_cast<_Flags&>(*this));
+ if (__begin == __end)
+ return __begin;
+
+ __begin = __parse_zero_padding(__begin, static_cast<_Flags&>(*this));
+ if (__begin == __end)
+ return __begin;
+
+ __begin = __parser_width::__parse(__begin, __end, __parse_ctx);
+ if (__begin == __end)
+ return __begin;
+
+ __begin =
+ __parse_locale_specific_form(__begin, static_cast<_Flags&>(*this));
+ if (__begin == __end)
+ return __begin;
+
+ __begin = __parse_type(__begin, static_cast<_Flags&>(*this));
+
+ if (__begin != __end && *__begin != _CharT('}'))
+ __throw_format_error(
+ "The format-spec should consume the input or end with a '}'");
+
+ return __begin;
+ }
+
+ /**
+ * Handles the post-parsing updates for the integer types.
+ *
+ * Updates the zero-padding and alignment for integer types.
+ *
+ * [format.string.std]/13
+ * If the 0 character and an align option both appear, the 0 character is
+ * ignored.
+ *
+ * For the formatter a @ref __default alignment means zero-padding. Update
+   * the alignment based on the parsed format string.
+ */
+ _LIBCPP_HIDE_FROM_ABI constexpr void __handle_integer() noexcept {
+ this->__zero_padding &= this->__alignment == _Flags::_Alignment::__default;
+ if (!this->__zero_padding &&
+ this->__alignment == _Flags::_Alignment::__default)
+ this->__alignment = _Flags::_Alignment::__right;
+ }
+
+ /**
+ * Handles the post-parsing updates for the character types.
+ *
+ * Sets the alignment and validates the format flags set for a character type.
+ *
+   * At the moment the validation for a character and a Boolean behaves the
+ * same, but this may change in the future.
+ * Specifically at the moment the locale-specific form is allowed for the
+ * char output type, but it has no effect on the output.
+ */
+ _LIBCPP_HIDE_FROM_ABI constexpr void __handle_char() { __handle_bool(); }
+
+ /**
+ * Handles the post-parsing updates for the Boolean types.
+ *
+ * Sets the alignment and validates the format flags set for a Boolean type.
+ */
+ _LIBCPP_HIDE_FROM_ABI constexpr void __handle_bool() {
+ if (this->__sign != _Flags::_Sign::__default)
+ __throw_format_error("A sign field isn't allowed in this format-spec");
+
+ if (this->__alternate_form)
+ __throw_format_error(
+ "An alternate form field isn't allowed in this format-spec");
+
+ if (this->__zero_padding)
+ __throw_format_error(
+ "A zero-padding field isn't allowed in this format-spec");
+
+ if (this->__alignment == _Flags::_Alignment::__default)
+ this->__alignment = _Flags::_Alignment::__left;
+ }
+};
+
+// TODO FMT Add a parser for floating-point values.
+// TODO FMT Add a parser for pointer values.
+
+/** Helper struct returned from @ref __get_string_alignment. */
+template <class _CharT>
+struct _LIBCPP_TEMPLATE_VIS __string_alignment {
+ /** Points beyond the last character to write to the output. */
+ const _CharT* __last;
+ /**
+ * The estimated number of columns in the output or 0.
+ *
+   * The exact number of columns in the output only needs to be known when the
+   * output has to be aligned. So if the formatted output only has a minimum
+   * width the exact size isn't important; it's only important to know the
+   * minimum has been reached. The minimum width is the width specified in the
+   * format-spec.
+ *
+ * For example in this code @code std::format("{:10}", MyString); @endcode
+ * the width estimation can stop once the algorithm has determined the output
+ * width is 10 columns.
+ *
+ * So if:
+ * * @ref __align == @c true the @ref __size is the estimated number of
+ * columns required.
+ * * @ref __align == @c false the @ref __size is the estimated number of
+ * columns required or 0 when the estimation algorithm stopped prematurely.
+ */
+ ptrdiff_t __size;
+ /**
+   * Whether the output needs to be aligned.
+   *
+   * When alignment is needed the output algorithm needs to add the proper
+   * padding. Otherwise the output algorithm just needs to copy the input up to
+ * @ref __last.
+ */
+ bool __align;
+};
+
+#ifndef _LIBCPP_HAS_NO_UNICODE
+namespace __detail {
+
+/**
+ * Unicode column width estimates.
+ *
+ * Unicode can be stored in several formats: UTF-8, UTF-16, and UTF-32.
+ * Depending on the format the relation between the number of code units stored and
+ * the number of output columns differs. The first relation is the number of
+ * code units forming a code point. (The text assumes the code units are
+ * unsigned.)
+ * - UTF-8: The number of code units is between one and four. The first 128
+ *   Unicode code points match the ASCII character set. When the highest bit is
+ * set it means the code point has more than one code unit.
+ * - UTF-16: The number of code units is between one and two. When the first
+ *   code unit is in the range [0xd800,0xdfff] it means the code point uses two
+ * code units.
+ * - UTF-32: The number of code units is always one.
+ *
+ * The mapping from a code point to the number of columns isn't well defined.
+ * The code uses the estimations defined in [format.string.std]/11. This list
+ * might change in the future.
+ *
+ * The algorithm of @ref __get_string_alignment uses two different scanners:
+ * - The simple scanner @ref __estimate_column_width_fast. This scanner assumes
+ * 1 code unit is 1 column. This scanner stops when it can't be sure the
+ * assumption is valid:
+ * - UTF-8 when the code point is encoded in more than 1 code unit.
+ * - UTF-16 and UTF-32 when the first multi-column code point is encountered.
+ * (The code unit's value is lower than 0xd800 so the 2 code unit encoding
+ * is irrelevant for this scanner.)
+ * Due to these assumptions the scanner is faster than the full scanner. It
+ *   can process all text containing only ASCII. For UTF-16/32 it can process
+ * most (all?) European languages. (Note the set it can process might be
+ * reduced in the future, due to updates in the scanning rules.)
+ * - The full scanner @ref __estimate_column_width. This scanner, if needed,
+ * converts multiple code units into one code point then converts the code
+ * point to a column width.
+ *
+ * See also:
+ * - [format.string.general]/11
+ * - https://en.wikipedia.org/wiki/UTF-8#Encoding
+ * - https://en.wikipedia.org/wiki/UTF-16#U+D800_to_U+DFFF
+ */
+
+/**
+ * The first 2-column code point.
+ *
+ * This is the point where the fast UTF-16/32 scanner needs to stop processing.
+ */
+inline constexpr uint32_t __two_column_code_point = 0x1100;
+
+/** Helper concept for a UTF-8 character type. */
+template <class _CharT>
+concept __utf8_character = same_as<_CharT, char> || same_as<_CharT, char8_t>;
+
+/** Helper concept for a UTF-16 character type. */
+template <class _CharT>
+concept __utf16_character = (same_as<_CharT, wchar_t> && sizeof(wchar_t) == 2) || same_as<_CharT, char16_t>;
+
+/** Helper concept for a UTF-32 character type. */
+template <class _CharT>
+concept __utf32_character = (same_as<_CharT, wchar_t> && sizeof(wchar_t) == 4) || same_as<_CharT, char32_t>;
+
+/** Helper concept for a UTF-16 or UTF-32 character type. */
+template <class _CharT>
+concept __utf16_or_32_character = __utf16_character<_CharT> || __utf32_character<_CharT>;
+
+/**
+ * Converts a code point to the column width.
+ *
+ * The estimations conform to [format.string.general]/11.
+ *
+ * This version expects a value less than 0x1'0000, which fits in at most
+ * three UTF-8 code units.
+ */
+_LIBCPP_HIDE_FROM_ABI inline constexpr int __column_width_3(uint32_t __c) noexcept {
+ _LIBCPP_ASSERT(__c < 0x1'0000,
+ "Use __column_width_4 or __column_width for larger values");
+
+ // clang-format off
+ return 1 + (__c >= 0x1100 && (__c <= 0x115f ||
+ (__c >= 0x2329 && (__c <= 0x232a ||
+ (__c >= 0x2e80 && (__c <= 0x303e ||
+ (__c >= 0x3040 && (__c <= 0xa4cf ||
+ (__c >= 0xac00 && (__c <= 0xd7a3 ||
+ (__c >= 0xf900 && (__c <= 0xfaff ||
+ (__c >= 0xfe10 && (__c <= 0xfe19 ||
+ (__c >= 0xfe30 && (__c <= 0xfe6f ||
+ (__c >= 0xff00 && (__c <= 0xff60 ||
+ (__c >= 0xffe0 && (__c <= 0xffe6
+ ))))))))))))))))))));
+ // clang-format on
+}
+
+/**
+ * @overload
+ *
+ * This version expects a value greater than or equal to 0x1'0000, which is
+ * encoded in four UTF-8 code units.
+ */
+_LIBCPP_HIDE_FROM_ABI inline constexpr int __column_width_4(uint32_t __c) noexcept {
+ _LIBCPP_ASSERT(__c >= 0x1'0000,
+ "Use __column_width_3 or __column_width for smaller values");
+
+ // clang-format off
+ return 1 + (__c >= 0x1'f300 && (__c <= 0x1'f64f ||
+ (__c >= 0x1'f900 && (__c <= 0x1'f9ff ||
+ (__c >= 0x2'0000 && (__c <= 0x2'fffd ||
+ (__c >= 0x3'0000 && (__c <= 0x3'fffd
+ ))))))));
+ // clang-format on
+}
+
+/**
+ * @overload
+ *
+ * The general case, accepting all values.
+ */
+_LIBCPP_HIDE_FROM_ABI inline constexpr int __column_width(uint32_t __c) noexcept {
+ if (__c < 0x1'0000)
+ return __column_width_3(__c);
+
+ return __column_width_4(__c);
+}
+
+/**
+ * Estimate the column width for the UTF-8 sequence using the fast algorithm.
+ */
+template <__utf8_character _CharT>
+_LIBCPP_HIDE_FROM_ABI constexpr const _CharT*
+__estimate_column_width_fast(const _CharT* __first,
+ const _CharT* __last) noexcept {
+ return _VSTD::find_if(__first, __last,
+ [](unsigned char __c) { return __c & 0x80; });
+}
+
+/**
+ * @overload
+ *
+ * The implementation for UTF-16/32.
+ */
+template <__utf16_or_32_character _CharT>
+_LIBCPP_HIDE_FROM_ABI constexpr const _CharT*
+__estimate_column_width_fast(const _CharT* __first,
+ const _CharT* __last) noexcept {
+ return _VSTD::find_if(__first, __last,
+ [](uint32_t __c) { return __c >= 0x1100; });
+}
+
+template <class _CharT>
+struct _LIBCPP_TEMPLATE_VIS __column_width_result {
+ /** The number of output columns. */
+ size_t __width;
+ /**
+ * The last parsed element.
+ *
+ * This limits the original output to fit in the wanted number of columns.
+ */
+ const _CharT* __ptr;
+};
+
+/**
+ * Small helper to determine the width of malformed Unicode.
+ *
+ * @note This function is only needed for UTF-8. While scanning UTF-8 there
+ * are multiple places where it can be detected that the Unicode is malformed.
+ * UTF-16 only requires 1 test and UTF-32 requires no testing.
+ */
+template <__utf8_character _CharT>
+_LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT>
+__estimate_column_width_malformed(const _CharT* __first, const _CharT* __last,
+ size_t __maximum, size_t __result) noexcept {
+ size_t __size = __last - __first;
+ size_t __n = _VSTD::min(__size, __maximum);
+ return {__result + __n, __first + __n};
+}
+
+/**
+ * Determines the number of output columns needed to render the input.
+ *
+ * @note When the scanner encounters malformed Unicode it acts as if every code
+ * unit at the end of the input is one output column. It's expected the output
+ * terminal will replace these malformed code units with a one-column
+ * replacement character.
+ *
+ * @param __first Points to the first element of the input range.
+ * @param __last Points beyond the last element of the input range.
+ * @param __maximum The maximum number of output columns. The returned number
+ * of estimated output columns will not exceed this value.
+ */
+template <__utf8_character _CharT>
+_LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT>
+__estimate_column_width(const _CharT* __first, const _CharT* __last,
+ size_t __maximum) noexcept {
+ size_t __result = 0;
+
+ while (__first != __last) {
+ // Based on the number of leading 1 bits the number of code units in the
+ // code point can be determined. See
+ // https://en.wikipedia.org/wiki/UTF-8#Encoding
+ switch (_VSTD::countl_one(static_cast<unsigned char>(*__first))) {
+ case 0: // 1-code unit encoding: all 1 column
+ ++__result;
+ ++__first;
+ break;
+
+ case 2: // 2-code unit encoding: all 1 column
+ // Malformed Unicode.
+ if (__last - __first < 2) [[unlikely]]
+ return __estimate_column_width_malformed(__first, __last, __maximum,
+ __result);
+ __first += 2;
+ ++__result;
+ break;
+
+ case 3: // 3-code unit encoding: either 1 or 2 columns
+ // Malformed Unicode.
+ if (__last - __first < 3) [[unlikely]]
+ return __estimate_column_width_malformed(__first, __last, __maximum,
+ __result);
+ {
+ uint32_t __c = static_cast<unsigned char>(*__first++) & 0x0f;
+ __c <<= 6;
+ __c |= static_cast<unsigned char>(*__first++) & 0x3f;
+ __c <<= 6;
+ __c |= static_cast<unsigned char>(*__first++) & 0x3f;
+ __result += __column_width_3(__c);
+ if (__result > __maximum)
+ return {__result - 2, __first - 3};
+ }
+ break;
+ case 4: // 4-code unit encoding: either 1 or 2 columns
+ // Malformed Unicode.
+ if (__last - __first < 4) [[unlikely]]
+ return __estimate_column_width_malformed(__first, __last, __maximum,
+ __result);
+ {
+ uint32_t __c = static_cast<unsigned char>(*__first++) & 0x07;
+ __c <<= 6;
+ __c |= static_cast<unsigned char>(*__first++) & 0x3f;
+ __c <<= 6;
+ __c |= static_cast<unsigned char>(*__first++) & 0x3f;
+ __c <<= 6;
+ __c |= static_cast<unsigned char>(*__first++) & 0x3f;
+ __result += __column_width_4(__c);
+ if (__result > __maximum)
+ return {__result - 2, __first - 4};
+ }
+ break;
+ default:
+ // Malformed Unicode.
+ return __estimate_column_width_malformed(__first, __last, __maximum,
+ __result);
+ }
+
+ if (__result >= __maximum)
+ return {__result, __first};
+ }
+ return {__result, __first};
+}
+
+template <__utf16_character _CharT>
+_LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT>
+__estimate_column_width(const _CharT* __first, const _CharT* __last,
+ size_t __maximum) noexcept {
+ size_t __result = 0;
+
+ while (__first != __last) {
+ uint32_t __c = *__first;
+ // Is the code unit part of a surrogate pair? See
+ // https://en.wikipedia.org/wiki/UTF-16#U+D800_to_U+DFFF
+    if (__c >= 0xd800 && __c <= 0xdfff) {
+ // Malformed Unicode.
+ if (__last - __first < 2) [[unlikely]]
+ return {__result + 1, __first + 1};
+
+ __c -= 0xd800;
+ __c <<= 10;
+ __c += (*(__first + 1) - 0xdc00);
+ __c += 0x10'000;
+
+ __result += __column_width_4(__c);
+ if (__result > __maximum)
+ return {__result - 2, __first};
+ __first += 2;
+ } else {
+ __result += __column_width_3(__c);
+ if (__result > __maximum)
+ return {__result - 2, __first};
+ ++__first;
+ }
+
+ if (__result >= __maximum)
+ return {__result, __first};
+ }
+
+ return {__result, __first};
+}
+
+template <__utf32_character _CharT>
+_LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT>
+__estimate_column_width(const _CharT* __first, const _CharT* __last,
+ size_t __maximum) noexcept {
+ size_t __result = 0;
+
+ while (__first != __last) {
+    uint32_t __c = *__first;
+ __result += __column_width(__c);
+
+ if (__result > __maximum)
+ return {__result - 2, __first};
+
+ ++__first;
+ if (__result >= __maximum)
+ return {__result, __first};
+ }
+
+ return {__result, __first};
+}
+
+} // namespace __detail
+
+template <class _CharT>
+_LIBCPP_HIDE_FROM_ABI constexpr __string_alignment<_CharT>
+__get_string_alignment(const _CharT* __first, const _CharT* __last,
+ ptrdiff_t __width, ptrdiff_t __precision) noexcept {
+ _LIBCPP_ASSERT(__width != 0 || __precision != -1,
+ "The function has no effect and shouldn't be used");
+
+ // TODO FMT There might be more optimizations possible:
+ // If __precision == __format::__number_max and the encoding is:
+ // * UTF-8 : 4 * (__last - __first) >= __width
+ // * UTF-16 : 2 * (__last - __first) >= __width
+ // * UTF-32 : (__last - __first) >= __width
+ // In these cases it's certain the output is at least the requested width.
+ // It's unknown how often this happens in practice. For now the improvement
+ // isn't implemented.
+
+ /*
+ * First assume there are no special Unicode code units in the input.
+ * - Apply the precision (this may reduce the size of the input). When
+   *   __precision == -1 this step is omitted.
+ * - Scan for special code units in the input.
+   * If our assumption was correct, __pos will be at the end of the input.
+ */
+ const ptrdiff_t __length = __last - __first;
+ const _CharT* __limit =
+ __first +
+ (__precision == -1 ? __length : _VSTD::min(__length, __precision));
+ ptrdiff_t __size = __limit - __first;
+ const _CharT* __pos =
+ __detail::__estimate_column_width_fast(__first, __limit);
+
+ if (__pos == __limit)
+ return {__limit, __size, __size < __width};
+
+ /*
+ * Our assumption was wrong, there are special Unicode code units.
+ * The range [__first, __pos) contains a set of code units with the
+ * following property:
+ * Every _CharT in the range will be rendered in 1 column.
+ *
+   * If there's no maximum width and the parsed size already exceeds the
+   * minimum required width, the real size isn't important, so bail out.
+ */
+ if (__precision == -1 && (__pos - __first) >= __width)
+ return {__last, 0, false};
+
+ /* If there's a __precision, truncate the output to that width. */
+ ptrdiff_t __prefix = __pos - __first;
+ if (__precision != -1) {
+ _LIBCPP_ASSERT(__precision > __prefix, "Logic error.");
+    auto __length_info = __detail::__estimate_column_width(
+        __pos, __last, __precision - __prefix);
+    __size = __length_info.__width + __prefix;
+    return {__length_info.__ptr, __size, __size < __width};
+ }
+
+ /* Else use __width to determine the number of required padding characters. */
+ _LIBCPP_ASSERT(__width > __prefix, "Logic error.");
+ /*
+   * The column width is always one or two columns. For the precision the wanted
+   * column width is the maximum, for the width it's the minimum. Using the
+   * width estimation with its truncating behavior will produce the wrong
+   * result in the following case:
+   * - The last code unit processed requires two columns and exceeds the
+   *   maximum column width.
+   * Increasing __maximum by one avoids this issue. (It means it may
+ * pass one code point more than required to determine the proper result;
+ * that however isn't a problem for the algorithm.)
+ */
+ size_t __maximum = 1 + __width - __prefix;
+  auto __length_info =
+      __detail::__estimate_column_width(__pos, __last, __maximum);
+  if (__length_info.__ptr != __last) {
+    // Consumed enough code units to know the output reaches the required
+    // width. The exact size of the string is unknown; we only know we don't
+    // need to align the output.
+    _LIBCPP_ASSERT(static_cast<ptrdiff_t>(__length_info.__width + __prefix) >=
+                       __width,
+                   "Logic error");
+    return {__last, 0, false};
+  }
+
+  __size = __length_info.__width + __prefix;
+ return {__last, __size, __size < __width};
+}
+#else // _LIBCPP_HAS_NO_UNICODE
+template <class _CharT>
+_LIBCPP_HIDE_FROM_ABI constexpr __string_alignment<_CharT>
+__get_string_alignment(const _CharT* __first, const _CharT* __last,
+ ptrdiff_t __width, ptrdiff_t __precision) noexcept {
+ const ptrdiff_t __length = __last - __first;
+ const _CharT* __limit =
+ __first +
+ (__precision == -1 ? __length : _VSTD::min(__length, __precision));
+ ptrdiff_t __size = __limit - __first;
+ return {__limit, __size, __size < __width};
+}
+#endif // _LIBCPP_HAS_NO_UNICODE
+
+} // namespace __format_spec
+
+# endif // !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+#endif //_LIBCPP_STD_VER > 17
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H
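
For illustration, a minimal sketch of the std-format-spec fields the parsers above handle (fill-and-align, width, precision, type), assuming a complete C++20 <format> implementation; the format string and expected result are illustrative only:

#include <format>
#include <string>

int main() {
  // '*' is the fill, '<' left-aligns, 10 is the width, .4 is the precision
  // and 's' is the string display type: the argument is truncated to four
  // columns and then padded to ten.
  std::string s = std::format("{:*<10.4s}", "Hello world");
  return s == "Hell******" ? 0 : 1;
}
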
diff --git a/libcxx/include/__function_like.h b/libcxx/include/__function_like.h
index 8a3597bacdcd..4075355174d9 100644
--- a/libcxx/include/__function_like.h
+++ b/libcxx/include/__function_like.h
@@ -16,9 +16,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
#if !defined(_LIBCPP_HAS_NO_RANGES)
@@ -51,6 +48,4 @@ protected:
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ITERATOR_FUNCTION_LIKE_H
diff --git a/libcxx/include/__functional/bind.h b/libcxx/include/__functional/bind.h
index 79dfad723c68..0b74d91b7746 100644
--- a/libcxx/include/__functional/bind.h
+++ b/libcxx/include/__functional/bind.h
@@ -29,7 +29,7 @@ template<class _Tp> struct _LIBCPP_TEMPLATE_VIS is_bind_expression
#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR constexpr size_t is_bind_expression_v = is_bind_expression<_Tp>::value;
+inline constexpr size_t is_bind_expression_v = is_bind_expression<_Tp>::value;
#endif
template<class _Tp> struct __is_placeholder : public integral_constant<int, 0> {};
@@ -38,7 +38,7 @@ template<class _Tp> struct _LIBCPP_TEMPLATE_VIS is_placeholder
#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR constexpr size_t is_placeholder_v = is_placeholder<_Tp>::value;
+inline constexpr size_t is_placeholder_v = is_placeholder<_Tp>::value;
#endif
namespace placeholders
@@ -58,16 +58,16 @@ _LIBCPP_FUNC_VIS extern const __ph<8> _8;
_LIBCPP_FUNC_VIS extern const __ph<9> _9;
_LIBCPP_FUNC_VIS extern const __ph<10> _10;
#else
-/* _LIBCPP_INLINE_VAR */ constexpr __ph<1> _1{};
-/* _LIBCPP_INLINE_VAR */ constexpr __ph<2> _2{};
-/* _LIBCPP_INLINE_VAR */ constexpr __ph<3> _3{};
-/* _LIBCPP_INLINE_VAR */ constexpr __ph<4> _4{};
-/* _LIBCPP_INLINE_VAR */ constexpr __ph<5> _5{};
-/* _LIBCPP_INLINE_VAR */ constexpr __ph<6> _6{};
-/* _LIBCPP_INLINE_VAR */ constexpr __ph<7> _7{};
-/* _LIBCPP_INLINE_VAR */ constexpr __ph<8> _8{};
-/* _LIBCPP_INLINE_VAR */ constexpr __ph<9> _9{};
-/* _LIBCPP_INLINE_VAR */ constexpr __ph<10> _10{};
+/* inline */ constexpr __ph<1> _1{};
+/* inline */ constexpr __ph<2> _2{};
+/* inline */ constexpr __ph<3> _3{};
+/* inline */ constexpr __ph<4> _4{};
+/* inline */ constexpr __ph<5> _5{};
+/* inline */ constexpr __ph<6> _6{};
+/* inline */ constexpr __ph<7> _7{};
+/* inline */ constexpr __ph<8> _8{};
+/* inline */ constexpr __ph<9> _9{};
+/* inline */ constexpr __ph<10> _10{};
#endif // defined(_LIBCPP_CXX03_LANG) || defined(_LIBCPP_BUILDING_LIBRARY)
} // placeholders
@@ -97,7 +97,7 @@ __mu_expand(_Ti& __ti, tuple<_Uj...>& __uj, __tuple_indices<_Indx...>)
template <class _Ti, class ..._Uj>
inline _LIBCPP_INLINE_VISIBILITY
-typename _EnableIf
+typename __enable_if_t
<
is_bind_expression<_Ti>::value,
__invoke_of<_Ti&, _Uj...>
diff --git a/libcxx/include/__functional/bind_back.h b/libcxx/include/__functional/bind_back.h
new file mode 100644
index 000000000000..a0089e1fb090
--- /dev/null
+++ b/libcxx/include/__functional/bind_back.h
@@ -0,0 +1,65 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___FUNCTIONAL_BIND_BACK_H
+#define _LIBCPP___FUNCTIONAL_BIND_BACK_H
+
+#include <__config>
+#include <__functional/invoke.h>
+#include <__functional/perfect_forward.h>
+#include <__utility/forward.h>
+#include <__utility/integer_sequence.h>
+#include <tuple>
+#include <type_traits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17
+
+template <size_t _NBound, class = make_index_sequence<_NBound>>
+struct __bind_back_op;
+
+template <size_t _NBound, size_t ..._Ip>
+struct __bind_back_op<_NBound, index_sequence<_Ip...>> {
+ template <class _Fn, class _Bound, class ..._Args>
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr auto operator()(_Fn&& __f, _Bound&& __bound, _Args&& ...__args) const
+ noexcept(noexcept(_VSTD::invoke(_VSTD::forward<_Fn>(__f), _VSTD::forward<_Args>(__args)..., _VSTD::get<_Ip>(_VSTD::forward<_Bound>(__bound))...)))
+ -> decltype( _VSTD::invoke(_VSTD::forward<_Fn>(__f), _VSTD::forward<_Args>(__args)..., _VSTD::get<_Ip>(_VSTD::forward<_Bound>(__bound))...))
+ { return _VSTD::invoke(_VSTD::forward<_Fn>(__f), _VSTD::forward<_Args>(__args)..., _VSTD::get<_Ip>(_VSTD::forward<_Bound>(__bound))...); }
+};
+
+template <class _Fn, class _BoundArgs>
+struct __bind_back_t : __perfect_forward<__bind_back_op<tuple_size_v<_BoundArgs>>, _Fn, _BoundArgs> {
+ using __perfect_forward<__bind_back_op<tuple_size_v<_BoundArgs>>, _Fn, _BoundArgs>::__perfect_forward;
+};
+
+template <class _Fn, class ..._Args, class = enable_if_t<
+ _And<
+ is_constructible<decay_t<_Fn>, _Fn>,
+ is_move_constructible<decay_t<_Fn>>,
+ is_constructible<decay_t<_Args>, _Args>...,
+ is_move_constructible<decay_t<_Args>>...
+ >::value
+>>
+_LIBCPP_HIDE_FROM_ABI
+constexpr auto __bind_back(_Fn&& __f, _Args&&... __args)
+ noexcept(noexcept(__bind_back_t<decay_t<_Fn>, tuple<decay_t<_Args>...>>(_VSTD::forward<_Fn>(__f), _VSTD::forward_as_tuple(_VSTD::forward<_Args>(__args)...))))
+ -> decltype( __bind_back_t<decay_t<_Fn>, tuple<decay_t<_Args>...>>(_VSTD::forward<_Fn>(__f), _VSTD::forward_as_tuple(_VSTD::forward<_Args>(__args)...)))
+ { return __bind_back_t<decay_t<_Fn>, tuple<decay_t<_Args>...>>(_VSTD::forward<_Fn>(__f), _VSTD::forward_as_tuple(_VSTD::forward<_Args>(__args)...)); }
+
+#endif // _LIBCPP_STD_VER > 17
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___FUNCTIONAL_BIND_BACK_H
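
A brief sketch of the call semantics the new internal __bind_back helper is meant to provide; since it isn't a public API, the equivalence is shown with a plain lambda and the names below are illustrative only:

#include <cassert>

int subtract(int a, int b) { return a - b; }

int main() {
  // __bind_back(subtract, 1) yields a wrapper g where g(10) invokes
  // subtract(10, 1): the bound arguments are appended after the call arguments.
  auto g = [](int a) { return subtract(a, 1); }; // stand-in with the same behavior
  assert(g(10) == 9);
}
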
diff --git a/libcxx/include/__functional/bind_front.h b/libcxx/include/__functional/bind_front.h
index 8690499f2b0c..86d4594b6571 100644
--- a/libcxx/include/__functional/bind_front.h
+++ b/libcxx/include/__functional/bind_front.h
@@ -24,25 +24,31 @@ _LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER > 17
-struct __bind_front_op
-{
- template<class... _Args>
- constexpr static auto __call(_Args&&... __args)
- noexcept(noexcept(_VSTD::invoke(_VSTD::forward<_Args>(__args)...)))
- -> decltype( _VSTD::invoke(_VSTD::forward<_Args>(__args)...))
- { return _VSTD::invoke(_VSTD::forward<_Args>(__args)...); }
+struct __bind_front_op {
+ template <class ..._Args>
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr auto operator()(_Args&& ...__args) const
+ noexcept(noexcept(_VSTD::invoke(_VSTD::forward<_Args>(__args)...)))
+ -> decltype( _VSTD::invoke(_VSTD::forward<_Args>(__args)...))
+ { return _VSTD::invoke(_VSTD::forward<_Args>(__args)...); }
};
-template<class _Fn, class... _Args,
- class = _EnableIf<conjunction<is_constructible<decay_t<_Fn>, _Fn>,
- is_move_constructible<decay_t<_Fn>>,
- is_constructible<decay_t<_Args>, _Args>...,
- is_move_constructible<decay_t<_Args>>...
- >::value>>
-constexpr auto bind_front(_Fn&& __f, _Args&&... __args)
-{
- return __perfect_forward<__bind_front_op, _Fn, _Args...>(_VSTD::forward<_Fn>(__f),
- _VSTD::forward<_Args>(__args)...);
+template <class _Fn, class ..._BoundArgs>
+struct __bind_front_t : __perfect_forward<__bind_front_op, _Fn, _BoundArgs...> {
+ using __perfect_forward<__bind_front_op, _Fn, _BoundArgs...>::__perfect_forward;
+};
+
+template <class _Fn, class... _Args, class = enable_if_t<
+ _And<
+ is_constructible<decay_t<_Fn>, _Fn>,
+ is_move_constructible<decay_t<_Fn>>,
+ is_constructible<decay_t<_Args>, _Args>...,
+ is_move_constructible<decay_t<_Args>>...
+ >::value
+>>
+_LIBCPP_HIDE_FROM_ABI
+constexpr auto bind_front(_Fn&& __f, _Args&&... __args) {
+ return __bind_front_t<decay_t<_Fn>, decay_t<_Args>...>(_VSTD::forward<_Fn>(__f), _VSTD::forward<_Args>(__args)...);
}
#endif // _LIBCPP_STD_VER > 17
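
For reference, a minimal usage sketch of the public bind_front entry point reworked above, assuming a C++20 compiler:

#include <cassert>
#include <functional>

int subtract(int a, int b) { return a - b; }

int main() {
  // bind_front stores 10 and prepends it to the call arguments,
  // so ten_minus(4) invokes subtract(10, 4).
  auto ten_minus = std::bind_front(subtract, 10);
  assert(ten_minus(4) == 6);
}
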
diff --git a/libcxx/include/__functional/compose.h b/libcxx/include/__functional/compose.h
new file mode 100644
index 000000000000..d9d75875c2a5
--- /dev/null
+++ b/libcxx/include/__functional/compose.h
@@ -0,0 +1,52 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___FUNCTIONAL_COMPOSE_H
+#define _LIBCPP___FUNCTIONAL_COMPOSE_H
+
+#include <__config>
+#include <__functional/invoke.h>
+#include <__functional/perfect_forward.h>
+#include <__utility/forward.h>
+#include <type_traits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17
+
+struct __compose_op {
+ template<class _Fn1, class _Fn2, class ..._Args>
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr auto operator()(_Fn1&& __f1, _Fn2&& __f2, _Args&&... __args) const
+ noexcept(noexcept(_VSTD::invoke(_VSTD::forward<_Fn1>(__f1), _VSTD::invoke(_VSTD::forward<_Fn2>(__f2), _VSTD::forward<_Args>(__args)...))))
+ -> decltype( _VSTD::invoke(_VSTD::forward<_Fn1>(__f1), _VSTD::invoke(_VSTD::forward<_Fn2>(__f2), _VSTD::forward<_Args>(__args)...)))
+ { return _VSTD::invoke(_VSTD::forward<_Fn1>(__f1), _VSTD::invoke(_VSTD::forward<_Fn2>(__f2), _VSTD::forward<_Args>(__args)...)); }
+};
+
+template <class _Fn1, class _Fn2>
+struct __compose_t : __perfect_forward<__compose_op, _Fn1, _Fn2> {
+ using __perfect_forward<__compose_op, _Fn1, _Fn2>::__perfect_forward;
+};
+
+template <class _Fn1, class _Fn2>
+_LIBCPP_HIDE_FROM_ABI
+constexpr auto __compose(_Fn1&& __f1, _Fn2&& __f2)
+ noexcept(noexcept(__compose_t<decay_t<_Fn1>, decay_t<_Fn2>>(_VSTD::forward<_Fn1>(__f1), _VSTD::forward<_Fn2>(__f2))))
+ -> decltype( __compose_t<decay_t<_Fn1>, decay_t<_Fn2>>(_VSTD::forward<_Fn1>(__f1), _VSTD::forward<_Fn2>(__f2)))
+ { return __compose_t<decay_t<_Fn1>, decay_t<_Fn2>>(_VSTD::forward<_Fn1>(__f1), _VSTD::forward<_Fn2>(__f2)); }
+
+#endif // _LIBCPP_STD_VER > 17
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___FUNCTIONAL_COMPOSE_H
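
A brief sketch of the semantics the new internal __compose helper provides; it isn't a public API, so the equivalent behavior is shown with a plain lambda and the names below are illustrative only:

#include <cassert>

int twice(int x) { return 2 * x; }
int plus_one(int x) { return x + 1; }

int main() {
  // __compose(f, g) yields a wrapper h where h(args...) == f(g(args...)).
  auto h = [](int x) { return twice(plus_one(x)); }; // stand-in with the same behavior
  assert(h(3) == 8);
}
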
diff --git a/libcxx/include/__functional/function.h b/libcxx/include/__functional/function.h
index ba629e1d145e..83fd7c12de2f 100644
--- a/libcxx/include/__functional/function.h
+++ b/libcxx/include/__functional/function.h
@@ -11,6 +11,7 @@
#define _LIBCPP___FUNCTIONAL_FUNCTION_H
#include <__config>
+#include <__debug>
#include <__functional/binary_function.h>
#include <__functional/invoke.h>
#include <__functional/unary_function.h>
@@ -34,10 +35,17 @@ _LIBCPP_BEGIN_NAMESPACE_STD
class _LIBCPP_EXCEPTION_ABI bad_function_call
: public exception
{
-#ifdef _LIBCPP_ABI_BAD_FUNCTION_CALL_KEY_FUNCTION
public:
+// Note that when a key function is not used, every translation unit that uses
+// bad_function_call will end up containing a weak definition of the vtable and
+// typeinfo.
+#ifdef _LIBCPP_ABI_BAD_FUNCTION_CALL_KEY_FUNCTION
virtual ~bad_function_call() _NOEXCEPT;
+#else
+ virtual ~bad_function_call() _NOEXCEPT {}
+#endif
+#ifdef _LIBCPP_ABI_BAD_FUNCTION_CALL_GOOD_WHAT_MESSAGE
virtual const char* what() const _NOEXCEPT;
#endif
};
@@ -126,8 +134,8 @@ class __alloc_func<_Fp, _Ap, _Rp(_ArgTypes...)>
__compressed_pair<_Fp, _Ap> __f_;
public:
- typedef _LIBCPP_NODEBUG_TYPE _Fp _Target;
- typedef _LIBCPP_NODEBUG_TYPE _Ap _Alloc;
+ typedef _LIBCPP_NODEBUG _Fp _Target;
+ typedef _LIBCPP_NODEBUG _Ap _Alloc;
_LIBCPP_INLINE_VISIBILITY
const _Target& __target() const { return __f_.first(); }
@@ -204,7 +212,7 @@ class __default_alloc_func<_Fp, _Rp(_ArgTypes...)> {
_Fp __f_;
public:
- typedef _LIBCPP_NODEBUG_TYPE _Fp _Target;
+ typedef _LIBCPP_NODEBUG _Fp _Target;
_LIBCPP_INLINE_VISIBILITY
const _Target& __target() const { return __f_; }
@@ -1044,7 +1052,7 @@ public:
#endif // _LIBCPP_NO_RTTI
};
-#ifndef _LIBCPP_HAS_NO_DEDUCTION_GUIDES
+#if _LIBCPP_STD_VER >= 17
template<class _Rp, class ..._Ap>
function(_Rp(*)(_Ap...)) -> function<_Rp(_Ap...)>;
@@ -1089,7 +1097,7 @@ struct __strip_signature<_Rp (_Gp::*) (_Ap...) const volatile & noexcept> { usin
template<class _Fp, class _Stripped = typename __strip_signature<decltype(&_Fp::operator())>::type>
function(_Fp) -> function<_Stripped>;
-#endif // !_LIBCPP_HAS_NO_DEDUCTION_GUIDES
+#endif // _LIBCPP_STD_VER >= 17
template<class _Rp, class ..._ArgTypes>
function<_Rp(_ArgTypes...)>::function(const function& __f) : __f_(__f.__f_) {}
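
For reference, a small sketch of the deduction guides whose guard changes above from the deduction-guide feature macro to a C++17 version check, assuming a C++17 or later compiler:

#include <cassert>
#include <functional>

int add(int a, int b) { return a + b; }

int main() {
  // Class template argument deduction picks function<int(int, int)> from the
  // function pointer and function<double(double)> from the lambda's call operator.
  std::function f = add;
  std::function g = [](double d) { return d * 2; };
  assert(f(1, 2) == 3 && g(2.0) == 4.0);
}
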
diff --git a/libcxx/include/__functional/hash.h b/libcxx/include/__functional/hash.h
index ebcbbad13387..b1a3ad94ae2d 100644
--- a/libcxx/include/__functional/hash.h
+++ b/libcxx/include/__functional/hash.h
@@ -26,9 +26,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Size>
@@ -564,6 +561,7 @@ _LIBCPP_SUPPRESS_DEPRECATED_POP
#endif // _LIBCPP_HAS_NO_UNICODE_CHARS
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
_LIBCPP_SUPPRESS_DEPRECATED_PUSH
template <>
struct _LIBCPP_TEMPLATE_VIS hash<wchar_t>
@@ -579,6 +577,7 @@ _LIBCPP_SUPPRESS_DEPRECATED_POP
_LIBCPP_INLINE_VISIBILITY
size_t operator()(wchar_t __v) const _NOEXCEPT {return static_cast<size_t>(__v);}
};
+#endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS
_LIBCPP_SUPPRESS_DEPRECATED_PUSH
template <>
@@ -839,35 +838,33 @@ _LIBCPP_SUPPRESS_DEPRECATED_POP
#ifndef _LIBCPP_CXX03_LANG
template <class _Key, class _Hash>
-using __check_hash_requirements _LIBCPP_NODEBUG_TYPE = integral_constant<bool,
+using __check_hash_requirements _LIBCPP_NODEBUG = integral_constant<bool,
is_copy_constructible<_Hash>::value &&
is_move_constructible<_Hash>::value &&
__invokable_r<size_t, _Hash, _Key const&>::value
>;
template <class _Key, class _Hash = hash<_Key> >
-using __has_enabled_hash _LIBCPP_NODEBUG_TYPE = integral_constant<bool,
+using __has_enabled_hash _LIBCPP_NODEBUG = integral_constant<bool,
__check_hash_requirements<_Key, _Hash>::value &&
is_default_constructible<_Hash>::value
>;
#if _LIBCPP_STD_VER > 14
template <class _Type, class>
-using __enable_hash_helper_imp _LIBCPP_NODEBUG_TYPE = _Type;
+using __enable_hash_helper_imp _LIBCPP_NODEBUG = _Type;
template <class _Type, class ..._Keys>
-using __enable_hash_helper _LIBCPP_NODEBUG_TYPE = __enable_hash_helper_imp<_Type,
+using __enable_hash_helper _LIBCPP_NODEBUG = __enable_hash_helper_imp<_Type,
typename enable_if<__all<__has_enabled_hash<_Keys>::value...>::value>::type
>;
#else
template <class _Type, class ...>
-using __enable_hash_helper _LIBCPP_NODEBUG_TYPE = _Type;
+using __enable_hash_helper _LIBCPP_NODEBUG = _Type;
#endif
#endif // !_LIBCPP_CXX03_LANG
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___FUNCTIONAL_HASH_H
diff --git a/libcxx/include/__functional/not_fn.h b/libcxx/include/__functional/not_fn.h
index 632be5ff096b..81fe112c88ba 100644
--- a/libcxx/include/__functional/not_fn.h
+++ b/libcxx/include/__functional/not_fn.h
@@ -23,21 +23,27 @@ _LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER > 14
-struct __not_fn_op
-{
- template<class... _Args>
- static _LIBCPP_CONSTEXPR_AFTER_CXX17 auto __call(_Args&&... __args)
- noexcept(noexcept(!_VSTD::invoke(_VSTD::forward<_Args>(__args)...)))
- -> decltype( !_VSTD::invoke(_VSTD::forward<_Args>(__args)...))
- { return !_VSTD::invoke(_VSTD::forward<_Args>(__args)...); }
+struct __not_fn_op {
+ template <class... _Args>
+ _LIBCPP_HIDE_FROM_ABI
+ _LIBCPP_CONSTEXPR_AFTER_CXX17 auto operator()(_Args&&... __args) const
+ noexcept(noexcept(!_VSTD::invoke(_VSTD::forward<_Args>(__args)...)))
+ -> decltype( !_VSTD::invoke(_VSTD::forward<_Args>(__args)...))
+ { return !_VSTD::invoke(_VSTD::forward<_Args>(__args)...); }
};
-template<class _Fn,
- class = _EnableIf<is_constructible_v<decay_t<_Fn>, _Fn> &&
- is_move_constructible_v<_Fn>>>
-_LIBCPP_CONSTEXPR_AFTER_CXX17 auto not_fn(_Fn&& __f)
-{
- return __perfect_forward<__not_fn_op, _Fn>(_VSTD::forward<_Fn>(__f));
+template <class _Fn>
+struct __not_fn_t : __perfect_forward<__not_fn_op, _Fn> {
+ using __perfect_forward<__not_fn_op, _Fn>::__perfect_forward;
+};
+
+template <class _Fn, class = enable_if_t<
+ is_constructible_v<decay_t<_Fn>, _Fn> &&
+ is_move_constructible_v<decay_t<_Fn>>
+>>
+_LIBCPP_HIDE_FROM_ABI
+_LIBCPP_CONSTEXPR_AFTER_CXX17 auto not_fn(_Fn&& __f) {
+ return __not_fn_t<decay_t<_Fn>>(_VSTD::forward<_Fn>(__f));
}
#endif // _LIBCPP_STD_VER > 14
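
For reference, a minimal usage sketch of not_fn, whose implementation is reworked above onto __perfect_forward; assumes a C++17 or later compiler:

#include <algorithm>
#include <cassert>
#include <cctype>
#include <functional>
#include <string>

int main() {
  std::string s = "   indented";
  // not_fn wraps the predicate and negates its result, so find_if stops at
  // the first non-whitespace character.
  auto it = std::find_if(s.begin(), s.end(),
                         std::not_fn([](unsigned char c) { return std::isspace(c) != 0; }));
  assert(it != s.end() && *it == 'i');
}
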
diff --git a/libcxx/include/__functional/operations.h b/libcxx/include/__functional/operations.h
index 667d17988bc4..0c7c6d4fcfaf 100644
--- a/libcxx/include/__functional/operations.h
+++ b/libcxx/include/__functional/operations.h
@@ -53,9 +53,9 @@ struct _LIBCPP_TEMPLATE_VIS plus<void>
template <class _T1, class _T2>
_LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
auto operator()(_T1&& __t, _T2&& __u) const
- _NOEXCEPT_(noexcept(_VSTD::forward<_T1>(__t) + _VSTD::forward<_T2>(__u)))
- -> decltype (_VSTD::forward<_T1>(__t) + _VSTD::forward<_T2>(__u))
- { return _VSTD::forward<_T1>(__t) + _VSTD::forward<_T2>(__u); }
+ noexcept(noexcept(_VSTD::forward<_T1>(__t) + _VSTD::forward<_T2>(__u)))
+ -> decltype( _VSTD::forward<_T1>(__t) + _VSTD::forward<_T2>(__u))
+ { return _VSTD::forward<_T1>(__t) + _VSTD::forward<_T2>(__u); }
typedef void is_transparent;
};
#endif
@@ -90,9 +90,9 @@ struct _LIBCPP_TEMPLATE_VIS minus<void>
template <class _T1, class _T2>
_LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
auto operator()(_T1&& __t, _T2&& __u) const
- _NOEXCEPT_(noexcept(_VSTD::forward<_T1>(__t) - _VSTD::forward<_T2>(__u)))
- -> decltype (_VSTD::forward<_T1>(__t) - _VSTD::forward<_T2>(__u))
- { return _VSTD::forward<_T1>(__t) - _VSTD::forward<_T2>(__u); }
+ noexcept(noexcept(_VSTD::forward<_T1>(__t) - _VSTD::forward<_T2>(__u)))
+ -> decltype( _VSTD::forward<_T1>(__t) - _VSTD::forward<_T2>(__u))
+ { return _VSTD::forward<_T1>(__t) - _VSTD::forward<_T2>(__u); }
typedef void is_transparent;
};
#endif
@@ -127,9 +127,9 @@ struct _LIBCPP_TEMPLATE_VIS multiplies<void>
template <class _T1, class _T2>
_LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
auto operator()(_T1&& __t, _T2&& __u) const
- _NOEXCEPT_(noexcept(_VSTD::forward<_T1>(__t) * _VSTD::forward<_T2>(__u)))
- -> decltype (_VSTD::forward<_T1>(__t) * _VSTD::forward<_T2>(__u))
- { return _VSTD::forward<_T1>(__t) * _VSTD::forward<_T2>(__u); }
+ noexcept(noexcept(_VSTD::forward<_T1>(__t) * _VSTD::forward<_T2>(__u)))
+ -> decltype( _VSTD::forward<_T1>(__t) * _VSTD::forward<_T2>(__u))
+ { return _VSTD::forward<_T1>(__t) * _VSTD::forward<_T2>(__u); }
typedef void is_transparent;
};
#endif
@@ -164,9 +164,9 @@ struct _LIBCPP_TEMPLATE_VIS divides<void>
template <class _T1, class _T2>
_LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
auto operator()(_T1&& __t, _T2&& __u) const
- _NOEXCEPT_(noexcept(_VSTD::forward<_T1>(__t) / _VSTD::forward<_T2>(__u)))
- -> decltype (_VSTD::forward<_T1>(__t) / _VSTD::forward<_T2>(__u))
- { return _VSTD::forward<_T1>(__t) / _VSTD::forward<_T2>(__u); }
+ noexcept(noexcept(_VSTD::forward<_T1>(__t) / _VSTD::forward<_T2>(__u)))
+ -> decltype( _VSTD::forward<_T1>(__t) / _VSTD::forward<_T2>(__u))
+ { return _VSTD::forward<_T1>(__t) / _VSTD::forward<_T2>(__u); }
typedef void is_transparent;
};
#endif
@@ -201,9 +201,9 @@ struct _LIBCPP_TEMPLATE_VIS modulus<void>
template <class _T1, class _T2>
_LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
auto operator()(_T1&& __t, _T2&& __u) const
- _NOEXCEPT_(noexcept(_VSTD::forward<_T1>(__t) % _VSTD::forward<_T2>(__u)))
- -> decltype (_VSTD::forward<_T1>(__t) % _VSTD::forward<_T2>(__u))
- { return _VSTD::forward<_T1>(__t) % _VSTD::forward<_T2>(__u); }
+ noexcept(noexcept(_VSTD::forward<_T1>(__t) % _VSTD::forward<_T2>(__u)))
+ -> decltype( _VSTD::forward<_T1>(__t) % _VSTD::forward<_T2>(__u))
+ { return _VSTD::forward<_T1>(__t) % _VSTD::forward<_T2>(__u); }
typedef void is_transparent;
};
#endif
@@ -237,9 +237,9 @@ struct _LIBCPP_TEMPLATE_VIS negate<void>
template <class _Tp>
_LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
auto operator()(_Tp&& __x) const
- _NOEXCEPT_(noexcept(- _VSTD::forward<_Tp>(__x)))
- -> decltype (- _VSTD::forward<_Tp>(__x))
- { return - _VSTD::forward<_Tp>(__x); }
+ noexcept(noexcept(- _VSTD::forward<_Tp>(__x)))
+ -> decltype( - _VSTD::forward<_Tp>(__x))
+ { return - _VSTD::forward<_Tp>(__x); }
typedef void is_transparent;
};
#endif
@@ -276,9 +276,9 @@ struct _LIBCPP_TEMPLATE_VIS bit_and<void>
template <class _T1, class _T2>
_LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
auto operator()(_T1&& __t, _T2&& __u) const
- _NOEXCEPT_(noexcept(_VSTD::forward<_T1>(__t) & _VSTD::forward<_T2>(__u)))
- -> decltype (_VSTD::forward<_T1>(__t) & _VSTD::forward<_T2>(__u))
- { return _VSTD::forward<_T1>(__t) & _VSTD::forward<_T2>(__u); }
+ noexcept(noexcept(_VSTD::forward<_T1>(__t) & _VSTD::forward<_T2>(__u)))
+ -> decltype( _VSTD::forward<_T1>(__t) & _VSTD::forward<_T2>(__u))
+ { return _VSTD::forward<_T1>(__t) & _VSTD::forward<_T2>(__u); }
typedef void is_transparent;
};
#endif
@@ -307,9 +307,9 @@ struct _LIBCPP_TEMPLATE_VIS bit_not<void>
template <class _Tp>
_LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
auto operator()(_Tp&& __x) const
- _NOEXCEPT_(noexcept(~_VSTD::forward<_Tp>(__x)))
- -> decltype (~_VSTD::forward<_Tp>(__x))
- { return ~_VSTD::forward<_Tp>(__x); }
+ noexcept(noexcept(~_VSTD::forward<_Tp>(__x)))
+ -> decltype( ~_VSTD::forward<_Tp>(__x))
+ { return ~_VSTD::forward<_Tp>(__x); }
typedef void is_transparent;
};
#endif
@@ -344,9 +344,9 @@ struct _LIBCPP_TEMPLATE_VIS bit_or<void>
template <class _T1, class _T2>
_LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
auto operator()(_T1&& __t, _T2&& __u) const
- _NOEXCEPT_(noexcept(_VSTD::forward<_T1>(__t) | _VSTD::forward<_T2>(__u)))
- -> decltype (_VSTD::forward<_T1>(__t) | _VSTD::forward<_T2>(__u))
- { return _VSTD::forward<_T1>(__t) | _VSTD::forward<_T2>(__u); }
+ noexcept(noexcept(_VSTD::forward<_T1>(__t) | _VSTD::forward<_T2>(__u)))
+ -> decltype( _VSTD::forward<_T1>(__t) | _VSTD::forward<_T2>(__u))
+ { return _VSTD::forward<_T1>(__t) | _VSTD::forward<_T2>(__u); }
typedef void is_transparent;
};
#endif
@@ -381,9 +381,9 @@ struct _LIBCPP_TEMPLATE_VIS bit_xor<void>
template <class _T1, class _T2>
_LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
auto operator()(_T1&& __t, _T2&& __u) const
- _NOEXCEPT_(noexcept(_VSTD::forward<_T1>(__t) ^ _VSTD::forward<_T2>(__u)))
- -> decltype (_VSTD::forward<_T1>(__t) ^ _VSTD::forward<_T2>(__u))
- { return _VSTD::forward<_T1>(__t) ^ _VSTD::forward<_T2>(__u); }
+ noexcept(noexcept(_VSTD::forward<_T1>(__t) ^ _VSTD::forward<_T2>(__u)))
+ -> decltype( _VSTD::forward<_T1>(__t) ^ _VSTD::forward<_T2>(__u))
+ { return _VSTD::forward<_T1>(__t) ^ _VSTD::forward<_T2>(__u); }
typedef void is_transparent;
};
#endif
@@ -420,9 +420,9 @@ struct _LIBCPP_TEMPLATE_VIS equal_to<void>
template <class _T1, class _T2>
_LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
auto operator()(_T1&& __t, _T2&& __u) const
- _NOEXCEPT_(noexcept(_VSTD::forward<_T1>(__t) == _VSTD::forward<_T2>(__u)))
- -> decltype (_VSTD::forward<_T1>(__t) == _VSTD::forward<_T2>(__u))
- { return _VSTD::forward<_T1>(__t) == _VSTD::forward<_T2>(__u); }
+ noexcept(noexcept(_VSTD::forward<_T1>(__t) == _VSTD::forward<_T2>(__u)))
+ -> decltype( _VSTD::forward<_T1>(__t) == _VSTD::forward<_T2>(__u))
+ { return _VSTD::forward<_T1>(__t) == _VSTD::forward<_T2>(__u); }
typedef void is_transparent;
};
#endif
@@ -457,9 +457,9 @@ struct _LIBCPP_TEMPLATE_VIS not_equal_to<void>
template <class _T1, class _T2>
_LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
auto operator()(_T1&& __t, _T2&& __u) const
- _NOEXCEPT_(noexcept(_VSTD::forward<_T1>(__t) != _VSTD::forward<_T2>(__u)))
- -> decltype (_VSTD::forward<_T1>(__t) != _VSTD::forward<_T2>(__u))
- { return _VSTD::forward<_T1>(__t) != _VSTD::forward<_T2>(__u); }
+ noexcept(noexcept(_VSTD::forward<_T1>(__t) != _VSTD::forward<_T2>(__u)))
+ -> decltype( _VSTD::forward<_T1>(__t) != _VSTD::forward<_T2>(__u))
+ { return _VSTD::forward<_T1>(__t) != _VSTD::forward<_T2>(__u); }
typedef void is_transparent;
};
#endif
@@ -494,9 +494,9 @@ struct _LIBCPP_TEMPLATE_VIS less<void>
template <class _T1, class _T2>
_LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
auto operator()(_T1&& __t, _T2&& __u) const
- _NOEXCEPT_(noexcept(_VSTD::forward<_T1>(__t) < _VSTD::forward<_T2>(__u)))
- -> decltype (_VSTD::forward<_T1>(__t) < _VSTD::forward<_T2>(__u))
- { return _VSTD::forward<_T1>(__t) < _VSTD::forward<_T2>(__u); }
+ noexcept(noexcept(_VSTD::forward<_T1>(__t) < _VSTD::forward<_T2>(__u)))
+ -> decltype( _VSTD::forward<_T1>(__t) < _VSTD::forward<_T2>(__u))
+ { return _VSTD::forward<_T1>(__t) < _VSTD::forward<_T2>(__u); }
typedef void is_transparent;
};
#endif
@@ -531,9 +531,9 @@ struct _LIBCPP_TEMPLATE_VIS less_equal<void>
template <class _T1, class _T2>
_LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
auto operator()(_T1&& __t, _T2&& __u) const
- _NOEXCEPT_(noexcept(_VSTD::forward<_T1>(__t) <= _VSTD::forward<_T2>(__u)))
- -> decltype (_VSTD::forward<_T1>(__t) <= _VSTD::forward<_T2>(__u))
- { return _VSTD::forward<_T1>(__t) <= _VSTD::forward<_T2>(__u); }
+ noexcept(noexcept(_VSTD::forward<_T1>(__t) <= _VSTD::forward<_T2>(__u)))
+ -> decltype( _VSTD::forward<_T1>(__t) <= _VSTD::forward<_T2>(__u))
+ { return _VSTD::forward<_T1>(__t) <= _VSTD::forward<_T2>(__u); }
typedef void is_transparent;
};
#endif
@@ -568,9 +568,9 @@ struct _LIBCPP_TEMPLATE_VIS greater_equal<void>
template <class _T1, class _T2>
_LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
auto operator()(_T1&& __t, _T2&& __u) const
- _NOEXCEPT_(noexcept(_VSTD::forward<_T1>(__t) >= _VSTD::forward<_T2>(__u)))
- -> decltype (_VSTD::forward<_T1>(__t) >= _VSTD::forward<_T2>(__u))
- { return _VSTD::forward<_T1>(__t) >= _VSTD::forward<_T2>(__u); }
+ noexcept(noexcept(_VSTD::forward<_T1>(__t) >= _VSTD::forward<_T2>(__u)))
+ -> decltype( _VSTD::forward<_T1>(__t) >= _VSTD::forward<_T2>(__u))
+ { return _VSTD::forward<_T1>(__t) >= _VSTD::forward<_T2>(__u); }
typedef void is_transparent;
};
#endif
@@ -605,9 +605,9 @@ struct _LIBCPP_TEMPLATE_VIS greater<void>
template <class _T1, class _T2>
_LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
auto operator()(_T1&& __t, _T2&& __u) const
- _NOEXCEPT_(noexcept(_VSTD::forward<_T1>(__t) > _VSTD::forward<_T2>(__u)))
- -> decltype (_VSTD::forward<_T1>(__t) > _VSTD::forward<_T2>(__u))
- { return _VSTD::forward<_T1>(__t) > _VSTD::forward<_T2>(__u); }
+ noexcept(noexcept(_VSTD::forward<_T1>(__t) > _VSTD::forward<_T2>(__u)))
+ -> decltype( _VSTD::forward<_T1>(__t) > _VSTD::forward<_T2>(__u))
+ { return _VSTD::forward<_T1>(__t) > _VSTD::forward<_T2>(__u); }
typedef void is_transparent;
};
#endif
@@ -644,9 +644,9 @@ struct _LIBCPP_TEMPLATE_VIS logical_and<void>
template <class _T1, class _T2>
_LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
auto operator()(_T1&& __t, _T2&& __u) const
- _NOEXCEPT_(noexcept(_VSTD::forward<_T1>(__t) && _VSTD::forward<_T2>(__u)))
- -> decltype (_VSTD::forward<_T1>(__t) && _VSTD::forward<_T2>(__u))
- { return _VSTD::forward<_T1>(__t) && _VSTD::forward<_T2>(__u); }
+ noexcept(noexcept(_VSTD::forward<_T1>(__t) && _VSTD::forward<_T2>(__u)))
+ -> decltype( _VSTD::forward<_T1>(__t) && _VSTD::forward<_T2>(__u))
+ { return _VSTD::forward<_T1>(__t) && _VSTD::forward<_T2>(__u); }
typedef void is_transparent;
};
#endif
@@ -680,9 +680,9 @@ struct _LIBCPP_TEMPLATE_VIS logical_not<void>
template <class _Tp>
_LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
auto operator()(_Tp&& __x) const
- _NOEXCEPT_(noexcept(!_VSTD::forward<_Tp>(__x)))
- -> decltype (!_VSTD::forward<_Tp>(__x))
- { return !_VSTD::forward<_Tp>(__x); }
+ noexcept(noexcept(!_VSTD::forward<_Tp>(__x)))
+ -> decltype( !_VSTD::forward<_Tp>(__x))
+ { return !_VSTD::forward<_Tp>(__x); }
typedef void is_transparent;
};
#endif
@@ -717,9 +717,9 @@ struct _LIBCPP_TEMPLATE_VIS logical_or<void>
template <class _T1, class _T2>
_LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
auto operator()(_T1&& __t, _T2&& __u) const
- _NOEXCEPT_(noexcept(_VSTD::forward<_T1>(__t) || _VSTD::forward<_T2>(__u)))
- -> decltype (_VSTD::forward<_T1>(__t) || _VSTD::forward<_T2>(__u))
- { return _VSTD::forward<_T1>(__t) || _VSTD::forward<_T2>(__u); }
+ noexcept(noexcept(_VSTD::forward<_T1>(__t) || _VSTD::forward<_T2>(__u)))
+ -> decltype( _VSTD::forward<_T1>(__t) || _VSTD::forward<_T2>(__u))
+ { return _VSTD::forward<_T1>(__t) || _VSTD::forward<_T2>(__u); }
typedef void is_transparent;
};
#endif
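
For context, a small sketch of what the void ("transparent") specializations touched above allow: both operand types are deduced, so mixed-type calls work without converting either argument first. Assumes a C++14 or later compiler:

#include <cassert>
#include <functional>
#include <string>

int main() {
  // std::plus<> is std::plus<void>; the operands keep their own types.
  assert(std::plus<>{}(1, 2.5) == 3.5);
  // std::less<> compares a std::string with a string literal directly,
  // without constructing a temporary std::string for the right-hand side.
  assert(std::less<>{}(std::string("a"), "b"));
}
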
diff --git a/libcxx/include/__functional/perfect_forward.h b/libcxx/include/__functional/perfect_forward.h
index a5678e1593bb..308b304a76dc 100644
--- a/libcxx/include/__functional/perfect_forward.h
+++ b/libcxx/include/__functional/perfect_forward.h
@@ -11,9 +11,11 @@
#define _LIBCPP___FUNCTIONAL_PERFECT_FORWARD_H
#include <__config>
+#include <__utility/declval.h>
+#include <__utility/forward.h>
+#include <__utility/move.h>
#include <tuple>
#include <type_traits>
-#include <utility>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
#pragma GCC system_header
@@ -23,63 +25,68 @@ _LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER > 14
-template<class _Op, class _Tuple,
- class _Idxs = typename __make_tuple_indices<tuple_size<_Tuple>::value>::type>
+template <class _Op, class _Indices, class ..._Bound>
struct __perfect_forward_impl;
-template<class _Op, class... _Bound, size_t... _Idxs>
-struct __perfect_forward_impl<_Op, __tuple_types<_Bound...>, __tuple_indices<_Idxs...>>
-{
+template <class _Op, size_t ..._Idx, class ..._Bound>
+struct __perfect_forward_impl<_Op, index_sequence<_Idx...>, _Bound...> {
+private:
tuple<_Bound...> __bound_;
- template<class... _Args>
- _LIBCPP_INLINE_VISIBILITY constexpr auto operator()(_Args&&... __args) &
- noexcept(noexcept(_Op::__call(_VSTD::get<_Idxs>(__bound_)..., _VSTD::forward<_Args>(__args)...)))
- -> decltype( _Op::__call(_VSTD::get<_Idxs>(__bound_)..., _VSTD::forward<_Args>(__args)...))
- {return _Op::__call(_VSTD::get<_Idxs>(__bound_)..., _VSTD::forward<_Args>(__args)...);}
-
- template<class... _Args>
- _LIBCPP_INLINE_VISIBILITY constexpr auto operator()(_Args&&... __args) const&
- noexcept(noexcept(_Op::__call(_VSTD::get<_Idxs>(__bound_)..., _VSTD::forward<_Args>(__args)...)))
- -> decltype( _Op::__call(_VSTD::get<_Idxs>(__bound_)..., _VSTD::forward<_Args>(__args)...))
- {return _Op::__call(_VSTD::get<_Idxs>(__bound_)..., _VSTD::forward<_Args>(__args)...);}
-
- template<class... _Args>
- _LIBCPP_INLINE_VISIBILITY constexpr auto operator()(_Args&&... __args) &&
- noexcept(noexcept(_Op::__call(_VSTD::get<_Idxs>(_VSTD::move(__bound_))...,
- _VSTD::forward<_Args>(__args)...)))
- -> decltype( _Op::__call(_VSTD::get<_Idxs>(_VSTD::move(__bound_))...,
- _VSTD::forward<_Args>(__args)...))
- {return _Op::__call(_VSTD::get<_Idxs>(_VSTD::move(__bound_))...,
- _VSTD::forward<_Args>(__args)...);}
-
- template<class... _Args>
- _LIBCPP_INLINE_VISIBILITY constexpr auto operator()(_Args&&... __args) const&&
- noexcept(noexcept(_Op::__call(_VSTD::get<_Idxs>(_VSTD::move(__bound_))...,
- _VSTD::forward<_Args>(__args)...)))
- -> decltype( _Op::__call(_VSTD::get<_Idxs>(_VSTD::move(__bound_))...,
- _VSTD::forward<_Args>(__args)...))
- {return _Op::__call(_VSTD::get<_Idxs>(_VSTD::move(__bound_))...,
- _VSTD::forward<_Args>(__args)...);}
-
- template<class _Fn = typename tuple_element<0, tuple<_Bound...>>::type,
- class = _EnableIf<is_copy_constructible_v<_Fn>>>
- constexpr __perfect_forward_impl(__perfect_forward_impl const& __other)
- : __bound_(__other.__bound_) {}
-
- template<class _Fn = typename tuple_element<0, tuple<_Bound...>>::type,
- class = _EnableIf<is_move_constructible_v<_Fn>>>
- constexpr __perfect_forward_impl(__perfect_forward_impl && __other)
- : __bound_(_VSTD::move(__other.__bound_)) {}
-
- template<class... _BoundArgs>
- explicit constexpr __perfect_forward_impl(_BoundArgs&&... __bound) :
- __bound_(_VSTD::forward<_BoundArgs>(__bound)...) { }
+public:
+ template <class ..._BoundArgs, class = enable_if_t<
+ is_constructible_v<tuple<_Bound...>, _BoundArgs&&...>
+ >>
+ explicit constexpr __perfect_forward_impl(_BoundArgs&& ...__bound)
+ : __bound_(_VSTD::forward<_BoundArgs>(__bound)...)
+ { }
+
+ __perfect_forward_impl(__perfect_forward_impl const&) = default;
+ __perfect_forward_impl(__perfect_forward_impl&&) = default;
+
+ __perfect_forward_impl& operator=(__perfect_forward_impl const&) = default;
+ __perfect_forward_impl& operator=(__perfect_forward_impl&&) = default;
+
+ template <class ..._Args, class = enable_if_t<is_invocable_v<_Op, _Bound&..., _Args...>>>
+ _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Args&&... __args) &
+ noexcept(noexcept(_Op()(_VSTD::get<_Idx>(__bound_)..., _VSTD::forward<_Args>(__args)...)))
+ -> decltype( _Op()(_VSTD::get<_Idx>(__bound_)..., _VSTD::forward<_Args>(__args)...))
+ { return _Op()(_VSTD::get<_Idx>(__bound_)..., _VSTD::forward<_Args>(__args)...); }
+
+ template <class ..._Args, class = enable_if_t<!is_invocable_v<_Op, _Bound&..., _Args...>>>
+ auto operator()(_Args&&...) & = delete;
+
+ template <class ..._Args, class = enable_if_t<is_invocable_v<_Op, _Bound const&..., _Args...>>>
+ _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Args&&... __args) const&
+ noexcept(noexcept(_Op()(_VSTD::get<_Idx>(__bound_)..., _VSTD::forward<_Args>(__args)...)))
+ -> decltype( _Op()(_VSTD::get<_Idx>(__bound_)..., _VSTD::forward<_Args>(__args)...))
+ { return _Op()(_VSTD::get<_Idx>(__bound_)..., _VSTD::forward<_Args>(__args)...); }
+
+ template <class ..._Args, class = enable_if_t<!is_invocable_v<_Op, _Bound const&..., _Args...>>>
+ auto operator()(_Args&&...) const& = delete;
+
+ template <class ..._Args, class = enable_if_t<is_invocable_v<_Op, _Bound..., _Args...>>>
+ _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Args&&... __args) &&
+ noexcept(noexcept(_Op()(_VSTD::get<_Idx>(_VSTD::move(__bound_))..., _VSTD::forward<_Args>(__args)...)))
+ -> decltype( _Op()(_VSTD::get<_Idx>(_VSTD::move(__bound_))..., _VSTD::forward<_Args>(__args)...))
+ { return _Op()(_VSTD::get<_Idx>(_VSTD::move(__bound_))..., _VSTD::forward<_Args>(__args)...); }
+
+ template <class ..._Args, class = enable_if_t<!is_invocable_v<_Op, _Bound..., _Args...>>>
+ auto operator()(_Args&&...) && = delete;
+
+ template <class ..._Args, class = enable_if_t<is_invocable_v<_Op, _Bound const..., _Args...>>>
+ _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Args&&... __args) const&&
+ noexcept(noexcept(_Op()(_VSTD::get<_Idx>(_VSTD::move(__bound_))..., _VSTD::forward<_Args>(__args)...)))
+ -> decltype( _Op()(_VSTD::get<_Idx>(_VSTD::move(__bound_))..., _VSTD::forward<_Args>(__args)...))
+ { return _Op()(_VSTD::get<_Idx>(_VSTD::move(__bound_))..., _VSTD::forward<_Args>(__args)...); }
+
+ template <class ..._Args, class = enable_if_t<!is_invocable_v<_Op, _Bound const..., _Args...>>>
+ auto operator()(_Args&&...) const&& = delete;
};
-template<class _Op, class... _Args>
-using __perfect_forward =
- __perfect_forward_impl<_Op, __tuple_types<decay_t<_Args>...>>;
+// __perfect_forward implements a perfect-forwarding call wrapper as explained in [func.require].
+template <class _Op, class ..._Args>
+using __perfect_forward = __perfect_forward_impl<_Op, index_sequence_for<_Args...>, _Args...>;
#endif // _LIBCPP_STD_VER > 14
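
For orientation, a minimal sketch of the behaviour this call wrapper provides, seen through std::bind_front, which libc++ implements in terms of this helper (assumes C++20; this is not code from the patch). The cv- and ref-qualification of the wrapper object is forwarded to the bound state, and non-invocable combinations are rejected by the deleted overloads above instead of erroring inside the body.

#include <functional>
#include <utility>

// A callable whose result depends on the value category of its first argument,
// mirroring the &, const&, &&, const&& overload set of the wrapper.
struct Joiner {
    int operator()(int& x, int y) const  { return x + y; }  // bound value seen as an lvalue
    int operator()(int&& x, int y) const { return x * y; }  // bound value seen as an rvalue
};

int main() {
    auto f = std::bind_front(Joiner{}, 10);
    int a = f(3);             // lvalue call: bound 10 forwarded as int&  -> 13
    int b = std::move(f)(3);  // rvalue call: bound 10 forwarded as int&& -> 30
    return (a == 13 && b == 30) ? 0 : 1;
}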
diff --git a/libcxx/include/__functional/reference_wrapper.h b/libcxx/include/__functional/reference_wrapper.h
index 09f4a6494502..e1e4abd80c23 100644
--- a/libcxx/include/__functional/reference_wrapper.h
+++ b/libcxx/include/__functional/reference_wrapper.h
@@ -46,7 +46,7 @@ public:
reference_wrapper(type& __f) _NOEXCEPT
: __f_(_VSTD::addressof(__f)) {}
#else
- template <class _Up, class = _EnableIf<!__is_same_uncvref<_Up, reference_wrapper>::value, decltype(__fun(declval<_Up>())) >>
+ template <class _Up, class = __enable_if_t<!__is_same_uncvref<_Up, reference_wrapper>::value, decltype(__fun(declval<_Up>())) >>
_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
reference_wrapper(_Up&& __u) _NOEXCEPT_(noexcept(__fun(declval<_Up>()))) {
type& __f = static_cast<_Up&&>(__u);
@@ -176,7 +176,7 @@ public:
#endif // _LIBCPP_CXX03_LANG
};
-#ifndef _LIBCPP_HAS_NO_DEDUCTION_GUIDES
+#if _LIBCPP_STD_VER >= 17
template <class _Tp>
reference_wrapper(_Tp&) -> reference_wrapper<_Tp>;
#endif
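
As a quick illustration of what this gate now keys on (a sketch assuming a C++17 compiler; not part of the patch), the deduction guide lets class template argument deduction pick the referenced type directly:

#include <functional>

int main() {
    int x = 42;
    std::reference_wrapper r = x;  // deduced as std::reference_wrapper<int>
    r.get() = 7;                   // writes through to x
    return x == 7 ? 0 : 1;
}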
diff --git a/libcxx/include/__functional/unary_function.h b/libcxx/include/__functional/unary_function.h
index 8084ef4b0343..499f9964676d 100644
--- a/libcxx/include/__functional/unary_function.h
+++ b/libcxx/include/__functional/unary_function.h
@@ -15,9 +15,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Arg, class _Result>
@@ -29,6 +26,4 @@ struct _LIBCPP_TEMPLATE_VIS unary_function
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___FUNCTIONAL_UNARY_FUNCTION_H
diff --git a/libcxx/include/__functional/unwrap_ref.h b/libcxx/include/__functional/unwrap_ref.h
index 4d091ec35c5d..dc309add90df 100644
--- a/libcxx/include/__functional/unwrap_ref.h
+++ b/libcxx/include/__functional/unwrap_ref.h
@@ -15,19 +15,16 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Tp>
-struct __unwrap_reference { typedef _LIBCPP_NODEBUG_TYPE _Tp type; };
+struct __unwrap_reference { typedef _LIBCPP_NODEBUG _Tp type; };
template <class _Tp>
class reference_wrapper;
template <class _Tp>
-struct __unwrap_reference<reference_wrapper<_Tp> > { typedef _LIBCPP_NODEBUG_TYPE _Tp& type; };
+struct __unwrap_reference<reference_wrapper<_Tp> > { typedef _LIBCPP_NODEBUG _Tp& type; };
template <class _Tp>
struct decay;
@@ -57,6 +54,4 @@ struct __unwrap_ref_decay
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___FUNCTIONAL_UNWRAP_REF_H
diff --git a/libcxx/include/__functional/weak_result_type.h b/libcxx/include/__functional/weak_result_type.h
index 2ee85acf1ef4..32b1e0b1c6c4 100644
--- a/libcxx/include/__functional/weak_result_type.h
+++ b/libcxx/include/__functional/weak_result_type.h
@@ -89,7 +89,7 @@ struct __weak_result_type_imp // bool is true
: public __maybe_derive_from_unary_function<_Tp>,
public __maybe_derive_from_binary_function<_Tp>
{
- typedef _LIBCPP_NODEBUG_TYPE typename _Tp::result_type result_type;
+ typedef _LIBCPP_NODEBUG typename _Tp::result_type result_type;
};
template <class _Tp>
@@ -110,19 +110,19 @@ struct __weak_result_type
template <class _Rp>
struct __weak_result_type<_Rp ()>
{
- typedef _LIBCPP_NODEBUG_TYPE _Rp result_type;
+ typedef _LIBCPP_NODEBUG _Rp result_type;
};
template <class _Rp>
struct __weak_result_type<_Rp (&)()>
{
- typedef _LIBCPP_NODEBUG_TYPE _Rp result_type;
+ typedef _LIBCPP_NODEBUG _Rp result_type;
};
template <class _Rp>
struct __weak_result_type<_Rp (*)()>
{
- typedef _LIBCPP_NODEBUG_TYPE _Rp result_type;
+ typedef _LIBCPP_NODEBUG _Rp result_type;
};
// 1 argument case
diff --git a/libcxx/include/__hash_table b/libcxx/include/__hash_table
index df0f7c80db2e..126e1884a664 100644
--- a/libcxx/include/__hash_table
+++ b/libcxx/include/__hash_table
@@ -298,7 +298,7 @@ public:
__hash_iterator(const __hash_iterator& __i)
: __node_(__i.__node_)
{
- __get_db()->__iterator_copy(this, &__i);
+ __get_db()->__iterator_copy(this, _VSTD::addressof(__i));
}
_LIBCPP_INLINE_VISIBILITY
@@ -1541,7 +1541,7 @@ template <class _Tp, class _Hash, class _Equal, class _Alloc>
__hash_table<_Tp, _Hash, _Equal, _Alloc>&
__hash_table<_Tp, _Hash, _Equal, _Alloc>::operator=(const __hash_table& __u)
{
- if (this != &__u)
+ if (this != _VSTD::addressof(__u))
{
__copy_assign_alloc(__u);
hash_function() = __u.hash_function();
@@ -2348,7 +2348,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__rehash(size_type __nbc)
size_type __chash = __constrain_hash(__cp->__hash(), __nbc);
__bucket_list_[__chash] = __pp;
size_type __phash = __chash;
- for (__pp = __cp, __cp = __cp->__next_; __cp != nullptr;
+ for (__pp = __cp, void(), __cp = __cp->__next_; __cp != nullptr;
__cp = __pp->__next_)
{
__chash = __constrain_hash(__cp->__hash(), __nbc);
@@ -2758,7 +2758,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::bucket_size(size_type __n) const
{
for (__np = __np->__next_; __np != nullptr &&
__constrain_hash(__np->__hash(), __bc) == __n;
- __np = __np->__next_, ++__r)
+ __np = __np->__next_, (void) ++__r)
;
}
return __r;
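
The void() and (void) insertions in the two hunks above follow the usual idiom for defeating a user-supplied operator,(): giving one operand of the comma expression type void forces the built-in comma operator to be chosen. A self-contained sketch of the difference (illustrative only, not code from this patch):

struct Evil {
    int v;
    Evil operator,(Evil) const { return Evil{-1}; }  // hostile comma overload
};

int main() {
    Evil a{1}, b{2};
    Evil c = (a, b);        // picks Evil::operator, and yields {-1}
    Evil d = (void(a), b);  // void operand: built-in comma, yields b
    return (c.v == -1 && d.v == 2) ? 0 : 1;
}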
diff --git a/libcxx/include/__iterator/access.h b/libcxx/include/__iterator/access.h
index c0576b45902b..5e0d6b35153b 100644
--- a/libcxx/include/__iterator/access.h
+++ b/libcxx/include/__iterator/access.h
@@ -17,9 +17,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Tp, size_t _Np>
@@ -129,6 +126,4 @@ end(const _Cp& __c)
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ITERATOR_ACCESS_H
diff --git a/libcxx/include/__iterator/advance.h b/libcxx/include/__iterator/advance.h
index 47bce1ddfbe2..a60052a08f0d 100644
--- a/libcxx/include/__iterator/advance.h
+++ b/libcxx/include/__iterator/advance.h
@@ -26,9 +26,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _InputIter>
@@ -58,7 +55,7 @@ void __advance(_RandIter& __i, typename iterator_traits<_RandIter>::difference_t
template <
class _InputIter, class _Distance,
class _IntegralDistance = decltype(_VSTD::__convert_to_integral(declval<_Distance>())),
- class = _EnableIf<is_integral<_IntegralDistance>::value> >
+ class = __enable_if_t<is_integral<_IntegralDistance>::value> >
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX14
void advance(_InputIter& __i, _Distance __orig_n) {
typedef typename iterator_traits<_InputIter>::difference_type _Difference;
@@ -195,6 +192,4 @@ inline constexpr auto advance = __advance_fn(__function_like::__tag());
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ITERATOR_ADVANCE_H
diff --git a/libcxx/include/__iterator/back_insert_iterator.h b/libcxx/include/__iterator/back_insert_iterator.h
index f34cb863bc09..844babe5c5ed 100644
--- a/libcxx/include/__iterator/back_insert_iterator.h
+++ b/libcxx/include/__iterator/back_insert_iterator.h
@@ -21,9 +21,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
_LIBCPP_SUPPRESS_DEPRECATED_PUSH
@@ -48,20 +45,20 @@ public:
typedef void reference;
typedef _Container container_type;
- _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 explicit back_insert_iterator(_Container& __x) : container(_VSTD::addressof(__x)) {}
- _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 back_insert_iterator& operator=(const typename _Container::value_type& __value_)
+ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 explicit back_insert_iterator(_Container& __x) : container(_VSTD::addressof(__x)) {}
+ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 back_insert_iterator& operator=(const typename _Container::value_type& __value_)
{container->push_back(__value_); return *this;}
#ifndef _LIBCPP_CXX03_LANG
- _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 back_insert_iterator& operator=(typename _Container::value_type&& __value_)
+ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 back_insert_iterator& operator=(typename _Container::value_type&& __value_)
{container->push_back(_VSTD::move(__value_)); return *this;}
#endif // _LIBCPP_CXX03_LANG
- _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 back_insert_iterator& operator*() {return *this;}
- _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 back_insert_iterator& operator++() {return *this;}
- _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 back_insert_iterator operator++(int) {return *this;}
+ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 back_insert_iterator& operator*() {return *this;}
+ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 back_insert_iterator& operator++() {return *this;}
+ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 back_insert_iterator operator++(int) {return *this;}
};
template <class _Container>
-inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
+inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17
back_insert_iterator<_Container>
back_inserter(_Container& __x)
{
@@ -70,6 +67,4 @@ back_inserter(_Container& __x)
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ITERATOR_BACK_INSERT_ITERATOR_H
diff --git a/libcxx/include/__iterator/common_iterator.h b/libcxx/include/__iterator/common_iterator.h
index fb01d8bd4b95..9a142769e55a 100644
--- a/libcxx/include/__iterator/common_iterator.h
+++ b/libcxx/include/__iterator/common_iterator.h
@@ -25,9 +25,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
#if !defined(_LIBCPP_HAS_NO_RANGES)
@@ -296,6 +293,4 @@ struct iterator_traits<common_iterator<_Iter, _Sent>> {
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ITERATOR_COMMON_ITERATOR_H
diff --git a/libcxx/include/__iterator/concepts.h b/libcxx/include/__iterator/concepts.h
index 6eb4aef10528..531acdf0a5b2 100644
--- a/libcxx/include/__iterator/concepts.h
+++ b/libcxx/include/__iterator/concepts.h
@@ -24,9 +24,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
#if !defined(_LIBCPP_HAS_NO_RANGES)
@@ -72,6 +69,8 @@ concept __signed_integer_like = signed_integral<_Tp>;
template<class _Ip>
concept weakly_incrementable =
+ // TODO: remove this once the clang bug is fixed (bugs.llvm.org/PR48173).
+ !same_as<_Ip, bool> && // Currently, clang does not handle bool correctly.
movable<_Ip> &&
requires(_Ip __i) {
typename iter_difference_t<_Ip>;
@@ -172,7 +171,6 @@ concept contiguous_iterator =
derived_from<_ITER_CONCEPT<_Ip>, contiguous_iterator_tag> &&
is_lvalue_reference_v<iter_reference_t<_Ip>> &&
same_as<iter_value_t<_Ip>, remove_cvref_t<iter_reference_t<_Ip>>> &&
- (is_pointer_v<_Ip> || requires { sizeof(__pointer_traits_element_type<_Ip>); }) &&
requires(const _Ip& __i) {
{ _VSTD::to_address(__i) } -> same_as<add_pointer_t<iter_reference_t<_Ip>>>;
};
@@ -267,6 +265,4 @@ concept indirectly_movable_storable =
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ITERATOR_CONCEPTS_H
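
For a concrete feel of the two concepts touched above (a sketch assuming a complete C++20 <iterator>; not libc++ test code, and only the easy cases are exercised): raw pointers model both concepts, and bool does not model weakly_incrementable with or without the workaround, since ++ on bool has been ill-formed since C++17.

#include <iterator>

static_assert(std::contiguous_iterator<int*>);
static_assert(std::weakly_incrementable<int*>);
static_assert(!std::weakly_incrementable<bool>);

int main() { return 0; }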
diff --git a/libcxx/include/__iterator/counted_iterator.h b/libcxx/include/__iterator/counted_iterator.h
index 7136aaf02584..55979fe5571e 100644
--- a/libcxx/include/__iterator/counted_iterator.h
+++ b/libcxx/include/__iterator/counted_iterator.h
@@ -19,6 +19,8 @@
#include <__iterator/iterator_traits.h>
#include <__iterator/readable_traits.h>
#include <__memory/pointer_traits.h>
+#include <__utility/move.h>
+#include <compare>
#include <concepts>
#include <type_traits>
@@ -26,9 +28,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
#if !defined(_LIBCPP_HAS_NO_RANGES)
@@ -301,6 +300,4 @@ struct iterator_traits<counted_iterator<_Iter>> : iterator_traits<_Iter> {
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ITERATOR_COUNTED_ITERATOR_H
diff --git a/libcxx/include/__iterator/data.h b/libcxx/include/__iterator/data.h
index cd8e37b96b6e..5e4946cc10b4 100644
--- a/libcxx/include/__iterator/data.h
+++ b/libcxx/include/__iterator/data.h
@@ -18,9 +18,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER > 14
@@ -51,6 +48,4 @@ constexpr const _Ep* data(initializer_list<_Ep> __il) noexcept { return __il.beg
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ITERATOR_DATA_H
diff --git a/libcxx/include/__iterator/default_sentinel.h b/libcxx/include/__iterator/default_sentinel.h
index 934a56fd9e29..7172a748febc 100644
--- a/libcxx/include/__iterator/default_sentinel.h
+++ b/libcxx/include/__iterator/default_sentinel.h
@@ -16,9 +16,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
#if !defined(_LIBCPP_HAS_NO_RANGES)
@@ -30,6 +27,4 @@ inline constexpr default_sentinel_t default_sentinel{};
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ITERATOR_DEFAULT_SENTINEL_H
diff --git a/libcxx/include/__iterator/distance.h b/libcxx/include/__iterator/distance.h
index 33e4af84d36e..70e8e52398f9 100644
--- a/libcxx/include/__iterator/distance.h
+++ b/libcxx/include/__iterator/distance.h
@@ -17,9 +17,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _InputIter>
@@ -51,6 +48,4 @@ distance(_InputIter __first, _InputIter __last)
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ITERATOR_DISTANCE_H
diff --git a/libcxx/include/__iterator/empty.h b/libcxx/include/__iterator/empty.h
index 4dd59f5cccbd..39cd560a276f 100644
--- a/libcxx/include/__iterator/empty.h
+++ b/libcxx/include/__iterator/empty.h
@@ -18,9 +18,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER > 14
@@ -44,6 +41,4 @@ constexpr bool empty(initializer_list<_Ep> __il) noexcept { return __il.size() =
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ITERATOR_EMPTY_H
diff --git a/libcxx/include/__iterator/erase_if_container.h b/libcxx/include/__iterator/erase_if_container.h
index a5dfd0720535..08f6e2248239 100644
--- a/libcxx/include/__iterator/erase_if_container.h
+++ b/libcxx/include/__iterator/erase_if_container.h
@@ -16,9 +16,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Container, class _Predicate>
@@ -40,6 +37,4 @@ __libcpp_erase_if_container(_Container& __c, _Predicate& __pred) {
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ITERATOR_ERASE_IF_CONTAINER_H
diff --git a/libcxx/include/__iterator/front_insert_iterator.h b/libcxx/include/__iterator/front_insert_iterator.h
index 0421dd5c4b21..b229a99f1104 100644
--- a/libcxx/include/__iterator/front_insert_iterator.h
+++ b/libcxx/include/__iterator/front_insert_iterator.h
@@ -21,9 +21,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
_LIBCPP_SUPPRESS_DEPRECATED_PUSH
@@ -70,6 +67,4 @@ front_inserter(_Container& __x)
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ITERATOR_FRONT_INSERT_ITERATOR_H
diff --git a/libcxx/include/__iterator/incrementable_traits.h b/libcxx/include/__iterator/incrementable_traits.h
index 5a433982ba27..fd5015ddf1b1 100644
--- a/libcxx/include/__iterator/incrementable_traits.h
+++ b/libcxx/include/__iterator/incrementable_traits.h
@@ -18,9 +18,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
#if !defined(_LIBCPP_HAS_NO_RANGES)
@@ -72,6 +69,4 @@ using iter_difference_t = typename conditional_t<__is_primary_template<iterator_
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ITERATOR_INCREMENTABLE_TRAITS_H
diff --git a/libcxx/include/__iterator/insert_iterator.h b/libcxx/include/__iterator/insert_iterator.h
index 265814182475..33117419881b 100644
--- a/libcxx/include/__iterator/insert_iterator.h
+++ b/libcxx/include/__iterator/insert_iterator.h
@@ -14,6 +14,7 @@
#include <__iterator/iterator.h>
#include <__iterator/iterator_traits.h>
#include <__memory/addressof.h>
+#include <__ranges/access.h>
#include <__utility/move.h>
#include <cstddef>
@@ -21,11 +22,16 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
+#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_RANGES)
+template <class _Container>
+using __insert_iterator_iter_t = ranges::iterator_t<_Container>;
+#else
+template <class _Container>
+using __insert_iterator_iter_t = typename _Container::iterator;
+#endif
+
_LIBCPP_SUPPRESS_DEPRECATED_PUSH
template <class _Container>
class _LIBCPP_TEMPLATE_VIS insert_iterator
@@ -36,7 +42,7 @@ class _LIBCPP_TEMPLATE_VIS insert_iterator
_LIBCPP_SUPPRESS_DEPRECATED_POP
protected:
_Container* container;
- typename _Container::iterator iter; // FIXME: `ranges::iterator_t<Container>` in C++20 mode
+ __insert_iterator_iter_t<_Container> iter;
public:
typedef output_iterator_tag iterator_category;
typedef void value_type;
@@ -49,7 +55,7 @@ public:
typedef void reference;
typedef _Container container_type;
- _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 insert_iterator(_Container& __x, typename _Container::iterator __i)
+ _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 insert_iterator(_Container& __x, __insert_iterator_iter_t<_Container> __i)
: container(_VSTD::addressof(__x)), iter(__i) {}
_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 insert_iterator& operator=(const typename _Container::value_type& __value_)
{iter = container->insert(iter, __value_); ++iter; return *this;}
@@ -65,13 +71,11 @@ public:
template <class _Container>
inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
insert_iterator<_Container>
-inserter(_Container& __x, typename _Container::iterator __i)
+inserter(_Container& __x, __insert_iterator_iter_t<_Container> __i)
{
return insert_iterator<_Container>(__x, __i);
}
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ITERATOR_INSERT_ITERATOR_H
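
A short usage sketch of what the new alias feeds into (assumes C++20; ordinary standard-library use, not libc++ internals): for a normal container, ranges::iterator_t<_Container> and _Container::iterator name the same type, so existing calls to std::inserter are unaffected.

#include <algorithm>
#include <iterator>
#include <set>

int main() {
    std::set<int> dst;
    const int src[] = {3, 1, 2};
    // inserter() now spells its iterator parameter via the alias above;
    // for std::set that is still set<int>::iterator.
    std::copy(std::begin(src), std::end(src), std::inserter(dst, dst.end()));
    return dst.size() == 3 ? 0 : 1;
}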
diff --git a/libcxx/include/__iterator/istream_iterator.h b/libcxx/include/__iterator/istream_iterator.h
index f39faa6d590b..979d714edf5d 100644
--- a/libcxx/include/__iterator/istream_iterator.h
+++ b/libcxx/include/__iterator/istream_iterator.h
@@ -20,9 +20,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
_LIBCPP_SUPPRESS_DEPRECATED_PUSH
@@ -70,12 +67,6 @@ public:
bool
operator==(const istream_iterator<_Up, _CharU, _TraitsU, _DistanceU>& __x,
const istream_iterator<_Up, _CharU, _TraitsU, _DistanceU>& __y);
-
- template <class _Up, class _CharU, class _TraitsU, class _DistanceU>
- friend _LIBCPP_INLINE_VISIBILITY
- bool
- operator==(const istream_iterator<_Up, _CharU, _TraitsU, _DistanceU>& __x,
- const istream_iterator<_Up, _CharU, _TraitsU, _DistanceU>& __y);
};
template <class _Tp, class _CharT, class _Traits, class _Distance>
@@ -98,6 +89,4 @@ operator!=(const istream_iterator<_Tp, _CharT, _Traits, _Distance>& __x,
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ITERATOR_ISTREAM_ITERATOR_H
diff --git a/libcxx/include/__iterator/istreambuf_iterator.h b/libcxx/include/__iterator/istreambuf_iterator.h
index 119698d54ce9..0c7676f16908 100644
--- a/libcxx/include/__iterator/istreambuf_iterator.h
+++ b/libcxx/include/__iterator/istreambuf_iterator.h
@@ -19,9 +19,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
_LIBCPP_SUPPRESS_DEPRECATED_PUSH
@@ -105,6 +102,4 @@ bool operator!=(const istreambuf_iterator<_CharT,_Traits>& __a,
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ITERATOR_ISTREAMBUF_ITERATOR_H
diff --git a/libcxx/include/__iterator/iter_move.h b/libcxx/include/__iterator/iter_move.h
index 5540799e197f..a2951f764b0c 100644
--- a/libcxx/include/__iterator/iter_move.h
+++ b/libcxx/include/__iterator/iter_move.h
@@ -21,9 +21,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
#if !defined(_LIBCPP_HAS_NO_RANGES)
@@ -86,6 +83,4 @@ using iter_rvalue_reference_t = decltype(ranges::iter_move(declval<_Tp&>()));
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ITERATOR_ITER_MOVE_H
diff --git a/libcxx/include/__iterator/iter_swap.h b/libcxx/include/__iterator/iter_swap.h
index d70da09b4ab8..a6c3bc8c663e 100644
--- a/libcxx/include/__iterator/iter_swap.h
+++ b/libcxx/include/__iterator/iter_swap.h
@@ -15,6 +15,8 @@
#include <__iterator/iterator_traits.h>
#include <__iterator/readable_traits.h>
#include <__ranges/access.h>
+#include <__utility/forward.h>
+#include <__utility/move.h>
#include <concepts>
#include <type_traits>
@@ -22,9 +24,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
#if !defined(_LIBCPP_HAS_NO_RANGES)
@@ -102,6 +101,4 @@ concept indirectly_swappable =
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ITERATOR_ITER_SWAP_H
diff --git a/libcxx/include/__iterator/iterator.h b/libcxx/include/__iterator/iterator.h
index dfd481e35712..be298ee5228e 100644
--- a/libcxx/include/__iterator/iterator.h
+++ b/libcxx/include/__iterator/iterator.h
@@ -17,9 +17,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template<class _Category, class _Tp, class _Distance = ptrdiff_t,
@@ -35,6 +32,4 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX17 iterator
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ITERATOR_ITERATOR_H
diff --git a/libcxx/include/__iterator/iterator_traits.h b/libcxx/include/__iterator/iterator_traits.h
index 5275705c117c..54c3e11e6738 100644
--- a/libcxx/include/__iterator/iterator_traits.h
+++ b/libcxx/include/__iterator/iterator_traits.h
@@ -20,9 +20,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
#if !defined(_LIBCPP_HAS_NO_RANGES)
@@ -79,7 +76,7 @@ struct __iter_concept_category_test {
};
struct __iter_concept_random_fallback {
template <class _Iter>
- using _Apply = _EnableIf<
+ using _Apply = __enable_if_t<
__is_primary_template<iterator_traits<_Iter> >::value,
random_access_iterator_tag
>;
@@ -477,7 +474,7 @@ struct __is_exactly_cpp17_input_iterator
__has_iterator_category_convertible_to<_Tp, input_iterator_tag>::value &&
!__has_iterator_category_convertible_to<_Tp, forward_iterator_tag>::value> {};
-#ifndef _LIBCPP_HAS_NO_DEDUCTION_GUIDES
+#if _LIBCPP_STD_VER >= 17
template<class _InputIterator>
using __iter_value_type = typename iterator_traits<_InputIterator>::value_type;
@@ -491,10 +488,8 @@ template<class _InputIterator>
using __iter_to_alloc_type = pair<
add_const_t<typename iterator_traits<_InputIterator>::value_type::first_type>,
typename iterator_traits<_InputIterator>::value_type::second_type>;
-#endif // _LIBCPP_HAS_NO_DEDUCTION_GUIDES
+#endif // _LIBCPP_STD_VER >= 17
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ITERATOR_ITERATOR_TRAITS_H
diff --git a/libcxx/include/__iterator/move_iterator.h b/libcxx/include/__iterator/move_iterator.h
index 7819743bdb39..eac9264df30a 100644
--- a/libcxx/include/__iterator/move_iterator.h
+++ b/libcxx/include/__iterator/move_iterator.h
@@ -18,9 +18,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Iter>
@@ -57,13 +54,13 @@ public:
_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14
explicit move_iterator(_Iter __x) : __i(__x) {}
- template <class _Up, class = _EnableIf<
+ template <class _Up, class = __enable_if_t<
!is_same<_Up, _Iter>::value && is_convertible<_Up const&, _Iter>::value
> >
_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14
move_iterator(const move_iterator<_Up>& __u) : __i(__u.base()) {}
- template <class _Up, class = _EnableIf<
+ template <class _Up, class = __enable_if_t<
!is_same<_Up, _Iter>::value &&
is_convertible<_Up const&, _Iter>::value &&
is_assignable<_Iter&, _Up const&>::value
@@ -184,6 +181,4 @@ make_move_iterator(_Iter __i)
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ITERATOR_MOVE_ITERATOR_H
diff --git a/libcxx/include/__iterator/next.h b/libcxx/include/__iterator/next.h
index 1eecaa9750ba..0464708607d4 100644
--- a/libcxx/include/__iterator/next.h
+++ b/libcxx/include/__iterator/next.h
@@ -23,9 +23,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _InputIter>
@@ -82,6 +79,4 @@ inline constexpr auto next = __next_fn(__function_like::__tag());
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ITERATOR_PRIMITIVES_H
diff --git a/libcxx/include/__iterator/ostream_iterator.h b/libcxx/include/__iterator/ostream_iterator.h
index 5b4466c86399..20a36742ccab 100644
--- a/libcxx/include/__iterator/ostream_iterator.h
+++ b/libcxx/include/__iterator/ostream_iterator.h
@@ -20,9 +20,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
_LIBCPP_SUPPRESS_DEPRECATED_PUSH
@@ -70,6 +67,4 @@ public:
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ITERATOR_OSTREAM_ITERATOR_H
diff --git a/libcxx/include/__iterator/ostreambuf_iterator.h b/libcxx/include/__iterator/ostreambuf_iterator.h
index 90309dacd429..3272f6c99d74 100644
--- a/libcxx/include/__iterator/ostreambuf_iterator.h
+++ b/libcxx/include/__iterator/ostreambuf_iterator.h
@@ -19,9 +19,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
_LIBCPP_SUPPRESS_DEPRECATED_PUSH
@@ -76,6 +73,4 @@ public:
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ITERATOR_OSTREAMBUF_ITERATOR_H
diff --git a/libcxx/include/__iterator/prev.h b/libcxx/include/__iterator/prev.h
index cb8a57135504..cbe1e8759af3 100644
--- a/libcxx/include/__iterator/prev.h
+++ b/libcxx/include/__iterator/prev.h
@@ -23,9 +23,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _InputIter>
@@ -74,6 +71,4 @@ inline constexpr auto prev = __prev_fn(__function_like::__tag());
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ITERATOR_PREV_H
diff --git a/libcxx/include/__iterator/projected.h b/libcxx/include/__iterator/projected.h
index 7064a5eb9198..c24c86b4e06e 100644
--- a/libcxx/include/__iterator/projected.h
+++ b/libcxx/include/__iterator/projected.h
@@ -18,9 +18,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
#if !defined(_LIBCPP_HAS_NO_RANGES)
@@ -40,6 +37,4 @@ struct incrementable_traits<projected<_It, _Proj>> {
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ITERATOR_PROJECTED_H
diff --git a/libcxx/include/__iterator/readable_traits.h b/libcxx/include/__iterator/readable_traits.h
index fbad106e4ee1..90121bea8073 100644
--- a/libcxx/include/__iterator/readable_traits.h
+++ b/libcxx/include/__iterator/readable_traits.h
@@ -18,9 +18,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
#if !defined(_LIBCPP_HAS_NO_RANGES)
@@ -86,6 +83,4 @@ using iter_value_t = typename conditional_t<__is_primary_template<iterator_trait
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ITERATOR_READABLE_TRAITS_H
diff --git a/libcxx/include/__iterator/reverse_access.h b/libcxx/include/__iterator/reverse_access.h
index 66cc3568c1c1..643aede01c72 100644
--- a/libcxx/include/__iterator/reverse_access.h
+++ b/libcxx/include/__iterator/reverse_access.h
@@ -19,9 +19,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
#if !defined(_LIBCPP_CXX03_LANG)
@@ -104,6 +101,4 @@ auto crend(const _Cp& __c) -> decltype(_VSTD::rend(__c))
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ITERATOR_REVERSE_ACCESS_H
diff --git a/libcxx/include/__iterator/reverse_iterator.h b/libcxx/include/__iterator/reverse_iterator.h
index 76424a89a19c..f7a948950df2 100644
--- a/libcxx/include/__iterator/reverse_iterator.h
+++ b/libcxx/include/__iterator/reverse_iterator.h
@@ -20,9 +20,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Tp, class = void>
@@ -78,7 +75,7 @@ public:
_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14
explicit reverse_iterator(_Iter __x) : __t(__x), current(__x) {}
- template <class _Up, class = _EnableIf<
+ template <class _Up, class = __enable_if_t<
!is_same<_Up, _Iter>::value && is_convertible<_Up const&, _Iter>::value
> >
_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14
@@ -86,10 +83,10 @@ public:
: __t(__u.base()), current(__u.base())
{ }
- template <class _Up, class = _EnableIf<
+ template <class _Up, class = __enable_if_t<
!is_same<_Up, _Iter>::value &&
is_convertible<_Up const&, _Iter>::value &&
- is_assignable<_Up const&, _Iter>::value
+ is_assignable<_Iter, _Up const&>::value
> >
_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14
reverse_iterator& operator=(const reverse_iterator<_Up>& __u) {
@@ -103,7 +100,7 @@ public:
_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14
explicit reverse_iterator(_Iter __x) : current(__x) {}
- template <class _Up, class = _EnableIf<
+ template <class _Up, class = __enable_if_t<
!is_same<_Up, _Iter>::value && is_convertible<_Up const&, _Iter>::value
> >
_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14
@@ -111,10 +108,10 @@ public:
: current(__u.base())
{ }
- template <class _Up, class = _EnableIf<
+ template <class _Up, class = __enable_if_t<
!is_same<_Up, _Iter>::value &&
is_convertible<_Up const&, _Iter>::value &&
- is_assignable<_Up const&, _Iter>::value
+ is_assignable<_Iter, _Up const&>::value
> >
_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14
reverse_iterator& operator=(const reverse_iterator<_Up>& __u) {
@@ -234,6 +231,4 @@ reverse_iterator<_Iter> make_reverse_iterator(_Iter __i)
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ITERATOR_REVERSE_ITERATOR_H
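
The constraint change in the two hunks above corrects the argument order handed to is_assignable: the converting assignment performs current = __u.base(), so the trait must ask whether an _Iter can be assigned from a _Up const&, not whether a _Up const& can be assigned to. A stand-alone sketch of the corrected direction (raw pointers used for illustration, with an explicit lvalue reference added because pointers have no class-type assignment operator):

#include <type_traits>

struct Base {};
struct Derived : Base {};

// Assigning a Derived* into a Base* lvalue is fine; the reverse question,
// which the old constraint effectively asked, is not.
static_assert(std::is_assignable<Base*&, Derived* const&>::value, "");
static_assert(!std::is_assignable<Derived* const&, Base*&>::value, "");

int main() { return 0; }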
diff --git a/libcxx/include/__iterator/size.h b/libcxx/include/__iterator/size.h
index 259424f1d314..2e6a7d386cb1 100644
--- a/libcxx/include/__iterator/size.h
+++ b/libcxx/include/__iterator/size.h
@@ -18,9 +18,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER > 14
@@ -53,6 +50,4 @@ constexpr ptrdiff_t ssize(const _Tp (&)[_Sz]) noexcept { return _Sz; }
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___ITERATOR_SIZE_H
diff --git a/libcxx/include/__iterator/unreachable_sentinel.h b/libcxx/include/__iterator/unreachable_sentinel.h
new file mode 100644
index 000000000000..cbbccd7bb288
--- /dev/null
+++ b/libcxx/include/__iterator/unreachable_sentinel.h
@@ -0,0 +1,38 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___ITERATOR_UNREACHABLE_SENTINEL_H
+#define _LIBCPP___ITERATOR_UNREACHABLE_SENTINEL_H
+
+#include <__config>
+#include <__iterator/concepts.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if !defined(_LIBCPP_HAS_NO_RANGES)
+
+struct unreachable_sentinel_t {
+ template<weakly_incrementable _Iter>
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr bool operator==(unreachable_sentinel_t, const _Iter&) noexcept {
+ return false;
+ }
+};
+
+inline constexpr unreachable_sentinel_t unreachable_sentinel{};
+
+#endif // !defined(_LIBCPP_HAS_NO_RANGES)
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___ITERATOR_UNREACHABLE_SENTINEL_H
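
A brief usage sketch for the new sentinel (assumes C++20 ranges algorithms; not from this patch): because it compares unequal to every iterator, it expresses an unbounded range whose termination is guaranteed by other means.

#include <algorithm>
#include <iterator>

int main() {
    const int data[] = {4, 8, 15, 16, 23, 42};
    // No upper bound is given; this is safe only because 23 is known to be present.
    auto it = std::ranges::find(std::begin(data), std::unreachable_sentinel, 23);
    return *it == 23 ? 0 : 1;
}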
diff --git a/libcxx/include/__iterator/wrap_iter.h b/libcxx/include/__iterator/wrap_iter.h
index e35a372b4267..28872f9fa41a 100644
--- a/libcxx/include/__iterator/wrap_iter.h
+++ b/libcxx/include/__iterator/wrap_iter.h
@@ -13,16 +13,14 @@
#include <__config>
#include <__debug>
#include <__iterator/iterator_traits.h>
-#include <__memory/pointer_traits.h> // __to_address
+#include <__memory/addressof.h>
+#include <__memory/pointer_traits.h>
#include <type_traits>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Iter>
@@ -57,7 +55,7 @@ public:
: __i(__u.base())
{
#if _LIBCPP_DEBUG_LEVEL == 2
- __get_db()->__iterator_copy(this, &__u);
+ __get_db()->__iterator_copy(this, _VSTD::addressof(__u));
#endif
}
#if _LIBCPP_DEBUG_LEVEL == 2
@@ -65,14 +63,14 @@ public:
__wrap_iter(const __wrap_iter& __x)
: __i(__x.base())
{
- __get_db()->__iterator_copy(this, &__x);
+ __get_db()->__iterator_copy(this, _VSTD::addressof(__x));
}
_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG
__wrap_iter& operator=(const __wrap_iter& __x)
{
- if (this != &__x)
+ if (this != _VSTD::addressof(__x))
{
- __get_db()->__iterator_copy(this, &__x);
+ __get_db()->__iterator_copy(this, _VSTD::addressof(__x));
__i = __x.__i;
}
return *this;
@@ -183,7 +181,7 @@ _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG
bool operator<(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter1>& __y) _NOEXCEPT
{
#if _LIBCPP_DEBUG_LEVEL == 2
- _LIBCPP_ASSERT(__get_const_db()->__less_than_comparable(&__x, &__y),
+ _LIBCPP_ASSERT(__get_const_db()->__less_than_comparable(_VSTD::addressof(__x), _VSTD::addressof(__y)),
"Attempted to compare incomparable iterators");
#endif
return __x.base() < __y.base();
@@ -267,7 +265,7 @@ operator-(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXC
#endif // C++03
{
#if _LIBCPP_DEBUG_LEVEL == 2
- _LIBCPP_ASSERT(__get_const_db()->__less_than_comparable(&__x, &__y),
+ _LIBCPP_ASSERT(__get_const_db()->__less_than_comparable(_VSTD::addressof(__x), _VSTD::addressof(__y)),
"Attempted to subtract incompatible iterators");
#endif
return __x.base() - __y.base();
@@ -286,15 +284,19 @@ template <class _It>
struct __is_cpp17_contiguous_iterator<__wrap_iter<_It> > : true_type {};
#endif
-template <class _Iter>
-_LIBCPP_CONSTEXPR
-decltype(_VSTD::__to_address(declval<_Iter>()))
-__to_address(__wrap_iter<_Iter> __w) _NOEXCEPT {
- return _VSTD::__to_address(__w.base());
-}
+template <class _It>
+struct _LIBCPP_TEMPLATE_VIS pointer_traits<__wrap_iter<_It> >
+{
+ typedef __wrap_iter<_It> pointer;
+ typedef typename pointer_traits<_It>::element_type element_type;
+ typedef typename pointer_traits<_It>::difference_type difference_type;
-_LIBCPP_END_NAMESPACE_STD
+ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR
+ static element_type *to_address(pointer __w) _NOEXCEPT {
+ return _VSTD::__to_address(__w.base());
+ }
+};
-_LIBCPP_POP_MACROS
+_LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP___ITERATOR_WRAP_ITER_H
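
The replacement above moves __wrap_iter's contiguous-iterator support from a private __to_address overload to the documented customization point: a pointer_traits specialization whose static to_address unwraps the iterator. The same pattern for a user-defined wrapper looks like this (a sketch under C++20; the names wrap and p are invented for the example):

#include <cstddef>
#include <memory>

template <class T>
struct wrap {             // a trivial wrapper around T*
    T* p;
    T& operator*() const { return *p; }
};

// Opt into std::to_address via pointer_traits, the mechanism adopted above.
template <class T>
struct std::pointer_traits<wrap<T>> {
    using pointer         = wrap<T>;
    using element_type    = T;
    using difference_type = std::ptrdiff_t;
    static T* to_address(pointer w) noexcept { return w.p; }
};

int main() {
    int x = 5;
    wrap<int> w{&x};
    return std::to_address(w) == &x ? 0 : 1;
}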
diff --git a/libcxx/include/__libcpp_version b/libcxx/include/__libcpp_version
index 09514aa4db92..bfed81939856 100644
--- a/libcxx/include/__libcpp_version
+++ b/libcxx/include/__libcpp_version
@@ -1 +1 @@
-13000
+14000
diff --git a/libcxx/include/__locale b/libcxx/include/__locale
index ad742997d9ac..4296adbbd8e9 100644
--- a/libcxx/include/__locale
+++ b/libcxx/include/__locale
@@ -22,8 +22,6 @@
#if defined(_LIBCPP_MSVCRT_LIKE)
# include <cstring>
# include <__support/win32/locale_win32.h>
-#elif defined(__NuttX__)
-# include <__support/nuttx/xlocale.h>
#elif defined(_AIX) || defined(__MVS__)
# include <__support/ibm/xlocale.h>
#elif defined(__ANDROID__)
@@ -340,7 +338,9 @@ collate<_CharT>::do_hash(const char_type* __lo, const char_type* __hi) const
}
_LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS collate<char>)
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
_LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS collate<wchar_t>)
+#endif
// template <class CharT> class collate_byname;
@@ -365,6 +365,7 @@ protected:
virtual string_type do_transform(const char_type* __lo, const char_type* __hi) const;
};
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
template <>
class _LIBCPP_TYPE_VIS collate_byname<wchar_t>
: public collate<wchar_t>
@@ -384,6 +385,7 @@ protected:
const char_type* __lo2, const char_type* __hi2) const;
virtual string_type do_transform(const char_type* __lo, const char_type* __hi) const;
};
+#endif
template <class _CharT, class _Traits, class _Allocator>
bool
@@ -518,6 +520,7 @@ public:
template <class _CharT> class _LIBCPP_TEMPLATE_VIS ctype;
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
template <>
class _LIBCPP_TYPE_VIS ctype<wchar_t>
: public locale::facet,
@@ -619,6 +622,7 @@ protected:
virtual char do_narrow(char_type, char __dfault) const;
virtual const char_type* do_narrow(const char_type* __low, const char_type* __high, char __dfault, char* __dest) const;
};
+#endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS
template <>
class _LIBCPP_TYPE_VIS ctype<char>
@@ -763,6 +767,7 @@ protected:
virtual const char_type* do_tolower(char_type* __low, const char_type* __high) const;
};
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
template <>
class _LIBCPP_TYPE_VIS ctype_byname<wchar_t>
: public ctype<wchar_t>
@@ -788,6 +793,7 @@ protected:
virtual char do_narrow(char_type, char __dfault) const;
virtual const char_type* do_narrow(const char_type* __low, const char_type* __high, char __dfault, char* __dest) const;
};
+#endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS
template <class _CharT>
inline _LIBCPP_INLINE_VISIBILITY
@@ -994,6 +1000,7 @@ protected:
// template <> class codecvt<wchar_t, char, mbstate_t>
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
template <>
class _LIBCPP_TYPE_VIS codecvt<wchar_t, char, mbstate_t>
: public locale::facet,
@@ -1074,6 +1081,7 @@ protected:
virtual int do_length(state_type&, const extern_type* __frm, const extern_type* __end, size_t __mx) const;
virtual int do_max_length() const _NOEXCEPT;
};
+#endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS
// template <> class codecvt<char16_t, char, mbstate_t> // deprecated in C++20
@@ -1452,7 +1460,9 @@ codecvt_byname<_InternT, _ExternT, _StateT>::~codecvt_byname()
_LIBCPP_SUPPRESS_DEPRECATED_POP
_LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS codecvt_byname<char, char, mbstate_t>)
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
_LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS codecvt_byname<wchar_t, char, mbstate_t>)
+#endif
_LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(class _LIBCPP_DEPRECATED_IN_CXX20 _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS codecvt_byname<char16_t, char, mbstate_t>) // deprecated in C++20
_LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(class _LIBCPP_DEPRECATED_IN_CXX20 _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS codecvt_byname<char32_t, char, mbstate_t>) // deprecated in C++20
#ifndef _LIBCPP_HAS_NO_CHAR8_T
@@ -1683,6 +1693,7 @@ protected:
string __grouping_;
};
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
template <>
class _LIBCPP_TYPE_VIS numpunct<wchar_t>
: public locale::facet
@@ -1713,6 +1724,7 @@ protected:
char_type __thousands_sep_;
string __grouping_;
};
+#endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS
// template <class charT> class numpunct_byname
@@ -1736,6 +1748,7 @@ private:
void __init(const char*);
};
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
template <>
class _LIBCPP_TYPE_VIS numpunct_byname<wchar_t>
: public numpunct<wchar_t>
@@ -1753,6 +1766,7 @@ protected:
private:
void __init(const char*);
};
+#endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS
_LIBCPP_END_NAMESPACE_STD
diff --git a/libcxx/include/__mbstate_t.h b/libcxx/include/__mbstate_t.h
new file mode 100644
index 000000000000..3489f9cc0e3a
--- /dev/null
+++ b/libcxx/include/__mbstate_t.h
@@ -0,0 +1,44 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___MBSTATE_T_H
+#define _LIBCPP___MBSTATE_T_H
+
+#include <__config>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+// TODO(ldionne):
+// The goal of this header is to provide mbstate_t without having to pull in
+// <wchar.h> or <uchar.h>. This is necessary because we need that type even
+// when we don't have (or try to provide) support for wchar_t, because several
+// types like std::fpos are defined in terms of mbstate_t.
+//
+// This is a gruesome hack, but I don't know how to make it cleaner for
+// the time being.
+
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
+# include <wchar.h> // for mbstate_t
+#elif __has_include(<bits/types/mbstate_t.h>)
+# include <bits/types/mbstate_t.h> // works on most Unixes
+#elif __has_include(<sys/_types/_mbstate_t.h>)
+# include <sys/_types/_mbstate_t.h> // works on Darwin
+#else
+# error "The library was configured without support for wide-characters, but we don't know how to get the definition of mbstate_t without <wchar.h> on your platform."
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+using ::mbstate_t _LIBCPP_USING_IF_EXISTS;
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___MBSTATE_T_H
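
To make the motivation in the header comment concrete (ordinary standard usage, sketched; not libc++ internals): types such as std::fpos carry an mbstate_t, so its definition must come from somewhere even on builds without wchar_t support; on a normal configuration it simply comes from <cwchar>.

#include <cwchar>   // std::mbstate_t on a usual wide-character-enabled build
#include <ios>      // std::fpos

int main() {
    std::mbstate_t st{};               // value-initialized conversion state
    std::fpos<std::mbstate_t> pos(0);  // std::streampos is exactly this type
    pos.state(st);                     // fpos stores an mbstate_t, hence the dependency
    return 0;
}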
diff --git a/libcxx/include/__memory/addressof.h b/libcxx/include/__memory/addressof.h
index 5efdb5878625..c45dedfaec9b 100644
--- a/libcxx/include/__memory/addressof.h
+++ b/libcxx/include/__memory/addressof.h
@@ -16,13 +16,8 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
-#ifndef _LIBCPP_HAS_NO_BUILTIN_ADDRESSOF
-
template <class _Tp>
inline _LIBCPP_CONSTEXPR_AFTER_CXX14
_LIBCPP_NO_CFI _LIBCPP_INLINE_VISIBILITY
@@ -32,19 +27,6 @@ addressof(_Tp& __x) _NOEXCEPT
return __builtin_addressof(__x);
}
-#else
-
-template <class _Tp>
-inline _LIBCPP_NO_CFI _LIBCPP_INLINE_VISIBILITY
-_Tp*
-addressof(_Tp& __x) _NOEXCEPT
-{
- return reinterpret_cast<_Tp *>(
- const_cast<char *>(&reinterpret_cast<const volatile char &>(__x)));
-}
-
-#endif // _LIBCPP_HAS_NO_BUILTIN_ADDRESSOF
-
#if defined(_LIBCPP_HAS_OBJC_ARC) && !defined(_LIBCPP_PREDEFINED_OBJC_ARC_ADDRESSOF)
// Objective-C++ Automatic Reference Counting uses qualified pointers
// that require special addressof() signatures. When
@@ -91,6 +73,4 @@ template <class _Tp> _Tp* addressof(const _Tp&&) noexcept = delete;
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___MEMORY_ADDRESSOF_H
diff --git a/libcxx/include/__memory/allocation_guard.h b/libcxx/include/__memory/allocation_guard.h
index 4987af293d14..6412677aaf14 100644
--- a/libcxx/include/__memory/allocation_guard.h
+++ b/libcxx/include/__memory/allocation_guard.h
@@ -19,10 +19,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
-
_LIBCPP_BEGIN_NAMESPACE_STD
// Helper class to allocate memory using an Allocator in an exception safe
@@ -84,6 +80,4 @@ private:
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___MEMORY_ALLOCATION_GUARD_H
diff --git a/libcxx/include/__memory/allocator.h b/libcxx/include/__memory/allocator.h
index 2c21a16e7e84..283212fb703d 100644
--- a/libcxx/include/__memory/allocator.h
+++ b/libcxx/include/__memory/allocator.h
@@ -22,9 +22,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Tp> class allocator;
@@ -83,6 +80,7 @@ template <class _Tp>
class _LIBCPP_TEMPLATE_VIS allocator
: private __non_trivial_if<!is_void<_Tp>::value, allocator<_Tp> >
{
+ static_assert(!is_volatile<_Tp>::value, "std::allocator does not support volatile types");
public:
typedef size_t size_type;
typedef ptrdiff_t difference_type;
@@ -100,8 +98,7 @@ public:
_LIBCPP_NODISCARD_AFTER_CXX17 _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
_Tp* allocate(size_t __n) {
if (__n > allocator_traits<allocator>::max_size(*this))
- __throw_length_error("allocator<T>::allocate(size_t n)"
- " 'n' exceeds maximum supported size");
+ __throw_bad_array_new_length();
if (__libcpp_is_constant_evaluated()) {
return static_cast<_Tp*>(::operator new(__n * sizeof(_Tp)));
} else {
@@ -165,6 +162,7 @@ template <class _Tp>
class _LIBCPP_TEMPLATE_VIS allocator<const _Tp>
: private __non_trivial_if<!is_void<_Tp>::value, allocator<const _Tp> >
{
+ static_assert(!is_volatile<_Tp>::value, "std::allocator does not support volatile types");
public:
typedef size_t size_type;
typedef ptrdiff_t difference_type;
@@ -182,8 +180,7 @@ public:
_LIBCPP_NODISCARD_AFTER_CXX17 _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
const _Tp* allocate(size_t __n) {
if (__n > allocator_traits<allocator>::max_size(*this))
- __throw_length_error("allocator<const T>::allocate(size_t n)"
- " 'n' exceeds maximum supported size");
+ __throw_bad_array_new_length();
if (__libcpp_is_constant_evaluated()) {
return static_cast<const _Tp*>(::operator new(__n * sizeof(_Tp)));
} else {
@@ -249,6 +246,4 @@ bool operator!=(const allocator<_Tp>&, const allocator<_Up>&) _NOEXCEPT {return
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___MEMORY_ALLOCATOR_H
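
Two user-visible effects of this hunk, sketched (the behaviour assumed is exactly what the added lines above state; this is not a libc++ test): an allocation request above max_size() now reports std::bad_array_new_length instead of std::length_error, and volatile element types are rejected at compile time by the new static_assert.

#include <cstddef>
#include <memory>
#include <new>

int main() {
    std::allocator<int> a;
    try {
        // More elements than max_size(): with this change the library throws
        // std::bad_array_new_length rather than std::length_error.
        (void)a.allocate(static_cast<std::size_t>(-1));
    } catch (const std::bad_array_new_length&) {
        return 0;
    }
    // std::allocator<volatile int> v;  // would now trip the static_assert above
    return 1;
}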
diff --git a/libcxx/include/__memory/allocator_arg_t.h b/libcxx/include/__memory/allocator_arg_t.h
index 830c6b8148eb..f5a116dbbd7e 100644
--- a/libcxx/include/__memory/allocator_arg_t.h
+++ b/libcxx/include/__memory/allocator_arg_t.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef _LIBCPP___FUNCTIONAL___ALLOCATOR_ARG_T_H
-#define _LIBCPP___FUNCTIONAL___ALLOCATOR_ARG_T_H
+#ifndef _LIBCPP___FUNCTIONAL_ALLOCATOR_ARG_T_H
+#define _LIBCPP___FUNCTIONAL_ALLOCATOR_ARG_T_H
#include <__config>
#include <__memory/uses_allocator.h>
@@ -26,7 +26,7 @@ struct _LIBCPP_TEMPLATE_VIS allocator_arg_t { explicit allocator_arg_t() = defau
#if defined(_LIBCPP_CXX03_LANG) || defined(_LIBCPP_BUILDING_LIBRARY)
extern _LIBCPP_EXPORTED_FROM_ABI const allocator_arg_t allocator_arg;
#else
-/* _LIBCPP_INLINE_VAR */ constexpr allocator_arg_t allocator_arg = allocator_arg_t();
+/* inline */ constexpr allocator_arg_t allocator_arg = allocator_arg_t();
#endif
#ifndef _LIBCPP_CXX03_LANG
@@ -36,7 +36,7 @@ extern _LIBCPP_EXPORTED_FROM_ABI const allocator_arg_t allocator_arg;
template <class _Tp, class _Alloc, class ..._Args>
struct __uses_alloc_ctor_imp
{
- typedef _LIBCPP_NODEBUG_TYPE typename __uncvref<_Alloc>::type _RawAlloc;
+ typedef _LIBCPP_NODEBUG typename __uncvref<_Alloc>::type _RawAlloc;
static const bool __ua = uses_allocator<_Tp, _RawAlloc>::value;
static const bool __ic =
is_constructible<_Tp, allocator_arg_t, _Alloc, _Args...>::value;
@@ -75,4 +75,4 @@ void __user_alloc_construct_impl (integral_constant<int, 2>, _Tp *__storage, con
_LIBCPP_END_NAMESPACE_STD
-#endif // _LIBCPP___FUNCTIONAL___ALLOCATOR_ARG_T_H
+#endif // _LIBCPP___FUNCTIONAL_ALLOCATOR_ARG_T_H
diff --git a/libcxx/include/__memory/allocator_traits.h b/libcxx/include/__memory/allocator_traits.h
index a02af0deafc9..cc32352ae11c 100644
--- a/libcxx/include/__memory/allocator_traits.h
+++ b/libcxx/include/__memory/allocator_traits.h
@@ -36,11 +36,11 @@ template <class _Tp, class _Alloc,
class _RawAlloc = typename remove_reference<_Alloc>::type,
bool = __has_pointer<_RawAlloc>::value>
struct __pointer {
- using type _LIBCPP_NODEBUG_TYPE = typename _RawAlloc::pointer;
+ using type _LIBCPP_NODEBUG = typename _RawAlloc::pointer;
};
template <class _Tp, class _Alloc, class _RawAlloc>
struct __pointer<_Tp, _Alloc, _RawAlloc, false> {
- using type _LIBCPP_NODEBUG_TYPE = _Tp*;
+ using type _LIBCPP_NODEBUG = _Tp*;
};
// __const_pointer
@@ -48,14 +48,14 @@ _LIBCPP_ALLOCATOR_TRAITS_HAS_XXX(__has_const_pointer, const_pointer);
template <class _Tp, class _Ptr, class _Alloc,
bool = __has_const_pointer<_Alloc>::value>
struct __const_pointer {
- using type _LIBCPP_NODEBUG_TYPE = typename _Alloc::const_pointer;
+ using type _LIBCPP_NODEBUG = typename _Alloc::const_pointer;
};
template <class _Tp, class _Ptr, class _Alloc>
struct __const_pointer<_Tp, _Ptr, _Alloc, false> {
#ifdef _LIBCPP_CXX03_LANG
using type = typename pointer_traits<_Ptr>::template rebind<const _Tp>::other;
#else
- using type _LIBCPP_NODEBUG_TYPE = typename pointer_traits<_Ptr>::template rebind<const _Tp>;
+ using type _LIBCPP_NODEBUG = typename pointer_traits<_Ptr>::template rebind<const _Tp>;
#endif
};
@@ -64,14 +64,14 @@ _LIBCPP_ALLOCATOR_TRAITS_HAS_XXX(__has_void_pointer, void_pointer);
template <class _Ptr, class _Alloc,
bool = __has_void_pointer<_Alloc>::value>
struct __void_pointer {
- using type _LIBCPP_NODEBUG_TYPE = typename _Alloc::void_pointer;
+ using type _LIBCPP_NODEBUG = typename _Alloc::void_pointer;
};
template <class _Ptr, class _Alloc>
struct __void_pointer<_Ptr, _Alloc, false> {
#ifdef _LIBCPP_CXX03_LANG
- using type _LIBCPP_NODEBUG_TYPE = typename pointer_traits<_Ptr>::template rebind<void>::other;
+ using type _LIBCPP_NODEBUG = typename pointer_traits<_Ptr>::template rebind<void>::other;
#else
- using type _LIBCPP_NODEBUG_TYPE = typename pointer_traits<_Ptr>::template rebind<void>;
+ using type _LIBCPP_NODEBUG = typename pointer_traits<_Ptr>::template rebind<void>;
#endif
};
@@ -80,14 +80,14 @@ _LIBCPP_ALLOCATOR_TRAITS_HAS_XXX(__has_const_void_pointer, const_void_pointer);
template <class _Ptr, class _Alloc,
bool = __has_const_void_pointer<_Alloc>::value>
struct __const_void_pointer {
- using type _LIBCPP_NODEBUG_TYPE = typename _Alloc::const_void_pointer;
+ using type _LIBCPP_NODEBUG = typename _Alloc::const_void_pointer;
};
template <class _Ptr, class _Alloc>
struct __const_void_pointer<_Ptr, _Alloc, false> {
#ifdef _LIBCPP_CXX03_LANG
- using type _LIBCPP_NODEBUG_TYPE = typename pointer_traits<_Ptr>::template rebind<const void>::other;
+ using type _LIBCPP_NODEBUG = typename pointer_traits<_Ptr>::template rebind<const void>::other;
#else
- using type _LIBCPP_NODEBUG_TYPE = typename pointer_traits<_Ptr>::template rebind<const void>;
+ using type _LIBCPP_NODEBUG = typename pointer_traits<_Ptr>::template rebind<const void>;
#endif
};
@@ -97,18 +97,18 @@ template <class _Alloc, class _DiffType, bool = __has_size_type<_Alloc>::value>
struct __size_type : make_unsigned<_DiffType> { };
template <class _Alloc, class _DiffType>
struct __size_type<_Alloc, _DiffType, true> {
- using type _LIBCPP_NODEBUG_TYPE = typename _Alloc::size_type;
+ using type _LIBCPP_NODEBUG = typename _Alloc::size_type;
};
// __alloc_traits_difference_type
_LIBCPP_ALLOCATOR_TRAITS_HAS_XXX(__has_alloc_traits_difference_type, difference_type);
template <class _Alloc, class _Ptr, bool = __has_alloc_traits_difference_type<_Alloc>::value>
struct __alloc_traits_difference_type {
- using type _LIBCPP_NODEBUG_TYPE = typename pointer_traits<_Ptr>::difference_type;
+ using type _LIBCPP_NODEBUG = typename pointer_traits<_Ptr>::difference_type;
};
template <class _Alloc, class _Ptr>
struct __alloc_traits_difference_type<_Alloc, _Ptr, true> {
- using type _LIBCPP_NODEBUG_TYPE = typename _Alloc::difference_type;
+ using type _LIBCPP_NODEBUG = typename _Alloc::difference_type;
};
// __propagate_on_container_copy_assignment
@@ -117,7 +117,7 @@ template <class _Alloc, bool = __has_propagate_on_container_copy_assignment<_All
struct __propagate_on_container_copy_assignment : false_type { };
template <class _Alloc>
struct __propagate_on_container_copy_assignment<_Alloc, true> {
- using type _LIBCPP_NODEBUG_TYPE = typename _Alloc::propagate_on_container_copy_assignment;
+ using type _LIBCPP_NODEBUG = typename _Alloc::propagate_on_container_copy_assignment;
};
// __propagate_on_container_move_assignment
@@ -126,7 +126,7 @@ template <class _Alloc, bool = __has_propagate_on_container_move_assignment<_All
struct __propagate_on_container_move_assignment : false_type { };
template <class _Alloc>
struct __propagate_on_container_move_assignment<_Alloc, true> {
- using type _LIBCPP_NODEBUG_TYPE = typename _Alloc::propagate_on_container_move_assignment;
+ using type _LIBCPP_NODEBUG = typename _Alloc::propagate_on_container_move_assignment;
};
// __propagate_on_container_swap
@@ -135,7 +135,7 @@ template <class _Alloc, bool = __has_propagate_on_container_swap<_Alloc>::value>
struct __propagate_on_container_swap : false_type { };
template <class _Alloc>
struct __propagate_on_container_swap<_Alloc, true> {
- using type _LIBCPP_NODEBUG_TYPE = typename _Alloc::propagate_on_container_swap;
+ using type _LIBCPP_NODEBUG = typename _Alloc::propagate_on_container_swap;
};
// __is_always_equal
@@ -144,7 +144,7 @@ template <class _Alloc, bool = __has_is_always_equal<_Alloc>::value>
struct __is_always_equal : is_empty<_Alloc> { };
template <class _Alloc>
struct __is_always_equal<_Alloc, true> {
- using type _LIBCPP_NODEBUG_TYPE = typename _Alloc::is_always_equal;
+ using type _LIBCPP_NODEBUG = typename _Alloc::is_always_equal;
};
// __allocator_traits_rebind
@@ -158,15 +158,15 @@ struct __has_rebind_other<_Tp, _Up, typename __void_t<
template <class _Tp, class _Up, bool = __has_rebind_other<_Tp, _Up>::value>
struct __allocator_traits_rebind {
- using type _LIBCPP_NODEBUG_TYPE = typename _Tp::template rebind<_Up>::other;
+ using type _LIBCPP_NODEBUG = typename _Tp::template rebind<_Up>::other;
};
template <template <class, class...> class _Alloc, class _Tp, class ..._Args, class _Up>
struct __allocator_traits_rebind<_Alloc<_Tp, _Args...>, _Up, true> {
- using type _LIBCPP_NODEBUG_TYPE = typename _Alloc<_Tp, _Args...>::template rebind<_Up>::other;
+ using type _LIBCPP_NODEBUG = typename _Alloc<_Tp, _Args...>::template rebind<_Up>::other;
};
template <template <class, class...> class _Alloc, class _Tp, class ..._Args, class _Up>
struct __allocator_traits_rebind<_Alloc<_Tp, _Args...>, _Up, false> {
- using type _LIBCPP_NODEBUG_TYPE = _Alloc<_Up, _Args...>;
+ using type _LIBCPP_NODEBUG = _Alloc<_Up, _Args...>;
};
_LIBCPP_SUPPRESS_DEPRECATED_POP
@@ -263,7 +263,7 @@ struct _LIBCPP_TEMPLATE_VIS allocator_traits
}
template <class _Ap = _Alloc, class =
- _EnableIf<__has_allocate_hint<_Ap, size_type, const_void_pointer>::value> >
+ __enable_if_t<__has_allocate_hint<_Ap, size_type, const_void_pointer>::value> >
_LIBCPP_NODISCARD_AFTER_CXX17 _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
static pointer allocate(allocator_type& __a, size_type __n, const_void_pointer __hint) {
_LIBCPP_SUPPRESS_DEPRECATED_PUSH
@@ -271,7 +271,7 @@ struct _LIBCPP_TEMPLATE_VIS allocator_traits
_LIBCPP_SUPPRESS_DEPRECATED_POP
}
template <class _Ap = _Alloc, class = void, class =
- _EnableIf<!__has_allocate_hint<_Ap, size_type, const_void_pointer>::value> >
+ __enable_if_t<!__has_allocate_hint<_Ap, size_type, const_void_pointer>::value> >
_LIBCPP_NODISCARD_AFTER_CXX17 _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
static pointer allocate(allocator_type& __a, size_type __n, const_void_pointer) {
return __a.allocate(__n);
@@ -283,7 +283,7 @@ struct _LIBCPP_TEMPLATE_VIS allocator_traits
}
template <class _Tp, class... _Args, class =
- _EnableIf<__has_construct<allocator_type, _Tp*, _Args...>::value> >
+ __enable_if_t<__has_construct<allocator_type, _Tp*, _Args...>::value> >
_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
static void construct(allocator_type& __a, _Tp* __p, _Args&&... __args) {
_LIBCPP_SUPPRESS_DEPRECATED_PUSH
@@ -291,7 +291,7 @@ struct _LIBCPP_TEMPLATE_VIS allocator_traits
_LIBCPP_SUPPRESS_DEPRECATED_POP
}
template <class _Tp, class... _Args, class = void, class =
- _EnableIf<!__has_construct<allocator_type, _Tp*, _Args...>::value> >
+ __enable_if_t<!__has_construct<allocator_type, _Tp*, _Args...>::value> >
_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
static void construct(allocator_type&, _Tp* __p, _Args&&... __args) {
#if _LIBCPP_STD_VER > 17
@@ -302,7 +302,7 @@ struct _LIBCPP_TEMPLATE_VIS allocator_traits
}
template <class _Tp, class =
- _EnableIf<__has_destroy<allocator_type, _Tp*>::value> >
+ __enable_if_t<__has_destroy<allocator_type, _Tp*>::value> >
_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
static void destroy(allocator_type& __a, _Tp* __p) {
_LIBCPP_SUPPRESS_DEPRECATED_PUSH
@@ -310,7 +310,7 @@ struct _LIBCPP_TEMPLATE_VIS allocator_traits
_LIBCPP_SUPPRESS_DEPRECATED_POP
}
template <class _Tp, class = void, class =
- _EnableIf<!__has_destroy<allocator_type, _Tp*>::value> >
+ __enable_if_t<!__has_destroy<allocator_type, _Tp*>::value> >
_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
static void destroy(allocator_type&, _Tp* __p) {
#if _LIBCPP_STD_VER > 17
@@ -321,7 +321,7 @@ struct _LIBCPP_TEMPLATE_VIS allocator_traits
}
template <class _Ap = _Alloc, class =
- _EnableIf<__has_max_size<const _Ap>::value> >
+ __enable_if_t<__has_max_size<const _Ap>::value> >
_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
static size_type max_size(const allocator_type& __a) _NOEXCEPT {
_LIBCPP_SUPPRESS_DEPRECATED_PUSH
@@ -329,30 +329,38 @@ struct _LIBCPP_TEMPLATE_VIS allocator_traits
_LIBCPP_SUPPRESS_DEPRECATED_POP
}
template <class _Ap = _Alloc, class = void, class =
- _EnableIf<!__has_max_size<const _Ap>::value> >
+ __enable_if_t<!__has_max_size<const _Ap>::value> >
_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
static size_type max_size(const allocator_type&) _NOEXCEPT {
return numeric_limits<size_type>::max() / sizeof(value_type);
}
template <class _Ap = _Alloc, class =
- _EnableIf<__has_select_on_container_copy_construction<const _Ap>::value> >
+ __enable_if_t<__has_select_on_container_copy_construction<const _Ap>::value> >
_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
static allocator_type select_on_container_copy_construction(const allocator_type& __a) {
return __a.select_on_container_copy_construction();
}
template <class _Ap = _Alloc, class = void, class =
- _EnableIf<!__has_select_on_container_copy_construction<const _Ap>::value> >
+ __enable_if_t<!__has_select_on_container_copy_construction<const _Ap>::value> >
_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
static allocator_type select_on_container_copy_construction(const allocator_type& __a) {
return __a;
}
};
+// A version of `allocator_traits` for internal usage that SFINAEs away if the
+// given allocator doesn't have a nested `value_type`. This helps avoid hard
+// errors when forming implicit deduction guides for a container that has an
+// invalid Allocator type. See https://wg21.link/LWGXXXXX.
+// TODO(varconst): use the actual link once available.
+template <class _Alloc, class _ValueType = typename _Alloc::value_type>
+struct _LIBCPP_TEMPLATE_VIS __allocator_traits : allocator_traits<_Alloc> {};
+
template <class _Traits, class _Tp>
struct __rebind_alloc_helper {
#ifndef _LIBCPP_CXX03_LANG
- using type _LIBCPP_NODEBUG_TYPE = typename _Traits::template rebind_alloc<_Tp>;
+ using type _LIBCPP_NODEBUG = typename _Traits::template rebind_alloc<_Tp>;
#else
using type = typename _Traits::template rebind_alloc<_Tp>::other;
#endif
@@ -374,7 +382,7 @@ struct __is_cpp17_move_insertable
{ };
template <class _Alloc>
-struct __is_cpp17_move_insertable<_Alloc, _EnableIf<
+struct __is_cpp17_move_insertable<_Alloc, __enable_if_t<
!__is_default_allocator<_Alloc>::value &&
__has_construct<_Alloc, typename _Alloc::value_type*, typename _Alloc::value_type&&>::value
> > : true_type { };
@@ -389,7 +397,7 @@ struct __is_cpp17_copy_insertable
{ };
template <class _Alloc>
-struct __is_cpp17_copy_insertable<_Alloc, _EnableIf<
+struct __is_cpp17_copy_insertable<_Alloc, __enable_if_t<
!__is_default_allocator<_Alloc>::value &&
__has_construct<_Alloc, typename _Alloc::value_type*, const typename _Alloc::value_type&>::value
> >
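For context on the new __allocator_traits helper above: a minimal, self-contained sketch of the same SFINAE pattern (hypothetical names, a standalone probe rather than the vendored code), showing how a defaulted template parameter turns a missing Alloc::value_type into a silent substitution failure instead of a hard error:

#include <memory>
#include <type_traits>

// The defaulted parameter is ill-formed when Alloc has no nested value_type,
// so this overload is dropped from consideration (SFINAE) instead of erroring.
template <class Alloc, class = typename Alloc::value_type>
std::true_type has_usable_allocator(int);

template <class Alloc>
std::false_type has_usable_allocator(long);

struct NotAnAllocator {};  // hypothetical allocator-like type with no value_type

static_assert(decltype(has_usable_allocator<std::allocator<int>>(0))::value, "");
static_assert(!decltype(has_usable_allocator<NotAnAllocator>(0))::value, "");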
diff --git a/libcxx/include/__memory/auto_ptr.h b/libcxx/include/__memory/auto_ptr.h
index f8d2b507b4c6..492cbabab8ac 100644
--- a/libcxx/include/__memory/auto_ptr.h
+++ b/libcxx/include/__memory/auto_ptr.h
@@ -17,9 +17,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Tp>
@@ -81,6 +78,4 @@ public:
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___MEMORY_AUTO_PTR_H
diff --git a/libcxx/include/__memory/compressed_pair.h b/libcxx/include/__memory/compressed_pair.h
index 08f0318a10f4..fd1fcbe5bf39 100644
--- a/libcxx/include/__memory/compressed_pair.h
+++ b/libcxx/include/__memory/compressed_pair.h
@@ -20,9 +20,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
// Tag used to default initialize one or both of the pair's elements.
@@ -118,8 +115,8 @@ public:
"The current implementation is NOT ABI-compatible with the previous "
"implementation for this configuration");
- typedef _LIBCPP_NODEBUG_TYPE __compressed_pair_elem<_T1, 0> _Base1;
- typedef _LIBCPP_NODEBUG_TYPE __compressed_pair_elem<_T2, 1> _Base2;
+ typedef _LIBCPP_NODEBUG __compressed_pair_elem<_T1, 0> _Base1;
+ typedef _LIBCPP_NODEBUG __compressed_pair_elem<_T2, 1> _Base2;
template <bool _Dummy = true,
class = typename enable_if<
@@ -196,6 +193,4 @@ void swap(__compressed_pair<_T1, _T2>& __x, __compressed_pair<_T1, _T2>& __y)
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___MEMORY_COMPRESSED_PAIR_H
diff --git a/libcxx/include/__memory/construct_at.h b/libcxx/include/__memory/construct_at.h
index 7ab19315dcd8..789677d7a613 100644
--- a/libcxx/include/__memory/construct_at.h
+++ b/libcxx/include/__memory/construct_at.h
@@ -12,16 +12,16 @@
#include <__config>
#include <__debug>
+#include <__iterator/access.h>
+#include <__memory/addressof.h>
#include <__utility/forward.h>
+#include <type_traits>
#include <utility>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
// construct_at
@@ -43,17 +43,43 @@ constexpr _Tp* construct_at(_Tp* __location, _Args&& ...__args) {
#if _LIBCPP_STD_VER > 14
-template <class _Tp>
-inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
+template <class _ForwardIterator>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17
+void destroy(_ForwardIterator, _ForwardIterator);
+
+template <class _Tp, enable_if_t<!is_array_v<_Tp>, int> = 0>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17
void destroy_at(_Tp* __loc) {
_LIBCPP_ASSERT(__loc, "null pointer given to destroy_at");
__loc->~_Tp();
}
+#if _LIBCPP_STD_VER > 17
+template <class _Tp, enable_if_t<is_array_v<_Tp>, int> = 0>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17
+void destroy_at(_Tp* __loc) {
+ _LIBCPP_ASSERT(__loc, "null pointer given to destroy_at");
+ _VSTD::destroy(_VSTD::begin(*__loc), _VSTD::end(*__loc));
+}
#endif
-_LIBCPP_END_NAMESPACE_STD
+template <class _ForwardIterator>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17
+void destroy(_ForwardIterator __first, _ForwardIterator __last) {
+ for (; __first != __last; ++__first)
+ _VSTD::destroy_at(_VSTD::addressof(*__first));
+}
+
+template <class _ForwardIterator, class _Size>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17
+_ForwardIterator destroy_n(_ForwardIterator __first, _Size __n) {
+ for (; __n > 0; (void)++__first, --__n)
+ _VSTD::destroy_at(_VSTD::addressof(*__first));
+ return __first;
+}
-_LIBCPP_POP_MACROS
+#endif
+
+_LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP___MEMORY_CONSTRUCT_AT_H
diff --git a/libcxx/include/__memory/pointer_safety.h b/libcxx/include/__memory/pointer_safety.h
deleted file mode 100644
index 87a6a9659e0c..000000000000
--- a/libcxx/include/__memory/pointer_safety.h
+++ /dev/null
@@ -1,57 +0,0 @@
-// -*- C++ -*-
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef _LIBCPP___MEMORY_POINTER_SAFETY_H
-#define _LIBCPP___MEMORY_POINTER_SAFETY_H
-
-#include <__config>
-#include <cstddef>
-
-#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
-#pragma GCC system_header
-#endif
-
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
-_LIBCPP_BEGIN_NAMESPACE_STD
-
-#if !defined(_LIBCPP_CXX03_LANG)
-
-enum class pointer_safety : unsigned char {
- relaxed,
- preferred,
- strict
-};
-
-inline _LIBCPP_INLINE_VISIBILITY
-pointer_safety get_pointer_safety() _NOEXCEPT {
- return pointer_safety::relaxed;
-}
-
-_LIBCPP_FUNC_VIS void declare_reachable(void* __p);
-_LIBCPP_FUNC_VIS void declare_no_pointers(char* __p, size_t __n);
-_LIBCPP_FUNC_VIS void undeclare_no_pointers(char* __p, size_t __n);
-_LIBCPP_FUNC_VIS void* __undeclare_reachable(void* __p);
-
-template <class _Tp>
-inline _LIBCPP_INLINE_VISIBILITY
-_Tp*
-undeclare_reachable(_Tp* __p)
-{
- return static_cast<_Tp*>(__undeclare_reachable(__p));
-}
-
-#endif // !C++03
-
-_LIBCPP_END_NAMESPACE_STD
-
-_LIBCPP_POP_MACROS
-
-#endif // _LIBCPP___MEMORY_POINTER_SAFETY_H
diff --git a/libcxx/include/__memory/pointer_traits.h b/libcxx/include/__memory/pointer_traits.h
index d5442b84e2aa..07bb6d437d7e 100644
--- a/libcxx/include/__memory/pointer_traits.h
+++ b/libcxx/include/__memory/pointer_traits.h
@@ -18,9 +18,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Tp, class = void>
@@ -36,19 +33,19 @@ struct __pointer_traits_element_type;
template <class _Ptr>
struct __pointer_traits_element_type<_Ptr, true>
{
- typedef _LIBCPP_NODEBUG_TYPE typename _Ptr::element_type type;
+ typedef _LIBCPP_NODEBUG typename _Ptr::element_type type;
};
template <template <class, class...> class _Sp, class _Tp, class ..._Args>
struct __pointer_traits_element_type<_Sp<_Tp, _Args...>, true>
{
- typedef _LIBCPP_NODEBUG_TYPE typename _Sp<_Tp, _Args...>::element_type type;
+ typedef _LIBCPP_NODEBUG typename _Sp<_Tp, _Args...>::element_type type;
};
template <template <class, class...> class _Sp, class _Tp, class ..._Args>
struct __pointer_traits_element_type<_Sp<_Tp, _Args...>, false>
{
- typedef _LIBCPP_NODEBUG_TYPE _Tp type;
+ typedef _LIBCPP_NODEBUG _Tp type;
};
template <class _Tp, class = void>
@@ -61,13 +58,13 @@ struct __has_difference_type<_Tp,
template <class _Ptr, bool = __has_difference_type<_Ptr>::value>
struct __pointer_traits_difference_type
{
- typedef _LIBCPP_NODEBUG_TYPE ptrdiff_t type;
+ typedef _LIBCPP_NODEBUG ptrdiff_t type;
};
template <class _Ptr>
struct __pointer_traits_difference_type<_Ptr, true>
{
- typedef _LIBCPP_NODEBUG_TYPE typename _Ptr::difference_type type;
+ typedef _LIBCPP_NODEBUG typename _Ptr::difference_type type;
};
template <class _Tp, class _Up>
@@ -87,9 +84,9 @@ template <class _Tp, class _Up, bool = __has_rebind<_Tp, _Up>::value>
struct __pointer_traits_rebind
{
#ifndef _LIBCPP_CXX03_LANG
- typedef _LIBCPP_NODEBUG_TYPE typename _Tp::template rebind<_Up> type;
+ typedef _LIBCPP_NODEBUG typename _Tp::template rebind<_Up> type;
#else
- typedef _LIBCPP_NODEBUG_TYPE typename _Tp::template rebind<_Up>::other type;
+ typedef _LIBCPP_NODEBUG typename _Tp::template rebind<_Up>::other type;
#endif
};
@@ -97,9 +94,9 @@ template <template <class, class...> class _Sp, class _Tp, class ..._Args, class
struct __pointer_traits_rebind<_Sp<_Tp, _Args...>, _Up, true>
{
#ifndef _LIBCPP_CXX03_LANG
- typedef _LIBCPP_NODEBUG_TYPE typename _Sp<_Tp, _Args...>::template rebind<_Up> type;
+ typedef _LIBCPP_NODEBUG typename _Sp<_Tp, _Args...>::template rebind<_Up> type;
#else
- typedef _LIBCPP_NODEBUG_TYPE typename _Sp<_Tp, _Args...>::template rebind<_Up>::other type;
+ typedef _LIBCPP_NODEBUG typename _Sp<_Tp, _Args...>::template rebind<_Up>::other type;
#endif
};
@@ -176,7 +173,9 @@ _Tp* __to_address(_Tp* __p) _NOEXCEPT {
}
// enable_if is needed here to avoid instantiating checks for fancy pointers on raw pointers
-template <class _Pointer, class = _EnableIf<!is_pointer<_Pointer>::value> >
+template <class _Pointer, class = __enable_if_t<
+ !is_pointer<_Pointer>::value && !is_array<_Pointer>::value && !is_function<_Pointer>::value
+> >
_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR
typename decay<decltype(__to_address_helper<_Pointer>::__call(declval<const _Pointer&>()))>::type
__to_address(const _Pointer& __p) _NOEXCEPT {
@@ -187,7 +186,7 @@ template <class _Pointer, class>
struct __to_address_helper {
_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR
static decltype(_VSTD::__to_address(declval<const _Pointer&>().operator->()))
- __call(const _Pointer&__p) _NOEXCEPT {
+ __call(const _Pointer& __p) _NOEXCEPT {
return _VSTD::__to_address(__p.operator->());
}
};
@@ -196,12 +195,18 @@ template <class _Pointer>
struct __to_address_helper<_Pointer, decltype((void)pointer_traits<_Pointer>::to_address(declval<const _Pointer&>()))> {
_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR
static decltype(pointer_traits<_Pointer>::to_address(declval<const _Pointer&>()))
- __call(const _Pointer&__p) _NOEXCEPT {
+ __call(const _Pointer& __p) _NOEXCEPT {
return pointer_traits<_Pointer>::to_address(__p);
}
};
#if _LIBCPP_STD_VER > 17
+template <class _Tp>
+inline _LIBCPP_INLINE_VISIBILITY constexpr
+auto to_address(_Tp *__p) noexcept {
+ return _VSTD::__to_address(__p);
+}
+
template <class _Pointer>
inline _LIBCPP_INLINE_VISIBILITY constexpr
auto to_address(const _Pointer& __p) noexcept {
@@ -211,6 +216,4 @@ auto to_address(const _Pointer& __p) noexcept {
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___MEMORY_POINTER_TRAITS_H
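The comment in the hunk above notes that enable_if keeps the fancy-pointer machinery from being instantiated for raw pointers, and the newly added C++20 std::to_address(_Tp*) overload handles raw pointers directly. A minimal, illustrative usage sketch (assumes a C++20 toolchain; std::shared_ptr stands in here for an arbitrary pointer-like type):

#include <memory>

int main() {
    int x = 0;
    int* raw = &x;
    std::shared_ptr<int> owner = std::make_shared<int>(1);

    int* p1 = std::to_address(raw);    // raw-pointer overload, no pointer_traits involved
    int* p2 = std::to_address(owner);  // falls back to owner.operator->()

    return (p1 == raw && p2 == owner.get()) ? 0 : 1;
}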
diff --git a/libcxx/include/__memory/raw_storage_iterator.h b/libcxx/include/__memory/raw_storage_iterator.h
index e8f82b219e67..abe9677cb668 100644
--- a/libcxx/include/__memory/raw_storage_iterator.h
+++ b/libcxx/include/__memory/raw_storage_iterator.h
@@ -20,9 +20,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_RAW_STORAGE_ITERATOR)
@@ -68,6 +65,4 @@ public:
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___MEMORY_RAW_STORAGE_ITERATOR_H
diff --git a/libcxx/include/__memory/shared_ptr.h b/libcxx/include/__memory/shared_ptr.h
index 04161c4b73ed..9c7df8845956 100644
--- a/libcxx/include/__memory/shared_ptr.h
+++ b/libcxx/include/__memory/shared_ptr.h
@@ -43,18 +43,15 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Alloc>
class __allocator_destructor
{
- typedef _LIBCPP_NODEBUG_TYPE allocator_traits<_Alloc> __alloc_traits;
+ typedef _LIBCPP_NODEBUG allocator_traits<_Alloc> __alloc_traits;
public:
- typedef _LIBCPP_NODEBUG_TYPE typename __alloc_traits::pointer pointer;
- typedef _LIBCPP_NODEBUG_TYPE typename __alloc_traits::size_type size_type;
+ typedef _LIBCPP_NODEBUG typename __alloc_traits::pointer pointer;
+ typedef _LIBCPP_NODEBUG typename __alloc_traits::size_type size_type;
private:
_Alloc& __alloc_;
size_type __s_;
@@ -433,14 +430,20 @@ private:
element_type* __ptr_;
__shared_weak_count* __cntrl_;
- struct __nat {int __for_bool_;};
public:
- _LIBCPP_INLINE_VISIBILITY
- _LIBCPP_CONSTEXPR shared_ptr() _NOEXCEPT;
- _LIBCPP_INLINE_VISIBILITY
- _LIBCPP_CONSTEXPR shared_ptr(nullptr_t) _NOEXCEPT;
+ _LIBCPP_HIDE_FROM_ABI
+ _LIBCPP_CONSTEXPR shared_ptr() _NOEXCEPT
+ : __ptr_(nullptr),
+ __cntrl_(nullptr)
+ { }
- template<class _Yp, class = _EnableIf<
+ _LIBCPP_HIDE_FROM_ABI
+ _LIBCPP_CONSTEXPR shared_ptr(nullptr_t) _NOEXCEPT
+ : __ptr_(nullptr),
+ __cntrl_(nullptr)
+ { }
+
+ template<class _Yp, class = __enable_if_t<
_And<
__compatible_with<_Yp, _Tp>
// In C++03 we get errors when trying to do SFINAE with the
@@ -460,643 +463,488 @@ public:
__enable_weak_this(__p, __p);
}
- template<class _Yp, class _Dp>
- shared_ptr(_Yp* __p, _Dp __d,
- typename enable_if<__shared_ptr_deleter_ctor_reqs<_Dp, _Yp, element_type>::value, __nat>::type = __nat());
- template<class _Yp, class _Dp, class _Alloc>
- shared_ptr(_Yp* __p, _Dp __d, _Alloc __a,
- typename enable_if<__shared_ptr_deleter_ctor_reqs<_Dp, _Yp, element_type>::value, __nat>::type = __nat());
- template <class _Dp> shared_ptr(nullptr_t __p, _Dp __d);
- template <class _Dp, class _Alloc> shared_ptr(nullptr_t __p, _Dp __d, _Alloc __a);
- template<class _Yp> _LIBCPP_INLINE_VISIBILITY shared_ptr(const shared_ptr<_Yp>& __r, element_type* __p) _NOEXCEPT;
- _LIBCPP_INLINE_VISIBILITY
- shared_ptr(const shared_ptr& __r) _NOEXCEPT;
- template<class _Yp>
- _LIBCPP_INLINE_VISIBILITY
- shared_ptr(const shared_ptr<_Yp>& __r,
- typename enable_if<__compatible_with<_Yp, element_type>::value, __nat>::type = __nat())
- _NOEXCEPT;
- _LIBCPP_INLINE_VISIBILITY
- shared_ptr(shared_ptr&& __r) _NOEXCEPT;
- template<class _Yp> _LIBCPP_INLINE_VISIBILITY shared_ptr(shared_ptr<_Yp>&& __r,
- typename enable_if<__compatible_with<_Yp, element_type>::value, __nat>::type = __nat())
- _NOEXCEPT;
- template<class _Yp> explicit shared_ptr(const weak_ptr<_Yp>& __r,
- typename enable_if<is_convertible<_Yp*, element_type*>::value, __nat>::type= __nat());
-#if _LIBCPP_STD_VER <= 14 || defined(_LIBCPP_ENABLE_CXX17_REMOVED_AUTO_PTR)
- template<class _Yp>
- shared_ptr(auto_ptr<_Yp>&& __r,
- typename enable_if<is_convertible<_Yp*, element_type*>::value, __nat>::type = __nat());
-#endif
- template <class _Yp, class _Dp>
- shared_ptr(unique_ptr<_Yp, _Dp>&&,
- typename enable_if
- <
- !is_lvalue_reference<_Dp>::value &&
- is_convertible<typename unique_ptr<_Yp, _Dp>::pointer, element_type*>::value,
- __nat
- >::type = __nat());
- template <class _Yp, class _Dp>
- shared_ptr(unique_ptr<_Yp, _Dp>&&,
- typename enable_if
- <
- is_lvalue_reference<_Dp>::value &&
- is_convertible<typename unique_ptr<_Yp, _Dp>::pointer, element_type*>::value,
- __nat
- >::type = __nat());
-
- ~shared_ptr();
+ template<class _Yp, class _Dp, class = __enable_if_t<__shared_ptr_deleter_ctor_reqs<_Dp, _Yp, element_type>::value> >
+ _LIBCPP_HIDE_FROM_ABI
+ shared_ptr(_Yp* __p, _Dp __d)
+ : __ptr_(__p)
+ {
+#ifndef _LIBCPP_NO_EXCEPTIONS
+ try
+ {
+#endif // _LIBCPP_NO_EXCEPTIONS
+ typedef typename __shared_ptr_default_allocator<_Yp>::type _AllocT;
+ typedef __shared_ptr_pointer<_Yp*, _Dp, _AllocT > _CntrlBlk;
+#ifndef _LIBCPP_CXX03_LANG
+ __cntrl_ = new _CntrlBlk(__p, _VSTD::move(__d), _AllocT());
+#else
+ __cntrl_ = new _CntrlBlk(__p, __d, _AllocT());
+#endif // not _LIBCPP_CXX03_LANG
+ __enable_weak_this(__p, __p);
+#ifndef _LIBCPP_NO_EXCEPTIONS
+ }
+ catch (...)
+ {
+ __d(__p);
+ throw;
+ }
+#endif // _LIBCPP_NO_EXCEPTIONS
+ }
- _LIBCPP_INLINE_VISIBILITY
- shared_ptr& operator=(const shared_ptr& __r) _NOEXCEPT;
- template<class _Yp>
- typename enable_if
- <
- __compatible_with<_Yp, element_type>::value,
- shared_ptr&
- >::type
- _LIBCPP_INLINE_VISIBILITY
- operator=(const shared_ptr<_Yp>& __r) _NOEXCEPT;
- _LIBCPP_INLINE_VISIBILITY
- shared_ptr& operator=(shared_ptr&& __r) _NOEXCEPT;
- template<class _Yp>
- typename enable_if
- <
- __compatible_with<_Yp, element_type>::value,
- shared_ptr&
- >::type
- _LIBCPP_INLINE_VISIBILITY
- operator=(shared_ptr<_Yp>&& __r);
-#if _LIBCPP_STD_VER <= 14 || defined(_LIBCPP_ENABLE_CXX17_REMOVED_AUTO_PTR)
- template<class _Yp>
- _LIBCPP_INLINE_VISIBILITY
- typename enable_if
- <
- !is_array<_Yp>::value &&
- is_convertible<_Yp*, element_type*>::value,
- shared_ptr
- >::type&
- operator=(auto_ptr<_Yp>&& __r);
-#endif
- template <class _Yp, class _Dp>
- typename enable_if
- <
- is_convertible<typename unique_ptr<_Yp, _Dp>::pointer, element_type*>::value,
- shared_ptr&
- >::type
- _LIBCPP_INLINE_VISIBILITY
- operator=(unique_ptr<_Yp, _Dp>&& __r);
+ template<class _Yp, class _Dp, class _Alloc, class = __enable_if_t<__shared_ptr_deleter_ctor_reqs<_Dp, _Yp, element_type>::value> >
+ _LIBCPP_HIDE_FROM_ABI
+ shared_ptr(_Yp* __p, _Dp __d, _Alloc __a)
+ : __ptr_(__p)
+ {
+#ifndef _LIBCPP_NO_EXCEPTIONS
+ try
+ {
+#endif // _LIBCPP_NO_EXCEPTIONS
+ typedef __shared_ptr_pointer<_Yp*, _Dp, _Alloc> _CntrlBlk;
+ typedef typename __allocator_traits_rebind<_Alloc, _CntrlBlk>::type _A2;
+ typedef __allocator_destructor<_A2> _D2;
+ _A2 __a2(__a);
+ unique_ptr<_CntrlBlk, _D2> __hold2(__a2.allocate(1), _D2(__a2, 1));
+ ::new ((void*)_VSTD::addressof(*__hold2.get()))
+#ifndef _LIBCPP_CXX03_LANG
+ _CntrlBlk(__p, _VSTD::move(__d), __a);
+#else
+ _CntrlBlk(__p, __d, __a);
+#endif // not _LIBCPP_CXX03_LANG
+ __cntrl_ = _VSTD::addressof(*__hold2.release());
+ __enable_weak_this(__p, __p);
+#ifndef _LIBCPP_NO_EXCEPTIONS
+ }
+ catch (...)
+ {
+ __d(__p);
+ throw;
+ }
+#endif // _LIBCPP_NO_EXCEPTIONS
+ }
+
+ template<class _Dp>
+ _LIBCPP_HIDE_FROM_ABI
+ shared_ptr(nullptr_t __p, _Dp __d)
+ : __ptr_(nullptr)
+ {
+#ifndef _LIBCPP_NO_EXCEPTIONS
+ try
+ {
+#endif // _LIBCPP_NO_EXCEPTIONS
+ typedef typename __shared_ptr_default_allocator<_Tp>::type _AllocT;
+ typedef __shared_ptr_pointer<nullptr_t, _Dp, _AllocT > _CntrlBlk;
+#ifndef _LIBCPP_CXX03_LANG
+ __cntrl_ = new _CntrlBlk(__p, _VSTD::move(__d), _AllocT());
+#else
+ __cntrl_ = new _CntrlBlk(__p, __d, _AllocT());
+#endif // not _LIBCPP_CXX03_LANG
+#ifndef _LIBCPP_NO_EXCEPTIONS
+ }
+ catch (...)
+ {
+ __d(__p);
+ throw;
+ }
+#endif // _LIBCPP_NO_EXCEPTIONS
+ }
+
+ template<class _Dp, class _Alloc>
+ _LIBCPP_HIDE_FROM_ABI
+ shared_ptr(nullptr_t __p, _Dp __d, _Alloc __a)
+ : __ptr_(nullptr)
+ {
+#ifndef _LIBCPP_NO_EXCEPTIONS
+ try
+ {
+#endif // _LIBCPP_NO_EXCEPTIONS
+ typedef __shared_ptr_pointer<nullptr_t, _Dp, _Alloc> _CntrlBlk;
+ typedef typename __allocator_traits_rebind<_Alloc, _CntrlBlk>::type _A2;
+ typedef __allocator_destructor<_A2> _D2;
+ _A2 __a2(__a);
+ unique_ptr<_CntrlBlk, _D2> __hold2(__a2.allocate(1), _D2(__a2, 1));
+ ::new ((void*)_VSTD::addressof(*__hold2.get()))
+#ifndef _LIBCPP_CXX03_LANG
+ _CntrlBlk(__p, _VSTD::move(__d), __a);
+#else
+ _CntrlBlk(__p, __d, __a);
+#endif // not _LIBCPP_CXX03_LANG
+ __cntrl_ = _VSTD::addressof(*__hold2.release());
+#ifndef _LIBCPP_NO_EXCEPTIONS
+ }
+ catch (...)
+ {
+ __d(__p);
+ throw;
+ }
+#endif // _LIBCPP_NO_EXCEPTIONS
+ }
- _LIBCPP_INLINE_VISIBILITY
- void swap(shared_ptr& __r) _NOEXCEPT;
- _LIBCPP_INLINE_VISIBILITY
- void reset() _NOEXCEPT;
template<class _Yp>
- typename enable_if
- <
- __compatible_with<_Yp, element_type>::value,
- void
- >::type
- _LIBCPP_INLINE_VISIBILITY
- reset(_Yp* __p);
- template<class _Yp, class _Dp>
- typename enable_if
- <
- __compatible_with<_Yp, element_type>::value,
- void
- >::type
- _LIBCPP_INLINE_VISIBILITY
- reset(_Yp* __p, _Dp __d);
- template<class _Yp, class _Dp, class _Alloc>
- typename enable_if
- <
- __compatible_with<_Yp, element_type>::value,
- void
- >::type
- _LIBCPP_INLINE_VISIBILITY
- reset(_Yp* __p, _Dp __d, _Alloc __a);
+ _LIBCPP_HIDE_FROM_ABI
+ shared_ptr(const shared_ptr<_Yp>& __r, element_type *__p) _NOEXCEPT
+ : __ptr_(__p),
+ __cntrl_(__r.__cntrl_)
+ {
+ if (__cntrl_)
+ __cntrl_->__add_shared();
+ }
- _LIBCPP_INLINE_VISIBILITY
- element_type* get() const _NOEXCEPT {return __ptr_;}
- _LIBCPP_INLINE_VISIBILITY
- typename add_lvalue_reference<element_type>::type operator*() const _NOEXCEPT
- {return *__ptr_;}
- _LIBCPP_INLINE_VISIBILITY
- element_type* operator->() const _NOEXCEPT
+ _LIBCPP_HIDE_FROM_ABI
+ shared_ptr(const shared_ptr& __r) _NOEXCEPT
+ : __ptr_(__r.__ptr_),
+ __cntrl_(__r.__cntrl_)
{
- static_assert(!is_array<_Tp>::value,
- "std::shared_ptr<T>::operator-> is only valid when T is not an array type.");
- return __ptr_;
+ if (__cntrl_)
+ __cntrl_->__add_shared();
}
- _LIBCPP_INLINE_VISIBILITY
- long use_count() const _NOEXCEPT {return __cntrl_ ? __cntrl_->use_count() : 0;}
- _LIBCPP_INLINE_VISIBILITY
- bool unique() const _NOEXCEPT {return use_count() == 1;}
- _LIBCPP_INLINE_VISIBILITY
- explicit operator bool() const _NOEXCEPT {return get() != nullptr;}
- template <class _Up>
- _LIBCPP_INLINE_VISIBILITY
- bool owner_before(shared_ptr<_Up> const& __p) const _NOEXCEPT
- {return __cntrl_ < __p.__cntrl_;}
- template <class _Up>
- _LIBCPP_INLINE_VISIBILITY
- bool owner_before(weak_ptr<_Up> const& __p) const _NOEXCEPT
- {return __cntrl_ < __p.__cntrl_;}
- _LIBCPP_INLINE_VISIBILITY
- bool
- __owner_equivalent(const shared_ptr& __p) const
- {return __cntrl_ == __p.__cntrl_;}
-#if _LIBCPP_STD_VER > 14
- typename add_lvalue_reference<element_type>::type
- _LIBCPP_INLINE_VISIBILITY
- operator[](ptrdiff_t __i) const
+ template<class _Yp, class = __enable_if_t<__compatible_with<_Yp, _Tp>::value> >
+ _LIBCPP_HIDE_FROM_ABI
+ shared_ptr(const shared_ptr<_Yp>& __r) _NOEXCEPT
+ : __ptr_(__r.__ptr_),
+ __cntrl_(__r.__cntrl_)
{
- static_assert(is_array<_Tp>::value,
- "std::shared_ptr<T>::operator[] is only valid when T is an array type.");
- return __ptr_[__i];
+ if (__cntrl_)
+ __cntrl_->__add_shared();
}
-#endif
-#ifndef _LIBCPP_NO_RTTI
- template <class _Dp>
- _LIBCPP_INLINE_VISIBILITY
- _Dp* __get_deleter() const _NOEXCEPT
- {return static_cast<_Dp*>(__cntrl_
- ? const_cast<void *>(__cntrl_->__get_deleter(typeid(_Dp)))
- : nullptr);}
-#endif // _LIBCPP_NO_RTTI
+ _LIBCPP_HIDE_FROM_ABI
+ shared_ptr(shared_ptr&& __r) _NOEXCEPT
+ : __ptr_(__r.__ptr_),
+ __cntrl_(__r.__cntrl_)
+ {
+ __r.__ptr_ = nullptr;
+ __r.__cntrl_ = nullptr;
+ }
- template<class _Yp, class _CntrlBlk>
- static shared_ptr<_Tp>
- __create_with_control_block(_Yp* __p, _CntrlBlk* __cntrl) _NOEXCEPT
+ template<class _Yp, class = __enable_if_t<__compatible_with<_Yp, _Tp>::value> >
+ _LIBCPP_HIDE_FROM_ABI
+ shared_ptr(shared_ptr<_Yp>&& __r) _NOEXCEPT
+ : __ptr_(__r.__ptr_),
+ __cntrl_(__r.__cntrl_)
{
- shared_ptr<_Tp> __r;
- __r.__ptr_ = __p;
- __r.__cntrl_ = __cntrl;
- __r.__enable_weak_this(__r.__ptr_, __r.__ptr_);
- return __r;
+ __r.__ptr_ = nullptr;
+ __r.__cntrl_ = nullptr;
}
-private:
- template <class _Yp, bool = is_function<_Yp>::value>
- struct __shared_ptr_default_allocator
- {
- typedef allocator<_Yp> type;
- };
+ template<class _Yp, class = __enable_if_t<__compatible_with<_Yp, _Tp>::value> >
+ _LIBCPP_HIDE_FROM_ABI
+ explicit shared_ptr(const weak_ptr<_Yp>& __r)
+ : __ptr_(__r.__ptr_),
+ __cntrl_(__r.__cntrl_ ? __r.__cntrl_->lock() : __r.__cntrl_)
+ {
+ if (__cntrl_ == nullptr)
+ __throw_bad_weak_ptr();
+ }
- template <class _Yp>
- struct __shared_ptr_default_allocator<_Yp, true>
+#if _LIBCPP_STD_VER <= 14 || defined(_LIBCPP_ENABLE_CXX17_REMOVED_AUTO_PTR)
+ template<class _Yp, class = __enable_if_t<is_convertible<_Yp*, element_type*>::value> >
+ _LIBCPP_HIDE_FROM_ABI
+ shared_ptr(auto_ptr<_Yp>&& __r)
+ : __ptr_(__r.get())
+ {
+ typedef __shared_ptr_pointer<_Yp*, default_delete<_Yp>, allocator<_Yp> > _CntrlBlk;
+ __cntrl_ = new _CntrlBlk(__r.get(), default_delete<_Yp>(), allocator<_Yp>());
+ __enable_weak_this(__r.get(), __r.get());
+ __r.release();
+ }
+#endif
+
+ template <class _Yp, class _Dp, class = __enable_if_t<
+ !is_lvalue_reference<_Dp>::value &&
+ is_convertible<typename unique_ptr<_Yp, _Dp>::pointer, element_type*>::value
+ > >
+ _LIBCPP_HIDE_FROM_ABI
+ shared_ptr(unique_ptr<_Yp, _Dp>&& __r)
+ : __ptr_(__r.get())
+ {
+#if _LIBCPP_STD_VER > 11
+ if (__ptr_ == nullptr)
+ __cntrl_ = nullptr;
+ else
+#endif
{
- typedef allocator<__shared_ptr_dummy_rebind_allocator_type> type;
- };
+ typedef typename __shared_ptr_default_allocator<_Yp>::type _AllocT;
+ typedef __shared_ptr_pointer<typename unique_ptr<_Yp, _Dp>::pointer, _Dp, _AllocT > _CntrlBlk;
+ __cntrl_ = new _CntrlBlk(__r.get(), __r.get_deleter(), _AllocT());
+ __enable_weak_this(__r.get(), __r.get());
+ }
+ __r.release();
+ }
- template <class _Yp, class _OrigPtr>
- _LIBCPP_INLINE_VISIBILITY
- typename enable_if<is_convertible<_OrigPtr*,
- const enable_shared_from_this<_Yp>*
- >::value,
- void>::type
- __enable_weak_this(const enable_shared_from_this<_Yp>* __e,
- _OrigPtr* __ptr) _NOEXCEPT
+ template <class _Yp, class _Dp, class = void, class = __enable_if_t<
+ is_lvalue_reference<_Dp>::value &&
+ is_convertible<typename unique_ptr<_Yp, _Dp>::pointer, element_type*>::value
+ > >
+ _LIBCPP_HIDE_FROM_ABI
+ shared_ptr(unique_ptr<_Yp, _Dp>&& __r)
+ : __ptr_(__r.get())
+ {
+#if _LIBCPP_STD_VER > 11
+ if (__ptr_ == nullptr)
+ __cntrl_ = nullptr;
+ else
+#endif
{
- typedef typename remove_cv<_Yp>::type _RawYp;
- if (__e && __e->__weak_this_.expired())
- {
- __e->__weak_this_ = shared_ptr<_RawYp>(*this,
- const_cast<_RawYp*>(static_cast<const _Yp*>(__ptr)));
- }
+ typedef typename __shared_ptr_default_allocator<_Yp>::type _AllocT;
+ typedef __shared_ptr_pointer<typename unique_ptr<_Yp, _Dp>::pointer,
+ reference_wrapper<typename remove_reference<_Dp>::type>,
+ _AllocT > _CntrlBlk;
+ __cntrl_ = new _CntrlBlk(__r.get(), _VSTD::ref(__r.get_deleter()), _AllocT());
+ __enable_weak_this(__r.get(), __r.get());
}
+ __r.release();
+ }
- _LIBCPP_INLINE_VISIBILITY void __enable_weak_this(...) _NOEXCEPT {}
+ _LIBCPP_HIDE_FROM_ABI
+ ~shared_ptr()
+ {
+ if (__cntrl_)
+ __cntrl_->__release_shared();
+ }
- template <class, class _Yp>
- struct __shared_ptr_default_delete
- : default_delete<_Yp> {};
+ _LIBCPP_HIDE_FROM_ABI
+ shared_ptr<_Tp>& operator=(const shared_ptr& __r) _NOEXCEPT
+ {
+ shared_ptr(__r).swap(*this);
+ return *this;
+ }
- template <class _Yp, class _Un, size_t _Sz>
- struct __shared_ptr_default_delete<_Yp[_Sz], _Un>
- : default_delete<_Yp[]> {};
+ template<class _Yp, class = __enable_if_t<__compatible_with<_Yp, _Tp>::value> >
+ _LIBCPP_HIDE_FROM_ABI
+ shared_ptr<_Tp>& operator=(const shared_ptr<_Yp>& __r) _NOEXCEPT
+ {
+ shared_ptr(__r).swap(*this);
+ return *this;
+ }
- template <class _Yp, class _Un>
- struct __shared_ptr_default_delete<_Yp[], _Un>
- : default_delete<_Yp[]> {};
+ _LIBCPP_HIDE_FROM_ABI
+ shared_ptr<_Tp>& operator=(shared_ptr&& __r) _NOEXCEPT
+ {
+ shared_ptr(_VSTD::move(__r)).swap(*this);
+ return *this;
+ }
- template <class _Up> friend class _LIBCPP_TEMPLATE_VIS shared_ptr;
- template <class _Up> friend class _LIBCPP_TEMPLATE_VIS weak_ptr;
-};
+ template<class _Yp, class = __enable_if_t<__compatible_with<_Yp, _Tp>::value> >
+ _LIBCPP_HIDE_FROM_ABI
+ shared_ptr<_Tp>& operator=(shared_ptr<_Yp>&& __r)
+ {
+ shared_ptr(_VSTD::move(__r)).swap(*this);
+ return *this;
+ }
-#ifndef _LIBCPP_HAS_NO_DEDUCTION_GUIDES
-template<class _Tp>
-shared_ptr(weak_ptr<_Tp>) -> shared_ptr<_Tp>;
-template<class _Tp, class _Dp>
-shared_ptr(unique_ptr<_Tp, _Dp>) -> shared_ptr<_Tp>;
+#if _LIBCPP_STD_VER <= 14 || defined(_LIBCPP_ENABLE_CXX17_REMOVED_AUTO_PTR)
+ template<class _Yp, class = __enable_if_t<
+ !is_array<_Yp>::value &&
+ is_convertible<_Yp*, typename shared_ptr<_Tp>::element_type*>::value
+ > >
+ _LIBCPP_HIDE_FROM_ABI
+ shared_ptr<_Tp>& operator=(auto_ptr<_Yp>&& __r)
+ {
+ shared_ptr(_VSTD::move(__r)).swap(*this);
+ return *this;
+ }
#endif
-template<class _Tp>
-inline
-_LIBCPP_CONSTEXPR
-shared_ptr<_Tp>::shared_ptr() _NOEXCEPT
- : __ptr_(nullptr),
- __cntrl_(nullptr)
-{
-}
+ template <class _Yp, class _Dp, class = __enable_if_t<
+ is_convertible<typename unique_ptr<_Yp, _Dp>::pointer, element_type*>::value
+ > >
+ _LIBCPP_HIDE_FROM_ABI
+ shared_ptr<_Tp>& operator=(unique_ptr<_Yp, _Dp>&& __r)
+ {
+ shared_ptr(_VSTD::move(__r)).swap(*this);
+ return *this;
+ }
-template<class _Tp>
-inline
-_LIBCPP_CONSTEXPR
-shared_ptr<_Tp>::shared_ptr(nullptr_t) _NOEXCEPT
- : __ptr_(nullptr),
- __cntrl_(nullptr)
-{
-}
+ _LIBCPP_HIDE_FROM_ABI
+ void swap(shared_ptr& __r) _NOEXCEPT
+ {
+ _VSTD::swap(__ptr_, __r.__ptr_);
+ _VSTD::swap(__cntrl_, __r.__cntrl_);
+ }
-template<class _Tp>
-template<class _Yp, class _Dp>
-shared_ptr<_Tp>::shared_ptr(_Yp* __p, _Dp __d,
- typename enable_if<__shared_ptr_deleter_ctor_reqs<_Dp, _Yp, element_type>::value, __nat>::type)
- : __ptr_(__p)
-{
-#ifndef _LIBCPP_NO_EXCEPTIONS
- try
+ _LIBCPP_HIDE_FROM_ABI
+ void reset() _NOEXCEPT
{
-#endif // _LIBCPP_NO_EXCEPTIONS
- typedef typename __shared_ptr_default_allocator<_Yp>::type _AllocT;
- typedef __shared_ptr_pointer<_Yp*, _Dp, _AllocT > _CntrlBlk;
-#ifndef _LIBCPP_CXX03_LANG
- __cntrl_ = new _CntrlBlk(__p, _VSTD::move(__d), _AllocT());
-#else
- __cntrl_ = new _CntrlBlk(__p, __d, _AllocT());
-#endif // not _LIBCPP_CXX03_LANG
- __enable_weak_this(__p, __p);
-#ifndef _LIBCPP_NO_EXCEPTIONS
+ shared_ptr().swap(*this);
}
- catch (...)
+
+ template<class _Yp, class = __enable_if_t<
+ __compatible_with<_Yp, _Tp>::value
+ > >
+ _LIBCPP_HIDE_FROM_ABI
+ void reset(_Yp* __p)
{
- __d(__p);
- throw;
+ shared_ptr(__p).swap(*this);
}
-#endif // _LIBCPP_NO_EXCEPTIONS
-}
-template<class _Tp>
-template<class _Dp>
-shared_ptr<_Tp>::shared_ptr(nullptr_t __p, _Dp __d)
- : __ptr_(nullptr)
-{
-#ifndef _LIBCPP_NO_EXCEPTIONS
- try
+ template<class _Yp, class _Dp, class = __enable_if_t<
+ __compatible_with<_Yp, _Tp>::value
+ > >
+ _LIBCPP_HIDE_FROM_ABI
+ void reset(_Yp* __p, _Dp __d)
{
-#endif // _LIBCPP_NO_EXCEPTIONS
- typedef typename __shared_ptr_default_allocator<_Tp>::type _AllocT;
- typedef __shared_ptr_pointer<nullptr_t, _Dp, _AllocT > _CntrlBlk;
-#ifndef _LIBCPP_CXX03_LANG
- __cntrl_ = new _CntrlBlk(__p, _VSTD::move(__d), _AllocT());
-#else
- __cntrl_ = new _CntrlBlk(__p, __d, _AllocT());
-#endif // not _LIBCPP_CXX03_LANG
-#ifndef _LIBCPP_NO_EXCEPTIONS
+ shared_ptr(__p, __d).swap(*this);
}
- catch (...)
+
+ template<class _Yp, class _Dp, class _Alloc, class = __enable_if_t<
+ __compatible_with<_Yp, _Tp>::value
+ > >
+ _LIBCPP_HIDE_FROM_ABI
+ void reset(_Yp* __p, _Dp __d, _Alloc __a)
{
- __d(__p);
- throw;
+ shared_ptr(__p, __d, __a).swap(*this);
}
-#endif // _LIBCPP_NO_EXCEPTIONS
-}
-template<class _Tp>
-template<class _Yp, class _Dp, class _Alloc>
-shared_ptr<_Tp>::shared_ptr(_Yp* __p, _Dp __d, _Alloc __a,
- typename enable_if<__shared_ptr_deleter_ctor_reqs<_Dp, _Yp, element_type>::value, __nat>::type)
- : __ptr_(__p)
-{
-#ifndef _LIBCPP_NO_EXCEPTIONS
- try
+ _LIBCPP_HIDE_FROM_ABI
+ element_type* get() const _NOEXCEPT
{
-#endif // _LIBCPP_NO_EXCEPTIONS
- typedef __shared_ptr_pointer<_Yp*, _Dp, _Alloc> _CntrlBlk;
- typedef typename __allocator_traits_rebind<_Alloc, _CntrlBlk>::type _A2;
- typedef __allocator_destructor<_A2> _D2;
- _A2 __a2(__a);
- unique_ptr<_CntrlBlk, _D2> __hold2(__a2.allocate(1), _D2(__a2, 1));
- ::new ((void*)_VSTD::addressof(*__hold2.get()))
-#ifndef _LIBCPP_CXX03_LANG
- _CntrlBlk(__p, _VSTD::move(__d), __a);
-#else
- _CntrlBlk(__p, __d, __a);
-#endif // not _LIBCPP_CXX03_LANG
- __cntrl_ = _VSTD::addressof(*__hold2.release());
- __enable_weak_this(__p, __p);
-#ifndef _LIBCPP_NO_EXCEPTIONS
+ return __ptr_;
}
- catch (...)
+
+ _LIBCPP_HIDE_FROM_ABI
+ typename add_lvalue_reference<element_type>::type operator*() const _NOEXCEPT
{
- __d(__p);
- throw;
+ return *__ptr_;
}
-#endif // _LIBCPP_NO_EXCEPTIONS
-}
-template<class _Tp>
-template<class _Dp, class _Alloc>
-shared_ptr<_Tp>::shared_ptr(nullptr_t __p, _Dp __d, _Alloc __a)
- : __ptr_(nullptr)
-{
-#ifndef _LIBCPP_NO_EXCEPTIONS
- try
+ _LIBCPP_HIDE_FROM_ABI
+ element_type* operator->() const _NOEXCEPT
{
-#endif // _LIBCPP_NO_EXCEPTIONS
- typedef __shared_ptr_pointer<nullptr_t, _Dp, _Alloc> _CntrlBlk;
- typedef typename __allocator_traits_rebind<_Alloc, _CntrlBlk>::type _A2;
- typedef __allocator_destructor<_A2> _D2;
- _A2 __a2(__a);
- unique_ptr<_CntrlBlk, _D2> __hold2(__a2.allocate(1), _D2(__a2, 1));
- ::new ((void*)_VSTD::addressof(*__hold2.get()))
-#ifndef _LIBCPP_CXX03_LANG
- _CntrlBlk(__p, _VSTD::move(__d), __a);
-#else
- _CntrlBlk(__p, __d, __a);
-#endif // not _LIBCPP_CXX03_LANG
- __cntrl_ = _VSTD::addressof(*__hold2.release());
-#ifndef _LIBCPP_NO_EXCEPTIONS
+ static_assert(!is_array<_Tp>::value,
+ "std::shared_ptr<T>::operator-> is only valid when T is not an array type.");
+ return __ptr_;
}
- catch (...)
+
+ _LIBCPP_HIDE_FROM_ABI
+ long use_count() const _NOEXCEPT
{
- __d(__p);
- throw;
+ return __cntrl_ ? __cntrl_->use_count() : 0;
}
-#endif // _LIBCPP_NO_EXCEPTIONS
-}
-template<class _Tp>
-template<class _Yp>
-inline
-shared_ptr<_Tp>::shared_ptr(const shared_ptr<_Yp>& __r, element_type *__p) _NOEXCEPT
- : __ptr_(__p),
- __cntrl_(__r.__cntrl_)
-{
- if (__cntrl_)
- __cntrl_->__add_shared();
-}
+ _LIBCPP_HIDE_FROM_ABI
+ bool unique() const _NOEXCEPT
+ {
+ return use_count() == 1;
+ }
-template<class _Tp>
-inline
-shared_ptr<_Tp>::shared_ptr(const shared_ptr& __r) _NOEXCEPT
- : __ptr_(__r.__ptr_),
- __cntrl_(__r.__cntrl_)
-{
- if (__cntrl_)
- __cntrl_->__add_shared();
-}
+ _LIBCPP_HIDE_FROM_ABI
+ explicit operator bool() const _NOEXCEPT
+ {
+ return get() != nullptr;
+ }
-template<class _Tp>
-template<class _Yp>
-inline
-shared_ptr<_Tp>::shared_ptr(const shared_ptr<_Yp>& __r,
- typename enable_if<__compatible_with<_Yp, element_type>::value, __nat>::type)
- _NOEXCEPT
- : __ptr_(__r.__ptr_),
- __cntrl_(__r.__cntrl_)
-{
- if (__cntrl_)
- __cntrl_->__add_shared();
-}
+ template <class _Up>
+ _LIBCPP_HIDE_FROM_ABI
+ bool owner_before(shared_ptr<_Up> const& __p) const _NOEXCEPT
+ {
+ return __cntrl_ < __p.__cntrl_;
+ }
-template<class _Tp>
-inline
-shared_ptr<_Tp>::shared_ptr(shared_ptr&& __r) _NOEXCEPT
- : __ptr_(__r.__ptr_),
- __cntrl_(__r.__cntrl_)
-{
- __r.__ptr_ = nullptr;
- __r.__cntrl_ = nullptr;
-}
+ template <class _Up>
+ _LIBCPP_HIDE_FROM_ABI
+ bool owner_before(weak_ptr<_Up> const& __p) const _NOEXCEPT
+ {
+ return __cntrl_ < __p.__cntrl_;
+ }
-template<class _Tp>
-template<class _Yp>
-inline
-shared_ptr<_Tp>::shared_ptr(shared_ptr<_Yp>&& __r,
- typename enable_if<__compatible_with<_Yp, element_type>::value, __nat>::type)
- _NOEXCEPT
- : __ptr_(__r.__ptr_),
- __cntrl_(__r.__cntrl_)
-{
- __r.__ptr_ = nullptr;
- __r.__cntrl_ = nullptr;
-}
+ _LIBCPP_HIDE_FROM_ABI
+ bool __owner_equivalent(const shared_ptr& __p) const
+ {
+ return __cntrl_ == __p.__cntrl_;
+ }
-#if _LIBCPP_STD_VER <= 14 || defined(_LIBCPP_ENABLE_CXX17_REMOVED_AUTO_PTR)
-template<class _Tp>
-template<class _Yp>
-shared_ptr<_Tp>::shared_ptr(auto_ptr<_Yp>&& __r,
- typename enable_if<is_convertible<_Yp*, element_type*>::value, __nat>::type)
- : __ptr_(__r.get())
-{
- typedef __shared_ptr_pointer<_Yp*, default_delete<_Yp>, allocator<_Yp> > _CntrlBlk;
- __cntrl_ = new _CntrlBlk(__r.get(), default_delete<_Yp>(), allocator<_Yp>());
- __enable_weak_this(__r.get(), __r.get());
- __r.release();
-}
+#if _LIBCPP_STD_VER > 14
+ _LIBCPP_HIDE_FROM_ABI
+ typename add_lvalue_reference<element_type>::type operator[](ptrdiff_t __i) const
+ {
+ static_assert(is_array<_Tp>::value,
+ "std::shared_ptr<T>::operator[] is only valid when T is an array type.");
+ return __ptr_[__i];
+ }
#endif
-template<class _Tp>
-template <class _Yp, class _Dp>
-shared_ptr<_Tp>::shared_ptr(unique_ptr<_Yp, _Dp>&& __r,
- typename enable_if
- <
- !is_lvalue_reference<_Dp>::value &&
- is_convertible<typename unique_ptr<_Yp, _Dp>::pointer, element_type*>::value,
- __nat
- >::type)
- : __ptr_(__r.get())
-{
-#if _LIBCPP_STD_VER > 11
- if (__ptr_ == nullptr)
- __cntrl_ = nullptr;
- else
-#endif
+#ifndef _LIBCPP_NO_RTTI
+ template <class _Dp>
+ _LIBCPP_HIDE_FROM_ABI
+ _Dp* __get_deleter() const _NOEXCEPT
{
- typedef typename __shared_ptr_default_allocator<_Yp>::type _AllocT;
- typedef __shared_ptr_pointer<typename unique_ptr<_Yp, _Dp>::pointer, _Dp, _AllocT > _CntrlBlk;
- __cntrl_ = new _CntrlBlk(__r.get(), __r.get_deleter(), _AllocT());
- __enable_weak_this(__r.get(), __r.get());
+ return static_cast<_Dp*>(__cntrl_
+ ? const_cast<void *>(__cntrl_->__get_deleter(typeid(_Dp)))
+ : nullptr);
}
- __r.release();
-}
+#endif // _LIBCPP_NO_RTTI
-template<class _Tp>
-template <class _Yp, class _Dp>
-shared_ptr<_Tp>::shared_ptr(unique_ptr<_Yp, _Dp>&& __r,
- typename enable_if
- <
- is_lvalue_reference<_Dp>::value &&
- is_convertible<typename unique_ptr<_Yp, _Dp>::pointer, element_type*>::value,
- __nat
- >::type)
- : __ptr_(__r.get())
-{
-#if _LIBCPP_STD_VER > 11
- if (__ptr_ == nullptr)
- __cntrl_ = nullptr;
- else
-#endif
+ template<class _Yp, class _CntrlBlk>
+ _LIBCPP_HIDE_FROM_ABI
+ static shared_ptr<_Tp> __create_with_control_block(_Yp* __p, _CntrlBlk* __cntrl) _NOEXCEPT
{
- typedef typename __shared_ptr_default_allocator<_Yp>::type _AllocT;
- typedef __shared_ptr_pointer<typename unique_ptr<_Yp, _Dp>::pointer,
- reference_wrapper<typename remove_reference<_Dp>::type>,
- _AllocT > _CntrlBlk;
- __cntrl_ = new _CntrlBlk(__r.get(), _VSTD::ref(__r.get_deleter()), _AllocT());
- __enable_weak_this(__r.get(), __r.get());
+ shared_ptr<_Tp> __r;
+ __r.__ptr_ = __p;
+ __r.__cntrl_ = __cntrl;
+ __r.__enable_weak_this(__r.__ptr_, __r.__ptr_);
+ return __r;
}
- __r.release();
-}
-
-template<class _Tp>
-shared_ptr<_Tp>::~shared_ptr()
-{
- if (__cntrl_)
- __cntrl_->__release_shared();
-}
-
-template<class _Tp>
-inline
-shared_ptr<_Tp>&
-shared_ptr<_Tp>::operator=(const shared_ptr& __r) _NOEXCEPT
-{
- shared_ptr(__r).swap(*this);
- return *this;
-}
-
-template<class _Tp>
-template<class _Yp>
-inline
-typename enable_if
-<
- __compatible_with<_Yp, typename shared_ptr<_Tp>::element_type>::value,
- shared_ptr<_Tp>&
->::type
-shared_ptr<_Tp>::operator=(const shared_ptr<_Yp>& __r) _NOEXCEPT
-{
- shared_ptr(__r).swap(*this);
- return *this;
-}
-template<class _Tp>
-inline
-shared_ptr<_Tp>&
-shared_ptr<_Tp>::operator=(shared_ptr&& __r) _NOEXCEPT
-{
- shared_ptr(_VSTD::move(__r)).swap(*this);
- return *this;
-}
+private:
+ template <class _Yp, bool = is_function<_Yp>::value>
+ struct __shared_ptr_default_allocator
+ {
+ typedef allocator<_Yp> type;
+ };
-template<class _Tp>
-template<class _Yp>
-inline
-typename enable_if
-<
- __compatible_with<_Yp, typename shared_ptr<_Tp>::element_type>::value,
- shared_ptr<_Tp>&
->::type
-shared_ptr<_Tp>::operator=(shared_ptr<_Yp>&& __r)
-{
- shared_ptr(_VSTD::move(__r)).swap(*this);
- return *this;
-}
+ template <class _Yp>
+ struct __shared_ptr_default_allocator<_Yp, true>
+ {
+ typedef allocator<__shared_ptr_dummy_rebind_allocator_type> type;
+ };
-#if _LIBCPP_STD_VER <= 14 || defined(_LIBCPP_ENABLE_CXX17_REMOVED_AUTO_PTR)
-template<class _Tp>
-template<class _Yp>
-inline
-typename enable_if
-<
- !is_array<_Yp>::value &&
- is_convertible<_Yp*, typename shared_ptr<_Tp>::element_type*>::value,
- shared_ptr<_Tp>
->::type&
-shared_ptr<_Tp>::operator=(auto_ptr<_Yp>&& __r)
-{
- shared_ptr(_VSTD::move(__r)).swap(*this);
- return *this;
-}
-#endif
+ template <class _Yp, class _OrigPtr, class = __enable_if_t<
+ is_convertible<_OrigPtr*, const enable_shared_from_this<_Yp>*>::value
+ > >
+ _LIBCPP_HIDE_FROM_ABI
+ void __enable_weak_this(const enable_shared_from_this<_Yp>* __e, _OrigPtr* __ptr) _NOEXCEPT
+ {
+ typedef typename remove_cv<_Yp>::type _RawYp;
+ if (__e && __e->__weak_this_.expired())
+ {
+ __e->__weak_this_ = shared_ptr<_RawYp>(*this,
+ const_cast<_RawYp*>(static_cast<const _Yp*>(__ptr)));
+ }
+ }
-template<class _Tp>
-template <class _Yp, class _Dp>
-inline
-typename enable_if
-<
- is_convertible<typename unique_ptr<_Yp, _Dp>::pointer,
- typename shared_ptr<_Tp>::element_type*>::value,
- shared_ptr<_Tp>&
->::type
-shared_ptr<_Tp>::operator=(unique_ptr<_Yp, _Dp>&& __r)
-{
- shared_ptr(_VSTD::move(__r)).swap(*this);
- return *this;
-}
+ _LIBCPP_HIDE_FROM_ABI void __enable_weak_this(...) _NOEXCEPT { }
-template<class _Tp>
-inline
-void
-shared_ptr<_Tp>::swap(shared_ptr& __r) _NOEXCEPT
-{
- _VSTD::swap(__ptr_, __r.__ptr_);
- _VSTD::swap(__cntrl_, __r.__cntrl_);
-}
+ template <class, class _Yp>
+ struct __shared_ptr_default_delete
+ : default_delete<_Yp>
+ { };
-template<class _Tp>
-inline
-void
-shared_ptr<_Tp>::reset() _NOEXCEPT
-{
- shared_ptr().swap(*this);
-}
+ template <class _Yp, class _Un, size_t _Sz>
+ struct __shared_ptr_default_delete<_Yp[_Sz], _Un>
+ : default_delete<_Yp[]>
+ { };
-template<class _Tp>
-template<class _Yp>
-inline
-typename enable_if
-<
- __compatible_with<_Yp, typename shared_ptr<_Tp>::element_type>::value,
- void
->::type
-shared_ptr<_Tp>::reset(_Yp* __p)
-{
- shared_ptr(__p).swap(*this);
-}
+ template <class _Yp, class _Un>
+ struct __shared_ptr_default_delete<_Yp[], _Un>
+ : default_delete<_Yp[]>
+ { };
-template<class _Tp>
-template<class _Yp, class _Dp>
-inline
-typename enable_if
-<
- __compatible_with<_Yp, typename shared_ptr<_Tp>::element_type>::value,
- void
->::type
-shared_ptr<_Tp>::reset(_Yp* __p, _Dp __d)
-{
- shared_ptr(__p, __d).swap(*this);
-}
+ template <class _Up> friend class _LIBCPP_TEMPLATE_VIS shared_ptr;
+ template <class _Up> friend class _LIBCPP_TEMPLATE_VIS weak_ptr;
+};
+#if _LIBCPP_STD_VER > 14
template<class _Tp>
-template<class _Yp, class _Dp, class _Alloc>
-inline
-typename enable_if
-<
- __compatible_with<_Yp, typename shared_ptr<_Tp>::element_type>::value,
- void
->::type
-shared_ptr<_Tp>::reset(_Yp* __p, _Dp __d, _Alloc __a)
-{
- shared_ptr(__p, __d, __a).swap(*this);
-}
+shared_ptr(weak_ptr<_Tp>) -> shared_ptr<_Tp>;
+template<class _Tp, class _Dp>
+shared_ptr(unique_ptr<_Tp, _Dp>) -> shared_ptr<_Tp>;
+#endif
//
// std::allocate_shared and std::make_shared
//
-template<class _Tp, class _Alloc, class ..._Args, class = _EnableIf<!is_array<_Tp>::value> >
+template<class _Tp, class _Alloc, class ..._Args, class = __enable_if_t<!is_array<_Tp>::value> >
_LIBCPP_HIDE_FROM_ABI
shared_ptr<_Tp> allocate_shared(const _Alloc& __a, _Args&& ...__args)
{
@@ -1108,7 +956,7 @@ shared_ptr<_Tp> allocate_shared(const _Alloc& __a, _Args&& ...__args)
return shared_ptr<_Tp>::__create_with_control_block((*__control_block).__get_elem(), _VSTD::addressof(*__control_block));
}
-template<class _Tp, class ..._Args, class = _EnableIf<!is_array<_Tp>::value> >
+template<class _Tp, class ..._Args, class = __enable_if_t<!is_array<_Tp>::value> >
_LIBCPP_HIDE_FROM_ABI
shared_ptr<_Tp> make_shared(_Args&& ...__args)
{
@@ -1326,7 +1174,12 @@ template<class _Tp>
class _LIBCPP_SHARED_PTR_TRIVIAL_ABI _LIBCPP_TEMPLATE_VIS weak_ptr
{
public:
+#if _LIBCPP_STD_VER > 14
+ typedef remove_extent_t<_Tp> element_type;
+#else
typedef _Tp element_type;
+#endif
+
private:
element_type* __ptr_;
__shared_weak_count* __cntrl_;
@@ -1335,18 +1188,18 @@ public:
_LIBCPP_INLINE_VISIBILITY
_LIBCPP_CONSTEXPR weak_ptr() _NOEXCEPT;
template<class _Yp> _LIBCPP_INLINE_VISIBILITY weak_ptr(shared_ptr<_Yp> const& __r,
- typename enable_if<is_convertible<_Yp*, _Tp*>::value, __nat*>::type = 0)
+ typename enable_if<__compatible_with<_Yp, _Tp>::value, __nat*>::type = 0)
_NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY
weak_ptr(weak_ptr const& __r) _NOEXCEPT;
template<class _Yp> _LIBCPP_INLINE_VISIBILITY weak_ptr(weak_ptr<_Yp> const& __r,
- typename enable_if<is_convertible<_Yp*, _Tp*>::value, __nat*>::type = 0)
+ typename enable_if<__compatible_with<_Yp, _Tp>::value, __nat*>::type = 0)
_NOEXCEPT;
_LIBCPP_INLINE_VISIBILITY
weak_ptr(weak_ptr&& __r) _NOEXCEPT;
template<class _Yp> _LIBCPP_INLINE_VISIBILITY weak_ptr(weak_ptr<_Yp>&& __r,
- typename enable_if<is_convertible<_Yp*, _Tp*>::value, __nat*>::type = 0)
+ typename enable_if<__compatible_with<_Yp, _Tp>::value, __nat*>::type = 0)
_NOEXCEPT;
~weak_ptr();
@@ -1355,7 +1208,7 @@ public:
template<class _Yp>
typename enable_if
<
- is_convertible<_Yp*, element_type*>::value,
+ __compatible_with<_Yp, _Tp>::value,
weak_ptr&
>::type
_LIBCPP_INLINE_VISIBILITY
@@ -1366,7 +1219,7 @@ public:
template<class _Yp>
typename enable_if
<
- is_convertible<_Yp*, element_type*>::value,
+ __compatible_with<_Yp, _Tp>::value,
weak_ptr&
>::type
_LIBCPP_INLINE_VISIBILITY
@@ -1375,7 +1228,7 @@ public:
template<class _Yp>
typename enable_if
<
- is_convertible<_Yp*, element_type*>::value,
+ __compatible_with<_Yp, _Tp>::value,
weak_ptr&
>::type
_LIBCPP_INLINE_VISIBILITY
@@ -1406,7 +1259,7 @@ public:
template <class _Up> friend class _LIBCPP_TEMPLATE_VIS shared_ptr;
};
-#ifndef _LIBCPP_HAS_NO_DEDUCTION_GUIDES
+#if _LIBCPP_STD_VER > 14
template<class _Tp>
weak_ptr(shared_ptr<_Tp>) -> weak_ptr<_Tp>;
#endif
@@ -1434,7 +1287,7 @@ template<class _Tp>
template<class _Yp>
inline
weak_ptr<_Tp>::weak_ptr(shared_ptr<_Yp> const& __r,
- typename enable_if<is_convertible<_Yp*, _Tp*>::value, __nat*>::type)
+ typename enable_if<__compatible_with<_Yp, _Tp>::value, __nat*>::type)
_NOEXCEPT
: __ptr_(__r.__ptr_),
__cntrl_(__r.__cntrl_)
@@ -1447,7 +1300,7 @@ template<class _Tp>
template<class _Yp>
inline
weak_ptr<_Tp>::weak_ptr(weak_ptr<_Yp> const& __r,
- typename enable_if<is_convertible<_Yp*, _Tp*>::value, __nat*>::type)
+ typename enable_if<__compatible_with<_Yp, _Tp>::value, __nat*>::type)
_NOEXCEPT
: __ptr_(__r.__ptr_),
__cntrl_(__r.__cntrl_)
@@ -1470,7 +1323,7 @@ template<class _Tp>
template<class _Yp>
inline
weak_ptr<_Tp>::weak_ptr(weak_ptr<_Yp>&& __r,
- typename enable_if<is_convertible<_Yp*, _Tp*>::value, __nat*>::type)
+ typename enable_if<__compatible_with<_Yp, _Tp>::value, __nat*>::type)
_NOEXCEPT
: __ptr_(__r.__ptr_),
__cntrl_(__r.__cntrl_)
@@ -1500,7 +1353,7 @@ template<class _Yp>
inline
typename enable_if
<
- is_convertible<_Yp*, _Tp*>::value,
+ __compatible_with<_Yp, _Tp>::value,
weak_ptr<_Tp>&
>::type
weak_ptr<_Tp>::operator=(weak_ptr<_Yp> const& __r) _NOEXCEPT
@@ -1523,7 +1376,7 @@ template<class _Yp>
inline
typename enable_if
<
- is_convertible<_Yp*, _Tp*>::value,
+ __compatible_with<_Yp, _Tp>::value,
weak_ptr<_Tp>&
>::type
weak_ptr<_Tp>::operator=(weak_ptr<_Yp>&& __r) _NOEXCEPT
@@ -1537,7 +1390,7 @@ template<class _Yp>
inline
typename enable_if
<
- is_convertible<_Yp*, _Tp*>::value,
+ __compatible_with<_Yp, _Tp>::value,
weak_ptr<_Tp>&
>::type
weak_ptr<_Tp>::operator=(shared_ptr<_Yp> const& __r) _NOEXCEPT
@@ -1572,17 +1425,6 @@ weak_ptr<_Tp>::reset() _NOEXCEPT
}
template<class _Tp>
-template<class _Yp>
-shared_ptr<_Tp>::shared_ptr(const weak_ptr<_Yp>& __r,
- typename enable_if<is_convertible<_Yp*, element_type*>::value, __nat>::type)
- : __ptr_(__r.__ptr_),
- __cntrl_(__r.__cntrl_ ? __r.__cntrl_->lock() : __r.__cntrl_)
-{
- if (__cntrl_ == nullptr)
- __throw_bad_weak_ptr();
-}
-
-template<class _Tp>
shared_ptr<_Tp>
weak_ptr<_Tp>::lock() const _NOEXCEPT
{
@@ -1730,7 +1572,7 @@ basic_ostream<_CharT, _Traits>&
operator<<(basic_ostream<_CharT, _Traits>& __os, shared_ptr<_Yp> const& __p);
-#if !defined(_LIBCPP_HAS_NO_ATOMIC_HEADER)
+#if !defined(_LIBCPP_HAS_NO_THREADS)
class _LIBCPP_TYPE_VIS __sp_mut
{
@@ -1870,10 +1712,8 @@ atomic_compare_exchange_weak_explicit(shared_ptr<_Tp>* __p, shared_ptr<_Tp>* __v
return atomic_compare_exchange_weak(__p, __v, __w);
}
-#endif // !defined(_LIBCPP_HAS_NO_ATOMIC_HEADER)
+#endif // !defined(_LIBCPP_HAS_NO_THREADS)
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___MEMORY_SHARED_PTR_H
diff --git a/libcxx/include/__memory/temporary_buffer.h b/libcxx/include/__memory/temporary_buffer.h
index 6d1884f722b0..06aa6c2936fe 100644
--- a/libcxx/include/__memory/temporary_buffer.h
+++ b/libcxx/include/__memory/temporary_buffer.h
@@ -19,9 +19,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Tp>
@@ -84,6 +81,4 @@ struct __return_temporary_buffer
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___MEMORY_TEMPORARY_BUFFER_H
diff --git a/libcxx/include/__memory/uninitialized_algorithms.h b/libcxx/include/__memory/uninitialized_algorithms.h
index 39edabbd8e4f..e83d62e0db08 100644
--- a/libcxx/include/__memory/uninitialized_algorithms.h
+++ b/libcxx/include/__memory/uninitialized_algorithms.h
@@ -20,9 +20,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _InputIterator, class _ForwardIterator>
@@ -123,21 +120,6 @@ uninitialized_fill_n(_ForwardIterator __f, _Size __n, const _Tp& __x)
#if _LIBCPP_STD_VER > 14
template <class _ForwardIterator>
-inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
-void destroy(_ForwardIterator __first, _ForwardIterator __last) {
- for (; __first != __last; ++__first)
- _VSTD::destroy_at(_VSTD::addressof(*__first));
-}
-
-template <class _ForwardIterator, class _Size>
-inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
-_ForwardIterator destroy_n(_ForwardIterator __first, _Size __n) {
- for (; __n > 0; (void)++__first, --__n)
- _VSTD::destroy_at(_VSTD::addressof(*__first));
- return __first;
-}
-
-template <class _ForwardIterator>
inline _LIBCPP_INLINE_VISIBILITY
void uninitialized_default_construct(_ForwardIterator __first, _ForwardIterator __last) {
using _Vt = typename iterator_traits<_ForwardIterator>::value_type;
@@ -163,7 +145,7 @@ _ForwardIterator uninitialized_default_construct_n(_ForwardIterator __first, _Si
#ifndef _LIBCPP_NO_EXCEPTIONS
try {
#endif
- for (; __n > 0; (void)++__idx, --__n)
+ for (; __n > 0; ++__idx, (void) --__n)
::new ((void*)_VSTD::addressof(*__idx)) _Vt;
return __idx;
#ifndef _LIBCPP_NO_EXCEPTIONS
@@ -201,7 +183,7 @@ _ForwardIterator uninitialized_value_construct_n(_ForwardIterator __first, _Size
#ifndef _LIBCPP_NO_EXCEPTIONS
try {
#endif
- for (; __n > 0; (void)++__idx, --__n)
+ for (; __n > 0; ++__idx, (void) --__n)
::new ((void*)_VSTD::addressof(*__idx)) _Vt();
return __idx;
#ifndef _LIBCPP_NO_EXCEPTIONS
@@ -221,7 +203,7 @@ _ForwardIt uninitialized_move(_InputIt __first, _InputIt __last, _ForwardIt __fi
#ifndef _LIBCPP_NO_EXCEPTIONS
try {
#endif
- for (; __first != __last; (void)++__idx, ++__first)
+ for (; __first != __last; ++__idx, (void) ++__first)
::new ((void*)_VSTD::addressof(*__idx)) _Vt(_VSTD::move(*__first));
return __idx;
#ifndef _LIBCPP_NO_EXCEPTIONS
@@ -241,7 +223,7 @@ uninitialized_move_n(_InputIt __first, _Size __n, _ForwardIt __first_res) {
#ifndef _LIBCPP_NO_EXCEPTIONS
try {
#endif
- for (; __n > 0; ++__idx, (void)++__first, --__n)
+ for (; __n > 0; ++__idx, (void) ++__first, --__n)
::new ((void*)_VSTD::addressof(*__idx)) _Vt(_VSTD::move(*__first));
return {__first, __idx};
#ifndef _LIBCPP_NO_EXCEPTIONS
@@ -256,6 +238,4 @@ uninitialized_move_n(_InputIt __first, _Size __n, _ForwardIt __first_res) {
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___MEMORY_UNINITIALIZED_ALGORITHMS_H
diff --git a/libcxx/include/__memory/unique_ptr.h b/libcxx/include/__memory/unique_ptr.h
index 083e0a8c250d..838960269c97 100644
--- a/libcxx/include/__memory/unique_ptr.h
+++ b/libcxx/include/__memory/unique_ptr.h
@@ -29,9 +29,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Tp>
@@ -122,7 +119,7 @@ class _LIBCPP_UNIQUE_PTR_TRIVIAL_ABI _LIBCPP_TEMPLATE_VIS unique_ptr {
public:
typedef _Tp element_type;
typedef _Dp deleter_type;
- typedef _LIBCPP_NODEBUG_TYPE typename __pointer<_Tp, deleter_type>::type pointer;
+ typedef _LIBCPP_NODEBUG typename __pointer<_Tp, deleter_type>::type pointer;
static_assert(!is_rvalue_reference<deleter_type>::value,
"the specified deleter type cannot be an rvalue reference");
@@ -132,38 +129,38 @@ private:
struct __nat { int __for_bool_; };
- typedef _LIBCPP_NODEBUG_TYPE __unique_ptr_deleter_sfinae<_Dp> _DeleterSFINAE;
+ typedef _LIBCPP_NODEBUG __unique_ptr_deleter_sfinae<_Dp> _DeleterSFINAE;
template <bool _Dummy>
- using _LValRefType _LIBCPP_NODEBUG_TYPE =
+ using _LValRefType _LIBCPP_NODEBUG =
typename __dependent_type<_DeleterSFINAE, _Dummy>::__lval_ref_type;
template <bool _Dummy>
- using _GoodRValRefType _LIBCPP_NODEBUG_TYPE =
+ using _GoodRValRefType _LIBCPP_NODEBUG =
typename __dependent_type<_DeleterSFINAE, _Dummy>::__good_rval_ref_type;
template <bool _Dummy>
- using _BadRValRefType _LIBCPP_NODEBUG_TYPE =
+ using _BadRValRefType _LIBCPP_NODEBUG =
typename __dependent_type<_DeleterSFINAE, _Dummy>::__bad_rval_ref_type;
template <bool _Dummy, class _Deleter = typename __dependent_type<
__identity<deleter_type>, _Dummy>::type>
- using _EnableIfDeleterDefaultConstructible _LIBCPP_NODEBUG_TYPE =
+ using _EnableIfDeleterDefaultConstructible _LIBCPP_NODEBUG =
typename enable_if<is_default_constructible<_Deleter>::value &&
!is_pointer<_Deleter>::value>::type;
template <class _ArgType>
- using _EnableIfDeleterConstructible _LIBCPP_NODEBUG_TYPE =
+ using _EnableIfDeleterConstructible _LIBCPP_NODEBUG =
typename enable_if<is_constructible<deleter_type, _ArgType>::value>::type;
template <class _UPtr, class _Up>
- using _EnableIfMoveConvertible _LIBCPP_NODEBUG_TYPE = typename enable_if<
+ using _EnableIfMoveConvertible _LIBCPP_NODEBUG = typename enable_if<
is_convertible<typename _UPtr::pointer, pointer>::value &&
!is_array<_Up>::value
>::type;
template <class _UDel>
- using _EnableIfDeleterConvertible _LIBCPP_NODEBUG_TYPE = typename enable_if<
+ using _EnableIfDeleterConvertible _LIBCPP_NODEBUG = typename enable_if<
(is_reference<_Dp>::value && is_same<_Dp, _UDel>::value) ||
(!is_reference<_Dp>::value && is_convertible<_UDel, _Dp>::value)
>::type;
@@ -350,35 +347,35 @@ private:
typedef __unique_ptr_deleter_sfinae<_Dp> _DeleterSFINAE;
template <bool _Dummy>
- using _LValRefType _LIBCPP_NODEBUG_TYPE =
+ using _LValRefType _LIBCPP_NODEBUG =
typename __dependent_type<_DeleterSFINAE, _Dummy>::__lval_ref_type;
template <bool _Dummy>
- using _GoodRValRefType _LIBCPP_NODEBUG_TYPE =
+ using _GoodRValRefType _LIBCPP_NODEBUG =
typename __dependent_type<_DeleterSFINAE, _Dummy>::__good_rval_ref_type;
template <bool _Dummy>
- using _BadRValRefType _LIBCPP_NODEBUG_TYPE =
+ using _BadRValRefType _LIBCPP_NODEBUG =
typename __dependent_type<_DeleterSFINAE, _Dummy>::__bad_rval_ref_type;
template <bool _Dummy, class _Deleter = typename __dependent_type<
__identity<deleter_type>, _Dummy>::type>
- using _EnableIfDeleterDefaultConstructible _LIBCPP_NODEBUG_TYPE =
+ using _EnableIfDeleterDefaultConstructible _LIBCPP_NODEBUG =
typename enable_if<is_default_constructible<_Deleter>::value &&
!is_pointer<_Deleter>::value>::type;
template <class _ArgType>
- using _EnableIfDeleterConstructible _LIBCPP_NODEBUG_TYPE =
+ using _EnableIfDeleterConstructible _LIBCPP_NODEBUG =
typename enable_if<is_constructible<deleter_type, _ArgType>::value>::type;
template <class _Pp>
- using _EnableIfPointerConvertible _LIBCPP_NODEBUG_TYPE = typename enable_if<
+ using _EnableIfPointerConvertible _LIBCPP_NODEBUG = typename enable_if<
_CheckArrayPointerConversion<_Pp>::value
>::type;
template <class _UPtr, class _Up,
class _ElemT = typename _UPtr::element_type>
- using _EnableIfMoveConvertible _LIBCPP_NODEBUG_TYPE = typename enable_if<
+ using _EnableIfMoveConvertible _LIBCPP_NODEBUG = typename enable_if<
is_array<_Up>::value &&
is_same<pointer, element_type*>::value &&
is_same<typename _UPtr::pointer, _ElemT*>::value &&
@@ -386,13 +383,13 @@ private:
>::type;
template <class _UDel>
- using _EnableIfDeleterConvertible _LIBCPP_NODEBUG_TYPE = typename enable_if<
+ using _EnableIfDeleterConvertible _LIBCPP_NODEBUG = typename enable_if<
(is_reference<_Dp>::value && is_same<_Dp, _UDel>::value) ||
(!is_reference<_Dp>::value && is_convertible<_UDel, _Dp>::value)
>::type;
template <class _UDel>
- using _EnableIfDeleterAssignable _LIBCPP_NODEBUG_TYPE = typename enable_if<
+ using _EnableIfDeleterAssignable _LIBCPP_NODEBUG = typename enable_if<
is_assignable<_Dp&, _UDel&&>::value
>::type;
@@ -768,6 +765,4 @@ struct _LIBCPP_TEMPLATE_VIS hash<__enable_hash_helper<
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___MEMORY_UNIQUE_PTR_H
diff --git a/libcxx/include/__memory/uses_allocator.h b/libcxx/include/__memory/uses_allocator.h
index 36e752057545..2e186207408b 100644
--- a/libcxx/include/__memory/uses_allocator.h
+++ b/libcxx/include/__memory/uses_allocator.h
@@ -52,7 +52,7 @@ struct _LIBCPP_TEMPLATE_VIS uses_allocator
#if _LIBCPP_STD_VER > 14
template <class _Tp, class _Alloc>
-_LIBCPP_INLINE_VAR constexpr size_t uses_allocator_v = uses_allocator<_Tp, _Alloc>::value;
+inline constexpr size_t uses_allocator_v = uses_allocator<_Tp, _Alloc>::value;
#endif
_LIBCPP_END_NAMESPACE_STD
diff --git a/libcxx/include/__mutex_base b/libcxx/include/__mutex_base
index 77590a8fd4f1..da2967164a68 100644
--- a/libcxx/include/__mutex_base
+++ b/libcxx/include/__mutex_base
@@ -68,9 +68,9 @@ extern _LIBCPP_EXPORTED_FROM_ABI const adopt_lock_t adopt_lock;
#else
-/* _LIBCPP_INLINE_VAR */ constexpr defer_lock_t defer_lock = defer_lock_t();
-/* _LIBCPP_INLINE_VAR */ constexpr try_to_lock_t try_to_lock = try_to_lock_t();
-/* _LIBCPP_INLINE_VAR */ constexpr adopt_lock_t adopt_lock = adopt_lock_t();
+/* inline */ constexpr defer_lock_t defer_lock = defer_lock_t();
+/* inline */ constexpr try_to_lock_t try_to_lock = try_to_lock_t();
+/* inline */ constexpr adopt_lock_t adopt_lock = adopt_lock_t();
#endif
@@ -96,8 +96,8 @@ public:
~lock_guard() _LIBCPP_THREAD_SAFETY_ANNOTATION(release_capability()) {__m_.unlock();}
private:
- lock_guard(lock_guard const&) _LIBCPP_EQUAL_DELETE;
- lock_guard& operator=(lock_guard const&) _LIBCPP_EQUAL_DELETE;
+ lock_guard(lock_guard const&) = delete;
+ lock_guard& operator=(lock_guard const&) = delete;
};
template <class _Mutex>
diff --git a/libcxx/include/__node_handle b/libcxx/include/__node_handle
index f3ffa5e3ac38..f313409bb682 100644
--- a/libcxx/include/__node_handle
+++ b/libcxx/include/__node_handle
@@ -10,6 +10,54 @@
#ifndef _LIBCPP___NODE_HANDLE
#define _LIBCPP___NODE_HANDLE
+/*
+
+template<unspecified>
+class node-handle {
+public:
+ using value_type = see below; // not present for map containers
+ using key_type = see below; // not present for set containers
+ using mapped_type = see below; // not present for set containers
+ using allocator_type = see below;
+
+private:
+ using container_node_type = unspecified; // exposition only
+ using ator_traits = allocator_traits<allocator_type>; // exposition only
+
+ typename ator_traits::template
+ rebind_traits<container_node_type>::pointer ptr_; // exposition only
+ optional<allocator_type> alloc_; // exposition only
+
+public:
+ // [container.node.cons], constructors, copy, and assignment
+ constexpr node-handle() noexcept : ptr_(), alloc_() {}
+ node-handle(node-handle&&) noexcept;
+ node-handle& operator=(node-handle&&);
+
+ // [container.node.dtor], destructor
+ ~node-handle();
+
+ // [container.node.observers], observers
+ value_type& value() const; // not present for map containers
+ key_type& key() const; // not present for set containers
+ mapped_type& mapped() const; // not present for set containers
+
+ allocator_type get_allocator() const;
+ explicit operator bool() const noexcept;
+ [[nodiscard]] bool empty() const noexcept; // nodiscard since C++20
+
+ // [container.node.modifiers], modifiers
+ void swap(node-handle&)
+ noexcept(ator_traits::propagate_on_container_swap::value ||
+ ator_traits::is_always_equal::value);
+
+ friend void swap(node-handle& x, node-handle& y) noexcept(noexcept(x.swap(y))) {
+ x.swap(y);
+ }
+};
+
+*/
+
#include <__config>
#include <__debug>
#include <memory>
@@ -19,9 +67,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER > 14
@@ -204,6 +249,5 @@ struct _LIBCPP_TEMPLATE_VIS __insert_return_type
#endif // _LIBCPP_STD_VER > 14
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
#endif // _LIBCPP___NODE_HANDLE
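
The node-handle synopsis added to <__node_handle> above describes the extract/insert API shared by the associative and unordered containers. A minimal usage sketch of that API (standard C++17; not part of the imported patch):

    #include <cassert>
    #include <map>
    #include <string>
    #include <utility>

    int main() {
        std::map<int, std::string> src{{1, "one"}, {2, "two"}};
        std::map<int, std::string> dst;

        auto node = src.extract(1);      // the node handle now owns the element
        assert(!node.empty());
        node.key() = 42;                 // the key is mutable through the handle
        dst.insert(std::move(node));     // re-inserted without copying the element

        assert(src.size() == 1 && dst.at(42) == "one");
    }
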
diff --git a/libcxx/include/__nullptr b/libcxx/include/__nullptr
index e14751164237..d02be215ef1d 100644
--- a/libcxx/include/__nullptr
+++ b/libcxx/include/__nullptr
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- __nullptr --------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/__ranges/access.h b/libcxx/include/__ranges/access.h
index add848887c11..b0b89c0eeea3 100644
--- a/libcxx/include/__ranges/access.h
+++ b/libcxx/include/__ranges/access.h
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===------------------------ __ranges/access.h ---------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -13,7 +13,8 @@
#include <__iterator/concepts.h>
#include <__iterator/readable_traits.h>
#include <__ranges/enable_borrowed_range.h>
-#include <__utility/__decay_copy.h>
+#include <__utility/as_const.h>
+#include <__utility/decay_copy.h>
#include <__utility/forward.h>
#include <concepts>
#include <type_traits>
@@ -22,9 +23,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
#if !defined(_LIBCPP_HAS_NO_RANGES)
@@ -217,6 +215,4 @@ namespace ranges::inline __cpo {
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___RANGES_ACCESS_H
diff --git a/libcxx/include/__ranges/all.h b/libcxx/include/__ranges/all.h
index d678d3e5d357..affe13ee0862 100644
--- a/libcxx/include/__ranges/all.h
+++ b/libcxx/include/__ranges/all.h
@@ -14,9 +14,10 @@
#include <__iterator/iterator_traits.h>
#include <__ranges/access.h>
#include <__ranges/concepts.h>
+#include <__ranges/range_adaptor.h>
#include <__ranges/ref_view.h>
#include <__ranges/subrange.h>
-#include <__utility/__decay_copy.h>
+#include <__utility/decay_copy.h>
#include <__utility/declval.h>
#include <__utility/forward.h>
#include <type_traits>
@@ -25,20 +26,17 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
#if !defined(_LIBCPP_HAS_NO_RANGES)
-namespace views {
+namespace ranges::views {
namespace __all {
- struct __fn {
+ struct __fn : __range_adaptor_closure<__fn> {
template<class _Tp>
requires ranges::view<decay_t<_Tp>>
- _LIBCPP_HIDE_FROM_ABI
+ [[nodiscard]] _LIBCPP_HIDE_FROM_ABI
constexpr auto operator()(_Tp&& __t) const
noexcept(noexcept(_VSTD::__decay_copy(_VSTD::forward<_Tp>(__t))))
{
@@ -48,7 +46,7 @@ namespace __all {
template<class _Tp>
requires (!ranges::view<decay_t<_Tp>>) &&
requires (_Tp&& __t) { ranges::ref_view{_VSTD::forward<_Tp>(__t)}; }
- _LIBCPP_HIDE_FROM_ABI
+ [[nodiscard]] _LIBCPP_HIDE_FROM_ABI
constexpr auto operator()(_Tp&& __t) const
noexcept(noexcept(ranges::ref_view{_VSTD::forward<_Tp>(__t)}))
{
@@ -59,7 +57,7 @@ namespace __all {
requires (!ranges::view<decay_t<_Tp>> &&
!requires (_Tp&& __t) { ranges::ref_view{_VSTD::forward<_Tp>(__t)}; } &&
requires (_Tp&& __t) { ranges::subrange{_VSTD::forward<_Tp>(__t)}; })
- _LIBCPP_HIDE_FROM_ABI
+ [[nodiscard]] _LIBCPP_HIDE_FROM_ABI
constexpr auto operator()(_Tp&& __t) const
noexcept(noexcept(ranges::subrange{_VSTD::forward<_Tp>(__t)}))
{
@@ -75,12 +73,10 @@ inline namespace __cpo {
template<ranges::viewable_range _Range>
using all_t = decltype(views::all(declval<_Range>()));
-} // namespace views
+} // namespace ranges::views
#endif // !defined(_LIBCPP_HAS_NO_RANGES)
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___RANGES_ALL_H
diff --git a/libcxx/include/__ranges/common_view.h b/libcxx/include/__ranges/common_view.h
index dab82602f671..a58554f66e50 100644
--- a/libcxx/include/__ranges/common_view.h
+++ b/libcxx/include/__ranges/common_view.h
@@ -16,8 +16,11 @@
#include <__ranges/all.h>
#include <__ranges/concepts.h>
#include <__ranges/enable_borrowed_range.h>
+#include <__ranges/range_adaptor.h>
#include <__ranges/size.h>
#include <__ranges/view_interface.h>
+#include <__utility/forward.h>
+#include <__utility/move.h>
#include <concepts>
#include <type_traits>
@@ -25,9 +28,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
#if !defined(_LIBCPP_HAS_NO_RANGES)
@@ -102,12 +102,34 @@ common_view(_Range&&)
template<class _View>
inline constexpr bool enable_borrowed_range<common_view<_View>> = enable_borrowed_range<_View>;
+namespace views {
+namespace __common {
+ struct __fn : __range_adaptor_closure<__fn> {
+ template<class _Range>
+ requires common_range<_Range>
+ [[nodiscard]] _LIBCPP_HIDE_FROM_ABI
+ constexpr auto operator()(_Range&& __range) const
+ noexcept(noexcept(views::all(_VSTD::forward<_Range>(__range))))
+ -> decltype( views::all(_VSTD::forward<_Range>(__range)))
+ { return views::all(_VSTD::forward<_Range>(__range)); }
+
+ template<class _Range>
+ [[nodiscard]] _LIBCPP_HIDE_FROM_ABI
+ constexpr auto operator()(_Range&& __range) const
+ noexcept(noexcept(common_view{_VSTD::forward<_Range>(__range)}))
+ -> decltype( common_view{_VSTD::forward<_Range>(__range)})
+ { return common_view{_VSTD::forward<_Range>(__range)}; }
+ };
+}
+
+inline namespace __cpo {
+ inline constexpr auto common = __common::__fn{};
+}
+} // namespace views
} // namespace ranges
#endif // !defined(_LIBCPP_HAS_NO_RANGES)
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___RANGES_COMMON_VIEW_H
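
The new ranges::views::common adaptor above falls back to views::all when the input already models common_range and otherwise wraps it in common_view, giving iterator and sentinel the same type. A usage sketch (assumes a C++20 library with <ranges>; not part of the imported patch):

    #include <list>
    #include <numeric>
    #include <ranges>

    int main() {
        std::list<int> xs{1, 2, 3, 4, 5};

        // take_view over a list has distinct iterator and sentinel types, so
        // adapt it with views::common before using a classic iterator-pair API.
        auto taken = xs | std::views::take(3) | std::views::common;
        int sum = std::accumulate(taken.begin(), taken.end(), 0);

        return sum == 6 ? 0 : 1;
    }
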
diff --git a/libcxx/include/__ranges/concepts.h b/libcxx/include/__ranges/concepts.h
index 2f912c2841e8..dc1cece33b8d 100644
--- a/libcxx/include/__ranges/concepts.h
+++ b/libcxx/include/__ranges/concepts.h
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------- __ranges/concepts.h ----------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -27,9 +27,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
// clang-format off
@@ -133,6 +130,4 @@ namespace ranges {
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___RANGES_CONCEPTS_H
diff --git a/libcxx/include/__ranges/copyable_box.h b/libcxx/include/__ranges/copyable_box.h
index f2d3843f79f5..4811690adaec 100644
--- a/libcxx/include/__ranges/copyable_box.h
+++ b/libcxx/include/__ranges/copyable_box.h
@@ -22,9 +22,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
#if !defined(_LIBCPP_HAS_NO_RANGES)
@@ -91,6 +88,10 @@ namespace ranges {
_LIBCPP_HIDE_FROM_ABI constexpr _Tp const& operator*() const noexcept { return *__val_; }
_LIBCPP_HIDE_FROM_ABI constexpr _Tp& operator*() noexcept { return *__val_; }
+
+ _LIBCPP_HIDE_FROM_ABI constexpr const _Tp *operator->() const noexcept { return __val_.operator->(); }
+ _LIBCPP_HIDE_FROM_ABI constexpr _Tp *operator->() noexcept { return __val_.operator->(); }
+
_LIBCPP_HIDE_FROM_ABI constexpr bool __has_value() const noexcept { return __val_.has_value(); }
};
@@ -162,6 +163,10 @@ namespace ranges {
_LIBCPP_HIDE_FROM_ABI constexpr _Tp const& operator*() const noexcept { return __val_; }
_LIBCPP_HIDE_FROM_ABI constexpr _Tp& operator*() noexcept { return __val_; }
+
+ _LIBCPP_HIDE_FROM_ABI constexpr const _Tp *operator->() const noexcept { return _VSTD::addressof(__val_); }
+ _LIBCPP_HIDE_FROM_ABI constexpr _Tp *operator->() noexcept { return _VSTD::addressof(__val_); }
+
_LIBCPP_HIDE_FROM_ABI constexpr bool __has_value() const noexcept { return true; }
};
} // namespace ranges
@@ -170,6 +175,4 @@ namespace ranges {
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___RANGES_COPYABLE_BOX_H
diff --git a/libcxx/include/__ranges/counted.h b/libcxx/include/__ranges/counted.h
new file mode 100644
index 000000000000..d292bcbb1849
--- /dev/null
+++ b/libcxx/include/__ranges/counted.h
@@ -0,0 +1,94 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef _LIBCPP___RANGES_COUNTED_H
+#define _LIBCPP___RANGES_COUNTED_H
+
+#include <__config>
+#include <__iterator/concepts.h>
+#include <__iterator/counted_iterator.h>
+#include <__iterator/default_sentinel.h>
+#include <__iterator/incrementable_traits.h>
+#include <__iterator/iterator_traits.h>
+#include <__memory/pointer_traits.h>
+#include <__ranges/concepts.h>
+#include <__ranges/subrange.h>
+#include <__utility/decay_copy.h>
+#include <__utility/declval.h>
+#include <__utility/forward.h>
+#include <__utility/move.h>
+#include <span>
+#include <type_traits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if !defined(_LIBCPP_HAS_NO_RANGES)
+
+namespace ranges::views {
+
+namespace __counted {
+ template<class _From, class _To>
+ concept __explicitly_convertible = requires {
+ _To(_From{});
+ };
+
+ struct __fn {
+ template<class _Iter, class _Diff>
+ requires contiguous_iterator<decay_t<_Iter>> &&
+ __explicitly_convertible<_Diff, iter_difference_t<_Iter>>
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr auto operator()(_Iter&& __it, _Diff __c) const
+ noexcept(noexcept(
+ span(_VSTD::to_address(__it), static_cast<iter_difference_t<_Iter>>(__c))
+ ))
+ {
+ return span(_VSTD::to_address(__it), static_cast<iter_difference_t<_Iter>>(__c));
+ }
+
+ template<class _Iter, class _Diff>
+ requires random_access_iterator<decay_t<_Iter>> &&
+ __explicitly_convertible<_Diff, iter_difference_t<_Iter>>
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr auto operator()(_Iter&& __it, _Diff __c) const
+ noexcept(
+ noexcept(__it + static_cast<iter_difference_t<_Iter>>(__c)) &&
+ noexcept(ranges::subrange(_VSTD::forward<_Iter>(__it), _VSTD::__decay_copy(__it)))
+ )
+ {
+ auto __last = __it + static_cast<iter_difference_t<_Iter>>(__c);
+ return ranges::subrange(_VSTD::forward<_Iter>(__it), _VSTD::move(__last));
+ }
+
+ template<class _Iter, class _Diff>
+ requires __explicitly_convertible<_Diff, iter_difference_t<_Iter>>
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr auto operator()(_Iter&& __it, _Diff __c) const
+ noexcept(noexcept(
+ ranges::subrange(counted_iterator(_VSTD::forward<_Iter>(__it), __c), default_sentinel)
+ ))
+ {
+ return ranges::subrange(counted_iterator(_VSTD::forward<_Iter>(__it), __c), default_sentinel);
+ }
+ };
+}
+
+inline namespace __cpo {
+ inline constexpr auto counted = __counted::__fn{};
+} // namespace __cpo
+
+} // namespace ranges::views
+
+#endif // !defined(_LIBCPP_HAS_NO_RANGES)
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___RANGES_COUNTED_H
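
The three operator() overloads of views::counted above dispatch on iterator strength: contiguous iterators become a span, random-access iterators a plain subrange, and anything weaker a counted_iterator/default_sentinel subrange. A small sketch of the user-facing behaviour (C++20 assumed; not part of the imported patch):

    #include <forward_list>
    #include <ranges>
    #include <vector>

    int main() {
        std::vector<int> vec{1, 2, 3, 4, 5};
        std::forward_list<int> fwd{1, 2, 3, 4, 5};

        auto a = std::views::counted(vec.begin(), 3);  // contiguous: span-like
        auto b = std::views::counted(fwd.begin(), 3);  // forward only: counted_iterator + default_sentinel

        int total = 0;
        for (int x : a) total += x;   // 1 + 2 + 3
        for (int x : b) total += x;   // 1 + 2 + 3 again

        return total == 12 ? 0 : 1;
    }
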
diff --git a/libcxx/include/__ranges/dangling.h b/libcxx/include/__ranges/dangling.h
index deb02a1a448d..45ff83b205e5 100644
--- a/libcxx/include/__ranges/dangling.h
+++ b/libcxx/include/__ranges/dangling.h
@@ -19,9 +19,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
#if !defined(_LIBCPP_HAS_NO_RANGES)
@@ -42,6 +39,4 @@ using borrowed_iterator_t = _If<borrowed_range<_Rp>, iterator_t<_Rp>, dangling>;
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___RANGES_DANGLING_H
diff --git a/libcxx/include/__ranges/data.h b/libcxx/include/__ranges/data.h
index dae30984d3f9..7eade494cceb 100644
--- a/libcxx/include/__ranges/data.h
+++ b/libcxx/include/__ranges/data.h
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===------------------------ __ranges/data.h ------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -22,9 +22,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
#if !defined(_LIBCPP_HAS_NO_RANGES)
@@ -71,7 +68,7 @@ namespace __data {
} // end namespace __data
inline namespace __cpo {
- inline constexpr const auto data = __data::__fn{};
+ inline constexpr auto data = __data::__fn{};
} // namespace __cpo
} // namespace ranges
@@ -81,6 +78,4 @@ inline namespace __cpo {
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___RANGES_DATA_H
diff --git a/libcxx/include/__ranges/drop_view.h b/libcxx/include/__ranges/drop_view.h
index 099fd227111d..6adb09cf677e 100644
--- a/libcxx/include/__ranges/drop_view.h
+++ b/libcxx/include/__ranges/drop_view.h
@@ -10,6 +10,7 @@
#define _LIBCPP___RANGES_DROP_VIEW_H
#include <__config>
+#include <__debug>
#include <__iterator/concepts.h>
#include <__iterator/iterator_traits.h>
#include <__iterator/next.h>
@@ -28,9 +29,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
#if !defined(_LIBCPP_HAS_NO_RANGES)
@@ -76,7 +74,7 @@ public:
auto __tmp = ranges::next(ranges::begin(__base_), __count_, ranges::end(__base_));
if constexpr (_UseCache)
- __cached_begin_.__set(__tmp);
+ __cached_begin_.__emplace(__tmp);
return __tmp;
}
@@ -126,6 +124,4 @@ public:
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___RANGES_DROP_VIEW_H
diff --git a/libcxx/include/__ranges/empty.h b/libcxx/include/__ranges/empty.h
index 73892a8c1035..fc6a938fd86e 100644
--- a/libcxx/include/__ranges/empty.h
+++ b/libcxx/include/__ranges/empty.h
@@ -20,9 +20,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
#if !defined(_LIBCPP_HAS_NO_RANGES)
@@ -81,6 +78,4 @@ inline namespace __cpo {
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___RANGES_EMPTY_H
diff --git a/libcxx/include/__ranges/empty_view.h b/libcxx/include/__ranges/empty_view.h
index 7c0f307c8243..4a98a6f324e7 100644
--- a/libcxx/include/__ranges/empty_view.h
+++ b/libcxx/include/__ranges/empty_view.h
@@ -17,9 +17,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
#if !defined(_LIBCPP_HAS_NO_RANGES)
@@ -41,6 +38,4 @@ namespace ranges {
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___RANGES_EMPTY_VIEW_H
diff --git a/libcxx/include/__ranges/enable_borrowed_range.h b/libcxx/include/__ranges/enable_borrowed_range.h
index 618b2223c716..20b1d42b26c8 100644
--- a/libcxx/include/__ranges/enable_borrowed_range.h
+++ b/libcxx/include/__ranges/enable_borrowed_range.h
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===------------------ __ranges/enable_borrowed_range.h ------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -20,9 +20,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_RANGES)
@@ -41,6 +38,4 @@ inline constexpr bool enable_borrowed_range = false;
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___RANGES_ENABLE_BORROWED_RANGE_H
diff --git a/libcxx/include/__ranges/enable_view.h b/libcxx/include/__ranges/enable_view.h
index 2628d51ced49..a09de11da81e 100644
--- a/libcxx/include/__ranges/enable_view.h
+++ b/libcxx/include/__ranges/enable_view.h
@@ -17,9 +17,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
#if !defined(_LIBCPP_HAS_NO_RANGES)
@@ -37,6 +34,4 @@ inline constexpr bool enable_view = derived_from<_Tp, view_base>;
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___RANGES_ENABLE_VIEW_H
diff --git a/libcxx/include/__ranges/iota_view.h b/libcxx/include/__ranges/iota_view.h
new file mode 100644
index 000000000000..da712b8e6f4f
--- /dev/null
+++ b/libcxx/include/__ranges/iota_view.h
@@ -0,0 +1,408 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef _LIBCPP___RANGES_IOTA_VIEW_H
+#define _LIBCPP___RANGES_IOTA_VIEW_H
+
+#include <__compare/three_way_comparable.h>
+#include <__concepts/arithmetic.h>
+#include <__concepts/constructible.h>
+#include <__concepts/convertible_to.h>
+#include <__concepts/copyable.h>
+#include <__concepts/equality_comparable.h>
+#include <__concepts/invocable.h>
+#include <__concepts/same_as.h>
+#include <__concepts/semiregular.h>
+#include <__concepts/totally_ordered.h>
+#include <__config>
+#include <__debug>
+#include <__functional/ranges_operations.h>
+#include <__iterator/concepts.h>
+#include <__iterator/incrementable_traits.h>
+#include <__iterator/iterator_traits.h>
+#include <__iterator/unreachable_sentinel.h>
+#include <__ranges/copyable_box.h>
+#include <__ranges/enable_borrowed_range.h>
+#include <__ranges/view_interface.h>
+#include <__utility/forward.h>
+#include <__utility/move.h>
+#include <type_traits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if !defined(_LIBCPP_HAS_NO_RANGES)
+
+namespace ranges {
+ template<class _Int>
+ struct __get_wider_signed {
+ static auto __call() {
+ if constexpr (sizeof(_Int) < sizeof(short)) return type_identity<short>{};
+ else if constexpr (sizeof(_Int) < sizeof(int)) return type_identity<int>{};
+ else if constexpr (sizeof(_Int) < sizeof(long)) return type_identity<long>{};
+ else return type_identity<long long>{};
+
+ static_assert(sizeof(_Int) <= sizeof(long long),
+ "Found integer-like type that is bigger than largest integer like type.");
+ }
+
+ using type = typename decltype(__call())::type;
+ };
+
+ template<class _Start>
+ using _IotaDiffT = typename _If<
+ (!integral<_Start> || sizeof(iter_difference_t<_Start>) > sizeof(_Start)),
+ type_identity<iter_difference_t<_Start>>,
+ __get_wider_signed<_Start>
+ >::type;
+
+ template<class _Iter>
+ concept __decrementable = incrementable<_Iter> && requires(_Iter __i) {
+ { --__i } -> same_as<_Iter&>;
+ { __i-- } -> same_as<_Iter>;
+ };
+
+ template<class _Iter>
+ concept __advanceable =
+ __decrementable<_Iter> && totally_ordered<_Iter> &&
+ requires(_Iter __i, const _Iter __j, const _IotaDiffT<_Iter> __n) {
+ { __i += __n } -> same_as<_Iter&>;
+ { __i -= __n } -> same_as<_Iter&>;
+ _Iter(__j + __n);
+ _Iter(__n + __j);
+ _Iter(__j - __n);
+ { __j - __j } -> convertible_to<_IotaDiffT<_Iter>>;
+ };
+
+ template<class>
+ struct __iota_iterator_category {};
+
+ template<incrementable _Tp>
+ struct __iota_iterator_category<_Tp> {
+ using iterator_category = input_iterator_tag;
+ };
+
+ template<weakly_incrementable _Start, semiregular _Bound = unreachable_sentinel_t>
+ requires __weakly_equality_comparable_with<_Start, _Bound> && copyable<_Start>
+ class iota_view : public view_interface<iota_view<_Start, _Bound>> {
+ struct __iterator : public __iota_iterator_category<_Start> {
+ friend class iota_view;
+
+ using iterator_concept =
+ _If<__advanceable<_Start>, random_access_iterator_tag,
+ _If<__decrementable<_Start>, bidirectional_iterator_tag,
+ _If<incrementable<_Start>, forward_iterator_tag,
+ /*Else*/ input_iterator_tag>>>;
+
+ using value_type = _Start;
+ using difference_type = _IotaDiffT<_Start>;
+
+ _Start __value_ = _Start();
+
+ _LIBCPP_HIDE_FROM_ABI
+ __iterator() requires default_initializable<_Start> = default;
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr explicit __iterator(_Start __value) : __value_(_VSTD::move(__value)) {}
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr _Start operator*() const noexcept(is_nothrow_copy_constructible_v<_Start>) {
+ return __value_;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr __iterator& operator++() {
+ ++__value_;
+ return *this;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr void operator++(int) { ++*this; }
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr __iterator operator++(int) requires incrementable<_Start> {
+ auto __tmp = *this;
+ ++*this;
+ return __tmp;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr __iterator& operator--() requires __decrementable<_Start> {
+ --__value_;
+ return *this;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr __iterator operator--(int) requires __decrementable<_Start> {
+ auto __tmp = *this;
+ --*this;
+ return __tmp;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr __iterator& operator+=(difference_type __n)
+ requires __advanceable<_Start>
+ {
+ if constexpr (__integer_like<_Start> && !__signed_integer_like<_Start>) {
+ if (__n >= difference_type(0)) {
+ __value_ += static_cast<_Start>(__n);
+ } else {
+ __value_ -= static_cast<_Start>(-__n);
+ }
+ } else {
+ __value_ += __n;
+ }
+ return *this;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr __iterator& operator-=(difference_type __n)
+ requires __advanceable<_Start>
+ {
+ if constexpr (__integer_like<_Start> && !__signed_integer_like<_Start>) {
+ if (__n >= difference_type(0)) {
+ __value_ -= static_cast<_Start>(__n);
+ } else {
+ __value_ += static_cast<_Start>(-__n);
+ }
+ } else {
+ __value_ -= __n;
+ }
+ return *this;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr _Start operator[](difference_type __n) const
+ requires __advanceable<_Start>
+ {
+ return _Start(__value_ + __n);
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr bool operator==(const __iterator& __x, const __iterator& __y)
+ requires equality_comparable<_Start>
+ {
+ return __x.__value_ == __y.__value_;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr bool operator<(const __iterator& __x, const __iterator& __y)
+ requires totally_ordered<_Start>
+ {
+ return __x.__value_ < __y.__value_;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr bool operator>(const __iterator& __x, const __iterator& __y)
+ requires totally_ordered<_Start>
+ {
+ return __y < __x;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr bool operator<=(const __iterator& __x, const __iterator& __y)
+ requires totally_ordered<_Start>
+ {
+ return !(__y < __x);
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr bool operator>=(const __iterator& __x, const __iterator& __y)
+ requires totally_ordered<_Start>
+ {
+ return !(__x < __y);
+ }
+
+ friend constexpr auto operator<=>(const __iterator& __x, const __iterator& __y)
+ requires totally_ordered<_Start> && three_way_comparable<_Start>
+ {
+ return __x.__value_ <=> __y.__value_;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr __iterator operator+(__iterator __i, difference_type __n)
+ requires __advanceable<_Start>
+ {
+ __i += __n;
+ return __i;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr __iterator operator+(difference_type __n, __iterator __i)
+ requires __advanceable<_Start>
+ {
+ return __i + __n;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr __iterator operator-(__iterator __i, difference_type __n)
+ requires __advanceable<_Start>
+ {
+ __i -= __n;
+ return __i;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr difference_type operator-(const __iterator& __x, const __iterator& __y)
+ requires __advanceable<_Start>
+ {
+ if constexpr (__integer_like<_Start>) {
+ if constexpr (__signed_integer_like<_Start>) {
+ return difference_type(difference_type(__x.__value_) - difference_type(__y.__value_));
+ }
+ if (__y.__value_ > __x.__value_) {
+ return difference_type(-difference_type(__y.__value_ - __x.__value_));
+ }
+ return difference_type(__x.__value_ - __y.__value_);
+ }
+ return __x.__value_ - __y.__value_;
+ }
+ };
+
+ struct __sentinel {
+ friend class iota_view;
+
+ private:
+ _Bound __bound_ = _Bound();
+
+ public:
+ _LIBCPP_HIDE_FROM_ABI
+ __sentinel() = default;
+ constexpr explicit __sentinel(_Bound __bound) : __bound_(_VSTD::move(__bound)) {}
+
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr bool operator==(const __iterator& __x, const __sentinel& __y) {
+ return __x.__value_ == __y.__bound_;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr iter_difference_t<_Start> operator-(const __iterator& __x, const __sentinel& __y)
+ requires sized_sentinel_for<_Bound, _Start>
+ {
+ return __x.__value_ - __y.__bound_;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr iter_difference_t<_Start> operator-(const __sentinel& __x, const __iterator& __y)
+ requires sized_sentinel_for<_Bound, _Start>
+ {
+ return -(__y - __x);
+ }
+ };
+
+ _Start __value_ = _Start();
+ _Bound __bound_ = _Bound();
+
+ public:
+ _LIBCPP_HIDE_FROM_ABI
+ iota_view() requires default_initializable<_Start> = default;
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr explicit iota_view(_Start __value) : __value_(_VSTD::move(__value)) { }
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr iota_view(type_identity_t<_Start> __value, type_identity_t<_Bound> __bound)
+ : __value_(_VSTD::move(__value)), __bound_(_VSTD::move(__bound)) {
+ // Validate the precondition if possible.
+ if constexpr (totally_ordered_with<_Start, _Bound>) {
+ _LIBCPP_ASSERT(ranges::less_equal()(__value_, __bound_),
+ "Precondition violated: value is greater than bound.");
+ }
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr iota_view(__iterator __first, __iterator __last)
+ requires same_as<_Start, _Bound>
+ : iota_view(_VSTD::move(__first.__value_), _VSTD::move(__last.__value_)) {}
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr iota_view(__iterator __first, _Bound __last)
+ requires same_as<_Bound, unreachable_sentinel_t>
+ : iota_view(_VSTD::move(__first.__value_), _VSTD::move(__last)) {}
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr iota_view(__iterator __first, __sentinel __last)
+ requires (!same_as<_Start, _Bound> && !same_as<_Start, unreachable_sentinel_t>)
+ : iota_view(_VSTD::move(__first.__value_), _VSTD::move(__last.__bound_)) {}
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr __iterator begin() const { return __iterator{__value_}; }
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr auto end() const {
+ if constexpr (same_as<_Bound, unreachable_sentinel_t>)
+ return unreachable_sentinel;
+ else
+ return __sentinel{__bound_};
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr __iterator end() const requires same_as<_Start, _Bound> {
+ return __iterator{__bound_};
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr auto size() const
+ requires (same_as<_Start, _Bound> && __advanceable<_Start>) ||
+ (integral<_Start> && integral<_Bound>) ||
+ sized_sentinel_for<_Bound, _Start>
+ {
+ if constexpr (__integer_like<_Start> && __integer_like<_Bound>) {
+ if (__value_ < 0) {
+ if (__bound_ < 0) {
+ return _VSTD::__to_unsigned_like(-__value_) - _VSTD::__to_unsigned_like(-__bound_);
+ }
+ return _VSTD::__to_unsigned_like(__bound_) + _VSTD::__to_unsigned_like(-__value_);
+ }
+ return _VSTD::__to_unsigned_like(__bound_) - _VSTD::__to_unsigned_like(__value_);
+ }
+ return _VSTD::__to_unsigned_like(__bound_ - __value_);
+ }
+ };
+
+ template<class _Start, class _Bound>
+ requires (!__integer_like<_Start> || !__integer_like<_Bound> ||
+ (__signed_integer_like<_Start> == __signed_integer_like<_Bound>))
+ iota_view(_Start, _Bound) -> iota_view<_Start, _Bound>;
+
+ template<class _Start, class _Bound>
+ inline constexpr bool enable_borrowed_range<iota_view<_Start, _Bound>> = true;
+
+namespace views {
+namespace __iota {
+ struct __fn {
+ template<class _Start>
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr auto operator()(_Start&& __start) const
+ noexcept(noexcept(ranges::iota_view(_VSTD::forward<_Start>(__start))))
+ -> decltype( ranges::iota_view(_VSTD::forward<_Start>(__start)))
+ { return ranges::iota_view(_VSTD::forward<_Start>(__start)); }
+
+ template<class _Start, class _Bound>
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr auto operator()(_Start&& __start, _Bound&& __bound) const
+ noexcept(noexcept(ranges::iota_view(_VSTD::forward<_Start>(__start), _VSTD::forward<_Bound>(__bound))))
+ -> decltype( ranges::iota_view(_VSTD::forward<_Start>(__start), _VSTD::forward<_Bound>(__bound)))
+ { return ranges::iota_view(_VSTD::forward<_Start>(__start), _VSTD::forward<_Bound>(__bound)); }
+ };
+} // namespace __iota
+
+inline namespace __cpo {
+ inline constexpr auto iota = __iota::__fn{};
+}
+} // namespace views
+} // namespace ranges
+
+#endif // !defined(_LIBCPP_HAS_NO_RANGES)
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___RANGES_IOTA_VIEW_H
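
iota_view above supports both a bounded form, iota(value, bound), and an unbounded form, iota(value), whose end() is unreachable_sentinel. Usage sketch (C++20 assumed; not part of the imported patch):

    #include <ranges>
    #include <vector>

    int main() {
        // Bounded: the half-open sequence [0, 5).
        std::vector<int> v;
        for (int i : std::views::iota(0, 5))
            v.push_back(i);                              // 0 1 2 3 4

        // Unbounded: pair with views::take to keep it finite.
        long long sum = 0;
        for (int i : std::views::iota(10) | std::views::take(3))
            sum += i;                                    // 10 + 11 + 12

        return (v.size() == 5 && sum == 33) ? 0 : 1;
    }
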
diff --git a/libcxx/include/__ranges/join_view.h b/libcxx/include/__ranges/join_view.h
new file mode 100644
index 000000000000..9aa69da76cf0
--- /dev/null
+++ b/libcxx/include/__ranges/join_view.h
@@ -0,0 +1,350 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef _LIBCPP___RANGES_JOIN_VIEW_H
+#define _LIBCPP___RANGES_JOIN_VIEW_H
+
+#include <__config>
+#include <__iterator/concepts.h>
+#include <__iterator/iterator_traits.h>
+#include <__ranges/access.h>
+#include <__ranges/all.h>
+#include <__ranges/concepts.h>
+#include <__ranges/non_propagating_cache.h>
+#include <__ranges/ref_view.h>
+#include <__ranges/subrange.h>
+#include <__ranges/view_interface.h>
+#include <__utility/declval.h>
+#include <__utility/forward.h>
+#include <optional>
+#include <type_traits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if !defined(_LIBCPP_HAS_NO_RANGES)
+
+namespace ranges {
+ template<class>
+ struct __join_view_iterator_category {};
+
+ template<class _View>
+ requires is_reference_v<range_reference_t<_View>> &&
+ forward_range<_View> &&
+ forward_range<range_reference_t<_View>>
+ struct __join_view_iterator_category<_View> {
+ using _OuterC = typename iterator_traits<iterator_t<_View>>::iterator_category;
+ using _InnerC = typename iterator_traits<iterator_t<range_reference_t<_View>>>::iterator_category;
+
+ using iterator_category = _If<
+ derived_from<_OuterC, bidirectional_iterator_tag> && derived_from<_InnerC, bidirectional_iterator_tag>,
+ bidirectional_iterator_tag,
+ _If<
+ derived_from<_OuterC, forward_iterator_tag> && derived_from<_InnerC, forward_iterator_tag>,
+ forward_iterator_tag,
+ input_iterator_tag
+ >
+ >;
+ };
+
+ template<input_range _View>
+ requires view<_View> && input_range<range_reference_t<_View>>
+ class join_view
+ : public view_interface<join_view<_View>> {
+ private:
+ using _InnerRange = range_reference_t<_View>;
+
+ template<bool> struct __iterator;
+ template<bool> struct __sentinel;
+
+ static constexpr bool _UseCache = !is_reference_v<_InnerRange>;
+ using _Cache = _If<_UseCache, __non_propagating_cache<remove_cvref_t<_InnerRange>>, __empty_cache>;
+ [[no_unique_address]] _Cache __cache_;
+ _View __base_ = _View(); // TODO: [[no_unique_address]] makes clang crash! File a bug :)
+
+ public:
+ _LIBCPP_HIDE_FROM_ABI
+ join_view() requires default_initializable<_View> = default;
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr explicit join_view(_View __base)
+ : __base_(_VSTD::move(__base)) {}
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr _View base() const& requires copy_constructible<_View> { return __base_; }
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr _View base() && { return _VSTD::move(__base_); }
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr auto begin() {
+ constexpr bool __use_const = __simple_view<_View> &&
+ is_reference_v<range_reference_t<_View>>;
+ return __iterator<__use_const>{*this, ranges::begin(__base_)};
+ }
+
+ template<class _V2 = _View>
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr auto begin() const
+ requires input_range<const _V2> &&
+ is_reference_v<range_reference_t<const _V2>>
+ {
+ return __iterator<true>{*this, ranges::begin(__base_)};
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr auto end() {
+ if constexpr (forward_range<_View> &&
+ is_reference_v<_InnerRange> &&
+ forward_range<_InnerRange> &&
+ common_range<_View> &&
+ common_range<_InnerRange>)
+ return __iterator<__simple_view<_View>>{*this, ranges::end(__base_)};
+ else
+ return __sentinel<__simple_view<_View>>{*this};
+ }
+
+ template<class _V2 = _View>
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr auto end() const
+ requires input_range<const _V2> &&
+ is_reference_v<range_reference_t<const _V2>>
+ {
+ using _ConstInnerRange = range_reference_t<const _View>;
+ if constexpr (forward_range<const _View> &&
+ is_reference_v<_ConstInnerRange> &&
+ forward_range<_ConstInnerRange> &&
+ common_range<const _View> &&
+ common_range<_ConstInnerRange>) {
+ return __iterator<true>{*this, ranges::end(__base_)};
+ } else {
+ return __sentinel<true>{*this};
+ }
+ }
+ };
+
+ template<input_range _View>
+ requires view<_View> && input_range<range_reference_t<_View>>
+ template<bool _Const> struct join_view<_View>::__sentinel {
+ template<bool> friend struct __sentinel;
+
+ private:
+ using _Parent = __maybe_const<_Const, join_view>;
+ using _Base = __maybe_const<_Const, _View>;
+ sentinel_t<_Base> __end_ = sentinel_t<_Base>();
+
+ public:
+ _LIBCPP_HIDE_FROM_ABI
+ __sentinel() = default;
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr explicit __sentinel(_Parent& __parent)
+ : __end_(ranges::end(__parent.__base_)) {}
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr __sentinel(__sentinel<!_Const> __s)
+ requires _Const && convertible_to<sentinel_t<_View>, sentinel_t<_Base>>
+ : __end_(_VSTD::move(__s.__end_)) {}
+
+ template<bool _OtherConst>
+ requires sentinel_for<sentinel_t<_Base>, iterator_t<__maybe_const<_OtherConst, _View>>>
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr bool operator==(const __iterator<_OtherConst>& __x, const __sentinel& __y) {
+ return __x.__outer_ == __y.__end_;
+ }
+ };
+
+ template<input_range _View>
+ requires view<_View> && input_range<range_reference_t<_View>>
+ template<bool _Const> struct join_view<_View>::__iterator
+ : public __join_view_iterator_category<__maybe_const<_Const, _View>> {
+
+ template<bool> friend struct __iterator;
+
+ private:
+ using _Parent = __maybe_const<_Const, join_view>;
+ using _Base = __maybe_const<_Const, _View>;
+ using _Outer = iterator_t<_Base>;
+ using _Inner = iterator_t<range_reference_t<_Base>>;
+
+ static constexpr bool __ref_is_glvalue = is_reference_v<range_reference_t<_Base>>;
+
+ public:
+ _Outer __outer_ = _Outer();
+
+ private:
+ optional<_Inner> __inner_;
+ _Parent *__parent_ = nullptr;
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr void __satisfy() {
+ for (; __outer_ != ranges::end(__parent_->__base_); ++__outer_) {
+ auto&& __inner = [&]() -> auto&& {
+ if constexpr (__ref_is_glvalue)
+ return *__outer_;
+ else
+ return __parent_->__cache_.__emplace_from([&]() -> decltype(auto) { return *__outer_; });
+ }();
+ __inner_ = ranges::begin(__inner);
+ if (*__inner_ != ranges::end(__inner))
+ return;
+ }
+
+ if constexpr (__ref_is_glvalue)
+ __inner_.reset();
+ }
+
+ public:
+ using iterator_concept = _If<
+ __ref_is_glvalue && bidirectional_range<_Base> && bidirectional_range<range_reference_t<_Base>>,
+ bidirectional_iterator_tag,
+ _If<
+ __ref_is_glvalue && forward_range<_Base> && forward_range<range_reference_t<_Base>>,
+ forward_iterator_tag,
+ input_iterator_tag
+ >
+ >;
+
+ using value_type = range_value_t<range_reference_t<_Base>>;
+
+ using difference_type = common_type_t<
+ range_difference_t<_Base>, range_difference_t<range_reference_t<_Base>>>;
+
+ _LIBCPP_HIDE_FROM_ABI
+ __iterator() requires default_initializable<_Outer> = default;
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr __iterator(_Parent& __parent, _Outer __outer)
+ : __outer_(_VSTD::move(__outer))
+ , __parent_(_VSTD::addressof(__parent)) {
+ __satisfy();
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr __iterator(__iterator<!_Const> __i)
+ requires _Const &&
+ convertible_to<iterator_t<_View>, _Outer> &&
+ convertible_to<iterator_t<_InnerRange>, _Inner>
+ : __outer_(_VSTD::move(__i.__outer_))
+ , __inner_(_VSTD::move(__i.__inner_))
+ , __parent_(__i.__parent_) {}
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr decltype(auto) operator*() const {
+ return **__inner_;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr _Inner operator->() const
+ requires __has_arrow<_Inner> && copyable<_Inner>
+ {
+ return *__inner_;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr __iterator& operator++() {
+ auto&& __inner = [&]() -> auto&& {
+ if constexpr (__ref_is_glvalue)
+ return *__outer_;
+ else
+ return *__parent_->__cache_;
+ }();
+ if (++*__inner_ == ranges::end(__inner)) {
+ ++__outer_;
+ __satisfy();
+ }
+ return *this;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr void operator++(int) {
+ ++*this;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr __iterator operator++(int)
+ requires __ref_is_glvalue &&
+ forward_range<_Base> &&
+ forward_range<range_reference_t<_Base>>
+ {
+ auto __tmp = *this;
+ ++*this;
+ return __tmp;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr __iterator& operator--()
+ requires __ref_is_glvalue &&
+ bidirectional_range<_Base> &&
+ bidirectional_range<range_reference_t<_Base>> &&
+ common_range<range_reference_t<_Base>>
+ {
+ if (__outer_ == ranges::end(__parent_->__base_))
+ __inner_ = ranges::end(*--__outer_);
+
+ // Skip empty inner ranges when going backwards.
+ while (*__inner_ == ranges::begin(*__outer_)) {
+ __inner_ = ranges::end(*--__outer_);
+ }
+
+ --*__inner_;
+ return *this;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr __iterator operator--(int)
+ requires __ref_is_glvalue &&
+ bidirectional_range<_Base> &&
+ bidirectional_range<range_reference_t<_Base>> &&
+ common_range<range_reference_t<_Base>>
+ {
+ auto __tmp = *this;
+ --*this;
+ return __tmp;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr bool operator==(const __iterator& __x, const __iterator& __y)
+ requires __ref_is_glvalue &&
+ equality_comparable<iterator_t<_Base>> &&
+ equality_comparable<iterator_t<range_reference_t<_Base>>>
+ {
+ return __x.__outer_ == __y.__outer_ && __x.__inner_ == __y.__inner_;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr decltype(auto) iter_move(const __iterator& __i)
+ noexcept(noexcept(ranges::iter_move(*__i.__inner_)))
+ {
+ return ranges::iter_move(*__i.__inner_);
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr void iter_swap(const __iterator& __x, const __iterator& __y)
+ noexcept(noexcept(ranges::iter_swap(*__x.__inner_, *__y.__inner_)))
+ requires indirectly_swappable<_Inner>
+ {
+ return ranges::iter_swap(*__x.__inner_, *__y.__inner_);
+ }
+ };
+
+ template<class _Range>
+ explicit join_view(_Range&&) -> join_view<views::all_t<_Range>>;
+
+} // namespace ranges
+
+#undef _CONSTEXPR_TERNARY
+
+#endif // !defined(_LIBCPP_HAS_NO_RANGES)
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___RANGES_JOIN_VIEW_H
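
join_view above flattens a range of ranges into a single range of elements; its __satisfy() helper is what skips empty inner ranges. This revision adds only the view type and its deduction guide (no views::join object yet), so a sketch constructs the view directly (C++20 assumed; not part of the imported patch):

    #include <ranges>
    #include <string>
    #include <vector>

    int main() {
        std::vector<std::string> words{"he", "llo", "", "!"};

        std::string flat;
        for (char c : std::ranges::join_view(words))   // empty inner ranges are skipped
            flat.push_back(c);

        return flat == "hello!" ? 0 : 1;
    }
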
diff --git a/libcxx/include/__ranges/non_propagating_cache.h b/libcxx/include/__ranges/non_propagating_cache.h
index 878f7070a07f..456e08d8c971 100644
--- a/libcxx/include/__ranges/non_propagating_cache.h
+++ b/libcxx/include/__ranges/non_propagating_cache.h
@@ -13,6 +13,7 @@
#include <__iterator/concepts.h> // indirectly_readable
#include <__iterator/iterator_traits.h> // iter_reference_t
#include <__memory/addressof.h>
+#include <__utility/forward.h>
#include <concepts> // constructible_from
#include <optional>
#include <type_traits>
@@ -21,13 +22,8 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
-// clang-format off
-
#if !defined(_LIBCPP_HAS_NO_RANGES)
namespace ranges {
@@ -42,7 +38,20 @@ namespace ranges {
template<class _Tp>
requires is_object_v<_Tp>
class _LIBCPP_TEMPLATE_VIS __non_propagating_cache {
- optional<_Tp> __value_ = nullopt;
+ struct __from_tag { };
+ struct __forward_tag { };
+
+ // This helper class is needed to perform copy and move elision when
+ // constructing the contained type from an iterator.
+ struct __wrapper {
+ template<class ..._Args>
+ constexpr explicit __wrapper(__forward_tag, _Args&& ...__args) : __t_(_VSTD::forward<_Args>(__args)...) { }
+ template<class _Fn>
+ constexpr explicit __wrapper(__from_tag, _Fn const& __f) : __t_(__f()) { }
+ _Tp __t_;
+ };
+
+ optional<__wrapper> __value_ = nullopt;
public:
_LIBCPP_HIDE_FROM_ABI __non_propagating_cache() = default;
@@ -75,16 +84,24 @@ namespace ranges {
}
_LIBCPP_HIDE_FROM_ABI
- constexpr _Tp& operator*() { return *__value_; }
+ constexpr _Tp& operator*() { return __value_->__t_; }
_LIBCPP_HIDE_FROM_ABI
- constexpr _Tp const& operator*() const { return *__value_; }
+ constexpr _Tp const& operator*() const { return __value_->__t_; }
_LIBCPP_HIDE_FROM_ABI
constexpr bool __has_value() const { return __value_.has_value(); }
+
+ template<class _Fn>
_LIBCPP_HIDE_FROM_ABI
- constexpr void __set(_Tp const& __value) { __value_.emplace(__value); }
+ constexpr _Tp& __emplace_from(_Fn const& __f) {
+ return __value_.emplace(__from_tag{}, __f).__t_;
+ }
+
+ template<class ..._Args>
_LIBCPP_HIDE_FROM_ABI
- constexpr void __set(_Tp&& __value) { __value_.emplace(_VSTD::move(__value)); }
+ constexpr _Tp& __emplace(_Args&& ...__args) {
+ return __value_.emplace(__forward_tag{}, _VSTD::forward<_Args>(__args)...).__t_;
+ }
};
struct __empty_cache { };
@@ -94,6 +111,4 @@ namespace ranges {
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___RANGES_NON_PROPAGATING_CACHE_H
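
The __wrapper/__emplace_from machinery above lets the cached object be constructed directly from a callable's return value, so guaranteed copy elision applies and no extra copy or move is needed. A standalone analogue of the idea (hypothetical names, not libc++ internals; C++17 or later assumed):

    #include <optional>

    // A type we want to cache even though it can be neither copied nor moved.
    struct Pinned {
        int value;
        explicit Pinned(int v) : value(v) {}
        Pinned(const Pinned&) = delete;
        Pinned(Pinned&&) = delete;
    };

    template <class T>
    struct cache {
        // Constructing t from fn()'s prvalue result elides the copy/move,
        // which is exactly why construction is funnelled through a callable.
        struct wrapper {
            template <class Fn>
            explicit wrapper(const Fn& fn) : t(fn()) {}
            T t;
        };
        std::optional<wrapper> slot;

        template <class Fn>
        T& emplace_from(const Fn& fn) { return slot.emplace(fn).t; }
    };

    int main() {
        cache<Pinned> c;
        Pinned& p = c.emplace_from([] { return Pinned(42); });
        return p.value == 42 ? 0 : 1;
    }
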
diff --git a/libcxx/include/__ranges/range_adaptor.h b/libcxx/include/__ranges/range_adaptor.h
new file mode 100644
index 000000000000..74aea3187e89
--- /dev/null
+++ b/libcxx/include/__ranges/range_adaptor.h
@@ -0,0 +1,73 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___RANGES_RANGE_ADAPTOR_H
+#define _LIBCPP___RANGES_RANGE_ADAPTOR_H
+
+#include <__config>
+#include <__functional/compose.h>
+#include <__functional/invoke.h>
+#include <__ranges/concepts.h>
+#include <__utility/forward.h>
+#include <__utility/move.h>
+#include <concepts>
+#include <type_traits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if !defined(_LIBCPP_HAS_NO_RANGES)
+
+// CRTP base that one can derive from in order to be considered a range adaptor closure
+// by the library. When deriving from this class, a pipe operator will be provided to
+// make the following hold:
+// - `x | f` is equivalent to `f(x)`
+// - `f1 | f2` is an adaptor closure `g` such that `g(x)` is equivalent to `f2(f1(x))`
+template <class _Tp>
+struct __range_adaptor_closure;
+
+// Type that wraps an arbitrary function object and makes it into a range adaptor closure,
+// i.e. something that can be called via the `x | f` notation.
+template <class _Fn>
+struct __range_adaptor_closure_t : _Fn, __range_adaptor_closure<__range_adaptor_closure_t<_Fn>> {
+ constexpr explicit __range_adaptor_closure_t(_Fn&& __f) : _Fn(_VSTD::move(__f)) { }
+};
+
+template <class _Tp>
+concept _RangeAdaptorClosure = derived_from<remove_cvref_t<_Tp>, __range_adaptor_closure<remove_cvref_t<_Tp>>>;
+
+template <class _Tp>
+struct __range_adaptor_closure {
+ template <ranges::viewable_range _View, _RangeAdaptorClosure _Closure>
+ requires same_as<_Tp, remove_cvref_t<_Closure>> &&
+ invocable<_Closure, _View>
+ [[nodiscard]] _LIBCPP_HIDE_FROM_ABI
+ friend constexpr decltype(auto) operator|(_View&& __view, _Closure&& __closure)
+ noexcept(is_nothrow_invocable_v<_Closure, _View>)
+ { return _VSTD::invoke(_VSTD::forward<_Closure>(__closure), _VSTD::forward<_View>(__view)); }
+
+ template <_RangeAdaptorClosure _Closure, _RangeAdaptorClosure _OtherClosure>
+ requires same_as<_Tp, remove_cvref_t<_Closure>> &&
+ constructible_from<decay_t<_Closure>, _Closure> &&
+ constructible_from<decay_t<_OtherClosure>, _OtherClosure>
+ [[nodiscard]] _LIBCPP_HIDE_FROM_ABI
+ friend constexpr auto operator|(_Closure&& __c1, _OtherClosure&& __c2)
+ noexcept(is_nothrow_constructible_v<decay_t<_Closure>, _Closure> &&
+ is_nothrow_constructible_v<decay_t<_OtherClosure>, _OtherClosure>)
+ { return __range_adaptor_closure_t(_VSTD::__compose(_VSTD::forward<_OtherClosure>(__c2), _VSTD::forward<_Closure>(__c1))); }
+};
+
+#endif // !defined(_LIBCPP_HAS_NO_RANGES)
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___RANGES_RANGE_ADAPTOR_H
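
The comment block above states the two guarantees of the closure machinery: `x | f` means `f(x)`, and `f1 | f2` is a composed closure. Both views::reverse and views::common introduced elsewhere in this import derive from __range_adaptor_closure, so a sketch can exercise exactly those guarantees (C++20 assumed; not part of the imported patch):

    #include <algorithm>
    #include <list>
    #include <ranges>

    int main() {
        std::list<int> xs{1, 2, 3, 4};

        auto f1 = std::views::reverse;   // an adaptor closure
        auto f2 = std::views::common;    // another adaptor closure
        auto g  = f1 | f2;               // f1 | f2: a composed closure

        auto a = xs | f1 | f2;           // x | f is f(x), applied twice
        auto b = xs | g;                 // g(x) is f2(f1(x))
        auto c = f2(f1(xs));             // the same pipeline, called directly

        bool same = std::ranges::equal(a, b) && std::ranges::equal(b, c);
        return same ? 0 : 1;
    }
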
diff --git a/libcxx/include/__ranges/ref_view.h b/libcxx/include/__ranges/ref_view.h
index fb45a359863b..7567ac48f255 100644
--- a/libcxx/include/__ranges/ref_view.h
+++ b/libcxx/include/__ranges/ref_view.h
@@ -20,6 +20,7 @@
#include <__ranges/empty.h>
#include <__ranges/size.h>
#include <__ranges/view_interface.h>
+#include <__utility/forward.h>
#include <concepts>
#include <type_traits>
@@ -27,9 +28,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
#if !defined(_LIBCPP_HAS_NO_RANGES)
@@ -82,6 +80,4 @@ public:
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___RANGES_REF_VIEW_H
diff --git a/libcxx/include/__ranges/reverse_view.h b/libcxx/include/__ranges/reverse_view.h
new file mode 100644
index 000000000000..618be80457b6
--- /dev/null
+++ b/libcxx/include/__ranges/reverse_view.h
@@ -0,0 +1,190 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef _LIBCPP___RANGES_REVERSE_VIEW_H
+#define _LIBCPP___RANGES_REVERSE_VIEW_H
+
+#include <__concepts/constructible.h>
+#include <__config>
+#include <__iterator/concepts.h>
+#include <__iterator/next.h>
+#include <__iterator/reverse_iterator.h>
+#include <__ranges/access.h>
+#include <__ranges/all.h>
+#include <__ranges/concepts.h>
+#include <__ranges/enable_borrowed_range.h>
+#include <__ranges/non_propagating_cache.h>
+#include <__ranges/range_adaptor.h>
+#include <__ranges/size.h>
+#include <__ranges/subrange.h>
+#include <__ranges/view_interface.h>
+#include <__utility/forward.h>
+#include <__utility/move.h>
+#include <type_traits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if !defined(_LIBCPP_HAS_NO_RANGES)
+
+namespace ranges {
+ template<view _View>
+ requires bidirectional_range<_View>
+ class reverse_view : public view_interface<reverse_view<_View>> {
+ // We cache begin() whenever ranges::next is not guaranteed O(1) to provide an
+ // amortized O(1) begin() method.
+ static constexpr bool _UseCache = !random_access_range<_View> && !common_range<_View>;
+ using _Cache = _If<_UseCache, __non_propagating_cache<reverse_iterator<iterator_t<_View>>>, __empty_cache>;
+ [[no_unique_address]] _Cache __cached_begin_ = _Cache();
+ [[no_unique_address]] _View __base_ = _View();
+
+ public:
+ _LIBCPP_HIDE_FROM_ABI
+ reverse_view() requires default_initializable<_View> = default;
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr explicit reverse_view(_View __view) : __base_(_VSTD::move(__view)) {}
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr _View base() const& requires copy_constructible<_View> { return __base_; }
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr _View base() && { return _VSTD::move(__base_); }
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr reverse_iterator<iterator_t<_View>> begin() {
+ if constexpr (_UseCache)
+ if (__cached_begin_.__has_value())
+ return *__cached_begin_;
+
+ auto __tmp = _VSTD::make_reverse_iterator(ranges::next(ranges::begin(__base_), ranges::end(__base_)));
+ if constexpr (_UseCache)
+ __cached_begin_.__emplace(__tmp);
+ return __tmp;
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr reverse_iterator<iterator_t<_View>> begin() requires common_range<_View> {
+ return _VSTD::make_reverse_iterator(ranges::end(__base_));
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr auto begin() const requires common_range<const _View> {
+ return _VSTD::make_reverse_iterator(ranges::end(__base_));
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr reverse_iterator<iterator_t<_View>> end() {
+ return _VSTD::make_reverse_iterator(ranges::begin(__base_));
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr auto end() const requires common_range<const _View> {
+ return _VSTD::make_reverse_iterator(ranges::begin(__base_));
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr auto size() requires sized_range<_View> {
+ return ranges::size(__base_);
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr auto size() const requires sized_range<const _View> {
+ return ranges::size(__base_);
+ }
+ };
+
+ template<class _Range>
+ reverse_view(_Range&&) -> reverse_view<views::all_t<_Range>>;
+
+ template<class _Tp>
+ inline constexpr bool enable_borrowed_range<reverse_view<_Tp>> = enable_borrowed_range<_Tp>;
+
+ namespace views {
+ namespace __reverse {
+ template<class _Tp>
+ constexpr bool __is_reverse_view = false;
+
+ template<class _Tp>
+ constexpr bool __is_reverse_view<reverse_view<_Tp>> = true;
+
+ template<class _Tp>
+ constexpr bool __is_sized_reverse_subrange = false;
+
+ template<class _Iter>
+ constexpr bool __is_sized_reverse_subrange<subrange<reverse_iterator<_Iter>, reverse_iterator<_Iter>, subrange_kind::sized>> = true;
+
+ template<class _Tp>
+ constexpr bool __is_unsized_reverse_subrange = false;
+
+ template<class _Iter, subrange_kind _Kind>
+ constexpr bool __is_unsized_reverse_subrange<subrange<reverse_iterator<_Iter>, reverse_iterator<_Iter>, _Kind>> = _Kind == subrange_kind::unsized;
+
+ template<class _Tp>
+ struct __unwrapped_reverse_subrange {
+ using type = void; // avoid SFINAE-ing out the overload below -- let the concept requirements do it for better diagnostics
+ };
+
+ template<class _Iter, subrange_kind _Kind>
+ struct __unwrapped_reverse_subrange<subrange<reverse_iterator<_Iter>, reverse_iterator<_Iter>, _Kind>> {
+ using type = subrange<_Iter, _Iter, _Kind>;
+ };
+
+ struct __fn : __range_adaptor_closure<__fn> {
+ template<class _Range>
+ requires __is_reverse_view<remove_cvref_t<_Range>>
+ [[nodiscard]] _LIBCPP_HIDE_FROM_ABI
+ constexpr auto operator()(_Range&& __range) const
+ noexcept(noexcept(_VSTD::forward<_Range>(__range).base()))
+ -> decltype( _VSTD::forward<_Range>(__range).base())
+ { return _VSTD::forward<_Range>(__range).base(); }
+
+ template<class _Range,
+ class _UnwrappedSubrange = typename __unwrapped_reverse_subrange<remove_cvref_t<_Range>>::type>
+ requires __is_sized_reverse_subrange<remove_cvref_t<_Range>>
+ [[nodiscard]] _LIBCPP_HIDE_FROM_ABI
+ constexpr auto operator()(_Range&& __range) const
+ noexcept(noexcept(_UnwrappedSubrange(__range.end().base(), __range.begin().base(), __range.size())))
+ -> decltype( _UnwrappedSubrange(__range.end().base(), __range.begin().base(), __range.size()))
+ { return _UnwrappedSubrange(__range.end().base(), __range.begin().base(), __range.size()); }
+
+ template<class _Range,
+ class _UnwrappedSubrange = typename __unwrapped_reverse_subrange<remove_cvref_t<_Range>>::type>
+ requires __is_unsized_reverse_subrange<remove_cvref_t<_Range>>
+ [[nodiscard]] _LIBCPP_HIDE_FROM_ABI
+ constexpr auto operator()(_Range&& __range) const
+ noexcept(noexcept(_UnwrappedSubrange(__range.end().base(), __range.begin().base())))
+ -> decltype( _UnwrappedSubrange(__range.end().base(), __range.begin().base()))
+ { return _UnwrappedSubrange(__range.end().base(), __range.begin().base()); }
+
+ template<class _Range>
+ requires (!__is_reverse_view<remove_cvref_t<_Range>> &&
+ !__is_sized_reverse_subrange<remove_cvref_t<_Range>> &&
+ !__is_unsized_reverse_subrange<remove_cvref_t<_Range>>)
+ [[nodiscard]] _LIBCPP_HIDE_FROM_ABI
+ constexpr auto operator()(_Range&& __range) const
+ noexcept(noexcept(reverse_view{_VSTD::forward<_Range>(__range)}))
+ -> decltype( reverse_view{_VSTD::forward<_Range>(__range)})
+ { return reverse_view{_VSTD::forward<_Range>(__range)}; }
+ };
+ }
+
+ inline namespace __cpo {
+ inline constexpr auto reverse = __reverse::__fn{};
+ }
+ } // namespace views
+} // namespace ranges
+
+#endif // !defined(_LIBCPP_HAS_NO_RANGES)
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___RANGES_REVERSE_VIEW_H
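As an aside (not part of the imported sources): a minimal sketch of standard C++20 usage of the view defined above. It shows the pipeline form and the unwrapping done by the views::reverse overloads, where reversing an already-reversed range yields the underlying view instead of a nested reverse_view.

    #include <concepts>
    #include <iostream>
    #include <ranges>
    #include <vector>

    int main() {
      std::vector<int> v{1, 2, 3, 4};

      // views::reverse wraps the vector in reverse_view<ref_view<vector<int>>>.
      for (int x : v | std::views::reverse)
        std::cout << x << ' ';                       // prints: 4 3 2 1
      std::cout << '\n';

      // Reversing twice unwraps instead of nesting, per the __is_reverse_view
      // overload of __fn::operator() above.
      auto twice = v | std::views::reverse | std::views::reverse;
      static_assert(std::same_as<decltype(twice), std::ranges::ref_view<std::vector<int>>>);
    }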
diff --git a/libcxx/include/__ranges/single_view.h b/libcxx/include/__ranges/single_view.h
new file mode 100644
index 000000000000..412fa9b64643
--- /dev/null
+++ b/libcxx/include/__ranges/single_view.h
@@ -0,0 +1,81 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef _LIBCPP___RANGES_SINGLE_VIEW_H
+#define _LIBCPP___RANGES_SINGLE_VIEW_H
+
+#include <__config>
+#include <__ranges/view_interface.h>
+#include <__ranges/copyable_box.h>
+#include <__utility/forward.h>
+#include <__utility/in_place.h>
+#include <__utility/move.h>
+#include <concepts>
+#include <type_traits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if !defined(_LIBCPP_HAS_NO_RANGES)
+
+namespace ranges {
+ template<copy_constructible _Tp>
+ requires is_object_v<_Tp>
+ class single_view : public view_interface<single_view<_Tp>> {
+ __copyable_box<_Tp> __value_;
+
+ public:
+ _LIBCPP_HIDE_FROM_ABI
+ single_view() requires default_initializable<_Tp> = default;
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr explicit single_view(const _Tp& __t) : __value_(in_place, __t) {}
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr explicit single_view(_Tp&& __t) : __value_(in_place, _VSTD::move(__t)) {}
+
+ template<class... _Args>
+ requires constructible_from<_Tp, _Args...>
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr explicit single_view(in_place_t, _Args&&... __args)
+ : __value_{in_place, _VSTD::forward<_Args>(__args)...} {}
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr _Tp* begin() noexcept { return data(); }
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr const _Tp* begin() const noexcept { return data(); }
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr _Tp* end() noexcept { return data() + 1; }
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr const _Tp* end() const noexcept { return data() + 1; }
+
+ _LIBCPP_HIDE_FROM_ABI
+ static constexpr size_t size() noexcept { return 1; }
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr _Tp* data() noexcept { return __value_.operator->(); }
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr const _Tp* data() const noexcept { return __value_.operator->(); }
+ };
+
+ template<class _Tp>
+ single_view(_Tp) -> single_view<_Tp>;
+} // namespace ranges
+
+#endif // !defined(_LIBCPP_HAS_NO_RANGES)
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___RANGES_SINGLE_VIEW_H
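As an aside (not part of the imported sources): a short usage sketch of the class added above, relying only on standard C++20 semantics. single_view stores exactly one element in the __copyable_box, so begin()/end() are simply data() and data() + 1 and size() is statically 1.

    #include <iostream>
    #include <ranges>

    int main() {
      std::ranges::single_view<int> one{42};        // one element, held by value
      static_assert(decltype(one)::size() == 1);    // size() is static and always 1
      for (int x : one)
        std::cout << x << '\n';                     // prints: 42
    }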
diff --git a/libcxx/include/__ranges/size.h b/libcxx/include/__ranges/size.h
index ce7183e15447..af0a8479f2ec 100644
--- a/libcxx/include/__ranges/size.h
+++ b/libcxx/include/__ranges/size.h
@@ -13,7 +13,7 @@
#include <__iterator/concepts.h>
#include <__iterator/iterator_traits.h>
#include <__ranges/access.h>
-#include <__utility/__decay_copy.h>
+#include <__utility/decay_copy.h>
#include <__utility/forward.h>
#include <concepts>
#include <type_traits>
@@ -22,9 +22,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
#if !defined(_LIBCPP_HAS_NO_RANGES)
@@ -117,7 +114,7 @@ namespace __ssize {
}
inline namespace __cpo {
- inline constexpr const auto ssize = __ssize::__fn{};
+ inline constexpr auto ssize = __ssize::__fn{};
} // namespace __cpo
} // namespace ranges
@@ -127,6 +124,4 @@ inline namespace __cpo {
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___RANGES_SIZE_H
diff --git a/libcxx/include/__ranges/subrange.h b/libcxx/include/__ranges/subrange.h
index 25d333db02de..af4e27600625 100644
--- a/libcxx/include/__ranges/subrange.h
+++ b/libcxx/include/__ranges/subrange.h
@@ -9,32 +9,35 @@
#ifndef _LIBCPP___RANGES_SUBRANGE_H
#define _LIBCPP___RANGES_SUBRANGE_H
+#include <__concepts/constructible.h>
+#include <__concepts/convertible_to.h>
+#include <__concepts/copyable.h>
+#include <__concepts/derived_from.h>
+#include <__concepts/different_from.h>
#include <__config>
+#include <__debug>
+#include <__iterator/advance.h>
#include <__iterator/concepts.h>
#include <__iterator/incrementable_traits.h>
#include <__iterator/iterator_traits.h>
-#include <__iterator/advance.h>
#include <__ranges/access.h>
#include <__ranges/concepts.h>
#include <__ranges/dangling.h>
#include <__ranges/enable_borrowed_range.h>
#include <__ranges/size.h>
#include <__ranges/view_interface.h>
-#include <concepts>
+#include <__tuple>
+#include <__utility/move.h>
#include <type_traits>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
#if !defined(_LIBCPP_HAS_NO_RANGES)
-// clang-format off
namespace ranges {
template<class _From, class _To>
concept __convertible_to_non_slicing =
@@ -64,59 +67,43 @@ namespace ranges {
enum class _LIBCPP_ENUM_VIS subrange_kind : bool { unsized, sized };
- template<class _Iter, class _Sent, bool>
- struct __subrange_base {
- static constexpr bool __store_size = false;
- _Iter __begin_ = _Iter();
- _Sent __end_ = _Sent();
-
- _LIBCPP_HIDE_FROM_ABI
- constexpr __subrange_base() = default;
-
- _LIBCPP_HIDE_FROM_ABI
- constexpr __subrange_base(_Iter __iter, _Sent __sent, make_unsigned_t<iter_difference_t<_Iter>> = 0)
- : __begin_(_VSTD::move(__iter)), __end_(__sent) { }
- };
-
- template<class _Iter, class _Sent>
- struct __subrange_base<_Iter, _Sent, true> {
- static constexpr bool __store_size = true;
- _Iter __begin_ = _Iter();
- _Sent __end_ = _Sent();
- make_unsigned_t<iter_difference_t<_Iter>> __size_ = 0;
-
- _LIBCPP_HIDE_FROM_ABI
- constexpr __subrange_base() = default;
-
- _LIBCPP_HIDE_FROM_ABI
- constexpr __subrange_base(_Iter __iter, _Sent __sent, decltype(__size_) __size)
- : __begin_(_VSTD::move(__iter)), __end_(__sent), __size_(__size) { }
- };
-
template<input_or_output_iterator _Iter, sentinel_for<_Iter> _Sent = _Iter,
subrange_kind _Kind = sized_sentinel_for<_Sent, _Iter>
? subrange_kind::sized
: subrange_kind::unsized>
requires (_Kind == subrange_kind::sized || !sized_sentinel_for<_Sent, _Iter>)
- struct _LIBCPP_TEMPLATE_VIS subrange
- : public view_interface<subrange<_Iter, _Sent, _Kind>>,
- private __subrange_base<_Iter, _Sent, _Kind == subrange_kind::sized && !sized_sentinel_for<_Sent, _Iter>> {
-
- using _Base = __subrange_base<_Iter, _Sent, _Kind == subrange_kind::sized && !sized_sentinel_for<_Sent, _Iter>>;
-
+ class _LIBCPP_TEMPLATE_VIS subrange
+ : public view_interface<subrange<_Iter, _Sent, _Kind>>
+ {
+ private:
+ static constexpr bool _StoreSize = (_Kind == subrange_kind::sized && !sized_sentinel_for<_Sent, _Iter>);
+ static constexpr bool _MustProvideSizeAtConstruction = !_StoreSize; // just to improve compiler diagnostics
+ struct _Empty { constexpr _Empty(auto) noexcept { } };
+ using _Size = conditional_t<_StoreSize, make_unsigned_t<iter_difference_t<_Iter>>, _Empty>;
+ [[no_unique_address]] _Iter __begin_ = _Iter();
+ [[no_unique_address]] _Sent __end_ = _Sent();
+ [[no_unique_address]] _Size __size_ = 0;
+
+ public:
_LIBCPP_HIDE_FROM_ABI
subrange() requires default_initializable<_Iter> = default;
_LIBCPP_HIDE_FROM_ABI
constexpr subrange(__convertible_to_non_slicing<_Iter> auto __iter, _Sent __sent)
- requires (!_Base::__store_size)
- : _Base(_VSTD::move(__iter), __sent) {}
+ requires _MustProvideSizeAtConstruction
+ : __begin_(_VSTD::move(__iter)), __end_(std::move(__sent))
+ { }
_LIBCPP_HIDE_FROM_ABI
constexpr subrange(__convertible_to_non_slicing<_Iter> auto __iter, _Sent __sent,
make_unsigned_t<iter_difference_t<_Iter>> __n)
requires (_Kind == subrange_kind::sized)
- : _Base(_VSTD::move(__iter), __sent, __n) { }
+ : __begin_(_VSTD::move(__iter)), __end_(std::move(__sent)), __size_(__n)
+ {
+ if constexpr (sized_sentinel_for<_Sent, _Iter>)
+ _LIBCPP_ASSERT((__end_ - __begin_) == static_cast<iter_difference_t<_Iter>>(__n),
+ "std::ranges::subrange was passed an invalid size hint");
+ }
template<__different_from<subrange> _Range>
requires borrowed_range<_Range> &&
@@ -124,8 +111,9 @@ namespace ranges {
convertible_to<sentinel_t<_Range>, _Sent>
_LIBCPP_HIDE_FROM_ABI
constexpr subrange(_Range&& __range)
- requires (!_Base::__store_size)
- : subrange(ranges::begin(__range), ranges::end(__range)) { }
+ requires (!_StoreSize)
+ : subrange(ranges::begin(__range), ranges::end(__range))
+ { }
template<__different_from<subrange> _Range>
requires borrowed_range<_Range> &&
@@ -133,9 +121,9 @@ namespace ranges {
convertible_to<sentinel_t<_Range>, _Sent>
_LIBCPP_HIDE_FROM_ABI
constexpr subrange(_Range&& __range)
- requires _Base::__store_size && sized_range<_Range>
- : subrange(__range, ranges::size(__range)) { }
-
+ requires _StoreSize && sized_range<_Range>
+ : subrange(__range, ranges::size(__range))
+ { }
template<borrowed_range _Range>
requires __convertible_to_non_slicing<iterator_t<_Range>, _Iter> &&
@@ -143,39 +131,47 @@ namespace ranges {
_LIBCPP_HIDE_FROM_ABI
constexpr subrange(_Range&& __range, make_unsigned_t<iter_difference_t<_Iter>> __n)
requires (_Kind == subrange_kind::sized)
- : subrange(ranges::begin(__range), ranges::end(__range), __n) { }
+ : subrange(ranges::begin(__range), ranges::end(__range), __n)
+ { }
template<__different_from<subrange> _Pair>
requires __pair_like_convertible_from<_Pair, const _Iter&, const _Sent&>
_LIBCPP_HIDE_FROM_ABI
- constexpr operator _Pair() const { return _Pair(this->__begin_, this->__end_); }
+ constexpr operator _Pair() const {
+ return _Pair(__begin_, __end_);
+ }
_LIBCPP_HIDE_FROM_ABI
constexpr _Iter begin() const requires copyable<_Iter> {
- return this->__begin_;
+ return __begin_;
}
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Iter begin() requires (!copyable<_Iter>) {
- return _VSTD::move(this->__begin_);
+ return _VSTD::move(__begin_);
}
_LIBCPP_HIDE_FROM_ABI
- constexpr _Sent end() const { return this->__end_; }
+ constexpr _Sent end() const {
+ return __end_;
+ }
- [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool empty() const { return this->__begin_ == this->__end_; }
+ [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool empty() const {
+ return __begin_ == __end_;
+ }
_LIBCPP_HIDE_FROM_ABI
constexpr make_unsigned_t<iter_difference_t<_Iter>> size() const
requires (_Kind == subrange_kind::sized)
{
- if constexpr (_Base::__store_size)
- return this->__size_;
+ if constexpr (_StoreSize)
+ return __size_;
else
- return __to_unsigned_like(this->__end_ - this->__begin_);
+ return _VSTD::__to_unsigned_like(__end_ - __begin_);
}
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr subrange next(iter_difference_t<_Iter> __n = 1) const&
- requires forward_iterator<_Iter> {
+ requires forward_iterator<_Iter>
+ {
auto __tmp = *this;
__tmp.advance(__n);
return __tmp;
@@ -187,7 +183,8 @@ namespace ranges {
}
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr subrange prev(iter_difference_t<_Iter> __n = 1) const
- requires bidirectional_iterator<_Iter> {
+ requires bidirectional_iterator<_Iter>
+ {
auto __tmp = *this;
__tmp.advance(-__n);
return __tmp;
@@ -197,16 +194,16 @@ namespace ranges {
constexpr subrange& advance(iter_difference_t<_Iter> __n) {
if constexpr (bidirectional_iterator<_Iter>) {
if (__n < 0) {
- ranges::advance(this->__begin_, __n);
- if constexpr (_Base::__store_size)
- this->__size_ += _VSTD::__to_unsigned_like(-__n);
+ ranges::advance(__begin_, __n);
+ if constexpr (_StoreSize)
+ __size_ += _VSTD::__to_unsigned_like(-__n);
return *this;
}
}
- auto __d = __n - ranges::advance(this->__begin_, __n, this->__end_);
- if constexpr (_Base::__store_size)
- this->__size_ -= _VSTD::__to_unsigned_like(__d);
+ auto __d = __n - ranges::advance(__begin_, __n, __end_);
+ if constexpr (_StoreSize)
+ __size_ -= _VSTD::__to_unsigned_like(__d);
return *this;
}
};
@@ -251,17 +248,40 @@ namespace ranges {
inline constexpr bool enable_borrowed_range<subrange<_Ip, _Sp, _Kp>> = true;
template<range _Rp>
- using borrowed_subrange_t = _If<borrowed_range<_Rp>, subrange<iterator_t<_Rp> >, dangling>;
+ using borrowed_subrange_t = _If<borrowed_range<_Rp>, subrange<iterator_t<_Rp>>, dangling>;
} // namespace ranges
+// [range.subrange.general]
+
using ranges::get;
-// clang-format off
+// [ranges.syn]
+
+template<class _Ip, class _Sp, ranges::subrange_kind _Kp>
+struct tuple_size<ranges::subrange<_Ip, _Sp, _Kp>> : integral_constant<size_t, 2> {};
+
+template<class _Ip, class _Sp, ranges::subrange_kind _Kp>
+struct tuple_element<0, ranges::subrange<_Ip, _Sp, _Kp>> {
+ using type = _Ip;
+};
+
+template<class _Ip, class _Sp, ranges::subrange_kind _Kp>
+struct tuple_element<1, ranges::subrange<_Ip, _Sp, _Kp>> {
+ using type = _Sp;
+};
+
+template<class _Ip, class _Sp, ranges::subrange_kind _Kp>
+struct tuple_element<0, const ranges::subrange<_Ip, _Sp, _Kp>> {
+ using type = _Ip;
+};
+
+template<class _Ip, class _Sp, ranges::subrange_kind _Kp>
+struct tuple_element<1, const ranges::subrange<_Ip, _Sp, _Kp>> {
+ using type = _Sp;
+};
#endif // !defined(_LIBCPP_HAS_NO_RANGES)
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___RANGES_SUBRANGE_H
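As an aside (not part of the imported sources): a usage sketch for the reworked subrange, in particular the tuple_size/tuple_element specializations added at the end, which together with ranges::get make a subrange decomposable with structured bindings.

    #include <iostream>
    #include <ranges>
    #include <vector>

    int main() {
      std::vector<int> v{1, 2, 3, 4, 5};

      // Iterator/sentinel pair over {2, 3, 4}; sized_sentinel_for holds, so the
      // size is recomputed from the iterators rather than stored.
      std::ranges::subrange sub(v.begin() + 1, v.end() - 1);

      auto [first, last] = sub;                     // uses the tuple protocol above
      std::cout << *first << ' ' << (last - first) << ' ' << sub.size() << '\n';   // prints: 2 3 3
    }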
diff --git a/libcxx/include/__ranges/take_view.h b/libcxx/include/__ranges/take_view.h
new file mode 100644
index 000000000000..73a57fe96c08
--- /dev/null
+++ b/libcxx/include/__ranges/take_view.h
@@ -0,0 +1,185 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef _LIBCPP___RANGES_TAKE_VIEW_H
+#define _LIBCPP___RANGES_TAKE_VIEW_H
+
+#include <__algorithm/min.h>
+#include <__config>
+#include <__iterator/concepts.h>
+#include <__iterator/counted_iterator.h>
+#include <__iterator/default_sentinel.h>
+#include <__iterator/iterator_traits.h>
+#include <__ranges/access.h>
+#include <__ranges/all.h>
+#include <__ranges/concepts.h>
+#include <__ranges/enable_borrowed_range.h>
+#include <__ranges/size.h>
+#include <__ranges/view_interface.h>
+#include <__utility/move.h>
+#include <concepts>
+#include <type_traits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if !defined(_LIBCPP_HAS_NO_RANGES)
+
+namespace ranges {
+ template<view _View>
+ class take_view : public view_interface<take_view<_View>> {
+ [[no_unique_address]] _View __base_ = _View();
+ range_difference_t<_View> __count_ = 0;
+
+ template<bool> class __sentinel;
+
+ public:
+ _LIBCPP_HIDE_FROM_ABI
+ take_view() requires default_initializable<_View> = default;
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr take_view(_View __base, range_difference_t<_View> __count)
+ : __base_(_VSTD::move(__base)), __count_(__count) {}
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr _View base() const& requires copy_constructible<_View> { return __base_; }
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr _View base() && { return _VSTD::move(__base_); }
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr auto begin() requires (!__simple_view<_View>) {
+ if constexpr (sized_range<_View>) {
+ if constexpr (random_access_range<_View>) {
+ return ranges::begin(__base_);
+ } else {
+ using _DifferenceT = range_difference_t<_View>;
+ auto __size = size();
+ return counted_iterator(ranges::begin(__base_), static_cast<_DifferenceT>(__size));
+ }
+ } else {
+ return counted_iterator(ranges::begin(__base_), __count_);
+ }
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr auto begin() const requires range<const _View> {
+ if constexpr (sized_range<const _View>) {
+ if constexpr (random_access_range<const _View>) {
+ return ranges::begin(__base_);
+ } else {
+ using _DifferenceT = range_difference_t<const _View>;
+ auto __size = size();
+ return counted_iterator(ranges::begin(__base_), static_cast<_DifferenceT>(__size));
+ }
+ } else {
+ return counted_iterator(ranges::begin(__base_), __count_);
+ }
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr auto end() requires (!__simple_view<_View>) {
+ if constexpr (sized_range<_View>) {
+ if constexpr (random_access_range<_View>) {
+ return ranges::begin(__base_) + size();
+ } else {
+ return default_sentinel;
+ }
+ } else {
+ return __sentinel<false>{ranges::end(__base_)};
+ }
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr auto end() const requires range<const _View> {
+ if constexpr (sized_range<const _View>) {
+ if constexpr (random_access_range<const _View>) {
+ return ranges::begin(__base_) + size();
+ } else {
+ return default_sentinel;
+ }
+ } else {
+ return __sentinel<true>{ranges::end(__base_)};
+ }
+ }
+
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr auto size() requires sized_range<_View> {
+ auto __n = ranges::size(__base_);
+ // TODO: use ranges::min here.
+ return _VSTD::min(__n, static_cast<decltype(__n)>(__count_));
+ }
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr auto size() const requires sized_range<const _View> {
+ auto __n = ranges::size(__base_);
+ // TODO: use ranges::min here.
+ return _VSTD::min(__n, static_cast<decltype(__n)>(__count_));
+ }
+ };
+
+ template<view _View>
+ template<bool _Const>
+ class take_view<_View>::__sentinel {
+ using _Base = __maybe_const<_Const, _View>;
+ template<bool _OtherConst>
+ using _Iter = counted_iterator<iterator_t<__maybe_const<_OtherConst, _View>>>;
+ [[no_unique_address]] sentinel_t<_Base> __end_ = sentinel_t<_Base>();
+
+ template<bool>
+ friend class take_view<_View>::__sentinel;
+
+public:
+ _LIBCPP_HIDE_FROM_ABI
+ __sentinel() = default;
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr explicit __sentinel(sentinel_t<_Base> __end) : __end_(_VSTD::move(__end)) {}
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr __sentinel(__sentinel<!_Const> __s)
+ requires _Const && convertible_to<sentinel_t<_View>, sentinel_t<_Base>>
+ : __end_(_VSTD::move(__s.__end_)) {}
+
+ _LIBCPP_HIDE_FROM_ABI
+ constexpr sentinel_t<_Base> base() const { return __end_; }
+
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr bool operator==(const _Iter<_Const>& __lhs, const __sentinel& __rhs) {
+ return __lhs.count() == 0 || __lhs.base() == __rhs.__end_;
+ }
+
+ template<bool _OtherConst = !_Const>
+ requires sentinel_for<sentinel_t<_Base>, iterator_t<__maybe_const<_OtherConst, _View>>>
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr bool operator==(const _Iter<_Const>& __lhs, const __sentinel& __rhs) {
+ return __lhs.count() == 0 || __lhs.base() == __rhs.__end_;
+ }
+ };
+
+ template<class _Range>
+ take_view(_Range&&, range_difference_t<_Range>) -> take_view<views::all_t<_Range>>;
+
+ template<class _Tp>
+ inline constexpr bool enable_borrowed_range<take_view<_Tp>> = enable_borrowed_range<_Tp>;
+} // namespace ranges
+
+#endif // !defined(_LIBCPP_HAS_NO_RANGES)
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___RANGES_TAKE_VIEW_H
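As an aside (not part of the imported sources): a sketch of the new take_view in its simplest case. For a sized random-access underlying range, begin() stays a plain iterator and end() is begin() + size(); counted_iterator and the __sentinel are only needed otherwise.

    #include <iostream>
    #include <ranges>
    #include <vector>

    int main() {
      std::vector<int> v{1, 2, 3, 4, 5};
      std::ranges::take_view taken(v, 3);           // take_view<ref_view<vector<int>>> via the deduction guide

      for (int x : taken)
        std::cout << x << ' ';                      // prints: 1 2 3
      std::cout << taken.size() << '\n';            // prints: 3, the min of 5 and the count
    }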
diff --git a/libcxx/include/__ranges/transform_view.h b/libcxx/include/__ranges/transform_view.h
index 4243dc0366e9..208a9a22694a 100644
--- a/libcxx/include/__ranges/transform_view.h
+++ b/libcxx/include/__ranges/transform_view.h
@@ -9,27 +9,37 @@
#ifndef _LIBCPP___RANGES_TRANSFORM_VIEW_H
#define _LIBCPP___RANGES_TRANSFORM_VIEW_H
+#include <__compare/three_way_comparable.h>
+#include <__concepts/constructible.h>
+#include <__concepts/convertible_to.h>
+#include <__concepts/copyable.h>
+#include <__concepts/derived_from.h>
+#include <__concepts/equality_comparable.h>
+#include <__concepts/invocable.h>
#include <__config>
+#include <__functional/bind_back.h>
+#include <__functional/invoke.h>
#include <__iterator/concepts.h>
#include <__iterator/iter_swap.h>
#include <__iterator/iterator_traits.h>
+#include <__memory/addressof.h>
#include <__ranges/access.h>
#include <__ranges/all.h>
#include <__ranges/concepts.h>
#include <__ranges/copyable_box.h>
#include <__ranges/empty.h>
+#include <__ranges/range_adaptor.h>
#include <__ranges/size.h>
#include <__ranges/view_interface.h>
-#include <concepts>
+#include <__utility/forward.h>
+#include <__utility/in_place.h>
+#include <__utility/move.h>
#include <type_traits>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
#if !defined(_LIBCPP_HAS_NO_RANGES)
@@ -293,13 +303,12 @@ public:
return __x.__current_ >= __y.__current_;
}
-// TODO: Fix this as soon as soon as three_way_comparable is implemented.
-// _LIBCPP_HIDE_FROM_ABI
-// friend constexpr auto operator<=>(const __iterator& __x, const __iterator& __y)
-// requires random_access_range<_Base> && three_way_comparable<iterator_t<_Base>>
-// {
-// return __x.__current_ <=> __y.__current_;
-// }
+ _LIBCPP_HIDE_FROM_ABI
+ friend constexpr auto operator<=>(const __iterator& __x, const __iterator& __y)
+ requires random_access_range<_Base> && three_way_comparable<iterator_t<_Base>>
+ {
+ return __x.__current_ <=> __y.__current_;
+ }
_LIBCPP_HIDE_FROM_ABI
friend constexpr __iterator operator+(__iterator __i, difference_type __n)
@@ -397,12 +406,34 @@ public:
}
};
+namespace views {
+namespace __transform {
+ struct __fn {
+ template<class _Range, class _Fn>
+ [[nodiscard]] _LIBCPP_HIDE_FROM_ABI
+ constexpr auto operator()(_Range&& __range, _Fn&& __f) const
+ noexcept(noexcept(transform_view(_VSTD::forward<_Range>(__range), _VSTD::forward<_Fn>(__f))))
+ -> decltype( transform_view(_VSTD::forward<_Range>(__range), _VSTD::forward<_Fn>(__f)))
+ { return transform_view(_VSTD::forward<_Range>(__range), _VSTD::forward<_Fn>(__f)); }
+
+ template<class _Fn>
+ requires constructible_from<decay_t<_Fn>, _Fn>
+ [[nodiscard]] _LIBCPP_HIDE_FROM_ABI
+ constexpr auto operator()(_Fn&& __f) const
+ noexcept(is_nothrow_constructible_v<decay_t<_Fn>, _Fn>)
+ { return __range_adaptor_closure_t(_VSTD::__bind_back(*this, _VSTD::forward<_Fn>(__f))); }
+ };
+}
+
+inline namespace __cpo {
+ inline constexpr auto transform = __transform::__fn{};
+}
+} // namespace views
+
} // namespace ranges
#endif // !defined(_LIBCPP_HAS_NO_RANGES)
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___RANGES_TRANSFORM_VIEW_H
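As an aside (not part of the imported sources): a sketch of the views::transform adaptor added above. The first __fn overload handles the direct (range, function) call; the second stores the function with __bind_back and returns a closure so the adaptor composes in a pipeline.

    #include <iostream>
    #include <ranges>
    #include <vector>

    int main() {
      std::vector<int> v{1, 2, 3};

      auto squared = std::views::transform(v, [](int x) { return x * x; });    // direct call
      std::cout << *squared.begin() << '\n';                                   // prints: 1

      for (int x : v | std::views::transform([](int x) { return x + 10; }))    // pipeline form
        std::cout << x << ' ';                                                 // prints: 11 12 13
      std::cout << '\n';
    }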
diff --git a/libcxx/include/__ranges/view_interface.h b/libcxx/include/__ranges/view_interface.h
index 62cc5fd24362..8a1f5d8c9251 100644
--- a/libcxx/include/__ranges/view_interface.h
+++ b/libcxx/include/__ranges/view_interface.h
@@ -10,6 +10,7 @@
#define _LIBCPP___RANGES_VIEW_INTERFACE_H
#include <__config>
+#include <__debug>
#include <__iterator/concepts.h>
#include <__iterator/iterator_traits.h>
#include <__iterator/prev.h>
@@ -25,9 +26,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
#if !defined(_LIBCPP_HAS_NO_RANGES)
@@ -45,11 +43,13 @@ template<class _Derived>
class view_interface : public view_base {
_LIBCPP_HIDE_FROM_ABI
constexpr _Derived& __derived() noexcept {
+ static_assert(sizeof(_Derived) && derived_from<_Derived, view_interface> && view<_Derived>);
return static_cast<_Derived&>(*this);
}
_LIBCPP_HIDE_FROM_ABI
constexpr _Derived const& __derived() const noexcept {
+ static_assert(sizeof(_Derived) && derived_from<_Derived, view_interface> && view<_Derived>);
return static_cast<_Derived const&>(*this);
}
@@ -193,6 +193,4 @@ public:
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___RANGES_VIEW_INTERFACE_H
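As an aside (not part of the imported sources): a minimal CRTP client of view_interface (the name digits is made up) showing what the newly added static_asserts in __derived() verify, namely that _Derived is a complete type that really derives from view_interface and models view by the time any mixin member is instantiated.

    #include <iostream>
    #include <ranges>

    // Only begin()/end() are defined here; empty(), front(), back(), size()
    // and operator[] all come from view_interface.
    struct digits : std::ranges::view_interface<digits> {
      static constexpr int table[5] = {0, 1, 2, 3, 4};
      constexpr const int* begin() const { return table; }
      constexpr const int* end() const { return table + 5; }
    };

    int main() {
      static_assert(std::ranges::view<digits>);
      digits d;
      std::cout << d.front() << ' ' << d.back() << ' ' << d.size() << '\n';   // prints: 0 4 5
    }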
diff --git a/libcxx/include/__split_buffer b/libcxx/include/__split_buffer
index 901c0374aa1c..2642cf6fca49 100644
--- a/libcxx/include/__split_buffer
+++ b/libcxx/include/__split_buffer
@@ -17,17 +17,8 @@ _LIBCPP_PUSH_MACROS
_LIBCPP_BEGIN_NAMESPACE_STD
-template <bool>
-class __split_buffer_common
-{
-protected:
- _LIBCPP_NORETURN void __throw_length_error() const;
- _LIBCPP_NORETURN void __throw_out_of_range() const;
-};
-
template <class _Tp, class _Allocator = allocator<_Tp> >
struct __split_buffer
- : private __split_buffer_common<true>
{
private:
__split_buffer(const __split_buffer&);
@@ -248,7 +239,7 @@ __split_buffer<_Tp, _Allocator>::__construct_at_end(_InputIter __first, _InputIt
size_type __old_cap = __end_cap() - __first_;
size_type __new_cap = _VSTD::max<size_type>(2 * __old_cap, 8);
__split_buffer __buf(__new_cap, 0, __a);
- for (pointer __p = __begin_; __p != __end_; ++__p, ++__buf.__end_)
+ for (pointer __p = __begin_; __p != __end_; ++__p, (void) ++__buf.__end_)
__alloc_traits::construct(__buf.__alloc(),
_VSTD::__to_address(__buf.__end_), _VSTD::move(*__p));
swap(__buf);
@@ -268,7 +259,7 @@ typename enable_if
__split_buffer<_Tp, _Allocator>::__construct_at_end(_ForwardIterator __first, _ForwardIterator __last)
{
_ConstructTransaction __tx(&this->__end_, _VSTD::distance(__first, __last));
- for (; __tx.__pos_ != __tx.__end_; ++__tx.__pos_, ++__first) {
+ for (; __tx.__pos_ != __tx.__end_; ++__tx.__pos_, (void) ++__first) {
__alloc_traits::construct(this->__alloc(),
_VSTD::__to_address(__tx.__pos_), *__first);
}
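As an aside (not part of the imported sources): the usual motivation for the (void) casts added to the two loops above is to keep a user-defined operator, from being selected when two class-type increments are joined in one comma expression. A standalone illustration, with Evil as a made-up stand-in for such an iterator type:

    #include <iostream>

    struct Evil {
      Evil& operator++() { return *this; }
    };

    // A hostile comma overload that would otherwise be chosen for "++a, ++b".
    void operator,(Evil&, Evil&) { std::cout << "overloaded comma called\n"; }

    int main() {
      Evil a, b;
      (++a, ++b);            // calls the overloaded comma above
      (++a, (void) ++b);     // casting one operand to void forces the built-in comma
    }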
diff --git a/libcxx/include/__string b/libcxx/include/__string
index b77a7fb4f8d3..890fb21dd3f1 100644
--- a/libcxx/include/__string
+++ b/libcxx/include/__string
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===-------------------------- __string ----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -23,8 +23,12 @@
#include <cstdio> // for EOF
#include <cstdint> // for uint_least16_t
#include <cstring> // for memcpy
-#include <cwchar> // for wmemcpy
#include <type_traits> // for __libcpp_is_constant_evaluated
+#include <iosfwd> // for streampos & friends
+
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
+# include <cwchar> // for wmemcpy
+#endif
#include <__debug>
@@ -265,7 +269,9 @@ inline _LIBCPP_CONSTEXPR_AFTER_CXX17
_CharT*
char_traits<_CharT>::copy(char_type* __s1, const char_type* __s2, size_t __n)
{
- _LIBCPP_ASSERT(__s2 < __s1 || __s2 >= __s1+__n, "char_traits::copy overlapped range");
+ if (!__libcpp_is_constant_evaluated()) {
+ _LIBCPP_ASSERT(__s2 < __s1 || __s2 >= __s1+__n, "char_traits::copy overlapped range");
+ }
char_type* __r = __s1;
for (; __n; --__n, ++__s1, ++__s2)
assign(*__s1, *__s2);
@@ -348,7 +354,9 @@ struct _LIBCPP_TEMPLATE_VIS char_traits<char>
static inline _LIBCPP_CONSTEXPR_AFTER_CXX17
char_type* copy(char_type* __s1, const char_type* __s2, size_t __n) _NOEXCEPT
{
- _LIBCPP_ASSERT(__s2 < __s1 || __s2 >= __s1+__n, "char_traits::copy overlapped range");
+ if (!__libcpp_is_constant_evaluated()) {
+ _LIBCPP_ASSERT(__s2 < __s1 || __s2 >= __s1+__n, "char_traits::copy overlapped range");
+ }
return __libcpp_is_constant_evaluated()
? _VSTD::__copy_constexpr(__s1, __s2, __n)
: __n == 0 ? __s1 : (char_type*)_VSTD::memcpy(__s1, __s2, __n);
@@ -419,6 +427,7 @@ char_traits<char>::find(const char_type* __s, size_t __n, const char_type& __a)
// char_traits<wchar_t>
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
template <>
struct _LIBCPP_TEMPLATE_VIS char_traits<wchar_t>
{
@@ -451,7 +460,9 @@ struct _LIBCPP_TEMPLATE_VIS char_traits<wchar_t>
static inline _LIBCPP_CONSTEXPR_AFTER_CXX17
char_type* copy(char_type* __s1, const char_type* __s2, size_t __n) _NOEXCEPT
{
- _LIBCPP_ASSERT(__s2 < __s1 || __s2 >= __s1+__n, "char_traits::copy overlapped range");
+ if (!__libcpp_is_constant_evaluated()) {
+ _LIBCPP_ASSERT(__s2 < __s1 || __s2 >= __s1+__n, "char_traits::copy overlapped range");
+ }
return __libcpp_is_constant_evaluated()
? _VSTD::__copy_constexpr(__s1, __s2, __n)
: __n == 0 ? __s1 : _VSTD::wmemcpy(__s1, __s2, __n);
@@ -497,18 +508,6 @@ char_traits<wchar_t>::compare(const char_type* __s1, const char_type* __s2, size
#endif
}
-
-template <class _Traits>
-_LIBCPP_INLINE_VISIBILITY
-_LIBCPP_CONSTEXPR
-inline size_t __char_traits_length_checked(const typename _Traits::char_type* __s) _NOEXCEPT {
-#if _LIBCPP_DEBUG_LEVEL >= 1
- return __s ? _Traits::length(__s) : (_VSTD::__libcpp_debug_function(_VSTD::__libcpp_debug_info(__FILE__, __LINE__, "p == nullptr", "null pointer passed to non-null argument of char_traits<...>::length")), 0);
-#else
- return _Traits::length(__s);
-#endif
-}
-
inline _LIBCPP_CONSTEXPR_AFTER_CXX14
size_t
char_traits<wchar_t>::length(const char_type* __s) _NOEXCEPT
@@ -545,7 +544,18 @@ char_traits<wchar_t>::find(const char_type* __s, size_t __n, const char_type& __
return nullptr;
#endif
}
+#endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS
+template <class _Traits>
+_LIBCPP_INLINE_VISIBILITY
+_LIBCPP_CONSTEXPR
+inline size_t __char_traits_length_checked(const typename _Traits::char_type* __s) _NOEXCEPT {
+#if _LIBCPP_DEBUG_LEVEL >= 1
+ return __s ? _Traits::length(__s) : (_VSTD::__libcpp_debug_function(_VSTD::__libcpp_debug_info(__FILE__, __LINE__, "p == nullptr", "null pointer passed to non-null argument of char_traits<...>::length")), 0);
+#else
+ return _Traits::length(__s);
+#endif
+}
#ifndef _LIBCPP_HAS_NO_CHAR8_T
@@ -584,8 +594,10 @@ struct _LIBCPP_TEMPLATE_VIS char_traits<char8_t>
static _LIBCPP_CONSTEXPR_AFTER_CXX17
char_type* copy(char_type* __s1, const char_type* __s2, size_t __n) _NOEXCEPT
- {
- _LIBCPP_ASSERT(__s2 < __s1 || __s2 >= __s1+__n, "char_traits::copy overlapped range");
+ {
+ if (!__libcpp_is_constant_evaluated()) {
+ _LIBCPP_ASSERT(__s2 < __s1 || __s2 >= __s1+__n, "char_traits::copy overlapped range");
+ }
return __libcpp_is_constant_evaluated()
? _VSTD::__copy_constexpr(__s1, __s2, __n)
: __n == 0 ? __s1 : (char_type*)_VSTD::memcpy(__s1, __s2, __n);
@@ -761,7 +773,9 @@ inline _LIBCPP_CONSTEXPR_AFTER_CXX17
char16_t*
char_traits<char16_t>::copy(char_type* __s1, const char_type* __s2, size_t __n) _NOEXCEPT
{
- _LIBCPP_ASSERT(__s2 < __s1 || __s2 >= __s1+__n, "char_traits::copy overlapped range");
+ if (!__libcpp_is_constant_evaluated()) {
+ _LIBCPP_ASSERT(__s2 < __s1 || __s2 >= __s1+__n, "char_traits::copy overlapped range");
+ }
char_type* __r = __s1;
for (; __n; --__n, ++__s1, ++__s2)
assign(*__s1, *__s2);
@@ -881,7 +895,9 @@ inline _LIBCPP_CONSTEXPR_AFTER_CXX17
char32_t*
char_traits<char32_t>::copy(char_type* __s1, const char_type* __s2, size_t __n) _NOEXCEPT
{
- _LIBCPP_ASSERT(__s2 < __s1 || __s2 >= __s1+__n, "char_traits::copy overlapped range");
+ if (!__libcpp_is_constant_evaluated()) {
+ _LIBCPP_ASSERT(__s2 < __s1 || __s2 >= __s1+__n, "char_traits::copy overlapped range");
+ }
char_type* __r = __s1;
for (; __n; --__n, ++__s1, ++__s2)
assign(*__s1, *__s2);
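As an aside (not part of the imported sources): the guard added around each _LIBCPP_ASSERT above skips the overlap check during constant evaluation, because relational comparison of pointers into unrelated objects is not a constant expression and would otherwise make copy() unusable in constexpr contexts. A standalone sketch of the same pattern using the public std::is_constant_evaluated (checked_copy is an illustrative name):

    #include <cassert>
    #include <cstddef>
    #include <type_traits>

    constexpr char* checked_copy(char* dst, const char* src, std::size_t n) {
      if (!std::is_constant_evaluated()) {
        // Runtime-only overlap check; skipped at compile time.
        assert(src < dst || src >= dst + n);
      }
      for (std::size_t i = 0; i != n; ++i)
        dst[i] = src[i];
      return dst;
    }

    int main() {
      char buf[3] = {};
      checked_copy(buf, "hi", 3);                   // runtime: the assert is active

      constexpr bool ok = [] {                      // compile time: the assert is skipped
        char d[3] = {};
        const char s[3] = {'a', 'b', 'c'};
        checked_copy(d, s, 3);
        return d[2] == 'c';
      }();
      static_assert(ok);
    }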
diff --git a/libcxx/include/__thread/poll_with_backoff.h b/libcxx/include/__thread/poll_with_backoff.h
new file mode 100644
index 000000000000..e1d8a9c90c56
--- /dev/null
+++ b/libcxx/include/__thread/poll_with_backoff.h
@@ -0,0 +1,68 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef _LIBCPP___THREAD_POLL_WITH_BACKOFF_H
+#define _LIBCPP___THREAD_POLL_WITH_BACKOFF_H
+
+#include <__config>
+#include <chrono>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+static _LIBCPP_CONSTEXPR const int __libcpp_polling_count = 64;
+
+// Polls a thread for a condition given by a predicate, and backs off based on a backoff policy
+// before polling again.
+//
+// - __f is the "test function" that should return true if polling succeeded, and false if it failed.
+//
+// - __bf is the "backoff policy", which is called with the duration since we started polling. It should
+// return false in order to resume polling, and true if polling should stop entirely for some reason.
+// In general, backoff policies sleep for some time before returning control to the polling loop.
+//
+// - __max_elapsed is the maximum duration to try polling for. If the maximum duration is exceeded,
+// the polling loop will return false to report a timeout.
+template<class _Fn, class _BFn>
+_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI
+bool __libcpp_thread_poll_with_backoff(_Fn&& __f, _BFn&& __bf, chrono::nanoseconds __max_elapsed = chrono::nanoseconds::zero()) {
+ auto const __start = chrono::high_resolution_clock::now();
+ for (int __count = 0;;) {
+ if (__f())
+ return true; // _Fn completion means success
+ if (__count < __libcpp_polling_count) {
+ __count += 1;
+ continue;
+ }
+ chrono::nanoseconds const __elapsed = chrono::high_resolution_clock::now() - __start;
+ if (__max_elapsed != chrono::nanoseconds::zero() && __max_elapsed < __elapsed)
+ return false; // timeout failure
+ if (__bf(__elapsed))
+ return false; // _BFn completion means failure
+ }
+}
+
+// A trivial backoff policy that always immediately returns the control to
+// the polling loop.
+//
+// This is not very well-behaved since it will cause the polling loop to spin,
+// so this should most likely only be used on single-threaded systems where there
+// are no other threads to compete with.
+struct __spinning_backoff_policy {
+ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR
+ bool operator()(chrono::nanoseconds const&) const {
+ return false;
+ }
+};
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___THREAD_POLL_WITH_BACKOFF_H
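As an aside (not part of the imported sources): a self-contained restatement of the polling loop above using only public names; poll_with_backoff, polling_count and the lambdas here are illustrative, not part of libc++'s interface. The predicate is retried a fixed number of times before the backoff policy is consulted with the elapsed time, and a non-zero max_elapsed turns the loop into a timed wait.

    #include <atomic>
    #include <chrono>
    #include <iostream>
    #include <thread>

    constexpr int polling_count = 64;               // mirrors __libcpp_polling_count

    template <class Fn, class BFn>
    bool poll_with_backoff(Fn f, BFn bf,
                           std::chrono::nanoseconds max_elapsed = std::chrono::nanoseconds::zero()) {
      auto const start = std::chrono::high_resolution_clock::now();
      for (int count = 0;;) {
        if (f())
          return true;                              // predicate satisfied
        if (count < polling_count) {                // spin a few times before backing off
          ++count;
          continue;
        }
        std::chrono::nanoseconds const elapsed = std::chrono::high_resolution_clock::now() - start;
        if (max_elapsed != std::chrono::nanoseconds::zero() && max_elapsed < elapsed)
          return false;                             // timed out
        if (bf(elapsed))
          return false;                             // backoff policy asked to stop
      }
    }

    int main() {
      std::atomic<bool> done{false};
      std::thread t([&] {
        std::this_thread::sleep_for(std::chrono::milliseconds(5));
        done = true;
      });
      bool const observed = poll_with_backoff(
          [&] { return done.load(); },
          [](std::chrono::nanoseconds) { std::this_thread::yield(); return false; });
      t.join();
      std::cout << (observed ? "observed" : "timed out") << '\n';
    }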
diff --git a/libcxx/include/__threading_support b/libcxx/include/__threading_support
index 4d867167c2b1..0094fca2fb38 100644
--- a/libcxx/include/__threading_support
+++ b/libcxx/include/__threading_support
@@ -12,6 +12,7 @@
#include <__availability>
#include <__config>
+#include <__thread/poll_with_backoff.h>
#include <chrono>
#include <errno.h>
#include <iosfwd>
@@ -29,16 +30,9 @@
# include <__external_threading>
#elif !defined(_LIBCPP_HAS_NO_THREADS)
-#if defined(__APPLE__) || defined(__MVS__)
-# define _LIBCPP_NO_NATIVE_SEMAPHORES
-#endif
-
#if defined(_LIBCPP_HAS_THREAD_API_PTHREAD)
# include <pthread.h>
# include <sched.h>
-# ifndef _LIBCPP_NO_NATIVE_SEMAPHORES
-# include <semaphore.h>
-# endif
#elif defined(_LIBCPP_HAS_THREAD_API_C11)
# include <threads.h>
#endif
@@ -78,12 +72,6 @@ typedef pthread_mutex_t __libcpp_recursive_mutex_t;
typedef pthread_cond_t __libcpp_condvar_t;
#define _LIBCPP_CONDVAR_INITIALIZER PTHREAD_COND_INITIALIZER
-#ifndef _LIBCPP_NO_NATIVE_SEMAPHORES
-// Semaphore
-typedef sem_t __libcpp_semaphore_t;
-# define _LIBCPP_SEMAPHORE_MAX SEM_VALUE_MAX
-#endif
-
// Execute once
typedef pthread_once_t __libcpp_exec_once_flag;
#define _LIBCPP_EXEC_ONCE_INITIALIZER PTHREAD_ONCE_INIT
@@ -149,12 +137,6 @@ typedef void* __libcpp_recursive_mutex_t[5];
typedef void* __libcpp_condvar_t;
#define _LIBCPP_CONDVAR_INITIALIZER 0
-// Semaphore
-typedef void* __libcpp_semaphore_t;
-#if defined(_LIBCPP_HAS_THREAD_API_WIN32)
-# define _LIBCPP_SEMAPHORE_MAX (::std::numeric_limits<long>::max())
-#endif
-
// Execute Once
typedef void* __libcpp_exec_once_flag;
#define _LIBCPP_EXEC_ONCE_INITIALIZER 0
@@ -219,26 +201,6 @@ int __libcpp_condvar_timedwait(__libcpp_condvar_t *__cv, __libcpp_mutex_t *__m,
_LIBCPP_THREAD_ABI_VISIBILITY
int __libcpp_condvar_destroy(__libcpp_condvar_t* __cv);
-#ifndef _LIBCPP_NO_NATIVE_SEMAPHORES
-
-// Semaphore
-_LIBCPP_THREAD_ABI_VISIBILITY
-bool __libcpp_semaphore_init(__libcpp_semaphore_t* __sem, int __init);
-
-_LIBCPP_THREAD_ABI_VISIBILITY
-bool __libcpp_semaphore_destroy(__libcpp_semaphore_t* __sem);
-
-_LIBCPP_THREAD_ABI_VISIBILITY
-bool __libcpp_semaphore_post(__libcpp_semaphore_t* __sem);
-
-_LIBCPP_THREAD_ABI_VISIBILITY
-bool __libcpp_semaphore_wait(__libcpp_semaphore_t* __sem);
-
-_LIBCPP_THREAD_ABI_VISIBILITY
-bool __libcpp_semaphore_wait_timed(__libcpp_semaphore_t* __sem, chrono::nanoseconds const& __ns);
-
-#endif // _LIBCPP_NO_NATIVE_SEMAPHORES
-
// Execute once
_LIBCPP_THREAD_ABI_VISIBILITY
int __libcpp_execute_once(__libcpp_exec_once_flag *flag,
@@ -306,29 +268,6 @@ struct __libcpp_timed_backoff_policy {
}
};
-static _LIBCPP_CONSTEXPR const int __libcpp_polling_count = 64;
-
-template<class _Fn, class _BFn>
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
-bool __libcpp_thread_poll_with_backoff(
- _Fn && __f, _BFn && __bf, chrono::nanoseconds __max_elapsed = chrono::nanoseconds::zero())
-{
- auto const __start = chrono::high_resolution_clock::now();
- for(int __count = 0;;) {
- if(__f())
- return true; // _Fn completion means success
- if(__count < __libcpp_polling_count) {
- __count += 1;
- continue;
- }
- chrono::nanoseconds const __elapsed = chrono::high_resolution_clock::now() - __start;
- if(__max_elapsed != chrono::nanoseconds::zero() && __max_elapsed < __elapsed)
- return false; // timeout failure
- if(__bf(__elapsed))
- return false; // _BFn completion means failure
- }
-}
-
#if (!defined(_LIBCPP_HAS_THREAD_LIBRARY_EXTERNAL) || \
defined(_LIBCPP_BUILDING_THREAD_LIBRARY_EXTERNAL))
@@ -452,38 +391,6 @@ int __libcpp_condvar_destroy(__libcpp_condvar_t *__cv)
return pthread_cond_destroy(__cv);
}
-#ifndef _LIBCPP_NO_NATIVE_SEMAPHORES
-
-// Semaphore
-bool __libcpp_semaphore_init(__libcpp_semaphore_t* __sem, int __init)
-{
- return sem_init(__sem, 0, __init) == 0;
-}
-
-bool __libcpp_semaphore_destroy(__libcpp_semaphore_t* __sem)
-{
- return sem_destroy(__sem) == 0;
-}
-
-bool __libcpp_semaphore_post(__libcpp_semaphore_t* __sem)
-{
- return sem_post(__sem) == 0;
-}
-
-bool __libcpp_semaphore_wait(__libcpp_semaphore_t* __sem)
-{
- return sem_wait(__sem) == 0;
-}
-
-bool __libcpp_semaphore_wait_timed(__libcpp_semaphore_t* __sem, chrono::nanoseconds const& __ns)
-{
- auto const __abs_time = chrono::system_clock::now().time_since_epoch() + __ns;
- __libcpp_timespec_t __ts = __thread_detail::__convert_to_timespec(__abs_time);
- return sem_timedwait(__sem, &__ts) == 0;
-}
-
-#endif //_LIBCPP_NO_NATIVE_SEMAPHORES
-
// Execute once
int __libcpp_execute_once(__libcpp_exec_once_flag *flag,
void (*init_routine)()) {
diff --git a/libcxx/include/__tree b/libcxx/include/__tree
index 6113322ed99d..f3f2e8d15f5d 100644
--- a/libcxx/include/__tree
+++ b/libcxx/include/__tree
@@ -738,9 +738,9 @@ public:
}
private:
- ~__tree_node_base() _LIBCPP_EQUAL_DELETE;
- __tree_node_base(__tree_node_base const&) _LIBCPP_EQUAL_DELETE;
- __tree_node_base& operator=(__tree_node_base const&) _LIBCPP_EQUAL_DELETE;
+ ~__tree_node_base() = delete;
+ __tree_node_base(__tree_node_base const&) = delete;
+ __tree_node_base& operator=(__tree_node_base const&) = delete;
};
template <class _Tp, class _VoidPtr>
@@ -753,9 +753,9 @@ public:
__node_value_type __value_;
private:
- ~__tree_node() _LIBCPP_EQUAL_DELETE;
- __tree_node(__tree_node const&) _LIBCPP_EQUAL_DELETE;
- __tree_node& operator=(__tree_node const&) _LIBCPP_EQUAL_DELETE;
+ ~__tree_node() = delete;
+ __tree_node(__tree_node const&) = delete;
+ __tree_node& operator=(__tree_node const&) = delete;
};
@@ -1612,7 +1612,7 @@ template <class _Tp, class _Compare, class _Allocator>
__tree<_Tp, _Compare, _Allocator>&
__tree<_Tp, _Compare, _Allocator>::operator=(const __tree& __t)
{
- if (this != &__t)
+ if (this != _VSTD::addressof(__t))
{
value_comp() = __t.value_comp();
__copy_assign_alloc(__t);
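As an aside (not part of the imported sources): replacing &__t with _VSTD::addressof(__t) above follows the usual library hygiene of not trusting unary operator&, which user code may overload. A standalone illustration with a made-up type:

    #include <iostream>
    #include <memory>

    struct Pathological {
      int* operator&() { return nullptr; }          // hostile overload of unary &
    };

    int main() {
      Pathological p;
      std::cout << (&p == nullptr) << '\n';                 // prints 1: the overload is called
      std::cout << (std::addressof(p) == nullptr) << '\n';  // prints 0: the real address
    }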
diff --git a/libcxx/include/__tuple b/libcxx/include/__tuple
index 082ec869eec7..11fbba260238 100644
--- a/libcxx/include/__tuple
+++ b/libcxx/include/__tuple
@@ -58,19 +58,19 @@ template <size_t _Ip, class _Tp> struct _LIBCPP_TEMPLATE_VIS tuple_element;
template <size_t _Ip, class _Tp>
struct _LIBCPP_TEMPLATE_VIS tuple_element<_Ip, const _Tp>
{
- typedef _LIBCPP_NODEBUG_TYPE typename add_const<typename tuple_element<_Ip, _Tp>::type>::type type;
+ typedef _LIBCPP_NODEBUG typename add_const<typename tuple_element<_Ip, _Tp>::type>::type type;
};
template <size_t _Ip, class _Tp>
struct _LIBCPP_TEMPLATE_VIS tuple_element<_Ip, volatile _Tp>
{
- typedef _LIBCPP_NODEBUG_TYPE typename add_volatile<typename tuple_element<_Ip, _Tp>::type>::type type;
+ typedef _LIBCPP_NODEBUG typename add_volatile<typename tuple_element<_Ip, _Tp>::type>::type type;
};
template <size_t _Ip, class _Tp>
struct _LIBCPP_TEMPLATE_VIS tuple_element<_Ip, const volatile _Tp>
{
- typedef _LIBCPP_NODEBUG_TYPE typename add_cv<typename tuple_element<_Ip, _Tp>::type>::type type;
+ typedef _LIBCPP_NODEBUG typename add_cv<typename tuple_element<_Ip, _Tp>::type>::type type;
};
template <class _Tp> struct __tuple_like : false_type {};
@@ -99,7 +99,7 @@ namespace __detail {
template<typename _Tp, size_t ..._Extra> struct __repeat;
template<typename _Tp, _Tp ..._Np, size_t ..._Extra> struct __repeat<__integer_sequence<_Tp, _Np...>, _Extra...> {
- typedef _LIBCPP_NODEBUG_TYPE __integer_sequence<_Tp,
+ typedef _LIBCPP_NODEBUG __integer_sequence<_Tp,
_Np...,
sizeof...(_Np) + _Np...,
2 * sizeof...(_Np) + _Np...,
@@ -253,7 +253,7 @@ template <class ..._Tp> struct __tuple_types {};
namespace __indexer_detail {
template <size_t _Idx, class _Tp>
-struct __indexed { using type _LIBCPP_NODEBUG_TYPE = _Tp; };
+struct __indexed { using type _LIBCPP_NODEBUG = _Tp; };
template <class _Types, class _Indexes> struct __indexer;
@@ -268,7 +268,7 @@ __indexed<_Idx, _Tp> __at_index(__indexed<_Idx, _Tp> const&);
} // namespace __indexer_detail
template <size_t _Idx, class ..._Types>
-using __type_pack_element _LIBCPP_NODEBUG_TYPE = typename decltype(
+using __type_pack_element _LIBCPP_NODEBUG = typename decltype(
__indexer_detail::__at_index<_Idx>(
__indexer_detail::__indexer<
__tuple_types<_Types...>,
@@ -281,7 +281,7 @@ template <size_t _Ip, class ..._Types>
struct _LIBCPP_TEMPLATE_VIS tuple_element<_Ip, __tuple_types<_Types...>>
{
static_assert(_Ip < sizeof...(_Types), "tuple_element index out of range");
- typedef _LIBCPP_NODEBUG_TYPE __type_pack_element<_Ip, _Types...> type;
+ typedef _LIBCPP_NODEBUG __type_pack_element<_Ip, _Types...> type;
};
@@ -301,34 +301,34 @@ struct __apply_cv_mf<false, false, false> {
};
template <>
struct __apply_cv_mf<false, true, false> {
- template <class _Tp> using __apply _LIBCPP_NODEBUG_TYPE = const _Tp;
+ template <class _Tp> using __apply _LIBCPP_NODEBUG = const _Tp;
};
template <>
struct __apply_cv_mf<false, false, true> {
- template <class _Tp> using __apply _LIBCPP_NODEBUG_TYPE = volatile _Tp;
+ template <class _Tp> using __apply _LIBCPP_NODEBUG = volatile _Tp;
};
template <>
struct __apply_cv_mf<false, true, true> {
- template <class _Tp> using __apply _LIBCPP_NODEBUG_TYPE = const volatile _Tp;
+ template <class _Tp> using __apply _LIBCPP_NODEBUG = const volatile _Tp;
};
template <>
struct __apply_cv_mf<true, false, false> {
- template <class _Tp> using __apply _LIBCPP_NODEBUG_TYPE = _Tp&;
+ template <class _Tp> using __apply _LIBCPP_NODEBUG = _Tp&;
};
template <>
struct __apply_cv_mf<true, true, false> {
- template <class _Tp> using __apply _LIBCPP_NODEBUG_TYPE = const _Tp&;
+ template <class _Tp> using __apply _LIBCPP_NODEBUG = const _Tp&;
};
template <>
struct __apply_cv_mf<true, false, true> {
- template <class _Tp> using __apply _LIBCPP_NODEBUG_TYPE = volatile _Tp&;
+ template <class _Tp> using __apply _LIBCPP_NODEBUG = volatile _Tp&;
};
template <>
struct __apply_cv_mf<true, true, true> {
- template <class _Tp> using __apply _LIBCPP_NODEBUG_TYPE = const volatile _Tp&;
+ template <class _Tp> using __apply _LIBCPP_NODEBUG = const volatile _Tp&;
};
template <class _Tp, class _RawTp = typename remove_reference<_Tp>::type>
-using __apply_cv_t _LIBCPP_NODEBUG_TYPE = __apply_cv_mf<
+using __apply_cv_t _LIBCPP_NODEBUG = __apply_cv_mf<
is_lvalue_reference<_Tp>::value,
is_const<_RawTp>::value,
is_volatile<_RawTp>::value>;
@@ -347,7 +347,7 @@ template <template <class...> class _Tuple, class ..._Types, size_t ..._Idx>
struct __make_tuple_types_flat<_Tuple<_Types...>, __tuple_indices<_Idx...>> {
// Specialization for pair, tuple, and __tuple_types
template <class _Tp, class _ApplyFn = __apply_cv_t<_Tp>>
- using __apply_quals _LIBCPP_NODEBUG_TYPE = __tuple_types<
+ using __apply_quals _LIBCPP_NODEBUG = __tuple_types<
typename _ApplyFn::template __apply<__type_pack_element<_Idx, _Types...>>...
>;
};
@@ -375,12 +375,12 @@ struct __make_tuple_types
template <class ..._Types, size_t _Ep>
struct __make_tuple_types<tuple<_Types...>, _Ep, 0, true> {
- typedef _LIBCPP_NODEBUG_TYPE __tuple_types<_Types...> type;
+ typedef _LIBCPP_NODEBUG __tuple_types<_Types...> type;
};
template <class ..._Types, size_t _Ep>
struct __make_tuple_types<__tuple_types<_Types...>, _Ep, 0, true> {
- typedef _LIBCPP_NODEBUG_TYPE __tuple_types<_Types...> type;
+ typedef _LIBCPP_NODEBUG __tuple_types<_Types...> type;
};
template <bool ..._Preds>
@@ -454,12 +454,12 @@ struct __tuple_assignable<_Tp, _Up, true, true>
template <size_t _Ip, class ..._Tp>
struct _LIBCPP_TEMPLATE_VIS tuple_element<_Ip, tuple<_Tp...> >
{
- typedef _LIBCPP_NODEBUG_TYPE typename tuple_element<_Ip, __tuple_types<_Tp...> >::type type;
+ typedef _LIBCPP_NODEBUG typename tuple_element<_Ip, __tuple_types<_Tp...> >::type type;
};
#if _LIBCPP_STD_VER > 11
template <size_t _Ip, class ..._Tp>
-using tuple_element_t _LIBCPP_NODEBUG_TYPE = typename tuple_element <_Ip, _Tp...>::type;
+using tuple_element_t _LIBCPP_NODEBUG = typename tuple_element <_Ip, _Tp...>::type;
#endif
template <bool _IsTuple, class _SizeTrait, size_t _Expected>
@@ -471,7 +471,7 @@ struct __tuple_like_with_size_imp<true, _SizeTrait, _Expected>
template <class _Tuple, size_t _ExpectedSize,
class _RawTuple = typename __uncvref<_Tuple>::type>
-using __tuple_like_with_size _LIBCPP_NODEBUG_TYPE = __tuple_like_with_size_imp<
+using __tuple_like_with_size _LIBCPP_NODEBUG = __tuple_like_with_size_imp<
__tuple_like<_RawTuple>::value,
tuple_size<_RawTuple>, _ExpectedSize
>;
diff --git a/libcxx/include/__undef_macros b/libcxx/include/__undef_macros
index 4923ee6b4a1a..40b2b64d0a6f 100644
--- a/libcxx/include/__undef_macros
+++ b/libcxx/include/__undef_macros
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===------------------------ __undef_macros ------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/__utility/as_const.h b/libcxx/include/__utility/as_const.h
index 2f23eb431efa..52da739875e3 100644
--- a/libcxx/include/__utility/as_const.h
+++ b/libcxx/include/__utility/as_const.h
@@ -18,9 +18,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER > 14
@@ -33,6 +30,4 @@ void as_const(const _Tp&&) = delete;
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___UTILITY_AS_CONST_H
diff --git a/libcxx/include/__utility/cmp.h b/libcxx/include/__utility/cmp.h
index a14e373c3f4b..4fc96b054f4d 100644
--- a/libcxx/include/__utility/cmp.h
+++ b/libcxx/include/__utility/cmp.h
@@ -30,14 +30,17 @@ struct _IsSameAsAny : _Or<_IsSame<_Tp, _Up>...> {};
template<class _Tp>
concept __is_safe_integral_cmp = is_integral_v<_Tp> &&
- !_IsSameAsAny<_Tp, bool, char,
+ !_IsSameAsAny<_Tp, bool, char
#ifndef _LIBCPP_HAS_NO_CHAR8_T
- char8_t,
+ , char8_t
#endif
#ifndef _LIBCPP_HAS_NO_UNICODE_CHARS
- char16_t, char32_t,
+ , char16_t, char32_t
#endif
- wchar_t>::value;
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
+ , wchar_t
+#endif
+ >::value;
template<__is_safe_integral_cmp _Tp, __is_safe_integral_cmp _Up>
_LIBCPP_INLINE_VISIBILITY constexpr
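As an aside (not part of the imported sources): __is_safe_integral_cmp constrains the std::cmp_* utilities, and the reshuffled list above only changes which character-like types are excluded when wchar_t support is disabled. A short reminder of what the constrained functions do:

    #include <utility>

    int main() {
      static_assert((-1 < 1u) == false);            // usual arithmetic conversions: -1 wraps to a huge unsigned value
      static_assert(std::cmp_less(-1, 1u));         // value-based comparison: -1 < 1
      // bool and character types are rejected by the constraint, so e.g.
      // std::cmp_less(true, 'a') is ill-formed.
    }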
diff --git a/libcxx/include/__utility/__decay_copy.h b/libcxx/include/__utility/decay_copy.h
index eda8db6a60f5..5c9716a89279 100644
--- a/libcxx/include/__utility/__decay_copy.h
+++ b/libcxx/include/__utility/decay_copy.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef _LIBCPP___TYPE_TRAITS_DECAY_COPY_H
-#define _LIBCPP___TYPE_TRAITS_DECAY_COPY_H
+#ifndef _LIBCPP___UTILITY_DECAY_COPY_H
+#define _LIBCPP___UTILITY_DECAY_COPY_H
#include <__config>
#include <__utility/forward.h>
@@ -18,15 +18,13 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Tp>
-inline _LIBCPP_INLINE_VISIBILITY typename decay<_Tp>::type __decay_copy(_Tp&& __t)
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR
+typename decay<_Tp>::type __decay_copy(_Tp&& __t)
#if _LIBCPP_STD_VER > 17
- noexcept(is_nothrow_convertible_v<_Tp, remove_reference_t<_Tp> >)
+ noexcept(is_nothrow_convertible_v<_Tp, remove_reference_t<_Tp>>)
#endif
{
return _VSTD::forward<_Tp>(__t);
@@ -34,6 +32,4 @@ inline _LIBCPP_INLINE_VISIBILITY typename decay<_Tp>::type __decay_copy(_Tp&& __
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
-#endif // _LIBCPP___TYPE_TRAITS_DECAY_COPY_H
+#endif // _LIBCPP___UTILITY_DECAY_COPY_H
diff --git a/libcxx/include/__utility/declval.h b/libcxx/include/__utility/declval.h
index 185527cc6cb5..6a9dcd9a1949 100644
--- a/libcxx/include/__utility/declval.h
+++ b/libcxx/include/__utility/declval.h
@@ -15,9 +15,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
// Suppress deprecation notice for volatile-qualified return type resulting
@@ -34,6 +31,4 @@ decltype(__declval<_Tp>(0)) declval() _NOEXCEPT;
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___UTILITY_DECLVAL_H
diff --git a/libcxx/include/__utility/exchange.h b/libcxx/include/__utility/exchange.h
index 4d5211d94ad1..f9c92c622f54 100644
--- a/libcxx/include/__utility/exchange.h
+++ b/libcxx/include/__utility/exchange.h
@@ -12,20 +12,19 @@
#include <__config>
#include <__utility/forward.h>
#include <__utility/move.h>
+#include <type_traits>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER > 11
template<class _T1, class _T2 = _T1>
inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
-_T1 exchange(_T1& __obj, _T2 && __new_value)
+_T1 exchange(_T1& __obj, _T2&& __new_value)
+ noexcept(is_nothrow_move_constructible<_T1>::value && is_nothrow_assignable<_T1&, _T2>::value)
{
_T1 __old_value = _VSTD::move(__obj);
__obj = _VSTD::forward<_T2>(__new_value);
@@ -35,6 +34,4 @@ _T1 exchange(_T1& __obj, _T2 && __new_value)
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___UTILITY_EXCHANGE_H
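As an aside (not part of the imported sources): a small sketch of std::exchange with the conditional noexcept just added; for a type like int whose move construction and assignment cannot throw, the call is now noexcept.

    #include <iostream>
    #include <utility>

    int main() {
      int state = 1;
      int old = std::exchange(state, 7);            // old = 1, state = 7
      std::cout << old << ' ' << state << '\n';
      static_assert(noexcept(std::exchange(state, 7)));
    }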
diff --git a/libcxx/include/__utility/forward.h b/libcxx/include/__utility/forward.h
index c994f129007b..7629a87d74ff 100644
--- a/libcxx/include/__utility/forward.h
+++ b/libcxx/include/__utility/forward.h
@@ -17,9 +17,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Tp>
@@ -37,6 +34,4 @@ forward(typename remove_reference<_Tp>::type&& __t) _NOEXCEPT {
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___UTILITY_FORWARD_H
diff --git a/libcxx/include/__utility/in_place.h b/libcxx/include/__utility/in_place.h
index 964d08332747..846b4a6d4dfb 100644
--- a/libcxx/include/__utility/in_place.h
+++ b/libcxx/include/__utility/in_place.h
@@ -16,9 +16,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER > 14
@@ -26,21 +23,21 @@ _LIBCPP_BEGIN_NAMESPACE_STD
struct _LIBCPP_TYPE_VIS in_place_t {
explicit in_place_t() = default;
};
-_LIBCPP_INLINE_VAR constexpr in_place_t in_place{};
+inline constexpr in_place_t in_place{};
template <class _Tp>
struct _LIBCPP_TEMPLATE_VIS in_place_type_t {
explicit in_place_type_t() = default;
};
template <class _Tp>
-_LIBCPP_INLINE_VAR constexpr in_place_type_t<_Tp> in_place_type{};
+inline constexpr in_place_type_t<_Tp> in_place_type{};
template <size_t _Idx>
struct _LIBCPP_TEMPLATE_VIS in_place_index_t {
explicit in_place_index_t() = default;
};
template <size_t _Idx>
-_LIBCPP_INLINE_VAR constexpr in_place_index_t<_Idx> in_place_index{};
+inline constexpr in_place_index_t<_Idx> in_place_index{};
template <class _Tp> struct __is_inplace_type_imp : false_type {};
template <class _Tp> struct __is_inplace_type_imp<in_place_type_t<_Tp>> : true_type {};
@@ -58,6 +55,4 @@ using __is_inplace_index = __is_inplace_index_imp<__uncvref_t<_Tp>>;
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___UTILITY_IN_PLACE_H
diff --git a/libcxx/include/__utility/integer_sequence.h b/libcxx/include/__utility/integer_sequence.h
index 963c4a967070..738ac6c02b05 100644
--- a/libcxx/include/__utility/integer_sequence.h
+++ b/libcxx/include/__utility/integer_sequence.h
@@ -16,9 +16,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER > 11
@@ -42,11 +39,11 @@ template<size_t... _Ip>
#if __has_builtin(__make_integer_seq) && !defined(_LIBCPP_TESTING_FALLBACK_MAKE_INTEGER_SEQUENCE)
template <class _Tp, _Tp _Ep>
-using __make_integer_sequence _LIBCPP_NODEBUG_TYPE = __make_integer_seq<integer_sequence, _Tp, _Ep>;
+using __make_integer_sequence _LIBCPP_NODEBUG = __make_integer_seq<integer_sequence, _Tp, _Ep>;
#else
-template<typename _Tp, _Tp _Np> using __make_integer_sequence_unchecked _LIBCPP_NODEBUG_TYPE =
+template<typename _Tp, _Tp _Np> using __make_integer_sequence_unchecked _LIBCPP_NODEBUG =
typename __detail::__make<_Np>::type::template __convert<integer_sequence, _Tp>;
template <class _Tp, _Tp _Ep>
@@ -57,11 +54,11 @@ struct __make_integer_sequence_checked
static_assert(0 <= _Ep, "std::make_integer_sequence must have a non-negative sequence length");
// Workaround GCC bug by preventing bad installations when 0 <= _Ep
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=68929
- typedef _LIBCPP_NODEBUG_TYPE __make_integer_sequence_unchecked<_Tp, 0 <= _Ep ? _Ep : 0> type;
+ typedef _LIBCPP_NODEBUG __make_integer_sequence_unchecked<_Tp, 0 <= _Ep ? _Ep : 0> type;
};
template <class _Tp, _Tp _Ep>
-using __make_integer_sequence _LIBCPP_NODEBUG_TYPE = typename __make_integer_sequence_checked<_Tp, _Ep>::type;
+using __make_integer_sequence _LIBCPP_NODEBUG = typename __make_integer_sequence_checked<_Tp, _Ep>::type;
#endif
@@ -78,6 +75,4 @@ template<class... _Tp>
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___UTILITY_INTEGER_SEQUENCE_H
diff --git a/libcxx/include/__utility/move.h b/libcxx/include/__utility/move.h
index d3c56f93ef91..75d715dc66da 100644
--- a/libcxx/include/__utility/move.h
+++ b/libcxx/include/__utility/move.h
@@ -17,15 +17,12 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Tp>
_LIBCPP_NODISCARD_EXT inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR typename remove_reference<_Tp>::type&&
move(_Tp&& __t) _NOEXCEPT {
- typedef _LIBCPP_NODEBUG_TYPE typename remove_reference<_Tp>::type _Up;
+ typedef _LIBCPP_NODEBUG typename remove_reference<_Tp>::type _Up;
return static_cast<_Up&&>(__t);
}
@@ -47,6 +44,4 @@ move_if_noexcept(_Tp& __x) _NOEXCEPT {
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___UTILITY_MOVE_H
diff --git a/libcxx/include/__utility/pair.h b/libcxx/include/__utility/pair.h
index e0216f3402b8..f1114696884a 100644
--- a/libcxx/include/__utility/pair.h
+++ b/libcxx/include/__utility/pair.h
@@ -9,6 +9,8 @@
#ifndef _LIBCPP___UTILITY_PAIR_H
#define _LIBCPP___UTILITY_PAIR_H
+#include <__compare/common_comparison_category.h>
+#include <__compare/synth_three_way.h>
#include <__config>
#include <__functional/unwrap_ref.h>
#include <__tuple>
@@ -22,12 +24,8 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
-
#if defined(_LIBCPP_DEPRECATED_ABI_DISABLE_PAIR_TRIVIAL_COPY_CTOR)
template <class, class>
struct __non_trivially_copyable_base {
@@ -75,9 +73,6 @@ struct _LIBCPP_TEMPLATE_VIS pair
return *this;
}
#else
- template <bool _Val>
- using _EnableB _LIBCPP_NODEBUG_TYPE = typename enable_if<_Val, bool>::type;
-
struct _CheckArgs {
template <int&...>
static constexpr bool __enable_explicit_default() {
@@ -110,7 +105,7 @@ struct _LIBCPP_TEMPLATE_VIS pair
};
template <bool _MaybeEnable>
- using _CheckArgsDep _LIBCPP_NODEBUG_TYPE = typename conditional<
+ using _CheckArgsDep _LIBCPP_NODEBUG = typename conditional<
_MaybeEnable, _CheckArgs, __check_tuple_constructor_fail>::type;
struct _CheckTupleLikeConstructor {
@@ -132,112 +127,122 @@ struct _LIBCPP_TEMPLATE_VIS pair
};
template <class _Tuple>
- using _CheckTLC _LIBCPP_NODEBUG_TYPE = typename conditional<
+ using _CheckTLC _LIBCPP_NODEBUG = typename conditional<
__tuple_like_with_size<_Tuple, 2>::value
&& !is_same<typename decay<_Tuple>::type, pair>::value,
_CheckTupleLikeConstructor,
__check_tuple_constructor_fail
>::type;
- template<bool _Dummy = true, _EnableB<
+ template<bool _Dummy = true, typename enable_if<
_CheckArgsDep<_Dummy>::__enable_explicit_default()
- > = false>
+ >::type* = nullptr>
explicit _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR
pair() _NOEXCEPT_(is_nothrow_default_constructible<first_type>::value &&
is_nothrow_default_constructible<second_type>::value)
: first(), second() {}
- template<bool _Dummy = true, _EnableB<
+ template<bool _Dummy = true, typename enable_if<
_CheckArgsDep<_Dummy>::__enable_implicit_default()
- > = false>
+ >::type* = nullptr>
_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR
pair() _NOEXCEPT_(is_nothrow_default_constructible<first_type>::value &&
is_nothrow_default_constructible<second_type>::value)
: first(), second() {}
- template <bool _Dummy = true, _EnableB<
+ template <bool _Dummy = true, typename enable_if<
_CheckArgsDep<_Dummy>::template __enable_explicit<_T1 const&, _T2 const&>()
- > = false>
+ >::type* = nullptr>
_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11
explicit pair(_T1 const& __t1, _T2 const& __t2)
_NOEXCEPT_(is_nothrow_copy_constructible<first_type>::value &&
is_nothrow_copy_constructible<second_type>::value)
: first(__t1), second(__t2) {}
- template<bool _Dummy = true, _EnableB<
+ template<bool _Dummy = true, typename enable_if<
_CheckArgsDep<_Dummy>::template __enable_implicit<_T1 const&, _T2 const&>()
- > = false>
+ >::type* = nullptr>
_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11
pair(_T1 const& __t1, _T2 const& __t2)
_NOEXCEPT_(is_nothrow_copy_constructible<first_type>::value &&
is_nothrow_copy_constructible<second_type>::value)
: first(__t1), second(__t2) {}
- template<class _U1, class _U2, _EnableB<
- _CheckArgs::template __enable_explicit<_U1, _U2>()
- > = false>
+ template <
+#if _LIBCPP_STD_VER > 20 // http://wg21.link/P1951
+ class _U1 = _T1, class _U2 = _T2,
+#else
+ class _U1, class _U2,
+#endif
+ typename enable_if<_CheckArgs::template __enable_explicit<_U1, _U2>()>::type* = nullptr
+ >
_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11
explicit pair(_U1&& __u1, _U2&& __u2)
_NOEXCEPT_((is_nothrow_constructible<first_type, _U1>::value &&
is_nothrow_constructible<second_type, _U2>::value))
: first(_VSTD::forward<_U1>(__u1)), second(_VSTD::forward<_U2>(__u2)) {}
- template<class _U1, class _U2, _EnableB<
- _CheckArgs::template __enable_implicit<_U1, _U2>()
- > = false>
+ template <
+#if _LIBCPP_STD_VER > 20 // http://wg21.link/P1951
+ class _U1 = _T1, class _U2 = _T2,
+#else
+ class _U1, class _U2,
+#endif
+ typename enable_if<_CheckArgs::template __enable_implicit<_U1, _U2>()>::type* = nullptr
+ >
_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11
pair(_U1&& __u1, _U2&& __u2)
_NOEXCEPT_((is_nothrow_constructible<first_type, _U1>::value &&
is_nothrow_constructible<second_type, _U2>::value))
: first(_VSTD::forward<_U1>(__u1)), second(_VSTD::forward<_U2>(__u2)) {}
- template<class _U1, class _U2, _EnableB<
+ template<class _U1, class _U2, typename enable_if<
_CheckArgs::template __enable_explicit<_U1 const&, _U2 const&>()
- > = false>
+ >::type* = nullptr>
_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11
explicit pair(pair<_U1, _U2> const& __p)
_NOEXCEPT_((is_nothrow_constructible<first_type, _U1 const&>::value &&
is_nothrow_constructible<second_type, _U2 const&>::value))
: first(__p.first), second(__p.second) {}
- template<class _U1, class _U2, _EnableB<
+ template<class _U1, class _U2, typename enable_if<
_CheckArgs::template __enable_implicit<_U1 const&, _U2 const&>()
- > = false>
+ >::type* = nullptr>
_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11
pair(pair<_U1, _U2> const& __p)
_NOEXCEPT_((is_nothrow_constructible<first_type, _U1 const&>::value &&
is_nothrow_constructible<second_type, _U2 const&>::value))
: first(__p.first), second(__p.second) {}
- template<class _U1, class _U2, _EnableB<
+ template<class _U1, class _U2, typename enable_if<
_CheckArgs::template __enable_explicit<_U1, _U2>()
- > = false>
+ >::type* = nullptr>
_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11
explicit pair(pair<_U1, _U2>&&__p)
_NOEXCEPT_((is_nothrow_constructible<first_type, _U1&&>::value &&
is_nothrow_constructible<second_type, _U2&&>::value))
: first(_VSTD::forward<_U1>(__p.first)), second(_VSTD::forward<_U2>(__p.second)) {}
- template<class _U1, class _U2, _EnableB<
+ template<class _U1, class _U2, typename enable_if<
_CheckArgs::template __enable_implicit<_U1, _U2>()
- > = false>
+ >::type* = nullptr>
_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11
pair(pair<_U1, _U2>&& __p)
_NOEXCEPT_((is_nothrow_constructible<first_type, _U1&&>::value &&
is_nothrow_constructible<second_type, _U2&&>::value))
: first(_VSTD::forward<_U1>(__p.first)), second(_VSTD::forward<_U2>(__p.second)) {}
- template<class _Tuple, _EnableB<
+ template<class _Tuple, typename enable_if<
_CheckTLC<_Tuple>::template __enable_explicit<_Tuple>()
- > = false>
+ >::type* = nullptr>
_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11
explicit pair(_Tuple&& __p)
: first(_VSTD::get<0>(_VSTD::forward<_Tuple>(__p))),
second(_VSTD::get<1>(_VSTD::forward<_Tuple>(__p))) {}
- template<class _Tuple, _EnableB<
+ template<class _Tuple, typename enable_if<
_CheckTLC<_Tuple>::template __enable_implicit<_Tuple>()
- > = false>
+ >::type* = nullptr>
_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11
pair(_Tuple&& __p)
: first(_VSTD::get<0>(_VSTD::forward<_Tuple>(__p))),
@@ -279,9 +284,9 @@ struct _LIBCPP_TEMPLATE_VIS pair
return *this;
}
- template <class _Tuple, _EnableB<
+ template <class _Tuple, typename enable_if<
_CheckTLC<_Tuple>::template __enable_assign<_Tuple>()
- > = false>
+ >::type* = nullptr>
_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
pair& operator=(_Tuple&& __p) {
first = _VSTD::get<0>(_VSTD::forward<_Tuple>(__p));
@@ -310,10 +315,12 @@ private:
#endif
};
-#ifndef _LIBCPP_HAS_NO_DEDUCTION_GUIDES
+#if _LIBCPP_STD_VER >= 17
template<class _T1, class _T2>
pair(_T1, _T2) -> pair<_T1, _T2>;
-#endif // _LIBCPP_HAS_NO_DEDUCTION_GUIDES
+#endif
+
+// [pairs.spec], specialized algorithms
template <class _T1, class _T2>
inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11
@@ -323,6 +330,23 @@ operator==(const pair<_T1,_T2>& __x, const pair<_T1,_T2>& __y)
return __x.first == __y.first && __x.second == __y.second;
}
+#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+template <class _T1, class _T2>
+_LIBCPP_HIDE_FROM_ABI constexpr
+common_comparison_category_t<
+ __synth_three_way_result<_T1>,
+ __synth_three_way_result<_T2> >
+operator<=>(const pair<_T1,_T2>& __x, const pair<_T1,_T2>& __y)
+{
+ if (auto __c = _VSTD::__synth_three_way(__x.first, __y.first); __c != 0) {
+ return __c;
+ }
+ return _VSTD::__synth_three_way(__x.second, __y.second);
+}
+
+#else // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
template <class _T1, class _T2>
inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11
bool
@@ -363,6 +387,8 @@ operator<=(const pair<_T1,_T2>& __x, const pair<_T1,_T2>& __y)
return !(__y < __x);
}
+#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
template <class _T1, class _T2>
inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
typename enable_if
@@ -414,13 +440,13 @@ struct _LIBCPP_TEMPLATE_VIS tuple_element<_Ip, pair<_T1, _T2> >
template <class _T1, class _T2>
struct _LIBCPP_TEMPLATE_VIS tuple_element<0, pair<_T1, _T2> >
{
- typedef _LIBCPP_NODEBUG_TYPE _T1 type;
+ typedef _LIBCPP_NODEBUG _T1 type;
};
template <class _T1, class _T2>
struct _LIBCPP_TEMPLATE_VIS tuple_element<1, pair<_T1, _T2> >
{
- typedef _LIBCPP_NODEBUG_TYPE _T2 type;
+ typedef _LIBCPP_NODEBUG _T2 type;
};
template <size_t _Ip> struct __get_pair;
@@ -580,6 +606,4 @@ constexpr _T1 const && get(pair<_T2, _T1> const&& __p) _NOEXCEPT
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___UTILITY_PAIR_H
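
Aside: a brief C++20/23 sketch of the pair behaviour touched above, namely the defaulted converting-constructor arguments (P1951) and the lexicographic three-way comparison (illustrative, not taken from the patch):

    #include <compare>
    #include <string>
    #include <utility>
    #include <vector>

    void pair_demo() {
        // With P1951 the forwarding constructor is selected here, so the
        // braced initializer builds the vector directly instead of going
        // through the const-reference constructor and an extra copy.
        std::pair<std::string, std::vector<int>> p("key", {1, 2, 3});
    }

    // operator<=> compares .first, then .second, lexicographically.
    static_assert((std::pair{1, 2} <=> std::pair{1, 3}) < 0);
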
diff --git a/libcxx/include/__utility/piecewise_construct.h b/libcxx/include/__utility/piecewise_construct.h
index 8bef01c9f269..4dc44b38fe99 100644
--- a/libcxx/include/__utility/piecewise_construct.h
+++ b/libcxx/include/__utility/piecewise_construct.h
@@ -15,20 +15,15 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
struct _LIBCPP_TEMPLATE_VIS piecewise_construct_t { explicit piecewise_construct_t() = default; };
#if defined(_LIBCPP_CXX03_LANG) || defined(_LIBCPP_BUILDING_LIBRARY)
extern _LIBCPP_EXPORTED_FROM_ABI const piecewise_construct_t piecewise_construct;// = piecewise_construct_t();
#else
-/* _LIBCPP_INLINE_VAR */ constexpr piecewise_construct_t piecewise_construct = piecewise_construct_t();
+/* inline */ constexpr piecewise_construct_t piecewise_construct = piecewise_construct_t();
#endif
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___UTILITY_PIECEWISE_CONSTRUCT_H
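
Aside: the classic use of the piecewise_construct tag declared above, sketched against the standard <map> API (illustrative only):

    #include <map>
    #include <string>
    #include <tuple>
    #include <vector>

    void piecewise_demo() {
        std::map<std::string, std::vector<int>> m;
        m.emplace(std::piecewise_construct,
                  std::forward_as_tuple("key"),   // arguments forwarded to the key
                  std::forward_as_tuple(3, 7));   // arguments forwarded to the value: vector(3, 7)
    }
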
diff --git a/libcxx/include/__utility/rel_ops.h b/libcxx/include/__utility/rel_ops.h
index b900da80f48d..c94b8fddafee 100644
--- a/libcxx/include/__utility/rel_ops.h
+++ b/libcxx/include/__utility/rel_ops.h
@@ -18,9 +18,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
namespace rel_ops
@@ -62,6 +59,4 @@ operator>=(const _Tp& __x, const _Tp& __y)
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___UTILITY_REL_OPS_H
diff --git a/libcxx/include/__utility/swap.h b/libcxx/include/__utility/swap.h
index 8af83a9a0f45..6c07511686f0 100644
--- a/libcxx/include/__utility/swap.h
+++ b/libcxx/include/__utility/swap.h
@@ -19,9 +19,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
#ifndef _LIBCPP_CXX03_LANG
@@ -50,6 +47,4 @@ swap(_Tp (&__a)[_Np], _Tp (&__b)[_Np]) _NOEXCEPT_(__is_nothrow_swappable<_Tp>::v
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___UTILITY_SWAP_H
diff --git a/libcxx/include/__utility/to_underlying.h b/libcxx/include/__utility/to_underlying.h
index fd22f89ff595..54f99108a38c 100644
--- a/libcxx/include/__utility/to_underlying.h
+++ b/libcxx/include/__utility/to_underlying.h
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===----------------- __utility/to_underlying.h --------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -17,9 +17,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
#ifndef _LIBCPP_CXX03_LANG
@@ -40,6 +37,4 @@ to_underlying(_Tp __val) noexcept {
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___UTILITY_TO_UNDERLYING_H
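
Aside: a one-line sketch of std::to_underlying, the facility declared above (assumes the C++23 <utility> interface):

    #include <utility>

    enum class flags : unsigned char { none = 0, dirty = 1 };
    static_assert(std::to_underlying(flags::dirty) == 1);  // yields the unsigned char value 1
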
diff --git a/libcxx/include/__variant/monostate.h b/libcxx/include/__variant/monostate.h
index 36e3eead1a47..d575847340ac 100644
--- a/libcxx/include/__variant/monostate.h
+++ b/libcxx/include/__variant/monostate.h
@@ -18,9 +18,6 @@
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER > 14
@@ -60,6 +57,4 @@ struct _LIBCPP_TEMPLATE_VIS hash<monostate> {
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP___VARIANT_MONOSTATE_H
diff --git a/libcxx/include/algorithm b/libcxx/include/algorithm
index 849302a7d5da..b28c8cd49890 100644
--- a/libcxx/include/algorithm
+++ b/libcxx/include/algorithm
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===-------------------------- algorithm ---------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -758,11 +758,6 @@ template <class BidirectionalIterator, class Compare>
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
-_LIBCPP_POP_MACROS
-
#if defined(_LIBCPP_HAS_PARALLEL_ALGORITHMS) && _LIBCPP_STD_VER >= 17
# include <__pstl_algorithm>
#endif
diff --git a/libcxx/include/any b/libcxx/include/any
index 3a826c4d50b4..c5b7af2953b9 100644
--- a/libcxx/include/any
+++ b/libcxx/include/any
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===------------------------------ any -----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/array b/libcxx/include/array
index 022172b86ec9..06884eab4a80 100644
--- a/libcxx/include/array
+++ b/libcxx/include/array
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===---------------------------- array -----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -356,9 +356,9 @@ struct _LIBCPP_TEMPLATE_VIS array<_Tp, 0>
};
-#ifndef _LIBCPP_HAS_NO_DEDUCTION_GUIDES
+#if _LIBCPP_STD_VER >= 17
template<class _Tp, class... _Args,
- class = _EnableIf<__all<_IsSame<_Tp, _Args>::value...>::value>
+ class = enable_if_t<__all<_IsSame<_Tp, _Args>::value...>::value>
>
array(_Tp, _Args...)
-> array<_Tp, 1 + sizeof...(_Args)>;
diff --git a/libcxx/include/atomic b/libcxx/include/atomic
index 0f6aee83ce82..cfd0e1d054a8 100644
--- a/libcxx/include/atomic
+++ b/libcxx/include/atomic
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- atomic -----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -263,196 +263,137 @@ struct atomic<T*>
};
-template <class T>
- bool atomic_is_lock_free(const volatile atomic<T>* obj) noexcept;
-
-template <class T>
- bool atomic_is_lock_free(const atomic<T>* obj) noexcept;
-
-template <class T>
- void atomic_store(volatile atomic<T>* obj, T desr) noexcept;
-
-template <class T>
- void atomic_store(atomic<T>* obj, T desr) noexcept;
-
-template <class T>
- void atomic_store_explicit(volatile atomic<T>* obj, T desr, memory_order m) noexcept;
-
-template <class T>
- void atomic_store_explicit(atomic<T>* obj, T desr, memory_order m) noexcept;
-
-template <class T>
- T atomic_load(const volatile atomic<T>* obj) noexcept;
-
-template <class T>
- T atomic_load(const atomic<T>* obj) noexcept;
-
-template <class T>
- T atomic_load_explicit(const volatile atomic<T>* obj, memory_order m) noexcept;
-
-template <class T>
- T atomic_load_explicit(const atomic<T>* obj, memory_order m) noexcept;
-
-template <class T>
- T atomic_exchange(volatile atomic<T>* obj, T desr) noexcept;
-
-template <class T>
- T atomic_exchange(atomic<T>* obj, T desr) noexcept;
-
-template <class T>
- T atomic_exchange_explicit(volatile atomic<T>* obj, T desr, memory_order m) noexcept;
-
-template <class T>
- T atomic_exchange_explicit(atomic<T>* obj, T desr, memory_order m) noexcept;
-
-template <class T>
- bool atomic_compare_exchange_weak(volatile atomic<T>* obj, T* expc, T desr) noexcept;
-
-template <class T>
- bool atomic_compare_exchange_weak(atomic<T>* obj, T* expc, T desr) noexcept;
-
-template <class T>
- bool atomic_compare_exchange_strong(volatile atomic<T>* obj, T* expc, T desr) noexcept;
-
-template <class T>
- bool atomic_compare_exchange_strong(atomic<T>* obj, T* expc, T desr) noexcept;
-
-template <class T>
- bool atomic_compare_exchange_weak_explicit(volatile atomic<T>* obj, T* expc,
- T desr,
- memory_order s, memory_order f) noexcept;
-
-template <class T>
- bool atomic_compare_exchange_weak_explicit(atomic<T>* obj, T* expc, T desr,
- memory_order s, memory_order f) noexcept;
-
-template <class T>
- bool atomic_compare_exchange_strong_explicit(volatile atomic<T>* obj,
- T* expc, T desr,
- memory_order s, memory_order f) noexcept;
-
-template <class T>
- bool atomic_compare_exchange_strong_explicit(atomic<T>* obj, T* expc,
- T desr,
- memory_order s, memory_order f) noexcept;
-
-template <class T>
- void atomic_wait(const volatile atomic<T>* obj, T old) noexcept;
-
-template <class T>
- void atomic_wait(const atomic<T>* obj, T old) noexcept;
-
-template <class T>
- void atomic_wait_explicit(const volatile atomic<T>* obj, T old, memory_order m) noexcept;
-
-template <class T>
- void atomic_wait_explicit(const atomic<T>* obj, T old, memory_order m) noexcept;
-
-template <class T>
- void atomic_one(volatile atomic<T>* obj) noexcept;
-
-template <class T>
- void atomic_one(atomic<T>* obj) noexcept;
-
-template <class T>
- void atomic_all(volatile atomic<T>* obj) noexcept;
-
-template <class T>
- void atomic_all(atomic<T>* obj) noexcept;
-
-template <class Integral>
- Integral atomic_fetch_add(volatile atomic<Integral>* obj, Integral op) noexcept;
-
-template <class Integral>
- Integral atomic_fetch_add(atomic<Integral>* obj, Integral op) noexcept;
-
-template <class Integral>
- Integral atomic_fetch_add_explicit(volatile atomic<Integral>* obj, Integral op,
- memory_order m) noexcept;
-template <class Integral>
- Integral atomic_fetch_add_explicit(atomic<Integral>* obj, Integral op,
- memory_order m) noexcept;
-template <class Integral>
- Integral atomic_fetch_sub(volatile atomic<Integral>* obj, Integral op) noexcept;
-
-template <class Integral>
- Integral atomic_fetch_sub(atomic<Integral>* obj, Integral op) noexcept;
-
-template <class Integral>
- Integral atomic_fetch_sub_explicit(volatile atomic<Integral>* obj, Integral op,
- memory_order m) noexcept;
-
-template <class Integral>
- Integral atomic_fetch_sub_explicit(atomic<Integral>* obj, Integral op,
- memory_order m) noexcept;
-
-template <class Integral>
- Integral atomic_fetch_and(volatile atomic<Integral>* obj, Integral op) noexcept;
-
-template <class Integral>
- Integral atomic_fetch_and(atomic<Integral>* obj, Integral op) noexcept;
-
-template <class Integral>
- Integral atomic_fetch_and_explicit(volatile atomic<Integral>* obj, Integral op,
- memory_order m) noexcept;
-
-template <class Integral>
- Integral atomic_fetch_and_explicit(atomic<Integral>* obj, Integral op,
- memory_order m) noexcept;
-
-template <class Integral>
- Integral atomic_fetch_or(volatile atomic<Integral>* obj, Integral op) noexcept;
-
-template <class Integral>
- Integral atomic_fetch_or(atomic<Integral>* obj, Integral op) noexcept;
-
-template <class Integral>
- Integral atomic_fetch_or_explicit(volatile atomic<Integral>* obj, Integral op,
- memory_order m) noexcept;
-
-template <class Integral>
- Integral atomic_fetch_or_explicit(atomic<Integral>* obj, Integral op,
- memory_order m) noexcept;
-
-template <class Integral>
- Integral atomic_fetch_xor(volatile atomic<Integral>* obj, Integral op) noexcept;
-
-template <class Integral>
- Integral atomic_fetch_xor(atomic<Integral>* obj, Integral op) noexcept;
-
-template <class Integral>
- Integral atomic_fetch_xor_explicit(volatile atomic<Integral>* obj, Integral op,
- memory_order m) noexcept;
-
-template <class Integral>
- Integral atomic_fetch_xor_explicit(atomic<Integral>* obj, Integral op,
- memory_order m) noexcept;
-
-template <class T>
- T* atomic_fetch_add(volatile atomic<T*>* obj, ptrdiff_t op) noexcept;
-
-template <class T>
- T* atomic_fetch_add(atomic<T*>* obj, ptrdiff_t op) noexcept;
-
-template <class T>
- T* atomic_fetch_add_explicit(volatile atomic<T*>* obj, ptrdiff_t op,
- memory_order m) noexcept;
-
-template <class T>
- T* atomic_fetch_add_explicit(atomic<T*>* obj, ptrdiff_t op, memory_order m) noexcept;
-
-template <class T>
- T* atomic_fetch_sub(volatile atomic<T*>* obj, ptrdiff_t op) noexcept;
-
-template <class T>
- T* atomic_fetch_sub(atomic<T*>* obj, ptrdiff_t op) noexcept;
-
-template <class T>
- T* atomic_fetch_sub_explicit(volatile atomic<T*>* obj, ptrdiff_t op,
- memory_order m) noexcept;
-
-template <class T>
- T* atomic_fetch_sub_explicit(atomic<T*>* obj, ptrdiff_t op, memory_order m) noexcept;
+// [atomics.nonmembers], non-member functions
+template<class T>
+ bool atomic_is_lock_free(const volatile atomic<T>*) noexcept;
+template<class T>
+ bool atomic_is_lock_free(const atomic<T>*) noexcept;
+template<class T>
+ void atomic_store(volatile atomic<T>*, atomic<T>::value_type) noexcept;
+template<class T>
+ void atomic_store(atomic<T>*, atomic<T>::value_type) noexcept;
+template<class T>
+ void atomic_store_explicit(volatile atomic<T>*, atomic<T>::value_type,
+ memory_order) noexcept;
+template<class T>
+ void atomic_store_explicit(atomic<T>*, atomic<T>::value_type,
+ memory_order) noexcept;
+template<class T>
+ T atomic_load(const volatile atomic<T>*) noexcept;
+template<class T>
+ T atomic_load(const atomic<T>*) noexcept;
+template<class T>
+ T atomic_load_explicit(const volatile atomic<T>*, memory_order) noexcept;
+template<class T>
+ T atomic_load_explicit(const atomic<T>*, memory_order) noexcept;
+template<class T>
+ T atomic_exchange(volatile atomic<T>*, atomic<T>::value_type) noexcept;
+template<class T>
+ T atomic_exchange(atomic<T>*, atomic<T>::value_type) noexcept;
+template<class T>
+ T atomic_exchange_explicit(volatile atomic<T>*, atomic<T>::value_type,
+ memory_order) noexcept;
+template<class T>
+ T atomic_exchange_explicit(atomic<T>*, atomic<T>::value_type,
+ memory_order) noexcept;
+template<class T>
+ bool atomic_compare_exchange_weak(volatile atomic<T>*, atomic<T>::value_type*,
+ atomic<T>::value_type) noexcept;
+template<class T>
+ bool atomic_compare_exchange_weak(atomic<T>*, atomic<T>::value_type*,
+ atomic<T>::value_type) noexcept;
+template<class T>
+ bool atomic_compare_exchange_strong(volatile atomic<T>*, atomic<T>::value_type*,
+ atomic<T>::value_type) noexcept;
+template<class T>
+ bool atomic_compare_exchange_strong(atomic<T>*, atomic<T>::value_type*,
+ atomic<T>::value_type) noexcept;
+template<class T>
+ bool atomic_compare_exchange_weak_explicit(volatile atomic<T>*, atomic<T>::value_type*,
+ atomic<T>::value_type,
+ memory_order, memory_order) noexcept;
+template<class T>
+ bool atomic_compare_exchange_weak_explicit(atomic<T>*, atomic<T>::value_type*,
+ atomic<T>::value_type,
+ memory_order, memory_order) noexcept;
+template<class T>
+ bool atomic_compare_exchange_strong_explicit(volatile atomic<T>*, atomic<T>::value_type*,
+ atomic<T>::value_type,
+ memory_order, memory_order) noexcept;
+template<class T>
+ bool atomic_compare_exchange_strong_explicit(atomic<T>*, atomic<T>::value_type*,
+ atomic<T>::value_type,
+ memory_order, memory_order) noexcept;
+
+template<class T>
+ T atomic_fetch_add(volatile atomic<T>*, atomic<T>::difference_type) noexcept;
+template<class T>
+ T atomic_fetch_add(atomic<T>*, atomic<T>::difference_type) noexcept;
+template<class T>
+ T atomic_fetch_add_explicit(volatile atomic<T>*, atomic<T>::difference_type,
+ memory_order) noexcept;
+template<class T>
+ T atomic_fetch_add_explicit(atomic<T>*, atomic<T>::difference_type,
+ memory_order) noexcept;
+template<class T>
+ T atomic_fetch_sub(volatile atomic<T>*, atomic<T>::difference_type) noexcept;
+template<class T>
+ T atomic_fetch_sub(atomic<T>*, atomic<T>::difference_type) noexcept;
+template<class T>
+ T atomic_fetch_sub_explicit(volatile atomic<T>*, atomic<T>::difference_type,
+ memory_order) noexcept;
+template<class T>
+ T atomic_fetch_sub_explicit(atomic<T>*, atomic<T>::difference_type,
+ memory_order) noexcept;
+template<class T>
+ T atomic_fetch_and(volatile atomic<T>*, atomic<T>::value_type) noexcept;
+template<class T>
+ T atomic_fetch_and(atomic<T>*, atomic<T>::value_type) noexcept;
+template<class T>
+ T atomic_fetch_and_explicit(volatile atomic<T>*, atomic<T>::value_type,
+ memory_order) noexcept;
+template<class T>
+ T atomic_fetch_and_explicit(atomic<T>*, atomic<T>::value_type,
+ memory_order) noexcept;
+template<class T>
+ T atomic_fetch_or(volatile atomic<T>*, atomic<T>::value_type) noexcept;
+template<class T>
+ T atomic_fetch_or(atomic<T>*, atomic<T>::value_type) noexcept;
+template<class T>
+ T atomic_fetch_or_explicit(volatile atomic<T>*, atomic<T>::value_type,
+ memory_order) noexcept;
+template<class T>
+ T atomic_fetch_or_explicit(atomic<T>*, atomic<T>::value_type,
+ memory_order) noexcept;
+template<class T>
+ T atomic_fetch_xor(volatile atomic<T>*, atomic<T>::value_type) noexcept;
+template<class T>
+ T atomic_fetch_xor(atomic<T>*, atomic<T>::value_type) noexcept;
+template<class T>
+ T atomic_fetch_xor_explicit(volatile atomic<T>*, atomic<T>::value_type,
+ memory_order) noexcept;
+template<class T>
+ T atomic_fetch_xor_explicit(atomic<T>*, atomic<T>::value_type,
+ memory_order) noexcept;
+
+template<class T>
+ void atomic_wait(const volatile atomic<T>*, atomic<T>::value_type);
+template<class T>
+ void atomic_wait(const atomic<T>*, atomic<T>::value_type);
+template<class T>
+ void atomic_wait_explicit(const volatile atomic<T>*, atomic<T>::value_type,
+ memory_order);
+template<class T>
+ void atomic_wait_explicit(const atomic<T>*, atomic<T>::value_type,
+ memory_order);
+template<class T>
+ void atomic_notify_one(volatile atomic<T>*);
+template<class T>
+ void atomic_notify_one(atomic<T>*);
+template<class T>
+ void atomic_notify_all(volatile atomic<T>*);
+template<class T>
+ void atomic_notify_all(atomic<T>*);
// Atomics for standard typedef types
@@ -564,10 +505,10 @@ void atomic_signal_fence(memory_order m) noexcept;
// deprecated
template <class T>
- void atomic_init(volatile atomic<T>* obj, typename atomic<T>::value_type desr) noexcept;
+ void atomic_init(volatile atomic<T>* obj, atomic<T>::value_type desr) noexcept;
template <class T>
- void atomic_init(atomic<T>* obj, typename atomic<T>::value_type desr) noexcept;
+ void atomic_init(atomic<T>* obj, atomic<T>::value_type desr) noexcept;
#define ATOMIC_VAR_INIT(value) see below
@@ -579,20 +520,21 @@ template <class T>
#include <__availability>
#include <__config>
-#include <__threading_support>
+#include <__thread/poll_with_backoff.h>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <type_traits>
#include <version>
+#ifndef _LIBCPP_HAS_NO_THREADS
+# include <__threading_support>
+#endif
+
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
#pragma GCC system_header
#endif
-#ifdef _LIBCPP_HAS_NO_THREADS
-# error <atomic> is not supported on this single threaded system
-#endif
#ifdef _LIBCPP_HAS_NO_ATOMIC_HEADER
# error <atomic> is not implemented
#endif
@@ -1473,7 +1415,9 @@ template<> struct __cxx_is_always_lock_free<char8_t> { enum { __value = 2 == ATO
#endif
template<> struct __cxx_is_always_lock_free<char16_t> { enum { __value = 2 == ATOMIC_CHAR16_T_LOCK_FREE }; };
template<> struct __cxx_is_always_lock_free<char32_t> { enum { __value = 2 == ATOMIC_CHAR32_T_LOCK_FREE }; };
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
template<> struct __cxx_is_always_lock_free<wchar_t> { enum { __value = 2 == ATOMIC_WCHAR_T_LOCK_FREE }; };
+#endif
template<> struct __cxx_is_always_lock_free<short> { enum { __value = 2 == ATOMIC_SHORT_LOCK_FREE }; };
template<> struct __cxx_is_always_lock_free<unsigned short> { enum { __value = 2 == ATOMIC_SHORT_LOCK_FREE }; };
template<> struct __cxx_is_always_lock_free<int> { enum { __value = 2 == ATOMIC_INT_LOCK_FREE }; };
@@ -1496,11 +1440,8 @@ template <typename _Tp,
typename _Base = __cxx_atomic_base_impl<_Tp> >
#endif //_LIBCPP_ATOMIC_ONLY_USE_BUILTINS
struct __cxx_atomic_impl : public _Base {
-
-#if _GNUC_VER >= 501
static_assert(is_trivially_copyable<_Tp>::value,
- "std::atomic<Tp> requires that 'Tp' be a trivially copyable type");
-#endif
+ "std::atomic<T> requires that 'T' be a trivially copyable type");
_LIBCPP_INLINE_VISIBILITY __cxx_atomic_impl() _NOEXCEPT _LIBCPP_DEFAULT
_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR explicit __cxx_atomic_impl(_Tp value) _NOEXCEPT
@@ -1515,6 +1456,16 @@ struct __cxx_atomic_impl : public _Base {
using __cxx_atomic_contention_t = __cxx_atomic_impl<__cxx_contention_t>;
+#if defined(_LIBCPP_HAS_NO_THREADS)
+# define _LIBCPP_HAS_NO_PLATFORM_WAIT
+#endif
+
+// TODO:
+// _LIBCPP_HAS_NO_PLATFORM_WAIT is currently a "dead" macro, in the sense that
+// it is not tied anywhere into the build system or even documented. We should
+// clean it up because it is technically never defined except when threads are
+// disabled. We should clean it up in its own changeset in case we break "bad"
+// users.
#ifndef _LIBCPP_HAS_NO_PLATFORM_WAIT
_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_one(void const volatile*);
@@ -1566,7 +1517,12 @@ _LIBCPP_INLINE_VISIBILITY void __cxx_atomic_notify_one(__cxx_atomic_impl<_Tp> co
template <class _Atp, class _Fn>
_LIBCPP_INLINE_VISIBILITY bool __cxx_atomic_wait(_Atp*, _Fn && __test_fn)
{
- return __libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
+#if defined(_LIBCPP_HAS_NO_THREADS)
+ using _Policy = __spinning_backoff_policy;
+#else
+ using _Policy = __libcpp_timed_backoff_policy;
+#endif
+ return __libcpp_thread_poll_with_backoff(__test_fn, _Policy());
}
#endif // _LIBCPP_HAS_NO_PLATFORM_WAIT
@@ -1847,19 +1803,32 @@ struct atomic<_Tp*>
{__base::store(__d); return __d;}
_LIBCPP_INLINE_VISIBILITY
- _Tp* fetch_add(ptrdiff_t __op, memory_order __m = memory_order_seq_cst)
- volatile _NOEXCEPT
- {return __cxx_atomic_fetch_add(&this->__a_, __op, __m);}
+ _Tp* fetch_add(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) volatile _NOEXCEPT {
+ // __atomic_fetch_add accepts function pointers, guard against them.
+ static_assert(!is_function<typename remove_pointer<_Tp>::type>::value, "Pointer to function isn't allowed");
+ return __cxx_atomic_fetch_add(&this->__a_, __op, __m);
+ }
+
_LIBCPP_INLINE_VISIBILITY
- _Tp* fetch_add(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) _NOEXCEPT
- {return __cxx_atomic_fetch_add(&this->__a_, __op, __m);}
+ _Tp* fetch_add(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) _NOEXCEPT {
+ // __atomic_fetch_add accepts function pointers, guard against them.
+ static_assert(!is_function<typename remove_pointer<_Tp>::type>::value, "Pointer to function isn't allowed");
+ return __cxx_atomic_fetch_add(&this->__a_, __op, __m);
+ }
+
_LIBCPP_INLINE_VISIBILITY
- _Tp* fetch_sub(ptrdiff_t __op, memory_order __m = memory_order_seq_cst)
- volatile _NOEXCEPT
- {return __cxx_atomic_fetch_sub(&this->__a_, __op, __m);}
+ _Tp* fetch_sub(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) volatile _NOEXCEPT {
+ // __atomic_fetch_add accepts function pointers, guard against them.
+ static_assert(!is_function<typename remove_pointer<_Tp>::type>::value, "Pointer to function isn't allowed");
+ return __cxx_atomic_fetch_sub(&this->__a_, __op, __m);
+ }
+
_LIBCPP_INLINE_VISIBILITY
- _Tp* fetch_sub(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) _NOEXCEPT
- {return __cxx_atomic_fetch_sub(&this->__a_, __op, __m);}
+ _Tp* fetch_sub(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) _NOEXCEPT {
+ // __atomic_fetch_add accepts function pointers, guard against them.
+ static_assert(!is_function<typename remove_pointer<_Tp>::type>::value, "Pointer to function isn't allowed");
+ return __cxx_atomic_fetch_sub(&this->__a_, __op, __m);
+ }
_LIBCPP_INLINE_VISIBILITY
_Tp* operator++(int) volatile _NOEXCEPT {return fetch_add(1);}
@@ -2195,11 +2164,7 @@ void atomic_notify_all(atomic<_Tp>* __o) _NOEXCEPT
template <class _Tp>
_LIBCPP_INLINE_VISIBILITY
-typename enable_if
-<
- is_integral<_Tp>::value && !is_same<_Tp, bool>::value && !is_const<_Tp>::value,
- _Tp
->::type
+_Tp
atomic_fetch_add(volatile atomic<_Tp>* __o, typename atomic<_Tp>::difference_type __op) _NOEXCEPT
{
return __o->fetch_add(__op);
@@ -2207,70 +2172,24 @@ atomic_fetch_add(volatile atomic<_Tp>* __o, typename atomic<_Tp>::difference_typ
template <class _Tp>
_LIBCPP_INLINE_VISIBILITY
-typename enable_if
-<
- is_integral<_Tp>::value && !is_same<_Tp, bool>::value && !is_const<_Tp>::value,
- _Tp
->::type
+_Tp
atomic_fetch_add(atomic<_Tp>* __o, typename atomic<_Tp>::difference_type __op) _NOEXCEPT
{
return __o->fetch_add(__op);
}
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp*
-atomic_fetch_add(volatile atomic<_Tp*>* __o, typename atomic<_Tp*>::difference_type __op) _NOEXCEPT
-{
- return __o->fetch_add(__op);
-}
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp*
-atomic_fetch_add(atomic<_Tp*>* __o, typename atomic<_Tp*>::difference_type __op) _NOEXCEPT
-{
- return __o->fetch_add(__op);
-}
-
// atomic_fetch_add_explicit
template <class _Tp>
_LIBCPP_INLINE_VISIBILITY
-typename enable_if
-<
- is_integral<_Tp>::value && !is_same<_Tp, bool>::value && !is_const<_Tp>::value,
- _Tp
->::type
-atomic_fetch_add_explicit(volatile atomic<_Tp>* __o, typename atomic<_Tp>::difference_type __op, memory_order __m) _NOEXCEPT
-{
- return __o->fetch_add(__op, __m);
-}
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-typename enable_if
-<
- is_integral<_Tp>::value && !is_same<_Tp, bool>::value && !is_const<_Tp>::value,
- _Tp
->::type
-atomic_fetch_add_explicit(atomic<_Tp>* __o, typename atomic<_Tp>::difference_type __op, memory_order __m) _NOEXCEPT
-{
- return __o->fetch_add(__op, __m);
-}
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp*
-atomic_fetch_add_explicit(volatile atomic<_Tp*>* __o, typename atomic<_Tp*>::difference_type __op, memory_order __m) _NOEXCEPT
+_Tp atomic_fetch_add_explicit(volatile atomic<_Tp>* __o, typename atomic<_Tp>::difference_type __op, memory_order __m) _NOEXCEPT
{
return __o->fetch_add(__op, __m);
}
template <class _Tp>
_LIBCPP_INLINE_VISIBILITY
-_Tp*
-atomic_fetch_add_explicit(atomic<_Tp*>* __o, typename atomic<_Tp*>::difference_type __op, memory_order __m) _NOEXCEPT
+_Tp atomic_fetch_add_explicit(atomic<_Tp>* __o, typename atomic<_Tp>::difference_type __op, memory_order __m) _NOEXCEPT
{
return __o->fetch_add(__op, __m);
}
@@ -2279,40 +2198,14 @@ atomic_fetch_add_explicit(atomic<_Tp*>* __o, typename atomic<_Tp*>::difference_t
template <class _Tp>
_LIBCPP_INLINE_VISIBILITY
-typename enable_if
-<
- is_integral<_Tp>::value && !is_same<_Tp, bool>::value && !is_const<_Tp>::value,
- _Tp
->::type
-atomic_fetch_sub(volatile atomic<_Tp>* __o, typename atomic<_Tp>::difference_type __op) _NOEXCEPT
-{
- return __o->fetch_sub(__op);
-}
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-typename enable_if
-<
- is_integral<_Tp>::value && !is_same<_Tp, bool>::value && !is_const<_Tp>::value,
- _Tp
->::type
-atomic_fetch_sub(atomic<_Tp>* __o, typename atomic<_Tp>::difference_type __op) _NOEXCEPT
-{
- return __o->fetch_sub(__op);
-}
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp*
-atomic_fetch_sub(volatile atomic<_Tp*>* __o, typename atomic<_Tp*>::difference_type __op) _NOEXCEPT
+_Tp atomic_fetch_sub(volatile atomic<_Tp>* __o, typename atomic<_Tp>::difference_type __op) _NOEXCEPT
{
return __o->fetch_sub(__op);
}
template <class _Tp>
_LIBCPP_INLINE_VISIBILITY
-_Tp*
-atomic_fetch_sub(atomic<_Tp*>* __o, typename atomic<_Tp*>::difference_type __op) _NOEXCEPT
+_Tp atomic_fetch_sub(atomic<_Tp>* __o, typename atomic<_Tp>::difference_type __op) _NOEXCEPT
{
return __o->fetch_sub(__op);
}
@@ -2321,40 +2214,14 @@ atomic_fetch_sub(atomic<_Tp*>* __o, typename atomic<_Tp*>::difference_type __op)
template <class _Tp>
_LIBCPP_INLINE_VISIBILITY
-typename enable_if
-<
- is_integral<_Tp>::value && !is_same<_Tp, bool>::value && !is_const<_Tp>::value,
- _Tp
->::type
-atomic_fetch_sub_explicit(volatile atomic<_Tp>* __o, typename atomic<_Tp>::difference_type __op, memory_order __m) _NOEXCEPT
+_Tp atomic_fetch_sub_explicit(volatile atomic<_Tp>* __o, typename atomic<_Tp>::difference_type __op, memory_order __m) _NOEXCEPT
{
return __o->fetch_sub(__op, __m);
}
template <class _Tp>
_LIBCPP_INLINE_VISIBILITY
-typename enable_if
-<
- is_integral<_Tp>::value && !is_same<_Tp, bool>::value && !is_const<_Tp>::value,
- _Tp
->::type
-atomic_fetch_sub_explicit(atomic<_Tp>* __o, typename atomic<_Tp>::difference_type __op, memory_order __m) _NOEXCEPT
-{
- return __o->fetch_sub(__op, __m);
-}
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp*
-atomic_fetch_sub_explicit(volatile atomic<_Tp*>* __o, typename atomic<_Tp*>::difference_type __op, memory_order __m) _NOEXCEPT
-{
- return __o->fetch_sub(__op, __m);
-}
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-_Tp*
-atomic_fetch_sub_explicit(atomic<_Tp*>* __o, typename atomic<_Tp*>::difference_type __op, memory_order __m) _NOEXCEPT
+_Tp atomic_fetch_sub_explicit(atomic<_Tp>* __o, typename atomic<_Tp>::difference_type __op, memory_order __m) _NOEXCEPT
{
return __o->fetch_sub(__op, __m);
}
@@ -2764,7 +2631,9 @@ typedef atomic<char8_t> atomic_char8_t;
#endif
typedef atomic<char16_t> atomic_char16_t;
typedef atomic<char32_t> atomic_char32_t;
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
typedef atomic<wchar_t> atomic_wchar_t;
+#endif
typedef atomic<int_least8_t> atomic_int_least8_t;
typedef atomic<uint_least8_t> atomic_uint_least8_t;
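
Aside: a small sketch of two behaviours visible in the <atomic> hunks above, pointer fetch_add (which now rejects function pointers via static_assert) and the C++20 notify interface (illustrative, standard API only):

    #include <atomic>

    void atomic_demo() {
        static int arr[4] = {};
        std::atomic<int*> p(arr);
        p.fetch_add(1);          // pointer arithmetic: advances by one element, not one byte

        std::atomic<int> flag(0);
        flag.store(1);
        flag.notify_one();       // C++20: wakes a thread blocked in flag.wait(0)
    }
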
diff --git a/libcxx/include/barrier b/libcxx/include/barrier
index e26dba65329a..aef88556a011 100644
--- a/libcxx/include/barrier
+++ b/libcxx/include/barrier
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- barrier ----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/bit b/libcxx/include/bit
index c64e45c5fa30..634475b99879 100644
--- a/libcxx/include/bit
+++ b/libcxx/include/bit
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===------------------------------ bit ----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -14,6 +14,9 @@
bit synopsis
namespace std {
+ // [bit.cast], bit_cast
+ template<class To, class From>
+ constexpr To bit_cast(const From& from) noexcept; // C++20
// [bit.pow.two], integral powers of 2
template <class T>
@@ -54,8 +57,9 @@ namespace std {
*/
-#include <__config>
+#include <__bit/bit_cast.h>
#include <__bits> // __libcpp_clz
+#include <__config>
#include <__debug>
#include <limits>
#include <type_traits>
@@ -222,7 +226,7 @@ bool __has_single_bit(_Tp __t) _NOEXCEPT
template<class _Tp>
_LIBCPP_INLINE_VISIBILITY constexpr
-_EnableIf<__libcpp_is_unsigned_integer<_Tp>::value, _Tp>
+enable_if_t<__libcpp_is_unsigned_integer<_Tp>::value, _Tp>
rotl(_Tp __t, unsigned int __cnt) noexcept
{
return __rotl(__t, __cnt);
@@ -230,7 +234,7 @@ rotl(_Tp __t, unsigned int __cnt) noexcept
template<class _Tp>
_LIBCPP_INLINE_VISIBILITY constexpr
-_EnableIf<__libcpp_is_unsigned_integer<_Tp>::value, _Tp>
+enable_if_t<__libcpp_is_unsigned_integer<_Tp>::value, _Tp>
rotr(_Tp __t, unsigned int __cnt) noexcept
{
return __rotr(__t, __cnt);
@@ -238,7 +242,7 @@ rotr(_Tp __t, unsigned int __cnt) noexcept
template<class _Tp>
_LIBCPP_INLINE_VISIBILITY constexpr
-_EnableIf<__libcpp_is_unsigned_integer<_Tp>::value, int>
+enable_if_t<__libcpp_is_unsigned_integer<_Tp>::value, int>
countl_zero(_Tp __t) noexcept
{
return __countl_zero(__t);
@@ -246,7 +250,7 @@ countl_zero(_Tp __t) noexcept
template<class _Tp>
_LIBCPP_INLINE_VISIBILITY constexpr
-_EnableIf<__libcpp_is_unsigned_integer<_Tp>::value, int>
+enable_if_t<__libcpp_is_unsigned_integer<_Tp>::value, int>
countl_one(_Tp __t) noexcept
{
return __countl_one(__t);
@@ -254,7 +258,7 @@ countl_one(_Tp __t) noexcept
template<class _Tp>
_LIBCPP_INLINE_VISIBILITY constexpr
-_EnableIf<__libcpp_is_unsigned_integer<_Tp>::value, int>
+enable_if_t<__libcpp_is_unsigned_integer<_Tp>::value, int>
countr_zero(_Tp __t) noexcept
{
return __countr_zero(__t);
@@ -262,7 +266,7 @@ countr_zero(_Tp __t) noexcept
template<class _Tp>
_LIBCPP_INLINE_VISIBILITY constexpr
-_EnableIf<__libcpp_is_unsigned_integer<_Tp>::value, int>
+enable_if_t<__libcpp_is_unsigned_integer<_Tp>::value, int>
countr_one(_Tp __t) noexcept
{
return __countr_one(__t);
@@ -270,7 +274,7 @@ countr_one(_Tp __t) noexcept
template<class _Tp>
_LIBCPP_INLINE_VISIBILITY constexpr
-_EnableIf<__libcpp_is_unsigned_integer<_Tp>::value, int>
+enable_if_t<__libcpp_is_unsigned_integer<_Tp>::value, int>
popcount(_Tp __t) noexcept
{
return __popcount(__t);
@@ -278,7 +282,7 @@ popcount(_Tp __t) noexcept
template <class _Tp>
_LIBCPP_INLINE_VISIBILITY constexpr
-_EnableIf<__libcpp_is_unsigned_integer<_Tp>::value, bool>
+enable_if_t<__libcpp_is_unsigned_integer<_Tp>::value, bool>
has_single_bit(_Tp __t) noexcept
{
return __has_single_bit(__t);
@@ -286,7 +290,7 @@ has_single_bit(_Tp __t) noexcept
template <class _Tp>
_LIBCPP_INLINE_VISIBILITY constexpr
-_EnableIf<__libcpp_is_unsigned_integer<_Tp>::value, _Tp>
+enable_if_t<__libcpp_is_unsigned_integer<_Tp>::value, _Tp>
bit_floor(_Tp __t) noexcept
{
return __t == 0 ? 0 : _Tp{1} << __bit_log2(__t);
@@ -294,7 +298,7 @@ bit_floor(_Tp __t) noexcept
template <class _Tp>
_LIBCPP_INLINE_VISIBILITY constexpr
-_EnableIf<__libcpp_is_unsigned_integer<_Tp>::value, _Tp>
+enable_if_t<__libcpp_is_unsigned_integer<_Tp>::value, _Tp>
bit_ceil(_Tp __t) noexcept
{
if (__t < 2) return 1;
@@ -313,7 +317,7 @@ bit_ceil(_Tp __t) noexcept
template <class _Tp>
_LIBCPP_INLINE_VISIBILITY constexpr
-_EnableIf<__libcpp_is_unsigned_integer<_Tp>::value, _Tp>
+enable_if_t<__libcpp_is_unsigned_integer<_Tp>::value, _Tp>
bit_width(_Tp __t) noexcept
{
return __t == 0 ? 0 : __bit_log2(__t) + 1;
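
Aside: a few illustrative C++20 <bit> checks covering bit_cast (added to the synopsis above) and the unsigned-integer utilities (standalone sketch):

    #include <bit>
    #include <cstdint>

    static_assert(std::bit_cast<std::uint32_t>(1.0f) == 0x3f800000u);  // reinterpret the bits without UB
    static_assert(std::bit_width(16u) == 5);                           // 16 needs 5 bits
    static_assert(std::bit_ceil(17u) == 32u);                          // next power of two
    static_assert(std::has_single_bit(64u));                           // exactly one bit set
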
diff --git a/libcxx/include/bitset b/libcxx/include/bitset
index 4b8827e774a3..8f538e92e7ff 100644
--- a/libcxx/include/bitset
+++ b/libcxx/include/bitset
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===---------------------------- bitset ----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -679,7 +679,7 @@ public:
_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR bitset() _NOEXCEPT {}
_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR
bitset(unsigned long long __v) _NOEXCEPT : base(__v) {}
- template<class _CharT, class = _EnableIf<_IsCharLikeType<_CharT>::value> >
+ template<class _CharT, class = __enable_if_t<_IsCharLikeType<_CharT>::value> >
explicit bitset(const _CharT* __str,
typename basic_string<_CharT>::size_type __n = basic_string<_CharT>::npos,
_CharT __zero = _CharT('0'), _CharT __one = _CharT('1'));
diff --git a/libcxx/include/cassert b/libcxx/include/cassert
index 25a0a746b8d1..ebd5a56bb046 100644
--- a/libcxx/include/cassert
+++ b/libcxx/include/cassert
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===-------------------------- cassert -----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/ccomplex b/libcxx/include/ccomplex
index cea450912841..3402fc9a3915 100644
--- a/libcxx/include/ccomplex
+++ b/libcxx/include/ccomplex
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- ccomplex ---------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/cctype b/libcxx/include/cctype
index b078056862a7..4235cd6acc8c 100644
--- a/libcxx/include/cctype
+++ b/libcxx/include/cctype
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===---------------------------- cctype ----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/cerrno b/libcxx/include/cerrno
index 1388d7eac226..486588e31fc4 100644
--- a/libcxx/include/cerrno
+++ b/libcxx/include/cerrno
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===-------------------------- cerrno ------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/cfenv b/libcxx/include/cfenv
index 05b55ee364f2..94ab79377c85 100644
--- a/libcxx/include/cfenv
+++ b/libcxx/include/cfenv
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===---------------------------- cfenv -----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/cfloat b/libcxx/include/cfloat
index 77ff5261e3a0..76374083407a 100644
--- a/libcxx/include/cfloat
+++ b/libcxx/include/cfloat
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- cfloat -----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/charconv b/libcxx/include/charconv
index de34112d8eb3..3c969dc79ab0 100644
--- a/libcxx/include/charconv
+++ b/libcxx/include/charconv
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===------------------------------ charconv ------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -27,10 +27,13 @@ namespace std {
struct to_chars_result {
char* ptr;
errc ec;
+ friend bool operator==(const to_chars_result&, const to_chars_result&) = default; // since C++20
};
to_chars_result to_chars(char* first, char* last, see below value,
int base = 10);
+ to_chars_result to_chars(char* first, char* last, bool value,
+ int base = 10) = delete;
to_chars_result to_chars(char* first, char* last, float value);
to_chars_result to_chars(char* first, char* last, double value);
@@ -54,6 +57,7 @@ namespace std {
struct from_chars_result {
const char* ptr;
errc ec;
+ friend bool operator==(const from_chars_result&, const from_chars_result&) = default; // since C++20
};
from_chars_result from_chars(const char* first, const char* last,
@@ -74,9 +78,12 @@ namespace std {
*/
#include <__availability>
+#include <__bits>
+#include <__charconv/chars_format.h>
+#include <__charconv/from_chars_result.h>
+#include <__charconv/to_chars_result.h>
#include <__config>
#include <__errc>
-#include <__utility/to_underlying.h>
#include <cmath> // for log2f
#include <cstdint>
#include <cstdlib> // for _LIBCPP_UNREACHABLE
@@ -102,69 +109,8 @@ _LIBCPP_AVAILABILITY_TO_CHARS _LIBCPP_FUNC_VIS char* __u32toa(uint32_t __value,
#ifndef _LIBCPP_CXX03_LANG
-enum class _LIBCPP_ENUM_VIS chars_format
-{
- scientific = 0x1,
- fixed = 0x2,
- hex = 0x4,
- general = fixed | scientific
-};
-
-inline _LIBCPP_INLINE_VISIBILITY constexpr chars_format
-operator~(chars_format __x) {
- return chars_format(~_VSTD::__to_underlying(__x));
-}
-
-inline _LIBCPP_INLINE_VISIBILITY constexpr chars_format
-operator&(chars_format __x, chars_format __y) {
- return chars_format(_VSTD::__to_underlying(__x) &
- _VSTD::__to_underlying(__y));
-}
-
-inline _LIBCPP_INLINE_VISIBILITY constexpr chars_format
-operator|(chars_format __x, chars_format __y) {
- return chars_format(_VSTD::__to_underlying(__x) |
- _VSTD::__to_underlying(__y));
-}
-
-inline _LIBCPP_INLINE_VISIBILITY constexpr chars_format
-operator^(chars_format __x, chars_format __y) {
- return chars_format(_VSTD::__to_underlying(__x) ^
- _VSTD::__to_underlying(__y));
-}
-
-inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11 chars_format&
-operator&=(chars_format& __x, chars_format __y) {
- __x = __x & __y;
- return __x;
-}
-
-inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11 chars_format&
-operator|=(chars_format& __x, chars_format __y) {
- __x = __x | __y;
- return __x;
-}
-
-inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11 chars_format&
-operator^=(chars_format& __x, chars_format __y) {
- __x = __x ^ __y;
- return __x;
-}
-
-struct _LIBCPP_TYPE_VIS to_chars_result
-{
- char* ptr;
- errc ec;
-};
-
-struct _LIBCPP_TYPE_VIS from_chars_result
-{
- const char* ptr;
- errc ec;
-};
-
-void to_chars(char*, char*, bool, int = 10) = delete;
-void from_chars(const char*, const char*, bool, int = 10) = delete;
+to_chars_result to_chars(char*, char*, bool, int = 10) = delete;
+from_chars_result from_chars(const char*, const char*, bool, int = 10) = delete;
namespace __itoa
{
@@ -204,13 +150,11 @@ struct _LIBCPP_HIDDEN __traits_base
{
using type = uint64_t;
-#if !defined(_LIBCPP_COMPILER_MSVC)
static _LIBCPP_INLINE_VISIBILITY int __width(_Tp __v)
{
- auto __t = (64 - __builtin_clzll(__v | 1)) * 1233 >> 12;
+ auto __t = (64 - _VSTD::__libcpp_clz(static_cast<type>(__v | 1))) * 1233 >> 12;
return __t - (__v < __pow10_64[__t]) + 1;
}
-#endif
_LIBCPP_AVAILABILITY_TO_CHARS
static _LIBCPP_INLINE_VISIBILITY char* __convert(_Tp __v, char* __p)
@@ -227,13 +171,11 @@ struct _LIBCPP_HIDDEN
{
using type = uint32_t;
-#if !defined(_LIBCPP_COMPILER_MSVC)
static _LIBCPP_INLINE_VISIBILITY int __width(_Tp __v)
{
- auto __t = (32 - __builtin_clz(__v | 1)) * 1233 >> 12;
+ auto __t = (32 - _VSTD::__libcpp_clz(static_cast<type>(__v | 1))) * 1233 >> 12;
return __t - (__v < __pow10_32[__t]) + 1;
}
-#endif
_LIBCPP_AVAILABILITY_TO_CHARS
static _LIBCPP_INLINE_VISIBILITY char* __convert(_Tp __v, char* __p)
@@ -354,28 +296,10 @@ __to_chars_itoa(char* __first, char* __last, _Tp __value, false_type)
using __tx = __itoa::__traits<_Tp>;
auto __diff = __last - __first;
-#if !defined(_LIBCPP_COMPILER_MSVC)
if (__tx::digits <= __diff || __tx::__width(__value) <= __diff)
return {__tx::__convert(__value, __first), errc(0)};
else
return {__last, errc::value_too_large};
-#else
- if (__tx::digits <= __diff)
- return {__tx::__convert(__value, __first), {}};
- else
- {
- char __buf[__tx::digits];
- auto __p = __tx::__convert(__value, __buf);
- auto __len = __p - __buf;
- if (__len <= __diff)
- {
- _VSTD::memcpy(__first, __buf, __len);
- return {__first + __len, {}};
- }
- else
- return {__last, errc::value_too_large};
- }
-#endif
}
template <typename _Tp>
diff --git a/libcxx/include/chrono b/libcxx/include/chrono
index 6bdb92912ad0..d29734f6c358 100644
--- a/libcxx/include/chrono
+++ b/libcxx/include/chrono
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===---------------------------- chrono ----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -148,11 +148,6 @@ template <class Clock, class Duration1, class Duration2>
namespace chrono {
-
-template<class T> struct is_clock; // C++20
-template<class T> inline constexpr bool is_clock_v = is_clock<T>::value; // C++20
-
-
// duration arithmetic
template <class Rep1, class Period1, class Rep2, class Period2>
constexpr
@@ -281,19 +276,23 @@ template <class Duration>
using sys_seconds = sys_time<seconds>; // C++20
using sys_days = sys_time<days>; // C++20
-class utc_clock; // C++20
-
-template <class Duration>
- using utc_time = time_point<utc_clock, Duration>; // C++20
-using utc_seconds = utc_time<seconds>; // C++20
+class file_clock // C++20
+{
+public:
+ typedef see-below rep;
+ typedef nano period;
+ typedef chrono::duration<rep, period> duration;
+ typedef chrono::time_point<file_clock> time_point;
+ static constexpr bool is_steady = false;
-class tai_clock; // C++20
+ static time_point now() noexcept;
-template <class Duration>
- using tai_time = time_point<tai_clock, Duration>; // C++20
-using tai_seconds = tai_time<seconds>; // C++20
+ template<class Duration>
+ static sys_time<see-below> to_sys(const file_time<Duration>&);
-class file_clock; // C++20
+ template<class Duration>
+ static file_time<see-below> from_sys(const sys_time<Duration>&);
+};
template<class Duration>
using file_time = time_point<file_clock, Duration>; // C++20
@@ -319,12 +318,6 @@ template<class Duration>
using local_seconds = local_time<seconds>;
using local_days = local_time<days>;
-// 25.7.9, time_point conversions template<class DestClock, class SourceClock> // C++20
-struct clock_time_conversion;
-
-template<class DestClock, class SourceClock, class Duration>
- auto clock_cast(const time_point<SourceClock, Duration>& t);
-
// 25.8.2, class last_spec // C++20
struct last_spec;
@@ -639,39 +632,12 @@ public:
constexpr precision to_duration() const noexcept;
};
-template <class charT, class traits, class Duration>
- basic_ostream<charT, traits>&
- operator<<(basic_ostream<charT, traits>& os, hh_mm_ss<Duration> const& hms);
-
// 26.10, 12/24 hour functions
constexpr bool is_am(hours const& h) noexcept;
constexpr bool is_pm(hours const& h) noexcept;
constexpr hours make12(const hours& h) noexcept;
constexpr hours make24(const hours& h, bool is_pm) noexcept;
-
-// 25.10.2, time zone database // C++20
-struct tzdb;
-class tzdb_list;
-
-// 25.10.2.3, time zone database access // C++20
-const tzdb& get_tzdb();
-tzdb_list& get_tzdb_list();
-const time_zone* locate_zone(string_view tz_name);
-const time_zone* current_zone();
-
-// 25.10.2.4, remote time zone database support // C++20
-const tzdb& reload_tzdb();
-string remote_version();
-
-// 25.10.3, exception classes // C++20
-class nonexistent_local_time;
-class ambiguous_local_time;
-
-// 25.10.4, information classes // C++20
-struct sys_info;
-struct local_info;
-
// 25.10.5, class time_zone // C++20
enum class choose {earliest, latest};
class time_zone;
@@ -682,101 +648,6 @@ bool operator>(const time_zone& x, const time_zone& y) noexcept;
bool operator<=(const time_zone& x, const time_zone& y) noexcept;
bool operator>=(const time_zone& x, const time_zone& y) noexcept;
-// 25.10.6, class template zoned_traits // C++20
-template<class T> struct zoned_traits;
-
-// 25.10.7, class template zoned_time // C++20
-template<class Duration, class TimeZonePtr = const time_zone*> class zoned_time;
-using zoned_seconds = zoned_time<seconds>;
-
-template<class Duration1, class Duration2, class TimeZonePtr>
- bool operator==(const zoned_time<Duration1, TimeZonePtr>& x,
- const zoned_time<Duration2, TimeZonePtr>& y);
-template<class Duration1, class Duration2, class TimeZonePtr>
- bool operator!=(const zoned_time<Duration1, TimeZonePtr>& x,
- const zoned_time<Duration2, TimeZonePtr>& y);
-
-// 25.10.8, leap second support // C++20
-class leap;
-
-bool operator==(const leap& x, const leap& y);
-bool operator!=(const leap& x, const leap& y);
-bool operator< (const leap& x, const leap& y);
-bool operator> (const leap& x, const leap& y);
-bool operator<=(const leap& x, const leap& y);
-bool operator>=(const leap& x, const leap& y);
-template<class Duration>
- bool operator==(const leap& x, const sys_time<Duration>& y);
-template<class Duration>
- bool operator==(const sys_time<Duration>& x, const leap& y);
-template<class Duration>
- bool operator!=(const leap& x, const sys_time<Duration>& y);
-template<class Duration>
- bool operator!=(const sys_time<Duration>& x, const leap& y);
-template<class Duration>
- bool operator< (const leap& x, const sys_time<Duration>& y);
-template<class Duration>
- bool operator< (const sys_time<Duration>& x, const leap& y);
-template<class Duration>
- bool operator> (const leap& x, const sys_time<Duration>& y);
-template<class Duration>
- bool operator> (const sys_time<Duration>& x, const leap& y);
-template<class Duration>
- bool operator<=(const leap& x, const sys_time<Duration>& y);
-template<class Duration>
- bool operator<=(const sys_time<Duration>& x, const leap& y);
-template<class Duration>
- bool operator>=(const leap& x, const sys_time<Duration>& y);
-template<class Duration>
- bool operator>=(const sys_time<Duration>& x, const leap& y);
-
-// 25.10.9, class link // C++20
-class link;
-bool operator==(const link& x, const link& y);
-bool operator!=(const link& x, const link& y);
-bool operator< (const link& x, const link& y);
-bool operator> (const link& x, const link& y);
-bool operator<=(const link& x, const link& y);
-bool operator>=(const link& x, const link& y);
-
-// 25.11, formatting // C++20
-template<class charT, class Streamable>
- basic_string<charT>
- format(const charT* fmt, const Streamable& s);
-
-template<class charT, class Streamable>
- basic_string<charT>
- format(const locale& loc, const charT* fmt, const Streamable& s);
-
-template<class charT, class traits, class Alloc, class Streamable>
- basic_string<charT, traits, Alloc>
- format(const basic_string<charT, traits, Alloc>& fmt, const Streamable& s);
-
-template<class charT, class traits, class Alloc, class Streamable>
- basic_string<charT, traits, Alloc>
- format(const locale& loc, const basic_string<charT, traits, Alloc>& fmt,
- const Streamable& s);
-
-// 25.12, parsing // C++20
-template<class charT, class traits, class Alloc, class Parsable>
-unspecified
- parse(const basic_string<charT, traits, Alloc>& format, Parsable& tp);
-
-template<class charT, class traits, class Alloc, class Parsable>
-unspecified
- parse(const basic_string<charT, traits, Alloc>& format, Parsable& tp,
- basic_string<charT, traits, Alloc>& abbrev);
-
-template<class charT, class traits, class Alloc, class Parsable>
-unspecified
- parse(const basic_string<charT, traits, Alloc>& format, Parsable& tp,
- minutes& offset);
-
-template<class charT, class traits, class Alloc, class Parsable>
-unspecified
- parse(const basic_string<charT, traits, Alloc>& format, Parsable& tp,
- basic_string<charT, traits, Alloc>& abbrev, minutes& offset);
-
// calendrical constants
inline constexpr last_spec last{}; // C++20
inline constexpr chrono::weekday Sunday{0}; // C++20
@@ -950,10 +821,9 @@ duration_cast(const duration<_Rep, _Period>& __fd)
template <class _Rep>
struct _LIBCPP_TEMPLATE_VIS treat_as_floating_point : is_floating_point<_Rep> {};
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Rep>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool treat_as_floating_point_v
- = treat_as_floating_point<_Rep>::value;
+inline constexpr bool treat_as_floating_point_v = treat_as_floating_point<_Rep>::value;
#endif
template <class _Rep>
@@ -1840,6 +1710,7 @@ class weekday_last;
class weekday {
private:
unsigned char __wd;
+ static constexpr unsigned char __weekday_from_days(int __days) noexcept;
public:
weekday() = default;
inline explicit constexpr weekday(unsigned __val) noexcept : __wd(static_cast<unsigned char>(__val == 7 ? 0 : __val)) {}
@@ -1859,9 +1730,6 @@ public:
inline constexpr bool ok() const noexcept { return __wd <= 6; }
constexpr weekday_indexed operator[](unsigned __index) const noexcept;
constexpr weekday_last operator[](last_spec) const noexcept;
-
- // TODO: Make private?
- static constexpr unsigned char __weekday_from_days(int __days) noexcept;
};
@@ -2907,7 +2775,7 @@ inline namespace literals
return chrono::duration<long double, nano> (__ns);
}
-#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CXX20_CHRONO_LITERALS)
+#if _LIBCPP_STD_VER > 17
constexpr chrono::day operator ""d(unsigned long long __d) noexcept
{
return chrono::day(static_cast<unsigned>(__d));
@@ -2947,18 +2815,19 @@ struct _FilesystemClock {
_LIBCPP_AVAILABILITY_FILESYSTEM _LIBCPP_FUNC_VIS static time_point now() noexcept;
- _LIBCPP_INLINE_VISIBILITY
- static time_t to_time_t(const time_point& __t) noexcept {
- typedef chrono::duration<rep> __secs;
- return time_t(
- chrono::duration_cast<__secs>(__t.time_since_epoch()).count());
+#if _LIBCPP_STD_VER > 17
+ template <class _Duration>
+ _LIBCPP_HIDE_FROM_ABI
+ static chrono::sys_time<_Duration> to_sys(const chrono::file_time<_Duration>& __t) {
+ return chrono::sys_time<_Duration>(__t.time_since_epoch());
}
- _LIBCPP_INLINE_VISIBILITY
- static time_point from_time_t(time_t __t) noexcept {
- typedef chrono::duration<rep> __secs;
- return time_point(__secs(__t));
+ template <class _Duration>
+ _LIBCPP_HIDE_FROM_ABI
+ static chrono::file_time<_Duration> from_sys(const chrono::sys_time<_Duration>& __t) {
+ return chrono::file_time<_Duration>(__t.time_since_epoch());
}
+#endif // _LIBCPP_STD_VER > 17
};
_LIBCPP_END_NAMESPACE_FILESYSTEM
#endif // !_LIBCPP_CXX03_LANG
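A quick usage sketch (not part of the patch) of the to_sys/from_sys interface this change adds to the filesystem clock, assuming a C++20 build of the patched libc++; other standard libraries may expose the conversion only through clock_cast, which this synopsis drops as unimplemented.

    #include <chrono>
    #include <filesystem>
    #include <iostream>

    int main() {
        namespace fs  = std::filesystem;
        namespace chr = std::chrono;

        // file_time_type is a time_point on the filesystem clock.
        fs::file_time_type ft = fs::last_write_time(".");

        // file_clock::to_sys maps file_time<Duration> onto sys_time<Duration>.
        auto st = chr::file_clock::to_sys(ft);
        // from_sys is the inverse mapping, so the round trip is exact.
        auto rt = chr::file_clock::from_sys(st);

        std::cout << "seconds since the system epoch: "
                  << chr::duration_cast<chr::seconds>(st.time_since_epoch()).count()
                  << '\n';
        return rt == ft ? 0 : 1;
    }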
diff --git a/libcxx/include/cinttypes b/libcxx/include/cinttypes
index 7ce4a8ad5d80..07d54a3b626a 100644
--- a/libcxx/include/cinttypes
+++ b/libcxx/include/cinttypes
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- cinttypes --------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/ciso646 b/libcxx/include/ciso646
index c37f6379746b..b2538d06a8dc 100644
--- a/libcxx/include/ciso646
+++ b/libcxx/include/ciso646
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- ciso646 ----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/climits b/libcxx/include/climits
index 217ec6286e8c..2040e1ad02a4 100644
--- a/libcxx/include/climits
+++ b/libcxx/include/climits
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- climits ----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/clocale b/libcxx/include/clocale
index 8fe91fc5a3a4..ed8c7d240633 100644
--- a/libcxx/include/clocale
+++ b/libcxx/include/clocale
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- clocale ----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -44,9 +44,7 @@ lconv* localeconv();
_LIBCPP_BEGIN_NAMESPACE_STD
using ::lconv _LIBCPP_USING_IF_EXISTS;
-#ifndef _LIBCPP_HAS_NO_THREAD_UNSAFE_C_FUNCTIONS
using ::setlocale _LIBCPP_USING_IF_EXISTS;
-#endif
using ::localeconv _LIBCPP_USING_IF_EXISTS;
_LIBCPP_END_NAMESPACE_STD
diff --git a/libcxx/include/cmath b/libcxx/include/cmath
index adf83c2b0a7b..3a7985f7d454 100644
--- a/libcxx/include/cmath
+++ b/libcxx/include/cmath
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===---------------------------- cmath -----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -535,7 +535,7 @@ inline _LIBCPP_INLINE_VISIBILITY long double hypot( long double x, long double y
template <class _A1, class _A2, class _A3>
inline _LIBCPP_INLINE_VISIBILITY
-typename _EnableIf
+typename enable_if_t
<
is_arithmetic<_A1>::value &&
is_arithmetic<_A2>::value &&
diff --git a/libcxx/include/codecvt b/libcxx/include/codecvt
index ec7d4a780958..60d3db882c03 100644
--- a/libcxx/include/codecvt
+++ b/libcxx/include/codecvt
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===-------------------------- codecvt -----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -74,6 +74,7 @@ enum codecvt_mode
template <class _Elem> class __codecvt_utf8;
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
template <>
class _LIBCPP_TYPE_VIS __codecvt_utf8<wchar_t>
: public codecvt<wchar_t, char, mbstate_t>
@@ -108,6 +109,7 @@ protected:
size_t __mx) const;
virtual int do_max_length() const _NOEXCEPT;
};
+#endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS
_LIBCPP_SUPPRESS_DEPRECATED_PUSH
template <>
@@ -203,6 +205,7 @@ public:
template <class _Elem, bool _LittleEndian> class __codecvt_utf16;
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
template <>
class _LIBCPP_TYPE_VIS __codecvt_utf16<wchar_t, false>
: public codecvt<wchar_t, char, mbstate_t>
@@ -272,6 +275,7 @@ protected:
size_t __mx) const;
virtual int do_max_length() const _NOEXCEPT;
};
+#endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS
_LIBCPP_SUPPRESS_DEPRECATED_PUSH
template <>
@@ -443,6 +447,7 @@ public:
template <class _Elem> class __codecvt_utf8_utf16;
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
template <>
class _LIBCPP_TYPE_VIS __codecvt_utf8_utf16<wchar_t>
: public codecvt<wchar_t, char, mbstate_t>
@@ -477,6 +482,7 @@ protected:
size_t __mx) const;
virtual int do_max_length() const _NOEXCEPT;
};
+#endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS
_LIBCPP_SUPPRESS_DEPRECATED_PUSH
template <>
diff --git a/libcxx/include/compare b/libcxx/include/compare
index e924bffed165..8a2a82907062 100644
--- a/libcxx/include/compare
+++ b/libcxx/include/compare
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===-------------------------- compare -----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -20,8 +20,8 @@ namespace std {
class strong_ordering;
// named comparison functions
- constexpr bool is_eq (partial_ordering cmp) noexcept { return cmp == 0; }
- constexpr bool is_neq (partial_ordering cmp) noexcept { return cmp != 0; }
+ constexpr bool is_eq (partial_ordering cmp) noexcept { return cmp == 0; }
+ constexpr bool is_neq (partial_ordering cmp) noexcept { return cmp != 0; }
constexpr bool is_lt (partial_ordering cmp) noexcept { return cmp < 0; }
constexpr bool is_lteq(partial_ordering cmp) noexcept { return cmp <= 0; }
constexpr bool is_gt (partial_ordering cmp) noexcept { return cmp > 0; }
@@ -35,6 +35,21 @@ namespace std {
template<class... Ts>
using common_comparison_category_t = typename common_comparison_category<Ts...>::type;
+ // [cmp.concept], concept three_way_comparable
+ template<class T, class Cat = partial_ordering>
+ concept three_way_comparable = see below;
+ template<class T, class U, class Cat = partial_ordering>
+ concept three_way_comparable_with = see below;
+
+ // [cmp.result], result of three-way comparison
+ template<class T, class U = T> struct compare_three_way_result;
+
+ template<class T, class U = T>
+ using compare_three_way_result_t = typename compare_three_way_result<T, U>::type;
+
+ // [comparisons.three.way], class compare_three_way
+ struct compare_three_way; // C++20
+
// [cmp.alg], comparison algorithms
template<class T> constexpr strong_ordering strong_order(const T& a, const T& b);
template<class T> constexpr weak_ordering weak_order(const T& a, const T& b);
@@ -120,8 +135,13 @@ namespace std {
}
*/
+#include <__compare/common_comparison_category.h>
+#include <__compare/compare_three_way.h>
+#include <__compare/compare_three_way_result.h>
+#include <__compare/is_eq.h>
+#include <__compare/ordering.h>
+#include <__compare/three_way_comparable.h>
#include <__config>
-#include <type_traits>
#ifndef _LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER
#pragma GCC system_header
@@ -129,352 +149,7 @@ namespace std {
_LIBCPP_BEGIN_NAMESPACE_STD
-#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_SPACESHIP_OPERATOR)
-// exposition only
-enum class _LIBCPP_ENUM_VIS _EqResult : unsigned char {
- __zero = 0,
- __equal = __zero,
- __equiv = __equal,
- __nonequal = 1,
- __nonequiv = __nonequal
-};
-
-enum class _LIBCPP_ENUM_VIS _OrdResult : signed char {
- __less = -1,
- __greater = 1
-};
-
-enum class _LIBCPP_ENUM_VIS _NCmpResult : signed char {
- __unordered = -127
-};
-
-class partial_ordering;
-class weak_ordering;
-class strong_ordering;
-
-template<class _Tp, class... _Args>
-inline constexpr bool __one_of_v = (is_same_v<_Tp, _Args> || ...);
-
-struct _CmpUnspecifiedParam {
- _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEVAL
- _CmpUnspecifiedParam(int _CmpUnspecifiedParam::*) noexcept {}
-
- template<class _Tp, class = enable_if_t<!__one_of_v<_Tp, int, partial_ordering, weak_ordering, strong_ordering>>>
- _CmpUnspecifiedParam(_Tp) = delete;
-};
-
-class partial_ordering {
- using _ValueT = signed char;
-
- _LIBCPP_INLINE_VISIBILITY
- explicit constexpr partial_ordering(_EqResult __v) noexcept
- : __value_(_ValueT(__v)) {}
-
- _LIBCPP_INLINE_VISIBILITY
- explicit constexpr partial_ordering(_OrdResult __v) noexcept
- : __value_(_ValueT(__v)) {}
-
- _LIBCPP_INLINE_VISIBILITY
- explicit constexpr partial_ordering(_NCmpResult __v) noexcept
- : __value_(_ValueT(__v)) {}
-
- constexpr bool __is_ordered() const noexcept {
- return __value_ != _ValueT(_NCmpResult::__unordered);
- }
-public:
- // valid values
- static const partial_ordering less;
- static const partial_ordering equivalent;
- static const partial_ordering greater;
- static const partial_ordering unordered;
-
- // comparisons
- _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator==(partial_ordering, partial_ordering) noexcept = default;
-
- _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator==(partial_ordering __v, _CmpUnspecifiedParam) noexcept {
- return __v.__is_ordered() && __v.__value_ == 0;
- }
-
- _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator< (partial_ordering __v, _CmpUnspecifiedParam) noexcept {
- return __v.__is_ordered() && __v.__value_ < 0;
- }
-
- _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator<=(partial_ordering __v, _CmpUnspecifiedParam) noexcept {
- return __v.__is_ordered() && __v.__value_ <= 0;
- }
-
- _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator> (partial_ordering __v, _CmpUnspecifiedParam) noexcept {
- return __v.__is_ordered() && __v.__value_ > 0;
- }
-
- _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator>=(partial_ordering __v, _CmpUnspecifiedParam) noexcept {
- return __v.__is_ordered() && __v.__value_ >= 0;
- }
-
- _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator< (_CmpUnspecifiedParam, partial_ordering __v) noexcept {
- return __v.__is_ordered() && 0 < __v.__value_;
- }
-
- _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator<=(_CmpUnspecifiedParam, partial_ordering __v) noexcept {
- return __v.__is_ordered() && 0 <= __v.__value_;
- }
-
- _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator> (_CmpUnspecifiedParam, partial_ordering __v) noexcept {
- return __v.__is_ordered() && 0 > __v.__value_;
- }
-
- _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator>=(_CmpUnspecifiedParam, partial_ordering __v) noexcept {
- return __v.__is_ordered() && 0 >= __v.__value_;
- }
-
- _LIBCPP_INLINE_VISIBILITY friend constexpr partial_ordering operator<=>(partial_ordering __v, _CmpUnspecifiedParam) noexcept {
- return __v;
- }
-
- _LIBCPP_INLINE_VISIBILITY friend constexpr partial_ordering operator<=>(_CmpUnspecifiedParam, partial_ordering __v) noexcept {
- return __v < 0 ? partial_ordering::greater : (__v > 0 ? partial_ordering::less : __v);
- }
-private:
- _ValueT __value_;
-};
-
-_LIBCPP_INLINE_VAR constexpr partial_ordering partial_ordering::less(_OrdResult::__less);
-_LIBCPP_INLINE_VAR constexpr partial_ordering partial_ordering::equivalent(_EqResult::__equiv);
-_LIBCPP_INLINE_VAR constexpr partial_ordering partial_ordering::greater(_OrdResult::__greater);
-_LIBCPP_INLINE_VAR constexpr partial_ordering partial_ordering::unordered(_NCmpResult ::__unordered);
-
-class weak_ordering {
- using _ValueT = signed char;
-
- _LIBCPP_INLINE_VISIBILITY
- explicit constexpr weak_ordering(_EqResult __v) noexcept : __value_(_ValueT(__v)) {}
- _LIBCPP_INLINE_VISIBILITY
- explicit constexpr weak_ordering(_OrdResult __v) noexcept : __value_(_ValueT(__v)) {}
-
-public:
- static const weak_ordering less;
- static const weak_ordering equivalent;
- static const weak_ordering greater;
-
- _LIBCPP_INLINE_VISIBILITY
- constexpr operator partial_ordering() const noexcept {
- return __value_ == 0 ? partial_ordering::equivalent
- : (__value_ < 0 ? partial_ordering::less : partial_ordering::greater);
- }
-
- // comparisons
- _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator==(weak_ordering, weak_ordering) noexcept = default;
-
- _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator==(weak_ordering __v, _CmpUnspecifiedParam) noexcept {
- return __v.__value_ == 0;
- }
-
- _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator< (weak_ordering __v, _CmpUnspecifiedParam) noexcept {
- return __v.__value_ < 0;
- }
-
- _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator<=(weak_ordering __v, _CmpUnspecifiedParam) noexcept {
- return __v.__value_ <= 0;
- }
-
- _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator> (weak_ordering __v, _CmpUnspecifiedParam) noexcept {
- return __v.__value_ > 0;
- }
-
- _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator>=(weak_ordering __v, _CmpUnspecifiedParam) noexcept {
- return __v.__value_ >= 0;
- }
-
- _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator< (_CmpUnspecifiedParam, weak_ordering __v) noexcept {
- return 0 < __v.__value_;
- }
-
- _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator<=(_CmpUnspecifiedParam, weak_ordering __v) noexcept {
- return 0 <= __v.__value_;
- }
-
- _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator> (_CmpUnspecifiedParam, weak_ordering __v) noexcept {
- return 0 > __v.__value_;
- }
-
- _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator>=(_CmpUnspecifiedParam, weak_ordering __v) noexcept {
- return 0 >= __v.__value_;
- }
-
- _LIBCPP_INLINE_VISIBILITY friend constexpr weak_ordering operator<=>(weak_ordering __v, _CmpUnspecifiedParam) noexcept {
- return __v;
- }
-
- _LIBCPP_INLINE_VISIBILITY friend constexpr weak_ordering operator<=>(_CmpUnspecifiedParam, weak_ordering __v) noexcept {
- return __v < 0 ? weak_ordering::greater : (__v > 0 ? weak_ordering::less : __v);
- }
-
-private:
- _ValueT __value_;
-};
-
-_LIBCPP_INLINE_VAR constexpr weak_ordering weak_ordering::less(_OrdResult::__less);
-_LIBCPP_INLINE_VAR constexpr weak_ordering weak_ordering::equivalent(_EqResult::__equiv);
-_LIBCPP_INLINE_VAR constexpr weak_ordering weak_ordering::greater(_OrdResult::__greater);
-class strong_ordering {
- using _ValueT = signed char;
-
- _LIBCPP_INLINE_VISIBILITY
- explicit constexpr strong_ordering(_EqResult __v) noexcept : __value_(_ValueT(__v)) {}
- _LIBCPP_INLINE_VISIBILITY
- explicit constexpr strong_ordering(_OrdResult __v) noexcept : __value_(_ValueT(__v)) {}
-
-public:
- static const strong_ordering less;
- static const strong_ordering equal;
- static const strong_ordering equivalent;
- static const strong_ordering greater;
-
- // conversions
- _LIBCPP_INLINE_VISIBILITY
- constexpr operator partial_ordering() const noexcept {
- return __value_ == 0 ? partial_ordering::equivalent
- : (__value_ < 0 ? partial_ordering::less : partial_ordering::greater);
- }
-
- _LIBCPP_INLINE_VISIBILITY
- constexpr operator weak_ordering() const noexcept {
- return __value_ == 0 ? weak_ordering::equivalent
- : (__value_ < 0 ? weak_ordering::less : weak_ordering::greater);
- }
-
- // comparisons
- _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator==(strong_ordering, strong_ordering) noexcept = default;
-
- _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator==(strong_ordering __v, _CmpUnspecifiedParam) noexcept {
- return __v.__value_ == 0;
- }
-
- _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator< (strong_ordering __v, _CmpUnspecifiedParam) noexcept {
- return __v.__value_ < 0;
- }
-
- _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator<=(strong_ordering __v, _CmpUnspecifiedParam) noexcept {
- return __v.__value_ <= 0;
- }
-
- _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator> (strong_ordering __v, _CmpUnspecifiedParam) noexcept {
- return __v.__value_ > 0;
- }
-
- _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator>=(strong_ordering __v, _CmpUnspecifiedParam) noexcept {
- return __v.__value_ >= 0;
- }
-
- _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator< (_CmpUnspecifiedParam, strong_ordering __v) noexcept {
- return 0 < __v.__value_;
- }
-
- _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator<=(_CmpUnspecifiedParam, strong_ordering __v) noexcept {
- return 0 <= __v.__value_;
- }
-
- _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator> (_CmpUnspecifiedParam, strong_ordering __v) noexcept {
- return 0 > __v.__value_;
- }
-
- _LIBCPP_INLINE_VISIBILITY friend constexpr bool operator>=(_CmpUnspecifiedParam, strong_ordering __v) noexcept {
- return 0 >= __v.__value_;
- }
-
- _LIBCPP_INLINE_VISIBILITY friend constexpr strong_ordering operator<=>(strong_ordering __v, _CmpUnspecifiedParam) noexcept {
- return __v;
- }
-
- _LIBCPP_INLINE_VISIBILITY friend constexpr strong_ordering operator<=>(_CmpUnspecifiedParam, strong_ordering __v) noexcept {
- return __v < 0 ? strong_ordering::greater : (__v > 0 ? strong_ordering::less : __v);
- }
-
-private:
- _ValueT __value_;
-};
-
-_LIBCPP_INLINE_VAR constexpr strong_ordering strong_ordering::less(_OrdResult::__less);
-_LIBCPP_INLINE_VAR constexpr strong_ordering strong_ordering::equal(_EqResult::__equal);
-_LIBCPP_INLINE_VAR constexpr strong_ordering strong_ordering::equivalent(_EqResult::__equiv);
-_LIBCPP_INLINE_VAR constexpr strong_ordering strong_ordering::greater(_OrdResult::__greater);
-
-// named comparison functions
-_LIBCPP_INLINE_VISIBILITY
-constexpr bool is_lt(partial_ordering __cmp) noexcept { return __cmp < 0; }
-
-_LIBCPP_INLINE_VISIBILITY
-constexpr bool is_lteq(partial_ordering __cmp) noexcept { return __cmp <= 0; }
-
-_LIBCPP_INLINE_VISIBILITY
-constexpr bool is_gt(partial_ordering __cmp) noexcept { return __cmp > 0; }
-
-_LIBCPP_INLINE_VISIBILITY
-constexpr bool is_gteq(partial_ordering __cmp) noexcept { return __cmp >= 0; }
-
-namespace __comp_detail {
-
-enum _ClassifyCompCategory : unsigned{
- _None,
- _PartialOrd,
- _WeakOrd,
- _StrongOrd,
- _CCC_Size
-};
-
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY
-constexpr _ClassifyCompCategory __type_to_enum() noexcept {
- if (is_same_v<_Tp, partial_ordering>)
- return _PartialOrd;
- if (is_same_v<_Tp, weak_ordering>)
- return _WeakOrd;
- if (is_same_v<_Tp, strong_ordering>)
- return _StrongOrd;
- return _None;
-}
-
-template <size_t _Size>
-constexpr _ClassifyCompCategory
-__compute_comp_type(const _ClassifyCompCategory (&__types)[_Size]) {
- int __seen[_CCC_Size] = {};
- for (auto __type : __types)
- ++__seen[__type];
- if (__seen[_None])
- return _None;
- if (__seen[_PartialOrd])
- return _PartialOrd;
- if (__seen[_WeakOrd])
- return _WeakOrd;
- return _StrongOrd;
-}
-
-template <class ..._Ts, bool _False = false>
-constexpr auto __get_comp_type() {
- using _CCC = _ClassifyCompCategory;
- constexpr _CCC __type_kinds[] = {_StrongOrd, __type_to_enum<_Ts>()...};
- constexpr _CCC _Cat = __compute_comp_type(__type_kinds);
- if constexpr (_Cat == _None)
- return void();
- else if constexpr (_Cat == _PartialOrd)
- return partial_ordering::equivalent;
- else if constexpr (_Cat == _WeakOrd)
- return weak_ordering::equivalent;
- else if constexpr (_Cat == _StrongOrd)
- return strong_ordering::equivalent;
- else
- static_assert(_False, "unhandled case");
-}
-} // namespace __comp_detail
-
-// [cmp.common], common comparison category type
-template<class... _Ts>
-struct _LIBCPP_TEMPLATE_VIS common_comparison_category {
- using type = decltype(__comp_detail::__get_comp_type<_Ts...>());
-};
-
-template<class... _Ts>
-using common_comparison_category_t = typename common_comparison_category<_Ts...>::type;
+#if _LIBCPP_STD_VER > 17
// [cmp.alg], comparison algorithms
// TODO: unimplemented
@@ -482,7 +157,7 @@ template<class _Tp> constexpr strong_ordering strong_order(const _Tp& __lhs, con
template<class _Tp> constexpr weak_ordering weak_order(const _Tp& __lhs, const _Tp& __rhs);
template<class _Tp> constexpr partial_ordering partial_order(const _Tp& __lhs, const _Tp& __rhs);
-#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_SPACESHIP_OPERATOR)
+#endif // _LIBCPP_STD_VER > 17
_LIBCPP_END_NAMESPACE_STD
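A usage sketch (not part of the patch) of the facilities <compare> now pulls in from the __compare/ detail headers: a defaulted three-way comparison, the three_way_comparable concept, compare_three_way_result_t, the compare_three_way function object, and the is_eq/is_lt helpers. Note that <type_traits> must now be included explicitly, since the patch removes it from <compare>.

    #include <compare>
    #include <type_traits>

    struct Point {
        int x;
        int y;
        // A defaulted <=> over two ints yields strong_ordering
        // and implicitly declares a defaulted operator== as well.
        friend constexpr auto operator<=>(const Point&, const Point&) = default;
    };

    static_assert(std::three_way_comparable<Point>);
    static_assert(std::is_same_v<std::compare_three_way_result_t<Point>,
                                 std::strong_ordering>);

    constexpr Point a{1, 2};
    constexpr Point b{1, 3};
    static_assert(std::is_lt(a <=> b));                         // a compares less than b
    static_assert(std::is_eq(std::compare_three_way{}(a, a)));  // function-object form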
diff --git a/libcxx/include/complex b/libcxx/include/complex
index fc52697f7749..6b74435c6101 100644
--- a/libcxx/include/complex
+++ b/libcxx/include/complex
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- complex ----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -1056,6 +1056,9 @@ complex<_Tp>
exp(const complex<_Tp>& __x)
{
_Tp __i = __x.imag();
+ if (__i == 0) {
+ return complex<_Tp>(exp(__x.real()), copysign(_Tp(0), __x.imag()));
+ }
if (__libcpp_isinf_or_builtin(__x.real()))
{
if (__x.real() < _Tp(0))
@@ -1070,8 +1073,6 @@ exp(const complex<_Tp>& __x)
return complex<_Tp>(__x.real(), __i);
}
}
- else if (__libcpp_isnan_or_builtin(__x.real()) && __x.imag() == 0)
- return __x;
_Tp __e = exp(__x.real());
return complex<_Tp>(__e * cos(__i), __e * sin(__i));
}
@@ -1269,8 +1270,8 @@ tanh(const complex<_Tp>& __x)
if (__libcpp_isinf_or_builtin(__x.real()))
{
if (!__libcpp_isfinite_or_builtin(__x.imag()))
- return complex<_Tp>(_Tp(1), _Tp(0));
- return complex<_Tp>(_Tp(1), copysign(_Tp(0), sin(_Tp(2) * __x.imag())));
+ return complex<_Tp>(copysign(_Tp(1), __x.real()), _Tp(0));
+ return complex<_Tp>(copysign(_Tp(1), __x.real()), copysign(_Tp(0), sin(_Tp(2) * __x.imag())));
}
if (__libcpp_isnan_or_builtin(__x.real()) && __x.imag() == 0)
return __x;
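A short check (not part of the patch) of the two special cases adjusted above, assuming the patched behaviour: exp() now preserves the sign of a zero imaginary part, and tanh() of a negatively infinite real part reports -1 instead of +1, consistent with the C Annex G special-value rules.

    #include <cassert>
    #include <cmath>
    #include <complex>
    #include <limits>

    int main() {
        // exp(x - 0i) keeps the negative zero in the imaginary part.
        std::complex<double> z(1.0, -0.0);
        std::complex<double> e = std::exp(z);
        assert(std::signbit(e.imag()));

        // tanh(-inf + 0i) now carries the sign of the real part into the result.
        double inf = std::numeric_limits<double>::infinity();
        std::complex<double> t = std::tanh(std::complex<double>(-inf, 0.0));
        assert(t.real() == -1.0);
        return 0;
    }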
diff --git a/libcxx/include/complex.h b/libcxx/include/complex.h
index cdf97ac4671e..d44a5dd15796 100644
--- a/libcxx/include/complex.h
+++ b/libcxx/include/complex.h
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- complex.h --------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/concepts b/libcxx/include/concepts
index 3dec9b527901..bfa27ddca0d4 100644
--- a/libcxx/include/concepts
+++ b/libcxx/include/concepts
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===-------------------------- concepts ----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -129,327 +129,33 @@ namespace std {
*/
+#include <__concepts/arithmetic.h>
+#include <__concepts/assignable.h>
+#include <__concepts/boolean_testable.h>
+#include <__concepts/class_or_enum.h>
+#include <__concepts/common_reference_with.h>
+#include <__concepts/common_with.h>
+#include <__concepts/constructible.h>
+#include <__concepts/convertible_to.h>
+#include <__concepts/copyable.h>
+#include <__concepts/derived_from.h>
+#include <__concepts/destructible.h>
+#include <__concepts/different_from.h>
+#include <__concepts/equality_comparable.h>
+#include <__concepts/invocable.h>
+#include <__concepts/movable.h>
+#include <__concepts/predicate.h>
+#include <__concepts/regular.h>
+#include <__concepts/relation.h>
+#include <__concepts/same_as.h>
+#include <__concepts/semiregular.h>
+#include <__concepts/swappable.h>
+#include <__concepts/totally_ordered.h>
#include <__config>
-#include <__functional/invoke.h>
-#include <__functional_base>
-#include <type_traits>
-#include <utility>
#include <version>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
-_LIBCPP_BEGIN_NAMESPACE_STD
-
-#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
-
-// [concept.same]
-
-template<class _Tp, class _Up>
-concept __same_as_impl = _VSTD::_IsSame<_Tp, _Up>::value;
-
-template<class _Tp, class _Up>
-concept same_as = __same_as_impl<_Tp, _Up> && __same_as_impl<_Up, _Tp>;
-
-// [concept.derived]
-template<class _Dp, class _Bp>
-concept derived_from =
- is_base_of_v<_Bp, _Dp> &&
- is_convertible_v<const volatile _Dp*, const volatile _Bp*>;
-
-// [concept.convertible]
-template<class _From, class _To>
-concept convertible_to =
- is_convertible_v<_From, _To> &&
- requires(add_rvalue_reference_t<_From> (&__f)()) {
- static_cast<_To>(__f());
- };
-
-// [concept.commonref]
-template<class _Tp, class _Up>
-concept common_reference_with =
- same_as<common_reference_t<_Tp, _Up>, common_reference_t<_Up, _Tp>> &&
- convertible_to<_Tp, common_reference_t<_Tp, _Up>> &&
- convertible_to<_Up, common_reference_t<_Tp, _Up>>;
-
-// [concept.common]
-template<class _Tp, class _Up>
-concept common_with =
- same_as<common_type_t<_Tp, _Up>, common_type_t<_Up, _Tp>> &&
- requires {
- static_cast<common_type_t<_Tp, _Up>>(declval<_Tp>());
- static_cast<common_type_t<_Tp, _Up>>(declval<_Up>());
- } &&
- common_reference_with<
- add_lvalue_reference_t<const _Tp>,
- add_lvalue_reference_t<const _Up>> &&
- common_reference_with<
- add_lvalue_reference_t<common_type_t<_Tp, _Up>>,
- common_reference_t<
- add_lvalue_reference_t<const _Tp>,
- add_lvalue_reference_t<const _Up>>>;
-
-// [concepts.arithmetic], arithmetic concepts
-template<class _Tp>
-concept integral = is_integral_v<_Tp>;
-
-template<class _Tp>
-concept signed_integral = integral<_Tp> && is_signed_v<_Tp>;
-
-template<class _Tp>
-concept unsigned_integral = integral<_Tp> && !signed_integral<_Tp>;
-
-template<class _Tp>
-concept floating_point = is_floating_point_v<_Tp>;
-
-// [concept.assignable]
-template<class _Lhs, class _Rhs>
-concept assignable_from =
- is_lvalue_reference_v<_Lhs> &&
- common_reference_with<__make_const_lvalue_ref<_Lhs>, __make_const_lvalue_ref<_Rhs>> &&
- requires (_Lhs __lhs, _Rhs&& __rhs) {
- { __lhs = _VSTD::forward<_Rhs>(__rhs) } -> same_as<_Lhs>;
- };
-
-// [concept.destructible]
-
-template<class _Tp>
-concept destructible = is_nothrow_destructible_v<_Tp>;
-
-// [concept.constructible]
-template<class _Tp, class... _Args>
-concept constructible_from =
- destructible<_Tp> && is_constructible_v<_Tp, _Args...>;
-
-// [concept.default.init]
-
-template<class _Tp>
-concept __default_initializable = requires { ::new _Tp; };
-
-template<class _Tp>
-concept default_initializable = constructible_from<_Tp> &&
- requires { _Tp{}; } && __default_initializable<_Tp>;
-
-// [concept.moveconstructible]
-template<class _Tp>
-concept move_constructible =
- constructible_from<_Tp, _Tp> && convertible_to<_Tp, _Tp>;
-
-// [concept.copyconstructible]
-template<class _Tp>
-concept copy_constructible =
- move_constructible<_Tp> &&
- constructible_from<_Tp, _Tp&> && convertible_to<_Tp&, _Tp> &&
- constructible_from<_Tp, const _Tp&> && convertible_to<const _Tp&, _Tp> &&
- constructible_from<_Tp, const _Tp> && convertible_to<const _Tp, _Tp>;
-
-// Whether a type is a class type or enumeration type according to the Core wording.
-template<class _Tp>
-concept __class_or_enum = is_class_v<_Tp> || is_union_v<_Tp> || is_enum_v<_Tp>;
-
-// [concept.swappable]
-namespace ranges::__swap {
- // Deleted to inhibit ADL
- template<class _Tp>
- void swap(_Tp&, _Tp&) = delete;
-
-
- // [1]
- template<class _Tp, class _Up>
- concept __unqualified_swappable_with =
- (__class_or_enum<remove_cvref_t<_Tp>> || __class_or_enum<remove_cvref_t<_Up>>) &&
- requires(_Tp&& __t, _Up&& __u) {
- swap(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u));
- };
-
- struct __fn;
-
- template<class _Tp, class _Up, size_t _Size>
- concept __swappable_arrays =
- !__unqualified_swappable_with<_Tp(&)[_Size], _Up(&)[_Size]> &&
- extent_v<_Tp> == extent_v<_Up> &&
- requires(_Tp(& __t)[_Size], _Up(& __u)[_Size], const __fn& __swap) {
- __swap(__t[0], __u[0]);
- };
-
- template<class _Tp>
- concept __exchangeable =
- !__unqualified_swappable_with<_Tp&, _Tp&> &&
- move_constructible<_Tp> &&
- assignable_from<_Tp&, _Tp>;
-
- struct __fn {
- // 2.1 `S` is `(void)swap(E1, E2)`* if `E1` or `E2` has class or enumeration type and...
- // *The name `swap` is used here unqualified.
- template<class _Tp, class _Up>
- requires __unqualified_swappable_with<_Tp, _Up>
- constexpr void operator()(_Tp&& __t, _Up&& __u) const
- noexcept(noexcept(swap(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u))))
- {
- swap(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u));
- }
-
- // 2.2 Otherwise, if `E1` and `E2` are lvalues of array types with equal extent and...
- template<class _Tp, class _Up, size_t _Size>
- requires __swappable_arrays<_Tp, _Up, _Size>
- constexpr void operator()(_Tp(& __t)[_Size], _Up(& __u)[_Size]) const
- noexcept(noexcept((*this)(*__t, *__u)))
- {
- // TODO(cjdb): replace with `ranges::swap_ranges`.
- for (size_t __i = 0; __i < _Size; ++__i) {
- (*this)(__t[__i], __u[__i]);
- }
- }
-
- // 2.3 Otherwise, if `E1` and `E2` are lvalues of the same type `T` that models...
- template<__exchangeable _Tp>
- constexpr void operator()(_Tp& __x, _Tp& __y) const
- noexcept(is_nothrow_move_constructible_v<_Tp> && is_nothrow_move_assignable_v<_Tp>)
- {
- __y = _VSTD::exchange(__x, _VSTD::move(__y));
- }
- };
-} // namespace ranges::__swap
-
-namespace ranges::inline __cpo {
- inline constexpr auto swap = __swap::__fn{};
-} // namespace ranges::__cpo
-
-template<class _Tp>
-concept swappable = requires(_Tp& __a, _Tp& __b) { ranges::swap(__a, __b); };
-
-template<class _Tp, class _Up>
-concept swappable_with =
- common_reference_with<_Tp, _Up> &&
- requires(_Tp&& __t, _Up&& __u) {
- ranges::swap(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Tp>(__t));
- ranges::swap(_VSTD::forward<_Up>(__u), _VSTD::forward<_Up>(__u));
- ranges::swap(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u));
- ranges::swap(_VSTD::forward<_Up>(__u), _VSTD::forward<_Tp>(__t));
- };
-
-// [concept.booleantestable]
-template<class _Tp>
-concept __boolean_testable_impl = convertible_to<_Tp, bool>;
-
-template<class _Tp>
-concept __boolean_testable = __boolean_testable_impl<_Tp> && requires(_Tp&& __t) {
- { !std::forward<_Tp>(__t) } -> __boolean_testable_impl;
-};
-
-// [concept.equalitycomparable]
-template<class _Tp, class _Up>
-concept __weakly_equality_comparable_with =
- requires(__make_const_lvalue_ref<_Tp> __t, __make_const_lvalue_ref<_Up> __u) {
- { __t == __u } -> __boolean_testable;
- { __t != __u } -> __boolean_testable;
- { __u == __t } -> __boolean_testable;
- { __u != __t } -> __boolean_testable;
- };
-
-template<class _Tp>
-concept equality_comparable = __weakly_equality_comparable_with<_Tp, _Tp>;
-
-template<class _Tp, class _Up>
-concept equality_comparable_with =
- equality_comparable<_Tp> && equality_comparable<_Up> &&
- common_reference_with<__make_const_lvalue_ref<_Tp>, __make_const_lvalue_ref<_Up>> &&
- equality_comparable<
- common_reference_t<
- __make_const_lvalue_ref<_Tp>,
- __make_const_lvalue_ref<_Up>>> &&
- __weakly_equality_comparable_with<_Tp, _Up>;
-
-// [concept.totallyordered]
-
-template<class _Tp, class _Up>
-concept __partially_ordered_with =
- requires(__make_const_lvalue_ref<_Tp> __t, __make_const_lvalue_ref<_Up> __u) {
- { __t < __u } -> __boolean_testable;
- { __t > __u } -> __boolean_testable;
- { __t <= __u } -> __boolean_testable;
- { __t >= __u } -> __boolean_testable;
- { __u < __t } -> __boolean_testable;
- { __u > __t } -> __boolean_testable;
- { __u <= __t } -> __boolean_testable;
- { __u >= __t } -> __boolean_testable;
- };
-
-template<class _Tp>
-concept totally_ordered = equality_comparable<_Tp> && __partially_ordered_with<_Tp, _Tp>;
-
-template<class _Tp, class _Up>
-concept totally_ordered_with =
- totally_ordered<_Tp> && totally_ordered<_Up> &&
- equality_comparable_with<_Tp, _Up> &&
- totally_ordered<
- common_reference_t<
- __make_const_lvalue_ref<_Tp>,
- __make_const_lvalue_ref<_Up>>> &&
- __partially_ordered_with<_Tp, _Up>;
-
-// [concepts.object]
-template<class _Tp>
-concept movable =
- is_object_v<_Tp> &&
- move_constructible<_Tp> &&
- assignable_from<_Tp&, _Tp> &&
- swappable<_Tp>;
-
-template<class _Tp>
-concept copyable =
- copy_constructible<_Tp> &&
- movable<_Tp> &&
- assignable_from<_Tp&, _Tp&> &&
- assignable_from<_Tp&, const _Tp&> &&
- assignable_from<_Tp&, const _Tp>;
-
-template<class _Tp>
-concept semiregular = copyable<_Tp> && default_initializable<_Tp>;
-
-template<class _Tp>
-concept regular = semiregular<_Tp> && equality_comparable<_Tp>;
-
-// [concept.invocable]
-template<class _Fn, class... _Args>
-concept invocable = requires(_Fn&& __fn, _Args&&... __args) {
- _VSTD::invoke(_VSTD::forward<_Fn>(__fn), _VSTD::forward<_Args>(__args)...); // not required to be equality preserving
-};
-
-// [concept.regular.invocable]
-template<class _Fn, class... _Args>
-concept regular_invocable = invocable<_Fn, _Args...>;
-
-// [concept.predicate]
-template<class _Fn, class... _Args>
-concept predicate =
- regular_invocable<_Fn, _Args...> && __boolean_testable<invoke_result_t<_Fn, _Args...>>;
-
-// [concept.relation]
-template<class _Rp, class _Tp, class _Up>
-concept relation =
- predicate<_Rp, _Tp, _Tp> && predicate<_Rp, _Up, _Up> &&
- predicate<_Rp, _Tp, _Up> && predicate<_Rp, _Up, _Tp>;
-
-// [concept.equiv]
-template<class _Rp, class _Tp, class _Up>
-concept equivalence_relation = relation<_Rp, _Tp, _Up>;
-
-// [concept.strictweakorder]
-template<class _Rp, class _Tp, class _Up>
-concept strict_weak_order = relation<_Rp, _Tp, _Up>;
-
-template<class _Tp, class _Up>
-concept __different_from = !same_as<remove_cvref_t<_Tp>, remove_cvref_t<_Up>>;
-
-#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
-
-_LIBCPP_END_NAMESPACE_STD
-
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP_CONCEPTS
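A usage sketch (not part of the patch): the public <concepts> surface is unchanged by the split into __concepts/ detail headers, so constrained templates and the ranges::swap customization-point object keep working as before.

    #include <concepts>
    #include <string>

    // Accept anything that is totally ordered and copyable.
    template <class T>
        requires std::totally_ordered<T> && std::copyable<T>
    constexpr T clamp_min(T value, T lo) {
        return value < lo ? lo : value;
    }

    static_assert(std::same_as<decltype(clamp_min(3, 5)), int>);
    static_assert(clamp_min(3, 5) == 5);

    int main() {
        static_assert(std::swappable<std::string>);
        std::string a = "left", b = "right";
        std::ranges::swap(a, b);  // CPO from [concept.swappable]
        return a == "right" ? 0 : 1;
    }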
diff --git a/libcxx/include/condition_variable b/libcxx/include/condition_variable
index a33250c67794..0569e2254d1d 100644
--- a/libcxx/include/condition_variable
+++ b/libcxx/include/condition_variable
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===---------------------- condition_variable ----------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/coroutine b/libcxx/include/coroutine
new file mode 100644
index 000000000000..4e140ab3fed7
--- /dev/null
+++ b/libcxx/include/coroutine
@@ -0,0 +1,52 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP_COROUTINE
+#define _LIBCPP_COROUTINE
+
+/**
+ coroutine synopsis
+
+namespace std {
+// [coroutine.traits]
+template <class R, class... ArgTypes>
+ struct coroutine_traits;
+// [coroutine.handle]
+template <class Promise = void>
+ struct coroutine_handle;
+// [coroutine.handle.compare]
+constexpr bool operator==(coroutine_handle<> x, coroutine_handle<> y) noexcept;
+constexpr strong_ordering operator<=>(coroutine_handle<> x, coroutine_handle<> y) noexcept;
+// [coroutine.handle.hash]
+template <class T> struct hash;
+template <class P> struct hash<coroutine_handle<P>>;
+// [coroutine.noop]
+struct noop_coroutine_promise;
+template<> struct coroutine_handle<noop_coroutine_promise>;
+using noop_coroutine_handle = coroutine_handle<noop_coroutine_promise>;
+noop_coroutine_handle noop_coroutine() noexcept;
+// [coroutine.trivial.awaitables]
+struct suspend_never;
+struct suspend_always;
+} // namespace std
+
+ */
+
+#include <__config>
+#include <__coroutine/coroutine_handle.h>
+#include <__coroutine/noop_coroutine_handle.h>
+#include <__coroutine/coroutine_traits.h>
+#include <__coroutine/trivial_awaitables.h>
+#include <version>
+
+#ifndef _LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER
+#pragma GCC system_header
+#endif
+
+#endif // _LIBCPP_COROUTINE
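A usage sketch (not part of the patch) exercising the new <coroutine> header with a bare-bones generator built from coroutine_handle and suspend_always, the pieces listed in the synopsis above.

    #include <coroutine>
    #include <exception>
    #include <iostream>

    struct IntGenerator {
        struct promise_type {
            int current = 0;
            IntGenerator get_return_object() {
                return IntGenerator{std::coroutine_handle<promise_type>::from_promise(*this)};
            }
            std::suspend_always initial_suspend() noexcept { return {}; }
            std::suspend_always final_suspend() noexcept { return {}; }
            std::suspend_always yield_value(int v) noexcept { current = v; return {}; }
            void return_void() noexcept {}
            void unhandled_exception() { std::terminate(); }
        };

        std::coroutine_handle<promise_type> handle;
        explicit IntGenerator(std::coroutine_handle<promise_type> h) : handle(h) {}
        IntGenerator(const IntGenerator&) = delete;
        ~IntGenerator() { if (handle) handle.destroy(); }

        // Resume the coroutine once; false once it has run to completion.
        bool next() {
            handle.resume();
            return !handle.done();
        }
        int value() const { return handle.promise().current; }
    };

    IntGenerator counter(int limit) {
        for (int i = 0; i < limit; ++i)
            co_yield i;
    }

    int main() {
        IntGenerator gen = counter(3);
        while (gen.next())
            std::cout << gen.value() << '\n';  // prints 0, 1, 2
        return 0;
    }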
diff --git a/libcxx/include/csetjmp b/libcxx/include/csetjmp
index 41902f0e4710..f7afe6168751 100644
--- a/libcxx/include/csetjmp
+++ b/libcxx/include/csetjmp
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- csetjmp ----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/csignal b/libcxx/include/csignal
index 3b262b561a40..e2c71a987692 100644
--- a/libcxx/include/csignal
+++ b/libcxx/include/csignal
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- csignal ----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/cstdarg b/libcxx/include/cstdarg
index 352db8b8c1dc..17fa5a550525 100644
--- a/libcxx/include/cstdarg
+++ b/libcxx/include/cstdarg
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- cstdarg ----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/cstdbool b/libcxx/include/cstdbool
index 7708537ca8e0..b7bdb9a555b6 100644
--- a/libcxx/include/cstdbool
+++ b/libcxx/include/cstdbool
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- cstdbool ---------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/cstddef b/libcxx/include/cstddef
index 1b54e7b2e0dc..ed5aea6f7a19 100644
--- a/libcxx/include/cstddef
+++ b/libcxx/include/cstddef
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- cstddef ----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -58,14 +58,16 @@ template <> struct __libcpp_is_integral<bool> { enum { va
template <> struct __libcpp_is_integral<char> { enum { value = 1 }; };
template <> struct __libcpp_is_integral<signed char> { enum { value = 1 }; };
template <> struct __libcpp_is_integral<unsigned char> { enum { value = 1 }; };
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
template <> struct __libcpp_is_integral<wchar_t> { enum { value = 1 }; };
+#endif
#ifndef _LIBCPP_HAS_NO_CHAR8_T
template <> struct __libcpp_is_integral<char8_t> { enum { value = 1 }; };
#endif
#ifndef _LIBCPP_HAS_NO_UNICODE_CHARS
template <> struct __libcpp_is_integral<char16_t> { enum { value = 1 }; };
template <> struct __libcpp_is_integral<char32_t> { enum { value = 1 }; };
-#endif // _LIBCPP_HAS_NO_UNICODE_CHARS
+#endif
template <> struct __libcpp_is_integral<short> { enum { value = 1 }; };
template <> struct __libcpp_is_integral<unsigned short> { enum { value = 1 }; };
template <> struct __libcpp_is_integral<int> { enum { value = 1 }; };
diff --git a/libcxx/include/cstdint b/libcxx/include/cstdint
index aa7c8b5cbf56..8fbdd6e523fd 100644
--- a/libcxx/include/cstdint
+++ b/libcxx/include/cstdint
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- cstdint ----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/cstdio b/libcxx/include/cstdio
index b480f800a7b4..492439f675c6 100644
--- a/libcxx/include/cstdio
+++ b/libcxx/include/cstdio
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===---------------------------- cstdio ----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -145,30 +145,24 @@ using ::feof _LIBCPP_USING_IF_EXISTS;
using ::ferror _LIBCPP_USING_IF_EXISTS;
using ::perror _LIBCPP_USING_IF_EXISTS;
-#ifndef _LIBCPP_HAS_NO_GLOBAL_FILESYSTEM_NAMESPACE
using ::fopen _LIBCPP_USING_IF_EXISTS;
using ::freopen _LIBCPP_USING_IF_EXISTS;
using ::remove _LIBCPP_USING_IF_EXISTS;
using ::rename _LIBCPP_USING_IF_EXISTS;
using ::tmpfile _LIBCPP_USING_IF_EXISTS;
using ::tmpnam _LIBCPP_USING_IF_EXISTS;
-#endif
-#ifndef _LIBCPP_HAS_NO_STDIN
using ::getchar _LIBCPP_USING_IF_EXISTS;
#if _LIBCPP_STD_VER <= 11 && !defined(_LIBCPP_C_HAS_NO_GETS)
using ::gets _LIBCPP_USING_IF_EXISTS;
#endif
using ::scanf _LIBCPP_USING_IF_EXISTS;
using ::vscanf _LIBCPP_USING_IF_EXISTS;
-#endif
-#ifndef _LIBCPP_HAS_NO_STDOUT
using ::printf _LIBCPP_USING_IF_EXISTS;
using ::putchar _LIBCPP_USING_IF_EXISTS;
using ::puts _LIBCPP_USING_IF_EXISTS;
using ::vprintf _LIBCPP_USING_IF_EXISTS;
-#endif
_LIBCPP_END_NAMESPACE_STD
diff --git a/libcxx/include/cstdlib b/libcxx/include/cstdlib
index ced0321aa8e5..219c68c6d371 100644
--- a/libcxx/include/cstdlib
+++ b/libcxx/include/cstdlib
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- cstdlib ----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -99,26 +99,18 @@ _LIBCPP_BEGIN_NAMESPACE_STD
using ::size_t _LIBCPP_USING_IF_EXISTS;
using ::div_t _LIBCPP_USING_IF_EXISTS;
using ::ldiv_t _LIBCPP_USING_IF_EXISTS;
-#ifndef _LIBCPP_HAS_NO_LONG_LONG
using ::lldiv_t _LIBCPP_USING_IF_EXISTS;
-#endif // _LIBCPP_HAS_NO_LONG_LONG
using ::atof _LIBCPP_USING_IF_EXISTS;
using ::atoi _LIBCPP_USING_IF_EXISTS;
using ::atol _LIBCPP_USING_IF_EXISTS;
-#ifndef _LIBCPP_HAS_NO_LONG_LONG
using ::atoll _LIBCPP_USING_IF_EXISTS;
-#endif // _LIBCPP_HAS_NO_LONG_LONG
using ::strtod _LIBCPP_USING_IF_EXISTS;
using ::strtof _LIBCPP_USING_IF_EXISTS;
using ::strtold _LIBCPP_USING_IF_EXISTS;
using ::strtol _LIBCPP_USING_IF_EXISTS;
-#ifndef _LIBCPP_HAS_NO_LONG_LONG
using ::strtoll _LIBCPP_USING_IF_EXISTS;
-#endif // _LIBCPP_HAS_NO_LONG_LONG
using ::strtoul _LIBCPP_USING_IF_EXISTS;
-#ifndef _LIBCPP_HAS_NO_LONG_LONG
using ::strtoull _LIBCPP_USING_IF_EXISTS;
-#endif // _LIBCPP_HAS_NO_LONG_LONG
using ::rand _LIBCPP_USING_IF_EXISTS;
using ::srand _LIBCPP_USING_IF_EXISTS;
using ::calloc _LIBCPP_USING_IF_EXISTS;
@@ -137,14 +129,10 @@ using ::bsearch _LIBCPP_USING_IF_EXISTS;
using ::qsort _LIBCPP_USING_IF_EXISTS;
using ::abs _LIBCPP_USING_IF_EXISTS;
using ::labs _LIBCPP_USING_IF_EXISTS;
-#ifndef _LIBCPP_HAS_NO_LONG_LONG
using ::llabs _LIBCPP_USING_IF_EXISTS;
-#endif // _LIBCPP_HAS_NO_LONG_LONG
using ::div _LIBCPP_USING_IF_EXISTS;
using ::ldiv _LIBCPP_USING_IF_EXISTS;
-#ifndef _LIBCPP_HAS_NO_LONG_LONG
using ::lldiv _LIBCPP_USING_IF_EXISTS;
-#endif // _LIBCPP_HAS_NO_LONG_LONG
using ::mblen _LIBCPP_USING_IF_EXISTS;
using ::mbtowc _LIBCPP_USING_IF_EXISTS;
using ::wctomb _LIBCPP_USING_IF_EXISTS;
diff --git a/libcxx/include/cstring b/libcxx/include/cstring
index 34449ab3330b..91036f575a32 100644
--- a/libcxx/include/cstring
+++ b/libcxx/include/cstring
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- cstring ----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -84,9 +84,7 @@ using ::strpbrk _LIBCPP_USING_IF_EXISTS;
using ::strrchr _LIBCPP_USING_IF_EXISTS;
using ::strspn _LIBCPP_USING_IF_EXISTS;
using ::strstr _LIBCPP_USING_IF_EXISTS;
-#ifndef _LIBCPP_HAS_NO_THREAD_UNSAFE_C_FUNCTIONS
using ::strtok _LIBCPP_USING_IF_EXISTS;
-#endif
using ::memset _LIBCPP_USING_IF_EXISTS;
using ::strerror _LIBCPP_USING_IF_EXISTS;
using ::strlen _LIBCPP_USING_IF_EXISTS;
diff --git a/libcxx/include/ctgmath b/libcxx/include/ctgmath
index 41f7f0a172ea..108e948800de 100644
--- a/libcxx/include/ctgmath
+++ b/libcxx/include/ctgmath
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===-------------------------- ctgmath -----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/ctime b/libcxx/include/ctime
index 8b2efd7449ca..779187d0ef51 100644
--- a/libcxx/include/ctime
+++ b/libcxx/include/ctime
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===---------------------------- ctime -----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -59,7 +59,7 @@ int timespec_get( struct timespec *ts, int base); // C++17
// we're detecting this here instead of in <__config> because we can't include
// system headers from <__config>, since it leads to circular module dependencies.
// This is also meant to be a very temporary workaround until the SDKs are fixed.
-#if defined(__APPLE__)
+#if defined(__APPLE__) && !__has_attribute(using_if_exists)
# include <sys/cdefs.h>
# if defined(_LIBCPP_HAS_TIMESPEC_GET) && (__DARWIN_C_LEVEL < __DARWIN_C_FULL)
# define _LIBCPP_HAS_TIMESPEC_GET_NOT_ACTUALLY_PROVIDED
@@ -79,12 +79,10 @@ using ::clock _LIBCPP_USING_IF_EXISTS;
using ::difftime _LIBCPP_USING_IF_EXISTS;
using ::mktime _LIBCPP_USING_IF_EXISTS;
using ::time _LIBCPP_USING_IF_EXISTS;
-#ifndef _LIBCPP_HAS_NO_THREAD_UNSAFE_C_FUNCTIONS
using ::asctime _LIBCPP_USING_IF_EXISTS;
using ::ctime _LIBCPP_USING_IF_EXISTS;
using ::gmtime _LIBCPP_USING_IF_EXISTS;
using ::localtime _LIBCPP_USING_IF_EXISTS;
-#endif
using ::strftime _LIBCPP_USING_IF_EXISTS;
#if _LIBCPP_STD_VER > 14 && defined(_LIBCPP_HAS_TIMESPEC_GET) && !defined(_LIBCPP_HAS_TIMESPEC_GET_NOT_ACTUALLY_PROVIDED)
using ::timespec_get _LIBCPP_USING_IF_EXISTS;
diff --git a/libcxx/include/ctype.h b/libcxx/include/ctype.h
index 967b6be1e390..308d6e457c13 100644
--- a/libcxx/include/ctype.h
+++ b/libcxx/include/ctype.h
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===---------------------------- ctype.h ---------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/cwchar b/libcxx/include/cwchar
index f39046f0a9c2..e07f2df98450 100644
--- a/libcxx/include/cwchar
+++ b/libcxx/include/cwchar
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- cwchar -----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -137,13 +137,9 @@ using ::wcstod _LIBCPP_USING_IF_EXISTS;
using ::wcstof _LIBCPP_USING_IF_EXISTS;
using ::wcstold _LIBCPP_USING_IF_EXISTS;
using ::wcstol _LIBCPP_USING_IF_EXISTS;
-#ifndef _LIBCPP_HAS_NO_LONG_LONG
using ::wcstoll _LIBCPP_USING_IF_EXISTS;
-#endif // _LIBCPP_HAS_NO_LONG_LONG
using ::wcstoul _LIBCPP_USING_IF_EXISTS;
-#ifndef _LIBCPP_HAS_NO_LONG_LONG
using ::wcstoull _LIBCPP_USING_IF_EXISTS;
-#endif // _LIBCPP_HAS_NO_LONG_LONG
using ::wcscpy _LIBCPP_USING_IF_EXISTS;
using ::wcsncpy _LIBCPP_USING_IF_EXISTS;
using ::wcscat _LIBCPP_USING_IF_EXISTS;
@@ -175,17 +171,13 @@ using ::wcrtomb _LIBCPP_USING_IF_EXISTS;
using ::mbsrtowcs _LIBCPP_USING_IF_EXISTS;
using ::wcsrtombs _LIBCPP_USING_IF_EXISTS;
-#ifndef _LIBCPP_HAS_NO_STDIN
using ::getwchar _LIBCPP_USING_IF_EXISTS;
using ::vwscanf _LIBCPP_USING_IF_EXISTS;
using ::wscanf _LIBCPP_USING_IF_EXISTS;
-#endif
-#ifndef _LIBCPP_HAS_NO_STDOUT
using ::putwchar _LIBCPP_USING_IF_EXISTS;
using ::vwprintf _LIBCPP_USING_IF_EXISTS;
using ::wprintf _LIBCPP_USING_IF_EXISTS;
-#endif
_LIBCPP_END_NAMESPACE_STD
diff --git a/libcxx/include/cwctype b/libcxx/include/cwctype
index 17c68d6d4544..b3ef1ae905ad 100644
--- a/libcxx/include/cwctype
+++ b/libcxx/include/cwctype
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- cwctype ----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -59,6 +59,7 @@ wctrans_t wctrans(const char* property);
_LIBCPP_BEGIN_NAMESPACE_STD
+#if defined(_LIBCPP_INCLUDED_C_LIBRARY_WCTYPE_H)
using ::wint_t _LIBCPP_USING_IF_EXISTS;
using ::wctrans_t _LIBCPP_USING_IF_EXISTS;
using ::wctype_t _LIBCPP_USING_IF_EXISTS;
@@ -80,6 +81,7 @@ using ::towlower _LIBCPP_USING_IF_EXISTS;
using ::towupper _LIBCPP_USING_IF_EXISTS;
using ::towctrans _LIBCPP_USING_IF_EXISTS;
using ::wctrans _LIBCPP_USING_IF_EXISTS;
+#endif // _LIBCPP_INCLUDED_C_LIBRARY_WCTYPE_H
_LIBCPP_END_NAMESPACE_STD
diff --git a/libcxx/include/deque b/libcxx/include/deque
index aff93deb1010..9ab6ea748d53 100644
--- a/libcxx/include/deque
+++ b/libcxx/include/deque
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===---------------------------- deque -----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -129,7 +129,7 @@ public:
template <class InputIterator, class Allocator = allocator<typename iterator_traits<InputIterator>::value_type>>
deque(InputIterator, InputIterator, Allocator = Allocator())
- -> deque<typename iterator_traits<InputIterator>::value_type, Allocator>;
+ -> deque<typename iterator_traits<InputIterator>::value_type, Allocator>; // C++17
template <class T, class Allocator>
bool operator==(const deque<T,Allocator>& x, const deque<T,Allocator>& y);
@@ -162,6 +162,7 @@ template <class T, class Allocator, class Predicate>
#include <__config>
#include <__debug>
+#include <__iterator/iterator_traits.h>
#include <__split_buffer>
#include <__utility/forward.h>
#include <algorithm>
@@ -908,31 +909,8 @@ move_backward(__deque_iterator<_V1, _P1, _R1, _M1, _D1, _B1> __f,
return __r;
}
-template <bool>
-class __deque_base_common
-{
-protected:
- _LIBCPP_NORETURN void __throw_length_error() const;
- _LIBCPP_NORETURN void __throw_out_of_range() const;
-};
-
-template <bool __b>
-void
-__deque_base_common<__b>::__throw_length_error() const
-{
- _VSTD::__throw_length_error("deque");
-}
-
-template <bool __b>
-void
-__deque_base_common<__b>::__throw_out_of_range() const
-{
- _VSTD::__throw_out_of_range("deque");
-}
-
template <class _Tp, class _Allocator>
class __deque_base
- : protected __deque_base_common<true>
{
__deque_base(const __deque_base& __c);
__deque_base& operator=(const __deque_base& __c);
@@ -1281,20 +1259,20 @@ public:
static_assert((is_same<typename allocator_type::value_type, value_type>::value),
"Allocator::value_type must be same type as value_type");
- typedef __deque_base<value_type, allocator_type> __base;
+ typedef __deque_base<value_type, allocator_type> __base;
- typedef typename __base::__alloc_traits __alloc_traits;
- typedef typename __base::reference reference;
- typedef typename __base::const_reference const_reference;
- typedef typename __base::iterator iterator;
- typedef typename __base::const_iterator const_iterator;
- typedef typename __base::size_type size_type;
- typedef typename __base::difference_type difference_type;
+ typedef typename __base::__alloc_traits __alloc_traits;
+ typedef typename __base::reference reference;
+ typedef typename __base::const_reference const_reference;
+ typedef typename __base::iterator iterator;
+ typedef typename __base::const_iterator const_iterator;
+ typedef typename __allocator_traits<allocator_type>::size_type size_type;
+ typedef typename __base::difference_type difference_type;
- typedef typename __base::pointer pointer;
- typedef typename __base::const_pointer const_pointer;
- typedef _VSTD::reverse_iterator<iterator> reverse_iterator;
- typedef _VSTD::reverse_iterator<const_iterator> const_reverse_iterator;
+ typedef typename __base::pointer pointer;
+ typedef typename __base::const_pointer const_pointer;
+ typedef _VSTD::reverse_iterator<iterator> reverse_iterator;
+ typedef _VSTD::reverse_iterator<const_iterator> const_reverse_iterator;
using typename __base::__deque_range;
using typename __base::__deque_block_range;
@@ -1588,23 +1566,24 @@ public:
void __move_assign(deque& __c, false_type);
};
-#ifndef _LIBCPP_HAS_NO_DEDUCTION_GUIDES
+#if _LIBCPP_STD_VER >= 17
template<class _InputIterator,
class _Alloc = allocator<__iter_value_type<_InputIterator>>,
- class = _EnableIf<__is_allocator<_Alloc>::value>
+ class = enable_if_t<__is_cpp17_input_iterator<_InputIterator>::value>,
+ class = enable_if_t<__is_allocator<_Alloc>::value>
>
deque(_InputIterator, _InputIterator)
-> deque<__iter_value_type<_InputIterator>, _Alloc>;
template<class _InputIterator,
class _Alloc,
- class = _EnableIf<__is_allocator<_Alloc>::value>
+ class = enable_if_t<__is_cpp17_input_iterator<_InputIterator>::value>,
+ class = enable_if_t<__is_allocator<_Alloc>::value>
>
deque(_InputIterator, _InputIterator, _Alloc)
-> deque<__iter_value_type<_InputIterator>, _Alloc>;
#endif
-
template <class _Tp, class _Allocator>
deque<_Tp, _Allocator>::deque(size_type __n)
{
@@ -1672,7 +1651,7 @@ template <class _Tp, class _Allocator>
deque<_Tp, _Allocator>&
deque<_Tp, _Allocator>::operator=(const deque& __c)
{
- if (this != &__c)
+ if (this != _VSTD::addressof(__c))
{
__copy_assign_alloc(__c);
assign(__c.begin(), __c.end());
@@ -1873,7 +1852,7 @@ typename deque<_Tp, _Allocator>::reference
deque<_Tp, _Allocator>::at(size_type __i)
{
if (__i >= __base::size())
- __base::__throw_out_of_range();
+ _VSTD::__throw_out_of_range("deque");
size_type __p = __base::__start_ + __i;
return *(*(__base::__map_.begin() + __p / __base::__block_size) + __p % __base::__block_size);
}
@@ -1884,7 +1863,7 @@ typename deque<_Tp, _Allocator>::const_reference
deque<_Tp, _Allocator>::at(size_type __i) const
{
if (__i >= __base::size())
- __base::__throw_out_of_range();
+ _VSTD::__throw_out_of_range("deque");
size_type __p = __base::__start_ + __i;
return *(*(__base::__map_.begin() + __p / __base::__block_size) + __p % __base::__block_size);
}
@@ -2237,7 +2216,7 @@ deque<_Tp, _Allocator>::insert(const_iterator __p, size_type __n, const value_ty
size_type __de = __base::size() - __pos;
if (__n > __de)
{
- for (size_type __m = __n - __de; __m; --__m, ++__i, ++__base::size())
+ for (size_type __m = __n - __de; __m; --__m, (void) ++__i, ++__base::size())
__alloc_traits::construct(__a, _VSTD::addressof(*__i), __v);
__n = __de;
}
@@ -2340,7 +2319,7 @@ deque<_Tp, _Allocator>::insert(const_iterator __p, _BiIter __f, _BiIter __l,
if (__n > 0)
{
iterator __oen = __old_end - __n;
- for (iterator __j = __oen; __j != __old_end; ++__i, ++__j, ++__base::size())
+ for (iterator __j = __oen; __j != __old_end; ++__i, (void) ++__j, ++__base::size())
__alloc_traits::construct(__a, _VSTD::addressof(*__i), _VSTD::move(*__j));
if (__n < __de)
__old_end = _VSTD::move_backward(__old_end - __de, __oen, __old_end);
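The deque hunks above swap the old _LIBCPP_HAS_NO_DEDUCTION_GUIDES gate for a plain _LIBCPP_STD_VER >= 17 check, constrain the iterator-pair deduction guides to C++17 input iterators, and make at() throw std::out_of_range directly now that __deque_base_common is gone; the (void) casts in the insert loops are the usual guard against user-supplied operator, overloads on iterator types. A minimal user-side sketch of the guided deduction and the at() behavior (standard API only, nothing libc++-internal assumed):

#include <deque>
#include <iostream>
#include <stdexcept>
#include <type_traits>
#include <vector>

int main() {
    std::vector<int> src{1, 2, 3};

    // C++17 CTAD through the iterator-pair deduction guide:
    // deque(InputIterator, InputIterator) -> deque<value_type>.
    std::deque d(src.begin(), src.end());
    static_assert(std::is_same_v<decltype(d), std::deque<int>>);

    // at() bounds-checks and throws std::out_of_range("deque") on failure.
    try {
        d.at(99) = 0;
    } catch (const std::out_of_range& ex) {
        std::cout << "caught: " << ex.what() << '\n';
    }
}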
diff --git a/libcxx/include/errno.h b/libcxx/include/errno.h
index 59950c24cea2..a0bbec46bdf9 100644
--- a/libcxx/include/errno.h
+++ b/libcxx/include/errno.h
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===-------------------------- errno.h -----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/exception b/libcxx/include/exception
index 816f259f878b..5f5486149ac0 100644
--- a/libcxx/include/exception
+++ b/libcxx/include/exception
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===-------------------------- exception ---------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/execution b/libcxx/include/execution
index e25cb82d552c..32b05b85dac6 100644
--- a/libcxx/include/execution
+++ b/libcxx/include/execution
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===------------------------- execution ---------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/experimental/__config b/libcxx/include/experimental/__config
index f85426d050d1..c7a7d68118a2 100644
--- a/libcxx/include/experimental/__config
+++ b/libcxx/include/experimental/__config
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- __config ---------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -45,6 +45,10 @@
#define _LIBCPP_END_NAMESPACE_EXPERIMENTAL_FILESYSTEM \
} } _LIBCPP_END_NAMESPACE_EXPERIMENTAL
+#if !defined(__cpp_coroutines) || __cpp_coroutines < 201703L
+#define _LIBCPP_HAS_NO_EXPERIMENTAL_COROUTINES
+#endif
+
#define _LIBCPP_BEGIN_NAMESPACE_EXPERIMENTAL_COROUTINES \
_LIBCPP_BEGIN_NAMESPACE_EXPERIMENTAL inline namespace coroutines_v1 {
diff --git a/libcxx/include/experimental/algorithm b/libcxx/include/experimental/algorithm
index 79fd7b1b22de..bcf372cafd7a 100644
--- a/libcxx/include/experimental/algorithm
+++ b/libcxx/include/experimental/algorithm
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===-------------------------- algorithm ---------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -41,10 +41,6 @@ ForwardIterator search(ForwardIterator first, ForwardIterator last,
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
-
_LIBCPP_BEGIN_NAMESPACE_LFTS
template <class _ForwardIterator, class _Searcher>
@@ -54,6 +50,4 @@ _ForwardIterator search(_ForwardIterator __f, _ForwardIterator __l, const _Searc
_LIBCPP_END_NAMESPACE_LFTS
-_LIBCPP_POP_MACROS
-
#endif /* _LIBCPP_EXPERIMENTAL_ALGORITHM */
diff --git a/libcxx/include/experimental/coroutine b/libcxx/include/experimental/coroutine
index 54ec74b9f984..16b4028765bc 100644
--- a/libcxx/include/experimental/coroutine
+++ b/libcxx/include/experimental/coroutine
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===----------------------------- coroutine -----------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -57,7 +57,7 @@ template <class P> struct hash<coroutine_handle<P>>;
#pragma GCC system_header
#endif
-#ifdef _LIBCPP_HAS_NO_COROUTINES
+#ifdef _LIBCPP_HAS_NO_EXPERIMENTAL_COROUTINES
# if defined(_LIBCPP_WARNING)
_LIBCPP_WARNING("<experimental/coroutine> cannot be used with this compiler")
# else
@@ -65,7 +65,7 @@ template <class P> struct hash<coroutine_handle<P>>;
# endif
#endif
-#ifndef _LIBCPP_HAS_NO_COROUTINES
+#ifndef _LIBCPP_HAS_NO_EXPERIMENTAL_COROUTINES
_LIBCPP_BEGIN_NAMESPACE_EXPERIMENTAL_COROUTINES
@@ -329,6 +329,6 @@ struct hash<_VSTD_CORO::coroutine_handle<_Tp> > {
_LIBCPP_END_NAMESPACE_STD
-#endif // !defined(_LIBCPP_HAS_NO_COROUTINES)
+#endif // !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_COROUTINES)
#endif /* _LIBCPP_EXPERIMENTAL_COROUTINE */
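With the experimental/__config change above, <experimental/coroutine> is now gated on the compiler's __cpp_coroutines feature-test macro (via the new _LIBCPP_HAS_NO_EXPERIMENTAL_COROUTINES) instead of the generic _LIBCPP_HAS_NO_COROUTINES flag. A hedged sketch of the same detection done on the user side; the HAVE_TS_COROUTINES macro is purely illustrative:

// The TS header is only usable when the compiler advertises __cpp_coroutines,
// which is exactly what experimental/__config now checks.
#if defined(__cpp_coroutines) && __cpp_coroutines >= 201703L
#  include <experimental/coroutine>
#  define HAVE_TS_COROUTINES 1
#else
#  define HAVE_TS_COROUTINES 0
#endif

#include <cstdio>

int main() {
#if HAVE_TS_COROUTINES
    std::printf("TS coroutines available\n");
#else
    std::printf("TS coroutines not available with this compiler\n");
#endif
}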
diff --git a/libcxx/include/experimental/deque b/libcxx/include/experimental/deque
index 73c2787c7a04..594ddff22f98 100644
--- a/libcxx/include/experimental/deque
+++ b/libcxx/include/experimental/deque
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- deque ------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/experimental/filesystem b/libcxx/include/experimental/filesystem
index d2e6237df348..45d80b66a874 100644
--- a/libcxx/include/experimental/filesystem
+++ b/libcxx/include/experimental/filesystem
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- filesystem -------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/experimental/forward_list b/libcxx/include/experimental/forward_list
index 93f6debe9cb3..6781424cf2c6 100644
--- a/libcxx/include/experimental/forward_list
+++ b/libcxx/include/experimental/forward_list
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- forward_list -----------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/experimental/functional b/libcxx/include/experimental/functional
index e3220e16caeb..bcff51e8056f 100644
--- a/libcxx/include/experimental/functional
+++ b/libcxx/include/experimental/functional
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===-------------------------- functional --------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/experimental/iterator b/libcxx/include/experimental/iterator
index 09ea2cbcc7dc..10c903832d8e 100644
--- a/libcxx/include/experimental/iterator
+++ b/libcxx/include/experimental/iterator
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===----------------------------- iterator -------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/experimental/list b/libcxx/include/experimental/list
index adc64a8b537a..099d80fd8db5 100644
--- a/libcxx/include/experimental/list
+++ b/libcxx/include/experimental/list
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- list ------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/experimental/map b/libcxx/include/experimental/map
index 965d7582c9dd..27ff7e862e27 100644
--- a/libcxx/include/experimental/map
+++ b/libcxx/include/experimental/map
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===----------------------------- map ------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/experimental/memory_resource b/libcxx/include/experimental/memory_resource
index 816d21f51368..71a4f51c50e5 100644
--- a/libcxx/include/experimental/memory_resource
+++ b/libcxx/include/experimental/memory_resource
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===------------------------ memory_resource -----------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -183,11 +183,8 @@ public:
// 8.6.3, memory.polymorphic.allocator.mem
_LIBCPP_INLINE_VISIBILITY
_ValueType* allocate(size_t __n) {
- if (__n > __max_size()) {
- __throw_length_error(
- "std::experimental::pmr::polymorphic_allocator<T>::allocate(size_t n)"
- " 'n' exceeds maximum supported size");
- }
+ if (__n > __max_size())
+ __throw_bad_array_new_length();
return static_cast<_ValueType*>(
__res_->allocate(__n * sizeof(_ValueType), _LIBCPP_ALIGNOF(_ValueType))
);
@@ -384,11 +381,8 @@ public:
private:
virtual void * do_allocate(size_t __bytes, size_t)
{
- if (__bytes > __max_size()) {
- __throw_length_error(
- "std::experimental::pmr::resource_adaptor<T>::do_allocate(size_t bytes, size_t align)"
- " 'bytes' exceeds maximum supported size");
- }
+ if (__bytes > __max_size())
+ __throw_bad_array_new_length();
size_t __s = __aligned_allocation_size(__bytes, _MaxAlign) / _MaxAlign;
return __alloc_.allocate(__s);
}
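Both experimental pmr allocation paths above now report an oversized request with std::bad_array_new_length instead of std::length_error. A minimal sketch of the observable difference, assuming <experimental/memory_resource> is still shipped and the program links the experimental library (e.g. -lc++experimental):

#include <experimental/memory_resource>
#include <cstdint>
#include <iostream>
#include <new>        // std::bad_array_new_length

namespace pmr = std::experimental::pmr;

int main() {
    pmr::polymorphic_allocator<int> alloc{pmr::new_delete_resource()};
    try {
        // A request above the allocator's maximum size now throws
        // std::bad_array_new_length rather than std::length_error.
        (void)alloc.allocate(SIZE_MAX);
    } catch (const std::bad_array_new_length&) {
        std::cout << "caught bad_array_new_length\n";
    }
}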
diff --git a/libcxx/include/experimental/propagate_const b/libcxx/include/experimental/propagate_const
index ce4b879b7eab..12376dcec242 100644
--- a/libcxx/include/experimental/propagate_const
+++ b/libcxx/include/experimental/propagate_const
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===------------------------ propagate_const -----------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/experimental/regex b/libcxx/include/experimental/regex
index 17193cf2f6f6..ced0e950a127 100644
--- a/libcxx/include/experimental/regex
+++ b/libcxx/include/experimental/regex
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===----------------------------- regex ----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -52,9 +52,11 @@ using match_results =
polymorphic_allocator<_VSTD::sub_match<_BiDirIter>>>;
typedef match_results<const char*> cmatch;
-typedef match_results<const wchar_t*> wcmatch;
typedef match_results<_VSTD_LFTS_PMR::string::const_iterator> smatch;
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
+typedef match_results<const wchar_t*> wcmatch;
typedef match_results<_VSTD_LFTS_PMR::wstring::const_iterator> wsmatch;
+#endif
_LIBCPP_END_NAMESPACE_LFTS_PMR
diff --git a/libcxx/include/experimental/set b/libcxx/include/experimental/set
index 52f4df384278..891510bbb8d9 100644
--- a/libcxx/include/experimental/set
+++ b/libcxx/include/experimental/set
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- list ------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/experimental/simd b/libcxx/include/experimental/simd
index d1aaf5504488..1f17ee96f0b5 100644
--- a/libcxx/include/experimental/simd
+++ b/libcxx/include/experimental/simd
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===------------------------------- simd ---------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -963,7 +963,7 @@ template <int _Np>
using fixed_size = __simd_abi<_StorageKind::_Array, _Np>;
template <class _Tp>
-_LIBCPP_INLINE_VAR constexpr size_t max_fixed_size = 32;
+inline constexpr size_t max_fixed_size = 32;
template <class _Tp>
using compatible = fixed_size<16 / sizeof(_Tp)>;
@@ -990,10 +990,10 @@ struct element_aligned_tag {};
struct vector_aligned_tag {};
template <size_t>
struct overaligned_tag {};
-_LIBCPP_INLINE_VAR constexpr element_aligned_tag element_aligned{};
-_LIBCPP_INLINE_VAR constexpr vector_aligned_tag vector_aligned{};
+inline constexpr element_aligned_tag element_aligned{};
+inline constexpr vector_aligned_tag vector_aligned{};
template <size_t _Np>
-_LIBCPP_INLINE_VAR constexpr overaligned_tag<_Np> overaligned{};
+inline constexpr overaligned_tag<_Np> overaligned{};
// traits [simd.traits]
template <class _Tp>
@@ -1032,14 +1032,13 @@ struct is_simd_flag_type<overaligned_tag<_Align>>
: std::integral_constant<bool, true> {};
template <class _Tp>
-_LIBCPP_INLINE_VAR constexpr bool is_abi_tag_v = is_abi_tag<_Tp>::value;
+inline constexpr bool is_abi_tag_v = is_abi_tag<_Tp>::value;
template <class _Tp>
-_LIBCPP_INLINE_VAR constexpr bool is_simd_v = is_simd<_Tp>::value;
+inline constexpr bool is_simd_v = is_simd<_Tp>::value;
template <class _Tp>
-_LIBCPP_INLINE_VAR constexpr bool is_simd_mask_v = is_simd_mask<_Tp>::value;
+inline constexpr bool is_simd_mask_v = is_simd_mask<_Tp>::value;
template <class _Tp>
-_LIBCPP_INLINE_VAR constexpr bool is_simd_flag_type_v =
- is_simd_flag_type<_Tp>::value;
+inline constexpr bool is_simd_flag_type_v = is_simd_flag_type<_Tp>::value;
template <class _Tp, size_t _Np>
struct abi_for_size {
using type = simd_abi::fixed_size<_Np>;
@@ -1064,11 +1063,10 @@ template <class _Tp, class _Up = typename _Tp::value_type>
struct memory_alignment;
template <class _Tp, class _Abi = simd_abi::compatible<_Tp>>
-_LIBCPP_INLINE_VAR constexpr size_t simd_size_v = simd_size<_Tp, _Abi>::value;
+inline constexpr size_t simd_size_v = simd_size<_Tp, _Abi>::value;
template <class _Tp, class _Up = typename _Tp::value_type>
-_LIBCPP_INLINE_VAR constexpr size_t memory_alignment_v =
- memory_alignment<_Tp, _Up>::value;
+inline constexpr size_t memory_alignment_v = memory_alignment<_Tp, _Up>::value;
// class template simd [simd.class]
template <class _Tp>
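The simd hunks replace the _LIBCPP_INLINE_VAR compatibility macro with plain inline constexpr, which since C++17 gives the variable template a single program-wide definition with no ODR workarounds. A tiny sketch of the pattern, using an illustrative name:

// C++17 inline variable template: one definition across all translation units,
// the same spelling the header now uses instead of _LIBCPP_INLINE_VAR.
template <class T>
inline constexpr bool is_small_v = sizeof(T) <= sizeof(void*);

static_assert(is_small_v<char>);
static_assert(is_small_v<void*>);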
diff --git a/libcxx/include/experimental/string b/libcxx/include/experimental/string
index 264ff9236822..b881fcf3af1c 100644
--- a/libcxx/include/experimental/string
+++ b/libcxx/include/experimental/string
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- string ----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -54,7 +54,9 @@ using basic_string =
typedef basic_string<char> string;
typedef basic_string<char16_t> u16string;
typedef basic_string<char32_t> u32string;
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
typedef basic_string<wchar_t> wstring;
+#endif
_LIBCPP_END_NAMESPACE_LFTS_PMR
diff --git a/libcxx/include/experimental/type_traits b/libcxx/include/experimental/type_traits
index ea1335f96af5..408e62d5cb86 100644
--- a/libcxx/include/experimental/type_traits
+++ b/libcxx/include/experimental/type_traits
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===-------------------------- type_traits -------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/experimental/unordered_map b/libcxx/include/experimental/unordered_map
index eca9cea793a4..fc8cc7f77bf0 100644
--- a/libcxx/include/experimental/unordered_map
+++ b/libcxx/include/experimental/unordered_map
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===------------------------- unordered_map ------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/experimental/unordered_set b/libcxx/include/experimental/unordered_set
index 323868f785a5..39342da5f679 100644
--- a/libcxx/include/experimental/unordered_set
+++ b/libcxx/include/experimental/unordered_set
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===------------------------- unordered_set ------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/experimental/utility b/libcxx/include/experimental/utility
index 0bca0f7c9c46..6d819da9bb1d 100644
--- a/libcxx/include/experimental/utility
+++ b/libcxx/include/experimental/utility
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===-------------------------- utility ----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/experimental/vector b/libcxx/include/experimental/vector
index 9b810120695a..a22698ef7ce4 100644
--- a/libcxx/include/experimental/vector
+++ b/libcxx/include/experimental/vector
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- vector ------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/ext/__hash b/libcxx/include/ext/__hash
index fbeddf03a404..268577f3c922 100644
--- a/libcxx/include/ext/__hash
+++ b/libcxx/include/ext/__hash
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===------------------------- hash_set ------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/ext/hash_map b/libcxx/include/ext/hash_map
index 60e32b09e3e1..6c757e2fba3e 100644
--- a/libcxx/include/ext/hash_map
+++ b/libcxx/include/ext/hash_map
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===-------------------------- hash_map ----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/ext/hash_set b/libcxx/include/ext/hash_set
index af3f9c5de0c5..b61f5f1da448 100644
--- a/libcxx/include/ext/hash_set
+++ b/libcxx/include/ext/hash_set
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===------------------------- hash_set ------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/fenv.h b/libcxx/include/fenv.h
index 988cd9b112fb..b3ce2378219f 100644
--- a/libcxx/include/fenv.h
+++ b/libcxx/include/fenv.h
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===---------------------------- math.h ----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/filesystem b/libcxx/include/filesystem
index 61d6c659cb47..dcbdbbae6985 100644
--- a/libcxx/include/filesystem
+++ b/libcxx/include/filesystem
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- filesystem -------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -11,7 +11,7 @@
/*
filesystem synopsis
- namespace std { namespace filesystem {
+ namespace std::filesystem {
class path;
@@ -48,13 +48,13 @@
// enable directory_iterator range-based for statements
directory_iterator begin(directory_iterator iter) noexcept;
- directory_iterator end(const directory_iterator&) noexcept;
+ directory_iterator end(directory_iterator) noexcept;
class recursive_directory_iterator;
// enable recursive_directory_iterator range-based for statements
recursive_directory_iterator begin(recursive_directory_iterator iter) noexcept;
- recursive_directory_iterator end(const recursive_directory_iterator&) noexcept;
+ recursive_directory_iterator end(recursive_directory_iterator) noexcept;
class file_status;
@@ -224,14 +224,25 @@
path weakly_canonical(path const& p);
path weakly_canonical(path const& p, error_code& ec);
+} // namespace std::filesystem
-} } // namespaces std::filesystem
+template <>
+inline constexpr bool std::ranges::enable_borrowed_range<std::filesystem::directory_iterator> = true;
+template <>
+inline constexpr bool std::ranges::enable_borrowed_range<std::filesystem::recursive_directory_iterator> = true;
+
+template <>
+inline constexpr bool std::ranges::enable_view<std::filesystem::directory_iterator> = true;
+template <>
+inline constexpr bool std::ranges::enable_view<std::filesystem::recursive_directory_iterator> = true;
*/
#include <__availability>
#include <__config>
#include <__debug>
+#include <__ranges/enable_borrowed_range.h>
+#include <__ranges/enable_view.h>
#include <__utility/forward.h>
#include <chrono>
#include <compare>
@@ -253,7 +264,7 @@
#endif
#if defined(_LIBCPP_HAS_NO_FILESYSTEM_LIBRARY)
-# error "The Filesystem library is not supported by this configuration of libc++"
+# error "The Filesystem library is not supported since libc++ has been configured with LIBCXX_ENABLE_FILESYSTEM disabled"
#endif
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -277,7 +288,7 @@ struct _LIBCPP_TYPE_VIS space_info {
uintmax_t available;
};
-// On Windows, the library never identifies files as block, character, fifo
+// On Windows, the library never identifies files as block, character, fifo
// or socket.
enum class _LIBCPP_ENUM_VIS file_type : signed char {
none = 0,
@@ -955,6 +966,7 @@ public:
_PathCVT<_ItVal>::__append_range(__pn_, __first, __last);
}
+/*
#if !defined(_LIBCPP_HAS_NO_LOCALIZATION)
// TODO Implement locale conversions.
template <class _Source, class = _EnableIfPathable<_Source, void> >
@@ -963,6 +975,7 @@ public:
path(_InputIt __first, _InputIt _last, const locale& __loc,
format = format::auto_format);
#endif
+*/
_LIBCPP_INLINE_VISIBILITY
~path() = default;
@@ -1283,9 +1296,11 @@ public:
return __s;
}
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
_LIBCPP_INLINE_VISIBILITY _VSTD::wstring wstring() const {
return string<wchar_t>();
}
+#endif
_LIBCPP_INLINE_VISIBILITY _VSTD::u16string u16string() const {
return string<char16_t>();
}
@@ -1310,7 +1325,9 @@ public:
return string<_ECharT, _Traits, _Allocator>(__a);
}
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
_VSTD::wstring generic_wstring() const { return string<wchar_t>(); }
+#endif
_VSTD::u16string generic_u16string() const { return string<char16_t>(); }
_VSTD::u32string generic_u32string() const { return string<char32_t>(); }
#endif /* !_LIBCPP_HAS_NO_LOCALIZATION */
@@ -2865,7 +2882,7 @@ begin(directory_iterator __iter) noexcept {
}
inline _LIBCPP_INLINE_VISIBILITY directory_iterator
-end(const directory_iterator&) noexcept {
+end(directory_iterator) noexcept {
return directory_iterator();
}
@@ -2997,7 +3014,7 @@ begin(recursive_directory_iterator __iter) noexcept {
}
inline _LIBCPP_INLINE_VISIBILITY recursive_directory_iterator
-end(const recursive_directory_iterator&) noexcept {
+end(recursive_directory_iterator) noexcept {
return recursive_directory_iterator();
}
@@ -3005,6 +3022,18 @@ _LIBCPP_AVAILABILITY_FILESYSTEM_POP
_LIBCPP_END_NAMESPACE_FILESYSTEM
+#if !defined(_LIBCPP_HAS_NO_RANGES)
+template <>
+inline constexpr bool _VSTD::ranges::enable_borrowed_range<_VSTD_FS::directory_iterator> = true;
+template <>
+inline constexpr bool _VSTD::ranges::enable_borrowed_range<_VSTD_FS::recursive_directory_iterator> = true;
+
+template <>
+inline constexpr bool _VSTD::ranges::enable_view<_VSTD_FS::directory_iterator> = true;
+template <>
+inline constexpr bool _VSTD::ranges::enable_view<_VSTD_FS::recursive_directory_iterator> = true;
+#endif
+
#endif // !_LIBCPP_CXX03_LANG
_LIBCPP_POP_MACROS
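The filesystem changes above take end() by value for both directory iterator types and, when ranges are available, specialize std::ranges::enable_borrowed_range and std::ranges::enable_view for them, so C++20 range algorithms may return iterators obtained from a temporary iterator without dangling. A sketch of what that permits, assuming a toolchain whose C++20 ranges support is complete enough to provide std::ranges::find_if:

#include <algorithm>   // std::ranges::find_if
#include <filesystem>
#include <iostream>
#include <ranges>

namespace fs = std::filesystem;

int main() {
    // The range argument is an rvalue; because directory_iterator is a
    // borrowed range, find_if returns a real iterator instead of
    // std::ranges::dangling.
    auto it = std::ranges::find_if(
        fs::directory_iterator{fs::current_path()},
        [](const fs::directory_entry& entry) { return entry.is_regular_file(); });

    if (it != fs::directory_iterator{})
        std::cout << "first regular file: " << it->path().filename().string() << '\n';
}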
diff --git a/libcxx/include/float.h b/libcxx/include/float.h
index 399ab4f11a5d..e3b4f9f3442b 100644
--- a/libcxx/include/float.h
+++ b/libcxx/include/float.h
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- float.h ----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/format b/libcxx/include/format
index 0ec4b85ca0a5..e1d47c9f84dd 100644
--- a/libcxx/include/format
+++ b/libcxx/include/format
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- format -----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -13,6 +13,215 @@
/*
namespace std {
+ // [format.context], class template basic_format_context
+ template<class Out, class charT>
+ class basic_format_context {
+ basic_format_args<basic_format_context> args_; // exposition only
+ Out out_; // exposition only
+
+ public:
+ using iterator = Out;
+ using char_type = charT;
+ template<class T> using formatter_type = formatter<T, charT>;
+
+ basic_format_arg<basic_format_context> arg(size_t id) const;
+ std::locale locale();
+
+ iterator out();
+ void advance_to(iterator it);
+ };
+ using format_context = basic_format_context<unspecified, char>;
+ using wformat_context = basic_format_context<unspecified, wchar_t>;
+
+ // [format.args], class template basic_format_args
+ template<class Context>
+ class basic_format_args {
+ size_t size_; // exposition only
+ const basic_format_arg<Context>* data_; // exposition only
+
+ public:
+ basic_format_args() noexcept;
+
+ template<class... Args>
+ basic_format_args(const format-arg-store<Context, Args...>& store) noexcept;
+
+ basic_format_arg<Context> get(size_t i) const noexcept;
+ };
+ using format_args = basic_format_args<format_context>;
+ using wformat_args = basic_format_args<wformat_context>;
+
+
+ template<class Out, class charT>
+ using format_args_t = basic_format_args<basic_format_context<Out, charT>>;
+
+ // [format.functions], formatting functions
+ template<class... Args>
+ string format(string_view fmt, const Args&... args);
+ template<class... Args>
+ wstring format(wstring_view fmt, const Args&... args);
+ template<class... Args>
+ string format(const locale& loc, string_view fmt, const Args&... args);
+ template<class... Args>
+ wstring format(const locale& loc, wstring_view fmt, const Args&... args);
+
+ string vformat(string_view fmt, format_args args);
+ wstring vformat(wstring_view fmt, wformat_args args);
+ string vformat(const locale& loc, string_view fmt, format_args args);
+ wstring vformat(const locale& loc, wstring_view fmt, wformat_args args);
+
+ template<class Out, class... Args>
+ Out format_to(Out out, string_view fmt, const Args&... args);
+ template<class Out, class... Args>
+ Out format_to(Out out, wstring_view fmt, const Args&... args);
+ template<class Out, class... Args>
+ Out format_to(Out out, const locale& loc, string_view fmt, const Args&... args);
+ template<class Out, class... Args>
+ Out format_to(Out out, const locale& loc, wstring_view fmt, const Args&... args);
+
+ template<class Out>
+ Out vformat_to(Out out, string_view fmt,
+ format_args_t<type_identity_t<Out>, char> args);
+ template<class Out>
+ Out vformat_to(Out out, wstring_view fmt,
+ format_args_t<type_identity_t<Out>, wchar_t> args);
+ template<class Out>
+ Out vformat_to(Out out, const locale& loc, string_view fmt,
+ format_args_t<type_identity_t<Out>, char> args);
+ template<class Out>
+ Out vformat_to(Out out, const locale& loc, wstring_view fmt,
+ format_args_t<type_identity_t<Out>, wchar_t> args);
+
+ template<class Out> struct format_to_n_result {
+ Out out;
+ iter_difference_t<Out> size;
+ };
+
+ template<class Out, class... Args>
+ format_to_n_result<Out> format_to_n(Out out, iter_difference_t<Out> n,
+ string_view fmt, const Args&... args);
+ template<class Out, class... Args>
+ format_to_n_result<Out> format_to_n(Out out, iter_difference_t<Out> n,
+ wstring_view fmt, const Args&... args);
+ template<class Out, class... Args>
+ format_to_n_result<Out> format_to_n(Out out, iter_difference_t<Out> n,
+ const locale& loc, string_view fmt,
+ const Args&... args);
+ template<class Out, class... Args>
+ format_to_n_result<Out> format_to_n(Out out, iter_difference_t<Out> n,
+ const locale& loc, wstring_view fmt,
+ const Args&... args);
+
+ template<class... Args>
+ size_t formatted_size(string_view fmt, const Args&... args);
+ template<class... Args>
+ size_t formatted_size(wstring_view fmt, const Args&... args);
+ template<class... Args>
+ size_t formatted_size(const locale& loc, string_view fmt, const Args&... args);
+ template<class... Args>
+ size_t formatted_size(const locale& loc, wstring_view fmt, const Args&... args);
+
+ // [format.formatter], formatter
+ template<> struct formatter<char, char>;
+ template<> struct formatter<char, wchar_t>;
+ template<> struct formatter<wchar_t, wchar_t>;
+
+ template<> struct formatter<charT*, charT>;
+ template<> struct formatter<const charT*, charT>;
+ template<size_t N> struct formatter<const charT[N], charT>;
+ template<class traits, class Allocator>
+ struct formatter<basic_string<charT, traits, Allocator>, charT>;
+ template<class traits>
+ struct formatter<basic_string_view<charT, traits>, charT>;
+
+ // [format.parse.ctx], class template basic_format_parse_context
+ template<class charT>
+ class basic_format_parse_context {
+ public:
+ using char_type = charT;
+ using const_iterator = typename basic_string_view<charT>::const_iterator;
+ using iterator = const_iterator;
+
+ private:
+ iterator begin_; // exposition only
+ iterator end_; // exposition only
+ enum indexing { unknown, manual, automatic }; // exposition only
+ indexing indexing_; // exposition only
+ size_t next_arg_id_; // exposition only
+ size_t num_args_; // exposition only
+
+ public:
+ constexpr explicit basic_format_parse_context(basic_string_view<charT> fmt,
+ size_t num_args = 0) noexcept;
+ basic_format_parse_context(const basic_format_parse_context&) = delete;
+ basic_format_parse_context& operator=(const basic_format_parse_context&) = delete;
+
+ constexpr const_iterator begin() const noexcept;
+ constexpr const_iterator end() const noexcept;
+ constexpr void advance_to(const_iterator it);
+
+ constexpr size_t next_arg_id();
+ constexpr void check_arg_id(size_t id);
+ };
+ using format_parse_context = basic_format_parse_context<char>;
+ using wformat_parse_context = basic_format_parse_context<wchar_t>;
+
+ // [format.arguments], arguments
+ // [format.arg], class template basic_format_arg
+ template<class Context>
+ class basic_format_arg {
+ public:
+ class handle;
+
+ private:
+ using char_type = typename Context::char_type; // exposition only
+
+ variant<monostate, bool, char_type,
+ int, unsigned int, long long int, unsigned long long int,
+ float, double, long double,
+ const char_type*, basic_string_view<char_type>,
+ const void*, handle> value; // exposition only
+
+ template<class T> explicit basic_format_arg(const T& v) noexcept; // exposition only
+ explicit basic_format_arg(float n) noexcept; // exposition only
+ explicit basic_format_arg(double n) noexcept; // exposition only
+ explicit basic_format_arg(long double n) noexcept; // exposition only
+ explicit basic_format_arg(const char_type* s); // exposition only
+
+ template<class traits>
+ explicit basic_format_arg(
+ basic_string_view<char_type, traits> s) noexcept; // exposition only
+
+ template<class traits, class Allocator>
+ explicit basic_format_arg(
+ const basic_string<char_type, traits, Allocator>& s) noexcept; // exposition only
+
+ explicit basic_format_arg(nullptr_t) noexcept; // exposition only
+
+ template<class T>
+ explicit basic_format_arg(const T* p) noexcept; // exposition only
+
+ public:
+ basic_format_arg() noexcept;
+
+ explicit operator bool() const noexcept;
+ };
+
+ template<class Visitor, class Context>
+ see below visit_format_arg(Visitor&& vis, basic_format_arg<Context> arg);
+
+ // [format.arg.store], class template format-arg-store
+ template<class Context, class... Args>
+ struct format-arg-store { // exposition only
+ array<basic_format_arg<Context>, sizeof...(Args)> args;
+ };
+
+ template<class Context = format_context, class... Args>
+ format-arg-store<Context, Args...>
+ make_format_args(const Args&... args);
+ template<class... Args>
+ format-arg-store<wformat_context, Args...>
+ make_wformat_args(const Args&... args);
+
// [format.error], class format_error
class format_error : public runtime_error {
public:
@@ -55,17 +264,40 @@ namespace std {
*/
+// Make sure all feature-test macros are available.
+#include <version>
+// Enable the contents of the header only when libc++ was built with LIBCXX_ENABLE_INCOMPLETE_FEATURES.
+#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_FORMAT)
+
#include <__config>
+#include <__debug>
+#include <__format/format_arg.h>
+#include <__format/format_args.h>
+#include <__format/format_context.h>
#include <__format/format_error.h>
+#include <__format/format_fwd.h>
#include <__format/format_parse_context.h>
-#include <version>
+#include <__format/format_string.h>
+#include <__format/format_to_n_result.h>
+#include <__format/formatter.h>
+#include <__format/formatter_bool.h>
+#include <__format/formatter_char.h>
+#include <__format/formatter_integer.h>
+#include <__format/formatter_string.h>
+#include <__format/parser_std_format_spec.h>
+#include <__variant/monostate.h>
+#include <array>
+#include <concepts>
+#include <string>
+#include <string_view>
+#include <type_traits>
-#if defined(_LIBCPP_HAS_NO_INCOMPLETE_FORMAT)
-# error "The Format library is not supported since libc++ has been configured with LIBCXX_ENABLE_INCOMPLETE_FEATURES disabled"
+#ifndef _LIBCPP_HAS_NO_LOCALIZATION
+#include <locale>
#endif
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
-# pragma GCC system_header
+#pragma GCC system_header
#endif
_LIBCPP_PUSH_MACROS
@@ -75,10 +307,395 @@ _LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER > 17
+// TODO FMT Remove this once we require compilers with proper C++20 support.
+// If the compiler has no concepts support, the format header will be disabled.
+// Without concepts support enable_if needs to be used and that too much effort
+// to support compilers with partial C++20 support.
+#if !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+// TODO FMT Move the implementation in this file to its own granular headers.
+
+// TODO FMT Evaluate which templates should be external templates. This
+// improves the efficiency of the header. However since the header is still
+// under heavy development and not all classes are stable it makes no sense
+// to do this optimization now.
+
+using format_args = basic_format_args<format_context>;
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
+using wformat_args = basic_format_args<wformat_context>;
+#endif
+
+template <class _OutIt, class _CharT>
+using format_args_t = basic_format_args<basic_format_context<_OutIt, _CharT>>;
+
+template <class _Context, class... _Args>
+struct _LIBCPP_TEMPLATE_VIS __format_arg_store {
+ // TODO FMT Use a built-in array.
+ array<basic_format_arg<_Context>, sizeof...(_Args)> __args;
+};
+
+template <class _Context = format_context, class... _Args>
+_LIBCPP_HIDE_FROM_ABI __format_arg_store<_Context, _Args...>
+make_format_args(const _Args&... __args) {
+ return {basic_format_arg<_Context>(__args)...};
+}
+
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
+template <class... _Args>
+_LIBCPP_HIDE_FROM_ABI __format_arg_store<wformat_context, _Args...>
+make_wformat_args(const _Args&... __args) {
+ return _VSTD::make_format_args<wformat_context>(__args...);
+}
+#endif
+
+namespace __format {
+
+template <class _CharT, class _ParseCtx, class _Ctx>
+_LIBCPP_HIDE_FROM_ABI const _CharT*
+__handle_replacement_field(const _CharT* __begin, const _CharT* __end,
+ _ParseCtx& __parse_ctx, _Ctx& __ctx) {
+ __format::__parse_number_result __r =
+ __format::__parse_arg_id(__begin, __end, __parse_ctx);
+
+ switch (*__r.__ptr) {
+ case _CharT(':'):
+ // The arg-id has a format-specifier, advance the input to the format-spec.
+ __parse_ctx.advance_to(__r.__ptr + 1);
+ break;
+ case _CharT('}'):
+ // The arg-id has no format-specifier.
+ __parse_ctx.advance_to(__r.__ptr);
+ break;
+ default:
+ __throw_format_error(
+ "The replacement field arg-id should terminate at a ':' or '}'");
+ }
+
+ _VSTD::visit_format_arg(
+ [&](auto __arg) {
+ if constexpr (same_as<decltype(__arg), monostate>)
+ __throw_format_error("Argument index out of bounds");
+ else {
+ formatter<decltype(__arg), _CharT> __formatter;
+ __parse_ctx.advance_to(__formatter.parse(__parse_ctx));
+ __ctx.advance_to(__formatter.format(__arg, __ctx));
+ }
+ },
+ __ctx.arg(__r.__value));
+
+ __begin = __parse_ctx.begin();
+ if (__begin == __end || *__begin != _CharT('}'))
+ __throw_format_error("The replacement field misses a terminating '}'");
+
+ return ++__begin;
+}
+
+template <class _ParseCtx, class _Ctx>
+_LIBCPP_HIDE_FROM_ABI typename _Ctx::iterator
+__vformat_to(_ParseCtx&& __parse_ctx, _Ctx&& __ctx) {
+ using _CharT = typename _ParseCtx::char_type;
+ static_assert(same_as<typename _Ctx::char_type, _CharT>);
+
+ const _CharT* __begin = __parse_ctx.begin();
+ const _CharT* __end = __parse_ctx.end();
+ typename _Ctx::iterator __out_it = __ctx.out();
+ while (__begin != __end) {
+ switch (*__begin) {
+ case _CharT('{'):
+ ++__begin;
+ if (__begin == __end)
+ __throw_format_error("The format string terminates at a '{'");
+
+ if (*__begin != _CharT('{')) [[likely]] {
+ __ctx.advance_to(_VSTD::move(__out_it));
+ __begin =
+ __handle_replacement_field(__begin, __end, __parse_ctx, __ctx);
+ __out_it = __ctx.out();
+
+ // The output is written and __begin points to the next character. So
+ // start the next iteration.
+ continue;
+ }
+ // The string is an escape character.
+ break;
+
+ case _CharT('}'):
+ ++__begin;
+ if (__begin == __end || *__begin != _CharT('}'))
+ __throw_format_error(
+ "The format string contains an invalid escape sequence");
+
+ break;
+ }
+
+ // Copy the character to the output verbatim.
+ *__out_it++ = *__begin++;
+ }
+ return __out_it;
+}
+
+} // namespace __format
+
+template <class _OutIt, class _CharT>
+requires(output_iterator<_OutIt, const _CharT&>) _LIBCPP_HIDE_FROM_ABI _OutIt
+ __vformat_to(_OutIt __out_it, basic_string_view<_CharT> __fmt,
+ format_args_t<type_identity_t<_OutIt>, _CharT> __args) {
+ return __format::__vformat_to(
+ basic_format_parse_context{__fmt, __args.__size()},
+ _VSTD::__format_context_create(_VSTD::move(__out_it), __args));
+}
+
+template <output_iterator<const char&> _OutIt>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt
+vformat_to(_OutIt __out_it, string_view __fmt,
+ format_args_t<type_identity_t<_OutIt>, char> __args) {
+ return _VSTD::__vformat_to(_VSTD::move(__out_it), __fmt, __args);
+}
+
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
+template <output_iterator<const wchar_t&> _OutIt>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt
+vformat_to(_OutIt __out_it, wstring_view __fmt,
+ format_args_t<type_identity_t<_OutIt>, wchar_t> __args) {
+ return _VSTD::__vformat_to(_VSTD::move(__out_it), __fmt, __args);
+}
+#endif
+
+template <output_iterator<const char&> _OutIt, class... _Args>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt
+format_to(_OutIt __out_it, string_view __fmt, const _Args&... __args) {
+ return _VSTD::vformat_to(
+ _VSTD::move(__out_it), __fmt,
+ _VSTD::make_format_args<basic_format_context<_OutIt, char>>(__args...));
+}
+
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
+template <output_iterator<const wchar_t&> _OutIt, class... _Args>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt
+format_to(_OutIt __out_it, wstring_view __fmt, const _Args&... __args) {
+ return _VSTD::vformat_to(
+ _VSTD::move(__out_it), __fmt,
+ _VSTD::make_format_args<basic_format_context<_OutIt, wchar_t>>(
+ __args...));
+}
+#endif
+
+inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT string
+vformat(string_view __fmt, format_args __args) {
+ string __res;
+ _VSTD::vformat_to(_VSTD::back_inserter(__res), __fmt, __args);
+ return __res;
+}
+
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
+inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT wstring
+vformat(wstring_view __fmt, wformat_args __args) {
+ wstring __res;
+ _VSTD::vformat_to(_VSTD::back_inserter(__res), __fmt, __args);
+ return __res;
+}
+#endif
+
+template <class... _Args>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT string
+format(string_view __fmt, const _Args&... __args) {
+ return _VSTD::vformat(__fmt, _VSTD::make_format_args(__args...));
+}
+
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
+template <class... _Args>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT wstring
+format(wstring_view __fmt, const _Args&... __args) {
+ return _VSTD::vformat(__fmt, _VSTD::make_wformat_args(__args...));
+}
+#endif
+
+template <output_iterator<const char&> _OutIt, class... _Args>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT format_to_n_result<_OutIt>
+format_to_n(_OutIt __out_it, iter_difference_t<_OutIt> __n, string_view __fmt,
+ const _Args&... __args) {
+ // TODO FMT Improve PoC: using std::string is inefficient.
+ string __str = _VSTD::vformat(__fmt, _VSTD::make_format_args(__args...));
+ iter_difference_t<_OutIt> __s = __str.size();
+ iter_difference_t<_OutIt> __m =
+ _VSTD::clamp(__n, iter_difference_t<_OutIt>(0), __s);
+ __out_it = _VSTD::copy_n(__str.begin(), __m, _VSTD::move(__out_it));
+ return {_VSTD::move(__out_it), __s};
+}
+
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
+template <output_iterator<const wchar_t&> _OutIt, class... _Args>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT format_to_n_result<_OutIt>
+format_to_n(_OutIt __out_it, iter_difference_t<_OutIt> __n, wstring_view __fmt,
+ const _Args&... __args) {
+ // TODO FMT Improve PoC: using std::string is inefficient.
+ wstring __str = _VSTD::vformat(__fmt, _VSTD::make_wformat_args(__args...));
+ iter_difference_t<_OutIt> __s = __str.size();
+ iter_difference_t<_OutIt> __m =
+ _VSTD::clamp(__n, iter_difference_t<_OutIt>(0), __s);
+ __out_it = _VSTD::copy_n(__str.begin(), __m, _VSTD::move(__out_it));
+ return {_VSTD::move(__out_it), __s};
+}
+#endif
+
+template <class... _Args>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT size_t
+formatted_size(string_view __fmt, const _Args&... __args) {
+ // TODO FMT Improve PoC: using std::string is inefficient.
+ return _VSTD::vformat(__fmt, _VSTD::make_format_args(__args...)).size();
+}
+
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
+template <class... _Args>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT size_t
+formatted_size(wstring_view __fmt, const _Args&... __args) {
+ // TODO FMT Improve PoC: using std::string is inefficient.
+ return _VSTD::vformat(__fmt, _VSTD::make_wformat_args(__args...)).size();
+}
+#endif
+
+#ifndef _LIBCPP_HAS_NO_LOCALIZATION
+
+template <class _OutIt, class _CharT>
+requires(output_iterator<_OutIt, const _CharT&>) _LIBCPP_HIDE_FROM_ABI _OutIt
+ __vformat_to(_OutIt __out_it, locale __loc, basic_string_view<_CharT> __fmt,
+ format_args_t<type_identity_t<_OutIt>, _CharT> __args) {
+ return __format::__vformat_to(
+ basic_format_parse_context{__fmt, __args.__size()},
+ _VSTD::__format_context_create(_VSTD::move(__out_it), __args,
+ _VSTD::move(__loc)));
+}
+
+template <output_iterator<const char&> _OutIt>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt
+vformat_to(_OutIt __out_it, locale __loc, string_view __fmt,
+ format_args_t<type_identity_t<_OutIt>, char> __args) {
+ return _VSTD::__vformat_to(_VSTD::move(__out_it), _VSTD::move(__loc), __fmt,
+ __args);
+}
+
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
+template <output_iterator<const wchar_t&> _OutIt>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt
+vformat_to(_OutIt __out_it, locale __loc, wstring_view __fmt,
+ format_args_t<type_identity_t<_OutIt>, wchar_t> __args) {
+ return _VSTD::__vformat_to(_VSTD::move(__out_it), _VSTD::move(__loc), __fmt,
+ __args);
+}
+#endif
+
+template <output_iterator<const char&> _OutIt, class... _Args>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt format_to(
+ _OutIt __out_it, locale __loc, string_view __fmt, const _Args&... __args) {
+ return _VSTD::vformat_to(
+ _VSTD::move(__out_it), _VSTD::move(__loc), __fmt,
+ _VSTD::make_format_args<basic_format_context<_OutIt, char>>(__args...));
+}
+
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
+template <output_iterator<const wchar_t&> _OutIt, class... _Args>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt format_to(
+ _OutIt __out_it, locale __loc, wstring_view __fmt, const _Args&... __args) {
+ return _VSTD::vformat_to(
+ _VSTD::move(__out_it), _VSTD::move(__loc), __fmt,
+ _VSTD::make_format_args<basic_format_context<_OutIt, wchar_t>>(
+ __args...));
+}
+#endif
+
+inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT string
+vformat(locale __loc, string_view __fmt, format_args __args) {
+ string __res;
+ _VSTD::vformat_to(_VSTD::back_inserter(__res), _VSTD::move(__loc), __fmt,
+ __args);
+ return __res;
+}
+
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
+inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT wstring
+vformat(locale __loc, wstring_view __fmt, wformat_args __args) {
+ wstring __res;
+ _VSTD::vformat_to(_VSTD::back_inserter(__res), _VSTD::move(__loc), __fmt,
+ __args);
+ return __res;
+}
+#endif
+
+template <class... _Args>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT string
+format(locale __loc, string_view __fmt, const _Args&... __args) {
+ return _VSTD::vformat(_VSTD::move(__loc), __fmt,
+ _VSTD::make_format_args(__args...));
+}
+
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
+template <class... _Args>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT wstring
+format(locale __loc, wstring_view __fmt, const _Args&... __args) {
+ return _VSTD::vformat(_VSTD::move(__loc), __fmt,
+ _VSTD::make_wformat_args(__args...));
+}
+#endif
+
+template <output_iterator<const char&> _OutIt, class... _Args>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT format_to_n_result<_OutIt>
+format_to_n(_OutIt __out_it, iter_difference_t<_OutIt> __n, locale __loc,
+ string_view __fmt, const _Args&... __args) {
+ // TODO FMT Improve PoC: using std::string is inefficient.
+ string __str = _VSTD::vformat(_VSTD::move(__loc), __fmt,
+ _VSTD::make_format_args(__args...));
+ iter_difference_t<_OutIt> __s = __str.size();
+ iter_difference_t<_OutIt> __m =
+ _VSTD::clamp(__n, iter_difference_t<_OutIt>(0), __s);
+ __out_it = _VSTD::copy_n(__str.begin(), __m, _VSTD::move(__out_it));
+ return {_VSTD::move(__out_it), __s};
+}
+
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
+template <output_iterator<const wchar_t&> _OutIt, class... _Args>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT format_to_n_result<_OutIt>
+format_to_n(_OutIt __out_it, iter_difference_t<_OutIt> __n, locale __loc,
+ wstring_view __fmt, const _Args&... __args) {
+ // TODO FMT Improve PoC: using std::string is inefficient.
+ wstring __str = _VSTD::vformat(_VSTD::move(__loc), __fmt,
+ _VSTD::make_wformat_args(__args...));
+ iter_difference_t<_OutIt> __s = __str.size();
+ iter_difference_t<_OutIt> __m =
+ _VSTD::clamp(__n, iter_difference_t<_OutIt>(0), __s);
+ __out_it = _VSTD::copy_n(__str.begin(), __m, _VSTD::move(__out_it));
+ return {_VSTD::move(__out_it), __s};
+}
+#endif
+
+template <class... _Args>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT size_t
+formatted_size(locale __loc, string_view __fmt, const _Args&... __args) {
+ // TODO FMT Improve PoC: using std::string is inefficient.
+ return _VSTD::vformat(_VSTD::move(__loc), __fmt,
+ _VSTD::make_format_args(__args...))
+ .size();
+}
+
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
+template <class... _Args>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT size_t
+formatted_size(locale __loc, wstring_view __fmt, const _Args&... __args) {
+ // TODO FMT Improve PoC: using std::string is inefficient.
+ return _VSTD::vformat(_VSTD::move(__loc), __fmt,
+ _VSTD::make_wformat_args(__args...))
+ .size();
+}
+#endif
+
+#endif // _LIBCPP_HAS_NO_LOCALIZATION
+
+#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS)
#endif //_LIBCPP_STD_VER > 17
_LIBCPP_END_NAMESPACE_STD
_LIBCPP_POP_MACROS
+#endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_FORMAT)
+
#endif // _LIBCPP_FORMAT
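The <format> hunk above fills in the C++20 formatting entry points (format, vformat, format_to, format_to_n, formatted_size) behind the LIBCXX_ENABLE_INCOMPLETE_FEATURES build option, with formatters wired up for bool, char, integers and strings so far. A sketch of the user-facing surface, hedged to those argument types since the snapshot is explicitly marked incomplete:

#include <format>
#include <iostream>
#include <iterator>
#include <string>

int main() {
    // format builds a std::string from a format string and arguments.
    std::string s = std::format("{} + {} = {}", 1, 2, 1 + 2);

    // format_to writes through an output iterator instead.
    std::string padded;
    std::format_to(std::back_inserter(padded), "[{:>6}]", 42);

    // format_to_n writes at most n characters and reports the untruncated size.
    char buf[8];
    auto [end, size] = std::format_to_n(buf, 4, "{}", 123456);

    std::cout << s << '\n' << padded << '\n'
              << std::string(buf, end) << " (full size " << size << ")\n";
}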
diff --git a/libcxx/include/forward_list b/libcxx/include/forward_list
index 0ae8f19ec20c..9d19e741f061 100644
--- a/libcxx/include/forward_list
+++ b/libcxx/include/forward_list
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===----------------------- forward_list ---------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -647,7 +647,7 @@ public:
typedef const value_type& const_reference;
typedef typename allocator_traits<allocator_type>::pointer pointer;
typedef typename allocator_traits<allocator_type>::const_pointer const_pointer;
- typedef typename allocator_traits<allocator_type>::size_type size_type;
+ typedef typename __allocator_traits<allocator_type>::size_type size_type;
typedef typename allocator_traits<allocator_type>::difference_type difference_type;
typedef typename base::iterator iterator;
@@ -870,17 +870,19 @@ private:
};
-#ifndef _LIBCPP_HAS_NO_DEDUCTION_GUIDES
+#if _LIBCPP_STD_VER >= 17
template<class _InputIterator,
class _Alloc = allocator<__iter_value_type<_InputIterator>>,
- class = _EnableIf<__is_allocator<_Alloc>::value>
+ class = enable_if_t<__is_cpp17_input_iterator<_InputIterator>::value>,
+ class = enable_if_t<__is_allocator<_Alloc>::value>
>
forward_list(_InputIterator, _InputIterator)
-> forward_list<__iter_value_type<_InputIterator>, _Alloc>;
template<class _InputIterator,
class _Alloc,
- class = _EnableIf<__is_allocator<_Alloc>::value>
+ class = enable_if_t<__is_cpp17_input_iterator<_InputIterator>::value>,
+ class = enable_if_t<__is_allocator<_Alloc>::value>
>
forward_list(_InputIterator, _InputIterator, _Alloc)
-> forward_list<__iter_value_type<_InputIterator>, _Alloc>;
@@ -990,7 +992,7 @@ template <class _Tp, class _Alloc>
forward_list<_Tp, _Alloc>&
forward_list<_Tp, _Alloc>::operator=(const forward_list& __x)
{
- if (this != &__x)
+ if (this != _VSTD::addressof(__x))
{
base::__copy_assign_alloc(__x);
assign(__x.begin(), __x.end());
@@ -1585,7 +1587,7 @@ template <class _Compare>
void
forward_list<_Tp, _Alloc>::merge(forward_list& __x, _Compare __comp)
{
- if (this != &__x)
+ if (this != _VSTD::addressof(__x))
{
base::__before_begin()->__next_ = __merge(base::__before_begin()->__next_,
__x.__before_begin()->__next_,
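The deduction guides above now require the first template argument to satisfy the Cpp17InputIterator requirements, which keeps class template argument deduction from selecting the iterator-pair guide for non-iterator arguments. A small sketch, assuming a C++17 compiler:

    #include <forward_list>
    #include <vector>

    int main() {
        std::vector<int> v{1, 2, 3};

        // Deduces std::forward_list<int>: vector<int>::iterator satisfies
        // the input-iterator constraint added to the guide above.
        std::forward_list fl(v.begin(), v.end());

        // Two ints are not iterators, so the constrained guide is dropped
        // and the (count, value) constructor deduces forward_list<int>.
        std::forward_list fl2(3, 42);

        return fl.front() + fl2.front() == 43 ? 0 : 1;
    }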
diff --git a/libcxx/include/fstream b/libcxx/include/fstream
index c522b8ab110d..3d64adcb23d1 100644
--- a/libcxx/include/fstream
+++ b/libcxx/include/fstream
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===------------------------- fstream ------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -230,7 +230,6 @@ public:
// 27.9.1.4 Members:
_LIBCPP_INLINE_VISIBILITY
bool is_open() const;
-#ifndef _LIBCPP_HAS_NO_GLOBAL_FILESYSTEM_NAMESPACE
basic_filebuf* open(const char* __s, ios_base::openmode __mode);
#ifdef _LIBCPP_HAS_OPEN_WITH_WCHAR
basic_filebuf* open(const wchar_t* __s, ios_base::openmode __mode);
@@ -246,7 +245,6 @@ public:
#endif
_LIBCPP_INLINE_VISIBILITY
basic_filebuf* __open(int __fd, ios_base::openmode __mode);
-#endif
basic_filebuf* close();
_LIBCPP_INLINE_VISIBILITY
@@ -542,7 +540,6 @@ const char* basic_filebuf<_CharT, _Traits>::__make_mdstring(
_LIBCPP_UNREACHABLE();
}
-#ifndef _LIBCPP_HAS_NO_GLOBAL_FILESYSTEM_NAMESPACE
template <class _CharT, class _Traits>
basic_filebuf<_CharT, _Traits>*
basic_filebuf<_CharT, _Traits>::open(const char* __s, ios_base::openmode __mode)
@@ -685,7 +682,6 @@ basic_filebuf<_CharT, _Traits>::open(const string& __s, ios_base::openmode __mod
{
return open(__s.c_str(), __mode);
}
-#endif
template <class _CharT, class _Traits>
basic_filebuf<_CharT, _Traits>*
@@ -1141,7 +1137,6 @@ public:
_LIBCPP_INLINE_VISIBILITY
basic_ifstream();
-#ifndef _LIBCPP_HAS_NO_GLOBAL_FILESYSTEM_NAMESPACE
_LIBCPP_INLINE_VISIBILITY
explicit basic_ifstream(const char* __s, ios_base::openmode __mode = ios_base::in);
#ifdef _LIBCPP_HAS_OPEN_WITH_WCHAR
@@ -1155,7 +1150,6 @@ public:
explicit basic_ifstream(const filesystem::path& __p, ios_base::openmode __mode = ios_base::in)
: basic_ifstream(__p.c_str(), __mode) {}
#endif // _LIBCPP_STD_VER >= 17
-#endif
_LIBCPP_INLINE_VISIBILITY
basic_ifstream(basic_ifstream&& __rhs);
_LIBCPP_INLINE_VISIBILITY
@@ -1167,7 +1161,6 @@ public:
basic_filebuf<char_type, traits_type>* rdbuf() const;
_LIBCPP_INLINE_VISIBILITY
bool is_open() const;
-#ifndef _LIBCPP_HAS_NO_GLOBAL_FILESYSTEM_NAMESPACE
void open(const char* __s, ios_base::openmode __mode = ios_base::in);
#ifdef _LIBCPP_HAS_OPEN_WITH_WCHAR
void open(const wchar_t* __s, ios_base::openmode __mode = ios_base::in);
@@ -1183,7 +1176,6 @@ public:
_LIBCPP_INLINE_VISIBILITY
void __open(int __fd, ios_base::openmode __mode);
-#endif
_LIBCPP_INLINE_VISIBILITY
void close();
@@ -1198,7 +1190,6 @@ basic_ifstream<_CharT, _Traits>::basic_ifstream()
{
}
-#ifndef _LIBCPP_HAS_NO_GLOBAL_FILESYSTEM_NAMESPACE
template <class _CharT, class _Traits>
inline
basic_ifstream<_CharT, _Traits>::basic_ifstream(const char* __s, ios_base::openmode __mode)
@@ -1227,7 +1218,6 @@ basic_ifstream<_CharT, _Traits>::basic_ifstream(const string& __s, ios_base::ope
if (__sb_.open(__s, __mode | ios_base::in) == nullptr)
this->setstate(ios_base::failbit);
}
-#endif
template <class _CharT, class _Traits>
inline
@@ -1281,7 +1271,6 @@ basic_ifstream<_CharT, _Traits>::is_open() const
return __sb_.is_open();
}
-#ifndef _LIBCPP_HAS_NO_GLOBAL_FILESYSTEM_NAMESPACE
template <class _CharT, class _Traits>
void
basic_ifstream<_CharT, _Traits>::open(const char* __s, ios_base::openmode __mode)
@@ -1323,7 +1312,6 @@ void basic_ifstream<_CharT, _Traits>::__open(int __fd,
else
this->setstate(ios_base::failbit);
}
-#endif
template <class _CharT, class _Traits>
inline
@@ -1375,7 +1363,6 @@ public:
basic_filebuf<char_type, traits_type>* rdbuf() const;
_LIBCPP_INLINE_VISIBILITY
bool is_open() const;
-#ifndef _LIBCPP_HAS_NO_GLOBAL_FILESYSTEM_NAMESPACE
void open(const char* __s, ios_base::openmode __mode = ios_base::out);
#ifdef _LIBCPP_HAS_OPEN_WITH_WCHAR
void open(const wchar_t* __s, ios_base::openmode __mode = ios_base::out);
@@ -1390,7 +1377,6 @@ public:
_LIBCPP_INLINE_VISIBILITY
void __open(int __fd, ios_base::openmode __mode);
-#endif
_LIBCPP_INLINE_VISIBILITY
void close();
@@ -1405,7 +1391,6 @@ basic_ofstream<_CharT, _Traits>::basic_ofstream()
{
}
-#ifndef _LIBCPP_HAS_NO_GLOBAL_FILESYSTEM_NAMESPACE
template <class _CharT, class _Traits>
inline
basic_ofstream<_CharT, _Traits>::basic_ofstream(const char* __s, ios_base::openmode __mode)
@@ -1434,7 +1419,6 @@ basic_ofstream<_CharT, _Traits>::basic_ofstream(const string& __s, ios_base::ope
if (__sb_.open(__s, __mode | ios_base::out) == nullptr)
this->setstate(ios_base::failbit);
}
-#endif
template <class _CharT, class _Traits>
inline
@@ -1488,7 +1472,6 @@ basic_ofstream<_CharT, _Traits>::is_open() const
return __sb_.is_open();
}
-#ifndef _LIBCPP_HAS_NO_GLOBAL_FILESYSTEM_NAMESPACE
template <class _CharT, class _Traits>
void
basic_ofstream<_CharT, _Traits>::open(const char* __s, ios_base::openmode __mode)
@@ -1530,7 +1513,6 @@ void basic_ofstream<_CharT, _Traits>::__open(int __fd,
else
this->setstate(ios_base::failbit);
}
-#endif
template <class _CharT, class _Traits>
inline
@@ -1556,7 +1538,6 @@ public:
_LIBCPP_INLINE_VISIBILITY
basic_fstream();
-#ifndef _LIBCPP_HAS_NO_GLOBAL_FILESYSTEM_NAMESPACE
_LIBCPP_INLINE_VISIBILITY
explicit basic_fstream(const char* __s, ios_base::openmode __mode = ios_base::in | ios_base::out);
#ifdef _LIBCPP_HAS_OPEN_WITH_WCHAR
@@ -1572,7 +1553,6 @@ public:
: basic_fstream(__p.c_str(), __mode) {}
#endif // _LIBCPP_STD_VER >= 17
-#endif
_LIBCPP_INLINE_VISIBILITY
basic_fstream(basic_fstream&& __rhs);
@@ -1586,7 +1566,6 @@ public:
basic_filebuf<char_type, traits_type>* rdbuf() const;
_LIBCPP_INLINE_VISIBILITY
bool is_open() const;
-#ifndef _LIBCPP_HAS_NO_GLOBAL_FILESYSTEM_NAMESPACE
void open(const char* __s, ios_base::openmode __mode = ios_base::in | ios_base::out);
#ifdef _LIBCPP_HAS_OPEN_WITH_WCHAR
void open(const wchar_t* __s, ios_base::openmode __mode = ios_base::in | ios_base::out);
@@ -1599,7 +1578,6 @@ public:
{ return open(__p.c_str(), __mode); }
#endif // _LIBCPP_STD_VER >= 17
-#endif
_LIBCPP_INLINE_VISIBILITY
void close();
@@ -1614,7 +1592,6 @@ basic_fstream<_CharT, _Traits>::basic_fstream()
{
}
-#ifndef _LIBCPP_HAS_NO_GLOBAL_FILESYSTEM_NAMESPACE
template <class _CharT, class _Traits>
inline
basic_fstream<_CharT, _Traits>::basic_fstream(const char* __s, ios_base::openmode __mode)
@@ -1643,7 +1620,6 @@ basic_fstream<_CharT, _Traits>::basic_fstream(const string& __s, ios_base::openm
if (__sb_.open(__s, __mode) == nullptr)
this->setstate(ios_base::failbit);
}
-#endif
template <class _CharT, class _Traits>
inline
@@ -1697,7 +1673,6 @@ basic_fstream<_CharT, _Traits>::is_open() const
return __sb_.is_open();
}
-#ifndef _LIBCPP_HAS_NO_GLOBAL_FILESYSTEM_NAMESPACE
template <class _CharT, class _Traits>
void
basic_fstream<_CharT, _Traits>::open(const char* __s, ios_base::openmode __mode)
@@ -1729,7 +1704,6 @@ basic_fstream<_CharT, _Traits>::open(const string& __s, ios_base::openmode __mod
else
this->setstate(ios_base::failbit);
}
-#endif
template <class _CharT, class _Traits>
inline
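With the _LIBCPP_HAS_NO_GLOBAL_FILESYSTEM_NAMESPACE guards removed above, the const char*, string and filesystem::path open() overloads and constructors are declared unconditionally. A round-trip sketch of the path-taking interface, assuming a writable working directory and a hypothetical file name example.txt:

    #include <filesystem>
    #include <fstream>
    #include <string>

    int main() {
        std::filesystem::path p = "example.txt";  // hypothetical file name

        // The path overload forwards to open(p.c_str(), mode), as shown above.
        std::ofstream out(p, std::ios_base::out | std::ios_base::trunc);
        out << "hello\n";
        out.close();

        std::ifstream in(p);
        std::string line;
        std::getline(in, line);
        return line == "hello" ? 0 : 1;
    }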
diff --git a/libcxx/include/functional b/libcxx/include/functional
index ecbc5667af18..53a5f2bc3770 100644
--- a/libcxx/include/functional
+++ b/libcxx/include/functional
@@ -135,6 +135,9 @@ struct less_equal {
bool operator()(const T& x, const T& y) const;
};
+// [comparisons.three.way], class compare_three_way
+struct compare_three_way;
+
template <class T> // <class T=void> in C++14
struct logical_and {
bool operator()(const T& x, const T& y) const;
@@ -488,14 +491,17 @@ POLICY: For non-variadic implementations, the number of arguments is limited
*/
#include <__algorithm/search.h>
+#include <__compare/compare_three_way.h>
#include <__config>
#include <__debug>
#include <__functional/binary_function.h> // TODO: deprecate
#include <__functional/binary_negate.h>
+#include <__functional/bind_back.h>
#include <__functional/bind_front.h>
#include <__functional/bind.h>
#include <__functional/binder1st.h>
#include <__functional/binder2nd.h>
+#include <__functional/compose.h>
#include <__functional/default_searcher.h>
#include <__functional/function.h>
#include <__functional/hash.h>
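Besides the new internal bind_back and compose helpers, the synopsis above gains std::compare_three_way, the function-object counterpart of operator<=>. A minimal sketch of its use, assuming C++20:

    #include <compare>
    #include <functional>
    #include <type_traits>

    int main() {
        std::compare_three_way cmp;
        auto r = cmp(1, 2);   // invokes 1 <=> 2
        static_assert(std::is_same_v<decltype(r), std::strong_ordering>);
        return r < 0 ? 0 : 1; // less-than, so returns 0
    }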
diff --git a/libcxx/include/future b/libcxx/include/future
index 349e6afc43e4..99df8831a778 100644
--- a/libcxx/include/future
+++ b/libcxx/include/future
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- future -----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -366,7 +366,7 @@ template <class R, class Alloc> struct uses_allocator<packaged_task<R>, Alloc>;
#include <__debug>
#include <__memory/allocator_arg_t.h>
#include <__memory/uses_allocator.h>
-#include <__utility/__decay_copy.h>
+#include <__utility/decay_copy.h>
#include <__utility/forward.h>
#include <chrono>
#include <exception>
diff --git a/libcxx/include/initializer_list b/libcxx/include/initializer_list
index ea1f234675f7..d867ee6af017 100644
--- a/libcxx/include/initializer_list
+++ b/libcxx/include/initializer_list
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===----------------------- initializer_list -----------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/inttypes.h b/libcxx/include/inttypes.h
index 6b7cb2648a5e..69508140763c 100644
--- a/libcxx/include/inttypes.h
+++ b/libcxx/include/inttypes.h
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- inttypes.h -------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/iomanip b/libcxx/include/iomanip
index 47d573b5fc8a..ba434b983b1c 100644
--- a/libcxx/include/iomanip
+++ b/libcxx/include/iomanip
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- iomanip ----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/ios b/libcxx/include/ios
index 3128bca89999..b7d32946bff0 100644
--- a/libcxx/include/ios
+++ b/libcxx/include/ios
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===---------------------------- ios -------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -607,8 +607,15 @@ public:
static_assert((is_same<_CharT, typename traits_type::char_type>::value),
"traits_type::char_type must be the same type as CharT");
+#ifdef _LIBCPP_CXX03_LANG
+ // Preserve the ability to compare with literal 0,
+ // and implicitly convert to bool, but not implicitly convert to int.
+ _LIBCPP_INLINE_VISIBILITY
+ operator void*() const {return fail() ? nullptr : (void*)this;}
+#else
_LIBCPP_INLINE_VISIBILITY
explicit operator bool() const {return !fail();}
+#endif
_LIBCPP_INLINE_VISIBILITY bool operator!() const {return fail();}
_LIBCPP_INLINE_VISIBILITY iostate rdstate() const {return ios_base::rdstate();}
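The C++03 branch above restores the classic operator void* conversion while C++11 and later keep explicit operator bool; both support testing a stream directly in a condition, and only the void* form also allows comparison with literal 0. A sketch of the idiom both conversions preserve:

    #include <sstream>

    int main() {
        std::istringstream in("42");
        int x = 0;

        // Valid under either conversion operator: the stream is testable
        // in a boolean context after the extraction.
        if (in >> x) {
            // In C++03 the void* conversion additionally permits
            // `(in >> x) != 0`; explicit operator bool rejects it, as well
            // as accidental conversions such as `int flag = in;`.
        }
        return (in && x == 42) ? 0 : 1;
    }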
diff --git a/libcxx/include/iosfwd b/libcxx/include/iosfwd
index 499839f7d04d..938d712cf36b 100644
--- a/libcxx/include/iosfwd
+++ b/libcxx/include/iosfwd
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- iosfwd -----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -95,7 +95,7 @@ using u32streampos = fpos<char_traits<char32_t>::state_type>;
*/
#include <__config>
-#include <wchar.h> // for mbstate_t
+#include <__mbstate_t.h>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
#pragma GCC system_header
@@ -112,7 +112,9 @@ template<> struct char_traits<char8_t>;
#endif
template<> struct char_traits<char16_t>;
template<> struct char_traits<char32_t>;
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
template<> struct char_traits<wchar_t>;
+#endif
template<class _Tp> class _LIBCPP_TEMPLATE_VIS allocator;
@@ -156,7 +158,9 @@ template <class _CharT, class _Traits = char_traits<_CharT> >
class _LIBCPP_TEMPLATE_VIS ostreambuf_iterator;
typedef basic_ios<char> ios;
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
typedef basic_ios<wchar_t> wios;
+#endif
typedef basic_streambuf<char> streambuf;
typedef basic_istream<char> istream;
@@ -173,6 +177,7 @@ typedef basic_ifstream<char> ifstream;
typedef basic_ofstream<char> ofstream;
typedef basic_fstream<char> fstream;
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
typedef basic_streambuf<wchar_t> wstreambuf;
typedef basic_istream<wchar_t> wistream;
typedef basic_ostream<wchar_t> wostream;
@@ -187,36 +192,37 @@ typedef basic_filebuf<wchar_t> wfilebuf;
typedef basic_ifstream<wchar_t> wifstream;
typedef basic_ofstream<wchar_t> wofstream;
typedef basic_fstream<wchar_t> wfstream;
+#endif
template <class _CharT, class _Traits>
- class _LIBCPP_PREFERRED_NAME(ios) _LIBCPP_PREFERRED_NAME(wios) basic_ios;
+ class _LIBCPP_PREFERRED_NAME(ios) _LIBCPP_IF_WIDE_CHARACTERS(_LIBCPP_PREFERRED_NAME(wios)) basic_ios;
template <class _CharT, class _Traits>
- class _LIBCPP_PREFERRED_NAME(streambuf) _LIBCPP_PREFERRED_NAME(wstreambuf) basic_streambuf;
+ class _LIBCPP_PREFERRED_NAME(streambuf) _LIBCPP_IF_WIDE_CHARACTERS(_LIBCPP_PREFERRED_NAME(wstreambuf)) basic_streambuf;
template <class _CharT, class _Traits>
- class _LIBCPP_PREFERRED_NAME(istream) _LIBCPP_PREFERRED_NAME(wistream) basic_istream;
+ class _LIBCPP_PREFERRED_NAME(istream) _LIBCPP_IF_WIDE_CHARACTERS(_LIBCPP_PREFERRED_NAME(wistream)) basic_istream;
template <class _CharT, class _Traits>
- class _LIBCPP_PREFERRED_NAME(ostream) _LIBCPP_PREFERRED_NAME(wostream) basic_ostream;
+ class _LIBCPP_PREFERRED_NAME(ostream) _LIBCPP_IF_WIDE_CHARACTERS(_LIBCPP_PREFERRED_NAME(wostream)) basic_ostream;
template <class _CharT, class _Traits>
- class _LIBCPP_PREFERRED_NAME(iostream) _LIBCPP_PREFERRED_NAME(wiostream) basic_iostream;
+ class _LIBCPP_PREFERRED_NAME(iostream) _LIBCPP_IF_WIDE_CHARACTERS(_LIBCPP_PREFERRED_NAME(wiostream)) basic_iostream;
template <class _CharT, class _Traits, class _Allocator>
- class _LIBCPP_PREFERRED_NAME(stringbuf) _LIBCPP_PREFERRED_NAME(wstringbuf) basic_stringbuf;
+ class _LIBCPP_PREFERRED_NAME(stringbuf) _LIBCPP_IF_WIDE_CHARACTERS(_LIBCPP_PREFERRED_NAME(wstringbuf)) basic_stringbuf;
template <class _CharT, class _Traits, class _Allocator>
- class _LIBCPP_PREFERRED_NAME(istringstream) _LIBCPP_PREFERRED_NAME(wistringstream) basic_istringstream;
+ class _LIBCPP_PREFERRED_NAME(istringstream) _LIBCPP_IF_WIDE_CHARACTERS(_LIBCPP_PREFERRED_NAME(wistringstream)) basic_istringstream;
template <class _CharT, class _Traits, class _Allocator>
- class _LIBCPP_PREFERRED_NAME(ostringstream) _LIBCPP_PREFERRED_NAME(wostringstream) basic_ostringstream;
+ class _LIBCPP_PREFERRED_NAME(ostringstream) _LIBCPP_IF_WIDE_CHARACTERS(_LIBCPP_PREFERRED_NAME(wostringstream)) basic_ostringstream;
template <class _CharT, class _Traits, class _Allocator>
- class _LIBCPP_PREFERRED_NAME(stringstream) _LIBCPP_PREFERRED_NAME(wstringstream) basic_stringstream;
+ class _LIBCPP_PREFERRED_NAME(stringstream) _LIBCPP_IF_WIDE_CHARACTERS(_LIBCPP_PREFERRED_NAME(wstringstream)) basic_stringstream;
template <class _CharT, class _Traits>
- class _LIBCPP_PREFERRED_NAME(filebuf) _LIBCPP_PREFERRED_NAME(wfilebuf) basic_filebuf;
+ class _LIBCPP_PREFERRED_NAME(filebuf) _LIBCPP_IF_WIDE_CHARACTERS(_LIBCPP_PREFERRED_NAME(wfilebuf)) basic_filebuf;
template <class _CharT, class _Traits>
- class _LIBCPP_PREFERRED_NAME(ifstream) _LIBCPP_PREFERRED_NAME(wifstream) basic_ifstream;
+ class _LIBCPP_PREFERRED_NAME(ifstream) _LIBCPP_IF_WIDE_CHARACTERS(_LIBCPP_PREFERRED_NAME(wifstream)) basic_ifstream;
template <class _CharT, class _Traits>
- class _LIBCPP_PREFERRED_NAME(ofstream) _LIBCPP_PREFERRED_NAME(wofstream) basic_ofstream;
+ class _LIBCPP_PREFERRED_NAME(ofstream) _LIBCPP_IF_WIDE_CHARACTERS(_LIBCPP_PREFERRED_NAME(wofstream)) basic_ofstream;
template <class _CharT, class _Traits>
- class _LIBCPP_PREFERRED_NAME(fstream) _LIBCPP_PREFERRED_NAME(wfstream) basic_fstream;
+ class _LIBCPP_PREFERRED_NAME(fstream) _LIBCPP_IF_WIDE_CHARACTERS(_LIBCPP_PREFERRED_NAME(wfstream)) basic_fstream;
template <class _State> class _LIBCPP_TEMPLATE_VIS fpos;
typedef fpos<mbstate_t> streampos;
@@ -227,7 +233,7 @@ typedef fpos<mbstate_t> u8streampos;
#ifndef _LIBCPP_HAS_NO_UNICODE_CHARS
typedef fpos<mbstate_t> u16streampos;
typedef fpos<mbstate_t> u32streampos;
-#endif // _LIBCPP_HAS_NO_UNICODE_CHARS
+#endif
#if defined(_NEWLIB_VERSION)
// On newlib, off_t is 'long int'
@@ -241,10 +247,12 @@ template <class _CharT, // for <stdexcept>
class _Allocator = allocator<_CharT> >
class _LIBCPP_TEMPLATE_VIS basic_string;
typedef basic_string<char, char_traits<char>, allocator<char> > string;
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
typedef basic_string<wchar_t, char_traits<wchar_t>, allocator<wchar_t> > wstring;
+#endif
template <class _CharT, class _Traits, class _Allocator>
- class _LIBCPP_PREFERRED_NAME(string) _LIBCPP_PREFERRED_NAME(wstring) basic_string;
+ class _LIBCPP_PREFERRED_NAME(string) _LIBCPP_IF_WIDE_CHARACTERS(_LIBCPP_PREFERRED_NAME(wstring)) basic_string;
// Include other forward declarations here
template <class _Tp, class _Alloc = allocator<_Tp> >
diff --git a/libcxx/include/iostream b/libcxx/include/iostream
index 5811fc37c85d..7397acfc678b 100644
--- a/libcxx/include/iostream
+++ b/libcxx/include/iostream
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- iostream ---------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -45,18 +45,17 @@ extern wostream wclog;
_LIBCPP_BEGIN_NAMESPACE_STD
-#ifndef _LIBCPP_HAS_NO_STDIN
extern _LIBCPP_FUNC_VIS istream cin;
-extern _LIBCPP_FUNC_VIS wistream wcin;
-#endif
-#ifndef _LIBCPP_HAS_NO_STDOUT
extern _LIBCPP_FUNC_VIS ostream cout;
-extern _LIBCPP_FUNC_VIS wostream wcout;
-#endif
extern _LIBCPP_FUNC_VIS ostream cerr;
-extern _LIBCPP_FUNC_VIS wostream wcerr;
extern _LIBCPP_FUNC_VIS ostream clog;
+
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
+extern _LIBCPP_FUNC_VIS wistream wcin;
+extern _LIBCPP_FUNC_VIS wostream wcout;
+extern _LIBCPP_FUNC_VIS wostream wcerr;
extern _LIBCPP_FUNC_VIS wostream wclog;
+#endif
_LIBCPP_END_NAMESPACE_STD
diff --git a/libcxx/include/istream b/libcxx/include/istream
index 17ca68388f52..2f39e35f4646 100644
--- a/libcxx/include/istream
+++ b/libcxx/include/istream
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- istream ----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -1628,7 +1628,9 @@ operator>>(basic_istream<_CharT, _Traits>& __is, bitset<_Size>& __x)
}
_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS basic_istream<char>)
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS basic_istream<wchar_t>)
+#endif
_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS basic_iostream<char>)
_LIBCPP_END_NAMESPACE_STD
diff --git a/libcxx/include/iterator b/libcxx/include/iterator
index e0b25200bd9d..4dd9902d79a2 100644
--- a/libcxx/include/iterator
+++ b/libcxx/include/iterator
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===-------------------------- iterator ----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -410,6 +410,10 @@ template<input_iterator I>
requires see below
struct iterator_traits<counted_iterator<I>>;
+// [unreachable.sentinel], unreachable sentinel
+struct unreachable_sentinel_t;
+inline constexpr unreachable_sentinel_t unreachable_sentinel{};
+
template <class T, class charT = char, class traits = char_traits<charT>, class Distance = ptrdiff_t>
class istream_iterator
: public iterator<input_iterator_tag, T, Distance, const T*, const T&> // until C++17
@@ -606,6 +610,7 @@ template <class E> constexpr const E* data(initializer_list<E> il) noexcept;
#include <__iterator/reverse_access.h>
#include <__iterator/reverse_iterator.h>
#include <__iterator/size.h>
+#include <__iterator/unreachable_sentinel.h>
#include <__iterator/wrap_iter.h>
#include <__memory/addressof.h>
#include <__memory/pointer_traits.h>
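unreachable_sentinel, declared in the synopsis above and provided by the new <__iterator/unreachable_sentinel.h> header, compares unequal to every iterator, so an algorithm given it never performs an end-of-range check. A sketch assuming ranges algorithms are available and that the searched value is known to be present:

    #include <algorithm>
    #include <iterator>
    #include <vector>

    int main() {
        std::vector<int> v{3, 1, 4, 1, 5};

        // Safe only because 4 is guaranteed to be in v: the sentinel never
        // compares equal, so the search relies solely on finding the value.
        auto it = std::ranges::find(v.begin(), std::unreachable_sentinel, 4);
        return *it == 4 ? 0 : 1;
    }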
diff --git a/libcxx/include/latch b/libcxx/include/latch
index 10ae5721fbec..e65825991b59 100644
--- a/libcxx/include/latch
+++ b/libcxx/include/latch
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- latch -----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/limits b/libcxx/include/limits
index a6d517852493..245c84eea883 100644
--- a/libcxx/include/limits
+++ b/libcxx/include/limits
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===---------------------------- limits ----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/limits.h b/libcxx/include/limits.h
index 938f8875bc54..026bcfe0fc42 100644
--- a/libcxx/include/limits.h
+++ b/libcxx/include/limits.h
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- limits.h ---------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/list b/libcxx/include/list
index 23da5fcfc92d..6282983ad20a 100644
--- a/libcxx/include/list
+++ b/libcxx/include/list
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===---------------------------- list ------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -330,7 +330,7 @@ public:
__list_iterator(const __list_iterator& __p)
: __ptr_(__p.__ptr_)
{
- __get_db()->__iterator_copy(this, &__p);
+ __get_db()->__iterator_copy(this, _VSTD::addressof(__p));
}
_LIBCPP_INLINE_VISIBILITY
@@ -342,9 +342,9 @@ public:
_LIBCPP_INLINE_VISIBILITY
__list_iterator& operator=(const __list_iterator& __p)
{
- if (this != &__p)
+ if (this != _VSTD::addressof(__p))
{
- __get_db()->__iterator_copy(this, &__p);
+ __get_db()->__iterator_copy(this, _VSTD::addressof(__p));
__ptr_ = __p.__ptr_;
}
return *this;
@@ -448,7 +448,7 @@ public:
: __ptr_(__p.__ptr_)
{
#if _LIBCPP_DEBUG_LEVEL == 2
- __get_db()->__iterator_copy(this, &__p);
+ __get_db()->__iterator_copy(this, _VSTD::addressof(__p));
#endif
}
@@ -458,7 +458,7 @@ public:
__list_const_iterator(const __list_const_iterator& __p)
: __ptr_(__p.__ptr_)
{
- __get_db()->__iterator_copy(this, &__p);
+ __get_db()->__iterator_copy(this, _VSTD::addressof(__p));
}
_LIBCPP_INLINE_VISIBILITY
@@ -470,9 +470,9 @@ public:
_LIBCPP_INLINE_VISIBILITY
__list_const_iterator& operator=(const __list_const_iterator& __p)
{
- if (this != &__p)
+ if (this != _VSTD::addressof(__p))
{
- __get_db()->__iterator_copy(this, &__p);
+ __get_db()->__iterator_copy(this, _VSTD::addressof(__p));
__ptr_ = __p.__ptr_;
}
return *this;
@@ -797,7 +797,7 @@ __list_imp<_Tp, _Alloc>::swap(__list_imp& __c)
#if _LIBCPP_DEBUG_LEVEL == 2
__libcpp_db* __db = __get_db();
__c_node* __cn1 = __db->__find_c_and_lock(this);
- __c_node* __cn2 = __db->__find_c(&__c);
+ __c_node* __cn2 = __db->__find_c(_VSTD::addressof(__c));
_VSTD::swap(__cn1->beg_, __cn2->beg_);
_VSTD::swap(__cn1->end_, __cn2->end_);
_VSTD::swap(__cn1->cap_, __cn2->cap_);
@@ -845,24 +845,24 @@ class _LIBCPP_TEMPLATE_VIS list
typedef typename base::__link_pointer __link_pointer;
public:
- typedef _Tp value_type;
- typedef _Alloc allocator_type;
+ typedef _Tp value_type;
+ typedef _Alloc allocator_type;
static_assert((is_same<value_type, typename allocator_type::value_type>::value),
"Invalid allocator::value_type");
- typedef value_type& reference;
- typedef const value_type& const_reference;
- typedef typename base::pointer pointer;
- typedef typename base::const_pointer const_pointer;
- typedef typename base::size_type size_type;
- typedef typename base::difference_type difference_type;
- typedef typename base::iterator iterator;
- typedef typename base::const_iterator const_iterator;
- typedef _VSTD::reverse_iterator<iterator> reverse_iterator;
- typedef _VSTD::reverse_iterator<const_iterator> const_reverse_iterator;
+ typedef value_type& reference;
+ typedef const value_type& const_reference;
+ typedef typename base::pointer pointer;
+ typedef typename base::const_pointer const_pointer;
+ typedef typename __allocator_traits<allocator_type>::size_type size_type;
+ typedef typename base::difference_type difference_type;
+ typedef typename base::iterator iterator;
+ typedef typename base::const_iterator const_iterator;
+ typedef _VSTD::reverse_iterator<iterator> reverse_iterator;
+ typedef _VSTD::reverse_iterator<const_iterator> const_reverse_iterator;
#if _LIBCPP_STD_VER > 17
- typedef size_type __remove_return_type;
+ typedef size_type __remove_return_type;
#else
- typedef void __remove_return_type;
+ typedef void __remove_return_type;
#endif
_LIBCPP_INLINE_VISIBILITY
@@ -1141,17 +1141,19 @@ private:
void __move_assign(list& __c, false_type);
};
-#ifndef _LIBCPP_HAS_NO_DEDUCTION_GUIDES
+#if _LIBCPP_STD_VER >= 17
template<class _InputIterator,
class _Alloc = allocator<__iter_value_type<_InputIterator>>,
- class = _EnableIf<__is_allocator<_Alloc>::value>
+ class = enable_if_t<__is_cpp17_input_iterator<_InputIterator>::value>,
+ class = enable_if_t<__is_allocator<_Alloc>::value>
>
list(_InputIterator, _InputIterator)
-> list<__iter_value_type<_InputIterator>, _Alloc>;
template<class _InputIterator,
class _Alloc,
- class = _EnableIf<__is_allocator<_Alloc>::value>
+ class = enable_if_t<__is_cpp17_input_iterator<_InputIterator>::value>,
+ class = enable_if_t<__is_allocator<_Alloc>::value>
>
list(_InputIterator, _InputIterator, _Alloc)
-> list<__iter_value_type<_InputIterator>, _Alloc>;
@@ -1392,7 +1394,7 @@ inline
list<_Tp, _Alloc>&
list<_Tp, _Alloc>::operator=(const list& __c)
{
- if (this != &__c)
+ if (this != _VSTD::addressof(__c))
{
base::__copy_assign_alloc(__c);
assign(__c.begin(), __c.end());
@@ -1408,7 +1410,7 @@ list<_Tp, _Alloc>::assign(_InpIter __f, _InpIter __l,
{
iterator __i = begin();
iterator __e = end();
- for (; __f != __l && __i != __e; ++__f, ++__i)
+ for (; __f != __l && __i != __e; ++__f, (void) ++__i)
*__i = *__f;
if (__i == __e)
insert(__e, __f, __l);
@@ -1425,7 +1427,7 @@ list<_Tp, _Alloc>::assign(size_type __n, const value_type& __x)
{
iterator __i = begin();
iterator __e = end();
- for (; __n > 0 && __i != __e; --__n, ++__i)
+ for (; __n > 0 && __i != __e; --__n, (void) ++__i)
*__i = __x;
if (__i == __e)
insert(__e, __n, __x);
@@ -1449,7 +1451,7 @@ typename list<_Tp, _Alloc>::iterator
list<_Tp, _Alloc>::insert(const_iterator __p, const value_type& __x)
{
#if _LIBCPP_DEBUG_LEVEL == 2
- _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this,
+ _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this,
"list::insert(iterator, x) called with an iterator not"
" referring to this list");
#endif
@@ -1470,7 +1472,7 @@ typename list<_Tp, _Alloc>::iterator
list<_Tp, _Alloc>::insert(const_iterator __p, size_type __n, const value_type& __x)
{
#if _LIBCPP_DEBUG_LEVEL == 2
- _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this,
+ _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this,
"list::insert(iterator, n, x) called with an iterator not"
" referring to this list");
iterator __r(__p.__ptr_, this);
@@ -1495,7 +1497,7 @@ list<_Tp, _Alloc>::insert(const_iterator __p, size_type __n, const value_type& _
try
{
#endif // _LIBCPP_NO_EXCEPTIONS
- for (--__n; __n != 0; --__n, ++__e, ++__ds)
+ for (--__n; __n != 0; --__n, (void) ++__e, ++__ds)
{
__hold.reset(__node_alloc_traits::allocate(__na, 1));
__node_alloc_traits::construct(__na, _VSTD::addressof(__hold->__value_), __x);
@@ -1536,7 +1538,7 @@ list<_Tp, _Alloc>::insert(const_iterator __p, _InpIter __f, _InpIter __l,
typename enable_if<__is_cpp17_input_iterator<_InpIter>::value>::type*)
{
#if _LIBCPP_DEBUG_LEVEL == 2
- _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this,
+ _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this,
"list::insert(iterator, range) called with an iterator not"
" referring to this list");
iterator __r(__p.__ptr_, this);
@@ -1561,7 +1563,7 @@ list<_Tp, _Alloc>::insert(const_iterator __p, _InpIter __f, _InpIter __l,
try
{
#endif // _LIBCPP_NO_EXCEPTIONS
- for (++__f; __f != __l; ++__f, (void) ++__e, (void) ++__ds)
+ for (++__f; __f != __l; ++__f, (void) ++__e, ++__ds)
{
__hold.reset(__node_alloc_traits::allocate(__na, 1));
__node_alloc_traits::construct(__na, _VSTD::addressof(__hold->__value_), *__f);
@@ -1695,7 +1697,7 @@ typename list<_Tp, _Alloc>::iterator
list<_Tp, _Alloc>::emplace(const_iterator __p, _Args&&... __args)
{
#if _LIBCPP_DEBUG_LEVEL == 2
- _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this,
+ _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this,
"list::emplace(iterator, args...) called with an iterator not"
" referring to this list");
#endif
@@ -1718,7 +1720,7 @@ typename list<_Tp, _Alloc>::iterator
list<_Tp, _Alloc>::insert(const_iterator __p, value_type&& __x)
{
#if _LIBCPP_DEBUG_LEVEL == 2
- _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this,
+ _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this,
"list::insert(iterator, x) called with an iterator not"
" referring to this list");
#endif
@@ -1801,7 +1803,7 @@ typename list<_Tp, _Alloc>::iterator
list<_Tp, _Alloc>::erase(const_iterator __p)
{
#if _LIBCPP_DEBUG_LEVEL == 2
- _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this,
+ _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this,
"list::erase(iterator) called with an iterator not"
" referring to this list");
#endif
@@ -1842,10 +1844,10 @@ typename list<_Tp, _Alloc>::iterator
list<_Tp, _Alloc>::erase(const_iterator __f, const_iterator __l)
{
#if _LIBCPP_DEBUG_LEVEL == 2
- _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__f) == this,
+ _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__f)) == this,
"list::erase(iterator, iterator) called with an iterator not"
" referring to this list");
- _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__l) == this,
+ _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__l)) == this,
"list::erase(iterator, iterator) called with an iterator not"
" referring to this list");
#endif
@@ -1909,7 +1911,7 @@ list<_Tp, _Alloc>::resize(size_type __n)
try
{
#endif // _LIBCPP_NO_EXCEPTIONS
- for (--__n; __n != 0; --__n, ++__e, ++__ds)
+ for (--__n; __n != 0; --__n, (void) ++__e, ++__ds)
{
__hold.reset(__node_alloc_traits::allocate(__na, 1));
__node_alloc_traits::construct(__na, _VSTD::addressof(__hold->__value_));
@@ -1967,7 +1969,7 @@ list<_Tp, _Alloc>::resize(size_type __n, const value_type& __x)
try
{
#endif // _LIBCPP_NO_EXCEPTIONS
- for (--__n; __n != 0; --__n, ++__e, ++__ds)
+ for (--__n; __n != 0; --__n, (void) ++__e, ++__ds)
{
__hold.reset(__node_alloc_traits::allocate(__na, 1));
__node_alloc_traits::construct(__na, _VSTD::addressof(__hold->__value_), __x);
@@ -2004,10 +2006,10 @@ template <class _Tp, class _Alloc>
void
list<_Tp, _Alloc>::splice(const_iterator __p, list& __c)
{
- _LIBCPP_ASSERT(this != &__c,
+ _LIBCPP_ASSERT(this != _VSTD::addressof(__c),
"list::splice(iterator, list) called with this == &list");
#if _LIBCPP_DEBUG_LEVEL == 2
- _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this,
+ _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this,
"list::splice(iterator, list) called with an iterator not"
" referring to this list");
#endif
@@ -2020,10 +2022,10 @@ list<_Tp, _Alloc>::splice(const_iterator __p, list& __c)
base::__sz() += __c.__sz();
__c.__sz() = 0;
#if _LIBCPP_DEBUG_LEVEL == 2
- if (&__c != this) {
+ if (_VSTD::addressof(__c) != this) {
__libcpp_db* __db = __get_db();
__c_node* __cn1 = __db->__find_c_and_lock(this);
- __c_node* __cn2 = __db->__find_c(&__c);
+ __c_node* __cn2 = __db->__find_c(_VSTD::addressof(__c));
for (__i_node** __ip = __cn2->end_; __ip != __cn2->beg_;)
{
--__ip;
@@ -2047,13 +2049,13 @@ void
list<_Tp, _Alloc>::splice(const_iterator __p, list& __c, const_iterator __i)
{
#if _LIBCPP_DEBUG_LEVEL == 2
- _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this,
+ _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this,
"list::splice(iterator, list, iterator) called with the first iterator"
" not referring to this list");
- _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__i) == &__c,
+ _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__i)) == _VSTD::addressof(__c),
"list::splice(iterator, list, iterator) called with the second iterator"
" not referring to the list argument");
- _LIBCPP_ASSERT(__get_const_db()->__dereferenceable(&__i),
+ _LIBCPP_ASSERT(__get_const_db()->__dereferenceable(_VSTD::addressof(__i)),
"list::splice(iterator, list, iterator) called with the second iterator"
" not dereferenceable");
#endif
@@ -2065,10 +2067,10 @@ list<_Tp, _Alloc>::splice(const_iterator __p, list& __c, const_iterator __i)
--__c.__sz();
++base::__sz();
#if _LIBCPP_DEBUG_LEVEL == 2
- if (&__c != this) {
+ if (_VSTD::addressof(__c) != this) {
__libcpp_db* __db = __get_db();
__c_node* __cn1 = __db->__find_c_and_lock(this);
- __c_node* __cn2 = __db->__find_c(&__c);
+ __c_node* __cn2 = __db->__find_c(_VSTD::addressof(__c));
for (__i_node** __ip = __cn2->end_; __ip != __cn2->beg_;)
{
--__ip;
@@ -2092,16 +2094,16 @@ void
list<_Tp, _Alloc>::splice(const_iterator __p, list& __c, const_iterator __f, const_iterator __l)
{
#if _LIBCPP_DEBUG_LEVEL == 2
- _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this,
+ _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this,
"list::splice(iterator, list, iterator, iterator) called with first iterator not"
" referring to this list");
- _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__f) == &__c,
+ _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__f)) == _VSTD::addressof(__c),
"list::splice(iterator, list, iterator, iterator) called with second iterator not"
" referring to the list argument");
- _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__l) == &__c,
+ _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__l)) == _VSTD::addressof(__c),
"list::splice(iterator, list, iterator, iterator) called with third iterator not"
" referring to the list argument");
- if (this == &__c)
+ if (this == _VSTD::addressof(__c))
{
for (const_iterator __i = __f; __i != __l; ++__i)
_LIBCPP_ASSERT(__i != __p,
@@ -2115,7 +2117,7 @@ list<_Tp, _Alloc>::splice(const_iterator __p, list& __c, const_iterator __f, con
__link_pointer __first = __f.__ptr_;
--__l;
__link_pointer __last = __l.__ptr_;
- if (this != &__c)
+ if (this != _VSTD::addressof(__c))
{
size_type __s = _VSTD::distance(__f, __l) + 1;
__c.__sz() -= __s;
@@ -2124,10 +2126,10 @@ list<_Tp, _Alloc>::splice(const_iterator __p, list& __c, const_iterator __f, con
base::__unlink_nodes(__first, __last);
__link_nodes(__p.__ptr_, __first, __last);
#if _LIBCPP_DEBUG_LEVEL == 2
- if (&__c != this) {
+ if (_VSTD::addressof(__c) != this) {
__libcpp_db* __db = __get_db();
__c_node* __cn1 = __db->__find_c_and_lock(this);
- __c_node* __cn2 = __db->__find_c(&__c);
+ __c_node* __cn2 = __db->__find_c(_VSTD::addressof(__c));
for (__i_node** __ip = __cn2->end_; __ip != __cn2->beg_;)
{
--__ip;
@@ -2244,7 +2246,7 @@ list<_Tp, _Alloc>::merge(list& __c, _Comp __comp)
{
size_type __ds = 1;
iterator __m2 = _VSTD::next(__f2);
- for (; __m2 != __e2 && __comp(*__m2, *__f1); ++__m2, ++__ds)
+ for (; __m2 != __e2 && __comp(*__m2, *__f1); ++__m2, (void) ++__ds)
;
base::__sz() += __ds;
__c.__sz() -= __ds;
@@ -2263,7 +2265,7 @@ list<_Tp, _Alloc>::merge(list& __c, _Comp __comp)
#if _LIBCPP_DEBUG_LEVEL == 2
__libcpp_db* __db = __get_db();
__c_node* __cn1 = __db->__find_c_and_lock(this);
- __c_node* __cn2 = __db->__find_c(&__c);
+ __c_node* __cn2 = __db->__find_c(_VSTD::addressof(__c));
for (__i_node** __p = __cn2->end_; __p != __cn2->beg_;)
{
--__p;
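Two patterns recur throughout the <list> hunks above: _VSTD::addressof replaces a plain & so a user-defined operator& cannot be picked up, and the (void) cast in comma expressions like `++__f, (void) ++__i` forces the built-in comma operator even if the element type overloads operator,. A contrived sketch of the kinds of types these patterns defend against (Evil and Counter are made-up names):

    #include <memory>

    struct Evil {
        Evil* operator&() const = delete;   // hostile address-of
    };

    struct Counter {
        int n = 0;
        Counter& operator++() { ++n; return *this; }
    };
    void operator,(Counter&, Counter&) = delete;  // hostile comma

    int main() {
        Evil e;
        Evil* p = std::addressof(e);   // still recovers the real address

        Counter a, b;
        ++a, (void) ++b;               // void operand selects built-in comma
        return (p == std::addressof(e) && a.n == 1 && b.n == 1) ? 0 : 1;
    }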
diff --git a/libcxx/include/locale b/libcxx/include/locale
index 8e584005da08..86af26c3e35e 100644
--- a/libcxx/include/locale
+++ b/libcxx/include/locale
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===-------------------------- locale ------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -230,7 +230,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD
#if defined(__APPLE__) || defined(__FreeBSD__)
# define _LIBCPP_GET_C_LOCALE 0
-#elif defined(__CloudABI__) || defined(__NetBSD__)
+#elif defined(__NetBSD__)
# define _LIBCPP_GET_C_LOCALE LC_C_LOCALE
#else
# define _LIBCPP_GET_C_LOCALE __cloc()
@@ -573,7 +573,9 @@ __num_get<_CharT>::__stage2_float_loop(_CharT __ct, bool& __in_units, char& __ex
}
_LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(struct _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS __num_get<char>)
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
_LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(struct _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS __num_get<wchar_t>)
+#endif
template <class _CharT, class _InputIterator = istreambuf_iterator<_CharT> >
class _LIBCPP_TEMPLATE_VIS num_get
@@ -1111,7 +1113,9 @@ num_get<_CharT, _InputIterator>::do_get(iter_type __b, iter_type __e,
}
_LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS num_get<char>)
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
_LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS num_get<wchar_t>)
+#endif
struct _LIBCPP_TYPE_VIS __num_put_base
{
@@ -1261,7 +1265,9 @@ __num_put<_CharT>::__widen_and_group_float(char* __nb, char* __np, char* __ne,
}
_LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(struct _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS __num_put<char>)
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
_LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(struct _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS __num_put<wchar_t>)
+#endif
template <class _CharT, class _OutputIterator = ostreambuf_iterator<_CharT> >
class _LIBCPP_TEMPLATE_VIS num_put
@@ -1354,6 +1360,18 @@ protected:
long double __v) const;
virtual iter_type do_put(iter_type __s, ios_base& __iob, char_type __fl,
const void* __v) const;
+
+ template <class _Integral>
+ _LIBCPP_HIDE_FROM_ABI inline
+ _OutputIterator __do_put_integral(iter_type __s, ios_base& __iob,
+ char_type __fl, _Integral __v,
+ char const* __len) const;
+
+ template <class _Float>
+ _LIBCPP_HIDE_FROM_ABI inline
+ _OutputIterator __do_put_floating_point(iter_type __s, ios_base& __iob,
+ char_type __fl, _Float __v,
+ char const* __len) const;
};
template <class _CharT, class _OutputIterator>
@@ -1450,22 +1468,28 @@ num_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base& __iob,
}
template <class _CharT, class _OutputIterator>
+template <class _Integral>
+_LIBCPP_HIDE_FROM_ABI inline
_OutputIterator
-num_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base& __iob,
- char_type __fl, long __v) const
+num_put<_CharT, _OutputIterator>::__do_put_integral(iter_type __s, ios_base& __iob,
+ char_type __fl, _Integral __v,
+ char const* __len) const
{
// Stage 1 - Get number in narrow char
- char __fmt[6] = {'%', 0};
- const char* __len = "l";
- this->__format_int(__fmt+1, __len, true, __iob.flags());
+ char __fmt[8] = {'%', 0};
+ this->__format_int(__fmt+1, __len, is_signed<_Integral>::value, __iob.flags());
// Worst case is octal, with showbase enabled. Note that octal is always
// printed as an unsigned value.
+ using _Unsigned = typename make_unsigned<_Integral>::type;
_LIBCPP_CONSTEXPR const unsigned __nbuf
- = (numeric_limits<unsigned long>::digits / 3) // 1 char per 3 bits
- + ((numeric_limits<unsigned long>::digits % 3) != 0) // round up
+ = (numeric_limits<_Unsigned>::digits / 3) // 1 char per 3 bits
+ + ((numeric_limits<_Unsigned>::digits % 3) != 0) // round up
+ 2; // base prefix + terminating null character
char __nar[__nbuf];
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wformat-nonliteral"
int __nc = __libcpp_snprintf_l(__nar, sizeof(__nar), _LIBCPP_GET_C_LOCALE, __fmt, __v);
+#pragma clang diagnostic pop
char* __ne = __nar + __nc;
char* __np = this->__identify_padding(__nar, __ne, __iob);
// Stage 2 - Widen __nar while adding thousands separators
@@ -1481,30 +1505,17 @@ num_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base& __iob,
template <class _CharT, class _OutputIterator>
_OutputIterator
num_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base& __iob,
+ char_type __fl, long __v) const
+{
+ return this->__do_put_integral(__s, __iob, __fl, __v, "l");
+}
+
+template <class _CharT, class _OutputIterator>
+_OutputIterator
+num_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base& __iob,
char_type __fl, long long __v) const
{
- // Stage 1 - Get number in narrow char
- char __fmt[8] = {'%', 0};
- const char* __len = "ll";
- this->__format_int(__fmt+1, __len, true, __iob.flags());
- // Worst case is octal, with showbase enabled. Note that octal is always
- // printed as an unsigned value.
- _LIBCPP_CONSTEXPR const unsigned __nbuf
- = (numeric_limits<unsigned long long>::digits / 3) // 1 char per 3 bits
- + ((numeric_limits<unsigned long long>::digits % 3) != 0) // round up
- + 2; // base prefix + terminating null character
- char __nar[__nbuf];
- int __nc = __libcpp_snprintf_l(__nar, sizeof(__nar), _LIBCPP_GET_C_LOCALE, __fmt, __v);
- char* __ne = __nar + __nc;
- char* __np = this->__identify_padding(__nar, __ne, __iob);
- // Stage 2 - Widen __nar while adding thousands separators
- char_type __o[2*(__nbuf-1) - 1];
- char_type* __op; // pad here
- char_type* __oe; // end of output
- this->__widen_and_group_int(__nar, __np, __ne, __o, __op, __oe, __iob.getloc());
- // [__o, __oe) contains thousands_sep'd wide number
- // Stage 3 & 4
- return __pad_and_output(__s, __o, __op, __oe, __iob, __fl);
+ return this->__do_put_integral(__s, __iob, __fl, __v, "ll");
}
template <class _CharT, class _OutputIterator>
@@ -1512,27 +1523,7 @@ _OutputIterator
num_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base& __iob,
char_type __fl, unsigned long __v) const
{
- // Stage 1 - Get number in narrow char
- char __fmt[6] = {'%', 0};
- const char* __len = "l";
- this->__format_int(__fmt+1, __len, false, __iob.flags());
- // Worst case is octal, with showbase enabled.
- _LIBCPP_CONSTEXPR const unsigned __nbuf
- = (numeric_limits<unsigned long>::digits / 3) // 1 char per 3 bits
- + ((numeric_limits<unsigned long>::digits % 3) != 0) // round up
- + 2; // base prefix + terminating null character
- char __nar[__nbuf];
- int __nc = __libcpp_snprintf_l(__nar, sizeof(__nar), _LIBCPP_GET_C_LOCALE, __fmt, __v);
- char* __ne = __nar + __nc;
- char* __np = this->__identify_padding(__nar, __ne, __iob);
- // Stage 2 - Widen __nar while adding thousands separators
- char_type __o[2*(__nbuf-1) - 1];
- char_type* __op; // pad here
- char_type* __oe; // end of output
- this->__widen_and_group_int(__nar, __np, __ne, __o, __op, __oe, __iob.getloc());
- // [__o, __oe) contains thousands_sep'd wide number
- // Stage 3 & 4
- return __pad_and_output(__s, __o, __op, __oe, __iob, __fl);
+ return this->__do_put_integral(__s, __iob, __fl, __v, "l");
}
template <class _CharT, class _OutputIterator>
@@ -1540,42 +1531,26 @@ _OutputIterator
num_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base& __iob,
char_type __fl, unsigned long long __v) const
{
- // Stage 1 - Get number in narrow char
- char __fmt[8] = {'%', 0};
- const char* __len = "ll";
- this->__format_int(__fmt+1, __len, false, __iob.flags());
- // Worst case is octal, with showbase enabled.
- _LIBCPP_CONSTEXPR const unsigned __nbuf
- = (numeric_limits<unsigned long long>::digits / 3) // 1 char per 3 bits
- + ((numeric_limits<unsigned long long>::digits % 3) != 0) // round up
- + 2; // base prefix + terminating null character
- char __nar[__nbuf];
- int __nc = __libcpp_snprintf_l(__nar, sizeof(__nar), _LIBCPP_GET_C_LOCALE, __fmt, __v);
- char* __ne = __nar + __nc;
- char* __np = this->__identify_padding(__nar, __ne, __iob);
- // Stage 2 - Widen __nar while adding thousands separators
- char_type __o[2*(__nbuf-1) - 1];
- char_type* __op; // pad here
- char_type* __oe; // end of output
- this->__widen_and_group_int(__nar, __np, __ne, __o, __op, __oe, __iob.getloc());
- // [__o, __oe) contains thousands_sep'd wide number
- // Stage 3 & 4
- return __pad_and_output(__s, __o, __op, __oe, __iob, __fl);
+ return this->__do_put_integral(__s, __iob, __fl, __v, "ll");
}
template <class _CharT, class _OutputIterator>
+template <class _Float>
+_LIBCPP_HIDE_FROM_ABI inline
_OutputIterator
-num_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base& __iob,
- char_type __fl, double __v) const
+num_put<_CharT, _OutputIterator>::__do_put_floating_point(iter_type __s, ios_base& __iob,
+ char_type __fl, _Float __v,
+ char const* __len) const
{
// Stage 1 - Get number in narrow char
char __fmt[8] = {'%', 0};
- const char* __len = "";
bool __specify_precision = this->__format_float(__fmt+1, __len, __iob.flags());
const unsigned __nbuf = 30;
char __nar[__nbuf];
char* __nb = __nar;
int __nc;
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wformat-nonliteral"
if (__specify_precision)
__nc = __libcpp_snprintf_l(__nb, __nbuf, _LIBCPP_GET_C_LOCALE, __fmt,
(int)__iob.precision(), __v);
@@ -1592,6 +1567,7 @@ num_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base& __iob,
__throw_bad_alloc();
__nbh.reset(__nb);
}
+#pragma clang diagnostic pop
char* __ne = __nb + __nc;
char* __np = this->__identify_padding(__nb, __ne, __iob);
// Stage 2 - Widen __nar while adding thousands separators
@@ -1617,52 +1593,17 @@ num_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base& __iob,
template <class _CharT, class _OutputIterator>
_OutputIterator
num_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base& __iob,
+ char_type __fl, double __v) const
+{
+ return this->__do_put_floating_point(__s, __iob, __fl, __v, "");
+}
+
+template <class _CharT, class _OutputIterator>
+_OutputIterator
+num_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base& __iob,
char_type __fl, long double __v) const
{
- // Stage 1 - Get number in narrow char
- char __fmt[8] = {'%', 0};
- const char* __len = "L";
- bool __specify_precision = this->__format_float(__fmt+1, __len, __iob.flags());
- const unsigned __nbuf = 30;
- char __nar[__nbuf];
- char* __nb = __nar;
- int __nc;
- if (__specify_precision)
- __nc = __libcpp_snprintf_l(__nb, __nbuf, _LIBCPP_GET_C_LOCALE, __fmt,
- (int)__iob.precision(), __v);
- else
- __nc = __libcpp_snprintf_l(__nb, __nbuf, _LIBCPP_GET_C_LOCALE, __fmt, __v);
- unique_ptr<char, void(*)(void*)> __nbh(nullptr, free);
- if (__nc > static_cast<int>(__nbuf-1))
- {
- if (__specify_precision)
- __nc = __libcpp_asprintf_l(&__nb, _LIBCPP_GET_C_LOCALE, __fmt, (int)__iob.precision(), __v);
- else
- __nc = __libcpp_asprintf_l(&__nb, _LIBCPP_GET_C_LOCALE, __fmt, __v);
- if (__nc == -1)
- __throw_bad_alloc();
- __nbh.reset(__nb);
- }
- char* __ne = __nb + __nc;
- char* __np = this->__identify_padding(__nb, __ne, __iob);
- // Stage 2 - Widen __nar while adding thousands separators
- char_type __o[2*(__nbuf-1) - 1];
- char_type* __ob = __o;
- unique_ptr<char_type, void(*)(void*)> __obh(0, free);
- if (__nb != __nar)
- {
- __ob = (char_type*)malloc(2*static_cast<size_t>(__nc)*sizeof(char_type));
- if (__ob == 0)
- __throw_bad_alloc();
- __obh.reset(__ob);
- }
- char_type* __op; // pad here
- char_type* __oe; // end of output
- this->__widen_and_group_float(__nb, __np, __ne, __ob, __op, __oe, __iob.getloc());
- // [__o, __oe) contains thousands_sep'd wide number
- // Stage 3 & 4
- __s = __pad_and_output(__s, __ob, __op, __oe, __iob, __fl);
- return __s;
+ return this->__do_put_floating_point(__s, __iob, __fl, __v, "L");
}
template <class _CharT, class _OutputIterator>
@@ -1671,10 +1612,9 @@ num_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base& __iob,
char_type __fl, const void* __v) const
{
// Stage 1 - Get pointer in narrow char
- char __fmt[6] = "%p";
const unsigned __nbuf = 20;
char __nar[__nbuf];
- int __nc = __libcpp_snprintf_l(__nar, sizeof(__nar), _LIBCPP_GET_C_LOCALE, __fmt, __v);
+ int __nc = __libcpp_snprintf_l(__nar, sizeof(__nar), _LIBCPP_GET_C_LOCALE, "%p", __v);
char* __ne = __nar + __nc;
char* __np = this->__identify_padding(__nar, __ne, __iob);
// Stage 2 - Widen __nar
@@ -1694,7 +1634,9 @@ num_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base& __iob,
}
_LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS num_put<char>)
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
_LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS num_put<wchar_t>)
+#endif
template <class _CharT, class _InputIterator>
_LIBCPP_HIDDEN
@@ -1761,6 +1703,7 @@ template <> _LIBCPP_FUNC_VIS const string& __time_get_c_storage<char>::__r() con
template <> _LIBCPP_FUNC_VIS const string& __time_get_c_storage<char>::__x() const;
template <> _LIBCPP_FUNC_VIS const string& __time_get_c_storage<char>::__X() const;
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
template <> _LIBCPP_FUNC_VIS const wstring* __time_get_c_storage<wchar_t>::__weeks() const;
template <> _LIBCPP_FUNC_VIS const wstring* __time_get_c_storage<wchar_t>::__months() const;
template <> _LIBCPP_FUNC_VIS const wstring* __time_get_c_storage<wchar_t>::__am_pm() const;
@@ -1768,6 +1711,7 @@ template <> _LIBCPP_FUNC_VIS const wstring& __time_get_c_storage<wchar_t>::__c()
template <> _LIBCPP_FUNC_VIS const wstring& __time_get_c_storage<wchar_t>::__r() const;
template <> _LIBCPP_FUNC_VIS const wstring& __time_get_c_storage<wchar_t>::__x() const;
template <> _LIBCPP_FUNC_VIS const wstring& __time_get_c_storage<wchar_t>::__X() const;
+#endif
template <class _CharT, class _InputIterator = istreambuf_iterator<_CharT> >
class _LIBCPP_TEMPLATE_VIS time_get
@@ -2380,7 +2324,9 @@ time_get<_CharT, _InputIterator>::do_get(iter_type __b, iter_type __e,
}
_LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS time_get<char>)
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
_LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS time_get<wchar_t>)
+#endif
class _LIBCPP_TYPE_VIS __time_get
{
@@ -2480,7 +2426,9 @@ private:
};
_LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS time_get_byname<char>)
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
_LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS time_get_byname<wchar_t>)
+#endif
class _LIBCPP_TYPE_VIS __time_put
{
@@ -2593,7 +2541,9 @@ time_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base&,
}
_LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS time_put<char>)
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
_LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS time_put<wchar_t>)
+#endif
template <class _CharT, class _OutputIterator = ostreambuf_iterator<_CharT> >
class _LIBCPP_TEMPLATE_VIS time_put_byname
@@ -2614,7 +2564,9 @@ protected:
};
_LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS time_put_byname<char>)
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
_LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS time_put_byname<wchar_t>)
+#endif
// money_base
@@ -2682,8 +2634,10 @@ moneypunct<_CharT, _International>::intl;
_LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS moneypunct<char, false>)
_LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS moneypunct<char, true>)
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
_LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS moneypunct<wchar_t, false>)
_LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS moneypunct<wchar_t, true>)
+#endif
// moneypunct_byname
@@ -2734,13 +2688,15 @@ private:
template<> _LIBCPP_FUNC_VIS void moneypunct_byname<char, false>::init(const char*);
template<> _LIBCPP_FUNC_VIS void moneypunct_byname<char, true>::init(const char*);
-template<> _LIBCPP_FUNC_VIS void moneypunct_byname<wchar_t, false>::init(const char*);
-template<> _LIBCPP_FUNC_VIS void moneypunct_byname<wchar_t, true>::init(const char*);
-
_LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS moneypunct_byname<char, false>)
_LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS moneypunct_byname<char, true>)
+
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
+template<> _LIBCPP_FUNC_VIS void moneypunct_byname<wchar_t, false>::init(const char*);
+template<> _LIBCPP_FUNC_VIS void moneypunct_byname<wchar_t, true>::init(const char*);
_LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS moneypunct_byname<wchar_t, false>)
_LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS moneypunct_byname<wchar_t, true>)
+#endif
// money_get
@@ -2797,7 +2753,9 @@ __money_get<_CharT>::__gather_info(bool __intl, const locale& __loc,
}
_LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS __money_get<char>)
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
_LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS __money_get<wchar_t>)
+#endif
template <class _CharT, class _InputIterator = istreambuf_iterator<_CharT> >
class _LIBCPP_TEMPLATE_VIS money_get
@@ -2891,6 +2849,10 @@ money_get<_CharT, _InputIterator>::__do_get(iter_type& __b, iter_type __e,
unique_ptr<char_type, void(*)(void*)>& __wb,
char_type*& __wn, char_type* __we)
{
+ if (__b == __e) {
+ __err |= ios_base::failbit;
+ return false;
+ }
const unsigned __bz = 100;
unsigned __gbuf[__bz];
unique_ptr<unsigned, void(*)(void*)> __gb(__gbuf, __do_nothing);
@@ -2935,51 +2897,31 @@ money_get<_CharT, _InputIterator>::__do_get(iter_type& __b, iter_type __e,
}
break;
case money_base::sign:
- if (__psn.size() + __nsn.size() > 0)
+ if (__psn.size() > 0 && *__b == __psn[0])
{
- if (__psn.size() == 0 || __nsn.size() == 0)
- { // sign is optional
- if (__psn.size() > 0)
- { // __nsn.size() == 0
- if (*__b == __psn[0])
- {
- ++__b;
- if (__psn.size() > 1)
- __trailing_sign = &__psn;
- }
- else
- __neg = true;
- }
- else if (*__b == __nsn[0]) // __nsn.size() > 0 && __psn.size() == 0
- {
- ++__b;
- __neg = true;
- if (__nsn.size() > 1)
- __trailing_sign = &__nsn;
- }
- }
- else // sign is required
- {
- if (*__b == __psn[0])
- {
- ++__b;
- if (__psn.size() > 1)
- __trailing_sign = &__psn;
- }
- else if (*__b == __nsn[0])
- {
- ++__b;
- __neg = true;
- if (__nsn.size() > 1)
- __trailing_sign = &__nsn;
- }
- else
- {
- __err |= ios_base::failbit;
- return false;
- }
- }
+ ++__b;
+ __neg = false;
+ if (__psn.size() > 1)
+ __trailing_sign = &__psn;
+ break;
+ }
+ if (__nsn.size() > 0 && *__b == __nsn[0])
+ {
+ ++__b;
+ __neg = true;
+ if (__nsn.size() > 1)
+ __trailing_sign = &__nsn;
+ break;
}
+ if (__psn.size() > 0 && __nsn.size() > 0)
+ { // sign is required
+ __err |= ios_base::failbit;
+ return false;
+ }
+ if (__psn.size() == 0 && __nsn.size() == 0)
+ // locale has no way of specifying a sign. Use the initial value of __neg as a default
+ break;
+ __neg = (__nsn.size() == 0);
break;
case money_base::symbol:
{
@@ -3180,7 +3122,9 @@ money_get<_CharT, _InputIterator>::do_get(iter_type __b, iter_type __e,
}
_LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS money_get<char>)
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
_LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS money_get<wchar_t>)
+#endif
// money_put
@@ -3355,7 +3299,9 @@ __money_put<_CharT>::__format(char_type* __mb, char_type*& __mi, char_type*& __m
}
_LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS __money_put<char>)
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
_LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS __money_put<wchar_t>)
+#endif
template <class _CharT, class _OutputIterator = ostreambuf_iterator<_CharT> >
class _LIBCPP_TEMPLATE_VIS money_put
@@ -3508,7 +3454,9 @@ money_put<_CharT, _OutputIterator>::do_put(iter_type __s, bool __intl,
}
_LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS money_put<char>)
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
_LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS money_put<wchar_t>)
+#endif
// messages
@@ -3624,7 +3572,9 @@ messages<_CharT>::do_close(catalog __c) const
}
_LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS messages<char>)
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
_LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS messages<wchar_t>)
+#endif
template <class _CharT>
class _LIBCPP_TEMPLATE_VIS messages_byname
@@ -3648,7 +3598,9 @@ protected:
};
_LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS messages_byname<char>)
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
_LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS messages_byname<wchar_t>)
+#endif
template<class _Codecvt, class _Elem = wchar_t,
class _Wide_alloc = allocator<_Elem>,
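
The money_get::__do_get change above makes an empty input range fail immediately, and the sign state is now resolved with early breaks instead of the removed nested branches. A minimal sketch of the user-visible effect, assuming the host has an "en_US.UTF-8" locale installed:

#include <iomanip>
#include <iostream>
#include <locale>
#include <sstream>

int main() {
    std::istringstream in("");                 // empty range: __b == __e on entry
    in.imbue(std::locale("en_US.UTF-8"));      // assumed to be available on the host
    long double units = 0;
    in >> std::get_money(units);               // sets failbit without scanning the pattern
    std::cout << std::boolalpha << in.fail() << '\n';   // prints "true"
}
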
diff --git a/libcxx/include/locale.h b/libcxx/include/locale.h
index 65b2bd5fd724..215e46d9ccd7 100644
--- a/libcxx/include/locale.h
+++ b/libcxx/include/locale.h
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===---------------------------- locale.h --------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -36,7 +36,7 @@ Functions:
#include <__config>
#if defined(_LIBCPP_HAS_NO_LOCALIZATION)
-# error "Localization is not supported by this configuration of libc++"
+# error "The Localization library is not supported since libc++ has been configured with LIBCXX_ENABLE_LOCALIZATION disabled"
#endif
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
diff --git a/libcxx/include/map b/libcxx/include/map
index 513a04dd7923..7654a8fc2847 100644
--- a/libcxx/include/map
+++ b/libcxx/include/map
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===----------------------------- map ------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -223,6 +223,25 @@ public:
pair<const_iterator,const_iterator> equal_range(const K& x) const; // C++14
};
+template <class InputIterator,
+ class Compare = less<iter_key_t<InputIterator>>,
+ class Allocator = allocator<iter_to_alloc_t<InputIterator>>>
+map(InputIterator, InputIterator, Compare = Compare(), Allocator = Allocator())
+ -> map<iter_key_t<InputIterator>, iter_val_t<InputIterator>, Compare, Allocator>; // C++17
+
+template<class Key, class T, class Compare = less<Key>,
+ class Allocator = allocator<pair<const Key, T>>>
+map(initializer_list<pair<const Key, T>>, Compare = Compare(), Allocator = Allocator())
+ -> map<Key, T, Compare, Allocator>; // C++17
+
+template <class InputIterator, class Allocator>
+map(InputIterator, InputIterator, Allocator)
+ -> map<iter_key_t<InputIterator>, iter_val_t<InputIterator>, less<iter_key_t<InputIterator>>,
+ Allocator>; // C++17
+
+template<class Key, class T, class Allocator>
+map(initializer_list<pair<const Key, T>>, Allocator) -> map<Key, T, less<Key>, Allocator>; // C++17
+
template <class Key, class T, class Compare, class Allocator>
bool
operator==(const map<Key, T, Compare, Allocator>& x,
@@ -444,6 +463,26 @@ public:
pair<const_iterator,const_iterator> equal_range(const K& x) const; // C++14
};
+template <class InputIterator,
+ class Compare = less<iter_key_t<InputIterator>>,
+ class Allocator = allocator<iter_to_alloc_t<InputIterator>>>
+multimap(InputIterator, InputIterator, Compare = Compare(), Allocator = Allocator())
+ -> multimap<iter_key_t<InputIterator>, iter_val_t<InputIterator>, Compare, Allocator>; // C++17
+
+template<class Key, class T, class Compare = less<Key>,
+ class Allocator = allocator<pair<const Key, T>>>
+multimap(initializer_list<pair<const Key, T>>, Compare = Compare(), Allocator = Allocator())
+ -> multimap<Key, T, Compare, Allocator>; // C++17
+
+template <class InputIterator, class Allocator>
+multimap(InputIterator, InputIterator, Allocator)
+ -> multimap<iter_key_t<InputIterator>, iter_val_t<InputIterator>,
+ less<iter_key_t<InputIterator>>, Allocator>; // C++17
+
+template<class Key, class T, class Allocator>
+multimap(initializer_list<pair<const Key, T>>, Allocator)
+ -> multimap<Key, T, less<Key>, Allocator>; // C++17
+
template <class Key, class T, class Compare, class Allocator>
bool
operator==(const multimap<Key, T, Compare, Allocator>& x,
@@ -492,6 +531,7 @@ erase_if(multimap<Key, T, Compare, Allocator>& c, Predicate pred); // C++20
#include <__config>
#include <__debug>
#include <__functional/is_transparent.h>
+#include <__iterator/iterator_traits.h>
#include <__node_handle>
#include <__tree>
#include <__utility/forward.h>
@@ -535,7 +575,7 @@ public:
_LIBCPP_INLINE_VISIBILITY
bool operator()(const _Key& __x, const _CP& __y) const
{return static_cast<const _Compare&>(*this)(__x, __y.__get_value().first);}
- void swap(__map_value_compare&__y)
+ void swap(__map_value_compare& __y)
_NOEXCEPT_(__is_nothrow_swappable<_Compare>::value)
{
using _VSTD::swap;
@@ -545,15 +585,13 @@ public:
#if _LIBCPP_STD_VER > 11
template <typename _K2>
_LIBCPP_INLINE_VISIBILITY
- typename enable_if<__is_transparent<_Compare, _K2>::value, bool>::type
- operator () ( const _K2& __x, const _CP& __y ) const
- {return static_cast<const _Compare&>(*this) (__x, __y.__get_value().first);}
+ bool operator()(const _K2& __x, const _CP& __y) const
+ {return static_cast<const _Compare&>(*this)(__x, __y.__get_value().first);}
template <typename _K2>
_LIBCPP_INLINE_VISIBILITY
- typename enable_if<__is_transparent<_Compare, _K2>::value, bool>::type
- operator () (const _CP& __x, const _K2& __y) const
- {return static_cast<const _Compare&>(*this) (__x.__get_value().first, __y);}
+ bool operator()(const _CP& __x, const _K2& __y) const
+ {return static_cast<const _Compare&>(*this)(__x.__get_value().first, __y);}
#endif
};
@@ -583,7 +621,7 @@ public:
_LIBCPP_INLINE_VISIBILITY
bool operator()(const _Key& __x, const _CP& __y) const
{return comp(__x, __y.__get_value().first);}
- void swap(__map_value_compare&__y)
+ void swap(__map_value_compare& __y)
_NOEXCEPT_(__is_nothrow_swappable<_Compare>::value)
{
using _VSTD::swap;
@@ -593,15 +631,13 @@ public:
#if _LIBCPP_STD_VER > 11
template <typename _K2>
_LIBCPP_INLINE_VISIBILITY
- typename enable_if<__is_transparent<_Compare, _K2>::value, bool>::type
- operator () ( const _K2& __x, const _CP& __y ) const
- {return comp (__x, __y.__get_value().first);}
+ bool operator()(const _K2& __x, const _CP& __y) const
+ {return comp(__x, __y.__get_value().first);}
template <typename _K2>
_LIBCPP_INLINE_VISIBILITY
- typename enable_if<__is_transparent<_Compare, _K2>::value, bool>::type
- operator () (const _CP& __x, const _K2& __y) const
- {return comp (__x.__get_value().first, __y);}
+ bool operator()(const _CP& __x, const _K2& __y) const
+ {return comp(__x.__get_value().first, __y);}
#endif
};
@@ -747,10 +783,10 @@ public:
}
private:
- __value_type() _LIBCPP_EQUAL_DELETE;
- ~__value_type() _LIBCPP_EQUAL_DELETE;
- __value_type(const __value_type& __v) _LIBCPP_EQUAL_DELETE;
- __value_type(__value_type&& __v) _LIBCPP_EQUAL_DELETE;
+ __value_type() = delete;
+ ~__value_type() = delete;
+ __value_type(const __value_type&) = delete;
+ __value_type(__value_type&&) = delete;
};
#else
@@ -1040,7 +1076,7 @@ public:
#ifndef _LIBCPP_CXX03_LANG
__tree_ = __m.__tree_;
#else
- if (this != &__m) {
+ if (this != _VSTD::addressof(__m)) {
__tree_.clear();
__tree_.value_comp() = __m.__tree_.value_comp();
__tree_.__copy_assign_alloc(__m.__tree_);
@@ -1502,29 +1538,31 @@ private:
#endif
};
-#ifndef _LIBCPP_HAS_NO_DEDUCTION_GUIDES
+#if _LIBCPP_STD_VER >= 17
template<class _InputIterator, class _Compare = less<__iter_key_type<_InputIterator>>,
class _Allocator = allocator<__iter_to_alloc_type<_InputIterator>>,
- class = _EnableIf<!__is_allocator<_Compare>::value, void>,
- class = _EnableIf<__is_allocator<_Allocator>::value, void>>
+ class = enable_if_t<__is_cpp17_input_iterator<_InputIterator>::value, void>,
+ class = enable_if_t<!__is_allocator<_Compare>::value, void>,
+ class = enable_if_t<__is_allocator<_Allocator>::value, void>>
map(_InputIterator, _InputIterator, _Compare = _Compare(), _Allocator = _Allocator())
-> map<__iter_key_type<_InputIterator>, __iter_mapped_type<_InputIterator>, _Compare, _Allocator>;
template<class _Key, class _Tp, class _Compare = less<remove_const_t<_Key>>,
class _Allocator = allocator<pair<const _Key, _Tp>>,
- class = _EnableIf<!__is_allocator<_Compare>::value, void>,
- class = _EnableIf<__is_allocator<_Allocator>::value, void>>
+ class = enable_if_t<!__is_allocator<_Compare>::value, void>,
+ class = enable_if_t<__is_allocator<_Allocator>::value, void>>
map(initializer_list<pair<_Key, _Tp>>, _Compare = _Compare(), _Allocator = _Allocator())
-> map<remove_const_t<_Key>, _Tp, _Compare, _Allocator>;
template<class _InputIterator, class _Allocator,
- class = _EnableIf<__is_allocator<_Allocator>::value, void>>
+ class = enable_if_t<__is_cpp17_input_iterator<_InputIterator>::value, void>,
+ class = enable_if_t<__is_allocator<_Allocator>::value, void>>
map(_InputIterator, _InputIterator, _Allocator)
-> map<__iter_key_type<_InputIterator>, __iter_mapped_type<_InputIterator>,
less<__iter_key_type<_InputIterator>>, _Allocator>;
template<class _Key, class _Tp, class _Allocator,
- class = _EnableIf<__is_allocator<_Allocator>::value, void>>
+ class = enable_if_t<__is_allocator<_Allocator>::value, void>>
map(initializer_list<pair<_Key, _Tp>>, _Allocator)
-> map<remove_const_t<_Key>, _Tp, less<remove_const_t<_Key>>, _Allocator>;
#endif
@@ -1824,7 +1862,7 @@ public:
#ifndef _LIBCPP_CXX03_LANG
__tree_ = __m.__tree_;
#else
- if (this != &__m) {
+ if (this != _VSTD::addressof(__m)) {
__tree_.clear();
__tree_.value_comp() = __m.__tree_.value_comp();
__tree_.__copy_assign_alloc(__m.__tree_);
@@ -2175,29 +2213,31 @@ private:
typedef unique_ptr<__node, _Dp> __node_holder;
};
-#ifndef _LIBCPP_HAS_NO_DEDUCTION_GUIDES
+#if _LIBCPP_STD_VER >= 17
template<class _InputIterator, class _Compare = less<__iter_key_type<_InputIterator>>,
class _Allocator = allocator<__iter_to_alloc_type<_InputIterator>>,
- class = _EnableIf<!__is_allocator<_Compare>::value, void>,
- class = _EnableIf<__is_allocator<_Allocator>::value, void>>
+ class = enable_if_t<__is_cpp17_input_iterator<_InputIterator>::value, void>,
+ class = enable_if_t<!__is_allocator<_Compare>::value, void>,
+ class = enable_if_t<__is_allocator<_Allocator>::value, void>>
multimap(_InputIterator, _InputIterator, _Compare = _Compare(), _Allocator = _Allocator())
-> multimap<__iter_key_type<_InputIterator>, __iter_mapped_type<_InputIterator>, _Compare, _Allocator>;
template<class _Key, class _Tp, class _Compare = less<remove_const_t<_Key>>,
class _Allocator = allocator<pair<const _Key, _Tp>>,
- class = _EnableIf<!__is_allocator<_Compare>::value, void>,
- class = _EnableIf<__is_allocator<_Allocator>::value, void>>
+ class = enable_if_t<!__is_allocator<_Compare>::value, void>,
+ class = enable_if_t<__is_allocator<_Allocator>::value, void>>
multimap(initializer_list<pair<_Key, _Tp>>, _Compare = _Compare(), _Allocator = _Allocator())
-> multimap<remove_const_t<_Key>, _Tp, _Compare, _Allocator>;
template<class _InputIterator, class _Allocator,
- class = _EnableIf<__is_allocator<_Allocator>::value, void>>
+ class = enable_if_t<__is_cpp17_input_iterator<_InputIterator>::value, void>,
+ class = enable_if_t<__is_allocator<_Allocator>::value, void>>
multimap(_InputIterator, _InputIterator, _Allocator)
-> multimap<__iter_key_type<_InputIterator>, __iter_mapped_type<_InputIterator>,
less<__iter_key_type<_InputIterator>>, _Allocator>;
template<class _Key, class _Tp, class _Allocator,
- class = _EnableIf<__is_allocator<_Allocator>::value, void>>
+ class = enable_if_t<__is_allocator<_Allocator>::value, void>>
multimap(initializer_list<pair<_Key, _Tp>>, _Allocator)
-> multimap<remove_const_t<_Key>, _Tp, less<remove_const_t<_Key>>, _Allocator>;
#endif
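
The deduction guides added to the <map> synopsis (and their constrained implementation counterparts, now gated on _LIBCPP_STD_VER >= 17 and on __is_cpp17_input_iterator) enable class template argument deduction. A small sketch with hypothetical variable names:

#include <map>
#include <memory>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>

int main() {
    std::vector<std::pair<const int, std::string>> v{{1, "one"}, {2, "two"}};
    std::map m1(v.begin(), v.end());                       // iterator-pair guide
    std::map m2(v.begin(), v.end(),
                std::allocator<std::pair<const int, std::string>>());  // allocator-only guide
    static_assert(std::is_same_v<decltype(m1), std::map<int, std::string>>);
    static_assert(std::is_same_v<decltype(m2), std::map<int, std::string>>);
    return m1.size() == m2.size() ? 0 : 1;
}
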
diff --git a/libcxx/include/math.h b/libcxx/include/math.h
index 77762d554512..850cdcfb32f6 100644
--- a/libcxx/include/math.h
+++ b/libcxx/include/math.h
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===---------------------------- math.h ----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -831,7 +831,7 @@ inline _LIBCPP_INLINE_VISIBILITY long double atan2(long double __lcpp_y, long do
template <class _A1, class _A2>
inline _LIBCPP_INLINE_VISIBILITY
-typename std::_EnableIf
+typename std::__enable_if_t
<
std::is_arithmetic<_A1>::value &&
std::is_arithmetic<_A2>::value,
@@ -926,7 +926,7 @@ inline _LIBCPP_INLINE_VISIBILITY long double fmod(long double __lcpp_x, long dou
template <class _A1, class _A2>
inline _LIBCPP_INLINE_VISIBILITY
-typename std::_EnableIf
+typename std::__enable_if_t
<
std::is_arithmetic<_A1>::value &&
std::is_arithmetic<_A2>::value,
@@ -1004,7 +1004,7 @@ inline _LIBCPP_INLINE_VISIBILITY long double pow(long double __lcpp_x, long doub
template <class _A1, class _A2>
inline _LIBCPP_INLINE_VISIBILITY
-typename std::_EnableIf
+typename std::__enable_if_t
<
std::is_arithmetic<_A1>::value &&
std::is_arithmetic<_A2>::value,
@@ -1158,7 +1158,7 @@ template <class _A1, class _A2>
_LIBCPP_CONSTEXPR
#endif
inline _LIBCPP_INLINE_VISIBILITY
-typename std::_EnableIf
+typename std::__enable_if_t
<
std::is_arithmetic<_A1>::value &&
std::is_arithmetic<_A2>::value,
@@ -1185,7 +1185,7 @@ inline _LIBCPP_INLINE_VISIBILITY long double copysign(long double __lcpp_x, long
template <class _A1, class _A2>
inline _LIBCPP_INLINE_VISIBILITY
-typename std::_EnableIf
+typename std::__enable_if_t
<
std::is_arithmetic<_A1>::value &&
std::is_arithmetic<_A2>::value,
@@ -1242,7 +1242,7 @@ inline _LIBCPP_INLINE_VISIBILITY long double fdim(long double __lcpp_x, long dou
template <class _A1, class _A2>
inline _LIBCPP_INLINE_VISIBILITY
-typename std::_EnableIf
+typename std::__enable_if_t
<
std::is_arithmetic<_A1>::value &&
std::is_arithmetic<_A2>::value,
@@ -1277,7 +1277,7 @@ inline _LIBCPP_INLINE_VISIBILITY long double fma(long double __lcpp_x, long doub
template <class _A1, class _A2, class _A3>
inline _LIBCPP_INLINE_VISIBILITY
-typename std::_EnableIf
+typename std::__enable_if_t
<
std::is_arithmetic<_A1>::value &&
std::is_arithmetic<_A2>::value &&
@@ -1304,7 +1304,7 @@ inline _LIBCPP_INLINE_VISIBILITY long double fmax(long double __lcpp_x, long dou
template <class _A1, class _A2>
inline _LIBCPP_INLINE_VISIBILITY
-typename std::_EnableIf
+typename std::__enable_if_t
<
std::is_arithmetic<_A1>::value &&
std::is_arithmetic<_A2>::value,
@@ -1325,7 +1325,7 @@ inline _LIBCPP_INLINE_VISIBILITY long double fmin(long double __lcpp_x, long dou
template <class _A1, class _A2>
inline _LIBCPP_INLINE_VISIBILITY
-typename std::_EnableIf
+typename std::__enable_if_t
<
std::is_arithmetic<_A1>::value &&
std::is_arithmetic<_A2>::value,
@@ -1346,7 +1346,7 @@ inline _LIBCPP_INLINE_VISIBILITY long double hypot(long double __lcpp_x, long do
template <class _A1, class _A2>
inline _LIBCPP_INLINE_VISIBILITY
-typename std::_EnableIf
+typename std::__enable_if_t
<
std::is_arithmetic<_A1>::value &&
std::is_arithmetic<_A2>::value,
@@ -1553,7 +1553,7 @@ inline _LIBCPP_INLINE_VISIBILITY long double nextafter(long double __lcpp_x, lon
template <class _A1, class _A2>
inline _LIBCPP_INLINE_VISIBILITY
-typename std::_EnableIf
+typename std::__enable_if_t
<
std::is_arithmetic<_A1>::value &&
std::is_arithmetic<_A2>::value,
@@ -1584,7 +1584,7 @@ inline _LIBCPP_INLINE_VISIBILITY long double remainder(long double __lcpp_x, lon
template <class _A1, class _A2>
inline _LIBCPP_INLINE_VISIBILITY
-typename std::_EnableIf
+typename std::__enable_if_t
<
std::is_arithmetic<_A1>::value &&
std::is_arithmetic<_A2>::value,
@@ -1605,7 +1605,7 @@ inline _LIBCPP_INLINE_VISIBILITY long double remquo(long double __lcpp_x, long d
template <class _A1, class _A2>
inline _LIBCPP_INLINE_VISIBILITY
-typename std::_EnableIf
+typename std::__enable_if_t
<
std::is_arithmetic<_A1>::value &&
std::is_arithmetic<_A2>::value,
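
The _EnableIf to __enable_if_t rename above is purely internal; these overloads still exist so that mixed arithmetic arguments are accepted and promoted. A brief sketch of the promotion they implement:

#include <cmath>
#include <type_traits>

int main() {
    auto r = std::pow(2, 3.5f);    // int/float mix selects the promoting overload
    static_assert(std::is_same_v<decltype(r), double>);   // promoted result type
    return (r > 11.3 && r < 11.32) ? 0 : 1;               // 2^3.5 is about 11.3137
}
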
diff --git a/libcxx/include/memory b/libcxx/include/memory
index fdb894353008..f12f3c70eadd 100644
--- a/libcxx/include/memory
+++ b/libcxx/include/memory
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===-------------------------- memory ------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -405,7 +405,8 @@ template<class T>
class shared_ptr
{
public:
- typedef T element_type;
+ typedef T element_type; // until C++17
+ typedef remove_extent_t<T> element_type; // since C++17
typedef weak_ptr<T> weak_type; // C++17
// constructors:
@@ -525,7 +526,8 @@ template<class T>
class weak_ptr
{
public:
- typedef T element_type;
+ typedef T element_type; // until C++17
+ typedef remove_extent_t<T> element_type; // since C++17
// constructors
constexpr weak_ptr() noexcept;
@@ -653,14 +655,6 @@ template <class T> struct hash<shared_ptr<T> >;
template <class T, class Alloc>
inline constexpr bool uses_allocator_v = uses_allocator<T, Alloc>::value;
-// Pointer safety
-enum class pointer_safety { relaxed, preferred, strict }; // since C++11
-void declare_reachable(void *p); // since C++11
-template <class T> T *undeclare_reachable(T *p); // since C++11
-void declare_no_pointers(char *p, size_t n); // since C++11
-void undeclare_no_pointers(char *p, size_t n); // since C++11
-pointer_safety get_pointer_safety() noexcept; // since C++11
-
void* align(size_t alignment, size_t size, void*& ptr, size_t& space);
} // std
@@ -676,7 +670,6 @@ void* align(size_t alignment, size_t size, void*& ptr, size_t& space);
#include <__memory/allocator_traits.h>
#include <__memory/compressed_pair.h>
#include <__memory/construct_at.h>
-#include <__memory/pointer_safety.h>
#include <__memory/pointer_traits.h>
#include <__memory/raw_storage_iterator.h>
#include <__memory/shared_ptr.h>
@@ -706,10 +699,6 @@ void* align(size_t alignment, size_t size, void*& ptr, size_t& space);
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
-
_LIBCPP_BEGIN_NAMESPACE_STD
template <class _Alloc, class _Ptr>
@@ -960,8 +949,6 @@ struct __builtin_new_allocator {
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#if defined(_LIBCPP_HAS_PARALLEL_ALGORITHMS) && _LIBCPP_STD_VER >= 17
# include <__pstl_memory>
#endif
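
The synopsis update for shared_ptr and weak_ptr above documents the C++17 element_type, which strips array extents. A short sketch of the array form:

#include <memory>
#include <type_traits>

int main() {
    static_assert(std::is_same_v<std::shared_ptr<int[]>::element_type, int>);
    static_assert(std::is_same_v<std::weak_ptr<int[4]>::element_type, int>);
    std::shared_ptr<int[]> p(new int[3]{1, 2, 3});   // destroyed with delete[] (C++17)
    return (p[0] + p[1] + p[2] == 6) ? 0 : 1;        // operator[] on the array form
}
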
diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap
index b5781b027f96..f34442ed5c9a 100644
--- a/libcxx/include/module.modulemap
+++ b/libcxx/include/module.modulemap
@@ -335,6 +335,10 @@ module std [system] {
module bit {
header "bit"
export *
+
+ module __bit {
+ module bit_cast { private header "__bit/bit_cast.h" }
+ }
}
module bitset {
header "bitset"
@@ -346,6 +350,13 @@ module std [system] {
module charconv {
header "charconv"
export *
+
+ module __charconv {
+ module chars_format { private header "__charconv/chars_format.h" }
+ module from_chars_result { private header "__charconv/from_chars_result.h" }
+ module to_chars_result { private header "__charconv/to_chars_result.h" }
+ }
+
}
module chrono {
header "chrono"
@@ -358,6 +369,16 @@ module std [system] {
module compare {
header "compare"
export *
+
+ module __compare {
+ module common_comparison_category { private header "__compare/common_comparison_category.h" }
+ module compare_three_way { private header "__compare/compare_three_way.h" }
+ module compare_three_way_result { private header "__compare/compare_three_way_result.h" }
+ module is_eq { private header "__compare/is_eq.h" }
+ module ordering { private header "__compare/ordering.h" }
+ module synth_three_way { private header "__compare/synth_three_way.h" }
+ module three_way_comparable { private header "__compare/three_way_comparable.h" }
+ }
}
module complex {
header "complex"
@@ -366,11 +387,49 @@ module std [system] {
module concepts {
header "concepts"
export *
+
+ module __concepts {
+ module arithmetic { private header "__concepts/arithmetic.h" }
+ module assignable { private header "__concepts/assignable.h" }
+ module boolean_testable { private header "__concepts/boolean_testable.h" }
+ module class_or_enum { private header "__concepts/class_or_enum.h" }
+ module common_reference_with { private header "__concepts/common_reference_with.h" }
+ module common_with { private header "__concepts/common_with.h" }
+ module constructible { private header "__concepts/constructible.h" }
+ module convertible_to { private header "__concepts/convertible_to.h" }
+ module copyable { private header "__concepts/copyable.h" }
+ module derived_from { private header "__concepts/derived_from.h" }
+ module destructible { private header "__concepts/destructible.h" }
+ module different_from { private header "__concepts/different_from.h" }
+ module equality_comparable { private header "__concepts/equality_comparable.h" }
+ module invocable { private header "__concepts/invocable.h" }
+ module movable { private header "__concepts/movable.h" }
+ module predicate { private header "__concepts/predicate.h" }
+ module regular { private header "__concepts/regular.h" }
+ module relation { private header "__concepts/relation.h" }
+ module same_as { private header "__concepts/same_as.h" }
+ module semiregular { private header "__concepts/semiregular.h" }
+ module swappable { private header "__concepts/swappable.h" }
+ module totally_ordered { private header "__concepts/totally_ordered.h" }
+ }
}
module condition_variable {
header "condition_variable"
export *
}
+ module coroutine {
+ requires coroutines
+ header "coroutine"
+ export compare
+ export *
+
+ module __coroutine {
+ module coroutine_handle { private header "__coroutine/coroutine_handle.h" }
+ module coroutine_traits { private header "__coroutine/coroutine_traits.h" }
+ module trivial_awaitables { private header "__coroutine/trivial_awaitables.h" }
+ module noop_coroutine_handle { private header "__coroutine/noop_coroutine_handle.h" }
+ }
+ }
module deque {
header "deque"
export initializer_list
@@ -393,8 +452,25 @@ module std [system] {
export *
module __format {
- module format_error { private header "__format/format_error.h" }
- module format_parse_context { private header "__format/format_parse_context.h" }
+ module format_arg { private header "__format/format_arg.h" }
+ module format_args { private header "__format/format_args.h" }
+ module format_context {
+ private header "__format/format_context.h"
+ export optional
+ export locale
+ }
+ module format_error { private header "__format/format_error.h" }
+ module format_fwd { private header "__format/format_fwd.h" }
+ module format_parse_context { private header "__format/format_parse_context.h" }
+ module format_string { private header "__format/format_string.h" }
+ module format_to_n_result { private header "__format/format_to_n_result.h" }
+ module formatter { private header "__format/formatter.h" }
+ module formatter_bool { private header "__format/formatter_bool.h" }
+ module formatter_char { private header "__format/formatter_char.h" }
+ module formatter_integer { private header "__format/formatter_integer.h" }
+ module formatter_integral { private header "__format/formatter_integral.h" }
+ module formatter_string { private header "__format/formatter_string.h" }
+ module parser_std_format_spec { private header "__format/parser_std_format_spec.h" }
}
}
module forward_list {
@@ -414,15 +490,17 @@ module std [system] {
module binary_function { private header "__functional/binary_function.h" }
module binary_negate { private header "__functional/binary_negate.h" }
module bind { private header "__functional/bind.h" }
+ module bind_back { private header "__functional/bind_back.h" }
module bind_front { private header "__functional/bind_front.h" }
module binder1st { private header "__functional/binder1st.h" }
module binder2nd { private header "__functional/binder2nd.h" }
+ module compose { private header "__functional/compose.h" }
module default_searcher { private header "__functional/default_searcher.h" }
module function { private header "__functional/function.h" }
module hash { private header "__functional/hash.h" }
module identity { private header "__functional/identity.h" }
- module is_transparent { private header "__functional/is_transparent.h" }
module invoke { private header "__functional/invoke.h" }
+ module is_transparent { private header "__functional/is_transparent.h" }
module mem_fn { private header "__functional/mem_fn.h" }
module mem_fun_ref { private header "__functional/mem_fun_ref.h" }
module not_fn { private header "__functional/not_fn.h" }
@@ -516,6 +594,7 @@ module std [system] {
module reverse_access { private header "__iterator/reverse_access.h" }
module reverse_iterator { private header "__iterator/reverse_iterator.h" }
module size { private header "__iterator/size.h" }
+ module unreachable_sentinel { private header "__iterator/unreachable_sentinel.h" }
module wrap_iter { private header "__iterator/wrap_iter.h" }
}
}
@@ -555,7 +634,6 @@ module std [system] {
module auto_ptr { private header "__memory/auto_ptr.h" }
module compressed_pair { private header "__memory/compressed_pair.h" }
module construct_at { private header "__memory/construct_at.h" }
- module pointer_safety { private header "__memory/pointer_safety.h" }
module pointer_traits { private header "__memory/pointer_traits.h" }
module raw_storage_iterator { private header "__memory/raw_storage_iterator.h" }
module shared_ptr { private header "__memory/shared_ptr.h" }
@@ -613,10 +691,15 @@ module std [system] {
module __ranges {
module access { private header "__ranges/access.h" }
- module all { private header "__ranges/all.h" }
+ module all {
+ private header "__ranges/all.h"
+ export functional.__functional.compose
+ export functional.__functional.perfect_forward
+ }
module common_view { private header "__ranges/common_view.h" }
module concepts { private header "__ranges/concepts.h" }
module copyable_box { private header "__ranges/copyable_box.h" }
+ module counted { private header "__ranges/counted.h" }
module dangling { private header "__ranges/dangling.h" }
module data { private header "__ranges/data.h" }
module drop_view { private header "__ranges/drop_view.h" }
@@ -624,11 +707,21 @@ module std [system] {
module empty_view { private header "__ranges/empty_view.h" }
module enable_borrowed_range { private header "__ranges/enable_borrowed_range.h" }
module enable_view { private header "__ranges/enable_view.h" }
+ module iota_view { private header "__ranges/iota_view.h" }
+ module join_view { private header "__ranges/join_view.h" }
module non_propagating_cache { private header "__ranges/non_propagating_cache.h" }
+ module range_adaptor { private header "__ranges/range_adaptor.h" }
module ref_view { private header "__ranges/ref_view.h" }
+ module reverse_view { private header "__ranges/reverse_view.h" }
module size { private header "__ranges/size.h" }
+ module single_view { private header "__ranges/single_view.h" }
module subrange { private header "__ranges/subrange.h" }
- module transform_view { private header "__ranges/transform_view.h" }
+ module take_view { private header "__ranges/take_view.h" }
+ module transform_view {
+ private header "__ranges/transform_view.h"
+ export functional.__functional.bind_back
+ export functional.__functional.perfect_forward
+ }
module view_interface { private header "__ranges/view_interface.h" }
}
}
@@ -706,6 +799,10 @@ module std [system] {
module thread {
header "thread"
export *
+
+ module __thread {
+ module poll_with_backoff { private header "__thread/poll_with_backoff.h" }
+ }
}
module tuple {
header "tuple"
@@ -740,9 +837,9 @@ module std [system] {
export *
module __utility {
- module __decay_copy { private header "__utility/__decay_copy.h" }
module as_const { private header "__utility/as_const.h" }
module cmp { private header "__utility/cmp.h" }
+ module decay_copy { private header "__utility/decay_copy.h" }
module declval { private header "__utility/declval.h" }
module exchange { private header "__utility/exchange.h" }
module forward { private header "__utility/forward.h" }
@@ -789,6 +886,7 @@ module std [system] {
module __function_like { private header "__function_like.h" export * }
module __hash_table { header "__hash_table" export * }
module __locale { private header "__locale" export * }
+ module __mbstate { private header "__mbstate_t.h" export * }
module __mutex_base { private header "__mutex_base" export * }
module __node_handle { private header "__node_handle" export * }
module __nullptr { header "__nullptr" export * }
diff --git a/libcxx/include/mutex b/libcxx/include/mutex
index eb8e54ad4421..822abe1a3f17 100644
--- a/libcxx/include/mutex
+++ b/libcxx/include/mutex
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- mutex ------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -112,7 +112,7 @@ template <class... MutexTypes>
class scoped_lock // C++17
{
public:
- using mutex_type = Mutex; // If MutexTypes... consists of the single type Mutex
+ using mutex_type = Mutex; // Only if sizeof...(MutexTypes) == 1
explicit scoped_lock(MutexTypes&... m);
scoped_lock(adopt_lock_t, MutexTypes&... m);
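
The corrected synopsis comment above states that scoped_lock only exposes mutex_type when it holds a single mutex type. A quick sketch:

#include <mutex>
#include <type_traits>

std::mutex a, b;

int main() {
    std::scoped_lock<std::mutex> single(a);
    static_assert(std::is_same_v<decltype(single)::mutex_type, std::mutex>);
    std::scoped_lock<std::mutex, std::mutex> both(a, b);   // no mutex_type member here
    return 0;
}
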
diff --git a/libcxx/include/new b/libcxx/include/new
index aefc08c161ca..593af9d5c619 100644
--- a/libcxx/include/new
+++ b/libcxx/include/new
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===----------------------------- new ------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -149,6 +149,16 @@ _LIBCPP_FUNC_VIS new_handler get_new_handler() _NOEXCEPT;
_LIBCPP_NORETURN _LIBCPP_FUNC_VIS void __throw_bad_alloc(); // not in C++ spec
+_LIBCPP_NORETURN inline _LIBCPP_INLINE_VISIBILITY
+void __throw_bad_array_new_length()
+{
+#ifndef _LIBCPP_NO_EXCEPTIONS
+ throw bad_array_new_length();
+#else
+ _VSTD::abort();
+#endif
+}
+
#if !defined(_LIBCPP_HAS_NO_LIBRARY_ALIGNED_ALLOCATION) && \
!defined(_LIBCPP_ABI_VCRUNTIME)
#ifndef _LIBCPP_CXX03_LANG
@@ -164,7 +174,7 @@ enum align_val_t { __zero = 0, __max = (size_t)-1 };
struct destroying_delete_t {
explicit destroying_delete_t() = default;
};
-_LIBCPP_INLINE_VAR constexpr destroying_delete_t destroying_delete{};
+inline constexpr destroying_delete_t destroying_delete{};
#endif // _LIBCPP_STD_VER > 17
} // std
@@ -332,22 +342,17 @@ void __libcpp_aligned_free(void* __ptr) {
template <class _Tp>
-_LIBCPP_NODISCARD_AFTER_CXX17 inline
+_LIBCPP_NODISCARD_AFTER_CXX17 inline _LIBCPP_HIDE_FROM_ABI
_LIBCPP_CONSTEXPR _Tp* __launder(_Tp* __p) _NOEXCEPT
{
static_assert (!(is_function<_Tp>::value), "can't launder functions" );
static_assert (!(is_same<void, typename remove_cv<_Tp>::type>::value), "can't launder cv-void" );
-#ifdef _LIBCPP_COMPILER_HAS_BUILTIN_LAUNDER
return __builtin_launder(__p);
-#else
- return __p;
-#endif
}
-
#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_NODISCARD_AFTER_CXX17 inline _LIBCPP_INLINE_VISIBILITY
+_LIBCPP_NODISCARD_AFTER_CXX17 inline _LIBCPP_HIDE_FROM_ABI
constexpr _Tp* launder(_Tp* __p) noexcept
{
return _VSTD::__launder(__p);
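
launder() above is now unconditionally implemented with __builtin_launder and hidden from the ABI; its observable behaviour is unchanged. A minimal sketch of the classic use case, with a hypothetical Widget type:

#include <new>

struct Widget { const int id; };

int main() {
    alignas(Widget) unsigned char buf[sizeof(Widget)];
    Widget* first = ::new (buf) Widget{1};
    first->~Widget();
    ::new (buf) Widget{2};                     // new object in the same storage
    // The stale pointer cannot be reused directly because of the const member;
    // std::launder obtains a pointer to the newly created object.
    return std::launder(first)->id == 2 ? 0 : 1;
}
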
diff --git a/libcxx/include/numbers b/libcxx/include/numbers
index 52c67dd35930..ede4e33c7a88 100644
--- a/libcxx/include/numbers
+++ b/libcxx/include/numbers
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===---------------------------- numbers ---------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -69,9 +69,6 @@ namespace std::numbers {
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
namespace numbers {
@@ -131,8 +128,6 @@ inline constexpr double phi = phi_v<double>;
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
-
#endif //_LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
#endif // _LIBCPP_NUMBERS
diff --git a/libcxx/include/numeric b/libcxx/include/numeric
index d42cbf97e511..fc44efff761d 100644
--- a/libcxx/include/numeric
+++ b/libcxx/include/numeric
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===---------------------------- numeric ---------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/optional b/libcxx/include/optional
index 118db66a4abc..837f867328fb 100644
--- a/libcxx/include/optional
+++ b/libcxx/include/optional
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===-------------------------- optional ----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -163,10 +163,6 @@ template<class T>
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
-
namespace std // purposefully not using versioning namespace
{
@@ -202,7 +198,7 @@ struct nullopt_t
_LIBCPP_INLINE_VISIBILITY constexpr explicit nullopt_t(__secret_tag, __secret_tag) noexcept {}
};
-_LIBCPP_INLINE_VAR constexpr nullopt_t nullopt{nullopt_t::__secret_tag{}, nullopt_t::__secret_tag{}};
+inline constexpr nullopt_t nullopt{nullopt_t::__secret_tag{}, nullopt_t::__secret_tag{}};
template <class _Tp, bool = is_trivially_destructible<_Tp>::value>
struct __optional_destruct_base;
@@ -695,7 +691,7 @@ public:
_LIBCPP_INLINE_VISIBILITY constexpr optional(optional&&) = default;
_LIBCPP_INLINE_VISIBILITY constexpr optional(nullopt_t) noexcept {}
- template <class _InPlaceT, class... _Args, class = _EnableIf<
+ template <class _InPlaceT, class... _Args, class = enable_if_t<
_And<
_IsSame<_InPlaceT, in_place_t>,
is_constructible<value_type, _Args...>
@@ -706,21 +702,21 @@ public:
constexpr explicit optional(_InPlaceT, _Args&&... __args)
: __base(in_place, _VSTD::forward<_Args>(__args)...) {}
- template <class _Up, class... _Args, class = _EnableIf<
+ template <class _Up, class... _Args, class = enable_if_t<
is_constructible_v<value_type, initializer_list<_Up>&, _Args...>>
>
_LIBCPP_INLINE_VISIBILITY
constexpr explicit optional(in_place_t, initializer_list<_Up> __il, _Args&&... __args)
: __base(in_place, __il, _VSTD::forward<_Args>(__args)...) {}
- template <class _Up = value_type, _EnableIf<
+ template <class _Up = value_type, enable_if_t<
_CheckOptionalArgsCtor<_Up>::template __enable_implicit<_Up>()
, int> = 0>
_LIBCPP_INLINE_VISIBILITY
constexpr optional(_Up&& __v)
: __base(in_place, _VSTD::forward<_Up>(__v)) {}
- template <class _Up, _EnableIf<
+ template <class _Up, enable_if_t<
_CheckOptionalArgsCtor<_Up>::template __enable_explicit<_Up>()
, int> = 0>
_LIBCPP_INLINE_VISIBILITY
@@ -728,7 +724,7 @@ public:
: __base(in_place, _VSTD::forward<_Up>(__v)) {}
// LWG2756: conditionally explicit conversion from const optional<_Up>&
- template <class _Up, _EnableIf<
+ template <class _Up, enable_if_t<
_CheckOptionalLikeCtor<_Up, _Up const&>::template __enable_implicit<_Up>()
, int> = 0>
_LIBCPP_INLINE_VISIBILITY
@@ -736,7 +732,7 @@ public:
{
this->__construct_from(__v);
}
- template <class _Up, _EnableIf<
+ template <class _Up, enable_if_t<
_CheckOptionalLikeCtor<_Up, _Up const&>::template __enable_explicit<_Up>()
, int> = 0>
_LIBCPP_INLINE_VISIBILITY
@@ -746,7 +742,7 @@ public:
}
// LWG2756: conditionally explicit conversion from optional<_Up>&&
- template <class _Up, _EnableIf<
+ template <class _Up, enable_if_t<
_CheckOptionalLikeCtor<_Up, _Up &&>::template __enable_implicit<_Up>()
, int> = 0>
_LIBCPP_INLINE_VISIBILITY
@@ -754,7 +750,7 @@ public:
{
this->__construct_from(_VSTD::move(__v));
}
- template <class _Up, _EnableIf<
+ template <class _Up, enable_if_t<
_CheckOptionalLikeCtor<_Up, _Up &&>::template __enable_explicit<_Up>()
, int> = 0>
_LIBCPP_INLINE_VISIBILITY
@@ -775,7 +771,7 @@ public:
// LWG2756
template <class _Up = value_type,
- class = _EnableIf<
+ class = enable_if_t<
_And<
_IsNotSame<__uncvref_t<_Up>, optional>,
_Or<
@@ -798,7 +794,7 @@ public:
}
// LWG2756
- template <class _Up, _EnableIf<
+ template <class _Up, enable_if_t<
_CheckOptionalLikeAssign<_Up, _Up const&>::template __enable_assign<_Up>()
, int> = 0>
_LIBCPP_INLINE_VISIBILITY
@@ -810,7 +806,7 @@ public:
}
// LWG2756
- template <class _Up, _EnableIf<
+ template <class _Up, enable_if_t<
_CheckOptionalLikeCtor<_Up, _Up &&>::template __enable_assign<_Up>()
, int> = 0>
_LIBCPP_INLINE_VISIBILITY
@@ -822,7 +818,7 @@ public:
}
template <class... _Args,
- class = _EnableIf
+ class = enable_if_t
<
is_constructible_v<value_type, _Args...>
>
@@ -837,7 +833,7 @@ public:
}
template <class _Up, class... _Args,
- class = _EnableIf
+ class = enable_if_t
<
is_constructible_v<value_type, initializer_list<_Up>&, _Args...>
>
@@ -883,11 +879,7 @@ public:
operator->() const
{
_LIBCPP_ASSERT(this->has_value(), "optional operator-> called on a disengaged value");
-#ifndef _LIBCPP_HAS_NO_BUILTIN_ADDRESSOF
return _VSTD::addressof(this->__get());
-#else
- return __operator_arrow(__has_operator_addressof<value_type>{}, this->__get());
-#endif
}
_LIBCPP_INLINE_VISIBILITY
@@ -896,11 +888,7 @@ public:
operator->()
{
_LIBCPP_ASSERT(this->has_value(), "optional operator-> called on a disengaged value");
-#ifndef _LIBCPP_HAS_NO_BUILTIN_ADDRESSOF
return _VSTD::addressof(this->__get());
-#else
- return __operator_arrow(__has_operator_addressof<value_type>{}, this->__get());
-#endif
}
_LIBCPP_INLINE_VISIBILITY
@@ -1006,26 +994,9 @@ public:
}
using __base::reset;
-
-private:
- template <class _Up>
- _LIBCPP_INLINE_VISIBILITY
- static _LIBCPP_CONSTEXPR_AFTER_CXX17 _Up*
- __operator_arrow(true_type, _Up& __x)
- {
- return _VSTD::addressof(__x);
- }
-
- template <class _Up>
- _LIBCPP_INLINE_VISIBILITY
- static constexpr _Up*
- __operator_arrow(false_type, _Up& __x)
- {
- return &__x;
- }
};
-#ifndef _LIBCPP_HAS_NO_DEDUCTION_GUIDES
+#if _LIBCPP_STD_VER >= 17
template<class T>
optional(T) -> optional<T>;
#endif
@@ -1033,7 +1004,7 @@ template<class T>
// Comparisons between optionals
template <class _Tp, class _Up>
_LIBCPP_INLINE_VISIBILITY constexpr
-_EnableIf<
+enable_if_t<
is_convertible_v<decltype(declval<const _Tp&>() ==
declval<const _Up&>()), bool>,
bool
@@ -1049,7 +1020,7 @@ operator==(const optional<_Tp>& __x, const optional<_Up>& __y)
template <class _Tp, class _Up>
_LIBCPP_INLINE_VISIBILITY constexpr
-_EnableIf<
+enable_if_t<
is_convertible_v<decltype(declval<const _Tp&>() !=
declval<const _Up&>()), bool>,
bool
@@ -1065,7 +1036,7 @@ operator!=(const optional<_Tp>& __x, const optional<_Up>& __y)
template <class _Tp, class _Up>
_LIBCPP_INLINE_VISIBILITY constexpr
-_EnableIf<
+enable_if_t<
is_convertible_v<decltype(declval<const _Tp&>() <
declval<const _Up&>()), bool>,
bool
@@ -1081,7 +1052,7 @@ operator<(const optional<_Tp>& __x, const optional<_Up>& __y)
template <class _Tp, class _Up>
_LIBCPP_INLINE_VISIBILITY constexpr
-_EnableIf<
+enable_if_t<
is_convertible_v<decltype(declval<const _Tp&>() >
declval<const _Up&>()), bool>,
bool
@@ -1097,7 +1068,7 @@ operator>(const optional<_Tp>& __x, const optional<_Up>& __y)
template <class _Tp, class _Up>
_LIBCPP_INLINE_VISIBILITY constexpr
-_EnableIf<
+enable_if_t<
is_convertible_v<decltype(declval<const _Tp&>() <=
declval<const _Up&>()), bool>,
bool
@@ -1113,7 +1084,7 @@ operator<=(const optional<_Tp>& __x, const optional<_Up>& __y)
template <class _Tp, class _Up>
_LIBCPP_INLINE_VISIBILITY constexpr
-_EnableIf<
+enable_if_t<
is_convertible_v<decltype(declval<const _Tp&>() >=
declval<const _Up&>()), bool>,
bool
@@ -1227,7 +1198,7 @@ operator>=(nullopt_t, const optional<_Tp>& __x) noexcept
// Comparisons with T
template <class _Tp, class _Up>
_LIBCPP_INLINE_VISIBILITY constexpr
-_EnableIf<
+enable_if_t<
is_convertible_v<decltype(declval<const _Tp&>() ==
declval<const _Up&>()), bool>,
bool
@@ -1239,7 +1210,7 @@ operator==(const optional<_Tp>& __x, const _Up& __v)
template <class _Tp, class _Up>
_LIBCPP_INLINE_VISIBILITY constexpr
-_EnableIf<
+enable_if_t<
is_convertible_v<decltype(declval<const _Tp&>() ==
declval<const _Up&>()), bool>,
bool
@@ -1251,7 +1222,7 @@ operator==(const _Tp& __v, const optional<_Up>& __x)
template <class _Tp, class _Up>
_LIBCPP_INLINE_VISIBILITY constexpr
-_EnableIf<
+enable_if_t<
is_convertible_v<decltype(declval<const _Tp&>() !=
declval<const _Up&>()), bool>,
bool
@@ -1263,7 +1234,7 @@ operator!=(const optional<_Tp>& __x, const _Up& __v)
template <class _Tp, class _Up>
_LIBCPP_INLINE_VISIBILITY constexpr
-_EnableIf<
+enable_if_t<
is_convertible_v<decltype(declval<const _Tp&>() !=
declval<const _Up&>()), bool>,
bool
@@ -1275,7 +1246,7 @@ operator!=(const _Tp& __v, const optional<_Up>& __x)
template <class _Tp, class _Up>
_LIBCPP_INLINE_VISIBILITY constexpr
-_EnableIf<
+enable_if_t<
is_convertible_v<decltype(declval<const _Tp&>() <
declval<const _Up&>()), bool>,
bool
@@ -1287,7 +1258,7 @@ operator<(const optional<_Tp>& __x, const _Up& __v)
template <class _Tp, class _Up>
_LIBCPP_INLINE_VISIBILITY constexpr
-_EnableIf<
+enable_if_t<
is_convertible_v<decltype(declval<const _Tp&>() <
declval<const _Up&>()), bool>,
bool
@@ -1299,7 +1270,7 @@ operator<(const _Tp& __v, const optional<_Up>& __x)
template <class _Tp, class _Up>
_LIBCPP_INLINE_VISIBILITY constexpr
-_EnableIf<
+enable_if_t<
is_convertible_v<decltype(declval<const _Tp&>() <=
declval<const _Up&>()), bool>,
bool
@@ -1311,7 +1282,7 @@ operator<=(const optional<_Tp>& __x, const _Up& __v)
template <class _Tp, class _Up>
_LIBCPP_INLINE_VISIBILITY constexpr
-_EnableIf<
+enable_if_t<
is_convertible_v<decltype(declval<const _Tp&>() <=
declval<const _Up&>()), bool>,
bool
@@ -1323,7 +1294,7 @@ operator<=(const _Tp& __v, const optional<_Up>& __x)
template <class _Tp, class _Up>
_LIBCPP_INLINE_VISIBILITY constexpr
-_EnableIf<
+enable_if_t<
is_convertible_v<decltype(declval<const _Tp&>() >
declval<const _Up&>()), bool>,
bool
@@ -1335,7 +1306,7 @@ operator>(const optional<_Tp>& __x, const _Up& __v)
template <class _Tp, class _Up>
_LIBCPP_INLINE_VISIBILITY constexpr
-_EnableIf<
+enable_if_t<
is_convertible_v<decltype(declval<const _Tp&>() >
declval<const _Up&>()), bool>,
bool
@@ -1347,7 +1318,7 @@ operator>(const _Tp& __v, const optional<_Up>& __x)
template <class _Tp, class _Up>
_LIBCPP_INLINE_VISIBILITY constexpr
-_EnableIf<
+enable_if_t<
is_convertible_v<decltype(declval<const _Tp&>() >=
declval<const _Up&>()), bool>,
bool
@@ -1359,7 +1330,7 @@ operator>=(const optional<_Tp>& __x, const _Up& __v)
template <class _Tp, class _Up>
_LIBCPP_INLINE_VISIBILITY constexpr
-_EnableIf<
+enable_if_t<
is_convertible_v<decltype(declval<const _Tp&>() >=
declval<const _Up&>()), bool>,
bool
@@ -1372,7 +1343,7 @@ operator>=(const _Tp& __v, const optional<_Up>& __x)
template <class _Tp>
inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
-_EnableIf<
+enable_if_t<
is_move_constructible_v<_Tp> && is_swappable_v<_Tp>,
void
>
@@ -1423,6 +1394,4 @@ _LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER > 14
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP_OPTIONAL
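
The optional(T) -> optional<T> guide above is now gated on the language version rather than the retired deduction-guides macro, and the comparison operators are constrained with enable_if_t directly. A short sketch of both:

#include <optional>
#include <type_traits>

int main() {
    std::optional o = 42;                        // deduces std::optional<int>
    static_assert(std::is_same_v<decltype(o), std::optional<int>>);
    return (o == 42 && o != std::nullopt) ? 0 : 1;   // constrained comparisons
}
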
diff --git a/libcxx/include/ostream b/libcxx/include/ostream
index efeaee253eb9..98f36ea7acc1 100644
--- a/libcxx/include/ostream
+++ b/libcxx/include/ostream
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===-------------------------- ostream -----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -55,6 +55,7 @@ public:
basic_ostream& operator<<(double f);
basic_ostream& operator<<(long double f);
basic_ostream& operator<<(const void* p);
+ basic_ostream& operator<<(const volatile void* val); // C++23
basic_ostream& operator<<(basic_streambuf<char_type,traits>* sb);
basic_ostream& operator<<(nullptr_t);
@@ -210,6 +211,14 @@ public:
basic_ostream& operator<<(double __f);
basic_ostream& operator<<(long double __f);
basic_ostream& operator<<(const void* __p);
+
+#if _LIBCPP_STD_VER > 20
+ _LIBCPP_HIDE_FROM_ABI
+ basic_ostream& operator<<(const volatile void* __p) {
+ return operator<<(const_cast<const void*>(__p));
+ }
+#endif
+
basic_ostream& operator<<(basic_streambuf<char_type, traits_type>* __sb);
_LIBCPP_INLINE_VISIBILITY
@@ -1087,7 +1096,9 @@ operator<<(basic_ostream<_CharT, _Traits>& __os, const bitset<_Size>& __x)
}
_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS basic_ostream<char>)
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS basic_ostream<wchar_t>)
+#endif
_LIBCPP_END_NAMESPACE_STD
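
The C++23 overload added above keeps a cv-qualified void pointer from falling back to the bool inserter; it casts away volatile and formats the address. A short sketch (the new behaviour applies only when compiled as C++23):

#include <iostream>

int main() {
    volatile int x = 0;
    const volatile void* p = &x;
    std::cout << p << '\n';   // C++23: prints the address; earlier: converts to bool, prints 1
    return 0;
}
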
diff --git a/libcxx/include/queue b/libcxx/include/queue
index 42470e3a1022..03081eb844ba 100644
--- a/libcxx/include/queue
+++ b/libcxx/include/queue
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- queue ------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -115,27 +115,39 @@ public:
priority_queue() : priority_queue(Compare()) {} // C++20
explicit priority_queue(const Compare& x) : priority_queue(x, Container()) {}
priority_queue(const Compare& x, const Container&);
- explicit priority_queue(const Compare& x = Compare(), Container&&= Container()); // before C++20
+ explicit priority_queue(const Compare& x = Compare(), Container&& = Container()); // before C++20
priority_queue(const Compare& x, Container&&); // C++20
template <class InputIterator>
priority_queue(InputIterator first, InputIterator last,
const Compare& comp = Compare());
template <class InputIterator>
priority_queue(InputIterator first, InputIterator last,
- const Compare& comp, const container_type& c);
+ const Compare& comp, const Container& c);
template <class InputIterator>
priority_queue(InputIterator first, InputIterator last,
- const Compare& comp, container_type&& c);
+ const Compare& comp, Container&& c);
template <class Alloc>
explicit priority_queue(const Alloc& a);
template <class Alloc>
priority_queue(const Compare& comp, const Alloc& a);
template <class Alloc>
- priority_queue(const Compare& comp, const container_type& c,
+ priority_queue(const Compare& comp, const Container& c,
const Alloc& a);
template <class Alloc>
- priority_queue(const Compare& comp, container_type&& c,
+ priority_queue(const Compare& comp, Container&& c,
+ const Alloc& a);
+ template <class InputIterator>
+ priority_queue(InputIterator first, InputIterator last,
const Alloc& a);
+ template <class InputIterator>
+ priority_queue(InputIterator first, InputIterator last,
+ const Compare& comp, const Alloc& a);
+ template <class InputIterator>
+ priority_queue(InputIterator first, InputIterator last,
+ const Compare& comp, const Container& c, const Alloc& a);
+ template <class InputIterator>
+ priority_queue(InputIterator first, InputIterator last,
+ const Compare& comp, Container&& c, const Alloc& a);
template <class Alloc>
priority_queue(const priority_queue& q, const Alloc& a);
template <class Alloc>
@@ -160,15 +172,30 @@ priority_queue(Compare, Container)
-> priority_queue<typename Container::value_type, Container, Compare>; // C++17
template<class InputIterator,
- class Compare = less<typename iterator_traits<InputIterator>::value_type>,
- class Container = vector<typename iterator_traits<InputIterator>::value_type>>
+ class Compare = less<iter-value-type<InputIterator>>,
+ class Container = vector<iter-value-type<InputIterator>>>
priority_queue(InputIterator, InputIterator, Compare = Compare(), Container = Container())
- -> priority_queue<typename iterator_traits<InputIterator>::value_type, Container, Compare>; // C++17
+ -> priority_queue<iter-value-type<InputIterator>, Container, Compare>; // C++17
template<class Compare, class Container, class Allocator>
priority_queue(Compare, Container, Allocator)
-> priority_queue<typename Container::value_type, Container, Compare>; // C++17
+template<class InputIterator, class Allocator>
+priority_queue(InputIterator, InputIterator, Allocator)
+ -> priority_queue<iter-value-type<InputIterator>,
+ vector<iter-value-type<InputIterator>, Allocator>,
+ less<iter-value-type<InputIterator>>>; // C++17
+
+template<class InputIterator, class Compare, class Allocator>
+priority_queue(InputIterator, InputIterator, Compare, Allocator)
+ -> priority_queue<iter-value-type<InputIterator>,
+ vector<iter-value-type<InputIterator>, Allocator>, Compare>; // C++17
+
+template<class InputIterator, class Compare, class Container, class Allocator>
+priority_queue(InputIterator, InputIterator, Compare, Container, Allocator)
+ -> priority_queue<typename Container::value_type, Container, Compare>; // C++17
+
template <class T, class Container, class Compare>
void swap(priority_queue<T, Container, Compare>& x,
priority_queue<T, Container, Compare>& y)
@@ -252,28 +279,28 @@ public:
template <class _Alloc>
_LIBCPP_INLINE_VISIBILITY
explicit queue(const _Alloc& __a,
- _EnableIf<uses_allocator<container_type, _Alloc>::value>* = 0)
+ __enable_if_t<uses_allocator<container_type, _Alloc>::value>* = 0)
: c(__a) {}
template <class _Alloc>
_LIBCPP_INLINE_VISIBILITY
queue(const queue& __q, const _Alloc& __a,
- _EnableIf<uses_allocator<container_type, _Alloc>::value>* = 0)
+ __enable_if_t<uses_allocator<container_type, _Alloc>::value>* = 0)
: c(__q.c, __a) {}
template <class _Alloc>
_LIBCPP_INLINE_VISIBILITY
queue(const container_type& __c, const _Alloc& __a,
- _EnableIf<uses_allocator<container_type, _Alloc>::value>* = 0)
+ __enable_if_t<uses_allocator<container_type, _Alloc>::value>* = 0)
: c(__c, __a) {}
#ifndef _LIBCPP_CXX03_LANG
template <class _Alloc>
_LIBCPP_INLINE_VISIBILITY
queue(container_type&& __c, const _Alloc& __a,
- _EnableIf<uses_allocator<container_type, _Alloc>::value>* = 0)
+ __enable_if_t<uses_allocator<container_type, _Alloc>::value>* = 0)
: c(_VSTD::move(__c), __a) {}
template <class _Alloc>
_LIBCPP_INLINE_VISIBILITY
queue(queue&& __q, const _Alloc& __a,
- _EnableIf<uses_allocator<container_type, _Alloc>::value>* = 0)
+ __enable_if_t<uses_allocator<container_type, _Alloc>::value>* = 0)
: c(_VSTD::move(__q.c), __a) {}
#endif // _LIBCPP_CXX03_LANG
@@ -331,17 +358,17 @@ public:
operator< (const queue<_T1, _C1>& __x,const queue<_T1, _C1>& __y);
};
-#ifndef _LIBCPP_HAS_NO_DEDUCTION_GUIDES
+#if _LIBCPP_STD_VER >= 17
template<class _Container,
- class = _EnableIf<!__is_allocator<_Container>::value>
+ class = enable_if_t<!__is_allocator<_Container>::value>
>
queue(_Container)
-> queue<typename _Container::value_type, _Container>;
template<class _Container,
class _Alloc,
- class = _EnableIf<!__is_allocator<_Container>::value>,
- class = _EnableIf<uses_allocator<_Container, _Alloc>::value>
+ class = enable_if_t<!__is_allocator<_Container>::value>,
+ class = enable_if_t<uses_allocator<_Container, _Alloc>::value>
>
queue(_Container, _Alloc)
-> queue<typename _Container::value_type, _Container>;
@@ -397,7 +424,7 @@ operator<=(const queue<_Tp, _Container>& __x,const queue<_Tp, _Container>& __y)
template <class _Tp, class _Container>
inline _LIBCPP_INLINE_VISIBILITY
-_EnableIf<__is_swappable<_Container>::value, void>
+__enable_if_t<__is_swappable<_Container>::value, void>
swap(queue<_Tp, _Container>& __x, queue<_Tp, _Container>& __y)
_NOEXCEPT_(_NOEXCEPT_(__x.swap(__y)))
{
@@ -464,16 +491,16 @@ public:
_LIBCPP_INLINE_VISIBILITY
priority_queue(const value_compare& __comp, container_type&& __c);
#endif
- template <class _InputIter>
+ template <class _InputIter, class = __enable_if_t<__is_cpp17_input_iterator<_InputIter>::value> >
_LIBCPP_INLINE_VISIBILITY
priority_queue(_InputIter __f, _InputIter __l,
const value_compare& __comp = value_compare());
- template <class _InputIter>
+ template <class _InputIter, class = __enable_if_t<__is_cpp17_input_iterator<_InputIter>::value> >
_LIBCPP_INLINE_VISIBILITY
priority_queue(_InputIter __f, _InputIter __l,
const value_compare& __comp, const container_type& __c);
#ifndef _LIBCPP_CXX03_LANG
- template <class _InputIter>
+ template <class _InputIter, class = __enable_if_t<__is_cpp17_input_iterator<_InputIter>::value> >
_LIBCPP_INLINE_VISIBILITY
priority_queue(_InputIter __f, _InputIter __l,
const value_compare& __comp, container_type&& __c);
@@ -481,32 +508,57 @@ public:
template <class _Alloc>
_LIBCPP_INLINE_VISIBILITY
explicit priority_queue(const _Alloc& __a,
- _EnableIf<uses_allocator<container_type, _Alloc>::value>* = 0);
+ __enable_if_t<uses_allocator<container_type, _Alloc>::value>* = 0);
template <class _Alloc>
_LIBCPP_INLINE_VISIBILITY
priority_queue(const value_compare& __comp, const _Alloc& __a,
- _EnableIf<uses_allocator<container_type, _Alloc>::value>* = 0);
+ __enable_if_t<uses_allocator<container_type, _Alloc>::value>* = 0);
template <class _Alloc>
_LIBCPP_INLINE_VISIBILITY
priority_queue(const value_compare& __comp, const container_type& __c,
const _Alloc& __a,
- _EnableIf<uses_allocator<container_type, _Alloc>::value>* = 0);
+ __enable_if_t<uses_allocator<container_type, _Alloc>::value>* = 0);
template <class _Alloc>
_LIBCPP_INLINE_VISIBILITY
priority_queue(const priority_queue& __q, const _Alloc& __a,
- _EnableIf<uses_allocator<container_type, _Alloc>::value>* = 0);
+ __enable_if_t<uses_allocator<container_type, _Alloc>::value>* = 0);
#ifndef _LIBCPP_CXX03_LANG
template <class _Alloc>
_LIBCPP_INLINE_VISIBILITY
priority_queue(const value_compare& __comp, container_type&& __c,
const _Alloc& __a,
- _EnableIf<uses_allocator<container_type, _Alloc>::value>* = 0);
+ __enable_if_t<uses_allocator<container_type, _Alloc>::value>* = 0);
template <class _Alloc>
_LIBCPP_INLINE_VISIBILITY
priority_queue(priority_queue&& __q, const _Alloc& __a,
- _EnableIf<uses_allocator<container_type, _Alloc>::value>* = 0);
+ __enable_if_t<uses_allocator<container_type, _Alloc>::value>* = 0);
#endif // _LIBCPP_CXX03_LANG
+ template <class _InputIter, class _Alloc, class = __enable_if_t<__is_cpp17_input_iterator<_InputIter>::value> >
+ _LIBCPP_INLINE_VISIBILITY
+ priority_queue(_InputIter __f, _InputIter __l, const _Alloc& __a,
+ __enable_if_t<uses_allocator<container_type, _Alloc>::value>* = 0);
+
+ template <class _InputIter, class _Alloc, class = __enable_if_t<__is_cpp17_input_iterator<_InputIter>::value> >
+ _LIBCPP_INLINE_VISIBILITY
+ priority_queue(_InputIter __f, _InputIter __l,
+ const value_compare& __comp, const _Alloc& __a,
+ __enable_if_t<uses_allocator<container_type, _Alloc>::value>* = 0);
+
+ template <class _InputIter, class _Alloc, class = __enable_if_t<__is_cpp17_input_iterator<_InputIter>::value> >
+ _LIBCPP_INLINE_VISIBILITY
+ priority_queue(_InputIter __f, _InputIter __l,
+ const value_compare& __comp, const container_type& __c, const _Alloc& __a,
+ __enable_if_t<uses_allocator<container_type, _Alloc>::value>* = 0);
+
+#ifndef _LIBCPP_CXX03_LANG
+ template <class _InputIter, class _Alloc, class = __enable_if_t<__is_cpp17_input_iterator<_InputIter>::value> >
+ _LIBCPP_INLINE_VISIBILITY
+ priority_queue(_InputIter __f, _InputIter __l,
+ const value_compare& __comp, container_type&& __c, const _Alloc& __a,
+ __enable_if_t<uses_allocator<container_type, _Alloc>::value>* = 0);
+#endif // _LIBCPP_CXX03_LANG
+
_LIBCPP_NODISCARD_AFTER_CXX17 _LIBCPP_INLINE_VISIBILITY
bool empty() const {return c.empty();}
_LIBCPP_INLINE_VISIBILITY
@@ -532,11 +584,11 @@ public:
__is_nothrow_swappable<value_compare>::value);
};
-#ifndef _LIBCPP_HAS_NO_DEDUCTION_GUIDES
+#if _LIBCPP_STD_VER >= 17
template <class _Compare,
class _Container,
- class = _EnableIf<!__is_allocator<_Compare>::value>,
- class = _EnableIf<!__is_allocator<_Container>::value>
+ class = enable_if_t<!__is_allocator<_Compare>::value>,
+ class = enable_if_t<!__is_allocator<_Container>::value>
>
priority_queue(_Compare, _Container)
-> priority_queue<typename _Container::value_type, _Container, _Compare>;
@@ -544,9 +596,9 @@ priority_queue(_Compare, _Container)
template<class _InputIterator,
class _Compare = less<__iter_value_type<_InputIterator>>,
class _Container = vector<__iter_value_type<_InputIterator>>,
- class = _EnableIf<__is_cpp17_input_iterator<_InputIterator>::value>,
- class = _EnableIf<!__is_allocator<_Compare>::value>,
- class = _EnableIf<!__is_allocator<_Container>::value>
+ class = enable_if_t<__is_cpp17_input_iterator<_InputIterator>::value>,
+ class = enable_if_t<!__is_allocator<_Compare>::value>,
+ class = enable_if_t<!__is_allocator<_Container>::value>
>
priority_queue(_InputIterator, _InputIterator, _Compare = _Compare(), _Container = _Container())
-> priority_queue<__iter_value_type<_InputIterator>, _Container, _Compare>;
@@ -554,12 +606,39 @@ priority_queue(_InputIterator, _InputIterator, _Compare = _Compare(), _Container
template<class _Compare,
class _Container,
class _Alloc,
- class = _EnableIf<!__is_allocator<_Compare>::value>,
- class = _EnableIf<!__is_allocator<_Container>::value>,
- class = _EnableIf<uses_allocator<_Container, _Alloc>::value>
+ class = enable_if_t<!__is_allocator<_Compare>::value>,
+ class = enable_if_t<!__is_allocator<_Container>::value>,
+ class = enable_if_t<uses_allocator<_Container, _Alloc>::value>
>
priority_queue(_Compare, _Container, _Alloc)
-> priority_queue<typename _Container::value_type, _Container, _Compare>;
+
+template<class _InputIterator, class _Allocator,
+ class = enable_if_t<__is_cpp17_input_iterator<_InputIterator>::value>,
+ class = enable_if_t<__is_allocator<_Allocator>::value>
+>
+priority_queue(_InputIterator, _InputIterator, _Allocator)
+ -> priority_queue<__iter_value_type<_InputIterator>,
+ vector<__iter_value_type<_InputIterator>, _Allocator>,
+ less<__iter_value_type<_InputIterator>>>;
+
+template<class _InputIterator, class _Compare, class _Allocator,
+ class = enable_if_t<__is_cpp17_input_iterator<_InputIterator>::value>,
+ class = enable_if_t<!__is_allocator<_Compare>::value>,
+ class = enable_if_t<__is_allocator<_Allocator>::value>
+>
+priority_queue(_InputIterator, _InputIterator, _Compare, _Allocator)
+ -> priority_queue<__iter_value_type<_InputIterator>,
+ vector<__iter_value_type<_InputIterator>, _Allocator>, _Compare>;
+
+template<class _InputIterator, class _Compare, class _Container, class _Alloc,
+ class = enable_if_t<__is_cpp17_input_iterator<_InputIterator>::value>,
+ class = enable_if_t<!__is_allocator<_Compare>::value>,
+ class = enable_if_t<!__is_allocator<_Container>::value>,
+ class = enable_if_t<uses_allocator<_Container, _Alloc>::value>
+>
+priority_queue(_InputIterator, _InputIterator, _Compare, _Container, _Alloc)
+ -> priority_queue<typename _Container::value_type, _Container, _Compare>;
#endif
template <class _Tp, class _Container, class _Compare>
@@ -587,7 +666,7 @@ priority_queue<_Tp, _Container, _Compare>::priority_queue(const value_compare& _
#endif // _LIBCPP_CXX03_LANG
template <class _Tp, class _Container, class _Compare>
-template <class _InputIter>
+template <class _InputIter, class>
inline
priority_queue<_Tp, _Container, _Compare>::priority_queue(_InputIter __f, _InputIter __l,
const value_compare& __comp)
@@ -598,7 +677,7 @@ priority_queue<_Tp, _Container, _Compare>::priority_queue(_InputIter __f, _Input
}
template <class _Tp, class _Container, class _Compare>
-template <class _InputIter>
+template <class _InputIter, class>
inline
priority_queue<_Tp, _Container, _Compare>::priority_queue(_InputIter __f, _InputIter __l,
const value_compare& __comp,
@@ -613,7 +692,7 @@ priority_queue<_Tp, _Container, _Compare>::priority_queue(_InputIter __f, _Input
#ifndef _LIBCPP_CXX03_LANG
template <class _Tp, class _Container, class _Compare>
-template <class _InputIter>
+template <class _InputIter, class>
inline
priority_queue<_Tp, _Container, _Compare>::priority_queue(_InputIter __f, _InputIter __l,
const value_compare& __comp,
@@ -631,7 +710,7 @@ template <class _Tp, class _Container, class _Compare>
template <class _Alloc>
inline
priority_queue<_Tp, _Container, _Compare>::priority_queue(const _Alloc& __a,
- _EnableIf<uses_allocator<container_type, _Alloc>::value>*)
+ __enable_if_t<uses_allocator<container_type, _Alloc>::value>*)
: c(__a)
{
}
@@ -641,7 +720,7 @@ template <class _Alloc>
inline
priority_queue<_Tp, _Container, _Compare>::priority_queue(const value_compare& __comp,
const _Alloc& __a,
- _EnableIf<uses_allocator<container_type, _Alloc>::value>*)
+ __enable_if_t<uses_allocator<container_type, _Alloc>::value>*)
: c(__a),
comp(__comp)
{
@@ -653,7 +732,7 @@ inline
priority_queue<_Tp, _Container, _Compare>::priority_queue(const value_compare& __comp,
const container_type& __c,
const _Alloc& __a,
- _EnableIf<uses_allocator<container_type, _Alloc>::value>*)
+ __enable_if_t<uses_allocator<container_type, _Alloc>::value>*)
: c(__c, __a),
comp(__comp)
{
@@ -665,11 +744,10 @@ template <class _Alloc>
inline
priority_queue<_Tp, _Container, _Compare>::priority_queue(const priority_queue& __q,
const _Alloc& __a,
- _EnableIf<uses_allocator<container_type, _Alloc>::value>*)
+ __enable_if_t<uses_allocator<container_type, _Alloc>::value>*)
: c(__q.c, __a),
comp(__q.comp)
{
- _VSTD::make_heap(c.begin(), c.end(), comp);
}
#ifndef _LIBCPP_CXX03_LANG
@@ -680,7 +758,7 @@ inline
priority_queue<_Tp, _Container, _Compare>::priority_queue(const value_compare& __comp,
container_type&& __c,
const _Alloc& __a,
- _EnableIf<uses_allocator<container_type, _Alloc>::value>*)
+ __enable_if_t<uses_allocator<container_type, _Alloc>::value>*)
: c(_VSTD::move(__c), __a),
comp(__comp)
{
@@ -692,14 +770,68 @@ template <class _Alloc>
inline
priority_queue<_Tp, _Container, _Compare>::priority_queue(priority_queue&& __q,
const _Alloc& __a,
- _EnableIf<uses_allocator<container_type, _Alloc>::value>*)
+ __enable_if_t<uses_allocator<container_type, _Alloc>::value>*)
: c(_VSTD::move(__q.c), __a),
comp(_VSTD::move(__q.comp))
{
+}
+
+#endif // _LIBCPP_CXX03_LANG
+
+template <class _Tp, class _Container, class _Compare>
+template <class _InputIter, class _Alloc, class>
+inline
+priority_queue<_Tp, _Container, _Compare>::priority_queue(
+ _InputIter __f, _InputIter __l, const _Alloc& __a,
+ __enable_if_t<uses_allocator<container_type, _Alloc>::value>*)
+ : c(__f, __l, __a),
+ comp()
+{
_VSTD::make_heap(c.begin(), c.end(), comp);
}
-#endif // _LIBCPP_CXX03_LANG
+template <class _Tp, class _Container, class _Compare>
+template <class _InputIter, class _Alloc, class>
+inline
+priority_queue<_Tp, _Container, _Compare>::priority_queue(
+ _InputIter __f, _InputIter __l,
+ const value_compare& __comp, const _Alloc& __a,
+ __enable_if_t<uses_allocator<container_type, _Alloc>::value>*)
+ : c(__f, __l, __a),
+ comp(__comp)
+{
+ _VSTD::make_heap(c.begin(), c.end(), comp);
+}
+
+template <class _Tp, class _Container, class _Compare>
+template <class _InputIter, class _Alloc, class>
+inline
+priority_queue<_Tp, _Container, _Compare>::priority_queue(
+ _InputIter __f, _InputIter __l,
+ const value_compare& __comp, const container_type& __c, const _Alloc& __a,
+ __enable_if_t<uses_allocator<container_type, _Alloc>::value>*)
+ : c(__c, __a),
+ comp(__comp)
+{
+ c.insert(c.end(), __f, __l);
+ _VSTD::make_heap(c.begin(), c.end(), comp);
+}
+
+#ifndef _LIBCPP_CXX03_LANG
+template <class _Tp, class _Container, class _Compare>
+template <class _InputIter, class _Alloc, class>
+inline
+priority_queue<_Tp, _Container, _Compare>::priority_queue(
+ _InputIter __f, _InputIter __l, const value_compare& __comp,
+ container_type&& __c, const _Alloc& __a,
+ __enable_if_t<uses_allocator<container_type, _Alloc>::value>*)
+ : c(_VSTD::move(__c), __a),
+ comp(__comp)
+{
+ c.insert(c.end(), __f, __l);
+ _VSTD::make_heap(c.begin(), c.end(), comp);
+}
+#endif // _LIBCPP_CXX03_LANG
template <class _Tp, class _Container, class _Compare>
inline
@@ -756,7 +888,7 @@ priority_queue<_Tp, _Container, _Compare>::swap(priority_queue& __q)
template <class _Tp, class _Container, class _Compare>
inline _LIBCPP_INLINE_VISIBILITY
-_EnableIf<
+__enable_if_t<
__is_swappable<_Container>::value && __is_swappable<_Compare>::value,
void
>
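
[For reference, not part of the upstream sources: a minimal usage sketch of the priority_queue additions in the hunks above -- the (iterator, allocator) constructors and the iterator/allocator deduction guides. It assumes a standard library that ships these guides; all names below are illustrative only.]

    #include <functional>
    #include <iterator>
    #include <memory>
    #include <queue>
    #include <vector>

    int main() {
        int data[] = {3, 1, 4, 1, 5};
        std::allocator<int> alloc;

        // Deduces priority_queue<int, vector<int, allocator<int>>, less<int>>.
        std::priority_queue max_pq(std::begin(data), std::end(data), alloc);

        // Deduces priority_queue<int, vector<int, allocator<int>>, greater<int>>.
        std::priority_queue min_pq(std::begin(data), std::end(data),
                                   std::greater<int>(), alloc);

        return (max_pq.top() == 5 && min_pq.top() == 1) ? 0 : 1;
    }
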
diff --git a/libcxx/include/random b/libcxx/include/random
index 4a8c2a79267b..72d9855765f8 100644
--- a/libcxx/include/random
+++ b/libcxx/include/random
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- random -----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/ranges b/libcxx/include/ranges
index 47f66fd3f622..8a99ee64cfc9 100644
--- a/libcxx/include/ranges
+++ b/libcxx/include/ranges
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- ranges -----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -149,59 +149,97 @@ namespace std::ranges {
can-reference<invoke_result_t<F&, range_reference_t<V>>>
class transform_view;
+ // [range.counted], counted view
+ namespace views { inline constexpr unspecified counted = unspecified; }
+
// [range.common], common view
template<view V>
requires (!common_range<V> && copyable<iterator_t<V>>)
class common_view;
+ // [range.reverse], reverse view
+ template<view V>
+ requires bidirectional_range<V>
+ class reverse_view;
+
+ template<class T>
+ inline constexpr bool enable_borrowed_range<reverse_view<T>> = enable_borrowed_range<T>;
+
template<class T>
inline constexpr bool enable_borrowed_range<common_view<T>> = enable_borrowed_range<T>;
+
+ // [range.take], take view
+ template<view> class take_view;
+
+ template<class T>
+ inline constexpr bool enable_borrowed_range<take_view<T>> = enable_borrowed_range<T>;
+
+ template<copy_constructible T>
+ requires is_object_v<T>
+ class single_view;
+
+ template<weakly_incrementable W, semiregular Bound = unreachable_sentinel_t>
+ requires weakly-equality-comparable-with<W, Bound> && copyable<W>
+ class iota_view;
+
+ template<class W, class Bound>
+ inline constexpr bool enable_borrowed_range<iota_view<W, Bound>> = true;
+
+ // [range.join], join view
+ template<input_range V>
+ requires view<V> && input_range<range_reference_t<V>>
+ class join_view;
}
*/
+// Make sure all feature-test macros are available.
+#include <version>
+// Enable the contents of the header only when libc++ was built with LIBCXX_ENABLE_INCOMPLETE_FEATURES.
+#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES)
+
#include <__config>
#include <__ranges/access.h>
#include <__ranges/all.h>
#include <__ranges/common_view.h>
#include <__ranges/concepts.h>
+#include <__ranges/counted.h>
#include <__ranges/dangling.h>
#include <__ranges/data.h>
#include <__ranges/drop_view.h>
-#include <__ranges/empty.h>
#include <__ranges/empty_view.h>
+#include <__ranges/empty.h>
#include <__ranges/enable_borrowed_range.h>
#include <__ranges/enable_view.h>
+#include <__ranges/iota_view.h>
+#include <__ranges/join_view.h>
#include <__ranges/ref_view.h>
+#include <__ranges/reverse_view.h>
+#include <__ranges/single_view.h>
#include <__ranges/size.h>
#include <__ranges/subrange.h>
+#include <__ranges/take_view.h>
#include <__ranges/transform_view.h>
#include <__ranges/view_interface.h>
#include <compare> // Required by the standard.
#include <initializer_list> // Required by the standard.
#include <iterator> // Required by the standard.
#include <type_traits>
-#include <version>
-
-#if defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES)
-# error "The Ranges library is not supported since libc++ has been configured with LIBCXX_ENABLE_INCOMPLETE_FEATURES disabled"
-#endif
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_RANGES)
+namespace views = ranges::views;
+
#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_RANGES)
_LIBCPP_END_NAMESPACE_STD
-_LIBCPP_POP_MACROS
+#endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES)
#endif // _LIBCPP_RANGES
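
[For reference, not part of the upstream sources: a short sketch of what the views listed above enable once ranges are usable -- it assumes a C++20 toolchain and, for this snapshot, a libc++ built with LIBCXX_ENABLE_INCOMPLETE_FEATURES.]

    #include <ranges>

    // Sum of the three largest squares of 1..10: 100 + 81 + 64 == 245.
    int sum_of_top_three_squares() {
        auto squares = std::views::iota(1, 11)
                     | std::views::transform([](int i) { return i * i; });
        int sum = 0;
        for (int v : squares | std::views::reverse | std::views::take(3))
            sum += v;
        return sum;
    }
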
diff --git a/libcxx/include/ratio b/libcxx/include/ratio
index 091ea53accce..16b45a28ed8b 100644
--- a/libcxx/include/ratio
+++ b/libcxx/include/ratio
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===---------------------------- ratio -----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -499,30 +499,24 @@ struct __ratio_gcd
__static_lcm<_R1::den, _R2::den>::value> type;
};
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _R1, class _R2>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool ratio_equal_v
- = ratio_equal<_R1, _R2>::value;
+inline constexpr bool ratio_equal_v = ratio_equal<_R1, _R2>::value;
template <class _R1, class _R2>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool ratio_not_equal_v
- = ratio_not_equal<_R1, _R2>::value;
+inline constexpr bool ratio_not_equal_v = ratio_not_equal<_R1, _R2>::value;
template <class _R1, class _R2>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool ratio_less_v
- = ratio_less<_R1, _R2>::value;
+inline constexpr bool ratio_less_v = ratio_less<_R1, _R2>::value;
template <class _R1, class _R2>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool ratio_less_equal_v
- = ratio_less_equal<_R1, _R2>::value;
+inline constexpr bool ratio_less_equal_v = ratio_less_equal<_R1, _R2>::value;
template <class _R1, class _R2>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool ratio_greater_v
- = ratio_greater<_R1, _R2>::value;
+inline constexpr bool ratio_greater_v = ratio_greater<_R1, _R2>::value;
template <class _R1, class _R2>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool ratio_greater_equal_v
- = ratio_greater_equal<_R1, _R2>::value;
+inline constexpr bool ratio_greater_equal_v = ratio_greater_equal<_R1, _R2>::value;
#endif
_LIBCPP_END_NAMESPACE_STD
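
[For reference, not part of the upstream sources: the ratio comparison variable templates rewritten above are usable directly since C++17, for example:]

    #include <ratio>

    static_assert(std::ratio_equal_v<std::ratio<2, 4>, std::ratio<1, 2>>);
    static_assert(std::ratio_less_v<std::ratio<1, 3>, std::ratio<1, 2>>);
    static_assert(std::ratio_greater_equal_v<std::ratio<3, 4>, std::ratio<1, 2>>);
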
diff --git a/libcxx/include/regex b/libcxx/include/regex
index e47cf9fca5d2..815ff7d3862d 100644
--- a/libcxx/include/regex
+++ b/libcxx/include/regex
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- regex ------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -1072,32 +1072,38 @@ private:
template <class _ForwardIterator>
string_type
__transform_primary(_ForwardIterator __f, _ForwardIterator __l, char) const;
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
template <class _ForwardIterator>
string_type
__transform_primary(_ForwardIterator __f, _ForwardIterator __l, wchar_t) const;
-
+#endif
template <class _ForwardIterator>
string_type
__lookup_collatename(_ForwardIterator __f, _ForwardIterator __l, char) const;
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
template <class _ForwardIterator>
string_type
__lookup_collatename(_ForwardIterator __f, _ForwardIterator __l, wchar_t) const;
-
+#endif
template <class _ForwardIterator>
char_class_type
__lookup_classname(_ForwardIterator __f, _ForwardIterator __l,
bool __icase, char) const;
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
template <class _ForwardIterator>
char_class_type
__lookup_classname(_ForwardIterator __f, _ForwardIterator __l,
bool __icase, wchar_t) const;
+#endif
static int __regex_traits_value(unsigned char __ch, int __radix);
_LIBCPP_INLINE_VISIBILITY
int __regex_traits_value(char __ch, int __radix) const
{return __regex_traits_value(static_cast<unsigned char>(__ch), __radix);}
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
_LIBCPP_INLINE_VISIBILITY
int __regex_traits_value(wchar_t __ch, int __radix) const;
+#endif
};
template <class _CharT>
@@ -1168,6 +1174,7 @@ regex_traits<_CharT>::__transform_primary(_ForwardIterator __f,
return __d;
}
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
template <class _CharT>
template <class _ForwardIterator>
typename regex_traits<_CharT>::string_type
@@ -1189,6 +1196,7 @@ regex_traits<_CharT>::__transform_primary(_ForwardIterator __f,
}
return __d;
}
+#endif
// lookup_collatename is very FreeBSD-specific
@@ -1217,6 +1225,7 @@ regex_traits<_CharT>::__lookup_collatename(_ForwardIterator __f,
return __r;
}
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
template <class _CharT>
template <class _ForwardIterator>
typename regex_traits<_CharT>::string_type
@@ -1250,6 +1259,7 @@ regex_traits<_CharT>::__lookup_collatename(_ForwardIterator __f,
}
return __r;
}
+#endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS
// lookup_classname
@@ -1268,6 +1278,7 @@ regex_traits<_CharT>::__lookup_classname(_ForwardIterator __f,
return __get_classname(__s.c_str(), __icase);
}
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
template <class _CharT>
template <class _ForwardIterator>
typename regex_traits<_CharT>::char_class_type
@@ -1288,6 +1299,7 @@ regex_traits<_CharT>::__lookup_classname(_ForwardIterator __f,
}
return __get_classname(__n.c_str(), __icase);
}
+#endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS
template <class _CharT>
bool
@@ -1318,6 +1330,7 @@ regex_traits<_CharT>::__regex_traits_value(unsigned char __ch, int __radix)
return -1;
}
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
template <class _CharT>
inline
int
@@ -1325,6 +1338,7 @@ regex_traits<_CharT>::__regex_traits_value(wchar_t __ch, int __radix) const
{
return __regex_traits_value(static_cast<unsigned char>(__ct_->narrow(__ch, char_type())), __radix);
}
+#endif
template <class _CharT> class __node;
@@ -2135,7 +2149,9 @@ public:
};
template <> _LIBCPP_FUNC_VIS void __match_any_but_newline<char>::__exec(__state&) const;
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
template <> _LIBCPP_FUNC_VIS void __match_any_but_newline<wchar_t>::__exec(__state&) const;
+#endif
// __match_char
@@ -2542,13 +2558,15 @@ template <class _CharT, class _Traits = regex_traits<_CharT> >
class _LIBCPP_TEMPLATE_VIS basic_regex;
typedef basic_regex<char> regex;
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
typedef basic_regex<wchar_t> wregex;
+#endif
template <class _CharT, class _Traits>
class
_LIBCPP_TEMPLATE_VIS
_LIBCPP_PREFERRED_NAME(regex)
- _LIBCPP_PREFERRED_NAME(wregex)
+ _LIBCPP_IF_WIDE_CHARACTERS(_LIBCPP_PREFERRED_NAME(wregex))
basic_regex
{
public:
@@ -3014,7 +3032,7 @@ private:
template <class, class> friend class __lookahead;
};
-#ifndef _LIBCPP_HAS_NO_DEDUCTION_GUIDES
+#if _LIBCPP_STD_VER >= 17
template <class _ForwardIterator,
class = typename enable_if<__is_cpp17_forward_iterator<_ForwardIterator>::value, nullptr_t>::type
>
@@ -4897,17 +4915,19 @@ basic_regex<_CharT, _Traits>::__push_lookahead(const basic_regex& __exp,
// sub_match
typedef sub_match<const char*> csub_match;
-typedef sub_match<const wchar_t*> wcsub_match;
typedef sub_match<string::const_iterator> ssub_match;
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
+typedef sub_match<const wchar_t*> wcsub_match;
typedef sub_match<wstring::const_iterator> wssub_match;
+#endif
template <class _BidirectionalIterator>
class
_LIBCPP_TEMPLATE_VIS
_LIBCPP_PREFERRED_NAME(csub_match)
- _LIBCPP_PREFERRED_NAME(wcsub_match)
+ _LIBCPP_IF_WIDE_CHARACTERS(_LIBCPP_PREFERRED_NAME(wcsub_match))
_LIBCPP_PREFERRED_NAME(ssub_match)
- _LIBCPP_PREFERRED_NAME(wssub_match)
+ _LIBCPP_IF_WIDE_CHARACTERS(_LIBCPP_PREFERRED_NAME(wssub_match))
sub_match
: public pair<_BidirectionalIterator, _BidirectionalIterator>
{
@@ -5326,17 +5346,19 @@ operator<<(basic_ostream<_CharT, _ST>& __os, const sub_match<_BiIter>& __m)
}
typedef match_results<const char*> cmatch;
-typedef match_results<const wchar_t*> wcmatch;
typedef match_results<string::const_iterator> smatch;
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
+typedef match_results<const wchar_t*> wcmatch;
typedef match_results<wstring::const_iterator> wsmatch;
+#endif
template <class _BidirectionalIterator, class _Allocator>
class
_LIBCPP_TEMPLATE_VIS
_LIBCPP_PREFERRED_NAME(cmatch)
- _LIBCPP_PREFERRED_NAME(wcmatch)
+ _LIBCPP_IF_WIDE_CHARACTERS(_LIBCPP_PREFERRED_NAME(wcmatch))
_LIBCPP_PREFERRED_NAME(smatch)
- _LIBCPP_PREFERRED_NAME(wsmatch)
+ _LIBCPP_IF_WIDE_CHARACTERS(_LIBCPP_PREFERRED_NAME(wsmatch))
match_results
{
public:
@@ -6244,17 +6266,19 @@ template <class _BidirectionalIterator,
class _LIBCPP_TEMPLATE_VIS regex_iterator;
typedef regex_iterator<const char*> cregex_iterator;
-typedef regex_iterator<const wchar_t*> wcregex_iterator;
typedef regex_iterator<string::const_iterator> sregex_iterator;
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
+typedef regex_iterator<const wchar_t*> wcregex_iterator;
typedef regex_iterator<wstring::const_iterator> wsregex_iterator;
+#endif
template <class _BidirectionalIterator, class _CharT, class _Traits>
class
_LIBCPP_TEMPLATE_VIS
_LIBCPP_PREFERRED_NAME(cregex_iterator)
- _LIBCPP_PREFERRED_NAME(wcregex_iterator)
+ _LIBCPP_IF_WIDE_CHARACTERS(_LIBCPP_PREFERRED_NAME(wcregex_iterator))
_LIBCPP_PREFERRED_NAME(sregex_iterator)
- _LIBCPP_PREFERRED_NAME(wsregex_iterator)
+ _LIBCPP_IF_WIDE_CHARACTERS(_LIBCPP_PREFERRED_NAME(wsregex_iterator))
regex_iterator
{
public:
@@ -6372,17 +6396,19 @@ template <class _BidirectionalIterator,
class _LIBCPP_TEMPLATE_VIS regex_token_iterator;
typedef regex_token_iterator<const char*> cregex_token_iterator;
-typedef regex_token_iterator<const wchar_t*> wcregex_token_iterator;
typedef regex_token_iterator<string::const_iterator> sregex_token_iterator;
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
+typedef regex_token_iterator<const wchar_t*> wcregex_token_iterator;
typedef regex_token_iterator<wstring::const_iterator> wsregex_token_iterator;
+#endif
template <class _BidirectionalIterator, class _CharT, class _Traits>
class
_LIBCPP_TEMPLATE_VIS
_LIBCPP_PREFERRED_NAME(cregex_token_iterator)
- _LIBCPP_PREFERRED_NAME(wcregex_token_iterator)
+ _LIBCPP_IF_WIDE_CHARACTERS(_LIBCPP_PREFERRED_NAME(wcregex_token_iterator))
_LIBCPP_PREFERRED_NAME(sregex_token_iterator)
- _LIBCPP_PREFERRED_NAME(wsregex_token_iterator)
+ _LIBCPP_IF_WIDE_CHARACTERS(_LIBCPP_PREFERRED_NAME(wsregex_token_iterator))
regex_token_iterator
{
public:
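
[For reference, not part of the upstream sources: the wide-character guards added above matter to code that must also build against a libc++ configured without wchar_t support. A hedged sketch of the intended consumption pattern follows; note that _LIBCPP_HAS_NO_WIDE_CHARACTERS is a libc++-internal configuration macro.]

    #include <regex>
    #include <string>

    bool has_digit(const std::string& s) {
        static const std::regex re("[0-9]");
        return std::regex_search(s, re);
    }

    #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
    bool has_digit(const std::wstring& s) {
        static const std::wregex re(L"[0-9]");
        return std::regex_search(s, re);
    }
    #endif
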
diff --git a/libcxx/include/scoped_allocator b/libcxx/include/scoped_allocator
index dc24d30b510f..2b15655e2c0a 100644
--- a/libcxx/include/scoped_allocator
+++ b/libcxx/include/scoped_allocator
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===-------------------------- scoped_allocator --------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -91,6 +91,10 @@ public:
scoped_allocator_adaptor select_on_container_copy_construction() const noexcept;
};
+template<class OuterAlloc, class... InnerAllocs>
+ scoped_allocator_adaptor(OuterAlloc, InnerAllocs...)
+ -> scoped_allocator_adaptor<OuterAlloc, InnerAllocs...>;
+
template <class OuterA1, class OuterA2, class... InnerAllocs>
bool
operator==(const scoped_allocator_adaptor<OuterA1, InnerAllocs...>& a,
@@ -649,6 +653,12 @@ private:
template <class...> friend class __scoped_allocator_storage;
};
+#if _LIBCPP_STD_VER > 14
+template<class _OuterAlloc, class... _InnerAllocs>
+ scoped_allocator_adaptor(_OuterAlloc, _InnerAllocs...)
+ -> scoped_allocator_adaptor<_OuterAlloc, _InnerAllocs...>;
+#endif
+
template <class _OuterA1, class _OuterA2>
inline _LIBCPP_INLINE_VISIBILITY
bool
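
[For reference, not part of the upstream sources: the deduction guide added above lets a scoped_allocator_adaptor be constructed without spelling out its template arguments (C++17).]

    #include <memory>
    #include <scoped_allocator>

    void example() {
        std::allocator<int>  outer;
        std::allocator<char> inner;
        // Deduces scoped_allocator_adaptor<allocator<int>, allocator<char>>.
        std::scoped_allocator_adaptor adaptor(outer, inner);
        (void)adaptor;
    }
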
diff --git a/libcxx/include/semaphore b/libcxx/include/semaphore
index 906f62e0f07a..2c2518bce46a 100644
--- a/libcxx/include/semaphore
+++ b/libcxx/include/semaphore
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- semaphore --------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -67,10 +67,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD
/*
-__atomic_semaphore_base is the general-case implementation, to be used for
-user-requested least-max values that exceed the OS implementation support
-(incl. when the OS has no support of its own) and for binary semaphores.
-
+__atomic_semaphore_base is the general-case implementation.
It is a typical Dijkstra semaphore algorithm over atomics, wait and notify
functions. It avoids contention against users' own use of those facilities.
@@ -82,7 +79,7 @@ class __atomic_semaphore_base
public:
_LIBCPP_INLINE_VISIBILITY
- __atomic_semaphore_base(ptrdiff_t __count) : __a(__count)
+ constexpr explicit __atomic_semaphore_base(ptrdiff_t __count) : __a(__count)
{
}
_LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
@@ -108,81 +105,30 @@ public:
_LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
bool try_acquire_for(chrono::duration<Rep, Period> const& __rel_time)
{
- auto const __test_fn = [this]() -> bool {
- auto __old = __a.load(memory_order_acquire);
- while(1) {
- if (__old == 0)
- return false;
- if(__a.compare_exchange_strong(__old, __old - 1, memory_order_acquire, memory_order_relaxed))
- return true;
- }
- };
+ if (__rel_time == chrono::duration<Rep, Period>::zero())
+ return try_acquire();
+ auto const __test_fn = [this]() { return try_acquire(); };
return __libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy(), __rel_time);
}
-};
-
-#ifndef _LIBCPP_NO_NATIVE_SEMAPHORES
-
-/*
-
-__platform_semaphore_base a simple wrapper for the OS semaphore type. That
-is, every call is routed to the OS in the most direct manner possible.
-
-*/
-
-class __platform_semaphore_base
-{
- __libcpp_semaphore_t __semaphore;
-
-public:
- _LIBCPP_INLINE_VISIBILITY
- __platform_semaphore_base(ptrdiff_t __count) :
- __semaphore()
- {
- __libcpp_semaphore_init(&__semaphore, __count);
- }
- _LIBCPP_INLINE_VISIBILITY
- ~__platform_semaphore_base() {
- __libcpp_semaphore_destroy(&__semaphore);
- }
- _LIBCPP_INLINE_VISIBILITY
- void release(ptrdiff_t __update)
- {
- for(; __update; --__update)
- __libcpp_semaphore_post(&__semaphore);
- }
- _LIBCPP_INLINE_VISIBILITY
- void acquire()
- {
- __libcpp_semaphore_wait(&__semaphore);
- }
- _LIBCPP_INLINE_VISIBILITY
- bool try_acquire_for(chrono::nanoseconds __rel_time)
+ _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
+ bool try_acquire()
{
- return __libcpp_semaphore_wait_timed(&__semaphore, __rel_time);
+ auto __old = __a.load(memory_order_acquire);
+ while (true) {
+ if (__old == 0)
+ return false;
+ if (__a.compare_exchange_strong(__old, __old - 1, memory_order_acquire, memory_order_relaxed))
+ return true;
+ }
}
};
-template<ptrdiff_t __least_max_value>
-using __semaphore_base =
- typename conditional<(__least_max_value > 1 && __least_max_value <= _LIBCPP_SEMAPHORE_MAX),
- __platform_semaphore_base,
- __atomic_semaphore_base>::type;
-
-#else
-
-template<ptrdiff_t __least_max_value>
-using __semaphore_base =
- __atomic_semaphore_base;
-
#define _LIBCPP_SEMAPHORE_MAX (numeric_limits<ptrdiff_t>::max())
-#endif //_LIBCPP_NO_NATIVE_SEMAPHORES
-
template<ptrdiff_t __least_max_value = _LIBCPP_SEMAPHORE_MAX>
class counting_semaphore
{
- __semaphore_base<__least_max_value> __semaphore;
+ __atomic_semaphore_base __semaphore;
public:
static constexpr ptrdiff_t max() noexcept {
@@ -190,7 +136,7 @@ public:
}
_LIBCPP_INLINE_VISIBILITY
- counting_semaphore(ptrdiff_t __count = 0) : __semaphore(__count) { }
+ constexpr explicit counting_semaphore(ptrdiff_t __count) : __semaphore(__count) { }
~counting_semaphore() = default;
counting_semaphore(const counting_semaphore&) = delete;
@@ -215,14 +161,14 @@ public:
_LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
bool try_acquire()
{
- return try_acquire_for(chrono::nanoseconds::zero());
+ return __semaphore.try_acquire();
}
template <class Clock, class Duration>
_LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
bool try_acquire_until(chrono::time_point<Clock, Duration> const& __abs_time)
{
auto const current = Clock::now();
- if(current >= __abs_time)
+ if (current >= __abs_time)
return try_acquire();
else
return try_acquire_for(__abs_time - current);
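
[For reference, not part of the upstream sources: a sketch of the semaphore interface as it behaves after the change above -- the constructor is now explicit with no default count, try_acquire performs the atomic decrement directly, and a zero-length try_acquire_for forwards to it. Assumes a C++20 library providing <semaphore>.]

    #include <chrono>
    #include <semaphore>

    std::counting_semaphore<8> slots(8);    // explicit constexpr constructor

    bool grab_slot_with_grace_period() {
        using namespace std::chrono_literals;
        if (slots.try_acquire())            // non-blocking fast path
            return true;
        return slots.try_acquire_for(10ms); // bounded wait, then give up
    }

    void release_slot() { slots.release(); }
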
diff --git a/libcxx/include/set b/libcxx/include/set
index 21ec8435dd84..803175296a34 100644
--- a/libcxx/include/set
+++ b/libcxx/include/set
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===---------------------------- set -------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -183,6 +183,25 @@ public:
pair<const_iterator,const_iterator> equal_range(const K& x) const; // C++14
};
+template <class InputIterator,
+ class Compare = less<typename iterator_traits<InputIterator>::value_type>,
+ class Allocator = allocator<typename iterator_traits<InputIterator>::value_type>>
+set(InputIterator, InputIterator,
+ Compare = Compare(), Allocator = Allocator())
+ -> set<typename iterator_traits<InputIterator>::value_type, Compare, Allocator>; // C++17
+
+template<class Key, class Compare = less<Key>, class Allocator = allocator<Key>>
+set(initializer_list<Key>, Compare = Compare(), Allocator = Allocator())
+ -> set<Key, Compare, Allocator>; // C++17
+
+template<class InputIterator, class Allocator>
+set(InputIterator, InputIterator, Allocator)
+ -> set<typename iterator_traits<InputIterator>::value_type,
+ less<typename iterator_traits<InputIterator>::value_type>, Allocator>; // C++17
+
+template<class Key, class Allocator>
+set(initializer_list<Key>, Allocator) -> set<Key, less<Key>, Allocator>; // C++17
+
template <class Key, class Compare, class Allocator>
bool
operator==(const set<Key, Compare, Allocator>& x,
@@ -389,6 +408,25 @@ public:
pair<const_iterator,const_iterator> equal_range(const K& x) const; // C++14
};
+template <class InputIterator,
+ class Compare = less<typename iterator_traits<InputIterator>::value_type>,
+ class Allocator = allocator<typename iterator_traits<InputIterator>::value_type>>
+multiset(InputIterator, InputIterator,
+ Compare = Compare(), Allocator = Allocator())
+ -> multiset<typename iterator_traits<InputIterator>::value_type, Compare, Allocator>; // C++17
+
+template<class Key, class Compare = less<Key>, class Allocator = allocator<Key>>
+multiset(initializer_list<Key>, Compare = Compare(), Allocator = Allocator())
+ -> multiset<Key, Compare, Allocator>; // C++17
+
+template<class InputIterator, class Allocator>
+multiset(InputIterator, InputIterator, Allocator)
+ -> multiset<typename iterator_traits<InputIterator>::value_type,
+ less<typename iterator_traits<InputIterator>::value_type>, Allocator>; // C++17
+
+template<class Key, class Allocator>
+multiset(initializer_list<Key>, Allocator) -> multiset<Key, less<Key>, Allocator>; // C++17
+
template <class Key, class Compare, class Allocator>
bool
operator==(const multiset<Key, Compare, Allocator>& x,
@@ -436,6 +474,7 @@ erase_if(multiset<Key, Compare, Allocator>& c, Predicate pred); // C++20
#include <__config>
#include <__debug>
#include <__functional/is_transparent.h>
+#include <__iterator/iterator_traits.h>
#include <__node_handle>
#include <__tree>
#include <__utility/forward.h>
@@ -462,7 +501,7 @@ public:
// types:
typedef _Key key_type;
typedef key_type value_type;
- typedef _Compare key_compare;
+ typedef __identity_t<_Compare> key_compare;
typedef key_compare value_compare;
typedef __identity_t<_Allocator> allocator_type;
typedef value_type& reference;
@@ -474,7 +513,6 @@ public:
private:
typedef __tree<value_type, value_compare, allocator_type> __base;
typedef allocator_traits<allocator_type> __alloc_traits;
- typedef typename __base::__node_holder __node_holder;
__base __tree_;
@@ -868,30 +906,32 @@ public:
#endif
};
-#ifndef _LIBCPP_HAS_NO_DEDUCTION_GUIDES
+#if _LIBCPP_STD_VER >= 17
template<class _InputIterator,
class _Compare = less<__iter_value_type<_InputIterator>>,
class _Allocator = allocator<__iter_value_type<_InputIterator>>,
- class = _EnableIf<__is_allocator<_Allocator>::value, void>,
- class = _EnableIf<!__is_allocator<_Compare>::value, void>>
+ class = enable_if_t<__is_cpp17_input_iterator<_InputIterator>::value, void>,
+ class = enable_if_t<__is_allocator<_Allocator>::value, void>,
+ class = enable_if_t<!__is_allocator<_Compare>::value, void>>
set(_InputIterator, _InputIterator, _Compare = _Compare(), _Allocator = _Allocator())
-> set<__iter_value_type<_InputIterator>, _Compare, _Allocator>;
template<class _Key, class _Compare = less<_Key>,
class _Allocator = allocator<_Key>,
- class = _EnableIf<__is_allocator<_Allocator>::value, void>,
- class = _EnableIf<!__is_allocator<_Compare>::value, void>>
+ class = enable_if_t<!__is_allocator<_Compare>::value, void>,
+ class = enable_if_t<__is_allocator<_Allocator>::value, void>>
set(initializer_list<_Key>, _Compare = _Compare(), _Allocator = _Allocator())
-> set<_Key, _Compare, _Allocator>;
template<class _InputIterator, class _Allocator,
- class = _EnableIf<__is_allocator<_Allocator>::value, void>>
+ class = enable_if_t<__is_cpp17_input_iterator<_InputIterator>::value, void>,
+ class = enable_if_t<__is_allocator<_Allocator>::value, void>>
set(_InputIterator, _InputIterator, _Allocator)
-> set<__iter_value_type<_InputIterator>,
less<__iter_value_type<_InputIterator>>, _Allocator>;
template<class _Key, class _Allocator,
- class = _EnableIf<__is_allocator<_Allocator>::value, void>>
+ class = enable_if_t<__is_allocator<_Allocator>::value, void>>
set(initializer_list<_Key>, _Allocator)
-> set<_Key, less<_Key>, _Allocator>;
#endif
@@ -994,7 +1034,7 @@ public:
// types:
typedef _Key key_type;
typedef key_type value_type;
- typedef _Compare key_compare;
+ typedef __identity_t<_Compare> key_compare;
typedef key_compare value_compare;
typedef __identity_t<_Allocator> allocator_type;
typedef value_type& reference;
@@ -1006,7 +1046,6 @@ public:
private:
typedef __tree<value_type, value_compare, allocator_type> __base;
typedef allocator_traits<allocator_type> __alloc_traits;
- typedef typename __base::__node_holder __node_holder;
__base __tree_;
@@ -1399,30 +1438,32 @@ public:
#endif
};
-#ifndef _LIBCPP_HAS_NO_DEDUCTION_GUIDES
+#if _LIBCPP_STD_VER >= 17
template<class _InputIterator,
class _Compare = less<__iter_value_type<_InputIterator>>,
class _Allocator = allocator<__iter_value_type<_InputIterator>>,
- class = _EnableIf<__is_allocator<_Allocator>::value, void>,
- class = _EnableIf<!__is_allocator<_Compare>::value, void>>
+ class = enable_if_t<__is_cpp17_input_iterator<_InputIterator>::value, void>,
+ class = enable_if_t<__is_allocator<_Allocator>::value, void>,
+ class = enable_if_t<!__is_allocator<_Compare>::value, void>>
multiset(_InputIterator, _InputIterator, _Compare = _Compare(), _Allocator = _Allocator())
-> multiset<__iter_value_type<_InputIterator>, _Compare, _Allocator>;
template<class _Key, class _Compare = less<_Key>,
class _Allocator = allocator<_Key>,
- class = _EnableIf<__is_allocator<_Allocator>::value, void>,
- class = _EnableIf<!__is_allocator<_Compare>::value, void>>
+ class = enable_if_t<__is_allocator<_Allocator>::value, void>,
+ class = enable_if_t<!__is_allocator<_Compare>::value, void>>
multiset(initializer_list<_Key>, _Compare = _Compare(), _Allocator = _Allocator())
-> multiset<_Key, _Compare, _Allocator>;
template<class _InputIterator, class _Allocator,
- class = _EnableIf<__is_allocator<_Allocator>::value, void>>
+ class = enable_if_t<__is_cpp17_input_iterator<_InputIterator>::value, void>,
+ class = enable_if_t<__is_allocator<_Allocator>::value, void>>
multiset(_InputIterator, _InputIterator, _Allocator)
-> multiset<__iter_value_type<_InputIterator>,
less<__iter_value_type<_InputIterator>>, _Allocator>;
template<class _Key, class _Allocator,
- class = _EnableIf<__is_allocator<_Allocator>::value, void>>
+ class = enable_if_t<__is_allocator<_Allocator>::value, void>>
multiset(initializer_list<_Key>, _Allocator)
-> multiset<_Key, less<_Key>, _Allocator>;
#endif
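
[For reference, not part of the upstream sources: the set/multiset deduction guides above, now constrained on input iterators, enable for example the following (C++17).]

    #include <functional>
    #include <memory>
    #include <set>
    #include <vector>

    int main() {
        std::vector<int> v{3, 1, 2, 2};

        std::set s(v.begin(), v.end());                            // set<int>
        std::multiset m(v.begin(), v.end(), std::greater<int>());  // multiset<int, greater<int>>
        std::set s2({1, 2, 3}, std::allocator<int>());             // set<int, less<int>, allocator<int>>

        return (s.size() == 3 && m.size() == 4 && s2.size() == 3) ? 0 : 1;
    }
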
diff --git a/libcxx/include/setjmp.h b/libcxx/include/setjmp.h
index f9fb2ffe00b2..3ecaeca720cd 100644
--- a/libcxx/include/setjmp.h
+++ b/libcxx/include/setjmp.h
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- setjmp.h ---------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/shared_mutex b/libcxx/include/shared_mutex
index 8205c3e0af0c..f866443b8e25 100644
--- a/libcxx/include/shared_mutex
+++ b/libcxx/include/shared_mutex
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===------------------------ shared_mutex --------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/span b/libcxx/include/span
index 260a74404fe5..d33ad09a388f 100644
--- a/libcxx/include/span
+++ b/libcxx/include/span
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===------------------------------ span ---------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -56,18 +56,18 @@ public:
// [span.cons], span constructors, copy, assignment, and destructor
constexpr span() noexcept;
- constexpr explicit(Extent != dynamic_extent) span(pointer ptr, size_type count);
- constexpr explicit(Extent != dynamic_extent) span(pointer firstElem, pointer lastElem);
+ template <class It>
+ constexpr explicit(Extent != dynamic_extent) span(It first, size_type count);
+ template <class It, class End>
+ constexpr explicit(Extent != dynamic_extent) span(It first, End last);
template <size_t N>
- constexpr span(element_type (&arr)[N]) noexcept;
+ constexpr span(type_identity_t<element_type> (&arr)[N]) noexcept;
template <size_t N>
constexpr span(array<value_type, N>& arr) noexcept;
template <size_t N>
constexpr span(const array<value_type, N>& arr) noexcept;
- template <class Container>
- constexpr explicit(Extent != dynamic_extent) span(Container& cont);
- template <class Container>
- constexpr explicit(Extent != dynamic_extent) span(const Container& cont);
+ template<class R>
+ constexpr explicit(Extent != dynamic_extent) span(R&& r);
constexpr span(const span& other) noexcept = default;
template <class OtherElementType, size_t OtherExtent>
constexpr explicit(Extent != dynamic_extent) span(const span<OtherElementType, OtherExtent>& s) noexcept;
@@ -89,7 +89,7 @@ public:
// [span.obs], span observers
constexpr size_type size() const noexcept;
constexpr size_type size_bytes() const noexcept;
- constexpr bool empty() const noexcept;
+ [[nodiscard]] constexpr bool empty() const noexcept;
// [span.elem], span element access
constexpr reference operator[](size_type idx) const;
@@ -108,6 +108,9 @@ private:
size_type size_; // exposition only
};
+template<class It, class EndOrSize>
+  span(It, EndOrSize) -> span<remove_reference_t<iter_reference_t<It>>>;
+
template<class T, size_t N>
span(T (&)[N]) -> span<T, N>;
@@ -117,11 +120,8 @@ template<class T, size_t N>
template<class T, size_t N>
span(const array<T, N>&) -> span<const T, N>;
-template<class Container>
- span(Container&) -> span<typename Container::value_type>;
-
-template<class Container>
- span(const Container&) -> span<const typename Container::value_type>;
+template<class R>
+ span(R&&) -> span<remove_reference_t<ranges::range_reference_t<R>>>;
} // namespace std
@@ -129,9 +129,13 @@ template<class Container>
#include <__config>
#include <__debug>
+#include <__iterator/concepts.h>
#include <__iterator/wrap_iter.h>
+#include <__ranges/concepts.h>
+#include <__ranges/data.h>
#include <__ranges/enable_borrowed_range.h>
#include <__ranges/enable_view.h>
+#include <__ranges/size.h>
#include <array> // for array
#include <cstddef> // for byte
#include <iterator> // for iterators
@@ -155,46 +159,28 @@ template <typename _Tp, size_t _Extent = dynamic_extent> class span;
template <class _Tp>
-struct __is_span_impl : public false_type {};
+struct __is_std_array : false_type {};
-template <class _Tp, size_t _Extent>
-struct __is_span_impl<span<_Tp, _Extent>> : public true_type {};
-
-template <class _Tp>
-struct __is_span : public __is_span_impl<remove_cv_t<_Tp>> {};
+template <class _Tp, size_t _Sz>
+struct __is_std_array<array<_Tp, _Sz>> : true_type {};
template <class _Tp>
-struct __is_std_array_impl : public false_type {};
+struct __is_std_span : false_type {};
template <class _Tp, size_t _Sz>
-struct __is_std_array_impl<array<_Tp, _Sz>> : public true_type {};
-
-template <class _Tp>
-struct __is_std_array : public __is_std_array_impl<remove_cv_t<_Tp>> {};
-
-template <class _Tp, class _ElementType, class = void>
-struct __is_span_compatible_container : public false_type {};
-
-template <class _Tp, class _ElementType>
-struct __is_span_compatible_container<_Tp, _ElementType,
- void_t<
- // is not a specialization of span
- typename enable_if<!__is_span<_Tp>::value, nullptr_t>::type,
- // is not a specialization of array
- typename enable_if<!__is_std_array<_Tp>::value, nullptr_t>::type,
- // is_array_v<Container> is false,
- typename enable_if<!is_array_v<_Tp>, nullptr_t>::type,
- // data(cont) and size(cont) are well formed
- decltype(data(declval<_Tp>())),
- decltype(size(declval<_Tp>())),
- // remove_pointer_t<decltype(data(cont))>(*)[] is convertible to ElementType(*)[]
- typename enable_if<
- is_convertible_v<remove_pointer_t<decltype(data(declval<_Tp &>()))>(*)[],
- _ElementType(*)[]>,
- nullptr_t>::type
- >>
- : public true_type {};
+struct __is_std_span<span<_Tp, _Sz>> : true_type {};
+#if !defined(_LIBCPP_HAS_NO_RANGES)
+template <class _Range, class _ElementType>
+concept __span_compatible_range =
+ ranges::contiguous_range<_Range> &&
+ ranges::sized_range<_Range> &&
+ (ranges::borrowed_range<_Range> || is_const_v<_ElementType>) &&
+ !__is_std_span<remove_cvref_t<_Range>>::value &&
+ !__is_std_array<remove_cvref_t<_Range>>::value &&
+ !is_array_v<remove_cvref_t<_Range>> &&
+ is_convertible_v<remove_reference_t<ranges::range_reference_t<_Range>>(*)[], _ElementType(*)[]>;
+#endif
template <typename _Tp, size_t _Extent>
class _LIBCPP_TEMPLATE_VIS span {
@@ -224,12 +210,33 @@ public:
constexpr span (const span&) noexcept = default;
constexpr span& operator=(const span&) noexcept = default;
- _LIBCPP_INLINE_VISIBILITY constexpr explicit span(pointer __ptr, size_type __count) : __data{__ptr}
- { (void)__count; _LIBCPP_ASSERT(_Extent == __count, "size mismatch in span's constructor (ptr, len)"); }
- _LIBCPP_INLINE_VISIBILITY constexpr explicit span(pointer __f, pointer __l) : __data{__f}
- { (void)__l; _LIBCPP_ASSERT(_Extent == distance(__f, __l), "size mismatch in span's constructor (ptr, ptr)"); }
+#if !defined(_LIBCPP_HAS_NO_RANGES)
+ template <class _It,
+ enable_if_t<contiguous_iterator<_It> &&
+ is_convertible_v<remove_reference_t<iter_reference_t<_It>>(*)[], element_type (*)[]>,
+ nullptr_t> = nullptr>
+ _LIBCPP_INLINE_VISIBILITY
+ constexpr explicit span(_It __first, size_type __count)
+ : __data{_VSTD::to_address(__first)} {
+ (void)__count;
+ _LIBCPP_ASSERT(_Extent == __count, "size mismatch in span's constructor (iterator, len)");
+ }
- _LIBCPP_INLINE_VISIBILITY constexpr span(element_type (&__arr)[_Extent]) noexcept : __data{__arr} {}
+ template <
+ class _It, class _End,
+ enable_if_t<is_convertible_v<remove_reference_t<iter_reference_t<_It> > (*)[], element_type (*)[]> &&
+ contiguous_iterator<_It> && sized_sentinel_for<_End, _It> && !is_convertible_v<_End, size_t>,
+ nullptr_t> = nullptr>
+ _LIBCPP_INLINE_VISIBILITY
+ constexpr explicit span(_It __first, _End __last) : __data{_VSTD::to_address(__first)} {
+ (void)__last;
+ _LIBCPP_ASSERT((__last - __first >= 0), "invalid range in span's constructor (iterator, sentinel)");
+ _LIBCPP_ASSERT(__last - __first == _Extent,
+ "invalid range in span's constructor (iterator, sentinel): last - first != extent");
+ }
+#endif
+
+ _LIBCPP_INLINE_VISIBILITY constexpr span(type_identity_t<element_type> (&__arr)[_Extent]) noexcept : __data{__arr} {}
template <class _OtherElementType,
enable_if_t<is_convertible_v<_OtherElementType(*)[], element_type (*)[]>, nullptr_t> = nullptr>
@@ -241,21 +248,13 @@ public:
_LIBCPP_INLINE_VISIBILITY
constexpr span(const array<_OtherElementType, _Extent>& __arr) noexcept : __data{__arr.data()} {}
- template <class _Container>
- _LIBCPP_INLINE_VISIBILITY
- constexpr explicit span( _Container& __c,
- enable_if_t<__is_span_compatible_container<_Container, _Tp>::value, nullptr_t> = nullptr)
- : __data{_VSTD::data(__c)} {
- _LIBCPP_ASSERT(_Extent == _VSTD::size(__c), "size mismatch in span's constructor (range)");
- }
-
- template <class _Container>
+#if !defined(_LIBCPP_HAS_NO_RANGES)
+ template <__span_compatible_range<element_type> _Range>
_LIBCPP_INLINE_VISIBILITY
- constexpr explicit span(const _Container& __c,
- enable_if_t<__is_span_compatible_container<const _Container, _Tp>::value, nullptr_t> = nullptr)
- : __data{_VSTD::data(__c)} {
- _LIBCPP_ASSERT(_Extent == _VSTD::size(__c), "size mismatch in span's constructor (range)");
- }
+ constexpr explicit span(_Range&& __r) : __data{ranges::data(__r)} {
+ _LIBCPP_ASSERT(ranges::size(__r) == _Extent, "size mismatch in span's constructor (range)");
+ }
+#endif
template <class _OtherElementType>
_LIBCPP_INLINE_VISIBILITY
@@ -331,9 +330,9 @@ public:
return {data() + __offset, __count};
}
- _LIBCPP_INLINE_VISIBILITY constexpr size_type size() const noexcept { return _Extent; }
- _LIBCPP_INLINE_VISIBILITY constexpr size_type size_bytes() const noexcept { return _Extent * sizeof(element_type); }
- _LIBCPP_INLINE_VISIBILITY constexpr bool empty() const noexcept { return _Extent == 0; }
+ _LIBCPP_INLINE_VISIBILITY constexpr size_type size() const noexcept { return _Extent; }
+ _LIBCPP_INLINE_VISIBILITY constexpr size_type size_bytes() const noexcept { return _Extent * sizeof(element_type); }
+ [[nodiscard]] _LIBCPP_INLINE_VISIBILITY constexpr bool empty() const noexcept { return _Extent == 0; }
_LIBCPP_INLINE_VISIBILITY constexpr reference operator[](size_type __idx) const noexcept
{
@@ -402,12 +401,28 @@ public:
constexpr span (const span&) noexcept = default;
constexpr span& operator=(const span&) noexcept = default;
- _LIBCPP_INLINE_VISIBILITY constexpr span(pointer __ptr, size_type __count) : __data{__ptr}, __size{__count} {}
- _LIBCPP_INLINE_VISIBILITY constexpr span(pointer __f, pointer __l) : __data{__f}, __size{static_cast<size_t>(distance(__f, __l))} {}
+#if !defined(_LIBCPP_HAS_NO_RANGES)
+ template <class _It,
+ enable_if_t<contiguous_iterator<_It> &&
+ is_convertible_v<remove_reference_t<iter_reference_t<_It> > (*)[], element_type (*)[]>,
+ nullptr_t> = nullptr>
+ _LIBCPP_INLINE_VISIBILITY
+ constexpr span(_It __first, size_type __count)
+ : __data{_VSTD::to_address(__first)}, __size{__count} {}
+
+ template <
+ class _It, class _End,
+ enable_if_t<is_convertible_v<remove_reference_t<iter_reference_t<_It> > (*)[], element_type (*)[]> &&
+ contiguous_iterator<_It> && sized_sentinel_for<_End, _It> && !is_convertible_v<_End, size_t>,
+ nullptr_t> = nullptr>
+ _LIBCPP_INLINE_VISIBILITY
+ constexpr span(_It __first, _End __last)
+ : __data(_VSTD::to_address(__first)), __size(__last - __first) {}
+#endif
template <size_t _Sz>
_LIBCPP_INLINE_VISIBILITY
- constexpr span(element_type (&__arr)[_Sz]) noexcept : __data{__arr}, __size{_Sz} {}
+ constexpr span(type_identity_t<element_type> (&__arr)[_Sz]) noexcept : __data{__arr}, __size{_Sz} {}
template <class _OtherElementType, size_t _Sz,
enable_if_t<is_convertible_v<_OtherElementType(*)[], element_type (*)[]>, nullptr_t> = nullptr>
@@ -419,18 +434,11 @@ public:
_LIBCPP_INLINE_VISIBILITY
constexpr span(const array<_OtherElementType, _Sz>& __arr) noexcept : __data{__arr.data()}, __size{_Sz} {}
- template <class _Container>
- _LIBCPP_INLINE_VISIBILITY
- constexpr span( _Container& __c,
- enable_if_t<__is_span_compatible_container<_Container, _Tp>::value, nullptr_t> = nullptr)
- : __data{_VSTD::data(__c)}, __size{(size_type) _VSTD::size(__c)} {}
-
- template <class _Container>
+#if !defined(_LIBCPP_HAS_NO_RANGES)
+ template <__span_compatible_range<element_type> _Range>
_LIBCPP_INLINE_VISIBILITY
- constexpr span(const _Container& __c,
- enable_if_t<__is_span_compatible_container<const _Container, _Tp>::value, nullptr_t> = nullptr)
- : __data{_VSTD::data(__c)}, __size{(size_type) _VSTD::size(__c)} {}
-
+ constexpr span(_Range&& __r) : __data(ranges::data(__r)), __size{ranges::size(__r)} {}
+#endif
template <class _OtherElementType, size_t _OtherExtent>
_LIBCPP_INLINE_VISIBILITY
@@ -493,9 +501,9 @@ public:
return {data() + __offset, __count};
}
- _LIBCPP_INLINE_VISIBILITY constexpr size_type size() const noexcept { return __size; }
- _LIBCPP_INLINE_VISIBILITY constexpr size_type size_bytes() const noexcept { return __size * sizeof(element_type); }
- _LIBCPP_INLINE_VISIBILITY constexpr bool empty() const noexcept { return __size == 0; }
+ _LIBCPP_INLINE_VISIBILITY constexpr size_type size() const noexcept { return __size; }
+ _LIBCPP_INLINE_VISIBILITY constexpr size_type size_bytes() const noexcept { return __size * sizeof(element_type); }
+ [[nodiscard]] _LIBCPP_INLINE_VISIBILITY constexpr bool empty() const noexcept { return __size == 0; }
_LIBCPP_INLINE_VISIBILITY constexpr reference operator[](size_type __idx) const noexcept
{
@@ -556,7 +564,12 @@ auto as_writable_bytes(span<_Tp, _Extent> __s) noexcept
-> enable_if_t<!is_const_v<_Tp>, decltype(__s.__as_writable_bytes())>
{ return __s.__as_writable_bytes(); }
+#if !defined(_LIBCPP_HAS_NO_RANGES)
// Deduction guides
+template<contiguous_iterator _It, class _EndOrSize>
+ span(_It, _EndOrSize) -> span<remove_reference_t<iter_reference_t<_It>>>;
+#endif
+
template<class _Tp, size_t _Sz>
span(_Tp (&)[_Sz]) -> span<_Tp, _Sz>;
@@ -566,11 +579,10 @@ template<class _Tp, size_t _Sz>
template<class _Tp, size_t _Sz>
span(const array<_Tp, _Sz>&) -> span<const _Tp, _Sz>;
-template<class _Container>
- span(_Container&) -> span<typename _Container::value_type>;
-
-template<class _Container>
- span(const _Container&) -> span<const typename _Container::value_type>;
+#if !defined(_LIBCPP_HAS_NO_RANGES)
+template<ranges::contiguous_range _Range>
+ span(_Range&&) -> span<remove_reference_t<ranges::range_reference_t<_Range>>>;
+#endif
#endif // _LIBCPP_STD_VER > 17
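
[For reference, not part of the upstream sources: the span constructors introduced above replace the old container overloads with iterator/sentinel and range forms. A minimal C++20 usage sketch:]

    #include <span>
    #include <vector>

    int sum(std::span<const int> s) {
        int total = 0;
        for (int v : s) total += v;
        return total;
    }

    int main() {
        std::vector<int> v{1, 2, 3, 4};
        std::span with_count(v.begin(), v.size()); // (iterator, count)
        std::span with_end(v.begin(), v.end());    // (iterator, sentinel)
        std::span from_range(v);                   // contiguous range
        return (sum(with_count) == 10 && sum(with_end) == 10 && sum(from_range) == 10) ? 0 : 1;
    }
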
diff --git a/libcxx/include/sstream b/libcxx/include/sstream
index fbe5ffcab4c6..e63d1434ac76 100644
--- a/libcxx/include/sstream
+++ b/libcxx/include/sstream
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- sstream ----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/stack b/libcxx/include/stack
index aefef31ac97b..5d959c33c742 100644
--- a/libcxx/include/stack
+++ b/libcxx/include/stack
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===---------------------------- stack -----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -158,28 +158,28 @@ public:
template <class _Alloc>
_LIBCPP_INLINE_VISIBILITY
explicit stack(const _Alloc& __a,
- _EnableIf<uses_allocator<container_type, _Alloc>::value>* = 0)
+ __enable_if_t<uses_allocator<container_type, _Alloc>::value>* = 0)
: c(__a) {}
template <class _Alloc>
_LIBCPP_INLINE_VISIBILITY
stack(const container_type& __c, const _Alloc& __a,
- _EnableIf<uses_allocator<container_type, _Alloc>::value>* = 0)
+ __enable_if_t<uses_allocator<container_type, _Alloc>::value>* = 0)
: c(__c, __a) {}
template <class _Alloc>
_LIBCPP_INLINE_VISIBILITY
stack(const stack& __s, const _Alloc& __a,
- _EnableIf<uses_allocator<container_type, _Alloc>::value>* = 0)
+ __enable_if_t<uses_allocator<container_type, _Alloc>::value>* = 0)
: c(__s.c, __a) {}
#ifndef _LIBCPP_CXX03_LANG
template <class _Alloc>
_LIBCPP_INLINE_VISIBILITY
stack(container_type&& __c, const _Alloc& __a,
- _EnableIf<uses_allocator<container_type, _Alloc>::value>* = 0)
+ __enable_if_t<uses_allocator<container_type, _Alloc>::value>* = 0)
: c(_VSTD::move(__c), __a) {}
template <class _Alloc>
_LIBCPP_INLINE_VISIBILITY
stack(stack&& __s, const _Alloc& __a,
- _EnableIf<uses_allocator<container_type, _Alloc>::value>* = 0)
+ __enable_if_t<uses_allocator<container_type, _Alloc>::value>* = 0)
: c(_VSTD::move(__s.c), __a) {}
#endif // _LIBCPP_CXX03_LANG
@@ -231,17 +231,17 @@ public:
operator< (const stack<T1, _C1>& __x, const stack<T1, _C1>& __y);
};
-#ifndef _LIBCPP_HAS_NO_DEDUCTION_GUIDES
+#if _LIBCPP_STD_VER >= 17
template<class _Container,
- class = _EnableIf<!__is_allocator<_Container>::value>
+ class = enable_if_t<!__is_allocator<_Container>::value>
>
stack(_Container)
-> stack<typename _Container::value_type, _Container>;
template<class _Container,
class _Alloc,
- class = _EnableIf<!__is_allocator<_Container>::value>,
- class = _EnableIf<uses_allocator<_Container, _Alloc>::value>
+ class = enable_if_t<!__is_allocator<_Container>::value>,
+ class = enable_if_t<uses_allocator<_Container, _Alloc>::value>
>
stack(_Container, _Alloc)
-> stack<typename _Container::value_type, _Container>;
@@ -297,7 +297,7 @@ operator<=(const stack<_Tp, _Container>& __x, const stack<_Tp, _Container>& __y)
template <class _Tp, class _Container>
inline _LIBCPP_INLINE_VISIBILITY
-_EnableIf<__is_swappable<_Container>::value, void>
+__enable_if_t<__is_swappable<_Container>::value, void>
swap(stack<_Tp, _Container>& __x, stack<_Tp, _Container>& __y)
_NOEXCEPT_(_NOEXCEPT_(__x.swap(__y)))
{
diff --git a/libcxx/include/stdbool.h b/libcxx/include/stdbool.h
index 69f7719f26cd..98d43214f08a 100644
--- a/libcxx/include/stdbool.h
+++ b/libcxx/include/stdbool.h
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- stdbool.h --------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/stddef.h b/libcxx/include/stddef.h
index 2f8c0a5f5bf2..13944ef8a851 100644
--- a/libcxx/include/stddef.h
+++ b/libcxx/include/stddef.h
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- stddef.h ---------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/stdexcept b/libcxx/include/stdexcept
index c0470d1e1dd3..ddbc6303b624 100644
--- a/libcxx/include/stdexcept
+++ b/libcxx/include/stdexcept
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- stdexcept --------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/stdint.h b/libcxx/include/stdint.h
index 5d64910bfe55..66676b6402d6 100644
--- a/libcxx/include/stdint.h
+++ b/libcxx/include/stdint.h
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===---------------------------- stdint.h --------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/stdio.h b/libcxx/include/stdio.h
index f84122034891..24224d995964 100644
--- a/libcxx/include/stdio.h
+++ b/libcxx/include/stdio.h
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===---------------------------- stdio.h ---------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/stdlib.h b/libcxx/include/stdlib.h
index 242eedc22a5b..6ae6bb66b053 100644
--- a/libcxx/include/stdlib.h
+++ b/libcxx/include/stdlib.h
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- stdlib.h ---------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -96,10 +96,14 @@ void *aligned_alloc(size_t alignment, size_t size); // C11
extern "C++" {
// abs
-#undef abs
-#undef labs
-#ifndef _LIBCPP_HAS_NO_LONG_LONG
-#undef llabs
+#ifdef abs
+# undef abs
+#endif
+#ifdef labs
+# undef labs
+#endif
+#ifdef llabs
+# undef llabs
#endif
// MSVCRT already has the correct prototype in <stdlib.h> if __cplusplus is defined
@@ -107,11 +111,9 @@ extern "C++" {
inline _LIBCPP_INLINE_VISIBILITY long abs(long __x) _NOEXCEPT {
return __builtin_labs(__x);
}
-#ifndef _LIBCPP_HAS_NO_LONG_LONG
inline _LIBCPP_INLINE_VISIBILITY long long abs(long long __x) _NOEXCEPT {
return __builtin_llabs(__x);
}
-#endif // _LIBCPP_HAS_NO_LONG_LONG
#endif // !defined(_LIBCPP_MSVCRT) && !defined(__sun__)
#if !defined(__sun__)
@@ -131,10 +133,14 @@ abs(long double __lcpp_x) _NOEXCEPT {
// div
-#undef div
-#undef ldiv
-#ifndef _LIBCPP_HAS_NO_LONG_LONG
-#undef lldiv
+#ifdef div
+# undef div
+#endif
+#ifdef ldiv
+# undef ldiv
+#endif
+#ifdef lldiv
+# undef lldiv
#endif
// MSVCRT already has the correct prototype in <stdlib.h> if __cplusplus is defined
@@ -142,12 +148,12 @@ abs(long double __lcpp_x) _NOEXCEPT {
inline _LIBCPP_INLINE_VISIBILITY ldiv_t div(long __x, long __y) _NOEXCEPT {
return ::ldiv(__x, __y);
}
-#ifndef _LIBCPP_HAS_NO_LONG_LONG
+#if !(defined(__FreeBSD__) && !defined(__LONG_LONG_SUPPORTED))
inline _LIBCPP_INLINE_VISIBILITY lldiv_t div(long long __x,
long long __y) _NOEXCEPT {
return ::lldiv(__x, __y);
}
-#endif // _LIBCPP_HAS_NO_LONG_LONG
+#endif
#endif // _LIBCPP_MSVCRT / __sun__
} // extern "C++"
#endif // __cplusplus
diff --git a/libcxx/include/streambuf b/libcxx/include/streambuf
index 385d48694580..db3078d809a5 100644
--- a/libcxx/include/streambuf
+++ b/libcxx/include/streambuf
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===------------------------- streambuf ----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/string b/libcxx/include/string
index 4940021b0c68..313a7b5c2376 100644
--- a/libcxx/include/string
+++ b/libcxx/include/string
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- string -----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -522,8 +522,8 @@ basic_string<char32_t> operator "" s( const char32_t *str, size_t len ); // C++1
#include <algorithm>
#include <compare>
#include <cstdio> // EOF
+#include <cstdlib>
#include <cstring>
-#include <cwchar>
#include <initializer_list>
#include <iosfwd>
#include <iterator>
@@ -534,6 +534,10 @@ basic_string<char32_t> operator "" s( const char32_t *str, size_t len ); // C++1
#include <utility>
#include <version>
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
+# include <cwchar>
+#endif
+
#ifndef _LIBCPP_HAS_NO_UNICODE_CHARS
# include <cstdint>
#endif
@@ -612,28 +616,14 @@ operator+(const basic_string<_CharT, _Traits, _Allocator>& __x, _CharT __y);
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS string operator+<char, char_traits<char>, allocator<char> >(char const*, string const&))
template <bool>
-class _LIBCPP_TEMPLATE_VIS __basic_string_common
-{
-protected:
- _LIBCPP_NORETURN void __throw_length_error() const;
- _LIBCPP_NORETURN void __throw_out_of_range() const;
-};
-
-template <bool __b>
-void
-__basic_string_common<__b>::__throw_length_error() const
-{
- _VSTD::__throw_length_error("basic_string");
-}
+struct __basic_string_common;
-template <bool __b>
-void
-__basic_string_common<__b>::__throw_out_of_range() const
-{
- _VSTD::__throw_out_of_range("basic_string");
-}
-
-_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS __basic_string_common<true>)
+template <>
+struct __basic_string_common<true> {
+ // Both are defined in string.cpp
+ _LIBCPP_NORETURN _LIBCPP_EXPORTED_FROM_ABI void __throw_length_error() const;
+ _LIBCPP_NORETURN _LIBCPP_EXPORTED_FROM_ABI void __throw_out_of_range() const;
+};
template <class _Iter>
struct __string_is_trivial_iterator : public false_type {};
@@ -674,7 +664,7 @@ typedef basic_string<char8_t> u8string;
#ifndef _LIBCPP_HAS_NO_UNICODE_CHARS
typedef basic_string<char16_t> u16string;
typedef basic_string<char32_t> u32string;
-#endif // _LIBCPP_HAS_NO_UNICODE_CHARS
+#endif
template<class _CharT, class _Traits, class _Allocator>
class
@@ -687,7 +677,7 @@ class
_LIBCPP_PREFERRED_NAME(u32string)
#endif
basic_string
- : private __basic_string_common<true>
+ : private __basic_string_common<true> // This base class is historical, but it needs to remain for ABI compatibility
{
public:
typedef basic_string __self;
@@ -831,7 +821,7 @@ public:
basic_string(basic_string&& __str, const allocator_type& __a);
#endif // _LIBCPP_CXX03_LANG
- template <class = _EnableIf<__is_allocator<_Allocator>::value, nullptr_t> >
+ template <class = __enable_if_t<__is_allocator<_Allocator>::value, nullptr_t> >
_LIBCPP_INLINE_VISIBILITY
basic_string(const _CharT* __s) : __r_(__default_init_tag(), __default_init_tag()) {
_LIBCPP_ASSERT(__s != nullptr, "basic_string(const char*) detected nullptr");
@@ -841,7 +831,7 @@ public:
# endif
}
- template <class = _EnableIf<__is_allocator<_Allocator>::value, nullptr_t> >
+ template <class = __enable_if_t<__is_allocator<_Allocator>::value, nullptr_t> >
_LIBCPP_INLINE_VISIBILITY
basic_string(const _CharT* __s, const _Allocator& __a);
@@ -856,7 +846,7 @@ public:
_LIBCPP_INLINE_VISIBILITY
basic_string(size_type __n, _CharT __c);
- template <class = _EnableIf<__is_allocator<_Allocator>::value, nullptr_t> >
+ template <class = __enable_if_t<__is_allocator<_Allocator>::value, nullptr_t> >
_LIBCPP_INLINE_VISIBILITY
basic_string(size_type __n, _CharT __c, const _Allocator& __a);
@@ -866,24 +856,24 @@ public:
basic_string(const basic_string& __str, size_type __pos,
const _Allocator& __a = _Allocator());
- template<class _Tp, class = _EnableIf<__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value && !__is_same_uncvref<_Tp, basic_string>::value> >
+ template<class _Tp, class = __enable_if_t<__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value && !__is_same_uncvref<_Tp, basic_string>::value> >
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
basic_string(const _Tp& __t, size_type __pos, size_type __n,
const allocator_type& __a = allocator_type());
- template<class _Tp, class = _EnableIf<__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value &&
+ template<class _Tp, class = __enable_if_t<__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value &&
!__is_same_uncvref<_Tp, basic_string>::value> >
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
explicit basic_string(const _Tp& __t);
- template<class _Tp, class = _EnableIf<__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value && !__is_same_uncvref<_Tp, basic_string>::value> >
+ template<class _Tp, class = __enable_if_t<__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value && !__is_same_uncvref<_Tp, basic_string>::value> >
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
explicit basic_string(const _Tp& __t, const allocator_type& __a);
- template<class _InputIterator, class = _EnableIf<__is_cpp17_input_iterator<_InputIterator>::value> >
+ template<class _InputIterator, class = __enable_if_t<__is_cpp17_input_iterator<_InputIterator>::value> >
_LIBCPP_INLINE_VISIBILITY
basic_string(_InputIterator __first, _InputIterator __last);
- template<class _InputIterator, class = _EnableIf<__is_cpp17_input_iterator<_InputIterator>::value> >
+ template<class _InputIterator, class = __enable_if_t<__is_cpp17_input_iterator<_InputIterator>::value> >
_LIBCPP_INLINE_VISIBILITY
basic_string(_InputIterator __first, _InputIterator __last, const allocator_type& __a);
#ifndef _LIBCPP_CXX03_LANG
@@ -900,7 +890,7 @@ public:
basic_string& operator=(const basic_string& __str);
- template <class _Tp, class = _EnableIf<__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value && !__is_same_uncvref<_Tp, basic_string>::value> >
+ template <class _Tp, class = __enable_if_t<__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value && !__is_same_uncvref<_Tp, basic_string>::value> >
basic_string& operator=(const _Tp& __t)
{__self_view __sv = __t; return assign(__sv);}
@@ -1003,7 +993,7 @@ public:
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
- _EnableIf
+ __enable_if_t
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value
&& !__is_same_uncvref<_Tp, basic_string >::value,
@@ -1021,7 +1011,7 @@ public:
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
- _EnableIf<
+ __enable_if_t<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value
&& !__is_same_uncvref<_Tp, basic_string>::value,
basic_string&
@@ -1031,7 +1021,7 @@ public:
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
- _EnableIf
+ __enable_if_t
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value
&& !__is_same_uncvref<_Tp, basic_string>::value,
@@ -1047,7 +1037,7 @@ public:
template<class _InputIterator>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
- _EnableIf
+ __enable_if_t
<
__is_exactly_cpp17_input_iterator<_InputIterator>::value,
basic_string&
@@ -1060,7 +1050,7 @@ public:
}
template<class _ForwardIterator>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
- _EnableIf
+ __enable_if_t
<
__is_cpp17_forward_iterator<_ForwardIterator>::value,
basic_string&
@@ -1083,7 +1073,7 @@ public:
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
- _EnableIf
+ __enable_if_t
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
basic_string&
@@ -1100,7 +1090,7 @@ public:
basic_string& assign(const basic_string& __str, size_type __pos, size_type __n=npos);
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
- _EnableIf
+ __enable_if_t
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value
&& !__is_same_uncvref<_Tp, basic_string>::value,
@@ -1112,7 +1102,7 @@ public:
basic_string& assign(size_type __n, value_type __c);
template<class _InputIterator>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
- _EnableIf
+ __enable_if_t
<
__is_exactly_cpp17_input_iterator<_InputIterator>::value,
basic_string&
@@ -1120,7 +1110,7 @@ public:
assign(_InputIterator __first, _InputIterator __last);
template<class _ForwardIterator>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
- _EnableIf
+ __enable_if_t
<
__is_cpp17_forward_iterator<_ForwardIterator>::value,
basic_string&
@@ -1136,7 +1126,7 @@ public:
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
- _EnableIf
+ __enable_if_t
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
basic_string&
@@ -1146,7 +1136,7 @@ public:
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
- _EnableIf
+ __enable_if_t
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value && !__is_same_uncvref<_Tp, basic_string>::value,
basic_string&
@@ -1161,7 +1151,7 @@ public:
iterator insert(const_iterator __pos, size_type __n, value_type __c);
template<class _InputIterator>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
- _EnableIf
+ __enable_if_t
<
__is_exactly_cpp17_input_iterator<_InputIterator>::value,
iterator
@@ -1169,7 +1159,7 @@ public:
insert(const_iterator __pos, _InputIterator __first, _InputIterator __last);
template<class _ForwardIterator>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
- _EnableIf
+ __enable_if_t
<
__is_cpp17_forward_iterator<_ForwardIterator>::value,
iterator
@@ -1192,7 +1182,7 @@ public:
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
- _EnableIf
+ __enable_if_t
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
basic_string&
@@ -1201,7 +1191,7 @@ public:
basic_string& replace(size_type __pos1, size_type __n1, const basic_string& __str, size_type __pos2, size_type __n2=npos);
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
- _EnableIf
+ __enable_if_t
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value && !__is_same_uncvref<_Tp, basic_string>::value,
basic_string&
@@ -1215,7 +1205,7 @@ public:
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
- _EnableIf
+ __enable_if_t
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
basic_string&
@@ -1230,7 +1220,7 @@ public:
basic_string& replace(const_iterator __i1, const_iterator __i2, size_type __n, value_type __c);
template<class _InputIterator>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
- _EnableIf
+ __enable_if_t
<
__is_cpp17_input_iterator<_InputIterator>::value,
basic_string&
@@ -1272,7 +1262,7 @@ public:
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
- _EnableIf
+ __enable_if_t
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
size_type
@@ -1288,7 +1278,7 @@ public:
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
- _EnableIf
+ __enable_if_t
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
size_type
@@ -1304,7 +1294,7 @@ public:
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
- _EnableIf
+ __enable_if_t
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
size_type
@@ -1321,7 +1311,7 @@ public:
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
- _EnableIf
+ __enable_if_t
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
size_type
@@ -1338,7 +1328,7 @@ public:
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
- _EnableIf
+ __enable_if_t
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
size_type
@@ -1355,7 +1345,7 @@ public:
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
- _EnableIf
+ __enable_if_t
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
size_type
@@ -1372,7 +1362,7 @@ public:
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
- _EnableIf
+ __enable_if_t
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
int
@@ -1381,7 +1371,7 @@ public:
template <class _Tp>
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
- _EnableIf
+ __enable_if_t
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
int
@@ -1394,7 +1384,7 @@ public:
template <class _Tp>
inline _LIBCPP_INLINE_VISIBILITY
- _EnableIf
+ __enable_if_t
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value && !__is_same_uncvref<_Tp, basic_string>::value,
int
@@ -1405,28 +1395,28 @@ public:
int compare(size_type __pos1, size_type __n1, const value_type* __s, size_type __n2) const;
#if _LIBCPP_STD_VER > 17
- _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
- bool starts_with(__self_view __sv) const _NOEXCEPT
+ constexpr _LIBCPP_INLINE_VISIBILITY
+ bool starts_with(__self_view __sv) const noexcept
{ return __self_view(data(), size()).starts_with(__sv); }
- _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
- bool starts_with(value_type __c) const _NOEXCEPT
+ constexpr _LIBCPP_INLINE_VISIBILITY
+ bool starts_with(value_type __c) const noexcept
{ return !empty() && _Traits::eq(front(), __c); }
- _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
- bool starts_with(const value_type* __s) const _NOEXCEPT
+ constexpr _LIBCPP_INLINE_VISIBILITY
+ bool starts_with(const value_type* __s) const noexcept
{ return starts_with(__self_view(__s)); }
- _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
- bool ends_with(__self_view __sv) const _NOEXCEPT
+ constexpr _LIBCPP_INLINE_VISIBILITY
+ bool ends_with(__self_view __sv) const noexcept
{ return __self_view(data(), size()).ends_with( __sv); }
- _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
- bool ends_with(value_type __c) const _NOEXCEPT
+ constexpr _LIBCPP_INLINE_VISIBILITY
+ bool ends_with(value_type __c) const noexcept
{ return !empty() && _Traits::eq(back(), __c); }
- _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
- bool ends_with(const value_type* __s) const _NOEXCEPT
+ constexpr _LIBCPP_INLINE_VISIBILITY
+ bool ends_with(const value_type* __s) const noexcept
{ return ends_with(__self_view(__s)); }
#endif
@@ -1590,7 +1580,7 @@ private:
template <class _InputIterator>
inline
- _EnableIf
+ __enable_if_t
<
__is_exactly_cpp17_input_iterator<_InputIterator>::value
>
@@ -1598,7 +1588,7 @@ private:
template <class _ForwardIterator>
inline
- _EnableIf
+ __enable_if_t
<
__is_cpp17_forward_iterator<_ForwardIterator>::value
>
@@ -1714,6 +1704,24 @@ private:
return data() <= __p && __p <= data() + size();
}
+ _LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI
+ void __throw_length_error() const {
+#ifndef _LIBCPP_NO_EXCEPTIONS
+ __basic_string_common<true>::__throw_length_error();
+#else
+ _VSTD::abort();
+#endif
+ }
+
+ _LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI
+ void __throw_out_of_range() const {
+#ifndef _LIBCPP_NO_EXCEPTIONS
+ __basic_string_common<true>::__throw_out_of_range();
+#else
+ _VSTD::abort();
+#endif
+ }
+
friend basic_string operator+<>(const basic_string&, const basic_string&);
friend basic_string operator+<>(const value_type*, const basic_string&);
friend basic_string operator+<>(value_type, const basic_string&);
@@ -1724,20 +1732,24 @@ private:
// These declarations must appear before any functions are implicitly used
// so that they have the correct visibility specifier.
#ifdef _LIBCPP_ABI_STRING_OPTIMIZED_EXTERNAL_INSTANTIATION
-_LIBCPP_STRING_UNSTABLE_EXTERN_TEMPLATE_LIST(_LIBCPP_EXTERN_TEMPLATE, char)
-_LIBCPP_STRING_UNSTABLE_EXTERN_TEMPLATE_LIST(_LIBCPP_EXTERN_TEMPLATE, wchar_t)
+ _LIBCPP_STRING_UNSTABLE_EXTERN_TEMPLATE_LIST(_LIBCPP_EXTERN_TEMPLATE, char)
+# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
+ _LIBCPP_STRING_UNSTABLE_EXTERN_TEMPLATE_LIST(_LIBCPP_EXTERN_TEMPLATE, wchar_t)
+# endif
#else
-_LIBCPP_STRING_V1_EXTERN_TEMPLATE_LIST(_LIBCPP_EXTERN_TEMPLATE, char)
-_LIBCPP_STRING_V1_EXTERN_TEMPLATE_LIST(_LIBCPP_EXTERN_TEMPLATE, wchar_t)
+ _LIBCPP_STRING_V1_EXTERN_TEMPLATE_LIST(_LIBCPP_EXTERN_TEMPLATE, char)
+# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
+ _LIBCPP_STRING_V1_EXTERN_TEMPLATE_LIST(_LIBCPP_EXTERN_TEMPLATE, wchar_t)
+# endif
#endif
-#ifndef _LIBCPP_HAS_NO_DEDUCTION_GUIDES
+#if _LIBCPP_STD_VER >= 17
template<class _InputIterator,
class _CharT = __iter_value_type<_InputIterator>,
class _Allocator = allocator<_CharT>,
- class = _EnableIf<__is_cpp17_input_iterator<_InputIterator>::value>,
- class = _EnableIf<__is_allocator<_Allocator>::value>
+ class = enable_if_t<__is_cpp17_input_iterator<_InputIterator>::value>,
+ class = enable_if_t<__is_allocator<_Allocator>::value>
>
basic_string(_InputIterator, _InputIterator, _Allocator = _Allocator())
-> basic_string<_CharT, char_traits<_CharT>, _Allocator>;
@@ -1745,7 +1757,7 @@ basic_string(_InputIterator, _InputIterator, _Allocator = _Allocator())
template<class _CharT,
class _Traits,
class _Allocator = allocator<_CharT>,
- class = _EnableIf<__is_allocator<_Allocator>::value>
+ class = enable_if_t<__is_allocator<_Allocator>::value>
>
explicit basic_string(basic_string_view<_CharT, _Traits>, const _Allocator& = _Allocator())
-> basic_string<_CharT, _Traits, _Allocator>;
@@ -1753,7 +1765,7 @@ explicit basic_string(basic_string_view<_CharT, _Traits>, const _Allocator& = _A
template<class _CharT,
class _Traits,
class _Allocator = allocator<_CharT>,
- class = _EnableIf<__is_allocator<_Allocator>::value>,
+ class = enable_if_t<__is_allocator<_Allocator>::value>,
class _Sz = typename allocator_traits<_Allocator>::size_type
>
basic_string(basic_string_view<_CharT, _Traits>, _Sz, _Sz, const _Allocator& = _Allocator())
@@ -2117,7 +2129,7 @@ basic_string<_CharT, _Traits, _Allocator>::basic_string(const _Tp & __t, const _
template <class _CharT, class _Traits, class _Allocator>
template <class _InputIterator>
-_EnableIf
+__enable_if_t
<
__is_exactly_cpp17_input_iterator<_InputIterator>::value
>
@@ -2143,7 +2155,7 @@ basic_string<_CharT, _Traits, _Allocator>::__init(_InputIterator __first, _Input
template <class _CharT, class _Traits, class _Allocator>
template <class _ForwardIterator>
-_EnableIf
+__enable_if_t
<
__is_cpp17_forward_iterator<_ForwardIterator>::value
>
@@ -2354,7 +2366,7 @@ basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::assign(const value_type* __s, size_type __n)
{
_LIBCPP_ASSERT(__n == 0 || __s != nullptr, "string::assign received nullptr");
- return (_LIBCPP_BUILTIN_CONSTANT_P(__n) && __n < __min_cap)
+ return (__builtin_constant_p(__n) && __n < __min_cap)
? __assign_short(__s, __n)
: __assign_external(__s, __n);
}
@@ -2472,7 +2484,7 @@ basic_string<_CharT, _Traits, _Allocator>::operator=(basic_string&& __str)
template <class _CharT, class _Traits, class _Allocator>
template<class _InputIterator>
-_EnableIf
+__enable_if_t
<
__is_exactly_cpp17_input_iterator<_InputIterator>::value,
basic_string<_CharT, _Traits, _Allocator>&
@@ -2486,7 +2498,7 @@ basic_string<_CharT, _Traits, _Allocator>::assign(_InputIterator __first, _Input
template <class _CharT, class _Traits, class _Allocator>
template<class _ForwardIterator>
-_EnableIf
+__enable_if_t
<
__is_cpp17_forward_iterator<_ForwardIterator>::value,
basic_string<_CharT, _Traits, _Allocator>&
@@ -2506,7 +2518,7 @@ basic_string<_CharT, _Traits, _Allocator>::assign(_ForwardIterator __first, _For
__grow_by(__cap, __n - __cap, __sz, 0, __sz);
}
pointer __p = __get_pointer();
- for (; __first != __last; ++__first, ++__p)
+ for (; __first != __last; ++__p, (void) ++__first)
traits_type::assign(*__p, *__first);
traits_type::assign(*__p, value_type());
__set_size(__n);
@@ -2532,7 +2544,7 @@ basic_string<_CharT, _Traits, _Allocator>::assign(const basic_string& __str, siz
template <class _CharT, class _Traits, class _Allocator>
template <class _Tp>
-_EnableIf
+__enable_if_t
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value
&& !__is_same_uncvref<_Tp, basic_string<_CharT, _Traits, _Allocator> >::value,
@@ -2559,7 +2571,7 @@ basic_string<_CharT, _Traits, _Allocator>&
basic_string<_CharT, _Traits, _Allocator>::assign(const value_type* __s)
{
_LIBCPP_ASSERT(__s != nullptr, "string::assign received nullptr");
- return _LIBCPP_BUILTIN_CONSTANT_P(*__s)
+ return __builtin_constant_p(*__s)
? (traits_type::length(__s) < __min_cap
? __assign_short(__s, traits_type::length(__s))
: __assign_external(__s, traits_type::length(__s)))
@@ -2646,7 +2658,7 @@ basic_string<_CharT, _Traits, _Allocator>::push_back(value_type __c)
if (__sz == __cap)
{
__grow_by(__cap, 1, __sz, __sz, 0);
- __is_short = !__is_long();
+ __is_short = false; // the string is always long after __grow_by
}
pointer __p;
if (__is_short)
@@ -2665,7 +2677,7 @@ basic_string<_CharT, _Traits, _Allocator>::push_back(value_type __c)
template <class _CharT, class _Traits, class _Allocator>
template<class _ForwardIterator>
-_EnableIf
+__enable_if_t
<
__is_cpp17_forward_iterator<_ForwardIterator>::value,
basic_string<_CharT, _Traits, _Allocator>&
@@ -2684,7 +2696,7 @@ basic_string<_CharT, _Traits, _Allocator>::append(
if (__cap - __sz < __n)
__grow_by(__cap, __sz + __n - __cap, __sz, __sz, 0);
pointer __p = __get_pointer() + __sz;
- for (; __first != __last; ++__p, ++__first)
+ for (; __first != __last; ++__p, (void) ++__first)
traits_type::assign(*__p, *__first);
traits_type::assign(*__p, value_type());
__set_size(__sz + __n);
@@ -2718,7 +2730,7 @@ basic_string<_CharT, _Traits, _Allocator>::append(const basic_string& __str, siz
template <class _CharT, class _Traits, class _Allocator>
template <class _Tp>
- _EnableIf
+ __enable_if_t
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value && !__is_same_uncvref<_Tp, basic_string<_CharT, _Traits, _Allocator> >::value,
basic_string<_CharT, _Traits, _Allocator>&
@@ -2807,7 +2819,7 @@ basic_string<_CharT, _Traits, _Allocator>::insert(size_type __pos, size_type __n
template <class _CharT, class _Traits, class _Allocator>
template<class _InputIterator>
-_EnableIf
+__enable_if_t
<
__is_exactly_cpp17_input_iterator<_InputIterator>::value,
typename basic_string<_CharT, _Traits, _Allocator>::iterator
@@ -2825,7 +2837,7 @@ basic_string<_CharT, _Traits, _Allocator>::insert(const_iterator __pos, _InputIt
template <class _CharT, class _Traits, class _Allocator>
template<class _ForwardIterator>
-_EnableIf
+__enable_if_t
<
__is_cpp17_forward_iterator<_ForwardIterator>::value,
typename basic_string<_CharT, _Traits, _Allocator>::iterator
@@ -2862,7 +2874,7 @@ basic_string<_CharT, _Traits, _Allocator>::insert(const_iterator __pos, _Forward
__sz += __n;
__set_size(__sz);
traits_type::assign(__p[__sz], value_type());
- for (__p += __ip; __first != __last; ++__p, ++__first)
+ for (__p += __ip; __first != __last; ++__p, (void) ++__first)
traits_type::assign(*__p, *__first);
}
else
@@ -2895,7 +2907,7 @@ basic_string<_CharT, _Traits, _Allocator>::insert(size_type __pos1, const basic_
template <class _CharT, class _Traits, class _Allocator>
template <class _Tp>
-_EnableIf
+__enable_if_t
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value && !__is_same_uncvref<_Tp, basic_string<_CharT, _Traits, _Allocator> >::value,
basic_string<_CharT, _Traits, _Allocator>&
@@ -3052,7 +3064,7 @@ basic_string<_CharT, _Traits, _Allocator>::replace(size_type __pos, size_type __
template <class _CharT, class _Traits, class _Allocator>
template<class _InputIterator>
-_EnableIf
+__enable_if_t
<
__is_cpp17_input_iterator<_InputIterator>::value,
basic_string<_CharT, _Traits, _Allocator>&
@@ -3085,7 +3097,7 @@ basic_string<_CharT, _Traits, _Allocator>::replace(size_type __pos1, size_type _
template <class _CharT, class _Traits, class _Allocator>
template <class _Tp>
-_EnableIf
+__enable_if_t
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value && !__is_same_uncvref<_Tp, basic_string<_CharT, _Traits, _Allocator> >::value,
basic_string<_CharT, _Traits, _Allocator>&
@@ -3544,7 +3556,7 @@ basic_string<_CharT, _Traits, _Allocator>::find(const basic_string& __str,
template<class _CharT, class _Traits, class _Allocator>
template <class _Tp>
-_EnableIf
+__enable_if_t
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
typename basic_string<_CharT, _Traits, _Allocator>::size_type
@@ -3602,7 +3614,7 @@ basic_string<_CharT, _Traits, _Allocator>::rfind(const basic_string& __str,
template<class _CharT, class _Traits, class _Allocator>
template <class _Tp>
-_EnableIf
+__enable_if_t
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
typename basic_string<_CharT, _Traits, _Allocator>::size_type
@@ -3660,7 +3672,7 @@ basic_string<_CharT, _Traits, _Allocator>::find_first_of(const basic_string& __s
template<class _CharT, class _Traits, class _Allocator>
template <class _Tp>
-_EnableIf
+__enable_if_t
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
typename basic_string<_CharT, _Traits, _Allocator>::size_type
@@ -3718,7 +3730,7 @@ basic_string<_CharT, _Traits, _Allocator>::find_last_of(const basic_string& __st
template<class _CharT, class _Traits, class _Allocator>
template <class _Tp>
-_EnableIf
+__enable_if_t
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
typename basic_string<_CharT, _Traits, _Allocator>::size_type
@@ -3776,7 +3788,7 @@ basic_string<_CharT, _Traits, _Allocator>::find_first_not_of(const basic_string&
template<class _CharT, class _Traits, class _Allocator>
template <class _Tp>
-_EnableIf
+__enable_if_t
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
typename basic_string<_CharT, _Traits, _Allocator>::size_type
@@ -3835,7 +3847,7 @@ basic_string<_CharT, _Traits, _Allocator>::find_last_not_of(const basic_string&
template<class _CharT, class _Traits, class _Allocator>
template <class _Tp>
-_EnableIf
+__enable_if_t
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
typename basic_string<_CharT, _Traits, _Allocator>::size_type
@@ -3873,7 +3885,7 @@ basic_string<_CharT, _Traits, _Allocator>::find_last_not_of(value_type __c,
template <class _CharT, class _Traits, class _Allocator>
template <class _Tp>
-_EnableIf
+__enable_if_t
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
int
@@ -3927,7 +3939,7 @@ basic_string<_CharT, _Traits, _Allocator>::compare(size_type __pos1,
template <class _CharT, class _Traits, class _Allocator>
template <class _Tp>
-_EnableIf
+__enable_if_t
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value,
int
@@ -3952,7 +3964,7 @@ basic_string<_CharT, _Traits, _Allocator>::compare(size_type __pos1,
template <class _CharT, class _Traits, class _Allocator>
template <class _Tp>
-_EnableIf
+__enable_if_t
<
__can_be_converted_to_string_view<_CharT, _Traits, _Tp>::value
&& !__is_same_uncvref<_Tp, basic_string<_CharT, _Traits, _Allocator> >::value,
@@ -4390,6 +4402,7 @@ _LIBCPP_FUNC_VIS string to_string(float __val);
_LIBCPP_FUNC_VIS string to_string(double __val);
_LIBCPP_FUNC_VIS string to_string(long double __val);
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
_LIBCPP_FUNC_VIS int stoi (const wstring& __str, size_t* __idx = nullptr, int __base = 10);
_LIBCPP_FUNC_VIS long stol (const wstring& __str, size_t* __idx = nullptr, int __base = 10);
_LIBCPP_FUNC_VIS unsigned long stoul (const wstring& __str, size_t* __idx = nullptr, int __base = 10);
@@ -4409,6 +4422,7 @@ _LIBCPP_FUNC_VIS wstring to_wstring(unsigned long long __val);
_LIBCPP_FUNC_VIS wstring to_wstring(float __val);
_LIBCPP_FUNC_VIS wstring to_wstring(double __val);
_LIBCPP_FUNC_VIS wstring to_wstring(long double __val);
+#endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS
template<class _CharT, class _Traits, class _Allocator>
_LIBCPP_TEMPLATE_DATA_VIS
@@ -4530,11 +4544,13 @@ inline namespace literals
return basic_string<char> (__str, __len);
}
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
inline _LIBCPP_INLINE_VISIBILITY
basic_string<wchar_t> operator "" s( const wchar_t *__str, size_t __len )
{
return basic_string<wchar_t> (__str, __len);
}
+#endif
#ifndef _LIBCPP_HAS_NO_CHAR8_T
inline _LIBCPP_INLINE_VISIBILITY
diff --git a/libcxx/include/string.h b/libcxx/include/string.h
index 2d563444b7f9..e5a8cfaf1928 100644
--- a/libcxx/include/string.h
+++ b/libcxx/include/string.h
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- string.h ---------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/string_view b/libcxx/include/string_view
index 2c94cb85269f..0ad7dcce9848 100644
--- a/libcxx/include/string_view
+++ b/libcxx/include/string_view
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===------------------------ string_view ---------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -85,6 +85,8 @@ namespace std {
constexpr basic_string_view(const charT* str);
basic_string_view(nullptr_t) = delete; // C++2b
constexpr basic_string_view(const charT* str, size_type len);
+ template <class It, class End>
+ constexpr basic_string_view(It begin, End end); // C++20
// 7.4, basic_string_view iterator support
constexpr const_iterator begin() const noexcept;
@@ -166,6 +168,10 @@ namespace std {
size_type size_; // exposition only
};
+ // basic_string_view deduction guides
+ template<class It, class End>
+ basic_string_view(It, End) -> basic_string_view<iter_value_t<It>>; // C++20
+
// 7.11, Hash support
template <class T> struct hash;
template <> struct hash<string_view>;
@@ -185,6 +191,8 @@ namespace std {
*/
+#include <__concepts/convertible_to.h>
+#include <__concepts/same_as.h>
#include <__config>
#include <__debug>
#include <__ranges/enable_borrowed_range.h>
@@ -217,7 +225,9 @@ typedef basic_string_view<char8_t> u8string_view;
#endif
typedef basic_string_view<char16_t> u16string_view;
typedef basic_string_view<char32_t> u32string_view;
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
typedef basic_string_view<wchar_t> wstring_view;
+#endif
template<class _CharT, class _Traits>
class
@@ -227,7 +237,7 @@ class
#endif
_LIBCPP_PREFERRED_NAME(u16string_view)
_LIBCPP_PREFERRED_NAME(u32string_view)
- _LIBCPP_PREFERRED_NAME(wstring_view)
+ _LIBCPP_IF_WIDE_CHARACTERS(_LIBCPP_PREFERRED_NAME(wstring_view))
basic_string_view {
public:
// types
@@ -255,10 +265,10 @@ public:
_LIBCPP_CONSTEXPR _LIBCPP_INLINE_VISIBILITY
basic_string_view() _NOEXCEPT : __data (nullptr), __size(0) {}
- _LIBCPP_CONSTEXPR _LIBCPP_INLINE_VISIBILITY
+ _LIBCPP_INLINE_VISIBILITY
basic_string_view(const basic_string_view&) _NOEXCEPT = default;
- _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
+ _LIBCPP_INLINE_VISIBILITY
basic_string_view& operator=(const basic_string_view&) _NOEXCEPT = default;
_LIBCPP_CONSTEXPR _LIBCPP_INLINE_VISIBILITY
@@ -270,6 +280,16 @@ public:
#endif
}
+#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_RANGES)
+ template <contiguous_iterator _It, sized_sentinel_for<_It> _End>
+ requires (same_as<iter_value_t<_It>, _CharT> && !convertible_to<_End, size_type>)
+ constexpr _LIBCPP_HIDE_FROM_ABI basic_string_view(_It __begin, _End __end)
+ : __data(_VSTD::to_address(__begin)), __size(__end - __begin)
+ {
+ _LIBCPP_ASSERT((__end - __begin) >= 0, "std::string_view::string_view(iterator, sentinel) received invalid range");
+ }
+#endif
+
_LIBCPP_CONSTEXPR _LIBCPP_INLINE_VISIBILITY
basic_string_view(const _CharT* __s)
: __data(__s), __size(_VSTD::__char_traits_length_checked<_Traits>(__s)) {}
@@ -618,28 +638,28 @@ public:
}
#if _LIBCPP_STD_VER > 17
- _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
- bool starts_with(basic_string_view __s) const _NOEXCEPT
+ constexpr _LIBCPP_INLINE_VISIBILITY
+ bool starts_with(basic_string_view __s) const noexcept
{ return size() >= __s.size() && compare(0, __s.size(), __s) == 0; }
- _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
- bool starts_with(value_type __c) const _NOEXCEPT
+ constexpr _LIBCPP_INLINE_VISIBILITY
+ bool starts_with(value_type __c) const noexcept
{ return !empty() && _Traits::eq(front(), __c); }
- _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
- bool starts_with(const value_type* __s) const _NOEXCEPT
+ constexpr _LIBCPP_INLINE_VISIBILITY
+ bool starts_with(const value_type* __s) const noexcept
{ return starts_with(basic_string_view(__s)); }
- _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
- bool ends_with(basic_string_view __s) const _NOEXCEPT
+ constexpr _LIBCPP_INLINE_VISIBILITY
+ bool ends_with(basic_string_view __s) const noexcept
{ return size() >= __s.size() && compare(size() - __s.size(), npos, __s) == 0; }
- _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
- bool ends_with(value_type __c) const _NOEXCEPT
+ constexpr _LIBCPP_INLINE_VISIBILITY
+ bool ends_with(value_type __c) const noexcept
{ return !empty() && _Traits::eq(back(), __c); }
- _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
- bool ends_with(const value_type* __s) const _NOEXCEPT
+ constexpr _LIBCPP_INLINE_VISIBILITY
+ bool ends_with(const value_type* __s) const noexcept
{ return ends_with(basic_string_view(__s)); }
#endif
@@ -670,6 +690,13 @@ template <class _CharT, class _Traits>
inline constexpr bool ranges::enable_borrowed_range<basic_string_view<_CharT, _Traits> > = true;
#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_RANGES)
+// [string.view.deduct]
+
+#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_RANGES)
+template <contiguous_iterator _It, sized_sentinel_for<_It> _End>
+ basic_string_view(_It, _End) -> basic_string_view<iter_value_t<_It>>;
+#endif
+
// [string.view.comparison]
// operator ==
template<class _CharT, class _Traits>
@@ -860,11 +887,13 @@ inline namespace literals
return basic_string_view<char> (__str, __len);
}
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR
basic_string_view<wchar_t> operator "" sv(const wchar_t *__str, size_t __len) _NOEXCEPT
{
return basic_string_view<wchar_t> (__str, __len);
}
+#endif
#ifndef _LIBCPP_HAS_NO_CHAR8_T
inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR
diff --git a/libcxx/include/strstream b/libcxx/include/strstream
index ca837aef670f..a5f17a9dc319 100644
--- a/libcxx/include/strstream
+++ b/libcxx/include/strstream
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- strstream --------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/system_error b/libcxx/include/system_error
index aab97681156c..3aa869acff41 100644
--- a/libcxx/include/system_error
+++ b/libcxx/include/system_error
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===---------------------------- system_error ----------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -46,10 +46,10 @@ template <class T> struct is_error_condition_enum
: public false_type {};
template <class _Tp>
-inline constexpr size_t is_error_condition_enum_v = is_error_condition_enum<_Tp>::value; // C++17
+inline constexpr bool is_error_condition_enum_v = is_error_condition_enum<_Tp>::value; // C++17
template <class _Tp>
-inline constexpr size_t is_error_code_enum_v = is_error_code_enum<_Tp>::value; // C++17
+inline constexpr bool is_error_code_enum_v = is_error_code_enum<_Tp>::value; // C++17
class error_code
{
@@ -165,7 +165,7 @@ struct _LIBCPP_TEMPLATE_VIS is_error_code_enum
#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR constexpr size_t is_error_code_enum_v = is_error_code_enum<_Tp>::value;
+inline constexpr bool is_error_code_enum_v = is_error_code_enum<_Tp>::value;
#endif
// is_error_condition_enum
@@ -176,7 +176,7 @@ struct _LIBCPP_TEMPLATE_VIS is_error_condition_enum
#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR constexpr size_t is_error_condition_enum_v = is_error_condition_enum<_Tp>::value;
+inline constexpr bool is_error_condition_enum_v = is_error_condition_enum<_Tp>::value;
#endif
template <>
diff --git a/libcxx/include/tgmath.h b/libcxx/include/tgmath.h
index ba9396e6abb7..412bde1d5eaf 100644
--- a/libcxx/include/tgmath.h
+++ b/libcxx/include/tgmath.h
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===-------------------------- tgmath.h ----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/thread b/libcxx/include/thread
index acfc20bce2dd..a51a11c0d3c8 100644
--- a/libcxx/include/thread
+++ b/libcxx/include/thread
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- thread -----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -86,8 +86,9 @@ void sleep_for(const chrono::duration<Rep, Period>& rel_time);
#include <__debug>
#include <__functional_base>
#include <__mutex_base>
+#include <__thread/poll_with_backoff.h>
#include <__threading_support>
-#include <__utility/__decay_copy.h>
+#include <__utility/decay_copy.h>
#include <__utility/forward.h>
#include <chrono>
#include <cstddef>
diff --git a/libcxx/include/tuple b/libcxx/include/tuple
index e1019ef999d5..2e3d19627fab 100644
--- a/libcxx/include/tuple
+++ b/libcxx/include/tuple
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- tuple ------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -132,11 +132,14 @@ template <class T1, class... T>
// 20.4.1.6, relational operators:
template<class... T, class... U> bool operator==(const tuple<T...>&, const tuple<U...>&); // constexpr in C++14
-template<class... T, class... U> bool operator<(const tuple<T...>&, const tuple<U...>&); // constexpr in C++14
-template<class... T, class... U> bool operator!=(const tuple<T...>&, const tuple<U...>&); // constexpr in C++14
-template<class... T, class... U> bool operator>(const tuple<T...>&, const tuple<U...>&); // constexpr in C++14
-template<class... T, class... U> bool operator<=(const tuple<T...>&, const tuple<U...>&); // constexpr in C++14
-template<class... T, class... U> bool operator>=(const tuple<T...>&, const tuple<U...>&); // constexpr in C++14
+template<class... T, class... U> bool operator<(const tuple<T...>&, const tuple<U...>&); // constexpr in C++14, removed in C++20
+template<class... T, class... U> bool operator!=(const tuple<T...>&, const tuple<U...>&); // constexpr in C++14, removed in C++20
+template<class... T, class... U> bool operator>(const tuple<T...>&, const tuple<U...>&); // constexpr in C++14, removed in C++20
+template<class... T, class... U> bool operator<=(const tuple<T...>&, const tuple<U...>&); // constexpr in C++14, removed in C++20
+template<class... T, class... U> bool operator>=(const tuple<T...>&, const tuple<U...>&); // constexpr in C++14, removed in C++20
+template<class... T, class... U>
+ constexpr common_comparison_category_t<synth-three-way-result<T, U>...>
+ operator<=>(const tuple<T...>&, const tuple<U...>&); // since C++20
template <class... Types, class Alloc>
struct uses_allocator<tuple<Types...>, Alloc>;
@@ -149,6 +152,8 @@ template <class... Types>
*/
+#include <__compare/common_comparison_category.h>
+#include <__compare/synth_three_way.h>
#include <__config>
#include <__functional/unwrap_ref.h>
#include <__functional_base>
@@ -156,6 +161,7 @@ template <class... Types>
#include <__memory/uses_allocator.h>
#include <__tuple>
#include <__utility/forward.h>
+#include <__utility/integer_sequence.h>
#include <__utility/move.h>
#include <compare>
#include <cstddef>
@@ -231,7 +237,7 @@ public:
"Attempted to default construct a reference element in a tuple");}
template <class _Tp,
- class = _EnableIf<
+ class = __enable_if_t<
_And<
_IsNotSame<__uncvref_t<_Tp>, __tuple_leaf>,
is_constructible<_Hp, _Tp>
@@ -304,7 +310,7 @@ public:
: _Hp(__a) {}
template <class _Tp,
- class = _EnableIf<
+ class = __enable_if_t<
_And<
_IsNotSame<__uncvref_t<_Tp>, __tuple_leaf>,
is_constructible<_Hp, _Tp>
@@ -466,7 +472,7 @@ public:
// [tuple.cnstr]
// tuple() constructors (including allocator_arg_t variants)
- template <template<class...> class _IsImpDefault = __is_implicitly_default_constructible, _EnableIf<
+ template <template<class...> class _IsImpDefault = __is_implicitly_default_constructible, __enable_if_t<
_And<
_IsImpDefault<_Tp>... // explicit check
>::value
@@ -477,7 +483,7 @@ public:
{ }
template <template<class...> class _IsImpDefault = __is_implicitly_default_constructible,
- template<class...> class _IsDefault = is_default_constructible, _EnableIf<
+ template<class...> class _IsDefault = is_default_constructible, __enable_if_t<
_And<
_IsDefault<_Tp>...,
_Not<_Lazy<_And, _IsImpDefault<_Tp>...> > // explicit check
@@ -488,7 +494,7 @@ public:
_NOEXCEPT_(_And<is_nothrow_default_constructible<_Tp>...>::value)
{ }
- template <class _Alloc, template<class...> class _IsImpDefault = __is_implicitly_default_constructible, _EnableIf<
+ template <class _Alloc, template<class...> class _IsImpDefault = __is_implicitly_default_constructible, __enable_if_t<
_And<
_IsImpDefault<_Tp>... // explicit check
>::value
@@ -502,7 +508,7 @@ public:
template <class _Alloc,
template<class...> class _IsImpDefault = __is_implicitly_default_constructible,
- template<class...> class _IsDefault = is_default_constructible, _EnableIf<
+ template<class...> class _IsDefault = is_default_constructible, __enable_if_t<
_And<
_IsDefault<_Tp>...,
_Not<_Lazy<_And, _IsImpDefault<_Tp>...> > // explicit check
@@ -516,7 +522,7 @@ public:
__tuple_types<_Tp...>()) {}
// tuple(const T&...) constructors (including allocator_arg_t variants)
- template <template<class...> class _And = _And, _EnableIf<
+ template <template<class...> class _And = _And, __enable_if_t<
_And<
_BoolConstant<sizeof...(_Tp) >= 1>,
is_copy_constructible<_Tp>...,
@@ -533,7 +539,7 @@ public:
__t...
) {}
- template <template<class...> class _And = _And, _EnableIf<
+ template <template<class...> class _And = _And, __enable_if_t<
_And<
_BoolConstant<sizeof...(_Tp) >= 1>,
is_copy_constructible<_Tp>...,
@@ -550,7 +556,7 @@ public:
__t...
) {}
- template <class _Alloc, template<class...> class _And = _And, _EnableIf<
+ template <class _Alloc, template<class...> class _And = _And, __enable_if_t<
_And<
_BoolConstant<sizeof...(_Tp) >= 1>,
is_copy_constructible<_Tp>...,
@@ -567,7 +573,7 @@ public:
__t...
) {}
- template <class _Alloc, template<class...> class _And = _And, _EnableIf<
+ template <class _Alloc, template<class...> class _And = _And, __enable_if_t<
_And<
_BoolConstant<sizeof...(_Tp) >= 1>,
is_copy_constructible<_Tp>...,
@@ -595,7 +601,7 @@ public:
is_constructible<_Tp, _Up>...
> { };
- template <class ..._Up, _EnableIf<
+ template <class ..._Up, __enable_if_t<
_And<
_BoolConstant<sizeof...(_Up) == sizeof...(_Tp)>,
_EnableUTypesCtor<_Up...>,
@@ -611,7 +617,7 @@ public:
typename __make_tuple_types<tuple, sizeof...(_Tp), sizeof...(_Up)>::type(),
_VSTD::forward<_Up>(__u)...) {}
- template <class ..._Up, _EnableIf<
+ template <class ..._Up, __enable_if_t<
_And<
_BoolConstant<sizeof...(_Up) == sizeof...(_Tp)>,
_EnableUTypesCtor<_Up...>,
@@ -627,7 +633,7 @@ public:
typename __make_tuple_types<tuple, sizeof...(_Tp), sizeof...(_Up)>::type(),
_VSTD::forward<_Up>(__u)...) {}
- template <class _Alloc, class ..._Up, _EnableIf<
+ template <class _Alloc, class ..._Up, __enable_if_t<
_And<
_BoolConstant<sizeof...(_Up) == sizeof...(_Tp)>,
_EnableUTypesCtor<_Up...>,
@@ -643,7 +649,7 @@ public:
typename __make_tuple_types<tuple, sizeof...(_Tp), sizeof...(_Up)>::type(),
_VSTD::forward<_Up>(__u)...) {}
- template <class _Alloc, class ..._Up, _EnableIf<
+ template <class _Alloc, class ..._Up, __enable_if_t<
_And<
_BoolConstant<sizeof...(_Up) == sizeof...(_Tp)>,
_EnableUTypesCtor<_Up...>,
@@ -663,14 +669,14 @@ public:
tuple(const tuple&) = default;
tuple(tuple&&) = default;
- template <class _Alloc, template<class...> class _And = _And, _EnableIf<
+ template <class _Alloc, template<class...> class _And = _And, __enable_if_t<
_And<is_copy_constructible<_Tp>...>::value
, int> = 0>
tuple(allocator_arg_t, const _Alloc& __alloc, const tuple& __t)
: __base_(allocator_arg_t(), __alloc, __t)
{ }
- template <class _Alloc, template<class...> class _And = _And, _EnableIf<
+ template <class _Alloc, template<class...> class _And = _And, __enable_if_t<
_And<is_move_constructible<_Tp>...>::value
, int> = 0>
tuple(allocator_arg_t, const _Alloc& __alloc, tuple&& __t)
@@ -693,7 +699,7 @@ public:
is_constructible<_Tp, const _Up&>...
> { };
- template <class ..._Up, _EnableIf<
+ template <class ..._Up, __enable_if_t<
_And<
_BoolConstant<sizeof...(_Up) == sizeof...(_Tp)>,
_EnableCopyFromOtherTuple<_Up...>,
@@ -706,7 +712,7 @@ public:
: __base_(__t)
{ }
- template <class ..._Up, _EnableIf<
+ template <class ..._Up, __enable_if_t<
_And<
_BoolConstant<sizeof...(_Up) == sizeof...(_Tp)>,
_EnableCopyFromOtherTuple<_Up...>,
@@ -719,7 +725,7 @@ public:
: __base_(__t)
{ }
- template <class ..._Up, class _Alloc, _EnableIf<
+ template <class ..._Up, class _Alloc, __enable_if_t<
_And<
_BoolConstant<sizeof...(_Up) == sizeof...(_Tp)>,
_EnableCopyFromOtherTuple<_Up...>,
@@ -731,7 +737,7 @@ public:
: __base_(allocator_arg_t(), __a, __t)
{ }
- template <class ..._Up, class _Alloc, _EnableIf<
+ template <class ..._Up, class _Alloc, __enable_if_t<
_And<
_BoolConstant<sizeof...(_Up) == sizeof...(_Tp)>,
_EnableCopyFromOtherTuple<_Up...>,
@@ -759,7 +765,7 @@ public:
is_constructible<_Tp, _Up>...
> { };
- template <class ..._Up, _EnableIf<
+ template <class ..._Up, __enable_if_t<
_And<
_BoolConstant<sizeof...(_Up) == sizeof...(_Tp)>,
_EnableMoveFromOtherTuple<_Up...>,
@@ -772,7 +778,7 @@ public:
: __base_(_VSTD::move(__t))
{ }
- template <class ..._Up, _EnableIf<
+ template <class ..._Up, __enable_if_t<
_And<
_BoolConstant<sizeof...(_Up) == sizeof...(_Tp)>,
_EnableMoveFromOtherTuple<_Up...>,
@@ -785,7 +791,7 @@ public:
: __base_(_VSTD::move(__t))
{ }
- template <class _Alloc, class ..._Up, _EnableIf<
+ template <class _Alloc, class ..._Up, __enable_if_t<
_And<
_BoolConstant<sizeof...(_Up) == sizeof...(_Tp)>,
_EnableMoveFromOtherTuple<_Up...>,
@@ -797,7 +803,7 @@ public:
: __base_(allocator_arg_t(), __a, _VSTD::move(__t))
{ }
- template <class _Alloc, class ..._Up, _EnableIf<
+ template <class _Alloc, class ..._Up, __enable_if_t<
_And<
_BoolConstant<sizeof...(_Up) == sizeof...(_Tp)>,
_EnableMoveFromOtherTuple<_Up...>,
@@ -826,7 +832,7 @@ public:
_Not<is_convertible<const _Up2&, _SecondType<_DependentTp...> > >
> { };
- template <class _Up1, class _Up2, template<class...> class _And = _And, _EnableIf<
+ template <class _Up1, class _Up2, template<class...> class _And = _And, __enable_if_t<
_And<
_BoolConstant<sizeof...(_Tp) == 2>,
_EnableImplicitCopyFromPair<_Up1, _Up2, _Tp...>
@@ -841,7 +847,7 @@ public:
: __base_(__p)
{ }
- template <class _Up1, class _Up2, template<class...> class _And = _And, _EnableIf<
+ template <class _Up1, class _Up2, template<class...> class _And = _And, __enable_if_t<
_And<
_BoolConstant<sizeof...(_Tp) == 2>,
_EnableExplicitCopyFromPair<_Up1, _Up2, _Tp...>
@@ -856,7 +862,7 @@ public:
: __base_(__p)
{ }
- template <class _Alloc, class _Up1, class _Up2, template<class...> class _And = _And, _EnableIf<
+ template <class _Alloc, class _Up1, class _Up2, template<class...> class _And = _And, __enable_if_t<
_And<
_BoolConstant<sizeof...(_Tp) == 2>,
_EnableImplicitCopyFromPair<_Up1, _Up2, _Tp...>
@@ -867,7 +873,7 @@ public:
: __base_(allocator_arg_t(), __a, __p)
{ }
- template <class _Alloc, class _Up1, class _Up2, template<class...> class _And = _And, _EnableIf<
+ template <class _Alloc, class _Up1, class _Up2, template<class...> class _And = _And, __enable_if_t<
_And<
_BoolConstant<sizeof...(_Tp) == 2>,
_EnableExplicitCopyFromPair<_Up1, _Up2, _Tp...>
@@ -895,7 +901,7 @@ public:
_Not<is_convertible<_Up2, _SecondType<_DependentTp...> > >
> { };
- template <class _Up1, class _Up2, template<class...> class _And = _And, _EnableIf<
+ template <class _Up1, class _Up2, template<class...> class _And = _And, __enable_if_t<
_And<
_BoolConstant<sizeof...(_Tp) == 2>,
_EnableImplicitMoveFromPair<_Up1, _Up2, _Tp...>
@@ -910,7 +916,7 @@ public:
: __base_(_VSTD::move(__p))
{ }
- template <class _Up1, class _Up2, template<class...> class _And = _And, _EnableIf<
+ template <class _Up1, class _Up2, template<class...> class _And = _And, __enable_if_t<
_And<
_BoolConstant<sizeof...(_Tp) == 2>,
_EnableExplicitMoveFromPair<_Up1, _Up2, _Tp...>
@@ -925,7 +931,7 @@ public:
: __base_(_VSTD::move(__p))
{ }
- template <class _Alloc, class _Up1, class _Up2, template<class...> class _And = _And, _EnableIf<
+ template <class _Alloc, class _Up1, class _Up2, template<class...> class _And = _And, __enable_if_t<
_And<
_BoolConstant<sizeof...(_Tp) == 2>,
_EnableImplicitMoveFromPair<_Up1, _Up2, _Tp...>
@@ -936,7 +942,7 @@ public:
: __base_(allocator_arg_t(), __a, _VSTD::move(__p))
{ }
- template <class _Alloc, class _Up1, class _Up2, template<class...> class _And = _And, _EnableIf<
+ template <class _Alloc, class _Up1, class _Up2, template<class...> class _And = _And, __enable_if_t<
_And<
_BoolConstant<sizeof...(_Tp) == 2>,
_EnableExplicitMoveFromPair<_Up1, _Up2, _Tp...>
@@ -967,7 +973,7 @@ public:
return *this;
}
- template<class... _Up, _EnableIf<
+ template<class... _Up, __enable_if_t<
_And<
_BoolConstant<sizeof...(_Tp) == sizeof...(_Up)>,
is_assignable<_Tp&, _Up const&>...
@@ -982,7 +988,7 @@ public:
return *this;
}
- template<class... _Up, _EnableIf<
+ template<class... _Up, __enable_if_t<
_And<
_BoolConstant<sizeof...(_Tp) == sizeof...(_Up)>,
is_assignable<_Tp&, _Up>...
@@ -998,7 +1004,7 @@ public:
return *this;
}
- template<class _Up1, class _Up2, class _Dep = true_type, _EnableIf<
+ template<class _Up1, class _Up2, class _Dep = true_type, __enable_if_t<
_And<_Dep,
_BoolConstant<sizeof...(_Tp) == 2>,
is_assignable<_FirstType<_Tp..., _Dep>&, _Up1 const&>,
@@ -1017,7 +1023,7 @@ public:
return *this;
}
- template<class _Up1, class _Up2, class _Dep = true_type, _EnableIf<
+ template<class _Up1, class _Up2, class _Dep = true_type, __enable_if_t<
_And<_Dep,
_BoolConstant<sizeof...(_Tp) == 2>,
is_assignable<_FirstType<_Tp..., _Dep>&, _Up1>,
@@ -1037,7 +1043,7 @@ public:
}
// EXTENSION
- template<class _Up, size_t _Np, class = _EnableIf<
+ template<class _Up, size_t _Np, class = __enable_if_t<
_And<
_BoolConstant<_Np == sizeof...(_Tp)>,
is_assignable<_Tp&, _Up const&>...
@@ -1053,7 +1059,7 @@ public:
}
// EXTENSION
- template<class _Up, size_t _Np, class = void, class = _EnableIf<
+ template<class _Up, size_t _Np, class = void, class = __enable_if_t<
_And<
_BoolConstant<_Np == sizeof...(_Tp)>,
is_assignable<_Tp&, _Up>...
@@ -1097,7 +1103,7 @@ public:
void swap(tuple&) _NOEXCEPT {}
};
-#ifndef _LIBCPP_HAS_NO_DEDUCTION_GUIDES
+#if _LIBCPP_STD_VER >= 17
template <class ..._Tp>
tuple(_Tp...) -> tuple<_Tp...>;
template <class _Tp1, class _Tp2>
@@ -1128,7 +1134,7 @@ inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11
typename tuple_element<_Ip, tuple<_Tp...> >::type&
get(tuple<_Tp...>& __t) _NOEXCEPT
{
- typedef _LIBCPP_NODEBUG_TYPE typename tuple_element<_Ip, tuple<_Tp...> >::type type;
+ typedef _LIBCPP_NODEBUG typename tuple_element<_Ip, tuple<_Tp...> >::type type;
return static_cast<__tuple_leaf<_Ip, type>&>(__t.__base_).get();
}
@@ -1137,7 +1143,7 @@ inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11
const typename tuple_element<_Ip, tuple<_Tp...> >::type&
get(const tuple<_Tp...>& __t) _NOEXCEPT
{
- typedef _LIBCPP_NODEBUG_TYPE typename tuple_element<_Ip, tuple<_Tp...> >::type type;
+ typedef _LIBCPP_NODEBUG typename tuple_element<_Ip, tuple<_Tp...> >::type type;
return static_cast<const __tuple_leaf<_Ip, type>&>(__t.__base_).get();
}
@@ -1146,7 +1152,7 @@ inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11
typename tuple_element<_Ip, tuple<_Tp...> >::type&&
get(tuple<_Tp...>&& __t) _NOEXCEPT
{
- typedef _LIBCPP_NODEBUG_TYPE typename tuple_element<_Ip, tuple<_Tp...> >::type type;
+ typedef _LIBCPP_NODEBUG typename tuple_element<_Ip, tuple<_Tp...> >::type type;
return static_cast<type&&>(
static_cast<__tuple_leaf<_Ip, type>&&>(__t.__base_).get());
}
@@ -1156,7 +1162,7 @@ inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11
const typename tuple_element<_Ip, tuple<_Tp...> >::type&&
get(const tuple<_Tp...>&& __t) _NOEXCEPT
{
- typedef _LIBCPP_NODEBUG_TYPE typename tuple_element<_Ip, tuple<_Tp...> >::type type;
+ typedef _LIBCPP_NODEBUG typename tuple_element<_Ip, tuple<_Tp...> >::type type;
return static_cast<const type&&>(
static_cast<const __tuple_leaf<_Ip, type>&&>(__t.__base_).get());
}
@@ -1250,7 +1256,7 @@ struct __ignore_t
};
namespace {
- _LIBCPP_INLINE_VAR constexpr __ignore_t<unsigned char> ignore = __ignore_t<unsigned char>();
+ constexpr __ignore_t<unsigned char> ignore = __ignore_t<unsigned char>();
}
template <class... _Tp>
@@ -1300,6 +1306,30 @@ operator==(const tuple<_Tp...>& __x, const tuple<_Up...>& __y)
return __tuple_equal<sizeof...(_Tp)>()(__x, __y);
}
+#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+// operator<=>
+
+template <class ..._Tp, class ..._Up, size_t ..._Is>
+_LIBCPP_HIDE_FROM_ABI constexpr
+auto
+__tuple_compare_three_way(const tuple<_Tp...>& __x, const tuple<_Up...>& __y, index_sequence<_Is...>) {
+ common_comparison_category_t<__synth_three_way_result<_Tp, _Up>...> __result = strong_ordering::equal;
+ static_cast<void>(((__result = _VSTD::__synth_three_way(_VSTD::get<_Is>(__x), _VSTD::get<_Is>(__y)), __result != 0) || ...));
+ return __result;
+}
+
+template <class ..._Tp, class ..._Up>
+requires (sizeof...(_Tp) == sizeof...(_Up))
+_LIBCPP_HIDE_FROM_ABI constexpr
+common_comparison_category_t<__synth_three_way_result<_Tp, _Up>...>
+operator<=>(const tuple<_Tp...>& __x, const tuple<_Up...>& __y)
+{
+ return _VSTD::__tuple_compare_three_way(__x, __y, index_sequence_for<_Tp...>{});
+}
+
+#else // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
template <class ..._Tp, class ..._Up>
inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11
bool
@@ -1368,6 +1398,8 @@ operator<=(const tuple<_Tp...>& __x, const tuple<_Up...>& __y)
return !(__y < __x);
}
+#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
// tuple_cat
template <class _Tp, class _Up> struct __tuple_cat_type;
@@ -1375,7 +1407,7 @@ template <class _Tp, class _Up> struct __tuple_cat_type;
template <class ..._Ttypes, class ..._Utypes>
struct __tuple_cat_type<tuple<_Ttypes...>, __tuple_types<_Utypes...> >
{
- typedef _LIBCPP_NODEBUG_TYPE tuple<_Ttypes..., _Utypes...> type;
+ typedef _LIBCPP_NODEBUG tuple<_Ttypes..., _Utypes...> type;
};
template <class _ResultTuple, bool _Is_Tuple0TupleLike, class ..._Tuples>
@@ -1386,7 +1418,7 @@ struct __tuple_cat_return_1
template <class ..._Types, class _Tuple0>
struct __tuple_cat_return_1<tuple<_Types...>, true, _Tuple0>
{
- typedef _LIBCPP_NODEBUG_TYPE typename __tuple_cat_type<tuple<_Types...>,
+ typedef _LIBCPP_NODEBUG typename __tuple_cat_type<tuple<_Types...>,
typename __make_tuple_types<typename __uncvref<_Tuple0>::type>::type>::type
type;
};
@@ -1416,7 +1448,7 @@ struct __tuple_cat_return<_Tuple0, _Tuples...>
template <>
struct __tuple_cat_return<>
{
- typedef _LIBCPP_NODEBUG_TYPE tuple<> type;
+ typedef _LIBCPP_NODEBUG tuple<> type;
};
inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11
@@ -1432,7 +1464,7 @@ struct __tuple_cat_return_ref_imp;
template <class ..._Types, size_t ..._I0, class _Tuple0>
struct __tuple_cat_return_ref_imp<tuple<_Types...>, __tuple_indices<_I0...>, _Tuple0>
{
- typedef _LIBCPP_NODEBUG_TYPE typename remove_reference<_Tuple0>::type _T0;
+ typedef _LIBCPP_NODEBUG typename remove_reference<_Tuple0>::type _T0;
typedef tuple<_Types..., typename __apply_cv<_Tuple0,
typename tuple_element<_I0, _T0>::type>::type&&...> type;
};
@@ -1480,8 +1512,8 @@ struct __tuple_cat<tuple<_Types...>, __tuple_indices<_I0...>, __tuple_indices<_J
typename __tuple_cat_return_ref<tuple<_Types...>&&, _Tuple0&&, _Tuple1&&, _Tuples&&...>::type
operator()(tuple<_Types...> __t, _Tuple0&& __t0, _Tuple1&& __t1, _Tuples&& ...__tpls)
{
- typedef _LIBCPP_NODEBUG_TYPE typename remove_reference<_Tuple0>::type _T0;
- typedef _LIBCPP_NODEBUG_TYPE typename remove_reference<_Tuple1>::type _T1;
+ typedef _LIBCPP_NODEBUG typename remove_reference<_Tuple0>::type _T0;
+ typedef _LIBCPP_NODEBUG typename remove_reference<_Tuple1>::type _T1;
return __tuple_cat<
tuple<_Types...,
typename __apply_cv<_Tuple0, typename tuple_element<
@@ -1501,7 +1533,7 @@ inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11
typename __tuple_cat_return<_Tuple0, _Tuples...>::type
tuple_cat(_Tuple0&& __t0, _Tuples&&... __tpls)
{
- typedef _LIBCPP_NODEBUG_TYPE typename remove_reference<_Tuple0>::type _T0;
+ typedef _LIBCPP_NODEBUG typename remove_reference<_Tuple0>::type _T0;
return __tuple_cat<tuple<>, __tuple_indices<>,
typename __make_tuple_indices<tuple_size<_T0>::value>::type>()
(tuple<>(), _VSTD::forward<_Tuple0>(__t0),
@@ -1525,7 +1557,7 @@ pair<_T1, _T2>::pair(piecewise_construct_t,
#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR constexpr size_t tuple_size_v = tuple_size<_Tp>::value;
+inline constexpr size_t tuple_size_v = tuple_size<_Tp>::value;
#define _LIBCPP_NOEXCEPT_RETURN(...) noexcept(noexcept(__VA_ARGS__)) { return __VA_ARGS__; }
diff --git a/libcxx/include/type_traits b/libcxx/include/type_traits
index b15c7a2a5f35..e9d5e06f36dc 100644
--- a/libcxx/include/type_traits
+++ b/libcxx/include/type_traits
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===------------------------ type_traits ---------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -455,58 +455,63 @@ using bool_constant = integral_constant<bool, __b>;
#define _LIBCPP_BOOL_CONSTANT(__b) integral_constant<bool,(__b)>
#endif
+template <bool, class _Tp = void> struct _LIBCPP_TEMPLATE_VIS enable_if {};
+template <class _Tp> struct _LIBCPP_TEMPLATE_VIS enable_if<true, _Tp> {typedef _Tp type;};
+
+template <bool _Bp, class _Tp = void> using __enable_if_t _LIBCPP_NODEBUG = typename enable_if<_Bp, _Tp>::type;
+
+#if _LIBCPP_STD_VER > 11
+template <bool _Bp, class _Tp = void> using enable_if_t = typename enable_if<_Bp, _Tp>::type;
+#endif
+
typedef _LIBCPP_BOOL_CONSTANT(true) true_type;
typedef _LIBCPP_BOOL_CONSTANT(false) false_type;
template <bool _Val>
-using _BoolConstant _LIBCPP_NODEBUG_TYPE = integral_constant<bool, _Val>;
+using _BoolConstant _LIBCPP_NODEBUG = integral_constant<bool, _Val>;
template <bool> struct _MetaBase;
template <>
struct _MetaBase<true> {
template <class _Tp, class _Up>
- using _SelectImpl _LIBCPP_NODEBUG_TYPE = _Tp;
+ using _SelectImpl _LIBCPP_NODEBUG = _Tp;
template <template <class...> class _FirstFn, template <class...> class, class ..._Args>
- using _SelectApplyImpl _LIBCPP_NODEBUG_TYPE = _FirstFn<_Args...>;
+ using _SelectApplyImpl _LIBCPP_NODEBUG = _FirstFn<_Args...>;
template <class _First, class...>
- using _FirstImpl _LIBCPP_NODEBUG_TYPE = _First;
+ using _FirstImpl _LIBCPP_NODEBUG = _First;
template <class, class _Second, class...>
- using _SecondImpl _LIBCPP_NODEBUG_TYPE = _Second;
- template <class _Tp = void>
- using _EnableIfImpl _LIBCPP_NODEBUG_TYPE = _Tp;
+ using _SecondImpl _LIBCPP_NODEBUG = _Second;
template <class _Result, class _First, class ..._Rest>
- using _OrImpl _LIBCPP_NODEBUG_TYPE = typename _MetaBase<_First::value != true && sizeof...(_Rest) != 0>::template _OrImpl<_First, _Rest...>;
+ using _OrImpl _LIBCPP_NODEBUG = typename _MetaBase<_First::value != true && sizeof...(_Rest) != 0>::template _OrImpl<_First, _Rest...>;
};
template <>
struct _MetaBase<false> {
template <class _Tp, class _Up>
- using _SelectImpl _LIBCPP_NODEBUG_TYPE = _Up;
+ using _SelectImpl _LIBCPP_NODEBUG = _Up;
template <template <class...> class, template <class...> class _SecondFn, class ..._Args>
- using _SelectApplyImpl _LIBCPP_NODEBUG_TYPE = _SecondFn<_Args...>;
+ using _SelectApplyImpl _LIBCPP_NODEBUG = _SecondFn<_Args...>;
template <class _Result, class ...>
- using _OrImpl _LIBCPP_NODEBUG_TYPE = _Result;
+ using _OrImpl _LIBCPP_NODEBUG = _Result;
};
-template <bool _Cond, class _Ret = void>
-using _EnableIf _LIBCPP_NODEBUG_TYPE = typename _MetaBase<_Cond>::template _EnableIfImpl<_Ret>;
template <bool _Cond, class _IfRes, class _ElseRes>
-using _If _LIBCPP_NODEBUG_TYPE = typename _MetaBase<_Cond>::template _SelectImpl<_IfRes, _ElseRes>;
+using _If _LIBCPP_NODEBUG = typename _MetaBase<_Cond>::template _SelectImpl<_IfRes, _ElseRes>;
template <class ..._Rest>
-using _Or _LIBCPP_NODEBUG_TYPE = typename _MetaBase< sizeof...(_Rest) != 0 >::template _OrImpl<false_type, _Rest...>;
+using _Or _LIBCPP_NODEBUG = typename _MetaBase< sizeof...(_Rest) != 0 >::template _OrImpl<false_type, _Rest...>;
template <class _Pred>
struct _Not : _BoolConstant<!_Pred::value> {};
template <class ..._Args>
-using _FirstType _LIBCPP_NODEBUG_TYPE = typename _MetaBase<(sizeof...(_Args) >= 1)>::template _FirstImpl<_Args...>;
+using _FirstType _LIBCPP_NODEBUG = typename _MetaBase<(sizeof...(_Args) >= 1)>::template _FirstImpl<_Args...>;
template <class ..._Args>
-using _SecondType _LIBCPP_NODEBUG_TYPE = typename _MetaBase<(sizeof...(_Args) >= 2)>::template _SecondImpl<_Args...>;
+using _SecondType _LIBCPP_NODEBUG = typename _MetaBase<(sizeof...(_Args) >= 2)>::template _SecondImpl<_Args...>;
template <class ...> using __expand_to_true = true_type;
template <class ..._Pred>
-__expand_to_true<_EnableIf<_Pred::value>...> __and_helper(int);
+__expand_to_true<__enable_if_t<_Pred::value>...> __and_helper(int);
template <class ...>
false_type __and_helper(...);
template <class ..._Pred>
-using _And _LIBCPP_NODEBUG_TYPE = decltype(__and_helper<_Pred...>(0));
+using _And _LIBCPP_NODEBUG = decltype(__and_helper<_Pred...>(0));
template <template <class...> class _Func, class ..._Args>
struct _Lazy : _Func<_Args...> {};
@@ -519,7 +524,7 @@ template <template <class...> class, class ...>
false_type __sfinae_test_impl(...);
template <template <class ...> class _Templ, class ..._Args>
-using _IsValidExpansion _LIBCPP_NODEBUG_TYPE = decltype(__sfinae_test_impl<_Templ, _Args...>(0));
+using _IsValidExpansion _LIBCPP_NODEBUG = decltype(__sfinae_test_impl<_Templ, _Args...>(0));
template <class>
struct __void_t { typedef void type; };
@@ -528,7 +533,7 @@ template <class _Tp>
struct __identity { typedef _Tp type; };
template <class _Tp>
-using __identity_t _LIBCPP_NODEBUG_TYPE = typename __identity<_Tp>::type;
+using __identity_t _LIBCPP_NODEBUG = typename __identity<_Tp>::type;
template <class _Tp, bool>
struct _LIBCPP_TEMPLATE_VIS __dependent_type : public _Tp {};
@@ -543,13 +548,6 @@ template <class _If, class _Then>
template <bool _Bp, class _If, class _Then> using conditional_t = typename conditional<_Bp, _If, _Then>::type;
#endif
-template <bool, class _Tp = void> struct _LIBCPP_TEMPLATE_VIS enable_if {};
-template <class _Tp> struct _LIBCPP_TEMPLATE_VIS enable_if<true, _Tp> {typedef _Tp type;};
-
-#if _LIBCPP_STD_VER > 11
-template <bool _Bp, class _Tp = void> using enable_if_t = typename enable_if<_Bp, _Tp>::type;
-#endif
-
// is_same
#if __has_keyword(__is_same)
@@ -557,9 +555,9 @@ template <bool _Bp, class _Tp = void> using enable_if_t = typename enable_if<_Bp
template <class _Tp, class _Up>
struct _LIBCPP_TEMPLATE_VIS is_same : _BoolConstant<__is_same(_Tp, _Up)> { };
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp, class _Up>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_same_v = __is_same(_Tp, _Up);
+inline constexpr bool is_same_v = __is_same(_Tp, _Up);
#endif
#else
@@ -567,10 +565,9 @@ _LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_same_v = __is_same(_Tp, _Up);
template <class _Tp, class _Up> struct _LIBCPP_TEMPLATE_VIS is_same : public false_type {};
template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_same<_Tp, _Tp> : public true_type {};
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp, class _Up>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_same_v
- = is_same<_Tp, _Up>::value;
+inline constexpr bool is_same_v = is_same<_Tp, _Up>::value;
#endif
#endif // __is_same
@@ -595,7 +592,7 @@ using _IsNotSame = _BoolConstant<
template <class _Tp>
-using __test_for_primary_template = _EnableIf<
+using __test_for_primary_template = __enable_if_t<
_IsSame<_Tp, typename _Tp::__primary_template>::value
>;
template <class _Tp>
@@ -614,9 +611,9 @@ struct __two {char __lx[2];};
template <class _Tp>
struct _LIBCPP_TEMPLATE_VIS is_const : _BoolConstant<__is_const(_Tp)> { };
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_const_v = __is_const(_Tp);
+inline constexpr bool is_const_v = __is_const(_Tp);
#endif
#else
@@ -624,10 +621,9 @@ _LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_const_v = __is_const(_Tp);
template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_const : public false_type {};
template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_const<_Tp const> : public true_type {};
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_const_v
- = is_const<_Tp>::value;
+inline constexpr bool is_const_v = is_const<_Tp>::value;
#endif
#endif // __has_keyword(__is_const)
@@ -639,9 +635,9 @@ _LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_const_v
template <class _Tp>
struct _LIBCPP_TEMPLATE_VIS is_volatile : _BoolConstant<__is_volatile(_Tp)> { };
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_volatile_v = __is_volatile(_Tp);
+inline constexpr bool is_volatile_v = __is_volatile(_Tp);
#endif
#else
@@ -649,10 +645,9 @@ _LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_volatile_v = __is_volatile(_Tp);
template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_volatile : public false_type {};
template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_volatile<_Tp volatile> : public true_type {};
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_volatile_v
- = is_volatile<_Tp>::value;
+inline constexpr bool is_volatile_v = is_volatile<_Tp>::value;
#endif
#endif // __has_keyword(__is_volatile)
@@ -727,9 +722,9 @@ template <class _Tp> using remove_cv_t = typename remove_cv<_Tp>::type;
template <class _Tp>
struct _LIBCPP_TEMPLATE_VIS is_void : _BoolConstant<__is_void(_Tp)> { };
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_void_v = __is_void(_Tp);
+inline constexpr bool is_void_v = __is_void(_Tp);
#endif
#else
@@ -737,10 +732,9 @@ _LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_void_v = __is_void(_Tp);
template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_void
: public is_same<typename remove_cv<_Tp>::type, void> {};
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_void_v
- = is_void<_Tp>::value;
+inline constexpr bool is_void_v = is_void<_Tp>::value;
#endif
#endif // __has_keyword(__is_void)
@@ -757,10 +751,9 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS __is_nullptr_t
template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_null_pointer
: public __is_nullptr_t_impl<typename remove_cv<_Tp>::type> {};
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_null_pointer_v
- = is_null_pointer<_Tp>::value;
+inline constexpr bool is_null_pointer_v = is_null_pointer<_Tp>::value;
#endif
#endif // _LIBCPP_STD_VER > 11
@@ -773,7 +766,7 @@ struct _LIBCPP_TEMPLATE_VIS is_integral : _BoolConstant<__is_integral(_Tp)> { };
#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_integral_v = __is_integral(_Tp);
+inline constexpr bool is_integral_v = __is_integral(_Tp);
#endif
#else
@@ -781,16 +774,13 @@ _LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_integral_v = __is_integral(_Tp);
template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_integral
: public _BoolConstant<__libcpp_is_integral<typename remove_cv<_Tp>::type>::value> {};
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_integral_v
- = is_integral<_Tp>::value;
+inline constexpr bool is_integral_v = is_integral<_Tp>::value;
#endif
#endif // __has_keyword(__is_integral)
-// __libcpp_is_signed_integer, __libcpp_is_unsigned_integer
-
// [basic.fundamental] defines five standard signed integer types;
// __int128_t is an extended signed integer type.
// The signed and unsigned integer types, plus bool and the
@@ -817,6 +807,7 @@ template <> struct __libcpp_is_unsigned_integer<__uint128_t> : public tru
#endif
// is_floating_point
+// <concepts> implements __libcpp_floating_point
template <class _Tp> struct __libcpp_is_floating_point : public false_type {};
template <> struct __libcpp_is_floating_point<float> : public true_type {};
@@ -826,10 +817,9 @@ template <> struct __libcpp_is_floating_point<long double> : public tru
template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_floating_point
: public __libcpp_is_floating_point<typename remove_cv<_Tp>::type> {};
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_floating_point_v
- = is_floating_point<_Tp>::value;
+inline constexpr bool is_floating_point_v = is_floating_point<_Tp>::value;
#endif
// is_array
@@ -839,9 +829,9 @@ _LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_floating_point_v
template <class _Tp>
struct _LIBCPP_TEMPLATE_VIS is_array : _BoolConstant<__is_array(_Tp)> { };
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_array_v = __is_array(_Tp);
+inline constexpr bool is_array_v = __is_array(_Tp);
#endif
#else
@@ -853,27 +843,25 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_array<_Tp[]>
template <class _Tp, size_t _Np> struct _LIBCPP_TEMPLATE_VIS is_array<_Tp[_Np]>
: public true_type {};
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_array_v
- = is_array<_Tp>::value;
+inline constexpr bool is_array_v = is_array<_Tp>::value;
#endif
#endif // __has_keyword(__is_array)
// is_pointer
-// Before Clang 11 / AppleClang 12.0.5, __is_pointer didn't work for Objective-C types.
+// Before AppleClang 12.0.5, __is_pointer didn't work for Objective-C types.
#if __has_keyword(__is_pointer) && \
- !(defined(_LIBCPP_CLANG_VER) && _LIBCPP_CLANG_VER < 1100) && \
!(defined(_LIBCPP_APPLE_CLANG_VER) && _LIBCPP_APPLE_CLANG_VER < 1205)
template<class _Tp>
struct _LIBCPP_TEMPLATE_VIS is_pointer : _BoolConstant<__is_pointer(_Tp)> { };
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_pointer_v = __is_pointer(_Tp);
+inline constexpr bool is_pointer_v = __is_pointer(_Tp);
#endif
#else // __has_keyword(__is_pointer)
@@ -892,10 +880,9 @@ template <class _Tp> struct __libcpp_remove_objc_qualifiers<_Tp __unsafe_unretai
template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_pointer
: public __libcpp_is_pointer<typename __libcpp_remove_objc_qualifiers<typename remove_cv<_Tp>::type>::type> {};
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_pointer_v
- = is_pointer<_Tp>::value;
+inline constexpr bool is_pointer_v = is_pointer<_Tp>::value;
#endif
#endif // __has_keyword(__is_pointer)
@@ -915,15 +902,13 @@ struct _LIBCPP_TEMPLATE_VIS is_rvalue_reference : _BoolConstant<__is_rvalue_refe
template<class _Tp>
struct _LIBCPP_TEMPLATE_VIS is_reference : _BoolConstant<__is_reference(_Tp)> { };
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_reference_v = __is_reference(_Tp);
-
+inline constexpr bool is_reference_v = __is_reference(_Tp);
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_lvalue_reference_v = __is_lvalue_reference(_Tp);
-
+inline constexpr bool is_lvalue_reference_v = __is_lvalue_reference(_Tp);
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_rvalue_reference_v = __is_rvalue_reference(_Tp);
+inline constexpr bool is_rvalue_reference_v = __is_rvalue_reference(_Tp);
#endif
#else // __has_keyword(__is_lvalue_reference) && etc...
@@ -938,18 +923,15 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_reference : public fa
template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_reference<_Tp&> : public true_type {};
template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_reference<_Tp&&> : public true_type {};
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_reference_v
- = is_reference<_Tp>::value;
+inline constexpr bool is_reference_v = is_reference<_Tp>::value;
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_lvalue_reference_v
- = is_lvalue_reference<_Tp>::value;
+inline constexpr bool is_lvalue_reference_v = is_lvalue_reference<_Tp>::value;
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_rvalue_reference_v
- = is_rvalue_reference<_Tp>::value;
+inline constexpr bool is_rvalue_reference_v = is_rvalue_reference<_Tp>::value;
#endif
#endif // __has_keyword(__is_lvalue_reference) && etc...
@@ -969,10 +951,9 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_union
#endif
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_union_v
- = is_union<_Tp>::value;
+inline constexpr bool is_union_v = is_union<_Tp>::value;
#endif
// is_class
@@ -995,10 +976,9 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_class
#endif
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_class_v
- = is_class<_Tp>::value;
+inline constexpr bool is_class_v = is_class<_Tp>::value;
#endif
// is_function
@@ -1013,10 +993,9 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_function
> {};
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_function_v
- = is_function<_Tp>::value;
+inline constexpr bool is_function_v = is_function<_Tp>::value;
#endif
template <class _Tp> struct __libcpp_is_member_pointer {
@@ -1040,10 +1019,9 @@ template<class _Tp>
struct _LIBCPP_TEMPLATE_VIS is_member_function_pointer
: _BoolConstant<__is_member_function_pointer(_Tp)> { };
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_member_function_pointer_v
- = __is_member_function_pointer(_Tp);
+inline constexpr bool is_member_function_pointer_v = __is_member_function_pointer(_Tp);
#endif
#else // __has_keyword(__is_member_function_pointer)
@@ -1051,10 +1029,9 @@ _LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_member_function_pointer_v
template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_member_function_pointer
: public _BoolConstant< __libcpp_is_member_pointer<typename remove_cv<_Tp>::type>::__is_func > {};
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_member_function_pointer_v
- = is_member_function_pointer<_Tp>::value;
+inline constexpr bool is_member_function_pointer_v = is_member_function_pointer<_Tp>::value;
#endif
#endif // __has_keyword(__is_member_function_pointer)
@@ -1066,9 +1043,9 @@ _LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_member_function_pointer_v
template<class _Tp>
struct _LIBCPP_TEMPLATE_VIS is_member_pointer : _BoolConstant<__is_member_pointer(_Tp)> { };
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_member_pointer_v = __is_member_pointer(_Tp);
+inline constexpr bool is_member_pointer_v = __is_member_pointer(_Tp);
#endif
#else // __has_keyword(__is_member_pointer)
@@ -1076,10 +1053,9 @@ _LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_member_pointer_v = __is_member_poin
template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_member_pointer
: public _BoolConstant< __libcpp_is_member_pointer<typename remove_cv<_Tp>::type>::__is_member > {};
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_member_pointer_v
- = is_member_pointer<_Tp>::value;
+inline constexpr bool is_member_pointer_v = is_member_pointer<_Tp>::value;
#endif
#endif // __has_keyword(__is_member_pointer)
@@ -1092,10 +1068,9 @@ template<class _Tp>
struct _LIBCPP_TEMPLATE_VIS is_member_object_pointer
: _BoolConstant<__is_member_object_pointer(_Tp)> { };
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_member_object_pointer_v
- = __is_member_object_pointer(_Tp);
+inline constexpr bool is_member_object_pointer_v = __is_member_object_pointer(_Tp);
#endif
#else // __has_keyword(__is_member_object_pointer)
@@ -1103,10 +1078,9 @@ _LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_member_object_pointer_v
template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_member_object_pointer
: public _BoolConstant< __libcpp_is_member_pointer<typename remove_cv<_Tp>::type>::__is_obj > {};
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_member_object_pointer_v
- = is_member_object_pointer<_Tp>::value;
+inline constexpr bool is_member_object_pointer_v = is_member_object_pointer<_Tp>::value;
#endif
#endif // __has_keyword(__is_member_object_pointer)
@@ -1118,9 +1092,9 @@ _LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_member_object_pointer_v
template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_enum
: public integral_constant<bool, __is_enum(_Tp)> {};
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_enum_v = __is_enum(_Tp);
+inline constexpr bool is_enum_v = __is_enum(_Tp);
#endif
#else
@@ -1137,10 +1111,9 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_enum
!is_class<_Tp>::value &&
!is_function<_Tp>::value > {};
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_enum_v
- = is_enum<_Tp>::value;
+inline constexpr bool is_enum_v = is_enum<_Tp>::value;
#endif
#endif // __has_feature(is_enum) || defined(_LIBCPP_COMPILER_GCC)
@@ -1152,10 +1125,9 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_arithmetic
: public integral_constant<bool, is_integral<_Tp>::value ||
is_floating_point<_Tp>::value> {};
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_arithmetic_v
- = is_arithmetic<_Tp>::value;
+inline constexpr bool is_arithmetic_v = is_arithmetic<_Tp>::value;
#endif
// is_fundamental
@@ -1169,9 +1141,9 @@ _LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_arithmetic_v
template<class _Tp>
struct _LIBCPP_TEMPLATE_VIS is_fundamental : _BoolConstant<__is_fundamental(_Tp)> { };
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_fundamental_v = __is_fundamental(_Tp);
+inline constexpr bool is_fundamental_v = __is_fundamental(_Tp);
#endif
#else // __has_keyword(__is_fundamental)
@@ -1181,10 +1153,9 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_fundamental
__is_nullptr_t<_Tp>::value ||
is_arithmetic<_Tp>::value> {};
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_fundamental_v
- = is_fundamental<_Tp>::value;
+inline constexpr bool is_fundamental_v = is_fundamental<_Tp>::value;
#endif
#endif // __has_keyword(__is_fundamental)
@@ -1197,9 +1168,9 @@ _LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_fundamental_v
template<class _Tp>
struct _LIBCPP_TEMPLATE_VIS is_scalar : _BoolConstant<__is_scalar(_Tp)> { };
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_scalar_v = __is_scalar(_Tp);
+inline constexpr bool is_scalar_v = __is_scalar(_Tp);
#endif
#else // __has_keyword(__is_scalar)
@@ -1219,10 +1190,9 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_scalar
template <> struct _LIBCPP_TEMPLATE_VIS is_scalar<nullptr_t> : public true_type {};
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_scalar_v
- = is_scalar<_Tp>::value;
+inline constexpr bool is_scalar_v = is_scalar<_Tp>::value;
#endif
#endif // __has_keyword(__is_scalar)
@@ -1234,9 +1204,9 @@ _LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_scalar_v
template<class _Tp>
struct _LIBCPP_TEMPLATE_VIS is_object : _BoolConstant<__is_object(_Tp)> { };
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_object_v = __is_object(_Tp);
+inline constexpr bool is_object_v = __is_object(_Tp);
#endif
#else // __has_keyword(__is_object)
@@ -1247,10 +1217,9 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_object
is_union<_Tp>::value ||
is_class<_Tp>::value > {};
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_object_v
- = is_object<_Tp>::value;
+inline constexpr bool is_object_v = is_object<_Tp>::value;
#endif
#endif // __has_keyword(__is_object)
@@ -1263,9 +1232,9 @@ _LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_object_v
template<class _Tp>
struct _LIBCPP_TEMPLATE_VIS is_compound : _BoolConstant<__is_compound(_Tp)> { };
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_compound_v = __is_compound(_Tp);
+inline constexpr bool is_compound_v = __is_compound(_Tp);
#endif
#else // __has_keyword(__is_compound)
@@ -1273,10 +1242,9 @@ _LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_compound_v = __is_compound(_Tp);
template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_compound
: public integral_constant<bool, !is_fundamental<_Tp>::value> {};
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_compound_v
- = is_compound<_Tp>::value;
+inline constexpr bool is_compound_v = is_compound<_Tp>::value;
#endif
#endif // __has_keyword(__is_compound)
@@ -1296,7 +1264,7 @@ struct __is_referenceable : integral_constant<bool,
// add_const
template <class _Tp> struct _LIBCPP_TEMPLATE_VIS add_const {
- typedef _LIBCPP_NODEBUG_TYPE const _Tp type;
+ typedef _LIBCPP_NODEBUG const _Tp type;
};
#if _LIBCPP_STD_VER > 11
@@ -1306,7 +1274,7 @@ template <class _Tp> using add_const_t = typename add_const<_Tp>::type;
// add_volatile
template <class _Tp> struct _LIBCPP_TEMPLATE_VIS add_volatile {
- typedef _LIBCPP_NODEBUG_TYPE volatile _Tp type;
+ typedef _LIBCPP_NODEBUG volatile _Tp type;
};
#if _LIBCPP_STD_VER > 11
@@ -1315,7 +1283,7 @@ template <class _Tp> using add_volatile_t = typename add_volatile<_Tp>::type;
// add_cv
template <class _Tp> struct _LIBCPP_TEMPLATE_VIS add_cv {
- typedef _LIBCPP_NODEBUG_TYPE const volatile _Tp type;
+ typedef _LIBCPP_NODEBUG const volatile _Tp type;
};
#if _LIBCPP_STD_VER > 11
@@ -1331,9 +1299,9 @@ struct _LIBCPP_TEMPLATE_VIS remove_reference { typedef __remove_reference(_Tp) t
#else // __has_keyword(__remove_reference)
-template <class _Tp> struct _LIBCPP_TEMPLATE_VIS remove_reference {typedef _LIBCPP_NODEBUG_TYPE _Tp type;};
-template <class _Tp> struct _LIBCPP_TEMPLATE_VIS remove_reference<_Tp&> {typedef _LIBCPP_NODEBUG_TYPE _Tp type;};
-template <class _Tp> struct _LIBCPP_TEMPLATE_VIS remove_reference<_Tp&&> {typedef _LIBCPP_NODEBUG_TYPE _Tp type;};
+template <class _Tp> struct _LIBCPP_TEMPLATE_VIS remove_reference {typedef _LIBCPP_NODEBUG _Tp type;};
+template <class _Tp> struct _LIBCPP_TEMPLATE_VIS remove_reference<_Tp&> {typedef _LIBCPP_NODEBUG _Tp type;};
+template <class _Tp> struct _LIBCPP_TEMPLATE_VIS remove_reference<_Tp&&> {typedef _LIBCPP_NODEBUG _Tp type;};
#if _LIBCPP_STD_VER > 11
template <class _Tp> using remove_reference_t = typename remove_reference<_Tp>::type;
@@ -1343,21 +1311,21 @@ template <class _Tp> using remove_reference_t = typename remove_reference<_Tp>::
// add_lvalue_reference
-template <class _Tp, bool = __is_referenceable<_Tp>::value> struct __add_lvalue_reference_impl { typedef _LIBCPP_NODEBUG_TYPE _Tp type; };
-template <class _Tp > struct __add_lvalue_reference_impl<_Tp, true> { typedef _LIBCPP_NODEBUG_TYPE _Tp& type; };
+template <class _Tp, bool = __is_referenceable<_Tp>::value> struct __add_lvalue_reference_impl { typedef _LIBCPP_NODEBUG _Tp type; };
+template <class _Tp > struct __add_lvalue_reference_impl<_Tp, true> { typedef _LIBCPP_NODEBUG _Tp& type; };
template <class _Tp> struct _LIBCPP_TEMPLATE_VIS add_lvalue_reference
-{typedef _LIBCPP_NODEBUG_TYPE typename __add_lvalue_reference_impl<_Tp>::type type;};
+{typedef _LIBCPP_NODEBUG typename __add_lvalue_reference_impl<_Tp>::type type;};
#if _LIBCPP_STD_VER > 11
template <class _Tp> using add_lvalue_reference_t = typename add_lvalue_reference<_Tp>::type;
#endif
-template <class _Tp, bool = __is_referenceable<_Tp>::value> struct __add_rvalue_reference_impl { typedef _LIBCPP_NODEBUG_TYPE _Tp type; };
-template <class _Tp > struct __add_rvalue_reference_impl<_Tp, true> { typedef _LIBCPP_NODEBUG_TYPE _Tp&& type; };
+template <class _Tp, bool = __is_referenceable<_Tp>::value> struct __add_rvalue_reference_impl { typedef _LIBCPP_NODEBUG _Tp type; };
+template <class _Tp > struct __add_rvalue_reference_impl<_Tp, true> { typedef _LIBCPP_NODEBUG _Tp&& type; };
template <class _Tp> struct _LIBCPP_TEMPLATE_VIS add_rvalue_reference
-{typedef _LIBCPP_NODEBUG_TYPE typename __add_rvalue_reference_impl<_Tp>::type type;};
+{typedef _LIBCPP_NODEBUG typename __add_rvalue_reference_impl<_Tp>::type type;};
#if _LIBCPP_STD_VER > 11
template <class _Tp> using add_rvalue_reference_t = typename add_rvalue_reference<_Tp>::type;
@@ -1378,17 +1346,17 @@ declval() _NOEXCEPT;
template <class _Tp>
struct __uncvref {
- typedef _LIBCPP_NODEBUG_TYPE typename remove_cv<typename remove_reference<_Tp>::type>::type type;
+ typedef _LIBCPP_NODEBUG typename remove_cv<typename remove_reference<_Tp>::type>::type type;
};
template <class _Tp>
struct __unconstref {
- typedef _LIBCPP_NODEBUG_TYPE typename remove_const<typename remove_reference<_Tp>::type>::type type;
+ typedef _LIBCPP_NODEBUG typename remove_const<typename remove_reference<_Tp>::type>::type type;
};
#ifndef _LIBCPP_CXX03_LANG
template <class _Tp>
-using __uncvref_t _LIBCPP_NODEBUG_TYPE = typename __uncvref<_Tp>::type;
+using __uncvref_t _LIBCPP_NODEBUG = typename __uncvref<_Tp>::type;
#endif
// __is_same_uncvref
@@ -1413,11 +1381,11 @@ struct __any
// remove_pointer
-template <class _Tp> struct _LIBCPP_TEMPLATE_VIS remove_pointer {typedef _LIBCPP_NODEBUG_TYPE _Tp type;};
-template <class _Tp> struct _LIBCPP_TEMPLATE_VIS remove_pointer<_Tp*> {typedef _LIBCPP_NODEBUG_TYPE _Tp type;};
-template <class _Tp> struct _LIBCPP_TEMPLATE_VIS remove_pointer<_Tp* const> {typedef _LIBCPP_NODEBUG_TYPE _Tp type;};
-template <class _Tp> struct _LIBCPP_TEMPLATE_VIS remove_pointer<_Tp* volatile> {typedef _LIBCPP_NODEBUG_TYPE _Tp type;};
-template <class _Tp> struct _LIBCPP_TEMPLATE_VIS remove_pointer<_Tp* const volatile> {typedef _LIBCPP_NODEBUG_TYPE _Tp type;};
+template <class _Tp> struct _LIBCPP_TEMPLATE_VIS remove_pointer {typedef _LIBCPP_NODEBUG _Tp type;};
+template <class _Tp> struct _LIBCPP_TEMPLATE_VIS remove_pointer<_Tp*> {typedef _LIBCPP_NODEBUG _Tp type;};
+template <class _Tp> struct _LIBCPP_TEMPLATE_VIS remove_pointer<_Tp* const> {typedef _LIBCPP_NODEBUG _Tp type;};
+template <class _Tp> struct _LIBCPP_TEMPLATE_VIS remove_pointer<_Tp* volatile> {typedef _LIBCPP_NODEBUG _Tp type;};
+template <class _Tp> struct _LIBCPP_TEMPLATE_VIS remove_pointer<_Tp* const volatile> {typedef _LIBCPP_NODEBUG _Tp type;};
#if _LIBCPP_STD_VER > 11
template <class _Tp> using remove_pointer_t = typename remove_pointer<_Tp>::type;
@@ -1429,12 +1397,12 @@ template <class _Tp,
bool = __is_referenceable<_Tp>::value ||
_IsSame<typename remove_cv<_Tp>::type, void>::value>
struct __add_pointer_impl
- {typedef _LIBCPP_NODEBUG_TYPE typename remove_reference<_Tp>::type* type;};
+ {typedef _LIBCPP_NODEBUG typename remove_reference<_Tp>::type* type;};
template <class _Tp> struct __add_pointer_impl<_Tp, false>
- {typedef _LIBCPP_NODEBUG_TYPE _Tp type;};
+ {typedef _LIBCPP_NODEBUG _Tp type;};
template <class _Tp> struct _LIBCPP_TEMPLATE_VIS add_pointer
- {typedef _LIBCPP_NODEBUG_TYPE typename __add_pointer_impl<_Tp>::type type;};
+ {typedef _LIBCPP_NODEBUG typename __add_pointer_impl<_Tp>::type type;};
#if _LIBCPP_STD_VER > 11
template <class _Tp> using add_pointer_t = typename add_pointer<_Tp>::type;
@@ -1455,9 +1423,9 @@ template<class _Tp> using type_identity_t = typename type_identity<_Tp>::type;
template<class _Tp>
struct _LIBCPP_TEMPLATE_VIS is_signed : _BoolConstant<__is_signed(_Tp)> { };
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_signed_v = __is_signed(_Tp);
+inline constexpr bool is_signed_v = __is_signed(_Tp);
#endif
#else // __has_keyword(__is_signed)
@@ -1475,10 +1443,9 @@ template <class _Tp> struct __libcpp_is_signed<_Tp, false> : public false_type {
template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_signed : public __libcpp_is_signed<_Tp> {};
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_signed_v
- = is_signed<_Tp>::value;
+inline constexpr bool is_signed_v = is_signed<_Tp>::value;
#endif
#endif // __has_keyword(__is_signed)
@@ -1494,9 +1461,9 @@ _LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_signed_v
template<class _Tp>
struct _LIBCPP_TEMPLATE_VIS is_unsigned : _BoolConstant<__is_unsigned(_Tp)> { };
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_unsigned_v = __is_unsigned(_Tp);
+inline constexpr bool is_unsigned_v = __is_unsigned(_Tp);
#endif
#else // __has_keyword(__is_unsigned)
@@ -1514,10 +1481,9 @@ template <class _Tp> struct __libcpp_is_unsigned<_Tp, false> : public false_type
template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_unsigned : public __libcpp_is_unsigned<_Tp> {};
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_unsigned_v
- = is_unsigned<_Tp>::value;
+inline constexpr bool is_unsigned_v = is_unsigned<_Tp>::value;
#endif
#endif // __has_keyword(__is_unsigned)
@@ -1531,10 +1497,9 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS rank<_Tp[]>
template <class _Tp, size_t _Np> struct _LIBCPP_TEMPLATE_VIS rank<_Tp[_Np]>
: public integral_constant<size_t, rank<_Tp>::value + 1> {};
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR size_t rank_v
- = rank<_Tp>::value;
+inline constexpr size_t rank_v = rank<_Tp>::value;
#endif
// extent
@@ -1545,9 +1510,9 @@ template<class _Tp, size_t _Dim = 0>
struct _LIBCPP_TEMPLATE_VIS extent
: integral_constant<size_t, __array_extent(_Tp, _Dim)> { };
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp, unsigned _Ip = 0>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR size_t extent_v = __array_extent(_Tp, _Ip);
+inline constexpr size_t extent_v = __array_extent(_Tp, _Ip);
#endif
#else // __has_keyword(__array_extent)
@@ -1563,10 +1528,9 @@ template <class _Tp, size_t _Np> struct _LIBCPP_TEMPLATE_VIS extent<_Tp[_Np], 0>
template <class _Tp, size_t _Np, unsigned _Ip> struct _LIBCPP_TEMPLATE_VIS extent<_Tp[_Np], _Ip>
: public integral_constant<size_t, extent<_Tp, _Ip-1>::value> {};
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp, unsigned _Ip = 0>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR size_t extent_v
- = extent<_Tp, _Ip>::value;
+inline constexpr size_t extent_v = extent<_Tp, _Ip>::value;
#endif
#endif // __has_keyword(__array_extent)
@@ -1604,7 +1568,7 @@ template <class> struct _LIBCPP_TEMPLATE_VIS is_bounded_array
template <class _Tp, size_t _Np> struct _LIBCPP_TEMPLATE_VIS is_bounded_array<_Tp[_Np]> : true_type {};
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR
+inline constexpr
bool is_bounded_array_v = is_bounded_array<_Tp>::value;
// is_unbounded_array
@@ -1613,7 +1577,7 @@ template <class> struct _LIBCPP_TEMPLATE_VIS is_unbounded_array : fal
template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_unbounded_array<_Tp[]> : true_type {};
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR
+inline constexpr
bool is_unbounded_array_v = is_unbounded_array<_Tp>::value;
#endif
@@ -1621,13 +1585,13 @@ bool is_unbounded_array_v = is_unbounded_array<_Tp>::value;
template <class _Up, bool>
struct __decay {
- typedef _LIBCPP_NODEBUG_TYPE typename remove_cv<_Up>::type type;
+ typedef _LIBCPP_NODEBUG typename remove_cv<_Up>::type type;
};
template <class _Up>
struct __decay<_Up, true> {
public:
- typedef _LIBCPP_NODEBUG_TYPE typename conditional
+ typedef _LIBCPP_NODEBUG typename conditional
<
is_array<_Up>::value,
typename remove_extent<_Up>::type*,
@@ -1644,9 +1608,9 @@ template <class _Tp>
struct _LIBCPP_TEMPLATE_VIS decay
{
private:
- typedef _LIBCPP_NODEBUG_TYPE typename remove_reference<_Tp>::type _Up;
+ typedef _LIBCPP_NODEBUG typename remove_reference<_Tp>::type _Up;
public:
- typedef _LIBCPP_NODEBUG_TYPE typename __decay<_Up, __is_referenceable<_Up>::value>::type type;
+ typedef _LIBCPP_NODEBUG typename __decay<_Up, __is_referenceable<_Up>::value>::type type;
};
#if _LIBCPP_STD_VER > 11
@@ -1658,10 +1622,9 @@ template <class _Tp> using decay_t = typename decay<_Tp>::type;
template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_abstract
: public integral_constant<bool, __is_abstract(_Tp)> {};
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_abstract_v
- = is_abstract<_Tp>::value;
+inline constexpr bool is_abstract_v = is_abstract<_Tp>::value;
#endif
// is_final
@@ -1674,25 +1637,21 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS
is_final : public integral_constant<bool, __is_final(_Tp)> {};
#endif
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_final_v
- = is_final<_Tp>::value;
+inline constexpr bool is_final_v = is_final<_Tp>::value;
#endif
// is_aggregate
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_IS_AGGREGATE)
+#if _LIBCPP_STD_VER > 14
template <class _Tp> struct _LIBCPP_TEMPLATE_VIS
is_aggregate : public integral_constant<bool, __is_aggregate(_Tp)> {};
-#if !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp>
-_LIBCPP_INLINE_VAR constexpr bool is_aggregate_v
- = is_aggregate<_Tp>::value;
-#endif
+inline constexpr bool is_aggregate_v = is_aggregate<_Tp>::value;
-#endif // _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_IS_AGGREGATE)
+#endif // _LIBCPP_STD_VER > 14
// is_base_of
@@ -1700,10 +1659,9 @@ template <class _Bp, class _Dp>
struct _LIBCPP_TEMPLATE_VIS is_base_of
: public integral_constant<bool, __is_base_of(_Bp, _Dp)> {};
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Bp, class _Dp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_base_of_v
- = is_base_of<_Bp, _Dp>::value;
+inline constexpr bool is_base_of_v = is_base_of<_Bp, _Dp>::value;
#endif
// __is_core_convertible
@@ -1797,10 +1755,9 @@ template <class _T1, class _T2> struct _LIBCPP_TEMPLATE_VIS is_convertible
#endif // __has_feature(is_convertible_to)
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _From, class _To>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_convertible_v
- = is_convertible<_From, _To>::value;
+inline constexpr bool is_convertible_v = is_convertible<_From, _To>::value;
#endif
// is_nothrow_convertible
@@ -1860,10 +1817,9 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_empty : public __libcpp_empt
#endif // __has_feature(is_empty)
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_empty_v
- = is_empty<_Tp>::value;
+inline constexpr bool is_empty_v = is_empty<_Tp>::value;
#endif
// is_polymorphic
@@ -1886,10 +1842,9 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_polymorphic
#endif // __has_feature(is_polymorphic)
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_polymorphic_v
- = is_polymorphic<_Tp>::value;
+inline constexpr bool is_polymorphic_v = is_polymorphic<_Tp>::value;
#endif
// has_virtual_destructor
@@ -1906,25 +1861,21 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS has_virtual_destructor
#endif
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool has_virtual_destructor_v
- = has_virtual_destructor<_Tp>::value;
+inline constexpr bool has_virtual_destructor_v = has_virtual_destructor<_Tp>::value;
#endif
// has_unique_object_representations
-#if _LIBCPP_STD_VER > 14 && defined(_LIBCPP_HAS_UNIQUE_OBJECT_REPRESENTATIONS)
+#if _LIBCPP_STD_VER > 14
template <class _Tp> struct _LIBCPP_TEMPLATE_VIS has_unique_object_representations
: public integral_constant<bool,
__has_unique_object_representations(remove_cv_t<remove_all_extents_t<_Tp>>)> {};
-#if !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool has_unique_object_representations_v
- = has_unique_object_representations<_Tp>::value;
-#endif
+inline constexpr bool has_unique_object_representations_v = has_unique_object_representations<_Tp>::value;
#endif
@@ -1933,10 +1884,9 @@ _LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool has_unique_object_representations_v
template <class _Tp> struct _LIBCPP_TEMPLATE_VIS alignment_of
: public integral_constant<size_t, _LIBCPP_ALIGNOF(_Tp)> {};
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR size_t alignment_of_v
- = alignment_of<_Tp>::value;
+inline constexpr size_t alignment_of_v = alignment_of<_Tp>::value;
#endif
// aligned_storage
@@ -2214,26 +2164,26 @@ template <class _TypeList, size_t _Size, bool = _Size <= sizeof(typename _TypeLi
template <class _Hp, class _Tp, size_t _Size>
struct __find_first<__type_list<_Hp, _Tp>, _Size, true>
{
- typedef _LIBCPP_NODEBUG_TYPE _Hp type;
+ typedef _LIBCPP_NODEBUG _Hp type;
};
template <class _Hp, class _Tp, size_t _Size>
struct __find_first<__type_list<_Hp, _Tp>, _Size, false>
{
- typedef _LIBCPP_NODEBUG_TYPE typename __find_first<_Tp, _Size>::type type;
+ typedef _LIBCPP_NODEBUG typename __find_first<_Tp, _Size>::type type;
};
template <class _Tp, class _Up, bool = is_const<typename remove_reference<_Tp>::type>::value,
bool = is_volatile<typename remove_reference<_Tp>::type>::value>
struct __apply_cv
{
- typedef _LIBCPP_NODEBUG_TYPE _Up type;
+ typedef _LIBCPP_NODEBUG _Up type;
};
template <class _Tp, class _Up>
struct __apply_cv<_Tp, _Up, true, false>
{
- typedef _LIBCPP_NODEBUG_TYPE const _Up type;
+ typedef _LIBCPP_NODEBUG const _Up type;
};
template <class _Tp, class _Up>
@@ -2340,7 +2290,7 @@ template <class _Tp> using make_unsigned_t = typename make_unsigned<_Tp>::type;
#ifndef _LIBCPP_CXX03_LANG
template <class _Tp>
-_LIBCPP_NODISCARD_ATTRIBUTE _LIBCPP_INLINE_VISIBILITY constexpr
+_LIBCPP_HIDE_FROM_ABI constexpr
typename make_unsigned<_Tp>::type __to_unsigned_like(_Tp __x) noexcept {
return static_cast<typename make_unsigned<_Tp>::type>(__x);
}
@@ -2379,7 +2329,7 @@ struct __common_type2_imp<_Tp, _Up,
true ? declval<_Tp>() : declval<_Up>()
)>::type>
{
- typedef _LIBCPP_NODEBUG_TYPE typename decay<decltype(
+ typedef _LIBCPP_NODEBUG typename decay<decltype(
true ? declval<_Tp>() : declval<_Up>()
)>::type type;
};
@@ -2672,16 +2622,16 @@ template <class...> struct common_reference {};
// is_assignable
-template<typename, typename _Tp> struct __select_2nd { typedef _LIBCPP_NODEBUG_TYPE _Tp type; };
+template<typename, typename _Tp> struct __select_2nd { typedef _LIBCPP_NODEBUG _Tp type; };
#if __has_keyword(__is_assignable)
template<class _Tp, class _Up>
struct _LIBCPP_TEMPLATE_VIS is_assignable : _BoolConstant<__is_assignable(_Tp, _Up)> { };
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp, class _Arg>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_assignable_v = __is_assignable(_Tp, _Arg);
+inline constexpr bool is_assignable_v = __is_assignable(_Tp, _Arg);
#endif
#else // __has_keyword(__is_assignable)
@@ -2708,10 +2658,9 @@ template <class _Tp, class _Arg>
struct is_assignable
: public __is_assignable_imp<_Tp, _Arg> {};
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp, class _Arg>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_assignable_v
- = is_assignable<_Tp, _Arg>::value;
+inline constexpr bool is_assignable_v = is_assignable<_Tp, _Arg>::value;
#endif
#endif // __has_keyword(__is_assignable)
@@ -2722,10 +2671,9 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_copy_assignable
: public is_assignable<typename add_lvalue_reference<_Tp>::type,
typename add_lvalue_reference<typename add_const<_Tp>::type>::type> {};
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_copy_assignable_v
- = is_copy_assignable<_Tp>::value;
+inline constexpr bool is_copy_assignable_v = is_copy_assignable<_Tp>::value;
#endif
// is_move_assignable
@@ -2734,10 +2682,9 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_move_assignable
: public is_assignable<typename add_lvalue_reference<_Tp>::type,
typename add_rvalue_reference<_Tp>::type> {};
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_move_assignable_v
- = is_move_assignable<_Tp>::value;
+inline constexpr bool is_move_assignable_v = is_move_assignable<_Tp>::value;
#endif
// is_destructible
@@ -2747,9 +2694,9 @@ _LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_move_assignable_v
template<class _Tp>
struct _LIBCPP_TEMPLATE_VIS is_destructible : _BoolConstant<__is_destructible(_Tp)> { };
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_destructible_v = __is_destructible(_Tp);
+inline constexpr bool is_destructible_v = __is_destructible(_Tp);
#endif
#else // __has_keyword(__is_destructible)
@@ -2810,10 +2757,9 @@ template <>
struct is_destructible<void>
: public false_type {};
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_destructible_v
- = is_destructible<_Tp>::value;
+inline constexpr bool is_destructible_v = is_destructible<_Tp>::value;
#endif
#endif // __has_keyword(__is_destructible)
@@ -3049,140 +2995,14 @@ struct __member_pointer_class_type<_Ret _ClassType::*> {
// template <class T, class... Args> struct is_constructible;
-#if defined(_LIBCPP_COMPILER_GCC) && _GNUC_VER_NEW >= 10000
-# define _LIBCPP_GCC_SUPPORTS_IS_CONSTRUCTIBLE
-#endif
-
-#if !defined(_LIBCPP_CXX03_LANG) && !__has_feature(is_constructible) && !defined(_LIBCPP_GCC_SUPPORTS_IS_CONSTRUCTIBLE)
-
-template <class _Tp, class... _Args>
-struct __libcpp_is_constructible;
-
-template <class _To, class _From>
-struct __is_invalid_base_to_derived_cast {
- static_assert(is_reference<_To>::value, "Wrong specialization");
- using _RawFrom = __uncvref_t<_From>;
- using _RawTo = __uncvref_t<_To>;
- static const bool value = _And<
- _IsNotSame<_RawFrom, _RawTo>,
- is_base_of<_RawFrom, _RawTo>,
- _Not<__libcpp_is_constructible<_RawTo, _From>>
- >::value;
-};
-
-template <class _To, class _From>
-struct __is_invalid_lvalue_to_rvalue_cast : false_type {
- static_assert(is_reference<_To>::value, "Wrong specialization");
-};
-
-template <class _ToRef, class _FromRef>
-struct __is_invalid_lvalue_to_rvalue_cast<_ToRef&&, _FromRef&> {
- using _RawFrom = __uncvref_t<_FromRef>;
- using _RawTo = __uncvref_t<_ToRef>;
- static const bool value = _And<
- _Not<is_function<_RawTo>>,
- _Or<
- _IsSame<_RawFrom, _RawTo>,
- is_base_of<_RawTo, _RawFrom>>
- >::value;
-};
-
-struct __is_constructible_helper
-{
- template <class _To>
- static void __eat(_To);
-
- // This overload is needed to work around a Clang bug that disallows
- // static_cast<T&&>(e) for non-reference-compatible types.
- // Example: static_cast<int&&>(declval<double>());
- // NOTE: The static_cast implementation below is required to support
- // classes with explicit conversion operators.
- template <class _To, class _From,
- class = decltype(__eat<_To>(declval<_From>()))>
- static true_type __test_cast(int);
-
- template <class _To, class _From,
- class = decltype(static_cast<_To>(declval<_From>()))>
- static integral_constant<bool,
- !__is_invalid_base_to_derived_cast<_To, _From>::value &&
- !__is_invalid_lvalue_to_rvalue_cast<_To, _From>::value
- > __test_cast(long);
-
- template <class, class>
- static false_type __test_cast(...);
-
- template <class _Tp, class ..._Args,
- class = decltype(_Tp(declval<_Args>()...))>
- static true_type __test_nary(int);
- template <class _Tp, class...>
- static false_type __test_nary(...);
-
- template <class _Tp, class _A0, class = decltype(::new _Tp(declval<_A0>()))>
- static is_destructible<_Tp> __test_unary(int);
- template <class, class>
- static false_type __test_unary(...);
-};
-
-template <class _Tp, bool = is_void<_Tp>::value>
-struct __is_default_constructible
- : decltype(__is_constructible_helper::__test_nary<_Tp>(0))
-{};
-
-template <class _Tp>
-struct __is_default_constructible<_Tp, true> : false_type {};
-
-template <class _Tp>
-struct __is_default_constructible<_Tp[], false> : false_type {};
-
-template <class _Tp, size_t _Nx>
-struct __is_default_constructible<_Tp[_Nx], false>
- : __is_default_constructible<typename remove_all_extents<_Tp>::type> {};
-
-template <class _Tp, class... _Args>
-struct __libcpp_is_constructible
-{
- static_assert(sizeof...(_Args) > 1, "Wrong specialization");
- typedef decltype(__is_constructible_helper::__test_nary<_Tp, _Args...>(0))
- type;
-};
-
-template <class _Tp>
-struct __libcpp_is_constructible<_Tp> : __is_default_constructible<_Tp> {};
-
-template <class _Tp, class _A0>
-struct __libcpp_is_constructible<_Tp, _A0>
- : public decltype(__is_constructible_helper::__test_unary<_Tp, _A0>(0))
-{};
-
-template <class _Tp, class _A0>
-struct __libcpp_is_constructible<_Tp&, _A0>
- : public decltype(__is_constructible_helper::
- __test_cast<_Tp&, _A0>(0))
-{};
-
-template <class _Tp, class _A0>
-struct __libcpp_is_constructible<_Tp&&, _A0>
- : public decltype(__is_constructible_helper::
- __test_cast<_Tp&&, _A0>(0))
-{};
-
-#endif
-
-#if __has_feature(is_constructible) || defined(_LIBCPP_GCC_SUPPORTS_IS_CONSTRUCTIBLE)
template <class _Tp, class ..._Args>
struct _LIBCPP_TEMPLATE_VIS is_constructible
: public integral_constant<bool, __is_constructible(_Tp, _Args...)>
- {};
-#else
-template <class _Tp, class... _Args>
-struct _LIBCPP_TEMPLATE_VIS is_constructible
- : public __libcpp_is_constructible<_Tp, _Args...>::type {};
-#endif
+{ };
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp, class ..._Args>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_constructible_v
- = is_constructible<_Tp, _Args...>::value;
+inline constexpr bool is_constructible_v = is_constructible<_Tp, _Args...>::value;
#endif
// is_default_constructible
@@ -3192,10 +3012,9 @@ struct _LIBCPP_TEMPLATE_VIS is_default_constructible
: public is_constructible<_Tp>
{};
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_default_constructible_v
- = is_default_constructible<_Tp>::value;
+inline constexpr bool is_default_constructible_v = is_default_constructible<_Tp>::value;
#endif
#ifndef _LIBCPP_CXX03_LANG
@@ -3229,10 +3048,9 @@ struct _LIBCPP_TEMPLATE_VIS is_copy_constructible
: public is_constructible<_Tp,
typename add_lvalue_reference<typename add_const<_Tp>::type>::type> {};
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_copy_constructible_v
- = is_copy_constructible<_Tp>::value;
+inline constexpr bool is_copy_constructible_v = is_copy_constructible<_Tp>::value;
#endif
// is_move_constructible
@@ -3242,65 +3060,22 @@ struct _LIBCPP_TEMPLATE_VIS is_move_constructible
: public is_constructible<_Tp, typename add_rvalue_reference<_Tp>::type>
{};
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_move_constructible_v
- = is_move_constructible<_Tp>::value;
+inline constexpr bool is_move_constructible_v = is_move_constructible<_Tp>::value;
#endif
// is_trivially_constructible
-#if __has_feature(is_trivially_constructible) || _GNUC_VER >= 501
-
template <class _Tp, class... _Args>
struct _LIBCPP_TEMPLATE_VIS is_trivially_constructible
: integral_constant<bool, __is_trivially_constructible(_Tp, _Args...)>
{
};
-#else // !__has_feature(is_trivially_constructible)
-
-template <class _Tp, class... _Args>
-struct _LIBCPP_TEMPLATE_VIS is_trivially_constructible
- : false_type
-{
-};
-
-template <class _Tp>
-struct _LIBCPP_TEMPLATE_VIS is_trivially_constructible<_Tp>
-#if __has_feature(has_trivial_constructor) || defined(_LIBCPP_COMPILER_GCC)
- : integral_constant<bool, __has_trivial_constructor(_Tp)>
-#else
- : integral_constant<bool, is_scalar<_Tp>::value>
-#endif
-{
-};
-
-template <class _Tp>
-struct _LIBCPP_TEMPLATE_VIS is_trivially_constructible<_Tp, _Tp&&>
- : integral_constant<bool, is_scalar<_Tp>::value>
-{
-};
-
-template <class _Tp>
-struct _LIBCPP_TEMPLATE_VIS is_trivially_constructible<_Tp, const _Tp&>
- : integral_constant<bool, is_scalar<_Tp>::value>
-{
-};
-
-template <class _Tp>
-struct _LIBCPP_TEMPLATE_VIS is_trivially_constructible<_Tp, _Tp&>
- : integral_constant<bool, is_scalar<_Tp>::value>
-{
-};
-
-#endif // !__has_feature(is_trivially_constructible)
-
-
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp, class... _Args>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_trivially_constructible_v
- = is_trivially_constructible<_Tp, _Args...>::value;
+inline constexpr bool is_trivially_constructible_v = is_trivially_constructible<_Tp, _Args...>::value;
#endif
// is_trivially_default_constructible
@@ -3309,10 +3084,9 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_trivially_default_constructi
: public is_trivially_constructible<_Tp>
{};
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_trivially_default_constructible_v
- = is_trivially_default_constructible<_Tp>::value;
+inline constexpr bool is_trivially_default_constructible_v = is_trivially_default_constructible<_Tp>::value;
#endif
// is_trivially_copy_constructible
@@ -3321,10 +3095,9 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_trivially_copy_constructible
: public is_trivially_constructible<_Tp, typename add_lvalue_reference<const _Tp>::type>
{};
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_trivially_copy_constructible_v
- = is_trivially_copy_constructible<_Tp>::value;
+inline constexpr bool is_trivially_copy_constructible_v = is_trivially_copy_constructible<_Tp>::value;
#endif
// is_trivially_move_constructible
@@ -3333,50 +3106,21 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_trivially_move_constructible
: public is_trivially_constructible<_Tp, typename add_rvalue_reference<_Tp>::type>
{};
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_trivially_move_constructible_v
- = is_trivially_move_constructible<_Tp>::value;
+inline constexpr bool is_trivially_move_constructible_v = is_trivially_move_constructible<_Tp>::value;
#endif
// is_trivially_assignable
-#if __has_feature(is_trivially_assignable) || _GNUC_VER >= 501
-
template <class _Tp, class _Arg>
struct is_trivially_assignable
: integral_constant<bool, __is_trivially_assignable(_Tp, _Arg)>
-{
-};
-
-#else // !__has_feature(is_trivially_assignable)
-
-template <class _Tp, class _Arg>
-struct is_trivially_assignable
- : public false_type {};
-
-template <class _Tp>
-struct is_trivially_assignable<_Tp&, _Tp>
- : integral_constant<bool, is_scalar<_Tp>::value> {};
-
-template <class _Tp>
-struct is_trivially_assignable<_Tp&, _Tp&>
- : integral_constant<bool, is_scalar<_Tp>::value> {};
-
-template <class _Tp>
-struct is_trivially_assignable<_Tp&, const _Tp&>
- : integral_constant<bool, is_scalar<_Tp>::value> {};
-
-template <class _Tp>
-struct is_trivially_assignable<_Tp&, _Tp&&>
- : integral_constant<bool, is_scalar<_Tp>::value> {};
-
-#endif // !__has_feature(is_trivially_assignable)
+{ };
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp, class _Arg>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_trivially_assignable_v
- = is_trivially_assignable<_Tp, _Arg>::value;
+inline constexpr bool is_trivially_assignable_v = is_trivially_assignable<_Tp, _Arg>::value;
#endif
// is_trivially_copy_assignable
@@ -3385,10 +3129,9 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_trivially_copy_assignable
: public is_trivially_assignable<typename add_lvalue_reference<_Tp>::type,
typename add_lvalue_reference<typename add_const<_Tp>::type>::type> {};
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_trivially_copy_assignable_v
- = is_trivially_copy_assignable<_Tp>::value;
+inline constexpr bool is_trivially_copy_assignable_v = is_trivially_copy_assignable<_Tp>::value;
#endif
// is_trivially_move_assignable
@@ -3398,10 +3141,9 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_trivially_move_assignable
typename add_rvalue_reference<_Tp>::type>
{};
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_trivially_move_assignable_v
- = is_trivially_move_assignable<_Tp>::value;
+inline constexpr bool is_trivially_move_assignable_v = is_trivially_move_assignable<_Tp>::value;
#endif
// is_trivially_destructible
@@ -3430,10 +3172,9 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_trivially_destructible<_Tp[]
#endif
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_trivially_destructible_v
- = is_trivially_destructible<_Tp>::value;
+inline constexpr bool is_trivially_destructible_v = is_trivially_destructible<_Tp>::value;
#endif
// is_nothrow_constructible
@@ -3484,10 +3225,9 @@ struct _LIBCPP_TEMPLATE_VIS is_nothrow_constructible<_Tp[_Ns]>
#endif // _LIBCPP_HAS_NO_NOEXCEPT
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp, class ..._Args>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_nothrow_constructible_v
- = is_nothrow_constructible<_Tp, _Args...>::value;
+inline constexpr bool is_nothrow_constructible_v = is_nothrow_constructible<_Tp, _Args...>::value;
#endif
// is_nothrow_default_constructible
@@ -3496,10 +3236,9 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_nothrow_default_constructibl
: public is_nothrow_constructible<_Tp>
{};
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_nothrow_default_constructible_v
- = is_nothrow_default_constructible<_Tp>::value;
+inline constexpr bool is_nothrow_default_constructible_v = is_nothrow_default_constructible<_Tp>::value;
#endif
// is_nothrow_copy_constructible
@@ -3508,10 +3247,9 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_nothrow_copy_constructible
: public is_nothrow_constructible<_Tp,
typename add_lvalue_reference<typename add_const<_Tp>::type>::type> {};
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_nothrow_copy_constructible_v
- = is_nothrow_copy_constructible<_Tp>::value;
+inline constexpr bool is_nothrow_copy_constructible_v = is_nothrow_copy_constructible<_Tp>::value;
#endif
// is_nothrow_move_constructible
@@ -3520,10 +3258,9 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_nothrow_move_constructible
: public is_nothrow_constructible<_Tp, typename add_rvalue_reference<_Tp>::type>
{};
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_nothrow_move_constructible_v
- = is_nothrow_move_constructible<_Tp>::value;
+inline constexpr bool is_nothrow_move_constructible_v = is_nothrow_move_constructible<_Tp>::value;
#endif
// is_nothrow_assignable
@@ -3558,10 +3295,9 @@ struct _LIBCPP_TEMPLATE_VIS is_nothrow_assignable
#endif // _LIBCPP_HAS_NO_NOEXCEPT
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp, class _Arg>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_nothrow_assignable_v
- = is_nothrow_assignable<_Tp, _Arg>::value;
+inline constexpr bool is_nothrow_assignable_v = is_nothrow_assignable<_Tp, _Arg>::value;
#endif
// is_nothrow_copy_assignable
@@ -3570,10 +3306,9 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_nothrow_copy_assignable
: public is_nothrow_assignable<typename add_lvalue_reference<_Tp>::type,
typename add_lvalue_reference<typename add_const<_Tp>::type>::type> {};
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_nothrow_copy_assignable_v
- = is_nothrow_copy_assignable<_Tp>::value;
+inline constexpr bool is_nothrow_copy_assignable_v = is_nothrow_copy_assignable<_Tp>::value;
#endif
// is_nothrow_move_assignable
@@ -3583,10 +3318,9 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_nothrow_move_assignable
typename add_rvalue_reference<_Tp>::type>
{};
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_nothrow_move_assignable_v
- = is_nothrow_move_assignable<_Tp>::value;
+inline constexpr bool is_nothrow_move_assignable_v = is_nothrow_move_assignable<_Tp>::value;
#endif
// is_nothrow_destructible
@@ -3646,10 +3380,9 @@ struct _LIBCPP_TEMPLATE_VIS is_nothrow_destructible<_Tp[]>
#endif
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_nothrow_destructible_v
- = is_nothrow_destructible<_Tp>::value;
+inline constexpr bool is_nothrow_destructible_v = is_nothrow_destructible<_Tp>::value;
#endif
// is_pod
@@ -3669,10 +3402,9 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_pod
#endif
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_pod_v
- = is_pod<_Tp>::value;
+inline constexpr bool is_pod_v = is_pod<_Tp>::value;
#endif
// is_literal_type;
@@ -3682,11 +3414,10 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX17 is_
: public integral_constant<bool, __is_literal_type(_Tp)>
{};
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_DEPRECATED_IN_CXX17 _LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_literal_type_v
- = is_literal_type<_Tp>::value;
-#endif // _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+_LIBCPP_DEPRECATED_IN_CXX17 inline constexpr bool is_literal_type_v = is_literal_type<_Tp>::value;
+#endif // _LIBCPP_STD_VER > 14
#endif // _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_TYPE_TRAITS)
// is_standard_layout;
@@ -3699,28 +3430,20 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_standard_layout
#endif
{};
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_standard_layout_v
- = is_standard_layout<_Tp>::value;
+inline constexpr bool is_standard_layout_v = is_standard_layout<_Tp>::value;
#endif
// is_trivially_copyable;
template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_trivially_copyable
-#if __has_feature(is_trivially_copyable)
: public integral_constant<bool, __is_trivially_copyable(_Tp)>
-#elif _GNUC_VER >= 501
- : public integral_constant<bool, !is_volatile<_Tp>::value && __is_trivially_copyable(_Tp)>
-#else
- : integral_constant<bool, is_scalar<typename remove_all_extents<_Tp>::type>::value>
-#endif
{};
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_trivially_copyable_v
- = is_trivially_copyable<_Tp>::value;
+inline constexpr bool is_trivially_copyable_v = is_trivially_copyable<_Tp>::value;
#endif
// is_trivial;
@@ -3734,10 +3457,9 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_trivial
#endif
{};
-#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
+#if _LIBCPP_STD_VER > 14
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_trivial_v
- = is_trivial<_Tp>::value;
+inline constexpr bool is_trivial_v = is_trivial<_Tp>::value;
#endif
template <class _Tp> struct __is_reference_wrapper_impl : public false_type {};
@@ -3811,10 +3533,6 @@ using __enable_if_bullet6 = typename enable_if
// fall back - none of the bullets
-#define _LIBCPP_INVOKE_RETURN(...) \
- noexcept(noexcept(__VA_ARGS__)) -> decltype(__VA_ARGS__) \
- { return __VA_ARGS__; }
-
template <class ..._Args>
auto __invoke(__any, _Args&& ...__args) -> __nat;
@@ -3828,42 +3546,54 @@ template <class _Fp, class _A0, class ..._Args,
inline _LIBCPP_INLINE_VISIBILITY
_LIBCPP_CONSTEXPR_AFTER_CXX17 auto
__invoke(_Fp&& __f, _A0&& __a0, _Args&& ...__args)
-_LIBCPP_INVOKE_RETURN((static_cast<_A0&&>(__a0).*__f)(static_cast<_Args&&>(__args)...))
+ noexcept(noexcept((static_cast<_A0&&>(__a0).*__f)(static_cast<_Args&&>(__args)...)))
+ -> decltype( (static_cast<_A0&&>(__a0).*__f)(static_cast<_Args&&>(__args)...))
+ { return (static_cast<_A0&&>(__a0).*__f)(static_cast<_Args&&>(__args)...); }
template <class _Fp, class _A0, class ..._Args,
class = __enable_if_bullet1<_Fp, _A0>>
inline _LIBCPP_INLINE_VISIBILITY
_LIBCPP_CONSTEXPR auto
__invoke_constexpr(_Fp&& __f, _A0&& __a0, _Args&& ...__args)
-_LIBCPP_INVOKE_RETURN((static_cast<_A0&&>(__a0).*__f)(static_cast<_Args&&>(__args)...))
+ noexcept(noexcept((static_cast<_A0&&>(__a0).*__f)(static_cast<_Args&&>(__args)...)))
+ -> decltype( (static_cast<_A0&&>(__a0).*__f)(static_cast<_Args&&>(__args)...))
+ { return (static_cast<_A0&&>(__a0).*__f)(static_cast<_Args&&>(__args)...); }
template <class _Fp, class _A0, class ..._Args,
class = __enable_if_bullet2<_Fp, _A0>>
inline _LIBCPP_INLINE_VISIBILITY
_LIBCPP_CONSTEXPR_AFTER_CXX17 auto
__invoke(_Fp&& __f, _A0&& __a0, _Args&& ...__args)
-_LIBCPP_INVOKE_RETURN((__a0.get().*__f)(static_cast<_Args&&>(__args)...))
+ noexcept(noexcept((__a0.get().*__f)(static_cast<_Args&&>(__args)...)))
+ -> decltype( (__a0.get().*__f)(static_cast<_Args&&>(__args)...))
+ { return (__a0.get().*__f)(static_cast<_Args&&>(__args)...); }
template <class _Fp, class _A0, class ..._Args,
class = __enable_if_bullet2<_Fp, _A0>>
inline _LIBCPP_INLINE_VISIBILITY
_LIBCPP_CONSTEXPR auto
__invoke_constexpr(_Fp&& __f, _A0&& __a0, _Args&& ...__args)
-_LIBCPP_INVOKE_RETURN((__a0.get().*__f)(static_cast<_Args&&>(__args)...))
+ noexcept(noexcept((__a0.get().*__f)(static_cast<_Args&&>(__args)...)))
+ -> decltype( (__a0.get().*__f)(static_cast<_Args&&>(__args)...))
+ { return (__a0.get().*__f)(static_cast<_Args&&>(__args)...); }
template <class _Fp, class _A0, class ..._Args,
class = __enable_if_bullet3<_Fp, _A0>>
inline _LIBCPP_INLINE_VISIBILITY
_LIBCPP_CONSTEXPR_AFTER_CXX17 auto
__invoke(_Fp&& __f, _A0&& __a0, _Args&& ...__args)
-_LIBCPP_INVOKE_RETURN(((*static_cast<_A0&&>(__a0)).*__f)(static_cast<_Args&&>(__args)...))
+ noexcept(noexcept(((*static_cast<_A0&&>(__a0)).*__f)(static_cast<_Args&&>(__args)...)))
+ -> decltype( ((*static_cast<_A0&&>(__a0)).*__f)(static_cast<_Args&&>(__args)...))
+ { return ((*static_cast<_A0&&>(__a0)).*__f)(static_cast<_Args&&>(__args)...); }
template <class _Fp, class _A0, class ..._Args,
class = __enable_if_bullet3<_Fp, _A0>>
inline _LIBCPP_INLINE_VISIBILITY
_LIBCPP_CONSTEXPR auto
__invoke_constexpr(_Fp&& __f, _A0&& __a0, _Args&& ...__args)
-_LIBCPP_INVOKE_RETURN(((*static_cast<_A0&&>(__a0)).*__f)(static_cast<_Args&&>(__args)...))
+ noexcept(noexcept(((*static_cast<_A0&&>(__a0)).*__f)(static_cast<_Args&&>(__args)...)))
+ -> decltype( ((*static_cast<_A0&&>(__a0)).*__f)(static_cast<_Args&&>(__args)...))
+ { return ((*static_cast<_A0&&>(__a0)).*__f)(static_cast<_Args&&>(__args)...); }
// bullets 4, 5 and 6
@@ -3872,42 +3602,54 @@ template <class _Fp, class _A0,
inline _LIBCPP_INLINE_VISIBILITY
_LIBCPP_CONSTEXPR_AFTER_CXX17 auto
__invoke(_Fp&& __f, _A0&& __a0)
-_LIBCPP_INVOKE_RETURN(static_cast<_A0&&>(__a0).*__f)
+ noexcept(noexcept(static_cast<_A0&&>(__a0).*__f))
+ -> decltype( static_cast<_A0&&>(__a0).*__f)
+ { return static_cast<_A0&&>(__a0).*__f; }
template <class _Fp, class _A0,
class = __enable_if_bullet4<_Fp, _A0>>
inline _LIBCPP_INLINE_VISIBILITY
_LIBCPP_CONSTEXPR auto
__invoke_constexpr(_Fp&& __f, _A0&& __a0)
-_LIBCPP_INVOKE_RETURN(static_cast<_A0&&>(__a0).*__f)
+ noexcept(noexcept(static_cast<_A0&&>(__a0).*__f))
+ -> decltype( static_cast<_A0&&>(__a0).*__f)
+ { return static_cast<_A0&&>(__a0).*__f; }
template <class _Fp, class _A0,
class = __enable_if_bullet5<_Fp, _A0>>
inline _LIBCPP_INLINE_VISIBILITY
_LIBCPP_CONSTEXPR_AFTER_CXX17 auto
__invoke(_Fp&& __f, _A0&& __a0)
-_LIBCPP_INVOKE_RETURN(__a0.get().*__f)
+ noexcept(noexcept(__a0.get().*__f))
+ -> decltype( __a0.get().*__f)
+ { return __a0.get().*__f; }
template <class _Fp, class _A0,
class = __enable_if_bullet5<_Fp, _A0>>
inline _LIBCPP_INLINE_VISIBILITY
_LIBCPP_CONSTEXPR auto
__invoke_constexpr(_Fp&& __f, _A0&& __a0)
-_LIBCPP_INVOKE_RETURN(__a0.get().*__f)
+ noexcept(noexcept(__a0.get().*__f))
+ -> decltype( __a0.get().*__f)
+ { return __a0.get().*__f; }
template <class _Fp, class _A0,
class = __enable_if_bullet6<_Fp, _A0>>
inline _LIBCPP_INLINE_VISIBILITY
_LIBCPP_CONSTEXPR_AFTER_CXX17 auto
__invoke(_Fp&& __f, _A0&& __a0)
-_LIBCPP_INVOKE_RETURN((*static_cast<_A0&&>(__a0)).*__f)
+ noexcept(noexcept((*static_cast<_A0&&>(__a0)).*__f))
+ -> decltype( (*static_cast<_A0&&>(__a0)).*__f)
+ { return (*static_cast<_A0&&>(__a0)).*__f; }
template <class _Fp, class _A0,
class = __enable_if_bullet6<_Fp, _A0>>
inline _LIBCPP_INLINE_VISIBILITY
_LIBCPP_CONSTEXPR auto
__invoke_constexpr(_Fp&& __f, _A0&& __a0)
-_LIBCPP_INVOKE_RETURN((*static_cast<_A0&&>(__a0)).*__f)
+ noexcept(noexcept((*static_cast<_A0&&>(__a0)).*__f))
+ -> decltype( (*static_cast<_A0&&>(__a0)).*__f)
+ { return (*static_cast<_A0&&>(__a0)).*__f; }
// bullet 7
@@ -3915,15 +3657,17 @@ template <class _Fp, class ..._Args>
inline _LIBCPP_INLINE_VISIBILITY
_LIBCPP_CONSTEXPR_AFTER_CXX17 auto
__invoke(_Fp&& __f, _Args&& ...__args)
-_LIBCPP_INVOKE_RETURN(static_cast<_Fp&&>(__f)(static_cast<_Args&&>(__args)...))
+ noexcept(noexcept(static_cast<_Fp&&>(__f)(static_cast<_Args&&>(__args)...)))
+ -> decltype( static_cast<_Fp&&>(__f)(static_cast<_Args&&>(__args)...))
+ { return static_cast<_Fp&&>(__f)(static_cast<_Args&&>(__args)...); }
template <class _Fp, class ..._Args>
inline _LIBCPP_INLINE_VISIBILITY
_LIBCPP_CONSTEXPR auto
__invoke_constexpr(_Fp&& __f, _Args&& ...__args)
-_LIBCPP_INVOKE_RETURN(static_cast<_Fp&&>(__f)(static_cast<_Args&&>(__args)...))
-
-#undef _LIBCPP_INVOKE_RETURN
+ noexcept(noexcept(static_cast<_Fp&&>(__f)(static_cast<_Args&&>(__args)...)))
+ -> decltype( static_cast<_Fp&&>(__f)(static_cast<_Args&&>(__args)...))
+ { return static_cast<_Fp&&>(__f)(static_cast<_Args&&>(__args)...); }
// __invokable
template <class _Ret, class _Fp, class ..._Args>
@@ -4122,12 +3866,10 @@ struct _LIBCPP_TEMPLATE_VIS is_invocable_r
: integral_constant<bool, __invokable_r<_Ret, _Fn, _Args...>::value> {};
template <class _Fn, class ..._Args>
-_LIBCPP_INLINE_VAR constexpr bool is_invocable_v
- = is_invocable<_Fn, _Args...>::value;
+inline constexpr bool is_invocable_v = is_invocable<_Fn, _Args...>::value;
template <class _Ret, class _Fn, class ..._Args>
-_LIBCPP_INLINE_VAR constexpr bool is_invocable_r_v
- = is_invocable_r<_Ret, _Fn, _Args...>::value;
+inline constexpr bool is_invocable_r_v = is_invocable_r<_Ret, _Fn, _Args...>::value;
// is_nothrow_invocable
@@ -4140,12 +3882,10 @@ struct _LIBCPP_TEMPLATE_VIS is_nothrow_invocable_r
: integral_constant<bool, __nothrow_invokable_r<_Ret, _Fn, _Args...>::value> {};
template <class _Fn, class ..._Args>
-_LIBCPP_INLINE_VAR constexpr bool is_nothrow_invocable_v
- = is_nothrow_invocable<_Fn, _Args...>::value;
+inline constexpr bool is_nothrow_invocable_v = is_nothrow_invocable<_Fn, _Args...>::value;
template <class _Ret, class _Fn, class ..._Args>
-_LIBCPP_INLINE_VAR constexpr bool is_nothrow_invocable_r_v
- = is_nothrow_invocable_r<_Ret, _Fn, _Args...>::value;
+inline constexpr bool is_nothrow_invocable_r_v = is_nothrow_invocable_r<_Ret, _Fn, _Args...>::value;
#endif // _LIBCPP_STD_VER > 14
@@ -4268,20 +4008,16 @@ struct _LIBCPP_TEMPLATE_VIS is_nothrow_swappable
};
template <class _Tp, class _Up>
-_LIBCPP_INLINE_VAR constexpr bool is_swappable_with_v
- = is_swappable_with<_Tp, _Up>::value;
+inline constexpr bool is_swappable_with_v = is_swappable_with<_Tp, _Up>::value;
template <class _Tp>
-_LIBCPP_INLINE_VAR constexpr bool is_swappable_v
- = is_swappable<_Tp>::value;
+inline constexpr bool is_swappable_v = is_swappable<_Tp>::value;
template <class _Tp, class _Up>
-_LIBCPP_INLINE_VAR constexpr bool is_nothrow_swappable_with_v
- = is_nothrow_swappable_with<_Tp, _Up>::value;
+inline constexpr bool is_nothrow_swappable_with_v = is_nothrow_swappable_with<_Tp, _Up>::value;
template <class _Tp>
-_LIBCPP_INLINE_VAR constexpr bool is_nothrow_swappable_v
- = is_nothrow_swappable<_Tp>::value;
+inline constexpr bool is_nothrow_swappable_v = is_nothrow_swappable<_Tp>::value;
#endif // _LIBCPP_STD_VER > 14
@@ -4349,40 +4085,6 @@ inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR
typename __sfinae_underlying_type<_Tp>::__promoted_type
__convert_to_integral(_Tp __val) { return __val; }
-#ifndef _LIBCPP_CXX03_LANG
-
-template <class _Tp>
-struct __has_operator_addressof_member_imp
-{
- template <class _Up>
- static auto __test(int)
- -> typename __select_2nd<decltype(declval<_Up>().operator&()), true_type>::type;
- template <class>
- static auto __test(long) -> false_type;
-
- static const bool value = decltype(__test<_Tp>(0))::value;
-};
-
-template <class _Tp>
-struct __has_operator_addressof_free_imp
-{
- template <class _Up>
- static auto __test(int)
- -> typename __select_2nd<decltype(operator&(declval<_Up>())), true_type>::type;
- template <class>
- static auto __test(long) -> false_type;
-
- static const bool value = decltype(__test<_Tp>(0))::value;
-};
-
-template <class _Tp>
-struct __has_operator_addressof
- : public integral_constant<bool, __has_operator_addressof_member_imp<_Tp>::value
- || __has_operator_addressof_free_imp<_Tp>::value>
-{};
-
-#endif // _LIBCPP_CXX03_LANG
-
// is_scoped_enum [meta.unary.prop]
#if _LIBCPP_STD_VER > 20
@@ -4398,8 +4100,7 @@ struct _LIBCPP_TEMPLATE_VIS is_scoped_enum
: public __is_scoped_enum_helper<_Tp> {};
template <class _Tp>
-_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_scoped_enum_v =
- is_scoped_enum<_Tp>::value;
+inline constexpr bool is_scoped_enum_v = is_scoped_enum<_Tp>::value;
#endif
#if _LIBCPP_STD_VER > 14
@@ -4407,20 +4108,17 @@ _LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_scoped_enum_v =
template <class... _Args>
struct conjunction : _And<_Args...> {};
template<class... _Args>
-_LIBCPP_INLINE_VAR constexpr bool conjunction_v
- = conjunction<_Args...>::value;
+inline constexpr bool conjunction_v = conjunction<_Args...>::value;
template <class... _Args>
struct disjunction : _Or<_Args...> {};
template<class... _Args>
-_LIBCPP_INLINE_VAR constexpr bool disjunction_v
- = disjunction<_Args...>::value;
+inline constexpr bool disjunction_v = disjunction<_Args...>::value;
template <class _Tp>
struct negation : _Not<_Tp> {};
template<class _Tp>
-_LIBCPP_INLINE_VAR constexpr bool negation_v
- = negation<_Tp>::value;
+inline constexpr bool negation_v = negation<_Tp>::value;
#endif // _LIBCPP_STD_VER > 14
// These traits are used in __tree and __hash_table
@@ -4453,7 +4151,6 @@ template <class _ValTy, class _Key, class _RawValTy>
struct __can_extract_map_key<_ValTy, _Key, _Key, _RawValTy>
: false_type {};
-#ifndef _LIBCPP_HAS_NO_BUILTIN_IS_CONSTANT_EVALUATED
#if _LIBCPP_STD_VER > 17
_LIBCPP_INLINE_VISIBILITY
inline constexpr bool is_constant_evaluated() noexcept {
@@ -4463,10 +4160,6 @@ inline constexpr bool is_constant_evaluated() noexcept {
inline _LIBCPP_CONSTEXPR
bool __libcpp_is_constant_evaluated() _NOEXCEPT { return __builtin_is_constant_evaluated(); }
-#else
-inline _LIBCPP_CONSTEXPR
-bool __libcpp_is_constant_evaluated() _NOEXCEPT { return false; }
-#endif
template <class _CharT>
using _IsCharLikeType = _And<is_standard_layout<_CharT>, is_trivial<_CharT> >;
@@ -4481,13 +4174,4 @@ using __maybe_const = conditional_t<_Const, const _Tp, _Tp>;
_LIBCPP_END_NAMESPACE_STD
-#if _LIBCPP_STD_VER > 14
-// std::byte
-namespace std // purposefully not versioned
-{
-
-
-}
-#endif
-
#endif // _LIBCPP_TYPE_TRAITS
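For reference (not part of the patch): the `_v` variable templates that the hunks above rewrite as plain `inline constexpr` are the standard C++17 shorthands for `::value`, and each expanded __invoke overload repeats the same noexcept / trailing-decltype / return triple that the removed _LIBCPP_INVOKE_RETURN macro used to generate. A minimal sketch, assuming a C++17 compiler; `call` is a hypothetical stand-in, not a libc++ name:

    #include <string>
    #include <type_traits>
    #include <utility>

    // The _v shorthands read exactly like the corresponding ::value members.
    static_assert(std::is_constructible_v<std::string, const char*>);
    static_assert(std::is_nothrow_move_constructible_v<std::string>);
    static_assert(!std::is_trivially_copyable_v<std::string>);

    // Shape of each expanded overload: the noexcept-specifier, the trailing
    // return type, and the body all repeat the same call expression.
    template <class F, class... Args>
    auto call(F&& f, Args&&... args)
        noexcept(noexcept(std::forward<F>(f)(std::forward<Args>(args)...)))
        -> decltype(std::forward<F>(f)(std::forward<Args>(args)...))
    {
        return std::forward<F>(f)(std::forward<Args>(args)...);
    }

    int main() {
        return call([](int x) { return x - 1; }, 1);   // returns 0
    }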
diff --git a/libcxx/include/typeindex b/libcxx/include/typeindex
index 88bb9ef03d61..790aea4d4763 100644
--- a/libcxx/include/typeindex
+++ b/libcxx/include/typeindex
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===-------------------------- typeindex ---------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/typeinfo b/libcxx/include/typeinfo
index 6026038ba5f9..d0f9db36a627 100644
--- a/libcxx/include/typeinfo
+++ b/libcxx/include/typeinfo
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===-------------------------- typeinfo ----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/include/unordered_map b/libcxx/include/unordered_map
index ea0382de7d40..53ddb95663d1 100644
--- a/libcxx/include/unordered_map
+++ b/libcxx/include/unordered_map
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===-------------------------- unordered_map -----------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -216,6 +216,47 @@ public:
void reserve(size_type n);
};
+template<class InputIterator,
+ class Hash = hash<iter_key_t<InputIterator>>, class Pred = equal_to<iter_key_t<InputIterator>>,
+ class Allocator = allocator<iter_to_alloc_t<InputIterator>>>
+unordered_map(InputIterator, InputIterator, typename see below::size_type = see below,
+ Hash = Hash(), Pred = Pred(), Allocator = Allocator())
+ -> unordered_map<iter_key_t<InputIterator>, iter_value_t<InputIterator>, Hash, Pred,
+ Allocator>; // C++17
+
+template<class Key, class T, class Hash = hash<Key>,
+ class Pred = equal_to<Key>, class Allocator = allocator<pair<const Key, T>>>
+unordered_map(initializer_list<pair<const Key, T>>, typename see below::size_type = see below,
+ Hash = Hash(), Pred = Pred(), Allocator = Allocator())
+ -> unordered_map<Key, T, Hash, Pred, Allocator>; // C++17
+
+template<class InputIterator, class Allocator>
+unordered_map(InputIterator, InputIterator, typename see below::size_type, Allocator)
+ -> unordered_map<iter_key_t<InputIterator>, iter_val_t<InputIterator>,
+ hash<iter_key_t<InputIterator>>, equal_to<iter_key_t<InputIterator>>, Allocator>; // C++17
+
+template<class InputIterator, class Allocator>
+unordered_map(InputIterator, InputIterator, Allocator)
+ -> unordered_map<iter_key_t<InputIterator>, iter_val_t<InputIterator>,
+ hash<iter_key_t<InputIterator>>, equal_to<iter_key_t<InputIterator>>, Allocator>; // C++17
+
+template<class InputIterator, class Hash, class Allocator>
+unordered_map(InputIterator, InputIterator, typename see below::size_type, Hash, Allocator)
+ -> unordered_map<iter_key_t<InputIterator>, iter_val_t<InputIterator>, Hash,
+ equal_to<iter_key_t<InputIterator>>, Allocator>; // C++17
+
+template<class Key, class T, typename Allocator>
+unordered_map(initializer_list<pair<const Key, T>>, typename see below::size_type, Allocator)
+ -> unordered_map<Key, T, hash<Key>, equal_to<Key>, Allocator>; // C++17
+
+template<class Key, class T, typename Allocator>
+unordered_map(initializer_list<pair<const Key, T>>, Allocator)
+ -> unordered_map<Key, T, hash<Key>, equal_to<Key>, Allocator>; // C++17
+
+template<class Key, class T, class Hash, class Allocator>
+unordered_map(initializer_list<pair<const Key, T>>, typename see below::size_type, Hash, Allocator)
+ -> unordered_map<Key, T, Hash, equal_to<Key>, Allocator>; // C++17
+
template <class Key, class T, class Hash, class Pred, class Alloc>
void swap(unordered_map<Key, T, Hash, Pred, Alloc>& x,
unordered_map<Key, T, Hash, Pred, Alloc>& y)
@@ -404,6 +445,48 @@ public:
void reserve(size_type n);
};
+template<class InputIterator,
+ class Hash = hash<iter_key_t<InputIterator>>, class Pred = equal_to<iter_key_t<InputIterator>>,
+ class Allocator = allocator<iter_to_alloc_t<InputIterator>>>
+unordered_multimap(InputIterator, InputIterator, typename see below::size_type = see below,
+ Hash = Hash(), Pred = Pred(), Allocator = Allocator())
+ -> unordered_multimap<iter_key_t<InputIterator>, iter_value_t<InputIterator>, Hash, Pred,
+ Allocator>; // C++17
+
+template<class Key, class T, class Hash = hash<Key>,
+ class Pred = equal_to<Key>, class Allocator = allocator<pair<const Key, T>>>
+unordered_multimap(initializer_list<pair<const Key, T>>, typename see below::size_type = see below,
+ Hash = Hash(), Pred = Pred(), Allocator = Allocator())
+ -> unordered_multimap<Key, T, Hash, Pred, Allocator>; // C++17
+
+template<class InputIterator, class Allocator>
+unordered_multimap(InputIterator, InputIterator, typename see below::size_type, Allocator)
+ -> unordered_multimap<iter_key_t<InputIterator>, iter_val_t<InputIterator>,
+ hash<iter_key_t<InputIterator>>, equal_to<iter_key_t<InputIterator>>, Allocator>; // C++17
+
+template<class InputIterator, class Allocator>
+unordered_multimap(InputIterator, InputIterator, Allocator)
+ -> unordered_multimap<iter_key_t<InputIterator>, iter_val_t<InputIterator>,
+ hash<iter_key_t<InputIterator>>, equal_to<iter_key_t<InputIterator>>, Allocator>; // C++17
+
+template<class InputIterator, class Hash, class Allocator>
+unordered_multimap(InputIterator, InputIterator, typename see below::size_type, Hash, Allocator)
+ -> unordered_multimap<iter_key_t<InputIterator>, iter_val_t<InputIterator>, Hash,
+ equal_to<iter_key_t<InputIterator>>, Allocator>; // C++17
+
+template<class Key, class T, typename Allocator>
+unordered_multimap(initializer_list<pair<const Key, T>>, typename see below::size_type, Allocator)
+ -> unordered_multimap<Key, T, hash<Key>, equal_to<Key>, Allocator>; // C++17
+
+template<class Key, class T, typename Allocator>
+unordered_multimap(initializer_list<pair<const Key, T>>, Allocator)
+ -> unordered_multimap<Key, T, hash<Key>, equal_to<Key>, Allocator>; // C++17
+
+template<class Key, class T, class Hash, class Allocator>
+unordered_multimap(initializer_list<pair<const Key, T>>, typename see below::size_type, Hash,
+ Allocator)
+ -> unordered_multimap<Key, T, Hash, equal_to<Key>, Allocator>; // C++17
+
template <class Key, class T, class Hash, class Pred, class Alloc>
void swap(unordered_multimap<Key, T, Hash, Pred, Alloc>& x,
unordered_multimap<Key, T, Hash, Pred, Alloc>& y)
@@ -434,6 +517,7 @@ template <class Key, class T, class Hash, class Pred, class Alloc>
#include <__config>
#include <__debug>
#include <__functional/is_transparent.h>
+#include <__iterator/iterator_traits.h>
#include <__hash_table>
#include <__node_handle>
#include <__utility/forward.h>
@@ -473,12 +557,13 @@ public:
size_t operator()(const _Key& __x) const
{return static_cast<const _Hash&>(*this)(__x);}
#if _LIBCPP_STD_VER > 17
- template <typename _K2, typename = _EnableIf<__is_transparent<_Hash, _K2>::value && __is_transparent<_Pred, _K2>::value>>
+ template <typename _K2>
_LIBCPP_INLINE_VISIBILITY
size_t operator()(const _K2& __x) const
{return static_cast<const _Hash&>(*this)(__x);}
#endif
- void swap(__unordered_map_hasher&__y)
+ _LIBCPP_INLINE_VISIBILITY
+ void swap(__unordered_map_hasher& __y)
_NOEXCEPT_(__is_nothrow_swappable<_Hash>::value)
{
using _VSTD::swap;
@@ -508,12 +593,13 @@ public:
size_t operator()(const _Key& __x) const
{return __hash_(__x);}
#if _LIBCPP_STD_VER > 17
- template <typename _K2, typename = _EnableIf<__is_transparent<_Hash, _K2>::value && __is_transparent<_Pred, _K2>::value>>
+ template <typename _K2>
_LIBCPP_INLINE_VISIBILITY
size_t operator()(const _K2& __x) const
{return __hash_(__x);}
#endif
- void swap(__unordered_map_hasher&__y)
+ _LIBCPP_INLINE_VISIBILITY
+ void swap(__unordered_map_hasher& __y)
_NOEXCEPT_(__is_nothrow_swappable<_Hash>::value)
{
using _VSTD::swap;
@@ -557,24 +643,25 @@ public:
bool operator()(const _Key& __x, const _Cp& __y) const
{return static_cast<const _Pred&>(*this)(__x, __y.__get_value().first);}
#if _LIBCPP_STD_VER > 17
- template <typename _K2, typename = _EnableIf<__is_transparent<_Hash, _K2>::value && __is_transparent<_Pred, _K2>::value>>
+ template <typename _K2>
_LIBCPP_INLINE_VISIBILITY
bool operator()(const _Cp& __x, const _K2& __y) const
{return static_cast<const _Pred&>(*this)(__x.__get_value().first, __y);}
- template <typename _K2, typename = _EnableIf<__is_transparent<_Hash, _K2>::value && __is_transparent<_Pred, _K2>::value>>
+ template <typename _K2>
_LIBCPP_INLINE_VISIBILITY
bool operator()(const _K2& __x, const _Cp& __y) const
{return static_cast<const _Pred&>(*this)(__x, __y.__get_value().first);}
- template <typename _K2, typename = _EnableIf<__is_transparent<_Hash, _K2>::value && __is_transparent<_Pred, _K2>::value>>
+ template <typename _K2>
_LIBCPP_INLINE_VISIBILITY
bool operator()(const _Key& __x, const _K2& __y) const
{return static_cast<const _Pred&>(*this)(__x, __y);}
- template <typename _K2, typename = _EnableIf<__is_transparent<_Hash, _K2>::value && __is_transparent<_Pred, _K2>::value>>
+ template <typename _K2>
_LIBCPP_INLINE_VISIBILITY
bool operator()(const _K2& __x, const _Key& __y) const
{return static_cast<const _Pred&>(*this)(__x, __y);}
#endif
- void swap(__unordered_map_equal&__y)
+ _LIBCPP_INLINE_VISIBILITY
+ void swap(__unordered_map_equal& __y)
_NOEXCEPT_(__is_nothrow_swappable<_Pred>::value)
{
using _VSTD::swap;
@@ -607,24 +694,25 @@ public:
bool operator()(const _Key& __x, const _Cp& __y) const
{return __pred_(__x, __y.__get_value().first);}
#if _LIBCPP_STD_VER > 17
- template <typename _K2, typename = _EnableIf<__is_transparent<_Hash, _K2>::value && __is_transparent<_Pred, _K2>::value>>
+ template <typename _K2>
_LIBCPP_INLINE_VISIBILITY
bool operator()(const _Cp& __x, const _K2& __y) const
{return __pred_(__x.__get_value().first, __y);}
- template <typename _K2, typename = _EnableIf<__is_transparent<_Hash, _K2>::value && __is_transparent<_Pred, _K2>::value>>
+ template <typename _K2>
_LIBCPP_INLINE_VISIBILITY
bool operator()(const _K2& __x, const _Cp& __y) const
{return __pred_(__x, __y.__get_value().first);}
- template <typename _K2, typename = _EnableIf<__is_transparent<_Hash, _K2>::value && __is_transparent<_Pred, _K2>::value>>
+ template <typename _K2>
_LIBCPP_INLINE_VISIBILITY
bool operator()(const _Key& __x, const _K2& __y) const
{return __pred_(__x, __y);}
- template <typename _K2, typename = _EnableIf<__is_transparent<_Hash, _K2>::value && __is_transparent<_Pred, _K2>::value>>
+ template <typename _K2>
_LIBCPP_INLINE_VISIBILITY
bool operator()(const _K2& __x, const _Key& __y) const
{return __pred_(__x, __y);}
#endif
- void swap(__unordered_map_equal&__y)
+ _LIBCPP_INLINE_VISIBILITY
+ void swap(__unordered_map_equal& __y)
_NOEXCEPT_(__is_nothrow_swappable<_Pred>::value)
{
using _VSTD::swap;
@@ -1052,7 +1140,7 @@ public:
#ifndef _LIBCPP_CXX03_LANG
__table_ = __u.__table_;
#else
- if (this != &__u) {
+ if (this != _VSTD::addressof(__u)) {
__table_.clear();
__table_.hash_function() = __u.__table_.hash_function();
__table_.key_eq() = __u.__table_.key_eq();
@@ -1355,51 +1443,48 @@ public:
iterator find(const key_type& __k) {return __table_.find(__k);}
_LIBCPP_INLINE_VISIBILITY
const_iterator find(const key_type& __k) const {return __table_.find(__k);}
-
- #if _LIBCPP_STD_VER > 17
- template <typename _K2>
- _LIBCPP_INLINE_VISIBILITY
- _EnableIf<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value, iterator>
- find(const _K2& __k) {return __table_.find(__k);}
- template <typename _K2>
- _LIBCPP_INLINE_VISIBILITY
- _EnableIf<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value, const_iterator>
- find(const _K2& __k) const {return __table_.find(__k);}
- #endif // _LIBCPP_STD_VER > 17
+#if _LIBCPP_STD_VER > 17
+ template <class _K2, enable_if_t<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value>* = nullptr>
+ _LIBCPP_INLINE_VISIBILITY
+ iterator find(const _K2& __k) {return __table_.find(__k);}
+ template <class _K2, enable_if_t<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value>* = nullptr>
+ _LIBCPP_INLINE_VISIBILITY
+ const_iterator find(const _K2& __k) const {return __table_.find(__k);}
+#endif // _LIBCPP_STD_VER > 17
_LIBCPP_INLINE_VISIBILITY
size_type count(const key_type& __k) const {return __table_.__count_unique(__k);}
- #if _LIBCPP_STD_VER > 17
- template <typename _K2>
- _LIBCPP_INLINE_VISIBILITY
- _EnableIf<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value, size_type>
- count(const _K2& __k) const {return __table_.__count_unique(__k);}
- #endif // _LIBCPP_STD_VER > 17
- #if _LIBCPP_STD_VER > 17
- _LIBCPP_INLINE_VISIBILITY
- bool contains(const key_type& __k) const {return find(__k) != end();}
+#if _LIBCPP_STD_VER > 17
+ template <class _K2, enable_if_t<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value>* = nullptr>
+ _LIBCPP_INLINE_VISIBILITY
+ size_type count(const _K2& __k) const {return __table_.__count_unique(__k);}
+#endif // _LIBCPP_STD_VER > 17
+
+#if _LIBCPP_STD_VER > 17
+ _LIBCPP_INLINE_VISIBILITY
+ bool contains(const key_type& __k) const {return find(__k) != end();}
+
+ template <class _K2, enable_if_t<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value>* = nullptr>
+ _LIBCPP_INLINE_VISIBILITY
+ bool contains(const _K2& __k) const {return find(__k) != end();}
+#endif // _LIBCPP_STD_VER > 17
- template <typename _K2>
- _LIBCPP_INLINE_VISIBILITY
- _EnableIf<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value, bool>
- contains(const _K2& __k) const {return find(__k) != end();}
- #endif // _LIBCPP_STD_VER > 17
_LIBCPP_INLINE_VISIBILITY
pair<iterator, iterator> equal_range(const key_type& __k)
{return __table_.__equal_range_unique(__k);}
_LIBCPP_INLINE_VISIBILITY
pair<const_iterator, const_iterator> equal_range(const key_type& __k) const
{return __table_.__equal_range_unique(__k);}
- #if _LIBCPP_STD_VER > 17
- template <typename _K2>
- _LIBCPP_INLINE_VISIBILITY
- _EnableIf<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value, pair<iterator, iterator>>
- equal_range(const _K2& __k) {return __table_.__equal_range_unique(__k);}
- template <typename _K2>
- _LIBCPP_INLINE_VISIBILITY
- _EnableIf<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value, pair<const_iterator, const_iterator>>
- equal_range(const _K2& __k) const {return __table_.__equal_range_unique(__k);}
- #endif // _LIBCPP_STD_VER > 17
+#if _LIBCPP_STD_VER > 17
+ template <class _K2, enable_if_t<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value>* = nullptr>
+ _LIBCPP_INLINE_VISIBILITY
+ pair<iterator, iterator> equal_range(const _K2& __k)
+ {return __table_.__equal_range_unique(__k);}
+ template <class _K2, enable_if_t<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value>* = nullptr>
+ _LIBCPP_INLINE_VISIBILITY
+ pair<const_iterator, const_iterator> equal_range(const _K2& __k) const
+ {return __table_.__equal_range_unique(__k);}
+#endif // _LIBCPP_STD_VER > 17
mapped_type& operator[](const key_type& __k);
#ifndef _LIBCPP_CXX03_LANG
@@ -1447,13 +1532,13 @@ public:
#if _LIBCPP_DEBUG_LEVEL == 2
bool __dereferenceable(const const_iterator* __i) const
- {return __table_.__dereferenceable(&__i->__i_);}
+ {return __table_.__dereferenceable(_VSTD::addressof(__i->__i_));}
bool __decrementable(const const_iterator* __i) const
- {return __table_.__decrementable(&__i->__i_);}
+ {return __table_.__decrementable(_VSTD::addressof(__i->__i_));}
bool __addable(const const_iterator* __i, ptrdiff_t __n) const
- {return __table_.__addable(&__i->__i_, __n);}
+ {return __table_.__addable(_VSTD::addressof(__i->__i_), __n);}
bool __subscriptable(const const_iterator* __i, ptrdiff_t __n) const
- {return __table_.__addable(&__i->__i_, __n);}
+ {return __table_.__addable(_VSTD::addressof(__i->__i_), __n);}
#endif // _LIBCPP_DEBUG_LEVEL == 2
@@ -1464,15 +1549,16 @@ private:
#endif
};
-#ifndef _LIBCPP_HAS_NO_DEDUCTION_GUIDES
+#if _LIBCPP_STD_VER >= 17
template<class _InputIterator,
class _Hash = hash<__iter_key_type<_InputIterator>>,
class _Pred = equal_to<__iter_key_type<_InputIterator>>,
class _Allocator = allocator<__iter_to_alloc_type<_InputIterator>>,
- class = _EnableIf<!__is_allocator<_Hash>::value>,
- class = _EnableIf<!is_integral<_Hash>::value>,
- class = _EnableIf<!__is_allocator<_Pred>::value>,
- class = _EnableIf<__is_allocator<_Allocator>::value>>
+ class = enable_if_t<__is_cpp17_input_iterator<_InputIterator>::value>,
+ class = enable_if_t<!__is_allocator<_Hash>::value>,
+ class = enable_if_t<!is_integral<_Hash>::value>,
+ class = enable_if_t<!__is_allocator<_Pred>::value>,
+ class = enable_if_t<__is_allocator<_Allocator>::value>>
unordered_map(_InputIterator, _InputIterator, typename allocator_traits<_Allocator>::size_type = 0,
_Hash = _Hash(), _Pred = _Pred(), _Allocator = _Allocator())
-> unordered_map<__iter_key_type<_InputIterator>, __iter_mapped_type<_InputIterator>, _Hash, _Pred, _Allocator>;
@@ -1480,52 +1566,55 @@ unordered_map(_InputIterator, _InputIterator, typename allocator_traits<_Allocat
template<class _Key, class _Tp, class _Hash = hash<remove_const_t<_Key>>,
class _Pred = equal_to<remove_const_t<_Key>>,
class _Allocator = allocator<pair<const _Key, _Tp>>,
- class = _EnableIf<!__is_allocator<_Hash>::value>,
- class = _EnableIf<!is_integral<_Hash>::value>,
- class = _EnableIf<!__is_allocator<_Pred>::value>,
- class = _EnableIf<__is_allocator<_Allocator>::value>>
+ class = enable_if_t<!__is_allocator<_Hash>::value>,
+ class = enable_if_t<!is_integral<_Hash>::value>,
+ class = enable_if_t<!__is_allocator<_Pred>::value>,
+ class = enable_if_t<__is_allocator<_Allocator>::value>>
unordered_map(initializer_list<pair<_Key, _Tp>>, typename allocator_traits<_Allocator>::size_type = 0,
_Hash = _Hash(), _Pred = _Pred(), _Allocator = _Allocator())
-> unordered_map<remove_const_t<_Key>, _Tp, _Hash, _Pred, _Allocator>;
template<class _InputIterator, class _Allocator,
- class = _EnableIf<__is_allocator<_Allocator>::value>>
+ class = enable_if_t<__is_cpp17_input_iterator<_InputIterator>::value>,
+ class = enable_if_t<__is_allocator<_Allocator>::value>>
unordered_map(_InputIterator, _InputIterator, typename allocator_traits<_Allocator>::size_type, _Allocator)
-> unordered_map<__iter_key_type<_InputIterator>, __iter_mapped_type<_InputIterator>,
hash<__iter_key_type<_InputIterator>>, equal_to<__iter_key_type<_InputIterator>>, _Allocator>;
template<class _InputIterator, class _Allocator,
- class = _EnableIf<__is_allocator<_Allocator>::value>>
+ class = enable_if_t<__is_cpp17_input_iterator<_InputIterator>::value>,
+ class = enable_if_t<__is_allocator<_Allocator>::value>>
unordered_map(_InputIterator, _InputIterator, _Allocator)
-> unordered_map<__iter_key_type<_InputIterator>, __iter_mapped_type<_InputIterator>,
hash<__iter_key_type<_InputIterator>>, equal_to<__iter_key_type<_InputIterator>>, _Allocator>;
template<class _InputIterator, class _Hash, class _Allocator,
- class = _EnableIf<!__is_allocator<_Hash>::value>,
- class = _EnableIf<!is_integral<_Hash>::value>,
- class = _EnableIf<__is_allocator<_Allocator>::value>>
+ class = enable_if_t<__is_cpp17_input_iterator<_InputIterator>::value>,
+ class = enable_if_t<!__is_allocator<_Hash>::value>,
+ class = enable_if_t<!is_integral<_Hash>::value>,
+ class = enable_if_t<__is_allocator<_Allocator>::value>>
unordered_map(_InputIterator, _InputIterator, typename allocator_traits<_Allocator>::size_type, _Hash, _Allocator)
-> unordered_map<__iter_key_type<_InputIterator>, __iter_mapped_type<_InputIterator>,
_Hash, equal_to<__iter_key_type<_InputIterator>>, _Allocator>;
template<class _Key, class _Tp, class _Allocator,
- class = _EnableIf<__is_allocator<_Allocator>::value>>
+ class = enable_if_t<__is_allocator<_Allocator>::value>>
unordered_map(initializer_list<pair<_Key, _Tp>>, typename allocator_traits<_Allocator>::size_type, _Allocator)
-> unordered_map<remove_const_t<_Key>, _Tp,
hash<remove_const_t<_Key>>,
equal_to<remove_const_t<_Key>>, _Allocator>;
template<class _Key, class _Tp, class _Allocator,
- class = _EnableIf<__is_allocator<_Allocator>::value>>
+ class = enable_if_t<__is_allocator<_Allocator>::value>>
unordered_map(initializer_list<pair<_Key, _Tp>>, _Allocator)
-> unordered_map<remove_const_t<_Key>, _Tp,
hash<remove_const_t<_Key>>,
equal_to<remove_const_t<_Key>>, _Allocator>;
template<class _Key, class _Tp, class _Hash, class _Allocator,
- class = _EnableIf<!__is_allocator<_Hash>::value>,
- class = _EnableIf<!is_integral<_Hash>::value>,
- class = _EnableIf<__is_allocator<_Allocator>::value>>
+ class = enable_if_t<!__is_allocator<_Hash>::value>,
+ class = enable_if_t<!is_integral<_Hash>::value>,
+ class = enable_if_t<__is_allocator<_Allocator>::value>>
unordered_map(initializer_list<pair<_Key, _Tp>>, typename allocator_traits<_Allocator>::size_type, _Hash, _Allocator)
-> unordered_map<remove_const_t<_Key>, _Tp, _Hash,
equal_to<remove_const_t<_Key>>, _Allocator>;
@@ -1987,7 +2076,7 @@ public:
#ifndef _LIBCPP_CXX03_LANG
__table_ = __u.__table_;
#else
- if (this != &__u) {
+ if (this != _VSTD::addressof(__u)) {
__table_.clear();
__table_.hash_function() = __u.__table_.hash_function();
__table_.key_eq() = __u.__table_.key_eq();
@@ -2167,49 +2256,48 @@ public:
iterator find(const key_type& __k) {return __table_.find(__k);}
_LIBCPP_INLINE_VISIBILITY
const_iterator find(const key_type& __k) const {return __table_.find(__k);}
- #if _LIBCPP_STD_VER > 17
- template <typename _K2>
- _LIBCPP_INLINE_VISIBILITY
- _EnableIf<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value, iterator>
- find(const _K2& __k) {return __table_.find(__k);}
- template <typename _K2>
- _LIBCPP_INLINE_VISIBILITY
- _EnableIf<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value, const_iterator>
- find(const _K2& __k) const {return __table_.find(__k);}
- #endif // _LIBCPP_STD_VER > 17
+#if _LIBCPP_STD_VER > 17
+ template <class _K2, enable_if_t<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value>* = nullptr>
+ _LIBCPP_INLINE_VISIBILITY
+ iterator find(const _K2& __k) {return __table_.find(__k);}
+ template <class _K2, enable_if_t<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value>* = nullptr>
+ _LIBCPP_INLINE_VISIBILITY
+ const_iterator find(const _K2& __k) const {return __table_.find(__k);}
+#endif // _LIBCPP_STD_VER > 17
+
_LIBCPP_INLINE_VISIBILITY
size_type count(const key_type& __k) const {return __table_.__count_multi(__k);}
- #if _LIBCPP_STD_VER > 17
- template <typename _K2>
- _LIBCPP_INLINE_VISIBILITY
- _EnableIf<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value, size_type>
- count(const _K2& __k) const {return __table_.__count_multi(__k);}
- #endif // _LIBCPP_STD_VER > 17
- #if _LIBCPP_STD_VER > 17
- _LIBCPP_INLINE_VISIBILITY
- bool contains(const key_type& __k) const {return find(__k) != end();}
+#if _LIBCPP_STD_VER > 17
+ template <class _K2, enable_if_t<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value>* = nullptr>
+ _LIBCPP_INLINE_VISIBILITY
+ size_type count(const _K2& __k) const {return __table_.__count_multi(__k);}
+#endif // _LIBCPP_STD_VER > 17
+
+#if _LIBCPP_STD_VER > 17
+ _LIBCPP_INLINE_VISIBILITY
+ bool contains(const key_type& __k) const {return find(__k) != end();}
+
+ template <class _K2, enable_if_t<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value>* = nullptr>
+ _LIBCPP_INLINE_VISIBILITY
+ bool contains(const _K2& __k) const {return find(__k) != end();}
+#endif // _LIBCPP_STD_VER > 17
- template <typename _K2>
- _LIBCPP_INLINE_VISIBILITY
- _EnableIf<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value, bool>
- contains(const _K2& __k) const {return find(__k) != end();}
- #endif // _LIBCPP_STD_VER > 17
_LIBCPP_INLINE_VISIBILITY
pair<iterator, iterator> equal_range(const key_type& __k)
{return __table_.__equal_range_multi(__k);}
_LIBCPP_INLINE_VISIBILITY
pair<const_iterator, const_iterator> equal_range(const key_type& __k) const
{return __table_.__equal_range_multi(__k);}
- #if _LIBCPP_STD_VER > 17
- template <typename _K2>
- _LIBCPP_INLINE_VISIBILITY
- _EnableIf<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value, pair<iterator, iterator>>
- equal_range(const _K2& __k) {return __table_.__equal_range_multi(__k);}
- template <typename _K2>
- _LIBCPP_INLINE_VISIBILITY
- _EnableIf<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value, pair<const_iterator, const_iterator>>
- equal_range(const _K2& __k) const {return __table_.__equal_range_multi(__k);}
- #endif // _LIBCPP_STD_VER > 17
+#if _LIBCPP_STD_VER > 17
+ template <class _K2, enable_if_t<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value>* = nullptr>
+ _LIBCPP_INLINE_VISIBILITY
+ pair<iterator, iterator> equal_range(const _K2& __k)
+ {return __table_.__equal_range_multi(__k);}
+ template <class _K2, enable_if_t<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value>* = nullptr>
+ _LIBCPP_INLINE_VISIBILITY
+ pair<const_iterator, const_iterator> equal_range(const _K2& __k) const
+ {return __table_.__equal_range_multi(__k);}
+#endif // _LIBCPP_STD_VER > 17
_LIBCPP_INLINE_VISIBILITY
size_type bucket_count() const _NOEXCEPT {return __table_.bucket_count();}
@@ -2250,28 +2338,29 @@ public:
#if _LIBCPP_DEBUG_LEVEL == 2
bool __dereferenceable(const const_iterator* __i) const
- {return __table_.__dereferenceable(&__i->__i_);}
+ {return __table_.__dereferenceable(_VSTD::addressof(__i->__i_));}
bool __decrementable(const const_iterator* __i) const
- {return __table_.__decrementable(&__i->__i_);}
+ {return __table_.__decrementable(_VSTD::addressof(__i->__i_));}
bool __addable(const const_iterator* __i, ptrdiff_t __n) const
- {return __table_.__addable(&__i->__i_, __n);}
+ {return __table_.__addable(_VSTD::addressof(__i->__i_), __n);}
bool __subscriptable(const const_iterator* __i, ptrdiff_t __n) const
- {return __table_.__addable(&__i->__i_, __n);}
+ {return __table_.__addable(_VSTD::addressof(__i->__i_), __n);}
#endif // _LIBCPP_DEBUG_LEVEL == 2
};
-#ifndef _LIBCPP_HAS_NO_DEDUCTION_GUIDES
+#if _LIBCPP_STD_VER >= 17
template<class _InputIterator,
class _Hash = hash<__iter_key_type<_InputIterator>>,
class _Pred = equal_to<__iter_key_type<_InputIterator>>,
class _Allocator = allocator<__iter_to_alloc_type<_InputIterator>>,
- class = _EnableIf<!__is_allocator<_Hash>::value>,
- class = _EnableIf<!is_integral<_Hash>::value>,
- class = _EnableIf<!__is_allocator<_Pred>::value>,
- class = _EnableIf<__is_allocator<_Allocator>::value>>
+ class = enable_if_t<__is_cpp17_input_iterator<_InputIterator>::value>,
+ class = enable_if_t<!__is_allocator<_Hash>::value>,
+ class = enable_if_t<!is_integral<_Hash>::value>,
+ class = enable_if_t<!__is_allocator<_Pred>::value>,
+ class = enable_if_t<__is_allocator<_Allocator>::value>>
unordered_multimap(_InputIterator, _InputIterator, typename allocator_traits<_Allocator>::size_type = 0,
_Hash = _Hash(), _Pred = _Pred(), _Allocator = _Allocator())
-> unordered_multimap<__iter_key_type<_InputIterator>, __iter_mapped_type<_InputIterator>, _Hash, _Pred, _Allocator>;
@@ -2279,52 +2368,55 @@ unordered_multimap(_InputIterator, _InputIterator, typename allocator_traits<_Al
template<class _Key, class _Tp, class _Hash = hash<remove_const_t<_Key>>,
class _Pred = equal_to<remove_const_t<_Key>>,
class _Allocator = allocator<pair<const _Key, _Tp>>,
- class = _EnableIf<!__is_allocator<_Hash>::value>,
- class = _EnableIf<!is_integral<_Hash>::value>,
- class = _EnableIf<!__is_allocator<_Pred>::value>,
- class = _EnableIf<__is_allocator<_Allocator>::value>>
+ class = enable_if_t<!__is_allocator<_Hash>::value>,
+ class = enable_if_t<!is_integral<_Hash>::value>,
+ class = enable_if_t<!__is_allocator<_Pred>::value>,
+ class = enable_if_t<__is_allocator<_Allocator>::value>>
unordered_multimap(initializer_list<pair<_Key, _Tp>>, typename allocator_traits<_Allocator>::size_type = 0,
_Hash = _Hash(), _Pred = _Pred(), _Allocator = _Allocator())
-> unordered_multimap<remove_const_t<_Key>, _Tp, _Hash, _Pred, _Allocator>;
template<class _InputIterator, class _Allocator,
- class = _EnableIf<__is_allocator<_Allocator>::value>>
+ class = enable_if_t<__is_cpp17_input_iterator<_InputIterator>::value>,
+ class = enable_if_t<__is_allocator<_Allocator>::value>>
unordered_multimap(_InputIterator, _InputIterator, typename allocator_traits<_Allocator>::size_type, _Allocator)
-> unordered_multimap<__iter_key_type<_InputIterator>, __iter_mapped_type<_InputIterator>,
hash<__iter_key_type<_InputIterator>>, equal_to<__iter_key_type<_InputIterator>>, _Allocator>;
template<class _InputIterator, class _Allocator,
- class = _EnableIf<__is_allocator<_Allocator>::value>>
+ class = enable_if_t<__is_cpp17_input_iterator<_InputIterator>::value>,
+ class = enable_if_t<__is_allocator<_Allocator>::value>>
unordered_multimap(_InputIterator, _InputIterator, _Allocator)
-> unordered_multimap<__iter_key_type<_InputIterator>, __iter_mapped_type<_InputIterator>,
hash<__iter_key_type<_InputIterator>>, equal_to<__iter_key_type<_InputIterator>>, _Allocator>;
template<class _InputIterator, class _Hash, class _Allocator,
- class = _EnableIf<!__is_allocator<_Hash>::value>,
- class = _EnableIf<!is_integral<_Hash>::value>,
- class = _EnableIf<__is_allocator<_Allocator>::value>>
+ class = enable_if_t<__is_cpp17_input_iterator<_InputIterator>::value>,
+ class = enable_if_t<!__is_allocator<_Hash>::value>,
+ class = enable_if_t<!is_integral<_Hash>::value>,
+ class = enable_if_t<__is_allocator<_Allocator>::value>>
unordered_multimap(_InputIterator, _InputIterator, typename allocator_traits<_Allocator>::size_type, _Hash, _Allocator)
-> unordered_multimap<__iter_key_type<_InputIterator>, __iter_mapped_type<_InputIterator>,
_Hash, equal_to<__iter_key_type<_InputIterator>>, _Allocator>;
template<class _Key, class _Tp, class _Allocator,
- class = _EnableIf<__is_allocator<_Allocator>::value>>
+ class = enable_if_t<__is_allocator<_Allocator>::value>>
unordered_multimap(initializer_list<pair<_Key, _Tp>>, typename allocator_traits<_Allocator>::size_type, _Allocator)
-> unordered_multimap<remove_const_t<_Key>, _Tp,
hash<remove_const_t<_Key>>,
equal_to<remove_const_t<_Key>>, _Allocator>;
template<class _Key, class _Tp, class _Allocator,
- class = _EnableIf<__is_allocator<_Allocator>::value>>
+ class = enable_if_t<__is_allocator<_Allocator>::value>>
unordered_multimap(initializer_list<pair<_Key, _Tp>>, _Allocator)
-> unordered_multimap<remove_const_t<_Key>, _Tp,
hash<remove_const_t<_Key>>,
equal_to<remove_const_t<_Key>>, _Allocator>;
template<class _Key, class _Tp, class _Hash, class _Allocator,
- class = _EnableIf<!__is_allocator<_Hash>::value>,
- class = _EnableIf<!is_integral<_Hash>::value>,
- class = _EnableIf<__is_allocator<_Allocator>::value>>
+ class = enable_if_t<!__is_allocator<_Hash>::value>,
+ class = enable_if_t<!is_integral<_Hash>::value>,
+ class = enable_if_t<__is_allocator<_Allocator>::value>>
unordered_multimap(initializer_list<pair<_Key, _Tp>>, typename allocator_traits<_Allocator>::size_type, _Hash, _Allocator)
-> unordered_multimap<remove_const_t<_Key>, _Tp, _Hash,
equal_to<remove_const_t<_Key>>, _Allocator>;
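The transparent find/count/contains/equal_range overloads reworked in the hunks above are what enable C++20 heterogeneous lookup; as a user-side sketch (not part of the patch; the string_view hasher is hypothetical):

#include <cstddef>
#include <functional>
#include <string>
#include <string_view>
#include <unordered_map>

struct sv_hash {
    using is_transparent = void;                               // opts the hasher into transparent lookup
    std::size_t operator()(std::string_view s) const { return std::hash<std::string_view>{}(s); }
};

int main() {
    // Both the hasher and the key-equality predicate must be transparent.
    std::unordered_multimap<std::string, int, sv_hash, std::equal_to<>> m;
    m.emplace("key", 1);
    std::string_view k = "key";                                // looked up without constructing a std::string
    return m.equal_range(k).first != m.end() ? 0 : 1;
}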
diff --git a/libcxx/include/unordered_set b/libcxx/include/unordered_set
index a775a9250268..1b62e31bb918 100644
--- a/libcxx/include/unordered_set
+++ b/libcxx/include/unordered_set
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===-------------------------- unordered_set -----------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -182,6 +182,43 @@ public:
void reserve(size_type n);
};
+template<class InputIterator,
+ class Hash = hash<typename iterator_traits<InputIterator>::value_type>,
+ class Pred = equal_to<typename iterator_traits<InputIterator>::value_type>,
+ class Allocator = allocator<typename iterator_traits<InputIterator>::value_type>>
+unordered_set(InputIterator, InputIterator, typename see below::size_type = see below,
+ Hash = Hash(), Pred = Pred(), Allocator = Allocator())
+ -> unordered_set<typename iterator_traits<InputIterator>::value_type,
+ Hash, Pred, Allocator>; // C++17
+
+template<class T, class Hash = hash<T>,
+ class Pred = equal_to<T>, class Allocator = allocator<T>>
+unordered_set(initializer_list<T>, typename see below::size_type = see below,
+ Hash = Hash(), Pred = Pred(), Allocator = Allocator())
+ -> unordered_set<T, Hash, Pred, Allocator>; // C++17
+
+template<class InputIterator, class Allocator>
+unordered_set(InputIterator, InputIterator, typename see below::size_type, Allocator)
+ -> unordered_set<typename iterator_traits<InputIterator>::value_type,
+ hash<typename iterator_traits<InputIterator>::value_type>,
+ equal_to<typename iterator_traits<InputIterator>::value_type>,
+ Allocator>; // C++17
+
+template<class InputIterator, class Hash, class Allocator>
+unordered_set(InputIterator, InputIterator, typename see below::size_type,
+ Hash, Allocator)
+ -> unordered_set<typename iterator_traits<InputIterator>::value_type, Hash,
+ equal_to<typename iterator_traits<InputIterator>::value_type>,
+ Allocator>; // C++17
+
+template<class T, class Allocator>
+unordered_set(initializer_list<T>, typename see below::size_type, Allocator)
+ -> unordered_set<T, hash<T>, equal_to<T>, Allocator>; // C++17
+
+template<class T, class Hash, class Allocator>
+unordered_set(initializer_list<T>, typename see below::size_type, Hash, Allocator)
+ -> unordered_set<T, Hash, equal_to<T>, Allocator>; // C++17
+
template <class Value, class Hash, class Pred, class Alloc>
void swap(unordered_set<Value, Hash, Pred, Alloc>& x,
unordered_set<Value, Hash, Pred, Alloc>& y)
@@ -359,6 +396,42 @@ public:
void reserve(size_type n);
};
+template<class InputIterator,
+ class Hash = hash<typename iterator_traits<InputIterator>::value_type>,
+ class Pred = equal_to<typename iterator_traits<InputIterator>::value_type>,
+ class Allocator = allocator<typename iterator_traits<InputIterator>::value_type>>
+unordered_multiset(InputIterator, InputIterator, typename see below::size_type = see below,
+ Hash = Hash(), Pred = Pred(), Allocator = Allocator())
+ -> unordered_multiset<typename iterator_traits<InputIterator>::value_type,
+ Hash, Pred, Allocator>; // C++17
+
+template<class T, class Hash = hash<T>,
+ class Pred = equal_to<T>, class Allocator = allocator<T>>
+unordered_multiset(initializer_list<T>, typename see below::size_type = see below,
+ Hash = Hash(), Pred = Pred(), Allocator = Allocator())
+ -> unordered_multiset<T, Hash, Pred, Allocator>; // C++17
+
+template<class InputIterator, class Allocator>
+unordered_multiset(InputIterator, InputIterator, typename see below::size_type, Allocator)
+ -> unordered_multiset<typename iterator_traits<InputIterator>::value_type,
+ hash<typename iterator_traits<InputIterator>::value_type>,
+ equal_to<typename iterator_traits<InputIterator>::value_type>,
+ Allocator>; // C++17
+
+template<class InputIterator, class Hash, class Allocator>
+unordered_multiset(InputIterator, InputIterator, typename see below::size_type,
+ Hash, Allocator)
+ -> unordered_multiset<typename iterator_traits<InputIterator>::value_type, Hash,
+ equal_to<typename iterator_traits<InputIterator>::value_type>, Allocator>; // C++17
+
+template<class T, class Allocator>
+unordered_multiset(initializer_list<T>, typename see below::size_type, Allocator)
+ -> unordered_multiset<T, hash<T>, equal_to<T>, Allocator>; // C++17
+
+template<class T, class Hash, class Allocator>
+unordered_multiset(initializer_list<T>, typename see below::size_type, Hash, Allocator)
+ -> unordered_multiset<T, Hash, equal_to<T>, Allocator>; // C++17
+
template <class Value, class Hash, class Pred, class Alloc>
void swap(unordered_multiset<Value, Hash, Pred, Alloc>& x,
unordered_multiset<Value, Hash, Pred, Alloc>& y)
@@ -706,49 +779,48 @@ public:
iterator find(const key_type& __k) {return __table_.find(__k);}
_LIBCPP_INLINE_VISIBILITY
const_iterator find(const key_type& __k) const {return __table_.find(__k);}
- #if _LIBCPP_STD_VER > 17
- template <typename _K2>
- _LIBCPP_INLINE_VISIBILITY
- _EnableIf<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value, iterator>
- find(const _K2& __k) {return __table_.find(__k);}
- template <typename _K2>
- _LIBCPP_INLINE_VISIBILITY
- _EnableIf<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value, const_iterator>
- find(const _K2& __k) const {return __table_.find(__k);}
- #endif // _LIBCPP_STD_VER > 17
+#if _LIBCPP_STD_VER > 17
+ template <class _K2, enable_if_t<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value>* = nullptr>
+ _LIBCPP_INLINE_VISIBILITY
+ iterator find(const _K2& __k) {return __table_.find(__k);}
+ template <class _K2, enable_if_t<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value>* = nullptr>
+ _LIBCPP_INLINE_VISIBILITY
+ const_iterator find(const _K2& __k) const {return __table_.find(__k);}
+#endif // _LIBCPP_STD_VER > 17
+
_LIBCPP_INLINE_VISIBILITY
size_type count(const key_type& __k) const {return __table_.__count_unique(__k);}
- #if _LIBCPP_STD_VER > 17
- template <typename _K2>
- _LIBCPP_INLINE_VISIBILITY
- _EnableIf<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value, size_type>
- count(const _K2& __k) const {return __table_.__count_unique(__k);}
- #endif // _LIBCPP_STD_VER > 17
- #if _LIBCPP_STD_VER > 17
- _LIBCPP_INLINE_VISIBILITY
- bool contains(const key_type& __k) const {return find(__k) != end();}
+#if _LIBCPP_STD_VER > 17
+ template <class _K2, enable_if_t<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value>* = nullptr>
+ _LIBCPP_INLINE_VISIBILITY
+ size_type count(const _K2& __k) const {return __table_.__count_unique(__k);}
+#endif // _LIBCPP_STD_VER > 17
+
+#if _LIBCPP_STD_VER > 17
+ _LIBCPP_INLINE_VISIBILITY
+ bool contains(const key_type& __k) const {return find(__k) != end();}
+
+ template <class _K2, enable_if_t<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value>* = nullptr>
+ _LIBCPP_INLINE_VISIBILITY
+ bool contains(const _K2& __k) const {return find(__k) != end();}
+#endif // _LIBCPP_STD_VER > 17
- template <typename _K2>
- _LIBCPP_INLINE_VISIBILITY
- _EnableIf<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value, bool>
- contains(const _K2& __k) const {return find(__k) != end();}
- #endif // _LIBCPP_STD_VER > 17
_LIBCPP_INLINE_VISIBILITY
pair<iterator, iterator> equal_range(const key_type& __k)
{return __table_.__equal_range_unique(__k);}
_LIBCPP_INLINE_VISIBILITY
pair<const_iterator, const_iterator> equal_range(const key_type& __k) const
{return __table_.__equal_range_unique(__k);}
- #if _LIBCPP_STD_VER > 17
- template <typename _K2>
- _LIBCPP_INLINE_VISIBILITY
- _EnableIf<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value, pair<iterator, iterator>>
- equal_range(const _K2& __k) {return __table_.__equal_range_unique(__k);}
- template <typename _K2>
- _LIBCPP_INLINE_VISIBILITY
- _EnableIf<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value, pair<const_iterator, const_iterator>>
- equal_range(const _K2& __k) const {return __table_.__equal_range_unique(__k);}
- #endif // _LIBCPP_STD_VER > 17
+#if _LIBCPP_STD_VER > 17
+ template <class _K2, enable_if_t<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value>* = nullptr>
+ _LIBCPP_INLINE_VISIBILITY
+ pair<iterator, iterator> equal_range(const _K2& __k)
+ {return __table_.__equal_range_unique(__k);}
+ template <class _K2, enable_if_t<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value>* = nullptr>
+ _LIBCPP_INLINE_VISIBILITY
+ pair<const_iterator, const_iterator> equal_range(const _K2& __k) const
+ {return __table_.__equal_range_unique(__k);}
+#endif // _LIBCPP_STD_VER > 17
_LIBCPP_INLINE_VISIBILITY
size_type bucket_count() const _NOEXCEPT {return __table_.bucket_count();}
@@ -799,15 +871,16 @@ public:
};
-#ifndef _LIBCPP_HAS_NO_DEDUCTION_GUIDES
+#if _LIBCPP_STD_VER >= 17
template<class _InputIterator,
class _Hash = hash<__iter_value_type<_InputIterator>>,
class _Pred = equal_to<__iter_value_type<_InputIterator>>,
class _Allocator = allocator<__iter_value_type<_InputIterator>>,
- class = _EnableIf<!__is_allocator<_Hash>::value>,
- class = _EnableIf<!is_integral<_Hash>::value>,
- class = _EnableIf<!__is_allocator<_Pred>::value>,
- class = _EnableIf<__is_allocator<_Allocator>::value>>
+ class = enable_if_t<__is_cpp17_input_iterator<_InputIterator>::value>,
+ class = enable_if_t<!__is_allocator<_Hash>::value>,
+ class = enable_if_t<!is_integral<_Hash>::value>,
+ class = enable_if_t<!__is_allocator<_Pred>::value>,
+ class = enable_if_t<__is_allocator<_Allocator>::value>>
unordered_set(_InputIterator, _InputIterator, typename allocator_traits<_Allocator>::size_type = 0,
_Hash = _Hash(), _Pred = _Pred(), _Allocator = _Allocator())
-> unordered_set<__iter_value_type<_InputIterator>, _Hash, _Pred, _Allocator>;
@@ -815,16 +888,17 @@ unordered_set(_InputIterator, _InputIterator, typename allocator_traits<_Allocat
template<class _Tp, class _Hash = hash<_Tp>,
class _Pred = equal_to<_Tp>,
class _Allocator = allocator<_Tp>,
- class = _EnableIf<!__is_allocator<_Hash>::value>,
- class = _EnableIf<!is_integral<_Hash>::value>,
- class = _EnableIf<!__is_allocator<_Pred>::value>,
- class = _EnableIf<__is_allocator<_Allocator>::value>>
+ class = enable_if_t<!__is_allocator<_Hash>::value>,
+ class = enable_if_t<!is_integral<_Hash>::value>,
+ class = enable_if_t<!__is_allocator<_Pred>::value>,
+ class = enable_if_t<__is_allocator<_Allocator>::value>>
unordered_set(initializer_list<_Tp>, typename allocator_traits<_Allocator>::size_type = 0,
_Hash = _Hash(), _Pred = _Pred(), _Allocator = _Allocator())
-> unordered_set<_Tp, _Hash, _Pred, _Allocator>;
template<class _InputIterator, class _Allocator,
- class = _EnableIf<__is_allocator<_Allocator>::value>>
+ class = enable_if_t<__is_cpp17_input_iterator<_InputIterator>::value>,
+ class = enable_if_t<__is_allocator<_Allocator>::value>>
unordered_set(_InputIterator, _InputIterator,
typename allocator_traits<_Allocator>::size_type, _Allocator)
-> unordered_set<__iter_value_type<_InputIterator>,
@@ -833,9 +907,10 @@ unordered_set(_InputIterator, _InputIterator,
_Allocator>;
template<class _InputIterator, class _Hash, class _Allocator,
- class = _EnableIf<!__is_allocator<_Hash>::value>,
- class = _EnableIf<!is_integral<_Hash>::value>,
- class = _EnableIf<__is_allocator<_Allocator>::value>>
+ class = enable_if_t<__is_cpp17_input_iterator<_InputIterator>::value>,
+ class = enable_if_t<!__is_allocator<_Hash>::value>,
+ class = enable_if_t<!is_integral<_Hash>::value>,
+ class = enable_if_t<__is_allocator<_Allocator>::value>>
unordered_set(_InputIterator, _InputIterator,
typename allocator_traits<_Allocator>::size_type, _Hash, _Allocator)
-> unordered_set<__iter_value_type<_InputIterator>, _Hash,
@@ -843,14 +918,14 @@ unordered_set(_InputIterator, _InputIterator,
_Allocator>;
template<class _Tp, class _Allocator,
- class = _EnableIf<__is_allocator<_Allocator>::value>>
+ class = enable_if_t<__is_allocator<_Allocator>::value>>
unordered_set(initializer_list<_Tp>, typename allocator_traits<_Allocator>::size_type, _Allocator)
-> unordered_set<_Tp, hash<_Tp>, equal_to<_Tp>, _Allocator>;
template<class _Tp, class _Hash, class _Allocator,
- class = _EnableIf<!__is_allocator<_Hash>::value>,
- class = _EnableIf<!is_integral<_Hash>::value>,
- class = _EnableIf<__is_allocator<_Allocator>::value>>
+ class = enable_if_t<!__is_allocator<_Hash>::value>,
+ class = enable_if_t<!is_integral<_Hash>::value>,
+ class = enable_if_t<__is_allocator<_Allocator>::value>>
unordered_set(initializer_list<_Tp>, typename allocator_traits<_Allocator>::size_type, _Hash, _Allocator)
-> unordered_set<_Tp, _Hash, equal_to<_Tp>, _Allocator>;
#endif
@@ -1372,49 +1447,48 @@ public:
iterator find(const key_type& __k) {return __table_.find(__k);}
_LIBCPP_INLINE_VISIBILITY
const_iterator find(const key_type& __k) const {return __table_.find(__k);}
- #if _LIBCPP_STD_VER > 17
- template <typename _K2>
- _LIBCPP_INLINE_VISIBILITY
- _EnableIf<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value, iterator>
- find(const _K2& __k) {return __table_.find(__k);}
- template <typename _K2>
- _LIBCPP_INLINE_VISIBILITY
- _EnableIf<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value, const_iterator>
- find(const _K2& __k) const {return __table_.find(__k);}
- #endif // _LIBCPP_STD_VER > 17
+#if _LIBCPP_STD_VER > 17
+ template<class _K2, enable_if_t<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value>* = nullptr>
+ _LIBCPP_INLINE_VISIBILITY
+ iterator find(const _K2& __k) {return __table_.find(__k);}
+ template<class _K2, enable_if_t<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value>* = nullptr>
+ _LIBCPP_INLINE_VISIBILITY
+ const_iterator find(const _K2& __k) const {return __table_.find(__k);}
+#endif // _LIBCPP_STD_VER > 17
+
_LIBCPP_INLINE_VISIBILITY
size_type count(const key_type& __k) const {return __table_.__count_multi(__k);}
- #if _LIBCPP_STD_VER > 17
- template <typename _K2>
- _LIBCPP_INLINE_VISIBILITY
- _EnableIf<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value, size_type>
- count(const _K2& __k) const {return __table_.__count_multi(__k);}
- #endif // _LIBCPP_STD_VER > 17
- #if _LIBCPP_STD_VER > 17
- _LIBCPP_INLINE_VISIBILITY
- bool contains(const key_type& __k) const {return find(__k) != end();}
+#if _LIBCPP_STD_VER > 17
+ template<class _K2, enable_if_t<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value>* = nullptr>
+ _LIBCPP_INLINE_VISIBILITY
+ size_type count(const _K2& __k) const {return __table_.__count_multi(__k);}
+#endif // _LIBCPP_STD_VER > 17
+
+#if _LIBCPP_STD_VER > 17
+ _LIBCPP_INLINE_VISIBILITY
+ bool contains(const key_type& __k) const {return find(__k) != end();}
+
+ template<class _K2, enable_if_t<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value>* = nullptr>
+ _LIBCPP_INLINE_VISIBILITY
+ bool contains(const _K2& __k) const {return find(__k) != end();}
+#endif // _LIBCPP_STD_VER > 17
- template <typename _K2>
- _LIBCPP_INLINE_VISIBILITY
- _EnableIf<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value, bool>
- contains(const _K2& __k) const {return find(__k) != end();}
- #endif // _LIBCPP_STD_VER > 17
_LIBCPP_INLINE_VISIBILITY
pair<iterator, iterator> equal_range(const key_type& __k)
{return __table_.__equal_range_multi(__k);}
_LIBCPP_INLINE_VISIBILITY
pair<const_iterator, const_iterator> equal_range(const key_type& __k) const
{return __table_.__equal_range_multi(__k);}
- #if _LIBCPP_STD_VER > 17
- template <typename _K2>
- _LIBCPP_INLINE_VISIBILITY
- _EnableIf<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value, pair<iterator, iterator>>
- equal_range(const _K2& __k) {return __table_.__equal_range_multi(__k);}
- template <typename _K2>
- _LIBCPP_INLINE_VISIBILITY
- _EnableIf<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value, pair<const_iterator, const_iterator>>
- equal_range(const _K2& __k) const {return __table_.__equal_range_multi(__k);}
- #endif // _LIBCPP_STD_VER > 17
+#if _LIBCPP_STD_VER > 17
+ template<class _K2, enable_if_t<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value>* = nullptr>
+ _LIBCPP_INLINE_VISIBILITY
+ pair<iterator, iterator> equal_range(const _K2& __k)
+ {return __table_.__equal_range_multi(__k);}
+ template<class _K2, enable_if_t<__is_transparent<hasher, _K2>::value && __is_transparent<key_equal, _K2>::value>* = nullptr>
+ _LIBCPP_INLINE_VISIBILITY
+ pair<const_iterator, const_iterator> equal_range(const _K2& __k) const
+ {return __table_.__equal_range_multi(__k);}
+#endif // _LIBCPP_STD_VER > 17
_LIBCPP_INLINE_VISIBILITY
size_type bucket_count() const _NOEXCEPT {return __table_.bucket_count();}
@@ -1465,31 +1539,33 @@ public:
};
-#ifndef _LIBCPP_HAS_NO_DEDUCTION_GUIDES
+#if _LIBCPP_STD_VER >= 17
template<class _InputIterator,
class _Hash = hash<__iter_value_type<_InputIterator>>,
class _Pred = equal_to<__iter_value_type<_InputIterator>>,
class _Allocator = allocator<__iter_value_type<_InputIterator>>,
- class = _EnableIf<!__is_allocator<_Hash>::value>,
- class = _EnableIf<!is_integral<_Hash>::value>,
- class = _EnableIf<!__is_allocator<_Pred>::value>,
- class = _EnableIf<__is_allocator<_Allocator>::value>>
+ class = enable_if_t<__is_cpp17_input_iterator<_InputIterator>::value>,
+ class = enable_if_t<!__is_allocator<_Hash>::value>,
+ class = enable_if_t<!is_integral<_Hash>::value>,
+ class = enable_if_t<!__is_allocator<_Pred>::value>,
+ class = enable_if_t<__is_allocator<_Allocator>::value>>
unordered_multiset(_InputIterator, _InputIterator, typename allocator_traits<_Allocator>::size_type = 0,
_Hash = _Hash(), _Pred = _Pred(), _Allocator = _Allocator())
-> unordered_multiset<__iter_value_type<_InputIterator>, _Hash, _Pred, _Allocator>;
template<class _Tp, class _Hash = hash<_Tp>,
class _Pred = equal_to<_Tp>, class _Allocator = allocator<_Tp>,
- class = _EnableIf<!__is_allocator<_Hash>::value>,
- class = _EnableIf<!is_integral<_Hash>::value>,
- class = _EnableIf<!__is_allocator<_Pred>::value>,
- class = _EnableIf<__is_allocator<_Allocator>::value>>
+ class = enable_if_t<!__is_allocator<_Hash>::value>,
+ class = enable_if_t<!is_integral<_Hash>::value>,
+ class = enable_if_t<!__is_allocator<_Pred>::value>,
+ class = enable_if_t<__is_allocator<_Allocator>::value>>
unordered_multiset(initializer_list<_Tp>, typename allocator_traits<_Allocator>::size_type = 0,
_Hash = _Hash(), _Pred = _Pred(), _Allocator = _Allocator())
-> unordered_multiset<_Tp, _Hash, _Pred, _Allocator>;
template<class _InputIterator, class _Allocator,
- class = _EnableIf<__is_allocator<_Allocator>::value>>
+ class = enable_if_t<__is_cpp17_input_iterator<_InputIterator>::value>,
+ class = enable_if_t<__is_allocator<_Allocator>::value>>
unordered_multiset(_InputIterator, _InputIterator, typename allocator_traits<_Allocator>::size_type, _Allocator)
-> unordered_multiset<__iter_value_type<_InputIterator>,
hash<__iter_value_type<_InputIterator>>,
@@ -1497,9 +1573,10 @@ unordered_multiset(_InputIterator, _InputIterator, typename allocator_traits<_Al
_Allocator>;
template<class _InputIterator, class _Hash, class _Allocator,
- class = _EnableIf<!__is_allocator<_Hash>::value>,
- class = _EnableIf<!is_integral<_Hash>::value>,
- class = _EnableIf<__is_allocator<_Allocator>::value>>
+ class = enable_if_t<__is_cpp17_input_iterator<_InputIterator>::value>,
+ class = enable_if_t<!__is_allocator<_Hash>::value>,
+ class = enable_if_t<!is_integral<_Hash>::value>,
+ class = enable_if_t<__is_allocator<_Allocator>::value>>
unordered_multiset(_InputIterator, _InputIterator, typename allocator_traits<_Allocator>::size_type,
_Hash, _Allocator)
-> unordered_multiset<__iter_value_type<_InputIterator>, _Hash,
@@ -1507,14 +1584,14 @@ unordered_multiset(_InputIterator, _InputIterator, typename allocator_traits<_Al
_Allocator>;
template<class _Tp, class _Allocator,
- class = _EnableIf<__is_allocator<_Allocator>::value>>
+ class = enable_if_t<__is_allocator<_Allocator>::value>>
unordered_multiset(initializer_list<_Tp>, typename allocator_traits<_Allocator>::size_type, _Allocator)
-> unordered_multiset<_Tp, hash<_Tp>, equal_to<_Tp>, _Allocator>;
template<class _Tp, class _Hash, class _Allocator,
- class = _EnableIf<!__is_allocator<_Hash>::value>,
- class = _EnableIf<!is_integral<_Hash>::value>,
- class = _EnableIf<__is_allocator<_Allocator>::value>>
+ class = enable_if_t<!__is_allocator<_Hash>::value>,
+ class = enable_if_t<!is_integral<_Hash>::value>,
+ class = enable_if_t<__is_allocator<_Allocator>::value>>
unordered_multiset(initializer_list<_Tp>, typename allocator_traits<_Allocator>::size_type, _Hash, _Allocator)
-> unordered_multiset<_Tp, _Hash, equal_to<_Tp>, _Allocator>;
#endif
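The deduction guides listed in the synopsis and tightened above (they now also require a Cpp17InputIterator) can be exercised like this (illustrative sketch, C++17 or later):

#include <unordered_set>
#include <vector>

int main() {
    std::vector<int> v{1, 2, 3, 2};
    std::unordered_set s(v.begin(), v.end());      // iterator guide deduces std::unordered_set<int>
    std::unordered_multiset ms{1, 2, 2, 3};        // initializer_list guide deduces std::unordered_multiset<int>
    return (s.size() == 3 && ms.count(2) == 2) ? 0 : 1;
}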
diff --git a/libcxx/include/utility b/libcxx/include/utility
index 83ad035c9f90..2b3c4dfa3f0e 100644
--- a/libcxx/include/utility
+++ b/libcxx/include/utility
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===-------------------------- utility -----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -79,7 +79,7 @@ struct pair
pair(pair&&) = default;
explicit(see-below) constexpr pair();
explicit(see-below) pair(const T1& x, const T2& y); // constexpr in C++14
- template <class U, class V> explicit(see-below) pair(U&& x, V&& y); // constexpr in C++14
+ template <class U = T1, class V = T2> explicit(see-below) pair(U&&, V&&); // constexpr in C++14
template <class U, class V> explicit(see-below) pair(const pair<U, V>& p); // constexpr in C++14
template <class U, class V> explicit(see-below) pair(pair<U, V>&& p); // constexpr in C++14
template <class... Args1, class... Args2>
@@ -95,12 +95,18 @@ struct pair
is_nothrow_swappable_v<T2>); // constexpr in C++20
};
+template<class T1, class T2> pair(T1, T2) -> pair<T1, T2>;
+
template <class T1, class T2> bool operator==(const pair<T1,T2>&, const pair<T1,T2>&); // constexpr in C++14
-template <class T1, class T2> bool operator!=(const pair<T1,T2>&, const pair<T1,T2>&); // constexpr in C++14
-template <class T1, class T2> bool operator< (const pair<T1,T2>&, const pair<T1,T2>&); // constexpr in C++14
-template <class T1, class T2> bool operator> (const pair<T1,T2>&, const pair<T1,T2>&); // constexpr in C++14
-template <class T1, class T2> bool operator>=(const pair<T1,T2>&, const pair<T1,T2>&); // constexpr in C++14
-template <class T1, class T2> bool operator<=(const pair<T1,T2>&, const pair<T1,T2>&); // constexpr in C++14
+template <class T1, class T2> bool operator!=(const pair<T1,T2>&, const pair<T1,T2>&); // constexpr in C++14, removed in C++20
+template <class T1, class T2> bool operator< (const pair<T1,T2>&, const pair<T1,T2>&); // constexpr in C++14, removed in C++20
+template <class T1, class T2> bool operator> (const pair<T1,T2>&, const pair<T1,T2>&); // constexpr in C++14, removed in C++20
+template <class T1, class T2> bool operator>=(const pair<T1,T2>&, const pair<T1,T2>&); // constexpr in C++14, removed in C++20
+template <class T1, class T2> bool operator<=(const pair<T1,T2>&, const pair<T1,T2>&); // constexpr in C++14, removed in C++20
+template <class T1, class T2>
+ constexpr common_comparison_category_t<synth-three-way-result<T1>,
+ synth-three-way-result<T2>>
+ operator<=>(const pair<T1,T2>&, const pair<T1,T2>&); // C++20
template <class T1, class T2> pair<V1, V2> make_pair(T1&&, T2&&); // constexpr in C++14
template <class T1, class T2>
@@ -179,7 +185,8 @@ template<class... T>
using index_sequence_for = make_index_sequence<sizeof...(T)>;
template<class T, class U=T>
- T exchange(T& obj, U&& new_value);
+ constexpr T exchange(T& obj, U&& new_value)
+ noexcept(is_nothrow_move_constructible<T>::value && is_nothrow_assignable<T&, U>::value); // constexpr in C++17, noexcept in C++23
// 20.2.7, in-place construction // C++17
struct in_place_t {
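For context, the pair deduction guide and the constexpr std::exchange documented in this synopsis behave roughly as follows (user-side sketch, not part of the patch):

#include <utility>

int main() {
    auto p = std::pair(1, 2.5);                            // pair(T1, T2) -> pair<T1, T2> deduces pair<int, double>
    int counter = 41;
    int previous = std::exchange(counter, counter + 1);   // returns the old value, then assigns the new one
    return (previous == 41 && counter == 42 && p.first == 1) ? 0 : 1;
}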
diff --git a/libcxx/include/valarray b/libcxx/include/valarray
index 6e25514a6a3a..909e0422c476 100644
--- a/libcxx/include/valarray
+++ b/libcxx/include/valarray
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===-------------------------- valarray ----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -105,6 +105,8 @@ public:
void resize(size_t n, value_type x = value_type());
};
+template<class T, size_t cnt> valarray(const T(&)[cnt], size_t) -> valarray<T>;
+
class slice
{
public:
@@ -1081,6 +1083,11 @@ private:
valarray& __assign_range(const value_type* __f, const value_type* __l);
};
+#if _LIBCPP_STD_VER > 14
+template<class _Tp, size_t _Size>
+valarray(const _Tp(&)[_Size], size_t) -> valarray<_Tp>;
+#endif
+
_LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS void valarray<size_t>::resize(size_t, size_t))
template <class _Op, class _Tp>
@@ -3048,7 +3055,7 @@ template <class _Tp>
valarray<_Tp>&
valarray<_Tp>::operator=(const valarray& __v)
{
- if (this != &__v)
+ if (this != _VSTD::addressof(__v))
return __assign_range(__v.__begin_, __v.__end_);
return *this;
}
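The valarray deduction guide added above (both in the synopsis and under _LIBCPP_STD_VER > 14) permits deduction from a built-in array plus a length; a small sketch:

#include <valarray>

int main() {
    const double samples[] = {1.0, 2.0, 3.0};
    std::valarray v(samples, 3);                   // guide deduces std::valarray<double>
    return v.sum() == 6.0 ? 0 : 1;
}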
diff --git a/libcxx/include/variant b/libcxx/include/variant
index 700e6f3f1151..51bcd6ef4e98 100644
--- a/libcxx/include/variant
+++ b/libcxx/include/variant
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===------------------------------ variant -------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -233,10 +233,7 @@ public:
_LIBCPP_BEGIN_NAMESPACE_STD
-// TODO: GCC 5 lies about its support for C++17 (it says it supports it but it
-// really doesn't). That breaks variant, which uses some C++17 features.
-// Remove this once we drop support for GCC 5.
-#if _LIBCPP_STD_VER > 14 && !(defined(_LIBCPP_COMPILER_GCC) && _GNUC_VER_NEW < 6000)
+#if _LIBCPP_STD_VER > 14
// Light N-dimensional array of function pointers. Used in place of std::array to avoid
// adding a dependency.
@@ -269,7 +266,7 @@ template <class _Tp>
struct _LIBCPP_TEMPLATE_VIS variant_size;
template <class _Tp>
-_LIBCPP_INLINE_VAR constexpr size_t variant_size_v = variant_size<_Tp>::value;
+inline constexpr size_t variant_size_v = variant_size<_Tp>::value;
template <class _Tp>
struct _LIBCPP_TEMPLATE_VIS variant_size<const _Tp> : variant_size<_Tp> {};
@@ -309,7 +306,7 @@ struct _LIBCPP_TEMPLATE_VIS variant_alternative<_Ip, variant<_Types...>> {
using type = __type_pack_element<_Ip, _Types...>;
};
-_LIBCPP_INLINE_VAR constexpr size_t variant_npos = static_cast<size_t>(-1);
+inline constexpr size_t variant_npos = static_cast<size_t>(-1);
constexpr int __choose_index_type(unsigned int __num_elem) {
if (__num_elem < numeric_limits<unsigned char>::max())
@@ -552,7 +549,7 @@ private:
inline _LIBCPP_INLINE_VISIBILITY
static constexpr auto __make_fdiagonal_impl() {
return __make_dispatch<_Fp, _Vs...>(
- index_sequence<(__identity<_Vs>{}, _Ip)...>{});
+ index_sequence<((void)__identity<_Vs>{}, _Ip)...>{});
}
template <class _Fp, class... _Vs, size_t... _Is>
@@ -1196,11 +1193,11 @@ struct __narrowing_check {
template <class _Dest>
static auto __test_impl(_Dest (&&)[1]) -> __identity<_Dest>;
template <class _Dest, class _Source>
- using _Apply _LIBCPP_NODEBUG_TYPE = decltype(__test_impl<_Dest>({declval<_Source>()}));
+ using _Apply _LIBCPP_NODEBUG = decltype(__test_impl<_Dest>({declval<_Source>()}));
};
template <class _Dest, class _Source>
-using __check_for_narrowing _LIBCPP_NODEBUG_TYPE =
+using __check_for_narrowing _LIBCPP_NODEBUG =
typename _If<
#ifdef _LIBCPP_ENABLE_NARROWING_CONVERSIONS_IN_VARIANT
false &&
@@ -1244,11 +1241,11 @@ struct __make_overloads_imp;
template <size_t ..._Idx>
struct __make_overloads_imp<__tuple_indices<_Idx...> > {
template <class ..._Types>
- using _Apply _LIBCPP_NODEBUG_TYPE = __all_overloads<__overload<_Types, _Idx>...>;
+ using _Apply _LIBCPP_NODEBUG = __all_overloads<__overload<_Types, _Idx>...>;
};
template <class ..._Types>
-using _MakeOverloads _LIBCPP_NODEBUG_TYPE = typename __make_overloads_imp<
+using _MakeOverloads _LIBCPP_NODEBUG = typename __make_overloads_imp<
__make_indices_imp<sizeof...(_Types), 0> >::template _Apply<_Types...>;
template <class _Tp, class... _Types>
@@ -1718,11 +1715,10 @@ inline _LIBCPP_INLINE_VISIBILITY
template <class... _Types>
inline _LIBCPP_INLINE_VISIBILITY
-auto swap(variant<_Types...>& __lhs,
- variant<_Types...>& __rhs) noexcept(noexcept(__lhs.swap(__rhs)))
- -> decltype(__lhs.swap(__rhs)) {
- __lhs.swap(__rhs);
-}
+auto swap(variant<_Types...>& __lhs, variant<_Types...>& __rhs)
+ noexcept(noexcept(__lhs.swap(__rhs)))
+ -> decltype( __lhs.swap(__rhs))
+ { return __lhs.swap(__rhs); }
template <class... _Types>
struct _LIBCPP_TEMPLATE_VIS hash<
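As a quick sanity check of the variant pieces touched above (variant_size_v and variant_npos are now plain inline constexpr variables, and swap forwards to the member), consider this illustrative snippet:

#include <variant>

int main() {
    using V = std::variant<int, double>;
    static_assert(std::variant_size_v<V> == 2);
    V a = 1, b = 2.5;
    swap(a, b);                                    // found by ADL, forwards to a.swap(b)
    return (a.index() == 1 && b.index() == 0 && a.index() != std::variant_npos) ? 0 : 1;
}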
diff --git a/libcxx/include/vector b/libcxx/include/vector
index 9189ed44a80c..e41afbaca509 100644
--- a/libcxx/include/vector
+++ b/libcxx/include/vector
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===------------------------------ vector --------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -245,7 +245,7 @@ public:
template <class InputIterator, class Allocator = allocator<typename iterator_traits<InputIterator>::value_type>>
vector(InputIterator, InputIterator, Allocator = Allocator())
- -> vector<typename iterator_traits<InputIterator>::value_type, Allocator>;
+ -> vector<typename iterator_traits<InputIterator>::value_type, Allocator>; // C++17
template <class Allocator> struct hash<std::vector<bool, Allocator>>;
@@ -275,12 +275,14 @@ erase_if(vector<T, Allocator>& c, Predicate pred); // C++20
#include <__bit_reference>
#include <__debug>
#include <__functional_base>
+#include <__iterator/iterator_traits.h>
#include <__iterator/wrap_iter.h>
#include <__split_buffer>
#include <__utility/forward.h>
#include <algorithm>
#include <climits>
#include <compare>
+#include <cstdlib>
#include <cstring>
#include <initializer_list>
#include <iosfwd> // for forward declaration of vector
@@ -301,196 +303,70 @@ _LIBCPP_PUSH_MACROS
_LIBCPP_BEGIN_NAMESPACE_STD
template <bool>
-class _LIBCPP_TEMPLATE_VIS __vector_base_common
-{
-protected:
- _LIBCPP_INLINE_VISIBILITY __vector_base_common() {}
- _LIBCPP_NORETURN void __throw_length_error() const;
- _LIBCPP_NORETURN void __throw_out_of_range() const;
-};
-
-template <bool __b>
-void
-__vector_base_common<__b>::__throw_length_error() const
-{
- _VSTD::__throw_length_error("vector");
-}
-
-template <bool __b>
-void
-__vector_base_common<__b>::__throw_out_of_range() const
-{
- _VSTD::__throw_out_of_range("vector");
-}
+struct __vector_base_common;
-_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS __vector_base_common<true>)
+template <>
+struct __vector_base_common<true> {
+ // Both are defined in vector.cpp
+ _LIBCPP_NORETURN _LIBCPP_EXPORTED_FROM_ABI void __throw_length_error() const;
+ _LIBCPP_NORETURN _LIBCPP_EXPORTED_FROM_ABI void __throw_out_of_range() const;
+};
template <class _Tp, class _Allocator>
class __vector_base
- : protected __vector_base_common<true>
+ : protected __vector_base_common<true> // This base class is historical, but it needs to remain for ABI compatibility
{
-public:
- typedef _Allocator allocator_type;
- typedef allocator_traits<allocator_type> __alloc_traits;
- typedef typename __alloc_traits::size_type size_type;
-protected:
- typedef _Tp value_type;
- typedef value_type& reference;
- typedef const value_type& const_reference;
- typedef typename __alloc_traits::difference_type difference_type;
- typedef typename __alloc_traits::pointer pointer;
- typedef typename __alloc_traits::const_pointer const_pointer;
- typedef pointer iterator;
- typedef const_pointer const_iterator;
+ typedef _Allocator allocator_type;
+ typedef typename allocator_traits<allocator_type>::pointer pointer;
- pointer __begin_;
- pointer __end_;
- __compressed_pair<pointer, allocator_type> __end_cap_;
-
- _LIBCPP_INLINE_VISIBILITY
- allocator_type& __alloc() _NOEXCEPT
- {return __end_cap_.second();}
- _LIBCPP_INLINE_VISIBILITY
- const allocator_type& __alloc() const _NOEXCEPT
- {return __end_cap_.second();}
- _LIBCPP_INLINE_VISIBILITY
- pointer& __end_cap() _NOEXCEPT
- {return __end_cap_.first();}
- _LIBCPP_INLINE_VISIBILITY
- const pointer& __end_cap() const _NOEXCEPT
- {return __end_cap_.first();}
+protected:
+ pointer __begin_;
+ pointer __end_;
+ __compressed_pair<pointer, allocator_type> __end_cap_;
_LIBCPP_INLINE_VISIBILITY
__vector_base()
- _NOEXCEPT_(is_nothrow_default_constructible<allocator_type>::value);
- _LIBCPP_INLINE_VISIBILITY __vector_base(const allocator_type& __a);
-#ifndef _LIBCPP_CXX03_LANG
- _LIBCPP_INLINE_VISIBILITY __vector_base(allocator_type&& __a) _NOEXCEPT;
-#endif
- ~__vector_base();
-
- _LIBCPP_INLINE_VISIBILITY
- void clear() _NOEXCEPT {__destruct_at_end(__begin_);}
- _LIBCPP_INLINE_VISIBILITY
- size_type capacity() const _NOEXCEPT
- {return static_cast<size_type>(__end_cap() - __begin_);}
-
- _LIBCPP_INLINE_VISIBILITY
- void __destruct_at_end(pointer __new_last) _NOEXCEPT;
-
- _LIBCPP_INLINE_VISIBILITY
- void __copy_assign_alloc(const __vector_base& __c)
- {__copy_assign_alloc(__c, integral_constant<bool,
- __alloc_traits::propagate_on_container_copy_assignment::value>());}
-
- _LIBCPP_INLINE_VISIBILITY
- void __move_assign_alloc(__vector_base& __c)
- _NOEXCEPT_(
- !__alloc_traits::propagate_on_container_move_assignment::value ||
- is_nothrow_move_assignable<allocator_type>::value)
- {__move_assign_alloc(__c, integral_constant<bool,
- __alloc_traits::propagate_on_container_move_assignment::value>());}
-private:
- _LIBCPP_INLINE_VISIBILITY
- void __copy_assign_alloc(const __vector_base& __c, true_type)
- {
- if (__alloc() != __c.__alloc())
- {
- clear();
- __alloc_traits::deallocate(__alloc(), __begin_, capacity());
- __begin_ = __end_ = __end_cap() = nullptr;
- }
- __alloc() = __c.__alloc();
- }
-
- _LIBCPP_INLINE_VISIBILITY
- void __copy_assign_alloc(const __vector_base&, false_type)
- {}
-
- _LIBCPP_INLINE_VISIBILITY
- void __move_assign_alloc(__vector_base& __c, true_type)
- _NOEXCEPT_(is_nothrow_move_assignable<allocator_type>::value)
- {
- __alloc() = _VSTD::move(__c.__alloc());
- }
-
- _LIBCPP_INLINE_VISIBILITY
- void __move_assign_alloc(__vector_base&, false_type)
- _NOEXCEPT
- {}
-};
-
-template <class _Tp, class _Allocator>
-inline _LIBCPP_INLINE_VISIBILITY
-void
-__vector_base<_Tp, _Allocator>::__destruct_at_end(pointer __new_last) _NOEXCEPT
-{
- pointer __soon_to_be_end = __end_;
- while (__new_last != __soon_to_be_end)
- __alloc_traits::destroy(__alloc(), _VSTD::__to_address(--__soon_to_be_end));
- __end_ = __new_last;
-}
-
-template <class _Tp, class _Allocator>
-inline _LIBCPP_INLINE_VISIBILITY
-__vector_base<_Tp, _Allocator>::__vector_base()
_NOEXCEPT_(is_nothrow_default_constructible<allocator_type>::value)
- : __begin_(nullptr),
- __end_(nullptr),
- __end_cap_(nullptr, __default_init_tag())
-{
-}
+ : __begin_(nullptr),
+ __end_(nullptr),
+ __end_cap_(nullptr, __default_init_tag()) {}
-template <class _Tp, class _Allocator>
-inline _LIBCPP_INLINE_VISIBILITY
-__vector_base<_Tp, _Allocator>::__vector_base(const allocator_type& __a)
- : __begin_(nullptr),
- __end_(nullptr),
- __end_cap_(nullptr, __a)
-{
-}
+ _LIBCPP_INLINE_VISIBILITY __vector_base(const allocator_type& __a)
+ : __begin_(nullptr),
+ __end_(nullptr),
+ __end_cap_(nullptr, __a) {}
#ifndef _LIBCPP_CXX03_LANG
-template <class _Tp, class _Allocator>
-inline _LIBCPP_INLINE_VISIBILITY
-__vector_base<_Tp, _Allocator>::__vector_base(allocator_type&& __a) _NOEXCEPT
- : __begin_(nullptr),
- __end_(nullptr),
- __end_cap_(nullptr, _VSTD::move(__a)) {}
+ _LIBCPP_INLINE_VISIBILITY __vector_base(allocator_type&& __a) _NOEXCEPT
+ : __begin_(nullptr),
+ __end_(nullptr),
+ __end_cap_(nullptr, _VSTD::move(__a)) {}
#endif
-
-template <class _Tp, class _Allocator>
-__vector_base<_Tp, _Allocator>::~__vector_base()
-{
- if (__begin_ != nullptr)
- {
- clear();
- __alloc_traits::deallocate(__alloc(), __begin_, capacity());
- }
-}
+};
template <class _Tp, class _Allocator /* = allocator<_Tp> */>
class _LIBCPP_TEMPLATE_VIS vector
+ // This base class is historical, but it needs to remain for ABI compatibility.
: private __vector_base<_Tp, _Allocator>
{
private:
- typedef __vector_base<_Tp, _Allocator> __base;
- typedef allocator<_Tp> __default_allocator_type;
+ typedef __vector_base<_Tp, _Allocator> __base;
+ typedef allocator<_Tp> __default_allocator_type;
public:
- typedef vector __self;
- typedef _Tp value_type;
- typedef _Allocator allocator_type;
- typedef typename __base::__alloc_traits __alloc_traits;
- typedef typename __base::reference reference;
- typedef typename __base::const_reference const_reference;
- typedef typename __base::size_type size_type;
- typedef typename __base::difference_type difference_type;
- typedef typename __base::pointer pointer;
- typedef typename __base::const_pointer const_pointer;
- typedef __wrap_iter<pointer> iterator;
- typedef __wrap_iter<const_pointer> const_iterator;
- typedef _VSTD::reverse_iterator<iterator> reverse_iterator;
- typedef _VSTD::reverse_iterator<const_iterator> const_reverse_iterator;
+ typedef vector __self;
+ typedef _Tp value_type;
+ typedef _Allocator allocator_type;
+ typedef allocator_traits<allocator_type> __alloc_traits;
+ typedef value_type& reference;
+ typedef const value_type& const_reference;
+ typedef typename __alloc_traits::size_type size_type;
+ typedef typename __alloc_traits::difference_type difference_type;
+ typedef typename __alloc_traits::pointer pointer;
+ typedef typename __alloc_traits::const_pointer const_pointer;
+ typedef __wrap_iter<pointer> iterator;
+ typedef __wrap_iter<const_pointer> const_iterator;
+ typedef _VSTD::reverse_iterator<iterator> reverse_iterator;
+ typedef _VSTD::reverse_iterator<const_iterator> const_reverse_iterator;
static_assert((is_same<typename allocator_type::value_type, value_type>::value),
"Allocator::value_type must be same type as value_type");
@@ -552,10 +428,16 @@ public:
_LIBCPP_INLINE_VISIBILITY
~vector()
{
- __annotate_delete();
+ __annotate_delete();
#if _LIBCPP_DEBUG_LEVEL == 2
- __get_db()->__erase_c(this);
+ __get_db()->__erase_c(this);
#endif
+
+ if (this->__begin_ != nullptr)
+ {
+ __clear();
+ __alloc_traits::deallocate(__alloc(), this->__begin_, capacity());
+ }
}
vector(const vector& __x);
@@ -660,7 +542,7 @@ public:
{return static_cast<size_type>(this->__end_ - this->__begin_);}
_LIBCPP_INLINE_VISIBILITY
size_type capacity() const _NOEXCEPT
- {return __base::capacity();}
+ {return static_cast<size_type>(__end_cap() - this->__begin_);}
_LIBCPP_NODISCARD_AFTER_CXX17 _LIBCPP_INLINE_VISIBILITY
bool empty() const _NOEXCEPT
{return this->__begin_ == this->__end_;}
@@ -773,7 +655,7 @@ public:
void clear() _NOEXCEPT
{
size_type __old_size = size();
- __base::clear();
+ __clear();
__annotate_shrink(__old_size);
__invalidate_all_iterators();
}
@@ -834,7 +716,7 @@ private:
{
__invalidate_iterators_past(__new_last);
size_type __old_size = size();
- __base::__destruct_at_end(__new_last);
+ __base_destruct_at_end(__new_last);
__annotate_shrink(__old_size);
}
@@ -898,7 +780,7 @@ private:
struct _ConstructTransaction {
explicit _ConstructTransaction(vector &__v, size_type __n)
- : __v_(__v), __pos_(__v.__end_), __new_end_(__v.__end_ + __n) {
+ : __v_(__v), __pos_(__v.__end_), __new_end_(__v.__end_ + __n) {
#ifndef _LIBCPP_HAS_NO_ASAN
__v_.__annotate_increase(__n);
#endif
@@ -929,19 +811,104 @@ private:
_VSTD::forward<_Args>(__args)...);
++__tx.__pos_;
}
+
+ _LIBCPP_INLINE_VISIBILITY
+ allocator_type& __alloc() _NOEXCEPT
+ {return this->__end_cap_.second();}
+ _LIBCPP_INLINE_VISIBILITY
+ const allocator_type& __alloc() const _NOEXCEPT
+ {return this->__end_cap_.second();}
+ _LIBCPP_INLINE_VISIBILITY
+ pointer& __end_cap() _NOEXCEPT
+ {return this->__end_cap_.first();}
+ _LIBCPP_INLINE_VISIBILITY
+ const pointer& __end_cap() const _NOEXCEPT
+ {return this->__end_cap_.first();}
+
+ _LIBCPP_INLINE_VISIBILITY
+ void __clear() _NOEXCEPT {__base_destruct_at_end(this->__begin_);}
+
+ _LIBCPP_INLINE_VISIBILITY
+ void __base_destruct_at_end(pointer __new_last) _NOEXCEPT {
+ pointer __soon_to_be_end = this->__end_;
+ while (__new_last != __soon_to_be_end)
+ __alloc_traits::destroy(__alloc(), _VSTD::__to_address(--__soon_to_be_end));
+ this->__end_ = __new_last;
+ }
+
+ _LIBCPP_INLINE_VISIBILITY
+ void __copy_assign_alloc(const vector& __c)
+ {__copy_assign_alloc(__c, integral_constant<bool,
+ __alloc_traits::propagate_on_container_copy_assignment::value>());}
+
+ _LIBCPP_INLINE_VISIBILITY
+ void __move_assign_alloc(vector& __c)
+ _NOEXCEPT_(
+ !__alloc_traits::propagate_on_container_move_assignment::value ||
+ is_nothrow_move_assignable<allocator_type>::value)
+ {__move_assign_alloc(__c, integral_constant<bool,
+ __alloc_traits::propagate_on_container_move_assignment::value>());}
+
+ _LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI
+ void __throw_length_error() const {
+#ifndef _LIBCPP_NO_EXCEPTIONS
+ __vector_base_common<true>::__throw_length_error();
+#else
+ _VSTD::abort();
+#endif
+ }
+
+ _LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI
+ void __throw_out_of_range() const {
+#ifndef _LIBCPP_NO_EXCEPTIONS
+ __vector_base_common<true>::__throw_out_of_range();
+#else
+ _VSTD::abort();
+#endif
+ }
+
+ _LIBCPP_INLINE_VISIBILITY
+ void __copy_assign_alloc(const vector& __c, true_type)
+ {
+ if (__alloc() != __c.__alloc())
+ {
+ __clear();
+ __alloc_traits::deallocate(__alloc(), this->__begin_, capacity());
+ this->__begin_ = this->__end_ = __end_cap() = nullptr;
+ }
+ __alloc() = __c.__alloc();
+ }
+
+ _LIBCPP_INLINE_VISIBILITY
+ void __copy_assign_alloc(const vector&, false_type)
+ {}
+
+ _LIBCPP_INLINE_VISIBILITY
+ void __move_assign_alloc(vector& __c, true_type)
+ _NOEXCEPT_(is_nothrow_move_assignable<allocator_type>::value)
+ {
+ __alloc() = _VSTD::move(__c.__alloc());
+ }
+
+ _LIBCPP_INLINE_VISIBILITY
+ void __move_assign_alloc(vector&, false_type)
+ _NOEXCEPT
+ {}
};
-#ifndef _LIBCPP_HAS_NO_DEDUCTION_GUIDES
+#if _LIBCPP_STD_VER >= 17
template<class _InputIterator,
class _Alloc = allocator<__iter_value_type<_InputIterator>>,
- class = _EnableIf<__is_allocator<_Alloc>::value>
+ class = enable_if_t<__is_cpp17_input_iterator<_InputIterator>::value>,
+ class = enable_if_t<__is_allocator<_Alloc>::value>
>
vector(_InputIterator, _InputIterator)
-> vector<__iter_value_type<_InputIterator>, _Alloc>;
template<class _InputIterator,
class _Alloc,
- class = _EnableIf<__is_allocator<_Alloc>::value>
+ class = enable_if_t<__is_cpp17_input_iterator<_InputIterator>::value>,
+ class = enable_if_t<__is_allocator<_Alloc>::value>
>
vector(_InputIterator, _InputIterator, _Alloc)
-> vector<__iter_value_type<_InputIterator>, _Alloc>;
@@ -1043,7 +1010,7 @@ vector<_Tp, _Allocator>::__construct_at_end(size_type __n)
{
_ConstructTransaction __tx(*this, __n);
const_pointer __new_end = __tx.__new_end_;
- for (pointer __pos = __tx.__pos_; __pos != __new_end; ++__pos, __tx.__pos_ = __pos) {
+ for (pointer __pos = __tx.__pos_; __pos != __new_end; __tx.__pos_ = ++__pos) {
__alloc_traits::construct(this->__alloc(), _VSTD::__to_address(__pos));
}
}
@@ -1061,7 +1028,7 @@ vector<_Tp, _Allocator>::__construct_at_end(size_type __n, const_reference __x)
{
_ConstructTransaction __tx(*this, __n);
const_pointer __new_end = __tx.__new_end_;
- for (pointer __pos = __tx.__pos_; __pos != __new_end; ++__pos, __tx.__pos_ = __pos) {
+ for (pointer __pos = __tx.__pos_; __pos != __new_end; __tx.__pos_ = ++__pos) {
__alloc_traits::construct(this->__alloc(), _VSTD::__to_address(__pos), __x);
}
}
@@ -1291,7 +1258,7 @@ vector<_Tp, _Allocator>::vector(vector&& __x)
{
#if _LIBCPP_DEBUG_LEVEL == 2
__get_db()->__insert_c(this);
- __get_db()->swap(this, &__x);
+ __get_db()->swap(this, _VSTD::addressof(__x));
#endif
this->__begin_ = __x.__begin_;
this->__end_ = __x.__end_;
@@ -1314,7 +1281,7 @@ vector<_Tp, _Allocator>::vector(vector&& __x, const __identity_t<allocator_type>
this->__end_cap() = __x.__end_cap();
__x.__begin_ = __x.__end_ = __x.__end_cap() = nullptr;
#if _LIBCPP_DEBUG_LEVEL == 2
- __get_db()->swap(this, &__x);
+ __get_db()->swap(this, _VSTD::addressof(__x));
#endif
}
else
@@ -1369,7 +1336,7 @@ void
vector<_Tp, _Allocator>::__move_assign(vector& __c, false_type)
_NOEXCEPT_(__alloc_traits::is_always_equal::value)
{
- if (__base::__alloc() != __c.__alloc())
+ if (__alloc() != __c.__alloc())
{
typedef move_iterator<iterator> _Ip;
assign(_Ip(__c.begin()), _Ip(__c.end()));
@@ -1384,13 +1351,13 @@ vector<_Tp, _Allocator>::__move_assign(vector& __c, true_type)
_NOEXCEPT_(is_nothrow_move_assignable<allocator_type>::value)
{
__vdeallocate();
- __base::__move_assign_alloc(__c); // this can throw
+ __move_assign_alloc(__c); // this can throw
this->__begin_ = __c.__begin_;
this->__end_ = __c.__end_;
this->__end_cap() = __c.__end_cap();
__c.__begin_ = __c.__end_ = __c.__end_cap() = nullptr;
#if _LIBCPP_DEBUG_LEVEL == 2
- __get_db()->swap(this, &__c);
+ __get_db()->swap(this, _VSTD::addressof(__c));
#endif
}
@@ -1401,9 +1368,9 @@ inline _LIBCPP_INLINE_VISIBILITY
vector<_Tp, _Allocator>&
vector<_Tp, _Allocator>::operator=(const vector& __x)
{
- if (this != &__x)
+ if (this != _VSTD::addressof(__x))
{
- __base::__copy_assign_alloc(__x);
+ __copy_assign_alloc(__x);
assign(__x.__begin_, __x.__end_);
}
return *this;
@@ -1585,6 +1552,8 @@ vector<_Tp, _Allocator>::reserve(size_type __n)
{
if (__n > capacity())
{
+ if (__n > max_size())
+ this->__throw_length_error();
allocator_type& __a = this->__alloc();
__split_buffer<value_type, allocator_type&> __v(__n, size(), __a);
__swap_out_circular_buffer(__v);
@@ -1709,7 +1678,7 @@ typename vector<_Tp, _Allocator>::iterator
vector<_Tp, _Allocator>::erase(const_iterator __position)
{
#if _LIBCPP_DEBUG_LEVEL == 2
- _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__position) == this,
+ _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__position)) == this,
"vector::erase(iterator) called with an iterator not"
" referring to this vector");
#endif
@@ -1728,11 +1697,11 @@ typename vector<_Tp, _Allocator>::iterator
vector<_Tp, _Allocator>::erase(const_iterator __first, const_iterator __last)
{
#if _LIBCPP_DEBUG_LEVEL == 2
- _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__first) == this,
- "vector::erase(iterator, iterator) called with an iterator not"
+ _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__first)) == this,
+ "vector::erase(iterator, iterator) called with an iterator not"
" referring to this vector");
- _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__last) == this,
- "vector::erase(iterator, iterator) called with an iterator not"
+ _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__last)) == this,
+ "vector::erase(iterator, iterator) called with an iterator not"
" referring to this vector");
#endif
_LIBCPP_ASSERT(__first <= __last, "vector::erase(first, last) called with invalid range");
@@ -1755,7 +1724,7 @@ vector<_Tp, _Allocator>::__move_range(pointer __from_s, pointer __from_e, pointe
pointer __i = __from_s + __n;
_ConstructTransaction __tx(*this, __from_e - __i);
for (pointer __pos = __tx.__pos_; __i < __from_e;
- ++__i, ++__pos, __tx.__pos_ = __pos) {
+ ++__i, (void) ++__pos, __tx.__pos_ = __pos) {
__alloc_traits::construct(this->__alloc(),
_VSTD::__to_address(__pos),
_VSTD::move(*__i));
@@ -1769,7 +1738,7 @@ typename vector<_Tp, _Allocator>::iterator
vector<_Tp, _Allocator>::insert(const_iterator __position, const_reference __x)
{
#if _LIBCPP_DEBUG_LEVEL == 2
- _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__position) == this,
+ _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__position)) == this,
"vector::insert(iterator, x) called with an iterator not"
" referring to this vector");
#endif
@@ -1806,7 +1775,7 @@ typename vector<_Tp, _Allocator>::iterator
vector<_Tp, _Allocator>::insert(const_iterator __position, value_type&& __x)
{
#if _LIBCPP_DEBUG_LEVEL == 2
- _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__position) == this,
+ _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__position)) == this,
"vector::insert(iterator, x) called with an iterator not"
" referring to this vector");
#endif
@@ -1839,7 +1808,7 @@ typename vector<_Tp, _Allocator>::iterator
vector<_Tp, _Allocator>::emplace(const_iterator __position, _Args&&... __args)
{
#if _LIBCPP_DEBUG_LEVEL == 2
- _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__position) == this,
+ _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__position)) == this,
"vector::emplace(iterator, x) called with an iterator not"
" referring to this vector");
#endif
@@ -1874,7 +1843,7 @@ typename vector<_Tp, _Allocator>::iterator
vector<_Tp, _Allocator>::insert(const_iterator __position, size_type __n, const_reference __x)
{
#if _LIBCPP_DEBUG_LEVEL == 2
- _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__position) == this,
+ _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__position)) == this,
"vector::insert(iterator, n, x) called with an iterator not"
" referring to this vector");
#endif
@@ -1925,7 +1894,7 @@ typename enable_if
vector<_Tp, _Allocator>::insert(const_iterator __position, _InputIterator __first, _InputIterator __last)
{
#if _LIBCPP_DEBUG_LEVEL == 2
- _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__position) == this,
+ _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__position)) == this,
"vector::insert(iterator, range) called with an iterator not"
" referring to this vector");
#endif
@@ -1978,7 +1947,7 @@ typename enable_if
vector<_Tp, _Allocator>::insert(const_iterator __position, _ForwardIterator __first, _ForwardIterator __last)
{
#if _LIBCPP_DEBUG_LEVEL == 2
- _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__position) == this,
+ _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__position)) == this,
"vector::insert(iterator, range) called with an iterator not"
" referring to this vector");
#endif
@@ -2059,7 +2028,7 @@ vector<_Tp, _Allocator>::swap(vector& __x)
_VSTD::__swap_allocator(this->__alloc(), __x.__alloc(),
integral_constant<bool,__alloc_traits::propagate_on_container_swap::value>());
#if _LIBCPP_DEBUG_LEVEL == 2
- __get_db()->swap(this, &__x);
+ __get_db()->swap(this, _VSTD::addressof(__x));
#endif
}
@@ -2854,7 +2823,7 @@ template <class _Allocator>
vector<bool, _Allocator>&
vector<bool, _Allocator>::operator=(const vector& __v)
{
- if (this != &__v)
+ if (this != _VSTD::addressof(__v))
{
__copy_assign_alloc(__v);
if (__v.__size_)
@@ -2958,7 +2927,7 @@ vector<bool, _Allocator>::assign(size_type __n, const value_type& __x)
__size_ = __n;
else
{
- vector __v(__alloc());
+ vector __v(get_allocator());
__v.reserve(__recommend(__n));
__v.__size_ = __n;
swap(__v);
@@ -3013,7 +2982,9 @@ vector<bool, _Allocator>::reserve(size_type __n)
{
if (__n > capacity())
{
- vector __v(this->__alloc());
+ if (__n > max_size())
+ this->__throw_length_error();
+ vector __v(this->get_allocator());
__v.__vallocate(__n);
__v.__construct_at_end(this->begin(), this->end());
swap(__v);
@@ -3083,7 +3054,7 @@ vector<bool, _Allocator>::insert(const_iterator __position, const value_type& __
}
else
{
- vector __v(__alloc());
+ vector __v(get_allocator());
__v.reserve(__recommend(__size_ + 1));
__v.__size_ = __size_ + 1;
__r = _VSTD::copy(cbegin(), __position, __v.begin());
@@ -3109,7 +3080,7 @@ vector<bool, _Allocator>::insert(const_iterator __position, size_type __n, const
}
else
{
- vector __v(__alloc());
+ vector __v(get_allocator());
__v.reserve(__recommend(__size_ + __n));
__v.__size_ = __size_ + __n;
__r = _VSTD::copy(cbegin(), __position, __v.begin());
@@ -3138,7 +3109,7 @@ vector<bool, _Allocator>::insert(const_iterator __position, _InputIterator __fir
++this->__size_;
back() = *__first;
}
- vector __v(__alloc());
+ vector __v(get_allocator());
if (__first != __last)
{
#ifndef _LIBCPP_NO_EXCEPTIONS
@@ -3188,7 +3159,7 @@ vector<bool, _Allocator>::insert(const_iterator __position, _ForwardIterator __f
}
else
{
- vector __v(__alloc());
+ vector __v(get_allocator());
__v.reserve(__recommend(__size_ + __n));
__v.__size_ = __size_ + __n;
__r = _VSTD::copy(cbegin(), __position, __v.begin());
@@ -3255,7 +3226,7 @@ vector<bool, _Allocator>::resize(size_type __sz, value_type __x)
}
else
{
- vector __v(__alloc());
+ vector __v(get_allocator());
__v.reserve(__recommend(__size_ + __n));
__v.__size_ = __size_ + __n;
__r = _VSTD::copy(cbegin(), cend(), __v.begin());
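Two of the user-visible vector changes above, the explicit length_error check added to reserve() and the iterator deduction guides now constrained to Cpp17InputIterators, can be seen in a sketch like this (illustrative only):

#include <iostream>
#include <stdexcept>
#include <vector>

int main() {
    std::vector<int> v;
    try {
        v.reserve(v.max_size() + 1);               // larger than max_size(): reserve() reports length_error
    } catch (const std::length_error&) {
        std::cout << "length_error as expected\n";
    }
    std::vector w(v.begin(), v.end());             // CTAD through the input-iterator guide: std::vector<int>
    return w.empty() ? 0 : 1;
}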
diff --git a/libcxx/include/version b/libcxx/include/version
index a0ec730a8ea1..d2286f5e8207 100644
--- a/libcxx/include/version
+++ b/libcxx/include/version
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- version ----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -199,17 +199,13 @@ __cpp_lib_void_t 201411L <type_traits>
#endif
#if _LIBCPP_STD_VER > 14
-# if !defined(_LIBCPP_HAS_NO_BUILTIN_ADDRESSOF)
-# define __cpp_lib_addressof_constexpr 201603L
-# endif
+# define __cpp_lib_addressof_constexpr 201603L
# define __cpp_lib_allocator_traits_is_always_equal 201411L
# define __cpp_lib_any 201606L
# define __cpp_lib_apply 201603L
# define __cpp_lib_array_constexpr 201603L
# define __cpp_lib_as_const 201510L
-# if !defined(_LIBCPP_HAS_NO_THREADS)
-# define __cpp_lib_atomic_is_always_lock_free 201603L
-# endif
+# define __cpp_lib_atomic_is_always_lock_free 201603L
# define __cpp_lib_bool_constant 201505L
// # define __cpp_lib_boyer_moore_searcher 201603L
# define __cpp_lib_byte 201603L
@@ -222,15 +218,11 @@ __cpp_lib_void_t 201411L <type_traits>
# endif
# define __cpp_lib_gcd_lcm 201606L
// # define __cpp_lib_hardware_interference_size 201703L
-# if defined(_LIBCPP_HAS_UNIQUE_OBJECT_REPRESENTATIONS)
-# define __cpp_lib_has_unique_object_representations 201606L
-# endif
+# define __cpp_lib_has_unique_object_representations 201606L
# define __cpp_lib_hypot 201603L
# define __cpp_lib_incomplete_container_elements 201505L
# define __cpp_lib_invoke 201411L
-# if !defined(_LIBCPP_HAS_NO_IS_AGGREGATE)
-# define __cpp_lib_is_aggregate 201703L
-# endif
+# define __cpp_lib_is_aggregate 201703L
# define __cpp_lib_is_invocable 201703L
# define __cpp_lib_is_swappable 201603L
# define __cpp_lib_launder 201606L
@@ -267,32 +259,20 @@ __cpp_lib_void_t 201411L <type_traits>
# undef __cpp_lib_array_constexpr
# define __cpp_lib_array_constexpr 201811L
// # define __cpp_lib_assume_aligned 201811L
-# if !defined(_LIBCPP_HAS_NO_THREADS)
-# define __cpp_lib_atomic_flag_test 201907L
-# endif
-# if !defined(_LIBCPP_HAS_NO_THREADS)
-// # define __cpp_lib_atomic_float 201711L
-# endif
-# if !defined(_LIBCPP_HAS_NO_THREADS)
-# define __cpp_lib_atomic_lock_free_type_aliases 201907L
-# endif
-# if !defined(_LIBCPP_HAS_NO_THREADS)
-// # define __cpp_lib_atomic_ref 201806L
-# endif
-# if !defined(_LIBCPP_HAS_NO_THREADS)
-// # define __cpp_lib_atomic_shared_ptr 201711L
-# endif
-# if !defined(_LIBCPP_HAS_NO_THREADS)
-# define __cpp_lib_atomic_value_initialization 201911L
-# endif
-# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_atomic_wait)
+# define __cpp_lib_atomic_flag_test 201907L
+// # define __cpp_lib_atomic_float 201711L
+# define __cpp_lib_atomic_lock_free_type_aliases 201907L
+// # define __cpp_lib_atomic_ref 201806L
+// # define __cpp_lib_atomic_shared_ptr 201711L
+# define __cpp_lib_atomic_value_initialization 201911L
+# if !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_atomic_wait)
# define __cpp_lib_atomic_wait 201907L
# endif
# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_barrier)
# define __cpp_lib_barrier 201907L
# endif
# define __cpp_lib_bind_front 201907L
-// # define __cpp_lib_bit_cast 201806L
+# define __cpp_lib_bit_cast 201806L
// # define __cpp_lib_bitops 201907L
# define __cpp_lib_bounded_array_traits 201902L
# if !defined(_LIBCPP_HAS_NO_CHAR8_T)
@@ -311,7 +291,7 @@ __cpp_lib_void_t 201411L <type_traits>
# define __cpp_lib_constexpr_tuple 201811L
# define __cpp_lib_constexpr_utility 201811L
// # define __cpp_lib_constexpr_vector 201907L
-// # define __cpp_lib_coroutine 201902L
+# define __cpp_lib_coroutine 201902L
# if _LIBCPP_STD_VER > 17 && defined(__cpp_impl_destroying_delete) && __cpp_impl_destroying_delete >= 201806L
# define __cpp_lib_destroying_delete 201806L
# endif
@@ -328,9 +308,7 @@ __cpp_lib_void_t 201411L <type_traits>
# define __cpp_lib_integer_comparison_functions 202002L
# endif
# define __cpp_lib_interpolate 201902L
-# if !defined(_LIBCPP_HAS_NO_BUILTIN_IS_CONSTANT_EVALUATED)
-# define __cpp_lib_is_constant_evaluated 201811L
-# endif
+# define __cpp_lib_is_constant_evaluated 201811L
// # define __cpp_lib_is_layout_compatible 201907L
# define __cpp_lib_is_nothrow_convertible 201806L
// # define __cpp_lib_is_pointer_interconvertible 201907L
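
The <version> hunks above drop configuration guards around several feature-test macros and turn on __cpp_lib_bit_cast and __cpp_lib_coroutine. A small consumer-side sketch of how such macros are normally probed (nothing here is libc++-specific):

    #include <version>

    #if defined(__cpp_lib_bit_cast) && __cpp_lib_bit_cast >= 201806L
    # include <bit>
      // std::bit_cast is available
      inline unsigned bits_of(float f) { return std::bit_cast<unsigned>(f); }
    #else
    # include <cstring>
      // Portable fallback: memcpy-based type punning
      inline unsigned bits_of(float f) { unsigned u; std::memcpy(&u, &f, sizeof u); return u; }
    #endif
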
diff --git a/libcxx/include/wchar.h b/libcxx/include/wchar.h
index c556ae890876..4d2f62e45cae 100644
--- a/libcxx/include/wchar.h
+++ b/libcxx/include/wchar.h
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- wchar.h ----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -108,6 +108,10 @@ size_t wcsrtombs(char* restrict dst, const wchar_t** restrict src, size_t len,
#include <__config>
#include <stddef.h>
+#if defined(_LIBCPP_HAS_NO_WIDE_CHARACTERS)
+# error "The <wchar.h> header is not supported since libc++ has been configured with LIBCXX_ENABLE_WIDE_CHARACTERS disabled"
+#endif
+
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
#pragma GCC system_header
#endif
@@ -170,13 +174,13 @@ inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_PREFERRED_OVERLOAD
}
#endif
-#if defined(__cplusplus) && defined(_LIBCPP_MSVCRT_LIKE)
+#if defined(__cplusplus) && (defined(_LIBCPP_MSVCRT_LIKE) || defined(__MVS__))
extern "C" {
size_t mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src,
size_t nmc, size_t len, mbstate_t *__restrict ps);
size_t wcsnrtombs(char *__restrict dst, const wchar_t **__restrict src,
size_t nwc, size_t len, mbstate_t *__restrict ps);
-} // extern "C++"
-#endif // __cplusplus && _LIBCPP_MSVCRT
+} // extern "C"
+#endif // __cplusplus && (_LIBCPP_MSVCRT || __MVS__)
#endif // _LIBCPP_WCHAR_H
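
The new #error above makes <wchar.h> refuse to compile when libc++ was configured with LIBCXX_ENABLE_WIDE_CHARACTERS=OFF. A hedged sketch of how client code can stay buildable against such a configuration; _LIBCPP_HAS_NO_WIDE_CHARACTERS comes from libc++'s <__config>, which every libc++ header pulls in, so it can be checked after including any standard header:

    #include <cstddef>   // any libc++ header defines the configuration macros

    #if defined(_LIBCPP_HAS_NO_WIDE_CHARACTERS)
      // Wide-character support was disabled at configure time: stay on char.
      typedef char portable_char;
    #else
    # include <cwchar>
      typedef wchar_t portable_char;
    #endif
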
diff --git a/libcxx/include/wctype.h b/libcxx/include/wctype.h
index 1b4b1461496c..08bef7caab90 100644
--- a/libcxx/include/wctype.h
+++ b/libcxx/include/wctype.h
@@ -1,5 +1,5 @@
// -*- C++ -*-
-//===--------------------------- wctype.h ---------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -46,12 +46,26 @@ wctrans_t wctrans(const char* property);
#include <__config>
+#if defined(_LIBCPP_HAS_NO_WIDE_CHARACTERS)
+# error "The <wctype.h> header is not supported since libc++ has been configured with LIBCXX_ENABLE_WIDE_CHARACTERS disabled"
+#endif
+
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
#pragma GCC system_header
#endif
+// TODO:
+// In the future, we should unconditionally include_next <wctype.h> here and instead
+// have a mode under which the library does not need libc++'s <wctype.h> or <cwctype>
+// at all (i.e. a mode without wchar_t). As it stands, we need to do that to completely
+// bypass the using declarations in <cwctype> when we did not include <wctype.h>.
+// Otherwise, a using declaration like `using ::wint_t` in <cwctype> will refer to
+// nothing (with using_if_exists), and if we include another header that defines one
+// of these declarations (e.g. <wchar.h>), the second `using ::wint_t` with using_if_exists
+// will fail because it does not refer to the same declaration.
#if __has_include_next(<wctype.h>)
# include_next <wctype.h>
+# define _LIBCPP_INCLUDED_C_LIBRARY_WCTYPE_H
#endif
#ifdef __cplusplus
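
The TODO above concerns the interaction between include_next and using_if_exists. A hedged sketch of the pattern the comment refers to (not libc++'s exact code, and it requires a Clang new enough to implement the using_if_exists attribute): <cwctype> re-exports the C names into namespace std only if the underlying C header actually declared them:

    #include <wctype.h>   // may or may not declare wint_t / iswalpha on this platform

    namespace std {
      // With using_if_exists, a dangling using-declaration is silently dropped
      // instead of being a hard error.
      using ::wint_t   __attribute__((__using_if_exists__));
      using ::iswalpha __attribute__((__using_if_exists__));
    }
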
diff --git a/libcxx/src/algorithm.cpp b/libcxx/src/algorithm.cpp
index 9f11dc38b15b..4cc7c2725aa0 100644
--- a/libcxx/src/algorithm.cpp
+++ b/libcxx/src/algorithm.cpp
@@ -1,4 +1,4 @@
-//===----------------------- algorithm.cpp --------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -11,7 +11,9 @@
_LIBCPP_BEGIN_NAMESPACE_STD
template void __sort<__less<char>&, char*>(char*, char*, __less<char>&);
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
template void __sort<__less<wchar_t>&, wchar_t*>(wchar_t*, wchar_t*, __less<wchar_t>&);
+#endif
template void __sort<__less<signed char>&, signed char*>(signed char*, signed char*, __less<signed char>&);
template void __sort<__less<unsigned char>&, unsigned char*>(unsigned char*, unsigned char*, __less<unsigned char>&);
template void __sort<__less<short>&, short*>(short*, short*, __less<short>&);
@@ -27,7 +29,9 @@ template void __sort<__less<double>&, double*>(double*, double*, __less<double>&
template void __sort<__less<long double>&, long double*>(long double*, long double*, __less<long double>&);
template bool __insertion_sort_incomplete<__less<char>&, char*>(char*, char*, __less<char>&);
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
template bool __insertion_sort_incomplete<__less<wchar_t>&, wchar_t*>(wchar_t*, wchar_t*, __less<wchar_t>&);
+#endif
template bool __insertion_sort_incomplete<__less<signed char>&, signed char*>(signed char*, signed char*, __less<signed char>&);
template bool __insertion_sort_incomplete<__less<unsigned char>&, unsigned char*>(unsigned char*, unsigned char*, __less<unsigned char>&);
template bool __insertion_sort_incomplete<__less<short>&, short*>(short*, short*, __less<short>&);
diff --git a/libcxx/src/any.cpp b/libcxx/src/any.cpp
index 415d23b0c942..2939fe299697 100644
--- a/libcxx/src/any.cpp
+++ b/libcxx/src/any.cpp
@@ -1,4 +1,4 @@
-//===---------------------------- any.cpp ---------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/src/atomic.cpp b/libcxx/src/atomic.cpp
index 9ae1fb5199bf..9b61a16106c2 100644
--- a/libcxx/src/atomic.cpp
+++ b/libcxx/src/atomic.cpp
@@ -1,4 +1,4 @@
-//===------------------------- atomic.cpp ---------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/src/barrier.cpp b/libcxx/src/barrier.cpp
index 9ee476993b81..0f9dad987fad 100644
--- a/libcxx/src/barrier.cpp
+++ b/libcxx/src/barrier.cpp
@@ -1,4 +1,4 @@
-//===------------------------- barrier.cpp ---------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/src/bind.cpp b/libcxx/src/bind.cpp
index 53efdf9df375..a54df2c0dd7e 100644
--- a/libcxx/src/bind.cpp
+++ b/libcxx/src/bind.cpp
@@ -1,4 +1,4 @@
-//===-------------------------- bind.cpp ----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/src/charconv.cpp b/libcxx/src/charconv.cpp
index 78439f968314..533e59b04d6f 100644
--- a/libcxx/src/charconv.cpp
+++ b/libcxx/src/charconv.cpp
@@ -1,4 +1,4 @@
-//===------------------------- charconv.cpp -------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/src/chrono.cpp b/libcxx/src/chrono.cpp
index 13b375947b4d..8ff3faf9df6a 100644
--- a/libcxx/src/chrono.cpp
+++ b/libcxx/src/chrono.cpp
@@ -1,4 +1,4 @@
-//===------------------------- chrono.cpp ---------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -63,6 +63,30 @@ namespace chrono
#if defined(_LIBCPP_WIN32API)
+#if _WIN32_WINNT < _WIN32_WINNT_WIN8
+
+namespace {
+
+typedef void(WINAPI *GetSystemTimeAsFileTimePtr)(LPFILETIME);
+
+class GetSystemTimeInit {
+public:
+ GetSystemTimeInit() {
+ fp = (GetSystemTimeAsFileTimePtr)GetProcAddress(
+ GetModuleHandleW(L"kernel32.dll"), "GetSystemTimePreciseAsFileTime");
+ if (fp == nullptr)
+ fp = GetSystemTimeAsFileTime;
+ }
+ GetSystemTimeAsFileTimePtr fp;
+};
+
+# 83 "chrono.cpp" 1 3
+GetSystemTimeInit GetSystemTimeAsFileTimeFunc _LIBCPP_INIT_PRIORITY_MAX;
+# 85 "chrono.cpp" 2
+} // namespace
+
+#endif
+
static system_clock::time_point __libcpp_system_clock_now() {
// FILETIME is in 100ns units
using filetime_duration =
@@ -74,10 +98,13 @@ static system_clock::time_point __libcpp_system_clock_now() {
static _LIBCPP_CONSTEXPR const seconds nt_to_unix_epoch{11644473600};
FILETIME ft;
-#if _WIN32_WINNT >= _WIN32_WINNT_WIN8 && WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
+#if (_WIN32_WINNT >= _WIN32_WINNT_WIN8 && WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)) || \
+ (_WIN32_WINNT >= _WIN32_WINNT_WIN10)
GetSystemTimePreciseAsFileTime(&ft);
-#else
+#elif !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
GetSystemTimeAsFileTime(&ft);
+#else
+ GetSystemTimeAsFileTimeFunc.fp(&ft);
#endif
filetime_duration d{(static_cast<__int64>(ft.dwHighDateTime) << 32) |
diff --git a/libcxx/src/condition_variable.cpp b/libcxx/src/condition_variable.cpp
index 1e29083e6e14..0ade56b48432 100644
--- a/libcxx/src/condition_variable.cpp
+++ b/libcxx/src/condition_variable.cpp
@@ -1,4 +1,4 @@
-//===-------------------- condition_variable.cpp --------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/src/condition_variable_destructor.cpp b/libcxx/src/condition_variable_destructor.cpp
index 44fa240ba78b..350e6b77f244 100644
--- a/libcxx/src/condition_variable_destructor.cpp
+++ b/libcxx/src/condition_variable_destructor.cpp
@@ -1,4 +1,4 @@
-//===---------------- condition_variable_destructor.cpp ------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/src/debug.cpp b/libcxx/src/debug.cpp
index dd5963fcce16..ae31c91d154f 100644
--- a/libcxx/src/debug.cpp
+++ b/libcxx/src/debug.cpp
@@ -1,4 +1,4 @@
-//===-------------------------- debug.cpp ---------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/src/exception.cpp b/libcxx/src/exception.cpp
index fce6db7c38b6..efb56c15e793 100644
--- a/libcxx/src/exception.cpp
+++ b/libcxx/src/exception.cpp
@@ -1,4 +1,4 @@
-//===------------------------ exception.cpp -------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/src/experimental/memory_resource.cpp b/libcxx/src/experimental/memory_resource.cpp
index ffe8021514fc..be2fb47fad16 100644
--- a/libcxx/src/experimental/memory_resource.cpp
+++ b/libcxx/src/experimental/memory_resource.cpp
@@ -1,4 +1,4 @@
-//===------------------------ memory_resource.cpp -------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -76,7 +76,9 @@ union ResourceInitHelper {
~ResourceInitHelper() {}
};
+# 79 "memory_resource.cpp" 1 3
_LIBCPP_SAFE_STATIC ResourceInitHelper res_init _LIBCPP_INIT_PRIORITY_MAX;
+# 81 "memory_resource.cpp" 2
} // end namespace
diff --git a/libcxx/src/filesystem/directory_iterator.cpp b/libcxx/src/filesystem/directory_iterator.cpp
index 7b83ba9ff123..6219ceafd36e 100644
--- a/libcxx/src/filesystem/directory_iterator.cpp
+++ b/libcxx/src/filesystem/directory_iterator.cpp
@@ -1,4 +1,4 @@
-//===------------------ directory_iterator.cpp ----------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/src/filesystem/filesystem_common.h b/libcxx/src/filesystem/filesystem_common.h
index 60d07059e327..70092fe4e24d 100644
--- a/libcxx/src/filesystem/filesystem_common.h
+++ b/libcxx/src/filesystem/filesystem_common.h
@@ -10,12 +10,13 @@
#define FILESYSTEM_COMMON_H
#include "__config"
-#include "filesystem"
#include "array"
#include "chrono"
#include "climits"
+#include "cstdarg"
#include "cstdlib"
#include "ctime"
+#include "filesystem"
#if !defined(_LIBCPP_WIN32API)
# include <unistd.h>
diff --git a/libcxx/src/filesystem/operations.cpp b/libcxx/src/filesystem/operations.cpp
index 5179eeae42b5..62bcfbff097f 100644
--- a/libcxx/src/filesystem/operations.cpp
+++ b/libcxx/src/filesystem/operations.cpp
@@ -1,4 +1,4 @@
-//===--------------------- filesystem/ops.cpp -----------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/src/format.cpp b/libcxx/src/format.cpp
index c36c20e60a9e..7ae2af59b7fa 100644
--- a/libcxx/src/format.cpp
+++ b/libcxx/src/format.cpp
@@ -1,4 +1,4 @@
-//===------------------------- format.cpp ---------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/src/functional.cpp b/libcxx/src/functional.cpp
index cc5f43a9f2e5..d8cfaa70337f 100644
--- a/libcxx/src/functional.cpp
+++ b/libcxx/src/functional.cpp
@@ -1,4 +1,4 @@
-//===----------------------- functional.cpp -------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -14,7 +14,9 @@ _LIBCPP_BEGIN_NAMESPACE_STD
bad_function_call::~bad_function_call() noexcept
{
}
+#endif
+#ifdef _LIBCPP_ABI_BAD_FUNCTION_CALL_GOOD_WHAT_MESSAGE
const char*
bad_function_call::what() const noexcept
{
diff --git a/libcxx/src/future.cpp b/libcxx/src/future.cpp
index 4c59f89e56a8..177fe7523eb4 100644
--- a/libcxx/src/future.cpp
+++ b/libcxx/src/future.cpp
@@ -1,4 +1,4 @@
-//===------------------------- future.cpp ---------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/src/hash.cpp b/libcxx/src/hash.cpp
index 89bb736c86c2..b8e921ad0670 100644
--- a/libcxx/src/hash.cpp
+++ b/libcxx/src/hash.cpp
@@ -1,4 +1,4 @@
-//===-------------------------- hash.cpp ----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/src/include/apple_availability.h b/libcxx/src/include/apple_availability.h
index 0f999d3feafb..504700ebd0a9 100644
--- a/libcxx/src/include/apple_availability.h
+++ b/libcxx/src/include/apple_availability.h
@@ -1,4 +1,4 @@
-//===------------------------ apple_availability.h ------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/src/include/atomic_support.h b/libcxx/src/include/atomic_support.h
index 43c1a00234d2..5a745a2f0091 100644
--- a/libcxx/src/include/atomic_support.h
+++ b/libcxx/src/include/atomic_support.h
@@ -24,7 +24,7 @@
&& defined(__ATOMIC_ACQ_REL) \
&& defined(__ATOMIC_SEQ_CST)
# define _LIBCPP_HAS_ATOMIC_BUILTINS
-#elif !defined(__clang__) && defined(_GNUC_VER) && _GNUC_VER >= 407
+#elif defined(_LIBCPP_COMPILER_GCC)
# define _LIBCPP_HAS_ATOMIC_BUILTINS
#endif
@@ -146,7 +146,7 @@ _ValueType __libcpp_atomic_add(_ValueType* __val, _AddType __a,
template <class _ValueType>
inline _LIBCPP_INLINE_VISIBILITY
_ValueType __libcpp_atomic_exchange(_ValueType* __target,
- _ValueType __value, int __order = _AO_Seq)
+ _ValueType __value, int = _AO_Seq)
{
_ValueType old = *__target;
*__target = __value;
diff --git a/libcxx/src/include/config_elast.h b/libcxx/src/include/config_elast.h
index 7880c733fb85..0ed53a3b20d1 100644
--- a/libcxx/src/include/config_elast.h
+++ b/libcxx/src/include/config_elast.h
@@ -1,4 +1,4 @@
-//===----------------------- config_elast.h -------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/src/include/refstring.h b/libcxx/src/include/refstring.h
index ad6cd162fb5f..0675b393beb4 100644
--- a/libcxx/src/include/refstring.h
+++ b/libcxx/src/include/refstring.h
@@ -1,4 +1,4 @@
-//===------------------------ __refstring ---------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/src/ios.cpp b/libcxx/src/ios.cpp
index a8a99015a977..a9bd1dc32397 100644
--- a/libcxx/src/ios.cpp
+++ b/libcxx/src/ios.cpp
@@ -1,4 +1,4 @@
-//===-------------------------- ios.cpp -----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/src/ios.instantiations.cpp b/libcxx/src/ios.instantiations.cpp
index 1a23687d128d..e1189d0b5bd9 100644
--- a/libcxx/src/ios.instantiations.cpp
+++ b/libcxx/src/ios.instantiations.cpp
@@ -19,15 +19,18 @@ _LIBCPP_BEGIN_NAMESPACE_STD
// Original explicit instantiations provided in the library
template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS basic_ios<char>;
-template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS basic_ios<wchar_t>;
template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS basic_streambuf<char>;
-template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS basic_streambuf<wchar_t>;
template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS basic_istream<char>;
-template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS basic_istream<wchar_t>;
template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS basic_ostream<char>;
-template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS basic_ostream<wchar_t>;
template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS basic_iostream<char>;
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
+template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS basic_ios<wchar_t>;
+template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS basic_streambuf<wchar_t>;
+template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS basic_istream<wchar_t>;
+template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS basic_ostream<wchar_t>;
+#endif
+
// Additional instantiations added later. Whether programs rely on these being
// available is protected by _LIBCPP_ABI_ENABLE_ADDITIONAL_IOSTREAM_EXPLICIT_INSTANTIATIONS_1.
template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS basic_stringbuf<char>;
diff --git a/libcxx/src/iostream.cpp b/libcxx/src/iostream.cpp
index ea95534c7667..6070449621a0 100644
--- a/libcxx/src/iostream.cpp
+++ b/libcxx/src/iostream.cpp
@@ -1,4 +1,4 @@
-//===------------------------ iostream.cpp --------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -17,7 +17,6 @@
_LIBCPP_BEGIN_NAMESPACE_STD
-#ifndef _LIBCPP_HAS_NO_STDIN
_ALIGNAS_TYPE (istream) _LIBCPP_FUNC_VIS char cin[sizeof(istream)]
#if defined(_LIBCPP_ABI_MICROSOFT) && defined(__clang__)
__asm__("?cin@" _LIBCPP_ABI_NAMESPACE_STR "@std@@3V?$basic_istream@DU?$char_traits@D@" _LIBCPP_ABI_NAMESPACE_STR "@std@@@12@A")
@@ -25,6 +24,8 @@ __asm__("?cin@" _LIBCPP_ABI_NAMESPACE_STR "@std@@3V?$basic_istream@DU?$char_trai
;
_ALIGNAS_TYPE (__stdinbuf<char> ) static char __cin[sizeof(__stdinbuf <char>)];
static mbstate_t mb_cin;
+
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
_ALIGNAS_TYPE (wistream) _LIBCPP_FUNC_VIS char wcin[sizeof(wistream)]
#if defined(_LIBCPP_ABI_MICROSOFT) && defined(__clang__)
__asm__("?wcin@" _LIBCPP_ABI_NAMESPACE_STR "@std@@3V?$basic_istream@_WU?$char_traits@_W@" _LIBCPP_ABI_NAMESPACE_STR "@std@@@12@A")
@@ -32,9 +33,8 @@ __asm__("?wcin@" _LIBCPP_ABI_NAMESPACE_STR "@std@@3V?$basic_istream@_WU?$char_tr
;
_ALIGNAS_TYPE (__stdinbuf<wchar_t> ) static char __wcin[sizeof(__stdinbuf <wchar_t>)];
static mbstate_t mb_wcin;
-#endif
+#endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS
-#ifndef _LIBCPP_HAS_NO_STDOUT
_ALIGNAS_TYPE (ostream) _LIBCPP_FUNC_VIS char cout[sizeof(ostream)]
#if defined(_LIBCPP_ABI_MICROSOFT) && defined(__clang__)
__asm__("?cout@" _LIBCPP_ABI_NAMESPACE_STR "@std@@3V?$basic_ostream@DU?$char_traits@D@" _LIBCPP_ABI_NAMESPACE_STR "@std@@@12@A")
@@ -42,6 +42,8 @@ __asm__("?cout@" _LIBCPP_ABI_NAMESPACE_STR "@std@@3V?$basic_ostream@DU?$char_tra
;
_ALIGNAS_TYPE (__stdoutbuf<char>) static char __cout[sizeof(__stdoutbuf<char>)];
static mbstate_t mb_cout;
+
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
_ALIGNAS_TYPE (wostream) _LIBCPP_FUNC_VIS char wcout[sizeof(wostream)]
#if defined(_LIBCPP_ABI_MICROSOFT) && defined(__clang__)
__asm__("?wcout@" _LIBCPP_ABI_NAMESPACE_STR "@std@@3V?$basic_ostream@_WU?$char_traits@_W@" _LIBCPP_ABI_NAMESPACE_STR "@std@@@12@A")
@@ -49,7 +51,7 @@ __asm__("?wcout@" _LIBCPP_ABI_NAMESPACE_STR "@std@@3V?$basic_ostream@_WU?$char_t
;
_ALIGNAS_TYPE (__stdoutbuf<wchar_t>) static char __wcout[sizeof(__stdoutbuf<wchar_t>)];
static mbstate_t mb_wcout;
-#endif
+#endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS
_ALIGNAS_TYPE (ostream) _LIBCPP_FUNC_VIS char cerr[sizeof(ostream)]
#if defined(_LIBCPP_ABI_MICROSOFT) && defined(__clang__)
@@ -58,6 +60,8 @@ __asm__("?cerr@" _LIBCPP_ABI_NAMESPACE_STR "@std@@3V?$basic_ostream@DU?$char_tra
;
_ALIGNAS_TYPE (__stdoutbuf<char>) static char __cerr[sizeof(__stdoutbuf<char>)];
static mbstate_t mb_cerr;
+
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
_ALIGNAS_TYPE (wostream) _LIBCPP_FUNC_VIS char wcerr[sizeof(wostream)]
#if defined(_LIBCPP_ABI_MICROSOFT) && defined(__clang__)
__asm__("?wcerr@" _LIBCPP_ABI_NAMESPACE_STR "@std@@3V?$basic_ostream@_WU?$char_traits@_W@" _LIBCPP_ABI_NAMESPACE_STR "@std@@@12@A")
@@ -65,19 +69,28 @@ __asm__("?wcerr@" _LIBCPP_ABI_NAMESPACE_STR "@std@@3V?$basic_ostream@_WU?$char_t
;
_ALIGNAS_TYPE (__stdoutbuf<wchar_t>) static char __wcerr[sizeof(__stdoutbuf<wchar_t>)];
static mbstate_t mb_wcerr;
+#endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS
_ALIGNAS_TYPE (ostream) _LIBCPP_FUNC_VIS char clog[sizeof(ostream)]
#if defined(_LIBCPP_ABI_MICROSOFT) && defined(__clang__)
__asm__("?clog@" _LIBCPP_ABI_NAMESPACE_STR "@std@@3V?$basic_ostream@DU?$char_traits@D@" _LIBCPP_ABI_NAMESPACE_STR "@std@@@12@A")
#endif
;
+
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
_ALIGNAS_TYPE (wostream) _LIBCPP_FUNC_VIS char wclog[sizeof(wostream)]
#if defined(_LIBCPP_ABI_MICROSOFT) && defined(__clang__)
__asm__("?wclog@" _LIBCPP_ABI_NAMESPACE_STR "@std@@3V?$basic_ostream@_WU?$char_traits@_W@" _LIBCPP_ABI_NAMESPACE_STR "@std@@@12@A")
#endif
;
+#endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS
+// Hacky way to make the compiler believe that we're inside a system header so
+// it doesn't flag the use of the init_priority attribute with a value that's
+// reserved for the implementation (we're the implementation).
+# 80 "iostream.cpp" 1 3
_LIBCPP_HIDDEN ios_base::Init __start_std_streams _LIBCPP_INIT_PRIORITY_MAX;
+# 82 "iostream.cpp" 2
// On Windows the TLS storage for locales needs to be initialized before we create
// the standard streams, otherwise it may not be alive during program termination
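
The "hacky way" comment a few lines up explains the GNU linemarker trick: by pretending the next line lives in a system header (flag 3), the compiler stops diagnosing init_priority values that are reserved for the implementation. A minimal standalone sketch of the same trick, with a hypothetical file name; priority 100 is in the reserved range that Clang otherwise rejects outside system headers:

    struct Logger { Logger() {} };

    # 5 "example.cpp" 1 3   // flag 3: following lines are treated as a system header
    Logger early_logger __attribute__((init_priority(100)));  // reserved value, accepted here
    # 7 "example.cpp" 2     // flag 2: back to the real file
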
@@ -107,44 +120,39 @@ DoIOSInit::DoIOSInit()
{
force_locale_initialization();
-#ifndef _LIBCPP_HAS_NO_STDIN
istream* cin_ptr = ::new(cin) istream(::new(__cin) __stdinbuf <char>(stdin, &mb_cin));
- wistream* wcin_ptr = ::new(wcin) wistream(::new(__wcin) __stdinbuf <wchar_t>(stdin, &mb_wcin));
-#endif
-#ifndef _LIBCPP_HAS_NO_STDOUT
ostream* cout_ptr = ::new(cout) ostream(::new(__cout) __stdoutbuf<char>(stdout, &mb_cout));
- wostream* wcout_ptr = ::new(wcout) wostream(::new(__wcout) __stdoutbuf<wchar_t>(stdout, &mb_wcout));
-#endif
ostream* cerr_ptr = ::new(cerr) ostream(::new(__cerr) __stdoutbuf<char>(stderr, &mb_cerr));
::new(clog) ostream(cerr_ptr->rdbuf());
+ cin_ptr->tie(cout_ptr);
+ _VSTD::unitbuf(*cerr_ptr);
+ cerr_ptr->tie(cout_ptr);
+
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
+ wistream* wcin_ptr = ::new(wcin) wistream(::new(__wcin) __stdinbuf <wchar_t>(stdin, &mb_wcin));
+ wostream* wcout_ptr = ::new(wcout) wostream(::new(__wcout) __stdoutbuf<wchar_t>(stdout, &mb_wcout));
wostream* wcerr_ptr = ::new(wcerr) wostream(::new(__wcerr) __stdoutbuf<wchar_t>(stderr, &mb_wcerr));
::new(wclog) wostream(wcerr_ptr->rdbuf());
-#if !defined(_LIBCPP_HAS_NO_STDIN) && !defined(_LIBCPP_HAS_NO_STDOUT)
- cin_ptr->tie(cout_ptr);
wcin_ptr->tie(wcout_ptr);
-#endif
- _VSTD::unitbuf(*cerr_ptr);
_VSTD::unitbuf(*wcerr_ptr);
-#ifndef _LIBCPP_HAS_NO_STDOUT
- cerr_ptr->tie(cout_ptr);
wcerr_ptr->tie(wcout_ptr);
#endif
}
DoIOSInit::~DoIOSInit()
{
-#ifndef _LIBCPP_HAS_NO_STDOUT
ostream* cout_ptr = reinterpret_cast<ostream*>(cout);
- wostream* wcout_ptr = reinterpret_cast<wostream*>(wcout);
cout_ptr->flush();
- wcout_ptr->flush();
-#endif
-
ostream* clog_ptr = reinterpret_cast<ostream*>(clog);
- wostream* wclog_ptr = reinterpret_cast<wostream*>(wclog);
clog_ptr->flush();
+
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
+ wostream* wcout_ptr = reinterpret_cast<wostream*>(wcout);
+ wcout_ptr->flush();
+ wostream* wclog_ptr = reinterpret_cast<wostream*>(wclog);
wclog_ptr->flush();
+#endif
}
ios_base::Init::Init()
diff --git a/libcxx/src/legacy_pointer_safety.cpp b/libcxx/src/legacy_pointer_safety.cpp
new file mode 100644
index 000000000000..34f5613ae253
--- /dev/null
+++ b/libcxx/src/legacy_pointer_safety.cpp
@@ -0,0 +1,23 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "__config"
+#include <memory>
+
+// Support for garbage collection was removed in C++23 by https://wg21.link/P2186R2. Libc++ implements
+// that removal as an extension in all Standard versions. However, we still define the functions that
+// were once part of the library's ABI for backwards compatibility.
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+_LIBCPP_FUNC_VIS void declare_reachable(void*) {}
+_LIBCPP_FUNC_VIS void declare_no_pointers(char*, size_t) {}
+_LIBCPP_FUNC_VIS void undeclare_no_pointers(char*, size_t) {}
+_LIBCPP_FUNC_VIS void* __undeclare_reachable(void* p) { return p; }
+
+_LIBCPP_END_NAMESPACE_STD
diff --git a/libcxx/src/locale.cpp b/libcxx/src/locale.cpp
index d5ab8fb3b836..79f03b85fab3 100644
--- a/libcxx/src/locale.cpp
+++ b/libcxx/src/locale.cpp
@@ -1,4 +1,4 @@
-//===------------------------- locale.cpp ---------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -12,28 +12,36 @@
#define _LCONV_C99
#endif
-#include "string"
-#include "locale"
-#include "codecvt"
-#include "vector"
#include "algorithm"
-#include "typeinfo"
-#ifndef _LIBCPP_NO_EXCEPTIONS
-# include "type_traits"
-#endif
#include "clocale"
+#include "codecvt"
+#include "cstdio"
+#include "cstdlib"
#include "cstring"
+#include "locale"
+#include "string"
+#include "type_traits"
+#include "typeinfo"
+#include "vector"
+
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
+# include "cwctype"
+#endif
+
+#if defined(_AIX)
+# include <sys/localedef.h> // for __lc_ctype_ptr
+#endif
+
#if defined(_LIBCPP_MSVCRT)
-#define _CTYPE_DISABLE_MACROS
+# define _CTYPE_DISABLE_MACROS
#endif
-#include "cwctype"
+
#if defined(_LIBCPP_MSVCRT) || defined(__MINGW32__)
-#include "__support/win32/locale_win32.h"
+# include "__support/win32/locale_win32.h"
#elif !defined(__BIONIC__) && !defined(__NuttX__)
-#include <langinfo.h>
+# include <langinfo.h>
#endif
-#include <stdlib.h>
-#include <stdio.h>
+
#include "include/atomic_support.h"
#include "include/sso_allocator.h"
#include "__undef_macros"
@@ -81,33 +89,11 @@ struct release
void operator()(locale::facet* p) {p->__release_shared();}
};
-template <class T, class A0>
-inline
-T&
-make(A0 a0)
-{
- static typename aligned_storage<sizeof(T)>::type buf;
- auto *obj = ::new (&buf) T(a0);
- return *obj;
-}
-
-template <class T, class A0, class A1>
-inline
-T&
-make(A0 a0, A1 a1)
-{
- static typename aligned_storage<sizeof(T)>::type buf;
- ::new (&buf) T(a0, a1);
- return *reinterpret_cast<T*>(&buf);
-}
-
-template <class T, class A0, class A1, class A2>
-inline
-T&
-make(A0 a0, A1 a1, A2 a2)
+template <class T, class ...Args>
+T& make(Args ...args)
{
static typename aligned_storage<sizeof(T)>::type buf;
- auto *obj = ::new (&buf) T(a0, a1, a2);
+ auto *obj = ::new (&buf) T(args...);
return *obj;
}
@@ -197,11 +183,17 @@ locale::__imp::__imp(size_t refs)
{
facets_.clear();
install(&make<_VSTD::collate<char> >(1u));
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
install(&make<_VSTD::collate<wchar_t> >(1u));
+#endif
install(&make<_VSTD::ctype<char> >(nullptr, false, 1u));
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
install(&make<_VSTD::ctype<wchar_t> >(1u));
+#endif
install(&make<codecvt<char, char, mbstate_t> >(1u));
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
install(&make<codecvt<wchar_t, char, mbstate_t> >(1u));
+#endif
_LIBCPP_SUPPRESS_DEPRECATED_PUSH
install(&make<codecvt<char16_t, char, mbstate_t> >(1u));
install(&make<codecvt<char32_t, char, mbstate_t> >(1u));
@@ -211,25 +203,43 @@ _LIBCPP_SUPPRESS_DEPRECATED_POP
install(&make<codecvt<char32_t, char8_t, mbstate_t> >(1u));
#endif
install(&make<numpunct<char> >(1u));
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
install(&make<numpunct<wchar_t> >(1u));
+#endif
install(&make<num_get<char> >(1u));
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
install(&make<num_get<wchar_t> >(1u));
+#endif
install(&make<num_put<char> >(1u));
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
install(&make<num_put<wchar_t> >(1u));
+#endif
install(&make<moneypunct<char, false> >(1u));
install(&make<moneypunct<char, true> >(1u));
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
install(&make<moneypunct<wchar_t, false> >(1u));
install(&make<moneypunct<wchar_t, true> >(1u));
+#endif
install(&make<money_get<char> >(1u));
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
install(&make<money_get<wchar_t> >(1u));
+#endif
install(&make<money_put<char> >(1u));
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
install(&make<money_put<wchar_t> >(1u));
+#endif
install(&make<time_get<char> >(1u));
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
install(&make<time_get<wchar_t> >(1u));
+#endif
install(&make<time_put<char> >(1u));
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
install(&make<time_put<wchar_t> >(1u));
+#endif
install(&make<_VSTD::messages<char> >(1u));
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
install(&make<_VSTD::messages<wchar_t> >(1u));
+#endif
}
locale::__imp::__imp(const string& name, size_t refs)
@@ -246,11 +256,17 @@ locale::__imp::__imp(const string& name, size_t refs)
if (facets_[i])
facets_[i]->__add_shared();
install(new collate_byname<char>(name_));
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
install(new collate_byname<wchar_t>(name_));
+#endif
install(new ctype_byname<char>(name_));
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
install(new ctype_byname<wchar_t>(name_));
+#endif
install(new codecvt_byname<char, char, mbstate_t>(name_));
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
install(new codecvt_byname<wchar_t, char, mbstate_t>(name_));
+#endif
_LIBCPP_SUPPRESS_DEPRECATED_PUSH
install(new codecvt_byname<char16_t, char, mbstate_t>(name_));
install(new codecvt_byname<char32_t, char, mbstate_t>(name_));
@@ -260,17 +276,27 @@ _LIBCPP_SUPPRESS_DEPRECATED_POP
install(new codecvt_byname<char32_t, char8_t, mbstate_t>(name_));
#endif
install(new numpunct_byname<char>(name_));
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
install(new numpunct_byname<wchar_t>(name_));
+#endif
install(new moneypunct_byname<char, false>(name_));
install(new moneypunct_byname<char, true>(name_));
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
install(new moneypunct_byname<wchar_t, false>(name_));
install(new moneypunct_byname<wchar_t, true>(name_));
+#endif
install(new time_get_byname<char>(name_));
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
install(new time_get_byname<wchar_t>(name_));
+#endif
install(new time_put_byname<char>(name_));
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
install(new time_put_byname<wchar_t>(name_));
+#endif
install(new messages_byname<char>(name_));
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
install(new messages_byname<wchar_t>(name_));
+#endif
#ifndef _LIBCPP_NO_EXCEPTIONS
}
catch (...)
@@ -283,13 +309,6 @@ _LIBCPP_SUPPRESS_DEPRECATED_POP
#endif // _LIBCPP_NO_EXCEPTIONS
}
-// NOTE avoid the `base class should be explicitly initialized in the
-// copy constructor` warning emitted by GCC
-#if defined(__clang__) || _GNUC_VER >= 406
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wextra"
-#endif
-
locale::__imp::__imp(const __imp& other)
: facets_(max<size_t>(N, other.facets_.size())),
name_(other.name_)
@@ -300,10 +319,6 @@ locale::__imp::__imp(const __imp& other)
facets_[i]->__add_shared();
}
-#if defined(__clang__) || _GNUC_VER >= 406
-#pragma GCC diagnostic pop
-#endif
-
locale::__imp::__imp(const __imp& other, const string& name, locale::category c)
: facets_(N),
name_("*")
@@ -319,14 +334,20 @@ locale::__imp::__imp(const __imp& other, const string& name, locale::category c)
if (c & locale::collate)
{
install(new collate_byname<char>(name));
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
install(new collate_byname<wchar_t>(name));
+#endif
}
if (c & locale::ctype)
{
install(new ctype_byname<char>(name));
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
install(new ctype_byname<wchar_t>(name));
+#endif
install(new codecvt_byname<char, char, mbstate_t>(name));
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
install(new codecvt_byname<wchar_t, char, mbstate_t>(name));
+#endif
_LIBCPP_SUPPRESS_DEPRECATED_PUSH
install(new codecvt_byname<char16_t, char, mbstate_t>(name));
install(new codecvt_byname<char32_t, char, mbstate_t>(name));
@@ -340,25 +361,35 @@ _LIBCPP_SUPPRESS_DEPRECATED_POP
{
install(new moneypunct_byname<char, false>(name));
install(new moneypunct_byname<char, true>(name));
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
install(new moneypunct_byname<wchar_t, false>(name));
install(new moneypunct_byname<wchar_t, true>(name));
+#endif
}
if (c & locale::numeric)
{
install(new numpunct_byname<char>(name));
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
install(new numpunct_byname<wchar_t>(name));
+#endif
}
if (c & locale::time)
{
install(new time_get_byname<char>(name));
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
install(new time_get_byname<wchar_t>(name));
+#endif
install(new time_put_byname<char>(name));
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
install(new time_put_byname<wchar_t>(name));
+#endif
}
if (c & locale::messages)
{
install(new messages_byname<char>(name));
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
install(new messages_byname<wchar_t>(name));
+#endif
}
#ifndef _LIBCPP_NO_EXCEPTIONS
}
@@ -396,12 +427,16 @@ locale::__imp::__imp(const __imp& other, const __imp& one, locale::category c)
if (c & locale::collate)
{
install_from<_VSTD::collate<char> >(one);
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
install_from<_VSTD::collate<wchar_t> >(one);
+#endif
}
if (c & locale::ctype)
{
install_from<_VSTD::ctype<char> >(one);
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
install_from<_VSTD::ctype<wchar_t> >(one);
+#endif
install_from<_VSTD::codecvt<char, char, mbstate_t> >(one);
_LIBCPP_SUPPRESS_DEPRECATED_PUSH
install_from<_VSTD::codecvt<char16_t, char, mbstate_t> >(one);
@@ -411,39 +446,59 @@ _LIBCPP_SUPPRESS_DEPRECATED_POP
install_from<_VSTD::codecvt<char16_t, char8_t, mbstate_t> >(one);
install_from<_VSTD::codecvt<char32_t, char8_t, mbstate_t> >(one);
#endif
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
install_from<_VSTD::codecvt<wchar_t, char, mbstate_t> >(one);
+#endif
}
if (c & locale::monetary)
{
install_from<moneypunct<char, false> >(one);
install_from<moneypunct<char, true> >(one);
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
install_from<moneypunct<wchar_t, false> >(one);
install_from<moneypunct<wchar_t, true> >(one);
+#endif
install_from<money_get<char> >(one);
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
install_from<money_get<wchar_t> >(one);
+#endif
install_from<money_put<char> >(one);
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
install_from<money_put<wchar_t> >(one);
+#endif
}
if (c & locale::numeric)
{
install_from<numpunct<char> >(one);
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
install_from<numpunct<wchar_t> >(one);
+#endif
install_from<num_get<char> >(one);
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
install_from<num_get<wchar_t> >(one);
+#endif
install_from<num_put<char> >(one);
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
install_from<num_put<wchar_t> >(one);
+#endif
}
if (c & locale::time)
{
install_from<time_get<char> >(one);
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
install_from<time_get<wchar_t> >(one);
+#endif
install_from<time_put<char> >(one);
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
install_from<time_put<wchar_t> >(one);
+#endif
}
if (c & locale::messages)
{
install_from<_VSTD::messages<char> >(one);
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
install_from<_VSTD::messages<wchar_t> >(one);
+#endif
}
#ifndef _LIBCPP_NO_EXCEPTIONS
}
@@ -734,6 +789,7 @@ collate_byname<char>::do_transform(const char_type* lo, const char_type* hi) con
// template <> class collate_byname<wchar_t>
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
collate_byname<wchar_t>::collate_byname(const char* n, size_t refs)
: collate<wchar_t>(refs),
__l(newlocale(LC_ALL_MASK, n, 0))
@@ -779,8 +835,7 @@ collate_byname<wchar_t>::do_transform(const char_type* lo, const char_type* hi)
wcsxfrm_l(const_cast<wchar_t*>(out.c_str()), in.c_str(), out.size()+1, __l);
return out;
}
-
-// template <> class ctype<wchar_t>;
+#endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS
const ctype_base::mask ctype_base::space;
const ctype_base::mask ctype_base::print;
@@ -795,6 +850,9 @@ const ctype_base::mask ctype_base::blank;
const ctype_base::mask ctype_base::alnum;
const ctype_base::mask ctype_base::graph;
+// template <> class ctype<wchar_t>;
+
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
locale::id ctype<wchar_t>::id;
ctype<wchar_t>::~ctype()
@@ -924,11 +982,14 @@ ctype<wchar_t>::do_narrow(const char_type* low, const char_type* high, char dfau
*dest = dfault;
return low;
}
+#endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS
// template <> class ctype<char>;
locale::id ctype<char>::id;
+const size_t ctype<char>::table_size;
+
ctype<char>::ctype(const mask* tab, bool del, size_t refs)
: locale::facet(refs),
__tab_(tab),
@@ -1255,6 +1316,7 @@ ctype_byname<char>::do_tolower(char_type* low, const char_type* high) const
// template <> class ctype_byname<wchar_t>
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
ctype_byname<wchar_t>::ctype_byname(const char* name, size_t refs)
: ctype<wchar_t>(refs),
__l(newlocale(LC_ALL_MASK, name, 0))
@@ -1454,6 +1516,7 @@ ctype_byname<wchar_t>::do_narrow(const char_type* low, const char_type* high, ch
}
return low;
}
+#endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS
// template <> class codecvt<char, char, mbstate_t>
@@ -1518,6 +1581,7 @@ codecvt<char, char, mbstate_t>::do_max_length() const noexcept
// template <> class codecvt<wchar_t, char, mbstate_t>
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
locale::id codecvt<wchar_t, char, mbstate_t>::id;
codecvt<wchar_t, char, mbstate_t>::codecvt(size_t refs)
@@ -1730,6 +1794,7 @@ codecvt<wchar_t, char, mbstate_t>::do_max_length() const noexcept
{
return __l == 0 ? 1 : static_cast<int>(__libcpp_mb_cur_max_l(__l));
}
+#endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS
// Valid UTF ranges
// UTF-32 UTF-16 UTF-8 # of code points
@@ -3436,6 +3501,7 @@ codecvt<char32_t, char8_t, mbstate_t>::do_max_length() const noexcept
// __codecvt_utf8<wchar_t>
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
__codecvt_utf8<wchar_t>::result
__codecvt_utf8<wchar_t>::do_out(state_type&,
const intern_type* frm, const intern_type* frm_end, const intern_type*& frm_nxt,
@@ -3517,16 +3583,27 @@ __codecvt_utf8<wchar_t>::do_length(state_type&,
{
const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
+#if defined(_LIBCPP_SHORT_WCHAR)
+ return utf8_to_ucs2_length(_frm, _frm_end, mx, _Maxcode_, _Mode_);
+#else
return utf8_to_ucs4_length(_frm, _frm_end, mx, _Maxcode_, _Mode_);
+#endif
}
int
__codecvt_utf8<wchar_t>::do_max_length() const noexcept
{
+#if defined(_LIBCPP_SHORT_WCHAR)
+ if (_Mode_ & consume_header)
+ return 6;
+ return 3;
+#else
if (_Mode_ & consume_header)
return 7;
return 4;
+#endif
}
+#endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS
// __codecvt_utf8<char16_t>
@@ -3680,19 +3757,31 @@ __codecvt_utf8<char32_t>::do_max_length() const noexcept
// __codecvt_utf16<wchar_t, false>
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
__codecvt_utf16<wchar_t, false>::result
__codecvt_utf16<wchar_t, false>::do_out(state_type&,
const intern_type* frm, const intern_type* frm_end, const intern_type*& frm_nxt,
extern_type* to, extern_type* to_end, extern_type*& to_nxt) const
{
+#if defined(_LIBCPP_SHORT_WCHAR)
+ const uint16_t* _frm = reinterpret_cast<const uint16_t*>(frm);
+ const uint16_t* _frm_end = reinterpret_cast<const uint16_t*>(frm_end);
+ const uint16_t* _frm_nxt = _frm;
+#else
const uint32_t* _frm = reinterpret_cast<const uint32_t*>(frm);
const uint32_t* _frm_end = reinterpret_cast<const uint32_t*>(frm_end);
const uint32_t* _frm_nxt = _frm;
+#endif
uint8_t* _to = reinterpret_cast<uint8_t*>(to);
uint8_t* _to_end = reinterpret_cast<uint8_t*>(to_end);
uint8_t* _to_nxt = _to;
+#if defined(_LIBCPP_SHORT_WCHAR)
+ result r = ucs2_to_utf16be(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
+ _Maxcode_, _Mode_);
+#else
result r = ucs4_to_utf16be(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
_Maxcode_, _Mode_);
+#endif
frm_nxt = frm + (_frm_nxt - _frm);
to_nxt = to + (_to_nxt - _to);
return r;
@@ -3706,11 +3795,19 @@ __codecvt_utf16<wchar_t, false>::do_in(state_type&,
const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
const uint8_t* _frm_nxt = _frm;
+#if defined(_LIBCPP_SHORT_WCHAR)
+ uint16_t* _to = reinterpret_cast<uint16_t*>(to);
+ uint16_t* _to_end = reinterpret_cast<uint16_t*>(to_end);
+ uint16_t* _to_nxt = _to;
+ result r = utf16be_to_ucs2(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
+ _Maxcode_, _Mode_);
+#else
uint32_t* _to = reinterpret_cast<uint32_t*>(to);
uint32_t* _to_end = reinterpret_cast<uint32_t*>(to_end);
uint32_t* _to_nxt = _to;
result r = utf16be_to_ucs4(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
_Maxcode_, _Mode_);
+#endif
frm_nxt = frm + (_frm_nxt - _frm);
to_nxt = to + (_to_nxt - _to);
return r;
@@ -3742,15 +3839,25 @@ __codecvt_utf16<wchar_t, false>::do_length(state_type&,
{
const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
+#if defined(_LIBCPP_SHORT_WCHAR)
+ return utf16be_to_ucs2_length(_frm, _frm_end, mx, _Maxcode_, _Mode_);
+#else
return utf16be_to_ucs4_length(_frm, _frm_end, mx, _Maxcode_, _Mode_);
+#endif
}
int
__codecvt_utf16<wchar_t, false>::do_max_length() const noexcept
{
+#if defined(_LIBCPP_SHORT_WCHAR)
+ if (_Mode_ & consume_header)
+ return 4;
+ return 2;
+#else
if (_Mode_ & consume_header)
return 6;
return 4;
+#endif
}
// __codecvt_utf16<wchar_t, true>
@@ -3760,14 +3867,25 @@ __codecvt_utf16<wchar_t, true>::do_out(state_type&,
const intern_type* frm, const intern_type* frm_end, const intern_type*& frm_nxt,
extern_type* to, extern_type* to_end, extern_type*& to_nxt) const
{
+#if defined(_LIBCPP_SHORT_WCHAR)
+ const uint16_t* _frm = reinterpret_cast<const uint16_t*>(frm);
+ const uint16_t* _frm_end = reinterpret_cast<const uint16_t*>(frm_end);
+ const uint16_t* _frm_nxt = _frm;
+#else
const uint32_t* _frm = reinterpret_cast<const uint32_t*>(frm);
const uint32_t* _frm_end = reinterpret_cast<const uint32_t*>(frm_end);
const uint32_t* _frm_nxt = _frm;
+#endif
uint8_t* _to = reinterpret_cast<uint8_t*>(to);
uint8_t* _to_end = reinterpret_cast<uint8_t*>(to_end);
uint8_t* _to_nxt = _to;
+#if defined(_LIBCPP_SHORT_WCHAR)
+ result r = ucs2_to_utf16le(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
+ _Maxcode_, _Mode_);
+#else
result r = ucs4_to_utf16le(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
_Maxcode_, _Mode_);
+#endif
frm_nxt = frm + (_frm_nxt - _frm);
to_nxt = to + (_to_nxt - _to);
return r;
@@ -3781,11 +3899,19 @@ __codecvt_utf16<wchar_t, true>::do_in(state_type&,
const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
const uint8_t* _frm_nxt = _frm;
+#if defined(_LIBCPP_SHORT_WCHAR)
+ uint16_t* _to = reinterpret_cast<uint16_t*>(to);
+ uint16_t* _to_end = reinterpret_cast<uint16_t*>(to_end);
+ uint16_t* _to_nxt = _to;
+ result r = utf16le_to_ucs2(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
+ _Maxcode_, _Mode_);
+#else
uint32_t* _to = reinterpret_cast<uint32_t*>(to);
uint32_t* _to_end = reinterpret_cast<uint32_t*>(to_end);
uint32_t* _to_nxt = _to;
result r = utf16le_to_ucs4(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
_Maxcode_, _Mode_);
+#endif
frm_nxt = frm + (_frm_nxt - _frm);
to_nxt = to + (_to_nxt - _to);
return r;
@@ -3817,16 +3943,27 @@ __codecvt_utf16<wchar_t, true>::do_length(state_type&,
{
const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
+#if defined(_LIBCPP_SHORT_WCHAR)
+ return utf16le_to_ucs2_length(_frm, _frm_end, mx, _Maxcode_, _Mode_);
+#else
return utf16le_to_ucs4_length(_frm, _frm_end, mx, _Maxcode_, _Mode_);
+#endif
}
int
__codecvt_utf16<wchar_t, true>::do_max_length() const noexcept
{
+#if defined(_LIBCPP_SHORT_WCHAR)
+ if (_Mode_ & consume_header)
+ return 4;
+ return 2;
+#else
if (_Mode_ & consume_header)
return 6;
return 4;
+#endif
}
+#endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS
// __codecvt_utf16<char16_t, false>
@@ -4130,14 +4267,21 @@ __codecvt_utf16<char32_t, true>::do_max_length() const noexcept
// __codecvt_utf8_utf16<wchar_t>
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
__codecvt_utf8_utf16<wchar_t>::result
__codecvt_utf8_utf16<wchar_t>::do_out(state_type&,
const intern_type* frm, const intern_type* frm_end, const intern_type*& frm_nxt,
extern_type* to, extern_type* to_end, extern_type*& to_nxt) const
{
+#if defined(_LIBCPP_SHORT_WCHAR)
+ const uint16_t* _frm = reinterpret_cast<const uint16_t*>(frm);
+ const uint16_t* _frm_end = reinterpret_cast<const uint16_t*>(frm_end);
+ const uint16_t* _frm_nxt = _frm;
+#else
const uint32_t* _frm = reinterpret_cast<const uint32_t*>(frm);
const uint32_t* _frm_end = reinterpret_cast<const uint32_t*>(frm_end);
const uint32_t* _frm_nxt = _frm;
+#endif
uint8_t* _to = reinterpret_cast<uint8_t*>(to);
uint8_t* _to_end = reinterpret_cast<uint8_t*>(to_end);
uint8_t* _to_nxt = _to;
@@ -4156,9 +4300,15 @@ __codecvt_utf8_utf16<wchar_t>::do_in(state_type&,
const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
const uint8_t* _frm_nxt = _frm;
+#if defined(_LIBCPP_SHORT_WCHAR)
+ uint16_t* _to = reinterpret_cast<uint16_t*>(to);
+ uint16_t* _to_end = reinterpret_cast<uint16_t*>(to_end);
+ uint16_t* _to_nxt = _to;
+#else
uint32_t* _to = reinterpret_cast<uint32_t*>(to);
uint32_t* _to_end = reinterpret_cast<uint32_t*>(to_end);
uint32_t* _to_nxt = _to;
+#endif
result r = utf8_to_utf16(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
_Maxcode_, _Mode_);
frm_nxt = frm + (_frm_nxt - _frm);
@@ -4202,6 +4352,7 @@ __codecvt_utf8_utf16<wchar_t>::do_max_length() const noexcept
return 7;
return 4;
}
+#endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS
// __codecvt_utf8_utf16<char16_t>
@@ -4377,7 +4528,7 @@ __widen_from_utf8<32>::~__widen_from_utf8()
{
}
-
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
static bool checked_string_to_wchar_convert(wchar_t& dest,
const char* ptr,
locale_t loc) {
@@ -4392,6 +4543,19 @@ static bool checked_string_to_wchar_convert(wchar_t& dest,
dest = out;
return true;
}
+#endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS
+
+#ifdef _LIBCPP_HAS_NO_WIDE_CHARACTERS
+static bool is_narrow_non_breaking_space(const char* ptr) {
+ // https://www.fileformat.info/info/unicode/char/202f/index.htm
+ return ptr[0] == '\xe2' && ptr[1] == '\x80' && ptr[2] == '\xaf';
+}
+
+static bool is_non_breaking_space(const char* ptr) {
+ // https://www.fileformat.info/info/unicode/char/0a/index.htm
+ return ptr[0] == '\xc2' && ptr[1] == '\xa0';
+}
+#endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS
static bool checked_string_to_char_convert(char& dest,
const char* ptr,
@@ -4402,6 +4566,8 @@ static bool checked_string_to_char_convert(char& dest,
dest = *ptr;
return true;
}
+
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
// First convert the MBS into a wide char then attempt to narrow it using
// wctob_l.
wchar_t wout;
@@ -4412,7 +4578,7 @@ static bool checked_string_to_char_convert(char& dest,
dest = res;
return true;
}
- // FIXME: Work around specific multibyte sequences that we can reasonable
+ // FIXME: Work around specific multibyte sequences that we can reasonably
// translate into a different single byte.
switch (wout) {
case L'\u202F': // narrow non-breaking space
@@ -4422,6 +4588,16 @@ static bool checked_string_to_char_convert(char& dest,
default:
return false;
}
+#else // _LIBCPP_HAS_NO_WIDE_CHARACTERS
+ // FIXME: Work around specific multibyte sequences that we can reasonably
+ // translate into a different single byte.
+ if (is_narrow_non_breaking_space(ptr) || is_non_breaking_space(ptr)) {
+ dest = ' ';
+ return true;
+ }
+
+ return false;
+#endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS
_LIBCPP_UNREACHABLE();
}
@@ -4429,7 +4605,9 @@ static bool checked_string_to_char_convert(char& dest,
// numpunct<char> && numpunct<wchar_t>
locale::id numpunct< char >::id;
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
locale::id numpunct<wchar_t>::id;
+#endif
numpunct<char>::numpunct(size_t refs)
: locale::facet(refs),
@@ -4438,35 +4616,49 @@ numpunct<char>::numpunct(size_t refs)
{
}
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
numpunct<wchar_t>::numpunct(size_t refs)
: locale::facet(refs),
__decimal_point_(L'.'),
__thousands_sep_(L',')
{
}
+#endif
numpunct<char>::~numpunct()
{
}
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
numpunct<wchar_t>::~numpunct()
{
}
+#endif
char numpunct< char >::do_decimal_point() const {return __decimal_point_;}
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
wchar_t numpunct<wchar_t>::do_decimal_point() const {return __decimal_point_;}
+#endif
char numpunct< char >::do_thousands_sep() const {return __thousands_sep_;}
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
wchar_t numpunct<wchar_t>::do_thousands_sep() const {return __thousands_sep_;}
+#endif
string numpunct< char >::do_grouping() const {return __grouping_;}
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
string numpunct<wchar_t>::do_grouping() const {return __grouping_;}
+#endif
string numpunct< char >::do_truename() const {return "true";}
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
wstring numpunct<wchar_t>::do_truename() const {return L"true";}
+#endif
string numpunct< char >::do_falsename() const {return "false";}
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
wstring numpunct<wchar_t>::do_falsename() const {return L"false";}
+#endif
// numpunct_byname<char>
@@ -4489,6 +4681,7 @@ numpunct_byname<char>::~numpunct_byname()
void
numpunct_byname<char>::__init(const char* nm)
{
+ typedef numpunct<char> base;
if (strcmp(nm, "C") != 0)
{
__libcpp_unique_locale loc(nm);
@@ -4497,10 +4690,12 @@ numpunct_byname<char>::__init(const char* nm)
" failed to construct for " + string(nm));
lconv* lc = __libcpp_localeconv_l(loc.get());
- checked_string_to_char_convert(__decimal_point_, lc->decimal_point,
- loc.get());
- checked_string_to_char_convert(__thousands_sep_, lc->thousands_sep,
- loc.get());
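+ // If the localized separators cannot be narrowed to a single char, keep the
+ // "C" locale defaults inherited from numpunct<char>.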
+ if (!checked_string_to_char_convert(__decimal_point_, lc->decimal_point,
+ loc.get()))
+ __decimal_point_ = base::do_decimal_point();
+ if (!checked_string_to_char_convert(__thousands_sep_, lc->thousands_sep,
+ loc.get()))
+ __thousands_sep_ = base::do_thousands_sep();
__grouping_ = lc->grouping;
// localization for truename and falsename is not available
}
@@ -4508,6 +4703,7 @@ numpunct_byname<char>::__init(const char* nm)
// numpunct_byname<wchar_t>
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
numpunct_byname<wchar_t>::numpunct_byname(const char* nm, size_t refs)
: numpunct<wchar_t>(refs)
{
@@ -4543,6 +4739,7 @@ numpunct_byname<wchar_t>::__init(const char* nm)
// localization for truename and falsename is not available
}
}
+#endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS
// num_get helpers
@@ -4718,6 +4915,7 @@ init_weeks()
return weeks;
}
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
static
wstring*
init_wweeks()
@@ -4739,6 +4937,7 @@ init_wweeks()
weeks[13] = L"Sat";
return weeks;
}
+#endif
template <>
const string*
@@ -4748,6 +4947,7 @@ __time_get_c_storage<char>::__weeks() const
return weeks;
}
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
template <>
const wstring*
__time_get_c_storage<wchar_t>::__weeks() const
@@ -4755,6 +4955,7 @@ __time_get_c_storage<wchar_t>::__weeks() const
static const wstring* weeks = init_wweeks();
return weeks;
}
+#endif
static
string*
@@ -4788,6 +4989,7 @@ init_months()
return months;
}
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
static
wstring*
init_wmonths()
@@ -4819,6 +5021,7 @@ init_wmonths()
months[23] = L"Dec";
return months;
}
+#endif
template <>
const string*
@@ -4828,6 +5031,7 @@ __time_get_c_storage<char>::__months() const
return months;
}
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
template <>
const wstring*
__time_get_c_storage<wchar_t>::__months() const
@@ -4835,6 +5039,7 @@ __time_get_c_storage<wchar_t>::__months() const
static const wstring* months = init_wmonths();
return months;
}
+#endif
static
string*
@@ -4846,6 +5051,7 @@ init_am_pm()
return am_pm;
}
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
static
wstring*
init_wam_pm()
@@ -4855,6 +5061,7 @@ init_wam_pm()
am_pm[1] = L"PM";
return am_pm;
}
+#endif
template <>
const string*
@@ -4864,6 +5071,7 @@ __time_get_c_storage<char>::__am_pm() const
return am_pm;
}
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
template <>
const wstring*
__time_get_c_storage<wchar_t>::__am_pm() const
@@ -4871,6 +5079,7 @@ __time_get_c_storage<wchar_t>::__am_pm() const
static const wstring* am_pm = init_wam_pm();
return am_pm;
}
+#endif
template <>
const string&
@@ -4880,6 +5089,7 @@ __time_get_c_storage<char>::__x() const
return s;
}
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
template <>
const wstring&
__time_get_c_storage<wchar_t>::__x() const
@@ -4887,6 +5097,7 @@ __time_get_c_storage<wchar_t>::__x() const
static wstring s(L"%m/%d/%y");
return s;
}
+#endif
template <>
const string&
@@ -4896,6 +5107,7 @@ __time_get_c_storage<char>::__X() const
return s;
}
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
template <>
const wstring&
__time_get_c_storage<wchar_t>::__X() const
@@ -4903,6 +5115,7 @@ __time_get_c_storage<wchar_t>::__X() const
static wstring s(L"%H:%M:%S");
return s;
}
+#endif
template <>
const string&
@@ -4912,6 +5125,7 @@ __time_get_c_storage<char>::__c() const
return s;
}
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
template <>
const wstring&
__time_get_c_storage<wchar_t>::__c() const
@@ -4919,6 +5133,7 @@ __time_get_c_storage<wchar_t>::__c() const
static wstring s(L"%a %b %d %H:%M:%S %Y");
return s;
}
+#endif
template <>
const string&
@@ -4928,6 +5143,7 @@ __time_get_c_storage<char>::__r() const
return s;
}
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
template <>
const wstring&
__time_get_c_storage<wchar_t>::__r() const
@@ -4935,6 +5151,7 @@ __time_get_c_storage<wchar_t>::__r() const
static wstring s(L"%I:%M:%S %p");
return s;
}
+#endif
// time_get_byname
@@ -5113,6 +5330,7 @@ __time_get_storage<char>::__analyze(char fmt, const ctype<char>& ct)
#pragma clang diagnostic ignored "-Wmissing-braces"
#endif
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
template <>
wstring
__time_get_storage<wchar_t>::__analyze(char fmt, const ctype<wchar_t>& ct)
@@ -5262,6 +5480,7 @@ __time_get_storage<wchar_t>::__analyze(char fmt, const ctype<wchar_t>& ct)
}
return result;
}
+#endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS
template <>
void
@@ -5300,6 +5519,7 @@ __time_get_storage<char>::init(const ctype<char>& ct)
__X_ = __analyze('X', ct);
}
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
template <>
void
__time_get_storage<wchar_t>::init(const ctype<wchar_t>& ct)
@@ -5375,6 +5595,7 @@ __time_get_storage<wchar_t>::init(const ctype<wchar_t>& ct)
__x_ = __analyze('x', ct);
__X_ = __analyze('X', ct);
}
+#endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS
template <class CharT>
struct _LIBCPP_HIDDEN __time_get_temp
@@ -5402,6 +5623,7 @@ __time_get_storage<char>::__time_get_storage(const string& __nm)
init(ct);
}
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
template <>
__time_get_storage<wchar_t>::__time_get_storage(const char* __nm)
: __time_get(__nm)
@@ -5417,6 +5639,7 @@ __time_get_storage<wchar_t>::__time_get_storage(const string& __nm)
const __time_get_temp<wchar_t> ct(__nm);
init(ct);
}
+#endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS
template <>
time_base::dateorder
@@ -5505,6 +5728,7 @@ __time_get_storage<char>::__do_date_order() const
return time_base::no_order;
}
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
template <>
time_base::dateorder
__time_get_storage<wchar_t>::__do_date_order() const
@@ -5591,6 +5815,7 @@ __time_get_storage<wchar_t>::__do_date_order() const
}
return time_base::no_order;
}
+#endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS
// time_put
@@ -5627,6 +5852,7 @@ __time_put::__do_put(char* __nb, char*& __ne, const tm* __tm,
__ne = __nb + n;
}
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
void
__time_put::__do_put(wchar_t* __wb, wchar_t*& __we, const tm* __tm,
char __fmt, char __mod) const
@@ -5641,6 +5867,7 @@ __time_put::__do_put(wchar_t* __wb, wchar_t*& __we, const tm* __tm,
__throw_runtime_error("locale not supported");
__we = __wb + j;
}
+#endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS
// moneypunct_byname
@@ -6120,6 +6347,7 @@ moneypunct_byname<char, true>::init(const char* nm)
#endif // !_LIBCPP_MSVCRT
}
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
template<>
void
moneypunct_byname<wchar_t, false>::init(const char* nm)
@@ -6267,6 +6495,7 @@ moneypunct_byname<wchar_t, true>::init(const char* nm)
lc->int_n_sign_posn, L' ');
#endif // !_LIBCPP_MSVCRT
}
+#endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS
void __do_nothing(void*) {}
@@ -6280,63 +6509,63 @@ void __throw_runtime_error(const char* msg)
#endif
}
-template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS collate<char>;
-template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS collate<wchar_t>;
+ template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS collate<char>;
+_LIBCPP_IF_WIDE_CHARACTERS(template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS collate<wchar_t>;)
-template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS num_get<char>;
-template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS num_get<wchar_t>;
+ template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS num_get<char>;
+_LIBCPP_IF_WIDE_CHARACTERS(template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS num_get<wchar_t>;)
-template struct _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS __num_get<char>;
-template struct _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS __num_get<wchar_t>;
+ template struct _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS __num_get<char>;
+_LIBCPP_IF_WIDE_CHARACTERS(template struct _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS __num_get<wchar_t>;)
-template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS num_put<char>;
-template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS num_put<wchar_t>;
+ template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS num_put<char>;
+_LIBCPP_IF_WIDE_CHARACTERS(template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS num_put<wchar_t>;)
-template struct _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS __num_put<char>;
-template struct _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS __num_put<wchar_t>;
+ template struct _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS __num_put<char>;
+_LIBCPP_IF_WIDE_CHARACTERS(template struct _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS __num_put<wchar_t>;)
-template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS time_get<char>;
-template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS time_get<wchar_t>;
+ template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS time_get<char>;
+_LIBCPP_IF_WIDE_CHARACTERS(template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS time_get<wchar_t>;)
-template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS time_get_byname<char>;
-template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS time_get_byname<wchar_t>;
+ template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS time_get_byname<char>;
+_LIBCPP_IF_WIDE_CHARACTERS(template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS time_get_byname<wchar_t>;)
-template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS time_put<char>;
-template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS time_put<wchar_t>;
+ template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS time_put<char>;
+_LIBCPP_IF_WIDE_CHARACTERS(template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS time_put<wchar_t>;)
-template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS time_put_byname<char>;
-template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS time_put_byname<wchar_t>;
+ template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS time_put_byname<char>;
+_LIBCPP_IF_WIDE_CHARACTERS(template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS time_put_byname<wchar_t>;)
-template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS moneypunct<char, false>;
-template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS moneypunct<char, true>;
-template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS moneypunct<wchar_t, false>;
-template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS moneypunct<wchar_t, true>;
+ template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS moneypunct<char, false>;
+ template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS moneypunct<char, true>;
+_LIBCPP_IF_WIDE_CHARACTERS(template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS moneypunct<wchar_t, false>;)
+_LIBCPP_IF_WIDE_CHARACTERS(template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS moneypunct<wchar_t, true>;)
-template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS moneypunct_byname<char, false>;
-template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS moneypunct_byname<char, true>;
-template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS moneypunct_byname<wchar_t, false>;
-template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS moneypunct_byname<wchar_t, true>;
+ template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS moneypunct_byname<char, false>;
+ template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS moneypunct_byname<char, true>;
+_LIBCPP_IF_WIDE_CHARACTERS(template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS moneypunct_byname<wchar_t, false>;)
+_LIBCPP_IF_WIDE_CHARACTERS(template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS moneypunct_byname<wchar_t, true>;)
-template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS money_get<char>;
-template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS money_get<wchar_t>;
+ template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS money_get<char>;
+_LIBCPP_IF_WIDE_CHARACTERS(template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS money_get<wchar_t>;)
-template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS __money_get<char>;
-template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS __money_get<wchar_t>;
+ template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS __money_get<char>;
+_LIBCPP_IF_WIDE_CHARACTERS(template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS __money_get<wchar_t>;)
-template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS money_put<char>;
-template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS money_put<wchar_t>;
+ template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS money_put<char>;
+_LIBCPP_IF_WIDE_CHARACTERS(template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS money_put<wchar_t>;)
-template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS __money_put<char>;
-template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS __money_put<wchar_t>;
+ template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS __money_put<char>;
+_LIBCPP_IF_WIDE_CHARACTERS(template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS __money_put<wchar_t>;)
-template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS messages<char>;
-template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS messages<wchar_t>;
+ template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS messages<char>;
+_LIBCPP_IF_WIDE_CHARACTERS(template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS messages<wchar_t>;)
-template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS messages_byname<char>;
-template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS messages_byname<wchar_t>;
+ template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS messages_byname<char>;
+_LIBCPP_IF_WIDE_CHARACTERS(template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS messages_byname<wchar_t>;)
-template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS codecvt_byname<char, char, mbstate_t>;
-template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS codecvt_byname<wchar_t, char, mbstate_t>;
+ template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS codecvt_byname<char, char, mbstate_t>;
+_LIBCPP_IF_WIDE_CHARACTERS(template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS codecvt_byname<wchar_t, char, mbstate_t>;)
template class _LIBCPP_DEPRECATED_IN_CXX20 _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS codecvt_byname<char16_t, char, mbstate_t>;
template class _LIBCPP_DEPRECATED_IN_CXX20 _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS codecvt_byname<char32_t, char, mbstate_t>;
#ifndef _LIBCPP_HAS_NO_CHAR8_T
diff --git a/libcxx/src/memory.cpp b/libcxx/src/memory.cpp
index 9bd27df26834..4c9bf9f073c8 100644
--- a/libcxx/src/memory.cpp
+++ b/libcxx/src/memory.cpp
@@ -1,4 +1,4 @@
-//===------------------------ memory.cpp ----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -8,11 +8,11 @@
#include "memory"
#ifndef _LIBCPP_HAS_NO_THREADS
-#include "mutex"
-#include "thread"
-#if defined(__ELF__) && defined(_LIBCPP_LINK_PTHREAD_LIB)
-#pragma comment(lib, "pthread")
-#endif
+# include "mutex"
+# include "thread"
+# if defined(__ELF__) && defined(_LIBCPP_LINK_PTHREAD_LIB)
+# pragma comment(lib, "pthread")
+# endif
#endif
#include "include/atomic_support.h"
@@ -130,7 +130,7 @@ __shared_weak_count::__get_deleter(const type_info&) const noexcept
return nullptr;
}
-#if !defined(_LIBCPP_HAS_NO_ATOMIC_HEADER)
+#if !defined(_LIBCPP_HAS_NO_THREADS)
_LIBCPP_SAFE_STATIC static const std::size_t __sp_mut_count = 16;
_LIBCPP_SAFE_STATIC static __libcpp_mutex_t mut_back[__sp_mut_count] =
@@ -181,28 +181,7 @@ __get_sp_mut(const void* p)
return muts[hash<const void*>()(p) & (__sp_mut_count-1)];
}
-#endif // !defined(_LIBCPP_HAS_NO_ATOMIC_HEADER)
-
-void
-declare_reachable(void*)
-{
-}
-
-void
-declare_no_pointers(char*, size_t)
-{
-}
-
-void
-undeclare_no_pointers(char*, size_t)
-{
-}
-
-void*
-__undeclare_reachable(void* p)
-{
- return p;
-}
+#endif // !defined(_LIBCPP_HAS_NO_THREADS)
void*
align(size_t alignment, size_t size, void*& ptr, size_t& space)
diff --git a/libcxx/src/mutex.cpp b/libcxx/src/mutex.cpp
index 36362e34f37b..d6758fb0ef95 100644
--- a/libcxx/src/mutex.cpp
+++ b/libcxx/src/mutex.cpp
@@ -1,4 +1,4 @@
-//===------------------------- mutex.cpp ----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/src/mutex_destructor.cpp b/libcxx/src/mutex_destructor.cpp
index 07197c3fb4c8..e8b1e42dd54c 100644
--- a/libcxx/src/mutex_destructor.cpp
+++ b/libcxx/src/mutex_destructor.cpp
@@ -1,4 +1,4 @@
-//===--------------------- mutex_destructor.cpp ---------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/src/new.cpp b/libcxx/src/new.cpp
index 5486815abb2a..0a9bee4e39a4 100644
--- a/libcxx/src/new.cpp
+++ b/libcxx/src/new.cpp
@@ -1,4 +1,4 @@
-//===--------------------------- new.cpp ----------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/src/optional.cpp b/libcxx/src/optional.cpp
index 39405bec12a0..251ebe2e6826 100644
--- a/libcxx/src/optional.cpp
+++ b/libcxx/src/optional.cpp
@@ -1,4 +1,4 @@
-//===------------------------ optional.cpp --------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/src/random.cpp b/libcxx/src/random.cpp
index 8ea080842e0d..86740dd49032 100644
--- a/libcxx/src/random.cpp
+++ b/libcxx/src/random.cpp
@@ -1,4 +1,4 @@
-//===-------------------------- random.cpp --------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/src/random_shuffle.cpp b/libcxx/src/random_shuffle.cpp
index be2c47fa0def..df9b7d53c847 100644
--- a/libcxx/src/random_shuffle.cpp
+++ b/libcxx/src/random_shuffle.cpp
@@ -1,4 +1,4 @@
-//===----------------------- random_shuffle.cpp ---------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/src/regex.cpp b/libcxx/src/regex.cpp
index d31e49487432..425339a5c9b8 100644
--- a/libcxx/src/regex.cpp
+++ b/libcxx/src/regex.cpp
@@ -1,4 +1,4 @@
-//===-------------------------- regex.cpp ---------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/src/shared_mutex.cpp b/libcxx/src/shared_mutex.cpp
index 5feef9f4889f..6a5a738a6762 100644
--- a/libcxx/src/shared_mutex.cpp
+++ b/libcxx/src/shared_mutex.cpp
@@ -1,4 +1,4 @@
-//===---------------------- shared_mutex.cpp ------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/src/stdexcept.cpp b/libcxx/src/stdexcept.cpp
index f8f335f34ae1..b9c89703f3e8 100644
--- a/libcxx/src/stdexcept.cpp
+++ b/libcxx/src/stdexcept.cpp
@@ -1,4 +1,4 @@
-//===------------------------ stdexcept.cpp -------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/src/string.cpp b/libcxx/src/string.cpp
index 97a773f79a3b..608dcb2c5863 100644
--- a/libcxx/src/string.cpp
+++ b/libcxx/src/string.cpp
@@ -1,4 +1,4 @@
-//===------------------------- string.cpp ---------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -9,24 +9,37 @@
#include "string"
#include "charconv"
#include "cstdlib"
-#include "cwchar"
#include "cerrno"
#include "limits"
#include "stdexcept"
#include <stdio.h>
#include "__debug"
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
+# include "cwchar"
+#endif
+
_LIBCPP_BEGIN_NAMESPACE_STD
-template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS __basic_string_common<true>;
+void __basic_string_common<true>::__throw_length_error() const {
+ _VSTD::__throw_length_error("basic_string");
+}
+
+void __basic_string_common<true>::__throw_out_of_range() const {
+ _VSTD::__throw_out_of_range("basic_string");
+}
#define _LIBCPP_EXTERN_TEMPLATE_DEFINE(...) template __VA_ARGS__;
#ifdef _LIBCPP_ABI_STRING_OPTIMIZED_EXTERNAL_INSTANTIATION
-_LIBCPP_STRING_UNSTABLE_EXTERN_TEMPLATE_LIST(_LIBCPP_EXTERN_TEMPLATE_DEFINE, char)
-_LIBCPP_STRING_UNSTABLE_EXTERN_TEMPLATE_LIST(_LIBCPP_EXTERN_TEMPLATE_DEFINE, wchar_t)
+ _LIBCPP_STRING_UNSTABLE_EXTERN_TEMPLATE_LIST(_LIBCPP_EXTERN_TEMPLATE_DEFINE, char)
+# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
+ _LIBCPP_STRING_UNSTABLE_EXTERN_TEMPLATE_LIST(_LIBCPP_EXTERN_TEMPLATE_DEFINE, wchar_t)
+# endif
#else
-_LIBCPP_STRING_V1_EXTERN_TEMPLATE_LIST(_LIBCPP_EXTERN_TEMPLATE_DEFINE, char)
-_LIBCPP_STRING_V1_EXTERN_TEMPLATE_LIST(_LIBCPP_EXTERN_TEMPLATE_DEFINE, wchar_t)
+ _LIBCPP_STRING_V1_EXTERN_TEMPLATE_LIST(_LIBCPP_EXTERN_TEMPLATE_DEFINE, char)
+# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
+ _LIBCPP_STRING_V1_EXTERN_TEMPLATE_LIST(_LIBCPP_EXTERN_TEMPLATE_DEFINE, wchar_t)
+# endif
#endif
#undef _LIBCPP_EXTERN_TEMPLATE_DEFINE
@@ -131,6 +144,7 @@ as_integer( const string& func, const string& s, size_t* idx, int base )
return as_integer_helper<unsigned long long>( func, s, idx, base, strtoull );
}
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
// wstring
template<>
inline
@@ -175,6 +189,7 @@ as_integer( const string& func, const wstring& s, size_t* idx, int base )
{
return as_integer_helper<unsigned long long>( func, s, idx, base, wcstoull );
}
+#endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS
// as_float
@@ -226,6 +241,7 @@ as_float( const string& func, const string& s, size_t* idx )
return as_float_helper<long double>( func, s, idx, strtold );
}
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
template<>
inline
float
@@ -249,6 +265,7 @@ as_float( const string& func, const wstring& s, size_t* idx )
{
return as_float_helper<long double>( func, s, idx, wcstold );
}
+#endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS
} // unnamed namespace
@@ -258,11 +275,13 @@ stoi(const string& str, size_t* idx, int base)
return as_integer<int>( "stoi", str, idx, base );
}
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
int
stoi(const wstring& str, size_t* idx, int base)
{
return as_integer<int>( "stoi", str, idx, base );
}
+#endif
long
stol(const string& str, size_t* idx, int base)
@@ -270,11 +289,13 @@ stol(const string& str, size_t* idx, int base)
return as_integer<long>( "stol", str, idx, base );
}
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
long
stol(const wstring& str, size_t* idx, int base)
{
return as_integer<long>( "stol", str, idx, base );
}
+#endif
unsigned long
stoul(const string& str, size_t* idx, int base)
@@ -282,11 +303,13 @@ stoul(const string& str, size_t* idx, int base)
return as_integer<unsigned long>( "stoul", str, idx, base );
}
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
unsigned long
stoul(const wstring& str, size_t* idx, int base)
{
return as_integer<unsigned long>( "stoul", str, idx, base );
}
+#endif
long long
stoll(const string& str, size_t* idx, int base)
@@ -294,11 +317,13 @@ stoll(const string& str, size_t* idx, int base)
return as_integer<long long>( "stoll", str, idx, base );
}
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
long long
stoll(const wstring& str, size_t* idx, int base)
{
return as_integer<long long>( "stoll", str, idx, base );
}
+#endif
unsigned long long
stoull(const string& str, size_t* idx, int base)
@@ -306,11 +331,13 @@ stoull(const string& str, size_t* idx, int base)
return as_integer<unsigned long long>( "stoull", str, idx, base );
}
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
unsigned long long
stoull(const wstring& str, size_t* idx, int base)
{
return as_integer<unsigned long long>( "stoull", str, idx, base );
}
+#endif
float
stof(const string& str, size_t* idx)
@@ -318,11 +345,13 @@ stof(const string& str, size_t* idx)
return as_float<float>( "stof", str, idx );
}
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
float
stof(const wstring& str, size_t* idx)
{
return as_float<float>( "stof", str, idx );
}
+#endif
double
stod(const string& str, size_t* idx)
@@ -330,11 +359,13 @@ stod(const string& str, size_t* idx)
return as_float<double>( "stod", str, idx );
}
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
double
stod(const wstring& str, size_t* idx)
{
return as_float<double>( "stod", str, idx );
}
+#endif
long double
stold(const string& str, size_t* idx)
@@ -342,11 +373,13 @@ stold(const string& str, size_t* idx)
return as_float<long double>( "stold", str, idx );
}
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
long double
stold(const wstring& str, size_t* idx)
{
return as_float<long double>( "stold", str, idx );
}
+#endif
// to_string
@@ -397,6 +430,7 @@ struct initial_string<string>
}
};
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
template <>
struct initial_string<wstring>
{
@@ -421,6 +455,7 @@ get_swprintf()
return static_cast<int (__cdecl*)(wchar_t* __restrict, size_t, const wchar_t*__restrict, ...)>(_snwprintf);
#endif
}
+#endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS
template <typename S, typename V>
S i_to_string(V v)
@@ -444,20 +479,23 @@ string to_string (unsigned val) { return i_to_string< string>(val); }
string to_string (unsigned long val) { return i_to_string< string>(val); }
string to_string (unsigned long long val) { return i_to_string< string>(val); }
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
wstring to_wstring(int val) { return i_to_string<wstring>(val); }
wstring to_wstring(long val) { return i_to_string<wstring>(val); }
wstring to_wstring(long long val) { return i_to_string<wstring>(val); }
wstring to_wstring(unsigned val) { return i_to_string<wstring>(val); }
wstring to_wstring(unsigned long val) { return i_to_string<wstring>(val); }
wstring to_wstring(unsigned long long val) { return i_to_string<wstring>(val); }
-
+#endif
string to_string (float val) { return as_string(snprintf, initial_string< string>()(), "%f", val); }
string to_string (double val) { return as_string(snprintf, initial_string< string>()(), "%f", val); }
string to_string (long double val) { return as_string(snprintf, initial_string< string>()(), "%Lf", val); }
+#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
wstring to_wstring(float val) { return as_string(get_swprintf(), initial_string<wstring>()(), L"%f", val); }
wstring to_wstring(double val) { return as_string(get_swprintf(), initial_string<wstring>()(), L"%f", val); }
wstring to_wstring(long double val) { return as_string(get_swprintf(), initial_string<wstring>()(), L"%Lf", val); }
+#endif
_LIBCPP_END_NAMESPACE_STD
diff --git a/libcxx/src/strstream.cpp b/libcxx/src/strstream.cpp
index ae66806833aa..e62c07768bd2 100644
--- a/libcxx/src/strstream.cpp
+++ b/libcxx/src/strstream.cpp
@@ -1,4 +1,4 @@
-//===------------------------ strstream.cpp -------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/src/support/ibm/mbsnrtowcs.cpp b/libcxx/src/support/ibm/mbsnrtowcs.cpp
new file mode 100644
index 000000000000..d7220fb46d8a
--- /dev/null
+++ b/libcxx/src/support/ibm/mbsnrtowcs.cpp
@@ -0,0 +1,95 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <cstddef> // size_t
+#include <cwchar> // mbstate_t
+#include <limits.h> // MB_LEN_MAX
+#include <string.h> // wmemcpy
+
+// Returns the number of wide characters found in the multibyte sequence `src`
+// (of `src_size_bytes` bytes) that fit in the buffer `dst` (of `max_dest_chars`
+// elements). The count returned excludes the null terminator.
+// When `dst` is NULL, no characters are copied to `dst`.
+// Returns (size_t) -1 when an invalid sequence is encountered.
+// Leaves *`src` pointing to the next character to convert or NULL
+// if a null character was converted from *`src`.
+_LIBCPP_FUNC_VIS
+size_t mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src,
+ size_t src_size_bytes, size_t max_dest_chars,
+ mbstate_t *__restrict ps) {
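+ // These sentinels mirror mbrtowc()'s return convention: 0 for a converted
+ // null character, (size_t)-1 for an invalid sequence and (size_t)-2 for an
+ // incomplete sequence.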
+ const size_t terminated_sequence = static_cast<size_t>(0);
+ const size_t invalid_sequence = static_cast<size_t>(-1);
+ const size_t incomplete_sequence = static_cast<size_t>(-2);
+
+ size_t source_converted;
+ size_t dest_converted;
+ size_t result = 0;
+
+ // If `dst` is null then `max_dest_chars` should be ignored according to the
+ // standard. Setting `max_dest_chars` to a large value has this effect.
+ if (dst == nullptr)
+ max_dest_chars = static_cast<size_t>(-1);
+
+ for (dest_converted = source_converted = 0;
+ source_converted < src_size_bytes && (!dst || dest_converted < max_dest_chars);
+ ++dest_converted, source_converted += result) {
+ // Converts one multibyte character.
+ // If result (char_size) is greater than 0, it's the size in bytes of that character.
+ // If result (char_size) is zero, it indicates that the null character has been found.
+ // Otherwise, it's an error and errno may be set.
+ size_t source_remaining = src_size_bytes - source_converted;
+ size_t dest_remaining = max_dest_chars - dest_converted;
+
+ if (dst == nullptr) {
+ result = mbrtowc(NULL, *src + source_converted, source_remaining, ps);
+ } else if (dest_remaining >= source_remaining) {
+ // dst has enough space to translate in-place.
+ result = mbrtowc(dst + dest_converted, *src + source_converted, source_remaining, ps);
+ } else {
+ /*
+ * dst may not have enough space, so use a temporary buffer.
+ *
+ * We need to save a copy of the conversion state
+ * here so we can restore it if the multibyte
+ * character is too long for the buffer.
+ */
+ wchar_t buff[MB_LEN_MAX];
+ mbstate_t mbstate_tmp;
+
+ if (ps != nullptr)
+ mbstate_tmp = *ps;
+ result = mbrtowc(buff, *src + source_converted, source_remaining, ps);
+
+ if (result > dest_remaining) {
+ // Multi-byte sequence for character won't fit.
+ if (ps != nullptr)
+ *ps = mbstate_tmp;
+ break;
+ } else {
+ // The buffer was used, so we need to copy the translation to dst.
+ wmemcpy(dst, buff, result);
+ }
+ }
+
+ // Don't do anything to change errno from here on.
+ if (result == invalid_sequence || result == terminated_sequence || result == incomplete_sequence) {
+ break;
+ }
+ }
+
+ if (dst) {
+ if (result == terminated_sequence)
+ *src = NULL;
+ else
+ *src += source_converted;
+ }
+ if (result == invalid_sequence)
+ return invalid_sequence;
+
+ return dest_converted;
+}
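+
+// A minimal usage sketch (illustrative only; `text`, `n` and the buffer size
+// are hypothetical, not part of this file's interface):
+//
+//   const char* p = text;
+//   wchar_t wbuf[64];
+//   mbstate_t st = mbstate_t();
+//   size_t n_wide = mbsnrtowcs(wbuf, &p, n, 64, &st);
+//   // (size_t)-1 signals an invalid sequence; otherwise `p` points at the
+//   // next unconverted character, or is NULL if the terminating null was
+//   // converted.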
diff --git a/libcxx/src/support/ibm/wcsnrtombs.cpp b/libcxx/src/support/ibm/wcsnrtombs.cpp
new file mode 100644
index 000000000000..f595e63283ae
--- /dev/null
+++ b/libcxx/src/support/ibm/wcsnrtombs.cpp
@@ -0,0 +1,93 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <cwchar> // mbstate_t
+#include <limits.h> // MB_LEN_MAX
+#include <stdlib.h> // MB_CUR_MAX, size_t
+#include <string.h> // memcpy
+
+// Converts up to `max_source_chars` wide characters from the buffer pointed to
+// by `*src` into the multibyte character sequence buffer `dst`, which must be
+// `dst_size_bytes` bytes in size. Returns the number of bytes in the sequence
+// converted from `*src`, excluding the null terminator.
+// Returns (size_t) -1 if an error occurs and sets errno.
+// If `dst` is NULL, `dst_size_bytes` is ignored and no bytes are copied to `dst`.
+_LIBCPP_FUNC_VIS
+size_t wcsnrtombs(char *__restrict dst, const wchar_t **__restrict src,
+ size_t max_source_chars, size_t dst_size_bytes,
+ mbstate_t *__restrict ps) {
+
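+ // wcrtomb() reports a wide character it cannot encode by returning
+ // (size_t)-1 and setting errno.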
+ const size_t invalid_wchar = static_cast<size_t>(-1);
+
+ size_t source_converted;
+ size_t dest_converted;
+ size_t result = 0;
+
+ // If `dst` is null then `dst_size_bytes` should be ignored according to the
+ // standard. Setting dst_size_bytes to a large value has this effect.
+ if (dst == nullptr)
+ dst_size_bytes = static_cast<size_t>(-1);
+
+ for (dest_converted = source_converted = 0;
+ source_converted < max_source_chars && (!dst || dest_converted < dst_size_bytes);
+ ++source_converted, dest_converted += result) {
+ wchar_t c = (*src)[source_converted];
+ size_t dest_remaining = dst_size_bytes - dest_converted;
+
+ if (dst == nullptr) {
+ result = wcrtomb(NULL, c, ps);
+ } else if (dest_remaining >= static_cast<size_t>(MB_CUR_MAX)) {
+ // dst has enough space to translate in-place.
+ result = wcrtomb(dst + dest_converted, c, ps);
+ } else {
+ /*
+ * dst may not have enough space, so use a temporary buffer.
+ *
+ * We need to save a copy of the conversion state
+ * here so we can restore it if the multibyte
+ * character is too long for the buffer.
+ */
+ char buff[MB_LEN_MAX];
+ mbstate_t mbstate_tmp;
+
+ if (ps != nullptr)
+ mbstate_tmp = *ps;
+ result = wcrtomb(buff, c, ps);
+
+ if (result > dest_remaining) {
+ // Multi-byte sequence for character won't fit.
+ if (ps != nullptr)
+ *ps = mbstate_tmp;
+ if (result != invalid_wchar)
+ break;
+ } else {
+ // The buffer was used, so we need to copy the translation to dst.
+ memcpy(dst, buff, result);
+ }
+ }
+
+ // On success, result (char_size) is the size of the converted multi-byte
+ // sequence. Otherwise, result (char_size) is (size_t)-1 and wcrtomb() has set errno.
+ if (result == invalid_wchar) {
+ if (dst)
+ *src = *src + source_converted;
+ return invalid_wchar;
+ }
+
+ if (c == L'\0') {
+ if (dst)
+ *src = NULL;
+ return dest_converted;
+ }
+ }
+
+ if (dst)
+ *src = *src + source_converted;
+
+ return dest_converted;
+}
diff --git a/libcxx/src/support/runtime/stdexcept_default.ipp b/libcxx/src/support/runtime/stdexcept_default.ipp
index ad7bd40b61a9..99d30453562f 100644
--- a/libcxx/src/support/runtime/stdexcept_default.ipp
+++ b/libcxx/src/support/runtime/stdexcept_default.ipp
@@ -1,4 +1,4 @@
-//===--------------------- stdexcept_default.ipp --------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/src/support/runtime/stdexcept_vcruntime.ipp b/libcxx/src/support/runtime/stdexcept_vcruntime.ipp
index 94eed465ae97..8a6d939cb09f 100644
--- a/libcxx/src/support/runtime/stdexcept_vcruntime.ipp
+++ b/libcxx/src/support/runtime/stdexcept_vcruntime.ipp
@@ -1,4 +1,4 @@
-//===------------------- stdexcept_vcruntime.ipp --------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/src/system_error.cpp b/libcxx/src/system_error.cpp
index a1ea6c4754ef..82472cbc84bf 100644
--- a/libcxx/src/system_error.cpp
+++ b/libcxx/src/system_error.cpp
@@ -1,4 +1,4 @@
-//===---------------------- system_error.cpp ------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/src/typeinfo.cpp b/libcxx/src/typeinfo.cpp
index ce3867ea93f0..03eaad7dceb7 100644
--- a/libcxx/src/typeinfo.cpp
+++ b/libcxx/src/typeinfo.cpp
@@ -1,4 +1,4 @@
-//===------------------------- typeinfo.cpp -------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/src/utility.cpp b/libcxx/src/utility.cpp
index 6a690dc2879e..6d2cc4accee8 100644
--- a/libcxx/src/utility.cpp
+++ b/libcxx/src/utility.cpp
@@ -1,4 +1,4 @@
-//===------------------------ utility.cpp ---------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/src/valarray.cpp b/libcxx/src/valarray.cpp
index 64d26583c74b..5a3a869181c9 100644
--- a/libcxx/src/valarray.cpp
+++ b/libcxx/src/valarray.cpp
@@ -1,4 +1,4 @@
-//===------------------------ valarray.cpp --------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/src/variant.cpp b/libcxx/src/variant.cpp
index 1fe70a1809c9..d38d3a799d32 100644
--- a/libcxx/src/variant.cpp
+++ b/libcxx/src/variant.cpp
@@ -1,4 +1,4 @@
-//===------------------------ variant.cpp ---------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libcxx/src/vector.cpp b/libcxx/src/vector.cpp
index 3b65e558fd0a..cc3d291e350e 100644
--- a/libcxx/src/vector.cpp
+++ b/libcxx/src/vector.cpp
@@ -1,4 +1,4 @@
-//===------------------------- vector.cpp ---------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -10,6 +10,12 @@
_LIBCPP_BEGIN_NAMESPACE_STD
-template class _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS __vector_base_common<true>;
+void __vector_base_common<true>::__throw_length_error() const {
+ _VSTD::__throw_length_error("vector");
+}
+
+void __vector_base_common<true>::__throw_out_of_range() const {
+ _VSTD::__throw_out_of_range("vector");
+}
_LIBCPP_END_NAMESPACE_STD
diff --git a/libunwind/include/__libunwind_config.h b/libunwind/include/__libunwind_config.h
index a50ba053884b..221980a24aeb 100644
--- a/libunwind/include/__libunwind_config.h
+++ b/libunwind/include/__libunwind_config.h
@@ -1,4 +1,4 @@
-//===------------------------- __libunwind_config.h -----------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libunwind/include/libunwind.h b/libunwind/include/libunwind.h
index 0feecd7bd6fc..e187ee27b0db 100644
--- a/libunwind/include/libunwind.h
+++ b/libunwind/include/libunwind.h
@@ -1,4 +1,4 @@
-//===---------------------------- libunwind.h -----------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -493,77 +493,150 @@ enum {
// 64-bit ARM64 registers
enum {
- UNW_ARM64_X0 = 0,
- UNW_ARM64_X1 = 1,
- UNW_ARM64_X2 = 2,
- UNW_ARM64_X3 = 3,
- UNW_ARM64_X4 = 4,
- UNW_ARM64_X5 = 5,
- UNW_ARM64_X6 = 6,
- UNW_ARM64_X7 = 7,
- UNW_ARM64_X8 = 8,
- UNW_ARM64_X9 = 9,
- UNW_ARM64_X10 = 10,
- UNW_ARM64_X11 = 11,
- UNW_ARM64_X12 = 12,
- UNW_ARM64_X13 = 13,
- UNW_ARM64_X14 = 14,
- UNW_ARM64_X15 = 15,
- UNW_ARM64_X16 = 16,
- UNW_ARM64_X17 = 17,
- UNW_ARM64_X18 = 18,
- UNW_ARM64_X19 = 19,
- UNW_ARM64_X20 = 20,
- UNW_ARM64_X21 = 21,
- UNW_ARM64_X22 = 22,
- UNW_ARM64_X23 = 23,
- UNW_ARM64_X24 = 24,
- UNW_ARM64_X25 = 25,
- UNW_ARM64_X26 = 26,
- UNW_ARM64_X27 = 27,
- UNW_ARM64_X28 = 28,
- UNW_ARM64_X29 = 29,
- UNW_ARM64_FP = 29,
- UNW_ARM64_X30 = 30,
- UNW_ARM64_LR = 30,
- UNW_ARM64_X31 = 31,
- UNW_ARM64_SP = 31,
- UNW_ARM64_PC = 32,
- // reserved block
- UNW_ARM64_RA_SIGN_STATE = 34,
+ UNW_AARCH64_X0 = 0,
+ UNW_AARCH64_X1 = 1,
+ UNW_AARCH64_X2 = 2,
+ UNW_AARCH64_X3 = 3,
+ UNW_AARCH64_X4 = 4,
+ UNW_AARCH64_X5 = 5,
+ UNW_AARCH64_X6 = 6,
+ UNW_AARCH64_X7 = 7,
+ UNW_AARCH64_X8 = 8,
+ UNW_AARCH64_X9 = 9,
+ UNW_AARCH64_X10 = 10,
+ UNW_AARCH64_X11 = 11,
+ UNW_AARCH64_X12 = 12,
+ UNW_AARCH64_X13 = 13,
+ UNW_AARCH64_X14 = 14,
+ UNW_AARCH64_X15 = 15,
+ UNW_AARCH64_X16 = 16,
+ UNW_AARCH64_X17 = 17,
+ UNW_AARCH64_X18 = 18,
+ UNW_AARCH64_X19 = 19,
+ UNW_AARCH64_X20 = 20,
+ UNW_AARCH64_X21 = 21,
+ UNW_AARCH64_X22 = 22,
+ UNW_AARCH64_X23 = 23,
+ UNW_AARCH64_X24 = 24,
+ UNW_AARCH64_X25 = 25,
+ UNW_AARCH64_X26 = 26,
+ UNW_AARCH64_X27 = 27,
+ UNW_AARCH64_X28 = 28,
+ UNW_AARCH64_X29 = 29,
+ UNW_AARCH64_FP = 29,
+ UNW_AARCH64_X30 = 30,
+ UNW_AARCH64_LR = 30,
+ UNW_AARCH64_X31 = 31,
+ UNW_AARCH64_SP = 31,
+ UNW_AARCH64_PC = 32,
+
// reserved block
- UNW_ARM64_D0 = 64,
- UNW_ARM64_D1 = 65,
- UNW_ARM64_D2 = 66,
- UNW_ARM64_D3 = 67,
- UNW_ARM64_D4 = 68,
- UNW_ARM64_D5 = 69,
- UNW_ARM64_D6 = 70,
- UNW_ARM64_D7 = 71,
- UNW_ARM64_D8 = 72,
- UNW_ARM64_D9 = 73,
- UNW_ARM64_D10 = 74,
- UNW_ARM64_D11 = 75,
- UNW_ARM64_D12 = 76,
- UNW_ARM64_D13 = 77,
- UNW_ARM64_D14 = 78,
- UNW_ARM64_D15 = 79,
- UNW_ARM64_D16 = 80,
- UNW_ARM64_D17 = 81,
- UNW_ARM64_D18 = 82,
- UNW_ARM64_D19 = 83,
- UNW_ARM64_D20 = 84,
- UNW_ARM64_D21 = 85,
- UNW_ARM64_D22 = 86,
- UNW_ARM64_D23 = 87,
- UNW_ARM64_D24 = 88,
- UNW_ARM64_D25 = 89,
- UNW_ARM64_D26 = 90,
- UNW_ARM64_D27 = 91,
- UNW_ARM64_D28 = 92,
- UNW_ARM64_D29 = 93,
- UNW_ARM64_D30 = 94,
- UNW_ARM64_D31 = 95,
+ UNW_AARCH64_RA_SIGN_STATE = 34,
+
+ // FP/vector registers
+ UNW_AARCH64_V0 = 64,
+ UNW_AARCH64_V1 = 65,
+ UNW_AARCH64_V2 = 66,
+ UNW_AARCH64_V3 = 67,
+ UNW_AARCH64_V4 = 68,
+ UNW_AARCH64_V5 = 69,
+ UNW_AARCH64_V6 = 70,
+ UNW_AARCH64_V7 = 71,
+ UNW_AARCH64_V8 = 72,
+ UNW_AARCH64_V9 = 73,
+ UNW_AARCH64_V10 = 74,
+ UNW_AARCH64_V11 = 75,
+ UNW_AARCH64_V12 = 76,
+ UNW_AARCH64_V13 = 77,
+ UNW_AARCH64_V14 = 78,
+ UNW_AARCH64_V15 = 79,
+ UNW_AARCH64_V16 = 80,
+ UNW_AARCH64_V17 = 81,
+ UNW_AARCH64_V18 = 82,
+ UNW_AARCH64_V19 = 83,
+ UNW_AARCH64_V20 = 84,
+ UNW_AARCH64_V21 = 85,
+ UNW_AARCH64_V22 = 86,
+ UNW_AARCH64_V23 = 87,
+ UNW_AARCH64_V24 = 88,
+ UNW_AARCH64_V25 = 89,
+ UNW_AARCH64_V26 = 90,
+ UNW_AARCH64_V27 = 91,
+ UNW_AARCH64_V28 = 92,
+ UNW_AARCH64_V29 = 93,
+ UNW_AARCH64_V30 = 94,
+ UNW_AARCH64_V31 = 95,
+
+ // Compatibility aliases
+ UNW_ARM64_X0 = UNW_AARCH64_X0,
+ UNW_ARM64_X1 = UNW_AARCH64_X1,
+ UNW_ARM64_X2 = UNW_AARCH64_X2,
+ UNW_ARM64_X3 = UNW_AARCH64_X3,
+ UNW_ARM64_X4 = UNW_AARCH64_X4,
+ UNW_ARM64_X5 = UNW_AARCH64_X5,
+ UNW_ARM64_X6 = UNW_AARCH64_X6,
+ UNW_ARM64_X7 = UNW_AARCH64_X7,
+ UNW_ARM64_X8 = UNW_AARCH64_X8,
+ UNW_ARM64_X9 = UNW_AARCH64_X9,
+ UNW_ARM64_X10 = UNW_AARCH64_X10,
+ UNW_ARM64_X11 = UNW_AARCH64_X11,
+ UNW_ARM64_X12 = UNW_AARCH64_X12,
+ UNW_ARM64_X13 = UNW_AARCH64_X13,
+ UNW_ARM64_X14 = UNW_AARCH64_X14,
+ UNW_ARM64_X15 = UNW_AARCH64_X15,
+ UNW_ARM64_X16 = UNW_AARCH64_X16,
+ UNW_ARM64_X17 = UNW_AARCH64_X17,
+ UNW_ARM64_X18 = UNW_AARCH64_X18,
+ UNW_ARM64_X19 = UNW_AARCH64_X19,
+ UNW_ARM64_X20 = UNW_AARCH64_X20,
+ UNW_ARM64_X21 = UNW_AARCH64_X21,
+ UNW_ARM64_X22 = UNW_AARCH64_X22,
+ UNW_ARM64_X23 = UNW_AARCH64_X23,
+ UNW_ARM64_X24 = UNW_AARCH64_X24,
+ UNW_ARM64_X25 = UNW_AARCH64_X25,
+ UNW_ARM64_X26 = UNW_AARCH64_X26,
+ UNW_ARM64_X27 = UNW_AARCH64_X27,
+ UNW_ARM64_X28 = UNW_AARCH64_X28,
+ UNW_ARM64_X29 = UNW_AARCH64_X29,
+ UNW_ARM64_FP = UNW_AARCH64_FP,
+ UNW_ARM64_X30 = UNW_AARCH64_X30,
+ UNW_ARM64_LR = UNW_AARCH64_LR,
+ UNW_ARM64_X31 = UNW_AARCH64_X31,
+ UNW_ARM64_SP = UNW_AARCH64_SP,
+ UNW_ARM64_PC = UNW_AARCH64_PC,
+ UNW_ARM64_RA_SIGN_STATE = UNW_AARCH64_RA_SIGN_STATE,
+ UNW_ARM64_D0 = UNW_AARCH64_V0,
+ UNW_ARM64_D1 = UNW_AARCH64_V1,
+ UNW_ARM64_D2 = UNW_AARCH64_V2,
+ UNW_ARM64_D3 = UNW_AARCH64_V3,
+ UNW_ARM64_D4 = UNW_AARCH64_V4,
+ UNW_ARM64_D5 = UNW_AARCH64_V5,
+ UNW_ARM64_D6 = UNW_AARCH64_V6,
+ UNW_ARM64_D7 = UNW_AARCH64_V7,
+ UNW_ARM64_D8 = UNW_AARCH64_V8,
+ UNW_ARM64_D9 = UNW_AARCH64_V9,
+ UNW_ARM64_D10 = UNW_AARCH64_V10,
+ UNW_ARM64_D11 = UNW_AARCH64_V11,
+ UNW_ARM64_D12 = UNW_AARCH64_V12,
+ UNW_ARM64_D13 = UNW_AARCH64_V13,
+ UNW_ARM64_D14 = UNW_AARCH64_V14,
+ UNW_ARM64_D15 = UNW_AARCH64_V15,
+ UNW_ARM64_D16 = UNW_AARCH64_V16,
+ UNW_ARM64_D17 = UNW_AARCH64_V17,
+ UNW_ARM64_D18 = UNW_AARCH64_V18,
+ UNW_ARM64_D19 = UNW_AARCH64_V19,
+ UNW_ARM64_D20 = UNW_AARCH64_V20,
+ UNW_ARM64_D21 = UNW_AARCH64_V21,
+ UNW_ARM64_D22 = UNW_AARCH64_V22,
+ UNW_ARM64_D23 = UNW_AARCH64_V23,
+ UNW_ARM64_D24 = UNW_AARCH64_V24,
+ UNW_ARM64_D25 = UNW_AARCH64_V25,
+ UNW_ARM64_D26 = UNW_AARCH64_V26,
+ UNW_ARM64_D27 = UNW_AARCH64_V27,
+ UNW_ARM64_D28 = UNW_AARCH64_V28,
+ UNW_ARM64_D29 = UNW_AARCH64_V29,
+ UNW_ARM64_D30 = UNW_AARCH64_V30,
+ UNW_ARM64_D31 = UNW_AARCH64_V31,
};
// 32-bit ARM registers. Numbers match DWARF for ARM spec #3.1 Table 1.
diff --git a/libunwind/include/mach-o/compact_unwind_encoding.h b/libunwind/include/mach-o/compact_unwind_encoding.h
index 5301b1055ef9..68d562eec438 100644
--- a/libunwind/include/mach-o/compact_unwind_encoding.h
+++ b/libunwind/include/mach-o/compact_unwind_encoding.h
@@ -1,4 +1,4 @@
-//===------------------ mach-o/compact_unwind_encoding.h ------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libunwind/include/unwind.h b/libunwind/include/unwind.h
index 1d3444cd83b4..b8d6020a3367 100644
--- a/libunwind/include/unwind.h
+++ b/libunwind/include/unwind.h
@@ -1,4 +1,4 @@
-//===------------------------------- unwind.h -----------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -56,211 +56,23 @@ typedef enum {
typedef struct _Unwind_Context _Unwind_Context; // opaque
#if defined(_LIBUNWIND_ARM_EHABI)
-typedef uint32_t _Unwind_State;
-
-static const _Unwind_State _US_VIRTUAL_UNWIND_FRAME = 0;
-static const _Unwind_State _US_UNWIND_FRAME_STARTING = 1;
-static const _Unwind_State _US_UNWIND_FRAME_RESUME = 2;
-static const _Unwind_State _US_ACTION_MASK = 3;
-/* Undocumented flag for force unwinding. */
-static const _Unwind_State _US_FORCE_UNWIND = 8;
-
-typedef uint32_t _Unwind_EHT_Header;
-
-struct _Unwind_Control_Block;
-typedef struct _Unwind_Control_Block _Unwind_Control_Block;
-typedef struct _Unwind_Control_Block _Unwind_Exception; /* Alias */
-
-struct _Unwind_Control_Block {
- uint64_t exception_class;
- void (*exception_cleanup)(_Unwind_Reason_Code, _Unwind_Control_Block*);
-
- /* Unwinder cache, private fields for the unwinder's use */
- struct {
- uint32_t reserved1; /* init reserved1 to 0, then don't touch */
- uint32_t reserved2;
- uint32_t reserved3;
- uint32_t reserved4;
- uint32_t reserved5;
- } unwinder_cache;
-
- /* Propagation barrier cache (valid after phase 1): */
- struct {
- uint32_t sp;
- uint32_t bitpattern[5];
- } barrier_cache;
-
- /* Cleanup cache (preserved over cleanup): */
- struct {
- uint32_t bitpattern[4];
- } cleanup_cache;
-
- /* Pr cache (for pr's benefit): */
- struct {
- uint32_t fnstart; /* function start address */
- _Unwind_EHT_Header* ehtp; /* pointer to EHT entry header word */
- uint32_t additional;
- uint32_t reserved1;
- } pr_cache;
-
- long long int :0; /* Enforce the 8-byte alignment */
-} __attribute__((__aligned__(8)));
-
-typedef _Unwind_Reason_Code (*_Unwind_Stop_Fn)
- (_Unwind_State state,
- _Unwind_Exception* exceptionObject,
- struct _Unwind_Context* context);
-
-typedef _Unwind_Reason_Code (*_Unwind_Personality_Fn)(
- _Unwind_State state, _Unwind_Exception *exceptionObject,
- struct _Unwind_Context *context);
+#include "unwind_arm_ehabi.h"
#else
-struct _Unwind_Context; // opaque
-struct _Unwind_Exception; // forward declaration
-typedef struct _Unwind_Exception _Unwind_Exception;
-
-struct _Unwind_Exception {
- uint64_t exception_class;
- void (*exception_cleanup)(_Unwind_Reason_Code reason,
- _Unwind_Exception *exc);
-#if defined(__SEH__) && !defined(__USING_SJLJ_EXCEPTIONS__)
- uintptr_t private_[6];
-#else
- uintptr_t private_1; // non-zero means forced unwind
- uintptr_t private_2; // holds sp that phase1 found for phase2 to use
+#include "unwind_itanium.h"
#endif
-#if __SIZEOF_POINTER__ == 4
- // The implementation of _Unwind_Exception uses an attribute mode on the
- // above fields which has the side effect of causing this whole struct to
- // round up to 32 bytes in size (48 with SEH). To be more explicit, we add
- // pad fields added for binary compatibility.
- uint32_t reserved[3];
-#endif
- // The Itanium ABI requires that _Unwind_Exception objects are "double-word
- // aligned". GCC has interpreted this to mean "use the maximum useful
- // alignment for the target"; so do we.
-} __attribute__((__aligned__));
typedef _Unwind_Reason_Code (*_Unwind_Stop_Fn)
(int version,
_Unwind_Action actions,
- uint64_t exceptionClass,
+ _Unwind_Exception_Class exceptionClass,
_Unwind_Exception* exceptionObject,
struct _Unwind_Context* context,
- void* stop_parameter );
-
-typedef _Unwind_Reason_Code (*_Unwind_Personality_Fn)(
- int version, _Unwind_Action actions, uint64_t exceptionClass,
- _Unwind_Exception *exceptionObject, struct _Unwind_Context *context);
-#endif
+ void* stop_parameter);
#ifdef __cplusplus
extern "C" {
#endif
-//
-// The following are the base functions documented by the C++ ABI
-//
-#ifdef __USING_SJLJ_EXCEPTIONS__
-extern _Unwind_Reason_Code
- _Unwind_SjLj_RaiseException(_Unwind_Exception *exception_object);
-extern void _Unwind_SjLj_Resume(_Unwind_Exception *exception_object);
-#else
-extern _Unwind_Reason_Code
- _Unwind_RaiseException(_Unwind_Exception *exception_object);
-extern void _Unwind_Resume(_Unwind_Exception *exception_object);
-#endif
-extern void _Unwind_DeleteException(_Unwind_Exception *exception_object);
-
-#if defined(_LIBUNWIND_ARM_EHABI)
-typedef enum {
- _UVRSC_CORE = 0, /* integer register */
- _UVRSC_VFP = 1, /* vfp */
- _UVRSC_WMMXD = 3, /* Intel WMMX data register */
- _UVRSC_WMMXC = 4 /* Intel WMMX control register */
-} _Unwind_VRS_RegClass;
-
-typedef enum {
- _UVRSD_UINT32 = 0,
- _UVRSD_VFPX = 1,
- _UVRSD_UINT64 = 3,
- _UVRSD_FLOAT = 4,
- _UVRSD_DOUBLE = 5
-} _Unwind_VRS_DataRepresentation;
-
-typedef enum {
- _UVRSR_OK = 0,
- _UVRSR_NOT_IMPLEMENTED = 1,
- _UVRSR_FAILED = 2
-} _Unwind_VRS_Result;
-
-extern void _Unwind_Complete(_Unwind_Exception* exception_object);
-
-extern _Unwind_VRS_Result
-_Unwind_VRS_Get(_Unwind_Context *context, _Unwind_VRS_RegClass regclass,
- uint32_t regno, _Unwind_VRS_DataRepresentation representation,
- void *valuep);
-
-extern _Unwind_VRS_Result
-_Unwind_VRS_Set(_Unwind_Context *context, _Unwind_VRS_RegClass regclass,
- uint32_t regno, _Unwind_VRS_DataRepresentation representation,
- void *valuep);
-
-extern _Unwind_VRS_Result
-_Unwind_VRS_Pop(_Unwind_Context *context, _Unwind_VRS_RegClass regclass,
- uint32_t discriminator,
- _Unwind_VRS_DataRepresentation representation);
-#endif
-
-#if !defined(_LIBUNWIND_ARM_EHABI)
-
-extern uintptr_t _Unwind_GetGR(struct _Unwind_Context *context, int index);
-extern void _Unwind_SetGR(struct _Unwind_Context *context, int index,
- uintptr_t new_value);
-extern uintptr_t _Unwind_GetIP(struct _Unwind_Context *context);
-extern void _Unwind_SetIP(struct _Unwind_Context *, uintptr_t new_value);
-
-#else // defined(_LIBUNWIND_ARM_EHABI)
-
-#if defined(_LIBUNWIND_UNWIND_LEVEL1_EXTERNAL_LINKAGE)
-#define _LIBUNWIND_EXPORT_UNWIND_LEVEL1 extern
-#else
-#define _LIBUNWIND_EXPORT_UNWIND_LEVEL1 static __inline__
-#endif
-
-// These are de facto helper functions for ARM, which delegate the function
-// calls to _Unwind_VRS_Get/Set(). These are not a part of ARM EHABI
-// specification, thus these function MUST be inlined. Please don't replace
-// these with the "extern" function declaration; otherwise, the program
-// including this <unwind.h> header won't be ABI compatible and will result in
-// link error when we are linking the program with libgcc.
-
-_LIBUNWIND_EXPORT_UNWIND_LEVEL1
-uintptr_t _Unwind_GetGR(struct _Unwind_Context *context, int index) {
- uintptr_t value = 0;
- _Unwind_VRS_Get(context, _UVRSC_CORE, (uint32_t)index, _UVRSD_UINT32, &value);
- return value;
-}
-
-_LIBUNWIND_EXPORT_UNWIND_LEVEL1
-void _Unwind_SetGR(struct _Unwind_Context *context, int index,
- uintptr_t value) {
- _Unwind_VRS_Set(context, _UVRSC_CORE, (uint32_t)index, _UVRSD_UINT32, &value);
-}
-
-_LIBUNWIND_EXPORT_UNWIND_LEVEL1
-uintptr_t _Unwind_GetIP(struct _Unwind_Context *context) {
- // remove the thumb-bit before returning
- return _Unwind_GetGR(context, 15) & (~(uintptr_t)0x1);
-}
-
-_LIBUNWIND_EXPORT_UNWIND_LEVEL1
-void _Unwind_SetIP(struct _Unwind_Context *context, uintptr_t value) {
- uintptr_t thumb_bit = _Unwind_GetGR(context, 15) & ((uintptr_t)0x1);
- _Unwind_SetGR(context, 15, value | thumb_bit);
-}
-#endif // defined(_LIBUNWIND_ARM_EHABI)
-
extern uintptr_t _Unwind_GetRegionStart(struct _Unwind_Context *context);
extern uintptr_t
_Unwind_GetLanguageSpecificData(struct _Unwind_Context *context);
diff --git a/libunwind/include/unwind_arm_ehabi.h b/libunwind/include/unwind_arm_ehabi.h
new file mode 100644
index 000000000000..dc9d403e264c
--- /dev/null
+++ b/libunwind/include/unwind_arm_ehabi.h
@@ -0,0 +1,169 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//
+// C++ ABI Level 1 ABI documented at:
+// https://github.com/ARM-software/abi-aa/blob/main/ehabi32/ehabi32.rst
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __ARM_EHABI_UNWIND_H__
+#define __ARM_EHABI_UNWIND_H__
+
+typedef uint32_t _Unwind_State;
+
+static const _Unwind_State _US_VIRTUAL_UNWIND_FRAME = 0;
+static const _Unwind_State _US_UNWIND_FRAME_STARTING = 1;
+static const _Unwind_State _US_UNWIND_FRAME_RESUME = 2;
+static const _Unwind_State _US_ACTION_MASK = 3;
+/* Undocumented flag for force unwinding. */
+static const _Unwind_State _US_FORCE_UNWIND = 8;
+
+typedef uint32_t _Unwind_EHT_Header;
+
+struct _Unwind_Control_Block;
+typedef struct _Unwind_Control_Block _Unwind_Control_Block;
+#define _Unwind_Exception _Unwind_Control_Block /* Alias */
+typedef uint8_t _Unwind_Exception_Class[8];
+
+struct _Unwind_Control_Block {
+ _Unwind_Exception_Class exception_class;
+ void (*exception_cleanup)(_Unwind_Reason_Code, _Unwind_Control_Block*);
+
+ /* Unwinder cache, private fields for the unwinder's use */
+ struct {
+ uint32_t reserved1; /* init reserved1 to 0, then don't touch */
+ uint32_t reserved2;
+ uint32_t reserved3;
+ uint32_t reserved4;
+ uint32_t reserved5;
+ } unwinder_cache;
+
+ /* Propagation barrier cache (valid after phase 1): */
+ struct {
+ uint32_t sp;
+ uint32_t bitpattern[5];
+ } barrier_cache;
+
+ /* Cleanup cache (preserved over cleanup): */
+ struct {
+ uint32_t bitpattern[4];
+ } cleanup_cache;
+
+ /* Pr cache (for pr's benefit): */
+ struct {
+ uint32_t fnstart; /* function start address */
+ _Unwind_EHT_Header* ehtp; /* pointer to EHT entry header word */
+ uint32_t additional;
+ uint32_t reserved1;
+ } pr_cache;
+
+ long long int :0; /* Enforce the 8-byte alignment */
+} __attribute__((__aligned__(8)));
+
+typedef _Unwind_Reason_Code (*_Unwind_Personality_Fn)(
+ _Unwind_State state, _Unwind_Exception *exceptionObject,
+ struct _Unwind_Context *context);
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//
+// The following are the base functions documented by the C++ ABI
+//
+#ifdef __USING_SJLJ_EXCEPTIONS__
+extern _Unwind_Reason_Code
+ _Unwind_SjLj_RaiseException(_Unwind_Exception *exception_object);
+extern void _Unwind_SjLj_Resume(_Unwind_Exception *exception_object);
+#else
+extern _Unwind_Reason_Code
+ _Unwind_RaiseException(_Unwind_Exception *exception_object);
+extern void _Unwind_Resume(_Unwind_Exception *exception_object);
+#endif
+extern void _Unwind_DeleteException(_Unwind_Exception *exception_object);
+
+typedef enum {
+ _UVRSC_CORE = 0, /* integer register */
+ _UVRSC_VFP = 1, /* vfp */
+ _UVRSC_WMMXD = 3, /* Intel WMMX data register */
+ _UVRSC_WMMXC = 4 /* Intel WMMX control register */
+} _Unwind_VRS_RegClass;
+
+typedef enum {
+ _UVRSD_UINT32 = 0,
+ _UVRSD_VFPX = 1,
+ _UVRSD_UINT64 = 3,
+ _UVRSD_FLOAT = 4,
+ _UVRSD_DOUBLE = 5
+} _Unwind_VRS_DataRepresentation;
+
+typedef enum {
+ _UVRSR_OK = 0,
+ _UVRSR_NOT_IMPLEMENTED = 1,
+ _UVRSR_FAILED = 2
+} _Unwind_VRS_Result;
+
+extern void _Unwind_Complete(_Unwind_Exception* exception_object);
+
+extern _Unwind_VRS_Result
+_Unwind_VRS_Get(_Unwind_Context *context, _Unwind_VRS_RegClass regclass,
+ uint32_t regno, _Unwind_VRS_DataRepresentation representation,
+ void *valuep);
+
+extern _Unwind_VRS_Result
+_Unwind_VRS_Set(_Unwind_Context *context, _Unwind_VRS_RegClass regclass,
+ uint32_t regno, _Unwind_VRS_DataRepresentation representation,
+ void *valuep);
+
+extern _Unwind_VRS_Result
+_Unwind_VRS_Pop(_Unwind_Context *context, _Unwind_VRS_RegClass regclass,
+ uint32_t discriminator,
+ _Unwind_VRS_DataRepresentation representation);
+
+#if defined(_LIBUNWIND_UNWIND_LEVEL1_EXTERNAL_LINKAGE)
+#define _LIBUNWIND_EXPORT_UNWIND_LEVEL1 extern
+#else
+#define _LIBUNWIND_EXPORT_UNWIND_LEVEL1 static __inline__
+#endif
+
+// These are de facto helper functions for ARM, which delegate the function
+// calls to _Unwind_VRS_Get/Set(). They are not part of the ARM EHABI
+// specification, so they MUST be inlined. Please don't replace them with
+// "extern" function declarations; otherwise, a program including this
+// <unwind.h> header won't be ABI compatible and will fail to link when it is
+// linked against libgcc.
+
+_LIBUNWIND_EXPORT_UNWIND_LEVEL1
+uintptr_t _Unwind_GetGR(struct _Unwind_Context *context, int index) {
+ uintptr_t value = 0;
+ _Unwind_VRS_Get(context, _UVRSC_CORE, (uint32_t)index, _UVRSD_UINT32, &value);
+ return value;
+}
+
+_LIBUNWIND_EXPORT_UNWIND_LEVEL1
+void _Unwind_SetGR(struct _Unwind_Context *context, int index,
+ uintptr_t value) {
+ _Unwind_VRS_Set(context, _UVRSC_CORE, (uint32_t)index, _UVRSD_UINT32, &value);
+}
+
+_LIBUNWIND_EXPORT_UNWIND_LEVEL1
+uintptr_t _Unwind_GetIP(struct _Unwind_Context *context) {
+ // remove the thumb-bit before returning
+ return _Unwind_GetGR(context, 15) & (~(uintptr_t)0x1);
+}
+
+_LIBUNWIND_EXPORT_UNWIND_LEVEL1
+void _Unwind_SetIP(struct _Unwind_Context *context, uintptr_t value) {
+ uintptr_t thumb_bit = _Unwind_GetGR(context, 15) & ((uintptr_t)0x1);
+ _Unwind_SetGR(context, 15, value | thumb_bit);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // __ARM_EHABI_UNWIND_H__
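The inline _Unwind_GetIP/_Unwind_SetIP helpers above read core register 15 through _Unwind_VRS_Get and mask off the Thumb bit, so personality routines see a plain address, while _Unwind_SetIP preserves whatever Thumb bit the frame already had. A minimal sketch of that bit manipulation, using made-up register values instead of a real _Unwind_Context:

#include <stdint.h>
#include <stdio.h>

int main(void) {
  uintptr_t r15     = 0x00010001;            /* core register 15 (pc) with the Thumb bit set */
  uintptr_t ip      = r15 & ~(uintptr_t)0x1; /* what _Unwind_GetIP would return              */
  uintptr_t landing = 0x00020000;            /* hypothetical landing-pad address             */
  uintptr_t thumb   = r15 & (uintptr_t)0x1;  /* bit preserved by _Unwind_SetIP               */

  printf("GetIP -> 0x%08lx\n", (unsigned long)ip);
  printf("SetIP(0x%08lx) stores 0x%08lx\n",
         (unsigned long)landing, (unsigned long)(landing | thumb));
  return 0;
}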
diff --git a/libunwind/include/unwind_itanium.h b/libunwind/include/unwind_itanium.h
new file mode 100644
index 000000000000..d94a6183be29
--- /dev/null
+++ b/libunwind/include/unwind_itanium.h
@@ -0,0 +1,76 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//
+// C++ ABI Level 1 ABI documented at:
+// https://itanium-cxx-abi.github.io/cxx-abi/abi-eh.html
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __ITANIUM_UNWIND_H__
+#define __ITANIUM_UNWIND_H__
+
+struct _Unwind_Context; // opaque
+struct _Unwind_Exception; // forward declaration
+typedef struct _Unwind_Exception _Unwind_Exception;
+typedef uint64_t _Unwind_Exception_Class;
+
+struct _Unwind_Exception {
+ _Unwind_Exception_Class exception_class;
+ void (*exception_cleanup)(_Unwind_Reason_Code reason,
+ _Unwind_Exception *exc);
+#if defined(__SEH__) && !defined(__USING_SJLJ_EXCEPTIONS__)
+ uintptr_t private_[6];
+#else
+ uintptr_t private_1; // non-zero means forced unwind
+ uintptr_t private_2; // holds sp that phase1 found for phase2 to use
+#endif
+#if __SIZEOF_POINTER__ == 4
+ // The implementation of _Unwind_Exception uses an attribute mode on the
+ // above fields which has the side effect of causing this whole struct to
+ // round up to 32 bytes in size (48 with SEH). To make this explicit, we add
+ // padding fields for binary compatibility.
+ uint32_t reserved[3];
+#endif
+ // The Itanium ABI requires that _Unwind_Exception objects are "double-word
+ // aligned". GCC has interpreted this to mean "use the maximum useful
+ // alignment for the target"; so do we.
+} __attribute__((__aligned__));
+
+typedef _Unwind_Reason_Code (*_Unwind_Personality_Fn)(
+ int version, _Unwind_Action actions, uint64_t exceptionClass,
+ _Unwind_Exception *exceptionObject, struct _Unwind_Context *context);
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//
+// The following are the base functions documented by the C++ ABI
+//
+#ifdef __USING_SJLJ_EXCEPTIONS__
+extern _Unwind_Reason_Code
+ _Unwind_SjLj_RaiseException(_Unwind_Exception *exception_object);
+extern void _Unwind_SjLj_Resume(_Unwind_Exception *exception_object);
+#else
+extern _Unwind_Reason_Code
+ _Unwind_RaiseException(_Unwind_Exception *exception_object);
+extern void _Unwind_Resume(_Unwind_Exception *exception_object);
+#endif
+extern void _Unwind_DeleteException(_Unwind_Exception *exception_object);
+
+
+extern uintptr_t _Unwind_GetGR(struct _Unwind_Context *context, int index);
+extern void _Unwind_SetGR(struct _Unwind_Context *context, int index,
+ uintptr_t new_value);
+extern uintptr_t _Unwind_GetIP(struct _Unwind_Context *context);
+extern void _Unwind_SetIP(struct _Unwind_Context *, uintptr_t new_value);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // __ITANIUM_UNWIND_H__
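The padding and alignment comments above can be checked empirically with a throwaway program against the system <unwind.h>; the exact numbers depend on pointer width and on whether the SEH or SJLJ variant is in play, so treat the output as illustrative only.

#include <stdalign.h>
#include <stdio.h>
#include <unwind.h>

int main(void) {
  printf("sizeof(struct _Unwind_Exception)  = %zu\n",
         sizeof(struct _Unwind_Exception));
  printf("alignof(struct _Unwind_Exception) = %zu\n",
         alignof(struct _Unwind_Exception));
  return 0;
}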
diff --git a/libunwind/src/AddressSpace.hpp b/libunwind/src/AddressSpace.hpp
index 171318ff6370..cfceac29537f 100644
--- a/libunwind/src/AddressSpace.hpp
+++ b/libunwind/src/AddressSpace.hpp
@@ -1,4 +1,4 @@
-//===------------------------- AddressSpace.hpp ---------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libunwind/src/CompactUnwinder.hpp b/libunwind/src/CompactUnwinder.hpp
index 1c3175dff50a..0b2b5e111bfc 100644
--- a/libunwind/src/CompactUnwinder.hpp
+++ b/libunwind/src/CompactUnwinder.hpp
@@ -1,4 +1,4 @@
-//===-------------------------- CompactUnwinder.hpp -----------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -537,65 +537,65 @@ int CompactUnwinder_arm64<A>::stepWithCompactEncodingFrameless(
uint64_t savedRegisterLoc = registers.getSP() + stackSize;
if (encoding & UNWIND_ARM64_FRAME_X19_X20_PAIR) {
- registers.setRegister(UNW_ARM64_X19, addressSpace.get64(savedRegisterLoc));
+ registers.setRegister(UNW_AARCH64_X19, addressSpace.get64(savedRegisterLoc));
savedRegisterLoc -= 8;
- registers.setRegister(UNW_ARM64_X20, addressSpace.get64(savedRegisterLoc));
+ registers.setRegister(UNW_AARCH64_X20, addressSpace.get64(savedRegisterLoc));
savedRegisterLoc -= 8;
}
if (encoding & UNWIND_ARM64_FRAME_X21_X22_PAIR) {
- registers.setRegister(UNW_ARM64_X21, addressSpace.get64(savedRegisterLoc));
+ registers.setRegister(UNW_AARCH64_X21, addressSpace.get64(savedRegisterLoc));
savedRegisterLoc -= 8;
- registers.setRegister(UNW_ARM64_X22, addressSpace.get64(savedRegisterLoc));
+ registers.setRegister(UNW_AARCH64_X22, addressSpace.get64(savedRegisterLoc));
savedRegisterLoc -= 8;
}
if (encoding & UNWIND_ARM64_FRAME_X23_X24_PAIR) {
- registers.setRegister(UNW_ARM64_X23, addressSpace.get64(savedRegisterLoc));
+ registers.setRegister(UNW_AARCH64_X23, addressSpace.get64(savedRegisterLoc));
savedRegisterLoc -= 8;
- registers.setRegister(UNW_ARM64_X24, addressSpace.get64(savedRegisterLoc));
+ registers.setRegister(UNW_AARCH64_X24, addressSpace.get64(savedRegisterLoc));
savedRegisterLoc -= 8;
}
if (encoding & UNWIND_ARM64_FRAME_X25_X26_PAIR) {
- registers.setRegister(UNW_ARM64_X25, addressSpace.get64(savedRegisterLoc));
+ registers.setRegister(UNW_AARCH64_X25, addressSpace.get64(savedRegisterLoc));
savedRegisterLoc -= 8;
- registers.setRegister(UNW_ARM64_X26, addressSpace.get64(savedRegisterLoc));
+ registers.setRegister(UNW_AARCH64_X26, addressSpace.get64(savedRegisterLoc));
savedRegisterLoc -= 8;
}
if (encoding & UNWIND_ARM64_FRAME_X27_X28_PAIR) {
- registers.setRegister(UNW_ARM64_X27, addressSpace.get64(savedRegisterLoc));
+ registers.setRegister(UNW_AARCH64_X27, addressSpace.get64(savedRegisterLoc));
savedRegisterLoc -= 8;
- registers.setRegister(UNW_ARM64_X28, addressSpace.get64(savedRegisterLoc));
+ registers.setRegister(UNW_AARCH64_X28, addressSpace.get64(savedRegisterLoc));
savedRegisterLoc -= 8;
}
if (encoding & UNWIND_ARM64_FRAME_D8_D9_PAIR) {
- registers.setFloatRegister(UNW_ARM64_D8,
+ registers.setFloatRegister(UNW_AARCH64_V8,
addressSpace.getDouble(savedRegisterLoc));
savedRegisterLoc -= 8;
- registers.setFloatRegister(UNW_ARM64_D9,
+ registers.setFloatRegister(UNW_AARCH64_V9,
addressSpace.getDouble(savedRegisterLoc));
savedRegisterLoc -= 8;
}
if (encoding & UNWIND_ARM64_FRAME_D10_D11_PAIR) {
- registers.setFloatRegister(UNW_ARM64_D10,
+ registers.setFloatRegister(UNW_AARCH64_V10,
addressSpace.getDouble(savedRegisterLoc));
savedRegisterLoc -= 8;
- registers.setFloatRegister(UNW_ARM64_D11,
+ registers.setFloatRegister(UNW_AARCH64_V11,
addressSpace.getDouble(savedRegisterLoc));
savedRegisterLoc -= 8;
}
if (encoding & UNWIND_ARM64_FRAME_D12_D13_PAIR) {
- registers.setFloatRegister(UNW_ARM64_D12,
+ registers.setFloatRegister(UNW_AARCH64_V12,
addressSpace.getDouble(savedRegisterLoc));
savedRegisterLoc -= 8;
- registers.setFloatRegister(UNW_ARM64_D13,
+ registers.setFloatRegister(UNW_AARCH64_V13,
addressSpace.getDouble(savedRegisterLoc));
savedRegisterLoc -= 8;
}
if (encoding & UNWIND_ARM64_FRAME_D14_D15_PAIR) {
- registers.setFloatRegister(UNW_ARM64_D14,
+ registers.setFloatRegister(UNW_AARCH64_V14,
addressSpace.getDouble(savedRegisterLoc));
savedRegisterLoc -= 8;
- registers.setFloatRegister(UNW_ARM64_D15,
+ registers.setFloatRegister(UNW_AARCH64_V15,
addressSpace.getDouble(savedRegisterLoc));
savedRegisterLoc -= 8;
}
@@ -604,7 +604,7 @@ int CompactUnwinder_arm64<A>::stepWithCompactEncodingFrameless(
registers.setSP(savedRegisterLoc);
// set pc to be value in lr
- registers.setIP(registers.getRegister(UNW_ARM64_LR));
+ registers.setIP(registers.getRegister(UNW_AARCH64_LR));
return UNW_STEP_SUCCESS;
}
@@ -616,65 +616,65 @@ int CompactUnwinder_arm64<A>::stepWithCompactEncodingFrame(
uint64_t savedRegisterLoc = registers.getFP() - 8;
if (encoding & UNWIND_ARM64_FRAME_X19_X20_PAIR) {
- registers.setRegister(UNW_ARM64_X19, addressSpace.get64(savedRegisterLoc));
+ registers.setRegister(UNW_AARCH64_X19, addressSpace.get64(savedRegisterLoc));
savedRegisterLoc -= 8;
- registers.setRegister(UNW_ARM64_X20, addressSpace.get64(savedRegisterLoc));
+ registers.setRegister(UNW_AARCH64_X20, addressSpace.get64(savedRegisterLoc));
savedRegisterLoc -= 8;
}
if (encoding & UNWIND_ARM64_FRAME_X21_X22_PAIR) {
- registers.setRegister(UNW_ARM64_X21, addressSpace.get64(savedRegisterLoc));
+ registers.setRegister(UNW_AARCH64_X21, addressSpace.get64(savedRegisterLoc));
savedRegisterLoc -= 8;
- registers.setRegister(UNW_ARM64_X22, addressSpace.get64(savedRegisterLoc));
+ registers.setRegister(UNW_AARCH64_X22, addressSpace.get64(savedRegisterLoc));
savedRegisterLoc -= 8;
}
if (encoding & UNWIND_ARM64_FRAME_X23_X24_PAIR) {
- registers.setRegister(UNW_ARM64_X23, addressSpace.get64(savedRegisterLoc));
+ registers.setRegister(UNW_AARCH64_X23, addressSpace.get64(savedRegisterLoc));
savedRegisterLoc -= 8;
- registers.setRegister(UNW_ARM64_X24, addressSpace.get64(savedRegisterLoc));
+ registers.setRegister(UNW_AARCH64_X24, addressSpace.get64(savedRegisterLoc));
savedRegisterLoc -= 8;
}
if (encoding & UNWIND_ARM64_FRAME_X25_X26_PAIR) {
- registers.setRegister(UNW_ARM64_X25, addressSpace.get64(savedRegisterLoc));
+ registers.setRegister(UNW_AARCH64_X25, addressSpace.get64(savedRegisterLoc));
savedRegisterLoc -= 8;
- registers.setRegister(UNW_ARM64_X26, addressSpace.get64(savedRegisterLoc));
+ registers.setRegister(UNW_AARCH64_X26, addressSpace.get64(savedRegisterLoc));
savedRegisterLoc -= 8;
}
if (encoding & UNWIND_ARM64_FRAME_X27_X28_PAIR) {
- registers.setRegister(UNW_ARM64_X27, addressSpace.get64(savedRegisterLoc));
+ registers.setRegister(UNW_AARCH64_X27, addressSpace.get64(savedRegisterLoc));
savedRegisterLoc -= 8;
- registers.setRegister(UNW_ARM64_X28, addressSpace.get64(savedRegisterLoc));
+ registers.setRegister(UNW_AARCH64_X28, addressSpace.get64(savedRegisterLoc));
savedRegisterLoc -= 8;
}
if (encoding & UNWIND_ARM64_FRAME_D8_D9_PAIR) {
- registers.setFloatRegister(UNW_ARM64_D8,
+ registers.setFloatRegister(UNW_AARCH64_V8,
addressSpace.getDouble(savedRegisterLoc));
savedRegisterLoc -= 8;
- registers.setFloatRegister(UNW_ARM64_D9,
+ registers.setFloatRegister(UNW_AARCH64_V9,
addressSpace.getDouble(savedRegisterLoc));
savedRegisterLoc -= 8;
}
if (encoding & UNWIND_ARM64_FRAME_D10_D11_PAIR) {
- registers.setFloatRegister(UNW_ARM64_D10,
+ registers.setFloatRegister(UNW_AARCH64_V10,
addressSpace.getDouble(savedRegisterLoc));
savedRegisterLoc -= 8;
- registers.setFloatRegister(UNW_ARM64_D11,
+ registers.setFloatRegister(UNW_AARCH64_V11,
addressSpace.getDouble(savedRegisterLoc));
savedRegisterLoc -= 8;
}
if (encoding & UNWIND_ARM64_FRAME_D12_D13_PAIR) {
- registers.setFloatRegister(UNW_ARM64_D12,
+ registers.setFloatRegister(UNW_AARCH64_V12,
addressSpace.getDouble(savedRegisterLoc));
savedRegisterLoc -= 8;
- registers.setFloatRegister(UNW_ARM64_D13,
+ registers.setFloatRegister(UNW_AARCH64_V13,
addressSpace.getDouble(savedRegisterLoc));
savedRegisterLoc -= 8;
}
if (encoding & UNWIND_ARM64_FRAME_D14_D15_PAIR) {
- registers.setFloatRegister(UNW_ARM64_D14,
+ registers.setFloatRegister(UNW_AARCH64_V14,
addressSpace.getDouble(savedRegisterLoc));
savedRegisterLoc -= 8;
- registers.setFloatRegister(UNW_ARM64_D15,
+ registers.setFloatRegister(UNW_AARCH64_V15,
addressSpace.getDouble(savedRegisterLoc));
savedRegisterLoc -= 8;
}
diff --git a/libunwind/src/DwarfInstructions.hpp b/libunwind/src/DwarfInstructions.hpp
index 686c6be0d8fd..19835aad668f 100644
--- a/libunwind/src/DwarfInstructions.hpp
+++ b/libunwind/src/DwarfInstructions.hpp
@@ -1,4 +1,4 @@
-//===-------------------------- DwarfInstructions.hpp ---------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -115,12 +115,15 @@ double DwarfInstructions<A, R>::getSavedFloatRegister(
return addressSpace.getDouble(
evaluateExpression((pint_t)savedReg.value, addressSpace,
registers, cfa));
-
+ case CFI_Parser<A>::kRegisterUndefined:
+ return 0.0;
+ case CFI_Parser<A>::kRegisterInRegister:
+#ifndef _LIBUNWIND_TARGET_ARM
+ return registers.getFloatRegister((int)savedReg.value);
+#endif
case CFI_Parser<A>::kRegisterIsExpression:
case CFI_Parser<A>::kRegisterUnused:
- case CFI_Parser<A>::kRegisterUndefined:
case CFI_Parser<A>::kRegisterOffsetFromCFA:
- case CFI_Parser<A>::kRegisterInRegister:
// FIX ME
break;
}
@@ -219,7 +222,7 @@ int DwarfInstructions<A, R>::stepWithDwarf(A &addressSpace, pint_t pc,
// restored. autia1716 is used instead of autia as autia1716 assembles
// to a NOP on pre-v8.3a architectures.
if ((R::getArch() == REGISTERS_ARM64) &&
- prolog.savedRegisters[UNW_ARM64_RA_SIGN_STATE].value &&
+ prolog.savedRegisters[UNW_AARCH64_RA_SIGN_STATE].value &&
returnAddress != 0) {
#if !defined(_LIBUNWIND_IS_NATIVE_ONLY)
return UNW_ECROSSRASIGNING;
diff --git a/libunwind/src/DwarfParser.hpp b/libunwind/src/DwarfParser.hpp
index de0eb6de9d70..2153a71c2ec0 100644
--- a/libunwind/src/DwarfParser.hpp
+++ b/libunwind/src/DwarfParser.hpp
@@ -1,4 +1,4 @@
-//===--------------------------- DwarfParser.hpp --------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -154,7 +154,8 @@ public:
uintptr_t sectionLength, pint_t fdeHint, FDE_Info *fdeInfo,
CIE_Info *cieInfo);
static const char *decodeFDE(A &addressSpace, pint_t fdeStart,
- FDE_Info *fdeInfo, CIE_Info *cieInfo);
+ FDE_Info *fdeInfo, CIE_Info *cieInfo,
+ bool useCIEInfo = false);
static bool parseFDEInstructions(A &addressSpace, const FDE_Info &fdeInfo,
const CIE_Info &cieInfo, pint_t upToPC,
int arch, PrologInfo *results);
@@ -162,10 +163,14 @@ public:
static const char *parseCIE(A &addressSpace, pint_t cie, CIE_Info *cieInfo);
};
-/// Parse a FDE into a CIE_Info and an FDE_Info
+/// Parse an FDE into a CIE_Info and an FDE_Info. If useCIEInfo is
+/// true, treat cieInfo as an already-parsed CIE_Info (whose start
+/// offset must match the one specified by the FDE) rather than
+/// parsing the CIE indicated within the FDE.
template <typename A>
const char *CFI_Parser<A>::decodeFDE(A &addressSpace, pint_t fdeStart,
- FDE_Info *fdeInfo, CIE_Info *cieInfo) {
+ FDE_Info *fdeInfo, CIE_Info *cieInfo,
+ bool useCIEInfo) {
pint_t p = fdeStart;
pint_t cfiLength = (pint_t)addressSpace.get32(p);
p += 4;
@@ -181,9 +186,14 @@ const char *CFI_Parser<A>::decodeFDE(A &addressSpace, pint_t fdeStart,
return "FDE is really a CIE"; // this is a CIE not an FDE
pint_t nextCFI = p + cfiLength;
pint_t cieStart = p - ciePointer;
- const char *err = parseCIE(addressSpace, cieStart, cieInfo);
- if (err != NULL)
- return err;
+ if (useCIEInfo) {
+ if (cieInfo->cieStart != cieStart)
+ return "CIE start does not match";
+ } else {
+ const char *err = parseCIE(addressSpace, cieStart, cieInfo);
+ if (err != NULL)
+ return err;
+ }
p += 4;
// Parse pc begin and range.
pint_t pcStart =
@@ -733,8 +743,8 @@ bool CFI_Parser<A>::parseFDEInstructions(A &addressSpace,
#if defined(_LIBUNWIND_TARGET_AARCH64)
case REGISTERS_ARM64: {
int64_t value =
- results->savedRegisters[UNW_ARM64_RA_SIGN_STATE].value ^ 0x1;
- results->setRegisterValue(UNW_ARM64_RA_SIGN_STATE, value,
+ results->savedRegisters[UNW_AARCH64_RA_SIGN_STATE].value ^ 0x1;
+ results->setRegisterValue(UNW_AARCH64_RA_SIGN_STATE, value,
initialState);
_LIBUNWIND_TRACE_DWARF("DW_CFA_AARCH64_negate_ra_state\n");
} break;
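The new useCIEInfo parameter lets a caller that already holds a parsed CIE_Info hand it to decodeFDE instead of having the CIE re-parsed; decodeFDE then only verifies that the cached CIE's start offset matches the one the FDE points at. A self-contained sketch of that reuse-or-verify pattern, using stand-in types rather than the real libunwind structures:

#include <stdbool.h>
#include <stdio.h>

typedef struct { unsigned long cieStart; int parsed; } cie_info;

/* Stand-in for parseCIE(); NULL means success, like the real parser. */
static const char *parse_cie(unsigned long cieStart, cie_info *out) {
  out->cieStart = cieStart;
  out->parsed = 1;
  return NULL;
}

static const char *decode_fde(unsigned long fdeCieStart, cie_info *cieInfo,
                              bool useCIEInfo) {
  if (useCIEInfo) {
    /* Caller-supplied CIE: just check it is the one the FDE references. */
    if (cieInfo->cieStart != fdeCieStart)
      return "CIE start does not match";
  } else {
    const char *err = parse_cie(fdeCieStart, cieInfo);
    if (err != NULL)
      return err;
  }
  return NULL;
}

int main(void) {
  cie_info cached = { 0x40, 1 };
  printf("matching CIE:   %s\n", decode_fde(0x40, &cached, true) ? "error" : "ok");
  printf("mismatched CIE: %s\n", decode_fde(0x80, &cached, true) ? "error" : "ok");
  return 0;
}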
diff --git a/libunwind/src/EHHeaderParser.hpp b/libunwind/src/EHHeaderParser.hpp
index f97cca54825f..9a38070faba9 100644
--- a/libunwind/src/EHHeaderParser.hpp
+++ b/libunwind/src/EHHeaderParser.hpp
@@ -1,4 +1,4 @@
-//===------------------------- EHHeaderParser.hpp -------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libunwind/src/RWMutex.hpp b/libunwind/src/RWMutex.hpp
index fcd3f4967d17..344d35641f07 100644
--- a/libunwind/src/RWMutex.hpp
+++ b/libunwind/src/RWMutex.hpp
@@ -1,4 +1,4 @@
-//===----------------------------- Registers.hpp --------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libunwind/src/Registers.hpp b/libunwind/src/Registers.hpp
index aea84cc22721..c2d5327eade3 100644
--- a/libunwind/src/Registers.hpp
+++ b/libunwind/src/Registers.hpp
@@ -1,4 +1,4 @@
-//===----------------------------- Registers.hpp --------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -15,8 +15,9 @@
#include <stdint.h>
#include <string.h>
-#include "libunwind.h"
+#include "cet_unwind.h"
#include "config.h"
+#include "libunwind.h"
namespace libunwind {
@@ -42,6 +43,13 @@ enum {
#if defined(_LIBUNWIND_TARGET_I386)
class _LIBUNWIND_HIDDEN Registers_x86;
extern "C" void __libunwind_Registers_x86_jumpto(Registers_x86 *);
+
+#if defined(_LIBUNWIND_USE_CET)
+extern "C" void *__libunwind_cet_get_jump_target() {
+ return reinterpret_cast<void *>(&__libunwind_Registers_x86_jumpto);
+}
+#endif
+
/// Registers_x86 holds the register state of a thread in a 32-bit intel
/// process.
class _LIBUNWIND_HIDDEN Registers_x86 {
@@ -253,6 +261,13 @@ inline void Registers_x86::setVectorRegister(int, v128) {
/// process.
class _LIBUNWIND_HIDDEN Registers_x86_64;
extern "C" void __libunwind_Registers_x86_64_jumpto(Registers_x86_64 *);
+
+#if defined(_LIBUNWIND_USE_CET)
+extern "C" void *__libunwind_cet_get_jump_target() {
+ return reinterpret_cast<void *>(&__libunwind_Registers_x86_64_jumpto);
+}
+#endif
+
class _LIBUNWIND_HIDDEN Registers_x86_64 {
public:
Registers_x86_64();
@@ -339,7 +354,7 @@ inline bool Registers_x86_64::validRegister(int regNum) const {
return true;
if (regNum < 0)
return false;
- if (regNum > 15)
+ if (regNum > 16)
return false;
return true;
}
@@ -347,6 +362,7 @@ inline bool Registers_x86_64::validRegister(int regNum) const {
inline uint64_t Registers_x86_64::getRegister(int regNum) const {
switch (regNum) {
case UNW_REG_IP:
+ case UNW_X86_64_RIP:
return _registers.__rip;
case UNW_REG_SP:
return _registers.__rsp;
@@ -389,6 +405,7 @@ inline uint64_t Registers_x86_64::getRegister(int regNum) const {
inline void Registers_x86_64::setRegister(int regNum, uint64_t value) {
switch (regNum) {
case UNW_REG_IP:
+ case UNW_X86_64_RIP:
_registers.__rip = value;
return;
case UNW_REG_SP:
@@ -449,6 +466,7 @@ inline void Registers_x86_64::setRegister(int regNum, uint64_t value) {
inline const char *Registers_x86_64::getRegisterName(int regNum) {
switch (regNum) {
case UNW_REG_IP:
+ case UNW_X86_64_RIP:
return "rip";
case UNW_REG_SP:
return "rsp";
@@ -1847,7 +1865,7 @@ inline bool Registers_arm64::validRegister(int regNum) const {
return false;
if (regNum > 95)
return false;
- if (regNum == UNW_ARM64_RA_SIGN_STATE)
+ if (regNum == UNW_AARCH64_RA_SIGN_STATE)
return true;
if ((regNum > 32) && (regNum < 64))
return false;
@@ -1855,15 +1873,15 @@ inline bool Registers_arm64::validRegister(int regNum) const {
}
inline uint64_t Registers_arm64::getRegister(int regNum) const {
- if (regNum == UNW_REG_IP || regNum == UNW_ARM64_PC)
+ if (regNum == UNW_REG_IP || regNum == UNW_AARCH64_PC)
return _registers.__pc;
- if (regNum == UNW_REG_SP || regNum == UNW_ARM64_SP)
+ if (regNum == UNW_REG_SP || regNum == UNW_AARCH64_SP)
return _registers.__sp;
- if (regNum == UNW_ARM64_RA_SIGN_STATE)
+ if (regNum == UNW_AARCH64_RA_SIGN_STATE)
return _registers.__ra_sign_state;
- if (regNum == UNW_ARM64_FP)
+ if (regNum == UNW_AARCH64_FP)
return _registers.__fp;
- if (regNum == UNW_ARM64_LR)
+ if (regNum == UNW_AARCH64_LR)
return _registers.__lr;
if ((regNum >= 0) && (regNum < 29))
return _registers.__x[regNum];
@@ -1871,15 +1889,15 @@ inline uint64_t Registers_arm64::getRegister(int regNum) const {
}
inline void Registers_arm64::setRegister(int regNum, uint64_t value) {
- if (regNum == UNW_REG_IP || regNum == UNW_ARM64_PC)
+ if (regNum == UNW_REG_IP || regNum == UNW_AARCH64_PC)
_registers.__pc = value;
- else if (regNum == UNW_REG_SP || regNum == UNW_ARM64_SP)
+ else if (regNum == UNW_REG_SP || regNum == UNW_AARCH64_SP)
_registers.__sp = value;
- else if (regNum == UNW_ARM64_RA_SIGN_STATE)
+ else if (regNum == UNW_AARCH64_RA_SIGN_STATE)
_registers.__ra_sign_state = value;
- else if (regNum == UNW_ARM64_FP)
+ else if (regNum == UNW_AARCH64_FP)
_registers.__fp = value;
- else if (regNum == UNW_ARM64_LR)
+ else if (regNum == UNW_AARCH64_LR)
_registers.__lr = value;
else if ((regNum >= 0) && (regNum < 29))
_registers.__x[regNum] = value;
@@ -1893,135 +1911,135 @@ inline const char *Registers_arm64::getRegisterName(int regNum) {
return "pc";
case UNW_REG_SP:
return "sp";
- case UNW_ARM64_X0:
+ case UNW_AARCH64_X0:
return "x0";
- case UNW_ARM64_X1:
+ case UNW_AARCH64_X1:
return "x1";
- case UNW_ARM64_X2:
+ case UNW_AARCH64_X2:
return "x2";
- case UNW_ARM64_X3:
+ case UNW_AARCH64_X3:
return "x3";
- case UNW_ARM64_X4:
+ case UNW_AARCH64_X4:
return "x4";
- case UNW_ARM64_X5:
+ case UNW_AARCH64_X5:
return "x5";
- case UNW_ARM64_X6:
+ case UNW_AARCH64_X6:
return "x6";
- case UNW_ARM64_X7:
+ case UNW_AARCH64_X7:
return "x7";
- case UNW_ARM64_X8:
+ case UNW_AARCH64_X8:
return "x8";
- case UNW_ARM64_X9:
+ case UNW_AARCH64_X9:
return "x9";
- case UNW_ARM64_X10:
+ case UNW_AARCH64_X10:
return "x10";
- case UNW_ARM64_X11:
+ case UNW_AARCH64_X11:
return "x11";
- case UNW_ARM64_X12:
+ case UNW_AARCH64_X12:
return "x12";
- case UNW_ARM64_X13:
+ case UNW_AARCH64_X13:
return "x13";
- case UNW_ARM64_X14:
+ case UNW_AARCH64_X14:
return "x14";
- case UNW_ARM64_X15:
+ case UNW_AARCH64_X15:
return "x15";
- case UNW_ARM64_X16:
+ case UNW_AARCH64_X16:
return "x16";
- case UNW_ARM64_X17:
+ case UNW_AARCH64_X17:
return "x17";
- case UNW_ARM64_X18:
+ case UNW_AARCH64_X18:
return "x18";
- case UNW_ARM64_X19:
+ case UNW_AARCH64_X19:
return "x19";
- case UNW_ARM64_X20:
+ case UNW_AARCH64_X20:
return "x20";
- case UNW_ARM64_X21:
+ case UNW_AARCH64_X21:
return "x21";
- case UNW_ARM64_X22:
+ case UNW_AARCH64_X22:
return "x22";
- case UNW_ARM64_X23:
+ case UNW_AARCH64_X23:
return "x23";
- case UNW_ARM64_X24:
+ case UNW_AARCH64_X24:
return "x24";
- case UNW_ARM64_X25:
+ case UNW_AARCH64_X25:
return "x25";
- case UNW_ARM64_X26:
+ case UNW_AARCH64_X26:
return "x26";
- case UNW_ARM64_X27:
+ case UNW_AARCH64_X27:
return "x27";
- case UNW_ARM64_X28:
+ case UNW_AARCH64_X28:
return "x28";
- case UNW_ARM64_FP:
+ case UNW_AARCH64_FP:
return "fp";
- case UNW_ARM64_LR:
+ case UNW_AARCH64_LR:
return "lr";
- case UNW_ARM64_SP:
+ case UNW_AARCH64_SP:
return "sp";
- case UNW_ARM64_PC:
+ case UNW_AARCH64_PC:
return "pc";
- case UNW_ARM64_D0:
+ case UNW_AARCH64_V0:
return "d0";
- case UNW_ARM64_D1:
+ case UNW_AARCH64_V1:
return "d1";
- case UNW_ARM64_D2:
+ case UNW_AARCH64_V2:
return "d2";
- case UNW_ARM64_D3:
+ case UNW_AARCH64_V3:
return "d3";
- case UNW_ARM64_D4:
+ case UNW_AARCH64_V4:
return "d4";
- case UNW_ARM64_D5:
+ case UNW_AARCH64_V5:
return "d5";
- case UNW_ARM64_D6:
+ case UNW_AARCH64_V6:
return "d6";
- case UNW_ARM64_D7:
+ case UNW_AARCH64_V7:
return "d7";
- case UNW_ARM64_D8:
+ case UNW_AARCH64_V8:
return "d8";
- case UNW_ARM64_D9:
+ case UNW_AARCH64_V9:
return "d9";
- case UNW_ARM64_D10:
+ case UNW_AARCH64_V10:
return "d10";
- case UNW_ARM64_D11:
+ case UNW_AARCH64_V11:
return "d11";
- case UNW_ARM64_D12:
+ case UNW_AARCH64_V12:
return "d12";
- case UNW_ARM64_D13:
+ case UNW_AARCH64_V13:
return "d13";
- case UNW_ARM64_D14:
+ case UNW_AARCH64_V14:
return "d14";
- case UNW_ARM64_D15:
+ case UNW_AARCH64_V15:
return "d15";
- case UNW_ARM64_D16:
+ case UNW_AARCH64_V16:
return "d16";
- case UNW_ARM64_D17:
+ case UNW_AARCH64_V17:
return "d17";
- case UNW_ARM64_D18:
+ case UNW_AARCH64_V18:
return "d18";
- case UNW_ARM64_D19:
+ case UNW_AARCH64_V19:
return "d19";
- case UNW_ARM64_D20:
+ case UNW_AARCH64_V20:
return "d20";
- case UNW_ARM64_D21:
+ case UNW_AARCH64_V21:
return "d21";
- case UNW_ARM64_D22:
+ case UNW_AARCH64_V22:
return "d22";
- case UNW_ARM64_D23:
+ case UNW_AARCH64_V23:
return "d23";
- case UNW_ARM64_D24:
+ case UNW_AARCH64_V24:
return "d24";
- case UNW_ARM64_D25:
+ case UNW_AARCH64_V25:
return "d25";
- case UNW_ARM64_D26:
+ case UNW_AARCH64_V26:
return "d26";
- case UNW_ARM64_D27:
+ case UNW_AARCH64_V27:
return "d27";
- case UNW_ARM64_D28:
+ case UNW_AARCH64_V28:
return "d28";
- case UNW_ARM64_D29:
+ case UNW_AARCH64_V29:
return "d29";
- case UNW_ARM64_D30:
+ case UNW_AARCH64_V30:
return "d30";
- case UNW_ARM64_D31:
+ case UNW_AARCH64_V31:
return "d31";
default:
return "unknown register";
@@ -2029,21 +2047,21 @@ inline const char *Registers_arm64::getRegisterName(int regNum) {
}
inline bool Registers_arm64::validFloatRegister(int regNum) const {
- if (regNum < UNW_ARM64_D0)
+ if (regNum < UNW_AARCH64_V0)
return false;
- if (regNum > UNW_ARM64_D31)
+ if (regNum > UNW_AARCH64_V31)
return false;
return true;
}
inline double Registers_arm64::getFloatRegister(int regNum) const {
assert(validFloatRegister(regNum));
- return _vectorHalfRegisters[regNum - UNW_ARM64_D0];
+ return _vectorHalfRegisters[regNum - UNW_AARCH64_V0];
}
inline void Registers_arm64::setFloatRegister(int regNum, double value) {
assert(validFloatRegister(regNum));
- _vectorHalfRegisters[regNum - UNW_ARM64_D0] = value;
+ _vectorHalfRegisters[regNum - UNW_AARCH64_V0] = value;
}
inline bool Registers_arm64::validVectorRegister(int) const {
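The bulk of the Registers.hpp change is the mechanical rename of the AArch64 register constants from UNW_ARM64_* to UNW_AARCH64_*, with the D floating-point names becoming V names. The rename is also visible through the public libunwind API. A minimal sketch, assuming LLVM's <libunwind.h> on an AArch64 target and that the renamed constants are exposed there (the libunwind.h hunk itself is not part of this excerpt):

#include <libunwind.h>
#include <stdio.h>

int main(void) {
#if defined(__aarch64__)
  unw_context_t context;
  unw_cursor_t cursor;
  unw_word_t fp, ip;

  unw_getcontext(&context);
  unw_init_local(&cursor, &context);
  unw_get_reg(&cursor, UNW_AARCH64_FP, &fp); /* formerly spelled UNW_ARM64_FP */
  unw_get_reg(&cursor, UNW_REG_IP, &ip);
  printf("ip=0x%llx fp=0x%llx\n",
         (unsigned long long)ip, (unsigned long long)fp);
#endif
  return 0;
}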
diff --git a/libunwind/src/Unwind-EHABI.cpp b/libunwind/src/Unwind-EHABI.cpp
index 32b5cbc3be92..d3577c9f7cf8 100644
--- a/libunwind/src/Unwind-EHABI.cpp
+++ b/libunwind/src/Unwind-EHABI.cpp
@@ -1,4 +1,4 @@
-//===--------------------------- Unwind-EHABI.cpp -------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -97,9 +97,11 @@ _Unwind_Reason_Code ProcessDescriptors(
case Descriptor::LU32:
descriptor = getNextWord(descriptor, &length);
descriptor = getNextWord(descriptor, &offset);
+ break;
case Descriptor::LU16:
descriptor = getNextNibble(descriptor, &length);
descriptor = getNextNibble(descriptor, &offset);
+ break;
default:
assert(false);
return _URC_FAILURE;
@@ -461,6 +463,7 @@ unwind_phase1(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *except
return _URC_FATAL_PHASE1_ERROR;
}
+#ifndef NDEBUG
// When tracing, print state information.
if (_LIBUNWIND_TRACING_UNWINDING) {
char functionBuf[512];
@@ -479,6 +482,7 @@ unwind_phase1(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *except
frameInfo.start_ip, functionName,
frameInfo.lsda, frameInfo.handler);
}
+#endif
// If there is a personality routine, ask it if it will want to stop at
// this frame.
@@ -580,6 +584,7 @@ static _Unwind_Reason_Code unwind_phase2(unw_context_t *uc, unw_cursor_t *cursor
return _URC_FATAL_PHASE2_ERROR;
}
+#ifndef NDEBUG
// When tracing, print state information.
if (_LIBUNWIND_TRACING_UNWINDING) {
char functionBuf[512];
@@ -596,11 +601,12 @@ static _Unwind_Reason_Code unwind_phase2(unw_context_t *uc, unw_cursor_t *cursor
functionName, sp, frameInfo.lsda,
frameInfo.handler);
}
+#endif
// If there is a personality routine, tell it we are unwinding.
if (frameInfo.handler != 0) {
_Unwind_Personality_Fn p =
- (_Unwind_Personality_Fn)(long)(frameInfo.handler);
+ (_Unwind_Personality_Fn)(intptr_t)(frameInfo.handler);
struct _Unwind_Context *context = (struct _Unwind_Context *)(cursor);
// EHABI #7.2
exception_object->pr_cache.fnstart = frameInfo.start_ip;
@@ -668,6 +674,114 @@ static _Unwind_Reason_Code unwind_phase2(unw_context_t *uc, unw_cursor_t *cursor
return _URC_FATAL_PHASE2_ERROR;
}
+static _Unwind_Reason_Code
+unwind_phase2_forced(unw_context_t *uc, unw_cursor_t *cursor,
+ _Unwind_Exception *exception_object, _Unwind_Stop_Fn stop,
+ void *stop_parameter) {
+ // See comment at the start of unwind_phase1 regarding VRS integrity.
+ __unw_init_local(cursor, uc);
+ _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p)",
+ static_cast<void *>(exception_object));
+ // Walk each frame until we reach where search phase said to stop
+ while (true) {
+ // Update info about this frame.
+ unw_proc_info_t frameInfo;
+ if (__unw_get_proc_info(cursor, &frameInfo) != UNW_ESUCCESS) {
+ _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): "
+ "__unw_get_proc_info failed => _URC_FATAL_PHASE2_ERROR",
+ (void *)exception_object);
+ return _URC_FATAL_PHASE2_ERROR;
+ }
+
+#ifndef NDEBUG
+ // When tracing, print state information.
+ if (_LIBUNWIND_TRACING_UNWINDING) {
+ char functionBuf[512];
+ const char *functionName = functionBuf;
+ unw_word_t offset;
+ if ((__unw_get_proc_name(cursor, functionBuf, sizeof(functionBuf),
+ &offset) != UNW_ESUCCESS) ||
+ (frameInfo.start_ip + offset > frameInfo.end_ip))
+ functionName = ".anonymous.";
+ _LIBUNWIND_TRACE_UNWINDING(
+ "unwind_phase2_forced(ex_ojb=%p): start_ip=0x%" PRIxPTR
+ ", func=%s, lsda=0x%" PRIxPTR ", personality=0x%" PRIxPTR,
+ (void *)exception_object, frameInfo.start_ip, functionName,
+ frameInfo.lsda, frameInfo.handler);
+ }
+#endif
+
+ // Call stop function at each frame.
+ _Unwind_Action action =
+ (_Unwind_Action)(_UA_FORCE_UNWIND | _UA_CLEANUP_PHASE);
+ _Unwind_Reason_Code stopResult =
+ (*stop)(1, action, exception_object->exception_class, exception_object,
+ (_Unwind_Context *)(cursor), stop_parameter);
+ _LIBUNWIND_TRACE_UNWINDING(
+ "unwind_phase2_forced(ex_ojb=%p): stop function returned %d",
+ (void *)exception_object, stopResult);
+ if (stopResult != _URC_NO_REASON) {
+ _LIBUNWIND_TRACE_UNWINDING(
+ "unwind_phase2_forced(ex_ojb=%p): stopped by stop function",
+ (void *)exception_object);
+ return _URC_FATAL_PHASE2_ERROR;
+ }
+
+ // If there is a personality routine, tell it we are unwinding.
+ if (frameInfo.handler != 0) {
+ _Unwind_Personality_Fn p =
+ (_Unwind_Personality_Fn)(uintptr_t)(frameInfo.handler);
+ struct _Unwind_Context *context = (struct _Unwind_Context *)(cursor);
+ // EHABI #7.2
+ exception_object->pr_cache.fnstart = frameInfo.start_ip;
+ exception_object->pr_cache.ehtp =
+ (_Unwind_EHT_Header *)frameInfo.unwind_info;
+ exception_object->pr_cache.additional = frameInfo.flags;
+ _Unwind_Reason_Code personalityResult =
+ (*p)(_US_FORCE_UNWIND | _US_UNWIND_FRAME_STARTING, exception_object,
+ context);
+ switch (personalityResult) {
+ case _URC_CONTINUE_UNWIND:
+ _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): "
+ "personality returned "
+ "_URC_CONTINUE_UNWIND",
+ (void *)exception_object);
+ // Destructors called, continue unwinding
+ break;
+ case _URC_INSTALL_CONTEXT:
+ _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): "
+ "personality returned "
+ "_URC_INSTALL_CONTEXT",
+ (void *)exception_object);
+ // We may get control back if landing pad calls _Unwind_Resume().
+ __unw_resume(cursor);
+ break;
+ default:
+ // Personality routine returned an unknown result code.
+ _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): "
+ "personality returned %d, "
+ "_URC_FATAL_PHASE2_ERROR",
+ (void *)exception_object, personalityResult);
+ return _URC_FATAL_PHASE2_ERROR;
+ }
+ }
+ }
+
+ // Call stop function one last time and tell it we've reached the end
+ // of the stack.
+ _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): calling stop "
+ "function with _UA_END_OF_STACK",
+ (void *)exception_object);
+ _Unwind_Action lastAction =
+ (_Unwind_Action)(_UA_FORCE_UNWIND | _UA_CLEANUP_PHASE | _UA_END_OF_STACK);
+ (*stop)(1, lastAction, exception_object->exception_class, exception_object,
+ (struct _Unwind_Context *)(cursor), stop_parameter);
+
+ // Clean up phase did not resume at the frame that the search phase said it
+ // would.
+ return _URC_FATAL_PHASE2_ERROR;
+}
+
/// Called by __cxa_throw. Only returns if there is a fatal error.
_LIBUNWIND_EXPORT _Unwind_Reason_Code
_Unwind_RaiseException(_Unwind_Exception *exception_object) {
@@ -715,10 +829,13 @@ _Unwind_Resume(_Unwind_Exception *exception_object) {
unw_cursor_t cursor;
__unw_getcontext(&uc);
- // _Unwind_RaiseException on EHABI will always set the reserved1 field to 0,
- // which is in the same position as private_1 below.
- // TODO(ajwong): Who wronte the above? Why is it true?
- unwind_phase2(&uc, &cursor, exception_object, true);
+ if (exception_object->unwinder_cache.reserved1)
+ unwind_phase2_forced(
+ &uc, &cursor, exception_object,
+ (_Unwind_Stop_Fn)exception_object->unwinder_cache.reserved1,
+ (void *)exception_object->unwinder_cache.reserved3);
+ else
+ unwind_phase2(&uc, &cursor, exception_object, true);
// Clients assume _Unwind_Resume() does not return, so all we can do is abort.
_LIBUNWIND_ABORT("_Unwind_Resume() can't return");
@@ -965,6 +1082,27 @@ _Unwind_VRS_Pop(_Unwind_Context *context, _Unwind_VRS_RegClass regclass,
_LIBUNWIND_ABORT("unsupported register class");
}
+/// Not used by C++.
+/// Unwinds stack, calling "stop" function at each frame.
+/// Could be used to implement longjmp().
+_LIBUNWIND_EXPORT _Unwind_Reason_Code
+_Unwind_ForcedUnwind(_Unwind_Exception *exception_object, _Unwind_Stop_Fn stop,
+ void *stop_parameter) {
+ _LIBUNWIND_TRACE_API("_Unwind_ForcedUnwind(ex_obj=%p, stop=%p)",
+ (void *)exception_object, (void *)(uintptr_t)stop);
+ unw_context_t uc;
+ unw_cursor_t cursor;
+ __unw_getcontext(&uc);
+
+ // Mark that this is a forced unwind, so _Unwind_Resume() can do
+ // the right thing.
+ exception_object->unwinder_cache.reserved1 = (uintptr_t)stop;
+ exception_object->unwinder_cache.reserved3 = (uintptr_t)stop_parameter;
+
+ return unwind_phase2_forced(&uc, &cursor, exception_object, stop,
+ stop_parameter);
+}
+
/// Called by personality handler during phase 2 to find the start of the
/// function.
_LIBUNWIND_EXPORT uintptr_t
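unwind_phase2_forced and the new _Unwind_ForcedUnwind entry point drive the cleanup phase under the control of a caller-supplied stop function that is consulted at every frame, and once more with _UA_END_OF_STACK when the walk runs off the stack. A minimal sketch of such a stop function, assuming only the usual <unwind.h> declarations (real callers are runtimes, for example pthread cancellation or a longjmp implementation):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unwind.h>

/* The exception-class parameter is written as uint64_t here; its exact
 * typedef differs between the Itanium and EHABI flavours of <unwind.h>. */
static _Unwind_Reason_Code
stop_at_end(int version, _Unwind_Action actions, uint64_t exceptionClass,
            struct _Unwind_Exception *exceptionObject,
            struct _Unwind_Context *context, void *stop_parameter) {
  (void)version; (void)exceptionClass; (void)exceptionObject;
  (void)context; (void)stop_parameter;
  if (actions & _UA_END_OF_STACK) {
    /* No frame stopped the unwind; a real runtime would terminate here. */
    fprintf(stderr, "forced unwind reached the end of the stack\n");
    exit(1);
  }
  return _URC_NO_REASON; /* keep unwinding through this frame */
}

/* Illustrative use: given an in-flight exception object exc,
 *   _Unwind_ForcedUnwind(exc, stop_at_end, NULL);
 * stores the stop function in the unwinder cache, so a later
 * _Unwind_Resume() re-enters unwind_phase2_forced() as shown above. */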
diff --git a/libunwind/src/Unwind-EHABI.h b/libunwind/src/Unwind-EHABI.h
index 6897082a337f..ff3b5fc6fea7 100644
--- a/libunwind/src/Unwind-EHABI.h
+++ b/libunwind/src/Unwind-EHABI.h
@@ -1,4 +1,4 @@
-//===------------------------- Unwind-EHABI.hpp ---------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libunwind/src/Unwind-seh.cpp b/libunwind/src/Unwind-seh.cpp
index 6e2b4e73e41e..f00bc4721ba4 100644
--- a/libunwind/src/Unwind-seh.cpp
+++ b/libunwind/src/Unwind-seh.cpp
@@ -1,4 +1,4 @@
-//===--------------------------- Unwind-seh.cpp ---------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -169,8 +169,8 @@ _GCC_specific_handler(PEXCEPTION_RECORD ms_exc, PVOID frame, PCONTEXT ms_ctx,
__unw_get_reg(&cursor, UNW_ARM_R1, &exc->private_[3]);
#elif defined(__aarch64__)
exc->private_[2] = disp->TargetPc;
- __unw_get_reg(&cursor, UNW_ARM64_X0, &retval);
- __unw_get_reg(&cursor, UNW_ARM64_X1, &exc->private_[3]);
+ __unw_get_reg(&cursor, UNW_AARCH64_X0, &retval);
+ __unw_get_reg(&cursor, UNW_AARCH64_X1, &exc->private_[3]);
#endif
__unw_get_reg(&cursor, UNW_REG_IP, &target);
ms_exc->ExceptionCode = STATUS_GCC_UNWIND;
@@ -244,6 +244,7 @@ unwind_phase2_forced(unw_context_t *uc,
return _URC_FATAL_PHASE2_ERROR;
}
+#ifndef NDEBUG
// When tracing, print state information.
if (_LIBUNWIND_TRACING_UNWINDING) {
char functionBuf[512];
@@ -259,6 +260,7 @@ unwind_phase2_forced(unw_context_t *uc,
(void *)exception_object, frameInfo.start_ip, functionName,
frameInfo.lsda, frameInfo.handler);
}
+#endif
// Call stop function at each frame.
_Unwind_Action action =
diff --git a/libunwind/src/Unwind-sjlj.c b/libunwind/src/Unwind-sjlj.c
index fd2a95b74c44..d487995bb78e 100644
--- a/libunwind/src/Unwind-sjlj.c
+++ b/libunwind/src/Unwind-sjlj.c
@@ -1,4 +1,4 @@
-//===--------------------------- Unwind-sjlj.c ----------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libunwind/src/UnwindCursor.hpp b/libunwind/src/UnwindCursor.hpp
index 757d9808a978..3931df0b3351 100644
--- a/libunwind/src/UnwindCursor.hpp
+++ b/libunwind/src/UnwindCursor.hpp
@@ -1,4 +1,4 @@
-//===------------------------- UnwindCursor.hpp ---------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -11,6 +11,7 @@
#ifndef __UNWINDCURSOR_HPP__
#define __UNWINDCURSOR_HPP__
+#include "cet_unwind.h"
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
@@ -449,6 +450,12 @@ public:
#ifdef __arm__
virtual void saveVFPAsX() { _LIBUNWIND_ABORT("saveVFPAsX not implemented"); }
#endif
+
+#if defined(_LIBUNWIND_USE_CET)
+ virtual void *get_registers() {
+ _LIBUNWIND_ABORT("get_registers not implemented");
+ }
+#endif
};
#if defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) && defined(_WIN32)
@@ -620,12 +627,12 @@ UnwindCursor<A, R>::UnwindCursor(unw_context_t *context, A &as)
_msContext.D[i - UNW_ARM_D0] = d.w;
}
#elif defined(_LIBUNWIND_TARGET_AARCH64)
- for (int i = UNW_ARM64_X0; i <= UNW_ARM64_X30; ++i)
- _msContext.X[i - UNW_ARM64_X0] = r.getRegister(i);
+ for (int i = UNW_AARCH64_X0; i <= UNW_AARCH64_X30; ++i)
+ _msContext.X[i - UNW_AARCH64_X0] = r.getRegister(i);
_msContext.Sp = r.getRegister(UNW_REG_SP);
_msContext.Pc = r.getRegister(UNW_REG_IP);
- for (int i = UNW_ARM64_D0; i <= UNW_ARM64_D31; ++i)
- _msContext.V[i - UNW_ARM64_D0].D[0] = r.getFloatRegister(i);
+ for (int i = UNW_AARCH64_V0; i <= UNW_AARCH64_V31; ++i)
+ _msContext.V[i - UNW_AARCH64_V0].D[0] = r.getFloatRegister(i);
#endif
}
@@ -650,7 +657,7 @@ bool UnwindCursor<A, R>::validReg(int regNum) {
#elif defined(_LIBUNWIND_TARGET_ARM)
if (regNum >= UNW_ARM_R0 && regNum <= UNW_ARM_R15) return true;
#elif defined(_LIBUNWIND_TARGET_AARCH64)
- if (regNum >= UNW_ARM64_X0 && regNum <= UNW_ARM64_X30) return true;
+ if (regNum >= UNW_AARCH64_X0 && regNum <= UNW_AARCH64_X30) return true;
#endif
return false;
}
@@ -699,7 +706,7 @@ unw_word_t UnwindCursor<A, R>::getReg(int regNum) {
#elif defined(_LIBUNWIND_TARGET_AARCH64)
case UNW_REG_SP: return _msContext.Sp;
case UNW_REG_IP: return _msContext.Pc;
- default: return _msContext.X[regNum - UNW_ARM64_X0];
+ default: return _msContext.X[regNum - UNW_AARCH64_X0];
#endif
}
_LIBUNWIND_ABORT("unsupported register");
@@ -749,37 +756,37 @@ void UnwindCursor<A, R>::setReg(int regNum, unw_word_t value) {
#elif defined(_LIBUNWIND_TARGET_AARCH64)
case UNW_REG_SP: _msContext.Sp = value; break;
case UNW_REG_IP: _msContext.Pc = value; break;
- case UNW_ARM64_X0:
- case UNW_ARM64_X1:
- case UNW_ARM64_X2:
- case UNW_ARM64_X3:
- case UNW_ARM64_X4:
- case UNW_ARM64_X5:
- case UNW_ARM64_X6:
- case UNW_ARM64_X7:
- case UNW_ARM64_X8:
- case UNW_ARM64_X9:
- case UNW_ARM64_X10:
- case UNW_ARM64_X11:
- case UNW_ARM64_X12:
- case UNW_ARM64_X13:
- case UNW_ARM64_X14:
- case UNW_ARM64_X15:
- case UNW_ARM64_X16:
- case UNW_ARM64_X17:
- case UNW_ARM64_X18:
- case UNW_ARM64_X19:
- case UNW_ARM64_X20:
- case UNW_ARM64_X21:
- case UNW_ARM64_X22:
- case UNW_ARM64_X23:
- case UNW_ARM64_X24:
- case UNW_ARM64_X25:
- case UNW_ARM64_X26:
- case UNW_ARM64_X27:
- case UNW_ARM64_X28:
- case UNW_ARM64_FP:
- case UNW_ARM64_LR: _msContext.X[regNum - UNW_ARM64_X0] = value; break;
+ case UNW_AARCH64_X0:
+ case UNW_AARCH64_X1:
+ case UNW_AARCH64_X2:
+ case UNW_AARCH64_X3:
+ case UNW_AARCH64_X4:
+ case UNW_AARCH64_X5:
+ case UNW_AARCH64_X6:
+ case UNW_AARCH64_X7:
+ case UNW_AARCH64_X8:
+ case UNW_AARCH64_X9:
+ case UNW_AARCH64_X10:
+ case UNW_AARCH64_X11:
+ case UNW_AARCH64_X12:
+ case UNW_AARCH64_X13:
+ case UNW_AARCH64_X14:
+ case UNW_AARCH64_X15:
+ case UNW_AARCH64_X16:
+ case UNW_AARCH64_X17:
+ case UNW_AARCH64_X18:
+ case UNW_AARCH64_X19:
+ case UNW_AARCH64_X20:
+ case UNW_AARCH64_X21:
+ case UNW_AARCH64_X22:
+ case UNW_AARCH64_X23:
+ case UNW_AARCH64_X24:
+ case UNW_AARCH64_X25:
+ case UNW_AARCH64_X26:
+ case UNW_AARCH64_X27:
+ case UNW_AARCH64_X28:
+ case UNW_AARCH64_FP:
+ case UNW_AARCH64_LR: _msContext.X[regNum - UNW_AARCH64_X0] = value; break;
#endif
default:
_LIBUNWIND_ABORT("unsupported register");
@@ -792,7 +799,7 @@ bool UnwindCursor<A, R>::validFloatReg(int regNum) {
if (regNum >= UNW_ARM_S0 && regNum <= UNW_ARM_S31) return true;
if (regNum >= UNW_ARM_D0 && regNum <= UNW_ARM_D31) return true;
#elif defined(_LIBUNWIND_TARGET_AARCH64)
- if (regNum >= UNW_ARM64_D0 && regNum <= UNW_ARM64_D31) return true;
+ if (regNum >= UNW_AARCH64_V0 && regNum <= UNW_AARCH64_V31) return true;
#else
(void)regNum;
#endif
@@ -820,7 +827,7 @@ unw_fpreg_t UnwindCursor<A, R>::getFloatReg(int regNum) {
}
_LIBUNWIND_ABORT("unsupported float register");
#elif defined(_LIBUNWIND_TARGET_AARCH64)
- return _msContext.V[regNum - UNW_ARM64_D0].D[0];
+ return _msContext.V[regNum - UNW_AARCH64_V0].D[0];
#else
(void)regNum;
_LIBUNWIND_ABORT("float registers unimplemented");
@@ -848,7 +855,7 @@ void UnwindCursor<A, R>::setFloatReg(int regNum, unw_fpreg_t value) {
}
_LIBUNWIND_ABORT("unsupported float register");
#elif defined(_LIBUNWIND_TARGET_AARCH64)
- _msContext.V[regNum - UNW_ARM64_D0].D[0] = value;
+ _msContext.V[regNum - UNW_AARCH64_V0].D[0] = value;
#else
(void)regNum;
(void)value;
@@ -901,6 +908,9 @@ public:
virtual void saveVFPAsX();
#endif
+#if defined(_LIBUNWIND_USE_CET)
+ virtual void *get_registers() { return &_registers; }
+#endif
// libunwind does not and should not depend on C++ library which means that we
// need our own defition of inline placement new.
static void *operator new(size_t, UnwindCursor<A, R> *p) { return p; }
@@ -2061,7 +2071,7 @@ int UnwindCursor<A, R>::stepThroughSigReturn(Registers_arm64 &) {
for (int i = 0; i <= 30; ++i) {
uint64_t value = _addressSpace.get64(sigctx + kOffsetGprs +
static_cast<pint_t>(i * 8));
- _registers.setRegister(UNW_ARM64_X0 + i, value);
+ _registers.setRegister(UNW_AARCH64_X0 + i, value);
}
_registers.setSP(_addressSpace.get64(sigctx + kOffsetSp));
_registers.setIP(_addressSpace.get64(sigctx + kOffsetPc));
@@ -2125,6 +2135,12 @@ bool UnwindCursor<A, R>::getFunctionName(char *buf, size_t bufLen,
buf, bufLen, offset);
}
+#if defined(_LIBUNWIND_USE_CET)
+extern "C" void *__libunwind_cet_get_registers(unw_cursor_t *cursor) {
+ AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor;
+ return co->get_registers();
+}
+#endif
} // namespace libunwind
#endif // __UNWINDCURSOR_HPP__
diff --git a/libunwind/src/UnwindLevel1-gcc-ext.c b/libunwind/src/UnwindLevel1-gcc-ext.c
index 310b836d129e..951d5d219a3e 100644
--- a/libunwind/src/UnwindLevel1-gcc-ext.c
+++ b/libunwind/src/UnwindLevel1-gcc-ext.c
@@ -1,4 +1,4 @@
-//===--------------------- UnwindLevel1-gcc-ext.c -------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -25,31 +25,24 @@
#if defined(_LIBUNWIND_BUILD_ZERO_COST_APIS)
#if defined(_LIBUNWIND_SUPPORT_SEH_UNWIND)
-#define private_1 private_[0]
+#define PRIVATE_1 private_[0]
+#elif defined(_LIBUNWIND_ARM_EHABI)
+#define PRIVATE_1 unwinder_cache.reserved1
+#else
+#define PRIVATE_1 private_1
#endif
/// Called by __cxa_rethrow().
_LIBUNWIND_EXPORT _Unwind_Reason_Code
_Unwind_Resume_or_Rethrow(_Unwind_Exception *exception_object) {
-#if defined(_LIBUNWIND_ARM_EHABI)
- _LIBUNWIND_TRACE_API("_Unwind_Resume_or_Rethrow(ex_obj=%p), private_1=%ld",
- (void *)exception_object,
- (long)exception_object->unwinder_cache.reserved1);
-#else
- _LIBUNWIND_TRACE_API("_Unwind_Resume_or_Rethrow(ex_obj=%p), private_1=%" PRIdPTR,
- (void *)exception_object,
- (intptr_t)exception_object->private_1);
-#endif
+ _LIBUNWIND_TRACE_API(
+ "_Unwind_Resume_or_Rethrow(ex_obj=%p), private_1=%" PRIdPTR,
+ (void *)exception_object, (intptr_t)exception_object->PRIVATE_1);
-#if defined(_LIBUNWIND_ARM_EHABI)
- // _Unwind_RaiseException on EHABI will always set the reserved1 field to 0,
- // which is in the same position as private_1 below.
- return _Unwind_RaiseException(exception_object);
-#else
// If this is non-forced and a stopping place was found, then this is a
// re-throw.
// Call _Unwind_RaiseException() as if this was a new exception
- if (exception_object->private_1 == 0) {
+ if (exception_object->PRIVATE_1 == 0) {
return _Unwind_RaiseException(exception_object);
// Will return if there is no catch clause, so that __cxa_rethrow can call
// std::terminate().
@@ -60,10 +53,8 @@ _Unwind_Resume_or_Rethrow(_Unwind_Exception *exception_object) {
_Unwind_Resume(exception_object);
_LIBUNWIND_ABORT("_Unwind_Resume_or_Rethrow() called _Unwind_RaiseException()"
" which unexpectedly returned");
-#endif
}
-
/// Called by personality handler during phase 2 to get base address for data
/// relative encodings.
_LIBUNWIND_EXPORT uintptr_t
@@ -118,7 +109,7 @@ _Unwind_Backtrace(_Unwind_Trace_Fn callback, void *ref) {
// Create a mock exception object for force unwinding.
_Unwind_Exception ex;
memset(&ex, '\0', sizeof(ex));
- ex.exception_class = 0x434C4E47554E5700; // CLNGUNW\0
+ strcpy((char *)&ex.exception_class, "CLNGUNW");
#endif
// walk each frame
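The strcpy form works with either exception-header layout: the ARM EHABI control block declares exception_class as uint8_t[8] (see the new unwind_arm_ehabi.h earlier in this patch), the Itanium header uses uint64_t, and copying the 7-character tag plus its terminating NUL fills exactly 8 bytes in both cases, whereas the old integer assignment only compiles for the latter. A standalone sketch, with no unwinder involved:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void) {
  uint64_t itanium_class  = 0;   /* Itanium ABI: 64-bit integer */
  uint8_t  ehabi_class[8] = {0}; /* ARM EHABI: eight-byte array */

  strcpy((char *)&itanium_class, "CLNGUNW"); /* 7 chars + NUL = 8 bytes */
  strcpy((char *)ehabi_class, "CLNGUNW");

  printf("itanium_class = 0x%016llx\n", (unsigned long long)itanium_class);
  printf("ehabi_class   = %s\n", (const char *)ehabi_class);
  return 0;
}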
diff --git a/libunwind/src/UnwindLevel1.c b/libunwind/src/UnwindLevel1.c
index 68e5e48b8c05..5c1f99d4bfa5 100644
--- a/libunwind/src/UnwindLevel1.c
+++ b/libunwind/src/UnwindLevel1.c
@@ -1,4 +1,4 @@
-//===------------------------- UnwindLevel1.c -----------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -25,6 +25,7 @@
#include <stdio.h>
#include <string.h>
+#include "cet_unwind.h"
#include "config.h"
#include "libunwind.h"
#include "libunwind_ext.h"
@@ -34,6 +35,38 @@
#ifndef _LIBUNWIND_SUPPORT_SEH_UNWIND
+// When CET is enabled, each "call" instruction pushes the return address onto
+// the CET shadow stack, and each "ret" instruction pops the current shadow
+// stack top and compares it with the target address to which the program
+// returns. During exception handling, some stack frames are skipped before
+// jumping to the landing pad, so the CET shadow stack must be adjusted
+// accordingly. _LIBUNWIND_POP_CET_SSP adjusts the CET shadow stack pointer,
+// and we jump directly to __libunwind_Registers_x86/x86_64_jumpto instead of
+// using a regular function call, to avoid pushing onto the shadow stack again.
+#if !defined(_LIBUNWIND_USE_CET)
+#define __unw_phase2_resume(cursor, fn) __unw_resume((cursor))
+#elif defined(_LIBUNWIND_TARGET_I386)
+#define __unw_phase2_resume(cursor, fn) \
+ do { \
+ _LIBUNWIND_POP_CET_SSP((fn)); \
+ void *cetRegContext = __libunwind_cet_get_registers((cursor)); \
+ void *cetJumpAddress = __libunwind_cet_get_jump_target(); \
+ __asm__ volatile("push %%edi\n\t" \
+ "sub $4, %%esp\n\t" \
+ "jmp *%%edx\n\t" :: "D"(cetRegContext), \
+ "d"(cetJumpAddress)); \
+ } while (0)
+#elif defined(_LIBUNWIND_TARGET_X86_64)
+#define __unw_phase2_resume(cursor, fn) \
+ do { \
+ _LIBUNWIND_POP_CET_SSP((fn)); \
+ void *cetRegContext = __libunwind_cet_get_registers((cursor)); \
+ void *cetJumpAddress = __libunwind_cet_get_jump_target(); \
+ __asm__ volatile("jmpq *%%rdx\n\t" :: "D"(cetRegContext), \
+ "d"(cetJumpAddress)); \
+ } while (0)
+#endif
+
static _Unwind_Reason_Code
unwind_phase1(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *exception_object) {
__unw_init_local(cursor, uc);
@@ -68,6 +101,7 @@ unwind_phase1(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *except
return _URC_FATAL_PHASE1_ERROR;
}
+#ifndef NDEBUG
// When tracing, print state information.
if (_LIBUNWIND_TRACING_UNWINDING) {
char functionBuf[512];
@@ -85,6 +119,7 @@ unwind_phase1(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *except
(void *)exception_object, pc, frameInfo.start_ip, functionName,
frameInfo.lsda, frameInfo.handler);
}
+#endif
// If there is a personality routine, ask it if it will want to stop at
// this frame.
@@ -135,6 +170,9 @@ unwind_phase2(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *except
_LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p)",
(void *)exception_object);
+ // uc is initialized by __unw_getcontext in the parent frame. The first stack
+ // frame walked is unwind_phase2.
+ unsigned framesWalked = 1;
// Walk each frame until we reach where search phase said to stop.
while (true) {
@@ -167,6 +205,7 @@ unwind_phase2(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *except
return _URC_FATAL_PHASE2_ERROR;
}
+#ifndef NDEBUG
// When tracing, print state information.
if (_LIBUNWIND_TRACING_UNWINDING) {
char functionBuf[512];
@@ -183,7 +222,9 @@ unwind_phase2(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *except
functionName, sp, frameInfo.lsda,
frameInfo.handler);
}
+#endif
+ ++framesWalked;
// If there is a personality routine, tell it we are unwinding.
if (frameInfo.handler != 0) {
_Unwind_Personality_Fn p =
@@ -223,8 +264,9 @@ unwind_phase2(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *except
", sp=0x%" PRIxPTR,
(void *)exception_object, pc, sp);
}
- __unw_resume(cursor);
- // __unw_resume() only returns if there was an error.
+
+ __unw_phase2_resume(cursor, framesWalked);
+ // __unw_phase2_resume() only returns if there was an error.
return _URC_FATAL_PHASE2_ERROR;
default:
// Personality routine returned an unknown result code.
@@ -246,6 +288,9 @@ unwind_phase2_forced(unw_context_t *uc, unw_cursor_t *cursor,
_Unwind_Stop_Fn stop, void *stop_parameter) {
__unw_init_local(cursor, uc);
+ // uc is initialized by __unw_getcontext in the parent frame. The first stack
+ // frame walked is unwind_phase2_forced.
+ unsigned framesWalked = 1;
// Walk each frame until we reach where search phase said to stop
while (__unw_step(cursor) > 0) {
@@ -258,6 +303,7 @@ unwind_phase2_forced(unw_context_t *uc, unw_cursor_t *cursor,
return _URC_FATAL_PHASE2_ERROR;
}
+#ifndef NDEBUG
// When tracing, print state information.
if (_LIBUNWIND_TRACING_UNWINDING) {
char functionBuf[512];
@@ -273,6 +319,7 @@ unwind_phase2_forced(unw_context_t *uc, unw_cursor_t *cursor,
(void *)exception_object, frameInfo.start_ip, functionName,
frameInfo.lsda, frameInfo.handler);
}
+#endif
// Call stop function at each frame.
_Unwind_Action action =
@@ -290,6 +337,7 @@ unwind_phase2_forced(unw_context_t *uc, unw_cursor_t *cursor,
return _URC_FATAL_PHASE2_ERROR;
}
+ ++framesWalked;
// If there is a personality routine, tell it we are unwinding.
if (frameInfo.handler != 0) {
_Unwind_Personality_Fn p =
@@ -314,7 +362,7 @@ unwind_phase2_forced(unw_context_t *uc, unw_cursor_t *cursor,
"_URC_INSTALL_CONTEXT",
(void *)exception_object);
// We may get control back if landing pad calls _Unwind_Resume().
- __unw_resume(cursor);
+ __unw_phase2_resume(cursor, framesWalked);
break;
default:
// Personality routine returned an unknown result code.
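The CET comment at the top of this file describes a simple invariant: every call pushes its return address onto a hardware-maintained shadow stack, every return pops an entry and compares it against the actual return target, so when the unwinder bypasses frames it must also discard the matching shadow-stack entries before jumping to the landing pad, which is what passing framesWalked to _LIBUNWIND_POP_CET_SSP in __unw_phase2_resume arranges. A toy, software-only model of that bookkeeping (purely illustrative; the real shadow stack is managed by the hardware):

#include <stdio.h>

static unsigned long shadow[16]; /* stand-in for the CET shadow stack */
static int top;

static void sim_call(unsigned long ret) { shadow[top++] = ret; }
static int  sim_ret(unsigned long ret)  { return shadow[--top] == ret; }

int main(void) {
  sim_call(0x1000); /* main -> f                            */
  sim_call(0x2000); /* f    -> g                            */
  sim_call(0x3000); /* g    -> h, and h raises an exception */

  /* The handler lives in f, so the frames of h and g are skipped.  The
   * matching shadow-stack entries must be discarded before control
   * resumes in f, mirroring the framesWalked adjustment above.        */
  int skipped = 2;
  top -= skipped;

  printf("return to main checks out: %s\n", sim_ret(0x1000) ? "yes" : "no");
  return 0;
}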
diff --git a/libunwind/src/UnwindRegistersRestore.S b/libunwind/src/UnwindRegistersRestore.S
index d8bf1adee416..497bf46dbdd4 100644
--- a/libunwind/src/UnwindRegistersRestore.S
+++ b/libunwind/src/UnwindRegistersRestore.S
@@ -1,4 +1,4 @@
-//===-------------------- UnwindRegistersRestore.S ------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -25,6 +25,8 @@ DEFINE_LIBUNWIND_FUNCTION(__libunwind_Registers_x86_jumpto)
# + return address +
# +-----------------------+ <-- SP
# + +
+
+ _LIBUNWIND_CET_ENDBR
movl 4(%esp), %eax
# set up eax and ret on new stack location
movl 28(%eax), %edx # edx holds new stack pointer
@@ -46,7 +48,8 @@ DEFINE_LIBUNWIND_FUNCTION(__libunwind_Registers_x86_jumpto)
# skip ss
# skip eflags
pop %eax # eax was already pushed on new stack
- ret # eip was already pushed on new stack
+ pop %ecx
+ jmp *%ecx
# skip cs
# skip ds
# skip es
@@ -70,6 +73,7 @@ DEFINE_LIBUNWIND_FUNCTION(__libunwind_Registers_x86_64_jumpto)
# On entry, thread_state pointer is in rdi
#endif
+ _LIBUNWIND_CET_ENDBR
movq 56(%rdi), %rax # rax holds new stack pointer
subq $16, %rax
movq %rax, 56(%rdi)
@@ -119,7 +123,8 @@ DEFINE_LIBUNWIND_FUNCTION(__libunwind_Registers_x86_64_jumpto)
#endif
movq 56(%rdi), %rsp # cut back rsp to new location
pop %rdi # rdi was saved here earlier
- ret # rip was saved here
+ pop %rcx
+ jmpq *%rcx
#elif defined(__powerpc64__)
@@ -800,11 +805,12 @@ DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind14Registers_or1k6jumptoEv)
l.lwz r30,120(r3)
l.lwz r31,124(r3)
+ # load new pc into ra
+ l.lwz r9, 128(r3)
+
# at last, restore r3
l.lwz r3, 12(r3)
- # load new pc into ra
- l.lwz r9, 128(r3)
# jump to pc
l.jr r9
l.nop
diff --git a/libunwind/src/UnwindRegistersSave.S b/libunwind/src/UnwindRegistersSave.S
index f66dc532c23c..e77012e5c613 100644
--- a/libunwind/src/UnwindRegistersSave.S
+++ b/libunwind/src/UnwindRegistersSave.S
@@ -1,4 +1,4 @@
-//===------------------------ UnwindRegistersSave.S -----------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -27,6 +27,8 @@
# + +
#
DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext)
+
+ _LIBUNWIND_CET_ENDBR
push %eax
movl 8(%esp), %eax
movl %ebx, 4(%eax)
@@ -70,6 +72,7 @@ DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext)
#define TMP %rsi
#endif
+ _LIBUNWIND_CET_ENDBR
movq %rax, (PTR)
movq %rbx, 8(PTR)
movq %rcx, 16(PTR)
diff --git a/libunwind/src/Unwind_AppleExtras.cpp b/libunwind/src/Unwind_AppleExtras.cpp
index e3d41ca2b4e9..ffb49a89e54f 100644
--- a/libunwind/src/Unwind_AppleExtras.cpp
+++ b/libunwind/src/Unwind_AppleExtras.cpp
@@ -1,4 +1,4 @@
-//===--------------------- Unwind_AppleExtras.cpp -------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libunwind/src/assembly.h b/libunwind/src/assembly.h
index 76ef82553283..e38d32336929 100644
--- a/libunwind/src/assembly.h
+++ b/libunwind/src/assembly.h
@@ -15,6 +15,13 @@
#ifndef UNWIND_ASSEMBLY_H
#define UNWIND_ASSEMBLY_H
+#if (defined(__i386__) || defined(__x86_64__)) && defined(__linux__)
+#include <cet.h>
+#define _LIBUNWIND_CET_ENDBR _CET_ENDBR
+#else
+#define _LIBUNWIND_CET_ENDBR
+#endif
+
#if defined(__powerpc64__)
#define SEPARATOR ;
#define PPC64_OFFS_SRR0 0
diff --git a/libunwind/src/cet_unwind.h b/libunwind/src/cet_unwind.h
new file mode 100644
index 000000000000..e371be20c452
--- /dev/null
+++ b/libunwind/src/cet_unwind.h
@@ -0,0 +1,41 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LIBUNWIND_CET_UNWIND_H
+#define LIBUNWIND_CET_UNWIND_H
+
+#include "libunwind.h"
+
+// Currently, CET is implemented on Linux x86 platforms.
+#if defined(_LIBUNWIND_TARGET_LINUX) && defined(__CET__) && defined(__SHSTK__)
+#define _LIBUNWIND_USE_CET 1
+#endif
+
+#if defined(_LIBUNWIND_USE_CET)
+#include <cet.h>
+#include <immintrin.h>
+
+#define _LIBUNWIND_POP_CET_SSP(x) \
+ do { \
+ unsigned long ssp = _get_ssp(); \
+ if (ssp != 0) { \
+ unsigned int tmp = (x); \
+ while (tmp > 255) { \
+ _inc_ssp(255); \
+ tmp -= 255; \
+ } \
+ _inc_ssp(tmp); \
+ } \
+ } while (0)
+#endif
+
+extern void *__libunwind_cet_get_registers(unw_cursor_t *);
+extern void *__libunwind_cet_get_jump_target();
+
+#endif
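Two hedged notes on the CET pieces above. First, _LIBUNWIND_POP_CET_SSP pops in chunks of 255 because the INCSSP instruction behind _inc_ssp() only honors the low 8 bits of its operand. Second, the ret-to-pop/jmp rewrites in UnwindRegistersRestore.S avoid the shadow-stack check that a `ret` would perform against the synthetic return address the unwinder installs, while the _LIBUNWIND_CET_ENDBR insertions mark those entry points as valid targets under indirect-branch tracking. A small usage sketch (drop_skipped_frames is illustrative, not libunwind API):

#include "cet_unwind.h"

// Discard the shadow-stack entries of `frames` frames that were unwound
// without executing their ret instructions. With frames == 600 the macro
// expands to _inc_ssp(255), _inc_ssp(255), then _inc_ssp(90).
static inline void drop_skipped_frames(unsigned frames) {
#if defined(_LIBUNWIND_USE_CET)
  _LIBUNWIND_POP_CET_SSP(frames);
#else
  (void)frames; // no shadow stack to maintain
#endif
}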
diff --git a/libunwind/src/config.h b/libunwind/src/config.h
index 2ab9d2f5e0c9..f469d3c232e6 100644
--- a/libunwind/src/config.h
+++ b/libunwind/src/config.h
@@ -1,4 +1,4 @@
-//===----------------------------- config.h -------------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libunwind/src/dwarf2.h b/libunwind/src/dwarf2.h
index 40f0daf46805..174277d5a795 100644
--- a/libunwind/src/dwarf2.h
+++ b/libunwind/src/dwarf2.h
@@ -1,4 +1,4 @@
-//===------------------------------- dwarf2.h -----------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/libunwind/src/libunwind.cpp b/libunwind/src/libunwind.cpp
index 1faf000ce44a..48750ce670fb 100644
--- a/libunwind/src/libunwind.cpp
+++ b/libunwind/src/libunwind.cpp
@@ -1,4 +1,4 @@
-//===--------------------------- libunwind.cpp ----------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -11,8 +11,8 @@
#include <libunwind.h>
-#include "libunwind_ext.h"
#include "config.h"
+#include "libunwind_ext.h"
#include <stdlib.h>
@@ -292,6 +292,35 @@ void __unw_remove_dynamic_fde(unw_word_t fde) {
// fde is own mh_group
DwarfFDECache<LocalAddressSpace>::removeAllIn((LocalAddressSpace::pint_t)fde);
}
+
+void __unw_add_dynamic_eh_frame_section(unw_word_t eh_frame_start) {
+ // The eh_frame section start serves as the mh_group
+ unw_word_t mh_group = eh_frame_start;
+ CFI_Parser<LocalAddressSpace>::CIE_Info cieInfo;
+ CFI_Parser<LocalAddressSpace>::FDE_Info fdeInfo;
+ auto p = (LocalAddressSpace::pint_t)eh_frame_start;
+ while (true) {
+ if (CFI_Parser<LocalAddressSpace>::decodeFDE(
+ LocalAddressSpace::sThisAddressSpace, p, &fdeInfo, &cieInfo,
+ true) == NULL) {
+ DwarfFDECache<LocalAddressSpace>::add((LocalAddressSpace::pint_t)mh_group,
+ fdeInfo.pcStart, fdeInfo.pcEnd,
+ fdeInfo.fdeStart);
+ p += fdeInfo.fdeLength;
+ } else if (CFI_Parser<LocalAddressSpace>::parseCIE(
+ LocalAddressSpace::sThisAddressSpace, p, &cieInfo) == NULL) {
+ p += cieInfo.cieLength;
+ } else
+ return;
+ }
+}
+
+void __unw_remove_dynamic_eh_frame_section(unw_word_t eh_frame_start) {
+ // The eh_frame section start serves as the mh_group
+ DwarfFDECache<LocalAddressSpace>::removeAllIn(
+ (LocalAddressSpace::pint_t)eh_frame_start);
+}
+
#endif // defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND)
#endif // !defined(__USING_SJLJ_EXCEPTIONS__)
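A hedged usage sketch of the two new entry points, e.g. for a JIT that emits one complete in-memory .eh_frame section for its generated code (register_jit_frames and jit_eh_frame_start are illustrative names, not libunwind API):

#include "libunwind_ext.h"

// Register every FDE of an in-memory .eh_frame section in one call instead of
// calling __unw_add_dynamic_fde() once per FDE.
void register_jit_frames(void *jit_eh_frame_start) {
  __unw_add_dynamic_eh_frame_section((unw_word_t)jit_eh_frame_start);
}

// The section start address doubles as the removal key (the "mh_group" above).
void unregister_jit_frames(void *jit_eh_frame_start) {
  __unw_remove_dynamic_eh_frame_section((unw_word_t)jit_eh_frame_start);
}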
diff --git a/libunwind/src/libunwind_ext.h b/libunwind/src/libunwind_ext.h
index 316dee298246..7065ffcdaeff 100644
--- a/libunwind/src/libunwind_ext.h
+++ b/libunwind/src/libunwind_ext.h
@@ -1,4 +1,4 @@
-//===------------------------ libunwind_ext.h -----------------------------===//
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -51,6 +51,9 @@ extern void __unw_iterate_dwarf_unwind_cache(void (*func)(
extern void __unw_add_dynamic_fde(unw_word_t fde);
extern void __unw_remove_dynamic_fde(unw_word_t fde);
+extern void __unw_add_dynamic_eh_frame_section(unw_word_t eh_frame_start);
+extern void __unw_remove_dynamic_eh_frame_section(unw_word_t eh_frame_start);
+
#if defined(_LIBUNWIND_ARM_EHABI)
extern const uint32_t* decode_eht_entry(const uint32_t*, size_t*, size_t*);
extern _Unwind_Reason_Code _Unwind_VRS_Interpret(_Unwind_Context *context,
diff --git a/lld/COFF/COFFLinkerContext.cpp b/lld/COFF/COFFLinkerContext.cpp
new file mode 100644
index 000000000000..a280cacb932e
--- /dev/null
+++ b/lld/COFF/COFFLinkerContext.cpp
@@ -0,0 +1,40 @@
+//===- COFFLinkerContext.cpp ----------------------------------------------===//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Description
+//
+//===----------------------------------------------------------------------===//
+
+#include "COFFLinkerContext.h"
+#include "lld/Common/Memory.h"
+#include "llvm/DebugInfo/CodeView/TypeHashing.h"
+
+namespace lld {
+namespace coff {
+
+COFFLinkerContext::COFFLinkerContext()
+ : symtab(*this), rootTimer("Total Linking Time"),
+ inputFileTimer("Input File Reading", rootTimer),
+ ltoTimer("LTO", rootTimer), gcTimer("GC", rootTimer),
+ icfTimer("ICF", rootTimer), codeLayoutTimer("Code Layout", rootTimer),
+ outputCommitTimer("Commit Output File", rootTimer),
+ totalMapTimer("MAP Emission (Cumulative)", rootTimer),
+ symbolGatherTimer("Gather Symbols", totalMapTimer),
+ symbolStringsTimer("Build Symbol Strings", totalMapTimer),
+ writeTimer("Write to File", totalMapTimer),
+ totalPdbLinkTimer("PDB Emission (Cumulative)", rootTimer),
+ addObjectsTimer("Add Objects", totalPdbLinkTimer),
+ typeMergingTimer("Type Merging", addObjectsTimer),
+ loadGHashTimer("Global Type Hashing", addObjectsTimer),
+ mergeGHashTimer("GHash Type Merging", addObjectsTimer),
+ symbolMergingTimer("Symbol Merging", addObjectsTimer),
+ publicsLayoutTimer("Publics Stream Layout", totalPdbLinkTimer),
+ tpiStreamLayoutTimer("TPI Stream Layout", totalPdbLinkTimer),
+ diskCommitTimer("Commit to Disk", totalPdbLinkTimer) {}
+
+} // namespace coff
+} // namespace lld
diff --git a/lld/COFF/COFFLinkerContext.h b/lld/COFF/COFFLinkerContext.h
new file mode 100644
index 000000000000..e5223da86ef8
--- /dev/null
+++ b/lld/COFF/COFFLinkerContext.h
@@ -0,0 +1,85 @@
+//===- COFFLinkerContext.h --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_COFF_COFFLinkerContext_H
+#define LLD_COFF_COFFLinkerContext_H
+
+#include "Chunks.h"
+#include "Config.h"
+#include "DebugTypes.h"
+#include "InputFiles.h"
+#include "SymbolTable.h"
+#include "Writer.h"
+#include "lld/Common/Timer.h"
+
+namespace lld {
+namespace coff {
+
+class COFFLinkerContext {
+public:
+ COFFLinkerContext();
+ COFFLinkerContext(const COFFLinkerContext &) = delete;
+ COFFLinkerContext &operator=(const COFFLinkerContext &) = delete;
+ ~COFFLinkerContext() = default;
+
+ void addTpiSource(TpiSource *tpi) { tpiSourceList.push_back(tpi); }
+
+ SymbolTable symtab;
+
+ std::vector<ObjFile *> objFileInstances;
+ std::map<std::string, PDBInputFile *> pdbInputFileInstances;
+ std::vector<ImportFile *> importFileInstances;
+ std::vector<BitcodeFile *> bitcodeFileInstances;
+
+ MergeChunk *mergeChunkInstances[Log2MaxSectionAlignment + 1] = {};
+
+ /// All sources of type information in the program.
+ std::vector<TpiSource *> tpiSourceList;
+
+ std::map<llvm::codeview::GUID, TpiSource *> typeServerSourceMappings;
+ std::map<uint32_t, TpiSource *> precompSourceMappings;
+
+ /// List of all output sections. After output sections are finalized, this
+ /// can be indexed by getOutputSection.
+ std::vector<OutputSection *> outputSections;
+
+ OutputSection *getOutputSection(const Chunk *c) const {
+ return c->osidx == 0 ? nullptr : outputSections[c->osidx - 1];
+ }
+
+ // All timers used in the COFF linker.
+ Timer rootTimer;
+ Timer inputFileTimer;
+ Timer ltoTimer;
+ Timer gcTimer;
+ Timer icfTimer;
+
+ // Writer timers.
+ Timer codeLayoutTimer;
+ Timer outputCommitTimer;
+ Timer totalMapTimer;
+ Timer symbolGatherTimer;
+ Timer symbolStringsTimer;
+ Timer writeTimer;
+
+ // PDB timers.
+ Timer totalPdbLinkTimer;
+ Timer addObjectsTimer;
+ Timer typeMergingTimer;
+ Timer loadGHashTimer;
+ Timer mergeGHashTimer;
+ Timer symbolMergingTimer;
+ Timer publicsLayoutTimer;
+ Timer tpiStreamLayoutTimer;
+ Timer diskCommitTimer;
+};
+
+} // namespace coff
+} // namespace lld
+
+#endif
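Not part of the commit: a rough sketch of the pattern this class enables. State that used to live in statics (ObjFile::instances, MergeChunk::instances, the TpiSource lists, the global timers) now hangs off one context created per link invocation, which is why several manual cleanup lines disappear from errorHandler().cleanupCallback in Driver.cpp below. Illustrative only, built from calls that appear elsewhere in this diff:

#include "COFFLinkerContext.h"
#include "lld/Common/Memory.h"

using namespace lld;
using namespace lld::coff;

// Sketch: one context per link, replacing the old globals.
void linkOnce(llvm::MemoryBufferRef mb) {
  COFFLinkerContext ctx;                      // owns symtab, file lists, timers
  ScopedTimer t(ctx.rootTimer);               // previously ScopedTimer(Timer::root())
  ctx.symtab.addFile(make<ObjFile>(ctx, mb)); // previously symtab->addFile(...)
}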
diff --git a/lld/COFF/CallGraphSort.cpp b/lld/COFF/CallGraphSort.cpp
index d3e5312ce7fd..709e69b24914 100644
--- a/lld/COFF/CallGraphSort.cpp
+++ b/lld/COFF/CallGraphSort.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "CallGraphSort.h"
+#include "COFFLinkerContext.h"
#include "InputFiles.h"
#include "SymbolTable.h"
#include "Symbols.h"
@@ -48,7 +49,7 @@ struct Cluster {
class CallGraphSort {
public:
- CallGraphSort();
+ CallGraphSort(const COFFLinkerContext &ctx);
DenseMap<const SectionChunk *, int> run();
@@ -70,7 +71,7 @@ using SectionPair = std::pair<const SectionChunk *, const SectionChunk *>;
// Take the edge list in Config->CallGraphProfile, resolve symbol names to
// Symbols, and generate a graph between InputSections with the provided
// weights.
-CallGraphSort::CallGraphSort() {
+CallGraphSort::CallGraphSort(const COFFLinkerContext &ctx) {
MapVector<SectionPair, uint64_t> &profile = config->callGraphProfile;
DenseMap<const SectionChunk *, int> secToCluster;
@@ -95,7 +96,7 @@ CallGraphSort::CallGraphSort() {
// output. This messes with the cluster size and density calculations. We
// would also end up moving input sections in other output sections without
// moving them closer to what calls them.
- if (fromSec->getOutputSection() != toSec->getOutputSection())
+ if (ctx.getOutputSection(fromSec) != ctx.getOutputSection(toSec))
continue;
int from = getOrCreateNode(fromSec);
@@ -240,6 +241,7 @@ DenseMap<const SectionChunk *, int> CallGraphSort::run() {
// This first builds a call graph based on the profile data then merges sections
// according to the C³ heuristic. All clusters are then sorted by a density
// metric to further improve locality.
-DenseMap<const SectionChunk *, int> coff::computeCallGraphProfileOrder() {
- return CallGraphSort().run();
+DenseMap<const SectionChunk *, int>
+coff::computeCallGraphProfileOrder(const COFFLinkerContext &ctx) {
+ return CallGraphSort(ctx).run();
}
diff --git a/lld/COFF/CallGraphSort.h b/lld/COFF/CallGraphSort.h
index e4f372137448..99f35d6b6276 100644
--- a/lld/COFF/CallGraphSort.h
+++ b/lld/COFF/CallGraphSort.h
@@ -14,8 +14,10 @@
namespace lld {
namespace coff {
class SectionChunk;
+class COFFLinkerContext;
-llvm::DenseMap<const SectionChunk *, int> computeCallGraphProfileOrder();
+llvm::DenseMap<const SectionChunk *, int>
+computeCallGraphProfileOrder(const COFFLinkerContext &ctx);
} // namespace coff
} // namespace lld
diff --git a/lld/COFF/Chunks.cpp b/lld/COFF/Chunks.cpp
index 36d5f371326f..9f6dbd172509 100644
--- a/lld/COFF/Chunks.cpp
+++ b/lld/COFF/Chunks.cpp
@@ -7,10 +7,11 @@
//===----------------------------------------------------------------------===//
#include "Chunks.h"
+#include "COFFLinkerContext.h"
#include "InputFiles.h"
+#include "SymbolTable.h"
#include "Symbols.h"
#include "Writer.h"
-#include "SymbolTable.h"
#include "lld/Common/ErrorHandler.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/COFF.h"
@@ -385,7 +386,7 @@ void SectionChunk::applyRelocation(uint8_t *off,
// section is needed to compute SECREL and SECTION relocations used in debug
// info.
Chunk *c = sym ? sym->getChunk() : nullptr;
- OutputSection *os = c ? c->getOutputSection() : nullptr;
+ OutputSection *os = c ? file->ctx.getOutputSection(c) : nullptr;
// Skip the relocation if it refers to a discarded section, and diagnose it
// as an error if appropriate. If a symbol was discarded early, it may be
@@ -938,18 +939,16 @@ uint8_t Baserel::getDefaultType() {
}
}
-MergeChunk *MergeChunk::instances[Log2MaxSectionAlignment + 1] = {};
-
MergeChunk::MergeChunk(uint32_t alignment)
: builder(StringTableBuilder::RAW, alignment) {
setAlignment(alignment);
}
-void MergeChunk::addSection(SectionChunk *c) {
+void MergeChunk::addSection(COFFLinkerContext &ctx, SectionChunk *c) {
assert(isPowerOf2_32(c->getAlignment()));
uint8_t p2Align = llvm::Log2_32(c->getAlignment());
- assert(p2Align < array_lengthof(instances));
- auto *&mc = instances[p2Align];
+ assert(p2Align < array_lengthof(ctx.mergeChunkInstances));
+ auto *&mc = ctx.mergeChunkInstances[p2Align];
if (!mc)
mc = make<MergeChunk>(c->getAlignment());
mc->sections.push_back(c);
diff --git a/lld/COFF/Chunks.h b/lld/COFF/Chunks.h
index bdd3faa179a8..daaa043fface 100644
--- a/lld/COFF/Chunks.h
+++ b/lld/COFF/Chunks.h
@@ -101,7 +101,6 @@ public:
// chunk has a back pointer to an output section.
void setOutputSectionIdx(uint16_t o) { osidx = o; }
uint16_t getOutputSectionIdx() const { return osidx; }
- OutputSection *getOutputSection() const;
// Windows-specific.
// Collect all locations that contain absolute addresses for base relocations.
@@ -415,7 +414,7 @@ inline StringRef Chunk::getDebugName() const {
class MergeChunk : public NonSectionChunk {
public:
MergeChunk(uint32_t alignment);
- static void addSection(SectionChunk *c);
+ static void addSection(COFFLinkerContext &ctx, SectionChunk *c);
void finalizeContents();
void assignSubsectionRVAs();
@@ -424,7 +423,6 @@ public:
size_t getSize() const override;
void writeTo(uint8_t *buf) const override;
- static MergeChunk *instances[Log2MaxSectionAlignment + 1];
std::vector<SectionChunk *> sections;
private:
diff --git a/lld/COFF/Config.h b/lld/COFF/Config.h
index df883b779ee4..3917975e165d 100644
--- a/lld/COFF/Config.h
+++ b/lld/COFF/Config.h
@@ -10,6 +10,7 @@
#define LLD_COFF_CONFIG_H
#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Object/COFF.h"
@@ -91,7 +92,7 @@ enum class ICFLevel {
// Global configuration.
struct Configuration {
- enum ManifestKind { SideBySide, Embed, No };
+ enum ManifestKind { Default, SideBySide, Embed, No };
bool is64() { return machine == AMD64 || machine == ARM64; }
llvm::COFF::MachineTypes machine = IMAGE_FILE_MACHINE_UNKNOWN;
@@ -123,6 +124,7 @@ struct Configuration {
std::vector<std::string> natvisFiles;
llvm::StringMap<std::string> namedStreams;
llvm::SmallString<128> pdbAltPath;
+ int pdbPageSize = 4096;
llvm::SmallString<128> pdbPath;
llvm::SmallString<128> pdbSourcePath;
std::vector<llvm::StringRef> argv;
@@ -178,9 +180,9 @@ struct Configuration {
std::map<StringRef, uint32_t> section;
// Options for manifest files.
- ManifestKind manifest = No;
+ ManifestKind manifest = Default;
int manifestID = 1;
- StringRef manifestDependency;
+ llvm::SetVector<StringRef> manifestDependencies;
bool manifestUAC = true;
std::vector<std::string> manifestInput;
StringRef manifestLevel = "'asInvoker'";
@@ -223,6 +225,9 @@ struct Configuration {
// Used for /lto-cs-profile-path
llvm::StringRef ltoCSProfileFile;
+ // Used for /lto-pgo-warn-mismatch:
+ bool ltoPGOWarnMismatch = true;
+
// Used for /call-graph-ordering-file:
llvm::MapVector<std::pair<const SectionChunk *, const SectionChunk *>,
uint64_t>
diff --git a/lld/COFF/DLL.cpp b/lld/COFF/DLL.cpp
index b9e12ef4b34d..6fec9df5617d 100644
--- a/lld/COFF/DLL.cpp
+++ b/lld/COFF/DLL.cpp
@@ -18,6 +18,7 @@
//===----------------------------------------------------------------------===//
#include "DLL.h"
+#include "COFFLinkerContext.h"
#include "Chunks.h"
#include "SymbolTable.h"
#include "llvm/Object/COFF.h"
@@ -631,7 +632,7 @@ uint64_t DelayLoadContents::getDirSize() {
return dirs.size() * sizeof(delay_import_directory_table_entry);
}
-void DelayLoadContents::create(Defined *h) {
+void DelayLoadContents::create(COFFLinkerContext &ctx, Defined *h) {
helper = h;
std::vector<std::vector<DefinedImportData *>> v = binImports(imports);
@@ -660,13 +661,13 @@ void DelayLoadContents::create(Defined *h) {
// call targets for Control Flow Guard.
StringRef symName = saver.save("__imp_load_" + extName);
s->loadThunkSym =
- cast<DefinedSynthetic>(symtab->addSynthetic(symName, t));
+ cast<DefinedSynthetic>(ctx.symtab.addSynthetic(symName, t));
}
}
thunks.push_back(tm);
StringRef tmName =
saver.save("__tailMerge_" + syms[0]->getDLLName().lower());
- symtab->addSynthetic(tmName, tm);
+ ctx.symtab.addSynthetic(tmName, tm);
// Terminate with null values.
addresses.push_back(make<NullChunk>(8));
names.push_back(make<NullChunk>(8));
diff --git a/lld/COFF/DLL.h b/lld/COFF/DLL.h
index ce0ee01c4a3d..0d594e675bd2 100644
--- a/lld/COFF/DLL.h
+++ b/lld/COFF/DLL.h
@@ -40,7 +40,7 @@ class DelayLoadContents {
public:
void add(DefinedImportData *sym) { imports.push_back(sym); }
bool empty() { return imports.empty(); }
- void create(Defined *helper);
+ void create(COFFLinkerContext &ctx, Defined *helper);
std::vector<Chunk *> getChunks();
std::vector<Chunk *> getDataChunks();
ArrayRef<Chunk *> getCodeChunks() { return thunks; }
diff --git a/lld/COFF/DebugTypes.cpp b/lld/COFF/DebugTypes.cpp
index 97be5bc79ac3..67b708c5b36a 100644
--- a/lld/COFF/DebugTypes.cpp
+++ b/lld/COFF/DebugTypes.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "DebugTypes.h"
+#include "COFFLinkerContext.h"
#include "Chunks.h"
#include "Driver.h"
#include "InputFiles.h"
@@ -14,7 +15,6 @@
#include "TypeMerger.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Memory.h"
-#include "lld/Common/Timer.h"
#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h"
@@ -46,8 +46,8 @@ class TypeServerIpiSource;
// before any dependent OBJ.
class TypeServerSource : public TpiSource {
public:
- explicit TypeServerSource(PDBInputFile *f)
- : TpiSource(PDB, nullptr), pdbInputFile(f) {
+ explicit TypeServerSource(COFFLinkerContext &ctx, PDBInputFile *f)
+ : TpiSource(ctx, PDB, nullptr), pdbInputFile(f) {
if (f->loadErr && *f->loadErr)
return;
pdb::PDBFile &file = f->session->getPDBFile();
@@ -55,7 +55,7 @@ public:
if (!expectedInfo)
return;
Guid = expectedInfo->getGuid();
- auto it = mappings.emplace(Guid, this);
+ auto it = ctx.typeServerSourceMappings.emplace(Guid, this);
assert(it.second);
(void)it;
}
@@ -74,8 +74,6 @@ public:
// The PDB signature GUID.
codeview::GUID Guid;
-
- static std::map<codeview::GUID, TypeServerSource *> mappings;
};
// Companion to TypeServerSource. Stores the index map for the IPI stream in the
@@ -83,7 +81,8 @@ public:
// invariant of one type index space per source.
class TypeServerIpiSource : public TpiSource {
public:
- explicit TypeServerIpiSource() : TpiSource(PDBIpi, nullptr) {}
+ explicit TypeServerIpiSource(COFFLinkerContext &ctx)
+ : TpiSource(ctx, PDBIpi, nullptr) {}
friend class TypeServerSource;
@@ -101,8 +100,8 @@ class UseTypeServerSource : public TpiSource {
Expected<TypeServerSource *> getTypeServerSource();
public:
- UseTypeServerSource(ObjFile *f, TypeServer2Record ts)
- : TpiSource(UsingPDB, f), typeServerDependency(ts) {}
+ UseTypeServerSource(COFFLinkerContext &ctx, ObjFile *f, TypeServer2Record ts)
+ : TpiSource(ctx, UsingPDB, f), typeServerDependency(ts) {}
Error mergeDebugT(TypeMerger *m) override;
@@ -121,11 +120,11 @@ public:
// such files, clang does not.
class PrecompSource : public TpiSource {
public:
- PrecompSource(ObjFile *f) : TpiSource(PCH, f) {
+ PrecompSource(COFFLinkerContext &ctx, ObjFile *f) : TpiSource(ctx, PCH, f) {
if (!f->pchSignature || !*f->pchSignature)
fatal(toString(f) +
" claims to be a PCH object, but does not have a valid signature");
- auto it = mappings.emplace(*f->pchSignature, this);
+ auto it = ctx.precompSourceMappings.emplace(*f->pchSignature, this);
if (!it.second)
fatal("a PCH object with the same signature has already been provided (" +
toString(it.first->second->file) + " and " + toString(file) + ")");
@@ -134,16 +133,14 @@ public:
void loadGHashes() override;
bool isDependency() const override { return true; }
-
- static std::map<uint32_t, PrecompSource *> mappings;
};
// This class represents the debug type stream of an OBJ file that depends on a
// Microsoft precompiled headers OBJ (see PrecompSource).
class UsePrecompSource : public TpiSource {
public:
- UsePrecompSource(ObjFile *f, PrecompRecord precomp)
- : TpiSource(UsingPCH, f), precompDependency(precomp) {}
+ UsePrecompSource(COFFLinkerContext &ctx, ObjFile *f, PrecompRecord precomp)
+ : TpiSource(ctx, UsingPCH, f), precompDependency(precomp) {}
Error mergeDebugT(TypeMerger *m) override;
@@ -153,6 +150,10 @@ public:
private:
Error mergeInPrecompHeaderObj();
+ PrecompSource *findObjByName(StringRef fileNameOnly);
+ PrecompSource *findPrecompSource(ObjFile *file, PrecompRecord &pr);
+ Expected<PrecompSource *> findPrecompMap(ObjFile *file, PrecompRecord &pr);
+
public:
// Information about the Precomp OBJ dependency, that needs to be loaded in
// before merging this OBJ.
@@ -160,13 +161,9 @@ public:
};
} // namespace
-std::vector<TpiSource *> TpiSource::instances;
-ArrayRef<TpiSource *> TpiSource::dependencySources;
-ArrayRef<TpiSource *> TpiSource::objectSources;
-
-TpiSource::TpiSource(TpiKind k, ObjFile *f)
- : kind(k), tpiSrcIdx(instances.size()), file(f) {
- instances.push_back(this);
+TpiSource::TpiSource(COFFLinkerContext &ctx, TpiKind k, ObjFile *f)
+ : ctx(ctx), kind(k), tpiSrcIdx(ctx.tpiSourceList.size()), file(f) {
+ ctx.addTpiSource(this);
}
// Vtable key method.
@@ -175,52 +172,35 @@ TpiSource::~TpiSource() {
consumeError(std::move(typeMergingError));
}
-void TpiSource::sortDependencies() {
- // Order dependencies first, but preserve the existing order.
- std::vector<TpiSource *> deps;
- std::vector<TpiSource *> objs;
- for (TpiSource *s : instances)
- (s->isDependency() ? deps : objs).push_back(s);
- uint32_t numDeps = deps.size();
- uint32_t numObjs = objs.size();
- instances = std::move(deps);
- instances.insert(instances.end(), objs.begin(), objs.end());
- for (uint32_t i = 0, e = instances.size(); i < e; ++i)
- instances[i]->tpiSrcIdx = i;
- dependencySources = makeArrayRef(instances.data(), numDeps);
- objectSources = makeArrayRef(instances.data() + numDeps, numObjs);
-}
-
-TpiSource *lld::coff::makeTpiSource(ObjFile *file) {
- return make<TpiSource>(TpiSource::Regular, file);
+TpiSource *lld::coff::makeTpiSource(COFFLinkerContext &ctx, ObjFile *file) {
+ return make<TpiSource>(ctx, TpiSource::Regular, file);
}
-TpiSource *lld::coff::makeTypeServerSource(PDBInputFile *pdbInputFile) {
+TpiSource *lld::coff::makeTypeServerSource(COFFLinkerContext &ctx,
+ PDBInputFile *pdbInputFile) {
// Type server sources come in pairs: the TPI stream, and the IPI stream.
- auto *tpiSource = make<TypeServerSource>(pdbInputFile);
+ auto *tpiSource = make<TypeServerSource>(ctx, pdbInputFile);
if (pdbInputFile->session->getPDBFile().hasPDBIpiStream())
- tpiSource->ipiSrc = make<TypeServerIpiSource>();
+ tpiSource->ipiSrc = make<TypeServerIpiSource>(ctx);
return tpiSource;
}
-TpiSource *lld::coff::makeUseTypeServerSource(ObjFile *file,
+TpiSource *lld::coff::makeUseTypeServerSource(COFFLinkerContext &ctx,
+ ObjFile *file,
TypeServer2Record ts) {
- return make<UseTypeServerSource>(file, ts);
+ return make<UseTypeServerSource>(ctx, file, ts);
}
-TpiSource *lld::coff::makePrecompSource(ObjFile *file) {
- return make<PrecompSource>(file);
+TpiSource *lld::coff::makePrecompSource(COFFLinkerContext &ctx, ObjFile *file) {
+ return make<PrecompSource>(ctx, file);
}
-TpiSource *lld::coff::makeUsePrecompSource(ObjFile *file,
+TpiSource *lld::coff::makeUsePrecompSource(COFFLinkerContext &ctx,
+ ObjFile *file,
PrecompRecord precomp) {
- return make<UsePrecompSource>(file, precomp);
+ return make<UsePrecompSource>(ctx, file, precomp);
}
-std::map<codeview::GUID, TypeServerSource *> TypeServerSource::mappings;
-
-std::map<uint32_t, PrecompSource *> PrecompSource::mappings;
-
bool TpiSource::remapTypeIndex(TypeIndex &ti, TiRefKind refKind) const {
if (ti.isSimple())
return true;
@@ -345,7 +325,7 @@ Error TpiSource::mergeDebugT(TypeMerger *m) {
m->tpiCounts.resize(m->getTypeTable().size());
m->ipiCounts.resize(m->getIDTable().size());
uint32_t srcIdx = nbHeadIndices;
- for (CVType &ty : types) {
+ for (const CVType &ty : types) {
TypeIndex dstIdx = tpiMap[srcIdx++];
// Type merging may fail, so a complex source type may become the simple
// NotTranslated type, which cannot be used as an array index.
@@ -419,12 +399,12 @@ Expected<TypeServerSource *> UseTypeServerSource::getTypeServerSource() {
StringRef tsPath = typeServerDependency.getName();
TypeServerSource *tsSrc;
- auto it = TypeServerSource::mappings.find(tsId);
- if (it != TypeServerSource::mappings.end()) {
- tsSrc = it->second;
+ auto it = ctx.typeServerSourceMappings.find(tsId);
+ if (it != ctx.typeServerSourceMappings.end()) {
+ tsSrc = (TypeServerSource *)it->second;
} else {
// The file failed to load, lookup by name
- PDBInputFile *pdb = PDBInputFile::findFromRecordPath(tsPath, file);
+ PDBInputFile *pdb = PDBInputFile::findFromRecordPath(ctx, tsPath, file);
if (!pdb)
return createFileError(tsPath, errorCodeToError(std::error_code(
ENOENT, std::generic_category())));
@@ -471,36 +451,37 @@ static bool equalsPath(StringRef path1, StringRef path2) {
}
// Find by name an OBJ provided on the command line
-static PrecompSource *findObjByName(StringRef fileNameOnly) {
+PrecompSource *UsePrecompSource::findObjByName(StringRef fileNameOnly) {
SmallString<128> currentPath;
- for (auto kv : PrecompSource::mappings) {
+ for (auto kv : ctx.precompSourceMappings) {
StringRef currentFileName = sys::path::filename(kv.second->file->getName(),
sys::path::Style::windows);
// Compare based solely on the file name (link.exe behavior)
if (equalsPath(currentFileName, fileNameOnly))
- return kv.second;
+ return (PrecompSource *)kv.second;
}
return nullptr;
}
-static PrecompSource *findPrecompSource(ObjFile *file, PrecompRecord &pr) {
+PrecompSource *UsePrecompSource::findPrecompSource(ObjFile *file,
+ PrecompRecord &pr) {
// Cross-compile warning: given that Clang doesn't generate LF_PRECOMP
// records, we assume the OBJ comes from a Windows build of cl.exe. Thusly,
// the paths embedded in the OBJs are in the Windows format.
SmallString<128> prFileName =
sys::path::filename(pr.getPrecompFilePath(), sys::path::Style::windows);
- auto it = PrecompSource::mappings.find(pr.getSignature());
- if (it != PrecompSource::mappings.end()) {
- return it->second;
+ auto it = ctx.precompSourceMappings.find(pr.getSignature());
+ if (it != ctx.precompSourceMappings.end()) {
+ return (PrecompSource *)it->second;
}
// Lookup by name
return findObjByName(prFileName);
}
-static Expected<PrecompSource *> findPrecompMap(ObjFile *file,
- PrecompRecord &pr) {
+Expected<PrecompSource *> UsePrecompSource::findPrecompMap(ObjFile *file,
+ PrecompRecord &pr) {
PrecompSource *precomp = findPrecompSource(file, pr);
if (!precomp)
@@ -555,22 +536,6 @@ Error UsePrecompSource::mergeDebugT(TypeMerger *m) {
return TpiSource::mergeDebugT(m);
}
-uint32_t TpiSource::countTypeServerPDBs() {
- return TypeServerSource::mappings.size();
-}
-
-uint32_t TpiSource::countPrecompObjs() {
- return PrecompSource::mappings.size();
-}
-
-void TpiSource::clear() {
- // Clean up any owned ghash allocations.
- clearGHashes();
- TpiSource::instances.clear();
- TypeServerSource::mappings.clear();
- PrecompSource::mappings.clear();
-}
-
//===----------------------------------------------------------------------===//
// Parallel GHash type merging implementation.
//===----------------------------------------------------------------------===//
@@ -926,7 +891,8 @@ struct GHashTable {
/// Insert the cell with the given ghash into the table. Return the insertion
/// position in the table. It is safe for the caller to store the insertion
/// position because the table cannot be resized.
- uint32_t insert(GloballyHashedType ghash, GHashCell newCell);
+ uint32_t insert(COFFLinkerContext &ctx, GloballyHashedType ghash,
+ GHashCell newCell);
};
/// A ghash table cell for deduplicating types from TpiSources.
@@ -965,8 +931,8 @@ public:
bool isItem() const { return data & (1ULL << 63U); }
/// Get the ghash key for this cell.
- GloballyHashedType getGHash() const {
- return TpiSource::instances[getTpiSrcIdx()]->ghashes[getGHashIdx()];
+ GloballyHashedType getGHash(const COFFLinkerContext &ctx) const {
+ return ctx.tpiSourceList[getTpiSrcIdx()]->ghashes[getGHashIdx()];
}
/// The priority function for the cell. The data is stored such that lower
@@ -996,7 +962,8 @@ void GHashTable::init(uint32_t newTableSize) {
tableSize = newTableSize;
}
-uint32_t GHashTable::insert(GloballyHashedType ghash, GHashCell newCell) {
+uint32_t GHashTable::insert(COFFLinkerContext &ctx, GloballyHashedType ghash,
+ GHashCell newCell) {
assert(!newCell.isEmpty() && "cannot insert empty cell value");
// FIXME: The low bytes of SHA1 have low entropy for short records, which
@@ -1015,7 +982,7 @@ uint32_t GHashTable::insert(GloballyHashedType ghash, GHashCell newCell) {
// - cell has non-matching key: hash collision, probe next cell
auto *cellPtr = reinterpret_cast<std::atomic<GHashCell> *>(&table[idx]);
GHashCell oldCell(cellPtr->load());
- while (oldCell.isEmpty() || oldCell.getGHash() == ghash) {
+ while (oldCell.isEmpty() || oldCell.getGHash(ctx) == ghash) {
// Check if there is an existing ghash entry with a higher priority
// (earlier ordering). If so, this is a duplicate, we are done.
if (!oldCell.isEmpty() && oldCell < newCell)
@@ -1040,22 +1007,22 @@ uint32_t GHashTable::insert(GloballyHashedType ghash, GHashCell newCell) {
llvm_unreachable("left infloop");
}
-TypeMerger::TypeMerger(llvm::BumpPtrAllocator &alloc)
- : typeTable(alloc), idTable(alloc) {}
+TypeMerger::TypeMerger(COFFLinkerContext &c, llvm::BumpPtrAllocator &alloc)
+ : typeTable(alloc), idTable(alloc), ctx(c) {}
TypeMerger::~TypeMerger() = default;
void TypeMerger::mergeTypesWithGHash() {
// Load ghashes. Do type servers and PCH objects first.
{
- ScopedTimer t1(loadGHashTimer);
- parallelForEach(TpiSource::dependencySources,
+ ScopedTimer t1(ctx.loadGHashTimer);
+ parallelForEach(dependencySources,
[&](TpiSource *source) { source->loadGHashes(); });
- parallelForEach(TpiSource::objectSources,
+ parallelForEach(objectSources,
[&](TpiSource *source) { source->loadGHashes(); });
}
- ScopedTimer t2(mergeGHashTimer);
+ ScopedTimer t2(ctx.mergeGHashTimer);
GHashState ghashState;
// Estimate the size of hash table needed to deduplicate ghashes. This *must*
@@ -1066,7 +1033,7 @@ void TypeMerger::mergeTypesWithGHash() {
// small compared to total memory usage, at eight bytes per input type record,
// and most input type records are larger than eight bytes.
size_t tableSize = 0;
- for (TpiSource *source : TpiSource::instances)
+ for (TpiSource *source : ctx.tpiSourceList)
tableSize += source->ghashes.size();
// Cap the table size so that we can use 32-bit cell indices. Type indices are
@@ -1080,8 +1047,8 @@ void TypeMerger::mergeTypesWithGHash() {
// position. Because the table does not rehash, the position will not change
// under insertion. After insertion is done, the value of the cell can be read
// to retrieve the final PDB type index.
- parallelForEachN(0, TpiSource::instances.size(), [&](size_t tpiSrcIdx) {
- TpiSource *source = TpiSource::instances[tpiSrcIdx];
+ parallelForEachN(0, ctx.tpiSourceList.size(), [&](size_t tpiSrcIdx) {
+ TpiSource *source = ctx.tpiSourceList[tpiSrcIdx];
source->indexMapStorage.resize(source->ghashes.size());
for (uint32_t i = 0, e = source->ghashes.size(); i < e; i++) {
if (source->shouldOmitFromPdb(i)) {
@@ -1091,7 +1058,7 @@ void TypeMerger::mergeTypesWithGHash() {
GloballyHashedType ghash = source->ghashes[i];
bool isItem = source->isItemIndex.test(i);
uint32_t cellIdx =
- ghashState.table.insert(ghash, GHashCell(isItem, tpiSrcIdx, i));
+ ghashState.table.insert(ctx, ghash, GHashCell(isItem, tpiSrcIdx, i));
// Store the ghash cell index as a type index in indexMapStorage. Later
// we will replace it with the PDB type index.
@@ -1137,7 +1104,7 @@ void TypeMerger::mergeTypesWithGHash() {
for (uint32_t i = 0, e = entries.size(); i < e; ++i) {
auto &cell = entries[i];
uint32_t tpiSrcIdx = cell.getTpiSrcIdx();
- TpiSource *source = TpiSource::instances[tpiSrcIdx];
+ TpiSource *source = ctx.tpiSourceList[tpiSrcIdx];
source->uniqueTypes.push_back(cell.getGHashIdx());
// Update the ghash table to store the destination PDB type index in the
@@ -1150,21 +1117,37 @@ void TypeMerger::mergeTypesWithGHash() {
}
// In parallel, remap all types.
- for_each(TpiSource::dependencySources, [&](TpiSource *source) {
+ for_each(dependencySources, [&](TpiSource *source) {
source->remapTpiWithGHashes(&ghashState);
});
- parallelForEach(TpiSource::objectSources, [&](TpiSource *source) {
+ parallelForEach(objectSources, [&](TpiSource *source) {
source->remapTpiWithGHashes(&ghashState);
});
// Build a global map of from function ID to function type.
- for (TpiSource *source : TpiSource::instances) {
+ for (TpiSource *source : ctx.tpiSourceList) {
for (auto idToType : source->funcIdToType)
funcIdToType.insert(idToType);
source->funcIdToType.clear();
}
- TpiSource::clearGHashes();
+ clearGHashes();
+}
+
+void TypeMerger::sortDependencies() {
+ // Order dependencies first, but preserve the existing order.
+ std::vector<TpiSource *> deps;
+ std::vector<TpiSource *> objs;
+ for (TpiSource *s : ctx.tpiSourceList)
+ (s->isDependency() ? deps : objs).push_back(s);
+ uint32_t numDeps = deps.size();
+ uint32_t numObjs = objs.size();
+ ctx.tpiSourceList = std::move(deps);
+ ctx.tpiSourceList.insert(ctx.tpiSourceList.end(), objs.begin(), objs.end());
+ for (uint32_t i = 0, e = ctx.tpiSourceList.size(); i < e; ++i)
+ ctx.tpiSourceList[i]->tpiSrcIdx = i;
+ dependencySources = makeArrayRef(ctx.tpiSourceList.data(), numDeps);
+ objectSources = makeArrayRef(ctx.tpiSourceList.data() + numDeps, numObjs);
}
/// Given the index into the ghash table for a particular type, return the type
@@ -1175,6 +1158,17 @@ static TypeIndex loadPdbTypeIndexFromCell(GHashState *g,
return TypeIndex::fromArrayIndex(cell.getGHashIdx());
}
+/// Free heap allocated ghashes.
+void TypeMerger::clearGHashes() {
+ for (TpiSource *src : ctx.tpiSourceList) {
+ if (src->ownedGHashes)
+ delete[] src->ghashes.data();
+ src->ghashes = {};
+ src->isItemIndex.clear();
+ src->uniqueTypes.clear();
+ }
+}
+
// Fill in a TPI or IPI index map using ghashes. For each source type, use its
// ghash to lookup its final type index in the PDB, and store that in the map.
void TpiSource::fillMapFromGHashes(GHashState *g) {
@@ -1187,13 +1181,3 @@ void TpiSource::fillMapFromGHashes(GHashState *g) {
loadPdbTypeIndexFromCell(g, fakeCellIndex.toArrayIndex());
}
}
-
-void TpiSource::clearGHashes() {
- for (TpiSource *src : TpiSource::instances) {
- if (src->ownedGHashes)
- delete[] src->ghashes.data();
- src->ghashes = {};
- src->isItemIndex.clear();
- src->uniqueTypes.clear();
- }
-}
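One illustrative consequence of dropping the static TpiSource::instances vector (ctx and objFile are placeholders): a TpiSource now registers itself on the per-link context in its constructor, so creation and iteration both go through ctx.

// Sketch only: the factory threads the context through, and the constructor
// appends the new source to ctx.tpiSourceList via ctx.addTpiSource().
TpiSource *src = lld::coff::makeTpiSource(ctx, objFile);
assert(ctx.tpiSourceList.back() == src &&
       src->tpiSrcIdx == ctx.tpiSourceList.size() - 1);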
diff --git a/lld/COFF/DebugTypes.h b/lld/COFF/DebugTypes.h
index faad30b141e9..b02b5b884cf7 100644
--- a/lld/COFF/DebugTypes.h
+++ b/lld/COFF/DebugTypes.h
@@ -37,12 +37,13 @@ class ObjFile;
class PDBInputFile;
class TypeMerger;
struct GHashState;
+class COFFLinkerContext;
class TpiSource {
public:
enum TpiKind : uint8_t { Regular, PCH, UsingPCH, PDB, PDBIpi, UsingPDB };
- TpiSource(TpiKind k, ObjFile *f);
+ TpiSource(COFFLinkerContext &ctx, TpiKind k, ObjFile *f);
virtual ~TpiSource();
/// Produce a mapping from the type and item indices used in the object
@@ -93,6 +94,8 @@ protected:
// Walk over file->debugTypes and fill in the isItemIndex bit vector.
void fillIsItemIndexFromDebugT();
+ COFFLinkerContext &ctx;
+
public:
bool remapTypesInSymbolRecord(MutableArrayRef<uint8_t> rec);
@@ -109,29 +112,6 @@ public:
return ghashIdx == endPrecompGHashIdx;
}
- /// All sources of type information in the program.
- static std::vector<TpiSource *> instances;
-
- /// Dependency type sources, such as type servers or PCH object files. These
- /// must be processed before objects that rely on them. Set by
- /// TpiSources::sortDependencies.
- static ArrayRef<TpiSource *> dependencySources;
-
- /// Object file sources. These must be processed after dependencySources.
- static ArrayRef<TpiSource *> objectSources;
-
- /// Sorts the dependencies and reassigns TpiSource indices.
- static void sortDependencies();
-
- static uint32_t countTypeServerPDBs();
- static uint32_t countPrecompObjs();
-
- /// Free heap allocated ghashes.
- static void clearGHashes();
-
- /// Clear global data structures for TpiSources.
- static void clear();
-
const TpiKind kind;
bool ownedGHashes = true;
uint32_t tpiSrcIdx = 0;
@@ -186,12 +166,13 @@ public:
uint64_t nbTypeRecordsBytes = 0;
};
-TpiSource *makeTpiSource(ObjFile *file);
-TpiSource *makeTypeServerSource(PDBInputFile *pdbInputFile);
-TpiSource *makeUseTypeServerSource(ObjFile *file,
+TpiSource *makeTpiSource(COFFLinkerContext &ctx, ObjFile *f);
+TpiSource *makeTypeServerSource(COFFLinkerContext &ctx,
+ PDBInputFile *pdbInputFile);
+TpiSource *makeUseTypeServerSource(COFFLinkerContext &ctx, ObjFile *file,
llvm::codeview::TypeServer2Record ts);
-TpiSource *makePrecompSource(ObjFile *file);
-TpiSource *makeUsePrecompSource(ObjFile *file,
+TpiSource *makePrecompSource(COFFLinkerContext &ctx, ObjFile *file);
+TpiSource *makeUsePrecompSource(COFFLinkerContext &ctx, ObjFile *file,
llvm::codeview::PrecompRecord ts);
} // namespace coff
diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp
index 9ba0db31507f..f1b0c5c0707d 100644
--- a/lld/COFF/Driver.cpp
+++ b/lld/COFF/Driver.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "Driver.h"
+#include "COFFLinkerContext.h"
#include "Config.h"
#include "DebugTypes.h"
#include "ICF.h"
@@ -59,8 +60,6 @@ using namespace llvm::sys;
namespace lld {
namespace coff {
-static Timer inputFileTimer("Input File Reading", Timer::root());
-
Configuration *config;
LinkerDriver *driver;
@@ -70,14 +69,7 @@ bool link(ArrayRef<const char *> args, bool canExitEarly, raw_ostream &stdoutOS,
lld::stderrOS = &stderrOS;
errorHandler().cleanupCallback = []() {
- TpiSource::clear();
freeArena();
- ObjFile::instances.clear();
- PDBInputFile::instances.clear();
- ImportFile::instances.clear();
- BitcodeFile::instances.clear();
- memset(MergeChunk::instances, 0, sizeof(MergeChunk::instances));
- OutputSection::clear();
};
errorHandler().logName = args::getFilenameWithoutExe(args[0]);
@@ -87,9 +79,9 @@ bool link(ArrayRef<const char *> args, bool canExitEarly, raw_ostream &stdoutOS,
errorHandler().exitEarly = canExitEarly;
stderrOS.enable_colors(stderrOS.has_colors());
+ COFFLinkerContext ctx;
config = make<Configuration>();
- symtab = make<SymbolTable>();
- driver = make<LinkerDriver>();
+ driver = make<LinkerDriver>(ctx);
driver->linkerMain(args);
@@ -174,8 +166,8 @@ static StringRef mangle(StringRef sym) {
return sym;
}
-static bool findUnderscoreMangle(StringRef sym) {
- Symbol *s = symtab->findMangle(mangle(sym));
+bool LinkerDriver::findUnderscoreMangle(StringRef sym) {
+ Symbol *s = ctx.symtab.findMangle(mangle(sym));
return s && !isa<Undefined>(s);
}
@@ -213,30 +205,30 @@ void LinkerDriver::addBuffer(std::unique_ptr<MemoryBuffer> mb,
addArchiveBuffer(m, "<whole-archive>", filename, memberIndex++);
return;
}
- symtab->addFile(make<ArchiveFile>(mbref));
+ ctx.symtab.addFile(make<ArchiveFile>(ctx, mbref));
break;
case file_magic::bitcode:
if (lazy)
- symtab->addFile(make<LazyObjFile>(mbref));
+ ctx.symtab.addFile(make<LazyObjFile>(ctx, mbref));
else
- symtab->addFile(make<BitcodeFile>(mbref, "", 0));
+ ctx.symtab.addFile(make<BitcodeFile>(ctx, mbref, "", 0));
break;
case file_magic::coff_object:
case file_magic::coff_import_library:
if (lazy)
- symtab->addFile(make<LazyObjFile>(mbref));
+ ctx.symtab.addFile(make<LazyObjFile>(ctx, mbref));
else
- symtab->addFile(make<ObjFile>(mbref));
+ ctx.symtab.addFile(make<ObjFile>(ctx, mbref));
break;
case file_magic::pdb:
- symtab->addFile(make<PDBInputFile>(mbref));
+ ctx.symtab.addFile(make<PDBInputFile>(ctx, mbref));
break;
case file_magic::coff_cl_gl_object:
error(filename + ": is not a native COFF file. Recompile without /GL");
break;
case file_magic::pecoff_executable:
if (config->mingw) {
- symtab->addFile(make<DLLFile>(mbref));
+ ctx.symtab.addFile(make<DLLFile>(ctx, mbref));
break;
}
if (filename.endswith_insensitive(".dll")) {
@@ -280,24 +272,24 @@ void LinkerDriver::addArchiveBuffer(MemoryBufferRef mb, StringRef symName,
uint64_t offsetInArchive) {
file_magic magic = identify_magic(mb.getBuffer());
if (magic == file_magic::coff_import_library) {
- InputFile *imp = make<ImportFile>(mb);
+ InputFile *imp = make<ImportFile>(ctx, mb);
imp->parentName = parentName;
- symtab->addFile(imp);
+ ctx.symtab.addFile(imp);
return;
}
InputFile *obj;
if (magic == file_magic::coff_object) {
- obj = make<ObjFile>(mb);
+ obj = make<ObjFile>(ctx, mb);
} else if (magic == file_magic::bitcode) {
- obj = make<BitcodeFile>(mb, parentName, offsetInArchive);
+ obj = make<BitcodeFile>(ctx, mb, parentName, offsetInArchive);
} else {
error("unknown file type: " + mb.getBufferIdentifier());
return;
}
obj->parentName = parentName;
- symtab->addFile(obj);
+ ctx.symtab.addFile(obj);
log("Loaded " + toString(obj) + " for " + symName);
}
@@ -383,6 +375,7 @@ void LinkerDriver::parseDirectives(InputFile *file) {
for (StringRef inc : directives.includes)
addUndefined(inc);
+ // https://docs.microsoft.com/en-us/cpp/preprocessor/comment-c-cpp?view=msvc-160
for (auto *arg : directives.args) {
switch (arg->getOption().getID()) {
case OPT_aligncomm:
@@ -404,6 +397,9 @@ void LinkerDriver::parseDirectives(InputFile *file) {
case OPT_incl:
addUndefined(arg->getValue());
break;
+ case OPT_manifestdependency:
+ config->manifestDependencies.insert(arg->getValue());
+ break;
case OPT_merge:
parseMerge(arg->getValue());
break;
@@ -543,7 +539,7 @@ void LinkerDriver::addLibSearchPaths() {
}
Symbol *LinkerDriver::addUndefined(StringRef name) {
- Symbol *b = symtab->addUndefined(name);
+ Symbol *b = ctx.symtab.addUndefined(name);
if (!b->isGCRoot) {
b->isGCRoot = true;
config->gcroot.push_back(b);
@@ -558,14 +554,14 @@ StringRef LinkerDriver::mangleMaybe(Symbol *s) {
return "";
// Otherwise, see if a similar, mangled symbol exists in the symbol table.
- Symbol *mangled = symtab->findMangle(unmangled->getName());
+ Symbol *mangled = ctx.symtab.findMangle(unmangled->getName());
if (!mangled)
return "";
// If we find a similar mangled symbol, make this an alias to it and return
// its name.
log(unmangled->getName() + " aliased to " + mangled->getName());
- unmangled->weakAlias = symtab->addUndefined(mangled->getName());
+ unmangled->weakAlias = ctx.symtab.addUndefined(mangled->getName());
return mangled->getName();
}
@@ -651,15 +647,10 @@ static std::string createResponseFile(const opt::InputArgList &args,
case OPT_INPUT:
case OPT_defaultlib:
case OPT_libpath:
- case OPT_manifest:
- case OPT_manifest_colon:
- case OPT_manifestdependency:
- case OPT_manifestfile:
- case OPT_manifestinput:
- case OPT_manifestuac:
break;
case OPT_call_graph_ordering_file:
case OPT_deffile:
+ case OPT_manifestinput:
case OPT_natvis:
os << arg->getSpelling() << quote(rewritePath(arg->getValue())) << '\n';
break;
@@ -677,6 +668,7 @@ static std::string createResponseFile(const opt::InputArgList &args,
break;
}
case OPT_implib:
+ case OPT_manifestfile:
case OPT_pdb:
case OPT_pdbstripped:
case OPT_out:
@@ -831,16 +823,12 @@ static void createImportLibrary(bool asLib) {
exports.push_back(e2);
}
- auto handleError = [](Error &&e) {
- handleAllErrors(std::move(e),
- [](ErrorInfoBase &eib) { error(eib.message()); });
- };
std::string libName = getImportName(asLib);
std::string path = getImplibPath();
if (!config->incremental) {
- handleError(writeImportLibrary(libName, path, exports, config->machine,
- config->mingw));
+ checkError(writeImportLibrary(libName, path, exports, config->machine,
+ config->mingw));
return;
}
@@ -849,8 +837,8 @@ static void createImportLibrary(bool asLib) {
ErrorOr<std::unique_ptr<MemoryBuffer>> oldBuf = MemoryBuffer::getFile(
path, /*IsText=*/false, /*RequiresNullTerminator=*/false);
if (!oldBuf) {
- handleError(writeImportLibrary(libName, path, exports, config->machine,
- config->mingw));
+ checkError(writeImportLibrary(libName, path, exports, config->machine,
+ config->mingw));
return;
}
@@ -862,7 +850,7 @@ static void createImportLibrary(bool asLib) {
if (Error e = writeImportLibrary(libName, tmpName, exports, config->machine,
config->mingw)) {
- handleError(std::move(e));
+ checkError(std::move(e));
return;
}
@@ -870,7 +858,7 @@ static void createImportLibrary(bool asLib) {
tmpName, /*IsText=*/false, /*RequiresNullTerminator=*/false));
if ((*oldBuf)->getBuffer() != newBuf->getBuffer()) {
oldBuf->reset();
- handleError(errorCodeToError(sys::fs::rename(tmpName, path)));
+ checkError(errorCodeToError(sys::fs::rename(tmpName, path)));
} else {
sys::fs::remove(tmpName);
}
@@ -939,7 +927,7 @@ void LinkerDriver::enqueueTask(std::function<void()> task) {
}
bool LinkerDriver::run() {
- ScopedTimer t(inputFileTimer);
+ ScopedTimer t(ctx.inputFileTimer);
bool didWork = !taskQueue.empty();
while (!taskQueue.empty()) {
@@ -952,7 +940,7 @@ bool LinkerDriver::run() {
// Parse an /order file. If an option is given, the linker places
// COMDAT sections in the same order as their names appear in the
// given file.
-static void parseOrderFile(StringRef arg) {
+static void parseOrderFile(COFFLinkerContext &ctx, StringRef arg) {
// For some reason, the MSVC linker requires a filename to be
// preceded by "@".
if (!arg.startswith("@")) {
@@ -962,7 +950,7 @@ static void parseOrderFile(StringRef arg) {
// Get a list of all comdat sections for error checking.
DenseSet<StringRef> set;
- for (Chunk *c : symtab->getChunks())
+ for (Chunk *c : ctx.symtab.getChunks())
if (auto *sec = dyn_cast<SectionChunk>(c))
if (sec->sym)
set.insert(sec->sym->getName());
@@ -996,7 +984,7 @@ static void parseOrderFile(StringRef arg) {
driver->takeBuffer(std::move(mb));
}
-static void parseCallGraphFile(StringRef path) {
+static void parseCallGraphFile(COFFLinkerContext &ctx, StringRef path) {
std::unique_ptr<MemoryBuffer> mb =
CHECK(MemoryBuffer::getFile(path, /*IsText=*/false,
/*RequiresNullTerminator=*/false,
@@ -1005,7 +993,7 @@ static void parseCallGraphFile(StringRef path) {
// Build a map from symbol name to section.
DenseMap<StringRef, Symbol *> map;
- for (ObjFile *file : ObjFile::instances)
+ for (ObjFile *file : ctx.objFileInstances)
for (Symbol *sym : file->getSymbols())
if (sym)
map[sym->getName()] = sym;
@@ -1042,8 +1030,8 @@ static void parseCallGraphFile(StringRef path) {
driver->takeBuffer(std::move(mb));
}
-static void readCallGraphsFromObjectFiles() {
- for (ObjFile *obj : ObjFile::instances) {
+static void readCallGraphsFromObjectFiles(COFFLinkerContext &ctx) {
+ for (ObjFile *obj : ctx.objFileInstances) {
if (obj->callgraphSec) {
ArrayRef<uint8_t> contents;
cantFail(
@@ -1077,7 +1065,7 @@ static void markAddrsig(Symbol *s) {
c->keepUnique = true;
}
-static void findKeepUniqueSections() {
+static void findKeepUniqueSections(COFFLinkerContext &ctx) {
// Exported symbols could be address-significant in other executables or DSOs,
// so we conservatively mark them as address-significant.
for (Export &r : config->exports)
@@ -1085,7 +1073,7 @@ static void findKeepUniqueSections() {
// Visit the address-significance table in each object file and mark each
// referenced symbol as address-significant.
- for (ObjFile *obj : ObjFile::instances) {
+ for (ObjFile *obj : ctx.objFileInstances) {
ArrayRef<Symbol *> syms = obj->getSymbols();
if (obj->addrsigSec) {
ArrayRef<uint8_t> contents;
@@ -1169,7 +1157,7 @@ static void parsePDBAltPath(StringRef altPath) {
void LinkerDriver::convertResources() {
std::vector<ObjFile *> resourceObjFiles;
- for (ObjFile *f : ObjFile::instances) {
+ for (ObjFile *f : ctx.objFileInstances) {
if (f->isResourceObjFile())
resourceObjFiles.push_back(f);
}
@@ -1191,8 +1179,9 @@ void LinkerDriver::convertResources() {
f->includeResourceChunks();
return;
}
- ObjFile *f = make<ObjFile>(convertResToCOFF(resources, resourceObjFiles));
- symtab->addFile(f);
+ ObjFile *f =
+ make<ObjFile>(ctx, convertResToCOFF(resources, resourceObjFiles));
+ ctx.symtab.addFile(f);
f->includeResourceChunks();
}
@@ -1219,9 +1208,9 @@ void LinkerDriver::maybeExportMinGWSymbols(const opt::InputArgList &args) {
if (Optional<StringRef> path = doFindFile(arg->getValue()))
exporter.addWholeArchive(*path);
- symtab->forEachSymbol([&](Symbol *s) {
+ ctx.symtab.forEachSymbol([&](Symbol *s) {
auto *def = dyn_cast<Defined>(s);
- if (!exporter.shouldExport(def))
+ if (!exporter.shouldExport(ctx, def))
return;
if (!def->isGCRoot) {
@@ -1266,7 +1255,7 @@ Optional<std::string> getReproduceFile(const opt::InputArgList &args) {
}
void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
- ScopedTimer rootTimer(Timer::root());
+ ScopedTimer rootTimer(ctx.rootTimer);
// Needed for LTO.
InitializeAllTargetInfos();
@@ -1439,6 +1428,8 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
config->pdbPath = arg->getValue();
if (auto *arg = args.getLastArg(OPT_pdbaltpath))
config->pdbAltPath = arg->getValue();
+ if (auto *arg = args.getLastArg(OPT_pdbpagesize))
+ parsePDBPageSize(arg->getValue());
if (args.hasArg(OPT_natvis))
config->natvisFiles = args.getAllArgValues(OPT_natvis);
if (args.hasArg(OPT_pdbstream)) {
@@ -1705,12 +1696,9 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
for (auto *arg : args.filtered(OPT_aligncomm))
parseAligncomm(arg->getValue());
- // Handle /manifestdependency. This enables /manifest unless /manifest:no is
- // also passed.
- if (auto *arg = args.getLastArg(OPT_manifestdependency)) {
- config->manifestDependency = arg->getValue();
- config->manifest = Configuration::SideBySide;
- }
+ // Handle /manifestdependency.
+ for (auto *arg : args.filtered(OPT_manifestdependency))
+ config->manifestDependencies.insert(arg->getValue());
// Handle /manifest and /manifest:
if (auto *arg = args.getLastArg(OPT_manifest, OPT_manifest_colon)) {
@@ -1750,6 +1738,8 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
config->ltoCSProfileGenerate = args.hasArg(OPT_lto_cs_profile_generate);
config->ltoCSProfileFile = args.getLastArgValue(OPT_lto_cs_profile_file);
// Handle miscellaneous boolean flags.
+ config->ltoPGOWarnMismatch = args.hasFlag(OPT_lto_pgo_warn_mismatch,
+ OPT_lto_pgo_warn_mismatch_no, true);
config->allowBind = args.hasFlag(OPT_allowbind, OPT_allowbind_no, true);
config->allowIsolation =
args.hasFlag(OPT_allowisolation, OPT_allowisolation_no, true);
@@ -1871,10 +1861,6 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
if (Optional<StringRef> path = findLib(arg->getValue()))
enqueuePath(*path, false, false);
- // Windows specific -- Create a resource file containing a manifest file.
- if (config->manifest == Configuration::Embed)
- addBuffer(createManifestRes(), false, false);
-
// Read all input files given via the command line.
run();
@@ -2023,32 +2009,32 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
if (config->imageBase == uint64_t(-1))
config->imageBase = getDefaultImageBase();
- symtab->addSynthetic(mangle("__ImageBase"), nullptr);
+ ctx.symtab.addSynthetic(mangle("__ImageBase"), nullptr);
if (config->machine == I386) {
- symtab->addAbsolute("___safe_se_handler_table", 0);
- symtab->addAbsolute("___safe_se_handler_count", 0);
+ ctx.symtab.addAbsolute("___safe_se_handler_table", 0);
+ ctx.symtab.addAbsolute("___safe_se_handler_count", 0);
}
- symtab->addAbsolute(mangle("__guard_fids_count"), 0);
- symtab->addAbsolute(mangle("__guard_fids_table"), 0);
- symtab->addAbsolute(mangle("__guard_flags"), 0);
- symtab->addAbsolute(mangle("__guard_iat_count"), 0);
- symtab->addAbsolute(mangle("__guard_iat_table"), 0);
- symtab->addAbsolute(mangle("__guard_longjmp_count"), 0);
- symtab->addAbsolute(mangle("__guard_longjmp_table"), 0);
+ ctx.symtab.addAbsolute(mangle("__guard_fids_count"), 0);
+ ctx.symtab.addAbsolute(mangle("__guard_fids_table"), 0);
+ ctx.symtab.addAbsolute(mangle("__guard_flags"), 0);
+ ctx.symtab.addAbsolute(mangle("__guard_iat_count"), 0);
+ ctx.symtab.addAbsolute(mangle("__guard_iat_table"), 0);
+ ctx.symtab.addAbsolute(mangle("__guard_longjmp_count"), 0);
+ ctx.symtab.addAbsolute(mangle("__guard_longjmp_table"), 0);
// Needed for MSVC 2017 15.5 CRT.
- symtab->addAbsolute(mangle("__enclave_config"), 0);
+ ctx.symtab.addAbsolute(mangle("__enclave_config"), 0);
// Needed for MSVC 2019 16.8 CRT.
- symtab->addAbsolute(mangle("__guard_eh_cont_count"), 0);
- symtab->addAbsolute(mangle("__guard_eh_cont_table"), 0);
+ ctx.symtab.addAbsolute(mangle("__guard_eh_cont_count"), 0);
+ ctx.symtab.addAbsolute(mangle("__guard_eh_cont_table"), 0);
if (config->pseudoRelocs) {
- symtab->addAbsolute(mangle("__RUNTIME_PSEUDO_RELOC_LIST__"), 0);
- symtab->addAbsolute(mangle("__RUNTIME_PSEUDO_RELOC_LIST_END__"), 0);
+ ctx.symtab.addAbsolute(mangle("__RUNTIME_PSEUDO_RELOC_LIST__"), 0);
+ ctx.symtab.addAbsolute(mangle("__RUNTIME_PSEUDO_RELOC_LIST_END__"), 0);
}
if (config->mingw) {
- symtab->addAbsolute(mangle("__CTOR_LIST__"), 0);
- symtab->addAbsolute(mangle("__DTOR_LIST__"), 0);
+ ctx.symtab.addAbsolute(mangle("__CTOR_LIST__"), 0);
+ ctx.symtab.addAbsolute(mangle("__DTOR_LIST__"), 0);
}
// This code may add new undefined symbols to the link, which may enqueue more
@@ -2074,12 +2060,12 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
for (auto pair : config->alternateNames) {
StringRef from = pair.first;
StringRef to = pair.second;
- Symbol *sym = symtab->find(from);
+ Symbol *sym = ctx.symtab.find(from);
if (!sym)
continue;
if (auto *u = dyn_cast<Undefined>(sym))
if (!u->weakAlias)
- u->weakAlias = symtab->addUndefined(to);
+ u->weakAlias = ctx.symtab.addUndefined(to);
}
// If any inputs are bitcode files, the LTO code generator may create
@@ -2087,25 +2073,25 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
// file's symbol table. If any of those library functions are defined in a
// bitcode file in an archive member, we need to arrange to use LTO to
// compile those archive members by adding them to the link beforehand.
- if (!BitcodeFile::instances.empty())
+ if (!ctx.bitcodeFileInstances.empty())
for (auto *s : lto::LTO::getRuntimeLibcallSymbols())
- symtab->addLibcall(s);
+ ctx.symtab.addLibcall(s);
// Windows specific -- if __load_config_used can be resolved, resolve it.
- if (symtab->findUnderscore("_load_config_used"))
+ if (ctx.symtab.findUnderscore("_load_config_used"))
addUndefined(mangle("_load_config_used"));
} while (run());
if (args.hasArg(OPT_include_optional)) {
// Handle /includeoptional
for (auto *arg : args.filtered(OPT_include_optional))
- if (dyn_cast_or_null<LazyArchive>(symtab->find(arg->getValue())))
+ if (dyn_cast_or_null<LazyArchive>(ctx.symtab.find(arg->getValue())))
addUndefined(arg->getValue());
while (run());
}
// Create wrapped symbols for -wrap option.
- std::vector<WrappedSymbol> wrapped = addWrappedSymbols(args);
+ std::vector<WrappedSymbol> wrapped = addWrappedSymbols(ctx, args);
// Load more object files that might be needed for wrapped symbols.
if (!wrapped.empty())
while (run());
@@ -2131,7 +2117,7 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
// If it ends up pulling in more object files from static libraries,
// (and maybe doing more stdcall fixups along the way), this would need
// to loop these two calls.
- symtab->loadMinGWSymbols();
+ ctx.symtab.loadMinGWSymbols();
run();
}
@@ -2139,8 +2125,8 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
// If we are going to do codegen for link-time optimization, check for
// unresolvable symbols first, so we don't spend time generating code that
// will fail to link anyway.
- if (!BitcodeFile::instances.empty() && !config->forceUnresolved)
- symtab->reportUnresolvable();
+ if (!ctx.bitcodeFileInstances.empty() && !config->forceUnresolved)
+ ctx.symtab.reportUnresolvable();
if (errorCount())
return;
@@ -2154,11 +2140,11 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
// Do LTO by compiling bitcode input files to a set of native COFF files then
// link those files (unless -thinlto-index-only was given, in which case we
// resolve symbols and write indices, but don't generate native code or link).
- symtab->addCombinedLTOObjects();
+ ctx.symtab.compileBitcodeFiles();
// If -thinlto-index-only is given, we should create only "index
// files" and not object files. Index file creation is already done
- // in addCombinedLTOObject, so we are done if that's the case.
+ // in compileBitcodeFiles, so we are done if that's the case.
if (config->thinLTOIndexOnly)
return;
@@ -2168,10 +2154,10 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
// Apply symbol renames for -wrap.
if (!wrapped.empty())
- wrapSymbols(wrapped);
+ wrapSymbols(ctx, wrapped);
// Resolve remaining undefined symbols and warn about imported locals.
- symtab->resolveRemainingUndefines();
+ ctx.symtab.resolveRemainingUndefines();
if (errorCount())
return;
@@ -2182,12 +2168,12 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
// order provided on the command line, while lld will pull in needed
// files from static libraries only after the last object file on the
// command line.
- for (auto i = ObjFile::instances.begin(), e = ObjFile::instances.end();
+ for (auto i = ctx.objFileInstances.begin(), e = ctx.objFileInstances.end();
i != e; i++) {
ObjFile *file = *i;
if (isCrtend(file->getName())) {
- ObjFile::instances.erase(i);
- ObjFile::instances.push_back(file);
+ ctx.objFileInstances.erase(i);
+ ctx.objFileInstances.push_back(file);
break;
}
}
@@ -2212,7 +2198,7 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
StringRef name = pair.first;
uint32_t alignment = pair.second;
- Symbol *sym = symtab->find(name);
+ Symbol *sym = ctx.symtab.find(name);
if (!sym) {
warn("/aligncomm symbol " + name + " not found");
continue;
@@ -2228,8 +2214,14 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
c->setAlignment(std::max(c->getAlignment(), alignment));
}
- // Windows specific -- Create a side-by-side manifest file.
- if (config->manifest == Configuration::SideBySide)
+ // Windows specific -- Create an embedded or side-by-side manifest.
+ // /manifestdependency: enables /manifest unless an explicit /manifest:no is
+ // also passed.
+ if (config->manifest == Configuration::Embed)
+ addBuffer(createManifestRes(), false, false);
+ else if (config->manifest == Configuration::SideBySide ||
+ (config->manifest == Configuration::Default &&
+ !config->manifestDependencies.empty()))
createSideBySideManifest();
// Handle /order. We want to do this at this moment because we
@@ -2238,16 +2230,16 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
if (auto *arg = args.getLastArg(OPT_order)) {
if (args.hasArg(OPT_call_graph_ordering_file))
error("/order and /call-graph-order-file may not be used together");
- parseOrderFile(arg->getValue());
+ parseOrderFile(ctx, arg->getValue());
config->callGraphProfileSort = false;
}
// Handle /call-graph-ordering-file and /call-graph-profile-sort (default on).
if (config->callGraphProfileSort) {
if (auto *arg = args.getLastArg(OPT_call_graph_ordering_file)) {
- parseCallGraphFile(arg->getValue());
+ parseCallGraphFile(ctx, arg->getValue());
}
- readCallGraphsFromObjectFiles();
+ readCallGraphsFromObjectFiles(ctx);
}
// Handle /print-symbol-order.
@@ -2264,7 +2256,7 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
// functions. This doesn't bring in more object files, but only marks
// functions that already have been included to be retained.
for (const char *n : {"__gxx_personality_v0", "__gcc_personality_v0"}) {
- Defined *d = dyn_cast_or_null<Defined>(symtab->findUnderscore(n));
+ Defined *d = dyn_cast_or_null<Defined>(ctx.symtab.findUnderscore(n));
if (d && !d->isGCRoot) {
d->isGCRoot = true;
config->gcroot.push_back(d);
@@ -2272,7 +2264,7 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
}
}
- markLive(symtab->getChunks());
+ markLive(ctx);
}
// Needs to happen after the last call to addFile().
@@ -2280,17 +2272,17 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
// Identify identical COMDAT sections to merge them.
if (config->doICF != ICFLevel::None) {
- findKeepUniqueSections();
- doICF(symtab->getChunks(), config->doICF);
+ findKeepUniqueSections(ctx);
+ doICF(ctx, config->doICF);
}
// Write the result.
- writeResult();
+ writeResult(ctx);
// Stop early so we can print the results.
rootTimer.stop();
if (config->showTiming)
- Timer::root().print();
+ ctx.rootTimer.print();
}
} // namespace coff
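
Editor's note: the Driver.cpp hunks above are largely mechanical — state that used to live behind the global `symtab` pointer and the static `Timer::root()` now hangs off a `COFFLinkerContext` that the driver carries by reference. As a rough standalone sketch of that pattern (the `LinkerContext`, `SymbolTable`, and `Driver` types below are illustrative stand-ins, not lld's real classes):

    #include <iostream>
    #include <map>
    #include <string>

    // Illustrative stand-ins for lld's SymbolTable and COFFLinkerContext.
    struct SymbolTable {
      std::map<std::string, int> symbols;
      void addAbsolute(const std::string &name, int value) { symbols[name] = value; }
    };

    struct LinkerContext {
      SymbolTable symtab;   // one per link, instead of a process-wide global
    };

    class Driver {
    public:
      explicit Driver(LinkerContext &c) : ctx(c) {}
      void run() {
        // Formerly a call through a global pointer; now it goes via the context.
        ctx.symtab.addAbsolute("__guard_flags", 0);
      }
    private:
      LinkerContext &ctx;
    };

    int main() {
      LinkerContext ctx;        // lives for the duration of one link
      Driver(ctx).run();
      std::cout << ctx.symtab.symbols.size() << " symbol(s) defined\n";
    }

The usual payoff of this shape is that per-link state no longer leaks between linker invocations in the same process.
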
diff --git a/lld/COFF/Driver.h b/lld/COFF/Driver.h
index 5729bed69528..77e67b282665 100644
--- a/lld/COFF/Driver.h
+++ b/lld/COFF/Driver.h
@@ -9,6 +9,7 @@
#ifndef LLD_COFF_DRIVER_H
#define LLD_COFF_DRIVER_H
+#include "COFFLinkerContext.h"
#include "Config.h"
#include "SymbolTable.h"
#include "lld/Common/LLVM.h"
@@ -78,6 +79,8 @@ private:
class LinkerDriver {
public:
+ LinkerDriver(COFFLinkerContext &c) : ctx(c) {}
+
void linkerMain(llvm::ArrayRef<const char *> args);
// Used by the resolver to parse .drectve section contents.
@@ -103,6 +106,8 @@ private:
StringRef doFindLib(StringRef filename);
StringRef doFindLibMinGW(StringRef filename);
+ bool findUnderscoreMangle(StringRef sym);
+
// Parses LIB environment which contains a list of search paths.
void addLibSearchPaths();
@@ -148,6 +153,8 @@ private:
std::vector<MemoryBufferRef> resources;
llvm::StringSet<> directivesExports;
+
+ COFFLinkerContext &ctx;
};
// Functions below this line are defined in DriverUtils.cpp.
@@ -169,6 +176,7 @@ void parseSubsystem(StringRef arg, WindowsSubsystem *sys, uint32_t *major,
void parseAlternateName(StringRef);
void parseMerge(StringRef);
+void parsePDBPageSize(StringRef);
void parseSection(StringRef);
void parseAligncomm(StringRef);
diff --git a/lld/COFF/DriverUtils.cpp b/lld/COFF/DriverUtils.cpp
index b5abe8b1196d..cac254ec6828 100644
--- a/lld/COFF/DriverUtils.cpp
+++ b/lld/COFF/DriverUtils.cpp
@@ -175,6 +175,26 @@ void parseMerge(StringRef s) {
}
}
+void parsePDBPageSize(StringRef s) {
+ int v;
+ if (s.getAsInteger(0, v)) {
+ error("/pdbpagesize: invalid argument: " + s);
+ return;
+ }
+ if (v != 4096 && v != 8192 && v != 16384 && v != 32768) {
+ error("/pdbpagesize: invalid argument: " + s);
+ return;
+ }
+
+ // FIXME: Remove this once other page sizes work.
+ if (v != 4096) {
+ warn("/pdbpagesize: page sizes != 4096 not yet implemented, ignoring flag");
+ v = 4096;
+ }
+
+ config->pdbPageSize = v;
+}
+
static uint32_t parseSectionAttributes(StringRef s) {
uint32_t ret = 0;
for (char c : s.lower()) {
@@ -385,10 +405,10 @@ static std::string createDefaultXml() {
<< " </security>\n"
<< " </trustInfo>\n";
}
- if (!config->manifestDependency.empty()) {
+ for (auto manifestDependency : config->manifestDependencies) {
os << " <dependency>\n"
<< " <dependentAssembly>\n"
- << " <assemblyIdentity " << config->manifestDependency << " />\n"
+ << " <assemblyIdentity " << manifestDependency << " />\n"
<< " </dependentAssembly>\n"
<< " </dependency>\n";
}
@@ -408,7 +428,8 @@ static std::string createManifestXmlWithInternalMt(StringRef defaultXml) {
for (StringRef filename : config->manifestInput) {
std::unique_ptr<MemoryBuffer> manifest =
check(MemoryBuffer::getFile(filename));
- if (auto e = merger.merge(*manifest.get()))
+ // Call takeBuffer to include in /reproduce: output if applicable.
+ if (auto e = merger.merge(driver->takeBuffer(std::move(manifest))))
fatal("internal manifest tool failed on file " + filename + ": " +
toString(std::move(e)));
}
@@ -436,6 +457,11 @@ static std::string createManifestXmlWithExternalMt(StringRef defaultXml) {
for (StringRef filename : config->manifestInput) {
e.add("/manifest");
e.add(filename);
+
+ // Manually add the file to the /reproduce: tar if needed.
+ if (driver->tar)
+ if (auto mbOrErr = MemoryBuffer::getFile(filename))
+ driver->takeBuffer(std::move(*mbOrErr));
}
e.add("/nologo");
e.add("/out:" + StringRef(user.path));
diff --git a/lld/COFF/ICF.cpp b/lld/COFF/ICF.cpp
index 732646967296..832362662394 100644
--- a/lld/COFF/ICF.cpp
+++ b/lld/COFF/ICF.cpp
@@ -18,6 +18,7 @@
//===----------------------------------------------------------------------===//
#include "ICF.h"
+#include "COFFLinkerContext.h"
#include "Chunks.h"
#include "Symbols.h"
#include "lld/Common/ErrorHandler.h"
@@ -36,12 +37,10 @@ using namespace llvm;
namespace lld {
namespace coff {
-static Timer icfTimer("ICF", Timer::root());
-
class ICF {
public:
- ICF(ICFLevel icfLevel) : icfLevel(icfLevel){};
- void run(ArrayRef<Chunk *> v);
+ ICF(COFFLinkerContext &c, ICFLevel icfLevel) : icfLevel(icfLevel), ctx(c){};
+ void run();
private:
void segregate(size_t begin, size_t end, bool constant);
@@ -64,6 +63,8 @@ private:
int cnt = 0;
std::atomic<bool> repeat = {false};
ICFLevel icfLevel = ICFLevel::All;
+
+ COFFLinkerContext &ctx;
};
// Returns true if section S is subject of ICF.
@@ -246,12 +247,12 @@ void ICF::forEachClass(std::function<void(size_t, size_t)> fn) {
// Merge identical COMDAT sections.
// Two sections are considered the same if their section headers,
// contents and relocations are all the same.
-void ICF::run(ArrayRef<Chunk *> vec) {
- ScopedTimer t(icfTimer);
+void ICF::run() {
+ ScopedTimer t(ctx.icfTimer);
// Collect only mergeable sections and group by hash value.
uint32_t nextId = 1;
- for (Chunk *c : vec) {
+ for (Chunk *c : ctx.symtab.getChunks()) {
if (auto *sc = dyn_cast<SectionChunk>(c)) {
if (isEligible(sc))
chunks.push_back(sc);
@@ -262,7 +263,7 @@ void ICF::run(ArrayRef<Chunk *> vec) {
// Make sure that ICF doesn't merge sections that are being handled by string
// tail merging.
- for (MergeChunk *mc : MergeChunk::instances)
+ for (MergeChunk *mc : ctx.mergeChunkInstances)
if (mc)
for (SectionChunk *sc : mc->sections)
sc->eqClass[0] = nextId++;
@@ -317,8 +318,8 @@ void ICF::run(ArrayRef<Chunk *> vec) {
}
// Entry point to ICF.
-void doICF(ArrayRef<Chunk *> chunks, ICFLevel icfLevel) {
- ICF(icfLevel).run(chunks);
+void doICF(COFFLinkerContext &ctx, ICFLevel icfLevel) {
+ ICF(ctx, icfLevel).run();
}
} // namespace coff
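
Editor's note: `ScopedTimer` as used above is an RAII guard — it starts counting against a named timer on construction and credits the elapsed time when stopped or when it leaves scope, which is what lets `ICF::run()` be timed without explicit start/stop calls on every return path. A minimal sketch of that idiom with std::chrono (`Timer` and `ScopedTimer` here are simplified stand-ins, not lld's classes):

    #include <chrono>
    #include <cstdio>

    struct Timer {                       // accumulates elapsed time under a name
      const char *name;
      std::chrono::nanoseconds total{0};
      void print() const { std::printf("%s: %.3f ms\n", name, total.count() / 1e6); }
    };

    class ScopedTimer {                  // starts on construction, stops at scope exit
      using Clock = std::chrono::steady_clock;
    public:
      explicit ScopedTimer(Timer &t) : timer(t), start(Clock::now()) {}
      ~ScopedTimer() { stop(); }
      void stop() {
        if (!stopped)
          timer.total +=
              std::chrono::duration_cast<std::chrono::nanoseconds>(Clock::now() - start);
        stopped = true;
      }
    private:
      Timer &timer;
      Clock::time_point start;
      bool stopped = false;
    };

    int main() {
      Timer icfTimer{"ICF"};
      {
        ScopedTimer t(icfTimer);         // everything in this scope is charged to ICF
        volatile long sum = 0;
        for (long i = 0; i < 1000000; ++i)
          sum += i;
      }
      icfTimer.print();
    }
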
diff --git a/lld/COFF/ICF.h b/lld/COFF/ICF.h
index f8cc8071f9eb..10e6792a5418 100644
--- a/lld/COFF/ICF.h
+++ b/lld/COFF/ICF.h
@@ -17,8 +17,9 @@ namespace lld {
namespace coff {
class Chunk;
+class COFFLinkerContext;
-void doICF(ArrayRef<Chunk *> chunks, ICFLevel);
+void doICF(COFFLinkerContext &ctx, ICFLevel);
} // namespace coff
} // namespace lld
diff --git a/lld/COFF/InputFiles.cpp b/lld/COFF/InputFiles.cpp
index f32353ca4f94..4b38e3d1a99b 100644
--- a/lld/COFF/InputFiles.cpp
+++ b/lld/COFF/InputFiles.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "InputFiles.h"
+#include "COFFLinkerContext.h"
#include "Chunks.h"
#include "Config.h"
#include "DebugTypes.h"
@@ -69,11 +70,6 @@ std::string lld::toString(const coff::InputFile *file) {
.str();
}
-std::vector<ObjFile *> ObjFile::instances;
-std::map<std::string, PDBInputFile *> PDBInputFile::instances;
-std::vector<ImportFile *> ImportFile::instances;
-std::vector<BitcodeFile *> BitcodeFile::instances;
-
/// Checks that Source is compatible with being a weak alias to Target.
/// If Source is Undefined and has no weak alias set, makes it a weak
/// alias to Target.
@@ -98,7 +94,8 @@ static bool ignoredSymbolName(StringRef name) {
return name == "@feat.00" || name == "@comp.id";
}
-ArchiveFile::ArchiveFile(MemoryBufferRef m) : InputFile(ArchiveKind, m) {}
+ArchiveFile::ArchiveFile(COFFLinkerContext &ctx, MemoryBufferRef m)
+ : InputFile(ctx, ArchiveKind, m) {}
void ArchiveFile::parse() {
// Parse a MemoryBufferRef as an archive file.
@@ -106,7 +103,7 @@ void ArchiveFile::parse() {
// Read the symbol table to construct Lazy objects.
for (const Archive::Symbol &sym : file->symbols())
- symtab->addLazyArchive(this, sym);
+ ctx.symtab.addLazyArchive(this, sym);
}
// Returns a buffer pointing to a member file containing a given symbol.
@@ -144,11 +141,11 @@ void LazyObjFile::fetch() {
InputFile *file;
if (isBitcode(mb))
- file = make<BitcodeFile>(mb, "", 0, std::move(symbols));
+ file = make<BitcodeFile>(ctx, mb, "", 0, std::move(symbols));
else
- file = make<ObjFile>(mb, std::move(symbols));
+ file = make<ObjFile>(ctx, mb, std::move(symbols));
mb = {};
- symtab->addFile(file);
+ ctx.symtab.addFile(file);
}
void LazyObjFile::parse() {
@@ -158,7 +155,7 @@ void LazyObjFile::parse() {
CHECK(lto::InputFile::create(this->mb), this);
for (const lto::InputFile::Symbol &sym : obj->symbols()) {
if (!sym.isUndefined())
- symtab->addLazyObject(this, sym.getName());
+ ctx.symtab.addLazyObject(this, sym.getName());
}
return;
}
@@ -175,7 +172,7 @@ void LazyObjFile::parse() {
StringRef name = check(coffObj->getSymbolName(coffSym));
if (coffSym.isAbsolute() && ignoredSymbolName(name))
continue;
- symtab->addLazyObject(this, name);
+ ctx.symtab.addLazyObject(this, name);
i += coffSym.getNumberOfAuxSymbols();
}
}
@@ -293,7 +290,7 @@ SectionChunk *ObjFile::readSection(uint32_t sectionNumber,
// COFF sections that look like string literal sections (i.e. no
// relocations, in .rdata, leader symbol name matches the MSVC name mangling
// for string literals) are subject to string tail merging.
- MergeChunk::addSection(c);
+ MergeChunk::addSection(ctx, c);
else if (name == ".rsrc" || name.startswith(".rsrc$"))
resourceChunks.push_back(c);
else
@@ -387,8 +384,8 @@ Symbol *ObjFile::createRegular(COFFSymbolRef sym) {
if (sym.isExternal()) {
StringRef name = check(coffObj->getSymbolName(sym));
if (sc)
- return symtab->addRegular(this, name, sym.getGeneric(), sc,
- sym.getValue());
+ return ctx.symtab.addRegular(this, name, sym.getGeneric(), sc,
+ sym.getValue());
// For MinGW symbols named .weak.* that point to a discarded section,
// don't create an Undefined symbol. If nothing ever refers to the symbol,
// everything should be fine. If something actually refers to the symbol
@@ -396,7 +393,7 @@ Symbol *ObjFile::createRegular(COFFSymbolRef sym) {
// references at the end.
if (config->mingw && name.startswith(".weak."))
return nullptr;
- return symtab->addUndefined(name, this, false);
+ return ctx.symtab.addUndefined(name, this, false);
}
if (sc)
return make<DefinedRegular>(this, /*Name*/ "", /*IsCOMDAT*/ false,
@@ -464,7 +461,7 @@ void ObjFile::initializeSymbols() {
for (auto &kv : weakAliases) {
Symbol *sym = kv.first;
uint32_t idx = kv.second;
- checkAndSetWeakAlias(symtab, this, sym, symbols[idx]);
+ checkAndSetWeakAlias(&ctx.symtab, this, sym, symbols[idx]);
}
// Free the memory used by sparseChunks now that symbol loading is finished.
@@ -473,7 +470,7 @@ void ObjFile::initializeSymbols() {
Symbol *ObjFile::createUndefined(COFFSymbolRef sym) {
StringRef name = check(coffObj->getSymbolName(sym));
- return symtab->addUndefined(name, this, sym.isWeakExternal());
+ return ctx.symtab.addUndefined(name, this, sym.isWeakExternal());
}
static const coff_aux_section_definition *findSectionDef(COFFObjectFile *obj,
@@ -543,13 +540,13 @@ void ObjFile::handleComdatSelection(
Twine((int)leaderSelection) + " in " + toString(leader->getFile()) +
" and " + Twine((int)selection) + " in " + toString(this))
.str());
- symtab->reportDuplicate(leader, this);
+ ctx.symtab.reportDuplicate(leader, this);
return;
}
switch (selection) {
case IMAGE_COMDAT_SELECT_NODUPLICATES:
- symtab->reportDuplicate(leader, this);
+ ctx.symtab.reportDuplicate(leader, this);
break;
case IMAGE_COMDAT_SELECT_ANY:
@@ -559,14 +556,14 @@ void ObjFile::handleComdatSelection(
case IMAGE_COMDAT_SELECT_SAME_SIZE:
if (leaderChunk->getSize() != getSection(sym)->SizeOfRawData) {
if (!config->mingw) {
- symtab->reportDuplicate(leader, this);
+ ctx.symtab.reportDuplicate(leader, this);
} else {
const coff_aux_section_definition *leaderDef = nullptr;
if (leaderChunk->file)
leaderDef = findSectionDef(leaderChunk->file->getCOFFObj(),
leaderChunk->getSectionNumber());
if (!leaderDef || leaderDef->Length != def->Length)
- symtab->reportDuplicate(leader, this);
+ ctx.symtab.reportDuplicate(leader, this);
}
}
break;
@@ -577,7 +574,7 @@ void ObjFile::handleComdatSelection(
// if the two comdat sections have e.g. different alignment.
// Match that.
if (leaderChunk->getContents() != newChunk.getContents())
- symtab->reportDuplicate(leader, this, &newChunk, sym.getValue());
+ ctx.symtab.reportDuplicate(leader, this, &newChunk, sym.getValue());
break;
}
@@ -620,8 +617,8 @@ Optional<Symbol *> ObjFile::createDefined(
if (sym.isCommon()) {
auto *c = make<CommonChunk>(sym);
chunks.push_back(c);
- return symtab->addCommon(this, getName(), sym.getValue(), sym.getGeneric(),
- c);
+ return ctx.symtab.addCommon(this, getName(), sym.getValue(),
+ sym.getGeneric(), c);
}
if (sym.isAbsolute()) {
@@ -634,7 +631,7 @@ Optional<Symbol *> ObjFile::createDefined(
return nullptr;
if (sym.isExternal())
- return symtab->addAbsolute(name, sym);
+ return ctx.symtab.addAbsolute(name, sym);
return make<DefinedAbsolute>(name, sym);
}
@@ -667,7 +664,7 @@ Optional<Symbol *> ObjFile::createDefined(
if (sym.isExternal()) {
std::tie(leader, prevailing) =
- symtab->addComdat(this, getName(), sym.getGeneric());
+ ctx.symtab.addComdat(this, getName(), sym.getGeneric());
} else {
leader = make<DefinedRegular>(this, /*Name*/ "", /*IsCOMDAT*/ false,
/*IsExternal*/ false, sym.getGeneric());
@@ -789,12 +786,11 @@ void ObjFile::initializeDependencies() {
else
data = getDebugSection(".debug$T");
- // Don't make a TpiSource for objects with no debug info. If the object has
// symbols but no types, make a plain, empty TpiSource anyway, because it
// simplifies adding the symbols later.
if (data.empty()) {
if (!debugChunks.empty())
- debugTypesObj = makeTpiSource(this);
+ debugTypesObj = makeTpiSource(ctx, this);
return;
}
@@ -812,7 +808,7 @@ void ObjFile::initializeDependencies() {
// This object file is a PCH file that others will depend on.
if (isPCH) {
- debugTypesObj = makePrecompSource(this);
+ debugTypesObj = makePrecompSource(ctx, this);
return;
}
@@ -820,8 +816,8 @@ void ObjFile::initializeDependencies() {
if (firstType->kind() == LF_TYPESERVER2) {
TypeServer2Record ts = cantFail(
TypeDeserializer::deserializeAs<TypeServer2Record>(firstType->data()));
- debugTypesObj = makeUseTypeServerSource(this, ts);
- PDBInputFile::enqueue(ts.getName(), this);
+ debugTypesObj = makeUseTypeServerSource(ctx, this, ts);
+ enqueuePdbFile(ts.getName(), this);
return;
}
@@ -830,14 +826,14 @@ void ObjFile::initializeDependencies() {
if (firstType->kind() == LF_PRECOMP) {
PrecompRecord precomp = cantFail(
TypeDeserializer::deserializeAs<PrecompRecord>(firstType->data()));
- debugTypesObj = makeUsePrecompSource(this, precomp);
+ debugTypesObj = makeUsePrecompSource(ctx, this, precomp);
// Drop the LF_PRECOMP record from the input stream.
debugTypes = debugTypes.drop_front(firstType->RecordData.size());
return;
}
// This is a plain old object file.
- debugTypesObj = makeTpiSource(this);
+ debugTypesObj = makeTpiSource(ctx, this);
}
// Make a PDB path assuming the PDB is in the same folder as the OBJ
@@ -855,7 +851,7 @@ static std::string getPdbBaseName(ObjFile *file, StringRef tSPath) {
// The casing of the PDB path stamped in the OBJ can differ from the actual path
// on disk. With this, we ensure to always use lowercase as a key for the
-// PDBInputFile::instances map, at least on Windows.
+// pdbInputFileInstances map, at least on Windows.
static std::string normalizePdbPath(StringRef path) {
#if defined(_WIN32)
return path.lower();
@@ -879,33 +875,25 @@ static Optional<std::string> findPdbPath(StringRef pdbPath,
return None;
}
-PDBInputFile::PDBInputFile(MemoryBufferRef m) : InputFile(PDBKind, m) {}
+PDBInputFile::PDBInputFile(COFFLinkerContext &ctx, MemoryBufferRef m)
+ : InputFile(ctx, PDBKind, m) {}
PDBInputFile::~PDBInputFile() = default;
-PDBInputFile *PDBInputFile::findFromRecordPath(StringRef path,
+PDBInputFile *PDBInputFile::findFromRecordPath(const COFFLinkerContext &ctx,
+ StringRef path,
ObjFile *fromFile) {
auto p = findPdbPath(path.str(), fromFile);
if (!p)
return nullptr;
- auto it = PDBInputFile::instances.find(*p);
- if (it != PDBInputFile::instances.end())
+ auto it = ctx.pdbInputFileInstances.find(*p);
+ if (it != ctx.pdbInputFileInstances.end())
return it->second;
return nullptr;
}
-void PDBInputFile::enqueue(StringRef path, ObjFile *fromFile) {
- auto p = findPdbPath(path.str(), fromFile);
- if (!p)
- return;
- auto it = PDBInputFile::instances.emplace(*p, nullptr);
- if (!it.second)
- return; // already scheduled for load
- driver->enqueuePDB(*p);
-}
-
void PDBInputFile::parse() {
- PDBInputFile::instances[mb.getBufferIdentifier().str()] = this;
+ ctx.pdbInputFileInstances[mb.getBufferIdentifier().str()] = this;
std::unique_ptr<pdb::IPDBSession> thisSession;
loadErr.emplace(pdb::NativeSession::createFromPdb(
@@ -923,7 +911,7 @@ void PDBInputFile::parse() {
loadErr.emplace(expectedInfo.takeError());
return;
}
- debugTypesObj = makeTypeServerSource(this);
+ debugTypesObj = makeTypeServerSource(ctx, this);
}
// Used only for DWARF debug info, which is not common (except in MinGW
@@ -957,6 +945,16 @@ Optional<DILineInfo> ObjFile::getDILineInfo(uint32_t offset,
return dwarf->getDILineInfo(offset, sectionIndex);
}
+void ObjFile::enqueuePdbFile(StringRef path, ObjFile *fromFile) {
+ auto p = findPdbPath(path.str(), fromFile);
+ if (!p)
+ return;
+ auto it = ctx.pdbInputFileInstances.emplace(*p, nullptr);
+ if (!it.second)
+ return; // already scheduled for load
+ driver->enqueuePDB(*p);
+}
+
void ImportFile::parse() {
const char *buf = mb.getBufferStart();
const auto *hdr = reinterpret_cast<const coff_import_header *>(buf);
@@ -990,31 +988,31 @@ void ImportFile::parse() {
this->hdr = hdr;
externalName = extName;
- impSym = symtab->addImportData(impName, this);
+ impSym = ctx.symtab.addImportData(impName, this);
// If this was a duplicate, we logged an error but may continue;
// in this case, impSym is nullptr.
if (!impSym)
return;
if (hdr->getType() == llvm::COFF::IMPORT_CONST)
- static_cast<void>(symtab->addImportData(name, this));
+ static_cast<void>(ctx.symtab.addImportData(name, this));
// If type is function, we need to create a thunk which jump to an
// address pointed by the __imp_ symbol. (This allows you to call
// DLL functions just like regular non-DLL functions.)
if (hdr->getType() == llvm::COFF::IMPORT_CODE)
- thunkSym = symtab->addImportThunk(
+ thunkSym = ctx.symtab.addImportThunk(
name, cast_or_null<DefinedImportData>(impSym), hdr->Machine);
}
-BitcodeFile::BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
- uint64_t offsetInArchive)
- : BitcodeFile(mb, archiveName, offsetInArchive, {}) {}
+BitcodeFile::BitcodeFile(COFFLinkerContext &ctx, MemoryBufferRef mb,
+ StringRef archiveName, uint64_t offsetInArchive)
+ : BitcodeFile(ctx, mb, archiveName, offsetInArchive, {}) {}
-BitcodeFile::BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
- uint64_t offsetInArchive,
+BitcodeFile::BitcodeFile(COFFLinkerContext &ctx, MemoryBufferRef mb,
+ StringRef archiveName, uint64_t offsetInArchive,
std::vector<Symbol *> &&symbols)
- : InputFile(BitcodeKind, mb), symbols(std::move(symbols)) {
+ : InputFile(ctx, BitcodeKind, mb), symbols(std::move(symbols)) {
std::string path = mb.getBufferIdentifier().str();
if (config->thinLTOIndexOnly)
path = replaceThinLTOSuffix(mb.getBufferIdentifier());
@@ -1069,7 +1067,7 @@ void BitcodeFile::parse() {
for (size_t i = 0; i != obj->getComdatTable().size(); ++i)
// FIXME: Check nodeduplicate
comdat[i] =
- symtab->addComdat(this, saver.save(obj->getComdatTable()[i].first));
+ ctx.symtab.addComdat(this, saver.save(obj->getComdatTable()[i].first));
for (const lto::InputFile::Symbol &objSym : obj->symbols()) {
StringRef symName = saver.save(objSym.getName());
int comdatIndex = objSym.getComdatIndex();
@@ -1080,27 +1078,27 @@ void BitcodeFile::parse() {
else
fakeSC = &ltoDataSectionChunk.chunk;
if (objSym.isUndefined()) {
- sym = symtab->addUndefined(symName, this, false);
+ sym = ctx.symtab.addUndefined(symName, this, false);
} else if (objSym.isCommon()) {
- sym = symtab->addCommon(this, symName, objSym.getCommonSize());
+ sym = ctx.symtab.addCommon(this, symName, objSym.getCommonSize());
} else if (objSym.isWeak() && objSym.isIndirect()) {
// Weak external.
- sym = symtab->addUndefined(symName, this, true);
+ sym = ctx.symtab.addUndefined(symName, this, true);
std::string fallback = std::string(objSym.getCOFFWeakExternalFallback());
- Symbol *alias = symtab->addUndefined(saver.save(fallback));
- checkAndSetWeakAlias(symtab, this, sym, alias);
+ Symbol *alias = ctx.symtab.addUndefined(saver.save(fallback));
+ checkAndSetWeakAlias(&ctx.symtab, this, sym, alias);
} else if (comdatIndex != -1) {
if (symName == obj->getComdatTable()[comdatIndex].first) {
sym = comdat[comdatIndex].first;
if (cast<DefinedRegular>(sym)->data == nullptr)
cast<DefinedRegular>(sym)->data = &fakeSC->repl;
} else if (comdat[comdatIndex].second) {
- sym = symtab->addRegular(this, symName, nullptr, fakeSC);
+ sym = ctx.symtab.addRegular(this, symName, nullptr, fakeSC);
} else {
- sym = symtab->addUndefined(symName, this, false);
+ sym = ctx.symtab.addUndefined(symName, this, false);
}
} else {
- sym = symtab->addRegular(this, symName, nullptr, fakeSC);
+ sym = ctx.symtab.addRegular(this, symName, nullptr, fakeSC);
}
symbols.push_back(sym);
if (objSym.isUsed())
@@ -1185,9 +1183,9 @@ void DLLFile::parse() {
}
StringRef impName = saver.save("__imp_" + symbolName);
- symtab->addLazyDLLSymbol(this, s, impName);
+ ctx.symtab.addLazyDLLSymbol(this, s, impName);
if (code)
- symtab->addLazyDLLSymbol(this, s, symbolName);
+ ctx.symtab.addLazyDLLSymbol(this, s, symbolName);
}
}
@@ -1219,6 +1217,6 @@ void DLLFile::makeImport(DLLFile::Symbol *s) {
p += s->symbolName.size() + 1;
memcpy(p, s->dllName.data(), s->dllName.size());
MemoryBufferRef mbref = MemoryBufferRef(StringRef(buf, size), s->dllName);
- ImportFile *impFile = make<ImportFile>(mbref);
- symtab->addFile(impFile);
+ ImportFile *impFile = make<ImportFile>(ctx, mbref);
+ ctx.symtab.addFile(impFile);
}
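
Editor's note: much of the InputFiles.cpp churn above comes from deleting the static `instances` vectors (`ObjFile::instances`, `BitcodeFile::instances`, and friends) and letting the context own the equivalent containers (`ctx.objFileInstances`, `ctx.bitcodeFileInstances`, ...). A small before/after sketch of that move, with illustrative types rather than lld's:

    #include <iostream>
    #include <memory>
    #include <string>
    #include <vector>

    struct ObjFile {
      std::string name;
      explicit ObjFile(std::string n) : name(std::move(n)) {}
    };

    struct LinkContext {
      // Was: static std::vector<ObjFile *> ObjFile::instances;
      // Now the context owns the list, so each link has its own.
      std::vector<std::unique_ptr<ObjFile>> objFiles;

      ObjFile &addObjFile(std::string name) {
        objFiles.push_back(std::make_unique<ObjFile>(std::move(name)));
        return *objFiles.back();
      }
    };

    int main() {
      LinkContext linkA, linkB;          // two independent links in one process
      linkA.addObjFile("a.obj");
      linkA.addObjFile("b.obj");
      linkB.addObjFile("c.obj");
      std::cout << linkA.objFiles.size() << " vs " << linkB.objFiles.size() << "\n";  // 2 vs 1
    }
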
diff --git a/lld/COFF/InputFiles.h b/lld/COFF/InputFiles.h
index 47b5c588c5af..801c668d3ae4 100644
--- a/lld/COFF/InputFiles.h
+++ b/lld/COFF/InputFiles.h
@@ -38,6 +38,7 @@ namespace lld {
class DWARFCache;
namespace coff {
+class COFFLinkerContext;
std::vector<MemoryBufferRef> getArchiveMembers(llvm::object::Archive *file);
@@ -91,8 +92,11 @@ public:
// Returns .drectve section contents if exist.
StringRef getDirectives() { return directives; }
+ COFFLinkerContext &ctx;
+
protected:
- InputFile(Kind k, MemoryBufferRef m) : mb(m), fileKind(k) {}
+ InputFile(COFFLinkerContext &c, Kind k, MemoryBufferRef m)
+ : mb(m), ctx(c), fileKind(k) {}
StringRef directives;
@@ -103,7 +107,7 @@ private:
// .lib or .a file.
class ArchiveFile : public InputFile {
public:
- explicit ArchiveFile(MemoryBufferRef m);
+ explicit ArchiveFile(COFFLinkerContext &ctx, MemoryBufferRef m);
static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }
void parse() override;
@@ -120,7 +124,8 @@ private:
// .obj or .o file between -start-lib and -end-lib.
class LazyObjFile : public InputFile {
public:
- explicit LazyObjFile(MemoryBufferRef m) : InputFile(LazyObjectKind, m) {}
+ explicit LazyObjFile(COFFLinkerContext &ctx, MemoryBufferRef m)
+ : InputFile(ctx, LazyObjectKind, m) {}
static bool classof(const InputFile *f) {
return f->kind() == LazyObjectKind;
}
@@ -136,9 +141,11 @@ private:
// .obj or .o file. This may be a member of an archive file.
class ObjFile : public InputFile {
public:
- explicit ObjFile(MemoryBufferRef m) : InputFile(ObjectKind, m) {}
- explicit ObjFile(MemoryBufferRef m, std::vector<Symbol *> &&symbols)
- : InputFile(ObjectKind, m), symbols(std::move(symbols)) {}
+ explicit ObjFile(COFFLinkerContext &ctx, MemoryBufferRef m)
+ : InputFile(ctx, ObjectKind, m) {}
+ explicit ObjFile(COFFLinkerContext &ctx, MemoryBufferRef m,
+ std::vector<Symbol *> &&symbols)
+ : InputFile(ctx, ObjectKind, m), symbols(std::move(symbols)) {}
static bool classof(const InputFile *f) { return f->kind() == ObjectKind; }
void parse() override;
MachineTypes getMachineType() override;
@@ -175,8 +182,6 @@ public:
bool isResourceObjFile() const { return !resourceChunks.empty(); }
- static std::vector<ObjFile *> instances;
-
// Flags in the absolute @feat.00 symbol if it is present. These usually
// indicate if an object was compiled with certain security features enabled
// like stack guard, safeseh, /guard:cf, or other things.
@@ -228,6 +233,8 @@ private:
return getSection(sym.getSectionNumber());
}
+ void enqueuePdbFile(StringRef path, ObjFile *fromFile);
+
void initializeChunks();
void initializeSymbols();
void initializeFlags();
@@ -318,16 +325,13 @@ private:
// stream.
class PDBInputFile : public InputFile {
public:
- explicit PDBInputFile(MemoryBufferRef m);
+ explicit PDBInputFile(COFFLinkerContext &ctx, MemoryBufferRef m);
~PDBInputFile();
static bool classof(const InputFile *f) { return f->kind() == PDBKind; }
void parse() override;
- static void enqueue(StringRef path, ObjFile *fromFile);
-
- static PDBInputFile *findFromRecordPath(StringRef path, ObjFile *fromFile);
-
- static std::map<std::string, PDBInputFile *> instances;
+ static PDBInputFile *findFromRecordPath(const COFFLinkerContext &ctx,
+ StringRef path, ObjFile *fromFile);
// Record possible errors while opening the PDB file
llvm::Optional<Error> loadErr;
@@ -344,12 +348,11 @@ public:
// for details about the format.
class ImportFile : public InputFile {
public:
- explicit ImportFile(MemoryBufferRef m) : InputFile(ImportKind, m) {}
+ explicit ImportFile(COFFLinkerContext &ctx, MemoryBufferRef m)
+ : InputFile(ctx, ImportKind, m) {}
static bool classof(const InputFile *f) { return f->kind() == ImportKind; }
- static std::vector<ImportFile *> instances;
-
Symbol *impSym = nullptr;
Symbol *thunkSym = nullptr;
std::string dllName;
@@ -377,16 +380,15 @@ public:
// Used for LTO.
class BitcodeFile : public InputFile {
public:
- BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
+ BitcodeFile(COFFLinkerContext &ctx, MemoryBufferRef mb, StringRef archiveName,
uint64_t offsetInArchive);
- explicit BitcodeFile(MemoryBufferRef m, StringRef archiveName,
- uint64_t offsetInArchive,
+ explicit BitcodeFile(COFFLinkerContext &ctx, MemoryBufferRef m,
+ StringRef archiveName, uint64_t offsetInArchive,
std::vector<Symbol *> &&symbols);
~BitcodeFile();
static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; }
ArrayRef<Symbol *> getSymbols() { return symbols; }
MachineTypes getMachineType() override;
- static std::vector<BitcodeFile *> instances;
std::unique_ptr<llvm::lto::InputFile> obj;
private:
@@ -398,7 +400,8 @@ private:
// .dll file. MinGW only.
class DLLFile : public InputFile {
public:
- explicit DLLFile(MemoryBufferRef m) : InputFile(DLLKind, m) {}
+ explicit DLLFile(COFFLinkerContext &ctx, MemoryBufferRef m)
+ : InputFile(ctx, DLLKind, m) {}
static bool classof(const InputFile *f) { return f->kind() == DLLKind; }
void parse() override;
MachineTypes getMachineType() override;
diff --git a/lld/COFF/LLDMapFile.cpp b/lld/COFF/LLDMapFile.cpp
index 79df33a3535f..639b186dbb6f 100644
--- a/lld/COFF/LLDMapFile.cpp
+++ b/lld/COFF/LLDMapFile.cpp
@@ -19,6 +19,7 @@
//===----------------------------------------------------------------------===//
#include "LLDMapFile.h"
+#include "COFFLinkerContext.h"
#include "SymbolTable.h"
#include "Symbols.h"
#include "Writer.h"
@@ -44,9 +45,9 @@ static void writeHeader(raw_ostream &os, uint64_t addr, uint64_t size,
}
// Returns a list of all symbols that we want to print out.
-static std::vector<DefinedRegular *> getSymbols() {
+static std::vector<DefinedRegular *> getSymbols(const COFFLinkerContext &ctx) {
std::vector<DefinedRegular *> v;
- for (ObjFile *file : ObjFile::instances)
+ for (ObjFile *file : ctx.objFileInstances)
for (Symbol *b : file->getSymbols())
if (auto *sym = dyn_cast_or_null<DefinedRegular>(b))
if (sym && !sym->getCOFFSymbol().isSectionDefinition())
@@ -86,7 +87,7 @@ getSymbolStrings(ArrayRef<DefinedRegular *> syms) {
return ret;
}
-void lld::coff::writeLLDMapFile(ArrayRef<OutputSection *> outputSections) {
+void lld::coff::writeLLDMapFile(const COFFLinkerContext &ctx) {
if (config->lldmapFile.empty())
return;
@@ -96,7 +97,7 @@ void lld::coff::writeLLDMapFile(ArrayRef<OutputSection *> outputSections) {
fatal("cannot open " + config->lldmapFile + ": " + ec.message());
// Collect symbol info that we want to print out.
- std::vector<DefinedRegular *> syms = getSymbols();
+ std::vector<DefinedRegular *> syms = getSymbols(ctx);
SymbolMapTy sectionSyms = getSectionSyms(syms);
DenseMap<DefinedRegular *, std::string> symStr = getSymbolStrings(syms);
@@ -104,7 +105,7 @@ void lld::coff::writeLLDMapFile(ArrayRef<OutputSection *> outputSections) {
os << "Address Size Align Out In Symbol\n";
// Print out file contents.
- for (OutputSection *sec : outputSections) {
+ for (OutputSection *sec : ctx.outputSections) {
writeHeader(os, sec->getRVA(), sec->getVirtualSize(), /*align=*/pageSize);
os << sec->name << '\n';
diff --git a/lld/COFF/LLDMapFile.h b/lld/COFF/LLDMapFile.h
index b731293a8625..72e999a61900 100644
--- a/lld/COFF/LLDMapFile.h
+++ b/lld/COFF/LLDMapFile.h
@@ -9,12 +9,10 @@
#ifndef LLD_COFF_LLDMAPFILE_H
#define LLD_COFF_LLDMAPFILE_H
-#include "llvm/ADT/ArrayRef.h"
-
namespace lld {
namespace coff {
-class OutputSection;
-void writeLLDMapFile(llvm::ArrayRef<OutputSection *> outputSections);
+class COFFLinkerContext;
+void writeLLDMapFile(const COFFLinkerContext &ctx);
}
}
diff --git a/lld/COFF/LTO.cpp b/lld/COFF/LTO.cpp
index d117abf86f79..f117b62192c8 100644
--- a/lld/COFF/LTO.cpp
+++ b/lld/COFF/LTO.cpp
@@ -20,10 +20,10 @@
#include "llvm/ADT/Twine.h"
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/IR/DiagnosticPrinter.h"
-#include "llvm/LTO/Caching.h"
#include "llvm/LTO/Config.h"
#include "llvm/LTO/LTO.h"
#include "llvm/Object/SymbolicFile.h"
+#include "llvm/Support/Caching.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
@@ -87,6 +87,7 @@ static lto::Config createConfig() {
c.DebugPassManager = config->ltoDebugPassManager;
c.CSIRProfile = std::string(config->ltoCSProfileFile);
c.RunCSIRInstr = config->ltoCSProfileGenerate;
+ c.PGOWarnMismatch = config->ltoPGOWarnMismatch;
if (config->saveTemps)
checkError(c.addSaveTemps(std::string(config->outputFile) + ".",
@@ -155,7 +156,7 @@ void BitcodeCompiler::add(BitcodeFile &f) {
// Merge all the bitcode files we have seen, codegen the result
// and return the resulting objects.
-std::vector<InputFile *> BitcodeCompiler::compile() {
+std::vector<InputFile *> BitcodeCompiler::compile(COFFLinkerContext &ctx) {
unsigned maxTasks = ltoObj->getMaxTasks();
buf.resize(maxTasks);
files.resize(maxTasks);
@@ -163,16 +164,17 @@ std::vector<InputFile *> BitcodeCompiler::compile() {
// The /lldltocache option specifies the path to a directory in which to cache
// native object files for ThinLTO incremental builds. If a path was
// specified, configure LTO to use it as the cache directory.
- lto::NativeObjectCache cache;
+ FileCache cache;
if (!config->ltoCache.empty())
- cache = check(lto::localCache(
- config->ltoCache, [&](size_t task, std::unique_ptr<MemoryBuffer> mb) {
- files[task] = std::move(mb);
- }));
+ cache =
+ check(localCache("ThinLTO", "Thin", config->ltoCache,
+ [&](size_t task, std::unique_ptr<MemoryBuffer> mb) {
+ files[task] = std::move(mb);
+ }));
checkError(ltoObj->run(
[&](size_t task) {
- return std::make_unique<lto::NativeObjectStream>(
+ return std::make_unique<CachedFileStream>(
std::make_unique<raw_svector_ostream>(buf[task]));
},
cache));
@@ -223,7 +225,7 @@ std::vector<InputFile *> BitcodeCompiler::compile() {
if (config->saveTemps)
saveBuffer(buf[i], ltoObjName);
- ret.push_back(make<ObjFile>(MemoryBufferRef(objBuf, ltoObjName)));
+ ret.push_back(make<ObjFile>(ctx, MemoryBufferRef(objBuf, ltoObjName)));
}
return ret;
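
Editor's note: in the LTO.cpp hunk above, each backend task deposits its output into `files[task]` through the callback handed to `localCache`, so results may arrive in any order and are still collected by index. A standalone sketch of that callback-indexed collection (plain std types; the real code stores `std::unique_ptr<MemoryBuffer>` and goes through the `FileCache` from llvm/Support/Caching.h, as shown in the diff):

    #include <cstddef>
    #include <functional>
    #include <iostream>
    #include <string>
    #include <vector>

    // Each codegen task writes its output at its own index, so tasks can
    // complete in any order and the driver still reads results back in order.
    int main() {
      const std::size_t maxTasks = 3;
      std::vector<std::string> files(maxTasks);

      std::function<void(std::size_t, std::string)> addBuffer =
          [&](std::size_t task, std::string obj) { files[task] = std::move(obj); };

      // Simulated out-of-order completion of LTO backend tasks.
      addBuffer(2, "native object #2");
      addBuffer(0, "native object #0");
      addBuffer(1, "native object #1");

      for (std::size_t i = 0; i < maxTasks; ++i)
        std::cout << i << ": " << files[i] << "\n";
    }
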
diff --git a/lld/COFF/LTO.h b/lld/COFF/LTO.h
index a2b321df7901..b05befc7146a 100644
--- a/lld/COFF/LTO.h
+++ b/lld/COFF/LTO.h
@@ -38,6 +38,7 @@ namespace coff {
class BitcodeFile;
class InputFile;
+class COFFLinkerContext;
class BitcodeCompiler {
public:
@@ -45,7 +46,7 @@ public:
~BitcodeCompiler();
void add(BitcodeFile &f);
- std::vector<InputFile *> compile();
+ std::vector<InputFile *> compile(COFFLinkerContext &ctx);
private:
std::unique_ptr<llvm::lto::LTO> ltoObj;
diff --git a/lld/COFF/MapFile.cpp b/lld/COFF/MapFile.cpp
index 41e169ef56e2..ace614da866b 100644
--- a/lld/COFF/MapFile.cpp
+++ b/lld/COFF/MapFile.cpp
@@ -28,6 +28,7 @@
//===----------------------------------------------------------------------===//
#include "MapFile.h"
+#include "COFFLinkerContext.h"
#include "SymbolTable.h"
#include "Symbols.h"
#include "Writer.h"
@@ -42,11 +43,6 @@ using namespace llvm::object;
using namespace lld;
using namespace lld::coff;
-static Timer totalMapTimer("MAP emission (Cumulative)", Timer::root());
-static Timer symbolGatherTimer("Gather symbols", totalMapTimer);
-static Timer symbolStringsTimer("Build symbol strings", totalMapTimer);
-static Timer writeTimer("Write to file", totalMapTimer);
-
// Print out the first two columns of a line.
static void writeHeader(raw_ostream &os, uint32_t sec, uint64_t addr) {
os << format(" %04x:%08llx", sec, addr);
@@ -98,10 +94,11 @@ static void sortUniqueSymbols(std::vector<Defined *> &syms) {
}
// Returns the lists of all symbols that we want to print out.
-static void getSymbols(std::vector<Defined *> &syms,
+static void getSymbols(const COFFLinkerContext &ctx,
+ std::vector<Defined *> &syms,
std::vector<Defined *> &staticSyms) {
- for (ObjFile *file : ObjFile::instances)
+ for (ObjFile *file : ctx.objFileInstances)
for (Symbol *b : file->getSymbols()) {
if (!b || !b->isLive())
continue;
@@ -119,7 +116,7 @@ static void getSymbols(std::vector<Defined *> &syms,
}
}
- for (ImportFile *file : ImportFile::instances) {
+ for (ImportFile *file : ctx.importFileInstances) {
if (!file->live)
continue;
@@ -142,7 +139,7 @@ static void getSymbols(std::vector<Defined *> &syms,
// Construct a map from symbols to their stringified representations.
static DenseMap<Defined *, std::string>
-getSymbolStrings(ArrayRef<Defined *> syms) {
+getSymbolStrings(const COFFLinkerContext &ctx, ArrayRef<Defined *> syms) {
std::vector<std::string> str(syms.size());
parallelForEachN((size_t)0, syms.size(), [&](size_t i) {
raw_string_ostream os(str[i]);
@@ -161,7 +158,7 @@ getSymbolStrings(ArrayRef<Defined *> syms) {
fileDescr = "<common>";
} else if (Chunk *chunk = sym->getChunk()) {
address = sym->getRVA();
- if (OutputSection *sec = chunk->getOutputSection())
+ if (OutputSection *sec = ctx.getOutputSection(chunk))
address -= sec->header.VirtualAddress;
sectionIdx = chunk->getOutputSectionIdx();
@@ -201,7 +198,7 @@ getSymbolStrings(ArrayRef<Defined *> syms) {
return ret;
}
-void lld::coff::writeMapFile(ArrayRef<OutputSection *> outputSections) {
+void lld::coff::writeMapFile(COFFLinkerContext &ctx) {
if (config->mapFile.empty())
return;
@@ -210,21 +207,22 @@ void lld::coff::writeMapFile(ArrayRef<OutputSection *> outputSections) {
if (ec)
fatal("cannot open " + config->mapFile + ": " + ec.message());
- ScopedTimer t1(totalMapTimer);
+ ScopedTimer t1(ctx.totalMapTimer);
// Collect symbol info that we want to print out.
- ScopedTimer t2(symbolGatherTimer);
+ ScopedTimer t2(ctx.symbolGatherTimer);
std::vector<Defined *> syms;
std::vector<Defined *> staticSyms;
- getSymbols(syms, staticSyms);
+ getSymbols(ctx, syms, staticSyms);
t2.stop();
- ScopedTimer t3(symbolStringsTimer);
- DenseMap<Defined *, std::string> symStr = getSymbolStrings(syms);
- DenseMap<Defined *, std::string> staticSymStr = getSymbolStrings(staticSyms);
+ ScopedTimer t3(ctx.symbolStringsTimer);
+ DenseMap<Defined *, std::string> symStr = getSymbolStrings(ctx, syms);
+ DenseMap<Defined *, std::string> staticSymStr =
+ getSymbolStrings(ctx, staticSyms);
t3.stop();
- ScopedTimer t4(writeTimer);
+ ScopedTimer t4(ctx.writeTimer);
SmallString<128> AppName = sys::path::filename(config->outputFile);
sys::path::replace_extension(AppName, "");
@@ -248,7 +246,7 @@ void lld::coff::writeMapFile(ArrayRef<OutputSection *> outputSections) {
// Print out section table.
os << " Start Length Name Class\n";
- for (OutputSection *sec : outputSections) {
+ for (OutputSection *sec : ctx.outputSections) {
// Merge display of chunks with same sectionName
std::vector<std::pair<SectionChunk *, SectionChunk *>> ChunkRanges;
for (Chunk *c : sec->chunks) {
@@ -303,7 +301,7 @@ void lld::coff::writeMapFile(ArrayRef<OutputSection *> outputSections) {
Chunk *chunk = entry->getChunk();
entrySecIndex = chunk->getOutputSectionIdx();
entryAddress =
- entry->getRVA() - chunk->getOutputSection()->header.VirtualAddress;
+ entry->getRVA() - ctx.getOutputSection(chunk)->header.VirtualAddress;
}
}
os << " entry point at ";
diff --git a/lld/COFF/MapFile.h b/lld/COFF/MapFile.h
index 2bf01bd07285..d4572332b8b8 100644
--- a/lld/COFF/MapFile.h
+++ b/lld/COFF/MapFile.h
@@ -9,12 +9,10 @@
#ifndef LLD_COFF_MAPFILE_H
#define LLD_COFF_MAPFILE_H
-#include "llvm/ADT/ArrayRef.h"
-
namespace lld {
namespace coff {
-class OutputSection;
-void writeMapFile(llvm::ArrayRef<OutputSection *> outputSections);
+class COFFLinkerContext;
+void writeMapFile(COFFLinkerContext &ctx);
}
}
diff --git a/lld/COFF/MarkLive.cpp b/lld/COFF/MarkLive.cpp
index 0afa615a1933..f53005278e22 100644
--- a/lld/COFF/MarkLive.cpp
+++ b/lld/COFF/MarkLive.cpp
@@ -6,6 +6,7 @@
//
//===----------------------------------------------------------------------===//
+#include "COFFLinkerContext.h"
#include "Chunks.h"
#include "Symbols.h"
#include "lld/Common/Timer.h"
@@ -15,13 +16,11 @@
namespace lld {
namespace coff {
-static Timer gctimer("GC", Timer::root());
-
// Set live bit on for each reachable chunk. Unmarked (unreachable)
// COMDAT chunks will be ignored by Writer, so they will be excluded
// from the final output.
-void markLive(ArrayRef<Chunk *> chunks) {
- ScopedTimer t(gctimer);
+void markLive(COFFLinkerContext &ctx) {
+ ScopedTimer t(ctx.gcTimer);
// We build up a worklist of sections which have been marked as live. We only
// push into the worklist when we discover an unmarked section, and we mark
@@ -31,7 +30,7 @@ void markLive(ArrayRef<Chunk *> chunks) {
// COMDAT section chunks are dead by default. Add non-COMDAT chunks. Do not
// traverse DWARF sections. They are live, but they should not keep other
// sections alive.
- for (Chunk *c : chunks)
+ for (Chunk *c : ctx.symtab.getChunks())
if (auto *sc = dyn_cast<SectionChunk>(c))
if (sc->live && !sc->isDWARF())
worklist.push_back(sc);
@@ -70,6 +69,5 @@ void markLive(ArrayRef<Chunk *> chunks) {
enqueue(&c);
}
}
-
}
}
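
Editor's note: `markLive` above is a standard worklist traversal — roots are marked and pushed, then everything reachable from a live section is marked in turn, and unmarked COMDAT chunks are later dropped by the writer. A self-contained sketch of that traversal (the `Section` type and its `refs` edges are illustrative, not lld's `SectionChunk`):

    #include <iostream>
    #include <string>
    #include <vector>

    // A section is live if it is a root or reachable from a live section.
    struct Section {
      std::string name;
      bool live = false;
      std::vector<Section *> refs;       // sections this one references
    };

    void markLive(const std::vector<Section *> &roots) {
      std::vector<Section *> worklist;
      auto enqueue = [&](Section *s) {
        if (!s->live) {                  // only push a section the first time it is marked
          s->live = true;
          worklist.push_back(s);
        }
      };
      for (Section *r : roots)
        enqueue(r);
      while (!worklist.empty()) {
        Section *s = worklist.back();
        worklist.pop_back();
        for (Section *t : s->refs)
          enqueue(t);
      }
    }

    int main() {
      Section mainSec{"main"}, used{"used"}, dead{"dead"};
      mainSec.refs = {&used};
      markLive({&mainSec});
      for (Section *s : {&mainSec, &used, &dead})
        std::cout << s->name << ": " << (s->live ? "live" : "dead") << "\n";
    }
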
diff --git a/lld/COFF/MarkLive.h b/lld/COFF/MarkLive.h
index e4e4c31c7c79..6f211487236c 100644
--- a/lld/COFF/MarkLive.h
+++ b/lld/COFF/MarkLive.h
@@ -10,14 +10,13 @@
#define LLD_COFF_MARKLIVE_H
#include "lld/Common/LLVM.h"
-#include "llvm/ADT/ArrayRef.h"
namespace lld {
namespace coff {
-class Chunk;
+class COFFLinkerContext;
-void markLive(ArrayRef<Chunk *> chunks);
+void markLive(COFFLinkerContext &ctx);
} // namespace coff
} // namespace lld
diff --git a/lld/COFF/MinGW.cpp b/lld/COFF/MinGW.cpp
index 7c1891e67d45..148ebe5eea66 100644
--- a/lld/COFF/MinGW.cpp
+++ b/lld/COFF/MinGW.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "MinGW.h"
+#include "COFFLinkerContext.h"
#include "Driver.h"
#include "InputFiles.h"
#include "SymbolTable.h"
@@ -122,7 +123,8 @@ void AutoExporter::addWholeArchive(StringRef path) {
excludeLibs.erase(libName);
}
-bool AutoExporter::shouldExport(Defined *sym) const {
+bool AutoExporter::shouldExport(const COFFLinkerContext &ctx,
+ Defined *sym) const {
if (!sym || !sym->getChunk())
return false;
@@ -141,7 +143,7 @@ bool AutoExporter::shouldExport(Defined *sym) const {
return false;
// If a corresponding __imp_ symbol exists and is defined, don't export it.
- if (symtab->find(("__imp_" + sym->getName()).str()))
+ if (ctx.symtab.find(("__imp_" + sym->getName()).str()))
return false;
// Check that file is non-null before dereferencing it, symbols not
@@ -192,7 +194,7 @@ static StringRef mangle(Twine sym) {
// like they are not being used at all, so we explicitly set some flags so
// that LTO won't eliminate them.
std::vector<WrappedSymbol>
-lld::coff::addWrappedSymbols(opt::InputArgList &args) {
+lld::coff::addWrappedSymbols(COFFLinkerContext &ctx, opt::InputArgList &args) {
std::vector<WrappedSymbol> v;
DenseSet<StringRef> seen;
@@ -201,18 +203,18 @@ lld::coff::addWrappedSymbols(opt::InputArgList &args) {
if (!seen.insert(name).second)
continue;
- Symbol *sym = symtab->findUnderscore(name);
+ Symbol *sym = ctx.symtab.findUnderscore(name);
if (!sym)
continue;
- Symbol *real = symtab->addUndefined(mangle("__real_" + name));
- Symbol *wrap = symtab->addUndefined(mangle("__wrap_" + name));
+ Symbol *real = ctx.symtab.addUndefined(mangle("__real_" + name));
+ Symbol *wrap = ctx.symtab.addUndefined(mangle("__wrap_" + name));
v.push_back({sym, real, wrap});
// These symbols may seem undefined initially, but don't bail out
- // at symtab->reportUnresolvable() due to them, but let wrapSymbols
+ // at symtab.reportUnresolvable() due to them, but let wrapSymbols
// below sort things out before checking finally with
- // symtab->resolveRemainingUndefines().
+ // symtab.resolveRemainingUndefines().
sym->deferUndefined = true;
real->deferUndefined = true;
// We want to tell LTO not to inline symbols to be overwritten
@@ -233,13 +235,14 @@ lld::coff::addWrappedSymbols(opt::InputArgList &args) {
// When this function is executed, only InputFiles and symbol table
// contain pointers to symbol objects. We visit them to replace pointers,
// so that wrapped symbols are swapped as instructed by the command line.
-void lld::coff::wrapSymbols(ArrayRef<WrappedSymbol> wrapped) {
+void lld::coff::wrapSymbols(COFFLinkerContext &ctx,
+ ArrayRef<WrappedSymbol> wrapped) {
DenseMap<Symbol *, Symbol *> map;
for (const WrappedSymbol &w : wrapped) {
map[w.sym] = w.wrap;
map[w.real] = w.sym;
if (Defined *d = dyn_cast<Defined>(w.wrap)) {
- Symbol *imp = symtab->find(("__imp_" + w.sym->getName()).str());
+ Symbol *imp = ctx.symtab.find(("__imp_" + w.sym->getName()).str());
// Create a new defined local import for the wrap symbol. If
// no imp prefixed symbol existed, there's no need for it.
// (We can't easily distinguish whether any object file actually
@@ -247,14 +250,14 @@ void lld::coff::wrapSymbols(ArrayRef<WrappedSymbol> wrapped) {
if (imp) {
DefinedLocalImport *wrapimp = make<DefinedLocalImport>(
saver.save("__imp_" + w.wrap->getName()), d);
- symtab->localImportChunks.push_back(wrapimp->getChunk());
+ ctx.symtab.localImportChunks.push_back(wrapimp->getChunk());
map[imp] = wrapimp;
}
}
}
// Update pointers in input files.
- parallelForEach(ObjFile::instances, [&](ObjFile *file) {
+ parallelForEach(ctx.objFileInstances, [&](ObjFile *file) {
MutableArrayRef<Symbol *> syms = file->getMutableSymbols();
for (size_t i = 0, e = syms.size(); i != e; ++i)
if (Symbol *s = map.lookup(syms[i]))
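
Editor's note: the `-wrap` support above works by building a pointer-replacement map (`sym -> wrap`, `real -> sym`) and then rewriting every symbol pointer held by the input files; nothing is renamed. A toy sketch of that rewrite step (illustrative `Symbol` type, a single file's symbol list, and no `__imp_` handling):

    #include <iostream>
    #include <map>
    #include <string>
    #include <vector>

    struct Symbol {
      std::string name;
    };

    // For --wrap=foo: references to foo go to __wrap_foo, and references to
    // __real_foo go back to the original foo. A pointer map expresses both.
    int main() {
      Symbol foo{"foo"}, realFoo{"__real_foo"}, wrapFoo{"__wrap_foo"};

      std::map<Symbol *, Symbol *> replace = {
          {&foo, &wrapFoo},     // call sites of foo now reach the wrapper
          {&realFoo, &foo},     // the wrapper reaches the original via __real_foo
      };

      // Symbol pointers as an object file might hold them.
      std::vector<Symbol *> fileSymbols = {&foo, &realFoo, &wrapFoo};
      for (Symbol *&s : fileSymbols) {
        auto it = replace.find(s);
        if (it != replace.end())
          s = it->second;
      }

      for (Symbol *s : fileSymbols)
        std::cout << s->name << "\n";    // __wrap_foo, foo, __wrap_foo
    }
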
diff --git a/lld/COFF/MinGW.h b/lld/COFF/MinGW.h
index 2f2bd119c33d..8f9343784fa0 100644
--- a/lld/COFF/MinGW.h
+++ b/lld/COFF/MinGW.h
@@ -19,6 +19,7 @@
namespace lld {
namespace coff {
+class COFFLinkerContext;
// Logic for deciding what symbols to export, when exporting all
// symbols for MinGW.
@@ -34,7 +35,7 @@ public:
llvm::StringSet<> excludeLibs;
llvm::StringSet<> excludeObjects;
- bool shouldExport(Defined *sym) const;
+ bool shouldExport(const COFFLinkerContext &ctx, Defined *sym) const;
};
void writeDefFile(StringRef name);
@@ -53,9 +54,10 @@ struct WrappedSymbol {
Symbol *wrap;
};
-std::vector<WrappedSymbol> addWrappedSymbols(llvm::opt::InputArgList &args);
+std::vector<WrappedSymbol> addWrappedSymbols(COFFLinkerContext &ctx,
+ llvm::opt::InputArgList &args);
-void wrapSymbols(ArrayRef<WrappedSymbol> wrapped);
+void wrapSymbols(COFFLinkerContext &ctx, ArrayRef<WrappedSymbol> wrapped);
} // namespace coff
} // namespace lld
diff --git a/lld/COFF/Options.td b/lld/COFF/Options.td
index 2ce145520ea8..7189088f8be6 100644
--- a/lld/COFF/Options.td
+++ b/lld/COFF/Options.td
@@ -78,8 +78,9 @@ def order : P<"order", "Put functions in order">;
def out : P<"out", "Path to file to write output">;
def natvis : P<"natvis", "Path to natvis file to embed in the PDB">;
def pdb : P<"pdb", "PDB file path">;
-def pdbstripped : P<"pdbstripped", "Stripped PDB file path">;
def pdbaltpath : P<"pdbaltpath", "PDB file path to embed in the image">;
+def pdbpagesize : P<"pdbpagesize", "PDB page size">;
+def pdbstripped : P<"pdbstripped", "Stripped PDB file path">;
def pdbstream : Joined<["/", "-", "/?", "-?"], "pdbstream:">,
MetaVarName<"<name>=<file>">,
HelpText<"Embed the contents of <file> in the PDB as named stream <name>">;
@@ -244,6 +245,10 @@ def lto_cs_profile_generate: F<"lto-cs-profile-generate">,
HelpText<"Perform context sensitive PGO instrumentation">;
def lto_cs_profile_file : P<"lto-cs-profile-file",
"Context sensitive profile file path">;
+defm lto_pgo_warn_mismatch: B<
+ "lto-pgo-warn-mismatch",
+ "turn on warnings about profile cfg mismatch (default)>",
+ "turn off warnings about profile cfg mismatch">;
def dash_dash_version : Flag<["--"], "version">,
HelpText<"Display the version number and exit">;
def threads
@@ -264,9 +269,9 @@ def wrap : P_priv<"wrap">;
// Flags for debugging
def lldmap : F<"lldmap">;
-def lldmap_file : Joined<["/", "-", "/?", "-?"], "lldmap:">;
+def lldmap_file : P_priv<"lldmap">;
def map : F<"map">;
-def map_file : Joined<["/", "-", "/?", "-?"], "map:">;
+def map_file : P_priv<"map">;
def show_timing : F<"time">;
def summary : F<"summary">;
@@ -274,19 +279,21 @@ def summary : F<"summary">;
// The flags below do nothing. They are defined only for link.exe compatibility.
//==============================================================================
-class QF<string name> : Joined<["/", "-", "/?", "-?"], name#":">;
-
def ignoreidl : F<"ignoreidl">;
+def ltcg : F<"ltcg">;
def nologo : F<"nologo">;
def throwingnew : F<"throwingnew">;
def editandcontinue : F<"editandcontinue">;
def fastfail : F<"fastfail">;
-def delay : QF<"delay">;
-def errorreport : QF<"errorreport">;
-def idlout : QF<"idlout">;
-def maxilksize : QF<"maxilksize">;
-def tlbid : QF<"tlbid">;
-def tlbout : QF<"tlbout">;
-def verbose_all : QF<"verbose">;
-def guardsym : QF<"guardsym">;
+def delay : P_priv<"delay">;
+def errorreport : P_priv<"errorreport">;
+def idlout : P_priv<"idlout">;
+def ilk : P_priv<"ilk">;
+def ltcg_opt : P_priv<"ltcg">;
+def ltcgout : P_priv<"ltcgout">;
+def maxilksize : P_priv<"maxilksize">;
+def tlbid : P_priv<"tlbid">;
+def tlbout : P_priv<"tlbout">;
+def verbose_all : P_priv<"verbose">;
+def guardsym : P_priv<"guardsym">;
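
Editor's note: the new `lto_pgo_warn_mismatch` definition above is a paired boolean option with a negated form, read back in Driver.cpp via `args.hasFlag(OPT_lto_pgo_warn_mismatch, OPT_lto_pgo_warn_mismatch_no, true)`, so it defaults to on. A rough sketch of those "last flag wins, with a default" semantics (a hand-rolled scan with hypothetical spellings; the real parsing goes through llvm::opt):

    #include <cstring>
    #include <iostream>

    // Scans argv right-to-left: the last occurrence of either spelling wins;
    // if neither is present, the default applies (true here, matching the
    // default passed to hasFlag above).
    bool hasFlag(int argc, char **argv, const char *pos, const char *neg, bool def) {
      for (int i = argc - 1; i > 0; --i) {
        if (std::strcmp(argv[i], pos) == 0)
          return true;
        if (std::strcmp(argv[i], neg) == 0)
          return false;
      }
      return def;
    }

    int main(int argc, char **argv) {
      bool warn = hasFlag(argc, argv, "-lto-pgo-warn-mismatch",
                          "-lto-pgo-warn-mismatch:no", /*def=*/true);
      std::cout << (warn ? "warnings on\n" : "warnings off\n");
    }
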
diff --git a/lld/COFF/PDB.cpp b/lld/COFF/PDB.cpp
index e355857dd93d..a4cef1d0df3b 100644
--- a/lld/COFF/PDB.cpp
+++ b/lld/COFF/PDB.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "PDB.h"
+#include "COFFLinkerContext.h"
#include "Chunks.h"
#include "Config.h"
#include "DebugTypes.h"
@@ -66,16 +67,6 @@ using llvm::pdb::StringTableFixup;
static ExitOnError exitOnErr;
-static Timer totalPdbLinkTimer("PDB Emission (Cumulative)", Timer::root());
-static Timer addObjectsTimer("Add Objects", totalPdbLinkTimer);
-Timer lld::coff::loadGHashTimer("Global Type Hashing", addObjectsTimer);
-Timer lld::coff::mergeGHashTimer("GHash Type Merging", addObjectsTimer);
-static Timer typeMergingTimer("Type Merging", addObjectsTimer);
-static Timer symbolMergingTimer("Symbol Merging", addObjectsTimer);
-static Timer publicsLayoutTimer("Publics Stream Layout", totalPdbLinkTimer);
-static Timer tpiStreamLayoutTimer("TPI Stream Layout", totalPdbLinkTimer);
-static Timer diskCommitTimer("Commit to Disk", totalPdbLinkTimer);
-
namespace {
class DebugSHandler;
@@ -83,8 +74,8 @@ class PDBLinker {
friend DebugSHandler;
public:
- PDBLinker(SymbolTable *symtab)
- : symtab(symtab), builder(bAlloc), tMerger(bAlloc) {
+ PDBLinker(COFFLinkerContext &ctx)
+ : builder(bAlloc), tMerger(ctx, bAlloc), ctx(ctx) {
// This isn't strictly necessary, but link.exe usually puts an empty string
// as the first "valid" string in the string table, so we do the same in
// order to maintain as much byte-for-byte compatibility as possible.
@@ -107,7 +98,7 @@ public:
void addPublicsToPDB();
/// Link info for each import file in the symbol table into the PDB.
- void addImportFilesToPDB(ArrayRef<OutputSection *> outputSections);
+ void addImportFilesToPDB();
void createModuleDBI(ObjFile *file);
@@ -144,8 +135,7 @@ public:
std::vector<uint8_t> &storage);
/// Add the section map and section contributions to the PDB.
- void addSections(ArrayRef<OutputSection *> outputSections,
- ArrayRef<uint8_t> sectionTable);
+ void addSections(ArrayRef<uint8_t> sectionTable);
/// Write the PDB to disk and store the Guid generated for it in *Guid.
void commit(codeview::GUID *guid);
@@ -154,12 +144,13 @@ public:
void printStats();
private:
- SymbolTable *symtab;
pdb::PDBFileBuilder builder;
TypeMerger tMerger;
+ COFFLinkerContext &ctx;
+
/// PDBs use a single global string table for filenames in the file checksum
/// table.
DebugStringTableSubsection pdbStrTab;
@@ -266,6 +257,7 @@ static void pdbMakeAbsolute(SmallVectorImpl<char> &fileName) {
if (config->pdbSourcePath.empty()) {
sys::path::native(fileName);
sys::fs::make_absolute(fileName);
+ sys::path::remove_dots(fileName, true);
return;
}
@@ -298,11 +290,12 @@ static void addTypeInfo(pdb::TpiStreamBuilder &tpiBuilder,
});
}
-static void addGHashTypeInfo(pdb::PDBFileBuilder &builder) {
+static void addGHashTypeInfo(COFFLinkerContext &ctx,
+ pdb::PDBFileBuilder &builder) {
// Start the TPI or IPI stream header.
builder.getTpiBuilder().setVersionHeader(pdb::PdbTpiV80);
builder.getIpiBuilder().setVersionHeader(pdb::PdbTpiV80);
- for_each(TpiSource::instances, [&](TpiSource *source) {
+ for_each(ctx.tpiSourceList, [&](TpiSource *source) {
builder.getTpiBuilder().addTypeRecords(source->mergedTpi.recs,
source->mergedTpi.recSizes,
source->mergedTpi.recHashes);
@@ -718,8 +711,9 @@ Error PDBLinker::commitSymbolsForObject(void *ctx, void *obj,
static_cast<ObjFile *>(obj), writer);
}
-static pdb::SectionContrib createSectionContrib(const Chunk *c, uint32_t modi) {
- OutputSection *os = c ? c->getOutputSection() : nullptr;
+static pdb::SectionContrib createSectionContrib(COFFLinkerContext &ctx,
+ const Chunk *c, uint32_t modi) {
+ OutputSection *os = c ? ctx.getOutputSection(c) : nullptr;
pdb::SectionContrib sc;
memset(&sc, 0, sizeof(sc));
sc.ISect = os ? os->sectionIndex : llvm::pdb::kInvalidStreamIndex;
@@ -981,7 +975,7 @@ void DebugSHandler::finish() {
// size as the original. Otherwise, the file references in the line and
// inlinee line tables will be incorrect.
auto newChecksums = std::make_unique<DebugChecksumsSubsection>(linker.pdbStrTab);
- for (FileChecksumEntry &fc : checksums) {
+ for (const FileChecksumEntry &fc : checksums) {
SmallString<128> filename =
exitOnErr(cvStrTab.getString(fc.FileNameOffset));
pdbMakeAbsolute(filename);
@@ -1022,7 +1016,7 @@ void PDBLinker::addDebugSymbols(TpiSource *source) {
if (!source->file)
return;
- ScopedTimer t(symbolMergingTimer);
+ ScopedTimer t(ctx.symbolMergingTimer);
pdb::DbiStreamBuilder &dbiBuilder = builder.getDbiBuilder();
DebugSHandler dsh(*this, *source->file, source);
// Now do all live .debug$S and .debug$F sections.
@@ -1081,7 +1075,7 @@ void PDBLinker::createModuleDBI(ObjFile *file) {
auto *secChunk = dyn_cast<SectionChunk>(c);
if (!secChunk || !secChunk->live)
continue;
- pdb::SectionContrib sc = createSectionContrib(secChunk, modi);
+ pdb::SectionContrib sc = createSectionContrib(ctx, secChunk, modi);
file->moduleDBI->setFirstSectionContrib(sc);
break;
}
@@ -1094,7 +1088,7 @@ void PDBLinker::addDebug(TpiSource *source) {
// indices to PDB type and item indices. If we are using ghashes, types have
// already been merged.
if (!config->debugGHashes) {
- ScopedTimer t(typeMergingTimer);
+ ScopedTimer t(ctx.typeMergingTimer);
if (Error e = source->mergeDebugT(&tMerger)) {
// If type merging failed, ignore the symbols.
warnUnusable(source->file, std::move(e));
@@ -1112,7 +1106,7 @@ void PDBLinker::addDebug(TpiSource *source) {
addDebugSymbols(source);
}
-static pdb::BulkPublic createPublic(Defined *def) {
+static pdb::BulkPublic createPublic(COFFLinkerContext &ctx, Defined *def) {
pdb::BulkPublic pub;
pub.Name = def->getName().data();
pub.NameLen = def->getName().size();
@@ -1126,7 +1120,7 @@ static pdb::BulkPublic createPublic(Defined *def) {
}
pub.setFlags(flags);
- OutputSection *os = def->getChunk()->getOutputSection();
+ OutputSection *os = ctx.getOutputSection(def->getChunk());
assert(os && "all publics should be in final image");
pub.Offset = def->getRVA() - os->getRVA();
pub.Segment = os->sectionIndex;
@@ -1136,32 +1130,31 @@ static pdb::BulkPublic createPublic(Defined *def) {
// Add all object files to the PDB. Merge .debug$T sections into IpiData and
// TpiData.
void PDBLinker::addObjectsToPDB() {
- ScopedTimer t1(addObjectsTimer);
+ ScopedTimer t1(ctx.addObjectsTimer);
// Create module descriptors
- for_each(ObjFile::instances, [&](ObjFile *obj) { createModuleDBI(obj); });
+ for_each(ctx.objFileInstances, [&](ObjFile *obj) { createModuleDBI(obj); });
// Reorder dependency type sources to come first.
- TpiSource::sortDependencies();
+ tMerger.sortDependencies();
// Merge type information from input files using global type hashing.
if (config->debugGHashes)
tMerger.mergeTypesWithGHash();
// Merge dependencies and then regular objects.
- for_each(TpiSource::dependencySources,
- [&](TpiSource *source) { addDebug(source); });
- for_each(TpiSource::objectSources,
+ for_each(tMerger.dependencySources,
[&](TpiSource *source) { addDebug(source); });
+ for_each(tMerger.objectSources, [&](TpiSource *source) { addDebug(source); });
builder.getStringTableBuilder().setStrings(pdbStrTab);
t1.stop();
// Construct TPI and IPI stream contents.
- ScopedTimer t2(tpiStreamLayoutTimer);
+ ScopedTimer t2(ctx.tpiStreamLayoutTimer);
// Collect all the merged types.
if (config->debugGHashes) {
- addGHashTypeInfo(builder);
+ addGHashTypeInfo(ctx, builder);
} else {
addTypeInfo(builder.getTpiBuilder(), tMerger.getTypeTable());
addTypeInfo(builder.getIpiBuilder(), tMerger.getIDTable());
@@ -1169,7 +1162,7 @@ void PDBLinker::addObjectsToPDB() {
t2.stop();
if (config->showSummary) {
- for_each(TpiSource::instances, [&](TpiSource *source) {
+ for_each(ctx.tpiSourceList, [&](TpiSource *source) {
nbTypeRecords += source->nbTypeRecords;
nbTypeRecordsBytes += source->nbTypeRecordsBytes;
});
@@ -1177,11 +1170,11 @@ void PDBLinker::addObjectsToPDB() {
}
void PDBLinker::addPublicsToPDB() {
- ScopedTimer t3(publicsLayoutTimer);
+ ScopedTimer t3(ctx.publicsLayoutTimer);
// Compute the public symbols.
auto &gsiBuilder = builder.getGsiBuilder();
std::vector<pdb::BulkPublic> publics;
- symtab->forEachSymbol([&publics](Symbol *s) {
+ ctx.symtab.forEachSymbol([&publics, this](Symbol *s) {
// Only emit external, defined, live symbols that have a chunk. Static,
// non-external symbols do not appear in the symbol table.
auto *def = dyn_cast<Defined>(s);
@@ -1202,7 +1195,7 @@ void PDBLinker::addPublicsToPDB() {
return;
}
}
- publics.push_back(createPublic(def));
+ publics.push_back(createPublic(ctx, def));
}
});
@@ -1226,10 +1219,10 @@ void PDBLinker::printStats() {
stream << format_decimal(v, 15) << " " << s << '\n';
};
- print(ObjFile::instances.size(),
+ print(ctx.objFileInstances.size(),
"Input OBJ files (expanded from all cmd-line inputs)");
- print(TpiSource::countTypeServerPDBs(), "PDB type server dependencies");
- print(TpiSource::countPrecompObjs(), "Precomp OBJ dependencies");
+ print(ctx.typeServerSourceMappings.size(), "PDB type server dependencies");
+ print(ctx.precompSourceMappings.size(), "Precomp OBJ dependencies");
print(nbTypeRecords, "Input type records");
print(nbTypeRecordsBytes, "Input type records bytes");
print(builder.getTpiBuilder().getRecordCount(), "Merged TPI records");
@@ -1354,8 +1347,8 @@ static std::string quote(ArrayRef<StringRef> args) {
for (StringRef a : args) {
if (!r.empty())
r.push_back(' ');
- bool hasWS = a.find(' ') != StringRef::npos;
- bool hasQ = a.find('"') != StringRef::npos;
+ bool hasWS = a.contains(' ');
+ bool hasQ = a.contains('"');
if (hasWS || hasQ)
r.push_back('"');
if (hasQ) {
@@ -1482,13 +1475,13 @@ static void addLinkerModuleSectionSymbol(pdb::DbiModuleDescriptorBuilder &mod,
}
// Add all import files as modules to the PDB.
-void PDBLinker::addImportFilesToPDB(ArrayRef<OutputSection *> outputSections) {
- if (ImportFile::instances.empty())
+void PDBLinker::addImportFilesToPDB() {
+ if (ctx.importFileInstances.empty())
return;
std::map<std::string, llvm::pdb::DbiModuleDescriptorBuilder *> dllToModuleDbi;
- for (ImportFile *file : ImportFile::instances) {
+ for (ImportFile *file : ctx.importFileInstances) {
if (!file->live)
continue;
@@ -1512,7 +1505,7 @@ void PDBLinker::addImportFilesToPDB(ArrayRef<OutputSection *> outputSections) {
exitOnErr(dbiBuilder.addModuleInfo(file->dllName));
firstMod.setObjFileName(libPath);
pdb::SectionContrib sc =
- createSectionContrib(nullptr, llvm::pdb::kInvalidStreamIndex);
+ createSectionContrib(ctx, nullptr, llvm::pdb::kInvalidStreamIndex);
firstMod.setFirstSectionContrib(sc);
// The second module is where the import stream goes.
@@ -1522,7 +1515,7 @@ void PDBLinker::addImportFilesToPDB(ArrayRef<OutputSection *> outputSections) {
DefinedImportThunk *thunk = cast<DefinedImportThunk>(file->thunkSym);
Chunk *thunkChunk = thunk->getChunk();
- OutputSection *thunkOS = thunkChunk->getOutputSection();
+ OutputSection *thunkOS = ctx.getOutputSection(thunkChunk);
ObjNameSym ons(SymbolRecordKind::ObjNameSym);
Compile3Sym cs(SymbolRecordKind::Compile3Sym);
@@ -1564,28 +1557,27 @@ void PDBLinker::addImportFilesToPDB(ArrayRef<OutputSection *> outputSections) {
mod->addSymbol(newSym);
pdb::SectionContrib sc =
- createSectionContrib(thunk->getChunk(), mod->getModuleIndex());
+ createSectionContrib(ctx, thunk->getChunk(), mod->getModuleIndex());
mod->setFirstSectionContrib(sc);
}
}
// Creates a PDB file.
-void lld::coff::createPDB(SymbolTable *symtab,
- ArrayRef<OutputSection *> outputSections,
+void lld::coff::createPDB(COFFLinkerContext &ctx,
ArrayRef<uint8_t> sectionTable,
llvm::codeview::DebugInfo *buildId) {
- ScopedTimer t1(totalPdbLinkTimer);
- PDBLinker pdb(symtab);
+ ScopedTimer t1(ctx.totalPdbLinkTimer);
+ PDBLinker pdb(ctx);
pdb.initialize(buildId);
pdb.addObjectsToPDB();
- pdb.addImportFilesToPDB(outputSections);
- pdb.addSections(outputSections, sectionTable);
+ pdb.addImportFilesToPDB();
+ pdb.addSections(sectionTable);
pdb.addNatvisFiles();
pdb.addNamedStreams();
pdb.addPublicsToPDB();
- ScopedTimer t2(diskCommitTimer);
+ ScopedTimer t2(ctx.diskCommitTimer);
codeview::GUID guid;
pdb.commit(&guid);
memcpy(&buildId->PDB70.Signature, &guid, 16);
@@ -1596,7 +1588,7 @@ void lld::coff::createPDB(SymbolTable *symtab,
}
void PDBLinker::initialize(llvm::codeview::DebugInfo *buildId) {
- exitOnErr(builder.initialize(4096)); // 4096 is blocksize
+ exitOnErr(builder.initialize(config->pdbPageSize));
buildId->Signature.CVSignature = OMF::Signature::PDB70;
// Signature is set to a hash of the PDB contents when the PDB is done.
@@ -1625,8 +1617,7 @@ void PDBLinker::initialize(llvm::codeview::DebugInfo *buildId) {
dbiBuilder.setBuildNumber(14, 11);
}
-void PDBLinker::addSections(ArrayRef<OutputSection *> outputSections,
- ArrayRef<uint8_t> sectionTable) {
+void PDBLinker::addSections(ArrayRef<uint8_t> sectionTable) {
// It's not entirely clear what this is, but the * Linker * module uses it.
pdb::DbiStreamBuilder &dbiBuilder = builder.getDbiBuilder();
nativePath = config->pdbPath;
@@ -1637,11 +1628,11 @@ void PDBLinker::addSections(ArrayRef<OutputSection *> outputSections,
addCommonLinkerModuleSymbols(nativePath, linkerModule);
// Add section contributions. They must be ordered by ascending RVA.
- for (OutputSection *os : outputSections) {
+ for (OutputSection *os : ctx.outputSections) {
addLinkerModuleSectionSymbol(linkerModule, *os);
for (Chunk *c : os->chunks) {
pdb::SectionContrib sc =
- createSectionContrib(c, linkerModule.getModuleIndex());
+ createSectionContrib(ctx, c, linkerModule.getModuleIndex());
builder.getDbiBuilder().addSectionContrib(sc);
}
}
@@ -1650,7 +1641,7 @@ void PDBLinker::addSections(ArrayRef<OutputSection *> outputSections,
// to provide trampolines thunks for incremental function patching. Set this
// as "unused" because LLD doesn't support /INCREMENTAL link.
pdb::SectionContrib sc =
- createSectionContrib(nullptr, llvm::pdb::kInvalidStreamIndex);
+ createSectionContrib(ctx, nullptr, llvm::pdb::kInvalidStreamIndex);
linkerModule.setFirstSectionContrib(sc);
// Add Section Map stream.
@@ -1790,7 +1781,7 @@ lld::coff::getFileLineCodeView(const SectionChunk *c, uint32_t addr) {
Optional<uint32_t> nameIndex;
Optional<uint32_t> lineNumber;
- for (LineColumnEntry &entry : lines) {
+ for (const LineColumnEntry &entry : lines) {
for (const LineNumberEntry &ln : entry.LineNumbers) {
LineInfo li(ln.Flags);
if (ln.Offset > offsetInLinetable) {
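The PDB.cpp hunks above all follow one pattern: state that used to live in file-scope statics (the timers, the symtab pointer, the chunk-to-output-section lookup) moves into a COFFLinkerContext passed down by reference. A minimal stand-alone sketch of that pattern, with invented names (LinkerContext and addObjectsToPDB here are illustrative, not the real lld classes):

#include <chrono>
#include <iostream>
#include <string>
#include <vector>

// Stand-in for lld's Timer/ScopedTimer pair; the timer is owned by the
// context instead of being a file-scope static.
struct Timer {
  std::string name;
  std::chrono::nanoseconds total{0};
};

class ScopedTimer {
public:
  explicit ScopedTimer(Timer &t)
      : timer(t), start(std::chrono::steady_clock::now()) {}
  ~ScopedTimer() { timer.total += std::chrono::steady_clock::now() - start; }

private:
  Timer &timer;
  std::chrono::steady_clock::time_point start;
};

// The context owns what used to be globals.
struct LinkerContext {
  Timer addObjectsTimer{"Add Objects"};
  std::vector<std::string> objFiles;
};

// Helpers take the context explicitly instead of reaching for globals.
static void addObjectsToPDB(LinkerContext &ctx) {
  ScopedTimer t(ctx.addObjectsTimer);
  for (const std::string &obj : ctx.objFiles)
    std::cout << "adding " << obj << '\n';
}

int main() {
  LinkerContext ctx;
  ctx.objFiles = {"a.obj", "b.obj"};
  addObjectsToPDB(ctx);
  std::cout << ctx.addObjectsTimer.name << " took "
            << ctx.addObjectsTimer.total.count() << " ns\n";
}

Nothing here is lld API; it only illustrates why threading a context through every helper lets two links run in the same process without sharing timers or file lists.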
diff --git a/lld/COFF/PDB.h b/lld/COFF/PDB.h
index 53506d40baef..8381374dcc8a 100644
--- a/lld/COFF/PDB.h
+++ b/lld/COFF/PDB.h
@@ -23,21 +23,15 @@ namespace lld {
class Timer;
namespace coff {
-class OutputSection;
class SectionChunk;
-class SymbolTable;
+class COFFLinkerContext;
-void createPDB(SymbolTable *symtab,
- llvm::ArrayRef<OutputSection *> outputSections,
- llvm::ArrayRef<uint8_t> sectionTable,
+void createPDB(COFFLinkerContext &ctx, llvm::ArrayRef<uint8_t> sectionTable,
llvm::codeview::DebugInfo *buildId);
llvm::Optional<std::pair<llvm::StringRef, uint32_t>>
getFileLineCodeView(const SectionChunk *c, uint32_t addr);
-extern Timer loadGHashTimer;
-extern Timer mergeGHashTimer;
-
} // namespace coff
} // namespace lld
diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp
index 536f34350724..89bfc5960286 100644
--- a/lld/COFF/SymbolTable.cpp
+++ b/lld/COFF/SymbolTable.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "SymbolTable.h"
+#include "COFFLinkerContext.h"
#include "Config.h"
#include "Driver.h"
#include "LTO.h"
@@ -34,10 +35,6 @@ StringRef ltrim1(StringRef s, const char *chars) {
return s;
}
-static Timer ltoTimer("LTO", Timer::root());
-
-SymbolTable *symtab;
-
void SymbolTable::addFile(InputFile *file) {
log("Reading " + toString(file));
file->parse();
@@ -52,11 +49,11 @@ void SymbolTable::addFile(InputFile *file) {
}
if (auto *f = dyn_cast<ObjFile>(file)) {
- ObjFile::instances.push_back(f);
+ ctx.objFileInstances.push_back(f);
} else if (auto *f = dyn_cast<BitcodeFile>(file)) {
- BitcodeFile::instances.push_back(f);
+ ctx.bitcodeFileInstances.push_back(f);
} else if (auto *f = dyn_cast<ImportFile>(file)) {
- ImportFile::instances.push_back(f);
+ ctx.importFileInstances.push_back(f);
}
driver->parseDirectives(file);
@@ -268,7 +265,7 @@ void SymbolTable::loadMinGWSymbols() {
if (config->machine == I386 && config->stdcallFixup) {
// Check if we can resolve an undefined decorated symbol by finding
- // the indended target as an undecorated symbol (only with a leading
+ // the intended target as an undecorated symbol (only with a leading
// underscore).
StringRef origName = name;
StringRef baseName = name;
@@ -372,12 +369,9 @@ bool SymbolTable::handleMinGWAutomaticImport(Symbol *sym, StringRef name) {
/// defined symbol imported" diagnostic for symbols in localImports.
/// objFiles and bitcodeFiles (if not nullptr) are used to report where
/// undefined symbols are referenced.
-static void
-reportProblemSymbols(const SmallPtrSetImpl<Symbol *> &undefs,
- const DenseMap<Symbol *, Symbol *> *localImports,
- const std::vector<ObjFile *> objFiles,
- const std::vector<BitcodeFile *> *bitcodeFiles) {
-
+static void reportProblemSymbols(
+ const COFFLinkerContext &ctx, const SmallPtrSetImpl<Symbol *> &undefs,
+ const DenseMap<Symbol *, Symbol *> *localImports, bool needBitcodeFiles) {
// Return early if there is nothing to report (which should be
// the common case).
if (undefs.empty() && (!localImports || localImports->empty()))
@@ -418,11 +412,11 @@ reportProblemSymbols(const SmallPtrSetImpl<Symbol *> &undefs,
}
};
- for (ObjFile *file : objFiles)
+ for (ObjFile *file : ctx.objFileInstances)
processFile(file, file->getSymbols());
- if (bitcodeFiles)
- for (BitcodeFile *file : *bitcodeFiles)
+ if (needBitcodeFiles)
+ for (BitcodeFile *file : ctx.bitcodeFileInstances)
processFile(file, file->getSymbols());
for (const UndefinedDiag &undefDiag : undefDiags)
@@ -451,9 +445,8 @@ void SymbolTable::reportUnresolvable() {
undefs.insert(sym);
}
- reportProblemSymbols(undefs,
- /* localImports */ nullptr, ObjFile::instances,
- &BitcodeFile::instances);
+ reportProblemSymbols(ctx, undefs,
+ /* localImports */ nullptr, true);
}
void SymbolTable::resolveRemainingUndefines() {
@@ -515,8 +508,8 @@ void SymbolTable::resolveRemainingUndefines() {
}
reportProblemSymbols(
- undefs, config->warnLocallyDefinedImported ? &localImports : nullptr,
- ObjFile::instances, /* bitcode files no longer needed */ nullptr);
+ ctx, undefs, config->warnLocallyDefinedImported ? &localImports : nullptr,
+ false);
}
std::pair<Symbol *, bool> SymbolTable::insert(StringRef name) {
@@ -797,20 +790,20 @@ void SymbolTable::addLibcall(StringRef name) {
}
}
-std::vector<Chunk *> SymbolTable::getChunks() {
+std::vector<Chunk *> SymbolTable::getChunks() const {
std::vector<Chunk *> res;
- for (ObjFile *file : ObjFile::instances) {
+ for (ObjFile *file : ctx.objFileInstances) {
ArrayRef<Chunk *> v = file->getChunks();
res.insert(res.end(), v.begin(), v.end());
}
return res;
}
-Symbol *SymbolTable::find(StringRef name) {
+Symbol *SymbolTable::find(StringRef name) const {
return symMap.lookup(CachedHashStringRef(name));
}
-Symbol *SymbolTable::findUnderscore(StringRef name) {
+Symbol *SymbolTable::findUnderscore(StringRef name) const {
if (config->machine == I386)
return find(("_" + name).str());
return find(name);
@@ -872,18 +865,18 @@ Symbol *SymbolTable::addUndefined(StringRef name) {
return addUndefined(name, nullptr, false);
}
-void SymbolTable::addCombinedLTOObjects() {
- if (BitcodeFile::instances.empty())
+void SymbolTable::compileBitcodeFiles() {
+ if (ctx.bitcodeFileInstances.empty())
return;
- ScopedTimer t(ltoTimer);
- lto.reset(new BitcodeCompiler);
- for (BitcodeFile *f : BitcodeFile::instances)
+ ScopedTimer t(ctx.ltoTimer);
+ lto.reset(new BitcodeCompiler());
+ for (BitcodeFile *f : ctx.bitcodeFileInstances)
lto->add(*f);
- for (InputFile *newObj : lto->compile()) {
+ for (InputFile *newObj : lto->compile(ctx)) {
ObjFile *obj = cast<ObjFile>(newObj);
obj->parse();
- ObjFile::instances.push_back(obj);
+ ctx.objFileInstances.push_back(obj);
}
}
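SymbolTable::addFile now files each parsed input into per-kind lists owned by the context instead of static members such as ObjFile::instances. A rough stand-alone equivalent (dynamic_cast stands in for llvm::dyn_cast, and the class names are only illustrative):

#include <iostream>
#include <memory>
#include <vector>

struct InputFile { virtual ~InputFile() = default; };
struct ObjFile : InputFile {};
struct BitcodeFile : InputFile {};
struct ImportFile : InputFile {};

// Per-kind lists that used to be static members on the file classes.
struct LinkerContext {
  std::vector<ObjFile *> objFiles;
  std::vector<BitcodeFile *> bitcodeFiles;
  std::vector<ImportFile *> importFiles;
};

void addFile(LinkerContext &ctx, InputFile *file) {
  if (auto *f = dynamic_cast<ObjFile *>(file))
    ctx.objFiles.push_back(f);
  else if (auto *f = dynamic_cast<BitcodeFile *>(file))
    ctx.bitcodeFiles.push_back(f);
  else if (auto *f = dynamic_cast<ImportFile *>(file))
    ctx.importFiles.push_back(f);
}

int main() {
  LinkerContext ctx;
  std::vector<std::unique_ptr<InputFile>> owned;
  owned.push_back(std::make_unique<ObjFile>());
  owned.push_back(std::make_unique<BitcodeFile>());
  for (auto &f : owned)
    addFile(ctx, f.get());
  std::cout << ctx.objFiles.size() << " obj, "
            << ctx.bitcodeFiles.size() << " bitcode\n";
}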
diff --git a/lld/COFF/SymbolTable.h b/lld/COFF/SymbolTable.h
index e88002c88310..3e76b416d1a0 100644
--- a/lld/COFF/SymbolTable.h
+++ b/lld/COFF/SymbolTable.h
@@ -25,6 +25,7 @@ namespace coff {
class Chunk;
class CommonChunk;
+class COFFLinkerContext;
class Defined;
class DefinedAbsolute;
class DefinedRegular;
@@ -47,6 +48,8 @@ class Symbol;
// There is one add* function per symbol type.
class SymbolTable {
public:
+ SymbolTable(COFFLinkerContext &ctx) : ctx(ctx) {}
+
void addFile(InputFile *file);
// Emit errors for symbols that cannot be resolved.
@@ -63,11 +66,11 @@ public:
bool handleMinGWAutomaticImport(Symbol *sym, StringRef name);
// Returns a list of chunks of selected symbols.
- std::vector<Chunk *> getChunks();
+ std::vector<Chunk *> getChunks() const;
// Returns a symbol for a given name. Returns a nullptr if not found.
- Symbol *find(StringRef name);
- Symbol *findUnderscore(StringRef name);
+ Symbol *find(StringRef name) const;
+ Symbol *findUnderscore(StringRef name) const;
// Occasionally we have to resolve an undefined symbol to its
// mangled symbol. This function tries to find a mangled name
@@ -78,7 +81,7 @@ public:
// Build a set of COFF objects representing the combined contents of
// BitcodeFiles and add them to the symbol table. Called after all files are
// added and before the writer writes results to a file.
- void addCombinedLTOObjects();
+ void compileBitcodeFiles();
// Creates an Undefined symbol for a given name.
Symbol *addUndefined(StringRef name);
@@ -131,9 +134,9 @@ private:
llvm::DenseMap<llvm::CachedHashStringRef, Symbol *> symMap;
std::unique_ptr<BitcodeCompiler> lto;
-};
-extern SymbolTable *symtab;
+ COFFLinkerContext &ctx;
+};
std::vector<std::string> getSymbolLocations(ObjFile *file, uint32_t symIndex);
diff --git a/lld/COFF/TypeMerger.h b/lld/COFF/TypeMerger.h
index 72fd5fc72b01..838db691a822 100644
--- a/lld/COFF/TypeMerger.h
+++ b/lld/COFF/TypeMerger.h
@@ -10,6 +10,8 @@
#define LLD_COFF_TYPEMERGER_H
#include "Config.h"
+#include "DebugTypes.h"
+#include "lld/Common/Timer.h"
#include "llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h"
#include "llvm/DebugInfo/CodeView/TypeHashing.h"
#include "llvm/Support/Allocator.h"
@@ -25,7 +27,7 @@ struct GHashState;
class TypeMerger {
public:
- TypeMerger(llvm::BumpPtrAllocator &alloc);
+ TypeMerger(COFFLinkerContext &ctx, llvm::BumpPtrAllocator &alloc);
~TypeMerger();
@@ -59,6 +61,22 @@ public:
// keyed by type index.
SmallVector<uint32_t, 0> tpiCounts;
SmallVector<uint32_t, 0> ipiCounts;
+
+ /// Dependency type sources, such as type servers or PCH object files. These
+ /// must be processed before objects that rely on them. Set by
+ /// sortDependencies.
+ ArrayRef<TpiSource *> dependencySources;
+
+ /// Object file sources. These must be processed after dependencySources.
+ ArrayRef<TpiSource *> objectSources;
+
+ /// Sorts the dependencies and reassigns TpiSource indices.
+ void sortDependencies();
+
+private:
+ void clearGHashes();
+
+ COFFLinkerContext &ctx;
};
} // namespace coff
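The new dependencySources/objectSources members capture an ordering constraint: type-server PDBs and precompiled-header objects must be merged before the object files that reference them. A small sketch of that "dependencies first" split using std::stable_partition (the struct and field names are made up):

#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

struct TpiSourceLike {
  std::string name;
  bool isDependency; // e.g. a type-server PDB or a precompiled-header object
};

int main() {
  std::vector<TpiSourceLike> sources = {
      {"a.obj", false}, {"types.pdb", true}, {"b.obj", false}, {"pch.obj", true}};

  // Move dependencies to the front while keeping relative order, mirroring
  // the "dependencies first, then regular objects" processing order.
  auto mid = std::stable_partition(
      sources.begin(), sources.end(),
      [](const TpiSourceLike &s) { return s.isDependency; });

  std::cout << "dependencies:\n";
  for (auto it = sources.begin(); it != mid; ++it)
    std::cout << "  " << it->name << '\n';
  std::cout << "objects:\n";
  for (auto it = mid; it != sources.end(); ++it)
    std::cout << "  " << it->name << '\n';
}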
diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp
index 37cbe2bb96a8..600d14034dea 100644
--- a/lld/COFF/Writer.cpp
+++ b/lld/COFF/Writer.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "Writer.h"
+#include "COFFLinkerContext.h"
#include "CallGraphSort.h"
#include "Config.h"
#include "DLL.h"
@@ -80,23 +81,14 @@ static_assert(dosStubSize % 8 == 0, "DOSStub size must be multiple of 8");
static const int numberOfDataDirectory = 16;
-// Global vector of all output sections. After output sections are finalized,
-// this can be indexed by Chunk::getOutputSection.
-static std::vector<OutputSection *> outputSections;
-
-OutputSection *Chunk::getOutputSection() const {
- return osidx == 0 ? nullptr : outputSections[osidx - 1];
-}
-
-void OutputSection::clear() { outputSections.clear(); }
-
namespace {
class DebugDirectoryChunk : public NonSectionChunk {
public:
- DebugDirectoryChunk(const std::vector<std::pair<COFF::DebugType, Chunk *>> &r,
+ DebugDirectoryChunk(COFFLinkerContext &c,
+ const std::vector<std::pair<COFF::DebugType, Chunk *>> &r,
bool writeRepro)
- : records(r), writeRepro(writeRepro) {}
+ : records(r), writeRepro(writeRepro), ctx(c) {}
size_t getSize() const override {
return (records.size() + int(writeRepro)) * sizeof(debug_directory);
@@ -107,7 +99,7 @@ public:
for (const std::pair<COFF::DebugType, Chunk *>& record : records) {
Chunk *c = record.second;
- OutputSection *os = c->getOutputSection();
+ OutputSection *os = ctx.getOutputSection(c);
uint64_t offs = os->getFileOff() + (c->getRVA() - os->getRVA());
fillEntry(d, record.first, c->getSize(), c->getRVA(), offs);
++d;
@@ -146,6 +138,8 @@ private:
mutable std::vector<support::ulittle32_t *> timeDateStamps;
const std::vector<std::pair<COFF::DebugType, Chunk *>> &records;
bool writeRepro;
+
+ COFFLinkerContext &ctx;
};
class CVDebugRecordChunk : public NonSectionChunk {
@@ -201,7 +195,7 @@ public:
// The writer writes a SymbolTable result to a file.
class Writer {
public:
- Writer() : buffer(errorHandler().outputBuffer) {}
+ Writer(COFFLinkerContext &c) : buffer(errorHandler().outputBuffer), ctx(c) {}
void run();
private:
@@ -304,13 +298,12 @@ private:
// files, so we need to keep track of them separately.
Chunk *firstPdata = nullptr;
Chunk *lastPdata;
+
+ COFFLinkerContext &ctx;
};
} // anonymous namespace
-static Timer codeLayoutTimer("Code Layout", Timer::root());
-static Timer diskCommitTimer("Commit Output File", Timer::root());
-
-void lld::coff::writeResult() { Writer().run(); }
+void lld::coff::writeResult(COFFLinkerContext &ctx) { Writer(ctx).run(); }
void OutputSection::addChunk(Chunk *c) {
chunks.push_back(c);
@@ -549,7 +542,7 @@ void Writer::finalizeAddresses() {
return;
size_t origNumChunks = 0;
- for (OutputSection *sec : outputSections) {
+ for (OutputSection *sec : ctx.outputSections) {
sec->origChunks = sec->chunks;
origNumChunks += sec->chunks.size();
}
@@ -561,7 +554,7 @@ void Writer::finalizeAddresses() {
// adding them turned out ok.
bool rangesOk = true;
size_t numChunks = 0;
- for (OutputSection *sec : outputSections) {
+ for (OutputSection *sec : ctx.outputSections) {
if (!verifyRanges(sec->chunks)) {
rangesOk = false;
break;
@@ -582,7 +575,7 @@ void Writer::finalizeAddresses() {
// If the previous pass didn't work out, reset everything back to the
// original conditions before retrying with a wider margin. This should
// ideally never happen under real circumstances.
- for (OutputSection *sec : outputSections)
+ for (OutputSection *sec : ctx.outputSections)
sec->chunks = sec->origChunks;
margin *= 2;
}
@@ -590,7 +583,7 @@ void Writer::finalizeAddresses() {
// Try adding thunks everywhere where it is needed, with a margin
// to avoid things going out of range due to the added thunks.
bool addressesChanged = false;
- for (OutputSection *sec : outputSections)
+ for (OutputSection *sec : ctx.outputSections)
addressesChanged |= createThunks(sec, margin);
// If the verification above thought we needed thunks, we should have
// added some.
@@ -607,7 +600,7 @@ void Writer::finalizeAddresses() {
// The main function of the writer.
void Writer::run() {
- ScopedTimer t1(codeLayoutTimer);
+ ScopedTimer t1(ctx.codeLayoutTimer);
createImportTables();
createSections();
@@ -645,17 +638,17 @@ void Writer::run() {
if (!config->pdbPath.empty() && config->debug) {
assert(buildId);
- createPDB(symtab, outputSections, sectionTable, buildId->buildId);
+ createPDB(ctx, sectionTable, buildId->buildId);
}
writeBuildId();
- writeLLDMapFile(outputSections);
- writeMapFile(outputSections);
+ writeLLDMapFile(ctx);
+ writeMapFile(ctx);
if (errorCount())
return;
- ScopedTimer t2(diskCommitTimer);
+ ScopedTimer t2(ctx.outputCommitTimer);
if (auto e = buffer->commit())
fatal("failed to write the output file: " + toString(std::move(e)));
}
@@ -816,7 +809,8 @@ static bool shouldStripSectionSuffix(SectionChunk *sc, StringRef name) {
void Writer::sortSections() {
if (!config->callGraphProfile.empty()) {
- DenseMap<const SectionChunk *, int> order = computeCallGraphProfileOrder();
+ DenseMap<const SectionChunk *, int> order =
+ computeCallGraphProfileOrder(ctx);
for (auto it : order) {
if (DefinedRegular *sym = it.first->sym)
config->order[sym->getName()] = it.second;
@@ -843,7 +837,7 @@ void Writer::createSections() {
OutputSection *&sec = sections[{name, outChars}];
if (!sec) {
sec = make<OutputSection>(name, outChars);
- outputSections.push_back(sec);
+ ctx.outputSections.push_back(sec);
}
return sec;
};
@@ -864,7 +858,7 @@ void Writer::createSections() {
dtorsSec = createSection(".dtors", data | r | w);
// Then bin chunks by name and output characteristics.
- for (Chunk *c : symtab->getChunks()) {
+ for (Chunk *c : ctx.symtab.getChunks()) {
auto *sc = dyn_cast<SectionChunk>(c);
if (sc && !sc->live) {
if (config->verbose)
@@ -941,14 +935,14 @@ void Writer::createSections() {
return 1;
return 0;
};
- llvm::stable_sort(outputSections,
+ llvm::stable_sort(ctx.outputSections,
[&](const OutputSection *s, const OutputSection *t) {
return sectionOrder(s) < sectionOrder(t);
});
}
void Writer::createMiscChunks() {
- for (MergeChunk *p : MergeChunk::instances) {
+ for (MergeChunk *p : ctx.mergeChunkInstances) {
if (p) {
p->finalizeContents();
rdataSec->addChunk(p);
@@ -956,15 +950,16 @@ void Writer::createMiscChunks() {
}
// Create thunks for locally-dllimported symbols.
- if (!symtab->localImportChunks.empty()) {
- for (Chunk *c : symtab->localImportChunks)
+ if (!ctx.symtab.localImportChunks.empty()) {
+ for (Chunk *c : ctx.symtab.localImportChunks)
rdataSec->addChunk(c);
}
// Create Debug Information Chunks
OutputSection *debugInfoSec = config->mingw ? buildidSec : rdataSec;
if (config->debug || config->repro || config->cetCompat) {
- debugDirectory = make<DebugDirectoryChunk>(debugRecords, config->repro);
+ debugDirectory =
+ make<DebugDirectoryChunk>(ctx, debugRecords, config->repro);
debugDirectory->setAlignment(4);
debugInfoSec->addChunk(debugDirectory);
}
@@ -1013,7 +1008,7 @@ void Writer::createImportTables() {
// Initialize DLLOrder so that import entries are ordered in
// the same order as in the command line. (That affects DLL
// initialization order, and this ordering is MSVC-compatible.)
- for (ImportFile *file : ImportFile::instances) {
+ for (ImportFile *file : ctx.importFileInstances) {
if (!file->live)
continue;
@@ -1036,10 +1031,10 @@ void Writer::createImportTables() {
}
void Writer::appendImportThunks() {
- if (ImportFile::instances.empty())
+ if (ctx.importFileInstances.empty())
return;
- for (ImportFile *file : ImportFile::instances) {
+ for (ImportFile *file : ctx.importFileInstances) {
if (!file->live)
continue;
@@ -1055,7 +1050,7 @@ void Writer::appendImportThunks() {
if (!delayIdata.empty()) {
Defined *helper = cast<Defined>(config->delayLoadHelper);
- delayIdata.create(helper);
+ delayIdata.create(ctx, helper);
for (Chunk *c : delayIdata.getChunks())
didatSec->addChunk(c);
for (Chunk *c : delayIdata.getDataChunks())
@@ -1095,25 +1090,21 @@ void Writer::removeUnusedSections() {
// later. Only remove sections that have no Chunks at all.
return s->chunks.empty();
};
- outputSections.erase(
- std::remove_if(outputSections.begin(), outputSections.end(), isUnused),
- outputSections.end());
+ llvm::erase_if(ctx.outputSections, isUnused);
}
// The Windows loader doesn't seem to like empty sections,
// so we remove them if any.
void Writer::removeEmptySections() {
auto isEmpty = [](OutputSection *s) { return s->getVirtualSize() == 0; };
- outputSections.erase(
- std::remove_if(outputSections.begin(), outputSections.end(), isEmpty),
- outputSections.end());
+ llvm::erase_if(ctx.outputSections, isEmpty);
}
void Writer::assignOutputSectionIndices() {
// Assign final output section indices, and assign each chunk to its output
// section.
uint32_t idx = 1;
- for (OutputSection *os : outputSections) {
+ for (OutputSection *os : ctx.outputSections) {
os->sectionIndex = idx;
for (Chunk *c : os->chunks)
c->setOutputSectionIdx(idx);
@@ -1122,7 +1113,7 @@ void Writer::assignOutputSectionIndices() {
// Merge chunks are containers of chunks, so assign those an output section
// too.
- for (MergeChunk *mc : MergeChunk::instances)
+ for (MergeChunk *mc : ctx.mergeChunkInstances)
if (mc)
for (SectionChunk *sc : mc->sections)
if (sc && sc->live)
@@ -1153,7 +1144,7 @@ Optional<coff_symbol16> Writer::createSymbol(Defined *def) {
Chunk *c = def->getChunk();
if (!c)
return None;
- OutputSection *os = c->getOutputSection();
+ OutputSection *os = ctx.getOutputSection(c);
if (!os)
return None;
@@ -1200,7 +1191,7 @@ void Writer::createSymbolAndStringTable() {
// solution where discardable sections have long names preserved and
// non-discardable sections have their names truncated, to ensure that any
// section which is mapped at runtime also has its name mapped at runtime.
- for (OutputSection *sec : outputSections) {
+ for (OutputSection *sec : ctx.outputSections) {
if (sec->name.size() <= COFF::NameSize)
continue;
if ((sec->header.Characteristics & IMAGE_SCN_MEM_DISCARDABLE) == 0)
@@ -1214,7 +1205,7 @@ void Writer::createSymbolAndStringTable() {
}
if (config->debugDwarf || config->debugSymtab) {
- for (ObjFile *file : ObjFile::instances) {
+ for (ObjFile *file : ctx.objFileInstances) {
for (Symbol *b : file->getSymbols()) {
auto *d = dyn_cast_or_null<Defined>(b);
if (!d || d->writtenToSymtab)
@@ -1274,7 +1265,7 @@ void Writer::mergeSections() {
void Writer::assignAddresses() {
sizeOfHeaders = dosStubSize + sizeof(PEMagic) + sizeof(coff_file_header) +
sizeof(data_directory) * numberOfDataDirectory +
- sizeof(coff_section) * outputSections.size();
+ sizeof(coff_section) * ctx.outputSections.size();
sizeOfHeaders +=
config->is64() ? sizeof(pe32plus_header) : sizeof(pe32_header);
sizeOfHeaders = alignTo(sizeOfHeaders, config->fileAlign);
@@ -1283,7 +1274,7 @@ void Writer::assignAddresses() {
// The first page is kept unmapped.
uint64_t rva = alignTo(sizeOfHeaders, config->align);
- for (OutputSection *sec : outputSections) {
+ for (OutputSection *sec : ctx.outputSections) {
if (sec == relocSec)
addBaserels();
uint64_t rawSize = 0, virtualSize = 0;
@@ -1318,7 +1309,7 @@ void Writer::assignAddresses() {
sizeOfImage = alignTo(rva, config->align);
// Assign addresses to sections in MergeChunks.
- for (MergeChunk *mc : MergeChunk::instances)
+ for (MergeChunk *mc : ctx.mergeChunkInstances)
if (mc)
mc->assignSubsectionRVAs();
}
@@ -1353,7 +1344,7 @@ template <typename PEHeaderTy> void Writer::writeHeader() {
auto *coff = reinterpret_cast<coff_file_header *>(buf);
buf += sizeof(*coff);
coff->Machine = config->machine;
- coff->NumberOfSections = outputSections.size();
+ coff->NumberOfSections = ctx.outputSections.size();
coff->Characteristics = IMAGE_FILE_EXECUTABLE_IMAGE;
if (config->largeAddressAware)
coff->Characteristics |= IMAGE_FILE_LARGE_ADDRESS_AWARE;
@@ -1466,7 +1457,7 @@ template <typename PEHeaderTy> void Writer::writeHeader() {
dir[BASE_RELOCATION_TABLE].RelativeVirtualAddress = relocSec->getRVA();
dir[BASE_RELOCATION_TABLE].Size = relocSec->getVirtualSize();
}
- if (Symbol *sym = symtab->findUnderscore("_tls_used")) {
+ if (Symbol *sym = ctx.symtab.findUnderscore("_tls_used")) {
if (Defined *b = dyn_cast<Defined>(sym)) {
dir[TLS_TABLE].RelativeVirtualAddress = b->getRVA();
dir[TLS_TABLE].Size = config->is64()
@@ -1478,7 +1469,7 @@ template <typename PEHeaderTy> void Writer::writeHeader() {
dir[DEBUG_DIRECTORY].RelativeVirtualAddress = debugDirectory->getRVA();
dir[DEBUG_DIRECTORY].Size = debugDirectory->getSize();
}
- if (Symbol *sym = symtab->findUnderscore("_load_config_used")) {
+ if (Symbol *sym = ctx.symtab.findUnderscore("_load_config_used")) {
if (auto *b = dyn_cast<DefinedRegular>(sym)) {
SectionChunk *sc = b->getChunk();
assert(b->getRVA() >= sc->getRVA());
@@ -1502,12 +1493,12 @@ template <typename PEHeaderTy> void Writer::writeHeader() {
}
// Write section table
- for (OutputSection *sec : outputSections) {
+ for (OutputSection *sec : ctx.outputSections) {
sec->writeHeaderTo(buf);
buf += sizeof(coff_section);
}
sectionTable = ArrayRef<uint8_t>(
- buf - outputSections.size() * sizeof(coff_section), buf);
+ buf - ctx.outputSections.size() * sizeof(coff_section), buf);
if (outputSymtab.empty() && strtab.empty())
return;
@@ -1535,7 +1526,7 @@ void Writer::openFile(StringRef path) {
void Writer::createSEHTable() {
SymbolRVASet handlers;
- for (ObjFile *file : ObjFile::instances) {
+ for (ObjFile *file : ctx.objFileInstances) {
if (!file->hasSafeSEH())
error("/safeseh: " + file->getName() + " is not compatible with SEH");
markSymbolsForRVATable(file, file->getSXDataChunks(), handlers);
@@ -1544,7 +1535,7 @@ void Writer::createSEHTable() {
// Set the "no SEH" characteristic if there really were no handlers, or if
// there is no load config object to point to the table of handlers.
setNoSEHCharacteristic =
- handlers.empty() || !symtab->findUnderscore("_load_config_used");
+ handlers.empty() || !ctx.symtab.findUnderscore("_load_config_used");
maybeAddRVATable(std::move(handlers), "__safe_se_handler_table",
"__safe_se_handler_count");
@@ -1642,7 +1633,7 @@ void Writer::createGuardCFTables() {
std::vector<Symbol *> giatsSymbols;
SymbolRVASet longJmpTargets;
SymbolRVASet ehContTargets;
- for (ObjFile *file : ObjFile::instances) {
+ for (ObjFile *file : ctx.objFileInstances) {
// If the object was compiled with /guard:cf, the address taken symbols
// are in .gfids$y sections, the longjmp targets are in .gljmp$y sections,
// and ehcont targets are in .gehcont$y sections. If the object was not
@@ -1708,7 +1699,7 @@ void Writer::createGuardCFTables() {
guardFlags |= uint32_t(coff_guard_flags::HasLongJmpTable);
if (config->guardCF & GuardCFLevel::EHCont)
guardFlags |= uint32_t(coff_guard_flags::HasEHContTable);
- Symbol *flagSym = symtab->findUnderscore("__guard_flags");
+ Symbol *flagSym = ctx.symtab.findUnderscore("__guard_flags");
cast<DefinedAbsolute>(flagSym)->setVA(guardFlags);
}
@@ -1780,8 +1771,8 @@ void Writer::maybeAddRVATable(SymbolRVASet tableSymbols, StringRef tableSym,
tableChunk = make<RVATableChunk>(std::move(tableSymbols));
rdataSec->addChunk(tableChunk);
- Symbol *t = symtab->findUnderscore(tableSym);
- Symbol *c = symtab->findUnderscore(countSym);
+ Symbol *t = ctx.symtab.findUnderscore(tableSym);
+ Symbol *c = ctx.symtab.findUnderscore(countSym);
replaceSymbol<DefinedSynthetic>(t, t->getName(), tableChunk);
cast<DefinedAbsolute>(c)->setVA(tableChunk->getSize() / (hasFlag ? 5 : 4));
}
@@ -1793,7 +1784,7 @@ void Writer::maybeAddRVATable(SymbolRVASet tableSymbols, StringRef tableSym,
void Writer::createRuntimePseudoRelocs() {
std::vector<RuntimePseudoReloc> rels;
- for (Chunk *c : symtab->getChunks()) {
+ for (Chunk *c : ctx.symtab.getChunks()) {
auto *sc = dyn_cast<SectionChunk>(c);
if (!sc || !sc->live)
continue;
@@ -1816,8 +1807,9 @@ void Writer::createRuntimePseudoRelocs() {
EmptyChunk *endOfList = make<EmptyChunk>();
rdataSec->addChunk(endOfList);
- Symbol *headSym = symtab->findUnderscore("__RUNTIME_PSEUDO_RELOC_LIST__");
- Symbol *endSym = symtab->findUnderscore("__RUNTIME_PSEUDO_RELOC_LIST_END__");
+ Symbol *headSym = ctx.symtab.findUnderscore("__RUNTIME_PSEUDO_RELOC_LIST__");
+ Symbol *endSym =
+ ctx.symtab.findUnderscore("__RUNTIME_PSEUDO_RELOC_LIST_END__");
replaceSymbol<DefinedSynthetic>(headSym, headSym->getName(), table);
replaceSymbol<DefinedSynthetic>(endSym, endSym->getName(), endOfList);
}
@@ -1837,8 +1829,8 @@ void Writer::insertCtorDtorSymbols() {
dtorsSec->insertChunkAtStart(dtorListHead);
dtorsSec->addChunk(dtorListEnd);
- Symbol *ctorListSym = symtab->findUnderscore("__CTOR_LIST__");
- Symbol *dtorListSym = symtab->findUnderscore("__DTOR_LIST__");
+ Symbol *ctorListSym = ctx.symtab.findUnderscore("__CTOR_LIST__");
+ Symbol *dtorListSym = ctx.symtab.findUnderscore("__DTOR_LIST__");
replaceSymbol<DefinedSynthetic>(ctorListSym, ctorListSym->getName(),
ctorListHead);
replaceSymbol<DefinedSynthetic>(dtorListSym, dtorListSym->getName(),
@@ -1851,7 +1843,7 @@ void Writer::setSectionPermissions() {
for (auto &p : config->section) {
StringRef name = p.first;
uint32_t perm = p.second;
- for (OutputSection *sec : outputSections)
+ for (OutputSection *sec : ctx.outputSections)
if (sec->name == name)
sec->setPermissions(perm);
}
@@ -1861,10 +1853,10 @@ void Writer::setSectionPermissions() {
void Writer::writeSections() {
// Record the number of sections to apply section index relocations
  // against absolute symbols. See applySecIdx in Chunks.cpp.
- DefinedAbsolute::numOutputSections = outputSections.size();
+ DefinedAbsolute::numOutputSections = ctx.outputSections.size();
uint8_t *buf = buffer->getBufferStart();
- for (OutputSection *sec : outputSections) {
+ for (OutputSection *sec : ctx.outputSections) {
uint8_t *secBuf = buf + sec->getFileOff();
// Fill gaps between functions in .text with INT3 instructions
// instead of leaving as NUL bytes (which can be interpreted as
@@ -1934,7 +1926,7 @@ void Writer::sortExceptionTable() {
return;
// We assume .pdata contains function table entries only.
auto bufAddr = [&](Chunk *c) {
- OutputSection *os = c->getOutputSection();
+ OutputSection *os = ctx.getOutputSection(c);
return buffer->getBufferStart() + os->getFileOff() + c->getRVA() -
os->getRVA();
};
@@ -2002,7 +1994,7 @@ void Writer::sortCRTSectionChunks(std::vector<Chunk *> &chunks) {
}
OutputSection *Writer::findSection(StringRef name) {
- for (OutputSection *sec : outputSections)
+ for (OutputSection *sec : ctx.outputSections)
if (sec->name == name)
return sec;
return nullptr;
@@ -2010,7 +2002,7 @@ OutputSection *Writer::findSection(StringRef name) {
uint32_t Writer::getSizeOfInitializedData() {
uint32_t res = 0;
- for (OutputSection *s : outputSections)
+ for (OutputSection *s : ctx.outputSections)
if (s->header.Characteristics & IMAGE_SCN_CNT_INITIALIZED_DATA)
res += s->getRawSize();
return res;
@@ -2022,7 +2014,7 @@ void Writer::addBaserels() {
return;
relocSec->chunks.clear();
std::vector<Baserel> v;
- for (OutputSection *sec : outputSections) {
+ for (OutputSection *sec : ctx.outputSections) {
if (sec->header.Characteristics & IMAGE_SCN_MEM_DISCARDABLE)
continue;
// Collect all locations for base relocations.
@@ -2071,11 +2063,11 @@ PartialSection *Writer::findPartialSection(StringRef name, uint32_t outChars) {
void Writer::fixTlsAlignment() {
Defined *tlsSym =
- dyn_cast_or_null<Defined>(symtab->findUnderscore("_tls_used"));
+ dyn_cast_or_null<Defined>(ctx.symtab.findUnderscore("_tls_used"));
if (!tlsSym)
return;
- OutputSection *sec = tlsSym->getChunk()->getOutputSection();
+ OutputSection *sec = ctx.getOutputSection(tlsSym->getChunk());
assert(sec && tlsSym->getRVA() >= sec->getRVA() &&
"no output section for _tls_used");
diff --git a/lld/COFF/Writer.h b/lld/COFF/Writer.h
index 2bb26da7d428..d2b3b4b81d3c 100644
--- a/lld/COFF/Writer.h
+++ b/lld/COFF/Writer.h
@@ -19,8 +19,9 @@
namespace lld {
namespace coff {
static const int pageSize = 4096;
+class COFFLinkerContext;
-void writeResult();
+void writeResult(COFFLinkerContext &ctx);
class PartialSection {
public:
@@ -50,9 +51,6 @@ public:
void writeHeaderTo(uint8_t *buf);
void addContributingPartialSection(PartialSection *sec);
- // Clear the output sections static container.
- static void clear();
-
// Returns the size of this section in an executable memory image.
  // This may be smaller than the raw size (the raw size is a multiple
// of disk sector size, so there may be padding at end), or may be
diff --git a/lld/Common/ErrorHandler.cpp b/lld/Common/ErrorHandler.cpp
index 269a0f62ec65..399b6cac7547 100644
--- a/lld/Common/ErrorHandler.cpp
+++ b/lld/Common/ErrorHandler.cpp
@@ -168,19 +168,36 @@ std::string ErrorHandler::getLocation(const Twine &msg) {
return std::string(logName);
}
+void ErrorHandler::reportDiagnostic(StringRef location, Colors c,
+ StringRef diagKind, const Twine &msg) {
+ SmallString<256> buf;
+ raw_svector_ostream os(buf);
+ os << sep << location << ": ";
+ if (!diagKind.empty()) {
+ if (lld::errs().colors_enabled()) {
+ os.enable_colors(true);
+ os << c << diagKind << ": " << Colors::RESET;
+ } else {
+ os << diagKind << ": ";
+ }
+ }
+ os << msg << '\n';
+ lld::errs() << buf;
+}
+
void ErrorHandler::log(const Twine &msg) {
if (!verbose || disableOutput)
return;
std::lock_guard<std::mutex> lock(mu);
- lld::errs() << logName << ": " << msg << "\n";
+ reportDiagnostic(logName, Colors::RESET, "", msg);
}
-void ErrorHandler::message(const Twine &msg) {
+void ErrorHandler::message(const Twine &msg, llvm::raw_ostream &s) {
if (disableOutput)
return;
std::lock_guard<std::mutex> lock(mu);
- lld::outs() << msg << "\n";
- lld::outs().flush();
+ s << msg << "\n";
+ s.flush();
}
void ErrorHandler::warn(const Twine &msg) {
@@ -190,8 +207,7 @@ void ErrorHandler::warn(const Twine &msg) {
}
std::lock_guard<std::mutex> lock(mu);
- lld::errs() << sep << getLocation(msg) << ": " << Colors::MAGENTA
- << "warning: " << Colors::RESET << msg << "\n";
+ reportDiagnostic(getLocation(msg), Colors::MAGENTA, "warning", msg);
sep = getSeparator(msg);
}
@@ -217,12 +233,9 @@ void ErrorHandler::error(const Twine &msg) {
std::lock_guard<std::mutex> lock(mu);
if (errorLimit == 0 || errorCount < errorLimit) {
- lld::errs() << sep << getLocation(msg) << ": " << Colors::RED
- << "error: " << Colors::RESET << msg << "\n";
+ reportDiagnostic(getLocation(msg), Colors::RED, "error", msg);
} else if (errorCount == errorLimit) {
- lld::errs() << sep << getLocation(msg) << ": " << Colors::RED
- << "error: " << Colors::RESET << errorLimitExceededMsg
- << "\n";
+ reportDiagnostic(logName, Colors::RED, "error", errorLimitExceededMsg);
exit = exitEarly;
}
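The new reportDiagnostic helper builds the whole "location: kind: message" line in a local buffer and writes it once, coloring only the severity keyword when colors are enabled. A rough stand-alone version using plain ANSI escapes rather than llvm::raw_ostream (names are illustrative):

#include <iostream>
#include <sstream>
#include <string>

// Build "location: kind: message", optionally wrapping the kind in an ANSI
// color so the rest of the line stays uncolored.
std::string formatDiagnostic(const std::string &location,
                             const std::string &kind,
                             const std::string &msg, bool useColor) {
  std::ostringstream os;
  os << location << ": ";
  if (!kind.empty()) {
    if (useColor)
      os << "\x1b[31m" << kind << ": " << "\x1b[0m";
    else
      os << kind << ": ";
  }
  os << msg << '\n';
  return os.str();
}

int main() {
  std::cerr << formatDiagnostic("lld-link", "error", "undefined symbol: foo",
                                /*useColor=*/false);
}

Buffering the full line before emitting it also keeps concurrent diagnostics from interleaving mid-line, which a sequence of separate stream writes cannot guarantee.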
diff --git a/lld/Common/Timer.cpp b/lld/Common/Timer.cpp
index 16c518e4bf84..40fecd8892c1 100644
--- a/lld/Common/Timer.cpp
+++ b/lld/Common/Timer.cpp
@@ -26,18 +26,14 @@ void ScopedTimer::stop() {
ScopedTimer::~ScopedTimer() { stop(); }
-Timer::Timer(llvm::StringRef name) : name(std::string(name)) {}
-Timer::Timer(llvm::StringRef name, Timer &parent) : name(std::string(name)) {
+Timer::Timer(llvm::StringRef name) : total(0), name(std::string(name)) {}
+Timer::Timer(llvm::StringRef name, Timer &parent)
+ : total(0), name(std::string(name)) {
parent.children.push_back(this);
}
-Timer &Timer::root() {
- static Timer rootTimer("Total Link Time");
- return rootTimer;
-}
-
void Timer::print() {
- double totalDuration = static_cast<double>(root().millis());
+ double totalDuration = static_cast<double>(millis());
// We want to print the grand total under all the intermediate phases, so we
// print all children first, then print the total under that.
@@ -47,7 +43,7 @@ void Timer::print() {
message(std::string(50, '-'));
- root().print(0, root().millis(), false);
+ print(0, millis(), false);
}
double Timer::millis() const {
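With the static root() timer removed, Timer::print() treats the timer it is called on as the 100% reference and reports each child as a share of that total, printing children before the grand total. A toy version of that report (the real class also records parent/child wiring at construction time):

#include <cstdio>
#include <string>
#include <vector>

struct Timer {
  std::string name;
  double millis = 0;
  std::vector<Timer *> children;

  void print(int depth = 0, double total = -1) const {
    if (total < 0)
      total = millis; // the timer we start from is the 100% reference
    for (const Timer *child : children)
      child->print(depth + 1, total);
    std::printf("%*s%6.1f%%  %s (%.0f ms)\n", depth * 2, "",
                100.0 * millis / total, name.c_str(), millis);
  }
};

int main() {
  Timer root{"Total Link Time", 120.0};
  Timer layout{"Code Layout", 80.0};
  Timer commit{"Commit Output File", 30.0};
  root.children = {&layout, &commit};
  root.print();
}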
diff --git a/lld/Common/Version.cpp b/lld/Common/Version.cpp
index f3768091cd0e..ec6eda6a6748 100644
--- a/lld/Common/Version.cpp
+++ b/lld/Common/Version.cpp
@@ -15,18 +15,14 @@
#include "VCSVersion.inc"
// Returns a version string, e.g.:
-// lld 9.0.0 (https://github.com/llvm/llvm-project.git 9efdd7ac5e914d3c9fa1ef)
+// LLD 14.0.0 (https://github.com/llvm/llvm-project.git
+// 2d9759c7902c5cbc9a7e3ab623321d5578d51687)
std::string lld::getLLDVersion() {
#ifdef LLD_VENDOR
#define LLD_VENDOR_DISPLAY LLD_VENDOR " "
#else
#define LLD_VENDOR_DISPLAY
#endif
-#if defined(LLD_REPOSITORY) && defined(LLD_REVISION)
- return LLD_VENDOR_DISPLAY "LLD " LLD_VERSION_STRING " (" LLD_REPOSITORY
- " " LLD_REVISION ")";
-#else
return LLD_VENDOR_DISPLAY "LLD " LLD_VERSION_STRING;
-#endif
#undef LLD_VENDOR_DISPLAY
}
diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp
index c1ab0e97efe2..b57fd61b65cc 100644
--- a/lld/ELF/Arch/AArch64.cpp
+++ b/lld/ELF/Arch/AArch64.cpp
@@ -62,7 +62,6 @@ AArch64::AArch64() {
relativeRel = R_AARCH64_RELATIVE;
iRelativeRel = R_AARCH64_IRELATIVE;
gotRel = R_AARCH64_GLOB_DAT;
- noneRel = R_AARCH64_NONE;
pltRel = R_AARCH64_JUMP_SLOT;
symbolicRel = R_AARCH64_ABS64;
tlsDescRel = R_AARCH64_TLSDESC;
@@ -71,7 +70,6 @@ AArch64::AArch64() {
pltEntrySize = 16;
ipltEntrySize = 16;
defaultMaxPageSize = 65536;
- gotBaseSymInGotPlt = false;
// Align to the 2 MiB page size (known as a superpage or huge page).
// FreeBSD automatically promotes 2 MiB-aligned allocations.
@@ -614,8 +612,7 @@ public:
uint64_t pltEntryAddr) const override;
private:
- bool btiHeader; // bti instruction needed in PLT Header
- bool btiEntry; // bti instruction needed in PLT Entry
+ bool btiHeader; // bti instruction needed in PLT Header and Entry
bool pacEntry; // autia1716 instruction needed in PLT Entry
};
} // namespace
@@ -626,15 +623,14 @@ AArch64BtiPac::AArch64BtiPac() {
// address of the PLT entry can be taken by the program, which permits an
// indirect jump to the PLT entry. This can happen when the address
// of the PLT entry for a function is canonicalised due to the address of
- // the function in an executable being taken by a shared library.
- // FIXME: There is a potential optimization to omit the BTI if we detect
- // that the address of the PLT entry isn't taken.
+ // the function in an executable being taken by a shared library, or
+ // non-preemptible ifunc referenced by non-GOT-generating, non-PLT-generating
+ // relocations.
// The PAC PLT entries require dynamic loader support and this isn't known
// from properties in the objects, so we use the command line flag.
- btiEntry = btiHeader && !config->shared;
pacEntry = config->zPacPlt;
- if (btiEntry || pacEntry) {
+ if (btiHeader || pacEntry) {
pltEntrySize = 24;
ipltEntrySize = 24;
}
@@ -694,7 +690,12 @@ void AArch64BtiPac::writePlt(uint8_t *buf, const Symbol &sym,
};
const uint8_t nopData[] = { 0x1f, 0x20, 0x03, 0xd5 }; // nop
- if (btiEntry) {
+ // needsPltAddr indicates a non-ifunc canonical PLT entry whose address may
+ // escape to shared objects. isInIplt indicates a non-preemptible ifunc. Its
+ // address may escape if referenced by a direct relocation. The condition is
+ // conservative.
+ bool hasBti = btiHeader && (sym.needsPltAddr || sym.isInIplt);
+ if (hasBti) {
memcpy(buf, btiData, sizeof(btiData));
buf += sizeof(btiData);
pltEntryAddr += sizeof(btiData);
@@ -711,7 +712,7 @@ void AArch64BtiPac::writePlt(uint8_t *buf, const Symbol &sym,
memcpy(buf + sizeof(addrInst), pacBr, sizeof(pacBr));
else
memcpy(buf + sizeof(addrInst), stdBr, sizeof(stdBr));
- if (!btiEntry)
+ if (!hasBti)
// We didn't add the BTI c instruction so round out size with NOP.
memcpy(buf + sizeof(addrInst) + sizeof(stdBr), nopData, sizeof(nopData));
}
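The AArch64 change replaces the old btiEntry flag with a per-symbol test: a PLT entry gets a leading BTI only if its address can escape, i.e. it is a canonical PLT entry (needsPltAddr) or a non-preemptible ifunc (isInIplt). A tiny sketch of just that predicate (SymbolInfo is an invented stand-in for lld's Symbol):

#include <cstdio>

struct SymbolInfo {
  bool needsPltAddr; // canonical PLT entry whose address may escape
  bool isInIplt;     // non-preemptible ifunc, address may be taken directly
};

// Mirror of the new condition: only emit the per-entry BTI when the entry
// can actually be the target of an indirect branch.
bool pltEntryNeedsBti(bool btiHeader, const SymbolInfo &sym) {
  return btiHeader && (sym.needsPltAddr || sym.isInIplt);
}

int main() {
  SymbolInfo canonical{true, false};
  SymbolInfo ordinary{false, false};
  std::printf("canonical: %d, ordinary: %d\n",
              pltEntryNeedsBti(true, canonical),
              pltEntryNeedsBti(true, ordinary));
}

Entries without the BTI are padded with a NOP, as the writePlt hunk shows, so every entry keeps the same 24-byte size.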
diff --git a/lld/ELF/Arch/AMDGPU.cpp b/lld/ELF/Arch/AMDGPU.cpp
index 466ad81922d0..cd7e7830d374 100644
--- a/lld/ELF/Arch/AMDGPU.cpp
+++ b/lld/ELF/Arch/AMDGPU.cpp
@@ -40,7 +40,6 @@ public:
AMDGPU::AMDGPU() {
relativeRel = R_AMDGPU_RELATIVE64;
gotRel = R_AMDGPU_ABS64;
- noneRel = R_AMDGPU_NONE;
symbolicRel = R_AMDGPU_ABS64;
}
diff --git a/lld/ELF/Arch/ARM.cpp b/lld/ELF/Arch/ARM.cpp
index d909a3234c10..f2e4a2a14ad6 100644
--- a/lld/ELF/Arch/ARM.cpp
+++ b/lld/ELF/Arch/ARM.cpp
@@ -52,13 +52,11 @@ ARM::ARM() {
relativeRel = R_ARM_RELATIVE;
iRelativeRel = R_ARM_IRELATIVE;
gotRel = R_ARM_GLOB_DAT;
- noneRel = R_ARM_NONE;
pltRel = R_ARM_JUMP_SLOT;
symbolicRel = R_ARM_ABS32;
tlsGotRel = R_ARM_TLS_TPOFF32;
tlsModuleIndexRel = R_ARM_TLS_DTPMOD32;
tlsOffsetRel = R_ARM_TLS_DTPOFF32;
- gotBaseSymInGotPlt = false;
pltHeaderSize = 32;
pltEntrySize = 16;
ipltEntrySize = 16;
@@ -86,6 +84,13 @@ uint32_t ARM::calcEFlags() const {
RelExpr ARM::getRelExpr(RelType type, const Symbol &s,
const uint8_t *loc) const {
switch (type) {
+ case R_ARM_ABS32:
+ case R_ARM_MOVW_ABS_NC:
+ case R_ARM_MOVT_ABS:
+ case R_ARM_THM_MOVW_ABS_NC:
+ case R_ARM_THM_MOVT_ABS:
+ return R_ABS;
+ case R_ARM_THM_JUMP8:
case R_ARM_THM_JUMP11:
return R_PC;
case R_ARM_CALL:
@@ -158,7 +163,9 @@ RelExpr ARM::getRelExpr(RelType type, const Symbol &s,
// not ARMv4 output, we can just ignore it.
return R_NONE;
default:
- return R_ABS;
+ error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) +
+ ") against symbol " + toString(s));
+ return R_NONE;
}
}
@@ -382,20 +389,25 @@ bool ARM::inBranchRange(RelType type, uint64_t src, uint64_t dst) const {
// or Thumb.
static void stateChangeWarning(uint8_t *loc, RelType relt, const Symbol &s) {
assert(!s.isFunc());
+ const ErrorPlace place = getErrorPlace(loc);
+ std::string hint;
+ if (!place.srcLoc.empty())
+ hint = "; " + place.srcLoc;
if (s.isSection()) {
// Section symbols must be defined and in a section. Users cannot change
// the type. Use the section name as getName() returns an empty string.
- warn(getErrorLocation(loc) + "branch and link relocation: " +
- toString(relt) + " to STT_SECTION symbol " +
- cast<Defined>(s).section->name + " ; interworking not performed");
+ warn(place.loc + "branch and link relocation: " + toString(relt) +
+ " to STT_SECTION symbol " + cast<Defined>(s).section->name +
+ " ; interworking not performed" + hint);
} else {
// Warn with hint on how to alter the symbol type.
warn(getErrorLocation(loc) + "branch and link relocation: " +
toString(relt) + " to non STT_FUNC symbol: " + s.getName() +
" interworking not performed; consider using directive '.type " +
s.getName() +
- ", %function' to give symbol type STT_FUNC if"
- " interworking between ARM and Thumb is required");
+ ", %function' to give symbol type STT_FUNC if interworking between "
+ "ARM and Thumb is required" +
+ hint);
}
}
@@ -509,7 +521,13 @@ void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
checkInt(loc, val, 26, rel);
write32le(loc, (read32le(loc) & ~0x00ffffff) | ((val >> 2) & 0x00ffffff));
break;
+ case R_ARM_THM_JUMP8:
+ // We do a 9 bit check because val is right-shifted by 1 bit.
+ checkInt(loc, val, 9, rel);
+ write16le(loc, (read32le(loc) & 0xff00) | ((val >> 1) & 0x00ff));
+ break;
case R_ARM_THM_JUMP11:
+ // We do a 12 bit check because val is right-shifted by 1 bit.
checkInt(loc, val, 12, rel);
write16le(loc, (read32le(loc) & 0xf800) | ((val >> 1) & 0x07ff));
break;
@@ -699,8 +717,7 @@ void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
break;
}
default:
- error(getErrorLocation(loc) + "unrecognized relocation " +
- toString(rel.type));
+ llvm_unreachable("unknown relocation");
}
}
@@ -738,6 +755,8 @@ int64_t ARM::getImplicitAddend(const uint8_t *buf, RelType type) const {
case R_ARM_PC24:
case R_ARM_PLT32:
return SignExtend64<26>(read32le(buf) << 2);
+ case R_ARM_THM_JUMP8:
+ return SignExtend64<9>(read16le(buf) << 1);
case R_ARM_THM_JUMP11:
return SignExtend64<12>(read16le(buf) << 1);
case R_ARM_THM_JUMP19: {
@@ -838,6 +857,7 @@ int64_t ARM::getImplicitAddend(const uint8_t *buf, RelType type) const {
return u ? imm12 : -imm12;
}
case R_ARM_NONE:
+ case R_ARM_V4BX:
case R_ARM_JUMP_SLOT:
// These relocations are defined as not having an implicit addend.
return 0;
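The new R_ARM_THM_JUMP8 handling stores offset >> 1 in an 8-bit field, which is why the writer checks a 9-bit signed range and getImplicitAddend sign-extends 9 bits after shifting the field back up. A small round-trip sketch of that encoding (halfword byte order is ignored for brevity):

#include <cstdint>
#include <cstdio>

// Encode: the instruction keeps the low 8 bits of (val >> 1).
uint16_t encodeThmJump8(uint16_t insn, int64_t val) {
  // A 9-bit signed check on val corresponds to the 8 stored bits plus the
  // implicit low zero bit.
  return (insn & 0xff00) | ((val >> 1) & 0x00ff);
}

// Decode: shift the stored field back up and sign-extend from bit 8,
// mirroring SignExtend64<9>(read16le(buf) << 1).
int64_t decodeThmJump8(uint16_t insn) {
  uint64_t v = (uint64_t)(insn & 0x00ff) << 1;
  return (int64_t)(v << 55) >> 55;
}

int main() {
  uint16_t insn = 0xd000; // opcode and condition bits, offset field cleared
  int64_t offset = -6;    // even and within the 9-bit signed range
  uint16_t patched = encodeThmJump8(insn, offset);
  std::printf("round trip: %lld\n", (long long)decodeThmJump8(patched));
}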
diff --git a/lld/ELF/Arch/AVR.cpp b/lld/ELF/Arch/AVR.cpp
index d0d247225708..a3051dad9e8b 100644
--- a/lld/ELF/Arch/AVR.cpp
+++ b/lld/ELF/Arch/AVR.cpp
@@ -42,7 +42,6 @@ using namespace lld::elf;
namespace {
class AVR final : public TargetInfo {
public:
- AVR();
uint32_t calcEFlags() const override;
RelExpr getRelExpr(RelType type, const Symbol &s,
const uint8_t *loc) const override;
@@ -51,8 +50,6 @@ public:
};
} // namespace
-AVR::AVR() { noneRel = R_AVR_NONE; }
-
RelExpr AVR::getRelExpr(RelType type, const Symbol &s,
const uint8_t *loc) const {
switch (type) {
diff --git a/lld/ELF/Arch/Hexagon.cpp b/lld/ELF/Arch/Hexagon.cpp
index 02d872d58caf..300ca675519f 100644
--- a/lld/ELF/Arch/Hexagon.cpp
+++ b/lld/ELF/Arch/Hexagon.cpp
@@ -44,6 +44,7 @@ Hexagon::Hexagon() {
gotRel = R_HEX_GLOB_DAT;
symbolicRel = R_HEX_32;
+ gotBaseSymInGotPlt = true;
// The zero'th GOT entry is reserved for the address of _DYNAMIC. The
// next 3 are reserved for the dynamic loader.
gotPltHeaderEntriesNum = 4;
@@ -53,7 +54,6 @@ Hexagon::Hexagon() {
// Hexagon Linux uses 64K pages by default.
defaultMaxPageSize = 0x10000;
- noneRel = R_HEX_NONE;
tlsGotRel = R_HEX_TPREL_32;
tlsModuleIndexRel = R_HEX_DTPMOD_32;
tlsOffsetRel = R_HEX_DTPREL_32;
@@ -162,6 +162,28 @@ RelExpr Hexagon::getRelExpr(RelType type, const Symbol &s,
}
}
+// There are (arguably too) many relocation masks for the DSP's
+// R_HEX_6_X type. The table below is used to select the correct mask
+// for the given instruction.
+struct InstructionMask {
+ uint32_t cmpMask;
+ uint32_t relocMask;
+};
+static const InstructionMask r6[] = {
+ {0x38000000, 0x0000201f}, {0x39000000, 0x0000201f},
+ {0x3e000000, 0x00001f80}, {0x3f000000, 0x00001f80},
+ {0x40000000, 0x000020f8}, {0x41000000, 0x000007e0},
+ {0x42000000, 0x000020f8}, {0x43000000, 0x000007e0},
+ {0x44000000, 0x000020f8}, {0x45000000, 0x000007e0},
+ {0x46000000, 0x000020f8}, {0x47000000, 0x000007e0},
+ {0x6a000000, 0x00001f80}, {0x7c000000, 0x001f2000},
+ {0x9a000000, 0x00000f60}, {0x9b000000, 0x00000f60},
+ {0x9c000000, 0x00000f60}, {0x9d000000, 0x00000f60},
+ {0x9f000000, 0x001f0100}, {0xab000000, 0x0000003f},
+ {0xad000000, 0x0000003f}, {0xaf000000, 0x00030078},
+ {0xd7000000, 0x006020e0}, {0xd8000000, 0x006020e0},
+ {0xdb000000, 0x006020e0}, {0xdf000000, 0x006020e0}};
+
static bool isDuplex(uint32_t insn) {
// Duplex forms have a fixed mask and parse bits 15:14 are always
// zero. Non-duplex insns will always have at least one bit set in the
@@ -170,29 +192,6 @@ static bool isDuplex(uint32_t insn) {
}
static uint32_t findMaskR6(uint32_t insn) {
- // There are (arguably too) many relocation masks for the DSP's
- // R_HEX_6_X type. The table below is used to select the correct mask
- // for the given instruction.
- struct InstructionMask {
- uint32_t cmpMask;
- uint32_t relocMask;
- };
-
- static const InstructionMask r6[] = {
- {0x38000000, 0x0000201f}, {0x39000000, 0x0000201f},
- {0x3e000000, 0x00001f80}, {0x3f000000, 0x00001f80},
- {0x40000000, 0x000020f8}, {0x41000000, 0x000007e0},
- {0x42000000, 0x000020f8}, {0x43000000, 0x000007e0},
- {0x44000000, 0x000020f8}, {0x45000000, 0x000007e0},
- {0x46000000, 0x000020f8}, {0x47000000, 0x000007e0},
- {0x6a000000, 0x00001f80}, {0x7c000000, 0x001f2000},
- {0x9a000000, 0x00000f60}, {0x9b000000, 0x00000f60},
- {0x9c000000, 0x00000f60}, {0x9d000000, 0x00000f60},
- {0x9f000000, 0x001f0100}, {0xab000000, 0x0000003f},
- {0xad000000, 0x0000003f}, {0xaf000000, 0x00030078},
- {0xd7000000, 0x006020e0}, {0xd8000000, 0x006020e0},
- {0xdb000000, 0x006020e0}, {0xdf000000, 0x006020e0}};
-
if (isDuplex(insn))
return 0x03f00000;
@@ -200,7 +199,7 @@ static uint32_t findMaskR6(uint32_t insn) {
if ((0xff000000 & insn) == i.cmpMask)
return i.relocMask;
- error("unrecognized instruction for R_HEX_6 relocation: 0x" +
+ error("unrecognized instruction for 6_X relocation: 0x" +
utohexstr(insn));
return 0;
}
@@ -232,7 +231,11 @@ static uint32_t findMaskR16(uint32_t insn) {
if (isDuplex(insn))
return 0x03f00000;
- error("unrecognized instruction for R_HEX_16_X relocation: 0x" +
+ for (InstructionMask i : r6)
+ if ((0xff000000 & insn) == i.cmpMask)
+ return i.relocMask;
+
+ error("unrecognized instruction for 16_X type: 0x" +
utohexstr(insn));
return 0;
}
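
The hoisted r6 table is keyed on the instruction's opcode byte: the top eight bits are compared against cmpMask, and the matching entry's relocMask names the bit positions that receive the relocated value, which is what lets the 16_X path above fall back to the same table. A minimal standalone sketch of that lookup, using an illustrative three-entry subset of the table:

#include <cstdint>
#include <cstdio>

struct InstructionMask {
  uint32_t cmpMask;   // expected opcode byte, placed in bits 31:24
  uint32_t relocMask; // instruction bits that hold the immediate
};

// Illustrative subset; the real table enumerates every opcode class.
static const InstructionMask r6[] = {
    {0x38000000, 0x0000201f},
    {0x6a000000, 0x00001f80},
    {0xdb000000, 0x006020e0},
};

// Returns the scatter mask for insn, or 0 when the opcode is unknown,
// mirroring the error fallback in findMaskR6/findMaskR16.
static uint32_t findMask(uint32_t insn) {
  for (const InstructionMask &m : r6)
    if ((insn & 0xff000000) == m.cmpMask)
      return m.relocMask;
  return 0;
}

int main() {
  std::printf("mask=%#x\n", findMask(0x6a004000)); // prints mask=0x1f80
  return 0;
}
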
diff --git a/lld/ELF/Arch/Mips.cpp b/lld/ELF/Arch/Mips.cpp
index a233a01d5bba..8ab076cdeb19 100644
--- a/lld/ELF/Arch/Mips.cpp
+++ b/lld/ELF/Arch/Mips.cpp
@@ -46,11 +46,9 @@ public:
template <class ELFT> MIPS<ELFT>::MIPS() {
gotPltHeaderEntriesNum = 2;
defaultMaxPageSize = 65536;
- gotBaseSymInGotPlt = false;
pltEntrySize = 16;
pltHeaderSize = 32;
copyRel = R_MIPS_COPY;
- noneRel = R_MIPS_NONE;
pltRel = R_MIPS_JUMP_SLOT;
needsThunks = true;
@@ -133,13 +131,14 @@ RelExpr MIPS<ELFT>::getRelExpr(RelType type, const Symbol &s,
case R_MIPS_64:
case R_MIPS_GOT_OFST:
case R_MIPS_SUB:
+ return R_ABS;
case R_MIPS_TLS_DTPREL_HI16:
case R_MIPS_TLS_DTPREL_LO16:
case R_MIPS_TLS_DTPREL32:
case R_MIPS_TLS_DTPREL64:
case R_MICROMIPS_TLS_DTPREL_HI16:
case R_MICROMIPS_TLS_DTPREL_LO16:
- return R_ABS;
+ return R_DTPREL;
case R_MIPS_TLS_TPREL_HI16:
case R_MIPS_TLS_TPREL_LO16:
case R_MIPS_TLS_TPREL32:
diff --git a/lld/ELF/Arch/PPC.cpp b/lld/ELF/Arch/PPC.cpp
index aaecef6ee94f..0dda9a40eef7 100644
--- a/lld/ELF/Arch/PPC.cpp
+++ b/lld/ELF/Arch/PPC.cpp
@@ -151,12 +151,10 @@ void elf::writePPC32GlinkSection(uint8_t *buf, size_t numEntries) {
PPC::PPC() {
copyRel = R_PPC_COPY;
gotRel = R_PPC_GLOB_DAT;
- noneRel = R_PPC_NONE;
pltRel = R_PPC_JMP_SLOT;
relativeRel = R_PPC_RELATIVE;
iRelativeRel = R_PPC_IRELATIVE;
symbolicRel = R_PPC_ADDR32;
- gotBaseSymInGotPlt = false;
gotHeaderEntriesNum = 3;
gotPltHeaderEntriesNum = 0;
pltHeaderSize = 0;
diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp
index a0c2d1617caa..d5e73ab9ec97 100644
--- a/lld/ELF/Arch/PPC64.cpp
+++ b/lld/ELF/Arch/PPC64.cpp
@@ -187,11 +187,6 @@ unsigned elf::getPPC64GlobalEntryToLocalEntryOffset(uint8_t stOther) {
return 0;
}
-bool elf::isPPC64SmallCodeModelTocReloc(RelType type) {
- // The only small code model relocations that access the .toc section.
- return type == R_PPC64_TOC16 || type == R_PPC64_TOC16_DS;
-}
-
void elf::writePrefixedInstruction(uint8_t *loc, uint64_t insn) {
insn = config->isLE ? insn << 32 | insn >> 32 : insn;
write64(loc, insn);
@@ -279,9 +274,6 @@ void elf::addPPC64SaveRestore() {
template <typename ELFT>
static std::pair<Defined *, int64_t>
getRelaTocSymAndAddend(InputSectionBase *tocSec, uint64_t offset) {
- if (tocSec->numRelocations == 0)
- return {};
-
// .rela.toc contains exclusively R_PPC64_ADDR64 relocations sorted by
// r_offset: 0, 8, 16, etc. For a given Offset, Offset / 8 gives us the
// relocation index in most cases.
@@ -291,7 +283,10 @@ getRelaTocSymAndAddend(InputSectionBase *tocSec, uint64_t offset) {
// points to a relocation with larger r_offset. Do a linear probe then.
// Constants are extremely uncommon in .toc and the extra number of array
// accesses can be seen as a small constant.
- ArrayRef<typename ELFT::Rela> relas = tocSec->template relas<ELFT>();
+ ArrayRef<typename ELFT::Rela> relas =
+ tocSec->template relsOrRelas<ELFT>().relas;
+ if (relas.empty())
+ return {};
uint64_t index = std::min<uint64_t>(offset / 8, relas.size() - 1);
for (;;) {
if (relas[index].r_offset == offset) {
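
One way to read the lookup strategy in this hunk, as a self-contained sketch rather than lld's exact loop: .rela.toc entries are sorted by r_offset and normally spaced 8 bytes apart, so offset / 8 is the first guess, and if constants interleaved in .toc made that guess land on a larger r_offset, the search probes back linearly.

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <optional>
#include <vector>

struct Rela { uint64_t r_offset; };

static std::optional<size_t> findTocRelocIndex(const std::vector<Rela> &relas,
                                               uint64_t offset) {
  if (relas.empty())
    return std::nullopt;
  // First guess: one relocation per 8-byte .toc slot.
  size_t index = std::min<uint64_t>(offset / 8, relas.size() - 1);
  // Linear probe towards the front if the guess overshot.
  while (index > 0 && relas[index].r_offset > offset)
    --index;
  if (relas[index].r_offset == offset)
    return index;
  return std::nullopt;
}

int main() {
  std::vector<Rela> relas = {{0}, {8}, {24}, {32}}; // a constant occupies offset 16
  if (auto i = findTocRelocIndex(relas, 24))
    std::printf("offset 24 -> relocation #%zu\n", *i); // prints #2
  return 0;
}
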
@@ -568,7 +563,6 @@ static uint64_t readPrefixedInstruction(const uint8_t *loc) {
PPC64::PPC64() {
copyRel = R_PPC64_COPY;
gotRel = R_PPC64_GLOB_DAT;
- noneRel = R_PPC64_NONE;
pltRel = R_PPC64_JMP_SLOT;
relativeRel = R_PPC64_RELATIVE;
iRelativeRel = R_PPC64_IRELATIVE;
@@ -576,7 +570,6 @@ PPC64::PPC64() {
pltHeaderSize = 60;
pltEntrySize = 4;
ipltEntrySize = 16; // PPC64PltCallStub::size
- gotBaseSymInGotPlt = false;
gotHeaderEntriesNum = 1;
gotPltHeaderEntriesNum = 2;
needsThunks = true;
diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp
index dad23fff91c1..5ee9e4185f1a 100644
--- a/lld/ELF/Arch/RISCV.cpp
+++ b/lld/ELF/Arch/RISCV.cpp
@@ -75,7 +75,6 @@ static uint32_t utype(uint32_t op, uint32_t rd, uint32_t imm) {
RISCV::RISCV() {
copyRel = R_RISCV_COPY;
- noneRel = R_RISCV_NONE;
pltRel = R_RISCV_JUMP_SLOT;
relativeRel = R_RISCV_RELATIVE;
iRelativeRel = R_RISCV_IRELATIVE;
@@ -93,7 +92,6 @@ RISCV::RISCV() {
gotRel = symbolicRel;
// .got[0] = _DYNAMIC
- gotBaseSymInGotPlt = false;
gotHeaderEntriesNum = 1;
// .got.plt[0] = _dl_runtime_resolve, .got.plt[1] = link_map
diff --git a/lld/ELF/Arch/SPARCV9.cpp b/lld/ELF/Arch/SPARCV9.cpp
index 9e18ae4753bc..1aebb57951cb 100644
--- a/lld/ELF/Arch/SPARCV9.cpp
+++ b/lld/ELF/Arch/SPARCV9.cpp
@@ -35,7 +35,6 @@ public:
SPARCV9::SPARCV9() {
copyRel = R_SPARC_COPY;
gotRel = R_SPARC_GLOB_DAT;
- noneRel = R_SPARC_NONE;
pltRel = R_SPARC_JMP_SLOT;
relativeRel = R_SPARC_RELATIVE;
symbolicRel = R_SPARC_64;
diff --git a/lld/ELF/Arch/X86.cpp b/lld/ELF/Arch/X86.cpp
index df769f0a1c8b..5d34b769e80e 100644
--- a/lld/ELF/Arch/X86.cpp
+++ b/lld/ELF/Arch/X86.cpp
@@ -52,14 +52,15 @@ public:
X86::X86() {
copyRel = R_386_COPY;
gotRel = R_386_GLOB_DAT;
- noneRel = R_386_NONE;
pltRel = R_386_JUMP_SLOT;
iRelativeRel = R_386_IRELATIVE;
relativeRel = R_386_RELATIVE;
symbolicRel = R_386_32;
+ tlsDescRel = R_386_TLS_DESC;
tlsGotRel = R_386_TLS_TPOFF;
tlsModuleIndexRel = R_386_TLS_DTPMOD32;
tlsOffsetRel = R_386_TLS_DTPOFF32;
+ gotBaseSymInGotPlt = true;
pltHeaderSize = 16;
pltEntrySize = 16;
ipltEntrySize = 16;
@@ -71,7 +72,8 @@ X86::X86() {
}
int X86::getTlsGdRelaxSkip(RelType type) const {
- return 2;
+ // TLSDESC relocations are processed separately. See relaxTlsGdToLe below.
+ return type == R_386_TLS_GOTDESC || type == R_386_TLS_DESC_CALL ? 1 : 2;
}
RelExpr X86::getRelExpr(RelType type, const Symbol &s,
@@ -143,6 +145,10 @@ RelExpr X86::getRelExpr(RelType type, const Symbol &s,
// the byte, we can determine whether the instruction uses the operand as an
// absolute address (R_GOT) or a register-relative address (R_GOTPLT).
return (loc[-1] & 0xc7) == 0x5 ? R_GOT : R_GOTPLT;
+ case R_386_TLS_GOTDESC:
+ return R_TLSDESC_GOTPLT;
+ case R_386_TLS_DESC_CALL:
+ return R_TLSDESC_CALL;
case R_386_TLS_GOTIE:
return R_GOTPLT;
case R_386_GOTOFF:
@@ -167,7 +173,8 @@ RelExpr X86::adjustTlsExpr(RelType type, RelExpr expr) const {
case R_RELAX_TLS_GD_TO_IE:
return R_RELAX_TLS_GD_TO_IE_GOTPLT;
case R_RELAX_TLS_GD_TO_LE:
- return R_RELAX_TLS_GD_TO_LE_NEG;
+ return type == R_386_TLS_GD ? R_RELAX_TLS_GD_TO_LE_NEG
+ : R_RELAX_TLS_GD_TO_LE;
}
}
@@ -259,6 +266,8 @@ int64_t X86::getImplicitAddend(const uint8_t *buf, RelType type) const {
case R_386_PC32:
case R_386_PLT32:
case R_386_RELATIVE:
+ case R_386_TLS_GOTDESC:
+ case R_386_TLS_DESC_CALL:
case R_386_TLS_DTPMOD32:
case R_386_TLS_DTPOFF32:
case R_386_TLS_LDO_32:
@@ -273,6 +282,8 @@ int64_t X86::getImplicitAddend(const uint8_t *buf, RelType type) const {
case R_386_TLS_TPOFF:
case R_386_TLS_TPOFF32:
return SignExtend64<32>(read32le(buf));
+ case R_386_TLS_DESC:
+ return SignExtend64<32>(read32le(buf + 4));
case R_386_NONE:
case R_386_JUMP_SLOT:
// These relocations are defined as not having an implicit addend.
@@ -323,6 +334,8 @@ void X86::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
case R_386_PC32:
case R_386_PLT32:
case R_386_RELATIVE:
+ case R_386_TLS_GOTDESC:
+ case R_386_TLS_DESC_CALL:
case R_386_TLS_DTPMOD32:
case R_386_TLS_DTPOFF32:
case R_386_TLS_GD:
@@ -337,39 +350,79 @@ void X86::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
checkInt(loc, val, 32, rel);
write32le(loc, val);
break;
+ case R_386_TLS_DESC:
+ // The addend is stored in the second 32-bit word.
+ write32le(loc + 4, val);
+ break;
default:
llvm_unreachable("unknown relocation");
}
}
-void X86::relaxTlsGdToLe(uint8_t *loc, const Relocation &, uint64_t val) const {
- // Convert
- // leal x@tlsgd(, %ebx, 1),
- // call __tls_get_addr@plt
- // to
- // movl %gs:0,%eax
- // subl $x@ntpoff,%eax
- const uint8_t inst[] = {
- 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
- 0x81, 0xe8, 0, 0, 0, 0, // subl Val(%ebx), %eax
- };
- memcpy(loc - 3, inst, sizeof(inst));
- write32le(loc + 5, val);
+void X86::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel,
+ uint64_t val) const {
+ if (rel.type == R_386_TLS_GD) {
+ // Convert
+ // leal x@tlsgd(, %ebx, 1), %eax
+ // call __tls_get_addr@plt
+ // to
+ // movl %gs:0, %eax
+ // subl $x@tpoff, %eax
+ const uint8_t inst[] = {
+ 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
+ 0x81, 0xe8, 0, 0, 0, 0, // subl val(%ebx), %eax
+ };
+ memcpy(loc - 3, inst, sizeof(inst));
+ write32le(loc + 5, val);
+ } else if (rel.type == R_386_TLS_GOTDESC) {
+ // Convert leal x@tlsdesc(%ebx), %eax to leal x@ntpoff, %eax.
+ //
+ // Note: call *x@tlsdesc(%eax) may not immediately follow this instruction.
+ if (memcmp(loc - 2, "\x8d\x83", 2)) {
+ error(getErrorLocation(loc - 2) +
+ "R_386_TLS_GOTDESC must be used in leal x@tlsdesc(%ebx), %eax");
+ return;
+ }
+ loc[-1] = 0x05;
+ write32le(loc, val);
+ } else {
+ // Convert call *x@tlsdesc(%eax) to xchg ax, ax.
+ assert(rel.type == R_386_TLS_DESC_CALL);
+ loc[0] = 0x66;
+ loc[1] = 0x90;
+ }
}
-void X86::relaxTlsGdToIe(uint8_t *loc, const Relocation &, uint64_t val) const {
- // Convert
- // leal x@tlsgd(, %ebx, 1),
- // call __tls_get_addr@plt
- // to
- // movl %gs:0, %eax
- // addl x@gotntpoff(%ebx), %eax
- const uint8_t inst[] = {
- 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
- 0x03, 0x83, 0, 0, 0, 0, // addl Val(%ebx), %eax
- };
- memcpy(loc - 3, inst, sizeof(inst));
- write32le(loc + 5, val);
+void X86::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
+ uint64_t val) const {
+ if (rel.type == R_386_TLS_GD) {
+ // Convert
+ // leal x@tlsgd(, %ebx, 1), %eax
+ // call __tls_get_addr@plt
+ // to
+ // movl %gs:0, %eax
+ // addl x@gotntpoff(%ebx), %eax
+ const uint8_t inst[] = {
+ 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
+ 0x03, 0x83, 0, 0, 0, 0, // addl val(%ebx), %eax
+ };
+ memcpy(loc - 3, inst, sizeof(inst));
+ write32le(loc + 5, val);
+ } else if (rel.type == R_386_TLS_GOTDESC) {
+ // Convert leal x@tlsdesc(%ebx), %eax to movl x@gotntpoff(%ebx), %eax.
+ if (memcmp(loc - 2, "\x8d\x83", 2)) {
+ error(getErrorLocation(loc - 2) +
+ "R_386_TLS_GOTDESC must be used in leal x@tlsdesc(%ebx), %eax");
+ return;
+ }
+ loc[-2] = 0x8b;
+ write32le(loc, val);
+ } else {
+ // Convert call *x@tlsdesc(%eax) to xchg ax, ax.
+ assert(rel.type == R_386_TLS_DESC_CALL);
+ loc[0] = 0x66;
+ loc[1] = 0x90;
+ }
}
// In some conditions, relocations can be optimized to avoid using GOT.
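
The offsets used by the R_386_TLS_GD branch above can be illustrated with a small buffer: loc points at the 4-byte @tlsgd displacement inside leal x@tlsgd(,%ebx,1),%eax, so the 12-byte movl/subl replacement starts at loc - 3 and the @tpoff value lands at loc + 5. A hedged standalone sketch (the instruction bytes in buf are illustrative):

#include <cstdint>
#include <cstdio>
#include <cstring>

// Stand-in for lld's write32le.
static void write32le(uint8_t *p, uint32_t v) { std::memcpy(p, &v, 4); }

int main() {
  // leal x@tlsgd(,%ebx,1),%eax (7 bytes) followed by call __tls_get_addr@plt
  // (5 bytes); loc points at the 4-byte displacement of the leal.
  uint8_t buf[12] = {0x8d, 0x04, 0x1d, 0, 0, 0, 0,  // leal disp32(,%ebx,1),%eax
                     0xe8, 0, 0, 0, 0};             // call rel32
  uint8_t *loc = buf + 3;

  // Replacement emitted for GD->LE relaxation.
  const uint8_t inst[] = {
      0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
      0x81, 0xe8, 0, 0, 0, 0,             // subl $x@tpoff, %eax
  };
  std::memcpy(loc - 3, inst, sizeof(inst)); // overwrite leal + call
  write32le(loc + 5, 0x1234);               // @tpoff goes into the subl immediate

  std::printf("%#x %#x\n", buf[0], buf[8]); // 0x65 (gs prefix), 0x34 (imm low byte)
  return 0;
}
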
diff --git a/lld/ELF/Arch/X86_64.cpp b/lld/ELF/Arch/X86_64.cpp
index 514ddc5ec8b0..40436752399b 100644
--- a/lld/ELF/Arch/X86_64.cpp
+++ b/lld/ELF/Arch/X86_64.cpp
@@ -78,7 +78,6 @@ static const std::vector<std::vector<uint8_t>> nopInstructions = {
X86_64::X86_64() {
copyRel = R_X86_64_COPY;
gotRel = R_X86_64_GLOB_DAT;
- noneRel = R_X86_64_NONE;
pltRel = R_X86_64_JUMP_SLOT;
relativeRel = R_X86_64_RELATIVE;
iRelativeRel = R_X86_64_IRELATIVE;
@@ -87,6 +86,7 @@ X86_64::X86_64() {
tlsGotRel = R_X86_64_TPOFF64;
tlsModuleIndexRel = R_X86_64_DTPMOD64;
tlsOffsetRel = R_X86_64_DTPOFF64;
+ gotBaseSymInGotPlt = true;
gotEntrySize = 8;
pltHeaderSize = 16;
pltEntrySize = 16;
@@ -356,6 +356,8 @@ RelExpr X86_64::getRelExpr(RelType type, const Symbol &s,
return R_GOT_PC;
case R_X86_64_GOTOFF64:
return R_GOTPLTREL;
+ case R_X86_64_PLTOFF64:
+ return R_PLT_GOTPLT;
case R_X86_64_GOTPC32:
case R_X86_64_GOTPC64:
return R_GOTPLTONLY_PC;
@@ -718,9 +720,12 @@ int64_t X86_64::getImplicitAddend(const uint8_t *buf, RelType type) const {
case R_X86_64_GOT64:
case R_X86_64_GOTOFF64:
case R_X86_64_GOTPC64:
+ case R_X86_64_PLTOFF64:
case R_X86_64_IRELATIVE:
case R_X86_64_RELATIVE:
return read64le(buf);
+ case R_X86_64_TLSDESC:
+ return read64le(buf + 8);
case R_X86_64_JUMP_SLOT:
case R_X86_64_NONE:
// These relocations are defined as not having an implicit addend.
@@ -779,8 +784,13 @@ void X86_64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
case R_X86_64_GOT64:
case R_X86_64_GOTOFF64:
case R_X86_64_GOTPC64:
+ case R_X86_64_PLTOFF64:
write64le(loc, val);
break;
+ case R_X86_64_TLSDESC:
+ // The addend is stored in the second 64-bit word.
+ write64le(loc + 8, val);
+ break;
default:
llvm_unreachable("unknown relocation");
}
@@ -792,8 +802,8 @@ RelExpr X86_64::adjustGotPcExpr(RelType type, int64_t addend,
// with addend != -4. Such an instruction does not load the full GOT entry, so
// we cannot relax the relocation. E.g. movl x@GOTPCREL+4(%rip), %rax
// (addend=0) loads the high 32 bits of the GOT entry.
- if ((type != R_X86_64_GOTPCRELX && type != R_X86_64_REX_GOTPCRELX) ||
- addend != -4)
+ if (!config->relax || addend != -4 ||
+ (type != R_X86_64_GOTPCRELX && type != R_X86_64_REX_GOTPCRELX))
return R_GOT_PC;
const uint8_t op = loc[-2];
const uint8_t modRm = loc[-1];
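
To spell out the addend check above for the common case where the displacement is the final four bytes of the instruction (the movl example in the comment): the CPU reads mem[P + 4 + disp], and the linker writes disp = GOT(S) + A - P, so the load targets GOT(S) + A + 4. Only A = -4 makes that exactly GOT(S), the start of the full 8-byte entry; with A = 0 the load starts 4 bytes into the entry, which is why such an access cannot be relaxed.
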
diff --git a/lld/ELF/CallGraphSort.cpp b/lld/ELF/CallGraphSort.cpp
index 15da4d2414ab..aa00d6eadbf9 100644
--- a/lld/ELF/CallGraphSort.cpp
+++ b/lld/ELF/CallGraphSort.cpp
@@ -259,7 +259,7 @@ DenseMap<const InputSectionBase *, int> CallGraphSort::run() {
return orderMap;
}
-// Sort sections by the profile data provided by -callgraph-profile-file
+// Sort sections by the profile data provided by --callgraph-profile-file.
//
// This first builds a call graph based on the profile data then merges sections
// according to the C³ heuristic. All clusters are then sorted by a density
diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h
index 9144347045b9..79c4fe06d7b2 100644
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -38,6 +38,10 @@ enum ELFKind {
ELF64BEKind
};
+// For -Bno-symbolic, -Bsymbolic-non-weak-functions, -Bsymbolic-functions,
+// -Bsymbolic.
+enum class BsymbolicKind { None, NonWeakFunctions, Functions, All };
+
// For --build-id.
enum class BuildIdKind { None, Fast, Md5, Sha1, Hexstring, Uuid };
@@ -82,7 +86,8 @@ struct SymbolVersion {
struct VersionDefinition {
llvm::StringRef name;
uint16_t id;
- std::vector<SymbolVersion> patterns;
+ std::vector<SymbolVersion> nonLocalPatterns;
+ std::vector<SymbolVersion> localPatterns;
};
// This struct contains the global configuration for the linker.
@@ -122,6 +127,7 @@ struct Configuration {
llvm::StringRef sysroot;
llvm::StringRef thinLTOCacheDir;
llvm::StringRef thinLTOIndexOnlyArg;
+ llvm::StringRef whyExtract;
llvm::StringRef ltoBasicBlockSections;
std::pair<llvm::StringRef, llvm::StringRef> thinLTOObjectSuffixReplace;
std::pair<llvm::StringRef, llvm::StringRef> thinLTOPrefixReplace;
@@ -144,8 +150,7 @@ struct Configuration {
bool armHasMovtMovw = false;
bool armJ1J2BranchEncoding = false;
bool asNeeded = false;
- bool bsymbolic = false;
- bool bsymbolicFunctions = false;
+ BsymbolicKind bsymbolic = BsymbolicKind::None;
bool callGraphProfileSort;
bool checkSections;
bool checkDynamicRelocs;
@@ -174,10 +179,10 @@ struct Configuration {
bool ignoreDataAddressEquality;
bool ignoreFunctionAddressEquality;
bool ltoCSProfileGenerate;
+ bool ltoPGOWarnMismatch;
bool ltoDebugPassManager;
bool ltoEmitAsm;
bool ltoNewPassManager;
- bool ltoPseudoProbeForProfiling;
bool ltoUniqueBasicBlockSectionNames;
bool ltoWholeProgramVisibility;
bool mergeArmExidx;
@@ -197,6 +202,7 @@ struct Configuration {
bool pie;
bool printGcSections;
bool printIcfSections;
+ bool relax;
bool relocatable;
bool relrPackDynRelocs;
bool saveTemps;
diff --git a/lld/ELF/DWARF.cpp b/lld/ELF/DWARF.cpp
index 707a6ebd1695..4d84c09a0185 100644
--- a/lld/ELF/DWARF.cpp
+++ b/lld/ELF/DWARF.cpp
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// The -gdb-index option instructs the linker to emit a .gdb_index section.
+// The --gdb-index option instructs the linker to emit a .gdb_index section.
// The section contains information to make gdb startup faster.
// The format of the section is described at
// https://sourceware.org/gdb/onlinedocs/gdb/Index-Section-Format.html.
@@ -137,9 +137,10 @@ template <class ELFT>
Optional<RelocAddrEntry> LLDDwarfObj<ELFT>::find(const llvm::DWARFSection &s,
uint64_t pos) const {
auto &sec = static_cast<const LLDDWARFSection &>(s);
- if (sec.sec->areRelocsRela)
- return findAux(*sec.sec, pos, sec.sec->template relas<ELFT>());
- return findAux(*sec.sec, pos, sec.sec->template rels<ELFT>());
+ const RelsOrRelas<ELFT> rels = sec.sec->template relsOrRelas<ELFT>();
+ if (rels.areRelocsRel())
+ return findAux(*sec.sec, pos, rels.rels);
+ return findAux(*sec.sec, pos, rels.relas);
}
template class elf::LLDDwarfObj<ELF32LE>;
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index a15959158653..9fac04558c46 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -94,6 +94,7 @@ bool elf::link(ArrayRef<const char *> args, bool canExitEarly,
objectFiles.clear();
sharedFiles.clear();
backwardReferences.clear();
+ whyExtract.clear();
tar = nullptr;
memset(&in, 0, sizeof(in));
@@ -222,7 +223,6 @@ void LinkerDriver::addFile(StringRef path, bool withLOption) {
readLinkerScript(mbref);
return;
case file_magic::archive: {
- // Handle -whole-archive.
if (inWholeArchive) {
for (const auto &p : getArchiveMembers(mbref))
files.push_back(createObjectFile(p.first, path, p.second));
@@ -262,16 +262,11 @@ void LinkerDriver::addFile(StringRef path, bool withLOption) {
return;
}
- // DSOs usually have DT_SONAME tags in their ELF headers, and the
- // sonames are used to identify DSOs. But if they are missing,
- // they are identified by filenames. We don't know whether the new
- // file has a DT_SONAME or not because we haven't parsed it yet.
- // Here, we set the default soname for the file because we might
- // need it later.
- //
- // If a file was specified by -lfoo, the directory part is not
- // significant, as a user did not specify it. This behavior is
- // compatible with GNU.
+ // Shared objects are identified by soname. soname is (if specified)
+ // DT_SONAME and falls back to filename. If a file was specified by -lfoo,
+ // the directory part is ignored. Note that path may be a temporary and
+ // cannot be stored into SharedFile::soName.
+ path = mbref.getBufferIdentifier();
files.push_back(
make<SharedFile>(mbref, withLOption ? path::filename(path) : path));
return;
@@ -359,10 +354,10 @@ static void checkOptions() {
if (config->executeOnly) {
if (config->emachine != EM_AARCH64)
- error("-execute-only is only supported on AArch64 targets");
+ error("--execute-only is only supported on AArch64 targets");
if (config->singleRoRx && !script->hasSectionsCommand)
- error("-execute-only and -no-rosegment cannot be used together");
+ error("--execute-only and --no-rosegment cannot be used together");
}
if (config->zRetpolineplt && config->zForceIbt)
@@ -489,10 +484,9 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
// Handle -v or -version.
//
// A note about "compatible with GNU linkers" message: this is a hack for
- // scripts generated by GNU Libtool 2.4.6 (released in February 2014 and
- // still the newest version in March 2017) or earlier to recognize LLD as
- // a GNU compatible linker. As long as an output for the -v option
- // contains "GNU" or "with BFD", they recognize us as GNU-compatible.
+ // scripts generated by GNU Libtool up to 2021-10 to recognize LLD as
+ // a GNU compatible linker. See
+ // <https://lists.gnu.org/archive/html/libtool/2017-01/msg00007.html>.
//
// This is somewhat ugly hack, but in reality, we had no choice other
// than doing this. Considering the very long release cycle of Libtool,
@@ -572,14 +566,9 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
}
if (config->timeTraceEnabled) {
- if (auto E = timeTraceProfilerWrite(args.getLastArgValue(OPT_time_trace_file_eq).str(),
- config->outputFile)) {
- handleAllErrors(std::move(E), [&](const StringError &SE) {
- error(SE.getMessage());
- });
- return;
- }
-
+ checkError(timeTraceProfilerWrite(
+ args.getLastArgValue(OPT_time_trace_file_eq).str(),
+ config->outputFile));
timeTraceProfilerCleanup();
}
}
@@ -596,7 +585,7 @@ static void setUnresolvedSymbolPolicy(opt::InputArgList &args) {
OPT_warn_unresolved_symbols, true)
? UnresolvedPolicy::ReportError
: UnresolvedPolicy::Warn;
- // -shared implies -unresolved-symbols=ignore-all because missing
+ // -shared implies --unresolved-symbols=ignore-all because missing
// symbols are likely to be resolved at runtime.
bool diagRegular = !config->shared, diagShlib = !config->shared;
@@ -777,7 +766,7 @@ static bool getP10StubOpt(opt::InputArgList &args) {
// Parse --build-id or --build-id=<style>. We handle "tree" as a
// synonym for "sha1" because all our hash functions including
-// -build-id=sha1 are actually tree hashes for performance reasons.
+// --build-id=sha1 are actually tree hashes for performance reasons.
static std::pair<BuildIdKind, std::vector<uint8_t>>
getBuildId(opt::InputArgList &args) {
auto *arg = args.getLastArg(OPT_build_id, OPT_build_id_eq);
@@ -814,7 +803,7 @@ static std::pair<bool, bool> getPackDynRelocs(opt::InputArgList &args) {
return {true, true};
if (s != "none")
- error("unknown -pack-dyn-relocs format: " + s);
+ error("unknown --pack-dyn-relocs format: " + s);
return {false, false};
}
@@ -1006,12 +995,15 @@ static void readConfigs(opt::InputArgList &args) {
OPT_no_allow_multiple_definition, false) ||
hasZOption(args, "muldefs");
config->auxiliaryList = args::getStrings(args, OPT_auxiliary);
- if (opt::Arg *arg = args.getLastArg(OPT_Bno_symbolic, OPT_Bsymbolic_functions,
- OPT_Bsymbolic)) {
- if (arg->getOption().matches(OPT_Bsymbolic_functions))
- config->bsymbolicFunctions = true;
+ if (opt::Arg *arg =
+ args.getLastArg(OPT_Bno_symbolic, OPT_Bsymbolic_non_weak_functions,
+ OPT_Bsymbolic_functions, OPT_Bsymbolic)) {
+ if (arg->getOption().matches(OPT_Bsymbolic_non_weak_functions))
+ config->bsymbolic = BsymbolicKind::NonWeakFunctions;
+ else if (arg->getOption().matches(OPT_Bsymbolic_functions))
+ config->bsymbolic = BsymbolicKind::Functions;
else if (arg->getOption().matches(OPT_Bsymbolic))
- config->bsymbolic = true;
+ config->bsymbolic = BsymbolicKind::All;
}
config->checkSections =
args.hasFlag(OPT_check_sections, OPT_no_check_sections, true);
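
A minimal sketch of what the four BsymbolicKind values chosen here are meant to imply for a defined default-visibility symbol in a shared object; the helper and its name are illustrative, not lld's actual preemptibility logic:

enum class BsymbolicKind { None, NonWeakFunctions, Functions, All };

// Hypothetical helper: should this -Bsymbolic* setting bind the symbol
// to its local definition inside the DSO?
static bool bindsLocally(BsymbolicKind kind, bool isFunc, bool isWeak) {
  switch (kind) {
  case BsymbolicKind::All:              return true;              // -Bsymbolic
  case BsymbolicKind::Functions:        return isFunc;            // -Bsymbolic-functions
  case BsymbolicKind::NonWeakFunctions: return isFunc && !isWeak; // -Bsymbolic-non-weak-functions
  case BsymbolicKind::None:             return false;             // -Bno-symbolic
  }
  return false;
}

int main() {
  // A weak function stays exported (preemptible) under NonWeakFunctions.
  return bindsLocally(BsymbolicKind::NonWeakFunctions,
                      /*isFunc=*/true, /*isWeak=*/true) ? 1 : 0;
}
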
@@ -1066,6 +1058,8 @@ static void readConfigs(opt::InputArgList &args) {
config->ltoAAPipeline = args.getLastArgValue(OPT_lto_aa_pipeline);
config->ltoCSProfileGenerate = args.hasArg(OPT_lto_cs_profile_generate);
config->ltoCSProfileFile = args.getLastArgValue(OPT_lto_cs_profile_file);
+ config->ltoPGOWarnMismatch = args.hasFlag(OPT_lto_pgo_warn_mismatch,
+ OPT_no_lto_pgo_warn_mismatch, true);
config->ltoDebugPassManager = args.hasArg(OPT_lto_debug_pass_manager);
config->ltoEmitAsm = args.hasArg(OPT_lto_emit_asm);
config->ltoNewPassManager =
@@ -1078,8 +1072,6 @@ static void readConfigs(opt::InputArgList &args) {
config->ltoo = args::getInteger(args, OPT_lto_O, 2);
config->ltoObjPath = args.getLastArgValue(OPT_lto_obj_path_eq);
config->ltoPartitions = args::getInteger(args, OPT_lto_partitions, 1);
- config->ltoPseudoProbeForProfiling =
- args.hasArg(OPT_lto_pseudo_probe_for_profiling);
config->ltoSampleProfile = args.getLastArgValue(OPT_lto_sample_profile);
config->ltoBasicBlockSections =
args.getLastArgValue(OPT_lto_basic_block_sections);
@@ -1123,6 +1115,7 @@ static void readConfigs(opt::InputArgList &args) {
config->printArchiveStats = args.getLastArgValue(OPT_print_archive_stats);
config->printSymbolOrder =
args.getLastArgValue(OPT_print_symbol_order);
+ config->relax = args.hasFlag(OPT_relax, OPT_no_relax, true);
config->rpath = getRpath(args);
config->relocatable = args.hasArg(OPT_relocatable);
config->saveTemps = args.hasArg(OPT_save_temps);
@@ -1166,6 +1159,7 @@ static void readConfigs(opt::InputArgList &args) {
config->warnCommon = args.hasFlag(OPT_warn_common, OPT_no_warn_common, false);
config->warnSymbolOrdering =
args.hasFlag(OPT_warn_symbol_ordering, OPT_no_warn_symbol_ordering, true);
+ config->whyExtract = args.getLastArgValue(OPT_why_extract);
config->zCombreloc = getZFlag(args, "combreloc", "nocombreloc", true);
config->zCopyreloc = getZFlag(args, "copyreloc", "nocopyreloc", true);
config->zForceBti = hasZOption(args, "force-bti");
@@ -1308,7 +1302,7 @@ static void readConfigs(opt::InputArgList &args) {
config->emulation = s;
}
- // Parse -hash-style={sysv,gnu,both}.
+ // Parse --hash-style={sysv,gnu,both}.
if (auto *arg = args.getLastArg(OPT_hash_style)) {
StringRef s = arg->getValue();
if (s == "sysv")
@@ -1318,7 +1312,7 @@ static void readConfigs(opt::InputArgList &args) {
else if (s == "both")
config->sysvHash = config->gnuHash = true;
else
- error("unknown -hash-style: " + s);
+ error("unknown --hash-style: " + s);
}
if (args.hasArg(OPT_print_map))
@@ -1348,18 +1342,19 @@ static void readConfigs(opt::InputArgList &args) {
}
assert(config->versionDefinitions.empty());
- config->versionDefinitions.push_back({"local", (uint16_t)VER_NDX_LOCAL, {}});
config->versionDefinitions.push_back(
- {"global", (uint16_t)VER_NDX_GLOBAL, {}});
+ {"local", (uint16_t)VER_NDX_LOCAL, {}, {}});
+ config->versionDefinitions.push_back(
+ {"global", (uint16_t)VER_NDX_GLOBAL, {}, {}});
// If --retain-symbol-file is used, we'll keep only the symbols listed in
// the file and discard all others.
if (auto *arg = args.getLastArg(OPT_retain_symbols_file)) {
- config->versionDefinitions[VER_NDX_LOCAL].patterns.push_back(
+ config->versionDefinitions[VER_NDX_LOCAL].nonLocalPatterns.push_back(
{"*", /*isExternCpp=*/false, /*hasWildcard=*/true});
if (Optional<MemoryBufferRef> buffer = readFile(arg->getValue()))
for (StringRef s : args::getLines(*buffer))
- config->versionDefinitions[VER_NDX_GLOBAL].patterns.push_back(
+ config->versionDefinitions[VER_NDX_GLOBAL].nonLocalPatterns.push_back(
{s, /*isExternCpp=*/false, /*hasWildcard=*/false});
}
@@ -1371,22 +1366,26 @@ static void readConfigs(opt::InputArgList &args) {
error(arg->getSpelling() + ": " + toString(pat.takeError()));
}
- // When producing an executable, --dynamic-list specifies non-local defined
- // symbols which are required to be exported. When producing a shared object,
- // symbols not specified by --dynamic-list are non-preemptible.
- config->symbolic = config->bsymbolic || args.hasArg(OPT_dynamic_list);
- for (auto *arg : args.filtered(OPT_dynamic_list))
- if (Optional<MemoryBufferRef> buffer = readFile(arg->getValue()))
- readDynamicList(*buffer);
-
- // --export-dynamic-symbol specifies additional --dynamic-list symbols if any
- // other option expresses a symbolic intention: -no-pie, -pie, -Bsymbolic,
+ // For -no-pie and -pie, --export-dynamic-symbol specifies defined symbols
+ // which should be exported. For -shared, references to matched non-local
+ // STV_DEFAULT symbols are not bound to definitions within the shared object,
+ // even if other options express a symbolic intention: -Bsymbolic,
// -Bsymbolic-functions (if STT_FUNC), --dynamic-list.
for (auto *arg : args.filtered(OPT_export_dynamic_symbol))
config->dynamicList.push_back(
{arg->getValue(), /*isExternCpp=*/false,
/*hasWildcard=*/hasWildcard(arg->getValue())});
+ // --export-dynamic-symbol-list specifies a list of --export-dynamic-symbol
+ // patterns. --dynamic-list is --export-dynamic-symbol-list plus -Bsymbolic
+ // like semantics.
+ config->symbolic =
+ config->bsymbolic == BsymbolicKind::All || args.hasArg(OPT_dynamic_list);
+ for (auto *arg :
+ args.filtered(OPT_dynamic_list, OPT_export_dynamic_symbol_list))
+ if (Optional<MemoryBufferRef> buffer = readFile(arg->getValue()))
+ readDynamicList(*buffer);
+
for (auto *arg : args.filtered(OPT_version_script))
if (Optional<std::string> path = searchScript(arg->getValue())) {
if (Optional<MemoryBufferRef> buffer = readFile(*path))
@@ -1456,13 +1455,12 @@ static void setConfigs(opt::InputArgList &args) {
args.hasFlag(OPT_pcrel_optimize, OPT_no_pcrel_optimize, m == EM_PPC64);
}
-// Returns a value of "-format" option.
static bool isFormatBinary(StringRef s) {
if (s == "binary")
return true;
if (s == "elf" || s == "default")
return false;
- error("unknown -format value: " + s +
+ error("unknown --format value: " + s +
" (supported formats: elf, default, binary)");
return false;
}
@@ -1487,9 +1485,9 @@ void LinkerDriver::createFiles(opt::InputArgList &args) {
StringRef to;
std::tie(from, to) = StringRef(arg->getValue()).split('=');
if (from.empty() || to.empty())
- error("-defsym: syntax error: " + StringRef(arg->getValue()));
+ error("--defsym: syntax error: " + StringRef(arg->getValue()));
else
- readDefsym(from, MemoryBufferRef(to, "-defsym"));
+ readDefsym(from, MemoryBufferRef(to, "--defsym"));
break;
}
case OPT_script:
@@ -1623,7 +1621,7 @@ static uint64_t getCommonPageSize(opt::InputArgList &args) {
return val;
}
-// Parses -image-base option.
+// Parses --image-base option.
static Optional<uint64_t> getImageBase(opt::InputArgList &args) {
// Because we are using "Config->maxPageSize" here, this function has to be
// called after the variable is initialized.
@@ -1634,11 +1632,11 @@ static Optional<uint64_t> getImageBase(opt::InputArgList &args) {
StringRef s = arg->getValue();
uint64_t v;
if (!to_integer(s, v)) {
- error("-image-base: number expected, but got " + s);
+ error("--image-base: number expected, but got " + s);
return 0;
}
if ((v % config->maxPageSize) != 0)
- warn("-image-base: address isn't multiple of page size: " + s);
+ warn("--image-base: address isn't multiple of page size: " + s);
return v;
}
@@ -1660,8 +1658,8 @@ static DenseSet<StringRef> getExcludeLibs(opt::InputArgList &args) {
return ret;
}
-// Handles the -exclude-libs option. If a static library file is specified
-// by the -exclude-libs option, all public symbols from the archive become
+// Handles the --exclude-libs option. If a static library file is specified
+// by the --exclude-libs option, all public symbols from the archive become
// private unless otherwise specified by version scripts or something.
// A special library name "ALL" means all archive files.
//
@@ -1686,13 +1684,16 @@ static void excludeLibs(opt::InputArgList &args) {
}
// Force Sym to be entered in the output.
-static void handleUndefined(Symbol *sym) {
+static void handleUndefined(Symbol *sym, const char *option) {
// Since a symbol may not be used inside the program, LTO may
// eliminate it. Mark the symbol as "used" to prevent it.
sym->isUsedInRegularObj = true;
- if (sym->isLazy())
- sym->fetch();
+ if (!sym->isLazy())
+ return;
+ sym->fetch();
+ if (!config->whyExtract.empty())
+ whyExtract.emplace_back(option, sym->file, *sym);
}
// As an extension to GNU linkers, lld supports a variant of `-u`
@@ -1715,7 +1716,7 @@ static void handleUndefinedGlob(StringRef arg) {
}
for (Symbol *sym : syms)
- handleUndefined(sym);
+ handleUndefined(sym, "--undefined-glob");
}
static void handleLibcall(StringRef name) {
@@ -1830,12 +1831,15 @@ static void demoteSharedSymbols() {
llvm::TimeTraceScope timeScope("Demote shared symbols");
for (Symbol *sym : symtab->symbols()) {
auto *s = dyn_cast<SharedSymbol>(sym);
- if (!s || s->getFile().isNeeded)
+ if (!((s && !s->getFile().isNeeded) ||
+ (sym->isLazy() && sym->isUsedInRegularObj)))
continue;
- bool used = s->used;
- s->replace(Undefined{nullptr, s->getName(), STB_WEAK, s->stOther, s->type});
- s->used = used;
+ bool used = sym->used;
+ sym->replace(
+ Undefined{nullptr, sym->getName(), STB_WEAK, sym->stOther, sym->type});
+ sym->used = used;
+ sym->versionId = VER_NDX_GLOBAL;
}
}
@@ -1910,10 +1914,11 @@ template <typename ELFT>
static void readSymbolPartitionSection(InputSectionBase *s) {
// Read the relocation that refers to the partition's entry point symbol.
Symbol *sym;
- if (s->areRelocsRela)
- sym = &s->getFile<ELFT>()->getRelocTargetSym(s->template relas<ELFT>()[0]);
+ const RelsOrRelas<ELFT> rels = s->template relsOrRelas<ELFT>();
+ if (rels.areRelocsRel())
+ sym = &s->getFile<ELFT>()->getRelocTargetSym(rels.rels[0]);
else
- sym = &s->getFile<ELFT>()->getRelocTargetSym(s->template rels<ELFT>()[0]);
+ sym = &s->getFile<ELFT>()->getRelocTargetSym(rels.relas[0]);
if (!isa<Defined>(sym) || !sym->includeInDynsym())
return;
@@ -1991,20 +1996,20 @@ template <class ELFT> void LinkerDriver::compileBitcodeFiles() {
}
// The --wrap option is a feature to rename symbols so that you can write
-// wrappers for existing functions. If you pass `-wrap=foo`, all
+// wrappers for existing functions. If you pass `--wrap=foo`, all
// occurrences of symbol `foo` are resolved to `__wrap_foo` (so, you are
// expected to write `__wrap_foo` function as a wrapper). The original
// symbol becomes accessible as `__real_foo`, so you can call that from your
// wrapper.
//
-// This data structure is instantiated for each -wrap option.
+// This data structure is instantiated for each --wrap option.
struct WrappedSymbol {
Symbol *sym;
Symbol *real;
Symbol *wrap;
};
-// Handles -wrap option.
+// Handles --wrap option.
//
// This function instantiates wrapper symbols. At this point, they seem
// like they are not being used at all, so we explicitly set some flags so
@@ -2046,7 +2051,7 @@ static std::vector<WrappedSymbol> addWrappedSymbols(opt::InputArgList &args) {
return v;
}
-// Do renaming for -wrap and foo@v1 by updating pointers to symbols.
+// Do renaming for --wrap and foo@v1 by updating pointers to symbols.
//
// When this function is executed, only InputFiles and symbol table
// contain pointers to symbol objects. We visit them to replace pointers,
@@ -2065,23 +2070,37 @@ static void redirectSymbols(ArrayRef<WrappedSymbol> wrapped) {
if (suffix1[0] != '@' || suffix1[1] == '@')
continue;
- // Check whether the default version foo@@v1 exists. If it exists, the
- // symbol can be found by the name "foo" in the symbol table.
- Symbol *maybeDefault = symtab->find(name);
- if (!maybeDefault)
- continue;
- const char *suffix2 = maybeDefault->getVersionSuffix();
- if (suffix2[0] != '@' || suffix2[1] != '@' ||
- strcmp(suffix1 + 1, suffix2 + 2) != 0)
+ // Check the existing symbol foo. We have two special cases to handle:
+ //
+ // * There is a definition of foo@v1 and foo@@v1.
+ // * There is a definition of foo@v1 and foo.
+ Defined *sym2 = dyn_cast_or_null<Defined>(symtab->find(name));
+ if (!sym2)
continue;
-
- // foo@v1 and foo@@v1 should be merged, so redirect foo@v1 to foo@@v1.
- map.try_emplace(sym, maybeDefault);
- // If both foo@v1 and foo@@v1 are defined and non-weak, report a duplicate
- // definition error.
- maybeDefault->resolve(*sym);
- // Eliminate foo@v1 from the symbol table.
- sym->symbolKind = Symbol::PlaceholderKind;
+ const char *suffix2 = sym2->getVersionSuffix();
+ if (suffix2[0] == '@' && suffix2[1] == '@' &&
+ strcmp(suffix1 + 1, suffix2 + 2) == 0) {
+ // foo@v1 and foo@@v1 should be merged, so redirect foo@v1 to foo@@v1.
+ map.try_emplace(sym, sym2);
+ // If both foo@v1 and foo@@v1 are defined and non-weak, report a duplicate
+ // definition error.
+ sym2->resolve(*sym);
+ // Eliminate foo@v1 from the symbol table.
+ sym->symbolKind = Symbol::PlaceholderKind;
+ } else if (auto *sym1 = dyn_cast<Defined>(sym)) {
+ if (sym2->versionId > VER_NDX_GLOBAL
+ ? config->versionDefinitions[sym2->versionId].name == suffix1 + 1
+ : sym1->section == sym2->section && sym1->value == sym2->value) {
+ // Due to an assembler design flaw, if foo is defined, .symver foo,
+ // foo@v1 defines both foo and foo@v1. Unless foo is bound to a
+ // different version, GNU ld makes foo@v1 canonical and eliminates foo.
+ // Emulate its behavior, otherwise we would have foo or foo@@v1 beside
+ // foo@v1. foo@v1 and foo combining does not apply if they are not
+ // defined in the same place.
+ map.try_emplace(sym2, sym);
+ sym2->symbolKind = Symbol::PlaceholderKind;
+ }
+ }
}
if (map.empty())
@@ -2145,7 +2164,7 @@ template <class ELFT> static uint32_t getAndFeatures() {
// all linker scripts have already been parsed.
template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) {
llvm::TimeTraceScope timeScope("Link", StringRef("LinkerDriver::Link"));
- // If a -hash-style option was not given, set to a default value,
+ // If a --hash-style option was not given, set to a default value,
// which varies depending on the target.
if (!args.hasArg(OPT_hash_style)) {
if (config->emachine == EM_MIPS)
@@ -2168,6 +2187,9 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) {
e.message());
if (auto e = tryCreateFile(config->mapFile))
error("cannot open map file " + config->mapFile + ": " + e.message());
+ if (auto e = tryCreateFile(config->whyExtract))
+ error("cannot open --why-extract= file " + config->whyExtract + ": " +
+ e.message());
}
if (errorCount())
return;
@@ -2222,7 +2244,7 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) {
// If an entry symbol is in a static archive, pull out that file now.
if (Symbol *sym = symtab->find(config->entry))
- handleUndefined(sym);
+ handleUndefined(sym, "--entry");
// Handle the `--undefined-glob <pattern>` options.
for (StringRef pat : args::getStrings(args, OPT_undefined_glob))
@@ -2277,7 +2299,6 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) {
Out::elfHeader = make<OutputSection>("", 0, SHF_ALLOC);
Out::elfHeader->size = sizeof(typename ELFT::Ehdr);
- // Create wrapped symbols for -wrap option.
std::vector<WrappedSymbol> wrapped = addWrappedSymbols(args);
// We need to create some reserved symbols such as _end. Create them.
@@ -2312,9 +2333,9 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) {
if (errorCount())
return;
- // If -thinlto-index-only is given, we should create only "index
+ // If --thinlto-index-only is given, we should create only "index
// files" and not object files. Index file creation is already done
- // in addCombinedLTOObject, so we are done if that's the case.
+ // in compileBitcodeFiles, so we are done if that's the case.
// Likewise, --plugin-opt=emit-llvm and --plugin-opt=emit-asm are the
// options to create output files in bitcode or assembly code
// respectively. No object files are generated.
@@ -2324,7 +2345,7 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) {
!config->thinLTOModulesToCompile.empty())
return;
- // Apply symbol renames for -wrap and combine foo@v1 and foo@@v1.
+ // Apply symbol renames for --wrap and combine foo@v1 and foo@@v1.
redirectSymbols(wrapped);
{
@@ -2350,7 +2371,7 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) {
}
// We do not want to emit debug sections if --strip-all
- // or -strip-debug are given.
+ // or --strip-debug are given.
if (config->strip == StripPolicy::None)
return false;
diff --git a/lld/ELF/DriverUtils.cpp b/lld/ELF/DriverUtils.cpp
index f49deb9012b2..54d2d0ae6fb9 100644
--- a/lld/ELF/DriverUtils.cpp
+++ b/lld/ELF/DriverUtils.cpp
@@ -52,8 +52,8 @@ static const opt::OptTable::Info optInfo[] = {
ELFOptTable::ELFOptTable() : OptTable(optInfo) {}
-// Set color diagnostics according to -color-diagnostics={auto,always,never}
-// or -no-color-diagnostics flags.
+// Set color diagnostics according to --color-diagnostics={auto,always,never}
+// or --no-color-diagnostics flags.
static void handleColorDiagnostics(opt::InputArgList &args) {
auto *arg = args.getLastArg(OPT_color_diagnostics, OPT_color_diagnostics_eq,
OPT_no_color_diagnostics);
@@ -150,11 +150,10 @@ void elf::printHelp() {
"lld", false /*ShowHidden*/, true /*ShowAllAliases*/);
lld::outs() << "\n";
- // Scripts generated by Libtool versions up to at least 2.4.6 (the most
- // recent version as of March 2017) expect /: supported targets:.* elf/
- // in a message for the -help option. If it doesn't match, the scripts
- // assume that the linker doesn't support very basic features such as
- // shared libraries. Therefore, we need to print out at least "elf".
+ // Scripts generated by Libtool versions up to 2021-10 expect /: supported
+ // targets:.* elf/ in a message for the --help option. If it doesn't match,
+ // the scripts assume that the linker doesn't support very basic features
+ // such as shared libraries. Therefore, we need to print out at least "elf".
lld::outs() << config->progName << ": supported targets: elf\n";
}
diff --git a/lld/ELF/ICF.cpp b/lld/ELF/ICF.cpp
index 5cf944d39c81..c13969806916 100644
--- a/lld/ELF/ICF.cpp
+++ b/lld/ELF/ICF.cpp
@@ -239,6 +239,8 @@ template <class ELFT>
template <class RelTy>
bool ICF<ELFT>::constantEq(const InputSection *secA, ArrayRef<RelTy> ra,
const InputSection *secB, ArrayRef<RelTy> rb) {
+ if (ra.size() != rb.size())
+ return false;
for (size_t i = 0; i < ra.size(); ++i) {
if (ra[i].r_offset != rb[i].r_offset ||
ra[i].getType(config->isMips64EL) != rb[i].getType(config->isMips64EL))
@@ -312,8 +314,8 @@ bool ICF<ELFT>::constantEq(const InputSection *secA, ArrayRef<RelTy> ra,
// except relocation targets.
template <class ELFT>
bool ICF<ELFT>::equalsConstant(const InputSection *a, const InputSection *b) {
- if (a->numRelocations != b->numRelocations || a->flags != b->flags ||
- a->getSize() != b->getSize() || a->data() != b->data())
+ if (a->flags != b->flags || a->getSize() != b->getSize() ||
+ a->data() != b->data())
return false;
// If two sections have different output sections, we cannot merge them.
@@ -321,10 +323,10 @@ bool ICF<ELFT>::equalsConstant(const InputSection *a, const InputSection *b) {
if (a->getParent() != b->getParent())
return false;
- if (a->areRelocsRela)
- return constantEq(a, a->template relas<ELFT>(), b,
- b->template relas<ELFT>());
- return constantEq(a, a->template rels<ELFT>(), b, b->template rels<ELFT>());
+ const RelsOrRelas<ELFT> ra = a->template relsOrRelas<ELFT>();
+ const RelsOrRelas<ELFT> rb = b->template relsOrRelas<ELFT>();
+ return ra.areRelocsRel() ? constantEq(a, ra.rels, b, rb.rels)
+ : constantEq(a, ra.relas, b, rb.relas);
}
// Compare two lists of relocations. Returns true if all pairs of
@@ -369,10 +371,10 @@ bool ICF<ELFT>::variableEq(const InputSection *secA, ArrayRef<RelTy> ra,
// Compare "moving" part of two InputSections, namely relocation targets.
template <class ELFT>
bool ICF<ELFT>::equalsVariable(const InputSection *a, const InputSection *b) {
- if (a->areRelocsRela)
- return variableEq(a, a->template relas<ELFT>(), b,
- b->template relas<ELFT>());
- return variableEq(a, a->template rels<ELFT>(), b, b->template rels<ELFT>());
+ const RelsOrRelas<ELFT> ra = a->template relsOrRelas<ELFT>();
+ const RelsOrRelas<ELFT> rb = b->template relsOrRelas<ELFT>();
+ return ra.areRelocsRel() ? variableEq(a, ra.rels, b, rb.rels)
+ : variableEq(a, ra.relas, b, rb.relas);
}
template <class ELFT> size_t ICF<ELFT>::findBoundary(size_t begin, size_t end) {
@@ -499,10 +501,11 @@ template <class ELFT> void ICF<ELFT>::run() {
// a large time complexity will have less work to do.
for (unsigned cnt = 0; cnt != 2; ++cnt) {
parallelForEach(sections, [&](InputSection *s) {
- if (s->areRelocsRela)
- combineRelocHashes<ELFT>(cnt, s, s->template relas<ELFT>());
+ const RelsOrRelas<ELFT> rels = s->template relsOrRelas<ELFT>();
+ if (rels.areRelocsRel())
+ combineRelocHashes<ELFT>(cnt, s, rels.rels);
else
- combineRelocHashes<ELFT>(cnt, s, s->template rels<ELFT>());
+ combineRelocHashes<ELFT>(cnt, s, rels.relas);
});
}
diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index d5b9efbe18fc..e8a4188ec775 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -426,18 +426,7 @@ StringRef ObjFile<ELFT>::getShtGroupSignature(ArrayRef<Elf_Shdr> sections,
if (sec.sh_info >= symbols.size())
fatal(toString(this) + ": invalid symbol index");
const typename ELFT::Sym &sym = symbols[sec.sh_info];
- StringRef signature = CHECK(sym.getName(this->stringTable), this);
-
- // As a special case, if a symbol is a section symbol and has no name,
- // we use a section name as a signature.
- //
- // Such SHT_GROUP sections are invalid from the perspective of the ELF
- // standard, but GNU gold 1.14 (the newest version as of July 2017) or
- // older produce such sections as outputs for the -r option, so we need
- // a bug-compatibility.
- if (signature.empty() && sym.getType() == STT_SECTION)
- return getSectionName(sec);
- return signature;
+ return CHECK(sym.getName(this->stringTable), this);
}
template <class ELFT>
@@ -565,10 +554,9 @@ void ObjFile<ELFT>::initializeSections(bool ignoreComdats) {
const ELFFile<ELFT> &obj = this->getObj();
ArrayRef<Elf_Shdr> objSections = CHECK(obj.sections(), this);
+ StringRef shstrtab = CHECK(obj.getSectionStringTable(objSections), this);
uint64_t size = objSections.size();
this->sections.resize(size);
- this->sectionStringTable =
- CHECK(obj.getSectionStringTable(objSections), this);
std::vector<ArrayRef<Elf_Word>> selectedGroups;
@@ -624,7 +612,7 @@ void ObjFile<ELFT>::initializeSections(bool ignoreComdats) {
.second;
if (keepGroup) {
if (config->relocatable)
- this->sections[i] = createInputSection(sec);
+ this->sections[i] = createInputSection(i, sec, shstrtab);
selectedGroups.push_back(entries);
continue;
}
@@ -648,7 +636,7 @@ void ObjFile<ELFT>::initializeSections(bool ignoreComdats) {
case SHT_NULL:
break;
default:
- this->sections[i] = createInputSection(sec);
+ this->sections[i] = createInputSection(i, sec, shstrtab);
}
}
@@ -665,7 +653,7 @@ void ObjFile<ELFT>::initializeSections(bool ignoreComdats) {
const Elf_Shdr &sec = objSections[i];
if (sec.sh_type == SHT_REL || sec.sh_type == SHT_RELA)
- this->sections[i] = createInputSection(sec);
+ this->sections[i] = createInputSection(i, sec, shstrtab);
// A SHF_LINK_ORDER section with sh_link=0 is handled as if it did not have
// the flag.
@@ -849,21 +837,25 @@ template <class ELFT> static uint32_t readAndFeatures(const InputSection &sec) {
}
template <class ELFT>
-InputSectionBase *ObjFile<ELFT>::getRelocTarget(const Elf_Shdr &sec) {
- uint32_t idx = sec.sh_info;
- if (idx >= this->sections.size())
- fatal(toString(this) + ": invalid relocated section index: " + Twine(idx));
- InputSectionBase *target = this->sections[idx];
-
- // Strictly speaking, a relocation section must be included in the
- // group of the section it relocates. However, LLVM 3.3 and earlier
- // would fail to do so, so we gracefully handle that case.
- if (target == &InputSection::discarded)
- return nullptr;
-
- if (!target)
- fatal(toString(this) + ": unsupported relocation reference");
- return target;
+InputSectionBase *ObjFile<ELFT>::getRelocTarget(uint32_t idx, StringRef name,
+ const Elf_Shdr &sec) {
+ uint32_t info = sec.sh_info;
+ if (info < this->sections.size()) {
+ InputSectionBase *target = this->sections[info];
+
+ // Strictly speaking, a relocation section must be included in the
+ // group of the section it relocates. However, LLVM 3.3 and earlier
+ // would fail to do so, so we gracefully handle that case.
+ if (target == &InputSection::discarded)
+ return nullptr;
+
+ if (target != nullptr)
+ return target;
+ }
+
+ error(toString(this) + Twine(": relocation section ") + name + " (index " +
+ Twine(idx) + ") has invalid sh_info (" + Twine(info) + ")");
+ return nullptr;
}
// Create a regular InputSection class that has the same contents
@@ -874,8 +866,10 @@ static InputSection *toRegularSection(MergeInputSection *sec) {
}
template <class ELFT>
-InputSectionBase *ObjFile<ELFT>::createInputSection(const Elf_Shdr &sec) {
- StringRef name = getSectionName(sec);
+InputSectionBase *ObjFile<ELFT>::createInputSection(uint32_t idx,
+ const Elf_Shdr &sec,
+ StringRef shstrtab) {
+ StringRef name = CHECK(getObj().getSectionName(sec, shstrtab), this);
if (config->emachine == EM_ARM && sec.sh_type == SHT_ARM_ATTRIBUTES) {
ARMAttributeParser attributes;
@@ -949,7 +943,7 @@ InputSectionBase *ObjFile<ELFT>::createInputSection(const Elf_Shdr &sec) {
// and the group is discarded, even though it's a violation of the
// spec. We handle that situation gracefully by discarding dangling
// relocation sections.
- InputSectionBase *target = getRelocTarget(sec);
+ InputSectionBase *target = getRelocTarget(idx, name, sec);
if (!target)
return nullptr;
@@ -963,22 +957,10 @@ InputSectionBase *ObjFile<ELFT>::createInputSection(const Elf_Shdr &sec) {
this->sections[sec.sh_info] = target;
}
- if (target->firstRelocation)
+ if (target->relSecIdx != 0)
fatal(toString(this) +
": multiple relocation sections to one section are not supported");
-
- if (sec.sh_type == SHT_RELA) {
- ArrayRef<Elf_Rela> rels = CHECK(getObj().relas(sec), this);
- target->firstRelocation = rels.begin();
- target->numRelocations = rels.size();
- target->areRelocsRela = true;
- } else {
- ArrayRef<Elf_Rel> rels = CHECK(getObj().rels(sec), this);
- target->firstRelocation = rels.begin();
- target->numRelocations = rels.size();
- target->areRelocsRela = false;
- }
- assert(isUInt<31>(target->numRelocations));
+ target->relSecIdx = idx;
// Relocation sections are usually removed from the output, so return
// `nullptr` for the normal case. However, if -r or --emit-relocs is
@@ -1072,11 +1054,6 @@ InputSectionBase *ObjFile<ELFT>::createInputSection(const Elf_Shdr &sec) {
return make<InputSection>(*this, sec, name);
}
-template <class ELFT>
-StringRef ObjFile<ELFT>::getSectionName(const Elf_Shdr &sec) {
- return CHECK(getObj().getSectionName(sec, sectionStringTable), this);
-}
-
// Initialize this->Symbols. this->Symbols is a parallel array as
// its corresponding ELF symbol table.
template <class ELFT> void ObjFile<ELFT>::initializeSymbols() {
@@ -1629,6 +1606,8 @@ static uint16_t getBitcodeMachineKind(StringRef path, const Triple &t) {
return EM_ARM;
case Triple::avr:
return EM_AVR;
+ case Triple::hexagon:
+ return EM_HEXAGON;
case Triple::mips:
case Triple::mipsel:
case Triple::mips64:
diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h
index bd72cfcdd050..fb4d46b43f35 100644
--- a/lld/ELF/InputFiles.h
+++ b/lld/ELF/InputFiles.h
@@ -43,8 +43,7 @@ using llvm::object::Archive;
class Symbol;
-// If -reproduce option is given, all input files are written
-// to this tar archive.
+// If --reproduce is specified, all input files are written to this tar archive.
extern std::unique_ptr<llvm::TarWriter> tar;
// Opens a given file.
@@ -260,9 +259,10 @@ private:
void initializeSymbols();
void initializeJustSymbols();
- InputSectionBase *getRelocTarget(const Elf_Shdr &sec);
- InputSectionBase *createInputSection(const Elf_Shdr &sec);
- StringRef getSectionName(const Elf_Shdr &sec);
+ InputSectionBase *getRelocTarget(uint32_t idx, StringRef name,
+ const Elf_Shdr &sec);
+ InputSectionBase *createInputSection(uint32_t idx, const Elf_Shdr &sec,
+ StringRef shstrtab);
bool shouldMerge(const Elf_Shdr &sec, StringRef name);
@@ -280,9 +280,6 @@ private:
// If the section does not exist (which is common), the array is empty.
ArrayRef<Elf_Word> shndxTable;
- // .shstrtab contents.
- StringRef sectionStringTable;
-
// Debugging information to retrieve source file and line for error
// reporting. Linker may find reasonable number of errors in a
// single object file, so we cache debugging information in order to
@@ -361,7 +358,7 @@ public:
class SharedFile : public ELFFileBase {
public:
SharedFile(MemoryBufferRef m, StringRef defaultSoName)
- : ELFFileBase(SharedKind, m), soName(std::string(defaultSoName)),
+ : ELFFileBase(SharedKind, m), soName(defaultSoName),
isNeeded(!config->asNeeded) {}
// This is actually a vector of Elf_Verdef pointers.
@@ -375,7 +372,7 @@ public:
static unsigned vernauxNum;
std::vector<StringRef> dtNeeded;
- std::string soName;
+ StringRef soName;
static bool classof(const InputFile *f) { return f->kind() == SharedKind; }
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index 1f9fa961fc26..74d4dd309c79 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -70,9 +70,6 @@ InputSectionBase::InputSectionBase(InputFile *file, uint64_t flags,
if (sectionKind == SectionBase::Merge && rawData.size() > UINT32_MAX)
error(toString(this) + ": section too large");
- numRelocations = 0;
- areRelocsRela = false;
-
// The ELF spec states that a value of 0 means the section has
// no alignment constraints.
uint32_t v = std::max<uint32_t>(alignment, 1);
@@ -88,7 +85,22 @@ InputSectionBase::InputSectionBase(InputFile *file, uint64_t flags,
if (!zlib::isAvailable())
error(toString(file) + ": contains a compressed section, " +
"but zlib is not available");
- parseCompressedHeader();
+ switch (config->ekind) {
+ case ELF32LEKind:
+ parseCompressedHeader<ELF32LE>();
+ break;
+ case ELF32BEKind:
+ parseCompressedHeader<ELF32BE>();
+ break;
+ case ELF64LEKind:
+ parseCompressedHeader<ELF64LE>();
+ break;
+ case ELF64BEKind:
+ parseCompressedHeader<ELF64BE>();
+ break;
+ default:
+ llvm_unreachable("unknown ELFT");
+ }
}
}
@@ -102,32 +114,14 @@ static uint64_t getFlags(uint64_t flags) {
return flags;
}
-// GNU assembler 2.24 and LLVM 4.0.0's MC (the newest release as of
-// March 2017) fail to infer section types for sections starting with
-// ".init_array." or ".fini_array.". They set SHT_PROGBITS instead of
-// SHF_INIT_ARRAY. As a result, the following assembler directive
-// creates ".init_array.100" with SHT_PROGBITS, for example.
-//
-// .section .init_array.100, "aw"
-//
-// This function forces SHT_{INIT,FINI}_ARRAY so that we can handle
-// incorrect inputs as if they were correct from the beginning.
-static uint64_t getType(uint64_t type, StringRef name) {
- if (type == SHT_PROGBITS && name.startswith(".init_array."))
- return SHT_INIT_ARRAY;
- if (type == SHT_PROGBITS && name.startswith(".fini_array."))
- return SHT_FINI_ARRAY;
- return type;
-}
-
template <class ELFT>
InputSectionBase::InputSectionBase(ObjFile<ELFT> &file,
const typename ELFT::Shdr &hdr,
StringRef name, Kind sectionKind)
- : InputSectionBase(&file, getFlags(hdr.sh_flags),
- getType(hdr.sh_type, name), hdr.sh_entsize, hdr.sh_link,
- hdr.sh_info, hdr.sh_addralign,
- getSectionContents(file, hdr), name, sectionKind) {
+ : InputSectionBase(&file, getFlags(hdr.sh_flags), hdr.sh_type,
+ hdr.sh_entsize, hdr.sh_link, hdr.sh_info,
+ hdr.sh_addralign, getSectionContents(file, hdr), name,
+ sectionKind) {
// We reject object files having insanely large alignments even though
// they are allowed by the spec. I think 4GB is a reasonable limitation.
// We might want to relax this in the future.
@@ -165,6 +159,25 @@ uint64_t InputSectionBase::getOffsetInFile() const {
return secStart - fileStart;
}
+template <class ELFT> RelsOrRelas<ELFT> InputSectionBase::relsOrRelas() const {
+ if (relSecIdx == 0)
+ return {};
+ RelsOrRelas<ELFT> ret;
+ const ELFFile<ELFT> obj = cast<ELFFileBase>(file)->getObj<ELFT>();
+ typename ELFT::Shdr shdr = cantFail(obj.sections())[relSecIdx];
+ if (shdr.sh_type == SHT_REL) {
+ ret.rels = makeArrayRef(reinterpret_cast<const typename ELFT::Rel *>(
+ obj.base() + shdr.sh_offset),
+ shdr.sh_size / sizeof(typename ELFT::Rel));
+ } else {
+ assert(shdr.sh_type == SHT_RELA);
+ ret.relas = makeArrayRef(reinterpret_cast<const typename ELFT::Rela *>(
+ obj.base() + shdr.sh_offset),
+ shdr.sh_size / sizeof(typename ELFT::Rela));
+ }
+ return ret;
+}
+
uint64_t SectionBase::getOffset(uint64_t offset) const {
switch (kind()) {
case Output: {
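
The relsOrRelas() accessor above replaces the old firstRelocation/numRelocations/areRelocsRela fields: exactly one of the two arrays is populated, and callers (see the DWARF.cpp and ICF.cpp hunks in this patch) branch on areRelocsRel(). A self-contained stand-in showing the dispatch idiom, not lld's actual types:

#include <cstdint>
#include <cstdio>
#include <vector>

struct Rel  { uint64_t r_offset; };
struct Rela { uint64_t r_offset; int64_t r_addend; };

// Toy version of the RelsOrRelas view: at most one vector is non-empty.
struct RelsOrRelas {
  std::vector<Rel> rels;
  std::vector<Rela> relas;
  bool areRelocsRel() const { return relas.empty(); }
};

static void scan(const RelsOrRelas &r) {
  if (r.areRelocsRel()) {
    for (const Rel &rel : r.rels)   // SHT_REL: implicit addends
      std::printf("REL  offset=%llu\n", (unsigned long long)rel.r_offset);
  } else {
    for (const Rela &rel : r.relas) // SHT_RELA: explicit addends
      std::printf("RELA offset=%llu addend=%lld\n",
                  (unsigned long long)rel.r_offset, (long long)rel.r_addend);
  }
}

int main() {
  RelsOrRelas r;
  r.relas = {{0, 8}, {8, -4}};
  scan(r); // takes the RELA branch
  return 0;
}
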
@@ -210,10 +223,7 @@ OutputSection *SectionBase::getOutputSection() {
// When a section is compressed, `rawData` consists with a header followed
// by zlib-compressed data. This function parses a header to initialize
// `uncompressedSize` member and remove the header from `rawData`.
-void InputSectionBase::parseCompressedHeader() {
- using Chdr64 = typename ELF64LE::Chdr;
- using Chdr32 = typename ELF32LE::Chdr;
-
+template <typename ELFT> void InputSectionBase::parseCompressedHeader() {
// Old-style header
if (name.startswith(".zdebug")) {
if (!toStringRef(rawData).startswith("ZLIB")) {
@@ -239,32 +249,13 @@ void InputSectionBase::parseCompressedHeader() {
assert(flags & SHF_COMPRESSED);
flags &= ~(uint64_t)SHF_COMPRESSED;
- // New-style 64-bit header
- if (config->is64) {
- if (rawData.size() < sizeof(Chdr64)) {
- error(toString(this) + ": corrupted compressed section");
- return;
- }
-
- auto *hdr = reinterpret_cast<const Chdr64 *>(rawData.data());
- if (hdr->ch_type != ELFCOMPRESS_ZLIB) {
- error(toString(this) + ": unsupported compression type");
- return;
- }
-
- uncompressedSize = hdr->ch_size;
- alignment = std::max<uint32_t>(hdr->ch_addralign, 1);
- rawData = rawData.slice(sizeof(*hdr));
- return;
- }
-
- // New-style 32-bit header
- if (rawData.size() < sizeof(Chdr32)) {
+ // New-style header
+ if (rawData.size() < sizeof(typename ELFT::Chdr)) {
error(toString(this) + ": corrupted compressed section");
return;
}
- auto *hdr = reinterpret_cast<const Chdr32 *>(rawData.data());
+ auto *hdr = reinterpret_cast<const typename ELFT::Chdr *>(rawData.data());
if (hdr->ch_type != ELFCOMPRESS_ZLIB) {
error(toString(this) + ": unsupported compression type");
return;
@@ -293,32 +284,21 @@ Defined *InputSectionBase::getEnclosingFunction(uint64_t offset) {
return nullptr;
}
-// Returns a source location string. Used to construct an error message.
+// Returns an object file location string. Used to construct an error message.
template <class ELFT>
std::string InputSectionBase::getLocation(uint64_t offset) {
- std::string secAndOffset = (name + "+0x" + utohexstr(offset)).str();
+ std::string secAndOffset =
+ (name + "+0x" + Twine::utohexstr(offset) + ")").str();
  // We don't have a file for synthetic sections.
if (getFile<ELFT>() == nullptr)
- return (config->outputFile + ":(" + secAndOffset + ")")
- .str();
-
- // First check if we can get desired values from debugging information.
- if (Optional<DILineInfo> info = getFile<ELFT>()->getDILineInfo(this, offset))
- return info->FileName + ":" + std::to_string(info->Line) + ":(" +
- secAndOffset + ")";
-
- // File->sourceFile contains STT_FILE symbol that contains a
- // source file name. If it's missing, we use an object file name.
- std::string srcFile = std::string(getFile<ELFT>()->sourceFile);
- if (srcFile.empty())
- srcFile = toString(file);
+ return (config->outputFile + ":(" + secAndOffset).str();
+ std::string file = toString(getFile<ELFT>());
if (Defined *d = getEnclosingFunction<ELFT>(offset))
- return srcFile + ":(function " + toString(*d) + ": " + secAndOffset + ")";
+ return file + ":(function " + toString(*d) + ": " + secAndOffset;
- // If there's no symbol, print out the offset in the section.
- return (srcFile + ":(" + secAndOffset + ")");
+ return file + ":(" + secAndOffset;
}
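With this rewrite, getLocation() always names the input object file (the debug-info and STT_FILE lookups are gone), so the resulting strings look roughly like the following; the file, function, and section names are purely illustrative:

    a.o:(function foo: .text+0x1c)    <- offset falls inside a defined function
    a.o:(.rodata+0x8)                 <- no enclosing function symbol
    a.out:(.got+0x10)                 <- synthetic section: the output file name is used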
// This function is intended to be used for constructing an error message.
@@ -515,6 +495,7 @@ static uint32_t getARMUndefinedRelativeWeakVA(RelType type, uint32_t a,
switch (type) {
  // Unresolved branch relocations to weak references resolve to the next
  // instruction; this will be either 2 or 4 bytes on from P.
+ case R_ARM_THM_JUMP8:
case R_ARM_THM_JUMP11:
return p + 2 + a;
case R_ARM_CALL:
@@ -802,6 +783,8 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type,
case R_PLT_PC:
case R_PPC64_CALL_PLT:
return sym.getPltVA() + a - p;
+ case R_PLT_GOTPLT:
+ return sym.getPltVA() + a - in.gotPlt->getVA();
case R_PPC32_PLTREL:
// R_PPC_PLTREL24 uses the addend (usually 0 or 0x8000) to indicate r30
// stores _GLOBAL_OFFSET_TABLE_ or .got2+0x8000. The addend is ignored for
@@ -836,7 +819,7 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type,
// --noinhibit-exec, even a non-weak undefined reference may reach here.
// Just return A, which matches R_ABS, and the behavior of some dynamic
// loaders.
- if (sym.isUndefined() || sym.isLazy())
+ if (sym.isUndefined())
return a;
return getTlsTpOffset(sym) + a;
case R_RELAX_TLS_GD_TO_LE_NEG:
@@ -850,6 +833,8 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type,
return in.got->getGlobalDynAddr(sym) + a;
case R_TLSDESC_PC:
return in.got->getGlobalDynAddr(sym) + a - p;
+ case R_TLSDESC_GOTPLT:
+ return in.got->getGlobalDynAddr(sym) + a - in.gotPlt->getVA();
case R_AARCH64_TLSDESC_PAGE:
return getAArch64Page(in.got->getGlobalDynAddr(sym) + a) -
getAArch64Page(p);
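Both of the new *_GOTPLT expressions added above are computed relative to the .got.plt base rather than to the place P; schematically:

    R_PLT_GOTPLT:     value = VA(sym's PLT entry)         + addend - VA(.got.plt)
    R_TLSDESC_GOTPLT: value = VA(sym's TLSDESC GOT entry) + addend - VA(.got.plt)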
@@ -1016,12 +1001,15 @@ void InputSectionBase::relocate(uint8_t *buf, uint8_t *bufEnd) {
}
auto *sec = cast<InputSection>(this);
- if (config->relocatable)
+ if (config->relocatable) {
relocateNonAllocForRelocatable(sec, buf);
- else if (sec->areRelocsRela)
- sec->relocateNonAlloc<ELFT>(buf, sec->template relas<ELFT>());
- else
- sec->relocateNonAlloc<ELFT>(buf, sec->template rels<ELFT>());
+ } else {
+ const RelsOrRelas<ELFT> rels = sec->template relsOrRelas<ELFT>();
+ if (rels.areRelocsRel())
+ sec->relocateNonAlloc<ELFT>(buf, rels.rels);
+ else
+ sec->relocateNonAlloc<ELFT>(buf, rels.relas);
+ }
}
void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) {
@@ -1335,10 +1323,11 @@ static unsigned getReloc(IntTy begin, IntTy size, const ArrayRef<RelTy> &rels,
// .eh_frame is a sequence of CIE or FDE records.
// This function splits an input section into records and returns them.
template <class ELFT> void EhInputSection::split() {
- if (areRelocsRela)
- split<ELFT>(relas<ELFT>());
+ const RelsOrRelas<ELFT> rels = relsOrRelas<ELFT>();
+ if (rels.areRelocsRel())
+ split<ELFT>(rels.rels);
else
- split<ELFT>(rels<ELFT>());
+ split<ELFT>(rels.relas);
}
template <class ELFT, class RelTy>
@@ -1475,6 +1464,11 @@ template void InputSection::writeTo<ELF32BE>(uint8_t *);
template void InputSection::writeTo<ELF64LE>(uint8_t *);
template void InputSection::writeTo<ELF64BE>(uint8_t *);
+template RelsOrRelas<ELF32LE> InputSectionBase::relsOrRelas<ELF32LE>() const;
+template RelsOrRelas<ELF32BE> InputSectionBase::relsOrRelas<ELF32BE>() const;
+template RelsOrRelas<ELF64LE> InputSectionBase::relsOrRelas<ELF64LE>() const;
+template RelsOrRelas<ELF64BE> InputSectionBase::relsOrRelas<ELF64BE>() const;
+
template MergeInputSection::MergeInputSection(ObjFile<ELF32LE> &,
const ELF32LE::Shdr &, StringRef);
template MergeInputSection::MergeInputSection(ObjFile<ELF32BE> &,
diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h
index 5b91c1c90bd2..4bd1f410e388 100644
--- a/lld/ELF/InputSection.h
+++ b/lld/ELF/InputSection.h
@@ -33,6 +33,13 @@ class OutputSection;
extern std::vector<Partition> partitions;
+// Returned by InputSectionBase::relsOrRelas. At least one member is empty.
+template <class ELFT> struct RelsOrRelas {
+ ArrayRef<typename ELFT::Rel> rels;
+ ArrayRef<typename ELFT::Rela> relas;
+ bool areRelocsRel() const { return rels.size(); }
+};
+
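Because exactly one of `rels`/`relas` is ever populated, callers can iterate over both members unconditionally instead of branching on the relocation section type first. A minimal self-contained analogue of that pattern — plain std::vector stand-ins for the ArrayRef-based struct above, not lld's real types:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    struct Rel  { uint64_t r_offset; uint32_t r_type; };
    struct Rela { uint64_t r_offset; uint32_t r_type; int64_t r_addend; };

    struct RelsOrRelas {
      std::vector<Rel> rels;
      std::vector<Rela> relas;          // at most one of the two is non-empty
      bool areRelocsRel() const { return !rels.empty(); }
    };

    static void visitAll(const RelsOrRelas &r) {
      // At most one of these loops does any work, mirroring MarkLive<ELFT>::mark().
      for (const Rel &rel : r.rels)
        std::printf("REL  offset=0x%llx type=%u\n",
                    (unsigned long long)rel.r_offset, rel.r_type);
      for (const Rela &rel : r.relas)
        std::printf("RELA offset=0x%llx type=%u addend=%lld\n",
                    (unsigned long long)rel.r_offset, rel.r_type,
                    (long long)rel.r_addend);
    }

    int main() {
      RelsOrRelas r;
      r.relas.push_back({0x10, 2, -4});
      visitAll(r);
    }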
// This is the base class of all sections that lld handles. Some are sections in
// input files, some are sections in the produced output file and some exist
// just as a convenience for implementing special ways of combining some
@@ -114,10 +121,8 @@ public:
static bool classof(const SectionBase *s) { return s->kind() != Output; }
- // Relocations that refer to this section.
- unsigned numRelocations : 31;
- unsigned areRelocsRela : 1;
- const void *firstRelocation = nullptr;
+  // Section index of the relocation section, if one exists.
+ uint32_t relSecIdx = 0;
// The file which contains this section. Its dynamic type is always
// ObjFile<ELFT>, but in order to avoid ELFT, we use InputFile as
@@ -170,19 +175,7 @@ public:
// used by --gc-sections.
InputSectionBase *nextInSectionGroup = nullptr;
- template <class ELFT> ArrayRef<typename ELFT::Rel> rels() const {
- assert(!areRelocsRela);
- return llvm::makeArrayRef(
- static_cast<const typename ELFT::Rel *>(firstRelocation),
- numRelocations);
- }
-
- template <class ELFT> ArrayRef<typename ELFT::Rela> relas() const {
- assert(areRelocsRela);
- return llvm::makeArrayRef(
- static_cast<const typename ELFT::Rela *>(firstRelocation),
- numRelocations);
- }
+ template <class ELFT> RelsOrRelas<ELFT> relsOrRelas() const;
// InputSections that are dependent on us (reverse dependency for GC)
llvm::TinyPtrVector<InputSection *> dependentSections;
@@ -238,6 +231,7 @@ public:
}
protected:
+ template <typename ELFT>
void parseCompressedHeader();
void uncompress() const;
@@ -391,9 +385,9 @@ private:
};
#ifdef _WIN32
-static_assert(sizeof(InputSection) <= 192, "InputSection is too big");
-#else
static_assert(sizeof(InputSection) <= 184, "InputSection is too big");
+#else
+static_assert(sizeof(InputSection) <= 176, "InputSection is too big");
#endif
inline bool isDebugSection(const InputSectionBase &sec) {
diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp
index e8710e3bdb4f..a42d216e4e77 100644
--- a/lld/ELF/LTO.cpp
+++ b/lld/ELF/LTO.cpp
@@ -23,10 +23,10 @@
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/IR/DiagnosticPrinter.h"
-#include "llvm/LTO/Caching.h"
#include "llvm/LTO/Config.h"
#include "llvm/LTO/LTO.h"
#include "llvm/Object/SymbolicFile.h"
+#include "llvm/Support/Caching.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
@@ -112,7 +112,6 @@ static lto::Config createConfig() {
}
}
- c.Options.PseudoProbeForProfiling = config->ltoPseudoProbeForProfiling;
c.Options.UniqueBasicBlockSectionNames =
config->ltoUniqueBasicBlockSectionNames;
@@ -163,6 +162,7 @@ static lto::Config createConfig() {
c.CSIRProfile = std::string(config->ltoCSProfileFile);
c.RunCSIRInstr = config->ltoCSProfileGenerate;
+ c.PGOWarnMismatch = config->ltoPGOWarnMismatch;
if (config->emitLLVM) {
c.PostInternalizeModuleHook = [](size_t task, const Module &m) {
@@ -304,18 +304,18 @@ std::vector<InputFile *> BitcodeCompiler::compile() {
// The --thinlto-cache-dir option specifies the path to a directory in which
// to cache native object files for ThinLTO incremental builds. If a path was
// specified, configure LTO to use it as the cache directory.
- lto::NativeObjectCache cache;
+ FileCache cache;
if (!config->thinLTOCacheDir.empty())
- cache = check(
- lto::localCache(config->thinLTOCacheDir,
- [&](size_t task, std::unique_ptr<MemoryBuffer> mb) {
- files[task] = std::move(mb);
- }));
+ cache =
+ check(localCache("ThinLTO", "Thin", config->thinLTOCacheDir,
+ [&](size_t task, std::unique_ptr<MemoryBuffer> mb) {
+ files[task] = std::move(mb);
+ }));
if (!bitcodeFiles.empty())
checkError(ltoObj->run(
[&](size_t task) {
- return std::make_unique<lto::NativeObjectStream>(
+ return std::make_unique<CachedFileStream>(
std::make_unique<raw_svector_ostream>(buf[task]));
},
cache));
diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp
index a938984ad945..f332b03d757d 100644
--- a/lld/ELF/LinkerScript.cpp
+++ b/lld/ELF/LinkerScript.cpp
@@ -457,7 +457,7 @@ LinkerScript::computeInputSections(const InputSectionDescription *cmd,
if (!sec->isLive() || sec->parent || seen.contains(i))
continue;
- // For -emit-relocs we have to ignore entries like
+ // For --emit-relocs we have to ignore entries like
// .rela.dyn : { *(.rela.data) }
// which are common because they are in the default bfd script.
// We do not ignore SHT_REL[A] linker-synthesized sections here because
@@ -849,17 +849,8 @@ void LinkerScript::diagnoseOrphanHandling() const {
}
uint64_t LinkerScript::advance(uint64_t size, unsigned alignment) {
- bool isTbss =
- (ctx->outSec->flags & SHF_TLS) && ctx->outSec->type == SHT_NOBITS;
- uint64_t start = isTbss ? dot + ctx->threadBssOffset : dot;
- start = alignTo(start, alignment);
- uint64_t end = start + size;
-
- if (isTbss)
- ctx->threadBssOffset = end - dot;
- else
- dot = end;
- return end;
+ dot = alignTo(dot, alignment) + size;
+ return dot;
}
void LinkerScript::output(InputSection *s) {
@@ -891,34 +882,48 @@ void LinkerScript::switchTo(OutputSection *sec) {
// This function searches for a memory region to place the given output
// section in. If found, a pointer to the appropriate memory region is
-// returned. Otherwise, a nullptr is returned.
-MemoryRegion *LinkerScript::findMemoryRegion(OutputSection *sec) {
+// returned in the first member of the pair. Otherwise, a nullptr is returned.
+// The second member of the pair is a hint that should be passed to the
+// subsequent call of this method.
+std::pair<MemoryRegion *, MemoryRegion *>
+LinkerScript::findMemoryRegion(OutputSection *sec, MemoryRegion *hint) {
+ // Non-allocatable sections are not part of the process image.
+ if (!(sec->flags & SHF_ALLOC)) {
+ if (!sec->memoryRegionName.empty())
+ warn("ignoring memory region assignment for non-allocatable section '" +
+ sec->name + "'");
+ return {nullptr, nullptr};
+ }
+
// If a memory region name was specified in the output section command,
// then try to find that region first.
if (!sec->memoryRegionName.empty()) {
if (MemoryRegion *m = memoryRegions.lookup(sec->memoryRegionName))
- return m;
+ return {m, m};
error("memory region '" + sec->memoryRegionName + "' not declared");
- return nullptr;
+ return {nullptr, nullptr};
}
// If at least one memory region is defined, all sections must
// belong to some memory region. Otherwise, we don't need to do
// anything for memory regions.
if (memoryRegions.empty())
- return nullptr;
+ return {nullptr, nullptr};
+
+ // An orphan section should continue the previous memory region.
+ if (sec->sectionIndex == UINT32_MAX && hint)
+ return {hint, hint};
// See if a region can be found by matching section flags.
for (auto &pair : memoryRegions) {
MemoryRegion *m = pair.second;
if ((m->flags & sec->flags) && (m->negFlags & sec->flags) == 0)
- return m;
+ return {m, nullptr};
}
// Otherwise, no suitable region was found.
- if (sec->flags & SHF_ALLOC)
- error("no memory region specified for section '" + sec->name + "'");
- return nullptr;
+ error("no memory region specified for section '" + sec->name + "'");
+ return {nullptr, nullptr};
}
static OutputSection *findFirstSection(PhdrEntry *load) {
@@ -931,13 +936,24 @@ static OutputSection *findFirstSection(PhdrEntry *load) {
// This function assigns offsets to input sections and an output section
// for a single sections command (e.g. ".text { *(.text); }").
void LinkerScript::assignOffsets(OutputSection *sec) {
+ const bool isTbss = (sec->flags & SHF_TLS) && sec->type == SHT_NOBITS;
const bool sameMemRegion = ctx->memRegion == sec->memRegion;
const bool prevLMARegionIsDefault = ctx->lmaRegion == nullptr;
const uint64_t savedDot = dot;
ctx->memRegion = sec->memRegion;
ctx->lmaRegion = sec->lmaRegion;
- if (sec->flags & SHF_ALLOC) {
+ if (!(sec->flags & SHF_ALLOC)) {
+ // Non-SHF_ALLOC sections have zero addresses.
+ dot = 0;
+ } else if (isTbss) {
+ // Allow consecutive SHF_TLS SHT_NOBITS output sections. The address range
+ // starts from the end address of the previous tbss section.
+ if (ctx->tbssAddr == 0)
+ ctx->tbssAddr = dot;
+ else
+ dot = ctx->tbssAddr;
+ } else {
if (ctx->memRegion)
dot = ctx->memRegion->curPos;
if (sec->addrExpr)
@@ -950,9 +966,6 @@ void LinkerScript::assignOffsets(OutputSection *sec) {
if (ctx->memRegion && ctx->memRegion->curPos < dot)
expandMemoryRegion(ctx->memRegion, dot - ctx->memRegion->curPos,
ctx->memRegion->name, sec->name);
- } else {
- // Non-SHF_ALLOC sections have zero addresses.
- dot = 0;
}
switchTo(sec);
@@ -963,12 +976,16 @@ void LinkerScript::assignOffsets(OutputSection *sec) {
// reuse previous lmaOffset; otherwise, reset lmaOffset to 0. This emulates
// heuristics described in
// https://sourceware.org/binutils/docs/ld/Output-Section-LMA.html
- if (sec->lmaExpr)
+ if (sec->lmaExpr) {
ctx->lmaOffset = sec->lmaExpr().getValue() - dot;
- else if (MemoryRegion *mr = sec->lmaRegion)
- ctx->lmaOffset = alignTo(mr->curPos, sec->alignment) - dot;
- else if (!sameMemRegion || !prevLMARegionIsDefault)
+ } else if (MemoryRegion *mr = sec->lmaRegion) {
+ uint64_t lmaStart = alignTo(mr->curPos, sec->alignment);
+ if (mr->curPos < lmaStart)
+ expandMemoryRegion(mr, lmaStart - mr->curPos, mr->name, sec->name);
+ ctx->lmaOffset = lmaStart - dot;
+ } else if (!sameMemRegion || !prevLMARegionIsDefault) {
ctx->lmaOffset = 0;
+ }
// Propagate ctx->lmaOffset to the first "non-header" section.
if (PhdrEntry *l = ctx->outSec->ptLoad)
@@ -1008,11 +1025,16 @@ void LinkerScript::assignOffsets(OutputSection *sec) {
// Non-SHF_ALLOC sections do not affect the addresses of other OutputSections
// as they are not part of the process image.
- if (!(sec->flags & SHF_ALLOC))
+ if (!(sec->flags & SHF_ALLOC)) {
dot = savedDot;
+ } else if (isTbss) {
+ // NOBITS TLS sections are similar. Additionally save the end address.
+ ctx->tbssAddr = dot;
+ dot = savedDot;
+ }
}
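A small worked example of the tbssAddr bookkeeping above (addresses are illustrative): with dot at 0x1000, a first .tbss of size 0x20 is laid out at [0x1000, 0x1020), tbssAddr advances to 0x1020, and dot is restored to 0x1000; a second .tbss then starts at 0x1020, while the next non-TLS output section still starts at 0x1000. NOBITS TLS sections therefore never consume address space seen by later sections.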
-static bool isDiscardable(OutputSection &sec) {
+static bool isDiscardable(const OutputSection &sec) {
if (sec.name == "/DISCARD/")
return true;
@@ -1041,6 +1063,11 @@ static bool isDiscardable(OutputSection &sec) {
return true;
}
+bool LinkerScript::isDiscarded(const OutputSection *sec) const {
+ return hasSectionsCommand && (getFirstInputSection(sec) == nullptr) &&
+ isDiscardable(*sec);
+}
+
static void maybePropagatePhdrs(OutputSection &sec,
std::vector<StringRef> &phdrs) {
if (sec.phdrs.empty()) {
@@ -1128,6 +1155,7 @@ void LinkerScript::adjustSectionsBeforeSorting() {
void LinkerScript::adjustSectionsAfterSorting() {
// Try and find an appropriate memory region to assign offsets in.
+ MemoryRegion *hint = nullptr;
for (BaseCommand *base : sectionCommands) {
if (auto *sec = dyn_cast<OutputSection>(base)) {
if (!sec->lmaRegionName.empty()) {
@@ -1136,7 +1164,7 @@ void LinkerScript::adjustSectionsAfterSorting() {
else
error("memory region '" + sec->lmaRegionName + "' not declared");
}
- sec->memRegion = findMemoryRegion(sec);
+ std::tie(sec->memRegion, hint) = findMemoryRegion(sec, hint);
}
}
diff --git a/lld/ELF/LinkerScript.h b/lld/ELF/LinkerScript.h
index 0592c52acb84..b366da4f274e 100644
--- a/lld/ELF/LinkerScript.h
+++ b/lld/ELF/LinkerScript.h
@@ -247,11 +247,11 @@ class LinkerScript final {
// not be used outside of the scope of a call to the above functions.
struct AddressState {
AddressState();
- uint64_t threadBssOffset = 0;
OutputSection *outSec = nullptr;
MemoryRegion *memRegion = nullptr;
MemoryRegion *lmaRegion = nullptr;
uint64_t lmaOffset = 0;
+ uint64_t tbssAddr = 0;
};
llvm::DenseMap<StringRef, OutputSection *> nameToOutputSection;
@@ -272,7 +272,8 @@ class LinkerScript final {
std::vector<size_t> getPhdrIndices(OutputSection *sec);
- MemoryRegion *findMemoryRegion(OutputSection *sec);
+ std::pair<MemoryRegion *, MemoryRegion *>
+ findMemoryRegion(OutputSection *sec, MemoryRegion *hint);
void switchTo(OutputSection *sec);
uint64_t advance(uint64_t size, unsigned align);
@@ -317,6 +318,8 @@ public:
void processSymbolAssignments();
void declareSymbols();
+ bool isDiscarded(const OutputSection *sec) const;
+
// Used to handle INSERT AFTER statements.
void processInsertCommands();
diff --git a/lld/ELF/MapFile.cpp b/lld/ELF/MapFile.cpp
index 239c6c394840..c4690ae5aefd 100644
--- a/lld/ELF/MapFile.cpp
+++ b/lld/ELF/MapFile.cpp
@@ -215,6 +215,25 @@ void elf::writeMapFile() {
}
}
+void elf::writeWhyExtract() {
+ if (config->whyExtract.empty())
+ return;
+
+ std::error_code ec;
+ raw_fd_ostream os(config->whyExtract, ec, sys::fs::OF_None);
+ if (ec) {
+ error("cannot open --why-extract= file " + config->whyExtract + ": " +
+ ec.message());
+ return;
+ }
+
+ os << "reference\textracted\tsymbol\n";
+ for (auto &entry : whyExtract) {
+ os << std::get<0>(entry) << '\t' << toString(std::get<1>(entry)) << '\t'
+ << toString(std::get<2>(entry)) << '\n';
+ }
+}
+
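For reference, the file written here is a three-column TSV keyed by the new --why-extract= option (defined in the Options.td hunk further down in this diff). A hypothetical run might produce something like:

    reference         extracted          symbol
    main.o            libfoo.a(foo.o)    foo
    libfoo.a(foo.o)   libbar.a(bar.o)    bar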
static void print(StringRef a, StringRef b) {
lld::outs() << left_justify(a, 49) << " " << b << "\n";
}
diff --git a/lld/ELF/MapFile.h b/lld/ELF/MapFile.h
index c4da18f8ad7f..1b8c0168c0de 100644
--- a/lld/ELF/MapFile.h
+++ b/lld/ELF/MapFile.h
@@ -12,6 +12,7 @@
namespace lld {
namespace elf {
void writeMapFile();
+void writeWhyExtract();
void writeCrossReferenceTable();
void writeArchiveStats();
} // namespace elf
diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp
index e828429b421c..11e0466b1157 100644
--- a/lld/ELF/MarkLive.cpp
+++ b/lld/ELF/MarkLive.cpp
@@ -252,13 +252,12 @@ template <class ELFT> void MarkLive<ELFT>::run() {
// referenced by .eh_frame sections, so we scan them for that here.
if (auto *eh = dyn_cast<EhInputSection>(sec)) {
eh->markLive();
- if (!eh->numRelocations)
- continue;
- if (eh->areRelocsRela)
- scanEhFrameSection(*eh, eh->template relas<ELFT>());
- else
- scanEhFrameSection(*eh, eh->template rels<ELFT>());
+ const RelsOrRelas<ELFT> rels = eh->template relsOrRelas<ELFT>();
+ if (rels.areRelocsRel())
+ scanEhFrameSection(*eh, rels.rels);
+ else if (rels.relas.size())
+ scanEhFrameSection(*eh, rels.relas);
}
if (sec->flags & SHF_GNU_RETAIN) {
@@ -288,13 +287,11 @@ template <class ELFT> void MarkLive<ELFT>::mark() {
while (!queue.empty()) {
InputSectionBase &sec = *queue.pop_back_val();
- if (sec.areRelocsRela) {
- for (const typename ELFT::Rela &rel : sec.template relas<ELFT>())
- resolveReloc(sec, rel, false);
- } else {
- for (const typename ELFT::Rel &rel : sec.template rels<ELFT>())
- resolveReloc(sec, rel, false);
- }
+ const RelsOrRelas<ELFT> rels = sec.template relsOrRelas<ELFT>();
+ for (const typename ELFT::Rel &rel : rels.rels)
+ resolveReloc(sec, rel, false);
+ for (const typename ELFT::Rela &rel : rels.relas)
+ resolveReloc(sec, rel, false);
for (InputSectionBase *isec : sec.dependentSections)
enqueue(isec, 0);
@@ -338,7 +335,7 @@ template <class ELFT> void MarkLive<ELFT>::moveToMain() {
// so that they are emitted to the output file.
template <class ELFT> void elf::markLive() {
llvm::TimeTraceScope timeScope("markLive");
- // If -gc-sections is not given, no sections are removed.
+ // If --gc-sections is not given, retain all input sections.
if (!config->gcSections) {
for (InputSectionBase *sec : inputSections)
sec->markLive();
@@ -353,7 +350,7 @@ template <class ELFT> void elf::markLive() {
// Otherwise, do mark-sweep GC.
//
- // The -gc-sections option works only for SHF_ALLOC sections (sections that
+ // The --gc-sections option works only for SHF_ALLOC sections (sections that
// are memory-mapped at runtime). So we can unconditionally make non-SHF_ALLOC
// sections alive except SHF_LINK_ORDER, SHT_REL/SHT_RELA sections, and
// sections in a group.
@@ -369,7 +366,7 @@ template <class ELFT> void elf::markLive() {
// We are able to garbage collect them.
//
// Note on SHF_REL{,A}: Such sections reach here only when -r
- // or -emit-reloc were given. And they are subject of garbage
+ // or --emit-reloc were given. And they are subject of garbage
// collection because, if we remove a text section, we also
// remove its relocation section.
//
diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td
index bedcf43bbe85..ce82eb8d2754 100644
--- a/lld/ELF/Options.td
+++ b/lld/ELF/Options.td
@@ -43,6 +43,9 @@ def Bsymbolic: F<"Bsymbolic">, HelpText<"Bind default visibility defined symbols
def Bsymbolic_functions: F<"Bsymbolic-functions">,
HelpText<"Bind default visibility defined function symbols locally for -shared">;
+def Bsymbolic_non_weak_functions: F<"Bsymbolic-non-weak-functions">,
+ HelpText<"Bind default visibility defined STB_GLOBAL function symbols locally for -shared">;
+
def Bdynamic: F<"Bdynamic">, HelpText<"Link against shared libraries (default)">;
def Bstatic: F<"Bstatic">, HelpText<"Do not link against shared libraries">;
@@ -116,8 +119,8 @@ defm call_graph_profile_sort: BB<"call-graph-profile-sort",
"Reorder sections with call graph profile (default)",
"Do not reorder sections with call graph profile">;
-// -chroot doesn't have a help text because it is an internal option.
-def chroot: Separate<["--", "-"], "chroot">;
+// --chroot doesn't have a help text because it is an internal option.
+def chroot: Separate<["--"], "chroot">;
defm color_diagnostics: B<"color-diagnostics",
"Alias for --color-diagnostics=always",
@@ -153,10 +156,8 @@ def discard_none: F<"discard-none">,
defm dynamic_linker: Eq<"dynamic-linker", "Which dynamic linker to use">;
defm dynamic_list : Eq<"dynamic-list",
- "Read a list of dynamic symbols. (executable) Put matched non-local defined"
- "symbols to the dynamic symbol table. (shared object) References to matched"
- "non-local STV_DEFAULT symbols shouldn't be bound to definitions within the "
- "shared object. Implies -Bsymbolic but does not set DF_SYMBOLIC">,
+ "Similar to --export-dynamic-symbol-list. When creating a shared object, "
+ "this additionally implies -Bsymbolic but does not set DF_SYMBOLIC">,
MetaVarName<"<file>">;
def eb: F<"EB">, HelpText<"Select the big-endian format in OUTPUT_FORMAT">;
@@ -206,6 +207,10 @@ defm export_dynamic_symbol : EEq<"export-dynamic-symbol",
"Does not imply -Bsymbolic.">,
MetaVarName<"glob">;
+defm export_dynamic_symbol_list : EEq<"export-dynamic-symbol-list",
+ "Read a list of dynamic symbol patterns. Apply --export-dynamic-symbol on each pattern">,
+ MetaVarName<"file">;
+
defm fatal_warnings: B<"fatal-warnings",
"Treat warnings as errors",
"Do not treat warnings as errors (default)">;
@@ -245,13 +250,13 @@ def icf_safe: F<"icf=safe">, HelpText<"Enable safe identical code folding">;
def icf_none: F<"icf=none">, HelpText<"Disable identical code folding (default)">;
-def ignore_function_address_equality: F<"ignore-function-address-equality">,
+def ignore_function_address_equality: FF<"ignore-function-address-equality">,
HelpText<"lld can break the address equality of functions">;
-def ignore_data_address_equality: F<"ignore-data-address-equality">,
+def ignore_data_address_equality: FF<"ignore-data-address-equality">,
HelpText<"lld can break the address equality of data">;
-defm image_base: Eq<"image-base", "Set the base address">;
+defm image_base: EEq<"image-base", "Set the base address">;
defm init: Eq<"init", "Specify an initializer function">,
MetaVarName<"<symbol>">;
@@ -309,7 +314,7 @@ defm orphan_handling:
Eq<"orphan-handling", "Control how orphan sections are handled when linker script used">;
defm pack_dyn_relocs:
- Eq<"pack-dyn-relocs", "Pack dynamic relocations in the given format">,
+ EEq<"pack-dyn-relocs", "Pack dynamic relocations in the given format">,
MetaVarName<"[none,android,relr,android+relr]">;
defm use_android_relr_tags: BB<"use-android-relr-tags",
@@ -347,8 +352,12 @@ def push_state: F<"push-state">,
def print_map: F<"print-map">,
HelpText<"Print a link map to the standard output">;
+defm relax: BB<"relax",
+ "Enable target-specific relaxations if supported (default)",
+ "Disable target-specific relaxations">;
+
defm reproduce:
- Eq<"reproduce",
+ EEq<"reproduce",
"Write tar file containing inputs and command to reproduce link">;
defm rosegment: BB<"rosegment",
@@ -399,7 +408,7 @@ defm target2:
MetaVarName<"<type>">;
defm threads
- : Eq<"threads",
+ : EEq<"threads",
"Number of threads. '1' disables multi-threading. By default all "
"available hardware threads are used">;
@@ -487,6 +496,8 @@ defm whole_archive: B<"whole-archive",
"Force load of all members in a static library",
"Do not force load of all members in a static library (default)">;
+def why_extract: JJ<"why-extract=">, HelpText<"Print to a file about why archive members are extracted">;
+
defm wrap : Eq<"wrap", "Redirect symbol references to __wrap_symbol and "
"__real_symbol references to symbol">,
MetaVarName<"<symbol>">;
@@ -494,7 +505,7 @@ defm wrap : Eq<"wrap", "Redirect symbol references to __wrap_symbol and "
def z: JoinedOrSeparate<["-"], "z">, MetaVarName<"<option>">,
HelpText<"Linker option extensions">;
-def visual_studio_diagnostics_format : F<"vs-diagnostics">,
+def visual_studio_diagnostics_format : FF<"vs-diagnostics">,
HelpText<"Format diagnostics for Visual Studio compatibility">;
// Aliases
@@ -517,7 +528,6 @@ def: Separate<["-"], "F">, Alias<filter>, HelpText<"Alias for --filter">;
def: Separate<["-"], "b">, Alias<format>, HelpText<"Alias for --format">;
def: JoinedOrSeparate<["-"], "l">, Alias<library>, HelpText<"Alias for --library">;
def: JoinedOrSeparate<["-"], "L">, Alias<library_path>, HelpText<"Alias for --library-path">;
-def: F<"no-pic-executable">, Alias<no_pie>, HelpText<"Alias for --no-pie">;
def: Flag<["-"], "n">, Alias<nmagic>, HelpText<"Alias for --nmagic">;
def: Flag<["-"], "N">, Alias<omagic>, HelpText<"Alias for --omagic">;
def: Joined<["--"], "output=">, Alias<o>, HelpText<"Alias for -o">;
@@ -558,14 +568,15 @@ def lto_cs_profile_generate: FF<"lto-cs-profile-generate">,
HelpText<"Perform context sensitive PGO instrumentation">;
def lto_cs_profile_file: JJ<"lto-cs-profile-file=">,
HelpText<"Context sensitive profile file path">;
+defm lto_pgo_warn_mismatch: BB<"lto-pgo-warn-mismatch",
+ "turn on warnings about profile cfg mismatch (default)>",
+ "turn off warnings about profile cfg mismatch">;
def lto_obj_path_eq: JJ<"lto-obj-path=">;
def lto_sample_profile: JJ<"lto-sample-profile=">,
HelpText<"Sample profile file path">;
defm lto_whole_program_visibility: BB<"lto-whole-program-visibility",
"Asserts that the LTO link has whole program visibility",
"Asserts that the LTO link does not have whole program visibility">;
-def lto_pseudo_probe_for_profiling: F<"lto-pseudo-probe-for-profiling">,
- HelpText<"Emit pseudo probes for sample profiling">;
def disable_verify: F<"disable-verify">;
defm mllvm: Eq<"mllvm", "Additional arguments to forward to LLVM's option processing">;
def opt_remarks_filename: Separate<["--"], "opt-remarks-filename">,
@@ -641,8 +652,6 @@ def: F<"plugin-opt=opt-remarks-with-hotness">,
def: J<"plugin-opt=opt-remarks-hotness-threshold=">,
Alias<opt_remarks_hotness_threshold>,
HelpText<"Alias for --opt-remarks-hotness-threshold">;
-def: J<"plugin-opt=pseudo-probe-for-profiling">,
- Alias<lto_pseudo_probe_for_profiling>, HelpText<"Alias for --lto-pseudo-probe-for-profiling">;
def: J<"plugin-opt=sample-profile=">,
Alias<lto_sample_profile>, HelpText<"Alias for --lto-sample-profile">;
def: F<"plugin-opt=save-temps">, Alias<save_temps>, HelpText<"Alias for --save-temps">;
@@ -687,14 +696,10 @@ def plugin_opt_eq : J<"plugin-opt=">;
def: F<"detect-odr-violations">;
def: Flag<["-"], "g">;
def: F<"long-plt">;
-def: F<"no-add-needed">;
def: F<"no-copy-dt-needed-entries">;
def: F<"no-ctors-in-init-array">;
def: F<"no-keep-memory">;
-def: F<"no-pipeline-knowledge">;
-def: F<"no-relax">;
def: F<"no-warn-mismatch">;
-def: Flag<["-"], "p">;
def: Separate<["--", "-"], "rpath-link">;
def: J<"rpath-link=">;
def: F<"secure-plt">;
@@ -704,7 +709,6 @@ def: F<"warn-execstack">;
def: F<"warn-once">;
def: F<"warn-shared-textrel">;
def: JoinedOrSeparate<["-"], "G">;
-def: F<"Qy">;
// Hidden option used for testing MIPS multi-GOT implementation.
defm mips_got_size:
diff --git a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp
index 088d1cdc65e4..cc4f0688701a 100644
--- a/lld/ELF/OutputSections.cpp
+++ b/lld/ELF/OutputSections.cpp
@@ -328,7 +328,7 @@ template <class ELFT> void OutputSection::writeTo(uint8_t *buf) {
if (type == SHT_NOBITS)
return;
- // If -compress-debug-section is specified and if this is a debug section,
+  // If --compress-debug-sections is specified and if this is a debug section,
// we've already compressed section contents. If that's the case,
// just write it down.
if (!compressedData.empty()) {
diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
index e3cc210972b2..9c22ce7d6013 100644
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -101,8 +101,11 @@ void elf::reportRangeError(uint8_t *loc, const Relocation &rel, const Twine &v,
ErrorPlace errPlace = getErrorPlace(loc);
std::string hint;
if (rel.sym && !rel.sym->isLocal())
- hint = "; references " + lld::toString(*rel.sym) +
- getDefinedLocation(*rel.sym);
+ hint = "; references " + lld::toString(*rel.sym);
+ if (!errPlace.srcLoc.empty())
+ hint += "\n>>> referenced by " + errPlace.srcLoc;
+ if (rel.sym && !rel.sym->isLocal())
+ hint += getDefinedLocation(*rel.sym);
if (errPlace.isec && errPlace.isec->name.startswith(".debug"))
hint += "; consider recompiling with -fdebug-types-section to reduce size "
@@ -124,213 +127,27 @@ void elf::reportRangeError(uint8_t *loc, int64_t v, int n, const Symbol &sym,
Twine(llvm::maxIntN(n)) + "]" + hint);
}
-namespace {
-// Build a bitmask with one bit set for each RelExpr.
-//
-// Constexpr function arguments can't be used in static asserts, so we
-// use template arguments to build the mask.
-// But function template partial specializations don't exist (needed
-// for base case of the recursion), so we need a dummy struct.
-template <RelExpr... Exprs> struct RelExprMaskBuilder {
- static inline uint64_t build() { return 0; }
-};
-
-// Specialization for recursive case.
-template <RelExpr Head, RelExpr... Tail>
-struct RelExprMaskBuilder<Head, Tail...> {
- static inline uint64_t build() {
- static_assert(0 <= Head && Head < 64,
- "RelExpr is too large for 64-bit mask!");
- return (uint64_t(1) << Head) | RelExprMaskBuilder<Tail...>::build();
- }
-};
-} // namespace
+// Build a 64-bit bitmask with one bit set for each listed value in [0, 64);
+// values outside that range are ignored.
+static constexpr uint64_t buildMask() { return 0; }
-// Return true if `Expr` is one of `Exprs`.
-// There are fewer than 64 RelExpr's, so we can represent any set of
-// RelExpr's as a constant bit mask and test for membership with a
-// couple cheap bitwise operations.
-template <RelExpr... Exprs> bool oneof(RelExpr expr) {
- assert(0 <= expr && (int)expr < 64 &&
- "RelExpr is too large for 64-bit mask!");
- return (uint64_t(1) << expr) & RelExprMaskBuilder<Exprs...>::build();
+template <typename... Tails>
+static constexpr uint64_t buildMask(int head, Tails... tails) {
+ return (0 <= head && head < 64 ? uint64_t(1) << head : 0) |
+ buildMask(tails...);
}
-// This function is similar to the `handleTlsRelocation`. MIPS does not
-// support any relaxations for TLS relocations so by factoring out MIPS
-// handling in to the separate function we can simplify the code and do not
-// pollute other `handleTlsRelocation` by MIPS `ifs` statements.
-// Mips has a custom MipsGotSection that handles the writing of GOT entries
-// without dynamic relocations.
-static unsigned handleMipsTlsRelocation(RelType type, Symbol &sym,
- InputSectionBase &c, uint64_t offset,
- int64_t addend, RelExpr expr) {
- if (expr == R_MIPS_TLSLD) {
- in.mipsGot->addTlsIndex(*c.file);
- c.relocations.push_back({expr, type, offset, addend, &sym});
- return 1;
- }
- if (expr == R_MIPS_TLSGD) {
- in.mipsGot->addDynTlsEntry(*c.file, sym);
- c.relocations.push_back({expr, type, offset, addend, &sym});
- return 1;
- }
- return 0;
-}
-
-// Notes about General Dynamic and Local Dynamic TLS models below. They may
-// require the generation of a pair of GOT entries that have associated dynamic
-// relocations. The pair of GOT entries created are of the form GOT[e0] Module
-// Index (Used to find pointer to TLS block at run-time) GOT[e1] Offset of
-// symbol in TLS block.
-//
-// Returns the number of relocations processed.
-template <class ELFT>
-static unsigned
-handleTlsRelocation(RelType type, Symbol &sym, InputSectionBase &c,
- typename ELFT::uint offset, int64_t addend, RelExpr expr) {
- if (!sym.isTls())
- return 0;
-
- if (config->emachine == EM_MIPS)
- return handleMipsTlsRelocation(type, sym, c, offset, addend, expr);
-
- if (oneof<R_AARCH64_TLSDESC_PAGE, R_TLSDESC, R_TLSDESC_CALL, R_TLSDESC_PC>(
- expr) &&
- config->shared) {
- if (in.got->addDynTlsEntry(sym)) {
- uint64_t off = in.got->getGlobalDynOffset(sym);
- mainPart->relaDyn->addAddendOnlyRelocIfNonPreemptible(
- target->tlsDescRel, in.got, off, sym, target->tlsDescRel);
- }
- if (expr != R_TLSDESC_CALL)
- c.relocations.push_back({expr, type, offset, addend, &sym});
- return 1;
- }
-
- // ARM, Hexagon and RISC-V do not support GD/LD to IE/LE relaxation. For
- // PPC64, if the file has missing R_PPC64_TLSGD/R_PPC64_TLSLD, disable
- // relaxation as well.
- bool toExecRelax = !config->shared && config->emachine != EM_ARM &&
- config->emachine != EM_HEXAGON &&
- config->emachine != EM_RISCV &&
- !c.file->ppc64DisableTLSRelax;
-
- // If we are producing an executable and the symbol is non-preemptable, it
- // must be defined and the code sequence can be relaxed to use Local-Exec.
- //
- // ARM and RISC-V do not support any relaxations for TLS relocations, however,
- // we can omit the DTPMOD dynamic relocations and resolve them at link time
- // because them are always 1. This may be necessary for static linking as
- // DTPMOD may not be expected at load time.
- bool isLocalInExecutable = !sym.isPreemptible && !config->shared;
-
- // Local Dynamic is for access to module local TLS variables, while still
- // being suitable for being dynamically loaded via dlopen. GOT[e0] is the
- // module index, with a special value of 0 for the current module. GOT[e1] is
- // unused. There only needs to be one module index entry.
- if (oneof<R_TLSLD_GOT, R_TLSLD_GOTPLT, R_TLSLD_PC, R_TLSLD_HINT>(
- expr)) {
- // Local-Dynamic relocs can be relaxed to Local-Exec.
- if (toExecRelax) {
- c.relocations.push_back(
- {target->adjustTlsExpr(type, R_RELAX_TLS_LD_TO_LE), type, offset,
- addend, &sym});
- return target->getTlsGdRelaxSkip(type);
- }
- if (expr == R_TLSLD_HINT)
- return 1;
- if (in.got->addTlsIndex()) {
- if (isLocalInExecutable)
- in.got->relocations.push_back(
- {R_ADDEND, target->symbolicRel, in.got->getTlsIndexOff(), 1, &sym});
- else
- mainPart->relaDyn->addReloc(
- {target->tlsModuleIndexRel, in.got, in.got->getTlsIndexOff()});
- }
- c.relocations.push_back({expr, type, offset, addend, &sym});
- return 1;
- }
-
- // Local-Dynamic relocs can be relaxed to Local-Exec.
- if (expr == R_DTPREL && toExecRelax) {
- c.relocations.push_back({target->adjustTlsExpr(type, R_RELAX_TLS_LD_TO_LE),
- type, offset, addend, &sym});
- return 1;
- }
-
- // Local-Dynamic sequence where offset of tls variable relative to dynamic
- // thread pointer is stored in the got. This cannot be relaxed to Local-Exec.
- if (expr == R_TLSLD_GOT_OFF) {
- if (!sym.isInGot()) {
- in.got->addEntry(sym);
- uint64_t off = sym.getGotOffset();
- in.got->relocations.push_back(
- {R_ABS, target->tlsOffsetRel, off, 0, &sym});
- }
- c.relocations.push_back({expr, type, offset, addend, &sym});
- return 1;
- }
-
- if (oneof<R_AARCH64_TLSDESC_PAGE, R_TLSDESC, R_TLSDESC_CALL, R_TLSDESC_PC,
- R_TLSGD_GOT, R_TLSGD_GOTPLT, R_TLSGD_PC>(expr)) {
- if (!toExecRelax) {
- if (in.got->addDynTlsEntry(sym)) {
- uint64_t off = in.got->getGlobalDynOffset(sym);
-
- if (isLocalInExecutable)
- // Write one to the GOT slot.
- in.got->relocations.push_back(
- {R_ADDEND, target->symbolicRel, off, 1, &sym});
- else
- mainPart->relaDyn->addSymbolReloc(target->tlsModuleIndexRel, in.got,
- off, sym);
-
- // If the symbol is preemptible we need the dynamic linker to write
- // the offset too.
- uint64_t offsetOff = off + config->wordsize;
- if (sym.isPreemptible)
- mainPart->relaDyn->addSymbolReloc(target->tlsOffsetRel, in.got,
- offsetOff, sym);
- else
- in.got->relocations.push_back(
- {R_ABS, target->tlsOffsetRel, offsetOff, 0, &sym});
- }
- c.relocations.push_back({expr, type, offset, addend, &sym});
- return 1;
- }
-
- // Global-Dynamic relocs can be relaxed to Initial-Exec or Local-Exec
- // depending on the symbol being locally defined or not.
- if (sym.isPreemptible) {
- c.relocations.push_back(
- {target->adjustTlsExpr(type, R_RELAX_TLS_GD_TO_IE), type, offset,
- addend, &sym});
- if (!sym.isInGot()) {
- in.got->addEntry(sym);
- mainPart->relaDyn->addSymbolReloc(target->tlsGotRel, in.got,
- sym.getGotOffset(), sym);
- }
- } else {
- c.relocations.push_back(
- {target->adjustTlsExpr(type, R_RELAX_TLS_GD_TO_LE), type, offset,
- addend, &sym});
- }
- return target->getTlsGdRelaxSkip(type);
- }
-
- // Initial-Exec relocs can be relaxed to Local-Exec if the symbol is locally
- // defined.
- if (oneof<R_GOT, R_GOTPLT, R_GOT_PC, R_AARCH64_GOT_PAGE_PC, R_GOT_OFF,
- R_TLSIE_HINT>(expr) &&
- toExecRelax && isLocalInExecutable) {
- c.relocations.push_back({R_RELAX_TLS_IE_TO_LE, type, offset, addend, &sym});
- return 1;
- }
-
- if (expr == R_TLSIE_HINT)
- return 1;
- return 0;
+// Return true if `Expr` is one of `Exprs`.
+// There are more than 64 but less than 128 RelExprs, so we divide the set of
+// exprs into [0, 64) and [64, 128) and represent each range as a constant
+// 64-bit mask. Then we decide which mask to test depending on the value of
+// expr and use a simple shift and bitwise-and to test for membership.
+template <RelExpr... Exprs> static bool oneof(RelExpr expr) {
+ assert(0 <= expr && (int)expr < 128 &&
+ "RelExpr is too large for 128-bit mask!");
+
+ if (expr >= 64)
+ return (uint64_t(1) << (expr - 64)) & buildMask((Exprs - 64)...);
+ return (uint64_t(1) << expr) & buildMask(Exprs...);
}
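A compile-and-run sketch of the same split-mask membership test, using a tiny hypothetical enum in place of lld's RelExpr (which has more than 64 enumerators):

    #include <cassert>
    #include <cstdint>

    enum RelExpr { R_A = 3, R_B = 63, R_C = 64, R_D = 100 };

    static constexpr uint64_t buildMask() { return 0; }
    template <typename... Tails>
    static constexpr uint64_t buildMask(int head, Tails... tails) {
      // Values outside [0, 64) contribute no bit to this particular mask.
      return (0 <= head && head < 64 ? uint64_t(1) << head : 0) |
             buildMask(tails...);
    }

    template <RelExpr... Exprs> static bool oneof(RelExpr expr) {
      assert(0 <= expr && (int)expr < 128 && "RelExpr is too large for 128-bit mask!");
      // The high half tests against a mask built from (Exprs - 64),
      // the low half against Exprs directly.
      if (expr >= 64)
        return (uint64_t(1) << (expr - 64)) & buildMask((Exprs - 64)...);
      return (uint64_t(1) << expr) & buildMask(Exprs...);
    }

    int main() {
      assert(oneof<R_A, R_D>(R_D));   // 100 is found via the [64, 128) mask
      assert(!oneof<R_A, R_D>(R_B));  // 63 is not in the set
      return 0;
    }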
static RelType getMipsPairType(RelType type, bool isLocal) {
@@ -374,7 +191,8 @@ static bool isAbsoluteValue(const Symbol &sym) {
// Returns true if Expr refers a PLT entry.
static bool needsPlt(RelExpr expr) {
- return oneof<R_PLT_PC, R_PPC32_PLTREL, R_PPC64_CALL_PLT, R_PLT>(expr);
+ return oneof<R_PLT, R_PLT_PC, R_PLT_GOTPLT, R_PPC32_PLTREL, R_PPC64_CALL_PLT>(
+ expr);
}
// Returns true if Expr refers to a GOT entry. Note that this function
@@ -394,73 +212,6 @@ static bool isRelExpr(RelExpr expr) {
R_RISCV_PC_INDIRECT, R_PPC64_RELAX_GOT_PC>(expr);
}
-// Returns true if a given relocation can be computed at link-time.
-//
-// For instance, we know the offset from a relocation to its target at
-// link-time if the relocation is PC-relative and refers a
-// non-interposable function in the same executable. This function
-// will return true for such relocation.
-//
-// If this function returns false, that means we need to emit a
-// dynamic relocation so that the relocation will be fixed at load-time.
-static bool isStaticLinkTimeConstant(RelExpr e, RelType type, const Symbol &sym,
- InputSectionBase &s, uint64_t relOff) {
- // These expressions always compute a constant
- if (oneof<R_DTPREL, R_GOTPLT, R_GOT_OFF, R_TLSLD_GOT_OFF,
- R_MIPS_GOT_LOCAL_PAGE, R_MIPS_GOTREL, R_MIPS_GOT_OFF,
- R_MIPS_GOT_OFF32, R_MIPS_GOT_GP_PC, R_MIPS_TLSGD,
- R_AARCH64_GOT_PAGE_PC, R_GOT_PC, R_GOTONLY_PC, R_GOTPLTONLY_PC,
- R_PLT_PC, R_TLSGD_GOT, R_TLSGD_GOTPLT, R_TLSGD_PC, R_PPC32_PLTREL,
- R_PPC64_CALL_PLT, R_PPC64_RELAX_TOC, R_RISCV_ADD, R_TLSDESC_CALL,
- R_TLSDESC_PC, R_AARCH64_TLSDESC_PAGE, R_TLSLD_HINT, R_TLSIE_HINT,
- R_AARCH64_GOT_PAGE>(
- e))
- return true;
-
- // These never do, except if the entire file is position dependent or if
- // only the low bits are used.
- if (e == R_GOT || e == R_PLT || e == R_TLSDESC)
- return target->usesOnlyLowPageBits(type) || !config->isPic;
-
- if (sym.isPreemptible)
- return false;
- if (!config->isPic)
- return true;
-
- // The size of a non preemptible symbol is a constant.
- if (e == R_SIZE)
- return true;
-
- // For the target and the relocation, we want to know if they are
- // absolute or relative.
- bool absVal = isAbsoluteValue(sym);
- bool relE = isRelExpr(e);
- if (absVal && !relE)
- return true;
- if (!absVal && relE)
- return true;
- if (!absVal && !relE)
- return target->usesOnlyLowPageBits(type);
-
- assert(absVal && relE);
-
- // Allow R_PLT_PC (optimized to R_PC here) to a hidden undefined weak symbol
- // in PIC mode. This is a little strange, but it allows us to link function
- // calls to such symbols (e.g. glibc/stdlib/exit.c:__run_exit_handlers).
- // Normally such a call will be guarded with a comparison, which will load a
- // zero from the GOT.
- if (sym.isUndefWeak())
- return true;
-
- // We set the final symbols values for linker script defined symbols later.
- // They always can be computed as a link time constant.
- if (sym.scriptDefined)
- return true;
-
- error("relocation " + toString(type) + " cannot refer to absolute symbol: " +
- toString(sym) + getLocation(s, sym, relOff));
- return true;
-}
static RelExpr toPlt(RelExpr expr) {
switch (expr) {
@@ -486,6 +237,8 @@ static RelExpr fromPlt(RelExpr expr) {
return R_PPC64_CALL;
case R_PLT:
return R_ABS;
+ case R_PLT_GOTPLT:
+ return R_GOTPLTREL;
default:
return expr;
}
@@ -527,6 +280,13 @@ static SmallSet<SharedSymbol *, 4> getSymbolsAt(SharedSymbol &ss) {
if (auto *alias = dyn_cast_or_null<SharedSymbol>(sym))
ret.insert(alias);
}
+
+ // The loop does not check SHT_GNU_verneed, so ret does not contain
+ // non-default version symbols. If ss has a non-default version, ret won't
+ // contain ss. Just add ss unconditionally. If a non-default version alias is
+ // separately copy relocated, it and ss will have different addresses.
+ // Fortunately this case is impractical and fails with GNU ld as well.
+ ret.insert(&ss);
return ret;
}
@@ -1080,33 +840,32 @@ static void addPltEntry(PltSection *plt, GotPltSection *gotPlt,
static void addGotEntry(Symbol &sym) {
in.got->addEntry(sym);
-
- RelExpr expr = sym.isTls() ? R_TPREL : R_ABS;
uint64_t off = sym.getGotOffset();
- // If a GOT slot value can be calculated at link-time, which is now,
- // we can just fill that out.
- //
- // (We don't actually write a value to a GOT slot right now, but we
- // add a static relocation to a Relocations vector so that
- // InputSection::relocate will do the work for us. We may be able
- // to just write a value now, but it is a TODO.)
- bool isLinkTimeConstant =
- !sym.isPreemptible && (!config->isPic || isAbsolute(sym));
- if (isLinkTimeConstant) {
- in.got->relocations.push_back({expr, target->symbolicRel, off, 0, &sym});
+ // If preemptible, emit a GLOB_DAT relocation.
+ if (sym.isPreemptible) {
+ mainPart->relaDyn->addReloc({target->gotRel, in.got, off,
+ DynamicReloc::AgainstSymbol, sym, 0, R_ABS});
return;
}
- // Otherwise, we emit a dynamic relocation to .rel[a].dyn so that
- // the GOT slot will be fixed at load-time.
- if (!sym.isTls() && !sym.isPreemptible && config->isPic) {
+ // Otherwise, the value is either a link-time constant or the load base
+ // plus a constant.
+ if (!config->isPic || isAbsolute(sym))
+ in.got->relocations.push_back({R_ABS, target->symbolicRel, off, 0, &sym});
+ else
addRelativeReloc(in.got, off, sym, 0, R_ABS, target->symbolicRel);
+}
+
+static void addTpOffsetGotEntry(Symbol &sym) {
+ in.got->addEntry(sym);
+ uint64_t off = sym.getGotOffset();
+ if (!sym.isPreemptible && !config->isPic) {
+ in.got->relocations.push_back({R_TPREL, target->symbolicRel, off, 0, &sym});
return;
}
mainPart->relaDyn->addAddendOnlyRelocIfNonPreemptible(
- sym.isTls() ? target->tlsGotRel : target->gotRel, in.got, off, sym,
- target->symbolicRel);
+ target->tlsGotRel, in.got, off, sym, target->symbolicRel);
}
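Condensed from the two helpers above (as implemented here, not a general ABI statement), the relocation choice is:

    addGotEntry(sym):
      sym is preemptible            -> dynamic GLOB_DAT-style reloc (target->gotRel)
      !config->isPic || absolute    -> GOT slot filled with a link-time constant (R_ABS)
      otherwise                     -> dynamic RELATIVE reloc via addRelativeReloc
    addTpOffsetGotEntry(sym):       // Initial-Exec TLS slots
      !preemptible && !config->isPic -> static TP-relative value (R_TPREL) at link time
      otherwise                      -> TLS GOT dynamic reloc (target->tlsGotRel)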
// Return true if we can define a symbol in the executable that
@@ -1129,6 +888,71 @@ static bool canDefineSymbolInExecutable(Symbol &sym) {
(sym.isObject() && config->ignoreDataAddressEquality));
}
+// Returns true if a given relocation can be computed at link-time.
+// This only handles relocation types expected in processRelocAux.
+//
+// For instance, we know the offset from a relocation to its target at
+// link-time if the relocation is PC-relative and refers to a
+// non-interposable function in the same executable. This function
+// will return true for such a relocation.
+//
+// If this function returns false, that means we need to emit a
+// dynamic relocation so that the relocation will be fixed at load-time.
+static bool isStaticLinkTimeConstant(RelExpr e, RelType type, const Symbol &sym,
+ InputSectionBase &s, uint64_t relOff) {
+ // These expressions always compute a constant
+ if (oneof<R_GOTPLT, R_GOT_OFF, R_MIPS_GOT_LOCAL_PAGE, R_MIPS_GOTREL,
+ R_MIPS_GOT_OFF, R_MIPS_GOT_OFF32, R_MIPS_GOT_GP_PC,
+ R_AARCH64_GOT_PAGE_PC, R_GOT_PC, R_GOTONLY_PC, R_GOTPLTONLY_PC,
+ R_PLT_PC, R_PLT_GOTPLT, R_PPC32_PLTREL, R_PPC64_CALL_PLT,
+ R_PPC64_RELAX_TOC, R_RISCV_ADD, R_AARCH64_GOT_PAGE>(e))
+ return true;
+
+ // These never do, except if the entire file is position dependent or if
+ // only the low bits are used.
+ if (e == R_GOT || e == R_PLT)
+ return target->usesOnlyLowPageBits(type) || !config->isPic;
+
+ if (sym.isPreemptible)
+ return false;
+ if (!config->isPic)
+ return true;
+
+ // The size of a non preemptible symbol is a constant.
+ if (e == R_SIZE)
+ return true;
+
+ // For the target and the relocation, we want to know if they are
+ // absolute or relative.
+ bool absVal = isAbsoluteValue(sym);
+ bool relE = isRelExpr(e);
+ if (absVal && !relE)
+ return true;
+ if (!absVal && relE)
+ return true;
+ if (!absVal && !relE)
+ return target->usesOnlyLowPageBits(type);
+
+ assert(absVal && relE);
+
+ // Allow R_PLT_PC (optimized to R_PC here) to a hidden undefined weak symbol
+ // in PIC mode. This is a little strange, but it allows us to link function
+ // calls to such symbols (e.g. glibc/stdlib/exit.c:__run_exit_handlers).
+ // Normally such a call will be guarded with a comparison, which will load a
+ // zero from the GOT.
+ if (sym.isUndefWeak())
+ return true;
+
+  // We set the final symbol values for linker-script-defined symbols later.
+  // They can always be computed as a link-time constant.
+ if (sym.scriptDefined)
+ return true;
+
+ error("relocation " + toString(type) + " cannot refer to absolute symbol: " +
+ toString(sym) + getLocation(s, sym, relOff));
+ return true;
+}
+
// The reason we have to do this early scan is as follows
// * To mmap the output file, we need to know the size
// * For that, we need to know how many dynamic relocs we will have.
@@ -1142,10 +966,9 @@ static bool canDefineSymbolInExecutable(Symbol &sym) {
// sections. Given that it is ro, we will need an extra PT_LOAD. This
// complicates things for the dynamic linker and means we would have to reserve
// space for the extra PT_LOAD even if we end up not using it.
-template <class ELFT, class RelTy>
+template <class ELFT>
static void processRelocAux(InputSectionBase &sec, RelExpr expr, RelType type,
- uint64_t offset, Symbol &sym, const RelTy &rel,
- int64_t addend) {
+ uint64_t offset, Symbol &sym, int64_t addend) {
// If the relocation is known to be a link-time constant, we know no dynamic
// relocation will be created, pass the control to relocateAlloc() or
// relocateNonAlloc() to resolve it.
@@ -1273,25 +1096,196 @@ static void processRelocAux(InputSectionBase &sec, RelExpr expr, RelType type,
}
}
- if (config->isPic) {
- if (!canWrite && !isRelExpr(expr))
- errorOrWarn(
- "can't create dynamic relocation " + toString(type) + " against " +
- (sym.getName().empty() ? "local symbol"
- : "symbol: " + toString(sym)) +
- " in readonly segment; recompile object files with -fPIC "
- "or pass '-Wl,-z,notext' to allow text relocations in the output" +
- getLocation(sec, sym, offset));
- else
- errorOrWarn(
- "relocation " + toString(type) + " cannot be used against " +
- (sym.getName().empty() ? "local symbol" : "symbol " + toString(sym)) +
- "; recompile with -fPIC" + getLocation(sec, sym, offset));
- return;
+ errorOrWarn("relocation " + toString(type) + " cannot be used against " +
+ (sym.getName().empty() ? "local symbol"
+ : "symbol '" + toString(sym) + "'") +
+ "; recompile with -fPIC" + getLocation(sec, sym, offset));
+}
+
+// This function is similar to the `handleTlsRelocation`. MIPS does not
+// support any relaxations for TLS relocations so by factoring out MIPS
+// handling in to the separate function we can simplify the code and do not
+// pollute other `handleTlsRelocation` by MIPS `ifs` statements.
+// Mips has a custom MipsGotSection that handles the writing of GOT entries
+// without dynamic relocations.
+static unsigned handleMipsTlsRelocation(RelType type, Symbol &sym,
+ InputSectionBase &c, uint64_t offset,
+ int64_t addend, RelExpr expr) {
+ if (expr == R_MIPS_TLSLD) {
+ in.mipsGot->addTlsIndex(*c.file);
+ c.relocations.push_back({expr, type, offset, addend, &sym});
+ return 1;
+ }
+ if (expr == R_MIPS_TLSGD) {
+ in.mipsGot->addDynTlsEntry(*c.file, sym);
+ c.relocations.push_back({expr, type, offset, addend, &sym});
+ return 1;
+ }
+ return 0;
+}
+
+// Notes about General Dynamic and Local Dynamic TLS models below. They may
+// require the generation of a pair of GOT entries that have associated dynamic
+// relocations. The pair of GOT entries created are of the form GOT[e0] Module
+// Index (Used to find pointer to TLS block at run-time) GOT[e1] Offset of
+// symbol in TLS block.
+//
+// Returns the number of relocations processed.
+template <class ELFT>
+static unsigned
+handleTlsRelocation(RelType type, Symbol &sym, InputSectionBase &c,
+ typename ELFT::uint offset, int64_t addend, RelExpr expr) {
+ if (!sym.isTls())
+ return 0;
+
+ if (config->emachine == EM_MIPS)
+ return handleMipsTlsRelocation(type, sym, c, offset, addend, expr);
+
+ if (oneof<R_AARCH64_TLSDESC_PAGE, R_TLSDESC, R_TLSDESC_CALL, R_TLSDESC_PC,
+ R_TLSDESC_GOTPLT>(expr) &&
+ config->shared) {
+ if (in.got->addDynTlsEntry(sym)) {
+ uint64_t off = in.got->getGlobalDynOffset(sym);
+ mainPart->relaDyn->addAddendOnlyRelocIfNonPreemptible(
+ target->tlsDescRel, in.got, off, sym, target->tlsDescRel);
+ }
+ if (expr != R_TLSDESC_CALL)
+ c.relocations.push_back({expr, type, offset, addend, &sym});
+ return 1;
}
- errorOrWarn("symbol '" + toString(sym) + "' has no type" +
- getLocation(sec, sym, offset));
+ // ARM, Hexagon and RISC-V do not support GD/LD to IE/LE relaxation. For
+ // PPC64, if the file has missing R_PPC64_TLSGD/R_PPC64_TLSLD, disable
+ // relaxation as well.
+ bool toExecRelax = !config->shared && config->emachine != EM_ARM &&
+ config->emachine != EM_HEXAGON &&
+ config->emachine != EM_RISCV &&
+ !c.file->ppc64DisableTLSRelax;
+
+  // If we are producing an executable and the symbol is non-preemptible, it
+  // must be defined and the code sequence can be relaxed to use Local-Exec.
+  //
+  // ARM and RISC-V do not support any relaxations for TLS relocations; however,
+  // we can omit the DTPMOD dynamic relocations and resolve them at link time
+  // because they are always 1. This may be necessary for static linking as
+  // DTPMOD may not be expected at load time.
+ bool isLocalInExecutable = !sym.isPreemptible && !config->shared;
+
+ // Local Dynamic is for access to module local TLS variables, while still
+ // being suitable for being dynamically loaded via dlopen. GOT[e0] is the
+ // module index, with a special value of 0 for the current module. GOT[e1] is
+ // unused. There only needs to be one module index entry.
+ if (oneof<R_TLSLD_GOT, R_TLSLD_GOTPLT, R_TLSLD_PC, R_TLSLD_HINT>(
+ expr)) {
+ // Local-Dynamic relocs can be relaxed to Local-Exec.
+ if (toExecRelax) {
+ c.relocations.push_back(
+ {target->adjustTlsExpr(type, R_RELAX_TLS_LD_TO_LE), type, offset,
+ addend, &sym});
+ return target->getTlsGdRelaxSkip(type);
+ }
+ if (expr == R_TLSLD_HINT)
+ return 1;
+ if (in.got->addTlsIndex()) {
+ if (isLocalInExecutable)
+ in.got->relocations.push_back(
+ {R_ADDEND, target->symbolicRel, in.got->getTlsIndexOff(), 1, &sym});
+ else
+ mainPart->relaDyn->addReloc(
+ {target->tlsModuleIndexRel, in.got, in.got->getTlsIndexOff()});
+ }
+ c.relocations.push_back({expr, type, offset, addend, &sym});
+ return 1;
+ }
+
+ // Local-Dynamic relocs can be relaxed to Local-Exec.
+ if (expr == R_DTPREL) {
+ if (toExecRelax)
+ expr = target->adjustTlsExpr(type, R_RELAX_TLS_LD_TO_LE);
+ c.relocations.push_back({expr, type, offset, addend, &sym});
+ return 1;
+ }
+
+ // Local-Dynamic sequence where offset of tls variable relative to dynamic
+ // thread pointer is stored in the got. This cannot be relaxed to Local-Exec.
+ if (expr == R_TLSLD_GOT_OFF) {
+ if (!sym.isInGot()) {
+ in.got->addEntry(sym);
+ uint64_t off = sym.getGotOffset();
+ in.got->relocations.push_back(
+ {R_ABS, target->tlsOffsetRel, off, 0, &sym});
+ }
+ c.relocations.push_back({expr, type, offset, addend, &sym});
+ return 1;
+ }
+
+ if (oneof<R_AARCH64_TLSDESC_PAGE, R_TLSDESC, R_TLSDESC_CALL, R_TLSDESC_PC,
+ R_TLSDESC_GOTPLT, R_TLSGD_GOT, R_TLSGD_GOTPLT, R_TLSGD_PC>(expr)) {
+ if (!toExecRelax) {
+ if (in.got->addDynTlsEntry(sym)) {
+ uint64_t off = in.got->getGlobalDynOffset(sym);
+
+ if (isLocalInExecutable)
+ // Write one to the GOT slot.
+ in.got->relocations.push_back(
+ {R_ADDEND, target->symbolicRel, off, 1, &sym});
+ else
+ mainPart->relaDyn->addSymbolReloc(target->tlsModuleIndexRel, in.got,
+ off, sym);
+
+ // If the symbol is preemptible we need the dynamic linker to write
+ // the offset too.
+ uint64_t offsetOff = off + config->wordsize;
+ if (sym.isPreemptible)
+ mainPart->relaDyn->addSymbolReloc(target->tlsOffsetRel, in.got,
+ offsetOff, sym);
+ else
+ in.got->relocations.push_back(
+ {R_ABS, target->tlsOffsetRel, offsetOff, 0, &sym});
+ }
+ c.relocations.push_back({expr, type, offset, addend, &sym});
+ return 1;
+ }
+
+ // Global-Dynamic relocs can be relaxed to Initial-Exec or Local-Exec
+ // depending on the symbol being locally defined or not.
+ if (sym.isPreemptible) {
+ c.relocations.push_back(
+ {target->adjustTlsExpr(type, R_RELAX_TLS_GD_TO_IE), type, offset,
+ addend, &sym});
+ if (!sym.isInGot()) {
+ in.got->addEntry(sym);
+ mainPart->relaDyn->addSymbolReloc(target->tlsGotRel, in.got,
+ sym.getGotOffset(), sym);
+ }
+ } else {
+ c.relocations.push_back(
+ {target->adjustTlsExpr(type, R_RELAX_TLS_GD_TO_LE), type, offset,
+ addend, &sym});
+ }
+ return target->getTlsGdRelaxSkip(type);
+ }
+
+ if (oneof<R_GOT, R_GOTPLT, R_GOT_PC, R_AARCH64_GOT_PAGE_PC, R_GOT_OFF,
+ R_TLSIE_HINT>(expr)) {
+ // Initial-Exec relocs can be relaxed to Local-Exec if the symbol is locally
+ // defined.
+ if (toExecRelax && isLocalInExecutable) {
+ c.relocations.push_back(
+ {R_RELAX_TLS_IE_TO_LE, type, offset, addend, &sym});
+ } else if (expr != R_TLSIE_HINT) {
+ if (!sym.isInGot())
+ addTpOffsetGotEntry(sym);
+ // R_GOT needs a relative relocation for PIC on i386 and Hexagon.
+ if (expr == R_GOT && config->isPic && !target->usesOnlyLowPageBits(type))
+ addRelativeReloc(&c, offset, sym, addend, expr, type);
+ else
+ c.relocations.push_back({expr, type, offset, addend, &sym});
+ }
+ return 1;
+ }
+
+ return 0;
}
template <class ELFT, class RelTy>
@@ -1339,7 +1333,7 @@ static void scanReloc(InputSectionBase &sec, OffsetGetter &getOffset, RelTy *&i,
// have got-based small code model relocs. The .toc sections get placed
// after the end of the linker allocated .got section and we do sort those
// so sections addressed with small code model relocations come first.
- if (isPPC64SmallCodeModelTocReloc(type))
+ if (type == R_PPC64_TOC16 || type == R_PPC64_TOC16_DS)
sec.file->ppc64SmallCodeModelTocRelocs = true;
// Record the TOC entry (.toc + addend) as not relaxable. See the comment in
@@ -1393,8 +1387,9 @@ static void scanReloc(InputSectionBase &sec, OffsetGetter &getOffset, RelTy *&i,
// If the relocation does not emit a GOT or GOTPLT entry but its computation
// uses their addresses, we need GOT or GOTPLT to be created.
//
- // The 4 types that relative GOTPLT are all x86 and x86-64 specific.
- if (oneof<R_GOTPLTONLY_PC, R_GOTPLTREL, R_GOTPLT, R_TLSGD_GOTPLT>(expr)) {
+ // The 6 types that are relative to GOTPLT are all x86 and x86-64 specific.
+ if (oneof<R_GOTPLTONLY_PC, R_GOTPLTREL, R_GOTPLT, R_PLT_GOTPLT,
+ R_TLSDESC_GOTPLT, R_TLSGD_GOTPLT>(expr)) {
in.gotPlt->hasGotPltOffRel = true;
} else if (oneof<R_GOTONLY_PC, R_GOTREL, R_PPC64_TOCBASE, R_PPC64_RELAX_TOC>(
expr)) {
@@ -1536,7 +1531,7 @@ static void scanReloc(InputSectionBase &sec, OffsetGetter &getOffset, RelTy *&i,
}
}
- processRelocAux<ELFT>(sec, expr, type, offset, sym, rel, addend);
+ processRelocAux<ELFT>(sec, expr, type, offset, sym, addend);
}
// R_PPC64_TLSGD/R_PPC64_TLSLD is required to mark `bl __tls_get_addr` for
@@ -1607,10 +1602,11 @@ static void scanRelocs(InputSectionBase &sec, ArrayRef<RelTy> rels) {
}
template <class ELFT> void elf::scanRelocations(InputSectionBase &s) {
- if (s.areRelocsRela)
- scanRelocs<ELFT>(s, s.relas<ELFT>());
+ const RelsOrRelas<ELFT> rels = s.template relsOrRelas<ELFT>();
+ if (rels.areRelocsRel())
+ scanRelocs<ELFT>(s, rels.rels);
else
- scanRelocs<ELFT>(s, s.rels<ELFT>());
+ scanRelocs<ELFT>(s, rels.relas);
}
static bool mergeCmp(const InputSection *a, const InputSection *b) {
diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h
index a702aac183a9..86e6cf4bc1f5 100644
--- a/lld/ELF/Relocations.h
+++ b/lld/ELF/Relocations.h
@@ -45,6 +45,7 @@ enum RelExpr {
R_PC,
R_PLT,
R_PLT_PC,
+ R_PLT_GOTPLT,
R_RELAX_GOT_PC,
R_RELAX_GOT_PC_NOPIC,
R_RELAX_TLS_GD_TO_IE,
@@ -62,6 +63,7 @@ enum RelExpr {
R_TLSDESC,
R_TLSDESC_CALL,
R_TLSDESC_PC,
+ R_TLSDESC_GOTPLT,
R_TLSGD_GOT,
R_TLSGD_GOTPLT,
R_TLSGD_PC,
diff --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp
index 2c980eb810c7..ad3b3e61ad59 100644
--- a/lld/ELF/ScriptParser.cpp
+++ b/lld/ELF/ScriptParser.cpp
@@ -131,7 +131,7 @@ private:
std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>>
readSymbols();
- // True if a script being read is in a subdirectory specified by -sysroot.
+ // True if a script being read is in the --sysroot directory.
bool isUnderSysroot = false;
// A set to detect an INCLUDE() cycle.
@@ -1429,8 +1429,9 @@ Expr ScriptParser::readPrimary() {
return [=] { return *val; };
// Tok is a symbol name.
- tok = unquote(tok);
- if (!isValidSymbolName(tok))
+ if (tok.startswith("\""))
+ tok = unquote(tok);
+ else if (!isValidSymbolName(tok))
setError("malformed number: " + tok);
script->referencedSymbols.push_back(tok);
return [=] { return script->getSymbolValue(tok, location); };
@@ -1496,9 +1497,9 @@ void ScriptParser::readAnonymousDeclaration() {
std::vector<SymbolVersion> globals;
std::tie(locals, globals) = readSymbols();
for (const SymbolVersion &pat : locals)
- config->versionDefinitions[VER_NDX_LOCAL].patterns.push_back(pat);
+ config->versionDefinitions[VER_NDX_LOCAL].localPatterns.push_back(pat);
for (const SymbolVersion &pat : globals)
- config->versionDefinitions[VER_NDX_GLOBAL].patterns.push_back(pat);
+ config->versionDefinitions[VER_NDX_GLOBAL].nonLocalPatterns.push_back(pat);
expect(";");
}
@@ -1510,13 +1511,12 @@ void ScriptParser::readVersionDeclaration(StringRef verStr) {
std::vector<SymbolVersion> locals;
std::vector<SymbolVersion> globals;
std::tie(locals, globals) = readSymbols();
- for (const SymbolVersion &pat : locals)
- config->versionDefinitions[VER_NDX_LOCAL].patterns.push_back(pat);
// Create a new version definition and add that to the global symbols.
VersionDefinition ver;
ver.name = verStr;
- ver.patterns = globals;
+ ver.nonLocalPatterns = std::move(globals);
+ ver.localPatterns = std::move(locals);
ver.id = config->versionDefinitions.size();
config->versionDefinitions.push_back(ver);
diff --git a/lld/ELF/SymbolTable.cpp b/lld/ELF/SymbolTable.cpp
index 70aea288c53f..c309957ee5ba 100644
--- a/lld/ELF/SymbolTable.cpp
+++ b/lld/ELF/SymbolTable.cpp
@@ -32,7 +32,7 @@ using namespace lld::elf;
SymbolTable *elf::symtab;
void SymbolTable::wrap(Symbol *sym, Symbol *real, Symbol *wrap) {
- // Swap symbols as instructed by -wrap.
+ // Redirect __real_foo to the original foo and foo to the original __wrap_foo.
int &idx1 = symMap[CachedHashStringRef(sym->getName())];
int &idx2 = symMap[CachedHashStringRef(real->getName())];
int &idx3 = symMap[CachedHashStringRef(wrap->getName())];
@@ -134,9 +134,20 @@ static bool canBeVersioned(const Symbol &sym) {
StringMap<std::vector<Symbol *>> &SymbolTable::getDemangledSyms() {
if (!demangledSyms) {
demangledSyms.emplace();
+ std::string demangled;
for (Symbol *sym : symVector)
- if (canBeVersioned(*sym))
- (*demangledSyms)[demangleItanium(sym->getName())].push_back(sym);
+ if (canBeVersioned(*sym)) {
+ StringRef name = sym->getName();
+ size_t pos = name.find('@');
+ if (pos == std::string::npos)
+ demangled = demangleItanium(name);
+ else if (pos + 1 == name.size() || name[pos + 1] == '@')
+ demangled = demangleItanium(name.substr(0, pos));
+ else
+ demangled =
+ (demangleItanium(name.substr(0, pos)) + name.substr(pos)).str();
+ (*demangledSyms)[demangled].push_back(sym);
+ }
}
return *demangledSyms;
}
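The '@' handling above keys the demangled-name map by the part before the version suffix, re-appending the suffix only for a non-default version ("f@v1"), while a default version ("f@@v1") or a trailing '@' is dropped. A self-contained sketch of that splitting; itaniumDemangle() below is a placeholder, not LLVM's demangler API.

#include <cstdio>
#include <string>

// Stand-in for the real Itanium demangler.
static std::string itaniumDemangle(const std::string &s) {
  return "[demangled:" + s + "]";
}

static std::string demangleVersioned(const std::string &name) {
  size_t pos = name.find('@');
  if (pos == std::string::npos)
    return itaniumDemangle(name);                      // no version suffix
  if (pos + 1 == name.size() || name[pos + 1] == '@')
    return itaniumDemangle(name.substr(0, pos));       // default version: drop it
  return itaniumDemangle(name.substr(0, pos)) + name.substr(pos); // keep "@v1"
}

int main() { std::puts(demangleVersioned("_Z1fv@v1").c_str()); }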
@@ -150,29 +161,38 @@ std::vector<Symbol *> SymbolTable::findByVersion(SymbolVersion ver) {
return {};
}
-std::vector<Symbol *> SymbolTable::findAllByVersion(SymbolVersion ver) {
+std::vector<Symbol *> SymbolTable::findAllByVersion(SymbolVersion ver,
+ bool includeNonDefault) {
std::vector<Symbol *> res;
SingleStringMatcher m(ver.name);
+ auto check = [&](StringRef name) {
+ size_t pos = name.find('@');
+ if (!includeNonDefault)
+ return pos == StringRef::npos;
+ return !(pos + 1 < name.size() && name[pos + 1] == '@');
+ };
if (ver.isExternCpp) {
for (auto &p : getDemangledSyms())
if (m.match(p.first()))
- res.insert(res.end(), p.second.begin(), p.second.end());
+ for (Symbol *sym : p.second)
+ if (check(sym->getName()))
+ res.push_back(sym);
return res;
}
for (Symbol *sym : symVector)
- if (canBeVersioned(*sym) && m.match(sym->getName()))
+ if (canBeVersioned(*sym) && check(sym->getName()) &&
+ m.match(sym->getName()))
res.push_back(sym);
return res;
}
-// Handles -dynamic-list.
void SymbolTable::handleDynamicList() {
for (SymbolVersion &ver : config->dynamicList) {
std::vector<Symbol *> syms;
if (ver.hasWildcard)
- syms = findAllByVersion(ver);
+ syms = findAllByVersion(ver, /*includeNonDefault=*/true);
else
syms = findByVersion(ver);
@@ -181,21 +201,13 @@ void SymbolTable::handleDynamicList() {
}
}
-// Set symbol versions to symbols. This function handles patterns
-// containing no wildcard characters.
-void SymbolTable::assignExactVersion(SymbolVersion ver, uint16_t versionId,
- StringRef versionName) {
- if (ver.hasWildcard)
- return;
-
+// Set symbol versions to symbols. This function handles patterns containing no
+// wildcard characters. Return false if no symbol definition matches ver.
+bool SymbolTable::assignExactVersion(SymbolVersion ver, uint16_t versionId,
+ StringRef versionName,
+ bool includeNonDefault) {
// Get a list of symbols which we need to assign the version to.
std::vector<Symbol *> syms = findByVersion(ver);
- if (syms.empty()) {
- if (!config->undefinedVersion)
- error("version script assignment of '" + versionName + "' to symbol '" +
- ver.name + "' failed: symbol not defined");
- return;
- }
auto getName = [](uint16_t ver) -> std::string {
if (ver == VER_NDX_LOCAL)
@@ -207,10 +219,11 @@ void SymbolTable::assignExactVersion(SymbolVersion ver, uint16_t versionId,
// Assign the version.
for (Symbol *sym : syms) {
- // Skip symbols containing version info because symbol versions
- // specified by symbol names take precedence over version scripts.
- // See parseSymbolVersion().
- if (sym->getName().contains('@'))
+ // For a non-local versionId, skip symbols containing version info because
+ // symbol versions specified by symbol names take precedence over version
+ // scripts. See parseSymbolVersion().
+ if (!includeNonDefault && versionId != VER_NDX_LOCAL &&
+ sym->getName().contains('@'))
continue;
// If the version has not been assigned, verdefIndex is -1. Use an arbitrary
@@ -225,13 +238,15 @@ void SymbolTable::assignExactVersion(SymbolVersion ver, uint16_t versionId,
warn("attempt to reassign symbol '" + ver.name + "' of " +
getName(sym->versionId) + " to " + getName(versionId));
}
+ return !syms.empty();
}
-void SymbolTable::assignWildcardVersion(SymbolVersion ver, uint16_t versionId) {
+void SymbolTable::assignWildcardVersion(SymbolVersion ver, uint16_t versionId,
+ bool includeNonDefault) {
// Exact matching takes precedence over fuzzy matching,
// so we set a version to a symbol only if no version has been assigned
// to the symbol. This behavior is compatible with GNU.
- for (Symbol *sym : findAllByVersion(ver))
+ for (Symbol *sym : findAllByVersion(ver, includeNonDefault))
if (sym->verdefIndex == UINT32_C(-1)) {
sym->verdefIndex = 0;
sym->versionId = versionId;
@@ -244,26 +259,60 @@ void SymbolTable::assignWildcardVersion(SymbolVersion ver, uint16_t versionId) {
// script file, the script does not actually define any symbol version,
// but just specifies symbols visibilities.
void SymbolTable::scanVersionScript() {
+ SmallString<128> buf;
// First, we assign versions to exact matching symbols,
// i.e. version definitions not containing any glob meta-characters.
- for (VersionDefinition &v : config->versionDefinitions)
- for (SymbolVersion &pat : v.patterns)
- assignExactVersion(pat, v.id, v.name);
+ std::vector<Symbol *> syms;
+ for (VersionDefinition &v : config->versionDefinitions) {
+ auto assignExact = [&](SymbolVersion pat, uint16_t id, StringRef ver) {
+ bool found =
+ assignExactVersion(pat, id, ver, /*includeNonDefault=*/false);
+ buf.clear();
+ found |= assignExactVersion({(pat.name + "@" + v.name).toStringRef(buf),
+ pat.isExternCpp, /*hasWildCard=*/false},
+ id, ver, /*includeNonDefault=*/true);
+ if (!found && !config->undefinedVersion)
+ errorOrWarn("version script assignment of '" + ver + "' to symbol '" +
+ pat.name + "' failed: symbol not defined");
+ };
+ for (SymbolVersion &pat : v.nonLocalPatterns)
+ if (!pat.hasWildcard)
+ assignExact(pat, v.id, v.name);
+ for (SymbolVersion pat : v.localPatterns)
+ if (!pat.hasWildcard)
+ assignExact(pat, VER_NDX_LOCAL, "local");
+ }
// Next, assign versions to wildcards that are not "*". Note that because the
// last match takes precedence over previous matches, we iterate over the
// definitions in the reverse order.
- for (VersionDefinition &v : llvm::reverse(config->versionDefinitions))
- for (SymbolVersion &pat : v.patterns)
+ auto assignWildcard = [&](SymbolVersion pat, uint16_t id, StringRef ver) {
+ assignWildcardVersion(pat, id, /*includeNonDefault=*/false);
+ buf.clear();
+ assignWildcardVersion({(pat.name + "@" + ver).toStringRef(buf),
+ pat.isExternCpp, /*hasWildCard=*/true},
+ id,
+ /*includeNonDefault=*/true);
+ };
+ for (VersionDefinition &v : llvm::reverse(config->versionDefinitions)) {
+ for (SymbolVersion &pat : v.nonLocalPatterns)
if (pat.hasWildcard && pat.name != "*")
- assignWildcardVersion(pat, v.id);
+ assignWildcard(pat, v.id, v.name);
+ for (SymbolVersion &pat : v.localPatterns)
+ if (pat.hasWildcard && pat.name != "*")
+ assignWildcard(pat, VER_NDX_LOCAL, v.name);
+ }
// Then, assign versions to "*". In GNU linkers they have lower priority than
// other wildcards.
- for (VersionDefinition &v : config->versionDefinitions)
- for (SymbolVersion &pat : v.patterns)
+ for (VersionDefinition &v : config->versionDefinitions) {
+ for (SymbolVersion &pat : v.nonLocalPatterns)
if (pat.hasWildcard && pat.name == "*")
- assignWildcardVersion(pat, v.id);
+ assignWildcard(pat, v.id, v.name);
+ for (SymbolVersion &pat : v.localPatterns)
+ if (pat.hasWildcard && pat.name == "*")
+ assignWildcard(pat, VER_NDX_LOCAL, v.name);
+ }
// Symbols themselves might know their versions because symbols
// can contain versions in the form of <name>@<version>.
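Because the wildcard passes only assign a version to symbols that have none yet, iterating the version definitions in reverse makes the last matching pattern in the script win, as noted above. A toy model with invented names, where a prefix match stands in for glob matching:

#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>

struct Sym {
  std::string name;
  uint16_t versionId = 0;
  bool assigned = false;
};

// One wildcard pass: only unassigned symbols take the version, so whichever
// definition is processed first (i.e. appears last in the script) wins.
static void assignWildcard(std::vector<Sym> &syms, const std::string &prefix,
                           uint16_t id) {
  for (Sym &s : syms)
    if (!s.assigned && s.name.compare(0, prefix.size(), prefix) == 0) {
      s.assigned = true;
      s.versionId = id;
    }
}

int main() {
  std::vector<Sym> syms = {{"foobar"}};
  // Script order v1 { foo*; }; v2 { foo*; }; is processed in reverse: v2 first.
  assignWildcard(syms, "foo", /*v2=*/3);
  assignWildcard(syms, "foo", /*v1=*/2);
  std::printf("foobar -> version %u\n", (unsigned)syms[0].versionId); // 3
}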
diff --git a/lld/ELF/SymbolTable.h b/lld/ELF/SymbolTable.h
index 507af8d2be75..54c4b1169ed1 100644
--- a/lld/ELF/SymbolTable.h
+++ b/lld/ELF/SymbolTable.h
@@ -65,12 +65,14 @@ public:
private:
std::vector<Symbol *> findByVersion(SymbolVersion ver);
- std::vector<Symbol *> findAllByVersion(SymbolVersion ver);
+ std::vector<Symbol *> findAllByVersion(SymbolVersion ver,
+ bool includeNonDefault);
llvm::StringMap<std::vector<Symbol *>> &getDemangledSyms();
- void assignExactVersion(SymbolVersion ver, uint16_t versionId,
- StringRef versionName);
- void assignWildcardVersion(SymbolVersion ver, uint16_t versionId);
+ bool assignExactVersion(SymbolVersion ver, uint16_t versionId,
+ StringRef versionName, bool includeNonDefault);
+ void assignWildcardVersion(SymbolVersion ver, uint16_t versionId,
+ bool includeNonDefault);
// The order the global symbols are in is not defined. We can use an arbitrary
// order, but it has to be reproducible. That is true even when cross linking.
diff --git a/lld/ELF/Symbols.cpp b/lld/ELF/Symbols.cpp
index 1039be369d9e..5f95a1b3c7ac 100644
--- a/lld/ELF/Symbols.cpp
+++ b/lld/ELF/Symbols.cpp
@@ -64,6 +64,8 @@ Defined *ElfSym::riscvGlobalPointer;
Defined *ElfSym::tlsModuleBase;
DenseMap<const Symbol *, std::pair<const InputFile *, const InputFile *>>
elf::backwardReferences;
+SmallVector<std::tuple<std::string, const InputFile *, const Symbol &>, 0>
+ elf::whyExtract;
static uint64_t getSymVA(const Symbol &sym, int64_t &addend) {
switch (sym.kind()) {
@@ -208,6 +210,9 @@ OutputSection *Symbol::getOutputSection() const {
// If a symbol name contains '@', the characters after that is
// a symbol version name. This function parses that.
void Symbol::parseSymbolVersion() {
+ // Return if localized by a local: pattern in a version script.
+ if (versionId == VER_NDX_LOCAL)
+ return;
StringRef s = getName();
size_t pos = s.find('@');
if (pos == 0 || pos == StringRef::npos)
@@ -318,6 +323,11 @@ void elf::printTraceSymbol(const Symbol *sym) {
message(toString(sym->file) + s + sym->getName());
}
+static void recordWhyExtract(const InputFile *reference,
+ const InputFile &extracted, const Symbol &sym) {
+ whyExtract.emplace_back(toString(reference), &extracted, sym);
+}
+
void elf::maybeWarnUnorderableSymbol(const Symbol *sym) {
if (!config->warnSymbolOrdering)
return;
@@ -368,8 +378,12 @@ bool elf::computeIsPreemptible(const Symbol &sym) {
// If -Bsymbolic or --dynamic-list is specified, or -Bsymbolic-functions is
// specified and the symbol is STT_FUNC, the symbol is preemptible iff it is
- // in the dynamic list.
- if (config->symbolic || (config->bsymbolicFunctions && sym.isFunc()))
+ // in the dynamic list. -Bsymbolic-non-weak-functions is a non-weak subset of
+ // -Bsymbolic-functions.
+ if (config->symbolic ||
+ (config->bsymbolic == BsymbolicKind::Functions && sym.isFunc()) ||
+ (config->bsymbolic == BsymbolicKind::NonWeakFunctions && sym.isFunc() &&
+ sym.binding != STB_WEAK))
return sym.inDynamicList;
return true;
}
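A minimal sketch of the -Bsymbolic decision spelled out above; BsymbolicKind and the Sym fields are simplified stand-ins for lld's Config and Symbol, and --dynamic-list membership is reduced to a single flag.

#include <cstdio>

enum class BsymbolicKind { None, NonWeakFunctions, Functions, All };

struct Sym {
  bool isFunc;
  bool isWeak;
  bool inDynamicList;
};

static bool isPreemptibleUnderBsymbolic(BsymbolicKind kind, const Sym &sym) {
  bool covered = kind == BsymbolicKind::All ||
                 (kind == BsymbolicKind::Functions && sym.isFunc) ||
                 (kind == BsymbolicKind::NonWeakFunctions && sym.isFunc &&
                  !sym.isWeak);
  // A covered symbol stays preemptible only if --dynamic-list names it.
  return covered ? sym.inDynamicList : true;
}

int main() {
  Sym weakFunc{/*isFunc=*/true, /*isWeak=*/true, /*inDynamicList=*/false};
  // -Bsymbolic-non-weak-functions does not cover a weak function: prints 1.
  std::printf("%d\n", (int)isPreemptibleUnderBsymbolic(
                          BsymbolicKind::NonWeakFunctions, weakFunc));
}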
@@ -526,6 +540,9 @@ void Symbol::resolveUndefined(const Undefined &other) {
file->groupId < other.file->groupId;
fetch();
+ if (!config->whyExtract.empty())
+ recordWhyExtract(other.file, *file, *this);
+
// We don't report backward references to weak symbols as they can be
// overridden later.
//
@@ -735,7 +752,10 @@ template <class LazyT> void Symbol::resolveLazy(const LazyT &other) {
return;
}
+ const InputFile *oldFile = file;
other.fetch();
+ if (!config->whyExtract.empty())
+ recordWhyExtract(oldFile, *file, *this);
}
void Symbol::resolveShared(const SharedSymbol &other) {
diff --git a/lld/ELF/Symbols.h b/lld/ELF/Symbols.h
index d486beb26d0e..816d61563021 100644
--- a/lld/ELF/Symbols.h
+++ b/lld/ELF/Symbols.h
@@ -20,6 +20,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/ELF.h"
+#include <tuple>
namespace lld {
// Returns a string representation for a symbol for diagnostics.
@@ -162,10 +163,7 @@ public:
// True if this is an undefined weak symbol. This only works once
// all input files have been added.
- bool isUndefWeak() const {
- // See comment on lazy symbols for details.
- return isWeak() && (isUndefined() || isLazy());
- }
+ bool isUndefWeak() const { return isWeak() && isUndefined(); }
StringRef getName() const {
if (nameSize == (uint32_t)-1)
@@ -582,6 +580,11 @@ extern llvm::DenseMap<const Symbol *,
std::pair<const InputFile *, const InputFile *>>
backwardReferences;
+// A tuple of (reference, extractedFile, sym). Used by --why-extract=.
+extern SmallVector<std::tuple<std::string, const InputFile *, const Symbol &>,
+ 0>
+ whyExtract;
+
} // namespace elf
} // namespace lld
diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp
index 3496df1d2814..f1594eb8df86 100644
--- a/lld/ELF/SyntheticSections.cpp
+++ b/lld/ELF/SyntheticSections.cpp
@@ -431,10 +431,11 @@ template <class ELFT>
void EhFrameSection::addSectionAux(EhInputSection *sec) {
if (!sec->isLive())
return;
- if (sec->areRelocsRela)
- addRecords<ELFT>(sec, sec->template relas<ELFT>());
+ const RelsOrRelas<ELFT> rels = sec->template relsOrRelas<ELFT>();
+ if (rels.areRelocsRel())
+ addRecords<ELFT>(sec, rels.rels);
else
- addRecords<ELFT>(sec, sec->template rels<ELFT>());
+ addRecords<ELFT>(sec, rels.relas);
}
void EhFrameSection::addSection(EhInputSection *sec) {
@@ -483,12 +484,11 @@ void EhFrameSection::iterateFDEWithLSDA(
DenseSet<size_t> ciesWithLSDA;
for (EhInputSection *sec : sections) {
ciesWithLSDA.clear();
- if (sec->areRelocsRela)
- iterateFDEWithLSDAAux<ELFT>(*sec, sec->template relas<ELFT>(),
- ciesWithLSDA, fn);
+ const RelsOrRelas<ELFT> rels = sec->template relsOrRelas<ELFT>();
+ if (rels.areRelocsRel())
+ iterateFDEWithLSDAAux<ELFT>(*sec, rels.rels, ciesWithLSDA, fn);
else
- iterateFDEWithLSDAAux<ELFT>(*sec, sec->template rels<ELFT>(),
- ciesWithLSDA, fn);
+ iterateFDEWithLSDAAux<ELFT>(*sec, rels.relas, ciesWithLSDA, fn);
}
}
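Both call sites above follow the same shape: fetch a RelsOrRelas view and branch once on whether the section carries REL or RELA records. A rough sketch of that pattern, with std::vector standing in for ArrayRef and invented field names:

#include <cstdio>
#include <vector>

struct Rel  { unsigned offset, type; };
struct Rela { unsigned offset, type; long addend; };

struct RelsOrRelas {
  std::vector<Rel> rels;
  std::vector<Rela> relas;
  bool areRelocsRel() const { return relas.empty(); }
};

template <class Fn> static void forEachReloc(const RelsOrRelas &r, Fn fn) {
  if (r.areRelocsRel())
    for (const Rel &x : r.rels)
      fn(x.offset, x.type, 0L);         // implicit addend lives in the section
  else
    for (const Rela &x : r.relas)
      fn(x.offset, x.type, x.addend);   // explicit addend
}

int main() {
  RelsOrRelas r;
  r.relas.push_back({0x10, 2, -4});
  forEachReloc(r, [](unsigned off, unsigned type, long addend) {
    std::printf("off=0x%x type=%u addend=%ld\n", off, type, addend);
  });
}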
@@ -1119,7 +1119,7 @@ void MipsGotSection::writeTo(uint8_t *buf) {
if (p.first == nullptr && !config->shared)
write(p.second, nullptr, 1);
else if (p.first && !p.first->isPreemptible) {
- // If we are emitting a shared libary with relocations we mustn't write
+ // If we are emitting a shared library with relocations we mustn't write
// anything to the GOT here. When using Elf_Rel relocations the value
// one will be treated as an addend and will cause crashes at runtime
if (!config->shared)
@@ -1356,7 +1356,7 @@ template <class ELFT> void DynamicSection<ELFT>::finalizeContents() {
// Set DT_FLAGS and DT_FLAGS_1.
uint32_t dtFlags = 0;
uint32_t dtFlags1 = 0;
- if (config->bsymbolic)
+ if (config->bsymbolic == BsymbolicKind::All)
dtFlags |= DF_SYMBOLIC;
if (config->zGlobal)
dtFlags1 |= DF_1_GLOBAL;
@@ -1668,11 +1668,11 @@ void RelocationBaseSection::finalizeContents() {
else
getParent()->link = 0;
- if (in.relaPlt == this) {
+ if (in.relaPlt == this && in.gotPlt->getParent()) {
getParent()->flags |= ELF::SHF_INFO_LINK;
getParent()->info = in.gotPlt->getParent()->sectionIndex;
}
- if (in.relaIplt == this) {
+ if (in.relaIplt == this && in.igotPlt->getParent()) {
getParent()->flags |= ELF::SHF_INFO_LINK;
getParent()->info = in.igotPlt->getParent()->sectionIndex;
}
@@ -2171,7 +2171,7 @@ size_t SymbolTableBaseSection::getSymbolIndex(Symbol *sym) {
return sym->dynsymIndex;
// Initializes symbol lookup tables lazily. This is used only for -r,
- // -emit-relocs and dynsyms in partitions other than the main one.
+ // --emit-relocs and dynsyms in partitions other than the main one.
llvm::call_once(onceFlag, [&] {
symbolIndexMap.reserve(symbols.size());
size_t i = 0;
@@ -2350,8 +2350,7 @@ size_t SymtabShndxSection::getSize() const {
// is to help the dynamic linker resolve symbols quickly. If ELF files
// don't have them, the dynamic linker has to do linear search on all
// dynamic symbols, which makes programs slower. Therefore, a .hash
-// section is added to a DSO by default. A .gnu.hash is added if you
-// give the -hash-style=gnu or -hash-style=both option.
+// section is added to a DSO by default.
//
// The Unix semantics of resolving dynamic symbols is somewhat expensive.
// Each ELF file has a list of DSOs that the ELF file depends on and a
@@ -2374,8 +2373,8 @@ size_t SymtabShndxSection::getSize() const {
// and better version of .hash. .hash is just an on-disk hash table, but
// .gnu.hash has a bloom filter in addition to a hash table to skip
// DSOs very quickly. If you are sure that your dynamic linker knows
-// about .gnu.hash, you want to specify -hash-style=gnu. Otherwise, a
-// safe bet is to specify -hash-style=both for backward compatibility.
+// about .gnu.hash, you want to specify --hash-style=gnu. Otherwise, a
+// safe bet is to specify --hash-style=both for backward compatibility.
GnuHashTableSection::GnuHashTableSection()
: SyntheticSection(SHF_ALLOC, SHT_GNU_HASH, config->wordsize, ".gnu.hash") {
}
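For context, the hash that .gnu.hash buckets symbols by is the well-known GNU symbol hash, h = h*33 + c over the name bytes, seeded with 5381. A small illustrative sketch:

#include <cstdint>
#include <cstdio>

static uint32_t gnuHash(const char *name) {
  uint32_t h = 5381;
  for (const uint8_t *p = reinterpret_cast<const uint8_t *>(name); *p; ++p)
    h = h * 33 + *p;
  return h;
}

int main() { std::printf("0x%x\n", gnuHash("printf")); }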
@@ -2402,7 +2401,7 @@ void GnuHashTableSection::finalizeContents() {
void GnuHashTableSection::writeTo(uint8_t *buf) {
// The output buffer is not guaranteed to be zero-cleared because we pre-
// fill executable sections with trap instructions. This is a precaution
- // for that case, which happens only when -no-rosegment is given.
+ // for that case, which happens only when --no-rosegment is given.
memset(buf, 0, size);
// Write a header.
@@ -3161,10 +3160,10 @@ size_t VersionTableSection::getSize() const {
void VersionTableSection::writeTo(uint8_t *buf) {
buf += 2;
for (const SymbolTableEntry &s : getPartition().dynSymTab->getSymbols()) {
- // Use the original versionId for an unfetched lazy symbol (undefined weak),
- // which must be VER_NDX_GLOBAL (an undefined versioned symbol is an error).
- write16(buf, s.sym->isLazy() ? static_cast<uint16_t>(VER_NDX_GLOBAL)
- : s.sym->versionId);
+ // For an unfetched lazy symbol (undefined weak), it must have been
+ // converted to Undefined and have VER_NDX_GLOBAL version here.
+ assert(!s.sym->isLazy());
+ write16(buf, s.sym->versionId);
buf += 2;
}
}
@@ -3591,9 +3590,8 @@ void ARMExidxSyntheticSection::writeTo(uint8_t *buf) {
}
bool ARMExidxSyntheticSection::isNeeded() const {
- return llvm::find_if(exidxSections, [](InputSection *isec) {
- return isec->isLive();
- }) != exidxSections.end();
+ return llvm::any_of(exidxSections,
+ [](InputSection *isec) { return isec->isLive(); });
}
bool ARMExidxSyntheticSection::classof(const SectionBase *d) {
diff --git a/lld/ELF/Target.cpp b/lld/ELF/Target.cpp
index d3e54f7387d7..88d3006f9a2d 100644
--- a/lld/ELF/Target.cpp
+++ b/lld/ELF/Target.cpp
@@ -106,8 +106,14 @@ template <class ELFT> static ErrorPlace getErrPlace(const uint8_t *loc) {
assert(isa<SyntheticSection>(isec) && "No data but not synthetic?");
continue;
}
- if (isecLoc <= loc && loc < isecLoc + isec->getSize())
- return {isec, isec->template getLocation<ELFT>(loc - isecLoc) + ": "};
+ if (isecLoc <= loc && loc < isecLoc + isec->getSize()) {
+ auto objLoc = isec->template getLocation<ELFT>(loc - isecLoc);
+ // Return object file location and source file location.
+ // TODO: Refactor getSrcMsg so it does not require a dummy Symbol argument.
+ Undefined dummy(nullptr, "", STB_LOCAL, 0, 0);
+ return {isec, objLoc + ": ",
+ isec->file ? isec->getSrcMsg(dummy, loc - isecLoc) : ""};
+ }
}
return {};
}
@@ -187,7 +193,7 @@ void TargetInfo::relaxTlsLdToLe(uint8_t *loc, const Relocation &rel,
}
uint64_t TargetInfo::getImageBase() const {
- // Use -image-base if set. Fall back to the target default if not.
+ // Use --image-base if set. Fall back to the target default if not.
if (config->imageBase)
return *config->imageBase;
return config->isPic ? 0 : defaultImageBase;
diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h
index 1fe3217c6d1d..e0e97301ca98 100644
--- a/lld/ELF/Target.h
+++ b/lld/ELF/Target.h
@@ -109,11 +109,11 @@ public:
uint64_t getImageBase() const;
// True if _GLOBAL_OFFSET_TABLE_ is relative to .got.plt, false if .got.
- bool gotBaseSymInGotPlt = true;
+ bool gotBaseSymInGotPlt = false;
+ static constexpr RelType noneRel = 0;
RelType copyRel;
RelType gotRel;
- RelType noneRel;
RelType pltRel;
RelType relativeRel;
RelType iRelativeRel;
@@ -188,6 +188,7 @@ template <class ELFT> TargetInfo *getMipsTargetInfo();
struct ErrorPlace {
InputSectionBase *isec;
std::string loc;
+ std::string srcLoc;
};
// Returns input section and corresponding source string for the given location.
@@ -211,10 +212,6 @@ unsigned getPPCDFormOp(unsigned secondaryOp);
// to the local entry-point.
unsigned getPPC64GlobalEntryToLocalEntryOffset(uint8_t stOther);
-// Returns true if a relocation is a small code model relocation that accesses
-// the .toc section.
-bool isPPC64SmallCodeModelTocReloc(RelType type);
-
// Write a prefixed instruction, which is a 4-byte prefix followed by a 4-byte
// instruction (regardless of endianness). Therefore, the prefix is always in
// lower memory than the instruction.
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index 321599b0055a..6d97852aec43 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -221,18 +221,17 @@ void elf::combineEhSections() {
}
}
- std::vector<InputSectionBase *> &v = inputSections;
- v.erase(std::remove(v.begin(), v.end(), nullptr), v.end());
+ llvm::erase_value(inputSections, nullptr);
}
static Defined *addOptionalRegular(StringRef name, SectionBase *sec,
- uint64_t val, uint8_t stOther = STV_HIDDEN,
- uint8_t binding = STB_GLOBAL) {
+ uint64_t val, uint8_t stOther = STV_HIDDEN) {
Symbol *s = symtab->find(name);
if (!s || s->isDefined())
return nullptr;
- s->resolve(Defined{/*file=*/nullptr, name, binding, stOther, STT_NOTYPE, val,
+ s->resolve(Defined{/*file=*/nullptr, name, STB_GLOBAL, stOther, STT_NOTYPE,
+ val,
/*size=*/0, sec});
return cast<Defined>(s);
}
@@ -595,9 +594,8 @@ template <class ELFT> void Writer<ELFT>::run() {
if (errorCount())
return;
- // If -compressed-debug-sections is specified, we need to compress
- // .debug_* sections. Do it right now because it changes the size of
- // output sections.
+ // If --compressed-debug-sections is specified, compress .debug_* sections.
+ // Do it right now because it changes the size of output sections.
for (OutputSection *sec : outputSections)
sec->maybeCompress<ELFT>();
@@ -622,11 +620,12 @@ template <class ELFT> void Writer<ELFT>::run() {
for (OutputSection *sec : outputSections)
sec->addr = 0;
- // Handle --print-map(-M)/--Map, --cref and --print-archive-stats=. Dump them
- // before checkSections() because the files may be useful in case
- // checkSections() or openFile() fails, for example, due to an erroneous file
- // size.
+ // Handle --print-map(-M)/--Map, --why-extract=, --cref and
+ // --print-archive-stats=. Dump them before checkSections() because the files
+ // may be useful in case checkSections() or openFile() fails, for example, due
+ // to an erroneous file size.
writeMapFile();
+ writeWhyExtract();
writeCrossReferenceTable();
writeArchiveStats();
@@ -727,16 +726,11 @@ static bool shouldKeepInSymtab(const Defined &sym) {
// * --discard-locals is used.
// * The symbol is in a SHF_MERGE section, which is normally the reason for
// the assembler keeping the .L symbol.
- StringRef name = sym.getName();
- bool isLocal = name.startswith(".L") || name.empty();
- if (!isLocal)
- return true;
-
- if (config->discard == DiscardPolicy::Locals)
+ if (sym.getName().startswith(".L") &&
+ (config->discard == DiscardPolicy::Locals ||
+ (sym.section && (sym.section->flags & SHF_MERGE))))
return false;
-
- SectionBase *sec = sym.section;
- return !sec || !(sec->flags & SHF_MERGE);
+ return true;
}
static bool includeInSymtab(const Symbol &b) {
@@ -812,7 +806,7 @@ template <class ELFT> void Writer<ELFT>::addSectionSymbols() {
// Unlike other synthetic sections, mergeable output sections contain data
// copied from input sections, and there may be a relocation pointing to its
- // contents if -r or -emit-reloc are given.
+ // contents if -r or --emit-reloc is given.
if (isa<SyntheticSection>(isec) && !(isec->flags & SHF_MERGE))
continue;
@@ -1097,11 +1091,11 @@ template <class ELFT> void Writer<ELFT>::addRelIpltSymbols() {
// sure that .rela.plt exists in output.
ElfSym::relaIpltStart = addOptionalRegular(
config->isRela ? "__rela_iplt_start" : "__rel_iplt_start",
- Out::elfHeader, 0, STV_HIDDEN, STB_WEAK);
+ Out::elfHeader, 0, STV_HIDDEN);
ElfSym::relaIpltEnd = addOptionalRegular(
config->isRela ? "__rela_iplt_end" : "__rel_iplt_end",
- Out::elfHeader, 0, STV_HIDDEN, STB_WEAK);
+ Out::elfHeader, 0, STV_HIDDEN);
}
template <class ELFT>
@@ -1255,15 +1249,27 @@ findOrphanPos(std::vector<BaseCommand *>::iterator b,
});
if (i == e)
return e;
+ auto foundSec = dyn_cast<OutputSection>(*i);
+ if (!foundSec)
+ return e;
// Consider all existing sections with the same proximity.
int proximity = getRankProximity(sec, *i);
+ unsigned sortRank = sec->sortRank;
+ if (script->hasPhdrsCommands() || !script->memoryRegions.empty())
+ // Prevent the orphan section from being placed before the found section. If
+ // custom program headers are defined, that helps to avoid adding it to a
+ // previous segment and changing flags of that segment, for example, making
+ // a read-only segment writable. If memory regions are defined, an orphan
+ // section should continue the same region as the found section to better
+ // resemble the behavior of GNU ld.
+ sortRank = std::max(sortRank, foundSec->sortRank);
for (; i != e; ++i) {
auto *curSec = dyn_cast<OutputSection>(*i);
if (!curSec || !curSec->hasInputSections)
continue;
if (getRankProximity(sec, curSec) != proximity ||
- sec->sortRank < curSec->sortRank)
+ sortRank < curSec->sortRank)
break;
}
@@ -1331,7 +1337,7 @@ static void maybeShuffle(DenseMap<const InputSectionBase *, int> &order) {
// Builds section order for handling --symbol-ordering-file.
static DenseMap<const InputSectionBase *, int> buildSectionOrder() {
DenseMap<const InputSectionBase *, int> sectionOrder;
- // Use the rarely used option -call-graph-ordering-file to sort sections.
+ // Use the rarely used option --call-graph-ordering-file to sort sections.
if (!config->callGraphProfile.empty())
return computeCallGraphProfileOrder();
@@ -1480,29 +1486,19 @@ static void sortSection(OutputSection *sec,
if (auto *isd = dyn_cast<InputSectionDescription>(b))
sortISDBySectionOrder(isd, order);
- // Sort input sections by section name suffixes for
- // __attribute__((init_priority(N))).
- if (name == ".init_array" || name == ".fini_array") {
- if (!script->hasSectionsCommand)
- sec->sortInitFini();
- return;
- }
-
- // Sort input sections by the special rule for .ctors and .dtors.
- if (name == ".ctors" || name == ".dtors") {
- if (!script->hasSectionsCommand)
- sec->sortCtorsDtors();
+ if (script->hasSectionsCommand)
return;
- }
- // .toc is allocated just after .got and is accessed using GOT-relative
- // relocations. Object files compiled with small code model have an
- // addressable range of [.got, .got + 0xFFFC] for GOT-relative relocations.
- // To reduce the risk of relocation overflow, .toc contents are sorted so that
- // sections having smaller relocation offsets are at beginning of .toc
- if (config->emachine == EM_PPC64 && name == ".toc") {
- if (script->hasSectionsCommand)
- return;
+ if (name == ".init_array" || name == ".fini_array") {
+ sec->sortInitFini();
+ } else if (name == ".ctors" || name == ".dtors") {
+ sec->sortCtorsDtors();
+ } else if (config->emachine == EM_PPC64 && name == ".toc") {
+ // .toc is allocated just after .got and is accessed using GOT-relative
+ // relocations. Object files compiled with small code model have an
+ // addressable range of [.got, .got + 0xFFFC] for GOT-relative relocations.
+ // To reduce the risk of relocation overflow, .toc contents are sorted so
+ // that sections having smaller relocation offsets are at the beginning of .toc.
assert(sec->sectionCommands.size() == 1);
auto *isd = cast<InputSectionDescription>(sec->sectionCommands[0]);
llvm::stable_sort(isd->sections,
@@ -1510,7 +1506,6 @@ static void sortSection(OutputSection *sec,
return a->file->ppc64SmallCodeModelTocRelocs &&
!b->file->ppc64SmallCodeModelTocRelocs;
});
- return;
}
}
@@ -1965,10 +1960,10 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() {
OutputSection *sec = findSection(".sdata");
ElfSym::riscvGlobalPointer =
addOptionalRegular("__global_pointer$", sec ? sec : Out::elfHeader,
- 0x800, STV_DEFAULT, STB_GLOBAL);
+ 0x800, STV_DEFAULT);
}
- if (config->emachine == EM_X86_64) {
+ if (config->emachine == EM_386 || config->emachine == EM_X86_64) {
// On targets that support TLSDESC, _TLS_MODULE_BASE_ is defined in such a
// way that:
//
@@ -2243,8 +2238,8 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() {
}
// Ensure data sections are not mixed with executable sections when
-// -execute-only is used. -execute-only is a feature to make pages executable
-// but not readable, and the feature is currently supported only on AArch64.
+// --execute-only is used. --execute-only makes pages executable but not
+// readable.
template <class ELFT> void Writer<ELFT>::checkExecuteOnly() {
if (!config->executeOnly)
return;
@@ -2281,7 +2276,7 @@ template <class ELFT> void Writer<ELFT>::addStartEndSymbols() {
Default = Out::elfHeader;
auto define = [=](StringRef start, StringRef end, OutputSection *os) {
- if (os) {
+ if (os && !script->isDiscarded(os)) {
addOptionalRegular(start, os, 0);
addOptionalRegular(end, os, -1);
} else {
@@ -2597,9 +2592,10 @@ static uint64_t computeFileOffset(OutputSection *os, uint64_t off) {
return alignTo(off, os->ptLoad->p_align, os->addr);
// File offsets are not significant for .bss sections other than the first one
- // in a PT_LOAD. By convention, we keep section offsets monotonically
+ // in a PT_LOAD/PT_TLS. By convention, we keep section offsets monotonically
// increasing rather than setting to zero.
- if (os->type == SHT_NOBITS)
+ if (os->type == SHT_NOBITS &&
+ (!Out::tlsPhdr || Out::tlsPhdr->firstSec != os))
return off;
// If the section is not in a PT_LOAD, we just have to align it.
@@ -2832,8 +2828,7 @@ template <class ELFT> void Writer<ELFT>::checkSections() {
// 2. the ENTRY(symbol) command in a linker control script;
// 3. the value of the symbol _start, if present;
// 4. the number represented by the entry symbol, if it is a number;
-// 5. the address of the first byte of the .text section, if present;
-// 6. the address 0.
+// 5. the address 0.
static uint64_t getEntryAddr() {
// Case 1, 2 or 3
if (Symbol *b = symtab->find(config->entry))
@@ -2845,14 +2840,6 @@ static uint64_t getEntryAddr() {
return addr;
// Case 5
- if (OutputSection *sec = findSection(".text")) {
- if (config->warnMissingEntry)
- warn("cannot find entry symbol " + config->entry + "; defaulting to 0x" +
- utohexstr(sec->addr));
- return sec->addr;
- }
-
- // Case 6
if (config->warnMissingEntry)
warn("cannot find entry symbol " + config->entry +
"; not setting start address");
@@ -2979,7 +2966,7 @@ template <class ELFT> void Writer<ELFT>::writeTrapInstr() {
// Write section contents to a mmap'ed file.
template <class ELFT> void Writer<ELFT>::writeSections() {
- // In -r or -emit-relocs mode, write the relocation sections first as in
+ // In -r or --emit-relocs mode, write the relocation sections first as in
// ELf_Rel targets we might find out that we need to modify the relocated
// section while doing it.
for (OutputSection *sec : outputSections)
diff --git a/lld/MachO/Arch/ARM64.cpp b/lld/MachO/Arch/ARM64.cpp
index 36c3cc639284..001e112ae3ad 100644
--- a/lld/MachO/Arch/ARM64.cpp
+++ b/lld/MachO/Arch/ARM64.cpp
@@ -134,7 +134,13 @@ ARM64::ARM64() : ARM64Common(LP64()) {
stubSize = sizeof(stubCode);
thunkSize = sizeof(thunkCode);
- branchRange = maxIntN(28) - thunkSize;
+
+ // Branch immediate is two's complement 26 bits, which is implicitly
+ // multiplied by 4 (since all functions are 4-aligned): the branch range
+ // is -4*(2**(26-1))..4*(2**(26-1) - 1).
+ backwardBranchRange = 128 * 1024 * 1024;
+ forwardBranchRange = backwardBranchRange - 4;
+
stubHelperHeaderSize = sizeof(stubHelperHeaderCode);
stubHelperEntrySize = sizeof(stubHelperEntryCode);
}
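The two constants above follow directly from a 26-bit two's-complement branch immediate scaled by 4. A quick arithmetic check, independent of lld's types:

#include <cstdint>
#include <cstdio>

int main() {
  constexpr int64_t scale = 4;
  constexpr int64_t backward = scale * (INT64_C(1) << 25);      // 128 MiB
  constexpr int64_t forward = scale * ((INT64_C(1) << 25) - 1); // 128 MiB - 4
  static_assert(backward == 128 * 1024 * 1024, "backward range");
  static_assert(forward == backward - 4, "forward range");
  std::printf("%lld %lld\n", (long long)backward, (long long)forward);
}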
diff --git a/lld/MachO/ConcatOutputSection.cpp b/lld/MachO/ConcatOutputSection.cpp
index 78590cff2eef..46cd15a40025 100644
--- a/lld/MachO/ConcatOutputSection.cpp
+++ b/lld/MachO/ConcatOutputSection.cpp
@@ -53,7 +53,7 @@ void ConcatOutputSection::addInput(ConcatInputSection *input) {
// multiple thunks to the same destination distributed throughout a large
// program so that all call sites can have one within range.
//
-// The optimal approach is to mix islands for distinations within two hops,
+// The optimal approach is to mix islands for destinations within two hops,
// and use thunks for destinations at greater distance. For now, we only
// implement thunks. TODO: Adding support for branch islands!
//
@@ -109,7 +109,7 @@ void ConcatOutputSection::addInput(ConcatInputSection *input) {
// thus, we place thunks at monotonically increasing addresses. Once a thunk
// is placed, it and all previous input-section addresses are final.
//
-// * MergedInputSection::finalize() and MergedInputSection::writeTo() merge
+// * ConcatInputSection::finalize() and ConcatInputSection::writeTo() merge
// the inputs and thunks vectors (both ordered by ascending address), which
// is simple and cheap.
@@ -126,7 +126,8 @@ bool ConcatOutputSection::needsThunks() const {
uint64_t isecAddr = addr;
for (InputSection *isec : inputs)
isecAddr = alignTo(isecAddr, isec->align) + isec->getSize();
- if (isecAddr - addr + in.stubs->getSize() <= target->branchRange)
+ if (isecAddr - addr + in.stubs->getSize() <=
+ std::min(target->backwardBranchRange, target->forwardBranchRange))
return false;
// Yes, this program is large enough to need thunks.
for (InputSection *isec : inputs) {
@@ -140,7 +141,7 @@ bool ConcatOutputSection::needsThunks() const {
ThunkInfo &thunkInfo = thunkMap[sym];
// Knowing ThunkInfo call site count will help us know whether or not we
// might need to create more for this referent at the time we are
- // estimating distance to __stubs in .
+ // estimating distance to __stubs in estimateStubsInRangeVA().
++thunkInfo.callSiteCount;
// Knowing InputSection call site count will help us avoid work on those
// that have no BRANCH relocs.
@@ -152,37 +153,43 @@ bool ConcatOutputSection::needsThunks() const {
// Since __stubs is placed after __text, we must estimate the address
// beyond which stubs are within range of a simple forward branch.
+// This is called exactly once, when the last input section has been finalized.
uint64_t ConcatOutputSection::estimateStubsInRangeVA(size_t callIdx) const {
- uint64_t branchRange = target->branchRange;
- size_t endIdx = inputs.size();
- ConcatInputSection *isec = inputs[callIdx];
- uint64_t isecVA = isec->getVA();
- // Tally the non-stub functions which still have call sites
- // remaining to process, which yields the maximum number
- // of thunks we might yet place.
+ // Tally the functions which still have call sites remaining to process,
+ // which yields the maximum number of thunks we might yet place.
size_t maxPotentialThunks = 0;
for (auto &tp : thunkMap) {
ThunkInfo &ti = tp.second;
- maxPotentialThunks +=
- !tp.first->isInStubs() && ti.callSitesUsed < ti.callSiteCount;
+ // This overcounts: Only sections that are in forward jump range from the
+ // currently-active section get finalized, and all input sections are
+ // finalized when estimateStubsInRangeVA() is called. So only backward
+ // jumps will need thunks, but we count all jumps.
+ if (ti.callSitesUsed < ti.callSiteCount)
+ maxPotentialThunks += 1;
}
// Tally the total size of input sections remaining to process.
- uint64_t isecEnd = isec->getVA();
- for (size_t i = callIdx; i < endIdx; i++) {
+ uint64_t isecVA = inputs[callIdx]->getVA();
+ uint64_t isecEnd = isecVA;
+ for (size_t i = callIdx; i < inputs.size(); i++) {
InputSection *isec = inputs[i];
isecEnd = alignTo(isecEnd, isec->align) + isec->getSize();
}
// Estimate the address after which call sites can safely call stubs
// directly rather than through intermediary thunks.
+ uint64_t forwardBranchRange = target->forwardBranchRange;
+ assert(isecEnd > forwardBranchRange &&
+ "should not run thunk insertion if all code fits in jump range");
+ assert(isecEnd - isecVA <= forwardBranchRange &&
+ "should only finalize sections in jump range");
uint64_t stubsInRangeVA = isecEnd + maxPotentialThunks * target->thunkSize +
- in.stubs->getSize() - branchRange;
+ in.stubs->getSize() - forwardBranchRange;
log("thunks = " + std::to_string(thunkMap.size()) +
", potential = " + std::to_string(maxPotentialThunks) +
", stubs = " + std::to_string(in.stubs->getSize()) + ", isecVA = " +
to_hexString(isecVA) + ", threshold = " + to_hexString(stubsInRangeVA) +
", isecEnd = " + to_hexString(isecEnd) +
", tail = " + to_hexString(isecEnd - isecVA) +
- ", slop = " + to_hexString(branchRange - (isecEnd - isecVA)));
+ ", slop = " + to_hexString(forwardBranchRange - (isecEnd - isecVA)));
return stubsInRangeVA;
}
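A worked instance of the threshold computed above, with entirely made-up sizes: once a call site's address reaches stubsInRangeVA, even the worst case (every remaining call site growing the text by one thunk) still leaves __stubs within forward branch range.

#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t isecEnd = 0x10000000;       // end of not-yet-finalized text (invented)
  const uint64_t maxPotentialThunks = 1000;  // invented
  const uint64_t thunkSize = 12;             // per-thunk size (illustrative)
  const uint64_t stubsSize = 0x4000;         // invented
  const uint64_t forwardBranchRange = 128 * 1024 * 1024 - 4;
  uint64_t stubsInRangeVA =
      isecEnd + maxPotentialThunks * thunkSize + stubsSize - forwardBranchRange;
  std::printf("stubs reachable from 0x%llx on\n",
              (unsigned long long)stubsInRangeVA);
}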
@@ -206,7 +213,8 @@ void ConcatOutputSection::finalize() {
return;
}
- uint64_t branchRange = target->branchRange;
+ uint64_t forwardBranchRange = target->forwardBranchRange;
+ uint64_t backwardBranchRange = target->backwardBranchRange;
uint64_t stubsInRangeVA = TargetInfo::outOfRangeVA;
size_t thunkSize = target->thunkSize;
size_t relocCount = 0;
@@ -214,6 +222,11 @@ void ConcatOutputSection::finalize() {
size_t thunkCallCount = 0;
size_t thunkCount = 0;
+ // Walk all sections in order. Finalize every section that is less than
+ // forwardBranchRange ahead of the section currently being walked.
+ // isecVA is the address of the current section.
+ // isecAddr is the start address of the first non-finalized section.
+
// inputs[finalIdx] is for finalization (address-assignment)
size_t finalIdx = 0;
// Kick-off by ensuring that the first input section has an address
@@ -224,12 +237,22 @@ void ConcatOutputSection::finalize() {
ConcatInputSection *isec = inputs[callIdx];
assert(isec->isFinal);
uint64_t isecVA = isec->getVA();
- // Assign addresses up-to the forward branch-range limit
- while (finalIdx < endIdx &&
- isecAddr + inputs[finalIdx]->getSize() < isecVA + branchRange)
+
+ // Assign addresses up-to the forward branch-range limit.
+ // Every call instruction needs a small number of bytes (on Arm64: 4),
+ // and each inserted thunk needs a slightly larger number of bytes
+ // (on Arm64: 12). If a section starts with a branch instruction and
+ // contains several branch instructions in succession, then the distance
+ // from the current position to the position where the thunks are inserted
+ // grows. So leave room for a bunch of thunks.
+ unsigned slop = 100 * thunkSize;
+ while (finalIdx < endIdx && isecAddr + inputs[finalIdx]->getSize() <
+ isecVA + forwardBranchRange - slop)
finalizeOne(inputs[finalIdx++]);
+
if (isec->callSiteCount == 0)
continue;
+
if (finalIdx == endIdx && stubsInRangeVA == TargetInfo::outOfRangeVA) {
// When we have finalized all input sections, __stubs (destined
// to follow __text) comes within range of forward branches and
@@ -254,20 +277,22 @@ void ConcatOutputSection::finalize() {
++callSiteCount;
// Calculate branch reachability boundaries
uint64_t callVA = isecVA + r.offset;
- uint64_t lowVA = branchRange < callVA ? callVA - branchRange : 0;
- uint64_t highVA = callVA + branchRange;
+ uint64_t lowVA =
+ backwardBranchRange < callVA ? callVA - backwardBranchRange : 0;
+ uint64_t highVA = callVA + forwardBranchRange;
// Calculate our call referent address
auto *funcSym = r.referent.get<Symbol *>();
ThunkInfo &thunkInfo = thunkMap[funcSym];
// The referent is not reachable, so we need to use a thunk ...
if (funcSym->isInStubs() && callVA >= stubsInRangeVA) {
+ assert(callVA != TargetInfo::outOfRangeVA);
// ... Oh, wait! We are close enough to the end that __stubs
// are now within range of a simple forward branch.
continue;
}
uint64_t funcVA = funcSym->resolveBranchVA();
++thunkInfo.callSitesUsed;
- if (lowVA < funcVA && funcVA < highVA) {
+ if (lowVA <= funcVA && funcVA <= highVA) {
// The referent is reachable with a simple call instruction.
continue;
}
@@ -276,32 +301,36 @@ void ConcatOutputSection::finalize() {
// If an existing thunk is reachable, use it ...
if (thunkInfo.sym) {
uint64_t thunkVA = thunkInfo.isec->getVA();
- if (lowVA < thunkVA && thunkVA < highVA) {
+ if (lowVA <= thunkVA && thunkVA <= highVA) {
r.referent = thunkInfo.sym;
continue;
}
}
- // ... otherwise, create a new thunk
+ // ... otherwise, create a new thunk.
if (isecAddr > highVA) {
- // When there is small-to-no margin between highVA and
- // isecAddr and the distance between subsequent call sites is
- // smaller than thunkSize, then a new thunk can go out of
- // range. Fix by unfinalizing inputs[finalIdx] to reduce the
- // distance between callVA and highVA, then shift some thunks
- // to occupy address-space formerly occupied by the
- // unfinalized inputs[finalIdx].
+ // There were too many consecutive branch instructions for `slop`
+ // above. If you hit this: For the current algorithm, just bumping up
+ // slop above and trying again is probably simplest. (See also PR51578
+ // comment 5).
fatal(Twine(__FUNCTION__) + ": FIXME: thunk range overrun");
}
thunkInfo.isec =
make<ConcatInputSection>(isec->getSegName(), isec->getName());
thunkInfo.isec->parent = this;
+
+ // This code runs after dead code removal. We must set the `live` bit
+ // on the thunk isec so that the asserts which check that only live
+ // sections get written are satisfied.
+ thunkInfo.isec->live = true;
+
StringRef thunkName = saver.save(funcSym->getName() + ".thunk." +
std::to_string(thunkInfo.sequence++));
r.referent = thunkInfo.sym = symtab->addDefined(
thunkName, /*file=*/nullptr, thunkInfo.isec, /*value=*/0,
/*size=*/thunkSize, /*isWeakDef=*/false, /*isPrivateExtern=*/true,
/*isThumb=*/false, /*isReferencedDynamically=*/false,
- /*noDeadStrip=*/false);
+ /*noDeadStrip=*/false, /*isWeakDefCanBeHidden=*/false);
+ thunkInfo.sym->used = true;
target->populateThunk(thunkInfo.isec, funcSym);
finalizeOne(thunkInfo.isec);
thunks.push_back(thunkInfo.isec);
@@ -324,7 +353,7 @@ void ConcatOutputSection::writeTo(uint8_t *buf) const {
size_t i = 0, ie = inputs.size();
size_t t = 0, te = thunks.size();
while (i < ie || t < te) {
- while (i < ie && (t == te || inputs[i]->getSize() == 0 ||
+ while (i < ie && (t == te || inputs[i]->empty() ||
inputs[i]->outSecOff < thunks[t]->outSecOff)) {
inputs[i]->writeTo(buf + inputs[i]->outSecOff);
++i;
diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h
index 0f47015b7607..42528185e57c 100644
--- a/lld/MachO/Config.h
+++ b/lld/MachO/Config.h
@@ -121,6 +121,8 @@ struct Configuration {
bool timeTraceEnabled = false;
bool dataConst = false;
bool dedupLiterals = true;
+ bool omitDebugInfo = false;
+ bool warnDylibInstallName = false;
uint32_t headerPad;
uint32_t dylibCompatibilityVersion = 0;
uint32_t dylibCurrentVersion = 0;
@@ -147,6 +149,7 @@ struct Configuration {
bool deadStripDylibs = false;
bool demangle = false;
bool deadStrip = false;
+ bool errorForArchMismatch = false;
PlatformInfo platformInfo;
NamespaceKind namespaceKind = NamespaceKind::twolevel;
UndefinedSymbolTreatment undefinedSymbolTreatment =
@@ -174,6 +177,8 @@ struct Configuration {
bool zeroModTime = false;
+ llvm::StringRef osoPrefix;
+
llvm::MachO::Architecture arch() const { return platformInfo.target.Arch; }
llvm::MachO::PlatformKind platform() const {
@@ -195,6 +200,13 @@ struct SymbolPriorityEntry {
llvm::DenseMap<llvm::StringRef, size_t> objectFiles;
};
+// Whether to force-load an archive.
+enum class ForceLoad {
+ Default, // Apply -all_load or -ObjC behaviors if those flags are enabled
+ Yes, // Always load the archive, regardless of other flags
+ No, // Never load the archive, regardless of other flags
+};
+
extern Configuration *config;
} // namespace macho
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index a8c11b6994b9..e9d65d3c73f2 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -80,19 +80,39 @@ static HeaderFileType getOutputType(const InputArgList &args) {
}
}
+static DenseMap<CachedHashStringRef, StringRef> resolvedLibraries;
static Optional<StringRef> findLibrary(StringRef name) {
- if (config->searchDylibsFirst) {
- if (Optional<StringRef> path = findPathCombination(
- "lib" + name, config->librarySearchPaths, {".tbd", ".dylib"}))
- return path;
+ CachedHashStringRef key(name);
+ auto entry = resolvedLibraries.find(key);
+ if (entry != resolvedLibraries.end())
+ return entry->second;
+
+ auto doFind = [&] {
+ if (config->searchDylibsFirst) {
+ if (Optional<StringRef> path = findPathCombination(
+ "lib" + name, config->librarySearchPaths, {".tbd", ".dylib"}))
+ return path;
+ return findPathCombination("lib" + name, config->librarySearchPaths,
+ {".a"});
+ }
return findPathCombination("lib" + name, config->librarySearchPaths,
- {".a"});
- }
- return findPathCombination("lib" + name, config->librarySearchPaths,
- {".tbd", ".dylib", ".a"});
+ {".tbd", ".dylib", ".a"});
+ };
+
+ Optional<StringRef> path = doFind();
+ if (path)
+ resolvedLibraries[key] = *path;
+
+ return path;
}
-static Optional<std::string> findFramework(StringRef name) {
+static DenseMap<CachedHashStringRef, StringRef> resolvedFrameworks;
+static Optional<StringRef> findFramework(StringRef name) {
+ CachedHashStringRef key(name);
+ auto entry = resolvedFrameworks.find(key);
+ if (entry != resolvedFrameworks.end())
+ return entry->second;
+
SmallString<260> symlink;
StringRef suffix;
std::tie(name, suffix) = name.split(",");
@@ -108,13 +128,13 @@ static Optional<std::string> findFramework(StringRef name) {
// only append suffix if realpath() succeeds
Twine suffixed = location + suffix;
if (fs::exists(suffixed))
- return suffixed.str();
+ return resolvedFrameworks[key] = saver.save(suffixed.str());
}
// Suffix lookup failed, fall through to the no-suffix case.
}
- if (Optional<std::string> path = resolveDylibPath(symlink))
- return path;
+ if (Optional<StringRef> path = resolveDylibPath(symlink.str()))
+ return resolvedFrameworks[key] = *path;
}
return {};
}
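findLibrary and findFramework above now memoize successful lookups so repeated -l/-framework flags skip the filesystem probes. A sketch of the same pattern with standard containers in place of DenseMap and the string saver; the names and paths are invented.

#include <cstdio>
#include <optional>
#include <string>
#include <unordered_map>

// Placeholder for probing the library search paths on disk.
static std::optional<std::string> slowFind(const std::string &name) {
  return "lib/lib" + name + ".a";
}

static std::optional<std::string> findCached(const std::string &name) {
  static std::unordered_map<std::string, std::string> cache;
  auto it = cache.find(name);
  if (it != cache.end())
    return it->second;                // hit: no filesystem work
  std::optional<std::string> path = slowFind(name);
  if (path)
    cache[name] = *path;              // only successful lookups are cached
  return path;
}

int main() { std::puts(findCached("c++").value_or("<none>").c_str()); }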
@@ -174,7 +194,7 @@ static std::vector<StringRef> getSystemLibraryRoots(InputArgList &args) {
for (const Arg *arg : args.filtered(OPT_syslibroot))
roots.push_back(arg->getValue());
// NOTE: the final `-syslibroot` being `/` will ignore all roots
- if (roots.size() && roots.back() == "/")
+ if (!roots.empty() && roots.back() == "/")
roots.clear();
// NOTE: roots can never be empty - add an empty root to simplify the library
// and framework search path computation.
@@ -206,7 +226,9 @@ static llvm::CachePruningPolicy getLTOCachePolicy(InputArgList &args) {
args.filtered(OPT_thinlto_cache_policy, OPT_prune_interval_lto,
OPT_prune_after_lto, OPT_max_relative_cache_size_lto)) {
switch (arg->getOption().getID()) {
- case OPT_thinlto_cache_policy: add(arg->getValue()); break;
+ case OPT_thinlto_cache_policy:
+ add(arg->getValue());
+ break;
case OPT_prune_interval_lto:
if (!strcmp("-1", arg->getValue()))
add("prune_interval=87600h"); // 10 years
@@ -224,52 +246,9 @@ static llvm::CachePruningPolicy getLTOCachePolicy(InputArgList &args) {
return CHECK(parseCachePruningPolicy(ltoPolicy), "invalid LTO cache policy");
}
-namespace {
-struct ArchiveMember {
- MemoryBufferRef mbref;
- uint32_t modTime;
- uint64_t offsetInArchive;
-};
-} // namespace
-
-// Returns slices of MB by parsing MB as an archive file.
-// Each slice consists of a member file in the archive.
-static std::vector<ArchiveMember> getArchiveMembers(MemoryBufferRef mb) {
- std::unique_ptr<Archive> file =
- CHECK(Archive::create(mb),
- mb.getBufferIdentifier() + ": failed to parse archive");
- Archive *archive = file.get();
- make<std::unique_ptr<Archive>>(std::move(file)); // take ownership
-
- std::vector<ArchiveMember> v;
- Error err = Error::success();
-
- // Thin archives refer to .o files, so --reproduce needs the .o files too.
- bool addToTar = archive->isThin() && tar;
-
- for (const Archive::Child &c : archive->children(err)) {
- MemoryBufferRef mbref =
- CHECK(c.getMemoryBufferRef(),
- mb.getBufferIdentifier() +
- ": could not get the buffer for a child of the archive");
- if (addToTar)
- tar->append(relativeToRoot(check(c.getFullName())), mbref.getBuffer());
- uint32_t modTime = toTimeT(
- CHECK(c.getLastModified(), mb.getBufferIdentifier() +
- ": could not get the modification "
- "time for a child of the archive"));
- v.push_back({mbref, modTime, c.getChildOffset()});
- }
- if (err)
- fatal(mb.getBufferIdentifier() +
- ": Archive::children failed: " + toString(std::move(err)));
-
- return v;
-}
-
static DenseMap<StringRef, ArchiveFile *> loadedArchives;
-static InputFile *addFile(StringRef path, bool forceLoadArchive,
+static InputFile *addFile(StringRef path, ForceLoad forceLoadArchive,
bool isExplicit = true, bool isBundleLoader = false) {
Optional<MemoryBufferRef> buffer = readFile(path);
if (!buffer)
@@ -289,48 +268,54 @@ static InputFile *addFile(StringRef path, bool forceLoadArchive,
if (ArchiveFile *cachedFile = loadedArchives[path])
return cachedFile;
- std::unique_ptr<object::Archive> file = CHECK(
+ std::unique_ptr<object::Archive> archive = CHECK(
object::Archive::create(mbref), path + ": failed to parse archive");
- if (!file->isEmpty() && !file->hasSymbolTable())
+ if (!archive->isEmpty() && !archive->hasSymbolTable())
error(path + ": archive has no index; run ranlib to add one");
- if (config->allLoad || forceLoadArchive) {
+ auto *file = make<ArchiveFile>(std::move(archive));
+ if ((forceLoadArchive == ForceLoad::Default && config->allLoad) ||
+ forceLoadArchive == ForceLoad::Yes) {
if (Optional<MemoryBufferRef> buffer = readFile(path)) {
- for (const ArchiveMember &member : getArchiveMembers(*buffer)) {
- if (Optional<InputFile *> file = loadArchiveMember(
- member.mbref, member.modTime, path, /*objCOnly=*/false,
- member.offsetInArchive)) {
- inputFiles.insert(*file);
- printArchiveMemberLoad(
- (forceLoadArchive ? "-force_load" : "-all_load"),
- inputFiles.back());
- }
+ Error e = Error::success();
+ for (const object::Archive::Child &c : file->getArchive().children(e)) {
+ StringRef reason =
+ forceLoadArchive == ForceLoad::Yes ? "-force_load" : "-all_load";
+ if (Error e = file->fetch(c, reason))
+ error(toString(file) + ": " + reason +
+ " failed to load archive member: " + toString(std::move(e)));
}
+ if (e)
+ error(toString(file) +
+ ": Archive::children failed: " + toString(std::move(e)));
}
- } else if (config->forceLoadObjC) {
- for (const object::Archive::Symbol &sym : file->symbols())
+ } else if (forceLoadArchive == ForceLoad::Default &&
+ config->forceLoadObjC) {
+ for (const object::Archive::Symbol &sym : file->getArchive().symbols())
if (sym.getName().startswith(objc::klass))
- symtab->addUndefined(sym.getName(), /*file=*/nullptr,
- /*isWeakRef=*/false);
+ file->fetch(sym);
// TODO: no need to look for ObjC sections for a given archive member if
- // we already found that it contains an ObjC symbol. We should also
- // consider creating a LazyObjFile class in order to avoid double-loading
- // these files here and below (as part of the ArchiveFile).
+ // we already found that it contains an ObjC symbol.
if (Optional<MemoryBufferRef> buffer = readFile(path)) {
- for (const ArchiveMember &member : getArchiveMembers(*buffer)) {
- if (Optional<InputFile *> file = loadArchiveMember(
- member.mbref, member.modTime, path, /*objCOnly=*/true,
- member.offsetInArchive)) {
- inputFiles.insert(*file);
- printArchiveMemberLoad("-ObjC", inputFiles.back());
- }
+ Error e = Error::success();
+ for (const object::Archive::Child &c : file->getArchive().children(e)) {
+ Expected<MemoryBufferRef> mb = c.getMemoryBufferRef();
+ if (!mb || !hasObjCSection(*mb))
+ continue;
+ if (Error e = file->fetch(c, "-ObjC"))
+ error(toString(file) + ": -ObjC failed to load archive member: " +
+ toString(std::move(e)));
}
+ if (e)
+ error(toString(file) +
+ ": Archive::children failed: " + toString(std::move(e)));
}
}
- newFile = loadedArchives[path] = make<ArchiveFile>(std::move(file));
+ file->addLazySymbols();
+ newFile = loadedArchives[path] = file;
break;
}
case file_magic::macho_object:
@@ -371,10 +356,11 @@ static InputFile *addFile(StringRef path, bool forceLoadArchive,
}
static void addLibrary(StringRef name, bool isNeeded, bool isWeak,
- bool isReexport, bool isExplicit, bool forceLoad) {
+ bool isReexport, bool isExplicit,
+ ForceLoad forceLoadArchive) {
if (Optional<StringRef> path = findLibrary(name)) {
if (auto *dylibFile = dyn_cast_or_null<DylibFile>(
- addFile(*path, forceLoad, isExplicit))) {
+ addFile(*path, forceLoadArchive, isExplicit))) {
if (isNeeded)
dylibFile->forceNeeded = true;
if (isWeak)
@@ -390,10 +376,11 @@ static void addLibrary(StringRef name, bool isNeeded, bool isWeak,
}
static void addFramework(StringRef name, bool isNeeded, bool isWeak,
- bool isReexport, bool isExplicit) {
- if (Optional<std::string> path = findFramework(name)) {
+ bool isReexport, bool isExplicit,
+ ForceLoad forceLoadArchive) {
+ if (Optional<StringRef> path = findFramework(name)) {
if (auto *dylibFile = dyn_cast_or_null<DylibFile>(
- addFile(*path, /*forceLoadArchive=*/false, isExplicit))) {
+ addFile(*path, forceLoadArchive, isExplicit))) {
if (isNeeded)
dylibFile->forceNeeded = true;
if (isWeak)
@@ -409,9 +396,10 @@ static void addFramework(StringRef name, bool isNeeded, bool isWeak,
}
// Parses LC_LINKER_OPTION contents, which can add additional command line
-// flags.
+// flags. This directly parses the flags instead of using the standard argument
+// parser to improve performance.
void macho::parseLCLinkerOption(InputFile *f, unsigned argc, StringRef data) {
- SmallVector<const char *, 4> argv;
+ SmallVector<StringRef, 4> argv;
size_t offset = 0;
for (unsigned i = 0; i < argc && offset < data.size(); ++i) {
argv.push_back(data.data() + offset);
@@ -420,31 +408,20 @@ void macho::parseLCLinkerOption(InputFile *f, unsigned argc, StringRef data) {
if (argv.size() != argc || offset > data.size())
fatal(toString(f) + ": invalid LC_LINKER_OPTION");
- MachOOptTable table;
- unsigned missingIndex, missingCount;
- InputArgList args = table.ParseArgs(argv, missingIndex, missingCount);
- if (missingCount)
- fatal(Twine(args.getArgString(missingIndex)) + ": missing argument");
- for (const Arg *arg : args.filtered(OPT_UNKNOWN))
- error("unknown argument: " + arg->getAsString(args));
-
- for (const Arg *arg : args) {
- switch (arg->getOption().getID()) {
- case OPT_l: {
- StringRef name = arg->getValue();
- bool forceLoad =
- config->forceLoadSwift ? name.startswith("swift") : false;
- addLibrary(name, /*isNeeded=*/false, /*isWeak=*/false,
- /*isReexport=*/false, /*isExplicit=*/false, forceLoad);
- break;
- }
- case OPT_framework:
- addFramework(arg->getValue(), /*isNeeded=*/false, /*isWeak=*/false,
- /*isReexport=*/false, /*isExplicit=*/false);
- break;
- default:
- error(arg->getSpelling() + " is not allowed in LC_LINKER_OPTION");
- }
+ unsigned i = 0;
+ StringRef arg = argv[i];
+ if (arg.consume_front("-l")) {
+ ForceLoad forceLoadArchive =
+ config->forceLoadSwift && arg.startswith("swift") ? ForceLoad::Yes
+ : ForceLoad::No;
+ addLibrary(arg, /*isNeeded=*/false, /*isWeak=*/false,
+ /*isReexport=*/false, /*isExplicit=*/false, forceLoadArchive);
+ } else if (arg == "-framework") {
+ StringRef name = argv[++i];
+ addFramework(name, /*isNeeded=*/false, /*isWeak=*/false,
+ /*isReexport=*/false, /*isExplicit=*/false, ForceLoad::No);
+ } else {
+ error(arg + " is not allowed in LC_LINKER_OPTION");
}
}
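
For reference, LC_LINKER_OPTION stores argc NUL-terminated strings back to back, which is why the simple offset walk above can replace the full option table. A small standalone sketch of that scan follows; splitLinkerOptions and the example blob are made up for illustration.

#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

static SmallVector<StringRef, 4> splitLinkerOptions(StringRef data,
                                                    unsigned argc) {
  SmallVector<StringRef, 4> argv;
  size_t offset = 0;
  for (unsigned i = 0; i < argc && offset < data.size(); ++i) {
    // Each argument runs up to (and excluding) its terminating NUL byte.
    StringRef arg = data.substr(offset).split('\0').first;
    argv.push_back(arg);
    offset += arg.size() + 1;
  }
  return argv;
}

int main() {
  // Two arguments encoded back to back: "-framework\0Foundation\0"
  StringRef blob("-framework\0Foundation\0", 22);
  for (StringRef arg : splitLinkerOptions(blob, 2))
    outs() << arg << "\n";
}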
@@ -454,7 +431,7 @@ static void addFileList(StringRef path) {
return;
MemoryBufferRef mbref = *buffer;
for (StringRef path : args::getLines(mbref))
- addFile(rerootPath(path), /*forceLoadArchive=*/false);
+ addFile(rerootPath(path), ForceLoad::Default);
}
// An order file has one entry per line, in the following format:
@@ -693,10 +670,12 @@ static PlatformKind parsePlatformVersion(const ArgList &args) {
// Has the side-effect of setting Config::target.
static TargetInfo *createTargetInfo(InputArgList &args) {
StringRef archName = args.getLastArgValue(OPT_arch);
- if (archName.empty())
- fatal("must specify -arch");
- PlatformKind platform = parsePlatformVersion(args);
+ if (archName.empty()) {
+ error("must specify -arch");
+ return nullptr;
+ }
+ PlatformKind platform = parsePlatformVersion(args);
config->platformInfo.target =
MachO::Target(getArchitectureFromName(archName), platform);
@@ -714,7 +693,8 @@ static TargetInfo *createTargetInfo(InputArgList &args) {
case CPU_TYPE_ARM:
return createARMTargetInfo(cpuSubtype);
default:
- fatal("missing or unsupported -arch " + archName);
+ error("missing or unsupported -arch " + archName);
+ return nullptr;
}
}
@@ -745,7 +725,6 @@ getUndefinedSymbolTreatment(const ArgList &args) {
}
static ICFLevel getICFLevel(const ArgList &args) {
- bool noDeduplicate = args.hasArg(OPT_no_deduplicate);
StringRef icfLevelStr = args.getLastArgValue(OPT_icf_eq);
auto icfLevel = StringSwitch<ICFLevel>(icfLevelStr)
.Cases("none", "", ICFLevel::none)
@@ -756,10 +735,6 @@ static ICFLevel getICFLevel(const ArgList &args) {
warn(Twine("unknown --icf=OPTION `") + icfLevelStr +
"', defaulting to `none'");
icfLevel = ICFLevel::none;
- } else if (icfLevel != ICFLevel::none && noDeduplicate) {
- warn(Twine("`--icf=" + icfLevelStr +
- "' conflicts with -no_deduplicate, setting to `none'"));
- icfLevel = ICFLevel::none;
} else if (icfLevel == ICFLevel::safe) {
warn(Twine("`--icf=safe' is not yet implemented, reverting to `none'"));
icfLevel = ICFLevel::none;
@@ -794,6 +769,8 @@ static void warnIfUnimplementedOption(const Option &opt) {
case OPT_grp_ignored:
warn("Option `" + opt.getPrefixedName() + "' is ignored.");
break;
+ case OPT_grp_ignored_silently:
+ break;
default:
warn("Option `" + opt.getPrefixedName() +
"' is not yet implemented. Stay tuned...");
@@ -981,7 +958,7 @@ static void handleSymbolPatterns(InputArgList &args,
}
}
-void createFiles(const InputArgList &args) {
+static void createFiles(const InputArgList &args) {
TimeTraceScope timeScope("Load input files");
// This loop should be reserved for options whose exact ordering matters.
// Other options should be handled via filtered() and/or getLastArg().
@@ -992,30 +969,30 @@ void createFiles(const InputArgList &args) {
switch (opt.getID()) {
case OPT_INPUT:
- addFile(rerootPath(arg->getValue()), /*forceLoadArchive=*/false);
+ addFile(rerootPath(arg->getValue()), ForceLoad::Default);
break;
case OPT_needed_library:
if (auto *dylibFile = dyn_cast_or_null<DylibFile>(
- addFile(rerootPath(arg->getValue()), false)))
+ addFile(rerootPath(arg->getValue()), ForceLoad::Default)))
dylibFile->forceNeeded = true;
break;
case OPT_reexport_library:
- if (auto *dylibFile = dyn_cast_or_null<DylibFile>(addFile(
- rerootPath(arg->getValue()), /*forceLoadArchive=*/false))) {
+ if (auto *dylibFile = dyn_cast_or_null<DylibFile>(
+ addFile(rerootPath(arg->getValue()), ForceLoad::Default))) {
config->hasReexports = true;
dylibFile->reexport = true;
}
break;
case OPT_weak_library:
if (auto *dylibFile = dyn_cast_or_null<DylibFile>(
- addFile(rerootPath(arg->getValue()), /*forceLoadArchive=*/false)))
+ addFile(rerootPath(arg->getValue()), ForceLoad::Default)))
dylibFile->forceWeakImport = true;
break;
case OPT_filelist:
addFileList(arg->getValue());
break;
case OPT_force_load:
- addFile(rerootPath(arg->getValue()), /*forceLoadArchive=*/true);
+ addFile(rerootPath(arg->getValue()), ForceLoad::Yes);
break;
case OPT_l:
case OPT_needed_l:
@@ -1023,7 +1000,7 @@ void createFiles(const InputArgList &args) {
case OPT_weak_l:
addLibrary(arg->getValue(), opt.getID() == OPT_needed_l,
opt.getID() == OPT_weak_l, opt.getID() == OPT_reexport_l,
- /*isExplicit=*/true, /*forceLoad=*/false);
+ /*isExplicit=*/true, ForceLoad::Default);
break;
case OPT_framework:
case OPT_needed_framework:
@@ -1031,7 +1008,8 @@ void createFiles(const InputArgList &args) {
case OPT_weak_framework:
addFramework(arg->getValue(), opt.getID() == OPT_needed_framework,
opt.getID() == OPT_weak_framework,
- opt.getID() == OPT_reexport_framework, /*isExplicit=*/true);
+ opt.getID() == OPT_reexport_framework, /*isExplicit=*/true,
+ ForceLoad::Default);
break;
default:
break;
@@ -1043,27 +1021,30 @@ static void gatherInputSections() {
TimeTraceScope timeScope("Gathering input sections");
int inputOrder = 0;
for (const InputFile *file : inputFiles) {
- for (const SubsectionMap &map : file->subsections) {
+ for (const Section &section : file->sections) {
+ const Subsections &subsections = section.subsections;
+ if (subsections.empty())
+ continue;
+ if (subsections[0].isec->getName() == section_names::compactUnwind)
+ // Compact unwind entries require special handling elsewhere.
+ continue;
ConcatOutputSection *osec = nullptr;
- for (const SubsectionEntry &entry : map) {
- if (auto *isec = dyn_cast<ConcatInputSection>(entry.isec)) {
+ for (const Subsection &subsection : subsections) {
+ if (auto *isec = dyn_cast<ConcatInputSection>(subsection.isec)) {
if (isec->isCoalescedWeak())
continue;
- if (isec->getSegName() == segment_names::ld) {
- assert(isec->getName() == section_names::compactUnwind);
- in.unwindInfo->addInput(isec);
- continue;
- }
isec->outSecOff = inputOrder++;
if (!osec)
osec = ConcatOutputSection::getOrCreateForInput(isec);
isec->parent = osec;
inputSections.push_back(isec);
- } else if (auto *isec = dyn_cast<CStringInputSection>(entry.isec)) {
+ } else if (auto *isec =
+ dyn_cast<CStringInputSection>(subsection.isec)) {
if (in.cStringSection->inputOrder == UnspecifiedInputOrder)
in.cStringSection->inputOrder = inputOrder++;
in.cStringSection->addInput(isec);
- } else if (auto *isec = dyn_cast<WordLiteralInputSection>(entry.isec)) {
+ } else if (auto *isec =
+ dyn_cast<WordLiteralInputSection>(subsection.isec)) {
if (in.wordLiteralSection->inputOrder == UnspecifiedInputOrder)
in.wordLiteralSection->inputOrder = inputOrder++;
in.wordLiteralSection->addInput(isec);
@@ -1107,7 +1088,28 @@ bool macho::link(ArrayRef<const char *> argsArr, bool canExitEarly,
lld::stdoutOS = &stdoutOS;
lld::stderrOS = &stderrOS;
- errorHandler().cleanupCallback = []() { freeArena(); };
+ errorHandler().cleanupCallback = []() {
+ freeArena();
+
+ resolvedFrameworks.clear();
+ resolvedLibraries.clear();
+ cachedReads.clear();
+ concatOutputSections.clear();
+ inputFiles.clear();
+ inputSections.clear();
+ loadedArchives.clear();
+ syntheticSections.clear();
+ thunkMap.clear();
+
+ firstTLVDataSection = nullptr;
+ tar = nullptr;
+ memset(&in, 0, sizeof(in));
+
+ resetLoadedDylibs();
+ resetOutputSegments();
+ resetWriter();
+ InputFile::resetIdCount();
+ };
errorHandler().logName = args::getFilenameWithoutExe(argsArr[0]);
stderrOS.enable_colors(stderrOS.has_colors());
@@ -1139,6 +1141,33 @@ bool macho::link(ArrayRef<const char *> argsArr, bool canExitEarly,
target = createTargetInfo(args);
depTracker =
make<DependencyTracker>(args.getLastArgValue(OPT_dependency_info));
+ if (errorCount())
+ return false;
+
+ config->osoPrefix = args.getLastArgValue(OPT_oso_prefix);
+ if (!config->osoPrefix.empty()) {
+ // Expand special characters, such as ".", "..", or "~", if present.
+ // Note: LD64 only expands "." and not other special characters.
+ // That seems silly to imitate so we will not try to follow it, but rather
+ // just use real_path() to do it.
+
+ // The maximum path length is 4096, in theory. However, that seems quite
+ // long, and it seems unlikely that anyone would want to strip everything
+ // from the path. Hence we've picked a reasonably large number here.
+ SmallString<1024> expanded;
+ if (!fs::real_path(config->osoPrefix, expanded,
+ /*expand_tilde=*/true)) {
+ // Note: LD64 expands "." to be `<current_dir>/`
+ // (ie., it has a slash suffix) whereas real_path() doesn't.
+ // So we have to append '/' to be consistent.
+ StringRef sep = sys::path::get_separator();
+ // real_path removes trailing slashes as part of the normalization, but
+ // these are meaningful for our text-based stripping.
+ if (config->osoPrefix.equals(".") || config->osoPrefix.endswith(sep))
+ expanded += sep;
+ config->osoPrefix = saver.save(expanded.str());
+ }
+ }
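
A compact sketch of the expansion rule implemented above: canonicalize the prefix with real_path(), then restore the separator that normalization strips so the later text-based prefix removal still matches. expandOsoPrefix is a hypothetical helper, not the patch's code.

#include "llvm/ADT/SmallString.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
#include <string>

using namespace llvm;

static std::string expandOsoPrefix(StringRef prefix) {
  SmallString<256> expanded;
  if (sys::fs::real_path(prefix, expanded, /*expand_tilde=*/true))
    return prefix.str(); // expansion failed; keep the original text
  StringRef sep = sys::path::get_separator();
  // "." expands to the cwd and directories lose their trailing slash, so put
  // the separator back whenever the original spelling implied one.
  if (prefix == "." || prefix.endswith(sep))
    expanded += sep;
  return expanded.str().str();
}

int main() {
  outs() << expandOsoPrefix(".") << "\n";    // e.g. /home/user/build/
  outs() << expandOsoPrefix("~/src/") << "\n";
}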
// Must be set before any InputSections and Symbols are created.
config->deadStrip = args.hasArg(OPT_dead_strip);
@@ -1193,11 +1222,13 @@ bool macho::link(ArrayRef<const char *> argsArr, bool canExitEarly,
args.hasArg(OPT_print_dylib_search) || getenv("RC_TRACE_DYLIB_SEARCHING");
config->printEachFile = args.hasArg(OPT_t);
config->printWhyLoad = args.hasArg(OPT_why_load);
+ config->omitDebugInfo = args.hasArg(OPT_S);
config->outputType = getOutputType(args);
+ config->errorForArchMismatch = args.hasArg(OPT_arch_errors_fatal);
if (const Arg *arg = args.getLastArg(OPT_bundle_loader)) {
if (config->outputType != MH_BUNDLE)
error("-bundle_loader can only be used with MachO bundle output");
- addFile(arg->getValue(), /*forceLoadArchive=*/false, /*isExplicit=*/false,
+ addFile(arg->getValue(), ForceLoad::Default, /*isExplicit=*/false,
/*isBundleLoader=*/true);
}
if (const Arg *arg = args.getLastArg(OPT_umbrella)) {
@@ -1233,6 +1264,8 @@ bool macho::link(ArrayRef<const char *> argsArr, bool canExitEarly,
config->icfLevel = getICFLevel(args);
config->dedupLiterals = args.hasArg(OPT_deduplicate_literals) ||
config->icfLevel != ICFLevel::none;
+ config->warnDylibInstallName = args.hasFlag(
+ OPT_warn_dylib_install_name, OPT_no_warn_dylib_install_name, false);
// FIXME: Add a commandline flag for this too.
config->zeroModTime = getenv("ZERO_AR_DATE");
@@ -1249,8 +1282,10 @@ bool macho::link(ArrayRef<const char *> argsArr, bool canExitEarly,
#endif
if (const Arg *arg = args.getLastArg(OPT_install_name)) {
- if (config->outputType != MH_DYLIB)
- warn(arg->getAsString(args) + ": ignored, only has effect with -dylib");
+ if (config->warnDylibInstallName && config->outputType != MH_DYLIB)
+ warn(
+ arg->getAsString(args) +
+ ": ignored, only has effect with -dylib [--warn-dylib-install-name]");
else
config->installName = arg->getValue();
} else if (config->outputType == MH_DYLIB) {
@@ -1354,15 +1389,17 @@ bool macho::link(ArrayRef<const char *> argsArr, bool canExitEarly,
config->platform() == PlatformKind::macOS);
if (args.hasArg(OPT_v)) {
- message(getLLDVersion());
+ message(getLLDVersion(), lld::errs());
message(StringRef("Library search paths:") +
- (config->librarySearchPaths.empty()
- ? ""
- : "\n\t" + join(config->librarySearchPaths, "\n\t")));
+ (config->librarySearchPaths.empty()
+ ? ""
+ : "\n\t" + join(config->librarySearchPaths, "\n\t")),
+ lld::errs());
message(StringRef("Framework search paths:") +
- (config->frameworkSearchPaths.empty()
- ? ""
- : "\n\t" + join(config->frameworkSearchPaths, "\n\t")));
+ (config->frameworkSearchPaths.empty()
+ ? ""
+ : "\n\t" + join(config->frameworkSearchPaths, "\n\t")),
+ lld::errs());
}
config->progName = argsArr[0];
@@ -1407,6 +1444,8 @@ bool macho::link(ArrayRef<const char *> argsArr, bool canExitEarly,
reexportHandler(arg, extensions);
}
+ cl::ResetAllOptionOccurrences();
+
// Parse LTO options.
if (const Arg *arg = args.getLastArg(OPT_mcpu))
parseClangOption(saver.save("-mcpu=" + StringRef(arg->getValue())),
@@ -1431,24 +1470,33 @@ bool macho::link(ArrayRef<const char *> argsArr, bool canExitEarly,
createSyntheticSymbols();
if (!config->exportedSymbols.empty()) {
- for (Symbol *sym : symtab->getSymbols()) {
+ parallelForEach(symtab->getSymbols(), [](Symbol *sym) {
if (auto *defined = dyn_cast<Defined>(sym)) {
StringRef symbolName = defined->getName();
if (config->exportedSymbols.match(symbolName)) {
if (defined->privateExtern) {
- error("cannot export hidden symbol " + symbolName +
- "\n>>> defined in " + toString(defined->getFile()));
+ if (defined->weakDefCanBeHidden) {
+ // weak_def_can_be_hidden symbols behave similarly to
+ // private_extern symbols in most cases, except when they are
+ // explicitly exported.
+ // The former can be exported but the latter cannot.
+ defined->privateExtern = false;
+ } else {
+ warn("cannot export hidden symbol " + symbolName +
+ "\n>>> defined in " + toString(defined->getFile()));
+ }
}
} else {
defined->privateExtern = true;
}
}
- }
+ });
} else if (!config->unexportedSymbols.empty()) {
- for (Symbol *sym : symtab->getSymbols())
+ parallelForEach(symtab->getSymbols(), [](Symbol *sym) {
if (auto *defined = dyn_cast<Defined>(sym))
if (config->unexportedSymbols.match(defined->getName()))
defined->privateExtern = true;
+ });
}
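
The export/unexport passes above are safe to parallelize because each iteration only mutates its own Symbol. A minimal sketch of the same parallelForEach pattern with stand-in types; Sym and the name check are made up.

#include "llvm/Support/Parallel.h"
#include <atomic>
#include <string>
#include <vector>

struct Sym {
  std::string name;
  bool privateExtern = false;
};

int main() {
  std::vector<Sym> symbols = {{"_keep"}, {"_hide_me"}, {"_also_hide"}};
  std::atomic<unsigned> hidden{0};
  llvm::parallelForEach(symbols, [&](Sym &s) {
    if (s.name != "_keep") { // stand-in for the export-list match
      s.privateExtern = true;
      ++hidden;
    }
  });
  return hidden == 2 ? 0 : 1;
}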
for (const Arg *arg : args.filtered(OPT_sectcreate)) {
@@ -1481,12 +1529,9 @@ bool macho::link(ArrayRef<const char *> argsArr, bool canExitEarly,
}
if (config->timeTraceEnabled) {
- if (auto E = timeTraceProfilerWrite(
- args.getLastArgValue(OPT_time_trace_file_eq).str(),
- config->outputFile)) {
- handleAllErrors(std::move(E),
- [&](const StringError &SE) { error(SE.getMessage()); });
- }
+ checkError(timeTraceProfilerWrite(
+ args.getLastArgValue(OPT_time_trace_file_eq).str(),
+ config->outputFile));
timeTraceProfilerCleanup();
}
@@ -1494,5 +1539,7 @@ bool macho::link(ArrayRef<const char *> argsArr, bool canExitEarly,
if (canExitEarly)
exitLld(errorCount() ? 1 : 0);
- return !errorCount();
+ bool ret = errorCount() == 0;
+ errorHandler().reset();
+ return ret;
}
diff --git a/lld/MachO/Driver.h b/lld/MachO/Driver.h
index 10f307e780c2..4a970ac8a084 100644
--- a/lld/MachO/Driver.h
+++ b/lld/MachO/Driver.h
@@ -52,10 +52,11 @@ void parseLCLinkerOption(InputFile *, unsigned argc, StringRef data);
std::string createResponseFile(const llvm::opt::InputArgList &args);
// Check for both libfoo.dylib and libfoo.tbd (in that order).
-llvm::Optional<std::string> resolveDylibPath(llvm::StringRef path);
+llvm::Optional<StringRef> resolveDylibPath(llvm::StringRef path);
DylibFile *loadDylib(llvm::MemoryBufferRef mbref, DylibFile *umbrella = nullptr,
bool isBundleLoader = false);
+void resetLoadedDylibs();
// Search for all possible combinations of `{root}/{name}.{extension}`.
// If \p extensions are not specified, then just search for `{root}/{name}`.
@@ -68,11 +69,6 @@ findPathCombination(const llvm::Twine &name,
// rerooted.
llvm::StringRef rerootPath(llvm::StringRef path);
-llvm::Optional<InputFile *> loadArchiveMember(MemoryBufferRef, uint32_t modTime,
- StringRef archiveName,
- bool objCOnly,
- uint64_t offsetInArchive);
-
uint32_t getModTime(llvm::StringRef path);
void printArchiveMemberLoad(StringRef reason, const InputFile *);
diff --git a/lld/MachO/DriverUtils.cpp b/lld/MachO/DriverUtils.cpp
index fc25182d9140..3c5440544614 100644
--- a/lld/MachO/DriverUtils.cpp
+++ b/lld/MachO/DriverUtils.cpp
@@ -18,7 +18,6 @@
#include "lld/Common/Reproduce.h"
#include "llvm/ADT/CachedHashString.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/LTO/LTO.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
@@ -184,20 +183,20 @@ static void searchedDylib(const Twine &path, bool found) {
depTracker->logFileNotFound(path);
}
-Optional<std::string> macho::resolveDylibPath(StringRef dylibPath) {
+Optional<StringRef> macho::resolveDylibPath(StringRef dylibPath) {
// TODO: if a tbd and dylib are both present, we should check to make sure
// they are consistent.
- bool dylibExists = fs::exists(dylibPath);
- searchedDylib(dylibPath, dylibExists);
- if (dylibExists)
- return std::string(dylibPath);
-
SmallString<261> tbdPath = dylibPath;
path::replace_extension(tbdPath, ".tbd");
bool tbdExists = fs::exists(tbdPath);
searchedDylib(tbdPath, tbdExists);
if (tbdExists)
- return std::string(tbdPath);
+ return saver.save(tbdPath.str());
+
+ bool dylibExists = fs::exists(dylibPath);
+ searchedDylib(dylibPath, dylibExists);
+ if (dylibExists)
+ return saver.save(dylibPath);
return {};
}
@@ -247,6 +246,8 @@ DylibFile *macho::loadDylib(MemoryBufferRef mbref, DylibFile *umbrella,
return newFile;
}
+void macho::resetLoadedDylibs() { loadedDylibs.clear(); }
+
Optional<StringRef>
macho::findPathCombination(const Twine &name,
const std::vector<StringRef> &roots,
@@ -277,30 +278,6 @@ StringRef macho::rerootPath(StringRef path) {
return path;
}
-Optional<InputFile *> macho::loadArchiveMember(MemoryBufferRef mb,
- uint32_t modTime,
- StringRef archiveName,
- bool objCOnly,
- uint64_t offsetInArchive) {
- if (config->zeroModTime)
- modTime = 0;
-
- switch (identify_magic(mb.getBuffer())) {
- case file_magic::macho_object:
- if (!objCOnly || hasObjCSection(mb))
- return make<ObjFile>(mb, modTime, archiveName);
- return None;
- case file_magic::bitcode:
- if (!objCOnly || check(isBitcodeContainingObjCCategory(mb)))
- return make<BitcodeFile>(mb, archiveName, offsetInArchive);
- return None;
- default:
- error(archiveName + ": archive member " + mb.getBufferIdentifier() +
- " has unhandled file type");
- return None;
- }
-}
-
uint32_t macho::getModTime(StringRef path) {
if (config->zeroModTime)
return 0;
diff --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp
index 370a325125ca..f9dea4b861ac 100644
--- a/lld/MachO/ICF.cpp
+++ b/lld/MachO/ICF.cpp
@@ -105,25 +105,29 @@ static bool equalsConstant(const ConcatInputSection *ia,
return false;
InputSection *isecA, *isecB;
+
+ uint64_t valueA = 0;
+ uint64_t valueB = 0;
if (ra.referent.is<Symbol *>()) {
const auto *sa = ra.referent.get<Symbol *>();
const auto *sb = rb.referent.get<Symbol *>();
if (sa->kind() != sb->kind())
return false;
- if (isa<Defined>(sa)) {
- const auto *da = cast<Defined>(sa);
- const auto *db = cast<Defined>(sb);
- if (da->isec && db->isec) {
- isecA = da->isec;
- isecB = db->isec;
- } else {
- assert(da->isAbsolute() && db->isAbsolute());
- return da->value == db->value;
- }
- } else {
- assert(isa<DylibSymbol>(sa));
+ if (!isa<Defined>(sa)) {
+ // ICF runs before Undefineds are reported.
+ assert(isa<DylibSymbol>(sa) || isa<Undefined>(sa));
return sa == sb;
}
+ const auto *da = cast<Defined>(sa);
+ const auto *db = cast<Defined>(sb);
+ if (!da->isec || !db->isec) {
+ assert(da->isAbsolute() && db->isAbsolute());
+ return da->value == db->value;
+ }
+ isecA = da->isec;
+ valueA = da->value;
+ isecB = db->isec;
+ valueB = db->value;
} else {
isecA = ra.referent.get<InputSection *>();
isecB = rb.referent.get<InputSection *>();
@@ -138,7 +142,8 @@ static bool equalsConstant(const ConcatInputSection *ia,
return true;
// Else we have two literal sections. References to them are equal iff their
// offsets in the output section are equal.
- return isecA->getOffset(ra.addend) == isecB->getOffset(rb.addend);
+ return isecA->getOffset(valueA + ra.addend) ==
+ isecB->getOffset(valueB + rb.addend);
};
return std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(),
f);
@@ -176,8 +181,31 @@ static bool equalsVariable(const ConcatInputSection *ia,
}
return isecA->icfEqClass[icfPass % 2] == isecB->icfEqClass[icfPass % 2];
};
- return std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(),
- f);
+ if (!std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(), f))
+ return false;
+
+ // If there are symbols with associated unwind info, check that the unwind
+ // info matches. For simplicity, we only handle the case where all symbols
+ // are at offset zero within the section (which is typically the case with
+ // .subsections_via_symbols.)
+ auto hasCU = [](Defined *d) { return d->unwindEntry != nullptr; };
+ auto itA = std::find_if(ia->symbols.begin(), ia->symbols.end(), hasCU);
+ auto itB = std::find_if(ib->symbols.begin(), ib->symbols.end(), hasCU);
+ if (itA == ia->symbols.end())
+ return itB == ib->symbols.end();
+ if (itB == ib->symbols.end())
+ return false;
+ const Defined *da = *itA;
+ const Defined *db = *itB;
+ if (da->unwindEntry->icfEqClass[icfPass % 2] !=
+ db->unwindEntry->icfEqClass[icfPass % 2] ||
+ da->value != 0 || db->value != 0)
+ return false;
+ auto isZero = [](Defined *d) { return d->value == 0; };
+ return std::find_if_not(std::next(itA), ia->symbols.end(), isZero) ==
+ ia->symbols.end() &&
+ std::find_if_not(std::next(itB), ib->symbols.end(), isZero) ==
+ ib->symbols.end();
}
// Find the first InputSection after BEGIN whose equivalence class differs
@@ -248,7 +276,7 @@ void ICF::run() {
} else {
hash += defined->value;
}
- } else
+ } else if (!isa<Undefined>(sym)) // ICF runs before Undefined diags.
llvm_unreachable("foldIdenticalSections symbol kind");
}
}
@@ -311,22 +339,6 @@ void ICF::segregate(
}
}
-template <class Ptr>
-DenseSet<const InputSection *> findFunctionsWithUnwindInfo() {
- DenseSet<const InputSection *> result;
- for (ConcatInputSection *isec : in.unwindInfo->getInputs()) {
- for (size_t i = 0; i < isec->relocs.size(); ++i) {
- Reloc &r = isec->relocs[i];
- assert(target->hasAttr(r.type, RelocAttrBits::UNSIGNED));
- if (r.offset % sizeof(CompactUnwindEntry<Ptr>) !=
- offsetof(CompactUnwindEntry<Ptr>, functionAddress))
- continue;
- result.insert(r.referent.get<InputSection *>());
- }
- }
- return result;
-}
-
void macho::foldIdenticalSections() {
TimeTraceScope timeScope("Fold Identical Code Sections");
// The ICF equivalence-class segregation algorithm relies on pre-computed
@@ -336,11 +348,6 @@ void macho::foldIdenticalSections() {
// parallelization. Therefore, we hash every InputSection here where we have
// them all accessible as simple vectors.
- // ICF can't fold functions with unwind info
- DenseSet<const InputSection *> functionsWithUnwindInfo =
- target->wordSize == 8 ? findFunctionsWithUnwindInfo<uint64_t>()
- : findFunctionsWithUnwindInfo<uint32_t>();
-
// If an InputSection is ineligible for ICF, we give it a unique ID to force
// it into an unfoldable singleton equivalence class. Begin the unique-ID
// space at inputSections.size(), so that it will never intersect with
@@ -353,13 +360,15 @@ void macho::foldIdenticalSections() {
for (ConcatInputSection *isec : inputSections) {
// FIXME: consider non-code __text sections as hashable?
bool isHashable = (isCodeSection(isec) || isCfStringSection(isec)) &&
- !isec->shouldOmitFromOutput() &&
- !functionsWithUnwindInfo.contains(isec) &&
- isec->isHashableForICF();
- if (isHashable)
+ !isec->shouldOmitFromOutput() && isec->isHashableForICF();
+ if (isHashable) {
hashable.push_back(isec);
- else
+ for (Defined *d : isec->symbols)
+ if (d->unwindEntry)
+ hashable.push_back(d->unwindEntry);
+ } else {
isec->icfEqClass[0] = ++icfUniqueID;
+ }
}
parallelForEach(hashable,
[](ConcatInputSection *isec) { isec->hashForICF(); });
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index a4fb9035193c..558de4131cb9 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -70,6 +70,8 @@
#include "llvm/TextAPI/Architecture.h"
#include "llvm/TextAPI/InterfaceFile.h"
+#include <type_traits>
+
using namespace llvm;
using namespace llvm::MachO;
using namespace llvm::support::endian;
@@ -173,8 +175,19 @@ static bool checkCompatibility(const InputFile *input) {
return true;
}
+// This cache mostly exists to store system libraries (and .tbds) as they're
+// loaded, rather than the input archives, which are already cached at a higher
+// level, and other files like the filelist that are only read once.
+// Theoretically this caching could be more efficient by hoisting it, but that
+// would require altering many callers to track the state.
+DenseMap<CachedHashStringRef, MemoryBufferRef> macho::cachedReads;
// Open a given file path and return it as a memory-mapped file.
Optional<MemoryBufferRef> macho::readFile(StringRef path) {
+ CachedHashStringRef key(path);
+ auto entry = cachedReads.find(key);
+ if (entry != cachedReads.end())
+ return entry->second;
+
ErrorOr<std::unique_ptr<MemoryBuffer>> mbOrErr = MemoryBuffer::getFile(path);
if (std::error_code ec = mbOrErr.getError()) {
error("cannot open " + path + ": " + ec.message());
@@ -192,7 +205,7 @@ Optional<MemoryBufferRef> macho::readFile(StringRef path) {
read32be(&hdr->magic) != FAT_MAGIC) {
if (tar)
tar->append(relativeToRoot(path), mbref.getBuffer());
- return mbref;
+ return cachedReads[key] = mbref;
}
// Object files and archive files may be fat files, which contain multiple
@@ -217,7 +230,8 @@ Optional<MemoryBufferRef> macho::readFile(StringRef path) {
error(path + ": slice extends beyond end of file");
if (tar)
tar->append(relativeToRoot(path), mbref.getBuffer());
- return MemoryBufferRef(StringRef(buf + offset, size), path.copy(bAlloc));
+ return cachedReads[key] = MemoryBufferRef(StringRef(buf + offset, size),
+ path.copy(bAlloc));
}
error("unable to find matching architecture in " + path);
@@ -227,12 +241,32 @@ Optional<MemoryBufferRef> macho::readFile(StringRef path) {
InputFile::InputFile(Kind kind, const InterfaceFile &interface)
: id(idCount++), fileKind(kind), name(saver.save(interface.getPath())) {}
-template <class Section>
-void ObjFile::parseSections(ArrayRef<Section> sections) {
- subsections.reserve(sections.size());
+// Some sections consist of fixed-size records, so instead of splitting them at
+// symbol boundaries, we split them based on size. Records are distinct from
+// literals in that they may contain references to other sections, instead of
+// being leaf nodes in the InputSection graph.
+//
+// Note that "record" is a term I came up with. In contrast, "literal" is a term
+// used by the Mach-O format.
+static Optional<size_t> getRecordSize(StringRef segname, StringRef name) {
+ if (name == section_names::cfString) {
+ if (config->icfLevel != ICFLevel::none && segname == segment_names::data)
+ return target->wordSize == 8 ? 32 : 16;
+ } else if (name == section_names::compactUnwind) {
+ if (segname == segment_names::ld)
+ return target->wordSize == 8 ? 32 : 20;
+ }
+ return {};
+}
+
+// Parse the sequence of sections within a single LC_SEGMENT(_64).
+// Split each section into subsections.
+template <class SectionHeader>
+void ObjFile::parseSections(ArrayRef<SectionHeader> sectionHeaders) {
+ sections.reserve(sectionHeaders.size());
auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
- for (const Section &sec : sections) {
+ for (const SectionHeader &sec : sectionHeaders) {
StringRef name =
StringRef(sec.sectname, strnlen(sec.sectname, sizeof(sec.sectname)));
StringRef segname =
@@ -243,12 +277,29 @@ void ObjFile::parseSections(ArrayRef<Section> sections) {
if (sec.align >= 32) {
error("alignment " + std::to_string(sec.align) + " of section " + name +
" is too large");
- subsections.push_back({});
+ sections.push_back(sec.addr);
continue;
}
uint32_t align = 1 << sec.align;
uint32_t flags = sec.flags;
+ auto splitRecords = [&](int recordSize) -> void {
+ sections.push_back(sec.addr);
+ if (data.empty())
+ return;
+ Subsections &subsections = sections.back().subsections;
+ subsections.reserve(data.size() / recordSize);
+ auto *isec = make<ConcatInputSection>(
+ segname, name, this, data.slice(0, recordSize), align, flags);
+ subsections.push_back({0, isec});
+ for (uint64_t off = recordSize; off < data.size(); off += recordSize) {
+ // Copying requires less memory than constructing a fresh InputSection.
+ auto *copy = make<ConcatInputSection>(*isec);
+ copy->data = data.slice(off, recordSize);
+ subsections.push_back({off, copy});
+ }
+ };
+
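
A simplified sketch of the record-splitting step: a section whose size is a multiple of the record size is cut into one slice per record, mirroring what splitRecords does with ConcatInputSections. Types here are stand-ins. Per getRecordSize() above, __LD,__compact_unwind records are 32 bytes on 64-bit targets, so a 96-byte section yields slices at offsets 0, 32 and 64.

#include "llvm/ADT/ArrayRef.h"
#include <cstdint>
#include <utility>
#include <vector>

// (offset within the section, bytes of that record)
using RecordSlice = std::pair<uint64_t, llvm::ArrayRef<uint8_t>>;

static std::vector<RecordSlice> splitIntoRecords(llvm::ArrayRef<uint8_t> data,
                                                 uint64_t recordSize) {
  // Assumes data.size() is a multiple of recordSize, as the real sections are.
  std::vector<RecordSlice> slices;
  if (data.empty())
    return slices;
  slices.reserve(data.size() / recordSize);
  for (uint64_t off = 0; off < data.size(); off += recordSize)
    slices.push_back({off, data.slice(off, recordSize)});
  return slices;
}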
if (sectionType(sec.flags) == S_CSTRING_LITERALS ||
(config->dedupLiterals && isWordLiteralSection(sec.flags))) {
if (sec.nreloc && config->dedupLiterals)
@@ -267,31 +318,34 @@ void ObjFile::parseSections(ArrayRef<Section> sections) {
isec = make<WordLiteralInputSection>(segname, name, this, data, align,
flags);
}
- subsections.push_back({{0, isec}});
- } else if (config->icfLevel != ICFLevel::none &&
- (name == section_names::cfString &&
- segname == segment_names::data)) {
- uint64_t literalSize = target->wordSize == 8 ? 32 : 16;
- subsections.push_back({});
- SubsectionMap &subsecMap = subsections.back();
- for (uint64_t off = 0; off < data.size(); off += literalSize)
- subsecMap.push_back(
- {off, make<ConcatInputSection>(segname, name, this,
- data.slice(off, literalSize), align,
- flags)});
+ sections.push_back(sec.addr);
+ sections.back().subsections.push_back({0, isec});
+ } else if (auto recordSize = getRecordSize(segname, name)) {
+ splitRecords(*recordSize);
+ if (name == section_names::compactUnwind)
+ compactUnwindSection = &sections.back();
+ } else if (segname == segment_names::llvm) {
+ // ld64 does not appear to emit contents from sections within the __LLVM
+ // segment. Symbols within those sections point to bitcode metadata
+ // instead of actual symbols. Global symbols within those sections could
+ // have the same name without causing duplicate symbol errors. Push an
+ // empty entry to ensure indices line up for the remaining sections.
+ // TODO: Evaluate whether the bitcode metadata is needed.
+ sections.push_back(sec.addr);
} else {
auto *isec =
make<ConcatInputSection>(segname, name, this, data, align, flags);
- if (!(isDebugSection(isec->getFlags()) &&
- isec->getSegName() == segment_names::dwarf)) {
- subsections.push_back({{0, isec}});
- } else {
+ if (isDebugSection(isec->getFlags()) &&
+ isec->getSegName() == segment_names::dwarf) {
// Instead of emitting DWARF sections, we emit STABS symbols to the
// object files that contain them. We filter them out early to avoid
// parsing their relocations unnecessarily. But we must still push an
- // empty map to ensure the indices line up for the remaining sections.
- subsections.push_back({});
+ // empty entry to ensure the indices line up for the remaining sections.
+ sections.push_back(sec.addr);
debugSections.push_back(isec);
+ } else {
+ sections.push_back(sec.addr);
+ sections.back().subsections.push_back({0, isec});
}
}
}
@@ -304,18 +358,21 @@ void ObjFile::parseSections(ArrayRef<Section> sections) {
// any subsection splitting has occurred). It will be updated to represent the
// same location as an offset relative to the start of the containing
// subsection.
-static InputSection *findContainingSubsection(SubsectionMap &map,
- uint64_t *offset) {
+template <class T>
+static InputSection *findContainingSubsection(const Subsections &subsections,
+ T *offset) {
+ static_assert(std::is_same<uint64_t, T>::value ||
+ std::is_same<uint32_t, T>::value,
+ "unexpected type for offset");
auto it = std::prev(llvm::upper_bound(
- map, *offset, [](uint64_t value, SubsectionEntry subsecEntry) {
- return value < subsecEntry.offset;
- }));
+ subsections, *offset,
+ [](uint64_t value, Subsection subsec) { return value < subsec.offset; }));
*offset -= it->offset;
return it->isec;
}
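
A standalone sketch of the containment lookup above: subsections are sorted by offset, upper_bound finds the first one starting past the address, std::prev steps back to the containing one, and the offset is rebased to be subsection-relative. Subsec is a stand-in type.

#include "llvm/ADT/STLExtras.h"
#include <cassert>
#include <cstdint>
#include <vector>

struct Subsec {
  uint64_t offset;
  int id; // stand-in for InputSection*
};

static const Subsec *findContaining(const std::vector<Subsec> &subsections,
                                    uint64_t *offset) {
  assert(!subsections.empty() && subsections.front().offset == 0);
  auto it = std::prev(llvm::upper_bound(
      subsections, *offset,
      [](uint64_t value, const Subsec &s) { return value < s.offset; }));
  *offset -= it->offset;
  return &*it;
}

int main() {
  std::vector<Subsec> subs = {{0, 0}, {16, 1}, {48, 2}};
  uint64_t off = 20;
  const Subsec *s = findContaining(subs, &off);
  return (s->id == 1 && off == 4) ? 0 : 1; // address 20 is 4 bytes into subsection 1
}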
-template <class Section>
-static bool validateRelocationInfo(InputFile *file, const Section &sec,
+template <class SectionHeader>
+static bool validateRelocationInfo(InputFile *file, const SectionHeader &sec,
relocation_info rel) {
const RelocAttrs &relocAttrs = target->getRelocAttrs(rel.r_type);
bool valid = true;
@@ -346,14 +403,15 @@ static bool validateRelocationInfo(InputFile *file, const Section &sec,
return valid;
}
-template <class Section>
-void ObjFile::parseRelocations(ArrayRef<Section> sectionHeaders,
- const Section &sec, SubsectionMap &subsecMap) {
+template <class SectionHeader>
+void ObjFile::parseRelocations(ArrayRef<SectionHeader> sectionHeaders,
+ const SectionHeader &sec,
+ Subsections &subsections) {
auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
ArrayRef<relocation_info> relInfos(
reinterpret_cast<const relocation_info *>(buf + sec.reloff), sec.nreloc);
- auto subsecIt = subsecMap.rbegin();
+ auto subsecIt = subsections.rbegin();
for (size_t i = 0; i < relInfos.size(); i++) {
// Paired relocations serve as Mach-O's method for attaching a
// supplemental datum to a primary relocation record. ELF does not
@@ -380,8 +438,17 @@ void ObjFile::parseRelocations(ArrayRef<Section> sectionHeaders,
// and insert them. Storing addends in the instruction stream is
// possible, but inconvenient and more costly at link time.
- int64_t pairedAddend = 0;
relocation_info relInfo = relInfos[i];
+ bool isSubtrahend =
+ target->hasAttr(relInfo.r_type, RelocAttrBits::SUBTRAHEND);
+ if (isSubtrahend && StringRef(sec.sectname) == section_names::ehFrame) {
+ // __TEXT,__eh_frame only has symbols and SUBTRACTOR relocs when ld64 -r
+ // adds local "EH_Frame1" and "func.eh". Ignore them because they have
+ // gone unused by Mac OS since Snow Leopard (10.6), vintage 2009.
+ ++i;
+ continue;
+ }
+ int64_t pairedAddend = 0;
if (target->hasAttr(relInfo.r_type, RelocAttrBits::ADDEND)) {
pairedAddend = SignExtend64<24>(relInfo.r_symbolnum);
relInfo = relInfos[++i];
@@ -392,8 +459,6 @@ void ObjFile::parseRelocations(ArrayRef<Section> sectionHeaders,
if (relInfo.r_address & R_SCATTERED)
fatal("TODO: Scattered relocations not supported");
- bool isSubtrahend =
- target->hasAttr(relInfo.r_type, RelocAttrBits::SUBTRAHEND);
int64_t embeddedAddend = target->getEmbeddedAddend(mb, sec.offset, relInfo);
assert(!(embeddedAddend && pairedAddend));
int64_t totalAddend = pairedAddend + embeddedAddend;
@@ -407,7 +472,8 @@ void ObjFile::parseRelocations(ArrayRef<Section> sectionHeaders,
r.addend = isSubtrahend ? 0 : totalAddend;
} else {
assert(!isSubtrahend);
- const Section &referentSec = sectionHeaders[relInfo.r_symbolnum - 1];
+ const SectionHeader &referentSecHead =
+ sectionHeaders[relInfo.r_symbolnum - 1];
uint64_t referentOffset;
if (relInfo.r_pcrel) {
// The implicit addend for pcrel section relocations is the pcrel offset
@@ -417,14 +483,16 @@ void ObjFile::parseRelocations(ArrayRef<Section> sectionHeaders,
// have pcrel section relocations. We may want to factor this out into
// the arch-specific .cpp file.
assert(target->hasAttr(r.type, RelocAttrBits::BYTE4));
- referentOffset =
- sec.addr + relInfo.r_address + 4 + totalAddend - referentSec.addr;
+ referentOffset = sec.addr + relInfo.r_address + 4 + totalAddend -
+ referentSecHead.addr;
} else {
// The addend for a non-pcrel relocation is its absolute address.
- referentOffset = totalAddend - referentSec.addr;
+ referentOffset = totalAddend - referentSecHead.addr;
}
- SubsectionMap &referentSubsecMap = subsections[relInfo.r_symbolnum - 1];
- r.referent = findContainingSubsection(referentSubsecMap, &referentOffset);
+ Subsections &referentSubsections =
+ sections[relInfo.r_symbolnum - 1].subsections;
+ r.referent =
+ findContainingSubsection(referentSubsections, &referentOffset);
r.addend = referentOffset;
}
@@ -434,14 +502,14 @@ void ObjFile::parseRelocations(ArrayRef<Section> sectionHeaders,
// unsorted relocations (in `-r` mode), so we have a fallback for that
// uncommon case.
InputSection *subsec;
- while (subsecIt != subsecMap.rend() && subsecIt->offset > r.offset)
+ while (subsecIt != subsections.rend() && subsecIt->offset > r.offset)
++subsecIt;
- if (subsecIt == subsecMap.rend() ||
+ if (subsecIt == subsections.rend() ||
subsecIt->offset + subsecIt->isec->getSize() <= r.offset) {
- subsec = findContainingSubsection(subsecMap, &r.offset);
+ subsec = findContainingSubsection(subsections, &r.offset);
// Now that we know the relocs are unsorted, avoid trying the 'fast path'
// for the other relocations.
- subsecIt = subsecMap.rend();
+ subsecIt = subsections.rend();
} else {
subsec = subsecIt->isec;
r.offset -= subsecIt->offset;
@@ -462,10 +530,10 @@ void ObjFile::parseRelocations(ArrayRef<Section> sectionHeaders,
} else {
uint64_t referentOffset =
totalAddend - sectionHeaders[minuendInfo.r_symbolnum - 1].addr;
- SubsectionMap &referentSubsecMap =
- subsections[minuendInfo.r_symbolnum - 1];
+ Subsections &referentSubsectVec =
+ sections[minuendInfo.r_symbolnum - 1].subsections;
p.referent =
- findContainingSubsection(referentSubsecMap, &referentOffset);
+ findContainingSubsection(referentSubsectVec, &referentOffset);
p.addend = referentOffset;
}
subsec->relocs.push_back(p);
@@ -520,18 +588,23 @@ static macho::Symbol *createDefined(const NList &sym, StringRef name,
// with ld64's semantics, because it means the non-private-extern
// definition will continue to take priority if more private extern
// definitions are encountered. With lld's semantics there's no observable
- // difference between a symbol that's isWeakDefCanBeHidden or one that's
- // privateExtern -- neither makes it into the dynamic symbol table. So just
- // promote isWeakDefCanBeHidden to isPrivateExtern here.
- if (isWeakDefCanBeHidden)
+ // difference between a symbol that's isWeakDefCanBeHidden (autohide) and one
+ // that's privateExtern -- neither makes it into the dynamic symbol table,
+ // unless the autohide symbol is explicitly exported.
+ // But if a symbol is both privateExtern and autohide then it can't
+ // be exported.
+ // So we nullify the autohide flag when privateExtern is present
+ // and promote the symbol to privateExtern when it is not already.
+ if (isWeakDefCanBeHidden && isPrivateExtern)
+ isWeakDefCanBeHidden = false;
+ else if (isWeakDefCanBeHidden)
isPrivateExtern = true;
-
return symtab->addDefined(
name, isec->getFile(), isec, value, size, sym.n_desc & N_WEAK_DEF,
isPrivateExtern, sym.n_desc & N_ARM_THUMB_DEF,
- sym.n_desc & REFERENCED_DYNAMICALLY, sym.n_desc & N_NO_DEAD_STRIP);
+ sym.n_desc & REFERENCED_DYNAMICALLY, sym.n_desc & N_NO_DEAD_STRIP,
+ isWeakDefCanBeHidden);
}
-
assert(!isWeakDefCanBeHidden &&
"weak_def_can_be_hidden on already-hidden symbol?");
return make<Defined>(
@@ -550,7 +623,8 @@ static macho::Symbol *createAbsolute(const NList &sym, InputFile *file,
return symtab->addDefined(
name, file, nullptr, sym.n_value, /*size=*/0,
/*isWeakDef=*/false, sym.n_type & N_PEXT, sym.n_desc & N_ARM_THUMB_DEF,
- /*isReferencedDynamically=*/false, sym.n_desc & N_NO_DEAD_STRIP);
+ /*isReferencedDynamically=*/false, sym.n_desc & N_NO_DEAD_STRIP,
+ /*isWeakDefCanBeHidden=*/false);
}
return make<Defined>(name, file, nullptr, sym.n_value, /*size=*/0,
/*isWeakDef=*/false,
@@ -585,8 +659,7 @@ macho::Symbol *ObjFile::parseNonSectionSymbol(const NList &sym,
}
}
-template <class NList>
-static bool isUndef(const NList &sym) {
+template <class NList> static bool isUndef(const NList &sym) {
return (sym.n_type & N_TYPE) == N_UNDF && sym.n_value == 0;
}
@@ -597,7 +670,7 @@ void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
using NList = typename LP::nlist;
// Groups indices of the symbols by the sections that contain them.
- std::vector<std::vector<uint32_t>> symbolsBySection(subsections.size());
+ std::vector<std::vector<uint32_t>> symbolsBySection(sections.size());
symbols.resize(nList.size());
SmallVector<unsigned, 32> undefineds;
for (uint32_t i = 0; i < nList.size(); ++i) {
@@ -610,9 +683,9 @@ void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
StringRef name = strtab + sym.n_strx;
if ((sym.n_type & N_TYPE) == N_SECT) {
- SubsectionMap &subsecMap = subsections[sym.n_sect - 1];
+ Subsections &subsections = sections[sym.n_sect - 1].subsections;
// parseSections() may have chosen not to parse this section.
- if (subsecMap.empty())
+ if (subsections.empty())
continue;
symbolsBySection[sym.n_sect - 1].push_back(i);
} else if (isUndef(sym)) {
@@ -622,28 +695,34 @@ void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
}
}
- for (size_t i = 0; i < subsections.size(); ++i) {
- SubsectionMap &subsecMap = subsections[i];
- if (subsecMap.empty())
+ for (size_t i = 0; i < sections.size(); ++i) {
+ Subsections &subsections = sections[i].subsections;
+ if (subsections.empty())
continue;
-
+ InputSection *lastIsec = subsections.back().isec;
+ if (lastIsec->getName() == section_names::ehFrame) {
+ // __TEXT,__eh_frame only has symbols and SUBTRACTOR relocs when ld64 -r
+ // adds local "EH_Frame1" and "func.eh". Ignore them because they have
+ // gone unused by Mac OS since Snow Leopard (10.6), vintage 2009.
+ continue;
+ }
std::vector<uint32_t> &symbolIndices = symbolsBySection[i];
uint64_t sectionAddr = sectionHeaders[i].addr;
uint32_t sectionAlign = 1u << sectionHeaders[i].align;
- InputSection *isec = subsecMap.back().isec;
- // __cfstring has already been split into subsections during
+ // Record-based sections have already been split into subsections during
// parseSections(), so we simply need to match Symbols to the corresponding
// subsection here.
- if (config->icfLevel != ICFLevel::none && isCfStringSection(isec)) {
+ if (getRecordSize(lastIsec->getSegName(), lastIsec->getName())) {
for (size_t j = 0; j < symbolIndices.size(); ++j) {
uint32_t symIndex = symbolIndices[j];
const NList &sym = nList[symIndex];
StringRef name = strtab + sym.n_strx;
uint64_t symbolOffset = sym.n_value - sectionAddr;
- InputSection *isec = findContainingSubsection(subsecMap, &symbolOffset);
+ InputSection *isec =
+ findContainingSubsection(subsections, &symbolOffset);
if (symbolOffset != 0) {
- error(toString(this) + ": __cfstring contains symbol " + name +
+ error(toString(lastIsec) + ": symbol " + name +
" at misaligned offset");
continue;
}
@@ -654,19 +733,19 @@ void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
// Calculate symbol sizes and create subsections by splitting the sections
// along symbol boundaries.
- // We populate subsecMap by repeatedly splitting the last (highest address)
- // subsection.
+ // We populate subsections by repeatedly splitting the last (highest
+ // address) subsection.
llvm::stable_sort(symbolIndices, [&](uint32_t lhs, uint32_t rhs) {
return nList[lhs].n_value < nList[rhs].n_value;
});
- SubsectionEntry subsecEntry = subsecMap.back();
for (size_t j = 0; j < symbolIndices.size(); ++j) {
uint32_t symIndex = symbolIndices[j];
const NList &sym = nList[symIndex];
StringRef name = strtab + sym.n_strx;
- InputSection *isec = subsecEntry.isec;
+ Subsection &subsec = subsections.back();
+ InputSection *isec = subsec.isec;
- uint64_t subsecAddr = sectionAddr + subsecEntry.offset;
+ uint64_t subsecAddr = sectionAddr + subsec.offset;
size_t symbolOffset = sym.n_value - subsecAddr;
uint64_t symbolSize =
j + 1 < symbolIndices.size()
@@ -688,7 +767,6 @@ void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
auto *concatIsec = cast<ConcatInputSection>(isec);
auto *nextIsec = make<ConcatInputSection>(*concatIsec);
- nextIsec->numRefs = 0;
nextIsec->wasCoalesced = false;
if (isZeroFill(isec->getFlags())) {
// Zero-fill sections have NULL data.data() non-zero data.size()
@@ -707,8 +785,7 @@ void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
// subsection's offset from the last aligned address. We should consider
// emulating that behavior.
nextIsec->align = MinAlign(sectionAlign, sym.n_value);
- subsecMap.push_back({sym.n_value - sectionAddr, nextIsec});
- subsecEntry = subsecMap.back();
+ subsections.push_back({sym.n_value - sectionAddr, nextIsec});
}
}
@@ -734,7 +811,8 @@ OpaqueFile::OpaqueFile(MemoryBufferRef mb, StringRef segName,
make<ConcatInputSection>(segName.take_front(16), sectName.take_front(16),
/*file=*/this, data);
isec->live = true;
- subsections.push_back({{0, isec}});
+ sections.push_back(0);
+ sections.back().subsections.push_back({0, isec});
}
ObjFile::ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName)
@@ -749,7 +827,7 @@ ObjFile::ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName)
template <class LP> void ObjFile::parse() {
using Header = typename LP::mach_header;
using SegmentCommand = typename LP::segment_command;
- using Section = typename LP::section;
+ using SectionHeader = typename LP::section;
using NList = typename LP::nlist;
auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
@@ -757,9 +835,12 @@ template <class LP> void ObjFile::parse() {
Architecture arch = getArchitectureFromCpuType(hdr->cputype, hdr->cpusubtype);
if (arch != config->arch()) {
- error(toString(this) + " has architecture " + getArchitectureName(arch) +
- " which is incompatible with target architecture " +
- getArchitectureName(config->arch()));
+ auto msg = config->errorForArchMismatch
+ ? static_cast<void (*)(const Twine &)>(error)
+ : warn;
+ msg(toString(this) + " has architecture " + getArchitectureName(arch) +
+ " which is incompatible with target architecture " +
+ getArchitectureName(config->arch()));
return;
}
@@ -772,11 +853,11 @@ template <class LP> void ObjFile::parse() {
parseLCLinkerOption(this, cmd->count, data);
}
- ArrayRef<Section> sectionHeaders;
+ ArrayRef<SectionHeader> sectionHeaders;
if (const load_command *cmd = findCommand(hdr, LP::segmentLCType)) {
auto *c = reinterpret_cast<const SegmentCommand *>(cmd);
- sectionHeaders =
- ArrayRef<Section>{reinterpret_cast<const Section *>(c + 1), c->nsects};
+ sectionHeaders = ArrayRef<SectionHeader>{
+ reinterpret_cast<const SectionHeader *>(c + 1), c->nsects};
parseSections(sectionHeaders);
}
@@ -792,13 +873,16 @@ template <class LP> void ObjFile::parse() {
// The relocations may refer to the symbols, so we parse them after we have
// parsed all the symbols.
- for (size_t i = 0, n = subsections.size(); i < n; ++i)
- if (!subsections[i].empty())
- parseRelocations(sectionHeaders, sectionHeaders[i], subsections[i]);
+ for (size_t i = 0, n = sections.size(); i < n; ++i)
+ if (!sections[i].subsections.empty())
+ parseRelocations(sectionHeaders, sectionHeaders[i],
+ sections[i].subsections);
parseDebugInfo();
if (config->emitDataInCodeInfo)
parseDataInCode();
+ if (compactUnwindSection)
+ registerCompactUnwind();
}
void ObjFile::parseDebugInfo() {
@@ -839,6 +923,68 @@ void ObjFile::parseDataInCode() {
}));
}
+// Create pointers from symbols to their associated compact unwind entries.
+void ObjFile::registerCompactUnwind() {
+ for (const Subsection &subsection : compactUnwindSection->subsections) {
+ ConcatInputSection *isec = cast<ConcatInputSection>(subsection.isec);
+ // Hack!! Since each CUE contains a different function address, if ICF
+ // operated naively and compared the entire contents of each CUE, entries
+ // with identical unwind info but belonging to different functions would
+ // never be considered equivalent. To work around this problem, we slice
+ // away the function address here. (Note that we do not adjust the offsets
+ // of the corresponding relocations.) We rely on `relocateCompactUnwind()`
+ // to correctly handle these truncated input sections.
+ isec->data = isec->data.slice(target->wordSize);
+
+ ConcatInputSection *referentIsec;
+ for (auto it = isec->relocs.begin(); it != isec->relocs.end();) {
+ Reloc &r = *it;
+ // CUE::functionAddress is at offset 0. Skip personality & LSDA relocs.
+ if (r.offset != 0) {
+ ++it;
+ continue;
+ }
+ uint64_t add = r.addend;
+ if (auto *sym = cast_or_null<Defined>(r.referent.dyn_cast<Symbol *>())) {
+ // Check whether the symbol defined in this file is the prevailing one.
+ // Skip if it is e.g. a weak def that didn't prevail.
+ if (sym->getFile() != this) {
+ ++it;
+ continue;
+ }
+ add += sym->value;
+ referentIsec = cast<ConcatInputSection>(sym->isec);
+ } else {
+ referentIsec =
+ cast<ConcatInputSection>(r.referent.dyn_cast<InputSection *>());
+ }
+ if (referentIsec->getSegName() != segment_names::text)
+ error("compact unwind references address in " + toString(referentIsec) +
+ " which is not in segment __TEXT");
+ // The functionAddress relocations are typically section relocations.
+ // However, unwind info operates on a per-symbol basis, so we search for
+ // the function symbol here.
+ auto symIt = llvm::lower_bound(
+ referentIsec->symbols, add,
+ [](Defined *d, uint64_t add) { return d->value < add; });
+ // The relocation should point at the exact address of a symbol (with no
+ // addend).
+ if (symIt == referentIsec->symbols.end() || (*symIt)->value != add) {
+ assert(referentIsec->wasCoalesced);
+ ++it;
+ continue;
+ }
+ (*symIt)->unwindEntry = isec;
+ // Since we've sliced away the functionAddress, we should remove the
+ // corresponding relocation too. Given that clang emits relocations in
+ // reverse order of address, this relocation should be at the end of the
+ // vector for most of our input object files, so this is typically an O(1)
+ // operation.
+ it = isec->relocs.erase(it);
+ }
+ }
+}
+
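
A small sketch of the exact-address symbol lookup used by registerCompactUnwind(): symbols are ordered by value, lower_bound finds the first symbol at or above the relocated address, and only an exact match receives the unwind entry. Sym and attachUnwindEntry are made up for illustration.

#include "llvm/ADT/STLExtras.h"
#include <cstdint>
#include <vector>

struct Sym {
  uint64_t value;                   // offset of the symbol within its subsection
  const void *unwindEntry = nullptr;
};

static bool attachUnwindEntry(std::vector<Sym *> &symbols, uint64_t addr,
                              const void *entry) {
  auto it = llvm::lower_bound(
      symbols, addr, [](const Sym *s, uint64_t a) { return s->value < a; });
  // The relocation must point exactly at a symbol; coalesced weak defs may
  // legitimately leave no match, in which case the entry is dropped.
  if (it == symbols.end() || (*it)->value != addr)
    return false;
  (*it)->unwindEntry = entry;
  return true;
}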
// The path can point to either a dylib or a .tbd file.
static DylibFile *loadDylib(StringRef path, DylibFile *umbrella) {
Optional<MemoryBufferRef> mbref = readFile(path);
@@ -871,7 +1017,7 @@ static DylibFile *findDylib(StringRef path, DylibFile *umbrella,
for (StringRef dir : config->frameworkSearchPaths) {
SmallString<128> candidate = dir;
path::append(candidate, frameworkName);
- if (Optional<std::string> dylibPath = resolveDylibPath(candidate))
+ if (Optional<StringRef> dylibPath = resolveDylibPath(candidate.str()))
return loadDylib(*dylibPath, umbrella);
}
} else if (Optional<StringRef> dylibPath = findPathCombination(
@@ -882,8 +1028,7 @@ static DylibFile *findDylib(StringRef path, DylibFile *umbrella,
// 2. As absolute path.
if (path::is_absolute(path, path::Style::posix))
for (StringRef root : config->systemLibraryRoots)
- if (Optional<std::string> dylibPath =
- resolveDylibPath((root + path).str()))
+ if (Optional<StringRef> dylibPath = resolveDylibPath((root + path).str()))
return loadDylib(*dylibPath, umbrella);
// 3. As relative path.
@@ -912,7 +1057,7 @@ static DylibFile *findDylib(StringRef path, DylibFile *umbrella,
path::remove_filename(newPath);
}
path::append(newPath, rpath, path.drop_front(strlen("@rpath/")));
- if (Optional<std::string> dylibPath = resolveDylibPath(newPath))
+ if (Optional<StringRef> dylibPath = resolveDylibPath(newPath.str()))
return loadDylib(*dylibPath, umbrella);
}
}
@@ -930,7 +1075,7 @@ static DylibFile *findDylib(StringRef path, DylibFile *umbrella,
}
}
- if (Optional<std::string> dylibPath = resolveDylibPath(path))
+ if (Optional<StringRef> dylibPath = resolveDylibPath(path))
return loadDylib(*dylibPath, umbrella);
return nullptr;
@@ -1129,7 +1274,7 @@ DylibFile::DylibFile(const InterfaceFile &interface, DylibFile *umbrella,
void DylibFile::parseReexports(const InterfaceFile &interface) {
const InterfaceFile *topLevel =
interface.getParent() == nullptr ? &interface : interface.getParent();
- for (InterfaceFileRef intfRef : interface.reexportedLibraries()) {
+ for (const InterfaceFileRef &intfRef : interface.reexportedLibraries()) {
InterfaceFile::const_target_range targets = intfRef.targets();
if (is_contained(skipPlatformChecks, intfRef.getInstallName()) ||
is_contained(targets, config->platformInfo.target))
@@ -1225,47 +1370,75 @@ void DylibFile::checkAppExtensionSafety(bool dylibIsAppExtensionSafe) const {
}
ArchiveFile::ArchiveFile(std::unique_ptr<object::Archive> &&f)
- : InputFile(ArchiveKind, f->getMemoryBufferRef()), file(std::move(f)) {
+ : InputFile(ArchiveKind, f->getMemoryBufferRef()), file(std::move(f)) {}
+
+void ArchiveFile::addLazySymbols() {
for (const object::Archive::Symbol &sym : file->symbols())
symtab->addLazy(sym.getName(), this, sym);
}
-void ArchiveFile::fetch(const object::Archive::Symbol &sym) {
- object::Archive::Child c =
- CHECK(sym.getMember(), toString(this) +
- ": could not get the member for symbol " +
- toMachOString(sym));
+static Expected<InputFile *> loadArchiveMember(MemoryBufferRef mb,
+ uint32_t modTime,
+ StringRef archiveName,
+ uint64_t offsetInArchive) {
+ if (config->zeroModTime)
+ modTime = 0;
+
+ switch (identify_magic(mb.getBuffer())) {
+ case file_magic::macho_object:
+ return make<ObjFile>(mb, modTime, archiveName);
+ case file_magic::bitcode:
+ return make<BitcodeFile>(mb, archiveName, offsetInArchive);
+ default:
+ return createStringError(inconvertibleErrorCode(),
+ mb.getBufferIdentifier() +
+ " has unhandled file type");
+ }
+}
+Error ArchiveFile::fetch(const object::Archive::Child &c, StringRef reason) {
if (!seen.insert(c.getChildOffset()).second)
- return;
+ return Error::success();
- MemoryBufferRef mb =
- CHECK(c.getMemoryBufferRef(),
- toString(this) +
- ": could not get the buffer for the member defining symbol " +
- toMachOString(sym));
+ Expected<MemoryBufferRef> mb = c.getMemoryBufferRef();
+ if (!mb)
+ return mb.takeError();
+ // Thin archives refer to .o files, so --reproduce needs the .o files too.
if (tar && c.getParent()->isThin())
- tar->append(relativeToRoot(CHECK(c.getFullName(), this)), mb.getBuffer());
+ tar->append(relativeToRoot(CHECK(c.getFullName(), this)), mb->getBuffer());
+
+ Expected<TimePoint<std::chrono::seconds>> modTime = c.getLastModified();
+ if (!modTime)
+ return modTime.takeError();
+
+ Expected<InputFile *> file =
+ loadArchiveMember(*mb, toTimeT(*modTime), getName(), c.getChildOffset());
+
+ if (!file)
+ return file.takeError();
+
+ inputFiles.insert(*file);
+ printArchiveMemberLoad(reason, *file);
+ return Error::success();
+}
- uint32_t modTime = toTimeT(
- CHECK(c.getLastModified(), toString(this) +
- ": could not get the modification time "
- "for the member defining symbol " +
- toMachOString(sym)));
+void ArchiveFile::fetch(const object::Archive::Symbol &sym) {
+ object::Archive::Child c =
+ CHECK(sym.getMember(), toString(this) +
+ ": could not get the member defining symbol " +
+ toMachOString(sym));
// `sym` is owned by a LazySym, which will be replace<>()d by make<ObjFile>
// and become invalid after that call. Copy it to the stack so we can refer
// to it later.
const object::Archive::Symbol symCopy = sym;
- if (Optional<InputFile *> file = loadArchiveMember(
- mb, modTime, getName(), /*objCOnly=*/false, c.getChildOffset())) {
- inputFiles.insert(*file);
- // ld64 doesn't demangle sym here even with -demangle.
- // Match that: intentionally don't call toMachOString().
- printArchiveMemberLoad(symCopy.getName(), *file);
- }
+ // ld64 doesn't demangle sym here even with -demangle.
+ // Match that: intentionally don't call toMachOString().
+ if (Error e = fetch(c, symCopy.getName()))
+ error(toString(this) + ": could not get the member defining symbol " +
+ toMachOString(symCopy) + ": " + toString(std::move(e)));
}
static macho::Symbol *createBitcodeSymbol(const lto::InputFile::Symbol &objSym,
@@ -1276,8 +1449,6 @@ static macho::Symbol *createBitcodeSymbol(const lto::InputFile::Symbol &objSym,
if (objSym.isUndefined())
return symtab->addUndefined(name, &file, /*isWeakRef=*/false);
- assert(!objSym.isCommon() && "TODO: support common symbols in LTO");
-
// TODO: Write a test demonstrating why computing isPrivateExtern before
// LTO compilation is important.
bool isPrivateExtern = false;
@@ -1292,11 +1463,16 @@ static macho::Symbol *createBitcodeSymbol(const lto::InputFile::Symbol &objSym,
break;
}
+ if (objSym.isCommon())
+ return symtab->addCommon(name, &file, objSym.getCommonSize(),
+ objSym.getCommonAlignment(), isPrivateExtern);
+
return symtab->addDefined(name, &file, /*isec=*/nullptr, /*value=*/0,
/*size=*/0, objSym.isWeak(), isPrivateExtern,
/*isThumb=*/false,
/*isReferencedDynamically=*/false,
- /*noDeadStrip=*/false);
+ /*noDeadStrip=*/false,
+ /*isWeakDefCanBeHidden=*/false);
}
BitcodeFile::BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
diff --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h
index 0101fb71c8a3..93794cb5a4aa 100644
--- a/lld/MachO/InputFiles.h
+++ b/lld/MachO/InputFiles.h
@@ -14,6 +14,7 @@
#include "lld/Common/LLVM.h"
#include "lld/Common/Memory.h"
+#include "llvm/ADT/CachedHashString.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/BinaryFormat/MachO.h"
@@ -40,6 +41,7 @@ namespace macho {
struct PlatformInfo;
class ConcatInputSection;
class Symbol;
+class Defined;
struct Reloc;
enum class RefState : uint8_t;
@@ -50,11 +52,18 @@ extern std::unique_ptr<llvm::TarWriter> tar;
// If .subsections_via_symbols is set, each InputSection will be split along
// symbol boundaries. The field offset represents the offset of the subsection
// from the start of the original pre-split InputSection.
-struct SubsectionEntry {
- uint64_t offset;
- InputSection *isec;
+struct Subsection {
+ uint64_t offset = 0;
+ InputSection *isec = nullptr;
+};
+
+using Subsections = std::vector<Subsection>;
+
+struct Section {
+ uint64_t address = 0;
+ Subsections subsections;
+ Section(uint64_t addr) : address(addr){};
};
-using SubsectionMap = std::vector<SubsectionEntry>;
class InputFile {
public:
@@ -69,11 +78,12 @@ public:
virtual ~InputFile() = default;
Kind kind() const { return fileKind; }
StringRef getName() const { return name; }
+ static void resetIdCount() { idCount = 0; }
MemoryBufferRef mb;
std::vector<Symbol *> symbols;
- std::vector<SubsectionMap> subsections;
+ std::vector<Section> sections;
// Provides an easy way to sort InputFiles deterministically.
const int id;
@@ -106,19 +116,22 @@ public:
ArrayRef<llvm::MachO::data_in_code_entry> dataInCodeEntries;
private:
+ Section *compactUnwindSection = nullptr;
+
template <class LP> void parse();
- template <class Section> void parseSections(ArrayRef<Section>);
+ template <class SectionHeader> void parseSections(ArrayRef<SectionHeader>);
template <class LP>
void parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
ArrayRef<typename LP::nlist> nList, const char *strtab,
bool subsectionsViaSymbols);
template <class NList>
Symbol *parseNonSectionSymbol(const NList &sym, StringRef name);
- template <class Section>
- void parseRelocations(ArrayRef<Section> sectionHeaders, const Section &,
- SubsectionMap &);
+ template <class SectionHeader>
+ void parseRelocations(ArrayRef<SectionHeader> sectionHeaders,
+ const SectionHeader &, Subsections &);
void parseDebugInfo();
void parseDataInCode();
+ void registerCompactUnwind();
};
// command-line -sectcreate file
@@ -184,8 +197,13 @@ private:
class ArchiveFile final : public InputFile {
public:
explicit ArchiveFile(std::unique_ptr<llvm::object::Archive> &&file);
+ void addLazySymbols();
+ void fetch(const llvm::object::Archive::Symbol &);
+ // LLD normally doesn't use Error for error-handling, but the underlying
+ // Archive library does, so this is the cleanest way to wrap it.
+ Error fetch(const llvm::object::Archive::Child &, StringRef reason);
+ const llvm::object::Archive &getArchive() const { return *file; };
static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }
- void fetch(const llvm::object::Archive::Symbol &sym);
private:
std::unique_ptr<llvm::object::Archive> file;
@@ -204,6 +222,7 @@ public:
};
extern llvm::SetVector<InputFile *> inputFiles;
+extern llvm::DenseMap<llvm::CachedHashStringRef, MemoryBufferRef> cachedReads;
llvm::Optional<MemoryBufferRef> readFile(StringRef path);
diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp
index eb5acf6ffed8..96167b72a724 100644
--- a/lld/MachO/InputSection.cpp
+++ b/lld/MachO/InputSection.cpp
@@ -26,6 +26,11 @@ using namespace llvm::support;
using namespace lld;
using namespace lld::macho;
+// Verify ConcatInputSection's size on 64-bit builds.
+static_assert(sizeof(void *) != 8 || sizeof(ConcatInputSection) == 120,
+ "Try to minimize ConcatInputSection's size, we create many "
+ "instances of it");
+
std::vector<ConcatInputSection *> macho::inputSections;
uint64_t InputSection::getFileSize() const {
@@ -93,9 +98,34 @@ void ConcatInputSection::foldIdentical(ConcatInputSection *copy) {
align = std::max(align, copy->align);
copy->live = false;
copy->wasCoalesced = true;
- numRefs += copy->numRefs;
- copy->numRefs = 0;
copy->replacement = this;
+
+ // Merge the sorted vectors of symbols together.
+ auto it = symbols.begin();
+ for (auto copyIt = copy->symbols.begin(); copyIt != copy->symbols.end();) {
+ if (it == symbols.end()) {
+ symbols.push_back(*copyIt++);
+ it = symbols.end();
+ } else if ((*it)->value > (*copyIt)->value) {
+ std::swap(*it++, *copyIt);
+ } else {
+ ++it;
+ }
+ }
+ copy->symbols.clear();
+
+ // Remove duplicate compact unwind info for symbols at the same address.
+ if (symbols.empty())
+ return;
+ it = symbols.begin();
+ uint64_t v = (*it)->value;
+ for (++it; it != symbols.end(); ++it) {
+ Defined *d = *it;
+ if (d->value == v)
+ d->unwindEntry = nullptr;
+ else
+ v = d->value;
+ }
}
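// ---- Editor's illustrative sketch; not part of the vendored commit. ----
// foldIdentical() above merges the two value-sorted symbol lists of an ICF'd
// section pair and keeps compact-unwind info for only one symbol per address.
// A simplified stand-alone version of that idea (FoldedSym is a stand-in type,
// not the real Defined class):
#include <algorithm>
#include <cstdint>
#include <iterator>
#include <vector>

struct FoldedSym {
  uint64_t value;
  const void *unwindEntry;
};

static void foldSymbolLists(std::vector<FoldedSym> &kept,
                            std::vector<FoldedSym> &folded) {
  std::vector<FoldedSym> merged;
  std::merge(kept.begin(), kept.end(), folded.begin(), folded.end(),
             std::back_inserter(merged),
             [](const FoldedSym &a, const FoldedSym &b) {
               return a.value < b.value;
             });
  folded.clear();
  // Drop duplicate unwind info for symbols that landed at the same address.
  for (size_t i = 1; i < merged.size(); ++i)
    if (merged[i].value == merged[i - 1].value)
      merged[i].unwindEntry = nullptr;
  kept = std::move(merged);
}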
void ConcatInputSection::writeTo(uint8_t *buf) {
@@ -203,14 +233,14 @@ WordLiteralInputSection::WordLiteralInputSection(StringRef segname,
uint64_t WordLiteralInputSection::getOffset(uint64_t off) const {
auto *osec = cast<WordLiteralSection>(parent);
- const uint8_t *buf = data.data();
+ const uintptr_t buf = reinterpret_cast<uintptr_t>(data.data());
switch (sectionType(getFlags())) {
case S_4BYTE_LITERALS:
- return osec->getLiteral4Offset(buf + off);
+ return osec->getLiteral4Offset(buf + (off & ~3LLU)) | (off & 3);
case S_8BYTE_LITERALS:
- return osec->getLiteral8Offset(buf + off);
+ return osec->getLiteral8Offset(buf + (off & ~7LLU)) | (off & 7);
case S_16BYTE_LITERALS:
- return osec->getLiteral16Offset(buf + off);
+ return osec->getLiteral16Offset(buf + (off & ~15LLU)) | (off & 15);
default:
llvm_unreachable("invalid literal section type");
}
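// ---- Editor's illustrative sketch; not part of the vendored commit. ----
// The getOffset() change above rounds an offset down to the start of its
// containing 4/8/16-byte literal (e.g. off & ~7LLU), looks up that literal's
// deduplicated output offset, and ORs the low bits back in so a reference into
// the middle of a literal still resolves to the right byte. A tiny numeric
// demonstration; literalOutOff is an assumed output offset:
#include <cassert>
#include <cstdint>

int main() {
  uint64_t off = 13;                 // byte 5 of the second 8-byte literal
  uint64_t literalKey = off & ~7LLU; // 8: start of the containing literal
  uint64_t literalOutOff = 32;       // assumed (aligned) offset in the output
  assert(literalKey == 8);
  assert((literalOutOff | (off & 7)) == 37); // 32 + 5: byte offset preserved
}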
diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h
index a10457087043..1183e32fbabf 100644
--- a/lld/MachO/InputSection.h
+++ b/lld/MachO/InputSection.h
@@ -11,12 +11,14 @@
#include "Config.h"
#include "Relocations.h"
+#include "Symbols.h"
#include "lld/Common/LLVM.h"
#include "lld/Common/Memory.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/CachedHashString.h"
+#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/BinaryFormat/MachO.h"
namespace lld {
@@ -24,7 +26,6 @@ namespace macho {
class InputFile;
class OutputSection;
-class Defined;
class InputSection {
public:
@@ -37,6 +38,7 @@ public:
Kind kind() const { return shared->sectionKind; }
virtual ~InputSection() = default;
virtual uint64_t getSize() const { return data.size(); }
+ virtual bool empty() const { return data.empty(); }
InputFile *getFile() const { return shared->file; }
StringRef getName() const { return shared->name; }
StringRef getSegName() const { return shared->segname; }
@@ -61,6 +63,9 @@ public:
ArrayRef<uint8_t> data;
std::vector<Reloc> relocs;
+ // The symbols that belong to this InputSection, sorted by value. With
+ // .subsections_via_symbols, there is typically only one element here.
+ llvm::TinyPtrVector<Defined *> symbols;
protected:
// The fields in this struct are immutable. Since we create a lot of
@@ -79,15 +84,15 @@ protected:
sectionKind(kind) {}
};
- InputSection(Kind kind, StringRef segname, StringRef name)
- : callSiteCount(0), isFinal(false),
- shared(make<Shared>(nullptr, name, segname, 0, kind)) {}
-
InputSection(Kind kind, StringRef segname, StringRef name, InputFile *file,
ArrayRef<uint8_t> data, uint32_t align, uint32_t flags)
: align(align), callSiteCount(0), isFinal(false), data(data),
shared(make<Shared>(file, name, segname, flags, kind)) {}
+ InputSection(const InputSection &rhs)
+ : align(rhs.align), callSiteCount(0), isFinal(false), data(rhs.data),
+ shared(rhs.shared) {}
+
const Shared *const shared;
};
@@ -96,27 +101,29 @@ protected:
// contents merged before output.
class ConcatInputSection final : public InputSection {
public:
- ConcatInputSection(StringRef segname, StringRef name)
- : InputSection(ConcatKind, segname, name) {}
-
ConcatInputSection(StringRef segname, StringRef name, InputFile *file,
ArrayRef<uint8_t> data, uint32_t align = 1,
uint32_t flags = 0)
: InputSection(ConcatKind, segname, name, file, data, align, flags) {}
+ ConcatInputSection(StringRef segname, StringRef name)
+ : ConcatInputSection(segname, name, /*file=*/nullptr,
+ /*data=*/{},
+ /*align=*/1, /*flags=*/0) {}
+
uint64_t getOffset(uint64_t off) const override { return outSecOff + off; }
uint64_t getVA() const { return InputSection::getVA(0); }
// ConcatInputSections are entirely live or dead, so the offset is irrelevant.
bool isLive(uint64_t off) const override { return live; }
void markLive(uint64_t off) override { live = true; }
- bool isCoalescedWeak() const { return wasCoalesced && numRefs == 0; }
+ bool isCoalescedWeak() const { return wasCoalesced && symbols.empty(); }
bool shouldOmitFromOutput() const { return !live || isCoalescedWeak(); }
bool isHashableForICF() const;
void hashForICF();
void writeTo(uint8_t *buf);
void foldIdentical(ConcatInputSection *redundant);
- InputSection *canonical() override {
+ ConcatInputSection *canonical() override {
return replacement ? replacement : this;
}
@@ -125,7 +132,7 @@ public:
}
// Points to the surviving section after this one is folded by ICF
- InputSection *replacement = nullptr;
+ ConcatInputSection *replacement = nullptr;
// Equivalence-class ID for ICF
uint64_t icfEqClass[2] = {0, 0};
@@ -136,19 +143,12 @@ public:
// first and not copied to the output.
bool wasCoalesced = false;
bool live = !config->deadStrip;
- // How many symbols refer to this InputSection.
- uint32_t numRefs = 0;
// This variable has two usages. Initially, it represents the input order.
// After assignAddresses is called, it represents the offset from the
// beginning of the output section this section was assigned to.
uint64_t outSecOff = 0;
};
-// Verify ConcatInputSection's size on 64-bit builds.
-static_assert(sizeof(int) != 8 || sizeof(ConcatInputSection) == 112,
- "Try to minimize ConcatInputSection's size, we create many "
- "instances of it");
-
// Helper functions to make it easy to sprinkle asserts.
inline bool shouldOmitFromOutput(InputSection *isec) {
@@ -298,6 +298,7 @@ constexpr const char debugAbbrev[] = "__debug_abbrev";
constexpr const char debugInfo[] = "__debug_info";
constexpr const char debugStr[] = "__debug_str";
constexpr const char ehFrame[] = "__eh_frame";
+constexpr const char gccExceptTab[] = "__gcc_except_tab";
constexpr const char export_[] = "__export";
constexpr const char dataInCode[] = "__data_in_code";
constexpr const char functionStarts[] = "__func_starts";
diff --git a/lld/MachO/LTO.cpp b/lld/MachO/LTO.cpp
index 366193a27eba..c71ea33d2896 100644
--- a/lld/MachO/LTO.cpp
+++ b/lld/MachO/LTO.cpp
@@ -17,9 +17,9 @@
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Strings.h"
#include "lld/Common/TargetOptionsCommandFlags.h"
-#include "llvm/LTO/Caching.h"
#include "llvm/LTO/Config.h"
#include "llvm/LTO/LTO.h"
+#include "llvm/Support/Caching.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
@@ -37,6 +37,7 @@ static lto::Config createConfig() {
c.CodeModel = getCodeModelFromCMModel();
c.CPU = getCPUStr();
c.MAttrs = getMAttrs();
+ c.DiagHandler = diagnosticHandler;
c.UseNewPM = config->ltoNewPassManager;
c.PreCodeGenPassesHook = [](legacy::PassManager &pm) {
pm.add(createObjCARCContractPass());
@@ -104,17 +105,17 @@ std::vector<ObjFile *> BitcodeCompiler::compile() {
// The -cache_path_lto option specifies the path to a directory in which
// to cache native object files for ThinLTO incremental builds. If a path was
// specified, configure LTO to use it as the cache directory.
- lto::NativeObjectCache cache;
+ FileCache cache;
if (!config->thinLTOCacheDir.empty())
- cache = check(
- lto::localCache(config->thinLTOCacheDir,
- [&](size_t task, std::unique_ptr<MemoryBuffer> mb) {
- files[task] = std::move(mb);
- }));
+ cache =
+ check(localCache("ThinLTO", "Thin", config->thinLTOCacheDir,
+ [&](size_t task, std::unique_ptr<MemoryBuffer> mb) {
+ files[task] = std::move(mb);
+ }));
checkError(ltoObj->run(
[&](size_t task) {
- return std::make_unique<lto::NativeObjectStream>(
+ return std::make_unique<CachedFileStream>(
std::make_unique<raw_svector_ostream>(buf[task]));
},
cache));
diff --git a/lld/MachO/MarkLive.cpp b/lld/MachO/MarkLive.cpp
index 116691260985..4269c8342c65 100644
--- a/lld/MachO/MarkLive.cpp
+++ b/lld/MachO/MarkLive.cpp
@@ -45,10 +45,15 @@ void markLive() {
};
auto addSym = [&](Symbol *s) {
+ if (s->used)
+ return;
s->used = true;
- if (auto *d = dyn_cast<Defined>(s))
+ if (auto *d = dyn_cast<Defined>(s)) {
if (d->isec)
enqueue(d->isec, d->value);
+ if (d->unwindEntry)
+ enqueue(d->unwindEntry, 0);
+ }
};
// Add GC roots.
@@ -91,8 +96,7 @@ void markLive() {
}
// -u symbols
for (Symbol *sym : config->explicitUndefineds)
- if (auto *defined = dyn_cast<Defined>(sym))
- addSym(defined);
+ addSym(sym);
// local symbols explicitly marked .no_dead_strip
for (const InputFile *file : inputFiles)
if (auto *objFile = dyn_cast<ObjFile>(file))
@@ -118,31 +122,6 @@ void markLive() {
}
}
- // Dead strip runs before UnwindInfoSection handling so we need to keep
- // __LD,__compact_unwind alive here.
- // But that section contains absolute references to __TEXT,__text and
- // keeps most code alive due to that. So we can't just enqueue() the
- // section: We must skip the relocations for the functionAddress
- // in each CompactUnwindEntry.
- // See also scanEhFrameSection() in lld/ELF/MarkLive.cpp.
- for (ConcatInputSection *isec : in.unwindInfo->getInputs()) {
- isec->live = true;
- const int compactUnwindEntrySize =
- target->wordSize == 8 ? sizeof(CompactUnwindEntry<uint64_t>)
- : sizeof(CompactUnwindEntry<uint32_t>);
- for (const Reloc &r : isec->relocs) {
- // This is the relocation for the address of the function itself.
- // Ignore it, else these would keep everything alive.
- if (r.offset % compactUnwindEntrySize == 0)
- continue;
-
- if (auto *s = r.referent.dyn_cast<Symbol *>())
- addSym(s);
- else
- enqueue(r.referent.get<InputSection *>(), r.addend);
- }
- }
-
do {
// Mark things reachable from GC roots as live.
while (!worklist.empty()) {
@@ -156,6 +135,8 @@ void markLive() {
else
enqueue(r.referent.get<InputSection *>(), r.addend);
}
+ for (Defined *d : s->symbols)
+ addSym(d);
}
// S_ATTR_LIVE_SUPPORT sections are live if they point _to_ a live section.
diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp
index 7ed800827f3e..3c40c5d7181b 100644
--- a/lld/MachO/ObjC.cpp
+++ b/lld/MachO/ObjC.cpp
@@ -13,14 +13,15 @@
#include "Target.h"
#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/Bitcode/BitcodeReader.h"
using namespace llvm;
using namespace llvm::MachO;
using namespace lld;
using namespace lld::macho;
-template <class LP> static bool hasObjCSection(MemoryBufferRef mb) {
- using Section = typename LP::section;
+template <class LP> static bool objectHasObjCSection(MemoryBufferRef mb) {
+ using SectionHeader = typename LP::section;
auto *hdr =
reinterpret_cast<const typename LP::mach_header *>(mb.getBufferStart());
@@ -29,12 +30,13 @@ template <class LP> static bool hasObjCSection(MemoryBufferRef mb) {
if (const auto *c =
findCommand<typename LP::segment_command>(hdr, LP::segmentLCType)) {
- auto sectionHeaders =
- ArrayRef<Section>{reinterpret_cast<const Section *>(c + 1), c->nsects};
- for (const Section &sec : sectionHeaders) {
- StringRef sectname(sec.sectname,
- strnlen(sec.sectname, sizeof(sec.sectname)));
- StringRef segname(sec.segname, strnlen(sec.segname, sizeof(sec.segname)));
+ auto sectionHeaders = ArrayRef<SectionHeader>{
+ reinterpret_cast<const SectionHeader *>(c + 1), c->nsects};
+ for (const SectionHeader &secHead : sectionHeaders) {
+ StringRef sectname(secHead.sectname,
+ strnlen(secHead.sectname, sizeof(secHead.sectname)));
+ StringRef segname(secHead.segname,
+ strnlen(secHead.segname, sizeof(secHead.segname)));
if ((segname == segment_names::data &&
sectname == section_names::objcCatList) ||
(segname == segment_names::text &&
@@ -46,9 +48,20 @@ template <class LP> static bool hasObjCSection(MemoryBufferRef mb) {
return false;
}
-bool macho::hasObjCSection(MemoryBufferRef mb) {
+static bool objectHasObjCSection(MemoryBufferRef mb) {
if (target->wordSize == 8)
- return ::hasObjCSection<LP64>(mb);
+ return ::objectHasObjCSection<LP64>(mb);
else
- return ::hasObjCSection<ILP32>(mb);
+ return ::objectHasObjCSection<ILP32>(mb);
+}
+
+bool macho::hasObjCSection(MemoryBufferRef mb) {
+ switch (identify_magic(mb.getBuffer())) {
+ case file_magic::macho_object:
+ return objectHasObjCSection(mb);
+ case file_magic::bitcode:
+ return check(isBitcodeContainingObjCCategory(mb));
+ default:
+ return false;
+ }
}
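// ---- Editor's illustrative sketch; not part of the vendored commit. ----
// hasObjCSection() above now dispatches on the buffer's magic so that bitcode
// archive members are classified as well as Mach-O objects. The same dispatch
// pattern in isolation (handleBuffer is a hypothetical name):
#include "llvm/BinaryFormat/Magic.h"
#include "llvm/Support/MemoryBufferRef.h"

static bool handleBuffer(llvm::MemoryBufferRef mb) {
  switch (llvm::identify_magic(mb.getBuffer())) {
  case llvm::file_magic::macho_object:
    return true;  // would inspect Mach-O section headers here
  case llvm::file_magic::bitcode:
    return true;  // would ask the bitcode reader here
  default:
    return false; // other inputs cannot carry ObjC metadata sections
  }
}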
diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td
index cda857d605bd..7cd4d01a0a5a 100644
--- a/lld/MachO/Options.td
+++ b/lld/MachO/Options.td
@@ -73,6 +73,12 @@ def thinlto_cache_policy: Joined<["--"], "thinlto-cache-policy=">,
Group<grp_lld>;
def O : JoinedOrSeparate<["-"], "O">,
HelpText<"Optimize output file size">;
+def no_warn_dylib_install_name: Joined<["--"], "no-warn-dylib-install-name">,
+ HelpText<"Do not warn on -install-name if -dylib is not passed (default)">,
+ Group<grp_lld>;
+def warn_dylib_install_name: Joined<["--"], "warn-dylib-install-name">,
+ HelpText<"Warn on -install-name if -dylib is not passed">,
+ Group<grp_lld>;
// This is a complete Options.td compiled from Apple's ld(1) manpage
// dated 2018-03-07 and cross checked with ld64 source code in repo
@@ -290,6 +296,7 @@ def no_branch_islands : Flag<["-"], "no_branch_islands">,
Group<grp_opts>;
def no_deduplicate : Flag<["-"], "no_deduplicate">,
HelpText<"Disable code deduplicaiton (synonym for `--icf=none')">,
+ Alias<icf_eq>, AliasArgs<["none"]>,
Group<grp_opts>;
def grp_version : OptionGroup<"version">, HelpText<"VERSION TARGETING">;
@@ -556,7 +563,6 @@ def grp_symtab : OptionGroup<"symtab">, HelpText<"SYMBOL TABLE">;
def S : Flag<["-"], "S">,
HelpText<"Strip debug information (STABS or DWARF) from the output">,
- Flags<[HelpHidden]>,
Group<grp_symtab>;
def x : Flag<["-"], "x">,
HelpText<"Exclude non-global symbols from the output symbol table">,
@@ -575,7 +581,6 @@ def non_global_symbols_no_strip_list : Separate<["-"], "non_global_symbols_no_st
def oso_prefix : Separate<["-"], "oso_prefix">,
MetaVarName<"<path>">,
HelpText<"Remove the prefix <path> from OSO symbols in the debug map">,
- Flags<[HelpHidden]>,
Group<grp_symtab>;
def add_ast_path : Separate<["-"], "add_ast_path">,
MetaVarName<"<path>">,
@@ -863,7 +868,6 @@ def no_arch_warnings : Flag<["-"], "no_arch_warnings">,
Group<grp_rare>;
def arch_errors_fatal : Flag<["-"], "arch_errors_fatal">,
HelpText<"Escalate to errors any warnings about inputs whose architecture does not match the -arch option">,
- Flags<[HelpHidden]>,
Group<grp_rare>;
def e : Separate<["-"], "e">,
MetaVarName<"<symbol>">,
@@ -947,6 +951,9 @@ def mllvm : Separate<["-"], "mllvm">,
def mcpu : Separate<["-"], "mcpu">,
HelpText<"Processor family target for LTO code generation">,
Group<grp_rare>;
+def no_dtrace_dof : Flag<["-"], "no_dtrace_dof">,
+ HelpText<"Disable dtrace-dof processing (default).">,
+ Group<grp_rare>;
def grp_deprecated : OptionGroup<"deprecated">, HelpText<"DEPRECATED">;
@@ -1272,18 +1279,10 @@ def no_compact_unwind : Flag<["-"], "no_compact_unwind">,
HelpText<"This option is undocumented in ld64">,
Flags<[HelpHidden]>,
Group<grp_undocumented>;
-def no_dtrace_dof : Flag<["-"], "no_dtrace_dof">,
- HelpText<"This option is undocumented in ld64">,
- Flags<[HelpHidden]>,
- Group<grp_undocumented>;
def no_new_main : Flag<["-"], "no_new_main">,
HelpText<"This option is undocumented in ld64">,
Flags<[HelpHidden]>,
Group<grp_undocumented>;
-def objc_abi_version : Separate<["-"], "objc_abi_version">,
- HelpText<"This option is undocumented in ld64">,
- Flags<[HelpHidden]>,
- Group<grp_undocumented>;
def pause : Flag<["-"], "pause">,
HelpText<"This option is undocumented in ld64">,
Flags<[HelpHidden]>,
@@ -1327,3 +1326,10 @@ def new_linker : Flag<["-"], "new_linker">,
HelpText<"This option is ignored in ld64">,
Flags<[HelpHidden]>,
Group<grp_ignored>;
+
+def grp_ignored_silently : OptionGroup<"ignored_silently">, HelpText<"IGNORED SILENTLY">;
+
+def objc_abi_version : Separate<["-"], "objc_abi_version">,
+ HelpText<"This option only applies to i386 in ld64">,
+ Flags<[HelpHidden]>,
+ Group<grp_ignored_silently>;
diff --git a/lld/MachO/OutputSegment.cpp b/lld/MachO/OutputSegment.cpp
index 3bbaf7f0304e..da1394c08831 100644
--- a/lld/MachO/OutputSegment.cpp
+++ b/lld/MachO/OutputSegment.cpp
@@ -161,6 +161,11 @@ void macho::sortOutputSegments() {
static DenseMap<StringRef, OutputSegment *> nameToOutputSegment;
std::vector<OutputSegment *> macho::outputSegments;
+void macho::resetOutputSegments() {
+ outputSegments.clear();
+ nameToOutputSegment.clear();
+}
+
static StringRef maybeRenameSegment(StringRef name) {
auto newName = config->segmentRenameMap.find(name);
if (newName != config->segmentRenameMap.end())
diff --git a/lld/MachO/OutputSegment.h b/lld/MachO/OutputSegment.h
index b3863f4148d9..bff99e28a88f 100644
--- a/lld/MachO/OutputSegment.h
+++ b/lld/MachO/OutputSegment.h
@@ -68,6 +68,7 @@ private:
extern std::vector<OutputSegment *> outputSegments;
void sortOutputSegments();
+void resetOutputSegments();
OutputSegment *getOrCreateOutputSegment(StringRef name);
diff --git a/lld/MachO/Relocations.cpp b/lld/MachO/Relocations.cpp
index 03cb6973b6ab..2f316154a1ca 100644
--- a/lld/MachO/Relocations.cpp
+++ b/lld/MachO/Relocations.cpp
@@ -17,6 +17,9 @@ using namespace llvm;
using namespace lld;
using namespace lld::macho;
+static_assert(sizeof(void *) != 8 || sizeof(Reloc) == 24,
+ "Try to minimize Reloc's size; we create many instances");
+
bool macho::validateSymbolRelocation(const Symbol *sym,
const InputSection *isec, const Reloc &r) {
const RelocAttrs &relocAttrs = target->getRelocAttrs(r.type);
diff --git a/lld/MachO/Relocations.h b/lld/MachO/Relocations.h
index 91b2d00f26a1..9457465ac203 100644
--- a/lld/MachO/Relocations.h
+++ b/lld/MachO/Relocations.h
@@ -56,7 +56,7 @@ struct Reloc {
uint8_t length = 0;
// The offset from the start of the subsection that this relocation belongs
// to.
- uint64_t offset = 0;
+ uint32_t offset = 0;
// Adding this offset to the address of the referent symbol or subsection
// gives the destination that this relocation refers to.
int64_t addend = 0;
diff --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp
index c5808a89bf57..c212516a4780 100644
--- a/lld/MachO/SymbolTable.cpp
+++ b/lld/MachO/SymbolTable.cpp
@@ -10,6 +10,7 @@
#include "ConcatOutputSection.h"
#include "Config.h"
#include "InputFiles.h"
+#include "InputSection.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "lld/Common/ErrorHandler.h"
@@ -48,8 +49,8 @@ Defined *SymbolTable::addDefined(StringRef name, InputFile *file,
InputSection *isec, uint64_t value,
uint64_t size, bool isWeakDef,
bool isPrivateExtern, bool isThumb,
- bool isReferencedDynamically,
- bool noDeadStrip) {
+ bool isReferencedDynamically, bool noDeadStrip,
+ bool isWeakDefCanBeHidden) {
Symbol *s;
bool wasInserted;
bool overridesWeakDef = false;
@@ -61,28 +62,32 @@ Defined *SymbolTable::addDefined(StringRef name, InputFile *file,
if (!wasInserted) {
if (auto *defined = dyn_cast<Defined>(s)) {
if (isWeakDef) {
+ // See further comment in createDefined() in InputFiles.cpp
if (defined->isWeakDef()) {
- // Both old and new symbol weak (e.g. inline function in two TUs):
- // If one of them isn't private extern, the merged symbol isn't.
defined->privateExtern &= isPrivateExtern;
+ defined->weakDefCanBeHidden &= isWeakDefCanBeHidden;
defined->referencedDynamically |= isReferencedDynamically;
defined->noDeadStrip |= noDeadStrip;
-
- // FIXME: Handle this for bitcode files.
- // FIXME: We currently only do this if both symbols are weak.
- // We could do this if either is weak (but getting the
- // case where !isWeakDef && defined->isWeakDef() right
- // requires some care and testing).
- if (auto concatIsec = dyn_cast_or_null<ConcatInputSection>(isec))
- concatIsec->wasCoalesced = true;
}
-
+ // FIXME: Handle this for bitcode files.
+ if (auto concatIsec = dyn_cast_or_null<ConcatInputSection>(isec))
+ concatIsec->wasCoalesced = true;
return defined;
}
- if (!defined->isWeakDef())
+
+ if (defined->isWeakDef()) {
+ // FIXME: Handle this for bitcode files.
+ if (auto concatIsec =
+ dyn_cast_or_null<ConcatInputSection>(defined->isec)) {
+ concatIsec->wasCoalesced = true;
+ concatIsec->symbols.erase(llvm::find(concatIsec->symbols, defined));
+ }
+ } else {
error("duplicate symbol: " + name + "\n>>> defined in " +
toString(defined->getFile()) + "\n>>> defined in " +
toString(file));
+ }
+
} else if (auto *dysym = dyn_cast<DylibSymbol>(s)) {
overridesWeakDef = !isWeakDef && dysym->isWeakDef();
dysym->unreference();
@@ -93,8 +98,8 @@ Defined *SymbolTable::addDefined(StringRef name, InputFile *file,
Defined *defined = replaceSymbol<Defined>(
s, name, file, isec, value, size, isWeakDef, /*isExternal=*/true,
- isPrivateExtern, isThumb, isReferencedDynamically, noDeadStrip);
- defined->overridesWeakDef = overridesWeakDef;
+ isPrivateExtern, isThumb, isReferencedDynamically, noDeadStrip,
+ overridesWeakDef, isWeakDefCanBeHidden);
return defined;
}
@@ -190,10 +195,11 @@ Defined *SymbolTable::addSynthetic(StringRef name, InputSection *isec,
uint64_t value, bool isPrivateExtern,
bool includeInSymtab,
bool referencedDynamically) {
- Defined *s = addDefined(name, nullptr, isec, value, /*size=*/0,
- /*isWeakDef=*/false, isPrivateExtern,
- /*isThumb=*/false, referencedDynamically,
- /*noDeadStrip=*/false);
+ Defined *s =
+ addDefined(name, nullptr, isec, value, /*size=*/0,
+ /*isWeakDef=*/false, isPrivateExtern,
+ /*isThumb=*/false, referencedDynamically,
+ /*noDeadStrip=*/false, /*isWeakDefCanBeHidden=*/false);
s->includeInSymtab = includeInSymtab;
return s;
}
diff --git a/lld/MachO/SymbolTable.h b/lld/MachO/SymbolTable.h
index 17f1ecbd346b..625f78aa6141 100644
--- a/lld/MachO/SymbolTable.h
+++ b/lld/MachO/SymbolTable.h
@@ -40,7 +40,8 @@ public:
Defined *addDefined(StringRef name, InputFile *, InputSection *,
uint64_t value, uint64_t size, bool isWeakDef,
bool isPrivateExtern, bool isThumb,
- bool isReferencedDynamically, bool noDeadStrip);
+ bool isReferencedDynamically, bool noDeadStrip,
+ bool isWeakDefCanBeHidden);
Symbol *addUndefined(StringRef name, InputFile *, bool isWeakRef);
diff --git a/lld/MachO/Symbols.cpp b/lld/MachO/Symbols.cpp
index 47f30d4141fc..bb6d073dcf30 100644
--- a/lld/MachO/Symbols.cpp
+++ b/lld/MachO/Symbols.cpp
@@ -14,6 +14,19 @@ using namespace llvm;
using namespace lld;
using namespace lld::macho;
+static_assert(sizeof(void *) != 8 || sizeof(Symbol) == 48,
+ "Try to minimize Symbol's size; we create many instances");
+
+// The Microsoft ABI doesn't support using parent class tail padding for child
+// members, hence the _MSC_VER check.
+#if !defined(_MSC_VER)
+static_assert(sizeof(void *) != 8 || sizeof(Defined) == 80,
+ "Try to minimize Defined's size; we create many instances");
+#endif
+
+static_assert(sizeof(SymbolUnion) == sizeof(Defined),
+ "Defined should be the largest Symbol kind");
+
// Returns a symbol for an error message.
static std::string demangle(StringRef symName) {
if (config->demangle)
@@ -31,26 +44,34 @@ uint64_t Symbol::getStubVA() const { return in.stubs->getVA(stubsIndex); }
uint64_t Symbol::getGotVA() const { return in.got->getVA(gotIndex); }
uint64_t Symbol::getTlvVA() const { return in.tlvPointers->getVA(gotIndex); }
-bool Symbol::isLive() const {
- if (isa<DylibSymbol>(this) || isa<Undefined>(this))
- return used;
-
- if (auto *d = dyn_cast<Defined>(this)) {
- // Non-absolute symbols might be alive because their section is
- // no_dead_strip or live_support. In that case, the section will know
- // that it's live but `used` might be false. Non-absolute symbols always
- // have to use the section's `live` bit as source of truth.
- if (d->isAbsolute())
- return used;
- return d->isec->canonical()->isLive(d->value);
+Defined::Defined(StringRefZ name, InputFile *file, InputSection *isec,
+ uint64_t value, uint64_t size, bool isWeakDef, bool isExternal,
+ bool isPrivateExtern, bool isThumb,
+ bool isReferencedDynamically, bool noDeadStrip,
+ bool canOverrideWeakDef, bool isWeakDefCanBeHidden)
+ : Symbol(DefinedKind, name, file), overridesWeakDef(canOverrideWeakDef),
+ privateExtern(isPrivateExtern), includeInSymtab(true), thumb(isThumb),
+ referencedDynamically(isReferencedDynamically), noDeadStrip(noDeadStrip),
+ weakDefCanBeHidden(isWeakDefCanBeHidden), weakDef(isWeakDef),
+ external(isExternal), isec(isec), value(value), size(size) {
+ if (isec) {
+ isec->symbols.push_back(this);
+ // Maintain sorted order.
+ for (auto it = isec->symbols.rbegin(), rend = isec->symbols.rend();
+ it != rend; ++it) {
+ auto next = std::next(it);
+ if (next == rend)
+ break;
+ if ((*it)->value < (*next)->value)
+ std::swap(*next, *it);
+ else
+ break;
+ }
}
+}
- assert(!isa<CommonSymbol>(this) &&
- "replaceCommonSymbols() runs before dead code stripping, and isLive() "
- "should only be called after dead code stripping");
-
- // Assume any other kind of symbol is live.
- return true;
+bool Defined::isTlv() const {
+ return !isAbsolute() && isThreadLocalVariables(isec->getFlags());
}
uint64_t Defined::getVA() const {
@@ -59,7 +80,7 @@ uint64_t Defined::getVA() const {
if (isAbsolute())
return value;
- if (!isec->canonical()->isFinal) {
+ if (!isec->isFinal) {
// A target arch that does not use thunks ought never ask for
// the address of a function that has not yet been finalized.
assert(target->usesThunks());
@@ -70,7 +91,14 @@ uint64_t Defined::getVA() const {
// expedient to return a contrived out-of-range address.
return TargetInfo::outOfRangeVA;
}
- return isec->canonical()->getVA(value);
+ return isec->getVA(value);
+}
+
+void Defined::canonicalize() {
+ if (unwindEntry)
+ unwindEntry = unwindEntry->canonical();
+ if (isec)
+ isec = isec->canonical();
}
uint64_t DylibSymbol::getVA() const {
diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h
index f7aac7c5cde7..cc6f51cc5fd3 100644
--- a/lld/MachO/Symbols.h
+++ b/lld/MachO/Symbols.h
@@ -9,8 +9,8 @@
#ifndef LLD_MACHO_SYMBOLS_H
#define LLD_MACHO_SYMBOLS_H
+#include "Config.h"
#include "InputFiles.h"
-#include "InputSection.h"
#include "Target.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Strings.h"
@@ -20,7 +20,6 @@
namespace lld {
namespace macho {
-class InputSection;
class MachHeaderSection;
struct StringRefZ {
@@ -51,7 +50,7 @@ public:
return {nameData, nameSize};
}
- bool isLive() const;
+ bool isLive() const { return used; }
virtual uint64_t getVA() const { return 0; }
@@ -93,14 +92,14 @@ public:
protected:
Symbol(Kind k, StringRefZ name, InputFile *file)
- : symbolKind(k), nameData(name.data), nameSize(name.size), file(file),
+ : symbolKind(k), nameData(name.data), file(file), nameSize(name.size),
isUsedInRegularObj(!file || isa<ObjFile>(file)),
used(!config->deadStrip) {}
Kind symbolKind;
const char *nameData;
- mutable uint32_t nameSize;
InputFile *file;
+ mutable uint32_t nameSize;
public:
// True if this symbol was referenced by a regular (non-bitcode) object.
@@ -114,38 +113,28 @@ class Defined : public Symbol {
public:
Defined(StringRefZ name, InputFile *file, InputSection *isec, uint64_t value,
uint64_t size, bool isWeakDef, bool isExternal, bool isPrivateExtern,
- bool isThumb, bool isReferencedDynamically, bool noDeadStrip)
- : Symbol(DefinedKind, name, file), isec(isec), value(value), size(size),
- overridesWeakDef(false), privateExtern(isPrivateExtern),
- includeInSymtab(true), thumb(isThumb),
- referencedDynamically(isReferencedDynamically),
- noDeadStrip(noDeadStrip), weakDef(isWeakDef), external(isExternal) {
- if (auto concatIsec = dyn_cast_or_null<ConcatInputSection>(isec))
- concatIsec->numRefs++;
- }
+ bool isThumb, bool isReferencedDynamically, bool noDeadStrip,
+ bool canOverrideWeakDef = false, bool isWeakDefCanBeHidden = false);
bool isWeakDef() const override { return weakDef; }
bool isExternalWeakDef() const {
return isWeakDef() && isExternal() && !privateExtern;
}
- bool isTlv() const override {
- return !isAbsolute() && isThreadLocalVariables(isec->getFlags());
- }
+ bool isTlv() const override;
bool isExternal() const { return external; }
bool isAbsolute() const { return isec == nullptr; }
uint64_t getVA() const override;
- static bool classof(const Symbol *s) { return s->kind() == DefinedKind; }
+ // Ensure this symbol's pointers to InputSections point to their canonical
+ // copies.
+ void canonicalize();
- InputSection *isec;
- // Contains the offset from the containing subsection. Note that this is
- // different from nlist::n_value, which is the absolute address of the symbol.
- uint64_t value;
- // size is only calculated for regular (non-bitcode) symbols.
- uint64_t size;
+ static bool classof(const Symbol *s) { return s->kind() == DefinedKind; }
+ // Place the bitfields first so that they can get placed in the tail padding
+ // of the parent class, on platforms which support it.
bool overridesWeakDef : 1;
// Whether this symbol should appear in the output binary's export trie.
bool privateExtern : 1;
@@ -166,9 +155,20 @@ public:
// to the output.
bool noDeadStrip : 1;
+ bool weakDefCanBeHidden : 1;
+
private:
const bool weakDef : 1;
const bool external : 1;
+
+public:
+ InputSection *isec;
+ // Contains the offset from the containing subsection. Note that this is
+ // different from nlist::n_value, which is the absolute address of the symbol.
+ uint64_t value;
+ // size is only calculated for regular (non-bitcode) symbols.
+ uint64_t size;
+ ConcatInputSection *unwindEntry = nullptr;
};
// This enum does double-duty: as a symbol property, it indicates whether & how
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index f49340677270..2527389990fa 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -32,7 +32,9 @@
#ifdef LLVM_HAVE_LIBXAR
#include <fcntl.h>
+extern "C" {
#include <xar/xar.h>
+}
#endif
using namespace llvm;
@@ -257,7 +259,7 @@ void NonLazyPointerSectionBase::writeTo(uint8_t *buf) const {
}
GotSection::GotSection()
- : NonLazyPointerSectionBase(segment_names::dataConst, section_names::got) {
+ : NonLazyPointerSectionBase(segment_names::data, section_names::got) {
flags = S_NON_LAZY_SYMBOL_POINTERS;
}
@@ -619,14 +621,14 @@ void StubHelperSection::setup() {
ConcatOutputSection::getOrCreateForInput(in.imageLoaderCache);
inputSections.push_back(in.imageLoaderCache);
// Since this isn't in the symbol table or in any input file, the noDeadStrip
- // argument doesn't matter. It's kept alive by ImageLoaderCacheSection()
- // setting `live` to true on the backing InputSection.
+ // argument doesn't matter.
dyldPrivate =
make<Defined>("__dyld_private", nullptr, in.imageLoaderCache, 0, 0,
/*isWeakDef=*/false,
/*isExternal=*/false, /*isPrivateExtern=*/false,
/*isThumb=*/false, /*isReferencedDynamically=*/false,
/*noDeadStrip=*/false);
+ dyldPrivate->used = true;
}
LazyPointerSection::LazyPointerSection()
@@ -732,7 +734,7 @@ DataInCodeSection::DataInCodeSection()
template <class LP>
static std::vector<MachO::data_in_code_entry> collectDataInCodeEntries() {
using SegmentCommand = typename LP::segment_command;
- using Section = typename LP::section;
+ using SectionHeader = typename LP::section;
std::vector<MachO::data_in_code_entry> dataInCodeEntries;
for (const InputFile *inputFile : inputFiles) {
@@ -743,8 +745,8 @@ static std::vector<MachO::data_in_code_entry> collectDataInCodeEntries() {
findCommand(objFile->mb.getBufferStart(), LP::segmentLCType));
if (!c)
continue;
- ArrayRef<Section> sections{reinterpret_cast<const Section *>(c + 1),
- c->nsects};
+ ArrayRef<SectionHeader> sectionHeaders{
+ reinterpret_cast<const SectionHeader *>(c + 1), c->nsects};
ArrayRef<MachO::data_in_code_entry> entries = objFile->dataInCodeEntries;
if (entries.empty())
@@ -752,15 +754,14 @@ static std::vector<MachO::data_in_code_entry> collectDataInCodeEntries() {
// For each code subsection find 'data in code' entries residing in it.
// Compute the new offset values as
// <offset within subsection> + <subsection address> - <__TEXT address>.
- for (size_t i = 0, n = sections.size(); i < n; ++i) {
- const SubsectionMap &subsecMap = objFile->subsections[i];
- for (const SubsectionEntry &subsecEntry : subsecMap) {
- const InputSection *isec = subsecEntry.isec;
+ for (size_t i = 0, n = sectionHeaders.size(); i < n; ++i) {
+ for (const Subsection &subsec : objFile->sections[i].subsections) {
+ const InputSection *isec = subsec.isec;
if (!isCodeSection(isec))
continue;
if (cast<ConcatInputSection>(isec)->shouldOmitFromOutput())
continue;
- const uint64_t beginAddr = sections[i].addr + subsecEntry.offset;
+ const uint64_t beginAddr = sectionHeaders[i].addr + subsec.offset;
auto it = llvm::lower_bound(
entries, beginAddr,
[](const MachO::data_in_code_entry &entry, uint64_t addr) {
@@ -858,7 +859,10 @@ void SymtabSection::emitObjectFileStab(ObjFile *file) {
if (!file->archiveName.empty())
path.append({"(", file->getName(), ")"});
- stab.strx = stringTableSection.addString(saver.save(path.str()));
+ StringRef adjustedPath = saver.save(path.str());
+ adjustedPath.consume_front(config->osoPrefix);
+
+ stab.strx = stringTableSection.addString(adjustedPath);
stab.desc = 1;
stab.value = file->modTime;
stabs.emplace_back(std::move(stab));
@@ -871,6 +875,9 @@ void SymtabSection::emitEndFunStab(Defined *defined) {
}
void SymtabSection::emitStabs() {
+ if (config->omitDebugInfo)
+ return;
+
for (const std::string &s : config->astPaths) {
StabsEntry astStab(N_AST);
astStab.strx = stringTableSection.addString(s);
@@ -916,7 +923,7 @@ void SymtabSection::emitStabs() {
}
StabsEntry symStab;
- symStab.sect = defined->isec->canonical()->parent->index;
+ symStab.sect = defined->isec->parent->index;
symStab.strx = stringTableSection.addString(defined->getName());
symStab.value = defined->getVA();
@@ -1041,7 +1048,7 @@ template <class LP> void SymtabSectionImpl<LP>::writeTo(uint8_t *buf) const {
nList->n_value = defined->value;
} else {
nList->n_type = scope | N_SECT;
- nList->n_sect = defined->isec->canonical()->parent->index;
+ nList->n_sect = defined->isec->parent->index;
// For the N_SECT symbol type, n_value is the address of the symbol
nList->n_value = defined->getVA();
}
@@ -1102,8 +1109,12 @@ void IndirectSymtabSection::finalizeContents() {
}
static uint32_t indirectValue(const Symbol *sym) {
- return sym->symtabIndex != UINT32_MAX ? sym->symtabIndex
- : INDIRECT_SYMBOL_LOCAL;
+ if (sym->symtabIndex == UINT32_MAX)
+ return INDIRECT_SYMBOL_LOCAL;
+ if (auto *defined = dyn_cast<Defined>(sym))
+ if (defined->privateExtern)
+ return INDIRECT_SYMBOL_LOCAL;
+ return sym->symtabIndex;
}
void IndirectSymtabSection::writeTo(uint8_t *buf) const {
@@ -1160,6 +1171,9 @@ CodeSignatureSection::CodeSignatureSection()
size_t slashIndex = fileName.rfind("/");
if (slashIndex != std::string::npos)
fileName = fileName.drop_front(slashIndex + 1);
+
+ // NOTE: Any changes to these calculations should be repeated
+ // in llvm-objcopy's MachOLayoutBuilder::layoutTail.
allHeadersSize = alignTo<16>(fixedHeadersSize + fileName.size() + 1);
fileNamePad = allHeadersSize - fixedHeadersSize - fileName.size();
}
@@ -1173,6 +1187,8 @@ uint64_t CodeSignatureSection::getRawSize() const {
}
void CodeSignatureSection::writeHashes(uint8_t *buf) const {
+ // NOTE: Changes to this functionality should be repeated in llvm-objcopy's
+ // MachOWriter::writeSignatureData.
uint8_t *code = buf;
uint8_t *codeEnd = buf + fileOff;
uint8_t *hashes = codeEnd + allHeadersSize;
@@ -1203,6 +1219,8 @@ void CodeSignatureSection::writeHashes(uint8_t *buf) const {
}
void CodeSignatureSection::writeTo(uint8_t *buf) const {
+ // NOTE: Changes to this functionality should be repeated in llvm-objcopy's
+ // MachOWriter::writeSignatureData.
uint32_t signatureSize = static_cast<uint32_t>(getSize());
auto *superBlob = reinterpret_cast<CS_SuperBlob *>(buf);
write32be(&superBlob->magic, CSMAGIC_EMBEDDED_SIGNATURE);
diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h
index bbb7adc37cb3..49b68c77672e 100644
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@@ -345,6 +345,7 @@ public:
ExportSection();
void finalizeContents() override;
uint64_t getRawSize() const override { return size; }
+ bool isNeeded() const override { return size; }
void writeTo(uint8_t *buf) const override;
bool hasWeakSymbol = false;
@@ -476,6 +477,8 @@ public:
// The code signature comes at the very end of the linked output file.
class CodeSignatureSection final : public LinkEditSection {
public:
+ // NOTE: These values are duplicated in llvm-objcopy's MachO/Object.h file
+  // and any changes here should be repeated there.
static constexpr uint8_t blockSizeShift = 12;
static constexpr size_t blockSize = (1 << blockSizeShift); // 4 KiB
static constexpr size_t hashSize = 256 / 8;
@@ -560,16 +563,16 @@ public:
!literal8Map.empty();
}
- uint64_t getLiteral16Offset(const uint8_t *buf) const {
+ uint64_t getLiteral16Offset(uintptr_t buf) const {
return literal16Map.at(*reinterpret_cast<const UInt128 *>(buf)) * 16;
}
- uint64_t getLiteral8Offset(const uint8_t *buf) const {
+ uint64_t getLiteral8Offset(uintptr_t buf) const {
return literal16Map.size() * 16 +
literal8Map.at(*reinterpret_cast<const uint64_t *>(buf)) * 8;
}
- uint64_t getLiteral4Offset(const uint8_t *buf) const {
+ uint64_t getLiteral4Offset(uintptr_t buf) const {
return literal16Map.size() * 16 + literal8Map.size() * 8 +
literal4Map.at(*reinterpret_cast<const uint32_t *>(buf)) * 4;
}
diff --git a/lld/MachO/Target.h b/lld/MachO/Target.h
index a5da7644a84e..9c021c611f7b 100644
--- a/lld/MachO/Target.h
+++ b/lld/MachO/Target.h
@@ -88,7 +88,8 @@ public:
size_t wordSize;
size_t thunkSize = 0;
- uint64_t branchRange = 0;
+ uint64_t forwardBranchRange = 0;
+ uint64_t backwardBranchRange = 0;
// We contrive this value as sufficiently far from any valid address that it
// will always be out-of-range for any architecture. UINT64_MAX is not a
diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp
index 3efc646c2ad2..690098c7a3b7 100644
--- a/lld/MachO/UnwindInfoSection.cpp
+++ b/lld/MachO/UnwindInfoSection.cpp
@@ -19,9 +19,12 @@
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Memory.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/Support/Parallel.h"
+
+#include <numeric>
using namespace llvm;
using namespace llvm::MachO;
@@ -92,6 +95,14 @@ using namespace lld::macho;
// TODO(gkm): prune __eh_frame entries superseded by __unwind_info, PR50410
// TODO(gkm): how do we align the 2nd-level pages?
+template <class Ptr> struct CompactUnwindEntry {
+ Ptr functionAddress;
+ uint32_t functionLength;
+ compact_unwind_encoding_t encoding;
+ Ptr personality;
+ Ptr lsda;
+};
+
using EncodingMap = DenseMap<compact_unwind_encoding_t, size_t>;
struct SecondLevelPage {
@@ -107,23 +118,28 @@ template <class Ptr>
class UnwindInfoSectionImpl final : public UnwindInfoSection {
public:
void prepareRelocations(ConcatInputSection *) override;
- void addInput(ConcatInputSection *) override;
+ void relocateCompactUnwind(std::vector<CompactUnwindEntry<Ptr>> &);
+ Reloc *findLsdaReloc(ConcatInputSection *) const;
+ void encodePersonalities();
void finalize() override;
void writeTo(uint8_t *buf) const override;
private:
std::vector<std::pair<compact_unwind_encoding_t, size_t>> commonEncodings;
EncodingMap commonEncodingIndexes;
+ // The entries here will be in the same order as their originating symbols
+ // in symbolsVec.
+ std::vector<CompactUnwindEntry<Ptr>> cuEntries;
+ // Indices into the cuEntries vector.
+ std::vector<size_t> cuIndices;
// Indices of personality functions within the GOT.
- std::vector<uint32_t> personalities;
+ std::vector<Ptr> personalities;
SmallDenseMap<std::pair<InputSection *, uint64_t /* addend */>, Symbol *>
personalityTable;
- std::vector<unwind_info_section_header_lsda_index_entry> lsdaEntries;
- // Map of function offset (from the image base) to an index within the LSDA
- // array.
- DenseMap<uint32_t, uint32_t> functionToLsdaIndex;
- std::vector<CompactUnwindEntry<Ptr>> cuVector;
- std::vector<CompactUnwindEntry<Ptr> *> cuPtrVector;
+ // Indices into cuEntries for CUEs with a non-null LSDA.
+ std::vector<size_t> entriesWithLsda;
+ // Map of cuEntries index to an index within the LSDA array.
+ DenseMap<size_t, uint32_t> lsdaIndex;
std::vector<SecondLevelPage> secondLevelPages;
uint64_t level2PagesOffset = 0;
};
@@ -131,21 +147,40 @@ private:
UnwindInfoSection::UnwindInfoSection()
: SyntheticSection(segment_names::text, section_names::unwindInfo) {
align = 4;
- compactUnwindSection =
- make<ConcatOutputSection>(section_names::compactUnwind);
}
void UnwindInfoSection::prepareRelocations() {
- for (ConcatInputSection *isec : compactUnwindSection->inputs)
- prepareRelocations(isec);
+ // This iteration needs to be deterministic, since prepareRelocations may add
+ // entries to the GOT. Hence the use of a MapVector for
+ // UnwindInfoSection::symbols.
+ for (const Defined *d : make_second_range(symbols))
+ if (d->unwindEntry)
+ prepareRelocations(d->unwindEntry);
}
-template <class Ptr>
-void UnwindInfoSectionImpl<Ptr>::addInput(ConcatInputSection *isec) {
- assert(isec->getSegName() == segment_names::ld &&
- isec->getName() == section_names::compactUnwind);
- isec->parent = compactUnwindSection;
- compactUnwindSection->addInput(isec);
+// Record function symbols that may need entries emitted in __unwind_info, which
+// stores unwind data for address ranges.
+//
+// Note that if several adjacent functions have the same unwind encoding, LSDA,
+// and personality function, they share one unwind entry. For this to work,
+// functions without unwind info need explicit "no unwind info" unwind entries
+// -- else the unwinder would think they have the unwind info of the closest
+// function with unwind info right before in the image. Thus, we add function
+// symbols for each unique address regardless of whether they have associated
+// unwind info.
+void UnwindInfoSection::addSymbol(const Defined *d) {
+ if (d->unwindEntry)
+ allEntriesAreOmitted = false;
+  // We don't yet know the final output address of this symbol, but we know
+  // that it is uniquely determined by a combination of the isec and value, so
+  // we use that as the key here.
+ auto p = symbols.insert({{d->isec, d->value}, d});
+ // If we have multiple symbols at the same address, only one of them can have
+ // an associated CUE.
+ if (!p.second && d->unwindEntry) {
+ assert(!p.first->second->unwindEntry);
+ p.first->second = d;
+ }
}
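// ---- Editor's illustrative sketch; not part of the vendored commit. ----
// addSymbol() above dedups by (isec, value): the first symbol seen at an
// address wins unless a later one carries the unwind entry. The same
// insert-or-upgrade pattern with simplified stand-in types (SymInfo is not the
// real Defined class):
#include <cassert>
#include <cstdint>
#include <map>
#include <utility>

struct SymInfo {
  const void *isec;
  uint64_t value;
  const void *unwindEntry;
};

using SymKey = std::pair<const void *, uint64_t>;

static void addSym(std::map<SymKey, const SymInfo *> &table,
                   const SymInfo *s) {
  auto p = table.insert({{s->isec, s->value}, s});
  // Only one symbol per address may own the compact-unwind entry; prefer it.
  if (!p.second && s->unwindEntry) {
    assert(!p.first->second->unwindEntry);
    p.first->second = s;
  }
}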
// Compact unwind relocations have different semantics, so we handle them in a
@@ -167,29 +202,39 @@ void UnwindInfoSectionImpl<Ptr>::prepareRelocations(ConcatInputSection *isec) {
Reloc &r = isec->relocs[i];
assert(target->hasAttr(r.type, RelocAttrBits::UNSIGNED));
- if (r.offset % sizeof(CompactUnwindEntry<Ptr>) == 0) {
- InputSection *referentIsec;
- if (auto *isec = r.referent.dyn_cast<InputSection *>())
- referentIsec = isec;
- else
- referentIsec = cast<Defined>(r.referent.dyn_cast<Symbol *>())->isec;
-
- if (!cast<ConcatInputSection>(referentIsec)->shouldOmitFromOutput())
- allEntriesAreOmitted = false;
- continue;
- }
-
+ // Functions and LSDA entries always reside in the same object file as the
+    // compact unwind entries that reference them, and thus appear as section
+ // relocs. There is no need to prepare them. We only prepare relocs for
+ // personality functions.
if (r.offset % sizeof(CompactUnwindEntry<Ptr>) !=
offsetof(CompactUnwindEntry<Ptr>, personality))
continue;
if (auto *s = r.referent.dyn_cast<Symbol *>()) {
+ // Personality functions are nearly always system-defined (e.g.,
+ // ___gxx_personality_v0 for C++) and relocated as dylib symbols. When an
+ // application provides its own personality function, it might be
+ // referenced by an extern Defined symbol reloc, or a local section reloc.
+ if (auto *defined = dyn_cast<Defined>(s)) {
+      // XXX(vyng) This is a special case for handling duplicate personality
+      // symbols. Note that LD64's behavior is a bit different and it is
+      // inconsistent with how symbol resolution usually works.
+ //
+ // So we've decided not to follow it. Instead, simply pick the symbol
+ // with the same name from the symbol table to replace the local one.
+ //
+ // (See discussions/alternatives already considered on D107533)
+ if (!defined->isExternal())
+ if (const Symbol *sym = symtab->find(defined->getName()))
+ r.referent = s = const_cast<Symbol *>(sym);
+ }
if (auto *undefined = dyn_cast<Undefined>(s)) {
treatUndefinedSymbol(*undefined);
// treatUndefinedSymbol() can replace s with a DylibSymbol; re-check.
if (isa<Undefined>(s))
continue;
}
+
if (auto *defined = dyn_cast<Defined>(s)) {
// Check if we have created a synthetic symbol at the same address.
Symbol *&personality =
@@ -241,26 +286,27 @@ static ConcatInputSection *checkTextSegment(InputSection *isec) {
return cast<ConcatInputSection>(isec);
}
-template <class Ptr>
-constexpr Ptr TombstoneValue = std::numeric_limits<Ptr>::max();
-
// We need to apply the relocations to the pre-link compact unwind section
// before converting it to post-link form. There should only be absolute
// relocations here: since we are not emitting the pre-link CU section, there
// is no source address to make a relative location meaningful.
template <class Ptr>
-static void
-relocateCompactUnwind(ConcatOutputSection *compactUnwindSection,
- std::vector<CompactUnwindEntry<Ptr>> &cuVector) {
- for (const ConcatInputSection *isec : compactUnwindSection->inputs) {
- assert(isec->parent == compactUnwindSection);
-
- uint8_t *buf =
- reinterpret_cast<uint8_t *>(cuVector.data()) + isec->outSecOff;
- memcpy(buf, isec->data.data(), isec->data.size());
-
- for (const Reloc &r : isec->relocs) {
- uint64_t referentVA = TombstoneValue<Ptr>;
+void UnwindInfoSectionImpl<Ptr>::relocateCompactUnwind(
+ std::vector<CompactUnwindEntry<Ptr>> &cuEntries) {
+ parallelForEachN(0, symbolsVec.size(), [&](size_t i) {
+ uint8_t *buf = reinterpret_cast<uint8_t *>(cuEntries.data()) +
+ i * sizeof(CompactUnwindEntry<Ptr>);
+ const Defined *d = symbolsVec[i].second;
+ // Write the functionAddress.
+ writeAddress(buf, d->getVA(), sizeof(Ptr) == 8 ? 3 : 2);
+ if (!d->unwindEntry)
+ return;
+
+ // Write the rest of the CUE.
+ memcpy(buf + sizeof(Ptr), d->unwindEntry->data.data(),
+ d->unwindEntry->data.size());
+ for (const Reloc &r : d->unwindEntry->relocs) {
+ uint64_t referentVA = 0;
if (auto *referentSym = r.referent.dyn_cast<Symbol *>()) {
if (!isa<Undefined>(referentSym)) {
if (auto *defined = dyn_cast<Defined>(referentSym))
@@ -272,34 +318,31 @@ relocateCompactUnwind(ConcatOutputSection *compactUnwindSection,
}
} else {
auto *referentIsec = r.referent.get<InputSection *>();
- ConcatInputSection *concatIsec = checkTextSegment(referentIsec);
- if (!concatIsec->shouldOmitFromOutput())
- referentVA = referentIsec->getVA(r.addend);
+ checkTextSegment(referentIsec);
+ referentVA = referentIsec->getVA(r.addend);
}
writeAddress(buf + r.offset, referentVA, r.length);
}
- }
+ });
}
// There should only be a handful of unique personality pointers, so we can
// encode them as 2-bit indices into a small array.
-template <class Ptr>
-static void
-encodePersonalities(const std::vector<CompactUnwindEntry<Ptr> *> &cuPtrVector,
- std::vector<uint32_t> &personalities) {
- for (CompactUnwindEntry<Ptr> *cu : cuPtrVector) {
- if (cu->personality == 0)
+template <class Ptr> void UnwindInfoSectionImpl<Ptr>::encodePersonalities() {
+ for (size_t idx : cuIndices) {
+ CompactUnwindEntry<Ptr> &cu = cuEntries[idx];
+ if (cu.personality == 0)
continue;
// Linear search is fast enough for a small array.
- auto it = find(personalities, cu->personality);
+ auto it = find(personalities, cu.personality);
uint32_t personalityIndex; // 1-based index
if (it != personalities.end()) {
personalityIndex = std::distance(personalities.begin(), it) + 1;
} else {
- personalities.push_back(cu->personality);
+ personalities.push_back(cu.personality);
personalityIndex = personalities.size();
}
- cu->encoding |=
+ cu.encoding |=
personalityIndex << countTrailingZeros(
static_cast<compact_unwind_encoding_t>(UNWIND_PERSONALITY_MASK));
}
@@ -308,40 +351,6 @@ encodePersonalities(const std::vector<CompactUnwindEntry<Ptr> *> &cuPtrVector,
") for compact unwind to encode");
}
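// ---- Editor's illustrative sketch; not part of the vendored commit. ----
// encodePersonalities() above packs a 1-based personality index into the
// encoding word by shifting it into the bits selected by
// UNWIND_PERSONALITY_MASK; the shift amount is the mask's trailing-zero count.
// A small numeric check (C++20 for std::countr_zero; 0x30000000 is the mask's
// value in compact_unwind_encoding.h):
#include <bit>
#include <cassert>
#include <cstdint>

int main() {
  constexpr uint32_t personalityMask = 0x30000000;
  uint32_t encoding = 0x01000000; // some pre-existing encoding bits
  uint32_t personalityIndex = 2;  // 1-based; must fit in the 2-bit field
  encoding |= personalityIndex << std::countr_zero(personalityMask);
  assert(((encoding & personalityMask) >> std::countr_zero(personalityMask)) == 2);
  assert(encoding == 0x21000000);
}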
-// __unwind_info stores unwind data for address ranges. If several
-// adjacent functions have the same unwind encoding, LSDA, and personality
-// function, they share one unwind entry. For this to work, functions without
-// unwind info need explicit "no unwind info" unwind entries -- else the
-// unwinder would think they have the unwind info of the closest function
-// with unwind info right before in the image.
-template <class Ptr>
-static void addEntriesForFunctionsWithoutUnwindInfo(
- std::vector<CompactUnwindEntry<Ptr>> &cuVector) {
- DenseSet<Ptr> hasUnwindInfo;
- for (CompactUnwindEntry<Ptr> &cuEntry : cuVector)
- if (cuEntry.functionAddress != TombstoneValue<Ptr>)
- hasUnwindInfo.insert(cuEntry.functionAddress);
-
- // Add explicit "has no unwind info" entries for all global and local symbols
- // without unwind info.
- auto markNoUnwindInfo = [&cuVector, &hasUnwindInfo](const Defined *d) {
- if (d->isLive() && d->isec && isCodeSection(d->isec)) {
- Ptr ptr = d->getVA();
- if (!hasUnwindInfo.count(ptr))
- cuVector.push_back({ptr, 0, 0, 0, 0});
- }
- };
- for (Symbol *sym : symtab->getSymbols())
- if (auto *d = dyn_cast<Defined>(sym))
- markNoUnwindInfo(d);
- for (const InputFile *file : inputFiles)
- if (auto *objFile = dyn_cast<ObjFile>(file))
- for (Symbol *sym : objFile->symbols)
- if (auto *d = dyn_cast_or_null<Defined>(sym))
- if (!d->isExternal())
- markNoUnwindInfo(d);
-}
-
static bool canFoldEncoding(compact_unwind_encoding_t encoding) {
// From compact_unwind_encoding.h:
// UNWIND_X86_64_MODE_STACK_IND:
@@ -367,10 +376,24 @@ static bool canFoldEncoding(compact_unwind_encoding_t encoding) {
return true;
}
+template <class Ptr>
+Reloc *
+UnwindInfoSectionImpl<Ptr>::findLsdaReloc(ConcatInputSection *isec) const {
+ if (isec == nullptr)
+ return nullptr;
+ auto it = llvm::find_if(isec->relocs, [](const Reloc &r) {
+ return r.offset % sizeof(CompactUnwindEntry<Ptr>) ==
+ offsetof(CompactUnwindEntry<Ptr>, lsda);
+ });
+ if (it == isec->relocs.end())
+ return nullptr;
+ return &*it;
+}
+
// Scan the __LD,__compact_unwind entries and compute the space needs of
// __TEXT,__unwind_info and __TEXT,__eh_frame
template <class Ptr> void UnwindInfoSectionImpl<Ptr>::finalize() {
- if (compactUnwindSection == nullptr)
+ if (symbols.empty())
return;
// At this point, the address space for __TEXT,__text has been
@@ -380,75 +403,65 @@ template <class Ptr> void UnwindInfoSectionImpl<Ptr>::finalize() {
// we can fold adjacent CU entries with identical
// encoding+personality+lsda. Folding is necessary because it reduces
// the number of CU entries by as much as 3 orders of magnitude!
- compactUnwindSection->finalize();
- assert(compactUnwindSection->getSize() % sizeof(CompactUnwindEntry<Ptr>) ==
- 0);
- size_t cuCount =
- compactUnwindSection->getSize() / sizeof(CompactUnwindEntry<Ptr>);
- cuVector.resize(cuCount);
- relocateCompactUnwind(compactUnwindSection, cuVector);
-
- addEntriesForFunctionsWithoutUnwindInfo(cuVector);
+ cuEntries.resize(symbols.size());
+ // The "map" part of the symbols MapVector was only needed for deduplication
+ // in addSymbol(). Now that we are done adding, move the contents to a plain
+ // std::vector for indexed access.
+ symbolsVec = symbols.takeVector();
+ relocateCompactUnwind(cuEntries);
// Rather than sort & fold the 32-byte entries directly, we create a
- // vector of pointers to entries and sort & fold that instead.
- cuPtrVector.reserve(cuVector.size());
- for (CompactUnwindEntry<Ptr> &cuEntry : cuVector)
- cuPtrVector.emplace_back(&cuEntry);
- llvm::sort(cuPtrVector, [](const CompactUnwindEntry<Ptr> *a,
- const CompactUnwindEntry<Ptr> *b) {
- return a->functionAddress < b->functionAddress;
+ // vector of indices to entries and sort & fold that instead.
+ cuIndices.resize(cuEntries.size());
+ std::iota(cuIndices.begin(), cuIndices.end(), 0);
+ llvm::sort(cuIndices, [&](size_t a, size_t b) {
+ return cuEntries[a].functionAddress < cuEntries[b].functionAddress;
});
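The iota/sort pair above orders indices rather than the 32-byte entries themselves. A self-contained sketch of the same idiom with plain STL calls (llvm::sort behaves like std::sort for this purpose); names are illustrative, not lld's.

    #include <algorithm>
    #include <cstddef>
    #include <cstdint>
    #include <numeric>
    #include <vector>

    // Returns a permutation of 0..keys.size()-1 ordered by the keys, leaving
    // the underlying records untouched.
    std::vector<std::size_t> sortedIndices(const std::vector<uint64_t> &keys) {
      std::vector<std::size_t> idx(keys.size());
      std::iota(idx.begin(), idx.end(), 0);      // 0, 1, 2, ...
      std::sort(idx.begin(), idx.end(),
                [&](std::size_t a, std::size_t b) { return keys[a] < keys[b]; });
      return idx;
    }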
- // Dead-stripped functions get a functionAddress of TombstoneValue in
- // relocateCompactUnwind(). Filter them out here.
- // FIXME: This doesn't yet collect associated data like LSDAs kept
- // alive only by a now-removed CompactUnwindEntry or other comdat-like
- // data (`kindNoneGroupSubordinate*` in ld64).
- CompactUnwindEntry<Ptr> tombstone;
- tombstone.functionAddress = TombstoneValue<Ptr>;
- cuPtrVector.erase(
- std::lower_bound(cuPtrVector.begin(), cuPtrVector.end(), &tombstone,
- [](const CompactUnwindEntry<Ptr> *a,
- const CompactUnwindEntry<Ptr> *b) {
- return a->functionAddress < b->functionAddress;
- }),
- cuPtrVector.end());
-
- // If there are no entries left after adding explicit "no unwind info"
- // entries and removing entries for dead-stripped functions, don't write
- // an __unwind_info section at all.
- assert(allEntriesAreOmitted == cuPtrVector.empty());
- if (cuPtrVector.empty())
- return;
-
// Fold adjacent entries with matching encoding+personality+lsda
- // We use three iterators on the same cuPtrVector to fold in-situ:
+ // We use three iterators on the same cuIndices to fold in-situ:
// (1) `foldBegin` is the first of a potential sequence of matching entries
// (2) `foldEnd` is the first non-matching entry after `foldBegin`.
// The semi-open interval [ foldBegin .. foldEnd ) contains a range
// entries that can be folded into a single entry and written to ...
// (3) `foldWrite`
- auto foldWrite = cuPtrVector.begin();
- for (auto foldBegin = cuPtrVector.begin(); foldBegin < cuPtrVector.end();) {
+ auto foldWrite = cuIndices.begin();
+ for (auto foldBegin = cuIndices.begin(); foldBegin < cuIndices.end();) {
auto foldEnd = foldBegin;
- while (++foldEnd < cuPtrVector.end() &&
- (*foldBegin)->encoding == (*foldEnd)->encoding &&
- (*foldBegin)->personality == (*foldEnd)->personality &&
- (*foldBegin)->lsda == (*foldEnd)->lsda &&
- canFoldEncoding((*foldEnd)->encoding))
- ;
+ while (++foldEnd < cuIndices.end() &&
+ cuEntries[*foldBegin].encoding == cuEntries[*foldEnd].encoding &&
+ cuEntries[*foldBegin].personality ==
+ cuEntries[*foldEnd].personality &&
+ canFoldEncoding(cuEntries[*foldEnd].encoding)) {
+ // In most cases, we can just compare the values of cuEntries[*].lsda.
+ // However, it is possible for -rename_section to cause the LSDA section
+ // (__gcc_except_tab) to be finalized after the unwind info section. In
+ // that case, we don't yet have unique addresses for the LSDA entries.
+ // So we check their relocations instead.
+ // FIXME: should we account for an LSDA at an absolute address? ld64 seems
+ // to support it, but it seems unlikely to be used in practice.
+ Reloc *lsda1 = findLsdaReloc(symbolsVec[*foldBegin].second->unwindEntry);
+ Reloc *lsda2 = findLsdaReloc(symbolsVec[*foldEnd].second->unwindEntry);
+ if (lsda1 == nullptr && lsda2 == nullptr)
+ continue;
+ if (lsda1 == nullptr || lsda2 == nullptr)
+ break;
+ if (lsda1->referent != lsda2->referent)
+ break;
+ if (lsda1->addend != lsda2->addend)
+ break;
+ }
*foldWrite++ = *foldBegin;
foldBegin = foldEnd;
}
- cuPtrVector.erase(foldWrite, cuPtrVector.end());
+ cuIndices.erase(foldWrite, cuIndices.end());
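A sketch of the three-iterator fold in isolation, with the encoding/personality/LSDA comparison abstracted into a predicate; this is only the shape of the loop above, not lld code.

    #include <cstddef>
    #include <vector>

    // Collapse runs of adjacent indices whose entries compare equal under
    // `same`, keeping the first index of each run as its representative.
    template <class Pred>
    void foldAdjacent(std::vector<std::size_t> &indices, Pred same) {
      auto write = indices.begin();
      for (auto begin = indices.begin(); begin < indices.end();) {
        auto end = begin;
        while (++end < indices.end() && same(*begin, *end))
          ;
        *write++ = *begin;   // one entry survives per foldable run
        begin = end;
      }
      indices.erase(write, indices.end());
    }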
- encodePersonalities(cuPtrVector, personalities);
+ encodePersonalities();
// Count frequencies of the folded encodings
EncodingMap encodingFrequencies;
- for (const CompactUnwindEntry<Ptr> *cuPtrEntry : cuPtrVector)
- encodingFrequencies[cuPtrEntry->encoding]++;
+ for (size_t idx : cuIndices)
+ encodingFrequencies[cuEntries[idx].encoding]++;
// Make a vector of encodings, sorted by descending frequency
for (const auto &frequency : encodingFrequencies)
@@ -481,19 +494,21 @@ template <class Ptr> void UnwindInfoSectionImpl<Ptr>::finalize() {
// and 127..255 references a local per-second-level-page table.
// First we try the compact format and determine how many entries fit.
// If more entries fit in the regular format, we use that.
- for (size_t i = 0; i < cuPtrVector.size();) {
+ for (size_t i = 0; i < cuIndices.size();) {
+ size_t idx = cuIndices[i];
secondLevelPages.emplace_back();
SecondLevelPage &page = secondLevelPages.back();
page.entryIndex = i;
uintptr_t functionAddressMax =
- cuPtrVector[i]->functionAddress + COMPRESSED_ENTRY_FUNC_OFFSET_MASK;
+ cuEntries[idx].functionAddress + COMPRESSED_ENTRY_FUNC_OFFSET_MASK;
size_t n = commonEncodings.size();
size_t wordsRemaining =
SECOND_LEVEL_PAGE_WORDS -
sizeof(unwind_info_compressed_second_level_page_header) /
sizeof(uint32_t);
- while (wordsRemaining >= 1 && i < cuPtrVector.size()) {
- const CompactUnwindEntry<Ptr> *cuPtr = cuPtrVector[i];
+ while (wordsRemaining >= 1 && i < cuIndices.size()) {
+ idx = cuIndices[i];
+ const CompactUnwindEntry<Ptr> *cuPtr = &cuEntries[idx];
if (cuPtr->functionAddress >= functionAddressMax) {
break;
} else if (commonEncodingIndexes.count(cuPtr->encoding) ||
@@ -515,34 +530,33 @@ template <class Ptr> void UnwindInfoSectionImpl<Ptr>::finalize() {
// entries by using the regular format. This can happen when there
// are many unique encodings, and we saturated the local
// encoding table early.
- if (i < cuPtrVector.size() &&
+ if (i < cuIndices.size() &&
page.entryCount < REGULAR_SECOND_LEVEL_ENTRIES_MAX) {
page.kind = UNWIND_SECOND_LEVEL_REGULAR;
page.entryCount = std::min(REGULAR_SECOND_LEVEL_ENTRIES_MAX,
- cuPtrVector.size() - page.entryIndex);
+ cuIndices.size() - page.entryIndex);
i = page.entryIndex + page.entryCount;
} else {
page.kind = UNWIND_SECOND_LEVEL_COMPRESSED;
}
}
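Before the paging loop above, finalize() ranks the folded encodings by frequency so the most common ones land in the global table (indices 0..126, per the comment above; 127..255 select the page-local table). A hedged sketch of that ranking step, using plain STL containers rather than lld's EncodingMap:

    #include <algorithm>
    #include <cstddef>
    #include <cstdint>
    #include <unordered_map>
    #include <utility>
    #include <vector>

    // Keep at most 127 encodings, ordered by descending frequency, so every
    // surviving encoding fits in the 0..126 common-table index range.
    std::vector<uint32_t>
    pickCommonEncodings(const std::unordered_map<uint32_t, std::size_t> &freq) {
      std::vector<std::pair<uint32_t, std::size_t>> ranked(freq.begin(),
                                                           freq.end());
      std::sort(ranked.begin(), ranked.end(),
                [](const auto &a, const auto &b) { return a.second > b.second; });
      std::vector<uint32_t> common;
      for (const auto &p : ranked) {
        if (common.size() == 127)
          break;
        common.push_back(p.first);
      }
      return common;
    }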
- for (const CompactUnwindEntry<Ptr> *cu : cuPtrVector) {
- uint32_t functionOffset = cu->functionAddress - in.header->addr;
- functionToLsdaIndex[functionOffset] = lsdaEntries.size();
- if (cu->lsda != 0)
- lsdaEntries.push_back(
- {functionOffset, static_cast<uint32_t>(cu->lsda - in.header->addr)});
+ for (size_t idx : cuIndices) {
+ lsdaIndex[idx] = entriesWithLsda.size();
+ const Defined *d = symbolsVec[idx].second;
+ if (findLsdaReloc(d->unwindEntry))
+ entriesWithLsda.push_back(idx);
}
// compute size of __TEXT,__unwind_info section
- level2PagesOffset =
- sizeof(unwind_info_section_header) +
- commonEncodings.size() * sizeof(uint32_t) +
- personalities.size() * sizeof(uint32_t) +
- // The extra second-level-page entry is for the sentinel
- (secondLevelPages.size() + 1) *
- sizeof(unwind_info_section_header_index_entry) +
- lsdaEntries.size() * sizeof(unwind_info_section_header_lsda_index_entry);
+ level2PagesOffset = sizeof(unwind_info_section_header) +
+ commonEncodings.size() * sizeof(uint32_t) +
+ personalities.size() * sizeof(uint32_t) +
+ // The extra second-level-page entry is for the sentinel
+ (secondLevelPages.size() + 1) *
+ sizeof(unwind_info_section_header_index_entry) +
+ entriesWithLsda.size() *
+ sizeof(unwind_info_section_header_lsda_index_entry);
unwindInfoSize =
level2PagesOffset + secondLevelPages.size() * SECOND_LEVEL_PAGE_BYTES;
}
@@ -551,7 +565,7 @@ template <class Ptr> void UnwindInfoSectionImpl<Ptr>::finalize() {
template <class Ptr>
void UnwindInfoSectionImpl<Ptr>::writeTo(uint8_t *buf) const {
- assert(!cuPtrVector.empty() && "call only if there is unwind info");
+ assert(!cuIndices.empty() && "call only if there is unwind info");
// section header
auto *uip = reinterpret_cast<unwind_info_section_header *>(buf);
@@ -572,7 +586,7 @@ void UnwindInfoSectionImpl<Ptr>::writeTo(uint8_t *buf) const {
*i32p++ = encoding.first;
// Personalities
- for (const uint32_t &personality : personalities)
+ for (Ptr personality : personalities)
*i32p++ =
in.got->addr + (personality - 1) * target->wordSize - in.header->addr;
@@ -583,39 +597,51 @@ void UnwindInfoSectionImpl<Ptr>::writeTo(uint8_t *buf) const {
uint64_t l2PagesOffset = level2PagesOffset;
auto *iep = reinterpret_cast<unwind_info_section_header_index_entry *>(i32p);
for (const SecondLevelPage &page : secondLevelPages) {
- iep->functionOffset =
- cuPtrVector[page.entryIndex]->functionAddress - in.header->addr;
+ size_t idx = cuIndices[page.entryIndex];
+ iep->functionOffset = cuEntries[idx].functionAddress - in.header->addr;
iep->secondLevelPagesSectionOffset = l2PagesOffset;
iep->lsdaIndexArraySectionOffset =
- lsdaOffset + functionToLsdaIndex.lookup(iep->functionOffset) *
+ lsdaOffset + lsdaIndex.lookup(idx) *
sizeof(unwind_info_section_header_lsda_index_entry);
iep++;
l2PagesOffset += SECOND_LEVEL_PAGE_BYTES;
}
// Level-1 sentinel
- const CompactUnwindEntry<Ptr> &cuEnd = *cuPtrVector.back();
- assert(cuEnd.functionAddress != TombstoneValue<Ptr>);
+ const CompactUnwindEntry<Ptr> &cuEnd = cuEntries[cuIndices.back()];
iep->functionOffset =
cuEnd.functionAddress - in.header->addr + cuEnd.functionLength;
iep->secondLevelPagesSectionOffset = 0;
iep->lsdaIndexArraySectionOffset =
- lsdaOffset +
- lsdaEntries.size() * sizeof(unwind_info_section_header_lsda_index_entry);
+ lsdaOffset + entriesWithLsda.size() *
+ sizeof(unwind_info_section_header_lsda_index_entry);
iep++;
// LSDAs
- size_t lsdaBytes =
- lsdaEntries.size() * sizeof(unwind_info_section_header_lsda_index_entry);
- if (lsdaBytes > 0)
- memcpy(iep, lsdaEntries.data(), lsdaBytes);
+ auto *lep =
+ reinterpret_cast<unwind_info_section_header_lsda_index_entry *>(iep);
+ for (size_t idx : entriesWithLsda) {
+ const CompactUnwindEntry<Ptr> &cu = cuEntries[idx];
+ const Defined *d = symbolsVec[idx].second;
+ if (Reloc *r = findLsdaReloc(d->unwindEntry)) {
+ uint64_t va;
+ if (auto *isec = r->referent.dyn_cast<InputSection *>()) {
+ va = isec->getVA(r->addend);
+ } else {
+ auto *sym = r->referent.get<Symbol *>();
+ va = sym->getVA() + r->addend;
+ }
+ lep->lsdaOffset = va - in.header->addr;
+ }
+ lep->functionOffset = cu.functionAddress - in.header->addr;
+ lep++;
+ }
// Level-2 pages
- auto *pp = reinterpret_cast<uint32_t *>(reinterpret_cast<uint8_t *>(iep) +
- lsdaBytes);
+ auto *pp = reinterpret_cast<uint32_t *>(lep);
for (const SecondLevelPage &page : secondLevelPages) {
if (page.kind == UNWIND_SECOND_LEVEL_COMPRESSED) {
uintptr_t functionAddressBase =
- cuPtrVector[page.entryIndex]->functionAddress;
+ cuEntries[cuIndices[page.entryIndex]].functionAddress;
auto *p2p =
reinterpret_cast<unwind_info_compressed_second_level_page_header *>(
pp);
@@ -628,14 +654,15 @@ void UnwindInfoSectionImpl<Ptr>::writeTo(uint8_t *buf) const {
p2p->encodingsCount = page.localEncodings.size();
auto *ep = reinterpret_cast<uint32_t *>(&p2p[1]);
for (size_t i = 0; i < page.entryCount; i++) {
- const CompactUnwindEntry<Ptr> *cuep = cuPtrVector[page.entryIndex + i];
- auto it = commonEncodingIndexes.find(cuep->encoding);
+ const CompactUnwindEntry<Ptr> &cue =
+ cuEntries[cuIndices[page.entryIndex + i]];
+ auto it = commonEncodingIndexes.find(cue.encoding);
if (it == commonEncodingIndexes.end())
- it = page.localEncodingIndexes.find(cuep->encoding);
+ it = page.localEncodingIndexes.find(cue.encoding);
*ep++ = (it->second << COMPRESSED_ENTRY_FUNC_OFFSET_BITS) |
- (cuep->functionAddress - functionAddressBase);
+ (cue.functionAddress - functionAddressBase);
}
- if (page.localEncodings.size() != 0)
+ if (!page.localEncodings.empty())
memcpy(ep, page.localEncodings.data(),
page.localEncodings.size() * sizeof(uint32_t));
} else {
@@ -647,9 +674,10 @@ void UnwindInfoSectionImpl<Ptr>::writeTo(uint8_t *buf) const {
p2p->entryCount = page.entryCount;
auto *ep = reinterpret_cast<uint32_t *>(&p2p[1]);
for (size_t i = 0; i < page.entryCount; i++) {
- const CompactUnwindEntry<Ptr> *cuep = cuPtrVector[page.entryIndex + i];
- *ep++ = cuep->functionAddress;
- *ep++ = cuep->encoding;
+ const CompactUnwindEntry<Ptr> &cue =
+ cuEntries[cuIndices[page.entryIndex + i]];
+ *ep++ = cue.functionAddress;
+ *ep++ = cue.encoding;
}
}
pp += SECOND_LEVEL_PAGE_WORDS;
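The compressed second-level entries written above pack an encoding-table index and a function offset into one 32-bit word; the offset is taken relative to the page's first function so it stays small. A sketch of that packing, assuming the usual 24-bit offset / 8-bit index split that COMPRESSED_ENTRY_FUNC_OFFSET_BITS and the matching mask encode in lld:

    #include <cstdint>

    constexpr uint32_t kFuncOffsetBits = 24;                 // assumed split
    constexpr uint32_t kFuncOffsetMask = (1u << kFuncOffsetBits) - 1;

    // Encoding index goes in the top bits, the offset from the page's base
    // function address in the low bits. The real code can omit the mask
    // because page boundaries are chosen so offsets always fit.
    uint32_t makeCompressedEntry(uint32_t encodingIndex, uint64_t funcAddr,
                                 uint64_t pageBaseAddr) {
      uint32_t offset =
          static_cast<uint32_t>(funcAddr - pageBaseAddr) & kFuncOffsetMask;
      return (encodingIndex << kFuncOffsetBits) | offset;
    }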
diff --git a/lld/MachO/UnwindInfoSection.h b/lld/MachO/UnwindInfoSection.h
index fca11de6eeb1..e1d60bec077c 100644
--- a/lld/MachO/UnwindInfoSection.h
+++ b/lld/MachO/UnwindInfoSection.h
@@ -11,37 +11,30 @@
#include "ConcatOutputSection.h"
#include "SyntheticSections.h"
+#include "llvm/ADT/MapVector.h"
#include "mach-o/compact_unwind_encoding.h"
namespace lld {
namespace macho {
-template <class Ptr> struct CompactUnwindEntry {
- Ptr functionAddress;
- uint32_t functionLength;
- compact_unwind_encoding_t encoding;
- Ptr personality;
- Ptr lsda;
-};
-
class UnwindInfoSection : public SyntheticSection {
public:
- bool isNeeded() const override {
- return !compactUnwindSection->inputs.empty() && !allEntriesAreOmitted;
- }
+ // If all functions are free of unwind info, we can omit the unwind info
+ // section entirely.
+ bool isNeeded() const override { return !allEntriesAreOmitted; }
uint64_t getSize() const override { return unwindInfoSize; }
- virtual void addInput(ConcatInputSection *) = 0;
- std::vector<ConcatInputSection *> getInputs() {
- return compactUnwindSection->inputs;
- }
+ void addSymbol(const Defined *);
void prepareRelocations();
protected:
UnwindInfoSection();
virtual void prepareRelocations(ConcatInputSection *) = 0;
- ConcatOutputSection *compactUnwindSection;
+ llvm::MapVector<std::pair<const InputSection *, uint64_t /*Defined::value*/>,
+ const Defined *>
+ symbols;
+ std::vector<decltype(symbols)::value_type> symbolsVec;
uint64_t unwindInfoSize = 0;
bool allEntriesAreOmitted = true;
};
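The new symbols member pairs a MapVector (for insertion-ordered deduplication keyed on the symbol's section and value) with a plain vector populated via takeVector() once insertion is done, as the finalize() change above describes. A minimal sketch of that pattern with stand-in types:

    #include "llvm/ADT/MapVector.h"
    #include <cstdint>
    #include <utility>

    struct Sym {};   // stand-in for lld::macho::Defined

    void dedupThenIndex() {
      llvm::MapVector<std::pair<const void *, uint64_t>, const Sym *> symbols;
      Sym a, b;
      symbols.insert({{nullptr, 0x10}, &a});  // first insertion wins
      symbols.insert({{nullptr, 0x10}, &b});  // duplicate key: ignored
      // After all insertions, drop the map half and keep only the vector for
      // cheap indexed access (what symbolsVec is used for).
      auto symbolsVec = symbols.takeVector();
      (void)symbolsVec;
    }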
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index d9c9cf570054..093a380d175e 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -227,7 +227,7 @@ public:
void writeTo(uint8_t *buf) const override {
using SegmentCommand = typename LP::segment_command;
- using Section = typename LP::section;
+ using SectionHeader = typename LP::section;
auto *c = reinterpret_cast<SegmentCommand *>(buf);
buf += sizeof(SegmentCommand);
@@ -248,8 +248,8 @@ public:
if (osec->isHidden())
continue;
- auto *sectHdr = reinterpret_cast<Section *>(buf);
- buf += sizeof(Section);
+ auto *sectHdr = reinterpret_cast<SectionHeader *>(buf);
+ buf += sizeof(SectionHeader);
memcpy(sectHdr->sectname, osec->name.data(), osec->name.size());
memcpy(sectHdr->segname, name.data(), name.size());
@@ -343,6 +343,7 @@ public:
}
static uint32_t getInstanceCount() { return instanceCount; }
+ static void resetInstanceCount() { instanceCount = 0; }
private:
LoadCommandType type;
@@ -671,10 +672,15 @@ void Writer::scanRelocations() {
void Writer::scanSymbols() {
TimeTraceScope timeScope("Scan symbols");
- for (const Symbol *sym : symtab->getSymbols()) {
- if (const auto *defined = dyn_cast<Defined>(sym)) {
- if (defined->overridesWeakDef && defined->isLive())
+ for (Symbol *sym : symtab->getSymbols()) {
+ if (auto *defined = dyn_cast<Defined>(sym)) {
+ if (!defined->isLive())
+ continue;
+ defined->canonicalize();
+ if (defined->overridesWeakDef)
in.weakBinding->addNonWeakDefinition(defined);
+ if (!defined->isAbsolute() && isCodeSection(defined->isec))
+ in.unwindInfo->addSymbol(defined);
} else if (const auto *dysym = dyn_cast<DylibSymbol>(sym)) {
// This branch intentionally doesn't check isLive().
if (dysym->isDynamicLookup())
@@ -683,6 +689,20 @@ void Writer::scanSymbols() {
std::max(dysym->getFile()->refState, dysym->getRefState());
}
}
+
+ for (const InputFile *file : inputFiles) {
+ if (auto *objFile = dyn_cast<ObjFile>(file))
+ for (Symbol *sym : objFile->symbols) {
+ if (auto *defined = dyn_cast_or_null<Defined>(sym)) {
+ if (!defined->isLive())
+ continue;
+ defined->canonicalize();
+ if (!defined->isExternal() && !defined->isAbsolute() &&
+ isCodeSection(defined->isec))
+ in.unwindInfo->addSymbol(defined);
+ }
+ }
+ }
}
// TODO: ld64 enforces the old load commands in a few other cases.
@@ -954,7 +974,12 @@ template <class LP> void Writer::createOutputSections() {
for (SyntheticSection *ssec : syntheticSections) {
auto it = concatOutputSections.find({ssec->segname, ssec->name});
- if (ssec->isNeeded()) {
+ // We add all LinkEdit sections here because we don't know if they are
+ // needed until their finalizeContents() methods get called later. While
+ // this means that we add some redundant sections to __LINKEDIT, there is
+ // no redundancy in the output, as we do not emit section headers for
+ // any LinkEdit sections.
+ if (ssec->isNeeded() || ssec->segname == segment_names::linkEdit) {
if (it == concatOutputSections.end()) {
getOrCreateOutputSegment(ssec->segname)->addOutputSection(ssec);
} else {
@@ -1101,11 +1126,19 @@ template <class LP> void Writer::run() {
treatSpecialUndefineds();
if (config->entry && !isa<Undefined>(config->entry))
prepareBranchTarget(config->entry);
+ // Canonicalization of all pointers to InputSections should be handled by
+ // these two methods.
+ scanSymbols();
scanRelocations();
+
+ // Do not proceed if there was an undefined symbol.
+ if (errorCount())
+ return;
+
if (in.stubHelper->isNeeded())
in.stubHelper->setup();
- scanSymbols();
createOutputSections<LP>();
+
// After this point, we create no new segments; HOWEVER, we might
// yet create branch-range extension thunks for architectures whose
// hardware call instructions have limited range, e.g., ARM(64).
@@ -1121,6 +1154,8 @@ template <class LP> void Writer::run() {
template <class LP> void macho::writeResult() { Writer().run<LP>(); }
+void macho::resetWriter() { LCDylib::resetInstanceCount(); }
+
void macho::createSyntheticSections() {
in.header = make<MachHeaderSection>();
if (config->dedupLiterals) {
diff --git a/lld/MachO/Writer.h b/lld/MachO/Writer.h
index 56f6f7ae6fe7..5ab40cabe64b 100644
--- a/lld/MachO/Writer.h
+++ b/lld/MachO/Writer.h
@@ -26,6 +26,7 @@ public:
};
template <class LP> void writeResult();
+void resetWriter();
void createSyntheticSections();
diff --git a/lld/MachO/ld64-vs-lld.rst b/lld/MachO/ld64-vs-lld.rst
new file mode 100644
index 000000000000..14fb72a2ac8f
--- /dev/null
+++ b/lld/MachO/ld64-vs-lld.rst
@@ -0,0 +1,15 @@
+==================
+LD64 vs LLD-MACHO
+==================
+
+This doc lists all significant deliberate differences in behavior between LD64 and LLD-MachO.
+
+ObjC symbols treatment
+**********************
+There are differences in how LLD and LD64 handle ObjC symbols loaded from archives.
+
+- LD64:
+ * Duplicate ObjC symbols from the same archives will not raise an error. LD64 will pick the first one.
+ * Duplicate ObjC symbols from different archives will raise a "duplicate symbol" error.
+- LLD:
+ * Duplicate symbols, regardless of which archives they are from, will raise errors.
diff --git a/lld/docs/AtomLLD.rst b/lld/docs/AtomLLD.rst
deleted file mode 100644
index 2766094696e0..000000000000
--- a/lld/docs/AtomLLD.rst
+++ /dev/null
@@ -1,62 +0,0 @@
-ATOM-based lld
-==============
-
-Note: this document discuss Mach-O port of LLD. For ELF and COFF,
-see :doc:`index`.
-
-ATOM-based lld is a new set of modular code for creating linker tools.
-Currently it supports Mach-O.
-
-* End-User Features:
-
- * Compatible with existing linker options
- * Reads standard Object Files
- * Writes standard Executable Files
- * Remove clang's reliance on "the system linker"
- * Uses the LLVM `"UIUC" BSD-Style license`__.
-
-* Applications:
-
- * Modular design
- * Support cross linking
- * Easy to add new CPU support
- * Can be built as static tool or library
-
-* Design and Implementation:
-
- * Extensive unit tests
- * Internal linker model can be dumped/read to textual format
- * Additional linking features can be plugged in as "passes"
- * OS specific and CPU specific code factored out
-
-Why a new linker?
------------------
-
-The fact that clang relies on whatever linker tool you happen to have installed
-means that clang has been very conservative adopting features which require a
-recent linker.
-
-In the same way that the MC layer of LLVM has removed clang's reliance on the
-system assembler tool, the lld project will remove clang's reliance on the
-system linker tool.
-
-
-Contents
---------
-
-.. toctree::
- :maxdepth: 2
-
- design
- getting_started
- development
- open_projects
- sphinx_intro
-
-Indices and tables
-------------------
-
-* :ref:`genindex`
-* :ref:`search`
-
-__ https://llvm.org/docs/DeveloperPolicy.html#license
diff --git a/lld/docs/Driver.rst b/lld/docs/Driver.rst
deleted file mode 100644
index 0ac86ff65fca..000000000000
--- a/lld/docs/Driver.rst
+++ /dev/null
@@ -1,82 +0,0 @@
-======
-Driver
-======
-
-Note: this document discuss Mach-O port of LLD. For ELF and COFF,
-see :doc:`index`.
-
-.. contents::
- :local:
-
-Introduction
-============
-
-This document describes the lld driver. The purpose of this document is to
-describe both the motivation and design goals for the driver, as well as details
-of the internal implementation.
-
-Overview
-========
-
-The lld driver is designed to support a number of different command line
-interfaces. The main interfaces we plan to support are binutils' ld, Apple's
-ld, and Microsoft's link.exe.
-
-Flavors
--------
-
-Each of these different interfaces is referred to as a flavor. There is also an
-extra flavor "core" which is used to exercise the core functionality of the
-linker it the test suite.
-
-* gnu
-* darwin
-* link
-* core
-
-Selecting a Flavor
-^^^^^^^^^^^^^^^^^^
-
-There are two different ways to tell lld which flavor to be. They are checked in
-order, so the second overrides the first. The first is to symlink :program:`lld`
-as :program:`lld-{flavor}` or just :program:`{flavor}`. You can also specify
-it as the first command line argument using ``-flavor``::
-
- $ lld -flavor gnu
-
-There is a shortcut for ``-flavor core`` as ``-core``.
-
-
-Adding an Option to an existing Flavor
-======================================
-
-#. Add the option to the desired :file:`lib/Driver/{flavor}Options.td`.
-
-#. Add to :cpp:class:`lld::FlavorLinkingContext` a getter and setter method
- for the option.
-
-#. Modify :cpp:func:`lld::FlavorDriver::parse` in :file:
- `lib/Driver/{Flavor}Driver.cpp` to call the targetInfo setter
- for the option.
-
-#. Modify {Flavor}Reader and {Flavor}Writer to use the new targetInfo option.
-
-
-Adding a Flavor
-===============
-
-#. Add an entry for the flavor in :file:`include/lld/Common/Driver.h` to
- :cpp:class:`lld::UniversalDriver::Flavor`.
-
-#. Add an entry in :file:`lib/Driver/UniversalDriver.cpp` to
- :cpp:func:`lld::Driver::strToFlavor` and
- :cpp:func:`lld::UniversalDriver::link`.
- This allows the flavor to be selected via symlink and `-flavor`.
-
-#. Add a tablegen file called :file:`lib/Driver/{flavor}Options.td` that
- describes the options. If the options are a superset of another driver, that
- driver's td file can simply be included. The :file:`{flavor}Options.td` file
- must also be added to :file:`lib/Driver/CMakeLists.txt`.
-
-#. Add a ``{flavor}Driver`` as a subclass of :cpp:class:`lld::Driver`
- in :file:`lib/Driver/{flavor}Driver.cpp`.
diff --git a/lld/docs/Readers.rst b/lld/docs/Readers.rst
deleted file mode 100644
index eae1717f6e5b..000000000000
--- a/lld/docs/Readers.rst
+++ /dev/null
@@ -1,174 +0,0 @@
-.. _Readers:
-
-Developing lld Readers
-======================
-
-Note: this document discuss Mach-O port of LLD. For ELF and COFF,
-see :doc:`index`.
-
-Introduction
-------------
-
-The purpose of a "Reader" is to take an object file in a particular format
-and create an `lld::File`:cpp:class: (which is a graph of Atoms)
-representing the object file. A Reader inherits from
-`lld::Reader`:cpp:class: which lives in
-:file:`include/lld/Core/Reader.h` and
-:file:`lib/Core/Reader.cpp`.
-
-The Reader infrastructure for an object format ``Foo`` requires the
-following pieces in order to fit into lld:
-
-:file:`include/lld/ReaderWriter/ReaderFoo.h`
-
- .. cpp:class:: ReaderOptionsFoo : public ReaderOptions
-
- This Options class is the only way to configure how the Reader will
- parse any file into an `lld::Reader`:cpp:class: object. This class
- should be declared in the `lld`:cpp:class: namespace.
-
- .. cpp:function:: Reader *createReaderFoo(ReaderOptionsFoo &reader)
-
- This factory function configures and create the Reader. This function
- should be declared in the `lld`:cpp:class: namespace.
-
-:file:`lib/ReaderWriter/Foo/ReaderFoo.cpp`
-
- .. cpp:class:: ReaderFoo : public Reader
-
- This is the concrete Reader class which can be called to parse
- object files. It should be declared in an anonymous namespace or
- if there is shared code with the `lld::WriterFoo`:cpp:class: you
- can make a nested namespace (e.g. `lld::foo`:cpp:class:).
-
-You may have noticed that :cpp:class:`ReaderFoo` is not declared in the
-``.h`` file. An important design aspect of lld is that all Readers are
-created *only* through an object-format-specific
-:cpp:func:`createReaderFoo` factory function. The creation of the Reader is
-parametrized through a :cpp:class:`ReaderOptionsFoo` class. This options
-class is the one-and-only way to control how the Reader operates when
-parsing an input file into an Atom graph. For instance, you may want the
-Reader to only accept certain architectures. The options class can be
-instantiated from command line options or be programmatically configured.
-
-Where to start
---------------
-
-The lld project already has a skeleton of source code for Readers for
-``ELF``, ``PECOFF``, ``MachO``, and lld's native ``YAML`` graph format.
-If your file format is a variant of one of those, you should modify the
-existing Reader to support your variant. This is done by customizing the Options
-class for the Reader and making appropriate changes to the ``.cpp`` file to
-interpret those options and act accordingly.
-
-If your object file format is not a variant of any existing Reader, you'll need
-to create a new Reader subclass with the organization described above.
-
-Readers are factories
----------------------
-
-The linker will usually only instantiate your Reader once. That one Reader will
-have its loadFile() method called many times with different input files.
-To support multithreaded linking, the Reader may be parsing multiple input
-files in parallel. Therefore, there should be no parsing state in you Reader
-object. Any parsing state should be in ivars of your File subclass or in
-some temporary object.
-
-The key function to implement in a reader is::
-
- virtual error_code loadFile(LinkerInput &input,
- std::vector<std::unique_ptr<File>> &result);
-
-It takes a memory buffer (which contains the contents of the object file
-being read) and returns an instantiated lld::File object which is
-a collection of Atoms. The result is a vector of File pointers (instead of
-simple a File pointer) because some file formats allow multiple object
-"files" to be encoded in one file system file.
-
-
-Memory Ownership
-----------------
-
-Atoms are always owned by their File object. During core linking when Atoms
-are coalesced or stripped away, core linking does not delete them.
-Core linking just removes those unused Atoms from its internal list.
-The destructor of a File object is responsible for deleting all Atoms it
-owns, and if ownership of the MemoryBuffer was passed to it, the File
-destructor needs to delete that too.
-
-Making Atoms
-------------
-
-The internal model of lld is purely Atom based. But most object files do not
-have an explicit concept of Atoms, instead most have "sections". The way
-to think of this is that a section is just a list of Atoms with common
-attributes.
-
-The first step in parsing section-based object files is to cleave each
-section into a list of Atoms. The technique may vary by section type. For
-code sections (e.g. .text), there are usually symbols at the start of each
-function. Those symbol addresses are the points at which the section is
-cleaved into discrete Atoms. Some file formats (like ELF) also include the
-length of each symbol in the symbol table. Otherwise, the length of each
-Atom is calculated to run to the start of the next symbol or the end of the
-section.
-
-Other sections types can be implicitly cleaved. For instance c-string literals
-or unwind info (e.g. .eh_frame) can be cleaved by having the Reader look at
-the content of the section. It is important to cleave sections into Atoms
-to remove false dependencies. For instance the .eh_frame section often
-has no symbols, but contains "pointers" to the functions for which it
-has unwind info. If the .eh_frame section was not cleaved (but left as one
-big Atom), there would always be a reference (from the eh_frame Atom) to
-each function. So the linker would be unable to coalesce or dead stripped
-away the function atoms.
-
-The lld Atom model also requires that a reference to an undefined symbol be
-modeled as a Reference to an UndefinedAtom. So the Reader also needs to
-create an UndefinedAtom for each undefined symbol in the object file.
-
-Once all Atoms have been created, the second step is to create References
-(recall that Atoms are "nodes" and References are "edges"). Most References
-are created by looking at the "relocation records" in the object file. If
-a function contains a call to "malloc", there is usually a relocation record
-specifying the address in the section and the symbol table index. Your
-Reader will need to convert the address to an Atom and offset and the symbol
-table index into a target Atom. If "malloc" is not defined in the object file,
-the target Atom of the Reference will be an UndefinedAtom.
-
-
-Performance
------------
-Once you have the above working to parse an object file into Atoms and
-References, you'll want to look at performance. Some techniques that can
-help performance are:
-
-* Use llvm::BumpPtrAllocator or pre-allocate one big vector<Reference> and then
- just have each atom point to its subrange of References in that vector.
- This can be faster that allocating each Reference as separate object.
-* Pre-scan the symbol table and determine how many atoms are in each section
- then allocate space for all the Atom objects at once.
-* Don't copy symbol names or section content to each Atom, instead use
- StringRef and ArrayRef in each Atom to point to its name and content in the
- MemoryBuffer.
-
-
-Testing
--------
-
-We are still working on infrastructure to test Readers. The issue is that
-you don't want to check in binary files to the test suite. And the tools
-for creating your object file from assembly source may not be available on
-every OS.
-
-We are investigating a way to use YAML to describe the section, symbols,
-and content of a file. Then have some code which will write out an object
-file from that YAML description.
-
-Once that is in place, you can write test cases that contain section/symbols
-YAML and is run through the linker to produce Atom/References based YAML which
-is then run through FileCheck to verify the Atoms and References are as
-expected.
-
-
-
diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst
index 40439c995f17..1d53177200c3 100644
--- a/lld/docs/ReleaseNotes.rst
+++ b/lld/docs/ReleaseNotes.rst
@@ -1,19 +1,19 @@
========================
-lld 13.0.0 Release Notes
+lld 14.0.0 Release Notes
========================
.. contents::
:local:
.. warning::
- These are in-progress notes for the upcoming LLVM 13.0.0 release.
+ These are in-progress notes for the upcoming LLVM 14.0.0 release.
Release notes for previous releases can be found on
`the Download Page <https://releases.llvm.org/download.html>`_.
Introduction
============
-This document contains the release notes for the lld linker, release 13.0.0.
+This document contains the release notes for the lld linker, release 14.0.0.
Here we describe the status of lld, including major improvements
from the previous release. All lld releases may be downloaded
from the `LLVM releases web site <https://llvm.org/releases/>`_.
@@ -24,18 +24,26 @@ Non-comprehensive list of changes in this release
ELF Improvements
----------------
-* ``-Bsymbolic -Bsymbolic-functions`` has been changed to behave the same as ``-Bsymbolic-functions``. This matches GNU ld.
- (`D102461 <https://reviews.llvm.org/D102461>`_)
-* ``-Bno-symbolic`` has been added.
- (`D102461 <https://reviews.llvm.org/D102461>`_)
-* A new linker script command ``OVERWRITE_SECTIONS`` has been added.
- (`D103303 <https://reviews.llvm.org/D103303>`_)
+* ``--export-dynamic-symbol-list`` has been added.
+ (`D107317 <https://reviews.llvm.org/D107317>`_)
+* ``--why-extract`` has been added to query why archive members/lazy object files are extracted.
+ (`D109572 <https://reviews.llvm.org/D109572>`_)
+* ``e_entry`` no longer falls back to the address of ``.text`` if the entry symbol does not exist.
+ Instead, a value of 0 will be written.
+ (`D110014 <https://reviews.llvm.org/D110014>`_)
+
+Architecture specific changes:
+
+* The x86-32 port now supports TLSDESC (``-mtls-dialect=gnu2``).
+ (`D112582 <https://reviews.llvm.org/D112582>`_)
+* For x86-64, ``--no-relax`` now suppresses ``R_X86_64_GOTPCRELX`` and
+ ``R_X86_64_REX_GOTPCRELX`` GOT optimization.
+ (`D113615 <https://reviews.llvm.org/D113615>`_)
Breaking changes
----------------
-* ``--shuffle-sections=<seed>`` has been changed to ``--shuffle-sections=<section-glob>=<seed>``.
- Specify ``*`` as ``<section-glob>`` to get the previous behavior.
+* ...
COFF Improvements
-----------------
diff --git a/lld/docs/WebAssembly.rst b/lld/docs/WebAssembly.rst
index c01df99cddb9..ea8917ffc97d 100644
--- a/lld/docs/WebAssembly.rst
+++ b/lld/docs/WebAssembly.rst
@@ -19,7 +19,7 @@ This is the object format that the llvm will produce when run with the
Usage
-----
-The WebAssembly version of lld is installed as **wasm-ld**. It shared many
+The WebAssembly version of lld is installed as **wasm-ld**. It shares many
common linker flags with **ld.lld** but also includes several
WebAssembly-specific options:
@@ -205,6 +205,6 @@ Missing features
supported.
- No support for creating shared libraries. The spec for shared libraries in
WebAssembly is still in flux:
- https://github.com/WebAssembly/tool-conventions/blob/master/DynamicLinking.md
+ https://github.com/WebAssembly/tool-conventions/blob/main/DynamicLinking.md
-.. _linking: https://github.com/WebAssembly/tool-conventions/blob/master/Linking.md
+.. _linking: https://github.com/WebAssembly/tool-conventions/blob/main/Linking.md
diff --git a/lld/docs/conf.py b/lld/docs/conf.py
index 2df1aa70816b..8d0fec72caf8 100644
--- a/lld/docs/conf.py
+++ b/lld/docs/conf.py
@@ -48,9 +48,9 @@ copyright = u'2011-%d, LLVM Project' % date.today().year
# built documents.
#
# The short version.
-version = '13'
+version = '14'
# The full version, including alpha/beta/rc tags.
-release = '13'
+release = '14'
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
diff --git a/lld/docs/design.rst b/lld/docs/design.rst
deleted file mode 100644
index 20d8fe78a641..000000000000
--- a/lld/docs/design.rst
+++ /dev/null
@@ -1,421 +0,0 @@
-.. _design:
-
-Linker Design
-=============
-
-Note: this document discuss Mach-O port of LLD. For ELF and COFF,
-see :doc:`index`.
-
-Introduction
-------------
-
-lld is a new generation of linker. It is not "section" based like traditional
-linkers which mostly just interlace sections from multiple object files into the
-output file. Instead, lld is based on "Atoms". Traditional section based
-linking work well for simple linking, but their model makes advanced linking
-features difficult to implement. Features like dead code stripping, reordering
-functions for locality, and C++ coalescing require the linker to work at a finer
-grain.
-
-An atom is an indivisible chunk of code or data. An atom has a set of
-attributes, such as: name, scope, content-type, alignment, etc. An atom also
-has a list of References. A Reference contains: a kind, an optional offset, an
-optional addend, and an optional target atom.
-
-The Atom model allows the linker to use standard graph theory models for linking
-data structures. Each atom is a node, and each Reference is an edge. The
-feature of dead code stripping is implemented by following edges to mark all
-live atoms, and then delete the non-live atoms.
-
-
-Atom Model
-----------
-
-An atom is an indivisible chunk of code or data. Typically each user written
-function or global variable is an atom. In addition, the compiler may emit
-other atoms, such as for literal c-strings or floating point constants, or for
-runtime data structures like dwarf unwind info or pointers to initializers.
-
-A simple "hello world" object file would be modeled like this:
-
-.. image:: hello.png
-
-There are three atoms: main, a proxy for printf, and an anonymous atom
-containing the c-string literal "hello world". The Atom "main" has two
-references. One is the call site for the call to printf, and the other is a
-reference for the instruction that loads the address of the c-string literal.
-
-There are only four different types of atoms:
-
- * DefinedAtom
- 95% of all atoms. This is a chunk of code or data
-
- * UndefinedAtom
- This is a place holder in object files for a reference to some atom
- outside the translation unit.During core linking it is usually replaced
- by (coalesced into) another Atom.
-
- * SharedLibraryAtom
- If a required symbol name turns out to be defined in a dynamic shared
- library (and not some object file). A SharedLibraryAtom is the
- placeholder Atom used to represent that fact.
-
- It is similar to an UndefinedAtom, but it also tracks information
- about the associated shared library.
-
- * AbsoluteAtom
- This is for embedded support where some stuff is implemented in ROM at
- some fixed address. This atom has no content. It is just an address
- that the Writer needs to fix up any references to point to.
-
-
-File Model
-----------
-
-The linker views the input files as basically containers of Atoms and
-References, and just a few attributes of their own. The linker works with three
-kinds of files: object files, static libraries, and dynamic shared libraries.
-Each kind of file has reader object which presents the file in the model
-expected by the linker.
-
-Object File
-~~~~~~~~~~~
-
-An object file is just a container of atoms. When linking an object file, a
-reader is instantiated which parses the object file and instantiates a set of
-atoms representing all content in the .o file. The linker adds all those atoms
-to a master graph.
-
-Static Library (Archive)
-~~~~~~~~~~~~~~~~~~~~~~~~
-
-This is the traditional unix static archive which is just a collection of object
-files with a "table of contents". When linking with a static library, by default
-nothing is added to the master graph of atoms. Instead, if after merging all
-atoms from object files into a master graph, if any "undefined" atoms are left
-remaining in the master graph, the linker reads the table of contents for each
-static library to see if any have the needed definitions. If so, the set of
-atoms from the specified object file in the static library is added to the
-master graph of atoms.
-
-Dynamic Library (Shared Object)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Dynamic libraries are different than object files and static libraries in that
-they don't directly add any content. Their purpose is to check at build time
-that the remaining undefined references can be resolved at runtime, and provide
-a list of dynamic libraries (SO_NEEDED) that will be needed at runtime. The way
-this is modeled in the linker is that a dynamic library contributes no atoms to
-the initial graph of atoms. Instead, (like static libraries) if there are
-"undefined" atoms in the master graph of all atoms, then each dynamic library is
-checked to see if exports the required symbol. If so, a "shared library" atom is
-instantiated by the by the reader which the linker uses to replace the
-"undefined" atom.
-
-Linking Steps
--------------
-
-Through the use of abstract Atoms, the core of linking is architecture
-independent and file format independent. All command line parsing is factored
-out into a separate "options" abstraction which enables the linker to be driven
-with different command line sets.
-
-The overall steps in linking are:
-
- #. Command line processing
-
- #. Parsing input files
-
- #. Resolving
-
- #. Passes/Optimizations
-
- #. Generate output file
-
-The Resolving and Passes steps are done purely on the master graph of atoms, so
-they have no notion of file formats such as mach-o or ELF.
-
-
-Input Files
-~~~~~~~~~~~
-
-Existing developer tools using different file formats for object files.
-A goal of lld is to be file format independent. This is done
-through a plug-in model for reading object files. The lld::Reader is the base
-class for all object file readers. A Reader follows the factory method pattern.
-A Reader instantiates an lld::File object (which is a graph of Atoms) from a
-given object file (on disk or in-memory).
-
-Every Reader subclass defines its own "options" class (for instance the mach-o
-Reader defines the class ReaderOptionsMachO). This options class is the
-one-and-only way to control how the Reader operates when parsing an input file
-into an Atom graph. For instance, you may want the Reader to only accept
-certain architectures. The options class can be instantiated from command
-line options, or it can be subclassed and the ivars programmatically set.
-
-Resolving
-~~~~~~~~~
-
-The resolving step takes all the atoms' graphs from each object file and
-combines them into one master object graph. Unfortunately, it is not as simple
-as appending the atom list from each file into one big list. There are many
-cases where atoms need to be coalesced. That is, two or more atoms need to be
-coalesced into one atom. This is necessary to support: C language "tentative
-definitions", C++ weak symbols for templates and inlines defined in headers,
-replacing undefined atoms with actual definition atoms, and for merging copies
-of constants like c-strings and floating point constants.
-
-The linker support coalescing by-name and by-content. By-name is used for
-tentative definitions and weak symbols. By-content is used for constant data
-that can be merged.
-
-The resolving process maintains some global linking "state", including a "symbol
-table" which is a map from llvm::StringRef to lld::Atom*. With these data
-structures, the linker iterates all atoms in all input files. For each atom, it
-checks if the atom is named and has a global or hidden scope. If so, the atom
-is added to the symbol table map. If there already is a matching atom in that
-table, that means the current atom needs to be coalesced with the found atom, or
-it is a multiple definition error.
-
-When all initial input file atoms have been processed by the resolver, a scan is
-made to see if there are any undefined atoms in the graph. If there are, the
-linker scans all libraries (both static and dynamic) looking for definitions to
-replace the undefined atoms. It is an error if any undefined atoms are left
-remaining.
-
-Dead code stripping (if requested) is done at the end of resolving. The linker
-does a simple mark-and-sweep. It starts with "root" atoms (like "main" in a main
-executable) and follows each references and marks each Atom that it visits as
-"live". When done, all atoms not marked "live" are removed.
-
-The result of the Resolving phase is the creation of an lld::File object. The
-goal is that the lld::File model is **the** internal representation
-throughout the linker. The file readers parse (mach-o, ELF, COFF) into an
-lld::File. The file writers (mach-o, ELF, COFF) taken an lld::File and produce
-their file kind, and every Pass only operates on an lld::File. This is not only
-a simpler, consistent model, but it enables the state of the linker to be dumped
-at any point in the link for testing purposes.
-
-
-Passes
-~~~~~~
-
-The Passes step is an open ended set of routines that each get a change to
-modify or enhance the current lld::File object. Some example Passes are:
-
- * stub (PLT) generation
-
- * GOT instantiation
-
- * order_file optimization
-
- * branch island generation
-
- * branch shim generation
-
- * Objective-C optimizations (Darwin specific)
-
- * TLV instantiation (Darwin specific)
-
- * DTrace probe processing (Darwin specific)
-
- * compact unwind encoding (Darwin specific)
-
-
-Some of these passes are specific to Darwin's runtime environments. But many of
-the passes are applicable to any OS (such as generating branch island for out of
-range branch instructions).
-
-The general structure of a pass is to iterate through the atoms in the current
-lld::File object, inspecting each atom and doing something. For instance, the
-stub pass, looks for call sites to shared library atoms (e.g. call to printf).
-It then instantiates a "stub" atom (PLT entry) and a "lazy pointer" atom for
-each proxy atom needed, and these new atoms are added to the current lld::File
-object. Next, all the noted call sites to shared library atoms have their
-References altered to point to the stub atom instead of the shared library atom.
-
-
-Generate Output File
-~~~~~~~~~~~~~~~~~~~~
-
-Once the passes are done, the output file writer is given current lld::File
-object. The writer's job is to create the executable content file wrapper and
-place the content of the atoms into it.
-
-lld uses a plug-in model for writing output files. All concrete writers (e.g.
-ELF, mach-o, etc) are subclasses of the lld::Writer class.
-
-Unlike the Reader class which has just one method to instantiate an lld::File,
-the Writer class has multiple methods. The crucial method is to generate the
-output file, but there are also methods which allow the Writer to contribute
-Atoms to the resolver and specify passes to run.
-
-An example of contributing
-atoms is that if the Writer knows a main executable is being linked and such
-an executable requires a specially named entry point (e.g. "_main"), the Writer
-can add an UndefinedAtom with that special name to the resolver. This will
-cause the resolver to issue an error if that symbol is not defined.
-
-Sometimes a Writer supports lazily created symbols, such as names for the start
-of sections. To support this, the Writer can create a File object which vends
-no initial atoms, but does lazily supply atoms by name as needed.
-
-Every Writer subclass defines its own "options" class (for instance the mach-o
-Writer defines the class WriterOptionsMachO). This options class is the
-one-and-only way to control how the Writer operates when producing an output
-file from an Atom graph. For instance, you may want the Writer to optimize
-the output for certain OS versions, or strip local symbols, etc. The options
-class can be instantiated from command line options, or it can be subclassed
-and the ivars programmatically set.
-
-
-lld::File representations
--------------------------
-
-Just as LLVM has three representations of its IR model, lld has two
-representations of its File/Atom/Reference model:
-
- * In memory, abstract C++ classes (lld::Atom, lld::Reference, and lld::File).
-
- * textual (in YAML)
-
-
-Textual representations in YAML
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-In designing a textual format we want something easy for humans to read and easy
-for the linker to parse. Since an atom has lots of attributes most of which are
-usually just the default, we should define default values for every attribute so
-that those can be omitted from the text representation. Here is the atoms for a
-simple hello world program expressed in YAML::
-
- target-triple: x86_64-apple-darwin11
-
- atoms:
- - name: _main
- scope: global
- type: code
- content: [ 55, 48, 89, e5, 48, 8d, 3d, 00, 00, 00, 00, 30, c0, e8, 00, 00,
- 00, 00, 31, c0, 5d, c3 ]
- fixups:
- - offset: 07
- kind: pcrel32
- target: 2
- - offset: 0E
- kind: call32
- target: _fprintf
-
- - type: c-string
- content: [ 73, 5A, 00 ]
-
- ...
-
-The biggest use for the textual format will be writing test cases. Writing test
-cases in C is problematic because the compiler may vary its output over time for
-its own optimization reasons which my inadvertently disable or break the linker
-feature trying to be tested. By writing test cases in the linkers own textual
-format, we can exactly specify every attribute of every atom and thus target
-specific linker logic.
-
-The textual/YAML format follows the ReaderWriter patterns used in lld. The lld
-library comes with the classes: ReaderYAML and WriterYAML.
-
-
-Testing
--------
-
-The lld project contains a test suite which is being built up as new code is
-added to lld. All new lld functionality should have a tests added to the test
-suite. The test suite is `lit <https://llvm.org/cmds/lit.html/>`_ driven. Each
-test is a text file with comments telling lit how to run the test and check the
-result To facilitate testing, the lld project builds a tool called lld-core.
-This tool reads a YAML file (default from stdin), parses it into one or more
-lld::File objects in memory and then feeds those lld::File objects to the
-resolver phase.
-
-
-Resolver testing
-~~~~~~~~~~~~~~~~
-
-Basic testing is the "core linking" or resolving phase. That is where the
-linker merges object files. All test cases are written in YAML. One feature of
-YAML is that it allows multiple "documents" to be encoding in one YAML stream.
-That means one text file can appear to the linker as multiple .o files - the
-normal case for the linker.
-
-Here is a simple example of a core linking test case. It checks that an
-undefined atom from one file will be replaced by a definition from another
-file::
-
- # RUN: lld-core %s | FileCheck %s
-
- #
- # Test that undefined atoms are replaced with defined atoms.
- #
-
- ---
- atoms:
- - name: foo
- definition: undefined
- ---
- atoms:
- - name: foo
- scope: global
- type: code
- ...
-
- # CHECK: name: foo
- # CHECK: scope: global
- # CHECK: type: code
- # CHECK-NOT: name: foo
- # CHECK: ...
-
-
-Passes testing
-~~~~~~~~~~~~~~
-
-Since Passes just operate on an lld::File object, the lld-core tool has the
-option to run a particular pass (after resolving). Thus, you can write a YAML
-test case with carefully crafted input to exercise areas of a Pass and the check
-the resulting lld::File object as represented in YAML.
-
-
-Design Issues
--------------
-
-There are a number of open issues in the design of lld. The plan is to wait and
-make these design decisions when we need to.
-
-
-Debug Info
-~~~~~~~~~~
-
-Currently, the lld model says nothing about debug info. But the most popular
-debug format is DWARF and there is some impedance mismatch with the lld model
-and DWARF. In lld there are just Atoms and only Atoms that need to be in a
-special section at runtime have an associated section. Also, Atoms do not have
-addresses. The way DWARF is spec'ed different parts of DWARF are supposed to go
-into specially named sections and the DWARF references function code by address.
-
-CPU and OS specific functionality
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Currently, lld has an abstract "Platform" that deals with any CPU or OS specific
-differences in linking. We just keep adding virtual methods to the base
-Platform class as we find linking areas that might need customization. At some
-point we'll need to structure this better.
-
-
-File Attributes
-~~~~~~~~~~~~~~~
-
-Currently, lld::File just has a path and a way to iterate its atoms. We will
-need to add more attributes on a File. For example, some equivalent to the
-target triple. There is also a number of cached or computed attributes that
-could make various Passes more efficient. For instance, on Darwin there are a
-number of Objective-C optimizations that can be done by a Pass. But it would
-improve the plain C case if the Objective-C optimization Pass did not have to
-scan all atoms looking for any Objective-C data structures. This could be done
-if the lld::File object had an attribute that said if the file had any
-Objective-C data in it. The Resolving phase would then be required to "merge"
-that attribute as object files are added.
diff --git a/lld/docs/development.rst b/lld/docs/development.rst
deleted file mode 100644
index 81b826f64835..000000000000
--- a/lld/docs/development.rst
+++ /dev/null
@@ -1,45 +0,0 @@
-.. _development:
-
-Development
-===========
-
-Note: this document discuss Mach-O port of LLD. For ELF and COFF,
-see :doc:`index`.
-
-lld is developed as part of the `LLVM <https://llvm.org>`_ project.
-
-Creating a Reader
------------------
-
-See the :ref:`Creating a Reader <Readers>` guide.
-
-
-Modifying the Driver
---------------------
-
-See :doc:`Driver`.
-
-
-Debugging
----------
-
-You can run lld with ``-mllvm -debug`` command line options to enable debugging
-printouts. If you want to enable debug information for some specific pass, you
-can run it with ``-mllvm '-debug-only=<pass>'``, where pass is a name used in
-the ``DEBUG_WITH_TYPE()`` macro.
-
-
-
-Documentation
--------------
-
-The project documentation is written in reStructuredText and generated using the
-`Sphinx <http://sphinx.pocoo.org/>`_ documentation generator. For more
-information on writing documentation for the project, see the
-:ref:`sphinx_intro`.
-
-.. toctree::
- :hidden:
-
- Readers
- Driver
diff --git a/lld/docs/getting_started.rst b/lld/docs/getting_started.rst
deleted file mode 100644
index 506cb24dde84..000000000000
--- a/lld/docs/getting_started.rst
+++ /dev/null
@@ -1,87 +0,0 @@
-.. _getting_started:
-
-Getting Started: Building and Running lld
-=========================================
-
-This page gives you the shortest path to checking out and building lld. If you
-run into problems, please file bugs in the `LLVM Bugzilla`__
-
-__ https://bugs.llvm.org/
-
-Building lld
-------------
-
-On Unix-like Systems
-~~~~~~~~~~~~~~~~~~~~
-
-1. Get the required tools.
-
- * `CMake 2.8`_\+.
- * make (or any build system CMake supports).
- * `Clang 3.1`_\+ or GCC 4.7+ (C++11 support is required).
-
- * If using Clang, you will also need `libc++`_.
- * `Python 2.4`_\+ (not 3.x) for running tests.
-
-.. _CMake 2.8: http://www.cmake.org/cmake/resources/software.html
-.. _Clang 3.1: http://clang.llvm.org/
-.. _libc++: http://libcxx.llvm.org/
-.. _Python 2.4: http://python.org/download/
-
-2. Check out LLVM and subprojects (including lld)::
-
- $ git clone https://github.com/llvm/llvm-project.git
-
-4. Build LLVM and lld::
-
- $ cd llvm-project
- $ mkdir build && cd build
- $ cmake -G "Unix Makefiles" -DLLVM_ENABLE_PROJECTS=lld ../llvm
- $ make
-
- * If you want to build with clang and it is not the default compiler or
- it is installed in an alternate location, you'll need to tell the cmake tool
- the location of the C and C++ compiler via CMAKE_C_COMPILER and
- CMAKE_CXX_COMPILER. For example::
-
- $ cmake -DCMAKE_CXX_COMPILER=/path/to/clang++ -DCMAKE_C_COMPILER=/path/to/clang ...
-
-5. Test::
-
- $ make check-lld
-
-Using Visual Studio
-~~~~~~~~~~~~~~~~~~~
-
-#. Get the required tools.
-
- * `CMake 2.8`_\+.
- * `Visual Studio 12 (2013) or later`_ (required for C++11 support)
- * `Python 2.4`_\+ (not 3.x) for running tests.
-
-.. _CMake 2.8: http://www.cmake.org/cmake/resources/software.html
-.. _Visual Studio 12 (2013) or later: http://www.microsoft.com/visualstudio/11/en-us
-.. _Python 2.4: http://python.org/download/
-
-#. Check out LLVM as above.
-
-#. Generate Visual Studio project files::
-
- $ cd llvm-project/build (out of source build required)
- $ cmake -G "Visual Studio 11" -DLLVM_ENABLE_PROJECTS=lld ../llvm
-
-#. Build
-
- * Open LLVM.sln in Visual Studio.
- * Build the ``ALL_BUILD`` target.
-
-#. Test
-
- * Build the ``lld-test`` target.
-
-More Information
-~~~~~~~~~~~~~~~~
-
-For more information on using CMake see the `LLVM CMake guide`_.
-
-.. _LLVM CMake guide: https://llvm.org/docs/CMake.html
diff --git a/lld/docs/index.rst b/lld/docs/index.rst
index 40da6d77cca8..4b42abadb94a 100644
--- a/lld/docs/index.rst
+++ b/lld/docs/index.rst
@@ -10,9 +10,7 @@ WebAssembly in descending order of completeness. Internally, LLD consists of
several different linkers. The ELF port is the one that will be described in
this document. The PE/COFF port is complete, including
Windows debug info (PDB) support. The WebAssembly port is still a work in
-progress (See :doc:`WebAssembly`). The Mach-O port is built based on a
-different architecture than the others. For the details about Mach-O, please
-read :doc:`AtomLLD`.
+progress (See :doc:`WebAssembly`).
Features
--------
@@ -170,7 +168,6 @@ document soon.
:maxdepth: 1
NewLLD
- AtomLLD
WebAssembly
windows_support
missingkeyfunction
diff --git a/lld/docs/ld.lld.1 b/lld/docs/ld.lld.1
index ba3b0779e699..843f4a1cc282 100644
--- a/lld/docs/ld.lld.1
+++ b/lld/docs/ld.lld.1
@@ -85,6 +85,9 @@ flag.
.It Fl Bsymbolic-functions
Bind default visibility defined function symbols locally for
.Fl shared.
+.It Fl Bsymbolic-non-weak-functions
+Bind default visibility defined STB_GLOBAL function symbols locally for
+.Fl shared.
.It Fl -build-id Ns = Ns Ar value
Generate a build ID note.
.Ar value
@@ -164,10 +167,9 @@ Specify the dynamic linker to be used for a dynamically linked executable.
This is recorded in an ELF segment of type
.Dv PT_INTERP .
.It Fl -dynamic-list Ns = Ns Ar file
-Read a list of dynamic symbols from
-.Ar file .
-(executable) Put matched non-local defined symbols to the dynamic symbol table.
-(shared object) References to matched non-local STV_DEFAULT symbols shouldn't be bound to definitions within the shared object. Implies
+Similar to
+.Cm --export-dynamic-symbol-list .
+When creating a shared object, implies
.Cm -Bsymbolic
but does not set DF_SYMBOLIC
.It Fl -EB
@@ -222,6 +224,12 @@ Put symbols in the dynamic symbol table.
.Cm -Bsymbolic-functions
or
.Cm --dynamic-list
+.It Fl -export-dynamic-symbol-list Ns = Ns Ar file
+Read a list of dynamic symbol patterns from
+.Ar file .
+Apply
+.Cm --export-dynamic-symbol
+on each pattern.
.It Fl -fatal-warnings
Treat warnings as errors.
.It Fl -filter Ns = Ns Ar value , Fl F Ar value
@@ -337,7 +345,7 @@ Page align sections.
.It Fl -no-omagic
Do not set the text data sections to be writable, page align sections.
.It Fl -no-relax
-Disable target-specific relaxations. This is currently a no-op.
+Disable target-specific relaxations. For x86-64 this disables R_X86_64_GOTPCRELX and R_X86_64_REX_GOTPCRELX GOT optimization.
.It Fl -no-rosegment
Do not put read-only non-executable sections in their own segment.
.It Fl -no-undefined-version
@@ -650,6 +658,8 @@ linkers, and may be removed in the future.
Report unresolved symbols as warnings.
.It Fl -whole-archive
Force load of all members in a static library.
+.It Fl -why-extract Ns = Ns Ar file
+Print to a file why each archive member was extracted.
.It Fl -wrap Ns = Ns Ar symbol
Redirect
.Ar symbol
diff --git a/lld/docs/open_projects.rst b/lld/docs/open_projects.rst
deleted file mode 100644
index 36edca4e96dc..000000000000
--- a/lld/docs/open_projects.rst
+++ /dev/null
@@ -1,9 +0,0 @@
-.. _open_projects:
-
-Open Projects
-=============
-
-Documentation TODOs
-~~~~~~~~~~~~~~~~~~~
-
-.. todolist::
diff --git a/lld/docs/sphinx_intro.rst b/lld/docs/sphinx_intro.rst
deleted file mode 100644
index b671cdc3df64..000000000000
--- a/lld/docs/sphinx_intro.rst
+++ /dev/null
@@ -1,127 +0,0 @@
-.. _sphinx_intro:
-
-Sphinx Introduction for LLVM Developers
-=======================================
-
-This document is intended as a short and simple introduction to the Sphinx
-documentation generation system for LLVM developers.
-
-Quickstart
-----------
-
-To get started writing documentation, you will need to:
-
- 1. Have the Sphinx tools :ref:`installed <installing_sphinx>`.
-
- 2. Understand how to :ref:`build the documentation
- <building_the_documentation>`.
-
- 3. Start :ref:`writing documentation <writing_documentation>`!
-
-.. _installing_sphinx:
-
-Installing Sphinx
-~~~~~~~~~~~~~~~~~
-
-You should be able to install Sphinx using the standard Python package
-installation tool ``easy_install``, as follows::
-
- $ sudo easy_install sphinx
- Searching for sphinx
- Reading http://pypi.python.org/simple/sphinx/
- Reading http://sphinx.pocoo.org/
- Best match: Sphinx 1.1.3
- ... more lines here ..
-
-If you do not have root access (or otherwise want to avoid installing Sphinx in
-system directories) see the section on :ref:`installing_sphinx_in_a_venv` .
-
-If you do not have the ``easy_install`` tool on your system, you should be able
-to install it using:
-
- Linux
- Use your distribution's standard package management tool to install it,
- i.e., ``apt-get install easy_install`` or ``yum install easy_install``.
-
- macOS
- All modern macOS systems come with ``easy_install`` as part of the base
- system.
-
- Windows
- See the `setuptools <http://pypi.python.org/pypi/setuptools>`_ package web
- page for instructions.
-
-
-.. _building_the_documentation:
-
-Building the documentation
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-In order to build the documentation you need to add ``-DLLVM_ENABLE_SPHINX=ON`` to
-your ``cmake`` command. Once you do this you can build the docs using the
-``docs-lld-html`` build target (with ``ninja`` or ``make``).
-
-That build target will invoke ``sphinx-build`` with the appropriate options for
-the project, and generate the HTML documentation in a ``tools/lld/docs/html``
-subdirectory.
-
-.. _writing_documentation:
-
-Writing documentation
-~~~~~~~~~~~~~~~~~~~~~
-
-The documentation itself is written in the reStructuredText (ReST) format, and
-Sphinx defines additional tags to support features like cross-referencing.
-
-The ReST format itself is organized around documents mostly being readable
-plaintext documents. You should generally be able to write new documentation
-easily just by following the style of the existing documentation.
-
-If you want to understand the formatting of the documents more, the best place
-to start is Sphinx's own `ReST Primer <http://sphinx.pocoo.org/rest.html>`_.
-
-
-Learning More
--------------
-
-If you want to learn more about the Sphinx system, the best place to start is
-the Sphinx documentation itself, available `here
-<http://sphinx.pocoo.org/contents.html>`_.
-
-
-.. _installing_sphinx_in_a_venv:
-
-Installing Sphinx in a Virtual Environment
-------------------------------------------
-
-Most Python developers prefer to work with tools inside a *virtualenv* (virtual
-environment) instance, which functions as an application sandbox. This avoids
-polluting your system installation with different packages used by various
-projects (and ensures that dependencies for different packages don't conflict
-with one another). Of course, you need to first have the virtualenv software
-itself which generally would be installed at the system level::
-
- $ sudo easy_install virtualenv
-
-but after that you no longer need to install additional packages in the system
-directories.
-
-Once you have the *virtualenv* tool itself installed, you can create a
-virtualenv for Sphinx using::
-
- $ virtualenv ~/my-sphinx-install
- New python executable in /Users/dummy/my-sphinx-install/bin/python
- Installing setuptools............done.
- Installing pip...............done.
-
- $ ~/my-sphinx-install/bin/easy_install sphinx
- ... install messages here ...
-
-and from now on you can "activate" the *virtualenv* using::
-
- $ source ~/my-sphinx-install/bin/activate
-
-which will change your PATH to ensure the sphinx-build tool from inside the
-virtual environment will be used. See the `virtualenv website
-<http://www.virtualenv.org/en/latest/index.html>`_ for more information on using
-virtual environments.
diff --git a/lld/include/lld/Common/ErrorHandler.h b/lld/include/lld/Common/ErrorHandler.h
index 95d92f3594ba..d95a2537c1f2 100644
--- a/lld/include/lld/Common/ErrorHandler.h
+++ b/lld/include/lld/Common/ErrorHandler.h
@@ -109,7 +109,7 @@ public:
void error(const Twine &msg, ErrorTag tag, ArrayRef<StringRef> args);
[[noreturn]] void fatal(const Twine &msg);
void log(const Twine &msg);
- void message(const Twine &msg);
+ void message(const Twine &msg, llvm::raw_ostream &s);
void warn(const Twine &msg);
void reset() {
@@ -124,6 +124,8 @@ private:
using Colors = raw_ostream::Colors;
std::string getLocation(const Twine &msg);
+ void reportDiagnostic(StringRef location, Colors c, StringRef diagKind,
+ const Twine &msg);
};
/// Returns the default error handler.
@@ -135,7 +137,9 @@ inline void error(const Twine &msg, ErrorTag tag, ArrayRef<StringRef> args) {
}
[[noreturn]] inline void fatal(const Twine &msg) { errorHandler().fatal(msg); }
inline void log(const Twine &msg) { errorHandler().log(msg); }
-inline void message(const Twine &msg) { errorHandler().message(msg); }
+inline void message(const Twine &msg, llvm::raw_ostream &s = outs()) {
+ errorHandler().message(msg, s);
+}
inline void warn(const Twine &msg) { errorHandler().warn(msg); }
inline uint64_t errorCount() { return errorHandler().errorCount; }
diff --git a/lld/include/lld/Common/LLVM.h b/lld/include/lld/Common/LLVM.h
index c19364ad9f6c..6872adf8d00f 100644
--- a/lld/include/lld/Common/LLVM.h
+++ b/lld/include/lld/Common/LLVM.h
@@ -45,7 +45,6 @@ class WasmSymbol;
namespace wasm {
struct WasmTag;
-struct WasmTagType;
struct WasmFunction;
struct WasmGlobal;
struct WasmGlobalType;
@@ -97,7 +96,6 @@ using llvm::wasm::WasmSignature;
using llvm::wasm::WasmTable;
using llvm::wasm::WasmTableType;
using llvm::wasm::WasmTag;
-using llvm::wasm::WasmTagType;
} // end namespace lld.
namespace std {
diff --git a/lld/include/lld/Common/Timer.h b/lld/include/lld/Common/Timer.h
index b37388cd38c4..7aca966f663c 100644
--- a/lld/include/lld/Common/Timer.h
+++ b/lld/include/lld/Common/Timer.h
@@ -38,7 +38,8 @@ class Timer {
public:
Timer(llvm::StringRef name, Timer &parent);
- static Timer &root();
+ // Creates the root timer.
+ explicit Timer(llvm::StringRef name);
void addToTotal(std::chrono::nanoseconds time) { total += time.count(); }
void print();
@@ -46,7 +47,6 @@ public:
double millis() const;
private:
- explicit Timer(llvm::StringRef name);
void print(int depth, double totalDuration, bool recurse = true) const;
std::atomic<std::chrono::nanoseconds::rep> total;
diff --git a/lld/lib/Core/Resolver.cpp b/lld/lib/Core/Resolver.cpp
index 17a46056f00c..1ed0b1c6e618 100644
--- a/lld/lib/Core/Resolver.cpp
+++ b/lld/lib/Core/Resolver.cpp
@@ -380,11 +380,9 @@ void Resolver::deadStripOptimize() {
markLive(dsrAtom);
// now remove all non-live atoms from _atoms
- _atoms.erase(std::remove_if(_atoms.begin(), _atoms.end(),
- [&](OwningAtomPtr<Atom> &a) {
- return _liveAtoms.count(a.get()) == 0;
- }),
- _atoms.end());
+ llvm::erase_if(_atoms, [&](OwningAtomPtr<Atom> &a) {
+ return _liveAtoms.count(a.get()) == 0;
+ });
}
// error out if some undefines remain
@@ -396,10 +394,8 @@ bool Resolver::checkUndefines() {
std::vector<const UndefinedAtom *> undefinedAtoms = _symbolTable.undefines();
if (_ctx.deadStrip()) {
// When dead code stripping, we don't care if dead atoms are undefined.
- undefinedAtoms.erase(
- std::remove_if(undefinedAtoms.begin(), undefinedAtoms.end(),
- [&](const Atom *a) { return _liveAtoms.count(a) == 0; }),
- undefinedAtoms.end());
+ llvm::erase_if(undefinedAtoms,
+ [&](const Atom *a) { return _liveAtoms.count(a) == 0; });
}
if (undefinedAtoms.empty())
@@ -440,12 +436,9 @@ void Resolver::removeCoalescedAwayAtoms() {
DEBUG_WITH_TYPE("resolver",
llvm::dbgs() << "******** Removing coalesced away atoms:\n");
ScopedTask task(getDefaultDomain(), "removeCoalescedAwayAtoms");
- _atoms.erase(std::remove_if(_atoms.begin(), _atoms.end(),
- [&](OwningAtomPtr<Atom> &a) {
- return _symbolTable.isCoalescedAway(a.get()) ||
- _deadAtoms.count(a.get());
- }),
- _atoms.end());
+ llvm::erase_if(_atoms, [&](OwningAtomPtr<Atom> &a) {
+ return _symbolTable.isCoalescedAway(a.get()) || _deadAtoms.count(a.get());
+ });
}
bool Resolver::resolve() {
diff --git a/lld/lib/ReaderWriter/MachO/GOTPass.cpp b/lld/lib/ReaderWriter/MachO/GOTPass.cpp
index 10e611c1bd2b..9cb5ab5eab12 100644
--- a/lld/lib/ReaderWriter/MachO/GOTPass.cpp
+++ b/lld/lib/ReaderWriter/MachO/GOTPass.cpp
@@ -121,7 +121,7 @@ private:
}
}
- // Sort and add all created GOT Atoms to master file
+ // Sort and add all created GOT Atoms to merged file
std::vector<const GOTEntryAtom *> entries;
entries.reserve(_targetToGOT.size());
for (auto &it : _targetToGOT)
diff --git a/lld/lib/ReaderWriter/MachO/ShimPass.cpp b/lld/lib/ReaderWriter/MachO/ShimPass.cpp
index a5b34cfe8de6..4c62ef9d330f 100644
--- a/lld/lib/ReaderWriter/MachO/ShimPass.cpp
+++ b/lld/lib/ReaderWriter/MachO/ShimPass.cpp
@@ -78,7 +78,7 @@ public:
return (l->name() < r->name());
});
- // Add all shims to master file.
+ // Add all shims to merged file.
for (const DefinedAtom *shim : shims)
mergedFile.addAtom(*shim);
diff --git a/lld/tools/lld/lld.cpp b/lld/tools/lld/lld.cpp
index 5d6142f5fd35..bfd4370d3f2d 100644
--- a/lld/tools/lld/lld.cpp
+++ b/lld/tools/lld/lld.cpp
@@ -39,6 +39,7 @@
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/PluginLoader.h"
+#include "llvm/Support/Process.h"
#include <cstdlib>
using namespace lld;
@@ -199,6 +200,7 @@ static unsigned inTestVerbosity() {
int main(int argc, const char **argv) {
InitLLVM x(argc, argv);
+ sys::Process::UseANSIEscapeCodes(true);
// Not running in lit tests, just take the shortest codepath with global
// exception handling and no memory cleanup on exit.
diff --git a/lldb/bindings/interface/SBDebugger.i b/lldb/bindings/interface/SBDebugger.i
index cf4411980cc3..aae72dd51394 100644
--- a/lldb/bindings/interface/SBDebugger.i
+++ b/lldb/bindings/interface/SBDebugger.i
@@ -479,6 +479,8 @@ public:
lldb::SBTypeSynthetic
GetSyntheticForType (lldb::SBTypeNameSpecifier);
+ SBStructuredData GetScriptInterpreterInfo(ScriptLanguage);
+
STRING_EXTENSION(SBDebugger)
%feature("docstring",
diff --git a/lldb/bindings/interface/SBMemoryRegionInfo.i b/lldb/bindings/interface/SBMemoryRegionInfo.i
index 3460dc0d06e2..0316c5a5be1d 100644
--- a/lldb/bindings/interface/SBMemoryRegionInfo.i
+++ b/lldb/bindings/interface/SBMemoryRegionInfo.i
@@ -20,6 +20,9 @@ public:
SBMemoryRegionInfo (const lldb::SBMemoryRegionInfo &rhs);
+ SBMemoryRegionInfo::SBMemoryRegionInfo(const char *name, lldb::addr_t begin,
+ lldb::addr_t end, uint32_t permissions, bool mapped, bool stack_memory);
+
~SBMemoryRegionInfo ();
void
diff --git a/lldb/bindings/interface/SBMemoryRegionInfoList.i b/lldb/bindings/interface/SBMemoryRegionInfoList.i
index c2e74f1cd0dc..009751277542 100644
--- a/lldb/bindings/interface/SBMemoryRegionInfoList.i
+++ b/lldb/bindings/interface/SBMemoryRegionInfoList.i
@@ -25,6 +25,9 @@ public:
GetSize () const;
bool
+ GetMemoryRegionContainingAddress (lldb::addr_t addr, SBMemoryRegionInfo &region_info);
+
+ bool
GetMemoryRegionAtIndex (uint32_t idx, SBMemoryRegionInfo &region_info);
void
diff --git a/lldb/bindings/interface/SBSymbolContextList.i b/lldb/bindings/interface/SBSymbolContextList.i
index e9d4aa8d62db..14566b3e3720 100644
--- a/lldb/bindings/interface/SBSymbolContextList.i
+++ b/lldb/bindings/interface/SBSymbolContextList.i
@@ -14,7 +14,7 @@ namespace lldb {
For example (from test/python_api/target/TestTargetAPI.py), ::
def find_functions(self, exe_name):
- '''Exercise SBTaget.FindFunctions() API.'''
+ '''Exercise SBTarget.FindFunctions() API.'''
exe = os.path.join(os.getcwd(), exe_name)
# Create a target by the debugger.
diff --git a/lldb/bindings/interface/SBType.i b/lldb/bindings/interface/SBType.i
index 500bc99ca8cd..d6e8db3ab428 100644
--- a/lldb/bindings/interface/SBType.i
+++ b/lldb/bindings/interface/SBType.i
@@ -837,6 +837,21 @@ public:
lldb::SBTypeMemberFunction
GetMemberFunctionAtIndex (uint32_t idx);
+ %feature("docstring",
+ "Returns true if the type is completely defined.
+
+ Language-specific behaviour:
+
+ * C: Returns false for struct types that were only forward declared in the
+ type's `SBTarget`/`SBModule`. Otherwise returns true.
+ * C++: Returns false for template/non-template struct/class types and
+ scoped enums that were only forward declared inside the type's
+ `SBTarget`/`SBModule`. Otherwise returns true.
+ * Objective-C: Follows the same behavior as C for struct types. Objective-C
+ classes are considered complete unless they were only forward declared via
+ ``@class ClassName`` in the type's `SBTarget`/`SBModule`. Otherwise
+ returns true.
+ ") IsTypeComplete;
bool
IsTypeComplete ();
diff --git a/lldb/bindings/lua/lua-typemaps.swig b/lldb/bindings/lua/lua-typemaps.swig
index d912137a5674..e3b3f5718d15 100644
--- a/lldb/bindings/lua/lua-typemaps.swig
+++ b/lldb/bindings/lua/lua-typemaps.swig
@@ -12,7 +12,7 @@
// Primitive integer mapping
%typemap(in,checkfn="lua_isinteger") TYPE
-%{ $1 = (TYPE)lua_tointeger(L, $input); %}
+%{ $1 = ($type)lua_tointeger(L, $input); %}
%typemap(in,checkfn="lua_isinteger") const TYPE&($basetype temp)
%{ temp=($basetype)lua_tointeger(L,$input); $1=&temp;%}
%typemap(out) TYPE
@@ -54,6 +54,7 @@ LLDB_NUMBER_TYPEMAP(signed long);
LLDB_NUMBER_TYPEMAP(long long);
LLDB_NUMBER_TYPEMAP(unsigned long long);
LLDB_NUMBER_TYPEMAP(signed long long);
+LLDB_NUMBER_TYPEMAP(enum SWIGTYPE);
%apply unsigned long { size_t };
%apply const unsigned long & { const size_t & };
@@ -77,7 +78,7 @@ LLDB_NUMBER_TYPEMAP(signed long long);
%typemap(in) (char *dst, size_t dst_len) {
$2 = luaL_checkinteger(L, $input);
if ($2 <= 0) {
- return luaL_error(L, "Positive integer expected");
+ return luaL_error(L, "Positive integer expected");
}
$1 = (char *) malloc($2);
}
@@ -86,6 +87,9 @@ LLDB_NUMBER_TYPEMAP(signed long long);
// as char data instead of byte data.
%typemap(in) (void *char_buf, size_t size) = (char *dst, size_t dst_len);
+// Also SBProcess::ReadMemory.
+%typemap(in) (void *buf, size_t size) = (char *dst, size_t dst_len);
+
// Return the char buffer. Discarding any previous return result
%typemap(argout) (char *dst, size_t dst_len) {
lua_pop(L, 1); // Blow away the previous result
@@ -102,4 +106,211 @@ LLDB_NUMBER_TYPEMAP(signed long long);
// as char data instead of byte data.
%typemap(argout) (void *char_buf, size_t size) = (char *dst, size_t dst_len);
+// Also SBProcess::ReadMemory.
+%typemap(argout) (void *buf, size_t size) = (char *dst, size_t dst_len);
+
+//===----------------------------------------------------------------------===//
+
+// Typemap for handling a snprintf-like API like SBThread::GetStopDescription.
+
+%typemap(in) (char *dst_or_null, size_t dst_len) {
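+ // The Lua caller passes the desired maximum description length; allocate a
+ // buffer of that size here for the wrapped C++ call to fill in.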
+ $2 = luaL_checkinteger(L, $input);
+ if ($2 <= 0) {
+ return luaL_error(L, "Positive integer expected");
+ }
+ $1 = (char *)malloc($2);
+}
+
+%typemap(argout) (char *dst_or_null, size_t dst_len) {
+ lua_pop(L, 1); // Blow away the previous result
+ lua_pushlstring(L, (const char *)$1, $result);
+ free($1);
+ // SWIG_arg was already incremented
+}
+
+//===----------------------------------------------------------------------===//
+
+// Typemap for handling SBModule::GetVersion
+
+%typemap(in) (uint32_t *versions, uint32_t num_versions) {
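+ // Allocate a fixed-capacity scratch buffer; SBModule::GetVersion returns the
+ // real component count, which the argout typemap below uses to trim the table.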
+ $2 = 99;
+ $1 = (uint32_t *)malloc(sizeof(uint32_t) * $2);
+}
+
+%typemap(argout) (uint32_t *versions, uint32_t num_versions) {
+ uint32_t count = result;
+ if (count >= $2)
+ count = $2;
+ lua_newtable(L);
+ int i = 0;
+ while (i++ < count) {
+ lua_pushinteger(L, $1[i - 1]);
+ lua_seti(L, -2, i);
+ }
+ SWIG_arg++;
+ free($1);
+}
+
+//===----------------------------------------------------------------------===//
+
+// Typemap for handling SBDebugger::SetLoggingCallback
+
+%typemap(in) (lldb::LogOutputCallback log_callback, void *baton) {
+ $1 = LLDBSwigLuaCallLuaLogOutputCallback;
+ $2 = (void *)L;
+
+ luaL_checktype(L, 2, LUA_TFUNCTION);
+ lua_settop(L, 2);
+
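+ // Stash the user's Lua callback in the registry, keyed by the address of the
+ // C trampoline, so LLDBSwigLuaCallLuaLogOutputCallback can find it later.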
+ lua_pushlightuserdata(L, (void *)&LLDBSwigLuaCallLuaLogOutputCallback);
+ lua_insert(L, 2);
+ lua_settable(L, LUA_REGISTRYINDEX);
+}
+
+//===----------------------------------------------------------------------===//
+
+// Typemap for handling SBEvent::SBEvent(uint32_t event, const char *cstr, uint32_t cstr_len)
+
+%typemap(in) (const char *cstr, uint32_t cstr_len) {
+ $1 = (char *)luaL_checklstring(L, $input, (size_t *)&$2);
+}
+
+// Typemap for handling SBProcess::PutSTDIN
+
+%typemap(in) (const char *src, size_t src_len) {
+ $1 = (char *)luaL_checklstring(L, $input, &$2);
+}
+
+// Typemap for handling SBProcess::WriteMemory, SBTarget::GetInstructions...
+
+%typemap(in) (const void *buf, size_t size),
+ (const void *data, size_t data_len) {
+ $1 = (void *)luaL_checklstring(L, $input, &$2);
+}
+
+//===----------------------------------------------------------------------===//
+
+// Typemap for handling char ** in SBTarget::LaunchSimple, SBTarget::Launch...
+
+// It should accept a Lua table of strings, for stuff like "argv" and "envp".
+
+%typemap(in) char ** {
+ if (lua_istable(L, $input)) {
+ size_t size = lua_rawlen(L, $input);
+ $1 = (char **)malloc((size + 1) * sizeof(char *));
+ int i = 0, j = 0;
+ while (i++ < size) {
+ lua_rawgeti(L, $input, i);
+ if (!lua_isstring(L, -1)) {
+ // if current element cannot be converted to string, raise an error
+ lua_pop(L, 1);
+ return luaL_error(L, "List should only contain strings");
+ }
+ $1[j++] = (char *)lua_tostring(L, -1);
+ lua_pop(L, 1);
+ }
+ $1[j] = 0;
+ } else if (lua_isnil(L, $input)) {
+ // "nil" is also acceptable, equivalent as an empty table
+ $1 = NULL;
+ } else {
+ return luaL_error(L, "A list of strings expected");
+ }
+}
+
+%typemap(freearg) char ** {
+ free((char *) $1);
+}
+
+%typecheck(SWIG_TYPECHECK_STRING_ARRAY) char ** {
+ $1 = (lua_istable(L, $input) || lua_isnil(L, $input));
+}
+
+//===----------------------------------------------------------------------===//
+
+// Typemap for file handles (e.g. used in SBDebugger::SetOutputFile)
+
+%typemap(in) lldb::FileSP {
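+ // Accept a standard Lua file handle (luaL_Stream) and wrap its FILE* in a
+ // NativeFile without transferring ownership of the underlying stream.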
+ luaL_Stream *p = (luaL_Stream *)luaL_checkudata(L, $input, LUA_FILEHANDLE);
+ lldb::FileSP file_sp;
+ file_sp = std::make_shared<lldb_private::NativeFile>(p->f, false);
+ if (!file_sp->IsValid())
+ return luaL_error(L, "Invalid file");
+ $1 = file_sp;
+}
+
+%typecheck(SWIG_TYPECHECK_POINTER) lldb::FileSP {
+ $1 = (lua_isuserdata(L, $input)) &&
+ (luaL_testudata(L, $input, LUA_FILEHANDLE) != nullptr);
+}
+
+// Typemap for file handles (e.g. used in SBDebugger::GetOutputFileHandle)
+
+%typemap(out) lldb::FileSP {
+ lldb::FileSP &sp = $1;
+ if (sp && sp->IsValid()) {
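+ // Expose the stream as a Lua file handle whose close method raises an
+ // error (LLDBSwigLuaCloseFileHandle), since lldb still owns the stream.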
+ luaL_Stream *p = (luaL_Stream *)lua_newuserdata(L, sizeof(luaL_Stream));
+ p->closef = &LLDBSwigLuaCloseFileHandle;
+ p->f = sp->GetStream();
+ luaL_setmetatable(L, LUA_FILEHANDLE);
+ SWIG_arg++;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+
+// Typemap for SBData::CreateDataFromUInt64Array, SBData::SetDataFromUInt64Array ...
+
+%typemap(in) (uint64_t* array, size_t array_len),
+ (uint32_t* array, size_t array_len),
+ (int64_t* array, size_t array_len),
+ (int32_t* array, size_t array_len),
+ (double* array, size_t array_len) {
+ if (lua_istable(L, $input)) {
+ // It should accept a table of numbers.
+ $2 = lua_rawlen(L, $input);
+ $1 = ($1_ltype)malloc(($2) * sizeof($*1_type));
+ int i = 0, j = 0;
+ while (i++ < $2) {
+ lua_rawgeti(L, $input, i);
+ if (!lua_isnumber(L, -1)) {
+ // if current element cannot be converted to number, raise an error
+ lua_pop(L, 1);
+ return luaL_error(L, "List should only contain numbers");
+ }
+ $1[j++] = ($*1_ltype)lua_tonumber(L, -1);
+ lua_pop(L, 1);
+ }
+ } else if (lua_isnil(L, $input)) {
+ // "nil" is also acceptable, equivalent as an empty table
+ $1 = NULL;
+ $2 = 0;
+ } else {
+ // else raise an error
+ return luaL_error(L, "A list of numbers expected.");
+ }
+}
+
+%typemap(freearg) (uint64_t* array, size_t array_len),
+ (uint32_t* array, size_t array_len),
+ (int64_t* array, size_t array_len),
+ (int32_t* array, size_t array_len),
+ (double* array, size_t array_len) {
+ free($1);
+}
+
+//===----------------------------------------------------------------------===//
+
+// Typemap for SBCommandReturnObject::PutCString
+
+%typemap(in) (const char *string, int len) {
+ if (lua_isnil(L, $input)) {
+ $1 = NULL;
+ $2 = 0;
+ }
+ else {
+ $1 = (char *)luaL_checklstring(L, $input, (size_t *)&$2);
+ }
+}
+
//===----------------------------------------------------------------------===//
diff --git a/lldb/bindings/lua/lua-wrapper.swig b/lldb/bindings/lua/lua-wrapper.swig
index e070bae23683..c51911bb6bf7 100644
--- a/lldb/bindings/lua/lua-wrapper.swig
+++ b/lldb/bindings/lua/lua-wrapper.swig
@@ -6,6 +6,19 @@ PushSBClass(lua_State* L, T* obj);
%}
+%runtime %{
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void LLDBSwigLuaCallLuaLogOutputCallback(const char *str, void *baton);
+int LLDBSwigLuaCloseFileHandle(lua_State *L);
+
+#ifdef __cplusplus
+}
+#endif
+%}
+
%wrapper %{
// This function is called from Lua::CallBreakpointCallback
@@ -88,5 +101,20 @@ LLDBSwigLuaWatchpointCallbackFunction
return stop;
}
+SWIGEXPORT void
+LLDBSwigLuaCallLuaLogOutputCallback(const char *str, void *baton) {
+ lua_State *L = (lua_State *)baton;
+
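+ // Retrieve the Lua callback stored in the registry by the
+ // SetLoggingCallback typemap and invoke it with the message text.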
+ lua_pushlightuserdata(L, (void *)&LLDBSwigLuaCallLuaLogOutputCallback);
+ lua_gettable(L, LUA_REGISTRYINDEX);
+
+ // FIXME: There's no way to report errors back to the user
+ lua_pushstring(L, str);
+ lua_pcall(L, 1, 0, 0);
+}
+
+int LLDBSwigLuaCloseFileHandle(lua_State *L) {
+ return luaL_error(L, "You cannot close a file handle used by lldb.");
+}
%}
diff --git a/lldb/bindings/lua/lua.swig b/lldb/bindings/lua/lua.swig
index c702e4964081..21fa44c8b4d8 100644
--- a/lldb/bindings/lua/lua.swig
+++ b/lldb/bindings/lua/lua.swig
@@ -17,6 +17,10 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/FormatVariadic.h"
#include "../bindings/lua/lua-swigsafecast.swig"
+
+// required headers for typemaps
+#include "lldb/Host/File.h"
+
using namespace lldb_private;
using namespace lldb;
%}
diff --git a/lldb/bindings/python/lldb-python b/lldb/bindings/python/lldb-python
new file mode 100755
index 000000000000..3bb3b332d852
--- /dev/null
+++ b/lldb/bindings/python/lldb-python
@@ -0,0 +1,17 @@
+#!/usr/bin/env python3
+
+import subprocess
+import os
+import sys
+import json
+
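+# Ask the sibling lldb binary for its script interpreter configuration, then
+# re-exec the matching Python interpreter with lldb's module directory
+# prepended to PYTHONPATH.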
+lldb = os.path.join(os.path.dirname(__file__), 'lldb')
+
+info_json = subprocess.run([lldb, "-l", "python", "-print-script-interpreter-info"],
+ check=True, stdout=subprocess.PIPE, encoding='utf8').stdout
+info = json.loads(info_json)
+
+os.environ["PYTHONPATH"] = (
+ info["lldb-pythonpath"] + os.path.pathsep + os.environ.get("PYTHONPATH", ""))
+
+os.execl(info["executable"], info["executable"], *sys.argv[1:])
diff --git a/lldb/bindings/python/python-swigsafecast.swig b/lldb/bindings/python/python-swigsafecast.swig
index 091fc29b1057..aa2bcfb8c8ae 100644
--- a/lldb/bindings/python/python-swigsafecast.swig
+++ b/lldb/bindings/python/python-swigsafecast.swig
@@ -1,161 +1,72 @@
-// leaving this undefined ensures we will get a linker error if we try to use SBTypeToSWIGWrapper()
-// for a type for which we did not specialze this function
-template <typename SBClass>
-PyObject*
-SBTypeToSWIGWrapper (SBClass* sb_object);
-
-template <typename SBClass>
-PyObject*
-SBTypeToSWIGWrapper (SBClass& sb_object)
-{
- return SBTypeToSWIGWrapper(&sb_object);
-}
-
-template <typename SBClass>
-PyObject*
-SBTypeToSWIGWrapper (const SBClass& sb_object)
-{
- return SBTypeToSWIGWrapper(&sb_object);
-}
-
-template <>
-PyObject*
-SBTypeToSWIGWrapper (PyObject* py_object)
-{
- return py_object;
-}
-
-template <>
-PyObject*
-SBTypeToSWIGWrapper (unsigned int* c_int)
-{
- if (!c_int)
- return NULL;
- return PyInt_FromLong(*c_int);
-}
-
-template <>
-PyObject*
-SBTypeToSWIGWrapper (lldb::SBEvent* event_sb)
-{
- return SWIG_NewPointerObj((void *) event_sb, SWIGTYPE_p_lldb__SBEvent, 0);
-}
-
-template <>
-PyObject*
-SBTypeToSWIGWrapper (lldb::SBProcess* process_sb)
-{
- return SWIG_NewPointerObj((void *) process_sb, SWIGTYPE_p_lldb__SBProcess, 0);
+PyObject *SBTypeToSWIGWrapper(lldb::SBEvent &event_sb) {
+ return SWIG_NewPointerObj(&event_sb, SWIGTYPE_p_lldb__SBEvent, 0);
}
-template <>
-PyObject*
-SBTypeToSWIGWrapper (lldb::SBThread* thread_sb)
-{
- return SWIG_NewPointerObj((void *) thread_sb, SWIGTYPE_p_lldb__SBThread, 0);
+PyObject *SBTypeToSWIGWrapper(lldb::SBProcess &process_sb) {
+ return SWIG_NewPointerObj(&process_sb, SWIGTYPE_p_lldb__SBProcess, 0);
}
-template <>
-PyObject*
-SBTypeToSWIGWrapper (lldb::SBThreadPlan* thread_plan_sb)
-{
- return SWIG_NewPointerObj((void *) thread_plan_sb, SWIGTYPE_p_lldb__SBThreadPlan, 0);
+PyObject *SBTypeToSWIGWrapper(lldb::SBThread &thread_sb) {
+ return SWIG_NewPointerObj(&thread_sb, SWIGTYPE_p_lldb__SBThread, 0);
}
-template <>
-PyObject*
-SBTypeToSWIGWrapper (lldb::SBTarget* target_sb)
-{
- return SWIG_NewPointerObj((void *) target_sb, SWIGTYPE_p_lldb__SBTarget, 0);
+PyObject *SBTypeToSWIGWrapper(lldb::SBThreadPlan &thread_plan_sb) {
+ return SWIG_NewPointerObj(&thread_plan_sb, SWIGTYPE_p_lldb__SBThreadPlan, 0);
}
-template <>
-PyObject*
-SBTypeToSWIGWrapper (lldb::SBFrame* frame_sb)
-{
- return SWIG_NewPointerObj((void *) frame_sb, SWIGTYPE_p_lldb__SBFrame, 0);
+PyObject *SBTypeToSWIGWrapper(lldb::SBTarget &target_sb) {
+ return SWIG_NewPointerObj(&target_sb, SWIGTYPE_p_lldb__SBTarget, 0);
}
-template <>
-PyObject*
-SBTypeToSWIGWrapper (lldb::SBDebugger* debugger_sb)
-{
- return SWIG_NewPointerObj((void *) debugger_sb, SWIGTYPE_p_lldb__SBDebugger, 0);
+PyObject *SBTypeToSWIGWrapper(lldb::SBFrame &frame_sb) {
+ return SWIG_NewPointerObj(&frame_sb, SWIGTYPE_p_lldb__SBFrame, 0);
}
-template <>
-PyObject*
-SBTypeToSWIGWrapper (lldb::SBBreakpoint* breakpoint_sb)
-{
- return SWIG_NewPointerObj((void *) breakpoint_sb, SWIGTYPE_p_lldb__SBBreakpoint, 0);
+PyObject *SBTypeToSWIGWrapper(lldb::SBDebugger &debugger_sb) {
+ return SWIG_NewPointerObj(&debugger_sb, SWIGTYPE_p_lldb__SBDebugger, 0);
}
-template <>
-PyObject*
-SBTypeToSWIGWrapper (lldb::SBWatchpoint* watchpoint_sb)
-{
- return SWIG_NewPointerObj((void *) watchpoint_sb, SWIGTYPE_p_lldb__SBWatchpoint, 0);
+PyObject *SBTypeToSWIGWrapper(lldb::SBBreakpoint &breakpoint_sb) {
+ return SWIG_NewPointerObj(&breakpoint_sb, SWIGTYPE_p_lldb__SBBreakpoint, 0);
}
-template <>
-PyObject*
-SBTypeToSWIGWrapper (lldb::SBBreakpointLocation* breakpoint_location_sb)
-{
- return SWIG_NewPointerObj((void *) breakpoint_location_sb, SWIGTYPE_p_lldb__SBBreakpointLocation, 0);
+PyObject *SBTypeToSWIGWrapper(lldb::SBWatchpoint &watchpoint_sb) {
+ return SWIG_NewPointerObj(&watchpoint_sb, SWIGTYPE_p_lldb__SBWatchpoint, 0);
}
-template <>
-PyObject*
-SBTypeToSWIGWrapper (lldb::SBBreakpointName* breakpoint_name_sb)
-{
- return SWIG_NewPointerObj((void *) breakpoint_name_sb, SWIGTYPE_p_lldb__SBBreakpointName, 0);
+PyObject *
+SBTypeToSWIGWrapper(lldb::SBBreakpointLocation &breakpoint_location_sb) {
+ return SWIG_NewPointerObj(&breakpoint_location_sb,
+ SWIGTYPE_p_lldb__SBBreakpointLocation, 0);
}
-template <>
-PyObject*
-SBTypeToSWIGWrapper (lldb::SBValue* value_sb)
-{
- return SWIG_NewPointerObj((void *) value_sb, SWIGTYPE_p_lldb__SBValue, 0);
+PyObject *SBTypeToSWIGWrapper(lldb::SBValue &value_sb) {
+ return SWIG_NewPointerObj(&value_sb, SWIGTYPE_p_lldb__SBValue, 0);
}
-template <>
-PyObject*
-SBTypeToSWIGWrapper (lldb::SBCommandReturnObject* cmd_ret_obj_sb)
-{
- return SWIG_NewPointerObj((void *) cmd_ret_obj_sb, SWIGTYPE_p_lldb__SBCommandReturnObject, 0);
+PyObject *SBTypeToSWIGWrapper(lldb::SBCommandReturnObject &cmd_ret_obj_sb) {
+ return SWIG_NewPointerObj(&cmd_ret_obj_sb,
+ SWIGTYPE_p_lldb__SBCommandReturnObject, 0);
}
-template <>
-PyObject*
-SBTypeToSWIGWrapper (lldb::SBExecutionContext* ctx_sb)
-{
- return SWIG_NewPointerObj((void *) ctx_sb, SWIGTYPE_p_lldb__SBExecutionContext, 0);
+PyObject *SBTypeToSWIGWrapper(lldb::SBExecutionContext &ctx_sb) {
+ return SWIG_NewPointerObj(&ctx_sb, SWIGTYPE_p_lldb__SBExecutionContext, 0);
}
-template <>
-PyObject*
-SBTypeToSWIGWrapper (lldb::SBTypeSummaryOptions* summary_options_sb)
-{
- return SWIG_NewPointerObj((void *) summary_options_sb, SWIGTYPE_p_lldb__SBTypeSummaryOptions, 0);
+PyObject *SBTypeToSWIGWrapper(lldb::SBTypeSummaryOptions &summary_options_sb) {
+ return SWIG_NewPointerObj(&summary_options_sb,
+ SWIGTYPE_p_lldb__SBTypeSummaryOptions, 0);
}
-template <>
-PyObject*
-SBTypeToSWIGWrapper (lldb::SBStructuredData* structured_data_sb)
-{
- return SWIG_NewPointerObj((void *) structured_data_sb, SWIGTYPE_p_lldb__SBStructuredData, 0);
+PyObject *SBTypeToSWIGWrapper(lldb::SBStructuredData &structured_data_sb) {
+ return SWIG_NewPointerObj(&structured_data_sb,
+ SWIGTYPE_p_lldb__SBStructuredData, 0);
}
-template <>
-PyObject*
-SBTypeToSWIGWrapper (lldb::SBSymbolContext* sym_ctx_sb)
-{
- return SWIG_NewPointerObj((void *) sym_ctx_sb, SWIGTYPE_p_lldb__SBSymbolContext, 0);
+PyObject *SBTypeToSWIGWrapper(lldb::SBSymbolContext &sym_ctx_sb) {
+ return SWIG_NewPointerObj(&sym_ctx_sb, SWIGTYPE_p_lldb__SBSymbolContext, 0);
}
-template <>
-PyObject*
-SBTypeToSWIGWrapper (lldb::SBStream* stream_sb)
-{
- return SWIG_NewPointerObj((void *) stream_sb, SWIGTYPE_p_lldb__SBStream, 0);
+PyObject *SBTypeToSWIGWrapper(lldb::SBStream &stream_sb) {
+ return SWIG_NewPointerObj(&stream_sb, SWIGTYPE_p_lldb__SBStream, 0);
}
diff --git a/lldb/bindings/python/python-wrapper.swig b/lldb/bindings/python/python-wrapper.swig
index 4c39e9c2c776..6dc8ca170390 100644
--- a/lldb/bindings/python/python-wrapper.swig
+++ b/lldb/bindings/python/python-wrapper.swig
@@ -1,9 +1,5 @@
%header %{
-template <typename T>
-PyObject *
-SBTypeToSWIGWrapper (T* item);
-
class PyErr_Cleaner
{
public:
@@ -83,8 +79,9 @@ LLDBSwigPythonBreakpointCallbackFunction
if (max_positional_args < 4) {
return pfunc.Call(frame_arg, bp_loc_arg, dict);
} else {
+ // FIXME: SBStructuredData leaked here
lldb::SBStructuredData *args_value = new lldb::SBStructuredData(args_impl);
- PythonObject args_arg(PyRefType::Owned, SBTypeToSWIGWrapper(args_value));
+ PythonObject args_arg(PyRefType::Owned, SBTypeToSWIGWrapper(*args_value));
return pfunc.Call(frame_arg, bp_loc_arg, args_arg, dict);
}
} ();
@@ -230,12 +227,11 @@ LLDBSwigPythonCreateSyntheticProvider
if (!pfunc.IsAllocated())
Py_RETURN_NONE;
- // I do not want the SBValue to be deallocated when going out of scope because python
- // has ownership of it and will manage memory for this object by itself
+ // FIXME: SBValue leaked here
lldb::SBValue *sb_value = new lldb::SBValue(valobj_sp);
sb_value->SetPreferSyntheticValue(false);
- PythonObject val_arg(PyRefType::Owned, SBTypeToSWIGWrapper(sb_value));
+ PythonObject val_arg(PyRefType::Owned, SBTypeToSWIGWrapper(*sb_value));
if (!val_arg.IsAllocated())
Py_RETURN_NONE;
@@ -288,7 +284,6 @@ LLDBSwigPythonCreateScriptedProcess
if (python_class_name == NULL || python_class_name[0] == '\0' || !session_dictionary_name)
Py_RETURN_NONE;
-
PyErr_Cleaner py_err_cleaner(true);
auto dict = PythonModule::MainModule().ResolveName<PythonDictionary>(session_dictionary_name);
@@ -300,10 +295,9 @@ LLDBSwigPythonCreateScriptedProcess
return nullptr;
}
- // I do not want the SBTarget to be deallocated when going out of scope
- // because python has ownership of it and will manage memory for this
- // object by itself
- PythonObject target_arg(PyRefType::Owned, SBTypeToSWIGWrapper(new lldb::SBTarget(target_sp)));
+ // FIXME: SBTarget leaked here
+ PythonObject target_arg(
+ PyRefType::Owned, SBTypeToSWIGWrapper(*new lldb::SBTarget(target_sp)));
if (!target_arg.IsAllocated())
Py_RETURN_NONE;
@@ -323,16 +317,71 @@ LLDBSwigPythonCreateScriptedProcess
PythonObject result = {};
if (arg_info.get().max_positional_args == 2) {
- if (args_impl != nullptr) {
- error_string.assign("args passed, but __init__ does not take an args dictionary");
- Py_RETURN_NONE;
- }
- result = pfunc(target_arg, dict);
- } else if (arg_info.get().max_positional_args >= 3) {
- PythonObject args_arg(PyRefType::Owned, SBTypeToSWIGWrapper(new lldb::SBStructuredData(args_impl)));
- result = pfunc(target_arg, args_arg, dict);
+ // FIXME: SBStructuredData leaked here
+ PythonObject args_arg(PyRefType::Owned, SBTypeToSWIGWrapper(*new lldb::SBStructuredData(args_impl)));
+ result = pfunc(target_arg, args_arg);
} else {
- error_string.assign("wrong number of arguments in __init__, should be 2 or 3 (not including self)");
+ error_string.assign("wrong number of arguments in __init__, should be 2 (not including self)");
+ Py_RETURN_NONE;
+ }
+
+ if (result.IsAllocated())
+ return result.release();
+ Py_RETURN_NONE;
+}
+
+SWIGEXPORT void*
+LLDBSwigPythonCreateScriptedThread
+(
+ const char *python_class_name,
+ const char *session_dictionary_name,
+ const lldb::ProcessSP& process_sp,
+ lldb_private::StructuredDataImpl *args_impl,
+ std::string &error_string
+)
+{
+ if (python_class_name == NULL || python_class_name[0] == '\0' || !session_dictionary_name)
+ Py_RETURN_NONE;
+
+ PyErr_Cleaner py_err_cleaner(true);
+
+ auto dict = PythonModule::MainModule().ResolveName<PythonDictionary>(session_dictionary_name);
+ auto pfunc = PythonObject::ResolveNameWithDictionary<PythonCallable>(python_class_name, dict);
+
+ if (!pfunc.IsAllocated()) {
+ error_string.append("could not find script class: ");
+ error_string.append(python_class_name);
+ return nullptr;
+ }
+
+ // FIXME: This leaks the SBProcess object
+ PythonObject process_arg(
+ PyRefType::Owned,
+ SBTypeToSWIGWrapper(*new lldb::SBProcess(process_sp)));
+
+ if (!process_arg.IsAllocated())
+ Py_RETURN_NONE;
+
+ llvm::Expected<PythonCallable::ArgInfo> arg_info = pfunc.GetArgInfo();
+ if (!arg_info) {
+ llvm::handleAllErrors(
+ arg_info.takeError(),
+ [&](PythonException &E) {
+ error_string.append(E.ReadBacktrace());
+ },
+ [&](const llvm::ErrorInfoBase &E) {
+ error_string.append(E.message());
+ });
+ Py_RETURN_NONE;
+ }
+
+ PythonObject result = {};
+ if (arg_info.get().max_positional_args == 2) {
+ // FIXME: SBStructuredData leaked here
+ PythonObject args_arg(PyRefType::Owned, SBTypeToSWIGWrapper(*new lldb::SBStructuredData(args_impl)));
+ result = pfunc(process_arg, args_arg);
+ } else {
+ error_string.assign("wrong number of arguments in __init__, should be 2 (not including self)");
Py_RETURN_NONE;
}
@@ -366,10 +415,10 @@ LLDBSwigPythonCreateScriptedThreadPlan
return nullptr;
}
- // I do not want the SBThreadPlan to be deallocated when going out of scope
- // because python has ownership of it and will manage memory for this
- // object by itself
- PythonObject tp_arg(PyRefType::Owned, SBTypeToSWIGWrapper(new lldb::SBThreadPlan(thread_plan_sp)));
+ // FIXME: SBThreadPlan leaked here
+ PythonObject tp_arg(
+ PyRefType::Owned,
+ SBTypeToSWIGWrapper(*new lldb::SBThreadPlan(thread_plan_sp)));
if (!tp_arg.IsAllocated())
Py_RETURN_NONE;
@@ -395,7 +444,8 @@ LLDBSwigPythonCreateScriptedThreadPlan
}
result = pfunc(tp_arg, dict);
} else if (arg_info.get().max_positional_args >= 3) {
- PythonObject args_arg(PyRefType::Owned, SBTypeToSWIGWrapper(new lldb::SBStructuredData(args_impl)));
+ // FIXME: SBStructuredData leaked here
+ PythonObject args_arg(PyRefType::Owned, SBTypeToSWIGWrapper(*new lldb::SBStructuredData(args_impl)));
result = pfunc(tp_arg, args_arg, dict);
} else {
error_string.assign("wrong number of arguments in __init__, should be 2 or 3 (not including self)");
@@ -477,12 +527,14 @@ LLDBSwigPythonCreateScriptedBreakpointResolver
if (!pfunc.IsAllocated())
return nullptr;
+ // FIXME: SBBreakpoint leaked here
lldb::SBBreakpoint *bkpt_value = new lldb::SBBreakpoint(breakpoint_sp);
- PythonObject bkpt_arg(PyRefType::Owned, SBTypeToSWIGWrapper(bkpt_value));
+ PythonObject bkpt_arg(PyRefType::Owned, SBTypeToSWIGWrapper(*bkpt_value));
+ // FIXME: SBStructuredData leaked here
lldb::SBStructuredData *args_value = new lldb::SBStructuredData(args_impl);
- PythonObject args_arg(PyRefType::Owned, SBTypeToSWIGWrapper(args_value));
+ PythonObject args_arg(PyRefType::Owned, SBTypeToSWIGWrapper(*args_value));
PythonObject result = pfunc(bkpt_arg, args_arg, dict);
// FIXME: At this point we should check that the class we found supports all the methods
@@ -585,13 +637,14 @@ LLDBSwigPythonCreateScriptedStopHook
return nullptr;
}
+ // FIXME: SBTarget leaked here
lldb::SBTarget *target_val
= new lldb::SBTarget(target_sp);
+ PythonObject target_arg(PyRefType::Owned, SBTypeToSWIGWrapper(*target_val));
- PythonObject target_arg(PyRefType::Owned, SBTypeToSWIGWrapper(target_val));
-
+ // FIXME: SBStructuredData leaked here
lldb::SBStructuredData *args_value = new lldb::SBStructuredData(args_impl);
- PythonObject args_arg(PyRefType::Owned, SBTypeToSWIGWrapper(args_value));
+ PythonObject args_arg(PyRefType::Owned, SBTypeToSWIGWrapper(*args_value));
PythonObject result = pfunc(target_arg, args_arg, dict);
@@ -918,6 +971,22 @@ LLDBSWIGPython_CastPyObjectToSBValue
return sb_ptr;
}
+SWIGEXPORT void*
+LLDBSWIGPython_CastPyObjectToSBMemoryRegionInfo
+(
+ PyObject* data
+)
+{
+ lldb::SBMemoryRegionInfo* sb_ptr = NULL;
+
+ int valid_cast = SWIG_ConvertPtr(data, (void**)&sb_ptr, SWIGTYPE_p_lldb__SBMemoryRegionInfo, 0);
+
+ if (valid_cast == -1)
+ return NULL;
+
+ return sb_ptr;
+}
+
SWIGEXPORT bool
LLDBSwigPythonCallCommand
(
@@ -940,8 +1009,6 @@ LLDBSwigPythonCallCommand
if (!pfunc.IsAllocated())
return false;
- // pass the pointer-to cmd_retobj_sb or watch the underlying object disappear from under you
- // see comment above for SBCommandReturnObjectReleaser for further details
auto argc = pfunc.GetArgInfo();
if (!argc) {
llvm::consumeError(argc.takeError());
@@ -949,7 +1016,7 @@ LLDBSwigPythonCallCommand
}
PythonObject debugger_arg(PyRefType::Owned, SBTypeToSWIGWrapper(debugger_sb));
PythonObject exe_ctx_arg(PyRefType::Owned, SBTypeToSWIGWrapper(exe_ctx_sb));
- PythonObject cmd_retobj_arg(PyRefType::Owned, SBTypeToSWIGWrapper(&cmd_retobj_sb));
+ PythonObject cmd_retobj_arg(PyRefType::Owned, SBTypeToSWIGWrapper(cmd_retobj_sb));
if (argc.get().max_positional_args < 5u)
pfunc(debugger_arg, PythonString(args), cmd_retobj_arg, dict);
@@ -981,11 +1048,9 @@ LLDBSwigPythonCallCommandObject
if (!pfunc.IsAllocated())
return false;
- // pass the pointer-to cmd_retobj_sb or watch the underlying object disappear from under you
- // see comment above for SBCommandReturnObjectReleaser for further details
PythonObject debugger_arg(PyRefType::Owned, SBTypeToSWIGWrapper(debugger_sb));
PythonObject exe_ctx_arg(PyRefType::Owned, SBTypeToSWIGWrapper(exe_ctx_sb));
- PythonObject cmd_retobj_arg(PyRefType::Owned, SBTypeToSWIGWrapper(&cmd_retobj_sb));
+ PythonObject cmd_retobj_arg(PyRefType::Owned, SBTypeToSWIGWrapper(cmd_retobj_sb));
pfunc(debugger_arg, PythonString(args), exe_ctx_arg, cmd_retobj_arg);
@@ -1011,10 +1076,9 @@ LLDBSWIGPythonCreateOSPlugin
if (!pfunc.IsAllocated())
Py_RETURN_NONE;
- // I do not want the SBProcess to be deallocated when going out of scope because python
- // has ownership of it and will manage memory for this object by itself
+ // FIXME: This leaks the SBProcess object
lldb::SBProcess *process_sb = new lldb::SBProcess(process_sp);
- PythonObject process_arg(PyRefType::Owned, SBTypeToSWIGWrapper(process_sb));
+ PythonObject process_arg(PyRefType::Owned, SBTypeToSWIGWrapper(*process_sb));
if (!process_arg.IsAllocated())
Py_RETURN_NONE;
diff --git a/lldb/docs/design/reproducers.rst b/lldb/docs/design/reproducers.rst
index 99e34d812dee..cac8721196d3 100644
--- a/lldb/docs/design/reproducers.rst
+++ b/lldb/docs/design/reproducers.rst
@@ -33,7 +33,7 @@ late to capture initialization of the debugger.
.. code-block:: bash
- > lldb --capture
+ $ lldb --capture
In capture mode, LLDB will keep track of all the information it needs to replay
the current debug session. Most data is captured lazily to limit the impact on
@@ -70,7 +70,7 @@ were passed to LLDB during capture are already part of the reproducer.
.. code-block:: bash
- > lldb --replay /path/to/reproducer
+ $ lldb --replay /path/to/reproducer
During replay LLDB will behave similar to batch mode. The session should be
diff --git a/lldb/docs/design/sbapi.rst b/lldb/docs/design/sbapi.rst
index 676509bbd99e..f4a7ca271be6 100644
--- a/lldb/docs/design/sbapi.rst
+++ b/lldb/docs/design/sbapi.rst
@@ -68,7 +68,7 @@ Like other clang-based tools it requires a compilation database
::
- ./bin/lldb-instr /path/to/lldb/source/API/SBDebugger.cpp
+ $ ./bin/lldb-instr /path/to/lldb/source/API/SBDebugger.cpp
The tool will automatically insert ``LLDB_RECORD`` macros inline, however you
diff --git a/lldb/docs/man/lldb.rst b/lldb/docs/man/lldb.rst
index b75288db380d..10b143cd0de8 100644
--- a/lldb/docs/man/lldb.rst
+++ b/lldb/docs/man/lldb.rst
@@ -111,7 +111,7 @@ COMMANDS
.. option:: --source-quietly
- Tells the debugger to execute this one-line lldb command before any file has been loaded.
+ Tells the debugger not to echo commands while sourcing files or one-line commands provided on the command line.
.. option:: --source <file>
@@ -234,6 +234,10 @@ SCRIPTING
Alias for --script-language
+.. option:: --print-script-interpreter-info
+
+ Prints out a JSON dictionary with information about the scripting language interpreter.
+
.. option:: --python-path
Prints out the path to the lldb.py file for this version of lldb.
diff --git a/lldb/include/lldb/API/SBDebugger.h b/lldb/include/lldb/API/SBDebugger.h
index ef62141f579d..64081f79205d 100644
--- a/lldb/include/lldb/API/SBDebugger.h
+++ b/lldb/include/lldb/API/SBDebugger.h
@@ -247,6 +247,8 @@ public:
lldb::ScriptLanguage GetScriptingLanguage(const char *script_language_name);
+ SBStructuredData GetScriptInterpreterInfo(ScriptLanguage);
+
static const char *GetVersionString();
static const char *StateAsCString(lldb::StateType state);
diff --git a/lldb/include/lldb/API/SBExecutionContext.h b/lldb/include/lldb/API/SBExecutionContext.h
index 06ece6fbc0fa..70fc83e84e4e 100644
--- a/lldb/include/lldb/API/SBExecutionContext.h
+++ b/lldb/include/lldb/API/SBExecutionContext.h
@@ -50,8 +50,6 @@ public:
SBFrame GetFrame() const;
protected:
- void reset(lldb::ExecutionContextRefSP &event_sp);
-
lldb_private::ExecutionContextRef *get() const;
private:
diff --git a/lldb/include/lldb/API/SBMemoryRegionInfo.h b/lldb/include/lldb/API/SBMemoryRegionInfo.h
index 122226b9a0c5..be55de4ead1f 100644
--- a/lldb/include/lldb/API/SBMemoryRegionInfo.h
+++ b/lldb/include/lldb/API/SBMemoryRegionInfo.h
@@ -20,6 +20,10 @@ public:
SBMemoryRegionInfo(const lldb::SBMemoryRegionInfo &rhs);
+ SBMemoryRegionInfo(const char *name, lldb::addr_t begin, lldb::addr_t end,
+ uint32_t permissions, bool mapped,
+ bool stack_memory = false);
+
~SBMemoryRegionInfo();
const lldb::SBMemoryRegionInfo &
@@ -117,6 +121,8 @@ private:
friend class SBProcess;
friend class SBMemoryRegionInfoList;
+ friend class lldb_private::ScriptInterpreter;
+
lldb_private::MemoryRegionInfo &ref();
const lldb_private::MemoryRegionInfo &ref() const;
diff --git a/lldb/include/lldb/API/SBMemoryRegionInfoList.h b/lldb/include/lldb/API/SBMemoryRegionInfoList.h
index a7122ee9108a..1d939dff55fa 100644
--- a/lldb/include/lldb/API/SBMemoryRegionInfoList.h
+++ b/lldb/include/lldb/API/SBMemoryRegionInfoList.h
@@ -27,6 +27,9 @@ public:
uint32_t GetSize() const;
+ bool GetMemoryRegionContainingAddress(lldb::addr_t addr,
+ SBMemoryRegionInfo &region_info);
+
bool GetMemoryRegionAtIndex(uint32_t idx, SBMemoryRegionInfo &region_info);
void Append(lldb::SBMemoryRegionInfo &region);
diff --git a/lldb/include/lldb/Breakpoint/Breakpoint.h b/lldb/include/lldb/Breakpoint/Breakpoint.h
index f2e2a0d22784..40435d5c3d0f 100644
--- a/lldb/include/lldb/Breakpoint/Breakpoint.h
+++ b/lldb/include/lldb/Breakpoint/Breakpoint.h
@@ -22,6 +22,7 @@
#include "lldb/Breakpoint/Stoppoint.h"
#include "lldb/Breakpoint/StoppointHitCounter.h"
#include "lldb/Core/SearchFilter.h"
+#include "lldb/Target/Statistics.h"
#include "lldb/Utility/Event.h"
#include "lldb/Utility/StringList.h"
#include "lldb/Utility/StructuredData.h"
@@ -576,6 +577,12 @@ public:
static lldb::BreakpointSP CopyFromBreakpoint(lldb::TargetSP new_target,
const Breakpoint &bp_to_copy_from);
+ /// Get statistics associated with this breakpoint in JSON format.
+ llvm::json::Value GetStatistics();
+
+ /// Get the time it took to resolve all locations in this breakpoint.
+ StatsDuration GetResolveTime() const { return m_resolve_time; }
+
protected:
friend class Target;
// Protected Methods
@@ -653,6 +660,8 @@ private:
BreakpointName::Permissions m_permissions;
+ StatsDuration m_resolve_time{0.0};
+
void SendBreakpointChangedEvent(lldb::BreakpointEventType eventKind);
void SendBreakpointChangedEvent(BreakpointEventData *data);
diff --git a/lldb/include/lldb/Core/Address.h b/lldb/include/lldb/Core/Address.h
index ec393a1871e3..dc50e27ca277 100644
--- a/lldb/include/lldb/Core/Address.h
+++ b/lldb/include/lldb/Core/Address.h
@@ -210,6 +210,10 @@ public:
}
};
+ /// Write a description of this object to a Stream.
+ bool GetDescription(Stream &s, Target &target,
+ lldb::DescriptionLevel level) const;
+
/// Dump a description of this object to a Stream.
///
/// Dump a description of the contents of this object to the supplied stream
diff --git a/lldb/include/lldb/Core/AddressRange.h b/lldb/include/lldb/Core/AddressRange.h
index 6fbdc35c9168..4a33c2d79587 100644
--- a/lldb/include/lldb/Core/AddressRange.h
+++ b/lldb/include/lldb/Core/AddressRange.h
@@ -242,8 +242,6 @@ protected:
lldb::addr_t m_byte_size = 0; ///< The size in bytes of this address range.
};
-// bool operator== (const AddressRange& lhs, const AddressRange& rhs);
-
} // namespace lldb_private
#endif // LLDB_CORE_ADDRESSRANGE_H
diff --git a/lldb/include/lldb/Core/Communication.h b/lldb/include/lldb/Core/Communication.h
index 930e927f6783..fdcb6c5fb982 100644
--- a/lldb/include/lldb/Core/Communication.h
+++ b/lldb/include/lldb/Core/Communication.h
@@ -209,6 +209,22 @@ public:
size_t Write(const void *src, size_t src_len, lldb::ConnectionStatus &status,
Status *error_ptr);
+ /// Repeatedly attempt writing until either \a src_len bytes are written
+ /// or a permanent failure occurs.
+ ///
+ /// \param[in] src
+ /// A source buffer that must be at least \a src_len bytes
+ /// long.
+ ///
+ /// \param[in] src_len
+ /// The number of bytes to attempt to write, and also the
+ /// number of bytes currently available in \a src.
+ ///
+ /// \return
+ /// The number of bytes actually written.
+ size_t WriteAll(const void *src, size_t src_len,
+ lldb::ConnectionStatus &status, Status *error_ptr);
+
/// Sets the connection that it to be used by this class.
///
/// By making a communication class that uses different connections it
diff --git a/lldb/include/lldb/Core/Disassembler.h b/lldb/include/lldb/Core/Disassembler.h
index 622c23ff6492..0925bf358b9c 100644
--- a/lldb/include/lldb/Core/Disassembler.h
+++ b/lldb/include/lldb/Core/Disassembler.h
@@ -150,6 +150,10 @@ public:
virtual bool HasDelaySlot();
+ virtual bool IsLoad() = 0;
+
+ virtual bool IsAuthenticated() = 0;
+
bool CanSetBreakpoint ();
virtual size_t Decode(const Disassembler &disassembler,
@@ -336,6 +340,10 @@ public:
bool HasDelaySlot() override;
+ bool IsLoad() override;
+
+ bool IsAuthenticated() override;
+
void CalculateMnemonicOperandsAndComment(
const ExecutionContext *exe_ctx) override {
// TODO: fill this in and put opcode name into Instruction::m_opcode_name,
diff --git a/lldb/include/lldb/Core/IOHandler.h b/lldb/include/lldb/Core/IOHandler.h
index 4a3b788e3ea1..7011dd1e8e04 100644
--- a/lldb/include/lldb/Core/IOHandler.h
+++ b/lldb/include/lldb/Core/IOHandler.h
@@ -442,8 +442,6 @@ protected:
bool m_multi_line;
bool m_color_prompts;
bool m_interrupt_exits;
- bool m_editing; // Set to true when fetching a line manually (not using
- // libedit)
std::string m_line_buffer;
};
diff --git a/lldb/include/lldb/Core/Mangled.h b/lldb/include/lldb/Core/Mangled.h
index d11d13b63cfc..c0542157f85d 100644
--- a/lldb/include/lldb/Core/Mangled.h
+++ b/lldb/include/lldb/Core/Mangled.h
@@ -44,7 +44,8 @@ public:
eManglingSchemeNone = 0,
eManglingSchemeMSVC,
eManglingSchemeItanium,
- eManglingSchemeRustV0
+ eManglingSchemeRustV0,
+ eManglingSchemeD
};
/// Default constructor.
diff --git a/lldb/include/lldb/Core/Module.h b/lldb/include/lldb/Core/Module.h
index dd7100c4616c..b80f4fd9b85a 100644
--- a/lldb/include/lldb/Core/Module.h
+++ b/lldb/include/lldb/Core/Module.h
@@ -16,6 +16,7 @@
#include "lldb/Symbol/SymbolContextScope.h"
#include "lldb/Symbol/TypeSystem.h"
#include "lldb/Target/PathMappingList.h"
+#include "lldb/Target/Statistics.h"
#include "lldb/Utility/ArchSpec.h"
#include "lldb/Utility/ConstString.h"
#include "lldb/Utility/FileSpec.h"
@@ -57,6 +58,15 @@ class TypeList;
class TypeMap;
class VariableList;
+/// Options used by Module::FindFunctions. This cannot be a nested class
+/// because it must be forward-declared in ModuleList.h.
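+/// A typical caller sets only the fields it needs, e.g. (hypothetical usage,
+/// assuming a Module &module, ConstString name, and SymbolContextList sc_list
+/// are in scope):
+///   ModuleFunctionSearchOptions options;
+///   options.include_symbols = true;
+///   options.include_inlines = true;
+///   module.FindFunctions(name, CompilerDeclContext(),
+///                        lldb::eFunctionNameTypeAuto, options, sc_list);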
+struct ModuleFunctionSearchOptions {
+ /// Include the symbol table.
+ bool include_symbols = false;
+ /// Include inlined functions.
+ bool include_inlines = false;
+};
+
/// \class Module Module.h "lldb/Core/Module.h"
/// A class that describes an executable image and its associated
/// object and symbol files.
@@ -304,8 +314,9 @@ public:
/// matches.
void FindFunctions(ConstString name,
const CompilerDeclContext &parent_decl_ctx,
- lldb::FunctionNameType name_type_mask, bool symbols_ok,
- bool inlines_ok, SymbolContextList &sc_list);
+ lldb::FunctionNameType name_type_mask,
+ const ModuleFunctionSearchOptions &options,
+ SymbolContextList &sc_list);
/// Find functions by name.
///
@@ -319,8 +330,9 @@ public:
/// \param[out] sc_list
/// A symbol context list that gets filled in with all of the
/// matches.
- void FindFunctions(const RegularExpression &regex, bool symbols_ok,
- bool inlines_ok, SymbolContextList &sc_list);
+ void FindFunctions(const RegularExpression &regex,
+ const ModuleFunctionSearchOptions &options,
+ SymbolContextList &sc_list);
/// Find addresses by file/line
///
@@ -859,6 +871,18 @@ public:
/// Update the ArchSpec to a more specific variant.
bool MergeArchitecture(const ArchSpec &arch_spec);
+ /// Accessor for the symbol table parse time metric.
+ ///
+ /// The value is returned as a reference to allow it to be updated by the
+ /// ElapsedTime RAII object.
+ StatsDuration &GetSymtabParseTime() { return m_symtab_parse_time; }
+
+ /// Accessor for the symbol table index time metric.
+ ///
+ /// The value is returned as a reference to allow it to be updated by the
+ /// ElapsedTime RAII object.
+ StatsDuration &GetSymtabIndexTime() { return m_symtab_index_time; }
+
/// \class LookupInfo Module.h "lldb/Core/Module.h"
/// A class that encapsulates name lookup information.
///
@@ -984,6 +1008,14 @@ protected:
mutable bool m_file_has_changed : 1,
m_first_file_changed_log : 1; /// See if the module was modified after it
/// was initially opened.
+ /// We store a symbol table parse time duration here because we might have
+ /// an object file and a symbol file which both have symbol tables. The parse
+ /// time for the symbol tables can be aggregated here.
+ StatsDuration m_symtab_parse_time{0.0};
+ /// We store a symbol name index time duration here because we might have
+ /// an object file and a symbol file which both have symbol tables. The index
+ /// time for the symbol tables can be aggregated here.
+ StatsDuration m_symtab_index_time{0.0};
/// Resolve a file or load virtual address.
///
diff --git a/lldb/include/lldb/Core/ModuleList.h b/lldb/include/lldb/Core/ModuleList.h
index 07dddd18357b..6ca5813d9662 100644
--- a/lldb/include/lldb/Core/ModuleList.h
+++ b/lldb/include/lldb/Core/ModuleList.h
@@ -45,6 +45,7 @@ class Target;
class TypeList;
class UUID;
class VariableList;
+struct ModuleFunctionSearchOptions;
class ModuleListProperties : public Properties {
mutable llvm::sys::RWMutex m_symlink_paths_mutex;
@@ -158,7 +159,7 @@ public:
/// ModulesDidLoad may be deferred when adding multiple Modules
/// to the Target, but it must be called at the end,
/// before resuming execution.
- bool AppendIfNeeded(const lldb::ModuleSP &module_sp, bool notify = true);
+ bool AppendIfNeeded(const lldb::ModuleSP &new_module, bool notify = true);
void Append(const ModuleList &module_list);
@@ -252,7 +253,7 @@ public:
/// \see Module::FindFunctions ()
void FindFunctions(ConstString name, lldb::FunctionNameType name_type_mask,
- bool include_symbols, bool include_inlines,
+ const ModuleFunctionSearchOptions &options,
SymbolContextList &sc_list) const;
/// \see Module::FindFunctionSymbols ()
@@ -261,8 +262,9 @@ public:
SymbolContextList &sc_list);
/// \see Module::FindFunctions ()
- void FindFunctions(const RegularExpression &name, bool include_symbols,
- bool include_inlines, SymbolContextList &sc_list);
+ void FindFunctions(const RegularExpression &name,
+ const ModuleFunctionSearchOptions &options,
+ SymbolContextList &sc_list);
/// Find global and static variables by name.
///
diff --git a/lldb/include/lldb/Core/PluginInterface.h b/lldb/include/lldb/Core/PluginInterface.h
index 5bdb2f45b665..e9fd2b263a9e 100644
--- a/lldb/include/lldb/Core/PluginInterface.h
+++ b/lldb/include/lldb/Core/PluginInterface.h
@@ -9,7 +9,7 @@
#ifndef LLDB_CORE_PLUGININTERFACE_H
#define LLDB_CORE_PLUGININTERFACE_H
-#include "lldb/lldb-private.h"
+#include "llvm/ADT/StringRef.h"
namespace lldb_private {
@@ -18,9 +18,7 @@ public:
PluginInterface() = default;
virtual ~PluginInterface() = default;
- virtual ConstString GetPluginName() = 0;
-
- virtual uint32_t GetPluginVersion() = 0;
+ virtual llvm::StringRef GetPluginName() = 0;
PluginInterface(const PluginInterface &) = delete;
PluginInterface &operator=(const PluginInterface &) = delete;
diff --git a/lldb/include/lldb/Core/PluginManager.h b/lldb/include/lldb/Core/PluginManager.h
index be91929c62e1..7dc99bf3e755 100644
--- a/lldb/include/lldb/Core/PluginManager.h
+++ b/lldb/include/lldb/Core/PluginManager.h
@@ -54,7 +54,7 @@ public:
static void Terminate();
// ABI
- static bool RegisterPlugin(ConstString name, const char *description,
+ static bool RegisterPlugin(llvm::StringRef name, llvm::StringRef description,
ABICreateInstance create_callback);
static bool UnregisterPlugin(ABICreateInstance create_callback);
@@ -62,7 +62,7 @@ public:
static ABICreateInstance GetABICreateCallbackAtIndex(uint32_t idx);
// Architecture
- static void RegisterPlugin(ConstString name, llvm::StringRef description,
+ static void RegisterPlugin(llvm::StringRef name, llvm::StringRef description,
ArchitectureCreateInstance create_callback);
static void UnregisterPlugin(ArchitectureCreateInstance create_callback);
@@ -71,7 +71,7 @@ public:
CreateArchitectureInstance(const ArchSpec &arch);
// Disassembler
- static bool RegisterPlugin(ConstString name, const char *description,
+ static bool RegisterPlugin(llvm::StringRef name, llvm::StringRef description,
DisassemblerCreateInstance create_callback);
static bool UnregisterPlugin(DisassemblerCreateInstance create_callback);
@@ -80,11 +80,11 @@ public:
GetDisassemblerCreateCallbackAtIndex(uint32_t idx);
static DisassemblerCreateInstance
- GetDisassemblerCreateCallbackForPluginName(ConstString name);
+ GetDisassemblerCreateCallbackForPluginName(llvm::StringRef name);
// DynamicLoader
static bool
- RegisterPlugin(ConstString name, const char *description,
+ RegisterPlugin(llvm::StringRef name, llvm::StringRef description,
DynamicLoaderCreateInstance create_callback,
DebuggerInitializeCallback debugger_init_callback = nullptr);
@@ -94,11 +94,11 @@ public:
GetDynamicLoaderCreateCallbackAtIndex(uint32_t idx);
static DynamicLoaderCreateInstance
- GetDynamicLoaderCreateCallbackForPluginName(ConstString name);
+ GetDynamicLoaderCreateCallbackForPluginName(llvm::StringRef name);
// JITLoader
static bool
- RegisterPlugin(ConstString name, const char *description,
+ RegisterPlugin(llvm::StringRef name, llvm::StringRef description,
JITLoaderCreateInstance create_callback,
DebuggerInitializeCallback debugger_init_callback = nullptr);
@@ -108,7 +108,7 @@ public:
GetJITLoaderCreateCallbackAtIndex(uint32_t idx);
// EmulateInstruction
- static bool RegisterPlugin(ConstString name, const char *description,
+ static bool RegisterPlugin(llvm::StringRef name, llvm::StringRef description,
EmulateInstructionCreateInstance create_callback);
static bool
@@ -118,10 +118,10 @@ public:
GetEmulateInstructionCreateCallbackAtIndex(uint32_t idx);
static EmulateInstructionCreateInstance
- GetEmulateInstructionCreateCallbackForPluginName(ConstString name);
+ GetEmulateInstructionCreateCallbackForPluginName(llvm::StringRef name);
// OperatingSystem
- static bool RegisterPlugin(ConstString name, const char *description,
+ static bool RegisterPlugin(llvm::StringRef name, llvm::StringRef description,
OperatingSystemCreateInstance create_callback,
DebuggerInitializeCallback debugger_init_callback);
@@ -131,10 +131,10 @@ public:
GetOperatingSystemCreateCallbackAtIndex(uint32_t idx);
static OperatingSystemCreateInstance
- GetOperatingSystemCreateCallbackForPluginName(ConstString name);
+ GetOperatingSystemCreateCallbackForPluginName(llvm::StringRef name);
// Language
- static bool RegisterPlugin(ConstString name, const char *description,
+ static bool RegisterPlugin(llvm::StringRef name, llvm::StringRef description,
LanguageCreateInstance create_callback);
static bool UnregisterPlugin(LanguageCreateInstance create_callback);
@@ -143,7 +143,7 @@ public:
// LanguageRuntime
static bool RegisterPlugin(
- ConstString name, const char *description,
+ llvm::StringRef name, llvm::StringRef description,
LanguageRuntimeCreateInstance create_callback,
LanguageRuntimeGetCommandObject command_callback = nullptr,
LanguageRuntimeGetExceptionPrecondition precondition_callback = nullptr);
@@ -160,7 +160,7 @@ public:
GetLanguageRuntimeGetExceptionPreconditionAtIndex(uint32_t idx);
// SystemRuntime
- static bool RegisterPlugin(ConstString name, const char *description,
+ static bool RegisterPlugin(llvm::StringRef name, llvm::StringRef description,
SystemRuntimeCreateInstance create_callback);
static bool UnregisterPlugin(SystemRuntimeCreateInstance create_callback);
@@ -170,7 +170,7 @@ public:
// ObjectFile
static bool
- RegisterPlugin(ConstString name, const char *description,
+ RegisterPlugin(llvm::StringRef name, llvm::StringRef description,
ObjectFileCreateInstance create_callback,
ObjectFileCreateMemoryInstance create_memory_callback,
ObjectFileGetModuleSpecifications get_module_specifications,
@@ -188,15 +188,16 @@ public:
GetObjectFileGetModuleSpecificationsCallbackAtIndex(uint32_t idx);
static ObjectFileCreateMemoryInstance
- GetObjectFileCreateMemoryCallbackForPluginName(ConstString name);
+ GetObjectFileCreateMemoryCallbackForPluginName(llvm::StringRef name);
static Status SaveCore(const lldb::ProcessSP &process_sp,
const FileSpec &outfile,
- lldb::SaveCoreStyle &core_style);
+ lldb::SaveCoreStyle &core_style,
+ llvm::StringRef plugin_name);
// ObjectContainer
static bool
- RegisterPlugin(ConstString name, const char *description,
+ RegisterPlugin(llvm::StringRef name, llvm::StringRef description,
ObjectContainerCreateInstance create_callback,
ObjectFileGetModuleSpecifications get_module_specifications);
@@ -210,7 +211,7 @@ public:
// Platform
static bool
- RegisterPlugin(ConstString name, const char *description,
+ RegisterPlugin(llvm::StringRef name, llvm::StringRef description,
PlatformCreateInstance create_callback,
DebuggerInitializeCallback debugger_init_callback = nullptr);
@@ -219,17 +220,17 @@ public:
static PlatformCreateInstance GetPlatformCreateCallbackAtIndex(uint32_t idx);
static PlatformCreateInstance
- GetPlatformCreateCallbackForPluginName(ConstString name);
+ GetPlatformCreateCallbackForPluginName(llvm::StringRef name);
- static const char *GetPlatformPluginNameAtIndex(uint32_t idx);
+ static llvm::StringRef GetPlatformPluginNameAtIndex(uint32_t idx);
- static const char *GetPlatformPluginDescriptionAtIndex(uint32_t idx);
+ static llvm::StringRef GetPlatformPluginDescriptionAtIndex(uint32_t idx);
static void AutoCompletePlatformName(llvm::StringRef partial_name,
CompletionRequest &request);
// Process
static bool
- RegisterPlugin(ConstString name, const char *description,
+ RegisterPlugin(llvm::StringRef name, llvm::StringRef description,
ProcessCreateInstance create_callback,
DebuggerInitializeCallback debugger_init_callback = nullptr);
@@ -238,17 +239,17 @@ public:
static ProcessCreateInstance GetProcessCreateCallbackAtIndex(uint32_t idx);
static ProcessCreateInstance
- GetProcessCreateCallbackForPluginName(ConstString name);
+ GetProcessCreateCallbackForPluginName(llvm::StringRef name);
- static const char *GetProcessPluginNameAtIndex(uint32_t idx);
+ static llvm::StringRef GetProcessPluginNameAtIndex(uint32_t idx);
- static const char *GetProcessPluginDescriptionAtIndex(uint32_t idx);
+ static llvm::StringRef GetProcessPluginDescriptionAtIndex(uint32_t idx);
static void AutoCompleteProcessName(llvm::StringRef partial_name,
CompletionRequest &request);
// ScriptInterpreter
- static bool RegisterPlugin(ConstString name, const char *description,
+ static bool RegisterPlugin(llvm::StringRef name, llvm::StringRef description,
lldb::ScriptLanguage script_lang,
ScriptInterpreterCreateInstance create_callback);
@@ -296,7 +297,7 @@ public:
/// \return
/// Returns true upon success; otherwise, false.
static bool
- RegisterPlugin(ConstString name, const char *description,
+ RegisterPlugin(llvm::StringRef name, llvm::StringRef description,
StructuredDataPluginCreateInstance create_callback,
DebuggerInitializeCallback debugger_init_callback = nullptr,
StructuredDataFilterLaunchInfo filter_callback = nullptr);
@@ -313,7 +314,7 @@ public:
// SymbolFile
static bool
- RegisterPlugin(ConstString name, const char *description,
+ RegisterPlugin(llvm::StringRef name, llvm::StringRef description,
SymbolFileCreateInstance create_callback,
DebuggerInitializeCallback debugger_init_callback = nullptr);
@@ -323,7 +324,7 @@ public:
GetSymbolFileCreateCallbackAtIndex(uint32_t idx);
// SymbolVendor
- static bool RegisterPlugin(ConstString name, const char *description,
+ static bool RegisterPlugin(llvm::StringRef name, llvm::StringRef description,
SymbolVendorCreateInstance create_callback);
static bool UnregisterPlugin(SymbolVendorCreateInstance create_callback);
@@ -333,7 +334,7 @@ public:
// Trace
static bool RegisterPlugin(
- ConstString name, const char *description,
+ llvm::StringRef name, llvm::StringRef description,
TraceCreateInstanceForSessionFile create_callback_for_session_file,
TraceCreateInstanceForLiveProcess create_callback_for_live_process,
llvm::StringRef schema);
@@ -342,10 +343,10 @@ public:
UnregisterPlugin(TraceCreateInstanceForSessionFile create_callback);
static TraceCreateInstanceForSessionFile
- GetTraceCreateCallback(ConstString plugin_name);
+ GetTraceCreateCallback(llvm::StringRef plugin_name);
static TraceCreateInstanceForLiveProcess
- GetTraceCreateCallbackForLiveProcess(ConstString plugin_name);
+ GetTraceCreateCallbackForLiveProcess(llvm::StringRef plugin_name);
/// Get the JSON schema for a trace session file corresponding to the given
/// plugin.
@@ -356,7 +357,7 @@ public:
/// \return
/// An empty \a StringRef if no plugin was found with that plugin name,
/// otherwise the actual schema is returned.
- static llvm::StringRef GetTraceSchema(ConstString plugin_name);
+ static llvm::StringRef GetTraceSchema(llvm::StringRef plugin_name);
/// Get the JSON schema for a trace session file corresponding to the plugin
/// given by its index.
@@ -375,16 +376,16 @@ public:
/// This callback is used to create a CommandObject that will be listed
/// under "thread trace export". Can be \b null.
static bool RegisterPlugin(
- ConstString name, const char *description,
+ llvm::StringRef name, llvm::StringRef description,
TraceExporterCreateInstance create_callback,
ThreadTraceExportCommandCreator create_thread_trace_export_command);
static TraceExporterCreateInstance
- GetTraceExporterCreateCallback(ConstString plugin_name);
+ GetTraceExporterCreateCallback(llvm::StringRef plugin_name);
static bool UnregisterPlugin(TraceExporterCreateInstance create_callback);
- static const char *GetTraceExporterPluginNameAtIndex(uint32_t index);
+ static llvm::StringRef GetTraceExporterPluginNameAtIndex(uint32_t index);
/// Return the callback used to create the CommandObject that will be listed
/// under "thread trace export". Can be \b null.
@@ -392,7 +393,7 @@ public:
GetThreadTraceExportCommandCreatorAtIndex(uint32_t index);
// UnwindAssembly
- static bool RegisterPlugin(ConstString name, const char *description,
+ static bool RegisterPlugin(llvm::StringRef name, llvm::StringRef description,
UnwindAssemblyCreateInstance create_callback);
static bool UnregisterPlugin(UnwindAssemblyCreateInstance create_callback);
@@ -401,7 +402,7 @@ public:
GetUnwindAssemblyCreateCallbackAtIndex(uint32_t idx);
// MemoryHistory
- static bool RegisterPlugin(ConstString name, const char *description,
+ static bool RegisterPlugin(llvm::StringRef name, llvm::StringRef description,
MemoryHistoryCreateInstance create_callback);
static bool UnregisterPlugin(MemoryHistoryCreateInstance create_callback);
@@ -411,7 +412,7 @@ public:
// InstrumentationRuntime
static bool
- RegisterPlugin(ConstString name, const char *description,
+ RegisterPlugin(llvm::StringRef name, llvm::StringRef description,
InstrumentationRuntimeCreateInstance create_callback,
InstrumentationRuntimeGetType get_type_callback);
@@ -425,7 +426,7 @@ public:
GetInstrumentationRuntimeCreateCallbackAtIndex(uint32_t idx);
// TypeSystem
- static bool RegisterPlugin(ConstString name, const char *description,
+ static bool RegisterPlugin(llvm::StringRef name, llvm::StringRef description,
TypeSystemCreateInstance create_callback,
LanguageSet supported_languages_for_types,
LanguageSet supported_languages_for_expressions);
@@ -440,7 +441,7 @@ public:
static LanguageSet GetAllTypeSystemSupportedLanguagesForExpressions();
// REPL
- static bool RegisterPlugin(ConstString name, const char *description,
+ static bool RegisterPlugin(llvm::StringRef name, llvm::StringRef description,
REPLCreateInstance create_callback,
LanguageSet supported_languages);
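Since every RegisterPlugin overload now takes llvm::StringRef for the name and description, string literals can be passed directly without building ConstStrings. A sketch using the SystemRuntime overload; the create-callback signature (SystemRuntime *(*)(Process *)) is assumed from lldb-private-interfaces.h and the plugin itself is hypothetical:

#include "lldb/Core/PluginManager.h"

static lldb_private::SystemRuntime *
CreateIllustrativeRuntime(lldb_private::Process *process) {
  return nullptr; // illustrative factory: never actually creates an instance
}

void RegisterIllustrativeRuntime() {
  lldb_private::PluginManager::RegisterPlugin(
      "illustrative-system-runtime",
      "A do-nothing system runtime used only as a registration example",
      CreateIllustrativeRuntime);
}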
diff --git a/lldb/include/lldb/Core/RichManglingContext.h b/lldb/include/lldb/Core/RichManglingContext.h
index 48102ec0b1cf..a6b7af8d8d7e 100644
--- a/lldb/include/lldb/Core/RichManglingContext.h
+++ b/lldb/include/lldb/Core/RichManglingContext.h
@@ -42,9 +42,6 @@ public:
/// If this symbol describes a constructor or destructor.
bool IsCtorOrDtor() const;
- /// If this symbol describes a function.
- bool IsFunction() const;
-
/// Get the base name of a function. This doesn't include trailing template
/// arguments, ie "a::b<int>" gives "b". The result will overwrite the
/// internal buffer. It can be obtained via GetBufferRef().
diff --git a/lldb/include/lldb/Core/Section.h b/lldb/include/lldb/Core/Section.h
index 3d4ab154e743..8a9fea374314 100644
--- a/lldb/include/lldb/Core/Section.h
+++ b/lldb/include/lldb/Core/Section.h
@@ -89,6 +89,12 @@ public:
void Clear() { m_sections.clear(); }
+ /// Get the debug information size from all sections that contain debug
+ /// information. Symbol tables are not considered part of the debug
+ /// information for this call, just known sections that contain debug
+ /// information.
+ uint64_t GetDebugInfoSize() const;
+
protected:
collection m_sections;
};
@@ -236,6 +242,13 @@ public:
void SetIsRelocated(bool b) { m_relocated = b; }
+ /// Returns true if this section contains debug information. Symbol tables
+ /// are not considered debug information since some symbols might contain
+ /// debug information (STABS, COFF) but not all symbols do, so to keep this
+ /// fast and simple only sections that contain only debug information should
+ /// return true.
+ bool ContainsOnlyDebugInfo() const;
+
protected:
ObjectFile *m_obj_file; // The object file that data for this section should
// be read from
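A short sketch of how a consumer could report per-module debug info size through the new SectionList::GetDebugInfoSize(), which presumably consults Section::ContainsOnlyDebugInfo() for each section:

#include "lldb/Core/Module.h"
#include "lldb/Core/Section.h"
#include "lldb/Symbol/ObjectFile.h"

// Returns the size of the debug-info-only sections of a module, or 0 if the
// module has no object file or no section list.
uint64_t DebugInfoSizeOf(lldb_private::Module &module) {
  if (lldb_private::ObjectFile *obj_file = module.GetObjectFile())
    if (lldb_private::SectionList *sections = obj_file->GetSectionList())
      return sections->GetDebugInfoSize();
  return 0;
}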
diff --git a/lldb/include/lldb/Core/StructuredDataImpl.h b/lldb/include/lldb/Core/StructuredDataImpl.h
index 929ce21fb2f9..d6f64451e5c2 100644
--- a/lldb/include/lldb/Core/StructuredDataImpl.h
+++ b/lldb/include/lldb/Core/StructuredDataImpl.h
@@ -152,6 +152,8 @@ public:
return (::snprintf(dst, dst_len, "%s", result.data()));
}
+ StructuredData::ObjectSP GetObjectSP() const { return m_data_sp; }
+
private:
lldb::StructuredDataPluginWP m_plugin_wp;
StructuredData::ObjectSP m_data_sp;
diff --git a/lldb/include/lldb/Core/ValueObjectConstResultImpl.h b/lldb/include/lldb/Core/ValueObjectConstResultImpl.h
index 2536c51fa574..5a7a079d3095 100644
--- a/lldb/include/lldb/Core/ValueObjectConstResultImpl.h
+++ b/lldb/include/lldb/Core/ValueObjectConstResultImpl.h
@@ -68,7 +68,6 @@ private:
ValueObject *m_impl_backend;
lldb::addr_t m_live_address;
AddressType m_live_address_type;
- lldb::ValueObjectSP m_load_addr_backend;
lldb::ValueObjectSP m_address_of_backend;
ValueObjectConstResultImpl(const ValueObjectConstResultImpl &) = delete;
diff --git a/lldb/include/lldb/Core/ValueObjectDynamicValue.h b/lldb/include/lldb/Core/ValueObjectDynamicValue.h
index 8822a1d39249..09dcd0f968be 100644
--- a/lldb/include/lldb/Core/ValueObjectDynamicValue.h
+++ b/lldb/include/lldb/Core/ValueObjectDynamicValue.h
@@ -32,7 +32,7 @@ class Status;
/// set lldb type.
class ValueObjectDynamicValue : public ValueObject {
public:
- ~ValueObjectDynamicValue() override;
+ ~ValueObjectDynamicValue() = default;
llvm::Optional<uint64_t> GetByteSize() override;
@@ -68,14 +68,6 @@ public:
lldb::ValueObjectSP GetStaticValue() override { return m_parent->GetSP(); }
- void SetOwningSP(lldb::ValueObjectSP &owning_sp) {
- if (m_owning_valobj_sp == owning_sp)
- return;
-
- assert(m_owning_valobj_sp.get() == nullptr);
- m_owning_valobj_sp = owning_sp;
- }
-
bool SetValueFromCString(const char *value_str, Status &error) override;
bool SetData(DataExtractor &data, Status &error) override;
@@ -117,7 +109,6 @@ protected:
Address m_address; ///< The variable that this value object is based upon
TypeAndOrName m_dynamic_type_info; // We can have a type_sp or just a name
- lldb::ValueObjectSP m_owning_valobj_sp;
lldb::DynamicValueType m_use_dynamic;
TypeImpl m_type_impl;
diff --git a/lldb/include/lldb/Core/ValueObjectRegister.h b/lldb/include/lldb/Core/ValueObjectRegister.h
index e210b36d2a45..20a7411b6fde 100644
--- a/lldb/include/lldb/Core/ValueObjectRegister.h
+++ b/lldb/include/lldb/Core/ValueObjectRegister.h
@@ -84,7 +84,7 @@ public:
static lldb::ValueObjectSP Create(ExecutionContextScope *exe_scope,
lldb::RegisterContextSP &reg_ctx_sp,
- uint32_t reg_num);
+ const RegisterInfo *reg_info);
llvm::Optional<uint64_t> GetByteSize() override;
@@ -119,15 +119,16 @@ protected:
CompilerType m_compiler_type;
private:
- void ConstructObject(uint32_t reg_num);
+ void ConstructObject(const RegisterInfo *reg_info);
friend class ValueObjectRegisterSet;
ValueObjectRegister(ValueObject &parent, lldb::RegisterContextSP &reg_ctx_sp,
- uint32_t reg_num);
+ const RegisterInfo *reg_info);
ValueObjectRegister(ExecutionContextScope *exe_scope,
ValueObjectManager &manager,
- lldb::RegisterContextSP &reg_ctx_sp, uint32_t reg_num);
+ lldb::RegisterContextSP &reg_ctx_sp,
+ const RegisterInfo *reg_info);
// For ValueObject only
ValueObjectRegister(const ValueObjectRegister &) = delete;
diff --git a/lldb/include/lldb/DataFormatters/FormattersHelpers.h b/lldb/include/lldb/DataFormatters/FormattersHelpers.h
index 892807063b9c..4f8f0e8455cd 100644
--- a/lldb/include/lldb/DataFormatters/FormattersHelpers.h
+++ b/lldb/include/lldb/DataFormatters/FormattersHelpers.h
@@ -56,7 +56,7 @@ void AddFilter(TypeCategoryImpl::SharedPointer category_sp,
size_t ExtractIndexFromString(const char *item_name);
-lldb::addr_t GetArrayAddressOrPointerValue(ValueObject &valobj);
+Address GetArrayAddressOrPointerValue(ValueObject &valobj);
lldb::ValueObjectSP GetValueOfLibCXXCompressedPair(ValueObject &pair);
diff --git a/lldb/include/lldb/DataFormatters/StringPrinter.h b/lldb/include/lldb/DataFormatters/StringPrinter.h
index 4a6e2e9051bf..4169f53e63f3 100644
--- a/lldb/include/lldb/DataFormatters/StringPrinter.h
+++ b/lldb/include/lldb/DataFormatters/StringPrinter.h
@@ -12,9 +12,9 @@
#include <functional>
#include <string>
-#include "lldb/lldb-forward.h"
-
+#include "lldb/Core/Address.h"
#include "lldb/Utility/DataExtractor.h"
+#include "lldb/lldb-forward.h"
namespace lldb_private {
namespace formatters {
@@ -105,21 +105,21 @@ public:
ReadStringAndDumpToStreamOptions(ValueObject &valobj);
- void SetLocation(uint64_t l) { m_location = l; }
+ void SetLocation(Address l) { m_location = std::move(l); }
- uint64_t GetLocation() const { return m_location; }
+ const Address &GetLocation() const { return m_location; }
- void SetProcessSP(lldb::ProcessSP p) { m_process_sp = std::move(p); }
+ void SetTargetSP(lldb::TargetSP t) { m_target_sp = std::move(t); }
- lldb::ProcessSP GetProcessSP() const { return m_process_sp; }
+ lldb::TargetSP GetTargetSP() const { return m_target_sp; }
void SetHasSourceSize(bool e) { m_has_source_size = e; }
bool HasSourceSize() const { return m_has_source_size; }
private:
- uint64_t m_location = 0;
- lldb::ProcessSP m_process_sp;
+ Address m_location;
+ lldb::TargetSP m_target_sp;
/// True iff we know the source size of the string.
bool m_has_source_size = false;
};
@@ -133,9 +133,9 @@ public:
ReadBufferAndDumpToStreamOptions(
const ReadStringAndDumpToStreamOptions &options);
- void SetData(DataExtractor d) { m_data = d; }
+ void SetData(DataExtractor &&d) { m_data = std::move(d); }
- lldb_private::DataExtractor GetData() const { return m_data; }
+ const lldb_private::DataExtractor &GetData() const { return m_data; }
void SetIsTruncated(bool t) { m_is_truncated = t; }
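The read options now carry an Address and a Target rather than a raw load address and a Process, so string formatters can read from file-backed data without a live process. A small sketch of filling in the options; the helper function itself is hypothetical:

#include "lldb/Core/ValueObject.h"
#include "lldb/DataFormatters/StringPrinter.h"

void ConfigureStringRead(
    lldb_private::formatters::StringPrinter::ReadStringAndDumpToStreamOptions
        &options,
    lldb_private::ValueObject &valobj, const lldb_private::Address &location) {
  options.SetLocation(location);             // an Address, not a uint64_t
  options.SetTargetSP(valobj.GetTargetSP()); // a Target, not a Process
  options.SetHasSourceSize(false);
}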
diff --git a/lldb/include/lldb/Expression/IRExecutionUnit.h b/lldb/include/lldb/Expression/IRExecutionUnit.h
index ad3c7372e67d..bb43851e17c9 100644
--- a/lldb/include/lldb/Expression/IRExecutionUnit.h
+++ b/lldb/include/lldb/Expression/IRExecutionUnit.h
@@ -214,26 +214,21 @@ private:
Status DisassembleFunction(Stream &stream, lldb::ProcessSP &process_sp);
- struct SearchSpec;
-
- void CollectCandidateCNames(std::vector<SearchSpec> &C_specs,
+ void CollectCandidateCNames(std::vector<ConstString> &C_names,
ConstString name);
- void CollectCandidateCPlusPlusNames(std::vector<SearchSpec> &CPP_specs,
- const std::vector<SearchSpec> &C_specs,
+ void CollectCandidateCPlusPlusNames(std::vector<ConstString> &CPP_names,
+ const std::vector<ConstString> &C_names,
const SymbolContext &sc);
- void CollectFallbackNames(std::vector<SearchSpec> &fallback_specs,
- const std::vector<SearchSpec> &C_specs);
-
- lldb::addr_t FindInSymbols(const std::vector<SearchSpec> &specs,
+ lldb::addr_t FindInSymbols(const std::vector<ConstString> &names,
const lldb_private::SymbolContext &sc,
bool &symbol_was_missing_weak);
- lldb::addr_t FindInRuntimes(const std::vector<SearchSpec> &specs,
+ lldb::addr_t FindInRuntimes(const std::vector<ConstString> &names,
const lldb_private::SymbolContext &sc);
- lldb::addr_t FindInUserDefinedSymbols(const std::vector<SearchSpec> &specs,
+ lldb::addr_t FindInUserDefinedSymbols(const std::vector<ConstString> &names,
const lldb_private::SymbolContext &sc);
void ReportSymbolLookupError(ConstString name);
diff --git a/lldb/include/lldb/Host/Config.h.cmake b/lldb/include/lldb/Host/Config.h.cmake
index c667708a90a6..777a6d1be541 100644
--- a/lldb/include/lldb/Host/Config.h.cmake
+++ b/lldb/include/lldb/Host/Config.h.cmake
@@ -13,17 +13,12 @@
#cmakedefine01 LLDB_HAVE_EL_RFUNC_T
-
-#cmakedefine01 HAVE_SYS_TYPES_H
-
#cmakedefine01 HAVE_SYS_EVENT_H
#cmakedefine01 HAVE_PPOLL
#cmakedefine01 HAVE_PTSNAME_R
-#cmakedefine01 HAVE_SIGACTION
-
#cmakedefine01 HAVE_PROCESS_VM_READV
#cmakedefine01 HAVE_NR_PROCESS_VM_READV
diff --git a/lldb/include/lldb/Host/File.h b/lldb/include/lldb/Host/File.h
index d364d954a1c1..d10ec1fe282a 100644
--- a/lldb/include/lldb/Host/File.h
+++ b/lldb/include/lldb/Host/File.h
@@ -10,6 +10,7 @@
#define LLDB_HOST_FILE_H
#include "lldb/Host/PosixApi.h"
+#include "lldb/Host/Terminal.h"
#include "lldb/Utility/IOObject.h"
#include "lldb/Utility/Status.h"
#include "lldb/lldb-private.h"
@@ -39,24 +40,29 @@ public:
// NB this enum is used in the lldb platform gdb-remote packet
// vFile:open: and existing values cannot be modified.
//
- // FIXME
- // These values do not match the values used by GDB
+ // The first set of values is defined by gdb headers and can be found
+ // in the documentation at:
// * https://sourceware.org/gdb/onlinedocs/gdb/Open-Flags.html#Open-Flags
- // * rdar://problem/46788934
+ //
+ // The second half are LLDB extensions and use the highest uint32_t bits
+ // to avoid risk of collisions with future gdb remote protocol changes.
enum OpenOptions : uint32_t {
- eOpenOptionRead = (1u << 0), // Open file for reading
- eOpenOptionWrite = (1u << 1), // Open file for writing
+ eOpenOptionReadOnly = 0x0, // Open file for reading (only)
+ eOpenOptionWriteOnly = 0x1, // Open file for writing (only)
+ eOpenOptionReadWrite = 0x2, // Open file for both reading and writing
eOpenOptionAppend =
- (1u << 2), // Don't truncate file when opening, append to end of file
- eOpenOptionTruncate = (1u << 3), // Truncate file when opening
- eOpenOptionNonBlocking = (1u << 4), // File reads
- eOpenOptionCanCreate = (1u << 5), // Create file if doesn't already exist
+ 0x8, // Don't truncate file when opening, append to end of file
+ eOpenOptionCanCreate = 0x200, // Create file if doesn't already exist
+ eOpenOptionTruncate = 0x400, // Truncate file when opening
eOpenOptionCanCreateNewOnly =
- (1u << 6), // Can create file only if it doesn't already exist
- eOpenOptionDontFollowSymlinks = (1u << 7),
+ 0x800, // Can create file only if it doesn't already exist
+
+ eOpenOptionNonBlocking = (1u << 28), // File reads
+ eOpenOptionDontFollowSymlinks = (1u << 29),
eOpenOptionCloseOnExec =
- (1u << 8), // Close the file when executing a new process
- LLVM_MARK_AS_BITMASK_ENUM(/* largest_value= */ eOpenOptionCloseOnExec)
+ (1u << 30), // Close the file when executing a new process
+ eOpenOptionInvalid = (1u << 31), // Used as invalid value
+ LLVM_MARK_AS_BITMASK_ENUM(/* largest_value= */ eOpenOptionInvalid)
};
static mode_t ConvertOpenOptionsForPOSIXOpen(OpenOptions open_options);
@@ -303,8 +309,8 @@ public:
/// Some options like eOpenOptionDontFollowSymlinks only make
/// sense when a file is being opened (or not at all)
/// and may not be preserved for this method. But any valid
- /// File should return either or both of eOpenOptionRead and
- /// eOpenOptionWrite here.
+ /// File should return either eOpenOptionReadOnly, eOpenOptionWriteOnly
+ /// or eOpenOptionReadWrite here.
///
/// \return
/// OpenOptions flags for this file, or an error.
@@ -428,6 +434,45 @@ private:
const NativeFile &operator=(const NativeFile &) = delete;
};
+class SerialPort : public NativeFile {
+public:
+ struct Options {
+ llvm::Optional<unsigned int> BaudRate = llvm::None;
+ llvm::Optional<Terminal::Parity> Parity = llvm::None;
+ llvm::Optional<Terminal::ParityCheck> ParityCheck = llvm::None;
+ llvm::Optional<unsigned int> StopBits = llvm::None;
+ };
+
+ // Obtain Options corresponding to the passed URL query string
+ // (i.e. the part after '?').
+ static llvm::Expected<Options> OptionsFromURL(llvm::StringRef urlqs);
+
+ static llvm::Expected<std::unique_ptr<SerialPort>>
+ Create(int fd, OpenOptions options, Options serial_options,
+ bool transfer_ownership);
+
+ bool IsValid() const override {
+ return NativeFile::IsValid() && m_is_interactive == eLazyBoolYes;
+ }
+
+ Status Close() override;
+
+ static char ID;
+ virtual bool isA(const void *classID) const override {
+ return classID == &ID || File::isA(classID);
+ }
+ static bool classof(const File *file) { return file->isA(&ID); }
+
+private:
+ SerialPort(int fd, OpenOptions options, Options serial_options,
+ bool transfer_ownership);
+
+ SerialPort(const SerialPort &) = delete;
+ const SerialPort &operator=(const SerialPort &) = delete;
+
+ TerminalState m_state;
+};
+
} // namespace lldb_private
#endif // LLDB_HOST_FILE_H
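The new SerialPort type builds its settings from the query part of a connection URL and then wraps an already-open descriptor. A sketch of the creation path; the query key ("baud") is assumed from the OptionsFromURL() comment rather than taken from the parser itself:

#include "lldb/Host/File.h"
#include "llvm/Support/Error.h"

llvm::Expected<std::unique_ptr<lldb_private::SerialPort>>
OpenSerial(int fd) {
  auto serial_options =
      lldb_private::SerialPort::OptionsFromURL("baud=115200"); // assumed key
  if (!serial_options)
    return serial_options.takeError();
  return lldb_private::SerialPort::Create(
      fd, lldb_private::File::eOpenOptionReadWrite, *serial_options,
      /*transfer_ownership=*/true);
}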
diff --git a/lldb/include/lldb/Host/MainLoop.h b/lldb/include/lldb/Host/MainLoop.h
index 06785bbdbe24..94499f583463 100644
--- a/lldb/include/lldb/Host/MainLoop.h
+++ b/lldb/include/lldb/Host/MainLoop.h
@@ -95,7 +95,7 @@ private:
struct SignalInfo {
std::list<Callback> callbacks;
-#if HAVE_SIGACTION
+#ifndef SIGNAL_POLLING_UNSUPPORTED
struct sigaction old_action;
#endif
bool was_blocked : 1;
diff --git a/lldb/include/lldb/Host/Socket.h b/lldb/include/lldb/Host/Socket.h
index 36db0ec63e9d..01f790ee11fb 100644
--- a/lldb/include/lldb/Host/Socket.h
+++ b/lldb/include/lldb/Host/Socket.h
@@ -16,7 +16,6 @@
#include "lldb/Host/SocketAddress.h"
#include "lldb/Utility/IOObject.h"
-#include "lldb/Utility/Predicate.h"
#include "lldb/Utility/Status.h"
#ifdef _WIN32
@@ -48,6 +47,15 @@ public:
ProtocolUnixAbstract
};
+ struct HostAndPort {
+ std::string hostname;
+ uint16_t port;
+
+ bool operator==(const HostAndPort &R) const {
+ return port == R.port && hostname == R.hostname;
+ }
+ };
+
static const NativeSocket kInvalidSocketValue;
~Socket() override;
@@ -68,7 +76,7 @@ public:
// the socket after it is initialized, but before entering a blocking accept.
static llvm::Expected<std::unique_ptr<TCPSocket>>
TcpListen(llvm::StringRef host_and_port, bool child_processes_inherit,
- Predicate<uint16_t> *predicate, int backlog = 5);
+ int backlog = 5);
static llvm::Expected<std::unique_ptr<Socket>>
TcpConnect(llvm::StringRef host_and_port, bool child_processes_inherit);
@@ -76,18 +84,6 @@ public:
static llvm::Expected<std::unique_ptr<UDPSocket>>
UdpConnect(llvm::StringRef host_and_port, bool child_processes_inherit);
- static Status UnixDomainConnect(llvm::StringRef host_and_port,
- bool child_processes_inherit,
- Socket *&socket);
- static Status UnixDomainAccept(llvm::StringRef host_and_port,
- bool child_processes_inherit, Socket *&socket);
- static Status UnixAbstractConnect(llvm::StringRef host_and_port,
- bool child_processes_inherit,
- Socket *&socket);
- static Status UnixAbstractAccept(llvm::StringRef host_and_port,
- bool child_processes_inherit,
- Socket *&socket);
-
int GetOption(int level, int option_name, int &option_value);
int SetOption(int level, int option_name, int option_value);
@@ -103,9 +99,8 @@ public:
bool IsValid() const override { return m_socket != kInvalidSocketValue; }
WaitableHandle GetWaitableHandle() override;
- static bool DecodeHostAndPort(llvm::StringRef host_and_port,
- std::string &host_str, std::string &port_str,
- int32_t &port, Status *error_ptr);
+ static llvm::Expected<HostAndPort>
+ DecodeHostAndPort(llvm::StringRef host_and_port);
// If this Socket is connected then return the URI used to connect.
virtual std::string GetRemoteConnectionURI() const { return ""; };
@@ -130,6 +125,9 @@ protected:
bool m_should_close_fd;
};
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
+ const Socket::HostAndPort &HP);
+
} // namespace lldb_private
#endif // LLDB_HOST_SOCKET_H
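DecodeHostAndPort() now reports parse failures through llvm::Expected instead of a Status out-parameter and string references. A minimal usage sketch:

#include "lldb/Host/Socket.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"

bool PrintHostAndPort(llvm::StringRef spec) { // e.g. "localhost:1234"
  llvm::Expected<lldb_private::Socket::HostAndPort> host_port =
      lldb_private::Socket::DecodeHostAndPort(spec);
  if (!host_port) {
    llvm::errs() << llvm::toString(host_port.takeError()) << "\n";
    return false;
  }
  llvm::errs() << *host_port << "\n"; // uses the new operator<< overload
  return true;
}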
diff --git a/lldb/include/lldb/Host/StringConvert.h b/lldb/include/lldb/Host/StringConvert.h
deleted file mode 100644
index 33608a85ff42..000000000000
--- a/lldb/include/lldb/Host/StringConvert.h
+++ /dev/null
@@ -1,38 +0,0 @@
-//===-- StringConvert.h -----------------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLDB_HOST_STRINGCONVERT_H
-#define LLDB_HOST_STRINGCONVERT_H
-
-#include <cstdint>
-
-namespace lldb_private {
-
-namespace StringConvert {
-
-/// \namespace StringConvert StringConvert.h "lldb/Host/StringConvert.h"
-/// Utility classes for converting strings into Integers
-
-int32_t ToSInt32(const char *s, int32_t fail_value = 0, int base = 0,
- bool *success_ptr = nullptr);
-
-uint32_t ToUInt32(const char *s, uint32_t fail_value = 0, int base = 0,
- bool *success_ptr = nullptr);
-
-int64_t ToSInt64(const char *s, int64_t fail_value = 0, int base = 0,
- bool *success_ptr = nullptr);
-
-uint64_t ToUInt64(const char *s, uint64_t fail_value = 0, int base = 0,
- bool *success_ptr = nullptr);
-
-double ToDouble(const char *s, double fail_value = 0.0,
- bool *success_ptr = nullptr);
-} // namespace StringConvert
-} // namespace lldb_private
-
-#endif
diff --git a/lldb/include/lldb/Host/Terminal.h b/lldb/include/lldb/Host/Terminal.h
index ca91d6b59720..8ff6d75657a7 100644
--- a/lldb/include/lldb/Host/Terminal.h
+++ b/lldb/include/lldb/Host/Terminal.h
@@ -10,15 +10,35 @@
#define LLDB_HOST_TERMINAL_H
#if defined(__cplusplus)
-#include "lldb/Host/Config.h"
#include "lldb/lldb-private.h"
-
-struct termios;
+#include "llvm/Support/Error.h"
namespace lldb_private {
+class TerminalState;
+
class Terminal {
public:
+ enum class Parity {
+ No,
+ Even,
+ Odd,
+ Space,
+ Mark,
+ };
+
+ enum class ParityCheck {
+ // No parity checking
+ No,
+ // Replace erroneous bytes with NUL
+ ReplaceWithNUL,
+ // Ignore erroneous bytes
+ Ignore,
+ // Mark erroneous bytes by prepending them with \xFF\x00; real \xFF
+ // is escaped to \xFF\xFF
+ Mark,
+ };
+
Terminal(int fd = -1) : m_fd(fd) {}
~Terminal() = default;
@@ -33,25 +53,54 @@ public:
void Clear() { m_fd = -1; }
- bool SetEcho(bool enabled);
+ llvm::Error SetEcho(bool enabled);
+
+ llvm::Error SetCanonical(bool enabled);
+
+ llvm::Error SetRaw();
- bool SetCanonical(bool enabled);
+ llvm::Error SetBaudRate(unsigned int baud_rate);
+
+ llvm::Error SetStopBits(unsigned int stop_bits);
+
+ llvm::Error SetParity(Parity parity);
+
+ llvm::Error SetParityCheck(ParityCheck parity_check);
+
+ llvm::Error SetHardwareFlowControl(bool enabled);
protected:
+ struct Data;
+
int m_fd; // This may or may not be a terminal file descriptor
+
+ llvm::Expected<Data> GetData();
+ llvm::Error SetData(const Data &data);
+
+ friend class TerminalState;
};
-/// \class State Terminal.h "lldb/Host/Terminal.h"
-/// A terminal state saving/restoring class.
+/// \class TerminalState Terminal.h "lldb/Host/Terminal.h"
+/// A RAII-friendly terminal state saving/restoring class.
///
/// This class can be used to remember the terminal state for a file
/// descriptor and later restore that state as it originally was.
class TerminalState {
public:
- /// Default constructor
- TerminalState();
+ /// Construct a new instance and optionally save terminal state.
+ ///
+ /// \param[in] term
+ /// The Terminal instance holding the file descriptor to save the state
+ /// of. If the instance is not associated with a fd, no state will
+ /// be saved.
+ ///
+ /// \param[in] save_process_group
+ /// If \b true, save the process group settings, else do not
+ /// save the process group settings for a TTY.
+ TerminalState(Terminal term = -1, bool save_process_group = false);
- /// Destructor
+ /// Destroy the instance, restoring terminal state if saved. If restoring
+ /// state is undesirable, the instance needs to be reset before destruction.
~TerminalState();
/// Save the TTY state for \a fd.
@@ -60,8 +109,8 @@ public:
/// "save_process_group" is true, attempt to save the process group info for
/// the TTY.
///
- /// \param[in] fd
- /// The file descriptor to save the state of.
+ /// \param[in] term
+ /// The Terminal instance holding fd to save.
///
/// \param[in] save_process_group
/// If \b true, save the process group settings, else do not
@@ -70,7 +119,7 @@ public:
/// \return
/// Returns \b true if \a fd describes a TTY and if the state
/// was able to be saved, \b false otherwise.
- bool Save(int fd, bool save_process_group);
+ bool Save(Terminal term, bool save_process_group);
/// Restore the TTY state to the cached state.
///
@@ -115,66 +164,10 @@ protected:
bool ProcessGroupIsValid() const;
// Member variables
- Terminal m_tty; ///< A terminal
- int m_tflags = -1; ///< Cached tflags information.
-#if LLDB_ENABLE_TERMIOS
- std::unique_ptr<struct termios>
- m_termios_up; ///< Cached terminal state information.
-#endif
- lldb::pid_t m_process_group = -1; ///< Cached process group information.
-};
-
-/// \class TerminalStateSwitcher Terminal.h "lldb/Host/Terminal.h"
-/// A TTY state switching class.
-///
-/// This class can be used to remember 2 TTY states for a given file
-/// descriptor and switch between the two states.
-class TerminalStateSwitcher {
-public:
- /// Constructor
- TerminalStateSwitcher();
-
- /// Destructor
- ~TerminalStateSwitcher();
-
- /// Get the number of possible states to save.
- ///
- /// \return
- /// The number of states that this TTY switcher object contains.
- uint32_t GetNumberOfStates() const;
-
- /// Restore the TTY state for state at index \a idx.
- ///
- /// \return
- /// Returns \b true if the TTY state was successfully restored,
- /// \b false otherwise.
- bool Restore(uint32_t idx) const;
-
- /// Save the TTY state information for the state at index \a idx. The TTY
- /// state is saved for the file descriptor \a fd and the process group
- /// information will also be saved if requested by \a save_process_group.
- ///
- /// \param[in] idx
- /// The index into the state array where the state should be
- /// saved.
- ///
- /// \param[in] fd
- /// The file descriptor for which to save the settings.
- ///
- /// \param[in] save_process_group
- /// If \b true, save the process group information for the TTY.
- ///
- /// \return
- /// Returns \b true if the save was successful, \b false
- /// otherwise.
- bool Save(uint32_t idx, int fd, bool save_process_group);
-
-protected:
- // Member variables
- mutable uint32_t m_currentState =
- UINT32_MAX; ///< The currently active TTY state index.
- TerminalState
- m_ttystates[2]; ///< The array of TTY states that holds saved TTY info.
+ Terminal m_tty; ///< A terminal
+ int m_tflags = -1; ///< Cached tflags information.
+ std::unique_ptr<Terminal::Data> m_data; ///< Platform-specific implementation.
+ lldb::pid_t m_process_group = -1; ///< Cached process group information.
};
} // namespace lldb_private
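TerminalState now saves in its constructor and restores in its destructor, and the Terminal setters report failure through llvm::Error. A minimal sketch of putting stdin into raw mode for the duration of a scope:

#include "lldb/Host/Terminal.h"
#include "llvm/Support/Error.h"
#include <unistd.h>

llvm::Error RunInRawMode() {
  lldb_private::Terminal term(STDIN_FILENO);
  lldb_private::TerminalState saved_state(term); // restored on scope exit
  if (llvm::Error error = term.SetRaw())
    return error;
  // ... interact with the terminal in raw mode ...
  return llvm::Error::success();
}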
diff --git a/lldb/include/lldb/Host/common/NativeProcessProtocol.h b/lldb/include/lldb/Host/common/NativeProcessProtocol.h
index 770149e3fb28..7c3458527616 100644
--- a/lldb/include/lldb/Host/common/NativeProcessProtocol.h
+++ b/lldb/include/lldb/Host/common/NativeProcessProtocol.h
@@ -250,8 +250,9 @@ public:
auxv = (1u << 4),
libraries_svr4 = (1u << 5),
memory_tagging = (1u << 6),
+ savecore = (1u << 7),
- LLVM_MARK_AS_BITMASK_ENUM(memory_tagging)
+ LLVM_MARK_AS_BITMASK_ENUM(savecore)
};
class Factory {
@@ -369,6 +370,19 @@ public:
m_enabled_extensions = flags;
}
+ /// Write a core dump (without crashing the program).
+ ///
+ /// \param[in] path_hint
+ /// Suggested core dump path (optional, can be empty).
+ ///
+ /// \return
+ /// Path to the core dump if successfully written, an error
+ /// otherwise.
+ virtual llvm::Expected<std::string> SaveCore(llvm::StringRef path_hint) {
+ return llvm::createStringError(llvm::inconvertibleErrorCode(),
+ "Not implemented");
+ }
+
protected:
struct SoftwareBreakpoint {
uint32_t ref_count;
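On the caller side the new SaveCore() hook either returns the path that was written or an error. A small sketch; the path hint is illustrative:

#include "lldb/Host/common/NativeProcessProtocol.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"

void TryWriteCore(lldb_private::NativeProcessProtocol &process) {
  llvm::Expected<std::string> path = process.SaveCore("/tmp/core-hint");
  if (!path) {
    llvm::errs() << "core dump failed: " << llvm::toString(path.takeError())
                 << "\n";
    return;
  }
  llvm::errs() << "core written to " << *path << "\n";
}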
diff --git a/lldb/include/lldb/Host/freebsd/HostInfoFreeBSD.h b/lldb/include/lldb/Host/freebsd/HostInfoFreeBSD.h
index 56f20bbd23d3..b2f3f08cd145 100644
--- a/lldb/include/lldb/Host/freebsd/HostInfoFreeBSD.h
+++ b/lldb/include/lldb/Host/freebsd/HostInfoFreeBSD.h
@@ -18,8 +18,7 @@ namespace lldb_private {
class HostInfoFreeBSD : public HostInfoPosix {
public:
static llvm::VersionTuple GetOSVersion();
- static bool GetOSBuildString(std::string &s);
- static bool GetOSKernelDescription(std::string &s);
+ static llvm::Optional<std::string> GetOSBuildString();
static FileSpec GetProgramFileSpec();
};
}
diff --git a/lldb/include/lldb/Host/netbsd/HostInfoNetBSD.h b/lldb/include/lldb/Host/netbsd/HostInfoNetBSD.h
index f9ad66eb2b2a..32644ce79a69 100644
--- a/lldb/include/lldb/Host/netbsd/HostInfoNetBSD.h
+++ b/lldb/include/lldb/Host/netbsd/HostInfoNetBSD.h
@@ -18,8 +18,7 @@ namespace lldb_private {
class HostInfoNetBSD : public HostInfoPosix {
public:
static llvm::VersionTuple GetOSVersion();
- static bool GetOSBuildString(std::string &s);
- static bool GetOSKernelDescription(std::string &s);
+ static llvm::Optional<std::string> GetOSBuildString();
static FileSpec GetProgramFileSpec();
};
}
diff --git a/lldb/include/lldb/Host/openbsd/HostInfoOpenBSD.h b/lldb/include/lldb/Host/openbsd/HostInfoOpenBSD.h
index 7ec1d5fc3606..01879ad5c0e4 100644
--- a/lldb/include/lldb/Host/openbsd/HostInfoOpenBSD.h
+++ b/lldb/include/lldb/Host/openbsd/HostInfoOpenBSD.h
@@ -18,8 +18,7 @@ namespace lldb_private {
class HostInfoOpenBSD : public HostInfoPosix {
public:
static llvm::VersionTuple GetOSVersion();
- static bool GetOSBuildString(std::string &s);
- static bool GetOSKernelDescription(std::string &s);
+ static llvm::Optional<std::string> GetOSBuildString();
static FileSpec GetProgramFileSpec();
};
}
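GetOSBuildString() now returns llvm::Optional<std::string> instead of filling a string by reference, and GetOSKernelDescription() moves to HostInfoPosix (see the next file). A sketch of the new shape, shown for the FreeBSD variant and only meaningful when built on a FreeBSD host; NetBSD and OpenBSD follow the same pattern:

#include "lldb/Host/freebsd/HostInfoFreeBSD.h"
#include "llvm/ADT/Optional.h"
#include <cstdio>

void PrintOSBuild() {
  if (llvm::Optional<std::string> build =
          lldb_private::HostInfoFreeBSD::GetOSBuildString())
    printf("OS build: %s\n", build->c_str());
  else
    printf("OS build string unavailable\n");
}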
diff --git a/lldb/include/lldb/Host/posix/ConnectionFileDescriptorPosix.h b/lldb/include/lldb/Host/posix/ConnectionFileDescriptorPosix.h
index 42be989dfa7b..35773d5907e9 100644
--- a/lldb/include/lldb/Host/posix/ConnectionFileDescriptorPosix.h
+++ b/lldb/include/lldb/Host/posix/ConnectionFileDescriptorPosix.h
@@ -16,9 +16,9 @@
#include "lldb/lldb-forward.h"
#include "lldb/Host/Pipe.h"
+#include "lldb/Host/Socket.h"
#include "lldb/Utility/Connection.h"
#include "lldb/Utility/IOObject.h"
-#include "lldb/Utility/Predicate.h"
namespace lldb_private {
@@ -28,16 +28,8 @@ class SocketAddress;
class ConnectionFileDescriptor : public Connection {
public:
- static const char *LISTEN_SCHEME;
- static const char *ACCEPT_SCHEME;
- static const char *UNIX_ACCEPT_SCHEME;
- static const char *CONNECT_SCHEME;
- static const char *TCP_CONNECT_SCHEME;
- static const char *UDP_SCHEME;
- static const char *UNIX_CONNECT_SCHEME;
- static const char *UNIX_ABSTRACT_CONNECT_SCHEME;
- static const char *FD_SCHEME;
- static const char *FILE_SCHEME;
+ typedef llvm::function_ref<void(llvm::StringRef local_socket_id)>
+ socket_id_callback_type;
ConnectionFileDescriptor(bool child_processes_inherit = false);
@@ -49,7 +41,12 @@ public:
bool IsConnected() const override;
- lldb::ConnectionStatus Connect(llvm::StringRef s, Status *error_ptr) override;
+ lldb::ConnectionStatus Connect(llvm::StringRef url,
+ Status *error_ptr) override;
+
+ lldb::ConnectionStatus Connect(llvm::StringRef url,
+ socket_id_callback_type socket_id_callback,
+ Status *error_ptr);
lldb::ConnectionStatus Disconnect(Status *error_ptr) override;
@@ -66,9 +63,7 @@ public:
bool InterruptRead() override;
- lldb::IOObjectSP GetReadObject() override { return m_read_sp; }
-
- uint16_t GetListeningPort(const Timeout<std::micro> &timeout);
+ lldb::IOObjectSP GetReadObject() override { return m_io_sp; }
bool GetChildProcessesInherit() const;
void SetChildProcessesInherit(bool child_processes_inherit);
@@ -78,37 +73,68 @@ protected:
void CloseCommandPipe();
- lldb::ConnectionStatus SocketListenAndAccept(llvm::StringRef host_and_port,
- Status *error_ptr);
+ lldb::ConnectionStatus
+ AcceptSocket(Socket::SocketProtocol socket_protocol,
+ llvm::StringRef socket_name,
+ llvm::function_ref<void(Socket &)> post_listen_callback,
+ Status *error_ptr);
+
+ lldb::ConnectionStatus ConnectSocket(Socket::SocketProtocol socket_protocol,
+ llvm::StringRef socket_name,
+ Status *error_ptr);
+
+ lldb::ConnectionStatus AcceptTCP(llvm::StringRef host_and_port,
+ socket_id_callback_type socket_id_callback,
+ Status *error_ptr);
lldb::ConnectionStatus ConnectTCP(llvm::StringRef host_and_port,
+ socket_id_callback_type socket_id_callback,
Status *error_ptr);
- lldb::ConnectionStatus ConnectUDP(llvm::StringRef args, Status *error_ptr);
+ lldb::ConnectionStatus ConnectUDP(llvm::StringRef args,
+ socket_id_callback_type socket_id_callback,
+ Status *error_ptr);
+
+ lldb::ConnectionStatus
+ ConnectNamedSocket(llvm::StringRef socket_name,
+ socket_id_callback_type socket_id_callback,
+ Status *error_ptr);
+
+ lldb::ConnectionStatus
+ AcceptNamedSocket(llvm::StringRef socket_name,
+ socket_id_callback_type socket_id_callback,
+ Status *error_ptr);
+
+ lldb::ConnectionStatus
+ AcceptAbstractSocket(llvm::StringRef socket_name,
+ socket_id_callback_type socket_id_callback,
+ Status *error_ptr);
- lldb::ConnectionStatus NamedSocketConnect(llvm::StringRef socket_name,
- Status *error_ptr);
+ lldb::ConnectionStatus
+ ConnectAbstractSocket(llvm::StringRef socket_name,
+ socket_id_callback_type socket_id_callback,
+ Status *error_ptr);
- lldb::ConnectionStatus NamedSocketAccept(llvm::StringRef socket_name,
- Status *error_ptr);
+ lldb::ConnectionStatus ConnectFD(llvm::StringRef args,
+ socket_id_callback_type socket_id_callback,
+ Status *error_ptr);
- lldb::ConnectionStatus UnixAbstractSocketConnect(llvm::StringRef socket_name,
- Status *error_ptr);
+ lldb::ConnectionStatus ConnectFile(llvm::StringRef args,
+ socket_id_callback_type socket_id_callback,
+ Status *error_ptr);
- lldb::IOObjectSP m_read_sp;
- lldb::IOObjectSP m_write_sp;
+ lldb::ConnectionStatus
+ ConnectSerialPort(llvm::StringRef args,
+ socket_id_callback_type socket_id_callback,
+ Status *error_ptr);
- Predicate<uint16_t>
- m_port_predicate; // Used when binding to port zero to wait for the thread
- // that creates the socket, binds and listens to
- // resolve the port number.
+ lldb::IOObjectSP m_io_sp;
Pipe m_pipe;
std::recursive_mutex m_mutex;
std::atomic<bool> m_shutting_down; // This marks that we are shutting down so
// if we get woken up from
// BytesAvailable to disconnect, we won't try to read again.
- bool m_waiting_for_accept = false;
bool m_child_processes_inherit;
std::string m_uri;
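The new Connect() overload reports the locally bound socket id (for example, the port actually chosen when listening on port 0) through a callback instead of the removed Predicate<uint16_t>. A sketch; the "listen://" URL spelling is an assumption based on the old scheme constants:

#include "lldb/Host/posix/ConnectionFileDescriptorPosix.h"
#include "lldb/Utility/Status.h"
#include "llvm/Support/raw_ostream.h"

lldb::ConnectionStatus ListenAndReport() {
  lldb_private::ConnectionFileDescriptor connection;
  lldb_private::Status error;
  return connection.Connect(
      "listen://localhost:0", // assumed URL spelling
      [](llvm::StringRef socket_id) {
        llvm::errs() << "bound to " << socket_id << "\n";
      },
      &error);
}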
diff --git a/lldb/include/lldb/Host/posix/HostInfoPosix.h b/lldb/include/lldb/Host/posix/HostInfoPosix.h
index 825c79f53ecb..f1ff6b860864 100644
--- a/lldb/include/lldb/Host/posix/HostInfoPosix.h
+++ b/lldb/include/lldb/Host/posix/HostInfoPosix.h
@@ -22,6 +22,7 @@ class HostInfoPosix : public HostInfoBase {
public:
static size_t GetPageSize();
static bool GetHostname(std::string &s);
+ static llvm::Optional<std::string> GetOSKernelDescription();
static uint32_t GetUserID();
static uint32_t GetGroupID();
diff --git a/lldb/include/lldb/Interpreter/CommandCompletions.h b/lldb/include/lldb/Interpreter/CommandCompletions.h
index c80bde0e719b..c13bc4997ff3 100644
--- a/lldb/include/lldb/Interpreter/CommandCompletions.h
+++ b/lldb/include/lldb/Interpreter/CommandCompletions.h
@@ -13,6 +13,7 @@
#include "lldb/Core/FileSpecList.h"
#include "lldb/Core/SearchFilter.h"
+#include "lldb/Interpreter/Options.h"
#include "lldb/Utility/CompletionRequest.h"
#include "lldb/Utility/RegularExpression.h"
#include "lldb/lldb-private.h"
@@ -151,6 +152,15 @@ public:
static void TypeCategoryNames(CommandInterpreter &interpreter,
CompletionRequest &request,
SearchFilter *searcher);
+
+ /// This completer works for commands whose only arguments are a command path.
+ /// It isn't tied to an argument type because it completes not on a single
+ /// argument but on the sequence of arguments, so you have to invoke it by
+ /// hand.
+ static void
+ CompleteModifiableCmdPathArgs(CommandInterpreter &interpreter,
+ CompletionRequest &request,
+ OptionElementVector &opt_element_vector);
};
} // namespace lldb_private
diff --git a/lldb/include/lldb/Interpreter/CommandInterpreter.h b/lldb/include/lldb/Interpreter/CommandInterpreter.h
index 3b3daced3e33..e6f0d5f9c4d4 100644
--- a/lldb/include/lldb/Interpreter/CommandInterpreter.h
+++ b/lldb/include/lldb/Interpreter/CommandInterpreter.h
@@ -231,11 +231,12 @@ public:
};
enum CommandTypes {
- eCommandTypesBuiltin = 0x0001, // native commands such as "frame"
- eCommandTypesUserDef = 0x0002, // scripted commands
- eCommandTypesAliases = 0x0004, // aliases such as "po"
- eCommandTypesHidden = 0x0008, // commands prefixed with an underscore
- eCommandTypesAllThem = 0xFFFF // all commands
+ eCommandTypesBuiltin = 0x0001, //< native commands such as "frame"
+ eCommandTypesUserDef = 0x0002, //< scripted commands
+ eCommandTypesUserMW = 0x0004, //< multiword commands (command containers)
+ eCommandTypesAliases = 0x0008, //< aliases such as "po"
+ eCommandTypesHidden = 0x0010, //< commands prefixed with an underscore
+ eCommandTypesAllThem = 0xFFFF //< all commands
};
CommandInterpreter(Debugger &debugger, bool synchronous_execution);
@@ -256,8 +257,8 @@ public:
bool AddCommand(llvm::StringRef name, const lldb::CommandObjectSP &cmd_sp,
bool can_replace);
- bool AddUserCommand(llvm::StringRef name, const lldb::CommandObjectSP &cmd_sp,
- bool can_replace);
+ Status AddUserCommand(llvm::StringRef name,
+ const lldb::CommandObjectSP &cmd_sp, bool can_replace);
lldb::CommandObjectSP GetCommandSPExact(llvm::StringRef cmd,
bool include_aliases = false) const;
@@ -266,12 +267,49 @@ public:
StringList *matches = nullptr,
StringList *descriptions = nullptr) const;
+ CommandObject *GetUserCommandObject(llvm::StringRef cmd,
+ StringList *matches = nullptr,
+ StringList *descriptions = nullptr) const;
+
+ /// Determine whether a root level, built-in command with this name exists.
bool CommandExists(llvm::StringRef cmd) const;
+ /// Determine whether an alias command with this name exists
bool AliasExists(llvm::StringRef cmd) const;
+ /// Determine whether a root-level user command with this name exists.
bool UserCommandExists(llvm::StringRef cmd) const;
+ /// Determine whether a root-level user multiword command with this name
+ /// exists.
+ bool UserMultiwordCommandExists(llvm::StringRef cmd) const;
+
+ /// Look up the command pointed to by path encoded in the arguments of
+ /// the incoming command object. If all the path components exist
+ /// and are all actual commands - not aliases, and the leaf command is a
+ /// multiword command, return the command. Otherwise return nullptr, and put
+ /// a useful diagnostic in the Status object.
+ ///
+ /// \param[in] path
+ /// An Args object holding the path in its arguments
+ /// \param[in] leaf_is_command
+ /// If true, return the container of the leaf name rather than looking up
+ /// the whole path as a leaf command. The leaf needn't exist in this case.
+ /// \param[in,out] result
+ /// If the path is not found, this error shows where we got off track.
+ /// \return
+ /// If found, a pointer to the CommandObjectMultiword pointed to by path,
+ /// or to the container of the leaf element if leaf_is_command is true.
+ /// Returns nullptr under two circumstances:
+ /// 1) The command is not found (check result.Fail())
+ /// 2) leaf_is_command is true and the path has only a leaf. We don't have
+ /// a dummy "contains everything" multiword command, so we return null
+ /// here, but in this case result.Success() is true.
+
+ CommandObjectMultiword *VerifyUserMultiwordCmdPath(Args &path,
+ bool leaf_is_command,
+ Status &result);
+
CommandAlias *AddAlias(llvm::StringRef alias_name,
lldb::CommandObjectSP &command_obj_sp,
llvm::StringRef args_string = llvm::StringRef());
@@ -283,6 +321,11 @@ public:
bool GetAliasFullName(llvm::StringRef cmd, std::string &full_name) const;
+ bool RemoveUserMultiword(llvm::StringRef multiword_name);
+
+ // Do we want to allow top-level user multiword commands to be deleted?
+ void RemoveAllUserMultiword() { m_user_mw_dict.clear(); }
+
bool RemoveUser(llvm::StringRef alias_name);
void RemoveAllUser() { m_user_dict.clear(); }
@@ -414,6 +457,8 @@ public:
bool HasUserCommands() const;
+ bool HasUserMultiwordCommands() const;
+
bool HasAliasOptions() const;
void BuildAliasCommandArgs(CommandObject *alias_cmd_obj,
@@ -421,6 +466,7 @@ public:
std::string &raw_input_string,
CommandReturnObject &result);
+ /// Picks the number out of a string of the form "%NNN", otherwise return 0.
int GetOptionArgumentPosition(const char *in_string);
void SkipLLDBInitFiles(bool skip_lldbinit_files) {
@@ -437,7 +483,8 @@ public:
StringList &commands_help,
bool search_builtin_commands,
bool search_user_commands,
- bool search_alias_commands);
+ bool search_alias_commands,
+ bool search_user_mw_commands);
bool GetBatchCommandMode() { return m_batch_command_mode; }
@@ -506,6 +553,10 @@ public:
return m_user_dict;
}
+ const CommandObject::CommandMap &GetUserMultiwordCommands() const {
+ return m_user_mw_dict;
+ }
+
const CommandObject::CommandMap &GetCommands() const {
return m_command_dict;
}
@@ -636,6 +687,8 @@ private:
CommandObject::CommandMap
m_alias_dict; // Stores user aliases/abbreviations for commands
CommandObject::CommandMap m_user_dict; // Stores user-defined commands
+ CommandObject::CommandMap
+ m_user_mw_dict; // Stores user-defined multiword commands
CommandHistory m_command_history;
std::string m_repeat_command; // Stores the command that will be executed for
// an empty command string.
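A sketch of resolving the container for a user multiword command path with the new VerifyUserMultiwordCmdPath(); the path contents are illustrative:

#include "lldb/Interpreter/CommandInterpreter.h"
#include "lldb/Utility/Args.h"
#include "lldb/Utility/Status.h"

lldb_private::CommandObjectMultiword *
FindUserContainer(lldb_private::CommandInterpreter &interpreter) {
  lldb_private::Args path("my-tools memory"); // illustrative command path
  lldb_private::Status error;
  return interpreter.VerifyUserMultiwordCmdPath(path, /*leaf_is_command=*/false,
                                                error);
}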
diff --git a/lldb/include/lldb/Interpreter/CommandObject.h b/lldb/include/lldb/Interpreter/CommandObject.h
index 8bc5d3e22355..89cc161993a9 100644
--- a/lldb/include/lldb/Interpreter/CommandObject.h
+++ b/lldb/include/lldb/Interpreter/CommandObject.h
@@ -145,6 +145,10 @@ public:
virtual bool IsMultiwordObject() { return false; }
+ bool IsUserCommand() { return m_is_user_command; }
+
+ void SetIsUserCommand(bool is_user) { m_is_user_command = is_user; }
+
virtual CommandObjectMultiword *GetAsMultiwordCommand() { return nullptr; }
virtual bool IsAlias() { return false; }
@@ -159,6 +163,10 @@ public:
return lldb::CommandObjectSP();
}
+ virtual lldb::CommandObjectSP GetSubcommandSPExact(llvm::StringRef sub_cmd) {
+ return lldb::CommandObjectSP();
+ }
+
virtual CommandObject *GetSubcommandObject(llvm::StringRef sub_cmd,
StringList *matches = nullptr) {
return nullptr;
@@ -183,6 +191,13 @@ public:
return false;
}
+ virtual llvm::Error LoadUserSubcommand(llvm::StringRef cmd_name,
+ const lldb::CommandObjectSP &command_obj,
+ bool can_replace) {
+ return llvm::createStringError(llvm::inconvertibleErrorCode(),
+ "can only add commands to container commands");
+ }
+
virtual bool WantsRawCommandString() = 0;
// By default, WantsCompletion = !WantsRawCommandString. Subclasses who want
@@ -367,6 +382,7 @@ protected:
lldb::CommandOverrideCallback m_deprecated_command_override_callback;
lldb::CommandOverrideCallbackWithResult m_command_override_callback;
void *m_command_override_baton;
+ bool m_is_user_command = false;
// Helper function to populate IDs or ID ranges as the command argument data
// to the specified command argument entry.
diff --git a/lldb/include/lldb/Interpreter/CommandObjectMultiword.h b/lldb/include/lldb/Interpreter/CommandObjectMultiword.h
index f330a745f9bd..a0e8d163c4b6 100644
--- a/lldb/include/lldb/Interpreter/CommandObjectMultiword.h
+++ b/lldb/include/lldb/Interpreter/CommandObjectMultiword.h
@@ -35,11 +35,19 @@ public:
bool LoadSubCommand(llvm::StringRef cmd_name,
const lldb::CommandObjectSP &command_obj) override;
+ llvm::Error LoadUserSubcommand(llvm::StringRef cmd_name,
+ const lldb::CommandObjectSP &command_obj,
+ bool can_replace) override;
+
+ llvm::Error RemoveUserSubcommand(llvm::StringRef cmd_name, bool multiword_okay);
+
void GenerateHelpText(Stream &output_stream) override;
lldb::CommandObjectSP GetSubcommandSP(llvm::StringRef sub_cmd,
StringList *matches = nullptr) override;
+ lldb::CommandObjectSP GetSubcommandSPExact(llvm::StringRef sub_cmd) override;
+
CommandObject *GetSubcommandObject(llvm::StringRef sub_cmd,
StringList *matches = nullptr) override;
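Adding a user-defined subcommand now yields an llvm::Error that the caller must consume. A small sketch; the command name is illustrative:

#include "lldb/Interpreter/CommandObjectMultiword.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"

bool AddUserSubcommand(lldb_private::CommandObjectMultiword &container,
                       const lldb::CommandObjectSP &cmd_sp) {
  if (llvm::Error error = container.LoadUserSubcommand(
          "my-subcommand", cmd_sp, /*can_replace=*/true)) {
    llvm::errs() << llvm::toString(std::move(error)) << "\n";
    return false;
  }
  return true;
}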
diff --git a/lldb/include/lldb/Interpreter/Property.h b/lldb/include/lldb/Interpreter/Property.h
index 97ec7ca1d4af..09f09358e8af 100644
--- a/lldb/include/lldb/Interpreter/Property.h
+++ b/lldb/include/lldb/Interpreter/Property.h
@@ -10,7 +10,6 @@
#define LLDB_INTERPRETER_PROPERTY_H
#include "lldb/Interpreter/OptionValue.h"
-#include "lldb/Utility/ConstString.h"
#include "lldb/Utility/Flags.h"
#include "lldb/lldb-defines.h"
#include "lldb/lldb-private-types.h"
@@ -37,13 +36,11 @@ class Property {
public:
Property(const PropertyDefinition &definition);
- Property(ConstString name, ConstString desc, bool is_global,
+ Property(llvm::StringRef name, llvm::StringRef desc, bool is_global,
const lldb::OptionValueSP &value_sp);
- llvm::StringRef GetName() const { return m_name.GetStringRef(); }
- llvm::StringRef GetDescription() const {
- return m_description.GetStringRef();
- }
+ llvm::StringRef GetName() const { return m_name; }
+ llvm::StringRef GetDescription() const { return m_description; }
const lldb::OptionValueSP &GetValue() const { return m_value_sp; }
@@ -67,8 +64,8 @@ public:
void SetValueChangedCallback(std::function<void()> callback);
protected:
- ConstString m_name;
- ConstString m_description;
+ std::string m_name;
+ std::string m_description;
lldb::OptionValueSP m_value_sp;
bool m_is_global;
};
diff --git a/lldb/include/lldb/Interpreter/ScriptInterpreter.h b/lldb/include/lldb/Interpreter/ScriptInterpreter.h
index 80a054b32ce6..2b96021fffc9 100644
--- a/lldb/include/lldb/Interpreter/ScriptInterpreter.h
+++ b/lldb/include/lldb/Interpreter/ScriptInterpreter.h
@@ -11,6 +11,7 @@
#include "lldb/API/SBData.h"
#include "lldb/API/SBError.h"
+#include "lldb/API/SBMemoryRegionInfo.h"
#include "lldb/Breakpoint/BreakpointOptions.h"
#include "lldb/Core/Communication.h"
#include "lldb/Core/PluginInterface.h"
@@ -147,6 +148,8 @@ public:
lldb::ScriptedProcessInterfaceUP scripted_process_interface_up =
std::make_unique<ScriptedProcessInterface>());
+ virtual StructuredData::DictionarySP GetInterpreterInfo();
+
~ScriptInterpreter() override = default;
virtual bool Interrupt() { return false; }
@@ -564,6 +567,9 @@ public:
Status GetStatusFromSBError(const lldb::SBError &error) const;
+ llvm::Optional<MemoryRegionInfo> GetOpaqueTypeFromSBMemoryRegionInfo(
+ const lldb::SBMemoryRegionInfo &mem_region) const;
+
protected:
Debugger &m_debugger;
lldb::ScriptLanguage m_script_lang;
diff --git a/lldb/include/lldb/Interpreter/ScriptedInterface.h b/lldb/include/lldb/Interpreter/ScriptedInterface.h
new file mode 100644
index 000000000000..427fa3f4f793
--- /dev/null
+++ b/lldb/include/lldb/Interpreter/ScriptedInterface.h
@@ -0,0 +1,74 @@
+//===-- ScriptedInterface.h -------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLDB_INTERPRETER_SCRIPTEDINTERFACE_H
+#define LLDB_INTERPRETER_SCRIPTEDINTERFACE_H
+
+#include "lldb/Core/StructuredDataImpl.h"
+#include "lldb/Target/ExecutionContext.h"
+#include "lldb/Utility/Log.h"
+#include "lldb/Utility/Logging.h"
+#include "lldb/lldb-private.h"
+
+#include "llvm/Support/Compiler.h"
+
+#include <string>
+
+namespace lldb_private {
+class ScriptedInterface {
+public:
+ ScriptedInterface() = default;
+ virtual ~ScriptedInterface() = default;
+
+ virtual StructuredData::GenericSP
+ CreatePluginObject(llvm::StringRef class_name, ExecutionContext &exe_ctx,
+ StructuredData::DictionarySP args_sp) = 0;
+
+ template <typename Ret>
+ Ret ErrorWithMessage(llvm::StringRef caller_name, llvm::StringRef error_msg,
+ Status &error,
+ uint32_t log_category = LIBLLDB_LOG_PROCESS) {
+ LLDB_LOGF(GetLogIfAllCategoriesSet(log_category), "%s ERROR = %s",
+ caller_name.data(), error_msg.data());
+ error.SetErrorString(llvm::Twine(caller_name + llvm::Twine(" ERROR = ") +
+ llvm::Twine(error_msg))
+ .str());
+ return {};
+ }
+
+ template <typename T = StructuredData::ObjectSP>
+ bool CheckStructuredDataObject(llvm::StringRef caller, T obj, Status &error) {
+ if (!obj) {
+ return ErrorWithMessage<bool>(caller,
+ llvm::Twine("Null StructuredData object (" +
+ llvm::Twine(error.AsCString()) +
+ llvm::Twine(")."))
+ .str(),
+ error);
+ }
+
+ if (!obj->IsValid()) {
+ return ErrorWithMessage<bool>(
+ caller,
+ llvm::Twine("Invalid StructuredData object (" +
+ llvm::Twine(error.AsCString()) + llvm::Twine(")."))
+ .str(),
+ error);
+ }
+
+ if (error.Fail())
+ return ErrorWithMessage<bool>(caller, error.AsCString(), error);
+
+ return true;
+ }
+
+protected:
+ StructuredData::GenericSP m_object_instance_sp;
+};
+} // namespace lldb_private
+#endif // LLDB_INTERPRETER_SCRIPTEDINTERFACE_H
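A hypothetical usage sketch of the ErrorWithMessage / CheckStructuredDataObject helpers declared above; the class, member name, and error text are invented for illustration and are not part of this import:

lldb::pid_t ExampleScriptedInterface::GetProcessID(StructuredData::ObjectSP obj,
                                                   Status &error) {
  // Null, invalid, or errored objects are logged and turned into a default value.
  if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, error))
    return ErrorWithMessage<lldb::pid_t>(LLVM_PRETTY_FUNCTION,
                                         "could not fetch the process id",
                                         error);
  return obj->GetIntegerValue(LLDB_INVALID_PROCESS_ID);
}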
diff --git a/lldb/include/lldb/Interpreter/ScriptedProcessInterface.h b/lldb/include/lldb/Interpreter/ScriptedProcessInterface.h
index 223e89be87ee..26fd956f96bb 100644
--- a/lldb/include/lldb/Interpreter/ScriptedProcessInterface.h
+++ b/lldb/include/lldb/Interpreter/ScriptedProcessInterface.h
@@ -11,20 +11,19 @@
#include "lldb/Core/StructuredDataImpl.h"
#include "lldb/Interpreter/ScriptInterpreter.h"
+#include "lldb/Interpreter/ScriptedInterface.h"
+#include "lldb/Target/MemoryRegionInfo.h"
+
#include "lldb/lldb-private.h"
#include <string>
namespace lldb_private {
-class ScriptedProcessInterface {
+class ScriptedProcessInterface : virtual public ScriptedInterface {
public:
- ScriptedProcessInterface() : m_object_instance_sp(nullptr) {}
-
- virtual ~ScriptedProcessInterface() = default;
-
- virtual StructuredData::GenericSP
- CreatePluginObject(const llvm::StringRef class_name, lldb::TargetSP target_sp,
- StructuredData::DictionarySP args_sp) {
+ StructuredData::GenericSP
+ CreatePluginObject(llvm::StringRef class_name, ExecutionContext &exe_ctx,
+ StructuredData::DictionarySP args_sp) override {
return nullptr;
}
@@ -36,9 +35,10 @@ public:
virtual Status Stop() { return Status("ScriptedProcess did not stop"); }
- virtual lldb::MemoryRegionInfoSP
- GetMemoryRegionContainingAddress(lldb::addr_t address) {
- return nullptr;
+ virtual llvm::Optional<MemoryRegionInfo>
+ GetMemoryRegionContainingAddress(lldb::addr_t address, Status &error) {
+ error.SetErrorString("ScriptedProcess have no memory region.");
+ return {};
}
virtual StructuredData::DictionarySP GetThreadWithID(lldb::tid_t tid) {
@@ -60,8 +60,44 @@ public:
virtual bool IsAlive() { return true; }
-private:
- StructuredData::ObjectSP m_object_instance_sp;
+ virtual llvm::Optional<std::string> GetScriptedThreadPluginName() {
+ return llvm::None;
+ }
+
+protected:
+ friend class ScriptedThread;
+ virtual lldb::ScriptedThreadInterfaceSP GetScriptedThreadInterface() {
+ return nullptr;
+ }
+
+ lldb::ScriptedThreadInterfaceSP m_scripted_thread_interface_sp = nullptr;
+};
+
+class ScriptedThreadInterface : virtual public ScriptedInterface {
+public:
+ StructuredData::GenericSP
+ CreatePluginObject(llvm::StringRef class_name, ExecutionContext &exe_ctx,
+ StructuredData::DictionarySP args_sp) override {
+ return nullptr;
+ }
+
+ virtual lldb::tid_t GetThreadID() { return LLDB_INVALID_THREAD_ID; }
+
+ virtual llvm::Optional<std::string> GetName() { return llvm::None; }
+
+ virtual lldb::StateType GetState() { return lldb::eStateInvalid; }
+
+ virtual llvm::Optional<std::string> GetQueue() { return llvm::None; }
+
+ virtual StructuredData::DictionarySP GetStopReason() { return nullptr; }
+
+ virtual StructuredData::ArraySP GetStackFrames() { return nullptr; }
+
+ virtual StructuredData::DictionarySP GetRegisterInfo() { return nullptr; }
+
+ virtual llvm::Optional<std::string> GetRegisterContext() {
+ return llvm::None;
+ }
};
} // namespace lldb_private
diff --git a/lldb/include/lldb/Symbol/Block.h b/lldb/include/lldb/Symbol/Block.h
index de94556d3f22..02fd2add5310 100644
--- a/lldb/include/lldb/Symbol/Block.h
+++ b/lldb/include/lldb/Symbol/Block.h
@@ -338,6 +338,8 @@ public:
Block *FindBlockByID(lldb::user_id_t block_id);
+ Block *FindInnermostBlockByOffset(const lldb::addr_t offset);
+
size_t GetNumRanges() const { return m_ranges.GetSize(); }
bool GetRangeContainingOffset(const lldb::addr_t offset, Range &range);
diff --git a/lldb/include/lldb/Symbol/CompileUnit.h b/lldb/include/lldb/Symbol/CompileUnit.h
index 2e52bca7097c..34e34e5514df 100644
--- a/lldb/include/lldb/Symbol/CompileUnit.h
+++ b/lldb/include/lldb/Symbol/CompileUnit.h
@@ -332,6 +332,7 @@ public:
void SetLineTable(LineTable *line_table);
void SetSupportFiles(const FileSpecList &support_files);
+ void SetSupportFiles(FileSpecList &&support_files);
void SetDebugMacros(const DebugMacrosSP &debug_macros);
@@ -442,6 +443,7 @@ private:
CompileUnit(const CompileUnit &) = delete;
const CompileUnit &operator=(const CompileUnit &) = delete;
+ const char *GetCachedLanguage() const;
};
} // namespace lldb_private
diff --git a/lldb/include/lldb/Symbol/SymbolFile.h b/lldb/include/lldb/Symbol/SymbolFile.h
index ffdbdc6853f7..9ab63cac56dd 100644
--- a/lldb/include/lldb/Symbol/SymbolFile.h
+++ b/lldb/include/lldb/Symbol/SymbolFile.h
@@ -19,6 +19,7 @@
#include "lldb/Symbol/Type.h"
#include "lldb/Symbol/TypeList.h"
#include "lldb/Symbol/TypeSystem.h"
+#include "lldb/Target/Statistics.h"
#include "lldb/Utility/XcodeSDK.h"
#include "lldb/lldb-private.h"
#include "llvm/ADT/DenseSet.h"
@@ -299,6 +300,38 @@ public:
virtual void Dump(Stream &s);
+ /// Metrics gathering functions
+
+ /// Return the size in bytes of all debug information in the symbol file.
+ ///
+ /// If the debug information is contained in sections of an ObjectFile, then
+ /// this call should add the size of all sections that contain debug
+ /// information. Symbols in the symbol tables are not considered debug
+ /// information for this call to make it easy and quick for this number to be
+ /// calculated. If the symbol file is all debug information, the size of the
+ /// entire file should be returned. The default implementation of this
+ /// function will iterate over all sections in a module and add up their
+ /// debug info only section byte sizes.
+ virtual uint64_t GetDebugInfoSize();
+
+ /// Return the time taken to parse the debug information.
+ ///
+ /// \returns 0.0 if no information has been parsed or if there is
+ /// no computational cost to parsing the debug information.
+ virtual StatsDuration GetDebugInfoParseTime() {
+ return StatsDuration(0.0);
+ }
+
+ /// Return the time it took to index the debug information in the object
+ /// file.
+ ///
+ /// \returns 0.0 if the file doesn't need to be indexed or if it
+ /// hasn't been indexed yet, or a valid duration if it has.
+ virtual StatsDuration GetDebugInfoIndexTime() {
+ return StatsDuration(0.0);
+ }
+
+
protected:
void AssertModuleLock();
virtual uint32_t CalculateNumCompileUnits() = 0;
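A hedged illustration of consuming the new SymbolFile metrics hooks; module_sp is an assumed lldb::ModuleSP local and the surrounding code is not part of the patch:

if (SymbolFile *sym_file = module_sp->GetSymbolFile()) {
  uint64_t debug_info_bytes = sym_file->GetDebugInfoSize();
  double parse_seconds = sym_file->GetDebugInfoParseTime().count();
  double index_seconds = sym_file->GetDebugInfoIndexTime().count();
  // These values feed the per-module statistics introduced in Statistics.h below.
}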
diff --git a/lldb/include/lldb/Symbol/SymbolVendor.h b/lldb/include/lldb/Symbol/SymbolVendor.h
index 5c785e8c5a85..b060ac95b75a 100644
--- a/lldb/include/lldb/Symbol/SymbolVendor.h
+++ b/lldb/include/lldb/Symbol/SymbolVendor.h
@@ -41,9 +41,7 @@ public:
SymbolFile *GetSymbolFile() { return m_sym_file_up.get(); }
// PluginInterface protocol
- ConstString GetPluginName() override;
-
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return "vendor-default"; }
protected:
std::unique_ptr<SymbolFile> m_sym_file_up; // A single symbol file. Subclasses
diff --git a/lldb/include/lldb/Symbol/TypeSystem.h b/lldb/include/lldb/Symbol/TypeSystem.h
index a37c1040b16e..be5783596897 100644
--- a/lldb/include/lldb/Symbol/TypeSystem.h
+++ b/lldb/include/lldb/Symbol/TypeSystem.h
@@ -91,7 +91,6 @@ public:
virtual SymbolFile *GetSymbolFile() const { return m_sym_file; }
- // Returns true if the symbol file changed during the set accessor.
virtual void SetSymbolFile(SymbolFile *sym_file) { m_sym_file = sym_file; }
// CompilerDecl functions
@@ -392,6 +391,12 @@ public:
lldb::opaque_compiler_type_t type, Stream *s,
lldb::DescriptionLevel level = lldb::eDescriptionLevelFull) = 0;
+ /// Dump a textual representation of the internal TypeSystem state to the
+ /// given stream.
+ ///
+ /// This should not modify the state of the TypeSystem if possible.
+ virtual void Dump(llvm::raw_ostream &output) = 0;
+
// TODO: These methods appear unused. Should they be removed?
virtual bool IsRuntimeGeneratedType(lldb::opaque_compiler_type_t type) = 0;
diff --git a/lldb/include/lldb/Target/ABI.h b/lldb/include/lldb/Target/ABI.h
index 8fbb6aae68c4..8ac6003554d5 100644
--- a/lldb/include/lldb/Target/ABI.h
+++ b/lldb/include/lldb/Target/ABI.h
@@ -11,6 +11,7 @@
#include "lldb/Core/PluginInterface.h"
#include "lldb/Symbol/UnwindPlan.h"
+#include "lldb/Target/DynamicRegisterInfo.h"
#include "lldb/Utility/Status.h"
#include "lldb/lldb-private.h"
@@ -127,7 +128,8 @@ public:
llvm::MCRegisterInfo &GetMCRegisterInfo() { return *m_mc_register_info_up; }
- virtual void AugmentRegisterInfo(RegisterInfo &info) = 0;
+ virtual void
+ AugmentRegisterInfo(std::vector<DynamicRegisterInfo::Register> &regs) = 0;
virtual bool GetPointerReturnRegister(const char *&name) { return false; }
@@ -159,7 +161,8 @@ private:
class RegInfoBasedABI : public ABI {
public:
- void AugmentRegisterInfo(RegisterInfo &info) override;
+ void AugmentRegisterInfo(
+ std::vector<DynamicRegisterInfo::Register> &regs) override;
protected:
using ABI::ABI;
@@ -171,12 +174,14 @@ protected:
class MCBasedABI : public ABI {
public:
- void AugmentRegisterInfo(RegisterInfo &info) override;
+ void AugmentRegisterInfo(
+ std::vector<DynamicRegisterInfo::Register> &regs) override;
/// If the register name is of the form "<from_prefix>[<number>]" then change
/// the name to "<to_prefix>[<number>]". Otherwise, leave the name unchanged.
static void MapRegisterName(std::string &reg, llvm::StringRef from_prefix,
- llvm::StringRef to_prefix);
+ llvm::StringRef to_prefix);
+
protected:
using ABI::ABI;
diff --git a/lldb/include/lldb/Target/AppleArm64ExceptionClass.def b/lldb/include/lldb/Target/AppleArm64ExceptionClass.def
new file mode 100644
index 000000000000..9a938c3b302c
--- /dev/null
+++ b/lldb/include/lldb/Target/AppleArm64ExceptionClass.def
@@ -0,0 +1,50 @@
+/*===-- AppleArm64ExceptionClass.def ---------------------------*- C++ -*-=== *\
+|*
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+|* See https://llvm.org/LICENSE.txt for license information.
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+|*
+\*===----------------------------------------------------------------------===*/
+
+// Defines ESR exception classes for Apple arm64* targets.
+// These largely map 1:1 to the exception classes defined in ARM's architecture
+// reference manual, but there are some Apple-specific additions.
+
+#ifndef APPLE_ARM64_EXCEPTION_CLASS
+#error "APPLE_ARM64_EXCEPTION_CLASS(Name, Code) not defined."
+#endif
+
+APPLE_ARM64_EXCEPTION_CLASS(ESR_EC_UNCATEGORIZED, 0x00)
+APPLE_ARM64_EXCEPTION_CLASS(ESR_EC_WFI_WFE, 0x01)
+APPLE_ARM64_EXCEPTION_CLASS(ESR_EC_MCR_MRC_CP15_TRAP, 0x03)
+APPLE_ARM64_EXCEPTION_CLASS(ESR_EC_MCRR_MRRC_CP15_TRAP, 0x04)
+APPLE_ARM64_EXCEPTION_CLASS(ESR_EC_MCR_MRC_CP14_TRAP, 0x05)
+APPLE_ARM64_EXCEPTION_CLASS(ESR_EC_LDC_STC_CP14_TRAP, 0x06)
+APPLE_ARM64_EXCEPTION_CLASS(ESR_EC_TRAP_SIMD_FP, 0x07)
+APPLE_ARM64_EXCEPTION_CLASS(ESR_EC_PTRAUTH_INSTR_TRAP, 0x09)
+APPLE_ARM64_EXCEPTION_CLASS(ESR_EC_MCRR_MRRC_CP14_TRAP, 0x0c)
+APPLE_ARM64_EXCEPTION_CLASS(ESR_EC_ILLEGAL_INSTR_SET, 0x0e)
+APPLE_ARM64_EXCEPTION_CLASS(ESR_EC_SVC_32, 0x11)
+APPLE_ARM64_EXCEPTION_CLASS(ESR_EC_SVC_64, 0x15)
+APPLE_ARM64_EXCEPTION_CLASS(ESR_EC_MSR_TRAP, 0x18)
+APPLE_ARM64_EXCEPTION_CLASS(ESR_EC_PAC_FAIL, 0x1C)
+APPLE_ARM64_EXCEPTION_CLASS(ESR_EC_IABORT_EL0, 0x20)
+APPLE_ARM64_EXCEPTION_CLASS(ESR_EC_IABORT_EL1, 0x21)
+APPLE_ARM64_EXCEPTION_CLASS(ESR_EC_PC_ALIGN, 0x22)
+APPLE_ARM64_EXCEPTION_CLASS(ESR_EC_DABORT_EL0, 0x24)
+APPLE_ARM64_EXCEPTION_CLASS(ESR_EC_DABORT_EL1, 0x25)
+APPLE_ARM64_EXCEPTION_CLASS(ESR_EC_SP_ALIGN, 0x26)
+APPLE_ARM64_EXCEPTION_CLASS(ESR_EC_FLOATING_POINT_32, 0x28)
+APPLE_ARM64_EXCEPTION_CLASS(ESR_EC_FLOATING_POINT_64, 0x2C)
+APPLE_ARM64_EXCEPTION_CLASS(ESR_EC_SERROR_INTERRUPT, 0x2F)
+APPLE_ARM64_EXCEPTION_CLASS(ESR_EC_BKPT_REG_MATCH_EL0, 0x30)
+APPLE_ARM64_EXCEPTION_CLASS(ESR_EC_BKPT_REG_MATCH_EL1, 0x31)
+APPLE_ARM64_EXCEPTION_CLASS(ESR_EC_SW_STEP_DEBUG_EL0, 0x32)
+APPLE_ARM64_EXCEPTION_CLASS(ESR_EC_SW_STEP_DEBUG_EL1, 0x33)
+APPLE_ARM64_EXCEPTION_CLASS(ESR_EC_WATCHPT_MATCH_EL0, 0x34)
+APPLE_ARM64_EXCEPTION_CLASS(ESR_EC_WATCHPT_MATCH_EL1, 0x35)
+APPLE_ARM64_EXCEPTION_CLASS(ESR_EC_BKPT_AARCH32, 0x38)
+APPLE_ARM64_EXCEPTION_CLASS(ESR_EC_BRK_AARCH64, 0x3C)
+APPLE_ARM64_EXCEPTION_CLASS(ESR_EC_PRIV, 0x3F)
+
+#undef APPLE_ARM64_EXCEPTION_CLASS
diff --git a/lldb/include/lldb/Target/AppleArm64ExceptionClass.h b/lldb/include/lldb/Target/AppleArm64ExceptionClass.h
new file mode 100644
index 000000000000..95f58ee081ab
--- /dev/null
+++ b/lldb/include/lldb/Target/AppleArm64ExceptionClass.h
@@ -0,0 +1,50 @@
+//===-- AppleArm64ExceptionClass.h ------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLDB_TARGET_APPLEARM64EXCEPTIONCLASS_H
+#define LLDB_TARGET_APPLEARM64EXCEPTIONCLASS_H
+
+#include <cstdint>
+
+namespace lldb_private {
+
+enum class AppleArm64ExceptionClass : unsigned {
+#define APPLE_ARM64_EXCEPTION_CLASS(Name, Code) Name = Code,
+#include "AppleArm64ExceptionClass.def"
+};
+
+/// Get the Apple ARM64 exception class encoded within \p esr.
+inline AppleArm64ExceptionClass getAppleArm64ExceptionClass(uint32_t esr) {
+ /*
+ * Exception Syndrome Register
+ *
+ * 31 26 25 24 0
+ * +------+--+------------------+
+ * | EC |IL| ISS |
+ * +------+--+------------------+
+ *
+ * EC - Exception Class
+ * IL - Instruction Length
+ * ISS - Instruction Specific Syndrome
+ */
+ return static_cast<AppleArm64ExceptionClass>(esr >> 26);
+}
+
+inline const char *toString(AppleArm64ExceptionClass EC) {
+ switch (EC) {
+#define APPLE_ARM64_EXCEPTION_CLASS(Name, Code) \
+ case AppleArm64ExceptionClass::Name: \
+ return #Name;
+#include "AppleArm64ExceptionClass.def"
+ }
+ return "Unknown Exception Class";
+}
+
+} // namespace lldb_private
+
+#endif // LLDB_TARGET_APPLEARM64EXCEPTIONCLASS_H
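A minimal usage sketch for the decoder above; the ESR value is illustrative only:

uint32_t esr = 0x56000080; // bits [31:26] == 0x15
lldb_private::AppleArm64ExceptionClass ec =
    lldb_private::getAppleArm64ExceptionClass(esr);
const char *name = lldb_private::toString(ec); // "ESR_EC_SVC_64"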
diff --git a/lldb/include/lldb/Target/DynamicLoader.h b/lldb/include/lldb/Target/DynamicLoader.h
index a904fac0c779..84ad0f11fabb 100644
--- a/lldb/include/lldb/Target/DynamicLoader.h
+++ b/lldb/include/lldb/Target/DynamicLoader.h
@@ -62,8 +62,9 @@ public:
///
/// \param[in] plugin_name
/// An optional name of a specific dynamic loader plug-in that
- /// should be used. If NULL, pick the best plug-in.
- static DynamicLoader *FindPlugin(Process *process, const char *plugin_name);
+ /// should be used. If empty, pick the best plug-in.
+ static DynamicLoader *FindPlugin(Process *process,
+ llvm::StringRef plugin_name);
/// Construct with a process.
DynamicLoader(Process *process);
diff --git a/lldb/source/Plugins/Process/Utility/DynamicRegisterInfo.h b/lldb/include/lldb/Target/DynamicRegisterInfo.h
index 7e90454c6d9d..20f442529da8 100644
--- a/lldb/source/Plugins/Process/Utility/DynamicRegisterInfo.h
+++ b/lldb/include/lldb/Target/DynamicRegisterInfo.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLDB_SOURCE_PLUGINS_PROCESS_UTILITY_DYNAMICREGISTERINFO_H
-#define LLDB_SOURCE_PLUGINS_PROCESS_UTILITY_DYNAMICREGISTERINFO_H
+#ifndef LLDB_TARGET_DYNAMICREGISTERINFO_H
+#define LLDB_TARGET_DYNAMICREGISTERINFO_H
#include <map>
#include <vector>
@@ -16,12 +16,31 @@
#include "lldb/Utility/StructuredData.h"
#include "lldb/lldb-private.h"
+namespace lldb_private {
+
class DynamicRegisterInfo {
protected:
DynamicRegisterInfo(DynamicRegisterInfo &) = default;
DynamicRegisterInfo &operator=(DynamicRegisterInfo &) = default;
public:
+ struct Register {
+ ConstString name;
+ ConstString alt_name;
+ ConstString set_name;
+ uint32_t byte_size = LLDB_INVALID_INDEX32;
+ uint32_t byte_offset = LLDB_INVALID_INDEX32;
+ lldb::Encoding encoding = lldb::eEncodingUint;
+ lldb::Format format = lldb::eFormatHex;
+ uint32_t regnum_dwarf = LLDB_INVALID_REGNUM;
+ uint32_t regnum_ehframe = LLDB_INVALID_REGNUM;
+ uint32_t regnum_generic = LLDB_INVALID_REGNUM;
+ uint32_t regnum_remote = LLDB_INVALID_REGNUM;
+ std::vector<uint32_t> value_regs;
+ std::vector<uint32_t> invalidate_regs;
+ uint32_t value_reg_offset = 0;
+ };
+
DynamicRegisterInfo() = default;
DynamicRegisterInfo(const lldb_private::StructuredData::Dictionary &dict,
@@ -35,12 +54,8 @@ public:
size_t SetRegisterInfo(const lldb_private::StructuredData::Dictionary &dict,
const lldb_private::ArchSpec &arch);
- void AddRegister(lldb_private::RegisterInfo &reg_info,
- lldb_private::ConstString &reg_name,
- lldb_private::ConstString &reg_alt_name,
- lldb_private::ConstString &set_name);
-
- void Finalize(const lldb_private::ArchSpec &arch);
+ size_t SetRegisterInfo(std::vector<Register> &&regs,
+ const lldb_private::ArchSpec &arch);
size_t GetNumRegisters() const;
@@ -50,11 +65,9 @@ public:
const lldb_private::RegisterInfo *GetRegisterInfoAtIndex(uint32_t i) const;
- lldb_private::RegisterInfo *GetRegisterInfoAtIndex(uint32_t i);
-
const lldb_private::RegisterSet *GetRegisterSet(uint32_t i) const;
- uint32_t GetRegisterSetIndexByName(lldb_private::ConstString &set_name,
+ uint32_t GetRegisterSetIndexByName(const lldb_private::ConstString &set_name,
bool can_create);
uint32_t ConvertRegisterKindToRegisterNumber(uint32_t kind,
@@ -72,19 +85,34 @@ public:
const lldb_private::RegisterInfo *
GetRegisterInfo(llvm::StringRef reg_name) const;
+ typedef std::vector<lldb_private::RegisterInfo> reg_collection;
+ llvm::iterator_range<reg_collection::const_iterator> registers() const {
+ return llvm::iterator_range<reg_collection::const_iterator>(m_regs);
+ }
+
protected:
// Classes that inherit from DynamicRegisterInfo can see and modify these
- typedef std::vector<lldb_private::RegisterInfo> reg_collection;
typedef std::vector<lldb_private::RegisterSet> set_collection;
typedef std::vector<uint32_t> reg_num_collection;
typedef std::vector<reg_num_collection> set_reg_num_collection;
typedef std::vector<lldb_private::ConstString> name_collection;
typedef std::map<uint32_t, reg_num_collection> reg_to_regs_map;
- typedef std::vector<uint8_t> dwarf_opcode;
- typedef std::map<uint32_t, dwarf_opcode> dynamic_reg_size_map;
+ typedef std::map<uint32_t, uint32_t> reg_offset_map;
+
+ llvm::Expected<uint32_t> ByteOffsetFromSlice(uint32_t index,
+ llvm::StringRef slice_str,
+ lldb::ByteOrder byte_order);
+ llvm::Expected<uint32_t> ByteOffsetFromComposite(
+ uint32_t index, lldb_private::StructuredData::Array &composite_reg_list,
+ lldb::ByteOrder byte_order);
+ llvm::Expected<uint32_t> ByteOffsetFromRegInfoDict(
+ uint32_t index, lldb_private::StructuredData::Dictionary &reg_info_dict,
+ lldb::ByteOrder byte_order);
void MoveFrom(DynamicRegisterInfo &&info);
+ void Finalize(const lldb_private::ArchSpec &arch);
+
void ConfigureOffsets();
reg_collection m_regs;
@@ -93,10 +121,16 @@ protected:
name_collection m_set_names;
reg_to_regs_map m_value_regs_map;
reg_to_regs_map m_invalidate_regs_map;
- dynamic_reg_size_map m_dynamic_reg_size_map;
+ reg_offset_map m_value_reg_offset_map;
size_t m_reg_data_byte_size = 0u; // The number of bytes required to store
// all registers
bool m_finalized = false;
bool m_is_reconfigurable = false;
};
-#endif // LLDB_SOURCE_PLUGINS_PROCESS_UTILITY_DYNAMICREGISTERINFO_H
+
+void addSupplementaryRegister(std::vector<DynamicRegisterInfo::Register> &regs,
+ DynamicRegisterInfo::Register new_reg_info);
+
+} // namespace lldb_private
+
+#endif // LLDB_TARGET_DYNAMICREGISTERINFO_H
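A hypothetical sketch of the new Register-based flow; the locals abi_sp, dyn_reg_info, and arch are assumed and not part of the patch:

std::vector<lldb_private::DynamicRegisterInfo::Register> regs;
lldb_private::DynamicRegisterInfo::Register x0;
x0.name = lldb_private::ConstString("x0");
x0.set_name = lldb_private::ConstString("General Purpose Registers");
x0.byte_size = 8;
x0.regnum_remote = 0;
regs.push_back(x0);
abi_sp->AugmentRegisterInfo(regs); // lets the ABI fill eh_frame/DWARF/generic numbers
dyn_reg_info.SetRegisterInfo(std::move(regs), arch); // computes offsets and finalizes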
diff --git a/lldb/include/lldb/Target/Language.h b/lldb/include/lldb/Target/Language.h
index 11b9daa38945..0b0891c14029 100644
--- a/lldb/include/lldb/Target/Language.h
+++ b/lldb/include/lldb/Target/Language.h
@@ -243,6 +243,14 @@ public:
FunctionNameRepresentation representation,
Stream &s);
+ virtual ConstString
+ GetDemangledFunctionNameWithoutArguments(Mangled mangled) const {
+ if (ConstString demangled = mangled.GetDemangledName())
+ return demangled;
+
+ return mangled.GetMangledName();
+ }
+
virtual void GetExceptionResolverDescription(bool catch_on, bool throw_on,
Stream &s);
@@ -285,6 +293,19 @@ public:
static LanguageSet GetLanguagesSupportingTypeSystemsForExpressions();
static LanguageSet GetLanguagesSupportingREPLs();
+ // Given a mangled function name, calculates some alternative manglings since
+ // the compiler mangling may not line up with the symbol we are expecting.
+ virtual std::vector<ConstString>
+ GenerateAlternateFunctionManglings(const ConstString mangled) const {
+ return std::vector<ConstString>();
+ }
+
+ virtual ConstString
+ FindBestAlternateFunctionMangledName(const Mangled mangled,
+ const SymbolContext &sym_ctx) const {
+ return ConstString();
+ }
+
protected:
// Classes that inherit from Language can see and modify these
diff --git a/lldb/include/lldb/Target/MemoryRegionInfo.h b/lldb/include/lldb/Target/MemoryRegionInfo.h
index c43f27e0c366..fc5fcff5159e 100644
--- a/lldb/include/lldb/Target/MemoryRegionInfo.h
+++ b/lldb/include/lldb/Target/MemoryRegionInfo.h
@@ -28,10 +28,10 @@ public:
MemoryRegionInfo(RangeType range, OptionalBool read, OptionalBool write,
OptionalBool execute, OptionalBool mapped, ConstString name,
OptionalBool flash, lldb::offset_t blocksize,
- OptionalBool memory_tagged)
+ OptionalBool memory_tagged, OptionalBool stack_memory)
: m_range(range), m_read(read), m_write(write), m_execute(execute),
m_mapped(mapped), m_name(name), m_flash(flash), m_blocksize(blocksize),
- m_memory_tagged(memory_tagged) {}
+ m_memory_tagged(memory_tagged), m_is_stack_memory(stack_memory) {}
RangeType &GetRange() { return m_range; }
@@ -98,7 +98,8 @@ public:
m_mapped == rhs.m_mapped && m_name == rhs.m_name &&
m_flash == rhs.m_flash && m_blocksize == rhs.m_blocksize &&
m_memory_tagged == rhs.m_memory_tagged &&
- m_pagesize == rhs.m_pagesize;
+ m_pagesize == rhs.m_pagesize &&
+ m_is_stack_memory == rhs.m_is_stack_memory;
}
bool operator!=(const MemoryRegionInfo &rhs) const { return !(*this == rhs); }
@@ -116,6 +117,10 @@ public:
return m_dirty_pages;
}
+ OptionalBool IsStackMemory() const { return m_is_stack_memory; }
+
+ void SetIsStackMemory(OptionalBool val) { m_is_stack_memory = val; }
+
void SetPageSize(int pagesize) { m_pagesize = pagesize; }
void SetDirtyPageList(std::vector<lldb::addr_t> pagelist) {
@@ -134,6 +139,7 @@ protected:
OptionalBool m_flash = eDontKnow;
lldb::offset_t m_blocksize = 0;
OptionalBool m_memory_tagged = eDontKnow;
+ OptionalBool m_is_stack_memory = eDontKnow;
int m_pagesize = 0;
llvm::Optional<std::vector<lldb::addr_t>> m_dirty_pages;
};
diff --git a/lldb/include/lldb/Target/PathMappingList.h b/lldb/include/lldb/Target/PathMappingList.h
index d788d120c47e..f1cc779ea50f 100644
--- a/lldb/include/lldb/Target/PathMappingList.h
+++ b/lldb/include/lldb/Target/PathMappingList.h
@@ -32,8 +32,7 @@ public:
const PathMappingList &operator=(const PathMappingList &rhs);
- void Append(ConstString path, ConstString replacement,
- bool notify);
+ void Append(llvm::StringRef path, llvm::StringRef replacement, bool notify);
void Append(const PathMappingList &rhs, bool notify);
@@ -49,17 +48,16 @@ public:
bool GetPathsAtIndex(uint32_t idx, ConstString &path,
ConstString &new_path) const;
- void Insert(ConstString path, ConstString replacement,
+ void Insert(llvm::StringRef path, llvm::StringRef replacement,
uint32_t insert_idx, bool notify);
bool Remove(size_t index, bool notify);
bool Remove(ConstString path, bool notify);
- bool Replace(ConstString path, ConstString replacement,
- bool notify);
+ bool Replace(llvm::StringRef path, llvm::StringRef replacement, bool notify);
- bool Replace(ConstString path, ConstString replacement,
+ bool Replace(llvm::StringRef path, llvm::StringRef replacement,
uint32_t index, bool notify);
bool RemapPath(ConstString path, ConstString &new_path) const;
@@ -104,7 +102,7 @@ public:
/// The newly remapped filespec that is guaranteed to exist.
llvm::Optional<FileSpec> FindFile(const FileSpec &orig_spec) const;
- uint32_t FindIndexForPath(ConstString path) const;
+ uint32_t FindIndexForPath(llvm::StringRef path) const;
uint32_t GetModificationID() const { return m_mod_id; }
diff --git a/lldb/include/lldb/Target/Platform.h b/lldb/include/lldb/Target/Platform.h
index df46466655c3..956b29e45dba 100644
--- a/lldb/include/lldb/Target/Platform.h
+++ b/lldb/include/lldb/Target/Platform.h
@@ -55,7 +55,6 @@ private:
void SetDefaultModuleCacheDirectory(const FileSpec &dir_spec);
};
-typedef std::shared_ptr<PlatformProperties> PlatformPropertiesSP;
typedef llvm::SmallVector<lldb::addr_t, 6> MmapArgList;
/// \class Platform Platform.h "lldb/Target/Platform.h"
@@ -74,8 +73,6 @@ public:
/// Default Constructor
Platform(bool is_host_platform);
- /// Destructor.
- ///
/// The destructor is virtual since this class is designed to be inherited
/// from by the plug-in instance.
~Platform() override;
@@ -84,7 +81,7 @@ public:
static void Terminate();
- static const PlatformPropertiesSP &GetGlobalPlatformProperties();
+ static PlatformProperties &GetGlobalPlatformProperties();
/// Get the native host platform plug-in.
///
@@ -215,9 +212,9 @@ public:
bool SetOSVersion(llvm::VersionTuple os_version);
- bool GetOSBuildString(std::string &s);
+ llvm::Optional<std::string> GetOSBuildString();
- bool GetOSKernelDescription(std::string &s);
+ llvm::Optional<std::string> GetOSKernelDescription();
// Returns the name of the platform
ConstString GetName();
@@ -226,7 +223,7 @@ public:
virtual ConstString GetFullNameForDylib(ConstString basename);
- virtual const char *GetDescription() = 0;
+ virtual llvm::StringRef GetDescription() = 0;
/// Report the current status for this platform.
///
@@ -243,14 +240,12 @@ public:
// HostInfo::GetOSVersion().
virtual bool GetRemoteOSVersion() { return false; }
- virtual bool GetRemoteOSBuildString(std::string &s) {
- s.clear();
- return false;
+ virtual llvm::Optional<std::string> GetRemoteOSBuildString() {
+ return llvm::None;
}
- virtual bool GetRemoteOSKernelDescription(std::string &s) {
- s.clear();
- return false;
+ virtual llvm::Optional<std::string> GetRemoteOSKernelDescription() {
+ return llvm::None;
}
// Remote Platform subclasses need to override this function
@@ -327,7 +322,13 @@ public:
/// \b true if \a arch was filled in and is valid, \b false
/// otherwise.
virtual bool GetSupportedArchitectureAtIndex(uint32_t idx,
- ArchSpec &arch) = 0;
+ ArchSpec &arch);
+
+ /// Get the platform's supported architectures in the order in which they
+ /// should be searched.
+ /// NB: This implementation is mutually recursive with
+ /// GetSupportedArchitectureAtIndex. Subclasses should implement one of them.
+ virtual std::vector<ArchSpec> GetSupportedArchitectures();
virtual size_t GetSoftwareBreakpointTrapOpcode(Target &target,
BreakpointSite *bp_site);
@@ -363,11 +364,9 @@ public:
/// platforms will want to subclass this function in order to be able to
/// intercept STDIO and possibly launch a separate process that will debug
/// the debuggee.
- virtual lldb::ProcessSP
- DebugProcess(ProcessLaunchInfo &launch_info, Debugger &debugger,
- Target *target, // Can be nullptr, if nullptr create a new
- // target, else use existing one
- Status &error);
+ virtual lldb::ProcessSP DebugProcess(ProcessLaunchInfo &launch_info,
+ Debugger &debugger, Target &target,
+ Status &error);
virtual lldb::ProcessSP ConnectProcess(llvm::StringRef connect_url,
llvm::StringRef plugin_name,
@@ -726,6 +725,24 @@ public:
/// A list of symbol names. The list may be empty.
virtual const std::vector<ConstString> &GetTrapHandlerSymbolNames();
+ /// Try to get a specific unwind plan for a named trap handler.
+ /// The default is not to have specific unwind plans for trap handlers.
+ ///
+ /// \param[in] triple
+ /// Triple of the current target.
+ ///
+ /// \param[in] name
+ /// Name of the trap handler function.
+ ///
+ /// \return
+ /// A specific unwind plan for that trap handler, or an empty
+ /// shared pointer. The latter means there is no specific plan,
+ /// unwind as normal.
+ virtual lldb::UnwindPlanSP
+ GetTrapHandlerUnwindPlan(const llvm::Triple &triple, ConstString name) {
+ return {};
+ }
+
/// Find a support executable that may not live within in the standard
/// locations related to LLDB.
///
@@ -865,6 +882,12 @@ public:
}
protected:
+ /// Create a list of ArchSpecs with the given OS and architectures. The
+ /// vendor field is left as an "unspecified unknown".
+ static std::vector<ArchSpec>
+ CreateArchList(llvm::ArrayRef<llvm::Triple::ArchType> archs,
+ llvm::Triple::OSType os);
+
/// Private implementation of connecting to a process. If the stream is set
/// we connect synchronously.
lldb::ProcessSP DoConnectProcess(llvm::StringRef connect_url,
@@ -920,8 +943,7 @@ protected:
virtual void CalculateTrapHandlerSymbolNames() = 0;
Status GetCachedExecutable(ModuleSpec &module_spec, lldb::ModuleSP &module_sp,
- const FileSpecList *module_search_paths_ptr,
- Platform &remote_platform);
+ const FileSpecList *module_search_paths_ptr);
virtual Status DownloadModuleSlice(const FileSpec &src_file_spec,
const uint64_t src_offset,
@@ -933,6 +955,11 @@ protected:
virtual const char *GetCacheHostname();
+ virtual Status
+ ResolveRemoteExecutable(const ModuleSpec &module_spec,
+ lldb::ModuleSP &exe_module_sp,
+ const FileSpecList *module_search_paths_ptr);
+
private:
typedef std::function<Status(const ModuleSpec &)> ModuleResolver;
@@ -946,8 +973,7 @@ private:
Status LoadCachedExecutable(const ModuleSpec &module_spec,
lldb::ModuleSP &module_sp,
- const FileSpecList *module_search_paths_ptr,
- Platform &remote_platform);
+ const FileSpecList *module_search_paths_ptr);
FileSpec GetModuleCacheRoot();
};
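A minimal sketch of the new GetSupportedArchitectures hook using the CreateArchList helper declared above; "ExamplePlatform" and the chosen architectures are illustrative only:

std::vector<ArchSpec> ExamplePlatform::GetSupportedArchitectures() {
  return CreateArchList({llvm::Triple::aarch64, llvm::Triple::x86_64},
                        llvm::Triple::Linux);
}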
diff --git a/lldb/include/lldb/Target/Process.h b/lldb/include/lldb/Target/Process.h
index aaa2470d2931..4627502abd25 100644
--- a/lldb/include/lldb/Target/Process.h
+++ b/lldb/include/lldb/Target/Process.h
@@ -99,14 +99,13 @@ public:
bool GetOSPluginReportsAllThreads() const;
void SetOSPluginReportsAllThreads(bool does_report);
bool GetSteppingRunsAllThreads() const;
+ FollowForkMode GetFollowForkMode() const;
protected:
Process *m_process; // Can be nullptr for global ProcessProperties
std::unique_ptr<ProcessExperimentalProperties> m_experimental_properties_up;
};
-typedef std::shared_ptr<ProcessProperties> ProcessPropertiesSP;
-
// ProcessAttachInfo
//
// Describes any information that is required to attach to a process.
@@ -239,10 +238,11 @@ public:
~ProcessModID() = default;
- void BumpStopID() {
- m_stop_id++;
+ uint32_t BumpStopID() {
+ const uint32_t prev_stop_id = m_stop_id++;
if (!IsLastResumeForUserExpression())
m_last_natural_stop_id++;
+ return prev_stop_id;
}
void BumpMemoryID() { m_memory_id++; }
@@ -500,7 +500,7 @@ public:
static void SettingsTerminate();
- static const ProcessPropertiesSP &GetGlobalProperties();
+ static ProcessProperties &GetGlobalProperties();
/// Find a Process plug-in that can debug \a module using the currently
/// selected architecture.
@@ -536,13 +536,13 @@ public:
uint32_t GetAddressByteSize() const;
+ /// Returns the pid of the process or LLDB_INVALID_PROCESS_ID if there is
+ /// no known pid.
+ lldb::pid_t GetID() const { return m_pid; }
+
/// Sets the stored pid.
///
/// This does not change the pid of underlying process.
- lldb::pid_t GetID() const { return m_pid; }
-
- /// Returns the pid of the process or LLDB_INVALID_PROCESS_ID if there is
- /// no known pid.
void SetID(lldb::pid_t new_pid) { m_pid = new_pid; }
uint32_t GetUniqueID() const { return m_process_unique_id; }
@@ -611,9 +611,8 @@ public:
virtual Status DoLoadCore() {
Status error;
- error.SetErrorStringWithFormat(
- "error: %s does not support loading core files.",
- GetPluginName().GetCString());
+ error.SetErrorStringWithFormatv(
+ "error: {0} does not support loading core files.", GetPluginName());
return error;
}
@@ -686,6 +685,16 @@ public:
"Not implemented");
}
+ /// Save core dump into the specified file.
+ ///
+ /// \param[in] outfile
+ /// Path to store core dump in.
+ ///
+ /// \return
+ /// true if saved successfully, false if saving the core dump
+ /// is not supported by the plugin, error otherwise.
+ virtual llvm::Expected<bool> SaveCore(llvm::StringRef outfile);
+
protected:
virtual JITLoaderList &GetJITLoaders();
@@ -932,9 +941,9 @@ public:
virtual Status DoAttachToProcessWithID(lldb::pid_t pid,
const ProcessAttachInfo &attach_info) {
Status error;
- error.SetErrorStringWithFormat(
- "error: %s does not support attaching to a process by pid",
- GetPluginName().GetCString());
+ error.SetErrorStringWithFormatv(
+ "error: {0} does not support attaching to a process by pid",
+ GetPluginName());
return error;
}
@@ -981,6 +990,15 @@ public:
/// anything after a process exec's itself.
virtual void DoDidExec() {}
+ /// Called after a reported fork.
+ virtual void DidFork(lldb::pid_t child_pid, lldb::tid_t child_tid) {}
+
+ /// Called after a reported vfork.
+ virtual void DidVFork(lldb::pid_t child_pid, lldb::tid_t child_tid) {}
+
+ /// Called after reported vfork completion.
+ virtual void DidVForkDone() {}
+
/// Called before launching to a process.
///
/// Allow Process plug-ins to execute some code before launching a process.
@@ -1008,9 +1026,8 @@ public:
/// operation.
virtual Status DoLaunch(Module *exe_module, ProcessLaunchInfo &launch_info) {
Status error;
- error.SetErrorStringWithFormat(
- "error: %s does not support launching processes",
- GetPluginName().GetCString());
+ error.SetErrorStringWithFormatv(
+ "error: {0} does not support launching processes", GetPluginName());
return error;
}
@@ -1044,9 +1061,8 @@ public:
/// \see Thread:Suspend()
virtual Status DoResume() {
Status error;
- error.SetErrorStringWithFormat(
- "error: %s does not support resuming processes",
- GetPluginName().GetCString());
+ error.SetErrorStringWithFormatv(
+ "error: {0} does not support resuming processes", GetPluginName());
return error;
}
@@ -1080,9 +1096,8 @@ public:
/// otherwise.
virtual Status DoHalt(bool &caused_stop) {
Status error;
- error.SetErrorStringWithFormat(
- "error: %s does not support halting processes",
- GetPluginName().GetCString());
+ error.SetErrorStringWithFormatv(
+ "error: {0} does not support halting processes", GetPluginName());
return error;
}
@@ -1107,9 +1122,9 @@ public:
/// false otherwise.
virtual Status DoDetach(bool keep_stopped) {
Status error;
- error.SetErrorStringWithFormat(
- "error: %s does not support detaching from processes",
- GetPluginName().GetCString());
+ error.SetErrorStringWithFormatv(
+ "error: {0} does not support detaching from processes",
+ GetPluginName());
return error;
}
@@ -1138,9 +1153,9 @@ public:
/// Returns an error object.
virtual Status DoSignal(int signal) {
Status error;
- error.SetErrorStringWithFormat(
- "error: %s does not support sending signals to processes",
- GetPluginName().GetCString());
+ error.SetErrorStringWithFormatv(
+ "error: {0} does not support sending signals to processes",
+ GetPluginName());
return error;
}
@@ -1467,36 +1482,6 @@ public:
size_t ReadMemoryFromInferior(lldb::addr_t vm_addr, void *buf, size_t size,
Status &error);
- /// Read a NULL terminated string from memory
- ///
- /// This function will read a cache page at a time until a NULL string
- /// terminator is found. It will stop reading if an aligned sequence of NULL
- /// termination \a type_width bytes is not found before reading \a
- /// cstr_max_len bytes. The results are always guaranteed to be NULL
- /// terminated, and that no more than (max_bytes - type_width) bytes will be
- /// read.
- ///
- /// \param[in] vm_addr
- /// The virtual load address to start the memory read.
- ///
- /// \param[in] str
- /// A character buffer containing at least max_bytes.
- ///
- /// \param[in] max_bytes
- /// The maximum number of bytes to read.
- ///
- /// \param[in] error
- /// The error status of the read operation.
- ///
- /// \param[in] type_width
- /// The size of the null terminator (1 to 4 bytes per
- /// character). Defaults to 1.
- ///
- /// \return
- /// The error status or the number of bytes prior to the null terminator.
- size_t ReadStringFromMemory(lldb::addr_t vm_addr, char *str, size_t max_bytes,
- Status &error, size_t type_width = 1);
-
/// Read a NULL terminated C string from memory
///
/// This function will read a cache page at a time until the NULL
@@ -1567,9 +1552,8 @@ public:
/// The number of bytes that were actually written.
virtual size_t DoWriteMemory(lldb::addr_t vm_addr, const void *buf,
size_t size, Status &error) {
- error.SetErrorStringWithFormat(
- "error: %s does not support writing to processes",
- GetPluginName().GetCString());
+ error.SetErrorStringWithFormatv(
+ "error: {0} does not support writing to processes", GetPluginName());
return 0;
}
@@ -1651,9 +1635,9 @@ public:
virtual lldb::addr_t DoAllocateMemory(size_t size, uint32_t permissions,
Status &error) {
- error.SetErrorStringWithFormat(
- "error: %s does not support allocating in the debug process",
- GetPluginName().GetCString());
+ error.SetErrorStringWithFormatv(
+ "error: {0} does not support allocating in the debug process",
+ GetPluginName());
return LLDB_INVALID_ADDRESS;
}
@@ -1778,7 +1762,7 @@ public:
///
/// If load_addr is within the address space the process has mapped
/// range_info will be filled in with the start and end of that range as
- /// well as the permissions for that range and range_info.GetMapped will
+ /// well as the permissions for that range and range_info. GetMapped will
/// return true.
///
/// If load_addr is outside any mapped region then range_info will have its
@@ -1787,23 +1771,21 @@ public:
/// there are no valid mapped ranges between load_addr and the end of the
/// process address space.
///
- /// GetMemoryRegionInfo will only return an error if it is unimplemented for
- /// the current process.
+ /// GetMemoryRegionInfo calls DoGetMemoryRegionInfo. Override that function in
+ /// process subclasses.
///
/// \param[in] load_addr
- /// The load address to query the range_info for.
+ /// The load address to query the range_info for. May include non
+ /// address bits; these will be removed by the ABI plugin if there is
+ /// one.
///
/// \param[out] range_info
/// An range_info value containing the details of the range.
///
/// \return
/// An error value.
- virtual Status GetMemoryRegionInfo(lldb::addr_t load_addr,
- MemoryRegionInfo &range_info) {
- Status error;
- error.SetErrorString("Process::GetMemoryRegionInfo() not supported");
- return error;
- }
+ Status GetMemoryRegionInfo(lldb::addr_t load_addr,
+ MemoryRegionInfo &range_info);
/// Obtain all the mapped memory regions within this process.
///
@@ -1906,9 +1888,9 @@ public:
/// \btrue if the memory was deallocated, \bfalse otherwise.
virtual Status DoDeallocateMemory(lldb::addr_t ptr) {
Status error;
- error.SetErrorStringWithFormat(
- "error: %s does not support deallocating in the debug process",
- GetPluginName().GetCString());
+ error.SetErrorStringWithFormatv(
+ "error: {0} does not support deallocating in the debug process",
+ GetPluginName());
return error;
}
@@ -2026,17 +2008,15 @@ public:
virtual Status EnableBreakpointSite(BreakpointSite *bp_site) {
Status error;
- error.SetErrorStringWithFormat(
- "error: %s does not support enabling breakpoints",
- GetPluginName().GetCString());
+ error.SetErrorStringWithFormatv(
+ "error: {0} does not support enabling breakpoints", GetPluginName());
return error;
}
virtual Status DisableBreakpointSite(BreakpointSite *bp_site) {
Status error;
- error.SetErrorStringWithFormat(
- "error: %s does not support disabling breakpoints",
- GetPluginName().GetCString());
+ error.SetErrorStringWithFormatv(
+ "error: {0} does not support disabling breakpoints", GetPluginName());
return error;
}
@@ -2625,6 +2605,26 @@ protected:
virtual size_t DoReadMemory(lldb::addr_t vm_addr, void *buf, size_t size,
Status &error) = 0;
+ /// DoGetMemoryRegionInfo is called by GetMemoryRegionInfo after it has
+ /// removed non address bits from load_addr. Override this method in
+ /// subclasses of Process.
+ ///
+ /// See GetMemoryRegionInfo for details of the logic.
+ ///
+ /// \param[in] load_addr
+ /// The load address to query the range_info for. (non address bits
+ /// removed)
+ ///
+ /// \param[out] range_info
+ /// An range_info value containing the details of the range.
+ ///
+ /// \return
+ /// An error value.
+ virtual Status DoGetMemoryRegionInfo(lldb::addr_t load_addr,
+ MemoryRegionInfo &range_info) {
+ return Status("Process::DoGetMemoryRegionInfo() not supported");
+ }
+
lldb::StateType GetPrivateState();
/// The "private" side of resuming a process. This doesn't alter the state
@@ -2795,9 +2795,10 @@ protected:
/// if the read failed.
virtual llvm::Expected<std::vector<uint8_t>>
DoReadMemoryTags(lldb::addr_t addr, size_t len, int32_t type) {
- return llvm::createStringError(llvm::inconvertibleErrorCode(),
- "%s does not support reading memory tags",
- GetPluginName().GetCString());
+ return llvm::createStringError(
+ llvm::inconvertibleErrorCode(),
+ llvm::formatv("{0} does not support reading memory tags",
+ GetPluginName()));
}
/// Does the final operation to write memory tags. E.g. sending a GDB packet.
@@ -2820,8 +2821,10 @@ protected:
/// Status telling you whether the write succeeded.
virtual Status DoWriteMemoryTags(lldb::addr_t addr, size_t len, int32_t type,
const std::vector<uint8_t> &tags) {
- return Status("%s does not support writing memory tags",
- GetPluginName().GetCString());
+ Status status;
+ status.SetErrorStringWithFormatv("{0} does not support writing memory tags",
+ GetPluginName());
+ return status;
}
// Type definitions
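A hedged sketch of what a Process subclass override of the new DoGetMemoryRegionInfo hook could look like; "ExampleProcessPlugin" is hypothetical, and GetMemoryRegionInfo has already stripped non-address bits by the time this is called:

Status ExampleProcessPlugin::DoGetMemoryRegionInfo(lldb::addr_t load_addr,
                                                   MemoryRegionInfo &range_info) {
  // Pretend the whole address space is one readable, non-stack region.
  range_info.GetRange().SetRangeBase(0);
  range_info.GetRange().SetRangeEnd(LLDB_INVALID_ADDRESS);
  range_info.SetReadable(MemoryRegionInfo::eYes);
  range_info.SetIsStackMemory(MemoryRegionInfo::eNo);
  return Status(); // success
}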
diff --git a/lldb/include/lldb/Target/ProcessTrace.h b/lldb/include/lldb/Target/ProcessTrace.h
index 7b9d6b13dd6f..037dea232cc0 100644
--- a/lldb/include/lldb/Target/ProcessTrace.h
+++ b/lldb/include/lldb/Target/ProcessTrace.h
@@ -23,9 +23,9 @@ public:
static void Terminate();
- static ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "trace"; }
- static const char *GetPluginDescriptionStatic();
+ static llvm::StringRef GetPluginDescriptionStatic();
ProcessTrace(lldb::TargetSP target_sp, lldb::ListenerSP listener_sp);
@@ -40,9 +40,7 @@ public:
SystemRuntime *GetSystemRuntime() override { return nullptr; }
- ConstString GetPluginName() override;
-
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
Status DoDestroy() override;
@@ -50,9 +48,8 @@ public:
Status WillResume() override {
Status error;
- error.SetErrorStringWithFormat(
- "error: %s does not support resuming processes",
- GetPluginName().GetCString());
+ error.SetErrorStringWithFormatv(
+ "error: {0} does not support resuming processes", GetPluginName());
return error;
}
diff --git a/lldb/include/lldb/Target/RegisterContext.h b/lldb/include/lldb/Target/RegisterContext.h
index c5068feedd5b..392b462ecf07 100644
--- a/lldb/include/lldb/Target/RegisterContext.h
+++ b/lldb/include/lldb/Target/RegisterContext.h
@@ -31,10 +31,6 @@ public:
virtual const RegisterInfo *GetRegisterInfoAtIndex(size_t reg) = 0;
- // Detect the register size dynamically.
- uint32_t UpdateDynamicRegisterSize(const lldb_private::ArchSpec &arch,
- RegisterInfo *reg_info);
-
virtual size_t GetRegisterSetCount() = 0;
virtual const RegisterSet *GetRegisterSet(size_t reg_set) = 0;
diff --git a/lldb/include/lldb/Target/RemoteAwarePlatform.h b/lldb/include/lldb/Target/RemoteAwarePlatform.h
index 269d15299889..f2a4ffae2aae 100644
--- a/lldb/include/lldb/Target/RemoteAwarePlatform.h
+++ b/lldb/include/lldb/Target/RemoteAwarePlatform.h
@@ -64,8 +64,8 @@ public:
FileSpec &local_file) override;
bool GetRemoteOSVersion() override;
- bool GetRemoteOSBuildString(std::string &s) override;
- bool GetRemoteOSKernelDescription(std::string &s) override;
+ llvm::Optional<std::string> GetRemoteOSBuildString() override;
+ llvm::Optional<std::string> GetRemoteOSKernelDescription() override;
ArchSpec GetRemoteSystemArchitecture() override;
Status RunShellCommand(llvm::StringRef command, const FileSpec &working_dir,
diff --git a/lldb/include/lldb/Target/Statistics.h b/lldb/include/lldb/Target/Statistics.h
new file mode 100644
index 000000000000..087fbee26328
--- /dev/null
+++ b/lldb/include/lldb/Target/Statistics.h
@@ -0,0 +1,142 @@
+//===-- Statistics.h --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLDB_TARGET_STATISTICS_H
+#define LLDB_TARGET_STATISTICS_H
+
+#include <chrono>
+#include <string>
+#include <vector>
+
+#include "lldb/Utility/Stream.h"
+#include "lldb/lldb-forward.h"
+#include "llvm/Support/JSON.h"
+
+namespace lldb_private {
+
+using StatsClock = std::chrono::high_resolution_clock;
+using StatsDuration = std::chrono::duration<double>;
+using StatsTimepoint = std::chrono::time_point<StatsClock>;
+
+/// A class that measures elapsed time in an exception safe way.
+///
+/// This RAII class is designed to help gather timing statistics within
+/// LLDB where objects have optional Duration variables that get updated with
+/// elapsed times. This helps LLDB measure statistics for many things that are
+/// then reported in LLDB commands.
+///
+/// Objects that need to measure elapsed times should have a variable of type
+/// "StatsDuration m_time_xxx;" which can then be used in the constructor of
+/// this class inside a scope that wants to measure something:
+///
+/// ElapsedTime elapsed(m_time_xxx);
+/// // Do some work
+///
+/// This class will increment the m_time_xxx variable with the elapsed time
+/// when the object goes out of scope, so the same variable can accumulate
+/// elapsed time across multiple scopes. This allows a variable to measure
+/// something that might happen in stages at different times, like resolving
+/// a breakpoint each time a new shared library is loaded.
+class ElapsedTime {
+public:
+ /// Set to the start time when the object is created.
+ StatsTimepoint m_start_time;
+ /// Elapsed time in seconds to increment when this object goes out of scope.
+ StatsDuration &m_elapsed_time;
+
+public:
+ ElapsedTime(StatsDuration &opt_time) : m_elapsed_time(opt_time) {
+ m_start_time = StatsClock::now();
+ }
+ ~ElapsedTime() {
+ StatsDuration elapsed = StatsClock::now() - m_start_time;
+ m_elapsed_time += elapsed;
+ }
+};
+
+/// A class to count success/fail statistics.
+struct StatsSuccessFail {
+ StatsSuccessFail(llvm::StringRef n) : name(n.str()) {}
+
+ void NotifySuccess() { ++successes; }
+ void NotifyFailure() { ++failures; }
+
+ llvm::json::Value ToJSON() const;
+ std::string name;
+ uint32_t successes = 0;
+ uint32_t failures = 0;
+};
+
+/// A class that represents statistics for a single lldb_private::Module.
+struct ModuleStats {
+ llvm::json::Value ToJSON() const;
+ intptr_t identifier;
+ std::string path;
+ std::string uuid;
+ std::string triple;
+ double symtab_parse_time = 0.0;
+ double symtab_index_time = 0.0;
+ double debug_parse_time = 0.0;
+ double debug_index_time = 0.0;
+ uint64_t debug_info_size = 0;
+};
+
+/// A class that represents statistics for a single lldb_private::Target.
+class TargetStats {
+public:
+ llvm::json::Value ToJSON(Target &target);
+
+ void SetLaunchOrAttachTime();
+ void SetFirstPrivateStopTime();
+ void SetFirstPublicStopTime();
+
+ StatsDuration &GetCreateTime() { return m_create_time; }
+ StatsSuccessFail &GetExpressionStats() { return m_expr_eval; }
+ StatsSuccessFail &GetFrameVariableStats() { return m_frame_var; }
+
+protected:
+ StatsDuration m_create_time{0.0};
+ llvm::Optional<StatsTimepoint> m_launch_or_attach_time;
+ llvm::Optional<StatsTimepoint> m_first_private_stop_time;
+ llvm::Optional<StatsTimepoint> m_first_public_stop_time;
+ StatsSuccessFail m_expr_eval{"expressionEvaluation"};
+ StatsSuccessFail m_frame_var{"frameVariable"};
+ std::vector<intptr_t> m_module_identifiers;
+ void CollectStats(Target &target);
+};
+
+class DebuggerStats {
+public:
+ static void SetCollectingStats(bool enable) { g_collecting_stats = enable; }
+ static bool GetCollectingStats() { return g_collecting_stats; }
+
+ /// Get metrics associated with one or all targets in a debugger in JSON
+ /// format.
+ ///
+ /// \param debugger
+ /// The debugger to get the target list from if \a target is NULL.
+ ///
+ /// \param target
+ /// The single target to emit statistics for if non NULL, otherwise dump
+ /// statistics for all targets in \a debugger.
+ ///
+ /// \return
+ /// Returns a JSON value that contains all target metrics.
+ static llvm::json::Value ReportStatistics(Debugger &debugger, Target *target);
+
+protected:
+ // Collecting stats can be set to true to collect stats that are expensive
+ // to collect. By default all stats that are cheap to collect are enabled.
+ // This setting is here to maintain compatibility with "statistics enable"
+ // and "statistics disable".
+ static bool g_collecting_stats;
+};
+
+} // namespace lldb_private
+
+#endif // LLDB_TARGET_STATISTICS_H
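A minimal usage sketch of the ElapsedTime helper declared above; the struct and member names are illustrative, not part of the patch:

struct ExampleParser {
  lldb_private::StatsDuration m_parse_time{0.0};

  void Parse() {
    lldb_private::ElapsedTime elapsed(m_parse_time); // timing starts here
    // ... do the actual parsing work ...
  } // destructor adds the elapsed seconds to m_parse_time
};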
diff --git a/lldb/include/lldb/Target/StopInfo.h b/lldb/include/lldb/Target/StopInfo.h
index 0e81e5160846..cdb906dcd7ed 100644
--- a/lldb/include/lldb/Target/StopInfo.h
+++ b/lldb/include/lldb/Target/StopInfo.h
@@ -132,6 +132,16 @@ public:
static lldb::StopInfoSP
CreateStopReasonProcessorTrace(Thread &thread, const char *description);
+ static lldb::StopInfoSP CreateStopReasonFork(Thread &thread,
+ lldb::pid_t child_pid,
+ lldb::tid_t child_tid);
+
+ static lldb::StopInfoSP CreateStopReasonVFork(Thread &thread,
+ lldb::pid_t child_pid,
+ lldb::tid_t child_tid);
+
+ static lldb::StopInfoSP CreateStopReasonVForkDone(Thread &thread);
+
static lldb::ValueObjectSP
GetReturnValueObject(lldb::StopInfoSP &stop_info_sp);
diff --git a/lldb/include/lldb/Target/Target.h b/lldb/include/lldb/Target/Target.h
index ac8d002b09a1..7e8e1373a506 100644
--- a/lldb/include/lldb/Target/Target.h
+++ b/lldb/include/lldb/Target/Target.h
@@ -28,6 +28,7 @@
#include "lldb/Target/ExecutionContextScope.h"
#include "lldb/Target/PathMappingList.h"
#include "lldb/Target/SectionLoadHistory.h"
+#include "lldb/Target/Statistics.h"
#include "lldb/Target/ThreadSpec.h"
#include "lldb/Utility/ArchSpec.h"
#include "lldb/Utility/Broadcaster.h"
@@ -122,7 +123,16 @@ public:
void SetRunArguments(const Args &args);
+ // Get the whole environment including the platform inherited environment and
+ // the target specific environment, excluding the unset environment variables.
Environment GetEnvironment() const;
+ // Get the platform inherited environment, excluding the unset environment
+ // variables.
+ Environment GetInheritedEnvironment() const;
+ // Get the target specific environment only, without the platform inherited
+ // environment.
+ Environment GetTargetEnvironment() const;
+ // Set the target specific environment.
void SetEnvironment(Environment env);
bool GetSkipPrologue() const;
@@ -197,10 +207,6 @@ public:
void SetUserSpecifiedTrapHandlerNames(const Args &args);
- bool GetNonStopModeEnabled() const;
-
- void SetNonStopModeEnabled(bool b);
-
bool GetDisplayRuntimeSupportValues() const;
void SetDisplayRuntimeSupportValues(bool b);
@@ -557,7 +563,7 @@ public:
// Settings accessors
- static const lldb::TargetPropertiesSP &GetGlobalProperties();
+ static TargetProperties &GetGlobalProperties();
std::recursive_mutex &GetAPIMutex();
@@ -1017,6 +1023,37 @@ public:
size_t ReadCStringFromMemory(const Address &addr, char *dst,
size_t dst_max_len, Status &result_error);
+ /// Read a NULL terminated string from memory
+ ///
+ /// This function will read a cache page at a time until a NULL string
+ /// terminator is found. It will stop reading if an aligned sequence of NULL
+ /// termination \a type_width bytes is not found before reading \a
+ /// cstr_max_len bytes. The results are always guaranteed to be NULL
+ /// terminated, and that no more than (max_bytes - type_width) bytes will be
+ /// read.
+ ///
+ /// \param[in] addr
+ /// The address to start the memory read.
+ ///
+ /// \param[in] dst
+ /// A character buffer containing at least max_bytes.
+ ///
+ /// \param[in] max_bytes
+ /// The maximum number of bytes to read.
+ ///
+ /// \param[in] error
+ /// The error status of the read operation.
+ ///
+ /// \param[in] type_width
+ /// The size of the null terminator (1 to 4 bytes per
+ /// character). Defaults to 1.
+ ///
+ /// \return
+ /// The error status or the number of bytes prior to the null terminator.
+ size_t ReadStringFromMemory(const Address &addr, char *dst, size_t max_bytes,
+ Status &error, size_t type_width,
+ bool force_live_memory = true);
+
size_t ReadScalarIntegerFromMemory(const Address &addr, uint32_t byte_size,
bool is_signed, Scalar &scalar,
Status &error,
@@ -1446,23 +1483,22 @@ protected:
// Utilities for `statistics` command.
private:
- std::vector<uint32_t> m_stats_storage;
- bool m_collecting_stats = false;
+ // Target metrics storage.
+ TargetStats m_stats;
public:
- void SetCollectingStats(bool v) { m_collecting_stats = v; }
-
- bool GetCollectingStats() { return m_collecting_stats; }
-
- void IncrementStats(lldb_private::StatisticKind key) {
- if (!GetCollectingStats())
- return;
- lldbassert(key < lldb_private::StatisticKind::StatisticMax &&
- "invalid statistics!");
- m_stats_storage[key] += 1;
- }
+ /// Get metrics associated with this target in JSON format.
+ ///
+ /// Target metrics help measure timings and information that is contained in
+ /// a target. These are designed to help measure performance of a debug
+ /// session as well as represent the current state of the target, like
+ /// information on the current modules, currently set breakpoints and more.
+ ///
+ /// \return
+ /// Returns a JSON value that contains all target metrics.
+ llvm::json::Value ReportStatistics();
- std::vector<uint32_t> GetStatistics() { return m_stats_storage; }
+ TargetStats &GetStatistics() { return m_stats; }
private:
/// Construct with optional file and arch.
@@ -1485,6 +1521,10 @@ private:
void FinalizeFileActions(ProcessLaunchInfo &info);
+ /// Return a recommended size for memory reads at \a addr, optimizing for
+ /// cache usage.
+ lldb::addr_t GetReasonableReadSize(const Address &addr);
+
Target(const Target &) = delete;
const Target &operator=(const Target &) = delete;
};
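A hedged usage sketch of Target::ReadStringFromMemory declared above; the target_sp variable and the address are assumptions for illustration:

char buf[256];
Status error;
Address addr(0x100003f80);
size_t len = target_sp->ReadStringFromMemory(addr, buf, sizeof(buf), error,
                                             /*type_width=*/1);
// On success, buf holds a NULL terminated string and len is the number of
// bytes before the terminator.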
diff --git a/lldb/include/lldb/Target/Thread.h b/lldb/include/lldb/Target/Thread.h
index 0f6b5741573e..91feed310eb9 100644
--- a/lldb/include/lldb/Target/Thread.h
+++ b/lldb/include/lldb/Target/Thread.h
@@ -57,8 +57,6 @@ public:
uint64_t GetMaxBacktraceDepth() const;
};
-typedef std::shared_ptr<ThreadProperties> ThreadPropertiesSP;
-
class Thread : public std::enable_shared_from_this<Thread>,
public ThreadProperties,
public UserID,
@@ -149,7 +147,7 @@ public:
static void SettingsTerminate();
- static const ThreadPropertiesSP &GetGlobalProperties();
+ static ThreadProperties &GetGlobalProperties();
lldb::ProcessSP GetProcess() const { return m_process_wp.lock(); }
@@ -1017,7 +1015,8 @@ public:
/// Discards the plans queued on the plan stack of the current thread. This
/// is
- /// arbitrated by the "Master" ThreadPlans, using the "OkayToDiscard" call.
+ /// arbitrated by the "Controlling" ThreadPlans, using the "OkayToDiscard"
+ /// call.
// But if \a force is true, all thread plans are discarded.
void DiscardThreadPlans(bool force);
diff --git a/lldb/include/lldb/Target/ThreadPlan.h b/lldb/include/lldb/Target/ThreadPlan.h
index 5e14a1fd6577..616939f89fc8 100644
--- a/lldb/include/lldb/Target/ThreadPlan.h
+++ b/lldb/include/lldb/Target/ThreadPlan.h
@@ -81,7 +81,7 @@ namespace lldb_private {
//
// Cleaning up after your plans:
//
-// When the plan is moved from the plan stack its WillPop method is always
+// When the plan is moved from the plan stack its DidPop method is always
// called, no matter why. Once it is moved off the plan stack it is done, and
// won't get a chance to run again. So you should undo anything that affects
// target state in this method. But be sure to leave the plan able to
@@ -144,39 +144,42 @@ namespace lldb_private {
// implement DoPlanExplainsStop, the result is cached in PlanExplainsStop so
// the DoPlanExplainsStop itself will only get called once per stop.
//
-// Master plans:
+// Controlling plans:
//
// In the normal case, when we decide to stop, we will collapse the plan
// stack up to the point of the plan that understood the stop reason.
// However, if a plan wishes to stay on the stack after an event it didn't
-// directly handle it can designate itself a "Master" plan by responding true
-// to IsMasterPlan, and then if it wants not to be discarded, it can return
-// false to OkayToDiscard, and it and all its dependent plans will be
+// directly handle it can designate itself a "Controlling" plan by responding
+// true to IsControllingPlan, and then if it wants not to be discarded, it can
+// return false to OkayToDiscard, and it and all its dependent plans will be
// preserved when we resume execution.
//
-// The other effect of being a master plan is that when the Master plan is
+// The other effect of being a controlling plan is that when the Controlling
+// plan is
// done, if it has set "OkayToDiscard" to false, then it will be popped &
// execution will stop and return to the user. Remember that if OkayToDiscard
// is false, the plan will be popped and control will be given to the next
// plan above it on the stack. So setting OkayToDiscard to false means the
-// user will regain control when the MasterPlan is completed.
+// user will regain control when the ControllingPlan is completed.
//
// Between these two controls this allows things like: a
-// MasterPlan/DontDiscard Step Over to hit a breakpoint, stop and return
+// ControllingPlan/DontDiscard Step Over to hit a breakpoint, stop and return
// control to the user, but then when the user continues, the step out
// succeeds. Even more tricky, when the breakpoint is hit, the user can
// continue to step in/step over/etc, and finally when they continue, they
// will finish up the Step Over.
//
-// FIXME: MasterPlan & OkayToDiscard aren't really orthogonal. MasterPlan
+// FIXME: ControllingPlan & OkayToDiscard aren't really orthogonal.
+// ControllingPlan
// designation means that this plan controls its fate and the fate of plans
-// below it. OkayToDiscard tells whether the MasterPlan wants to stay on the
-// stack. I originally thought "MasterPlan-ness" would need to be a fixed
+// below it. OkayToDiscard tells whether the ControllingPlan wants to stay on
+// the stack. I originally thought "ControllingPlan-ness" would need to be a
+// fixed
// characteristic of a ThreadPlan, in which case you needed the extra control.
// But that doesn't seem to be true. So we should be able to convert to only
-// MasterPlan status to mean the current "MasterPlan/DontDiscard". Then no
-// plans would be MasterPlans by default, and you would set the ones you
-// wanted to be "user level" in this way.
+// ControllingPlan status to mean the current "ControllingPlan/DontDiscard".
+// Then no plans would be ControllingPlans by default, and you would set the
+// ones you wanted to be "user level" in this way.
//
//
// Actually Stopping:
@@ -224,9 +227,11 @@ namespace lldb_private {
//
// Cleaning up the plan stack:
//
-// One of the complications of MasterPlans is that you may get past the limits
+// One of the complications of ControllingPlans is that you may get past the
+// limits
// of a plan without triggering it to clean itself up. For instance, if you
-// are doing a MasterPlan StepOver, and hit a breakpoint in a called function,
+// are doing a ControllingPlan StepOver, and hit a breakpoint in a called
+// function,
// then step over enough times to step out of the initial StepOver range, each
// of the step overs will explain the stop & take themselves off the stack,
// but control would never be returned to the original StepOver. Eventually,
@@ -386,11 +391,11 @@ public:
virtual bool WillStop() = 0;
- bool IsMasterPlan() { return m_is_master_plan; }
+ bool IsControllingPlan() { return m_is_controlling_plan; }
- bool SetIsMasterPlan(bool value) {
- bool old_value = m_is_master_plan;
- m_is_master_plan = value;
+ bool SetIsControllingPlan(bool value) {
+ bool old_value = m_is_controlling_plan;
+ m_is_controlling_plan = value;
return old_value;
}
@@ -413,7 +418,7 @@ public:
virtual void DidPush();
- virtual void WillPop();
+ virtual void DidPop();
ThreadPlanKind GetKind() const { return m_kind; }
@@ -490,12 +495,12 @@ protected:
virtual bool DoPlanExplainsStop(Event *event_ptr) = 0;
// This pushes a plan onto the plan stack of the current plan's thread.
- // Also sets the plans to private and not master plans. A plan pushed by
+ // Also sets the plans to private and not controlling plans. A plan pushed by
// another thread plan is never either of the above.
void PushPlan(lldb::ThreadPlanSP &thread_plan_sp) {
GetThread().PushPlan(thread_plan_sp);
thread_plan_sp->SetPrivate(true);
- thread_plan_sp->SetIsMasterPlan(false);
+ thread_plan_sp->SetIsControllingPlan(false);
}
// This gets the previous plan to the current plan (for forwarding requests).
@@ -546,7 +551,7 @@ private:
bool m_plan_complete;
bool m_plan_private;
bool m_okay_to_discard;
- bool m_is_master_plan;
+ bool m_is_controlling_plan;
bool m_plan_succeeded;
lldb::ThreadPlanTracerSP m_tracer_sp;
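For reference, a brief hedged sketch of how the renamed controlling-plan API is meant to be used (this mirrors the SBThread change further down in this patch); `new_plan` is an assumed lldb_private::ThreadPlan pointer:

    // User-level plans are marked as controlling plans so they can be
    // interrupted, other plans run, and a "continue" resumes the plan.
    new_plan->SetIsControllingPlan(true);
    new_plan->SetOkayToDiscard(false); // return control to the user when done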
diff --git a/lldb/include/lldb/Target/ThreadPlanCallFunction.h b/lldb/include/lldb/Target/ThreadPlanCallFunction.h
index 24c5736f44c3..cb6e7caebb4a 100644
--- a/lldb/include/lldb/Target/ThreadPlanCallFunction.h
+++ b/lldb/include/lldb/Target/ThreadPlanCallFunction.h
@@ -68,10 +68,10 @@ public:
// been cleaned up.
lldb::addr_t GetFunctionStackPointer() { return m_function_sp; }
- // Classes that derive from FunctionCaller, and implement their own WillPop
+ // Classes that derive from FunctionCaller, and implement their own DidPop
// methods should call this so that the thread state gets restored if the
// plan gets discarded.
- void WillPop() override;
+ void DidPop() override;
// If the thread plan stops mid-course, this will be the stop reason that
// interrupted us. Once DoTakedown is called, this will be the real stop
diff --git a/lldb/include/lldb/Target/ThreadPlanCallUserExpression.h b/lldb/include/lldb/Target/ThreadPlanCallUserExpression.h
index adaea6c7056f..11e126a2da9c 100644
--- a/lldb/include/lldb/Target/ThreadPlanCallUserExpression.h
+++ b/lldb/include/lldb/Target/ThreadPlanCallUserExpression.h
@@ -32,7 +32,7 @@ public:
void DidPush() override;
- void WillPop() override;
+ void DidPop() override;
lldb::StopInfoSP GetRealStopInfo() override;
diff --git a/lldb/include/lldb/Target/ThreadPlanStack.h b/lldb/include/lldb/Target/ThreadPlanStack.h
index e0f76f8e1df5..90f1ea3a284b 100644
--- a/lldb/include/lldb/Target/ThreadPlanStack.h
+++ b/lldb/include/lldb/Target/ThreadPlanStack.h
@@ -60,7 +60,7 @@ public:
void DiscardAllPlans();
- void DiscardConsultingMasterPlans();
+ void DiscardConsultingControllingPlans();
lldb::ThreadPlanSP GetCurrentPlan() const;
diff --git a/lldb/include/lldb/Target/ThreadPlanStepOverBreakpoint.h b/lldb/include/lldb/Target/ThreadPlanStepOverBreakpoint.h
index 86f7798487c3..1f3aff45c49a 100644
--- a/lldb/include/lldb/Target/ThreadPlanStepOverBreakpoint.h
+++ b/lldb/include/lldb/Target/ThreadPlanStepOverBreakpoint.h
@@ -26,7 +26,7 @@ public:
bool StopOthers() override;
lldb::StateType GetPlanRunState() override;
bool WillStop() override;
- void WillPop() override;
+ void DidPop() override;
bool MischiefManaged() override;
void ThreadDestroyed() override;
void SetAutoContinue(bool do_it);
diff --git a/lldb/include/lldb/Target/Trace.h b/lldb/include/lldb/Target/Trace.h
index f5654988b201..643b761cdb89 100644
--- a/lldb/include/lldb/Target/Trace.h
+++ b/lldb/include/lldb/Target/Trace.h
@@ -20,6 +20,7 @@
#include "lldb/Utility/TraceGDBRemotePackets.h"
#include "lldb/Utility/UnimplementedError.h"
#include "lldb/lldb-private.h"
+#include "lldb/lldb-types.h"
namespace lldb_private {
@@ -55,6 +56,22 @@ public:
/// A stream object to dump the information to.
virtual void Dump(Stream *s) const = 0;
+ /// Save the trace of a live process to the specified directory, which
+ /// will be created if needed.
+ /// This will also create a file \a <directory>/trace.json with the main
+ /// properties of the trace session, along with other files which contain
+ /// the actual trace data. The trace.json file can be used later as input
+ /// for the "trace load" command to load the trace in LLDB.
+ /// If the process being traced is not a live process, an error is returned.
+ ///
+ /// \param[in] directory
+ /// The directory where the trace files will be saved.
+ ///
+ /// \return
+ /// \a llvm::Error::success() if the operation was successful, or an \a llvm::Error
+ /// otherwise.
+ virtual llvm::Error SaveLiveTraceToDisk(FileSpec directory) = 0;
+
/// Find a trace plug-in using JSON data.
///
/// When loading trace data from disk, the information for the trace data
@@ -156,12 +173,12 @@ public:
/// Check if a thread is currently traced by this object.
///
- /// \param[in] thread
- /// The thread in question.
+ /// \param[in] tid
+ /// The id of the thread in question.
///
/// \return
/// \b true if the thread is traced by this instance, \b false otherwise.
- virtual bool IsTraced(const Thread &thread) = 0;
+ virtual bool IsTraced(lldb::tid_t tid) = 0;
/// \return
/// A description of the parameters to use for the \a Trace::Start method.
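A hedged sketch of driving the two new virtual Trace methods above; `trace_sp` is an assumed live TraceSP, `tid` an assumed traced thread id, and the output directory is illustrative:

    if (trace_sp->IsTraced(tid)) {
      // Persist the live trace; trace.json plus the raw trace data are written.
      llvm::Error err = trace_sp->SaveLiveTraceToDisk(FileSpec("/tmp/trace"));
      if (err)
        llvm::errs() << llvm::toString(std::move(err)) << "\n";
    }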
diff --git a/lldb/include/lldb/Target/TraceExporter.h b/lldb/include/lldb/Target/TraceExporter.h
index 6560b39fd42e..d5a06c499a6e 100644
--- a/lldb/include/lldb/Target/TraceExporter.h
+++ b/lldb/include/lldb/Target/TraceExporter.h
@@ -10,6 +10,8 @@
#define LLDB_TARGET_TRACE_EXPORTER_H
#include "lldb/Core/PluginInterface.h"
+#include "lldb/lldb-forward.h"
+#include "llvm/Support/Error.h"
namespace lldb_private {
diff --git a/lldb/include/lldb/Target/UnixSignals.h b/lldb/include/lldb/Target/UnixSignals.h
index 6fecdda12def..1c91c9fdd489 100644
--- a/lldb/include/lldb/Target/UnixSignals.h
+++ b/lldb/include/lldb/Target/UnixSignals.h
@@ -16,6 +16,7 @@
#include "lldb/Utility/ConstString.h"
#include "lldb/lldb-private.h"
#include "llvm/ADT/Optional.h"
+#include "llvm/Support/JSON.h"
namespace lldb_private {
@@ -80,6 +81,18 @@ public:
void RemoveSignal(int signo);
+ /// Track how many times signals are hit as stop reasons.
+ void IncrementSignalHitCount(int signo);
+
+ /// Get the hit count statistics for signals.
+ ///
+ /// Getting statistics on the hit counts of signals can help explain why some
+ /// debug sessions are slow, since each stop takes a few hundred ms and some
+ /// software uses signals heavily, which can cause slow debugging performance
+ /// when they are used too often. Even if a signal is not stopped at, it will
+ /// auto continue the process and a delay will still happen.
+ llvm::json::Value GetHitCountStatistics() const;
+
// Returns a current version of the data stored in this class. Version gets
// incremented each time Set... method is called.
uint64_t GetVersion() const;
@@ -99,6 +112,7 @@ protected:
ConstString m_name;
ConstString m_alias;
std::string m_description;
+ uint32_t m_hit_count = 0;
bool m_suppress : 1, m_stop : 1, m_notify : 1;
Signal(const char *name, bool default_suppress, bool default_stop,
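A minimal sketch of the new hit-count plumbing, assuming `signals_sp` is the UnixSignalsSP of a process that just stopped with SIGSEGV:

    signals_sp->IncrementSignalHitCount(SIGSEGV);  // record the stop reason
    // The counters are reported as JSON for the statistics machinery.
    llvm::json::Value hit_counts = signals_sp->GetHitCountStatistics();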
diff --git a/lldb/include/lldb/Utility/ConstString.h b/lldb/include/lldb/Utility/ConstString.h
index 52d3556418f6..2756f1fd7203 100644
--- a/lldb/include/lldb/Utility/ConstString.h
+++ b/lldb/include/lldb/Utility/ConstString.h
@@ -409,7 +409,7 @@ public:
static size_t StaticMemorySize();
protected:
- template <typename T> friend struct ::llvm::DenseMapInfo;
+ template <typename T, typename Enable> friend struct ::llvm::DenseMapInfo;
/// Only used by DenseMapInfo.
static ConstString FromStringPoolPointer(const char *ptr) {
ConstString s;
diff --git a/lldb/include/lldb/Utility/DataExtractor.h b/lldb/include/lldb/Utility/DataExtractor.h
index 0923e5280cba..dbf0bce8c8d0 100644
--- a/lldb/include/lldb/Utility/DataExtractor.h
+++ b/lldb/include/lldb/Utility/DataExtractor.h
@@ -134,7 +134,12 @@ public:
DataExtractor(const DataExtractor &data, lldb::offset_t offset,
lldb::offset_t length, uint32_t target_byte_size = 1);
- DataExtractor(const DataExtractor &rhs);
+ /// Copy constructor.
+ ///
+ /// The copy constructor is explicit as otherwise it is easy to make
+ /// unintended modification of a local copy instead of a caller's instance.
+ /// Also a needless copy of the \a m_data_sp shared pointer is expensive.
+ explicit DataExtractor(const DataExtractor &rhs);
/// Assignment operator.
///
@@ -149,6 +154,12 @@ public:
/// A const reference to this object.
const DataExtractor &operator=(const DataExtractor &rhs);
+ /// Move constructor and move assignment operators to complete the rule of 5.
+ ///
+ /// They would otherwise be deleted since the rule-of-3 members are already defined.
+ DataExtractor(DataExtractor &&rhs) = default;
+ DataExtractor &operator=(DataExtractor &&rhs) = default;
+
/// Destructor
///
/// If this object contains a valid shared data reference, the reference
@@ -1005,7 +1016,8 @@ protected:
uint32_t m_addr_size; ///< The address size to use when extracting addresses.
/// The shared pointer to data that can be shared among multiple instances
lldb::DataBufferSP m_data_sp;
- const uint32_t m_target_byte_size = 1;
+ /// Making it const would require implementing the move assignment operator.
+ uint32_t m_target_byte_size = 1;
};
} // namespace lldb_private
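A short sketch illustrating the rule-of-5 change above, assuming `data_sp` is an existing lldb::DataBufferSP; accidental implicit copies no longer compile, while moves stay cheap:

    lldb_private::DataExtractor original(data_sp, lldb::eByteOrderLittle,
                                         /*addr_size=*/8);
    lldb_private::DataExtractor copy(original);             // copy must be spelled out
    lldb_private::DataExtractor moved(std::move(original)); // defaulted move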
diff --git a/lldb/include/lldb/Utility/Environment.h b/lldb/include/lldb/Utility/Environment.h
index e2af2eb2463d..24cbee246f83 100644
--- a/lldb/include/lldb/Utility/Environment.h
+++ b/lldb/include/lldb/Utility/Environment.h
@@ -73,7 +73,7 @@ public:
return insert(std::make_pair(Split.first, std::string(Split.second)));
}
- void insert(const_iterator first, const_iterator last);
+ void insert(iterator first, iterator last);
Envp getEnvp() const { return Envp(*this); }
diff --git a/lldb/include/lldb/Utility/FileSpec.h b/lldb/include/lldb/Utility/FileSpec.h
index 0f4e6505e433..305cd04f95c0 100644
--- a/lldb/include/lldb/Utility/FileSpec.h
+++ b/lldb/include/lldb/Utility/FileSpec.h
@@ -202,7 +202,7 @@ public:
/// \return
/// \b true if the file path is case sensitive (POSIX), false
/// if case insensitive (Windows).
- bool IsCaseSensitive() const { return m_style != Style::windows; }
+ bool IsCaseSensitive() const { return is_style_posix(m_style); }
/// Dump this object to a Stream.
///
diff --git a/lldb/include/lldb/Utility/ReproducerInstrumentation.h b/lldb/include/lldb/Utility/ReproducerInstrumentation.h
index 2b2d273a17a8..6c5d27879d36 100644
--- a/lldb/include/lldb/Utility/ReproducerInstrumentation.h
+++ b/lldb/include/lldb/Utility/ReproducerInstrumentation.h
@@ -868,17 +868,14 @@ public:
/// Mark the current thread as a private thread and pretend that everything
/// on this thread is happening behind the API boundary.
- static void PrivateThread() { g_global_boundary = true; }
+ static void PrivateThread();
private:
static unsigned GetNextSequenceNumber() { return g_sequence++; }
unsigned GetSequenceNumber() const;
template <typename T> friend struct replay;
- void UpdateBoundary() {
- if (m_local_boundary)
- g_global_boundary = false;
- }
+ void UpdateBoundary();
#ifdef LLDB_REPRO_INSTR_TRACE
void Log(unsigned id) {
@@ -902,9 +899,6 @@ private:
/// The sequence number for this pair of function and result.
unsigned m_sequence;
- /// Whether we're currently across the API boundary.
- static thread_local bool g_global_boundary;
-
/// Global mutex to protect concurrent access.
static std::mutex g_mutex;
diff --git a/lldb/include/lldb/Utility/Status.h b/lldb/include/lldb/Utility/Status.h
index 61d663bdccba..bee2b57b6ea9 100644
--- a/lldb/include/lldb/Utility/Status.h
+++ b/lldb/include/lldb/Utility/Status.h
@@ -184,16 +184,6 @@ public:
/// success (non-error), \b false otherwise.
bool Success() const;
- /// Test for a failure due to a generic interrupt.
- ///
- /// Returns true if the error code in this object was caused by an
- /// interrupt. At present only supports Posix EINTR.
- ///
- /// \return
- /// \b true if this object contains an value that describes
- /// failure due to interrupt, \b false otherwise.
- bool WasInterrupted() const;
-
protected:
/// Member variables
ValueType m_code = 0; ///< Status code as an integer value.
diff --git a/lldb/include/lldb/Utility/StringExtractorGDBRemote.h b/lldb/include/lldb/Utility/StringExtractorGDBRemote.h
index c67c05bdf182..1712c113d396 100644
--- a/lldb/include/lldb/Utility/StringExtractorGDBRemote.h
+++ b/lldb/include/lldb/Utility/StringExtractorGDBRemote.h
@@ -88,6 +88,7 @@ public:
eServerPacketType_vFile_mode,
eServerPacketType_vFile_exists,
eServerPacketType_vFile_md5,
+ eServerPacketType_vFile_fstat,
eServerPacketType_vFile_stat,
eServerPacketType_vFile_symlink,
eServerPacketType_vFile_unlink,
@@ -135,6 +136,7 @@ public:
eServerPacketType_vAttachName,
eServerPacketType_vCont,
eServerPacketType_vCont_actions, // vCont?
+ eServerPacketType_vRun,
eServerPacketType_stop_reason, // '?'
@@ -170,6 +172,8 @@ public:
eServerPacketType_qMemTags, // read memory tags
eServerPacketType_QMemTags, // write memory tags
+
+ eServerPacketType_qLLDBSaveCore,
};
ServerPacketType GetServerPacketType() const;
diff --git a/lldb/include/lldb/Utility/StructuredData.h b/lldb/include/lldb/Utility/StructuredData.h
index 4d03af18e527..c1d136db1c2e 100644
--- a/lldb/include/lldb/Utility/StructuredData.h
+++ b/lldb/include/lldb/Utility/StructuredData.h
@@ -353,6 +353,17 @@ public:
public:
Dictionary() : Object(lldb::eStructuredDataTypeDictionary), m_dict() {}
+ Dictionary(ObjectSP obj_sp)
+ : Object(lldb::eStructuredDataTypeDictionary), m_dict() {
+ if (!obj_sp || obj_sp->GetType() != lldb::eStructuredDataTypeDictionary) {
+ SetType(lldb::eStructuredDataTypeInvalid);
+ return;
+ }
+
+ Dictionary *dict = obj_sp->GetAsDictionary();
+ m_dict = dict->m_dict;
+ }
+
~Dictionary() override = default;
size_t GetSize() const { return m_dict.size(); }
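A minimal sketch of the new ObjectSP-based Dictionary constructor; the JSON literal is illustrative, and callers are expected to check for the invalid-type sentinel when the wrapped object is not a dictionary:

    StructuredData::ObjectSP obj_sp = StructuredData::ParseJSON("{\"pid\": 123}");
    StructuredData::Dictionary dict(obj_sp);
    if (dict.GetType() != lldb::eStructuredDataTypeInvalid)
      printf("dictionary with %zu keys\n", dict.GetSize());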
diff --git a/lldb/include/lldb/Utility/Timer.h b/lldb/include/lldb/Utility/Timer.h
index c70c18049426..201378bbeb2c 100644
--- a/lldb/include/lldb/Utility/Timer.h
+++ b/lldb/include/lldb/Utility/Timer.h
@@ -9,16 +9,11 @@
#ifndef LLDB_UTILITY_TIMER_H
#define LLDB_UTILITY_TIMER_H
-#include "llvm/ADT/ScopeExit.h"
+#include "lldb/lldb-defines.h"
#include "llvm/Support/Chrono.h"
-#include "llvm/Support/Signposts.h"
#include <atomic>
#include <cstdint>
-namespace llvm {
- class SignpostEmitter;
-}
-
namespace lldb_private {
class Stream;
@@ -81,28 +76,15 @@ private:
const Timer &operator=(const Timer &) = delete;
};
-llvm::SignpostEmitter &GetSignposts();
-
} // namespace lldb_private
// Use a format string because LLVM_PRETTY_FUNCTION might not be a string
// literal.
#define LLDB_SCOPED_TIMER() \
static ::lldb_private::Timer::Category _cat(LLVM_PRETTY_FUNCTION); \
- ::lldb_private::Timer _scoped_timer(_cat, "%s", LLVM_PRETTY_FUNCTION); \
- SIGNPOST_EMITTER_START_INTERVAL(::lldb_private::GetSignposts(), \
- &_scoped_timer, "%s", LLVM_PRETTY_FUNCTION); \
- auto _scoped_signpost = llvm::make_scope_exit([&_scoped_timer]() { \
- ::lldb_private::GetSignposts().endInterval(&_scoped_timer); \
- })
-
-#define LLDB_SCOPED_TIMERF(FMT, ...) \
+ ::lldb_private::Timer _scoped_timer(_cat, "%s", LLVM_PRETTY_FUNCTION)
+#define LLDB_SCOPED_TIMERF(...) \
static ::lldb_private::Timer::Category _cat(LLVM_PRETTY_FUNCTION); \
- ::lldb_private::Timer _scoped_timer(_cat, FMT, __VA_ARGS__); \
- SIGNPOST_EMITTER_START_INTERVAL(::lldb_private::GetSignposts(), \
- &_scoped_timer, FMT, __VA_ARGS__); \
- auto _scoped_signpost = llvm::make_scope_exit([&_scoped_timer]() { \
- ::lldb_private::GetSignposts().endInterval(&_scoped_timer); \
- })
+ ::lldb_private::Timer _scoped_timer(_cat, __VA_ARGS__)
#endif // LLDB_UTILITY_TIMER_H
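A hedged sketch of the simplified macros after the signpost removal; the function names and `num_symbols` parameter are placeholders:

    void ResolveAllSymbols() {
      LLDB_SCOPED_TIMER();  // times this function under LLVM_PRETTY_FUNCTION
      // ... work ...
    }

    void ResolveSomeSymbols(uint32_t num_symbols) {
      // The format string is now simply part of the variadic arguments.
      LLDB_SCOPED_TIMERF("resolving %u symbols", num_symbols);
      // ... work ...
    }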
diff --git a/lldb/include/lldb/Utility/UriParser.h b/lldb/include/lldb/Utility/UriParser.h
index 6a64c3d747b5..3c0f8d2273d0 100644
--- a/lldb/include/lldb/Utility/UriParser.h
+++ b/lldb/include/lldb/Utility/UriParser.h
@@ -9,23 +9,27 @@
#ifndef LLDB_UTILITY_URIPARSER_H
#define LLDB_UTILITY_URIPARSER_H
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
namespace lldb_private {
-class UriParser {
-public:
- // Parses
- // RETURN VALUE
- // if url is valid, function returns true and
- // scheme/hostname/port/path are set to the parsed values
- // port it set to -1 if it is not included in the URL
- //
- // if the url is invalid, function returns false and
- // output parameters remain unchanged
- static bool Parse(llvm::StringRef uri, llvm::StringRef &scheme,
- llvm::StringRef &hostname, int &port,
- llvm::StringRef &path);
+
+struct URI {
+ llvm::StringRef scheme;
+ llvm::StringRef hostname;
+ llvm::Optional<uint16_t> port;
+ llvm::StringRef path;
+
+ bool operator==(const URI &R) const {
+ return port == R.port && scheme == R.scheme && hostname == R.hostname &&
+ path == R.path;
+ }
+
+ static llvm::Optional<URI> Parse(llvm::StringRef uri);
};
-}
+
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const URI &U);
+
+} // namespace lldb_private
#endif // LLDB_UTILITY_URIPARSER_H
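A minimal sketch of the reworked parser API above; the URI literal is illustrative:

    if (llvm::Optional<lldb_private::URI> uri =
            lldb_private::URI::Parse("connect://localhost:4321/path")) {
      assert(uri->scheme == "connect");
      assert(uri->port && *uri->port == 4321);  // port is empty when unspecified
    }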
diff --git a/lldb/include/lldb/lldb-enumerations.h b/lldb/include/lldb/lldb-enumerations.h
index 81f6be3eec7d..f5cabc02bd84 100644
--- a/lldb/include/lldb/lldb-enumerations.h
+++ b/lldb/include/lldb/lldb-enumerations.h
@@ -1137,6 +1137,7 @@ enum SaveCoreStyle {
eSaveCoreUnspecified = 0,
eSaveCoreFull = 1,
eSaveCoreDirtyOnly = 2,
+ eSaveCoreStackOnly = 3,
};
} // namespace lldb
diff --git a/lldb/include/lldb/lldb-forward.h b/lldb/include/lldb/lldb-forward.h
index ad5298151e4a..482da17bfacb 100644
--- a/lldb/include/lldb/lldb-forward.h
+++ b/lldb/include/lldb/lldb-forward.h
@@ -175,6 +175,7 @@ class Scalar;
class ScriptInterpreter;
class ScriptInterpreterLocker;
class ScriptedProcessInterface;
+class ScriptedThreadInterface;
class ScriptedSyntheticChildren;
class SearchFilter;
class Section;
@@ -290,7 +291,6 @@ typedef std::shared_ptr<lldb_private::Block> BlockSP;
typedef std::shared_ptr<lldb_private::Breakpoint> BreakpointSP;
typedef std::weak_ptr<lldb_private::Breakpoint> BreakpointWP;
typedef std::shared_ptr<lldb_private::BreakpointSite> BreakpointSiteSP;
-typedef std::weak_ptr<lldb_private::BreakpointSite> BreakpointSiteWP;
typedef std::shared_ptr<lldb_private::BreakpointLocation> BreakpointLocationSP;
typedef std::weak_ptr<lldb_private::BreakpointLocation> BreakpointLocationWP;
typedef std::shared_ptr<lldb_private::BreakpointPrecondition>
@@ -301,7 +301,6 @@ typedef std::shared_ptr<lldb_private::BroadcasterManager> BroadcasterManagerSP;
typedef std::weak_ptr<lldb_private::BroadcasterManager> BroadcasterManagerWP;
typedef std::shared_ptr<lldb_private::UserExpression> UserExpressionSP;
typedef std::shared_ptr<lldb_private::CommandObject> CommandObjectSP;
-typedef std::shared_ptr<lldb_private::Communication> CommunicationSP;
typedef std::shared_ptr<lldb_private::Connection> ConnectionSP;
typedef std::shared_ptr<lldb_private::CompileUnit> CompUnitSP;
typedef std::shared_ptr<lldb_private::DataBuffer> DataBufferSP;
@@ -311,7 +310,6 @@ typedef std::weak_ptr<lldb_private::Debugger> DebuggerWP;
typedef std::shared_ptr<lldb_private::Disassembler> DisassemblerSP;
typedef std::unique_ptr<lldb_private::DynamicCheckerFunctions>
DynamicCheckerFunctionsUP;
-typedef std::shared_ptr<lldb_private::DynamicLoader> DynamicLoaderSP;
typedef std::unique_ptr<lldb_private::DynamicLoader> DynamicLoaderUP;
typedef std::shared_ptr<lldb_private::Event> EventSP;
typedef std::shared_ptr<lldb_private::EventData> EventDataSP;
@@ -323,7 +321,6 @@ typedef std::shared_ptr<lldb_private::ExpressionVariable> ExpressionVariableSP;
typedef std::unique_ptr<lldb_private::File> FileUP;
typedef std::shared_ptr<lldb_private::File> FileSP;
typedef std::shared_ptr<lldb_private::Function> FunctionSP;
-typedef std::shared_ptr<lldb_private::FunctionCaller> FunctionCallerSP;
typedef std::shared_ptr<lldb_private::FuncUnwinders> FuncUnwindersSP;
typedef std::shared_ptr<lldb_private::InlineFunctionInfo> InlineFunctionInfoSP;
typedef std::shared_ptr<lldb_private::Instruction> InstructionSP;
@@ -335,9 +332,7 @@ typedef std::shared_ptr<lldb_private::IRExecutionUnit> IRExecutionUnitSP;
typedef std::shared_ptr<lldb_private::JITLoader> JITLoaderSP;
typedef std::unique_ptr<lldb_private::JITLoaderList> JITLoaderListUP;
typedef std::shared_ptr<lldb_private::LanguageRuntime> LanguageRuntimeSP;
-typedef std::shared_ptr<lldb_private::SystemRuntime> SystemRuntimeSP;
typedef std::unique_ptr<lldb_private::SystemRuntime> SystemRuntimeUP;
-typedef std::shared_ptr<lldb_private::LineTable> LineTableSP;
typedef std::shared_ptr<lldb_private::Listener> ListenerSP;
typedef std::weak_ptr<lldb_private::Listener> ListenerWP;
typedef std::shared_ptr<lldb_private::MemoryHistory> MemoryHistorySP;
@@ -346,7 +341,6 @@ typedef std::shared_ptr<lldb_private::MemoryRegionInfo> MemoryRegionInfoSP;
typedef std::shared_ptr<lldb_private::Module> ModuleSP;
typedef std::weak_ptr<lldb_private::Module> ModuleWP;
typedef std::shared_ptr<lldb_private::ObjectFile> ObjectFileSP;
-typedef std::weak_ptr<lldb_private::ObjectFile> ObjectFileWP;
typedef std::shared_ptr<lldb_private::ObjectFileJITDelegate>
ObjectFileJITDelegateSP;
typedef std::weak_ptr<lldb_private::ObjectFileJITDelegate>
@@ -354,32 +348,12 @@ typedef std::weak_ptr<lldb_private::ObjectFileJITDelegate>
typedef std::unique_ptr<lldb_private::OperatingSystem> OperatingSystemUP;
typedef std::shared_ptr<lldb_private::OptionValue> OptionValueSP;
typedef std::weak_ptr<lldb_private::OptionValue> OptionValueWP;
-typedef std::shared_ptr<lldb_private::OptionValueArch> OptionValueArchSP;
-typedef std::shared_ptr<lldb_private::OptionValueArgs> OptionValueArgsSP;
-typedef std::shared_ptr<lldb_private::OptionValueArray> OptionValueArraySP;
-typedef std::shared_ptr<lldb_private::OptionValueBoolean> OptionValueBooleanSP;
-typedef std::shared_ptr<lldb_private::OptionValueDictionary>
- OptionValueDictionarySP;
-typedef std::shared_ptr<lldb_private::OptionValueFileSpec>
- OptionValueFileSpecSP;
-typedef std::shared_ptr<lldb_private::OptionValueFileSpecList>
- OptionValueFileSpecListSP;
-typedef std::shared_ptr<lldb_private::OptionValueFormat> OptionValueFormatSP;
-typedef std::shared_ptr<lldb_private::OptionValuePathMappings>
- OptionValuePathMappingsSP;
typedef std::shared_ptr<lldb_private::OptionValueProperties>
OptionValuePropertiesSP;
-typedef std::shared_ptr<lldb_private::OptionValueRegex> OptionValueRegexSP;
-typedef std::shared_ptr<lldb_private::OptionValueSInt64> OptionValueSInt64SP;
-typedef std::shared_ptr<lldb_private::OptionValueString> OptionValueStringSP;
-typedef std::shared_ptr<lldb_private::OptionValueUInt64> OptionValueUInt64SP;
-typedef std::shared_ptr<lldb_private::OptionValueUUID> OptionValueUUIDSP;
typedef std::shared_ptr<lldb_private::Platform> PlatformSP;
typedef std::shared_ptr<lldb_private::Process> ProcessSP;
typedef std::shared_ptr<lldb_private::ProcessAttachInfo> ProcessAttachInfoSP;
-typedef std::shared_ptr<lldb_private::ProcessLaunchInfo> ProcessLaunchInfoSP;
typedef std::weak_ptr<lldb_private::Process> ProcessWP;
-typedef std::shared_ptr<lldb_private::Property> PropertySP;
typedef std::shared_ptr<lldb_private::RegisterCheckpoint> RegisterCheckpointSP;
typedef std::shared_ptr<lldb_private::RegisterContext> RegisterContextSP;
typedef std::shared_ptr<lldb_private::RegularExpression> RegularExpressionSP;
@@ -392,18 +366,17 @@ typedef std::shared_ptr<lldb_private::RecognizedStackFrame>
typedef std::shared_ptr<lldb_private::ScriptSummaryFormat>
ScriptSummaryFormatSP;
typedef std::shared_ptr<lldb_private::ScriptInterpreter> ScriptInterpreterSP;
-typedef std::unique_ptr<lldb_private::ScriptInterpreter> ScriptInterpreterUP;
typedef std::unique_ptr<lldb_private::ScriptedProcessInterface>
ScriptedProcessInterfaceUP;
+typedef std::shared_ptr<lldb_private::ScriptedThreadInterface>
+ ScriptedThreadInterfaceSP;
typedef std::shared_ptr<lldb_private::Section> SectionSP;
typedef std::unique_ptr<lldb_private::SectionList> SectionListUP;
typedef std::weak_ptr<lldb_private::Section> SectionWP;
typedef std::shared_ptr<lldb_private::SectionLoadList> SectionLoadListSP;
typedef std::shared_ptr<lldb_private::SearchFilter> SearchFilterSP;
-typedef std::shared_ptr<lldb_private::Settings> SettingsSP;
typedef std::unique_ptr<lldb_private::SourceManager> SourceManagerUP;
typedef std::shared_ptr<lldb_private::StackFrame> StackFrameSP;
-typedef std::unique_ptr<lldb_private::StackFrame> StackFrameUP;
typedef std::weak_ptr<lldb_private::StackFrame> StackFrameWP;
typedef std::shared_ptr<lldb_private::StackFrameList> StackFrameListSP;
typedef std::shared_ptr<lldb_private::StackFrameRecognizer>
@@ -412,7 +385,6 @@ typedef std::unique_ptr<lldb_private::StackFrameRecognizerManager>
StackFrameRecognizerManagerUP;
typedef std::shared_ptr<lldb_private::StopInfo> StopInfoSP;
typedef std::shared_ptr<lldb_private::Stream> StreamSP;
-typedef std::weak_ptr<lldb_private::Stream> StreamWP;
typedef std::shared_ptr<lldb_private::StreamFile> StreamFileSP;
typedef std::shared_ptr<lldb_private::StringSummaryFormat>
StringTypeSummaryImplSP;
@@ -421,9 +393,7 @@ typedef std::shared_ptr<lldb_private::StructuredDataPlugin>
StructuredDataPluginSP;
typedef std::weak_ptr<lldb_private::StructuredDataPlugin>
StructuredDataPluginWP;
-typedef std::shared_ptr<lldb_private::SymbolFile> SymbolFileSP;
typedef std::shared_ptr<lldb_private::SymbolFileType> SymbolFileTypeSP;
-typedef std::weak_ptr<lldb_private::SymbolFileType> SymbolFileTypeWP;
typedef std::shared_ptr<lldb_private::SymbolContextSpecifier>
SymbolContextSpecifierSP;
typedef std::unique_ptr<lldb_private::SymbolVendor> SymbolVendorUP;
@@ -432,7 +402,6 @@ typedef std::shared_ptr<lldb_private::SyntheticChildrenFrontEnd>
SyntheticChildrenFrontEndSP;
typedef std::shared_ptr<lldb_private::Target> TargetSP;
typedef std::weak_ptr<lldb_private::Target> TargetWP;
-typedef std::shared_ptr<lldb_private::TargetProperties> TargetPropertiesSP;
typedef std::shared_ptr<lldb_private::Thread> ThreadSP;
typedef std::weak_ptr<lldb_private::Thread> ThreadWP;
typedef std::shared_ptr<lldb_private::ThreadCollection> ThreadCollectionSP;
@@ -464,10 +433,8 @@ typedef std::shared_ptr<lldb_private::UnixSignals> UnixSignalsSP;
typedef std::weak_ptr<lldb_private::UnixSignals> UnixSignalsWP;
typedef std::shared_ptr<lldb_private::UnwindAssembly> UnwindAssemblySP;
typedef std::shared_ptr<lldb_private::UnwindPlan> UnwindPlanSP;
-typedef std::shared_ptr<lldb_private::UtilityFunction> UtilityFunctionSP;
typedef std::shared_ptr<lldb_private::ValueObject> ValueObjectSP;
typedef std::shared_ptr<lldb_private::Value> ValueSP;
-typedef std::shared_ptr<lldb_private::ValueList> ValueListSP;
typedef std::shared_ptr<lldb_private::Variable> VariableSP;
typedef std::shared_ptr<lldb_private::VariableList> VariableListSP;
typedef std::shared_ptr<lldb_private::ValueObjectList> ValueObjectListSP;
diff --git a/lldb/include/lldb/lldb-private-enumerations.h b/lldb/include/lldb/lldb-private-enumerations.h
index 7009d1b4fba7..9bbb889359b1 100644
--- a/lldb/include/lldb/lldb-private-enumerations.h
+++ b/lldb/include/lldb/lldb-private-enumerations.h
@@ -172,6 +172,12 @@ enum MemoryModuleLoadLevel {
eMemoryModuleLoadLevelComplete, // Load sections and all symbols
};
+// Behavior on fork/vfork
+enum FollowForkMode {
+ eFollowParent, // Follow parent process
+ eFollowChild, // Follow child process
+};
+
// Result enums for when reading multiple lines from IOHandlers
enum class LineStatus {
Success, // The line that was just edited if good and should be added to the
diff --git a/lldb/include/lldb/lldb-private-types.h b/lldb/include/lldb/lldb-private-types.h
index 73d618d7069c..3be7003cd0fb 100644
--- a/lldb/include/lldb/lldb-private-types.h
+++ b/lldb/include/lldb/lldb-private-types.h
@@ -51,19 +51,15 @@ struct RegisterInfo {
/// List of registers (terminated with LLDB_INVALID_REGNUM). If this value is
/// not null, all registers in this list will be read first, at which point
/// the value for this register will be valid. For example, the value list
- /// for ah would be eax (x86) or rax (x64).
- uint32_t *value_regs; //
+ /// for ah would be eax (x86) or rax (x64). Register numbers are
+ /// of eRegisterKindLLDB. If multiple registers are listed, the final
+ /// value will be the concatenation of them.
+ uint32_t *value_regs;
/// List of registers (terminated with LLDB_INVALID_REGNUM). If this value is
/// not null, all registers in this list will be invalidated when the value of
/// this register changes. For example, the invalidate list for eax would be
/// rax, ax, ah, and al.
uint32_t *invalidate_regs;
- /// A DWARF expression that when evaluated gives the byte size of this
- /// register.
- const uint8_t *dynamic_size_dwarf_expr_bytes;
- /// The length of the DWARF expression in bytes in the
- /// dynamic_size_dwarf_expr_bytes member.
- size_t dynamic_size_dwarf_len;
llvm::ArrayRef<uint8_t> data(const uint8_t *context_base) const {
return llvm::ArrayRef<uint8_t>(context_base + byte_offset, byte_size);
diff --git a/lldb/include/lldb/module.modulemap b/lldb/include/lldb/module.modulemap
index 7feea8ee99c3..c0d467a6505e 100644
--- a/lldb/include/lldb/module.modulemap
+++ b/lldb/include/lldb/module.modulemap
@@ -48,7 +48,6 @@ module lldb_Host {
module SafeMachO { header "Host/SafeMachO.h" export * }
module SocketAddress { header "Host/SocketAddress.h" export * }
module Socket { header "Host/Socket.h" export * }
- module StringConvert { textual header "Host/StringConvert.h" export * }
module Terminal { header "Host/Terminal.h" export * }
module ThreadLauncher { header "Host/ThreadLauncher.h" export * }
module Time { header "Host/Time.h" export * }
@@ -119,6 +118,7 @@ module lldb_Wrapper {
requires cplusplus
umbrella "Target"
+ textual header "Target/AppleArm64ExceptionClass.def"
module * { export * }
}
}
diff --git a/lldb/source/API/SBCommandInterpreter.cpp b/lldb/source/API/SBCommandInterpreter.cpp
index b4a69c3e972a..3830f6ed80ba 100644
--- a/lldb/source/API/SBCommandInterpreter.cpp
+++ b/lldb/source/API/SBCommandInterpreter.cpp
@@ -574,12 +574,11 @@ lldb::SBCommand SBCommandInterpreter::AddMultiwordCommand(const char *name,
LLDB_RECORD_METHOD(lldb::SBCommand, SBCommandInterpreter, AddMultiwordCommand,
(const char *, const char *), name, help);
- CommandObjectMultiword *new_command =
- new CommandObjectMultiword(*m_opaque_ptr, name, help);
- new_command->SetRemovable(true);
- lldb::CommandObjectSP new_command_sp(new_command);
- if (new_command_sp &&
- m_opaque_ptr->AddUserCommand(name, new_command_sp, true))
+ lldb::CommandObjectSP new_command_sp(
+ new CommandObjectMultiword(*m_opaque_ptr, name, help));
+ new_command_sp->GetAsMultiwordCommand()->SetRemovable(true);
+ Status add_error = m_opaque_ptr->AddUserCommand(name, new_command_sp, true);
+ if (add_error.Success())
return LLDB_RECORD_RESULT(lldb::SBCommand(new_command_sp));
return LLDB_RECORD_RESULT(lldb::SBCommand());
}
@@ -620,8 +619,8 @@ lldb::SBCommand SBCommandInterpreter::AddCommand(
*m_opaque_ptr, name, impl, help, syntax, /*flags=*/0,
auto_repeat_command);
- if (new_command_sp &&
- m_opaque_ptr->AddUserCommand(name, new_command_sp, true))
+ Status add_error = m_opaque_ptr->AddUserCommand(name, new_command_sp, true);
+ if (add_error.Success())
return LLDB_RECORD_RESULT(lldb::SBCommand(new_command_sp));
return LLDB_RECORD_RESULT(lldb::SBCommand());
}
diff --git a/lldb/source/API/SBDebugger.cpp b/lldb/source/API/SBDebugger.cpp
index a854c22bb214..4bb23c3e705c 100644
--- a/lldb/source/API/SBDebugger.cpp
+++ b/lldb/source/API/SBDebugger.cpp
@@ -680,6 +680,21 @@ SBDebugger::GetScriptingLanguage(const char *script_language_name) {
llvm::StringRef(script_language_name), eScriptLanguageDefault, nullptr);
}
+SBStructuredData
+SBDebugger::GetScriptInterpreterInfo(lldb::ScriptLanguage language) {
+ LLDB_RECORD_METHOD(SBStructuredData, SBDebugger, GetScriptInterpreterInfo,
+ (lldb::ScriptLanguage), language);
+ SBStructuredData data;
+ if (m_opaque_sp) {
+ lldb_private::ScriptInterpreter *interp =
+ m_opaque_sp->GetScriptInterpreter(language);
+ if (interp) {
+ data.m_impl_up->SetObjectSP(interp->GetInterpreterInfo());
+ }
+ }
+ return LLDB_RECORD_RESULT(data);
+}
+
const char *SBDebugger::GetVersionString() {
LLDB_RECORD_STATIC_METHOD_NO_ARGS(const char *, SBDebugger, GetVersionString);
@@ -1114,7 +1129,7 @@ uint32_t SBDebugger::GetNumAvailablePlatforms() {
uint32_t idx = 0;
while (true) {
- if (!PluginManager::GetPlatformPluginNameAtIndex(idx)) {
+ if (PluginManager::GetPlatformPluginNameAtIndex(idx).empty()) {
break;
}
++idx;
@@ -1133,23 +1148,19 @@ SBStructuredData SBDebugger::GetAvailablePlatformInfoAtIndex(uint32_t idx) {
if (idx == 0) {
PlatformSP host_platform_sp(Platform::GetHostPlatform());
- platform_dict->AddStringItem(
- name_str, host_platform_sp->GetPluginName().GetStringRef());
+ platform_dict->AddStringItem(name_str, host_platform_sp->GetPluginName());
platform_dict->AddStringItem(
desc_str, llvm::StringRef(host_platform_sp->GetDescription()));
} else if (idx > 0) {
- const char *plugin_name =
+ llvm::StringRef plugin_name =
PluginManager::GetPlatformPluginNameAtIndex(idx - 1);
- if (!plugin_name) {
+ if (plugin_name.empty()) {
return LLDB_RECORD_RESULT(data);
}
platform_dict->AddStringItem(name_str, llvm::StringRef(plugin_name));
- const char *plugin_desc =
+ llvm::StringRef plugin_desc =
PluginManager::GetPlatformPluginDescriptionAtIndex(idx - 1);
- if (!plugin_desc) {
- return LLDB_RECORD_RESULT(data);
- }
platform_dict->AddStringItem(desc_str, llvm::StringRef(plugin_desc));
}
@@ -1787,6 +1798,8 @@ template <> void RegisterMethods<SBDebugger>(Registry &R) {
(const char *));
LLDB_REGISTER_METHOD(lldb::ScriptLanguage, SBDebugger, GetScriptingLanguage,
(const char *));
+ LLDB_REGISTER_METHOD(SBStructuredData, SBDebugger, GetScriptInterpreterInfo,
+ (lldb::ScriptLanguage));
LLDB_REGISTER_STATIC_METHOD(const char *, SBDebugger, GetVersionString, ());
LLDB_REGISTER_STATIC_METHOD(const char *, SBDebugger, StateAsCString,
(lldb::StateType));
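A hedged usage sketch for the new SB API method registered above; `debugger` is an assumed valid lldb::SBDebugger:

    lldb::SBStructuredData info =
        debugger.GetScriptInterpreterInfo(lldb::eScriptLanguagePython);
    lldb::SBStream json;
    info.GetAsJSON(json);  // e.g. the interpreter's prefix and version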
diff --git a/lldb/source/API/SBFrame.cpp b/lldb/source/API/SBFrame.cpp
index 8f9e426e066e..7107768ba884 100644
--- a/lldb/source/API/SBFrame.cpp
+++ b/lldb/source/API/SBFrame.cpp
@@ -633,18 +633,10 @@ SBValue SBFrame::FindValue(const char *name, ValueType value_type,
{
RegisterContextSP reg_ctx(frame->GetRegisterContext());
if (reg_ctx) {
- const uint32_t num_regs = reg_ctx->GetRegisterCount();
- for (uint32_t reg_idx = 0; reg_idx < num_regs; ++reg_idx) {
- const RegisterInfo *reg_info =
- reg_ctx->GetRegisterInfoAtIndex(reg_idx);
- if (reg_info &&
- ((reg_info->name && strcasecmp(reg_info->name, name) == 0) ||
- (reg_info->alt_name &&
- strcasecmp(reg_info->alt_name, name) == 0))) {
- value_sp = ValueObjectRegister::Create(frame, reg_ctx, reg_idx);
- sb_value.SetSP(value_sp);
- break;
- }
+ if (const RegisterInfo *reg_info =
+ reg_ctx->GetRegisterInfoByName(name)) {
+ value_sp = ValueObjectRegister::Create(frame, reg_ctx, reg_info);
+ sb_value.SetSP(value_sp);
}
}
} break;
@@ -953,18 +945,10 @@ SBValue SBFrame::FindRegister(const char *name) {
if (frame) {
RegisterContextSP reg_ctx(frame->GetRegisterContext());
if (reg_ctx) {
- const uint32_t num_regs = reg_ctx->GetRegisterCount();
- for (uint32_t reg_idx = 0; reg_idx < num_regs; ++reg_idx) {
- const RegisterInfo *reg_info =
- reg_ctx->GetRegisterInfoAtIndex(reg_idx);
- if (reg_info &&
- ((reg_info->name && strcasecmp(reg_info->name, name) == 0) ||
- (reg_info->alt_name &&
- strcasecmp(reg_info->alt_name, name) == 0))) {
- value_sp = ValueObjectRegister::Create(frame, reg_ctx, reg_idx);
- result.SetSP(value_sp);
- break;
- }
+ if (const RegisterInfo *reg_info =
+ reg_ctx->GetRegisterInfoByName(name)) {
+ value_sp = ValueObjectRegister::Create(frame, reg_ctx, reg_info);
+ result.SetSP(value_sp);
}
}
}
diff --git a/lldb/source/API/SBLaunchInfo.cpp b/lldb/source/API/SBLaunchInfo.cpp
index 70cd1c6ecf74..0735e62a16cf 100644
--- a/lldb/source/API/SBLaunchInfo.cpp
+++ b/lldb/source/API/SBLaunchInfo.cpp
@@ -380,16 +380,18 @@ lldb::SBStructuredData SBLaunchInfo::GetScriptedProcessDictionary() const {
void SBLaunchInfo::SetScriptedProcessDictionary(lldb::SBStructuredData dict) {
LLDB_RECORD_METHOD(void, SBLaunchInfo, SetScriptedProcessDictionary,
(lldb::SBStructuredData), dict);
+ if (!dict.IsValid() || !dict.m_impl_up)
+ return;
- SBStream stream;
- SBError error = dict.GetAsJSON(stream);
+ StructuredData::ObjectSP obj_sp = dict.m_impl_up->GetObjectSP();
- if (error.Fail())
+ if (!obj_sp)
return;
- StructuredData::DictionarySP dict_sp;
- llvm::json::OStream s(stream.ref().AsRawOstream());
- dict_sp->Serialize(s);
+ StructuredData::DictionarySP dict_sp =
+ std::make_shared<StructuredData::Dictionary>(obj_sp);
+ if (!dict_sp || dict_sp->GetType() == lldb::eStructuredDataTypeInvalid)
+ return;
m_opaque_sp->SetScriptedProcessDictionarySP(dict_sp);
}
diff --git a/lldb/source/API/SBMemoryRegionInfo.cpp b/lldb/source/API/SBMemoryRegionInfo.cpp
index ab74d559387f..9cf7874b54a3 100644
--- a/lldb/source/API/SBMemoryRegionInfo.cpp
+++ b/lldb/source/API/SBMemoryRegionInfo.cpp
@@ -22,6 +22,24 @@ SBMemoryRegionInfo::SBMemoryRegionInfo() : m_opaque_up(new MemoryRegionInfo()) {
LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBMemoryRegionInfo);
}
+SBMemoryRegionInfo::SBMemoryRegionInfo(const char *name, lldb::addr_t begin,
+ lldb::addr_t end, uint32_t permissions,
+ bool mapped, bool stack_memory)
+ : SBMemoryRegionInfo() {
+ LLDB_RECORD_CONSTRUCTOR(
+ SBMemoryRegionInfo,
+ (const char *, lldb::addr_t, lldb::addr_t, uint32_t, bool, bool), name,
+ begin, end, permissions, mapped, stack_memory);
+ m_opaque_up->SetName(name);
+ m_opaque_up->GetRange().SetRangeBase(begin);
+ m_opaque_up->GetRange().SetRangeEnd(end);
+ m_opaque_up->SetLLDBPermissions(permissions);
+ m_opaque_up->SetMapped(mapped ? MemoryRegionInfo::eYes
+ : MemoryRegionInfo::eNo);
+ m_opaque_up->SetIsStackMemory(stack_memory ? MemoryRegionInfo::eYes
+ : MemoryRegionInfo::eNo);
+}
+
SBMemoryRegionInfo::SBMemoryRegionInfo(const MemoryRegionInfo *lldb_object_ptr)
: m_opaque_up(new MemoryRegionInfo()) {
if (lldb_object_ptr)
@@ -135,8 +153,8 @@ uint32_t SBMemoryRegionInfo::GetNumDirtyPages() {
}
addr_t SBMemoryRegionInfo::GetDirtyPageAddressAtIndex(uint32_t idx) {
- LLDB_RECORD_METHOD(addr_t, SBMemoryRegionInfo, GetDirtyPageAddressAtIndex,
- (uint32_t), idx);
+ LLDB_RECORD_METHOD(lldb::addr_t, SBMemoryRegionInfo,
+ GetDirtyPageAddressAtIndex, (uint32_t), idx);
addr_t dirty_page_addr = LLDB_INVALID_ADDRESS;
const llvm::Optional<std::vector<addr_t>> &dirty_page_list =
@@ -149,6 +167,7 @@ addr_t SBMemoryRegionInfo::GetDirtyPageAddressAtIndex(uint32_t idx) {
int SBMemoryRegionInfo::GetPageSize() {
LLDB_RECORD_METHOD_NO_ARGS(int, SBMemoryRegionInfo, GetPageSize);
+
return m_opaque_up->GetPageSize();
}
@@ -177,6 +196,9 @@ void RegisterMethods<SBMemoryRegionInfo>(Registry &R) {
LLDB_REGISTER_CONSTRUCTOR(SBMemoryRegionInfo, ());
LLDB_REGISTER_CONSTRUCTOR(SBMemoryRegionInfo,
(const lldb::SBMemoryRegionInfo &));
+ LLDB_REGISTER_CONSTRUCTOR(
+ SBMemoryRegionInfo,
+ (const char *, lldb::addr_t, lldb::addr_t, uint32_t, bool, bool));
LLDB_REGISTER_METHOD(
const lldb::SBMemoryRegionInfo &,
SBMemoryRegionInfo, operator=,(const lldb::SBMemoryRegionInfo &));
@@ -196,6 +218,10 @@ void RegisterMethods<SBMemoryRegionInfo>(Registry &R) {
LLDB_REGISTER_METHOD(const char *, SBMemoryRegionInfo, GetName, ());
LLDB_REGISTER_METHOD(bool, SBMemoryRegionInfo, GetDescription,
(lldb::SBStream &));
+ LLDB_REGISTER_METHOD(bool, SBMemoryRegionInfo, HasDirtyMemoryPageList, ());
+ LLDB_REGISTER_METHOD(uint32_t, SBMemoryRegionInfo, GetNumDirtyPages, ());
+ LLDB_REGISTER_METHOD(lldb::addr_t, SBMemoryRegionInfo, GetDirtyPageAddressAtIndex, (uint32_t));
+ LLDB_REGISTER_METHOD(int, SBMemoryRegionInfo, GetPageSize, ());
}
}
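A brief sketch of the new SBMemoryRegionInfo constructor; the name, address range, and permissions are illustrative:

    lldb::SBMemoryRegionInfo region("[stack]", /*begin=*/0x1000, /*end=*/0x2000,
                                    lldb::ePermissionsReadable |
                                        lldb::ePermissionsWritable,
                                    /*mapped=*/true, /*stack_memory=*/true);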
diff --git a/lldb/source/API/SBMemoryRegionInfoList.cpp b/lldb/source/API/SBMemoryRegionInfoList.cpp
index 0f3f9c1b8177..cd8fc00ffce0 100644
--- a/lldb/source/API/SBMemoryRegionInfoList.cpp
+++ b/lldb/source/API/SBMemoryRegionInfoList.cpp
@@ -48,6 +48,17 @@ public:
void Clear() { m_regions.clear(); }
+ bool GetMemoryRegionContainingAddress(lldb::addr_t addr,
+ MemoryRegionInfo &region_info) {
+ for (auto &region : m_regions) {
+ if (region.GetRange().Contains(addr)) {
+ region_info = region;
+ return true;
+ }
+ }
+ return false;
+ }
+
bool GetMemoryRegionInfoAtIndex(size_t index,
MemoryRegionInfo &region_info) {
if (index >= GetSize())
@@ -103,6 +114,15 @@ uint32_t SBMemoryRegionInfoList::GetSize() const {
return m_opaque_up->GetSize();
}
+bool SBMemoryRegionInfoList::GetMemoryRegionContainingAddress(
+ lldb::addr_t addr, SBMemoryRegionInfo &region_info) {
+ LLDB_RECORD_METHOD(
+ bool, SBMemoryRegionInfoList, GetMemoryRegionContainingAddress,
+ (lldb::addr_t, lldb::SBMemoryRegionInfo &), addr, region_info);
+
+ return m_opaque_up->GetMemoryRegionContainingAddress(addr, region_info.ref());
+}
+
bool SBMemoryRegionInfoList::GetMemoryRegionAtIndex(
uint32_t idx, SBMemoryRegionInfo &region_info) {
LLDB_RECORD_METHOD(bool, SBMemoryRegionInfoList, GetMemoryRegionAtIndex,
@@ -153,6 +173,9 @@ void RegisterMethods<SBMemoryRegionInfoList>(Registry &R) {
SBMemoryRegionInfoList, operator=,(
const lldb::SBMemoryRegionInfoList &));
LLDB_REGISTER_METHOD_CONST(uint32_t, SBMemoryRegionInfoList, GetSize, ());
+ LLDB_REGISTER_METHOD(bool, SBMemoryRegionInfoList,
+ GetMemoryRegionContainingAddress,
+ (lldb::addr_t, lldb::SBMemoryRegionInfo &));
LLDB_REGISTER_METHOD(bool, SBMemoryRegionInfoList, GetMemoryRegionAtIndex,
(uint32_t, lldb::SBMemoryRegionInfo &));
LLDB_REGISTER_METHOD(void, SBMemoryRegionInfoList, Clear, ());
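A hedged sketch of the new containing-address lookup, assuming `regions` came from SBProcess::GetMemoryRegions() and `load_addr` is a valid load address:

    lldb::SBMemoryRegionInfo region;
    if (regions.GetMemoryRegionContainingAddress(load_addr, region))
      printf("[0x%" PRIx64 ", 0x%" PRIx64 ")\n", region.GetRegionBase(),
             region.GetRegionEnd());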
diff --git a/lldb/source/API/SBModule.cpp b/lldb/source/API/SBModule.cpp
index b5b9fe16aa63..710ee8551bd6 100644
--- a/lldb/source/API/SBModule.cpp
+++ b/lldb/source/API/SBModule.cpp
@@ -397,11 +397,13 @@ lldb::SBSymbolContextList SBModule::FindFunctions(const char *name,
lldb::SBSymbolContextList sb_sc_list;
ModuleSP module_sp(GetSP());
if (name && module_sp) {
- const bool symbols_ok = true;
- const bool inlines_ok = true;
+
+ ModuleFunctionSearchOptions function_options;
+ function_options.include_symbols = true;
+ function_options.include_inlines = true;
FunctionNameType type = static_cast<FunctionNameType>(name_type_mask);
module_sp->FindFunctions(ConstString(name), CompilerDeclContext(), type,
- symbols_ok, inlines_ok, *sb_sc_list);
+ function_options, *sb_sc_list);
}
return LLDB_RECORD_RESULT(sb_sc_list);
}
diff --git a/lldb/source/API/SBPlatform.cpp b/lldb/source/API/SBPlatform.cpp
index 496c40a0678f..d7a86f0ad1dd 100644
--- a/lldb/source/API/SBPlatform.cpp
+++ b/lldb/source/API/SBPlatform.cpp
@@ -458,13 +458,11 @@ const char *SBPlatform::GetOSBuild() {
PlatformSP platform_sp(GetSP());
if (platform_sp) {
- std::string s;
- if (platform_sp->GetOSBuildString(s)) {
- if (!s.empty()) {
- // Const-ify the string so we don't need to worry about the lifetime of
- // the string
- return ConstString(s.c_str()).GetCString();
- }
+ std::string s = platform_sp->GetOSBuildString().getValueOr("");
+ if (!s.empty()) {
+ // Const-ify the string so we don't need to worry about the lifetime of
+ // the string
+ return ConstString(s).GetCString();
}
}
return nullptr;
@@ -475,13 +473,11 @@ const char *SBPlatform::GetOSDescription() {
PlatformSP platform_sp(GetSP());
if (platform_sp) {
- std::string s;
- if (platform_sp->GetOSKernelDescription(s)) {
- if (!s.empty()) {
- // Const-ify the string so we don't need to worry about the lifetime of
- // the string
- return ConstString(s.c_str()).GetCString();
- }
+ std::string s = platform_sp->GetOSKernelDescription().getValueOr("");
+ if (!s.empty()) {
+ // Const-ify the string so we don't need to worry about the lifetime of
+ // the string
+ return ConstString(s.c_str()).GetCString();
}
}
return nullptr;
diff --git a/lldb/source/API/SBProcess.cpp b/lldb/source/API/SBProcess.cpp
index 47c35a23b078..797e19462800 100644
--- a/lldb/source/API/SBProcess.cpp
+++ b/lldb/source/API/SBProcess.cpp
@@ -88,7 +88,7 @@ const char *SBProcess::GetPluginName() {
ProcessSP process_sp(GetSP());
if (process_sp) {
- return process_sp->GetPluginName().GetCString();
+ return ConstString(process_sp->GetPluginName()).GetCString();
}
return "<Unknown>";
}
@@ -98,7 +98,7 @@ const char *SBProcess::GetShortPluginName() {
ProcessSP process_sp(GetSP());
if (process_sp) {
- return process_sp->GetPluginName().GetCString();
+ return ConstString(process_sp->GetPluginName()).GetCString();
}
return "<Unknown>";
}
@@ -1228,7 +1228,7 @@ lldb::SBError SBProcess::SaveCore(const char *file_name) {
FileSpec core_file(file_name);
SaveCoreStyle core_style = SaveCoreStyle::eSaveCoreFull;
- error.ref() = PluginManager::SaveCore(process_sp, core_file, core_style);
+ error.ref() = PluginManager::SaveCore(process_sp, core_file, core_style, "");
return LLDB_RECORD_RESULT(error);
}
diff --git a/lldb/source/API/SBStream.cpp b/lldb/source/API/SBStream.cpp
index 66172d248bf3..190abd18df33 100644
--- a/lldb/source/API/SBStream.cpp
+++ b/lldb/source/API/SBStream.cpp
@@ -90,7 +90,7 @@ void SBStream::RedirectToFile(const char *path, bool append) {
local_data = std::string(
static_cast<StreamString *>(m_opaque_up.get())->GetString());
}
- auto open_options = File::eOpenOptionWrite | File::eOpenOptionCanCreate;
+ auto open_options = File::eOpenOptionWriteOnly | File::eOpenOptionCanCreate;
if (append)
open_options |= File::eOpenOptionAppend;
else
diff --git a/lldb/source/API/SBTarget.cpp b/lldb/source/API/SBTarget.cpp
index 6f0633288a2b..98158f457a04 100644
--- a/lldb/source/API/SBTarget.cpp
+++ b/lldb/source/API/SBTarget.cpp
@@ -73,9 +73,7 @@ using namespace lldb_private;
#define DEFAULT_DISASM_BYTE_SIZE 32
-namespace {
-
-Status AttachToProcess(ProcessAttachInfo &attach_info, Target &target) {
+static Status AttachToProcess(ProcessAttachInfo &attach_info, Target &target) {
std::lock_guard<std::recursive_mutex> guard(target.GetAPIMutex());
auto process_sp = target.GetProcessSP();
@@ -94,8 +92,6 @@ Status AttachToProcess(ProcessAttachInfo &attach_info, Target &target) {
return target.Attach(attach_info, nullptr);
}
-} // namespace
-
// SBTarget constructor
SBTarget::SBTarget() : m_opaque_sp() {
LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBTarget);
@@ -217,17 +213,11 @@ SBStructuredData SBTarget::GetStatistics() {
TargetSP target_sp(GetSP());
if (!target_sp)
return LLDB_RECORD_RESULT(data);
-
- auto stats_up = std::make_unique<StructuredData::Dictionary>();
- int i = 0;
- for (auto &Entry : target_sp->GetStatistics()) {
- std::string Desc = lldb_private::GetStatDescription(
- static_cast<lldb_private::StatisticKind>(i));
- stats_up->AddIntegerItem(Desc, Entry);
- i += 1;
- }
-
- data.m_impl_up->SetObjectSP(std::move(stats_up));
+ std::string json_str =
+ llvm::formatv("{0:2}",
+ DebuggerStats::ReportStatistics(target_sp->GetDebugger(),
+ target_sp.get())).str();
+ data.m_impl_up->SetObjectSP(StructuredData::ParseJSON(json_str));
return LLDB_RECORD_RESULT(data);
}
@@ -237,7 +227,7 @@ void SBTarget::SetCollectingStats(bool v) {
TargetSP target_sp(GetSP());
if (!target_sp)
return;
- return target_sp->SetCollectingStats(v);
+ return DebuggerStats::SetCollectingStats(v);
}
bool SBTarget::GetCollectingStats() {
@@ -246,7 +236,7 @@ bool SBTarget::GetCollectingStats() {
TargetSP target_sp(GetSP());
if (!target_sp)
return false;
- return target_sp->GetCollectingStats();
+ return DebuggerStats::GetCollectingStats();
}
SBProcess SBTarget::LoadCore(const char *core_file) {
@@ -1596,13 +1586,13 @@ void SBTarget::AppendImageSearchPath(const char *from, const char *to,
if (!target_sp)
return error.SetErrorString("invalid target");
- const ConstString csFrom(from), csTo(to);
- if (!csFrom)
+ llvm::StringRef srFrom = from, srTo = to;
+ if (srFrom.empty())
return error.SetErrorString("<from> path can't be empty");
- if (!csTo)
+ if (srTo.empty())
return error.SetErrorString("<to> path can't be empty");
- target_sp->GetImageSearchPathList().Append(csFrom, csTo, true);
+ target_sp->GetImageSearchPathList().Append(srFrom, srTo, true);
}
lldb::SBModule SBTarget::AddModule(const char *path, const char *triple,
@@ -1831,11 +1821,13 @@ lldb::SBSymbolContextList SBTarget::FindFunctions(const char *name,
if (!target_sp)
return LLDB_RECORD_RESULT(sb_sc_list);
- const bool symbols_ok = true;
- const bool inlines_ok = true;
+ ModuleFunctionSearchOptions function_options;
+ function_options.include_symbols = true;
+ function_options.include_inlines = true;
+
FunctionNameType mask = static_cast<FunctionNameType>(name_type_mask);
- target_sp->GetImages().FindFunctions(ConstString(name), mask, symbols_ok,
- inlines_ok, *sb_sc_list);
+ target_sp->GetImages().FindFunctions(ConstString(name), mask,
+ function_options, *sb_sc_list);
return LLDB_RECORD_RESULT(sb_sc_list);
}
@@ -1851,20 +1843,25 @@ lldb::SBSymbolContextList SBTarget::FindGlobalFunctions(const char *name,
llvm::StringRef name_ref(name);
TargetSP target_sp(GetSP());
if (target_sp) {
+ ModuleFunctionSearchOptions function_options;
+ function_options.include_symbols = true;
+ function_options.include_inlines = true;
+
std::string regexstr;
switch (matchtype) {
case eMatchTypeRegex:
- target_sp->GetImages().FindFunctions(RegularExpression(name_ref), true,
- true, *sb_sc_list);
+ target_sp->GetImages().FindFunctions(RegularExpression(name_ref),
+ function_options, *sb_sc_list);
break;
case eMatchTypeStartsWith:
regexstr = llvm::Regex::escape(name) + ".*";
- target_sp->GetImages().FindFunctions(RegularExpression(regexstr), true,
- true, *sb_sc_list);
+ target_sp->GetImages().FindFunctions(RegularExpression(regexstr),
+ function_options, *sb_sc_list);
break;
default:
- target_sp->GetImages().FindFunctions(
- ConstString(name), eFunctionNameTypeAny, true, true, *sb_sc_list);
+ target_sp->GetImages().FindFunctions(ConstString(name),
+ eFunctionNameTypeAny,
+ function_options, *sb_sc_list);
break;
}
}
diff --git a/lldb/source/API/SBThread.cpp b/lldb/source/API/SBThread.cpp
index e0ab8b2e9fa8..8d5b6f2a5423 100644
--- a/lldb/source/API/SBThread.cpp
+++ b/lldb/source/API/SBThread.cpp
@@ -513,10 +513,10 @@ SBError SBThread::ResumeNewPlan(ExecutionContext &exe_ctx,
return sb_error;
}
- // User level plans should be Master Plans so they can be interrupted, other
- // plans executed, and then a "continue" will resume the plan.
+ // User level plans should be Controlling Plans so they can be interrupted,
+ // other plans executed, and then a "continue" will resume the plan.
if (new_plan != nullptr) {
- new_plan->SetIsMasterPlan(true);
+ new_plan->SetIsControllingPlan(true);
new_plan->SetOkayToDiscard(false);
}
diff --git a/lldb/source/API/liblldb-private.exports b/lldb/source/API/liblldb-private.exports
index 9b3d86dfc892..cf1360021c6c 100644
--- a/lldb/source/API/liblldb-private.exports
+++ b/lldb/source/API/liblldb-private.exports
@@ -4,3 +4,4 @@ _ZN12lldb_private*
_ZNK12lldb_private*
init_lld*
PyInit__lldb*
+luaopen_lldb*
diff --git a/lldb/source/API/liblldb.exports b/lldb/source/API/liblldb.exports
index 3ceb562c7ed1..5835cf0a02ea 100644
--- a/lldb/source/API/liblldb.exports
+++ b/lldb/source/API/liblldb.exports
@@ -2,3 +2,4 @@ _ZN4lldb*
_ZNK4lldb*
init_lld*
PyInit__lldb*
+luaopen_lldb*
diff --git a/lldb/source/API/liblldb.xcode.exports b/lldb/source/API/liblldb.xcode.exports
deleted file mode 100644
index 9c194fa6ff67..000000000000
--- a/lldb/source/API/liblldb.xcode.exports
+++ /dev/null
@@ -1,3 +0,0 @@
-__ZN4lldb*
-__ZNK4lldb*
-_init_lld*
diff --git a/lldb/source/Breakpoint/Breakpoint.cpp b/lldb/source/Breakpoint/Breakpoint.cpp
index 8d5d5a31337c..d6acf659e852 100644
--- a/lldb/source/Breakpoint/Breakpoint.cpp
+++ b/lldb/source/Breakpoint/Breakpoint.cpp
@@ -439,12 +439,15 @@ BreakpointOptions &Breakpoint::GetOptions() { return m_options; }
const BreakpointOptions &Breakpoint::GetOptions() const { return m_options; }
void Breakpoint::ResolveBreakpoint() {
- if (m_resolver_sp)
+ if (m_resolver_sp) {
+ ElapsedTime elapsed(m_resolve_time);
m_resolver_sp->ResolveBreakpoint(*m_filter_sp);
+ }
}
void Breakpoint::ResolveBreakpointInModules(
ModuleList &module_list, BreakpointLocationCollection &new_locations) {
+ ElapsedTime elapsed(m_resolve_time);
m_locations.StartRecordingNewLocations(new_locations);
m_resolver_sp->ResolveBreakpointInModules(*m_filter_sp, module_list);
@@ -470,6 +473,7 @@ void Breakpoint::ResolveBreakpointInModules(ModuleList &module_list,
} else
delete new_locations_event;
} else {
+ ElapsedTime elapsed(m_resolve_time);
m_resolver_sp->ResolveBreakpointInModules(*m_filter_sp, module_list);
}
}
@@ -602,7 +606,6 @@ void Breakpoint::ModulesChanged(ModuleList &module_list, bool load,
}
}
-namespace {
static bool SymbolContextsMightBeEquivalent(SymbolContext &old_sc,
SymbolContext &new_sc) {
bool equivalent_scs = false;
@@ -640,7 +643,6 @@ static bool SymbolContextsMightBeEquivalent(SymbolContext &old_sc,
}
return equivalent_scs;
}
-} // anonymous namespace
void Breakpoint::ModuleReplaced(ModuleSP old_module_sp,
ModuleSP new_module_sp) {
@@ -1088,3 +1090,34 @@ Breakpoint::BreakpointEventData::GetBreakpointLocationAtIndexFromEvent(
return bp_loc_sp;
}
+
+json::Value Breakpoint::GetStatistics() {
+ json::Object bp;
+ bp.try_emplace("id", GetID());
+ bp.try_emplace("resolveTime", m_resolve_time.count());
+ bp.try_emplace("numLocations", (int64_t)GetNumLocations());
+ bp.try_emplace("numResolvedLocations", (int64_t)GetNumResolvedLocations());
+ bp.try_emplace("internal", IsInternal());
+ if (!m_kind_description.empty())
+ bp.try_emplace("kindDescription", m_kind_description);
+ // Put the full structured data for reproducing this breakpoint in a key/value
+ // pair named "details". This allows the breakpoint's details to be visible
+ // in the stats in case we need to reproduce a breakpoint that has long
+ // resolve times
+ StructuredData::ObjectSP bp_data_sp = SerializeToStructuredData();
+ if (bp_data_sp) {
+ std::string buffer;
+ llvm::raw_string_ostream ss(buffer);
+ json::OStream json_os(ss);
+ bp_data_sp->Serialize(json_os);
+ if (auto expected_value = llvm::json::parse(ss.str())) {
+ bp.try_emplace("details", std::move(*expected_value));
+ } else {
+ std::string details_error = toString(expected_value.takeError());
+ json::Object details;
+ details.try_emplace("error", details_error);
+ bp.try_emplace("details", std::move(details));
+ }
+ }
+ return json::Value(std::move(bp));
+}
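A hypothetical consumer of the new per-breakpoint statistics (not part of this patch) could pretty-print the returned object; the key names ("id", "resolveTime", "numLocations", "numResolvedLocations", "internal", plus optional "kindDescription" and "details") are the ones emplaced above:

    // Sketch only: "breakpoint" stands for any lldb_private::Breakpoint.
    llvm::json::Value bp_stats = breakpoint.GetStatistics();
    llvm::outs() << llvm::formatv("{0:2}", bp_stats) << "\n";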
diff --git a/lldb/source/Breakpoint/BreakpointID.cpp b/lldb/source/Breakpoint/BreakpointID.cpp
index f20572144068..9bd22898196e 100644
--- a/lldb/source/Breakpoint/BreakpointID.cpp
+++ b/lldb/source/Breakpoint/BreakpointID.cpp
@@ -29,12 +29,7 @@ static llvm::StringRef g_range_specifiers[] = {"-", "to", "To", "TO"};
// for specifying ID ranges at a later date.
bool BreakpointID::IsRangeIdentifier(llvm::StringRef str) {
- for (auto spec : g_range_specifiers) {
- if (spec == str)
- return true;
- }
-
- return false;
+ return llvm::is_contained(g_range_specifiers, str);
}
bool BreakpointID::IsValidIDExpression(llvm::StringRef str) {
diff --git a/lldb/source/Breakpoint/BreakpointList.cpp b/lldb/source/Breakpoint/BreakpointList.cpp
index a00f6bed6181..ca181ee306a4 100644
--- a/lldb/source/Breakpoint/BreakpointList.cpp
+++ b/lldb/source/Breakpoint/BreakpointList.cpp
@@ -101,10 +101,8 @@ void BreakpointList::RemoveAllowed(bool notify) {
NotifyChange(bp_sp, eBreakpointEventTypeRemoved);
}
- m_breakpoints.erase(
- std::remove_if(m_breakpoints.begin(), m_breakpoints.end(),
- [&](const BreakpointSP &bp) { return bp->AllowDelete(); }),
- m_breakpoints.end());
+ llvm::erase_if(m_breakpoints,
+ [&](const BreakpointSP &bp) { return bp->AllowDelete(); });
}
BreakpointList::bp_collection::iterator
diff --git a/lldb/source/Breakpoint/BreakpointResolverFileLine.cpp b/lldb/source/Breakpoint/BreakpointResolverFileLine.cpp
index 1d1ac2e90bdc..be4616064f9e 100644
--- a/lldb/source/Breakpoint/BreakpointResolverFileLine.cpp
+++ b/lldb/source/Breakpoint/BreakpointResolverFileLine.cpp
@@ -188,7 +188,7 @@ void BreakpointResolverFileLine::FilterContexts(SymbolContextList &sc_list,
// is 0, then we can't do this calculation. That can happen if
// GetStartLineSourceInfo gets an error, or if the first line number in
// the function really is 0 - which happens for some languages.
-
+
// But only do this calculation if the line number we found in the SC
// was different from the one requested in the source file. If we actually
// found an exact match it must be valid.
@@ -229,18 +229,25 @@ Searcher::CallbackReturn BreakpointResolverFileLine::SearchCallback(
const uint32_t line = m_location_spec.GetLine().getValueOr(0);
const llvm::Optional<uint16_t> column = m_location_spec.GetColumn();
+ // We'll create a new SourceLocationSpec that can take into account the
+ // relative path case, and we'll use it to resolve the symbol context
+ // of the CUs.
FileSpec search_file_spec = m_location_spec.GetFileSpec();
const bool is_relative = search_file_spec.IsRelative();
if (is_relative)
search_file_spec.GetDirectory().Clear();
+ SourceLocationSpec search_location_spec(
+ search_file_spec, m_location_spec.GetLine().getValueOr(0),
+ m_location_spec.GetColumn(), m_location_spec.GetCheckInlines(),
+ m_location_spec.GetExactMatch());
const size_t num_comp_units = context.module_sp->GetNumCompileUnits();
for (size_t i = 0; i < num_comp_units; i++) {
CompUnitSP cu_sp(context.module_sp->GetCompileUnitAtIndex(i));
if (cu_sp) {
if (filter.CompUnitPasses(*cu_sp))
- cu_sp->ResolveSymbolContext(m_location_spec, eSymbolContextEverything,
- sc_list);
+ cu_sp->ResolveSymbolContext(search_location_spec,
+ eSymbolContextEverything, sc_list);
}
}
diff --git a/lldb/source/Breakpoint/BreakpointResolverName.cpp b/lldb/source/Breakpoint/BreakpointResolverName.cpp
index 121ac5690d70..49087b39944d 100644
--- a/lldb/source/Breakpoint/BreakpointResolverName.cpp
+++ b/lldb/source/Breakpoint/BreakpointResolverName.cpp
@@ -264,8 +264,10 @@ BreakpointResolverName::SearchCallback(SearchFilter &filter,
bool filter_by_cu =
(filter.GetFilterRequiredItems() & eSymbolContextCompUnit) != 0;
bool filter_by_language = (m_language != eLanguageTypeUnknown);
- const bool include_symbols = !filter_by_cu;
- const bool include_inlines = true;
+
+ ModuleFunctionSearchOptions function_options;
+ function_options.include_symbols = !filter_by_cu;
+ function_options.include_inlines = true;
switch (m_match_type) {
case Breakpoint::Exact:
@@ -274,8 +276,7 @@ BreakpointResolverName::SearchCallback(SearchFilter &filter,
const size_t start_func_idx = func_list.GetSize();
context.module_sp->FindFunctions(
lookup.GetLookupName(), CompilerDeclContext(),
- lookup.GetNameTypeMask(), include_symbols, include_inlines,
- func_list);
+ lookup.GetNameTypeMask(), function_options, func_list);
const size_t end_func_idx = func_list.GetSize();
@@ -286,10 +287,7 @@ BreakpointResolverName::SearchCallback(SearchFilter &filter,
break;
case Breakpoint::Regexp:
if (context.module_sp) {
- context.module_sp->FindFunctions(
- m_regex,
- !filter_by_cu, // include symbols only if we aren't filtering by CU
- include_inlines, func_list);
+ context.module_sp->FindFunctions(m_regex, function_options, func_list);
}
break;
case Breakpoint::Glob:
diff --git a/lldb/source/Commands/CommandCompletions.cpp b/lldb/source/Commands/CommandCompletions.cpp
index 55018cef57d4..42b0bac717bd 100644
--- a/lldb/source/Commands/CommandCompletions.cpp
+++ b/lldb/source/Commands/CommandCompletions.cpp
@@ -17,6 +17,8 @@
#include "lldb/Host/FileSystem.h"
#include "lldb/Interpreter/CommandCompletions.h"
#include "lldb/Interpreter/CommandInterpreter.h"
+#include "lldb/Interpreter/CommandObject.h"
+#include "lldb/Interpreter/CommandObjectMultiword.h"
#include "lldb/Interpreter/OptionValueProperties.h"
#include "lldb/Symbol/CompileUnit.h"
#include "lldb/Symbol/Variable.h"
@@ -213,10 +215,10 @@ public:
Address *addr) override {
if (context.module_sp) {
SymbolContextList sc_list;
- const bool include_symbols = true;
- const bool include_inlines = true;
- context.module_sp->FindFunctions(m_regex, include_symbols,
- include_inlines, sc_list);
+ ModuleFunctionSearchOptions function_options;
+ function_options.include_symbols = true;
+ function_options.include_inlines = true;
+ context.module_sp->FindFunctions(m_regex, function_options, sc_list);
SymbolContext sc;
// Now add the functions & symbols to the list - only add if unique:
@@ -792,3 +794,60 @@ void CommandCompletions::TypeCategoryNames(CommandInterpreter &interpreter,
return true;
});
}
+
+void CommandCompletions::CompleteModifiableCmdPathArgs(
+ CommandInterpreter &interpreter, CompletionRequest &request,
+ OptionElementVector &opt_element_vector) {
+ // The arguments constitute a command path; however, there might be options
+ // interspersed among them, and we need to skip those. Do that by copying the
+ // args vector and dropping all the option bits:
+ Args args = request.GetParsedLine();
+ std::vector<size_t> to_delete;
+ for (auto &elem : opt_element_vector) {
+ to_delete.push_back(elem.opt_pos);
+ if (elem.opt_arg_pos != 0)
+ to_delete.push_back(elem.opt_arg_pos);
+ }
+ sort(to_delete.begin(), to_delete.end(), std::greater<size_t>());
+ for (size_t idx : to_delete)
+ args.DeleteArgumentAtIndex(idx);
+
+ // At this point, we should only have args, so now look up the command up
+ // to the cursor element.
+
+ // There's nothing here but options. It doesn't seem very useful here to
+ // dump all the commands, so just return.
+ size_t num_args = args.GetArgumentCount();
+ if (num_args == 0)
+ return;
+
+ // There's just one argument, so we should complete its name:
+ StringList matches;
+ if (num_args == 1) {
+ interpreter.GetUserCommandObject(args.GetArgumentAtIndex(0), &matches,
+ nullptr);
+ request.AddCompletions(matches);
+ return;
+ }
+
+ // There was more than one path element, lets find the containing command:
+ Status error;
+ CommandObjectMultiword *mwc =
+ interpreter.VerifyUserMultiwordCmdPath(args, true, error);
+
+ // Something was wrong somewhere along the path, but I don't think there's
+ // a good way to go back and fill in the missing elements:
+ if (error.Fail())
+ return;
+
+ // This should never happen. We already handled the case of one argument
+ // above, and we can only get Success & nullptr back if there's a one-word
+ // leaf.
+ assert(mwc != nullptr);
+
+ mwc->GetSubcommandObject(args.GetArgumentAtIndex(num_args - 1), &matches);
+ if (matches.GetSize() == 0)
+ return;
+
+ request.AddCompletions(matches);
+}
diff --git a/lldb/source/Commands/CommandObjectApropos.cpp b/lldb/source/Commands/CommandObjectApropos.cpp
index 656487169a34..c6680f8b140d 100644
--- a/lldb/source/Commands/CommandObjectApropos.cpp
+++ b/lldb/source/Commands/CommandObjectApropos.cpp
@@ -49,8 +49,8 @@ bool CommandObjectApropos::DoExecute(Args &args, CommandReturnObject &result) {
StringList commands_found;
StringList commands_help;
- m_interpreter.FindCommandsForApropos(search_word, commands_found,
- commands_help, true, true, true);
+ m_interpreter.FindCommandsForApropos(
+ search_word, commands_found, commands_help, true, true, true, true);
if (commands_found.GetSize() == 0) {
result.AppendMessageWithFormat("No commands found pertaining to '%s'. "
diff --git a/lldb/source/Commands/CommandObjectBreakpoint.cpp b/lldb/source/Commands/CommandObjectBreakpoint.cpp
index 722d5c4d8f47..3f88a2fa6378 100644
--- a/lldb/source/Commands/CommandObjectBreakpoint.cpp
+++ b/lldb/source/Commands/CommandObjectBreakpoint.cpp
@@ -110,7 +110,19 @@ public:
case 't': {
lldb::tid_t thread_id = LLDB_INVALID_THREAD_ID;
if (option_arg[0] != '\0') {
- if (option_arg.getAsInteger(0, thread_id))
+ if (option_arg == "current") {
+ if (!execution_context) {
+ error.SetErrorStringWithFormat("No context to determine current "
+ "thread");
+ } else {
+ ThreadSP ctx_thread_sp = execution_context->GetThreadSP();
+ if (!ctx_thread_sp || !ctx_thread_sp->IsValid()) {
+ error.SetErrorStringWithFormat("No currently selected thread");
+ } else {
+ thread_id = ctx_thread_sp->GetID();
+ }
+ }
+ } else if (option_arg.getAsInteger(0, thread_id))
error.SetErrorStringWithFormat("invalid thread id string '%s'",
option_arg.str().c_str());
}
diff --git a/lldb/source/Commands/CommandObjectCommands.cpp b/lldb/source/Commands/CommandObjectCommands.cpp
index 9a8b81c007ad..1ec54cf7eded 100644
--- a/lldb/source/Commands/CommandObjectCommands.cpp
+++ b/lldb/source/Commands/CommandObjectCommands.cpp
@@ -77,7 +77,7 @@ protected:
public:
CommandOptions()
: Options(), m_stop_on_error(true), m_silent_run(false),
- m_stop_on_continue(true) {}
+ m_stop_on_continue(true), m_cmd_relative_to_command_file(false) {}
~CommandOptions() override = default;
@@ -95,6 +95,10 @@ protected:
error = m_stop_on_continue.SetValueFromString(option_arg);
break;
+ case 'C':
+ m_cmd_relative_to_command_file = true;
+ break;
+
case 's':
error = m_silent_run.SetValueFromString(option_arg);
break;
@@ -110,6 +114,7 @@ protected:
m_stop_on_error.Clear();
m_silent_run.Clear();
m_stop_on_continue.Clear();
+ m_cmd_relative_to_command_file.Clear();
}
llvm::ArrayRef<OptionDefinition> GetDefinitions() override {
@@ -121,6 +126,7 @@ protected:
OptionValueBoolean m_stop_on_error;
OptionValueBoolean m_silent_run;
OptionValueBoolean m_stop_on_continue;
+ OptionValueBoolean m_cmd_relative_to_command_file;
};
bool DoExecute(Args &command, CommandReturnObject &result) override {
@@ -131,7 +137,29 @@ protected:
return false;
}
+ FileSpec source_dir = {};
+ if (m_options.m_cmd_relative_to_command_file) {
+ source_dir = GetDebugger().GetCommandInterpreter().GetCurrentSourceDir();
+ if (!source_dir) {
+ result.AppendError("command source -C can only be specified "
+ "from a command file");
+ result.SetStatus(eReturnStatusFailed);
+ return false;
+ }
+ }
+
FileSpec cmd_file(command[0].ref());
+ if (source_dir) {
+ // Prepend the source_dir to the cmd_file path:
+ if (!cmd_file.IsRelative()) {
+ result.AppendError("command source -C can only be used "
+ "with a relative path.");
+ result.SetStatus(eReturnStatusFailed);
+ return false;
+ }
+ cmd_file.MakeAbsolute(source_dir);
+ }
+
FileSystem::Instance().Resolve(cmd_file);
CommandInterpreterRunOptions options;
@@ -415,6 +443,14 @@ protected:
return false;
}
+ if (m_interpreter.UserMultiwordCommandExists(alias_command)) {
+ result.AppendErrorWithFormat(
+ "'%s' is a user container command and cannot be overwritten.\n"
+ "Delete it first with 'command container delete'\n",
+ args[0].c_str());
+ return false;
+ }
+
// Get CommandObject that is being aliased. The command name is read from
// the front of raw_command_string. raw_command_string is returned with the
// name of the command object stripped off the front.
@@ -500,6 +536,14 @@ protected:
return false;
}
+ if (m_interpreter.UserMultiwordCommandExists(alias_command)) {
+ result.AppendErrorWithFormat(
+ "'%s' is a user container command and cannot be overwritten.\n"
+ "Delete it first with 'command container delete'",
+ alias_command.c_str());
+ return false;
+ }
+
CommandObjectSP command_obj_sp(
m_interpreter.GetCommandSPExact(actual_command, true));
CommandObjectSP subcommand_obj_sp;
@@ -1343,14 +1387,21 @@ public:
CommandObjectCommandsScriptAdd(CommandInterpreter &interpreter)
: CommandObjectParsed(interpreter, "command script add",
"Add a scripted function as an LLDB command.",
- nullptr),
+ "Add a scripted function as an lldb command. "
+ "If you provide a single argument, the command "
+ "will be added at the root level of the command "
+ "hierarchy. If there are more arguments they "
+ "must be a path to a user-added container "
+ "command, and the last element will be the new "
+ "command name."),
IOHandlerDelegateMultiline("DONE"), m_options() {
CommandArgumentEntry arg1;
CommandArgumentData cmd_arg;
- // Define the first (and only) variant of this arg.
- cmd_arg.arg_type = eArgTypeCommandName;
- cmd_arg.arg_repetition = eArgRepeatPlain;
+ // This is one or more command names, which form the path to the command
+ // you want to add.
+ cmd_arg.arg_type = eArgTypeCommand;
+ cmd_arg.arg_repetition = eArgRepeatPlus;
// There is only one variant this argument could be; put it into the
// argument entry.
@@ -1364,6 +1415,13 @@ public:
Options *GetOptions() override { return &m_options; }
+ void
+ HandleArgumentCompletion(CompletionRequest &request,
+ OptionElementVector &opt_element_vector) override {
+ CommandCompletions::CompleteModifiableCmdPathArgs(m_interpreter, request,
+ opt_element_vector);
+ }
+
protected:
class CommandOptions : public Options {
public:
@@ -1390,6 +1448,9 @@ protected:
if (!option_arg.empty())
m_short_help = std::string(option_arg);
break;
+ case 'o':
+ m_overwrite = true;
+ break;
case 's':
m_synchronicity =
(ScriptedCommandSynchronicity)OptionArgParser::ToOptionEnum(
@@ -1410,6 +1471,7 @@ protected:
m_class_name.clear();
m_funct_name.clear();
m_short_help.clear();
+ m_overwrite = false;
m_synchronicity = eScriptedCommandSynchronicitySynchronous;
}
@@ -1422,6 +1484,7 @@ protected:
std::string m_class_name;
std::string m_funct_name;
std::string m_short_help;
+ bool m_overwrite;
ScriptedCommandSynchronicity m_synchronicity =
eScriptedCommandSynchronicitySynchronous;
};
@@ -1456,26 +1519,36 @@ protected:
CommandObjectSP command_obj_sp(new CommandObjectPythonFunction(
m_interpreter, m_cmd_name, funct_name_str, m_short_help,
m_synchronicity));
-
- if (!m_interpreter.AddUserCommand(m_cmd_name, command_obj_sp,
- true)) {
- error_sp->Printf("error: unable to add selected command, didn't "
- "add python command.\n");
- error_sp->Flush();
+ if (!m_container) {
+ Status error = m_interpreter.AddUserCommand(
+ m_cmd_name, command_obj_sp, m_overwrite);
+ if (error.Fail()) {
+ error_sp->Printf("error: unable to add selected command: '%s'",
+ error.AsCString());
+ error_sp->Flush();
+ }
+ } else {
+ llvm::Error llvm_error = m_container->LoadUserSubcommand(
+ m_cmd_name, command_obj_sp, m_overwrite);
+ if (llvm_error) {
+ error_sp->Printf("error: unable to add selected command: '%s'",
+ llvm::toString(std::move(llvm_error)).c_str());
+ error_sp->Flush();
+ }
}
}
} else {
error_sp->Printf(
- "error: unable to create function, didn't add python command.\n");
+ "error: unable to create function, didn't add python command\n");
error_sp->Flush();
}
} else {
- error_sp->Printf("error: empty function, didn't add python command.\n");
+ error_sp->Printf("error: empty function, didn't add python command\n");
error_sp->Flush();
}
} else {
error_sp->Printf(
- "error: script interpreter missing, didn't add python command.\n");
+ "error: script interpreter missing, didn't add python command\n");
error_sp->Flush();
}
@@ -1489,31 +1562,45 @@ protected:
return false;
}
- if (command.GetArgumentCount() != 1) {
- result.AppendError("'command script add' requires one argument");
+ if (command.GetArgumentCount() == 0) {
+ result.AppendError("'command script add' requires at least one argument");
return false;
}
-
// Store the options in case we get multi-line input
- m_cmd_name = std::string(command[0].ref());
+ m_overwrite = m_options.m_overwrite;
+ Status path_error;
+ m_container = GetCommandInterpreter().VerifyUserMultiwordCmdPath(
+ command, true, path_error);
+
+ if (path_error.Fail()) {
+ result.AppendErrorWithFormat("error in command path: %s",
+ path_error.AsCString());
+ return false;
+ }
+
+ if (!m_container) {
+ // This is getting inserted into the root of the interpreter.
+ m_cmd_name = std::string(command[0].ref());
+ } else {
+ size_t num_args = command.GetArgumentCount();
+ m_cmd_name = std::string(command[num_args - 1].ref());
+ }
+
m_short_help.assign(m_options.m_short_help);
m_synchronicity = m_options.m_synchronicity;
+ // Handle the case where we prompt for the script code first:
+ if (m_options.m_class_name.empty() && m_options.m_funct_name.empty()) {
+ m_interpreter.GetPythonCommandsFromIOHandler(" ", // Prompt
+ *this); // IOHandlerDelegate
+ return result.Succeeded();
+ }
+
+ CommandObjectSP new_cmd_sp;
if (m_options.m_class_name.empty()) {
- if (m_options.m_funct_name.empty()) {
- m_interpreter.GetPythonCommandsFromIOHandler(
- " ", // Prompt
- *this); // IOHandlerDelegate
- } else {
- CommandObjectSP new_cmd(new CommandObjectPythonFunction(
- m_interpreter, m_cmd_name, m_options.m_funct_name,
- m_options.m_short_help, m_synchronicity));
- if (m_interpreter.AddUserCommand(m_cmd_name, new_cmd, true)) {
- result.SetStatus(eReturnStatusSuccessFinishNoResult);
- } else {
- result.AppendError("cannot add command");
- }
- }
+ new_cmd_sp.reset(new CommandObjectPythonFunction(
+ m_interpreter, m_cmd_name, m_options.m_funct_name,
+ m_options.m_short_help, m_synchronicity));
} else {
ScriptInterpreter *interpreter = GetDebugger().GetScriptInterpreter();
if (!interpreter) {
@@ -1528,21 +1615,33 @@ protected:
return false;
}
- CommandObjectSP new_cmd(new CommandObjectScriptingObject(
+ new_cmd_sp.reset(new CommandObjectScriptingObject(
m_interpreter, m_cmd_name, cmd_obj_sp, m_synchronicity));
- if (m_interpreter.AddUserCommand(m_cmd_name, new_cmd, true)) {
- result.SetStatus(eReturnStatusSuccessFinishNoResult);
- } else {
- result.AppendError("cannot add command");
- }
}
-
+
+ // Assume we're going to succeed...
+ result.SetStatus(eReturnStatusSuccessFinishNoResult);
+ if (!m_container) {
+ Status add_error =
+ m_interpreter.AddUserCommand(m_cmd_name, new_cmd_sp, m_overwrite);
+ if (add_error.Fail())
+ result.AppendErrorWithFormat("cannot add command: %s",
+ add_error.AsCString());
+ } else {
+ llvm::Error llvm_error =
+ m_container->LoadUserSubcommand(m_cmd_name, new_cmd_sp, m_overwrite);
+ if (llvm_error)
+ result.AppendErrorWithFormat("cannot add command: %s",
+ llvm::toString(std::move(llvm_error)).c_str());
+ }
return result.Succeeded();
}
CommandOptions m_options;
std::string m_cmd_name;
+ CommandObjectMultiword *m_container = nullptr;
std::string m_short_help;
+ bool m_overwrite;
ScriptedCommandSynchronicity m_synchronicity;
};
@@ -1552,7 +1651,8 @@ class CommandObjectCommandsScriptList : public CommandObjectParsed {
public:
CommandObjectCommandsScriptList(CommandInterpreter &interpreter)
: CommandObjectParsed(interpreter, "command script list",
- "List defined scripted commands.", nullptr) {}
+ "List defined top-level scripted commands.",
+ nullptr) {}
~CommandObjectCommandsScriptList() override = default;
@@ -1600,14 +1700,17 @@ protected:
class CommandObjectCommandsScriptDelete : public CommandObjectParsed {
public:
CommandObjectCommandsScriptDelete(CommandInterpreter &interpreter)
- : CommandObjectParsed(interpreter, "command script delete",
- "Delete a scripted command.", nullptr) {
+ : CommandObjectParsed(
+ interpreter, "command script delete",
+ "Delete a scripted command by specifying the path to the command.",
+ nullptr) {
CommandArgumentEntry arg1;
CommandArgumentData cmd_arg;
- // Define the first (and only) variant of this arg.
- cmd_arg.arg_type = eArgTypeCommandName;
- cmd_arg.arg_repetition = eArgRepeatPlain;
+ // This is a list of command names forming the path to the command
+ // to be deleted.
+ cmd_arg.arg_type = eArgTypeCommand;
+ cmd_arg.arg_repetition = eArgRepeatPlus;
// There is only one variant this argument could be; put it into the
// argument entry.
@@ -1622,30 +1725,86 @@ public:
void
HandleArgumentCompletion(CompletionRequest &request,
OptionElementVector &opt_element_vector) override {
- if (!m_interpreter.HasCommands() || request.GetCursorIndex() != 0)
- return;
-
- for (const auto &c : m_interpreter.GetUserCommands())
- request.TryCompleteCurrentArg(c.first, c.second->GetHelp());
+ CommandCompletions::CompleteModifiableCmdPathArgs(m_interpreter, request,
+ opt_element_vector);
}
protected:
bool DoExecute(Args &command, CommandReturnObject &result) override {
- if (command.GetArgumentCount() != 1) {
- result.AppendError("'command script delete' requires one argument");
+ llvm::StringRef root_cmd = command[0].ref();
+ size_t num_args = command.GetArgumentCount();
+
+ if (root_cmd.empty()) {
+ result.AppendErrorWithFormat("empty root command name");
+ return false;
+ }
+ if (!m_interpreter.HasUserCommands() &&
+ !m_interpreter.HasUserMultiwordCommands()) {
+ result.AppendErrorWithFormat("can only delete user defined commands, "
+ "but no user defined commands found");
return false;
}
- auto cmd_name = command[0].ref();
+ CommandObjectSP cmd_sp = m_interpreter.GetCommandSPExact(root_cmd);
+ if (!cmd_sp) {
+ result.AppendErrorWithFormat("command '%s' not found.",
+ command[0].c_str());
+ return false;
+ }
+ if (!cmd_sp->IsUserCommand()) {
+ result.AppendErrorWithFormat("command '%s' is not a user command.",
+ command[0].c_str());
+ return false;
+ }
+ if (cmd_sp->GetAsMultiwordCommand() && num_args == 1) {
+ result.AppendErrorWithFormat("command '%s' is a multi-word command.\n "
+ "Delete with \"command container delete\"",
+ command[0].c_str());
+ return false;
+ }
- if (cmd_name.empty() || !m_interpreter.HasUserCommands() ||
- !m_interpreter.UserCommandExists(cmd_name)) {
- result.AppendErrorWithFormat("command %s not found", command[0].c_str());
+ if (command.GetArgumentCount() == 1) {
+ m_interpreter.RemoveUser(root_cmd);
+ result.SetStatus(eReturnStatusSuccessFinishResult);
+ return true;
+ }
+ // We're deleting a command from a multiword command. Verify the command
+ // path:
+ Status error;
+ CommandObjectMultiword *container =
+ GetCommandInterpreter().VerifyUserMultiwordCmdPath(command, true,
+ error);
+ if (error.Fail()) {
+ result.AppendErrorWithFormat("could not resolve command path: %s",
+ error.AsCString());
+ return false;
+ }
+ if (!container) {
+ // This means that command only had a leaf command, so the container is
+ // the root. That should have been handled above.
+ result.AppendErrorWithFormat("could not find a container for '%s'",
+ command[0].c_str());
+ return false;
+ }
+ const char *leaf_cmd = command[num_args - 1].c_str();
+ llvm::Error llvm_error = container->RemoveUserSubcommand(leaf_cmd,
+ /* multiword not okay */ false);
+ if (llvm_error) {
+ result.AppendErrorWithFormat("could not delete command '%s': %s",
+ leaf_cmd,
+ llvm::toString(std::move(llvm_error)).c_str());
return false;
}
- m_interpreter.RemoveUser(cmd_name);
+ Stream &out_stream = result.GetOutputStream();
+
+ out_stream << "Deleted command:";
+ for (size_t idx = 0; idx < num_args; idx++) {
+ out_stream << ' ';
+ out_stream << command[idx].c_str();
+ }
+ out_stream << '\n';
result.SetStatus(eReturnStatusSuccessFinishResult);
return true;
}
@@ -1682,6 +1841,271 @@ public:
~CommandObjectMultiwordCommandsScript() override = default;
};
+#pragma mark CommandObjectCommandContainer
+#define LLDB_OPTIONS_container_add
+#include "CommandOptions.inc"
+
+class CommandObjectCommandsContainerAdd : public CommandObjectParsed {
+public:
+ CommandObjectCommandsContainerAdd(CommandInterpreter &interpreter)
+ : CommandObjectParsed(
+ interpreter, "command container add",
+ "Add a container command to lldb. Adding to built-"
+ "in container commands is not allowed.",
+ "command container add [[path1]...] container-name") {
+ CommandArgumentEntry arg1;
+ CommandArgumentData cmd_arg;
+
+ // This is one or more command names, which form the path to the command
+ // you want to add.
+ cmd_arg.arg_type = eArgTypeCommand;
+ cmd_arg.arg_repetition = eArgRepeatPlus;
+
+ // There is only one variant this argument could be; put it into the
+ // argument entry.
+ arg1.push_back(cmd_arg);
+
+ // Push the data for the first argument into the m_arguments vector.
+ m_arguments.push_back(arg1);
+ }
+
+ ~CommandObjectCommandsContainerAdd() override = default;
+
+ Options *GetOptions() override { return &m_options; }
+
+ void
+ HandleArgumentCompletion(CompletionRequest &request,
+ OptionElementVector &opt_element_vector) override {
+ CommandCompletions::CompleteModifiableCmdPathArgs(m_interpreter, request,
+ opt_element_vector);
+ }
+
+protected:
+ class CommandOptions : public Options {
+ public:
+ CommandOptions() : Options(), m_short_help(), m_long_help() {}
+
+ ~CommandOptions() override = default;
+
+ Status SetOptionValue(uint32_t option_idx, llvm::StringRef option_arg,
+ ExecutionContext *execution_context) override {
+ Status error;
+ const int short_option = m_getopt_table[option_idx].val;
+
+ switch (short_option) {
+ case 'h':
+ if (!option_arg.empty())
+ m_short_help = std::string(option_arg);
+ break;
+ case 'o':
+ m_overwrite = true;
+ break;
+ case 'H':
+ if (!option_arg.empty())
+ m_long_help = std::string(option_arg);
+ break;
+ default:
+ llvm_unreachable("Unimplemented option");
+ }
+
+ return error;
+ }
+
+ void OptionParsingStarting(ExecutionContext *execution_context) override {
+ m_short_help.clear();
+ m_long_help.clear();
+ m_overwrite = false;
+ }
+
+ llvm::ArrayRef<OptionDefinition> GetDefinitions() override {
+ return llvm::makeArrayRef(g_container_add_options);
+ }
+
+ // Instance variables to hold the values for command options.
+
+ std::string m_short_help;
+ std::string m_long_help;
+ bool m_overwrite = false;
+ };
+ bool DoExecute(Args &command, CommandReturnObject &result) override {
+ size_t num_args = command.GetArgumentCount();
+
+ if (num_args == 0) {
+ result.AppendError("no command was specified");
+ return false;
+ }
+
+ if (num_args == 1) {
+ // We're adding this as a root command, so use the interpreter.
+ const char *cmd_name = command.GetArgumentAtIndex(0);
+ auto cmd_sp = CommandObjectSP(new CommandObjectMultiword(
+ GetCommandInterpreter(), cmd_name, m_options.m_short_help.c_str(),
+ m_options.m_long_help.c_str()));
+ cmd_sp->GetAsMultiwordCommand()->SetRemovable(true);
+ Status add_error = GetCommandInterpreter().AddUserCommand(
+ cmd_name, cmd_sp, m_options.m_overwrite);
+ if (add_error.Fail()) {
+ result.AppendErrorWithFormat("error adding command: %s",
+ add_error.AsCString());
+ return false;
+ }
+ result.SetStatus(eReturnStatusSuccessFinishNoResult);
+ return true;
+ }
+
+ // We're adding this to a subcommand, first find the subcommand:
+ Status path_error;
+ CommandObjectMultiword *add_to_me =
+ GetCommandInterpreter().VerifyUserMultiwordCmdPath(command, true,
+ path_error);
+
+ if (!add_to_me) {
+ result.AppendErrorWithFormat("error adding command: %s",
+ path_error.AsCString());
+ return false;
+ }
+
+ const char *cmd_name = command.GetArgumentAtIndex(num_args - 1);
+ auto cmd_sp = CommandObjectSP(new CommandObjectMultiword(
+ GetCommandInterpreter(), cmd_name, m_options.m_short_help.c_str(),
+ m_options.m_long_help.c_str()));
+ llvm::Error llvm_error =
+ add_to_me->LoadUserSubcommand(cmd_name, cmd_sp, m_options.m_overwrite);
+ if (llvm_error) {
+ result.AppendErrorWithFormat("error adding subcommand: %s",
+ llvm::toString(std::move(llvm_error)).c_str());
+ return false;
+ }
+
+ result.SetStatus(eReturnStatusSuccessFinishNoResult);
+ return true;
+ }
+
+private:
+ CommandOptions m_options;
+};
+
+#define LLDB_OPTIONS_multiword_delete
+#include "CommandOptions.inc"
+class CommandObjectCommandsContainerDelete : public CommandObjectParsed {
+public:
+ CommandObjectCommandsContainerDelete(CommandInterpreter &interpreter)
+ : CommandObjectParsed(
+ interpreter, "command container delete",
+ "Delete a container command previously added to "
+ "lldb.",
+ "command container delete [[path1] ...] container-cmd") {
+ CommandArgumentEntry arg1;
+ CommandArgumentData cmd_arg;
+
+ // This is one or more command names, which form the path to the command
+ // you want to add.
+ cmd_arg.arg_type = eArgTypeCommand;
+ cmd_arg.arg_repetition = eArgRepeatPlus;
+
+ // There is only one variant this argument could be; put it into the
+ // argument entry.
+ arg1.push_back(cmd_arg);
+
+ // Push the data for the first argument into the m_arguments vector.
+ m_arguments.push_back(arg1);
+ }
+
+ ~CommandObjectCommandsContainerDelete() override = default;
+
+ void
+ HandleArgumentCompletion(CompletionRequest &request,
+ OptionElementVector &opt_element_vector) override {
+ CommandCompletions::CompleteModifiableCmdPathArgs(m_interpreter, request,
+ opt_element_vector);
+ }
+
+protected:
+ bool DoExecute(Args &command, CommandReturnObject &result) override {
+ size_t num_args = command.GetArgumentCount();
+
+ if (num_args == 0) {
+ result.AppendError("No command was specified.");
+ return false;
+ }
+
+ if (num_args == 1) {
+ // We're removing a root command, so we need to delete it from the
+ // interpreter.
+ const char *cmd_name = command.GetArgumentAtIndex(0);
+ // Let's do a little more work here so we can do better error reporting.
+ CommandInterpreter &interp = GetCommandInterpreter();
+ CommandObjectSP cmd_sp = interp.GetCommandSPExact(cmd_name);
+ if (!cmd_sp) {
+ result.AppendErrorWithFormat("container command %s doesn't exist.",
+ cmd_name);
+ return false;
+ }
+ if (!cmd_sp->IsUserCommand()) {
+ result.AppendErrorWithFormat(
+ "container command %s is not a user command", cmd_name);
+ return false;
+ }
+ if (!cmd_sp->GetAsMultiwordCommand()) {
+ result.AppendErrorWithFormat("command %s is not a container command",
+ cmd_name);
+ return false;
+ }
+
+ bool did_remove = GetCommandInterpreter().RemoveUserMultiword(cmd_name);
+ if (!did_remove) {
+ result.AppendErrorWithFormat("error removing command %s.", cmd_name);
+ return false;
+ }
+
+ result.SetStatus(eReturnStatusSuccessFinishNoResult);
+ return true;
+ }
+
+ // We're removing a subcommand, first find the subcommand's owner:
+ Status path_error;
+ CommandObjectMultiword *container =
+ GetCommandInterpreter().VerifyUserMultiwordCmdPath(command, true,
+ path_error);
+
+ if (!container) {
+ result.AppendErrorWithFormat("error removing container command: %s",
+ path_error.AsCString());
+ return false;
+ }
+ const char *leaf = command.GetArgumentAtIndex(num_args - 1);
+ llvm::Error llvm_error =
+ container->RemoveUserSubcommand(leaf, /* multiword okay */ true);
+ if (llvm_error) {
+ result.AppendErrorWithFormat("error removing container command: %s",
+ llvm::toString(std::move(llvm_error)).c_str());
+ return false;
+ }
+ result.SetStatus(eReturnStatusSuccessFinishNoResult);
+ return true;
+ }
+};
+
+class CommandObjectCommandContainer : public CommandObjectMultiword {
+public:
+ CommandObjectCommandContainer(CommandInterpreter &interpreter)
+ : CommandObjectMultiword(
+ interpreter, "command container",
+ "Commands for adding container commands to lldb. "
+ "Container commands are containers for other commands. You can "
+ "add nested container commands by specifying a command path, "
+ "but you can't add commands into the built-in command hierarchy.",
+ "command container <subcommand> [<subcommand-options>]") {
+ LoadSubCommand("add", CommandObjectSP(new CommandObjectCommandsContainerAdd(
+ interpreter)));
+ LoadSubCommand(
+ "delete",
+ CommandObjectSP(new CommandObjectCommandsContainerDelete(interpreter)));
+ }
+
+ ~CommandObjectCommandContainer() override = default;
+};
+
#pragma mark CommandObjectMultiwordCommands
// CommandObjectMultiwordCommands
@@ -1699,6 +2123,8 @@ CommandObjectMultiwordCommands::CommandObjectMultiwordCommands(
new CommandObjectCommandsUnalias(interpreter)));
LoadSubCommand("delete",
CommandObjectSP(new CommandObjectCommandsDelete(interpreter)));
+ LoadSubCommand("container", CommandObjectSP(new CommandObjectCommandContainer(
+ interpreter)));
LoadSubCommand(
"regex", CommandObjectSP(new CommandObjectCommandsAddRegex(interpreter)));
LoadSubCommand(
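Taken together, the container and multi-word script-add changes above allow a session roughly like the following hypothetical one (the command names, help text, and Python function are made up; the option letters match the handlers shown above):

    (lldb) command container add -h "User-defined utilities" utils
    (lldb) command script add -f my_module.do_thing_impl utils do-thing
    (lldb) utils do-thing
    (lldb) command script delete utils do-thing
    (lldb) command container delete utils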
diff --git a/lldb/source/Commands/CommandObjectDisassemble.cpp b/lldb/source/Commands/CommandObjectDisassemble.cpp
index 5e73fb8218ab..02a16622c76b 100644
--- a/lldb/source/Commands/CommandObjectDisassemble.cpp
+++ b/lldb/source/Commands/CommandObjectDisassemble.cpp
@@ -322,13 +322,15 @@ CommandObjectDisassemble::GetCurrentLineRanges() {
llvm::Expected<std::vector<AddressRange>>
CommandObjectDisassemble::GetNameRanges(CommandReturnObject &result) {
ConstString name(m_options.func_name.c_str());
- const bool include_symbols = true;
- const bool include_inlines = true;
+
+ ModuleFunctionSearchOptions function_options;
+ function_options.include_symbols = true;
+ function_options.include_inlines = true;
// Find functions matching the given name.
SymbolContextList sc_list;
- GetSelectedTarget().GetImages().FindFunctions(
- name, eFunctionNameTypeAuto, include_symbols, include_inlines, sc_list);
+ GetSelectedTarget().GetImages().FindFunctions(name, eFunctionNameTypeAuto,
+ function_options, sc_list);
std::vector<AddressRange> ranges;
llvm::Error range_errs = llvm::Error::success();
diff --git a/lldb/source/Commands/CommandObjectExpression.cpp b/lldb/source/Commands/CommandObjectExpression.cpp
index bf62f3f297cc..9d13ccab6d3e 100644
--- a/lldb/source/Commands/CommandObjectExpression.cpp
+++ b/lldb/source/Commands/CommandObjectExpression.cpp
@@ -421,9 +421,8 @@ bool CommandObjectExpression::EvaluateExpression(llvm::StringRef expr,
// We only tell you about the FixIt if we applied it. The compiler errors
// will suggest the FixIt if it parsed.
if (!m_fixed_expression.empty() && target.GetEnableNotifyAboutFixIts()) {
- if (success == eExpressionCompleted)
- error_stream.Printf(" Fix-it applied, fixed expression was: \n %s\n",
- m_fixed_expression.c_str());
+ error_stream.Printf(" Fix-it applied, fixed expression was: \n %s\n",
+ m_fixed_expression.c_str());
}
if (result_valobj_sp) {
@@ -660,13 +659,8 @@ bool CommandObjectExpression::DoExecute(llvm::StringRef command,
fixed_command.append(m_fixed_expression);
history.AppendString(fixed_command);
}
- // Increment statistics to record this expression evaluation success.
- target.IncrementStats(StatisticKind::ExpressionSuccessful);
return true;
}
-
- // Increment statistics to record this expression evaluation failure.
- target.IncrementStats(StatisticKind::ExpressionFailure);
result.SetStatus(eReturnStatusFailed);
return false;
}
diff --git a/lldb/source/Commands/CommandObjectFrame.cpp b/lldb/source/Commands/CommandObjectFrame.cpp
index d90e357bf1aa..2b9f5316409f 100644
--- a/lldb/source/Commands/CommandObjectFrame.cpp
+++ b/lldb/source/Commands/CommandObjectFrame.cpp
@@ -14,6 +14,7 @@
#include "lldb/Host/OptionParser.h"
#include "lldb/Interpreter/CommandInterpreter.h"
#include "lldb/Interpreter/CommandReturnObject.h"
+#include "lldb/Interpreter/OptionArgParser.h"
#include "lldb/Interpreter/OptionGroupFormat.h"
#include "lldb/Interpreter/OptionGroupValueObjectDisplay.h"
#include "lldb/Interpreter/OptionGroupVariable.h"
@@ -104,7 +105,7 @@ public:
CommandObjectFrameDiagnose(CommandInterpreter &interpreter)
: CommandObjectParsed(interpreter, "frame diagnose",
- "Try to determine what path path the current stop "
+ "Try to determine what path the current stop "
"location used to get to a register or address",
nullptr,
eCommandRequiresThread | eCommandTryTargetAPILock |
@@ -707,11 +708,11 @@ protected:
// Increment statistics.
bool res = result.Succeeded();
- Target &target = GetSelectedOrDummyTarget();
+ TargetStats &target_stats = GetSelectedOrDummyTarget().GetStatistics();
if (res)
- target.IncrementStats(StatisticKind::FrameVarSuccess);
+ target_stats.GetFrameVariableStats().NotifySuccess();
else
- target.IncrementStats(StatisticKind::FrameVarFailure);
+ target_stats.GetFrameVariableStats().NotifyFailure();
return res;
}
@@ -739,6 +740,17 @@ private:
const int short_option = m_getopt_table[option_idx].val;
switch (short_option) {
+ case 'f': {
+ bool value, success;
+ value = OptionArgParser::ToBoolean(option_arg, true, &success);
+ if (success) {
+ m_first_instruction_only = value;
+ } else {
+ error.SetErrorStringWithFormat(
+ "invalid boolean value '%s' passed for -f option",
+ option_arg.str().c_str());
+ }
+ } break;
case 'l':
m_class_name = std::string(option_arg);
break;
@@ -763,6 +775,7 @@ private:
m_symbols.clear();
m_class_name = "";
m_regex = false;
+ m_first_instruction_only = true;
}
llvm::ArrayRef<OptionDefinition> GetDefinitions() override {
@@ -774,6 +787,7 @@ private:
std::string m_module;
std::vector<std::string> m_symbols;
bool m_regex;
+ bool m_first_instruction_only;
};
CommandOptions m_options;
@@ -883,13 +897,13 @@ bool CommandObjectFrameRecognizerAdd::DoExecute(Args &command,
auto func =
RegularExpressionSP(new RegularExpression(m_options.m_symbols.front()));
GetSelectedOrDummyTarget().GetFrameRecognizerManager().AddRecognizer(
- recognizer_sp, module, func);
+ recognizer_sp, module, func, m_options.m_first_instruction_only);
} else {
auto module = ConstString(m_options.m_module);
std::vector<ConstString> symbols(m_options.m_symbols.begin(),
m_options.m_symbols.end());
GetSelectedOrDummyTarget().GetFrameRecognizerManager().AddRecognizer(
- recognizer_sp, module, symbols);
+ recognizer_sp, module, symbols, m_options.m_first_instruction_only);
}
#endif
diff --git a/lldb/source/Commands/CommandObjectHelp.cpp b/lldb/source/Commands/CommandObjectHelp.cpp
index 4643ee30f0f9..8c24efaa08ee 100644
--- a/lldb/source/Commands/CommandObjectHelp.cpp
+++ b/lldb/source/Commands/CommandObjectHelp.cpp
@@ -51,8 +51,9 @@ CommandObjectHelp::CommandObjectHelp(CommandInterpreter &interpreter)
CommandArgumentEntry arg;
CommandArgumentData command_arg;
- // Define the first (and only) variant of this arg.
- command_arg.arg_type = eArgTypeCommandName;
+ // A list of command names forming a path to the command we want help on.
+ // No names is allowed - in which case we dump the top-level help.
+ command_arg.arg_type = eArgTypeCommand;
command_arg.arg_repetition = eArgRepeatStar;
// There is only one variant this argument could be; put it into the argument
@@ -85,8 +86,10 @@ bool CommandObjectHelp::DoExecute(Args &command, CommandReturnObject &result) {
uint32_t cmd_types = CommandInterpreter::eCommandTypesBuiltin;
if (m_options.m_show_aliases)
cmd_types |= CommandInterpreter::eCommandTypesAliases;
- if (m_options.m_show_user_defined)
+ if (m_options.m_show_user_defined) {
cmd_types |= CommandInterpreter::eCommandTypesUserDef;
+ cmd_types |= CommandInterpreter::eCommandTypesUserMW;
+ }
if (m_options.m_show_hidden)
cmd_types |= CommandInterpreter::eCommandTypesHidden;
diff --git a/lldb/source/Commands/CommandObjectMemory.cpp b/lldb/source/Commands/CommandObjectMemory.cpp
index 5487d94c9019..f27d4bd7e4b2 100644
--- a/lldb/source/Commands/CommandObjectMemory.cpp
+++ b/lldb/source/Commands/CommandObjectMemory.cpp
@@ -754,7 +754,7 @@ protected:
if (outfile_spec) {
File::OpenOptions open_options =
- File::eOpenOptionWrite | File::eOpenOptionCanCreate;
+ File::eOpenOptionWriteOnly | File::eOpenOptionCanCreate;
const bool append = m_outfile_options.GetAppend().GetCurrentValue();
open_options |=
append ? File::eOpenOptionAppend : File::eOpenOptionTruncate;
diff --git a/lldb/source/Commands/CommandObjectMemoryTag.cpp b/lldb/source/Commands/CommandObjectMemoryTag.cpp
index 1dfb32a92f3b..840f81719d7d 100644
--- a/lldb/source/Commands/CommandObjectMemoryTag.cpp
+++ b/lldb/source/Commands/CommandObjectMemoryTag.cpp
@@ -7,8 +7,11 @@
//===----------------------------------------------------------------------===//
#include "CommandObjectMemoryTag.h"
+#include "lldb/Host/OptionParser.h"
#include "lldb/Interpreter/CommandReturnObject.h"
#include "lldb/Interpreter/OptionArgParser.h"
+#include "lldb/Interpreter/OptionGroupFormat.h"
+#include "lldb/Interpreter/OptionValueString.h"
#include "lldb/Target/Process.h"
using namespace lldb;
@@ -21,7 +24,8 @@ class CommandObjectMemoryTagRead : public CommandObjectParsed {
public:
CommandObjectMemoryTagRead(CommandInterpreter &interpreter)
: CommandObjectParsed(interpreter, "tag",
- "Read memory tags for the given range of memory.",
+ "Read memory tags for the given range of memory."
+ " Mismatched tags will be marked.",
nullptr,
eCommandRequiresTarget | eCommandRequiresProcess |
eCommandProcessMustBePaused) {
@@ -97,16 +101,17 @@ protected:
return false;
}
- result.AppendMessageWithFormatv("Logical tag: {0:x}",
- tag_manager->GetLogicalTag(start_addr));
+ lldb::addr_t logical_tag = tag_manager->GetLogicalTag(start_addr);
+ result.AppendMessageWithFormatv("Logical tag: {0:x}", logical_tag);
result.AppendMessage("Allocation tags:");
addr_t addr = tagged_range->GetRangeBase();
for (auto tag : *tags) {
addr_t next_addr = addr + tag_manager->GetGranuleSize();
// Showing tagged addresses here until we have non-address bit handling
- result.AppendMessageWithFormatv("[{0:x}, {1:x}): {2:x}", addr, next_addr,
- tag);
+ result.AppendMessageWithFormatv("[{0:x}, {1:x}): {2:x}{3}", addr,
+ next_addr, tag,
+ logical_tag == tag ? "" : " (mismatch)");
addr = next_addr;
}
@@ -115,6 +120,168 @@ protected:
}
};
+#define LLDB_OPTIONS_memory_tag_write
+#include "CommandOptions.inc"
+
+class CommandObjectMemoryTagWrite : public CommandObjectParsed {
+public:
+ class OptionGroupTagWrite : public OptionGroup {
+ public:
+ OptionGroupTagWrite() : OptionGroup(), m_end_addr(LLDB_INVALID_ADDRESS) {}
+
+ ~OptionGroupTagWrite() override = default;
+
+ llvm::ArrayRef<OptionDefinition> GetDefinitions() override {
+ return llvm::makeArrayRef(g_memory_tag_write_options);
+ }
+
+ Status SetOptionValue(uint32_t option_idx, llvm::StringRef option_value,
+ ExecutionContext *execution_context) override {
+ Status status;
+ const int short_option =
+ g_memory_tag_write_options[option_idx].short_option;
+
+ switch (short_option) {
+ case 'e':
+ m_end_addr = OptionArgParser::ToAddress(execution_context, option_value,
+ LLDB_INVALID_ADDRESS, &status);
+ break;
+ default:
+ llvm_unreachable("Unimplemented option");
+ }
+
+ return status;
+ }
+
+ void OptionParsingStarting(ExecutionContext *execution_context) override {
+ m_end_addr = LLDB_INVALID_ADDRESS;
+ }
+
+ lldb::addr_t m_end_addr;
+ };
+
+ CommandObjectMemoryTagWrite(CommandInterpreter &interpreter)
+ : CommandObjectParsed(interpreter, "tag",
+ "Write memory tags starting from the granule that "
+ "contains the given address.",
+ nullptr,
+ eCommandRequiresTarget | eCommandRequiresProcess |
+ eCommandProcessMustBePaused),
+ m_option_group(), m_tag_write_options() {
+ // Address
+ m_arguments.push_back(
+ CommandArgumentEntry{CommandArgumentData(eArgTypeAddressOrExpression)});
+ // One or more tag values
+ m_arguments.push_back(CommandArgumentEntry{
+ CommandArgumentData(eArgTypeValue, eArgRepeatPlus)});
+
+ m_option_group.Append(&m_tag_write_options);
+ m_option_group.Finalize();
+ }
+
+ ~CommandObjectMemoryTagWrite() override = default;
+
+ Options *GetOptions() override { return &m_option_group; }
+
+protected:
+ bool DoExecute(Args &command, CommandReturnObject &result) override {
+ if (command.GetArgumentCount() < 2) {
+ result.AppendError("wrong number of arguments; expected "
+ "<address-expression> <tag> [<tag> [...]]");
+ return false;
+ }
+
+ Status error;
+ addr_t start_addr = OptionArgParser::ToAddress(
+ &m_exe_ctx, command[0].ref(), LLDB_INVALID_ADDRESS, &error);
+ if (start_addr == LLDB_INVALID_ADDRESS) {
+ result.AppendErrorWithFormatv("Invalid address expression, {0}",
+ error.AsCString());
+ return false;
+ }
+
+ command.Shift(); // shift off start address
+
+ std::vector<lldb::addr_t> tags;
+ for (auto &entry : command) {
+ lldb::addr_t tag_value;
+ // getAsInteger returns true on failure
+ if (entry.ref().getAsInteger(0, tag_value)) {
+ result.AppendErrorWithFormat(
+ "'%s' is not a valid unsigned decimal string value.\n",
+ entry.c_str());
+ return false;
+ }
+ tags.push_back(tag_value);
+ }
+
+ Process *process = m_exe_ctx.GetProcessPtr();
+ llvm::Expected<const MemoryTagManager *> tag_manager_or_err =
+ process->GetMemoryTagManager();
+
+ if (!tag_manager_or_err) {
+ result.SetError(Status(tag_manager_or_err.takeError()));
+ return false;
+ }
+
+ const MemoryTagManager *tag_manager = *tag_manager_or_err;
+
+ MemoryRegionInfos memory_regions;
+ // If this fails the list of regions is cleared, so we don't need to read
+ // the return status here.
+ process->GetMemoryRegions(memory_regions);
+
+ // We have to assume start_addr is not granule aligned.
+ // So if we simply made a range:
+ // (start_addr, start_addr + (N * granule_size))
+ // We would end up with a range that isn't N granules but N+1
+ // granules. To avoid this we'll align the start first using the method that
+ // doesn't check memory attributes. (if the final range is untagged we'll
+ // handle that error later)
+ lldb::addr_t aligned_start_addr =
+ tag_manager->ExpandToGranule(MemoryTagManager::TagRange(start_addr, 1))
+ .GetRangeBase();
+
+ lldb::addr_t end_addr = 0;
+ // When you have an end address you want to align the range like tag read
+ // does. Meaning, align the start down (which we've done) and align the end
+ // up.
+ if (m_tag_write_options.m_end_addr != LLDB_INVALID_ADDRESS)
+ end_addr = m_tag_write_options.m_end_addr;
+ else
+ // Without an end address assume number of tags matches number of granules
+ // to write to
+ end_addr =
+ aligned_start_addr + (tags.size() * tag_manager->GetGranuleSize());
+
+ // Now we've aligned the start address so if we ask for another range
+ // using the number of tags N, we'll get back a range that is also N
+ // granules in size.
+ llvm::Expected<MemoryTagManager::TagRange> tagged_range =
+ tag_manager->MakeTaggedRange(aligned_start_addr, end_addr,
+ memory_regions);
+
+ if (!tagged_range) {
+ result.SetError(Status(tagged_range.takeError()));
+ return false;
+ }
+
+ Status status = process->WriteMemoryTags(tagged_range->GetRangeBase(),
+ tagged_range->GetByteSize(), tags);
+
+ if (status.Fail()) {
+ result.SetError(status);
+ return false;
+ }
+
+ result.SetStatus(eReturnStatusSuccessFinishResult);
+ return true;
+ }
+
+ OptionGroupOptions m_option_group;
+ OptionGroupTagWrite m_tag_write_options;
+};
+
CommandObjectMemoryTag::CommandObjectMemoryTag(CommandInterpreter &interpreter)
: CommandObjectMultiword(
interpreter, "tag", "Commands for manipulating memory tags",
@@ -123,6 +290,11 @@ CommandObjectMemoryTag::CommandObjectMemoryTag(CommandInterpreter &interpreter)
new CommandObjectMemoryTagRead(interpreter));
read_command_object->SetCommandName("memory tag read");
LoadSubCommand("read", read_command_object);
+
+ CommandObjectSP write_command_object(
+ new CommandObjectMemoryTagWrite(interpreter));
+ write_command_object->SetCommandName("memory tag write");
+ LoadSubCommand("write", write_command_object);
}
CommandObjectMemoryTag::~CommandObjectMemoryTag() = default;
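As a rough usage sketch for the new subcommand (mte_buf is a made-up symbol; -e corresponds to the end-address case handled in OptionGroupTagWrite above): passing several tag values tags that many consecutive granules starting at the granule containing the address, while an explicit end address makes the aligned range extend to that address instead of being derived from the number of tags.

    (lldb) memory tag write mte_buf 0x4 0x5
    (lldb) memory tag write -e mte_buf+64 mte_buf 0x4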
diff --git a/lldb/source/Commands/CommandObjectMultiword.cpp b/lldb/source/Commands/CommandObjectMultiword.cpp
index a523fd0b1560..e800bcc12bd3 100644
--- a/lldb/source/Commands/CommandObjectMultiword.cpp
+++ b/lldb/source/Commands/CommandObjectMultiword.cpp
@@ -26,36 +26,48 @@ CommandObjectMultiword::CommandObjectMultiword(CommandInterpreter &interpreter,
CommandObjectMultiword::~CommandObjectMultiword() = default;
+CommandObjectSP
+CommandObjectMultiword::GetSubcommandSPExact(llvm::StringRef sub_cmd) {
+ if (m_subcommand_dict.empty())
+ return {};
+
+ auto pos = m_subcommand_dict.find(std::string(sub_cmd));
+ if (pos == m_subcommand_dict.end())
+ return {};
+
+ return pos->second;
+}
+
CommandObjectSP CommandObjectMultiword::GetSubcommandSP(llvm::StringRef sub_cmd,
StringList *matches) {
- CommandObjectSP return_cmd_sp;
+ if (m_subcommand_dict.empty())
+ return {};
+
+ CommandObjectSP return_cmd_sp = GetSubcommandSPExact(sub_cmd);
+ if (return_cmd_sp) {
+ if (matches)
+ matches->AppendString(sub_cmd);
+ return return_cmd_sp;
+ }
+
CommandObject::CommandMap::iterator pos;
- if (!m_subcommand_dict.empty()) {
+ StringList local_matches;
+ if (matches == nullptr)
+ matches = &local_matches;
+ int num_matches =
+ AddNamesMatchingPartialString(m_subcommand_dict, sub_cmd, *matches);
+
+ if (num_matches == 1) {
+ // Cleaner, but slightly less efficient would be to call back into this
+ // function, since I now know I have an exact match...
+
+ sub_cmd = matches->GetStringAtIndex(0);
pos = m_subcommand_dict.find(std::string(sub_cmd));
- if (pos != m_subcommand_dict.end()) {
- // An exact match; append the sub_cmd to the 'matches' string list.
- if (matches)
- matches->AppendString(sub_cmd);
+ if (pos != m_subcommand_dict.end())
return_cmd_sp = pos->second;
- } else {
- StringList local_matches;
- if (matches == nullptr)
- matches = &local_matches;
- int num_matches =
- AddNamesMatchingPartialString(m_subcommand_dict, sub_cmd, *matches);
-
- if (num_matches == 1) {
- // Cleaner, but slightly less efficient would be to call back into this
- // function, since I now know I have an exact match...
-
- sub_cmd = matches->GetStringAtIndex(0);
- pos = m_subcommand_dict.find(std::string(sub_cmd));
- if (pos != m_subcommand_dict.end())
- return_cmd_sp = pos->second;
- }
- }
}
+
return return_cmd_sp;
}
@@ -66,9 +78,9 @@ CommandObjectMultiword::GetSubcommandObject(llvm::StringRef sub_cmd,
}
bool CommandObjectMultiword::LoadSubCommand(llvm::StringRef name,
- const CommandObjectSP &cmd_obj) {
- if (cmd_obj)
- assert((&GetCommandInterpreter() == &cmd_obj->GetCommandInterpreter()) &&
+ const CommandObjectSP &cmd_obj_sp) {
+ if (cmd_obj_sp)
+ lldbassert((&GetCommandInterpreter() == &cmd_obj_sp->GetCommandInterpreter()) &&
"tried to add a CommandObject from a different interpreter");
CommandMap::iterator pos;
@@ -76,13 +88,76 @@ bool CommandObjectMultiword::LoadSubCommand(llvm::StringRef name,
pos = m_subcommand_dict.find(std::string(name));
if (pos == m_subcommand_dict.end()) {
- m_subcommand_dict[std::string(name)] = cmd_obj;
+ m_subcommand_dict[std::string(name)] = cmd_obj_sp;
} else
success = false;
return success;
}
+llvm::Error CommandObjectMultiword::LoadUserSubcommand(
+ llvm::StringRef name, const CommandObjectSP &cmd_obj_sp, bool can_replace) {
+ Status result;
+ if (cmd_obj_sp)
+ lldbassert((&GetCommandInterpreter() == &cmd_obj_sp->GetCommandInterpreter()) &&
+ "tried to add a CommandObject from a different interpreter");
+ if (!IsUserCommand()) {
+ return llvm::createStringError(llvm::inconvertibleErrorCode(),
+ "can't add a user subcommand to a builtin container command.");
+ }
+ // Make sure this a user command if it isn't already:
+ cmd_obj_sp->SetIsUserCommand(true);
+
+ std::string str_name(name);
+
+ auto pos = m_subcommand_dict.find(str_name);
+ if (pos == m_subcommand_dict.end()) {
+ m_subcommand_dict[str_name] = cmd_obj_sp;
+ return llvm::Error::success();
+ }
+
+ const char *error_str = nullptr;
+ if (!can_replace)
+ error_str = "sub-command already exists";
+ if (!(*pos).second->IsUserCommand())
+ error_str = "can't replace a builtin subcommand";
+
+ if (error_str) {
+ return llvm::createStringError(llvm::inconvertibleErrorCode(), error_str);
+ }
+ m_subcommand_dict[str_name] = cmd_obj_sp;
+ return llvm::Error::success();
+}
+
+llvm::Error CommandObjectMultiword::RemoveUserSubcommand(llvm::StringRef cmd_name,
+ bool must_be_multiword) {
+ CommandMap::iterator pos;
+ std::string str_name(cmd_name);
+
+ pos = m_subcommand_dict.find(str_name);
+ if (pos == m_subcommand_dict.end()) {
+ return llvm::createStringError(llvm::inconvertibleErrorCode(),"subcommand '%s' not found.",
+ str_name.c_str());
+ }
+ if (!(*pos).second->IsUserCommand()) {
+ return llvm::createStringError(llvm::inconvertibleErrorCode(),"subcommand '%s' not a user command.",
+ str_name.c_str());
+ }
+
+ if (must_be_multiword && !(*pos).second->IsMultiwordObject()) {
+ return llvm::createStringError(llvm::inconvertibleErrorCode(),"subcommand '%s' is not a container command",
+ str_name.c_str());
+ }
+ if (!must_be_multiword && (*pos).second->IsMultiwordObject()) {
+ return llvm::createStringError(llvm::inconvertibleErrorCode(),"subcommand '%s' is not a user command",
+ str_name.c_str());
+ }
+
+ m_subcommand_dict.erase(pos);
+
+ return llvm::Error::success();
+}
+
bool CommandObjectMultiword::Execute(const char *args_string,
CommandReturnObject &result) {
Args args(args_string);
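A hypothetical caller sketch for the new user-subcommand hooks (container, new_cmd_sp, and result are assumed to already exist; error handling mirrors the command objects earlier in this patch):

    if (llvm::Error err = container->LoadUserSubcommand("do-thing", new_cmd_sp,
                                                        /*can_replace=*/true))
      result.AppendError(llvm::toString(std::move(err)));
    if (llvm::Error err = container->RemoveUserSubcommand(
            "do-thing", /*must_be_multiword=*/false))
      result.AppendError(llvm::toString(std::move(err)));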
diff --git a/lldb/source/Commands/CommandObjectPlatform.cpp b/lldb/source/Commands/CommandObjectPlatform.cpp
index bf23c4552aa8..10dd87824911 100644
--- a/lldb/source/Commands/CommandObjectPlatform.cpp
+++ b/lldb/source/Commands/CommandObjectPlatform.cpp
@@ -211,20 +211,18 @@ protected:
ostrm.Printf("Available platforms:\n");
PlatformSP host_platform_sp(Platform::GetHostPlatform());
- ostrm.Printf("%s: %s\n", host_platform_sp->GetPluginName().GetCString(),
+ ostrm.Format("{0}: {1}\n", host_platform_sp->GetPluginName(),
host_platform_sp->GetDescription());
uint32_t idx;
for (idx = 0; true; ++idx) {
- const char *plugin_name =
+ llvm::StringRef plugin_name =
PluginManager::GetPlatformPluginNameAtIndex(idx);
- if (plugin_name == nullptr)
+ if (plugin_name.empty())
break;
- const char *plugin_desc =
+ llvm::StringRef plugin_desc =
PluginManager::GetPlatformPluginDescriptionAtIndex(idx);
- if (plugin_desc == nullptr)
- break;
- ostrm.Printf("%s: %s\n", plugin_name, plugin_desc);
+ ostrm.Format("{0}: {1}\n", plugin_name, plugin_desc);
}
if (idx == 0) {
@@ -346,8 +344,8 @@ protected:
if (error.Success()) {
Stream &ostrm = result.GetOutputStream();
if (hostname.empty())
- ostrm.Printf("Disconnected from \"%s\"\n",
- platform_sp->GetPluginName().GetCString());
+ ostrm.Format("Disconnected from \"{0}\"\n",
+ platform_sp->GetPluginName());
else
ostrm.Printf("Disconnected from \"%s\"\n", hostname.c_str());
result.SetStatus(eReturnStatusSuccessFinishResult);
@@ -356,9 +354,8 @@ protected:
}
} else {
// Not connected...
- result.AppendErrorWithFormat(
- "not connected to '%s'",
- platform_sp->GetPluginName().GetCString());
+ result.AppendErrorWithFormatv("not connected to '{0}'",
+ platform_sp->GetPluginName());
}
} else {
// Bad args
@@ -498,8 +495,7 @@ public:
lldb::eFilePermissionsWorldRead;
lldb::user_id_t fd = platform_sp->OpenFile(
FileSpec(cmd_line),
- File::eOpenOptionRead | File::eOpenOptionWrite |
- File::eOpenOptionAppend | File::eOpenOptionCanCreate,
+ File::eOpenOptionReadWrite | File::eOpenOptionCanCreate,
perms, error);
if (error.Success()) {
result.AppendMessageWithFormat("File Descriptor = %" PRIu64 "\n", fd);
@@ -589,11 +585,15 @@ public:
}
std::string buffer(m_options.m_count, 0);
Status error;
- uint32_t retcode = platform_sp->ReadFile(
+ uint64_t retcode = platform_sp->ReadFile(
fd, m_options.m_offset, &buffer[0], m_options.m_count, error);
- result.AppendMessageWithFormat("Return = %d\n", retcode);
- result.AppendMessageWithFormat("Data = \"%s\"\n", buffer.c_str());
- result.SetStatus(eReturnStatusSuccessFinishResult);
+ if (retcode != UINT64_MAX) {
+ result.AppendMessageWithFormat("Return = %" PRIu64 "\n", retcode);
+ result.AppendMessageWithFormat("Data = \"%s\"\n", buffer.c_str());
+ result.SetStatus(eReturnStatusSuccessFinishResult);
+ } else {
+ result.AppendError(error.AsCString());
+ }
} else {
result.AppendError("no platform currently selected\n");
}
@@ -678,11 +678,15 @@ public:
cmd_line);
return result.Succeeded();
}
- uint32_t retcode =
+ uint64_t retcode =
platform_sp->WriteFile(fd, m_options.m_offset, &m_options.m_data[0],
m_options.m_data.size(), error);
- result.AppendMessageWithFormat("Return = %d\n", retcode);
- result.SetStatus(eReturnStatusSuccessFinishResult);
+ if (retcode != UINT64_MAX) {
+ result.AppendMessageWithFormat("Return = %" PRIu64 "\n", retcode);
+ result.SetStatus(eReturnStatusSuccessFinishResult);
+ } else {
+ result.AppendError(error.AsCString());
+ }
} else {
result.AppendError("no platform currently selected\n");
}
@@ -919,13 +923,159 @@ public:
}
};
+// "platform get-permissions remote-file-path"
+class CommandObjectPlatformGetPermissions : public CommandObjectParsed {
+public:
+ CommandObjectPlatformGetPermissions(CommandInterpreter &interpreter)
+ : CommandObjectParsed(interpreter, "platform get-permissions",
+ "Get the file permission bits from the remote end.",
+ "platform get-permissions <remote-file-spec>", 0) {
+ SetHelpLong(
+ R"(Examples:
+
+(lldb) platform get-permissions /the/remote/file/path
+
+ Get the file permissions from the remote end with path /the/remote/file/path.)");
+
+ CommandArgumentEntry arg1;
+ CommandArgumentData file_arg_remote;
+
+ // Define the first (and only) variant of this arg.
+ file_arg_remote.arg_type = eArgTypeFilename;
+ file_arg_remote.arg_repetition = eArgRepeatPlain;
+ // There is only one variant this argument could be; put it into the
+ // argument entry.
+ arg1.push_back(file_arg_remote);
+
+ // Push the data for the first argument into the m_arguments vector.
+ m_arguments.push_back(arg1);
+ }
+
+ ~CommandObjectPlatformGetPermissions() override = default;
+
+ void
+ HandleArgumentCompletion(CompletionRequest &request,
+ OptionElementVector &opt_element_vector) override {
+ if (request.GetCursorIndex() != 0)
+ return;
+
+ CommandCompletions::InvokeCommonCompletionCallbacks(
+ GetCommandInterpreter(), CommandCompletions::eRemoteDiskFileCompletion,
+ request, nullptr);
+ }
+
+ bool DoExecute(Args &args, CommandReturnObject &result) override {
+ // If the number of arguments is incorrect, issue an error message.
+ if (args.GetArgumentCount() != 1) {
+ result.AppendError("required argument missing; specify the source file "
+ "path as the only argument");
+ return false;
+ }
+
+ PlatformSP platform_sp(
+ GetDebugger().GetPlatformList().GetSelectedPlatform());
+ if (platform_sp) {
+ std::string remote_file_path(args.GetArgumentAtIndex(0));
+ uint32_t permissions;
+ Status error = platform_sp->GetFilePermissions(FileSpec(remote_file_path),
+ permissions);
+ if (error.Success()) {
+ result.AppendMessageWithFormat(
+ "File permissions of %s (remote): 0o%04" PRIo32 "\n",
+ remote_file_path.c_str(), permissions);
+ result.SetStatus(eReturnStatusSuccessFinishResult);
+ } else
+ result.AppendError(error.AsCString());
+ } else {
+ result.AppendError("no platform currently selected\n");
+ }
+ return result.Succeeded();
+ }
+};
+
+// "platform file-exists remote-file-path"
+class CommandObjectPlatformFileExists : public CommandObjectParsed {
+public:
+ CommandObjectPlatformFileExists(CommandInterpreter &interpreter)
+ : CommandObjectParsed(interpreter, "platform file-exists",
+ "Check if the file exists on the remote end.",
+ "platform file-exists <remote-file-spec>", 0) {
+ SetHelpLong(
+ R"(Examples:
+
+(lldb) platform file-exists /the/remote/file/path
+
+ Check if /the/remote/file/path exists on the remote end.)");
+
+ CommandArgumentEntry arg1;
+ CommandArgumentData file_arg_remote;
+
+ // Define the first (and only) variant of this arg.
+ file_arg_remote.arg_type = eArgTypeFilename;
+ file_arg_remote.arg_repetition = eArgRepeatPlain;
+ // There is only one variant this argument could be; put it into the
+ // argument entry.
+ arg1.push_back(file_arg_remote);
+
+ // Push the data for the first argument into the m_arguments vector.
+ m_arguments.push_back(arg1);
+ }
+
+ ~CommandObjectPlatformFileExists() override = default;
+
+ void
+ HandleArgumentCompletion(CompletionRequest &request,
+ OptionElementVector &opt_element_vector) override {
+ if (request.GetCursorIndex() != 0)
+ return;
+
+ CommandCompletions::InvokeCommonCompletionCallbacks(
+ GetCommandInterpreter(), CommandCompletions::eRemoteDiskFileCompletion,
+ request, nullptr);
+ }
+
+ bool DoExecute(Args &args, CommandReturnObject &result) override {
+ // If the number of arguments is incorrect, issue an error message.
+ if (args.GetArgumentCount() != 1) {
+ result.AppendError("required argument missing; specify the source file "
+ "path as the only argument");
+ return false;
+ }
+
+ PlatformSP platform_sp(
+ GetDebugger().GetPlatformList().GetSelectedPlatform());
+ if (platform_sp) {
+ std::string remote_file_path(args.GetArgumentAtIndex(0));
+ bool exists = platform_sp->GetFileExists(FileSpec(remote_file_path));
+ result.AppendMessageWithFormat(
+ "File %s (remote) %s\n",
+ remote_file_path.c_str(), exists ? "exists" : "does not exist");
+ result.SetStatus(eReturnStatusSuccessFinishResult);
+ } else {
+ result.AppendError("no platform currently selected\n");
+ }
+ return result.Succeeded();
+ }
+};
+
// "platform put-file"
class CommandObjectPlatformPutFile : public CommandObjectParsed {
public:
CommandObjectPlatformPutFile(CommandInterpreter &interpreter)
: CommandObjectParsed(
interpreter, "platform put-file",
- "Transfer a file from this system to the remote end.", nullptr, 0) {
+ "Transfer a file from this system to the remote end.",
+ "platform put-file <source> [<destination>]", 0) {
+ SetHelpLong(
+ R"(Examples:
+
+(lldb) platform put-file /source/foo.txt /destination/bar.txt
+
+(lldb) platform put-file /source/foo.txt
+
+ Relative source file paths are resolved against lldb's local working directory.
+
+ Omitting the destination places the file in the platform working directory.)");
}
~CommandObjectPlatformPutFile() override = default;
@@ -1029,7 +1179,7 @@ protected:
target->GetRunArguments(m_options.launch_info.GetArguments());
ProcessSP process_sp(platform_sp->DebugProcess(
- m_options.launch_info, debugger, target, error));
+ m_options.launch_info, debugger, *target, error));
if (process_sp && process_sp->IsAlive()) {
result.SetStatus(eReturnStatusSuccessFinishNoResult);
return true;
@@ -1136,15 +1286,14 @@ protected:
if (matches == 0) {
if (match_desc)
- result.AppendErrorWithFormat(
- "no processes were found that %s \"%s\" on the \"%s\" "
+ result.AppendErrorWithFormatv(
+ "no processes were found that {0} \"{1}\" on the \"{2}\" "
"platform\n",
- match_desc, match_name,
- platform_sp->GetPluginName().GetCString());
+ match_desc, match_name, platform_sp->GetPluginName());
else
- result.AppendErrorWithFormat(
- "no processes were found on the \"%s\" platform\n",
- platform_sp->GetPluginName().GetCString());
+ result.AppendErrorWithFormatv(
+ "no processes were found on the \"{0}\" platform\n",
+ platform_sp->GetPluginName());
} else {
result.AppendMessageWithFormat(
"%u matching process%s found on \"%s\"", matches,
@@ -1390,9 +1539,8 @@ protected:
}
} else {
// Not connected...
- result.AppendErrorWithFormat(
- "not connected to '%s'",
- platform_sp->GetPluginName().GetCString());
+ result.AppendErrorWithFormatv("not connected to '{0}'",
+ platform_sp->GetPluginName());
}
} else {
// No args
@@ -1752,8 +1900,12 @@ CommandObjectPlatform::CommandObjectPlatform(CommandInterpreter &interpreter)
CommandObjectSP(new CommandObjectPlatformMkDir(interpreter)));
LoadSubCommand("file",
CommandObjectSP(new CommandObjectPlatformFile(interpreter)));
+ LoadSubCommand("file-exists",
+ CommandObjectSP(new CommandObjectPlatformFileExists(interpreter)));
LoadSubCommand("get-file", CommandObjectSP(new CommandObjectPlatformGetFile(
interpreter)));
+ LoadSubCommand("get-permissions",
+ CommandObjectSP(new CommandObjectPlatformGetPermissions(interpreter)));
LoadSubCommand("get-size", CommandObjectSP(new CommandObjectPlatformGetSize(
interpreter)));
LoadSubCommand("put-file", CommandObjectSP(new CommandObjectPlatformPutFile(
diff --git a/lldb/source/Commands/CommandObjectProcess.cpp b/lldb/source/Commands/CommandObjectProcess.cpp
index 7aaba3731500..5fd1718e8484 100644
--- a/lldb/source/Commands/CommandObjectProcess.cpp
+++ b/lldb/source/Commands/CommandObjectProcess.cpp
@@ -159,7 +159,12 @@ protected:
// If our listener is nullptr, users aren't allowed to launch
ModuleSP exe_module_sp = target->GetExecutableModule();
- if (exe_module_sp == nullptr) {
+ // If the target already has an executable module, then use that. If it
+ // doesn't then someone must be trying to launch using a path that will
+ // make sense to the remote stub, but doesn't exist on the local host.
+ // In that case use the ExecutableFile that was set in the target's
+ // ProcessLaunchInfo.
+ if (exe_module_sp == nullptr && !target->GetProcessLaunchInfo().GetExecutableFile()) {
result.AppendError("no file in target, create a debug target using the "
"'target create' command");
return false;
@@ -170,8 +175,6 @@ protected:
if (!StopProcessIfNecessary(m_exe_ctx.GetProcessPtr(), state, result))
return false;
- llvm::StringRef target_settings_argv0 = target->GetArg0();
-
// Determine whether we will disable ASLR or leave it in the default state
// (i.e. enabled if the platform supports it). First check if the process
// launch options explicitly turn on/off
@@ -216,14 +219,22 @@ protected:
m_options.launch_info.GetEnvironment().insert(target_env.begin(),
target_env.end());
+ llvm::StringRef target_settings_argv0 = target->GetArg0();
+
if (!target_settings_argv0.empty()) {
m_options.launch_info.GetArguments().AppendArgument(
target_settings_argv0);
- m_options.launch_info.SetExecutableFile(
- exe_module_sp->GetPlatformFileSpec(), false);
+ if (exe_module_sp)
+ m_options.launch_info.SetExecutableFile(
+ exe_module_sp->GetPlatformFileSpec(), false);
+ else
+ m_options.launch_info.SetExecutableFile(target->GetProcessLaunchInfo().GetExecutableFile(), false);
} else {
- m_options.launch_info.SetExecutableFile(
- exe_module_sp->GetPlatformFileSpec(), true);
+ if (exe_module_sp)
+ m_options.launch_info.SetExecutableFile(
+ exe_module_sp->GetPlatformFileSpec(), true);
+ else
+ m_options.launch_info.SetExecutableFile(target->GetProcessLaunchInfo().GetExecutableFile(), true);
}
if (launch_args.GetArgumentCount() == 0) {
@@ -250,11 +261,20 @@ protected:
llvm::StringRef data = stream.GetString();
if (!data.empty())
result.AppendMessage(data);
- const char *archname =
- exe_module_sp->GetArchitecture().GetArchitectureName();
- result.AppendMessageWithFormat(
- "Process %" PRIu64 " launched: '%s' (%s)\n", process_sp->GetID(),
- exe_module_sp->GetFileSpec().GetPath().c_str(), archname);
+ // If we didn't have a local executable, then we wouldn't have had an
+ // executable module before launch.
+ if (!exe_module_sp)
+ exe_module_sp = target->GetExecutableModule();
+ if (!exe_module_sp) {
+ result.AppendWarning("Could not get executable module after launch.");
+ } else {
+
+ const char *archname =
+ exe_module_sp->GetArchitecture().GetArchitectureName();
+ result.AppendMessageWithFormat(
+ "Process %" PRIu64 " launched: '%s' (%s)\n", process_sp->GetID(),
+ exe_module_sp->GetFileSpec().GetPath().c_str(), archname);
+ }
result.SetStatus(eReturnStatusSuccessFinishResult);
result.SetDidChangeProcessState(true);
} else {
@@ -398,9 +418,10 @@ protected:
}
StreamString stream;
+ ProcessSP process_sp;
const auto error = target->Attach(m_options.attach_info, &stream);
if (error.Success()) {
- ProcessSP process_sp(target->GetProcessSP());
+ process_sp = target->GetProcessSP();
if (process_sp) {
result.AppendMessage(stream.GetString());
result.SetStatus(eReturnStatusSuccessFinishNoResult);
@@ -452,8 +473,13 @@ protected:
// This supports the use-case scenario of immediately continuing the
// process once attached.
- if (m_options.attach_info.GetContinueOnceAttached())
- m_interpreter.HandleCommand("process continue", eLazyBoolNo, result);
+ if (m_options.attach_info.GetContinueOnceAttached()) {
+ // We have made a process but haven't told the interpreter about it yet,
+ // so CheckRequirements will fail for "process continue". Set the override
+ // here:
+ ExecutionContext exe_ctx(process_sp);
+ m_interpreter.HandleCommand("process continue", eLazyBoolNo, exe_ctx, result);
+ }
return result.Succeeded();
}
@@ -1166,7 +1192,9 @@ protected:
static constexpr OptionEnumValueElement g_corefile_save_style[] = {
{eSaveCoreFull, "full", "Create a core file with all memory saved"},
{eSaveCoreDirtyOnly, "modified-memory",
- "Create a corefile with only modified memory saved"}};
+ "Create a corefile with only modified memory saved"},
+ {eSaveCoreStackOnly, "stack",
+ "Create a corefile with only stack memory saved"}};
static constexpr OptionEnumValues SaveCoreStyles() {
return OptionEnumValues(g_corefile_save_style);
@@ -1178,12 +1206,13 @@ static constexpr OptionEnumValues SaveCoreStyles() {
class CommandObjectProcessSaveCore : public CommandObjectParsed {
public:
CommandObjectProcessSaveCore(CommandInterpreter &interpreter)
- : CommandObjectParsed(interpreter, "process save-core",
- "Save the current process as a core file using an "
- "appropriate file type.",
- "process save-core [-s corefile-style] FILE",
- eCommandRequiresProcess | eCommandTryTargetAPILock |
- eCommandProcessMustBeLaunched) {}
+ : CommandObjectParsed(
+ interpreter, "process save-core",
+ "Save the current process as a core file using an "
+ "appropriate file type.",
+ "process save-core [-s corefile-style -p plugin-name] FILE",
+ eCommandRequiresProcess | eCommandTryTargetAPILock |
+ eCommandProcessMustBeLaunched) {}
~CommandObjectProcessSaveCore() override = default;
@@ -1206,6 +1235,9 @@ public:
Status error;
switch (short_option) {
+ case 'p':
+ m_requested_plugin_name = option_arg.str();
+ break;
case 's':
m_requested_save_core_style =
(lldb::SaveCoreStyle)OptionArgParser::ToOptionEnum(
@@ -1221,10 +1253,12 @@ public:
void OptionParsingStarting(ExecutionContext *execution_context) override {
m_requested_save_core_style = eSaveCoreUnspecified;
+ m_requested_plugin_name.clear();
}
// Instance variables to hold the values for command options.
SaveCoreStyle m_requested_save_core_style;
+ std::string m_requested_plugin_name;
};
protected:
@@ -1235,13 +1269,15 @@ protected:
FileSpec output_file(command.GetArgumentAtIndex(0));
SaveCoreStyle corefile_style = m_options.m_requested_save_core_style;
Status error =
- PluginManager::SaveCore(process_sp, output_file, corefile_style);
+ PluginManager::SaveCore(process_sp, output_file, corefile_style,
+ m_options.m_requested_plugin_name);
if (error.Success()) {
- if (corefile_style == SaveCoreStyle::eSaveCoreDirtyOnly) {
+ if (corefile_style == SaveCoreStyle::eSaveCoreDirtyOnly ||
+ corefile_style == SaveCoreStyle::eSaveCoreStackOnly) {
result.AppendMessageWithFormat(
- "\nModified-memory only corefile "
- "created. This corefile may not show \n"
- "library/framework/app binaries "
+ "\nModified-memory or stack-memory only corefile "
+ "created. This corefile may \n"
+ "not show library/framework/app binaries "
"on a different system, or when \n"
"those binaries have "
"been updated/modified. Copies are not included\n"
@@ -1641,6 +1677,80 @@ protected:
}
};
+// CommandObjectProcessTraceSave
+#define LLDB_OPTIONS_process_trace_save
+#include "CommandOptions.inc"
+
+#pragma mark CommandObjectProcessTraceSave
+
+class CommandObjectProcessTraceSave : public CommandObjectParsed {
+public:
+ class CommandOptions : public Options {
+ public:
+ CommandOptions() : Options() { OptionParsingStarting(nullptr); }
+
+ Status SetOptionValue(uint32_t option_idx, llvm::StringRef option_arg,
+ ExecutionContext *execution_context) override {
+ Status error;
+ const int short_option = m_getopt_table[option_idx].val;
+
+ switch (short_option) {
+
+ case 'd': {
+ m_directory.SetFile(option_arg, FileSpec::Style::native);
+ FileSystem::Instance().Resolve(m_directory);
+ break;
+ }
+ default:
+ llvm_unreachable("Unimplemented option");
+ }
+ return error;
+ }
+
+ void OptionParsingStarting(ExecutionContext *execution_context) override{};
+
+ llvm::ArrayRef<OptionDefinition> GetDefinitions() override {
+ return llvm::makeArrayRef(g_process_trace_save_options);
+ };
+
+ FileSpec m_directory;
+ };
+
+ Options *GetOptions() override { return &m_options; }
+ CommandObjectProcessTraceSave(CommandInterpreter &interpreter)
+ : CommandObjectParsed(
+ interpreter, "process trace save",
+ "Save the trace of the current process in the specified directory. "
+ "The directory will be created if needed. "
+ "This will also create a file <directory>/trace.json with the main "
+ "properties of the trace session, along with others files which "
+ "contain the actual trace data. The trace.json file can be used "
+ "later as input for the \"trace load\" command to load the trace "
+ "in LLDB",
+ "process trace save [<cmd-options>]",
+ eCommandRequiresProcess | eCommandTryTargetAPILock |
+ eCommandProcessMustBeLaunched | eCommandProcessMustBePaused |
+ eCommandProcessMustBeTraced) {}
+
+ ~CommandObjectProcessTraceSave() override = default;
+
+protected:
+ bool DoExecute(Args &command, CommandReturnObject &result) override {
+ ProcessSP process_sp = m_exe_ctx.GetProcessSP();
+
+ TraceSP trace_sp = process_sp->GetTarget().GetTrace();
+
+ if (llvm::Error err = trace_sp->SaveLiveTraceToDisk(m_options.m_directory))
+ result.AppendError(toString(std::move(err)));
+ else
+ result.SetStatus(eReturnStatusSuccessFinishResult);
+
+ return result.Succeeded();
+ }
+
+ CommandOptions m_options;
+};
+
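For orientation, the round trip described in the help string above would look roughly like this, using a hypothetical output directory:

(lldb) process trace save -d /tmp/proc-trace
(lldb) trace load /tmp/proc-trace/trace.json

The first command writes trace.json plus the raw trace data into the directory; the second loads that saved session back into LLDB later.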
// CommandObjectProcessTraceStop
class CommandObjectProcessTraceStop : public CommandObjectParsed {
public:
@@ -1678,6 +1788,8 @@ public:
: CommandObjectMultiword(
interpreter, "trace", "Commands for tracing the current process.",
"process trace <subcommand> [<subcommand objects>]") {
+ LoadSubCommand("save", CommandObjectSP(
+ new CommandObjectProcessTraceSave(interpreter)));
LoadSubCommand("start", CommandObjectSP(new CommandObjectProcessTraceStart(
interpreter)));
LoadSubCommand("stop", CommandObjectSP(
diff --git a/lldb/source/Commands/CommandObjectSettings.cpp b/lldb/source/Commands/CommandObjectSettings.cpp
index cd79680e31f7..13ff27c78dea 100644
--- a/lldb/source/Commands/CommandObjectSettings.cpp
+++ b/lldb/source/Commands/CommandObjectSettings.cpp
@@ -369,7 +369,7 @@ protected:
FileSpec file_spec(m_options.m_filename);
FileSystem::Instance().Resolve(file_spec);
std::string path(file_spec.GetPath());
- auto options = File::eOpenOptionWrite | File::eOpenOptionCanCreate;
+ auto options = File::eOpenOptionWriteOnly | File::eOpenOptionCanCreate;
if (m_options.m_append)
options |= File::eOpenOptionAppend;
else
diff --git a/lldb/source/Commands/CommandObjectSource.cpp b/lldb/source/Commands/CommandObjectSource.cpp
index 7a0338e35bc7..fb33f41b8ef9 100644
--- a/lldb/source/Commands/CommandObjectSource.cpp
+++ b/lldb/source/Commands/CommandObjectSource.cpp
@@ -374,13 +374,16 @@ protected:
Target *target = m_exe_ctx.GetTargetPtr();
uint32_t addr_byte_size = target->GetArchitecture().GetAddressByteSize();
+ ModuleFunctionSearchOptions function_options;
+ function_options.include_symbols = false;
+ function_options.include_inlines = true;
+
// Note: module_list can't be const& because FindFunctionSymbols isn't
// const.
ModuleList module_list =
(m_module_list.GetSize() > 0) ? m_module_list : target->GetImages();
- module_list.FindFunctions(name, eFunctionNameTypeAuto,
- /*include_symbols=*/false,
- /*include_inlines=*/true, sc_list_funcs);
+ module_list.FindFunctions(name, eFunctionNameTypeAuto, function_options,
+ sc_list_funcs);
size_t num_matches = sc_list_funcs.GetSize();
if (!num_matches) {
@@ -874,12 +877,13 @@ protected:
void FindMatchingFunctions(Target *target, ConstString name,
SymbolContextList &sc_list) {
// Displaying the source for a symbol:
- bool include_inlines = true;
- bool include_symbols = false;
-
if (m_options.num_lines == 0)
m_options.num_lines = 10;
+ ModuleFunctionSearchOptions function_options;
+ function_options.include_symbols = true;
+ function_options.include_inlines = false;
+
const size_t num_modules = m_options.modules.size();
if (num_modules > 0) {
ModuleList matching_modules;
@@ -889,15 +893,14 @@ protected:
ModuleSpec module_spec(module_file_spec);
matching_modules.Clear();
target->GetImages().FindModules(module_spec, matching_modules);
+
matching_modules.FindFunctions(name, eFunctionNameTypeAuto,
- include_symbols, include_inlines,
- sc_list);
+ function_options, sc_list);
}
}
} else {
target->GetImages().FindFunctions(name, eFunctionNameTypeAuto,
- include_symbols, include_inlines,
- sc_list);
+ function_options, sc_list);
}
}
diff --git a/lldb/source/Commands/CommandObjectStats.cpp b/lldb/source/Commands/CommandObjectStats.cpp
index 23c7dbbaf373..f32d559ca039 100644
--- a/lldb/source/Commands/CommandObjectStats.cpp
+++ b/lldb/source/Commands/CommandObjectStats.cpp
@@ -7,6 +7,8 @@
//===----------------------------------------------------------------------===//
#include "CommandObjectStats.h"
+#include "lldb/Core/Debugger.h"
+#include "lldb/Host/OptionParser.h"
#include "lldb/Interpreter/CommandReturnObject.h"
#include "lldb/Target/Target.h"
@@ -24,14 +26,12 @@ public:
protected:
bool DoExecute(Args &command, CommandReturnObject &result) override {
- Target &target = GetSelectedOrDummyTarget();
-
- if (target.GetCollectingStats()) {
+ if (DebuggerStats::GetCollectingStats()) {
result.AppendError("statistics already enabled");
return false;
}
- target.SetCollectingStats(true);
+ DebuggerStats::SetCollectingStats(true);
result.SetStatus(eReturnStatusSuccessFinishResult);
return true;
}
@@ -48,44 +48,74 @@ public:
protected:
bool DoExecute(Args &command, CommandReturnObject &result) override {
- Target &target = GetSelectedOrDummyTarget();
-
- if (!target.GetCollectingStats()) {
+ if (!DebuggerStats::GetCollectingStats()) {
result.AppendError("need to enable statistics before disabling them");
return false;
}
- target.SetCollectingStats(false);
+ DebuggerStats::SetCollectingStats(false);
result.SetStatus(eReturnStatusSuccessFinishResult);
return true;
}
};
+#define LLDB_OPTIONS_statistics_dump
+#include "CommandOptions.inc"
+
class CommandObjectStatsDump : public CommandObjectParsed {
+ class CommandOptions : public Options {
+ public:
+ CommandOptions() : Options() { OptionParsingStarting(nullptr); }
+
+ Status SetOptionValue(uint32_t option_idx, llvm::StringRef option_arg,
+ ExecutionContext *execution_context) override {
+ Status error;
+ const int short_option = m_getopt_table[option_idx].val;
+
+ switch (short_option) {
+ case 'a':
+ m_all_targets = true;
+ break;
+ default:
+ llvm_unreachable("Unimplemented option");
+ }
+ return error;
+ }
+
+ void OptionParsingStarting(ExecutionContext *execution_context) override {
+ m_all_targets = false;
+ }
+
+ llvm::ArrayRef<OptionDefinition> GetDefinitions() override {
+ return llvm::makeArrayRef(g_statistics_dump_options);
+ }
+
+ bool m_all_targets = false;
+ };
+
public:
CommandObjectStatsDump(CommandInterpreter &interpreter)
- : CommandObjectParsed(interpreter, "dump", "Dump statistics results",
- nullptr, eCommandProcessMustBePaused) {}
+ : CommandObjectParsed(
+ interpreter, "statistics dump", "Dump metrics in JSON format",
+ "statistics dump [<options>]", eCommandRequiresTarget) {}
~CommandObjectStatsDump() override = default;
+ Options *GetOptions() override { return &m_options; }
+
protected:
bool DoExecute(Args &command, CommandReturnObject &result) override {
- Target &target = GetSelectedOrDummyTarget();
-
- uint32_t i = 0;
- for (auto &stat : target.GetStatistics()) {
- result.AppendMessageWithFormat(
- "%s : %u\n",
- lldb_private::GetStatDescription(
- static_cast<lldb_private::StatisticKind>(i))
- .c_str(),
- stat);
- i += 1;
- }
+ Target *target = nullptr;
+ if (!m_options.m_all_targets)
+ target = m_exe_ctx.GetTargetPtr();
+
+ result.AppendMessageWithFormatv(
+ "{0:2}", DebuggerStats::ReportStatistics(GetDebugger(), target));
result.SetStatus(eReturnStatusSuccessFinishResult);
return true;
}
+
+ CommandOptions m_options;
};
CommandObjectStats::CommandObjectStats(CommandInterpreter &interpreter)
diff --git a/lldb/source/Commands/CommandObjectTarget.cpp b/lldb/source/Commands/CommandObjectTarget.cpp
index b25514b1ffbc..2a42eb22938d 100644
--- a/lldb/source/Commands/CommandObjectTarget.cpp
+++ b/lldb/source/Commands/CommandObjectTarget.cpp
@@ -272,7 +272,7 @@ protected:
if (core_file) {
auto file = FileSystem::Instance().Open(
- core_file, lldb_private::File::eOpenOptionRead);
+ core_file, lldb_private::File::eOpenOptionReadOnly);
if (!file) {
result.AppendErrorWithFormatv("Cannot open '{0}': {1}.",
@@ -286,7 +286,7 @@ protected:
FileSpec symfile(m_symbol_file.GetOptionValue().GetCurrentValue());
if (symfile) {
auto file = FileSystem::Instance().Open(
- symfile, lldb_private::File::eOpenOptionRead);
+ symfile, lldb_private::File::eOpenOptionReadOnly);
if (!file) {
result.AppendErrorWithFormatv("Cannot open '{0}': {1}.",
@@ -1047,8 +1047,7 @@ protected:
}
bool last_pair = ((argc - i) == 2);
target->GetImageSearchPathList().Append(
- ConstString(from), ConstString(to),
- last_pair); // Notify if this is the last pair
+ from, to, last_pair); // Notify if this is the last pair
result.SetStatus(eReturnStatusSuccessFinishNoResult);
} else {
if (from[0])
@@ -1175,8 +1174,8 @@ protected:
if (from[0] && to[0]) {
bool last_pair = ((argc - i) == 2);
- target->GetImageSearchPathList().Insert(
- ConstString(from), ConstString(to), insert_idx, last_pair);
+ target->GetImageSearchPathList().Insert(from, to, insert_idx,
+ last_pair);
result.SetStatus(eReturnStatusSuccessFinishNoResult);
} else {
if (from[0])
@@ -1570,20 +1569,18 @@ static void DumpSymbolContextList(ExecutionContextScope *exe_scope,
static size_t LookupFunctionInModule(CommandInterpreter &interpreter,
Stream &strm, Module *module,
const char *name, bool name_is_regex,
- bool include_inlines, bool include_symbols,
+ const ModuleFunctionSearchOptions &options,
bool verbose) {
if (module && name && name[0]) {
SymbolContextList sc_list;
size_t num_matches = 0;
if (name_is_regex) {
RegularExpression function_name_regex((llvm::StringRef(name)));
- module->FindFunctions(function_name_regex, include_symbols,
- include_inlines, sc_list);
+ module->FindFunctions(function_name_regex, options, sc_list);
} else {
ConstString function_name(name);
module->FindFunctions(function_name, CompilerDeclContext(),
- eFunctionNameTypeAuto, include_symbols,
- include_inlines, sc_list);
+ eFunctionNameTypeAuto, options, sc_list);
}
num_matches = sc_list.GetSize();
if (num_matches) {
@@ -2836,6 +2833,7 @@ protected:
OptionGroupUInt64 m_slide_option;
};
+#pragma mark CommandObjectTargetModulesList
// List images with associated information
#define LLDB_OPTIONS_target_modules_list
#include "CommandOptions.inc"
@@ -3281,8 +3279,11 @@ protected:
if (m_options.m_type == eLookupTypeFunctionOrSymbol) {
ConstString function_name(m_options.m_str.c_str());
+ ModuleFunctionSearchOptions function_options;
+ function_options.include_symbols = true;
+ function_options.include_inlines = false;
target->GetImages().FindFunctions(function_name, eFunctionNameTypeAuto,
- true, false, sc_list);
+ function_options, sc_list);
} else if (m_options.m_type == eLookupTypeAddress && target) {
Address addr;
if (target->GetSectionLoadList().ResolveLoadAddress(m_options.m_addr,
@@ -3753,13 +3754,15 @@ public:
case eLookupTypeFunctionOrSymbol:
case eLookupTypeFunction:
if (!m_options.m_str.empty()) {
- if (LookupFunctionInModule(
- m_interpreter, result.GetOutputStream(), module,
- m_options.m_str.c_str(), m_options.m_use_regex,
- m_options.m_include_inlines,
- m_options.m_type ==
- eLookupTypeFunctionOrSymbol, // include symbols
- m_options.m_verbose)) {
+ ModuleFunctionSearchOptions function_options;
+ function_options.include_symbols =
+ m_options.m_type == eLookupTypeFunctionOrSymbol;
+ function_options.include_inlines = m_options.m_include_inlines;
+
+ if (LookupFunctionInModule(m_interpreter, result.GetOutputStream(),
+ module, m_options.m_str.c_str(),
+ m_options.m_use_regex, function_options,
+ m_options.m_verbose)) {
result.SetStatus(eReturnStatusSuccessFinishResult);
return true;
}
@@ -3959,8 +3962,12 @@ public:
"name."),
m_current_frame_option(
LLDB_OPT_SET_2, false, "frame", 'F',
- "Locate the debug symbols for the currently selected frame.",
- false, true)
+ "Locate the debug symbols for the currently selected frame.", false,
+ true),
+ m_current_stack_option(LLDB_OPT_SET_2, false, "stack", 'S',
+ "Locate the debug symbols for every frame in "
+ "the current call stack.",
+ false, true)
{
m_option_group.Append(&m_uuid_option_group, LLDB_OPT_SET_ALL,
@@ -3968,6 +3975,8 @@ public:
m_option_group.Append(&m_file_option, LLDB_OPT_SET_ALL, LLDB_OPT_SET_1);
m_option_group.Append(&m_current_frame_option, LLDB_OPT_SET_2,
LLDB_OPT_SET_2);
+ m_option_group.Append(&m_current_stack_option, LLDB_OPT_SET_2,
+ LLDB_OPT_SET_2);
m_option_group.Finalize();
}
@@ -4140,6 +4149,167 @@ protected:
return false;
}
+ bool DownloadObjectAndSymbolFile(ModuleSpec &module_spec,
+ CommandReturnObject &result, bool &flush) {
+ if (Symbols::DownloadObjectAndSymbolFile(module_spec)) {
+ if (module_spec.GetSymbolFileSpec())
+ return AddModuleSymbols(m_exe_ctx.GetTargetPtr(), module_spec, flush,
+ result);
+ }
+ return false;
+ }
+
+ bool AddSymbolsForUUID(CommandReturnObject &result, bool &flush) {
+ assert(m_uuid_option_group.GetOptionValue().OptionWasSet());
+
+ ModuleSpec module_spec;
+ module_spec.GetUUID() =
+ m_uuid_option_group.GetOptionValue().GetCurrentValue();
+
+ if (!DownloadObjectAndSymbolFile(module_spec, result, flush)) {
+ StreamString error_strm;
+ error_strm.PutCString("unable to find debug symbols for UUID ");
+ module_spec.GetUUID().Dump(&error_strm);
+ result.AppendError(error_strm.GetString());
+ return false;
+ }
+
+ return true;
+ }
+
+ bool AddSymbolsForFile(CommandReturnObject &result, bool &flush) {
+ assert(m_file_option.GetOptionValue().OptionWasSet());
+
+ ModuleSpec module_spec;
+ module_spec.GetFileSpec() =
+ m_file_option.GetOptionValue().GetCurrentValue();
+
+ Target *target = m_exe_ctx.GetTargetPtr();
+ ModuleSP module_sp(target->GetImages().FindFirstModule(module_spec));
+ if (module_sp) {
+ module_spec.GetFileSpec() = module_sp->GetFileSpec();
+ module_spec.GetPlatformFileSpec() = module_sp->GetPlatformFileSpec();
+ module_spec.GetUUID() = module_sp->GetUUID();
+ module_spec.GetArchitecture() = module_sp->GetArchitecture();
+ } else {
+ module_spec.GetArchitecture() = target->GetArchitecture();
+ }
+
+ if (!DownloadObjectAndSymbolFile(module_spec, result, flush)) {
+ StreamString error_strm;
+ error_strm.PutCString(
+ "unable to find debug symbols for the executable file ");
+ error_strm << module_spec.GetFileSpec();
+ result.AppendError(error_strm.GetString());
+ return false;
+ }
+
+ return true;
+ }
+
+ bool AddSymbolsForFrame(CommandReturnObject &result, bool &flush) {
+ assert(m_current_frame_option.GetOptionValue().OptionWasSet());
+
+ Process *process = m_exe_ctx.GetProcessPtr();
+ if (!process) {
+ result.AppendError(
+ "a process must exist in order to use the --frame option");
+ return false;
+ }
+
+ const StateType process_state = process->GetState();
+ if (!StateIsStoppedState(process_state, true)) {
+ result.AppendErrorWithFormat("process is not stopped: %s",
+ StateAsCString(process_state));
+ return false;
+ }
+
+ StackFrame *frame = m_exe_ctx.GetFramePtr();
+ if (!frame) {
+ result.AppendError("invalid current frame");
+ return false;
+ }
+
+ ModuleSP frame_module_sp(
+ frame->GetSymbolContext(eSymbolContextModule).module_sp);
+ if (!frame_module_sp) {
+ result.AppendError("frame has no module");
+ return false;
+ }
+
+ ModuleSpec module_spec;
+ module_spec.GetUUID() = frame_module_sp->GetUUID();
+
+ if (FileSystem::Instance().Exists(frame_module_sp->GetPlatformFileSpec())) {
+ module_spec.GetArchitecture() = frame_module_sp->GetArchitecture();
+ module_spec.GetFileSpec() = frame_module_sp->GetPlatformFileSpec();
+ }
+
+ if (!DownloadObjectAndSymbolFile(module_spec, result, flush)) {
+ result.AppendError("unable to find debug symbols for the current frame");
+ return false;
+ }
+
+ return true;
+ }
+
+ bool AddSymbolsForStack(CommandReturnObject &result, bool &flush) {
+ assert(m_current_stack_option.GetOptionValue().OptionWasSet());
+
+ Process *process = m_exe_ctx.GetProcessPtr();
+ if (!process) {
+ result.AppendError(
+ "a process must exist in order to use the --stack option");
+ return false;
+ }
+
+ const StateType process_state = process->GetState();
+ if (!StateIsStoppedState(process_state, true)) {
+ result.AppendErrorWithFormat("process is not stopped: %s",
+ StateAsCString(process_state));
+ return false;
+ }
+
+ Thread *thread = m_exe_ctx.GetThreadPtr();
+ if (!thread) {
+ result.AppendError("invalid current thread");
+ return false;
+ }
+
+ bool symbols_found = false;
+ uint32_t frame_count = thread->GetStackFrameCount();
+ for (uint32_t i = 0; i < frame_count; ++i) {
+ lldb::StackFrameSP frame_sp = thread->GetStackFrameAtIndex(i);
+
+ ModuleSP frame_module_sp(
+ frame_sp->GetSymbolContext(eSymbolContextModule).module_sp);
+ if (!frame_module_sp)
+ continue;
+
+ ModuleSpec module_spec;
+ module_spec.GetUUID() = frame_module_sp->GetUUID();
+
+ if (FileSystem::Instance().Exists(
+ frame_module_sp->GetPlatformFileSpec())) {
+ module_spec.GetArchitecture() = frame_module_sp->GetArchitecture();
+ module_spec.GetFileSpec() = frame_module_sp->GetPlatformFileSpec();
+ }
+
+ bool current_frame_flush = false;
+ if (DownloadObjectAndSymbolFile(module_spec, result, current_frame_flush))
+ symbols_found = true;
+ flush |= current_frame_flush;
+ }
+
+ if (!symbols_found) {
+ result.AppendError(
+ "unable to find debug symbols in the current call stack");
+ return false;
+ }
+
+ return true;
+ }
+
bool DoExecute(Args &args, CommandReturnObject &result) override {
Target *target = m_exe_ctx.GetTargetPtr();
result.SetStatus(eReturnStatusFailed);
@@ -4150,100 +4320,22 @@ protected:
const bool file_option_set = m_file_option.GetOptionValue().OptionWasSet();
const bool frame_option_set =
m_current_frame_option.GetOptionValue().OptionWasSet();
+ const bool stack_option_set =
+ m_current_stack_option.GetOptionValue().OptionWasSet();
const size_t argc = args.GetArgumentCount();
if (argc == 0) {
- if (uuid_option_set || file_option_set || frame_option_set) {
- bool success = false;
- bool error_set = false;
- if (frame_option_set) {
- Process *process = m_exe_ctx.GetProcessPtr();
- if (process) {
- const StateType process_state = process->GetState();
- if (StateIsStoppedState(process_state, true)) {
- StackFrame *frame = m_exe_ctx.GetFramePtr();
- if (frame) {
- ModuleSP frame_module_sp(
- frame->GetSymbolContext(eSymbolContextModule).module_sp);
- if (frame_module_sp) {
- if (FileSystem::Instance().Exists(
- frame_module_sp->GetPlatformFileSpec())) {
- module_spec.GetArchitecture() =
- frame_module_sp->GetArchitecture();
- module_spec.GetFileSpec() =
- frame_module_sp->GetPlatformFileSpec();
- }
- module_spec.GetUUID() = frame_module_sp->GetUUID();
- success = module_spec.GetUUID().IsValid() ||
- module_spec.GetFileSpec();
- } else {
- result.AppendError("frame has no module");
- error_set = true;
- }
- } else {
- result.AppendError("invalid current frame");
- error_set = true;
- }
- } else {
- result.AppendErrorWithFormat("process is not stopped: %s",
- StateAsCString(process_state));
- error_set = true;
- }
- } else {
- result.AppendError(
- "a process must exist in order to use the --frame option");
- error_set = true;
- }
- } else {
- if (uuid_option_set) {
- module_spec.GetUUID() =
- m_uuid_option_group.GetOptionValue().GetCurrentValue();
- success |= module_spec.GetUUID().IsValid();
- } else if (file_option_set) {
- module_spec.GetFileSpec() =
- m_file_option.GetOptionValue().GetCurrentValue();
- ModuleSP module_sp(
- target->GetImages().FindFirstModule(module_spec));
- if (module_sp) {
- module_spec.GetFileSpec() = module_sp->GetFileSpec();
- module_spec.GetPlatformFileSpec() =
- module_sp->GetPlatformFileSpec();
- module_spec.GetUUID() = module_sp->GetUUID();
- module_spec.GetArchitecture() = module_sp->GetArchitecture();
- } else {
- module_spec.GetArchitecture() = target->GetArchitecture();
- }
- success |= module_spec.GetUUID().IsValid() ||
- FileSystem::Instance().Exists(module_spec.GetFileSpec());
- }
- }
-
- if (success) {
- if (Symbols::DownloadObjectAndSymbolFile(module_spec)) {
- if (module_spec.GetSymbolFileSpec())
- success = AddModuleSymbols(target, module_spec, flush, result);
- }
- }
-
- if (!success && !error_set) {
- StreamString error_strm;
- if (uuid_option_set) {
- error_strm.PutCString("unable to find debug symbols for UUID ");
- module_spec.GetUUID().Dump(&error_strm);
- } else if (file_option_set) {
- error_strm.PutCString(
- "unable to find debug symbols for the executable file ");
- error_strm << module_spec.GetFileSpec();
- } else if (frame_option_set) {
- error_strm.PutCString(
- "unable to find debug symbols for the current frame");
- }
- result.AppendError(error_strm.GetString());
- }
- } else {
+ if (uuid_option_set)
+ AddSymbolsForUUID(result, flush);
+ else if (file_option_set)
+ AddSymbolsForFile(result, flush);
+ else if (frame_option_set)
+ AddSymbolsForFrame(result, flush);
+ else if (stack_option_set)
+ AddSymbolsForStack(result, flush);
+ else
result.AppendError("one or more symbol file paths must be specified, "
"or options must be specified");
- }
} else {
if (uuid_option_set) {
result.AppendError("specify either one or more paths to symbol files "
@@ -4310,6 +4402,7 @@ protected:
OptionGroupUUID m_uuid_option_group;
OptionGroupFile m_file_option;
OptionGroupBoolean m_current_frame_option;
+ OptionGroupBoolean m_current_stack_option;
};
#pragma mark CommandObjectTargetSymbols
@@ -4511,29 +4604,29 @@ public:
Command Based stop-hooks:
-------------------------
Stop hooks can run a list of lldb commands by providing one or more
- --one-line-command options. The commands will get run in the order they are
+ --one-line-command options. The commands will get run in the order they are
added. Or you can provide no commands, in which case you will enter a
command editor where you can enter the commands to be run.
-
+
Python Based Stop Hooks:
------------------------
Stop hooks can be implemented with a suitably defined Python class, whose name
is passed in the --python-class option.
-
+
When the stop hook is added, the class is initialized by calling:
-
+
def __init__(self, target, extra_args, internal_dict):
-
+
target: The target that the stop hook is being added to.
- extra_args: An SBStructuredData Dictionary filled with the -key -value
- option pairs passed to the command.
+ extra_args: An SBStructuredData Dictionary filled with the -key -value
+ option pairs passed to the command.
internal_dict: An implementation detail provided by lldb.
- Then when the stop-hook triggers, lldb will run the 'handle_stop' method.
+ Then when the stop-hook triggers, lldb will run the 'handle_stop' method.
The method has the signature:
-
+
def handle_stop(self, exe_ctx, stream):
-
+
exe_ctx: An SBExecutionContext for the thread that has stopped.
stream: An SBStream, anything written to this stream will be printed in the
stop message when the process stops.
@@ -4542,12 +4635,12 @@ Python Based Stop Hooks:
from all the stop hook executions on threads that stopped
with a reason, then the process will continue. Note that this
will happen only after all the stop hooks are run.
-
+
Filter Options:
---------------
Stop hooks can be set to always run, or to only run when the stopped thread
matches the filter options passed on the command line. The available filter
- options include a shared library or a thread or queue specification,
+ options include a shared library or a thread or queue specification,
a line range in a source file, a function name or a class name.
)");
m_all_options.Append(&m_python_class_options,
@@ -4896,6 +4989,55 @@ public:
~CommandObjectMultiwordTargetStopHooks() override = default;
};
+#pragma mark CommandObjectTargetDumpTypesystem
+
+/// Dumps the TypeSystem of the selected Target.
+class CommandObjectTargetDumpTypesystem : public CommandObjectParsed {
+public:
+ CommandObjectTargetDumpTypesystem(CommandInterpreter &interpreter)
+ : CommandObjectParsed(
+ interpreter, "target dump typesystem",
+ "Dump the state of the target's internal type system.\n"
+ "Intended to be used for debugging LLDB itself.",
+ nullptr, eCommandRequiresTarget) {}
+
+ ~CommandObjectTargetDumpTypesystem() override = default;
+
+protected:
+ bool DoExecute(Args &command, CommandReturnObject &result) override {
+ if (!command.empty()) {
+ result.AppendError("target dump typesystem doesn't take arguments.");
+ return result.Succeeded();
+ }
+
+ // Go over every scratch TypeSystem and dump to the command output.
+ for (TypeSystem *ts : GetSelectedTarget().GetScratchTypeSystems())
+ ts->Dump(result.GetOutputStream().AsRawOstream());
+
+ result.SetStatus(eReturnStatusSuccessFinishResult);
+ return result.Succeeded();
+ }
+};
+
+#pragma mark CommandObjectTargetDump
+
+/// Multi-word command for 'target dump'.
+class CommandObjectTargetDump : public CommandObjectMultiword {
+public:
+ // Constructors and Destructors
+ CommandObjectTargetDump(CommandInterpreter &interpreter)
+ : CommandObjectMultiword(
+ interpreter, "target dump",
+ "Commands for dumping information about the target.",
+ "target dump [typesystem]") {
+ LoadSubCommand(
+ "typesystem",
+ CommandObjectSP(new CommandObjectTargetDumpTypesystem(interpreter)));
+ }
+
+ ~CommandObjectTargetDump() override = default;
+};
+
#pragma mark CommandObjectMultiwordTarget
// CommandObjectMultiwordTarget
@@ -4909,6 +5051,8 @@ CommandObjectMultiwordTarget::CommandObjectMultiwordTarget(
CommandObjectSP(new CommandObjectTargetCreate(interpreter)));
LoadSubCommand("delete",
CommandObjectSP(new CommandObjectTargetDelete(interpreter)));
+ LoadSubCommand("dump",
+ CommandObjectSP(new CommandObjectTargetDump(interpreter)));
LoadSubCommand("list",
CommandObjectSP(new CommandObjectTargetList(interpreter)));
LoadSubCommand("select",
diff --git a/lldb/source/Commands/CommandObjectThread.cpp b/lldb/source/Commands/CommandObjectThread.cpp
index 7247601b292b..71e67f6ba208 100644
--- a/lldb/source/Commands/CommandObjectThread.cpp
+++ b/lldb/source/Commands/CommandObjectThread.cpp
@@ -292,16 +292,10 @@ public:
// Check if we are in Non-Stop mode
TargetSP target_sp =
execution_context ? execution_context->GetTargetSP() : TargetSP();
- if (target_sp && target_sp->GetNonStopModeEnabled()) {
- // NonStopMode runs all threads by definition, so when it is on we don't
- // need to check the process setting for runs all threads.
- m_run_mode = eOnlyThisThread;
- } else {
- ProcessSP process_sp =
- execution_context ? execution_context->GetProcessSP() : ProcessSP();
- if (process_sp && process_sp->GetSteppingRunsAllThreads())
- m_run_mode = eAllThreads;
- }
+ ProcessSP process_sp =
+ execution_context ? execution_context->GetProcessSP() : ProcessSP();
+ if (process_sp && process_sp->GetSteppingRunsAllThreads())
+ m_run_mode = eAllThreads;
m_avoid_regexp.clear();
m_step_in_target.clear();
@@ -532,12 +526,12 @@ protected:
return false;
}
- // If we got a new plan, then set it to be a master plan (User level Plans
- // should be master plans so that they can be interruptible). Then resume
- // the process.
+ // If we got a new plan, then set it to be a controlling plan (User level
+ // Plans should be controlling plans so that they can be interruptible).
+ // Then resume the process.
if (new_plan_sp) {
- new_plan_sp->SetIsMasterPlan(true);
+ new_plan_sp->SetIsControllingPlan(true);
new_plan_sp->SetOkayToDiscard(false);
if (m_options.m_step_count > 1) {
@@ -1027,11 +1021,12 @@ protected:
abort_other_plans, &address_list.front(), address_list.size(),
m_options.m_stop_others, m_options.m_frame_idx, new_plan_status);
if (new_plan_sp) {
- // User level plans should be master plans so they can be interrupted
+ // User level plans should be controlling plans so they can be
+ // interrupted
// (e.g. by hitting a breakpoint) and other plans executed by the
// user (stepping around the breakpoint) and then a "continue" will
// resume the original plan.
- new_plan_sp->SetIsMasterPlan(true);
+ new_plan_sp->SetIsControllingPlan(true);
new_plan_sp->SetOkayToDiscard(false);
} else {
result.SetError(new_plan_status);
@@ -1935,15 +1930,14 @@ public:
"process to different formats.",
"thread trace export <export-plugin> [<subcommand objects>]") {
- for (uint32_t i = 0; true; i++) {
- if (const char *plugin_name =
- PluginManager::GetTraceExporterPluginNameAtIndex(i)) {
- if (ThreadTraceExportCommandCreator command_creator =
- PluginManager::GetThreadTraceExportCommandCreatorAtIndex(i)) {
- LoadSubCommand(plugin_name, command_creator(interpreter));
- }
- } else {
- break;
+ for (unsigned i = 0;; ++i) {
+ llvm::StringRef plugin_name =
+ PluginManager::GetTraceExporterPluginNameAtIndex(i);
+ if (plugin_name.empty())
+ break;
+ if (ThreadTraceExportCommandCreator command_creator =
+ PluginManager::GetThreadTraceExportCommandCreatorAtIndex(i)) {
+ LoadSubCommand(plugin_name, command_creator(interpreter));
}
}
}
@@ -2205,9 +2199,8 @@ public:
bool DoExecute(Args &command, CommandReturnObject &result) override {
Target &target = m_exe_ctx.GetTargetRef();
- result.GetOutputStream().Printf(
- "Trace technology: %s\n",
- target.GetTrace()->GetPluginName().AsCString());
+ result.GetOutputStream().Format("Trace technology: {0}\n",
+ target.GetTrace()->GetPluginName());
return CommandObjectIterateOverThreads::DoExecute(command, result);
}
diff --git a/lldb/source/Commands/CommandObjectTrace.cpp b/lldb/source/Commands/CommandObjectTrace.cpp
index c55fed45d4f4..62ee48ca0546 100644
--- a/lldb/source/Commands/CommandObjectTrace.cpp
+++ b/lldb/source/Commands/CommandObjectTrace.cpp
@@ -117,8 +117,8 @@ protected:
json_file.GetDirectory().AsCString())) {
lldb::TraceSP trace_sp = traceOrErr.get();
if (m_options.m_verbose && trace_sp)
- result.AppendMessageWithFormat("loading trace with plugin %s\n",
- trace_sp->GetPluginName().AsCString());
+ result.AppendMessageWithFormatv("loading trace with plugin {0}\n",
+ trace_sp->GetPluginName());
} else
return end_with_failure(traceOrErr.takeError());
diff --git a/lldb/source/Commands/CommandObjectType.cpp b/lldb/source/Commands/CommandObjectType.cpp
index 90e224867e2a..0562b6be3cb5 100644
--- a/lldb/source/Commands/CommandObjectType.cpp
+++ b/lldb/source/Commands/CommandObjectType.cpp
@@ -1598,7 +1598,7 @@ static bool FixArrayTypeNameWithRegex(ConstString &type_name) {
std::string type_name_str(type_name.GetCString());
type_name_str.resize(type_name_str.length() - 2);
if (type_name_str.back() != ' ')
- type_name_str.append(" \\[[0-9]+\\]");
+ type_name_str.append(" ?\\[[0-9]+\\]");
else
type_name_str.append("\\[[0-9]+\\]");
type_name.SetCString(type_name_str.c_str());
@@ -2978,7 +2978,7 @@ public:
CommandObjectTypeFilter(CommandInterpreter &interpreter)
: CommandObjectMultiword(interpreter, "type filter",
"Commands for operating on type filters.",
- "type synthetic [<sub-command-options>] ") {
+ "type filter [<sub-command-options>] ") {
LoadSubCommand(
"add", CommandObjectSP(new CommandObjectTypeFilterAdd(interpreter)));
LoadSubCommand("clear", CommandObjectSP(
diff --git a/lldb/source/Commands/CommandObjectWatchpoint.cpp b/lldb/source/Commands/CommandObjectWatchpoint.cpp
index d7a446fc366c..9fbf036a19d1 100644
--- a/lldb/source/Commands/CommandObjectWatchpoint.cpp
+++ b/lldb/source/Commands/CommandObjectWatchpoint.cpp
@@ -56,7 +56,7 @@ static int32_t WithRSAIndex(llvm::StringRef Arg) {
uint32_t i;
for (i = 0; i < 4; ++i)
- if (Arg.find(RSA[i]) != llvm::StringRef::npos)
+ if (Arg.contains(RSA[i]))
return i;
return -1;
}
diff --git a/lldb/source/Commands/Options.td b/lldb/source/Commands/Options.td
index 9c9b7c6e9b82..3e89eb0f6bda 100644
--- a/lldb/source/Commands/Options.td
+++ b/lldb/source/Commands/Options.td
@@ -73,7 +73,7 @@ let Command = "breakpoint modify" in {
"index matches this argument.">;
def breakpoint_modify_thread_id : Option<"thread-id", "t">, Group<1>,
Arg<"ThreadID">, Desc<"The breakpoint stops only for the thread whose TID "
- "matches this argument.">;
+ "matches this argument. The token 'current' resolves to the current thread's ID.">;
def breakpoint_modify_thread_name : Option<"thread-name", "T">, Group<1>,
Arg<"ThreadName">, Desc<"The breakpoint stops only for the thread whose "
"thread name matches this argument.">;
@@ -151,10 +151,10 @@ let Command = "breakpoint set" in {
def breakpoint_set_selector : Option<"selector", "S">, Group<5>,
Arg<"Selector">, Required,
Desc<"Set the breakpoint by ObjC selector name. Can be repeated multiple "
- "times tomake one breakpoint for multiple Selectors.">;
+ "times to make one breakpoint for multiple Selectors.">;
def breakpoint_set_method : Option<"method", "M">, Group<6>, Arg<"Method">,
Required, Desc<"Set the breakpoint by C++ method names. Can be repeated "
- "multiple times tomake one breakpoint for multiple methods.">;
+ "multiple times to make one breakpoint for multiple methods.">;
def breakpoint_set_func_regex : Option<"func-regex", "r">, Group<7>,
Arg<"RegularExpression">, Required, Desc<"Set the breakpoint by function "
"name, evaluating a regular-expression to find the function name(s).">;
@@ -355,7 +355,7 @@ let Command = "expression" in {
Desc<"When specified, debug the JIT code by setting a breakpoint on the "
"first instruction and forcing breakpoints to not be ignored (-i0) and no "
"unwinding to happen on error (-u0).">;
- def expression_options_language : Option<"language", "l">, Groups<[1,2]>,
+ def expression_options_language : Option<"language", "l">, Groups<[1,2,3]>,
Arg<"Language">, Desc<"Specifies the Language to use when parsing the "
"expression. If not set the target.language setting is used.">;
def expression_options_apply_fixits : Option<"apply-fixits", "X">,
@@ -404,6 +404,13 @@ let Command = "frame recognizer add" in {
Desc<"Give the name of a Python class to use for this frame recognizer.">;
def frame_recognizer_regex : Option<"regex", "x">,
Desc<"Function name and module name are actually regular expressions.">;
+ def frame_recognizer_first_instruction_only : Option<"first-instruction-only", "f">, Arg<"Boolean">,
+ Desc<"If true, only apply this recognizer to frames whose PC currently points to the "
+ "first instruction of the specified function. If false, the recognizer "
+ "will always be applied, regardless of the current position within the specified function. The "
+ "implementor should keep in mind that some features, e.g. accessing function argument "
+ "values via $arg<N>, are not guaranteed to work reliably in this case, so extra care must "
+ "be taken to make the recognizer operate correctly. Defaults to true.">;
}
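To put the new --first-instruction-only flag in context, here is a minimal sketch of the kind of scripted recognizer it governs, assuming the Python frame-recognizer interface (a class exposing get_recognized_arguments); the class name and the malloc example are hypothetical:

import lldb

class MallocFrameRecognizer:
    def get_recognized_arguments(self, frame):
        # Reading the incoming size argument through $arg1 is only
        # dependable while the PC is still on the function's first
        # instruction, which is why the option above defaults to true.
        size = frame.EvaluateExpression("(size_t)$arg1")
        return [size]

A recognizer of this shape is registered with "frame recognizer add", naming the class, the shared library, and the function it applies to; with --first-instruction-only false it also fires mid-function, where $arg<N> may no longer reflect the call's arguments.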
let Command = "history" in {
@@ -504,6 +511,14 @@ let Command = "memory write" in {
Desc<"Start writing bytes from an offset within the input file.">;
}
+let Command = "memory tag write" in {
+ def memory_write_end_addr : Option<"end-addr", "e">, Group<1>,
+ Arg<"AddressOrExpression">, Desc<
+ "Set tags for start address to end-addr, repeating tags as needed"
+ " to cover the range. (instead of calculating the range from the"
+ " number of tags given)">;
+}
+
let Command = "register read" in {
def register_read_alternate : Option<"alternate", "A">,
Desc<"Display register names using the alternate register name if there "
@@ -521,6 +536,10 @@ let Command = "source" in {
Desc<"If true, stop executing commands on continue.">;
def source_silent_run : Option<"silent-run", "s">, Arg<"Boolean">,
Desc<"If true don't echo commands while executing.">;
+ def cmd_relative_to_command_file : Option<"relative-to-command-file", "C">,
+ Desc<"Resolve non-absolute paths relative to the location of the "
+ "current command file. This argument can only be used when the command is "
+ "being sourced from a file.">;
}
let Command = "alias" in {
@@ -734,6 +753,17 @@ let Command = "process save_core" in {
def process_save_core_style : Option<"style", "s">, Group<1>,
EnumArg<"SaveCoreStyle", "SaveCoreStyles()">, Desc<"Request a specific style "
"of corefile to be saved.">;
+ def process_save_core_plugin_name : Option<"plugin-name", "p">,
+ OptionalArg<"Plugin">, Desc<"Specify a plugin name to create the core file."
+ "This allows core files to be saved in different formats.">;
+}
+
+let Command = "process trace save" in {
+ def process_trace_save_directory: Option<"directory", "d">,
+ Group<1>,
+ Arg<"Value">, Required,
+ Desc<"The directory where the trace will be saved."
+ "It will be created if it does not exist.">;
}
let Command = "script import" in {
@@ -757,12 +787,23 @@ let Command = "script add" in {
Desc<"Name of the Python class to bind to this command name.">;
def script_add_help : Option<"help", "h">, Group<1>, Arg<"HelpText">,
Desc<"The help text to display for this command.">;
+ def script_add_overwrite : Option<"overwrite", "o">, Groups<[1,2]>,
+ Desc<"Overwrite an existing command at this node.">;
def script_add_synchronicity : Option<"synchronicity", "s">,
EnumArg<"ScriptedCommandSynchronicity", "ScriptSynchroType()">,
Desc<"Set the synchronicity of this command's executions with regard to "
"LLDB event system.">;
}
+let Command = "container add" in {
+ def container_add_help : Option<"help", "h">, Arg<"HelpText">,
+ Desc<"Help text for this command">;
+ def container_add_long_help : Option<"long-help", "H">, Arg<"HelpText">,
+ Desc<"Long help text for this command">;
+ def container_add_overwrite : Option<"overwrite", "o">, Group<1>,
+ Desc<"Overwrite an existing command at this node.">;
+}
+
let Command = "script" in {
def script_language : Option<"language", "l">,
EnumArg<"ScriptLang", "ScriptOptionEnum()">, Desc<"Specify the scripting "
@@ -1264,3 +1305,8 @@ let Command = "trace schema" in {
def trace_schema_verbose : Option<"verbose", "v">, Group<1>,
Desc<"Show verbose trace schema logging for debugging the plug-in.">;
}
+
+let Command = "statistics dump" in {
+ def statistics_dump_all: Option<"all-targets", "a">, Group<1>,
+ Desc<"Include statistics for all targets.">;
+}
diff --git a/lldb/source/Core/Address.cpp b/lldb/source/Core/Address.cpp
index f0c7e2b34f99..122bed924b42 100644
--- a/lldb/source/Core/Address.cpp
+++ b/lldb/source/Core/Address.cpp
@@ -389,6 +389,19 @@ bool Address::SetOpcodeLoadAddress(lldb::addr_t load_addr, Target *target,
return false;
}
+bool Address::GetDescription(Stream &s, Target &target,
+ DescriptionLevel level) const {
+ assert(level == eDescriptionLevelBrief &&
+ "Non-brief descriptions not implemented");
+ LineEntry line_entry;
+ if (CalculateSymbolContextLineEntry(line_entry)) {
+ s.Printf(" (%s:%u:%u)", line_entry.file.GetFilename().GetCString(),
+ line_entry.line, line_entry.column);
+ return true;
+ }
+ return false;
+}
+
bool Address::Dump(Stream *s, ExecutionContextScope *exe_scope, DumpStyle style,
DumpStyle fallback_style, uint32_t addr_size) const {
// If the section was nullptr, only load address is going to work unless we
diff --git a/lldb/source/Core/AddressRange.cpp b/lldb/source/Core/AddressRange.cpp
index af6e31a67da3..66dcda574890 100644
--- a/lldb/source/Core/AddressRange.cpp
+++ b/lldb/source/Core/AddressRange.cpp
@@ -59,15 +59,6 @@ bool AddressRange::Contains(const Address &addr) const {
return ContainsFileAddress(addr);
}
-//
-// bool
-// AddressRange::Contains (const Address *addr) const
-//{
-// if (addr)
-// return Contains (*addr);
-// return false;
-//}
-
bool AddressRange::ContainsFileAddress(const Address &addr) const {
if (addr.GetSection() == m_base_addr.GetSection())
return (addr.GetOffset() - m_base_addr.GetOffset()) < GetByteSize();
@@ -212,11 +203,3 @@ void AddressRange::DumpDebug(Stream *s) const {
static_cast<void *>(m_base_addr.GetSection().get()),
m_base_addr.GetOffset(), GetByteSize());
}
-//
-// bool
-// lldb::operator== (const AddressRange& lhs, const AddressRange& rhs)
-//{
-// if (lhs.GetBaseAddress() == rhs.GetBaseAddress())
-// return lhs.GetByteSize() == rhs.GetByteSize();
-// return false;
-//}
diff --git a/lldb/source/Core/Communication.cpp b/lldb/source/Core/Communication.cpp
index 5640e0510cf1..0ad2751f24f0 100644
--- a/lldb/source/Core/Communication.cpp
+++ b/lldb/source/Core/Communication.cpp
@@ -176,8 +176,8 @@ size_t Communication::Write(const void *src, size_t src_len,
std::lock_guard<std::mutex> guard(m_write_mutex);
LLDB_LOG(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_COMMUNICATION),
- "{0} Communication::Write (src = {1}, src_len = %" PRIu64
- ") connection = {2}",
+ "{0} Communication::Write (src = {1}, src_len = {2}"
+ ") connection = {3}",
this, src, (uint64_t)src_len, connection_sp.get());
if (connection_sp)
@@ -189,6 +189,16 @@ size_t Communication::Write(const void *src, size_t src_len,
return 0;
}
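+// Unlike Write(), WriteAll() keeps issuing writes until the whole buffer has
+// been sent or the connection stops reporting eConnectionStatusSuccess.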
+size_t Communication::WriteAll(const void *src, size_t src_len,
+ ConnectionStatus &status, Status *error_ptr) {
+ size_t total_written = 0;
+ do
+ total_written += Write(static_cast<const char *>(src) + total_written,
+ src_len - total_written, status, error_ptr);
+ while (status == eConnectionStatusSuccess && total_written < src_len);
+ return total_written;
+}
+
bool Communication::StartReadThread(Status *error_ptr) {
if (error_ptr)
error_ptr->Clear();
diff --git a/lldb/source/Core/Debugger.cpp b/lldb/source/Core/Debugger.cpp
index 17c3ba426f71..32dcfb1ce17b 100644
--- a/lldb/source/Core/Debugger.cpp
+++ b/lldb/source/Core/Debugger.cpp
@@ -488,7 +488,7 @@ void Debugger::Terminate() {
"Debugger::Terminate called without a matching Debugger::Initialize!");
if (g_debugger_list_ptr && g_debugger_list_mutex_ptr) {
- // Clear our master list of debugger objects
+ // Clear our global list of debugger objects
{
std::lock_guard<std::recursive_mutex> guard(*g_debugger_list_mutex_ptr);
for (const auto &debugger : *g_debugger_list_ptr)
@@ -723,10 +723,10 @@ Debugger::Debugger(lldb::LogOutputCallback log_callback, void *baton)
m_collection_sp->AppendProperty(
ConstString("target"),
ConstString("Settings specify to debugging targets."), true,
- Target::GetGlobalProperties()->GetValueProperties());
+ Target::GetGlobalProperties().GetValueProperties());
m_collection_sp->AppendProperty(
ConstString("platform"), ConstString("Platform settings."), true,
- Platform::GetGlobalPlatformProperties()->GetValueProperties());
+ Platform::GetGlobalPlatformProperties().GetValueProperties());
m_collection_sp->AppendProperty(
ConstString("symbols"), ConstString("Symbol lookup and cache settings."),
true, ModuleList::GetGlobalModuleListProperties().GetValueProperties());
@@ -1243,7 +1243,7 @@ bool Debugger::EnableLog(llvm::StringRef channel,
log_stream_sp = pos->second.lock();
if (!log_stream_sp) {
File::OpenOptions flags =
- File::eOpenOptionWrite | File::eOpenOptionCanCreate;
+ File::eOpenOptionWriteOnly | File::eOpenOptionCanCreate;
if (log_options & LLDB_LOG_OPTION_APPEND)
flags |= File::eOpenOptionAppend;
else
@@ -1423,10 +1423,9 @@ void Debugger::HandleProcessEvent(const EventSP &event_sp) {
output_stream_sp->PutCString(content_stream.GetString());
}
} else {
- error_stream_sp->Printf("Failed to print structured "
- "data with plugin %s: %s",
- plugin_sp->GetPluginName().AsCString(),
- error.AsCString());
+ error_stream_sp->Format("Failed to print structured "
+ "data with plugin {0}: {1}",
+ plugin_sp->GetPluginName(), error);
}
}
}
diff --git a/lldb/source/Core/Disassembler.cpp b/lldb/source/Core/Disassembler.cpp
index 704b3df4b2ac..00d92053bc4f 100644
--- a/lldb/source/Core/Disassembler.cpp
+++ b/lldb/source/Core/Disassembler.cpp
@@ -64,9 +64,8 @@ DisassemblerSP Disassembler::FindPlugin(const ArchSpec &arch,
DisassemblerCreateInstance create_callback = nullptr;
if (plugin_name) {
- ConstString const_plugin_name(plugin_name);
- create_callback = PluginManager::GetDisassemblerCreateCallbackForPluginName(
- const_plugin_name);
+ create_callback =
+ PluginManager::GetDisassemblerCreateCallbackForPluginName(plugin_name);
if (create_callback) {
DisassemblerSP disassembler_sp(create_callback(arch, flavor));
@@ -1123,6 +1122,10 @@ bool PseudoInstruction::HasDelaySlot() {
return false;
}
+bool PseudoInstruction::IsLoad() { return false; }
+
+bool PseudoInstruction::IsAuthenticated() { return false; }
+
size_t PseudoInstruction::Decode(const lldb_private::Disassembler &disassembler,
const lldb_private::DataExtractor &data,
lldb::offset_t data_offset) {
diff --git a/lldb/source/Core/DynamicLoader.cpp b/lldb/source/Core/DynamicLoader.cpp
index 10d2b7207018..1c7b9125e4d1 100644
--- a/lldb/source/Core/DynamicLoader.cpp
+++ b/lldb/source/Core/DynamicLoader.cpp
@@ -30,13 +30,11 @@ using namespace lldb;
using namespace lldb_private;
DynamicLoader *DynamicLoader::FindPlugin(Process *process,
- const char *plugin_name) {
+ llvm::StringRef plugin_name) {
DynamicLoaderCreateInstance create_callback = nullptr;
- if (plugin_name) {
- ConstString const_plugin_name(plugin_name);
+ if (!plugin_name.empty()) {
create_callback =
- PluginManager::GetDynamicLoaderCreateCallbackForPluginName(
- const_plugin_name);
+ PluginManager::GetDynamicLoaderCreateCallbackForPluginName(plugin_name);
if (create_callback) {
std::unique_ptr<DynamicLoader> instance_up(
create_callback(process, true));
diff --git a/lldb/source/Core/EmulateInstruction.cpp b/lldb/source/Core/EmulateInstruction.cpp
index 9b9111408209..c352b0129382 100644
--- a/lldb/source/Core/EmulateInstruction.cpp
+++ b/lldb/source/Core/EmulateInstruction.cpp
@@ -46,10 +46,9 @@ EmulateInstruction::FindPlugin(const ArchSpec &arch,
const char *plugin_name) {
EmulateInstructionCreateInstance create_callback = nullptr;
if (plugin_name) {
- ConstString const_plugin_name(plugin_name);
create_callback =
PluginManager::GetEmulateInstructionCreateCallbackForPluginName(
- const_plugin_name);
+ plugin_name);
if (create_callback) {
EmulateInstruction *emulate_insn_ptr =
create_callback(arch, supported_inst_type);
diff --git a/lldb/source/Core/IOHandler.cpp b/lldb/source/Core/IOHandler.cpp
index c6f05d43a2a7..c35b17990842 100644
--- a/lldb/source/Core/IOHandler.cpp
+++ b/lldb/source/Core/IOHandler.cpp
@@ -251,8 +251,7 @@ IOHandlerEditline::IOHandlerEditline(
m_delegate(delegate), m_prompt(), m_continuation_prompt(),
m_current_lines_ptr(nullptr), m_base_line_number(line_number_start),
m_curr_line_idx(UINT32_MAX), m_multi_line(multi_line),
- m_color_prompts(color_prompts), m_interrupt_exits(true),
- m_editing(false) {
+ m_color_prompts(color_prompts), m_interrupt_exits(true) {
SetPrompt(prompt);
#if LLDB_ENABLE_LIBEDIT
@@ -399,7 +398,6 @@ bool IOHandlerEditline::GetLine(std::string &line, bool &interrupted) {
}
if (!got_line && in) {
- m_editing = true;
while (!got_line) {
char *r = fgets(buffer, sizeof(buffer), in);
#ifdef _WIN32
@@ -425,7 +423,6 @@ bool IOHandlerEditline::GetLine(std::string &line, bool &interrupted) {
m_line_buffer += buffer;
got_line = SplitLine(m_line_buffer);
}
- m_editing = false;
}
if (got_line) {
diff --git a/lldb/source/Core/IOHandlerCursesGUI.cpp b/lldb/source/Core/IOHandlerCursesGUI.cpp
index 4bed788d4863..9122117ef5ff 100644
--- a/lldb/source/Core/IOHandlerCursesGUI.cpp
+++ b/lldb/source/Core/IOHandlerCursesGUI.cpp
@@ -36,6 +36,7 @@
#include "lldb/Interpreter/CommandCompletions.h"
#include "lldb/Interpreter/CommandInterpreter.h"
+#include "lldb/Interpreter/OptionGroupPlatform.h"
#if LLDB_ENABLE_CURSES
#include "lldb/Breakpoint/BreakpointLocation.h"
@@ -44,6 +45,7 @@
#include "lldb/Core/ValueObject.h"
#include "lldb/Core/ValueObjectRegister.h"
#include "lldb/Symbol/Block.h"
+#include "lldb/Symbol/CompileUnit.h"
#include "lldb/Symbol/Function.h"
#include "lldb/Symbol/Symbol.h"
#include "lldb/Symbol/VariableList.h"
@@ -83,10 +85,15 @@ using llvm::StringRef;
// we may want curses to be disabled for some builds for instance, windows
#if LLDB_ENABLE_CURSES
+#define KEY_CTRL_A 1
+#define KEY_CTRL_E 5
+#define KEY_CTRL_K 11
#define KEY_RETURN 10
#define KEY_ESCAPE 27
+#define KEY_DELETE 127
#define KEY_SHIFT_TAB (KEY_MAX + 1)
+#define KEY_ALT_ENTER (KEY_MAX + 2)
namespace curses {
class Menu;
@@ -342,15 +349,30 @@ protected:
// A surface is an abstraction for something that can be drawn on. A surface
// has a width, a height, a cursor position, and a multitude of drawing
// operations. This type should be sub-classed to get an actually useful ncurses
-// object, such as a Window, SubWindow, Pad, or a SubPad.
+// object, such as a Window or a Pad.
class Surface {
public:
- Surface() : m_window(nullptr) {}
+ enum class Type { Window, Pad };
+
+ Surface(Surface::Type type) : m_type(type), m_window(nullptr) {}
WINDOW *get() { return m_window; }
operator WINDOW *() { return m_window; }
+ Surface SubSurface(Rect bounds) {
+ Surface subSurface(m_type);
+ if (m_type == Type::Pad)
+ subSurface.m_window =
+ ::subpad(m_window, bounds.size.height, bounds.size.width,
+ bounds.origin.y, bounds.origin.x);
+ else
+ subSurface.m_window =
+ ::derwin(m_window, bounds.size.height, bounds.size.width,
+ bounds.origin.y, bounds.origin.x);
+ return subSurface;
+ }
+
// Copy a region of the surface to another surface.
void CopyToSurface(Surface &target, Point source_origin, Point target_origin,
Size size) {
@@ -534,41 +556,32 @@ public:
}
protected:
+ Type m_type;
WINDOW *m_window;
};
class Pad : public Surface {
public:
- Pad(Size size) { m_window = ::newpad(size.height, size.width); }
-
- ~Pad() { ::delwin(m_window); }
-};
-
-class SubPad : public Surface {
-public:
- SubPad(Pad &pad, Rect bounds) {
- m_window = ::subpad(pad.get(), bounds.size.height, bounds.size.width,
- bounds.origin.y, bounds.origin.x);
- }
- SubPad(SubPad &subpad, Rect bounds) {
- m_window = ::subpad(subpad.get(), bounds.size.height, bounds.size.width,
- bounds.origin.y, bounds.origin.x);
+ Pad(Size size) : Surface(Surface::Type::Pad) {
+ m_window = ::newpad(size.height, size.width);
}
- ~SubPad() { ::delwin(m_window); }
+ ~Pad() { ::delwin(m_window); }
};
class Window : public Surface {
public:
Window(const char *name)
- : m_name(name), m_panel(nullptr), m_parent(nullptr), m_subwindows(),
- m_delegate_sp(), m_curr_active_window_idx(UINT32_MAX),
+ : Surface(Surface::Type::Window), m_name(name), m_panel(nullptr),
+ m_parent(nullptr), m_subwindows(), m_delegate_sp(),
+ m_curr_active_window_idx(UINT32_MAX),
m_prev_active_window_idx(UINT32_MAX), m_delete(false),
m_needs_update(true), m_can_activate(true), m_is_subwin(false) {}
Window(const char *name, WINDOW *w, bool del = true)
- : m_name(name), m_panel(nullptr), m_parent(nullptr), m_subwindows(),
- m_delegate_sp(), m_curr_active_window_idx(UINT32_MAX),
+ : Surface(Surface::Type::Window), m_name(name), m_panel(nullptr),
+ m_parent(nullptr), m_subwindows(), m_delegate_sp(),
+ m_curr_active_window_idx(UINT32_MAX),
m_prev_active_window_idx(UINT32_MAX), m_delete(del),
m_needs_update(true), m_can_activate(true), m_is_subwin(false) {
if (w)
@@ -576,8 +589,8 @@ public:
}
Window(const char *name, const Rect &bounds)
- : m_name(name), m_parent(nullptr), m_subwindows(), m_delegate_sp(),
- m_curr_active_window_idx(UINT32_MAX),
+ : Surface(Surface::Type::Window), m_name(name), m_parent(nullptr),
+ m_subwindows(), m_delegate_sp(), m_curr_active_window_idx(UINT32_MAX),
m_prev_active_window_idx(UINT32_MAX), m_delete(true),
m_needs_update(true), m_can_activate(true), m_is_subwin(false) {
Reset(::newwin(bounds.size.height, bounds.size.width, bounds.origin.y,
@@ -969,20 +982,6 @@ private:
const Window &operator=(const Window &) = delete;
};
-class DerivedWindow : public Surface {
-public:
- DerivedWindow(Window &window, Rect bounds) {
- m_window = ::derwin(window.get(), bounds.size.height, bounds.size.width,
- bounds.origin.y, bounds.origin.x);
- }
- DerivedWindow(DerivedWindow &derived_window, Rect bounds) {
- m_window = ::derwin(derived_window.get(), bounds.size.height,
- bounds.size.width, bounds.origin.y, bounds.origin.x);
- }
-
- ~DerivedWindow() { ::delwin(m_window); }
-};
-
/////////
// Forms
/////////
@@ -1024,7 +1023,7 @@ public:
// Draw the field in the given subpad surface. The surface has a height that
// is equal to the height returned by FieldDelegateGetHeight(). If the field
// is selected in the form window, then is_selected will be true.
- virtual void FieldDelegateDraw(SubPad &surface, bool is_selected) = 0;
+ virtual void FieldDelegateDraw(Surface &surface, bool is_selected) = 0;
// Handle the key that wasn't handled by the form window or a container field.
virtual HandleCharResult FieldDelegateHandleChar(int key) {
@@ -1111,11 +1110,12 @@ public:
int GetContentLength() { return m_content.length(); }
- void DrawContent(SubPad &surface, bool is_selected) {
+ void DrawContent(Surface &surface, bool is_selected) {
+ UpdateScrolling(surface.GetWidth());
+
surface.MoveCursor(0, 0);
const char *text = m_content.c_str() + m_first_visibile_char;
surface.PutCString(text, surface.GetWidth());
- m_last_drawn_content_width = surface.GetWidth();
// Highlight the cursor.
surface.MoveCursor(GetCursorXPosition(), 0);
@@ -1130,17 +1130,17 @@ public:
surface.AttributeOff(A_REVERSE);
}
- void DrawField(SubPad &surface, bool is_selected) {
+ void DrawField(Surface &surface, bool is_selected) {
surface.TitledBox(m_label.c_str());
Rect content_bounds = surface.GetFrame();
content_bounds.Inset(1, 1);
- SubPad content_surface = SubPad(surface, content_bounds);
+ Surface content_surface = surface.SubSurface(content_bounds);
DrawContent(content_surface, is_selected);
}
- void DrawError(SubPad &surface) {
+ void DrawError(Surface &surface) {
if (!FieldDelegateHasError())
return;
surface.MoveCursor(0, 0);
@@ -1151,17 +1151,33 @@ public:
surface.AttributeOff(COLOR_PAIR(RedOnBlack));
}
- void FieldDelegateDraw(SubPad &surface, bool is_selected) override {
+ void FieldDelegateDraw(Surface &surface, bool is_selected) override {
Rect frame = surface.GetFrame();
Rect field_bounds, error_bounds;
frame.HorizontalSplit(GetFieldHeight(), field_bounds, error_bounds);
- SubPad field_surface = SubPad(surface, field_bounds);
- SubPad error_surface = SubPad(surface, error_bounds);
+ Surface field_surface = surface.SubSurface(field_bounds);
+ Surface error_surface = surface.SubSurface(error_bounds);
DrawField(field_surface, is_selected);
DrawError(error_surface);
}
+ // Get the position of the last visible character.
+ int GetLastVisibleCharPosition(int width) {
+ int position = m_first_visibile_char + width - 1;
+ return std::min(position, GetContentLength());
+ }
+
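+ // Scroll the visible content horizontally so that the cursor always falls
+ // within the drawn width of the field.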
+ void UpdateScrolling(int width) {
+ if (m_cursor_position < m_first_visibile_char) {
+ m_first_visibile_char = m_cursor_position;
+ return;
+ }
+
+ if (m_cursor_position > GetLastVisibleCharPosition(width))
+ m_first_visibile_char = m_cursor_position - (width - 1);
+ }
+
// The cursor is allowed to move one character past the string.
// m_cursor_position is in range [0, GetContentLength()].
void MoveCursorRight() {
@@ -1174,47 +1190,65 @@ public:
m_cursor_position--;
}
- // If the cursor moved past the last visible character, scroll right by one
- // character.
- void ScrollRightIfNeeded() {
- if (m_cursor_position - m_first_visibile_char == m_last_drawn_content_width)
- m_first_visibile_char++;
- }
+ void MoveCursorToStart() { m_cursor_position = 0; }
+
+ void MoveCursorToEnd() { m_cursor_position = GetContentLength(); }
void ScrollLeft() {
if (m_first_visibile_char > 0)
m_first_visibile_char--;
}
- // If the cursor moved past the first visible character, scroll left by one
- // character.
- void ScrollLeftIfNeeded() {
- if (m_cursor_position < m_first_visibile_char)
- m_first_visibile_char--;
- }
-
- // Insert a character at the current cursor position, advance the cursor
- // position, and make sure to scroll right if needed.
+ // Insert a character at the current cursor position and advance the cursor
+ // position.
void InsertChar(char character) {
m_content.insert(m_cursor_position, 1, character);
m_cursor_position++;
- ScrollRightIfNeeded();
+ ClearError();
}
// Remove the character before the cursor position, retreat the cursor
- // position, and make sure to scroll left if needed.
- void RemoveChar() {
+ // position, and scroll left.
+ void RemovePreviousChar() {
if (m_cursor_position == 0)
return;
m_content.erase(m_cursor_position - 1, 1);
m_cursor_position--;
ScrollLeft();
+ ClearError();
+ }
+
+ // Remove the character after the cursor position.
+ void RemoveNextChar() {
+ if (m_cursor_position == GetContentLength())
+ return;
+
+ m_content.erase(m_cursor_position, 1);
+ ClearError();
+ }
+
+ // Clear characters from the current cursor position to the end.
+ void ClearToEnd() {
+ m_content.erase(m_cursor_position);
+ ClearError();
+ }
+
+ void Clear() {
+ m_content.clear();
+ m_cursor_position = 0;
+ ClearError();
}
// True if the key represents a char that can be inserted in the field
// content, false otherwise.
- virtual bool IsAcceptableChar(int key) { return isprint(key); }
+ virtual bool IsAcceptableChar(int key) {
+ // The behavior of isprint is undefined when the value is not representable
+ // as an unsigned char. So explicitly check for non-ascii key codes.
+ if (key > 127)
+ return false;
+ return isprint(key);
+ }
HandleCharResult FieldDelegateHandleChar(int key) override {
if (IsAcceptableChar(key)) {
@@ -1224,17 +1258,36 @@ public:
}
switch (key) {
+ case KEY_HOME:
+ case KEY_CTRL_A:
+ MoveCursorToStart();
+ return eKeyHandled;
+ case KEY_END:
+ case KEY_CTRL_E:
+ MoveCursorToEnd();
+ return eKeyHandled;
case KEY_RIGHT:
+ case KEY_SF:
MoveCursorRight();
- ScrollRightIfNeeded();
return eKeyHandled;
case KEY_LEFT:
+ case KEY_SR:
MoveCursorLeft();
- ScrollLeftIfNeeded();
return eKeyHandled;
case KEY_BACKSPACE:
- ClearError();
- RemoveChar();
+ case KEY_DELETE:
+ RemovePreviousChar();
+ return eKeyHandled;
+ case KEY_DC:
+ RemoveNextChar();
+ return eKeyHandled;
+ case KEY_EOL:
+ case KEY_CTRL_K:
+ ClearToEnd();
+ return eKeyHandled;
+ case KEY_DL:
+ case KEY_CLEAR:
+ Clear();
return eKeyHandled;
default:
break;
@@ -1259,6 +1312,14 @@ public:
const std::string &GetText() { return m_content; }
+ void SetText(const char *text) {
+ if (text == nullptr) {
+ m_content.clear();
+ return;
+ }
+ m_content = text;
+ }
+
protected:
std::string m_label;
bool m_required;
@@ -1269,9 +1330,6 @@ protected:
int m_cursor_position;
// The index of the first visible character in the content.
int m_first_visibile_char;
- // The width of the fields content that was last drawn. Width can change, so
- // this is used to determine if scrolling is needed dynamically.
- int m_last_drawn_content_width;
// Optional error message. If empty, field is considered to have no error.
std::string m_error;
};
@@ -1405,7 +1463,7 @@ public:
// Boolean fields are have a single line.
int FieldDelegateGetHeight() override { return 1; }
- void FieldDelegateDraw(SubPad &surface, bool is_selected) override {
+ void FieldDelegateDraw(Surface &surface, bool is_selected) override {
surface.MoveCursor(0, 0);
surface.PutChar('[');
if (is_selected)
@@ -1485,7 +1543,7 @@ public:
return std::min(index, GetNumberOfChoices()) - 1;
}
- void DrawContent(SubPad &surface, bool is_selected) {
+ void DrawContent(Surface &surface, bool is_selected) {
int choices_to_draw = GetLastVisibleChoice() - m_first_visibile_choice + 1;
for (int i = 0; i < choices_to_draw; i++) {
surface.MoveCursor(0, i);
@@ -1501,14 +1559,14 @@ public:
}
}
- void FieldDelegateDraw(SubPad &surface, bool is_selected) override {
+ void FieldDelegateDraw(Surface &surface, bool is_selected) override {
UpdateScrolling();
surface.TitledBox(m_label.c_str());
Rect content_bounds = surface.GetFrame();
content_bounds.Inset(1, 1);
- SubPad content_surface = SubPad(surface, content_bounds);
+ Surface content_surface = surface.SubSurface(content_bounds);
DrawContent(content_surface, is_selected);
}
@@ -1584,8 +1642,10 @@ public:
std::vector<std::string> GetPossiblePluginNames() {
std::vector<std::string> names;
size_t i = 0;
- while (auto name = PluginManager::GetPlatformPluginNameAtIndex(i++))
- names.push_back(name);
+ for (llvm::StringRef name =
+ PluginManager::GetPlatformPluginNameAtIndex(i++);
+ !name.empty(); name = PluginManager::GetPlatformPluginNameAtIndex(i++))
+ names.push_back(name.str());
return names;
}
@@ -1605,8 +1665,9 @@ public:
names.push_back("<default>");
size_t i = 0;
- while (auto name = PluginManager::GetProcessPluginNameAtIndex(i++))
- names.push_back(name);
+ for (llvm::StringRef name = PluginManager::GetProcessPluginNameAtIndex(i++);
+ !name.empty(); name = PluginManager::GetProcessPluginNameAtIndex(i++))
+ names.push_back(name.str());
return names;
}
@@ -1618,6 +1679,33 @@ public:
}
};
+class LazyBooleanFieldDelegate : public ChoicesFieldDelegate {
+public:
+ LazyBooleanFieldDelegate(const char *label, const char *calculate_label)
+ : ChoicesFieldDelegate(label, 3, GetPossibleOptions(calculate_label)) {}
+
+ static constexpr const char *kNo = "No";
+ static constexpr const char *kYes = "Yes";
+
+ std::vector<std::string> GetPossibleOptions(const char *calculate_label) {
+ std::vector<std::string> options;
+ options.push_back(calculate_label);
+ options.push_back(kYes);
+ options.push_back(kNo);
+ return options;
+ }
+
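+ // Map the selected choice back to a LazyBool; anything other than "Yes" or
+ // "No" (that is, the calculate label) means eLazyBoolCalculate.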
+ LazyBool GetLazyBoolean() {
+ std::string choice = GetChoiceContent();
+ if (choice == kNo)
+ return eLazyBoolNo;
+ else if (choice == kYes)
+ return eLazyBoolYes;
+ else
+ return eLazyBoolCalculate;
+ }
+};
+
template <class T> class ListFieldDelegate : public FieldDelegate {
public:
ListFieldDelegate(const char *label, T default_field)
@@ -1683,7 +1771,7 @@ public:
return context;
}
- void DrawRemoveButton(SubPad &surface, int highlight) {
+ void DrawRemoveButton(Surface &surface, int highlight) {
surface.MoveCursor(1, surface.GetHeight() / 2);
if (highlight)
surface.AttributeOn(A_REVERSE);
@@ -1692,7 +1780,7 @@ public:
surface.AttributeOff(A_REVERSE);
}
- void DrawFields(SubPad &surface, bool is_selected) {
+ void DrawFields(Surface &surface, bool is_selected) {
int line = 0;
int width = surface.GetWidth();
for (int i = 0; i < GetNumberOfFields(); i++) {
@@ -1701,8 +1789,8 @@ public:
Rect field_bounds, remove_button_bounds;
bounds.VerticalSplit(bounds.size.width - sizeof(" [Remove]"),
field_bounds, remove_button_bounds);
- SubPad field_surface = SubPad(surface, field_bounds);
- SubPad remove_button_surface = SubPad(surface, remove_button_bounds);
+ Surface field_surface = surface.SubSurface(field_bounds);
+ Surface remove_button_surface = surface.SubSurface(remove_button_bounds);
bool is_element_selected = m_selection_index == i && is_selected;
bool is_field_selected =
@@ -1717,7 +1805,7 @@ public:
}
}
- void DrawNewButton(SubPad &surface, bool is_selected) {
+ void DrawNewButton(Surface &surface, bool is_selected) {
const char *button_text = "[New]";
int x = (surface.GetWidth() - sizeof(button_text) - 1) / 2;
surface.MoveCursor(x, 0);
@@ -1730,7 +1818,7 @@ public:
surface.AttributeOff(A_REVERSE);
}
- void FieldDelegateDraw(SubPad &surface, bool is_selected) override {
+ void FieldDelegateDraw(Surface &surface, bool is_selected) override {
surface.TitledBox(m_label.c_str());
Rect content_bounds = surface.GetFrame();
@@ -1738,8 +1826,8 @@ public:
Rect fields_bounds, new_button_bounds;
content_bounds.HorizontalSplit(content_bounds.size.height - 1,
fields_bounds, new_button_bounds);
- SubPad fields_surface = SubPad(surface, fields_bounds);
- SubPad new_button_surface = SubPad(surface, new_button_bounds);
+ Surface fields_surface = surface.SubSurface(fields_bounds);
+ Surface new_button_surface = surface.SubSurface(new_button_bounds);
DrawFields(fields_surface, is_selected);
DrawNewButton(new_button_surface, is_selected);
@@ -1822,6 +1910,31 @@ public:
return eKeyHandled;
}
+ // If the last element of the field is selected and it didn't handle the key,
+ // select the next field, or the new button if this is the last field.
+ HandleCharResult SelectNextInList(int key) {
+ assert(m_selection_type == SelectionType::Field);
+
+ FieldDelegate &field = m_fields[m_selection_index];
+ if (field.FieldDelegateHandleChar(key) == eKeyHandled)
+ return eKeyHandled;
+
+ if (!field.FieldDelegateOnLastOrOnlyElement())
+ return eKeyNotHandled;
+
+ field.FieldDelegateExitCallback();
+
+ if (m_selection_index == GetNumberOfFields() - 1) {
+ m_selection_type = SelectionType::NewButton;
+ return eKeyHandled;
+ }
+
+ m_selection_index++;
+ FieldDelegate &next_field = m_fields[m_selection_index];
+ next_field.FieldDelegateSelectFirstElement();
+ return eKeyHandled;
+ }
+
HandleCharResult FieldDelegateHandleChar(int key) override {
switch (key) {
case '\r':
@@ -1834,16 +1947,14 @@ public:
case SelectionType::RemoveButton:
RemoveField();
return eKeyHandled;
- default:
- break;
+ case SelectionType::Field:
+ return SelectNextInList(key);
}
break;
case '\t':
- SelectNext(key);
- return eKeyHandled;
+ return SelectNext(key);
case KEY_SHIFT_TAB:
- SelectPrevious(key);
- return eKeyHandled;
+ return SelectPrevious(key);
default:
break;
}
@@ -1908,6 +2019,241 @@ protected:
SelectionType m_selection_type;
};
+class ArgumentsFieldDelegate : public ListFieldDelegate<TextFieldDelegate> {
+public:
+ ArgumentsFieldDelegate()
+ : ListFieldDelegate("Arguments",
+ TextFieldDelegate("Argument", "", false)) {}
+
+ Args GetArguments() {
+ Args arguments;
+ for (int i = 0; i < GetNumberOfFields(); i++) {
+ arguments.AppendArgument(GetField(i).GetText());
+ }
+ return arguments;
+ }
+
+ void AddArguments(const Args &arguments) {
+ for (size_t i = 0; i < arguments.GetArgumentCount(); i++) {
+ AddNewField();
+ TextFieldDelegate &field = GetField(GetNumberOfFields() - 1);
+ field.SetText(arguments.GetArgumentAtIndex(i));
+ }
+ }
+};
+
+template <class KeyFieldDelegateType, class ValueFieldDelegateType>
+class MappingFieldDelegate : public FieldDelegate {
+public:
+ MappingFieldDelegate(KeyFieldDelegateType key_field,
+ ValueFieldDelegateType value_field)
+ : m_key_field(key_field), m_value_field(value_field),
+ m_selection_type(SelectionType::Key) {}
+
+ // Signify which element is selected. The key field or its value field.
+ enum class SelectionType { Key, Value };
+
+ // A mapping field is drawn as two text fields with a right arrow in between.
+ // The first field stores the key of the mapping and the second stores the
+ // value of the mapping.
+ //
+ // __[Key]_____________ __[Value]___________
+ // | | > | |
+ // |__________________| |__________________|
+ // - Error message if it exists.
+
+ // The mapping field has a height that is equal to the maximum height between
+ // the key and value fields.
+ int FieldDelegateGetHeight() override {
+ return std::max(m_key_field.FieldDelegateGetHeight(),
+ m_value_field.FieldDelegateGetHeight());
+ }
+
+ void DrawArrow(Surface &surface) {
+ surface.MoveCursor(0, 1);
+ surface.PutChar(ACS_RARROW);
+ }
+
+ void FieldDelegateDraw(Surface &surface, bool is_selected) override {
+ Rect bounds = surface.GetFrame();
+ Rect key_field_bounds, arrow_and_value_field_bounds;
+ bounds.VerticalSplit(bounds.size.width / 2, key_field_bounds,
+ arrow_and_value_field_bounds);
+ Rect arrow_bounds, value_field_bounds;
+ arrow_and_value_field_bounds.VerticalSplit(1, arrow_bounds,
+ value_field_bounds);
+
+ Surface key_field_surface = surface.SubSurface(key_field_bounds);
+ Surface arrow_surface = surface.SubSurface(arrow_bounds);
+ Surface value_field_surface = surface.SubSurface(value_field_bounds);
+
+ bool key_is_selected =
+ m_selection_type == SelectionType::Key && is_selected;
+ m_key_field.FieldDelegateDraw(key_field_surface, key_is_selected);
+ DrawArrow(arrow_surface);
+ bool value_is_selected =
+ m_selection_type == SelectionType::Value && is_selected;
+ m_value_field.FieldDelegateDraw(value_field_surface, value_is_selected);
+ }
+
+ HandleCharResult SelectNext(int key) {
+ if (FieldDelegateOnLastOrOnlyElement())
+ return eKeyNotHandled;
+
+ if (!m_key_field.FieldDelegateOnLastOrOnlyElement()) {
+ return m_key_field.FieldDelegateHandleChar(key);
+ }
+
+ m_key_field.FieldDelegateExitCallback();
+ m_selection_type = SelectionType::Value;
+ m_value_field.FieldDelegateSelectFirstElement();
+ return eKeyHandled;
+ }
+
+ HandleCharResult SelectPrevious(int key) {
+ if (FieldDelegateOnFirstOrOnlyElement())
+ return eKeyNotHandled;
+
+ if (!m_value_field.FieldDelegateOnFirstOrOnlyElement()) {
+ return m_value_field.FieldDelegateHandleChar(key);
+ }
+
+ m_value_field.FieldDelegateExitCallback();
+ m_selection_type = SelectionType::Key;
+ m_key_field.FieldDelegateSelectLastElement();
+ return eKeyHandled;
+ }
+
+ // If the value field is selected, pass the key to it. If the key field is
+ // selected, its last element is selected, and it didn't handle the key, then
+ // select its corresponding value field.
+ HandleCharResult SelectNextField(int key) {
+ if (m_selection_type == SelectionType::Value) {
+ return m_value_field.FieldDelegateHandleChar(key);
+ }
+
+ if (m_key_field.FieldDelegateHandleChar(key) == eKeyHandled)
+ return eKeyHandled;
+
+ if (!m_key_field.FieldDelegateOnLastOrOnlyElement())
+ return eKeyNotHandled;
+
+ m_key_field.FieldDelegateExitCallback();
+ m_selection_type = SelectionType::Value;
+ m_value_field.FieldDelegateSelectFirstElement();
+ return eKeyHandled;
+ }
+
+ HandleCharResult FieldDelegateHandleChar(int key) override {
+ switch (key) {
+ case KEY_RETURN:
+ return SelectNextField(key);
+ case '\t':
+ return SelectNext(key);
+ case KEY_SHIFT_TAB:
+ return SelectPrevious(key);
+ default:
+ break;
+ }
+
+ // If the key wasn't handled, pass the key to the selected field.
+ if (m_selection_type == SelectionType::Key)
+ return m_key_field.FieldDelegateHandleChar(key);
+ else
+ return m_value_field.FieldDelegateHandleChar(key);
+
+ return eKeyNotHandled;
+ }
+
+ bool FieldDelegateOnFirstOrOnlyElement() override {
+ return m_selection_type == SelectionType::Key;
+ }
+
+ bool FieldDelegateOnLastOrOnlyElement() override {
+ return m_selection_type == SelectionType::Value;
+ }
+
+ void FieldDelegateSelectFirstElement() override {
+ m_selection_type = SelectionType::Key;
+ }
+
+ void FieldDelegateSelectLastElement() override {
+ m_selection_type = SelectionType::Value;
+ }
+
+ bool FieldDelegateHasError() override {
+ return m_key_field.FieldDelegateHasError() ||
+ m_value_field.FieldDelegateHasError();
+ }
+
+ KeyFieldDelegateType &GetKeyField() { return m_key_field; }
+
+ ValueFieldDelegateType &GetValueField() { return m_value_field; }
+
+protected:
+ KeyFieldDelegateType m_key_field;
+ ValueFieldDelegateType m_value_field;
+ // See SelectionType class enum.
+ SelectionType m_selection_type;
+};
+
+class EnvironmentVariableNameFieldDelegate : public TextFieldDelegate {
+public:
+ EnvironmentVariableNameFieldDelegate(const char *content)
+ : TextFieldDelegate("Name", content, true) {}
+
+ // Environment variable names can't contain an equal sign.
+ bool IsAcceptableChar(int key) override {
+ return TextFieldDelegate::IsAcceptableChar(key) && key != '=';
+ }
+
+ const std::string &GetName() { return m_content; }
+};
+
+class EnvironmentVariableFieldDelegate
+ : public MappingFieldDelegate<EnvironmentVariableNameFieldDelegate,
+ TextFieldDelegate> {
+public:
+ EnvironmentVariableFieldDelegate()
+ : MappingFieldDelegate(
+ EnvironmentVariableNameFieldDelegate(""),
+ TextFieldDelegate("Value", "", /*required=*/false)) {}
+
+ const std::string &GetName() { return GetKeyField().GetName(); }
+
+ const std::string &GetValue() { return GetValueField().GetText(); }
+
+ void SetName(const char *name) { return GetKeyField().SetText(name); }
+
+ void SetValue(const char *value) { return GetValueField().SetText(value); }
+};
+
+class EnvironmentVariableListFieldDelegate
+ : public ListFieldDelegate<EnvironmentVariableFieldDelegate> {
+public:
+ EnvironmentVariableListFieldDelegate(const char *label)
+ : ListFieldDelegate(label, EnvironmentVariableFieldDelegate()) {}
+
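+ // Collect the name/value pair of every list entry into an Environment.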
+ Environment GetEnvironment() {
+ Environment environment;
+ for (int i = 0; i < GetNumberOfFields(); i++) {
+ environment.insert(
+ std::make_pair(GetField(i).GetName(), GetField(i).GetValue()));
+ }
+ return environment;
+ }
+
+ void AddEnvironmentVariables(const Environment &environment) {
+ for (auto &variable : environment) {
+ AddNewField();
+ EnvironmentVariableFieldDelegate &field =
+ GetField(GetNumberOfFields() - 1);
+ field.SetName(variable.getKey().str().c_str());
+ field.SetValue(variable.getValue().c_str());
+ }
+ }
+};
+
class FormAction {
public:
FormAction(const char *label, std::function<void(Window &)> action)
@@ -1917,7 +2263,7 @@ public:
}
// Draw a centered [Label].
- void Draw(SubPad &surface, bool is_selected) {
+ void Draw(Surface &surface, bool is_selected) {
int x = (surface.GetWidth() - m_label.length()) / 2;
surface.MoveCursor(x, 0);
if (is_selected)
@@ -1973,6 +2319,7 @@ public:
// action that requires valid fields.
bool CheckFieldsValidity() {
for (int i = 0; i < GetNumberOfFields(); i++) {
+ GetField(i)->FieldDelegateExitCallback();
if (GetField(i)->FieldDelegateHasError()) {
SetError("Some fields are invalid!");
return false;
@@ -2030,6 +2377,14 @@ public:
return delegate;
}
+ LazyBooleanFieldDelegate *AddLazyBooleanField(const char *label,
+ const char *calculate_label) {
+ LazyBooleanFieldDelegate *delegate =
+ new LazyBooleanFieldDelegate(label, calculate_label);
+ m_fields.push_back(FieldDelegateUP(delegate));
+ return delegate;
+ }
+
ChoicesFieldDelegate *AddChoicesField(const char *label, int height,
std::vector<std::string> choices) {
ChoicesFieldDelegate *delegate =
@@ -2059,6 +2414,43 @@ public:
return delegate;
}
+ ArgumentsFieldDelegate *AddArgumentsField() {
+ ArgumentsFieldDelegate *delegate = new ArgumentsFieldDelegate();
+ m_fields.push_back(FieldDelegateUP(delegate));
+ return delegate;
+ }
+
+ template <class K, class V>
+ MappingFieldDelegate<K, V> *AddMappingField(K key_field, V value_field) {
+ MappingFieldDelegate<K, V> *delegate =
+ new MappingFieldDelegate<K, V>(key_field, value_field);
+ m_fields.push_back(FieldDelegateUP(delegate));
+ return delegate;
+ }
+
+ EnvironmentVariableNameFieldDelegate *
+ AddEnvironmentVariableNameField(const char *content) {
+ EnvironmentVariableNameFieldDelegate *delegate =
+ new EnvironmentVariableNameFieldDelegate(content);
+ m_fields.push_back(FieldDelegateUP(delegate));
+ return delegate;
+ }
+
+ EnvironmentVariableFieldDelegate *AddEnvironmentVariableField() {
+ EnvironmentVariableFieldDelegate *delegate =
+ new EnvironmentVariableFieldDelegate();
+ m_fields.push_back(FieldDelegateUP(delegate));
+ return delegate;
+ }
+
+ EnvironmentVariableListFieldDelegate *
+ AddEnvironmentVariableListField(const char *label) {
+ EnvironmentVariableListFieldDelegate *delegate =
+ new EnvironmentVariableListFieldDelegate(label);
+ m_fields.push_back(FieldDelegateUP(delegate));
+ return delegate;
+ }
+
// Factory methods for adding actions.
void AddAction(const char *label, std::function<void(Window &)> action) {
@@ -2156,7 +2548,7 @@ public:
return context;
}
- void UpdateScrolling(DerivedWindow &surface) {
+ void UpdateScrolling(Surface &surface) {
ScrollContext context = GetScrollContext();
int content_height = GetContentHeight();
int surface_height = surface.GetHeight();
@@ -2180,7 +2572,7 @@ public:
}
}
- void DrawError(SubPad &surface) {
+ void DrawError(Surface &surface) {
if (!m_delegate_sp->HasError())
return;
surface.MoveCursor(0, 0);
@@ -2194,7 +2586,7 @@ public:
surface.HorizontalLine(surface.GetWidth());
}
- void DrawFields(SubPad &surface) {
+ void DrawFields(Surface &surface) {
int line = 0;
int width = surface.GetWidth();
bool a_field_is_selected = m_selection_type == SelectionType::Field;
@@ -2205,13 +2597,13 @@ public:
bool is_field_selected = a_field_is_selected && m_selection_index == i;
int height = field->FieldDelegateGetHeight();
Rect bounds = Rect(Point(0, line), Size(width, height));
- SubPad field_surface = SubPad(surface, bounds);
+ Surface field_surface = surface.SubSurface(bounds);
field->FieldDelegateDraw(field_surface, is_field_selected);
line += height;
}
}
- void DrawActions(SubPad &surface) {
+ void DrawActions(Surface &surface) {
int number_of_actions = m_delegate_sp->GetNumberOfActions();
int width = surface.GetWidth() / number_of_actions;
bool an_action_is_selected = m_selection_type == SelectionType::Action;
@@ -2220,19 +2612,19 @@ public:
bool is_action_selected = an_action_is_selected && m_selection_index == i;
FormAction &action = m_delegate_sp->GetAction(i);
Rect bounds = Rect(Point(x, 0), Size(width, 1));
- SubPad action_surface = SubPad(surface, bounds);
+ Surface action_surface = surface.SubSurface(bounds);
action.Draw(action_surface, is_action_selected);
x += width;
}
}
- void DrawElements(SubPad &surface) {
+ void DrawElements(Surface &surface) {
Rect frame = surface.GetFrame();
Rect fields_bounds, actions_bounds;
frame.HorizontalSplit(surface.GetHeight() - GetActionsHeight(),
fields_bounds, actions_bounds);
- SubPad fields_surface = SubPad(surface, fields_bounds);
- SubPad actions_surface = SubPad(surface, actions_bounds);
+ Surface fields_surface = surface.SubSurface(fields_bounds);
+ Surface actions_surface = surface.SubSurface(actions_bounds);
DrawFields(fields_surface);
DrawActions(actions_surface);
@@ -2241,7 +2633,7 @@ public:
// Contents are first drawn on a pad. Then a subset of that pad is copied to
// the derived window starting at the first visible line. This essentially
// provides scrolling functionality.
- void DrawContent(DerivedWindow &surface) {
+ void DrawContent(Surface &surface) {
UpdateScrolling(surface);
int width = surface.GetWidth();
@@ -2251,8 +2643,8 @@ public:
Rect frame = pad.GetFrame();
Rect error_bounds, elements_bounds;
frame.HorizontalSplit(GetErrorHeight(), error_bounds, elements_bounds);
- SubPad error_surface = SubPad(pad, error_bounds);
- SubPad elements_surface = SubPad(pad, elements_bounds);
+ Surface error_surface = pad.SubSurface(error_bounds);
+ Surface elements_surface = pad.SubSurface(elements_bounds);
DrawError(error_surface);
DrawElements(elements_surface);
@@ -2262,17 +2654,28 @@ public:
Size(width, copy_height));
}
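+ // Draw a hint at the bottom of the form window reminding the user that
+ // Alt+Enter submits the form using its first (default) action.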
+ void DrawSubmitHint(Surface &surface, bool is_active) {
+ surface.MoveCursor(2, surface.GetHeight() - 1);
+ if (is_active)
+ surface.AttributeOn(A_BOLD | COLOR_PAIR(BlackOnWhite));
+ surface.Printf("[Press Alt+Enter to %s]",
+ m_delegate_sp->GetAction(0).GetLabel().c_str());
+ if (is_active)
+ surface.AttributeOff(A_BOLD | COLOR_PAIR(BlackOnWhite));
+ }
+
bool WindowDelegateDraw(Window &window, bool force) override {
m_delegate_sp->UpdateFieldsVisibility();
window.Erase();
window.DrawTitleBox(m_delegate_sp->GetName().c_str(),
- "Press Esc to cancel");
+ "Press Esc to Cancel");
+ DrawSubmitHint(window, window.IsActive());
Rect content_bounds = window.GetFrame();
content_bounds.Inset(2, 2);
- DerivedWindow content_surface = DerivedWindow(window, content_bounds);
+ Surface content_surface = window.SubSurface(content_bounds);
DrawContent(content_surface);
return true;
@@ -2391,8 +2794,8 @@ public:
return eKeyHandled;
}
- void ExecuteAction(Window &window) {
- FormAction &action = m_delegate_sp->GetAction(m_selection_index);
+ void ExecuteAction(Window &window, int index) {
+ FormAction &action = m_delegate_sp->GetAction(index);
action.Execute(window);
if (m_delegate_sp->HasError()) {
m_first_visible_line = 0;
@@ -2401,20 +2804,27 @@ public:
}
}
+ // Always return eKeyHandled to absorb all events since forms are always
+ // added as pop-ups that should take full control until canceled or submitted.
HandleCharResult WindowDelegateHandleChar(Window &window, int key) override {
switch (key) {
case '\r':
case '\n':
case KEY_ENTER:
if (m_selection_type == SelectionType::Action) {
- ExecuteAction(window);
+ ExecuteAction(window, m_selection_index);
return eKeyHandled;
}
break;
+ case KEY_ALT_ENTER:
+ ExecuteAction(window, 0);
+ return eKeyHandled;
case '\t':
- return SelectNext(key);
+ SelectNext(key);
+ return eKeyHandled;
case KEY_SHIFT_TAB:
- return SelectPrevious(key);
+ SelectPrevious(key);
+ return eKeyHandled;
case KEY_ESCAPE:
window.GetParent()->RemoveSubWindow(&window);
return eKeyHandled;
@@ -2426,10 +2836,24 @@ public:
// to that field.
if (m_selection_type == SelectionType::Field) {
FieldDelegate *field = m_delegate_sp->GetField(m_selection_index);
- return field->FieldDelegateHandleChar(key);
+ if (field->FieldDelegateHandleChar(key) == eKeyHandled)
+ return eKeyHandled;
}
- return eKeyNotHandled;
+ // If the key wasn't handled by the possibly selected field, handle some
+ // extra keys for navigation.
+ switch (key) {
+ case KEY_DOWN:
+ SelectNext(key);
+ return eKeyHandled;
+ case KEY_UP:
+ SelectPrevious(key);
+ return eKeyHandled;
+ default:
+ break;
+ }
+
+ return eKeyHandled;
}
protected:
@@ -2651,6 +3075,801 @@ protected:
ProcessPluginFieldDelegate *m_plugin_field;
};
+class TargetCreateFormDelegate : public FormDelegate {
+public:
+ TargetCreateFormDelegate(Debugger &debugger) : m_debugger(debugger) {
+ m_executable_field = AddFileField("Executable", "", /*need_to_exist=*/true,
+ /*required=*/true);
+ m_core_file_field = AddFileField("Core File", "", /*need_to_exist=*/true,
+ /*required=*/false);
+ m_symbol_file_field = AddFileField(
+ "Symbol File", "", /*need_to_exist=*/true, /*required=*/false);
+ m_show_advanced_field = AddBooleanField("Show advanced settings.", false);
+ m_remote_file_field = AddFileField(
+ "Remote File", "", /*need_to_exist=*/false, /*required=*/false);
+ m_arch_field = AddArchField("Architecture", "", /*required=*/false);
+ m_platform_field = AddPlatformPluginField(debugger);
+ m_load_dependent_files_field =
+ AddChoicesField("Load Dependents", 3, GetLoadDependentFilesChoices());
+
+ AddAction("Create", [this](Window &window) { CreateTarget(window); });
+ }
+
+ std::string GetName() override { return "Create Target"; }
+
+ void UpdateFieldsVisibility() override {
+ if (m_show_advanced_field->GetBoolean()) {
+ m_remote_file_field->FieldDelegateShow();
+ m_arch_field->FieldDelegateShow();
+ m_platform_field->FieldDelegateShow();
+ m_load_dependent_files_field->FieldDelegateShow();
+ } else {
+ m_remote_file_field->FieldDelegateHide();
+ m_arch_field->FieldDelegateHide();
+ m_platform_field->FieldDelegateHide();
+ m_load_dependent_files_field->FieldDelegateHide();
+ }
+ }
+
+ static constexpr const char *kLoadDependentFilesNo = "No";
+ static constexpr const char *kLoadDependentFilesYes = "Yes";
+ static constexpr const char *kLoadDependentFilesExecOnly = "Executable only";
+
+ std::vector<std::string> GetLoadDependentFilesChoices() {
+ std::vector<std::string> load_dependents_options;
+ load_dependents_options.push_back(kLoadDependentFilesExecOnly);
+ load_dependents_options.push_back(kLoadDependentFilesYes);
+ load_dependents_options.push_back(kLoadDependentFilesNo);
+ return load_dependents_options;
+ }
+
+ LoadDependentFiles GetLoadDependentFiles() {
+ std::string choice = m_load_dependent_files_field->GetChoiceContent();
+ if (choice == kLoadDependentFilesNo)
+ return eLoadDependentsNo;
+ if (choice == kLoadDependentFilesYes)
+ return eLoadDependentsYes;
+ return eLoadDependentsDefault;
+ }
+
+ OptionGroupPlatform GetPlatformOptions() {
+ OptionGroupPlatform platform_options(false);
+ platform_options.SetPlatformName(m_platform_field->GetPluginName().c_str());
+ return platform_options;
+ }
+
+ TargetSP GetTarget() {
+ OptionGroupPlatform platform_options = GetPlatformOptions();
+ TargetSP target_sp;
+ Status status = m_debugger.GetTargetList().CreateTarget(
+ m_debugger, m_executable_field->GetPath(),
+ m_arch_field->GetArchString(), GetLoadDependentFiles(),
+ &platform_options, target_sp);
+
+ if (status.Fail()) {
+ SetError(status.AsCString());
+ return nullptr;
+ }
+
+ m_debugger.GetTargetList().SetSelectedTarget(target_sp);
+
+ return target_sp;
+ }
+
+ void SetSymbolFile(TargetSP target_sp) {
+ if (!m_symbol_file_field->IsSpecified())
+ return;
+
+ ModuleSP module_sp(target_sp->GetExecutableModule());
+ if (!module_sp)
+ return;
+
+ module_sp->SetSymbolFileFileSpec(
+ m_symbol_file_field->GetResolvedFileSpec());
+ }
+
+ void SetCoreFile(TargetSP target_sp) {
+ if (!m_core_file_field->IsSpecified())
+ return;
+
+ FileSpec core_file_spec = m_core_file_field->GetResolvedFileSpec();
+
+ FileSpec core_file_directory_spec;
+ core_file_directory_spec.GetDirectory() = core_file_spec.GetDirectory();
+ target_sp->AppendExecutableSearchPaths(core_file_directory_spec);
+
+ ProcessSP process_sp(target_sp->CreateProcess(
+ m_debugger.GetListener(), llvm::StringRef(), &core_file_spec, false));
+
+ if (!process_sp) {
+ SetError("Unable to find process plug-in for core file!");
+ return;
+ }
+
+ Status status = process_sp->LoadCore();
+ if (status.Fail()) {
+ SetError("Can't find plug-in for core file!");
+ return;
+ }
+ }
+
+ void SetRemoteFile(TargetSP target_sp) {
+ if (!m_remote_file_field->IsSpecified())
+ return;
+
+ ModuleSP module_sp(target_sp->GetExecutableModule());
+ if (!module_sp)
+ return;
+
+ FileSpec remote_file_spec = m_remote_file_field->GetFileSpec();
+ module_sp->SetPlatformFileSpec(remote_file_spec);
+ }
+
+ void RemoveTarget(TargetSP target_sp) {
+ m_debugger.GetTargetList().DeleteTarget(target_sp);
+ }
+
+ void CreateTarget(Window &window) {
+ ClearError();
+
+ bool all_fields_are_valid = CheckFieldsValidity();
+ if (!all_fields_are_valid)
+ return;
+
+ TargetSP target_sp = GetTarget();
+ if (HasError())
+ return;
+
+ SetSymbolFile(target_sp);
+ if (HasError()) {
+ RemoveTarget(target_sp);
+ return;
+ }
+
+ SetCoreFile(target_sp);
+ if (HasError()) {
+ RemoveTarget(target_sp);
+ return;
+ }
+
+ SetRemoteFile(target_sp);
+ if (HasError()) {
+ RemoveTarget(target_sp);
+ return;
+ }
+
+ window.GetParent()->RemoveSubWindow(&window);
+ }
+
+protected:
+ Debugger &m_debugger;
+
+ FileFieldDelegate *m_executable_field;
+ FileFieldDelegate *m_core_file_field;
+ FileFieldDelegate *m_symbol_file_field;
+ BooleanFieldDelegate *m_show_advanced_field;
+ FileFieldDelegate *m_remote_file_field;
+ ArchFieldDelegate *m_arch_field;
+ PlatformPluginFieldDelegate *m_platform_field;
+ ChoicesFieldDelegate *m_load_dependent_files_field;
+};
+
+class ProcessLaunchFormDelegate : public FormDelegate {
+public:
+ ProcessLaunchFormDelegate(Debugger &debugger, WindowSP main_window_sp)
+ : m_debugger(debugger), m_main_window_sp(main_window_sp) {
+
+ m_arguments_field = AddArgumentsField();
+ SetArgumentsFieldDefaultValue();
+ m_target_environment_field =
+ AddEnvironmentVariableListField("Target Environment Variables");
+ SetTargetEnvironmentFieldDefaultValue();
+ m_working_directory_field = AddDirectoryField(
+ "Working Directory", GetDefaultWorkingDirectory().c_str(), true, false);
+
+ m_show_advanced_field = AddBooleanField("Show advanced settings.", false);
+
+ m_stop_at_entry_field = AddBooleanField("Stop at entry point.", false);
+ m_detach_on_error_field =
+ AddBooleanField("Detach on error.", GetDefaultDetachOnError());
+ m_disable_aslr_field =
+ AddBooleanField("Disable ASLR", GetDefaultDisableASLR());
+ m_plugin_field = AddProcessPluginField();
+ m_arch_field = AddArchField("Architecture", "", false);
+ m_shell_field = AddFileField("Shell", "", true, false);
+ m_expand_shell_arguments_field =
+ AddBooleanField("Expand shell arguments.", false);
+
+ m_disable_standard_io_field =
+ AddBooleanField("Disable Standard IO", GetDefaultDisableStandardIO());
+ m_standard_output_field =
+ AddFileField("Standard Output File", "", /*need_to_exist=*/false,
+ /*required=*/false);
+ m_standard_error_field =
+ AddFileField("Standard Error File", "", /*need_to_exist=*/false,
+ /*required=*/false);
+ m_standard_input_field =
+ AddFileField("Standard Input File", "", /*need_to_exist=*/false,
+ /*required=*/false);
+
+ m_show_inherited_environment_field =
+ AddBooleanField("Show inherited environment variables.", false);
+ m_inherited_environment_field =
+ AddEnvironmentVariableListField("Inherited Environment Variables");
+ SetInheritedEnvironmentFieldDefaultValue();
+
+ AddAction("Launch", [this](Window &window) { Launch(window); });
+ }
+
+ std::string GetName() override { return "Launch Process"; }
+
+ void UpdateFieldsVisibility() override {
+ if (m_show_advanced_field->GetBoolean()) {
+ m_stop_at_entry_field->FieldDelegateShow();
+ m_detach_on_error_field->FieldDelegateShow();
+ m_disable_aslr_field->FieldDelegateShow();
+ m_plugin_field->FieldDelegateShow();
+ m_arch_field->FieldDelegateShow();
+ m_shell_field->FieldDelegateShow();
+ m_expand_shell_arguments_field->FieldDelegateShow();
+ m_disable_standard_io_field->FieldDelegateShow();
+ if (m_disable_standard_io_field->GetBoolean()) {
+ m_standard_input_field->FieldDelegateHide();
+ m_standard_output_field->FieldDelegateHide();
+ m_standard_error_field->FieldDelegateHide();
+ } else {
+ m_standard_input_field->FieldDelegateShow();
+ m_standard_output_field->FieldDelegateShow();
+ m_standard_error_field->FieldDelegateShow();
+ }
+ m_show_inherited_environment_field->FieldDelegateShow();
+ if (m_show_inherited_environment_field->GetBoolean())
+ m_inherited_environment_field->FieldDelegateShow();
+ else
+ m_inherited_environment_field->FieldDelegateHide();
+ } else {
+ m_stop_at_entry_field->FieldDelegateHide();
+ m_detach_on_error_field->FieldDelegateHide();
+ m_disable_aslr_field->FieldDelegateHide();
+ m_plugin_field->FieldDelegateHide();
+ m_arch_field->FieldDelegateHide();
+ m_shell_field->FieldDelegateHide();
+ m_expand_shell_arguments_field->FieldDelegateHide();
+ m_disable_standard_io_field->FieldDelegateHide();
+ m_standard_input_field->FieldDelegateHide();
+ m_standard_output_field->FieldDelegateHide();
+ m_standard_error_field->FieldDelegateHide();
+ m_show_inherited_environment_field->FieldDelegateHide();
+ m_inherited_environment_field->FieldDelegateHide();
+ }
+ }
+
+ // Methods for setting the default value of the fields.
+
+ void SetArgumentsFieldDefaultValue() {
+ TargetSP target = m_debugger.GetSelectedTarget();
+ if (target == nullptr)
+ return;
+
+ const Args &target_arguments =
+ target->GetProcessLaunchInfo().GetArguments();
+ m_arguments_field->AddArguments(target_arguments);
+ }
+
+ void SetTargetEnvironmentFieldDefaultValue() {
+ TargetSP target = m_debugger.GetSelectedTarget();
+ if (target == nullptr)
+ return;
+
+ const Environment &target_environment = target->GetTargetEnvironment();
+ m_target_environment_field->AddEnvironmentVariables(target_environment);
+ }
+
+ void SetInheritedEnvironmentFieldDefaultValue() {
+ TargetSP target = m_debugger.GetSelectedTarget();
+ if (target == nullptr)
+ return;
+
+ const Environment &inherited_environment =
+ target->GetInheritedEnvironment();
+ m_inherited_environment_field->AddEnvironmentVariables(
+ inherited_environment);
+ }
+
+ std::string GetDefaultWorkingDirectory() {
+ TargetSP target = m_debugger.GetSelectedTarget();
+ if (target == nullptr)
+ return "";
+
+ PlatformSP platform = target->GetPlatform();
+ return platform->GetWorkingDirectory().GetPath();
+ }
+
+ bool GetDefaultDisableASLR() {
+ TargetSP target = m_debugger.GetSelectedTarget();
+ if (target == nullptr)
+ return false;
+
+ return target->GetDisableASLR();
+ }
+
+ bool GetDefaultDisableStandardIO() {
+ TargetSP target = m_debugger.GetSelectedTarget();
+ if (target == nullptr)
+ return true;
+
+ return target->GetDisableSTDIO();
+ }
+
+ bool GetDefaultDetachOnError() {
+ TargetSP target = m_debugger.GetSelectedTarget();
+ if (target == nullptr)
+ return true;
+
+ return target->GetDetachOnError();
+ }
+
+ // Methods for getting the necessary information and setting them to the
+ // ProcessLaunchInfo.
+
+ void GetExecutableSettings(ProcessLaunchInfo &launch_info) {
+ TargetSP target = m_debugger.GetSelectedTarget();
+ ModuleSP executable_module = target->GetExecutableModule();
+ llvm::StringRef target_settings_argv0 = target->GetArg0();
+
+ if (!target_settings_argv0.empty()) {
+ launch_info.GetArguments().AppendArgument(target_settings_argv0);
+ launch_info.SetExecutableFile(executable_module->GetPlatformFileSpec(),
+ false);
+ return;
+ }
+
+ launch_info.SetExecutableFile(executable_module->GetPlatformFileSpec(),
+ true);
+ }
+
+ void GetArguments(ProcessLaunchInfo &launch_info) {
+ TargetSP target = m_debugger.GetSelectedTarget();
+ Args arguments = m_arguments_field->GetArguments();
+ launch_info.GetArguments().AppendArguments(arguments);
+ }
+
+ void GetEnvironment(ProcessLaunchInfo &launch_info) {
+ Environment target_environment =
+ m_target_environment_field->GetEnvironment();
+ Environment inherited_environment =
+ m_inherited_environment_field->GetEnvironment();
+ launch_info.GetEnvironment().insert(target_environment.begin(),
+ target_environment.end());
+ launch_info.GetEnvironment().insert(inherited_environment.begin(),
+ inherited_environment.end());
+ }
+
+ void GetWorkingDirectory(ProcessLaunchInfo &launch_info) {
+ if (m_working_directory_field->IsSpecified())
+ launch_info.SetWorkingDirectory(
+ m_working_directory_field->GetResolvedFileSpec());
+ }
+
+ void GetStopAtEntry(ProcessLaunchInfo &launch_info) {
+ if (m_stop_at_entry_field->GetBoolean())
+ launch_info.GetFlags().Set(eLaunchFlagStopAtEntry);
+ else
+ launch_info.GetFlags().Clear(eLaunchFlagStopAtEntry);
+ }
+
+ void GetDetachOnError(ProcessLaunchInfo &launch_info) {
+ if (m_detach_on_error_field->GetBoolean())
+ launch_info.GetFlags().Set(eLaunchFlagDetachOnError);
+ else
+ launch_info.GetFlags().Clear(eLaunchFlagDetachOnError);
+ }
+
+ void GetDisableASLR(ProcessLaunchInfo &launch_info) {
+ if (m_disable_aslr_field->GetBoolean())
+ launch_info.GetFlags().Set(eLaunchFlagDisableASLR);
+ else
+ launch_info.GetFlags().Clear(eLaunchFlagDisableASLR);
+ }
+
+ void GetPlugin(ProcessLaunchInfo &launch_info) {
+ launch_info.SetProcessPluginName(m_plugin_field->GetPluginName());
+ }
+
+ void GetArch(ProcessLaunchInfo &launch_info) {
+ if (!m_arch_field->IsSpecified())
+ return;
+
+ TargetSP target_sp = m_debugger.GetSelectedTarget();
+ PlatformSP platform_sp =
+ target_sp ? target_sp->GetPlatform() : PlatformSP();
+ launch_info.GetArchitecture() = Platform::GetAugmentedArchSpec(
+ platform_sp.get(), m_arch_field->GetArchString());
+ }
+
+ void GetShell(ProcessLaunchInfo &launch_info) {
+ if (!m_shell_field->IsSpecified())
+ return;
+
+ launch_info.SetShell(m_shell_field->GetResolvedFileSpec());
+ launch_info.SetShellExpandArguments(
+ m_expand_shell_arguments_field->GetBoolean());
+ }
+
+ void GetStandardIO(ProcessLaunchInfo &launch_info) {
+ if (m_disable_standard_io_field->GetBoolean()) {
+ launch_info.GetFlags().Set(eLaunchFlagDisableSTDIO);
+ return;
+ }
+
+ FileAction action;
+ if (m_standard_input_field->IsSpecified()) {
+ action.Open(STDIN_FILENO, m_standard_input_field->GetFileSpec(), true,
+ false);
+ launch_info.AppendFileAction(action);
+ }
+ if (m_standard_output_field->IsSpecified()) {
+ action.Open(STDOUT_FILENO, m_standard_output_field->GetFileSpec(), false,
+ true);
+ launch_info.AppendFileAction(action);
+ }
+ if (m_standard_error_field->IsSpecified()) {
+ action.Open(STDERR_FILENO, m_standard_error_field->GetFileSpec(), false,
+ true);
+ launch_info.AppendFileAction(action);
+ }
+ }
+
+ void GetInheritTCC(ProcessLaunchInfo &launch_info) {
+ if (m_debugger.GetSelectedTarget()->GetInheritTCC())
+ launch_info.GetFlags().Set(eLaunchFlagInheritTCCFromParent);
+ }
+
+ ProcessLaunchInfo GetLaunchInfo() {
+ ProcessLaunchInfo launch_info;
+
+ GetExecutableSettings(launch_info);
+ GetArguments(launch_info);
+ GetEnvironment(launch_info);
+ GetWorkingDirectory(launch_info);
+ GetStopAtEntry(launch_info);
+ GetDetachOnError(launch_info);
+ GetDisableASLR(launch_info);
+ GetPlugin(launch_info);
+ GetArch(launch_info);
+ GetShell(launch_info);
+ GetStandardIO(launch_info);
+ GetInheritTCC(launch_info);
+
+ return launch_info;
+ }
+
+ bool StopRunningProcess() {
+ ExecutionContext exe_ctx =
+ m_debugger.GetCommandInterpreter().GetExecutionContext();
+
+ if (!exe_ctx.HasProcessScope())
+ return false;
+
+ Process *process = exe_ctx.GetProcessPtr();
+ if (!(process && process->IsAlive()))
+ return false;
+
+ FormDelegateSP form_delegate_sp =
+ FormDelegateSP(new DetachOrKillProcessFormDelegate(process));
+ Rect bounds = m_main_window_sp->GetCenteredRect(85, 8);
+ WindowSP form_window_sp = m_main_window_sp->CreateSubWindow(
+ form_delegate_sp->GetName().c_str(), bounds, true);
+ WindowDelegateSP window_delegate_sp =
+ WindowDelegateSP(new FormWindowDelegate(form_delegate_sp));
+ form_window_sp->SetDelegate(window_delegate_sp);
+
+ return true;
+ }
+
+ Target *GetTarget() {
+ Target *target = m_debugger.GetSelectedTarget().get();
+
+ if (target == nullptr) {
+ SetError("No target exists!");
+ return nullptr;
+ }
+
+ ModuleSP exe_module_sp = target->GetExecutableModule();
+
+ if (exe_module_sp == nullptr) {
+ SetError("No executable in target!");
+ return nullptr;
+ }
+
+ return target;
+ }
+
+ void Launch(Window &window) {
+ ClearError();
+
+ bool all_fields_are_valid = CheckFieldsValidity();
+ if (!all_fields_are_valid)
+ return;
+
+ bool process_is_running = StopRunningProcess();
+ if (process_is_running)
+ return;
+
+ Target *target = GetTarget();
+ if (HasError())
+ return;
+
+ StreamString stream;
+ ProcessLaunchInfo launch_info = GetLaunchInfo();
+ Status status = target->Launch(launch_info, &stream);
+
+ if (status.Fail()) {
+ SetError(status.AsCString());
+ return;
+ }
+
+ ProcessSP process_sp(target->GetProcessSP());
+ if (!process_sp) {
+ SetError("Launched successfully but target has no process!");
+ return;
+ }
+
+ window.GetParent()->RemoveSubWindow(&window);
+ }
+
+protected:
+ Debugger &m_debugger;
+ WindowSP m_main_window_sp;
+
+ ArgumentsFieldDelegate *m_arguments_field;
+ EnvironmentVariableListFieldDelegate *m_target_environment_field;
+ DirectoryFieldDelegate *m_working_directory_field;
+
+ BooleanFieldDelegate *m_show_advanced_field;
+
+ BooleanFieldDelegate *m_stop_at_entry_field;
+ BooleanFieldDelegate *m_detach_on_error_field;
+ BooleanFieldDelegate *m_disable_aslr_field;
+ ProcessPluginFieldDelegate *m_plugin_field;
+ ArchFieldDelegate *m_arch_field;
+ FileFieldDelegate *m_shell_field;
+ BooleanFieldDelegate *m_expand_shell_arguments_field;
+ BooleanFieldDelegate *m_disable_standard_io_field;
+ FileFieldDelegate *m_standard_input_field;
+ FileFieldDelegate *m_standard_output_field;
+ FileFieldDelegate *m_standard_error_field;
+
+ BooleanFieldDelegate *m_show_inherited_environment_field;
+ EnvironmentVariableListFieldDelegate *m_inherited_environment_field;
+};
+
+////////////
+// Searchers
+////////////
+
+class SearcherDelegate {
+public:
+ SearcherDelegate() {}
+
+ virtual ~SearcherDelegate() = default;
+
+ virtual int GetNumberOfMatches() = 0;
+
+ // Get the string that will be displayed for the match at the input index.
+ virtual const std::string &GetMatchTextAtIndex(int index) = 0;
+
+ // Update the matches of the search. This is executed every time the text
+ // field handles an event.
+ virtual void UpdateMatches(const std::string &text) = 0;
+
+ // Execute the user callback given the index of some match. This is executed
+ // once the user selects a match.
+ virtual void ExecuteCallback(int match_index) = 0;
+};
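For illustration, a minimal SearcherDelegate that filters a fixed list of strings might look like the sketch below (the class, its members, and the substring filter are hypothetical and only meant to show the contract of the four virtual methods; <string>, <vector>, and <functional> are assumed to be available):

class FixedListSearcherDelegate : public SearcherDelegate {
public:
  FixedListSearcherDelegate(std::vector<std::string> names,
                            std::function<void(const std::string &)> callback)
      : m_names(std::move(names)), m_callback(std::move(callback)) {}

  int GetNumberOfMatches() override {
    return static_cast<int>(m_matches.size());
  }

  const std::string &GetMatchTextAtIndex(int index) override {
    return m_matches[index];
  }

  // Recompute the matches every time the text field content changes.
  void UpdateMatches(const std::string &text) override {
    m_matches.clear();
    for (const std::string &name : m_names)
      if (name.find(text) != std::string::npos)
        m_matches.push_back(name);
  }

  // Invoked once the user selects a match in the searcher window.
  void ExecuteCallback(int match_index) override {
    m_callback(m_matches[match_index]);
  }

protected:
  std::vector<std::string> m_names;
  std::vector<std::string> m_matches;
  std::function<void(const std::string &)> m_callback;
};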
+
+typedef std::shared_ptr<SearcherDelegate> SearcherDelegateSP;
+
+class SearcherWindowDelegate : public WindowDelegate {
+public:
+ SearcherWindowDelegate(SearcherDelegateSP &delegate_sp)
+ : m_delegate_sp(delegate_sp), m_text_field("Search", "", false),
+ m_selected_match(0), m_first_visible_match(0) {
+ ;
+ }
+
+ // A completion window is padded by one character from all sides. A text field
+ // is first drawn for inputting the searcher request, then the list of
+ // matches is displayed in a scrollable list.
+ //
+ // ___<Searcher Window Name>____________________________
+ // | |
+ // | __[Search]_______________________________________ |
+ // | | | |
+ // | |_______________________________________________| |
+ // | - Match 1. |
+ // | - Match 2. |
+ // | - ... |
+ // | |
+ // |____________________________[Press Esc to Cancel]__|
+ //
+
+ // Get the index of the last visible match, assuming at least one match
+ // exists.
+ int GetLastVisibleMatch(int height) {
+ int index = m_first_visible_match + height;
+ return std::min(index, m_delegate_sp->GetNumberOfMatches()) - 1;
+ }
+
+ int GetNumberOfVisibleMatches(int height) {
+ return GetLastVisibleMatch(height) - m_first_visible_match + 1;
+ }
+
+ void UpdateScrolling(Surface &surface) {
+ if (m_selected_match < m_first_visible_match) {
+ m_first_visible_match = m_selected_match;
+ return;
+ }
+
+ int height = surface.GetHeight();
+ int last_visible_match = GetLastVisibleMatch(height);
+ if (m_selected_match > last_visible_match) {
+ m_first_visible_match = m_selected_match - height + 1;
+ }
+ }
+
+ void DrawMatches(Surface &surface) {
+ if (m_delegate_sp->GetNumberOfMatches() == 0)
+ return;
+
+ UpdateScrolling(surface);
+
+ int count = GetNumberOfVisibleMatches(surface.GetHeight());
+ for (int i = 0; i < count; i++) {
+ surface.MoveCursor(1, i);
+ int current_match = m_first_visible_match + i;
+ if (current_match == m_selected_match)
+ surface.AttributeOn(A_REVERSE);
+ surface.PutCString(
+ m_delegate_sp->GetMatchTextAtIndex(current_match).c_str());
+ if (current_match == m_selected_match)
+ surface.AttributeOff(A_REVERSE);
+ }
+ }
+
+ void DrawContent(Surface &surface) {
+ Rect content_bounds = surface.GetFrame();
+ Rect text_field_bounds, matchs_bounds;
+ content_bounds.HorizontalSplit(m_text_field.FieldDelegateGetHeight(),
+ text_field_bounds, matchs_bounds);
+ Surface text_field_surface = surface.SubSurface(text_field_bounds);
+ Surface matches_surface = surface.SubSurface(matchs_bounds);
+
+ m_text_field.FieldDelegateDraw(text_field_surface, true);
+ DrawMatches(matches_surface);
+ }
+
+ bool WindowDelegateDraw(Window &window, bool force) override {
+ window.Erase();
+
+ window.DrawTitleBox(window.GetName(), "Press Esc to Cancel");
+
+ Rect content_bounds = window.GetFrame();
+ content_bounds.Inset(2, 2);
+ Surface content_surface = window.SubSurface(content_bounds);
+
+ DrawContent(content_surface);
+ return true;
+ }
+
+ void SelectNext() {
+ if (m_selected_match != m_delegate_sp->GetNumberOfMatches() - 1)
+ m_selected_match++;
+ return;
+ }
+
+ void SelectPrevious() {
+ if (m_selected_match != 0)
+ m_selected_match--;
+ return;
+ }
+
+ void ExecuteCallback(Window &window) {
+ m_delegate_sp->ExecuteCallback(m_selected_match);
+ window.GetParent()->RemoveSubWindow(&window);
+ }
+
+ void UpdateMatches() {
+ m_delegate_sp->UpdateMatches(m_text_field.GetText());
+ m_selected_match = 0;
+ }
+
+ HandleCharResult WindowDelegateHandleChar(Window &window, int key) override {
+ switch (key) {
+ case '\r':
+ case '\n':
+ case KEY_ENTER:
+ ExecuteCallback(window);
+ return eKeyHandled;
+ case '\t':
+ case KEY_DOWN:
+ SelectNext();
+ return eKeyHandled;
+ case KEY_SHIFT_TAB:
+ case KEY_UP:
+ SelectPrevious();
+ return eKeyHandled;
+ case KEY_ESCAPE:
+ window.GetParent()->RemoveSubWindow(&window);
+ return eKeyHandled;
+ default:
+ break;
+ }
+
+ if (m_text_field.FieldDelegateHandleChar(key) == eKeyHandled)
+ UpdateMatches();
+
+ return eKeyHandled;
+ }
+
+protected:
+ SearcherDelegateSP m_delegate_sp;
+ TextFieldDelegate m_text_field;
+ // The index of the currently selected match.
+ int m_selected_match;
+ // The index of the first visible match.
+ int m_first_visible_match;
+};
+
+//////////////////////////////
+// Searcher Delegate Instances
+//////////////////////////////
+
+// This is a searcher delegate wrapper around CommandCompletions common
+// callbacks. The callbacks are only given the match string. The completion_mask
+// can be a combination of CommonCompletionTypes.
+class CommonCompletionSearcherDelegate : public SearcherDelegate {
+public:
+ typedef std::function<void(const std::string &)> CallbackType;
+
+ CommonCompletionSearcherDelegate(Debugger &debugger, uint32_t completion_mask,
+ CallbackType callback)
+ : m_debugger(debugger), m_completion_mask(completion_mask),
+ m_callback(callback) {}
+
+ int GetNumberOfMatches() override { return m_matches.GetSize(); }
+
+ const std::string &GetMatchTextAtIndex(int index) override {
+ return m_matches[index];
+ }
+
+ void UpdateMatches(const std::string &text) override {
+ CompletionResult result;
+ CompletionRequest request(text.c_str(), text.size(), result);
+ CommandCompletions::InvokeCommonCompletionCallbacks(
+ m_debugger.GetCommandInterpreter(), m_completion_mask, request,
+ nullptr);
+ result.GetMatches(m_matches);
+ }
+
+ void ExecuteCallback(int match_index) override {
+ m_callback(m_matches[match_index]);
+ }
+
+protected:
+ Debugger &m_debugger;
+ // A compound mask from CommonCompletionTypes.
+ uint32_t m_completion_mask;
+ // A callback to execute once the user selects a match. The match is passed to
+ // the callback as a string.
+ CallbackType m_callback;
+ StringList m_matches;
+};
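A rough usage sketch, following the same form-window pattern used elsewhere in this file (the completion mask, window title, and the m_debugger and main_window_sp variables are illustrative assumptions, not taken from the patch):

SearcherDelegateSP searcher_delegate_sp(new CommonCompletionSearcherDelegate(
    m_debugger, CommandCompletions::eSourceFileCompletion,
    [](const std::string &choice) {
      // React to the selected source file here.
    }));
WindowDelegateSP window_delegate_sp(
    new SearcherWindowDelegate(searcher_delegate_sp));
Rect bounds = main_window_sp->GetCenteredRect(80, 22);
WindowSP searcher_window_sp =
    main_window_sp->CreateSubWindow("Select Source File", bounds, true);
searcher_window_sp->SetDelegate(window_delegate_sp);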
+
+////////
+// Menus
+////////
+
class MenuDelegate {
public:
virtual ~MenuDelegate() = default;
@@ -3078,15 +4297,15 @@ public:
bool done = false;
int delay_in_tenths_of_a_second = 1;
- // Alas the threading model in curses is a bit lame so we need to resort to
- // polling every 0.5 seconds. We could poll for stdin ourselves and then
- // pass the keys down but then we need to translate all of the escape
+ // Alas the threading model in curses is a bit lame so we need to resort
+ // to polling every 0.5 seconds. We could poll for stdin ourselves and
+ // then pass the keys down but then we need to translate all of the escape
// sequences ourselves. So we resort to polling for input because we need
// to receive async process events while in this loop.
- halfdelay(delay_in_tenths_of_a_second); // Poll using some number of tenths
- // of seconds seconds when calling
- // Window::GetChar()
+ halfdelay(delay_in_tenths_of_a_second); // Poll using some number of
+ // tenths of a second when
+ // calling Window::GetChar()
ListenerSP listener_sp(
Listener::MakeListener("lldb.IOHandler.curses.Application"));
@@ -3392,9 +4611,14 @@ public:
TreeItem *&selected_item) {
return;
}
- virtual bool TreeDelegateItemSelected(
- TreeItem &item) = 0; // Return true if we need to update views
+ // This is invoked when a tree item is selected. If true is returned, the
+ // views are updated.
+ virtual bool TreeDelegateItemSelected(TreeItem &item) = 0;
virtual bool TreeDelegateExpandRootByDefault() { return false; }
+ // This is mostly useful for root tree delegates. If false is returned,
+ // drawing will be skipped completely. This is needed, for instance, to
+ // skip drawing the threads tree when there is no running process.
+ virtual bool TreeDelegateShouldDraw() { return true; }
};
typedef std::shared_ptr<TreeDelegate> TreeDelegateSP;
@@ -3584,6 +4808,16 @@ public:
void SetIdentifier(uint64_t identifier) { m_identifier = identifier; }
+ const std::string &GetText() const { return m_text; }
+
+ void SetText(const char *text) {
+ if (text == nullptr) {
+ m_text.clear();
+ return;
+ }
+ m_text = text;
+ }
+
void SetMightHaveChildren(bool b) { m_might_have_children = b; }
protected:
@@ -3591,6 +4825,7 @@ protected:
TreeDelegate &m_delegate;
void *m_user_data;
uint64_t m_identifier;
+ std::string m_text;
int m_row_idx; // Zero based visible row index, -1 if not visible or for the
// root item
std::vector<TreeItem> m_children;
@@ -3609,21 +4844,6 @@ public:
int NumVisibleRows() const { return m_max_y - m_min_y; }
bool WindowDelegateDraw(Window &window, bool force) override {
- ExecutionContext exe_ctx(
- m_debugger.GetCommandInterpreter().GetExecutionContext());
- Process *process = exe_ctx.GetProcessPtr();
-
- bool display_content = false;
- if (process) {
- StateType state = process->GetState();
- if (StateIsStoppedState(state, true)) {
- // We are stopped, so it is ok to
- display_content = true;
- } else if (StateIsRunningState(state)) {
- return true; // Don't do any updating when we are running
- }
- }
-
m_min_x = 2;
m_min_y = 1;
m_max_x = window.GetWidth() - 1;
@@ -3632,35 +4852,36 @@ public:
window.Erase();
window.DrawTitleBox(window.GetName());
- if (display_content) {
- const int num_visible_rows = NumVisibleRows();
- m_num_rows = 0;
- m_root.CalculateRowIndexes(m_num_rows);
- m_delegate_sp->TreeDelegateUpdateSelection(m_root, m_selected_row_idx,
- m_selected_item);
-
- // If we unexpanded while having something selected our total number of
- // rows is less than the num visible rows, then make sure we show all the
- // rows by setting the first visible row accordingly.
- if (m_first_visible_row > 0 && m_num_rows < num_visible_rows)
- m_first_visible_row = 0;
-
- // Make sure the selected row is always visible
- if (m_selected_row_idx < m_first_visible_row)
- m_first_visible_row = m_selected_row_idx;
- else if (m_first_visible_row + num_visible_rows <= m_selected_row_idx)
- m_first_visible_row = m_selected_row_idx - num_visible_rows + 1;
-
- int row_idx = 0;
- int num_rows_left = num_visible_rows;
- m_root.Draw(window, m_first_visible_row, m_selected_row_idx, row_idx,
- num_rows_left);
- // Get the selected row
- m_selected_item = m_root.GetItemForRowIndex(m_selected_row_idx);
- } else {
+ if (!m_delegate_sp->TreeDelegateShouldDraw()) {
m_selected_item = nullptr;
+ return true;
}
+ const int num_visible_rows = NumVisibleRows();
+ m_num_rows = 0;
+ m_root.CalculateRowIndexes(m_num_rows);
+ m_delegate_sp->TreeDelegateUpdateSelection(m_root, m_selected_row_idx,
+ m_selected_item);
+
+ // If we collapsed items while something was selected and the total number
+ // of rows is now less than the number of visible rows, make sure we show
+ // all the rows by setting the first visible row accordingly.
+ if (m_first_visible_row > 0 && m_num_rows < num_visible_rows)
+ m_first_visible_row = 0;
+
+ // Make sure the selected row is always visible
+ if (m_selected_row_idx < m_first_visible_row)
+ m_first_visible_row = m_selected_row_idx;
+ else if (m_first_visible_row + num_visible_rows <= m_selected_row_idx)
+ m_first_visible_row = m_selected_row_idx - num_visible_rows + 1;
+
+ int row_idx = 0;
+ int num_rows_left = num_visible_rows;
+ m_root.Draw(window, m_first_visible_row, m_selected_row_idx, row_idx,
+ num_rows_left);
+ // Get the selected row
+ m_selected_item = m_root.GetItemForRowIndex(m_selected_row_idx);
+
return true; // Drawing handled
}
@@ -3788,6 +5009,23 @@ protected:
int m_max_y;
};
+// A tree delegate that just draws the text member of the tree item; it
+// doesn't have any children or actions.
+class TextTreeDelegate : public TreeDelegate {
+public:
+ TextTreeDelegate() : TreeDelegate() {}
+
+ ~TextTreeDelegate() override = default;
+
+ void TreeDelegateDrawTreeItem(TreeItem &item, Window &window) override {
+ window.PutCStringTruncated(1, item.GetText().c_str());
+ }
+
+ void TreeDelegateGenerateChildren(TreeItem &item) override {}
+
+ bool TreeDelegateItemSelected(TreeItem &item) override { return false; }
+};
+
class FrameTreeDelegate : public TreeDelegate {
public:
FrameTreeDelegate() : TreeDelegate() {
@@ -3952,6 +5190,17 @@ public:
.GetProcessSP();
}
+ bool TreeDelegateShouldDraw() override {
+ ProcessSP process = GetProcess();
+ if (!process)
+ return false;
+
+ if (StateIsRunningState(process->GetState()))
+ return false;
+
+ return true;
+ }
+
void TreeDelegateDrawTreeItem(TreeItem &item, Window &window) override {
ProcessSP process_sp = GetProcess();
if (process_sp && process_sp->IsAlive()) {
@@ -4043,6 +5292,240 @@ protected:
FormatEntity::Entry m_format;
};
+class BreakpointLocationTreeDelegate : public TreeDelegate {
+public:
+ BreakpointLocationTreeDelegate(Debugger &debugger)
+ : TreeDelegate(), m_debugger(debugger) {}
+
+ ~BreakpointLocationTreeDelegate() override = default;
+
+ Process *GetProcess() {
+ ExecutionContext exe_ctx(
+ m_debugger.GetCommandInterpreter().GetExecutionContext());
+ return exe_ctx.GetProcessPtr();
+ }
+
+ BreakpointLocationSP GetBreakpointLocation(const TreeItem &item) {
+ Breakpoint *breakpoint = (Breakpoint *)item.GetUserData();
+ return breakpoint->GetLocationAtIndex(item.GetIdentifier());
+ }
+
+ void TreeDelegateDrawTreeItem(TreeItem &item, Window &window) override {
+ BreakpointLocationSP breakpoint_location = GetBreakpointLocation(item);
+ Process *process = GetProcess();
+ StreamString stream;
+ stream.Printf("%i.%i: ", breakpoint_location->GetBreakpoint().GetID(),
+ breakpoint_location->GetID());
+ Address address = breakpoint_location->GetAddress();
+ address.Dump(&stream, process, Address::DumpStyleResolvedDescription,
+ Address::DumpStyleInvalid);
+ window.PutCStringTruncated(1, stream.GetString().str().c_str());
+ }
+
+ StringList ComputeDetailsList(BreakpointLocationSP breakpoint_location) {
+ StringList details;
+
+ Address address = breakpoint_location->GetAddress();
+ SymbolContext symbol_context;
+ address.CalculateSymbolContext(&symbol_context);
+
+ if (symbol_context.module_sp) {
+ StreamString module_stream;
+ module_stream.PutCString("module = ");
+ symbol_context.module_sp->GetFileSpec().Dump(
+ module_stream.AsRawOstream());
+ details.AppendString(module_stream.GetString());
+ }
+
+ if (symbol_context.comp_unit != nullptr) {
+ StreamString compile_unit_stream;
+ compile_unit_stream.PutCString("compile unit = ");
+ symbol_context.comp_unit->GetPrimaryFile().GetFilename().Dump(
+ &compile_unit_stream);
+ details.AppendString(compile_unit_stream.GetString());
+
+ if (symbol_context.function != nullptr) {
+ StreamString function_stream;
+ function_stream.PutCString("function = ");
+ function_stream.PutCString(
+ symbol_context.function->GetName().AsCString("<unknown>"));
+ details.AppendString(function_stream.GetString());
+ }
+
+ if (symbol_context.line_entry.line > 0) {
+ StreamString location_stream;
+ location_stream.PutCString("location = ");
+ symbol_context.line_entry.DumpStopContext(&location_stream, true);
+ details.AppendString(location_stream.GetString());
+ }
+
+ } else {
+ if (symbol_context.symbol) {
+ StreamString symbol_stream;
+ if (breakpoint_location->IsReExported())
+ symbol_stream.PutCString("re-exported target = ");
+ else
+ symbol_stream.PutCString("symbol = ");
+ symbol_stream.PutCString(
+ symbol_context.symbol->GetName().AsCString("<unknown>"));
+ details.AppendString(symbol_stream.GetString());
+ }
+ }
+
+ Process *process = GetProcess();
+
+ StreamString address_stream;
+ address.Dump(&address_stream, process, Address::DumpStyleLoadAddress,
+ Address::DumpStyleModuleWithFileAddress);
+ details.AppendString(address_stream.GetString());
+
+ BreakpointSiteSP breakpoint_site = breakpoint_location->GetBreakpointSite();
+ if (breakpoint_location->IsIndirect() && breakpoint_site) {
+ Address resolved_address;
+ resolved_address.SetLoadAddress(breakpoint_site->GetLoadAddress(),
+ &breakpoint_location->GetTarget());
+ Symbol *resolved_symbol = resolved_address.CalculateSymbolContextSymbol();
+ if (resolved_symbol) {
+ StreamString indirect_target_stream;
+ indirect_target_stream.PutCString("indirect target = ");
+ indirect_target_stream.PutCString(
+ resolved_symbol->GetName().GetCString());
+ details.AppendString(indirect_target_stream.GetString());
+ }
+ }
+
+ bool is_resolved = breakpoint_location->IsResolved();
+ StreamString resolved_stream;
+ resolved_stream.Printf("resolved = %s", is_resolved ? "true" : "false");
+ details.AppendString(resolved_stream.GetString());
+
+ bool is_hardware = is_resolved && breakpoint_site->IsHardware();
+ StreamString hardware_stream;
+ hardware_stream.Printf("hardware = %s", is_hardware ? "true" : "false");
+ details.AppendString(hardware_stream.GetString());
+
+ StreamString hit_count_stream;
+ hit_count_stream.Printf("hit count = %-4u",
+ breakpoint_location->GetHitCount());
+ details.AppendString(hit_count_stream.GetString());
+
+ return details;
+ }
+
+ void TreeDelegateGenerateChildren(TreeItem &item) override {
+ BreakpointLocationSP breakpoint_location = GetBreakpointLocation(item);
+ StringList details = ComputeDetailsList(breakpoint_location);
+
+ if (!m_string_delegate_sp)
+ m_string_delegate_sp = std::make_shared<TextTreeDelegate>();
+ TreeItem details_tree_item(&item, *m_string_delegate_sp, false);
+
+ item.Resize(details.GetSize(), details_tree_item);
+ for (size_t i = 0; i < details.GetSize(); i++) {
+ item[i].SetText(details.GetStringAtIndex(i));
+ }
+ }
+
+ bool TreeDelegateItemSelected(TreeItem &item) override { return false; }
+
+protected:
+ Debugger &m_debugger;
+ std::shared_ptr<TextTreeDelegate> m_string_delegate_sp;
+};
+
+class BreakpointTreeDelegate : public TreeDelegate {
+public:
+ BreakpointTreeDelegate(Debugger &debugger)
+ : TreeDelegate(), m_debugger(debugger),
+ m_breakpoint_location_delegate_sp() {}
+
+ ~BreakpointTreeDelegate() override = default;
+
+ BreakpointSP GetBreakpoint(const TreeItem &item) {
+ TargetSP target = m_debugger.GetSelectedTarget();
+ BreakpointList &breakpoints = target->GetBreakpointList(false);
+ return breakpoints.GetBreakpointAtIndex(item.GetIdentifier());
+ }
+
+ void TreeDelegateDrawTreeItem(TreeItem &item, Window &window) override {
+ BreakpointSP breakpoint = GetBreakpoint(item);
+ StreamString stream;
+ stream.Format("{0}: ", breakpoint->GetID());
+ breakpoint->GetResolverDescription(&stream);
+ breakpoint->GetFilterDescription(&stream);
+ window.PutCStringTruncated(1, stream.GetString().str().c_str());
+ }
+
+ void TreeDelegateGenerateChildren(TreeItem &item) override {
+ BreakpointSP breakpoint = GetBreakpoint(item);
+
+ if (!m_breakpoint_location_delegate_sp)
+ m_breakpoint_location_delegate_sp =
+ std::make_shared<BreakpointLocationTreeDelegate>(m_debugger);
+ TreeItem breakpoint_location_tree_item(
+ &item, *m_breakpoint_location_delegate_sp, true);
+
+ item.Resize(breakpoint->GetNumLocations(), breakpoint_location_tree_item);
+ for (size_t i = 0; i < breakpoint->GetNumLocations(); i++) {
+ item[i].SetIdentifier(i);
+ item[i].SetUserData(breakpoint.get());
+ }
+ }
+
+ bool TreeDelegateItemSelected(TreeItem &item) override { return false; }
+
+protected:
+ Debugger &m_debugger;
+ std::shared_ptr<BreakpointLocationTreeDelegate>
+ m_breakpoint_location_delegate_sp;
+};
+
+class BreakpointsTreeDelegate : public TreeDelegate {
+public:
+ BreakpointsTreeDelegate(Debugger &debugger)
+ : TreeDelegate(), m_debugger(debugger), m_breakpoint_delegate_sp() {}
+
+ ~BreakpointsTreeDelegate() override = default;
+
+ bool TreeDelegateShouldDraw() override {
+ TargetSP target = m_debugger.GetSelectedTarget();
+ if (!target)
+ return false;
+
+ return true;
+ }
+
+ void TreeDelegateDrawTreeItem(TreeItem &item, Window &window) override {
+ window.PutCString("Breakpoints");
+ }
+
+ void TreeDelegateGenerateChildren(TreeItem &item) override {
+ TargetSP target = m_debugger.GetSelectedTarget();
+
+ BreakpointList &breakpoints = target->GetBreakpointList(false);
+ std::unique_lock<std::recursive_mutex> lock;
+ breakpoints.GetListMutex(lock);
+
+ if (!m_breakpoint_delegate_sp)
+ m_breakpoint_delegate_sp =
+ std::make_shared<BreakpointTreeDelegate>(m_debugger);
+ TreeItem breakpoint_tree_item(&item, *m_breakpoint_delegate_sp, true);
+
+ item.Resize(breakpoints.GetSize(), breakpoint_tree_item);
+ for (size_t i = 0; i < breakpoints.GetSize(); i++) {
+ item[i].SetIdentifier(i);
+ }
+ }
+
+ bool TreeDelegateItemSelected(TreeItem &item) override { return false; }
+
+ bool TreeDelegateExpandRootByDefault() override { return true; }
+
+protected:
+ Debugger &m_debugger;
+ std::shared_ptr<BreakpointTreeDelegate> m_breakpoint_delegate_sp;
+};
+
class ValueObjectListDelegate : public WindowDelegate {
public:
ValueObjectListDelegate() : m_rows() {}
@@ -4844,6 +6327,7 @@ public:
eMenuID_ViewRegisters,
eMenuID_ViewSource,
eMenuID_ViewVariables,
+ eMenuID_ViewBreakpoints,
eMenuID_Help,
eMenuID_HelpGUIHelp
@@ -4908,6 +6392,18 @@ public:
MenuActionResult MenuDelegateAction(Menu &menu) override {
switch (menu.GetIdentifier()) {
+ case eMenuID_TargetCreate: {
+ WindowSP main_window_sp = m_app.GetMainWindow();
+ FormDelegateSP form_delegate_sp =
+ FormDelegateSP(new TargetCreateFormDelegate(m_debugger));
+ Rect bounds = main_window_sp->GetCenteredRect(80, 19);
+ WindowSP form_window_sp = main_window_sp->CreateSubWindow(
+ form_delegate_sp->GetName().c_str(), bounds, true);
+ WindowDelegateSP window_delegate_sp =
+ WindowDelegateSP(new FormWindowDelegate(form_delegate_sp));
+ form_window_sp->SetDelegate(window_delegate_sp);
+ return MenuActionResult::Handled;
+ }
case eMenuID_ThreadStepIn: {
ExecutionContext exe_ctx =
m_debugger.GetCommandInterpreter().GetExecutionContext();
@@ -4956,6 +6452,18 @@ public:
form_window_sp->SetDelegate(window_delegate_sp);
return MenuActionResult::Handled;
}
+ case eMenuID_ProcessLaunch: {
+ WindowSP main_window_sp = m_app.GetMainWindow();
+ FormDelegateSP form_delegate_sp = FormDelegateSP(
+ new ProcessLaunchFormDelegate(m_debugger, main_window_sp));
+ Rect bounds = main_window_sp->GetCenteredRect(80, 22);
+ WindowSP form_window_sp = main_window_sp->CreateSubWindow(
+ form_delegate_sp->GetName().c_str(), bounds, true);
+ WindowDelegateSP window_delegate_sp =
+ WindowDelegateSP(new FormWindowDelegate(form_delegate_sp));
+ form_window_sp->SetDelegate(window_delegate_sp);
+ return MenuActionResult::Handled;
+ }
case eMenuID_ProcessContinue: {
ExecutionContext exe_ctx =
@@ -5046,8 +6554,8 @@ public:
// previously added
submenus.erase(submenus.begin() + 7, submenus.end());
}
- // Since we are adding and removing items we need to recalculate the name
- // lengths
+ // Since we are adding and removing items we need to recalculate the
+ // name lengths
menu.RecalculateNameLengths();
}
return MenuActionResult::Handled;
@@ -5155,6 +6663,39 @@ public:
}
return MenuActionResult::Handled;
+ case eMenuID_ViewBreakpoints: {
+ WindowSP main_window_sp = m_app.GetMainWindow();
+ WindowSP threads_window_sp = main_window_sp->FindSubWindow("Threads");
+ WindowSP breakpoints_window_sp =
+ main_window_sp->FindSubWindow("Breakpoints");
+ const Rect threads_bounds = threads_window_sp->GetBounds();
+
+ // If a breakpoints window already exists, remove it and give the area
+ // it used to occupy to the threads window. If it doesn't exist, split
+ // the threads window horizontally into two windows where the top window
+ // is the threads window and the bottom window is a newly added
+ // breakpoints window.
+ if (breakpoints_window_sp) {
+ threads_window_sp->Resize(threads_bounds.size.width,
+ threads_bounds.size.height +
+ breakpoints_window_sp->GetHeight());
+ main_window_sp->RemoveSubWindow(breakpoints_window_sp.get());
+ } else {
+ Rect new_threads_bounds, breakpoints_bounds;
+ threads_bounds.HorizontalSplitPercentage(0.70, new_threads_bounds,
+ breakpoints_bounds);
+ threads_window_sp->SetBounds(new_threads_bounds);
+ breakpoints_window_sp = main_window_sp->CreateSubWindow(
+ "Breakpoints", breakpoints_bounds, false);
+ TreeDelegateSP breakpoints_delegate_sp(
+ new BreakpointsTreeDelegate(m_debugger));
+ breakpoints_window_sp->SetDelegate(WindowDelegateSP(
+ new TreeWindowDelegate(m_debugger, breakpoints_delegate_sp)));
+ }
+ touchwin(stdscr);
+ return MenuActionResult::Handled;
+ }
+
case eMenuID_HelpGUIHelp:
m_app.GetMainWindow()->CreateHelpSubwindow();
return MenuActionResult::Handled;
@@ -5347,8 +6888,8 @@ public:
m_selected_line = m_pc_line;
if (m_file_sp && m_file_sp->GetFileSpec() == m_sc.line_entry.file) {
- // Same file, nothing to do, we should either have the lines or not
- // (source file missing)
+ // Same file, nothing to do, we should either have the lines or
+ // not (source file missing)
if (m_selected_line >= static_cast<size_t>(m_first_visible_line)) {
if (m_selected_line >= m_first_visible_line + num_visible_lines)
m_first_visible_line = m_selected_line - 10;
@@ -5470,8 +7011,8 @@ public:
window.MoveCursor(1, line_y);
const bool is_pc_line = curr_line == m_pc_line;
const bool line_is_selected = m_selected_line == curr_line;
- // Highlight the line as the PC line first, then if the selected line
- // isn't the same as the PC line, highlight it differently
+ // Highlight the line as the PC line first, then if the selected
+ // line isn't the same as the PC line, highlight it differently
attr_t highlight_attr = 0;
attr_t bp_attr = 0;
if (is_pc_line)
@@ -5610,8 +7151,8 @@ public:
window.MoveCursor(1, line_y);
const bool is_pc_line = frame_sp && inst_idx == pc_idx;
const bool line_is_selected = m_selected_line == inst_idx;
- // Highlight the line as the PC line first, then if the selected line
- // isn't the same as the PC line, highlight it differently
+ // Highlight the line as the PC line first, then if the selected
+ // line isn't the same as the PC line, highlight it differently
attr_t highlight_attr = 0;
attr_t bp_attr = 0;
if (is_pc_line)
@@ -6075,7 +7616,7 @@ void IOHandlerCursesGUI::Activate() {
MenuSP view_menu_sp(
new Menu("View", "F5", KEY_F(5), ApplicationDelegate::eMenuID_View));
view_menu_sp->AddSubmenu(
- MenuSP(new Menu("Backtrace", nullptr, 'b',
+ MenuSP(new Menu("Backtrace", nullptr, 't',
ApplicationDelegate::eMenuID_ViewBacktrace)));
view_menu_sp->AddSubmenu(
MenuSP(new Menu("Registers", nullptr, 'r',
@@ -6085,6 +7626,9 @@ void IOHandlerCursesGUI::Activate() {
view_menu_sp->AddSubmenu(
MenuSP(new Menu("Variables", nullptr, 'v',
ApplicationDelegate::eMenuID_ViewVariables)));
+ view_menu_sp->AddSubmenu(
+ MenuSP(new Menu("Breakpoints", nullptr, 'b',
+ ApplicationDelegate::eMenuID_ViewBreakpoints)));
MenuSP help_menu_sp(
new Menu("Help", "F6", KEY_F(6), ApplicationDelegate::eMenuID_Help));
@@ -6145,7 +7689,8 @@ void IOHandlerCursesGUI::Activate() {
status_window_sp->SetDelegate(
WindowDelegateSP(new StatusBarWindowDelegate(m_debugger)));
- // Show the main help window once the first time the curses GUI is launched
+ // Show the main help window the first time the curses GUI is
+ // launched
static bool g_showed_help = false;
if (!g_showed_help) {
g_showed_help = true;
@@ -6176,6 +7721,7 @@ void IOHandlerCursesGUI::Activate() {
static_assert(LastColorPairIndex == 18, "Color indexes do not match.");
define_key("\033[Z", KEY_SHIFT_TAB);
+ define_key("\033\015", KEY_ALT_ENTER);
}
}
diff --git a/lldb/source/Core/Mangled.cpp b/lldb/source/Core/Mangled.cpp
index fbaf9ff7151a..20f4dbdb419f 100644
--- a/lldb/source/Core/Mangled.cpp
+++ b/lldb/source/Core/Mangled.cpp
@@ -9,6 +9,7 @@
#include "lldb/Core/Mangled.h"
#include "lldb/Core/RichManglingContext.h"
+#include "lldb/Target/Language.h"
#include "lldb/Utility/ConstString.h"
#include "lldb/Utility/Log.h"
#include "lldb/Utility/Logging.h"
@@ -16,8 +17,6 @@
#include "lldb/Utility/Stream.h"
#include "lldb/lldb-enumerations.h"
-#include "Plugins/Language/CPlusPlus/CPlusPlusLanguage.h"
-
#include "llvm/ADT/StringRef.h"
#include "llvm/Demangle/Demangle.h"
#include "llvm/Support/Compiler.h"
@@ -34,35 +33,6 @@ static inline bool cstring_is_mangled(llvm::StringRef s) {
return Mangled::GetManglingScheme(s) != Mangled::eManglingSchemeNone;
}
-static ConstString GetDemangledNameWithoutArguments(ConstString mangled,
- ConstString demangled) {
- const char *mangled_name_cstr = mangled.GetCString();
-
- if (demangled && mangled_name_cstr && mangled_name_cstr[0]) {
- if (mangled_name_cstr[0] == '_' && mangled_name_cstr[1] == 'Z' &&
- (mangled_name_cstr[2] != 'T' && // avoid virtual table, VTT structure,
- // typeinfo structure, and typeinfo
- // mangled_name
- mangled_name_cstr[2] != 'G' && // avoid guard variables
- mangled_name_cstr[2] != 'Z')) // named local entities (if we eventually
- // handle eSymbolTypeData, we will want
- // this back)
- {
- CPlusPlusLanguage::MethodName cxx_method(demangled);
- if (!cxx_method.GetBasename().empty()) {
- std::string shortname;
- if (!cxx_method.GetContext().empty())
- shortname = cxx_method.GetContext().str() + "::";
- shortname += cxx_method.GetBasename().str();
- return ConstString(shortname);
- }
- }
- }
- if (demangled)
- return demangled;
- return mangled;
-}
-
#pragma mark Mangled
Mangled::ManglingScheme Mangled::GetManglingScheme(llvm::StringRef const name) {
@@ -75,6 +45,9 @@ Mangled::ManglingScheme Mangled::GetManglingScheme(llvm::StringRef const name) {
if (name.startswith("_R"))
return Mangled::eManglingSchemeRustV0;
+ if (name.startswith("_D"))
+ return Mangled::eManglingSchemeD;
+
if (name.startswith("_Z"))
return Mangled::eManglingSchemeItanium;
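The scheme detection is a plain prefix check, so D support only needs the new "_D" test. As a rough illustration (the argument strings are placeholders chosen only for their prefixes, not valid manglings):

// Any name starting with "_D" is now routed to the D demangler, while "_Z"
// still selects the Itanium demangler.
assert(Mangled::GetManglingScheme("_Dplaceholder") ==
       Mangled::eManglingSchemeD);
assert(Mangled::GetManglingScheme("_Zplaceholder") ==
       Mangled::eManglingSchemeItanium);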
@@ -161,9 +134,9 @@ void Mangled::SetValue(ConstString name) {
static char *GetMSVCDemangledStr(const char *M) {
char *demangled_cstr = llvm::microsoftDemangle(
M, nullptr, nullptr, nullptr, nullptr,
- llvm::MSDemangleFlags(llvm::MSDF_NoAccessSpecifier |
- llvm::MSDF_NoCallingConvention |
- llvm::MSDF_NoMemberType));
+ llvm::MSDemangleFlags(
+ llvm::MSDF_NoAccessSpecifier | llvm::MSDF_NoCallingConvention |
+ llvm::MSDF_NoMemberType | llvm::MSDF_NoVariableType));
if (Log *log = lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_DEMANGLE)) {
if (demangled_cstr && demangled_cstr[0])
@@ -215,6 +188,19 @@ static char *GetRustV0DemangledStr(const char *M) {
return demangled_cstr;
}
+static char *GetDLangDemangledStr(const char *M) {
+ char *demangled_cstr = llvm::dlangDemangle(M);
+
+ if (Log *log = lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_DEMANGLE)) {
+ if (demangled_cstr && demangled_cstr[0])
+ LLDB_LOG(log, "demangled dlang: {0} -> \"{1}\"", M, demangled_cstr);
+ else
+ LLDB_LOG(log, "demangled dlang: {0} -> error: failed to demangle", M);
+ }
+
+ return demangled_cstr;
+}
+
// Explicit demangling for scheduled requests during batch processing. This
// makes use of ItaniumPartialDemangler's rich demangle info
bool Mangled::DemangleWithRichManglingInfo(
@@ -274,7 +260,8 @@ bool Mangled::DemangleWithRichManglingInfo(
}
case eManglingSchemeRustV0:
- // Rich demangling scheme is not supported for Rust
+ case eManglingSchemeD:
+ // Rich demangling scheme is not supported
return false;
}
llvm_unreachable("Fully covered switch above!");
@@ -290,7 +277,8 @@ ConstString Mangled::GetDemangledName() const {
if (m_mangled && m_demangled.IsNull()) {
// Don't bother running anything that isn't mangled
const char *mangled_name = m_mangled.GetCString();
- ManglingScheme mangling_scheme = GetManglingScheme(m_mangled.GetStringRef());
+ ManglingScheme mangling_scheme =
+ GetManglingScheme(m_mangled.GetStringRef());
if (mangling_scheme != eManglingSchemeNone &&
!m_mangled.GetMangledCounterpart(m_demangled)) {
// We didn't already mangle this name, demangle it and if all goes well
@@ -307,6 +295,9 @@ ConstString Mangled::GetDemangledName() const {
case eManglingSchemeRustV0:
demangled_name = GetRustV0DemangledStr(mangled_name);
break;
+ case eManglingSchemeD:
+ demangled_name = GetDLangDemangledStr(mangled_name);
+ break;
case eManglingSchemeNone:
llvm_unreachable("eManglingSchemeNone was handled already");
}
@@ -326,8 +317,7 @@ ConstString Mangled::GetDemangledName() const {
return m_demangled;
}
-ConstString
-Mangled::GetDisplayDemangledName() const {
+ConstString Mangled::GetDisplayDemangledName() const {
return GetDemangledName();
}
@@ -344,14 +334,16 @@ ConstString Mangled::GetName(Mangled::NamePreference preference) const {
if (preference == ePreferMangled && m_mangled)
return m_mangled;
+ // Call the accessor to make sure we get a demangled name in case it hasn't
+ // been demangled yet...
ConstString demangled = GetDemangledName();
if (preference == ePreferDemangledWithoutArguments) {
- return GetDemangledNameWithoutArguments(m_mangled, demangled);
+ if (Language *lang = Language::FindPlugin(GuessLanguage())) {
+ return lang->GetDemangledFunctionNameWithoutArguments(*this);
+ }
}
if (preference == ePreferDemangled) {
- // Call the accessor to make sure we get a demangled name in case it hasn't
- // been demangled yet...
if (demangled)
return demangled;
return m_mangled;
diff --git a/lldb/source/Core/Module.cpp b/lldb/source/Core/Module.cpp
index 19c97be15066..bd0a667171a5 100644
--- a/lldb/source/Core/Module.cpp
+++ b/lldb/source/Core/Module.cpp
@@ -796,7 +796,7 @@ void Module::LookupInfo::Prune(SymbolContextList &sc_list,
void Module::FindFunctions(ConstString name,
const CompilerDeclContext &parent_decl_ctx,
FunctionNameType name_type_mask,
- bool include_symbols, bool include_inlines,
+ const ModuleFunctionSearchOptions &options,
SymbolContextList &sc_list) {
const size_t old_size = sc_list.GetSize();
@@ -808,12 +808,12 @@ void Module::FindFunctions(ConstString name,
if (symbols) {
symbols->FindFunctions(lookup_info.GetLookupName(), parent_decl_ctx,
- lookup_info.GetNameTypeMask(), include_inlines,
- sc_list);
+ lookup_info.GetNameTypeMask(),
+ options.include_inlines, sc_list);
// Now check our symbol table for symbols that are code symbols if
// requested
- if (include_symbols) {
+ if (options.include_symbols) {
Symtab *symtab = symbols->GetSymtab();
if (symtab)
symtab->FindFunctionSymbols(lookup_info.GetLookupName(),
@@ -828,11 +828,11 @@ void Module::FindFunctions(ConstString name,
} else {
if (symbols) {
symbols->FindFunctions(name, parent_decl_ctx, name_type_mask,
- include_inlines, sc_list);
+ options.include_inlines, sc_list);
// Now check our symbol table for symbols that are code symbols if
// requested
- if (include_symbols) {
+ if (options.include_symbols) {
Symtab *symtab = symbols->GetSymtab();
if (symtab)
symtab->FindFunctionSymbols(name, name_type_mask, sc_list);
@@ -841,17 +841,17 @@ void Module::FindFunctions(ConstString name,
}
}
-void Module::FindFunctions(const RegularExpression &regex, bool include_symbols,
- bool include_inlines,
+void Module::FindFunctions(const RegularExpression &regex,
+ const ModuleFunctionSearchOptions &options,
SymbolContextList &sc_list) {
const size_t start_size = sc_list.GetSize();
if (SymbolFile *symbols = GetSymbolFile()) {
- symbols->FindFunctions(regex, include_inlines, sc_list);
+ symbols->FindFunctions(regex, options.include_inlines, sc_list);
// Now check our symbol table for symbols that are code symbols if
// requested
- if (include_symbols) {
+ if (options.include_symbols) {
Symtab *symtab = symbols->GetSymtab();
if (symtab) {
std::vector<uint32_t> symbol_indexes;
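With the two booleans folded into ModuleFunctionSearchOptions, call sites now build an options struct first. A hedged sketch of an updated lookup (the module_sp variable and the searched name are illustrative; the field names come from the usage above):

ModuleFunctionSearchOptions function_options;
function_options.include_symbols = true;
function_options.include_inlines = false;

SymbolContextList sc_list;
module_sp->FindFunctions(ConstString("main"), CompilerDeclContext(),
                         lldb::eFunctionNameTypeFull, function_options,
                         sc_list);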
@@ -1614,24 +1614,23 @@ llvm::Optional<std::string> Module::RemapSourceFile(llvm::StringRef path) const
void Module::RegisterXcodeSDK(llvm::StringRef sdk_name, llvm::StringRef sysroot) {
XcodeSDK sdk(sdk_name.str());
- ConstString sdk_path(HostInfo::GetXcodeSDKPath(sdk));
- if (!sdk_path)
+ llvm::StringRef sdk_path(HostInfo::GetXcodeSDKPath(sdk));
+ if (sdk_path.empty())
return;
// If the SDK changed for a previously registered source path, update it.
// This could happen with -fdebug-prefix-map; otherwise it's unlikely.
- ConstString sysroot_cs(sysroot);
- if (!m_source_mappings.Replace(sysroot_cs, sdk_path, true))
+ if (!m_source_mappings.Replace(sysroot, sdk_path, true))
// In the general case, however, append it to the list.
- m_source_mappings.Append(sysroot_cs, sdk_path, false);
+ m_source_mappings.Append(sysroot, sdk_path, false);
}
bool Module::MergeArchitecture(const ArchSpec &arch_spec) {
if (!arch_spec.IsValid())
return false;
- LLDB_LOG(GetLogIfAllCategoriesSet(LIBLLDB_LOG_OBJECT | LIBLLDB_LOG_MODULES),
- "module has arch %s, merging/replacing with arch %s",
- m_arch.GetTriple().getTriple().c_str(),
- arch_spec.GetTriple().getTriple().c_str());
+ LLDB_LOGF(GetLogIfAllCategoriesSet(LIBLLDB_LOG_OBJECT | LIBLLDB_LOG_MODULES),
+ "module has arch %s, merging/replacing with arch %s",
+ m_arch.GetTriple().getTriple().c_str(),
+ arch_spec.GetTriple().getTriple().c_str());
if (!m_arch.IsCompatibleMatch(arch_spec)) {
// The new architecture is different, we just need to replace it.
return SetArchitecture(arch_spec);
diff --git a/lldb/source/Core/ModuleList.cpp b/lldb/source/Core/ModuleList.cpp
index 56bc4c72d8e9..9176c9dbb357 100644
--- a/lldb/source/Core/ModuleList.cpp
+++ b/lldb/source/Core/ModuleList.cpp
@@ -122,8 +122,7 @@ void ModuleListProperties::UpdateSymlinkMappings() {
FileSpec resolved;
Status status = FileSystem::Instance().Readlink(symlink, resolved);
if (status.Success())
- m_symlink_paths.Append(ConstString(symlink.GetPath()),
- ConstString(resolved.GetPath()), notify);
+ m_symlink_paths.Append(symlink.GetPath(), resolved.GetPath(), notify);
}
}
@@ -200,16 +199,15 @@ void ModuleList::ReplaceEquivalent(
}
}
-bool ModuleList::AppendIfNeeded(const ModuleSP &module_sp, bool notify) {
- if (module_sp) {
+bool ModuleList::AppendIfNeeded(const ModuleSP &new_module, bool notify) {
+ if (new_module) {
std::lock_guard<std::recursive_mutex> guard(m_modules_mutex);
- collection::iterator pos, end = m_modules.end();
- for (pos = m_modules.begin(); pos != end; ++pos) {
- if (pos->get() == module_sp.get())
+ for (const ModuleSP &module_sp : m_modules) {
+ if (module_sp.get() == new_module.get())
return false; // Already in the list
}
// Only push module_sp on the list if it wasn't already in there.
- Append(module_sp, notify);
+ Append(new_module, notify);
return true;
}
return false;
@@ -364,7 +362,7 @@ ModuleSP ModuleList::GetModuleAtIndexUnlocked(size_t idx) const {
void ModuleList::FindFunctions(ConstString name,
FunctionNameType name_type_mask,
- bool include_symbols, bool include_inlines,
+ const ModuleFunctionSearchOptions &options,
SymbolContextList &sc_list) const {
const size_t old_size = sc_list.GetSize();
@@ -372,11 +370,10 @@ void ModuleList::FindFunctions(ConstString name,
Module::LookupInfo lookup_info(name, name_type_mask, eLanguageTypeUnknown);
std::lock_guard<std::recursive_mutex> guard(m_modules_mutex);
- collection::const_iterator pos, end = m_modules.end();
- for (pos = m_modules.begin(); pos != end; ++pos) {
- (*pos)->FindFunctions(lookup_info.GetLookupName(), CompilerDeclContext(),
- lookup_info.GetNameTypeMask(), include_symbols,
- include_inlines, sc_list);
+ for (const ModuleSP &module_sp : m_modules) {
+ module_sp->FindFunctions(lookup_info.GetLookupName(),
+ CompilerDeclContext(),
+ lookup_info.GetNameTypeMask(), options, sc_list);
}
const size_t new_size = sc_list.GetSize();
@@ -385,10 +382,9 @@ void ModuleList::FindFunctions(ConstString name,
lookup_info.Prune(sc_list, old_size);
} else {
std::lock_guard<std::recursive_mutex> guard(m_modules_mutex);
- collection::const_iterator pos, end = m_modules.end();
- for (pos = m_modules.begin(); pos != end; ++pos) {
- (*pos)->FindFunctions(name, CompilerDeclContext(), name_type_mask,
- include_symbols, include_inlines, sc_list);
+ for (const ModuleSP &module_sp : m_modules) {
+ module_sp->FindFunctions(name, CompilerDeclContext(), name_type_mask,
+ options, sc_list);
}
}
}
@@ -402,10 +398,9 @@ void ModuleList::FindFunctionSymbols(ConstString name,
Module::LookupInfo lookup_info(name, name_type_mask, eLanguageTypeUnknown);
std::lock_guard<std::recursive_mutex> guard(m_modules_mutex);
- collection::const_iterator pos, end = m_modules.end();
- for (pos = m_modules.begin(); pos != end; ++pos) {
- (*pos)->FindFunctionSymbols(lookup_info.GetLookupName(),
- lookup_info.GetNameTypeMask(), sc_list);
+ for (const ModuleSP &module_sp : m_modules) {
+ module_sp->FindFunctionSymbols(lookup_info.GetLookupName(),
+ lookup_info.GetNameTypeMask(), sc_list);
}
const size_t new_size = sc_list.GetSize();
@@ -414,39 +409,33 @@ void ModuleList::FindFunctionSymbols(ConstString name,
lookup_info.Prune(sc_list, old_size);
} else {
std::lock_guard<std::recursive_mutex> guard(m_modules_mutex);
- collection::const_iterator pos, end = m_modules.end();
- for (pos = m_modules.begin(); pos != end; ++pos) {
- (*pos)->FindFunctionSymbols(name, name_type_mask, sc_list);
+ for (const ModuleSP &module_sp : m_modules) {
+ module_sp->FindFunctionSymbols(name, name_type_mask, sc_list);
}
}
}
void ModuleList::FindFunctions(const RegularExpression &name,
- bool include_symbols, bool include_inlines,
+ const ModuleFunctionSearchOptions &options,
SymbolContextList &sc_list) {
std::lock_guard<std::recursive_mutex> guard(m_modules_mutex);
- collection::const_iterator pos, end = m_modules.end();
- for (pos = m_modules.begin(); pos != end; ++pos) {
- (*pos)->FindFunctions(name, include_symbols, include_inlines, sc_list);
- }
+ for (const ModuleSP &module_sp : m_modules)
+ module_sp->FindFunctions(name, options, sc_list);
}
void ModuleList::FindCompileUnits(const FileSpec &path,
SymbolContextList &sc_list) const {
std::lock_guard<std::recursive_mutex> guard(m_modules_mutex);
- collection::const_iterator pos, end = m_modules.end();
- for (pos = m_modules.begin(); pos != end; ++pos) {
- (*pos)->FindCompileUnits(path, sc_list);
- }
+ for (const ModuleSP &module_sp : m_modules)
+ module_sp->FindCompileUnits(path, sc_list);
}
void ModuleList::FindGlobalVariables(ConstString name, size_t max_matches,
VariableList &variable_list) const {
std::lock_guard<std::recursive_mutex> guard(m_modules_mutex);
- collection::const_iterator pos, end = m_modules.end();
- for (pos = m_modules.begin(); pos != end; ++pos) {
- (*pos)->FindGlobalVariables(name, CompilerDeclContext(), max_matches,
- variable_list);
+ for (const ModuleSP &module_sp : m_modules) {
+ module_sp->FindGlobalVariables(name, CompilerDeclContext(), max_matches,
+ variable_list);
}
}
@@ -454,36 +443,30 @@ void ModuleList::FindGlobalVariables(const RegularExpression &regex,
size_t max_matches,
VariableList &variable_list) const {
std::lock_guard<std::recursive_mutex> guard(m_modules_mutex);
- collection::const_iterator pos, end = m_modules.end();
- for (pos = m_modules.begin(); pos != end; ++pos) {
- (*pos)->FindGlobalVariables(regex, max_matches, variable_list);
- }
+ for (const ModuleSP &module_sp : m_modules)
+ module_sp->FindGlobalVariables(regex, max_matches, variable_list);
}
void ModuleList::FindSymbolsWithNameAndType(ConstString name,
SymbolType symbol_type,
SymbolContextList &sc_list) const {
std::lock_guard<std::recursive_mutex> guard(m_modules_mutex);
- collection::const_iterator pos, end = m_modules.end();
- for (pos = m_modules.begin(); pos != end; ++pos)
- (*pos)->FindSymbolsWithNameAndType(name, symbol_type, sc_list);
+ for (const ModuleSP &module_sp : m_modules)
+ module_sp->FindSymbolsWithNameAndType(name, symbol_type, sc_list);
}
void ModuleList::FindSymbolsMatchingRegExAndType(
const RegularExpression &regex, lldb::SymbolType symbol_type,
SymbolContextList &sc_list) const {
std::lock_guard<std::recursive_mutex> guard(m_modules_mutex);
- collection::const_iterator pos, end = m_modules.end();
- for (pos = m_modules.begin(); pos != end; ++pos)
- (*pos)->FindSymbolsMatchingRegExAndType(regex, symbol_type, sc_list);
+ for (const ModuleSP &module_sp : m_modules)
+ module_sp->FindSymbolsMatchingRegExAndType(regex, symbol_type, sc_list);
}
void ModuleList::FindModules(const ModuleSpec &module_spec,
ModuleList &matching_module_list) const {
std::lock_guard<std::recursive_mutex> guard(m_modules_mutex);
- collection::const_iterator pos, end = m_modules.end();
- for (pos = m_modules.begin(); pos != end; ++pos) {
- ModuleSP module_sp(*pos);
+ for (const ModuleSP &module_sp : m_modules) {
if (module_sp->MatchesModuleSpec(module_spec))
matching_module_list.Append(module_sp);
}
@@ -559,9 +542,8 @@ void ModuleList::FindTypes(Module *search_first, ConstString name,
bool ModuleList::FindSourceFile(const FileSpec &orig_spec,
FileSpec &new_spec) const {
std::lock_guard<std::recursive_mutex> guard(m_modules_mutex);
- collection::const_iterator pos, end = m_modules.end();
- for (pos = m_modules.begin(); pos != end; ++pos) {
- if ((*pos)->FindSourceFile(orig_spec, new_spec))
+ for (const ModuleSP &module_sp : m_modules) {
+ if (module_sp->FindSourceFile(orig_spec, new_spec))
return true;
}
return false;
@@ -573,10 +555,9 @@ void ModuleList::FindAddressesForLine(const lldb::TargetSP target_sp,
std::vector<Address> &output_local,
std::vector<Address> &output_extern) {
std::lock_guard<std::recursive_mutex> guard(m_modules_mutex);
- collection::const_iterator pos, end = m_modules.end();
- for (pos = m_modules.begin(); pos != end; ++pos) {
- (*pos)->FindAddressesForLine(target_sp, file, line, function, output_local,
- output_extern);
+ for (const ModuleSP &module_sp : m_modules) {
+ module_sp->FindAddressesForLine(target_sp, file, line, function,
+ output_local, output_extern);
}
}
@@ -603,10 +584,8 @@ size_t ModuleList::GetSize() const {
void ModuleList::Dump(Stream *s) const {
std::lock_guard<std::recursive_mutex> guard(m_modules_mutex);
- collection::const_iterator pos, end = m_modules.end();
- for (pos = m_modules.begin(); pos != end; ++pos) {
- (*pos)->Dump(s);
- }
+ for (const ModuleSP &module_sp : m_modules)
+ module_sp->Dump(s);
}
void ModuleList::LogUUIDAndPaths(Log *log, const char *prefix_cstr) {
@@ -629,9 +608,8 @@ void ModuleList::LogUUIDAndPaths(Log *log, const char *prefix_cstr) {
bool ModuleList::ResolveFileAddress(lldb::addr_t vm_addr,
Address &so_addr) const {
std::lock_guard<std::recursive_mutex> guard(m_modules_mutex);
- collection::const_iterator pos, end = m_modules.end();
- for (pos = m_modules.begin(); pos != end; ++pos) {
- if ((*pos)->ResolveFileAddress(vm_addr, so_addr))
+ for (const ModuleSP &module_sp : m_modules) {
+ if (module_sp->ResolveFileAddress(vm_addr, so_addr))
return true;
}
@@ -674,10 +652,9 @@ uint32_t ModuleList::ResolveSymbolContextsForFileSpec(
const FileSpec &file_spec, uint32_t line, bool check_inlines,
SymbolContextItem resolve_scope, SymbolContextList &sc_list) const {
std::lock_guard<std::recursive_mutex> guard(m_modules_mutex);
- collection::const_iterator pos, end = m_modules.end();
- for (pos = m_modules.begin(); pos != end; ++pos) {
- (*pos)->ResolveSymbolContextsForFileSpec(file_spec, line, check_inlines,
- resolve_scope, sc_list);
+ for (const ModuleSP &module_sp : m_modules) {
+ module_sp->ResolveSymbolContextsForFileSpec(file_spec, line, check_inlines,
+ resolve_scope, sc_list);
}
return sc_list.GetSize();
diff --git a/lldb/source/Core/PluginManager.cpp b/lldb/source/Core/PluginManager.cpp
index fcaa868b083e..801591129244 100644
--- a/lldb/source/Core/PluginManager.cpp
+++ b/lldb/source/Core/PluginManager.cpp
@@ -12,6 +12,7 @@
#include "lldb/Host/FileSystem.h"
#include "lldb/Host/HostInfo.h"
#include "lldb/Interpreter/OptionValueProperties.h"
+#include "lldb/Target/Process.h"
#include "lldb/Utility/ConstString.h"
#include "lldb/Utility/FileSpec.h"
#include "lldb/Utility/Status.h"
@@ -184,15 +185,14 @@ template <typename Callback> struct PluginInstance {
typedef Callback CallbackType;
PluginInstance() = default;
- PluginInstance(ConstString name, std::string description,
- Callback create_callback = nullptr,
+ PluginInstance(llvm::StringRef name, llvm::StringRef description,
+ Callback create_callback,
DebuggerInitializeCallback debugger_init_callback = nullptr)
- : name(name), description(std::move(description)),
- create_callback(create_callback),
+ : name(name), description(description), create_callback(create_callback),
debugger_init_callback(debugger_init_callback) {}
- ConstString name;
- std::string description;
+ llvm::StringRef name;
+ llvm::StringRef description;
Callback create_callback;
DebuggerInitializeCallback debugger_init_callback;
};
@@ -200,12 +200,12 @@ template <typename Callback> struct PluginInstance {
template <typename Instance> class PluginInstances {
public:
template <typename... Args>
- bool RegisterPlugin(ConstString name, const char *description,
+ bool RegisterPlugin(llvm::StringRef name, llvm::StringRef description,
typename Instance::CallbackType callback,
- Args &&... args) {
+ Args &&...args) {
if (!callback)
return false;
- assert((bool)name);
+ assert(!name.empty());
Instance instance =
Instance(name, description, callback, std::forward<Args>(args)...);
m_instances.push_back(instance);
@@ -232,20 +232,20 @@ public:
return nullptr;
}
- const char *GetDescriptionAtIndex(uint32_t idx) {
+ llvm::StringRef GetDescriptionAtIndex(uint32_t idx) {
if (Instance *instance = GetInstanceAtIndex(idx))
- return instance->description.c_str();
- return nullptr;
+ return instance->description;
+ return "";
}
- const char *GetNameAtIndex(uint32_t idx) {
+ llvm::StringRef GetNameAtIndex(uint32_t idx) {
if (Instance *instance = GetInstanceAtIndex(idx))
- return instance->name.GetCString();
- return nullptr;
+ return instance->name;
+ return "";
}
- typename Instance::CallbackType GetCallbackForName(ConstString name) {
- if (!name)
+ typename Instance::CallbackType GetCallbackForName(llvm::StringRef name) {
+ if (name.empty())
return nullptr;
for (auto &instance : m_instances) {
if (name == instance.name)
@@ -284,7 +284,8 @@ static ABIInstances &GetABIInstances() {
return g_instances;
}
-bool PluginManager::RegisterPlugin(ConstString name, const char *description,
+bool PluginManager::RegisterPlugin(llvm::StringRef name,
+ llvm::StringRef description,
ABICreateInstance create_callback) {
return GetABIInstances().RegisterPlugin(name, description, create_callback);
}
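Since the registration interface now takes llvm::StringRef instead of ConstString, a registering plugin no longer needs to intern its name and description. A hypothetical registration call under the new signature (the plugin name, description, and MyABI::CreateInstance are made-up placeholders):

PluginManager::RegisterPlugin("my-abi", "Example ABI plugin (illustrative)",
                              MyABI::CreateInstance);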
@@ -307,11 +308,10 @@ static ArchitectureInstances &GetArchitectureInstances() {
return g_instances;
}
-void PluginManager::RegisterPlugin(ConstString name,
+void PluginManager::RegisterPlugin(llvm::StringRef name,
llvm::StringRef description,
ArchitectureCreateInstance create_callback) {
- GetArchitectureInstances().push_back(
- {name, std::string(description), create_callback});
+ GetArchitectureInstances().push_back({name, description, create_callback});
}
void PluginManager::UnregisterPlugin(
@@ -346,7 +346,8 @@ static DisassemblerInstances &GetDisassemblerInstances() {
return g_instances;
}
-bool PluginManager::RegisterPlugin(ConstString name, const char *description,
+bool PluginManager::RegisterPlugin(llvm::StringRef name,
+ llvm::StringRef description,
DisassemblerCreateInstance create_callback) {
return GetDisassemblerInstances().RegisterPlugin(name, description,
create_callback);
@@ -363,7 +364,8 @@ PluginManager::GetDisassemblerCreateCallbackAtIndex(uint32_t idx) {
}
DisassemblerCreateInstance
-PluginManager::GetDisassemblerCreateCallbackForPluginName(ConstString name) {
+PluginManager::GetDisassemblerCreateCallbackForPluginName(
+ llvm::StringRef name) {
return GetDisassemblerInstances().GetCallbackForName(name);
}
@@ -378,7 +380,7 @@ static DynamicLoaderInstances &GetDynamicLoaderInstances() {
}
bool PluginManager::RegisterPlugin(
- ConstString name, const char *description,
+ llvm::StringRef name, llvm::StringRef description,
DynamicLoaderCreateInstance create_callback,
DebuggerInitializeCallback debugger_init_callback) {
return GetDynamicLoaderInstances().RegisterPlugin(
@@ -396,7 +398,8 @@ PluginManager::GetDynamicLoaderCreateCallbackAtIndex(uint32_t idx) {
}
DynamicLoaderCreateInstance
-PluginManager::GetDynamicLoaderCreateCallbackForPluginName(ConstString name) {
+PluginManager::GetDynamicLoaderCreateCallbackForPluginName(
+ llvm::StringRef name) {
return GetDynamicLoaderInstances().GetCallbackForName(name);
}
@@ -411,7 +414,7 @@ static JITLoaderInstances &GetJITLoaderInstances() {
}
bool PluginManager::RegisterPlugin(
- ConstString name, const char *description,
+ llvm::StringRef name, llvm::StringRef description,
JITLoaderCreateInstance create_callback,
DebuggerInitializeCallback debugger_init_callback) {
return GetJITLoaderInstances().RegisterPlugin(
@@ -439,7 +442,7 @@ static EmulateInstructionInstances &GetEmulateInstructionInstances() {
}
bool PluginManager::RegisterPlugin(
- ConstString name, const char *description,
+ llvm::StringRef name, llvm::StringRef description,
EmulateInstructionCreateInstance create_callback) {
return GetEmulateInstructionInstances().RegisterPlugin(name, description,
create_callback);
@@ -457,7 +460,7 @@ PluginManager::GetEmulateInstructionCreateCallbackAtIndex(uint32_t idx) {
EmulateInstructionCreateInstance
PluginManager::GetEmulateInstructionCreateCallbackForPluginName(
- ConstString name) {
+ llvm::StringRef name) {
return GetEmulateInstructionInstances().GetCallbackForName(name);
}
@@ -472,7 +475,7 @@ static OperatingSystemInstances &GetOperatingSystemInstances() {
}
bool PluginManager::RegisterPlugin(
- ConstString name, const char *description,
+ llvm::StringRef name, llvm::StringRef description,
OperatingSystemCreateInstance create_callback,
DebuggerInitializeCallback debugger_init_callback) {
return GetOperatingSystemInstances().RegisterPlugin(
@@ -490,7 +493,8 @@ PluginManager::GetOperatingSystemCreateCallbackAtIndex(uint32_t idx) {
}
OperatingSystemCreateInstance
-PluginManager::GetOperatingSystemCreateCallbackForPluginName(ConstString name) {
+PluginManager::GetOperatingSystemCreateCallbackForPluginName(
+ llvm::StringRef name) {
return GetOperatingSystemInstances().GetCallbackForName(name);
}
@@ -504,7 +508,8 @@ static LanguageInstances &GetLanguageInstances() {
return g_instances;
}
-bool PluginManager::RegisterPlugin(ConstString name, const char *description,
+bool PluginManager::RegisterPlugin(llvm::StringRef name,
+ llvm::StringRef description,
LanguageCreateInstance create_callback) {
return GetLanguageInstances().RegisterPlugin(name, description,
create_callback);
@@ -524,13 +529,13 @@ PluginManager::GetLanguageCreateCallbackAtIndex(uint32_t idx) {
struct LanguageRuntimeInstance
: public PluginInstance<LanguageRuntimeCreateInstance> {
LanguageRuntimeInstance(
- ConstString name, std::string description, CallbackType create_callback,
+ llvm::StringRef name, llvm::StringRef description,
+ CallbackType create_callback,
DebuggerInitializeCallback debugger_init_callback,
LanguageRuntimeGetCommandObject command_callback,
LanguageRuntimeGetExceptionPrecondition precondition_callback)
: PluginInstance<LanguageRuntimeCreateInstance>(
- name, std::move(description), create_callback,
- debugger_init_callback),
+ name, description, create_callback, debugger_init_callback),
command_callback(command_callback),
precondition_callback(precondition_callback) {}
@@ -546,7 +551,7 @@ static LanguageRuntimeInstances &GetLanguageRuntimeInstances() {
}
bool PluginManager::RegisterPlugin(
- ConstString name, const char *description,
+ llvm::StringRef name, llvm::StringRef description,
LanguageRuntimeCreateInstance create_callback,
LanguageRuntimeGetCommandObject command_callback,
LanguageRuntimeGetExceptionPrecondition precondition_callback) {
@@ -592,7 +597,7 @@ static SystemRuntimeInstances &GetSystemRuntimeInstances() {
}
bool PluginManager::RegisterPlugin(
- ConstString name, const char *description,
+ llvm::StringRef name, llvm::StringRef description,
SystemRuntimeCreateInstance create_callback) {
return GetSystemRuntimeInstances().RegisterPlugin(name, description,
create_callback);
@@ -612,11 +617,12 @@ PluginManager::GetSystemRuntimeCreateCallbackAtIndex(uint32_t idx) {
struct ObjectFileInstance : public PluginInstance<ObjectFileCreateInstance> {
ObjectFileInstance(
- ConstString name, std::string description, CallbackType create_callback,
+ llvm::StringRef name, llvm::StringRef description,
+ CallbackType create_callback,
ObjectFileCreateMemoryInstance create_memory_callback,
ObjectFileGetModuleSpecifications get_module_specifications,
ObjectFileSaveCore save_core)
- : PluginInstance<ObjectFileCreateInstance>(name, std::move(description),
+ : PluginInstance<ObjectFileCreateInstance>(name, description,
create_callback),
create_memory_callback(create_memory_callback),
get_module_specifications(get_module_specifications),
@@ -634,7 +640,7 @@ static ObjectFileInstances &GetObjectFileInstances() {
}
bool PluginManager::RegisterPlugin(
- ConstString name, const char *description,
+ llvm::StringRef name, llvm::StringRef description,
ObjectFileCreateInstance create_callback,
ObjectFileCreateMemoryInstance create_memory_callback,
ObjectFileGetModuleSpecifications get_module_specifications,
@@ -672,9 +678,7 @@ PluginManager::GetObjectFileGetModuleSpecificationsCallbackAtIndex(
ObjectFileCreateMemoryInstance
PluginManager::GetObjectFileCreateMemoryCallbackForPluginName(
- ConstString name) {
- if (!name)
- return nullptr;
+ llvm::StringRef name) {
const auto &instances = GetObjectFileInstances().GetInstances();
for (auto &instance : instances) {
if (instance.name == name)
@@ -685,13 +689,26 @@ PluginManager::GetObjectFileCreateMemoryCallbackForPluginName(
Status PluginManager::SaveCore(const lldb::ProcessSP &process_sp,
const FileSpec &outfile,
- lldb::SaveCoreStyle &core_style) {
+ lldb::SaveCoreStyle &core_style,
+ llvm::StringRef plugin_name) {
+ if (plugin_name.empty()) {
+ // Try saving core directly from the process plugin first.
+ llvm::Expected<bool> ret = process_sp->SaveCore(outfile.GetPath());
+ if (!ret)
+ return Status(ret.takeError());
+ if (ret.get())
+ return Status();
+ }
+
+ // Fall back to object plugins.
Status error;
auto &instances = GetObjectFileInstances().GetInstances();
for (auto &instance : instances) {
- if (instance.save_core &&
- instance.save_core(process_sp, outfile, core_style, error))
- return error;
+ if (plugin_name.empty() || instance.name == plugin_name) {
+ if (instance.save_core &&
+ instance.save_core(process_sp, outfile, core_style, error))
+ return error;
+ }
}
error.SetErrorString(
"no ObjectFile plugins were able to save a core for this process");
@@ -703,10 +720,11 @@ Status PluginManager::SaveCore(const lldb::ProcessSP &process_sp,
struct ObjectContainerInstance
: public PluginInstance<ObjectContainerCreateInstance> {
ObjectContainerInstance(
- ConstString name, std::string description, CallbackType create_callback,
+ llvm::StringRef name, llvm::StringRef description,
+ CallbackType create_callback,
ObjectFileGetModuleSpecifications get_module_specifications)
- : PluginInstance<ObjectContainerCreateInstance>(
- name, std::move(description), create_callback),
+ : PluginInstance<ObjectContainerCreateInstance>(name, description,
+ create_callback),
get_module_specifications(get_module_specifications) {}
ObjectFileGetModuleSpecifications get_module_specifications;
@@ -719,7 +737,7 @@ static ObjectContainerInstances &GetObjectContainerInstances() {
}
bool PluginManager::RegisterPlugin(
- ConstString name, const char *description,
+ llvm::StringRef name, llvm::StringRef description,
ObjectContainerCreateInstance create_callback,
ObjectFileGetModuleSpecifications get_module_specifications) {
return GetObjectContainerInstances().RegisterPlugin(
@@ -756,7 +774,7 @@ static PlatformInstances &GetPlatformInstances() {
}
bool PluginManager::RegisterPlugin(
- ConstString name, const char *description,
+ llvm::StringRef name, llvm::StringRef description,
PlatformCreateInstance create_callback,
DebuggerInitializeCallback debugger_init_callback) {
return GetPlatformInstances().RegisterPlugin(
@@ -767,11 +785,12 @@ bool PluginManager::UnregisterPlugin(PlatformCreateInstance create_callback) {
return GetPlatformInstances().UnregisterPlugin(create_callback);
}
-const char *PluginManager::GetPlatformPluginNameAtIndex(uint32_t idx) {
+llvm::StringRef PluginManager::GetPlatformPluginNameAtIndex(uint32_t idx) {
return GetPlatformInstances().GetNameAtIndex(idx);
}
-const char *PluginManager::GetPlatformPluginDescriptionAtIndex(uint32_t idx) {
+llvm::StringRef
+PluginManager::GetPlatformPluginDescriptionAtIndex(uint32_t idx) {
return GetPlatformInstances().GetDescriptionAtIndex(idx);
}
@@ -781,15 +800,15 @@ PluginManager::GetPlatformCreateCallbackAtIndex(uint32_t idx) {
}
PlatformCreateInstance
-PluginManager::GetPlatformCreateCallbackForPluginName(ConstString name) {
+PluginManager::GetPlatformCreateCallbackForPluginName(llvm::StringRef name) {
return GetPlatformInstances().GetCallbackForName(name);
}
void PluginManager::AutoCompletePlatformName(llvm::StringRef name,
CompletionRequest &request) {
for (const auto &instance : GetPlatformInstances().GetInstances()) {
- if (instance.name.GetStringRef().startswith(name))
- request.AddCompletion(instance.name.GetCString());
+ if (instance.name.startswith(name))
+ request.AddCompletion(instance.name);
}
}
@@ -804,7 +823,7 @@ static ProcessInstances &GetProcessInstances() {
}
bool PluginManager::RegisterPlugin(
- ConstString name, const char *description,
+ llvm::StringRef name, llvm::StringRef description,
ProcessCreateInstance create_callback,
DebuggerInitializeCallback debugger_init_callback) {
return GetProcessInstances().RegisterPlugin(
@@ -815,11 +834,11 @@ bool PluginManager::UnregisterPlugin(ProcessCreateInstance create_callback) {
return GetProcessInstances().UnregisterPlugin(create_callback);
}
-const char *PluginManager::GetProcessPluginNameAtIndex(uint32_t idx) {
+llvm::StringRef PluginManager::GetProcessPluginNameAtIndex(uint32_t idx) {
return GetProcessInstances().GetNameAtIndex(idx);
}
-const char *PluginManager::GetProcessPluginDescriptionAtIndex(uint32_t idx) {
+llvm::StringRef PluginManager::GetProcessPluginDescriptionAtIndex(uint32_t idx) {
return GetProcessInstances().GetDescriptionAtIndex(idx);
}
@@ -829,15 +848,15 @@ PluginManager::GetProcessCreateCallbackAtIndex(uint32_t idx) {
}
ProcessCreateInstance
-PluginManager::GetProcessCreateCallbackForPluginName(ConstString name) {
+PluginManager::GetProcessCreateCallbackForPluginName(llvm::StringRef name) {
return GetProcessInstances().GetCallbackForName(name);
}
void PluginManager::AutoCompleteProcessName(llvm::StringRef name,
CompletionRequest &request) {
for (const auto &instance : GetProcessInstances().GetInstances()) {
- if (instance.name.GetStringRef().startswith(name))
- request.AddCompletion(instance.name.GetCString(), instance.description);
+ if (instance.name.startswith(name))
+ request.AddCompletion(instance.name, instance.description);
}
}
@@ -845,11 +864,11 @@ void PluginManager::AutoCompleteProcessName(llvm::StringRef name,
struct ScriptInterpreterInstance
: public PluginInstance<ScriptInterpreterCreateInstance> {
- ScriptInterpreterInstance(ConstString name, std::string description,
+ ScriptInterpreterInstance(llvm::StringRef name, llvm::StringRef description,
CallbackType create_callback,
lldb::ScriptLanguage language)
- : PluginInstance<ScriptInterpreterCreateInstance>(
- name, std::move(description), create_callback),
+ : PluginInstance<ScriptInterpreterCreateInstance>(name, description,
+ create_callback),
language(language) {}
lldb::ScriptLanguage language = lldb::eScriptLanguageNone;
@@ -863,7 +882,7 @@ static ScriptInterpreterInstances &GetScriptInterpreterInstances() {
}
bool PluginManager::RegisterPlugin(
- ConstString name, const char *description,
+ llvm::StringRef name, llvm::StringRef description,
lldb::ScriptLanguage script_language,
ScriptInterpreterCreateInstance create_callback) {
return GetScriptInterpreterInstances().RegisterPlugin(
@@ -903,12 +922,12 @@ PluginManager::GetScriptInterpreterForLanguage(lldb::ScriptLanguage script_lang,
struct StructuredDataPluginInstance
: public PluginInstance<StructuredDataPluginCreateInstance> {
StructuredDataPluginInstance(
- ConstString name, std::string description, CallbackType create_callback,
+ llvm::StringRef name, llvm::StringRef description,
+ CallbackType create_callback,
DebuggerInitializeCallback debugger_init_callback,
StructuredDataFilterLaunchInfo filter_callback)
: PluginInstance<StructuredDataPluginCreateInstance>(
- name, std::move(description), create_callback,
- debugger_init_callback),
+ name, description, create_callback, debugger_init_callback),
filter_callback(filter_callback) {}
StructuredDataFilterLaunchInfo filter_callback = nullptr;
@@ -923,7 +942,7 @@ static StructuredDataPluginInstances &GetStructuredDataPluginInstances() {
}
bool PluginManager::RegisterPlugin(
- ConstString name, const char *description,
+ llvm::StringRef name, llvm::StringRef description,
StructuredDataPluginCreateInstance create_callback,
DebuggerInitializeCallback debugger_init_callback,
StructuredDataFilterLaunchInfo filter_callback) {
@@ -966,7 +985,7 @@ static SymbolFileInstances &GetSymbolFileInstances() {
}
bool PluginManager::RegisterPlugin(
- ConstString name, const char *description,
+ llvm::StringRef name, llvm::StringRef description,
SymbolFileCreateInstance create_callback,
DebuggerInitializeCallback debugger_init_callback) {
return GetSymbolFileInstances().RegisterPlugin(
@@ -992,7 +1011,8 @@ static SymbolVendorInstances &GetSymbolVendorInstances() {
return g_instances;
}
-bool PluginManager::RegisterPlugin(ConstString name, const char *description,
+bool PluginManager::RegisterPlugin(llvm::StringRef name,
+ llvm::StringRef description,
SymbolVendorCreateInstance create_callback) {
return GetSymbolVendorInstances().RegisterPlugin(name, description,
create_callback);
@@ -1013,12 +1033,12 @@ PluginManager::GetSymbolVendorCreateCallbackAtIndex(uint32_t idx) {
struct TraceInstance
: public PluginInstance<TraceCreateInstanceForSessionFile> {
TraceInstance(
- ConstString name, std::string description,
+ llvm::StringRef name, llvm::StringRef description,
CallbackType create_callback_for_session_file,
TraceCreateInstanceForLiveProcess create_callback_for_live_process,
llvm::StringRef schema)
: PluginInstance<TraceCreateInstanceForSessionFile>(
- name, std::move(description), create_callback_for_session_file),
+ name, description, create_callback_for_session_file),
schema(schema),
create_callback_for_live_process(create_callback_for_live_process) {}
@@ -1034,7 +1054,7 @@ static TraceInstances &GetTracePluginInstances() {
}
bool PluginManager::RegisterPlugin(
- ConstString name, const char *description,
+ llvm::StringRef name, llvm::StringRef description,
TraceCreateInstanceForSessionFile create_callback_for_session_file,
TraceCreateInstanceForLiveProcess create_callback_for_live_process,
llvm::StringRef schema) {
@@ -1050,19 +1070,19 @@ bool PluginManager::UnregisterPlugin(
}
TraceCreateInstanceForSessionFile
-PluginManager::GetTraceCreateCallback(ConstString plugin_name) {
+PluginManager::GetTraceCreateCallback(llvm::StringRef plugin_name) {
return GetTracePluginInstances().GetCallbackForName(plugin_name);
}
TraceCreateInstanceForLiveProcess
-PluginManager::GetTraceCreateCallbackForLiveProcess(ConstString plugin_name) {
+PluginManager::GetTraceCreateCallbackForLiveProcess(llvm::StringRef plugin_name) {
for (const TraceInstance &instance : GetTracePluginInstances().GetInstances())
if (instance.name == plugin_name)
return instance.create_callback_for_live_process;
return nullptr;
}
-llvm::StringRef PluginManager::GetTraceSchema(ConstString plugin_name) {
+llvm::StringRef PluginManager::GetTraceSchema(llvm::StringRef plugin_name) {
for (const TraceInstance &instance : GetTracePluginInstances().GetInstances())
if (instance.name == plugin_name)
return instance.schema;
@@ -1081,11 +1101,11 @@ llvm::StringRef PluginManager::GetTraceSchema(size_t index) {
struct TraceExporterInstance
: public PluginInstance<TraceExporterCreateInstance> {
TraceExporterInstance(
- ConstString name, std::string description,
+ llvm::StringRef name, llvm::StringRef description,
TraceExporterCreateInstance create_instance,
ThreadTraceExportCommandCreator create_thread_trace_export_command)
- : PluginInstance<TraceExporterCreateInstance>(
- name, std::move(description), create_instance),
+ : PluginInstance<TraceExporterCreateInstance>(name, description,
+ create_instance),
create_thread_trace_export_command(create_thread_trace_export_command) {
}
@@ -1100,7 +1120,7 @@ static TraceExporterInstances &GetTraceExporterInstances() {
}
bool PluginManager::RegisterPlugin(
- ConstString name, const char *description,
+ llvm::StringRef name, llvm::StringRef description,
TraceExporterCreateInstance create_callback,
ThreadTraceExportCommandCreator create_thread_trace_export_command) {
return GetTraceExporterInstances().RegisterPlugin(
@@ -1108,7 +1128,7 @@ bool PluginManager::RegisterPlugin(
}
TraceExporterCreateInstance
-PluginManager::GetTraceExporterCreateCallback(ConstString plugin_name) {
+PluginManager::GetTraceExporterCreateCallback(llvm::StringRef plugin_name) {
return GetTraceExporterInstances().GetCallbackForName(plugin_name);
}
@@ -1125,7 +1145,8 @@ PluginManager::GetThreadTraceExportCommandCreatorAtIndex(uint32_t index) {
return nullptr;
}
-const char *PluginManager::GetTraceExporterPluginNameAtIndex(uint32_t index) {
+llvm::StringRef
+PluginManager::GetTraceExporterPluginNameAtIndex(uint32_t index) {
return GetTraceExporterInstances().GetNameAtIndex(index);
}
@@ -1140,7 +1161,7 @@ static UnwindAssemblyInstances &GetUnwindAssemblyInstances() {
}
bool PluginManager::RegisterPlugin(
- ConstString name, const char *description,
+ llvm::StringRef name, llvm::StringRef description,
UnwindAssemblyCreateInstance create_callback) {
return GetUnwindAssemblyInstances().RegisterPlugin(name, description,
create_callback);
@@ -1167,7 +1188,7 @@ static MemoryHistoryInstances &GetMemoryHistoryInstances() {
}
bool PluginManager::RegisterPlugin(
- ConstString name, const char *description,
+ llvm::StringRef name, llvm::StringRef description,
MemoryHistoryCreateInstance create_callback) {
return GetMemoryHistoryInstances().RegisterPlugin(name, description,
create_callback);
@@ -1188,10 +1209,11 @@ PluginManager::GetMemoryHistoryCreateCallbackAtIndex(uint32_t idx) {
struct InstrumentationRuntimeInstance
: public PluginInstance<InstrumentationRuntimeCreateInstance> {
InstrumentationRuntimeInstance(
- ConstString name, std::string description, CallbackType create_callback,
+ llvm::StringRef name, llvm::StringRef description,
+ CallbackType create_callback,
InstrumentationRuntimeGetType get_type_callback)
- : PluginInstance<InstrumentationRuntimeCreateInstance>(
- name, std::move(description), create_callback),
+ : PluginInstance<InstrumentationRuntimeCreateInstance>(name, description,
+ create_callback),
get_type_callback(get_type_callback) {}
InstrumentationRuntimeGetType get_type_callback = nullptr;
@@ -1206,7 +1228,7 @@ static InstrumentationRuntimeInstances &GetInstrumentationRuntimeInstances() {
}
bool PluginManager::RegisterPlugin(
- ConstString name, const char *description,
+ llvm::StringRef name, llvm::StringRef description,
InstrumentationRuntimeCreateInstance create_callback,
InstrumentationRuntimeGetType get_type_callback) {
return GetInstrumentationRuntimeInstances().RegisterPlugin(
@@ -1234,11 +1256,11 @@ PluginManager::GetInstrumentationRuntimeCreateCallbackAtIndex(uint32_t idx) {
#pragma mark TypeSystem
struct TypeSystemInstance : public PluginInstance<TypeSystemCreateInstance> {
- TypeSystemInstance(ConstString name, std::string description,
+ TypeSystemInstance(llvm::StringRef name, llvm::StringRef description,
CallbackType create_callback,
LanguageSet supported_languages_for_types,
LanguageSet supported_languages_for_expressions)
- : PluginInstance<TypeSystemCreateInstance>(name, std::move(description),
+ : PluginInstance<TypeSystemCreateInstance>(name, description,
create_callback),
supported_languages_for_types(supported_languages_for_types),
supported_languages_for_expressions(
@@ -1256,7 +1278,7 @@ static TypeSystemInstances &GetTypeSystemInstances() {
}
bool PluginManager::RegisterPlugin(
- ConstString name, const char *description,
+ llvm::StringRef name, llvm::StringRef description,
TypeSystemCreateInstance create_callback,
LanguageSet supported_languages_for_types,
LanguageSet supported_languages_for_expressions) {
@@ -1293,10 +1315,9 @@ LanguageSet PluginManager::GetAllTypeSystemSupportedLanguagesForExpressions() {
#pragma mark REPL
struct REPLInstance : public PluginInstance<REPLCreateInstance> {
- REPLInstance(ConstString name, std::string description,
+ REPLInstance(llvm::StringRef name, llvm::StringRef description,
CallbackType create_callback, LanguageSet supported_languages)
- : PluginInstance<REPLCreateInstance>(name, std::move(description),
- create_callback),
+ : PluginInstance<REPLCreateInstance>(name, description, create_callback),
supported_languages(supported_languages) {}
LanguageSet supported_languages;
@@ -1309,7 +1330,7 @@ static REPLInstances &GetREPLInstances() {
return g_instances;
}
-bool PluginManager::RegisterPlugin(ConstString name, const char *description,
+bool PluginManager::RegisterPlugin(llvm::StringRef name, llvm::StringRef description,
REPLCreateInstance create_callback,
LanguageSet supported_languages) {
return GetREPLInstances().RegisterPlugin(name, description, create_callback,
@@ -1421,8 +1442,9 @@ namespace {
typedef lldb::OptionValuePropertiesSP
GetDebuggerPropertyForPluginsPtr(Debugger &, ConstString, ConstString,
bool can_create);
+}
-lldb::OptionValuePropertiesSP
+static lldb::OptionValuePropertiesSP
GetSettingForPlugin(Debugger &debugger, ConstString setting_name,
ConstString plugin_type_name,
GetDebuggerPropertyForPluginsPtr get_debugger_property =
@@ -1438,13 +1460,13 @@ GetSettingForPlugin(Debugger &debugger, ConstString setting_name,
return properties_sp;
}
-bool CreateSettingForPlugin(
- Debugger &debugger, ConstString plugin_type_name,
- ConstString plugin_type_desc,
- const lldb::OptionValuePropertiesSP &properties_sp, ConstString description,
- bool is_global_property,
- GetDebuggerPropertyForPluginsPtr get_debugger_property =
- GetDebuggerPropertyForPlugins) {
+static bool
+CreateSettingForPlugin(Debugger &debugger, ConstString plugin_type_name,
+ ConstString plugin_type_desc,
+ const lldb::OptionValuePropertiesSP &properties_sp,
+ ConstString description, bool is_global_property,
+ GetDebuggerPropertyForPluginsPtr get_debugger_property =
+ GetDebuggerPropertyForPlugins) {
if (properties_sp) {
lldb::OptionValuePropertiesSP plugin_type_properties_sp(
get_debugger_property(debugger, plugin_type_name, plugin_type_desc,
@@ -1459,14 +1481,12 @@ bool CreateSettingForPlugin(
return false;
}
-const char *kDynamicLoaderPluginName("dynamic-loader");
-const char *kPlatformPluginName("platform");
-const char *kProcessPluginName("process");
-const char *kSymbolFilePluginName("symbol-file");
-const char *kJITLoaderPluginName("jit-loader");
-const char *kStructuredDataPluginName("structured-data");
-
-} // anonymous namespace
+static const char *kDynamicLoaderPluginName("dynamic-loader");
+static const char *kPlatformPluginName("platform");
+static const char *kProcessPluginName("process");
+static const char *kSymbolFilePluginName("symbol-file");
+static const char *kJITLoaderPluginName("jit-loader");
+static const char *kStructuredDataPluginName("structured-data");
lldb::OptionValuePropertiesSP
PluginManager::GetSettingForDynamicLoaderPlugin(Debugger &debugger,
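The PluginManager hunks above migrate every plugin registry from ConstString to llvm::StringRef and teach SaveCore to ask the process plugin first before falling back to the ObjectFile plugins (optionally restricted to one plugin by name). A minimal caller-side sketch of the new SaveCore entry point; the helper name, the "minidump" plugin name, and the eSaveCoreFull enumerator are assumptions for illustration, not taken from this diff:

#include "lldb/Core/PluginManager.h"
#include "lldb/Utility/FileSpec.h"
#include "lldb/Utility/Status.h"

using namespace lldb_private;

// Hypothetical helper: dump a core for an already-running process.
// An empty plugin name lets the process plugin try first and then walks
// every ObjectFile plugin, exactly as in the SaveCore hunk above; passing
// e.g. "minidump" limits the fallback loop to that one plugin.
static Status SaveCoreSketch(const lldb::ProcessSP &process_sp,
                             llvm::StringRef path,
                             llvm::StringRef plugin_name = "") {
  FileSpec outfile(path);
  lldb::SaveCoreStyle style = lldb::eSaveCoreFull; // assumed enumerator
  return PluginManager::SaveCore(process_sp, outfile, style, plugin_name);
}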
diff --git a/lldb/source/Core/RichManglingContext.cpp b/lldb/source/Core/RichManglingContext.cpp
index 2dcb1407e6c7..63170feb6231 100644
--- a/lldb/source/Core/RichManglingContext.cpp
+++ b/lldb/source/Core/RichManglingContext.cpp
@@ -83,19 +83,6 @@ bool RichManglingContext::IsCtorOrDtor() const {
llvm_unreachable("Fully covered switch above!");
}
-bool RichManglingContext::IsFunction() const {
- assert(m_provider != None && "Initialize a provider first");
- switch (m_provider) {
- case ItaniumPartialDemangler:
- return m_ipd.isFunction();
- case PluginCxxLanguage:
- return get<CPlusPlusLanguage::MethodName>(m_cxx_method_parser)->IsValid();
- case None:
- return false;
- }
- llvm_unreachable("Fully covered switch above!");
-}
-
void RichManglingContext::processIPDStrResult(char *ipd_res, size_t res_size) {
// Error case: Clear the buffer.
if (LLVM_UNLIKELY(ipd_res == nullptr)) {
diff --git a/lldb/source/Core/Section.cpp b/lldb/source/Core/Section.cpp
index a5a10141aa64..1660e3c92f2c 100644
--- a/lldb/source/Core/Section.cpp
+++ b/lldb/source/Core/Section.cpp
@@ -396,6 +396,76 @@ lldb::offset_t Section::GetSectionData(DataExtractor &section_data) {
return 0;
}
+bool Section::ContainsOnlyDebugInfo() const {
+ switch (m_type) {
+ case eSectionTypeInvalid:
+ case eSectionTypeCode:
+ case eSectionTypeContainer:
+ case eSectionTypeData:
+ case eSectionTypeDataCString:
+ case eSectionTypeDataCStringPointers:
+ case eSectionTypeDataSymbolAddress:
+ case eSectionTypeData4:
+ case eSectionTypeData8:
+ case eSectionTypeData16:
+ case eSectionTypeDataPointers:
+ case eSectionTypeZeroFill:
+ case eSectionTypeDataObjCMessageRefs:
+ case eSectionTypeDataObjCCFStrings:
+ case eSectionTypeELFSymbolTable:
+ case eSectionTypeELFDynamicSymbols:
+ case eSectionTypeELFRelocationEntries:
+ case eSectionTypeELFDynamicLinkInfo:
+ case eSectionTypeEHFrame:
+ case eSectionTypeARMexidx:
+ case eSectionTypeARMextab:
+ case eSectionTypeCompactUnwind:
+ case eSectionTypeGoSymtab:
+ case eSectionTypeAbsoluteAddress:
+ case eSectionTypeOther:
+ return false;
+
+ case eSectionTypeDebug:
+ case eSectionTypeDWARFDebugAbbrev:
+ case eSectionTypeDWARFDebugAbbrevDwo:
+ case eSectionTypeDWARFDebugAddr:
+ case eSectionTypeDWARFDebugAranges:
+ case eSectionTypeDWARFDebugCuIndex:
+ case eSectionTypeDWARFDebugTuIndex:
+ case eSectionTypeDWARFDebugFrame:
+ case eSectionTypeDWARFDebugInfo:
+ case eSectionTypeDWARFDebugInfoDwo:
+ case eSectionTypeDWARFDebugLine:
+ case eSectionTypeDWARFDebugLineStr:
+ case eSectionTypeDWARFDebugLoc:
+ case eSectionTypeDWARFDebugLocDwo:
+ case eSectionTypeDWARFDebugLocLists:
+ case eSectionTypeDWARFDebugLocListsDwo:
+ case eSectionTypeDWARFDebugMacInfo:
+ case eSectionTypeDWARFDebugMacro:
+ case eSectionTypeDWARFDebugPubNames:
+ case eSectionTypeDWARFDebugPubTypes:
+ case eSectionTypeDWARFDebugRanges:
+ case eSectionTypeDWARFDebugRngLists:
+ case eSectionTypeDWARFDebugRngListsDwo:
+ case eSectionTypeDWARFDebugStr:
+ case eSectionTypeDWARFDebugStrDwo:
+ case eSectionTypeDWARFDebugStrOffsets:
+ case eSectionTypeDWARFDebugStrOffsetsDwo:
+ case eSectionTypeDWARFDebugTypes:
+ case eSectionTypeDWARFDebugTypesDwo:
+ case eSectionTypeDWARFDebugNames:
+ case eSectionTypeDWARFAppleNames:
+ case eSectionTypeDWARFAppleTypes:
+ case eSectionTypeDWARFAppleNamespaces:
+ case eSectionTypeDWARFAppleObjC:
+ case eSectionTypeDWARFGNUDebugAltLink:
+ return true;
+ }
+ return false;
+}
+
+
#pragma mark SectionList
SectionList &SectionList::operator=(const SectionList &rhs) {
@@ -599,3 +669,15 @@ size_t SectionList::Slide(addr_t slide_amount, bool slide_children) {
}
return count;
}
+
+uint64_t SectionList::GetDebugInfoSize() const {
+ uint64_t debug_info_size = 0;
+ for (const auto &section : m_sections) {
+ const SectionList &sub_sections = section->GetChildren();
+ if (sub_sections.GetSize() > 0)
+ debug_info_size += sub_sections.GetDebugInfoSize();
+ else if (section->ContainsOnlyDebugInfo())
+ debug_info_size += section->GetFileSize();
+ }
+ return debug_info_size;
+}
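The new Section::ContainsOnlyDebugInfo / SectionList::GetDebugInfoSize pair walks the section tree recursively: container sections defer to their children, and leaf sections contribute their file size only when they hold pure debug info. A short usage sketch, assuming a valid lldb_private::Module (Module::GetSectionList is the same accessor used elsewhere in this patch); the helper name is hypothetical:

#include <cstdint>
#include "lldb/Core/Module.h"
#include "lldb/Core/Section.h"

using namespace lldb_private;

// Hypothetical helper: report how many bytes of a module's file are spent
// on DWARF and other debug-only sections.
static uint64_t GetModuleDebugInfoSize(Module &module) {
  if (SectionList *sections = module.GetSectionList())
    return sections->GetDebugInfoSize(); // recursive sum added above
  return 0;
}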
diff --git a/lldb/source/Core/SourceManager.cpp b/lldb/source/Core/SourceManager.cpp
index 9c1112979c54..effba485f026 100644
--- a/lldb/source/Core/SourceManager.cpp
+++ b/lldb/source/Core/SourceManager.cpp
@@ -339,11 +339,14 @@ bool SourceManager::GetDefaultFileAndLine(FileSpec &file_spec, uint32_t &line) {
if (executable_ptr) {
SymbolContextList sc_list;
ConstString main_name("main");
- bool symbols_okay = false; // Force it to be a debug symbol.
- bool inlines_okay = true;
+
+ ModuleFunctionSearchOptions function_options;
+ function_options.include_symbols =
+ false; // Force it to be a debug symbol.
+ function_options.include_inlines = true;
executable_ptr->FindFunctions(main_name, CompilerDeclContext(),
- lldb::eFunctionNameTypeBase, inlines_okay,
- symbols_okay, sc_list);
+ lldb::eFunctionNameTypeBase,
+ function_options, sc_list);
size_t num_matches = sc_list.GetSize();
for (size_t idx = 0; idx < num_matches; idx++) {
SymbolContext sc;
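The SourceManager hunk shows the API change that recurs throughout this import: the include_symbols/include_inlines boolean pair on FindFunctions is replaced by a ModuleFunctionSearchOptions struct. A minimal sketch of the new calling convention; the helper name is hypothetical and the header locations are assumed:

#include "lldb/Core/Module.h"
#include "lldb/Core/ModuleList.h"
#include "lldb/Symbol/CompilerDeclContext.h"
#include "lldb/Symbol/SymbolContext.h"
#include "lldb/Utility/ConstString.h"

using namespace lldb_private;

// Hypothetical lookup: find debug-info definitions of "main" in a module,
// skipping plain symbol-table matches but following inlined instances.
static SymbolContextList FindMainFunctions(Module &module) {
  ModuleFunctionSearchOptions options;
  options.include_symbols = false; // force a debug symbol, as above
  options.include_inlines = true;

  SymbolContextList sc_list;
  module.FindFunctions(ConstString("main"), CompilerDeclContext(),
                       lldb::eFunctionNameTypeBase, options, sc_list);
  return sc_list;
}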
diff --git a/lldb/source/Core/StreamFile.cpp b/lldb/source/Core/StreamFile.cpp
index 2f922fe11440..7753397ae0f1 100644
--- a/lldb/source/Core/StreamFile.cpp
+++ b/lldb/source/Core/StreamFile.cpp
@@ -21,8 +21,8 @@ StreamFile::StreamFile(uint32_t flags, uint32_t addr_size, ByteOrder byte_order)
}
StreamFile::StreamFile(int fd, bool transfer_ownership) : Stream() {
- m_file_sp =
- std::make_shared<NativeFile>(fd, File::eOpenOptionWrite, transfer_ownership);
+ m_file_sp = std::make_shared<NativeFile>(fd, File::eOpenOptionWriteOnly,
+ transfer_ownership);
}
StreamFile::StreamFile(FILE *fh, bool transfer_ownership) : Stream() {
diff --git a/lldb/source/Core/ValueObject.cpp b/lldb/source/Core/ValueObject.cpp
index 9c1ba99da1d0..6794d0c7331d 100644
--- a/lldb/source/Core/ValueObject.cpp
+++ b/lldb/source/Core/ValueObject.cpp
@@ -849,8 +849,10 @@ bool ValueObject::SetData(DataExtractor &data, Status &error) {
static bool CopyStringDataToBufferSP(const StreamString &source,
lldb::DataBufferSP &destination) {
- destination = std::make_shared<DataBufferHeap>(source.GetSize() + 1, 0);
- memcpy(destination->GetBytes(), source.GetString().data(), source.GetSize());
+ llvm::StringRef src = source.GetString();
+ src.consume_back(llvm::StringRef("\0", 1));
+ destination = std::make_shared<DataBufferHeap>(src.size(), 0);
+ memcpy(destination->GetBytes(), src.data(), src.size());
return true;
}
@@ -912,8 +914,8 @@ ValueObject::ReadPointedString(lldb::DataBufferSP &buffer_sp, Status &error,
CopyStringDataToBufferSP(s, buffer_sp);
return {0, was_capped};
}
- buffer_sp = std::make_shared<DataBufferHeap>(cstr_len, 0);
- memcpy(buffer_sp->GetBytes(), cstr, cstr_len);
+ s << llvm::StringRef(cstr, cstr_len);
+ CopyStringDataToBufferSP(s, buffer_sp);
return {cstr_len, was_capped};
} else {
s << "<invalid address>";
@@ -1196,6 +1198,7 @@ bool ValueObject::DumpPrintableRepresentation(
options.SetQuote('"');
options.SetSourceSize(buffer_sp->GetByteSize());
options.SetIsTruncated(read_string.second);
+ options.SetBinaryZeroIsTerminator(custom_format != eFormatVectorOfChar);
formatters::StringPrinter::ReadBufferAndDumpToStream<
lldb_private::formatters::StringPrinter::StringElementType::ASCII>(
options);
diff --git a/lldb/source/Core/ValueObjectConstResultImpl.cpp b/lldb/source/Core/ValueObjectConstResultImpl.cpp
index 980cea049f6f..fee1da138bbc 100644
--- a/lldb/source/Core/ValueObjectConstResultImpl.cpp
+++ b/lldb/source/Core/ValueObjectConstResultImpl.cpp
@@ -34,7 +34,7 @@ using namespace lldb_private;
ValueObjectConstResultImpl::ValueObjectConstResultImpl(
ValueObject *valobj, lldb::addr_t live_address)
: m_impl_backend(valobj), m_live_address(live_address),
- m_live_address_type(eAddressTypeLoad), m_load_addr_backend(),
+ m_live_address_type(eAddressTypeLoad),
m_address_of_backend() {}
lldb::ValueObjectSP ValueObjectConstResultImpl::Dereference(Status &error) {
diff --git a/lldb/source/Core/ValueObjectDynamicValue.cpp b/lldb/source/Core/ValueObjectDynamicValue.cpp
index d77509496509..bf087f33c0e9 100644
--- a/lldb/source/Core/ValueObjectDynamicValue.cpp
+++ b/lldb/source/Core/ValueObjectDynamicValue.cpp
@@ -36,10 +36,6 @@ ValueObjectDynamicValue::ValueObjectDynamicValue(
SetName(parent.GetName());
}
-ValueObjectDynamicValue::~ValueObjectDynamicValue() {
- m_owning_valobj_sp.reset();
-}
-
CompilerType ValueObjectDynamicValue::GetCompilerTypeImpl() {
const bool success = UpdateValueIfNeeded(false);
if (success) {
diff --git a/lldb/source/Core/ValueObjectRegister.cpp b/lldb/source/Core/ValueObjectRegister.cpp
index 089fd7667080..743083a2d1ed 100644
--- a/lldb/source/Core/ValueObjectRegister.cpp
+++ b/lldb/source/Core/ValueObjectRegister.cpp
@@ -118,8 +118,9 @@ ValueObject *ValueObjectRegisterSet::CreateChildAtIndex(
if (m_reg_ctx_sp && m_reg_set) {
const size_t num_children = GetNumChildren();
if (idx < num_children)
- valobj = new ValueObjectRegister(*this, m_reg_ctx_sp,
- m_reg_set->registers[idx]);
+ valobj = new ValueObjectRegister(
+ *this, m_reg_ctx_sp,
+ m_reg_ctx_sp->GetRegisterInfoAtIndex(m_reg_set->registers[idx]));
}
return valobj;
}
@@ -132,8 +133,7 @@ ValueObjectRegisterSet::GetChildMemberWithName(ConstString name,
const RegisterInfo *reg_info =
m_reg_ctx_sp->GetRegisterInfoByName(name.GetStringRef());
if (reg_info != nullptr)
- valobj = new ValueObjectRegister(*this, m_reg_ctx_sp,
- reg_info->kinds[eRegisterKindLLDB]);
+ valobj = new ValueObjectRegister(*this, m_reg_ctx_sp, reg_info);
}
if (valobj)
return valobj->GetSP();
@@ -155,8 +155,7 @@ ValueObjectRegisterSet::GetIndexOfChildWithName(ConstString name) {
#pragma mark -
#pragma mark ValueObjectRegister
-void ValueObjectRegister::ConstructObject(uint32_t reg_num) {
- const RegisterInfo *reg_info = m_reg_ctx_sp->GetRegisterInfoAtIndex(reg_num);
+void ValueObjectRegister::ConstructObject(const RegisterInfo *reg_info) {
if (reg_info) {
m_reg_info = *reg_info;
if (reg_info->name)
@@ -168,29 +167,29 @@ void ValueObjectRegister::ConstructObject(uint32_t reg_num) {
ValueObjectRegister::ValueObjectRegister(ValueObject &parent,
lldb::RegisterContextSP &reg_ctx_sp,
- uint32_t reg_num)
+ const RegisterInfo *reg_info)
: ValueObject(parent), m_reg_ctx_sp(reg_ctx_sp), m_reg_info(),
m_reg_value(), m_type_name(), m_compiler_type() {
assert(reg_ctx_sp.get());
- ConstructObject(reg_num);
+ ConstructObject(reg_info);
}
ValueObjectSP ValueObjectRegister::Create(ExecutionContextScope *exe_scope,
lldb::RegisterContextSP &reg_ctx_sp,
- uint32_t reg_num) {
+ const RegisterInfo *reg_info) {
auto manager_sp = ValueObjectManager::Create();
- return (new ValueObjectRegister(exe_scope, *manager_sp, reg_ctx_sp, reg_num))
+ return (new ValueObjectRegister(exe_scope, *manager_sp, reg_ctx_sp, reg_info))
->GetSP();
}
ValueObjectRegister::ValueObjectRegister(ExecutionContextScope *exe_scope,
ValueObjectManager &manager,
lldb::RegisterContextSP &reg_ctx,
- uint32_t reg_num)
+ const RegisterInfo *reg_info)
: ValueObject(exe_scope, manager), m_reg_ctx_sp(reg_ctx), m_reg_info(),
m_reg_value(), m_type_name(), m_compiler_type() {
assert(reg_ctx);
- ConstructObject(reg_num);
+ ConstructObject(reg_info);
}
ValueObjectRegister::~ValueObjectRegister() = default;
diff --git a/lldb/source/DataFormatters/FormatManager.cpp b/lldb/source/DataFormatters/FormatManager.cpp
index 6c824d1f7728..cda1ae60d857 100644
--- a/lldb/source/DataFormatters/FormatManager.cpp
+++ b/lldb/source/DataFormatters/FormatManager.cpp
@@ -722,9 +722,9 @@ void FormatManager::LoadSystemFormatters() {
new StringSummaryFormat(string_flags, "${var%s}"));
lldb::TypeSummaryImplSP string_array_format(
- new StringSummaryFormat(string_array_flags, "${var%s}"));
+ new StringSummaryFormat(string_array_flags, "${var%char[]}"));
- RegularExpression any_size_char_arr(llvm::StringRef("char \\[[0-9]+\\]"));
+ RegularExpression any_size_char_arr(llvm::StringRef("char ?\\[[0-9]+\\]"));
TypeCategoryImpl::SharedPointer sys_category_sp =
GetCategory(m_system_category_name);
@@ -773,12 +773,11 @@ void FormatManager::LoadVectorFormatters() {
AddStringSummary(vectors_category_sp, "${var.uint128}",
ConstString("builtin_type_vec128"), vector_flags);
-
- AddStringSummary(vectors_category_sp, "", ConstString("float [4]"),
+ AddStringSummary(vectors_category_sp, "", ConstString("float[4]"),
vector_flags);
- AddStringSummary(vectors_category_sp, "", ConstString("int32_t [4]"),
+ AddStringSummary(vectors_category_sp, "", ConstString("int32_t[4]"),
vector_flags);
- AddStringSummary(vectors_category_sp, "", ConstString("int16_t [8]"),
+ AddStringSummary(vectors_category_sp, "", ConstString("int16_t[8]"),
vector_flags);
AddStringSummary(vectors_category_sp, "", ConstString("vDouble"),
vector_flags);
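The widened regular expression above ("char ?\[[0-9]+\]") accounts for type printers that emit the array type with or without a space before the brackets, so both spellings now pick up the string-array summary. A tiny self-contained check of that pattern using llvm::Regex, purely as an illustration of what it matches, not how FormatManager applies it:

#include <cassert>
#include "llvm/Support/Regex.h"

int main() {
  llvm::Regex any_size_char_arr("char ?\\[[0-9]+\\]");
  assert(any_size_char_arr.match("char [16]")); // old spelling, with space
  assert(any_size_char_arr.match("char[16]"));  // new spelling, no space
  return 0;
}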
diff --git a/lldb/source/DataFormatters/FormattersHelpers.cpp b/lldb/source/DataFormatters/FormattersHelpers.cpp
index 7944ff06eee5..b9c6b017e293 100644
--- a/lldb/source/DataFormatters/FormattersHelpers.cpp
+++ b/lldb/source/DataFormatters/FormattersHelpers.cpp
@@ -10,7 +10,7 @@
#include "lldb/DataFormatters/FormattersHelpers.h"
-
+#include "lldb/Core/Module.h"
#include "lldb/Target/StackFrame.h"
#include "lldb/Target/Target.h"
#include "lldb/Target/Thread.h"
@@ -131,14 +131,17 @@ size_t lldb_private::formatters::ExtractIndexFromString(const char *item_name) {
return idx;
}
-lldb::addr_t
+Address
lldb_private::formatters::GetArrayAddressOrPointerValue(ValueObject &valobj) {
lldb::addr_t data_addr = LLDB_INVALID_ADDRESS;
+ AddressType type;
if (valobj.IsPointerType())
- data_addr = valobj.GetValueAsUnsigned(0);
+ data_addr = valobj.GetPointerValue(&type);
else if (valobj.IsArrayType())
- data_addr = valobj.GetAddressOf();
+ data_addr = valobj.GetAddressOf(/*scalar_is_load_address=*/true, &type);
+ if (data_addr != LLDB_INVALID_ADDRESS && type == eAddressTypeFile)
+ return Address(data_addr, valobj.GetModule()->GetSectionList());
return data_addr;
}
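Returning an Address rather than a raw lldb::addr_t lets the formatters work on file addresses from object files with no live process: when the value's address type is eAddressTypeFile, the result is bound to the module's section list. A brief consumer-side sketch of resolving such an Address, using only calls that also appear in this patch (Address::GetLoadAddress and GetFileAddress); the helper name is hypothetical:

#include "lldb/Core/Address.h"
#include "lldb/Target/Target.h"
#include "lldb/lldb-defines.h"

using namespace lldb_private;

// Hypothetical consumer: with a live target prefer the load address;
// otherwise fall back to the file address, which is meaningful because the
// Address carries the module's section list.
static lldb::addr_t ResolveForRead(const Address &addr, Target *target) {
  if (target) {
    lldb::addr_t load_addr = addr.GetLoadAddress(target);
    if (load_addr != LLDB_INVALID_ADDRESS)
      return load_addr;
  }
  return addr.GetFileAddress();
}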
diff --git a/lldb/source/DataFormatters/StringPrinter.cpp b/lldb/source/DataFormatters/StringPrinter.cpp
index 0c6438f7dd86..ec8664ebe17a 100644
--- a/lldb/source/DataFormatters/StringPrinter.cpp
+++ b/lldb/source/DataFormatters/StringPrinter.cpp
@@ -408,8 +408,8 @@ static bool ReadEncodedBufferAndDumpToStream(
options.GetLocation() == LLDB_INVALID_ADDRESS)
return false;
- lldb::ProcessSP process_sp(options.GetProcessSP());
- if (!process_sp)
+ lldb::TargetSP target_sp = options.GetTargetSP();
+ if (!target_sp)
return false;
constexpr int type_width = sizeof(SourceDataType);
@@ -423,7 +423,7 @@ static bool ReadEncodedBufferAndDumpToStream(
bool needs_zero_terminator = options.GetNeedsZeroTermination();
bool is_truncated = false;
- const auto max_size = process_sp->GetTarget().GetMaximumSizeOfStringSummary();
+ const auto max_size = target_sp->GetMaximumSizeOfStringSummary();
uint32_t sourceSize;
if (elem_type == StringElementType::ASCII && !options.GetSourceSize()) {
@@ -462,24 +462,22 @@ static bool ReadEncodedBufferAndDumpToStream(
char *buffer = reinterpret_cast<char *>(buffer_sp->GetBytes());
if (elem_type == StringElementType::ASCII)
- process_sp->ReadCStringFromMemory(options.GetLocation(), buffer,
+ target_sp->ReadCStringFromMemory(options.GetLocation(), buffer,
bufferSPSize, error);
else if (needs_zero_terminator)
- process_sp->ReadStringFromMemory(options.GetLocation(), buffer,
+ target_sp->ReadStringFromMemory(options.GetLocation(), buffer,
bufferSPSize, error, type_width);
else
- process_sp->ReadMemoryFromInferior(options.GetLocation(), buffer,
- bufferSPSize, error);
+ target_sp->ReadMemory(options.GetLocation(), buffer, bufferSPSize, error);
if (error.Fail()) {
options.GetStream()->Printf("unable to read data");
return true;
}
- DataExtractor data(buffer_sp, process_sp->GetByteOrder(),
- process_sp->GetAddressByteSize());
-
StringPrinter::ReadBufferAndDumpToStreamOptions dump_options(options);
- dump_options.SetData(data);
+ dump_options.SetData(
+ DataExtractor(buffer_sp, target_sp->GetArchitecture().GetByteOrder(),
+ target_sp->GetArchitecture().GetAddressByteSize()));
dump_options.SetSourceSize(sourceSize);
dump_options.SetIsTruncated(is_truncated);
dump_options.SetNeedsZeroTermination(needs_zero_terminator);
diff --git a/lldb/source/Expression/FunctionCaller.cpp b/lldb/source/Expression/FunctionCaller.cpp
index 5f1eb24a905a..5f34675b4b64 100644
--- a/lldb/source/Expression/FunctionCaller.cpp
+++ b/lldb/source/Expression/FunctionCaller.cpp
@@ -254,7 +254,7 @@ lldb::ThreadPlanSP FunctionCaller::GetThreadPlanToCallFunction(
lldb::ThreadPlanSP new_plan_sp(new ThreadPlanCallFunction(
*thread, wrapper_address, CompilerType(), args, options));
- new_plan_sp->SetIsMasterPlan(true);
+ new_plan_sp->SetIsControllingPlan(true);
new_plan_sp->SetOkayToDiscard(false);
return new_plan_sp;
}
diff --git a/lldb/source/Expression/IRExecutionUnit.cpp b/lldb/source/Expression/IRExecutionUnit.cpp
index 63184ba477a6..f2d22f7ed9cc 100644
--- a/lldb/source/Expression/IRExecutionUnit.cpp
+++ b/lldb/source/Expression/IRExecutionUnit.cpp
@@ -26,6 +26,7 @@
#include "lldb/Symbol/SymbolFile.h"
#include "lldb/Symbol/SymbolVendor.h"
#include "lldb/Target/ExecutionContext.h"
+#include "lldb/Target/Language.h"
#include "lldb/Target/LanguageRuntime.h"
#include "lldb/Target/Target.h"
#include "lldb/Utility/DataBufferHeap.h"
@@ -33,7 +34,6 @@
#include "lldb/Utility/LLDBAssert.h"
#include "lldb/Utility/Log.h"
-#include "lldb/../../source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h"
#include "lldb/../../source/Plugins/ObjectFile/JIT/ObjectFileJIT.h"
using namespace lldb_private;
@@ -652,257 +652,168 @@ uint8_t *IRExecutionUnit::MemoryManager::allocateDataSection(
return return_value;
}
-static ConstString FindBestAlternateMangledName(ConstString demangled,
- const SymbolContext &sym_ctx) {
- CPlusPlusLanguage::MethodName cpp_name(demangled);
- std::string scope_qualified_name = cpp_name.GetScopeQualifiedName();
-
- if (!scope_qualified_name.size())
- return ConstString();
-
- if (!sym_ctx.module_sp)
- return ConstString();
-
- lldb_private::SymbolFile *sym_file = sym_ctx.module_sp->GetSymbolFile();
- if (!sym_file)
- return ConstString();
-
- std::vector<ConstString> alternates;
- sym_file->GetMangledNamesForFunction(scope_qualified_name, alternates);
+void IRExecutionUnit::CollectCandidateCNames(std::vector<ConstString> &C_names,
+ ConstString name) {
+ if (m_strip_underscore && name.AsCString()[0] == '_')
+ C_names.insert(C_names.begin(), ConstString(&name.AsCString()[1]));
+ C_names.push_back(name);
+}
- std::vector<ConstString> param_and_qual_matches;
- std::vector<ConstString> param_matches;
- for (size_t i = 0; i < alternates.size(); i++) {
- ConstString alternate_mangled_name = alternates[i];
- Mangled mangled(alternate_mangled_name);
- ConstString demangled = mangled.GetDemangledName();
+void IRExecutionUnit::CollectCandidateCPlusPlusNames(
+ std::vector<ConstString> &CPP_names,
+ const std::vector<ConstString> &C_names, const SymbolContext &sc) {
+ if (auto *cpp_lang = Language::FindPlugin(lldb::eLanguageTypeC_plus_plus)) {
+ for (const ConstString &name : C_names) {
+ Mangled mangled(name);
+ if (cpp_lang->SymbolNameFitsToLanguage(mangled)) {
+ if (ConstString best_alternate =
+ cpp_lang->FindBestAlternateFunctionMangledName(mangled, sc)) {
+ CPP_names.push_back(best_alternate);
+ }
+ }
- CPlusPlusLanguage::MethodName alternate_cpp_name(demangled);
- if (!cpp_name.IsValid())
- continue;
+ std::vector<ConstString> alternates =
+ cpp_lang->GenerateAlternateFunctionManglings(name);
+ CPP_names.insert(CPP_names.end(), alternates.begin(), alternates.end());
- if (alternate_cpp_name.GetArguments() == cpp_name.GetArguments()) {
- if (alternate_cpp_name.GetQualifiers() == cpp_name.GetQualifiers())
- param_and_qual_matches.push_back(alternate_mangled_name);
- else
- param_matches.push_back(alternate_mangled_name);
+ // As a last-ditch fallback, try the base name for C++ names. It's
+ // terrible, but the DWARF doesn't always encode "extern C" correctly.
+ ConstString basename =
+ cpp_lang->GetDemangledFunctionNameWithoutArguments(mangled);
+ CPP_names.push_back(basename);
}
}
-
- if (param_and_qual_matches.size())
- return param_and_qual_matches[0]; // It is assumed that there will be only
- // one!
- else if (param_matches.size())
- return param_matches[0]; // Return one of them as a best match
- else
- return ConstString();
}
-struct IRExecutionUnit::SearchSpec {
- ConstString name;
- lldb::FunctionNameType mask;
-
- SearchSpec(ConstString n,
- lldb::FunctionNameType m = lldb::eFunctionNameTypeFull)
- : name(n), mask(m) {}
-};
+class LoadAddressResolver {
+public:
+ LoadAddressResolver(Target *target, bool &symbol_was_missing_weak)
+ : m_target(target), m_symbol_was_missing_weak(symbol_was_missing_weak) {}
-void IRExecutionUnit::CollectCandidateCNames(
- std::vector<IRExecutionUnit::SearchSpec> &C_specs,
- ConstString name) {
- if (m_strip_underscore && name.AsCString()[0] == '_')
- C_specs.insert(C_specs.begin(), ConstString(&name.AsCString()[1]));
- C_specs.push_back(SearchSpec(name));
-}
-
-void IRExecutionUnit::CollectCandidateCPlusPlusNames(
- std::vector<IRExecutionUnit::SearchSpec> &CPP_specs,
- const std::vector<SearchSpec> &C_specs, const SymbolContext &sc) {
- for (const SearchSpec &C_spec : C_specs) {
- ConstString name = C_spec.name;
+ llvm::Optional<lldb::addr_t> Resolve(SymbolContextList &sc_list) {
+ if (sc_list.IsEmpty())
+ return llvm::None;
- if (CPlusPlusLanguage::IsCPPMangledName(name.GetCString())) {
- Mangled mangled(name);
- ConstString demangled = mangled.GetDemangledName();
-
- if (demangled) {
- ConstString best_alternate_mangled_name =
- FindBestAlternateMangledName(demangled, sc);
+ lldb::addr_t load_address = LLDB_INVALID_ADDRESS;
- if (best_alternate_mangled_name) {
- CPP_specs.push_back(best_alternate_mangled_name);
+ // Missing_weak_symbol will be true only if we found only weak undefined
+ // references to this symbol.
+ m_symbol_was_missing_weak = true;
+
+ for (auto candidate_sc : sc_list.SymbolContexts()) {
+ // Only symbols can be weak undefined.
+ if (!candidate_sc.symbol ||
+ candidate_sc.symbol->GetType() != lldb::eSymbolTypeUndefined ||
+ !candidate_sc.symbol->IsWeak())
+ m_symbol_was_missing_weak = false;
+
+ // First try the symbol.
+ if (candidate_sc.symbol) {
+ load_address = candidate_sc.symbol->ResolveCallableAddress(*m_target);
+ if (load_address == LLDB_INVALID_ADDRESS) {
+ Address addr = candidate_sc.symbol->GetAddress();
+ load_address = m_target->GetProcessSP()
+ ? addr.GetLoadAddress(m_target)
+ : addr.GetFileAddress();
}
}
- }
- std::set<ConstString> alternates;
- CPlusPlusLanguage::FindAlternateFunctionManglings(name, alternates);
- CPP_specs.insert(CPP_specs.end(), alternates.begin(), alternates.end());
- }
-}
-
-void IRExecutionUnit::CollectFallbackNames(
- std::vector<SearchSpec> &fallback_specs,
- const std::vector<SearchSpec> &C_specs) {
- // As a last-ditch fallback, try the base name for C++ names. It's terrible,
- // but the DWARF doesn't always encode "extern C" correctly.
+ // If that didn't work, try the function.
+ if (load_address == LLDB_INVALID_ADDRESS && candidate_sc.function) {
+ Address addr =
+ candidate_sc.function->GetAddressRange().GetBaseAddress();
+ load_address = m_target->GetProcessSP() ? addr.GetLoadAddress(m_target)
+ : addr.GetFileAddress();
+ }
- for (const SearchSpec &C_spec : C_specs) {
- ConstString name = C_spec.name;
+ // We found a load address.
+ if (load_address != LLDB_INVALID_ADDRESS) {
+ // If the load address is external, we're done.
+ const bool is_external =
+ (candidate_sc.function) ||
+ (candidate_sc.symbol && candidate_sc.symbol->IsExternal());
+ if (is_external)
+ return load_address;
- if (!CPlusPlusLanguage::IsCPPMangledName(name.GetCString()))
- continue;
+ // Otherwise, remember the best internal load address.
+ if (m_best_internal_load_address == LLDB_INVALID_ADDRESS)
+ m_best_internal_load_address = load_address;
+ }
+ }
- Mangled mangled_name(name);
- ConstString demangled_name = mangled_name.GetDemangledName();
- if (demangled_name.IsEmpty())
- continue;
+ // You test the address of a weak symbol against NULL to see if it is
+ // present. So we should return 0 for a missing weak symbol.
+ if (m_symbol_was_missing_weak)
+ return 0;
- const char *demangled_cstr = demangled_name.AsCString();
- const char *lparen_loc = strchr(demangled_cstr, '(');
- if (!lparen_loc)
- continue;
+ return llvm::None;
+ }
- llvm::StringRef base_name(demangled_cstr,
- lparen_loc - demangled_cstr);
- fallback_specs.push_back(ConstString(base_name));
+ lldb::addr_t GetBestInternalLoadAddress() const {
+ return m_best_internal_load_address;
}
-}
-lldb::addr_t IRExecutionUnit::FindInSymbols(
- const std::vector<IRExecutionUnit::SearchSpec> &specs,
- const lldb_private::SymbolContext &sc,
- bool &symbol_was_missing_weak) {
+private:
+ Target *m_target;
+ bool &m_symbol_was_missing_weak;
+ lldb::addr_t m_best_internal_load_address = LLDB_INVALID_ADDRESS;
+};
+
+lldb::addr_t
+IRExecutionUnit::FindInSymbols(const std::vector<ConstString> &names,
+ const lldb_private::SymbolContext &sc,
+ bool &symbol_was_missing_weak) {
symbol_was_missing_weak = false;
- Target *target = sc.target_sp.get();
+ Target *target = sc.target_sp.get();
if (!target) {
- // we shouldn't be doing any symbol lookup at all without a target
+ // We shouldn't be doing any symbol lookup at all without a target.
return LLDB_INVALID_ADDRESS;
}
- for (const SearchSpec &spec : specs) {
- SymbolContextList sc_list;
-
- lldb::addr_t best_internal_load_address = LLDB_INVALID_ADDRESS;
-
- std::function<bool(lldb::addr_t &, SymbolContextList &,
- const lldb_private::SymbolContext &)>
- get_external_load_address = [&best_internal_load_address, target,
- &symbol_was_missing_weak](
- lldb::addr_t &load_address, SymbolContextList &sc_list,
- const lldb_private::SymbolContext &sc) -> lldb::addr_t {
- load_address = LLDB_INVALID_ADDRESS;
-
- if (sc_list.GetSize() == 0)
- return false;
-
- // missing_weak_symbol will be true only if we found only weak undefined
- // references to this symbol.
- symbol_was_missing_weak = true;
- for (auto candidate_sc : sc_list.SymbolContexts()) {
- // Only symbols can be weak undefined:
- if (!candidate_sc.symbol)
- symbol_was_missing_weak = false;
- else if (candidate_sc.symbol->GetType() != lldb::eSymbolTypeUndefined
- || !candidate_sc.symbol->IsWeak())
- symbol_was_missing_weak = false;
-
- const bool is_external =
- (candidate_sc.function) ||
- (candidate_sc.symbol && candidate_sc.symbol->IsExternal());
- if (candidate_sc.symbol) {
- load_address = candidate_sc.symbol->ResolveCallableAddress(*target);
-
- if (load_address == LLDB_INVALID_ADDRESS) {
- if (target->GetProcessSP())
- load_address =
- candidate_sc.symbol->GetAddress().GetLoadAddress(target);
- else
- load_address = candidate_sc.symbol->GetAddress().GetFileAddress();
- }
- }
-
- if (load_address == LLDB_INVALID_ADDRESS && candidate_sc.function) {
- if (target->GetProcessSP())
- load_address = candidate_sc.function->GetAddressRange()
- .GetBaseAddress()
- .GetLoadAddress(target);
- else
- load_address = candidate_sc.function->GetAddressRange()
- .GetBaseAddress()
- .GetFileAddress();
- }
+ LoadAddressResolver resolver(target, symbol_was_missing_weak);
- if (load_address != LLDB_INVALID_ADDRESS) {
- if (is_external) {
- return true;
- } else if (best_internal_load_address == LLDB_INVALID_ADDRESS) {
- best_internal_load_address = load_address;
- load_address = LLDB_INVALID_ADDRESS;
- }
- }
- }
-
- // You test the address of a weak symbol against NULL to see if it is
- // present. So we should return 0 for a missing weak symbol.
- if (symbol_was_missing_weak) {
- load_address = 0;
- return true;
- }
-
- return false;
- };
+ ModuleFunctionSearchOptions function_options;
+ function_options.include_symbols = true;
+ function_options.include_inlines = false;
+ for (const ConstString &name : names) {
if (sc.module_sp) {
- sc.module_sp->FindFunctions(spec.name, CompilerDeclContext(), spec.mask,
- true, // include_symbols
- false, // include_inlines
+ SymbolContextList sc_list;
+ sc.module_sp->FindFunctions(name, CompilerDeclContext(),
+ lldb::eFunctionNameTypeFull, function_options,
sc_list);
+ if (auto load_addr = resolver.Resolve(sc_list))
+ return *load_addr;
}
- lldb::addr_t load_address = LLDB_INVALID_ADDRESS;
-
- if (get_external_load_address(load_address, sc_list, sc)) {
- return load_address;
- } else {
- sc_list.Clear();
- }
-
- if (sc_list.GetSize() == 0 && sc.target_sp) {
- sc.target_sp->GetImages().FindFunctions(spec.name, spec.mask,
- true, // include_symbols
- false, // include_inlines
- sc_list);
+ if (sc.target_sp) {
+ SymbolContextList sc_list;
+ sc.target_sp->GetImages().FindFunctions(name, lldb::eFunctionNameTypeFull,
+ function_options, sc_list);
+ if (auto load_addr = resolver.Resolve(sc_list))
+ return *load_addr;
}
- if (get_external_load_address(load_address, sc_list, sc)) {
- return load_address;
- } else {
- sc_list.Clear();
- }
-
- if (sc_list.GetSize() == 0 && sc.target_sp) {
+ if (sc.target_sp) {
+ SymbolContextList sc_list;
sc.target_sp->GetImages().FindSymbolsWithNameAndType(
- spec.name, lldb::eSymbolTypeAny, sc_list);
- }
-
- if (get_external_load_address(load_address, sc_list, sc)) {
- return load_address;
+ name, lldb::eSymbolTypeAny, sc_list);
+ if (auto load_addr = resolver.Resolve(sc_list))
+ return *load_addr;
}
- // if there are any searches we try after this, add an sc_list.Clear() in
- // an "else" clause here
- if (best_internal_load_address != LLDB_INVALID_ADDRESS) {
+ lldb::addr_t best_internal_load_address =
+ resolver.GetBestInternalLoadAddress();
+ if (best_internal_load_address != LLDB_INVALID_ADDRESS)
return best_internal_load_address;
- }
}
return LLDB_INVALID_ADDRESS;
}
lldb::addr_t
-IRExecutionUnit::FindInRuntimes(const std::vector<SearchSpec> &specs,
+IRExecutionUnit::FindInRuntimes(const std::vector<ConstString> &names,
const lldb_private::SymbolContext &sc) {
lldb::TargetSP target_sp = sc.target_sp;
@@ -916,9 +827,9 @@ IRExecutionUnit::FindInRuntimes(const std::vector<SearchSpec> &specs,
return LLDB_INVALID_ADDRESS;
}
- for (const SearchSpec &spec : specs) {
+ for (const ConstString &name : names) {
for (LanguageRuntime *runtime : process_sp->GetLanguageRuntimes()) {
- lldb::addr_t symbol_load_addr = runtime->LookupRuntimeSymbol(spec.name);
+ lldb::addr_t symbol_load_addr = runtime->LookupRuntimeSymbol(name);
if (symbol_load_addr != LLDB_INVALID_ADDRESS)
return symbol_load_addr;
@@ -929,12 +840,12 @@ IRExecutionUnit::FindInRuntimes(const std::vector<SearchSpec> &specs,
}
lldb::addr_t IRExecutionUnit::FindInUserDefinedSymbols(
- const std::vector<SearchSpec> &specs,
+ const std::vector<ConstString> &names,
const lldb_private::SymbolContext &sc) {
lldb::TargetSP target_sp = sc.target_sp;
- for (const SearchSpec &spec : specs) {
- lldb::addr_t symbol_load_addr = target_sp->GetPersistentSymbol(spec.name);
+ for (const ConstString &name : names) {
+ lldb::addr_t symbol_load_addr = target_sp->GetPersistentSymbol(name);
if (symbol_load_addr != LLDB_INVALID_ADDRESS)
return symbol_load_addr;
@@ -943,18 +854,18 @@ lldb::addr_t IRExecutionUnit::FindInUserDefinedSymbols(
return LLDB_INVALID_ADDRESS;
}
-lldb::addr_t
-IRExecutionUnit::FindSymbol(lldb_private::ConstString name, bool &missing_weak) {
- std::vector<SearchSpec> candidate_C_names;
- std::vector<SearchSpec> candidate_CPlusPlus_names;
+lldb::addr_t IRExecutionUnit::FindSymbol(lldb_private::ConstString name,
+ bool &missing_weak) {
+ std::vector<ConstString> candidate_C_names;
+ std::vector<ConstString> candidate_CPlusPlus_names;
CollectCandidateCNames(candidate_C_names, name);
-
+
lldb::addr_t ret = FindInSymbols(candidate_C_names, m_sym_ctx, missing_weak);
if (ret != LLDB_INVALID_ADDRESS)
return ret;
-
- // If we find the symbol in runtimes or user defined symbols it can't be
+
+ // If we find the symbol in runtimes or user defined symbols it can't be
// a missing weak symbol.
missing_weak = false;
ret = FindInRuntimes(candidate_C_names, m_sym_ctx);
@@ -968,14 +879,6 @@ IRExecutionUnit::FindSymbol(lldb_private::ConstString name, bool &missing_weak)
CollectCandidateCPlusPlusNames(candidate_CPlusPlus_names, candidate_C_names,
m_sym_ctx);
ret = FindInSymbols(candidate_CPlusPlus_names, m_sym_ctx, missing_weak);
- if (ret != LLDB_INVALID_ADDRESS)
- return ret;
-
- std::vector<SearchSpec> candidate_fallback_names;
-
- CollectFallbackNames(candidate_fallback_names, candidate_C_names);
- ret = FindInSymbols(candidate_fallback_names, m_sym_ctx, missing_weak);
-
return ret;
}
diff --git a/lldb/source/Expression/IRInterpreter.cpp b/lldb/source/Expression/IRInterpreter.cpp
index 788520d1f32b..9b2af56dfc8a 100644
--- a/lldb/source/Expression/IRInterpreter.cpp
+++ b/lldb/source/Expression/IRInterpreter.cpp
@@ -1398,7 +1398,7 @@ bool IRInterpreter::Interpret(llvm::Module &module, llvm::Function &function,
}
// Find number of arguments
- const int numArgs = call_inst->getNumArgOperands();
+ const int numArgs = call_inst->arg_size();
// We work with a fixed array of 16 arguments which is our upper limit
static lldb_private::ABI::CallArgument rawArgs[16];
diff --git a/lldb/source/Expression/REPL.cpp b/lldb/source/Expression/REPL.cpp
index c3d14960f74c..9cd6129eedd7 100644
--- a/lldb/source/Expression/REPL.cpp
+++ b/lldb/source/Expression/REPL.cpp
@@ -445,7 +445,7 @@ void REPL::IOHandlerInputComplete(IOHandler &io_handler, std::string &code) {
if (!m_repl_source_path.empty()) {
auto file = FileSystem::Instance().Open(
FileSpec(m_repl_source_path),
- File::eOpenOptionWrite | File::eOpenOptionTruncate |
+ File::eOpenOptionWriteOnly | File::eOpenOptionTruncate |
File::eOpenOptionCanCreate,
lldb::eFilePermissionsFileDefault);
if (file) {
diff --git a/lldb/source/Expression/UserExpression.cpp b/lldb/source/Expression/UserExpression.cpp
index eac89c24bc1e..b61781c0b82b 100644
--- a/lldb/source/Expression/UserExpression.cpp
+++ b/lldb/source/Expression/UserExpression.cpp
@@ -6,12 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#include "lldb/Host/Config.h"
-
#include <cstdio>
-#if HAVE_SYS_TYPES_H
#include <sys/types.h>
-#endif
#include <cstdlib>
#include <map>
diff --git a/lldb/source/Expression/UtilityFunction.cpp b/lldb/source/Expression/UtilityFunction.cpp
index d7a89a8e1446..1a4df9722706 100644
--- a/lldb/source/Expression/UtilityFunction.cpp
+++ b/lldb/source/Expression/UtilityFunction.cpp
@@ -6,13 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#include "lldb/Host/Config.h"
-
#include <cstdio>
-#if HAVE_SYS_TYPES_H
#include <sys/types.h>
-#endif
-
#include "lldb/Core/Module.h"
#include "lldb/Core/StreamFile.h"
diff --git a/lldb/source/Host/common/Editline.cpp b/lldb/source/Host/common/Editline.cpp
index a5598c387b8c..6898f8452161 100644
--- a/lldb/source/Host/common/Editline.cpp
+++ b/lldb/source/Host/common/Editline.cpp
@@ -205,7 +205,7 @@ private:
// Use static GetHistory() function to get a EditlineHistorySP to one of
// these objects
EditlineHistory(const std::string &prefix, uint32_t size, bool unique_entries)
- : m_history(nullptr), m_event(), m_prefix(prefix), m_path() {
+ : m_prefix(prefix) {
m_history = history_winit();
history_w(m_history, &m_event, H_SETSIZE, size);
if (unique_entries)
@@ -298,11 +298,15 @@ public:
}
protected:
- HistoryW *m_history; // The history object
- HistEventW m_event; // The history event needed to contain all history events
- std::string m_prefix; // The prefix name (usually the editline program name)
- // to use when loading/saving history
- std::string m_path; // Path to the history file
+ /// The history object.
+ HistoryW *m_history = nullptr;
+ /// The history event needed to contain all history events.
+ HistEventW m_event;
+ /// The prefix name (usually the editline program name) to use when
+ /// loading/saving history.
+ std::string m_prefix;
+ /// Path to the history file.
+ std::string m_path;
};
}
}
@@ -1006,11 +1010,11 @@ unsigned char Editline::TabCommand(int ch) {
switch (completion.GetMode()) {
case CompletionMode::Normal: {
std::string to_add = completion.GetCompletion();
- to_add = to_add.substr(request.GetCursorArgumentPrefix().size());
// Terminate the current argument with a quote if it started with a quote.
if (!request.GetParsedLine().empty() && request.GetParsedArg().IsQuoted())
to_add.push_back(request.GetParsedArg().GetQuoteChar());
to_add.push_back(' ');
+ el_deletestr(m_editline, request.GetCursorArgumentPrefix().size());
el_insertstr(m_editline, to_add.c_str());
// Clear all the autosuggestion parts if the only single space can be completed.
if (to_add == " ")
@@ -1554,8 +1558,10 @@ bool Editline::GetLines(int first_line_number, StringList &lines,
interrupted = m_editor_status == EditorStatus::Interrupted;
if (!interrupted) {
- // Save the completed entry in history before returning
- m_history_sp->Enter(CombineLines(m_input_lines).c_str());
+ // Save the completed entry in history before returning. Don't save empty
+ // input as that just clutters the command history.
+ if (!m_input_lines.empty())
+ m_history_sp->Enter(CombineLines(m_input_lines).c_str());
lines = GetInputAsStringList();
}
diff --git a/lldb/source/Host/common/File.cpp b/lldb/source/Host/common/File.cpp
index e302e0a0de09..daac1fef2f36 100644
--- a/lldb/source/Host/common/File.cpp
+++ b/lldb/source/Host/common/File.cpp
@@ -41,20 +41,23 @@ using llvm::Expected;
Expected<const char *>
File::GetStreamOpenModeFromOptions(File::OpenOptions options) {
+ File::OpenOptions rw =
+ options & (File::eOpenOptionReadOnly | File::eOpenOptionWriteOnly |
+ File::eOpenOptionReadWrite);
+
if (options & File::eOpenOptionAppend) {
- if (options & File::eOpenOptionRead) {
+ if (rw == File::eOpenOptionReadWrite) {
if (options & File::eOpenOptionCanCreateNewOnly)
return "a+x";
else
return "a+";
- } else if (options & File::eOpenOptionWrite) {
+ } else if (rw == File::eOpenOptionWriteOnly) {
if (options & File::eOpenOptionCanCreateNewOnly)
return "ax";
else
return "a";
}
- } else if (options & File::eOpenOptionRead &&
- options & File::eOpenOptionWrite) {
+ } else if (rw == File::eOpenOptionReadWrite) {
if (options & File::eOpenOptionCanCreate) {
if (options & File::eOpenOptionCanCreateNewOnly)
return "w+x";
@@ -62,10 +65,10 @@ File::GetStreamOpenModeFromOptions(File::OpenOptions options) {
return "w+";
} else
return "r+";
- } else if (options & File::eOpenOptionRead) {
- return "r";
- } else if (options & File::eOpenOptionWrite) {
+ } else if (rw == File::eOpenOptionWriteOnly) {
return "w";
+ } else if (rw == File::eOpenOptionReadOnly) {
+ return "r";
}
return llvm::createStringError(
llvm::inconvertibleErrorCode(),
@@ -75,19 +78,20 @@ File::GetStreamOpenModeFromOptions(File::OpenOptions options) {
Expected<File::OpenOptions> File::GetOptionsFromMode(llvm::StringRef mode) {
OpenOptions opts =
llvm::StringSwitch<OpenOptions>(mode)
- .Cases("r", "rb", eOpenOptionRead)
- .Cases("w", "wb", eOpenOptionWrite)
+ .Cases("r", "rb", eOpenOptionReadOnly)
+ .Cases("w", "wb", eOpenOptionWriteOnly)
.Cases("a", "ab",
- eOpenOptionWrite | eOpenOptionAppend | eOpenOptionCanCreate)
- .Cases("r+", "rb+", "r+b", eOpenOptionRead | eOpenOptionWrite)
+ eOpenOptionWriteOnly | eOpenOptionAppend |
+ eOpenOptionCanCreate)
+ .Cases("r+", "rb+", "r+b", eOpenOptionReadWrite)
.Cases("w+", "wb+", "w+b",
- eOpenOptionRead | eOpenOptionWrite | eOpenOptionCanCreate |
- eOpenOptionTruncate)
+ eOpenOptionReadWrite | eOpenOptionCanCreate |
+ eOpenOptionTruncate)
.Cases("a+", "ab+", "a+b",
- eOpenOptionRead | eOpenOptionWrite | eOpenOptionAppend |
+ eOpenOptionReadWrite | eOpenOptionAppend |
eOpenOptionCanCreate)
- .Default(OpenOptions());
- if (opts)
+ .Default(eOpenOptionInvalid);
+ if (opts != eOpenOptionInvalid)
return opts;
return llvm::createStringError(
llvm::inconvertibleErrorCode(),
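Note on the two hunks above: the patch stops treating read and write as independent option bits and instead isolates a three-valued access mode (read-only, write-only, read-write) with a mask before comparing it, both when producing a stdio mode string and when parsing one. A minimal standalone sketch of that idiom follows; the enumerator values are illustrative, not the real ones from lldb/Host/File.h, and the "x"/error branches of the real function are omitted.

#include <cstdio>

// Illustrative values only; the real enumerators live in lldb/Host/File.h.
enum OpenOptions : unsigned {
  eOpenOptionReadOnly = 1u << 0,
  eOpenOptionWriteOnly = 1u << 1,
  eOpenOptionReadWrite = eOpenOptionReadOnly | eOpenOptionWriteOnly,
  eOpenOptionAppend = 1u << 2,
  eOpenOptionCanCreate = 1u << 3,
};

static const char *StreamMode(unsigned options) {
  // Isolate the access mode first, then compare for equality; testing single
  // bits would misclassify read-write as write-only (and vice versa).
  unsigned rw = options & eOpenOptionReadWrite;
  if (options & eOpenOptionAppend)
    return rw == eOpenOptionReadWrite ? "a+" : "a";
  if (rw == eOpenOptionReadWrite)
    return (options & eOpenOptionCanCreate) ? "w+" : "r+";
  if (rw == eOpenOptionWriteOnly)
    return "w";
  return "r";
}

int main() {
  std::printf("%s\n", StreamMode(eOpenOptionReadWrite));                     // "r+"
  std::printf("%s\n", StreamMode(eOpenOptionWriteOnly | eOpenOptionAppend)); // "a"
}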
@@ -310,9 +314,15 @@ Status NativeFile::Close() {
if (m_own_stream) {
if (::fclose(m_stream) == EOF)
error.SetErrorToErrno();
- } else if (m_options & eOpenOptionWrite) {
- if (::fflush(m_stream) == EOF)
- error.SetErrorToErrno();
+ } else {
+ File::OpenOptions rw =
+ m_options & (File::eOpenOptionReadOnly | File::eOpenOptionWriteOnly |
+ File::eOpenOptionReadWrite);
+
+ if (rw == eOpenOptionWriteOnly || rw == eOpenOptionReadWrite) {
+ if (::fflush(m_stream) == EOF)
+ error.SetErrorToErrno();
+ }
}
}
if (DescriptorIsValid() && m_own_descriptor) {
@@ -732,10 +742,15 @@ size_t NativeFile::PrintfVarArg(const char *format, va_list args) {
mode_t File::ConvertOpenOptionsForPOSIXOpen(OpenOptions open_options) {
mode_t mode = 0;
- if (open_options & eOpenOptionRead && open_options & eOpenOptionWrite)
+ File::OpenOptions rw =
+ open_options & (File::eOpenOptionReadOnly | File::eOpenOptionWriteOnly |
+ File::eOpenOptionReadWrite);
+ if (rw == eOpenOptionReadWrite)
mode |= O_RDWR;
- else if (open_options & eOpenOptionWrite)
+ else if (rw == eOpenOptionWriteOnly)
mode |= O_WRONLY;
+ else if (rw == eOpenOptionReadOnly)
+ mode |= O_RDONLY;
if (open_options & eOpenOptionAppend)
mode |= O_APPEND;
@@ -754,5 +769,107 @@ mode_t File::ConvertOpenOptionsForPOSIXOpen(OpenOptions open_options) {
return mode;
}
+llvm::Expected<SerialPort::Options>
+SerialPort::OptionsFromURL(llvm::StringRef urlqs) {
+ SerialPort::Options serial_options;
+ for (llvm::StringRef x : llvm::split(urlqs, '&')) {
+ if (x.consume_front("baud=")) {
+ unsigned int baud_rate;
+ if (!llvm::to_integer(x, baud_rate, 10))
+ return llvm::createStringError(llvm::inconvertibleErrorCode(),
+ "Invalid baud rate: %s",
+ x.str().c_str());
+ serial_options.BaudRate = baud_rate;
+ } else if (x.consume_front("parity=")) {
+ serial_options.Parity =
+ llvm::StringSwitch<llvm::Optional<Terminal::Parity>>(x)
+ .Case("no", Terminal::Parity::No)
+ .Case("even", Terminal::Parity::Even)
+ .Case("odd", Terminal::Parity::Odd)
+ .Case("mark", Terminal::Parity::Mark)
+ .Case("space", Terminal::Parity::Space)
+ .Default(llvm::None);
+ if (!serial_options.Parity)
+ return llvm::createStringError(
+ llvm::inconvertibleErrorCode(),
+ "Invalid parity (must be no, even, odd, mark or space): %s",
+ x.str().c_str());
+ } else if (x.consume_front("parity-check=")) {
+ serial_options.ParityCheck =
+ llvm::StringSwitch<llvm::Optional<Terminal::ParityCheck>>(x)
+ .Case("no", Terminal::ParityCheck::No)
+ .Case("replace", Terminal::ParityCheck::ReplaceWithNUL)
+ .Case("ignore", Terminal::ParityCheck::Ignore)
+ // "mark" mode is not currently supported as it requires special
+ // input processing
+ // .Case("mark", Terminal::ParityCheck::Mark)
+ .Default(llvm::None);
+ if (!serial_options.ParityCheck)
+ return llvm::createStringError(
+ llvm::inconvertibleErrorCode(),
+ "Invalid parity-check (must be no, replace, ignore or mark): %s",
+ x.str().c_str());
+ } else if (x.consume_front("stop-bits=")) {
+ unsigned int stop_bits;
+ if (!llvm::to_integer(x, stop_bits, 10) ||
+ (stop_bits != 1 && stop_bits != 2))
+ return llvm::createStringError(
+ llvm::inconvertibleErrorCode(),
+ "Invalid stop bit number (must be 1 or 2): %s", x.str().c_str());
+ serial_options.StopBits = stop_bits;
+ } else
+ return llvm::createStringError(llvm::inconvertibleErrorCode(),
+ "Unknown parameter: %s", x.str().c_str());
+ }
+ return serial_options;
+}
+
+llvm::Expected<std::unique_ptr<SerialPort>>
+SerialPort::Create(int fd, OpenOptions options, Options serial_options,
+ bool transfer_ownership) {
+ std::unique_ptr<SerialPort> out{
+ new SerialPort(fd, options, serial_options, transfer_ownership)};
+
+ if (!out->GetIsInteractive())
+ return llvm::createStringError(llvm::inconvertibleErrorCode(),
+ "the specified file is not a teletype");
+
+ Terminal term{fd};
+ if (llvm::Error error = term.SetRaw())
+ return std::move(error);
+ if (serial_options.BaudRate) {
+ if (llvm::Error error =
+ term.SetBaudRate(serial_options.BaudRate.getValue()))
+ return std::move(error);
+ }
+ if (serial_options.Parity) {
+ if (llvm::Error error = term.SetParity(serial_options.Parity.getValue()))
+ return std::move(error);
+ }
+ if (serial_options.ParityCheck) {
+ if (llvm::Error error =
+ term.SetParityCheck(serial_options.ParityCheck.getValue()))
+ return std::move(error);
+ }
+ if (serial_options.StopBits) {
+ if (llvm::Error error =
+ term.SetStopBits(serial_options.StopBits.getValue()))
+ return std::move(error);
+ }
+
+ return std::move(out);
+}
+
+SerialPort::SerialPort(int fd, OpenOptions options,
+ SerialPort::Options serial_options,
+ bool transfer_ownership)
+ : NativeFile(fd, options, transfer_ownership), m_state(fd) {}
+
+Status SerialPort::Close() {
+ m_state.Restore();
+ return NativeFile::Close();
+}
+
char File::ID = 0;
char NativeFile::ID = 0;
+char SerialPort::ID = 0;
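The SerialPort class introduced above takes its settings as URL-style query parameters and applies them through the new Terminal setters. A hedged usage fragment, assuming an already-open tty descriptor `fd` and the declarations from this patch in scope (not a complete program):

llvm::Expected<SerialPort::Options> opts =
    SerialPort::OptionsFromURL("baud=115200&parity=even&stop-bits=1");
if (!opts) {
  llvm::errs() << llvm::toString(opts.takeError()) << '\n';
} else {
  llvm::Expected<std::unique_ptr<SerialPort>> port = SerialPort::Create(
      fd, File::eOpenOptionReadWrite, *opts, /*transfer_ownership=*/false);
  // On success, *port is usable like any other lldb_private::File and
  // restores the saved termios state when Close() is called.
}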
diff --git a/lldb/source/Host/common/FileSystem.cpp b/lldb/source/Host/common/FileSystem.cpp
index a2c3b3556a6c..7687ad6c20a6 100644
--- a/lldb/source/Host/common/FileSystem.cpp
+++ b/lldb/source/Host/common/FileSystem.cpp
@@ -381,13 +381,13 @@ static int OpenWithFS(const FileSystem &fs, const char *path, int flags,
return const_cast<FileSystem &>(fs).Open(path, flags, mode);
}
-static int GetOpenFlags(uint32_t options) {
- const bool read = options & File::eOpenOptionRead;
- const bool write = options & File::eOpenOptionWrite;
-
+static int GetOpenFlags(File::OpenOptions options) {
int open_flags = 0;
- if (write) {
- if (read)
+ File::OpenOptions rw =
+ options & (File::eOpenOptionReadOnly | File::eOpenOptionWriteOnly |
+ File::eOpenOptionReadWrite);
+ if (rw == File::eOpenOptionWriteOnly || rw == File::eOpenOptionReadWrite) {
+ if (rw == File::eOpenOptionReadWrite)
open_flags |= O_RDWR;
else
open_flags |= O_WRONLY;
@@ -403,7 +403,7 @@ static int GetOpenFlags(uint32_t options) {
if (options & File::eOpenOptionCanCreateNewOnly)
open_flags |= O_CREAT | O_EXCL;
- } else if (read) {
+ } else if (rw == File::eOpenOptionReadOnly) {
open_flags |= O_RDONLY;
#ifndef _WIN32
diff --git a/lldb/source/Host/common/LockFileBase.cpp b/lldb/source/Host/common/LockFileBase.cpp
index d4cd8f7ffed1..1c0de9e04e29 100644
--- a/lldb/source/Host/common/LockFileBase.cpp
+++ b/lldb/source/Host/common/LockFileBase.cpp
@@ -11,12 +11,9 @@
using namespace lldb;
using namespace lldb_private;
-namespace {
+static Status AlreadyLocked() { return Status("Already locked"); }
-Status AlreadyLocked() { return Status("Already locked"); }
-
-Status NotLocked() { return Status("Not locked"); }
-}
+static Status NotLocked() { return Status("Not locked"); }
LockFileBase::LockFileBase(int fd)
: m_fd(fd), m_locked(false), m_start(0), m_len(0) {}
diff --git a/lldb/source/Host/common/NativeRegisterContext.cpp b/lldb/source/Host/common/NativeRegisterContext.cpp
index 04d10aba4e63..d0afc2b47dac 100644
--- a/lldb/source/Host/common/NativeRegisterContext.cpp
+++ b/lldb/source/Host/common/NativeRegisterContext.cpp
@@ -56,6 +56,17 @@ NativeRegisterContext::GetRegisterInfoByName(llvm::StringRef reg_name,
if (reg_name.empty())
return nullptr;
+ // Generic register names take precedence over specific register names.
+ // For example, on x86 we want "sp" to refer to the complete RSP/ESP register
+ // rather than the 16-bit SP pseudo-register.
+ uint32_t generic_reg = Args::StringToGenericRegister(reg_name);
+ if (generic_reg != LLDB_INVALID_REGNUM) {
+ const RegisterInfo *reg_info =
+ GetRegisterInfo(eRegisterKindGeneric, generic_reg);
+ if (reg_info)
+ return reg_info;
+ }
+
const uint32_t num_registers = GetRegisterCount();
for (uint32_t reg = start_idx; reg < num_registers; ++reg) {
const RegisterInfo *reg_info = GetRegisterInfoAtIndex(reg);
@@ -64,6 +75,7 @@ NativeRegisterContext::GetRegisterInfoByName(llvm::StringRef reg_name,
reg_name.equals_insensitive(reg_info->alt_name))
return reg_info;
}
+
return nullptr;
}
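The lookup-order change above makes generic register names win over identically named sub-registers. A self-contained illustration of that precedence with made-up register data (not LLDB's real tables or APIs):

#include <cstdio>
#include <cstring>

struct RegInfo { const char *name; unsigned byte_size; };

// Hypothetical x86-64 subset: "sp" also exists as a 16-bit pseudo-register.
static const RegInfo g_regs[] = {{"rsp", 8}, {"sp", 2}, {"rip", 8}};

static const RegInfo *FindGeneric(const char *name) {
  // Pretend "sp" is the generic stack-pointer alias resolving to the full "rsp".
  return std::strcmp(name, "sp") == 0 ? &g_regs[0] : nullptr;
}

static const RegInfo *FindByName(const char *name) {
  if (const RegInfo *reg = FindGeneric(name)) // generic names take precedence
    return reg;
  for (const RegInfo &reg : g_regs)           // then fall back to exact names
    if (std::strcmp(name, reg.name) == 0)
      return &reg;
  return nullptr;
}

int main() {
  std::printf("sp -> %u bytes\n", FindByName("sp")->byte_size); // 8, not 2
}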
diff --git a/lldb/source/Host/common/Socket.cpp b/lldb/source/Host/common/Socket.cpp
index d1c327dcb790..cc0659797530 100644
--- a/lldb/source/Host/common/Socket.cpp
+++ b/lldb/source/Host/common/Socket.cpp
@@ -11,11 +11,9 @@
#include "lldb/Host/Config.h"
#include "lldb/Host/Host.h"
#include "lldb/Host/SocketAddress.h"
-#include "lldb/Host/StringConvert.h"
#include "lldb/Host/common/TCPSocket.h"
#include "lldb/Host/common/UDPSocket.h"
#include "lldb/Utility/Log.h"
-#include "lldb/Utility/RegularExpression.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Errno.h"
@@ -61,16 +59,13 @@ typedef void *get_socket_option_arg_type;
const NativeSocket Socket::kInvalidSocketValue = -1;
#endif // #if defined(_WIN32)
-namespace {
-
-bool IsInterrupted() {
+static bool IsInterrupted() {
#if defined(_WIN32)
return ::WSAGetLastError() == WSAEINTR;
#else
return errno == EINTR;
#endif
}
-}
Socket::Socket(SocketProtocol protocol, bool should_close,
bool child_processes_inherit)
@@ -168,37 +163,17 @@ Socket::TcpConnect(llvm::StringRef host_and_port,
llvm::Expected<std::unique_ptr<TCPSocket>>
Socket::TcpListen(llvm::StringRef host_and_port, bool child_processes_inherit,
- Predicate<uint16_t> *predicate, int backlog) {
+ int backlog) {
Log *log(lldb_private::GetLogIfAnyCategoriesSet(LIBLLDB_LOG_CONNECTION));
LLDB_LOG(log, "host_and_port = {0}", host_and_port);
- Status error;
- std::string host_str;
- std::string port_str;
- int32_t port = INT32_MIN;
- if (!DecodeHostAndPort(host_and_port, host_str, port_str, port, &error))
- return error.ToError();
-
std::unique_ptr<TCPSocket> listen_socket(
new TCPSocket(true, child_processes_inherit));
- error = listen_socket->Listen(host_and_port, backlog);
+ Status error = listen_socket->Listen(host_and_port, backlog);
if (error.Fail())
return error.ToError();
- // We were asked to listen on port zero which means we must now read the
- // actual port that was given to us as port zero is a special code for
- // "find an open port for me".
- if (port == 0)
- port = listen_socket->GetLocalPortNumber();
-
- // Set the port predicate since when doing a listen://<host>:<port> it
- // often needs to accept the incoming connection which is a blocking system
- // call. Allowing access to the bound port using a predicate allows us to
- // wait for the port predicate to be set to a non-zero value from another
- // thread in an efficient manor.
- if (predicate)
- predicate->SetValue(port, eBroadcastAlways);
return std::move(listen_socket);
}
@@ -208,111 +183,27 @@ Socket::UdpConnect(llvm::StringRef host_and_port,
return UDPSocket::Connect(host_and_port, child_processes_inherit);
}
-Status Socket::UnixDomainConnect(llvm::StringRef name,
- bool child_processes_inherit,
- Socket *&socket) {
- Status error;
- std::unique_ptr<Socket> connect_socket(
- Create(ProtocolUnixDomain, child_processes_inherit, error));
- if (error.Fail())
- return error;
-
- error = connect_socket->Connect(name);
- if (error.Success())
- socket = connect_socket.release();
-
- return error;
-}
-
-Status Socket::UnixDomainAccept(llvm::StringRef name,
- bool child_processes_inherit, Socket *&socket) {
- Status error;
- std::unique_ptr<Socket> listen_socket(
- Create(ProtocolUnixDomain, child_processes_inherit, error));
- if (error.Fail())
- return error;
-
- error = listen_socket->Listen(name, 5);
- if (error.Fail())
- return error;
-
- error = listen_socket->Accept(socket);
- return error;
-}
-
-Status Socket::UnixAbstractConnect(llvm::StringRef name,
- bool child_processes_inherit,
- Socket *&socket) {
- Status error;
- std::unique_ptr<Socket> connect_socket(
- Create(ProtocolUnixAbstract, child_processes_inherit, error));
- if (error.Fail())
- return error;
-
- error = connect_socket->Connect(name);
- if (error.Success())
- socket = connect_socket.release();
- return error;
-}
-
-Status Socket::UnixAbstractAccept(llvm::StringRef name,
- bool child_processes_inherit,
- Socket *&socket) {
- Status error;
- std::unique_ptr<Socket> listen_socket(
- Create(ProtocolUnixAbstract, child_processes_inherit, error));
- if (error.Fail())
- return error;
-
- error = listen_socket->Listen(name, 5);
- if (error.Fail())
- return error;
-
- error = listen_socket->Accept(socket);
- return error;
-}
-
-bool Socket::DecodeHostAndPort(llvm::StringRef host_and_port,
- std::string &host_str, std::string &port_str,
- int32_t &port, Status *error_ptr) {
- static RegularExpression g_regex(
- llvm::StringRef("([^:]+|\\[[0-9a-fA-F:]+.*\\]):([0-9]+)"));
+llvm::Expected<Socket::HostAndPort> Socket::DecodeHostAndPort(llvm::StringRef host_and_port) {
+ static llvm::Regex g_regex("([^:]+|\\[[0-9a-fA-F:]+.*\\]):([0-9]+)");
+ HostAndPort ret;
llvm::SmallVector<llvm::StringRef, 3> matches;
- if (g_regex.Execute(host_and_port, &matches)) {
- host_str = matches[1].str();
- port_str = matches[2].str();
+ if (g_regex.match(host_and_port, &matches)) {
+ ret.hostname = matches[1].str();
// IPv6 addresses are wrapped in [] when specified with ports
- if (host_str.front() == '[' && host_str.back() == ']')
- host_str = host_str.substr(1, host_str.size() - 2);
- bool ok = false;
- port = StringConvert::ToUInt32(port_str.c_str(), UINT32_MAX, 10, &ok);
- if (ok && port <= UINT16_MAX) {
- if (error_ptr)
- error_ptr->Clear();
- return true;
- }
- // port is too large
- if (error_ptr)
- error_ptr->SetErrorStringWithFormat(
- "invalid host:port specification: '%s'", host_and_port.str().c_str());
- return false;
- }
-
- // If this was unsuccessful, then check if it's simply a signed 32-bit
- // integer, representing a port with an empty host.
- host_str.clear();
- port_str.clear();
- if (to_integer(host_and_port, port, 10) && port < UINT16_MAX) {
- port_str = std::string(host_and_port);
- if (error_ptr)
- error_ptr->Clear();
- return true;
+ if (ret.hostname.front() == '[' && ret.hostname.back() == ']')
+ ret.hostname = ret.hostname.substr(1, ret.hostname.size() - 2);
+ if (to_integer(matches[2], ret.port, 10))
+ return ret;
+ } else {
+ // If this was unsuccessful, then check if it's simply an unsigned 16-bit
+ // integer, representing a port with an empty host.
+ if (to_integer(host_and_port, ret.port, 10))
+ return ret;
}
- if (error_ptr)
- error_ptr->SetErrorStringWithFormat("invalid host:port specification: '%s'",
- host_and_port.str().c_str());
- return false;
+ return llvm::createStringError(llvm::inconvertibleErrorCode(),
+ "invalid host:port specification: '%s'",
+ host_and_port.str().c_str());
}
IOObject::WaitableHandle Socket::GetWaitableHandle() {
@@ -481,3 +372,8 @@ NativeSocket Socket::AcceptSocket(NativeSocket sockfd, struct sockaddr *addr,
SetLastError(error);
return fd;
}
+
+llvm::raw_ostream &lldb_private::operator<<(llvm::raw_ostream &OS,
+ const Socket::HostAndPort &HP) {
+ return OS << '[' << HP.hostname << ']' << ':' << HP.port;
+}
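DecodeHostAndPort now reports failure through llvm::Expected instead of a bool plus Status out-parameter. A caller-side fragment under that assumption, with the declarations from this hunk in scope:

if (llvm::Expected<Socket::HostAndPort> hp =
        Socket::DecodeHostAndPort("localhost:1234"))
  llvm::outs() << *hp << '\n'; // operator<< above prints "[localhost]:1234"
else
  llvm::errs() << llvm::toString(hp.takeError()) << '\n';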
diff --git a/lldb/source/Host/common/StringConvert.cpp b/lldb/source/Host/common/StringConvert.cpp
deleted file mode 100644
index b4eb92755367..000000000000
--- a/lldb/source/Host/common/StringConvert.cpp
+++ /dev/null
@@ -1,95 +0,0 @@
-//===-- StringConvert.cpp -------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include <cstdlib>
-
-#include "lldb/Host/StringConvert.h"
-
-namespace lldb_private {
-namespace StringConvert {
-
-int32_t ToSInt32(const char *s, int32_t fail_value, int base,
- bool *success_ptr) {
- if (s && s[0]) {
- char *end = nullptr;
- const long sval = ::strtol(s, &end, base);
- if (*end == '\0') {
- if (success_ptr)
- *success_ptr = ((sval <= INT32_MAX) && (sval >= INT32_MIN));
- return (int32_t)sval; // All characters were used, return the result
- }
- }
- if (success_ptr)
- *success_ptr = false;
- return fail_value;
-}
-
-uint32_t ToUInt32(const char *s, uint32_t fail_value, int base,
- bool *success_ptr) {
- if (s && s[0]) {
- char *end = nullptr;
- const unsigned long uval = ::strtoul(s, &end, base);
- if (*end == '\0') {
- if (success_ptr)
- *success_ptr = (uval <= UINT32_MAX);
- return (uint32_t)uval; // All characters were used, return the result
- }
- }
- if (success_ptr)
- *success_ptr = false;
- return fail_value;
-}
-
-int64_t ToSInt64(const char *s, int64_t fail_value, int base,
- bool *success_ptr) {
- if (s && s[0]) {
- char *end = nullptr;
- int64_t uval = ::strtoll(s, &end, base);
- if (*end == '\0') {
- if (success_ptr)
- *success_ptr = true;
- return uval; // All characters were used, return the result
- }
- }
- if (success_ptr)
- *success_ptr = false;
- return fail_value;
-}
-
-uint64_t ToUInt64(const char *s, uint64_t fail_value, int base,
- bool *success_ptr) {
- if (s && s[0]) {
- char *end = nullptr;
- uint64_t uval = ::strtoull(s, &end, base);
- if (*end == '\0') {
- if (success_ptr)
- *success_ptr = true;
- return uval; // All characters were used, return the result
- }
- }
- if (success_ptr)
- *success_ptr = false;
- return fail_value;
-}
-
-double ToDouble(const char *s, double fail_value, bool *success_ptr) {
- if (s && s[0]) {
- char *end = nullptr;
- double val = strtod(s, &end);
- if (*end == '\0') {
- if (success_ptr)
- *success_ptr = true;
- return val; // All characters were used, return the result
- }
- }
- if (success_ptr)
- *success_ptr = false;
- return fail_value;
-}
-}
-}
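StringConvert is removed in favour of the LLVM string-parsing helpers used elsewhere in this patch. A minimal sketch of the replacement idiom (llvm/ADT/StringExtras.h assumed available); note that the XML accessors above preload fail_value before parsing so callers still see a defined value when parsing fails.

#include "llvm/ADT/StringExtras.h"
#include <cstdint>
#include <cstdio>

int main() {
  uint64_t value = 0;
  if (llvm::to_integer(llvm::StringRef("1234"), value, 10))
    std::printf("%llu\n", (unsigned long long)value); // 1234
  double d = 0.0;
  if (llvm::to_float(llvm::StringRef("2.5"), d))
    std::printf("%g\n", d);                           // 2.5
  return 0;
}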
diff --git a/lldb/source/Host/common/TCPSocket.cpp b/lldb/source/Host/common/TCPSocket.cpp
index ea7377edbd45..28c3fa1188c2 100644
--- a/lldb/source/Host/common/TCPSocket.cpp
+++ b/lldb/source/Host/common/TCPSocket.cpp
@@ -53,9 +53,7 @@ static Status GetLastSocketError() {
return EC;
}
-namespace {
-const int kType = SOCK_STREAM;
-}
+static const int kType = SOCK_STREAM;
TCPSocket::TCPSocket(bool should_close, bool child_processes_inherit)
: Socket(ProtocolTcp, should_close, child_processes_inherit) {}
@@ -154,23 +152,23 @@ Status TCPSocket::Connect(llvm::StringRef name) {
LLDB_LOGF(log, "TCPSocket::%s (host/port = %s)", __FUNCTION__, name.data());
Status error;
- std::string host_str;
- std::string port_str;
- int32_t port = INT32_MIN;
- if (!DecodeHostAndPort(name, host_str, port_str, port, &error))
- return error;
+ llvm::Expected<HostAndPort> host_port = DecodeHostAndPort(name);
+ if (!host_port)
+ return Status(host_port.takeError());
- std::vector<SocketAddress> addresses = SocketAddress::GetAddressInfo(
- host_str.c_str(), nullptr, AF_UNSPEC, SOCK_STREAM, IPPROTO_TCP);
+ std::vector<SocketAddress> addresses =
+ SocketAddress::GetAddressInfo(host_port->hostname.c_str(), nullptr,
+ AF_UNSPEC, SOCK_STREAM, IPPROTO_TCP);
for (SocketAddress &address : addresses) {
error = CreateSocket(address.GetFamily());
if (error.Fail())
continue;
- address.SetPort(port);
+ address.SetPort(host_port->port);
- if (-1 == llvm::sys::RetryAfterSignal(-1, ::connect,
- GetNativeSocket(), &address.sockaddr(), address.GetLength())) {
+ if (-1 == llvm::sys::RetryAfterSignal(-1, ::connect, GetNativeSocket(),
+ &address.sockaddr(),
+ address.GetLength())) {
CLOSE_SOCKET(GetNativeSocket());
continue;
}
@@ -190,16 +188,14 @@ Status TCPSocket::Listen(llvm::StringRef name, int backlog) {
LLDB_LOGF(log, "TCPSocket::%s (%s)", __FUNCTION__, name.data());
Status error;
- std::string host_str;
- std::string port_str;
- int32_t port = INT32_MIN;
- if (!DecodeHostAndPort(name, host_str, port_str, port, &error))
- return error;
+ llvm::Expected<HostAndPort> host_port = DecodeHostAndPort(name);
+ if (!host_port)
+ return Status(host_port.takeError());
- if (host_str == "*")
- host_str = "0.0.0.0";
+ if (host_port->hostname == "*")
+ host_port->hostname = "0.0.0.0";
std::vector<SocketAddress> addresses = SocketAddress::GetAddressInfo(
- host_str.c_str(), nullptr, AF_UNSPEC, SOCK_STREAM, IPPROTO_TCP);
+ host_port->hostname.c_str(), nullptr, AF_UNSPEC, SOCK_STREAM, IPPROTO_TCP);
for (SocketAddress &address : addresses) {
int fd = Socket::CreateSocket(address.GetFamily(), kType, IPPROTO_TCP,
m_child_processes_inherit, error);
@@ -215,9 +211,9 @@ Status TCPSocket::Listen(llvm::StringRef name, int backlog) {
SocketAddress listen_address = address;
if(!listen_address.IsLocalhost())
- listen_address.SetToAnyAddress(address.GetFamily(), port);
+ listen_address.SetToAnyAddress(address.GetFamily(), host_port->port);
else
- listen_address.SetPort(port);
+ listen_address.SetPort(host_port->port);
int err =
::bind(fd, &listen_address.sockaddr(), listen_address.GetLength());
@@ -230,10 +226,10 @@ Status TCPSocket::Listen(llvm::StringRef name, int backlog) {
continue;
}
- if (port == 0) {
+ if (host_port->port == 0) {
socklen_t sa_len = address.GetLength();
if (getsockname(fd, &address.sockaddr(), &sa_len) == 0)
- port = address.GetPort();
+ host_port->port = address.GetPort();
}
m_listen_sockets[fd] = address;
}
diff --git a/lldb/source/Host/common/Terminal.cpp b/lldb/source/Host/common/Terminal.cpp
index 2301abe9afb1..2a1c12e667bc 100644
--- a/lldb/source/Host/common/Terminal.cpp
+++ b/lldb/source/Host/common/Terminal.cpp
@@ -21,123 +21,417 @@
using namespace lldb_private;
+struct Terminal::Data {
+#if LLDB_ENABLE_TERMIOS
+ struct termios m_termios; ///< Cached terminal state information.
+#endif
+};
+
bool Terminal::IsATerminal() const { return m_fd >= 0 && ::isatty(m_fd); }
-bool Terminal::SetEcho(bool enabled) {
- if (FileDescriptorIsValid()) {
+#if !LLDB_ENABLE_TERMIOS
+static llvm::Error termiosMissingError() {
+ return llvm::createStringError(llvm::inconvertibleErrorCode(),
+ "termios support missing in LLDB");
+}
+#endif
+
+llvm::Expected<Terminal::Data> Terminal::GetData() {
#if LLDB_ENABLE_TERMIOS
- if (IsATerminal()) {
- struct termios fd_termios;
- if (::tcgetattr(m_fd, &fd_termios) == 0) {
- bool set_corectly = false;
- if (enabled) {
- if (fd_termios.c_lflag & ECHO)
- set_corectly = true;
- else
- fd_termios.c_lflag |= ECHO;
- } else {
- if (fd_termios.c_lflag & ECHO)
- fd_termios.c_lflag &= ~ECHO;
- else
- set_corectly = true;
- }
-
- if (set_corectly)
- return true;
- return ::tcsetattr(m_fd, TCSANOW, &fd_termios) == 0;
- }
- }
-#endif // #if LLDB_ENABLE_TERMIOS
+ if (!FileDescriptorIsValid())
+ return llvm::createStringError(llvm::inconvertibleErrorCode(),
+ "invalid fd");
+
+ if (!IsATerminal())
+ return llvm::createStringError(llvm::inconvertibleErrorCode(),
+ "fd not a terminal");
+
+ Data data;
+ if (::tcgetattr(m_fd, &data.m_termios) != 0)
+ return llvm::createStringError(
+ std::error_code(errno, std::generic_category()),
+ "unable to get teletype attributes");
+ return data;
+#else // !LLDB_ENABLE_TERMIOS
+ return termiosMissingError();
+#endif // LLDB_ENABLE_TERMIOS
+}
+
+llvm::Error Terminal::SetData(const Terminal::Data &data) {
+#if LLDB_ENABLE_TERMIOS
+ assert(FileDescriptorIsValid());
+ assert(IsATerminal());
+
+ if (::tcsetattr(m_fd, TCSANOW, &data.m_termios) != 0)
+ return llvm::createStringError(
+ std::error_code(errno, std::generic_category()),
+ "unable to set teletype attributes");
+ return llvm::Error::success();
+#else // !LLDB_ENABLE_TERMIOS
+ return termiosMissingError();
+#endif // LLDB_ENABLE_TERMIOS
+}
+
+llvm::Error Terminal::SetEcho(bool enabled) {
+#if LLDB_ENABLE_TERMIOS
+ llvm::Expected<Data> data = GetData();
+ if (!data)
+ return data.takeError();
+
+ struct termios &fd_termios = data->m_termios;
+ fd_termios.c_lflag &= ~ECHO;
+ if (enabled)
+ fd_termios.c_lflag |= ECHO;
+ return SetData(data.get());
+#else // !LLDB_ENABLE_TERMIOS
+ return termiosMissingError();
+#endif // LLDB_ENABLE_TERMIOS
+}
+
+llvm::Error Terminal::SetCanonical(bool enabled) {
+#if LLDB_ENABLE_TERMIOS
+ llvm::Expected<Data> data = GetData();
+ if (!data)
+ return data.takeError();
+
+ struct termios &fd_termios = data->m_termios;
+ fd_termios.c_lflag &= ~ICANON;
+ if (enabled)
+ fd_termios.c_lflag |= ICANON;
+ return SetData(data.get());
+#else // !LLDB_ENABLE_TERMIOS
+ return termiosMissingError();
+#endif // LLDB_ENABLE_TERMIOS
+}
+
+llvm::Error Terminal::SetRaw() {
+#if LLDB_ENABLE_TERMIOS
+ llvm::Expected<Data> data = GetData();
+ if (!data)
+ return data.takeError();
+
+ struct termios &fd_termios = data->m_termios;
+ ::cfmakeraw(&fd_termios);
+
+ // Make sure only one character is needed to return from a read
+ // (cfmakeraw() doesn't do this on NetBSD)
+ fd_termios.c_cc[VMIN] = 1;
+ fd_termios.c_cc[VTIME] = 0;
+
+ return SetData(data.get());
+#else // !LLDB_ENABLE_TERMIOS
+ return termiosMissingError();
+#endif // LLDB_ENABLE_TERMIOS
+}
+
+#if LLDB_ENABLE_TERMIOS
+static llvm::Optional<speed_t> baudRateToConst(unsigned int baud_rate) {
+ switch (baud_rate) {
+#if defined(B50)
+ case 50:
+ return B50;
+#endif
+#if defined(B75)
+ case 75:
+ return B75;
+#endif
+#if defined(B110)
+ case 110:
+ return B110;
+#endif
+#if defined(B134)
+ case 134:
+ return B134;
+#endif
+#if defined(B150)
+ case 150:
+ return B150;
+#endif
+#if defined(B200)
+ case 200:
+ return B200;
+#endif
+#if defined(B300)
+ case 300:
+ return B300;
+#endif
+#if defined(B600)
+ case 600:
+ return B600;
+#endif
+#if defined(B1200)
+ case 1200:
+ return B1200;
+#endif
+#if defined(B1800)
+ case 1800:
+ return B1800;
+#endif
+#if defined(B2400)
+ case 2400:
+ return B2400;
+#endif
+#if defined(B4800)
+ case 4800:
+ return B4800;
+#endif
+#if defined(B9600)
+ case 9600:
+ return B9600;
+#endif
+#if defined(B19200)
+ case 19200:
+ return B19200;
+#endif
+#if defined(B38400)
+ case 38400:
+ return B38400;
+#endif
+#if defined(B57600)
+ case 57600:
+ return B57600;
+#endif
+#if defined(B115200)
+ case 115200:
+ return B115200;
+#endif
+#if defined(B230400)
+ case 230400:
+ return B230400;
+#endif
+#if defined(B460800)
+ case 460800:
+ return B460800;
+#endif
+#if defined(B500000)
+ case 500000:
+ return B500000;
+#endif
+#if defined(B576000)
+ case 576000:
+ return B576000;
+#endif
+#if defined(B921600)
+ case 921600:
+ return B921600;
+#endif
+#if defined(B1000000)
+ case 1000000:
+ return B1000000;
+#endif
+#if defined(B1152000)
+ case 1152000:
+ return B1152000;
+#endif
+#if defined(B1500000)
+ case 1500000:
+ return B1500000;
+#endif
+#if defined(B2000000)
+ case 2000000:
+ return B2000000;
+#endif
+#if defined(B76800)
+ case 76800:
+ return B76800;
+#endif
+#if defined(B153600)
+ case 153600:
+ return B153600;
+#endif
+#if defined(B307200)
+ case 307200:
+ return B307200;
+#endif
+#if defined(B614400)
+ case 614400:
+ return B614400;
+#endif
+#if defined(B2500000)
+ case 2500000:
+ return B2500000;
+#endif
+#if defined(B3000000)
+ case 3000000:
+ return B3000000;
+#endif
+#if defined(B3500000)
+ case 3500000:
+ return B3500000;
+#endif
+#if defined(B4000000)
+ case 4000000:
+ return B4000000;
+#endif
+ default:
+ return llvm::None;
}
- return false;
}
+#endif
-bool Terminal::SetCanonical(bool enabled) {
- if (FileDescriptorIsValid()) {
+llvm::Error Terminal::SetBaudRate(unsigned int baud_rate) {
#if LLDB_ENABLE_TERMIOS
- if (IsATerminal()) {
- struct termios fd_termios;
- if (::tcgetattr(m_fd, &fd_termios) == 0) {
- bool set_corectly = false;
- if (enabled) {
- if (fd_termios.c_lflag & ICANON)
- set_corectly = true;
- else
- fd_termios.c_lflag |= ICANON;
- } else {
- if (fd_termios.c_lflag & ICANON)
- fd_termios.c_lflag &= ~ICANON;
- else
- set_corectly = true;
- }
-
- if (set_corectly)
- return true;
- return ::tcsetattr(m_fd, TCSANOW, &fd_termios) == 0;
- }
- }
-#endif // #if LLDB_ENABLE_TERMIOS
+ llvm::Expected<Data> data = GetData();
+ if (!data)
+ return data.takeError();
+
+ struct termios &fd_termios = data->m_termios;
+ llvm::Optional<speed_t> val = baudRateToConst(baud_rate);
+ if (!val) // invalid value
+ return llvm::createStringError(llvm::inconvertibleErrorCode(),
+ "baud rate %d unsupported by the platform",
+ baud_rate);
+ if (::cfsetispeed(&fd_termios, val.getValue()) != 0)
+ return llvm::createStringError(
+ std::error_code(errno, std::generic_category()),
+ "setting input baud rate failed");
+ if (::cfsetospeed(&fd_termios, val.getValue()) != 0)
+ return llvm::createStringError(
+ std::error_code(errno, std::generic_category()),
+ "setting output baud rate failed");
+ return SetData(data.get());
+#else // !LLDB_ENABLE_TERMIOS
+ return termiosMissingError();
+#endif // LLDB_ENABLE_TERMIOS
+}
+
+llvm::Error Terminal::SetStopBits(unsigned int stop_bits) {
+#if LLDB_ENABLE_TERMIOS
+ llvm::Expected<Data> data = GetData();
+ if (!data)
+ return data.takeError();
+
+ struct termios &fd_termios = data->m_termios;
+ switch (stop_bits) {
+ case 1:
+ fd_termios.c_cflag &= ~CSTOPB;
+ break;
+ case 2:
+ fd_termios.c_cflag |= CSTOPB;
+ break;
+ default:
+ return llvm::createStringError(
+ llvm::inconvertibleErrorCode(),
+ "invalid stop bit count: %d (must be 1 or 2)", stop_bits);
}
- return false;
+ return SetData(data.get());
+#else // !LLDB_ENABLE_TERMIOS
+ return termiosMissingError();
+#endif // LLDB_ENABLE_TERMIOS
}
-// Default constructor
-TerminalState::TerminalState()
- : m_tty()
+llvm::Error Terminal::SetParity(Terminal::Parity parity) {
#if LLDB_ENABLE_TERMIOS
- ,
- m_termios_up()
+ llvm::Expected<Data> data = GetData();
+ if (!data)
+ return data.takeError();
+
+ struct termios &fd_termios = data->m_termios;
+ fd_termios.c_cflag &= ~(
+#if defined(CMSPAR)
+ CMSPAR |
+#endif
+ PARENB | PARODD);
+
+ if (parity != Parity::No) {
+ fd_termios.c_cflag |= PARENB;
+ if (parity == Parity::Odd || parity == Parity::Mark)
+ fd_termios.c_cflag |= PARODD;
+ if (parity == Parity::Mark || parity == Parity::Space) {
+#if defined(CMSPAR)
+ fd_termios.c_cflag |= CMSPAR;
+#else
+ return llvm::createStringError(
+ llvm::inconvertibleErrorCode(),
+ "space/mark parity is not supported by the platform");
#endif
-{
+ }
+ }
+ return SetData(data.get());
+#else // !LLDB_ENABLE_TERMIOS
+ return termiosMissingError();
+#endif // LLDB_ENABLE_TERMIOS
+}
+
+llvm::Error Terminal::SetParityCheck(Terminal::ParityCheck parity_check) {
+#if LLDB_ENABLE_TERMIOS
+ llvm::Expected<Data> data = GetData();
+ if (!data)
+ return data.takeError();
+
+ struct termios &fd_termios = data->m_termios;
+ fd_termios.c_iflag &= ~(IGNPAR | PARMRK | INPCK);
+
+ if (parity_check != ParityCheck::No) {
+ fd_termios.c_iflag |= INPCK;
+ if (parity_check == ParityCheck::Ignore)
+ fd_termios.c_iflag |= IGNPAR;
+ else if (parity_check == ParityCheck::Mark)
+ fd_termios.c_iflag |= PARMRK;
+ }
+ return SetData(data.get());
+#else // !LLDB_ENABLE_TERMIOS
+ return termiosMissingError();
+#endif // LLDB_ENABLE_TERMIOS
+}
+
+llvm::Error Terminal::SetHardwareFlowControl(bool enabled) {
+#if LLDB_ENABLE_TERMIOS
+ llvm::Expected<Data> data = GetData();
+ if (!data)
+ return data.takeError();
+
+#if defined(CRTSCTS)
+ struct termios &fd_termios = data->m_termios;
+ fd_termios.c_cflag &= ~CRTSCTS;
+ if (enabled)
+ fd_termios.c_cflag |= CRTSCTS;
+ return SetData(data.get());
+#else // !defined(CRTSCTS)
+ if (enabled)
+ return llvm::createStringError(
+ llvm::inconvertibleErrorCode(),
+ "hardware flow control is not supported by the platform");
+ return llvm::Error::success();
+#endif // defined(CRTSCTS)
+#else // !LLDB_ENABLE_TERMIOS
+ return termiosMissingError();
+#endif // LLDB_ENABLE_TERMIOS
+}
+
+TerminalState::TerminalState(Terminal term, bool save_process_group)
+ : m_tty(term) {
+ Save(term, save_process_group);
}
-// Destructor
-TerminalState::~TerminalState() = default;
+TerminalState::~TerminalState() { Restore(); }
void TerminalState::Clear() {
m_tty.Clear();
m_tflags = -1;
-#if LLDB_ENABLE_TERMIOS
- m_termios_up.reset();
-#endif
+ m_data.reset();
m_process_group = -1;
}
-// Save the current state of the TTY for the file descriptor "fd" and if
-// "save_process_group" is true, attempt to save the process group info for the
-// TTY.
-bool TerminalState::Save(int fd, bool save_process_group) {
- m_tty.SetFileDescriptor(fd);
+bool TerminalState::Save(Terminal term, bool save_process_group) {
+ Clear();
+ m_tty = term;
if (m_tty.IsATerminal()) {
+ int fd = m_tty.GetFileDescriptor();
#if LLDB_ENABLE_POSIX
m_tflags = ::fcntl(fd, F_GETFL, 0);
-#endif
#if LLDB_ENABLE_TERMIOS
- if (m_termios_up == nullptr)
- m_termios_up.reset(new struct termios);
- int err = ::tcgetattr(fd, m_termios_up.get());
- if (err != 0)
- m_termios_up.reset();
-#endif // #if LLDB_ENABLE_TERMIOS
-#if LLDB_ENABLE_POSIX
+ std::unique_ptr<Terminal::Data> new_data{new Terminal::Data()};
+ if (::tcgetattr(fd, &new_data->m_termios) == 0)
+ m_data = std::move(new_data);
+#endif // LLDB_ENABLE_TERMIOS
if (save_process_group)
- m_process_group = ::tcgetpgrp(0);
- else
- m_process_group = -1;
-#endif
- } else {
- m_tty.Clear();
- m_tflags = -1;
-#if LLDB_ENABLE_TERMIOS
- m_termios_up.reset();
-#endif
- m_process_group = -1;
+ m_process_group = ::tcgetpgrp(fd);
+#endif // LLDB_ENABLE_POSIX
}
return IsValid();
}
-// Restore the state of the TTY using the cached values from a previous call to
-// Save().
bool TerminalState::Restore() const {
#if LLDB_ENABLE_POSIX
if (IsValid()) {
@@ -147,8 +441,8 @@ bool TerminalState::Restore() const {
#if LLDB_ENABLE_TERMIOS
if (TTYStateIsValid())
- tcsetattr(fd, TCSANOW, m_termios_up.get());
-#endif // #if LLDB_ENABLE_TERMIOS
+ tcsetattr(fd, TCSANOW, &m_data->m_termios);
+#endif // LLDB_ENABLE_TERMIOS
if (ProcessGroupIsValid()) {
// Save the original signal handler.
@@ -161,77 +455,19 @@ bool TerminalState::Restore() const {
}
return true;
}
-#endif
+#endif // LLDB_ENABLE_POSIX
return false;
}
-// Returns true if this object has valid saved TTY state settings that can be
-// used to restore a previous state.
bool TerminalState::IsValid() const {
return m_tty.FileDescriptorIsValid() &&
- (TFlagsIsValid() || TTYStateIsValid());
+ (TFlagsIsValid() || TTYStateIsValid() || ProcessGroupIsValid());
}
-// Returns true if m_tflags is valid
bool TerminalState::TFlagsIsValid() const { return m_tflags != -1; }
-// Returns true if m_ttystate is valid
-bool TerminalState::TTYStateIsValid() const {
-#if LLDB_ENABLE_TERMIOS
- return m_termios_up != nullptr;
-#else
- return false;
-#endif
-}
+bool TerminalState::TTYStateIsValid() const { return bool(m_data); }
-// Returns true if m_process_group is valid
bool TerminalState::ProcessGroupIsValid() const {
return static_cast<int32_t>(m_process_group) != -1;
}
-
-// Constructor
-TerminalStateSwitcher::TerminalStateSwitcher() = default;
-
-// Destructor
-TerminalStateSwitcher::~TerminalStateSwitcher() = default;
-
-// Returns the number of states that this switcher contains
-uint32_t TerminalStateSwitcher::GetNumberOfStates() const {
- return llvm::array_lengthof(m_ttystates);
-}
-
-// Restore the state at index "idx".
-//
-// Returns true if the restore was successful, false otherwise.
-bool TerminalStateSwitcher::Restore(uint32_t idx) const {
- const uint32_t num_states = GetNumberOfStates();
- if (idx >= num_states)
- return false;
-
- // See if we already are in this state?
- if (m_currentState < num_states && (idx == m_currentState) &&
- m_ttystates[idx].IsValid())
- return true;
-
- // Set the state to match the index passed in and only update the current
- // state if there are no errors.
- if (m_ttystates[idx].Restore()) {
- m_currentState = idx;
- return true;
- }
-
- // We failed to set the state. The tty state was invalid or not initialized.
- return false;
-}
-
-// Save the state at index "idx" for file descriptor "fd" and save the process
-// group if requested.
-//
-// Returns true if the restore was successful, false otherwise.
-bool TerminalStateSwitcher::Save(uint32_t idx, int fd,
- bool save_process_group) {
- const uint32_t num_states = GetNumberOfStates();
- if (idx < num_states)
- return m_ttystates[idx].Save(fd, save_process_group);
- return false;
-}
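Terminal's setters now return llvm::Error and operate on a cached termios snapshot via GetData/SetData. A hedged caller-side fragment (assumes `fd` is an open tty and the headers from this patch are in scope); this mirrors what SerialPort::Create() does above, where the first failure is propagated out as the function's Error:

Terminal term{fd};
if (llvm::Error err = term.SetRaw())
  llvm::errs() << llvm::toString(std::move(err)) << '\n';
if (llvm::Error err = term.SetBaudRate(115200))
  llvm::errs() << llvm::toString(std::move(err)) << '\n';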
diff --git a/lldb/source/Host/common/UDPSocket.cpp b/lldb/source/Host/common/UDPSocket.cpp
index 0b537b3a9b13..31266c980e0e 100644
--- a/lldb/source/Host/common/UDPSocket.cpp
+++ b/lldb/source/Host/common/UDPSocket.cpp
@@ -21,13 +21,10 @@
using namespace lldb;
using namespace lldb_private;
-namespace {
-
-const int kDomain = AF_INET;
-const int kType = SOCK_DGRAM;
+static const int kDomain = AF_INET;
+static const int kType = SOCK_DGRAM;
static const char *g_not_supported_error = "Not supported";
-}
UDPSocket::UDPSocket(NativeSocket socket) : Socket(ProtocolUdp, true, true) {
m_socket = socket;
@@ -61,11 +58,9 @@ UDPSocket::Connect(llvm::StringRef name, bool child_processes_inherit) {
LLDB_LOG(log, "host/port = {0}", name);
Status error;
- std::string host_str;
- std::string port_str;
- int32_t port = INT32_MIN;
- if (!DecodeHostAndPort(name, host_str, port_str, port, &error))
- return error.ToError();
+ llvm::Expected<HostAndPort> host_port = DecodeHostAndPort(name);
+ if (!host_port)
+ return host_port.takeError();
// At this point we have setup the receive port, now we need to setup the UDP
// send socket
@@ -76,16 +71,16 @@ UDPSocket::Connect(llvm::StringRef name, bool child_processes_inherit) {
::memset(&hints, 0, sizeof(hints));
hints.ai_family = kDomain;
hints.ai_socktype = kType;
- int err = ::getaddrinfo(host_str.c_str(), port_str.c_str(), &hints,
+ int err = ::getaddrinfo(host_port->hostname.c_str(), std::to_string(host_port->port).c_str(), &hints,
&service_info_list);
if (err != 0) {
error.SetErrorStringWithFormat(
#if defined(_WIN32) && defined(UNICODE)
- "getaddrinfo(%s, %s, &hints, &info) returned error %i (%S)",
+ "getaddrinfo(%s, %d, &hints, &info) returned error %i (%S)",
#else
- "getaddrinfo(%s, %s, &hints, &info) returned error %i (%s)",
+ "getaddrinfo(%s, %d, &hints, &info) returned error %i (%s)",
#endif
- host_str.c_str(), port_str.c_str(), err, gai_strerror(err));
+ host_port->hostname.c_str(), host_port->port, err, gai_strerror(err));
return error.ToError();
}
@@ -112,9 +107,9 @@ UDPSocket::Connect(llvm::StringRef name, bool child_processes_inherit) {
// Only bind to the loopback address if we are expecting a connection from
// localhost to avoid any firewall issues.
- const bool bind_addr_success = (host_str == "127.0.0.1" || host_str == "localhost")
- ? bind_addr.SetToLocalhost(kDomain, port)
- : bind_addr.SetToAnyAddress(kDomain, port);
+ const bool bind_addr_success = (host_port->hostname == "127.0.0.1" || host_port->hostname == "localhost")
+ ? bind_addr.SetToLocalhost(kDomain, host_port->port)
+ : bind_addr.SetToAnyAddress(kDomain, host_port->port);
if (!bind_addr_success) {
error.SetErrorString("Failed to get hostspec to bind for");
diff --git a/lldb/source/Host/common/XML.cpp b/lldb/source/Host/common/XML.cpp
index c3225d3f4433..79128b98dc38 100644
--- a/lldb/source/Host/common/XML.cpp
+++ b/lldb/source/Host/common/XML.cpp
@@ -6,10 +6,7 @@
//
//===----------------------------------------------------------------------===//
-#include <stdlib.h> /* atof */
-
#include "lldb/Host/Config.h"
-#include "lldb/Host/StringConvert.h"
#include "lldb/Host/XML.h"
using namespace lldb;
@@ -153,14 +150,8 @@ llvm::StringRef XMLNode::GetAttributeValue(const char *name,
bool XMLNode::GetAttributeValueAsUnsigned(const char *name, uint64_t &value,
uint64_t fail_value, int base) const {
-#if LLDB_ENABLE_LIBXML2
- llvm::StringRef str_value = GetAttributeValue(name, "");
-#else
- llvm::StringRef str_value;
-#endif
- bool success = false;
- value = StringConvert::ToUInt64(str_value.data(), fail_value, base, &success);
- return success;
+ value = fail_value;
+ return llvm::to_integer(GetAttributeValue(name, ""), value, base);
}
void XMLNode::ForEachChildNode(NodeCallback const &callback) const {
@@ -302,33 +293,17 @@ bool XMLNode::GetElementText(std::string &text) const {
bool XMLNode::GetElementTextAsUnsigned(uint64_t &value, uint64_t fail_value,
int base) const {
- bool success = false;
-#if LLDB_ENABLE_LIBXML2
- if (IsValid()) {
- std::string text;
- if (GetElementText(text))
- value = StringConvert::ToUInt64(text.c_str(), fail_value, base, &success);
- }
-#endif
- if (!success)
- value = fail_value;
- return success;
+ std::string text;
+
+ value = fail_value;
+ return GetElementText(text) && llvm::to_integer(text, value, base);
}
bool XMLNode::GetElementTextAsFloat(double &value, double fail_value) const {
- bool success = false;
-#if LLDB_ENABLE_LIBXML2
- if (IsValid()) {
- std::string text;
- if (GetElementText(text)) {
- value = atof(text.c_str());
- success = true;
- }
- }
-#endif
- if (!success)
- value = fail_value;
- return success;
+ std::string text;
+
+ value = fail_value;
+ return GetElementText(text) && llvm::to_float(text, value);
}
bool XMLNode::NameIs(const char *name) const {
@@ -473,9 +448,7 @@ bool ApplePropertyList::ExtractStringFromValueNode(const XMLNode &node,
#if LLDB_ENABLE_LIBXML2
-namespace {
-
-StructuredData::ObjectSP CreatePlistValue(XMLNode node) {
+static StructuredData::ObjectSP CreatePlistValue(XMLNode node) {
llvm::StringRef element_name = node.GetName();
if (element_name == "array") {
std::shared_ptr<StructuredData::Array> array_sp(
@@ -528,7 +501,6 @@ StructuredData::ObjectSP CreatePlistValue(XMLNode node) {
}
return StructuredData::ObjectSP(new StructuredData::Null());
}
-}
#endif
StructuredData::ObjectSP ApplePropertyList::GetStructuredData() {
diff --git a/lldb/source/Host/freebsd/HostInfoFreeBSD.cpp b/lldb/source/Host/freebsd/HostInfoFreeBSD.cpp
index 1b9e3ccaf181..f9ff45666c1e 100644
--- a/lldb/source/Host/freebsd/HostInfoFreeBSD.cpp
+++ b/lldb/source/Host/freebsd/HostInfoFreeBSD.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#include "lldb/Host/freebsd/HostInfoFreeBSD.h"
-
+#include "llvm/Support/FormatVariadic.h"
#include <cstdio>
#include <cstring>
#include <sys/sysctl.h>
@@ -30,34 +30,15 @@ llvm::VersionTuple HostInfoFreeBSD::GetOSVersion() {
return llvm::VersionTuple();
}
-bool HostInfoFreeBSD::GetOSBuildString(std::string &s) {
+llvm::Optional<std::string> HostInfoFreeBSD::GetOSBuildString() {
int mib[2] = {CTL_KERN, KERN_OSREV};
- char osrev_str[12];
uint32_t osrev = 0;
size_t osrev_len = sizeof(osrev);
- if (::sysctl(mib, 2, &osrev, &osrev_len, NULL, 0) == 0) {
- ::snprintf(osrev_str, sizeof(osrev_str), "%-8.8u", osrev);
- s.assign(osrev_str);
- return true;
- }
-
- s.clear();
- return false;
-}
-
-bool HostInfoFreeBSD::GetOSKernelDescription(std::string &s) {
- struct utsname un;
-
- ::memset(&un, 0, sizeof(utsname));
- s.clear();
-
- if (uname(&un) < 0)
- return false;
-
- s.assign(un.version);
+ if (::sysctl(mib, 2, &osrev, &osrev_len, NULL, 0) == 0)
+ return llvm::formatv("{0,8:8}", osrev).str();
- return true;
+ return llvm::None;
}
FileSpec HostInfoFreeBSD::GetProgramFileSpec() {
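GetOSBuildString now signals missing data with llvm::Optional instead of a bool plus output string. A small sketch of that pattern using a hypothetical helper (not the HostInfo API itself):

#include "llvm/ADT/Optional.h"
#include "llvm/Support/FormatVariadic.h"
#include <cstdio>
#include <string>

static llvm::Optional<std::string> DescribeBuild(bool have_osrev,
                                                 unsigned osrev) {
  if (!have_osrev)
    return llvm::None;                      // caller sees "no value"
  return llvm::formatv("{0}", osrev).str(); // e.g. "1400097" (made-up value)
}

int main() {
  if (llvm::Optional<std::string> build = DescribeBuild(true, 1400097))
    std::printf("%s\n", build->c_str());
}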
diff --git a/lldb/source/Host/netbsd/HostInfoNetBSD.cpp b/lldb/source/Host/netbsd/HostInfoNetBSD.cpp
index bddd46cec3ee..234dd3d5e103 100644
--- a/lldb/source/Host/netbsd/HostInfoNetBSD.cpp
+++ b/lldb/source/Host/netbsd/HostInfoNetBSD.cpp
@@ -42,34 +42,16 @@ llvm::VersionTuple HostInfoNetBSD::GetOSVersion() {
return llvm::VersionTuple();
}
-bool HostInfoNetBSD::GetOSBuildString(std::string &s) {
+llvm::Optional<std::string> HostInfoNetBSD::GetOSBuildString() {
int mib[2] = {CTL_KERN, KERN_OSREV};
char osrev_str[12];
int osrev = 0;
size_t osrev_len = sizeof(osrev);
- if (::sysctl(mib, 2, &osrev, &osrev_len, NULL, 0) == 0) {
- ::snprintf(osrev_str, sizeof(osrev_str), "%-10.10d", osrev);
- s.assign(osrev_str);
- return true;
- }
-
- s.clear();
- return false;
-}
-
-bool HostInfoNetBSD::GetOSKernelDescription(std::string &s) {
- struct utsname un;
-
- ::memset(&un, 0, sizeof(un));
- s.clear();
-
- if (::uname(&un) < 0)
- return false;
-
- s.assign(un.version);
+ if (::sysctl(mib, 2, &osrev, &osrev_len, NULL, 0) == 0)
+ return llvm::formatv("{0,10:10}", osrev).str();
- return true;
+ return llvm::None;
}
FileSpec HostInfoNetBSD::GetProgramFileSpec() {
diff --git a/lldb/source/Host/openbsd/HostInfoOpenBSD.cpp b/lldb/source/Host/openbsd/HostInfoOpenBSD.cpp
index 9617375babe1..5db843ff628d 100644
--- a/lldb/source/Host/openbsd/HostInfoOpenBSD.cpp
+++ b/lldb/source/Host/openbsd/HostInfoOpenBSD.cpp
@@ -29,34 +29,16 @@ llvm::VersionTuple HostInfoOpenBSD::GetOSVersion() {
return llvm::VersionTuple();
}
-bool HostInfoOpenBSD::GetOSBuildString(std::string &s) {
+llvm::Optional<std::string> HostInfoOpenBSD::GetOSBuildString() {
int mib[2] = {CTL_KERN, KERN_OSREV};
char osrev_str[12];
uint32_t osrev = 0;
size_t osrev_len = sizeof(osrev);
- if (::sysctl(mib, 2, &osrev, &osrev_len, NULL, 0) == 0) {
- ::snprintf(osrev_str, sizeof(osrev_str), "%-8.8u", osrev);
- s.assign(osrev_str);
- return true;
- }
+ if (::sysctl(mib, 2, &osrev, &osrev_len, NULL, 0) == 0)
+ return llvm::formatv("{0,8:8}", osrev).str();
- s.clear();
- return false;
-}
-
-bool HostInfoOpenBSD::GetOSKernelDescription(std::string &s) {
- struct utsname un;
-
- ::memset(&un, 0, sizeof(utsname));
- s.clear();
-
- if (uname(&un) < 0)
- return false;
-
- s.assign(un.version);
-
- return true;
+ return llvm::None;
}
FileSpec HostInfoOpenBSD::GetProgramFileSpec() {
diff --git a/lldb/source/Host/posix/ConnectionFileDescriptorPosix.cpp b/lldb/source/Host/posix/ConnectionFileDescriptorPosix.cpp
index 2f4cc960f02d..b3140f7b0fd7 100644
--- a/lldb/source/Host/posix/ConnectionFileDescriptorPosix.cpp
+++ b/lldb/source/Host/posix/ConnectionFileDescriptorPosix.cpp
@@ -13,10 +13,10 @@
#define _DARWIN_UNLIMITED_SELECT
#endif
-#include "lldb/Host/posix/ConnectionFileDescriptorPosix.h"
#include "lldb/Host/Config.h"
#include "lldb/Host/Socket.h"
#include "lldb/Host/SocketAddress.h"
+#include "lldb/Host/posix/ConnectionFileDescriptorPosix.h"
#include "lldb/Utility/SelectHelper.h"
#include "lldb/Utility/Timeout.h"
@@ -50,30 +50,6 @@
using namespace lldb;
using namespace lldb_private;
-const char *ConnectionFileDescriptor::LISTEN_SCHEME = "listen";
-const char *ConnectionFileDescriptor::ACCEPT_SCHEME = "accept";
-const char *ConnectionFileDescriptor::UNIX_ACCEPT_SCHEME = "unix-accept";
-const char *ConnectionFileDescriptor::CONNECT_SCHEME = "connect";
-const char *ConnectionFileDescriptor::TCP_CONNECT_SCHEME = "tcp-connect";
-const char *ConnectionFileDescriptor::UDP_SCHEME = "udp";
-const char *ConnectionFileDescriptor::UNIX_CONNECT_SCHEME = "unix-connect";
-const char *ConnectionFileDescriptor::UNIX_ABSTRACT_CONNECT_SCHEME =
- "unix-abstract-connect";
-const char *ConnectionFileDescriptor::FD_SCHEME = "fd";
-const char *ConnectionFileDescriptor::FILE_SCHEME = "file";
-
-namespace {
-
-llvm::Optional<llvm::StringRef> GetURLAddress(llvm::StringRef url,
- llvm::StringRef scheme) {
- if (!url.consume_front(scheme))
- return llvm::None;
- if (!url.consume_front("://"))
- return llvm::None;
- return url;
-}
-}
-
ConnectionFileDescriptor::ConnectionFileDescriptor(bool child_processes_inherit)
: Connection(), m_pipe(), m_mutex(), m_shutting_down(false),
@@ -86,9 +62,9 @@ ConnectionFileDescriptor::ConnectionFileDescriptor(bool child_processes_inherit)
ConnectionFileDescriptor::ConnectionFileDescriptor(int fd, bool owns_fd)
: Connection(), m_pipe(), m_mutex(), m_shutting_down(false),
- m_waiting_for_accept(false), m_child_processes_inherit(false) {
- m_write_sp = std::make_shared<NativeFile>(fd, File::eOpenOptionWrite, owns_fd);
- m_read_sp = std::make_shared<NativeFile>(fd, File::eOpenOptionRead, false);
+ m_child_processes_inherit(false) {
+ m_io_sp =
+ std::make_shared<NativeFile>(fd, File::eOpenOptionReadWrite, owns_fd);
Log *log(lldb_private::GetLogIfAnyCategoriesSet(LIBLLDB_LOG_CONNECTION |
LIBLLDB_LOG_OBJECT));
@@ -101,7 +77,7 @@ ConnectionFileDescriptor::ConnectionFileDescriptor(int fd, bool owns_fd)
ConnectionFileDescriptor::ConnectionFileDescriptor(Socket *socket)
: Connection(), m_pipe(), m_mutex(), m_shutting_down(false),
- m_waiting_for_accept(false), m_child_processes_inherit(false) {
+ m_child_processes_inherit(false) {
InitializeSocket(socket);
}
@@ -143,12 +119,18 @@ void ConnectionFileDescriptor::CloseCommandPipe() {
}
bool ConnectionFileDescriptor::IsConnected() const {
- return (m_read_sp && m_read_sp->IsValid()) ||
- (m_write_sp && m_write_sp->IsValid());
+ return m_io_sp && m_io_sp->IsValid();
}
ConnectionStatus ConnectionFileDescriptor::Connect(llvm::StringRef path,
Status *error_ptr) {
+ return Connect(path, nullptr, error_ptr);
+}
+
+ConnectionStatus
+ConnectionFileDescriptor::Connect(llvm::StringRef path,
+ socket_id_callback_type socket_id_callback,
+ Status *error_ptr) {
std::lock_guard<std::recursive_mutex> guard(m_mutex);
Log *log(lldb_private::GetLogIfAnyCategoriesSet(LIBLLDB_LOG_CONNECTION));
LLDB_LOGF(log, "%p ConnectionFileDescriptor::Connect (url = '%s')",
@@ -156,133 +138,47 @@ ConnectionStatus ConnectionFileDescriptor::Connect(llvm::StringRef path,
OpenCommandPipe();
+ if (path.empty()) {
+ if (error_ptr)
+ error_ptr->SetErrorString("invalid connect arguments");
+ return eConnectionStatusError;
+ }
+
+ llvm::StringRef scheme;
+ std::tie(scheme, path) = path.split("://");
+
if (!path.empty()) {
- llvm::Optional<llvm::StringRef> addr;
- if ((addr = GetURLAddress(path, LISTEN_SCHEME))) {
- // listen://HOST:PORT
- return SocketListenAndAccept(*addr, error_ptr);
- } else if ((addr = GetURLAddress(path, ACCEPT_SCHEME))) {
- // unix://SOCKNAME
- return NamedSocketAccept(*addr, error_ptr);
- } else if ((addr = GetURLAddress(path, UNIX_ACCEPT_SCHEME))) {
- // unix://SOCKNAME
- return NamedSocketAccept(*addr, error_ptr);
- } else if ((addr = GetURLAddress(path, CONNECT_SCHEME))) {
- return ConnectTCP(*addr, error_ptr);
- } else if ((addr = GetURLAddress(path, TCP_CONNECT_SCHEME))) {
- return ConnectTCP(*addr, error_ptr);
- } else if ((addr = GetURLAddress(path, UDP_SCHEME))) {
- return ConnectUDP(*addr, error_ptr);
- } else if ((addr = GetURLAddress(path, UNIX_CONNECT_SCHEME))) {
- // unix-connect://SOCKNAME
- return NamedSocketConnect(*addr, error_ptr);
- } else if ((addr = GetURLAddress(path, UNIX_ABSTRACT_CONNECT_SCHEME))) {
- // unix-abstract-connect://SOCKNAME
- return UnixAbstractSocketConnect(*addr, error_ptr);
- }
+ auto method =
+ llvm::StringSwitch<ConnectionStatus (ConnectionFileDescriptor::*)(
+ llvm::StringRef, socket_id_callback_type, Status *)>(scheme)
+ .Case("listen", &ConnectionFileDescriptor::AcceptTCP)
+ .Cases("accept", "unix-accept",
+ &ConnectionFileDescriptor::AcceptNamedSocket)
+ .Case("unix-abstract-accept",
+ &ConnectionFileDescriptor::AcceptAbstractSocket)
+ .Cases("connect", "tcp-connect",
+ &ConnectionFileDescriptor::ConnectTCP)
+ .Case("udp", &ConnectionFileDescriptor::ConnectUDP)
+ .Case("unix-connect", &ConnectionFileDescriptor::ConnectNamedSocket)
+ .Case("unix-abstract-connect",
+ &ConnectionFileDescriptor::ConnectAbstractSocket)
#if LLDB_ENABLE_POSIX
- else if ((addr = GetURLAddress(path, FD_SCHEME))) {
- // Just passing a native file descriptor within this current process that
- // is already opened (possibly from a service or other source).
- int fd = -1;
-
- if (!addr->getAsInteger(0, fd)) {
- // We have what looks to be a valid file descriptor, but we should make
- // sure it is. We currently are doing this by trying to get the flags
- // from the file descriptor and making sure it isn't a bad fd.
- errno = 0;
- int flags = ::fcntl(fd, F_GETFL, 0);
- if (flags == -1 || errno == EBADF) {
- if (error_ptr)
- error_ptr->SetErrorStringWithFormat("stale file descriptor: %s",
- path.str().c_str());
- m_read_sp.reset();
- m_write_sp.reset();
- return eConnectionStatusError;
- } else {
- // Don't take ownership of a file descriptor that gets passed to us
- // since someone else opened the file descriptor and handed it to us.
- // TODO: Since are using a URL to open connection we should
- // eventually parse options using the web standard where we have
- // "fd://123?opt1=value;opt2=value" and we can have an option be
- // "owns=1" or "owns=0" or something like this to allow us to specify
- // this. For now, we assume we must assume we don't own it.
-
- std::unique_ptr<TCPSocket> tcp_socket;
- tcp_socket = std::make_unique<TCPSocket>(fd, false, false);
- // Try and get a socket option from this file descriptor to see if
- // this is a socket and set m_is_socket accordingly.
- int resuse;
- bool is_socket =
- !!tcp_socket->GetOption(SOL_SOCKET, SO_REUSEADDR, resuse);
- if (is_socket) {
- m_read_sp = std::move(tcp_socket);
- m_write_sp = m_read_sp;
- } else {
- m_read_sp =
- std::make_shared<NativeFile>(fd, File::eOpenOptionRead, false);
- m_write_sp =
- std::make_shared<NativeFile>(fd, File::eOpenOptionWrite, false);
- }
- m_uri = std::string(*addr);
- return eConnectionStatusSuccess;
- }
- }
+ .Case("fd", &ConnectionFileDescriptor::ConnectFD)
+ .Case("file", &ConnectionFileDescriptor::ConnectFile)
+ .Case("serial", &ConnectionFileDescriptor::ConnectSerialPort)
+#endif
+ .Default(nullptr);
+ if (method) {
if (error_ptr)
- error_ptr->SetErrorStringWithFormat("invalid file descriptor: \"%s\"",
- path.str().c_str());
- m_read_sp.reset();
- m_write_sp.reset();
- return eConnectionStatusError;
- } else if ((addr = GetURLAddress(path, FILE_SCHEME))) {
- std::string addr_str = addr->str();
- // file:///PATH
- int fd = llvm::sys::RetryAfterSignal(-1, ::open, addr_str.c_str(), O_RDWR);
- if (fd == -1) {
- if (error_ptr)
- error_ptr->SetErrorToErrno();
- return eConnectionStatusError;
- }
-
- if (::isatty(fd)) {
- // Set up serial terminal emulation
- struct termios options;
- ::tcgetattr(fd, &options);
-
- // Set port speed to maximum
- ::cfsetospeed(&options, B115200);
- ::cfsetispeed(&options, B115200);
-
- // Raw input, disable echo and signals
- options.c_lflag &= ~(ICANON | ECHO | ECHOE | ISIG);
-
- // Make sure only one character is needed to return from a read
- options.c_cc[VMIN] = 1;
- options.c_cc[VTIME] = 0;
-
- llvm::sys::RetryAfterSignal(-1, ::tcsetattr, fd, TCSANOW, &options);
- }
-
- int flags = ::fcntl(fd, F_GETFL, 0);
- if (flags >= 0) {
- if ((flags & O_NONBLOCK) == 0) {
- flags |= O_NONBLOCK;
- ::fcntl(fd, F_SETFL, flags);
- }
- }
- m_read_sp = std::make_shared<NativeFile>(fd, File::eOpenOptionRead, true);
- m_write_sp = std::make_shared<NativeFile>(fd, File::eOpenOptionWrite, false);
- return eConnectionStatusSuccess;
+ *error_ptr = Status();
+ return (this->*method)(path, socket_id_callback, error_ptr);
}
-#endif
- if (error_ptr)
- error_ptr->SetErrorStringWithFormat("unsupported connection URL: '%s'",
- path.str().c_str());
- return eConnectionStatusError;
}
+
if (error_ptr)
- error_ptr->SetErrorString("invalid connect arguments");
+ error_ptr->SetErrorStringWithFormat("unsupported connection URL: '%s'",
+ path.str().c_str());
return eConnectionStatusError;
}
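The rewritten Connect() above replaces the old if/else-if chain with a table-driven dispatch: llvm::StringSwitch maps the URL scheme to a pointer-to-member handler, and the chosen handler is invoked via (this->*method)(...). Below is a minimal standalone sketch of that pattern, using a hypothetical Dispatcher class rather than the real ConnectionFileDescriptor API:

    // Sketch only: scheme -> pointer-to-member dispatch (assumes LLVM's ADT headers).
    #include "llvm/ADT/StringRef.h"
    #include "llvm/ADT/StringSwitch.h"
    #include <cstdio>
    #include <tuple>

    struct Dispatcher {
      bool ConnectTcp(llvm::StringRef arg) {
        std::printf("tcp handler: %s\n", arg.str().c_str());
        return true;
      }
      bool ConnectFd(llvm::StringRef arg) {
        std::printf("fd handler: %s\n", arg.str().c_str());
        return true;
      }

      bool Connect(llvm::StringRef url) {
        llvm::StringRef scheme, rest;
        std::tie(scheme, rest) = url.split("://");
        auto method =
            llvm::StringSwitch<bool (Dispatcher::*)(llvm::StringRef)>(scheme)
                .Case("connect", &Dispatcher::ConnectTcp)
                .Case("fd", &Dispatcher::ConnectFd)
                .Default(nullptr);
        if (!method)
          return false; // unsupported scheme
        return (this->*method)(rest);
      }
    };

Keeping the .Default(nullptr) case means the "unsupported connection URL" error is reported in exactly one place instead of being repeated per scheme.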
@@ -306,9 +202,8 @@ ConnectionStatus ConnectionFileDescriptor::Disconnect(Status *error_ptr) {
return eConnectionStatusSuccess;
}
- if (m_read_sp && m_read_sp->IsValid() &&
- m_read_sp->GetFdType() == IOObject::eFDTypeSocket)
- static_cast<Socket &>(*m_read_sp).PreDisconnect();
+ if (m_io_sp->GetFdType() == IOObject::eFDTypeSocket)
+ static_cast<Socket &>(*m_io_sp).PreDisconnect();
// Try to get the ConnectionFileDescriptor's mutex. If we fail, that is
// quite likely because somebody is doing a blocking read on our file
@@ -337,12 +232,11 @@ ConnectionStatus ConnectionFileDescriptor::Disconnect(Status *error_ptr) {
// Prevents reads and writes during shutdown.
m_shutting_down = true;
- Status error = m_read_sp->Close();
- Status error2 = m_write_sp->Close();
- if (error.Fail() || error2.Fail())
+ Status error = m_io_sp->Close();
+ if (error.Fail())
status = eConnectionStatusError;
if (error_ptr)
- *error_ptr = error.Fail() ? error : error2;
+ *error_ptr = error;
// Close any pipes we were using for async interrupts
m_pipe.Close();
@@ -384,14 +278,14 @@ size_t ConnectionFileDescriptor::Read(void *dst, size_t dst_len,
Status error;
size_t bytes_read = dst_len;
- error = m_read_sp->Read(dst, bytes_read);
+ error = m_io_sp->Read(dst, bytes_read);
if (log) {
LLDB_LOGF(log,
"%p ConnectionFileDescriptor::Read() fd = %" PRIu64
", dst = %p, dst_len = %" PRIu64 ") => %" PRIu64 ", error = %s",
static_cast<void *>(this),
- static_cast<uint64_t>(m_read_sp->GetWaitableHandle()),
+ static_cast<uint64_t>(m_io_sp->GetWaitableHandle()),
static_cast<void *>(dst), static_cast<uint64_t>(dst_len),
static_cast<uint64_t>(bytes_read), error.AsCString());
}
@@ -410,7 +304,7 @@ size_t ConnectionFileDescriptor::Read(void *dst, size_t dst_len,
switch (error_value) {
case EAGAIN: // The file was marked for non-blocking I/O, and no data were
// ready to be read.
- if (m_read_sp->GetFdType() == IOObject::eFDTypeSocket)
+ if (m_io_sp->GetFdType() == IOObject::eFDTypeSocket)
status = eConnectionStatusTimedOut;
else
status = eConnectionStatusSuccess;
@@ -488,14 +382,14 @@ size_t ConnectionFileDescriptor::Write(const void *src, size_t src_len,
Status error;
size_t bytes_sent = src_len;
- error = m_write_sp->Write(src, bytes_sent);
+ error = m_io_sp->Write(src, bytes_sent);
if (log) {
LLDB_LOGF(log,
"%p ConnectionFileDescriptor::Write(fd = %" PRIu64
", src = %p, src_len = %" PRIu64 ") => %" PRIu64 " (error = %s)",
static_cast<void *>(this),
- static_cast<uint64_t>(m_write_sp->GetWaitableHandle()),
+ static_cast<uint64_t>(m_io_sp->GetWaitableHandle()),
static_cast<const void *>(src), static_cast<uint64_t>(src_len),
static_cast<uint64_t>(bytes_sent), error.AsCString());
}
@@ -558,7 +452,7 @@ ConnectionFileDescriptor::BytesAvailable(const Timeout<std::micro> &timeout,
// Make a copy of the file descriptors to make sure we don't have another
// thread change these values out from under us and cause problems in the
// loop below where like in FS_SET()
- const IOObject::WaitableHandle handle = m_read_sp->GetWaitableHandle();
+ const IOObject::WaitableHandle handle = m_io_sp->GetWaitableHandle();
const int pipe_fd = m_pipe.GetReadFileDescriptor();
if (handle != IOObject::kInvalidHandleValue) {
@@ -579,7 +473,7 @@ ConnectionFileDescriptor::BytesAvailable(const Timeout<std::micro> &timeout,
if (have_pipe_fd)
select_helper.FDSetRead(pipe_fd);
- while (handle == m_read_sp->GetWaitableHandle()) {
+ while (handle == m_io_sp->GetWaitableHandle()) {
Status error = select_helper.Select();
@@ -616,7 +510,8 @@ ConnectionFileDescriptor::BytesAvailable(const Timeout<std::micro> &timeout,
// data from that pipe:
char c;
- ssize_t bytes_read = llvm::sys::RetryAfterSignal(-1, ::read, pipe_fd, &c, 1);
+ ssize_t bytes_read =
+ llvm::sys::RetryAfterSignal(-1, ::read, pipe_fd, &c, 1);
assert(bytes_read == 1);
(void)bytes_read;
switch (c) {
@@ -640,95 +535,123 @@ ConnectionFileDescriptor::BytesAvailable(const Timeout<std::micro> &timeout,
return eConnectionStatusLostConnection;
}
-ConnectionStatus
-ConnectionFileDescriptor::NamedSocketAccept(llvm::StringRef socket_name,
- Status *error_ptr) {
- Socket *socket = nullptr;
- Status error =
- Socket::UnixDomainAccept(socket_name, m_child_processes_inherit, socket);
- if (error_ptr)
- *error_ptr = error;
- m_write_sp.reset(socket);
- m_read_sp = m_write_sp;
- if (error.Fail()) {
- return eConnectionStatusError;
+lldb::ConnectionStatus ConnectionFileDescriptor::AcceptSocket(
+ Socket::SocketProtocol socket_protocol, llvm::StringRef socket_name,
+ llvm::function_ref<void(Socket &)> post_listen_callback,
+ Status *error_ptr) {
+ Status error;
+ std::unique_ptr<Socket> listening_socket =
+ Socket::Create(socket_protocol, m_child_processes_inherit, error);
+ Socket *accepted_socket;
+
+ if (!error.Fail())
+ error = listening_socket->Listen(socket_name, 5);
+
+ if (!error.Fail()) {
+ post_listen_callback(*listening_socket);
+ error = listening_socket->Accept(accepted_socket);
+ }
+
+ if (!error.Fail()) {
+ m_io_sp.reset(accepted_socket);
+ m_uri.assign(socket_name.str());
+ return eConnectionStatusSuccess;
}
- m_uri.assign(std::string(socket_name));
- return eConnectionStatusSuccess;
-}
-ConnectionStatus
-ConnectionFileDescriptor::NamedSocketConnect(llvm::StringRef socket_name,
- Status *error_ptr) {
- Socket *socket = nullptr;
- Status error =
- Socket::UnixDomainConnect(socket_name, m_child_processes_inherit, socket);
if (error_ptr)
*error_ptr = error;
- m_write_sp.reset(socket);
- m_read_sp = m_write_sp;
- if (error.Fail()) {
- return eConnectionStatusError;
- }
- m_uri.assign(std::string(socket_name));
- return eConnectionStatusSuccess;
+ return eConnectionStatusError;
}
lldb::ConnectionStatus
-ConnectionFileDescriptor::UnixAbstractSocketConnect(llvm::StringRef socket_name,
- Status *error_ptr) {
- Socket *socket = nullptr;
- Status error = Socket::UnixAbstractConnect(socket_name,
- m_child_processes_inherit, socket);
+ConnectionFileDescriptor::ConnectSocket(Socket::SocketProtocol socket_protocol,
+ llvm::StringRef socket_name,
+ Status *error_ptr) {
+ Status error;
+ std::unique_ptr<Socket> socket =
+ Socket::Create(socket_protocol, m_child_processes_inherit, error);
+
+ if (!error.Fail())
+ error = socket->Connect(socket_name);
+
+ if (!error.Fail()) {
+ m_io_sp = std::move(socket);
+ m_uri.assign(socket_name.str());
+ return eConnectionStatusSuccess;
+ }
+
if (error_ptr)
*error_ptr = error;
- m_write_sp.reset(socket);
- m_read_sp = m_write_sp;
- if (error.Fail()) {
- return eConnectionStatusError;
- }
- m_uri.assign(std::string(socket_name));
- return eConnectionStatusSuccess;
+ return eConnectionStatusError;
}
-ConnectionStatus
-ConnectionFileDescriptor::SocketListenAndAccept(llvm::StringRef s,
- Status *error_ptr) {
- if (error_ptr)
- *error_ptr = Status();
- m_port_predicate.SetValue(0, eBroadcastNever);
+ConnectionStatus ConnectionFileDescriptor::AcceptNamedSocket(
+ llvm::StringRef socket_name, socket_id_callback_type socket_id_callback,
+ Status *error_ptr) {
+ return AcceptSocket(
+ Socket::ProtocolUnixDomain, socket_name,
+ [socket_id_callback, socket_name](Socket &listening_socket) {
+ socket_id_callback(socket_name);
+ },
+ error_ptr);
+}
- m_waiting_for_accept = true;
- llvm::Expected<std::unique_ptr<TCPSocket>> listening_socket =
- Socket::TcpListen(s, m_child_processes_inherit, &m_port_predicate);
- if (!listening_socket) {
- if (error_ptr)
- *error_ptr = listening_socket.takeError();
- else
- LLDB_LOG_ERROR(GetLogIfAnyCategoriesSet(LIBLLDB_LOG_CONNECTION),
- listening_socket.takeError(), "tcp listen failed: {0}");
- return eConnectionStatusError;
- }
+ConnectionStatus ConnectionFileDescriptor::ConnectNamedSocket(
+ llvm::StringRef socket_name, socket_id_callback_type socket_id_callback,
+ Status *error_ptr) {
+ return ConnectSocket(Socket::ProtocolUnixDomain, socket_name, error_ptr);
+}
+ConnectionStatus ConnectionFileDescriptor::AcceptAbstractSocket(
+ llvm::StringRef socket_name, socket_id_callback_type socket_id_callback,
+ Status *error_ptr) {
+ return AcceptSocket(
+ Socket::ProtocolUnixAbstract, socket_name,
+ [socket_id_callback, socket_name](Socket &listening_socket) {
+ socket_id_callback(socket_name);
+ },
+ error_ptr);
+}
- Socket *accepted_socket;
- Status error = listening_socket.get()->Accept(accepted_socket);
- if (error_ptr)
- *error_ptr = error;
- if (error.Fail())
- return eConnectionStatusError;
+lldb::ConnectionStatus ConnectionFileDescriptor::ConnectAbstractSocket(
+ llvm::StringRef socket_name, socket_id_callback_type socket_id_callback,
+ Status *error_ptr) {
+ return ConnectSocket(Socket::ProtocolUnixAbstract, socket_name, error_ptr);
+}
- InitializeSocket(accepted_socket);
- return eConnectionStatusSuccess;
+ConnectionStatus
+ConnectionFileDescriptor::AcceptTCP(llvm::StringRef socket_name,
+ socket_id_callback_type socket_id_callback,
+ Status *error_ptr) {
+ ConnectionStatus ret = AcceptSocket(
+ Socket::ProtocolTcp, socket_name,
+ [socket_id_callback](Socket &listening_socket) {
+ uint16_t port =
+ static_cast<TCPSocket &>(listening_socket).GetLocalPortNumber();
+ socket_id_callback(std::to_string(port));
+ },
+ error_ptr);
+ if (ret == eConnectionStatusSuccess)
+ m_uri.assign(
+ static_cast<TCPSocket *>(m_io_sp.get())->GetRemoteConnectionURI());
+ return ret;
}
-ConnectionStatus ConnectionFileDescriptor::ConnectTCP(llvm::StringRef s,
- Status *error_ptr) {
+ConnectionStatus
+ConnectionFileDescriptor::ConnectTCP(llvm::StringRef socket_name,
+ socket_id_callback_type socket_id_callback,
+ Status *error_ptr) {
+ return ConnectSocket(Socket::ProtocolTcp, socket_name, error_ptr);
+}
+
+ConnectionStatus
+ConnectionFileDescriptor::ConnectUDP(llvm::StringRef s,
+ socket_id_callback_type socket_id_callback,
+ Status *error_ptr) {
if (error_ptr)
*error_ptr = Status();
-
- llvm::Expected<std::unique_ptr<Socket>> socket =
- Socket::TcpConnect(s, m_child_processes_inherit);
+ llvm::Expected<std::unique_ptr<UDPSocket>> socket =
+ Socket::UdpConnect(s, m_child_processes_inherit);
if (!socket) {
if (error_ptr)
*error_ptr = socket.takeError();
@@ -737,36 +660,144 @@ ConnectionStatus ConnectionFileDescriptor::ConnectTCP(llvm::StringRef s,
socket.takeError(), "tcp connect failed: {0}");
return eConnectionStatusError;
}
- m_write_sp = std::move(*socket);
- m_read_sp = m_write_sp;
+ m_io_sp = std::move(*socket);
m_uri.assign(std::string(s));
return eConnectionStatusSuccess;
}
-ConnectionStatus ConnectionFileDescriptor::ConnectUDP(llvm::StringRef s,
- Status *error_ptr) {
+ConnectionStatus
+ConnectionFileDescriptor::ConnectFD(llvm::StringRef s,
+ socket_id_callback_type socket_id_callback,
+ Status *error_ptr) {
+#if LLDB_ENABLE_POSIX
+  // The URL carries a native file descriptor within the current process that
+  // is already open (possibly passed to us by a service or other source).
+ int fd = -1;
+
+ if (!s.getAsInteger(0, fd)) {
+    // We have what looks to be a valid file descriptor, but we should make
+    // sure it is. We currently do this by fetching the descriptor's flags
+    // and checking that it isn't a bad fd.
+ errno = 0;
+ int flags = ::fcntl(fd, F_GETFL, 0);
+ if (flags == -1 || errno == EBADF) {
+ if (error_ptr)
+ error_ptr->SetErrorStringWithFormat("stale file descriptor: %s",
+ s.str().c_str());
+ m_io_sp.reset();
+ return eConnectionStatusError;
+ } else {
+ // Don't take ownership of a file descriptor that gets passed to us
+ // since someone else opened the file descriptor and handed it to us.
+      // TODO: Since we are using a URL to open the connection, we should
+      // eventually parse options using the usual web syntax, e.g.
+      // "fd://123?opt1=value;opt2=value", and support an option such as
+      // "owns=1" or "owns=0" to say whether we own the descriptor.
+      // For now, we assume we don't own it.
+
+ std::unique_ptr<TCPSocket> tcp_socket;
+ tcp_socket = std::make_unique<TCPSocket>(fd, false, false);
+      // Try to get a socket option from this file descriptor to find out
+      // whether it is a socket.
+      int reuse;
+      bool is_socket =
+          !!tcp_socket->GetOption(SOL_SOCKET, SO_REUSEADDR, reuse);
+ if (is_socket)
+ m_io_sp = std::move(tcp_socket);
+ else
+ m_io_sp =
+ std::make_shared<NativeFile>(fd, File::eOpenOptionReadWrite, false);
+ m_uri = s.str();
+ return eConnectionStatusSuccess;
+ }
+ }
+
if (error_ptr)
- *error_ptr = Status();
- llvm::Expected<std::unique_ptr<UDPSocket>> socket =
- Socket::UdpConnect(s, m_child_processes_inherit);
- if (!socket) {
+ error_ptr->SetErrorStringWithFormat("invalid file descriptor: \"%s\"",
+ s.str().c_str());
+ m_io_sp.reset();
+ return eConnectionStatusError;
+#endif // LLDB_ENABLE_POSIX
+ llvm_unreachable("this function should be only called w/ LLDB_ENABLE_POSIX");
+}
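ConnectFD() above keeps the old probing logic: parse the numeric descriptor, verify it with fcntl(F_GETFL), then query a socket-level option to decide between a socket wrapper and a plain NativeFile. A rough standalone equivalent is sketched below; it uses plain getsockopt(SO_TYPE) for the socket probe instead of the Socket::GetOption helper, and InspectFdSpec is a made-up name:

    // Sketch, not LLDB code: classify an "fd://N" style descriptor spec.
    #include "llvm/ADT/StringRef.h"
    #include <cerrno>
    #include <fcntl.h>
    #include <sys/socket.h>

    static bool InspectFdSpec(llvm::StringRef spec, int &fd, bool &is_socket) {
      if (spec.getAsInteger(0, fd)) // getAsInteger returns true on parse failure
        return false;
      errno = 0;
      if (::fcntl(fd, F_GETFL, 0) == -1 && errno == EBADF)
        return false; // stale or invalid descriptor
      int type = 0;
      socklen_t len = sizeof(type);
      is_socket = ::getsockopt(fd, SOL_SOCKET, SO_TYPE, &type, &len) == 0;
      return true;
    }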
+
+ConnectionStatus ConnectionFileDescriptor::ConnectFile(
+ llvm::StringRef s, socket_id_callback_type socket_id_callback,
+ Status *error_ptr) {
+#if LLDB_ENABLE_POSIX
+ std::string addr_str = s.str();
+ // file:///PATH
+ int fd = llvm::sys::RetryAfterSignal(-1, ::open, addr_str.c_str(), O_RDWR);
+ if (fd == -1) {
if (error_ptr)
- *error_ptr = socket.takeError();
- else
- LLDB_LOG_ERROR(GetLogIfAnyCategoriesSet(LIBLLDB_LOG_CONNECTION),
- socket.takeError(), "tcp connect failed: {0}");
+ error_ptr->SetErrorToErrno();
return eConnectionStatusError;
}
- m_write_sp = std::move(*socket);
- m_read_sp = m_write_sp;
- m_uri.assign(std::string(s));
+
+ if (::isatty(fd)) {
+ // Set up serial terminal emulation
+ struct termios options;
+ ::tcgetattr(fd, &options);
+
+ // Set port speed to maximum
+ ::cfsetospeed(&options, B115200);
+ ::cfsetispeed(&options, B115200);
+
+ // Raw input, disable echo and signals
+ options.c_lflag &= ~(ICANON | ECHO | ECHOE | ISIG);
+
+ // Make sure only one character is needed to return from a read
+ options.c_cc[VMIN] = 1;
+ options.c_cc[VTIME] = 0;
+
+ llvm::sys::RetryAfterSignal(-1, ::tcsetattr, fd, TCSANOW, &options);
+ }
+
+ m_io_sp = std::make_shared<NativeFile>(fd, File::eOpenOptionReadWrite, true);
return eConnectionStatusSuccess;
+#endif // LLDB_ENABLE_POSIX
+ llvm_unreachable("this function should be only called w/ LLDB_ENABLE_POSIX");
}
-uint16_t
-ConnectionFileDescriptor::GetListeningPort(const Timeout<std::micro> &timeout) {
- auto Result = m_port_predicate.WaitForValueNotEqualTo(0, timeout);
- return Result ? *Result : 0;
+ConnectionStatus ConnectionFileDescriptor::ConnectSerialPort(
+ llvm::StringRef s, socket_id_callback_type socket_id_callback,
+ Status *error_ptr) {
+#if LLDB_ENABLE_POSIX
+ llvm::StringRef path, qs;
+ // serial:///PATH?k1=v1&k2=v2...
+ std::tie(path, qs) = s.split('?');
+
+ llvm::Expected<SerialPort::Options> serial_options =
+ SerialPort::OptionsFromURL(qs);
+ if (!serial_options) {
+ if (error_ptr)
+ *error_ptr = serial_options.takeError();
+ else
+ llvm::consumeError(serial_options.takeError());
+ return eConnectionStatusError;
+ }
+
+ int fd = llvm::sys::RetryAfterSignal(-1, ::open, path.str().c_str(), O_RDWR);
+ if (fd == -1) {
+ if (error_ptr)
+ error_ptr->SetErrorToErrno();
+ return eConnectionStatusError;
+ }
+
+ llvm::Expected<std::unique_ptr<SerialPort>> serial_sp = SerialPort::Create(
+ fd, File::eOpenOptionReadWrite, serial_options.get(), true);
+ if (!serial_sp) {
+ if (error_ptr)
+ *error_ptr = serial_sp.takeError();
+ else
+ llvm::consumeError(serial_sp.takeError());
+ return eConnectionStatusError;
+ }
+ m_io_sp = std::move(serial_sp.get());
+
+ return eConnectionStatusSuccess;
+#endif // LLDB_ENABLE_POSIX
+ llvm_unreachable("this function should be only called w/ LLDB_ENABLE_POSIX");
}
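ConnectSerialPort() documents the URL shape as serial:///PATH?k1=v1&k2=v2... and peels the query string off with a single split('?') before handing it to SerialPort::OptionsFromURL. A small sketch of tokenizing such a spec into the path and key/value pairs follows; the option keys in the comments are illustrative only, not a claim about what OptionsFromURL accepts:

    // Sketch only: tokenize "PATH?k1=v1&k2=v2" with llvm::StringRef::split.
    #include "llvm/ADT/StringRef.h"
    #include <tuple>
    #include <utility>
    #include <vector>

    static std::vector<std::pair<llvm::StringRef, llvm::StringRef>>
    SplitSerialOptions(llvm::StringRef spec, llvm::StringRef &path) {
      llvm::StringRef qs;
      std::tie(path, qs) = spec.split('?'); // e.g. "/dev/ttyUSB0?baud=115200"
      std::vector<std::pair<llvm::StringRef, llvm::StringRef>> opts;
      while (!qs.empty()) {
        llvm::StringRef kv;
        std::tie(kv, qs) = qs.split('&');
        opts.push_back(kv.split('=')); // ("baud", "115200"), ...
      }
      return opts;
    }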
bool ConnectionFileDescriptor::GetChildProcessesInherit() const {
@@ -779,7 +810,6 @@ void ConnectionFileDescriptor::SetChildProcessesInherit(
}
void ConnectionFileDescriptor::InitializeSocket(Socket *socket) {
- m_write_sp.reset(socket);
- m_read_sp = m_write_sp;
+ m_io_sp.reset(socket);
m_uri = socket->GetRemoteConnectionURI();
}
diff --git a/lldb/source/Host/posix/DomainSocket.cpp b/lldb/source/Host/posix/DomainSocket.cpp
index 7322b15200b4..ddbd983abb81 100644
--- a/lldb/source/Host/posix/DomainSocket.cpp
+++ b/lldb/source/Host/posix/DomainSocket.cpp
@@ -26,13 +26,11 @@ using namespace lldb_private;
#endif
#endif // #ifdef __ANDROID__
-namespace {
+static const int kDomain = AF_UNIX;
+static const int kType = SOCK_STREAM;
-const int kDomain = AF_UNIX;
-const int kType = SOCK_STREAM;
-
-bool SetSockAddr(llvm::StringRef name, const size_t name_offset,
- sockaddr_un *saddr_un, socklen_t &saddr_un_len) {
+static bool SetSockAddr(llvm::StringRef name, const size_t name_offset,
+ sockaddr_un *saddr_un, socklen_t &saddr_un_len) {
if (name.size() + name_offset > sizeof(saddr_un->sun_path))
return false;
@@ -56,7 +54,6 @@ bool SetSockAddr(llvm::StringRef name, const size_t name_offset,
return true;
}
-} // namespace
DomainSocket::DomainSocket(bool should_close, bool child_processes_inherit)
: Socket(ProtocolUnixDomain, should_close, child_processes_inherit) {}
@@ -127,29 +124,33 @@ void DomainSocket::DeleteSocketFile(llvm::StringRef name) {
}
std::string DomainSocket::GetSocketName() const {
- if (m_socket != kInvalidSocketValue) {
- struct sockaddr_un saddr_un;
- saddr_un.sun_family = AF_UNIX;
- socklen_t sock_addr_len = sizeof(struct sockaddr_un);
- if (::getpeername(m_socket, (struct sockaddr *)&saddr_un, &sock_addr_len) ==
- 0) {
- std::string name(saddr_un.sun_path + GetNameOffset(),
- sock_addr_len -
- offsetof(struct sockaddr_un, sun_path) -
+ if (m_socket == kInvalidSocketValue)
+ return "";
+
+ struct sockaddr_un saddr_un;
+ saddr_un.sun_family = AF_UNIX;
+ socklen_t sock_addr_len = sizeof(struct sockaddr_un);
+ if (::getpeername(m_socket, (struct sockaddr *)&saddr_un, &sock_addr_len) !=
+ 0)
+ return "";
+
+ if (sock_addr_len <= offsetof(struct sockaddr_un, sun_path))
+ return ""; // Unnamed domain socket
+
+ llvm::StringRef name(saddr_un.sun_path + GetNameOffset(),
+ sock_addr_len - offsetof(struct sockaddr_un, sun_path) -
GetNameOffset());
- if (name.back() == '\0') name.pop_back();
- return name;
- }
- }
- return "";
+ name = name.rtrim('\0');
+
+ return name.str();
}
std::string DomainSocket::GetRemoteConnectionURI() const {
- if (m_socket != kInvalidSocketValue) {
- return std::string(llvm::formatv(
- "{0}://{1}",
- GetNameOffset() == 0 ? "unix-connect" : "unix-abstract-connect",
- GetSocketName()));
- }
- return "";
+ std::string name = GetSocketName();
+ if (name.empty())
+ return name;
+
+ return llvm::formatv(
+ "{0}://{1}",
+ GetNameOffset() == 0 ? "unix-connect" : "unix-abstract-connect", name);
}
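GetRemoteConnectionURI() now returns the llvm::formatv result directly; formatv substitutes the {0}/{1} placeholders and converts implicitly to std::string, which is what makes the early-return form above work. A toy example with made-up values:

    // Sketch: building a "scheme://name" URI with llvm::formatv.
    #include "llvm/Support/FormatVariadic.h"
    #include <cstdio>
    #include <string>

    int main() {
      std::string uri = llvm::formatv("{0}://{1}", "unix-connect", "/tmp/lldb.sock");
      std::printf("%s\n", uri.c_str()); // prints: unix-connect:///tmp/lldb.sock
      return 0;
    }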
diff --git a/lldb/source/Host/posix/HostInfoPosix.cpp b/lldb/source/Host/posix/HostInfoPosix.cpp
index b633acf6fec6..63553590dff5 100644
--- a/lldb/source/Host/posix/HostInfoPosix.cpp
+++ b/lldb/source/Host/posix/HostInfoPosix.cpp
@@ -21,6 +21,7 @@
#include <mutex>
#include <pwd.h>
#include <sys/types.h>
+#include <sys/utsname.h>
#include <unistd.h>
using namespace lldb_private;
@@ -37,6 +38,14 @@ bool HostInfoPosix::GetHostname(std::string &s) {
return false;
}
+llvm::Optional<std::string> HostInfoPosix::GetOSKernelDescription() {
+ struct utsname un;
+ if (uname(&un) < 0)
+ return llvm::None;
+
+ return std::string(un.version);
+}
+
#ifdef __ANDROID__
#include <android/api-level.h>
#endif
diff --git a/lldb/source/Host/posix/HostProcessPosix.cpp b/lldb/source/Host/posix/HostProcessPosix.cpp
index f0142ec946b7..8599a94d2241 100644
--- a/lldb/source/Host/posix/HostProcessPosix.cpp
+++ b/lldb/source/Host/posix/HostProcessPosix.cpp
@@ -18,9 +18,7 @@
using namespace lldb_private;
-namespace {
-const int kInvalidPosixProcess = 0;
-}
+static const int kInvalidPosixProcess = 0;
HostProcessPosix::HostProcessPosix()
: HostNativeProcessBase(kInvalidPosixProcess) {}
diff --git a/lldb/source/Host/posix/LockFilePosix.cpp b/lldb/source/Host/posix/LockFilePosix.cpp
index d197974a72a5..cb9ca5c29e5f 100644
--- a/lldb/source/Host/posix/LockFilePosix.cpp
+++ b/lldb/source/Host/posix/LockFilePosix.cpp
@@ -16,10 +16,8 @@
using namespace lldb;
using namespace lldb_private;
-namespace {
-
-Status fileLock(int fd, int cmd, int lock_type, const uint64_t start,
- const uint64_t len) {
+static Status fileLock(int fd, int cmd, int lock_type, const uint64_t start,
+ const uint64_t len) {
struct flock fl;
fl.l_type = lock_type;
@@ -35,8 +33,6 @@ Status fileLock(int fd, int cmd, int lock_type, const uint64_t start,
return error;
}
-} // namespace
-
LockFilePosix::LockFilePosix(int fd) : LockFileBase(fd) {}
LockFilePosix::~LockFilePosix() { Unlock(); }
diff --git a/lldb/source/Host/posix/PipePosix.cpp b/lldb/source/Host/posix/PipePosix.cpp
index a8cce573f12a..bd311ad8769a 100644
--- a/lldb/source/Host/posix/PipePosix.cpp
+++ b/lldb/source/Host/posix/PipePosix.cpp
@@ -38,12 +38,10 @@ enum PIPES { READ, WRITE }; // Constants 0 and 1 for READ and WRITE
#define PIPE2_SUPPORTED 0
#endif
-namespace {
-
-constexpr auto OPEN_WRITER_SLEEP_TIMEOUT_MSECS = 100;
+static constexpr auto OPEN_WRITER_SLEEP_TIMEOUT_MSECS = 100;
#if defined(FD_CLOEXEC) && !PIPE2_SUPPORTED
-bool SetCloexecFlag(int fd) {
+static bool SetCloexecFlag(int fd) {
int flags = ::fcntl(fd, F_GETFD);
if (flags == -1)
return false;
@@ -51,10 +49,9 @@ bool SetCloexecFlag(int fd) {
}
#endif
-std::chrono::time_point<std::chrono::steady_clock> Now() {
+static std::chrono::time_point<std::chrono::steady_clock> Now() {
return std::chrono::steady_clock::now();
}
-} // namespace
PipePosix::PipePosix()
: m_fds{PipePosix::kInvalidDescriptor, PipePosix::kInvalidDescriptor} {}
diff --git a/lldb/source/Host/posix/ProcessLauncherPosixFork.cpp b/lldb/source/Host/posix/ProcessLauncherPosixFork.cpp
index 25dcf1e592c5..63178e6c8a7a 100644
--- a/lldb/source/Host/posix/ProcessLauncherPosixFork.cpp
+++ b/lldb/source/Host/posix/ProcessLauncherPosixFork.cpp
@@ -14,6 +14,7 @@
#include "lldb/Utility/FileSpec.h"
#include "lldb/Utility/Log.h"
#include "llvm/Support/Errno.h"
+#include "llvm/Support/FileSystem.h"
#include <climits>
#include <sys/ptrace.h>
@@ -46,8 +47,8 @@ static void FixupEnvironment(Environment &env) {
#endif
}
-static void LLVM_ATTRIBUTE_NORETURN ExitWithError(int error_fd,
- const char *operation) {
+[[noreturn]] static void ExitWithError(int error_fd,
+ const char *operation) {
int err = errno;
llvm::raw_fd_ostream os(error_fd, true);
os << operation << " failed: " << llvm::sys::StrError(err);
@@ -88,8 +89,8 @@ static void DupDescriptor(int error_fd, const FileSpec &file_spec, int fd,
return;
}
-static void LLVM_ATTRIBUTE_NORETURN ChildFunc(int error_fd,
- const ProcessLaunchInfo &info) {
+[[noreturn]] static void ChildFunc(int error_fd,
+ const ProcessLaunchInfo &info) {
if (info.GetFlags().Test(eLaunchFlagLaunchInSeparateProcessGroup)) {
if (setpgid(0, 0) != 0)
ExitWithError(error_fd, "setpgid");
@@ -143,9 +144,32 @@ static void LLVM_ATTRIBUTE_NORETURN ChildFunc(int error_fd,
// Close everything besides stdin, stdout, and stderr that has no file
// action to avoid leaking. Only do this when debugging, as elsewhere we
// actually rely on passing open descriptors to child processes.
- for (int fd = 3; fd < sysconf(_SC_OPEN_MAX); ++fd)
- if (!info.GetFileActionForFD(fd) && fd != error_fd)
- close(fd);
+
+ const llvm::StringRef proc_fd_path = "/proc/self/fd";
+ std::error_code ec;
+  bool result = false;
+ ec = llvm::sys::fs::is_directory(proc_fd_path, result);
+ if (result) {
+ std::vector<int> files_to_close;
+    // The directory iterator does not guarantee any particular order.
+ for (llvm::sys::fs::directory_iterator iter(proc_fd_path, ec), file_end;
+ iter != file_end && !ec; iter.increment(ec)) {
+ int fd = std::stoi(iter->path().substr(proc_fd_path.size() + 1));
+
+      // Don't close the first three descriptors since they are stdin, stdout
+      // and stderr.
+ if (fd > 2 && !info.GetFileActionForFD(fd) && fd != error_fd)
+ files_to_close.push_back(fd);
+ }
+ for (int file_to_close : files_to_close)
+ close(file_to_close);
+ } else {
+    // Since /proc/self/fd didn't work, fall back to the slow way instead.
+ int max_fd = sysconf(_SC_OPEN_MAX);
+ for (int fd = 3; fd < max_fd; ++fd)
+ if (!info.GetFileActionForFD(fd) && fd != error_fd)
+ close(fd);
+ }
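The child-side cleanup above prefers enumerating /proc/self/fd, so only descriptors that are actually open get closed, and falls back to walking 3..sysconf(_SC_OPEN_MAX) when that directory is unavailable; descriptors are collected first and closed in a second pass, presumably so the closes cannot interfere with the directory iteration itself. A plain-POSIX sketch of the same idea, independent of the llvm::sys::fs helpers (CloseInheritedFds is a made-up name):

    #include <cstdlib>
    #include <dirent.h>
    #include <unistd.h>
    #include <vector>

    static void CloseInheritedFds() {
      std::vector<int> to_close;
      if (DIR *dir = ::opendir("/proc/self/fd")) {
        while (struct dirent *entry = ::readdir(dir)) {
          int fd = std::atoi(entry->d_name); // "." and ".." parse to 0
          if (fd > 2 && fd != ::dirfd(dir))  // keep stdio and the iteration fd
            to_close.push_back(fd);
        }
        ::closedir(dir);
      } else {
        long max_fd = ::sysconf(_SC_OPEN_MAX); // fallback when /proc is missing
        for (int fd = 3; fd < max_fd; ++fd)
          to_close.push_back(fd);
      }
      for (int fd : to_close)
        ::close(fd);
    }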
// Start tracing this child that is about to exec.
if (ptrace(PT_TRACE_ME, 0, nullptr, 0) == -1)
diff --git a/lldb/source/Interpreter/CommandInterpreter.cpp b/lldb/source/Interpreter/CommandInterpreter.cpp
index 00e9ccb762c3..301bf949feef 100644
--- a/lldb/source/Interpreter/CommandInterpreter.cpp
+++ b/lldb/source/Interpreter/CommandInterpreter.cpp
@@ -744,8 +744,10 @@ void CommandInterpreter::LoadCommandDictionary() {
std::unique_ptr<CommandObjectRegexCommand> connect_gdb_remote_cmd_up(
new CommandObjectRegexCommand(
*this, "gdb-remote",
- "Connect to a process via remote GDB server. "
- "If no host is specifed, localhost is assumed.",
+ "Connect to a process via remote GDB server.\n"
+          "If no host is specified, localhost is assumed.\n"
+ "gdb-remote is an abbreviation for 'process connect --plugin "
+ "gdb-remote connect://<hostname>:<port>'\n",
"gdb-remote [<hostname>:]<portnum>", 2, 0, false));
if (connect_gdb_remote_cmd_up) {
if (connect_gdb_remote_cmd_up->AddRegexCommand(
@@ -762,9 +764,10 @@ void CommandInterpreter::LoadCommandDictionary() {
std::unique_ptr<CommandObjectRegexCommand> connect_kdp_remote_cmd_up(
new CommandObjectRegexCommand(
*this, "kdp-remote",
- "Connect to a process via remote KDP server. "
- "If no UDP port is specified, port 41139 is "
- "assumed.",
+ "Connect to a process via remote KDP server.\n"
+ "If no UDP port is specified, port 41139 is assumed.\n"
+ "kdp-remote is an abbreviation for 'process connect --plugin "
+ "kdp-remote udp://<hostname>:<port>'\n",
"kdp-remote <hostname>[:<portnum>]", 2, 0, false));
if (connect_kdp_remote_cmd_up) {
if (connect_kdp_remote_cmd_up->AddRegexCommand(
@@ -897,6 +900,63 @@ int CommandInterpreter::GetCommandNamesMatchingPartialString(
return matches.GetSize();
}
+CommandObjectMultiword *CommandInterpreter::VerifyUserMultiwordCmdPath(
+ Args &path, bool leaf_is_command, Status &result) {
+ result.Clear();
+
+ auto get_multi_or_report_error =
+ [&result](CommandObjectSP cmd_sp,
+ const char *name) -> CommandObjectMultiword * {
+ if (!cmd_sp) {
+ result.SetErrorStringWithFormat("Path component: '%s' not found", name);
+ return nullptr;
+ }
+ if (!cmd_sp->IsUserCommand()) {
+ result.SetErrorStringWithFormat("Path component: '%s' is not a user "
+ "command",
+ name);
+ return nullptr;
+ }
+ CommandObjectMultiword *cmd_as_multi = cmd_sp->GetAsMultiwordCommand();
+ if (!cmd_as_multi) {
+ result.SetErrorStringWithFormat("Path component: '%s' is not a container "
+ "command",
+ name);
+ return nullptr;
+ }
+ return cmd_as_multi;
+ };
+
+ size_t num_args = path.GetArgumentCount();
+ if (num_args == 0) {
+ result.SetErrorString("empty command path");
+ return nullptr;
+ }
+
+ if (num_args == 1 && leaf_is_command) {
+ // We just got a leaf command to be added to the root. That's not an error,
+ // just return null for the container.
+ return nullptr;
+ }
+
+ // Start by getting the root command from the interpreter.
+ const char *cur_name = path.GetArgumentAtIndex(0);
+ CommandObjectSP cur_cmd_sp = GetCommandSPExact(cur_name);
+ CommandObjectMultiword *cur_as_multi =
+ get_multi_or_report_error(cur_cmd_sp, cur_name);
+ if (cur_as_multi == nullptr)
+ return nullptr;
+
+ size_t num_path_elements = num_args - (leaf_is_command ? 1 : 0);
+ for (size_t cursor = 1; cursor < num_path_elements && cur_as_multi != nullptr;
+ cursor++) {
+ cur_name = path.GetArgumentAtIndex(cursor);
+ cur_cmd_sp = cur_as_multi->GetSubcommandSPExact(cur_name);
+ cur_as_multi = get_multi_or_report_error(cur_cmd_sp, cur_name);
+ }
+ return cur_as_multi;
+}
+
CommandObjectSP
CommandInterpreter::GetCommandSP(llvm::StringRef cmd_str, bool include_aliases,
bool exact, StringList *matches,
@@ -923,10 +983,17 @@ CommandInterpreter::GetCommandSP(llvm::StringRef cmd_str, bool include_aliases,
command_sp = pos->second;
}
+ if (HasUserMultiwordCommands()) {
+ auto pos = m_user_mw_dict.find(cmd);
+ if (pos != m_user_mw_dict.end())
+ command_sp = pos->second;
+ }
+
if (!exact && !command_sp) {
// We will only get into here if we didn't find any exact matches.
- CommandObjectSP user_match_sp, alias_match_sp, real_match_sp;
+ CommandObjectSP user_match_sp, user_mw_match_sp, alias_match_sp,
+ real_match_sp;
StringList local_matches;
if (matches == nullptr)
@@ -935,6 +1002,7 @@ CommandInterpreter::GetCommandSP(llvm::StringRef cmd_str, bool include_aliases,
unsigned int num_cmd_matches = 0;
unsigned int num_alias_matches = 0;
unsigned int num_user_matches = 0;
+ unsigned int num_user_mw_matches = 0;
// Look through the command dictionaries one by one, and if we get only one
// match from any of them in toto, then return that, otherwise return an
@@ -978,14 +1046,32 @@ CommandInterpreter::GetCommandSP(llvm::StringRef cmd_str, bool include_aliases,
user_match_sp = pos->second;
}
+ if (HasUserMultiwordCommands()) {
+ num_user_mw_matches = AddNamesMatchingPartialString(
+ m_user_mw_dict, cmd_str, *matches, descriptions);
+ }
+
+ if (num_user_mw_matches == 1) {
+ cmd.assign(matches->GetStringAtIndex(num_cmd_matches + num_alias_matches +
+ num_user_matches));
+
+ auto pos = m_user_mw_dict.find(cmd);
+ if (pos != m_user_mw_dict.end())
+ user_mw_match_sp = pos->second;
+ }
+
// If we got exactly one match, return that, otherwise return the match
// list.
- if (num_user_matches + num_cmd_matches + num_alias_matches == 1) {
+ if (num_user_matches + num_user_mw_matches + num_cmd_matches +
+ num_alias_matches ==
+ 1) {
if (num_cmd_matches)
return real_match_sp;
else if (num_alias_matches)
return alias_match_sp;
+ else if (num_user_mw_matches)
+ return user_mw_match_sp;
else
return user_match_sp;
}
@@ -1008,6 +1094,8 @@ bool CommandInterpreter::AddCommand(llvm::StringRef name,
if (name.empty())
return false;
+ cmd_sp->SetIsUserCommand(false);
+
std::string name_sstr(name);
auto name_iter = m_command_dict.find(name_sstr);
if (name_iter != m_command_dict.end()) {
@@ -1020,33 +1108,49 @@ bool CommandInterpreter::AddCommand(llvm::StringRef name,
return true;
}
-bool CommandInterpreter::AddUserCommand(llvm::StringRef name,
- const lldb::CommandObjectSP &cmd_sp,
- bool can_replace) {
+Status CommandInterpreter::AddUserCommand(llvm::StringRef name,
+ const lldb::CommandObjectSP &cmd_sp,
+ bool can_replace) {
+ Status result;
if (cmd_sp.get())
lldbassert((this == &cmd_sp->GetCommandInterpreter()) &&
"tried to add a CommandObject from a different interpreter");
-
- if (!name.empty()) {
- // do not allow replacement of internal commands
- if (CommandExists(name)) {
- if (!can_replace)
- return false;
- if (!m_command_dict[std::string(name)]->IsRemovable())
- return false;
+ if (name.empty()) {
+ result.SetErrorString("can't use the empty string for a command name");
+ return result;
+ }
+ // do not allow replacement of internal commands
+ if (CommandExists(name)) {
+ result.SetErrorString("can't replace builtin command");
+ return result;
+ }
+
+ if (UserCommandExists(name)) {
+ if (!can_replace) {
+ result.SetErrorString("user command exists and force replace not set");
+ return result;
+ }
+ if (cmd_sp->IsMultiwordObject()) {
+ if (!m_user_mw_dict[std::string(name)]->IsRemovable()) {
+ result.SetErrorString(
+ "can't replace explicitly non-removable multi-word command");
+ return result;
+ }
+ } else {
+ if (!m_user_dict[std::string(name)]->IsRemovable()) {
+ result.SetErrorString("can't replace explicitly non-removable command");
+ return result;
+ }
}
+ }
- if (UserCommandExists(name)) {
- if (!can_replace)
- return false;
- if (!m_user_dict[std::string(name)]->IsRemovable())
- return false;
- }
+ cmd_sp->SetIsUserCommand(true);
+ if (cmd_sp->IsMultiwordObject())
+ m_user_mw_dict[std::string(name)] = cmd_sp;
+ else
m_user_dict[std::string(name)] = cmd_sp;
- return true;
- }
- return false;
+ return result;
}
CommandObjectSP
@@ -1127,6 +1231,42 @@ CommandInterpreter::GetCommandObject(llvm::StringRef cmd_str,
return GetCommandSP(cmd_str, true, false, matches, descriptions).get();
}
+CommandObject *CommandInterpreter::GetUserCommandObject(
+ llvm::StringRef cmd, StringList *matches, StringList *descriptions) const {
+ std::string cmd_str(cmd);
+ auto find_exact = [&](const CommandObject::CommandMap &map) {
+ auto found_elem = map.find(std::string(cmd));
+ if (found_elem == map.end())
+ return (CommandObject *)nullptr;
+ CommandObject *exact_cmd = found_elem->second.get();
+ if (exact_cmd) {
+ if (matches)
+ matches->AppendString(exact_cmd->GetCommandName());
+ if (descriptions)
+ descriptions->AppendString(exact_cmd->GetHelp());
+ return exact_cmd;
+ }
+ return (CommandObject *)nullptr;
+ };
+
+ CommandObject *exact_cmd = find_exact(GetUserCommands());
+ if (exact_cmd)
+ return exact_cmd;
+
+ exact_cmd = find_exact(GetUserMultiwordCommands());
+ if (exact_cmd)
+ return exact_cmd;
+
+ // We didn't have an exact command, so now look for partial matches.
+ StringList tmp_list;
+ StringList *matches_ptr = matches ? matches : &tmp_list;
+ AddNamesMatchingPartialString(GetUserCommands(), cmd_str, *matches_ptr);
+ AddNamesMatchingPartialString(GetUserMultiwordCommands(),
+ cmd_str, *matches_ptr);
+
+ return {};
+}
+
bool CommandInterpreter::CommandExists(llvm::StringRef cmd) const {
return m_command_dict.find(std::string(cmd)) != m_command_dict.end();
}
@@ -1169,6 +1309,10 @@ bool CommandInterpreter::UserCommandExists(llvm::StringRef cmd) const {
return m_user_dict.find(std::string(cmd)) != m_user_dict.end();
}
+bool CommandInterpreter::UserMultiwordCommandExists(llvm::StringRef cmd) const {
+ return m_user_mw_dict.find(std::string(cmd)) != m_user_mw_dict.end();
+}
+
CommandAlias *
CommandInterpreter::AddAlias(llvm::StringRef alias_name,
lldb::CommandObjectSP &command_obj_sp,
@@ -1209,9 +1353,10 @@ bool CommandInterpreter::RemoveCommand(llvm::StringRef cmd) {
}
return false;
}
-bool CommandInterpreter::RemoveUser(llvm::StringRef alias_name) {
+
+bool CommandInterpreter::RemoveUser(llvm::StringRef user_name) {
CommandObject::CommandMap::iterator pos =
- m_user_dict.find(std::string(alias_name));
+ m_user_dict.find(std::string(user_name));
if (pos != m_user_dict.end()) {
m_user_dict.erase(pos);
return true;
@@ -1219,6 +1364,16 @@ bool CommandInterpreter::RemoveUser(llvm::StringRef alias_name) {
return false;
}
+bool CommandInterpreter::RemoveUserMultiword(llvm::StringRef multi_name) {
+ CommandObject::CommandMap::iterator pos =
+ m_user_mw_dict.find(std::string(multi_name));
+ if (pos != m_user_mw_dict.end()) {
+ m_user_mw_dict.erase(pos);
+ return true;
+ }
+ return false;
+}
+
void CommandInterpreter::GetHelp(CommandReturnObject &result,
uint32_t cmd_types) {
llvm::StringRef help_prologue(GetDebugger().GetIOHandlerHelpPrologue());
@@ -1274,6 +1429,18 @@ void CommandInterpreter::GetHelp(CommandReturnObject &result,
result.AppendMessage("");
}
+ if (!m_user_mw_dict.empty() &&
+ ((cmd_types & eCommandTypesUserMW) == eCommandTypesUserMW)) {
+ result.AppendMessage("Current user-defined container commands:");
+ result.AppendMessage("");
+ max_len = FindLongestCommandWord(m_user_mw_dict);
+    for (pos = m_user_mw_dict.begin(); pos != m_user_mw_dict.end(); ++pos) {
+ OutputFormattedHelpText(result.GetOutputStream(), pos->first, "--",
+ pos->second->GetHelp(), max_len);
+ }
+ result.AppendMessage("");
+ }
+
result.AppendMessageWithFormat(
"For more information on any command, type '%shelp <command-name>'.\n",
GetCommandPrefix());
@@ -1931,6 +2098,10 @@ bool CommandInterpreter::HasAliases() const { return (!m_alias_dict.empty()); }
bool CommandInterpreter::HasUserCommands() const { return (!m_user_dict.empty()); }
+bool CommandInterpreter::HasUserMultiwordCommands() const {
+ return (!m_user_mw_dict.empty());
+}
+
bool CommandInterpreter::HasAliasOptions() const { return HasAliases(); }
void CommandInterpreter::BuildAliasCommandArgs(CommandObject *alias_cmd_obj,
@@ -2113,13 +2284,6 @@ static void GetCwdInitFile(llvm::SmallVectorImpl<char> &init_file) {
FileSystem::Instance().Resolve(init_file);
}
-static LoadCWDlldbinitFile ShouldLoadCwdInitFile() {
- lldb::TargetPropertiesSP properties = Target::GetGlobalProperties();
- if (!properties)
- return eLoadCWDlldbinitFalse;
- return properties->GetLoadCWDlldbinitFile();
-}
-
void CommandInterpreter::SourceInitFile(FileSpec file,
CommandReturnObject &result) {
assert(!m_skip_lldbinit_files);
@@ -2155,7 +2319,8 @@ void CommandInterpreter::SourceInitFileCwd(CommandReturnObject &result) {
return;
}
- LoadCWDlldbinitFile should_load = ShouldLoadCwdInitFile();
+ LoadCWDlldbinitFile should_load =
+ Target::GetGlobalProperties().GetLoadCWDlldbinitFile();
switch (should_load) {
case eLoadCWDlldbinitFalse:
@@ -2433,7 +2598,7 @@ void CommandInterpreter::HandleCommandsFromFile(FileSpec &cmd_file,
std::string cmd_file_path = cmd_file.GetPath();
auto input_file_up =
- FileSystem::Instance().Open(cmd_file, File::eOpenOptionRead);
+ FileSystem::Instance().Open(cmd_file, File::eOpenOptionReadOnly);
if (!input_file_up) {
std::string error = llvm::toString(input_file_up.takeError());
result.AppendErrorWithFormatv(
@@ -2587,6 +2752,9 @@ void CommandInterpreter::OutputFormattedHelpText(Stream &strm,
strm.IndentMore(prefix.size());
bool prefixed_yet = false;
+ // Even if we have no help text we still want to emit the command name.
+ if (help_text.empty())
+ help_text = "No help text";
while (!help_text.empty()) {
// Prefix the first line, indent subsequent lines to line up
if (!prefixed_yet) {
@@ -2706,7 +2874,8 @@ void CommandInterpreter::FindCommandsForApropos(llvm::StringRef search_word,
StringList &commands_help,
bool search_builtin_commands,
bool search_user_commands,
- bool search_alias_commands) {
+ bool search_alias_commands,
+ bool search_user_mw_commands) {
CommandObject::CommandMap::const_iterator pos;
if (search_builtin_commands)
@@ -2717,6 +2886,10 @@ void CommandInterpreter::FindCommandsForApropos(llvm::StringRef search_word,
FindCommandsForApropos(search_word, commands_found, commands_help,
m_user_dict);
+ if (search_user_mw_commands)
+ FindCommandsForApropos(search_word, commands_found, commands_help,
+ m_user_mw_dict);
+
if (search_alias_commands)
FindCommandsForApropos(search_word, commands_found, commands_help,
m_alias_dict);
@@ -2954,7 +3127,7 @@ bool CommandInterpreter::SaveTranscript(
return false;
};
- File::OpenOptions flags = File::eOpenOptionWrite |
+ File::OpenOptions flags = File::eOpenOptionWriteOnly |
File::eOpenOptionCanCreate |
File::eOpenOptionTruncate;
diff --git a/lldb/source/Interpreter/CommandObject.cpp b/lldb/source/Interpreter/CommandObject.cpp
index a7dcd5682701..64b23d04abea 100644
--- a/lldb/source/Interpreter/CommandObject.cpp
+++ b/lldb/source/Interpreter/CommandObject.cpp
@@ -1120,7 +1120,7 @@ CommandObject::ArgumentTableEntry CommandObject::g_arguments_data[] = {
{ eArgTypeWatchpointIDRange, "watchpt-id-list", CommandCompletions::eNoCompletion, { nullptr, false }, "For example, '1-3' or '1 to 3'." },
{ eArgTypeWatchType, "watch-type", CommandCompletions::eNoCompletion, { nullptr, false }, "Specify the type for a watchpoint." },
{ eArgRawInput, "raw-input", CommandCompletions::eNoCompletion, { nullptr, false }, "Free-form text passed to a command without prior interpretation, allowing spaces without requiring quotes. To pass arguments and free form text put two dashes ' -- ' between the last argument and any raw input." },
- { eArgTypeCommand, "command", CommandCompletions::eNoCompletion, { nullptr, false }, "An LLDB Command line command." },
+ { eArgTypeCommand, "command", CommandCompletions::eNoCompletion, { nullptr, false }, "An LLDB Command line command element." },
{ eArgTypeColumnNum, "column", CommandCompletions::eNoCompletion, { nullptr, false }, "Column number in a source file." },
{ eArgTypeModuleUUID, "module-uuid", CommandCompletions::eModuleUUIDCompletion, { nullptr, false }, "A module UUID value." },
{ eArgTypeSaveCoreStyle, "corefile-style", CommandCompletions::eNoCompletion, { nullptr, false }, "The type of corefile that lldb will try to create, dependant on this target's capabilities." }
diff --git a/lldb/source/Interpreter/OptionValueArray.cpp b/lldb/source/Interpreter/OptionValueArray.cpp
index b1545bdebf10..4468fe57702e 100644
--- a/lldb/source/Interpreter/OptionValueArray.cpp
+++ b/lldb/source/Interpreter/OptionValueArray.cpp
@@ -8,7 +8,6 @@
#include "lldb/Interpreter/OptionValueArray.h"
-#include "lldb/Host/StringConvert.h"
#include "lldb/Utility/Args.h"
#include "lldb/Utility/Stream.h"
@@ -167,13 +166,12 @@ Status OptionValueArray::SetArgs(const Args &args, VarSetOperationType op) {
case eVarSetOperationInsertBefore:
case eVarSetOperationInsertAfter:
if (argc > 1) {
- uint32_t idx =
- StringConvert::ToUInt32(args.GetArgumentAtIndex(0), UINT32_MAX);
+ uint32_t idx;
const uint32_t count = GetSize();
- if (idx > count) {
+ if (!llvm::to_integer(args.GetArgumentAtIndex(0), idx) || idx > count) {
error.SetErrorStringWithFormat(
- "invalid insert array index %u, index must be 0 through %u", idx,
- count);
+ "invalid insert array index %s, index must be 0 through %u",
+ args.GetArgumentAtIndex(0), count);
} else {
if (op == eVarSetOperationInsertAfter)
++idx;
@@ -207,9 +205,8 @@ Status OptionValueArray::SetArgs(const Args &args, VarSetOperationType op) {
bool all_indexes_valid = true;
size_t i;
for (i = 0; i < argc; ++i) {
- const size_t idx =
- StringConvert::ToSInt32(args.GetArgumentAtIndex(i), INT32_MAX);
- if (idx >= size) {
+ size_t idx;
+ if (!llvm::to_integer(args.GetArgumentAtIndex(i), idx) || idx >= size) {
all_indexes_valid = false;
break;
} else
@@ -249,13 +246,12 @@ Status OptionValueArray::SetArgs(const Args &args, VarSetOperationType op) {
case eVarSetOperationReplace:
if (argc > 1) {
- uint32_t idx =
- StringConvert::ToUInt32(args.GetArgumentAtIndex(0), UINT32_MAX);
+ uint32_t idx;
const uint32_t count = GetSize();
- if (idx > count) {
+ if (!llvm::to_integer(args.GetArgumentAtIndex(0), idx) || idx > count) {
error.SetErrorStringWithFormat(
- "invalid replace array index %u, index must be 0 through %u", idx,
- count);
+ "invalid replace array index %s, index must be 0 through %u",
+ args.GetArgumentAtIndex(0), count);
} else {
for (size_t i = 1; i < argc; ++i, ++idx) {
lldb::OptionValueSP value_sp(CreateValueFromCStringForTypeMask(
diff --git a/lldb/source/Interpreter/OptionValueFileSpecList.cpp b/lldb/source/Interpreter/OptionValueFileSpecList.cpp
index 2160fd61d428..6566eee09d73 100644
--- a/lldb/source/Interpreter/OptionValueFileSpecList.cpp
+++ b/lldb/source/Interpreter/OptionValueFileSpecList.cpp
@@ -8,7 +8,6 @@
#include "lldb/Interpreter/OptionValueFileSpecList.h"
-#include "lldb/Host/StringConvert.h"
#include "lldb/Utility/Args.h"
#include "lldb/Utility/Stream.h"
@@ -57,13 +56,12 @@ Status OptionValueFileSpecList::SetValueFromString(llvm::StringRef value,
case eVarSetOperationReplace:
if (argc > 1) {
- uint32_t idx =
- StringConvert::ToUInt32(args.GetArgumentAtIndex(0), UINT32_MAX);
+ uint32_t idx;
const uint32_t count = m_current_value.GetSize();
- if (idx > count) {
+ if (!llvm::to_integer(args.GetArgumentAtIndex(0), idx) || idx > count) {
error.SetErrorStringWithFormat(
- "invalid file list index %u, index must be 0 through %u", idx,
- count);
+ "invalid file list index %s, index must be 0 through %u",
+ args.GetArgumentAtIndex(0), count);
} else {
for (size_t i = 1; i < argc; ++i, ++idx) {
FileSpec file(args.GetArgumentAtIndex(i));
@@ -101,13 +99,12 @@ Status OptionValueFileSpecList::SetValueFromString(llvm::StringRef value,
case eVarSetOperationInsertBefore:
case eVarSetOperationInsertAfter:
if (argc > 1) {
- uint32_t idx =
- StringConvert::ToUInt32(args.GetArgumentAtIndex(0), UINT32_MAX);
+ uint32_t idx;
const uint32_t count = m_current_value.GetSize();
- if (idx > count) {
+ if (!llvm::to_integer(args.GetArgumentAtIndex(0), idx) || idx > count) {
error.SetErrorStringWithFormat(
- "invalid insert file list index %u, index must be 0 through %u",
- idx, count);
+ "invalid insert file list index %s, index must be 0 through %u",
+ args.GetArgumentAtIndex(0), count);
} else {
if (op == eVarSetOperationInsertAfter)
++idx;
@@ -129,9 +126,8 @@ Status OptionValueFileSpecList::SetValueFromString(llvm::StringRef value,
bool all_indexes_valid = true;
size_t i;
for (i = 0; all_indexes_valid && i < argc; ++i) {
- const int idx =
- StringConvert::ToSInt32(args.GetArgumentAtIndex(i), INT32_MAX);
- if (idx == INT32_MAX)
+ int idx;
+ if (!llvm::to_integer(args.GetArgumentAtIndex(i), idx))
all_indexes_valid = false;
else
remove_indexes.push_back(idx);
diff --git a/lldb/source/Interpreter/OptionValuePathMappings.cpp b/lldb/source/Interpreter/OptionValuePathMappings.cpp
index 4dceb5632716..543b0e1b8ea8 100644
--- a/lldb/source/Interpreter/OptionValuePathMappings.cpp
+++ b/lldb/source/Interpreter/OptionValuePathMappings.cpp
@@ -9,21 +9,19 @@
#include "lldb/Interpreter/OptionValuePathMappings.h"
#include "lldb/Host/FileSystem.h"
-#include "lldb/Host/StringConvert.h"
#include "lldb/Utility/Args.h"
#include "lldb/Utility/FileSpec.h"
#include "lldb/Utility/Stream.h"
using namespace lldb;
using namespace lldb_private;
-namespace {
+
static bool VerifyPathExists(const char *path) {
if (path && path[0])
return FileSystem::Instance().Exists(path);
else
return false;
}
-}
void OptionValuePathMappings::DumpValue(const ExecutionContext *exe_ctx,
Stream &strm, uint32_t dump_mask) {
@@ -52,23 +50,22 @@ Status OptionValuePathMappings::SetValueFromString(llvm::StringRef value,
// Must be at least one index + 1 pair of paths, and the pair count must be
// even
if (argc >= 3 && (((argc - 1) & 1) == 0)) {
- uint32_t idx =
- StringConvert::ToUInt32(args.GetArgumentAtIndex(0), UINT32_MAX);
+ uint32_t idx;
const uint32_t count = m_path_mappings.GetSize();
- if (idx > count) {
+ if (!llvm::to_integer(args.GetArgumentAtIndex(0), idx) || idx > count) {
error.SetErrorStringWithFormat(
- "invalid file list index %u, index must be 0 through %u", idx,
- count);
+ "invalid file list index %s, index must be 0 through %u",
+ args.GetArgumentAtIndex(0), count);
} else {
bool changed = false;
for (size_t i = 1; i < argc; idx++, i += 2) {
const char *orginal_path = args.GetArgumentAtIndex(i);
const char *replace_path = args.GetArgumentAtIndex(i + 1);
if (VerifyPathExists(replace_path)) {
- ConstString a(orginal_path);
- ConstString b(replace_path);
- if (!m_path_mappings.Replace(a, b, idx, m_notify_changes))
- m_path_mappings.Append(a, b, m_notify_changes);
+ if (!m_path_mappings.Replace(orginal_path, replace_path, idx,
+ m_notify_changes))
+ m_path_mappings.Append(orginal_path, replace_path,
+ m_notify_changes);
changed = true;
} else {
std::string previousError =
@@ -105,9 +102,7 @@ Status OptionValuePathMappings::SetValueFromString(llvm::StringRef value,
const char *orginal_path = args.GetArgumentAtIndex(i);
const char *replace_path = args.GetArgumentAtIndex(i + 1);
if (VerifyPathExists(replace_path)) {
- ConstString a(orginal_path);
- ConstString b(replace_path);
- m_path_mappings.Append(a, b, m_notify_changes);
+ m_path_mappings.Append(orginal_path, replace_path, m_notify_changes);
m_value_was_set = true;
changed = true;
} else {
@@ -128,13 +123,12 @@ Status OptionValuePathMappings::SetValueFromString(llvm::StringRef value,
// Must be at least one index + 1 pair of paths, and the pair count must be
// even
if (argc >= 3 && (((argc - 1) & 1) == 0)) {
- uint32_t idx =
- StringConvert::ToUInt32(args.GetArgumentAtIndex(0), UINT32_MAX);
+ uint32_t idx;
const uint32_t count = m_path_mappings.GetSize();
- if (idx > count) {
+ if (!llvm::to_integer(args.GetArgumentAtIndex(0), idx) || idx > count) {
error.SetErrorStringWithFormat(
- "invalid file list index %u, index must be 0 through %u", idx,
- count);
+ "invalid file list index %s, index must be 0 through %u",
+ args.GetArgumentAtIndex(0), count);
} else {
bool changed = false;
if (op == eVarSetOperationInsertAfter)
@@ -143,9 +137,8 @@ Status OptionValuePathMappings::SetValueFromString(llvm::StringRef value,
const char *orginal_path = args.GetArgumentAtIndex(i);
const char *replace_path = args.GetArgumentAtIndex(i + 1);
if (VerifyPathExists(replace_path)) {
- ConstString a(orginal_path);
- ConstString b(replace_path);
- m_path_mappings.Insert(a, b, idx, m_notify_changes);
+ m_path_mappings.Insert(orginal_path, replace_path, idx,
+ m_notify_changes);
changed = true;
idx++;
} else {
@@ -169,9 +162,9 @@ Status OptionValuePathMappings::SetValueFromString(llvm::StringRef value,
if (argc > 0) {
std::vector<int> remove_indexes;
for (size_t i = 0; i < argc; ++i) {
- int idx =
- StringConvert::ToSInt32(args.GetArgumentAtIndex(i), INT32_MAX);
- if (idx < 0 || idx >= (int)m_path_mappings.GetSize()) {
+ int idx;
+ if (!llvm::to_integer(args.GetArgumentAtIndex(i), idx) || idx < 0 ||
+ idx >= (int)m_path_mappings.GetSize()) {
error.SetErrorStringWithFormat(
"invalid array index '%s', aborting remove operation",
args.GetArgumentAtIndex(i));
diff --git a/lldb/source/Interpreter/OptionValueProperties.cpp b/lldb/source/Interpreter/OptionValueProperties.cpp
index ae073798ca12..1a8f2f0ab180 100644
--- a/lldb/source/Interpreter/OptionValueProperties.cpp
+++ b/lldb/source/Interpreter/OptionValueProperties.cpp
@@ -48,7 +48,8 @@ void OptionValueProperties::AppendProperty(ConstString name,
ConstString desc,
bool is_global,
const OptionValueSP &value_sp) {
- Property property(name, desc, is_global, value_sp);
+ Property property(name.GetStringRef(), desc.GetStringRef(), is_global,
+ value_sp);
m_name_to_index.Append(name, m_properties.size());
m_properties.push_back(property);
value_sp->SetParent(shared_from_this());
diff --git a/lldb/source/Interpreter/OptionValueSInt64.cpp b/lldb/source/Interpreter/OptionValueSInt64.cpp
index b875ba8e3536..c1db5056cd94 100644
--- a/lldb/source/Interpreter/OptionValueSInt64.cpp
+++ b/lldb/source/Interpreter/OptionValueSInt64.cpp
@@ -8,7 +8,6 @@
#include "lldb/Interpreter/OptionValueSInt64.h"
-#include "lldb/Host/StringConvert.h"
#include "lldb/Utility/Stream.h"
using namespace lldb;
@@ -41,10 +40,9 @@ Status OptionValueSInt64::SetValueFromString(llvm::StringRef value_ref,
case eVarSetOperationReplace:
case eVarSetOperationAssign: {
- bool success = false;
- std::string value_str = value_ref.trim().str();
- int64_t value = StringConvert::ToSInt64(value_str.c_str(), 0, 0, &success);
- if (success) {
+ llvm::StringRef value_trimmed = value_ref.trim();
+ int64_t value;
+ if (llvm::to_integer(value_trimmed, value)) {
if (value >= m_min_value && value <= m_max_value) {
m_value_was_set = true;
m_current_value = value;
diff --git a/lldb/source/Interpreter/OptionValueUInt64.cpp b/lldb/source/Interpreter/OptionValueUInt64.cpp
index a2751a4d02eb..1999c63d11af 100644
--- a/lldb/source/Interpreter/OptionValueUInt64.cpp
+++ b/lldb/source/Interpreter/OptionValueUInt64.cpp
@@ -8,7 +8,6 @@
#include "lldb/Interpreter/OptionValueUInt64.h"
-#include "lldb/Host/StringConvert.h"
#include "lldb/Utility/Stream.h"
using namespace lldb;
@@ -45,16 +44,15 @@ Status OptionValueUInt64::SetValueFromString(llvm::StringRef value_ref,
case eVarSetOperationReplace:
case eVarSetOperationAssign: {
- bool success = false;
- std::string value_str = value_ref.trim().str();
- uint64_t value = StringConvert::ToUInt64(value_str.c_str(), 0, 0, &success);
- if (success) {
+ llvm::StringRef value_trimmed = value_ref.trim();
+ uint64_t value;
+ if (llvm::to_integer(value_trimmed, value)) {
m_value_was_set = true;
m_current_value = value;
NotifyValueChanged();
} else {
error.SetErrorStringWithFormat("invalid uint64_t string value: '%s'",
- value_str.c_str());
+ value_ref.str().c_str());
}
} break;
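The option-value changes in this area all repeat one migration: indices that used to go through StringConvert::ToUInt32/ToSInt64, which silently returned a sentinel on bad input, are now parsed with llvm::to_integer, which reports failure explicitly so the error message can echo the offending argument. A minimal sketch of the new pattern, with made-up values:

    #include "llvm/ADT/StringExtras.h" // llvm::to_integer
    #include "llvm/ADT/StringRef.h"
    #include <cstdint>
    #include <cstdio>

    int main() {
      llvm::StringRef arg = "17";
      uint32_t idx = 0;
      // to_integer parses the whole string (radix auto-detected with base 0)
      // and returns false on any failure instead of yielding a sentinel.
      if (!llvm::to_integer(arg, idx))
        std::fprintf(stderr, "invalid index '%s'\n", arg.str().c_str());
      else
        std::printf("index = %u\n", idx);
      return 0;
    }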
diff --git a/lldb/source/Interpreter/Property.cpp b/lldb/source/Interpreter/Property.cpp
index 55400a2bc42d..fe3a8a31394b 100644
--- a/lldb/source/Interpreter/Property.cpp
+++ b/lldb/source/Interpreter/Property.cpp
@@ -9,7 +9,6 @@
#include "lldb/Interpreter/Property.h"
#include "lldb/Core/UserSettingsController.h"
-#include "lldb/Host/StringConvert.h"
#include "lldb/Interpreter/CommandInterpreter.h"
#include "lldb/Interpreter/OptionArgParser.h"
#include "lldb/Interpreter/OptionValues.h"
@@ -176,28 +175,32 @@ Property::Property(const PropertyDefinition &definition)
std::make_shared<OptionValueRegex>(definition.default_cstr_value);
break;
- case OptionValue::eTypeSInt64:
+ case OptionValue::eTypeSInt64: {
// "definition.default_uint_value" is the default integer value if
// "definition.default_cstr_value" is NULL, otherwise interpret
// "definition.default_cstr_value" as a string value that represents the
// default value.
+ int64_t value = 0;
+ // FIXME: improve error handling for llvm::to_integer()
+ if (definition.default_cstr_value)
+ llvm::to_integer(definition.default_cstr_value, value);
m_value_sp = std::make_shared<OptionValueSInt64>(
- definition.default_cstr_value
- ? StringConvert::ToSInt64(definition.default_cstr_value)
- : definition.default_uint_value);
+ definition.default_cstr_value ? value : definition.default_uint_value);
break;
-
- case OptionValue::eTypeUInt64:
+ }
+ case OptionValue::eTypeUInt64: {
+ uint64_t value = 0;
+ // FIXME: improve error handling for llvm::to_integer()
+ if (definition.default_cstr_value)
+ llvm::to_integer(definition.default_cstr_value, value);
// "definition.default_uint_value" is the default unsigned integer value if
// "definition.default_cstr_value" is NULL, otherwise interpret
// "definition.default_cstr_value" as a string value that represents the
// default value.
m_value_sp = std::make_shared<OptionValueUInt64>(
- definition.default_cstr_value
- ? StringConvert::ToUInt64(definition.default_cstr_value)
- : definition.default_uint_value);
+ definition.default_cstr_value ? value : definition.default_uint_value);
break;
-
+ }
case OptionValue::eTypeUUID:
// "definition.default_uint_value" is not used for a OptionValue::eTypeUUID
// "definition.default_cstr_value" can contain a default UUID value
@@ -224,13 +227,13 @@ Property::Property(const PropertyDefinition &definition)
}
}
-Property::Property(ConstString name, ConstString desc,
- bool is_global, const lldb::OptionValueSP &value_sp)
+Property::Property(llvm::StringRef name, llvm::StringRef desc, bool is_global,
+ const lldb::OptionValueSP &value_sp)
: m_name(name), m_description(desc), m_value_sp(value_sp),
m_is_global(is_global) {}
bool Property::DumpQualifiedName(Stream &strm) const {
- if (m_name) {
+ if (!m_name.empty()) {
if (m_value_sp->DumpQualifiedName(strm))
strm.PutChar('.');
strm << m_name;
@@ -248,7 +251,7 @@ void Property::Dump(const ExecutionContext *exe_ctx, Stream &strm,
if (dump_cmd && !transparent)
strm << "settings set -f ";
if (dump_desc || !transparent) {
- if ((dump_mask & OptionValue::eDumpOptionName) && m_name) {
+ if ((dump_mask & OptionValue::eDumpOptionName) && !m_name.empty()) {
DumpQualifiedName(strm);
if (dump_mask & ~OptionValue::eDumpOptionName)
strm.PutChar(' ');
@@ -292,8 +295,8 @@ void Property::DumpDescription(CommandInterpreter &interpreter, Stream &strm,
interpreter.OutputFormattedHelpText(strm, qualified_name.GetString(),
"--", desc, output_width);
} else {
- interpreter.OutputFormattedHelpText(strm, m_name.GetStringRef(), "--",
- desc, output_width);
+ interpreter.OutputFormattedHelpText(strm, m_name, "--", desc,
+ output_width);
}
}
}
diff --git a/lldb/source/Interpreter/ScriptInterpreter.cpp b/lldb/source/Interpreter/ScriptInterpreter.cpp
index f26474836a68..fbdcbb8da868 100644
--- a/lldb/source/Interpreter/ScriptInterpreter.cpp
+++ b/lldb/source/Interpreter/ScriptInterpreter.cpp
@@ -46,6 +46,10 @@ void ScriptInterpreter::CollectDataForWatchpointCommandCallback(
"This script interpreter does not support watchpoint callbacks.");
}
+StructuredData::DictionarySP ScriptInterpreter::GetInterpreterInfo() {
+ return nullptr;
+}
+
bool ScriptInterpreter::LoadScriptingModule(const char *filename,
const LoadScriptOptions &options,
lldb_private::Status &error,
@@ -83,6 +87,14 @@ ScriptInterpreter::GetStatusFromSBError(const lldb::SBError &error) const {
return Status();
}
+llvm::Optional<MemoryRegionInfo>
+ScriptInterpreter::GetOpaqueTypeFromSBMemoryRegionInfo(
+ const lldb::SBMemoryRegionInfo &mem_region) const {
+ if (!mem_region.m_opaque_up)
+ return llvm::None;
+ return *mem_region.m_opaque_up.get();
+}
+
lldb::ScriptLanguage
ScriptInterpreter::StringToLanguage(const llvm::StringRef &language) {
if (language.equals_insensitive(LanguageToString(eScriptLanguageNone)))
@@ -141,12 +153,12 @@ ScriptInterpreterIORedirect::Create(bool enable_io, Debugger &debugger,
new ScriptInterpreterIORedirect(debugger, result));
auto nullin = FileSystem::Instance().Open(FileSpec(FileSystem::DEV_NULL),
- File::eOpenOptionRead);
+ File::eOpenOptionReadOnly);
if (!nullin)
return nullin.takeError();
auto nullout = FileSystem::Instance().Open(FileSpec(FileSystem::DEV_NULL),
- File::eOpenOptionWrite);
+ File::eOpenOptionWriteOnly);
if (!nullout)
return nullin.takeError();
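The new GetOpaqueTypeFromSBMemoryRegionInfo above returns llvm::None when the SB object carries no opaque pointer, and a copy of the wrapped value otherwise. A stripped-down sketch of that unwrapping pattern with stand-in types (the structs below are illustrative, not LLDB's SBMemoryRegionInfo):

#include "llvm/ADT/Optional.h"
#include <memory>

struct Opaque { int value = 0; };                        // stand-in payload
struct SBLike { std::unique_ptr<Opaque> m_opaque_up; };  // stand-in SB wrapper

// Copy out the wrapped object if present, otherwise signal "no value".
static llvm::Optional<Opaque> GetOpaque(const SBLike &sb) {
  if (!sb.m_opaque_up)
    return llvm::None;
  return *sb.m_opaque_up;
}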
diff --git a/lldb/source/Plugins/ABI/AArch64/ABIAArch64.cpp b/lldb/source/Plugins/ABI/AArch64/ABIAArch64.cpp
index 42d73ce39ed6..f060d0362c67 100644
--- a/lldb/source/Plugins/ABI/AArch64/ABIAArch64.cpp
+++ b/lldb/source/Plugins/ABI/AArch64/ABIAArch64.cpp
@@ -13,6 +13,8 @@
#include "lldb/Core/PluginManager.h"
#include "lldb/Target/Process.h"
+#include <bitset>
+
LLDB_PLUGIN_DEFINE(ABIAArch64)
void ABIAArch64::Initialize() {
@@ -52,12 +54,13 @@ std::string ABIAArch64::GetMCName(std::string reg) {
MapRegisterName(reg, "x30", "lr");
return reg;
}
+
uint32_t ABIAArch64::GetGenericNum(llvm::StringRef name) {
return llvm::StringSwitch<uint32_t>(name)
.Case("pc", LLDB_REGNUM_GENERIC_PC)
- .Case("lr", LLDB_REGNUM_GENERIC_RA)
- .Case("sp", LLDB_REGNUM_GENERIC_SP)
- .Case("fp", LLDB_REGNUM_GENERIC_FP)
+ .Cases("lr", "x30", LLDB_REGNUM_GENERIC_RA)
+ .Cases("sp", "x31", LLDB_REGNUM_GENERIC_SP)
+ .Cases("fp", "x29", LLDB_REGNUM_GENERIC_FP)
.Case("cpsr", LLDB_REGNUM_GENERIC_FLAGS)
.Case("x0", LLDB_REGNUM_GENERIC_ARG1)
.Case("x1", LLDB_REGNUM_GENERIC_ARG2)
@@ -69,3 +72,76 @@ uint32_t ABIAArch64::GetGenericNum(llvm::StringRef name) {
.Case("x7", LLDB_REGNUM_GENERIC_ARG8)
.Default(LLDB_INVALID_REGNUM);
}
+
+static void addPartialRegisters(
+ std::vector<lldb_private::DynamicRegisterInfo::Register> &regs,
+ llvm::ArrayRef<llvm::Optional<uint32_t>> full_reg_indices,
+ uint32_t full_reg_size, const char *partial_reg_format,
+ uint32_t partial_reg_size, lldb::Encoding encoding, lldb::Format format) {
+ for (auto it : llvm::enumerate(full_reg_indices)) {
+ llvm::Optional<uint32_t> full_reg_index = it.value();
+ if (!full_reg_index ||
+ regs[full_reg_index.getValue()].byte_size != full_reg_size)
+ return;
+
+ lldb_private::DynamicRegisterInfo::Register partial_reg{
+ lldb_private::ConstString(
+ llvm::formatv(partial_reg_format, it.index()).str()),
+ lldb_private::ConstString(),
+ lldb_private::ConstString("supplementary registers"),
+ partial_reg_size,
+ LLDB_INVALID_INDEX32,
+ encoding,
+ format,
+ LLDB_INVALID_REGNUM,
+ LLDB_INVALID_REGNUM,
+ LLDB_INVALID_REGNUM,
+ LLDB_INVALID_REGNUM,
+ {full_reg_index.getValue()},
+ {}};
+ addSupplementaryRegister(regs, partial_reg);
+ }
+}
+
+void ABIAArch64::AugmentRegisterInfo(
+ std::vector<lldb_private::DynamicRegisterInfo::Register> &regs) {
+ lldb_private::MCBasedABI::AugmentRegisterInfo(regs);
+
+ lldb_private::ConstString sp_string{"sp"};
+
+ std::array<llvm::Optional<uint32_t>, 32> x_regs;
+ std::array<llvm::Optional<uint32_t>, 32> v_regs;
+
+ for (auto it : llvm::enumerate(regs)) {
+ lldb_private::DynamicRegisterInfo::Register &info = it.value();
+ // GDB sends x31 as "sp". Add the "x31" alt_name for convenience.
+ if (info.name == sp_string && !info.alt_name)
+ info.alt_name.SetCString("x31");
+
+ unsigned int reg_num;
+ auto get_reg = [&info, &reg_num](const char *prefix) {
+ llvm::StringRef reg_name = info.name.GetStringRef();
+ llvm::StringRef alt_name = info.alt_name.GetStringRef();
+ return (reg_name.consume_front(prefix) &&
+ llvm::to_integer(reg_name, reg_num, 10) && reg_num < 32) ||
+ (alt_name.consume_front(prefix) &&
+ llvm::to_integer(alt_name, reg_num, 10) && reg_num < 32);
+ };
+
+ if (get_reg("x"))
+ x_regs[reg_num] = it.index();
+ else if (get_reg("v"))
+ v_regs[reg_num] = it.index();
+ // if we have at least one subregister, abort
+ else if (get_reg("w") || get_reg("s") || get_reg("d"))
+ return;
+ }
+
+ // Create aliases for partial registers: wN for xN, and sN/dN for vN.
+ addPartialRegisters(regs, x_regs, 8, "w{0}", 4, lldb::eEncodingUint,
+ lldb::eFormatHex);
+ addPartialRegisters(regs, v_regs, 16, "s{0}", 4, lldb::eEncodingIEEE754,
+ lldb::eFormatFloat);
+ addPartialRegisters(regs, v_regs, 16, "d{0}", 8, lldb::eEncodingIEEE754,
+ lldb::eFormatFloat);
+}
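GetGenericNum above switches from .Case to .Cases so that either spelling of a register ("lr" or "x30", "sp" or "x31", "fp" or "x29") maps to the same generic number, and the new AugmentRegisterInfo then derives wN/sN/dN entries from the full xN/vN registers. A small self-contained sketch of the StringSwitch .Cases idiom (the return values are placeholders, not LLDB's LLDB_REGNUM_* constants):

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include <cstdint>

// Map either the generic alias or the architectural name to one token.
static uint32_t ClassifyAArch64Reg(llvm::StringRef name) {
  return llvm::StringSwitch<uint32_t>(name)
      .Cases("lr", "x30", 1) // return address
      .Cases("sp", "x31", 2) // stack pointer
      .Cases("fp", "x29", 3) // frame pointer
      .Default(0);           // anything else
}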
diff --git a/lldb/source/Plugins/ABI/AArch64/ABIAArch64.h b/lldb/source/Plugins/ABI/AArch64/ABIAArch64.h
index 41bbf5cfdeb9..e771f69d7dbc 100644
--- a/lldb/source/Plugins/ABI/AArch64/ABIAArch64.h
+++ b/lldb/source/Plugins/ABI/AArch64/ABIAArch64.h
@@ -11,7 +11,7 @@
#include "lldb/Target/ABI.h"
-class ABIAArch64: public lldb_private::MCBasedABI {
+class ABIAArch64 : public lldb_private::MCBasedABI {
public:
static void Initialize();
static void Terminate();
@@ -31,6 +31,9 @@ protected:
uint32_t GetGenericNum(llvm::StringRef name) override;
+ void AugmentRegisterInfo(
+ std::vector<lldb_private::DynamicRegisterInfo::Register> &regs) override;
+
using lldb_private::MCBasedABI::MCBasedABI;
};
#endif
diff --git a/lldb/source/Plugins/ABI/AArch64/ABIMacOSX_arm64.cpp b/lldb/source/Plugins/ABI/AArch64/ABIMacOSX_arm64.cpp
index 348a081cfe17..ccfbeec3d589 100644
--- a/lldb/source/Plugins/ABI/AArch64/ABIMacOSX_arm64.cpp
+++ b/lldb/source/Plugins/ABI/AArch64/ABIMacOSX_arm64.cpp
@@ -402,7 +402,7 @@ bool ABIMacOSX_arm64::CreateDefaultUnwindPlan(UnwindPlan &unwind_plan) {
// volatile (and specifically only the lower 8 bytes of these regs), the rest
// of the fp/SIMD registers are volatile.
//
-// v. https://github.com/ARM-software/abi-aa/blob/master/aapcs64/
+// v. https://github.com/ARM-software/abi-aa/blob/main/aapcs64/
// We treat x29 as callee preserved also, else the unwinder won't try to
// retrieve fp saves.
@@ -828,12 +828,3 @@ void ABIMacOSX_arm64::Initialize() {
void ABIMacOSX_arm64::Terminate() {
PluginManager::UnregisterPlugin(CreateInstance);
}
-
-// PluginInterface protocol
-
-ConstString ABIMacOSX_arm64::GetPluginNameStatic() {
- static ConstString g_plugin_name("ABIMacOSX_arm64");
- return g_plugin_name;
-}
-
-uint32_t ABIMacOSX_arm64::GetPluginVersion() { return 1; }
diff --git a/lldb/source/Plugins/ABI/AArch64/ABIMacOSX_arm64.h b/lldb/source/Plugins/ABI/AArch64/ABIMacOSX_arm64.h
index dc3ab35115fd..1a5bc7f67573 100644
--- a/lldb/source/Plugins/ABI/AArch64/ABIMacOSX_arm64.h
+++ b/lldb/source/Plugins/ABI/AArch64/ABIMacOSX_arm64.h
@@ -74,13 +74,9 @@ public:
// PluginInterface protocol
- static lldb_private::ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "ABIMacOSX_arm64"; }
- lldb_private::ConstString GetPluginName() override {
- return GetPluginNameStatic();
- }
-
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
lldb_private::Status
SetReturnValueObject(lldb::StackFrameSP &frame_sp,
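The ABIMacOSX_arm64.h hunk above shows the pattern this import applies to every plugin that follows: the out-of-line ConstString GetPluginNameStatic() / GetPluginVersion() pair becomes a pair of inline llvm::StringRef accessors, and the corresponding .cpp definitions are deleted. A condensed sketch of the resulting shape (class and name are illustrative, not an actual LLDB plugin):

#include "llvm/ADT/StringRef.h"

// Illustrative skeleton only; real plugins derive from lldb_private classes.
class ExamplePlugin {
public:
  virtual ~ExamplePlugin() = default;
  // The name is now a compile-time string; no ConstString construction.
  static llvm::StringRef GetPluginNameStatic() { return "example-plugin"; }
  // The virtual accessor just forwards; the GetPluginVersion() override that
  // always returned 1 is gone entirely.
  virtual llvm::StringRef GetPluginName() { return GetPluginNameStatic(); }
};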
diff --git a/lldb/source/Plugins/ABI/AArch64/ABISysV_arm64.cpp b/lldb/source/Plugins/ABI/AArch64/ABISysV_arm64.cpp
index 16fb38e107c3..95924159e9f0 100644
--- a/lldb/source/Plugins/ABI/AArch64/ABISysV_arm64.cpp
+++ b/lldb/source/Plugins/ABI/AArch64/ABISysV_arm64.cpp
@@ -845,14 +845,3 @@ void ABISysV_arm64::Initialize() {
void ABISysV_arm64::Terminate() {
PluginManager::UnregisterPlugin(CreateInstance);
}
-
-lldb_private::ConstString ABISysV_arm64::GetPluginNameStatic() {
- static ConstString g_name("SysV-arm64");
- return g_name;
-}
-
-// PluginInterface protocol
-
-ConstString ABISysV_arm64::GetPluginName() { return GetPluginNameStatic(); }
-
-uint32_t ABISysV_arm64::GetPluginVersion() { return 1; }
diff --git a/lldb/source/Plugins/ABI/AArch64/ABISysV_arm64.h b/lldb/source/Plugins/ABI/AArch64/ABISysV_arm64.h
index 3428a7ad9418..b3d4cba795f0 100644
--- a/lldb/source/Plugins/ABI/AArch64/ABISysV_arm64.h
+++ b/lldb/source/Plugins/ABI/AArch64/ABISysV_arm64.h
@@ -77,13 +77,11 @@ public:
static lldb::ABISP CreateInstance(lldb::ProcessSP process_sp, const lldb_private::ArchSpec &arch);
- static lldb_private::ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "SysV-arm64"; }
// PluginInterface protocol
- lldb_private::ConstString GetPluginName() override;
-
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
lldb::addr_t FixCodeAddress(lldb::addr_t pc) override;
lldb::addr_t FixDataAddress(lldb::addr_t pc) override;
diff --git a/lldb/source/Plugins/ABI/ARC/ABISysV_arc.cpp b/lldb/source/Plugins/ABI/ARC/ABISysV_arc.cpp
index 60cdbc534113..3620be6f274d 100644
--- a/lldb/source/Plugins/ABI/ARC/ABISysV_arc.cpp
+++ b/lldb/source/Plugins/ABI/ARC/ABISysV_arc.cpp
@@ -46,7 +46,7 @@
DEFINE_REG_NAME(dwarf_num), DEFINE_REG_NAME_STR(str_name), \
0, 0, eEncodingInvalid, eFormatDefault, \
{ dwarf_num, dwarf_num, generic_num, LLDB_INVALID_REGNUM, dwarf_num }, \
- nullptr, nullptr, nullptr, 0 \
+ nullptr, nullptr \
}
#define DEFINE_REGISTER_STUB(dwarf_num, str_name) \
@@ -167,15 +167,15 @@ ABISP ABISysV_arc::CreateInstance(ProcessSP process_sp, const ArchSpec &arch) {
ABISP();
}
-namespace {
-const size_t word_size = 4U;
-const size_t reg_size = word_size;
+static const size_t word_size = 4U;
+static const size_t reg_size = word_size;
-inline size_t AugmentArgSize(size_t size_in_bytes) {
+static inline size_t AugmentArgSize(size_t size_in_bytes) {
return llvm::alignTo(size_in_bytes, word_size);
}
-size_t TotalArgsSizeInWords(const llvm::ArrayRef<ABI::CallArgument> &args) {
+static size_t
+TotalArgsSizeInWords(const llvm::ArrayRef<ABI::CallArgument> &args) {
size_t total_size = 0;
for (const auto &arg : args)
total_size +=
@@ -185,7 +185,6 @@ size_t TotalArgsSizeInWords(const llvm::ArrayRef<ABI::CallArgument> &args) {
return total_size;
}
-} // namespace
bool ABISysV_arc::PrepareTrivialCall(Thread &thread, addr_t sp,
addr_t func_addr, addr_t return_addr,
@@ -372,9 +371,8 @@ Status ABISysV_arc::SetReturnValueObject(StackFrameSP &frame_sp,
return result;
}
-namespace {
template <typename T>
-void SetInteger(Scalar &scalar, uint64_t raw_value, bool is_signed) {
+static void SetInteger(Scalar &scalar, uint64_t raw_value, bool is_signed) {
raw_value &= std::numeric_limits<T>::max();
if (is_signed)
scalar = static_cast<typename std::make_signed<T>::type>(raw_value);
@@ -382,8 +380,8 @@ void SetInteger(Scalar &scalar, uint64_t raw_value, bool is_signed) {
scalar = static_cast<T>(raw_value);
}
-bool SetSizedInteger(Scalar &scalar, uint64_t raw_value, uint8_t size_in_bytes,
- bool is_signed) {
+static bool SetSizedInteger(Scalar &scalar, uint64_t raw_value,
+ uint8_t size_in_bytes, bool is_signed) {
switch (size_in_bytes) {
default:
return false;
@@ -408,7 +406,8 @@ bool SetSizedInteger(Scalar &scalar, uint64_t raw_value, uint8_t size_in_bytes,
return true;
}
-bool SetSizedFloat(Scalar &scalar, uint64_t raw_value, uint8_t size_in_bytes) {
+static bool SetSizedFloat(Scalar &scalar, uint64_t raw_value,
+ uint8_t size_in_bytes) {
switch (size_in_bytes) {
default:
return false;
@@ -425,7 +424,8 @@ bool SetSizedFloat(Scalar &scalar, uint64_t raw_value, uint8_t size_in_bytes) {
return true;
}
-uint64_t ReadRawValue(const RegisterContextSP &reg_ctx, uint8_t size_in_bytes) {
+static uint64_t ReadRawValue(const RegisterContextSP &reg_ctx,
+ uint8_t size_in_bytes) {
auto reg_info_r0 =
reg_ctx->GetRegisterInfo(eRegisterKindGeneric, LLDB_REGNUM_GENERIC_ARG1);
@@ -441,7 +441,6 @@ uint64_t ReadRawValue(const RegisterContextSP &reg_ctx, uint8_t size_in_bytes) {
return raw_value;
}
-} // namespace
ValueObjectSP
ABISysV_arc::GetReturnValueObjectSimple(Thread &thread,
@@ -600,18 +599,3 @@ void ABISysV_arc::Initialize() {
void ABISysV_arc::Terminate() {
PluginManager::UnregisterPlugin(CreateInstance);
}
-
-ConstString ABISysV_arc::GetPluginNameStatic() {
- static ConstString g_name("sysv-arc");
- return g_name;
-}
-
-//------------------------------------------------------------------
-// PluginInterface protocol
-//------------------------------------------------------------------
-
-ConstString ABISysV_arc::GetPluginName() {
- return GetPluginNameStatic();
-}
-
-uint32_t ABISysV_arc::GetPluginVersion() { return 1; }
diff --git a/lldb/source/Plugins/ABI/ARC/ABISysV_arc.h b/lldb/source/Plugins/ABI/ARC/ABISysV_arc.h
index 3fbe64b4b45b..9bf75dfe6add 100644
--- a/lldb/source/Plugins/ABI/ARC/ABISysV_arc.h
+++ b/lldb/source/Plugins/ABI/ARC/ABISysV_arc.h
@@ -1,4 +1,4 @@
-//===-- ArchitectureArc.h ---------------------------------------*- C++ -*-===//
+//===-- ABISysV_arc.h -------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -80,15 +80,13 @@ public:
static lldb::ABISP CreateInstance(lldb::ProcessSP process_sp,
const lldb_private::ArchSpec &arch);
- static lldb_private::ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "sysv-arc"; }
//------------------------------------------------------------------
// PluginInterface protocol
//------------------------------------------------------------------
- lldb_private::ConstString GetPluginName() override;
-
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
private:
lldb::ValueObjectSP
diff --git a/lldb/source/Plugins/ABI/ARM/ABIMacOSX_arm.cpp b/lldb/source/Plugins/ABI/ARM/ABIMacOSX_arm.cpp
index e429f3ee0cc4..a8d1cbc675e3 100644
--- a/lldb/source/Plugins/ABI/ARM/ABIMacOSX_arm.cpp
+++ b/lldb/source/Plugins/ABI/ARM/ABIMacOSX_arm.cpp
@@ -42,7 +42,7 @@ static const RegisterInfo g_register_infos[] = {
// ======================= =================== ===========================
// ======================= ======================
{"r0",
- "arg1",
+ nullptr,
4,
0,
eEncodingUint,
@@ -51,10 +51,9 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r1",
- "arg2",
+ nullptr,
4,
0,
eEncodingUint,
@@ -63,10 +62,9 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r2",
- "arg3",
+ nullptr,
4,
0,
eEncodingUint,
@@ -75,10 +73,9 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r3",
- "arg4",
+ nullptr,
4,
0,
eEncodingUint,
@@ -87,8 +84,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r4",
nullptr,
4,
@@ -99,8 +95,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r5",
nullptr,
4,
@@ -111,8 +106,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r6",
nullptr,
4,
@@ -123,8 +117,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r7",
nullptr,
4,
@@ -135,8 +128,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r8",
nullptr,
4,
@@ -147,8 +139,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r9",
nullptr,
4,
@@ -159,8 +150,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r10",
nullptr,
4,
@@ -171,8 +161,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r11",
nullptr,
4,
@@ -183,8 +172,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r12",
nullptr,
4,
@@ -195,8 +183,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"sp",
"r13",
4,
@@ -207,8 +194,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"lr",
"r14",
4,
@@ -219,8 +205,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"pc",
"r15",
4,
@@ -231,8 +216,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"cpsr",
"psr",
4,
@@ -243,8 +227,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s0",
nullptr,
4,
@@ -255,8 +238,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s1",
nullptr,
4,
@@ -267,8 +249,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s2",
nullptr,
4,
@@ -279,8 +260,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s3",
nullptr,
4,
@@ -291,8 +271,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s4",
nullptr,
4,
@@ -303,8 +282,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s5",
nullptr,
4,
@@ -315,8 +293,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s6",
nullptr,
4,
@@ -327,8 +304,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s7",
nullptr,
4,
@@ -339,8 +315,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s8",
nullptr,
4,
@@ -351,8 +326,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s9",
nullptr,
4,
@@ -363,8 +337,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s10",
nullptr,
4,
@@ -375,8 +348,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s11",
nullptr,
4,
@@ -387,8 +359,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s12",
nullptr,
4,
@@ -399,8 +370,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s13",
nullptr,
4,
@@ -411,8 +381,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s14",
nullptr,
4,
@@ -423,8 +392,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s15",
nullptr,
4,
@@ -435,8 +403,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s16",
nullptr,
4,
@@ -447,8 +414,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s17",
nullptr,
4,
@@ -459,8 +425,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s18",
nullptr,
4,
@@ -471,8 +436,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s19",
nullptr,
4,
@@ -483,8 +447,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s20",
nullptr,
4,
@@ -495,8 +458,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s21",
nullptr,
4,
@@ -507,8 +469,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s22",
nullptr,
4,
@@ -519,8 +480,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s23",
nullptr,
4,
@@ -531,8 +491,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s24",
nullptr,
4,
@@ -543,8 +502,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s25",
nullptr,
4,
@@ -555,8 +513,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s26",
nullptr,
4,
@@ -567,8 +524,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s27",
nullptr,
4,
@@ -579,8 +535,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s28",
nullptr,
4,
@@ -591,8 +546,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s29",
nullptr,
4,
@@ -603,8 +557,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s30",
nullptr,
4,
@@ -615,8 +568,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s31",
nullptr,
4,
@@ -627,8 +579,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"fpscr",
nullptr,
4,
@@ -639,8 +590,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d0",
nullptr,
8,
@@ -651,8 +601,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d1",
nullptr,
8,
@@ -663,8 +612,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d2",
nullptr,
8,
@@ -675,8 +623,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d3",
nullptr,
8,
@@ -687,8 +634,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d4",
nullptr,
8,
@@ -699,8 +645,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d5",
nullptr,
8,
@@ -711,8 +656,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d6",
nullptr,
8,
@@ -723,8 +667,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d7",
nullptr,
8,
@@ -735,8 +678,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d8",
nullptr,
8,
@@ -747,8 +689,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d9",
nullptr,
8,
@@ -759,8 +700,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d10",
nullptr,
8,
@@ -771,8 +711,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d11",
nullptr,
8,
@@ -783,8 +722,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d12",
nullptr,
8,
@@ -795,8 +733,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d13",
nullptr,
8,
@@ -807,8 +744,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d14",
nullptr,
8,
@@ -819,8 +755,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d15",
nullptr,
8,
@@ -831,8 +766,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d16",
nullptr,
8,
@@ -843,8 +777,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d17",
nullptr,
8,
@@ -855,8 +788,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d18",
nullptr,
8,
@@ -867,8 +799,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d19",
nullptr,
8,
@@ -879,8 +810,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d20",
nullptr,
8,
@@ -891,8 +821,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d21",
nullptr,
8,
@@ -903,8 +832,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d22",
nullptr,
8,
@@ -915,8 +843,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d23",
nullptr,
8,
@@ -927,8 +854,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d24",
nullptr,
8,
@@ -939,8 +865,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d25",
nullptr,
8,
@@ -951,8 +876,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d26",
nullptr,
8,
@@ -963,8 +887,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d27",
nullptr,
8,
@@ -975,8 +898,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d28",
nullptr,
8,
@@ -987,8 +909,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d29",
nullptr,
8,
@@ -999,8 +920,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d30",
nullptr,
8,
@@ -1011,8 +931,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d31",
nullptr,
8,
@@ -1023,8 +942,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r8_usr",
nullptr,
4,
@@ -1035,8 +953,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r9_usr",
nullptr,
4,
@@ -1047,8 +964,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r10_usr",
nullptr,
4,
@@ -1059,8 +975,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r11_usr",
nullptr,
4,
@@ -1071,8 +986,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r12_usr",
nullptr,
4,
@@ -1083,8 +997,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r13_usr",
"sp_usr",
4,
@@ -1095,8 +1008,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r14_usr",
"lr_usr",
4,
@@ -1107,8 +1019,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r8_fiq",
nullptr,
4,
@@ -1119,8 +1030,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r9_fiq",
nullptr,
4,
@@ -1131,8 +1041,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r10_fiq",
nullptr,
4,
@@ -1143,8 +1052,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r11_fiq",
nullptr,
4,
@@ -1155,8 +1063,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r12_fiq",
nullptr,
4,
@@ -1167,8 +1074,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r13_fiq",
"sp_fiq",
4,
@@ -1179,8 +1085,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r14_fiq",
"lr_fiq",
4,
@@ -1191,8 +1096,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r13_irq",
"sp_irq",
4,
@@ -1203,8 +1107,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r14_irq",
"lr_irq",
4,
@@ -1215,8 +1118,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r13_abt",
"sp_abt",
4,
@@ -1227,8 +1129,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r14_abt",
"lr_abt",
4,
@@ -1239,8 +1140,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r13_und",
"sp_und",
4,
@@ -1251,8 +1151,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r14_und",
"lr_und",
4,
@@ -1263,8 +1162,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r13_svc",
"sp_svc",
4,
@@ -1275,8 +1173,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r14_svc",
"lr_svc",
4,
@@ -1287,8 +1184,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0}};
+ }};
static const uint32_t k_num_register_infos =
llvm::array_lengthof(g_register_infos);
@@ -2025,16 +1921,3 @@ void ABIMacOSX_arm::Initialize() {
void ABIMacOSX_arm::Terminate() {
PluginManager::UnregisterPlugin(CreateInstance);
}
-
-lldb_private::ConstString ABIMacOSX_arm::GetPluginNameStatic() {
- static ConstString g_name("macosx-arm");
- return g_name;
-}
-
-// PluginInterface protocol
-
-lldb_private::ConstString ABIMacOSX_arm::GetPluginName() {
- return GetPluginNameStatic();
-}
-
-uint32_t ABIMacOSX_arm::GetPluginVersion() { return 1; }
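The long register-table edits above (and in the ARM and Hexagon files that follow) drop the trailing "nullptr, 0" initializers from every entry, presumably because the RegisterInfo members they initialized were removed in this import; either way the tables now rely on ordinary aggregate initialization, where trailing members without an initializer are value-initialized. A tiny sketch with a made-up struct, not LLDB's real RegisterInfo layout:

#include <cstdint>

// Miniature stand-in for a register description.
struct MiniRegInfo {
  const char *name;
  const char *alt_name;
  uint32_t byte_size;
  const uint32_t *value_regs;
  const uint32_t *invalidate_regs;
};

static const MiniRegInfo g_mini_infos[] = {
    {"r0", nullptr, 4, nullptr, nullptr},
    {"sp", "r13", 4}, // value_regs and invalidate_regs are value-initialized
};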
diff --git a/lldb/source/Plugins/ABI/ARM/ABIMacOSX_arm.h b/lldb/source/Plugins/ABI/ARM/ABIMacOSX_arm.h
index e0fa349eea73..a77af75e57b8 100644
--- a/lldb/source/Plugins/ABI/ARM/ABIMacOSX_arm.h
+++ b/lldb/source/Plugins/ABI/ARM/ABIMacOSX_arm.h
@@ -71,13 +71,11 @@ public:
static lldb::ABISP CreateInstance(lldb::ProcessSP process_sp, const lldb_private::ArchSpec &arch);
- static lldb_private::ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "macosx-arm"; }
// PluginInterface protocol
- lldb_private::ConstString GetPluginName() override;
-
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
protected:
lldb::ValueObjectSP
diff --git a/lldb/source/Plugins/ABI/ARM/ABISysV_arm.cpp b/lldb/source/Plugins/ABI/ARM/ABISysV_arm.cpp
index 3e544e0483a7..9ed042df4e50 100644
--- a/lldb/source/Plugins/ABI/ARM/ABISysV_arm.cpp
+++ b/lldb/source/Plugins/ABI/ARM/ABISysV_arm.cpp
@@ -45,7 +45,7 @@ static const RegisterInfo g_register_infos[] = {
// ======================= ====================== ==========
// ===============
{"r0",
- "arg1",
+ nullptr,
4,
0,
eEncodingUint,
@@ -54,10 +54,9 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r1",
- "arg2",
+ nullptr,
4,
0,
eEncodingUint,
@@ -66,10 +65,9 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r2",
- "arg3",
+ nullptr,
4,
0,
eEncodingUint,
@@ -78,10 +76,9 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r3",
- "arg4",
+ nullptr,
4,
0,
eEncodingUint,
@@ -90,8 +87,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r4",
nullptr,
4,
@@ -102,8 +98,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r5",
nullptr,
4,
@@ -114,8 +109,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r6",
nullptr,
4,
@@ -126,8 +120,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r7",
nullptr,
4,
@@ -138,8 +131,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r8",
nullptr,
4,
@@ -150,8 +142,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r9",
nullptr,
4,
@@ -162,8 +153,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r10",
nullptr,
4,
@@ -174,8 +164,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r11",
nullptr,
4,
@@ -186,8 +175,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r12",
nullptr,
4,
@@ -198,8 +186,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"sp",
"r13",
4,
@@ -210,8 +197,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"lr",
"r14",
4,
@@ -222,8 +208,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"pc",
"r15",
4,
@@ -234,8 +219,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"cpsr",
"psr",
4,
@@ -246,8 +230,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s0",
nullptr,
4,
@@ -258,8 +241,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s1",
nullptr,
4,
@@ -270,8 +252,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s2",
nullptr,
4,
@@ -282,8 +263,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s3",
nullptr,
4,
@@ -294,8 +274,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s4",
nullptr,
4,
@@ -306,8 +285,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s5",
nullptr,
4,
@@ -318,8 +296,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s6",
nullptr,
4,
@@ -330,8 +307,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s7",
nullptr,
4,
@@ -342,8 +318,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s8",
nullptr,
4,
@@ -354,8 +329,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s9",
nullptr,
4,
@@ -366,8 +340,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s10",
nullptr,
4,
@@ -378,8 +351,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s11",
nullptr,
4,
@@ -390,8 +362,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s12",
nullptr,
4,
@@ -402,8 +373,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s13",
nullptr,
4,
@@ -414,8 +384,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s14",
nullptr,
4,
@@ -426,8 +395,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s15",
nullptr,
4,
@@ -438,8 +406,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s16",
nullptr,
4,
@@ -450,8 +417,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s17",
nullptr,
4,
@@ -462,8 +428,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s18",
nullptr,
4,
@@ -474,8 +439,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s19",
nullptr,
4,
@@ -486,8 +450,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s20",
nullptr,
4,
@@ -498,8 +461,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s21",
nullptr,
4,
@@ -510,8 +472,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s22",
nullptr,
4,
@@ -522,8 +483,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s23",
nullptr,
4,
@@ -534,8 +494,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s24",
nullptr,
4,
@@ -546,8 +505,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s25",
nullptr,
4,
@@ -558,8 +516,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s26",
nullptr,
4,
@@ -570,8 +527,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s27",
nullptr,
4,
@@ -582,8 +538,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s28",
nullptr,
4,
@@ -594,8 +549,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s29",
nullptr,
4,
@@ -606,8 +560,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s30",
nullptr,
4,
@@ -618,8 +571,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s31",
nullptr,
4,
@@ -630,8 +582,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"fpscr",
nullptr,
4,
@@ -642,8 +593,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d0",
nullptr,
8,
@@ -654,8 +604,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d1",
nullptr,
8,
@@ -666,8 +615,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d2",
nullptr,
8,
@@ -678,8 +626,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d3",
nullptr,
8,
@@ -690,8 +637,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d4",
nullptr,
8,
@@ -702,8 +648,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d5",
nullptr,
8,
@@ -714,8 +659,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d6",
nullptr,
8,
@@ -726,8 +670,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d7",
nullptr,
8,
@@ -738,8 +681,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d8",
nullptr,
8,
@@ -750,8 +692,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d9",
nullptr,
8,
@@ -762,8 +703,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d10",
nullptr,
8,
@@ -774,8 +714,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d11",
nullptr,
8,
@@ -786,8 +725,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d12",
nullptr,
8,
@@ -798,8 +736,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d13",
nullptr,
8,
@@ -810,8 +747,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d14",
nullptr,
8,
@@ -822,8 +758,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d15",
nullptr,
8,
@@ -834,8 +769,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d16",
nullptr,
8,
@@ -846,8 +780,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d17",
nullptr,
8,
@@ -858,8 +791,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d18",
nullptr,
8,
@@ -870,8 +802,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d19",
nullptr,
8,
@@ -882,8 +813,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d20",
nullptr,
8,
@@ -894,8 +824,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d21",
nullptr,
8,
@@ -906,8 +835,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d22",
nullptr,
8,
@@ -918,8 +846,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d23",
nullptr,
8,
@@ -930,8 +857,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d24",
nullptr,
8,
@@ -942,8 +868,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d25",
nullptr,
8,
@@ -954,8 +879,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d26",
nullptr,
8,
@@ -966,8 +890,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d27",
nullptr,
8,
@@ -978,8 +901,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d28",
nullptr,
8,
@@ -990,8 +912,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d29",
nullptr,
8,
@@ -1002,8 +923,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d30",
nullptr,
8,
@@ -1014,8 +934,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"d31",
nullptr,
8,
@@ -1026,8 +945,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r8_usr",
nullptr,
4,
@@ -1038,8 +956,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r9_usr",
nullptr,
4,
@@ -1050,8 +967,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r10_usr",
nullptr,
4,
@@ -1062,8 +978,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r11_usr",
nullptr,
4,
@@ -1074,8 +989,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r12_usr",
nullptr,
4,
@@ -1086,8 +1000,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r13_usr",
"sp_usr",
4,
@@ -1098,8 +1011,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r14_usr",
"lr_usr",
4,
@@ -1110,8 +1022,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r8_fiq",
nullptr,
4,
@@ -1122,8 +1033,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r9_fiq",
nullptr,
4,
@@ -1134,8 +1044,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r10_fiq",
nullptr,
4,
@@ -1146,8 +1055,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r11_fiq",
nullptr,
4,
@@ -1158,8 +1066,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r12_fiq",
nullptr,
4,
@@ -1170,8 +1077,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r13_fiq",
"sp_fiq",
4,
@@ -1182,8 +1088,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r14_fiq",
"lr_fiq",
4,
@@ -1194,8 +1099,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r13_irq",
"sp_irq",
4,
@@ -1206,8 +1110,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r14_irq",
"lr_irq",
4,
@@ -1218,8 +1121,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r13_abt",
"sp_abt",
4,
@@ -1230,8 +1132,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r14_abt",
"lr_abt",
4,
@@ -1242,8 +1143,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r13_und",
"sp_und",
4,
@@ -1254,8 +1154,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r14_und",
"lr_und",
4,
@@ -1266,8 +1165,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r13_svc",
"sp_svc",
4,
@@ -1278,8 +1176,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r14_svc",
"lr_svc",
4,
@@ -1290,8 +1187,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0}};
+ }};
static const uint32_t k_num_register_infos =
llvm::array_lengthof(g_register_infos);
@@ -2131,16 +2027,3 @@ void ABISysV_arm::Initialize() {
void ABISysV_arm::Terminate() {
PluginManager::UnregisterPlugin(CreateInstance);
}
-
-lldb_private::ConstString ABISysV_arm::GetPluginNameStatic() {
- static ConstString g_name("SysV-arm");
- return g_name;
-}
-
-// PluginInterface protocol
-
-lldb_private::ConstString ABISysV_arm::GetPluginName() {
- return GetPluginNameStatic();
-}
-
-uint32_t ABISysV_arm::GetPluginVersion() { return 1; }
diff --git a/lldb/source/Plugins/ABI/ARM/ABISysV_arm.h b/lldb/source/Plugins/ABI/ARM/ABISysV_arm.h
index f28f75ce4fe5..ce67b367d18f 100644
--- a/lldb/source/Plugins/ABI/ARM/ABISysV_arm.h
+++ b/lldb/source/Plugins/ABI/ARM/ABISysV_arm.h
@@ -71,13 +71,11 @@ public:
static lldb::ABISP CreateInstance(lldb::ProcessSP process_sp, const lldb_private::ArchSpec &arch);
- static lldb_private::ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "SysV-arm"; }
// PluginInterface protocol
- lldb_private::ConstString GetPluginName() override;
-
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
protected:
lldb::ValueObjectSP
diff --git a/lldb/source/Plugins/ABI/Hexagon/ABISysV_hexagon.cpp b/lldb/source/Plugins/ABI/Hexagon/ABISysV_hexagon.cpp
index 6794f7d07210..d47bca48ae09 100644
--- a/lldb/source/Plugins/ABI/Hexagon/ABISysV_hexagon.cpp
+++ b/lldb/source/Plugins/ABI/Hexagon/ABISysV_hexagon.cpp
@@ -45,8 +45,7 @@ static const RegisterInfo g_register_infos[] = {
{0, 0, LLDB_INVALID_REGNUM, 0, 0},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r01",
"",
4,
@@ -56,8 +55,7 @@ static const RegisterInfo g_register_infos[] = {
{1, 1, LLDB_INVALID_REGNUM, 1, 1},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r02",
"",
4,
@@ -67,8 +65,7 @@ static const RegisterInfo g_register_infos[] = {
{2, 2, LLDB_INVALID_REGNUM, 2, 2},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r03",
"",
4,
@@ -78,8 +75,7 @@ static const RegisterInfo g_register_infos[] = {
{3, 3, LLDB_INVALID_REGNUM, 3, 3},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r04",
"",
4,
@@ -89,8 +85,7 @@ static const RegisterInfo g_register_infos[] = {
{4, 4, LLDB_INVALID_REGNUM, 4, 4},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r05",
"",
4,
@@ -100,8 +95,7 @@ static const RegisterInfo g_register_infos[] = {
{5, 5, LLDB_INVALID_REGNUM, 5, 5},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r06",
"",
4,
@@ -111,8 +105,7 @@ static const RegisterInfo g_register_infos[] = {
{6, 6, LLDB_INVALID_REGNUM, 6, 6},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r07",
"",
4,
@@ -122,8 +115,7 @@ static const RegisterInfo g_register_infos[] = {
{7, 7, LLDB_INVALID_REGNUM, 7, 7},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r08",
"",
4,
@@ -133,8 +125,7 @@ static const RegisterInfo g_register_infos[] = {
{8, 8, LLDB_INVALID_REGNUM, 8, 8},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r09",
"",
4,
@@ -144,8 +135,7 @@ static const RegisterInfo g_register_infos[] = {
{9, 9, LLDB_INVALID_REGNUM, 9, 9},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r10",
"",
4,
@@ -155,8 +145,7 @@ static const RegisterInfo g_register_infos[] = {
{10, 10, LLDB_INVALID_REGNUM, 10, 10},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r11",
"",
4,
@@ -166,8 +155,7 @@ static const RegisterInfo g_register_infos[] = {
{11, 11, LLDB_INVALID_REGNUM, 11, 11},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r12",
"",
4,
@@ -177,8 +165,7 @@ static const RegisterInfo g_register_infos[] = {
{12, 12, LLDB_INVALID_REGNUM, 12, 12},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r13",
"",
4,
@@ -188,8 +175,7 @@ static const RegisterInfo g_register_infos[] = {
{13, 13, LLDB_INVALID_REGNUM, 13, 13},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r14",
"",
4,
@@ -199,8 +185,7 @@ static const RegisterInfo g_register_infos[] = {
{14, 14, LLDB_INVALID_REGNUM, 14, 14},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r15",
"",
4,
@@ -210,8 +195,7 @@ static const RegisterInfo g_register_infos[] = {
{15, 15, LLDB_INVALID_REGNUM, 15, 15},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r16",
"",
4,
@@ -221,8 +205,7 @@ static const RegisterInfo g_register_infos[] = {
{16, 16, LLDB_INVALID_REGNUM, 16, 16},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r17",
"",
4,
@@ -232,8 +215,7 @@ static const RegisterInfo g_register_infos[] = {
{17, 17, LLDB_INVALID_REGNUM, 17, 17},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r18",
"",
4,
@@ -243,8 +225,7 @@ static const RegisterInfo g_register_infos[] = {
{18, 18, LLDB_INVALID_REGNUM, 18, 18},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r19",
"",
4,
@@ -254,8 +235,7 @@ static const RegisterInfo g_register_infos[] = {
{19, 19, LLDB_INVALID_REGNUM, 19, 19},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r20",
"",
4,
@@ -265,8 +245,7 @@ static const RegisterInfo g_register_infos[] = {
{20, 20, LLDB_INVALID_REGNUM, 20, 20},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r21",
"",
4,
@@ -276,8 +255,7 @@ static const RegisterInfo g_register_infos[] = {
{21, 21, LLDB_INVALID_REGNUM, 21, 21},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r22",
"",
4,
@@ -287,8 +265,7 @@ static const RegisterInfo g_register_infos[] = {
{22, 22, LLDB_INVALID_REGNUM, 22, 22},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r23",
"",
4,
@@ -298,8 +275,7 @@ static const RegisterInfo g_register_infos[] = {
{23, 23, LLDB_INVALID_REGNUM, 23, 23},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r24",
"",
4,
@@ -309,8 +285,7 @@ static const RegisterInfo g_register_infos[] = {
{24, 24, LLDB_INVALID_REGNUM, 24, 24},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r25",
"",
4,
@@ -320,8 +295,7 @@ static const RegisterInfo g_register_infos[] = {
{25, 25, LLDB_INVALID_REGNUM, 25, 25},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r26",
"",
4,
@@ -331,8 +305,7 @@ static const RegisterInfo g_register_infos[] = {
{26, 26, LLDB_INVALID_REGNUM, 26, 26},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r27",
"",
4,
@@ -342,8 +315,7 @@ static const RegisterInfo g_register_infos[] = {
{27, 27, LLDB_INVALID_REGNUM, 27, 27},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r28",
"",
4,
@@ -353,8 +325,7 @@ static const RegisterInfo g_register_infos[] = {
{28, 28, LLDB_INVALID_REGNUM, 28, 28},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"sp",
"r29",
4,
@@ -364,8 +335,7 @@ static const RegisterInfo g_register_infos[] = {
{29, 29, LLDB_REGNUM_GENERIC_SP, 29, 29},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"fp",
"r30",
4,
@@ -375,8 +345,7 @@ static const RegisterInfo g_register_infos[] = {
{30, 30, LLDB_REGNUM_GENERIC_FP, 30, 30},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"lr",
"r31",
4,
@@ -386,8 +355,7 @@ static const RegisterInfo g_register_infos[] = {
{31, 31, LLDB_REGNUM_GENERIC_RA, 31, 31},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"sa0",
"",
4,
@@ -397,8 +365,7 @@ static const RegisterInfo g_register_infos[] = {
{32, 32, LLDB_INVALID_REGNUM, 32, 32},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"lc0",
"",
4,
@@ -408,8 +375,7 @@ static const RegisterInfo g_register_infos[] = {
{33, 33, LLDB_INVALID_REGNUM, 33, 33},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"sa1",
"",
4,
@@ -419,8 +385,7 @@ static const RegisterInfo g_register_infos[] = {
{34, 34, LLDB_INVALID_REGNUM, 34, 34},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"lc1",
"",
4,
@@ -430,8 +395,7 @@ static const RegisterInfo g_register_infos[] = {
{35, 35, LLDB_INVALID_REGNUM, 35, 35},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
// --> hexagon-v4/5/55/56-sim.xml
{"p3_0",
"",
@@ -442,8 +406,7 @@ static const RegisterInfo g_register_infos[] = {
{36, 36, LLDB_INVALID_REGNUM, 36, 36},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
// PADDING {
{"p00",
"",
@@ -454,8 +417,7 @@ static const RegisterInfo g_register_infos[] = {
{37, 37, LLDB_INVALID_REGNUM, 37, 37},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
// }
{"m0",
"",
@@ -466,8 +428,7 @@ static const RegisterInfo g_register_infos[] = {
{38, 38, LLDB_INVALID_REGNUM, 38, 38},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"m1",
"",
4,
@@ -477,8 +438,7 @@ static const RegisterInfo g_register_infos[] = {
{39, 39, LLDB_INVALID_REGNUM, 39, 39},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"usr",
"",
4,
@@ -488,8 +448,7 @@ static const RegisterInfo g_register_infos[] = {
{40, 40, LLDB_INVALID_REGNUM, 40, 40},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"pc",
"",
4,
@@ -499,8 +458,7 @@ static const RegisterInfo g_register_infos[] = {
{41, 41, LLDB_REGNUM_GENERIC_PC, 41, 41},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"ugp",
"",
4,
@@ -510,8 +468,7 @@ static const RegisterInfo g_register_infos[] = {
{42, 42, LLDB_INVALID_REGNUM, 42, 42},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"gp",
"",
4,
@@ -521,8 +478,7 @@ static const RegisterInfo g_register_infos[] = {
{43, 43, LLDB_INVALID_REGNUM, 43, 43},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"cs0",
"",
4,
@@ -532,8 +488,7 @@ static const RegisterInfo g_register_infos[] = {
{44, 44, LLDB_INVALID_REGNUM, 44, 44},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"cs1",
"",
4,
@@ -543,8 +498,7 @@ static const RegisterInfo g_register_infos[] = {
{45, 45, LLDB_INVALID_REGNUM, 45, 45},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
// PADDING {
{"p01",
"",
@@ -555,8 +509,7 @@ static const RegisterInfo g_register_infos[] = {
{46, 46, LLDB_INVALID_REGNUM, 46, 46},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"p02",
"",
4,
@@ -566,8 +519,7 @@ static const RegisterInfo g_register_infos[] = {
{47, 47, LLDB_INVALID_REGNUM, 47, 47},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"p03",
"",
4,
@@ -577,8 +529,7 @@ static const RegisterInfo g_register_infos[] = {
{48, 48, LLDB_INVALID_REGNUM, 48, 48},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"p04",
"",
4,
@@ -588,8 +539,7 @@ static const RegisterInfo g_register_infos[] = {
{49, 49, LLDB_INVALID_REGNUM, 49, 49},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"p05",
"",
4,
@@ -599,8 +549,7 @@ static const RegisterInfo g_register_infos[] = {
{50, 50, LLDB_INVALID_REGNUM, 50, 50},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"p06",
"",
4,
@@ -610,8 +559,7 @@ static const RegisterInfo g_register_infos[] = {
{51, 51, LLDB_INVALID_REGNUM, 51, 51},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"p07",
"",
4,
@@ -621,8 +569,7 @@ static const RegisterInfo g_register_infos[] = {
{52, 52, LLDB_INVALID_REGNUM, 52, 52},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"p08",
"",
4,
@@ -632,8 +579,7 @@ static const RegisterInfo g_register_infos[] = {
{53, 53, LLDB_INVALID_REGNUM, 53, 53},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"p09",
"",
4,
@@ -643,8 +589,7 @@ static const RegisterInfo g_register_infos[] = {
{54, 54, LLDB_INVALID_REGNUM, 54, 54},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"p10",
"",
4,
@@ -654,8 +599,7 @@ static const RegisterInfo g_register_infos[] = {
{55, 55, LLDB_INVALID_REGNUM, 55, 55},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"p11",
"",
4,
@@ -665,8 +609,7 @@ static const RegisterInfo g_register_infos[] = {
{56, 56, LLDB_INVALID_REGNUM, 56, 56},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"p12",
"",
4,
@@ -676,8 +619,7 @@ static const RegisterInfo g_register_infos[] = {
{57, 57, LLDB_INVALID_REGNUM, 57, 57},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"p13",
"",
4,
@@ -687,8 +629,7 @@ static const RegisterInfo g_register_infos[] = {
{58, 58, LLDB_INVALID_REGNUM, 58, 58},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"p14",
"",
4,
@@ -698,8 +639,7 @@ static const RegisterInfo g_register_infos[] = {
{59, 59, LLDB_INVALID_REGNUM, 59, 59},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"p15",
"",
4,
@@ -709,8 +649,7 @@ static const RegisterInfo g_register_infos[] = {
{60, 60, LLDB_INVALID_REGNUM, 60, 60},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"p16",
"",
4,
@@ -720,8 +659,7 @@ static const RegisterInfo g_register_infos[] = {
{61, 61, LLDB_INVALID_REGNUM, 61, 61},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"p17",
"",
4,
@@ -731,8 +669,7 @@ static const RegisterInfo g_register_infos[] = {
{62, 62, LLDB_INVALID_REGNUM, 62, 62},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"p18",
"",
4,
@@ -742,8 +679,7 @@ static const RegisterInfo g_register_infos[] = {
{63, 63, LLDB_INVALID_REGNUM, 63, 63},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
// }
{"sgp0",
"",
@@ -754,8 +690,7 @@ static const RegisterInfo g_register_infos[] = {
{64, 64, LLDB_INVALID_REGNUM, 64, 64},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
// PADDING {
{"p19",
"",
@@ -766,8 +701,7 @@ static const RegisterInfo g_register_infos[] = {
{65, 65, LLDB_INVALID_REGNUM, 65, 65},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
// }
{"stid",
"",
@@ -778,8 +712,7 @@ static const RegisterInfo g_register_infos[] = {
{66, 66, LLDB_INVALID_REGNUM, 66, 66},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"elr",
"",
4,
@@ -789,8 +722,7 @@ static const RegisterInfo g_register_infos[] = {
{67, 67, LLDB_INVALID_REGNUM, 67, 67},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"badva0",
"",
4,
@@ -800,8 +732,7 @@ static const RegisterInfo g_register_infos[] = {
{68, 68, LLDB_INVALID_REGNUM, 68, 68},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"badva1",
"",
4,
@@ -811,8 +742,7 @@ static const RegisterInfo g_register_infos[] = {
{69, 69, LLDB_INVALID_REGNUM, 69, 69},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"ssr",
"",
4,
@@ -822,8 +752,7 @@ static const RegisterInfo g_register_infos[] = {
{70, 70, LLDB_INVALID_REGNUM, 70, 70},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"ccr",
"",
4,
@@ -833,8 +762,7 @@ static const RegisterInfo g_register_infos[] = {
{71, 71, LLDB_INVALID_REGNUM, 71, 71},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"htid",
"",
4,
@@ -844,8 +772,7 @@ static const RegisterInfo g_register_infos[] = {
{72, 72, LLDB_INVALID_REGNUM, 72, 72},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
// PADDING {
{"p20",
"",
@@ -856,8 +783,7 @@ static const RegisterInfo g_register_infos[] = {
{73, 73, LLDB_INVALID_REGNUM, 73, 73},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
// }
{"imask",
"",
@@ -868,8 +794,7 @@ static const RegisterInfo g_register_infos[] = {
{74, 74, LLDB_INVALID_REGNUM, 74, 74},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
// PADDING {
{"p21",
"",
@@ -880,8 +805,7 @@ static const RegisterInfo g_register_infos[] = {
{75, 75, LLDB_INVALID_REGNUM, 75, 75},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"p22",
"",
4,
@@ -891,8 +815,7 @@ static const RegisterInfo g_register_infos[] = {
{76, 76, LLDB_INVALID_REGNUM, 76, 76},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"p23",
"",
4,
@@ -902,8 +825,7 @@ static const RegisterInfo g_register_infos[] = {
{77, 77, LLDB_INVALID_REGNUM, 77, 77},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"p24",
"",
4,
@@ -913,8 +835,7 @@ static const RegisterInfo g_register_infos[] = {
{78, 78, LLDB_INVALID_REGNUM, 78, 78},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"p25",
"",
4,
@@ -924,8 +845,7 @@ static const RegisterInfo g_register_infos[] = {
{79, 79, LLDB_INVALID_REGNUM, 79, 79},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
// }
{"g0",
"",
@@ -936,8 +856,7 @@ static const RegisterInfo g_register_infos[] = {
{80, 80, LLDB_INVALID_REGNUM, 80, 80},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"g1",
"",
4,
@@ -947,8 +866,7 @@ static const RegisterInfo g_register_infos[] = {
{81, 81, LLDB_INVALID_REGNUM, 81, 81},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"g2",
"",
4,
@@ -958,8 +876,7 @@ static const RegisterInfo g_register_infos[] = {
{82, 82, LLDB_INVALID_REGNUM, 82, 82},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"g3",
"",
4,
@@ -969,8 +886,7 @@ static const RegisterInfo g_register_infos[] = {
{83, 83, LLDB_INVALID_REGNUM, 83, 83},
nullptr,
nullptr,
- nullptr,
- 0}};
+ }};
static const uint32_t k_num_register_infos =
sizeof(g_register_infos) / sizeof(RegisterInfo);
@@ -1277,19 +1193,6 @@ void ABISysV_hexagon::Terminate() {
PluginManager::UnregisterPlugin(CreateInstance);
}
-lldb_private::ConstString ABISysV_hexagon::GetPluginNameStatic() {
- static ConstString g_name("sysv-hexagon");
- return g_name;
-}
-
-// PluginInterface protocol
-
-lldb_private::ConstString ABISysV_hexagon::GetPluginName() {
- return GetPluginNameStatic();
-}
-
-uint32_t ABISysV_hexagon::GetPluginVersion() { return 1; }
-
// get value object specialized to work with llvm IR types
lldb::ValueObjectSP
ABISysV_hexagon::GetReturnValueObjectImpl(lldb_private::Thread &thread,
diff --git a/lldb/source/Plugins/ABI/Hexagon/ABISysV_hexagon.h b/lldb/source/Plugins/ABI/Hexagon/ABISysV_hexagon.h
index d6dab0c2e378..df7016926d79 100644
--- a/lldb/source/Plugins/ABI/Hexagon/ABISysV_hexagon.h
+++ b/lldb/source/Plugins/ABI/Hexagon/ABISysV_hexagon.h
@@ -79,13 +79,11 @@ public:
static lldb::ABISP CreateInstance(lldb::ProcessSP process_sp, const lldb_private::ArchSpec &arch);
- static lldb_private::ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "sysv-hexagon"; }
// PluginInterface protocol
- lldb_private::ConstString GetPluginName() override;
-
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
protected:
void CreateRegisterMapIfNeeded();
diff --git a/lldb/source/Plugins/ABI/Mips/ABISysV_mips.cpp b/lldb/source/Plugins/ABI/Mips/ABISysV_mips.cpp
index 538ec06c3b0d..662689ca0615 100644
--- a/lldb/source/Plugins/ABI/Mips/ABISysV_mips.cpp
+++ b/lldb/source/Plugins/ABI/Mips/ABISysV_mips.cpp
@@ -92,8 +92,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r1",
"AT",
4,
@@ -104,8 +103,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r2",
"v0",
4,
@@ -116,8 +114,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r3",
"v1",
4,
@@ -128,10 +125,9 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r4",
- "arg1",
+ nullptr,
4,
0,
eEncodingUint,
@@ -140,10 +136,9 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r5",
- "arg2",
+ nullptr,
4,
0,
eEncodingUint,
@@ -152,10 +147,9 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r6",
- "arg3",
+ nullptr,
4,
0,
eEncodingUint,
@@ -164,10 +158,9 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r7",
- "arg4",
+ nullptr,
4,
0,
eEncodingUint,
@@ -176,8 +169,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r8",
"arg5",
4,
@@ -188,8 +180,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r9",
"arg6",
4,
@@ -200,8 +191,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r10",
"arg7",
4,
@@ -212,8 +202,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r11",
"arg8",
4,
@@ -224,8 +213,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r12",
nullptr,
4,
@@ -236,8 +224,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r13",
nullptr,
4,
@@ -248,8 +235,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r14",
nullptr,
4,
@@ -260,8 +246,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r15",
nullptr,
4,
@@ -272,8 +257,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r16",
nullptr,
4,
@@ -284,8 +268,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r17",
nullptr,
4,
@@ -296,8 +279,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r18",
nullptr,
4,
@@ -308,8 +290,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r19",
nullptr,
4,
@@ -320,8 +301,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r20",
nullptr,
4,
@@ -332,8 +312,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r21",
nullptr,
4,
@@ -344,8 +323,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r22",
nullptr,
4,
@@ -356,8 +334,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r23",
nullptr,
4,
@@ -368,8 +345,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r24",
nullptr,
4,
@@ -380,8 +356,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r25",
nullptr,
4,
@@ -392,8 +367,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r26",
nullptr,
4,
@@ -404,8 +378,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r27",
nullptr,
4,
@@ -416,8 +389,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r28",
"gp",
4,
@@ -428,10 +400,9 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r29",
- "sp",
+ nullptr,
4,
0,
eEncodingUint,
@@ -440,10 +411,9 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r30",
- "fp",
+ nullptr,
4,
0,
eEncodingUint,
@@ -452,10 +422,9 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r31",
- "ra",
+ nullptr,
4,
0,
eEncodingUint,
@@ -464,8 +433,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"sr",
nullptr,
4,
@@ -476,8 +444,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"lo",
nullptr,
4,
@@ -488,8 +455,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"hi",
nullptr,
4,
@@ -500,8 +466,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"bad",
nullptr,
4,
@@ -512,8 +477,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"cause",
nullptr,
4,
@@ -524,8 +488,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"pc",
nullptr,
4,
@@ -536,8 +499,7 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
};
static const uint32_t k_num_register_infos =
@@ -1052,16 +1014,3 @@ void ABISysV_mips::Initialize() {
void ABISysV_mips::Terminate() {
PluginManager::UnregisterPlugin(CreateInstance);
}
-
-lldb_private::ConstString ABISysV_mips::GetPluginNameStatic() {
- static ConstString g_name("sysv-mips");
- return g_name;
-}
-
-// PluginInterface protocol
-
-lldb_private::ConstString ABISysV_mips::GetPluginName() {
- return GetPluginNameStatic();
-}
-
-uint32_t ABISysV_mips::GetPluginVersion() { return 1; }
diff --git a/lldb/source/Plugins/ABI/Mips/ABISysV_mips.h b/lldb/source/Plugins/ABI/Mips/ABISysV_mips.h
index 715405e7ef97..e77a8bfc0ab6 100644
--- a/lldb/source/Plugins/ABI/Mips/ABISysV_mips.h
+++ b/lldb/source/Plugins/ABI/Mips/ABISysV_mips.h
@@ -69,13 +69,11 @@ public:
static lldb::ABISP CreateInstance(lldb::ProcessSP process_sp, const lldb_private::ArchSpec &arch);
- static lldb_private::ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "sysv-mips"; }
// PluginInterface protocol
- lldb_private::ConstString GetPluginName() override;
-
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
protected:
void CreateRegisterMapIfNeeded();
diff --git a/lldb/source/Plugins/ABI/Mips/ABISysV_mips64.cpp b/lldb/source/Plugins/ABI/Mips/ABISysV_mips64.cpp
index 7220508c75ff..7e272265e15d 100644
--- a/lldb/source/Plugins/ABI/Mips/ABISysV_mips64.cpp
+++ b/lldb/source/Plugins/ABI/Mips/ABISysV_mips64.cpp
@@ -92,8 +92,7 @@ static const RegisterInfo g_register_infos_mips64[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r1",
"AT",
8,
@@ -104,8 +103,7 @@ static const RegisterInfo g_register_infos_mips64[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r2",
"v0",
8,
@@ -116,8 +114,7 @@ static const RegisterInfo g_register_infos_mips64[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r3",
"v1",
8,
@@ -128,10 +125,9 @@ static const RegisterInfo g_register_infos_mips64[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r4",
- "arg1",
+ nullptr,
8,
0,
eEncodingUint,
@@ -140,10 +136,9 @@ static const RegisterInfo g_register_infos_mips64[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r5",
- "arg2",
+ nullptr,
8,
0,
eEncodingUint,
@@ -152,10 +147,9 @@ static const RegisterInfo g_register_infos_mips64[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r6",
- "arg3",
+ nullptr,
8,
0,
eEncodingUint,
@@ -164,10 +158,9 @@ static const RegisterInfo g_register_infos_mips64[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r7",
- "arg4",
+ nullptr,
8,
0,
eEncodingUint,
@@ -176,10 +169,9 @@ static const RegisterInfo g_register_infos_mips64[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r8",
- "arg5",
+ nullptr,
8,
0,
eEncodingUint,
@@ -188,10 +180,9 @@ static const RegisterInfo g_register_infos_mips64[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r9",
- "arg6",
+ nullptr,
8,
0,
eEncodingUint,
@@ -200,10 +191,9 @@ static const RegisterInfo g_register_infos_mips64[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r10",
- "arg7",
+ nullptr,
8,
0,
eEncodingUint,
@@ -212,10 +202,9 @@ static const RegisterInfo g_register_infos_mips64[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r11",
- "arg8",
+ nullptr,
8,
0,
eEncodingUint,
@@ -224,8 +213,7 @@ static const RegisterInfo g_register_infos_mips64[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r12",
nullptr,
8,
@@ -236,8 +224,7 @@ static const RegisterInfo g_register_infos_mips64[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r13",
nullptr,
8,
@@ -248,8 +235,7 @@ static const RegisterInfo g_register_infos_mips64[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r14",
nullptr,
8,
@@ -260,8 +246,7 @@ static const RegisterInfo g_register_infos_mips64[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r15",
nullptr,
8,
@@ -272,8 +257,7 @@ static const RegisterInfo g_register_infos_mips64[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r16",
nullptr,
8,
@@ -284,8 +268,7 @@ static const RegisterInfo g_register_infos_mips64[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r17",
nullptr,
8,
@@ -296,8 +279,7 @@ static const RegisterInfo g_register_infos_mips64[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r18",
nullptr,
8,
@@ -308,8 +290,7 @@ static const RegisterInfo g_register_infos_mips64[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r19",
nullptr,
8,
@@ -320,8 +301,7 @@ static const RegisterInfo g_register_infos_mips64[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r20",
nullptr,
8,
@@ -332,8 +312,7 @@ static const RegisterInfo g_register_infos_mips64[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r21",
nullptr,
8,
@@ -344,8 +323,7 @@ static const RegisterInfo g_register_infos_mips64[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r22",
nullptr,
8,
@@ -356,8 +334,7 @@ static const RegisterInfo g_register_infos_mips64[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r23",
nullptr,
8,
@@ -368,8 +345,7 @@ static const RegisterInfo g_register_infos_mips64[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r24",
nullptr,
8,
@@ -380,8 +356,7 @@ static const RegisterInfo g_register_infos_mips64[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r25",
nullptr,
8,
@@ -392,8 +367,7 @@ static const RegisterInfo g_register_infos_mips64[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r26",
nullptr,
8,
@@ -404,8 +378,7 @@ static const RegisterInfo g_register_infos_mips64[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r27",
nullptr,
8,
@@ -416,8 +389,7 @@ static const RegisterInfo g_register_infos_mips64[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r28",
"gp",
8,
@@ -428,10 +400,9 @@ static const RegisterInfo g_register_infos_mips64[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r29",
- "sp",
+ nullptr,
8,
0,
eEncodingUint,
@@ -440,10 +411,9 @@ static const RegisterInfo g_register_infos_mips64[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r30",
- "fp",
+ nullptr,
8,
0,
eEncodingUint,
@@ -452,10 +422,9 @@ static const RegisterInfo g_register_infos_mips64[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r31",
- "ra",
+ nullptr,
8,
0,
eEncodingUint,
@@ -464,8 +433,7 @@ static const RegisterInfo g_register_infos_mips64[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"sr",
nullptr,
4,
@@ -476,8 +444,7 @@ static const RegisterInfo g_register_infos_mips64[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"lo",
nullptr,
8,
@@ -488,8 +455,7 @@ static const RegisterInfo g_register_infos_mips64[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"hi",
nullptr,
8,
@@ -500,8 +466,7 @@ static const RegisterInfo g_register_infos_mips64[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"bad",
nullptr,
8,
@@ -512,8 +477,7 @@ static const RegisterInfo g_register_infos_mips64[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"cause",
nullptr,
8,
@@ -524,8 +488,7 @@ static const RegisterInfo g_register_infos_mips64[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"pc",
nullptr,
8,
@@ -536,8 +499,7 @@ static const RegisterInfo g_register_infos_mips64[] = {
LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
};
static const uint32_t k_num_register_infos =
@@ -1200,16 +1162,3 @@ void ABISysV_mips64::Initialize() {
void ABISysV_mips64::Terminate() {
PluginManager::UnregisterPlugin(CreateInstance);
}
-
-lldb_private::ConstString ABISysV_mips64::GetPluginNameStatic() {
- static ConstString g_name("sysv-mips64");
- return g_name;
-}
-
-// PluginInterface protocol
-
-lldb_private::ConstString ABISysV_mips64::GetPluginName() {
- return GetPluginNameStatic();
-}
-
-uint32_t ABISysV_mips64::GetPluginVersion() { return 1; }
diff --git a/lldb/source/Plugins/ABI/Mips/ABISysV_mips64.h b/lldb/source/Plugins/ABI/Mips/ABISysV_mips64.h
index 91428216a73a..3eda3992e65e 100644
--- a/lldb/source/Plugins/ABI/Mips/ABISysV_mips64.h
+++ b/lldb/source/Plugins/ABI/Mips/ABISysV_mips64.h
@@ -82,13 +82,11 @@ public:
static lldb::ABISP CreateInstance(lldb::ProcessSP process_sp, const lldb_private::ArchSpec &arch);
- static lldb_private::ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "sysv-mips64"; }
// PluginInterface protocol
- lldb_private::ConstString GetPluginName() override;
-
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
protected:
void CreateRegisterMapIfNeeded();
diff --git a/lldb/source/Plugins/ABI/PowerPC/ABISysV_ppc.cpp b/lldb/source/Plugins/ABI/PowerPC/ABISysV_ppc.cpp
index 98a14b50cbf3..70b9c46d353f 100644
--- a/lldb/source/Plugins/ABI/PowerPC/ABISysV_ppc.cpp
+++ b/lldb/source/Plugins/ABI/PowerPC/ABISysV_ppc.cpp
@@ -112,7 +112,7 @@ enum dwarf_regnums {
#define DEFINE_GPR(reg, alt, kind1, kind2, kind3, kind4) \
{ \
#reg, alt, 8, 0, eEncodingUint, eFormatHex, {kind1, kind2, kind3, kind4 }, \
- nullptr, nullptr, nullptr, 0 \
+ nullptr, nullptr, \
}
static const RegisterInfo g_register_infos[] = {
@@ -120,25 +120,25 @@ static const RegisterInfo g_register_infos[] = {
// Generic, Process Plugin
DEFINE_GPR(r0, nullptr, dwarf_r0, dwarf_r0, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM),
- DEFINE_GPR(r1, "sp", dwarf_r1, dwarf_r1, LLDB_REGNUM_GENERIC_SP,
+ DEFINE_GPR(r1, nullptr, dwarf_r1, dwarf_r1, LLDB_REGNUM_GENERIC_SP,
LLDB_INVALID_REGNUM),
DEFINE_GPR(r2, nullptr, dwarf_r2, dwarf_r2, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM),
- DEFINE_GPR(r3, "arg1", dwarf_r3, dwarf_r3, LLDB_REGNUM_GENERIC_ARG1,
+ DEFINE_GPR(r3, nullptr, dwarf_r3, dwarf_r3, LLDB_REGNUM_GENERIC_ARG1,
LLDB_INVALID_REGNUM),
- DEFINE_GPR(r4, "arg2", dwarf_r4, dwarf_r4, LLDB_REGNUM_GENERIC_ARG2,
+ DEFINE_GPR(r4, nullptr, dwarf_r4, dwarf_r4, LLDB_REGNUM_GENERIC_ARG2,
LLDB_INVALID_REGNUM),
- DEFINE_GPR(r5, "arg3", dwarf_r5, dwarf_r5, LLDB_REGNUM_GENERIC_ARG3,
+ DEFINE_GPR(r5, nullptr, dwarf_r5, dwarf_r5, LLDB_REGNUM_GENERIC_ARG3,
LLDB_INVALID_REGNUM),
- DEFINE_GPR(r6, "arg4", dwarf_r6, dwarf_r6, LLDB_REGNUM_GENERIC_ARG4,
+ DEFINE_GPR(r6, nullptr, dwarf_r6, dwarf_r6, LLDB_REGNUM_GENERIC_ARG4,
LLDB_INVALID_REGNUM),
- DEFINE_GPR(r7, "arg5", dwarf_r7, dwarf_r7, LLDB_REGNUM_GENERIC_ARG5,
+ DEFINE_GPR(r7, nullptr, dwarf_r7, dwarf_r7, LLDB_REGNUM_GENERIC_ARG5,
LLDB_INVALID_REGNUM),
- DEFINE_GPR(r8, "arg6", dwarf_r8, dwarf_r8, LLDB_REGNUM_GENERIC_ARG6,
+ DEFINE_GPR(r8, nullptr, dwarf_r8, dwarf_r8, LLDB_REGNUM_GENERIC_ARG6,
LLDB_INVALID_REGNUM),
- DEFINE_GPR(r9, "arg7", dwarf_r9, dwarf_r9, LLDB_REGNUM_GENERIC_ARG7,
+ DEFINE_GPR(r9, nullptr, dwarf_r9, dwarf_r9, LLDB_REGNUM_GENERIC_ARG7,
LLDB_INVALID_REGNUM),
- DEFINE_GPR(r10, "arg8", dwarf_r10, dwarf_r10, LLDB_REGNUM_GENERIC_ARG8,
+ DEFINE_GPR(r10, nullptr, dwarf_r10, dwarf_r10, LLDB_REGNUM_GENERIC_ARG8,
LLDB_INVALID_REGNUM),
DEFINE_GPR(r11, nullptr, dwarf_r11, dwarf_r11, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM),
@@ -182,15 +182,15 @@ static const RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM),
DEFINE_GPR(r31, nullptr, dwarf_r31, dwarf_r31, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM),
- DEFINE_GPR(lr, "lr", dwarf_lr, dwarf_lr, LLDB_REGNUM_GENERIC_RA,
+ DEFINE_GPR(lr, nullptr, dwarf_lr, dwarf_lr, LLDB_REGNUM_GENERIC_RA,
LLDB_INVALID_REGNUM),
- DEFINE_GPR(cr, "cr", dwarf_cr, dwarf_cr, LLDB_REGNUM_GENERIC_FLAGS,
+ DEFINE_GPR(cr, nullptr, dwarf_cr, dwarf_cr, LLDB_REGNUM_GENERIC_FLAGS,
LLDB_INVALID_REGNUM),
- DEFINE_GPR(xer, "xer", dwarf_xer, dwarf_xer, LLDB_INVALID_REGNUM,
+ DEFINE_GPR(xer, nullptr, dwarf_xer, dwarf_xer, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM),
- DEFINE_GPR(ctr, "ctr", dwarf_ctr, dwarf_ctr, LLDB_INVALID_REGNUM,
+ DEFINE_GPR(ctr, nullptr, dwarf_ctr, dwarf_ctr, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM),
- DEFINE_GPR(pc, "pc", dwarf_pc, dwarf_pc, LLDB_REGNUM_GENERIC_PC,
+ DEFINE_GPR(pc, nullptr, dwarf_pc, dwarf_pc, LLDB_REGNUM_GENERIC_PC,
LLDB_INVALID_REGNUM),
{nullptr,
nullptr,
@@ -201,8 +201,7 @@ static const RegisterInfo g_register_infos[] = {
{dwarf_cfa, dwarf_cfa, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM},
nullptr,
nullptr,
- nullptr,
- 0}};
+ }};
static const uint32_t k_num_register_infos =
llvm::array_lengthof(g_register_infos);
@@ -963,16 +962,3 @@ void ABISysV_ppc::Initialize() {
void ABISysV_ppc::Terminate() {
PluginManager::UnregisterPlugin(CreateInstance);
}
-
-lldb_private::ConstString ABISysV_ppc::GetPluginNameStatic() {
- static ConstString g_name("sysv-ppc");
- return g_name;
-}
-
-// PluginInterface protocol
-
-lldb_private::ConstString ABISysV_ppc::GetPluginName() {
- return GetPluginNameStatic();
-}
-
-uint32_t ABISysV_ppc::GetPluginVersion() { return 1; }
diff --git a/lldb/source/Plugins/ABI/PowerPC/ABISysV_ppc.h b/lldb/source/Plugins/ABI/PowerPC/ABISysV_ppc.h
index 4a586849e585..21b970f87934 100644
--- a/lldb/source/Plugins/ABI/PowerPC/ABISysV_ppc.h
+++ b/lldb/source/Plugins/ABI/PowerPC/ABISysV_ppc.h
@@ -78,13 +78,11 @@ public:
static lldb::ABISP CreateInstance(lldb::ProcessSP process_sp, const lldb_private::ArchSpec &arch);
- static lldb_private::ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "sysv-ppc"; }
// PluginInterface protocol
- lldb_private::ConstString GetPluginName() override;
-
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
protected:
void CreateRegisterMapIfNeeded();
diff --git a/lldb/source/Plugins/ABI/PowerPC/ABISysV_ppc64.cpp b/lldb/source/Plugins/ABI/PowerPC/ABISysV_ppc64.cpp
index 7cc9482e7c5d..f9a2851d3949 100644
--- a/lldb/source/Plugins/ABI/PowerPC/ABISysV_ppc64.cpp
+++ b/lldb/source/Plugins/ABI/PowerPC/ABISysV_ppc64.cpp
@@ -1074,16 +1074,3 @@ void ABISysV_ppc64::Initialize() {
void ABISysV_ppc64::Terminate() {
PluginManager::UnregisterPlugin(CreateInstance);
}
-
-lldb_private::ConstString ABISysV_ppc64::GetPluginNameStatic() {
- static ConstString g_name("sysv-ppc64");
- return g_name;
-}
-
-// PluginInterface protocol
-
-lldb_private::ConstString ABISysV_ppc64::GetPluginName() {
- return GetPluginNameStatic();
-}
-
-uint32_t ABISysV_ppc64::GetPluginVersion() { return 1; }
diff --git a/lldb/source/Plugins/ABI/PowerPC/ABISysV_ppc64.h b/lldb/source/Plugins/ABI/PowerPC/ABISysV_ppc64.h
index 8dcf3ca48b56..bfa96cc0df70 100644
--- a/lldb/source/Plugins/ABI/PowerPC/ABISysV_ppc64.h
+++ b/lldb/source/Plugins/ABI/PowerPC/ABISysV_ppc64.h
@@ -78,13 +78,11 @@ public:
static lldb::ABISP CreateInstance(lldb::ProcessSP process_sp, const lldb_private::ArchSpec &arch);
- static lldb_private::ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "sysv-ppc64"; }
// PluginInterface protocol
- lldb_private::ConstString GetPluginName() override;
-
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
protected:
void CreateRegisterMapIfNeeded();
diff --git a/lldb/source/Plugins/ABI/SystemZ/ABISysV_s390x.cpp b/lldb/source/Plugins/ABI/SystemZ/ABISysV_s390x.cpp
index 88e85111d871..f8156deb7e30 100644
--- a/lldb/source/Plugins/ABI/SystemZ/ABISysV_s390x.cpp
+++ b/lldb/source/Plugins/ABI/SystemZ/ABISysV_s390x.cpp
@@ -115,22 +115,22 @@ enum dwarf_regnums {
#name, alt, size, 0, eEncodingUint, eFormatHex, \
{dwarf_##name##_s390x, dwarf_##name##_s390x, generic, \
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM }, \
- nullptr, nullptr, nullptr, 0 \
+ nullptr, nullptr, \
}
static const RegisterInfo g_register_infos[] = {
DEFINE_REG(r0, 8, nullptr, LLDB_INVALID_REGNUM),
DEFINE_REG(r1, 8, nullptr, LLDB_INVALID_REGNUM),
- DEFINE_REG(r2, 8, "arg1", LLDB_REGNUM_GENERIC_ARG1),
- DEFINE_REG(r3, 8, "arg2", LLDB_REGNUM_GENERIC_ARG2),
- DEFINE_REG(r4, 8, "arg3", LLDB_REGNUM_GENERIC_ARG3),
- DEFINE_REG(r5, 8, "arg4", LLDB_REGNUM_GENERIC_ARG4),
- DEFINE_REG(r6, 8, "arg5", LLDB_REGNUM_GENERIC_ARG5),
+ DEFINE_REG(r2, 8, nullptr, LLDB_REGNUM_GENERIC_ARG1),
+ DEFINE_REG(r3, 8, nullptr, LLDB_REGNUM_GENERIC_ARG2),
+ DEFINE_REG(r4, 8, nullptr, LLDB_REGNUM_GENERIC_ARG3),
+ DEFINE_REG(r5, 8, nullptr, LLDB_REGNUM_GENERIC_ARG4),
+ DEFINE_REG(r6, 8, nullptr, LLDB_REGNUM_GENERIC_ARG5),
DEFINE_REG(r7, 8, nullptr, LLDB_INVALID_REGNUM),
DEFINE_REG(r8, 8, nullptr, LLDB_INVALID_REGNUM),
DEFINE_REG(r9, 8, nullptr, LLDB_INVALID_REGNUM),
DEFINE_REG(r10, 8, nullptr, LLDB_INVALID_REGNUM),
- DEFINE_REG(r11, 8, "fp", LLDB_REGNUM_GENERIC_FP),
+ DEFINE_REG(r11, 8, nullptr, LLDB_REGNUM_GENERIC_FP),
DEFINE_REG(r12, 8, nullptr, LLDB_INVALID_REGNUM),
DEFINE_REG(r13, 8, nullptr, LLDB_INVALID_REGNUM),
DEFINE_REG(r14, 8, nullptr, LLDB_INVALID_REGNUM),
@@ -151,8 +151,8 @@ static const RegisterInfo g_register_infos[] = {
DEFINE_REG(acr13, 4, nullptr, LLDB_INVALID_REGNUM),
DEFINE_REG(acr14, 4, nullptr, LLDB_INVALID_REGNUM),
DEFINE_REG(acr15, 4, nullptr, LLDB_INVALID_REGNUM),
- DEFINE_REG(pswm, 8, "flags", LLDB_REGNUM_GENERIC_FLAGS),
- DEFINE_REG(pswa, 8, "pc", LLDB_REGNUM_GENERIC_PC),
+ DEFINE_REG(pswm, 8, nullptr, LLDB_REGNUM_GENERIC_FLAGS),
+ DEFINE_REG(pswa, 8, nullptr, LLDB_REGNUM_GENERIC_PC),
DEFINE_REG(f0, 8, nullptr, LLDB_INVALID_REGNUM),
DEFINE_REG(f1, 8, nullptr, LLDB_INVALID_REGNUM),
DEFINE_REG(f2, 8, nullptr, LLDB_INVALID_REGNUM),
@@ -716,16 +716,3 @@ void ABISysV_s390x::Initialize() {
void ABISysV_s390x::Terminate() {
PluginManager::UnregisterPlugin(CreateInstance);
}
-
-lldb_private::ConstString ABISysV_s390x::GetPluginNameStatic() {
- static ConstString g_name("sysv-s390x");
- return g_name;
-}
-
-// PluginInterface protocol
-
-lldb_private::ConstString ABISysV_s390x::GetPluginName() {
- return GetPluginNameStatic();
-}
-
-uint32_t ABISysV_s390x::GetPluginVersion() { return 1; }
diff --git a/lldb/source/Plugins/ABI/SystemZ/ABISysV_s390x.h b/lldb/source/Plugins/ABI/SystemZ/ABISysV_s390x.h
index f8f412465658..f6c248dc59ba 100644
--- a/lldb/source/Plugins/ABI/SystemZ/ABISysV_s390x.h
+++ b/lldb/source/Plugins/ABI/SystemZ/ABISysV_s390x.h
@@ -70,13 +70,11 @@ public:
static lldb::ABISP CreateInstance(lldb::ProcessSP process_sp, const lldb_private::ArchSpec &arch);
- static lldb_private::ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "sysv-s390x"; }
// PluginInterface protocol
- lldb_private::ConstString GetPluginName() override;
-
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
protected:
void CreateRegisterMapIfNeeded();
diff --git a/lldb/source/Plugins/ABI/X86/ABIMacOSX_i386.cpp b/lldb/source/Plugins/ABI/X86/ABIMacOSX_i386.cpp
index 461e4af599d3..69adeee4742b 100644
--- a/lldb/source/Plugins/ABI/X86/ABIMacOSX_i386.cpp
+++ b/lldb/source/Plugins/ABI/X86/ABIMacOSX_i386.cpp
@@ -460,16 +460,3 @@ void ABIMacOSX_i386::Initialize() {
void ABIMacOSX_i386::Terminate() {
PluginManager::UnregisterPlugin(CreateInstance);
}
-
-lldb_private::ConstString ABIMacOSX_i386::GetPluginNameStatic() {
- static ConstString g_short_name("abi.macosx-i386");
- return g_short_name;
-}
-
-// PluginInterface protocol
-
-lldb_private::ConstString ABIMacOSX_i386::GetPluginName() {
- return GetPluginNameStatic();
-}
-
-uint32_t ABIMacOSX_i386::GetPluginVersion() { return 1; }
diff --git a/lldb/source/Plugins/ABI/X86/ABIMacOSX_i386.h b/lldb/source/Plugins/ABI/X86/ABIMacOSX_i386.h
index b8b253144165..462317f17666 100644
--- a/lldb/source/Plugins/ABI/X86/ABIMacOSX_i386.h
+++ b/lldb/source/Plugins/ABI/X86/ABIMacOSX_i386.h
@@ -9,11 +9,11 @@
#ifndef LLDB_SOURCE_PLUGINS_ABI_X86_ABIMACOSX_I386_H
#define LLDB_SOURCE_PLUGINS_ABI_X86_ABIMACOSX_I386_H
-#include "Plugins/ABI/X86/ABIX86.h"
+#include "Plugins/ABI/X86/ABIX86_i386.h"
#include "lldb/Core/Value.h"
#include "lldb/lldb-private.h"
-class ABIMacOSX_i386 : public ABIX86 {
+class ABIMacOSX_i386 : public ABIX86_i386 {
public:
~ABIMacOSX_i386() override = default;
@@ -75,11 +75,9 @@ public:
// PluginInterface protocol
- static lldb_private::ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "abi.macosx-i386"; }
- lldb_private::ConstString GetPluginName() override;
-
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
protected:
lldb::ValueObjectSP
@@ -94,7 +92,7 @@ protected:
}
private:
- using ABIX86::ABIX86; // Call CreateInstance instead.
+ using ABIX86_i386::ABIX86_i386; // Call CreateInstance instead.
};
#endif // LLDB_SOURCE_PLUGINS_ABI_X86_ABIMACOSX_I386_H
diff --git a/lldb/source/Plugins/ABI/X86/ABISysV_i386.cpp b/lldb/source/Plugins/ABI/X86/ABISysV_i386.cpp
index 7d2f0a64d679..054e28f5c7a6 100644
--- a/lldb/source/Plugins/ABI/X86/ABISysV_i386.cpp
+++ b/lldb/source/Plugins/ABI/X86/ABISysV_i386.cpp
@@ -714,14 +714,3 @@ void ABISysV_i386::Initialize() {
void ABISysV_i386::Terminate() {
PluginManager::UnregisterPlugin(CreateInstance);
}
-
-// PluginInterface protocol
-
-lldb_private::ConstString ABISysV_i386::GetPluginNameStatic() {
- static ConstString g_name("sysv-i386");
- return g_name;
-}
-
-lldb_private::ConstString ABISysV_i386::GetPluginName() {
- return GetPluginNameStatic();
-}
diff --git a/lldb/source/Plugins/ABI/X86/ABISysV_i386.h b/lldb/source/Plugins/ABI/X86/ABISysV_i386.h
index 1ebb107d36df..a19c8ddb5b64 100644
--- a/lldb/source/Plugins/ABI/X86/ABISysV_i386.h
+++ b/lldb/source/Plugins/ABI/X86/ABISysV_i386.h
@@ -9,10 +9,10 @@
#ifndef LLDB_SOURCE_PLUGINS_ABI_X86_ABISYSV_I386_H
#define LLDB_SOURCE_PLUGINS_ABI_X86_ABISYSV_I386_H
-#include "Plugins/ABI/X86/ABIX86.h"
+#include "Plugins/ABI/X86/ABIX86_i386.h"
#include "lldb/lldb-private.h"
-class ABISysV_i386 : public ABIX86 {
+class ABISysV_i386 : public ABIX86_i386 {
public:
~ABISysV_i386() override = default;
@@ -83,11 +83,9 @@ public:
// PluginInterface protocol
- static lldb_private::ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "sysv-i386"; }
- lldb_private::ConstString GetPluginName() override;
-
- uint32_t GetPluginVersion() override { return 1; }
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
protected:
lldb::ValueObjectSP
@@ -97,7 +95,7 @@ protected:
bool RegisterIsCalleeSaved(const lldb_private::RegisterInfo *reg_info);
private:
- using ABIX86::ABIX86; // Call CreateInstance instead.
+ using ABIX86_i386::ABIX86_i386; // Call CreateInstance instead.
};
#endif // LLDB_SOURCE_PLUGINS_ABI_X86_ABISYSV_I386_H
diff --git a/lldb/source/Plugins/ABI/X86/ABISysV_x86_64.cpp b/lldb/source/Plugins/ABI/X86/ABISysV_x86_64.cpp
index 196b45b3b6da..b51ee5c48515 100644
--- a/lldb/source/Plugins/ABI/X86/ABISysV_x86_64.cpp
+++ b/lldb/source/Plugins/ABI/X86/ABISysV_x86_64.cpp
@@ -933,6 +933,8 @@ uint32_t ABISysV_x86_64::GetGenericNum(llvm::StringRef name) {
.Case("rsp", LLDB_REGNUM_GENERIC_SP)
.Case("rbp", LLDB_REGNUM_GENERIC_FP)
.Case("rflags", LLDB_REGNUM_GENERIC_FLAGS)
+ // gdbserver uses eflags
+ .Case("eflags", LLDB_REGNUM_GENERIC_FLAGS)
.Case("rdi", LLDB_REGNUM_GENERIC_ARG1)
.Case("rsi", LLDB_REGNUM_GENERIC_ARG2)
.Case("rdx", LLDB_REGNUM_GENERIC_ARG3)
@@ -950,16 +952,3 @@ void ABISysV_x86_64::Initialize() {
void ABISysV_x86_64::Terminate() {
PluginManager::UnregisterPlugin(CreateInstance);
}
-
-lldb_private::ConstString ABISysV_x86_64::GetPluginNameStatic() {
- static ConstString g_name("sysv-x86_64");
- return g_name;
-}
-
-// PluginInterface protocol
-
-lldb_private::ConstString ABISysV_x86_64::GetPluginName() {
- return GetPluginNameStatic();
-}
-
-uint32_t ABISysV_x86_64::GetPluginVersion() { return 1; }
diff --git a/lldb/source/Plugins/ABI/X86/ABISysV_x86_64.h b/lldb/source/Plugins/ABI/X86/ABISysV_x86_64.h
index 6dce4ce0f012..ce0357dfa331 100644
--- a/lldb/source/Plugins/ABI/X86/ABISysV_x86_64.h
+++ b/lldb/source/Plugins/ABI/X86/ABISysV_x86_64.h
@@ -76,13 +76,10 @@ public:
static lldb::ABISP CreateInstance(lldb::ProcessSP process_sp, const lldb_private::ArchSpec &arch);
- static lldb_private::ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "sysv-x86_64"; }
// PluginInterface protocol
-
- lldb_private::ConstString GetPluginName() override;
-
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
protected:
void CreateRegisterMapIfNeeded();
diff --git a/lldb/source/Plugins/ABI/X86/ABIWindows_x86_64.cpp b/lldb/source/Plugins/ABI/X86/ABIWindows_x86_64.cpp
index 6c473c652c5f..5e8f8b16ec1a 100644
--- a/lldb/source/Plugins/ABI/X86/ABIWindows_x86_64.cpp
+++ b/lldb/source/Plugins/ABI/X86/ABIWindows_x86_64.cpp
@@ -806,6 +806,8 @@ uint32_t ABIWindows_x86_64::GetGenericNum(llvm::StringRef reg) {
.Case("rsp", LLDB_REGNUM_GENERIC_SP)
.Case("rbp", LLDB_REGNUM_GENERIC_FP)
.Case("rflags", LLDB_REGNUM_GENERIC_FLAGS)
+ // gdbserver uses eflags
+ .Case("eflags", LLDB_REGNUM_GENERIC_FLAGS)
.Case("rcx", LLDB_REGNUM_GENERIC_ARG1)
.Case("rdx", LLDB_REGNUM_GENERIC_ARG2)
.Case("r8", LLDB_REGNUM_GENERIC_ARG3)
@@ -821,18 +823,3 @@ void ABIWindows_x86_64::Initialize() {
void ABIWindows_x86_64::Terminate() {
PluginManager::UnregisterPlugin(CreateInstance);
}
-
-lldb_private::ConstString ABIWindows_x86_64::GetPluginNameStatic() {
- static ConstString g_name("windows-x86_64");
- return g_name;
-}
-
-//------------------------------------------------------------------
-// PluginInterface protocol
-//------------------------------------------------------------------
-
-lldb_private::ConstString ABIWindows_x86_64::GetPluginName() {
- return GetPluginNameStatic();
-}
-
-uint32_t ABIWindows_x86_64::GetPluginVersion() { return 1; }
diff --git a/lldb/source/Plugins/ABI/X86/ABIWindows_x86_64.h b/lldb/source/Plugins/ABI/X86/ABIWindows_x86_64.h
index 89fc6e6ca21f..e74b9126404e 100644
--- a/lldb/source/Plugins/ABI/X86/ABIWindows_x86_64.h
+++ b/lldb/source/Plugins/ABI/X86/ABIWindows_x86_64.h
@@ -67,15 +67,13 @@ public:
static lldb::ABISP CreateInstance(lldb::ProcessSP process_sp, const lldb_private::ArchSpec &arch);
- static lldb_private::ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "windows-x86_64"; }
//------------------------------------------------------------------
// PluginInterface protocol
//------------------------------------------------------------------
- lldb_private::ConstString GetPluginName() override;
-
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
protected:
void CreateRegisterMapIfNeeded();
diff --git a/lldb/source/Plugins/ABI/X86/ABIX86.cpp b/lldb/source/Plugins/ABI/X86/ABIX86.cpp
index bf5ab669417e..7cdba0c5fe57 100644
--- a/lldb/source/Plugins/ABI/X86/ABIX86.cpp
+++ b/lldb/source/Plugins/ABI/X86/ABIX86.cpp
@@ -1,4 +1,4 @@
-//===-- X86.h -------------------------------------------------------------===//
+//===-- ABIX86.cpp --------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,12 +6,16 @@
//
//===----------------------------------------------------------------------===//
-#include "ABIX86.h"
#include "ABIMacOSX_i386.h"
#include "ABISysV_i386.h"
#include "ABISysV_x86_64.h"
#include "ABIWindows_x86_64.h"
+#include "ABIX86.h"
#include "lldb/Core/PluginManager.h"
+#include "lldb/Target/Process.h"
+
+using namespace lldb;
+using namespace lldb_private;
LLDB_PLUGIN_DEFINE(ABIX86)
@@ -29,15 +33,225 @@ void ABIX86::Terminate() {
ABIWindows_x86_64::Terminate();
}
-uint32_t ABIX86::GetGenericNum(llvm::StringRef name) {
- return llvm::StringSwitch<uint32_t>(name)
- .Case("eip", LLDB_REGNUM_GENERIC_PC)
- .Case("esp", LLDB_REGNUM_GENERIC_SP)
- .Case("ebp", LLDB_REGNUM_GENERIC_FP)
- .Case("eflags", LLDB_REGNUM_GENERIC_FLAGS)
- .Case("edi", LLDB_REGNUM_GENERIC_ARG1)
- .Case("esi", LLDB_REGNUM_GENERIC_ARG2)
- .Case("edx", LLDB_REGNUM_GENERIC_ARG3)
- .Case("ecx", LLDB_REGNUM_GENERIC_ARG4)
- .Default(LLDB_INVALID_REGNUM);
+namespace {
+enum RegKind {
+ GPR32,
+ GPR16,
+ GPR8h,
+ GPR8,
+ MM,
+ YMM_YMMh,
+ YMM_XMM,
+
+ RegKindCount
+};
+}
+
+struct RegData {
+ RegKind subreg_kind;
+ llvm::StringRef subreg_name;
+ llvm::Optional<uint32_t> base_index;
+};
+
+static void
+addPartialRegisters(std::vector<DynamicRegisterInfo::Register> &regs,
+ llvm::ArrayRef<RegData *> subregs, uint32_t base_size,
+ lldb::Encoding encoding, lldb::Format format,
+ uint32_t subreg_size, uint32_t subreg_offset = 0) {
+ for (const RegData *subreg : subregs) {
+ assert(subreg);
+ uint32_t base_index = subreg->base_index.getValue();
+ DynamicRegisterInfo::Register &full_reg = regs[base_index];
+ if (full_reg.byte_size != base_size)
+ continue;
+
+ lldb_private::DynamicRegisterInfo::Register new_reg{
+ lldb_private::ConstString(subreg->subreg_name),
+ lldb_private::ConstString(),
+ lldb_private::ConstString("supplementary registers"),
+ subreg_size,
+ LLDB_INVALID_INDEX32,
+ encoding,
+ format,
+ LLDB_INVALID_REGNUM,
+ LLDB_INVALID_REGNUM,
+ LLDB_INVALID_REGNUM,
+ LLDB_INVALID_REGNUM,
+ {base_index},
+ {},
+ subreg_offset};
+
+ addSupplementaryRegister(regs, new_reg);
+ }
+}
+
+static void
+addCombinedRegisters(std::vector<DynamicRegisterInfo::Register> &regs,
+ llvm::ArrayRef<RegData *> subregs1,
+ llvm::ArrayRef<RegData *> subregs2, uint32_t base_size,
+ lldb::Encoding encoding, lldb::Format format) {
+ for (auto it : llvm::zip(subregs1, subregs2)) {
+ RegData *regdata1, *regdata2;
+ std::tie(regdata1, regdata2) = it;
+ assert(regdata1);
+ assert(regdata2);
+
+ // verify that we've got matching target registers
+ if (regdata1->subreg_name != regdata2->subreg_name)
+ continue;
+
+ uint32_t base_index1 = regdata1->base_index.getValue();
+ uint32_t base_index2 = regdata2->base_index.getValue();
+ if (regs[base_index1].byte_size != base_size ||
+ regs[base_index2].byte_size != base_size)
+ continue;
+
+ lldb_private::DynamicRegisterInfo::Register new_reg{
+ lldb_private::ConstString(regdata1->subreg_name),
+ lldb_private::ConstString(),
+ lldb_private::ConstString("supplementary registers"),
+ base_size * 2,
+ LLDB_INVALID_INDEX32,
+ encoding,
+ format,
+ LLDB_INVALID_REGNUM,
+ LLDB_INVALID_REGNUM,
+ LLDB_INVALID_REGNUM,
+ LLDB_INVALID_REGNUM,
+ {base_index1, base_index2},
+ {}};
+
+ addSupplementaryRegister(regs, new_reg);
+ }
+}
+
+typedef llvm::SmallDenseMap<llvm::StringRef, llvm::SmallVector<RegData, 4>, 64>
+ BaseRegToRegsMap;
+
+#define GPRh(l) \
+ { \
+ is64bit \
+ ? BaseRegToRegsMap::value_type("r" l "x", \
+ {{GPR32, "e" l "x", llvm::None}, \
+ {GPR16, l "x", llvm::None}, \
+ {GPR8h, l "h", llvm::None}, \
+ {GPR8, l "l", llvm::None}}) \
+ : BaseRegToRegsMap::value_type("e" l "x", {{GPR16, l "x", llvm::None}, \
+ {GPR8h, l "h", llvm::None}, \
+ {GPR8, l "l", llvm::None}}) \
+ }
+
+#define GPR(r16) \
+ { \
+ is64bit \
+ ? BaseRegToRegsMap::value_type("r" r16, {{GPR32, "e" r16, llvm::None}, \
+ {GPR16, r16, llvm::None}, \
+ {GPR8, r16 "l", llvm::None}}) \
+ : BaseRegToRegsMap::value_type("e" r16, {{GPR16, r16, llvm::None}, \
+ {GPR8, r16 "l", llvm::None}}) \
+ }
+
+#define GPR64(n) \
+ { \
+ BaseRegToRegsMap::value_type("r" #n, {{GPR32, "r" #n "d", llvm::None}, \
+ {GPR16, "r" #n "w", llvm::None}, \
+ {GPR8, "r" #n "l", llvm::None}}) \
+ }
+
+#define STMM(n) \
+ { BaseRegToRegsMap::value_type("st" #n, {{MM, "mm" #n, llvm::None}}) }
+
+#define YMM(n) \
+ {BaseRegToRegsMap::value_type("ymm" #n "h", \
+ {{YMM_YMMh, "ymm" #n, llvm::None}})}, \
+ { \
+ BaseRegToRegsMap::value_type("xmm" #n, {{YMM_XMM, "ymm" #n, llvm::None}}) \
+ }
+
+BaseRegToRegsMap makeBaseRegMap(bool is64bit) {
+ BaseRegToRegsMap out{
+ {// GPRs common to amd64 & i386
+ GPRh("a"), GPRh("b"), GPRh("c"), GPRh("d"), GPR("si"), GPR("di"),
+ GPR("bp"), GPR("sp"),
+
+ // ST/MM registers
+ STMM(0), STMM(1), STMM(2), STMM(3), STMM(4), STMM(5), STMM(6), STMM(7),
+
+ // lower YMM registers (common to amd64 & i386)
+ YMM(0), YMM(1), YMM(2), YMM(3), YMM(4), YMM(5), YMM(6), YMM(7)}};
+
+ if (is64bit) {
+ BaseRegToRegsMap amd64_regs{{// GPRs specific to amd64
+ GPR64(8), GPR64(9), GPR64(10), GPR64(11),
+ GPR64(12), GPR64(13), GPR64(14), GPR64(15),
+
+ // higher YMM registers (specific to amd64)
+ YMM(8), YMM(9), YMM(10), YMM(11), YMM(12),
+ YMM(13), YMM(14), YMM(15)}};
+ out.insert(amd64_regs.begin(), amd64_regs.end());
+ }
+
+ return out;
+}
+
+void ABIX86::AugmentRegisterInfo(
+ std::vector<DynamicRegisterInfo::Register> &regs) {
+ MCBasedABI::AugmentRegisterInfo(regs);
+
+ ProcessSP process_sp = GetProcessSP();
+ if (!process_sp)
+ return;
+
+ uint32_t gpr_base_size =
+ process_sp->GetTarget().GetArchitecture().GetAddressByteSize();
+
+ // primary map from a base register to its subregisters
+ BaseRegToRegsMap base_reg_map = makeBaseRegMap(gpr_base_size == 8);
+ // set used for fast matching of register names to subregisters
+ llvm::SmallDenseSet<llvm::StringRef, 64> subreg_name_set;
+ // convenience array providing access to all subregisters of given kind,
+ // sorted by base register index
+ std::array<llvm::SmallVector<RegData *, 16>, RegKindCount> subreg_by_kind;
+
+ // prepare the set of all known subregisters
+ for (const auto &x : base_reg_map) {
+ for (const auto &subreg : x.second)
+ subreg_name_set.insert(subreg.subreg_name);
+ }
+
+ // iterate over all registers
+ for (const auto &x : llvm::enumerate(regs)) {
+ llvm::StringRef reg_name = x.value().name.GetStringRef();
+ // abort if at least one sub-register is already present
+ if (llvm::is_contained(subreg_name_set, reg_name))
+ return;
+
+ auto found = base_reg_map.find(reg_name);
+ if (found == base_reg_map.end())
+ continue;
+
+ for (auto &subreg : found->second) {
+ // fill in base register indices
+ subreg.base_index = x.index();
+ // fill subreg_by_kind map-array
+ subreg_by_kind[static_cast<size_t>(subreg.subreg_kind)].push_back(
+ &subreg);
+ }
+ }
+
+ // now add registers by kind
+ addPartialRegisters(regs, subreg_by_kind[GPR32], gpr_base_size, eEncodingUint,
+ eFormatHex, 4);
+ addPartialRegisters(regs, subreg_by_kind[GPR16], gpr_base_size, eEncodingUint,
+ eFormatHex, 2);
+ addPartialRegisters(regs, subreg_by_kind[GPR8h], gpr_base_size, eEncodingUint,
+ eFormatHex, 1, 1);
+ addPartialRegisters(regs, subreg_by_kind[GPR8], gpr_base_size, eEncodingUint,
+ eFormatHex, 1);
+
+ addPartialRegisters(regs, subreg_by_kind[MM], 10, eEncodingUint, eFormatHex,
+ 8);
+
+ addCombinedRegisters(regs, subreg_by_kind[YMM_XMM], subreg_by_kind[YMM_YMMh],
+ 16, eEncodingVector, eFormatVectorOfUInt8);
}
diff --git a/lldb/source/Plugins/ABI/X86/ABIX86.h b/lldb/source/Plugins/ABI/X86/ABIX86.h
index 22521cacf180..1114084fbc5d 100644
--- a/lldb/source/Plugins/ABI/X86/ABIX86.h
+++ b/lldb/source/Plugins/ABI/X86/ABIX86.h
@@ -1,4 +1,4 @@
-//===-- X86.h ---------------------------------------------------*- C++ -*-===//
+//===-- ABIX86.h ------------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -10,15 +10,19 @@
#define LLDB_SOURCE_PLUGINS_ABI_X86_ABIX86_H
#include "lldb/Target/ABI.h"
+#include "lldb/lldb-private.h"
class ABIX86 : public lldb_private::MCBasedABI {
public:
static void Initialize();
static void Terminate();
- uint32_t GetGenericNum(llvm::StringRef name) override;
+protected:
+ void AugmentRegisterInfo(
+ std::vector<lldb_private::DynamicRegisterInfo::Register> &regs) override;
private:
using lldb_private::MCBasedABI::MCBasedABI;
};
+
#endif
diff --git a/lldb/source/Plugins/ABI/X86/ABIX86_64.h b/lldb/source/Plugins/ABI/X86/ABIX86_64.h
index e65c2d97d897..8fc98507adee 100644
--- a/lldb/source/Plugins/ABI/X86/ABIX86_64.h
+++ b/lldb/source/Plugins/ABI/X86/ABIX86_64.h
@@ -9,10 +9,9 @@
#ifndef LLDB_SOURCE_PLUGINS_ABI_X86_ABIX86_64_H
#define LLDB_SOURCE_PLUGINS_ABI_X86_ABIX86_64_H
-#include "lldb/Target/ABI.h"
-#include "lldb/lldb-private.h"
+#include "Plugins/ABI/X86/ABIX86.h"
-class ABIX86_64 : public lldb_private::MCBasedABI {
+class ABIX86_64 : public ABIX86 {
protected:
std::string GetMCName(std::string name) override {
MapRegisterName(name, "stmm", "st");
@@ -20,7 +19,7 @@ protected:
}
private:
- using lldb_private::MCBasedABI::MCBasedABI;
+ using ABIX86::ABIX86;
};
#endif // LLDB_SOURCE_PLUGINS_ABI_X86_ABIX86_64_H
diff --git a/lldb/source/Plugins/ABI/X86/ABIX86_i386.cpp b/lldb/source/Plugins/ABI/X86/ABIX86_i386.cpp
new file mode 100644
index 000000000000..e376f0eb2bc7
--- /dev/null
+++ b/lldb/source/Plugins/ABI/X86/ABIX86_i386.cpp
@@ -0,0 +1,22 @@
+//===-- ABIX86_i386.cpp ---------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "ABIX86_i386.h"
+
+uint32_t ABIX86_i386::GetGenericNum(llvm::StringRef name) {
+ return llvm::StringSwitch<uint32_t>(name)
+ .Case("eip", LLDB_REGNUM_GENERIC_PC)
+ .Case("esp", LLDB_REGNUM_GENERIC_SP)
+ .Case("ebp", LLDB_REGNUM_GENERIC_FP)
+ .Case("eflags", LLDB_REGNUM_GENERIC_FLAGS)
+ .Case("edi", LLDB_REGNUM_GENERIC_ARG1)
+ .Case("esi", LLDB_REGNUM_GENERIC_ARG2)
+ .Case("edx", LLDB_REGNUM_GENERIC_ARG3)
+ .Case("ecx", LLDB_REGNUM_GENERIC_ARG4)
+ .Default(LLDB_INVALID_REGNUM);
+}
diff --git a/lldb/source/Plugins/ABI/X86/ABIX86_i386.h b/lldb/source/Plugins/ABI/X86/ABIX86_i386.h
new file mode 100644
index 000000000000..cb3baa5150fc
--- /dev/null
+++ b/lldb/source/Plugins/ABI/X86/ABIX86_i386.h
@@ -0,0 +1,22 @@
+//===-- ABIX86_i386.h -------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLDB_SOURCE_PLUGINS_ABI_X86_ABIX86_I386_H
+#define LLDB_SOURCE_PLUGINS_ABI_X86_ABIX86_I386_H
+
+#include "Plugins/ABI/X86/ABIX86.h"
+
+class ABIX86_i386 : public ABIX86 {
+public:
+ uint32_t GetGenericNum(llvm::StringRef name) override;
+
+private:
+ using ABIX86::ABIX86;
+};
+
+#endif
diff --git a/lldb/source/Plugins/Architecture/AArch64/ArchitectureAArch64.cpp b/lldb/source/Plugins/Architecture/AArch64/ArchitectureAArch64.cpp
index 9994cc293d6a..1b2b41ee8758 100644
--- a/lldb/source/Plugins/Architecture/AArch64/ArchitectureAArch64.cpp
+++ b/lldb/source/Plugins/Architecture/AArch64/ArchitectureAArch64.cpp
@@ -15,10 +15,6 @@ using namespace lldb;
LLDB_PLUGIN_DEFINE(ArchitectureAArch64)
-ConstString ArchitectureAArch64::GetPluginNameStatic() {
- return ConstString("aarch64");
-}
-
void ArchitectureAArch64::Initialize() {
PluginManager::RegisterPlugin(GetPluginNameStatic(),
"AArch64-specific algorithms",
@@ -38,8 +34,3 @@ ArchitectureAArch64::Create(const ArchSpec &arch) {
}
return std::unique_ptr<Architecture>(new ArchitectureAArch64());
}
-
-ConstString ArchitectureAArch64::GetPluginName() {
- return GetPluginNameStatic();
-}
-uint32_t ArchitectureAArch64::GetPluginVersion() { return 1; }
diff --git a/lldb/source/Plugins/Architecture/AArch64/ArchitectureAArch64.h b/lldb/source/Plugins/Architecture/AArch64/ArchitectureAArch64.h
index 775478cc9338..e1b8558e1cda 100644
--- a/lldb/source/Plugins/Architecture/AArch64/ArchitectureAArch64.h
+++ b/lldb/source/Plugins/Architecture/AArch64/ArchitectureAArch64.h
@@ -16,12 +16,11 @@ namespace lldb_private {
class ArchitectureAArch64 : public Architecture {
public:
- static ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "aarch64"; }
static void Initialize();
static void Terminate();
- ConstString GetPluginName() override;
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
void OverrideStopInfo(Thread &thread) const override{};
diff --git a/lldb/source/Plugins/Architecture/Arm/ArchitectureArm.cpp b/lldb/source/Plugins/Architecture/Arm/ArchitectureArm.cpp
index 58c7cbb4530a..bb44675e842e 100644
--- a/lldb/source/Plugins/Architecture/Arm/ArchitectureArm.cpp
+++ b/lldb/source/Plugins/Architecture/Arm/ArchitectureArm.cpp
@@ -19,10 +19,6 @@ using namespace lldb;
LLDB_PLUGIN_DEFINE(ArchitectureArm)
-ConstString ArchitectureArm::GetPluginNameStatic() {
- return ConstString("arm");
-}
-
void ArchitectureArm::Initialize() {
PluginManager::RegisterPlugin(GetPluginNameStatic(),
"Arm-specific algorithms",
@@ -39,9 +35,6 @@ std::unique_ptr<Architecture> ArchitectureArm::Create(const ArchSpec &arch) {
return std::unique_ptr<Architecture>(new ArchitectureArm());
}
-ConstString ArchitectureArm::GetPluginName() { return GetPluginNameStatic(); }
-uint32_t ArchitectureArm::GetPluginVersion() { return 1; }
-
void ArchitectureArm::OverrideStopInfo(Thread &thread) const {
// We need to check if we are stopped in Thumb mode in a IT instruction and
// detect if the condition doesn't pass. If this is the case it means we
diff --git a/lldb/source/Plugins/Architecture/Arm/ArchitectureArm.h b/lldb/source/Plugins/Architecture/Arm/ArchitectureArm.h
index 36b79c7c01a1..f579d6b62505 100644
--- a/lldb/source/Plugins/Architecture/Arm/ArchitectureArm.h
+++ b/lldb/source/Plugins/Architecture/Arm/ArchitectureArm.h
@@ -15,12 +15,11 @@ namespace lldb_private {
class ArchitectureArm : public Architecture {
public:
- static ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "arm"; }
static void Initialize();
static void Terminate();
- ConstString GetPluginName() override;
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
void OverrideStopInfo(Thread &thread) const override;
diff --git a/lldb/source/Plugins/Architecture/Mips/ArchitectureMips.cpp b/lldb/source/Plugins/Architecture/Mips/ArchitectureMips.cpp
index 757c91570009..865c72fed61c 100644
--- a/lldb/source/Plugins/Architecture/Mips/ArchitectureMips.cpp
+++ b/lldb/source/Plugins/Architecture/Mips/ArchitectureMips.cpp
@@ -23,10 +23,6 @@ using namespace lldb;
LLDB_PLUGIN_DEFINE(ArchitectureMips)
-ConstString ArchitectureMips::GetPluginNameStatic() {
- return ConstString("mips");
-}
-
void ArchitectureMips::Initialize() {
PluginManager::RegisterPlugin(GetPluginNameStatic(),
"Mips-specific algorithms",
@@ -42,9 +38,6 @@ std::unique_ptr<Architecture> ArchitectureMips::Create(const ArchSpec &arch) {
std::unique_ptr<Architecture>(new ArchitectureMips(arch)) : nullptr;
}
-ConstString ArchitectureMips::GetPluginName() { return GetPluginNameStatic(); }
-uint32_t ArchitectureMips::GetPluginVersion() { return 1; }
-
addr_t ArchitectureMips::GetCallableLoadAddress(addr_t code_addr,
AddressClass addr_class) const {
bool is_alternate_isa = false;
diff --git a/lldb/source/Plugins/Architecture/Mips/ArchitectureMips.h b/lldb/source/Plugins/Architecture/Mips/ArchitectureMips.h
index 71ee60184b69..9513a10b5965 100644
--- a/lldb/source/Plugins/Architecture/Mips/ArchitectureMips.h
+++ b/lldb/source/Plugins/Architecture/Mips/ArchitectureMips.h
@@ -16,12 +16,11 @@ namespace lldb_private {
class ArchitectureMips : public Architecture {
public:
- static ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "mips"; }
static void Initialize();
static void Terminate();
- ConstString GetPluginName() override;
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
void OverrideStopInfo(Thread &thread) const override {}
diff --git a/lldb/source/Plugins/Architecture/PPC64/ArchitecturePPC64.cpp b/lldb/source/Plugins/Architecture/PPC64/ArchitecturePPC64.cpp
index 94301ecf052c..b8fac55e41da 100644
--- a/lldb/source/Plugins/Architecture/PPC64/ArchitecturePPC64.cpp
+++ b/lldb/source/Plugins/Architecture/PPC64/ArchitecturePPC64.cpp
@@ -22,10 +22,6 @@ using namespace lldb;
LLDB_PLUGIN_DEFINE(ArchitecturePPC64)
-ConstString ArchitecturePPC64::GetPluginNameStatic() {
- return ConstString("ppc64");
-}
-
void ArchitecturePPC64::Initialize() {
PluginManager::RegisterPlugin(GetPluginNameStatic(),
"PPC64-specific algorithms",
@@ -43,9 +39,6 @@ std::unique_ptr<Architecture> ArchitecturePPC64::Create(const ArchSpec &arch) {
return nullptr;
}
-ConstString ArchitecturePPC64::GetPluginName() { return GetPluginNameStatic(); }
-uint32_t ArchitecturePPC64::GetPluginVersion() { return 1; }
-
static int32_t GetLocalEntryOffset(const Symbol &sym) {
unsigned char other = sym.GetFlags() >> 8 & 0xFF;
return llvm::ELF::decodePPC64LocalEntryOffset(other);
diff --git a/lldb/source/Plugins/Architecture/PPC64/ArchitecturePPC64.h b/lldb/source/Plugins/Architecture/PPC64/ArchitecturePPC64.h
index 25210d37e53a..80f7f27b54cc 100644
--- a/lldb/source/Plugins/Architecture/PPC64/ArchitecturePPC64.h
+++ b/lldb/source/Plugins/Architecture/PPC64/ArchitecturePPC64.h
@@ -15,12 +15,11 @@ namespace lldb_private {
class ArchitecturePPC64 : public Architecture {
public:
- static ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "ppc64"; }
static void Initialize();
static void Terminate();
- ConstString GetPluginName() override;
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
void OverrideStopInfo(Thread &thread) const override {}
diff --git a/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp b/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp
index 7cd505d0ed29..2cf32bdd3800 100644
--- a/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp
+++ b/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp
@@ -21,9 +21,9 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ScopedPrinter.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "lldb/Core/Address.h"
@@ -61,6 +61,8 @@ public:
bool CanBranch(llvm::MCInst &mc_inst) const;
bool HasDelaySlot(llvm::MCInst &mc_inst) const;
bool IsCall(llvm::MCInst &mc_inst) const;
+ bool IsLoad(llvm::MCInst &mc_inst) const;
+ bool IsAuthenticated(llvm::MCInst &mc_inst) const;
private:
MCDisasmInstance(std::unique_ptr<llvm::MCInstrInfo> &&instr_info_up,
@@ -102,6 +104,16 @@ public:
return m_has_delay_slot;
}
+ bool IsLoad() override {
+ VisitInstruction();
+ return m_is_load;
+ }
+
+ bool IsAuthenticated() override {
+ VisitInstruction();
+ return m_is_authenticated;
+ }
+
DisassemblerLLVMC::MCDisasmInstance *GetDisasmToUse(bool &is_alternate_isa) {
DisassemblerScope disasm(*this);
return GetDisasmToUse(is_alternate_isa, disasm);
@@ -817,9 +829,13 @@ protected:
// - Might branch
// - Does not have a delay slot
// - Is not a call
+ // - Is not a load
+ // - Is not an authenticated instruction
bool m_does_branch = true;
bool m_has_delay_slot = false;
bool m_is_call = false;
+ bool m_is_load = false;
+ bool m_is_authenticated = false;
void VisitInstruction() {
if (m_has_visited_instruction)
@@ -849,6 +865,8 @@ protected:
m_does_branch = mc_disasm_ptr->CanBranch(inst);
m_has_delay_slot = mc_disasm_ptr->HasDelaySlot(inst);
m_is_call = mc_disasm_ptr->IsCall(inst);
+ m_is_load = mc_disasm_ptr->IsLoad(inst);
+ m_is_authenticated = mc_disasm_ptr->IsAuthenticated(inst);
}
private:
@@ -1027,10 +1045,32 @@ bool DisassemblerLLVMC::MCDisasmInstance::IsCall(llvm::MCInst &mc_inst) const {
return m_instr_info_up->get(mc_inst.getOpcode()).isCall();
}
+bool DisassemblerLLVMC::MCDisasmInstance::IsLoad(llvm::MCInst &mc_inst) const {
+ return m_instr_info_up->get(mc_inst.getOpcode()).mayLoad();
+}
+
+bool DisassemblerLLVMC::MCDisasmInstance::IsAuthenticated(
+ llvm::MCInst &mc_inst) const {
+ auto InstrDesc = m_instr_info_up->get(mc_inst.getOpcode());
+
+ // Treat software auth traps (brk 0xc470 + aut key, where 0x70 == 'p', 0xc4
+ // == 'a' + 'c') as authenticated instructions for reporting purposes, in
+ // addition to the standard authenticated instructions specified in ARMv8.3.
+ bool IsBrkC47x = false;
+ if (InstrDesc.isTrap() && mc_inst.getNumOperands() == 1) {
+ const llvm::MCOperand &Op0 = mc_inst.getOperand(0);
+ if (Op0.isImm() && Op0.getImm() >= 0xc470 && Op0.getImm() <= 0xc474)
+ IsBrkC47x = true;
+ }
+
+ return InstrDesc.isAuthenticated() || IsBrkC47x;
+}
+
DisassemblerLLVMC::DisassemblerLLVMC(const ArchSpec &arch,
const char *flavor_string)
: Disassembler(arch, flavor_string), m_exe_ctx(nullptr), m_inst(nullptr),
- m_data_from_file(false) {
+ m_data_from_file(false), m_adrp_address(LLDB_INVALID_ADDRESS),
+ m_adrp_insn() {
if (!FlavorValidForArchSpec(arch, m_flavor.c_str())) {
m_flavor.assign("default");
}
@@ -1255,11 +1295,6 @@ void DisassemblerLLVMC::Terminate() {
PluginManager::UnregisterPlugin(CreateInstance);
}
-ConstString DisassemblerLLVMC::GetPluginNameStatic() {
- static ConstString g_name("llvm-mc");
- return g_name;
-}
-
int DisassemblerLLVMC::OpInfoCallback(void *disassembler, uint64_t pc,
uint64_t offset, uint64_t size,
int tag_type, void *tag_bug) {
@@ -1310,6 +1345,46 @@ const char *DisassemblerLLVMC::SymbolLookup(uint64_t value, uint64_t *type_ptr,
Target *target = m_exe_ctx ? m_exe_ctx->GetTargetPtr() : nullptr;
Address value_so_addr;
Address pc_so_addr;
+ if (target->GetArchitecture().GetMachine() == llvm::Triple::aarch64 ||
+ target->GetArchitecture().GetMachine() == llvm::Triple::aarch64_be ||
+ target->GetArchitecture().GetMachine() == llvm::Triple::aarch64_32) {
+ if (*type_ptr == LLVMDisassembler_ReferenceType_In_ARM64_ADRP) {
+ m_adrp_address = pc;
+ m_adrp_insn = value;
+ *name = nullptr;
+ *type_ptr = LLVMDisassembler_ReferenceType_InOut_None;
+ return nullptr;
+ }
+ // If this instruction is an ADD and
+ // the previous instruction was an ADRP and
+ // the ADRP's register and this ADD's register are the same,
+ // then this is a pc-relative address calculation.
+ if (*type_ptr == LLVMDisassembler_ReferenceType_In_ARM64_ADDXri &&
+ m_adrp_insn.hasValue() && m_adrp_address == pc - 4 &&
+ (m_adrp_insn.getValue() & 0x1f) == ((value >> 5) & 0x1f)) {
+ uint32_t addxri_inst;
+ uint64_t adrp_imm, addxri_imm;
+ // Get immlo and immhi bits, OR them together to get the ADRP imm
+ // value.
+ adrp_imm = ((m_adrp_insn.getValue() & 0x00ffffe0) >> 3) |
+ ((m_adrp_insn.getValue() >> 29) & 0x3);
+ // if high bit of immhi after right-shifting set, sign extend
+ if (adrp_imm & (1ULL << 20))
+ adrp_imm |= ~((1ULL << 21) - 1);
+
+ addxri_inst = value;
+ addxri_imm = (addxri_inst >> 10) & 0xfff;
+ // check if 'sh' bit is set, shift imm value up if so
+ // (this would make no sense, ADRP already gave us this part)
+ if ((addxri_inst >> (12 + 5 + 5)) & 1)
+ addxri_imm <<= 12;
+ value = (m_adrp_address & 0xfffffffffffff000LL) + (adrp_imm << 12) +
+ addxri_imm;
+ }
+ m_adrp_address = LLDB_INVALID_ADDRESS;
+ m_adrp_insn.reset();
+ }
+
if (m_inst->UsingFileAddress()) {
ModuleSP module_sp(m_inst->GetAddress().GetModule());
if (module_sp) {
@@ -1371,12 +1446,13 @@ const char *DisassemblerLLVMC::SymbolLookup(uint64_t value, uint64_t *type_ptr,
}
}
+ // TODO: llvm-objdump sets the type_ptr to the
+ // LLVMDisassembler_ReferenceType_Out_* values
+ // based on where value_so_addr is pointing, with
+ // Mach-O specific augmentations in MachODump.cpp. e.g.
+ // see what AArch64ExternalSymbolizer::tryAddingSymbolicOperand
+ // handles.
*type_ptr = LLVMDisassembler_ReferenceType_InOut_None;
*name = nullptr;
return nullptr;
}
-
-// PluginInterface protocol
-ConstString DisassemblerLLVMC::GetPluginName() { return GetPluginNameStatic(); }
-
-uint32_t DisassemblerLLVMC::GetPluginVersion() { return 1; }
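The new SymbolLookup code above folds an ADRP/ADD pair back into the absolute address it computes: it checks that the ADD's source register matches the ADRP's destination, rebuilds the signed 21-bit ADRP page immediate from its split immlo/immhi fields, and then adds the ADD's imm12. A minimal standalone sketch of that bit manipulation, assuming the two raw 32-bit encodings and the ADRP's PC are already in hand (the helper name is illustrative, not an LLDB API):

#include <cstdint>

// Recombine an AArch64 ADRP + ADD (immediate) pair into the absolute address
// they compute, mirroring the arithmetic in DisassemblerLLVMC::SymbolLookup.
static uint64_t ReconstructAdrpAddTarget(uint64_t adrp_pc, uint32_t adrp_insn,
                                         uint32_t addxri_insn) {
  // ADRP immediate: immlo is bits [30:29], immhi is bits [23:5]; together they
  // form a signed 21-bit page offset in units of 4 KiB pages.
  uint64_t adrp_imm =
      ((adrp_insn & 0x00ffffe0) >> 3) | ((adrp_insn >> 29) & 0x3);
  if (adrp_imm & (1ULL << 20)) // sign-extend the 21-bit immediate
    adrp_imm |= ~((1ULL << 21) - 1);

  // ADD (immediate): imm12 is bits [21:10]; the 'sh' bit at [22] would shift
  // it left by 12. An ADRP-relative sequence has no use for it, but the real
  // code checks it anyway.
  uint64_t addxri_imm = (addxri_insn >> 10) & 0xfff;
  if ((addxri_insn >> 22) & 1)
    addxri_imm <<= 12;

  // ADRP zeroes the low 12 bits of the PC before applying the page offset.
  return (adrp_pc & ~0xfffULL) + (adrp_imm << 12) + addxri_imm;
}

For example, an ADRP at PC 0x100003f90 whose page immediate works out to 2, followed by an ADD with imm12 0x18, resolves to 0x100005018.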
diff --git a/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.h b/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.h
index 9b3741bdd18f..5d0204caaa9a 100644
--- a/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.h
+++ b/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.h
@@ -16,6 +16,7 @@
#include "lldb/Core/Address.h"
#include "lldb/Core/Disassembler.h"
#include "lldb/Core/PluginManager.h"
+#include "llvm/ADT/Optional.h"
class InstructionLLVMC;
@@ -31,7 +32,7 @@ public:
static void Terminate();
- static lldb_private::ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "llvm-mc"; }
static lldb_private::Disassembler *
CreateInstance(const lldb_private::ArchSpec &arch, const char *flavor);
@@ -42,9 +43,7 @@ public:
bool append, bool data_from_file) override;
// PluginInterface protocol
- lldb_private::ConstString GetPluginName() override;
-
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
protected:
friend class InstructionLLVMC;
@@ -73,6 +72,12 @@ protected:
InstructionLLVMC *m_inst;
std::mutex m_mutex;
bool m_data_from_file;
+ // Save the AArch64 ADRP instruction word and address it was at,
+ // in case the next instruction is an ADD to the same register;
+ // this is a pc-relative address calculation and we need both
+ // parts to calculate the symbolication.
+ lldb::addr_t m_adrp_address;
+ llvm::Optional<uint32_t> m_adrp_insn;
// Since we need to make two actual MC Disassemblers for ARM (ARM & THUMB),
// and there's a bit of goo to set up and own in the MC disassembler world,
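The IsAuthenticated change in the DisassemblerLLVMC hunks above also flags the software pointer-authentication failure traps that compilers emit as brk instructions with a magic immediate, in addition to the ARMv8.3 authenticated instructions proper. The check itself is just a range test on that immediate; a tiny sketch, assuming the operand value has already been pulled out of the MCInst:

#include <cstdint>

// Sketch of the brk-immediate test used by IsAuthenticated above: a
// hypothetical standalone predicate, not the LLDB method itself.
static bool IsPtrauthTrapImmediate(uint64_t brk_imm) {
  // Software pointer-auth failure traps use brk #0xc470..#0xc474
  // (0xc4 == 'a' + 'c', 0x70 == 'p'), one value per signing key.
  return brk_imm >= 0xc470 && brk_imm <= 0xc474;
}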
diff --git a/lldb/source/Plugins/DynamicLoader/Hexagon-DYLD/DynamicLoaderHexagonDYLD.cpp b/lldb/source/Plugins/DynamicLoader/Hexagon-DYLD/DynamicLoaderHexagonDYLD.cpp
index fe86b2929073..82c50a32594b 100644
--- a/lldb/source/Plugins/DynamicLoader/Hexagon-DYLD/DynamicLoaderHexagonDYLD.cpp
+++ b/lldb/source/Plugins/DynamicLoader/Hexagon-DYLD/DynamicLoaderHexagonDYLD.cpp
@@ -78,22 +78,11 @@ void DynamicLoaderHexagonDYLD::Initialize() {
void DynamicLoaderHexagonDYLD::Terminate() {}
-lldb_private::ConstString DynamicLoaderHexagonDYLD::GetPluginName() {
- return GetPluginNameStatic();
-}
-
-lldb_private::ConstString DynamicLoaderHexagonDYLD::GetPluginNameStatic() {
- static ConstString g_name("hexagon-dyld");
- return g_name;
-}
-
-const char *DynamicLoaderHexagonDYLD::GetPluginDescriptionStatic() {
+llvm::StringRef DynamicLoaderHexagonDYLD::GetPluginDescriptionStatic() {
return "Dynamic loader plug-in that watches for shared library "
"loads/unloads in Hexagon processes.";
}
-uint32_t DynamicLoaderHexagonDYLD::GetPluginVersion() { return 1; }
-
DynamicLoader *DynamicLoaderHexagonDYLD::CreateInstance(Process *process,
bool force) {
bool create = force;
diff --git a/lldb/source/Plugins/DynamicLoader/Hexagon-DYLD/DynamicLoaderHexagonDYLD.h b/lldb/source/Plugins/DynamicLoader/Hexagon-DYLD/DynamicLoaderHexagonDYLD.h
index 2570e003fd6a..54711f5e6bb3 100644
--- a/lldb/source/Plugins/DynamicLoader/Hexagon-DYLD/DynamicLoaderHexagonDYLD.h
+++ b/lldb/source/Plugins/DynamicLoader/Hexagon-DYLD/DynamicLoaderHexagonDYLD.h
@@ -24,9 +24,9 @@ public:
static void Terminate();
- static lldb_private::ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "hexagon-dyld"; }
- static const char *GetPluginDescriptionStatic();
+ static llvm::StringRef GetPluginDescriptionStatic();
static lldb_private::DynamicLoader *
CreateInstance(lldb_private::Process *process, bool force);
@@ -47,9 +47,7 @@ public:
lldb::addr_t tls_file_addr) override;
// PluginInterface protocol
- lldb_private::ConstString GetPluginName() override;
-
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
protected:
/// Runtime linker rendezvous structure.
diff --git a/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DYLDRendezvous.cpp b/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DYLDRendezvous.cpp
index 866acbddbdc8..7e80dc28e56b 100644
--- a/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DYLDRendezvous.cpp
+++ b/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DYLDRendezvous.cpp
@@ -24,28 +24,35 @@
using namespace lldb;
using namespace lldb_private;
-/// Locates the address of the rendezvous structure. Returns the address on
-/// success and LLDB_INVALID_ADDRESS on failure.
-static addr_t ResolveRendezvousAddress(Process *process) {
+DYLDRendezvous::DYLDRendezvous(Process *process)
+ : m_process(process), m_rendezvous_addr(LLDB_INVALID_ADDRESS),
+ m_executable_interpreter(false), m_current(), m_previous(),
+ m_loaded_modules(), m_soentries(), m_added_soentries(),
+ m_removed_soentries() {
+ m_thread_info.valid = false;
+ UpdateExecutablePath();
+}
+
+addr_t DYLDRendezvous::ResolveRendezvousAddress() {
Log *log(GetLogIfAnyCategoriesSet(LIBLLDB_LOG_DYNAMIC_LOADER));
addr_t info_location;
addr_t info_addr;
Status error;
- if (!process) {
+ if (!m_process) {
LLDB_LOGF(log, "%s null process provided", __FUNCTION__);
return LLDB_INVALID_ADDRESS;
}
// Try to get it from our process. This might be a remote process and might
// grab it via some remote-specific mechanism.
- info_location = process->GetImageInfoAddress();
+ info_location = m_process->GetImageInfoAddress();
LLDB_LOGF(log, "%s info_location = 0x%" PRIx64, __FUNCTION__, info_location);
// If the process fails to return an address, fall back to seeing if the
// local object file can help us find it.
if (info_location == LLDB_INVALID_ADDRESS) {
- Target *target = &process->GetTarget();
+ Target *target = &m_process->GetTarget();
if (target) {
ObjectFile *obj_file = target->GetExecutableModule()->GetObjectFile();
Address addr = obj_file->GetImageInfoAddress(target);
@@ -56,6 +63,20 @@ static addr_t ResolveRendezvousAddress(Process *process) {
"%s resolved via direct object file approach to 0x%" PRIx64,
__FUNCTION__, info_location);
} else {
+ const Symbol *_r_debug =
+ target->GetExecutableModule()->FindFirstSymbolWithNameAndType(
+ ConstString("_r_debug"));
+ if (_r_debug) {
+ info_addr = _r_debug->GetAddress().GetLoadAddress(target);
+ if (info_addr != LLDB_INVALID_ADDRESS) {
+ LLDB_LOGF(log,
+ "%s resolved by finding symbol '_r_debug' whose value is "
+ "0x%" PRIx64,
+ __FUNCTION__, info_addr);
+ m_executable_interpreter = true;
+ return info_addr;
+ }
+ }
LLDB_LOGF(log,
"%s FAILED - direct object file approach did not yield a "
"valid address",
@@ -70,9 +91,9 @@ static addr_t ResolveRendezvousAddress(Process *process) {
}
LLDB_LOGF(log, "%s reading pointer (%" PRIu32 " bytes) from 0x%" PRIx64,
- __FUNCTION__, process->GetAddressByteSize(), info_location);
+ __FUNCTION__, m_process->GetAddressByteSize(), info_location);
- info_addr = process->ReadPointerFromMemory(info_location, error);
+ info_addr = m_process->ReadPointerFromMemory(info_location, error);
if (error.Fail()) {
LLDB_LOGF(log, "%s FAILED - could not read from the info location: %s",
__FUNCTION__, error.AsCString());
@@ -90,14 +111,6 @@ static addr_t ResolveRendezvousAddress(Process *process) {
return info_addr;
}
-DYLDRendezvous::DYLDRendezvous(Process *process)
- : m_process(process), m_rendezvous_addr(LLDB_INVALID_ADDRESS), m_current(),
- m_previous(), m_loaded_modules(), m_soentries(), m_added_soentries(),
- m_removed_soentries() {
- m_thread_info.valid = false;
- UpdateExecutablePath();
-}
-
void DYLDRendezvous::UpdateExecutablePath() {
if (m_process) {
Log *log(GetLogIfAnyCategoriesSet(LIBLLDB_LOG_DYNAMIC_LOADER));
@@ -132,7 +145,8 @@ bool DYLDRendezvous::Resolve() {
__FUNCTION__, uint64_t(address_size), uint64_t(padding));
if (m_rendezvous_addr == LLDB_INVALID_ADDRESS)
- cursor = info_addr = ResolveRendezvousAddress(m_process);
+ cursor = info_addr =
+ ResolveRendezvousAddress();
else
cursor = info_addr = m_rendezvous_addr;
LLDB_LOGF(log, "DYLDRendezvous::%s cursor = 0x%" PRIx64, __FUNCTION__,
@@ -296,8 +310,10 @@ bool DYLDRendezvous::SaveSOEntriesFromRemote(
return false;
// Only add shared libraries and not the executable.
- if (!SOEntryIsMainExecutable(entry))
+ if (!SOEntryIsMainExecutable(entry)) {
+ UpdateFileSpecIfNecessary(entry);
m_soentries.push_back(entry);
+ }
}
m_loaded_modules = module_list;
@@ -324,6 +340,7 @@ bool DYLDRendezvous::AddSOEntriesFromRemote(
// Only add shared libraries and not the executable.
if (!SOEntryIsMainExecutable(entry)) {
+ UpdateFileSpecIfNecessary(entry);
m_soentries.push_back(entry);
m_added_soentries.push_back(entry);
}
@@ -383,6 +400,8 @@ bool DYLDRendezvous::AddSOEntries() {
if (SOEntryIsMainExecutable(entry))
continue;
+ UpdateFileSpecIfNecessary(entry);
+
pos = std::find(m_soentries.begin(), m_soentries.end(), entry);
if (pos == m_soentries.end()) {
m_soentries.push_back(entry);
@@ -424,6 +443,10 @@ bool DYLDRendezvous::SOEntryIsMainExecutable(const SOEntry &entry) {
case llvm::Triple::Linux:
if (triple.isAndroid())
return entry.file_spec == m_exe_file_spec;
+ // If we are debugging ld.so, then all SOEntries should be treated as
+ // libraries, including the "main" one (denoted by an empty string).
+ if (!entry.file_spec && m_executable_interpreter)
+ return false;
return !entry.file_spec;
default:
return false;
@@ -447,6 +470,8 @@ bool DYLDRendezvous::TakeSnapshot(SOEntryList &entry_list) {
if (SOEntryIsMainExecutable(entry))
continue;
+ UpdateFileSpecIfNecessary(entry);
+
entry_list.push_back(entry);
}
@@ -512,6 +537,19 @@ void DYLDRendezvous::UpdateBaseAddrIfNecessary(SOEntry &entry,
}
}
+void DYLDRendezvous::UpdateFileSpecIfNecessary(SOEntry &entry) {
+ // Updates filename if empty. It is useful while debugging ld.so,
+ // when the link map returns empty string for the main executable.
+ if (!entry.file_spec) {
+ MemoryRegionInfo region;
+ Status region_status =
+ m_process->GetMemoryRegionInfo(entry.dyn_addr, region);
+ if (!region.GetName().IsEmpty())
+ entry.file_spec.SetFile(region.GetName().AsCString(),
+ FileSpec::Style::native);
+ }
+}
+
bool DYLDRendezvous::ReadSOEntryFromMemory(lldb::addr_t addr, SOEntry &entry) {
entry.clear();
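Taken together, the DYLDRendezvous changes above give ResolveRendezvousAddress three sources for the rendezvous location and record when the last one was used: the process, the executable's object file, and finally the _r_debug symbol itself, which is the case that makes debugging ld.so as the main program work. A hypothetical condensation of that ordering, with stand-in values rather than the LLDB interfaces:

#include <cstdint>
#include <utility>

constexpr uint64_t kInvalidAddress = ~0ULL; // stand-in for LLDB_INVALID_ADDRESS

// Hypothetical condensation of the resolution order after this change. The
// three inputs stand in for Process::GetImageInfoAddress(), the object file's
// image-info address, and the _r_debug symbol lookup; the bool mirrors
// m_executable_interpreter. Note that in the real code the first two results
// are locations that are still dereferenced with ReadPointerFromMemory(),
// while the _r_debug symbol value is returned directly.
static std::pair<uint64_t, bool>
ResolveRendezvousOrder(uint64_t from_process, uint64_t from_object_file,
                       uint64_t from_r_debug_symbol) {
  if (from_process != kInvalidAddress)
    return {from_process, false};        // normal case, possibly remote
  if (from_object_file != kInvalidAddress)
    return {from_object_file, false};    // fall back to the executable
  if (from_r_debug_symbol != kInvalidAddress)
    return {from_r_debug_symbol, true};  // debugging ld.so itself
  return {kInvalidAddress, false};
}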
diff --git a/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DYLDRendezvous.h b/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DYLDRendezvous.h
index 5775f5a730cd..04d3e665f859 100644
--- a/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DYLDRendezvous.h
+++ b/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DYLDRendezvous.h
@@ -47,6 +47,12 @@ class DYLDRendezvous {
Rendezvous() = default;
};
+ /// Locates the address of the rendezvous structure. It updates
+ /// m_executable_interpreter if address is extracted from _r_debug.
+ ///
+ /// \returns address on success and LLDB_INVALID_ADDRESS on failure.
+ lldb::addr_t ResolveRendezvousAddress();
+
public:
// Various metadata supplied by the inferior's threading library to describe
// the per-thread state.
@@ -183,6 +189,9 @@ protected:
/// Location of the r_debug structure in the inferiors address space.
lldb::addr_t m_rendezvous_addr;
+ // True if the main program is the dynamic linker/loader/program interpreter.
+ bool m_executable_interpreter;
+
/// Current and previous snapshots of the rendezvous structure.
Rendezvous m_current;
Rendezvous m_previous;
@@ -246,6 +255,8 @@ protected:
void UpdateBaseAddrIfNecessary(SOEntry &entry, std::string const &file_path);
+ void UpdateFileSpecIfNecessary(SOEntry &entry);
+
bool SOEntryIsMainExecutable(const SOEntry &entry);
/// Reads the current list of shared objects according to the link map
diff --git a/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.cpp b/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.cpp
index 160faa74af23..d9cbcce22c52 100644
--- a/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.cpp
+++ b/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.cpp
@@ -38,22 +38,11 @@ void DynamicLoaderPOSIXDYLD::Initialize() {
void DynamicLoaderPOSIXDYLD::Terminate() {}
-lldb_private::ConstString DynamicLoaderPOSIXDYLD::GetPluginName() {
- return GetPluginNameStatic();
-}
-
-lldb_private::ConstString DynamicLoaderPOSIXDYLD::GetPluginNameStatic() {
- static ConstString g_name("linux-dyld");
- return g_name;
-}
-
-const char *DynamicLoaderPOSIXDYLD::GetPluginDescriptionStatic() {
+llvm::StringRef DynamicLoaderPOSIXDYLD::GetPluginDescriptionStatic() {
return "Dynamic loader plug-in that watches for shared library "
"loads/unloads in POSIX processes.";
}
-uint32_t DynamicLoaderPOSIXDYLD::GetPluginVersion() { return 1; }
-
DynamicLoader *DynamicLoaderPOSIXDYLD::CreateInstance(Process *process,
bool force) {
bool create = force;
@@ -333,28 +322,37 @@ bool DynamicLoaderPOSIXDYLD::SetRendezvousBreakpoint() {
LLDB_LOG(log, "Rendezvous structure is not set up yet. "
"Trying to locate rendezvous breakpoint in the interpreter "
"by symbol name.");
- ModuleSP interpreter = LoadInterpreterModule();
- if (!interpreter) {
- LLDB_LOG(log, "Can't find interpreter, rendezvous breakpoint isn't set.");
- return false;
- }
-
- // Function names from different dynamic loaders that are known to be used
- // as rendezvous between the loader and debuggers.
+ // Function names from different dynamic loaders that are known to be
+ // used as rendezvous between the loader and debuggers.
static std::vector<std::string> DebugStateCandidates{
"_dl_debug_state", "rtld_db_dlactivity", "__dl_rtld_db_dlactivity",
"r_debug_state", "_r_debug_state", "_rtld_debug_state",
};
- FileSpecList containingModules;
- containingModules.Append(interpreter->GetFileSpec());
- dyld_break = target.CreateBreakpoint(
- &containingModules, nullptr /* containingSourceFiles */,
- DebugStateCandidates, eFunctionNameTypeFull, eLanguageTypeC,
- 0, /* offset */
- eLazyBoolNo, /* skip_prologue */
- true, /* internal */
- false /* request_hardware */);
+ ModuleSP interpreter = LoadInterpreterModule();
+ if (!interpreter) {
+ FileSpecList containingModules;
+ containingModules.Append(
+ m_process->GetTarget().GetExecutableModulePointer()->GetFileSpec());
+
+ dyld_break = target.CreateBreakpoint(
+ &containingModules, /*containingSourceFiles=*/nullptr,
+ DebugStateCandidates, eFunctionNameTypeFull, eLanguageTypeC,
+ /*offset=*/0,
+ /*skip_prologue=*/eLazyBoolNo,
+ /*internal=*/true,
+ /*request_hardware=*/false);
+ } else {
+ FileSpecList containingModules;
+ containingModules.Append(interpreter->GetFileSpec());
+ dyld_break = target.CreateBreakpoint(
+ &containingModules, /*containingSourceFiles=*/nullptr,
+ DebugStateCandidates, eFunctionNameTypeFull, eLanguageTypeC,
+ /*offset=*/0,
+ /*skip_prologue=*/eLazyBoolNo,
+ /*internal=*/true,
+ /*request_hardware=*/false);
+ }
}
if (dyld_break->GetNumResolvedLocations() != 1) {
@@ -442,14 +440,18 @@ void DynamicLoaderPOSIXDYLD::RefreshModules() {
if (module_sp->GetObjectFile()->GetBaseAddress().GetLoadAddress(
&m_process->GetTarget()) == m_interpreter_base &&
module_sp != m_interpreter_module.lock()) {
- // If this is a duplicate instance of ld.so, unload it. We may end up
- // with it if we load it via a different path than before (symlink
- // vs real path).
- // TODO: remove this once we either fix library matching or avoid
- // loading the interpreter when setting the rendezvous breakpoint.
- UnloadSections(module_sp);
- loaded_modules.Remove(module_sp);
- continue;
+ if (m_interpreter_module.lock() == nullptr) {
+ m_interpreter_module = module_sp;
+ } else {
+ // If this is a duplicate instance of ld.so, unload it. We may end
+ // up with it if we load it via a different path than before
+ // (symlink vs real path).
+ // TODO: remove this once we either fix library matching or avoid
+ // loading the interpreter when setting the rendezvous breakpoint.
+ UnloadSections(module_sp);
+ loaded_modules.Remove(module_sp);
+ continue;
+ }
}
loaded_modules.AppendIfNeeded(module_sp);
@@ -620,6 +622,7 @@ void DynamicLoaderPOSIXDYLD::LoadAllCurrentModules() {
}
m_process->GetTarget().ModulesDidLoad(module_list);
+ m_initial_modules_added = true;
}
addr_t DynamicLoaderPOSIXDYLD::ComputeLoadOffset() {
diff --git a/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.h b/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.h
index 61567801fdd0..422856e7a660 100644
--- a/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.h
+++ b/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.h
@@ -30,9 +30,9 @@ public:
static void Terminate();
- static lldb_private::ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "posix-dyld"; }
- static const char *GetPluginDescriptionStatic();
+ static llvm::StringRef GetPluginDescriptionStatic();
static lldb_private::DynamicLoader *
CreateInstance(lldb_private::Process *process, bool force);
@@ -53,9 +53,7 @@ public:
lldb::addr_t tls_file_addr) override;
// PluginInterface protocol
- lldb_private::ConstString GetPluginName() override;
-
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
protected:
/// Runtime linker rendezvous structure.
diff --git a/lldb/source/Plugins/DynamicLoader/Static/DynamicLoaderStatic.cpp b/lldb/source/Plugins/DynamicLoader/Static/DynamicLoaderStatic.cpp
index 8a5528f1e474..a39aa2280ab8 100644
--- a/lldb/source/Plugins/DynamicLoader/Static/DynamicLoaderStatic.cpp
+++ b/lldb/source/Plugins/DynamicLoader/Static/DynamicLoaderStatic.cpp
@@ -152,19 +152,7 @@ void DynamicLoaderStatic::Terminate() {
PluginManager::UnregisterPlugin(CreateInstance);
}
-lldb_private::ConstString DynamicLoaderStatic::GetPluginNameStatic() {
- static ConstString g_name("static");
- return g_name;
-}
-
-const char *DynamicLoaderStatic::GetPluginDescriptionStatic() {
+llvm::StringRef DynamicLoaderStatic::GetPluginDescriptionStatic() {
return "Dynamic loader plug-in that will load any images at the static "
"addresses contained in each image.";
}
-
-// PluginInterface protocol
-lldb_private::ConstString DynamicLoaderStatic::GetPluginName() {
- return GetPluginNameStatic();
-}
-
-uint32_t DynamicLoaderStatic::GetPluginVersion() { return 1; }
diff --git a/lldb/source/Plugins/DynamicLoader/Static/DynamicLoaderStatic.h b/lldb/source/Plugins/DynamicLoader/Static/DynamicLoaderStatic.h
index 1a36c7851c2a..dac19dcd38d7 100644
--- a/lldb/source/Plugins/DynamicLoader/Static/DynamicLoaderStatic.h
+++ b/lldb/source/Plugins/DynamicLoader/Static/DynamicLoaderStatic.h
@@ -23,9 +23,9 @@ public:
static void Terminate();
- static lldb_private::ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "static"; }
- static const char *GetPluginDescriptionStatic();
+ static llvm::StringRef GetPluginDescriptionStatic();
static lldb_private::DynamicLoader *
CreateInstance(lldb_private::Process *process, bool force);
@@ -44,9 +44,7 @@ public:
lldb_private::Status CanLoadImage() override;
// PluginInterface protocol
- lldb_private::ConstString GetPluginName() override;
-
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
private:
void LoadAllImagesAtFileAddresses();
diff --git a/lldb/source/Plugins/DynamicLoader/Windows-DYLD/DynamicLoaderWindowsDYLD.cpp b/lldb/source/Plugins/DynamicLoader/Windows-DYLD/DynamicLoaderWindowsDYLD.cpp
index 54dfa3e9d6f2..bf6dc57003d5 100644
--- a/lldb/source/Plugins/DynamicLoader/Windows-DYLD/DynamicLoaderWindowsDYLD.cpp
+++ b/lldb/source/Plugins/DynamicLoader/Windows-DYLD/DynamicLoaderWindowsDYLD.cpp
@@ -37,12 +37,7 @@ void DynamicLoaderWindowsDYLD::Initialize() {
void DynamicLoaderWindowsDYLD::Terminate() {}
-ConstString DynamicLoaderWindowsDYLD::GetPluginNameStatic() {
- static ConstString g_plugin_name("windows-dyld");
- return g_plugin_name;
-}
-
-const char *DynamicLoaderWindowsDYLD::GetPluginDescriptionStatic() {
+llvm::StringRef DynamicLoaderWindowsDYLD::GetPluginDescriptionStatic() {
return "Dynamic loader plug-in that watches for shared library "
"loads/unloads in Windows processes.";
}
@@ -174,12 +169,6 @@ void DynamicLoaderWindowsDYLD::DidLaunch() {
Status DynamicLoaderWindowsDYLD::CanLoadImage() { return Status(); }
-ConstString DynamicLoaderWindowsDYLD::GetPluginName() {
- return GetPluginNameStatic();
-}
-
-uint32_t DynamicLoaderWindowsDYLD::GetPluginVersion() { return 1; }
-
ThreadPlanSP
DynamicLoaderWindowsDYLD::GetStepThroughTrampolinePlan(Thread &thread,
bool stop) {
diff --git a/lldb/source/Plugins/DynamicLoader/Windows-DYLD/DynamicLoaderWindowsDYLD.h b/lldb/source/Plugins/DynamicLoader/Windows-DYLD/DynamicLoaderWindowsDYLD.h
index 502a4c160ddd..42ea5aacecb4 100644
--- a/lldb/source/Plugins/DynamicLoader/Windows-DYLD/DynamicLoaderWindowsDYLD.h
+++ b/lldb/source/Plugins/DynamicLoader/Windows-DYLD/DynamicLoaderWindowsDYLD.h
@@ -24,8 +24,8 @@ public:
static void Initialize();
static void Terminate();
- static ConstString GetPluginNameStatic();
- static const char *GetPluginDescriptionStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "windows-dyld"; }
+ static llvm::StringRef GetPluginDescriptionStatic();
static DynamicLoader *CreateInstance(Process *process, bool force);
@@ -39,8 +39,7 @@ public:
lldb::ThreadPlanSP GetStepThroughTrampolinePlan(Thread &thread,
bool stop) override;
- ConstString GetPluginName() override;
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
protected:
lldb::addr_t GetLoadAddress(lldb::ModuleSP executable);
diff --git a/lldb/source/Plugins/DynamicLoader/wasm-DYLD/DynamicLoaderWasmDYLD.cpp b/lldb/source/Plugins/DynamicLoader/wasm-DYLD/DynamicLoaderWasmDYLD.cpp
index ae7e011eaa52..1634372bb905 100644
--- a/lldb/source/Plugins/DynamicLoader/wasm-DYLD/DynamicLoaderWasmDYLD.cpp
+++ b/lldb/source/Plugins/DynamicLoader/wasm-DYLD/DynamicLoaderWasmDYLD.cpp
@@ -30,12 +30,7 @@ void DynamicLoaderWasmDYLD::Initialize() {
GetPluginDescriptionStatic(), CreateInstance);
}
-ConstString DynamicLoaderWasmDYLD::GetPluginNameStatic() {
- static ConstString g_plugin_name("wasm-dyld");
- return g_plugin_name;
-}
-
-const char *DynamicLoaderWasmDYLD::GetPluginDescriptionStatic() {
+llvm::StringRef DynamicLoaderWasmDYLD::GetPluginDescriptionStatic() {
return "Dynamic loader plug-in that watches for shared library "
"loads/unloads in WebAssembly engines.";
}
diff --git a/lldb/source/Plugins/DynamicLoader/wasm-DYLD/DynamicLoaderWasmDYLD.h b/lldb/source/Plugins/DynamicLoader/wasm-DYLD/DynamicLoaderWasmDYLD.h
index 4a18972bb848..fe67e5885904 100644
--- a/lldb/source/Plugins/DynamicLoader/wasm-DYLD/DynamicLoaderWasmDYLD.h
+++ b/lldb/source/Plugins/DynamicLoader/wasm-DYLD/DynamicLoaderWasmDYLD.h
@@ -21,8 +21,8 @@ public:
static void Initialize();
static void Terminate() {}
- static ConstString GetPluginNameStatic();
- static const char *GetPluginDescriptionStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "wasm-dyld"; }
+ static llvm::StringRef GetPluginDescriptionStatic();
static DynamicLoader *CreateInstance(Process *process, bool force);
@@ -37,8 +37,7 @@ public:
/// PluginInterface protocol.
/// \{
- ConstString GetPluginName() override { return GetPluginNameStatic(); }
- uint32_t GetPluginVersion() override { return 1; }
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
/// \}
};
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.cpp
index 94647b0ef978..80469e292580 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.cpp
+++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.cpp
@@ -824,7 +824,7 @@ ClangASTImporter::ASTImporterDelegate::ImportImpl(Decl *From) {
}
// Check which ASTContext this declaration originally came from.
- DeclOrigin origin = m_master.GetDeclOrigin(From);
+ DeclOrigin origin = m_main.GetDeclOrigin(From);
// Prevent infinite recursion when the origin tracking contains a cycle.
assert(origin.decl != From && "Origin points to itself?");
@@ -853,7 +853,7 @@ ClangASTImporter::ASTImporterDelegate::ImportImpl(Decl *From) {
// though all these different source ASTContexts just got a copy from
// one source AST).
if (origin.Valid()) {
- auto R = m_master.CopyDecl(&getToContext(), origin.decl);
+ auto R = m_main.CopyDecl(&getToContext(), origin.decl);
if (R) {
RegisterImportedDecl(From, R);
return R;
@@ -862,7 +862,7 @@ ClangASTImporter::ASTImporterDelegate::ImportImpl(Decl *From) {
// If we have a forcefully completed type, try to find an actual definition
// for it in other modules.
- const ClangASTMetadata *md = m_master.GetDeclMetadata(From);
+ const ClangASTMetadata *md = m_main.GetDeclMetadata(From);
auto *td = dyn_cast<TagDecl>(From);
if (td && md && md->IsForcefullyCompleted()) {
Log *log = GetLogIfAllCategoriesSet(LIBLLDB_LOG_EXPRESSIONS);
@@ -888,37 +888,6 @@ ClangASTImporter::ASTImporterDelegate::ImportImpl(Decl *From) {
LLDB_LOG(log, "[ClangASTImporter] Complete definition not found");
}
- // Disable the minimal import for fields that have record types. There is
- // no point in minimally importing the record behind their type as Clang
- // will anyway request their definition when the FieldDecl is added to the
- // RecordDecl (as Clang will query the FieldDecl's type for things such
- // as a deleted constexpr destructor).
- // By importing the type ahead of time we avoid some corner cases where
- // the FieldDecl's record is importing in the middle of Clang's
- // `DeclContext::addDecl` logic.
- if (clang::FieldDecl *fd = dyn_cast<FieldDecl>(From)) {
- // This is only necessary because we do the 'minimal import'. Remove this
- // once LLDB stopped using that mode.
- assert(isMinimalImport() && "Only necessary for minimal import");
- QualType field_type = fd->getType();
- if (field_type->isRecordType()) {
- // First get the underlying record and minimally import it.
- clang::TagDecl *record_decl = field_type->getAsTagDecl();
- llvm::Expected<Decl *> imported = Import(record_decl);
- if (!imported)
- return imported.takeError();
- // Check how/if the import got redirected to a different AST. Now
- // import the definition of what was actually imported. If there is no
- // origin then that means the record was imported by just picking a
- // compatible type in the target AST (in which case there is no more
- // importing to do).
- if (clang::Decl *origin = m_master.GetDeclOrigin(*imported).decl) {
- if (llvm::Error def_err = ImportDefinition(record_decl))
- return std::move(def_err);
- }
- }
- }
-
return ASTImporter::ImportImpl(From);
}
@@ -1076,7 +1045,7 @@ void ClangASTImporter::ASTImporterDelegate::Imported(clang::Decl *from,
}
lldb::user_id_t user_id = LLDB_INVALID_UID;
- ClangASTMetadata *metadata = m_master.GetDeclMetadata(from);
+ ClangASTMetadata *metadata = m_main.GetDeclMetadata(from);
if (metadata)
user_id = metadata->GetUserID();
@@ -1100,9 +1069,9 @@ void ClangASTImporter::ASTImporterDelegate::Imported(clang::Decl *from,
}
ASTContextMetadataSP to_context_md =
- m_master.GetContextMetadata(&to->getASTContext());
+ m_main.GetContextMetadata(&to->getASTContext());
ASTContextMetadataSP from_context_md =
- m_master.MaybeGetContextMetadata(m_source_ctx);
+ m_main.MaybeGetContextMetadata(m_source_ctx);
if (from_context_md) {
DeclOrigin origin = from_context_md->getOrigin(from);
@@ -1113,7 +1082,7 @@ void ClangASTImporter::ASTImporterDelegate::Imported(clang::Decl *from,
to_context_md->setOrigin(to, origin);
ImporterDelegateSP direct_completer =
- m_master.GetDelegate(&to->getASTContext(), origin.ctx);
+ m_main.GetDelegate(&to->getASTContext(), origin.ctx);
if (direct_completer.get() != this)
direct_completer->ASTImporter::Imported(origin.decl, to);
@@ -1174,7 +1143,7 @@ void ClangASTImporter::ASTImporterDelegate::Imported(clang::Decl *from,
}
if (auto *to_namespace_decl = dyn_cast<NamespaceDecl>(to)) {
- m_master.BuildNamespaceMap(to_namespace_decl);
+ m_main.BuildNamespaceMap(to_namespace_decl);
to_namespace_decl->setHasExternalVisibleStorage();
}
@@ -1203,10 +1172,10 @@ void ClangASTImporter::ASTImporterDelegate::Imported(clang::Decl *from,
}
if (clang::CXXMethodDecl *to_method = dyn_cast<CXXMethodDecl>(to))
- MaybeCompleteReturnType(m_master, to_method);
+ MaybeCompleteReturnType(m_main, to_method);
}
clang::Decl *
ClangASTImporter::ASTImporterDelegate::GetOriginalDecl(clang::Decl *To) {
- return m_master.GetDeclOrigin(To).decl;
+ return m_main.GetDeclOrigin(To).decl;
}
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.h b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.h
index 4f589d34aa48..e565a96b217f 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.h
+++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.h
@@ -259,11 +259,11 @@ public:
/// CxxModuleHandler to replace any missing or malformed declarations with
/// their counterpart from a C++ module.
struct ASTImporterDelegate : public clang::ASTImporter {
- ASTImporterDelegate(ClangASTImporter &master, clang::ASTContext *target_ctx,
+ ASTImporterDelegate(ClangASTImporter &main, clang::ASTContext *target_ctx,
clang::ASTContext *source_ctx)
- : clang::ASTImporter(*target_ctx, master.m_file_manager, *source_ctx,
- master.m_file_manager, true /*minimal*/),
- m_master(master), m_source_ctx(source_ctx) {
+ : clang::ASTImporter(*target_ctx, main.m_file_manager, *source_ctx,
+ main.m_file_manager, true /*minimal*/),
+ m_main(main), m_source_ctx(source_ctx) {
// Target and source ASTContext shouldn't be identical. Importing AST
// nodes within the same AST doesn't make any sense as the whole idea
// is to import them to a different AST.
@@ -329,7 +329,7 @@ public:
/// were created from the 'std' C++ module to prevent that the Importer
/// tries to sync them with the broken equivalent in the debug info AST.
llvm::SmallPtrSet<clang::Decl *, 16> m_decls_to_ignore;
- ClangASTImporter &m_master;
+ ClangASTImporter &m_main;
clang::ASTContext *m_source_ctx;
CxxModuleHandler *m_std_handler = nullptr;
/// The currently attached listener.
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp
index b43423707ae1..410d8a95cb12 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp
+++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp
@@ -479,7 +479,10 @@ void ClangASTSource::FindExternalLexicalDecls(
decl->getDeclKindName(), ast_dump);
}
- CopyDecl(decl);
+ Decl *copied_decl = CopyDecl(decl);
+
+ if (!copied_decl)
+ continue;
// FIXME: We should add the copied decl to the 'decls' list. This would
// add the copied Decl into the DeclContext and make sure that we
@@ -489,6 +492,12 @@ void ClangASTSource::FindExternalLexicalDecls(
// lookup issues later on.
// We can't just add them for now as the ASTImporter already added the
// decl into the DeclContext and this would add it twice.
+
+ if (FieldDecl *copied_field = dyn_cast<FieldDecl>(copied_decl)) {
+ QualType copied_field_type = copied_field->getType();
+
+ m_ast_importer_sp->RequireCompleteType(copied_field_type);
+ }
} else {
SkippedDecls = true;
}
@@ -974,8 +983,9 @@ void ClangASTSource::FindObjCMethodDecls(NameSearchContext &context) {
interface_decl->getName(), selector_name);
SymbolContextList sc_list;
- const bool include_symbols = false;
- const bool include_inlines = false;
+ ModuleFunctionSearchOptions function_options;
+ function_options.include_symbols = false;
+ function_options.include_inlines = false;
std::string interface_name = interface_decl->getNameAsString();
@@ -986,9 +996,9 @@ void ClangASTSource::FindObjCMethodDecls(NameSearchContext &context) {
ConstString instance_method_name(ms.GetString());
sc_list.Clear();
- m_target->GetImages().FindFunctions(
- instance_method_name, lldb::eFunctionNameTypeFull, include_symbols,
- include_inlines, sc_list);
+ m_target->GetImages().FindFunctions(instance_method_name,
+ lldb::eFunctionNameTypeFull,
+ function_options, sc_list);
if (sc_list.GetSize())
break;
@@ -999,9 +1009,9 @@ void ClangASTSource::FindObjCMethodDecls(NameSearchContext &context) {
ConstString class_method_name(ms.GetString());
sc_list.Clear();
- m_target->GetImages().FindFunctions(
- class_method_name, lldb::eFunctionNameTypeFull, include_symbols,
- include_inlines, sc_list);
+ m_target->GetImages().FindFunctions(class_method_name,
+ lldb::eFunctionNameTypeFull,
+ function_options, sc_list);
if (sc_list.GetSize())
break;
@@ -1012,9 +1022,9 @@ void ClangASTSource::FindObjCMethodDecls(NameSearchContext &context) {
SymbolContextList candidate_sc_list;
- m_target->GetImages().FindFunctions(
- selector_name, lldb::eFunctionNameTypeSelector, include_symbols,
- include_inlines, candidate_sc_list);
+ m_target->GetImages().FindFunctions(selector_name,
+ lldb::eFunctionNameTypeSelector,
+ function_options, candidate_sc_list);
for (uint32_t ci = 0, ce = candidate_sc_list.GetSize(); ci != ce; ++ci) {
SymbolContext candidate_sc;
@@ -1494,7 +1504,7 @@ bool ClangASTSource::layoutRecordType(const RecordDecl *record, uint64_t &size,
LLDB_LOG(log,
"LayoutRecordType on (ASTContext*){0} '{1}' for (RecordDecl*)"
- "{3} [name = '{4}']",
+ "{2} [name = '{3}']",
m_ast_context, m_clang_ast_context->getDisplayName(), record,
record->getName());
@@ -1570,8 +1580,10 @@ bool ClangASTSource::layoutRecordType(const RecordDecl *record, uint64_t &size,
fe = record->field_end();
fi != fe; ++fi) {
LLDB_LOG(log,
- "LRT (FieldDecl*){0}, Name = '{1}', Offset = {2} bits",
- *fi, fi->getName(), field_offsets[*fi]);
+ "LRT (FieldDecl*){0}, Name = '{1}', Type = '{2}', Offset = "
+ "{3} bits",
+ *fi, fi->getName(), fi->getType().getAsString(),
+ field_offsets[*fi]);
}
DeclFromParser<const CXXRecordDecl> parser_cxx_record =
DynCast<const CXXRecordDecl>(parser_record);
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp
index 731b81c61a6f..846c1597292b 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp
+++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp
@@ -59,9 +59,7 @@ using namespace lldb;
using namespace lldb_private;
using namespace clang;
-namespace {
-const char *g_lldb_local_vars_namespace_cstr = "$__lldb_local_vars";
-} // anonymous namespace
+static const char *g_lldb_local_vars_namespace_cstr = "$__lldb_local_vars";
ClangExpressionDeclMap::ClangExpressionDeclMap(
bool keep_result_in_memory,
@@ -1220,22 +1218,25 @@ void ClangExpressionDeclMap::LookupFunction(
}
}
- const bool include_inlines = false;
SymbolContextList sc_list;
if (namespace_decl && module_sp) {
- const bool include_symbols = false;
+ ModuleFunctionSearchOptions function_options;
+ function_options.include_inlines = false;
+ function_options.include_symbols = false;
module_sp->FindFunctions(name, namespace_decl, eFunctionNameTypeBase,
- include_symbols, include_inlines, sc_list);
+ function_options, sc_list);
} else if (target && !namespace_decl) {
- const bool include_symbols = true;
+ ModuleFunctionSearchOptions function_options;
+ function_options.include_inlines = false;
+ function_options.include_symbols = true;
// TODO Fix FindFunctions so that it doesn't return
// instance methods for eFunctionNameTypeBase.
target->GetImages().FindFunctions(
- name, eFunctionNameTypeFull | eFunctionNameTypeBase, include_symbols,
- include_inlines, sc_list);
+ name, eFunctionNameTypeFull | eFunctionNameTypeBase, function_options,
+ sc_list);
}
// If we found more than one function, see if we can use the frame's decl
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp
index 0b5e1ab059d2..a0cff3cc9bf8 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp
+++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp
@@ -1069,7 +1069,7 @@ ClangExpressionParser::ParseInternal(DiagnosticManager &diagnostic_manager,
}
if (temp_fd != -1) {
- lldb_private::NativeFile file(temp_fd, File::eOpenOptionWrite, true);
+ lldb_private::NativeFile file(temp_fd, File::eOpenOptionWriteOnly, true);
const size_t expr_text_len = strlen(expr_text);
size_t bytes_written = expr_text_len;
if (file.Write(expr_text, bytes_written).Success()) {
@@ -1309,7 +1309,7 @@ static bool FindFunctionInModule(ConstString &mangled_name,
llvm::Module *module, const char *orig_name) {
for (const auto &func : module->getFunctionList()) {
const StringRef &name = func.getName();
- if (name.find(orig_name) != StringRef::npos) {
+ if (name.contains(orig_name)) {
mangled_name.SetString(name);
return true;
}
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionSourceCode.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionSourceCode.cpp
index 31707f81a270..977a461e3f6f 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionSourceCode.cpp
+++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionSourceCode.cpp
@@ -311,8 +311,7 @@ bool ClangExpressionSourceCode::GetText(
}
if (target->GetArchitecture().GetMachine() == llvm::Triple::x86_64) {
if (lldb::PlatformSP platform_sp = target->GetPlatform()) {
- static ConstString g_platform_ios_simulator("ios-simulator");
- if (platform_sp->GetPluginName() == g_platform_ios_simulator) {
+ if (platform_sp->GetPluginName() == "ios-simulator") {
target_specific_defines = "typedef bool BOOL;\n";
}
}
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangUserExpression.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangUserExpression.cpp
index 1b205b13113b..50e9f7827838 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/ClangUserExpression.cpp
+++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangUserExpression.cpp
@@ -6,12 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#include "lldb/Host/Config.h"
-
#include <cstdio>
-#if HAVE_SYS_TYPES_H
#include <sys/types.h>
-#endif
#include <cstdlib>
#include <map>
@@ -689,15 +685,22 @@ bool ClangUserExpression::Parse(DiagnosticManager &diagnostic_manager,
SetupCppModuleImports(exe_ctx);
// If we did load any modules, then retry parsing.
if (!m_imported_cpp_modules.empty()) {
+ // Create a dedicated diagnostic manager for the second parse attempt.
+ // These diagnostics are only returned to the caller if using the fallback
+ // actually succeeded in getting the expression to parse. This prevents
+      // module-specific issues from regressing diagnostic quality in the
+      // fallback mode.
+ DiagnosticManager retry_manager;
// The module imports are injected into the source code wrapper,
// so recreate those.
- CreateSourceCode(diagnostic_manager, exe_ctx, m_imported_cpp_modules,
+ CreateSourceCode(retry_manager, exe_ctx, m_imported_cpp_modules,
/*for_completion*/ false);
- // Clear the error diagnostics from the previous parse attempt.
- diagnostic_manager.Clear();
- parse_success = TryParse(diagnostic_manager, exe_scope, exe_ctx,
+ parse_success = TryParse(retry_manager, exe_scope, exe_ctx,
execution_policy, keep_result_in_memory,
generate_debug_info);
+ // Return the parse diagnostics if we were successful.
+ if (parse_success)
+ diagnostic_manager = std::move(retry_manager);
}
}
if (!parse_success)
@@ -907,8 +910,8 @@ bool ClangUserExpression::AddArguments(ExecutionContext &exe_ctx,
if (!object_ptr_error.Success()) {
exe_ctx.GetTargetRef().GetDebugger().GetAsyncOutputStream()->Printf(
- "warning: `%s' is not accessible (substituting 0)\n",
- object_name.AsCString());
+ "warning: `%s' is not accessible (substituting 0). %s\n",
+ object_name.AsCString(), object_ptr_error.AsCString());
object_ptr = 0;
}
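The ClangUserExpression change above keeps the C++-module retry from clobbering the first parse's diagnostics: the retry gets its own DiagnosticManager, and its messages are adopted only if that retry actually parses. A small sketch of the same pattern with hypothetical stand-in types, not the LLDB classes:

#include <string>
#include <utility>
#include <vector>

// Minimal stand-in for a diagnostic sink.
struct Diagnostics {
  std::vector<std::string> messages;
};

static bool ParseOnce(Diagnostics &diags, bool use_cpp_modules) {
  // Real parsing would go here and report problems into 'diags'.
  diags.messages.push_back(use_cpp_modules ? "retry note" : "first-pass error");
  return use_cpp_modules; // pretend only the module-enabled retry succeeds
}

static bool ParseWithModuleFallback(Diagnostics &diags) {
  if (ParseOnce(diags, /*use_cpp_modules=*/false))
    return true;

  // Keep the retry's diagnostics separate so a failed fallback cannot
  // replace the (usually clearer) first-pass errors.
  Diagnostics retry;
  bool ok = ParseOnce(retry, /*use_cpp_modules=*/true);
  if (ok)
    diags = std::move(retry);
  return ok;
}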
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangUtilityFunction.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangUtilityFunction.cpp
index a78116352c2e..3db3fcea0192 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/ClangUtilityFunction.cpp
+++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangUtilityFunction.cpp
@@ -6,8 +6,6 @@
//
//===----------------------------------------------------------------------===//
-#include "lldb/Host/Config.h"
-
#include "ClangUtilityFunction.h"
#include "ClangExpressionDeclMap.h"
#include "ClangExpressionParser.h"
@@ -15,9 +13,7 @@
#include "ClangPersistentVariables.h"
#include <cstdio>
-#if HAVE_SYS_TYPES_H
#include <sys/types.h>
-#endif
#include "lldb/Core/Module.h"
@@ -49,7 +45,7 @@ ClangUtilityFunction::ClangUtilityFunction(ExecutionContextScope &exe_scope,
llvm::SmallString<128> result_path;
llvm::sys::fs::createTemporaryFile("lldb", "expr", temp_fd, result_path);
if (temp_fd != -1) {
- lldb_private::NativeFile file(temp_fd, File::eOpenOptionWrite, true);
+ lldb_private::NativeFile file(temp_fd, File::eOpenOptionWriteOnly, true);
text = "#line 1 \"" + std::string(result_path) + "\"\n" + text;
size_t bytes_written = text.size();
file.Write(text.c_str(), bytes_written);
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/CppModuleConfiguration.h b/lldb/source/Plugins/ExpressionParser/Clang/CppModuleConfiguration.h
index 425106bba0a3..907db5d625dc 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/CppModuleConfiguration.h
+++ b/lldb/source/Plugins/ExpressionParser/Clang/CppModuleConfiguration.h
@@ -16,9 +16,9 @@ namespace lldb_private {
/// A Clang configuration when importing C++ modules.
///
-/// Includes a list of include paths that should be used when importing
-/// and a list of modules that can be imported. Currently only used when
-/// importing the 'std' module and its dependencies.
+/// This class computes a list of include paths and module names that can be
+/// imported given a list of source files. Currently only used when importing
+/// the 'std' module and its dependencies.
class CppModuleConfiguration {
/// Utility class for a path that can only be set once.
class SetOncePath {
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/IRForTarget.cpp b/lldb/source/Plugins/ExpressionParser/Clang/IRForTarget.cpp
index 5655d548ee34..f80dc2b14467 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/IRForTarget.cpp
+++ b/lldb/source/Plugins/ExpressionParser/Clang/IRForTarget.cpp
@@ -1303,7 +1303,7 @@ bool IRForTarget::MaybeHandleCallArguments(CallInst *Old) {
LLDB_LOG(log, "MaybeHandleCallArguments({0})", PrintValue(Old));
- for (unsigned op_index = 0, num_ops = Old->getNumArgOperands();
+ for (unsigned op_index = 0, num_ops = Old->arg_size();
op_index < num_ops; ++op_index)
// conservatively believe that this is a store
if (!MaybeHandleVariable(Old->getArgOperand(op_index))) {
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/NameSearchContext.cpp b/lldb/source/Plugins/ExpressionParser/Clang/NameSearchContext.cpp
index 829afa5ffcec..8709c2b0dcea 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/NameSearchContext.cpp
+++ b/lldb/source/Plugins/ExpressionParser/Clang/NameSearchContext.cpp
@@ -66,6 +66,7 @@ clang::NamedDecl *NameSearchContext::AddFunDecl(const CompilerType &type,
context = LinkageSpecDecl::Create(
ast, context, SourceLocation(), SourceLocation(),
clang::LinkageSpecDecl::LanguageIDs::lang_c, false);
+ // FIXME: The LinkageSpecDecl here should be added to m_decl_context.
}
// Pass the identifier info for functions the decl_name is needed for
@@ -77,7 +78,7 @@ clang::NamedDecl *NameSearchContext::AddFunDecl(const CompilerType &type,
clang::FunctionDecl *func_decl = FunctionDecl::Create(
ast, context, SourceLocation(), SourceLocation(), decl_name, qual_type,
- nullptr, SC_Extern, isInlineSpecified, hasWrittenPrototype,
+ nullptr, SC_Extern, /*UsesFPIntrin=*/false, isInlineSpecified, hasWrittenPrototype,
isConstexprSpecified ? ConstexprSpecKind::Constexpr
: ConstexprSpecKind::Unspecified);
diff --git a/lldb/source/Plugins/Instruction/ARM/EmulateInstructionARM.cpp b/lldb/source/Plugins/Instruction/ARM/EmulateInstructionARM.cpp
index bf0bbdab740f..5a238c5d4ac7 100644
--- a/lldb/source/Plugins/Instruction/ARM/EmulateInstructionARM.cpp
+++ b/lldb/source/Plugins/Instruction/ARM/EmulateInstructionARM.cpp
@@ -713,12 +713,7 @@ void EmulateInstructionARM::Terminate() {
PluginManager::UnregisterPlugin(CreateInstance);
}
-ConstString EmulateInstructionARM::GetPluginNameStatic() {
- static ConstString g_name("arm");
- return g_name;
-}
-
-const char *EmulateInstructionARM::GetPluginDescriptionStatic() {
+llvm::StringRef EmulateInstructionARM::GetPluginDescriptionStatic() {
return "Emulate instructions for the ARM architecture.";
}
diff --git a/lldb/source/Plugins/Instruction/ARM/EmulateInstructionARM.h b/lldb/source/Plugins/Instruction/ARM/EmulateInstructionARM.h
index dfd7c926dabf..8b167dd347ad 100644
--- a/lldb/source/Plugins/Instruction/ARM/EmulateInstructionARM.h
+++ b/lldb/source/Plugins/Instruction/ARM/EmulateInstructionARM.h
@@ -62,9 +62,9 @@ public:
static void Terminate();
- static lldb_private::ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "arm"; }
- static const char *GetPluginDescriptionStatic();
+ static llvm::StringRef GetPluginDescriptionStatic();
static lldb_private::EmulateInstruction *
CreateInstance(const lldb_private::ArchSpec &arch, InstructionType inst_type);
@@ -83,11 +83,7 @@ public:
return false;
}
- lldb_private::ConstString GetPluginName() override {
- return GetPluginNameStatic();
- }
-
- uint32_t GetPluginVersion() override { return 1; }
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
bool SetTargetTriple(const ArchSpec &arch) override;
diff --git a/lldb/source/Plugins/Instruction/ARM64/EmulateInstructionARM64.cpp b/lldb/source/Plugins/Instruction/ARM64/EmulateInstructionARM64.cpp
index 9b0c06bcccab..f86609f3c5c1 100644
--- a/lldb/source/Plugins/Instruction/ARM64/EmulateInstructionARM64.cpp
+++ b/lldb/source/Plugins/Instruction/ARM64/EmulateInstructionARM64.cpp
@@ -35,7 +35,7 @@
"na", nullptr, 8, 0, lldb::eEncodingUint, lldb::eFormatHex, \
{LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, \
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM}, \
- nullptr, nullptr, nullptr, 0
+ nullptr, nullptr
#define DECLARE_REGISTER_INFOS_ARM64_STRUCT
@@ -117,17 +117,7 @@ void EmulateInstructionARM64::Terminate() {
PluginManager::UnregisterPlugin(CreateInstance);
}
-ConstString EmulateInstructionARM64::GetPluginNameStatic() {
- ConstString g_plugin_name("lldb.emulate-instruction.arm64");
- return g_plugin_name;
-}
-
-lldb_private::ConstString EmulateInstructionARM64::GetPluginName() {
- static ConstString g_plugin_name("EmulateInstructionARM64");
- return g_plugin_name;
-}
-
-const char *EmulateInstructionARM64::GetPluginDescriptionStatic() {
+llvm::StringRef EmulateInstructionARM64::GetPluginDescriptionStatic() {
return "Emulate instructions for the ARM64 architecture.";
}
diff --git a/lldb/source/Plugins/Instruction/ARM64/EmulateInstructionARM64.h b/lldb/source/Plugins/Instruction/ARM64/EmulateInstructionARM64.h
index 11ad8a99b0fc..4f11f7387a2e 100644
--- a/lldb/source/Plugins/Instruction/ARM64/EmulateInstructionARM64.h
+++ b/lldb/source/Plugins/Instruction/ARM64/EmulateInstructionARM64.h
@@ -24,9 +24,9 @@ public:
static void Terminate();
- static lldb_private::ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "arm64"; }
- static const char *GetPluginDescriptionStatic();
+ static llvm::StringRef GetPluginDescriptionStatic();
static lldb_private::EmulateInstruction *
CreateInstance(const lldb_private::ArchSpec &arch,
@@ -46,9 +46,7 @@ public:
return false;
}
- lldb_private::ConstString GetPluginName() override;
-
- uint32_t GetPluginVersion() override { return 1; }
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
bool SetTargetTriple(const lldb_private::ArchSpec &arch) override;
diff --git a/lldb/source/Plugins/Instruction/MIPS/EmulateInstructionMIPS.cpp b/lldb/source/Plugins/Instruction/MIPS/EmulateInstructionMIPS.cpp
index a1a93c0b5a5f..ea9c95c55cbb 100644
--- a/lldb/source/Plugins/Instruction/MIPS/EmulateInstructionMIPS.cpp
+++ b/lldb/source/Plugins/Instruction/MIPS/EmulateInstructionMIPS.cpp
@@ -29,7 +29,7 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCTargetOptions.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/ADT/STLExtras.h"
@@ -193,17 +193,7 @@ void EmulateInstructionMIPS::Terminate() {
PluginManager::UnregisterPlugin(CreateInstance);
}
-ConstString EmulateInstructionMIPS::GetPluginNameStatic() {
- ConstString g_plugin_name("lldb.emulate-instruction.mips32");
- return g_plugin_name;
-}
-
-lldb_private::ConstString EmulateInstructionMIPS::GetPluginName() {
- static ConstString g_plugin_name("EmulateInstructionMIPS");
- return g_plugin_name;
-}
-
-const char *EmulateInstructionMIPS::GetPluginDescriptionStatic() {
+llvm::StringRef EmulateInstructionMIPS::GetPluginDescriptionStatic() {
return "Emulate instructions for the MIPS32 architecture.";
}
@@ -2946,9 +2936,9 @@ bool EmulateInstructionMIPS::Emulate_MSA_Branch_V(llvm::MCInst &insn,
bool bnz) {
bool success = false;
int32_t target = 0;
- llvm::APInt wr_val = llvm::APInt::getNullValue(128);
+ llvm::APInt wr_val = llvm::APInt::getZero(128);
llvm::APInt fail_value = llvm::APInt::getMaxValue(128);
- llvm::APInt zero_value = llvm::APInt::getNullValue(128);
+ llvm::APInt zero_value = llvm::APInt::getZero(128);
RegisterValue reg_value;
uint32_t wt = m_reg_info->getEncodingValue(insn.getOperand(0).getReg());
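
Beyond the plugin-interface change, the MIPS emulator picks up two LLVM API moves visible above: llvm/Support/TargetRegistry.h now lives at llvm/MC/TargetRegistry.h, and APInt::getNullValue has been renamed to APInt::getZero. A small self-contained sketch of the renamed helper, assuming an LLVM 14-era checkout for llvm/ADT/APInt.h:

#include "llvm/ADT/APInt.h"
#include <cassert>

int main() {
  // getZero is the LLVM 14 spelling of the former getNullValue: an
  // all-zero value of the requested bit width.
  llvm::APInt wr_val = llvm::APInt::getZero(128);
  llvm::APInt fail_value = llvm::APInt::getMaxValue(128);
  assert(wr_val == 0 && fail_value != 0);
  return 0;
}
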
diff --git a/lldb/source/Plugins/Instruction/MIPS/EmulateInstructionMIPS.h b/lldb/source/Plugins/Instruction/MIPS/EmulateInstructionMIPS.h
index 61291c729879..48782186e065 100644
--- a/lldb/source/Plugins/Instruction/MIPS/EmulateInstructionMIPS.h
+++ b/lldb/source/Plugins/Instruction/MIPS/EmulateInstructionMIPS.h
@@ -33,9 +33,9 @@ public:
static void Terminate();
- static lldb_private::ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "mips32"; }
- static const char *GetPluginDescriptionStatic();
+ static llvm::StringRef GetPluginDescriptionStatic();
static lldb_private::EmulateInstruction *
CreateInstance(const lldb_private::ArchSpec &arch,
@@ -55,9 +55,7 @@ public:
return false;
}
- lldb_private::ConstString GetPluginName() override;
-
- uint32_t GetPluginVersion() override { return 1; }
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
bool SetTargetTriple(const lldb_private::ArchSpec &arch) override;
diff --git a/lldb/source/Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.cpp b/lldb/source/Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.cpp
index 6044d00c0cbf..e5732a50f3f2 100644
--- a/lldb/source/Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.cpp
+++ b/lldb/source/Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.cpp
@@ -29,7 +29,7 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCTargetOptions.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/ADT/STLExtras.h"
@@ -180,17 +180,7 @@ void EmulateInstructionMIPS64::Terminate() {
PluginManager::UnregisterPlugin(CreateInstance);
}
-ConstString EmulateInstructionMIPS64::GetPluginNameStatic() {
- ConstString g_plugin_name("lldb.emulate-instruction.mips64");
- return g_plugin_name;
-}
-
-lldb_private::ConstString EmulateInstructionMIPS64::GetPluginName() {
- static ConstString g_plugin_name("EmulateInstructionMIPS64");
- return g_plugin_name;
-}
-
-const char *EmulateInstructionMIPS64::GetPluginDescriptionStatic() {
+llvm::StringRef EmulateInstructionMIPS64::GetPluginDescriptionStatic() {
return "Emulate instructions for the MIPS64 architecture.";
}
@@ -2258,9 +2248,9 @@ bool EmulateInstructionMIPS64::Emulate_MSA_Branch_V(llvm::MCInst &insn,
bool bnz) {
bool success = false;
int64_t target = 0;
- llvm::APInt wr_val = llvm::APInt::getNullValue(128);
+ llvm::APInt wr_val = llvm::APInt::getZero(128);
llvm::APInt fail_value = llvm::APInt::getMaxValue(128);
- llvm::APInt zero_value = llvm::APInt::getNullValue(128);
+ llvm::APInt zero_value = llvm::APInt::getZero(128);
RegisterValue reg_value;
uint32_t wt = m_reg_info->getEncodingValue(insn.getOperand(0).getReg());
diff --git a/lldb/source/Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.h b/lldb/source/Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.h
index c4ae2296c5dd..acd956e613d4 100644
--- a/lldb/source/Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.h
+++ b/lldb/source/Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.h
@@ -31,9 +31,9 @@ public:
static void Terminate();
- static lldb_private::ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "mips64"; }
- static const char *GetPluginDescriptionStatic();
+ static llvm::StringRef GetPluginDescriptionStatic();
static lldb_private::EmulateInstruction *
CreateInstance(const lldb_private::ArchSpec &arch,
@@ -53,9 +53,7 @@ public:
return false;
}
- lldb_private::ConstString GetPluginName() override;
-
- uint32_t GetPluginVersion() override { return 1; }
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
bool SetTargetTriple(const lldb_private::ArchSpec &arch) override;
diff --git a/lldb/source/Plugins/Instruction/PPC64/EmulateInstructionPPC64.cpp b/lldb/source/Plugins/Instruction/PPC64/EmulateInstructionPPC64.cpp
index 4e78c369c128..dcfad8e106aa 100644
--- a/lldb/source/Plugins/Instruction/PPC64/EmulateInstructionPPC64.cpp
+++ b/lldb/source/Plugins/Instruction/PPC64/EmulateInstructionPPC64.cpp
@@ -39,17 +39,7 @@ void EmulateInstructionPPC64::Terminate() {
PluginManager::UnregisterPlugin(CreateInstance);
}
-ConstString EmulateInstructionPPC64::GetPluginNameStatic() {
- ConstString g_plugin_name("lldb.emulate-instruction.ppc64");
- return g_plugin_name;
-}
-
-ConstString EmulateInstructionPPC64::GetPluginName() {
- static ConstString g_plugin_name("EmulateInstructionPPC64");
- return g_plugin_name;
-}
-
-const char *EmulateInstructionPPC64::GetPluginDescriptionStatic() {
+llvm::StringRef EmulateInstructionPPC64::GetPluginDescriptionStatic() {
return "Emulate instructions for the PPC64 architecture.";
}
diff --git a/lldb/source/Plugins/Instruction/PPC64/EmulateInstructionPPC64.h b/lldb/source/Plugins/Instruction/PPC64/EmulateInstructionPPC64.h
index 02d2bce8f05e..117ff8965eb5 100644
--- a/lldb/source/Plugins/Instruction/PPC64/EmulateInstructionPPC64.h
+++ b/lldb/source/Plugins/Instruction/PPC64/EmulateInstructionPPC64.h
@@ -23,9 +23,9 @@ public:
static void Terminate();
- static ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "ppc64"; }
- static const char *GetPluginDescriptionStatic();
+ static llvm::StringRef GetPluginDescriptionStatic();
static EmulateInstruction *CreateInstance(const ArchSpec &arch,
InstructionType inst_type);
@@ -44,9 +44,7 @@ public:
return false;
}
- ConstString GetPluginName() override;
-
- uint32_t GetPluginVersion() override { return 1; }
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
bool SetTargetTriple(const ArchSpec &arch) override;
diff --git a/lldb/source/Plugins/InstrumentationRuntime/ASan/InstrumentationRuntimeASan.cpp b/lldb/source/Plugins/InstrumentationRuntime/ASan/InstrumentationRuntimeASan.cpp
index e78ea3a68483..8d8b5c68e41b 100644
--- a/lldb/source/Plugins/InstrumentationRuntime/ASan/InstrumentationRuntimeASan.cpp
+++ b/lldb/source/Plugins/InstrumentationRuntime/ASan/InstrumentationRuntimeASan.cpp
@@ -47,10 +47,6 @@ void InstrumentationRuntimeASan::Terminate() {
PluginManager::UnregisterPlugin(CreateInstance);
}
-lldb_private::ConstString InstrumentationRuntimeASan::GetPluginNameStatic() {
- return ConstString("AddressSanitizer");
-}
-
lldb::InstrumentationRuntimeType InstrumentationRuntimeASan::GetTypeStatic() {
return eInstrumentationRuntimeTypeAddressSanitizer;
}
diff --git a/lldb/source/Plugins/InstrumentationRuntime/ASan/InstrumentationRuntimeASan.h b/lldb/source/Plugins/InstrumentationRuntime/ASan/InstrumentationRuntimeASan.h
index cde0a9613350..83a88cf7f89f 100644
--- a/lldb/source/Plugins/InstrumentationRuntime/ASan/InstrumentationRuntimeASan.h
+++ b/lldb/source/Plugins/InstrumentationRuntime/ASan/InstrumentationRuntimeASan.h
@@ -27,18 +27,14 @@ public:
static void Terminate();
- static lldb_private::ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "AddressSanitizer"; }
static lldb::InstrumentationRuntimeType GetTypeStatic();
- lldb_private::ConstString GetPluginName() override {
- return GetPluginNameStatic();
- }
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
virtual lldb::InstrumentationRuntimeType GetType() { return GetTypeStatic(); }
- uint32_t GetPluginVersion() override { return 1; }
-
private:
InstrumentationRuntimeASan(const lldb::ProcessSP &process_sp)
: lldb_private::InstrumentationRuntime(process_sp) {}
diff --git a/lldb/source/Plugins/InstrumentationRuntime/MainThreadChecker/InstrumentationRuntimeMainThreadChecker.cpp b/lldb/source/Plugins/InstrumentationRuntime/MainThreadChecker/InstrumentationRuntimeMainThreadChecker.cpp
index 9a88b343878c..dc8c7c96aa11 100644
--- a/lldb/source/Plugins/InstrumentationRuntime/MainThreadChecker/InstrumentationRuntimeMainThreadChecker.cpp
+++ b/lldb/source/Plugins/InstrumentationRuntime/MainThreadChecker/InstrumentationRuntimeMainThreadChecker.cpp
@@ -54,11 +54,6 @@ void InstrumentationRuntimeMainThreadChecker::Terminate() {
PluginManager::UnregisterPlugin(CreateInstance);
}
-lldb_private::ConstString
-InstrumentationRuntimeMainThreadChecker::GetPluginNameStatic() {
- return ConstString("MainThreadChecker");
-}
-
lldb::InstrumentationRuntimeType
InstrumentationRuntimeMainThreadChecker::GetTypeStatic() {
return eInstrumentationRuntimeTypeMainThreadChecker;
diff --git a/lldb/source/Plugins/InstrumentationRuntime/MainThreadChecker/InstrumentationRuntimeMainThreadChecker.h b/lldb/source/Plugins/InstrumentationRuntime/MainThreadChecker/InstrumentationRuntimeMainThreadChecker.h
index 1435ae8d367f..3bbbf13b7798 100644
--- a/lldb/source/Plugins/InstrumentationRuntime/MainThreadChecker/InstrumentationRuntimeMainThreadChecker.h
+++ b/lldb/source/Plugins/InstrumentationRuntime/MainThreadChecker/InstrumentationRuntimeMainThreadChecker.h
@@ -28,18 +28,14 @@ public:
static void Terminate();
- static lldb_private::ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "MainThreadChecker"; }
static lldb::InstrumentationRuntimeType GetTypeStatic();
- lldb_private::ConstString GetPluginName() override {
- return GetPluginNameStatic();
- }
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
virtual lldb::InstrumentationRuntimeType GetType() { return GetTypeStatic(); }
- uint32_t GetPluginVersion() override { return 1; }
-
lldb::ThreadCollectionSP
GetBacktracesFromExtendedStopInfo(StructuredData::ObjectSP info) override;
diff --git a/lldb/source/Plugins/InstrumentationRuntime/TSan/InstrumentationRuntimeTSan.cpp b/lldb/source/Plugins/InstrumentationRuntime/TSan/InstrumentationRuntimeTSan.cpp
index a2954f556b10..aef10bb2a778 100644
--- a/lldb/source/Plugins/InstrumentationRuntime/TSan/InstrumentationRuntimeTSan.cpp
+++ b/lldb/source/Plugins/InstrumentationRuntime/TSan/InstrumentationRuntimeTSan.cpp
@@ -52,10 +52,6 @@ void InstrumentationRuntimeTSan::Terminate() {
PluginManager::UnregisterPlugin(CreateInstance);
}
-lldb_private::ConstString InstrumentationRuntimeTSan::GetPluginNameStatic() {
- return ConstString("ThreadSanitizer");
-}
-
lldb::InstrumentationRuntimeType InstrumentationRuntimeTSan::GetTypeStatic() {
return eInstrumentationRuntimeTypeThreadSanitizer;
}
diff --git a/lldb/source/Plugins/InstrumentationRuntime/TSan/InstrumentationRuntimeTSan.h b/lldb/source/Plugins/InstrumentationRuntime/TSan/InstrumentationRuntimeTSan.h
index 35a878d90aff..db4466a13193 100644
--- a/lldb/source/Plugins/InstrumentationRuntime/TSan/InstrumentationRuntimeTSan.h
+++ b/lldb/source/Plugins/InstrumentationRuntime/TSan/InstrumentationRuntimeTSan.h
@@ -27,18 +27,14 @@ public:
static void Terminate();
- static lldb_private::ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "ThreadSanitizer"; }
static lldb::InstrumentationRuntimeType GetTypeStatic();
- lldb_private::ConstString GetPluginName() override {
- return GetPluginNameStatic();
- }
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
virtual lldb::InstrumentationRuntimeType GetType() { return GetTypeStatic(); }
- uint32_t GetPluginVersion() override { return 1; }
-
lldb::ThreadCollectionSP
GetBacktracesFromExtendedStopInfo(StructuredData::ObjectSP info) override;
diff --git a/lldb/source/Plugins/InstrumentationRuntime/UBSan/InstrumentationRuntimeUBSan.cpp b/lldb/source/Plugins/InstrumentationRuntime/UBSan/InstrumentationRuntimeUBSan.cpp
index 58bc38a551f0..8e7799dc0761 100644
--- a/lldb/source/Plugins/InstrumentationRuntime/UBSan/InstrumentationRuntimeUBSan.cpp
+++ b/lldb/source/Plugins/InstrumentationRuntime/UBSan/InstrumentationRuntimeUBSan.cpp
@@ -56,10 +56,6 @@ void InstrumentationRuntimeUBSan::Terminate() {
PluginManager::UnregisterPlugin(CreateInstance);
}
-lldb_private::ConstString InstrumentationRuntimeUBSan::GetPluginNameStatic() {
- return ConstString("UndefinedBehaviorSanitizer");
-}
-
lldb::InstrumentationRuntimeType InstrumentationRuntimeUBSan::GetTypeStatic() {
return eInstrumentationRuntimeTypeUndefinedBehaviorSanitizer;
}
diff --git a/lldb/source/Plugins/InstrumentationRuntime/UBSan/InstrumentationRuntimeUBSan.h b/lldb/source/Plugins/InstrumentationRuntime/UBSan/InstrumentationRuntimeUBSan.h
index 813c30069600..e0de158473de 100644
--- a/lldb/source/Plugins/InstrumentationRuntime/UBSan/InstrumentationRuntimeUBSan.h
+++ b/lldb/source/Plugins/InstrumentationRuntime/UBSan/InstrumentationRuntimeUBSan.h
@@ -28,18 +28,16 @@ public:
static void Terminate();
- static lldb_private::ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() {
+ return "UndefinedBehaviorSanitizer";
+ }
static lldb::InstrumentationRuntimeType GetTypeStatic();
- lldb_private::ConstString GetPluginName() override {
- return GetPluginNameStatic();
- }
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
virtual lldb::InstrumentationRuntimeType GetType() { return GetTypeStatic(); }
- uint32_t GetPluginVersion() override { return 1; }
-
lldb::ThreadCollectionSP
GetBacktracesFromExtendedStopInfo(StructuredData::ObjectSP info) override;
diff --git a/lldb/source/Plugins/JITLoader/GDB/JITLoaderGDB.cpp b/lldb/source/Plugins/JITLoader/GDB/JITLoaderGDB.cpp
index 16c474fdbf3b..e4994e19710c 100644
--- a/lldb/source/Plugins/JITLoader/GDB/JITLoaderGDB.cpp
+++ b/lldb/source/Plugins/JITLoader/GDB/JITLoaderGDB.cpp
@@ -90,7 +90,7 @@ enum {
class PluginProperties : public Properties {
public:
static ConstString GetSettingName() {
- return JITLoaderGDB::GetPluginNameStatic();
+ return ConstString(JITLoaderGDB::GetPluginNameStatic());
}
PluginProperties() {
@@ -105,11 +105,9 @@ public:
}
};
-typedef std::shared_ptr<PluginProperties> JITLoaderGDBPropertiesSP;
-
-static const JITLoaderGDBPropertiesSP &GetGlobalPluginProperties() {
- static const auto g_settings_sp(std::make_shared<PluginProperties>());
- return g_settings_sp;
+static PluginProperties &GetGlobalPluginProperties() {
+ static PluginProperties g_settings;
+ return g_settings;
}
template <typename ptr_t>
@@ -160,7 +158,7 @@ void JITLoaderGDB::DebuggerInitialize(Debugger &debugger) {
debugger, PluginProperties::GetSettingName())) {
const bool is_global_setting = true;
PluginManager::CreateSettingForJITLoaderPlugin(
- debugger, GetGlobalPluginProperties()->GetValueProperties(),
+ debugger, GetGlobalPluginProperties().GetValueProperties(),
ConstString("Properties for the JIT LoaderGDB plug-in."),
is_global_setting);
}
@@ -404,15 +402,10 @@ bool JITLoaderGDB::ReadJITDescriptorImpl(bool all_entries) {
}
// PluginInterface protocol
-lldb_private::ConstString JITLoaderGDB::GetPluginNameStatic() {
- static ConstString g_name("gdb");
- return g_name;
-}
-
JITLoaderSP JITLoaderGDB::CreateInstance(Process *process, bool force) {
JITLoaderSP jit_loader_sp;
bool enable;
- switch (GetGlobalPluginProperties()->GetEnable()) {
+ switch (GetGlobalPluginProperties().GetEnable()) {
case EnableJITLoaderGDB::eEnableJITLoaderGDBOn:
enable = true;
break;
@@ -429,17 +422,11 @@ JITLoaderSP JITLoaderGDB::CreateInstance(Process *process, bool force) {
return jit_loader_sp;
}
-const char *JITLoaderGDB::GetPluginDescriptionStatic() {
+llvm::StringRef JITLoaderGDB::GetPluginDescriptionStatic() {
return "JIT loader plug-in that watches for JIT events using the GDB "
"interface.";
}
-lldb_private::ConstString JITLoaderGDB::GetPluginName() {
- return GetPluginNameStatic();
-}
-
-uint32_t JITLoaderGDB::GetPluginVersion() { return 1; }
-
void JITLoaderGDB::Initialize() {
PluginManager::RegisterPlugin(GetPluginNameStatic(),
GetPluginDescriptionStatic(), CreateInstance,
diff --git a/lldb/source/Plugins/JITLoader/GDB/JITLoaderGDB.h b/lldb/source/Plugins/JITLoader/GDB/JITLoaderGDB.h
index 42377f435293..2b337b3e9844 100644
--- a/lldb/source/Plugins/JITLoader/GDB/JITLoaderGDB.h
+++ b/lldb/source/Plugins/JITLoader/GDB/JITLoaderGDB.h
@@ -25,9 +25,9 @@ public:
static void Terminate();
- static lldb_private::ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "gdb"; }
- static const char *GetPluginDescriptionStatic();
+ static llvm::StringRef GetPluginDescriptionStatic();
static lldb::JITLoaderSP CreateInstance(lldb_private::Process *process,
bool force);
@@ -35,9 +35,7 @@ public:
static void DebuggerInitialize(lldb_private::Debugger &debugger);
// PluginInterface protocol
- lldb_private::ConstString GetPluginName() override;
-
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
// JITLoader interface
void DidAttach() override;
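
JITLoaderGDB also swaps its shared_ptr-wrapped global settings object for a function-local static returned by reference, so call sites change from GetGlobalPluginProperties()->GetEnable() to GetGlobalPluginProperties().GetEnable(). The sketch below shows the idiom with a hypothetical Settings struct standing in for PluginProperties; it uses no LLDB types.

#include <string>

// Hypothetical settings bag standing in for the plugin's PluginProperties.
struct Settings {
  bool enable_jit_loader = true;
  std::string name = "gdb";
};

// Function-local statics are initialized once, thread-safely since C++11,
// and live until program exit; callers share one instance without any
// reference counting.
static Settings &GetGlobalSettings() {
  static Settings g_settings;
  return g_settings;
}

int main() {
  // Access through the reference mirrors the new call-site spelling.
  return GetGlobalSettings().enable_jit_loader ? 0 : 1;
}
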
diff --git a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp
index 895fd55f499c..83e8e52b86f2 100644
--- a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp
+++ b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp
@@ -20,12 +20,14 @@
#include "llvm/Demangle/ItaniumDemangle.h"
#include "lldb/Core/Mangled.h"
+#include "lldb/Core/Module.h"
#include "lldb/Core/PluginManager.h"
#include "lldb/Core/UniqueCStringMap.h"
#include "lldb/DataFormatters/CXXFunctionPointer.h"
#include "lldb/DataFormatters/DataVisualization.h"
#include "lldb/DataFormatters/FormattersHelpers.h"
#include "lldb/DataFormatters/VectorType.h"
+#include "lldb/Symbol/SymbolFile.h"
#include "lldb/Utility/ConstString.h"
#include "lldb/Utility/Log.h"
#include "lldb/Utility/RegularExpression.h"
@@ -54,24 +56,40 @@ void CPlusPlusLanguage::Terminate() {
PluginManager::UnregisterPlugin(CreateInstance);
}
-lldb_private::ConstString CPlusPlusLanguage::GetPluginNameStatic() {
- static ConstString g_name("cplusplus");
- return g_name;
-}
-
bool CPlusPlusLanguage::SymbolNameFitsToLanguage(Mangled mangled) const {
const char *mangled_name = mangled.GetMangledName().GetCString();
return mangled_name && CPlusPlusLanguage::IsCPPMangledName(mangled_name);
}
-// PluginInterface protocol
-
-lldb_private::ConstString CPlusPlusLanguage::GetPluginName() {
- return GetPluginNameStatic();
+ConstString CPlusPlusLanguage::GetDemangledFunctionNameWithoutArguments(
+ Mangled mangled) const {
+ const char *mangled_name_cstr = mangled.GetMangledName().GetCString();
+ ConstString demangled_name = mangled.GetDemangledName();
+ if (demangled_name && mangled_name_cstr && mangled_name_cstr[0]) {
+ if (mangled_name_cstr[0] == '_' && mangled_name_cstr[1] == 'Z' &&
+ (mangled_name_cstr[2] != 'T' && // avoid virtual table, VTT structure,
+ // typeinfo structure, and typeinfo
+ // mangled_name
+ mangled_name_cstr[2] != 'G' && // avoid guard variables
+ mangled_name_cstr[2] != 'Z')) // named local entities (if we
+ // eventually handle eSymbolTypeData,
+ // we will want this back)
+ {
+ CPlusPlusLanguage::MethodName cxx_method(demangled_name);
+ if (!cxx_method.GetBasename().empty()) {
+ std::string shortname;
+ if (!cxx_method.GetContext().empty())
+ shortname = cxx_method.GetContext().str() + "::";
+ shortname += cxx_method.GetBasename().str();
+ return ConstString(shortname);
+ }
+ }
+ }
+ if (demangled_name)
+ return demangled_name;
+ return mangled.GetMangledName();
}
-uint32_t CPlusPlusLanguage::GetPluginVersion() { return 1; }
-
// Static Functions
Language *CPlusPlusLanguage::CreateInstance(lldb::LanguageType language) {
@@ -303,13 +321,12 @@ class ManglingSubstitutor
public:
ManglingSubstitutor() : Base(nullptr, nullptr) {}
- template<typename... Ts>
+ template <typename... Ts>
ConstString substitute(llvm::StringRef Mangled, Ts &&... Vals) {
this->getDerived().reset(Mangled, std::forward<Ts>(Vals)...);
return substituteImpl(Mangled);
}
-
protected:
void reset(llvm::StringRef Mangled) {
Base::reset(Mangled.begin(), Mangled.end());
@@ -363,7 +380,6 @@ private:
llvm::StringRef(Written, std::distance(Written, currentParserPos()));
Written = currentParserPos();
}
-
};
/// Given a mangled function `Mangled`, replace all the primitive function type
@@ -397,9 +413,10 @@ public:
};
} // namespace
-uint32_t CPlusPlusLanguage::FindAlternateFunctionManglings(
- const ConstString mangled_name, std::set<ConstString> &alternates) {
- const auto start_size = alternates.size();
+std::vector<ConstString> CPlusPlusLanguage::GenerateAlternateFunctionManglings(
+ const ConstString mangled_name) const {
+ std::vector<ConstString> alternates;
+
/// Get a basic set of alternative manglings for the given symbol `name`, by
/// making a few basic possible substitutions on basic types, storage duration
/// and `const`ness for the given symbol. The output parameter `alternates`
@@ -412,7 +429,7 @@ uint32_t CPlusPlusLanguage::FindAlternateFunctionManglings(
strncmp(mangled_name.GetCString(), "_ZNK", 4)) {
std::string fixed_scratch("_ZNK");
fixed_scratch.append(mangled_name.GetCString() + 3);
- alternates.insert(ConstString(fixed_scratch));
+ alternates.push_back(ConstString(fixed_scratch));
}
// Maybe we're looking for a static symbol but we thought it was global...
@@ -420,7 +437,7 @@ uint32_t CPlusPlusLanguage::FindAlternateFunctionManglings(
strncmp(mangled_name.GetCString(), "_ZL", 3)) {
std::string fixed_scratch("_ZL");
fixed_scratch.append(mangled_name.GetCString() + 2);
- alternates.insert(ConstString(fixed_scratch));
+ alternates.push_back(ConstString(fixed_scratch));
}
TypeSubstitutor TS;
@@ -430,24 +447,74 @@ uint32_t CPlusPlusLanguage::FindAlternateFunctionManglings(
// parameter, try finding matches which have the general case 'c'.
if (ConstString char_fixup =
TS.substitute(mangled_name.GetStringRef(), "a", "c"))
- alternates.insert(char_fixup);
+ alternates.push_back(char_fixup);
// long long parameter mangling 'x', may actually just be a long 'l' argument
if (ConstString long_fixup =
TS.substitute(mangled_name.GetStringRef(), "x", "l"))
- alternates.insert(long_fixup);
+ alternates.push_back(long_fixup);
// unsigned long long parameter mangling 'y', may actually just be unsigned
// long 'm' argument
if (ConstString ulong_fixup =
TS.substitute(mangled_name.GetStringRef(), "y", "m"))
- alternates.insert(ulong_fixup);
+ alternates.push_back(ulong_fixup);
if (ConstString ctor_fixup =
CtorDtorSubstitutor().substitute(mangled_name.GetStringRef()))
- alternates.insert(ctor_fixup);
+ alternates.push_back(ctor_fixup);
- return alternates.size() - start_size;
+ return alternates;
+}
+
+ConstString CPlusPlusLanguage::FindBestAlternateFunctionMangledName(
+ const Mangled mangled, const SymbolContext &sym_ctx) const {
+ ConstString demangled = mangled.GetDemangledName();
+ if (!demangled)
+ return ConstString();
+
+ CPlusPlusLanguage::MethodName cpp_name(demangled);
+ std::string scope_qualified_name = cpp_name.GetScopeQualifiedName();
+
+ if (!scope_qualified_name.size())
+ return ConstString();
+
+ if (!sym_ctx.module_sp)
+ return ConstString();
+
+ lldb_private::SymbolFile *sym_file = sym_ctx.module_sp->GetSymbolFile();
+ if (!sym_file)
+ return ConstString();
+
+ std::vector<ConstString> alternates;
+ sym_file->GetMangledNamesForFunction(scope_qualified_name, alternates);
+
+ std::vector<ConstString> param_and_qual_matches;
+ std::vector<ConstString> param_matches;
+ for (size_t i = 0; i < alternates.size(); i++) {
+ ConstString alternate_mangled_name = alternates[i];
+ Mangled mangled(alternate_mangled_name);
+ ConstString demangled = mangled.GetDemangledName();
+
+ CPlusPlusLanguage::MethodName alternate_cpp_name(demangled);
+ if (!cpp_name.IsValid())
+ continue;
+
+ if (alternate_cpp_name.GetArguments() == cpp_name.GetArguments()) {
+ if (alternate_cpp_name.GetQualifiers() == cpp_name.GetQualifiers())
+ param_and_qual_matches.push_back(alternate_mangled_name);
+ else
+ param_matches.push_back(alternate_mangled_name);
+ }
+ }
+
+ if (param_and_qual_matches.size())
+ return param_and_qual_matches[0]; // It is assumed that there will be only
+ // one!
+ else if (param_matches.size())
+ return param_matches[0]; // Return one of them as a best match
+ else
+ return ConstString();
}
static void LoadLibCxxFormatters(lldb::TypeCategoryImplSP cpp_category_sp) {
@@ -486,26 +553,23 @@ static void LoadLibCxxFormatters(lldb::TypeCategoryImplSP cpp_category_sp) {
AddCXXSummary(cpp_category_sp,
lldb_private::formatters::LibcxxStringSummaryProviderUTF16,
"std::u16string summary provider",
- ConstString(
- "^std::__[[:alnum:]]+::basic_string<char16_t, "
- "std::__[[:alnum:]]+::char_traits<char16_t>, "
- "std::__[[:alnum:]]+::allocator<char16_t> >$"),
+ ConstString("^std::__[[:alnum:]]+::basic_string<char16_t, "
+ "std::__[[:alnum:]]+::char_traits<char16_t>, "
+ "std::__[[:alnum:]]+::allocator<char16_t> >$"),
stl_summary_flags, true);
AddCXXSummary(cpp_category_sp,
lldb_private::formatters::LibcxxStringSummaryProviderUTF32,
"std::u32string summary provider",
- ConstString(
- "^std::__[[:alnum:]]+::basic_string<char32_t, "
- "std::__[[:alnum:]]+::char_traits<char32_t>, "
- "std::__[[:alnum:]]+::allocator<char32_t> >$"),
+ ConstString("^std::__[[:alnum:]]+::basic_string<char32_t, "
+ "std::__[[:alnum:]]+::char_traits<char32_t>, "
+ "std::__[[:alnum:]]+::allocator<char32_t> >$"),
stl_summary_flags, true);
- AddCXXSummary(cpp_category_sp,
- lldb_private::formatters::LibcxxWStringSummaryProvider,
- "std::wstring summary provider",
- ConstString("^std::__[[:alnum:]]+::wstring$"),
- stl_summary_flags, true);
+ AddCXXSummary(
+ cpp_category_sp, lldb_private::formatters::LibcxxWStringSummaryProvider,
+ "std::wstring summary provider",
+ ConstString("^std::__[[:alnum:]]+::wstring$"), stl_summary_flags, true);
AddCXXSummary(cpp_category_sp,
lldb_private::formatters::LibcxxWStringSummaryProvider,
"std::wstring summary provider",
@@ -702,11 +766,11 @@ static void LoadLibCxxFormatters(lldb::TypeCategoryImplSP cpp_category_sp) {
"libc++ std::tuple summary provider",
ConstString("^std::__[[:alnum:]]+::tuple<.*>(( )?&)?$"),
stl_summary_flags, true);
- AddCXXSummary(
- cpp_category_sp, lldb_private::formatters::LibCxxAtomicSummaryProvider,
- "libc++ std::atomic summary provider",
- ConstString("^std::__[[:alnum:]]+::atomic<.+>$"), stl_summary_flags,
- true);
+ AddCXXSummary(cpp_category_sp,
+ lldb_private::formatters::LibCxxAtomicSummaryProvider,
+ "libc++ std::atomic summary provider",
+ ConstString("^std::__[[:alnum:]]+::atomic<.+>$"),
+ stl_summary_flags, true);
AddCXXSummary(cpp_category_sp,
lldb_private::formatters::LibcxxOptionalSummaryProvider,
"libc++ std::optional summary provider",
@@ -793,7 +857,8 @@ static void LoadLibStdcppFormatters(lldb::TypeCategoryImplSP cpp_category_sp) {
"std::allocator<char> >"),
cxx11_string_summary_sp);
cpp_category_sp->GetTypeSummariesContainer()->Add(
- ConstString("std::__cxx11::basic_string<unsigned char, std::char_traits<unsigned char>, "
+ ConstString("std::__cxx11::basic_string<unsigned char, "
+ "std::char_traits<unsigned char>, "
"std::allocator<unsigned char> >"),
cxx11_string_summary_sp);
@@ -825,6 +890,8 @@ static void LoadLibStdcppFormatters(lldb::TypeCategoryImplSP cpp_category_sp) {
SyntheticChildren::Flags stl_synth_flags;
stl_synth_flags.SetCascades(true).SetSkipPointers(false).SetSkipReferences(
false);
+ SyntheticChildren::Flags stl_deref_flags = stl_synth_flags;
+ stl_deref_flags.SetFrontEndWantsDereference();
cpp_category_sp->GetRegexTypeSyntheticsContainer()->Add(
RegularExpression("^std::vector<.+>(( )?&)?$"),
@@ -835,14 +902,38 @@ static void LoadLibStdcppFormatters(lldb::TypeCategoryImplSP cpp_category_sp) {
RegularExpression("^std::map<.+> >(( )?&)?$"),
SyntheticChildrenSP(new ScriptedSyntheticChildren(
stl_synth_flags,
- "lldb.formatters.cpp.gnu_libstdcpp.StdMapSynthProvider")));
+ "lldb.formatters.cpp.gnu_libstdcpp.StdMapLikeSynthProvider")));
+ cpp_category_sp->GetRegexTypeSyntheticsContainer()->Add(
+ RegularExpression("^std::set<.+> >(( )?&)?$"),
+ SyntheticChildrenSP(new ScriptedSyntheticChildren(
+ stl_deref_flags,
+ "lldb.formatters.cpp.gnu_libstdcpp.StdMapLikeSynthProvider")));
+ cpp_category_sp->GetRegexTypeSyntheticsContainer()->Add(
+ RegularExpression("^std::multimap<.+> >(( )?&)?$"),
+ SyntheticChildrenSP(new ScriptedSyntheticChildren(
+ stl_deref_flags,
+ "lldb.formatters.cpp.gnu_libstdcpp.StdMapLikeSynthProvider")));
+ cpp_category_sp->GetRegexTypeSyntheticsContainer()->Add(
+ RegularExpression("^std::multiset<.+> >(( )?&)?$"),
+ SyntheticChildrenSP(new ScriptedSyntheticChildren(
+ stl_deref_flags,
+ "lldb.formatters.cpp.gnu_libstdcpp.StdMapLikeSynthProvider")));
cpp_category_sp->GetRegexTypeSyntheticsContainer()->Add(
RegularExpression("^std::(__cxx11::)?list<.+>(( )?&)?$"),
SyntheticChildrenSP(new ScriptedSyntheticChildren(
stl_synth_flags,
"lldb.formatters.cpp.gnu_libstdcpp.StdListSynthProvider")));
+ cpp_category_sp->GetRegexTypeSyntheticsContainer()->Add(
+ RegularExpression("^std::(__cxx11::)?forward_list<.+>(( )?&)?$"),
+ SyntheticChildrenSP(new ScriptedSyntheticChildren(
+ stl_synth_flags,
+ "lldb.formatters.cpp.gnu_libstdcpp.StdForwardListSynthProvider")));
stl_summary_flags.SetDontShowChildren(false);
- stl_summary_flags.SetSkipPointers(true);
+ stl_summary_flags.SetSkipPointers(false);
+ cpp_category_sp->GetRegexTypeSummariesContainer()->Add(
+ RegularExpression("^std::bitset<.+>(( )?&)?$"),
+ TypeSummaryImplSP(
+ new StringSummaryFormat(stl_summary_flags, "size=${svar%#}")));
cpp_category_sp->GetRegexTypeSummariesContainer()->Add(
RegularExpression("^std::vector<.+>(( )?&)?$"),
TypeSummaryImplSP(
@@ -852,9 +943,25 @@ static void LoadLibStdcppFormatters(lldb::TypeCategoryImplSP cpp_category_sp) {
TypeSummaryImplSP(
new StringSummaryFormat(stl_summary_flags, "size=${svar%#}")));
cpp_category_sp->GetRegexTypeSummariesContainer()->Add(
+ RegularExpression("^std::set<.+> >(( )?&)?$"),
+ TypeSummaryImplSP(
+ new StringSummaryFormat(stl_summary_flags, "size=${svar%#}")));
+ cpp_category_sp->GetRegexTypeSummariesContainer()->Add(
+ RegularExpression("^std::multimap<.+> >(( )?&)?$"),
+ TypeSummaryImplSP(
+ new StringSummaryFormat(stl_summary_flags, "size=${svar%#}")));
+ cpp_category_sp->GetRegexTypeSummariesContainer()->Add(
+ RegularExpression("^std::multiset<.+> >(( )?&)?$"),
+ TypeSummaryImplSP(
+ new StringSummaryFormat(stl_summary_flags, "size=${svar%#}")));
+ cpp_category_sp->GetRegexTypeSummariesContainer()->Add(
RegularExpression("^std::(__cxx11::)?list<.+>(( )?&)?$"),
TypeSummaryImplSP(
new StringSummaryFormat(stl_summary_flags, "size=${svar%#}")));
+ cpp_category_sp->GetRegexTypeSummariesContainer()->Add(
+ RegularExpression("^std::(__cxx11::)?forward_list<.+>(( )?&)?$"),
+ TypeSummaryImplSP(
+ new StringSummaryFormat(stl_summary_flags, "size=${svar%#}")));
AddCXXSynthetic(
cpp_category_sp,
@@ -889,6 +996,12 @@ static void LoadLibStdcppFormatters(lldb::TypeCategoryImplSP cpp_category_sp) {
"std::tuple synthetic children", ConstString("^std::tuple<.+>(( )?&)?$"),
stl_synth_flags, true);
+ AddCXXSynthetic(
+ cpp_category_sp,
+ lldb_private::formatters::LibStdcppBitsetSyntheticFrontEndCreator,
+ "std::bitset synthetic child", ConstString("^std::bitset<.+>(( )?&)?$"),
+ stl_deref_flags, true);
+
AddCXXSummary(cpp_category_sp,
lldb_private::formatters::LibStdcppUniquePointerSummaryProvider,
"libstdc++ std::unique_ptr summary provider",
@@ -928,15 +1041,13 @@ static void LoadSystemFormatters(lldb::TypeCategoryImplSP cpp_category_sp) {
.SetShowMembersOneLiner(false)
.SetHideItemNames(false);
- // FIXME because of a bug in the FormattersContainer we need to add a summary
- // for both X* and const X* (<rdar://problem/12717717>)
AddCXXSummary(
cpp_category_sp, lldb_private::formatters::Char8StringSummaryProvider,
"char8_t * summary provider", ConstString("char8_t *"), string_flags);
AddCXXSummary(cpp_category_sp,
lldb_private::formatters::Char8StringSummaryProvider,
"char8_t [] summary provider",
- ConstString("char8_t \\[[0-9]+\\]"), string_array_flags, true);
+ ConstString("char8_t ?\\[[0-9]+\\]"), string_array_flags, true);
AddCXXSummary(
cpp_category_sp, lldb_private::formatters::Char16StringSummaryProvider,
@@ -944,7 +1055,7 @@ static void LoadSystemFormatters(lldb::TypeCategoryImplSP cpp_category_sp) {
AddCXXSummary(cpp_category_sp,
lldb_private::formatters::Char16StringSummaryProvider,
"char16_t [] summary provider",
- ConstString("char16_t \\[[0-9]+\\]"), string_array_flags, true);
+ ConstString("char16_t ?\\[[0-9]+\\]"), string_array_flags, true);
AddCXXSummary(
cpp_category_sp, lldb_private::formatters::Char32StringSummaryProvider,
@@ -952,7 +1063,7 @@ static void LoadSystemFormatters(lldb::TypeCategoryImplSP cpp_category_sp) {
AddCXXSummary(cpp_category_sp,
lldb_private::formatters::Char32StringSummaryProvider,
"char32_t [] summary provider",
- ConstString("char32_t \\[[0-9]+\\]"), string_array_flags, true);
+ ConstString("char32_t ?\\[[0-9]+\\]"), string_array_flags, true);
AddCXXSummary(
cpp_category_sp, lldb_private::formatters::WCharStringSummaryProvider,
@@ -960,7 +1071,7 @@ static void LoadSystemFormatters(lldb::TypeCategoryImplSP cpp_category_sp) {
AddCXXSummary(cpp_category_sp,
lldb_private::formatters::WCharStringSummaryProvider,
"wchar_t * summary provider",
- ConstString("wchar_t \\[[0-9]+\\]"), string_array_flags, true);
+ ConstString("wchar_t ?\\[[0-9]+\\]"), string_array_flags, true);
AddCXXSummary(
cpp_category_sp, lldb_private::formatters::Char16StringSummaryProvider,
@@ -1015,7 +1126,8 @@ lldb::TypeCategoryImplSP CPlusPlusLanguage::GetFormatters() {
static TypeCategoryImplSP g_category;
llvm::call_once(g_initialize, [this]() -> void {
- DataVisualization::Categories::GetCategory(GetPluginName(), g_category);
+ DataVisualization::Categories::GetCategory(ConstString(GetPluginName()),
+ g_category);
if (g_category) {
LoadLibStdcppFormatters(g_category);
LoadLibCxxFormatters(g_category);
@@ -1097,9 +1209,8 @@ CPlusPlusLanguage::GetHardcodedSynthetics() {
llvm::call_once(g_initialize, []() -> void {
g_formatters.push_back([](lldb_private::ValueObject &valobj,
- lldb::DynamicValueType,
- FormatManager &
- fmt_mgr) -> SyntheticChildren::SharedPointer {
+ lldb::DynamicValueType, FormatManager &fmt_mgr)
+ -> SyntheticChildren::SharedPointer {
static CXXSyntheticChildren::SharedPointer formatter_sp(
new CXXSyntheticChildren(
SyntheticChildren::Flags()
@@ -1116,9 +1227,8 @@ CPlusPlusLanguage::GetHardcodedSynthetics() {
return nullptr;
});
g_formatters.push_back([](lldb_private::ValueObject &valobj,
- lldb::DynamicValueType,
- FormatManager &
- fmt_mgr) -> SyntheticChildren::SharedPointer {
+ lldb::DynamicValueType, FormatManager &fmt_mgr)
+ -> SyntheticChildren::SharedPointer {
static CXXSyntheticChildren::SharedPointer formatter_sp(
new CXXSyntheticChildren(
SyntheticChildren::Flags()
@@ -1133,7 +1243,6 @@ CPlusPlusLanguage::GetHardcodedSynthetics() {
}
return nullptr;
});
-
});
return g_formatters;
diff --git a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h
index 9163be4807ec..5547864a3763 100644
--- a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h
+++ b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h
@@ -102,10 +102,13 @@ public:
static lldb_private::Language *CreateInstance(lldb::LanguageType language);
- static lldb_private::ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "cplusplus"; }
bool SymbolNameFitsToLanguage(Mangled mangled) const override;
+ ConstString
+ GetDemangledFunctionNameWithoutArguments(Mangled mangled) const override;
+
static bool IsCPPMangledName(llvm::StringRef name);
// Extract C++ context and identifier from a string using heuristic matching
@@ -124,16 +127,14 @@ public:
llvm::StringRef &context,
llvm::StringRef &identifier);
- // Given a mangled function name, calculates some alternative manglings since
- // the compiler mangling may not line up with the symbol we are expecting
- static uint32_t
- FindAlternateFunctionManglings(const ConstString mangled,
- std::set<ConstString> &candidates);
+ std::vector<ConstString>
+ GenerateAlternateFunctionManglings(const ConstString mangled) const override;
- // PluginInterface protocol
- ConstString GetPluginName() override;
+ ConstString FindBestAlternateFunctionMangledName(
+ const Mangled mangled, const SymbolContext &sym_ctx) const override;
- uint32_t GetPluginVersion() override;
+ // PluginInterface protocol
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
};
} // namespace lldb_private
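
CPlusPlusLanguage's old FindAlternateFunctionManglings out-parameter-plus-count interface becomes GenerateAlternateFunctionManglings, which simply returns a vector of candidates. Below is a standalone sketch of the two prefix fixups the function starts with (guessing a const-qualified method for _ZN... names and an internal-linkage symbol for _Z... names); the helper name and the use of std::string in place of ConstString are illustrative only.

#include <cstdio>
#include <string>
#include <vector>

// Hypothetical helper mirroring the two prefix substitutions shown above:
// a method that might really be const (_ZN... -> _ZNK...) and a symbol
// that might really have internal linkage (_Z... -> _ZL...).
std::vector<std::string> GuessAlternateManglings(const std::string &mangled) {
  std::vector<std::string> alternates;
  if (mangled.compare(0, 3, "_ZN") == 0 && mangled.compare(0, 4, "_ZNK") != 0)
    alternates.push_back("_ZNK" + mangled.substr(3));
  if (mangled.compare(0, 2, "_Z") == 0 && mangled.compare(0, 3, "_ZL") != 0)
    alternates.push_back("_ZL" + mangled.substr(2));
  return alternates;
}

int main() {
  for (const std::string &alt : GuessAlternateManglings("_ZN3Foo3barEv"))
    std::printf("%s\n", alt.c_str());
  return 0;
}
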
diff --git a/lldb/source/Plugins/Language/CPlusPlus/CxxStringTypes.cpp b/lldb/source/Plugins/Language/CPlusPlus/CxxStringTypes.cpp
index 41bbd2b01a1e..535a9f6015d8 100644
--- a/lldb/source/Plugins/Language/CPlusPlus/CxxStringTypes.cpp
+++ b/lldb/source/Plugins/Language/CPlusPlus/CxxStringTypes.cpp
@@ -32,89 +32,87 @@ using namespace lldb;
using namespace lldb_private;
using namespace lldb_private::formatters;
-bool lldb_private::formatters::Char8StringSummaryProvider(
- ValueObject &valobj, Stream &stream, const TypeSummaryOptions &) {
- ProcessSP process_sp = valobj.GetProcessSP();
- if (!process_sp)
- return false;
+using StringElementType = StringPrinter::StringElementType;
+
+static constexpr std::pair<const char *, Format>
+getElementTraits(StringElementType ElemType) {
+ switch (ElemType) {
+ case StringElementType::UTF8:
+ return std::make_pair("u8", lldb::eFormatUnicode8);
+ case StringElementType::UTF16:
+ return std::make_pair("u", lldb::eFormatUnicode16);
+ case StringElementType::UTF32:
+ return std::make_pair("U", lldb::eFormatUnicode32);
+ default:
+ return std::make_pair(nullptr, lldb::eFormatInvalid);
+ }
+}
- lldb::addr_t valobj_addr = GetArrayAddressOrPointerValue(valobj);
- if (valobj_addr == 0 || valobj_addr == LLDB_INVALID_ADDRESS)
+template <StringElementType ElemType>
+static bool CharStringSummaryProvider(ValueObject &valobj, Stream &stream) {
+ Address valobj_addr = GetArrayAddressOrPointerValue(valobj);
+ if (!valobj_addr.IsValid())
return false;
StringPrinter::ReadStringAndDumpToStreamOptions options(valobj);
options.SetLocation(valobj_addr);
- options.SetProcessSP(process_sp);
+ options.SetTargetSP(valobj.GetTargetSP());
options.SetStream(&stream);
- options.SetPrefixToken("u8");
+ options.SetPrefixToken(getElementTraits(ElemType).first);
- if (!StringPrinter::ReadStringAndDumpToStream<
- StringPrinter::StringElementType::UTF8>(options)) {
+ if (!StringPrinter::ReadStringAndDumpToStream<ElemType>(options))
stream.Printf("Summary Unavailable");
- return true;
- }
return true;
}
-bool lldb_private::formatters::Char16StringSummaryProvider(
- ValueObject &valobj, Stream &stream, const TypeSummaryOptions &) {
- ProcessSP process_sp = valobj.GetProcessSP();
- if (!process_sp)
- return false;
+template <StringElementType ElemType>
+static bool CharSummaryProvider(ValueObject &valobj, Stream &stream) {
+ DataExtractor data;
+ Status error;
+ valobj.GetData(data, error);
- lldb::addr_t valobj_addr = GetArrayAddressOrPointerValue(valobj);
- if (valobj_addr == 0 || valobj_addr == LLDB_INVALID_ADDRESS)
+ if (error.Fail())
return false;
- StringPrinter::ReadStringAndDumpToStreamOptions options(valobj);
- options.SetLocation(valobj_addr);
- options.SetProcessSP(process_sp);
- options.SetStream(&stream);
- options.SetPrefixToken("u");
+ std::string value;
+ StringPrinter::ReadBufferAndDumpToStreamOptions options(valobj);
- if (!StringPrinter::ReadStringAndDumpToStream<
- StringPrinter::StringElementType::UTF16>(options)) {
- stream.Printf("Summary Unavailable");
- return true;
- }
+ constexpr auto ElemTraits = getElementTraits(ElemType);
+ valobj.GetValueAsCString(ElemTraits.second, value);
- return true;
-}
+ if (!value.empty())
+ stream.Printf("%s ", value.c_str());
-bool lldb_private::formatters::Char32StringSummaryProvider(
- ValueObject &valobj, Stream &stream, const TypeSummaryOptions &) {
- ProcessSP process_sp = valobj.GetProcessSP();
- if (!process_sp)
- return false;
+ options.SetData(std::move(data));
+ options.SetStream(&stream);
+ options.SetPrefixToken(ElemTraits.first);
+ options.SetQuote('\'');
+ options.SetSourceSize(1);
+ options.SetBinaryZeroIsTerminator(false);
- lldb::addr_t valobj_addr = GetArrayAddressOrPointerValue(valobj);
- if (valobj_addr == 0 || valobj_addr == LLDB_INVALID_ADDRESS)
- return false;
+ return StringPrinter::ReadBufferAndDumpToStream<ElemType>(options);
+}
- StringPrinter::ReadStringAndDumpToStreamOptions options(valobj);
- options.SetLocation(valobj_addr);
- options.SetProcessSP(process_sp);
- options.SetStream(&stream);
- options.SetPrefixToken("U");
+bool lldb_private::formatters::Char8StringSummaryProvider(
+ ValueObject &valobj, Stream &stream, const TypeSummaryOptions &) {
+ return CharStringSummaryProvider<StringElementType::UTF8>(valobj, stream);
+}
- if (!StringPrinter::ReadStringAndDumpToStream<
- StringPrinter::StringElementType::UTF32>(options)) {
- stream.Printf("Summary Unavailable");
- return true;
- }
+bool lldb_private::formatters::Char16StringSummaryProvider(
+ ValueObject &valobj, Stream &stream, const TypeSummaryOptions &) {
+ return CharStringSummaryProvider<StringElementType::UTF16>(valobj, stream);
+}
- return true;
+bool lldb_private::formatters::Char32StringSummaryProvider(
+ ValueObject &valobj, Stream &stream, const TypeSummaryOptions &) {
+ return CharStringSummaryProvider<StringElementType::UTF32>(valobj, stream);
}
bool lldb_private::formatters::WCharStringSummaryProvider(
ValueObject &valobj, Stream &stream, const TypeSummaryOptions &) {
- ProcessSP process_sp = valobj.GetProcessSP();
- if (!process_sp)
- return false;
-
- lldb::addr_t valobj_addr = GetArrayAddressOrPointerValue(valobj);
- if (valobj_addr == 0 || valobj_addr == LLDB_INVALID_ADDRESS)
+ Address valobj_addr = GetArrayAddressOrPointerValue(valobj);
+ if (!valobj_addr.IsValid())
return false;
// Get a wchar_t basic type from the current type system
@@ -132,20 +130,20 @@ bool lldb_private::formatters::WCharStringSummaryProvider(
StringPrinter::ReadStringAndDumpToStreamOptions options(valobj);
options.SetLocation(valobj_addr);
- options.SetProcessSP(process_sp);
+ options.SetTargetSP(valobj.GetTargetSP());
options.SetStream(&stream);
options.SetPrefixToken("L");
switch (wchar_size) {
case 8:
- return StringPrinter::ReadStringAndDumpToStream<
- StringPrinter::StringElementType::UTF8>(options);
+ return StringPrinter::ReadStringAndDumpToStream<StringElementType::UTF8>(
+ options);
case 16:
- return StringPrinter::ReadStringAndDumpToStream<
- StringPrinter::StringElementType::UTF16>(options);
+ return StringPrinter::ReadStringAndDumpToStream<StringElementType::UTF16>(
+ options);
case 32:
- return StringPrinter::ReadStringAndDumpToStream<
- StringPrinter::StringElementType::UTF32>(options);
+ return StringPrinter::ReadStringAndDumpToStream<StringElementType::UTF32>(
+ options);
default:
stream.Printf("size for wchar_t is not valid");
return true;
@@ -155,80 +153,17 @@ bool lldb_private::formatters::WCharStringSummaryProvider(
bool lldb_private::formatters::Char8SummaryProvider(
ValueObject &valobj, Stream &stream, const TypeSummaryOptions &) {
- DataExtractor data;
- Status error;
- valobj.GetData(data, error);
-
- if (error.Fail())
- return false;
-
- std::string value;
- valobj.GetValueAsCString(lldb::eFormatUnicode8, value);
- if (!value.empty())
- stream.Printf("%s ", value.c_str());
-
- StringPrinter::ReadBufferAndDumpToStreamOptions options(valobj);
- options.SetData(data);
- options.SetStream(&stream);
- options.SetPrefixToken("u8");
- options.SetQuote('\'');
- options.SetSourceSize(1);
- options.SetBinaryZeroIsTerminator(false);
-
- return StringPrinter::ReadBufferAndDumpToStream<
- StringPrinter::StringElementType::UTF8>(options);
+ return CharSummaryProvider<StringElementType::UTF8>(valobj, stream);
}
bool lldb_private::formatters::Char16SummaryProvider(
ValueObject &valobj, Stream &stream, const TypeSummaryOptions &) {
- DataExtractor data;
- Status error;
- valobj.GetData(data, error);
-
- if (error.Fail())
- return false;
-
- std::string value;
- valobj.GetValueAsCString(lldb::eFormatUnicode16, value);
- if (!value.empty())
- stream.Printf("%s ", value.c_str());
-
- StringPrinter::ReadBufferAndDumpToStreamOptions options(valobj);
- options.SetData(data);
- options.SetStream(&stream);
- options.SetPrefixToken("u");
- options.SetQuote('\'');
- options.SetSourceSize(1);
- options.SetBinaryZeroIsTerminator(false);
-
- return StringPrinter::ReadBufferAndDumpToStream<
- StringPrinter::StringElementType::UTF16>(options);
+ return CharSummaryProvider<StringElementType::UTF16>(valobj, stream);
}
bool lldb_private::formatters::Char32SummaryProvider(
ValueObject &valobj, Stream &stream, const TypeSummaryOptions &) {
- DataExtractor data;
- Status error;
- valobj.GetData(data, error);
-
- if (error.Fail())
- return false;
-
- std::string value;
- valobj.GetValueAsCString(lldb::eFormatUnicode32, value);
- if (!value.empty())
- stream.Printf("%s ", value.c_str());
-
- StringPrinter::ReadBufferAndDumpToStreamOptions options(valobj);
- options.SetData(data);
- options.SetStream(&stream);
- options.SetPrefixToken("U");
- options.SetQuote('\'');
- options.SetSourceSize(1);
- options.SetBinaryZeroIsTerminator(false);
-
- return StringPrinter::ReadBufferAndDumpToStream<
- StringPrinter::StringElementType::UTF32>(options);
+ return CharSummaryProvider<StringElementType::UTF32>(valobj, stream);
}
bool lldb_private::formatters::WCharSummaryProvider(
@@ -254,7 +189,7 @@ bool lldb_private::formatters::WCharSummaryProvider(
const uint32_t wchar_size = *size;
StringPrinter::ReadBufferAndDumpToStreamOptions options(valobj);
- options.SetData(data);
+ options.SetData(std::move(data));
options.SetStream(&stream);
options.SetPrefixToken("L");
options.SetQuote('\'');
@@ -263,14 +198,14 @@ bool lldb_private::formatters::WCharSummaryProvider(
switch (wchar_size) {
case 8:
- return StringPrinter::ReadBufferAndDumpToStream<
- StringPrinter::StringElementType::UTF8>(options);
+ return StringPrinter::ReadBufferAndDumpToStream<StringElementType::UTF8>(
+ options);
case 16:
- return StringPrinter::ReadBufferAndDumpToStream<
- StringPrinter::StringElementType::UTF16>(options);
+ return StringPrinter::ReadBufferAndDumpToStream<StringElementType::UTF16>(
+ options);
case 32:
- return StringPrinter::ReadBufferAndDumpToStream<
- StringPrinter::StringElementType::UTF32>(options);
+ return StringPrinter::ReadBufferAndDumpToStream<StringElementType::UTF32>(
+ options);
default:
stream.Printf("size for wchar_t is not valid");
return true;
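
CxxStringTypes.cpp above collapses three nearly identical UTF-8/16/32 summary providers into one function template parameterized on the element type, with a constexpr trait function supplying the prefix token and format. A minimal standalone sketch of that deduplication pattern follows; the enum, trait values, and printer are simplified stand-ins rather than LLDB's StringPrinter machinery.

#include <cstdio>
#include <utility>

enum class Elem { UTF8, UTF16, UTF32 };

// Constexpr trait lookup: prefix token paired with a (here: fake) format id.
static constexpr std::pair<const char *, int> getElementTraits(Elem e) {
  switch (e) {
  case Elem::UTF8:  return {"u8", 8};
  case Elem::UTF16: return {"u", 16};
  case Elem::UTF32: return {"U", 32};
  }
  return {nullptr, 0};
}

// One template replaces three copy-pasted providers; only the traits differ.
template <Elem E> static bool SummarizeChar(unsigned value) {
  constexpr auto traits = getElementTraits(E);
  std::printf("%sU+%04X (format %d)\n", traits.first, value, traits.second);
  return true;
}

int main() {
  SummarizeChar<Elem::UTF8>(0x41);
  SummarizeChar<Elem::UTF16>(0x3B1);
  return 0;
}
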
diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxBitset.cpp b/lldb/source/Plugins/Language/CPlusPlus/GenericBitset.cpp
index e5b868fc0fce..fc8255983436 100644
--- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxBitset.cpp
+++ b/lldb/source/Plugins/Language/CPlusPlus/GenericBitset.cpp
@@ -1,4 +1,4 @@
-//===-- LibCxxBitset.cpp --------------------------------------------------===//
+//===-- GenericBitset.cpp //-----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "LibCxx.h"
+#include "LibStdcpp.h"
#include "Plugins/TypeSystem/Clang/TypeSystemClang.h"
#include "lldb/DataFormatters/FormattersHelpers.h"
#include "lldb/Target/Target.h"
@@ -16,9 +17,15 @@ using namespace lldb_private;
namespace {
-class BitsetFrontEnd : public SyntheticChildrenFrontEnd {
+/// This class can be used for handling bitsets from both libcxx and libstdcpp.
+class GenericBitsetFrontEnd : public SyntheticChildrenFrontEnd {
public:
- BitsetFrontEnd(ValueObject &valobj);
+ enum class StdLib {
+ LibCxx,
+ LibStdcpp,
+ };
+
+ GenericBitsetFrontEnd(ValueObject &valobj, StdLib stdlib);
size_t GetIndexOfChildWithName(ConstString name) override {
return formatters::ExtractIndexFromString(name.GetCString());
@@ -30,6 +37,8 @@ public:
ValueObjectSP GetChildAtIndex(size_t idx) override;
private:
+ ConstString GetDataContainerMemberName();
+
// The lifetime of a ValueObject and all its derivative ValueObjects
// (children, clones, etc.) is managed by a ClusterManager. These
// objects are only destroyed when every shared pointer to any of them
@@ -38,15 +47,16 @@ private:
// Value objects created from raw data (i.e. in a different cluster) must
// be referenced via shared pointer to keep them alive, however.
std::vector<ValueObjectSP> m_elements;
- ValueObject* m_first = nullptr;
+ ValueObject *m_first = nullptr;
CompilerType m_bool_type;
ByteOrder m_byte_order = eByteOrderInvalid;
uint8_t m_byte_size = 0;
+ StdLib m_stdlib;
};
} // namespace
-BitsetFrontEnd::BitsetFrontEnd(ValueObject &valobj)
- : SyntheticChildrenFrontEnd(valobj) {
+GenericBitsetFrontEnd::GenericBitsetFrontEnd(ValueObject &valobj, StdLib stdlib)
+ : SyntheticChildrenFrontEnd(valobj), m_stdlib(stdlib) {
m_bool_type = valobj.GetCompilerType().GetBasicTypeFromAST(eBasicTypeBool);
if (auto target_sp = m_backend.GetTargetSP()) {
m_byte_order = target_sp->GetArchitecture().GetByteOrder();
@@ -55,7 +65,16 @@ BitsetFrontEnd::BitsetFrontEnd(ValueObject &valobj)
}
}
-bool BitsetFrontEnd::Update() {
+ConstString GenericBitsetFrontEnd::GetDataContainerMemberName() {
+ switch (m_stdlib) {
+ case StdLib::LibCxx:
+ return ConstString("__first_");
+ case StdLib::LibStdcpp:
+ return ConstString("_M_w");
+ }
+}
+
+bool GenericBitsetFrontEnd::Update() {
m_elements.clear();
m_first = nullptr;
@@ -65,16 +84,17 @@ bool BitsetFrontEnd::Update() {
size_t capping_size = target_sp->GetMaximumNumberOfChildrenToDisplay();
size_t size = 0;
+
if (auto arg = m_backend.GetCompilerType().GetIntegralTemplateArgument(0))
size = arg->value.getLimitedValue(capping_size);
m_elements.assign(size, ValueObjectSP());
-
- m_first = m_backend.GetChildMemberWithName(ConstString("__first_"), true).get();
+ m_first = m_backend.GetChildMemberWithName(GetDataContainerMemberName(), true)
+ .get();
return false;
}
-ValueObjectSP BitsetFrontEnd::GetChildAtIndex(size_t idx) {
+ValueObjectSP GenericBitsetFrontEnd::GetChildAtIndex(size_t idx) {
if (idx >= m_elements.size() || !m_first)
return ValueObjectSP();
@@ -112,9 +132,18 @@ ValueObjectSP BitsetFrontEnd::GetChildAtIndex(size_t idx) {
return m_elements[idx];
}
+SyntheticChildrenFrontEnd *formatters::LibStdcppBitsetSyntheticFrontEndCreator(
+ CXXSyntheticChildren *, lldb::ValueObjectSP valobj_sp) {
+ if (valobj_sp)
+ return new GenericBitsetFrontEnd(*valobj_sp,
+ GenericBitsetFrontEnd::StdLib::LibStdcpp);
+ return nullptr;
+}
+
SyntheticChildrenFrontEnd *formatters::LibcxxBitsetSyntheticFrontEndCreator(
CXXSyntheticChildren *, lldb::ValueObjectSP valobj_sp) {
if (valobj_sp)
- return new BitsetFrontEnd(*valobj_sp);
+ return new GenericBitsetFrontEnd(*valobj_sp,
+ GenericBitsetFrontEnd::StdLib::LibCxx);
return nullptr;
}
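
The renamed GenericBitset.cpp generalizes the former libc++-only bitset front end so one synthetic-children provider serves both standard libraries, selecting the member that stores the bits via a StdLib enum (__first_ for libc++, _M_w for libstdc++). A tiny standalone sketch of that dispatch:

#include <cstdio>

enum class StdLib { LibCxx, LibStdcpp };

// The only library-specific detail is the name of the member holding the
// underlying word storage; everything else is shared.
static const char *GetDataContainerMemberName(StdLib lib) {
  switch (lib) {
  case StdLib::LibCxx:    return "__first_";
  case StdLib::LibStdcpp: return "_M_w";
  }
  return nullptr;
}

int main() {
  std::printf("libc++ member: %s, libstdc++ member: %s\n",
              GetDataContainerMemberName(StdLib::LibCxx),
              GetDataContainerMemberName(StdLib::LibStdcpp));
  return 0;
}
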
diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp
index 8eda422f3145..b9aef0ae7d9e 100644
--- a/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp
+++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp
@@ -686,7 +686,7 @@ bool lldb_private::formatters::LibcxxWStringSummaryProvider(
if (!wchar_t_size)
return false;
- options.SetData(extractor);
+ options.SetData(std::move(extractor));
options.SetStream(&stream);
options.SetPrefixToken("L");
options.SetQuote('"');
@@ -743,12 +743,14 @@ bool LibcxxStringSummaryProvider(ValueObject &valobj, Stream &stream,
}
}
- DataExtractor extractor;
- const size_t bytes_read = location_sp->GetPointeeData(extractor, 0, size);
- if (bytes_read < size)
- return false;
+ {
+ DataExtractor extractor;
+ const size_t bytes_read = location_sp->GetPointeeData(extractor, 0, size);
+ if (bytes_read < size)
+ return false;
- options.SetData(extractor);
+ options.SetData(std::move(extractor));
+ }
options.SetStream(&stream);
if (prefix_token.empty())
options.SetPrefixToken(nullptr);
diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibStdcpp.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibStdcpp.cpp
index b4af67ecee0d..9e248d162cd2 100644
--- a/lldb/source/Plugins/Language/CPlusPlus/LibStdcpp.cpp
+++ b/lldb/source/Plugins/Language/CPlusPlus/LibStdcpp.cpp
@@ -250,7 +250,7 @@ bool lldb_private::formatters::LibStdcppStringSummaryProvider(
addr_of_data == LLDB_INVALID_ADDRESS)
return false;
options.SetLocation(addr_of_data);
- options.SetProcessSP(process_sp);
+ options.SetTargetSP(valobj.GetTargetSP());
options.SetStream(&stream);
options.SetNeedsZeroTermination(false);
options.SetBinaryZeroIsTerminator(true);
@@ -311,7 +311,7 @@ bool lldb_private::formatters::LibStdcppWStringSummaryProvider(
addr_of_data == LLDB_INVALID_ADDRESS)
return false;
options.SetLocation(addr_of_data);
- options.SetProcessSP(process_sp);
+ options.SetTargetSP(valobj.GetTargetSP());
options.SetStream(&stream);
options.SetNeedsZeroTermination(false);
options.SetBinaryZeroIsTerminator(false);
diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibStdcpp.h b/lldb/source/Plugins/Language/CPlusPlus/LibStdcpp.h
index 9e41aa0ffc01..b6f9c469fedd 100644
--- a/lldb/source/Plugins/Language/CPlusPlus/LibStdcpp.h
+++ b/lldb/source/Plugins/Language/CPlusPlus/LibStdcpp.h
@@ -42,6 +42,10 @@ LibStdcppTupleSyntheticFrontEndCreator(CXXSyntheticChildren *,
lldb::ValueObjectSP);
SyntheticChildrenFrontEnd *
+LibStdcppBitsetSyntheticFrontEndCreator(CXXSyntheticChildren *,
+ lldb::ValueObjectSP);
+
+SyntheticChildrenFrontEnd *
LibStdcppVectorIteratorSyntheticFrontEndCreator(CXXSyntheticChildren *,
lldb::ValueObjectSP);
diff --git a/lldb/source/Plugins/Language/CPlusPlus/MSVCUndecoratedNameParser.cpp b/lldb/source/Plugins/Language/CPlusPlus/MSVCUndecoratedNameParser.cpp
index b24bcc1344e2..8ecf6712eace 100644
--- a/lldb/source/Plugins/Language/CPlusPlus/MSVCUndecoratedNameParser.cpp
+++ b/lldb/source/Plugins/Language/CPlusPlus/MSVCUndecoratedNameParser.cpp
@@ -72,7 +72,7 @@ MSVCUndecoratedNameParser::MSVCUndecoratedNameParser(llvm::StringRef name) {
}
bool MSVCUndecoratedNameParser::IsMSVCUndecoratedName(llvm::StringRef name) {
- return name.find('`') != llvm::StringRef::npos;
+ return name.contains('`');
}
bool MSVCUndecoratedNameParser::ExtractContextAndIdentifier(
diff --git a/lldb/source/Plugins/Language/ObjC/Cocoa.cpp b/lldb/source/Plugins/Language/ObjC/Cocoa.cpp
index 1479f4f0c151..0d7cd6791b04 100644
--- a/lldb/source/Plugins/Language/ObjC/Cocoa.cpp
+++ b/lldb/source/Plugins/Language/ObjC/Cocoa.cpp
@@ -7,7 +7,10 @@
//===----------------------------------------------------------------------===//
#include "Cocoa.h"
+#include "NSString.h"
+#include "ObjCConstants.h"
+#include "Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntime.h"
#include "Plugins/TypeSystem/Clang/TypeSystemClang.h"
#include "lldb/Core/Mangled.h"
#include "lldb/Core/ValueObject.h"
@@ -28,9 +31,6 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/bit.h"
-#include "Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntime.h"
-
-#include "NSString.h"
using namespace lldb;
using namespace lldb_private;
@@ -456,6 +456,72 @@ bool lldb_private::formatters::NSNumberSummaryProvider(
if (class_name == "NSDecimalNumber")
return NSDecimalNumberSummaryProvider(valobj, stream, options);
+ if (class_name == "NSConstantIntegerNumber") {
+ Status error;
+ int64_t value = process_sp->ReadSignedIntegerFromMemory(
+ valobj_addr + 2 * ptr_size, 8, 0, error);
+ if (error.Fail())
+ return false;
+ uint64_t encoding_addr = process_sp->ReadUnsignedIntegerFromMemory(
+ valobj_addr + ptr_size, ptr_size, 0, error);
+ if (error.Fail())
+ return false;
+ char encoding =
+ process_sp->ReadUnsignedIntegerFromMemory(encoding_addr, 1, 0, error);
+ if (error.Fail())
+ return false;
+
+ switch (encoding) {
+ case _C_CHR:
+ NSNumber_FormatChar(valobj, stream, (char)value, options.GetLanguage());
+ return true;
+ case _C_SHT:
+ NSNumber_FormatShort(valobj, stream, (short)value, options.GetLanguage());
+ return true;
+ case _C_INT:
+ NSNumber_FormatInt(valobj, stream, (int)value, options.GetLanguage());
+ return true;
+ case _C_LNG:
+ case _C_LNG_LNG:
+ NSNumber_FormatLong(valobj, stream, value, options.GetLanguage());
+ return true;
+
+ case _C_UCHR:
+ case _C_USHT:
+ case _C_UINT:
+ case _C_ULNG:
+ case _C_ULNG_LNG:
+ stream.Printf("%" PRIu64, value);
+ return true;
+ }
+
+ return false;
+ }
+
+ if (class_name == "NSConstantFloatNumber") {
+ Status error;
+ uint32_t flt_as_int = process_sp->ReadUnsignedIntegerFromMemory(
+ valobj_addr + ptr_size, 4, 0, error);
+ if (error.Fail())
+ return false;
+ float flt_value = 0.0f;
+ memcpy(&flt_value, &flt_as_int, sizeof(flt_as_int));
+ NSNumber_FormatFloat(valobj, stream, flt_value, options.GetLanguage());
+ return true;
+ }
+
+ if (class_name == "NSConstantDoubleNumber") {
+ Status error;
+ uint64_t dbl_as_lng = process_sp->ReadUnsignedIntegerFromMemory(
+ valobj_addr + ptr_size, 8, 0, error);
+ if (error.Fail())
+ return false;
+ double dbl_value = 0.0;
+ memcpy(&dbl_value, &dbl_as_lng, sizeof(dbl_as_lng));
+ NSNumber_FormatDouble(valobj, stream, dbl_value, options.GetLanguage());
+ return true;
+ }
+
if (class_name == "NSNumber" || class_name == "__NSCFNumber") {
int64_t value = 0;
uint64_t i_bits = 0;
diff --git a/lldb/source/Plugins/Language/ObjC/NSArray.cpp b/lldb/source/Plugins/Language/ObjC/NSArray.cpp
index b0398dd19c02..f18b59fb11ff 100644
--- a/lldb/source/Plugins/Language/ObjC/NSArray.cpp
+++ b/lldb/source/Plugins/Language/ObjC/NSArray.cpp
@@ -280,6 +280,22 @@ namespace Foundation1436 {
}
}
+namespace ConstantArray {
+
+struct ConstantArray32 {
+ uint64_t used;
+ uint32_t list;
+};
+
+struct ConstantArray64 {
+ uint64_t used;
+ uint64_t list;
+};
+
+using NSConstantArraySyntheticFrontEnd =
+ GenericNSArrayISyntheticFrontEnd<ConstantArray32, ConstantArray64, false>;
+} // namespace ConstantArray
+
class NSArray0SyntheticFrontEnd : public SyntheticChildrenFrontEnd {
public:
NSArray0SyntheticFrontEnd(lldb::ValueObjectSP valobj_sp);
@@ -356,6 +372,7 @@ bool lldb_private::formatters::NSArraySummaryProvider(
static const ConstString g_NSArrayMLegacy("__NSArrayM_Legacy");
static const ConstString g_NSArrayMImmutable("__NSArrayM_Immutable");
static const ConstString g_NSCallStackArray("_NSCallStackArray");
+ static const ConstString g_NSConstantArray("NSConstantArray");
if (class_name.IsEmpty())
return false;
@@ -366,6 +383,12 @@ bool lldb_private::formatters::NSArraySummaryProvider(
ptr_size, 0, error);
if (error.Fail())
return false;
+ } else if (class_name == g_NSConstantArray) {
+ Status error;
+ value = process_sp->ReadUnsignedIntegerFromMemory(valobj_addr + ptr_size, 8,
+ 0, error);
+ if (error.Fail())
+ return false;
} else if (class_name == g_NSArrayM) {
AppleObjCRuntime *apple_runtime =
llvm::dyn_cast_or_null<AppleObjCRuntime>(runtime);
@@ -803,6 +826,7 @@ lldb_private::formatters::NSArraySyntheticFrontEndCreator(
ConstString class_name(descriptor->GetClassName());
static const ConstString g_NSArrayI("__NSArrayI");
+ static const ConstString g_NSConstantArray("NSConstantArray");
static const ConstString g_NSArrayI_Transfer("__NSArrayI_Transfer");
static const ConstString g_NSFrozenArrayM("__NSFrozenArrayM");
static const ConstString g_NSArrayM("__NSArrayM");
@@ -823,6 +847,8 @@ lldb_private::formatters::NSArraySyntheticFrontEndCreator(
return (new Foundation1300::NSArrayISyntheticFrontEnd(valobj_sp));
} else if (class_name == g_NSArrayI_Transfer) {
return (new Foundation1436::NSArrayI_TransferSyntheticFrontEnd(valobj_sp));
+ } else if (class_name == g_NSConstantArray) {
+ return new ConstantArray::NSConstantArraySyntheticFrontEnd(valobj_sp);
} else if (class_name == g_NSFrozenArrayM) {
return (new Foundation1436::NSFrozenArrayMSyntheticFrontEnd(valobj_sp));
} else if (class_name == g_NSArray0) {
diff --git a/lldb/source/Plugins/Language/ObjC/NSDictionary.cpp b/lldb/source/Plugins/Language/ObjC/NSDictionary.cpp
index 326f47a10660..73c40ba959a6 100644
--- a/lldb/source/Plugins/Language/ObjC/NSDictionary.cpp
+++ b/lldb/source/Plugins/Language/ObjC/NSDictionary.cpp
@@ -142,6 +142,38 @@ private:
std::vector<DictionaryItemDescriptor> m_children;
};
+class NSConstantDictionarySyntheticFrontEnd : public SyntheticChildrenFrontEnd {
+public:
+ NSConstantDictionarySyntheticFrontEnd(lldb::ValueObjectSP valobj_sp);
+
+ size_t CalculateNumChildren() override;
+
+ lldb::ValueObjectSP GetChildAtIndex(size_t idx) override;
+
+ bool Update() override;
+
+ bool MightHaveChildren() override;
+
+ size_t GetIndexOfChildWithName(ConstString name) override;
+
+private:
+ ExecutionContextRef m_exe_ctx_ref;
+ CompilerType m_pair_type;
+ uint8_t m_ptr_size = 8;
+ lldb::ByteOrder m_order = lldb::eByteOrderInvalid;
+ unsigned int m_size = 0;
+ lldb::addr_t m_keys_ptr = LLDB_INVALID_ADDRESS;
+ lldb::addr_t m_objects_ptr = LLDB_INVALID_ADDRESS;
+
+ struct DictionaryItemDescriptor {
+ lldb::addr_t key_ptr;
+ lldb::addr_t val_ptr;
+ lldb::ValueObjectSP valobj_sp;
+ };
+
+ std::vector<DictionaryItemDescriptor> m_children;
+};
+
class NSCFDictionarySyntheticFrontEnd : public SyntheticChildrenFrontEnd {
public:
NSCFDictionarySyntheticFrontEnd(lldb::ValueObjectSP valobj_sp);
@@ -302,7 +334,6 @@ namespace Foundation1428 {
}
namespace Foundation1437 {
- namespace {
static const uint64_t NSDictionaryCapacities[] = {
0, 3, 7, 13, 23, 41, 71, 127, 191, 251, 383, 631, 1087, 1723,
2803, 4523, 7351, 11959, 19447, 31231, 50683, 81919, 132607,
@@ -313,7 +344,8 @@ namespace Foundation1437 {
static const size_t NSDictionaryNumSizeBuckets =
sizeof(NSDictionaryCapacities) / sizeof(uint64_t);
-
+
+ namespace {
struct DataDescriptor_32 {
uint32_t _buffer;
uint32_t _muts;
@@ -339,8 +371,8 @@ namespace Foundation1437 {
0 : NSDictionaryCapacities[_szidx];
}
};
- }
-
+ } // namespace
+
using NSDictionaryMSyntheticFrontEnd =
GenericNSDictionaryMSyntheticFrontEnd<DataDescriptor_32, DataDescriptor_64>;
@@ -416,6 +448,7 @@ bool lldb_private::formatters::NSDictionarySummaryProvider(
static const ConstString g_DictionaryCF("__CFDictionary");
static const ConstString g_DictionaryNSCF("__NSCFDictionary");
static const ConstString g_DictionaryCFRef("CFDictionaryRef");
+ static const ConstString g_ConstantDictionary("NSConstantDictionary");
if (class_name.IsEmpty())
return false;
@@ -428,8 +461,14 @@ bool lldb_private::formatters::NSDictionarySummaryProvider(
return false;
value &= (is_64bit ? ~0xFC00000000000000UL : ~0xFC000000U);
- } else if (class_name == g_DictionaryM || class_name == g_DictionaryMLegacy
- || class_name == g_DictionaryMFrozen) {
+ } else if (class_name == g_ConstantDictionary) {
+ Status error;
+ value = process_sp->ReadUnsignedIntegerFromMemory(
+ valobj_addr + 2 * ptr_size, ptr_size, 0, error);
+ if (error.Fail())
+ return false;
+ } else if (class_name == g_DictionaryM || class_name == g_DictionaryMLegacy ||
+ class_name == g_DictionaryMFrozen) {
AppleObjCRuntime *apple_runtime =
llvm::dyn_cast_or_null<AppleObjCRuntime>(runtime);
Status error;
@@ -447,8 +486,7 @@ bool lldb_private::formatters::NSDictionarySummaryProvider(
value = 1;
} else if (class_name == g_Dictionary0) {
value = 0;
- } else if (class_name == g_DictionaryCF ||
- class_name == g_DictionaryNSCF ||
+ } else if (class_name == g_DictionaryCF || class_name == g_DictionaryNSCF ||
class_name == g_DictionaryCFRef) {
ExecutionContext exe_ctx(process_sp);
CFBasicHash cfbh;
@@ -517,12 +555,15 @@ lldb_private::formatters::NSDictionarySyntheticFrontEndCreator(
static const ConstString g_DictionaryCF("__CFDictionary");
static const ConstString g_DictionaryNSCF("__NSCFDictionary");
static const ConstString g_DictionaryCFRef("CFDictionaryRef");
+ static const ConstString g_ConstantDictionary("NSConstantDictionary");
if (class_name.IsEmpty())
return nullptr;
if (class_name == g_DictionaryI) {
return (new NSDictionaryISyntheticFrontEnd(valobj_sp));
+ } else if (class_name == g_ConstantDictionary) {
+ return (new NSConstantDictionarySyntheticFrontEnd(valobj_sp));
} else if (class_name == g_DictionaryM || class_name == g_DictionaryMFrozen) {
if (runtime->GetFoundationVersion() >= 1437) {
return (new Foundation1437::NSDictionaryMSyntheticFrontEnd(valobj_sp));
@@ -532,11 +573,10 @@ lldb_private::formatters::NSDictionarySyntheticFrontEndCreator(
return (new Foundation1100::NSDictionaryMSyntheticFrontEnd(valobj_sp));
}
} else if (class_name == g_DictionaryMLegacy) {
- return (new Foundation1100::NSDictionaryMSyntheticFrontEnd(valobj_sp));
+ return (new Foundation1100::NSDictionaryMSyntheticFrontEnd(valobj_sp));
} else if (class_name == g_Dictionary1) {
return (new NSDictionary1SyntheticFrontEnd(valobj_sp));
- } else if (class_name == g_DictionaryCF ||
- class_name == g_DictionaryNSCF ||
+ } else if (class_name == g_DictionaryCF || class_name == g_DictionaryNSCF ||
class_name == g_DictionaryCFRef) {
return (new NSCFDictionarySyntheticFrontEnd(valobj_sp));
} else {
@@ -830,6 +870,120 @@ lldb_private::formatters::NSCFDictionarySyntheticFrontEnd::GetChildAtIndex(
return dict_item.valobj_sp;
}
+lldb_private::formatters::NSConstantDictionarySyntheticFrontEnd::
+ NSConstantDictionarySyntheticFrontEnd(lldb::ValueObjectSP valobj_sp)
+ : SyntheticChildrenFrontEnd(*valobj_sp) {}
+
+size_t lldb_private::formatters::NSConstantDictionarySyntheticFrontEnd::
+ GetIndexOfChildWithName(ConstString name) {
+ const char *item_name = name.GetCString();
+ uint32_t idx = ExtractIndexFromString(item_name);
+ if (idx < UINT32_MAX && idx >= CalculateNumChildren())
+ return UINT32_MAX;
+ return idx;
+}
+
+size_t lldb_private::formatters::NSConstantDictionarySyntheticFrontEnd::
+ CalculateNumChildren() {
+ return m_size;
+}
+
+bool lldb_private::formatters::NSConstantDictionarySyntheticFrontEnd::Update() {
+ ValueObjectSP valobj_sp = m_backend.GetSP();
+ if (!valobj_sp)
+ return false;
+ m_exe_ctx_ref = valobj_sp->GetExecutionContextRef();
+ Status error;
+ error.Clear();
+ lldb::ProcessSP process_sp(valobj_sp->GetProcessSP());
+ if (!process_sp)
+ return false;
+ m_ptr_size = process_sp->GetAddressByteSize();
+ m_order = process_sp->GetByteOrder();
+ uint64_t valobj_addr = valobj_sp->GetValueAsUnsigned(0);
+ m_size = process_sp->ReadUnsignedIntegerFromMemory(
+ valobj_addr + 2 * m_ptr_size, m_ptr_size, 0, error);
+ if (error.Fail())
+ return false;
+ m_keys_ptr =
+ process_sp->ReadPointerFromMemory(valobj_addr + 3 * m_ptr_size, error);
+ if (error.Fail())
+ return false;
+ m_objects_ptr =
+ process_sp->ReadPointerFromMemory(valobj_addr + 4 * m_ptr_size, error);
+ return !error.Fail();
+}
+
+bool lldb_private::formatters::NSConstantDictionarySyntheticFrontEnd::
+ MightHaveChildren() {
+ return true;
+}
+
+lldb::ValueObjectSP lldb_private::formatters::
+ NSConstantDictionarySyntheticFrontEnd::GetChildAtIndex(size_t idx) {
+ uint32_t num_children = CalculateNumChildren();
+
+ if (idx >= num_children)
+ return lldb::ValueObjectSP();
+
+ if (m_children.empty()) {
+ // do the scan phase
+ lldb::addr_t key_at_idx = 0, val_at_idx = 0;
+ ProcessSP process_sp = m_exe_ctx_ref.GetProcessSP();
+ if (!process_sp)
+ return lldb::ValueObjectSP();
+
+ for (unsigned int child = 0; child < num_children; ++child) {
+ Status error;
+ key_at_idx = process_sp->ReadPointerFromMemory(
+ m_keys_ptr + child * m_ptr_size, error);
+ if (error.Fail())
+ return lldb::ValueObjectSP();
+ val_at_idx = process_sp->ReadPointerFromMemory(
+ m_objects_ptr + child * m_ptr_size, error);
+ if (error.Fail())
+ return lldb::ValueObjectSP();
+ DictionaryItemDescriptor descriptor = {key_at_idx, val_at_idx,
+ lldb::ValueObjectSP()};
+ m_children.push_back(descriptor);
+ }
+ }
+
+ if (idx >= m_children.size()) // should never happen
+ return lldb::ValueObjectSP();
+
+ DictionaryItemDescriptor &dict_item = m_children[idx];
+ if (!dict_item.valobj_sp) {
+ if (!m_pair_type.IsValid()) {
+ TargetSP target_sp(m_backend.GetTargetSP());
+ if (!target_sp)
+ return ValueObjectSP();
+ m_pair_type = GetLLDBNSPairType(target_sp);
+ }
+ if (!m_pair_type.IsValid())
+ return ValueObjectSP();
+
+ DataBufferSP buffer_sp(new DataBufferHeap(2 * m_ptr_size, 0));
+
+ if (m_ptr_size == 8) {
+ uint64_t *data_ptr = (uint64_t *)buffer_sp->GetBytes();
+ *data_ptr = dict_item.key_ptr;
+ *(data_ptr + 1) = dict_item.val_ptr;
+ } else {
+ uint32_t *data_ptr = (uint32_t *)buffer_sp->GetBytes();
+ *data_ptr = dict_item.key_ptr;
+ *(data_ptr + 1) = dict_item.val_ptr;
+ }
+
+ StreamString idx_name;
+ idx_name.Printf("[%" PRIu64 "]", (uint64_t)idx);
+ DataExtractor data(buffer_sp, m_order, m_ptr_size);
+ dict_item.valobj_sp = CreateValueObjectFromData(idx_name.GetString(), data,
+ m_exe_ctx_ref, m_pair_type);
+ }
+ return dict_item.valobj_sp;
+}
+
lldb_private::formatters::NSDictionary1SyntheticFrontEnd::
NSDictionary1SyntheticFrontEnd(lldb::ValueObjectSP valobj_sp)
: SyntheticChildrenFrontEnd(*valobj_sp.get()), m_pair(nullptr) {}
diff --git a/lldb/source/Plugins/Language/ObjC/NSString.cpp b/lldb/source/Plugins/Language/ObjC/NSString.cpp
index 85922992eb2b..2b5161e781f2 100644
--- a/lldb/source/Plugins/Language/ObjC/NSString.cpp
+++ b/lldb/source/Plugins/Language/ObjC/NSString.cpp
@@ -166,7 +166,7 @@ bool lldb_private::formatters::NSStringSummaryProvider(
return false;
if (has_explicit_length && is_unicode) {
options.SetLocation(location);
- options.SetProcessSP(process_sp);
+ options.SetTargetSP(valobj.GetTargetSP());
options.SetStream(&stream);
options.SetQuote('"');
options.SetSourceSize(explicit_length);
@@ -179,7 +179,7 @@ bool lldb_private::formatters::NSStringSummaryProvider(
StringPrinter::StringElementType::UTF16>(options);
} else {
options.SetLocation(location + 1);
- options.SetProcessSP(process_sp);
+ options.SetTargetSP(valobj.GetTargetSP());
options.SetStream(&stream);
options.SetSourceSize(explicit_length);
options.SetHasSourceSize(has_explicit_length);
@@ -195,7 +195,7 @@ bool lldb_private::formatters::NSStringSummaryProvider(
uint64_t location = 3 * ptr_size + valobj_addr;
options.SetLocation(location);
- options.SetProcessSP(process_sp);
+ options.SetTargetSP(valobj.GetTargetSP());
options.SetStream(&stream);
options.SetQuote('"');
options.SetSourceSize(explicit_length);
@@ -217,7 +217,7 @@ bool lldb_private::formatters::NSStringSummaryProvider(
return false;
}
options.SetLocation(location);
- options.SetProcessSP(process_sp);
+ options.SetTargetSP(valobj.GetTargetSP());
options.SetStream(&stream);
options.SetQuote('"');
options.SetSourceSize(explicit_length);
@@ -237,7 +237,7 @@ bool lldb_private::formatters::NSStringSummaryProvider(
lldb::addr_t location = valobj.GetValueAsUnsigned(0) + ptr_size + 4;
options.SetLocation(location);
- options.SetProcessSP(process_sp);
+ options.SetTargetSP(valobj.GetTargetSP());
options.SetStream(&stream);
options.SetQuote('"');
options.SetSourceSize(explicit_length);
@@ -260,7 +260,7 @@ bool lldb_private::formatters::NSStringSummaryProvider(
location++;
}
options.SetLocation(location);
- options.SetProcessSP(process_sp);
+ options.SetTargetSP(valobj.GetTargetSP());
options.SetStream(&stream);
options.SetSourceSize(explicit_length);
options.SetHasSourceSize(has_explicit_length);
@@ -283,7 +283,7 @@ bool lldb_private::formatters::NSStringSummaryProvider(
explicit_length++; // account for the fact that there is no NULL and we
// need to have one added
options.SetLocation(location);
- options.SetProcessSP(process_sp);
+ options.SetTargetSP(valobj.GetTargetSP());
options.SetStream(&stream);
options.SetSourceSize(explicit_length);
options.SetHasSourceSize(has_explicit_length);
diff --git a/lldb/source/Plugins/Language/ObjC/ObjCConstants.h b/lldb/source/Plugins/Language/ObjC/ObjCConstants.h
new file mode 100644
index 000000000000..c7c498d4cab3
--- /dev/null
+++ b/lldb/source/Plugins/Language/ObjC/ObjCConstants.h
@@ -0,0 +1,44 @@
+//===-- ObjCConstants.h------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLDB_SOURCE_PLUGINS_LANGUAGE_OBJC_OBJCCONSTANTS_H
+#define LLDB_SOURCE_PLUGINS_LANGUAGE_OBJC_OBJCCONSTANTS_H
+
+// Objective-C Type Encoding
+#define _C_ID '@'
+#define _C_CLASS '#'
+#define _C_SEL ':'
+#define _C_CHR 'c'
+#define _C_UCHR 'C'
+#define _C_SHT 's'
+#define _C_USHT 'S'
+#define _C_INT 'i'
+#define _C_UINT 'I'
+#define _C_LNG 'l'
+#define _C_ULNG 'L'
+#define _C_LNG_LNG 'q'
+#define _C_ULNG_LNG 'Q'
+#define _C_FLT 'f'
+#define _C_DBL 'd'
+#define _C_BFLD 'b'
+#define _C_BOOL 'B'
+#define _C_VOID 'v'
+#define _C_UNDEF '?'
+#define _C_PTR '^'
+#define _C_CHARPTR '*'
+#define _C_ATOM '%'
+#define _C_ARY_B '['
+#define _C_ARY_E ']'
+#define _C_UNION_B '('
+#define _C_UNION_E ')'
+#define _C_STRUCT_B '{'
+#define _C_STRUCT_E '}'
+#define _C_VECTOR '!'
+#define _C_CONST 'r'
+
+#endif // LLDB_SOURCE_PLUGINS_LANGUAGE_OBJC_OBJCCONSTANTS_H
diff --git a/lldb/source/Plugins/Language/ObjC/ObjCLanguage.cpp b/lldb/source/Plugins/Language/ObjC/ObjCLanguage.cpp
index 379c53432b7b..99ce389bb53e 100644
--- a/lldb/source/Plugins/Language/ObjC/ObjCLanguage.cpp
+++ b/lldb/source/Plugins/Language/ObjC/ObjCLanguage.cpp
@@ -48,19 +48,6 @@ void ObjCLanguage::Terminate() {
PluginManager::UnregisterPlugin(CreateInstance);
}
-lldb_private::ConstString ObjCLanguage::GetPluginNameStatic() {
- static ConstString g_name("objc");
- return g_name;
-}
-
-// PluginInterface protocol
-
-lldb_private::ConstString ObjCLanguage::GetPluginName() {
- return GetPluginNameStatic();
-}
-
-uint32_t ObjCLanguage::GetPluginVersion() { return 1; }
-
// Static Functions
Language *ObjCLanguage::CreateInstance(lldb::LanguageType language) {
@@ -405,6 +392,9 @@ static void LoadObjCFormatters(TypeCategoryImplSP objc_category_sp) {
"NSArray summary provider", ConstString("NSArray"), appkit_flags);
AddCXXSummary(
objc_category_sp, lldb_private::formatters::NSArraySummaryProvider,
+ "NSArray summary provider", ConstString("NSConstantArray"), appkit_flags);
+ AddCXXSummary(
+ objc_category_sp, lldb_private::formatters::NSArraySummaryProvider,
"NSArray summary provider", ConstString("NSMutableArray"), appkit_flags);
AddCXXSummary(
objc_category_sp, lldb_private::formatters::NSArraySummaryProvider,
@@ -440,6 +430,10 @@ static void LoadObjCFormatters(TypeCategoryImplSP objc_category_sp) {
AddCXXSummary(objc_category_sp,
lldb_private::formatters::NSDictionarySummaryProvider<false>,
"NSDictionary summary provider",
+ ConstString("NSConstantDictionary"), appkit_flags);
+ AddCXXSummary(objc_category_sp,
+ lldb_private::formatters::NSDictionarySummaryProvider<false>,
+ "NSDictionary summary provider",
ConstString("NSMutableDictionary"), appkit_flags);
AddCXXSummary(objc_category_sp,
lldb_private::formatters::NSDictionarySummaryProvider<false>,
@@ -545,6 +539,10 @@ static void LoadObjCFormatters(TypeCategoryImplSP objc_category_sp) {
ScriptedSyntheticChildren::Flags());
AddCXXSynthetic(objc_category_sp,
lldb_private::formatters::NSArraySyntheticFrontEndCreator,
+ "NSArray synthetic children", ConstString("NSConstantArray"),
+ ScriptedSyntheticChildren::Flags());
+ AddCXXSynthetic(objc_category_sp,
+ lldb_private::formatters::NSArraySyntheticFrontEndCreator,
"NSArray synthetic children", ConstString("NSMutableArray"),
ScriptedSyntheticChildren::Flags());
AddCXXSynthetic(objc_category_sp,
@@ -573,6 +571,11 @@ static void LoadObjCFormatters(TypeCategoryImplSP objc_category_sp) {
AddCXXSynthetic(
objc_category_sp,
lldb_private::formatters::NSDictionarySyntheticFrontEndCreator,
+ "NSDictionary synthetic children", ConstString("NSConstantDictionary"),
+ ScriptedSyntheticChildren::Flags());
+ AddCXXSynthetic(
+ objc_category_sp,
+ lldb_private::formatters::NSDictionarySyntheticFrontEndCreator,
"NSDictionary synthetic children", ConstString("__NSDictionaryI"),
ScriptedSyntheticChildren::Flags());
AddCXXSynthetic(
@@ -791,6 +794,18 @@ static void LoadObjCFormatters(TypeCategoryImplSP objc_category_sp) {
AddCXXSummary(
objc_category_sp, lldb_private::formatters::NSNumberSummaryProvider,
"NSNumber summary provider", ConstString("NSNumber"), appkit_flags);
+ AddCXXSummary(objc_category_sp,
+ lldb_private::formatters::NSNumberSummaryProvider,
+ "NSNumber summary provider",
+ ConstString("NSConstantIntegerNumber"), appkit_flags);
+ AddCXXSummary(objc_category_sp,
+ lldb_private::formatters::NSNumberSummaryProvider,
+ "NSNumber summary provider",
+ ConstString("NSConstantDoubleNumber"), appkit_flags);
+ AddCXXSummary(objc_category_sp,
+ lldb_private::formatters::NSNumberSummaryProvider,
+ "NSNumber summary provider",
+ ConstString("NSConstantFloatNumber"), appkit_flags);
AddCXXSummary(
objc_category_sp, lldb_private::formatters::NSNumberSummaryProvider,
"CFNumberRef summary provider", ConstString("CFNumberRef"), appkit_flags);
@@ -906,7 +921,8 @@ lldb::TypeCategoryImplSP ObjCLanguage::GetFormatters() {
static TypeCategoryImplSP g_category;
llvm::call_once(g_initialize, [this]() -> void {
- DataVisualization::Categories::GetCategory(GetPluginName(), g_category);
+ DataVisualization::Categories::GetCategory(ConstString(GetPluginName()),
+ g_category);
if (g_category) {
LoadCoreMediaFormatters(g_category);
LoadObjCFormatters(g_category);
diff --git a/lldb/source/Plugins/Language/ObjC/ObjCLanguage.h b/lldb/source/Plugins/Language/ObjC/ObjCLanguage.h
index 691c51883c8a..914452086db7 100644
--- a/lldb/source/Plugins/Language/ObjC/ObjCLanguage.h
+++ b/lldb/source/Plugins/Language/ObjC/ObjCLanguage.h
@@ -133,7 +133,7 @@ public:
static lldb_private::Language *CreateInstance(lldb::LanguageType language);
- static lldb_private::ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "objc"; }
static bool IsPossibleObjCMethodName(const char *name) {
if (!name)
@@ -156,9 +156,7 @@ public:
}
// PluginInterface protocol
- ConstString GetPluginName() override;
-
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
};
} // namespace lldb_private
diff --git a/lldb/source/Plugins/Language/ObjCPlusPlus/ObjCPlusPlusLanguage.cpp b/lldb/source/Plugins/Language/ObjCPlusPlus/ObjCPlusPlusLanguage.cpp
index 359978553210..700dda3d33bb 100644
--- a/lldb/source/Plugins/Language/ObjCPlusPlus/ObjCPlusPlusLanguage.cpp
+++ b/lldb/source/Plugins/Language/ObjCPlusPlus/ObjCPlusPlusLanguage.cpp
@@ -34,18 +34,6 @@ void ObjCPlusPlusLanguage::Terminate() {
PluginManager::UnregisterPlugin(CreateInstance);
}
-lldb_private::ConstString ObjCPlusPlusLanguage::GetPluginNameStatic() {
- static ConstString g_name("objcplusplus");
- return g_name;
-}
-
-// PluginInterface protocol
-lldb_private::ConstString ObjCPlusPlusLanguage::GetPluginName() {
- return GetPluginNameStatic();
-}
-
-uint32_t ObjCPlusPlusLanguage::GetPluginVersion() { return 1; }
-
// Static Functions
Language *ObjCPlusPlusLanguage::CreateInstance(lldb::LanguageType language) {
switch (language) {
diff --git a/lldb/source/Plugins/Language/ObjCPlusPlus/ObjCPlusPlusLanguage.h b/lldb/source/Plugins/Language/ObjCPlusPlus/ObjCPlusPlusLanguage.h
index 233fd5c00a7a..20184fd709d5 100644
--- a/lldb/source/Plugins/Language/ObjCPlusPlus/ObjCPlusPlusLanguage.h
+++ b/lldb/source/Plugins/Language/ObjCPlusPlus/ObjCPlusPlusLanguage.h
@@ -40,12 +40,10 @@ public:
static lldb_private::Language *CreateInstance(lldb::LanguageType language);
- static lldb_private::ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "objcplusplus"; }
// PluginInterface protocol
- ConstString GetPluginName() override;
-
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
};
} // namespace lldb_private
diff --git a/lldb/source/Plugins/LanguageRuntime/CPlusPlus/ItaniumABI/ItaniumABILanguageRuntime.cpp b/lldb/source/Plugins/LanguageRuntime/CPlusPlus/ItaniumABI/ItaniumABILanguageRuntime.cpp
index f5b587c51960..92f732fe6827 100644
--- a/lldb/source/Plugins/LanguageRuntime/CPlusPlus/ItaniumABI/ItaniumABILanguageRuntime.cpp
+++ b/lldb/source/Plugins/LanguageRuntime/CPlusPlus/ItaniumABI/ItaniumABILanguageRuntime.cpp
@@ -405,18 +405,6 @@ void ItaniumABILanguageRuntime::Terminate() {
PluginManager::UnregisterPlugin(CreateInstance);
}
-lldb_private::ConstString ItaniumABILanguageRuntime::GetPluginNameStatic() {
- static ConstString g_name("itanium");
- return g_name;
-}
-
-// PluginInterface protocol
-lldb_private::ConstString ItaniumABILanguageRuntime::GetPluginName() {
- return GetPluginNameStatic();
-}
-
-uint32_t ItaniumABILanguageRuntime::GetPluginVersion() { return 1; }
-
BreakpointResolverSP ItaniumABILanguageRuntime::CreateExceptionResolver(
const BreakpointSP &bkpt, bool catch_bp, bool throw_bp) {
return CreateExceptionResolver(bkpt, catch_bp, throw_bp, false);
diff --git a/lldb/source/Plugins/LanguageRuntime/CPlusPlus/ItaniumABI/ItaniumABILanguageRuntime.h b/lldb/source/Plugins/LanguageRuntime/CPlusPlus/ItaniumABI/ItaniumABILanguageRuntime.h
index d591527d9257..ca8d5ab1a93a 100644
--- a/lldb/source/Plugins/LanguageRuntime/CPlusPlus/ItaniumABI/ItaniumABILanguageRuntime.h
+++ b/lldb/source/Plugins/LanguageRuntime/CPlusPlus/ItaniumABI/ItaniumABILanguageRuntime.h
@@ -35,7 +35,7 @@ public:
static lldb_private::LanguageRuntime *
CreateInstance(Process *process, lldb::LanguageType language);
- static lldb_private::ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "itanium"; }
static char ID;
@@ -76,9 +76,7 @@ public:
lldb::ThreadSP thread_sp) override;
// PluginInterface protocol
- lldb_private::ConstString GetPluginName() override;
-
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
protected:
lldb::BreakpointResolverSP
diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.cpp
index 405b8a6f16b7..162ccad3cdcd 100644
--- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.cpp
+++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.cpp
@@ -9,6 +9,7 @@
#include "AppleObjCClassDescriptorV2.h"
#include "lldb/Expression/FunctionCaller.h"
+#include "lldb/Target/ABI.h"
#include "lldb/Utility/Log.h"
using namespace lldb;
@@ -73,6 +74,10 @@ bool ClassDescriptorV2::objc_class_t::Read(Process *process,
m_flags = (uint8_t)(data_NEVER_USE & (lldb::addr_t)3);
m_data_ptr = data_NEVER_USE & GetClassDataMask(process);
+ if (ABISP abi_sp = process->GetABI()) {
+ m_isa = abi_sp->FixCodeAddress(m_isa);
+ m_superclass = abi_sp->FixCodeAddress(m_superclass);
+ }
return true;
}
@@ -105,6 +110,8 @@ bool ClassDescriptorV2::class_rw_t::Read(Process *process, lldb::addr_t addr) {
m_flags = extractor.GetU32_unchecked(&cursor);
m_version = extractor.GetU32_unchecked(&cursor);
m_ro_ptr = extractor.GetAddress_unchecked(&cursor);
+ if (ABISP abi_sp = process->GetABI())
+ m_ro_ptr = abi_sp->FixCodeAddress(m_ro_ptr);
m_method_list_ptr = extractor.GetAddress_unchecked(&cursor);
m_properties_ptr = extractor.GetAddress_unchecked(&cursor);
m_firstSubclass = extractor.GetAddress_unchecked(&cursor);
@@ -120,6 +127,8 @@ bool ClassDescriptorV2::class_rw_t::Read(Process *process, lldb::addr_t addr) {
process->GetByteOrder(),
process->GetAddressByteSize());
m_ro_ptr = extractor.GetAddress_unchecked(&cursor);
+ if (ABISP abi_sp = process->GetABI())
+ m_ro_ptr = abi_sp->FixCodeAddress(m_ro_ptr);
}
return true;
@@ -231,6 +240,8 @@ bool ClassDescriptorV2::method_list_t::Read(Process *process,
DataBufferHeap buffer(size, '\0');
Status error;
+ if (ABISP abi_sp = process->GetABI())
+ addr = abi_sp->FixCodeAddress(addr);
process->ReadMemory(addr, buffer.GetBytes(), size, error);
if (error.Fail()) {
return false;
diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV1.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV1.cpp
index 98d0e9cf991b..609e8e9bc1b6 100644
--- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV1.cpp
+++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV1.cpp
@@ -92,18 +92,6 @@ void AppleObjCRuntimeV1::Terminate() {
PluginManager::UnregisterPlugin(CreateInstance);
}
-lldb_private::ConstString AppleObjCRuntimeV1::GetPluginNameStatic() {
- static ConstString g_name("apple-objc-v1");
- return g_name;
-}
-
-// PluginInterface protocol
-ConstString AppleObjCRuntimeV1::GetPluginName() {
- return GetPluginNameStatic();
-}
-
-uint32_t AppleObjCRuntimeV1::GetPluginVersion() { return 1; }
-
BreakpointResolverSP
AppleObjCRuntimeV1::CreateExceptionResolver(const BreakpointSP &bkpt,
bool catch_bp, bool throw_bp) {
diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV1.h b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV1.h
index 12ee2cc53639..46d8e89c906e 100644
--- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV1.h
+++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV1.h
@@ -28,7 +28,7 @@ public:
static lldb_private::LanguageRuntime *
CreateInstance(Process *process, lldb::LanguageType language);
- static lldb_private::ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "apple-objc-v1"; }
static char ID;
@@ -107,9 +107,7 @@ public:
CreateObjectChecker(std::string, ExecutionContext &exe_ctx) override;
// PluginInterface protocol
- ConstString GetPluginName() override;
-
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
ObjCRuntimeVersions GetRuntimeVersion() const override {
return ObjCRuntimeVersions::eAppleObjC_V1;
diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp
index 10512a97ad69..bd6b6335ca8c 100644
--- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp
+++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp
@@ -292,6 +292,7 @@ struct objc_clsopt_v16_t {
uint32_t occupied;
uint32_t shift;
uint32_t mask;
+ uint32_t zero;
uint64_t salt;
uint32_t scramble[256];
uint8_t tab[0]; // tab[mask+1]
@@ -950,50 +951,70 @@ protected:
Process *process = m_exe_ctx.GetProcessPtr();
ExecutionContext exe_ctx(process);
+
ObjCLanguageRuntime *objc_runtime = ObjCLanguageRuntime::Get(*process);
- if (objc_runtime) {
- ObjCLanguageRuntime::TaggedPointerVendor *tagged_ptr_vendor =
- objc_runtime->GetTaggedPointerVendor();
- if (tagged_ptr_vendor) {
- for (size_t i = 0; i < command.GetArgumentCount(); i++) {
- const char *arg_str = command.GetArgumentAtIndex(i);
- if (!arg_str)
- continue;
- Status error;
- lldb::addr_t arg_addr = OptionArgParser::ToAddress(
- &exe_ctx, arg_str, LLDB_INVALID_ADDRESS, &error);
- if (arg_addr == 0 || arg_addr == LLDB_INVALID_ADDRESS || error.Fail())
- continue;
- auto descriptor_sp = tagged_ptr_vendor->GetClassDescriptor(arg_addr);
- if (!descriptor_sp)
- continue;
- uint64_t info_bits = 0;
- uint64_t value_bits = 0;
- uint64_t payload = 0;
- if (descriptor_sp->GetTaggedPointerInfo(&info_bits, &value_bits,
- &payload)) {
- result.GetOutputStream().Printf(
- "0x%" PRIx64 " is tagged.\n\tpayload = 0x%" PRIx64
- "\n\tvalue = 0x%" PRIx64 "\n\tinfo bits = 0x%" PRIx64
- "\n\tclass = %s\n",
- (uint64_t)arg_addr, payload, value_bits, info_bits,
- descriptor_sp->GetClassName().AsCString("<unknown>"));
- } else {
- result.GetOutputStream().Printf("0x%" PRIx64 " is not tagged.\n",
- (uint64_t)arg_addr);
- }
- }
- } else {
- result.AppendError("current process has no tagged pointer support");
+ if (!objc_runtime) {
+ result.AppendError("current process has no Objective-C runtime loaded");
+ result.SetStatus(lldb::eReturnStatusFailed);
+ return false;
+ }
+
+ ObjCLanguageRuntime::TaggedPointerVendor *tagged_ptr_vendor =
+ objc_runtime->GetTaggedPointerVendor();
+ if (!tagged_ptr_vendor) {
+ result.AppendError("current process has no tagged pointer support");
+ result.SetStatus(lldb::eReturnStatusFailed);
+ return false;
+ }
+
+ for (size_t i = 0; i < command.GetArgumentCount(); i++) {
+ const char *arg_str = command.GetArgumentAtIndex(i);
+ if (!arg_str)
+ continue;
+
+ Status error;
+ lldb::addr_t arg_addr = OptionArgParser::ToAddress(
+ &exe_ctx, arg_str, LLDB_INVALID_ADDRESS, &error);
+ if (arg_addr == 0 || arg_addr == LLDB_INVALID_ADDRESS || error.Fail()) {
+ result.AppendErrorWithFormatv(
+ "could not convert '{0}' to a valid address\n", arg_str);
result.SetStatus(lldb::eReturnStatusFailed);
return false;
}
- result.SetStatus(lldb::eReturnStatusSuccessFinishResult);
- return true;
+
+ if (!tagged_ptr_vendor->IsPossibleTaggedPointer(arg_addr)) {
+ result.GetOutputStream().Format("{0:x16} is not tagged\n", arg_addr);
+ continue;
+ }
+
+ auto descriptor_sp = tagged_ptr_vendor->GetClassDescriptor(arg_addr);
+ if (!descriptor_sp) {
+ result.AppendErrorWithFormatv(
+ "could not get class descriptor for {0:x16}\n", arg_addr);
+ result.SetStatus(lldb::eReturnStatusFailed);
+ return false;
+ }
+
+ uint64_t info_bits = 0;
+ uint64_t value_bits = 0;
+ uint64_t payload = 0;
+ if (descriptor_sp->GetTaggedPointerInfo(&info_bits, &value_bits,
+ &payload)) {
+ result.GetOutputStream().Format(
+ "{0:x} is tagged\n"
+ "\tpayload = {1:x16}\n"
+ "\tvalue = {2:x16}\n"
+ "\tinfo bits = {3:x16}\n"
+ "\tclass = {4}\n",
+ arg_addr, payload, value_bits, info_bits,
+ descriptor_sp->GetClassName().AsCString("<unknown>"));
+ } else {
+ result.GetOutputStream().Format("{0:x16} is not tagged\n", arg_addr);
+ }
}
- result.AppendError("current process has no Objective-C runtime loaded");
- result.SetStatus(lldb::eReturnStatusFailed);
- return false;
+
+ result.SetStatus(lldb::eReturnStatusSuccessFinishResult);
+ return true;
}
};
@@ -1060,18 +1081,6 @@ void AppleObjCRuntimeV2::Terminate() {
PluginManager::UnregisterPlugin(CreateInstance);
}
-lldb_private::ConstString AppleObjCRuntimeV2::GetPluginNameStatic() {
- static ConstString g_name("apple-objc-v2");
- return g_name;
-}
-
-// PluginInterface protocol
-lldb_private::ConstString AppleObjCRuntimeV2::GetPluginName() {
- return GetPluginNameStatic();
-}
-
-uint32_t AppleObjCRuntimeV2::GetPluginVersion() { return 1; }
-
BreakpointResolverSP
AppleObjCRuntimeV2::CreateExceptionResolver(const BreakpointSP &bkpt,
bool catch_bp, bool throw_bp) {
@@ -2001,6 +2010,11 @@ AppleObjCRuntimeV2::SharedCacheClassInfoExtractor::UpdateISAToDescriptorMap() {
const uint32_t num_classes = 128 * 1024;
UtilityFunction *get_class_info_code = GetClassInfoUtilityFunction(exe_ctx);
+ if (!get_class_info_code) {
+ // The callee will have already logged a useful error message.
+ return DescriptorMapUpdateResult::Fail();
+ }
+
FunctionCaller *get_shared_cache_class_info_function =
get_class_info_code->GetFunctionCaller();
@@ -2188,8 +2202,12 @@ lldb::addr_t AppleObjCRuntimeV2::GetSharedCacheBaseAddress() {
if (!info_dict)
return LLDB_INVALID_ADDRESS;
- return info_dict->GetValueForKey("shared_cache_base_address")
- ->GetIntegerValue(LLDB_INVALID_ADDRESS);
+ StructuredData::ObjectSP value =
+ info_dict->GetValueForKey("shared_cache_base_address");
+ if (!value)
+ return LLDB_INVALID_ADDRESS;
+
+ return value->GetIntegerValue(LLDB_INVALID_ADDRESS);
}
void AppleObjCRuntimeV2::UpdateISAToDescriptorMapIfNeeded() {
@@ -2299,13 +2317,9 @@ static bool DoesProcessHaveSharedCache(Process &process) {
if (!platform_sp)
return true; // this should not happen
- ConstString platform_plugin_name = platform_sp->GetPluginName();
- if (platform_plugin_name) {
- llvm::StringRef platform_plugin_name_sr =
- platform_plugin_name.GetStringRef();
- if (platform_plugin_name_sr.endswith("-simulator"))
- return false;
- }
+ llvm::StringRef platform_plugin_name_sr = platform_sp->GetPluginName();
+ if (platform_plugin_name_sr.endswith("-simulator"))
+ return false;
return true;
}
@@ -2967,11 +2981,13 @@ bool AppleObjCRuntimeV2::GetCFBooleanValuesIfNeeded() {
if (m_CFBoolean_values)
return true;
- static ConstString g_kCFBooleanFalse("__kCFBooleanFalse");
- static ConstString g_kCFBooleanTrue("__kCFBooleanTrue");
+ static ConstString g___kCFBooleanFalse("__kCFBooleanFalse");
+ static ConstString g___kCFBooleanTrue("__kCFBooleanTrue");
+ static ConstString g_kCFBooleanFalse("kCFBooleanFalse");
+ static ConstString g_kCFBooleanTrue("kCFBooleanTrue");
- std::function<lldb::addr_t(ConstString)> get_symbol =
- [this](ConstString sym) -> lldb::addr_t {
+ std::function<lldb::addr_t(ConstString, ConstString)> get_symbol =
+ [this](ConstString sym, ConstString real_sym) -> lldb::addr_t {
SymbolContextList sc_list;
GetProcess()->GetTarget().GetImages().FindSymbolsWithNameAndType(
sym, lldb::eSymbolTypeData, sc_list);
@@ -2981,12 +2997,26 @@ bool AppleObjCRuntimeV2::GetCFBooleanValuesIfNeeded() {
if (sc.symbol)
return sc.symbol->GetLoadAddress(&GetProcess()->GetTarget());
}
+ GetProcess()->GetTarget().GetImages().FindSymbolsWithNameAndType(
+ real_sym, lldb::eSymbolTypeData, sc_list);
+ if (sc_list.GetSize() != 1)
+ return LLDB_INVALID_ADDRESS;
- return LLDB_INVALID_ADDRESS;
+ SymbolContext sc;
+ sc_list.GetContextAtIndex(0, sc);
+ if (!sc.symbol)
+ return LLDB_INVALID_ADDRESS;
+
+ lldb::addr_t addr = sc.symbol->GetLoadAddress(&GetProcess()->GetTarget());
+ Status error;
+ addr = GetProcess()->ReadPointerFromMemory(addr, error);
+ if (error.Fail())
+ return LLDB_INVALID_ADDRESS;
+ return addr;
};
- lldb::addr_t false_addr = get_symbol(g_kCFBooleanFalse);
- lldb::addr_t true_addr = get_symbol(g_kCFBooleanTrue);
+ lldb::addr_t false_addr = get_symbol(g___kCFBooleanFalse, g_kCFBooleanFalse);
+ lldb::addr_t true_addr = get_symbol(g___kCFBooleanTrue, g_kCFBooleanTrue);
return (m_CFBoolean_values = {false_addr, true_addr}).operator bool();
}
diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.h b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.h
index d0caa2969115..6266634e64c5 100644
--- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.h
+++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.h
@@ -33,7 +33,7 @@ public:
static lldb_private::LanguageRuntime *
CreateInstance(Process *process, lldb::LanguageType language);
- static lldb_private::ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "apple-objc-v2"; }
static char ID;
@@ -54,9 +54,7 @@ public:
llvm::Expected<std::unique_ptr<UtilityFunction>>
CreateObjectChecker(std::string name, ExecutionContext &exe_ctx) override;
- ConstString GetPluginName() override;
-
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
ObjCRuntimeVersions GetRuntimeVersion() const override {
return ObjCRuntimeVersions::eAppleObjC_V2;
diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTypeEncodingParser.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTypeEncodingParser.cpp
index 7b0121503bc4..c6cb2be981a7 100644
--- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTypeEncodingParser.cpp
+++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTypeEncodingParser.cpp
@@ -78,13 +78,13 @@ AppleObjCTypeEncodingParser::ReadStructElement(TypeSystemClang &ast_ctx,
clang::QualType AppleObjCTypeEncodingParser::BuildStruct(
TypeSystemClang &ast_ctx, StringLexer &type, bool for_expression) {
- return BuildAggregate(ast_ctx, type, for_expression, '{', '}',
+ return BuildAggregate(ast_ctx, type, for_expression, _C_STRUCT_B, _C_STRUCT_E,
clang::TTK_Struct);
}
clang::QualType AppleObjCTypeEncodingParser::BuildUnion(
TypeSystemClang &ast_ctx, StringLexer &type, bool for_expression) {
- return BuildAggregate(ast_ctx, type, for_expression, '(', ')',
+ return BuildAggregate(ast_ctx, type, for_expression, _C_UNION_B, _C_UNION_E,
clang::TTK_Union);
}
@@ -148,11 +148,11 @@ clang::QualType AppleObjCTypeEncodingParser::BuildAggregate(
clang::QualType AppleObjCTypeEncodingParser::BuildArray(
TypeSystemClang &ast_ctx, StringLexer &type, bool for_expression) {
- if (!type.NextIf('['))
+ if (!type.NextIf(_C_ARY_B))
return clang::QualType();
uint32_t size = ReadNumber(type);
clang::QualType element_type(BuildType(ast_ctx, type, for_expression));
- if (!type.NextIf(']'))
+ if (!type.NextIf(_C_ARY_E))
return clang::QualType();
CompilerType array_type(ast_ctx.CreateArrayType(
CompilerType(&ast_ctx, element_type.getAsOpaquePtr()), size, false));
@@ -166,7 +166,7 @@ clang::QualType AppleObjCTypeEncodingParser::BuildArray(
// dynamic typing will resolve things for us anyway
clang::QualType AppleObjCTypeEncodingParser::BuildObjCObjectPointerType(
TypeSystemClang &clang_ast_ctx, StringLexer &type, bool for_expression) {
- if (!type.NextIf('@'))
+ if (!type.NextIf(_C_ID))
return clang::QualType();
clang::ASTContext &ast_ctx = clang_ast_ctx.getASTContext();
@@ -203,9 +203,9 @@ clang::QualType AppleObjCTypeEncodingParser::BuildObjCObjectPointerType(
2); // undo our consumption of the string and of the quotes
name.clear();
break;
- case '}':
- case ')':
- case ']':
+ case _C_STRUCT_E:
+ case _C_UNION_E:
+ case _C_ARY_E:
case '"':
// the quoted string is a class name – see the rule
break;
@@ -260,13 +260,13 @@ AppleObjCTypeEncodingParser::BuildType(TypeSystemClang &clang_ast_ctx,
switch (type.Peek()) {
default:
break;
- case '{':
+ case _C_STRUCT_B:
return BuildStruct(clang_ast_ctx, type, for_expression);
- case '[':
+ case _C_ARY_B:
return BuildArray(clang_ast_ctx, type, for_expression);
- case '(':
+ case _C_UNION_B:
return BuildUnion(clang_ast_ctx, type, for_expression);
- case '@':
+ case _C_ID:
return BuildObjCObjectPointerType(clang_ast_ctx, type, for_expression);
}
@@ -274,46 +274,46 @@ AppleObjCTypeEncodingParser::BuildType(TypeSystemClang &clang_ast_ctx,
default:
type.PutBack(1);
return clang::QualType();
- case 'c':
+ case _C_CHR:
return ast_ctx.CharTy;
- case 'i':
+ case _C_INT:
return ast_ctx.IntTy;
- case 's':
+ case _C_SHT:
return ast_ctx.ShortTy;
- case 'l':
+ case _C_LNG:
return ast_ctx.getIntTypeForBitwidth(32, true);
// this used to be done like this:
// return clang_ast_ctx->GetIntTypeFromBitSize(32, true).GetQualType();
// which uses one of the constants if one is available, but we don't think
// all this work is necessary.
- case 'q':
+ case _C_LNG_LNG:
return ast_ctx.LongLongTy;
- case 'C':
+ case _C_UCHR:
return ast_ctx.UnsignedCharTy;
- case 'I':
+ case _C_UINT:
return ast_ctx.UnsignedIntTy;
- case 'S':
+ case _C_USHT:
return ast_ctx.UnsignedShortTy;
- case 'L':
+ case _C_ULNG:
return ast_ctx.getIntTypeForBitwidth(32, false);
- // see note for 'l'
- case 'Q':
+ // see note for _C_LNG
+ case _C_ULNG_LNG:
return ast_ctx.UnsignedLongLongTy;
- case 'f':
+ case _C_FLT:
return ast_ctx.FloatTy;
- case 'd':
+ case _C_DBL:
return ast_ctx.DoubleTy;
- case 'B':
+ case _C_BOOL:
return ast_ctx.BoolTy;
- case 'v':
+ case _C_VOID:
return ast_ctx.VoidTy;
- case '*':
+ case _C_CHARPTR:
return ast_ctx.getPointerType(ast_ctx.CharTy);
- case '#':
+ case _C_CLASS:
return ast_ctx.getObjCClassType();
- case ':':
+ case _C_SEL:
return ast_ctx.getObjCSelType();
- case 'b': {
+ case _C_BFLD: {
uint32_t size = ReadNumber(type);
if (bitfield_bit_size) {
*bitfield_bit_size = size;
@@ -321,7 +321,7 @@ AppleObjCTypeEncodingParser::BuildType(TypeSystemClang &clang_ast_ctx,
} else
return clang::QualType();
}
- case 'r': {
+ case _C_CONST: {
clang::QualType target_type =
BuildType(clang_ast_ctx, type, for_expression);
if (target_type.isNull())
@@ -331,8 +331,8 @@ AppleObjCTypeEncodingParser::BuildType(TypeSystemClang &clang_ast_ctx,
else
return ast_ctx.getConstType(target_type);
}
- case '^': {
- if (!for_expression && type.NextIf('?')) {
+ case _C_PTR: {
+ if (!for_expression && type.NextIf(_C_UNDEF)) {
// if we are not supporting the concept of unknownAny, but what is being
// created here is an unknownAny*, then we can just get away with a void*
// this is theoretically wrong (in the same sense as 'theoretically
@@ -350,7 +350,7 @@ AppleObjCTypeEncodingParser::BuildType(TypeSystemClang &clang_ast_ctx,
return ast_ctx.getPointerType(target_type);
}
}
- case '?':
+ case _C_UNDEF:
return for_expression ? ast_ctx.UnknownAnyTy : clang::QualType();
}
}
diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTypeEncodingParser.h b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTypeEncodingParser.h
index 6e533b591eca..57ed9c21faba 100644
--- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTypeEncodingParser.h
+++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTypeEncodingParser.h
@@ -9,11 +9,11 @@
#ifndef LLDB_SOURCE_PLUGINS_LANGUAGERUNTIME_OBJC_APPLEOBJCRUNTIME_APPLEOBJCTYPEENCODINGPARSER_H
#define LLDB_SOURCE_PLUGINS_LANGUAGERUNTIME_OBJC_APPLEOBJCRUNTIME_APPLEOBJCTYPEENCODINGPARSER_H
-#include "clang/AST/ASTContext.h"
-
+#include "Plugins/Language/ObjC/ObjCConstants.h"
+#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h"
#include "lldb/lldb-private.h"
-#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h"
+#include "clang/AST/ASTContext.h"
namespace lldb_private {
class StringLexer;
diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleThreadPlanStepThroughObjCTrampoline.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleThreadPlanStepThroughObjCTrampoline.cpp
index 1dc8034c537a..f935ae7db8c0 100644
--- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleThreadPlanStepThroughObjCTrampoline.cpp
+++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleThreadPlanStepThroughObjCTrampoline.cpp
@@ -12,6 +12,7 @@
#include "lldb/Expression/DiagnosticManager.h"
#include "lldb/Expression/FunctionCaller.h"
#include "lldb/Expression/UtilityFunction.h"
+#include "lldb/Target/ABI.h"
#include "lldb/Target/ExecutionContext.h"
#include "lldb/Target/Process.h"
#include "lldb/Target/Thread.h"
@@ -134,6 +135,10 @@ bool AppleThreadPlanStepThroughObjCTrampoline::ShouldStop(Event *event_ptr) {
target_addr_value);
m_impl_function->DeallocateFunctionResults(exc_ctx, m_args_addr);
lldb::addr_t target_addr = target_addr_value.GetScalar().ULongLong();
+
+ if (ABISP abi_sp = GetThread().GetProcess()->GetABI()) {
+ target_addr = abi_sp->FixCodeAddress(target_addr);
+ }
Address target_so_addr;
target_so_addr.SetOpcodeLoadAddress(target_addr, exc_ctx.GetTargetPtr());
Log *log(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_STEP));
diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.cpp
index 65bf3e6af626..0cc96e43e195 100644
--- a/lldb/source/Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.cpp
+++ b/lldb/source/Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.cpp
@@ -20,6 +20,7 @@
#include "lldb/Symbol/TypeList.h"
#include "lldb/Symbol/Variable.h"
#include "lldb/Target/Target.h"
+#include "lldb/Target/ABI.h"
#include "lldb/Utility/Log.h"
#include "lldb/Utility/Timer.h"
@@ -273,10 +274,17 @@ ObjCLanguageRuntime::ClassDescriptorSP
ObjCLanguageRuntime::GetClassDescriptorFromISA(ObjCISA isa) {
if (isa) {
UpdateISAToDescriptorMap();
+
ObjCLanguageRuntime::ISAToDescriptorIterator pos =
m_isa_to_descriptor.find(isa);
if (pos != m_isa_to_descriptor.end())
return pos->second;
+
+ if (ABISP abi_sp = m_process->GetABI()) {
+ pos = m_isa_to_descriptor.find(abi_sp->FixCodeAddress(isa));
+ if (pos != m_isa_to_descriptor.end())
+ return pos->second;
+ }
}
return ClassDescriptorSP();
}
diff --git a/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptExpressionOpts.cpp b/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptExpressionOpts.cpp
index 08a752eaa888..c990c733d24c 100644
--- a/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptExpressionOpts.cpp
+++ b/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptExpressionOpts.cpp
@@ -14,7 +14,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -40,9 +40,8 @@ using namespace lldb_renderscript;
// perform a fixup pass that removes those assumptions right before the module
// is sent to be generated by the llvm backend.
-namespace {
-bool registerRSDefaultTargetOpts(clang::TargetOptions &proto,
- const llvm::Triple::ArchType &arch) {
+static bool registerRSDefaultTargetOpts(clang::TargetOptions &proto,
+ const llvm::Triple::ArchType &arch) {
switch (arch) {
case llvm::Triple::ArchType::x86:
proto.Triple = "i686--linux-android";
@@ -75,7 +74,6 @@ bool registerRSDefaultTargetOpts(clang::TargetOptions &proto,
}
return true;
}
-} // end anonymous namespace
bool RenderScriptRuntimeModulePass::runOnModule(llvm::Module &module) {
bool changed_module = false;
diff --git a/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptExpressionOpts.h b/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptExpressionOpts.h
index 52da677128e2..e0b4f388dcc5 100644
--- a/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptExpressionOpts.h
+++ b/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptExpressionOpts.h
@@ -10,7 +10,7 @@
#define LLDB_SOURCE_PLUGINS_LANGUAGERUNTIME_RENDERSCRIPT_RENDERSCRIPTRUNTIME_RENDERSCRIPTEXPRESSIONOPTS_H
#include "llvm/IR/Module.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
diff --git a/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.cpp b/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.cpp
index 10ff5aa72b52..d6de65809c18 100644
--- a/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.cpp
+++ b/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.cpp
@@ -17,7 +17,6 @@
#include "lldb/DataFormatters/DumpValueObjectOptions.h"
#include "lldb/Expression/UserExpression.h"
#include "lldb/Host/OptionParser.h"
-#include "lldb/Host/StringConvert.h"
#include "lldb/Interpreter/CommandInterpreter.h"
#include "lldb/Interpreter/CommandObjectMultiword.h"
#include "lldb/Interpreter/CommandReturnObject.h"
@@ -967,11 +966,6 @@ void RenderScriptRuntime::Terminate() {
PluginManager::UnregisterPlugin(CreateInstance);
}
-lldb_private::ConstString RenderScriptRuntime::GetPluginNameStatic() {
- static ConstString plugin_name("renderscript");
- return plugin_name;
-}
-
RenderScriptRuntime::ModuleKind
RenderScriptRuntime::GetModuleKind(const lldb::ModuleSP &module_sp) {
if (module_sp) {
@@ -1014,13 +1008,6 @@ void RenderScriptRuntime::ModulesDidLoad(const ModuleList &module_list) {
}
}
-// PluginInterface protocol
-lldb_private::ConstString RenderScriptRuntime::GetPluginName() {
- return GetPluginNameStatic();
-}
-
-uint32_t RenderScriptRuntime::GetPluginVersion() { return 1; }
-
bool RenderScriptRuntime::GetDynamicTypeAndAddress(
ValueObject &in_value, lldb::DynamicValueType use_dynamic,
TypeAndOrName &class_type_or_name, Address &address,
@@ -2660,7 +2647,7 @@ bool RenderScriptRuntime::SaveAllocation(Stream &strm, const uint32_t alloc_id,
FileSpec file_spec(path);
FileSystem::Instance().Resolve(file_spec);
auto file = FileSystem::Instance().Open(
- file_spec, File::eOpenOptionWrite | File::eOpenOptionCanCreate |
+ file_spec, File::eOpenOptionWriteOnly | File::eOpenOptionCanCreate |
File::eOpenOptionTruncate);
if (!file) {
@@ -4568,10 +4555,8 @@ public:
eLanguageTypeExtRenderScript));
const char *id_cstr = command.GetArgumentAtIndex(0);
- bool success = false;
- const uint32_t id =
- StringConvert::ToUInt32(id_cstr, UINT32_MAX, 0, &success);
- if (!success) {
+ uint32_t id;
+ if (!llvm::to_integer(id_cstr, id)) {
result.AppendErrorWithFormat("invalid allocation id argument '%s'",
id_cstr);
return false;
@@ -4585,8 +4570,9 @@ public:
if (outfile_spec) {
// Open output file
std::string path = outfile_spec.GetPath();
- auto file = FileSystem::Instance().Open(
- outfile_spec, File::eOpenOptionWrite | File::eOpenOptionCanCreate);
+ auto file = FileSystem::Instance().Open(outfile_spec,
+ File::eOpenOptionWriteOnly |
+ File::eOpenOptionCanCreate);
if (file) {
output_stream_storage =
std::make_unique<StreamFile>(std::move(file.get()));
@@ -4714,10 +4700,8 @@ public:
eLanguageTypeExtRenderScript));
const char *id_cstr = command.GetArgumentAtIndex(0);
- bool success = false;
- const uint32_t id =
- StringConvert::ToUInt32(id_cstr, UINT32_MAX, 0, &success);
- if (!success) {
+ uint32_t id;
+ if (!llvm::to_integer(id_cstr, id)) {
result.AppendErrorWithFormat("invalid allocation id argument '%s'",
id_cstr);
return false;
@@ -4763,10 +4747,8 @@ public:
eLanguageTypeExtRenderScript));
const char *id_cstr = command.GetArgumentAtIndex(0);
- bool success = false;
- const uint32_t id =
- StringConvert::ToUInt32(id_cstr, UINT32_MAX, 0, &success);
- if (!success) {
+ uint32_t id;
+ if (!llvm::to_integer(id_cstr, id)) {
result.AppendErrorWithFormat("invalid allocation id argument '%s'",
id_cstr);
return false;
diff --git a/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.h b/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.h
index 2785c3b08125..4ddf996dedb2 100644
--- a/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.h
+++ b/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.h
@@ -318,7 +318,7 @@ public:
static lldb::CommandObjectSP
GetCommandObject(CommandInterpreter &interpreter);
- static lldb_private::ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "renderscript"; }
static char ID;
@@ -410,9 +410,7 @@ public:
bool GetOverrideExprOptions(clang::TargetOptions &prototype);
// PluginInterface protocol
- lldb_private::ConstString GetPluginName() override;
-
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
static bool GetKernelCoordinate(lldb_renderscript::RSCoordinate &coord,
Thread *thread_ptr);
diff --git a/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptx86ABIFixups.cpp b/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptx86ABIFixups.cpp
index f51190e0c82c..f3b7c9dd3edc 100644
--- a/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptx86ABIFixups.cpp
+++ b/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptx86ABIFixups.cpp
@@ -22,9 +22,8 @@
#include "lldb/Utility/Log.h"
using namespace lldb_private;
-namespace {
-bool isRSAPICall(llvm::Module &module, llvm::CallInst *call_inst) {
+static bool isRSAPICall(llvm::Module &module, llvm::CallInst *call_inst) {
// TODO get the list of renderscript modules from lldb and check if
// this llvm::Module calls into any of them.
(void)module;
@@ -38,7 +37,8 @@ bool isRSAPICall(llvm::Module &module, llvm::CallInst *call_inst) {
return true;
}
-bool isRSLargeReturnCall(llvm::Module &module, llvm::CallInst *call_inst) {
+static bool isRSLargeReturnCall(llvm::Module &module,
+ llvm::CallInst *call_inst) {
// i686 and x86_64 returns for large vectors in the RenderScript API are not
// handled as normal register pairs, but as a hidden sret type. This is not
// reflected in the debug info or mangled symbol name, and the android ABI
@@ -58,7 +58,7 @@ bool isRSLargeReturnCall(llvm::Module &module, llvm::CallInst *call_inst) {
->getPrimitiveSizeInBits() > 128;
}
-bool isRSAllocationPtrTy(const llvm::Type *type) {
+static bool isRSAllocationPtrTy(const llvm::Type *type) {
if (!type->isPointerTy())
return false;
auto ptr_type = type->getPointerElementType();
@@ -67,7 +67,8 @@ bool isRSAllocationPtrTy(const llvm::Type *type) {
ptr_type->getStructName().startswith("struct.rs_allocation");
}
-bool isRSAllocationTyCallSite(llvm::Module &module, llvm::CallInst *call_inst) {
+static bool isRSAllocationTyCallSite(llvm::Module &module,
+ llvm::CallInst *call_inst) {
(void)module;
if (!call_inst->hasByValArgument())
return false;
@@ -77,7 +78,7 @@ bool isRSAllocationTyCallSite(llvm::Module &module, llvm::CallInst *call_inst) {
return false;
}
-llvm::FunctionType *cloneToStructRetFnTy(llvm::CallInst *call_inst) {
+static llvm::FunctionType *cloneToStructRetFnTy(llvm::CallInst *call_inst) {
// on x86 StructReturn functions return a pointer to the return value, rather
// than the return value itself
// [ref](http://www.agner.org/optimize/calling_conventions.pdf section 6). We
@@ -122,9 +123,9 @@ llvm::FunctionType *cloneToStructRetFnTy(llvm::CallInst *call_inst) {
orig->isVarArg());
}
-bool findRSCallSites(llvm::Module &module,
- std::set<llvm::CallInst *> &rs_callsites,
- bool (*predicate)(llvm::Module &, llvm::CallInst *)) {
+static bool
+findRSCallSites(llvm::Module &module, std::set<llvm::CallInst *> &rs_callsites,
+ bool (*predicate)(llvm::Module &, llvm::CallInst *)) {
bool found = false;
for (auto &func : module.getFunctionList())
@@ -143,7 +144,7 @@ bool findRSCallSites(llvm::Module &module,
return found;
}
-bool fixupX86StructRetCalls(llvm::Module &module) {
+static bool fixupX86StructRetCalls(llvm::Module &module) {
bool changed = false;
// changing a basic block while iterating over it seems to have some
// undefined behaviour going on so we find all RS callsites first, then fix
@@ -207,7 +208,7 @@ bool fixupX86StructRetCalls(llvm::Module &module) {
return changed;
}
-bool fixupRSAllocationStructByValCalls(llvm::Module &module) {
+static bool fixupRSAllocationStructByValCalls(llvm::Module &module) {
// On x86_64, calls to functions in the RS runtime that take an
// `rs_allocation` type argument are actually handled as by-ref params by
// bcc, but appear to be passed by value by lldb (the callsite all use
@@ -237,12 +238,11 @@ bool fixupRSAllocationStructByValCalls(llvm::Module &module) {
llvm::AttributeList call_attribs = call_inst->getAttributes();
// iterate over the argument attributes
- for (unsigned I = call_attribs.index_begin(); I != call_attribs.index_end();
- I++) {
+ for (unsigned I : call_attribs.indexes()) {
// if this argument is passed by val
- if (call_attribs.hasAttribute(I, llvm::Attribute::ByVal)) {
+ if (call_attribs.hasAttributeAtIndex(I, llvm::Attribute::ByVal)) {
// strip away the byval attribute
- call_inst->removeAttribute(I, llvm::Attribute::ByVal);
+ call_inst->removeAttributeAtIndex(I, llvm::Attribute::ByVal);
changed = true;
}
}
@@ -260,7 +260,6 @@ bool fixupRSAllocationStructByValCalls(llvm::Module &module) {
}
return changed;
}
-} // end anonymous namespace
namespace lldb_private {
namespace lldb_renderscript {
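The RenderScriptx86ABIFixups.cpp hunks above replace an anonymous namespace around file-local helpers with static functions. Both spellings give internal linkage; LLVM's coding standards prefer static for functions so the linkage is visible at the definition itself. A tiny illustration with a hypothetical helper, shown as two alternative translation units (not code from the patch):

    // Spelling 1: internal linkage via an anonymous namespace.
    namespace {
    bool isPositive(int x) { return x > 0; }
    } // end anonymous namespace

    // Spelling 2: the same internal linkage, stated on the function itself.
    static bool isPositive(int x) { return x > 0; }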
diff --git a/lldb/source/Plugins/MemoryHistory/asan/MemoryHistoryASan.cpp b/lldb/source/Plugins/MemoryHistory/asan/MemoryHistoryASan.cpp
index 7d9976285192..14d9c4024689 100644
--- a/lldb/source/Plugins/MemoryHistory/asan/MemoryHistoryASan.cpp
+++ b/lldb/source/Plugins/MemoryHistory/asan/MemoryHistoryASan.cpp
@@ -56,11 +56,6 @@ void MemoryHistoryASan::Terminate() {
PluginManager::UnregisterPlugin(CreateInstance);
}
-ConstString MemoryHistoryASan::GetPluginNameStatic() {
- static ConstString g_name("asan");
- return g_name;
-}
-
MemoryHistoryASan::MemoryHistoryASan(const ProcessSP &process_sp) {
if (process_sp)
m_process_wp = process_sp;
diff --git a/lldb/source/Plugins/MemoryHistory/asan/MemoryHistoryASan.h b/lldb/source/Plugins/MemoryHistory/asan/MemoryHistoryASan.h
index e9fe37d344a4..6563c3cd94a7 100644
--- a/lldb/source/Plugins/MemoryHistory/asan/MemoryHistoryASan.h
+++ b/lldb/source/Plugins/MemoryHistory/asan/MemoryHistoryASan.h
@@ -27,13 +27,9 @@ public:
static void Terminate();
- static lldb_private::ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "asan"; }
- lldb_private::ConstString GetPluginName() override {
- return GetPluginNameStatic();
- }
-
- uint32_t GetPluginVersion() override { return 1; }
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
lldb_private::HistoryThreads GetHistoryThreads(lldb::addr_t address) override;
diff --git a/lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.cpp b/lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.cpp
index 7ff917518b64..a2522372f5af 100644
--- a/lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.cpp
+++ b/lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.cpp
@@ -274,15 +274,6 @@ void ObjectContainerBSDArchive::Terminate() {
PluginManager::UnregisterPlugin(CreateInstance);
}
-lldb_private::ConstString ObjectContainerBSDArchive::GetPluginNameStatic() {
- static ConstString g_name("bsd-archive");
- return g_name;
-}
-
-const char *ObjectContainerBSDArchive::GetPluginDescriptionStatic() {
- return "BSD Archive object container reader.";
-}
-
ObjectContainer *ObjectContainerBSDArchive::CreateInstance(
const lldb::ModuleSP &module_sp, DataBufferSP &data_sp,
lldb::offset_t data_offset, const FileSpec *file,
@@ -433,13 +424,6 @@ ObjectFileSP ObjectContainerBSDArchive::GetObjectFile(const FileSpec *file) {
return ObjectFileSP();
}
-// PluginInterface protocol
-lldb_private::ConstString ObjectContainerBSDArchive::GetPluginName() {
- return GetPluginNameStatic();
-}
-
-uint32_t ObjectContainerBSDArchive::GetPluginVersion() { return 1; }
-
size_t ObjectContainerBSDArchive::GetModuleSpecifications(
const lldb_private::FileSpec &file, lldb::DataBufferSP &data_sp,
lldb::offset_t data_offset, lldb::offset_t file_offset,
diff --git a/lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.h b/lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.h
index 9830e9b5d1b2..21106d7b8590 100644
--- a/lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.h
+++ b/lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.h
@@ -36,9 +36,11 @@ public:
static void Terminate();
- static lldb_private::ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "bsd-archive"; }
- static const char *GetPluginDescriptionStatic();
+ static llvm::StringRef GetPluginDescriptionStatic() {
+ return "BSD Archive object container reader.";
+ }
static lldb_private::ObjectContainer *
CreateInstance(const lldb::ModuleSP &module_sp, lldb::DataBufferSP &data_sp,
@@ -68,9 +70,7 @@ public:
lldb::ObjectFileSP GetObjectFile(const lldb_private::FileSpec *file) override;
// PluginInterface protocol
- lldb_private::ConstString GetPluginName() override;
-
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
protected:
struct Object {
diff --git a/lldb/source/Plugins/ObjectFile/Breakpad/BreakpadRecords.cpp b/lldb/source/Plugins/ObjectFile/Breakpad/BreakpadRecords.cpp
index bd8eeedce57d..24941be515de 100644
--- a/lldb/source/Plugins/ObjectFile/Breakpad/BreakpadRecords.cpp
+++ b/lldb/source/Plugins/ObjectFile/Breakpad/BreakpadRecords.cpp
@@ -23,6 +23,8 @@ enum class Token {
CodeID,
File,
Func,
+ Inline,
+ InlineOrigin,
Public,
Stack,
CFI,
@@ -41,6 +43,8 @@ template <> Token stringTo<Token>(llvm::StringRef Str) {
.Case("CODE_ID", Token::CodeID)
.Case("FILE", Token::File)
.Case("FUNC", Token::Func)
+ .Case("INLINE", Token::Inline)
+ .Case("INLINE_ORIGIN", Token::InlineOrigin)
.Case("PUBLIC", Token::Public)
.Case("STACK", Token::Stack)
.Case("CFI", Token::CFI)
@@ -145,7 +149,10 @@ llvm::Optional<Record::Kind> Record::classify(llvm::StringRef Line) {
default:
return llvm::None;
}
-
+ case Token::Inline:
+ return Record::Inline;
+ case Token::InlineOrigin:
+ return Record::InlineOrigin;
case Token::Unknown:
// Optimistically assume that any unrecognised token means this is a line
// record, those don't have a special keyword and start directly with a
@@ -216,9 +223,11 @@ llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
return OS << "INFO CODE_ID " << R.ID.GetAsString();
}
-llvm::Optional<FileRecord> FileRecord::parse(llvm::StringRef Line) {
- // FILE number name
- if (consume<Token>(Line) != Token::File)
+template <typename T>
+static llvm::Optional<T> parseNumberName(llvm::StringRef Line,
+ Token TokenType) {
+ // TOKEN number name
+ if (consume<Token>(Line) != TokenType)
return llvm::None;
llvm::StringRef Str;
@@ -231,7 +240,12 @@ llvm::Optional<FileRecord> FileRecord::parse(llvm::StringRef Line) {
if (Name.empty())
return llvm::None;
- return FileRecord(Number, Name);
+ return T(Number, Name);
+}
+
+llvm::Optional<FileRecord> FileRecord::parse(llvm::StringRef Line) {
+ // FILE number name
+ return parseNumberName<FileRecord>(Line, Token::File);
}
llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
@@ -239,6 +253,17 @@ llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
return OS << "FILE " << R.Number << " " << R.Name;
}
+llvm::Optional<InlineOriginRecord>
+InlineOriginRecord::parse(llvm::StringRef Line) {
+ // INLINE_ORIGIN number name
+ return parseNumberName<InlineOriginRecord>(Line, Token::InlineOrigin);
+}
+
+llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
+ const InlineOriginRecord &R) {
+ return OS << "INLINE_ORIGIN " << R.Number << " " << R.Name;
+}
+
static bool parsePublicOrFunc(llvm::StringRef Line, bool &Multiple,
lldb::addr_t &Address, lldb::addr_t *Size,
lldb::addr_t &ParamSize, llvm::StringRef &Name) {
@@ -299,6 +324,58 @@ llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
R.ParamSize, R.Name);
}
+llvm::Optional<InlineRecord> InlineRecord::parse(llvm::StringRef Line) {
+ // INLINE inline_nest_level call_site_line call_site_file_num origin_num
+ // [address size]+
+ if (consume<Token>(Line) != Token::Inline)
+ return llvm::None;
+
+ llvm::SmallVector<llvm::StringRef> Tokens;
+ SplitString(Line, Tokens, " ");
+ if (Tokens.size() < 6 || Tokens.size() % 2 == 1)
+ return llvm::None;
+
+ size_t InlineNestLevel;
+ uint32_t CallSiteLineNum;
+ size_t CallSiteFileNum;
+ size_t OriginNum;
+ if (!(to_integer(Tokens[0], InlineNestLevel) &&
+ to_integer(Tokens[1], CallSiteLineNum) &&
+ to_integer(Tokens[2], CallSiteFileNum) &&
+ to_integer(Tokens[3], OriginNum)))
+ return llvm::None;
+
+ InlineRecord Record = InlineRecord(InlineNestLevel, CallSiteLineNum,
+ CallSiteFileNum, OriginNum);
+ for (size_t i = 4; i < Tokens.size(); i += 2) {
+ lldb::addr_t Address;
+ if (!to_integer(Tokens[i], Address, 16))
+ return llvm::None;
+ lldb::addr_t Size;
+ if (!to_integer(Tokens[i + 1].trim(), Size, 16))
+ return llvm::None;
+ Record.Ranges.emplace_back(Address, Size);
+ }
+ return Record;
+}
+
+bool breakpad::operator==(const InlineRecord &L, const InlineRecord &R) {
+ return L.InlineNestLevel == R.InlineNestLevel &&
+ L.CallSiteLineNum == R.CallSiteLineNum &&
+ L.CallSiteFileNum == R.CallSiteFileNum && L.OriginNum == R.OriginNum &&
+ L.Ranges == R.Ranges;
+}
+
+llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
+ const InlineRecord &R) {
+ OS << llvm::formatv("INLINE {0} {1} {2} {3}", R.InlineNestLevel,
+ R.CallSiteLineNum, R.CallSiteFileNum, R.OriginNum);
+ for (const auto &range : R.Ranges) {
+ OS << llvm::formatv(" {0:x-} {1:x-}", range.first, range.second);
+ }
+ return OS;
+}
+
llvm::Optional<LineRecord> LineRecord::parse(llvm::StringRef Line) {
lldb::addr_t Address;
llvm::StringRef Str;
@@ -490,6 +567,10 @@ llvm::StringRef breakpad::toString(Record::Kind K) {
return "FILE";
case Record::Func:
return "FUNC";
+ case Record::Inline:
+ return "INLINE";
+ case Record::InlineOrigin:
+ return "INLINE_ORIGIN";
case Record::Line:
return "LINE";
case Record::Public:
diff --git a/lldb/source/Plugins/ObjectFile/Breakpad/BreakpadRecords.h b/lldb/source/Plugins/ObjectFile/Breakpad/BreakpadRecords.h
index 1620a1210b84..8a11323d521c 100644
--- a/lldb/source/Plugins/ObjectFile/Breakpad/BreakpadRecords.h
+++ b/lldb/source/Plugins/ObjectFile/Breakpad/BreakpadRecords.h
@@ -20,7 +20,18 @@ namespace breakpad {
class Record {
public:
- enum Kind { Module, Info, File, Func, Line, Public, StackCFI, StackWin };
+ enum Kind {
+ Module,
+ Info,
+ File,
+ Func,
+ Inline,
+ InlineOrigin,
+ Line,
+ Public,
+ StackCFI,
+ StackWin
+ };
/// Attempt to guess the kind of the record present in the argument without
/// doing a full parse. The returned kind will always be correct for valid
@@ -89,6 +100,23 @@ inline bool operator==(const FileRecord &L, const FileRecord &R) {
}
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const FileRecord &R);
+class InlineOriginRecord : public Record {
+public:
+ static llvm::Optional<InlineOriginRecord> parse(llvm::StringRef Line);
+ InlineOriginRecord(size_t Number, llvm::StringRef Name)
+ : Record(InlineOrigin), Number(Number), Name(Name) {}
+
+ size_t Number;
+ llvm::StringRef Name;
+};
+
+inline bool operator==(const InlineOriginRecord &L,
+ const InlineOriginRecord &R) {
+ return L.Number == R.Number && L.Name == R.Name;
+}
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
+ const InlineOriginRecord &R);
+
class FuncRecord : public Record {
public:
static llvm::Optional<FuncRecord> parse(llvm::StringRef Line);
@@ -107,6 +135,26 @@ public:
bool operator==(const FuncRecord &L, const FuncRecord &R);
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const FuncRecord &R);
+class InlineRecord : public Record {
+public:
+ static llvm::Optional<InlineRecord> parse(llvm::StringRef Line);
+ InlineRecord(size_t InlineNestLevel, uint32_t CallSiteLineNum,
+ size_t CallSiteFileNum, size_t OriginNum)
+ : Record(Inline), InlineNestLevel(InlineNestLevel),
+ CallSiteLineNum(CallSiteLineNum), CallSiteFileNum(CallSiteFileNum),
+ OriginNum(OriginNum) {}
+
+ size_t InlineNestLevel;
+ uint32_t CallSiteLineNum;
+ size_t CallSiteFileNum;
+ size_t OriginNum;
+  // A vector of the address ranges covered by this inline.
+ std::vector<std::pair<lldb::addr_t, lldb::addr_t>> Ranges;
+};
+
+bool operator==(const InlineRecord &L, const InlineRecord &R);
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const InlineRecord &R);
+
class LineRecord : public Record {
public:
static llvm::Optional<LineRecord> parse(llvm::StringRef Line);
diff --git a/lldb/source/Plugins/ObjectFile/Breakpad/ObjectFileBreakpad.cpp b/lldb/source/Plugins/ObjectFile/Breakpad/ObjectFileBreakpad.cpp
index 7a9163ddb880..bad730512ff4 100644
--- a/lldb/source/Plugins/ObjectFile/Breakpad/ObjectFileBreakpad.cpp
+++ b/lldb/source/Plugins/ObjectFile/Breakpad/ObjectFileBreakpad.cpp
@@ -56,11 +56,6 @@ void ObjectFileBreakpad::Terminate() {
PluginManager::UnregisterPlugin(CreateInstance);
}
-ConstString ObjectFileBreakpad::GetPluginNameStatic() {
- static ConstString g_name("breakpad");
- return g_name;
-}
-
ObjectFile *ObjectFileBreakpad::CreateInstance(
const ModuleSP &module_sp, DataBufferSP &data_sp, offset_t data_offset,
const FileSpec *file, offset_t file_offset, offset_t length) {
@@ -153,9 +148,9 @@ void ObjectFileBreakpad::CreateSections(SectionList &unified_section_list) {
std::tie(line, text) = text.split('\n');
llvm::Optional<Record::Kind> next_section = Record::classify(line);
- if (next_section == Record::Line) {
- // Line records logically belong to the preceding Func record, so we put
- // them in the same section.
+ if (next_section == Record::Line || next_section == Record::Inline) {
+ // Line/Inline records logically belong to the preceding Func record, so
+ // we put them in the same section.
next_section = Record::Func;
}
if (next_section == current_section)
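With the change above, an INLINE record is grouped into the same section as the FUNC record it refines, just as plain line records already were. In a hypothetical symbol fragment such as

    FUNC 1010 30 0 main
    INLINE 0 42 5 2 1010 8
    1010 10 42 5
    1020 20 44 5

all four lines are placed into one Func section, keeping the inline information next to the function it belongs to.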
diff --git a/lldb/source/Plugins/ObjectFile/Breakpad/ObjectFileBreakpad.h b/lldb/source/Plugins/ObjectFile/Breakpad/ObjectFileBreakpad.h
index 8724feaa422d..c320c7ad3e2e 100644
--- a/lldb/source/Plugins/ObjectFile/Breakpad/ObjectFileBreakpad.h
+++ b/lldb/source/Plugins/ObjectFile/Breakpad/ObjectFileBreakpad.h
@@ -21,7 +21,7 @@ public:
static void Initialize();
static void Terminate();
- static ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "breakpad"; }
static const char *GetPluginDescriptionStatic() {
return "Breakpad object file reader.";
}
@@ -44,9 +44,7 @@ public:
ModuleSpecList &specs);
// PluginInterface protocol
- ConstString GetPluginName() override { return GetPluginNameStatic(); }
-
- uint32_t GetPluginVersion() override { return 1; }
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
// LLVM RTTI support
static char ID;
diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
index a5e86f0c2c1b..8e0f228a988f 100644
--- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
+++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
@@ -55,36 +55,36 @@ using namespace llvm::ELF;
LLDB_PLUGIN_DEFINE(ObjectFileELF)
-namespace {
-
// ELF note owner definitions
-const char *const LLDB_NT_OWNER_FREEBSD = "FreeBSD";
-const char *const LLDB_NT_OWNER_GNU = "GNU";
-const char *const LLDB_NT_OWNER_NETBSD = "NetBSD";
-const char *const LLDB_NT_OWNER_NETBSDCORE = "NetBSD-CORE";
-const char *const LLDB_NT_OWNER_OPENBSD = "OpenBSD";
-const char *const LLDB_NT_OWNER_ANDROID = "Android";
-const char *const LLDB_NT_OWNER_CORE = "CORE";
-const char *const LLDB_NT_OWNER_LINUX = "LINUX";
+static const char *const LLDB_NT_OWNER_FREEBSD = "FreeBSD";
+static const char *const LLDB_NT_OWNER_GNU = "GNU";
+static const char *const LLDB_NT_OWNER_NETBSD = "NetBSD";
+static const char *const LLDB_NT_OWNER_NETBSDCORE = "NetBSD-CORE";
+static const char *const LLDB_NT_OWNER_OPENBSD = "OpenBSD";
+static const char *const LLDB_NT_OWNER_ANDROID = "Android";
+static const char *const LLDB_NT_OWNER_CORE = "CORE";
+static const char *const LLDB_NT_OWNER_LINUX = "LINUX";
// ELF note type definitions
-const elf_word LLDB_NT_FREEBSD_ABI_TAG = 0x01;
-const elf_word LLDB_NT_FREEBSD_ABI_SIZE = 4;
+static const elf_word LLDB_NT_FREEBSD_ABI_TAG = 0x01;
+static const elf_word LLDB_NT_FREEBSD_ABI_SIZE = 4;
-const elf_word LLDB_NT_GNU_ABI_TAG = 0x01;
-const elf_word LLDB_NT_GNU_ABI_SIZE = 16;
+static const elf_word LLDB_NT_GNU_ABI_TAG = 0x01;
+static const elf_word LLDB_NT_GNU_ABI_SIZE = 16;
-const elf_word LLDB_NT_GNU_BUILD_ID_TAG = 0x03;
+static const elf_word LLDB_NT_GNU_BUILD_ID_TAG = 0x03;
-const elf_word LLDB_NT_NETBSD_IDENT_TAG = 1;
-const elf_word LLDB_NT_NETBSD_IDENT_DESCSZ = 4;
-const elf_word LLDB_NT_NETBSD_IDENT_NAMESZ = 7;
-const elf_word LLDB_NT_NETBSD_PROCINFO = 1;
+static const elf_word LLDB_NT_NETBSD_IDENT_TAG = 1;
+static const elf_word LLDB_NT_NETBSD_IDENT_DESCSZ = 4;
+static const elf_word LLDB_NT_NETBSD_IDENT_NAMESZ = 7;
+static const elf_word LLDB_NT_NETBSD_PROCINFO = 1;
// GNU ABI note OS constants
-const elf_word LLDB_NT_GNU_ABI_OS_LINUX = 0x00;
-const elf_word LLDB_NT_GNU_ABI_OS_HURD = 0x01;
-const elf_word LLDB_NT_GNU_ABI_OS_SOLARIS = 0x02;
+static const elf_word LLDB_NT_GNU_ABI_OS_LINUX = 0x00;
+static const elf_word LLDB_NT_GNU_ABI_OS_HURD = 0x01;
+static const elf_word LLDB_NT_GNU_ABI_OS_SOLARIS = 0x02;
+
+namespace {
//===----------------------------------------------------------------------===//
/// \class ELFRelocation
@@ -125,6 +125,7 @@ private:
RelocUnion reloc;
};
+} // end anonymous namespace
ELFRelocation::ELFRelocation(unsigned type) {
if (type == DT_REL || type == SHT_REL)
@@ -208,8 +209,6 @@ unsigned ELFRelocation::RelocAddend64(const ELFRelocation &rel) {
return rel.reloc.get<ELFRela *>()->r_addend;
}
-} // end anonymous namespace
-
static user_id_t SegmentID(size_t PHdrIndex) {
return ~user_id_t(PHdrIndex);
}
@@ -335,15 +334,6 @@ void ObjectFileELF::Terminate() {
PluginManager::UnregisterPlugin(CreateInstance);
}
-lldb_private::ConstString ObjectFileELF::GetPluginNameStatic() {
- static ConstString g_name("elf");
- return g_name;
-}
-
-const char *ObjectFileELF::GetPluginDescriptionStatic() {
- return "ELF object file reader.";
-}
-
ObjectFile *ObjectFileELF::CreateInstance(const lldb::ModuleSP &module_sp,
DataBufferSP &data_sp,
lldb::offset_t data_offset,
@@ -634,12 +624,6 @@ size_t ObjectFileELF::GetModuleSpecifications(
return specs.GetSize() - initial_count;
}
-// PluginInterface protocol
-lldb_private::ConstString ObjectFileELF::GetPluginName() {
- return GetPluginNameStatic();
-}
-
-uint32_t ObjectFileELF::GetPluginVersion() { return m_plugin_version; }
// ObjectFile protocol
ObjectFileELF::ObjectFileELF(const lldb::ModuleSP &module_sp,
@@ -2708,9 +2692,6 @@ Symtab *ObjectFileELF::GetSymtab() {
if (!module_sp)
return nullptr;
- Progress progress(llvm::formatv("Parsing symbol table for {0}",
- m_file.GetFilename().AsCString("<Unknown>")));
-
// We always want to use the main object file so we (hopefully) only have one
// cached copy of our symtab, dynamic sections, etc.
ObjectFile *module_obj_file = module_sp->GetObjectFile();
@@ -2718,6 +2699,10 @@ Symtab *ObjectFileELF::GetSymtab() {
return module_obj_file->GetSymtab();
if (m_symtab_up == nullptr) {
+ Progress progress(
+ llvm::formatv("Parsing symbol table for {0}",
+ m_file.GetFilename().AsCString("<Unknown>")));
+ ElapsedTime elapsed(module_sp->GetSymtabParseTime());
SectionList *section_list = module_sp->GetSectionList();
if (!section_list)
return nullptr;
diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h
index e678c2f5f011..5738e5cf60d5 100644
--- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h
+++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h
@@ -61,9 +61,11 @@ public:
static void Terminate();
- static lldb_private::ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "elf"; }
- static const char *GetPluginDescriptionStatic();
+ static llvm::StringRef GetPluginDescriptionStatic() {
+ return "ELF object file reader.";
+ }
static lldb_private::ObjectFile *
CreateInstance(const lldb::ModuleSP &module_sp, lldb::DataBufferSP &data_sp,
@@ -85,9 +87,7 @@ public:
lldb::addr_t length);
// PluginInterface protocol
- lldb_private::ConstString GetPluginName() override;
-
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
// LLVM RTTI support
static char ID;
diff --git a/lldb/source/Plugins/ObjectFile/JIT/ObjectFileJIT.cpp b/lldb/source/Plugins/ObjectFile/JIT/ObjectFileJIT.cpp
index f93ac9261afd..bec0099517c8 100644
--- a/lldb/source/Plugins/ObjectFile/JIT/ObjectFileJIT.cpp
+++ b/lldb/source/Plugins/ObjectFile/JIT/ObjectFileJIT.cpp
@@ -53,15 +53,6 @@ void ObjectFileJIT::Terminate() {
PluginManager::UnregisterPlugin(CreateInstance);
}
-lldb_private::ConstString ObjectFileJIT::GetPluginNameStatic() {
- static ConstString g_name("jit");
- return g_name;
-}
-
-const char *ObjectFileJIT::GetPluginDescriptionStatic() {
- return "JIT code object file";
-}
-
ObjectFile *ObjectFileJIT::CreateInstance(const lldb::ModuleSP &module_sp,
DataBufferSP &data_sp,
lldb::offset_t data_offset,
@@ -120,6 +111,7 @@ Symtab *ObjectFileJIT::GetSymtab() {
if (module_sp) {
std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
if (m_symtab_up == nullptr) {
+ ElapsedTime elapsed(module_sp->GetSymtabParseTime());
m_symtab_up = std::make_unique<Symtab>(this);
std::lock_guard<std::recursive_mutex> symtab_guard(
m_symtab_up->GetMutex());
@@ -199,13 +191,6 @@ ArchSpec ObjectFileJIT::GetArchitecture() {
return ArchSpec();
}
-// PluginInterface protocol
-lldb_private::ConstString ObjectFileJIT::GetPluginName() {
- return GetPluginNameStatic();
-}
-
-uint32_t ObjectFileJIT::GetPluginVersion() { return 1; }
-
bool ObjectFileJIT::SetLoadAddress(Target &target, lldb::addr_t value,
bool value_is_offset) {
size_t num_loaded_sections = 0;
diff --git a/lldb/source/Plugins/ObjectFile/JIT/ObjectFileJIT.h b/lldb/source/Plugins/ObjectFile/JIT/ObjectFileJIT.h
index a3a1acea916a..03ac001988a0 100644
--- a/lldb/source/Plugins/ObjectFile/JIT/ObjectFileJIT.h
+++ b/lldb/source/Plugins/ObjectFile/JIT/ObjectFileJIT.h
@@ -26,9 +26,11 @@ public:
static void Terminate();
- static lldb_private::ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "jit"; }
- static const char *GetPluginDescriptionStatic();
+ static llvm::StringRef GetPluginDescriptionStatic() {
+ return "JIT code object file";
+ }
static lldb_private::ObjectFile *
CreateInstance(const lldb::ModuleSP &module_sp, lldb::DataBufferSP &data_sp,
@@ -96,9 +98,7 @@ public:
ObjectFile::Strata CalculateStrata() override;
// PluginInterface protocol
- lldb_private::ConstString GetPluginName() override;
-
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
protected:
lldb::ObjectFileJITDelegateWP m_delegate_wp;
diff --git a/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp b/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp
new file mode 100644
index 000000000000..a70e6a079f76
--- /dev/null
+++ b/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp
@@ -0,0 +1,772 @@
+//===-- MinidumpFileBuilder.cpp -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "MinidumpFileBuilder.h"
+
+#include "Plugins/Process/minidump/RegisterContextMinidump_x86_64.h"
+
+#include "lldb/Core/Module.h"
+#include "lldb/Core/ModuleList.h"
+#include "lldb/Core/Section.h"
+#include "lldb/Target/MemoryRegionInfo.h"
+#include "lldb/Target/Process.h"
+#include "lldb/Target/RegisterContext.h"
+#include "lldb/Target/StopInfo.h"
+#include "lldb/Target/ThreadList.h"
+#include "lldb/Utility/DataExtractor.h"
+#include "lldb/Utility/RegisterValue.h"
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/Minidump.h"
+#include "llvm/Support/ConvertUTF.h"
+#include "llvm/Support/Error.h"
+
+#include "Plugins/Process/minidump/MinidumpTypes.h"
+
+#include <cinttypes>
+
+using namespace lldb;
+using namespace lldb_private;
+using namespace llvm::minidump;
+
+void MinidumpFileBuilder::AddDirectory(StreamType type, size_t stream_size) {
+ LocationDescriptor loc;
+ loc.DataSize = static_cast<llvm::support::ulittle32_t>(stream_size);
+  // The stream will begin at the current end of the data section.
+ loc.RVA = static_cast<llvm::support::ulittle32_t>(GetCurrentDataEndOffset());
+
+ Directory dir;
+ dir.Type = static_cast<llvm::support::little_t<StreamType>>(type);
+ dir.Location = loc;
+
+ m_directories.push_back(dir);
+}
+
+Status MinidumpFileBuilder::AddSystemInfo(const llvm::Triple &target_triple) {
+ Status error;
+ AddDirectory(StreamType::SystemInfo, sizeof(llvm::minidump::SystemInfo));
+
+ llvm::minidump::ProcessorArchitecture arch;
+ switch (target_triple.getArch()) {
+ case llvm::Triple::ArchType::x86_64:
+ arch = ProcessorArchitecture::AMD64;
+ break;
+ case llvm::Triple::ArchType::x86:
+ arch = ProcessorArchitecture::X86;
+ break;
+ case llvm::Triple::ArchType::arm:
+ arch = ProcessorArchitecture::ARM;
+ break;
+ case llvm::Triple::ArchType::aarch64:
+ arch = ProcessorArchitecture::ARM64;
+ break;
+ case llvm::Triple::ArchType::mips64:
+ case llvm::Triple::ArchType::mips64el:
+ case llvm::Triple::ArchType::mips:
+ case llvm::Triple::ArchType::mipsel:
+ arch = ProcessorArchitecture::MIPS;
+ break;
+ case llvm::Triple::ArchType::ppc64:
+ case llvm::Triple::ArchType::ppc:
+ case llvm::Triple::ArchType::ppc64le:
+ arch = ProcessorArchitecture::PPC;
+ break;
+ default:
+ error.SetErrorStringWithFormat("Architecture %s not supported.",
+ target_triple.getArchName().str().c_str());
+ return error;
+ };
+
+ llvm::support::little_t<OSPlatform> platform_id;
+ switch (target_triple.getOS()) {
+ case llvm::Triple::OSType::Linux:
+ if (target_triple.getEnvironment() ==
+ llvm::Triple::EnvironmentType::Android)
+ platform_id = OSPlatform::Android;
+ else
+ platform_id = OSPlatform::Linux;
+ break;
+ case llvm::Triple::OSType::Win32:
+ platform_id = OSPlatform::Win32NT;
+ break;
+ case llvm::Triple::OSType::MacOSX:
+ platform_id = OSPlatform::MacOSX;
+ break;
+ case llvm::Triple::OSType::IOS:
+ platform_id = OSPlatform::IOS;
+ break;
+ default:
+ error.SetErrorStringWithFormat("OS %s not supported.",
+ target_triple.getOSName().str().c_str());
+ return error;
+ };
+
+ llvm::minidump::SystemInfo sys_info;
+ sys_info.ProcessorArch =
+ static_cast<llvm::support::little_t<ProcessorArchitecture>>(arch);
+  // Global offset to the beginning of the csd_string in the data section.
+ sys_info.CSDVersionRVA = static_cast<llvm::support::ulittle32_t>(
+ GetCurrentDataEndOffset() + sizeof(llvm::minidump::SystemInfo));
+ sys_info.PlatformId = platform_id;
+ m_data.AppendData(&sys_info, sizeof(llvm::minidump::SystemInfo));
+
+ std::string csd_string = "";
+
+ error = WriteString(csd_string, &m_data);
+ if (error.Fail()) {
+ error.SetErrorString("Unable to convert the csd string to UTF16.");
+ return error;
+ }
+
+ return error;
+}
+
+Status WriteString(const std::string &to_write,
+ lldb_private::DataBufferHeap *buffer) {
+ Status error;
+  // Let the StringRef also include the null termination char.
+ llvm::StringRef to_write_ref(to_write.c_str(), to_write.size() + 1);
+ llvm::SmallVector<llvm::UTF16, 128> to_write_utf16;
+
+ bool converted = convertUTF8ToUTF16String(to_write_ref, to_write_utf16);
+ if (!converted) {
+ error.SetErrorStringWithFormat(
+ "Unable to convert the string to UTF16. Failed to convert %s",
+ to_write.c_str());
+ return error;
+ }
+
+  // The size of the UTF16 string is written without the null termination
+  // character, which occupies 2 bytes.
+ llvm::support::ulittle32_t to_write_size(to_write_utf16.size_in_bytes() - 2);
+
+ buffer->AppendData(&to_write_size, sizeof(llvm::support::ulittle32_t));
+ buffer->AppendData(to_write_utf16.data(), to_write_utf16.size_in_bytes());
+
+ return error;
+}
+
+llvm::Expected<uint64_t> getModuleFileSize(Target &target,
+ const ModuleSP &mod) {
+ SectionSP sect_sp = mod->GetObjectFile()->GetBaseAddress().GetSection();
+ uint64_t SizeOfImage = 0;
+
+ if (!sect_sp) {
+ return llvm::createStringError(std::errc::operation_not_supported,
+ "Couldn't obtain the section information.");
+ }
+ lldb::addr_t sect_addr = sect_sp->GetLoadBaseAddress(&target);
+ // Use memory size since zero fill sections, like ".bss", will be smaller on
+ // disk.
+ lldb::addr_t sect_size = sect_sp->GetByteSize();
+ // This will usually be zero, but make sure to calculate the BaseOfImage
+ // offset.
+ const lldb::addr_t base_sect_offset =
+ mod->GetObjectFile()->GetBaseAddress().GetLoadAddress(&target) -
+ sect_addr;
+ SizeOfImage = sect_size - base_sect_offset;
+ lldb::addr_t next_sect_addr = sect_addr + sect_size;
+ Address sect_so_addr;
+ target.ResolveLoadAddress(next_sect_addr, sect_so_addr);
+ lldb::SectionSP next_sect_sp = sect_so_addr.GetSection();
+ while (next_sect_sp &&
+ next_sect_sp->GetLoadBaseAddress(&target) == next_sect_addr) {
+ sect_size = sect_sp->GetByteSize();
+ SizeOfImage += sect_size;
+ next_sect_addr += sect_size;
+ target.ResolveLoadAddress(next_sect_addr, sect_so_addr);
+ next_sect_sp = sect_so_addr.GetSection();
+ }
+
+ return SizeOfImage;
+}
+
+// The ModuleList stream consists of the number of modules followed by an
+// array of llvm::minidump::Module structures, one per module. Additional
+// variable-length data, such as module names, is stored just after the
+// ModuleList stream; the llvm::minidump::Module structures point to this
+// helper data by global offset.
+Status MinidumpFileBuilder::AddModuleList(Target &target) {
+ constexpr size_t minidump_module_size = sizeof(llvm::minidump::Module);
+ Status error;
+
+ const ModuleList &modules = target.GetImages();
+ llvm::support::ulittle32_t modules_count =
+ static_cast<llvm::support::ulittle32_t>(modules.GetSize());
+
+  // This helps us get the correct global offsets into the minidump file
+  // later, when we set up the offsets from the llvm::minidump::Module
+  // structures into the helper data.
+ size_t size_before = GetCurrentDataEndOffset();
+
+  // This is the size of the main part of the ModuleList stream.
+  // It consists of the module count and the corresponding number of
+  // structs describing the individual modules.
+ size_t module_stream_size =
+ sizeof(llvm::support::ulittle32_t) + modules_count * minidump_module_size;
+
+ // Adding directory describing this stream.
+ AddDirectory(StreamType::ModuleList, module_stream_size);
+
+ m_data.AppendData(&modules_count, sizeof(llvm::support::ulittle32_t));
+
+  // Temporary storage for the helper data (of variable length), as it
+  // cannot be dumped to m_data before the entire array of module
+  // structures has been dumped.
+ DataBufferHeap helper_data;
+
+ for (size_t i = 0; i < modules_count; ++i) {
+ ModuleSP mod = modules.GetModuleAtIndex(i);
+ std::string module_name = mod->GetSpecificationDescription();
+ auto maybe_mod_size = getModuleFileSize(target, mod);
+ if (!maybe_mod_size) {
+ error.SetErrorStringWithFormat("Unable to get the size of module %s.",
+ module_name.c_str());
+ return error;
+ }
+
+ uint64_t mod_size = std::move(*maybe_mod_size);
+
+ llvm::support::ulittle32_t signature =
+ static_cast<llvm::support::ulittle32_t>(
+ static_cast<uint32_t>(minidump::CvSignature::ElfBuildId));
+ auto uuid = mod->GetUUID().GetBytes();
+
+ VSFixedFileInfo info;
+ info.Signature = static_cast<llvm::support::ulittle32_t>(0u);
+ info.StructVersion = static_cast<llvm::support::ulittle32_t>(0u);
+ info.FileVersionHigh = static_cast<llvm::support::ulittle32_t>(0u);
+ info.FileVersionLow = static_cast<llvm::support::ulittle32_t>(0u);
+ info.ProductVersionHigh = static_cast<llvm::support::ulittle32_t>(0u);
+ info.ProductVersionLow = static_cast<llvm::support::ulittle32_t>(0u);
+ info.FileFlagsMask = static_cast<llvm::support::ulittle32_t>(0u);
+ info.FileFlags = static_cast<llvm::support::ulittle32_t>(0u);
+ info.FileOS = static_cast<llvm::support::ulittle32_t>(0u);
+ info.FileType = static_cast<llvm::support::ulittle32_t>(0u);
+ info.FileSubtype = static_cast<llvm::support::ulittle32_t>(0u);
+ info.FileDateHigh = static_cast<llvm::support::ulittle32_t>(0u);
+ info.FileDateLow = static_cast<llvm::support::ulittle32_t>(0u);
+
+ LocationDescriptor ld;
+ ld.DataSize = static_cast<llvm::support::ulittle32_t>(0u);
+ ld.RVA = static_cast<llvm::support::ulittle32_t>(0u);
+
+ // Setting up LocationDescriptor for uuid string. The global offset into
+ // minidump file is calculated.
+ LocationDescriptor ld_cv;
+ ld_cv.DataSize = static_cast<llvm::support::ulittle32_t>(
+ sizeof(llvm::support::ulittle32_t) + uuid.size());
+ ld_cv.RVA = static_cast<llvm::support::ulittle32_t>(
+ size_before + module_stream_size + helper_data.GetByteSize());
+
+ helper_data.AppendData(&signature, sizeof(llvm::support::ulittle32_t));
+ helper_data.AppendData(uuid.begin(), uuid.size());
+
+ llvm::minidump::Module m;
+ m.BaseOfImage = static_cast<llvm::support::ulittle64_t>(
+ mod->GetObjectFile()->GetBaseAddress().GetLoadAddress(&target));
+ m.SizeOfImage = static_cast<llvm::support::ulittle32_t>(mod_size);
+ m.Checksum = static_cast<llvm::support::ulittle32_t>(0);
+ m.TimeDateStamp = static_cast<llvm::support::ulittle32_t>(std::time(0));
+ m.ModuleNameRVA = static_cast<llvm::support::ulittle32_t>(
+ size_before + module_stream_size + helper_data.GetByteSize());
+ m.VersionInfo = info;
+ m.CvRecord = ld_cv;
+ m.MiscRecord = ld;
+
+ error = WriteString(module_name, &helper_data);
+
+ if (error.Fail())
+ return error;
+
+ m_data.AppendData(&m, sizeof(llvm::minidump::Module));
+ }
+
+ m_data.AppendData(helper_data.GetBytes(), helper_data.GetByteSize());
+ return error;
+}
+
+uint16_t read_register_u16_raw(RegisterContext *reg_ctx,
+ const std::string &reg_name) {
+ const RegisterInfo *reg_info = reg_ctx->GetRegisterInfoByName(reg_name);
+ if (!reg_info)
+ return 0;
+ lldb_private::RegisterValue reg_value;
+ bool success = reg_ctx->ReadRegister(reg_info, reg_value);
+ if (!success)
+ return 0;
+ return reg_value.GetAsUInt16();
+}
+
+uint32_t read_register_u32_raw(RegisterContext *reg_ctx,
+ const std::string &reg_name) {
+ const RegisterInfo *reg_info = reg_ctx->GetRegisterInfoByName(reg_name);
+ if (!reg_info)
+ return 0;
+ lldb_private::RegisterValue reg_value;
+ bool success = reg_ctx->ReadRegister(reg_info, reg_value);
+ if (!success)
+ return 0;
+ return reg_value.GetAsUInt32();
+}
+
+uint64_t read_register_u64_raw(RegisterContext *reg_ctx,
+ const std::string &reg_name) {
+ const RegisterInfo *reg_info = reg_ctx->GetRegisterInfoByName(reg_name);
+ if (!reg_info)
+ return 0;
+ lldb_private::RegisterValue reg_value;
+ bool success = reg_ctx->ReadRegister(reg_info, reg_value);
+ if (!success)
+ return 0;
+ return reg_value.GetAsUInt64();
+}
+
+llvm::support::ulittle16_t read_register_u16(RegisterContext *reg_ctx,
+ const std::string &reg_name) {
+ return static_cast<llvm::support::ulittle16_t>(
+ read_register_u16_raw(reg_ctx, reg_name));
+}
+
+llvm::support::ulittle32_t read_register_u32(RegisterContext *reg_ctx,
+ const std::string &reg_name) {
+ return static_cast<llvm::support::ulittle32_t>(
+ read_register_u32_raw(reg_ctx, reg_name));
+}
+
+llvm::support::ulittle64_t read_register_u64(RegisterContext *reg_ctx,
+ const std::string &reg_name) {
+ return static_cast<llvm::support::ulittle64_t>(
+ read_register_u64_raw(reg_ctx, reg_name));
+}
+
+lldb_private::minidump::MinidumpContext_x86_64
+GetThreadContext_64(RegisterContext *reg_ctx) {
+ lldb_private::minidump::MinidumpContext_x86_64 thread_context;
+ thread_context.context_flags = static_cast<uint32_t>(
+ lldb_private::minidump::MinidumpContext_x86_64_Flags::x86_64_Flag |
+ lldb_private::minidump::MinidumpContext_x86_64_Flags::Control |
+ lldb_private::minidump::MinidumpContext_x86_64_Flags::Segments |
+ lldb_private::minidump::MinidumpContext_x86_64_Flags::Integer);
+ thread_context.rax = read_register_u64(reg_ctx, "rax");
+ thread_context.rbx = read_register_u64(reg_ctx, "rbx");
+ thread_context.rcx = read_register_u64(reg_ctx, "rcx");
+ thread_context.rdx = read_register_u64(reg_ctx, "rdx");
+ thread_context.rdi = read_register_u64(reg_ctx, "rdi");
+ thread_context.rsi = read_register_u64(reg_ctx, "rsi");
+ thread_context.rbp = read_register_u64(reg_ctx, "rbp");
+ thread_context.rsp = read_register_u64(reg_ctx, "rsp");
+ thread_context.r8 = read_register_u64(reg_ctx, "r8");
+ thread_context.r9 = read_register_u64(reg_ctx, "r9");
+ thread_context.r10 = read_register_u64(reg_ctx, "r10");
+ thread_context.r11 = read_register_u64(reg_ctx, "r11");
+ thread_context.r12 = read_register_u64(reg_ctx, "r12");
+ thread_context.r13 = read_register_u64(reg_ctx, "r13");
+ thread_context.r14 = read_register_u64(reg_ctx, "r14");
+ thread_context.r15 = read_register_u64(reg_ctx, "r15");
+ thread_context.rip = read_register_u64(reg_ctx, "rip");
+ thread_context.eflags = read_register_u32(reg_ctx, "rflags");
+ thread_context.cs = read_register_u16(reg_ctx, "cs");
+ thread_context.fs = read_register_u16(reg_ctx, "fs");
+ thread_context.gs = read_register_u16(reg_ctx, "gs");
+ thread_context.ss = read_register_u16(reg_ctx, "ss");
+ thread_context.ds = read_register_u16(reg_ctx, "ds");
+ return thread_context;
+}
+
+// Returns the start and size of the memory region that contains the
+// memory location pointed to by the current stack pointer.
+llvm::Expected<std::pair<addr_t, addr_t>>
+findStackHelper(const lldb::ProcessSP &process_sp, uint64_t rsp) {
+ MemoryRegionInfo range_info;
+ Status error = process_sp->GetMemoryRegionInfo(rsp, range_info);
+ // Skip failed memory region requests or any regions with no permissions.
+ if (error.Fail() || range_info.GetLLDBPermissions() == 0)
+ return llvm::createStringError(
+ std::errc::not_supported,
+ "unable to load stack segment of the process");
+
+ const addr_t addr = range_info.GetRange().GetRangeBase();
+ const addr_t size = range_info.GetRange().GetByteSize();
+
+ if (size == 0)
+ return llvm::createStringError(std::errc::not_supported,
+ "stack segment of the process is empty");
+
+ return std::make_pair(addr, size);
+}
+
+Status MinidumpFileBuilder::AddThreadList(const lldb::ProcessSP &process_sp) {
+ constexpr size_t minidump_thread_size = sizeof(llvm::minidump::Thread);
+ lldb_private::ThreadList thread_list = process_sp->GetThreadList();
+
+  // The size of the entire thread stream consists of the thread count
+  // followed by the thread array.
+ size_t thread_stream_size = sizeof(llvm::support::ulittle32_t) +
+ thread_list.GetSize() * minidump_thread_size;
+  // Save the current end-of-data offset so the RVAs can be set up later.
+ size_t size_before = GetCurrentDataEndOffset();
+
+ AddDirectory(StreamType::ThreadList, thread_stream_size);
+
+ llvm::support::ulittle32_t thread_count =
+ static_cast<llvm::support::ulittle32_t>(thread_list.GetSize());
+ m_data.AppendData(&thread_count, sizeof(llvm::support::ulittle32_t));
+
+ DataBufferHeap helper_data;
+
+ const uint32_t num_threads = thread_list.GetSize();
+
+ for (uint32_t thread_idx = 0; thread_idx < num_threads; ++thread_idx) {
+ ThreadSP thread_sp(thread_list.GetThreadAtIndex(thread_idx));
+ RegisterContextSP reg_ctx_sp(thread_sp->GetRegisterContext());
+ Status error;
+
+ if (!reg_ctx_sp) {
+ error.SetErrorString("Unable to get the register context.");
+ return error;
+ }
+ RegisterContext *reg_ctx = reg_ctx_sp.get();
+ auto thread_context = GetThreadContext_64(reg_ctx);
+ uint64_t rsp = read_register_u64_raw(reg_ctx, "rsp");
+ auto expected_address_range = findStackHelper(process_sp, rsp);
+
+ if (!expected_address_range) {
+ error.SetErrorString("Unable to get the stack address.");
+ return error;
+ }
+
+ std::pair<uint64_t, uint64_t> range = std::move(*expected_address_range);
+ uint64_t addr = range.first;
+ uint64_t size = range.second;
+
+ auto data_up = std::make_unique<DataBufferHeap>(size, 0);
+ const size_t stack_bytes_read =
+ process_sp->ReadMemory(addr, data_up->GetBytes(), size, error);
+
+ if (error.Fail())
+ return error;
+
+ LocationDescriptor stack_memory;
+ stack_memory.DataSize =
+ static_cast<llvm::support::ulittle32_t>(stack_bytes_read);
+ stack_memory.RVA = static_cast<llvm::support::ulittle32_t>(
+ size_before + thread_stream_size + helper_data.GetByteSize());
+
+ MemoryDescriptor stack;
+ stack.StartOfMemoryRange = static_cast<llvm::support::ulittle64_t>(addr);
+ stack.Memory = stack_memory;
+
+ helper_data.AppendData(data_up->GetBytes(), stack_bytes_read);
+
+ LocationDescriptor thread_context_memory_locator;
+ thread_context_memory_locator.DataSize =
+ static_cast<llvm::support::ulittle32_t>(sizeof(thread_context));
+ thread_context_memory_locator.RVA = static_cast<llvm::support::ulittle32_t>(
+ size_before + thread_stream_size + helper_data.GetByteSize());
+
+ helper_data.AppendData(
+ &thread_context,
+ sizeof(lldb_private::minidump::MinidumpContext_x86_64));
+
+ llvm::minidump::Thread t;
+ t.ThreadId = static_cast<llvm::support::ulittle32_t>(thread_sp->GetID());
+ t.SuspendCount = static_cast<llvm::support::ulittle32_t>(
+ (thread_sp->GetState() == StateType::eStateSuspended) ? 1 : 0);
+ t.PriorityClass = static_cast<llvm::support::ulittle32_t>(0);
+ t.Priority = static_cast<llvm::support::ulittle32_t>(0);
+ t.EnvironmentBlock = static_cast<llvm::support::ulittle64_t>(0);
+    t.Stack = stack;
+    t.Context = thread_context_memory_locator;
+
+ m_data.AppendData(&t, sizeof(llvm::minidump::Thread));
+ }
+
+ m_data.AppendData(helper_data.GetBytes(), helper_data.GetByteSize());
+ return Status();
+}
+
+Status MinidumpFileBuilder::AddException(const lldb::ProcessSP &process_sp) {
+ Status error;
+ lldb_private::ThreadList thread_list = process_sp->GetThreadList();
+
+ const uint32_t num_threads = thread_list.GetSize();
+ uint32_t stop_reason_thread_idx = 0;
+ for (stop_reason_thread_idx = 0; stop_reason_thread_idx < num_threads;
+ ++stop_reason_thread_idx) {
+ ThreadSP thread_sp(thread_list.GetThreadAtIndex(stop_reason_thread_idx));
+ StopInfoSP stop_info_sp = thread_sp->GetStopInfo();
+
+ if (stop_info_sp && stop_info_sp->IsValid())
+ break;
+ }
+
+ if (stop_reason_thread_idx == num_threads) {
+ error.SetErrorString("No stop reason thread found.");
+ return error;
+ }
+
+ constexpr size_t minidump_exception_size =
+ sizeof(llvm::minidump::ExceptionStream);
+ AddDirectory(StreamType::Exception, minidump_exception_size);
+ size_t size_before = GetCurrentDataEndOffset();
+
+ ThreadSP thread_sp(thread_list.GetThreadAtIndex(stop_reason_thread_idx));
+ RegisterContextSP reg_ctx_sp(thread_sp->GetRegisterContext());
+ RegisterContext *reg_ctx = reg_ctx_sp.get();
+ auto thread_context = GetThreadContext_64(reg_ctx);
+ StopInfoSP stop_info_sp = thread_sp->GetStopInfo();
+
+ DataBufferHeap helper_data;
+
+ LocationDescriptor thread_context_memory_locator;
+ thread_context_memory_locator.DataSize =
+ static_cast<llvm::support::ulittle32_t>(sizeof(thread_context));
+ thread_context_memory_locator.RVA = static_cast<llvm::support::ulittle32_t>(
+ size_before + minidump_exception_size + helper_data.GetByteSize());
+
+ helper_data.AppendData(
+ &thread_context, sizeof(lldb_private::minidump::MinidumpContext_x86_64));
+
+ Exception exp_record;
+ exp_record.ExceptionCode =
+ static_cast<llvm::support::ulittle32_t>(stop_info_sp->GetValue());
+ exp_record.ExceptionFlags = static_cast<llvm::support::ulittle32_t>(0);
+ exp_record.ExceptionRecord = static_cast<llvm::support::ulittle64_t>(0);
+ exp_record.ExceptionAddress = read_register_u64(reg_ctx, "rip");
+ exp_record.NumberParameters = static_cast<llvm::support::ulittle32_t>(0);
+ exp_record.UnusedAlignment = static_cast<llvm::support::ulittle32_t>(0);
+ // exp_record.ExceptionInformation;
+
+ ExceptionStream exp_stream;
+ exp_stream.ThreadId =
+ static_cast<llvm::support::ulittle32_t>(thread_sp->GetID());
+ exp_stream.UnusedAlignment = static_cast<llvm::support::ulittle32_t>(0);
+ exp_stream.ExceptionRecord = exp_record;
+ exp_stream.ThreadContext = thread_context_memory_locator;
+
+ m_data.AppendData(&exp_stream, minidump_exception_size);
+ m_data.AppendData(helper_data.GetBytes(), helper_data.GetByteSize());
+ return error;
+}
+
+lldb_private::Status
+MinidumpFileBuilder::AddMemoryList(const lldb::ProcessSP &process_sp) {
+ Status error;
+
+ if (error.Fail()) {
+ error.SetErrorString("Process doesn't support getting memory region info.");
+ return error;
+ }
+
+ // Get interesting addresses
+ std::vector<size_t> interesting_addresses;
+ auto thread_list = process_sp->GetThreadList();
+ for (size_t i = 0; i < thread_list.GetSize(); ++i) {
+ ThreadSP thread_sp(thread_list.GetThreadAtIndex(i));
+ RegisterContextSP reg_ctx_sp(thread_sp->GetRegisterContext());
+ RegisterContext *reg_ctx = reg_ctx_sp.get();
+
+ interesting_addresses.push_back(read_register_u64(reg_ctx, "rsp"));
+ interesting_addresses.push_back(read_register_u64(reg_ctx, "rip"));
+ }
+
+ DataBufferHeap helper_data;
+ std::vector<MemoryDescriptor> mem_descriptors;
+
+ std::set<addr_t> visited_region_base_addresses;
+ for (size_t interesting_address : interesting_addresses) {
+ MemoryRegionInfo range_info;
+ error = process_sp->GetMemoryRegionInfo(interesting_address, range_info);
+ // Skip failed memory region requests or any regions with no permissions.
+ if (error.Fail() || range_info.GetLLDBPermissions() == 0)
+ continue;
+ const addr_t addr = range_info.GetRange().GetRangeBase();
+ // Skip any regions we have already saved out.
+ if (visited_region_base_addresses.insert(addr).second == false)
+ continue;
+ const addr_t size = range_info.GetRange().GetByteSize();
+ if (size == 0)
+ continue;
+ auto data_up = std::make_unique<DataBufferHeap>(size, 0);
+ const size_t bytes_read =
+ process_sp->ReadMemory(addr, data_up->GetBytes(), size, error);
+ if (bytes_read == 0)
+ continue;
+ // We have a good memory region with valid bytes to store.
+ LocationDescriptor memory_dump;
+ memory_dump.DataSize = static_cast<llvm::support::ulittle32_t>(bytes_read);
+ memory_dump.RVA =
+ static_cast<llvm::support::ulittle32_t>(GetCurrentDataEndOffset());
+ MemoryDescriptor memory_desc;
+ memory_desc.StartOfMemoryRange =
+ static_cast<llvm::support::ulittle64_t>(addr);
+ memory_desc.Memory = memory_dump;
+ mem_descriptors.push_back(memory_desc);
+ m_data.AppendData(data_up->GetBytes(), bytes_read);
+ }
+
+ AddDirectory(StreamType::MemoryList,
+ sizeof(llvm::support::ulittle32_t) +
+ mem_descriptors.size() *
+ sizeof(llvm::minidump::MemoryDescriptor));
+ llvm::support::ulittle32_t memory_ranges_num(mem_descriptors.size());
+
+ m_data.AppendData(&memory_ranges_num, sizeof(llvm::support::ulittle32_t));
+ for (auto memory_descriptor : mem_descriptors) {
+ m_data.AppendData(&memory_descriptor,
+ sizeof(llvm::minidump::MemoryDescriptor));
+ }
+
+ return error;
+}
+
+void MinidumpFileBuilder::AddMiscInfo(const lldb::ProcessSP &process_sp) {
+ AddDirectory(StreamType::MiscInfo,
+ sizeof(lldb_private::minidump::MinidumpMiscInfo));
+
+ lldb_private::minidump::MinidumpMiscInfo misc_info;
+ misc_info.size = static_cast<llvm::support::ulittle32_t>(
+ sizeof(lldb_private::minidump::MinidumpMiscInfo));
+  // Default flags1 to 0, in case we are not able to get any
+  // information.
+ misc_info.flags1 = static_cast<llvm::support::ulittle32_t>(0);
+
+ lldb_private::ProcessInstanceInfo process_info;
+ process_sp->GetProcessInfo(process_info);
+ if (process_info.ProcessIDIsValid()) {
+ // Set flags1 to reflect that PID is filled in
+ misc_info.flags1 =
+ static_cast<llvm::support::ulittle32_t>(static_cast<uint32_t>(
+ lldb_private::minidump::MinidumpMiscInfoFlags::ProcessID));
+ misc_info.process_id =
+ static_cast<llvm::support::ulittle32_t>(process_info.GetProcessID());
+ }
+
+ m_data.AppendData(&misc_info,
+ sizeof(lldb_private::minidump::MinidumpMiscInfo));
+}
+
+std::unique_ptr<llvm::MemoryBuffer>
+getFileStreamHelper(const std::string &path) {
+ auto maybe_stream = llvm::MemoryBuffer::getFileAsStream(path);
+ if (!maybe_stream)
+ return nullptr;
+ return std::move(maybe_stream.get());
+}
+
+void MinidumpFileBuilder::AddLinuxFileStreams(
+ const lldb::ProcessSP &process_sp) {
+ std::vector<std::pair<StreamType, std::string>> files_with_stream_types = {
+ {StreamType::LinuxCPUInfo, "/proc/cpuinfo"},
+ {StreamType::LinuxLSBRelease, "/etc/lsb-release"},
+ };
+
+ lldb_private::ProcessInstanceInfo process_info;
+ process_sp->GetProcessInfo(process_info);
+ if (process_info.ProcessIDIsValid()) {
+ lldb::pid_t pid = process_info.GetProcessID();
+ std::string pid_str = std::to_string(pid);
+ files_with_stream_types.push_back(
+ {StreamType::LinuxProcStatus, "/proc/" + pid_str + "/status"});
+ files_with_stream_types.push_back(
+ {StreamType::LinuxCMDLine, "/proc/" + pid_str + "/cmdline"});
+ files_with_stream_types.push_back(
+ {StreamType::LinuxEnviron, "/proc/" + pid_str + "/environ"});
+ files_with_stream_types.push_back(
+ {StreamType::LinuxAuxv, "/proc/" + pid_str + "/auxv"});
+ files_with_stream_types.push_back(
+ {StreamType::LinuxMaps, "/proc/" + pid_str + "/maps"});
+ files_with_stream_types.push_back(
+ {StreamType::LinuxProcStat, "/proc/" + pid_str + "/stat"});
+ files_with_stream_types.push_back(
+ {StreamType::LinuxProcFD, "/proc/" + pid_str + "/fd"});
+ }
+
+ for (const auto &entry : files_with_stream_types) {
+ StreamType stream = entry.first;
+ std::string path = entry.second;
+ auto memory_buffer = getFileStreamHelper(path);
+
+ if (memory_buffer) {
+ size_t size = memory_buffer->getBufferSize();
+ if (size == 0)
+ continue;
+ AddDirectory(stream, size);
+ m_data.AppendData(memory_buffer->getBufferStart(), size);
+ }
+ }
+}
+
+Status MinidumpFileBuilder::Dump(lldb::FileUP &core_file) const {
+ constexpr size_t header_size = sizeof(llvm::minidump::Header);
+ constexpr size_t directory_size = sizeof(llvm::minidump::Directory);
+
+ // write header
+ llvm::minidump::Header header;
+ header.Signature = static_cast<llvm::support::ulittle32_t>(
+ llvm::minidump::Header::MagicSignature);
+ header.Version = static_cast<llvm::support::ulittle32_t>(
+ llvm::minidump::Header::MagicVersion);
+ header.NumberOfStreams =
+ static_cast<llvm::support::ulittle32_t>(GetDirectoriesNum());
+ header.StreamDirectoryRVA =
+ static_cast<llvm::support::ulittle32_t>(GetCurrentDataEndOffset());
+  header.Checksum = static_cast<llvm::support::ulittle32_t>(
+      0u); // not used in most of the writers
+  header.TimeDateStamp =
+      static_cast<llvm::support::ulittle32_t>(std::time(0));
+ header.Flags =
+ static_cast<llvm::support::ulittle64_t>(0u); // minidump normal flag
+
+ Status error;
+ size_t bytes_written;
+
+ bytes_written = header_size;
+ error = core_file->Write(&header, bytes_written);
+ if (error.Fail() || bytes_written != header_size) {
+ if (bytes_written != header_size)
+ error.SetErrorStringWithFormat(
+ "unable to write the header (written %zd/%zd)", bytes_written,
+ header_size);
+ return error;
+ }
+
+ // write data
+ bytes_written = m_data.GetByteSize();
+ error = core_file->Write(m_data.GetBytes(), bytes_written);
+ if (error.Fail() || bytes_written != m_data.GetByteSize()) {
+ if (bytes_written != m_data.GetByteSize())
+ error.SetErrorStringWithFormat(
+ "unable to write the data (written %zd/%" PRIu64 ")", bytes_written,
+ m_data.GetByteSize());
+ return error;
+ }
+
+ // write directories
+ for (const Directory &dir : m_directories) {
+ bytes_written = directory_size;
+ error = core_file->Write(&dir, bytes_written);
+ if (error.Fail() || bytes_written != directory_size) {
+ if (bytes_written != directory_size)
+ error.SetErrorStringWithFormat(
+ "unable to write the directory (written %zd/%zd)", bytes_written,
+ directory_size);
+ return error;
+ }
+ }
+
+ return error;
+}
+
+size_t MinidumpFileBuilder::GetDirectoriesNum() const {
+ return m_directories.size();
+}
+
+size_t MinidumpFileBuilder::GetCurrentDataEndOffset() const {
+ return sizeof(llvm::minidump::Header) + m_data.GetByteSize();
+}
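Putting AddDirectory(), GetCurrentDataEndOffset() and Dump() together, the resulting file has this layout (a sketch derived from the code above, not a dump of real output):

    offset 0                                  llvm::minidump::Header
    sizeof(Header)                            m_data: stream contents plus
                                              variable-length helper data;
                                              all RVAs are absolute offsets
    sizeof(Header) + m_data.GetByteSize()     m_directories[0 .. N-1], where
                                              N = GetDirectoriesNum() and the
                                              header's StreamDirectoryRVA
                                              points here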
diff --git a/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.h b/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.h
new file mode 100644
index 000000000000..f4017fb66384
--- /dev/null
+++ b/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.h
@@ -0,0 +1,92 @@
+//===-- MinidumpFileBuilder.h ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Structure holding data necessary for minidump file creation.
+///
+/// The class MinidumpFileBuilder is used to hold the data that will eventually
+/// be dumped to the file.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLDB_SOURCE_PLUGINS_OBJECTFILE_MINIDUMP_MINIDUMPFILEBUILDER_H
+#define LLDB_SOURCE_PLUGINS_OBJECTFILE_MINIDUMP_MINIDUMPFILEBUILDER_H
+
+#include <cstddef>
+
+#include "lldb/Target/Target.h"
+#include "lldb/Utility/DataBufferHeap.h"
+#include "lldb/Utility/Status.h"
+
+#include "llvm/Object/Minidump.h"
+
+// Write an std::string to the minidump in UTF16 format (with a null
+// termination char), with the size (without the null termination char)
+// preceding the UTF16 string. Empty strings are also written, with a zero
+// length followed by just the null termination char.
+lldb_private::Status WriteString(const std::string &to_write,
+ lldb_private::DataBufferHeap *buffer);
+
+/// \class MinidumpFileBuilder
+/// Minidump writer for Linux
+///
+/// This class provides a Minidump writer that is able to
+/// snapshot the current process state. It stores all of the data on the
+/// heap for the whole time.
+class MinidumpFileBuilder {
+public:
+ MinidumpFileBuilder() = default;
+
+ MinidumpFileBuilder(const MinidumpFileBuilder &) = delete;
+ MinidumpFileBuilder &operator=(const MinidumpFileBuilder &) = delete;
+
+ MinidumpFileBuilder(MinidumpFileBuilder &&other) = default;
+ MinidumpFileBuilder &operator=(MinidumpFileBuilder &&other) = default;
+
+ ~MinidumpFileBuilder() = default;
+
+ // Add SystemInfo stream, used for storing the most basic information
+ // about the system, platform etc...
+ lldb_private::Status AddSystemInfo(const llvm::Triple &target_triple);
+ // Add ModuleList stream, containing information about all loaded modules
+ // at the time of saving minidump.
+ lldb_private::Status AddModuleList(lldb_private::Target &target);
+  // Add ThreadList stream, containing information about all threads running
+  // at the moment the core is saved, including the thread contexts.
+ lldb_private::Status AddThreadList(const lldb::ProcessSP &process_sp);
+  // Add Exception stream; this contains information about the exception
+  // that stopped the process. If no thread was stopped by an exception,
+  // a failed Status is returned.
+ lldb_private::Status AddException(const lldb::ProcessSP &process_sp);
+ // Add MemoryList stream, containing dumps of important memory segments
+ lldb_private::Status AddMemoryList(const lldb::ProcessSP &process_sp);
+ // Add MiscInfo stream, mainly providing ProcessId
+ void AddMiscInfo(const lldb::ProcessSP &process_sp);
+ // Add informative files about a Linux process
+ void AddLinuxFileStreams(const lldb::ProcessSP &process_sp);
+  // Dump the prepared data into the file. In case of failure, the prepared
+  // data are left intact.
+ lldb_private::Status Dump(lldb::FileUP &core_file) const;
+  // Returns the number of directories (streams) that have been created so
+  // far. This many directories will be dumped when calling Dump().
+ size_t GetDirectoriesNum() const;
+
+private:
+  // Add a directory of the given StreamType pointing to the current end of
+  // the prepared data, with the specified size.
+ void AddDirectory(llvm::minidump::StreamType type, size_t stream_size);
+ size_t GetCurrentDataEndOffset() const;
+
+  // Stores the directories, which are later placed at the end of the
+  // minidump file.
+ std::vector<llvm::minidump::Directory> m_directories;
+ // Main data buffer consisting of data without the minidump header and
+ // directories
+ lldb_private::DataBufferHeap m_data;
+};
+
+#endif // LLDB_SOURCE_PLUGINS_OBJECTFILE_MINIDUMP_MINIDUMPFILEBUILDER_H
diff --git a/lldb/source/Plugins/ObjectFile/Minidump/ObjectFileMinidump.cpp b/lldb/source/Plugins/ObjectFile/Minidump/ObjectFileMinidump.cpp
new file mode 100644
index 000000000000..715ccd311dee
--- /dev/null
+++ b/lldb/source/Plugins/ObjectFile/Minidump/ObjectFileMinidump.cpp
@@ -0,0 +1,114 @@
+//===-- ObjectFileMinidump.cpp --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "ObjectFileMinidump.h"
+
+#include "MinidumpFileBuilder.h"
+
+#include "lldb/Core/ModuleSpec.h"
+#include "lldb/Core/PluginManager.h"
+#include "lldb/Core/Section.h"
+#include "lldb/Target/Process.h"
+
+#include "llvm/Support/FileSystem.h"
+
+using namespace lldb;
+using namespace lldb_private;
+
+LLDB_PLUGIN_DEFINE(ObjectFileMinidump)
+
+void ObjectFileMinidump::Initialize() {
+ PluginManager::RegisterPlugin(
+ GetPluginNameStatic(), GetPluginDescriptionStatic(), CreateInstance,
+ CreateMemoryInstance, GetModuleSpecifications, SaveCore);
+}
+
+void ObjectFileMinidump::Terminate() {
+ PluginManager::UnregisterPlugin(CreateInstance);
+}
+
+ObjectFile *ObjectFileMinidump::CreateInstance(
+ const lldb::ModuleSP &module_sp, lldb::DataBufferSP &data_sp,
+ lldb::offset_t data_offset, const lldb_private::FileSpec *file,
+ lldb::offset_t offset, lldb::offset_t length) {
+ return nullptr;
+}
+
+ObjectFile *ObjectFileMinidump::CreateMemoryInstance(
+ const lldb::ModuleSP &module_sp, DataBufferSP &data_sp,
+ const ProcessSP &process_sp, lldb::addr_t header_addr) {
+ return nullptr;
+}
+
+size_t ObjectFileMinidump::GetModuleSpecifications(
+ const lldb_private::FileSpec &file, lldb::DataBufferSP &data_sp,
+ lldb::offset_t data_offset, lldb::offset_t file_offset,
+ lldb::offset_t length, lldb_private::ModuleSpecList &specs) {
+ specs.Clear();
+ return 0;
+}
+
+bool ObjectFileMinidump::SaveCore(const lldb::ProcessSP &process_sp,
+ const lldb_private::FileSpec &outfile,
+ lldb::SaveCoreStyle &core_style,
+ lldb_private::Status &error) {
+ if (core_style != SaveCoreStyle::eSaveCoreStackOnly) {
+ error.SetErrorString("Only stack minidumps supported yet.");
+ return false;
+ }
+
+ if (!process_sp)
+ return false;
+
+ MinidumpFileBuilder builder;
+
+ Target &target = process_sp->GetTarget();
+
+ error = builder.AddSystemInfo(target.GetArchitecture().GetTriple());
+ if (error.Fail())
+ return false;
+
+ error = builder.AddModuleList(target);
+ if (error.Fail())
+ return false;
+
+ builder.AddMiscInfo(process_sp);
+
+ if (target.GetArchitecture().GetMachine() == llvm::Triple::ArchType::x86_64) {
+ error = builder.AddThreadList(process_sp);
+ if (error.Fail())
+ return false;
+
+ error = builder.AddException(process_sp);
+ if (error.Fail())
+ return false;
+
+ error = builder.AddMemoryList(process_sp);
+ if (error.Fail())
+ return false;
+ }
+
+ if (target.GetArchitecture().GetTriple().getOS() ==
+ llvm::Triple::OSType::Linux) {
+ builder.AddLinuxFileStreams(process_sp);
+ }
+
+ llvm::Expected<lldb::FileUP> maybe_core_file = FileSystem::Instance().Open(
+ outfile, File::eOpenOptionWriteOnly | File::eOpenOptionCanCreate);
+ if (!maybe_core_file) {
+ error = maybe_core_file.takeError();
+ return false;
+ }
+ lldb::FileUP core_file = std::move(maybe_core_file.get());
+
+ error = builder.Dump(core_file);
+ if (error.Fail())
+ return false;
+
+ return true;
+}
diff --git a/lldb/source/Plugins/ObjectFile/Minidump/ObjectFileMinidump.h b/lldb/source/Plugins/ObjectFile/Minidump/ObjectFileMinidump.h
new file mode 100644
index 000000000000..3e4d55dc6c8c
--- /dev/null
+++ b/lldb/source/Plugins/ObjectFile/Minidump/ObjectFileMinidump.h
@@ -0,0 +1,66 @@
+//===-- ObjectFileMinidump.h ---------------------------------- -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Placeholder plugin for the save core functionality.
+///
+/// ObjectFileMinidump is created only to be able to save minidump core files
+/// from existing processes with the ObjectFileMinidump::SaveCore function.
+/// Minidump files are not ObjectFile objects, but they are core files and
+/// currently LLDB's ObjectFile plug-ins handle emitting core files. If the
+/// core file saving ever moves into a new plug-in type within LLDB, this code
+/// should move as well, but for now this is the best place architecturally.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLDB_SOURCE_PLUGINS_OBJECTFILE_MINIDUMP_OBJECTFILEMINIDUMP_H
+#define LLDB_SOURCE_PLUGINS_OBJECTFILE_MINIDUMP_OBJECTFILEMINIDUMP_H
+
+#include "lldb/Symbol/ObjectFile.h"
+#include "lldb/Utility/ArchSpec.h"
+
+class ObjectFileMinidump : public lldb_private::PluginInterface {
+public:
+ // Static Functions
+ static void Initialize();
+ static void Terminate();
+
+ static llvm::StringRef GetPluginNameStatic() { return "minidump"; }
+ static const char *GetPluginDescriptionStatic() {
+ return "Minidump object file.";
+ }
+
+ // PluginInterface protocol
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
+
+ static lldb_private::ObjectFile *
+ CreateInstance(const lldb::ModuleSP &module_sp, lldb::DataBufferSP &data_sp,
+ lldb::offset_t data_offset, const lldb_private::FileSpec *file,
+ lldb::offset_t offset, lldb::offset_t length);
+
+ static lldb_private::ObjectFile *CreateMemoryInstance(
+ const lldb::ModuleSP &module_sp, lldb::DataBufferSP &data_sp,
+ const lldb::ProcessSP &process_sp, lldb::addr_t header_addr);
+
+ static size_t GetModuleSpecifications(const lldb_private::FileSpec &file,
+ lldb::DataBufferSP &data_sp,
+ lldb::offset_t data_offset,
+ lldb::offset_t file_offset,
+ lldb::offset_t length,
+ lldb_private::ModuleSpecList &specs);
+
+  // Saves a core dump in the Minidump file format.
+ static bool SaveCore(const lldb::ProcessSP &process_sp,
+ const lldb_private::FileSpec &outfile,
+ lldb::SaveCoreStyle &core_style,
+ lldb_private::Status &error);
+
+private:
+ ObjectFileMinidump() = default;
+};
+
+#endif // LLDB_SOURCE_PLUGINS_OBJECTFILE_MINIDUMP_OBJECTFILEMINIDUMP_H
diff --git a/lldb/source/Plugins/ObjectFile/PDB/ObjectFilePDB.cpp b/lldb/source/Plugins/ObjectFile/PDB/ObjectFilePDB.cpp
index cb7bbeeca054..b63cd8e70899 100644
--- a/lldb/source/Plugins/ObjectFile/PDB/ObjectFilePDB.cpp
+++ b/lldb/source/Plugins/ObjectFile/PDB/ObjectFilePDB.cpp
@@ -46,11 +46,6 @@ void ObjectFilePDB::Terminate() {
PluginManager::UnregisterPlugin(CreateInstance);
}
-ConstString ObjectFilePDB::GetPluginNameStatic() {
- static ConstString g_name("pdb");
- return g_name;
-}
-
ArchSpec ObjectFilePDB::GetArchitecture() {
auto dbi_stream = m_file_up->getPDBDbiStream();
if (!dbi_stream) {
diff --git a/lldb/source/Plugins/ObjectFile/PDB/ObjectFilePDB.h b/lldb/source/Plugins/ObjectFile/PDB/ObjectFilePDB.h
index 19dd46b31406..36e71e21332f 100644
--- a/lldb/source/Plugins/ObjectFile/PDB/ObjectFilePDB.h
+++ b/lldb/source/Plugins/ObjectFile/PDB/ObjectFilePDB.h
@@ -22,7 +22,7 @@ public:
static void Initialize();
static void Terminate();
- static ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "pdb"; }
static const char *GetPluginDescriptionStatic() {
return "PDB object file reader.";
}
@@ -48,9 +48,7 @@ public:
ModuleSpecList &specs);
// PluginInterface protocol
- ConstString GetPluginName() override { return GetPluginNameStatic(); }
-
- uint32_t GetPluginVersion() override { return 1; }
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
// LLVM RTTI support
static char ID;
diff --git a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
index 5272da9ab33a..0e6329885528 100644
--- a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
+++ b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
@@ -86,11 +86,6 @@ void ObjectFileWasm::Terminate() {
PluginManager::UnregisterPlugin(CreateInstance);
}
-ConstString ObjectFileWasm::GetPluginNameStatic() {
- static ConstString g_name("wasm");
- return g_name;
-}
-
ObjectFile *
ObjectFileWasm::CreateInstance(const ModuleSP &module_sp, DataBufferSP &data_sp,
offset_t data_offset, const FileSpec *file,
diff --git a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.h b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.h
index b6e906a7b15f..44939b6d4ea0 100644
--- a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.h
+++ b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.h
@@ -24,7 +24,7 @@ public:
static void Initialize();
static void Terminate();
- static ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "wasm"; }
static const char *GetPluginDescriptionStatic() {
return "WebAssembly object file reader.";
}
@@ -48,8 +48,7 @@ public:
/// PluginInterface protocol.
/// \{
- ConstString GetPluginName() override { return GetPluginNameStatic(); }
- uint32_t GetPluginVersion() override { return 1; }
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
/// \}
/// LLVM RTTI support
diff --git a/lldb/source/Plugins/OperatingSystem/Python/OperatingSystemPython.cpp b/lldb/source/Plugins/OperatingSystem/Python/OperatingSystemPython.cpp
index 730c88f96e13..7d14f02e68f7 100644
--- a/lldb/source/Plugins/OperatingSystem/Python/OperatingSystemPython.cpp
+++ b/lldb/source/Plugins/OperatingSystem/Python/OperatingSystemPython.cpp
@@ -12,7 +12,6 @@
#include "OperatingSystemPython.h"
-#include "Plugins/Process/Utility/DynamicRegisterInfo.h"
#include "Plugins/Process/Utility/RegisterContextDummy.h"
#include "Plugins/Process/Utility/RegisterContextMemory.h"
#include "Plugins/Process/Utility/ThreadMemory.h"
@@ -66,12 +65,7 @@ OperatingSystem *OperatingSystemPython::CreateInstance(Process *process,
return nullptr;
}
-ConstString OperatingSystemPython::GetPluginNameStatic() {
- static ConstString g_name("python");
- return g_name;
-}
-
-const char *OperatingSystemPython::GetPluginDescriptionStatic() {
+llvm::StringRef OperatingSystemPython::GetPluginDescriptionStatic() {
return "Operating system plug-in that gathers OS information from a python "
"class that implements the necessary OperatingSystem functionality.";
}
@@ -141,13 +135,6 @@ DynamicRegisterInfo *OperatingSystemPython::GetDynamicRegisterInfo() {
return m_register_info_up.get();
}
-// PluginInterface protocol
-ConstString OperatingSystemPython::GetPluginName() {
- return GetPluginNameStatic();
-}
-
-uint32_t OperatingSystemPython::GetPluginVersion() { return 1; }
-
bool OperatingSystemPython::UpdateThreadList(ThreadList &old_thread_list,
ThreadList &core_thread_list,
ThreadList &new_thread_list) {
diff --git a/lldb/source/Plugins/OperatingSystem/Python/OperatingSystemPython.h b/lldb/source/Plugins/OperatingSystem/Python/OperatingSystemPython.h
index 4a594cf29a08..7800cf03af8e 100644
--- a/lldb/source/Plugins/OperatingSystem/Python/OperatingSystemPython.h
+++ b/lldb/source/Plugins/OperatingSystem/Python/OperatingSystemPython.h
@@ -13,11 +13,10 @@
#if LLDB_ENABLE_PYTHON
+#include "lldb/Target/DynamicRegisterInfo.h"
#include "lldb/Target/OperatingSystem.h"
#include "lldb/Utility/StructuredData.h"
-class DynamicRegisterInfo;
-
namespace lldb_private {
class ScriptInterpreter;
}
@@ -37,14 +36,12 @@ public:
static void Terminate();
- static lldb_private::ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "python"; }
- static const char *GetPluginDescriptionStatic();
+ static llvm::StringRef GetPluginDescriptionStatic();
// lldb_private::PluginInterface Methods
- lldb_private::ConstString GetPluginName() override;
-
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
// lldb_private::OperatingSystem Methods
bool UpdateThreadList(lldb_private::ThreadList &old_thread_list,
@@ -74,14 +71,14 @@ protected:
lldb_private::ThreadList &old_thread_list,
std::vector<bool> &core_used_map, bool *did_create_ptr);
- DynamicRegisterInfo *GetDynamicRegisterInfo();
+ lldb_private::DynamicRegisterInfo *GetDynamicRegisterInfo();
lldb::ValueObjectSP m_thread_list_valobj_sp;
- std::unique_ptr<DynamicRegisterInfo> m_register_info_up;
+ std::unique_ptr<lldb_private::DynamicRegisterInfo> m_register_info_up;
lldb_private::ScriptInterpreter *m_interpreter;
lldb_private::StructuredData::ObjectSP m_python_object_sp;
};
-#endif
+#endif // LLDB_ENABLE_PYTHON
#endif // liblldb_OperatingSystemPython_h_
diff --git a/lldb/source/Plugins/Platform/FreeBSD/PlatformFreeBSD.cpp b/lldb/source/Plugins/Platform/FreeBSD/PlatformFreeBSD.cpp
index 7b3d8a375bf6..754d06de7cb9 100644
--- a/lldb/source/Plugins/Platform/FreeBSD/PlatformFreeBSD.cpp
+++ b/lldb/source/Plugins/Platform/FreeBSD/PlatformFreeBSD.cpp
@@ -76,25 +76,10 @@ PlatformSP PlatformFreeBSD::CreateInstance(bool force, const ArchSpec *arch) {
return PlatformSP();
}
-ConstString PlatformFreeBSD::GetPluginNameStatic(bool is_host) {
- if (is_host) {
- static ConstString g_host_name(Platform::GetHostPlatformName());
- return g_host_name;
- } else {
- static ConstString g_remote_name("remote-freebsd");
- return g_remote_name;
- }
-}
-
-const char *PlatformFreeBSD::GetPluginDescriptionStatic(bool is_host) {
+llvm::StringRef PlatformFreeBSD::GetPluginDescriptionStatic(bool is_host) {
if (is_host)
return "Local FreeBSD user platform plug-in.";
- else
- return "Remote FreeBSD user platform plug-in.";
-}
-
-ConstString PlatformFreeBSD::GetPluginName() {
- return GetPluginNameStatic(IsHost());
+ return "Remote FreeBSD user platform plug-in.";
}
void PlatformFreeBSD::Initialize() {
@@ -126,72 +111,27 @@ void PlatformFreeBSD::Terminate() {
/// Default Constructor
PlatformFreeBSD::PlatformFreeBSD(bool is_host)
: PlatformPOSIX(is_host) // This is the local host platform
-{}
-
-bool PlatformFreeBSD::GetSupportedArchitectureAtIndex(uint32_t idx,
- ArchSpec &arch) {
- if (IsHost()) {
+{
+ if (is_host) {
ArchSpec hostArch = HostInfo::GetArchitecture(HostInfo::eArchKindDefault);
- if (hostArch.GetTriple().isOSFreeBSD()) {
- if (idx == 0) {
- arch = hostArch;
- return arch.IsValid();
- } else if (idx == 1) {
- // If the default host architecture is 64-bit, look for a 32-bit
- // variant
- if (hostArch.IsValid() && hostArch.GetTriple().isArch64Bit()) {
- arch = HostInfo::GetArchitecture(HostInfo::eArchKind32);
- return arch.IsValid();
- }
- }
+ m_supported_architectures.push_back(hostArch);
+ if (hostArch.GetTriple().isArch64Bit()) {
+ m_supported_architectures.push_back(
+ HostInfo::GetArchitecture(HostInfo::eArchKind32));
}
} else {
- if (m_remote_platform_sp)
- return m_remote_platform_sp->GetSupportedArchitectureAtIndex(idx, arch);
-
- llvm::Triple triple;
- // Set the OS to FreeBSD
- triple.setOS(llvm::Triple::FreeBSD);
- // Set the architecture
- switch (idx) {
- case 0:
- triple.setArchName("x86_64");
- break;
- case 1:
- triple.setArchName("i386");
- break;
- case 2:
- triple.setArchName("aarch64");
- break;
- case 3:
- triple.setArchName("arm");
- break;
- case 4:
- triple.setArchName("mips64");
- break;
- case 5:
- triple.setArchName("mips");
- break;
- case 6:
- triple.setArchName("ppc64");
- break;
- case 7:
- triple.setArchName("ppc");
- break;
- default:
- return false;
- }
- // Leave the vendor as "llvm::Triple:UnknownVendor" and don't specify the
- // vendor by calling triple.SetVendorName("unknown") so that it is a
- // "unspecified unknown". This means when someone calls
- // triple.GetVendorName() it will return an empty string which indicates
- // that the vendor can be set when two architectures are merged
-
- // Now set the triple into "arch" and return true
- arch.SetTriple(triple);
- return true;
+ m_supported_architectures = CreateArchList(
+ {llvm::Triple::x86_64, llvm::Triple::x86, llvm::Triple::aarch64,
+ llvm::Triple::arm, llvm::Triple::mips64, llvm::Triple::ppc64,
+ llvm::Triple::ppc},
+ llvm::Triple::FreeBSD);
}
- return false;
+}
+
+std::vector<ArchSpec> PlatformFreeBSD::GetSupportedArchitectures() {
+ if (m_remote_platform_sp)
+ return m_remote_platform_sp->GetSupportedArchitectures();
+ return m_supported_architectures;
}
void PlatformFreeBSD::GetStatus(Stream &strm) {
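
For callers, this refactor replaces index-based probing with a single call returning the whole list; a minimal before/after sketch (the `platform` object is assumed):

  // Before: probe indices until the platform reports no further architectures.
  ArchSpec arch;
  for (uint32_t idx = 0; platform.GetSupportedArchitectureAtIndex(idx, arch);
       ++idx) {
    // ... use arch ...
  }

  // After: iterate the returned vector directly.
  for (const ArchSpec &arch : platform.GetSupportedArchitectures()) {
    // ... use arch ...
  }
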
diff --git a/lldb/source/Plugins/Platform/FreeBSD/PlatformFreeBSD.h b/lldb/source/Plugins/Platform/FreeBSD/PlatformFreeBSD.h
index 4fd10fb1be73..fd37b13de017 100644
--- a/lldb/source/Plugins/Platform/FreeBSD/PlatformFreeBSD.h
+++ b/lldb/source/Plugins/Platform/FreeBSD/PlatformFreeBSD.h
@@ -25,22 +25,24 @@ public:
// lldb_private::PluginInterface functions
static lldb::PlatformSP CreateInstance(bool force, const ArchSpec *arch);
- static ConstString GetPluginNameStatic(bool is_host);
-
- static const char *GetPluginDescriptionStatic(bool is_host);
+ static llvm::StringRef GetPluginNameStatic(bool is_host) {
+ return is_host ? Platform::GetHostPlatformName() : "remote-freebsd";
+ }
- ConstString GetPluginName() override;
+ static llvm::StringRef GetPluginDescriptionStatic(bool is_host);
- uint32_t GetPluginVersion() override { return 1; }
+ llvm::StringRef GetPluginName() override {
+ return GetPluginNameStatic(IsHost());
+ }
// lldb_private::Platform functions
- const char *GetDescription() override {
+ llvm::StringRef GetDescription() override {
return GetPluginDescriptionStatic(IsHost());
}
void GetStatus(Stream &strm) override;
- bool GetSupportedArchitectureAtIndex(uint32_t idx, ArchSpec &arch) override;
+ std::vector<ArchSpec> GetSupportedArchitectures() override;
bool CanDebugProcess() override;
@@ -50,6 +52,8 @@ public:
lldb::addr_t length, unsigned prot,
unsigned flags, lldb::addr_t fd,
lldb::addr_t offset) override;
+
+ std::vector<ArchSpec> m_supported_architectures;
};
} // namespace platform_freebsd
diff --git a/lldb/source/Plugins/Platform/NetBSD/PlatformNetBSD.cpp b/lldb/source/Plugins/Platform/NetBSD/PlatformNetBSD.cpp
index e3682b44e141..552b3890615c 100644
--- a/lldb/source/Plugins/Platform/NetBSD/PlatformNetBSD.cpp
+++ b/lldb/source/Plugins/Platform/NetBSD/PlatformNetBSD.cpp
@@ -65,25 +65,10 @@ PlatformSP PlatformNetBSD::CreateInstance(bool force, const ArchSpec *arch) {
return PlatformSP();
}
-ConstString PlatformNetBSD::GetPluginNameStatic(bool is_host) {
- if (is_host) {
- static ConstString g_host_name(Platform::GetHostPlatformName());
- return g_host_name;
- } else {
- static ConstString g_remote_name("remote-netbsd");
- return g_remote_name;
- }
-}
-
-const char *PlatformNetBSD::GetPluginDescriptionStatic(bool is_host) {
+llvm::StringRef PlatformNetBSD::GetPluginDescriptionStatic(bool is_host) {
if (is_host)
return "Local NetBSD user platform plug-in.";
- else
- return "Remote NetBSD user platform plug-in.";
-}
-
-ConstString PlatformNetBSD::GetPluginName() {
- return GetPluginNameStatic(IsHost());
+ return "Remote NetBSD user platform plug-in.";
}
void PlatformNetBSD::Initialize() {
@@ -115,54 +100,24 @@ void PlatformNetBSD::Terminate() {
/// Default Constructor
PlatformNetBSD::PlatformNetBSD(bool is_host)
: PlatformPOSIX(is_host) // This is the local host platform
-{}
-
-bool PlatformNetBSD::GetSupportedArchitectureAtIndex(uint32_t idx,
- ArchSpec &arch) {
- if (IsHost()) {
+{
+ if (is_host) {
ArchSpec hostArch = HostInfo::GetArchitecture(HostInfo::eArchKindDefault);
- if (hostArch.GetTriple().isOSNetBSD()) {
- if (idx == 0) {
- arch = hostArch;
- return arch.IsValid();
- } else if (idx == 1) {
- // If the default host architecture is 64-bit, look for a 32-bit
- // variant
- if (hostArch.IsValid() && hostArch.GetTriple().isArch64Bit()) {
- arch = HostInfo::GetArchitecture(HostInfo::eArchKind32);
- return arch.IsValid();
- }
- }
+ m_supported_architectures.push_back(hostArch);
+ if (hostArch.GetTriple().isArch64Bit()) {
+ m_supported_architectures.push_back(
+ HostInfo::GetArchitecture(HostInfo::eArchKind32));
}
} else {
- if (m_remote_platform_sp)
- return m_remote_platform_sp->GetSupportedArchitectureAtIndex(idx, arch);
-
- llvm::Triple triple;
- // Set the OS to NetBSD
- triple.setOS(llvm::Triple::NetBSD);
- // Set the architecture
- switch (idx) {
- case 0:
- triple.setArchName("x86_64");
- break;
- case 1:
- triple.setArchName("i386");
- break;
- default:
- return false;
- }
- // Leave the vendor as "llvm::Triple:UnknownVendor" and don't specify the
- // vendor by calling triple.SetVendorName("unknown") so that it is a
- // "unspecified unknown". This means when someone calls
- // triple.GetVendorName() it will return an empty string which indicates
- // that the vendor can be set when two architectures are merged
-
- // Now set the triple into "arch" and return true
- arch.SetTriple(triple);
- return true;
+ m_supported_architectures = CreateArchList(
+ {llvm::Triple::x86_64, llvm::Triple::x86}, llvm::Triple::NetBSD);
}
- return false;
+}
+
+std::vector<ArchSpec> PlatformNetBSD::GetSupportedArchitectures() {
+ if (m_remote_platform_sp)
+ return m_remote_platform_sp->GetSupportedArchitectures();
+ return m_supported_architectures;
}
void PlatformNetBSD::GetStatus(Stream &strm) {
diff --git a/lldb/source/Plugins/Platform/NetBSD/PlatformNetBSD.h b/lldb/source/Plugins/Platform/NetBSD/PlatformNetBSD.h
index e664f5181123..7158fbd26efb 100644
--- a/lldb/source/Plugins/Platform/NetBSD/PlatformNetBSD.h
+++ b/lldb/source/Plugins/Platform/NetBSD/PlatformNetBSD.h
@@ -25,22 +25,24 @@ public:
// lldb_private::PluginInterface functions
static lldb::PlatformSP CreateInstance(bool force, const ArchSpec *arch);
- static ConstString GetPluginNameStatic(bool is_host);
-
- static const char *GetPluginDescriptionStatic(bool is_host);
+ static llvm::StringRef GetPluginNameStatic(bool is_host) {
+ return is_host ? Platform::GetHostPlatformName() : "remote-netbsd";
+ }
- ConstString GetPluginName() override;
+ static llvm::StringRef GetPluginDescriptionStatic(bool is_host);
- uint32_t GetPluginVersion() override { return 1; }
+ llvm::StringRef GetPluginName() override {
+ return GetPluginNameStatic(IsHost());
+ }
// lldb_private::Platform functions
- const char *GetDescription() override {
+ llvm::StringRef GetDescription() override {
return GetPluginDescriptionStatic(IsHost());
}
void GetStatus(Stream &strm) override;
- bool GetSupportedArchitectureAtIndex(uint32_t idx, ArchSpec &arch) override;
+ std::vector<ArchSpec> GetSupportedArchitectures() override;
uint32_t GetResumeCountForLaunchInfo(ProcessLaunchInfo &launch_info) override;
@@ -52,6 +54,8 @@ public:
lldb::addr_t length, unsigned prot,
unsigned flags, lldb::addr_t fd,
lldb::addr_t offset) override;
+
+ std::vector<ArchSpec> m_supported_architectures;
};
} // namespace platform_netbsd
diff --git a/lldb/source/Plugins/Platform/OpenBSD/PlatformOpenBSD.cpp b/lldb/source/Plugins/Platform/OpenBSD/PlatformOpenBSD.cpp
index 012b688231a0..84d9ff799f47 100644
--- a/lldb/source/Plugins/Platform/OpenBSD/PlatformOpenBSD.cpp
+++ b/lldb/source/Plugins/Platform/OpenBSD/PlatformOpenBSD.cpp
@@ -71,25 +71,10 @@ PlatformSP PlatformOpenBSD::CreateInstance(bool force, const ArchSpec *arch) {
return PlatformSP();
}
-ConstString PlatformOpenBSD::GetPluginNameStatic(bool is_host) {
- if (is_host) {
- static ConstString g_host_name(Platform::GetHostPlatformName());
- return g_host_name;
- } else {
- static ConstString g_remote_name("remote-openbsd");
- return g_remote_name;
- }
-}
-
-const char *PlatformOpenBSD::GetPluginDescriptionStatic(bool is_host) {
+llvm::StringRef PlatformOpenBSD::GetPluginDescriptionStatic(bool is_host) {
if (is_host)
return "Local OpenBSD user platform plug-in.";
- else
- return "Remote OpenBSD user platform plug-in.";
-}
-
-ConstString PlatformOpenBSD::GetPluginName() {
- return GetPluginNameStatic(IsHost());
+ return "Remote OpenBSD user platform plug-in.";
}
void PlatformOpenBSD::Initialize() {
@@ -121,53 +106,21 @@ void PlatformOpenBSD::Terminate() {
/// Default Constructor
PlatformOpenBSD::PlatformOpenBSD(bool is_host)
: PlatformPOSIX(is_host) // This is the local host platform
-{}
-
-bool PlatformOpenBSD::GetSupportedArchitectureAtIndex(uint32_t idx,
- ArchSpec &arch) {
- if (IsHost()) {
- ArchSpec hostArch = HostInfo::GetArchitecture(HostInfo::eArchKindDefault);
- if (hostArch.GetTriple().isOSOpenBSD()) {
- if (idx == 0) {
- arch = hostArch;
- return arch.IsValid();
- }
- }
+{
+ if (is_host) {
+ m_supported_architectures.push_back(HostInfo::GetArchitecture());
} else {
- if (m_remote_platform_sp)
- return m_remote_platform_sp->GetSupportedArchitectureAtIndex(idx, arch);
-
- llvm::Triple triple;
- // Set the OS to OpenBSD
- triple.setOS(llvm::Triple::OpenBSD);
- // Set the architecture
- switch (idx) {
- case 0:
- triple.setArchName("x86_64");
- break;
- case 1:
- triple.setArchName("i386");
- break;
- case 2:
- triple.setArchName("aarch64");
- break;
- case 3:
- triple.setArchName("arm");
- break;
- default:
- return false;
- }
- // Leave the vendor as "llvm::Triple:UnknownVendor" and don't specify the
- // vendor by calling triple.SetVendorName("unknown") so that it is a
- // "unspecified unknown". This means when someone calls
- // triple.GetVendorName() it will return an empty string which indicates
- // that the vendor can be set when two architectures are merged
-
- // Now set the triple into "arch" and return true
- arch.SetTriple(triple);
- return true;
+ m_supported_architectures =
+ CreateArchList({llvm::Triple::x86_64, llvm::Triple::x86,
+ llvm::Triple::aarch64, llvm::Triple::arm},
+ llvm::Triple::OpenBSD);
}
- return false;
+}
+
+std::vector<ArchSpec> PlatformOpenBSD::GetSupportedArchitectures() {
+ if (m_remote_platform_sp)
+ return m_remote_platform_sp->GetSupportedArchitectures();
+ return m_supported_architectures;
}
void PlatformOpenBSD::GetStatus(Stream &strm) {
diff --git a/lldb/source/Plugins/Platform/OpenBSD/PlatformOpenBSD.h b/lldb/source/Plugins/Platform/OpenBSD/PlatformOpenBSD.h
index e1402ae0ae9f..fd03988590ca 100644
--- a/lldb/source/Plugins/Platform/OpenBSD/PlatformOpenBSD.h
+++ b/lldb/source/Plugins/Platform/OpenBSD/PlatformOpenBSD.h
@@ -25,22 +25,24 @@ public:
// lldb_private::PluginInterface functions
static lldb::PlatformSP CreateInstance(bool force, const ArchSpec *arch);
- static ConstString GetPluginNameStatic(bool is_host);
-
- static const char *GetPluginDescriptionStatic(bool is_host);
+ static llvm::StringRef GetPluginNameStatic(bool is_host) {
+ return is_host ? Platform::GetHostPlatformName() : "remote-openbsd";
+ }
- ConstString GetPluginName() override;
+ static llvm::StringRef GetPluginDescriptionStatic(bool is_host);
- uint32_t GetPluginVersion() override { return 1; }
+ llvm::StringRef GetPluginName() override {
+ return GetPluginNameStatic(IsHost());
+ }
// lldb_private::Platform functions
- const char *GetDescription() override {
+ llvm::StringRef GetDescription() override {
return GetPluginDescriptionStatic(IsHost());
}
void GetStatus(Stream &strm) override;
- bool GetSupportedArchitectureAtIndex(uint32_t idx, ArchSpec &arch) override;
+ std::vector<ArchSpec> GetSupportedArchitectures() override;
bool CanDebugProcess() override;
@@ -50,6 +52,8 @@ public:
lldb::addr_t length, unsigned prot,
unsigned flags, lldb::addr_t fd,
lldb::addr_t offset) override;
+
+ std::vector<ArchSpec> m_supported_architectures;
};
} // namespace platform_openbsd
diff --git a/lldb/source/Plugins/Platform/POSIX/PlatformPOSIX.cpp b/lldb/source/Plugins/Platform/POSIX/PlatformPOSIX.cpp
index 7353132cd96f..719109c863e7 100644
--- a/lldb/source/Plugins/Platform/POSIX/PlatformPOSIX.cpp
+++ b/lldb/source/Plugins/Platform/POSIX/PlatformPOSIX.cpp
@@ -205,7 +205,7 @@ lldb_private::Status PlatformPOSIX::GetFile(
// close dst
LLDB_LOGF(log, "[GetFile] Using block by block transfer....\n");
Status error;
- user_id_t fd_src = OpenFile(source, File::eOpenOptionRead,
+ user_id_t fd_src = OpenFile(source, File::eOpenOptionReadOnly,
lldb::eFilePermissionsFileDefault, error);
if (fd_src == UINT64_MAX)
@@ -218,7 +218,7 @@ lldb_private::Status PlatformPOSIX::GetFile(
permissions = lldb::eFilePermissionsFileDefault;
user_id_t fd_dst = FileCache::GetInstance().OpenFile(
- destination, File::eOpenOptionCanCreate | File::eOpenOptionWrite |
+ destination, File::eOpenOptionCanCreate | File::eOpenOptionWriteOnly |
File::eOpenOptionTruncate,
permissions, error);
@@ -300,9 +300,9 @@ const lldb::UnixSignalsSP &PlatformPOSIX::GetRemoteUnixSignals() {
Status PlatformPOSIX::ConnectRemote(Args &args) {
Status error;
if (IsHost()) {
- error.SetErrorStringWithFormat(
- "can't connect to the host platform '%s', always connected",
- GetPluginName().GetCString());
+ error.SetErrorStringWithFormatv(
+ "can't connect to the host platform '{0}', always connected",
+ GetPluginName());
} else {
if (!m_remote_platform_sp)
m_remote_platform_sp =
@@ -344,9 +344,9 @@ Status PlatformPOSIX::DisconnectRemote() {
Status error;
if (IsHost()) {
- error.SetErrorStringWithFormat(
- "can't disconnect from the host platform '%s', always connected",
- GetPluginName().GetCString());
+ error.SetErrorStringWithFormatv(
+ "can't disconnect from the host platform '{0}', always connected",
+ GetPluginName());
} else {
if (m_remote_platform_sp)
error = m_remote_platform_sp->DisconnectRemote();
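
The switch from SetErrorStringWithFormat to SetErrorStringWithFormatv in the two hunks above follows from GetPluginName() now returning llvm::StringRef: a StringRef is not null-terminated and cannot be handed to a printf-style "%s", while formatv-style "{0}" placeholders format it directly. A minimal sketch:

  lldb_private::Status error;
  llvm::StringRef name = GetPluginName(); // no longer a ConstString
  error.SetErrorStringWithFormatv("plugin '{0}' is always connected", name);
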
@@ -410,13 +410,11 @@ lldb::ProcessSP PlatformPOSIX::Attach(ProcessAttachInfo &attach_info,
return process_sp;
}
-lldb::ProcessSP
-PlatformPOSIX::DebugProcess(ProcessLaunchInfo &launch_info, Debugger &debugger,
- Target *target, // Can be NULL, if NULL create a new
- // target, else use existing one
- Status &error) {
+lldb::ProcessSP PlatformPOSIX::DebugProcess(ProcessLaunchInfo &launch_info,
+ Debugger &debugger, Target &target,
+ Status &error) {
Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_PLATFORM));
- LLDB_LOG(log, "target {0}", target);
+ LLDB_LOG(log, "target {0}", &target);
ProcessSP process_sp;
@@ -442,29 +440,10 @@ PlatformPOSIX::DebugProcess(ProcessLaunchInfo &launch_info, Debugger &debugger,
// worry about the target getting them as well.
launch_info.SetLaunchInSeparateProcessGroup(true);
- // Ensure we have a target.
- if (target == nullptr) {
- LLDB_LOG(log, "creating new target");
- TargetSP new_target_sp;
- error = debugger.GetTargetList().CreateTarget(
- debugger, "", "", eLoadDependentsNo, nullptr, new_target_sp);
- if (error.Fail()) {
- LLDB_LOG(log, "failed to create new target: {0}", error);
- return process_sp;
- }
-
- target = new_target_sp.get();
- if (!target) {
- error.SetErrorString("CreateTarget() returned nullptr");
- LLDB_LOG(log, "error: {0}", error);
- return process_sp;
- }
- }
-
// Now create the gdb-remote process.
LLDB_LOG(log, "having target create process with gdb-remote plugin");
process_sp =
- target->CreateProcess(launch_info.GetListener(), "gdb-remote", nullptr,
+ target.CreateProcess(launch_info.GetListener(), "gdb-remote", nullptr,
true);
if (!process_sp) {
@@ -518,8 +497,8 @@ PlatformPOSIX::DebugProcess(ProcessLaunchInfo &launch_info, Debugger &debugger,
LLDB_LOG(log, "not using process STDIO pty");
} else {
LLDB_LOG(log, "{0}", error);
- // FIXME figure out appropriate cleanup here. Do we delete the target? Do
- // we delete the process? Does our caller do that?
+ // FIXME figure out appropriate cleanup here. Do we delete the process?
+ // Does our caller do that?
}
return process_sp;
diff --git a/lldb/source/Plugins/Platform/POSIX/PlatformPOSIX.h b/lldb/source/Plugins/Platform/POSIX/PlatformPOSIX.h
index 1cba4c5eb2e9..511797ce6bb7 100644
--- a/lldb/source/Plugins/Platform/POSIX/PlatformPOSIX.h
+++ b/lldb/source/Plugins/Platform/POSIX/PlatformPOSIX.h
@@ -47,11 +47,7 @@ public:
lldb::ProcessSP DebugProcess(lldb_private::ProcessLaunchInfo &launch_info,
lldb_private::Debugger &debugger,
- lldb_private::Target *target, // Can be nullptr,
- // if nullptr
- // create a new
- // target, else use
- // existing one
+ lldb_private::Target &target,
lldb_private::Status &error) override;
std::string GetPlatformSpecificConnectionInformation() override;
diff --git a/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp b/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp
index 528208665a4e..987f7c7f57e7 100644
--- a/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp
+++ b/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp
@@ -72,17 +72,12 @@ PlatformSP PlatformRemoteGDBServer::CreateInstance(bool force,
return PlatformSP();
}
-ConstString PlatformRemoteGDBServer::GetPluginNameStatic() {
- static ConstString g_name("remote-gdb-server");
- return g_name;
-}
-
-const char *PlatformRemoteGDBServer::GetDescriptionStatic() {
+llvm::StringRef PlatformRemoteGDBServer::GetDescriptionStatic() {
return "A platform that uses the GDB remote protocol as the communication "
"transport.";
}
-const char *PlatformRemoteGDBServer::GetDescription() {
+llvm::StringRef PlatformRemoteGDBServer::GetDescription() {
if (m_platform_description.empty()) {
if (IsConnected()) {
// Send the get description packet
@@ -94,76 +89,6 @@ const char *PlatformRemoteGDBServer::GetDescription() {
return GetDescriptionStatic();
}
-Status PlatformRemoteGDBServer::ResolveExecutable(
- const ModuleSpec &module_spec, lldb::ModuleSP &exe_module_sp,
- const FileSpecList *module_search_paths_ptr) {
- // copied from PlatformRemoteiOS
-
- Status error;
- // Nothing special to do here, just use the actual file and architecture
-
- ModuleSpec resolved_module_spec(module_spec);
-
- // Resolve any executable within an apk on Android?
- // Host::ResolveExecutableInBundle (resolved_module_spec.GetFileSpec());
-
- if (FileSystem::Instance().Exists(resolved_module_spec.GetFileSpec()) ||
- module_spec.GetUUID().IsValid()) {
- if (resolved_module_spec.GetArchitecture().IsValid() ||
- resolved_module_spec.GetUUID().IsValid()) {
- error = ModuleList::GetSharedModule(resolved_module_spec, exe_module_sp,
- module_search_paths_ptr, nullptr,
- nullptr);
-
- if (exe_module_sp && exe_module_sp->GetObjectFile())
- return error;
- exe_module_sp.reset();
- }
- // No valid architecture was specified or the exact arch wasn't found so
- // ask the platform for the architectures that we should be using (in the
- // correct order) and see if we can find a match that way
- StreamString arch_names;
- for (uint32_t idx = 0; GetSupportedArchitectureAtIndex(
- idx, resolved_module_spec.GetArchitecture());
- ++idx) {
- error = ModuleList::GetSharedModule(resolved_module_spec, exe_module_sp,
- module_search_paths_ptr, nullptr,
- nullptr);
- // Did we find an executable using one of the
- if (error.Success()) {
- if (exe_module_sp && exe_module_sp->GetObjectFile())
- break;
- else
- error.SetErrorToGenericError();
- }
-
- if (idx > 0)
- arch_names.PutCString(", ");
- arch_names.PutCString(
- resolved_module_spec.GetArchitecture().GetArchitectureName());
- }
-
- if (error.Fail() || !exe_module_sp) {
- if (FileSystem::Instance().Readable(resolved_module_spec.GetFileSpec())) {
- error.SetErrorStringWithFormat(
- "'%s' doesn't contain any '%s' platform architectures: %s",
- resolved_module_spec.GetFileSpec().GetPath().c_str(),
- GetPluginName().GetCString(), arch_names.GetData());
- } else {
- error.SetErrorStringWithFormat(
- "'%s' is not readable",
- resolved_module_spec.GetFileSpec().GetPath().c_str());
- }
- }
- } else {
- error.SetErrorStringWithFormat(
- "'%s' does not exist",
- resolved_module_spec.GetFileSpec().GetPath().c_str());
- }
-
- return error;
-}
-
bool PlatformRemoteGDBServer::GetModuleSpec(const FileSpec &module_file_spec,
const ArchSpec &arch,
ModuleSpec &module_spec) {
@@ -214,21 +139,6 @@ PlatformRemoteGDBServer::PlatformRemoteGDBServer()
/// inherited from by the plug-in instance.
PlatformRemoteGDBServer::~PlatformRemoteGDBServer() = default;
-bool PlatformRemoteGDBServer::GetSupportedArchitectureAtIndex(uint32_t idx,
- ArchSpec &arch) {
- ArchSpec remote_arch = m_gdb_client.GetSystemArchitecture();
-
- if (idx == 0) {
- arch = remote_arch;
- return arch.IsValid();
- } else if (idx == 1 && remote_arch.IsValid() &&
- remote_arch.GetTriple().isArch64Bit()) {
- arch.SetTriple(remote_arch.GetTriple().get32BitArchVariant());
- return arch.IsValid();
- }
- return false;
-}
-
size_t PlatformRemoteGDBServer::GetSoftwareBreakpointTrapOpcode(
Target &target, BreakpointSite *bp_site) {
// This isn't needed if the z/Z packets are supported in the GDB remote
@@ -241,12 +151,13 @@ bool PlatformRemoteGDBServer::GetRemoteOSVersion() {
return !m_os_version.empty();
}
-bool PlatformRemoteGDBServer::GetRemoteOSBuildString(std::string &s) {
- return m_gdb_client.GetOSBuildString(s);
+llvm::Optional<std::string> PlatformRemoteGDBServer::GetRemoteOSBuildString() {
+ return m_gdb_client.GetOSBuildString();
}
-bool PlatformRemoteGDBServer::GetRemoteOSKernelDescription(std::string &s) {
- return m_gdb_client.GetOSKernelDescription(s);
+llvm::Optional<std::string>
+PlatformRemoteGDBServer::GetRemoteOSKernelDescription() {
+ return m_gdb_client.GetOSKernelDescription();
}
// Remote Platform subclasses need to override this function
@@ -305,14 +216,13 @@ Status PlatformRemoteGDBServer::ConnectRemote(Args &args) {
if (!url)
return Status("URL is null.");
- int port;
- llvm::StringRef scheme, hostname, pathname;
- if (!UriParser::Parse(url, scheme, hostname, port, pathname))
+ llvm::Optional<URI> parsed_url = URI::Parse(url);
+ if (!parsed_url)
return Status("Invalid URL: %s", url);
// We're going to reuse the hostname when we connect to the debugserver.
- m_platform_scheme = std::string(scheme);
- m_platform_hostname = std::string(hostname);
+ m_platform_scheme = parsed_url->scheme.str();
+ m_platform_hostname = parsed_url->hostname.str();
m_gdb_client.SetConnection(std::make_unique<ConnectionFileDescriptor>());
if (repro::Reproducer::Instance().IsReplaying()) {
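
The URI::Parse API used above returns llvm::Optional<URI> instead of filling several out-parameters; a minimal sketch of consuming it (the URL string is illustrative):

  if (llvm::Optional<URI> parsed = URI::Parse("connect://example.invalid:1234")) {
    llvm::StringRef scheme = parsed->scheme;     // "connect"
    llvm::StringRef hostname = parsed->hostname; // "example.invalid"
    // ... reuse the pieces, as ConnectRemote() does above ...
  }
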
@@ -337,6 +247,15 @@ Status PlatformRemoteGDBServer::ConnectRemote(Args &args) {
// now.
if (m_working_dir)
m_gdb_client.SetWorkingDir(m_working_dir);
+
+ m_supported_architectures.clear();
+ ArchSpec remote_arch = m_gdb_client.GetSystemArchitecture();
+ if (remote_arch) {
+ m_supported_architectures.push_back(remote_arch);
+ if (remote_arch.GetTriple().isArch64Bit())
+ m_supported_architectures.push_back(
+ ArchSpec(remote_arch.GetTriple().get32BitArchVariant()));
+ }
} else {
m_gdb_client.Disconnect();
if (error.Success())
@@ -475,11 +394,10 @@ Status PlatformRemoteGDBServer::KillProcess(const lldb::pid_t pid) {
return Status();
}
-lldb::ProcessSP PlatformRemoteGDBServer::DebugProcess(
- ProcessLaunchInfo &launch_info, Debugger &debugger,
- Target *target, // Can be NULL, if NULL create a new target, else use
- // existing one
- Status &error) {
+lldb::ProcessSP
+PlatformRemoteGDBServer::DebugProcess(ProcessLaunchInfo &launch_info,
+ Debugger &debugger, Target &target,
+ Status &error) {
lldb::ProcessSP process_sp;
if (IsRemote()) {
if (IsConnected()) {
@@ -489,32 +407,21 @@ lldb::ProcessSP PlatformRemoteGDBServer::DebugProcess(
error.SetErrorStringWithFormat("unable to launch a GDB server on '%s'",
GetHostname());
} else {
- if (target == nullptr) {
- TargetSP new_target_sp;
-
- error = debugger.GetTargetList().CreateTarget(
- debugger, "", "", eLoadDependentsNo, nullptr, new_target_sp);
- target = new_target_sp.get();
- } else
- error.Clear();
-
- if (target && error.Success()) {
- // The darwin always currently uses the GDB remote debugger plug-in
- // so even when debugging locally we are debugging remotely!
- process_sp = target->CreateProcess(launch_info.GetListener(),
- "gdb-remote", nullptr, true);
-
- if (process_sp) {
+      // Darwin currently always uses the GDB remote debugger plug-in, so even
+      // when debugging locally we are debugging remotely!
+ process_sp = target.CreateProcess(launch_info.GetListener(),
+ "gdb-remote", nullptr, true);
+
+ if (process_sp) {
+ error = process_sp->ConnectRemote(connect_url.c_str());
+ // Retry the connect remote one time...
+ if (error.Fail())
error = process_sp->ConnectRemote(connect_url.c_str());
- // Retry the connect remote one time...
- if (error.Fail())
- error = process_sp->ConnectRemote(connect_url.c_str());
- if (error.Success())
- error = process_sp->Launch(launch_info);
- else if (debugserver_pid != LLDB_INVALID_PROCESS_ID) {
- printf("error: connect remote failed (%s)\n", error.AsCString());
- KillSpawnedProcess(debugserver_pid);
- }
+ if (error.Success())
+ error = process_sp->Launch(launch_info);
+ else if (debugserver_pid != LLDB_INVALID_PROCESS_ID) {
+ printf("error: connect remote failed (%s)\n", error.AsCString());
+ KillSpawnedProcess(debugserver_pid);
}
}
}
diff --git a/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.h b/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.h
index e43cd0e55c6d..f594f43b3f13 100644
--- a/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.h
+++ b/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.h
@@ -28,28 +28,22 @@ public:
static lldb::PlatformSP CreateInstance(bool force, const ArchSpec *arch);
- static ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "remote-gdb-server"; }
- static const char *GetDescriptionStatic();
+ static llvm::StringRef GetDescriptionStatic();
PlatformRemoteGDBServer();
~PlatformRemoteGDBServer() override;
// lldb_private::PluginInterface functions
- ConstString GetPluginName() override { return GetPluginNameStatic(); }
-
- uint32_t GetPluginVersion() override { return 1; }
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
// lldb_private::Platform functions
- Status
- ResolveExecutable(const ModuleSpec &module_spec, lldb::ModuleSP &module_sp,
- const FileSpecList *module_search_paths_ptr) override;
-
bool GetModuleSpec(const FileSpec &module_file_spec, const ArchSpec &arch,
ModuleSpec &module_spec) override;
- const char *GetDescription() override;
+ llvm::StringRef GetDescription() override;
Status GetFileWithUUID(const FileSpec &platform_file, const UUID *uuid_ptr,
FileSpec &local_file) override;
@@ -64,10 +58,7 @@ public:
Status KillProcess(const lldb::pid_t pid) override;
lldb::ProcessSP DebugProcess(ProcessLaunchInfo &launch_info,
- Debugger &debugger,
- Target *target, // Can be NULL, if NULL create a
- // new target, else use existing
- // one
+ Debugger &debugger, Target &target,
Status &error) override;
lldb::ProcessSP Attach(ProcessAttachInfo &attach_info, Debugger &debugger,
@@ -75,16 +66,18 @@ public:
// target, else use existing one
Status &error) override;
- bool GetSupportedArchitectureAtIndex(uint32_t idx, ArchSpec &arch) override;
+ std::vector<ArchSpec> GetSupportedArchitectures() override {
+ return m_supported_architectures;
+ }
size_t GetSoftwareBreakpointTrapOpcode(Target &target,
BreakpointSite *bp_site) override;
bool GetRemoteOSVersion() override;
- bool GetRemoteOSBuildString(std::string &s) override;
+ llvm::Optional<std::string> GetRemoteOSBuildString() override;
- bool GetRemoteOSKernelDescription(std::string &s) override;
+ llvm::Optional<std::string> GetRemoteOSKernelDescription() override;
// Remote Platform subclasses need to override this function
ArchSpec GetRemoteSystemArchitecture() override;
@@ -191,6 +184,8 @@ private:
llvm::Optional<std::string> DoGetUserName(UserIDResolver::id_t uid) override;
llvm::Optional<std::string> DoGetGroupName(UserIDResolver::id_t uid) override;
+ std::vector<ArchSpec> m_supported_architectures;
+
PlatformRemoteGDBServer(const PlatformRemoteGDBServer &) = delete;
const PlatformRemoteGDBServer &
operator=(const PlatformRemoteGDBServer &) = delete;
diff --git a/lldb/source/Plugins/Process/FreeBSD/NativeProcessFreeBSD.cpp b/lldb/source/Plugins/Process/FreeBSD/NativeProcessFreeBSD.cpp
index d6426b3d2367..a62d3c1ba052 100644
--- a/lldb/source/Plugins/Process/FreeBSD/NativeProcessFreeBSD.cpp
+++ b/lldb/source/Plugins/Process/FreeBSD/NativeProcessFreeBSD.cpp
@@ -130,8 +130,12 @@ NativeProcessFreeBSD::Factory::Attach(
NativeProcessFreeBSD::Extension
NativeProcessFreeBSD::Factory::GetSupportedExtensions() const {
- return Extension::multiprocess | Extension::fork | Extension::vfork |
- Extension::pass_signals | Extension::auxv | Extension::libraries_svr4;
+ return
+#if defined(PT_COREDUMP)
+ Extension::savecore |
+#endif
+ Extension::multiprocess | Extension::fork | Extension::vfork |
+ Extension::pass_signals | Extension::auxv | Extension::libraries_svr4;
}
// Public Instance Methods
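
The extensions advertised above form a bitmask; a consumer deciding whether core saving is available would presumably test the bit roughly like this (assuming the usual bitmask-enum operators on NativeProcessProtocol::Extension; `factory` is illustrative):

  using Extension = NativeProcessProtocol::Extension;
  Extension supported = factory.GetSupportedExtensions();
  bool can_save_core =
      (supported & Extension::savecore) == Extension::savecore;
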
@@ -1009,3 +1013,36 @@ void NativeProcessFreeBSD::MonitorClone(::pid_t child_pid, bool is_vfork,
}
}
}
+
+llvm::Expected<std::string>
+NativeProcessFreeBSD::SaveCore(llvm::StringRef path_hint) {
+#if defined(PT_COREDUMP)
+ using namespace llvm::sys::fs;
+
+ llvm::SmallString<128> path{path_hint};
+ Status error;
+ struct ptrace_coredump pc = {};
+
+ // Try with the suggested path first. If there is no suggested path or it
+ // failed to open, use a temporary file.
+ if (path.empty() ||
+ openFile(path, pc.pc_fd, CD_CreateNew, FA_Write, OF_None)) {
+ if (std::error_code errc =
+ createTemporaryFile("lldb", "core", pc.pc_fd, path))
+ return llvm::createStringError(errc, "Unable to create a temporary file");
+ }
+ error = PtraceWrapper(PT_COREDUMP, GetID(), &pc, sizeof(pc));
+
+ std::error_code close_err = closeFile(pc.pc_fd);
+ if (error.Fail())
+ return error.ToError();
+ if (close_err)
+ return llvm::createStringError(
+ close_err, "Unable to close the core dump after writing");
+ return path.str().str();
+#else // !defined(PT_COREDUMP)
+ return llvm::createStringError(
+ llvm::inconvertibleErrorCode(),
+ "PT_COREDUMP not supported in the FreeBSD version used to build LLDB");
+#endif
+}
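
On the caller side, the llvm::Expected result of SaveCore would typically be unwrapped along these lines (the path hint and the `process` object are illustrative):

  llvm::Expected<std::string> core_path = process.SaveCore("/tmp/lldb.core");
  if (!core_path) {
    llvm::consumeError(core_path.takeError()); // or propagate it to the caller
    return;
  }
  // *core_path names the file actually written: the hint if it could be used,
  // otherwise a freshly created temporary file.
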
diff --git a/lldb/source/Plugins/Process/FreeBSD/NativeProcessFreeBSD.h b/lldb/source/Plugins/Process/FreeBSD/NativeProcessFreeBSD.h
index 7ec9d17d4cf4..44b8a53699bb 100644
--- a/lldb/source/Plugins/Process/FreeBSD/NativeProcessFreeBSD.h
+++ b/lldb/source/Plugins/Process/FreeBSD/NativeProcessFreeBSD.h
@@ -91,6 +91,8 @@ public:
bool SupportHardwareSingleStepping() const;
+ llvm::Expected<std::string> SaveCore(llvm::StringRef path_hint) override;
+
protected:
llvm::Expected<llvm::ArrayRef<uint8_t>>
GetSoftwareBreakpointTrapOpcode(size_t size_hint) override;
diff --git a/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_mips64.cpp b/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_mips64.cpp
index 8e722c09314c..d93b7fd33815 100644
--- a/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_mips64.cpp
+++ b/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_mips64.cpp
@@ -15,6 +15,7 @@
#include "lldb/Utility/Status.h"
#include "Plugins/Process/FreeBSD/NativeProcessFreeBSD.h"
+#include "Plugins/Process/Utility/lldb-mips-freebsd-register-enums.h"
// clang-format off
#include <sys/param.h>
@@ -59,11 +60,32 @@ uint32_t NativeRegisterContextFreeBSD_mips64::GetUserRegisterCount() const {
return count;
}
+llvm::Optional<NativeRegisterContextFreeBSD_mips64::RegSetKind>
+NativeRegisterContextFreeBSD_mips64::GetSetForNativeRegNum(
+ uint32_t reg_num) const {
+ switch (GetRegisterInfoInterface().GetTargetArchitecture().GetMachine()) {
+ case llvm::Triple::mips64:
+ if (reg_num >= k_first_gpr_mips64 && reg_num <= k_last_gpr_mips64)
+ return GPRegSet;
+ if (reg_num >= k_first_fpr_mips64 && reg_num <= k_last_fpr_mips64)
+ return FPRegSet;
+ break;
+ default:
+ llvm_unreachable("Unhandled target architecture.");
+ }
+
+ llvm_unreachable("Register does not belong to any register set");
+}
+
Status NativeRegisterContextFreeBSD_mips64::ReadRegisterSet(RegSetKind set) {
switch (set) {
case GPRegSet:
return NativeProcessFreeBSD::PtraceWrapper(PT_GETREGS, m_thread.GetID(),
m_reg_data.data());
+ case FPRegSet:
+ return NativeProcessFreeBSD::PtraceWrapper(
+ PT_GETFPREGS, m_thread.GetID(),
+ m_reg_data.data() + GetRegisterInfo().GetGPRSize());
}
llvm_unreachable("NativeRegisterContextFreeBSD_mips64::ReadRegisterSet");
}
@@ -73,6 +95,10 @@ Status NativeRegisterContextFreeBSD_mips64::WriteRegisterSet(RegSetKind set) {
case GPRegSet:
return NativeProcessFreeBSD::PtraceWrapper(PT_SETREGS, m_thread.GetID(),
m_reg_data.data());
+ case FPRegSet:
+ return NativeProcessFreeBSD::PtraceWrapper(
+ PT_SETFPREGS, m_thread.GetID(),
+ m_reg_data.data() + GetRegisterInfo().GetGPRSize());
}
llvm_unreachable("NativeRegisterContextFreeBSD_mips64::WriteRegisterSet");
}
@@ -94,7 +120,16 @@ NativeRegisterContextFreeBSD_mips64::ReadRegister(const RegisterInfo *reg_info,
? reg_info->name
: "<unknown register>");
- RegSetKind set = GPRegSet;
+ llvm::Optional<RegSetKind> opt_set = GetSetForNativeRegNum(reg);
+ if (!opt_set) {
+ // This is likely an internal register for lldb use only and should not be
+ // directly queried.
+ error.SetErrorStringWithFormat("register \"%s\" is in unrecognized set",
+ reg_info->name);
+ return error;
+ }
+
+ RegSetKind set = opt_set.getValue();
error = ReadRegisterSet(set);
if (error.Fail())
return error;
@@ -119,7 +154,16 @@ Status NativeRegisterContextFreeBSD_mips64::WriteRegister(
? reg_info->name
: "<unknown register>");
- RegSetKind set = GPRegSet;
+ llvm::Optional<RegSetKind> opt_set = GetSetForNativeRegNum(reg);
+ if (!opt_set) {
+ // This is likely an internal register for lldb use only and should not be
+ // directly queried.
+ error.SetErrorStringWithFormat("register \"%s\" is in unrecognized set",
+ reg_info->name);
+ return error;
+ }
+
+ RegSetKind set = opt_set.getValue();
error = ReadRegisterSet(set);
if (error.Fail())
return error;
@@ -139,6 +183,10 @@ Status NativeRegisterContextFreeBSD_mips64::ReadAllRegisterValues(
if (error.Fail())
return error;
+ error = ReadRegisterSet(FPRegSet);
+ if (error.Fail())
+ return error;
+
data_sp.reset(new DataBufferHeap(m_reg_data.size(), 0));
uint8_t *dst = data_sp->GetBytes();
::memcpy(dst, m_reg_data.data(), m_reg_data.size());
@@ -175,7 +223,11 @@ Status NativeRegisterContextFreeBSD_mips64::WriteAllRegisterValues(
}
::memcpy(m_reg_data.data(), src, m_reg_data.size());
- return WriteRegisterSet(GPRegSet);
+ error = WriteRegisterSet(GPRegSet);
+ if (error.Fail())
+ return error;
+
+ return WriteRegisterSet(FPRegSet);
}
llvm::Error NativeRegisterContextFreeBSD_mips64::CopyHardwareWatchpointsFrom(
diff --git a/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_mips64.h b/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_mips64.h
index 6a3eb86a9231..8e300ed829c9 100644
--- a/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_mips64.h
+++ b/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_mips64.h
@@ -54,8 +54,11 @@ public:
private:
enum RegSetKind {
GPRegSet,
+ FPRegSet,
};
- std::array<uint8_t, sizeof(reg)> m_reg_data;
+ std::array<uint8_t, sizeof(reg) + sizeof(fpreg)> m_reg_data;
+
+ llvm::Optional<RegSetKind> GetSetForNativeRegNum(uint32_t reg_num) const;
Status ReadRegisterSet(RegSetKind set);
Status WriteRegisterSet(RegSetKind set);
diff --git a/lldb/source/Plugins/Process/NetBSD/NativeProcessNetBSD.cpp b/lldb/source/Plugins/Process/NetBSD/NativeProcessNetBSD.cpp
index 9ea1a16b8785..0420d00e39d6 100644
--- a/lldb/source/Plugins/Process/NetBSD/NativeProcessNetBSD.cpp
+++ b/lldb/source/Plugins/Process/NetBSD/NativeProcessNetBSD.cpp
@@ -136,7 +136,8 @@ NativeProcessNetBSD::Factory::Attach(
NativeProcessNetBSD::Extension
NativeProcessNetBSD::Factory::GetSupportedExtensions() const {
return Extension::multiprocess | Extension::fork | Extension::vfork |
- Extension::pass_signals | Extension::auxv | Extension::libraries_svr4;
+ Extension::pass_signals | Extension::auxv | Extension::libraries_svr4 |
+ Extension::savecore;
}
// Public Instance Methods
@@ -1073,3 +1074,27 @@ void NativeProcessNetBSD::MonitorClone(::pid_t child_pid, bool is_vfork,
}
}
}
+
+llvm::Expected<std::string>
+NativeProcessNetBSD::SaveCore(llvm::StringRef path_hint) {
+ llvm::SmallString<128> path{path_hint};
+ Status error;
+
+ // Try with the suggested path first.
+ if (!path.empty()) {
+ error = PtraceWrapper(PT_DUMPCORE, GetID(), path.data(), path.size());
+ if (!error.Fail())
+ return path.str().str();
+
+ // If the request errored, fall back to a generic temporary file.
+ }
+
+ if (std::error_code errc =
+ llvm::sys::fs::createTemporaryFile("lldb", "core", path))
+ return llvm::createStringError(errc, "Unable to create a temporary file");
+
+ error = PtraceWrapper(PT_DUMPCORE, GetID(), path.data(), path.size());
+ if (error.Fail())
+ return error.ToError();
+ return path.str().str();
+}
diff --git a/lldb/source/Plugins/Process/NetBSD/NativeProcessNetBSD.h b/lldb/source/Plugins/Process/NetBSD/NativeProcessNetBSD.h
index 90d32aa6069d..3f54d02a9075 100644
--- a/lldb/source/Plugins/Process/NetBSD/NativeProcessNetBSD.h
+++ b/lldb/source/Plugins/Process/NetBSD/NativeProcessNetBSD.h
@@ -88,6 +88,8 @@ public:
static Status PtraceWrapper(int req, lldb::pid_t pid, void *addr = nullptr,
int data = 0, int *result = nullptr);
+ llvm::Expected<std::string> SaveCore(llvm::StringRef path_hint) override;
+
private:
MainLoop::SignalHandleUP m_sigchld_handle;
ArchSpec m_arch;
diff --git a/lldb/source/Plugins/Process/Utility/GDBRemoteSignals.cpp b/lldb/source/Plugins/Process/Utility/GDBRemoteSignals.cpp
index 427225c14d3b..15981a2c1cb8 100644
--- a/lldb/source/Plugins/Process/Utility/GDBRemoteSignals.cpp
+++ b/lldb/source/Plugins/Process/Utility/GDBRemoteSignals.cpp
@@ -15,4 +15,167 @@ GDBRemoteSignals::GDBRemoteSignals() : UnixSignals() { Reset(); }
GDBRemoteSignals::GDBRemoteSignals(const lldb::UnixSignalsSP &rhs)
: UnixSignals(*rhs) {}
-void GDBRemoteSignals::Reset() { m_signals.clear(); }
+void GDBRemoteSignals::Reset() {
+ m_signals.clear();
+ // clang-format off
+ // SIGNO NAME SUPPRESS STOP NOTIFY DESCRIPTION
+ // ====== ============== ======== ====== ====== ===================================================
+ AddSignal(1, "SIGHUP", false, true, true, "hangup");
+ AddSignal(2, "SIGINT", true, true, true, "interrupt");
+ AddSignal(3, "SIGQUIT", false, true, true, "quit");
+ AddSignal(4, "SIGILL", false, true, true, "illegal instruction");
+ AddSignal(5, "SIGTRAP", true, true, true, "trace trap (not reset when caught)");
+ AddSignal(6, "SIGABRT", false, true, true, "abort()/IOT trap", "SIGIOT");
+ AddSignal(7, "SIGEMT", false, true, true, "emulation trap");
+ AddSignal(8, "SIGFPE", false, true, true, "floating point exception");
+ AddSignal(9, "SIGKILL", false, true, true, "kill");
+ AddSignal(10, "SIGBUS", false, true, true, "bus error");
+ AddSignal(11, "SIGSEGV", false, true, true, "segmentation violation");
+ AddSignal(12, "SIGSYS", false, true, true, "invalid system call");
+ AddSignal(13, "SIGPIPE", false, true, true, "write to pipe with reading end closed");
+ AddSignal(14, "SIGALRM", false, false, false, "alarm");
+ AddSignal(15, "SIGTERM", false, true, true, "termination requested");
+ AddSignal(16, "SIGURG", false, true, true, "urgent data on socket");
+ AddSignal(17, "SIGSTOP", true, true, true, "process stop");
+ AddSignal(18, "SIGTSTP", false, true, true, "tty stop");
+ AddSignal(19, "SIGCONT", false, false, true, "process continue");
+ AddSignal(20, "SIGCHLD", false, false, true, "child status has changed", "SIGCLD");
+ AddSignal(21, "SIGTTIN", false, true, true, "background tty read");
+ AddSignal(22, "SIGTTOU", false, true, true, "background tty write");
+ AddSignal(23, "SIGIO", false, true, true, "input/output ready/Pollable event");
+ AddSignal(24, "SIGXCPU", false, true, true, "CPU resource exceeded");
+ AddSignal(25, "SIGXFSZ", false, true, true, "file size limit exceeded");
+ AddSignal(26, "SIGVTALRM", false, true, true, "virtual time alarm");
+ AddSignal(27, "SIGPROF", false, false, false, "profiling time alarm");
+ AddSignal(28, "SIGWINCH", false, true, true, "window size changes");
+ AddSignal(29, "SIGLOST", false, true, true, "resource lost");
+ AddSignal(30, "SIGUSR1", false, true, true, "user defined signal 1");
+ AddSignal(31, "SIGUSR2", false, true, true, "user defined signal 2");
+ AddSignal(32, "SIGPWR", false, true, true, "power failure");
+ AddSignal(33, "SIGPOLL", false, true, true, "pollable event");
+ AddSignal(34, "SIGWIND", false, true, true, "SIGWIND");
+ AddSignal(35, "SIGPHONE", false, true, true, "SIGPHONE");
+ AddSignal(36, "SIGWAITING", false, true, true, "process's LWPs are blocked");
+ AddSignal(37, "SIGLWP", false, true, true, "signal LWP");
+ AddSignal(38, "SIGDANGER", false, true, true, "swap space dangerously low");
+ AddSignal(39, "SIGGRANT", false, true, true, "monitor mode granted");
+ AddSignal(40, "SIGRETRACT", false, true, true, "need to relinquish monitor mode");
+ AddSignal(41, "SIGMSG", false, true, true, "monitor mode data available");
+ AddSignal(42, "SIGSOUND", false, true, true, "sound completed");
+ AddSignal(43, "SIGSAK", false, true, true, "secure attention");
+ AddSignal(44, "SIGPRIO", false, true, true, "SIGPRIO");
+
+ AddSignal(45, "SIG33", false, false, false, "real-time event 33");
+ AddSignal(46, "SIG34", false, false, false, "real-time event 34");
+ AddSignal(47, "SIG35", false, false, false, "real-time event 35");
+ AddSignal(48, "SIG36", false, false, false, "real-time event 36");
+ AddSignal(49, "SIG37", false, false, false, "real-time event 37");
+ AddSignal(50, "SIG38", false, false, false, "real-time event 38");
+ AddSignal(51, "SIG39", false, false, false, "real-time event 39");
+ AddSignal(52, "SIG40", false, false, false, "real-time event 40");
+ AddSignal(53, "SIG41", false, false, false, "real-time event 41");
+ AddSignal(54, "SIG42", false, false, false, "real-time event 42");
+ AddSignal(55, "SIG43", false, false, false, "real-time event 43");
+ AddSignal(56, "SIG44", false, false, false, "real-time event 44");
+ AddSignal(57, "SIG45", false, false, false, "real-time event 45");
+ AddSignal(58, "SIG46", false, false, false, "real-time event 46");
+ AddSignal(59, "SIG47", false, false, false, "real-time event 47");
+ AddSignal(60, "SIG48", false, false, false, "real-time event 48");
+ AddSignal(61, "SIG49", false, false, false, "real-time event 49");
+ AddSignal(62, "SIG50", false, false, false, "real-time event 50");
+ AddSignal(63, "SIG51", false, false, false, "real-time event 51");
+ AddSignal(64, "SIG52", false, false, false, "real-time event 52");
+ AddSignal(65, "SIG53", false, false, false, "real-time event 53");
+ AddSignal(66, "SIG54", false, false, false, "real-time event 54");
+ AddSignal(67, "SIG55", false, false, false, "real-time event 55");
+ AddSignal(68, "SIG56", false, false, false, "real-time event 56");
+ AddSignal(69, "SIG57", false, false, false, "real-time event 57");
+ AddSignal(70, "SIG58", false, false, false, "real-time event 58");
+ AddSignal(71, "SIG59", false, false, false, "real-time event 59");
+ AddSignal(72, "SIG60", false, false, false, "real-time event 60");
+ AddSignal(73, "SIG61", false, false, false, "real-time event 61");
+ AddSignal(74, "SIG62", false, false, false, "real-time event 62");
+ AddSignal(75, "SIG63", false, false, false, "real-time event 63");
+
+ AddSignal(76, "SIGCANCEL", false, true, true, "LWP internal signal");
+
+ AddSignal(77, "SIG32", false, false, false, "real-time event 32");
+ AddSignal(78, "SIG64", false, false, false, "real-time event 64");
+ AddSignal(79, "SIG65", false, false, false, "real-time event 65");
+ AddSignal(80, "SIG66", false, false, false, "real-time event 66");
+ AddSignal(81, "SIG67", false, false, false, "real-time event 67");
+ AddSignal(82, "SIG68", false, false, false, "real-time event 68");
+ AddSignal(83, "SIG69", false, false, false, "real-time event 69");
+ AddSignal(84, "SIG70", false, false, false, "real-time event 70");
+ AddSignal(85, "SIG71", false, false, false, "real-time event 71");
+ AddSignal(86, "SIG72", false, false, false, "real-time event 72");
+ AddSignal(87, "SIG73", false, false, false, "real-time event 73");
+ AddSignal(88, "SIG74", false, false, false, "real-time event 74");
+ AddSignal(89, "SIG75", false, false, false, "real-time event 75");
+ AddSignal(90, "SIG76", false, false, false, "real-time event 76");
+ AddSignal(91, "SIG77", false, false, false, "real-time event 77");
+ AddSignal(92, "SIG78", false, false, false, "real-time event 78");
+ AddSignal(93, "SIG79", false, false, false, "real-time event 79");
+ AddSignal(94, "SIG80", false, false, false, "real-time event 80");
+ AddSignal(95, "SIG81", false, false, false, "real-time event 81");
+ AddSignal(96, "SIG82", false, false, false, "real-time event 82");
+ AddSignal(97, "SIG83", false, false, false, "real-time event 83");
+ AddSignal(98, "SIG84", false, false, false, "real-time event 84");
+ AddSignal(99, "SIG85", false, false, false, "real-time event 85");
+ AddSignal(100, "SIG86", false, false, false, "real-time event 86");
+ AddSignal(101, "SIG87", false, false, false, "real-time event 87");
+ AddSignal(102, "SIG88", false, false, false, "real-time event 88");
+ AddSignal(103, "SIG89", false, false, false, "real-time event 89");
+ AddSignal(104, "SIG90", false, false, false, "real-time event 90");
+ AddSignal(105, "SIG91", false, false, false, "real-time event 91");
+ AddSignal(106, "SIG92", false, false, false, "real-time event 92");
+ AddSignal(107, "SIG93", false, false, false, "real-time event 93");
+ AddSignal(108, "SIG94", false, false, false, "real-time event 94");
+ AddSignal(109, "SIG95", false, false, false, "real-time event 95");
+ AddSignal(110, "SIG96", false, false, false, "real-time event 96");
+ AddSignal(111, "SIG97", false, false, false, "real-time event 97");
+ AddSignal(112, "SIG98", false, false, false, "real-time event 98");
+ AddSignal(113, "SIG99", false, false, false, "real-time event 99");
+ AddSignal(114, "SIG100", false, false, false, "real-time event 100");
+ AddSignal(115, "SIG101", false, false, false, "real-time event 101");
+ AddSignal(116, "SIG102", false, false, false, "real-time event 102");
+ AddSignal(117, "SIG103", false, false, false, "real-time event 103");
+ AddSignal(118, "SIG104", false, false, false, "real-time event 104");
+ AddSignal(119, "SIG105", false, false, false, "real-time event 105");
+ AddSignal(120, "SIG106", false, false, false, "real-time event 106");
+ AddSignal(121, "SIG107", false, false, false, "real-time event 107");
+ AddSignal(122, "SIG108", false, false, false, "real-time event 108");
+ AddSignal(123, "SIG109", false, false, false, "real-time event 109");
+ AddSignal(124, "SIG110", false, false, false, "real-time event 110");
+ AddSignal(125, "SIG111", false, false, false, "real-time event 111");
+ AddSignal(126, "SIG112", false, false, false, "real-time event 112");
+ AddSignal(127, "SIG113", false, false, false, "real-time event 113");
+ AddSignal(128, "SIG114", false, false, false, "real-time event 114");
+ AddSignal(129, "SIG115", false, false, false, "real-time event 115");
+ AddSignal(130, "SIG116", false, false, false, "real-time event 116");
+ AddSignal(131, "SIG117", false, false, false, "real-time event 117");
+ AddSignal(132, "SIG118", false, false, false, "real-time event 118");
+ AddSignal(133, "SIG119", false, false, false, "real-time event 119");
+ AddSignal(134, "SIG120", false, false, false, "real-time event 120");
+ AddSignal(135, "SIG121", false, false, false, "real-time event 121");
+ AddSignal(136, "SIG122", false, false, false, "real-time event 122");
+ AddSignal(137, "SIG123", false, false, false, "real-time event 123");
+ AddSignal(138, "SIG124", false, false, false, "real-time event 124");
+ AddSignal(139, "SIG125", false, false, false, "real-time event 125");
+ AddSignal(140, "SIG126", false, false, false, "real-time event 126");
+ AddSignal(141, "SIG127", false, false, false, "real-time event 127");
+
+ AddSignal(142, "SIGINFO", false, true, true, "information request");
+ AddSignal(143, "unknown", false, true, true, "unknown signal");
+
+ AddSignal(145, "EXC_BAD_ACCESS", false, true, true, "could not access memory");
+ AddSignal(146, "EXC_BAD_INSTRUCTION", false, true, true, "illegal instruction/operand");
+ AddSignal(147, "EXC_ARITHMETIC", false, true, true, "arithmetic exception");
+ AddSignal(148, "EXC_EMULATION", false, true, true, "emulation instruction");
+ AddSignal(149, "EXC_SOFTWARE", false, true, true, "software generated exception");
+ AddSignal(150, "EXC_BREAKPOINT", false, true, true, "breakpoint");
+
+ AddSignal(151, "SIGLIBRT", false, true, true, "librt internal signal");
+
+ // clang-format on
+}
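
With Reset() now pre-populating the table from the GDB Remote Serial Protocol numbering, a remote signal number can be mapped to a name on the client side without any extra traffic from the stub. A short sketch, assuming the inherited UnixSignals accessor GetSignalAsCString behaves as its name suggests:

#include "Plugins/Process/Utility/GDBRemoteSignals.h"
#include <cstdint>
#include <cstdio>

void PrintRemoteSignalName(int32_t signo) {
  lldb_private::GDBRemoteSignals signals; // table filled by Reset() above
  const char *name = signals.GetSignalAsCString(signo);
  std::printf("signal %d -> %s\n", signo, name ? name : "<unknown>");
}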
diff --git a/lldb/source/Plugins/Process/Utility/GDBRemoteSignals.h b/lldb/source/Plugins/Process/Utility/GDBRemoteSignals.h
index d37757ab60a5..4c260b94eba8 100644
--- a/lldb/source/Plugins/Process/Utility/GDBRemoteSignals.h
+++ b/lldb/source/Plugins/Process/Utility/GDBRemoteSignals.h
@@ -13,7 +13,8 @@
namespace lldb_private {
-/// Empty set of Unix signals to be filled by PlatformRemoteGDBServer
+/// Initially carries signals defined by the GDB Remote Serial Protocol.
+/// Can be filled with the platform's signals through PlatformRemoteGDBServer.

class GDBRemoteSignals : public UnixSignals {
public:
GDBRemoteSignals();
diff --git a/lldb/source/Plugins/Process/Utility/InferiorCallPOSIX.cpp b/lldb/source/Plugins/Process/Utility/InferiorCallPOSIX.cpp
index 0f331933f2ea..5091f68a9acf 100644
--- a/lldb/source/Plugins/Process/Utility/InferiorCallPOSIX.cpp
+++ b/lldb/source/Plugins/Process/Utility/InferiorCallPOSIX.cpp
@@ -8,12 +8,13 @@
#include "InferiorCallPOSIX.h"
#include "lldb/Core/Address.h"
+#include "lldb/Core/Module.h"
#include "lldb/Core/StreamFile.h"
#include "lldb/Core/ValueObject.h"
#include "lldb/Expression/DiagnosticManager.h"
#include "lldb/Host/Config.h"
-#include "lldb/Symbol/TypeSystem.h"
#include "lldb/Symbol/SymbolContext.h"
+#include "lldb/Symbol/TypeSystem.h"
#include "lldb/Target/ExecutionContext.h"
#include "lldb/Target/Platform.h"
#include "lldb/Target/Process.h"
@@ -41,12 +42,13 @@ bool lldb_private::InferiorCallMmap(Process *process, addr_t &allocated_addr,
if (thread == nullptr)
return false;
- const bool include_symbols = true;
- const bool include_inlines = false;
+ ModuleFunctionSearchOptions function_options;
+ function_options.include_symbols = true;
+ function_options.include_inlines = false;
+
SymbolContextList sc_list;
process->GetTarget().GetImages().FindFunctions(
- ConstString("mmap"), eFunctionNameTypeFull, include_symbols,
- include_inlines, sc_list);
+ ConstString("mmap"), eFunctionNameTypeFull, function_options, sc_list);
const uint32_t count = sc_list.GetSize();
if (count > 0) {
SymbolContext sc;
@@ -135,12 +137,13 @@ bool lldb_private::InferiorCallMunmap(Process *process, addr_t addr,
if (thread == nullptr)
return false;
- const bool include_symbols = true;
- const bool include_inlines = false;
+ ModuleFunctionSearchOptions function_options;
+ function_options.include_symbols = true;
+ function_options.include_inlines = false;
+
SymbolContextList sc_list;
process->GetTarget().GetImages().FindFunctions(
- ConstString("munmap"), eFunctionNameTypeFull, include_symbols,
- include_inlines, sc_list);
+ ConstString("munmap"), eFunctionNameTypeFull, function_options, sc_list);
const uint32_t count = sc_list.GetSize();
if (count > 0) {
SymbolContext sc;
diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_arm.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_arm.cpp
index 7e38091738e3..e1d3b6ecd7d0 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_arm.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_arm.cpp
@@ -177,7 +177,7 @@ enum {
{LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, \
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, \
LLDB_INVALID_REGNUM }, \
- nullptr, nullptr, nullptr, 0
+ nullptr, nullptr,
#define REG_CONTEXT_SIZE \
(sizeof(RegisterContextDarwin_arm::GPR) + \
sizeof(RegisterContextDarwin_arm::FPU) + \
@@ -200,8 +200,7 @@ static RegisterInfo g_register_infos[] = {
{ehframe_r0, dwarf_r0, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, gpr_r0},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r1",
nullptr,
4,
@@ -211,8 +210,7 @@ static RegisterInfo g_register_infos[] = {
{ehframe_r1, dwarf_r1, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, gpr_r1},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r2",
nullptr,
4,
@@ -222,8 +220,7 @@ static RegisterInfo g_register_infos[] = {
{ehframe_r2, dwarf_r2, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, gpr_r2},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r3",
nullptr,
4,
@@ -233,8 +230,7 @@ static RegisterInfo g_register_infos[] = {
{ehframe_r3, dwarf_r3, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, gpr_r3},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r4",
nullptr,
4,
@@ -244,8 +240,7 @@ static RegisterInfo g_register_infos[] = {
{ehframe_r4, dwarf_r4, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, gpr_r4},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r5",
nullptr,
4,
@@ -255,8 +250,7 @@ static RegisterInfo g_register_infos[] = {
{ehframe_r5, dwarf_r5, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, gpr_r5},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r6",
nullptr,
4,
@@ -266,8 +260,7 @@ static RegisterInfo g_register_infos[] = {
{ehframe_r6, dwarf_r6, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, gpr_r6},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r7",
nullptr,
4,
@@ -278,8 +271,7 @@ static RegisterInfo g_register_infos[] = {
gpr_r7},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r8",
nullptr,
4,
@@ -289,8 +281,7 @@ static RegisterInfo g_register_infos[] = {
{ehframe_r8, dwarf_r8, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, gpr_r8},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r9",
nullptr,
4,
@@ -300,8 +291,7 @@ static RegisterInfo g_register_infos[] = {
{ehframe_r9, dwarf_r9, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, gpr_r9},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r10",
nullptr,
4,
@@ -312,8 +302,7 @@ static RegisterInfo g_register_infos[] = {
gpr_r10},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r11",
nullptr,
4,
@@ -324,8 +313,7 @@ static RegisterInfo g_register_infos[] = {
gpr_r11},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"r12",
nullptr,
4,
@@ -336,8 +324,7 @@ static RegisterInfo g_register_infos[] = {
gpr_r12},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"sp",
"r13",
4,
@@ -348,8 +335,7 @@ static RegisterInfo g_register_infos[] = {
gpr_sp},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"lr",
"r14",
4,
@@ -360,8 +346,7 @@ static RegisterInfo g_register_infos[] = {
gpr_lr},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"pc",
"r15",
4,
@@ -372,8 +357,7 @@ static RegisterInfo g_register_infos[] = {
gpr_pc},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"cpsr",
"psr",
4,
@@ -384,8 +368,7 @@ static RegisterInfo g_register_infos[] = {
gpr_cpsr},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s0",
nullptr,
@@ -397,8 +380,7 @@ static RegisterInfo g_register_infos[] = {
fpu_s0},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s1",
nullptr,
4,
@@ -409,8 +391,7 @@ static RegisterInfo g_register_infos[] = {
fpu_s1},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s2",
nullptr,
4,
@@ -421,8 +402,7 @@ static RegisterInfo g_register_infos[] = {
fpu_s2},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s3",
nullptr,
4,
@@ -433,8 +413,7 @@ static RegisterInfo g_register_infos[] = {
fpu_s3},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s4",
nullptr,
4,
@@ -445,8 +424,7 @@ static RegisterInfo g_register_infos[] = {
fpu_s4},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s5",
nullptr,
4,
@@ -457,8 +435,7 @@ static RegisterInfo g_register_infos[] = {
fpu_s5},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s6",
nullptr,
4,
@@ -469,8 +446,7 @@ static RegisterInfo g_register_infos[] = {
fpu_s6},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s7",
nullptr,
4,
@@ -481,8 +457,7 @@ static RegisterInfo g_register_infos[] = {
fpu_s7},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s8",
nullptr,
4,
@@ -493,8 +468,7 @@ static RegisterInfo g_register_infos[] = {
fpu_s8},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s9",
nullptr,
4,
@@ -505,8 +479,7 @@ static RegisterInfo g_register_infos[] = {
fpu_s9},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s10",
nullptr,
4,
@@ -517,8 +490,7 @@ static RegisterInfo g_register_infos[] = {
fpu_s10},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s11",
nullptr,
4,
@@ -529,8 +501,7 @@ static RegisterInfo g_register_infos[] = {
fpu_s11},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s12",
nullptr,
4,
@@ -541,8 +512,7 @@ static RegisterInfo g_register_infos[] = {
fpu_s12},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s13",
nullptr,
4,
@@ -553,8 +523,7 @@ static RegisterInfo g_register_infos[] = {
fpu_s13},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s14",
nullptr,
4,
@@ -565,8 +534,7 @@ static RegisterInfo g_register_infos[] = {
fpu_s14},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s15",
nullptr,
4,
@@ -577,8 +545,7 @@ static RegisterInfo g_register_infos[] = {
fpu_s15},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s16",
nullptr,
4,
@@ -589,8 +556,7 @@ static RegisterInfo g_register_infos[] = {
fpu_s16},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s17",
nullptr,
4,
@@ -601,8 +567,7 @@ static RegisterInfo g_register_infos[] = {
fpu_s17},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s18",
nullptr,
4,
@@ -613,8 +578,7 @@ static RegisterInfo g_register_infos[] = {
fpu_s18},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s19",
nullptr,
4,
@@ -625,8 +589,7 @@ static RegisterInfo g_register_infos[] = {
fpu_s19},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s20",
nullptr,
4,
@@ -637,8 +600,7 @@ static RegisterInfo g_register_infos[] = {
fpu_s20},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s21",
nullptr,
4,
@@ -649,8 +611,7 @@ static RegisterInfo g_register_infos[] = {
fpu_s21},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s22",
nullptr,
4,
@@ -661,8 +622,7 @@ static RegisterInfo g_register_infos[] = {
fpu_s22},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s23",
nullptr,
4,
@@ -673,8 +633,7 @@ static RegisterInfo g_register_infos[] = {
fpu_s23},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s24",
nullptr,
4,
@@ -685,8 +644,7 @@ static RegisterInfo g_register_infos[] = {
fpu_s24},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s25",
nullptr,
4,
@@ -697,8 +655,7 @@ static RegisterInfo g_register_infos[] = {
fpu_s25},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s26",
nullptr,
4,
@@ -709,8 +666,7 @@ static RegisterInfo g_register_infos[] = {
fpu_s26},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s27",
nullptr,
4,
@@ -721,8 +677,7 @@ static RegisterInfo g_register_infos[] = {
fpu_s27},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s28",
nullptr,
4,
@@ -733,8 +688,7 @@ static RegisterInfo g_register_infos[] = {
fpu_s28},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s29",
nullptr,
4,
@@ -745,8 +699,7 @@ static RegisterInfo g_register_infos[] = {
fpu_s29},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s30",
nullptr,
4,
@@ -757,8 +710,7 @@ static RegisterInfo g_register_infos[] = {
fpu_s30},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"s31",
nullptr,
4,
@@ -769,8 +721,7 @@ static RegisterInfo g_register_infos[] = {
fpu_s31},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"fpscr",
nullptr,
4,
@@ -781,8 +732,7 @@ static RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, fpu_fpscr},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"exception",
nullptr,
@@ -794,8 +744,7 @@ static RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, exc_exception},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"fsr",
nullptr,
4,
@@ -806,8 +755,7 @@ static RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, exc_fsr},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"far",
nullptr,
4,
@@ -818,8 +766,7 @@ static RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, exc_far},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_DBG(bvr, 0)},
{DEFINE_DBG(bvr, 1)},
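
The repetitive "nullptr, 0" removals in this and the following register tables all follow from RegisterInfo losing its trailing members (apparently the dynamic-size DWARF expression pointer and length) elsewhere in this import; the initializers simply stop naming fields that no longer exist. A reduced sketch of the underlying C++ aggregate rule, with hypothetical names standing in for lldb_private::RegisterInfo:

#include <cstdint>

// Hypothetical reduced struct, not the real RegisterInfo layout.
struct Info {
  const char *name;
  const uint32_t *value_regs;
  const uint32_t *invalidate_regs;
};

// Trailing members omitted from an aggregate initializer are value-initialized,
// so dropping the old "nullptr, 0" tail leaves the remaining fields unchanged.
static Info g_infos[] = {
    {"r0", nullptr, nullptr},
    {"r1"}, // value_regs and invalidate_regs are value-initialized to nullptr
};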
diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_arm64.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_arm64.cpp
index b98b2f35c23e..50f710e26815 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_arm64.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_arm64.cpp
@@ -59,7 +59,7 @@ using namespace lldb_private;
{LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, \
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, \
LLDB_INVALID_REGNUM }, \
- NULL, NULL, NULL, 0
+ NULL, NULL
#define REG_CONTEXT_SIZE \
(sizeof(RegisterContextDarwin_arm64::GPR) + \
sizeof(RegisterContextDarwin_arm64::FPU) + \
diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_i386.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_i386.cpp
index 95f8132a990c..5f56e6f1636a 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_i386.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_i386.cpp
@@ -154,7 +154,7 @@ enum {
{LLDB_INVALID_REGNUM, dwarf_##reg##i, \
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, \
fpu_##reg##i }, \
- nullptr, nullptr, nullptr, 0
+ nullptr, nullptr,
#define DEFINE_EXC(reg) \
#reg, NULL, sizeof(((RegisterContextDarwin_i386::EXC *) NULL)->reg), \
@@ -175,184 +175,158 @@ static RegisterInfo g_register_infos[] = {
gpr_eax},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_GPR(ebx, nullptr),
{ehframe_ebx, dwarf_ebx, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
gpr_ebx},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_GPR(ecx, nullptr),
{ehframe_ecx, dwarf_ecx, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
gpr_ecx},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_GPR(edx, nullptr),
{ehframe_edx, dwarf_edx, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
gpr_edx},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_GPR(edi, nullptr),
{ehframe_edi, dwarf_edi, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
gpr_edi},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_GPR(esi, nullptr),
{ehframe_esi, dwarf_esi, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
gpr_esi},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_GPR(ebp, "fp"),
{ehframe_ebp, dwarf_ebp, LLDB_REGNUM_GENERIC_FP, LLDB_INVALID_REGNUM,
gpr_ebp},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_GPR(esp, "sp"),
{ehframe_esp, dwarf_esp, LLDB_REGNUM_GENERIC_SP, LLDB_INVALID_REGNUM,
gpr_esp},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_GPR(ss, nullptr),
{LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM, gpr_ss},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_GPR(eflags, "flags"),
{ehframe_eflags, dwarf_eflags, LLDB_REGNUM_GENERIC_FLAGS,
LLDB_INVALID_REGNUM, gpr_eflags},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_GPR(eip, "pc"),
{ehframe_eip, dwarf_eip, LLDB_REGNUM_GENERIC_PC, LLDB_INVALID_REGNUM,
gpr_eip},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_GPR(cs, nullptr),
{LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM, gpr_cs},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_GPR(ds, nullptr),
{LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM, gpr_ds},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_GPR(es, nullptr),
{LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM, gpr_es},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_GPR(fs, nullptr),
{LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM, gpr_fs},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_GPR(gs, nullptr),
{LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM, gpr_gs},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_FPU_UINT(fcw),
{LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM, fpu_fcw},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_FPU_UINT(fsw),
{LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM, fpu_fsw},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_FPU_UINT(ftw),
{LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM, fpu_ftw},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_FPU_UINT(fop),
{LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM, fpu_fop},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_FPU_UINT(ip),
{LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM, fpu_ip},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_FPU_UINT(cs),
{LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM, fpu_cs},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_FPU_UINT(dp),
{LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM, fpu_dp},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_FPU_UINT(ds),
{LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM, fpu_ds},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_FPU_UINT(mxcsr),
{LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM, fpu_mxcsr},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_FPU_UINT(mxcsrmask),
{LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM, fpu_mxcsrmask},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_FPU_VECT(stmm, 0)},
{DEFINE_FPU_VECT(stmm, 1)},
{DEFINE_FPU_VECT(stmm, 2)},
@@ -375,22 +349,19 @@ static RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, exc_trapno},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_EXC(err),
{LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM, exc_err},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_EXC(faultvaddr),
{LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM, exc_faultvaddr},
nullptr,
nullptr,
- nullptr,
- 0}};
+ }};
static size_t k_num_register_infos = llvm::array_lengthof(g_register_infos);
diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_x86_64.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_x86_64.cpp
index 03e5ea424e39..567df8fc980c 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_x86_64.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_x86_64.cpp
@@ -172,7 +172,7 @@ enum ehframe_dwarf_regnums {
{ehframe_dwarf_fpu_##reg##i, \
ehframe_dwarf_fpu_##reg##i, LLDB_INVALID_REGNUM, \
LLDB_INVALID_REGNUM, fpu_##reg##i }, \
- nullptr, nullptr, nullptr, 0
+ nullptr, nullptr,
#define DEFINE_EXC(reg) \
#reg, NULL, sizeof(((RegisterContextDarwin_x86_64::EXC *) NULL)->reg), \
EXC_OFFSET(reg), eEncodingUint, eFormatHex
@@ -194,219 +194,188 @@ static RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, gpr_rax},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_GPR(rbx, nullptr),
{ehframe_dwarf_gpr_rbx, ehframe_dwarf_gpr_rbx, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM, gpr_rbx},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_GPR(rcx, nullptr),
{ehframe_dwarf_gpr_rcx, ehframe_dwarf_gpr_rcx, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM, gpr_rcx},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_GPR(rdx, nullptr),
{ehframe_dwarf_gpr_rdx, ehframe_dwarf_gpr_rdx, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM, gpr_rdx},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_GPR(rdi, nullptr),
{ehframe_dwarf_gpr_rdi, ehframe_dwarf_gpr_rdi, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM, gpr_rdi},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_GPR(rsi, nullptr),
{ehframe_dwarf_gpr_rsi, ehframe_dwarf_gpr_rsi, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM, gpr_rsi},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_GPR(rbp, "fp"),
{ehframe_dwarf_gpr_rbp, ehframe_dwarf_gpr_rbp, LLDB_REGNUM_GENERIC_FP,
LLDB_INVALID_REGNUM, gpr_rbp},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_GPR(rsp, "sp"),
{ehframe_dwarf_gpr_rsp, ehframe_dwarf_gpr_rsp, LLDB_REGNUM_GENERIC_SP,
LLDB_INVALID_REGNUM, gpr_rsp},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_GPR(r8, nullptr),
{ehframe_dwarf_gpr_r8, ehframe_dwarf_gpr_r8, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM, gpr_r8},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_GPR(r9, nullptr),
{ehframe_dwarf_gpr_r9, ehframe_dwarf_gpr_r9, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM, gpr_r9},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_GPR(r10, nullptr),
{ehframe_dwarf_gpr_r10, ehframe_dwarf_gpr_r10, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM, gpr_r10},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_GPR(r11, nullptr),
{ehframe_dwarf_gpr_r11, ehframe_dwarf_gpr_r11, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM, gpr_r11},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_GPR(r12, nullptr),
{ehframe_dwarf_gpr_r12, ehframe_dwarf_gpr_r12, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM, gpr_r12},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_GPR(r13, nullptr),
{ehframe_dwarf_gpr_r13, ehframe_dwarf_gpr_r13, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM, gpr_r13},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_GPR(r14, nullptr),
{ehframe_dwarf_gpr_r14, ehframe_dwarf_gpr_r14, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM, gpr_r14},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_GPR(r15, nullptr),
{ehframe_dwarf_gpr_r15, ehframe_dwarf_gpr_r15, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM, gpr_r15},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_GPR(rip, "pc"),
{ehframe_dwarf_gpr_rip, ehframe_dwarf_gpr_rip, LLDB_REGNUM_GENERIC_PC,
LLDB_INVALID_REGNUM, gpr_rip},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_GPR(rflags, "flags"),
{LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_REGNUM_GENERIC_FLAGS,
LLDB_INVALID_REGNUM, gpr_rflags},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_GPR(cs, nullptr),
{LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM, gpr_cs},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_GPR(fs, nullptr),
{LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM, gpr_fs},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_GPR(gs, nullptr),
{LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM, gpr_gs},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_FPU_UINT(fcw),
{LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM, fpu_fcw},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_FPU_UINT(fsw),
{LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM, fpu_fsw},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_FPU_UINT(ftw),
{LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM, fpu_ftw},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_FPU_UINT(fop),
{LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM, fpu_fop},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_FPU_UINT(ip),
{LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM, fpu_ip},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_FPU_UINT(cs),
{LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM, fpu_cs},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_FPU_UINT(dp),
{LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM, fpu_dp},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_FPU_UINT(ds),
{LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM, fpu_ds},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_FPU_UINT(mxcsr),
{LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM, fpu_mxcsr},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_FPU_UINT(mxcsrmask),
{LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM, fpu_mxcsrmask},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_FPU_VECT(stmm, 0)},
{DEFINE_FPU_VECT(stmm, 1)},
{DEFINE_FPU_VECT(stmm, 2)},
@@ -437,22 +406,19 @@ static RegisterInfo g_register_infos[] = {
LLDB_INVALID_REGNUM, exc_trapno},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_EXC(err),
{LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM, exc_err},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{DEFINE_EXC(faultvaddr),
{LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
LLDB_INVALID_REGNUM, exc_faultvaddr},
nullptr,
nullptr,
- nullptr,
- 0}};
+ }};
static size_t k_num_register_infos = llvm::array_lengthof(g_register_infos);
diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextFreeBSD_mips64.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextFreeBSD_mips64.cpp
index 0c5d34f345db..7b4c7be21f74 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterContextFreeBSD_mips64.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterContextFreeBSD_mips64.cpp
@@ -14,27 +14,53 @@
using namespace lldb_private;
using namespace lldb;
-static const uint32_t g_gpr_regnums[] = {
- gpr_zero_mips64, gpr_r1_mips64, gpr_r2_mips64, gpr_r3_mips64,
- gpr_r4_mips64, gpr_r5_mips64, gpr_r6_mips64, gpr_r7_mips64,
- gpr_r8_mips64, gpr_r9_mips64, gpr_r10_mips64, gpr_r11_mips64,
- gpr_r12_mips64, gpr_r13_mips64, gpr_r14_mips64, gpr_r15_mips64,
- gpr_r16_mips64, gpr_r17_mips64, gpr_r18_mips64, gpr_r19_mips64,
- gpr_r20_mips64, gpr_r21_mips64, gpr_r22_mips64, gpr_r23_mips64,
- gpr_r24_mips64, gpr_r25_mips64, gpr_r26_mips64, gpr_r27_mips64,
- gpr_gp_mips64, gpr_sp_mips64, gpr_r30_mips64, gpr_ra_mips64,
- gpr_sr_mips64, gpr_mullo_mips64, gpr_mulhi_mips64, gpr_badvaddr_mips64,
- gpr_cause_mips64, gpr_pc_mips64, gpr_ic_mips64, gpr_dummy_mips64};
+static const uint32_t g_gp_regnums_mips64[] = {
+ gpr_zero_mips64, gpr_r1_mips64, gpr_r2_mips64, gpr_r3_mips64,
+ gpr_r4_mips64, gpr_r5_mips64, gpr_r6_mips64, gpr_r7_mips64,
+ gpr_r8_mips64, gpr_r9_mips64, gpr_r10_mips64, gpr_r11_mips64,
+ gpr_r12_mips64, gpr_r13_mips64, gpr_r14_mips64, gpr_r15_mips64,
+ gpr_r16_mips64, gpr_r17_mips64, gpr_r18_mips64, gpr_r19_mips64,
+ gpr_r20_mips64, gpr_r21_mips64, gpr_r22_mips64, gpr_r23_mips64,
+ gpr_r24_mips64, gpr_r25_mips64, gpr_r26_mips64, gpr_r27_mips64,
+ gpr_gp_mips64, gpr_sp_mips64, gpr_r30_mips64, gpr_ra_mips64,
+ gpr_sr_mips64, gpr_mullo_mips64, gpr_mulhi_mips64, gpr_badvaddr_mips64,
+ gpr_cause_mips64, gpr_pc_mips64, gpr_ic_mips64, gpr_dummy_mips64,
+ LLDB_INVALID_REGNUM // register sets need to end with this flag
+};
+
+static_assert((sizeof(g_gp_regnums_mips64) / sizeof(g_gp_regnums_mips64[0])) -
+ 1 ==
+ k_num_gpr_registers_mips64,
+ "g_gp_regnums_mips64 has wrong number of register infos");
+
+const uint32_t g_fp_regnums_mips64[] = {
+ fpr_f0_mips64, fpr_f1_mips64, fpr_f2_mips64, fpr_f3_mips64,
+ fpr_f4_mips64, fpr_f5_mips64, fpr_f6_mips64, fpr_f7_mips64,
+ fpr_f8_mips64, fpr_f9_mips64, fpr_f10_mips64, fpr_f11_mips64,
+ fpr_f12_mips64, fpr_f13_mips64, fpr_f14_mips64, fpr_f15_mips64,
+ fpr_f16_mips64, fpr_f17_mips64, fpr_f18_mips64, fpr_f19_mips64,
+ fpr_f20_mips64, fpr_f21_mips64, fpr_f22_mips64, fpr_f23_mips64,
+ fpr_f24_mips64, fpr_f25_mips64, fpr_f26_mips64, fpr_f27_mips64,
+ fpr_f28_mips64, fpr_f29_mips64, fpr_f30_mips64, fpr_f31_mips64,
+ fpr_fcsr_mips64, fpr_fir_mips64,
+ LLDB_INVALID_REGNUM // register sets need to end with this flag
+};
+
+static_assert((sizeof(g_fp_regnums_mips64) / sizeof(g_fp_regnums_mips64[0])) -
+ 1 ==
+ k_num_fpr_registers_mips64,
+ "g_fp_regnums_mips64 has wrong number of register infos");
// Number of register sets provided by this context.
-constexpr size_t k_num_register_sets = 1;
+constexpr size_t k_num_register_sets = 2;
static const RegisterSet g_reg_sets_mips64[k_num_register_sets] = {
{"General Purpose Registers", "gpr", k_num_gpr_registers_mips64,
- g_gpr_regnums},
+ g_gp_regnums_mips64},
+ {"Floating Point Registers", "fpu", k_num_fpr_registers_mips64,
+ g_fp_regnums_mips64},
};
-
// http://svnweb.freebsd.org/base/head/sys/mips/include/regnum.h
typedef struct _GPR {
uint64_t zero;
@@ -79,6 +105,43 @@ typedef struct _GPR {
uint64_t dummy;
} GPR_freebsd_mips;
+typedef struct _FPR {
+ uint64_t f0;
+ uint64_t f1;
+ uint64_t f2;
+ uint64_t f3;
+ uint64_t f4;
+ uint64_t f5;
+ uint64_t f6;
+ uint64_t f7;
+ uint64_t f8;
+ uint64_t f9;
+ uint64_t f10;
+ uint64_t f11;
+ uint64_t f12;
+ uint64_t f13;
+ uint64_t f14;
+ uint64_t f15;
+ uint64_t f16;
+ uint64_t f17;
+ uint64_t f18;
+ uint64_t f19;
+ uint64_t f20;
+ uint64_t f21;
+ uint64_t f22;
+ uint64_t f23;
+ uint64_t f24;
+ uint64_t f25;
+ uint64_t f26;
+ uint64_t f27;
+ uint64_t f28;
+ uint64_t f29;
+ uint64_t f30;
+ uint64_t f31;
+ uint64_t fcsr;
+ uint64_t fir;
+} FPR_freebsd_mips;
+
// Include RegisterInfos_mips64 to declare our g_register_infos_mips64
// structure.
#define DECLARE_REGISTER_INFOS_MIPS64_STRUCT
@@ -95,14 +158,13 @@ size_t RegisterContextFreeBSD_mips64::GetGPRSize() const {
const RegisterSet *
RegisterContextFreeBSD_mips64::GetRegisterSet(size_t set) const {
- // Check if RegisterSet is available
- if (set < k_num_register_sets)
- return &g_reg_sets_mips64[set];
- return nullptr;
+ // Check if RegisterSet is available
+ if (set < k_num_register_sets)
+ return &g_reg_sets_mips64[set];
+ return nullptr;
}
-size_t
-RegisterContextFreeBSD_mips64::GetRegisterSetCount() const {
+size_t RegisterContextFreeBSD_mips64::GetRegisterSetCount() const {
return k_num_register_sets;
}
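
The new register-number arrays above end with an LLDB_INVALID_REGNUM sentinel and are cross-checked against the advertised register count with a static_assert. The same pattern in isolation; the names and counts here are placeholders, not taken from the diff:

#include <cstdint>

static const uint32_t kInvalidRegnum = UINT32_MAX; // stand-in for LLDB_INVALID_REGNUM
static const uint32_t g_example_regnums[] = {0, 1, 2, kInvalidRegnum};

// The -1 discounts the sentinel, so the array must list exactly three registers.
static_assert(sizeof(g_example_regnums) / sizeof(g_example_regnums[0]) - 1 == 3,
              "g_example_regnums has the wrong number of registers");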
diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextLinux_i386.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextLinux_i386.cpp
index 518dc273faf4..39bec20649a4 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterContextLinux_i386.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterContextLinux_i386.cpp
@@ -89,19 +89,18 @@ struct UserArea {
RegisterContextLinux_i386::RegisterContextLinux_i386(
const ArchSpec &target_arch)
: RegisterInfoInterface(target_arch) {
- RegisterInfo orig_ax = {"orig_eax",
- nullptr,
- sizeof(((GPR *)nullptr)->orig_eax),
- (LLVM_EXTENSION offsetof(GPR, orig_eax)),
- eEncodingUint,
- eFormatHex,
- {LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- LLDB_INVALID_REGNUM},
- nullptr,
- nullptr,
- nullptr,
- 0};
+ RegisterInfo orig_ax = {
+ "orig_eax",
+ nullptr,
+ sizeof(((GPR *)nullptr)->orig_eax),
+ (LLVM_EXTENSION offsetof(GPR, orig_eax)),
+ eEncodingUint,
+ eFormatHex,
+ {LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
+ LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM},
+ nullptr,
+ nullptr,
+ };
d_register_infos.push_back(orig_ax);
}
diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextLinux_x86_64.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextLinux_x86_64.cpp
index f9d4e23fcde2..20b8d74f8c0d 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterContextLinux_x86_64.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterContextLinux_x86_64.cpp
@@ -156,19 +156,18 @@ RegisterContextLinux_x86_64::RegisterContextLinux_x86_64(
m_register_info_p(GetRegisterInfoPtr(target_arch)),
m_register_info_count(GetRegisterInfoCount(target_arch)),
m_user_register_count(GetUserRegisterInfoCount(target_arch)) {
- RegisterInfo orig_ax = {"orig_rax",
- nullptr,
- sizeof(((GPR *)nullptr)->orig_rax),
- (LLVM_EXTENSION offsetof(GPR, orig_rax)),
- eEncodingUint,
- eFormatHex,
- {LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- LLDB_INVALID_REGNUM},
- nullptr,
- nullptr,
- nullptr,
- 0};
+ RegisterInfo orig_ax = {
+ "orig_rax",
+ nullptr,
+ sizeof(((GPR *)nullptr)->orig_rax),
+ (LLVM_EXTENSION offsetof(GPR, orig_rax)),
+ eEncodingUint,
+ eFormatHex,
+ {LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
+ LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM},
+ nullptr,
+ nullptr,
+ };
d_register_infos.push_back(orig_ax);
}
diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextMemory.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextMemory.cpp
index c55ffebb03e7..49a4c8669022 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterContextMemory.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterContextMemory.cpp
@@ -8,7 +8,6 @@
#include "RegisterContextMemory.h"
-#include "DynamicRegisterInfo.h"
#include "lldb/Target/Process.h"
#include "lldb/Target/Thread.h"
#include "lldb/Utility/DataBufferHeap.h"
diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextMemory.h b/lldb/source/Plugins/Process/Utility/RegisterContextMemory.h
index 764ee9b97211..c3b9ec72ca22 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterContextMemory.h
+++ b/lldb/source/Plugins/Process/Utility/RegisterContextMemory.h
@@ -11,17 +11,16 @@
#include <vector>
+#include "lldb/Target/DynamicRegisterInfo.h"
#include "lldb/Target/RegisterContext.h"
#include "lldb/Utility/DataExtractor.h"
#include "lldb/lldb-private.h"
-class DynamicRegisterInfo;
-
class RegisterContextMemory : public lldb_private::RegisterContext {
public:
RegisterContextMemory(lldb_private::Thread &thread,
uint32_t concrete_frame_idx,
- DynamicRegisterInfo &reg_info,
+ lldb_private::DynamicRegisterInfo &reg_info,
lldb::addr_t reg_data_addr);
~RegisterContextMemory() override;
@@ -60,7 +59,7 @@ public:
protected:
void SetAllRegisterValid(bool b);
- DynamicRegisterInfo &m_reg_infos;
+ lldb_private::DynamicRegisterInfo &m_reg_infos;
std::vector<bool> m_reg_valid;
lldb_private::DataExtractor m_reg_data;
lldb::addr_t m_reg_data_addr; // If this is valid, then we have a register
diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextWindows_i386.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextWindows_i386.cpp
index 11556e802e33..066d50d9c149 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterContextWindows_i386.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterContextWindows_i386.cpp
@@ -41,7 +41,6 @@ typedef struct _GPR {
#reg, alt, sizeof(((GPR *)nullptr)->reg), GPR_OFFSET(reg), eEncodingUint, \
eFormatHex, \
{kind1, kind2, kind3, kind4, lldb_##reg##_i386 }, nullptr, nullptr, \
- nullptr, 0 \
}
// clang-format off
diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextWindows_x86_64.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextWindows_x86_64.cpp
index 4ffc4d25781c..a35ccace5d5b 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterContextWindows_x86_64.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterContextWindows_x86_64.cpp
@@ -49,7 +49,6 @@ typedef struct _GPR {
#reg, alt, sizeof(((GPR *)nullptr)->reg), GPR_OFFSET(reg), eEncodingUint, \
eFormatHex, \
{kind1, kind2, kind3, kind4, lldb_##reg##_x86_64 }, nullptr, nullptr, \
- nullptr, 0 \
}
typedef struct _FPReg {
@@ -80,7 +79,7 @@ typedef struct _FPReg {
eEncodingUint, eFormatVectorOfUInt64, \
{dwarf_##reg##_x86_64, dwarf_##reg##_x86_64, LLDB_INVALID_REGNUM, \
LLDB_INVALID_REGNUM, lldb_##reg##_x86_64 }, \
- nullptr, nullptr, nullptr, 0 \
+ nullptr, nullptr, \
}
// clang-format off
@@ -89,22 +88,22 @@ static RegisterInfo g_register_infos_x86_64[] = {
// =========================== ================== ================ ========================= ====================
DEFINE_GPR(rax, nullptr, dwarf_rax_x86_64, dwarf_rax_x86_64, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM),
DEFINE_GPR(rbx, nullptr, dwarf_rbx_x86_64, dwarf_rbx_x86_64, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM),
- DEFINE_GPR(rcx, "arg4", dwarf_rcx_x86_64, dwarf_rcx_x86_64, LLDB_REGNUM_GENERIC_ARG4, LLDB_INVALID_REGNUM),
- DEFINE_GPR(rdx, "arg3", dwarf_rdx_x86_64, dwarf_rdx_x86_64, LLDB_REGNUM_GENERIC_ARG3, LLDB_INVALID_REGNUM),
- DEFINE_GPR(rdi, "arg1", dwarf_rdi_x86_64, dwarf_rdi_x86_64, LLDB_REGNUM_GENERIC_ARG1, LLDB_INVALID_REGNUM),
- DEFINE_GPR(rsi, "arg2", dwarf_rsi_x86_64, dwarf_rsi_x86_64, LLDB_REGNUM_GENERIC_ARG2, LLDB_INVALID_REGNUM),
- DEFINE_GPR(rbp, "fp", dwarf_rbp_x86_64, dwarf_rbp_x86_64, LLDB_REGNUM_GENERIC_FP, LLDB_INVALID_REGNUM),
- DEFINE_GPR(rsp, "sp", dwarf_rsp_x86_64, dwarf_rsp_x86_64, LLDB_REGNUM_GENERIC_SP, LLDB_INVALID_REGNUM),
- DEFINE_GPR(r8, "arg5", dwarf_r8_x86_64, dwarf_r8_x86_64, LLDB_REGNUM_GENERIC_ARG5, LLDB_INVALID_REGNUM),
- DEFINE_GPR(r9, "arg6", dwarf_r9_x86_64, dwarf_r9_x86_64, LLDB_REGNUM_GENERIC_ARG6, LLDB_INVALID_REGNUM),
+ DEFINE_GPR(rcx, nullptr, dwarf_rcx_x86_64, dwarf_rcx_x86_64, LLDB_REGNUM_GENERIC_ARG4, LLDB_INVALID_REGNUM),
+ DEFINE_GPR(rdx, nullptr, dwarf_rdx_x86_64, dwarf_rdx_x86_64, LLDB_REGNUM_GENERIC_ARG3, LLDB_INVALID_REGNUM),
+ DEFINE_GPR(rdi, nullptr, dwarf_rdi_x86_64, dwarf_rdi_x86_64, LLDB_REGNUM_GENERIC_ARG1, LLDB_INVALID_REGNUM),
+ DEFINE_GPR(rsi, nullptr, dwarf_rsi_x86_64, dwarf_rsi_x86_64, LLDB_REGNUM_GENERIC_ARG2, LLDB_INVALID_REGNUM),
+ DEFINE_GPR(rbp, nullptr, dwarf_rbp_x86_64, dwarf_rbp_x86_64, LLDB_REGNUM_GENERIC_FP, LLDB_INVALID_REGNUM),
+ DEFINE_GPR(rsp, nullptr, dwarf_rsp_x86_64, dwarf_rsp_x86_64, LLDB_REGNUM_GENERIC_SP, LLDB_INVALID_REGNUM),
+ DEFINE_GPR(r8, nullptr, dwarf_r8_x86_64, dwarf_r8_x86_64, LLDB_REGNUM_GENERIC_ARG5, LLDB_INVALID_REGNUM),
+ DEFINE_GPR(r9, nullptr, dwarf_r9_x86_64, dwarf_r9_x86_64, LLDB_REGNUM_GENERIC_ARG6, LLDB_INVALID_REGNUM),
DEFINE_GPR(r10, nullptr, dwarf_r10_x86_64, dwarf_r10_x86_64, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM),
DEFINE_GPR(r11, nullptr, dwarf_r11_x86_64, dwarf_r11_x86_64, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM),
DEFINE_GPR(r12, nullptr, dwarf_r12_x86_64, dwarf_r12_x86_64, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM),
DEFINE_GPR(r13, nullptr, dwarf_r13_x86_64, dwarf_r13_x86_64, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM),
DEFINE_GPR(r14, nullptr, dwarf_r14_x86_64, dwarf_r14_x86_64, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM),
DEFINE_GPR(r15, nullptr, dwarf_r15_x86_64, dwarf_r15_x86_64, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM),
- DEFINE_GPR(rip, "pc", dwarf_rip_x86_64, dwarf_rip_x86_64, LLDB_REGNUM_GENERIC_PC, LLDB_INVALID_REGNUM),
- DEFINE_GPR(rflags, "flags", dwarf_rflags_x86_64, dwarf_rflags_x86_64, LLDB_REGNUM_GENERIC_FLAGS, LLDB_INVALID_REGNUM),
+ DEFINE_GPR(rip, nullptr, dwarf_rip_x86_64, dwarf_rip_x86_64, LLDB_REGNUM_GENERIC_PC, LLDB_INVALID_REGNUM),
+ DEFINE_GPR(rflags, nullptr, dwarf_rflags_x86_64, dwarf_rflags_x86_64, LLDB_REGNUM_GENERIC_FLAGS, LLDB_INVALID_REGNUM),
DEFINE_GPR(cs, nullptr, dwarf_cs_x86_64, dwarf_cs_x86_64, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM),
DEFINE_GPR(fs, nullptr, dwarf_fs_x86_64, dwarf_fs_x86_64, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM),
DEFINE_GPR(gs, nullptr, dwarf_gs_x86_64, dwarf_gs_x86_64, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM),
diff --git a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm.cpp b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm.cpp
index 63461f7ab2db..fd4c373e2cb1 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm.cpp
@@ -38,7 +38,7 @@ using namespace lldb_private;
{LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, \
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, \
dbg_##reg##i }, \
- NULL, NULL, NULL, 0
+ NULL, NULL,
#define REG_CONTEXT_SIZE \
(sizeof(RegisterInfoPOSIX_arm::GPR) + sizeof(RegisterInfoPOSIX_arm::FPU) + \
sizeof(RegisterInfoPOSIX_arm::EXC))
diff --git a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp
index b878534b39db..6c130be7b741 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp
@@ -60,7 +60,7 @@
{LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, \
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, \
dbg_##reg##i }, \
- NULL, NULL, NULL, 0
+ NULL, NULL,
#define REG_CONTEXT_SIZE \
(sizeof(RegisterInfoPOSIX_arm64::GPR) + \
sizeof(RegisterInfoPOSIX_arm64::FPU) + \
diff --git a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h
index ba873ba4436b..96cab49d5ac8 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h
+++ b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h
@@ -110,6 +110,7 @@ public:
bool IsSVEEnabled() const { return m_opt_regsets.AnySet(eRegsetMaskSVE); }
bool IsPAuthEnabled() const { return m_opt_regsets.AnySet(eRegsetMaskPAuth); }
+ bool IsMTEEnabled() const { return m_opt_regsets.AnySet(eRegsetMaskMTE); }
bool IsSVEReg(unsigned reg) const;
bool IsSVEZReg(unsigned reg) const;
diff --git a/lldb/source/Plugins/Process/Utility/RegisterInfos_arm.h b/lldb/source/Plugins/Process/Utility/RegisterInfos_arm.h
index 4af0069eb6f3..ace2e5a9f68b 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterInfos_arm.h
+++ b/lldb/source/Plugins/Process/Utility/RegisterInfos_arm.h
@@ -254,39 +254,38 @@ static uint32_t g_s29_invalidates[] = {fpu_d14, fpu_q7, LLDB_INVALID_REGNUM};
static uint32_t g_s30_invalidates[] = {fpu_d15, fpu_q7, LLDB_INVALID_REGNUM};
static uint32_t g_s31_invalidates[] = {fpu_d15, fpu_q7, LLDB_INVALID_REGNUM};
-static uint32_t g_d0_contains[] = {fpu_s0, fpu_s1, LLDB_INVALID_REGNUM};
-static uint32_t g_d1_contains[] = {fpu_s2, fpu_s3, LLDB_INVALID_REGNUM};
-static uint32_t g_d2_contains[] = {fpu_s4, fpu_s5, LLDB_INVALID_REGNUM};
-static uint32_t g_d3_contains[] = {fpu_s6, fpu_s7, LLDB_INVALID_REGNUM};
-static uint32_t g_d4_contains[] = {fpu_s8, fpu_s9, LLDB_INVALID_REGNUM};
-static uint32_t g_d5_contains[] = {fpu_s10, fpu_s11, LLDB_INVALID_REGNUM};
-static uint32_t g_d6_contains[] = {fpu_s12, fpu_s13, LLDB_INVALID_REGNUM};
-static uint32_t g_d7_contains[] = {fpu_s14, fpu_s15, LLDB_INVALID_REGNUM};
-static uint32_t g_d8_contains[] = {fpu_s16, fpu_s17, LLDB_INVALID_REGNUM};
-static uint32_t g_d9_contains[] = {fpu_s18, fpu_s19, LLDB_INVALID_REGNUM};
-static uint32_t g_d10_contains[] = {fpu_s20, fpu_s21, LLDB_INVALID_REGNUM};
-static uint32_t g_d11_contains[] = {fpu_s22, fpu_s23, LLDB_INVALID_REGNUM};
-static uint32_t g_d12_contains[] = {fpu_s24, fpu_s25, LLDB_INVALID_REGNUM};
-static uint32_t g_d13_contains[] = {fpu_s26, fpu_s27, LLDB_INVALID_REGNUM};
-static uint32_t g_d14_contains[] = {fpu_s28, fpu_s29, LLDB_INVALID_REGNUM};
-static uint32_t g_d15_contains[] = {fpu_s30, fpu_s31, LLDB_INVALID_REGNUM};
-
-static uint32_t g_d0_invalidates[] = {fpu_q0, LLDB_INVALID_REGNUM};
-static uint32_t g_d1_invalidates[] = {fpu_q0, LLDB_INVALID_REGNUM};
-static uint32_t g_d2_invalidates[] = {fpu_q1, LLDB_INVALID_REGNUM};
-static uint32_t g_d3_invalidates[] = {fpu_q1, LLDB_INVALID_REGNUM};
-static uint32_t g_d4_invalidates[] = {fpu_q2, LLDB_INVALID_REGNUM};
-static uint32_t g_d5_invalidates[] = {fpu_q2, LLDB_INVALID_REGNUM};
-static uint32_t g_d6_invalidates[] = {fpu_q3, LLDB_INVALID_REGNUM};
-static uint32_t g_d7_invalidates[] = {fpu_q3, LLDB_INVALID_REGNUM};
-static uint32_t g_d8_invalidates[] = {fpu_q4, LLDB_INVALID_REGNUM};
-static uint32_t g_d9_invalidates[] = {fpu_q4, LLDB_INVALID_REGNUM};
-static uint32_t g_d10_invalidates[] = {fpu_q5, LLDB_INVALID_REGNUM};
-static uint32_t g_d11_invalidates[] = {fpu_q5, LLDB_INVALID_REGNUM};
-static uint32_t g_d12_invalidates[] = {fpu_q6, LLDB_INVALID_REGNUM};
-static uint32_t g_d13_invalidates[] = {fpu_q6, LLDB_INVALID_REGNUM};
-static uint32_t g_d14_invalidates[] = {fpu_q7, LLDB_INVALID_REGNUM};
-static uint32_t g_d15_invalidates[] = {fpu_q7, LLDB_INVALID_REGNUM};
+static uint32_t g_d0_invalidates[] = {fpu_q0, fpu_s0, fpu_s1,
+ LLDB_INVALID_REGNUM};
+static uint32_t g_d1_invalidates[] = {fpu_q0, fpu_s2, fpu_s3,
+ LLDB_INVALID_REGNUM};
+static uint32_t g_d2_invalidates[] = {fpu_q1, fpu_s4, fpu_s5,
+ LLDB_INVALID_REGNUM};
+static uint32_t g_d3_invalidates[] = {fpu_q1, fpu_s6, fpu_s7,
+ LLDB_INVALID_REGNUM};
+static uint32_t g_d4_invalidates[] = {fpu_q2, fpu_s8, fpu_s9,
+ LLDB_INVALID_REGNUM};
+static uint32_t g_d5_invalidates[] = {fpu_q2, fpu_s10, fpu_s11,
+ LLDB_INVALID_REGNUM};
+static uint32_t g_d6_invalidates[] = {fpu_q3, fpu_s12, fpu_s13,
+ LLDB_INVALID_REGNUM};
+static uint32_t g_d7_invalidates[] = {fpu_q3, fpu_s14, fpu_s15,
+ LLDB_INVALID_REGNUM};
+static uint32_t g_d8_invalidates[] = {fpu_q4, fpu_s16, fpu_s17,
+ LLDB_INVALID_REGNUM};
+static uint32_t g_d9_invalidates[] = {fpu_q4, fpu_s18, fpu_s19,
+ LLDB_INVALID_REGNUM};
+static uint32_t g_d10_invalidates[] = {fpu_q5, fpu_s20, fpu_s21,
+ LLDB_INVALID_REGNUM};
+static uint32_t g_d11_invalidates[] = {fpu_q5, fpu_s22, fpu_s23,
+ LLDB_INVALID_REGNUM};
+static uint32_t g_d12_invalidates[] = {fpu_q6, fpu_s24, fpu_s25,
+ LLDB_INVALID_REGNUM};
+static uint32_t g_d13_invalidates[] = {fpu_q6, fpu_s26, fpu_s27,
+ LLDB_INVALID_REGNUM};
+static uint32_t g_d14_invalidates[] = {fpu_q7, fpu_s28, fpu_s29,
+ LLDB_INVALID_REGNUM};
+static uint32_t g_d15_invalidates[] = {fpu_q7, fpu_s30, fpu_s31,
+ LLDB_INVALID_REGNUM};
static uint32_t g_d16_invalidates[] = {fpu_q8, LLDB_INVALID_REGNUM};
static uint32_t g_d17_invalidates[] = {fpu_q8, LLDB_INVALID_REGNUM};
static uint32_t g_d18_invalidates[] = {fpu_q9, LLDB_INVALID_REGNUM};
@@ -304,30 +303,64 @@ static uint32_t g_d29_invalidates[] = {fpu_q14, LLDB_INVALID_REGNUM};
static uint32_t g_d30_invalidates[] = {fpu_q15, LLDB_INVALID_REGNUM};
static uint32_t g_d31_invalidates[] = {fpu_q15, LLDB_INVALID_REGNUM};
-static uint32_t g_q0_contains[] = {
+static uint32_t g_q0_invalidates[] = {
fpu_d0, fpu_d1, fpu_s0, fpu_s1, fpu_s2, fpu_s3, LLDB_INVALID_REGNUM};
-static uint32_t g_q1_contains[] = {
+static uint32_t g_q1_invalidates[] = {
fpu_d2, fpu_d3, fpu_s4, fpu_s5, fpu_s6, fpu_s7, LLDB_INVALID_REGNUM};
-static uint32_t g_q2_contains[] = {
+static uint32_t g_q2_invalidates[] = {
fpu_d4, fpu_d5, fpu_s8, fpu_s9, fpu_s10, fpu_s11, LLDB_INVALID_REGNUM};
-static uint32_t g_q3_contains[] = {
+static uint32_t g_q3_invalidates[] = {
fpu_d6, fpu_d7, fpu_s12, fpu_s13, fpu_s14, fpu_s15, LLDB_INVALID_REGNUM};
-static uint32_t g_q4_contains[] = {
+static uint32_t g_q4_invalidates[] = {
fpu_d8, fpu_d9, fpu_s16, fpu_s17, fpu_s18, fpu_s19, LLDB_INVALID_REGNUM};
-static uint32_t g_q5_contains[] = {
+static uint32_t g_q5_invalidates[] = {
fpu_d10, fpu_d11, fpu_s20, fpu_s21, fpu_s22, fpu_s23, LLDB_INVALID_REGNUM};
-static uint32_t g_q6_contains[] = {
+static uint32_t g_q6_invalidates[] = {
fpu_d12, fpu_d13, fpu_s24, fpu_s25, fpu_s26, fpu_s27, LLDB_INVALID_REGNUM};
-static uint32_t g_q7_contains[] = {
+static uint32_t g_q7_invalidates[] = {
fpu_d14, fpu_d15, fpu_s28, fpu_s29, fpu_s30, fpu_s31, LLDB_INVALID_REGNUM};
-static uint32_t g_q8_contains[] = {fpu_d16, fpu_d17, LLDB_INVALID_REGNUM};
-static uint32_t g_q9_contains[] = {fpu_d18, fpu_d19, LLDB_INVALID_REGNUM};
-static uint32_t g_q10_contains[] = {fpu_d20, fpu_d21, LLDB_INVALID_REGNUM};
-static uint32_t g_q11_contains[] = {fpu_d22, fpu_d23, LLDB_INVALID_REGNUM};
-static uint32_t g_q12_contains[] = {fpu_d24, fpu_d25, LLDB_INVALID_REGNUM};
-static uint32_t g_q13_contains[] = {fpu_d26, fpu_d27, LLDB_INVALID_REGNUM};
-static uint32_t g_q14_contains[] = {fpu_d28, fpu_d29, LLDB_INVALID_REGNUM};
-static uint32_t g_q15_contains[] = {fpu_d30, fpu_d31, LLDB_INVALID_REGNUM};
+static uint32_t g_q8_invalidates[] = {fpu_d16, fpu_d17, LLDB_INVALID_REGNUM};
+static uint32_t g_q9_invalidates[] = {fpu_d18, fpu_d19, LLDB_INVALID_REGNUM};
+static uint32_t g_q10_invalidates[] = {fpu_d20, fpu_d21, LLDB_INVALID_REGNUM};
+static uint32_t g_q11_invalidates[] = {fpu_d22, fpu_d23, LLDB_INVALID_REGNUM};
+static uint32_t g_q12_invalidates[] = {fpu_d24, fpu_d25, LLDB_INVALID_REGNUM};
+static uint32_t g_q13_invalidates[] = {fpu_d26, fpu_d27, LLDB_INVALID_REGNUM};
+static uint32_t g_q14_invalidates[] = {fpu_d28, fpu_d29, LLDB_INVALID_REGNUM};
+static uint32_t g_q15_invalidates[] = {fpu_d30, fpu_d31, LLDB_INVALID_REGNUM};
+
+static uint32_t g_q0_contained[] = {fpu_q0, LLDB_INVALID_REGNUM};
+static uint32_t g_q1_contained[] = {fpu_q1, LLDB_INVALID_REGNUM};
+static uint32_t g_q2_contained[] = {fpu_q2, LLDB_INVALID_REGNUM};
+static uint32_t g_q3_contained[] = {fpu_q3, LLDB_INVALID_REGNUM};
+static uint32_t g_q4_contained[] = {fpu_q4, LLDB_INVALID_REGNUM};
+static uint32_t g_q5_contained[] = {fpu_q5, LLDB_INVALID_REGNUM};
+static uint32_t g_q6_contained[] = {fpu_q6, LLDB_INVALID_REGNUM};
+static uint32_t g_q7_contained[] = {fpu_q7, LLDB_INVALID_REGNUM};
+static uint32_t g_q8_contained[] = {fpu_q8, LLDB_INVALID_REGNUM};
+static uint32_t g_q9_contained[] = {fpu_q9, LLDB_INVALID_REGNUM};
+static uint32_t g_q10_contained[] = {fpu_q10, LLDB_INVALID_REGNUM};
+static uint32_t g_q11_contained[] = {fpu_q11, LLDB_INVALID_REGNUM};
+static uint32_t g_q12_contained[] = {fpu_q12, LLDB_INVALID_REGNUM};
+static uint32_t g_q13_contained[] = {fpu_q13, LLDB_INVALID_REGNUM};
+static uint32_t g_q14_contained[] = {fpu_q14, LLDB_INVALID_REGNUM};
+static uint32_t g_q15_contained[] = {fpu_q15, LLDB_INVALID_REGNUM};
+
+#define FPU_REG(name, size, offset, qreg) \
+ { \
+ #name, nullptr, size, FPU_OFFSET(offset), eEncodingIEEE754, eFormatFloat, \
+ {LLDB_INVALID_REGNUM, dwarf_##name, LLDB_INVALID_REGNUM, \
+ LLDB_INVALID_REGNUM, fpu_##name }, \
+ g_##qreg##_contained, g_##name##_invalidates, \
+ }
+
+#define FPU_QREG(name, offset) \
+ { \
+ #name, nullptr, 16, FPU_OFFSET(offset), eEncodingVector, \
+ eFormatVectorOfUInt8, \
+ {LLDB_INVALID_REGNUM, dwarf_##name, LLDB_INVALID_REGNUM, \
+ LLDB_INVALID_REGNUM, fpu_##name }, \
+ nullptr, g_##name##_invalidates, \
+ }
static RegisterInfo g_register_infos_arm[] = {
// NAME ALT SZ OFFSET ENCODING FORMAT
@@ -337,1216 +370,343 @@ static RegisterInfo g_register_infos_arm[] = {
// ==================== =================== ===================
// ========================== =================== =============
// ============== =================
- {"r0",
- nullptr,
- 4,
- GPR_OFFSET(0),
- eEncodingUint,
- eFormatHex,
- {ehframe_r0, dwarf_r0, LLDB_REGNUM_GENERIC_ARG1, LLDB_INVALID_REGNUM,
- gpr_r0},
- nullptr,
- nullptr,
- nullptr,
- 0},
- {"r1",
- nullptr,
- 4,
- GPR_OFFSET(1),
- eEncodingUint,
- eFormatHex,
- {ehframe_r1, dwarf_r1, LLDB_REGNUM_GENERIC_ARG2, LLDB_INVALID_REGNUM,
- gpr_r1},
- nullptr,
- nullptr,
- nullptr,
- 0},
- {"r2",
- nullptr,
- 4,
- GPR_OFFSET(2),
- eEncodingUint,
- eFormatHex,
- {ehframe_r2, dwarf_r2, LLDB_REGNUM_GENERIC_ARG3, LLDB_INVALID_REGNUM,
- gpr_r2},
- nullptr,
- nullptr,
- nullptr,
- 0},
- {"r3",
- nullptr,
- 4,
- GPR_OFFSET(3),
- eEncodingUint,
- eFormatHex,
- {ehframe_r3, dwarf_r3, LLDB_REGNUM_GENERIC_ARG4, LLDB_INVALID_REGNUM,
- gpr_r3},
- nullptr,
- nullptr,
- nullptr,
- 0},
- {"r4",
- nullptr,
- 4,
- GPR_OFFSET(4),
- eEncodingUint,
- eFormatHex,
- {ehframe_r4, dwarf_r4, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, gpr_r4},
- nullptr,
- nullptr,
- nullptr,
- 0},
- {"r5",
- nullptr,
- 4,
- GPR_OFFSET(5),
- eEncodingUint,
- eFormatHex,
- {ehframe_r5, dwarf_r5, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, gpr_r5},
- nullptr,
- nullptr,
- nullptr,
- 0},
- {"r6",
- nullptr,
- 4,
- GPR_OFFSET(6),
- eEncodingUint,
- eFormatHex,
- {ehframe_r6, dwarf_r6, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, gpr_r6},
- nullptr,
- nullptr,
- nullptr,
- 0},
- {"r7",
- nullptr,
- 4,
- GPR_OFFSET(7),
- eEncodingUint,
- eFormatHex,
- {ehframe_r7, dwarf_r7, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, gpr_r7},
- nullptr,
- nullptr,
- nullptr,
- 0},
- {"r8",
- nullptr,
- 4,
- GPR_OFFSET(8),
- eEncodingUint,
- eFormatHex,
- {ehframe_r8, dwarf_r8, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, gpr_r8},
- nullptr,
- nullptr,
- nullptr,
- 0},
- {"r9",
- nullptr,
- 4,
- GPR_OFFSET(9),
- eEncodingUint,
- eFormatHex,
- {ehframe_r9, dwarf_r9, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, gpr_r9},
- nullptr,
- nullptr,
- nullptr,
- 0},
- {"r10",
- nullptr,
- 4,
- GPR_OFFSET(10),
- eEncodingUint,
- eFormatHex,
- {ehframe_r10, dwarf_r10, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- gpr_r10},
- nullptr,
- nullptr,
- nullptr,
- 0},
- {"r11",
- nullptr,
- 4,
- GPR_OFFSET(11),
- eEncodingUint,
- eFormatHex,
- {ehframe_r11, dwarf_r11, LLDB_REGNUM_GENERIC_FP, LLDB_INVALID_REGNUM,
- gpr_r11},
- nullptr,
- nullptr,
- nullptr,
- 0},
- {"r12",
- nullptr,
- 4,
- GPR_OFFSET(12),
- eEncodingUint,
- eFormatHex,
- {ehframe_r12, dwarf_r12, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- gpr_r12},
- nullptr,
- nullptr,
- nullptr,
- 0},
- {"sp",
- "r13",
- 4,
- GPR_OFFSET(13),
- eEncodingUint,
- eFormatHex,
- {ehframe_sp, dwarf_sp, LLDB_REGNUM_GENERIC_SP, LLDB_INVALID_REGNUM,
- gpr_sp},
- nullptr,
- nullptr,
- nullptr,
- 0},
- {"lr",
- "r14",
- 4,
- GPR_OFFSET(14),
- eEncodingUint,
- eFormatHex,
- {ehframe_lr, dwarf_lr, LLDB_REGNUM_GENERIC_RA, LLDB_INVALID_REGNUM,
- gpr_lr},
- nullptr,
- nullptr,
- nullptr,
- 0},
- {"pc",
- "r15",
- 4,
- GPR_OFFSET(15),
- eEncodingUint,
- eFormatHex,
- {ehframe_pc, dwarf_pc, LLDB_REGNUM_GENERIC_PC, LLDB_INVALID_REGNUM,
- gpr_pc},
- nullptr,
- nullptr,
- nullptr,
- 0},
- {"cpsr",
- "psr",
- 4,
- GPR_OFFSET(16),
- eEncodingUint,
- eFormatHex,
- {ehframe_cpsr, dwarf_cpsr, LLDB_REGNUM_GENERIC_FLAGS, LLDB_INVALID_REGNUM,
- gpr_cpsr},
- nullptr,
- nullptr,
- nullptr,
- 0},
-
- {"s0",
- nullptr,
- 4,
- FPU_OFFSET(0),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_s0, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_s0},
- nullptr,
- g_s0_invalidates,
- nullptr,
- 0},
- {"s1",
- nullptr,
- 4,
- FPU_OFFSET(1),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_s1, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_s1},
- nullptr,
- g_s1_invalidates,
- nullptr,
- 0},
- {"s2",
- nullptr,
- 4,
- FPU_OFFSET(2),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_s2, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_s2},
- nullptr,
- g_s2_invalidates,
- nullptr,
- 0},
- {"s3",
- nullptr,
- 4,
- FPU_OFFSET(3),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_s3, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_s3},
- nullptr,
- g_s3_invalidates,
- nullptr,
- 0},
- {"s4",
- nullptr,
- 4,
- FPU_OFFSET(4),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_s4, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_s4},
- nullptr,
- g_s4_invalidates,
- nullptr,
- 0},
- {"s5",
- nullptr,
- 4,
- FPU_OFFSET(5),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_s5, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_s5},
- nullptr,
- g_s5_invalidates,
- nullptr,
- 0},
- {"s6",
- nullptr,
- 4,
- FPU_OFFSET(6),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_s6, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_s6},
- nullptr,
- g_s6_invalidates,
- nullptr,
- 0},
- {"s7",
- nullptr,
- 4,
- FPU_OFFSET(7),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_s7, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_s7},
- nullptr,
- g_s7_invalidates,
- nullptr,
- 0},
- {"s8",
- nullptr,
- 4,
- FPU_OFFSET(8),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_s8, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_s8},
- nullptr,
- g_s8_invalidates,
- nullptr,
- 0},
- {"s9",
- nullptr,
- 4,
- FPU_OFFSET(9),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_s9, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_s9},
- nullptr,
- g_s9_invalidates,
- nullptr,
- 0},
- {"s10",
- nullptr,
- 4,
- FPU_OFFSET(10),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_s10, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_s10},
- nullptr,
- g_s10_invalidates,
- nullptr,
- 0},
- {"s11",
- nullptr,
- 4,
- FPU_OFFSET(11),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_s11, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_s11},
- nullptr,
- g_s11_invalidates,
- nullptr,
- 0},
- {"s12",
- nullptr,
- 4,
- FPU_OFFSET(12),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_s12, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_s12},
- nullptr,
- g_s12_invalidates,
- nullptr,
- 0},
- {"s13",
- nullptr,
- 4,
- FPU_OFFSET(13),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_s13, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_s13},
- nullptr,
- g_s13_invalidates,
- nullptr,
- 0},
- {"s14",
- nullptr,
- 4,
- FPU_OFFSET(14),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_s14, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_s14},
- nullptr,
- g_s14_invalidates,
- nullptr,
- 0},
- {"s15",
- nullptr,
- 4,
- FPU_OFFSET(15),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_s15, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_s15},
- nullptr,
- g_s15_invalidates,
- nullptr,
- 0},
- {"s16",
- nullptr,
- 4,
- FPU_OFFSET(16),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_s16, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_s16},
- nullptr,
- g_s16_invalidates,
- nullptr,
- 0},
- {"s17",
- nullptr,
- 4,
- FPU_OFFSET(17),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_s17, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_s17},
- nullptr,
- g_s17_invalidates,
- nullptr,
- 0},
- {"s18",
- nullptr,
- 4,
- FPU_OFFSET(18),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_s18, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_s18},
- nullptr,
- g_s18_invalidates,
- nullptr,
- 0},
- {"s19",
- nullptr,
- 4,
- FPU_OFFSET(19),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_s19, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_s19},
- nullptr,
- g_s19_invalidates,
- nullptr,
- 0},
- {"s20",
- nullptr,
- 4,
- FPU_OFFSET(20),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_s20, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_s20},
- nullptr,
- g_s20_invalidates,
- nullptr,
- 0},
- {"s21",
- nullptr,
- 4,
- FPU_OFFSET(21),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_s21, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_s21},
- nullptr,
- g_s21_invalidates,
- nullptr,
- 0},
- {"s22",
- nullptr,
- 4,
- FPU_OFFSET(22),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_s22, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_s22},
- nullptr,
- g_s22_invalidates,
- nullptr,
- 0},
- {"s23",
- nullptr,
- 4,
- FPU_OFFSET(23),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_s23, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_s23},
- nullptr,
- g_s23_invalidates,
- nullptr,
- 0},
- {"s24",
- nullptr,
- 4,
- FPU_OFFSET(24),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_s24, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_s24},
- nullptr,
- g_s24_invalidates,
- nullptr,
- 0},
- {"s25",
- nullptr,
- 4,
- FPU_OFFSET(25),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_s25, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_s25},
- nullptr,
- g_s25_invalidates,
- nullptr,
- 0},
- {"s26",
- nullptr,
- 4,
- FPU_OFFSET(26),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_s26, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_s26},
- nullptr,
- g_s26_invalidates,
- nullptr,
- 0},
- {"s27",
- nullptr,
- 4,
- FPU_OFFSET(27),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_s27, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_s27},
- nullptr,
- g_s27_invalidates,
- nullptr,
- 0},
- {"s28",
- nullptr,
- 4,
- FPU_OFFSET(28),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_s28, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_s28},
- nullptr,
- g_s28_invalidates,
- nullptr,
- 0},
- {"s29",
- nullptr,
- 4,
- FPU_OFFSET(29),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_s29, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_s29},
- nullptr,
- g_s29_invalidates,
- nullptr,
- 0},
- {"s30",
- nullptr,
- 4,
- FPU_OFFSET(30),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_s30, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_s30},
- nullptr,
- g_s30_invalidates,
- nullptr,
- 0},
- {"s31",
- nullptr,
- 4,
- FPU_OFFSET(31),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_s31, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_s31},
- nullptr,
- g_s31_invalidates,
- nullptr,
- 0},
- {"fpscr",
- nullptr,
- 4,
- FPSCR_OFFSET,
- eEncodingUint,
- eFormatHex,
- {LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- LLDB_INVALID_REGNUM, fpu_fpscr},
- nullptr,
- nullptr,
- nullptr,
- 0},
-
- {"d0",
- nullptr,
- 8,
- FPU_OFFSET(0),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_d0, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_d0},
- g_d0_contains,
- g_d0_invalidates,
- nullptr,
- 0},
- {"d1",
- nullptr,
- 8,
- FPU_OFFSET(2),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_d1, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_d1},
- g_d1_contains,
- g_d1_invalidates,
- nullptr,
- 0},
- {"d2",
- nullptr,
- 8,
- FPU_OFFSET(4),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_d2, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_d2},
- g_d2_contains,
- g_d2_invalidates,
- nullptr,
- 0},
- {"d3",
- nullptr,
- 8,
- FPU_OFFSET(6),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_d3, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_d3},
- g_d3_contains,
- g_d3_invalidates,
- nullptr,
- 0},
- {"d4",
- nullptr,
- 8,
- FPU_OFFSET(8),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_d4, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_d4},
- g_d4_contains,
- g_d4_invalidates,
- nullptr,
- 0},
- {"d5",
- nullptr,
- 8,
- FPU_OFFSET(10),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_d5, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_d5},
- g_d5_contains,
- g_d5_invalidates,
- nullptr,
- 0},
- {"d6",
- nullptr,
- 8,
- FPU_OFFSET(12),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_d6, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_d6},
- g_d6_contains,
- g_d6_invalidates,
- nullptr,
- 0},
- {"d7",
- nullptr,
- 8,
- FPU_OFFSET(14),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_d7, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_d7},
- g_d7_contains,
- g_d7_invalidates,
- nullptr,
- 0},
- {"d8",
- nullptr,
- 8,
- FPU_OFFSET(16),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_d8, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_d8},
- g_d8_contains,
- g_d8_invalidates,
- nullptr,
- 0},
- {"d9",
- nullptr,
- 8,
- FPU_OFFSET(18),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_d9, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_d9},
- g_d9_contains,
- g_d9_invalidates,
- nullptr,
- 0},
- {"d10",
- nullptr,
- 8,
- FPU_OFFSET(20),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_d10, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_d10},
- g_d10_contains,
- g_d10_invalidates,
- nullptr,
- 0},
- {"d11",
- nullptr,
- 8,
- FPU_OFFSET(22),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_d11, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_d11},
- g_d11_contains,
- g_d11_invalidates,
- nullptr,
- 0},
- {"d12",
- nullptr,
- 8,
- FPU_OFFSET(24),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_d12, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_d12},
- g_d12_contains,
- g_d12_invalidates,
- nullptr,
- 0},
- {"d13",
- nullptr,
- 8,
- FPU_OFFSET(26),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_d13, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_d13},
- g_d13_contains,
- g_d13_invalidates,
- nullptr,
- 0},
- {"d14",
- nullptr,
- 8,
- FPU_OFFSET(28),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_d14, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_d14},
- g_d14_contains,
- g_d14_invalidates,
- nullptr,
- 0},
- {"d15",
- nullptr,
- 8,
- FPU_OFFSET(30),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_d15, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_d15},
- g_d15_contains,
- g_d15_invalidates,
- nullptr,
- 0},
- {"d16",
- nullptr,
- 8,
- FPU_OFFSET(32),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_d16, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_d16},
- nullptr,
- g_d16_invalidates,
- nullptr,
- 0},
- {"d17",
- nullptr,
- 8,
- FPU_OFFSET(34),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_d17, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_d17},
- nullptr,
- g_d17_invalidates,
- nullptr,
- 0},
- {"d18",
- nullptr,
- 8,
- FPU_OFFSET(36),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_d18, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_d18},
- nullptr,
- g_d18_invalidates,
- nullptr,
- 0},
- {"d19",
- nullptr,
- 8,
- FPU_OFFSET(38),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_d19, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_d19},
- nullptr,
- g_d19_invalidates,
- nullptr,
- 0},
- {"d20",
- nullptr,
- 8,
- FPU_OFFSET(40),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_d20, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_d20},
- nullptr,
- g_d20_invalidates,
- nullptr,
- 0},
- {"d21",
- nullptr,
- 8,
- FPU_OFFSET(42),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_d21, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_d21},
- nullptr,
- g_d21_invalidates,
- nullptr,
- 0},
- {"d22",
- nullptr,
- 8,
- FPU_OFFSET(44),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_d22, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_d22},
- nullptr,
- g_d22_invalidates,
- nullptr,
- 0},
- {"d23",
- nullptr,
- 8,
- FPU_OFFSET(46),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_d23, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_d23},
- nullptr,
- g_d23_invalidates,
- nullptr,
- 0},
- {"d24",
- nullptr,
- 8,
- FPU_OFFSET(48),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_d24, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_d24},
- nullptr,
- g_d24_invalidates,
- nullptr,
- 0},
- {"d25",
- nullptr,
- 8,
- FPU_OFFSET(50),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_d25, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_d25},
- nullptr,
- g_d25_invalidates,
- nullptr,
- 0},
- {"d26",
- nullptr,
- 8,
- FPU_OFFSET(52),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_d26, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_d26},
- nullptr,
- g_d26_invalidates,
- nullptr,
- 0},
- {"d27",
- nullptr,
- 8,
- FPU_OFFSET(54),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_d27, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_d27},
- nullptr,
- g_d27_invalidates,
- nullptr,
- 0},
- {"d28",
- nullptr,
- 8,
- FPU_OFFSET(56),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_d28, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_d28},
- nullptr,
- g_d28_invalidates,
- nullptr,
- 0},
- {"d29",
- nullptr,
- 8,
- FPU_OFFSET(58),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_d29, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_d29},
- nullptr,
- g_d29_invalidates,
- nullptr,
- 0},
- {"d30",
- nullptr,
- 8,
- FPU_OFFSET(60),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_d30, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_d30},
- nullptr,
- g_d30_invalidates,
- nullptr,
- 0},
- {"d31",
- nullptr,
- 8,
- FPU_OFFSET(62),
- eEncodingIEEE754,
- eFormatFloat,
- {LLDB_INVALID_REGNUM, dwarf_d31, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_d31},
- nullptr,
- g_d31_invalidates,
- nullptr,
- 0},
-
- {"q0",
- nullptr,
- 16,
- FPU_OFFSET(0),
- eEncodingVector,
- eFormatVectorOfUInt8,
- {LLDB_INVALID_REGNUM, dwarf_q0, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_q0},
- g_q0_contains,
- nullptr,
- nullptr,
- 0},
- {"q1",
- nullptr,
- 16,
- FPU_OFFSET(4),
- eEncodingVector,
- eFormatVectorOfUInt8,
- {LLDB_INVALID_REGNUM, dwarf_q1, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_q1},
- g_q1_contains,
- nullptr,
- nullptr,
- 0},
- {"q2",
- nullptr,
- 16,
- FPU_OFFSET(8),
- eEncodingVector,
- eFormatVectorOfUInt8,
- {LLDB_INVALID_REGNUM, dwarf_q2, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_q2},
- g_q2_contains,
- nullptr,
- nullptr,
- 0},
- {"q3",
- nullptr,
- 16,
- FPU_OFFSET(12),
- eEncodingVector,
- eFormatVectorOfUInt8,
- {LLDB_INVALID_REGNUM, dwarf_q3, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_q3},
- g_q3_contains,
- nullptr,
- nullptr,
- 0},
- {"q4",
- nullptr,
- 16,
- FPU_OFFSET(16),
- eEncodingVector,
- eFormatVectorOfUInt8,
- {LLDB_INVALID_REGNUM, dwarf_q4, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_q4},
- g_q4_contains,
- nullptr,
- nullptr,
- 0},
- {"q5",
- nullptr,
- 16,
- FPU_OFFSET(20),
- eEncodingVector,
- eFormatVectorOfUInt8,
- {LLDB_INVALID_REGNUM, dwarf_q5, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_q5},
- g_q5_contains,
- nullptr,
- nullptr,
- 0},
- {"q6",
- nullptr,
- 16,
- FPU_OFFSET(24),
- eEncodingVector,
- eFormatVectorOfUInt8,
- {LLDB_INVALID_REGNUM, dwarf_q6, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_q6},
- g_q6_contains,
- nullptr,
- nullptr,
- 0},
- {"q7",
- nullptr,
- 16,
- FPU_OFFSET(28),
- eEncodingVector,
- eFormatVectorOfUInt8,
- {LLDB_INVALID_REGNUM, dwarf_q7, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_q7},
- g_q7_contains,
- nullptr,
- nullptr,
- 0},
- {"q8",
- nullptr,
- 16,
- FPU_OFFSET(32),
- eEncodingVector,
- eFormatVectorOfUInt8,
- {LLDB_INVALID_REGNUM, dwarf_q8, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_q8},
- g_q8_contains,
- nullptr,
- nullptr,
- 0},
- {"q9",
- nullptr,
- 16,
- FPU_OFFSET(36),
- eEncodingVector,
- eFormatVectorOfUInt8,
- {LLDB_INVALID_REGNUM, dwarf_q9, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_q9},
- g_q9_contains,
- nullptr,
- nullptr,
- 0},
- {"q10",
- nullptr,
- 16,
- FPU_OFFSET(40),
- eEncodingVector,
- eFormatVectorOfUInt8,
- {LLDB_INVALID_REGNUM, dwarf_q10, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_q10},
- g_q10_contains,
- nullptr,
- nullptr,
- 0},
- {"q11",
- nullptr,
- 16,
- FPU_OFFSET(44),
- eEncodingVector,
- eFormatVectorOfUInt8,
- {LLDB_INVALID_REGNUM, dwarf_q11, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_q11},
- g_q11_contains,
- nullptr,
- nullptr,
- 0},
- {"q12",
- nullptr,
- 16,
- FPU_OFFSET(48),
- eEncodingVector,
- eFormatVectorOfUInt8,
- {LLDB_INVALID_REGNUM, dwarf_q12, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_q12},
- g_q12_contains,
- nullptr,
- nullptr,
- 0},
- {"q13",
- nullptr,
- 16,
- FPU_OFFSET(52),
- eEncodingVector,
- eFormatVectorOfUInt8,
- {LLDB_INVALID_REGNUM, dwarf_q13, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_q13},
- g_q13_contains,
- nullptr,
- nullptr,
- 0},
- {"q14",
- nullptr,
- 16,
- FPU_OFFSET(56),
- eEncodingVector,
- eFormatVectorOfUInt8,
- {LLDB_INVALID_REGNUM, dwarf_q14, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_q14},
- g_q14_contains,
- nullptr,
- nullptr,
- 0},
- {"q15",
- nullptr,
- 16,
- FPU_OFFSET(60),
- eEncodingVector,
- eFormatVectorOfUInt8,
- {LLDB_INVALID_REGNUM, dwarf_q15, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- fpu_q15},
- g_q15_contains,
- nullptr,
- nullptr,
- 0},
-
- {"exception",
- nullptr,
- 4,
- EXC_OFFSET(0),
- eEncodingUint,
- eFormatHex,
- {LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- LLDB_INVALID_REGNUM, exc_exception},
- nullptr,
- nullptr,
- nullptr,
- 0},
- {"fsr",
- nullptr,
- 4,
- EXC_OFFSET(1),
- eEncodingUint,
- eFormatHex,
- {LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- LLDB_INVALID_REGNUM, exc_fsr},
- nullptr,
- nullptr,
- nullptr,
- 0},
- {"far",
- nullptr,
- 4,
- EXC_OFFSET(2),
- eEncodingUint,
- eFormatHex,
- {LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
- LLDB_INVALID_REGNUM, exc_far},
- nullptr,
- nullptr,
- nullptr,
- 0},
+ {
+ "r0",
+ nullptr,
+ 4,
+ GPR_OFFSET(0),
+ eEncodingUint,
+ eFormatHex,
+ {ehframe_r0, dwarf_r0, LLDB_REGNUM_GENERIC_ARG1, LLDB_INVALID_REGNUM,
+ gpr_r0},
+ nullptr,
+ nullptr,
+ },
+ {
+ "r1",
+ nullptr,
+ 4,
+ GPR_OFFSET(1),
+ eEncodingUint,
+ eFormatHex,
+ {ehframe_r1, dwarf_r1, LLDB_REGNUM_GENERIC_ARG2, LLDB_INVALID_REGNUM,
+ gpr_r1},
+ nullptr,
+ nullptr,
+ },
+ {
+ "r2",
+ nullptr,
+ 4,
+ GPR_OFFSET(2),
+ eEncodingUint,
+ eFormatHex,
+ {ehframe_r2, dwarf_r2, LLDB_REGNUM_GENERIC_ARG3, LLDB_INVALID_REGNUM,
+ gpr_r2},
+ nullptr,
+ nullptr,
+ },
+ {
+ "r3",
+ nullptr,
+ 4,
+ GPR_OFFSET(3),
+ eEncodingUint,
+ eFormatHex,
+ {ehframe_r3, dwarf_r3, LLDB_REGNUM_GENERIC_ARG4, LLDB_INVALID_REGNUM,
+ gpr_r3},
+ nullptr,
+ nullptr,
+ },
+ {
+ "r4",
+ nullptr,
+ 4,
+ GPR_OFFSET(4),
+ eEncodingUint,
+ eFormatHex,
+ {ehframe_r4, dwarf_r4, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
+ gpr_r4},
+ nullptr,
+ nullptr,
+ },
+ {
+ "r5",
+ nullptr,
+ 4,
+ GPR_OFFSET(5),
+ eEncodingUint,
+ eFormatHex,
+ {ehframe_r5, dwarf_r5, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
+ gpr_r5},
+ nullptr,
+ nullptr,
+ },
+ {
+ "r6",
+ nullptr,
+ 4,
+ GPR_OFFSET(6),
+ eEncodingUint,
+ eFormatHex,
+ {ehframe_r6, dwarf_r6, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
+ gpr_r6},
+ nullptr,
+ nullptr,
+ },
+ {
+ "r7",
+ nullptr,
+ 4,
+ GPR_OFFSET(7),
+ eEncodingUint,
+ eFormatHex,
+ {ehframe_r7, dwarf_r7, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
+ gpr_r7},
+ nullptr,
+ nullptr,
+ },
+ {
+ "r8",
+ nullptr,
+ 4,
+ GPR_OFFSET(8),
+ eEncodingUint,
+ eFormatHex,
+ {ehframe_r8, dwarf_r8, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
+ gpr_r8},
+ nullptr,
+ nullptr,
+ },
+ {
+ "r9",
+ nullptr,
+ 4,
+ GPR_OFFSET(9),
+ eEncodingUint,
+ eFormatHex,
+ {ehframe_r9, dwarf_r9, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
+ gpr_r9},
+ nullptr,
+ nullptr,
+ },
+ {
+ "r10",
+ nullptr,
+ 4,
+ GPR_OFFSET(10),
+ eEncodingUint,
+ eFormatHex,
+ {ehframe_r10, dwarf_r10, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
+ gpr_r10},
+ nullptr,
+ nullptr,
+ },
+ {
+ "r11",
+ nullptr,
+ 4,
+ GPR_OFFSET(11),
+ eEncodingUint,
+ eFormatHex,
+ {ehframe_r11, dwarf_r11, LLDB_REGNUM_GENERIC_FP, LLDB_INVALID_REGNUM,
+ gpr_r11},
+ nullptr,
+ nullptr,
+ },
+ {
+ "r12",
+ nullptr,
+ 4,
+ GPR_OFFSET(12),
+ eEncodingUint,
+ eFormatHex,
+ {ehframe_r12, dwarf_r12, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
+ gpr_r12},
+ nullptr,
+ nullptr,
+ },
+ {
+ "sp",
+ "r13",
+ 4,
+ GPR_OFFSET(13),
+ eEncodingUint,
+ eFormatHex,
+ {ehframe_sp, dwarf_sp, LLDB_REGNUM_GENERIC_SP, LLDB_INVALID_REGNUM,
+ gpr_sp},
+ nullptr,
+ nullptr,
+ },
+ {
+ "lr",
+ "r14",
+ 4,
+ GPR_OFFSET(14),
+ eEncodingUint,
+ eFormatHex,
+ {ehframe_lr, dwarf_lr, LLDB_REGNUM_GENERIC_RA, LLDB_INVALID_REGNUM,
+ gpr_lr},
+ nullptr,
+ nullptr,
+ },
+ {
+ "pc",
+ "r15",
+ 4,
+ GPR_OFFSET(15),
+ eEncodingUint,
+ eFormatHex,
+ {ehframe_pc, dwarf_pc, LLDB_REGNUM_GENERIC_PC, LLDB_INVALID_REGNUM,
+ gpr_pc},
+ nullptr,
+ nullptr,
+ },
+ {
+ "cpsr",
+ "psr",
+ 4,
+ GPR_OFFSET(16),
+ eEncodingUint,
+ eFormatHex,
+ {ehframe_cpsr, dwarf_cpsr, LLDB_REGNUM_GENERIC_FLAGS,
+ LLDB_INVALID_REGNUM, gpr_cpsr},
+ nullptr,
+ nullptr,
+ },
+
+ FPU_REG(s0, 4, 0, q0),
+ FPU_REG(s1, 4, 1, q0),
+ FPU_REG(s2, 4, 2, q0),
+ FPU_REG(s3, 4, 3, q0),
+ FPU_REG(s4, 4, 4, q1),
+ FPU_REG(s5, 4, 5, q1),
+ FPU_REG(s6, 4, 6, q1),
+ FPU_REG(s7, 4, 7, q1),
+ FPU_REG(s8, 4, 8, q2),
+ FPU_REG(s9, 4, 9, q2),
+ FPU_REG(s10, 4, 10, q2),
+ FPU_REG(s11, 4, 11, q2),
+ FPU_REG(s12, 4, 12, q3),
+ FPU_REG(s13, 4, 13, q3),
+ FPU_REG(s14, 4, 14, q3),
+ FPU_REG(s15, 4, 15, q3),
+ FPU_REG(s16, 4, 16, q4),
+ FPU_REG(s17, 4, 17, q4),
+ FPU_REG(s18, 4, 18, q4),
+ FPU_REG(s19, 4, 19, q4),
+ FPU_REG(s20, 4, 20, q5),
+ FPU_REG(s21, 4, 21, q5),
+ FPU_REG(s22, 4, 22, q5),
+ FPU_REG(s23, 4, 23, q5),
+ FPU_REG(s24, 4, 24, q6),
+ FPU_REG(s25, 4, 25, q6),
+ FPU_REG(s26, 4, 26, q6),
+ FPU_REG(s27, 4, 27, q6),
+ FPU_REG(s28, 4, 28, q7),
+ FPU_REG(s29, 4, 29, q7),
+ FPU_REG(s30, 4, 30, q7),
+ FPU_REG(s31, 4, 31, q7),
+
+ {
+ "fpscr",
+ nullptr,
+ 4,
+ FPSCR_OFFSET,
+ eEncodingUint,
+ eFormatHex,
+ {LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
+ LLDB_INVALID_REGNUM, fpu_fpscr},
+ nullptr,
+ nullptr,
+ },
+
+ FPU_REG(d0, 8, 0, q0),
+ FPU_REG(d1, 8, 2, q0),
+ FPU_REG(d2, 8, 4, q1),
+ FPU_REG(d3, 8, 6, q1),
+ FPU_REG(d4, 8, 8, q2),
+ FPU_REG(d5, 8, 10, q2),
+ FPU_REG(d6, 8, 12, q3),
+ FPU_REG(d7, 8, 14, q3),
+ FPU_REG(d8, 8, 16, q4),
+ FPU_REG(d9, 8, 18, q4),
+ FPU_REG(d10, 8, 20, q5),
+ FPU_REG(d11, 8, 22, q5),
+ FPU_REG(d12, 8, 24, q6),
+ FPU_REG(d13, 8, 26, q6),
+ FPU_REG(d14, 8, 28, q7),
+ FPU_REG(d15, 8, 30, q7),
+ FPU_REG(d16, 8, 32, q8),
+ FPU_REG(d17, 8, 34, q8),
+ FPU_REG(d18, 8, 36, q9),
+ FPU_REG(d19, 8, 38, q9),
+ FPU_REG(d20, 8, 40, q10),
+ FPU_REG(d21, 8, 42, q10),
+ FPU_REG(d22, 8, 44, q11),
+ FPU_REG(d23, 8, 46, q11),
+ FPU_REG(d24, 8, 48, q12),
+ FPU_REG(d25, 8, 50, q12),
+ FPU_REG(d26, 8, 52, q13),
+ FPU_REG(d27, 8, 54, q13),
+ FPU_REG(d28, 8, 56, q14),
+ FPU_REG(d29, 8, 58, q14),
+ FPU_REG(d30, 8, 60, q15),
+ FPU_REG(d31, 8, 62, q15),
+
+ FPU_QREG(q0, 0),
+ FPU_QREG(q1, 4),
+ FPU_QREG(q2, 8),
+ FPU_QREG(q3, 12),
+ FPU_QREG(q4, 16),
+ FPU_QREG(q5, 20),
+ FPU_QREG(q6, 24),
+ FPU_QREG(q7, 28),
+ FPU_QREG(q8, 32),
+ FPU_QREG(q9, 36),
+ FPU_QREG(q10, 40),
+ FPU_QREG(q11, 44),
+ FPU_QREG(q12, 48),
+ FPU_QREG(q13, 52),
+ FPU_QREG(q14, 56),
+ FPU_QREG(q15, 60),
+
+ {
+ "exception",
+ nullptr,
+ 4,
+ EXC_OFFSET(0),
+ eEncodingUint,
+ eFormatHex,
+ {LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
+ LLDB_INVALID_REGNUM, exc_exception},
+ nullptr,
+ nullptr,
+ },
+ {
+ "fsr",
+ nullptr,
+ 4,
+ EXC_OFFSET(1),
+ eEncodingUint,
+ eFormatHex,
+ {LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
+ LLDB_INVALID_REGNUM, exc_fsr},
+ nullptr,
+ nullptr,
+ },
+ {
+ "far",
+ nullptr,
+ 4,
+ EXC_OFFSET(2),
+ eEncodingUint,
+ eFormatHex,
+ {LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
+ LLDB_INVALID_REGNUM, exc_far},
+ nullptr,
+ nullptr,
+ },
{DEFINE_DBG(bvr, 0)},
{DEFINE_DBG(bvr, 1)},
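For orientation: the hand-written s/d/q entries removed above are replaced by the FPU_REG and FPU_QREG macros added at the top of this hunk, the old g_qN_contains arrays are renamed to g_qN_invalidates, and new single-element g_qN_contained arrays record which Q register each S/D register lives in. As a sketch only (obtained by substituting into the macro shown earlier in this hunk, not a line of the patch), FPU_REG(s0, 4, 0, q0) expands to roughly:

  { "s0", nullptr, 4, FPU_OFFSET(0), eEncodingIEEE754, eFormatFloat,
    {LLDB_INVALID_REGNUM, dwarf_s0, LLDB_INVALID_REGNUM,
     LLDB_INVALID_REGNUM, fpu_s0},
    g_q0_contained,      // s0 is a slice of q0
    g_s0_invalidates },  // registers to refetch after s0 is written

which matches the per-register entries deleted above, minus the trailing fields that were dropped from the struct.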
diff --git a/lldb/source/Plugins/Process/Utility/RegisterInfos_arm64.h b/lldb/source/Plugins/Process/Utility/RegisterInfos_arm64.h
index 47cedc31bcd7..ccfbd6afbefb 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterInfos_arm64.h
+++ b/lldb/source/Plugins/Process/Utility/RegisterInfos_arm64.h
@@ -489,7 +489,6 @@ static uint32_t g_d31_invalidates[] = {fpu_v31, fpu_s31, LLDB_INVALID_REGNUM};
{ \
#reg, nullptr, 8, GPR_OFFSET(gpr_##reg), lldb::eEncodingUint, \
lldb::eFormatHex, GPR64_KIND(reg, generic_kind), nullptr, nullptr, \
- nullptr, 0 \
}
// Defines a 64-bit general purpose register
@@ -497,7 +496,6 @@ static uint32_t g_d31_invalidates[] = {fpu_v31, fpu_s31, LLDB_INVALID_REGNUM};
{ \
#reg, #alt, 8, GPR_OFFSET(gpr_##reg), lldb::eEncodingUint, \
lldb::eFormatHex, GPR64_KIND(reg, generic_kind), nullptr, nullptr, \
- nullptr, 0 \
}
// Defines a 32-bit general purpose pseudo register
@@ -506,15 +504,14 @@ static uint32_t g_d31_invalidates[] = {fpu_v31, fpu_s31, LLDB_INVALID_REGNUM};
#wreg, nullptr, 4, \
GPR_OFFSET(gpr_##xreg) + GPR_W_PSEUDO_REG_ENDIAN_OFFSET, \
lldb::eEncodingUint, lldb::eFormatHex, LLDB_KIND(gpr_##wreg), \
- g_contained_##xreg, g_##wreg##_invalidates, nullptr, 0 \
+ g_contained_##xreg, g_##wreg##_invalidates, \
}
// Defines a vector register with 16-byte size
#define DEFINE_VREG(reg) \
{ \
#reg, nullptr, 16, FPU_OFFSET(fpu_##reg - fpu_v0), lldb::eEncodingVector, \
- lldb::eFormatVectorOfUInt8, VREG_KIND(reg), nullptr, nullptr, nullptr, \
- 0 \
+ lldb::eFormatVectorOfUInt8, VREG_KIND(reg), nullptr, nullptr, \
}
// Defines S and D pseudo registers mapping over corresponding vector register
@@ -522,7 +519,7 @@ static uint32_t g_d31_invalidates[] = {fpu_v31, fpu_s31, LLDB_INVALID_REGNUM};
{ \
#reg, nullptr, size, FPU_OFFSET(fpu_##vreg - fpu_v0) + offset, \
lldb::eEncodingIEEE754, lldb::eFormatFloat, LLDB_KIND(fpu_##reg), \
- g_contained_##vreg, g_##reg##_invalidates, nullptr, 0 \
+ g_contained_##vreg, g_##reg##_invalidates, \
}
// Defines miscellaneous status and control registers like cpsr, fpsr etc
@@ -530,14 +527,13 @@ static uint32_t g_d31_invalidates[] = {fpu_v31, fpu_s31, LLDB_INVALID_REGNUM};
{ \
#reg, nullptr, size, TYPE##_OFFSET_NAME(reg), lldb::eEncodingUint, \
lldb::eFormatHex, MISC_##TYPE##_KIND(lldb_kind), nullptr, nullptr, \
- nullptr, 0 \
}
// Defines pointer authentication mask registers
#define DEFINE_EXTENSION_REG(reg) \
{ \
#reg, nullptr, 8, 0, lldb::eEncodingUint, lldb::eFormatHex, \
- KIND_ALL_INVALID, nullptr, nullptr, nullptr, 0 \
+ KIND_ALL_INVALID, nullptr, nullptr, \
}
static lldb_private::RegisterInfo g_register_infos_arm64_le[] = {
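A pattern repeated across every header in this patch is that each initializer loses a trailing "nullptr, 0" pair. A minimal before/after sketch of the DEFINE_GPR64 macro just above, assuming (the patch itself does not say so) that those two initializers filled the dynamic-size DWARF expression pointer and length members removed from lldb_private::RegisterInfo upstream:

  // before: ..., value_regs, invalidate_regs, dyn_size_dwarf_expr, dyn_size_len
  { #reg, nullptr, 8, GPR_OFFSET(gpr_##reg), lldb::eEncodingUint,
    lldb::eFormatHex, GPR64_KIND(reg, generic_kind), nullptr, nullptr,
    nullptr, 0 },
  // after: the struct now ends at invalidate_regs, so the initializer does too
  { #reg, nullptr, 8, GPR_OFFSET(gpr_##reg), lldb::eEncodingUint,
    lldb::eFormatHex, GPR64_KIND(reg, generic_kind), nullptr, nullptr },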
diff --git a/lldb/source/Plugins/Process/Utility/RegisterInfos_arm64_sve.h b/lldb/source/Plugins/Process/Utility/RegisterInfos_arm64_sve.h
index 9551db7e8ebf..b2837b8f1e98 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterInfos_arm64_sve.h
+++ b/lldb/source/Plugins/Process/Utility/RegisterInfos_arm64_sve.h
@@ -309,7 +309,6 @@ static uint32_t g_contained_z31[] = {sve_z31, LLDB_INVALID_REGNUM};
{ \
#vreg, nullptr, 16, 0, lldb::eEncodingVector, lldb::eFormatVectorOfUInt8, \
VREG_KIND(vreg), g_contained_##zreg, g_sve_##vreg##_invalidates, \
- nullptr, 0 \
}
// Defines S and D pseudo registers mapping over corresponding vector register
@@ -317,21 +316,20 @@ static uint32_t g_contained_z31[] = {sve_z31, LLDB_INVALID_REGNUM};
{ \
#reg, nullptr, size, 0, lldb::eEncodingIEEE754, lldb::eFormatFloat, \
LLDB_KIND(fpu_##reg), g_contained_##zreg, g_sve_##reg##_invalidates, \
- nullptr, 0 \
}
// Defines a Z vector register with 16-byte default size
#define DEFINE_ZREG(reg) \
{ \
#reg, nullptr, 16, 0, lldb::eEncodingVector, lldb::eFormatVectorOfUInt8, \
- SVE_REG_KIND(reg), nullptr, nullptr, nullptr, 0 \
+ SVE_REG_KIND(reg), nullptr, nullptr, \
}
// Defines a P vector register with 2-byte default size
#define DEFINE_PREG(reg) \
{ \
#reg, nullptr, 2, 0, lldb::eEncodingVector, lldb::eFormatVectorOfUInt8, \
- SVE_REG_KIND(reg), nullptr, nullptr, nullptr, 0 \
+ SVE_REG_KIND(reg), nullptr, nullptr, \
}
static lldb_private::RegisterInfo g_register_infos_arm64_sve_le[] = {
diff --git a/lldb/source/Plugins/Process/Utility/RegisterInfos_i386.h b/lldb/source/Plugins/Process/Utility/RegisterInfos_i386.h
index 15c7cac544a1..4b73008adb16 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterInfos_i386.h
+++ b/lldb/source/Plugins/Process/Utility/RegisterInfos_i386.h
@@ -64,7 +64,7 @@
GPR_OFFSET(reg), eEncodingUint, eFormatHex, \
{kind1, kind2, kind3, kind4, \
lldb_##reg##_i386 }, \
- nullptr, nullptr, nullptr, 0 \
+ nullptr, nullptr, \
}
#define DEFINE_FPR(name, reg, kind1, kind2, kind3, kind4) \
@@ -72,7 +72,7 @@
#name, nullptr, FPR_SIZE(reg), FPR_OFFSET(reg), eEncodingUint, eFormatHex, \
{kind1, kind2, kind3, kind4, \
lldb_##name##_i386 }, \
- nullptr, nullptr, nullptr, 0 \
+ nullptr, nullptr, \
}
// RegisterKind: EHFrame, DWARF, Generic, Process Plugin, LLDB
@@ -84,7 +84,7 @@
stmm[i]), eEncodingVector, eFormatVectorOfUInt8, \
{ehframe_st##i##_i386, dwarf_st##i##_i386, LLDB_INVALID_REGNUM, \
LLDB_INVALID_REGNUM, lldb_st##i##_i386 }, \
- nullptr, nullptr, nullptr, 0 \
+ nullptr, nullptr, \
}
#define DEFINE_FP_MM(reg, i, streg) \
@@ -94,7 +94,7 @@
{dwarf_mm##i##_i386, dwarf_mm##i##_i386, LLDB_INVALID_REGNUM, \
LLDB_INVALID_REGNUM, lldb_mm##i##_i386 }, \
RegisterContextPOSIX_x86::g_contained_##streg##_32, \
- RegisterContextPOSIX_x86::g_invalidate_##streg##_32, nullptr, 0 \
+ RegisterContextPOSIX_x86::g_invalidate_##streg##_32, \
}
#define DEFINE_XMM(reg, i) \
@@ -104,7 +104,7 @@
reg[i]), eEncodingVector, eFormatVectorOfUInt8, \
{ehframe_##reg##i##_i386, dwarf_##reg##i##_i386, \
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, lldb_##reg##i##_i386 }, \
- nullptr, nullptr, nullptr, 0 \
+ nullptr, nullptr, \
}
// I believe the YMM registers use dwarf_xmm_%_i386 register numbers and then
@@ -116,7 +116,7 @@
{LLDB_INVALID_REGNUM, dwarf_xmm##i##_i386, \
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, \
lldb_##reg##i##_i386 }, \
- nullptr, nullptr, nullptr, 0 \
+ nullptr, nullptr, \
}
#define DEFINE_BNDR(reg, i) \
@@ -125,7 +125,7 @@
LLVM_EXTENSION BNDR_OFFSET(i), eEncodingVector, eFormatVectorOfUInt64, \
{dwarf_##reg##i##_i386, dwarf_##reg##i##_i386, LLDB_INVALID_REGNUM, \
LLDB_INVALID_REGNUM, lldb_##reg##i##_i386 }, \
- nullptr, nullptr, nullptr, 0 \
+ nullptr, nullptr, \
}
#define DEFINE_BNDC(name, i) \
@@ -135,7 +135,7 @@
eFormatVectorOfUInt8, \
{LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, \
LLDB_INVALID_REGNUM, lldb_##name##_i386 }, \
- nullptr, nullptr, nullptr, 0 \
+ nullptr, nullptr, \
}
#define DEFINE_DR(reg, i) \
@@ -145,7 +145,7 @@
{LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, \
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, \
lldb_##reg##i##_i386 }, \
- nullptr, nullptr, nullptr, 0 \
+ nullptr, nullptr, \
}
#define DEFINE_GPR_PSEUDO_16(reg16, reg32) \
@@ -156,7 +156,7 @@
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, \
lldb_##reg16##_i386 }, \
RegisterContextPOSIX_x86::g_contained_##reg32, \
- RegisterContextPOSIX_x86::g_invalidate_##reg32, nullptr, 0 \
+ RegisterContextPOSIX_x86::g_invalidate_##reg32, \
}
#define DEFINE_GPR_PSEUDO_8H(reg8, reg32) \
@@ -167,7 +167,7 @@
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, \
lldb_##reg8##_i386 }, \
RegisterContextPOSIX_x86::g_contained_##reg32, \
- RegisterContextPOSIX_x86::g_invalidate_##reg32, nullptr, 0 \
+ RegisterContextPOSIX_x86::g_invalidate_##reg32, \
}
#define DEFINE_GPR_PSEUDO_8L(reg8, reg32) \
@@ -178,7 +178,7 @@
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, \
lldb_##reg8##_i386 }, \
RegisterContextPOSIX_x86::g_contained_##reg32, \
- RegisterContextPOSIX_x86::g_invalidate_##reg32, nullptr, 0 \
+ RegisterContextPOSIX_x86::g_invalidate_##reg32, \
}
static RegisterInfo g_register_infos_i386[] = {
diff --git a/lldb/source/Plugins/Process/Utility/RegisterInfos_mips64.h b/lldb/source/Plugins/Process/Utility/RegisterInfos_mips64.h
index b28b91810e48..60811d65ffc5 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterInfos_mips64.h
+++ b/lldb/source/Plugins/Process/Utility/RegisterInfos_mips64.h
@@ -11,12 +11,16 @@
#include "lldb/Core/dwarf.h"
#include "llvm/Support/Compiler.h"
-
#ifdef DECLARE_REGISTER_INFOS_MIPS64_STRUCT
// Computes the offset of the given GPR in the user data area.
#define GPR_OFFSET(regname) (LLVM_EXTENSION offsetof(GPR_freebsd_mips, regname))
+// Computes the offset of the given FPR in the extended data area.
+#define FPR_OFFSET(regname) \
+ (sizeof(GPR_freebsd_mips) + \
+ LLVM_EXTENSION offsetof(FPR_freebsd_mips, regname))
+
// RegisterKind: EHFrame, DWARF, Generic, Process Plugin, LLDB
// Note that the size and offset will be updated by platform-specific classes.
@@ -26,9 +30,28 @@
GPR_OFFSET(reg), eEncodingUint, eFormatHex, \
{kind1, kind2, kind3, kind4, \
gpr_##reg##_mips64 }, \
- NULL, NULL, NULL, 0 \
+ NULL, NULL \
+ }
+
+#define DEFINE_FPR(reg, alt, kind1, kind2, kind3) \
+ { \
+ #reg, alt, sizeof(((FPR_freebsd_mips *) 0)->reg), \
+ FPR_OFFSET(reg), eEncodingIEEE754, eFormatFloat, \
+ {kind1, kind2, kind3, LLDB_INVALID_REGNUM, \
+ fpr_##reg##_mips64 }, \
+ NULL, NULL, \
}
+#define DEFINE_FPR_INFO(reg, alt, kind1, kind2, kind3) \
+ { \
+ #reg, alt, sizeof(((FPR_freebsd_mips *) 0)->reg), \
+ FPR_OFFSET(reg), eEncodingUint, eFormatHex, \
+ {kind1, kind2, kind3, LLDB_INVALID_REGNUM, \
+ fpr_##reg##_mips64 }, \
+ NULL, NULL, \
+ }
+
+
static RegisterInfo g_register_infos_mips64[] = {
// General purpose registers. EH_Frame, DWARF,
// Generic, Process Plugin
@@ -112,6 +135,75 @@ static RegisterInfo g_register_infos_mips64[] = {
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM),
DEFINE_GPR(dummy, nullptr, dwarf_dummy_mips64, dwarf_dummy_mips64,
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM),
+
+ DEFINE_FPR(f0, nullptr, dwarf_f0_mips64, dwarf_f0_mips64,
+ LLDB_INVALID_REGNUM),
+ DEFINE_FPR(f1, nullptr, dwarf_f1_mips64, dwarf_f1_mips64,
+ LLDB_INVALID_REGNUM),
+ DEFINE_FPR(f2, nullptr, dwarf_f2_mips64, dwarf_f2_mips64,
+ LLDB_INVALID_REGNUM),
+ DEFINE_FPR(f3, nullptr, dwarf_f3_mips64, dwarf_f3_mips64,
+ LLDB_INVALID_REGNUM),
+ DEFINE_FPR(f4, nullptr, dwarf_f4_mips64, dwarf_f4_mips64,
+ LLDB_INVALID_REGNUM),
+ DEFINE_FPR(f5, nullptr, dwarf_f5_mips64, dwarf_f5_mips64,
+ LLDB_INVALID_REGNUM),
+ DEFINE_FPR(f6, nullptr, dwarf_f6_mips64, dwarf_f6_mips64,
+ LLDB_INVALID_REGNUM),
+ DEFINE_FPR(f7, nullptr, dwarf_f7_mips64, dwarf_f7_mips64,
+ LLDB_INVALID_REGNUM),
+ DEFINE_FPR(f8, nullptr, dwarf_f8_mips64, dwarf_f8_mips64,
+ LLDB_INVALID_REGNUM),
+ DEFINE_FPR(f9, nullptr, dwarf_f9_mips64, dwarf_f9_mips64,
+ LLDB_INVALID_REGNUM),
+ DEFINE_FPR(f10, nullptr, dwarf_f10_mips64, dwarf_f10_mips64,
+ LLDB_INVALID_REGNUM),
+ DEFINE_FPR(f11, nullptr, dwarf_f11_mips64, dwarf_f11_mips64,
+ LLDB_INVALID_REGNUM),
+ DEFINE_FPR(f12, nullptr, dwarf_f12_mips64, dwarf_f12_mips64,
+ LLDB_INVALID_REGNUM),
+ DEFINE_FPR(f13, nullptr, dwarf_f13_mips64, dwarf_f13_mips64,
+ LLDB_INVALID_REGNUM),
+ DEFINE_FPR(f14, nullptr, dwarf_f14_mips64, dwarf_f14_mips64,
+ LLDB_INVALID_REGNUM),
+ DEFINE_FPR(f15, nullptr, dwarf_f15_mips64, dwarf_f15_mips64,
+ LLDB_INVALID_REGNUM),
+ DEFINE_FPR(f16, nullptr, dwarf_f16_mips64, dwarf_f16_mips64,
+ LLDB_INVALID_REGNUM),
+ DEFINE_FPR(f17, nullptr, dwarf_f17_mips64, dwarf_f17_mips64,
+ LLDB_INVALID_REGNUM),
+ DEFINE_FPR(f18, nullptr, dwarf_f18_mips64, dwarf_f18_mips64,
+ LLDB_INVALID_REGNUM),
+ DEFINE_FPR(f19, nullptr, dwarf_f19_mips64, dwarf_f19_mips64,
+ LLDB_INVALID_REGNUM),
+ DEFINE_FPR(f20, nullptr, dwarf_f20_mips64, dwarf_f20_mips64,
+ LLDB_INVALID_REGNUM),
+ DEFINE_FPR(f21, nullptr, dwarf_f21_mips64, dwarf_f21_mips64,
+ LLDB_INVALID_REGNUM),
+ DEFINE_FPR(f22, nullptr, dwarf_f22_mips64, dwarf_f22_mips64,
+ LLDB_INVALID_REGNUM),
+ DEFINE_FPR(f23, nullptr, dwarf_f23_mips64, dwarf_f23_mips64,
+ LLDB_INVALID_REGNUM),
+ DEFINE_FPR(f24, nullptr, dwarf_f24_mips64, dwarf_f24_mips64,
+ LLDB_INVALID_REGNUM),
+ DEFINE_FPR(f25, nullptr, dwarf_f25_mips64, dwarf_f25_mips64,
+ LLDB_INVALID_REGNUM),
+ DEFINE_FPR(f26, nullptr, dwarf_f26_mips64, dwarf_f26_mips64,
+ LLDB_INVALID_REGNUM),
+ DEFINE_FPR(f27, nullptr, dwarf_f27_mips64, dwarf_f27_mips64,
+ LLDB_INVALID_REGNUM),
+ DEFINE_FPR(f28, nullptr, dwarf_f28_mips64, dwarf_f28_mips64,
+ LLDB_INVALID_REGNUM),
+ DEFINE_FPR(f29, nullptr, dwarf_f29_mips64, dwarf_f29_mips64,
+ LLDB_INVALID_REGNUM),
+ DEFINE_FPR(f30, nullptr, dwarf_f30_mips64, dwarf_f30_mips64,
+ LLDB_INVALID_REGNUM),
+ DEFINE_FPR(f31, nullptr, dwarf_f31_mips64, dwarf_f31_mips64,
+ LLDB_INVALID_REGNUM),
+ DEFINE_FPR_INFO(fcsr, nullptr, dwarf_fcsr_mips64, dwarf_fcsr_mips64,
+ LLDB_INVALID_REGNUM),
+ DEFINE_FPR_INFO(fir, nullptr, dwarf_fir_mips64, dwarf_fir_mips64,
+ LLDB_INVALID_REGNUM),
};
static_assert((sizeof(g_register_infos_mips64) /
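The new MIPS64 FPR entries rely on FPR_OFFSET to place the floating-point block immediately after the GPR block in the user data area. As a sketch assembled from the macros in the hunk above (not a line of the patch), DEFINE_FPR(f0, nullptr, dwarf_f0_mips64, dwarf_f0_mips64, LLDB_INVALID_REGNUM) expands to roughly:

  { "f0", nullptr,
    sizeof(((FPR_freebsd_mips *) 0)->f0),                       // field size
    sizeof(GPR_freebsd_mips) + offsetof(FPR_freebsd_mips, f0),  // FPR_OFFSET(f0)
    eEncodingIEEE754, eFormatFloat,
    {dwarf_f0_mips64, dwarf_f0_mips64, LLDB_INVALID_REGNUM,
     LLDB_INVALID_REGNUM, fpr_f0_mips64},
    NULL, NULL },

DEFINE_FPR_INFO differs only in using eEncodingUint/eFormatHex for the fcsr and fir control registers.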
diff --git a/lldb/source/Plugins/Process/Utility/RegisterInfos_powerpc.h b/lldb/source/Plugins/Process/Utility/RegisterInfos_powerpc.h
index 04b4171b6722..0fd0a526f921 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterInfos_powerpc.h
+++ b/lldb/source/Plugins/Process/Utility/RegisterInfos_powerpc.h
@@ -24,7 +24,7 @@
dwarf_##reg##_powerpc, lldb_kind, \
LLDB_INVALID_REGNUM, \
gpr_##reg##_powerpc }, \
- NULL, NULL, NULL, 0 \
+ NULL, NULL, \
}
#define DEFINE_FPR(reg, lldb_kind) \
{ \
@@ -32,7 +32,7 @@
{dwarf_##reg##_powerpc, dwarf_##reg##_powerpc, \
lldb_kind, LLDB_INVALID_REGNUM, \
fpr_##reg##_powerpc }, \
- NULL, NULL, NULL, 0 \
+ NULL, NULL, \
}
#define DEFINE_VMX(reg, lldb_kind) \
{ \
@@ -40,23 +40,23 @@
{dwarf_##reg##_powerpc, dwarf_##reg##_powerpc, \
lldb_kind, LLDB_INVALID_REGNUM, \
vmx_##reg##_powerpc }, \
- NULL, NULL, NULL, 0 \
+ NULL, NULL, \
}
// General purpose registers. EH_Frame, DWARF,
// Generic, Process Plugin
#define POWERPC_REGS \
DEFINE_GPR(r0, NULL, LLDB_INVALID_REGNUM) \
- , DEFINE_GPR(r1, "sp", LLDB_REGNUM_GENERIC_SP), \
+ , DEFINE_GPR(r1, NULL, LLDB_REGNUM_GENERIC_SP), \
DEFINE_GPR(r2, NULL, LLDB_INVALID_REGNUM), \
- DEFINE_GPR(r3, "arg1", LLDB_REGNUM_GENERIC_ARG1), \
- DEFINE_GPR(r4, "arg2", LLDB_REGNUM_GENERIC_ARG2), \
- DEFINE_GPR(r5, "arg3", LLDB_REGNUM_GENERIC_ARG3), \
- DEFINE_GPR(r6, "arg4", LLDB_REGNUM_GENERIC_ARG4), \
- DEFINE_GPR(r7, "arg5", LLDB_REGNUM_GENERIC_ARG5), \
- DEFINE_GPR(r8, "arg6", LLDB_REGNUM_GENERIC_ARG6), \
- DEFINE_GPR(r9, "arg7", LLDB_REGNUM_GENERIC_ARG7), \
- DEFINE_GPR(r10, "arg8", LLDB_REGNUM_GENERIC_ARG8), \
+ DEFINE_GPR(r3, NULL, LLDB_REGNUM_GENERIC_ARG1), \
+ DEFINE_GPR(r4, NULL, LLDB_REGNUM_GENERIC_ARG2), \
+ DEFINE_GPR(r5, NULL, LLDB_REGNUM_GENERIC_ARG3), \
+ DEFINE_GPR(r6, NULL, LLDB_REGNUM_GENERIC_ARG4), \
+ DEFINE_GPR(r7, NULL, LLDB_REGNUM_GENERIC_ARG5), \
+ DEFINE_GPR(r8, NULL, LLDB_REGNUM_GENERIC_ARG6), \
+ DEFINE_GPR(r9, NULL, LLDB_REGNUM_GENERIC_ARG7), \
+ DEFINE_GPR(r10, NULL, LLDB_REGNUM_GENERIC_ARG8), \
DEFINE_GPR(r11, NULL, LLDB_INVALID_REGNUM), \
DEFINE_GPR(r12, NULL, LLDB_INVALID_REGNUM), \
DEFINE_GPR(r13, NULL, LLDB_INVALID_REGNUM), \
@@ -78,11 +78,11 @@
DEFINE_GPR(r29, NULL, LLDB_INVALID_REGNUM), \
DEFINE_GPR(r30, NULL, LLDB_INVALID_REGNUM), \
DEFINE_GPR(r31, NULL, LLDB_INVALID_REGNUM), \
- DEFINE_GPR(lr, "lr", LLDB_REGNUM_GENERIC_RA), \
- DEFINE_GPR(cr, "cr", LLDB_REGNUM_GENERIC_FLAGS), \
- DEFINE_GPR(xer, "xer", LLDB_INVALID_REGNUM), \
- DEFINE_GPR(ctr, "ctr", LLDB_INVALID_REGNUM), \
- DEFINE_GPR(pc, "pc", LLDB_REGNUM_GENERIC_PC), \
+ DEFINE_GPR(lr, NULL, LLDB_REGNUM_GENERIC_RA), \
+ DEFINE_GPR(cr, NULL, LLDB_REGNUM_GENERIC_FLAGS), \
+ DEFINE_GPR(xer, NULL, LLDB_INVALID_REGNUM), \
+ DEFINE_GPR(ctr, NULL, LLDB_INVALID_REGNUM), \
+ DEFINE_GPR(pc, NULL, LLDB_REGNUM_GENERIC_PC), \
DEFINE_FPR(f0, LLDB_INVALID_REGNUM), \
DEFINE_FPR(f1, LLDB_INVALID_REGNUM), \
DEFINE_FPR(f2, LLDB_INVALID_REGNUM), \
@@ -125,8 +125,7 @@
LLDB_INVALID_REGNUM, fpr_fpscr_powerpc}, \
NULL, \
NULL, \
- NULL, \
- 0}, \
+ }, \
DEFINE_VMX(v0, LLDB_INVALID_REGNUM), \
DEFINE_VMX(v1, LLDB_INVALID_REGNUM), \
DEFINE_VMX(v2, LLDB_INVALID_REGNUM), \
@@ -169,8 +168,7 @@
LLDB_INVALID_REGNUM, vmx_vrsave_powerpc}, \
NULL, \
NULL, \
- NULL, \
- 0}, \
+ }, \
{"vscr", \
NULL, \
4, \
@@ -181,8 +179,7 @@
LLDB_INVALID_REGNUM, vmx_vscr_powerpc}, \
NULL, \
NULL, \
- NULL, \
- 0},
+ },
static RegisterInfo g_register_infos_powerpc64[] = {
#define GPR GPR64
diff --git a/lldb/source/Plugins/Process/Utility/RegisterInfos_ppc64.h b/lldb/source/Plugins/Process/Utility/RegisterInfos_ppc64.h
index 059dba45f9bb..19f2e5627703 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterInfos_ppc64.h
+++ b/lldb/source/Plugins/Process/Utility/RegisterInfos_ppc64.h
@@ -31,7 +31,7 @@
lldb_kind, \
LLDB_INVALID_REGNUM, \
gpr_##reg##_ppc64 }, \
- NULL, NULL, NULL, 0 \
+ NULL, NULL, \
}
#define DEFINE_FPR_PPC64(reg, alt, lldb_kind) \
{ \
@@ -40,7 +40,7 @@
{ppc64_dwarf::dwarf_##reg##_ppc64, \
ppc64_dwarf::dwarf_##reg##_ppc64, lldb_kind, LLDB_INVALID_REGNUM, \
fpr_##reg##_ppc64 }, \
- NULL, NULL, NULL, 0 \
+ NULL, NULL, \
}
#define DEFINE_VMX_PPC64(reg, lldb_kind) \
{ \
@@ -49,23 +49,23 @@
{ppc64_dwarf::dwarf_##reg##_ppc64, \
ppc64_dwarf::dwarf_##reg##_ppc64, lldb_kind, LLDB_INVALID_REGNUM, \
vmx_##reg##_ppc64 }, \
- NULL, NULL, NULL, 0 \
+ NULL, NULL, \
}
// General purpose registers.
// EH_Frame, Generic, Process Plugin
#define PPC64_REGS \
DEFINE_GPR_PPC64(r0, NULL, LLDB_INVALID_REGNUM) \
- , DEFINE_GPR_PPC64(r1, "sp", LLDB_REGNUM_GENERIC_SP), \
+ , DEFINE_GPR_PPC64(r1, NULL, LLDB_REGNUM_GENERIC_SP), \
DEFINE_GPR_PPC64(r2, NULL, LLDB_INVALID_REGNUM), \
- DEFINE_GPR_PPC64(r3, "arg1", LLDB_REGNUM_GENERIC_ARG1), \
- DEFINE_GPR_PPC64(r4, "arg2", LLDB_REGNUM_GENERIC_ARG2), \
- DEFINE_GPR_PPC64(r5, "arg3", LLDB_REGNUM_GENERIC_ARG3), \
- DEFINE_GPR_PPC64(r6, "arg4", LLDB_REGNUM_GENERIC_ARG4), \
- DEFINE_GPR_PPC64(r7, "arg5", LLDB_REGNUM_GENERIC_ARG5), \
- DEFINE_GPR_PPC64(r8, "arg6", LLDB_REGNUM_GENERIC_ARG6), \
- DEFINE_GPR_PPC64(r9, "arg7", LLDB_REGNUM_GENERIC_ARG7), \
- DEFINE_GPR_PPC64(r10, "arg8", LLDB_REGNUM_GENERIC_ARG8), \
+ DEFINE_GPR_PPC64(r3, NULL, LLDB_REGNUM_GENERIC_ARG1), \
+ DEFINE_GPR_PPC64(r4, NULL, LLDB_REGNUM_GENERIC_ARG2), \
+ DEFINE_GPR_PPC64(r5, NULL, LLDB_REGNUM_GENERIC_ARG3), \
+ DEFINE_GPR_PPC64(r6, NULL, LLDB_REGNUM_GENERIC_ARG4), \
+ DEFINE_GPR_PPC64(r7, NULL, LLDB_REGNUM_GENERIC_ARG5), \
+ DEFINE_GPR_PPC64(r8, NULL, LLDB_REGNUM_GENERIC_ARG6), \
+ DEFINE_GPR_PPC64(r9, NULL, LLDB_REGNUM_GENERIC_ARG7), \
+ DEFINE_GPR_PPC64(r10, NULL, LLDB_REGNUM_GENERIC_ARG8), \
DEFINE_GPR_PPC64(r11, NULL, LLDB_INVALID_REGNUM), \
DEFINE_GPR_PPC64(r12, NULL, LLDB_INVALID_REGNUM), \
DEFINE_GPR_PPC64(r13, NULL, LLDB_INVALID_REGNUM), \
@@ -87,12 +87,12 @@
DEFINE_GPR_PPC64(r29, NULL, LLDB_INVALID_REGNUM), \
DEFINE_GPR_PPC64(r30, NULL, LLDB_INVALID_REGNUM), \
DEFINE_GPR_PPC64(r31, NULL, LLDB_INVALID_REGNUM), \
- DEFINE_GPR_PPC64(cr, "cr", LLDB_REGNUM_GENERIC_FLAGS), \
- DEFINE_GPR_PPC64(msr, "msr", LLDB_INVALID_REGNUM), \
- DEFINE_GPR_PPC64(xer, "xer", LLDB_INVALID_REGNUM), \
- DEFINE_GPR_PPC64(lr, "lr", LLDB_REGNUM_GENERIC_RA), \
- DEFINE_GPR_PPC64(ctr, "ctr", LLDB_INVALID_REGNUM), \
- DEFINE_GPR_PPC64(pc, "pc", LLDB_REGNUM_GENERIC_PC), \
+ DEFINE_GPR_PPC64(cr, NULL, LLDB_REGNUM_GENERIC_FLAGS), \
+ DEFINE_GPR_PPC64(msr, NULL, LLDB_INVALID_REGNUM), \
+ DEFINE_GPR_PPC64(xer, NULL, LLDB_INVALID_REGNUM), \
+ DEFINE_GPR_PPC64(lr, NULL, LLDB_REGNUM_GENERIC_RA), \
+ DEFINE_GPR_PPC64(ctr, NULL, LLDB_INVALID_REGNUM), \
+ DEFINE_GPR_PPC64(pc, NULL, LLDB_REGNUM_GENERIC_PC), \
DEFINE_FPR_PPC64(f0, NULL, LLDB_INVALID_REGNUM), \
DEFINE_FPR_PPC64(f1, NULL, LLDB_INVALID_REGNUM), \
DEFINE_FPR_PPC64(f2, NULL, LLDB_INVALID_REGNUM), \
@@ -136,8 +136,7 @@
LLDB_INVALID_REGNUM, fpr_fpscr_ppc64}, \
NULL, \
NULL, \
- NULL, \
- 0}, \
+ }, \
DEFINE_VMX_PPC64(vr0, LLDB_INVALID_REGNUM), \
DEFINE_VMX_PPC64(vr1, LLDB_INVALID_REGNUM), \
DEFINE_VMX_PPC64(vr2, LLDB_INVALID_REGNUM), \
@@ -180,8 +179,7 @@
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, vmx_vscr_ppc64}, \
NULL, \
NULL, \
- NULL, \
- 0}, \
+ }, \
{"vrsave", \
NULL, \
4, \
@@ -193,8 +191,7 @@
LLDB_INVALID_REGNUM, vmx_vrsave_ppc64}, \
NULL, \
NULL, \
- NULL, \
- 0}, /* */
+ }, /* */
typedef struct _GPR_PPC64 {
uint64_t r0;
diff --git a/lldb/source/Plugins/Process/Utility/RegisterInfos_ppc64le.h b/lldb/source/Plugins/Process/Utility/RegisterInfos_ppc64le.h
index 9937da2f3050..f8f8651c856c 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterInfos_ppc64le.h
+++ b/lldb/source/Plugins/Process/Utility/RegisterInfos_ppc64le.h
@@ -31,7 +31,7 @@
lldb_kind, \
LLDB_INVALID_REGNUM, \
gpr_##reg##_ppc64le }, \
- NULL, NULL, NULL, 0 \
+ NULL, NULL, \
}
#define DEFINE_FPR(reg, alt, lldb_kind) \
{ \
@@ -39,7 +39,7 @@
{ppc64le_dwarf::dwarf_##reg##_ppc64le, \
ppc64le_dwarf::dwarf_##reg##_ppc64le, lldb_kind, LLDB_INVALID_REGNUM, \
fpr_##reg##_ppc64le }, \
- NULL, NULL, NULL, 0 \
+ NULL, NULL, \
}
#define DEFINE_VMX(reg, lldb_kind) \
{ \
@@ -48,7 +48,7 @@
{ppc64le_dwarf::dwarf_##reg##_ppc64le, \
ppc64le_dwarf::dwarf_##reg##_ppc64le, lldb_kind, LLDB_INVALID_REGNUM, \
vmx_##reg##_ppc64le }, \
- NULL, NULL, NULL, 0 \
+ NULL, NULL, \
}
#define DEFINE_VSX(reg, lldb_kind) \
{ \
@@ -57,23 +57,23 @@
{ppc64le_dwarf::dwarf_##reg##_ppc64le, \
ppc64le_dwarf::dwarf_##reg##_ppc64le, lldb_kind, LLDB_INVALID_REGNUM, \
vsx_##reg##_ppc64le }, \
- NULL, NULL, NULL, 0 \
+ NULL, NULL, \
}
// General purpose registers.
// EH_Frame, Generic, Process Plugin
#define POWERPC_REGS \
DEFINE_GPR(r0, NULL, LLDB_INVALID_REGNUM) \
- , DEFINE_GPR(r1, "sp", LLDB_REGNUM_GENERIC_SP), \
+ , DEFINE_GPR(r1, NULL, LLDB_REGNUM_GENERIC_SP), \
DEFINE_GPR(r2, NULL, LLDB_INVALID_REGNUM), \
- DEFINE_GPR(r3, "arg1", LLDB_REGNUM_GENERIC_ARG1), \
- DEFINE_GPR(r4, "arg2", LLDB_REGNUM_GENERIC_ARG2), \
- DEFINE_GPR(r5, "arg3", LLDB_REGNUM_GENERIC_ARG3), \
- DEFINE_GPR(r6, "arg4", LLDB_REGNUM_GENERIC_ARG4), \
- DEFINE_GPR(r7, "arg5", LLDB_REGNUM_GENERIC_ARG5), \
- DEFINE_GPR(r8, "arg6", LLDB_REGNUM_GENERIC_ARG6), \
- DEFINE_GPR(r9, "arg7", LLDB_REGNUM_GENERIC_ARG7), \
- DEFINE_GPR(r10, "arg8", LLDB_REGNUM_GENERIC_ARG8), \
+ DEFINE_GPR(r3, NULL, LLDB_REGNUM_GENERIC_ARG1), \
+ DEFINE_GPR(r4, NULL, LLDB_REGNUM_GENERIC_ARG2), \
+ DEFINE_GPR(r5, NULL, LLDB_REGNUM_GENERIC_ARG3), \
+ DEFINE_GPR(r6, NULL, LLDB_REGNUM_GENERIC_ARG4), \
+ DEFINE_GPR(r7, NULL, LLDB_REGNUM_GENERIC_ARG5), \
+ DEFINE_GPR(r8, NULL, LLDB_REGNUM_GENERIC_ARG6), \
+ DEFINE_GPR(r9, NULL, LLDB_REGNUM_GENERIC_ARG7), \
+ DEFINE_GPR(r10, NULL, LLDB_REGNUM_GENERIC_ARG8), \
DEFINE_GPR(r11, NULL, LLDB_INVALID_REGNUM), \
DEFINE_GPR(r12, NULL, LLDB_INVALID_REGNUM), \
DEFINE_GPR(r13, NULL, LLDB_INVALID_REGNUM), \
@@ -95,15 +95,15 @@
DEFINE_GPR(r29, NULL, LLDB_INVALID_REGNUM), \
DEFINE_GPR(r30, NULL, LLDB_INVALID_REGNUM), \
DEFINE_GPR(r31, NULL, LLDB_INVALID_REGNUM), \
- DEFINE_GPR(pc, "pc", LLDB_REGNUM_GENERIC_PC), \
- DEFINE_GPR(msr, "msr", LLDB_INVALID_REGNUM), \
+ DEFINE_GPR(pc, NULL, LLDB_REGNUM_GENERIC_PC), \
+ DEFINE_GPR(msr, NULL, LLDB_INVALID_REGNUM), \
DEFINE_GPR(origr3, "orig_r3", LLDB_INVALID_REGNUM), \
- DEFINE_GPR(ctr, "ctr", LLDB_INVALID_REGNUM), \
- DEFINE_GPR(lr, "lr", LLDB_REGNUM_GENERIC_RA), \
- DEFINE_GPR(xer, "xer", LLDB_INVALID_REGNUM), \
- DEFINE_GPR(cr, "cr", LLDB_REGNUM_GENERIC_FLAGS), \
- DEFINE_GPR(softe, "softe", LLDB_INVALID_REGNUM), \
- DEFINE_GPR(trap, "trap", LLDB_INVALID_REGNUM), \
+ DEFINE_GPR(ctr, NULL, LLDB_INVALID_REGNUM), \
+ DEFINE_GPR(lr, NULL, LLDB_REGNUM_GENERIC_RA), \
+ DEFINE_GPR(xer, NULL, LLDB_INVALID_REGNUM), \
+ DEFINE_GPR(cr, NULL, LLDB_REGNUM_GENERIC_FLAGS), \
+ DEFINE_GPR(softe, NULL, LLDB_INVALID_REGNUM), \
+ DEFINE_GPR(trap, NULL, LLDB_INVALID_REGNUM), \
DEFINE_FPR(f0, NULL, LLDB_INVALID_REGNUM), \
DEFINE_FPR(f1, NULL, LLDB_INVALID_REGNUM), \
DEFINE_FPR(f2, NULL, LLDB_INVALID_REGNUM), \
@@ -147,8 +147,7 @@
LLDB_INVALID_REGNUM, fpr_fpscr_ppc64le}, \
NULL, \
NULL, \
- NULL, \
- 0}, \
+ }, \
DEFINE_VMX(vr0, LLDB_INVALID_REGNUM), \
DEFINE_VMX(vr1, LLDB_INVALID_REGNUM), \
DEFINE_VMX(vr2, LLDB_INVALID_REGNUM), \
@@ -191,8 +190,7 @@
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, vmx_vscr_ppc64le}, \
NULL, \
NULL, \
- NULL, \
- 0}, \
+ }, \
{"vrsave", \
NULL, \
4, \
@@ -204,8 +202,7 @@
LLDB_INVALID_REGNUM, vmx_vrsave_ppc64le}, \
NULL, \
NULL, \
- NULL, \
- 0}, \
+ }, \
DEFINE_VSX(vs0, LLDB_INVALID_REGNUM), \
DEFINE_VSX(vs1, LLDB_INVALID_REGNUM), \
DEFINE_VSX(vs2, LLDB_INVALID_REGNUM), \
diff --git a/lldb/source/Plugins/Process/Utility/RegisterInfos_s390x.h b/lldb/source/Plugins/Process/Utility/RegisterInfos_s390x.h
index d1df7c606207..65878b04eed8 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterInfos_s390x.h
+++ b/lldb/source/Plugins/Process/Utility/RegisterInfos_s390x.h
@@ -27,7 +27,7 @@
#name, alt, size, offset, eEncodingUint, eFormatHex, \
{dwarf_##name##_s390x, dwarf_##name##_s390x, generic, \
LLDB_INVALID_REGNUM, lldb_##name##_s390x }, \
- NULL, NULL, NULL, 0 \
+ NULL, NULL, \
}
#define DEFINE_GPR_NODWARF(name, size, offset, alt, generic) \
@@ -35,7 +35,7 @@
#name, alt, size, offset, eEncodingUint, eFormatHex, \
{LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, generic, \
LLDB_INVALID_REGNUM, lldb_##name##_s390x }, \
- NULL, NULL, NULL, 0 \
+ NULL, NULL, \
}
#define DEFINE_FPR(name, size, offset) \
@@ -43,7 +43,7 @@
#name, NULL, size, offset, eEncodingUint, eFormatHex, \
{dwarf_##name##_s390x, dwarf_##name##_s390x, LLDB_INVALID_REGNUM, \
LLDB_INVALID_REGNUM, lldb_##name##_s390x }, \
- NULL, NULL, NULL, 0 \
+ NULL, NULL, \
}
#define DEFINE_FPR_NODWARF(name, size, offset) \
@@ -51,27 +51,27 @@
#name, NULL, size, offset, eEncodingUint, eFormatHex, \
{LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, \
LLDB_INVALID_REGNUM, lldb_##name##_s390x }, \
- NULL, NULL, NULL, 0 \
+ NULL, NULL, \
}
static RegisterInfo g_register_infos_s390x[] = {
// General purpose registers.
DEFINE_GPR(r0, 8, GPR_OFFSET(0), nullptr, LLDB_INVALID_REGNUM),
DEFINE_GPR(r1, 8, GPR_OFFSET(1), nullptr, LLDB_INVALID_REGNUM),
- DEFINE_GPR(r2, 8, GPR_OFFSET(2), "arg1", LLDB_REGNUM_GENERIC_ARG1),
- DEFINE_GPR(r3, 8, GPR_OFFSET(3), "arg2", LLDB_REGNUM_GENERIC_ARG2),
- DEFINE_GPR(r4, 8, GPR_OFFSET(4), "arg3", LLDB_REGNUM_GENERIC_ARG3),
- DEFINE_GPR(r5, 8, GPR_OFFSET(5), "arg4", LLDB_REGNUM_GENERIC_ARG4),
- DEFINE_GPR(r6, 8, GPR_OFFSET(6), "arg5", LLDB_REGNUM_GENERIC_ARG5),
+ DEFINE_GPR(r2, 8, GPR_OFFSET(2), nullptr, LLDB_REGNUM_GENERIC_ARG1),
+ DEFINE_GPR(r3, 8, GPR_OFFSET(3), nullptr, LLDB_REGNUM_GENERIC_ARG2),
+ DEFINE_GPR(r4, 8, GPR_OFFSET(4), nullptr, LLDB_REGNUM_GENERIC_ARG3),
+ DEFINE_GPR(r5, 8, GPR_OFFSET(5), nullptr, LLDB_REGNUM_GENERIC_ARG4),
+ DEFINE_GPR(r6, 8, GPR_OFFSET(6), nullptr, LLDB_REGNUM_GENERIC_ARG5),
DEFINE_GPR(r7, 8, GPR_OFFSET(7), nullptr, LLDB_INVALID_REGNUM),
DEFINE_GPR(r8, 8, GPR_OFFSET(8), nullptr, LLDB_INVALID_REGNUM),
DEFINE_GPR(r9, 8, GPR_OFFSET(9), nullptr, LLDB_INVALID_REGNUM),
DEFINE_GPR(r10, 8, GPR_OFFSET(10), nullptr, LLDB_INVALID_REGNUM),
- DEFINE_GPR(r11, 8, GPR_OFFSET(11), "fp", LLDB_REGNUM_GENERIC_FP),
+ DEFINE_GPR(r11, 8, GPR_OFFSET(11), nullptr, LLDB_REGNUM_GENERIC_FP),
DEFINE_GPR(r12, 8, GPR_OFFSET(12), nullptr, LLDB_INVALID_REGNUM),
DEFINE_GPR(r13, 8, GPR_OFFSET(13), nullptr, LLDB_INVALID_REGNUM),
DEFINE_GPR(r14, 8, GPR_OFFSET(14), nullptr, LLDB_INVALID_REGNUM),
- DEFINE_GPR(r15, 8, GPR_OFFSET(15), "sp", LLDB_REGNUM_GENERIC_SP),
+ DEFINE_GPR(r15, 8, GPR_OFFSET(15), nullptr, LLDB_REGNUM_GENERIC_SP),
DEFINE_GPR(acr0, 4, ACR_OFFSET(0), nullptr, LLDB_INVALID_REGNUM),
DEFINE_GPR(acr1, 4, ACR_OFFSET(1), nullptr, LLDB_INVALID_REGNUM),
DEFINE_GPR(acr2, 4, ACR_OFFSET(2), nullptr, LLDB_INVALID_REGNUM),
@@ -88,8 +88,8 @@ static RegisterInfo g_register_infos_s390x[] = {
DEFINE_GPR(acr13, 4, ACR_OFFSET(13), nullptr, LLDB_INVALID_REGNUM),
DEFINE_GPR(acr14, 4, ACR_OFFSET(14), nullptr, LLDB_INVALID_REGNUM),
DEFINE_GPR(acr15, 4, ACR_OFFSET(15), nullptr, LLDB_INVALID_REGNUM),
- DEFINE_GPR(pswm, 8, 0, "flags", LLDB_REGNUM_GENERIC_FLAGS),
- DEFINE_GPR(pswa, 8, 8, "pc", LLDB_REGNUM_GENERIC_PC),
+ DEFINE_GPR(pswm, 8, 0, nullptr, LLDB_REGNUM_GENERIC_FLAGS),
+ DEFINE_GPR(pswa, 8, 8, nullptr, LLDB_REGNUM_GENERIC_PC),
// Floating point registers.
DEFINE_FPR(f0, 8, FPR_OFFSET(0)), DEFINE_FPR(f1, 8, FPR_OFFSET(1)),
diff --git a/lldb/source/Plugins/Process/Utility/RegisterInfos_x86_64.h b/lldb/source/Plugins/Process/Utility/RegisterInfos_x86_64.h
index 41c04b20f391..1de67165fb2f 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterInfos_x86_64.h
+++ b/lldb/source/Plugins/Process/Utility/RegisterInfos_x86_64.h
@@ -67,7 +67,7 @@
GPR_OFFSET(reg), eEncodingUint, eFormatHex, \
{kind1, kind2, kind3, kind4, \
lldb_##reg##_x86_64 }, \
- nullptr, nullptr, nullptr, 0 \
+ nullptr, nullptr, \
}
#define DEFINE_FPR(name, reg, kind1, kind2, kind3, kind4) \
@@ -75,7 +75,7 @@
#name, nullptr, FPR_SIZE(reg), FPR_OFFSET(reg), eEncodingUint, eFormatHex, \
{kind1, kind2, kind3, kind4, \
lldb_##name##_x86_64 }, \
- nullptr, nullptr, nullptr, 0 \
+ nullptr, nullptr, \
}
#define DEFINE_FP_ST(reg, i) \
@@ -85,7 +85,7 @@
stmm[i]), eEncodingVector, eFormatVectorOfUInt8, \
{dwarf_st##i##_x86_64, dwarf_st##i##_x86_64, LLDB_INVALID_REGNUM, \
LLDB_INVALID_REGNUM, lldb_st##i##_x86_64 }, \
- nullptr, nullptr, nullptr, 0 \
+ nullptr, nullptr, \
}
#define DEFINE_FP_MM(reg, i, streg) \
@@ -95,7 +95,7 @@
{dwarf_mm##i##_x86_64, dwarf_mm##i##_x86_64, LLDB_INVALID_REGNUM, \
LLDB_INVALID_REGNUM, lldb_mm##i##_x86_64 }, \
RegisterContextPOSIX_x86::g_contained_##streg##_64, \
- RegisterContextPOSIX_x86::g_invalidate_##streg##_64, nullptr, 0 \
+ RegisterContextPOSIX_x86::g_invalidate_##streg##_64, \
}
#define DEFINE_XMM(reg, i) \
@@ -106,7 +106,7 @@
{dwarf_##reg##i##_x86_64, dwarf_##reg##i##_x86_64, \
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, \
lldb_##reg##i##_x86_64 }, \
- nullptr, nullptr, nullptr, 0 \
+ nullptr, nullptr, \
}
#define DEFINE_YMM(reg, i) \
@@ -117,7 +117,7 @@
dwarf_##reg##i##h_x86_64, \
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, \
lldb_##reg##i##_x86_64 }, \
- nullptr, nullptr, nullptr, 0 \
+ nullptr, nullptr, \
}
#define DEFINE_BNDR(reg, i) \
@@ -128,7 +128,7 @@
dwarf_##reg##i##_x86_64, \
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, \
lldb_##reg##i##_x86_64 }, \
- nullptr, nullptr, nullptr, 0 \
+ nullptr, nullptr, \
}
#define DEFINE_BNDC(name, i) \
@@ -137,7 +137,7 @@
LLVM_EXTENSION BNDC_OFFSET(i), eEncodingVector, eFormatVectorOfUInt8, \
{LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, \
LLDB_INVALID_REGNUM, lldb_##name##_x86_64 }, \
- nullptr, nullptr, nullptr, 0 \
+ nullptr, nullptr, \
}
#define DEFINE_DR(reg, i) \
@@ -147,7 +147,7 @@
{LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, \
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, \
lldb_##reg##i##_x86_64 }, \
- nullptr, nullptr, nullptr, 0 \
+ nullptr, nullptr, \
}
#define DEFINE_GPR_PSEUDO_32(reg32, reg64) \
@@ -158,7 +158,7 @@
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, \
lldb_##reg32##_x86_64 }, \
RegisterContextPOSIX_x86::g_contained_##reg64, \
- RegisterContextPOSIX_x86::g_invalidate_##reg64, nullptr, 0 \
+ RegisterContextPOSIX_x86::g_invalidate_##reg64, \
}
#define DEFINE_GPR_PSEUDO_16(reg16, reg64) \
@@ -169,7 +169,7 @@
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, \
lldb_##reg16##_x86_64 }, \
RegisterContextPOSIX_x86::g_contained_##reg64, \
- RegisterContextPOSIX_x86::g_invalidate_##reg64, nullptr, 0 \
+ RegisterContextPOSIX_x86::g_invalidate_##reg64, \
}
#define DEFINE_GPR_PSEUDO_8H(reg8, reg64) \
@@ -180,7 +180,7 @@
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, \
lldb_##reg8##_x86_64 }, \
RegisterContextPOSIX_x86::g_contained_##reg64, \
- RegisterContextPOSIX_x86::g_invalidate_##reg64, nullptr, 0 \
+ RegisterContextPOSIX_x86::g_invalidate_##reg64, \
}
#define DEFINE_GPR_PSEUDO_8L(reg8, reg64) \
@@ -191,7 +191,7 @@
LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, \
lldb_##reg8##_x86_64 }, \
RegisterContextPOSIX_x86::g_contained_##reg64, \
- RegisterContextPOSIX_x86::g_invalidate_##reg64, nullptr, 0 \
+ RegisterContextPOSIX_x86::g_invalidate_##reg64, \
}
#define DEFINE_FPR_32(name, reg, kind1, kind2, kind3, kind4, reg64) \
@@ -199,7 +199,7 @@
#name, nullptr, FPR_SIZE(reg), FPR_OFFSET(reg), eEncodingUint, eFormatHex, \
{kind1, kind2, kind3, kind4, lldb_##name##_x86_64 }, \
RegisterContextPOSIX_x86::g_contained_##reg64, \
- RegisterContextPOSIX_x86::g_invalidate_##reg64, nullptr, 0 \
+ RegisterContextPOSIX_x86::g_invalidate_##reg64, \
}
// clang-format off
@@ -208,22 +208,22 @@ static RegisterInfo g_register_infos_x86_64[] = {
// =========================== ================== ================ ========================= ====================
DEFINE_GPR(rax, nullptr, dwarf_rax_x86_64, dwarf_rax_x86_64, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM),
DEFINE_GPR(rbx, nullptr, dwarf_rbx_x86_64, dwarf_rbx_x86_64, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM),
- DEFINE_GPR(rcx, "arg4", dwarf_rcx_x86_64, dwarf_rcx_x86_64, LLDB_REGNUM_GENERIC_ARG4, LLDB_INVALID_REGNUM),
- DEFINE_GPR(rdx, "arg3", dwarf_rdx_x86_64, dwarf_rdx_x86_64, LLDB_REGNUM_GENERIC_ARG3, LLDB_INVALID_REGNUM),
- DEFINE_GPR(rdi, "arg1", dwarf_rdi_x86_64, dwarf_rdi_x86_64, LLDB_REGNUM_GENERIC_ARG1, LLDB_INVALID_REGNUM),
- DEFINE_GPR(rsi, "arg2", dwarf_rsi_x86_64, dwarf_rsi_x86_64, LLDB_REGNUM_GENERIC_ARG2, LLDB_INVALID_REGNUM),
- DEFINE_GPR(rbp, "fp", dwarf_rbp_x86_64, dwarf_rbp_x86_64, LLDB_REGNUM_GENERIC_FP, LLDB_INVALID_REGNUM),
- DEFINE_GPR(rsp, "sp", dwarf_rsp_x86_64, dwarf_rsp_x86_64, LLDB_REGNUM_GENERIC_SP, LLDB_INVALID_REGNUM),
- DEFINE_GPR(r8, "arg5", dwarf_r8_x86_64, dwarf_r8_x86_64, LLDB_REGNUM_GENERIC_ARG5, LLDB_INVALID_REGNUM),
- DEFINE_GPR(r9, "arg6", dwarf_r9_x86_64, dwarf_r9_x86_64, LLDB_REGNUM_GENERIC_ARG6, LLDB_INVALID_REGNUM),
+ DEFINE_GPR(rcx, nullptr, dwarf_rcx_x86_64, dwarf_rcx_x86_64, LLDB_REGNUM_GENERIC_ARG4, LLDB_INVALID_REGNUM),
+ DEFINE_GPR(rdx, nullptr, dwarf_rdx_x86_64, dwarf_rdx_x86_64, LLDB_REGNUM_GENERIC_ARG3, LLDB_INVALID_REGNUM),
+ DEFINE_GPR(rdi, nullptr, dwarf_rdi_x86_64, dwarf_rdi_x86_64, LLDB_REGNUM_GENERIC_ARG1, LLDB_INVALID_REGNUM),
+ DEFINE_GPR(rsi, nullptr, dwarf_rsi_x86_64, dwarf_rsi_x86_64, LLDB_REGNUM_GENERIC_ARG2, LLDB_INVALID_REGNUM),
+ DEFINE_GPR(rbp, nullptr, dwarf_rbp_x86_64, dwarf_rbp_x86_64, LLDB_REGNUM_GENERIC_FP, LLDB_INVALID_REGNUM),
+ DEFINE_GPR(rsp, nullptr, dwarf_rsp_x86_64, dwarf_rsp_x86_64, LLDB_REGNUM_GENERIC_SP, LLDB_INVALID_REGNUM),
+ DEFINE_GPR(r8, nullptr, dwarf_r8_x86_64, dwarf_r8_x86_64, LLDB_REGNUM_GENERIC_ARG5, LLDB_INVALID_REGNUM),
+ DEFINE_GPR(r9, nullptr, dwarf_r9_x86_64, dwarf_r9_x86_64, LLDB_REGNUM_GENERIC_ARG6, LLDB_INVALID_REGNUM),
DEFINE_GPR(r10, nullptr, dwarf_r10_x86_64, dwarf_r10_x86_64, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM),
DEFINE_GPR(r11, nullptr, dwarf_r11_x86_64, dwarf_r11_x86_64, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM),
DEFINE_GPR(r12, nullptr, dwarf_r12_x86_64, dwarf_r12_x86_64, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM),
DEFINE_GPR(r13, nullptr, dwarf_r13_x86_64, dwarf_r13_x86_64, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM),
DEFINE_GPR(r14, nullptr, dwarf_r14_x86_64, dwarf_r14_x86_64, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM),
DEFINE_GPR(r15, nullptr, dwarf_r15_x86_64, dwarf_r15_x86_64, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM),
- DEFINE_GPR(rip, "pc", dwarf_rip_x86_64, dwarf_rip_x86_64, LLDB_REGNUM_GENERIC_PC, LLDB_INVALID_REGNUM),
- DEFINE_GPR(rflags, "flags", dwarf_rflags_x86_64, dwarf_rflags_x86_64, LLDB_REGNUM_GENERIC_FLAGS, LLDB_INVALID_REGNUM),
+ DEFINE_GPR(rip, nullptr, dwarf_rip_x86_64, dwarf_rip_x86_64, LLDB_REGNUM_GENERIC_PC, LLDB_INVALID_REGNUM),
+ DEFINE_GPR(rflags, nullptr, dwarf_rflags_x86_64, dwarf_rflags_x86_64, LLDB_REGNUM_GENERIC_FLAGS, LLDB_INVALID_REGNUM),
DEFINE_GPR(cs, nullptr, dwarf_cs_x86_64, dwarf_cs_x86_64, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM),
DEFINE_GPR(fs, nullptr, dwarf_fs_x86_64, dwarf_fs_x86_64, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM),
DEFINE_GPR(gs, nullptr, dwarf_gs_x86_64, dwarf_gs_x86_64, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM),
diff --git a/lldb/source/Plugins/Process/Utility/StopInfoMachException.cpp b/lldb/source/Plugins/Process/Utility/StopInfoMachException.cpp
index 85785a20354d..4df210032153 100644
--- a/lldb/source/Plugins/Process/Utility/StopInfoMachException.cpp
+++ b/lldb/source/Plugins/Process/Utility/StopInfoMachException.cpp
@@ -17,6 +17,7 @@
#include "lldb/Breakpoint/Watchpoint.h"
#include "lldb/Symbol/Symbol.h"
+#include "lldb/Target/ABI.h"
#include "lldb/Target/DynamicLoader.h"
#include "lldb/Target/ExecutionContext.h"
#include "lldb/Target/Process.h"
@@ -30,6 +31,182 @@
using namespace lldb;
using namespace lldb_private;
+/// Information about a pointer-authentication related instruction.
+struct PtrauthInstructionInfo {
+ bool IsAuthenticated;
+ bool IsLoad;
+ bool DoesBranch;
+};
+
+/// Get any pointer-authentication related information about the instruction
+/// at address \p at_addr.
+static llvm::Optional<PtrauthInstructionInfo>
+GetPtrauthInstructionInfo(Target &target, const ArchSpec &arch,
+ const Address &at_addr) {
+ const char *plugin_name = nullptr;
+ const char *flavor = nullptr;
+ AddressRange range_bounds(at_addr, 4);
+ const bool prefer_file_cache = true;
+ DisassemblerSP disassembler_sp = Disassembler::DisassembleRange(
+ arch, plugin_name, flavor, target, range_bounds, prefer_file_cache);
+ if (!disassembler_sp)
+ return llvm::None;
+
+ InstructionList &insn_list = disassembler_sp->GetInstructionList();
+ InstructionSP insn = insn_list.GetInstructionAtIndex(0);
+ if (!insn)
+ return llvm::None;
+
+ return PtrauthInstructionInfo{insn->IsAuthenticated(), insn->IsLoad(),
+ insn->DoesBranch()};
+}
+
+/// Describe the load address of \p addr using the format filename:line:col.
+static void DescribeAddressBriefly(Stream &strm, const Address &addr,
+ Target &target) {
+ strm.Printf("at address=0x%" PRIx64, addr.GetLoadAddress(&target));
+ StreamString s;
+ if (addr.GetDescription(s, target, eDescriptionLevelBrief))
+ strm.Printf(" %s", s.GetString().data());
+ strm.Printf(".\n");
+}
+
+bool StopInfoMachException::DeterminePtrauthFailure(ExecutionContext &exe_ctx) {
+ bool IsBreakpoint = m_value == 6; // EXC_BREAKPOINT
+ bool IsBadAccess = m_value == 1; // EXC_BAD_ACCESS
+ if (!IsBreakpoint && !IsBadAccess)
+ return false;
+
+ // Check that we have a live process.
+ if (!exe_ctx.HasProcessScope() || !exe_ctx.HasThreadScope() ||
+ !exe_ctx.HasTargetScope())
+ return false;
+
+ Thread &thread = *exe_ctx.GetThreadPtr();
+ StackFrameSP current_frame = thread.GetStackFrameAtIndex(0);
+ if (!current_frame)
+ return false;
+
+ Target &target = *exe_ctx.GetTargetPtr();
+ Process &process = *exe_ctx.GetProcessPtr();
+ ABISP abi_sp = process.GetABI();
+ const ArchSpec &arch = target.GetArchitecture();
+ assert(abi_sp && "Missing ABI info");
+
+ // Check for a ptrauth-enabled target.
+ const bool ptrauth_enabled_target =
+ arch.GetCore() == ArchSpec::eCore_arm_arm64e;
+ if (!ptrauth_enabled_target)
+ return false;
+
+ // Set up a stream we can write a diagnostic into.
+ StreamString strm;
+ auto emit_ptrauth_prologue = [&](uint64_t at_address) {
+ strm.Printf("EXC_BAD_ACCESS (code=%" PRIu64 ", address=0x%" PRIx64 ")\n",
+ m_exc_code, at_address);
+ strm.Printf("Note: Possible pointer authentication failure detected.\n");
+ };
+
+ // Check if we have a "brk 0xc47x" trap, where the value that failed to
+ // authenticate is in x16.
+ Address current_address = current_frame->GetFrameCodeAddress();
+ if (IsBreakpoint) {
+ RegisterContext *reg_ctx = exe_ctx.GetRegisterContext();
+ if (!reg_ctx)
+ return false;
+
+ const RegisterInfo *X16Info = reg_ctx->GetRegisterInfoByName("x16");
+ RegisterValue X16Val;
+ if (!reg_ctx->ReadRegister(X16Info, X16Val))
+ return false;
+ uint64_t bad_address = X16Val.GetAsUInt64();
+
+ uint64_t fixed_bad_address = abi_sp->FixCodeAddress(bad_address);
+ Address brk_address;
+ if (!target.ResolveLoadAddress(fixed_bad_address, brk_address))
+ return false;
+
+ auto brk_ptrauth_info =
+ GetPtrauthInstructionInfo(target, arch, current_address);
+ if (brk_ptrauth_info && brk_ptrauth_info->IsAuthenticated) {
+ emit_ptrauth_prologue(bad_address);
+ strm.Printf("Found value that failed to authenticate ");
+ DescribeAddressBriefly(strm, brk_address, target);
+ m_description = std::string(strm.GetString());
+ return true;
+ }
+ return false;
+ }
+
+ assert(IsBadAccess && "Handle EXC_BAD_ACCESS only after this point");
+
+ // Check that we have the "bad address" from an EXC_BAD_ACCESS.
+ if (m_exc_data_count < 2)
+ return false;
+
+ // Ok, we know the Target is valid and that it describes a ptrauth-enabled
+ // device. Now, we need to determine whether this exception was caused by a
+ // ptrauth failure.
+
+ uint64_t bad_address = m_exc_subcode;
+ uint64_t fixed_bad_address = abi_sp->FixCodeAddress(bad_address);
+ uint64_t current_pc = current_address.GetLoadAddress(&target);
+
+ // Detect: LDRAA, LDRAB (Load Register, with pointer authentication).
+ //
+ // If an authenticated load results in an exception, the instruction at the
+ // current PC should be one of LDRAx.
+ if (bad_address != current_pc && fixed_bad_address != current_pc) {
+ auto ptrauth_info =
+ GetPtrauthInstructionInfo(target, arch, current_address);
+ if (ptrauth_info && ptrauth_info->IsAuthenticated && ptrauth_info->IsLoad) {
+ emit_ptrauth_prologue(bad_address);
+ strm.Printf("Found authenticated load instruction ");
+ DescribeAddressBriefly(strm, current_address, target);
+ m_description = std::string(strm.GetString());
+ return true;
+ }
+ }
+
+ // Detect: BLRAA, BLRAAZ, BLRAB, BLRABZ (Branch with Link to Register, with
+ // pointer authentication).
+ //
+ // TODO: Detect: BRAA, BRAAZ, BRAB, BRABZ (Branch to Register, with pointer
+ // authentication). At a minimum, this requires call site info support for
+ // indirect calls.
+ //
+ // If an authenticated call or tail call results in an exception, stripping
+ // the bad address should give the current PC, which points to the address
+ // we tried to branch to.
+ if (bad_address != current_pc && fixed_bad_address == current_pc) {
+ if (StackFrameSP parent_frame = thread.GetStackFrameAtIndex(1)) {
+ addr_t return_pc =
+ parent_frame->GetFrameCodeAddress().GetLoadAddress(&target);
+ Address blr_address;
+ if (!target.ResolveLoadAddress(return_pc - 4, blr_address))
+ return false;
+
+ auto blr_ptrauth_info =
+ GetPtrauthInstructionInfo(target, arch, blr_address);
+ if (blr_ptrauth_info && blr_ptrauth_info->IsAuthenticated &&
+ blr_ptrauth_info->DoesBranch) {
+ emit_ptrauth_prologue(bad_address);
+ strm.Printf("Found authenticated indirect branch ");
+ DescribeAddressBriefly(strm, blr_address, target);
+ m_description = std::string(strm.GetString());
+ return true;
+ }
+ }
+ }
+
+ // TODO: Detect: RETAA, RETAB (Return from subroutine, with pointer
+ // authentication).
+ //
+ // Is there a motivating, non-malicious code snippet that corrupts LR?
+
+ return false;
+}
+
const char *StopInfoMachException::GetDescription() {
if (!m_description.empty())
return m_description.c_str();
@@ -79,6 +256,11 @@ const char *StopInfoMachException::GetDescription() {
}
break;
+ case llvm::Triple::aarch64:
+ if (DeterminePtrauthFailure(exe_ctx))
+ return m_description.c_str();
+ break;
+
default:
break;
}
@@ -190,6 +372,11 @@ const char *StopInfoMachException::GetDescription() {
}
break;
+ case llvm::Triple::aarch64:
+ if (DeterminePtrauthFailure(exe_ctx))
+ return m_description.c_str();
+ break;
+
default:
break;
}
diff --git a/lldb/source/Plugins/Process/Utility/StopInfoMachException.h b/lldb/source/Plugins/Process/Utility/StopInfoMachException.h
index d9c1886d7096..6467745a7bf2 100644
--- a/lldb/source/Plugins/Process/Utility/StopInfoMachException.h
+++ b/lldb/source/Plugins/Process/Utility/StopInfoMachException.h
@@ -16,6 +16,11 @@
namespace lldb_private {
class StopInfoMachException : public StopInfo {
+ /// Determine the pointer-authentication related failure that caused this
+ /// exception. Returns true and fills out the failure description if there
+ /// is an auth-related failure, and returns false otherwise.
+ bool DeterminePtrauthFailure(ExecutionContext &exe_ctx);
+
public:
// Constructors and Destructors
StopInfoMachException(Thread &thread, uint32_t exc_type,
diff --git a/lldb/source/Plugins/Process/Utility/lldb-mips-freebsd-register-enums.h b/lldb/source/Plugins/Process/Utility/lldb-mips-freebsd-register-enums.h
index e6a7efd00f67..000f6e3847e7 100644
--- a/lldb/source/Plugins/Process/Utility/lldb-mips-freebsd-register-enums.h
+++ b/lldb/source/Plugins/Process/Utility/lldb-mips-freebsd-register-enums.h
@@ -57,9 +57,47 @@ enum {
gpr_dummy_mips64,
k_last_gpr_mips64 = gpr_dummy_mips64,
+ k_first_fpr_mips64,
+ fpr_f0_mips64 = k_first_fpr_mips64,
+ fpr_f1_mips64,
+ fpr_f2_mips64,
+ fpr_f3_mips64,
+ fpr_f4_mips64,
+ fpr_f5_mips64,
+ fpr_f6_mips64,
+ fpr_f7_mips64,
+ fpr_f8_mips64,
+ fpr_f9_mips64,
+ fpr_f10_mips64,
+ fpr_f11_mips64,
+ fpr_f12_mips64,
+ fpr_f13_mips64,
+ fpr_f14_mips64,
+ fpr_f15_mips64,
+ fpr_f16_mips64,
+ fpr_f17_mips64,
+ fpr_f18_mips64,
+ fpr_f19_mips64,
+ fpr_f20_mips64,
+ fpr_f21_mips64,
+ fpr_f22_mips64,
+ fpr_f23_mips64,
+ fpr_f24_mips64,
+ fpr_f25_mips64,
+ fpr_f26_mips64,
+ fpr_f27_mips64,
+ fpr_f28_mips64,
+ fpr_f29_mips64,
+ fpr_f30_mips64,
+ fpr_f31_mips64,
+ fpr_fcsr_mips64,
+ fpr_fir_mips64,
+ k_last_fpr_mips64 = fpr_fir_mips64,
+
k_num_registers_mips64,
- k_num_gpr_registers_mips64 = k_last_gpr_mips64 - k_first_gpr_mips64 + 1
+ k_num_gpr_registers_mips64 = k_last_gpr_mips64 - k_first_gpr_mips64 + 1,
+ k_num_fpr_registers_mips64 = k_last_fpr_mips64 - k_first_fpr_mips64 + 1,
};
-}
+} // namespace lldb_private
#endif // LLDB_SOURCE_PLUGINS_PROCESS_UTILITY_LLDB_MIPS_FREEBSD_REGISTER_ENUMS_H
diff --git a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp
index 12bc7390c729..23b346d5c17f 100644
--- a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp
+++ b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp
@@ -37,12 +37,7 @@ namespace ELF = llvm::ELF;
LLDB_PLUGIN_DEFINE(ProcessElfCore)
-ConstString ProcessElfCore::GetPluginNameStatic() {
- static ConstString g_name("elf-core");
- return g_name;
-}
-
-const char *ProcessElfCore::GetPluginDescriptionStatic() {
+llvm::StringRef ProcessElfCore::GetPluginDescriptionStatic() {
return "ELF core dump plug-in.";
}
@@ -110,11 +105,6 @@ ProcessElfCore::~ProcessElfCore() {
Finalize();
}
-// PluginInterface
-ConstString ProcessElfCore::GetPluginName() { return GetPluginNameStatic(); }
-
-uint32_t ProcessElfCore::GetPluginVersion() { return 1; }
-
lldb::addr_t ProcessElfCore::AddAddressRangeFromLoadSegment(
const elf::ELFProgramHeader &header) {
const lldb::addr_t addr = header.p_vaddr;
@@ -257,7 +247,7 @@ Status ProcessElfCore::DoLoadCore() {
lldb_private::DynamicLoader *ProcessElfCore::GetDynamicLoader() {
if (m_dyld_up.get() == nullptr)
m_dyld_up.reset(DynamicLoader::FindPlugin(
- this, DynamicLoaderPOSIXDYLD::GetPluginNameStatic().GetCString()));
+ this, DynamicLoaderPOSIXDYLD::GetPluginNameStatic()));
return m_dyld_up.get();
}
@@ -291,8 +281,8 @@ size_t ProcessElfCore::ReadMemory(lldb::addr_t addr, void *buf, size_t size,
return DoReadMemory(addr, buf, size, error);
}
-Status ProcessElfCore::GetMemoryRegionInfo(lldb::addr_t load_addr,
- MemoryRegionInfo &region_info) {
+Status ProcessElfCore::DoGetMemoryRegionInfo(lldb::addr_t load_addr,
+ MemoryRegionInfo &region_info) {
region_info.Clear();
const VMRangeToPermissions::Entry *permission_entry =
m_core_range_infos.FindEntryThatContainsOrFollows(load_addr);
@@ -519,9 +509,8 @@ ProcessElfCore::parseSegment(const DataExtractor &segment) {
size_t note_start = offset;
size_t note_size = llvm::alignTo(note.n_descsz, 4);
- DataExtractor note_data(segment, note_start, note_size);
- result.push_back({note, note_data});
+ result.push_back({note, DataExtractor(segment, note_start, note_size)});
offset += note_size;
}
@@ -897,7 +886,8 @@ llvm::Error ProcessElfCore::parseLinuxNotes(llvm::ArrayRef<CoreNote> notes) {
/// A note segment consists of one or more NOTE entries, but their types and
/// meaning differ depending on the OS.
llvm::Error ProcessElfCore::ParseThreadContextsFromNoteSegment(
- const elf::ELFProgramHeader &segment_header, DataExtractor segment_data) {
+ const elf::ELFProgramHeader &segment_header,
+ const DataExtractor &segment_data) {
assert(segment_header.p_type == llvm::ELF::PT_NOTE);
auto notes_or_error = parseSegment(segment_data);
diff --git a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.h b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.h
index d8e3cc9ae3e1..fd36e5027816 100644
--- a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.h
+++ b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.h
@@ -40,9 +40,9 @@ public:
static void Terminate();
- static lldb_private::ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "elf-core"; }
- static const char *GetPluginDescriptionStatic();
+ static llvm::StringRef GetPluginDescriptionStatic();
// Constructors and Destructors
ProcessElfCore(lldb::TargetSP target_sp, lldb::ListenerSP listener_sp,
@@ -60,9 +60,7 @@ public:
lldb_private::DynamicLoader *GetDynamicLoader() override;
// PluginInterface protocol
- lldb_private::ConstString GetPluginName() override;
-
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
// Process Control
lldb_private::Status DoDestroy() override;
@@ -71,9 +69,8 @@ public:
lldb_private::Status WillResume() override {
lldb_private::Status error;
- error.SetErrorStringWithFormat(
- "error: %s does not support resuming processes",
- GetPluginName().GetCString());
+ error.SetErrorStringWithFormatv(
+ "error: {0} does not support resuming processes", GetPluginName());
return error;
}
@@ -89,10 +86,6 @@ public:
size_t DoReadMemory(lldb::addr_t addr, void *buf, size_t size,
lldb_private::Status &error) override;
- lldb_private::Status
- GetMemoryRegionInfo(lldb::addr_t load_addr,
- lldb_private::MemoryRegionInfo &region_info) override;
-
lldb::addr_t GetImageInfoAddress() override;
lldb_private::ArchSpec GetArchitecture();
@@ -108,6 +101,10 @@ protected:
bool DoUpdateThreadList(lldb_private::ThreadList &old_thread_list,
lldb_private::ThreadList &new_thread_list) override;
+ lldb_private::Status
+ DoGetMemoryRegionInfo(lldb::addr_t load_addr,
+ lldb_private::MemoryRegionInfo &region_info) override;
+
private:
struct NT_FILE_Entry {
lldb::addr_t start;
@@ -148,7 +145,7 @@ private:
// Parse thread(s) data structures(prstatus, prpsinfo) from given NOTE segment
llvm::Error ParseThreadContextsFromNoteSegment(
const elf::ELFProgramHeader &segment_header,
- lldb_private::DataExtractor segment_data);
+ const lldb_private::DataExtractor &segment_data);
// Returns number of thread contexts stored in the core file
uint32_t GetNumThreadContexts();
diff --git a/lldb/source/Plugins/Process/elf-core/RegisterUtilities.cpp b/lldb/source/Plugins/Process/elf-core/RegisterUtilities.cpp
index 0c21c0f50abb..6f3bf02cd303 100644
--- a/lldb/source/Plugins/Process/elf-core/RegisterUtilities.cpp
+++ b/lldb/source/Plugins/Process/elf-core/RegisterUtilities.cpp
@@ -34,5 +34,5 @@ DataExtractor lldb_private::getRegset(llvm::ArrayRef<CoreNote> Notes,
uint32_t Type = *TypeOr;
auto Iter = llvm::find_if(
Notes, [Type](const CoreNote &Note) { return Note.info.n_type == Type; });
- return Iter == Notes.end() ? DataExtractor() : Iter->data;
+ return Iter == Notes.end() ? DataExtractor() : DataExtractor(Iter->data);
}
diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteClientBase.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteClientBase.cpp
index a4c71e864a76..803e5842cd7d 100644
--- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteClientBase.cpp
+++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteClientBase.cpp
@@ -249,32 +249,6 @@ GDBRemoteClientBase::SendPacketAndWaitForResponseNoLock(
return packet_result;
}
-bool GDBRemoteClientBase::SendvContPacket(
- llvm::StringRef payload, std::chrono::seconds interrupt_timeout,
- StringExtractorGDBRemote &response) {
- Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PROCESS));
- LLDB_LOGF(log, "GDBRemoteCommunicationClient::%s ()", __FUNCTION__);
-
- // we want to lock down packet sending while we continue
- Lock lock(*this, interrupt_timeout);
-
- LLDB_LOGF(log,
- "GDBRemoteCommunicationClient::%s () sending vCont packet: %.*s",
- __FUNCTION__, int(payload.size()), payload.data());
-
- if (SendPacketNoLock(payload) != PacketResult::Success)
- return false;
-
- OnRunPacketSent(true);
-
- // wait for the response to the vCont
- if (ReadPacket(response, llvm::None, false) == PacketResult::Success) {
- if (response.IsOKResponse())
- return true;
- }
-
- return false;
-}
bool GDBRemoteClientBase::ShouldStop(const UnixSignals &signals,
StringExtractorGDBRemote &response) {
std::lock_guard<std::mutex> lock(m_mutex);
diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteClientBase.h b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteClientBase.h
index 518b81318b6c..43a5313eae6a 100644
--- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteClientBase.h
+++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteClientBase.h
@@ -59,10 +59,6 @@ public:
std::chrono::seconds interrupt_timeout,
llvm::function_ref<void(llvm::StringRef)> output_callback);
- bool SendvContPacket(llvm::StringRef payload,
- std::chrono::seconds interrupt_timeout,
- StringExtractorGDBRemote &response);
-
class Lock {
public:
// If interrupt_timeout == 0 seconds, only take the lock if the target is
diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.cpp
index 013d407c0fc1..4ce79da48f07 100644
--- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.cpp
+++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.cpp
@@ -22,7 +22,6 @@
#include "lldb/Host/Pipe.h"
#include "lldb/Host/ProcessLaunchInfo.h"
#include "lldb/Host/Socket.h"
-#include "lldb/Host/StringConvert.h"
#include "lldb/Host/ThreadLauncher.h"
#include "lldb/Host/common/TCPSocket.h"
#include "lldb/Host/posix/ConnectionFileDescriptorPosix.h"
@@ -102,7 +101,7 @@ size_t GDBRemoteCommunication::SendAck() {
Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PACKETS));
ConnectionStatus status = eConnectionStatusSuccess;
char ch = '+';
- const size_t bytes_written = Write(&ch, 1, status, nullptr);
+ const size_t bytes_written = WriteAll(&ch, 1, status, nullptr);
LLDB_LOGF(log, "<%4" PRIu64 "> send packet: %c", (uint64_t)bytes_written, ch);
m_history.AddPacket(ch, GDBRemotePacket::ePacketTypeSend, bytes_written);
return bytes_written;
@@ -112,7 +111,7 @@ size_t GDBRemoteCommunication::SendNack() {
Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PACKETS));
ConnectionStatus status = eConnectionStatusSuccess;
char ch = '-';
- const size_t bytes_written = Write(&ch, 1, status, nullptr);
+ const size_t bytes_written = WriteAll(&ch, 1, status, nullptr);
LLDB_LOGF(log, "<%4" PRIu64 "> send packet: %c", (uint64_t)bytes_written, ch);
m_history.AddPacket(ch, GDBRemotePacket::ePacketTypeSend, bytes_written);
return bytes_written;
@@ -138,7 +137,7 @@ GDBRemoteCommunication::SendRawPacketNoLock(llvm::StringRef packet,
ConnectionStatus status = eConnectionStatusSuccess;
const char *packet_data = packet.data();
const size_t packet_length = packet.size();
- size_t bytes_written = Write(packet_data, packet_length, status, nullptr);
+ size_t bytes_written = WriteAll(packet_data, packet_length, status, nullptr);
if (log) {
size_t binary_start_offset = 0;
if (strncmp(packet_data, "$vFile:pwrite:", strlen("$vFile:pwrite:")) ==
@@ -894,8 +893,13 @@ GDBRemoteCommunication::ListenThread(lldb::thread_arg_t arg) {
if (connection) {
// Do the listen on another thread so we can continue on...
- if (connection->Connect(comm->m_listen_url.c_str(), &error) !=
- eConnectionStatusSuccess)
+ if (connection->Connect(
+ comm->m_listen_url.c_str(), [comm](llvm::StringRef port_str) {
+ uint16_t port = 0;
+ llvm::to_integer(port_str, port, 10);
+ comm->m_port_promise.set_value(port);
+ },
+ &error) != eConnectionStatusSuccess)
comm->SetConnection(nullptr);
}
return {};
@@ -1057,10 +1061,12 @@ Status GDBRemoteCommunication::StartDebugserverProcess(
return error;
}
- ConnectionFileDescriptor *connection =
- (ConnectionFileDescriptor *)GetConnection();
// Wait for 10 seconds to resolve the bound port
- uint16_t port_ = connection->GetListeningPort(std::chrono::seconds(10));
+ std::future<uint16_t> port_future = m_port_promise.get_future();
+ uint16_t port_ = port_future.wait_for(std::chrono::seconds(10)) ==
+ std::future_status::ready
+ ? port_future.get()
+ : 0;
if (port_ > 0) {
char port_cstr[32];
snprintf(port_cstr, sizeof(port_cstr), "127.0.0.1:%i", port_);
@@ -1173,7 +1179,9 @@ Status GDBRemoteCommunication::StartDebugserverProcess(
port_cstr, num_bytes, std::chrono::seconds{10}, num_bytes);
if (error.Success() && (port != nullptr)) {
assert(num_bytes > 0 && port_cstr[num_bytes - 1] == '\0');
- uint16_t child_port = StringConvert::ToUInt32(port_cstr, 0);
+ uint16_t child_port = 0;
+ // FIXME: improve error handling
+ llvm::to_integer(port_cstr, child_port);
if (*port == 0 || *port == child_port) {
*port = child_port;
LLDB_LOGF(log,
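Note: the change above replaces polling ConnectionFileDescriptor::GetListeningPort() with a std::promise/std::future hand-off — the listen thread's connect callback publishes the bound port, and StartDebugserverProcess() waits on the future with a 10-second deadline. A minimal standalone sketch of that pattern, with made-up names and port value, assuming only the C++ standard library:

    #include <chrono>
    #include <cstdint>
    #include <cstdio>
    #include <future>
    #include <thread>

    int main() {
      std::promise<uint16_t> port_promise;
      std::future<uint16_t> port_future = port_promise.get_future();

      // Stand-in for the listen thread: report the bound port once it is known.
      std::thread listener([&] { port_promise.set_value(54321); });

      // Wait up to 10 seconds for the port; fall back to 0 on timeout.
      uint16_t port = port_future.wait_for(std::chrono::seconds(10)) ==
                              std::future_status::ready
                          ? port_future.get()
                          : 0;
      std::printf("bound port: %u\n", port);
      listener.join();
      return 0;
    }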
diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.h b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.h
index b1e2075a64fe..5da568e9b4d4 100644
--- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.h
+++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.h
@@ -12,6 +12,7 @@
#include "GDBRemoteCommunicationHistory.h"
#include <condition_variable>
+#include <future>
#include <mutex>
#include <queue>
#include <string>
@@ -51,6 +52,32 @@ enum class CompressionType {
LZMA, // Lempel–Ziv–Markov chain algorithm
};
+// Data included in the vFile:fstat packet.
+// https://sourceware.org/gdb/onlinedocs/gdb/struct-stat.html#struct-stat
+struct GDBRemoteFStatData {
+ llvm::support::ubig32_t gdb_st_dev;
+ llvm::support::ubig32_t gdb_st_ino;
+ llvm::support::ubig32_t gdb_st_mode;
+ llvm::support::ubig32_t gdb_st_nlink;
+ llvm::support::ubig32_t gdb_st_uid;
+ llvm::support::ubig32_t gdb_st_gid;
+ llvm::support::ubig32_t gdb_st_rdev;
+ llvm::support::ubig64_t gdb_st_size;
+ llvm::support::ubig64_t gdb_st_blksize;
+ llvm::support::ubig64_t gdb_st_blocks;
+ llvm::support::ubig32_t gdb_st_atime;
+ llvm::support::ubig32_t gdb_st_mtime;
+ llvm::support::ubig32_t gdb_st_ctime;
+};
+static_assert(sizeof(GDBRemoteFStatData) == 64,
+ "size of GDBRemoteFStatData is not 64");
+
+enum GDBErrno {
+#define HANDLE_ERRNO(name, value) GDB_##name = value,
+#include "Plugins/Process/gdb-remote/GDBRemoteErrno.def"
+ GDB_EUNKNOWN = 9999
+};
+
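A brief note on the X-macro pattern used here: each HANDLE_ERRNO(name, value) entry in GDBRemoteErrno.def expands once into the GDB_-prefixed enumerator above and once into the switch inside gdb_errno_to_system() added later in this patch. Illustrative expansion (these two entries are hypothetical, not the actual contents of the .def file):

    //   HANDLE_ERRNO(EPERM, 1)
    //   HANDLE_ERRNO(ENOENT, 2)
    // expands here to:
    //   GDB_EPERM = 1, GDB_ENOENT = 2,
    // and in gdb_errno_to_system() to:
    //   case GDB_EPERM:  return EPERM;
    //   case GDB_ENOENT: return ENOENT;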
class ProcessGDBRemote;
class GDBRemoteCommunication : public Communication {
@@ -217,6 +244,8 @@ private:
std::mutex m_packet_queue_mutex; // Mutex for accessing queue
std::condition_variable
m_condition_queue_not_empty; // Condition variable to wait for packets
+ // Promise used to grab the port number from listening thread
+ std::promise<uint16_t> m_port_promise;
HostThread m_listen_thread;
std::string m_listen_url;
diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp
index b16aed4f5c90..78e722eee080 100644
--- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp
+++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp
@@ -16,7 +16,6 @@
#include "lldb/Core/ModuleSpec.h"
#include "lldb/Host/HostInfo.h"
-#include "lldb/Host/StringConvert.h"
#include "lldb/Host/XML.h"
#include "lldb/Symbol/Symbol.h"
#include "lldb/Target/MemoryRegionInfo.h"
@@ -65,6 +64,8 @@ GDBRemoteCommunicationClient::GDBRemoteCommunicationClient()
m_supports_QEnvironmentHexEncoded(true), m_supports_qSymbol(true),
m_qSymbol_requests_done(false), m_supports_qModuleInfo(true),
m_supports_jThreadsInfo(true), m_supports_jModulesInfo(true),
+ m_supports_vFileSize(true), m_supports_vFileMode(true),
+ m_supports_vFileExists(true), m_supports_vRun(true),
m_host_arch(), m_process_arch(), m_os_build(), m_os_kernel(),
m_hostname(), m_gdb_server_name(), m_default_packet_timeout(0),
@@ -82,6 +83,8 @@ bool GDBRemoteCommunicationClient::HandshakeWithServer(Status *error_ptr) {
// Start the read thread after we send the handshake ack since if we fail to
// send the handshake ack, there is no reason to continue...
+ std::chrono::steady_clock::time_point start_of_handshake =
+ std::chrono::steady_clock::now();
if (SendAck()) {
// Wait for any responses that might have been queued up in the remote
// GDB server and flush them all
@@ -97,8 +100,24 @@ bool GDBRemoteCommunicationClient::HandshakeWithServer(Status *error_ptr) {
if (QueryNoAckModeSupported()) {
return true;
} else {
- if (error_ptr)
- error_ptr->SetErrorString("failed to get reply to handshake packet");
+ std::chrono::steady_clock::time_point end_of_handshake =
+ std::chrono::steady_clock::now();
+ auto handshake_timeout =
+ std::chrono::duration<double>(end_of_handshake - start_of_handshake)
+ .count();
+ if (error_ptr) {
+ if (packet_result == PacketResult::ErrorDisconnected)
+ error_ptr->SetErrorString("Connection shut down by remote side "
+ "while waiting for reply to initial "
+ "handshake packet");
+ else if (packet_result == PacketResult::ErrorReplyTimeout)
+ error_ptr->SetErrorStringWithFormat(
+ "failed to get reply to handshake packet within timeout of "
+ "%.1f seconds",
+ handshake_timeout);
+ else
+ error_ptr->SetErrorString("failed to get reply to handshake packet");
+ }
}
} else {
if (error_ptr)
@@ -257,12 +276,14 @@ void GDBRemoteCommunicationClient::ResetDiscoverableSettings(bool did_exec) {
m_attach_or_wait_reply = eLazyBoolCalculate;
m_avoid_g_packets = eLazyBoolCalculate;
m_supports_multiprocess = eLazyBoolCalculate;
+ m_supports_qSaveCore = eLazyBoolCalculate;
m_supports_qXfer_auxv_read = eLazyBoolCalculate;
m_supports_qXfer_libraries_read = eLazyBoolCalculate;
m_supports_qXfer_libraries_svr4_read = eLazyBoolCalculate;
m_supports_qXfer_features_read = eLazyBoolCalculate;
m_supports_qXfer_memory_map_read = eLazyBoolCalculate;
m_supports_augmented_libraries_svr4_read = eLazyBoolCalculate;
+ m_uses_native_signals = eLazyBoolCalculate;
m_supports_qProcessInfoPID = true;
m_supports_qfProcessInfo = true;
m_supports_qUserName = true;
@@ -312,13 +333,16 @@ void GDBRemoteCommunicationClient::GetRemoteQSupported() {
m_supports_qEcho = eLazyBoolNo;
m_supports_QPassSignals = eLazyBoolNo;
m_supports_memory_tagging = eLazyBoolNo;
+ m_supports_qSaveCore = eLazyBoolNo;
+ m_uses_native_signals = eLazyBoolNo;
m_max_packet_size = UINT64_MAX; // It's supposed to always be there, but if
// not, we assume no limit
// build the qSupported packet
std::vector<std::string> features = {"xmlRegisters=i386,arm,mips,arc",
- "multiprocess+"};
+ "multiprocess+", "fork-events+",
+ "vfork-events+"};
StreamString packet;
packet.PutCString("qSupported");
for (uint32_t i = 0; i < features.size(); ++i) {
@@ -333,10 +357,7 @@ void GDBRemoteCommunicationClient::GetRemoteQSupported() {
// configuration of the transport before attaching/launching the process.
m_qSupported_response = response.GetStringRef().str();
- llvm::SmallVector<llvm::StringRef, 16> server_features;
- response.GetStringRef().split(server_features, ';');
-
- for (llvm::StringRef x : server_features) {
+ for (llvm::StringRef x : llvm::split(response.GetStringRef(), ';')) {
if (x == "qXfer:auxv:read+")
m_supports_qXfer_auxv_read = eLazyBoolYes;
else if (x == "qXfer:libraries-svr4:read+")
@@ -358,6 +379,10 @@ void GDBRemoteCommunicationClient::GetRemoteQSupported() {
m_supports_multiprocess = eLazyBoolYes;
else if (x == "memory-tagging+")
m_supports_memory_tagging = eLazyBoolYes;
+ else if (x == "qSaveCore+")
+ m_supports_qSaveCore = eLazyBoolYes;
+ else if (x == "native-signals+")
+ m_uses_native_signals = eLazyBoolYes;
// Look for a list of compressions in the features list e.g.
// qXfer:features:read+;PacketSize=20000;qEcho+;SupportedCompressions=zlib-
// deflate,lzma
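The rewritten loop iterates the qSupported reply lazily with llvm::split() instead of filling a SmallVector first. A small self-contained sketch of the same parsing approach (the reply string below is made up; the feature names match the checks above):

    #include "llvm/ADT/StringExtras.h"
    #include "llvm/ADT/StringRef.h"
    #include <cstdio>

    int main() {
      llvm::StringRef reply =
          "PacketSize=20000;qEcho+;multiprocess+;memory-tagging+;native-signals+";
      bool memory_tagging = false, native_signals = false;
      // Each ';'-separated token is visited without allocating a vector.
      for (llvm::StringRef x : llvm::split(reply, ';')) {
        if (x == "memory-tagging+")
          memory_tagging = true;
        else if (x == "native-signals+")
          native_signals = true;
      }
      std::printf("memory-tagging:%d native-signals:%d\n", memory_tagging,
                  native_signals);
      return 0;
    }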
@@ -500,6 +525,10 @@ LazyBool GDBRemoteCommunicationClient::GetThreadPacketSupported(
return eLazyBoolNo;
}
+bool GDBRemoteCommunicationClient::GetSaveCoreSupported() const {
+ return m_supports_qSaveCore == eLazyBoolYes;
+}
+
StructuredData::ObjectSP GDBRemoteCommunicationClient::GetThreadsInfo() {
// Get information on all threads at one using the "jThreadsInfo" packet
StructuredData::ObjectSP object_sp;
@@ -661,54 +690,6 @@ bool GDBRemoteCommunicationClient::GetxPacketSupported() {
return m_supports_x;
}
-GDBRemoteCommunicationClient::PacketResult
-GDBRemoteCommunicationClient::SendPacketsAndConcatenateResponses(
- const char *payload_prefix, std::string &response_string) {
- Lock lock(*this);
- if (!lock) {
- Log *log(ProcessGDBRemoteLog::GetLogIfAnyCategoryIsSet(GDBR_LOG_PROCESS |
- GDBR_LOG_PACKETS));
- LLDB_LOGF(log,
- "error: failed to get packet sequence mutex, not sending "
- "packets with prefix '%s'",
- payload_prefix);
- return PacketResult::ErrorNoSequenceLock;
- }
-
- response_string = "";
- std::string payload_prefix_str(payload_prefix);
- unsigned int response_size = 0x1000;
- if (response_size > GetRemoteMaxPacketSize()) { // May send qSupported packet
- response_size = GetRemoteMaxPacketSize();
- }
-
- for (unsigned int offset = 0; true; offset += response_size) {
- StringExtractorGDBRemote this_response;
- // Construct payload
- char sizeDescriptor[128];
- snprintf(sizeDescriptor, sizeof(sizeDescriptor), "%x,%x", offset,
- response_size);
- PacketResult result = SendPacketAndWaitForResponseNoLock(
- payload_prefix_str + sizeDescriptor, this_response);
- if (result != PacketResult::Success)
- return result;
-
- const std::string &this_string = std::string(this_response.GetStringRef());
-
- // Check for m or l as first character; l seems to mean this is the last
- // chunk
- char first_char = *this_string.c_str();
- if (first_char != 'm' && first_char != 'l') {
- return PacketResult::ErrorReplyInvalid;
- }
- // Concatenate the result so far (skipping 'm' or 'l')
- response_string.append(this_string, 1, std::string::npos);
- if (first_char == 'l')
- // We're done
- return PacketResult::Success;
- }
-}
-
lldb::pid_t GDBRemoteCommunicationClient::GetCurrentProcessID(bool allow_lazy) {
if (allow_lazy && m_curr_pid_is_valid == eLazyBoolYes)
return m_curr_pid;
@@ -765,6 +746,11 @@ bool GDBRemoteCommunicationClient::GetLaunchSuccess(std::string &error_str) {
PacketResult::Success) {
if (response.IsOKResponse())
return true;
+ // GDB does not implement qLaunchSuccess -- but if we used vRun,
+ // then we already received a successful launch indication via stop
+ // reason.
+ if (response.IsUnsupportedResponse() && m_supports_vRun)
+ return true;
if (response.GetChar() == 'E') {
// A string the describes what failed when launching...
error_str = std::string(response.GetStringRef().substr(1));
@@ -803,6 +789,36 @@ int GDBRemoteCommunicationClient::SendArgumentsPacket(
}
}
if (!argv.empty()) {
+ // try vRun first
+ if (m_supports_vRun) {
+ StreamString packet;
+ packet.PutCString("vRun");
+ for (const char *arg : argv) {
+ packet.PutChar(';');
+ packet.PutBytesAsRawHex8(arg, strlen(arg));
+ }
+
+ StringExtractorGDBRemote response;
+ if (SendPacketAndWaitForResponse(packet.GetString(), response) !=
+ PacketResult::Success)
+ return -1;
+
+ if (response.IsErrorResponse()) {
+ uint8_t error = response.GetError();
+ if (error)
+ return error;
+ return -1;
+ }
+ // vRun replies with a stop reason packet
+ // FIXME: right now we just discard the packet and LLDB queries
+ // for stop reason again
+ if (!response.IsUnsupportedResponse())
+ return 0;
+
+ m_supports_vRun = false;
+ }
+
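For reference, a worked example of the request this block builds (hedged; the argv values are made up): each argument is appended as ';' followed by its bytes in ASCII hex, so argv = {"/bin/ls", "-l"} produces

    vRun;2f62696e2f6c73;2d6c

('/' = 2f, 'b' = 62, 'i' = 69, 'n' = 6e, 'l' = 6c, 's' = 73, '-' = 2d). On success the server answers with a stop-reason packet rather than OK, which is why the code above only checks for error and unsupported responses.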
+ // fall back to A
StreamString packet;
packet.PutChar('A');
for (size_t i = 0, n = argv.size(); i < n; ++i) {
@@ -839,7 +855,6 @@ int GDBRemoteCommunicationClient::SendEnvironment(const Environment &env) {
int GDBRemoteCommunicationClient::SendEnvironmentPacket(
char const *name_equal_value) {
if (name_equal_value && name_equal_value[0]) {
- StreamString packet;
bool send_hex_encoding = false;
for (const char *p = name_equal_value; *p != '\0' && !send_hex_encoding;
++p) {
@@ -861,33 +876,43 @@ int GDBRemoteCommunicationClient::SendEnvironmentPacket(
}
StringExtractorGDBRemote response;
- if (send_hex_encoding) {
- if (m_supports_QEnvironmentHexEncoded) {
- packet.PutCString("QEnvironmentHexEncoded:");
- packet.PutBytesAsRawHex8(name_equal_value, strlen(name_equal_value));
- if (SendPacketAndWaitForResponse(packet.GetString(), response) ==
- PacketResult::Success) {
- if (response.IsOKResponse())
- return 0;
- uint8_t error = response.GetError();
- if (error)
- return error;
- if (response.IsUnsupportedResponse())
- m_supports_QEnvironmentHexEncoded = false;
- }
+ // Prefer sending unencoded, if possible and the server supports it.
+ if (!send_hex_encoding && m_supports_QEnvironment) {
+ StreamString packet;
+ packet.Printf("QEnvironment:%s", name_equal_value);
+ if (SendPacketAndWaitForResponse(packet.GetString(), response) !=
+ PacketResult::Success)
+ return -1;
+
+ if (response.IsOKResponse())
+ return 0;
+ if (response.IsUnsupportedResponse())
+ m_supports_QEnvironment = false;
+ else {
+ uint8_t error = response.GetError();
+ if (error)
+ return error;
+ return -1;
}
+ }
- } else if (m_supports_QEnvironment) {
- packet.Printf("QEnvironment:%s", name_equal_value);
- if (SendPacketAndWaitForResponse(packet.GetString(), response) ==
- PacketResult::Success) {
- if (response.IsOKResponse())
- return 0;
+ if (m_supports_QEnvironmentHexEncoded) {
+ StreamString packet;
+ packet.PutCString("QEnvironmentHexEncoded:");
+ packet.PutBytesAsRawHex8(name_equal_value, strlen(name_equal_value));
+ if (SendPacketAndWaitForResponse(packet.GetString(), response) !=
+ PacketResult::Success)
+ return -1;
+
+ if (response.IsOKResponse())
+ return 0;
+ if (response.IsUnsupportedResponse())
+ m_supports_QEnvironmentHexEncoded = false;
+ else {
uint8_t error = response.GetError();
if (error)
return error;
- if (response.IsUnsupportedResponse())
- m_supports_QEnvironment = false;
+ return -1;
}
}
}
@@ -949,26 +974,21 @@ llvm::VersionTuple GDBRemoteCommunicationClient::GetMacCatalystVersion() {
return m_maccatalyst_version;
}
-bool GDBRemoteCommunicationClient::GetOSBuildString(std::string &s) {
+llvm::Optional<std::string> GDBRemoteCommunicationClient::GetOSBuildString() {
if (GetHostInfo()) {
- if (!m_os_build.empty()) {
- s = m_os_build;
- return true;
- }
+ if (!m_os_build.empty())
+ return m_os_build;
}
- s.clear();
- return false;
+ return llvm::None;
}
-bool GDBRemoteCommunicationClient::GetOSKernelDescription(std::string &s) {
+llvm::Optional<std::string>
+GDBRemoteCommunicationClient::GetOSKernelDescription() {
if (GetHostInfo()) {
- if (!m_os_kernel.empty()) {
- s = m_os_kernel;
- return true;
- }
+ if (!m_os_kernel.empty())
+ return m_os_kernel;
}
- s.clear();
- return false;
+ return llvm::None;
}
bool GDBRemoteCommunicationClient::GetHostname(std::string &s) {
@@ -1093,9 +1113,8 @@ void GDBRemoteCommunicationClient::MaybeEnableCompression(
if (avail_type != CompressionType::None) {
StringExtractorGDBRemote response;
- llvm::Twine packet = "QEnableCompression:type:" + avail_name + ";";
- if (SendPacketAndWaitForResponse(packet.str(), response) !=
- PacketResult::Success)
+ std::string packet = "QEnableCompression:type:" + avail_name.str() + ";";
+ if (SendPacketAndWaitForResponse(packet, response) != PacketResult::Success)
return;
if (response.IsOKResponse()) {
@@ -1360,24 +1379,6 @@ bool GDBRemoteCommunicationClient::GetHostInfo(bool force) {
return m_qHostInfo_is_valid == eLazyBoolYes;
}
-int GDBRemoteCommunicationClient::SendAttach(
- lldb::pid_t pid, StringExtractorGDBRemote &response) {
- if (pid != LLDB_INVALID_PROCESS_ID) {
- char packet[64];
- const int packet_len =
- ::snprintf(packet, sizeof(packet), "vAttach;%" PRIx64, pid);
- UNUSED_IF_ASSERT_DISABLED(packet_len);
- assert(packet_len < (int)sizeof(packet));
- if (SendPacketAndWaitForResponse(packet, response) ==
- PacketResult::Success) {
- if (response.IsErrorResponse())
- return response.GetError();
- return 0;
- }
- }
- return -1;
-}
-
int GDBRemoteCommunicationClient::SendStdinNotification(const char *data,
size_t data_len) {
StreamString packet;
@@ -1457,9 +1458,12 @@ bool GDBRemoteCommunicationClient::DeallocateMemory(addr_t addr) {
return false;
}
-Status GDBRemoteCommunicationClient::Detach(bool keep_stopped) {
+Status GDBRemoteCommunicationClient::Detach(bool keep_stopped,
+ lldb::pid_t pid) {
Status error;
+ lldb_private::StreamString packet;
+ packet.PutChar('D');
if (keep_stopped) {
if (m_supports_detach_stay_stopped == eLazyBoolCalculate) {
char packet[64];
@@ -1481,17 +1485,27 @@ Status GDBRemoteCommunicationClient::Detach(bool keep_stopped) {
error.SetErrorString("Stays stopped not supported by this target.");
return error;
} else {
- StringExtractorGDBRemote response;
- PacketResult packet_result = SendPacketAndWaitForResponse("D1", response);
- if (packet_result != PacketResult::Success)
- error.SetErrorString("Sending extended disconnect packet failed.");
+ packet.PutChar('1');
}
- } else {
- StringExtractorGDBRemote response;
- PacketResult packet_result = SendPacketAndWaitForResponse("D", response);
- if (packet_result != PacketResult::Success)
- error.SetErrorString("Sending disconnect packet failed.");
}
+
+ if (m_supports_multiprocess) {
+ // Some servers (e.g. qemu) require specifying the PID even if only a single
+ // process is running.
+ if (pid == LLDB_INVALID_PROCESS_ID)
+ pid = GetCurrentProcessID();
+ packet.PutChar(';');
+ packet.PutHex64(pid);
+ } else if (pid != LLDB_INVALID_PROCESS_ID) {
+ error.SetErrorString("Multiprocess extension not supported by the server.");
+ return error;
+ }
+
+ StringExtractorGDBRemote response;
+ PacketResult packet_result =
+ SendPacketAndWaitForResponse(packet.GetString(), response);
+ if (packet_result != PacketResult::Success)
+ error.SetErrorString("Sending disconnect packet failed.");
return error;
}
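Taken together, the rewritten Detach() emits one of four packet shapes depending on keep_stopped and multiprocess support (a hedged summary of the branches above; <pid> is the hex value written by PutHex64):

    D            keep_stopped = false, no multiprocess
    D1           keep_stopped = true,  no multiprocess
    D;<pid>      keep_stopped = false, multiprocess
    D1;<pid>     keep_stopped = true,  multiprocess

When the multiprocess extension is not supported but an explicit pid was passed in, the function returns an error instead of sending anything.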
@@ -1527,17 +1541,17 @@ Status GDBRemoteCommunicationClient::GetMemoryRegionInfo(
region_info.GetRange().IsValid()) {
saw_permissions = true;
if (region_info.GetRange().Contains(addr)) {
- if (value.find('r') != llvm::StringRef::npos)
+ if (value.contains('r'))
region_info.SetReadable(MemoryRegionInfo::eYes);
else
region_info.SetReadable(MemoryRegionInfo::eNo);
- if (value.find('w') != llvm::StringRef::npos)
+ if (value.contains('w'))
region_info.SetWritable(MemoryRegionInfo::eYes);
else
region_info.SetWritable(MemoryRegionInfo::eNo);
- if (value.find('x') != llvm::StringRef::npos)
+ if (value.contains('x'))
region_info.SetExecutable(MemoryRegionInfo::eYes);
else
region_info.SetExecutable(MemoryRegionInfo::eNo);
@@ -1572,6 +1586,19 @@ Status GDBRemoteCommunicationClient::GetMemoryRegionInfo(
}
}
}
+ } else if (name.equals("type")) {
+ // The value may be a single type or a comma-separated list,
+ // e.g. "stack" or "stack,heap".
+ for (llvm::StringRef entry : llvm::split(value, ',')) {
+ if (entry == "stack")
+ region_info.SetIsStackMemory(MemoryRegionInfo::eYes);
+ }
} else if (name.equals("error")) {
StringExtractorGDBRemote error_extractor(value);
std::string error_string;
@@ -1580,21 +1607,12 @@ Status GDBRemoteCommunicationClient::GetMemoryRegionInfo(
error.SetErrorString(error_string.c_str());
} else if (name.equals("dirty-pages")) {
std::vector<addr_t> dirty_page_list;
- std::string comma_sep_str = value.str();
- size_t comma_pos;
- addr_t page;
- while ((comma_pos = comma_sep_str.find(',')) != std::string::npos) {
- comma_sep_str[comma_pos] = '\0';
- page = StringConvert::ToUInt64(comma_sep_str.c_str(),
- LLDB_INVALID_ADDRESS, 16);
- if (page != LLDB_INVALID_ADDRESS)
+ for (llvm::StringRef x : llvm::split(value, ',')) {
+ addr_t page;
+ x.consume_front("0x");
+ if (llvm::to_integer(x, page, 16))
dirty_page_list.push_back(page);
- comma_sep_str.erase(0, comma_pos + 1);
}
- page = StringConvert::ToUInt64(comma_sep_str.c_str(),
- LLDB_INVALID_ADDRESS, 16);
- if (page != LLDB_INVALID_ADDRESS)
- dirty_page_list.push_back(page);
region_info.SetDirtyPageList(dirty_page_list);
}
}
@@ -1683,17 +1701,13 @@ Status GDBRemoteCommunicationClient::LoadQXferMemoryMap() {
return error;
}
- std::string xml;
- lldb_private::Status lldberr;
- if (!ReadExtFeature(ConstString("memory-map"), ConstString(""), xml,
- lldberr)) {
- error.SetErrorString("Failed to read memory map");
- return error;
- }
+ llvm::Expected<std::string> xml = ReadExtFeature("memory-map", "");
+ if (!xml)
+ return Status(xml.takeError());
XMLDocument xml_document;
- if (!xml_document.ParseMemory(xml.c_str(), xml.size())) {
+ if (!xml_document.ParseMemory(xml->c_str(), xml->size())) {
error.SetErrorString("Failed to parse memory map xml");
return error;
}
@@ -2358,24 +2372,6 @@ bool GDBRemoteCommunicationClient::GetGroupName(uint32_t gid,
return false;
}
-bool GDBRemoteCommunicationClient::SetNonStopMode(const bool enable) {
- // Form non-stop packet request
- char packet[32];
- const int packet_len =
- ::snprintf(packet, sizeof(packet), "QNonStop:%1d", (int)enable);
- assert(packet_len < (int)sizeof(packet));
- UNUSED_IF_ASSERT_DISABLED(packet_len);
-
- StringExtractorGDBRemote response;
- // Send to target
- if (SendPacketAndWaitForResponse(packet, response) == PacketResult::Success)
- if (response.IsOKResponse())
- return true;
-
- // Failed or not supported
- return false;
-}
-
static void MakeSpeedTestPacket(StreamString &packet, uint32_t send_size,
uint32_t recv_size) {
packet.Clear();
@@ -2827,8 +2823,12 @@ GDBRemoteCommunicationClient::GetCurrentProcessAndThreadIDs(
if (ch == 'm') {
do {
auto pid_tid = response.GetPidTid(LLDB_INVALID_PROCESS_ID);
+ // If we get an invalid response, break out of the loop.
+ // If there are valid tids, they have been added to ids.
+ // If there are no valid tids, we'll fall through to the
+ // bare-iron target handling below.
if (!pid_tid)
- return {};
+ break;
ids.push_back(pid_tid.getValue());
ch = response.GetChar(); // Skip the command separator
@@ -2959,7 +2959,7 @@ Status GDBRemoteCommunicationClient::MakeDirectory(const FileSpec &file_spec,
if (response.GetChar() != 'F')
return Status("invalid response to '%s' packet", packet.str().c_str());
- return Status(response.GetU32(UINT32_MAX), eErrorTypePOSIX);
+ return Status(response.GetHexMaxU32(false, UINT32_MAX), eErrorTypePOSIX);
}
Status
@@ -2980,7 +2980,18 @@ GDBRemoteCommunicationClient::SetFilePermissions(const FileSpec &file_spec,
if (response.GetChar() != 'F')
return Status("invalid response to '%s' packet", stream.GetData());
- return Status(response.GetU32(UINT32_MAX), eErrorTypePOSIX);
+ return Status(response.GetHexMaxU32(false, UINT32_MAX), eErrorTypePOSIX);
+}
+
+static int gdb_errno_to_system(int err) {
+ switch (err) {
+#define HANDLE_ERRNO(name, value) \
+ case GDB_##name: \
+ return name;
+#include "Plugins/Process/gdb-remote/GDBRemoteErrno.def"
+ default:
+ return -1;
+ }
}
static uint64_t ParseHostIOPacketResponse(StringExtractorGDBRemote &response,
@@ -2988,12 +2999,12 @@ static uint64_t ParseHostIOPacketResponse(StringExtractorGDBRemote &response,
response.SetFilePos(0);
if (response.GetChar() != 'F')
return fail_result;
- int32_t result = response.GetS32(-2);
+ int32_t result = response.GetS32(-2, 16);
if (result == -2)
return fail_result;
if (response.GetChar() == ',') {
- int result_errno = response.GetS32(-2);
- if (result_errno != -2)
+ int result_errno = gdb_errno_to_system(response.GetS32(-1, 16));
+ if (result_errno != -1)
error.SetError(result_errno, eErrorTypePOSIX);
else
error.SetError(-1, eErrorTypeGeneric);
@@ -3026,7 +3037,7 @@ GDBRemoteCommunicationClient::OpenFile(const lldb_private::FileSpec &file_spec,
bool GDBRemoteCommunicationClient::CloseFile(lldb::user_id_t fd,
Status &error) {
lldb_private::StreamString stream;
- stream.Printf("vFile:close:%i", (int)fd);
+ stream.Printf("vFile:close:%x", (int)fd);
StringExtractorGDBRemote response;
if (SendPacketAndWaitForResponse(stream.GetString(), response) ==
PacketResult::Success) {
@@ -3035,22 +3046,66 @@ bool GDBRemoteCommunicationClient::CloseFile(lldb::user_id_t fd,
return false;
}
-// Extension of host I/O packets to get the file size.
-lldb::user_id_t GDBRemoteCommunicationClient::GetFileSize(
- const lldb_private::FileSpec &file_spec) {
- std::string path(file_spec.GetPath(false));
+llvm::Optional<GDBRemoteFStatData>
+GDBRemoteCommunicationClient::FStat(lldb::user_id_t fd) {
lldb_private::StreamString stream;
- stream.PutCString("vFile:size:");
- stream.PutStringAsRawHex8(path);
+ stream.Printf("vFile:fstat:%" PRIx64, fd);
StringExtractorGDBRemote response;
if (SendPacketAndWaitForResponse(stream.GetString(), response) ==
PacketResult::Success) {
if (response.GetChar() != 'F')
+ return llvm::None;
+ int64_t size = response.GetS64(-1, 16);
+ if (size > 0 && response.GetChar() == ';') {
+ std::string buffer;
+ if (response.GetEscapedBinaryData(buffer)) {
+ GDBRemoteFStatData out;
+ if (buffer.size() != sizeof(out))
+ return llvm::None;
+ memcpy(&out, buffer.data(), sizeof(out));
+ return out;
+ }
+ }
+ }
+ return llvm::None;
+}
+
+llvm::Optional<GDBRemoteFStatData>
+GDBRemoteCommunicationClient::Stat(const lldb_private::FileSpec &file_spec) {
+ Status error;
+ lldb::user_id_t fd = OpenFile(file_spec, File::eOpenOptionReadOnly, 0, error);
+ if (fd == UINT64_MAX)
+ return llvm::None;
+ llvm::Optional<GDBRemoteFStatData> st = FStat(fd);
+ CloseFile(fd, error);
+ return st;
+}
+
+// Extension of host I/O packets to get the file size.
+lldb::user_id_t GDBRemoteCommunicationClient::GetFileSize(
+ const lldb_private::FileSpec &file_spec) {
+ if (m_supports_vFileSize) {
+ std::string path(file_spec.GetPath(false));
+ lldb_private::StreamString stream;
+ stream.PutCString("vFile:size:");
+ stream.PutStringAsRawHex8(path);
+ StringExtractorGDBRemote response;
+ if (SendPacketAndWaitForResponse(stream.GetString(), response) !=
+ PacketResult::Success)
return UINT64_MAX;
- uint32_t retcode = response.GetHexMaxU64(false, UINT64_MAX);
- return retcode;
+
+ if (!response.IsUnsupportedResponse()) {
+ if (response.GetChar() != 'F')
+ return UINT64_MAX;
+ uint32_t retcode = response.GetHexMaxU64(false, UINT64_MAX);
+ return retcode;
+ }
+ m_supports_vFileSize = false;
}
- return UINT64_MAX;
+
+ // Fall back to fstat.
+ llvm::Optional<GDBRemoteFStatData> st = Stat(file_spec);
+ return st ? st->gdb_st_size : UINT64_MAX;
}
void GDBRemoteCommunicationClient::AutoCompleteDiskFileOrDirectory(
@@ -3081,37 +3136,50 @@ void GDBRemoteCommunicationClient::AutoCompleteDiskFileOrDirectory(
Status
GDBRemoteCommunicationClient::GetFilePermissions(const FileSpec &file_spec,
uint32_t &file_permissions) {
- std::string path{file_spec.GetPath(false)};
- Status error;
- lldb_private::StreamString stream;
- stream.PutCString("vFile:mode:");
- stream.PutStringAsRawHex8(path);
- StringExtractorGDBRemote response;
- if (SendPacketAndWaitForResponse(stream.GetString(), response) ==
- PacketResult::Success) {
- if (response.GetChar() != 'F') {
- error.SetErrorStringWithFormat("invalid response to '%s' packet",
+ if (m_supports_vFileMode) {
+ std::string path{file_spec.GetPath(false)};
+ Status error;
+ lldb_private::StreamString stream;
+ stream.PutCString("vFile:mode:");
+ stream.PutStringAsRawHex8(path);
+ StringExtractorGDBRemote response;
+ if (SendPacketAndWaitForResponse(stream.GetString(), response) !=
+ PacketResult::Success) {
+ error.SetErrorStringWithFormat("failed to send '%s' packet",
stream.GetData());
- } else {
- const uint32_t mode = response.GetS32(-1);
- if (static_cast<int32_t>(mode) == -1) {
- if (response.GetChar() == ',') {
- int response_errno = response.GetS32(-1);
- if (response_errno > 0)
- error.SetError(response_errno, lldb::eErrorTypePOSIX);
- else
- error.SetErrorToGenericError();
- } else
- error.SetErrorToGenericError();
+ return error;
+ }
+ if (!response.IsUnsupportedResponse()) {
+ if (response.GetChar() != 'F') {
+ error.SetErrorStringWithFormat("invalid response to '%s' packet",
+ stream.GetData());
} else {
- file_permissions = mode & (S_IRWXU | S_IRWXG | S_IRWXO);
+ const uint32_t mode = response.GetS32(-1, 16);
+ if (static_cast<int32_t>(mode) == -1) {
+ if (response.GetChar() == ',') {
+ int response_errno = gdb_errno_to_system(response.GetS32(-1, 16));
+ if (response_errno > 0)
+ error.SetError(response_errno, lldb::eErrorTypePOSIX);
+ else
+ error.SetErrorToGenericError();
+ } else
+ error.SetErrorToGenericError();
+ } else {
+ file_permissions = mode & (S_IRWXU | S_IRWXG | S_IRWXO);
+ }
}
+ return error;
+ } else { // response.IsUnsupportedResponse()
+ m_supports_vFileMode = false;
}
- } else {
- error.SetErrorStringWithFormat("failed to send '%s' packet",
- stream.GetData());
}
- return error;
+
+ // Fall back to fstat.
+ if (llvm::Optional<GDBRemoteFStatData> st = Stat(file_spec)) {
+ file_permissions = st->gdb_st_mode & (S_IRWXU | S_IRWXG | S_IRWXO);
+ return Status();
+ }
+ return Status("fstat failed");
}
uint64_t GDBRemoteCommunicationClient::ReadFile(lldb::user_id_t fd,
@@ -3119,16 +3187,23 @@ uint64_t GDBRemoteCommunicationClient::ReadFile(lldb::user_id_t fd,
uint64_t dst_len,
Status &error) {
lldb_private::StreamString stream;
- stream.Printf("vFile:pread:%i,%" PRId64 ",%" PRId64, (int)fd, dst_len,
+ stream.Printf("vFile:pread:%x,%" PRIx64 ",%" PRIx64, (int)fd, dst_len,
offset);
StringExtractorGDBRemote response;
if (SendPacketAndWaitForResponse(stream.GetString(), response) ==
PacketResult::Success) {
if (response.GetChar() != 'F')
return 0;
- uint32_t retcode = response.GetHexMaxU32(false, UINT32_MAX);
- if (retcode == UINT32_MAX)
- return retcode;
+ int64_t retcode = response.GetS64(-1, 16);
+ if (retcode == -1) {
+ error.SetErrorToGenericError();
+ if (response.GetChar() == ',') {
+ int response_errno = gdb_errno_to_system(response.GetS32(-1, 16));
+ if (response_errno > 0)
+ error.SetError(response_errno, lldb::eErrorTypePOSIX);
+ }
+ return -1;
+ }
const char next = (response.Peek() ? *response.Peek() : 0);
if (next == ',')
return 0;
@@ -3153,7 +3228,7 @@ uint64_t GDBRemoteCommunicationClient::WriteFile(lldb::user_id_t fd,
uint64_t src_len,
Status &error) {
lldb_private::StreamGDBRemote stream;
- stream.Printf("vFile:pwrite:%i,%" PRId64 ",", (int)fd, offset);
+ stream.Printf("vFile:pwrite:%x,%" PRIx64 ",", (int)fd, offset);
stream.PutEscapedBytes(src, src_len);
StringExtractorGDBRemote response;
if (SendPacketAndWaitForResponse(stream.GetString(), response) ==
@@ -3162,15 +3237,15 @@ uint64_t GDBRemoteCommunicationClient::WriteFile(lldb::user_id_t fd,
error.SetErrorStringWithFormat("write file failed");
return 0;
}
- uint64_t bytes_written = response.GetU64(UINT64_MAX);
- if (bytes_written == UINT64_MAX) {
+ int64_t bytes_written = response.GetS64(-1, 16);
+ if (bytes_written == -1) {
error.SetErrorToGenericError();
if (response.GetChar() == ',') {
- int response_errno = response.GetS32(-1);
+ int response_errno = gdb_errno_to_system(response.GetS32(-1, 16));
if (response_errno > 0)
error.SetError(response_errno, lldb::eErrorTypePOSIX);
}
- return 0;
+ return -1;
}
return bytes_written;
} else {
@@ -3194,11 +3269,11 @@ Status GDBRemoteCommunicationClient::CreateSymlink(const FileSpec &src,
if (SendPacketAndWaitForResponse(stream.GetString(), response) ==
PacketResult::Success) {
if (response.GetChar() == 'F') {
- uint32_t result = response.GetU32(UINT32_MAX);
+ uint32_t result = response.GetHexMaxU32(false, UINT32_MAX);
if (result != 0) {
error.SetErrorToGenericError();
if (response.GetChar() == ',') {
- int response_errno = response.GetS32(-1);
+ int response_errno = gdb_errno_to_system(response.GetS32(-1, 16));
if (response_errno > 0)
error.SetError(response_errno, lldb::eErrorTypePOSIX);
}
@@ -3225,11 +3300,11 @@ Status GDBRemoteCommunicationClient::Unlink(const FileSpec &file_spec) {
if (SendPacketAndWaitForResponse(stream.GetString(), response) ==
PacketResult::Success) {
if (response.GetChar() == 'F') {
- uint32_t result = response.GetU32(UINT32_MAX);
+ uint32_t result = response.GetHexMaxU32(false, UINT32_MAX);
if (result != 0) {
error.SetErrorToGenericError();
if (response.GetChar() == ',') {
- int response_errno = response.GetS32(-1);
+ int response_errno = gdb_errno_to_system(response.GetS32(-1, 16));
if (response_errno > 0)
error.SetError(response_errno, lldb::eErrorTypePOSIX);
}
@@ -3247,21 +3322,33 @@ Status GDBRemoteCommunicationClient::Unlink(const FileSpec &file_spec) {
// Extension of host I/O packets to get whether a file exists.
bool GDBRemoteCommunicationClient::GetFileExists(
const lldb_private::FileSpec &file_spec) {
- std::string path(file_spec.GetPath(false));
- lldb_private::StreamString stream;
- stream.PutCString("vFile:exists:");
- stream.PutStringAsRawHex8(path);
- StringExtractorGDBRemote response;
- if (SendPacketAndWaitForResponse(stream.GetString(), response) ==
- PacketResult::Success) {
- if (response.GetChar() != 'F')
- return false;
- if (response.GetChar() != ',')
+ if (m_supports_vFileExists) {
+ std::string path(file_spec.GetPath(false));
+ lldb_private::StreamString stream;
+ stream.PutCString("vFile:exists:");
+ stream.PutStringAsRawHex8(path);
+ StringExtractorGDBRemote response;
+ if (SendPacketAndWaitForResponse(stream.GetString(), response) !=
+ PacketResult::Success)
return false;
- bool retcode = (response.GetChar() != '0');
- return retcode;
+ if (!response.IsUnsupportedResponse()) {
+ if (response.GetChar() != 'F')
+ return false;
+ if (response.GetChar() != ',')
+ return false;
+ bool retcode = (response.GetChar() != '0');
+ return retcode;
+ } else
+ m_supports_vFileExists = false;
}
- return false;
+
+ // Fallback to open.
+ Status error;
+ lldb::user_id_t fd = OpenFile(file_spec, File::eOpenOptionReadOnly, 0, error);
+ if (fd == UINT64_MAX)
+ return false;
+ CloseFile(fd, error);
+ return true;
}
bool GDBRemoteCommunicationClient::CalculateMD5(
@@ -3790,15 +3877,14 @@ GDBRemoteCommunicationClient::GetModulesInfo(
// query the target remote for extended information using the qXfer packet
//
-// example: object='features', annex='target.xml', out=<xml output> return:
-// 'true' on success
-// 'false' on failure (err set)
-bool GDBRemoteCommunicationClient::ReadExtFeature(
- const lldb_private::ConstString object,
- const lldb_private::ConstString annex, std::string &out,
- lldb_private::Status &err) {
-
- std::stringstream output;
+// example: object='features', annex='target.xml'
+// return: <xml output> or error
+llvm::Expected<std::string>
+GDBRemoteCommunicationClient::ReadExtFeature(llvm::StringRef object,
+ llvm::StringRef annex) {
+
+ std::string output;
+ llvm::raw_string_ostream output_stream(output);
StringExtractorGDBRemote chunk;
uint64_t size = GetRemoteMaxPacketSize();
@@ -3812,28 +3898,22 @@ bool GDBRemoteCommunicationClient::ReadExtFeature(
while (active) {
// send query extended feature packet
- std::stringstream packet;
- packet << "qXfer:" << object.AsCString("")
- << ":read:" << annex.AsCString("") << ":" << std::hex << offset
- << "," << std::hex << size;
+ std::string packet =
+ ("qXfer:" + object + ":read:" + annex + ":" +
+ llvm::Twine::utohexstr(offset) + "," + llvm::Twine::utohexstr(size))
+ .str();
GDBRemoteCommunication::PacketResult res =
- SendPacketAndWaitForResponse(packet.str(), chunk);
+ SendPacketAndWaitForResponse(packet, chunk);
- if (res != GDBRemoteCommunication::PacketResult::Success) {
- err.SetErrorString("Error sending $qXfer packet");
- return false;
- }
-
- const std::string &str = std::string(chunk.GetStringRef());
- if (str.length() == 0) {
- // should have some data in chunk
- err.SetErrorString("Empty response from $qXfer packet");
- return false;
+ if (res != GDBRemoteCommunication::PacketResult::Success ||
+ chunk.GetStringRef().empty()) {
+ return llvm::createStringError(llvm::inconvertibleErrorCode(),
+ "Error sending $qXfer packet");
}
// check packet code
- switch (str[0]) {
+ switch (chunk.GetStringRef()[0]) {
// last chunk
case ('l'):
active = false;
@@ -3841,21 +3921,19 @@ bool GDBRemoteCommunicationClient::ReadExtFeature(
// more chunks
case ('m'):
- if (str.length() > 1)
- output << &str[1];
- offset += str.length() - 1;
+ output_stream << chunk.GetStringRef().drop_front();
+ offset += chunk.GetStringRef().size() - 1;
break;
// unknown chunk
default:
- err.SetErrorString("Invalid continuation code from $qXfer packet");
- return false;
+ return llvm::createStringError(
+ llvm::inconvertibleErrorCode(),
+ "Invalid continuation code from $qXfer packet");
}
}
- out = output.str();
- err.Success();
- return true;
+ return output_stream.str();
}
// Notify the target that gdb is prepared to serve symbol lookup requests.
@@ -4146,3 +4224,14 @@ void GDBRemoteCommunicationClient::OnRunPacketSent(bool first) {
GDBRemoteClientBase::OnRunPacketSent(first);
m_curr_tid = LLDB_INVALID_THREAD_ID;
}
+
+bool GDBRemoteCommunicationClient::UsesNativeSignals() {
+ if (m_uses_native_signals == eLazyBoolCalculate)
+ GetRemoteQSupported();
+ if (m_uses_native_signals == eLazyBoolYes)
+ return true;
+
+ // If the remote didn't indicate native-signal support explicitly,
+ // check whether it is an old version of lldb-server.
+ return GetThreadSuffixSupported();
+}
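
The vFile:size, vFile:mode and vFile:exists changes in this file all share one probe-and-fall-back shape: send the dedicated packet while the corresponding m_supports_vFile* flag is still set, and on an empty (unsupported) reply clear the flag and satisfy the request through the open()+fstat()-based Stat() instead. Below is a minimal standalone sketch of that shape; Reply, SendPacket, Client and FallbackStatSize are made-up stand-ins, not the real GDBRemoteCommunicationClient API.

#include <cstdint>
#include <optional>
#include <string>

struct Reply {
  std::string text;                                   // raw reply payload
  bool IsUnsupported() const { return text.empty(); } // "" = not implemented
};

// Pretend stub: an old server that never learned vFile:size.
static Reply SendPacket(const std::string &packet) {
  if (packet.rfind("vFile:size:", 0) == 0)
    return {""};    // unsupported -> empty reply
  return {"F1000"}; // some other packet; irrelevant here
}

struct Client {
  bool supports_vfile_size = true; // optimistic until the first empty reply

  uint64_t GetFileSize(const std::string &hex_path) {
    if (supports_vfile_size) {
      Reply r = SendPacket("vFile:size:" + hex_path);
      if (!r.IsUnsupported())
        return std::stoull(r.text.substr(1), nullptr, 16); // skip leading 'F'
      supports_vfile_size = false; // remember, never probe again
    }
    // Fallback; the real client goes through Stat(), i.e. vFile:open +
    // vFile:fstat + vFile:close.
    std::optional<uint64_t> st = FallbackStatSize(hex_path);
    return st ? *st : UINT64_MAX;
  }

  static std::optional<uint64_t> FallbackStatSize(const std::string &) {
    return 0x1000; // pretend the fstat-based path succeeded
  }
};
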
diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h
index 1e1797c10dfc..6765372ce124 100644
--- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h
+++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h
@@ -65,27 +65,6 @@ public:
// we are communicating with it.
bool HandshakeWithServer(Status *error_ptr);
- // For packets which specify a range of output to be returned,
- // return all of the output via a series of request packets of the form
- // <prefix>0,<size>
- // <prefix><size>,<size>
- // <prefix><size>*2,<size>
- // <prefix><size>*3,<size>
- // ...
- // until a "$l..." packet is received, indicating the end.
- // (size is in hex; this format is used by a standard gdbserver to
- // return the given portion of the output specified by <prefix>;
- // for example, "qXfer:libraries-svr4:read::fff,1000" means
- // "return a chunk of the xml description file for shared
- // library load addresses, where the chunk starts at offset 0xfff
- // and continues for 0x1000 bytes").
- // Concatenate the resulting server response packets together and
- // return in response_string. If any packet fails, the return value
- // indicates that failure and the returned string value is undefined.
- PacketResult
- SendPacketsAndConcatenateResponses(const char *send_payload_prefix,
- std::string &response_string);
-
bool GetThreadSuffixSupported();
// This packet is usually sent first and the boolean return value
@@ -147,21 +126,6 @@ public:
int SendLaunchEventDataPacket(const char *data,
bool *was_supported = nullptr);
- /// Sends a "vAttach:PID" where PID is in hex.
- ///
- /// \param[in] pid
- /// A process ID for the remote gdb server to attach to.
- ///
- /// \param[out] response
- /// The response received from the gdb server. If the return
- /// value is zero, \a response will contain a stop reply
- /// packet.
- ///
- /// \return
- /// Zero if the attach was successful, or an error indicating
- /// an error code.
- int SendAttach(lldb::pid_t pid, StringExtractorGDBRemote &response);
-
/// Sends a GDB remote protocol 'I' packet that delivers stdin
/// data to the remote process.
///
@@ -235,7 +199,7 @@ public:
bool DeallocateMemory(lldb::addr_t addr);
- Status Detach(bool keep_stopped);
+ Status Detach(bool keep_stopped, lldb::pid_t pid = LLDB_INVALID_PROCESS_ID);
Status GetMemoryRegionInfo(lldb::addr_t addr, MemoryRegionInfo &range_info);
@@ -275,9 +239,9 @@ public:
llvm::VersionTuple GetMacCatalystVersion();
- bool GetOSBuildString(std::string &s);
+ llvm::Optional<std::string> GetOSBuildString();
- bool GetOSKernelDescription(std::string &s);
+ llvm::Optional<std::string> GetOSKernelDescription();
ArchSpec GetSystemArchitecture();
@@ -330,8 +294,6 @@ public:
uint32_t length, // Byte Size of breakpoint or watchpoint
std::chrono::seconds interrupt_timeout); // Time to wait for an interrupt
- bool SetNonStopMode(const bool enable);
-
void TestPacketSpeed(const uint32_t num_packets, uint32_t max_send,
uint32_t max_recv, uint64_t recv_amount, bool json,
Stream &strm);
@@ -391,6 +353,12 @@ public:
bool CloseFile(lldb::user_id_t fd, Status &error);
+ llvm::Optional<GDBRemoteFStatData> FStat(lldb::user_id_t fd);
+
+ // NB: this is just a convenience wrapper over open() + fstat(). It does not
+ // work if the file cannot be opened.
+ llvm::Optional<GDBRemoteFStatData> Stat(const FileSpec &file_spec);
+
lldb::user_id_t GetFileSize(const FileSpec &file_spec);
void AutoCompleteDiskFileOrDirectory(CompletionRequest &request,
@@ -465,6 +433,8 @@ public:
bool GetMemoryTaggingSupported();
+ bool UsesNativeSignals();
+
lldb::DataBufferSP ReadMemoryTags(lldb::addr_t addr, size_t len,
int32_t type);
@@ -483,9 +453,8 @@ public:
GetModulesInfo(llvm::ArrayRef<FileSpec> module_file_specs,
const llvm::Triple &triple);
- bool ReadExtFeature(const lldb_private::ConstString object,
- const lldb_private::ConstString annex, std::string &out,
- lldb_private::Status &err);
+ llvm::Expected<std::string> ReadExtFeature(llvm::StringRef object,
+ llvm::StringRef annex);
void ServeSymbolLookups(lldb_private::Process *process);
@@ -547,6 +516,8 @@ public:
SendTraceGetBinaryData(const TraceGetBinaryDataRequest &request,
std::chrono::seconds interrupt_timeout);
+ bool GetSaveCoreSupported() const;
+
protected:
LazyBool m_supports_not_sending_acks = eLazyBoolCalculate;
LazyBool m_supports_thread_suffix = eLazyBoolCalculate;
@@ -585,6 +556,8 @@ protected:
LazyBool m_supports_error_string_reply = eLazyBoolCalculate;
LazyBool m_supports_multiprocess = eLazyBoolCalculate;
LazyBool m_supports_memory_tagging = eLazyBoolCalculate;
+ LazyBool m_supports_qSaveCore = eLazyBoolCalculate;
+ LazyBool m_uses_native_signals = eLazyBoolCalculate;
bool m_supports_qProcessInfoPID : 1, m_supports_qfProcessInfo : 1,
m_supports_qUserName : 1, m_supports_qGroupName : 1,
@@ -593,7 +566,9 @@ protected:
m_supports_QEnvironment : 1, m_supports_QEnvironmentHexEncoded : 1,
m_supports_qSymbol : 1, m_qSymbol_requests_done : 1,
m_supports_qModuleInfo : 1, m_supports_jThreadsInfo : 1,
- m_supports_jModulesInfo : 1;
+ m_supports_jModulesInfo : 1, m_supports_vFileSize : 1,
+ m_supports_vFileMode : 1, m_supports_vFileExists : 1,
+ m_supports_vRun : 1;
/// Current gdb remote protocol process identifier for all other operations
lldb::pid_t m_curr_pid = LLDB_INVALID_PROCESS_ID;
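
The new m_uses_native_signals and m_supports_qSaveCore members are LazyBools, filled in once from the stub's qSupported reply (the server side advertises "native-signals+" and "qSaveCore+" later in this patch). A simplified standalone sketch of that negotiation, assuming only the usual ';'-separated qSupported feature list with a trailing '+' on supported features; the example reply string is invented.

#include <iostream>
#include <sstream>
#include <string>

enum class LazyBool { Calculate, No, Yes };

int main() {
  // Example reply; the real list is assembled by HandleFeatures() on the
  // server and parsed by GetRemoteQSupported() on the client.
  const std::string reply =
      "PacketSize=20000;QStartNoAckMode+;qEcho+;native-signals+;qSaveCore+";

  LazyBool uses_native_signals = LazyBool::Calculate;
  LazyBool supports_qsavecore = LazyBool::Calculate;

  std::stringstream ss(reply);
  std::string feature;
  while (std::getline(ss, feature, ';')) {
    if (feature == "native-signals+")
      uses_native_signals = LazyBool::Yes;
    else if (feature == "qSaveCore+")
      supports_qsavecore = LazyBool::Yes;
  }
  // Anything not advertised stays "no" rather than "calculate".
  if (uses_native_signals == LazyBool::Calculate)
    uses_native_signals = LazyBool::No;
  if (supports_qsavecore == LazyBool::Calculate)
    supports_qsavecore = LazyBool::No;

  std::cout << "native signals: " << (uses_native_signals == LazyBool::Yes)
            << ", qSaveCore: " << (supports_qsavecore == LazyBool::Yes) << '\n';
}
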
diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerCommon.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerCommon.cpp
index b2b802552720..f371649842e8 100644
--- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerCommon.cpp
+++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerCommon.cpp
@@ -157,6 +157,9 @@ GDBRemoteCommunicationServerCommon::GDBRemoteCommunicationServerCommon(
StringExtractorGDBRemote::eServerPacketType_vFile_size,
&GDBRemoteCommunicationServerCommon::Handle_vFile_Size);
RegisterMemberFunctionHandler(
+ StringExtractorGDBRemote::eServerPacketType_vFile_fstat,
+ &GDBRemoteCommunicationServerCommon::Handle_vFile_FStat);
+ RegisterMemberFunctionHandler(
StringExtractorGDBRemote::eServerPacketType_vFile_stat,
&GDBRemoteCommunicationServerCommon::Handle_vFile_Stat);
RegisterMemberFunctionHandler(
@@ -264,18 +267,18 @@ GDBRemoteCommunicationServerCommon::Handle_qHostInfo(
}
#endif
- std::string s;
- if (HostInfo::GetOSBuildString(s)) {
+ if (llvm::Optional<std::string> s = HostInfo::GetOSBuildString()) {
response.PutCString("os_build:");
- response.PutStringAsRawHex8(s);
+ response.PutStringAsRawHex8(*s);
response.PutChar(';');
}
- if (HostInfo::GetOSKernelDescription(s)) {
+ if (llvm::Optional<std::string> s = HostInfo::GetOSKernelDescription()) {
response.PutCString("os_kernel:");
- response.PutStringAsRawHex8(s);
+ response.PutStringAsRawHex8(*s);
response.PutChar(';');
}
+ std::string s;
#if defined(__APPLE__)
#if defined(__arm__) || defined(__arm64__) || defined(__aarch64__)
@@ -501,10 +504,6 @@ GDBRemoteCommunicationServerCommon::Handle_vFile_Open(
packet.GetHexByteStringTerminatedBy(path, ',');
if (!path.empty()) {
if (packet.GetChar() == ',') {
- // FIXME
- // The flag values for OpenOptions do not match the values used by GDB
- // * https://sourceware.org/gdb/onlinedocs/gdb/Open-Flags.html#Open-Flags
- // * rdar://problem/46788934
auto flags = File::OpenOptions(packet.GetHexMaxU32(false, 0));
if (packet.GetChar() == ',') {
mode_t mode = packet.GetHexMaxU32(false, 0600);
@@ -513,22 +512,21 @@ GDBRemoteCommunicationServerCommon::Handle_vFile_Open(
// Do not close fd.
auto file = FileSystem::Instance().Open(path_spec, flags, mode, false);
- int save_errno = 0;
+ StreamString response;
+ response.PutChar('F');
+
int descriptor = File::kInvalidDescriptor;
if (file) {
descriptor = file.get()->GetDescriptor();
+ response.Printf("%x", descriptor);
} else {
+ response.PutCString("-1");
std::error_code code = errorToErrorCode(file.takeError());
if (code.category() == std::system_category()) {
- save_errno = code.value();
+ response.Printf(",%x", code.value());
}
}
- StreamString response;
- response.PutChar('F');
- response.Printf("%i", descriptor);
- if (save_errno)
- response.Printf(",%i", save_errno);
return SendPacketNoLock(response.GetString());
}
}
@@ -536,11 +534,22 @@ GDBRemoteCommunicationServerCommon::Handle_vFile_Open(
return SendErrorResponse(18);
}
+static GDBErrno system_errno_to_gdb(int err) {
+ switch (err) {
+#define HANDLE_ERRNO(name, value) \
+ case name: \
+ return GDB_##name;
+#include "Plugins/Process/gdb-remote/GDBRemoteErrno.def"
+ default:
+ return GDB_EUNKNOWN;
+ }
+}
+
GDBRemoteCommunication::PacketResult
GDBRemoteCommunicationServerCommon::Handle_vFile_Close(
StringExtractorGDBRemote &packet) {
packet.SetFilePos(::strlen("vFile:close:"));
- int fd = packet.GetS32(-1);
+ int fd = packet.GetS32(-1, 16);
int err = -1;
int save_errno = 0;
if (fd >= 0) {
@@ -553,9 +562,9 @@ GDBRemoteCommunicationServerCommon::Handle_vFile_Close(
}
StreamString response;
response.PutChar('F');
- response.Printf("%i", err);
+ response.Printf("%x", err);
if (save_errno)
- response.Printf(",%i", save_errno);
+ response.Printf(",%x", system_errno_to_gdb(save_errno));
return SendPacketNoLock(response.GetString());
}
@@ -564,28 +573,29 @@ GDBRemoteCommunicationServerCommon::Handle_vFile_pRead(
StringExtractorGDBRemote &packet) {
StreamGDBRemote response;
packet.SetFilePos(::strlen("vFile:pread:"));
- int fd = packet.GetS32(-1);
+ int fd = packet.GetS32(-1, 16);
if (packet.GetChar() == ',') {
- size_t count = packet.GetU64(SIZE_MAX);
+ size_t count = packet.GetHexMaxU64(false, SIZE_MAX);
if (packet.GetChar() == ',') {
- off_t offset = packet.GetU64(UINT32_MAX);
+ off_t offset = packet.GetHexMaxU32(false, UINT32_MAX);
if (count == SIZE_MAX) {
- response.Printf("F-1:%i", EINVAL);
+ response.Printf("F-1:%x", EINVAL);
return SendPacketNoLock(response.GetString());
}
std::string buffer(count, 0);
- NativeFile file(fd, File::eOpenOptionRead, false);
+ NativeFile file(fd, File::eOpenOptionReadOnly, false);
Status error = file.Read(static_cast<void *>(&buffer[0]), count, offset);
- const ssize_t bytes_read = error.Success() ? count : -1;
const int save_errno = error.GetError();
response.PutChar('F');
- response.Printf("%zi", bytes_read);
- if (save_errno)
- response.Printf(",%i", save_errno);
- else {
+ if (error.Success()) {
+ response.Printf("%zx", count);
response.PutChar(';');
- response.PutEscapedBytes(&buffer[0], bytes_read);
+ response.PutEscapedBytes(&buffer[0], count);
+ } else {
+ response.PutCString("-1");
+ if (save_errno)
+ response.Printf(",%x", system_errno_to_gdb(save_errno));
}
return SendPacketNoLock(response.GetString());
}
@@ -601,23 +611,26 @@ GDBRemoteCommunicationServerCommon::Handle_vFile_pWrite(
StreamGDBRemote response;
response.PutChar('F');
- int fd = packet.GetU32(UINT32_MAX);
+ int fd = packet.GetS32(-1, 16);
if (packet.GetChar() == ',') {
- off_t offset = packet.GetU64(UINT32_MAX);
+ off_t offset = packet.GetHexMaxU32(false, UINT32_MAX);
if (packet.GetChar() == ',') {
std::string buffer;
if (packet.GetEscapedBinaryData(buffer)) {
- NativeFile file(fd, File::eOpenOptionWrite, false);
+ NativeFile file(fd, File::eOpenOptionWriteOnly, false);
size_t count = buffer.size();
Status error =
file.Write(static_cast<const void *>(&buffer[0]), count, offset);
- const ssize_t bytes_written = error.Success() ? count : -1;
const int save_errno = error.GetError();
- response.Printf("%zi", bytes_written);
- if (save_errno)
- response.Printf(",%i", save_errno);
+ if (error.Success())
+ response.Printf("%zx", count);
+ else {
+ response.PutCString("-1");
+ if (save_errno)
+ response.Printf(",%x", system_errno_to_gdb(save_errno));
+ }
} else {
- response.Printf("-1,%i", EINVAL);
+ response.Printf("-1,%x", EINVAL);
}
return SendPacketNoLock(response.GetString());
}
@@ -659,9 +672,10 @@ GDBRemoteCommunicationServerCommon::Handle_vFile_Mode(
std::error_code ec;
const uint32_t mode = FileSystem::Instance().GetPermissions(file_spec, ec);
StreamString response;
- response.Printf("F%u", mode);
- if (mode == 0 || ec)
- response.Printf(",%i", (int)Status(ec).GetError());
+ if (mode != llvm::sys::fs::perms_not_known)
+ response.Printf("F%x", mode);
+ else
+ response.Printf("F-1,%x", (int)Status(ec).GetError());
return SendPacketNoLock(response.GetString());
}
return SendErrorResponse(23);
@@ -701,7 +715,7 @@ GDBRemoteCommunicationServerCommon::Handle_vFile_symlink(
Status error = FileSystem::Instance().Symlink(src_spec, FileSpec(dst));
StreamString response;
- response.Printf("F%u,%u", error.GetError(), error.GetError());
+ response.Printf("F%x,%x", error.GetError(), error.GetError());
return SendPacketNoLock(response.GetString());
}
@@ -713,7 +727,7 @@ GDBRemoteCommunicationServerCommon::Handle_vFile_unlink(
packet.GetHexByteString(path);
Status error(llvm::sys::fs::remove(path));
StreamString response;
- response.Printf("F%u,%u", error.GetError(), error.GetError());
+ response.Printf("F%x,%x", error.GetError(), error.GetError());
return SendPacketNoLock(response.GetString());
}
@@ -755,6 +769,54 @@ GDBRemoteCommunicationServerCommon::Handle_qPlatform_shell(
return SendErrorResponse(24);
}
+template <typename T, typename U>
+static void fill_clamp(T &dest, U src, typename T::value_type fallback) {
+ static_assert(std::is_unsigned<typename T::value_type>::value,
+ "Destination type must be unsigned.");
+ using UU = typename std::make_unsigned<U>::type;
+ constexpr auto T_max = std::numeric_limits<typename T::value_type>::max();
+ dest = src >= 0 && static_cast<UU>(src) <= T_max ? src : fallback;
+}
+
+GDBRemoteCommunication::PacketResult
+GDBRemoteCommunicationServerCommon::Handle_vFile_FStat(
+ StringExtractorGDBRemote &packet) {
+ StreamGDBRemote response;
+ packet.SetFilePos(::strlen("vFile:fstat:"));
+ int fd = packet.GetS32(-1, 16);
+
+ struct stat file_stats;
+ if (::fstat(fd, &file_stats) == -1) {
+ const int save_errno = errno;
+ response.Printf("F-1,%x", system_errno_to_gdb(save_errno));
+ return SendPacketNoLock(response.GetString());
+ }
+
+ GDBRemoteFStatData data;
+ fill_clamp(data.gdb_st_dev, file_stats.st_dev, 0);
+ fill_clamp(data.gdb_st_ino, file_stats.st_ino, 0);
+ data.gdb_st_mode = file_stats.st_mode;
+ fill_clamp(data.gdb_st_nlink, file_stats.st_nlink, UINT32_MAX);
+ fill_clamp(data.gdb_st_uid, file_stats.st_uid, 0);
+ fill_clamp(data.gdb_st_gid, file_stats.st_gid, 0);
+ fill_clamp(data.gdb_st_rdev, file_stats.st_rdev, 0);
+ data.gdb_st_size = file_stats.st_size;
+#if !defined(_WIN32)
+ data.gdb_st_blksize = file_stats.st_blksize;
+ data.gdb_st_blocks = file_stats.st_blocks;
+#else
+ data.gdb_st_blksize = 0;
+ data.gdb_st_blocks = 0;
+#endif
+ fill_clamp(data.gdb_st_atime, file_stats.st_atime, 0);
+ fill_clamp(data.gdb_st_mtime, file_stats.st_mtime, 0);
+ fill_clamp(data.gdb_st_ctime, file_stats.st_ctime, 0);
+
+ response.Printf("F%zx;", sizeof(data));
+ response.PutEscapedBytes(&data, sizeof(data));
+ return SendPacketNoLock(response.GetString());
+}
+
GDBRemoteCommunication::PacketResult
GDBRemoteCommunicationServerCommon::Handle_vFile_Stat(
StringExtractorGDBRemote &packet) {
@@ -795,7 +857,7 @@ GDBRemoteCommunicationServerCommon::Handle_qPlatform_mkdir(
Status error(llvm::sys::fs::create_directory(path, mode));
StreamGDBRemote response;
- response.Printf("F%u", error.GetError());
+ response.Printf("F%x", error.GetError());
return SendPacketNoLock(response.GetString());
}
@@ -815,7 +877,7 @@ GDBRemoteCommunicationServerCommon::Handle_qPlatform_chmod(
Status error(llvm::sys::fs::setPermissions(path, perms));
StreamGDBRemote response;
- response.Printf("F%u", error.GetError());
+ response.Printf("F%x", error.GetError());
return SendPacketNoLock(response.GetString());
}
@@ -1287,5 +1349,6 @@ std::vector<std::string> GDBRemoteCommunicationServerCommon::HandleFeatures(
llvm::formatv("PacketSize={0}", max_packet_size),
"QStartNoAckMode+",
"qEcho+",
+ "native-signals+",
};
}
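
Handle_vFile_FStat above replies with "F<hex sizeof(data)>;" followed by the binary-escaped GDBRemoteFStatData, and fill_clamp() keeps the host's stat fields from overflowing the fixed-width wire fields. A standalone illustration of the clamping logic follows; it is simplified to plain unsigned destinations, whereas the real template writes into packed wire types (hence the typename T::value_type in the diff).

#include <cstdint>
#include <iostream>
#include <limits>
#include <type_traits>

template <typename Dest, typename Src>
void fill_clamp(Dest &dest, Src src, Dest fallback) {
  static_assert(std::is_unsigned<Dest>::value, "wire fields are unsigned");
  using USrc = typename std::make_unsigned<Src>::type;
  constexpr Dest dest_max = std::numeric_limits<Dest>::max();
  // Negative values and values that do not fit both collapse to the fallback.
  dest = (src >= 0 && static_cast<USrc>(src) <= dest_max)
             ? static_cast<Dest>(src)
             : fallback;
}

int main() {
  uint32_t nlink = 0;
  fill_clamp(nlink, static_cast<uint64_t>(5), uint32_t(UINT32_MAX));
  std::cout << nlink << '\n'; // 5: fits, copied through

  uint32_t mtime = 0;
  fill_clamp(mtime, static_cast<int64_t>(-1), uint32_t(0));
  std::cout << mtime << '\n'; // 0: negative time value falls back
}
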
diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerCommon.h b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerCommon.h
index ecd80923fcf0..029972348ef0 100644
--- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerCommon.h
+++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerCommon.h
@@ -71,6 +71,8 @@ protected:
PacketResult Handle_vFile_unlink(StringExtractorGDBRemote &packet);
+ PacketResult Handle_vFile_FStat(StringExtractorGDBRemote &packet);
+
PacketResult Handle_vFile_Stat(StringExtractorGDBRemote &packet);
PacketResult Handle_vFile_MD5(StringExtractorGDBRemote &packet);
diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp
index 5e69b5793f9f..5360db3d8462 100644
--- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp
+++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp
@@ -25,6 +25,7 @@
#include "lldb/Host/Host.h"
#include "lldb/Host/HostInfo.h"
#include "lldb/Host/PosixApi.h"
+#include "lldb/Host/Socket.h"
#include "lldb/Host/common/NativeProcessProtocol.h"
#include "lldb/Host/common/NativeRegisterContext.h"
#include "lldb/Host/common/NativeThreadProtocol.h"
@@ -183,6 +184,9 @@ void GDBRemoteCommunicationServerLLGS::RegisterPacketHandlers() {
StringExtractorGDBRemote::eServerPacketType_vCont_actions,
&GDBRemoteCommunicationServerLLGS::Handle_vCont_actions);
RegisterMemberFunctionHandler(
+ StringExtractorGDBRemote::eServerPacketType_vRun,
+ &GDBRemoteCommunicationServerLLGS::Handle_vRun);
+ RegisterMemberFunctionHandler(
StringExtractorGDBRemote::eServerPacketType_x,
&GDBRemoteCommunicationServerLLGS::Handle_memory_read);
RegisterMemberFunctionHandler(StringExtractorGDBRemote::eServerPacketType_Z,
@@ -226,6 +230,10 @@ void GDBRemoteCommunicationServerLLGS::RegisterPacketHandlers() {
quit = true;
return this->Handle_k(packet);
});
+
+ RegisterMemberFunctionHandler(
+ StringExtractorGDBRemote::eServerPacketType_qLLDBSaveCore,
+ &GDBRemoteCommunicationServerLLGS::Handle_qSaveCore);
}
void GDBRemoteCommunicationServerLLGS::SetLaunchInfo(const ProcessLaunchInfo &info) {
@@ -278,7 +286,7 @@ Status GDBRemoteCommunicationServerLLGS::LaunchProcess() {
if (should_forward_stdio) {
// nullptr means it's not redirected to file or pty (in case of LLGS local)
// at least one of stdio will be transferred pty<->gdb-remote we need to
- // give the pty master handle to this object to read and/or write
+ // give the pty primary handle to this object to read and/or write
LLDB_LOG(log,
"pid = {0}: setting up stdout/stderr redirection via $O "
"gdb-remote commands",
@@ -331,7 +339,7 @@ Status GDBRemoteCommunicationServerLLGS::AttachToProcess(lldb::pid_t pid) {
auto process_or = m_process_factory.Attach(pid, *this, m_mainloop);
if (!process_or) {
Status status(process_or.takeError());
- llvm::errs() << llvm::formatv("failed to attach to process {0}: {1}", pid,
+ llvm::errs() << llvm::formatv("failed to attach to process {0}: {1}\n", pid,
status);
return status;
}
@@ -1820,13 +1828,6 @@ GDBRemoteCommunicationServerLLGS::Handle_qRegisterInfo(
response.PutChar(';');
}
- if (reg_info->dynamic_size_dwarf_expr_bytes) {
- const size_t dwarf_opcode_len = reg_info->dynamic_size_dwarf_len;
- response.PutCString("dynamic_size_dwarf_expr_bytes:");
- for (uint32_t i = 0; i < dwarf_opcode_len; ++i)
- response.PutHex8(reg_info->dynamic_size_dwarf_expr_bytes[i]);
- response.PutChar(';');
- }
return SendPacketNoLock(response.GetString());
}
@@ -2064,12 +2065,8 @@ GDBRemoteCommunicationServerLLGS::Handle_P(StringExtractorGDBRemote &packet) {
return SendErrorResponse(0x47);
}
- // The dwarf expression are evaluate on host site which may cause register
- // size to change Hence the reg_size may not be same as reg_info->bytes_size
- if ((reg_size != reg_info->byte_size) &&
- !(reg_info->dynamic_size_dwarf_expr_bytes)) {
+ if (reg_size != reg_info->byte_size)
return SendIllFormedResponse(packet, "P packet register size is incorrect");
- }
// Build the reginfos response.
StreamGDBRemote response;
@@ -2909,14 +2906,6 @@ GDBRemoteCommunicationServerLLGS::BuildTargetXml() {
response.Printf("\" ");
}
- if (reg_info->dynamic_size_dwarf_expr_bytes) {
- const size_t dwarf_opcode_len = reg_info->dynamic_size_dwarf_len;
- response.PutCString("dynamic_size_dwarf_expr_bytes=\"");
- for (uint32_t i = 0; i < dwarf_opcode_len; ++i)
- response.PutHex8(reg_info->dynamic_size_dwarf_expr_bytes[i]);
- response.Printf("\" ");
- }
-
response.Printf("/>");
}
@@ -3252,6 +3241,38 @@ GDBRemoteCommunicationServerLLGS::Handle_vAttachOrWait(
}
GDBRemoteCommunication::PacketResult
+GDBRemoteCommunicationServerLLGS::Handle_vRun(
+ StringExtractorGDBRemote &packet) {
+ Log *log(GetLogIfAnyCategoriesSet(LIBLLDB_LOG_PROCESS));
+
+ llvm::StringRef s = packet.GetStringRef();
+ if (!s.consume_front("vRun;"))
+ return SendErrorResponse(8);
+
+ llvm::SmallVector<llvm::StringRef, 16> argv;
+ s.split(argv, ';');
+
+ for (llvm::StringRef hex_arg : argv) {
+ StringExtractor arg_ext{hex_arg};
+ std::string arg;
+ arg_ext.GetHexByteString(arg);
+ m_process_launch_info.GetArguments().AppendArgument(arg);
+ LLDB_LOGF(log, "LLGSPacketHandler::%s added arg: \"%s\"", __FUNCTION__,
+ arg.c_str());
+ }
+
+ if (!argv.empty()) {
+ m_process_launch_info.GetExecutableFile().SetFile(
+ m_process_launch_info.GetArguments()[0].ref(), FileSpec::Style::native);
+ m_process_launch_error = LaunchProcess();
+ if (m_process_launch_error.Success())
+ return SendStopReasonForState(m_current_process->GetState());
+ LLDB_LOG(log, "failed to launch exe: {0}", m_process_launch_error);
+ }
+ return SendErrorResponse(8);
+}
+
+GDBRemoteCommunication::PacketResult
GDBRemoteCommunicationServerLLGS::Handle_D(StringExtractorGDBRemote &packet) {
StopSTDIOForwarding();
@@ -3474,15 +3495,31 @@ GDBRemoteCommunicationServerLLGS::Handle_qMemTags(
if (packet.GetBytesLeft() < 1 || packet.GetChar() != ':')
return SendIllFormedResponse(packet, invalid_type_err);
- int32_t type =
- packet.GetS32(std::numeric_limits<int32_t>::max(), /*base=*/16);
- if (type == std::numeric_limits<int32_t>::max() ||
+ // Type is a signed integer but packed into the packet as its raw bytes.
+ // However, our GetU64 uses strtoull which allows +/-. We do not want this.
+ const char *first_type_char = packet.Peek();
+ if (first_type_char && (*first_type_char == '+' || *first_type_char == '-'))
+ return SendIllFormedResponse(packet, invalid_type_err);
+
+ // Extract type as unsigned then cast to signed.
+ // Using a uint64_t here so that we have some value outside of the 32 bit
+ // range to use as the invalid return value.
+ uint64_t raw_type =
+ packet.GetU64(std::numeric_limits<uint64_t>::max(), /*base=*/16);
+
+ if ( // Make sure the cast below would be valid
+ raw_type > std::numeric_limits<uint32_t>::max() ||
// To catch inputs like "123aardvark" that will parse but clearly aren't
// valid in this case.
packet.GetBytesLeft()) {
return SendIllFormedResponse(packet, invalid_type_err);
}
+ // First narrow to 32 bits otherwise the copy into type would take
+ // the wrong 4 bytes on big endian.
+ uint32_t raw_type_32 = raw_type;
+ int32_t type = reinterpret_cast<int32_t &>(raw_type_32);
+
StreamGDBRemote response;
std::vector<uint8_t> tags;
Status error = m_current_process->ReadMemoryTags(type, addr, length, tags);
@@ -3552,7 +3589,11 @@ GDBRemoteCommunicationServerLLGS::Handle_QMemTags(
packet.GetU64(std::numeric_limits<uint64_t>::max(), /*base=*/16);
if (raw_type > std::numeric_limits<uint32_t>::max())
return SendIllFormedResponse(packet, invalid_type_err);
- int32_t type = static_cast<int32_t>(raw_type);
+
+ // First narrow to 32 bits. Otherwise the copy below would get the wrong
+ // 4 bytes on big endian.
+ uint32_t raw_type_32 = raw_type;
+ int32_t type = reinterpret_cast<int32_t &>(raw_type_32);
// Tag data
if (packet.GetBytesLeft() < 1 || packet.GetChar() != ':')
@@ -3584,6 +3625,37 @@ GDBRemoteCommunicationServerLLGS::Handle_QMemTags(
return status.Success() ? SendOKResponse() : SendErrorResponse(1);
}
+GDBRemoteCommunication::PacketResult
+GDBRemoteCommunicationServerLLGS::Handle_qSaveCore(
+ StringExtractorGDBRemote &packet) {
+ // Fail if we don't have a current process.
+ if (!m_current_process ||
+ (m_current_process->GetID() == LLDB_INVALID_PROCESS_ID))
+ return SendErrorResponse(Status("Process not running."));
+
+ std::string path_hint;
+
+ StringRef packet_str{packet.GetStringRef()};
+ assert(packet_str.startswith("qSaveCore"));
+ if (packet_str.consume_front("qSaveCore;")) {
+ for (auto x : llvm::split(packet_str, ';')) {
+ if (x.consume_front("path-hint:"))
+ StringExtractor(x).GetHexByteString(path_hint);
+ else
+ return SendErrorResponse(Status("Unsupported qSaveCore option"));
+ }
+ }
+
+ llvm::Expected<std::string> ret = m_current_process->SaveCore(path_hint);
+ if (!ret)
+ return SendErrorResponse(ret.takeError());
+
+ StreamString response;
+ response.PutCString("core-path:");
+ response.PutStringAsRawHex8(ret.get());
+ return SendPacketNoLock(response.GetString());
+}
+
void GDBRemoteCommunicationServerLLGS::MaybeCloseInferiorTerminalConnection() {
Log *log(GetLogIfAnyCategoriesSet(LIBLLDB_LOG_PROCESS));
@@ -3780,6 +3852,8 @@ std::vector<std::string> GDBRemoteCommunicationServerLLGS::HandleFeatures(
ret.push_back("qXfer:libraries-svr4:read+");
if (bool(plugin_features & Extension::memory_tagging))
ret.push_back("memory-tagging+");
+ if (bool(plugin_features & Extension::savecore))
+ ret.push_back("qSaveCore+");
// check for client features
m_extensions_supported = {};
@@ -3816,3 +3890,38 @@ void GDBRemoteCommunicationServerLLGS::SetEnabledExtensions(
assert(!bool(flags & ~m_process_factory.GetSupportedExtensions()));
process.SetEnabledExtensions(flags);
}
+
+std::string
+lldb_private::process_gdb_remote::LLGSArgToURL(llvm::StringRef url_arg,
+ bool reverse_connect) {
+ // Try parsing the argument as URL.
+ if (llvm::Optional<URI> url = URI::Parse(url_arg)) {
+ if (reverse_connect)
+ return url_arg.str();
+
+ // Translate the scheme from LLGS notation to ConnectionFileDescriptor.
+ // If the scheme doesn't match any, pass it through to support using CFD
+ // schemes directly.
+ std::string new_url = llvm::StringSwitch<std::string>(url->scheme)
+ .Case("tcp", "listen")
+ .Case("unix", "unix-accept")
+ .Case("unix-abstract", "unix-abstract-accept")
+ .Default(url->scheme.str());
+ llvm::append_range(new_url, url_arg.substr(url->scheme.size()));
+ return new_url;
+ }
+
+ std::string host_port = url_arg.str();
+ // If url_arg starts with ':', default the host to be "localhost" and
+ // expect the remainder to be the port.
+ if (url_arg.startswith(":"))
+ host_port.insert(0, "localhost");
+
+ // Try parsing the (preprocessed) argument as host:port pair.
+ if (!llvm::errorToBool(Socket::DecodeHostAndPort(host_port).takeError()))
+ return (reverse_connect ? "connect://" : "listen://") + host_port;
+
+ // If none of the above applied, interpret the argument as UNIX socket path.
+ return (reverse_connect ? "unix-connect://" : "unix-accept://") +
+ url_arg.str();
+}
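
For reference, the translations LLGSArgToURL() above performs when lldb-server is listening (reverse_connect == false); with reverse_connect == true a full URL is returned untouched and the host:port and path forms use "connect://" and "unix-connect://" instead. Shown as comments, since the exact split depends on URI::Parse() accepting only scheme://-style input.

//   "tcp://0.0.0.0:1234"    ->  "listen://0.0.0.0:1234"
//   "unix:///tmp/llgs.sock" ->  "unix-accept:///tmp/llgs.sock"
//   ":1234"                 ->  "listen://localhost:1234"
//   "/tmp/llgs.sock"        ->  "unix-accept:///tmp/llgs.sock"
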
diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.h b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.h
index 04d0605fe420..6c75771f6427 100644
--- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.h
+++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.h
@@ -202,6 +202,8 @@ protected:
PacketResult Handle_vAttachOrWait(StringExtractorGDBRemote &packet);
+ PacketResult Handle_vRun(StringExtractorGDBRemote &packet);
+
PacketResult Handle_D(StringExtractorGDBRemote &packet);
PacketResult Handle_qThreadStopInfo(StringExtractorGDBRemote &packet);
@@ -214,6 +216,8 @@ protected:
PacketResult Handle_QPassSignals(StringExtractorGDBRemote &packet);
+ PacketResult Handle_qSaveCore(StringExtractorGDBRemote &packet);
+
PacketResult Handle_g(StringExtractorGDBRemote &packet);
PacketResult Handle_qMemTags(StringExtractorGDBRemote &packet);
@@ -285,6 +289,8 @@ private:
operator=(const GDBRemoteCommunicationServerLLGS &) = delete;
};
+std::string LLGSArgToURL(llvm::StringRef url_arg, bool reverse_connect);
+
} // namespace process_gdb_remote
} // namespace lldb_private
diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerPlatform.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerPlatform.cpp
index 7c2f80dc76b8..a63b98edec55 100644
--- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerPlatform.cpp
+++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerPlatform.cpp
@@ -197,16 +197,9 @@ Status GDBRemoteCommunicationServerPlatform::LaunchGDBServer(
#endif
uint16_t *port_ptr = port.getPointer();
if (m_socket_protocol == Socket::ProtocolTcp) {
- llvm::StringRef platform_scheme;
- llvm::StringRef platform_ip;
- int platform_port;
- llvm::StringRef platform_path;
std::string platform_uri = GetConnection()->GetURI();
- bool ok = UriParser::Parse(platform_uri, platform_scheme, platform_ip,
- platform_port, platform_path);
- UNUSED_IF_ASSERT_DISABLED(ok);
- assert(ok);
- url << '[' << platform_ip.str() << "]:" << *port;
+ llvm::Optional<URI> parsed_uri = URI::Parse(platform_uri);
+ url << '[' << parsed_uri->hostname.str() << "]:" << *port;
} else {
socket_name = GetDomainSocketPath("gdbserver").GetPath();
url << socket_name;
diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteErrno.def b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteErrno.def
new file mode 100644
index 000000000000..e26d23fdad0c
--- /dev/null
+++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteErrno.def
@@ -0,0 +1,39 @@
+//===-- GDBRemoteErrno.def --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// NOTE: NO INCLUDE GUARD DESIRED!
+
+// HANDLE_ERRNO(name, value)
+#ifndef HANDLE_ERRNO
+#error "HANDLE_ERRNO must be defined"
+#endif
+
+// from gdb's include/gdb/fileio.h
+HANDLE_ERRNO(EPERM, 1)
+HANDLE_ERRNO(ENOENT, 2)
+HANDLE_ERRNO(EINTR, 4)
+HANDLE_ERRNO(EIO, 5)
+HANDLE_ERRNO(EBADF, 9)
+HANDLE_ERRNO(EACCES, 13)
+HANDLE_ERRNO(EFAULT, 14)
+HANDLE_ERRNO(EBUSY, 16)
+HANDLE_ERRNO(EEXIST, 17)
+HANDLE_ERRNO(ENODEV, 19)
+HANDLE_ERRNO(ENOTDIR, 20)
+HANDLE_ERRNO(EISDIR, 21)
+HANDLE_ERRNO(EINVAL, 22)
+HANDLE_ERRNO(ENFILE, 23)
+HANDLE_ERRNO(EMFILE, 24)
+HANDLE_ERRNO(EFBIG, 27)
+HANDLE_ERRNO(ENOSPC, 28)
+HANDLE_ERRNO(ESPIPE, 29)
+HANDLE_ERRNO(EROFS, 30)
+HANDLE_ERRNO(ENOSYS, 88)
+HANDLE_ERRNO(ENAMETOOLONG, 91)
+
+#undef HANDLE_ERRNO
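
GDBRemoteErrno.def is an X-macro list: each consumer defines HANDLE_ERRNO, includes the file, and gets one entry per errno. The server-side system_errno_to_gdb() earlier in this patch maps host errno values to the GDB_* wire constants; below is a sketch of the opposite direction, as the client's gdb_errno_to_system() (whose definition is not part of this excerpt) would plausibly do it, assuming GDB_* enumerators generated from the same list.

static int gdb_errno_to_system(int err) {
  switch (err) {
#define HANDLE_ERRNO(name, value)                                              \
  case GDB_##name:                                                             \
    return name;
#include "Plugins/Process/gdb-remote/GDBRemoteErrno.def"
  case GDB_EUNKNOWN:
  default:
    return -1; // no portable host value for an unknown remote errno
  }
}
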
diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteRegisterContext.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteRegisterContext.cpp
index 65cf9fb2a834..9410c9bd83ec 100644
--- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteRegisterContext.cpp
+++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteRegisterContext.cpp
@@ -69,14 +69,7 @@ size_t GDBRemoteRegisterContext::GetRegisterCount() {
const RegisterInfo *
GDBRemoteRegisterContext::GetRegisterInfoAtIndex(size_t reg) {
- RegisterInfo *reg_info = m_reg_info_sp->GetRegisterInfoAtIndex(reg);
-
- if (reg_info && reg_info->dynamic_size_dwarf_expr_bytes) {
- const ArchSpec &arch = m_thread.GetProcess()->GetTarget().GetArchitecture();
- uint8_t reg_size = UpdateDynamicRegisterSize(arch, reg_info);
- reg_info->byte_size = reg_size;
- }
- return reg_info;
+ return m_reg_info_sp->GetRegisterInfoAtIndex(reg);
}
size_t GDBRemoteRegisterContext::GetRegisterSetCount() {
@@ -90,14 +83,38 @@ const RegisterSet *GDBRemoteRegisterContext::GetRegisterSet(size_t reg_set) {
bool GDBRemoteRegisterContext::ReadRegister(const RegisterInfo *reg_info,
RegisterValue &value) {
// Read the register
- if (ReadRegisterBytes(reg_info, m_reg_data)) {
+ if (ReadRegisterBytes(reg_info)) {
const uint32_t reg = reg_info->kinds[eRegisterKindLLDB];
if (m_reg_valid[reg] == false)
return false;
- const bool partial_data_ok = false;
- Status error(value.SetValueFromData(
- reg_info, m_reg_data, reg_info->byte_offset, partial_data_ok));
- return error.Success();
+ if (reg_info->value_regs &&
+ reg_info->value_regs[0] != LLDB_INVALID_REGNUM &&
+ reg_info->value_regs[1] != LLDB_INVALID_REGNUM) {
+ std::vector<char> combined_data;
+ uint32_t offset = 0;
+ for (int i = 0; reg_info->value_regs[i] != LLDB_INVALID_REGNUM; i++) {
+ const RegisterInfo *parent_reg = GetRegisterInfo(
+ eRegisterKindLLDB, reg_info->value_regs[i]);
+ if (!parent_reg)
+ return false;
+ combined_data.resize(offset + parent_reg->byte_size);
+ if (m_reg_data.CopyData(parent_reg->byte_offset, parent_reg->byte_size,
+ combined_data.data() + offset) !=
+ parent_reg->byte_size)
+ return false;
+ offset += parent_reg->byte_size;
+ }
+
+ Status error;
+ return value.SetFromMemoryData(
+ reg_info, combined_data.data(), combined_data.size(),
+ m_reg_data.GetByteOrder(), error) == combined_data.size();
+ } else {
+ const bool partial_data_ok = false;
+ Status error(value.SetValueFromData(
+ reg_info, m_reg_data, reg_info->byte_offset, partial_data_ok));
+ return error.Success();
+ }
}
return false;
}
@@ -184,8 +201,7 @@ bool GDBRemoteRegisterContext::GetPrimordialRegister(
return false;
}
-bool GDBRemoteRegisterContext::ReadRegisterBytes(const RegisterInfo *reg_info,
- DataExtractor &data) {
+bool GDBRemoteRegisterContext::ReadRegisterBytes(const RegisterInfo *reg_info) {
ExecutionContext exe_ctx(CalculateThread());
Process *process = exe_ctx.GetProcessPtr();
@@ -211,16 +227,11 @@ bool GDBRemoteRegisterContext::ReadRegisterBytes(const RegisterInfo *reg_info,
SetAllRegisterValid(true);
return true;
} else if (buffer_sp->GetByteSize() > 0) {
- const int regcount = m_reg_info_sp->GetNumRegisters();
- for (int i = 0; i < regcount; i++) {
- struct RegisterInfo *reginfo =
- m_reg_info_sp->GetRegisterInfoAtIndex(i);
- if (reginfo->byte_offset + reginfo->byte_size <=
- buffer_sp->GetByteSize()) {
- m_reg_valid[i] = true;
- } else {
- m_reg_valid[i] = false;
- }
+ for (auto x : llvm::enumerate(m_reg_info_sp->registers())) {
+ const struct RegisterInfo &reginfo = x.value();
+ m_reg_valid[x.index()] =
+ (reginfo.byte_offset + reginfo.byte_size <=
+ buffer_sp->GetByteSize());
}
m_gpacket_cached = true;
@@ -254,7 +265,7 @@ bool GDBRemoteRegisterContext::ReadRegisterBytes(const RegisterInfo *reg_info,
// We have a valid primordial register as our constituent. Grab the
// corresponding register info.
const RegisterInfo *prim_reg_info =
- GetRegisterInfo(eRegisterKindProcessPlugin, prim_reg);
+ GetRegisterInfo(eRegisterKindLLDB, prim_reg);
if (prim_reg_info == nullptr)
success = false;
else {
@@ -279,30 +290,44 @@ bool GDBRemoteRegisterContext::ReadRegisterBytes(const RegisterInfo *reg_info,
return false;
}
- if (&data != &m_reg_data) {
- assert(m_reg_data.GetByteSize() >=
- reg_info->byte_offset + reg_info->byte_size);
- // If our register context and our register info disagree, which should
- // never happen, don't read past the end of the buffer.
- if (m_reg_data.GetByteSize() < reg_info->byte_offset + reg_info->byte_size)
- return false;
-
- // If we aren't extracting into our own buffer (which only happens when
- // this function is called from ReadRegisterValue(uint32_t, Scalar&)) then
- // we transfer bytes from our buffer into the data buffer that was passed
- // in
-
- data.SetByteOrder(m_reg_data.GetByteOrder());
- data.SetData(m_reg_data, reg_info->byte_offset, reg_info->byte_size);
- }
return true;
}
bool GDBRemoteRegisterContext::WriteRegister(const RegisterInfo *reg_info,
const RegisterValue &value) {
DataExtractor data;
- if (value.GetData(data))
- return WriteRegisterBytes(reg_info, data, 0);
+ if (value.GetData(data)) {
+ if (reg_info->value_regs &&
+ reg_info->value_regs[0] != LLDB_INVALID_REGNUM &&
+ reg_info->value_regs[1] != LLDB_INVALID_REGNUM) {
+ uint32_t combined_size = 0;
+ for (int i = 0; reg_info->value_regs[i] != LLDB_INVALID_REGNUM; i++) {
+ const RegisterInfo *parent_reg = GetRegisterInfo(
+ eRegisterKindLLDB, reg_info->value_regs[i]);
+ if (!parent_reg)
+ return false;
+ combined_size += parent_reg->byte_size;
+ }
+
+ if (data.GetByteSize() < combined_size)
+ return false;
+
+ uint32_t offset = 0;
+ for (int i = 0; reg_info->value_regs[i] != LLDB_INVALID_REGNUM; i++) {
+ const RegisterInfo *parent_reg = GetRegisterInfo(
+ eRegisterKindLLDB, reg_info->value_regs[i]);
+ assert(parent_reg);
+
+ DataExtractor parent_data{data, offset, parent_reg->byte_size};
+ if (!WriteRegisterBytes(parent_reg, parent_data, 0))
+ return false;
+ offset += parent_reg->byte_size;
+ }
+ assert(offset == combined_size);
+ return true;
+ } else
+ return WriteRegisterBytes(reg_info, data, 0);
+ }
return false;
}
@@ -401,7 +426,7 @@ bool GDBRemoteRegisterContext::WriteRegisterBytes(const RegisterInfo *reg_info,
// We have a valid primordial register as our constituent. Grab the
// corresponding register info.
const RegisterInfo *value_reg_info =
- GetRegisterInfo(eRegisterKindProcessPlugin, reg);
+ GetRegisterInfo(eRegisterKindLLDB, reg);
if (value_reg_info == nullptr)
success = false;
else
@@ -422,7 +447,7 @@ bool GDBRemoteRegisterContext::WriteRegisterBytes(const RegisterInfo *reg_info,
reg != LLDB_INVALID_REGNUM;
reg = reg_info->invalidate_regs[++idx])
SetRegisterIsValid(ConvertRegisterKindToRegisterNumber(
- eRegisterKindProcessPlugin, reg),
+ eRegisterKindLLDB, reg),
false);
}
@@ -526,7 +551,7 @@ bool GDBRemoteRegisterContext::ReadAllRegisterValues(
if (reg_info
->value_regs) // skip registers that are slices of real registers
continue;
- ReadRegisterBytes(reg_info, m_reg_data);
+ ReadRegisterBytes(reg_info);
// ReadRegisterBytes saves the contents of the register in to the
// m_reg_data buffer
}
@@ -790,277 +815,3 @@ bool GDBRemoteDynamicRegisterInfo::UpdateARM64SVERegistersInfos(uint64_t vg) {
ConfigureOffsets();
return true;
}
-
-void GDBRemoteDynamicRegisterInfo::HardcodeARMRegisters(bool from_scratch) {
- // For Advanced SIMD and VFP register mapping.
- static uint32_t g_d0_regs[] = {26, 27, LLDB_INVALID_REGNUM}; // (s0, s1)
- static uint32_t g_d1_regs[] = {28, 29, LLDB_INVALID_REGNUM}; // (s2, s3)
- static uint32_t g_d2_regs[] = {30, 31, LLDB_INVALID_REGNUM}; // (s4, s5)
- static uint32_t g_d3_regs[] = {32, 33, LLDB_INVALID_REGNUM}; // (s6, s7)
- static uint32_t g_d4_regs[] = {34, 35, LLDB_INVALID_REGNUM}; // (s8, s9)
- static uint32_t g_d5_regs[] = {36, 37, LLDB_INVALID_REGNUM}; // (s10, s11)
- static uint32_t g_d6_regs[] = {38, 39, LLDB_INVALID_REGNUM}; // (s12, s13)
- static uint32_t g_d7_regs[] = {40, 41, LLDB_INVALID_REGNUM}; // (s14, s15)
- static uint32_t g_d8_regs[] = {42, 43, LLDB_INVALID_REGNUM}; // (s16, s17)
- static uint32_t g_d9_regs[] = {44, 45, LLDB_INVALID_REGNUM}; // (s18, s19)
- static uint32_t g_d10_regs[] = {46, 47, LLDB_INVALID_REGNUM}; // (s20, s21)
- static uint32_t g_d11_regs[] = {48, 49, LLDB_INVALID_REGNUM}; // (s22, s23)
- static uint32_t g_d12_regs[] = {50, 51, LLDB_INVALID_REGNUM}; // (s24, s25)
- static uint32_t g_d13_regs[] = {52, 53, LLDB_INVALID_REGNUM}; // (s26, s27)
- static uint32_t g_d14_regs[] = {54, 55, LLDB_INVALID_REGNUM}; // (s28, s29)
- static uint32_t g_d15_regs[] = {56, 57, LLDB_INVALID_REGNUM}; // (s30, s31)
- static uint32_t g_q0_regs[] = {
- 26, 27, 28, 29, LLDB_INVALID_REGNUM}; // (d0, d1) -> (s0, s1, s2, s3)
- static uint32_t g_q1_regs[] = {
- 30, 31, 32, 33, LLDB_INVALID_REGNUM}; // (d2, d3) -> (s4, s5, s6, s7)
- static uint32_t g_q2_regs[] = {
- 34, 35, 36, 37, LLDB_INVALID_REGNUM}; // (d4, d5) -> (s8, s9, s10, s11)
- static uint32_t g_q3_regs[] = {
- 38, 39, 40, 41, LLDB_INVALID_REGNUM}; // (d6, d7) -> (s12, s13, s14, s15)
- static uint32_t g_q4_regs[] = {
- 42, 43, 44, 45, LLDB_INVALID_REGNUM}; // (d8, d9) -> (s16, s17, s18, s19)
- static uint32_t g_q5_regs[] = {
- 46, 47, 48, 49,
- LLDB_INVALID_REGNUM}; // (d10, d11) -> (s20, s21, s22, s23)
- static uint32_t g_q6_regs[] = {
- 50, 51, 52, 53,
- LLDB_INVALID_REGNUM}; // (d12, d13) -> (s24, s25, s26, s27)
- static uint32_t g_q7_regs[] = {
- 54, 55, 56, 57,
- LLDB_INVALID_REGNUM}; // (d14, d15) -> (s28, s29, s30, s31)
- static uint32_t g_q8_regs[] = {59, 60, LLDB_INVALID_REGNUM}; // (d16, d17)
- static uint32_t g_q9_regs[] = {61, 62, LLDB_INVALID_REGNUM}; // (d18, d19)
- static uint32_t g_q10_regs[] = {63, 64, LLDB_INVALID_REGNUM}; // (d20, d21)
- static uint32_t g_q11_regs[] = {65, 66, LLDB_INVALID_REGNUM}; // (d22, d23)
- static uint32_t g_q12_regs[] = {67, 68, LLDB_INVALID_REGNUM}; // (d24, d25)
- static uint32_t g_q13_regs[] = {69, 70, LLDB_INVALID_REGNUM}; // (d26, d27)
- static uint32_t g_q14_regs[] = {71, 72, LLDB_INVALID_REGNUM}; // (d28, d29)
- static uint32_t g_q15_regs[] = {73, 74, LLDB_INVALID_REGNUM}; // (d30, d31)
-
- // This is our array of composite registers, with each element coming from
- // the above register mappings.
- static uint32_t *g_composites[] = {
- g_d0_regs, g_d1_regs, g_d2_regs, g_d3_regs, g_d4_regs, g_d5_regs,
- g_d6_regs, g_d7_regs, g_d8_regs, g_d9_regs, g_d10_regs, g_d11_regs,
- g_d12_regs, g_d13_regs, g_d14_regs, g_d15_regs, g_q0_regs, g_q1_regs,
- g_q2_regs, g_q3_regs, g_q4_regs, g_q5_regs, g_q6_regs, g_q7_regs,
- g_q8_regs, g_q9_regs, g_q10_regs, g_q11_regs, g_q12_regs, g_q13_regs,
- g_q14_regs, g_q15_regs};
-
- // clang-format off
- static RegisterInfo g_register_infos[] = {
-// NAME ALT SZ OFF ENCODING FORMAT EH_FRAME DWARF GENERIC PROCESS PLUGIN LLDB VALUE REGS INVALIDATE REGS SIZE EXPR SIZE LEN
-// ====== ====== === === ============= ========== =================== =================== ====================== ============= ==== ========== =============== ========= ========
- { "r0", "arg1", 4, 0, eEncodingUint, eFormatHex, { ehframe_r0, dwarf_r0, LLDB_REGNUM_GENERIC_ARG1,0, 0 }, nullptr, nullptr, nullptr, 0 },
- { "r1", "arg2", 4, 0, eEncodingUint, eFormatHex, { ehframe_r1, dwarf_r1, LLDB_REGNUM_GENERIC_ARG2,1, 1 }, nullptr, nullptr, nullptr, 0 },
- { "r2", "arg3", 4, 0, eEncodingUint, eFormatHex, { ehframe_r2, dwarf_r2, LLDB_REGNUM_GENERIC_ARG3,2, 2 }, nullptr, nullptr, nullptr, 0 },
- { "r3", "arg4", 4, 0, eEncodingUint, eFormatHex, { ehframe_r3, dwarf_r3, LLDB_REGNUM_GENERIC_ARG4,3, 3 }, nullptr, nullptr, nullptr, 0 },
- { "r4", nullptr, 4, 0, eEncodingUint, eFormatHex, { ehframe_r4, dwarf_r4, LLDB_INVALID_REGNUM, 4, 4 }, nullptr, nullptr, nullptr, 0 },
- { "r5", nullptr, 4, 0, eEncodingUint, eFormatHex, { ehframe_r5, dwarf_r5, LLDB_INVALID_REGNUM, 5, 5 }, nullptr, nullptr, nullptr, 0 },
- { "r6", nullptr, 4, 0, eEncodingUint, eFormatHex, { ehframe_r6, dwarf_r6, LLDB_INVALID_REGNUM, 6, 6 }, nullptr, nullptr, nullptr, 0 },
- { "r7", "fp", 4, 0, eEncodingUint, eFormatHex, { ehframe_r7, dwarf_r7, LLDB_REGNUM_GENERIC_FP, 7, 7 }, nullptr, nullptr, nullptr, 0 },
- { "r8", nullptr, 4, 0, eEncodingUint, eFormatHex, { ehframe_r8, dwarf_r8, LLDB_INVALID_REGNUM, 8, 8 }, nullptr, nullptr, nullptr, 0 },
- { "r9", nullptr, 4, 0, eEncodingUint, eFormatHex, { ehframe_r9, dwarf_r9, LLDB_INVALID_REGNUM, 9, 9 }, nullptr, nullptr, nullptr, 0 },
- { "r10", nullptr, 4, 0, eEncodingUint, eFormatHex, { ehframe_r10, dwarf_r10, LLDB_INVALID_REGNUM, 10, 10 }, nullptr, nullptr, nullptr, 0 },
- { "r11", nullptr, 4, 0, eEncodingUint, eFormatHex, { ehframe_r11, dwarf_r11, LLDB_INVALID_REGNUM, 11, 11 }, nullptr, nullptr, nullptr, 0 },
- { "r12", nullptr, 4, 0, eEncodingUint, eFormatHex, { ehframe_r12, dwarf_r12, LLDB_INVALID_REGNUM, 12, 12 }, nullptr, nullptr, nullptr, 0 },
- { "sp", "r13", 4, 0, eEncodingUint, eFormatHex, { ehframe_sp, dwarf_sp, LLDB_REGNUM_GENERIC_SP, 13, 13 }, nullptr, nullptr, nullptr, 0 },
- { "lr", "r14", 4, 0, eEncodingUint, eFormatHex, { ehframe_lr, dwarf_lr, LLDB_REGNUM_GENERIC_RA, 14, 14 }, nullptr, nullptr, nullptr, 0 },
- { "pc", "r15", 4, 0, eEncodingUint, eFormatHex, { ehframe_pc, dwarf_pc, LLDB_REGNUM_GENERIC_PC, 15, 15 }, nullptr, nullptr, nullptr, 0 },
- { "f0", nullptr, 12, 0, eEncodingUint, eFormatHex, { LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, 16, 16 }, nullptr, nullptr, nullptr, 0 },
- { "f1", nullptr, 12, 0, eEncodingUint, eFormatHex, { LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, 17, 17 }, nullptr, nullptr, nullptr, 0 },
- { "f2", nullptr, 12, 0, eEncodingUint, eFormatHex, { LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, 18, 18 }, nullptr, nullptr, nullptr, 0 },
- { "f3", nullptr, 12, 0, eEncodingUint, eFormatHex, { LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, 19, 19 }, nullptr, nullptr, nullptr, 0 },
- { "f4", nullptr, 12, 0, eEncodingUint, eFormatHex, { LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, 20, 20 }, nullptr, nullptr, nullptr, 0 },
- { "f5", nullptr, 12, 0, eEncodingUint, eFormatHex, { LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, 21, 21 }, nullptr, nullptr, nullptr, 0 },
- { "f6", nullptr, 12, 0, eEncodingUint, eFormatHex, { LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, 22, 22 }, nullptr, nullptr, nullptr, 0 },
- { "f7", nullptr, 12, 0, eEncodingUint, eFormatHex, { LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, 23, 23 }, nullptr, nullptr, nullptr, 0 },
- { "fps", nullptr, 4, 0, eEncodingUint, eFormatHex, { LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, 24, 24 }, nullptr, nullptr, nullptr, 0 },
- { "cpsr","flags", 4, 0, eEncodingUint, eFormatHex, { ehframe_cpsr, dwarf_cpsr, LLDB_INVALID_REGNUM, 25, 25 }, nullptr, nullptr, nullptr, 0 },
- { "s0", nullptr, 4, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_s0, LLDB_INVALID_REGNUM, 26, 26 }, nullptr, nullptr, nullptr, 0 },
- { "s1", nullptr, 4, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_s1, LLDB_INVALID_REGNUM, 27, 27 }, nullptr, nullptr, nullptr, 0 },
- { "s2", nullptr, 4, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_s2, LLDB_INVALID_REGNUM, 28, 28 }, nullptr, nullptr, nullptr, 0 },
- { "s3", nullptr, 4, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_s3, LLDB_INVALID_REGNUM, 29, 29 }, nullptr, nullptr, nullptr, 0 },
- { "s4", nullptr, 4, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_s4, LLDB_INVALID_REGNUM, 30, 30 }, nullptr, nullptr, nullptr, 0 },
- { "s5", nullptr, 4, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_s5, LLDB_INVALID_REGNUM, 31, 31 }, nullptr, nullptr, nullptr, 0 },
- { "s6", nullptr, 4, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_s6, LLDB_INVALID_REGNUM, 32, 32 }, nullptr, nullptr, nullptr, 0 },
- { "s7", nullptr, 4, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_s7, LLDB_INVALID_REGNUM, 33, 33 }, nullptr, nullptr, nullptr, 0 },
- { "s8", nullptr, 4, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_s8, LLDB_INVALID_REGNUM, 34, 34 }, nullptr, nullptr, nullptr, 0 },
- { "s9", nullptr, 4, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_s9, LLDB_INVALID_REGNUM, 35, 35 }, nullptr, nullptr, nullptr, 0 },
- { "s10", nullptr, 4, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_s10, LLDB_INVALID_REGNUM, 36, 36 }, nullptr, nullptr, nullptr, 0 },
- { "s11", nullptr, 4, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_s11, LLDB_INVALID_REGNUM, 37, 37 }, nullptr, nullptr, nullptr, 0 },
- { "s12", nullptr, 4, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_s12, LLDB_INVALID_REGNUM, 38, 38 }, nullptr, nullptr, nullptr, 0 },
- { "s13", nullptr, 4, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_s13, LLDB_INVALID_REGNUM, 39, 39 }, nullptr, nullptr, nullptr, 0 },
- { "s14", nullptr, 4, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_s14, LLDB_INVALID_REGNUM, 40, 40 }, nullptr, nullptr, nullptr, 0 },
- { "s15", nullptr, 4, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_s15, LLDB_INVALID_REGNUM, 41, 41 }, nullptr, nullptr, nullptr, 0 },
- { "s16", nullptr, 4, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_s16, LLDB_INVALID_REGNUM, 42, 42 }, nullptr, nullptr, nullptr, 0 },
- { "s17", nullptr, 4, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_s17, LLDB_INVALID_REGNUM, 43, 43 }, nullptr, nullptr, nullptr, 0 },
- { "s18", nullptr, 4, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_s18, LLDB_INVALID_REGNUM, 44, 44 }, nullptr, nullptr, nullptr, 0 },
- { "s19", nullptr, 4, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_s19, LLDB_INVALID_REGNUM, 45, 45 }, nullptr, nullptr, nullptr, 0 },
- { "s20", nullptr, 4, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_s20, LLDB_INVALID_REGNUM, 46, 46 }, nullptr, nullptr, nullptr, 0 },
- { "s21", nullptr, 4, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_s21, LLDB_INVALID_REGNUM, 47, 47 }, nullptr, nullptr, nullptr, 0 },
- { "s22", nullptr, 4, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_s22, LLDB_INVALID_REGNUM, 48, 48 }, nullptr, nullptr, nullptr, 0 },
- { "s23", nullptr, 4, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_s23, LLDB_INVALID_REGNUM, 49, 49 }, nullptr, nullptr, nullptr, 0 },
- { "s24", nullptr, 4, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_s24, LLDB_INVALID_REGNUM, 50, 50 }, nullptr, nullptr, nullptr, 0 },
- { "s25", nullptr, 4, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_s25, LLDB_INVALID_REGNUM, 51, 51 }, nullptr, nullptr, nullptr, 0 },
- { "s26", nullptr, 4, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_s26, LLDB_INVALID_REGNUM, 52, 52 }, nullptr, nullptr, nullptr, 0 },
- { "s27", nullptr, 4, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_s27, LLDB_INVALID_REGNUM, 53, 53 }, nullptr, nullptr, nullptr, 0 },
- { "s28", nullptr, 4, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_s28, LLDB_INVALID_REGNUM, 54, 54 }, nullptr, nullptr, nullptr, 0 },
- { "s29", nullptr, 4, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_s29, LLDB_INVALID_REGNUM, 55, 55 }, nullptr, nullptr, nullptr, 0 },
- { "s30", nullptr, 4, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_s30, LLDB_INVALID_REGNUM, 56, 56 }, nullptr, nullptr, nullptr, 0 },
- { "s31", nullptr, 4, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_s31, LLDB_INVALID_REGNUM, 57, 57 }, nullptr, nullptr, nullptr, 0 },
- { "fpscr",nullptr, 4, 0, eEncodingUint, eFormatHex, { LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, 58, 58 }, nullptr, nullptr, nullptr, 0 },
- { "d16", nullptr, 8, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_d16, LLDB_INVALID_REGNUM, 59, 59 }, nullptr, nullptr, nullptr, 0 },
- { "d17", nullptr, 8, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_d17, LLDB_INVALID_REGNUM, 60, 60 }, nullptr, nullptr, nullptr, 0 },
- { "d18", nullptr, 8, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_d18, LLDB_INVALID_REGNUM, 61, 61 }, nullptr, nullptr, nullptr, 0 },
- { "d19", nullptr, 8, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_d19, LLDB_INVALID_REGNUM, 62, 62 }, nullptr, nullptr, nullptr, 0 },
- { "d20", nullptr, 8, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_d20, LLDB_INVALID_REGNUM, 63, 63 }, nullptr, nullptr, nullptr, 0 },
- { "d21", nullptr, 8, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_d21, LLDB_INVALID_REGNUM, 64, 64 }, nullptr, nullptr, nullptr, 0 },
- { "d22", nullptr, 8, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_d22, LLDB_INVALID_REGNUM, 65, 65 }, nullptr, nullptr, nullptr, 0 },
- { "d23", nullptr, 8, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_d23, LLDB_INVALID_REGNUM, 66, 66 }, nullptr, nullptr, nullptr, 0 },
- { "d24", nullptr, 8, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_d24, LLDB_INVALID_REGNUM, 67, 67 }, nullptr, nullptr, nullptr, 0 },
- { "d25", nullptr, 8, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_d25, LLDB_INVALID_REGNUM, 68, 68 }, nullptr, nullptr, nullptr, 0 },
- { "d26", nullptr, 8, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_d26, LLDB_INVALID_REGNUM, 69, 69 }, nullptr, nullptr, nullptr, 0 },
- { "d27", nullptr, 8, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_d27, LLDB_INVALID_REGNUM, 70, 70 }, nullptr, nullptr, nullptr, 0 },
- { "d28", nullptr, 8, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_d28, LLDB_INVALID_REGNUM, 71, 71 }, nullptr, nullptr, nullptr, 0 },
- { "d29", nullptr, 8, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_d29, LLDB_INVALID_REGNUM, 72, 72 }, nullptr, nullptr, nullptr, 0 },
- { "d30", nullptr, 8, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_d30, LLDB_INVALID_REGNUM, 73, 73 }, nullptr, nullptr, nullptr, 0 },
- { "d31", nullptr, 8, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_d31, LLDB_INVALID_REGNUM, 74, 74 }, nullptr, nullptr, nullptr, 0 },
- { "d0", nullptr, 8, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_d0, LLDB_INVALID_REGNUM, 75, 75 }, g_d0_regs, nullptr, nullptr, 0 },
- { "d1", nullptr, 8, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_d1, LLDB_INVALID_REGNUM, 76, 76 }, g_d1_regs, nullptr, nullptr, 0 },
- { "d2", nullptr, 8, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_d2, LLDB_INVALID_REGNUM, 77, 77 }, g_d2_regs, nullptr, nullptr, 0 },
- { "d3", nullptr, 8, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_d3, LLDB_INVALID_REGNUM, 78, 78 }, g_d3_regs, nullptr, nullptr, 0 },
- { "d4", nullptr, 8, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_d4, LLDB_INVALID_REGNUM, 79, 79 }, g_d4_regs, nullptr, nullptr, 0 },
- { "d5", nullptr, 8, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_d5, LLDB_INVALID_REGNUM, 80, 80 }, g_d5_regs, nullptr, nullptr, 0 },
- { "d6", nullptr, 8, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_d6, LLDB_INVALID_REGNUM, 81, 81 }, g_d6_regs, nullptr, nullptr, 0 },
- { "d7", nullptr, 8, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_d7, LLDB_INVALID_REGNUM, 82, 82 }, g_d7_regs, nullptr, nullptr, 0 },
- { "d8", nullptr, 8, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_d8, LLDB_INVALID_REGNUM, 83, 83 }, g_d8_regs, nullptr, nullptr, 0 },
- { "d9", nullptr, 8, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_d9, LLDB_INVALID_REGNUM, 84, 84 }, g_d9_regs, nullptr, nullptr, 0 },
- { "d10", nullptr, 8, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_d10, LLDB_INVALID_REGNUM, 85, 85 }, g_d10_regs, nullptr, nullptr, 0 },
- { "d11", nullptr, 8, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_d11, LLDB_INVALID_REGNUM, 86, 86 }, g_d11_regs, nullptr, nullptr, 0 },
- { "d12", nullptr, 8, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_d12, LLDB_INVALID_REGNUM, 87, 87 }, g_d12_regs, nullptr, nullptr, 0 },
- { "d13", nullptr, 8, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_d13, LLDB_INVALID_REGNUM, 88, 88 }, g_d13_regs, nullptr, nullptr, 0 },
- { "d14", nullptr, 8, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_d14, LLDB_INVALID_REGNUM, 89, 89 }, g_d14_regs, nullptr, nullptr, 0 },
- { "d15", nullptr, 8, 0, eEncodingIEEE754, eFormatFloat, { LLDB_INVALID_REGNUM, dwarf_d15, LLDB_INVALID_REGNUM, 90, 90 }, g_d15_regs, nullptr, nullptr, 0 },
- { "q0", nullptr, 16, 0, eEncodingVector, eFormatVectorOfUInt8, { LLDB_INVALID_REGNUM, dwarf_q0, LLDB_INVALID_REGNUM, 91, 91 }, g_q0_regs, nullptr, nullptr, 0 },
- { "q1", nullptr, 16, 0, eEncodingVector, eFormatVectorOfUInt8, { LLDB_INVALID_REGNUM, dwarf_q1, LLDB_INVALID_REGNUM, 92, 92 }, g_q1_regs, nullptr, nullptr, 0 },
- { "q2", nullptr, 16, 0, eEncodingVector, eFormatVectorOfUInt8, { LLDB_INVALID_REGNUM, dwarf_q2, LLDB_INVALID_REGNUM, 93, 93 }, g_q2_regs, nullptr, nullptr, 0 },
- { "q3", nullptr, 16, 0, eEncodingVector, eFormatVectorOfUInt8, { LLDB_INVALID_REGNUM, dwarf_q3, LLDB_INVALID_REGNUM, 94, 94 }, g_q3_regs, nullptr, nullptr, 0 },
- { "q4", nullptr, 16, 0, eEncodingVector, eFormatVectorOfUInt8, { LLDB_INVALID_REGNUM, dwarf_q4, LLDB_INVALID_REGNUM, 95, 95 }, g_q4_regs, nullptr, nullptr, 0 },
- { "q5", nullptr, 16, 0, eEncodingVector, eFormatVectorOfUInt8, { LLDB_INVALID_REGNUM, dwarf_q5, LLDB_INVALID_REGNUM, 96, 96 }, g_q5_regs, nullptr, nullptr, 0 },
- { "q6", nullptr, 16, 0, eEncodingVector, eFormatVectorOfUInt8, { LLDB_INVALID_REGNUM, dwarf_q6, LLDB_INVALID_REGNUM, 97, 97 }, g_q6_regs, nullptr, nullptr, 0 },
- { "q7", nullptr, 16, 0, eEncodingVector, eFormatVectorOfUInt8, { LLDB_INVALID_REGNUM, dwarf_q7, LLDB_INVALID_REGNUM, 98, 98 }, g_q7_regs, nullptr, nullptr, 0 },
- { "q8", nullptr, 16, 0, eEncodingVector, eFormatVectorOfUInt8, { LLDB_INVALID_REGNUM, dwarf_q8, LLDB_INVALID_REGNUM, 99, 99 }, g_q8_regs, nullptr, nullptr, 0 },
- { "q9", nullptr, 16, 0, eEncodingVector, eFormatVectorOfUInt8, { LLDB_INVALID_REGNUM, dwarf_q9, LLDB_INVALID_REGNUM, 100, 100 }, g_q9_regs, nullptr, nullptr, 0 },
- { "q10", nullptr, 16, 0, eEncodingVector, eFormatVectorOfUInt8, { LLDB_INVALID_REGNUM, dwarf_q10, LLDB_INVALID_REGNUM, 101, 101 }, g_q10_regs, nullptr, nullptr, 0 },
- { "q11", nullptr, 16, 0, eEncodingVector, eFormatVectorOfUInt8, { LLDB_INVALID_REGNUM, dwarf_q11, LLDB_INVALID_REGNUM, 102, 102 }, g_q11_regs, nullptr, nullptr, 0 },
- { "q12", nullptr, 16, 0, eEncodingVector, eFormatVectorOfUInt8, { LLDB_INVALID_REGNUM, dwarf_q12, LLDB_INVALID_REGNUM, 103, 103 }, g_q12_regs, nullptr, nullptr, 0 },
- { "q13", nullptr, 16, 0, eEncodingVector, eFormatVectorOfUInt8, { LLDB_INVALID_REGNUM, dwarf_q13, LLDB_INVALID_REGNUM, 104, 104 }, g_q13_regs, nullptr, nullptr, 0 },
- { "q14", nullptr, 16, 0, eEncodingVector, eFormatVectorOfUInt8, { LLDB_INVALID_REGNUM, dwarf_q14, LLDB_INVALID_REGNUM, 105, 105 }, g_q14_regs, nullptr, nullptr, 0 },
- { "q15", nullptr, 16, 0, eEncodingVector, eFormatVectorOfUInt8, { LLDB_INVALID_REGNUM, dwarf_q15, LLDB_INVALID_REGNUM, 106, 106 }, g_q15_regs, nullptr, nullptr, 0 }
- };
- // clang-format on
-
- static const uint32_t num_registers = llvm::array_lengthof(g_register_infos);
- static ConstString gpr_reg_set("General Purpose Registers");
- static ConstString sfp_reg_set("Software Floating Point Registers");
- static ConstString vfp_reg_set("Floating Point Registers");
- size_t i;
- if (from_scratch) {
- // Calculate the offsets of the registers
- // Note that the layout of the "composite" registers (d0-d15 and q0-q15)
- // which comes after the "primordial" registers is important. This enables
- // us to calculate the offset of the composite register by using the offset
- // of its first primordial register. For example, to calculate the offset
- // of q0, use s0's offset.
- if (g_register_infos[2].byte_offset == 0) {
- uint32_t byte_offset = 0;
- for (i = 0; i < num_registers; ++i) {
- // For primordial registers, increment the byte_offset by the byte_size
- // to arrive at the byte_offset for the next register. Otherwise, we
- // have a composite register whose offset can be calculated by
- // consulting the offset of its first primordial register.
- if (!g_register_infos[i].value_regs) {
- g_register_infos[i].byte_offset = byte_offset;
- byte_offset += g_register_infos[i].byte_size;
- } else {
- const uint32_t first_primordial_reg =
- g_register_infos[i].value_regs[0];
- g_register_infos[i].byte_offset =
- g_register_infos[first_primordial_reg].byte_offset;
- }
- }
- }
- for (i = 0; i < num_registers; ++i) {
- ConstString name;
- ConstString alt_name;
- if (g_register_infos[i].name && g_register_infos[i].name[0])
- name.SetCString(g_register_infos[i].name);
- if (g_register_infos[i].alt_name && g_register_infos[i].alt_name[0])
- alt_name.SetCString(g_register_infos[i].alt_name);
-
- if (i <= 15 || i == 25)
- AddRegister(g_register_infos[i], name, alt_name, gpr_reg_set);
- else if (i <= 24)
- AddRegister(g_register_infos[i], name, alt_name, sfp_reg_set);
- else
- AddRegister(g_register_infos[i], name, alt_name, vfp_reg_set);
- }
- } else {
- // Add composite registers to our primordial registers, then.
- const size_t num_composites = llvm::array_lengthof(g_composites);
- const size_t num_dynamic_regs = GetNumRegisters();
- const size_t num_common_regs = num_registers - num_composites;
- RegisterInfo *g_comp_register_infos = g_register_infos + num_common_regs;
-
- // First we need to validate that all registers that we already have match
- // the non composite regs. If so, then we can add the registers, else we
- // need to bail
- bool match = true;
- if (num_dynamic_regs == num_common_regs) {
- for (i = 0; match && i < num_dynamic_regs; ++i) {
- // Make sure all register names match
- if (m_regs[i].name && g_register_infos[i].name) {
- if (strcmp(m_regs[i].name, g_register_infos[i].name)) {
- match = false;
- break;
- }
- }
-
- // Make sure all register byte sizes match
- if (m_regs[i].byte_size != g_register_infos[i].byte_size) {
- match = false;
- break;
- }
- }
- } else {
- // Wrong number of registers.
- match = false;
- }
- // If "match" is true, then we can add extra registers.
- if (match) {
- for (i = 0; i < num_composites; ++i) {
- ConstString name;
- ConstString alt_name;
- const uint32_t first_primordial_reg =
- g_comp_register_infos[i].value_regs[0];
- const char *reg_name = g_register_infos[first_primordial_reg].name;
- if (reg_name && reg_name[0]) {
- for (uint32_t j = 0; j < num_dynamic_regs; ++j) {
- const RegisterInfo *reg_info = GetRegisterInfoAtIndex(j);
- // Find a matching primordial register info entry.
- if (reg_info && reg_info->name &&
- ::strcasecmp(reg_info->name, reg_name) == 0) {
- // The name matches the existing primordial entry. Find and
- // assign the offset, and then add this composite register entry.
- g_comp_register_infos[i].byte_offset = reg_info->byte_offset;
- name.SetCString(g_comp_register_infos[i].name);
- AddRegister(g_comp_register_infos[i], name, alt_name,
- vfp_reg_set);
- }
- }
- }
- }
- }
- }
-}
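
The removed HardcodeARMRegisters routine laid registers out in two passes: primordial registers (r0-r15, s0-s31, fpscr, d16-d31) are packed sequentially, while composite registers (d0-d15, q0-q15) reuse the byte offset of their first value register, so q0's offset is simply s0's offset. A minimal sketch of that layout rule, assuming a simplified record type (the Reg struct and its field names are illustrative, not LLDB's RegisterInfo):

// Sketch of the offset rule the removed code implemented: primordial
// registers are packed back to back, composite registers inherit the
// offset of their first value register.
#include <cstdint>
#include <vector>

struct Reg {
  uint32_t byte_size;
  uint32_t byte_offset;
  const uint32_t *value_regs; // null for primordial registers
};

void AssignOffsets(std::vector<Reg> &regs) {
  uint32_t offset = 0;
  for (Reg &r : regs) {
    if (!r.value_regs) {           // primordial: next free offset
      r.byte_offset = offset;
      offset += r.byte_size;
    } else {                       // composite: reuse first value reg's offset
      r.byte_offset = regs[r.value_regs[0]].byte_offset;
    }
  }
}
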
diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteRegisterContext.h b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteRegisterContext.h
index 18fcb73b9815..83c809c5aab6 100644
--- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteRegisterContext.h
+++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteRegisterContext.h
@@ -11,7 +11,7 @@
#include <vector>
-#include "Plugins/Process/Utility/DynamicRegisterInfo.h"
+#include "lldb/Target/DynamicRegisterInfo.h"
#include "lldb/Target/RegisterContext.h"
#include "lldb/Utility/ConstString.h"
#include "lldb/Utility/DataExtractor.h"
@@ -38,7 +38,6 @@ public:
~GDBRemoteDynamicRegisterInfo() override = default;
- void HardcodeARMRegisters(bool from_scratch);
bool UpdateARM64SVERegistersInfos(uint64_t vg);
};
@@ -83,7 +82,7 @@ public:
protected:
friend class ThreadGDBRemote;
- bool ReadRegisterBytes(const RegisterInfo *reg_info, DataExtractor &data);
+ bool ReadRegisterBytes(const RegisterInfo *reg_info);
bool WriteRegisterBytes(const RegisterInfo *reg_info, DataExtractor &data,
uint32_t data_offset);
diff --git a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp
index 6914b37348ea..2233bf675819 100644
--- a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp
+++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp
@@ -43,7 +43,6 @@
#include "lldb/Host/HostThread.h"
#include "lldb/Host/PosixApi.h"
#include "lldb/Host/PseudoTerminal.h"
-#include "lldb/Host/StringConvert.h"
#include "lldb/Host/ThreadLauncher.h"
#include "lldb/Host/XML.h"
#include "lldb/Interpreter/CommandInterpreter.h"
@@ -104,7 +103,7 @@ namespace lldb {
// and get the packet history dumped to a file.
void DumpProcessGDBRemotePacketHistory(void *p, const char *path) {
auto file = FileSystem::Instance().Open(
- FileSpec(path), File::eOpenOptionWrite | File::eOpenOptionCanCreate);
+ FileSpec(path), File::eOpenOptionWriteOnly | File::eOpenOptionCanCreate);
if (!file) {
llvm::consumeError(file.takeError());
return;
@@ -127,7 +126,7 @@ enum {
class PluginProperties : public Properties {
public:
static ConstString GetSettingName() {
- return ProcessGDBRemote::GetPluginNameStatic();
+ return ConstString(ProcessGDBRemote::GetPluginNameStatic());
}
PluginProperties() : Properties() {
@@ -166,13 +165,9 @@ public:
}
};
-typedef std::shared_ptr<PluginProperties> ProcessKDPPropertiesSP;
-
-static const ProcessKDPPropertiesSP &GetGlobalPluginProperties() {
- static ProcessKDPPropertiesSP g_settings_sp;
- if (!g_settings_sp)
- g_settings_sp = std::make_shared<PluginProperties>();
- return g_settings_sp;
+static PluginProperties &GetGlobalPluginProperties() {
+ static PluginProperties g_settings;
+ return g_settings;
}
} // namespace
@@ -189,12 +184,7 @@ static const ProcessKDPPropertiesSP &GetGlobalPluginProperties() {
#define HIGH_PORT (49151u)
#endif
-ConstString ProcessGDBRemote::GetPluginNameStatic() {
- static ConstString g_name("gdb-remote");
- return g_name;
-}
-
-const char *ProcessGDBRemote::GetPluginDescriptionStatic() {
+llvm::StringRef ProcessGDBRemote::GetPluginDescriptionStatic() {
return "GDB Remote protocol based debugging plug-in.";
}
@@ -214,7 +204,7 @@ ProcessGDBRemote::CreateInstance(lldb::TargetSP target_sp,
}
std::chrono::seconds ProcessGDBRemote::GetPacketTimeout() {
- return std::chrono::seconds(GetGlobalPluginProperties()->GetPacketTimeout());
+ return std::chrono::seconds(GetGlobalPluginProperties().GetPacketTimeout());
}
bool ProcessGDBRemote::CanDebug(lldb::TargetSP target_sp,
@@ -252,8 +242,7 @@ bool ProcessGDBRemote::CanDebug(lldb::TargetSP target_sp,
ProcessGDBRemote::ProcessGDBRemote(lldb::TargetSP target_sp,
ListenerSP listener_sp)
: Process(target_sp, listener_sp),
- m_debugserver_pid(LLDB_INVALID_PROCESS_ID), m_last_stop_packet_mutex(),
- m_register_info_sp(nullptr),
+ m_debugserver_pid(LLDB_INVALID_PROCESS_ID), m_register_info_sp(nullptr),
m_async_broadcaster(nullptr, "lldb.process.gdb-remote.async-broadcaster"),
m_async_listener_sp(
Listener::MakeListener("lldb.process.gdb-remote.async-listener")),
@@ -265,7 +254,8 @@ ProcessGDBRemote::ProcessGDBRemote(lldb::TargetSP target_sp,
m_waiting_for_attach(false), m_destroy_tried_resuming(false),
m_command_sp(), m_breakpoint_pc_offset(0),
m_initial_tid(LLDB_INVALID_THREAD_ID), m_replay_mode(false),
- m_allow_flash_writes(false), m_erased_flash_ranges() {
+ m_allow_flash_writes(false), m_erased_flash_ranges(),
+ m_vfork_in_progress(false) {
m_async_broadcaster.SetEventName(eBroadcastBitAsyncThreadShouldExit,
"async thread should exit");
m_async_broadcaster.SetEventName(eBroadcastBitAsyncContinue,
@@ -303,12 +293,12 @@ ProcessGDBRemote::ProcessGDBRemote(lldb::TargetSP target_sp,
}
const uint64_t timeout_seconds =
- GetGlobalPluginProperties()->GetPacketTimeout();
+ GetGlobalPluginProperties().GetPacketTimeout();
if (timeout_seconds > 0)
m_gdb_comm.SetPacketTimeout(std::chrono::seconds(timeout_seconds));
m_use_g_packet_for_reading =
- GetGlobalPluginProperties()->GetUseGPacketForReading();
+ GetGlobalPluginProperties().GetUseGPacketForReading();
}
// Destructor
@@ -329,11 +319,6 @@ ProcessGDBRemote::~ProcessGDBRemote() {
KillDebugserverProcess();
}
-// PluginInterface
-ConstString ProcessGDBRemote::GetPluginName() { return GetPluginNameStatic(); }
-
-uint32_t ProcessGDBRemote::GetPluginVersion() { return 1; }
-
bool ProcessGDBRemote::ParsePythonTargetDefinition(
const FileSpec &target_definition_fspec) {
ScriptInterpreter *interpreter =
@@ -382,20 +367,14 @@ bool ProcessGDBRemote::ParsePythonTargetDefinition(
}
static size_t SplitCommaSeparatedRegisterNumberString(
- const llvm::StringRef &comma_separated_regiter_numbers,
+ const llvm::StringRef &comma_separated_register_numbers,
std::vector<uint32_t> &regnums, int base) {
regnums.clear();
- std::pair<llvm::StringRef, llvm::StringRef> value_pair;
- value_pair.second = comma_separated_regiter_numbers;
- do {
- value_pair = value_pair.second.split(',');
- if (!value_pair.first.empty()) {
- uint32_t reg = StringConvert::ToUInt32(value_pair.first.str().c_str(),
- LLDB_INVALID_REGNUM, base);
- if (reg != LLDB_INVALID_REGNUM)
- regnums.push_back(reg);
- }
- } while (!value_pair.second.empty());
+ for (llvm::StringRef x : llvm::split(comma_separated_register_numbers, ',')) {
+ uint32_t reg;
+ if (llvm::to_integer(x, reg, base))
+ regnums.push_back(reg);
+ }
return regnums.size();
}
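
The rewritten SplitCommaSeparatedRegisterNumberString drops the StringConvert/std::pair splitting loop in favour of LLVM's range-based helpers: llvm::split yields each comma-separated field and llvm::to_integer accepts only fully valid numbers. A minimal standalone sketch of the same idiom (the function and variable names below are illustrative):

// Parse "1a,2b,,3c" style lists: invalid or empty fields are simply skipped,
// matching the behaviour of the patched SplitCommaSeparatedRegisterNumberString.
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include <cstdint>
#include <vector>

static size_t ParseRegisterNumbers(llvm::StringRef csv, unsigned base,
                                   std::vector<uint32_t> &out) {
  out.clear();
  for (llvm::StringRef field : llvm::split(csv, ',')) {
    uint32_t reg;
    if (llvm::to_integer(field, reg, base)) // false for empty/invalid fields
      out.push_back(reg);
  }
  return out.size();
}
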
@@ -410,7 +389,7 @@ void ProcessGDBRemote::BuildDynamicRegisterInfo(bool force) {
// timeout is and can see it.
const auto host_packet_timeout = m_gdb_comm.GetHostDefaultPacketTimeout();
if (host_packet_timeout > std::chrono::seconds(0)) {
- GetGlobalPluginProperties()->SetPacketTimeout(host_packet_timeout.count());
+ GetGlobalPluginProperties().SetPacketTimeout(host_packet_timeout.count());
}
// Register info search order:
@@ -420,7 +399,7 @@ void ProcessGDBRemote::BuildDynamicRegisterInfo(bool force) {
// 3 - Fall back on the qRegisterInfo packets.
FileSpec target_definition_fspec =
- GetGlobalPluginProperties()->GetTargetDefinitionFile();
+ GetGlobalPluginProperties().GetTargetDefinitionFile();
if (!FileSystem::Instance().Exists(target_definition_fspec)) {
// If the filename doesn't exist, it may be a ~ not having been expanded -
// try to resolve it.
@@ -455,7 +434,7 @@ void ProcessGDBRemote::BuildDynamicRegisterInfo(bool force) {
return;
char packet[128];
- uint32_t reg_offset = LLDB_INVALID_INDEX32;
+ std::vector<DynamicRegisterInfo::Register> registers;
uint32_t reg_num = 0;
for (StringExtractorGDBRemote::ResponseType response_type =
StringExtractorGDBRemote::eResponse;
@@ -471,53 +450,25 @@ void ProcessGDBRemote::BuildDynamicRegisterInfo(bool force) {
if (response_type == StringExtractorGDBRemote::eResponse) {
llvm::StringRef name;
llvm::StringRef value;
- ConstString reg_name;
- ConstString alt_name;
- ConstString set_name;
- std::vector<uint32_t> value_regs;
- std::vector<uint32_t> invalidate_regs;
- std::vector<uint8_t> dwarf_opcode_bytes;
- RegisterInfo reg_info = {
- nullptr, // Name
- nullptr, // Alt name
- 0, // byte size
- reg_offset, // offset
- eEncodingUint, // encoding
- eFormatHex, // format
- {
- LLDB_INVALID_REGNUM, // eh_frame reg num
- LLDB_INVALID_REGNUM, // DWARF reg num
- LLDB_INVALID_REGNUM, // generic reg num
- reg_num, // process plugin reg num
- reg_num // native register number
- },
- nullptr,
- nullptr,
- nullptr, // Dwarf expression opcode bytes pointer
- 0 // Dwarf expression opcode bytes length
- };
+ DynamicRegisterInfo::Register reg_info;
while (response.GetNameColonValue(name, value)) {
if (name.equals("name")) {
- reg_name.SetString(value);
+ reg_info.name.SetString(value);
} else if (name.equals("alt-name")) {
- alt_name.SetString(value);
+ reg_info.alt_name.SetString(value);
} else if (name.equals("bitsize")) {
- value.getAsInteger(0, reg_info.byte_size);
- reg_info.byte_size /= CHAR_BIT;
+ if (!value.getAsInteger(0, reg_info.byte_size))
+ reg_info.byte_size /= CHAR_BIT;
} else if (name.equals("offset")) {
- if (value.getAsInteger(0, reg_offset))
- reg_offset = UINT32_MAX;
+ value.getAsInteger(0, reg_info.byte_offset);
} else if (name.equals("encoding")) {
const Encoding encoding = Args::StringToEncoding(value);
if (encoding != eEncodingInvalid)
reg_info.encoding = encoding;
} else if (name.equals("format")) {
- Format format = eFormatInvalid;
- if (OptionArgParser::ToFormat(value.str().c_str(), format, nullptr)
+ if (!OptionArgParser::ToFormat(value.str().c_str(), reg_info.format, nullptr)
.Success())
- reg_info.format = format;
- else {
reg_info.format =
llvm::StringSwitch<Format>(value)
.Case("binary", eFormatBinary)
@@ -534,58 +485,23 @@ void ProcessGDBRemote::BuildDynamicRegisterInfo(bool force) {
.Case("vector-uint64", eFormatVectorOfUInt64)
.Case("vector-uint128", eFormatVectorOfUInt128)
.Default(eFormatInvalid);
- }
} else if (name.equals("set")) {
- set_name.SetString(value);
+ reg_info.set_name.SetString(value);
} else if (name.equals("gcc") || name.equals("ehframe")) {
- if (value.getAsInteger(0, reg_info.kinds[eRegisterKindEHFrame]))
- reg_info.kinds[eRegisterKindEHFrame] = LLDB_INVALID_REGNUM;
+ value.getAsInteger(0, reg_info.regnum_ehframe);
} else if (name.equals("dwarf")) {
- if (value.getAsInteger(0, reg_info.kinds[eRegisterKindDWARF]))
- reg_info.kinds[eRegisterKindDWARF] = LLDB_INVALID_REGNUM;
+ value.getAsInteger(0, reg_info.regnum_dwarf);
} else if (name.equals("generic")) {
- reg_info.kinds[eRegisterKindGeneric] =
- Args::StringToGenericRegister(value);
+ reg_info.regnum_generic = Args::StringToGenericRegister(value);
} else if (name.equals("container-regs")) {
- SplitCommaSeparatedRegisterNumberString(value, value_regs, 16);
+ SplitCommaSeparatedRegisterNumberString(value, reg_info.value_regs, 16);
} else if (name.equals("invalidate-regs")) {
- SplitCommaSeparatedRegisterNumberString(value, invalidate_regs, 16);
- } else if (name.equals("dynamic_size_dwarf_expr_bytes")) {
- size_t dwarf_opcode_len = value.size() / 2;
- assert(dwarf_opcode_len > 0);
-
- dwarf_opcode_bytes.resize(dwarf_opcode_len);
- reg_info.dynamic_size_dwarf_len = dwarf_opcode_len;
-
- StringExtractor opcode_extractor(value);
- uint32_t ret_val =
- opcode_extractor.GetHexBytesAvail(dwarf_opcode_bytes);
- assert(dwarf_opcode_len == ret_val);
- UNUSED_IF_ASSERT_DISABLED(ret_val);
- reg_info.dynamic_size_dwarf_expr_bytes = dwarf_opcode_bytes.data();
+ SplitCommaSeparatedRegisterNumberString(value, reg_info.invalidate_regs, 16);
}
}
- reg_info.byte_offset = reg_offset;
assert(reg_info.byte_size != 0);
- reg_offset = LLDB_INVALID_INDEX32;
- if (!value_regs.empty()) {
- value_regs.push_back(LLDB_INVALID_REGNUM);
- reg_info.value_regs = value_regs.data();
- }
- if (!invalidate_regs.empty()) {
- invalidate_regs.push_back(LLDB_INVALID_REGNUM);
- reg_info.invalidate_regs = invalidate_regs.data();
- }
-
- reg_info.name = reg_name.AsCString();
- // We have to make a temporary ABI here, and not use the GetABI because
- // this code gets called in DidAttach, when the target architecture
- // (and consequently the ABI we'll get from the process) may be wrong.
- if (ABISP abi_sp = ABI::FindPlugin(shared_from_this(), arch_to_use))
- abi_sp->AugmentRegisterInfo(reg_info);
-
- m_register_info_sp->AddRegister(reg_info, reg_name, alt_name, set_name);
+ registers.push_back(reg_info);
} else {
break; // ensure exit before reg_num is incremented
}
@@ -594,31 +510,7 @@ void ProcessGDBRemote::BuildDynamicRegisterInfo(bool force) {
}
}
- if (m_register_info_sp->GetNumRegisters() > 0) {
- m_register_info_sp->Finalize(GetTarget().GetArchitecture());
- return;
- }
-
- // We didn't get anything if the accumulated reg_num is zero. See if we are
- // debugging ARM and fill with a hard coded register set until we can get an
- // updated debugserver down on the devices. On the other hand, if the
- // accumulated reg_num is positive, see if we can add composite registers to
- // the existing primordial ones.
- bool from_scratch = (m_register_info_sp->GetNumRegisters() == 0);
-
- if (!target_arch.IsValid()) {
- if (arch_to_use.IsValid() &&
- (arch_to_use.GetMachine() == llvm::Triple::arm ||
- arch_to_use.GetMachine() == llvm::Triple::thumb) &&
- arch_to_use.GetTriple().getVendor() == llvm::Triple::Apple)
- m_register_info_sp->HardcodeARMRegisters(from_scratch);
- } else if (target_arch.GetMachine() == llvm::Triple::arm ||
- target_arch.GetMachine() == llvm::Triple::thumb) {
- m_register_info_sp->HardcodeARMRegisters(from_scratch);
- }
-
- // At this point, we can finalize our register info.
- m_register_info_sp->Finalize(GetTarget().GetArchitecture());
+ AddRemoteRegisters(registers, arch_to_use);
}
Status ProcessGDBRemote::WillLaunch(lldb_private::Module *module) {
@@ -663,10 +555,6 @@ Status ProcessGDBRemote::DoConnectRemote(llvm::StringRef remote_url) {
if (m_gdb_comm.GetStopReply(response)) {
SetLastStopPacket(response);
- // '?' Packets must be handled differently in non-stop mode
- if (GetTarget().GetNonStopModeEnabled())
- HandleStopReplySequence();
-
Target &target = GetTarget();
if (!target.GetArchitecture().IsValid()) {
if (m_gdb_comm.GetProcessArchitecture().IsValid()) {
@@ -719,14 +607,6 @@ Status ProcessGDBRemote::DoConnectRemote(llvm::StringRef remote_url) {
__FUNCTION__, GetID(),
GetTarget().GetArchitecture().GetTriple().getTriple().c_str());
- if (error.Success()) {
- PlatformSP platform_sp = GetTarget().GetPlatform();
- if (platform_sp && platform_sp->IsConnected())
- SetUnixSignals(platform_sp->GetUnixSignals());
- else
- SetUnixSignals(UnixSignals::Create(GetTarget().GetArchitecture()));
- }
-
return error;
}
@@ -797,146 +677,133 @@ Status ProcessGDBRemote::DoLaunch(lldb_private::Module *exe_module,
// LLDB_LOG_OPTION_PREPEND_PROC_AND_THREAD);
// ::LogSetLogFile ("/dev/stdout");
- ObjectFile *object_file = exe_module->GetObjectFile();
- if (object_file) {
- error = EstablishConnectionIfNeeded(launch_info);
- if (error.Success()) {
- PseudoTerminal pty;
- const bool disable_stdio = (launch_flags & eLaunchFlagDisableSTDIO) != 0;
-
- PlatformSP platform_sp(GetTarget().GetPlatform());
- if (disable_stdio) {
- // set to /dev/null unless redirected to a file above
- if (!stdin_file_spec)
- stdin_file_spec.SetFile(FileSystem::DEV_NULL,
- FileSpec::Style::native);
- if (!stdout_file_spec)
- stdout_file_spec.SetFile(FileSystem::DEV_NULL,
- FileSpec::Style::native);
- if (!stderr_file_spec)
- stderr_file_spec.SetFile(FileSystem::DEV_NULL,
- FileSpec::Style::native);
- } else if (platform_sp && platform_sp->IsHost()) {
- // If the debugserver is local and we aren't disabling STDIO, lets use
- // a pseudo terminal to instead of relying on the 'O' packets for stdio
- // since 'O' packets can really slow down debugging if the inferior
- // does a lot of output.
- if ((!stdin_file_spec || !stdout_file_spec || !stderr_file_spec) &&
- !errorToBool(pty.OpenFirstAvailablePrimary(O_RDWR | O_NOCTTY))) {
- FileSpec secondary_name(pty.GetSecondaryName());
-
- if (!stdin_file_spec)
- stdin_file_spec = secondary_name;
-
- if (!stdout_file_spec)
- stdout_file_spec = secondary_name;
-
- if (!stderr_file_spec)
- stderr_file_spec = secondary_name;
- }
- LLDB_LOGF(
- log,
- "ProcessGDBRemote::%s adjusted STDIO paths for local platform "
- "(IsHost() is true) using secondary: stdin=%s, stdout=%s, "
- "stderr=%s",
- __FUNCTION__,
- stdin_file_spec ? stdin_file_spec.GetCString() : "<null>",
- stdout_file_spec ? stdout_file_spec.GetCString() : "<null>",
- stderr_file_spec ? stderr_file_spec.GetCString() : "<null>");
- }
-
- LLDB_LOGF(log,
- "ProcessGDBRemote::%s final STDIO paths after all "
- "adjustments: stdin=%s, stdout=%s, stderr=%s",
- __FUNCTION__,
- stdin_file_spec ? stdin_file_spec.GetCString() : "<null>",
- stdout_file_spec ? stdout_file_spec.GetCString() : "<null>",
- stderr_file_spec ? stderr_file_spec.GetCString() : "<null>");
-
- if (stdin_file_spec)
- m_gdb_comm.SetSTDIN(stdin_file_spec);
- if (stdout_file_spec)
- m_gdb_comm.SetSTDOUT(stdout_file_spec);
- if (stderr_file_spec)
- m_gdb_comm.SetSTDERR(stderr_file_spec);
+ error = EstablishConnectionIfNeeded(launch_info);
+ if (error.Success()) {
+ PseudoTerminal pty;
+ const bool disable_stdio = (launch_flags & eLaunchFlagDisableSTDIO) != 0;
- m_gdb_comm.SetDisableASLR(launch_flags & eLaunchFlagDisableASLR);
- m_gdb_comm.SetDetachOnError(launch_flags & eLaunchFlagDetachOnError);
+ PlatformSP platform_sp(GetTarget().GetPlatform());
+ if (disable_stdio) {
+ // set to /dev/null unless redirected to a file above
+ if (!stdin_file_spec)
+ stdin_file_spec.SetFile(FileSystem::DEV_NULL,
+ FileSpec::Style::native);
+ if (!stdout_file_spec)
+ stdout_file_spec.SetFile(FileSystem::DEV_NULL,
+ FileSpec::Style::native);
+ if (!stderr_file_spec)
+ stderr_file_spec.SetFile(FileSystem::DEV_NULL,
+ FileSpec::Style::native);
+ } else if (platform_sp && platform_sp->IsHost()) {
+ // If the debugserver is local and we aren't disabling STDIO, lets use
+ // a pseudo terminal to instead of relying on the 'O' packets for stdio
+ // since 'O' packets can really slow down debugging if the inferior
+ // does a lot of output.
+ if ((!stdin_file_spec || !stdout_file_spec || !stderr_file_spec) &&
+ !errorToBool(pty.OpenFirstAvailablePrimary(O_RDWR | O_NOCTTY))) {
+ FileSpec secondary_name(pty.GetSecondaryName());
- m_gdb_comm.SendLaunchArchPacket(
- GetTarget().GetArchitecture().GetArchitectureName());
+ if (!stdin_file_spec)
+ stdin_file_spec = secondary_name;
- const char *launch_event_data = launch_info.GetLaunchEventData();
- if (launch_event_data != nullptr && *launch_event_data != '\0')
- m_gdb_comm.SendLaunchEventDataPacket(launch_event_data);
+ if (!stdout_file_spec)
+ stdout_file_spec = secondary_name;
- if (working_dir) {
- m_gdb_comm.SetWorkingDir(working_dir);
+ if (!stderr_file_spec)
+ stderr_file_spec = secondary_name;
}
+ LLDB_LOGF(
+ log,
+ "ProcessGDBRemote::%s adjusted STDIO paths for local platform "
+ "(IsHost() is true) using secondary: stdin=%s, stdout=%s, "
+ "stderr=%s",
+ __FUNCTION__,
+ stdin_file_spec ? stdin_file_spec.GetCString() : "<null>",
+ stdout_file_spec ? stdout_file_spec.GetCString() : "<null>",
+ stderr_file_spec ? stderr_file_spec.GetCString() : "<null>");
+ }
- // Send the environment and the program + arguments after we connect
- m_gdb_comm.SendEnvironment(launch_info.GetEnvironment());
+ LLDB_LOGF(log,
+ "ProcessGDBRemote::%s final STDIO paths after all "
+ "adjustments: stdin=%s, stdout=%s, stderr=%s",
+ __FUNCTION__,
+ stdin_file_spec ? stdin_file_spec.GetCString() : "<null>",
+ stdout_file_spec ? stdout_file_spec.GetCString() : "<null>",
+ stderr_file_spec ? stderr_file_spec.GetCString() : "<null>");
+
+ if (stdin_file_spec)
+ m_gdb_comm.SetSTDIN(stdin_file_spec);
+ if (stdout_file_spec)
+ m_gdb_comm.SetSTDOUT(stdout_file_spec);
+ if (stderr_file_spec)
+ m_gdb_comm.SetSTDERR(stderr_file_spec);
+
+ m_gdb_comm.SetDisableASLR(launch_flags & eLaunchFlagDisableASLR);
+ m_gdb_comm.SetDetachOnError(launch_flags & eLaunchFlagDetachOnError);
+
+ m_gdb_comm.SendLaunchArchPacket(
+ GetTarget().GetArchitecture().GetArchitectureName());
+
+ const char *launch_event_data = launch_info.GetLaunchEventData();
+ if (launch_event_data != nullptr && *launch_event_data != '\0')
+ m_gdb_comm.SendLaunchEventDataPacket(launch_event_data);
+
+ if (working_dir) {
+ m_gdb_comm.SetWorkingDir(working_dir);
+ }
- {
- // Scope for the scoped timeout object
- GDBRemoteCommunication::ScopedTimeout timeout(m_gdb_comm,
- std::chrono::seconds(10));
+ // Send the environment and the program + arguments after we connect
+ m_gdb_comm.SendEnvironment(launch_info.GetEnvironment());
- int arg_packet_err = m_gdb_comm.SendArgumentsPacket(launch_info);
- if (arg_packet_err == 0) {
- std::string error_str;
- if (m_gdb_comm.GetLaunchSuccess(error_str)) {
- SetID(m_gdb_comm.GetCurrentProcessID());
- } else {
- error.SetErrorString(error_str.c_str());
- }
+ {
+ // Scope for the scoped timeout object
+ GDBRemoteCommunication::ScopedTimeout timeout(m_gdb_comm,
+ std::chrono::seconds(10));
+
+ int arg_packet_err = m_gdb_comm.SendArgumentsPacket(launch_info);
+ if (arg_packet_err == 0) {
+ std::string error_str;
+ if (m_gdb_comm.GetLaunchSuccess(error_str)) {
+ SetID(m_gdb_comm.GetCurrentProcessID());
} else {
- error.SetErrorStringWithFormat("'A' packet returned an error: %i",
- arg_packet_err);
+ error.SetErrorString(error_str.c_str());
}
+ } else {
+ error.SetErrorStringWithFormat("'A' packet returned an error: %i",
+ arg_packet_err);
}
+ }
- if (GetID() == LLDB_INVALID_PROCESS_ID) {
- LLDB_LOGF(log, "failed to connect to debugserver: %s",
- error.AsCString());
- KillDebugserverProcess();
- return error;
- }
+ if (GetID() == LLDB_INVALID_PROCESS_ID) {
+ LLDB_LOGF(log, "failed to connect to debugserver: %s",
+ error.AsCString());
+ KillDebugserverProcess();
+ return error;
+ }
- StringExtractorGDBRemote response;
- if (m_gdb_comm.GetStopReply(response)) {
- SetLastStopPacket(response);
- // '?' Packets must be handled differently in non-stop mode
- if (GetTarget().GetNonStopModeEnabled())
- HandleStopReplySequence();
+ StringExtractorGDBRemote response;
+ if (m_gdb_comm.GetStopReply(response)) {
+ SetLastStopPacket(response);
- const ArchSpec &process_arch = m_gdb_comm.GetProcessArchitecture();
+ const ArchSpec &process_arch = m_gdb_comm.GetProcessArchitecture();
- if (process_arch.IsValid()) {
- GetTarget().MergeArchitecture(process_arch);
- } else {
- const ArchSpec &host_arch = m_gdb_comm.GetHostArchitecture();
- if (host_arch.IsValid())
- GetTarget().MergeArchitecture(host_arch);
- }
+ if (process_arch.IsValid()) {
+ GetTarget().MergeArchitecture(process_arch);
+ } else {
+ const ArchSpec &host_arch = m_gdb_comm.GetHostArchitecture();
+ if (host_arch.IsValid())
+ GetTarget().MergeArchitecture(host_arch);
+ }
- SetPrivateState(SetThreadStopInfo(response));
+ SetPrivateState(SetThreadStopInfo(response));
- if (!disable_stdio) {
- if (pty.GetPrimaryFileDescriptor() != PseudoTerminal::invalid_fd)
- SetSTDIOFileDescriptor(pty.ReleasePrimaryFileDescriptor());
- }
+ if (!disable_stdio) {
+ if (pty.GetPrimaryFileDescriptor() != PseudoTerminal::invalid_fd)
+ SetSTDIOFileDescriptor(pty.ReleasePrimaryFileDescriptor());
}
- } else {
- LLDB_LOGF(log, "failed to connect to debugserver: %s", error.AsCString());
}
} else {
- // Set our user ID to an invalid process ID.
- SetID(LLDB_INVALID_PROCESS_ID);
- error.SetErrorStringWithFormat(
- "failed to get object file from '%s' for arch %s",
- exe_module->GetFileSpec().GetFilename().AsCString(),
- exe_module->GetArchitecture().GetArchitectureName());
+ LLDB_LOGF(log, "failed to connect to debugserver: %s", error.AsCString());
}
return error;
}
@@ -958,9 +825,6 @@ Status ProcessGDBRemote::ConnectToDebugserver(llvm::StringRef connect_url) {
if (conn_up->Connect(connect_url, &error) == eConnectionStatusSuccess) {
m_gdb_comm.SetConnection(std::move(conn_up));
break;
- } else if (error.WasInterrupted()) {
- // If we were interrupted, don't keep retrying.
- break;
}
retry_count++;
@@ -979,11 +843,6 @@ Status ProcessGDBRemote::ConnectToDebugserver(llvm::StringRef connect_url) {
return error;
}
- // Start the communications read thread so all incoming data can be parsed
- // into packets and queued as they arrive.
- if (GetTarget().GetNonStopModeEnabled())
- m_gdb_comm.StartReadThread();
-
// We always seem to be able to open a connection to a local port so we need
// to make sure we can then send data to it. If we can't then we aren't
// actually connected to anything, so try and do the handshake with the
@@ -995,10 +854,6 @@ Status ProcessGDBRemote::ConnectToDebugserver(llvm::StringRef connect_url) {
return error;
}
- // Send $QNonStop:1 packet on startup if required
- if (GetTarget().GetNonStopModeEnabled())
- GetTarget().SetNonStopModeEnabled(m_gdb_comm.SetNonStopMode(true));
-
m_gdb_comm.GetEchoSupported();
m_gdb_comm.GetThreadSuffixSupported();
m_gdb_comm.GetListThreadsInStopReplySupported();
@@ -1007,10 +862,6 @@ Status ProcessGDBRemote::ConnectToDebugserver(llvm::StringRef connect_url) {
m_gdb_comm.GetVAttachOrWaitSupported();
m_gdb_comm.EnableErrorStringInPacket();
- // Ask the remote server for the default thread id
- if (GetTarget().GetNonStopModeEnabled())
- m_gdb_comm.GetDefaultThreadId(m_initial_tid);
-
size_t num_cmds = GetExtraStartupCommands().GetArgumentCount();
for (size_t idx = 0; idx < num_cmds; idx++) {
StringExtractorGDBRemote response;
@@ -1111,6 +962,18 @@ void ProcessGDBRemote::DidLaunchOrAttach(ArchSpec &process_arch) {
if (StructuredData::Array *supported_packets =
m_gdb_comm.GetSupportedStructuredDataPlugins())
MapSupportedStructuredDataPlugins(*supported_packets);
+
+ // If connected to LLDB ("native-signals+"), use signal defs for
+ // the remote platform. If connected to GDB, just use the standard set.
+ if (!m_gdb_comm.UsesNativeSignals()) {
+ SetUnixSignals(std::make_shared<GDBRemoteSignals>());
+ } else {
+ PlatformSP platform_sp = GetTarget().GetPlatform();
+ if (platform_sp && platform_sp->IsConnected())
+ SetUnixSignals(platform_sp->GetUnixSignals());
+ else
+ SetUnixSignals(UnixSignals::Create(GetTarget().GetArchitecture()));
+ }
}
void ProcessGDBRemote::MaybeLoadExecutableModule() {
@@ -1270,10 +1133,9 @@ Status ProcessGDBRemote::DoResume() {
StreamString continue_packet;
bool continue_packet_error = false;
if (m_gdb_comm.HasAnyVContSupport()) {
- if (!GetTarget().GetNonStopModeEnabled() &&
- (m_continue_c_tids.size() == num_threads ||
- (m_continue_c_tids.empty() && m_continue_C_tids.empty() &&
- m_continue_s_tids.empty() && m_continue_S_tids.empty()))) {
+ if (m_continue_c_tids.size() == num_threads ||
+ (m_continue_c_tids.empty() && m_continue_C_tids.empty() &&
+ m_continue_s_tids.empty() && m_continue_S_tids.empty())) {
// All threads are continuing, just send a "c" packet
continue_packet.PutCString("c");
} else {
@@ -1391,14 +1253,7 @@ Status ProcessGDBRemote::DoResume() {
// All threads are resuming...
m_gdb_comm.SetCurrentThreadForRun(-1);
- // If in Non-Stop-Mode use vCont when stepping
- if (GetTarget().GetNonStopModeEnabled()) {
- if (m_gdb_comm.GetVContSupported('s'))
- continue_packet.PutCString("vCont;s");
- else
- continue_packet.PutChar('s');
- } else
- continue_packet.PutChar('s');
+ continue_packet.PutChar('s');
continue_packet_error = false;
} else if (num_continue_c_tids == 0 && num_continue_C_tids == 0 &&
@@ -1512,21 +1367,14 @@ size_t ProcessGDBRemote::UpdateThreadIDsFromStopReplyThreadsValue(
return m_thread_ids.size();
}
-size_t
-ProcessGDBRemote::UpdateThreadPCsFromStopReplyThreadsValue(std::string &value) {
+size_t ProcessGDBRemote::UpdateThreadPCsFromStopReplyThreadsValue(
+ llvm::StringRef value) {
m_thread_pcs.clear();
- size_t comma_pos;
- lldb::addr_t pc;
- while ((comma_pos = value.find(',')) != std::string::npos) {
- value[comma_pos] = '\0';
- pc = StringConvert::ToUInt64(value.c_str(), LLDB_INVALID_ADDRESS, 16);
- if (pc != LLDB_INVALID_ADDRESS)
+ for (llvm::StringRef x : llvm::split(value, ',')) {
+ lldb::addr_t pc;
+ if (llvm::to_integer(x, pc, 16))
m_thread_pcs.push_back(pc);
- value.erase(0, comma_pos + 1);
}
- pc = StringConvert::ToUInt64(value.c_str(), LLDB_INVALID_ADDRESS, 16);
- if (pc != LLDB_INVALID_ADDRESS)
- m_thread_pcs.push_back(pc);
return m_thread_pcs.size();
}
@@ -1557,40 +1405,30 @@ bool ProcessGDBRemote::UpdateThreadIDList() {
// See if we can get the thread IDs from the current stop reply packets
// that might contain a "threads" key/value pair
- // Lock the thread stack while we access it
- // Mutex::Locker stop_stack_lock(m_last_stop_packet_mutex);
- std::unique_lock<std::recursive_mutex> stop_stack_lock(
- m_last_stop_packet_mutex, std::defer_lock);
- if (stop_stack_lock.try_lock()) {
- // Get the number of stop packets on the stack
- int nItems = m_stop_packet_stack.size();
- // Iterate over them
- for (int i = 0; i < nItems; i++) {
- // Get the thread stop info
- StringExtractorGDBRemote &stop_info = m_stop_packet_stack[i];
- const std::string &stop_info_str =
- std::string(stop_info.GetStringRef());
+ if (m_last_stop_packet) {
+ // Get the thread stop info
+ StringExtractorGDBRemote &stop_info = *m_last_stop_packet;
+ const std::string &stop_info_str = std::string(stop_info.GetStringRef());
- m_thread_pcs.clear();
- const size_t thread_pcs_pos = stop_info_str.find(";thread-pcs:");
- if (thread_pcs_pos != std::string::npos) {
- const size_t start = thread_pcs_pos + strlen(";thread-pcs:");
- const size_t end = stop_info_str.find(';', start);
- if (end != std::string::npos) {
- std::string value = stop_info_str.substr(start, end - start);
- UpdateThreadPCsFromStopReplyThreadsValue(value);
- }
+ m_thread_pcs.clear();
+ const size_t thread_pcs_pos = stop_info_str.find(";thread-pcs:");
+ if (thread_pcs_pos != std::string::npos) {
+ const size_t start = thread_pcs_pos + strlen(";thread-pcs:");
+ const size_t end = stop_info_str.find(';', start);
+ if (end != std::string::npos) {
+ std::string value = stop_info_str.substr(start, end - start);
+ UpdateThreadPCsFromStopReplyThreadsValue(value);
}
+ }
- const size_t threads_pos = stop_info_str.find(";threads:");
- if (threads_pos != std::string::npos) {
- const size_t start = threads_pos + strlen(";threads:");
- const size_t end = stop_info_str.find(';', start);
- if (end != std::string::npos) {
- std::string value = stop_info_str.substr(start, end - start);
- if (UpdateThreadIDsFromStopReplyThreadsValue(value))
- return true;
- }
+ const size_t threads_pos = stop_info_str.find(";threads:");
+ if (threads_pos != std::string::npos) {
+ const size_t start = threads_pos + strlen(";threads:");
+ const size_t end = stop_info_str.find(';', start);
+ if (end != std::string::npos) {
+ std::string value = stop_info_str.substr(start, end - start);
+ if (UpdateThreadIDsFromStopReplyThreadsValue(value))
+ return true;
}
}
}
@@ -1906,6 +1744,28 @@ ThreadSP ProcessGDBRemote::SetThreadStopInfo(
} else if (reason == "processor trace") {
thread_sp->SetStopInfo(StopInfo::CreateStopReasonProcessorTrace(
*thread_sp, description.c_str()));
+ } else if (reason == "fork") {
+ StringExtractor desc_extractor(description.c_str());
+ lldb::pid_t child_pid = desc_extractor.GetU64(
+ LLDB_INVALID_PROCESS_ID);
+ lldb::tid_t child_tid = desc_extractor.GetU64(
+ LLDB_INVALID_THREAD_ID);
+ thread_sp->SetStopInfo(StopInfo::CreateStopReasonFork(
+ *thread_sp, child_pid, child_tid));
+ handled = true;
+ } else if (reason == "vfork") {
+ StringExtractor desc_extractor(description.c_str());
+ lldb::pid_t child_pid = desc_extractor.GetU64(
+ LLDB_INVALID_PROCESS_ID);
+ lldb::tid_t child_tid = desc_extractor.GetU64(
+ LLDB_INVALID_THREAD_ID);
+ thread_sp->SetStopInfo(StopInfo::CreateStopReasonVFork(
+ *thread_sp, child_pid, child_tid));
+ handled = true;
+ } else if (reason == "vforkdone") {
+ thread_sp->SetStopInfo(
+ StopInfo::CreateStopReasonVForkDone(*thread_sp));
+ handled = true;
}
} else if (!signo) {
addr_t pc = thread_sp->GetRegisterContext()->GetPC();
@@ -2030,6 +1890,7 @@ ProcessGDBRemote::SetThreadStopInfo(StructuredData::Dictionary *thread_dict) {
// Iterate through all of the thread dictionary key/value pairs from the
// structured data dictionary
+ // FIXME: we're silently ignoring invalid data here
thread_dict->ForEach([this, &tid, &expedited_register_map, &thread_name,
&signo, &reason, &description, &exc_type, &exc_data,
&thread_dispatch_qaddr, &queue_vars_valid,
@@ -2094,9 +1955,8 @@ ProcessGDBRemote::SetThreadStopInfo(StructuredData::Dictionary *thread_dict) {
registers_dict->ForEach(
[&expedited_register_map](ConstString key,
StructuredData::Object *object) -> bool {
- const uint32_t reg =
- StringConvert::ToUInt32(key.GetCString(), UINT32_MAX, 10);
- if (reg != UINT32_MAX)
+ uint32_t reg;
+ if (llvm::to_integer(key.AsCString(), reg))
expedited_register_map[reg] =
std::string(object->GetStringValue());
return true; // Keep iterating through all array items
@@ -2312,6 +2172,21 @@ StateType ProcessGDBRemote::SetThreadStopInfo(StringExtractor &stop_packet) {
ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PROCESS));
LLDB_LOG_ERROR(log, std::move(error), "Failed to load modules: {0}");
}
+ } else if (key.compare("fork") == 0 || key.compare("vfork") == 0) {
+ // fork includes child pid/tid in thread-id format
+ StringExtractorGDBRemote thread_id{value};
+ auto pid_tid = thread_id.GetPidTid(LLDB_INVALID_PROCESS_ID);
+ if (!pid_tid) {
+ Log *log(
+ ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PROCESS));
+ LLDB_LOG(log, "Invalid PID/TID to fork: {0}", value);
+ pid_tid = {{LLDB_INVALID_PROCESS_ID, LLDB_INVALID_THREAD_ID}};
+ }
+
+ reason = key.str();
+ StreamString ostr;
+ ostr.Printf("%" PRIu64 " %" PRIu64, pid_tid->first, pid_tid->second);
+ description = std::string(ostr.GetString());
} else if (key.size() == 2 && ::isxdigit(key[0]) && ::isxdigit(key[1])) {
uint32_t reg = UINT32_MAX;
if (!key.getAsInteger(16, reg))
@@ -2378,22 +2253,9 @@ void ProcessGDBRemote::RefreshStateAfterStop() {
// date before we do that or we might overwrite what was computed here.
UpdateThreadListIfNeeded();
- // Scope for the lock
- {
- // Lock the thread stack while we access it
- std::lock_guard<std::recursive_mutex> guard(m_last_stop_packet_mutex);
- // Get the number of stop packets on the stack
- int nItems = m_stop_packet_stack.size();
- // Iterate over them
- for (int i = 0; i < nItems; i++) {
- // Get the thread stop info
- StringExtractorGDBRemote stop_info = m_stop_packet_stack[i];
- // Process thread stop info
- SetThreadStopInfo(stop_info);
- }
- // Clear the thread stop stack
- m_stop_packet_stack.clear();
- }
+ if (m_last_stop_packet)
+ SetThreadStopInfo(*m_last_stop_packet);
+ m_last_stop_packet.reset();
// If we have queried for a default thread id
if (m_initial_tid != LLDB_INVALID_THREAD_ID) {
@@ -2476,9 +2338,9 @@ Status ProcessGDBRemote::DoDestroy() {
m_public_state.GetValue() != eStateRunning) {
PlatformSP platform_sp = GetTarget().GetPlatform();
- // FIXME: These should be ConstStrings so we aren't doing strcmp'ing.
if (platform_sp && platform_sp->GetName() &&
- platform_sp->GetName() == PlatformRemoteiOS::GetPluginNameStatic()) {
+ platform_sp->GetName().GetStringRef() ==
+ PlatformRemoteiOS::GetPluginNameStatic()) {
if (m_destroy_tried_resuming) {
if (log)
log->PutCString("ProcessGDBRemote::DoDestroy() - Tried resuming to "
@@ -2640,20 +2502,7 @@ void ProcessGDBRemote::SetLastStopPacket(
m_gdb_comm.ResetDiscoverableSettings(did_exec);
}
- // Scope the lock
- {
- // Lock the thread stack while we access it
- std::lock_guard<std::recursive_mutex> guard(m_last_stop_packet_mutex);
-
- // We are are not using non-stop mode, there can only be one last stop
- // reply packet, so clear the list.
- if (!GetTarget().GetNonStopModeEnabled())
- m_stop_packet_stack.clear();
-
- // Add this stop packet to the stop packet stack This stack will get popped
- // and examined when we switch to the Stopped state
- m_stop_packet_stack.push_back(response);
- }
+ m_last_stop_packet = response;
}
void ProcessGDBRemote::SetUnixSignals(const UnixSignalsSP &signals_sp) {
@@ -3048,8 +2897,8 @@ lldb::addr_t ProcessGDBRemote::DoAllocateMemory(size_t size,
return allocated_addr;
}
-Status ProcessGDBRemote::GetMemoryRegionInfo(addr_t load_addr,
- MemoryRegionInfo &region_info) {
+Status ProcessGDBRemote::DoGetMemoryRegionInfo(addr_t load_addr,
+ MemoryRegionInfo &region_info) {
Status error(m_gdb_comm.GetMemoryRegionInfo(load_addr, region_info));
return error;
@@ -3634,7 +3483,7 @@ void ProcessGDBRemote::DebuggerInitialize(Debugger &debugger) {
debugger, PluginProperties::GetSettingName())) {
const bool is_global_setting = true;
PluginManager::CreateSettingForProcessPlugin(
- debugger, GetGlobalPluginProperties()->GetValueProperties(),
+ debugger, GetGlobalPluginProperties().GetValueProperties(),
ConstString("Properties for the gdb-remote process plug-in."),
is_global_setting);
}
@@ -3770,88 +3619,72 @@ thread_result_t ProcessGDBRemote::AsyncThread(void *arg) {
process->SetPrivateState(eStateRunning);
StringExtractorGDBRemote response;
- // If in Non-Stop-Mode
- if (process->GetTarget().GetNonStopModeEnabled()) {
- // send the vCont packet
- if (!process->GetGDBRemote().SendvContPacket(
- llvm::StringRef(continue_cstr, continue_cstr_len),
- process->GetInterruptTimeout(), response)) {
- // Something went wrong
- done = true;
- break;
- }
- }
- // If in All-Stop-Mode
- else {
- StateType stop_state =
- process->GetGDBRemote().SendContinuePacketAndWaitForResponse(
- *process, *process->GetUnixSignals(),
- llvm::StringRef(continue_cstr, continue_cstr_len),
- process->GetInterruptTimeout(),
- response);
-
- // We need to immediately clear the thread ID list so we are sure
- // to get a valid list of threads. The thread ID list might be
- // contained within the "response", or the stop reply packet that
- // caused the stop. So clear it now before we give the stop reply
- // packet to the process using the
- // process->SetLastStopPacket()...
- process->ClearThreadIDList();
+ StateType stop_state =
+ process->GetGDBRemote().SendContinuePacketAndWaitForResponse(
+ *process, *process->GetUnixSignals(),
+ llvm::StringRef(continue_cstr, continue_cstr_len),
+ process->GetInterruptTimeout(), response);
+
+ // We need to immediately clear the thread ID list so we are sure
+ // to get a valid list of threads. The thread ID list might be
+ // contained within the "response", or the stop reply packet that
+ // caused the stop. So clear it now before we give the stop reply
+ // packet to the process using the
+ // process->SetLastStopPacket()...
+ process->ClearThreadIDList();
+
+ switch (stop_state) {
+ case eStateStopped:
+ case eStateCrashed:
+ case eStateSuspended:
+ process->SetLastStopPacket(response);
+ process->SetPrivateState(stop_state);
+ break;
- switch (stop_state) {
- case eStateStopped:
- case eStateCrashed:
- case eStateSuspended:
- process->SetLastStopPacket(response);
- process->SetPrivateState(stop_state);
- break;
-
- case eStateExited: {
- process->SetLastStopPacket(response);
- process->ClearThreadIDList();
- response.SetFilePos(1);
-
- int exit_status = response.GetHexU8();
- std::string desc_string;
- if (response.GetBytesLeft() > 0 &&
- response.GetChar('-') == ';') {
- llvm::StringRef desc_str;
- llvm::StringRef desc_token;
- while (response.GetNameColonValue(desc_token, desc_str)) {
- if (desc_token != "description")
- continue;
- StringExtractor extractor(desc_str);
- extractor.GetHexByteString(desc_string);
- }
+ case eStateExited: {
+ process->SetLastStopPacket(response);
+ process->ClearThreadIDList();
+ response.SetFilePos(1);
+
+ int exit_status = response.GetHexU8();
+ std::string desc_string;
+ if (response.GetBytesLeft() > 0 && response.GetChar('-') == ';') {
+ llvm::StringRef desc_str;
+ llvm::StringRef desc_token;
+ while (response.GetNameColonValue(desc_token, desc_str)) {
+ if (desc_token != "description")
+ continue;
+ StringExtractor extractor(desc_str);
+ extractor.GetHexByteString(desc_string);
}
- process->SetExitStatus(exit_status, desc_string.c_str());
- done = true;
- break;
}
- case eStateInvalid: {
- // Check to see if we were trying to attach and if we got back
- // the "E87" error code from debugserver -- this indicates that
- // the process is not debuggable. Return a slightly more
- // helpful error message about why the attach failed.
- if (::strstr(continue_cstr, "vAttach") != nullptr &&
- response.GetError() == 0x87) {
- process->SetExitStatus(-1, "cannot attach to process due to "
- "System Integrity Protection");
- } else if (::strstr(continue_cstr, "vAttach") != nullptr &&
- response.GetStatus().Fail()) {
- process->SetExitStatus(-1, response.GetStatus().AsCString());
- } else {
- process->SetExitStatus(-1, "lost connection");
- }
- done = true;
- break;
+ process->SetExitStatus(exit_status, desc_string.c_str());
+ done = true;
+ break;
+ }
+ case eStateInvalid: {
+ // Check to see if we were trying to attach and if we got back
+ // the "E87" error code from debugserver -- this indicates that
+ // the process is not debuggable. Return a slightly more
+ // helpful error message about why the attach failed.
+ if (::strstr(continue_cstr, "vAttach") != nullptr &&
+ response.GetError() == 0x87) {
+ process->SetExitStatus(-1, "cannot attach to process due to "
+ "System Integrity Protection");
+ } else if (::strstr(continue_cstr, "vAttach") != nullptr &&
+ response.GetStatus().Fail()) {
+ process->SetExitStatus(-1, response.GetStatus().AsCString());
+ } else {
+ process->SetExitStatus(-1, "lost connection");
}
+ done = true;
+ break;
+ }
- default:
- process->SetPrivateState(stop_state);
- break;
- } // switch(stop_state)
- } // else // if in All-stop-mode
+ default:
+ process->SetPrivateState(stop_state);
+ break;
+ } // switch(stop_state)
} // if (continue_packet)
} // case eBroadcastBitAsyncContinue
break;
@@ -4023,7 +3856,7 @@ bool ProcessGDBRemote::StopNoticingNewThreads() {
DynamicLoader *ProcessGDBRemote::GetDynamicLoader() {
if (m_dyld_up.get() == nullptr)
- m_dyld_up.reset(DynamicLoader::FindPlugin(this, nullptr));
+ m_dyld_up.reset(DynamicLoader::FindPlugin(this, ""));
return m_dyld_up.get();
}
@@ -4047,12 +3880,14 @@ Status ProcessGDBRemote::SendEventData(const char *data) {
DataExtractor ProcessGDBRemote::GetAuxvData() {
DataBufferSP buf;
if (m_gdb_comm.GetQXferAuxvReadSupported()) {
- std::string response_string;
- if (m_gdb_comm.SendPacketsAndConcatenateResponses("qXfer:auxv:read::",
- response_string) ==
- GDBRemoteCommunication::PacketResult::Success)
- buf = std::make_shared<DataBufferHeap>(response_string.c_str(),
- response_string.length());
+ llvm::Expected<std::string> response = m_gdb_comm.ReadExtFeature("auxv", "");
+ if (response)
+ buf = std::make_shared<DataBufferHeap>(response->c_str(),
+ response->length());
+ else
+ LLDB_LOG_ERROR(
+ ProcessGDBRemoteLog::GetLogIfAnyCategoryIsSet(GDBR_LOG_PROCESS),
+ response.takeError(), "{0}");
}
return DataExtractor(buf, GetByteOrder(), GetAddressByteSize());
}
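
The new GetAuxvData path goes through ReadExtFeature, which returns an llvm::Expected<std::string>, so a failure carries an llvm::Error that must be consumed rather than a sentinel packet result. A minimal sketch of that success/consume pattern, assuming a stand-in producer (ReadFeature below is hypothetical, not an LLDB API):

#include "llvm/Support/Error.h"
#include <string>

// Hypothetical stand-in for ReadExtFeature; always fails here so the
// error path is exercised.
static llvm::Expected<std::string> ReadFeature() {
  return llvm::createStringError(llvm::inconvertibleErrorCode(),
                                 "qXfer feature not supported");
}

void ConsumeFeature() {
  llvm::Expected<std::string> response = ReadFeature();
  if (response) {
    // Success: *response holds the concatenated feature payload.
    (void)response->length();
  } else {
    // Failure: the llvm::Error must be consumed; here it is rendered to
    // text, where the real code hands it to LLDB_LOG_ERROR instead.
    std::string msg = llvm::toString(response.takeError());
    (void)msg;
  }
}
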
@@ -4347,132 +4182,84 @@ struct GdbServerTargetInfo {
};
bool ParseRegisters(XMLNode feature_node, GdbServerTargetInfo &target_info,
- GDBRemoteDynamicRegisterInfo &dyn_reg_info, ABISP abi_sp,
- uint32_t &reg_num_remote, uint32_t &reg_num_local) {
+ std::vector<DynamicRegisterInfo::Register> &registers) {
if (!feature_node)
return false;
- uint32_t reg_offset = LLDB_INVALID_INDEX32;
feature_node.ForEachChildElementWithName(
- "reg", [&target_info, &dyn_reg_info, &reg_num_remote, &reg_num_local,
- &reg_offset, &abi_sp](const XMLNode &reg_node) -> bool {
+ "reg", [&target_info, &registers](const XMLNode &reg_node) -> bool {
std::string gdb_group;
std::string gdb_type;
- ConstString reg_name;
- ConstString alt_name;
- ConstString set_name;
- std::vector<uint32_t> value_regs;
- std::vector<uint32_t> invalidate_regs;
- std::vector<uint8_t> dwarf_opcode_bytes;
+ DynamicRegisterInfo::Register reg_info;
bool encoding_set = false;
bool format_set = false;
- RegisterInfo reg_info = {
- nullptr, // Name
- nullptr, // Alt name
- 0, // byte size
- reg_offset, // offset
- eEncodingUint, // encoding
- eFormatHex, // format
- {
- LLDB_INVALID_REGNUM, // eh_frame reg num
- LLDB_INVALID_REGNUM, // DWARF reg num
- LLDB_INVALID_REGNUM, // generic reg num
- reg_num_remote, // process plugin reg num
- reg_num_local // native register number
- },
- nullptr,
- nullptr,
- nullptr, // Dwarf Expression opcode bytes pointer
- 0 // Dwarf Expression opcode bytes length
- };
+ // FIXME: we're silently ignoring invalid data here
reg_node.ForEachAttribute([&target_info, &gdb_group, &gdb_type,
- &reg_name, &alt_name, &set_name, &value_regs,
- &invalidate_regs, &encoding_set, &format_set,
- &reg_info, &reg_offset, &dwarf_opcode_bytes](
+ &encoding_set, &format_set, &reg_info](
const llvm::StringRef &name,
const llvm::StringRef &value) -> bool {
if (name == "name") {
- reg_name.SetString(value);
+ reg_info.name.SetString(value);
} else if (name == "bitsize") {
- reg_info.byte_size =
- StringConvert::ToUInt32(value.data(), 0, 0) / CHAR_BIT;
+ if (llvm::to_integer(value, reg_info.byte_size))
+ reg_info.byte_size =
+ llvm::divideCeil(reg_info.byte_size, CHAR_BIT);
} else if (name == "type") {
gdb_type = value.str();
} else if (name == "group") {
gdb_group = value.str();
} else if (name == "regnum") {
- const uint32_t regnum =
- StringConvert::ToUInt32(value.data(), LLDB_INVALID_REGNUM, 0);
- if (regnum != LLDB_INVALID_REGNUM) {
- reg_info.kinds[eRegisterKindProcessPlugin] = regnum;
- }
+ llvm::to_integer(value, reg_info.regnum_remote);
} else if (name == "offset") {
- reg_offset = StringConvert::ToUInt32(value.data(), UINT32_MAX, 0);
+ llvm::to_integer(value, reg_info.byte_offset);
} else if (name == "altname") {
- alt_name.SetString(value);
+ reg_info.alt_name.SetString(value);
} else if (name == "encoding") {
encoding_set = true;
reg_info.encoding = Args::StringToEncoding(value, eEncodingUint);
} else if (name == "format") {
format_set = true;
- Format format = eFormatInvalid;
- if (OptionArgParser::ToFormat(value.data(), format, nullptr)
- .Success())
- reg_info.format = format;
- else if (value == "vector-sint8")
- reg_info.format = eFormatVectorOfSInt8;
- else if (value == "vector-uint8")
- reg_info.format = eFormatVectorOfUInt8;
- else if (value == "vector-sint16")
- reg_info.format = eFormatVectorOfSInt16;
- else if (value == "vector-uint16")
- reg_info.format = eFormatVectorOfUInt16;
- else if (value == "vector-sint32")
- reg_info.format = eFormatVectorOfSInt32;
- else if (value == "vector-uint32")
- reg_info.format = eFormatVectorOfUInt32;
- else if (value == "vector-float32")
- reg_info.format = eFormatVectorOfFloat32;
- else if (value == "vector-uint64")
- reg_info.format = eFormatVectorOfUInt64;
- else if (value == "vector-uint128")
- reg_info.format = eFormatVectorOfUInt128;
+ if (!OptionArgParser::ToFormat(value.data(), reg_info.format,
+ nullptr)
+ .Success())
+ reg_info.format =
+ llvm::StringSwitch<lldb::Format>(value)
+ .Case("vector-sint8", eFormatVectorOfSInt8)
+ .Case("vector-uint8", eFormatVectorOfUInt8)
+ .Case("vector-sint16", eFormatVectorOfSInt16)
+ .Case("vector-uint16", eFormatVectorOfUInt16)
+ .Case("vector-sint32", eFormatVectorOfSInt32)
+ .Case("vector-uint32", eFormatVectorOfUInt32)
+ .Case("vector-float32", eFormatVectorOfFloat32)
+ .Case("vector-uint64", eFormatVectorOfUInt64)
+ .Case("vector-uint128", eFormatVectorOfUInt128)
+ .Default(eFormatInvalid);
} else if (name == "group_id") {
- const uint32_t set_id =
- StringConvert::ToUInt32(value.data(), UINT32_MAX, 0);
+ uint32_t set_id = UINT32_MAX;
+ llvm::to_integer(value, set_id);
RegisterSetMap::const_iterator pos =
target_info.reg_set_map.find(set_id);
if (pos != target_info.reg_set_map.end())
- set_name = pos->second.name;
+ reg_info.set_name = pos->second.name;
} else if (name == "gcc_regnum" || name == "ehframe_regnum") {
- reg_info.kinds[eRegisterKindEHFrame] =
- StringConvert::ToUInt32(value.data(), LLDB_INVALID_REGNUM, 0);
+ llvm::to_integer(value, reg_info.regnum_ehframe);
} else if (name == "dwarf_regnum") {
- reg_info.kinds[eRegisterKindDWARF] =
- StringConvert::ToUInt32(value.data(), LLDB_INVALID_REGNUM, 0);
+ llvm::to_integer(value, reg_info.regnum_dwarf);
} else if (name == "generic") {
- reg_info.kinds[eRegisterKindGeneric] =
- Args::StringToGenericRegister(value);
+ reg_info.regnum_generic = Args::StringToGenericRegister(value);
} else if (name == "value_regnums") {
- SplitCommaSeparatedRegisterNumberString(value, value_regs, 0);
+ SplitCommaSeparatedRegisterNumberString(value, reg_info.value_regs,
+ 0);
} else if (name == "invalidate_regnums") {
- SplitCommaSeparatedRegisterNumberString(value, invalidate_regs, 0);
- } else if (name == "dynamic_size_dwarf_expr_bytes") {
- std::string opcode_string = value.str();
- size_t dwarf_opcode_len = opcode_string.length() / 2;
- assert(dwarf_opcode_len > 0);
-
- dwarf_opcode_bytes.resize(dwarf_opcode_len);
- reg_info.dynamic_size_dwarf_len = dwarf_opcode_len;
- StringExtractor opcode_extractor(opcode_string);
- uint32_t ret_val =
- opcode_extractor.GetHexBytesAvail(dwarf_opcode_bytes);
- assert(dwarf_opcode_len == ret_val);
- UNUSED_IF_ASSERT_DISABLED(ret_val);
- reg_info.dynamic_size_dwarf_expr_bytes = dwarf_opcode_bytes.data();
+ SplitCommaSeparatedRegisterNumberString(
+ value, reg_info.invalidate_regs, 0);
} else {
- printf("unhandled attribute %s = %s\n", name.data(), value.data());
+ Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(
+ GDBR_LOG_PROCESS));
+ LLDB_LOGF(log,
+ "ProcessGDBRemote::%s unhandled reg attribute %s = %s",
+ __FUNCTION__, name.data(), value.data());
}
return true; // Keep iterating through all attributes
});
@@ -4484,43 +4271,40 @@ bool ParseRegisters(XMLNode feature_node, GdbServerTargetInfo &target_info,
} else if (gdb_type == "data_ptr" || gdb_type == "code_ptr") {
reg_info.format = eFormatAddressInfo;
reg_info.encoding = eEncodingUint;
- } else if (gdb_type == "i387_ext" || gdb_type == "float") {
+ } else if (gdb_type == "float") {
reg_info.format = eFormatFloat;
reg_info.encoding = eEncodingIEEE754;
+ } else if (gdb_type == "aarch64v" ||
+ llvm::StringRef(gdb_type).startswith("vec") ||
+ gdb_type == "i387_ext" || gdb_type == "uint128") {
+ // lldb doesn't handle 128-bit uints correctly (for ymm*h), so treat
+ // them as vector (similarly to xmm/ymm)
+ reg_info.format = eFormatVectorOfUInt8;
+ reg_info.encoding = eEncodingVector;
}
}
// Only update the register set name if we didn't get a "reg_set"
// attribute. "set_name" will be empty if we didn't have a "reg_set"
// attribute.
- if (!set_name) {
+ if (!reg_info.set_name) {
if (!gdb_group.empty()) {
- set_name.SetCString(gdb_group.c_str());
+ reg_info.set_name.SetCString(gdb_group.c_str());
} else {
// If no register group name provided anywhere,
// we'll create a 'general' register set
- set_name.SetCString("general");
+ reg_info.set_name.SetCString("general");
}
}
- reg_info.byte_offset = reg_offset;
- assert(reg_info.byte_size != 0);
- reg_offset = LLDB_INVALID_INDEX32;
- if (!value_regs.empty()) {
- value_regs.push_back(LLDB_INVALID_REGNUM);
- reg_info.value_regs = value_regs.data();
- }
- if (!invalidate_regs.empty()) {
- invalidate_regs.push_back(LLDB_INVALID_REGNUM);
- reg_info.invalidate_regs = invalidate_regs.data();
- }
-
- reg_num_remote = reg_info.kinds[eRegisterKindProcessPlugin] + 1;
- ++reg_num_local;
- reg_info.name = reg_name.AsCString();
- if (abi_sp)
- abi_sp->AugmentRegisterInfo(reg_info);
- dyn_reg_info.AddRegister(reg_info, reg_name, alt_name, set_name);
+ if (reg_info.byte_size == 0) {
+ Log *log(
+ ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PROCESS));
+ LLDB_LOGF(log,
+ "ProcessGDBRemote::%s Skipping zero bitsize register %s",
+ __FUNCTION__, reg_info.name.AsCString());
+ } else
+ registers.push_back(reg_info);
return true; // Keep iterating through all "reg" elements
});
@@ -4535,20 +4319,17 @@ bool ParseRegisters(XMLNode feature_node, GdbServerTargetInfo &target_info,
// for nested register definition files. It returns true if it was able
// to fetch and parse an xml file.
bool ProcessGDBRemote::GetGDBServerRegisterInfoXMLAndProcess(
- ArchSpec &arch_to_use, std::string xml_filename, uint32_t &reg_num_remote,
- uint32_t &reg_num_local) {
+ ArchSpec &arch_to_use, std::string xml_filename,
+ std::vector<DynamicRegisterInfo::Register> &registers) {
// request the target xml file
- std::string raw;
- lldb_private::Status lldberr;
- if (!m_gdb_comm.ReadExtFeature(ConstString("features"),
- ConstString(xml_filename.c_str()), raw,
- lldberr)) {
+ llvm::Expected<std::string> raw = m_gdb_comm.ReadExtFeature("features", xml_filename);
+ if (errorToBool(raw.takeError()))
return false;
- }
XMLDocument xml_document;
- if (xml_document.ParseMemory(raw.c_str(), raw.size(), xml_filename.c_str())) {
+ if (xml_document.ParseMemory(raw->c_str(), raw->size(),
+ xml_filename.c_str())) {
GdbServerTargetInfo target_info;
std::vector<XMLNode> feature_nodes;
@@ -4577,9 +4358,9 @@ bool ProcessGDBRemote::GetGDBServerRegisterInfoXMLAndProcess(
node.ForEachAttribute(
[&set_id, &set_info](const llvm::StringRef &name,
const llvm::StringRef &value) -> bool {
+ // FIXME: we're silently ignoring invalid data here
if (name == "id")
- set_id = StringConvert::ToUInt32(value.data(),
- UINT32_MAX, 0);
+ llvm::to_integer(value, set_id);
if (name == "name")
set_info.name = ConstString(value);
return true; // Keep iterating through all attributes
@@ -4613,39 +4394,33 @@ bool ProcessGDBRemote::GetGDBServerRegisterInfoXMLAndProcess(
}
}
- // If the target.xml includes an architecture entry like
+ // gdbserver does not implement the LLDB packets used to determine host
+ // or process architecture. If that is the case, attempt to use
+ // the <architecture/> field from target.xml, e.g.:
+ //
// <architecture>i386:x86-64</architecture> (seen from VMWare ESXi)
- // <architecture>arm</architecture> (seen from Segger JLink on unspecified arm board)
- // use that if we don't have anything better.
+ // <architecture>arm</architecture> (seen from Segger JLink on unspecified
+ // arm board)
if (!arch_to_use.IsValid() && !target_info.arch.empty()) {
- if (target_info.arch == "i386:x86-64") {
- // We don't have any information about vendor or OS.
- arch_to_use.SetTriple("x86_64--");
- GetTarget().MergeArchitecture(arch_to_use);
- }
+ // We don't have any information about vendor or OS.
+ arch_to_use.SetTriple(llvm::StringSwitch<std::string>(target_info.arch)
+ .Case("i386:x86-64", "x86_64")
+ .Default(target_info.arch) +
+ "--");
- // SEGGER J-Link jtag boards send this very-generic arch name,
- // we'll need to use this if we have absolutely nothing better
- // to work with or the register definitions won't be accepted.
- if (target_info.arch == "arm") {
- arch_to_use.SetTriple("arm--");
+ if (arch_to_use.IsValid())
GetTarget().MergeArchitecture(arch_to_use);
- }
}
if (arch_to_use.IsValid()) {
- // Don't use Process::GetABI, this code gets called from DidAttach, and
- // in that context we haven't set the Target's architecture yet, so the
- // ABI is also potentially incorrect.
- ABISP abi_to_use_sp = ABI::FindPlugin(shared_from_this(), arch_to_use);
for (auto &feature_node : feature_nodes) {
- ParseRegisters(feature_node, target_info, *this->m_register_info_sp,
- abi_to_use_sp, reg_num_remote, reg_num_local);
+ ParseRegisters(feature_node, target_info,
+ registers);
}
for (const auto &include : target_info.includes) {
GetGDBServerRegisterInfoXMLAndProcess(arch_to_use, include,
- reg_num_remote, reg_num_local);
+ registers);
}
}
} else {
@@ -4654,6 +4429,46 @@ bool ProcessGDBRemote::GetGDBServerRegisterInfoXMLAndProcess(
return true;
}
+void ProcessGDBRemote::AddRemoteRegisters(
+ std::vector<DynamicRegisterInfo::Register> &registers,
+ const ArchSpec &arch_to_use) {
+ std::map<uint32_t, uint32_t> remote_to_local_map;
+ uint32_t remote_regnum = 0;
+ for (auto it : llvm::enumerate(registers)) {
+ DynamicRegisterInfo::Register &remote_reg_info = it.value();
+
+ // Assign successive remote regnums if missing.
+ if (remote_reg_info.regnum_remote == LLDB_INVALID_REGNUM)
+ remote_reg_info.regnum_remote = remote_regnum;
+
+ // Create a mapping from remote to local regnos.
+ remote_to_local_map[remote_reg_info.regnum_remote] = it.index();
+
+ remote_regnum = remote_reg_info.regnum_remote + 1;
+ }
+
+ for (DynamicRegisterInfo::Register &remote_reg_info : registers) {
+ auto proc_to_lldb = [&remote_to_local_map](uint32_t process_regnum) {
+ auto lldb_regit = remote_to_local_map.find(process_regnum);
+ return lldb_regit != remote_to_local_map.end() ? lldb_regit->second
+ : LLDB_INVALID_REGNUM;
+ };
+
+ llvm::transform(remote_reg_info.value_regs,
+ remote_reg_info.value_regs.begin(), proc_to_lldb);
+ llvm::transform(remote_reg_info.invalidate_regs,
+ remote_reg_info.invalidate_regs.begin(), proc_to_lldb);
+ }
+
+ // Don't use Process::GetABI, this code gets called from DidAttach, and
+ // in that context we haven't set the Target's architecture yet, so the
+ // ABI is also potentially incorrect.
+ if (ABISP abi_sp = ABI::FindPlugin(shared_from_this(), arch_to_use))
+ abi_sp->AugmentRegisterInfo(registers);
+
+ m_register_info_sp->SetRegisterInfo(std::move(registers), arch_to_use);
+}
+
// query the target of gdb-remote for extended target information returns
// true on success (got register definitions), false on failure (did not).
bool ProcessGDBRemote::GetGDBServerRegisterInfo(ArchSpec &arch_to_use) {
@@ -4665,11 +4480,10 @@ bool ProcessGDBRemote::GetGDBServerRegisterInfo(ArchSpec &arch_to_use) {
if (!m_gdb_comm.GetQXferFeaturesReadSupported())
return false;
- uint32_t reg_num_remote = 0;
- uint32_t reg_num_local = 0;
+ std::vector<DynamicRegisterInfo::Register> registers;
if (GetGDBServerRegisterInfoXMLAndProcess(arch_to_use, "target.xml",
- reg_num_remote, reg_num_local))
- this->m_register_info_sp->Finalize(arch_to_use);
+ registers))
+ AddRemoteRegisters(registers, arch_to_use);
return m_register_info_sp->GetNumRegisters() > 0;
}
@@ -4685,24 +4499,20 @@ llvm::Expected<LoadedModuleInfoList> ProcessGDBRemote::GetLoadedModuleList() {
LoadedModuleInfoList list;
GDBRemoteCommunicationClient &comm = m_gdb_comm;
- bool can_use_svr4 = GetGlobalPluginProperties()->GetUseSVR4();
+ bool can_use_svr4 = GetGlobalPluginProperties().GetUseSVR4();
// check that we have extended feature read support
if (can_use_svr4 && comm.GetQXferLibrariesSVR4ReadSupported()) {
// request the loaded library list
- std::string raw;
- lldb_private::Status lldberr;
-
- if (!comm.ReadExtFeature(ConstString("libraries-svr4"), ConstString(""),
- raw, lldberr))
- return llvm::createStringError(llvm::inconvertibleErrorCode(),
- "Error in libraries-svr4 packet");
+ llvm::Expected<std::string> raw = comm.ReadExtFeature("libraries-svr4", "");
+ if (!raw)
+ return raw.takeError();
// parse the xml file in memory
- LLDB_LOGF(log, "parsing: %s", raw.c_str());
+ LLDB_LOGF(log, "parsing: %s", raw->c_str());
XMLDocument doc;
- if (!doc.ParseMemory(raw.c_str(), raw.size(), "noname.xml"))
+ if (!doc.ParseMemory(raw->c_str(), raw->size(), "noname.xml"))
return llvm::createStringError(llvm::inconvertibleErrorCode(),
"Error reading noname.xml");
@@ -4714,38 +4524,37 @@ llvm::Expected<LoadedModuleInfoList> ProcessGDBRemote::GetLoadedModuleList() {
// main link map structure
llvm::StringRef main_lm = root_element.GetAttributeValue("main-lm");
- if (!main_lm.empty()) {
- list.m_link_map =
- StringConvert::ToUInt64(main_lm.data(), LLDB_INVALID_ADDRESS, 0);
- }
+ // FIXME: we're silently ignoring invalid data here
+ if (!main_lm.empty())
+ llvm::to_integer(main_lm, list.m_link_map);
root_element.ForEachChildElementWithName(
"library", [log, &list](const XMLNode &library) -> bool {
-
LoadedModuleInfoList::LoadedModuleInfo module;
+ // FIXME: we're silently ignoring invalid data here
library.ForEachAttribute(
[&module](const llvm::StringRef &name,
const llvm::StringRef &value) -> bool {
-
+ uint64_t uint_value = LLDB_INVALID_ADDRESS;
if (name == "name")
module.set_name(value.str());
else if (name == "lm") {
// the address of the link_map struct.
- module.set_link_map(StringConvert::ToUInt64(
- value.data(), LLDB_INVALID_ADDRESS, 0));
+ llvm::to_integer(value, uint_value);
+ module.set_link_map(uint_value);
} else if (name == "l_addr") {
// the displacement as read from the field 'l_addr' of the
// link_map struct.
- module.set_base(StringConvert::ToUInt64(
- value.data(), LLDB_INVALID_ADDRESS, 0));
+ llvm::to_integer(value, uint_value);
+ module.set_base(uint_value);
// base address is always a displacement, not an absolute
// value.
module.set_base_is_offset(true);
} else if (name == "l_ld") {
// the memory address of the libraries PT_DYNAMIC section.
- module.set_dynamic(StringConvert::ToUInt64(
- value.data(), LLDB_INVALID_ADDRESS, 0));
+ llvm::to_integer(value, uint_value);
+ module.set_dynamic(uint_value);
}
return true; // Keep iterating over all properties of "library"
@@ -4780,18 +4589,15 @@ llvm::Expected<LoadedModuleInfoList> ProcessGDBRemote::GetLoadedModuleList() {
return list;
} else if (comm.GetQXferLibrariesReadSupported()) {
// request the loaded library list
- std::string raw;
- lldb_private::Status lldberr;
+ llvm::Expected<std::string> raw = comm.ReadExtFeature("libraries", "");
- if (!comm.ReadExtFeature(ConstString("libraries"), ConstString(""), raw,
- lldberr))
- return llvm::createStringError(llvm::inconvertibleErrorCode(),
- "Error in libraries packet");
+ if (!raw)
+ return raw.takeError();
- LLDB_LOGF(log, "parsing: %s", raw.c_str());
+ LLDB_LOGF(log, "parsing: %s", raw->c_str());
XMLDocument doc;
- if (!doc.ParseMemory(raw.c_str(), raw.size(), "noname.xml"))
+ if (!doc.ParseMemory(raw->c_str(), raw->size(), "noname.xml"))
return llvm::createStringError(llvm::inconvertibleErrorCode(),
"Error reading noname.xml");
@@ -4800,6 +4606,7 @@ llvm::Expected<LoadedModuleInfoList> ProcessGDBRemote::GetLoadedModuleList() {
return llvm::createStringError(llvm::inconvertibleErrorCode(),
"Error finding library-list xml element");
+ // FIXME: we're silently ignoring invalid data here
root_element.ForEachChildElementWithName(
"library", [log, &list](const XMLNode &library) -> bool {
LoadedModuleInfoList::LoadedModuleInfo module;
@@ -4813,8 +4620,9 @@ llvm::Expected<LoadedModuleInfoList> ProcessGDBRemote::GetLoadedModuleList() {
const XMLNode &section =
library.FindFirstChildElementWithName("section");
llvm::StringRef address = section.GetAttributeValue("address");
- module.set_base(
- StringConvert::ToUInt64(address.data(), LLDB_INVALID_ADDRESS, 0));
+ uint64_t address_value = LLDB_INVALID_ADDRESS;
+ llvm::to_integer(address, address_value);
+ module.set_base(address_value);
// These addresses are absolute values.
module.set_base_is_offset(false);
@@ -5109,6 +4917,56 @@ void ProcessGDBRemote::HandleStopReply() {
BuildDynamicRegisterInfo(true);
}
+llvm::Expected<bool> ProcessGDBRemote::SaveCore(llvm::StringRef outfile) {
+ if (!m_gdb_comm.GetSaveCoreSupported())
+ return false;
+
+ StreamString packet;
+ packet.PutCString("qSaveCore;path-hint:");
+ packet.PutStringAsRawHex8(outfile);
+
+ StringExtractorGDBRemote response;
+ if (m_gdb_comm.SendPacketAndWaitForResponse(packet.GetString(), response) ==
+ GDBRemoteCommunication::PacketResult::Success) {
+ // TODO: grab error message from the packet? StringExtractor seems to
+ // be missing a method for that
+ if (response.IsErrorResponse())
+ return llvm::createStringError(
+ llvm::inconvertibleErrorCode(),
+ llvm::formatv("qSaveCore returned an error"));
+
+ std::string path;
+
+ // process the response
+ for (auto x : llvm::split(response.GetStringRef(), ';')) {
+ if (x.consume_front("core-path:"))
+ StringExtractor(x).GetHexByteString(path);
+ }
+
+ // verify that we've gotten what we need
+ if (path.empty())
+ return llvm::createStringError(llvm::inconvertibleErrorCode(),
+ "qSaveCore returned no core path");
+
+ // now transfer the core file
+ FileSpec remote_core{llvm::StringRef(path)};
+ Platform &platform = *GetTarget().GetPlatform();
+ Status error = platform.GetFile(remote_core, FileSpec(outfile));
+
+ if (platform.IsRemote()) {
+ // NB: we unlink the file on error too
+ platform.Unlink(remote_core);
+ if (error.Fail())
+ return error.ToError();
+ }
+
+ return true;
+ }
+
+ return llvm::createStringError(llvm::inconvertibleErrorCode(),
+ "Unable to send qSaveCore");
+}
+
static const char *const s_async_json_packet_prefix = "JSON-async:";
static StructuredData::ObjectSP
@@ -5447,3 +5305,171 @@ CommandObject *ProcessGDBRemote::GetPluginCommandObject() {
GetTarget().GetDebugger().GetCommandInterpreter());
return m_command_sp.get();
}
+
+void ProcessGDBRemote::DidForkSwitchSoftwareBreakpoints(bool enable) {
+ GetBreakpointSiteList().ForEach([this, enable](BreakpointSite *bp_site) {
+ if (bp_site->IsEnabled() &&
+ (bp_site->GetType() == BreakpointSite::eSoftware ||
+ bp_site->GetType() == BreakpointSite::eExternal)) {
+ m_gdb_comm.SendGDBStoppointTypePacket(
+ eBreakpointSoftware, enable, bp_site->GetLoadAddress(),
+ GetSoftwareBreakpointTrapOpcode(bp_site), GetInterruptTimeout());
+ }
+ });
+}
+
+void ProcessGDBRemote::DidForkSwitchHardwareTraps(bool enable) {
+ if (m_gdb_comm.SupportsGDBStoppointPacket(eBreakpointHardware)) {
+ GetBreakpointSiteList().ForEach([this, enable](BreakpointSite *bp_site) {
+ if (bp_site->IsEnabled() &&
+ bp_site->GetType() == BreakpointSite::eHardware) {
+ m_gdb_comm.SendGDBStoppointTypePacket(
+ eBreakpointHardware, enable, bp_site->GetLoadAddress(),
+ GetSoftwareBreakpointTrapOpcode(bp_site), GetInterruptTimeout());
+ }
+ });
+ }
+
+ WatchpointList &wps = GetTarget().GetWatchpointList();
+ size_t wp_count = wps.GetSize();
+ for (size_t i = 0; i < wp_count; ++i) {
+ WatchpointSP wp = wps.GetByIndex(i);
+ if (wp->IsEnabled()) {
+ GDBStoppointType type = GetGDBStoppointType(wp.get());
+ m_gdb_comm.SendGDBStoppointTypePacket(type, enable, wp->GetLoadAddress(),
+ wp->GetByteSize(),
+ GetInterruptTimeout());
+ }
+ }
+}
+
+void ProcessGDBRemote::DidFork(lldb::pid_t child_pid, lldb::tid_t child_tid) {
+ Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PROCESS));
+
+ lldb::pid_t parent_pid = m_gdb_comm.GetCurrentProcessID();
+ // Any valid TID will suffice, thread-relevant actions will set a proper TID
+ // anyway.
+ lldb::tid_t parent_tid = m_thread_ids.front();
+
+ lldb::pid_t follow_pid, detach_pid;
+ lldb::tid_t follow_tid, detach_tid;
+
+ switch (GetFollowForkMode()) {
+ case eFollowParent:
+ follow_pid = parent_pid;
+ follow_tid = parent_tid;
+ detach_pid = child_pid;
+ detach_tid = child_tid;
+ break;
+ case eFollowChild:
+ follow_pid = child_pid;
+ follow_tid = child_tid;
+ detach_pid = parent_pid;
+ detach_tid = parent_tid;
+ break;
+ }
+
+ // Switch to the process that is going to be detached.
+ if (!m_gdb_comm.SetCurrentThread(detach_tid, detach_pid)) {
+ LLDB_LOG(log, "ProcessGDBRemote::DidFork() unable to set pid/tid");
+ return;
+ }
+
+ // Disable all software breakpoints in the forked process.
+ if (m_gdb_comm.SupportsGDBStoppointPacket(eBreakpointSoftware))
+ DidForkSwitchSoftwareBreakpoints(false);
+
+  // Remove hardware breakpoints / watchpoints from the parent process if
+  // we're following the child.
+ if (GetFollowForkMode() == eFollowChild)
+ DidForkSwitchHardwareTraps(false);
+
+ // Switch to the process that is going to be followed
+ if (!m_gdb_comm.SetCurrentThread(follow_tid, follow_pid) ||
+ !m_gdb_comm.SetCurrentThreadForRun(follow_tid, follow_pid)) {
+ LLDB_LOG(log, "ProcessGDBRemote::DidFork() unable to reset pid/tid");
+ return;
+ }
+
+ LLDB_LOG(log, "Detaching process {0}", detach_pid);
+ Status error = m_gdb_comm.Detach(false, detach_pid);
+ if (error.Fail()) {
+ LLDB_LOG(log, "ProcessGDBRemote::DidFork() detach packet send failed: {0}",
+ error.AsCString() ? error.AsCString() : "<unknown error>");
+ return;
+ }
+
+ // Hardware breakpoints/watchpoints are not inherited implicitly,
+  // so we need to re-add them if we're following the child.
+ if (GetFollowForkMode() == eFollowChild)
+ DidForkSwitchHardwareTraps(true);
+}
+
+void ProcessGDBRemote::DidVFork(lldb::pid_t child_pid, lldb::tid_t child_tid) {
+ Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PROCESS));
+
+ assert(!m_vfork_in_progress);
+ m_vfork_in_progress = true;
+
+ // Disable all software breakpoints for the duration of vfork.
+ if (m_gdb_comm.SupportsGDBStoppointPacket(eBreakpointSoftware))
+ DidForkSwitchSoftwareBreakpoints(false);
+
+ lldb::pid_t detach_pid;
+ lldb::tid_t detach_tid;
+
+ switch (GetFollowForkMode()) {
+ case eFollowParent:
+ detach_pid = child_pid;
+ detach_tid = child_tid;
+ break;
+ case eFollowChild:
+ detach_pid = m_gdb_comm.GetCurrentProcessID();
+ // Any valid TID will suffice, thread-relevant actions will set a proper TID
+ // anyway.
+ detach_tid = m_thread_ids.front();
+
+ // Switch to the parent process before detaching it.
+ if (!m_gdb_comm.SetCurrentThread(detach_tid, detach_pid)) {
+      LLDB_LOG(log, "ProcessGDBRemote::DidVFork() unable to set pid/tid");
+ return;
+ }
+
+ // Remove hardware breakpoints / watchpoints from the parent process.
+ DidForkSwitchHardwareTraps(false);
+
+ // Switch to the child process.
+ if (!m_gdb_comm.SetCurrentThread(child_tid, child_pid) ||
+ !m_gdb_comm.SetCurrentThreadForRun(child_tid, child_pid)) {
+      LLDB_LOG(log, "ProcessGDBRemote::DidVFork() unable to reset pid/tid");
+ return;
+ }
+ break;
+ }
+
+ LLDB_LOG(log, "Detaching process {0}", detach_pid);
+ Status error = m_gdb_comm.Detach(false, detach_pid);
+ if (error.Fail()) {
+ LLDB_LOG(log,
+             "ProcessGDBRemote::DidVFork() detach packet send failed: {0}",
+ error.AsCString() ? error.AsCString() : "<unknown error>");
+ return;
+ }
+}
+
+void ProcessGDBRemote::DidVForkDone() {
+ assert(m_vfork_in_progress);
+ m_vfork_in_progress = false;
+
+ // Reenable all software breakpoints that were enabled before vfork.
+ if (m_gdb_comm.SupportsGDBStoppointPacket(eBreakpointSoftware))
+ DidForkSwitchSoftwareBreakpoints(true);
+}
+
+void ProcessGDBRemote::DidExec() {
+ // If we are following children, vfork is finished by exec (rather than
+ // vforkdone that is submitted for parent).
+ if (GetFollowForkMode() == eFollowChild)
+ m_vfork_in_progress = false;
+ Process::DidExec();
+}
diff --git a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h
index fe04cdddd0f5..8134bc6b530d 100644
--- a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h
+++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h
@@ -19,6 +19,7 @@
#include "lldb/Core/ModuleSpec.h"
#include "lldb/Core/ThreadSafeValue.h"
#include "lldb/Host/HostThread.h"
+#include "lldb/Target/DynamicRegisterInfo.h"
#include "lldb/Target/Process.h"
#include "lldb/Target/Thread.h"
#include "lldb/Utility/ArchSpec.h"
@@ -64,9 +65,9 @@ public:
static void Terminate();
- static ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "gdb-remote"; }
- static const char *GetPluginDescriptionStatic();
+ static llvm::StringRef GetPluginDescriptionStatic();
static std::chrono::seconds GetPacketTimeout();
@@ -102,9 +103,7 @@ public:
void DidAttach(ArchSpec &process_arch) override;
// PluginInterface protocol
- ConstString GetPluginName() override;
-
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
// Process Control
Status WillResume() override;
@@ -145,9 +144,6 @@ public:
lldb::addr_t DoAllocateMemory(size_t size, uint32_t permissions,
Status &error) override;
- Status GetMemoryRegionInfo(lldb::addr_t load_addr,
- MemoryRegionInfo &region_info) override;
-
Status DoDeallocateMemory(lldb::addr_t ptr) override;
// Process STDIO
@@ -230,6 +226,13 @@ public:
std::string HarmonizeThreadIdsForProfileData(
StringExtractorGDBRemote &inputStringExtractor);
+ void DidFork(lldb::pid_t child_pid, lldb::tid_t child_tid) override;
+ void DidVFork(lldb::pid_t child_pid, lldb::tid_t child_tid) override;
+ void DidVForkDone() override;
+ void DidExec() override;
+
+ llvm::Expected<bool> SaveCore(llvm::StringRef outfile) override;
+
protected:
friend class ThreadGDBRemote;
friend class GDBRemoteCommunicationClient;
@@ -247,11 +250,10 @@ protected:
GDBRemoteCommunicationClient m_gdb_comm;
GDBRemoteCommunicationReplayServer m_gdb_replay_server;
std::atomic<lldb::pid_t> m_debugserver_pid;
- std::vector<StringExtractorGDBRemote> m_stop_packet_stack; // The stop packet
- // stack replaces
- // the last stop
- // packet variable
+
+ llvm::Optional<StringExtractorGDBRemote> m_last_stop_packet;
std::recursive_mutex m_last_stop_packet_mutex;
+
GDBRemoteDynamicRegisterInfoSP m_register_info_sp;
Broadcaster m_async_broadcaster;
lldb::ListenerSP m_async_listener_sp;
@@ -293,6 +295,8 @@ protected:
using FlashRange = FlashRangeVector::Entry;
FlashRangeVector m_erased_flash_ranges;
+ bool m_vfork_in_progress;
+
// Accessors
bool IsRunning(lldb::StateType state) {
return state == lldb::eStateRunning || IsStepping(state);
@@ -335,7 +339,7 @@ protected:
bool CalculateThreadStopInfo(ThreadGDBRemote *thread);
- size_t UpdateThreadPCsFromStopReplyThreadsValue(std::string &value);
+ size_t UpdateThreadPCsFromStopReplyThreadsValue(llvm::StringRef value);
size_t UpdateThreadIDsFromStopReplyThreadsValue(llvm::StringRef value);
@@ -387,11 +391,14 @@ protected:
DynamicLoader *GetDynamicLoader() override;
- bool GetGDBServerRegisterInfoXMLAndProcess(ArchSpec &arch_to_use,
- std::string xml_filename,
- uint32_t &cur_reg_remote,
- uint32_t &cur_reg_local);
+ bool GetGDBServerRegisterInfoXMLAndProcess(
+ ArchSpec &arch_to_use, std::string xml_filename,
+ std::vector<DynamicRegisterInfo::Register> &registers);
+ // Convert DynamicRegisterInfo::Registers into RegisterInfos and add
+ // to the dynamic register list.
+ void AddRemoteRegisters(std::vector<DynamicRegisterInfo::Register> &registers,
+ const ArchSpec &arch_to_use);
// Query remote GDBServer for register information
bool GetGDBServerRegisterInfo(ArchSpec &arch);
@@ -414,6 +421,9 @@ protected:
Status DoWriteMemoryTags(lldb::addr_t addr, size_t len, int32_t type,
const std::vector<uint8_t> &tags) override;
+ Status DoGetMemoryRegionInfo(lldb::addr_t load_addr,
+ MemoryRegionInfo &region_info) override;
+
private:
// For ProcessGDBRemote only
std::string m_partial_profile_data;
@@ -459,6 +469,10 @@ private:
ProcessGDBRemote(const ProcessGDBRemote &) = delete;
const ProcessGDBRemote &operator=(const ProcessGDBRemote &) = delete;
+
+ // fork helpers
+ void DidForkSwitchSoftwareBreakpoints(bool enable);
+ void DidForkSwitchHardwareTraps(bool enable);
};
} // namespace process_gdb_remote
diff --git a/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp b/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp
index 385557422758..736cfa070088 100644
--- a/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp
+++ b/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp
@@ -63,8 +63,9 @@ public:
static ConstString GetStaticPluginName() {
return ConstString("placeholder");
}
- ConstString GetPluginName() override { return GetStaticPluginName(); }
- uint32_t GetPluginVersion() override { return 1; }
+ llvm::StringRef GetPluginName() override {
+ return GetStaticPluginName().GetStringRef();
+ }
bool ParseHeader() override { return true; }
Type CalculateType() override { return eTypeUnknown; }
Strata CalculateStrata() override { return eStrataUnknown; }
@@ -189,12 +190,7 @@ void HashElfTextSection(ModuleSP module_sp, std::vector<uint8_t> &breakpad_uuid,
} // namespace
-ConstString ProcessMinidump::GetPluginNameStatic() {
- static ConstString g_name("minidump");
- return g_name;
-}
-
-const char *ProcessMinidump::GetPluginDescriptionStatic() {
+llvm::StringRef ProcessMinidump::GetPluginDescriptionStatic() {
return "Minidump plug-in.";
}
@@ -305,10 +301,6 @@ Status ProcessMinidump::DoLoadCore() {
return error;
}
-ConstString ProcessMinidump::GetPluginName() { return GetPluginNameStatic(); }
-
-uint32_t ProcessMinidump::GetPluginVersion() { return 1; }
-
Status ProcessMinidump::DoDestroy() { return Status(); }
void ProcessMinidump::RefreshStateAfterStop() {
@@ -447,8 +439,8 @@ void ProcessMinidump::BuildMemoryRegions() {
llvm::sort(*m_memory_regions);
}
-Status ProcessMinidump::GetMemoryRegionInfo(lldb::addr_t load_addr,
- MemoryRegionInfo &region) {
+Status ProcessMinidump::DoGetMemoryRegionInfo(lldb::addr_t load_addr,
+ MemoryRegionInfo &region) {
BuildMemoryRegions();
region = MinidumpParser::GetMemoryRegionInfo(*m_memory_regions, load_addr);
return Status();
@@ -584,8 +576,9 @@ void ProcessMinidump::ReadModuleList() {
// we don't then we will end up setting the load address of a different
// PlaceholderObjectFile and an assertion will fire.
auto *objfile = module_sp->GetObjectFile();
- if (objfile && objfile->GetPluginName() ==
- PlaceholderObjectFile::GetStaticPluginName()) {
+ if (objfile &&
+ objfile->GetPluginName() ==
+ PlaceholderObjectFile::GetStaticPluginName().GetStringRef()) {
if (((PlaceholderObjectFile *)objfile)->GetBaseImageAddress() !=
load_addr)
module_sp.reset();
diff --git a/lldb/source/Plugins/Process/minidump/ProcessMinidump.h b/lldb/source/Plugins/Process/minidump/ProcessMinidump.h
index 27b0da0047a5..5360269199cd 100644
--- a/lldb/source/Plugins/Process/minidump/ProcessMinidump.h
+++ b/lldb/source/Plugins/Process/minidump/ProcessMinidump.h
@@ -37,9 +37,9 @@ public:
static void Terminate();
- static ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "minidump"; }
- static const char *GetPluginDescriptionStatic();
+ static llvm::StringRef GetPluginDescriptionStatic();
ProcessMinidump(lldb::TargetSP target_sp, lldb::ListenerSP listener_sp,
const FileSpec &core_file, lldb::DataBufferSP code_data);
@@ -55,9 +55,7 @@ public:
DynamicLoader *GetDynamicLoader() override { return nullptr; }
- ConstString GetPluginName() override;
-
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
SystemRuntime *GetSystemRuntime() override { return nullptr; }
@@ -77,9 +75,6 @@ public:
ArchSpec GetArchitecture();
- Status GetMemoryRegionInfo(lldb::addr_t load_addr,
- MemoryRegionInfo &range_info) override;
-
Status GetMemoryRegions(
lldb_private::MemoryRegionInfos &region_list) override;
@@ -87,9 +82,8 @@ public:
Status WillResume() override {
Status error;
- error.SetErrorStringWithFormat(
- "error: %s does not support resuming processes",
- GetPluginName().GetCString());
+ error.SetErrorStringWithFormatv(
+ "error: {0} does not support resuming processes", GetPluginName());
return error;
}
@@ -101,6 +95,9 @@ protected:
bool DoUpdateThreadList(ThreadList &old_thread_list,
ThreadList &new_thread_list) override;
+ Status DoGetMemoryRegionInfo(lldb::addr_t load_addr,
+ MemoryRegionInfo &range_info) override;
+
void ReadModuleList();
lldb::ModuleSP GetOrCreateModule(lldb_private::UUID minidump_uuid,
diff --git a/lldb/source/Plugins/Process/minidump/RegisterContextMinidump_ARM.cpp b/lldb/source/Plugins/Process/minidump/RegisterContextMinidump_ARM.cpp
index 7e309e8322a8..7184dbacb08d 100644
--- a/lldb/source/Plugins/Process/minidump/RegisterContextMinidump_ARM.cpp
+++ b/lldb/source/Plugins/Process/minidump/RegisterContextMinidump_ARM.cpp
@@ -30,36 +30,35 @@ using namespace minidump;
#define DEF_R(i) \
{ \
"r" #i, nullptr, 4, OFFSET(r) + i * 4, eEncodingUint, eFormatHex, \
- {ehframe_r##i, dwarf_r##i, INV, INV, reg_r##i}, \
- nullptr, nullptr, nullptr, 0 \
+ {ehframe_r##i, dwarf_r##i, INV, INV, reg_r##i}, nullptr, nullptr, \
}
#define DEF_R_ARG(i, n) \
{ \
"r" #i, "arg" #n, 4, OFFSET(r) + i * 4, eEncodingUint, eFormatHex, \
- {ehframe_r##i, dwarf_r##i, LLDB_REGNUM_GENERIC_ARG1 + i, INV, reg_r##i}, \
- nullptr, nullptr, nullptr, 0 \
+ {ehframe_r##i, dwarf_r##i, LLDB_REGNUM_GENERIC_ARG1 + i, INV, \
+ reg_r##i}, \
+ nullptr, nullptr, \
}
#define DEF_D(i) \
{ \
"d" #i, nullptr, 8, OFFSET(d) + i * 8, eEncodingVector, \
eFormatVectorOfUInt8, {dwarf_d##i, dwarf_d##i, INV, INV, reg_d##i}, \
- nullptr, nullptr, nullptr, 0 \
+ nullptr, nullptr, \
}
#define DEF_S(i) \
{ \
"s" #i, nullptr, 4, OFFSET(s) + i * 4, eEncodingIEEE754, eFormatFloat, \
- {dwarf_s##i, dwarf_s##i, INV, INV, reg_s##i}, \
- nullptr, nullptr, nullptr, 0 \
+ {dwarf_s##i, dwarf_s##i, INV, INV, reg_s##i}, nullptr, nullptr, \
}
#define DEF_Q(i) \
{ \
"q" #i, nullptr, 16, OFFSET(q) + i * 16, eEncodingVector, \
eFormatVectorOfUInt8, {dwarf_q##i, dwarf_q##i, INV, INV, reg_q##i}, \
- nullptr, nullptr, nullptr, 0 \
+ nullptr, nullptr, \
}
// Zero based LLDB register numbers for this register context
@@ -177,8 +176,7 @@ static RegisterInfo g_reg_info_apple_fp = {
{ehframe_r7, dwarf_r7, LLDB_REGNUM_GENERIC_FP, INV, reg_r7},
nullptr,
nullptr,
- nullptr,
- 0};
+};
static RegisterInfo g_reg_info_fp = {
"fp",
@@ -190,8 +188,7 @@ static RegisterInfo g_reg_info_fp = {
{ehframe_r11, dwarf_r11, LLDB_REGNUM_GENERIC_FP, INV, reg_r11},
nullptr,
nullptr,
- nullptr,
- 0};
+};
// Register info definitions for this register context
static RegisterInfo g_reg_infos[] = {
@@ -217,8 +214,7 @@ static RegisterInfo g_reg_infos[] = {
{ehframe_sp, dwarf_sp, LLDB_REGNUM_GENERIC_SP, INV, reg_sp},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"lr",
"r14",
4,
@@ -228,8 +224,7 @@ static RegisterInfo g_reg_infos[] = {
{ehframe_lr, dwarf_lr, LLDB_REGNUM_GENERIC_RA, INV, reg_lr},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"pc",
"r15",
4,
@@ -239,8 +234,7 @@ static RegisterInfo g_reg_infos[] = {
{ehframe_pc, dwarf_pc, LLDB_REGNUM_GENERIC_PC, INV, reg_pc},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"cpsr",
"psr",
4,
@@ -250,8 +244,7 @@ static RegisterInfo g_reg_infos[] = {
{ehframe_cpsr, dwarf_cpsr, LLDB_REGNUM_GENERIC_FLAGS, INV, reg_cpsr},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"fpscr",
nullptr,
8,
@@ -261,8 +254,7 @@ static RegisterInfo g_reg_infos[] = {
{INV, INV, INV, INV, reg_fpscr},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
DEF_D(0),
DEF_D(1),
DEF_D(2),
diff --git a/lldb/source/Plugins/Process/minidump/RegisterContextMinidump_ARM64.cpp b/lldb/source/Plugins/Process/minidump/RegisterContextMinidump_ARM64.cpp
index e2b7c0e362a7..e606ec9c3b64 100644
--- a/lldb/source/Plugins/Process/minidump/RegisterContextMinidump_ARM64.cpp
+++ b/lldb/source/Plugins/Process/minidump/RegisterContextMinidump_ARM64.cpp
@@ -29,48 +29,48 @@ using namespace minidump;
{ \
"x" #i, nullptr, 8, OFFSET(x) + i * 8, eEncodingUint, eFormatHex, \
{arm64_dwarf::x##i, arm64_dwarf::x##i, INV, INV, reg_x##i}, \
- nullptr, nullptr, nullptr, 0 \
+ nullptr, nullptr, \
}
#define DEF_W(i) \
{ \
"w" #i, nullptr, 4, OFFSET(x) + i * 8, eEncodingUint, eFormatHex, \
- {INV, INV, INV, INV, reg_w##i}, nullptr, nullptr, nullptr, 0 \
+ {INV, INV, INV, INV, reg_w##i}, nullptr, nullptr, \
}
#define DEF_X_ARG(i, n) \
{ \
"x" #i, "arg" #n, 8, OFFSET(x) + i * 8, eEncodingUint, eFormatHex, \
{arm64_dwarf::x##i, arm64_dwarf::x##i, LLDB_REGNUM_GENERIC_ARG1 + i, \
- INV, reg_x##i}, nullptr, nullptr, nullptr, 0 \
+ INV, reg_x##i}, nullptr, nullptr, \
}
#define DEF_V(i) \
{ \
"v" #i, nullptr, 16, OFFSET(v) + i * 16, eEncodingVector, \
eFormatVectorOfUInt8, {arm64_dwarf::v##i, arm64_dwarf::v##i, INV, INV, \
- reg_v##i}, nullptr, nullptr, nullptr, 0 \
+ reg_v##i}, nullptr, nullptr, \
}
#define DEF_D(i) \
{ \
"d" #i, nullptr, 8, OFFSET(v) + i * 16, eEncodingVector, \
eFormatVectorOfUInt8, {INV, INV, INV, INV, reg_d##i}, nullptr, \
- nullptr, nullptr, 0 \
+ nullptr, \
}
#define DEF_S(i) \
{ \
"s" #i, nullptr, 4, OFFSET(v) + i * 16, eEncodingVector, \
eFormatVectorOfUInt8, {INV, INV, INV, INV, reg_s##i}, nullptr, \
- nullptr, nullptr, 0 \
+ nullptr, \
}
#define DEF_H(i) \
{ \
"h" #i, nullptr, 2, OFFSET(v) + i * 16, eEncodingVector, \
eFormatVectorOfUInt8, {INV, INV, INV, INV, reg_h##i}, nullptr, \
- nullptr, nullptr, 0 \
+ nullptr, \
}
// Zero based LLDB register numbers for this register context
@@ -316,8 +316,7 @@ static RegisterInfo g_reg_infos[] = {
{arm64_dwarf::x29, arm64_dwarf::x29, LLDB_REGNUM_GENERIC_FP, INV, reg_fp},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"lr",
"x30",
8,
@@ -327,8 +326,7 @@ static RegisterInfo g_reg_infos[] = {
{arm64_dwarf::x30, arm64_dwarf::x30, LLDB_REGNUM_GENERIC_RA, INV, reg_lr},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"sp",
"x31",
8,
@@ -338,8 +336,7 @@ static RegisterInfo g_reg_infos[] = {
{arm64_dwarf::x31, arm64_dwarf::x31, LLDB_REGNUM_GENERIC_SP, INV, reg_sp},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"pc",
nullptr,
8,
@@ -349,8 +346,7 @@ static RegisterInfo g_reg_infos[] = {
{arm64_dwarf::pc, arm64_dwarf::pc, LLDB_REGNUM_GENERIC_PC, INV, reg_pc},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
// w0 - w31
DEF_W(0),
DEF_W(1),
@@ -393,8 +389,7 @@ static RegisterInfo g_reg_infos[] = {
{INV, arm64_dwarf::cpsr, LLDB_REGNUM_GENERIC_FLAGS, INV, reg_cpsr},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"fpsr",
nullptr,
4,
@@ -404,8 +399,7 @@ static RegisterInfo g_reg_infos[] = {
{INV, INV, INV, INV, reg_fpsr},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
{"fpcr",
nullptr,
4,
@@ -415,8 +409,7 @@ static RegisterInfo g_reg_infos[] = {
{INV, INV, INV, INV, reg_fpcr},
nullptr,
nullptr,
- nullptr,
- 0},
+ },
// v0 - v31
DEF_V(0),
DEF_V(1),
diff --git a/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp b/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp
index 09e9375b6f66..15d3d43d9993 100644
--- a/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp
+++ b/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp
@@ -20,9 +20,6 @@
#include "lldb/Interpreter/ScriptInterpreter.h"
#include "lldb/Target/MemoryRegionInfo.h"
#include "lldb/Target/RegisterContext.h"
-
-#include "lldb/Utility/Log.h"
-#include "lldb/Utility/Logging.h"
#include "lldb/Utility/State.h"
#include <mutex>
@@ -32,12 +29,7 @@ LLDB_PLUGIN_DEFINE(ScriptedProcess)
using namespace lldb;
using namespace lldb_private;
-ConstString ScriptedProcess::GetPluginNameStatic() {
- static ConstString g_name("ScriptedProcess");
- return g_name;
-}
-
-const char *ScriptedProcess::GetPluginDescriptionStatic() {
+llvm::StringRef ScriptedProcess::GetPluginDescriptionStatic() {
return "Scripted Process plug-in.";
}
@@ -109,9 +101,11 @@ ScriptedProcess::ScriptedProcess(
return;
}
- StructuredData::ObjectSP object_sp = GetInterface().CreatePluginObject(
- m_scripted_process_info.GetClassName().c_str(), target_sp,
- m_scripted_process_info.GetDictionarySP());
+ ExecutionContext exe_ctx(target_sp, /*get_process=*/false);
+
+ StructuredData::GenericSP object_sp = GetInterface().CreatePluginObject(
+ m_scripted_process_info.GetClassName().c_str(), exe_ctx,
+ m_scripted_process_info.GetArgsSP());
if (!object_sp || !object_sp->IsValid()) {
error.SetErrorStringWithFormat("ScriptedProcess::%s () - ERROR: %s",
@@ -145,10 +139,6 @@ void ScriptedProcess::Terminate() {
PluginManager::UnregisterPlugin(ScriptedProcess::CreateInstance);
}
-ConstString ScriptedProcess::GetPluginName() { return GetPluginNameStatic(); }
-
-uint32_t ScriptedProcess::GetPluginVersion() { return 1; }
-
Status ScriptedProcess::DoLoadCore() {
ProcessLaunchInfo launch_info = GetTarget().GetProcessLaunchInfo();
@@ -234,26 +224,22 @@ bool ScriptedProcess::IsAlive() {
size_t ScriptedProcess::DoReadMemory(lldb::addr_t addr, void *buf, size_t size,
Status &error) {
-
- auto error_with_message = [&error](llvm::StringRef message) {
- error.SetErrorString(message);
- return 0;
- };
-
if (!m_interpreter)
- return error_with_message("No interpreter.");
+ return GetInterface().ErrorWithMessage<size_t>(LLVM_PRETTY_FUNCTION,
+ "No interpreter.", error);
lldb::DataExtractorSP data_extractor_sp =
GetInterface().ReadMemoryAtAddress(addr, size, error);
- if (!data_extractor_sp || error.Fail())
+ if (!data_extractor_sp || !data_extractor_sp->GetByteSize() || error.Fail())
return 0;
offset_t bytes_copied = data_extractor_sp->CopyByteOrderedData(
0, data_extractor_sp->GetByteSize(), buf, size, GetByteOrder());
if (!bytes_copied || bytes_copied == LLDB_INVALID_OFFSET)
- return error_with_message("Failed to copy read memory to buffer.");
+ return GetInterface().ErrorWithMessage<size_t>(
+ LLVM_PRETTY_FUNCTION, "Failed to copy read memory to buffer.", error);
return size;
}
@@ -262,26 +248,36 @@ ArchSpec ScriptedProcess::GetArchitecture() {
return GetTarget().GetArchitecture();
}
-Status ScriptedProcess::GetMemoryRegionInfo(lldb::addr_t load_addr,
- MemoryRegionInfo &region) {
- // TODO: Implement
- return Status();
+Status ScriptedProcess::DoGetMemoryRegionInfo(lldb::addr_t load_addr,
+ MemoryRegionInfo &region) {
+ CheckInterpreterAndScriptObject();
+
+ Status error;
+ if (auto region_or_err =
+ GetInterface().GetMemoryRegionContainingAddress(load_addr, error))
+ region = *region_or_err;
+
+ return error;
}
Status ScriptedProcess::GetMemoryRegions(MemoryRegionInfos &region_list) {
CheckInterpreterAndScriptObject();
+ Status error;
lldb::addr_t address = 0;
- lldb::MemoryRegionInfoSP mem_region_sp = nullptr;
- while ((mem_region_sp =
- GetInterface().GetMemoryRegionContainingAddress(address))) {
- auto range = mem_region_sp->GetRange();
+ while (auto region_or_err =
+ GetInterface().GetMemoryRegionContainingAddress(address, error)) {
+ if (error.Fail())
+ break;
+
+ MemoryRegionInfo &mem_region = *region_or_err;
+ auto range = mem_region.GetRange();
address += range.GetRangeBase() + range.GetByteSize();
- region_list.push_back(*mem_region_sp.get());
+ region_list.push_back(mem_region);
}
- return {};
+ return error;
}
void ScriptedProcess::Clear() { Process::m_thread_list.Clear(); }
@@ -292,9 +288,40 @@ bool ScriptedProcess::DoUpdateThreadList(ThreadList &old_thread_list,
// This is supposed to get the current set of threads, if any of them are in
// old_thread_list then they get copied to new_thread_list, and then any
// actually new threads will get added to new_thread_list.
+
+ CheckInterpreterAndScriptObject();
+ m_thread_plans.ClearThreadCache();
+
+ Status error;
+ ScriptLanguage language = m_interpreter->GetLanguage();
+
+ if (language != eScriptLanguagePython)
+ return GetInterface().ErrorWithMessage<bool>(
+ LLVM_PRETTY_FUNCTION,
+ llvm::Twine("ScriptInterpreter language (" +
+ llvm::Twine(m_interpreter->LanguageToString(language)) +
+ llvm::Twine(") not supported."))
+ .str(),
+ error);
+
+ lldb::ThreadSP thread_sp;
+ thread_sp = std::make_shared<ScriptedThread>(*this, error);
+
+ if (!thread_sp || error.Fail())
+ return GetInterface().ErrorWithMessage<bool>(LLVM_PRETTY_FUNCTION,
+ error.AsCString(), error);
+
+ new_thread_list.AddThread(thread_sp);
+
return new_thread_list.GetSize(false) > 0;
}
+void ScriptedProcess::RefreshStateAfterStop() {
+ // Let all threads recover from stopping and do any clean up based on the
+ // previous thread state (if any).
+ m_thread_list.RefreshStateAfterStop();
+}
+
bool ScriptedProcess::GetProcessInfo(ProcessInstanceInfo &info) {
info.Clear();
info.SetProcessID(GetID());
diff --git a/lldb/source/Plugins/Process/scripted/ScriptedProcess.h b/lldb/source/Plugins/Process/scripted/ScriptedProcess.h
index 98c1a1ca4fe9..c8355f35548a 100644
--- a/lldb/source/Plugins/Process/scripted/ScriptedProcess.h
+++ b/lldb/source/Plugins/Process/scripted/ScriptedProcess.h
@@ -13,6 +13,8 @@
#include "lldb/Utility/ConstString.h"
#include "lldb/Utility/Status.h"
+#include "ScriptedThread.h"
+
#include <mutex>
namespace lldb_private {
@@ -23,17 +25,15 @@ protected:
public:
ScriptedProcessInfo(const ProcessLaunchInfo &launch_info) {
m_class_name = launch_info.GetScriptedProcessClassName();
- m_dictionary_sp = launch_info.GetScriptedProcessDictionarySP();
+ m_args_sp = launch_info.GetScriptedProcessDictionarySP();
}
std::string GetClassName() const { return m_class_name; }
- StructuredData::DictionarySP GetDictionarySP() const {
- return m_dictionary_sp;
- }
+ StructuredData::DictionarySP GetArgsSP() const { return m_args_sp; }
private:
std::string m_class_name;
- StructuredData::DictionarySP m_dictionary_sp;
+ StructuredData::DictionarySP m_args_sp;
};
public:
@@ -46,9 +46,9 @@ public:
static void Terminate();
- static ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "ScriptedProcess"; }
- static const char *GetPluginDescriptionStatic();
+ static llvm::StringRef GetPluginDescriptionStatic();
ScriptedProcess(lldb::TargetSP target_sp, lldb::ListenerSP listener_sp,
const ScriptedProcess::ScriptedProcessInfo &launch_info,
@@ -61,9 +61,7 @@ public:
DynamicLoader *GetDynamicLoader() override { return nullptr; }
- ConstString GetPluginName() override;
-
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
SystemRuntime *GetSystemRuntime() override { return nullptr; }
@@ -77,7 +75,7 @@ public:
Status DoDestroy() override;
- void RefreshStateAfterStop() override{};
+ void RefreshStateAfterStop() override;
bool IsAlive() override;
@@ -86,9 +84,6 @@ public:
ArchSpec GetArchitecture();
- Status GetMemoryRegionInfo(lldb::addr_t load_addr,
- MemoryRegionInfo &range_info) override;
-
Status
GetMemoryRegions(lldb_private::MemoryRegionInfos &region_list) override;
@@ -102,7 +97,12 @@ protected:
bool DoUpdateThreadList(ThreadList &old_thread_list,
ThreadList &new_thread_list) override;
+ Status DoGetMemoryRegionInfo(lldb::addr_t load_addr,
+ MemoryRegionInfo &range_info) override;
+
private:
+ friend class ScriptedThread;
+
void CheckInterpreterAndScriptObject() const;
ScriptedProcessInterface &GetInterface() const;
static bool IsScriptLanguageSupported(lldb::ScriptLanguage language);
diff --git a/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp b/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp
new file mode 100644
index 000000000000..1adbd4e7799d
--- /dev/null
+++ b/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp
@@ -0,0 +1,211 @@
+//===-- ScriptedThread.cpp ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "ScriptedThread.h"
+
+#include "Plugins/Process/Utility/RegisterContextThreadMemory.h"
+#include "lldb/Target/OperatingSystem.h"
+#include "lldb/Target/Process.h"
+#include "lldb/Target/RegisterContext.h"
+#include "lldb/Target/StopInfo.h"
+#include "lldb/Target/Unwind.h"
+#include "lldb/Utility/DataBufferHeap.h"
+#include "lldb/Utility/Log.h"
+#include "lldb/Utility/Logging.h"
+
+#include <memory>
+
+using namespace lldb;
+using namespace lldb_private;
+
+void ScriptedThread::CheckInterpreterAndScriptObject() const {
+ lldbassert(m_script_object_sp && "Invalid Script Object.");
+ lldbassert(GetInterface() && "Invalid Scripted Thread Interface.");
+}
+
+ScriptedThread::ScriptedThread(ScriptedProcess &process, Status &error)
+ : Thread(process, LLDB_INVALID_THREAD_ID), m_scripted_process(process) {
+ if (!process.IsValid()) {
+ error.SetErrorString("Invalid scripted process");
+ return;
+ }
+
+ process.CheckInterpreterAndScriptObject();
+
+ auto scripted_thread_interface = GetInterface();
+ if (!scripted_thread_interface) {
+ error.SetErrorString("Failed to get scripted thread interface.");
+ return;
+ }
+
+ llvm::Optional<std::string> class_name =
+ process.GetInterface().GetScriptedThreadPluginName();
+ if (!class_name || class_name->empty()) {
+ error.SetErrorString("Failed to get scripted thread class name.");
+ return;
+ }
+
+ ExecutionContext exe_ctx(process);
+
+ StructuredData::GenericSP object_sp =
+ scripted_thread_interface->CreatePluginObject(
+ class_name->c_str(), exe_ctx,
+ process.m_scripted_process_info.GetArgsSP());
+ if (!object_sp || !object_sp->IsValid()) {
+ error.SetErrorString("Failed to create valid script object");
+ return;
+ }
+
+ m_script_object_sp = object_sp;
+
+ SetID(scripted_thread_interface->GetThreadID());
+}
+
+ScriptedThread::~ScriptedThread() { DestroyThread(); }
+
+const char *ScriptedThread::GetName() {
+ CheckInterpreterAndScriptObject();
+ llvm::Optional<std::string> thread_name = GetInterface()->GetName();
+ if (!thread_name)
+ return nullptr;
+ return ConstString(thread_name->c_str()).AsCString();
+}
+
+const char *ScriptedThread::GetQueueName() {
+ CheckInterpreterAndScriptObject();
+ llvm::Optional<std::string> queue_name = GetInterface()->GetQueue();
+ if (!queue_name)
+ return nullptr;
+ return ConstString(queue_name->c_str()).AsCString();
+}
+
+void ScriptedThread::WillResume(StateType resume_state) {}
+
+void ScriptedThread::ClearStackFrames() { Thread::ClearStackFrames(); }
+
+RegisterContextSP ScriptedThread::GetRegisterContext() {
+ if (!m_reg_context_sp)
+ m_reg_context_sp = CreateRegisterContextForFrame(nullptr);
+ return m_reg_context_sp;
+}
+
+RegisterContextSP
+ScriptedThread::CreateRegisterContextForFrame(StackFrame *frame) {
+ const uint32_t concrete_frame_idx =
+ frame ? frame->GetConcreteFrameIndex() : 0;
+
+ if (concrete_frame_idx)
+ return GetUnwinder().CreateRegisterContextForFrame(frame);
+
+ lldb::RegisterContextSP reg_ctx_sp;
+ Status error;
+
+ llvm::Optional<std::string> reg_data = GetInterface()->GetRegisterContext();
+ if (!reg_data)
+ return GetInterface()->ErrorWithMessage<lldb::RegisterContextSP>(
+ LLVM_PRETTY_FUNCTION, "Failed to get scripted thread registers data.",
+ error, LIBLLDB_LOG_THREAD);
+
+ DataBufferSP data_sp(
+ std::make_shared<DataBufferHeap>(reg_data->c_str(), reg_data->size()));
+
+ if (!data_sp->GetByteSize())
+ return GetInterface()->ErrorWithMessage<lldb::RegisterContextSP>(
+ LLVM_PRETTY_FUNCTION, "Failed to copy raw registers data.", error,
+ LIBLLDB_LOG_THREAD);
+
+ std::shared_ptr<RegisterContextMemory> reg_ctx_memory =
+ std::make_shared<RegisterContextMemory>(
+ *this, 0, *GetDynamicRegisterInfo(), LLDB_INVALID_ADDRESS);
+ if (!reg_ctx_memory)
+ return GetInterface()->ErrorWithMessage<lldb::RegisterContextSP>(
+ LLVM_PRETTY_FUNCTION, "Failed to create a register context.", error,
+ LIBLLDB_LOG_THREAD);
+
+ reg_ctx_memory->SetAllRegisterData(data_sp);
+ m_reg_context_sp = reg_ctx_memory;
+
+ return m_reg_context_sp;
+}
+
+bool ScriptedThread::CalculateStopInfo() {
+ StructuredData::DictionarySP dict_sp = GetInterface()->GetStopReason();
+
+ Status error;
+ lldb::StopInfoSP stop_info_sp;
+ lldb::StopReason stop_reason_type;
+
+ if (!dict_sp->GetValueForKeyAsInteger("type", stop_reason_type))
+ return GetInterface()->ErrorWithMessage<bool>(
+ LLVM_PRETTY_FUNCTION,
+ "Couldn't find value for key 'type' in stop reason dictionary.", error,
+ LIBLLDB_LOG_THREAD);
+
+ StructuredData::Dictionary *data_dict;
+ if (!dict_sp->GetValueForKeyAsDictionary("data", data_dict))
+ return GetInterface()->ErrorWithMessage<bool>(
+ LLVM_PRETTY_FUNCTION,
+        "Couldn't find value for key 'data' in stop reason dictionary.", error,
+ LIBLLDB_LOG_THREAD);
+
+ switch (stop_reason_type) {
+ case lldb::eStopReasonNone:
+ break;
+ case lldb::eStopReasonBreakpoint: {
+ lldb::break_id_t break_id;
+ data_dict->GetValueForKeyAsInteger("break_id", break_id,
+ LLDB_INVALID_BREAK_ID);
+ stop_info_sp =
+ StopInfo::CreateStopReasonWithBreakpointSiteID(*this, break_id);
+ } break;
+ case lldb::eStopReasonSignal: {
+ int signal;
+ llvm::StringRef description;
+ data_dict->GetValueForKeyAsInteger("signal", signal,
+ LLDB_INVALID_SIGNAL_NUMBER);
+ data_dict->GetValueForKeyAsString("desc", description);
+ stop_info_sp =
+ StopInfo::CreateStopReasonWithSignal(*this, signal, description.data());
+ } break;
+ default:
+ return GetInterface()->ErrorWithMessage<bool>(
+ LLVM_PRETTY_FUNCTION,
+ llvm::Twine("Unsupported stop reason type (" +
+ llvm::Twine(stop_reason_type) + llvm::Twine(")."))
+ .str(),
+ error, LIBLLDB_LOG_THREAD);
+ }
+
+ SetStopInfo(stop_info_sp);
+ return true;
+}
+
+void ScriptedThread::RefreshStateAfterStop() {
+ GetRegisterContext()->InvalidateIfNeeded(/*force=*/false);
+}
+
+lldb::ScriptedThreadInterfaceSP ScriptedThread::GetInterface() const {
+ return m_scripted_process.GetInterface().GetScriptedThreadInterface();
+}
+
+std::shared_ptr<DynamicRegisterInfo> ScriptedThread::GetDynamicRegisterInfo() {
+ CheckInterpreterAndScriptObject();
+
+ if (!m_register_info_sp) {
+ StructuredData::DictionarySP reg_info = GetInterface()->GetRegisterInfo();
+ if (!reg_info)
+ return nullptr;
+
+ m_register_info_sp = std::make_shared<DynamicRegisterInfo>(
+ *reg_info, m_scripted_process.GetTarget().GetArchitecture());
+ assert(m_register_info_sp->GetNumRegisters() > 0);
+ assert(m_register_info_sp->GetNumRegisterSets() > 0);
+ }
+
+ return m_register_info_sp;
+}
diff --git a/lldb/source/Plugins/Process/scripted/ScriptedThread.h b/lldb/source/Plugins/Process/scripted/ScriptedThread.h
new file mode 100644
index 000000000000..cdcd543702a4
--- /dev/null
+++ b/lldb/source/Plugins/Process/scripted/ScriptedThread.h
@@ -0,0 +1,68 @@
+//===-- ScriptedThread.h ----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLDB_SOURCE_PLUGINS_SCRIPTED_THREAD_H
+#define LLDB_SOURCE_PLUGINS_SCRIPTED_THREAD_H
+
+#include <string>
+
+#include "ScriptedProcess.h"
+
+#include "Plugins/Process/Utility/RegisterContextMemory.h"
+#include "lldb/Interpreter/ScriptInterpreter.h"
+#include "lldb/Target//DynamicRegisterInfo.h"
+#include "lldb/Target/Thread.h"
+
+namespace lldb_private {
+class ScriptedProcess;
+}
+
+namespace lldb_private {
+
+class ScriptedThread : public lldb_private::Thread {
+public:
+ ScriptedThread(ScriptedProcess &process, Status &error);
+
+ ~ScriptedThread() override;
+
+ lldb::RegisterContextSP GetRegisterContext() override;
+
+ lldb::RegisterContextSP
+ CreateRegisterContextForFrame(lldb_private::StackFrame *frame) override;
+
+ bool CalculateStopInfo() override;
+
+ const char *GetInfo() override { return nullptr; }
+
+ const char *GetName() override;
+
+ const char *GetQueueName() override;
+
+ void WillResume(lldb::StateType resume_state) override;
+
+ void RefreshStateAfterStop() override;
+
+ void ClearStackFrames() override;
+
+private:
+ void CheckInterpreterAndScriptObject() const;
+ lldb::ScriptedThreadInterfaceSP GetInterface() const;
+
+ ScriptedThread(const ScriptedThread &) = delete;
+ const ScriptedThread &operator=(const ScriptedThread &) = delete;
+
+ std::shared_ptr<DynamicRegisterInfo> GetDynamicRegisterInfo();
+
+ const ScriptedProcess &m_scripted_process;
+ std::shared_ptr<DynamicRegisterInfo> m_register_info_sp = nullptr;
+ lldb_private::StructuredData::ObjectSP m_script_object_sp = nullptr;
+};
+
+} // namespace lldb_private
+
+#endif // LLDB_SOURCE_PLUGINS_SCRIPTED_THREAD_H
diff --git a/lldb/source/Plugins/REPL/Clang/ClangREPL.cpp b/lldb/source/Plugins/REPL/Clang/ClangREPL.cpp
new file mode 100644
index 000000000000..5060dbb7ddba
--- /dev/null
+++ b/lldb/source/Plugins/REPL/Clang/ClangREPL.cpp
@@ -0,0 +1,102 @@
+//===-- ClangREPL.cpp -----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "ClangREPL.h"
+#include "lldb/Core/Debugger.h"
+#include "lldb/Core/PluginManager.h"
+#include "lldb/Expression/ExpressionVariable.h"
+
+using namespace lldb_private;
+
+LLDB_PLUGIN_DEFINE(ClangREPL)
+
+ClangREPL::ClangREPL(lldb::LanguageType language, Target &target)
+ : REPL(eKindClang, target), m_language(language),
+ m_implicit_expr_result_regex("\\$[0-9]+") {}
+
+ClangREPL::~ClangREPL() {}
+
+void ClangREPL::Initialize() {
+ LanguageSet languages;
+ // FIXME: There isn't a way to ask CPlusPlusLanguage and ObjCLanguage for
+ // a list of languages they support.
+ languages.Insert(lldb::LanguageType::eLanguageTypeC);
+ languages.Insert(lldb::LanguageType::eLanguageTypeC89);
+ languages.Insert(lldb::LanguageType::eLanguageTypeC99);
+ languages.Insert(lldb::LanguageType::eLanguageTypeC11);
+ languages.Insert(lldb::LanguageType::eLanguageTypeC_plus_plus);
+ languages.Insert(lldb::LanguageType::eLanguageTypeC_plus_plus_03);
+ languages.Insert(lldb::LanguageType::eLanguageTypeC_plus_plus_11);
+ languages.Insert(lldb::LanguageType::eLanguageTypeC_plus_plus_14);
+ languages.Insert(lldb::LanguageType::eLanguageTypeObjC);
+ languages.Insert(lldb::LanguageType::eLanguageTypeObjC_plus_plus);
+ PluginManager::RegisterPlugin(GetPluginNameStatic(), "C language REPL",
+ &CreateInstance, languages);
+}
+
+void ClangREPL::Terminate() {
+ PluginManager::UnregisterPlugin(&CreateInstance);
+}
+
+lldb::REPLSP ClangREPL::CreateInstance(Status &error,
+ lldb::LanguageType language,
+ Debugger *debugger, Target *target,
+ const char *repl_options) {
+ // Creating a dummy target if only a debugger is given isn't implemented yet.
+ if (!target) {
+ error.SetErrorString("must have a target to create a REPL");
+ return nullptr;
+ }
+ lldb::REPLSP result = std::make_shared<ClangREPL>(language, *target);
+ target->SetREPL(language, result);
+ error = Status();
+ return result;
+}
+
+Status ClangREPL::DoInitialization() { return Status(); }
+
+ConstString ClangREPL::GetSourceFileBasename() {
+ return ConstString("repl.c");
+}
+
+const char *ClangREPL::GetAutoIndentCharacters() { return " "; }
+
+bool ClangREPL::SourceIsComplete(const std::string &source) {
+ // FIXME: There isn't a good way to know if the input source is complete or
+ // not, so just say that every single REPL line is ready to be parsed.
+ return !source.empty();
+}
+
+lldb::offset_t ClangREPL::GetDesiredIndentation(const StringList &lines,
+ int cursor_position,
+ int tab_size) {
+ // FIXME: Not implemented.
+ return LLDB_INVALID_OFFSET;
+}
+
+lldb::LanguageType ClangREPL::GetLanguage() { return m_language; }
+
+bool ClangREPL::PrintOneVariable(Debugger &debugger,
+ lldb::StreamFileSP &output_sp,
+ lldb::ValueObjectSP &valobj_sp,
+ ExpressionVariable *var) {
+ // If an ExpressionVariable was passed, check first whether that variable is
+ // just an automatically created expression result. Those variables are
+ // already printed by the REPL, so skip them here to avoid printing them twice.
+ if (var) {
+ if (m_implicit_expr_result_regex.Execute(var->GetName().GetStringRef()))
+ return true;
+ }
+ valobj_sp->Dump(*output_sp);
+ return true;
+}
+
+void ClangREPL::CompleteCode(const std::string &current_code,
+ CompletionRequest &request) {
+ // Not implemented.
+}
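
PrintOneVariable() above filters out names matching m_implicit_expr_result_regex ("\$[0-9]+"), i.e. the $0, $1, ... result variables the REPL has already echoed. A tiny Python sketch of the same check, purely to illustrate the pattern (not part of the patch):

    import re

    implicit_expr_result = re.compile(r"\$[0-9]+")
    assert implicit_expr_result.search("$12") is not None   # skipped by PrintOneVariable
    assert implicit_expr_result.search("my_var") is None    # printed normally
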
diff --git a/lldb/source/Plugins/REPL/Clang/ClangREPL.h b/lldb/source/Plugins/REPL/Clang/ClangREPL.h
new file mode 100644
index 000000000000..07b7f73b1faf
--- /dev/null
+++ b/lldb/source/Plugins/REPL/Clang/ClangREPL.h
@@ -0,0 +1,63 @@
+//===-- ClangREPL.h ---------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLDB_SOURCE_PLUGINS_REPL_CLANG_CLANGREPL_H
+#define LLDB_SOURCE_PLUGINS_REPL_CLANG_CLANGREPL_H
+
+#include "lldb/Expression/REPL.h"
+
+namespace lldb_private {
+/// Implements a Clang-based REPL for C languages on top of LLDB's REPL
+/// framework.
+class ClangREPL : public REPL {
+public:
+ ClangREPL(lldb::LanguageType language, Target &target);
+
+ ~ClangREPL() override;
+
+ static void Initialize();
+
+ static void Terminate();
+
+ static lldb::REPLSP CreateInstance(Status &error, lldb::LanguageType language,
+ Debugger *debugger, Target *target,
+ const char *repl_options);
+
+ static llvm::StringRef GetPluginNameStatic() { return "ClangREPL"; }
+
+protected:
+ Status DoInitialization() override;
+
+ ConstString GetSourceFileBasename() override;
+
+ const char *GetAutoIndentCharacters() override;
+
+ bool SourceIsComplete(const std::string &source) override;
+
+ lldb::offset_t GetDesiredIndentation(const StringList &lines,
+ int cursor_position,
+ int tab_size) override;
+
+ lldb::LanguageType GetLanguage() override;
+
+ bool PrintOneVariable(Debugger &debugger, lldb::StreamFileSP &output_sp,
+ lldb::ValueObjectSP &valobj_sp,
+ ExpressionVariable *var = nullptr) override;
+
+ void CompleteCode(const std::string &current_code,
+ CompletionRequest &request) override;
+
+private:
+ /// The specific C language of this REPL.
+ lldb::LanguageType m_language;
+ /// A regex matching the implicitly created LLDB result variables.
+ lldb_private::RegularExpression m_implicit_expr_result_regex;
+};
+} // namespace lldb_private
+
+#endif // LLDB_SOURCE_PLUGINS_REPL_CLANG_CLANGREPL_H
diff --git a/lldb/source/Plugins/ScriptInterpreter/Lua/ScriptInterpreterLua.cpp b/lldb/source/Plugins/ScriptInterpreter/Lua/ScriptInterpreterLua.cpp
index ef46401c8b46..c677abfaa5f2 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Lua/ScriptInterpreterLua.cpp
+++ b/lldb/source/Plugins/ScriptInterpreter/Lua/ScriptInterpreterLua.cpp
@@ -148,6 +148,12 @@ ScriptInterpreterLua::ScriptInterpreterLua(Debugger &debugger)
ScriptInterpreterLua::~ScriptInterpreterLua() = default;
+StructuredData::DictionarySP ScriptInterpreterLua::GetInterpreterInfo() {
+ auto info = std::make_shared<StructuredData::Dictionary>();
+ info->AddStringItem("language", "lua");
+ return info;
+}
+
bool ScriptInterpreterLua::ExecuteOneLine(llvm::StringRef command,
CommandReturnObject *result,
const ExecuteScriptOptions &options) {
@@ -387,19 +393,8 @@ ScriptInterpreterLua::CreateInstance(Debugger &debugger) {
return std::make_shared<ScriptInterpreterLua>(debugger);
}
-lldb_private::ConstString ScriptInterpreterLua::GetPluginNameStatic() {
- static ConstString g_name("script-lua");
- return g_name;
-}
-
-const char *ScriptInterpreterLua::GetPluginDescriptionStatic() {
+llvm::StringRef ScriptInterpreterLua::GetPluginDescriptionStatic() {
return "Lua script interpreter";
}
-lldb_private::ConstString ScriptInterpreterLua::GetPluginName() {
- return GetPluginNameStatic();
-}
-
-uint32_t ScriptInterpreterLua::GetPluginVersion() { return 1; }
-
Lua &ScriptInterpreterLua::GetLua() { return *m_lua; }
diff --git a/lldb/source/Plugins/ScriptInterpreter/Lua/ScriptInterpreterLua.h b/lldb/source/Plugins/ScriptInterpreter/Lua/ScriptInterpreterLua.h
index 808000b833ec..b601779ff301 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Lua/ScriptInterpreterLua.h
+++ b/lldb/source/Plugins/ScriptInterpreter/Lua/ScriptInterpreterLua.h
@@ -49,6 +49,8 @@ public:
StructuredData::ObjectSP *module_sp = nullptr,
FileSpec extra_search_dir = {}) override;
+ StructuredData::DictionarySP GetInterpreterInfo() override;
+
// Static Functions
static void Initialize();
@@ -56,9 +58,9 @@ public:
static lldb::ScriptInterpreterSP CreateInstance(Debugger &debugger);
- static lldb_private::ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "script-lua"; }
- static const char *GetPluginDescriptionStatic();
+ static llvm::StringRef GetPluginDescriptionStatic();
static bool BreakpointCallbackFunction(void *baton,
StoppointCallbackContext *context,
@@ -70,9 +72,7 @@ public:
lldb::user_id_t watch_id);
// PluginInterface protocol
- lldb_private::ConstString GetPluginName() override;
-
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
Lua &GetLua();
diff --git a/lldb/source/Plugins/ScriptInterpreter/None/ScriptInterpreterNone.cpp b/lldb/source/Plugins/ScriptInterpreter/None/ScriptInterpreterNone.cpp
index f8a385240bd9..bec90b2038e1 100644
--- a/lldb/source/Plugins/ScriptInterpreter/None/ScriptInterpreterNone.cpp
+++ b/lldb/source/Plugins/ScriptInterpreter/None/ScriptInterpreterNone.cpp
@@ -57,17 +57,6 @@ ScriptInterpreterNone::CreateInstance(Debugger &debugger) {
return std::make_shared<ScriptInterpreterNone>(debugger);
}
-lldb_private::ConstString ScriptInterpreterNone::GetPluginNameStatic() {
- static ConstString g_name("script-none");
- return g_name;
-}
-
-const char *ScriptInterpreterNone::GetPluginDescriptionStatic() {
+llvm::StringRef ScriptInterpreterNone::GetPluginDescriptionStatic() {
return "Null script interpreter";
}
-
-lldb_private::ConstString ScriptInterpreterNone::GetPluginName() {
- return GetPluginNameStatic();
-}
-
-uint32_t ScriptInterpreterNone::GetPluginVersion() { return 1; }
diff --git a/lldb/source/Plugins/ScriptInterpreter/None/ScriptInterpreterNone.h b/lldb/source/Plugins/ScriptInterpreter/None/ScriptInterpreterNone.h
index c438b6315c5d..6d3ff251e362 100644
--- a/lldb/source/Plugins/ScriptInterpreter/None/ScriptInterpreterNone.h
+++ b/lldb/source/Plugins/ScriptInterpreter/None/ScriptInterpreterNone.h
@@ -32,14 +32,12 @@ public:
static lldb::ScriptInterpreterSP CreateInstance(Debugger &debugger);
- static lldb_private::ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "script-none"; }
- static const char *GetPluginDescriptionStatic();
+ static llvm::StringRef GetPluginDescriptionStatic();
// PluginInterface protocol
- lldb_private::ConstString GetPluginName() override;
-
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
};
} // namespace lldb_private
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp
index f51d9b3a796c..7c71c9329e57 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp
@@ -998,20 +998,6 @@ bool PythonFile::Check(PyObject *py_obj) {
#endif
}
-namespace {
-class GIL {
-public:
- GIL() {
- m_state = PyGILState_Ensure();
- assert(!PyErr_Occurred());
- }
- ~GIL() { PyGILState_Release(m_state); }
-
-protected:
- PyGILState_STATE m_state;
-};
-} // namespace
-
const char *PythonException::toCString() const {
if (!m_repr_bytes)
return "unknown exception";
@@ -1114,10 +1100,12 @@ GetOptionsForPyObject(const PythonObject &obj) {
auto writable = As<bool>(obj.CallMethod("writable"));
if (!writable)
return writable.takeError();
- if (readable.get())
- options |= File::eOpenOptionRead;
- if (writable.get())
- options |= File::eOpenOptionWrite;
+ if (readable.get() && writable.get())
+ options |= File::eOpenOptionReadWrite;
+ else if (writable.get())
+ options |= File::eOpenOptionWriteOnly;
+ else if (readable.get())
+ options |= File::eOpenOptionReadOnly;
return options;
#else
PythonString py_mode = obj.GetAttributeValue("mode").AsType<PythonString>();
@@ -1413,7 +1401,10 @@ llvm::Expected<FileSP> PythonFile::ConvertToFile(bool borrowed) {
if (!options)
return options.takeError();
- if (options.get() & File::eOpenOptionWrite) {
+ File::OpenOptions rw =
+ options.get() & (File::eOpenOptionReadOnly | File::eOpenOptionWriteOnly |
+ File::eOpenOptionReadWrite);
+ if (rw == File::eOpenOptionWriteOnly || rw == File::eOpenOptionReadWrite) {
// LLDB and python will not share I/O buffers. We should probably
// flush the python buffers now.
auto r = CallMethod("flush");
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h b/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h
index 4577253227cd..56bc55d239d1 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h
@@ -71,6 +71,18 @@ class PythonDictionary;
class PythonInteger;
class PythonException;
+class GIL {
+public:
+ GIL() {
+ m_state = PyGILState_Ensure();
+ assert(!PyErr_Occurred());
+ }
+ ~GIL() { PyGILState_Release(m_state); }
+
+protected:
+ PyGILState_STATE m_state;
+};
+
class StructuredPythonObject : public StructuredData::Generic {
public:
StructuredPythonObject() : StructuredData::Generic() {}
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h b/lldb/source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h
index 1ef792bcf303..798d947a0a7d 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h
@@ -46,9 +46,15 @@ extern "C" void *LLDBSwigPythonCreateScriptedProcess(
const lldb::TargetSP &target_sp, StructuredDataImpl *args_impl,
std::string &error_string);
+extern "C" void *LLDBSwigPythonCreateScriptedThread(
+ const char *python_class_name, const char *session_dictionary_name,
+ const lldb::ProcessSP &process_sp, StructuredDataImpl *args_impl,
+ std::string &error_string);
+
extern "C" void *LLDBSWIGPython_CastPyObjectToSBData(void *data);
extern "C" void *LLDBSWIGPython_CastPyObjectToSBError(void *data);
extern "C" void *LLDBSWIGPython_CastPyObjectToSBValue(void *data);
+extern "C" void *LLDBSWIGPython_CastPyObjectToSBMemoryRegionInfo(void *data);
} // namespace lldb_private
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp
index 7ad63722c31c..c1f4c2d3b4d3 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp
@@ -355,7 +355,6 @@ private:
PyEval_InitThreads();
}
- TerminalState m_stdin_tty_state;
PyGILState_STATE m_gil_state = PyGILState_UNLOCKED;
bool m_was_already_initialized = false;
};
@@ -411,6 +410,36 @@ FileSpec ScriptInterpreterPython::GetPythonDir() {
return g_spec;
}
+static const char GetInterpreterInfoScript[] = R"(
+import os
+import sys
+
+def main(lldb_python_dir, python_exe_relative_path):
+ info = {
+ "lldb-pythonpath": lldb_python_dir,
+ "language": "python",
+ "prefix": sys.prefix,
+ "executable": os.path.join(sys.prefix, python_exe_relative_path)
+ }
+ return info
+)";
+
+static const char python_exe_relative_path[] = LLDB_PYTHON_EXE_RELATIVE_PATH;
+
+StructuredData::DictionarySP ScriptInterpreterPython::GetInterpreterInfo() {
+ GIL gil;
+ FileSpec python_dir_spec = GetPythonDir();
+ if (!python_dir_spec)
+ return nullptr;
+ PythonScript get_info(GetInterpreterInfoScript);
+ auto info_json = unwrapIgnoringErrors(
+ As<PythonDictionary>(get_info(PythonString(python_dir_spec.GetPath()),
+ PythonString(python_exe_relative_path))));
+ if (!info_json)
+ return nullptr;
+ return info_json.CreateStructuredDictionary();
+}
+
void ScriptInterpreterPython::SharedLibraryDirectoryHelper(
FileSpec &this_file) {
// When we're loaded from python, this_file will point to the file inside the
@@ -437,12 +466,7 @@ void ScriptInterpreterPython::SharedLibraryDirectoryHelper(
#endif
}
-lldb_private::ConstString ScriptInterpreterPython::GetPluginNameStatic() {
- static ConstString g_name("script-python");
- return g_name;
-}
-
-const char *ScriptInterpreterPython::GetPluginDescriptionStatic() {
+llvm::StringRef ScriptInterpreterPython::GetPluginDescriptionStatic() {
return "Embedded Python interpreter";
}
@@ -591,12 +615,6 @@ ScriptInterpreterPythonImpl::~ScriptInterpreterPythonImpl() {
PyGILState_Release(gil_state);
}
-lldb_private::ConstString ScriptInterpreterPythonImpl::GetPluginName() {
- return GetPluginNameStatic();
-}
-
-uint32_t ScriptInterpreterPythonImpl::GetPluginVersion() { return 1; }
-
void ScriptInterpreterPythonImpl::IOHandlerActivated(IOHandler &io_handler,
bool interactive) {
const char *instructions = nullptr;
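
The embedded GetInterpreterInfoScript above returns a plain Python dictionary, which GetInterpreterInfo() then converts into a StructuredData dictionary. A rough sketch of the value it yields; every path below is hypothetical and only illustrates the shape:

    # What main(lldb_python_dir, python_exe_relative_path) evaluates to.
    info = {
        "lldb-pythonpath": "/usr/lib/python3.9/site-packages",  # lldb_python_dir
        "language": "python",
        "prefix": "/usr",                                       # sys.prefix
        "executable": "/usr/bin/python3.9",                     # prefix + relative exe path
    }
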
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.h b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.h
index b8b978118218..8cfc24e71283 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.h
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.h
@@ -13,8 +13,6 @@
#if LLDB_ENABLE_PYTHON
-#include "ScriptedProcessPythonInterface.h"
-
#include "lldb/Breakpoint/BreakpointOptions.h"
#include "lldb/Core/IOHandler.h"
#include "lldb/Core/StructuredDataImpl.h"
@@ -48,10 +46,11 @@ public:
: ScriptInterpreter(debugger, lldb::eScriptLanguagePython),
IOHandlerDelegateMultiline("DONE") {}
+ StructuredData::DictionarySP GetInterpreterInfo() override;
static void Initialize();
static void Terminate();
- static lldb_private::ConstString GetPluginNameStatic();
- static const char *GetPluginDescriptionStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "script-python"; }
+ static llvm::StringRef GetPluginDescriptionStatic();
static FileSpec GetPythonDir();
static void SharedLibraryDirectoryHelper(FileSpec &this_file);
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h
index d1b0b3fda1ef..a3f83b696ed4 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h
@@ -292,9 +292,7 @@ public:
static lldb::ScriptInterpreterSP CreateInstance(Debugger &debugger);
// PluginInterface protocol
- lldb_private::ConstString GetPluginName() override;
-
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
class Locker : public ScriptInterpreterLocker {
public:
@@ -432,13 +430,12 @@ public:
int stdin_fd = GetInputFD();
if (stdin_fd >= 0) {
Terminal terminal(stdin_fd);
- TerminalState terminal_state;
- const bool is_a_tty = terminal.IsATerminal();
+ TerminalState terminal_state(terminal);
- if (is_a_tty) {
- terminal_state.Save(stdin_fd, false);
- terminal.SetCanonical(false);
- terminal.SetEcho(true);
+ if (terminal.IsATerminal()) {
+ // FIXME: error handling?
+ llvm::consumeError(terminal.SetCanonical(false));
+ llvm::consumeError(terminal.SetEcho(true));
}
ScriptInterpreterPythonImpl::Locker locker(
@@ -466,9 +463,6 @@ public:
run_string.Printf("run_python_interpreter (%s)",
m_python->GetDictionaryName());
PyRun_SimpleString(run_string.GetData());
-
- if (is_a_tty)
- terminal_state.Restore();
}
}
SetIsDone(true);
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.cpp
index ce262c930f8b..29680dab5a14 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.cpp
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.cpp
@@ -7,6 +7,8 @@
//===----------------------------------------------------------------------===//
#include "lldb/Host/Config.h"
+#include "lldb/Utility/Log.h"
+#include "lldb/Utility/Logging.h"
#include "lldb/lldb-enumerations.h"
#if LLDB_ENABLE_PYTHON
@@ -17,36 +19,40 @@
#include "SWIGPythonBridge.h"
#include "ScriptInterpreterPythonImpl.h"
#include "ScriptedProcessPythonInterface.h"
+#include "ScriptedThreadPythonInterface.h"
using namespace lldb;
using namespace lldb_private;
using namespace lldb_private::python;
using Locker = ScriptInterpreterPythonImpl::Locker;
+ScriptedProcessPythonInterface::ScriptedProcessPythonInterface(
+ ScriptInterpreterPythonImpl &interpreter)
+ : ScriptedProcessInterface(), ScriptedPythonInterface(interpreter) {}
+
StructuredData::GenericSP ScriptedProcessPythonInterface::CreatePluginObject(
- const llvm::StringRef class_name, lldb::TargetSP target_sp,
+ llvm::StringRef class_name, ExecutionContext &exe_ctx,
StructuredData::DictionarySP args_sp) {
if (class_name.empty())
return {};
- std::string error_string;
+ TargetSP target_sp = exe_ctx.GetTargetSP();
StructuredDataImpl *args_impl = nullptr;
if (args_sp) {
args_impl = new StructuredDataImpl();
args_impl->SetObjectSP(args_sp);
}
+ std::string error_string;
- void *ret_val;
-
- {
+ Locker py_lock(&m_interpreter, Locker::AcquireLock | Locker::NoSTDIN,
+ Locker::FreeLock);
- Locker py_lock(&m_interpreter, Locker::AcquireLock | Locker::NoSTDIN,
- Locker::FreeLock);
+ void *ret_val = LLDBSwigPythonCreateScriptedProcess(
+ class_name.str().c_str(), m_interpreter.GetDictionaryName(), target_sp,
+ args_impl, error_string);
- ret_val = LLDBSwigPythonCreateScriptedProcess(
- class_name.str().c_str(), m_interpreter.GetDictionaryName(), target_sp,
- args_impl, error_string);
- }
+ if (!ret_val)
+ return {};
m_object_instance_sp =
StructuredData::GenericSP(new StructuredPythonObject(ret_val));
@@ -63,244 +69,101 @@ Status ScriptedProcessPythonInterface::Resume() {
}
bool ScriptedProcessPythonInterface::ShouldStop() {
- llvm::Optional<unsigned long long> should_stop =
- GetGenericInteger("should_stop");
+ Status error;
+ StructuredData::ObjectSP obj = Dispatch("is_alive", error);
- if (!should_stop)
- return false;
+ if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, error))
+ return {};
- return static_cast<bool>(*should_stop);
+ return obj->GetBooleanValue();
}
Status ScriptedProcessPythonInterface::Stop() {
return GetStatusFromMethod("stop");
}
-Status ScriptedProcessPythonInterface::GetStatusFromMethod(
- llvm::StringRef method_name) {
- Locker py_lock(&m_interpreter, Locker::AcquireLock | Locker::NoSTDIN,
- Locker::FreeLock);
-
- if (!m_object_instance_sp)
- return Status("Python object ill-formed.");
-
- if (!m_object_instance_sp)
- return Status("Cannot convert Python object to StructuredData::Generic.");
- PythonObject implementor(PyRefType::Borrowed,
- (PyObject *)m_object_instance_sp->GetValue());
-
- if (!implementor.IsAllocated())
- return Status("Python implementor not allocated.");
-
- PythonObject pmeth(
- PyRefType::Owned,
- PyObject_GetAttrString(implementor.get(), method_name.str().c_str()));
-
- if (PyErr_Occurred())
- PyErr_Clear();
-
- if (!pmeth.IsAllocated())
- return Status("Python method not allocated.");
-
- if (PyCallable_Check(pmeth.get()) == 0) {
- if (PyErr_Occurred())
- PyErr_Clear();
- return Status("Python method not callable.");
- }
-
- if (PyErr_Occurred())
- PyErr_Clear();
-
- PythonObject py_return(PyRefType::Owned,
- PyObject_CallMethod(implementor.get(),
- method_name.str().c_str(),
- nullptr));
-
- if (PyErr_Occurred()) {
- PyErr_Print();
- PyErr_Clear();
- return Status("Python method could not be called.");
- }
-
- if (PyObject *py_ret_ptr = py_return.get()) {
- lldb::SBError *sb_error =
- (lldb::SBError *)LLDBSWIGPython_CastPyObjectToSBError(py_ret_ptr);
-
- if (!sb_error)
- return Status("Couldn't cast lldb::SBError to lldb::Status.");
-
- Status status = m_interpreter.GetStatusFromSBError(*sb_error);
-
- if (status.Fail())
- return Status("error: %s", status.AsCString());
+llvm::Optional<MemoryRegionInfo>
+ScriptedProcessPythonInterface::GetMemoryRegionContainingAddress(
+ lldb::addr_t address, Status &error) {
+ auto mem_region = Dispatch<llvm::Optional<MemoryRegionInfo>>(
+ "get_memory_region_containing_address", error, address);
- return status;
+ if (error.Fail()) {
+ return ErrorWithMessage<MemoryRegionInfo>(LLVM_PRETTY_FUNCTION,
+ error.AsCString(), error);
}
- return Status("Returned object is null.");
+ return mem_region;
}
-llvm::Optional<unsigned long long>
-ScriptedProcessPythonInterface::GetGenericInteger(llvm::StringRef method_name) {
- Locker py_lock(&m_interpreter, Locker::AcquireLock | Locker::NoSTDIN,
- Locker::FreeLock);
-
- if (!m_object_instance_sp)
- return llvm::None;
-
- if (!m_object_instance_sp)
- return llvm::None;
- PythonObject implementor(PyRefType::Borrowed,
- (PyObject *)m_object_instance_sp->GetValue());
-
- if (!implementor.IsAllocated())
- return llvm::None;
-
- PythonObject pmeth(
- PyRefType::Owned,
- PyObject_GetAttrString(implementor.get(), method_name.str().c_str()));
-
- if (PyErr_Occurred())
- PyErr_Clear();
-
- if (!pmeth.IsAllocated())
- return llvm::None;
-
- if (PyCallable_Check(pmeth.get()) == 0) {
- if (PyErr_Occurred())
- PyErr_Clear();
- return llvm::None;
- }
-
- if (PyErr_Occurred())
- PyErr_Clear();
-
- PythonObject py_return(PyRefType::Owned,
- PyObject_CallMethod(implementor.get(),
- method_name.str().c_str(),
- nullptr));
-
- if (PyErr_Occurred()) {
- PyErr_Print();
- PyErr_Clear();
- }
-
- if (!py_return.get())
- return llvm::None;
+StructuredData::DictionarySP
+ScriptedProcessPythonInterface::GetThreadWithID(lldb::tid_t tid) {
+ Status error;
+ StructuredData::ObjectSP obj = Dispatch("get_thread_with_id", error, tid);
- llvm::Expected<unsigned long long> size = py_return.AsUnsignedLongLong();
- // FIXME: Handle error.
- if (!size)
- return llvm::None;
+ if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, error))
+ return {};
- return *size;
-}
+ StructuredData::DictionarySP dict{obj->GetAsDictionary()};
-lldb::MemoryRegionInfoSP
-ScriptedProcessPythonInterface::GetMemoryRegionContainingAddress(
- lldb::addr_t address) {
- // TODO: Implement
- return nullptr;
-}
-
-StructuredData::DictionarySP
-ScriptedProcessPythonInterface::GetThreadWithID(lldb::tid_t tid) {
- // TODO: Implement
- return nullptr;
+ return dict;
}
StructuredData::DictionarySP
ScriptedProcessPythonInterface::GetRegistersForThread(lldb::tid_t tid) {
// TODO: Implement
- return nullptr;
+ return {};
}
lldb::DataExtractorSP ScriptedProcessPythonInterface::ReadMemoryAtAddress(
lldb::addr_t address, size_t size, Status &error) {
- Locker py_lock(&m_interpreter, Locker::AcquireLock | Locker::NoSTDIN,
- Locker::FreeLock);
-
- auto error_with_message = [&error](llvm::StringRef message) {
- error.SetErrorString(message);
- return nullptr;
- };
-
- static char callee_name[] = "read_memory_at_address";
- std::string param_format = GetPythonValueFormatString(address);
- param_format += GetPythonValueFormatString(size);
-
- if (!m_object_instance_sp)
- return error_with_message("Python object ill-formed.");
-
- if (!m_object_instance_sp)
- return error_with_message("Python method not callable.");
-
- PythonObject implementor(PyRefType::Borrowed,
- (PyObject *)m_object_instance_sp->GetValue());
-
- if (!implementor.IsAllocated())
- return error_with_message("Python implementor not allocated.");
-
- PythonObject pmeth(PyRefType::Owned,
- PyObject_GetAttrString(implementor.get(), callee_name));
-
- if (PyErr_Occurred())
- PyErr_Clear();
-
- if (!pmeth.IsAllocated())
- return error_with_message("Python method not allocated.");
-
- if (PyCallable_Check(pmeth.get()) == 0) {
- if (PyErr_Occurred())
- PyErr_Clear();
- return error_with_message("Python method not callable.");
- }
+ return Dispatch<lldb::DataExtractorSP>("read_memory_at_address", error,
+ address, size);
+}
- if (PyErr_Occurred())
- PyErr_Clear();
+StructuredData::DictionarySP ScriptedProcessPythonInterface::GetLoadedImages() {
+ // TODO: Implement
+ return {};
+}
- PythonObject py_return(PyRefType::Owned,
- PyObject_CallMethod(implementor.get(), callee_name,
- param_format.c_str(), address,
- size));
+lldb::pid_t ScriptedProcessPythonInterface::GetProcessID() {
+ Status error;
+ StructuredData::ObjectSP obj = Dispatch("get_process_id", error);
- if (PyErr_Occurred()) {
- PyErr_Print();
- PyErr_Clear();
- return error_with_message("Python method could not be called.");
- }
+ if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, error))
+ return LLDB_INVALID_PROCESS_ID;
- if (PyObject *py_ret_ptr = py_return.get()) {
- lldb::SBData *sb_data =
- (lldb::SBData *)LLDBSWIGPython_CastPyObjectToSBData(py_ret_ptr);
+ return obj->GetIntegerValue(LLDB_INVALID_PROCESS_ID);
+}
- if (!sb_data)
- return error_with_message(
- "Couldn't cast lldb::SBData to lldb::DataExtractor.");
+bool ScriptedProcessPythonInterface::IsAlive() {
+ Status error;
+ StructuredData::ObjectSP obj = Dispatch("is_alive", error);
- return m_interpreter.GetDataExtractorFromSBData(*sb_data);
- }
+ if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, error))
+ return {};
- return error_with_message("Returned object is null.");
+ return obj->GetBooleanValue();
}
-StructuredData::DictionarySP ScriptedProcessPythonInterface::GetLoadedImages() {
- // TODO: Implement
- return nullptr;
-}
+llvm::Optional<std::string>
+ScriptedProcessPythonInterface::GetScriptedThreadPluginName() {
+ Status error;
+ StructuredData::ObjectSP obj = Dispatch("get_scripted_thread_plugin", error);
-lldb::pid_t ScriptedProcessPythonInterface::GetProcessID() {
- llvm::Optional<unsigned long long> pid = GetGenericInteger("get_process_id");
- return (!pid) ? LLDB_INVALID_PROCESS_ID : *pid;
-}
+ if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, error))
+ return {};
-bool ScriptedProcessPythonInterface::IsAlive() {
- llvm::Optional<unsigned long long> is_alive = GetGenericInteger("is_alive");
+ return obj->GetStringValue().str();
+}
- if (!is_alive)
- return false;
+lldb::ScriptedThreadInterfaceSP
+ScriptedProcessPythonInterface::GetScriptedThreadInterface() {
+ if (!m_scripted_thread_interface_sp)
+ m_scripted_thread_interface_sp =
+ std::make_shared<ScriptedThreadPythonInterface>(m_interpreter);
- return static_cast<bool>(*is_alive);
+ return m_scripted_thread_interface_sp;
}
#endif
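
The Dispatch() calls above map one-to-one onto methods of the user's scripted process class (get_memory_region_containing_address, get_thread_with_id, read_memory_at_address, get_process_id, is_alive, get_scripted_thread_plugin). A bare-bones sketch of such a class; the class name, constructor signature, and returned values are illustrative only and not defined by this patch:

    import lldb

    class MyScriptedProcess:
        def __init__(self, target, args):
            self.target = target  # lldb.SBTarget handed in at creation time

        def get_memory_region_containing_address(self, addr):
            # The returned SBMemoryRegionInfo is cast back on the C++ side via
            # LLDBSWIGPython_CastPyObjectToSBMemoryRegionInfo.
            return lldb.SBMemoryRegionInfo()

        def get_thread_with_id(self, tid):
            return {}

        def read_memory_at_address(self, addr, size):
            # The returned SBData becomes a DataExtractorSP on the C++ side.
            return lldb.SBData()

        def get_process_id(self):
            return 42

        def is_alive(self):
            return True

        def get_scripted_thread_plugin(self):
            # Fully qualified name of the scripted thread class to instantiate.
            return "my_module.MyScriptedThread"
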
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.h b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.h
index 30cb5a882af2..421bdd59887c 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.h
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.h
@@ -13,17 +13,18 @@
#if LLDB_ENABLE_PYTHON
+#include "ScriptedPythonInterface.h"
#include "lldb/Interpreter/ScriptedProcessInterface.h"
namespace lldb_private {
-class ScriptInterpreterPythonImpl;
-class ScriptedProcessPythonInterface : public ScriptedProcessInterface {
+class ScriptedProcessPythonInterface : public ScriptedProcessInterface,
+ public ScriptedPythonInterface {
public:
- ScriptedProcessPythonInterface(ScriptInterpreterPythonImpl &interpreter)
- : ScriptedProcessInterface(), m_interpreter(interpreter) {}
+ ScriptedProcessPythonInterface(ScriptInterpreterPythonImpl &interpreter);
StructuredData::GenericSP
- CreatePluginObject(const llvm::StringRef class_name, lldb::TargetSP target_sp,
+ CreatePluginObject(const llvm::StringRef class_name,
+ ExecutionContext &exe_ctx,
StructuredData::DictionarySP args_sp) override;
Status Launch() override;
@@ -34,8 +35,9 @@ public:
Status Stop() override;
- lldb::MemoryRegionInfoSP
- GetMemoryRegionContainingAddress(lldb::addr_t address) override;
+ llvm::Optional<MemoryRegionInfo>
+ GetMemoryRegionContainingAddress(lldb::addr_t address,
+ Status &error) override;
StructuredData::DictionarySP GetThreadWithID(lldb::tid_t tid) override;
@@ -50,15 +52,10 @@ public:
bool IsAlive() override;
-protected:
- llvm::Optional<unsigned long long>
- GetGenericInteger(llvm::StringRef method_name);
- Status GetStatusFromMethod(llvm::StringRef method_name);
+ llvm::Optional<std::string> GetScriptedThreadPluginName() override;
private:
- // The lifetime is managed by the ScriptInterpreter
- ScriptInterpreterPythonImpl &m_interpreter;
- StructuredData::GenericSP m_object_instance_sp;
+ lldb::ScriptedThreadInterfaceSP GetScriptedThreadInterface() override;
};
} // namespace lldb_private
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedPythonInterface.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedPythonInterface.cpp
new file mode 100644
index 000000000000..07bf952bf840
--- /dev/null
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedPythonInterface.cpp
@@ -0,0 +1,92 @@
+//===-- ScriptedPythonInterface.cpp ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "lldb/Host/Config.h"
+#include "lldb/Utility/Log.h"
+#include "lldb/Utility/Logging.h"
+#include "lldb/lldb-enumerations.h"
+
+#if LLDB_ENABLE_PYTHON
+
+// LLDB Python header must be included first
+#include "lldb-python.h"
+
+#include "SWIGPythonBridge.h"
+#include "ScriptInterpreterPythonImpl.h"
+#include "ScriptedPythonInterface.h"
+
+using namespace lldb;
+using namespace lldb_private;
+
+ScriptedPythonInterface::ScriptedPythonInterface(
+ ScriptInterpreterPythonImpl &interpreter)
+ : ScriptedInterface(), m_interpreter(interpreter) {}
+
+Status
+ScriptedPythonInterface::GetStatusFromMethod(llvm::StringRef method_name) {
+ Status error;
+ Dispatch<Status>(method_name, error);
+
+ return error;
+}
+
+template <>
+StructuredData::DictionarySP
+ScriptedPythonInterface::ExtractValueFromPythonObject<
+ StructuredData::DictionarySP>(python::PythonObject &p, Status &error) {
+ python::PythonDictionary result_dict(python::PyRefType::Borrowed, p.get());
+ return result_dict.CreateStructuredDictionary();
+}
+
+template <>
+Status ScriptedPythonInterface::ExtractValueFromPythonObject<Status>(
+ python::PythonObject &p, Status &error) {
+ if (lldb::SBError *sb_error = reinterpret_cast<lldb::SBError *>(
+ LLDBSWIGPython_CastPyObjectToSBError(p.get())))
+ error = m_interpreter.GetStatusFromSBError(*sb_error);
+ else
+ error.SetErrorString("Couldn't cast lldb::SBError to lldb::Status.");
+
+ return error;
+}
+
+template <>
+lldb::DataExtractorSP
+ScriptedPythonInterface::ExtractValueFromPythonObject<lldb::DataExtractorSP>(
+ python::PythonObject &p, Status &error) {
+ lldb::SBData *sb_data = reinterpret_cast<lldb::SBData *>(
+ LLDBSWIGPython_CastPyObjectToSBData(p.get()));
+
+ if (!sb_data) {
+ error.SetErrorString(
+ "Couldn't cast lldb::SBData to lldb::DataExtractorSP.");
+ return nullptr;
+ }
+
+ return m_interpreter.GetDataExtractorFromSBData(*sb_data);
+}
+
+template <>
+llvm::Optional<MemoryRegionInfo>
+ScriptedPythonInterface::ExtractValueFromPythonObject<
+ llvm::Optional<MemoryRegionInfo>>(python::PythonObject &p, Status &error) {
+
+ lldb::SBMemoryRegionInfo *sb_mem_reg_info =
+ reinterpret_cast<lldb::SBMemoryRegionInfo *>(
+ LLDBSWIGPython_CastPyObjectToSBMemoryRegionInfo(p.get()));
+
+ if (!sb_mem_reg_info) {
+ error.SetErrorString(
+ "Couldn't cast lldb::SBMemoryRegionInfo to lldb::MemoryRegionInfoSP.");
+ return {};
+ }
+
+ return m_interpreter.GetOpaqueTypeFromSBMemoryRegionInfo(*sb_mem_reg_info);
+}
+
+#endif
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedPythonInterface.h b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedPythonInterface.h
new file mode 100644
index 000000000000..da112eb72022
--- /dev/null
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedPythonInterface.h
@@ -0,0 +1,151 @@
+//===-- ScriptedPythonInterface.h -------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_SCRIPTEDPYTHONINTERFACE_H
+#define LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_SCRIPTEDPYTHONINTERFACE_H
+
+#include "lldb/Host/Config.h"
+
+#if LLDB_ENABLE_PYTHON
+
+#include "lldb/Interpreter/ScriptedInterface.h"
+#include "lldb/Utility/DataBufferHeap.h"
+
+#include "PythonDataObjects.h"
+#include "SWIGPythonBridge.h"
+#include "ScriptInterpreterPythonImpl.h"
+
+namespace lldb_private {
+class ScriptInterpreterPythonImpl;
+class ScriptedPythonInterface : virtual public ScriptedInterface {
+public:
+ ScriptedPythonInterface(ScriptInterpreterPythonImpl &interpreter);
+ virtual ~ScriptedPythonInterface() = default;
+
+protected:
+ template <typename T = StructuredData::ObjectSP>
+ T ExtractValueFromPythonObject(python::PythonObject &p, Status &error) {
+ return p.CreateStructuredObject();
+ }
+
+ template <typename T = StructuredData::ObjectSP, typename... Args>
+ T Dispatch(llvm::StringRef method_name, Status &error, Args... args) {
+ using namespace python;
+ using Locker = ScriptInterpreterPythonImpl::Locker;
+
+ std::string caller_signature =
+ llvm::Twine(LLVM_PRETTY_FUNCTION + llvm::Twine(" (") +
+ llvm::Twine(method_name) + llvm::Twine(")"))
+ .str();
+ if (!m_object_instance_sp)
+ return ErrorWithMessage<T>(caller_signature, "Python object ill-formed",
+ error);
+
+ Locker py_lock(&m_interpreter, Locker::AcquireLock | Locker::NoSTDIN,
+ Locker::FreeLock);
+
+ PythonObject implementor(PyRefType::Borrowed,
+ (PyObject *)m_object_instance_sp->GetValue());
+
+ if (!implementor.IsAllocated())
+ return ErrorWithMessage<T>(caller_signature,
+ "Python implementor not allocated.", error);
+
+ PythonObject pmeth(
+ PyRefType::Owned,
+ PyObject_GetAttrString(implementor.get(), method_name.str().c_str()));
+
+ if (PyErr_Occurred())
+ PyErr_Clear();
+
+ if (!pmeth.IsAllocated())
+ return ErrorWithMessage<T>(caller_signature,
+ "Python method not allocated.", error);
+
+ if (PyCallable_Check(pmeth.get()) == 0) {
+ if (PyErr_Occurred())
+ PyErr_Clear();
+ return ErrorWithMessage<T>(caller_signature,
+ "Python method not callable.", error);
+ }
+
+ if (PyErr_Occurred())
+ PyErr_Clear();
+
+ // TODO: make `const char *` when removing support for Python 2.
+ char *format = nullptr;
+ std::string format_buffer;
+
+ if (sizeof...(Args) > 0) {
+ FormatArgs(format_buffer, args...);
+ // TODO: make `const char *` when removing support for Python 2.
+ format = const_cast<char *>(format_buffer.c_str());
+ }
+
+ // TODO: make `const char *` when removing support for Python 2.
+ PythonObject py_return(
+ PyRefType::Owned,
+ PyObject_CallMethod(implementor.get(),
+ const_cast<char *>(method_name.data()), format,
+ args...));
+
+ if (PyErr_Occurred()) {
+ PyErr_Print();
+ PyErr_Clear();
+ return ErrorWithMessage<T>(caller_signature,
+ "Python method could not be called.", error);
+ }
+
+ if (!py_return.IsAllocated())
+ return ErrorWithMessage<T>(caller_signature, "Returned object is null.",
+ error);
+
+ return ExtractValueFromPythonObject<T>(py_return, error);
+ }
+
+ Status GetStatusFromMethod(llvm::StringRef method_name);
+
+ template <typename T, typename... Args>
+ void FormatArgs(std::string &fmt, T arg, Args... args) const {
+ FormatArgs(fmt, arg);
+ FormatArgs(fmt, args...);
+ }
+
+ template <typename T> void FormatArgs(std::string &fmt, T arg) const {
+ fmt += GetPythonValueFormatString(arg);
+ }
+
+ void FormatArgs(std::string &fmt) const {}
+
+ // The lifetime is managed by the ScriptInterpreter
+ ScriptInterpreterPythonImpl &m_interpreter;
+};
+
+template <>
+StructuredData::DictionarySP
+ScriptedPythonInterface::ExtractValueFromPythonObject<
+ StructuredData::DictionarySP>(python::PythonObject &p, Status &error);
+
+template <>
+Status ScriptedPythonInterface::ExtractValueFromPythonObject<Status>(
+ python::PythonObject &p, Status &error);
+
+template <>
+lldb::DataExtractorSP
+ScriptedPythonInterface::ExtractValueFromPythonObject<lldb::DataExtractorSP>(
+ python::PythonObject &p, Status &error);
+
+template <>
+llvm::Optional<MemoryRegionInfo>
+ScriptedPythonInterface::ExtractValueFromPythonObject<
+ llvm::Optional<MemoryRegionInfo>>(python::PythonObject &p, Status &error);
+
+} // namespace lldb_private
+
+#endif // LLDB_ENABLE_PYTHON
+#endif // LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_SCRIPTEDPYTHONINTERFACE_H
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.cpp
new file mode 100644
index 000000000000..d2c28bc426ee
--- /dev/null
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.cpp
@@ -0,0 +1,140 @@
+//===-- ScriptedThreadPythonInterface.cpp ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "lldb/Host/Config.h"
+#include "lldb/Utility/Log.h"
+#include "lldb/Utility/Logging.h"
+#include "lldb/lldb-enumerations.h"
+
+#if LLDB_ENABLE_PYTHON
+
+// LLDB Python header must be included first
+#include "lldb-python.h"
+
+#include "SWIGPythonBridge.h"
+#include "ScriptInterpreterPythonImpl.h"
+#include "ScriptedThreadPythonInterface.h"
+
+using namespace lldb;
+using namespace lldb_private;
+using namespace lldb_private::python;
+using Locker = ScriptInterpreterPythonImpl::Locker;
+
+ScriptedThreadPythonInterface::ScriptedThreadPythonInterface(
+ ScriptInterpreterPythonImpl &interpreter)
+ : ScriptedThreadInterface(), ScriptedPythonInterface(interpreter) {}
+
+StructuredData::GenericSP ScriptedThreadPythonInterface::CreatePluginObject(
+ const llvm::StringRef class_name, ExecutionContext &exe_ctx,
+ StructuredData::DictionarySP args_sp) {
+
+ if (class_name.empty())
+ return {};
+
+ ProcessSP process_sp = exe_ctx.GetProcessSP();
+ StructuredDataImpl *args_impl = nullptr;
+ if (args_sp) {
+ args_impl = new StructuredDataImpl();
+ args_impl->SetObjectSP(args_sp);
+ }
+ std::string error_string;
+
+ Locker py_lock(&m_interpreter, Locker::AcquireLock | Locker::NoSTDIN,
+ Locker::FreeLock);
+
+ void *ret_val = LLDBSwigPythonCreateScriptedThread(
+ class_name.str().c_str(), m_interpreter.GetDictionaryName(), process_sp,
+ args_impl, error_string);
+
+ if (!ret_val)
+ return {};
+
+ m_object_instance_sp =
+ StructuredData::GenericSP(new StructuredPythonObject(ret_val));
+
+ return m_object_instance_sp;
+}
+
+lldb::tid_t ScriptedThreadPythonInterface::GetThreadID() {
+ Status error;
+ StructuredData::ObjectSP obj = Dispatch("get_thread_id", error);
+
+ if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, error))
+ return LLDB_INVALID_THREAD_ID;
+
+ return obj->GetIntegerValue(LLDB_INVALID_THREAD_ID);
+}
+
+llvm::Optional<std::string> ScriptedThreadPythonInterface::GetName() {
+ Status error;
+ StructuredData::ObjectSP obj = Dispatch("get_name", error);
+
+ if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, error))
+ return {};
+
+ return obj->GetStringValue().str();
+}
+
+lldb::StateType ScriptedThreadPythonInterface::GetState() {
+ Status error;
+ StructuredData::ObjectSP obj = Dispatch("get_state", error);
+
+ if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, error))
+ return eStateInvalid;
+
+ return static_cast<StateType>(obj->GetIntegerValue(eStateInvalid));
+}
+
+llvm::Optional<std::string> ScriptedThreadPythonInterface::GetQueue() {
+ Status error;
+ StructuredData::ObjectSP obj = Dispatch("get_queue", error);
+
+ if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, error))
+ return {};
+
+ return obj->GetStringValue().str();
+}
+
+StructuredData::DictionarySP ScriptedThreadPythonInterface::GetStopReason() {
+ Status error;
+ StructuredData::DictionarySP dict =
+ Dispatch<StructuredData::DictionarySP>("get_stop_reason", error);
+
+ if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, dict, error))
+ return {};
+
+ return dict;
+}
+
+StructuredData::ArraySP ScriptedThreadPythonInterface::GetStackFrames() {
+ return nullptr;
+}
+
+StructuredData::DictionarySP ScriptedThreadPythonInterface::GetRegisterInfo() {
+ Status error;
+ StructuredData::DictionarySP dict =
+ Dispatch<StructuredData::DictionarySP>("get_register_info", error);
+
+ if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, dict, error))
+ return {};
+
+ return dict;
+}
+
+llvm::Optional<std::string>
+ScriptedThreadPythonInterface::GetRegisterContext() {
+ Status error;
+ StructuredData::ObjectSP obj = Dispatch("get_register_context", error);
+
+ if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, obj, error))
+ return {};
+
+ return obj->GetAsString()->GetValue().str();
+}
+
+#endif
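
Likewise, the hooks dispatched by ScriptedThreadPythonInterface correspond to methods on the user's scripted thread class. A minimal sketch with placeholder values; the register-info layout expected by DynamicRegisterInfo is not covered by this diff, so it is only hinted at here:

    import lldb

    class MyScriptedThread:
        def __init__(self, process, args):
            self.process = process

        def get_thread_id(self):
            return 1

        def get_name(self):
            return "scripted-thread-1"

        def get_state(self):
            return lldb.eStateStopped

        def get_queue(self):
            return None

        def get_stop_reason(self):
            return {"type": lldb.eStopReasonSignal,
                    "data": {"signal": 11, "desc": "illustrative"}}

        def get_register_info(self):
            # Must describe register sets/registers in the layout that
            # DynamicRegisterInfo expects (not shown in this diff).
            return {}

        def get_register_context(self):
            # Packed register bytes as a string; ScriptedThread copies them
            # into a DataBufferHeap (see CreateRegisterContextForFrame above).
            return ""  # placeholder
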
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.h b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.h
new file mode 100644
index 000000000000..996b8d43136b
--- /dev/null
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.h
@@ -0,0 +1,48 @@
+//===-- ScriptedThreadPythonInterface.h ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_SCRIPTEDTHREADPYTHONINTERFACE_H
+#define LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_SCRIPTEDTHREADPYTHONINTERFACE_H
+
+#include "lldb/Host/Config.h"
+
+#if LLDB_ENABLE_PYTHON
+
+#include "ScriptedPythonInterface.h"
+#include "lldb/Interpreter/ScriptedProcessInterface.h"
+
+namespace lldb_private {
+class ScriptedThreadPythonInterface : public ScriptedThreadInterface,
+ public ScriptedPythonInterface {
+public:
+ ScriptedThreadPythonInterface(ScriptInterpreterPythonImpl &interpreter);
+
+ StructuredData::GenericSP
+ CreatePluginObject(llvm::StringRef class_name, ExecutionContext &exe_ctx,
+ StructuredData::DictionarySP args_sp) override;
+
+ lldb::tid_t GetThreadID() override;
+
+ llvm::Optional<std::string> GetName() override;
+
+ lldb::StateType GetState() override;
+
+ llvm::Optional<std::string> GetQueue() override;
+
+ StructuredData::DictionarySP GetStopReason() override;
+
+ StructuredData::ArraySP GetStackFrames() override;
+
+ StructuredData::DictionarySP GetRegisterInfo() override;
+
+ llvm::Optional<std::string> GetRegisterContext() override;
+};
+} // namespace lldb_private
+
+#endif // LLDB_ENABLE_PYTHON
+#endif // LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_SCRIPTEDTHREADPYTHONINTERFACE_H
diff --git a/lldb/source/Plugins/StructuredData/DarwinLog/StructuredDataDarwinLog.cpp b/lldb/source/Plugins/StructuredData/DarwinLog/StructuredDataDarwinLog.cpp
index 87edf7789f0d..b024854f3981 100644
--- a/lldb/source/Plugins/StructuredData/DarwinLog/StructuredDataDarwinLog.cpp
+++ b/lldb/source/Plugins/StructuredData/DarwinLog/StructuredDataDarwinLog.cpp
@@ -143,14 +143,9 @@ public:
const char *GetLoggingModuleName() const { return "libsystem_trace.dylib"; }
};
-using StructuredDataDarwinLogPropertiesSP =
- std::shared_ptr<StructuredDataDarwinLogProperties>;
-
-static const StructuredDataDarwinLogPropertiesSP &GetGlobalProperties() {
- static StructuredDataDarwinLogPropertiesSP g_settings_sp;
- if (!g_settings_sp)
- g_settings_sp = std::make_shared<StructuredDataDarwinLogProperties>();
- return g_settings_sp;
+static StructuredDataDarwinLogProperties &GetGlobalProperties() {
+ static StructuredDataDarwinLogProperties g_settings;
+ return g_settings;
}
const char *const s_filter_attributes[] = {
@@ -879,9 +874,9 @@ protected:
process_sp->GetStructuredDataPlugin(GetDarwinLogTypeName());
stream.Printf("Availability: %s\n",
plugin_sp ? "available" : "unavailable");
- ConstString plugin_name = StructuredDataDarwinLog::GetStaticPluginName();
+ llvm::StringRef plugin_name = StructuredDataDarwinLog::GetStaticPluginName();
const bool enabled =
- plugin_sp ? plugin_sp->GetEnabled(plugin_name) : false;
+ plugin_sp ? plugin_sp->GetEnabled(ConstString(plugin_name)) : false;
stream.Printf("Enabled: %s\n", enabled ? "true" : "false");
}
@@ -1023,7 +1018,7 @@ bool RunEnableCommand(CommandInterpreter &interpreter) {
StreamString command_stream;
command_stream << "plugin structured-data darwin-log enable";
- auto enable_options = GetGlobalProperties()->GetAutoEnableOptions();
+ auto enable_options = GetGlobalProperties().GetAutoEnableOptions();
if (!enable_options.empty()) {
command_stream << ' ';
command_stream << enable_options;
@@ -1054,22 +1049,6 @@ void StructuredDataDarwinLog::Terminate() {
PluginManager::UnregisterPlugin(&CreateInstance);
}
-ConstString StructuredDataDarwinLog::GetStaticPluginName() {
- static ConstString s_plugin_name("darwin-log");
- return s_plugin_name;
-}
-
-#pragma mark -
-#pragma mark PluginInterface API
-
-// PluginInterface API
-
-ConstString StructuredDataDarwinLog::GetPluginName() {
- return GetStaticPluginName();
-}
-
-uint32_t StructuredDataDarwinLog::GetPluginVersion() { return 1; }
-
#pragma mark -
#pragma mark StructuredDataPlugin API
@@ -1220,7 +1199,7 @@ Status StructuredDataDarwinLog::GetDescription(
}
bool StructuredDataDarwinLog::GetEnabled(ConstString type_name) const {
- if (type_name == GetStaticPluginName())
+ if (type_name.GetStringRef() == GetStaticPluginName())
return m_is_enabled;
else
return false;
@@ -1237,7 +1216,7 @@ void StructuredDataDarwinLog::ModulesDidLoad(Process &process,
__FUNCTION__, process.GetUniqueID());
// Check if we should enable the darwin log support on startup/attach.
- if (!GetGlobalProperties()->GetEnableOnStartup() &&
+ if (!GetGlobalProperties().GetEnableOnStartup() &&
!s_is_explicitly_enabled) {
 // We're neither auto-enabled nor explicitly enabled, so we shouldn't try to
// enable here.
@@ -1264,7 +1243,7 @@ void StructuredDataDarwinLog::ModulesDidLoad(Process &process,
// must be loaded into the debugged process before we can try to enable
// logging.
const char *logging_module_cstr =
- GetGlobalProperties()->GetLoggingModuleName();
+ GetGlobalProperties().GetLoggingModuleName();
if (!logging_module_cstr || (logging_module_cstr[0] == 0)) {
// We need this. Bail.
LLDB_LOGF(log,
@@ -1384,7 +1363,7 @@ void StructuredDataDarwinLog::DebuggerInitialize(Debugger &debugger) {
debugger, StructuredDataDarwinLogProperties::GetSettingName())) {
const bool is_global_setting = true;
PluginManager::CreateSettingForStructuredDataPlugin(
- debugger, GetGlobalProperties()->GetValueProperties(),
+ debugger, GetGlobalProperties().GetValueProperties(),
ConstString("Properties for the darwin-log"
" plug-in."),
is_global_setting);
@@ -1421,7 +1400,7 @@ Status StructuredDataDarwinLog::FilterLaunchInfo(ProcessLaunchInfo &launch_info,
// If DarwinLog is not enabled (either by explicit user command or via the
// auto-enable option), then we have nothing to do.
- if (!GetGlobalProperties()->GetEnableOnStartup() &&
+ if (!GetGlobalProperties().GetEnableOnStartup() &&
!s_is_explicitly_enabled) {
// Nothing to do, DarwinLog is not enabled.
return error;
@@ -1617,7 +1596,7 @@ void StructuredDataDarwinLog::AddInitCompletionHook(Process &process) {
// Build up the module list.
FileSpecList module_spec_list;
auto module_file_spec =
- FileSpec(GetGlobalProperties()->GetLoggingModuleName());
+ FileSpec(GetGlobalProperties().GetLoggingModuleName());
module_spec_list.Append(module_file_spec);
// We aren't specifying a source file set.
@@ -1638,7 +1617,7 @@ void StructuredDataDarwinLog::AddInitCompletionHook(Process &process) {
LLDB_LOGF(log,
"StructuredDataDarwinLog::%s() failed to set "
"breakpoint in module %s, function %s (process uid %u)",
- __FUNCTION__, GetGlobalProperties()->GetLoggingModuleName(),
+ __FUNCTION__, GetGlobalProperties().GetLoggingModuleName(),
func_name, process.GetUniqueID());
return;
}
@@ -1649,7 +1628,7 @@ void StructuredDataDarwinLog::AddInitCompletionHook(Process &process) {
LLDB_LOGF(log,
"StructuredDataDarwinLog::%s() breakpoint set in module %s,"
"function %s (process uid %u)",
- __FUNCTION__, GetGlobalProperties()->GetLoggingModuleName(),
+ __FUNCTION__, GetGlobalProperties().GetLoggingModuleName(),
func_name, process.GetUniqueID());
}
diff --git a/lldb/source/Plugins/StructuredData/DarwinLog/StructuredDataDarwinLog.h b/lldb/source/Plugins/StructuredData/DarwinLog/StructuredDataDarwinLog.h
index caa94af1f30e..308fd82e9b12 100644
--- a/lldb/source/Plugins/StructuredData/DarwinLog/StructuredDataDarwinLog.h
+++ b/lldb/source/Plugins/StructuredData/DarwinLog/StructuredDataDarwinLog.h
@@ -30,7 +30,7 @@ public:
static void Terminate();
- static ConstString GetStaticPluginName();
+ static llvm::StringRef GetStaticPluginName() { return "darwin-log"; }
/// Return whether the DarwinLog functionality is enabled.
///
@@ -46,9 +46,7 @@ public:
// PluginInterface API
- ConstString GetPluginName() override;
-
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return GetStaticPluginName(); }
// StructuredDataPlugin API
diff --git a/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp b/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp
index b815ebb3c07a..b07674af3bd9 100644
--- a/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp
+++ b/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp
@@ -176,11 +176,6 @@ void SymbolFileBreakpad::Terminate() {
PluginManager::UnregisterPlugin(CreateInstance);
}
-ConstString SymbolFileBreakpad::GetPluginNameStatic() {
- static ConstString g_name("breakpad");
- return g_name;
-}
-
uint32_t SymbolFileBreakpad::CalculateAbilities() {
if (!m_objfile_sp || !llvm::isa<ObjectFileBreakpad>(*m_objfile_sp))
return 0;
@@ -209,6 +204,10 @@ CompUnitSP SymbolFileBreakpad::ParseCompileUnitAtIndex(uint32_t index) {
End(*m_objfile_sp);
assert(Record::classify(*It) == Record::Func);
++It; // Skip FUNC record.
+ // Skip INLINE records.
+ while (It != End && Record::classify(*It) == Record::Inline)
+ ++It;
+
if (It != End) {
auto record = LineRecord::parse(*It);
if (record && record->FileNum < m_files->size())
@@ -224,9 +223,45 @@ CompUnitSP SymbolFileBreakpad::ParseCompileUnitAtIndex(uint32_t index) {
return cu_sp;
}
+FunctionSP SymbolFileBreakpad::GetOrCreateFunction(CompileUnit &comp_unit) {
+ user_id_t id = comp_unit.GetID();
+ if (FunctionSP func_sp = comp_unit.FindFunctionByUID(id))
+ return func_sp;
+
+ Log *log = GetLogIfAllCategoriesSet(LIBLLDB_LOG_SYMBOLS);
+ FunctionSP func_sp;
+ addr_t base = GetBaseFileAddress();
+ if (base == LLDB_INVALID_ADDRESS) {
+ LLDB_LOG(log, "Unable to fetch the base address of object file. Skipping "
+ "symtab population.");
+ return func_sp;
+ }
+
+ const SectionList *list = comp_unit.GetModule()->GetSectionList();
+ CompUnitData &data = m_cu_data->GetEntryRef(id).data;
+ LineIterator It(*m_objfile_sp, Record::Func, data.bookmark);
+ assert(Record::classify(*It) == Record::Func);
+
+ if (auto record = FuncRecord::parse(*It)) {
+ Mangled func_name;
+ func_name.SetValue(ConstString(record->Name), false);
+ addr_t address = record->Address + base;
+ SectionSP section_sp = list->FindSectionContainingFileAddress(address);
+ if (section_sp) {
+ AddressRange func_range(
+ section_sp, address - section_sp->GetFileAddress(), record->Size);
+ // Use the CU's id because every CU has only one function inside.
+ func_sp = std::make_shared<Function>(&comp_unit, id, 0, func_name,
+ nullptr, func_range);
+ comp_unit.AddFunction(func_sp);
+ }
+ }
+ return func_sp;
+}
+
size_t SymbolFileBreakpad::ParseFunctions(CompileUnit &comp_unit) {
- // TODO
- return 0;
+ std::lock_guard<std::recursive_mutex> guard(GetModuleMutex());
+ return GetOrCreateFunction(comp_unit) ? 1 : 0;
}
bool SymbolFileBreakpad::ParseLineTable(CompileUnit &comp_unit) {
@@ -251,12 +286,88 @@ bool SymbolFileBreakpad::ParseSupportFiles(CompileUnit &comp_unit,
return true;
}
+size_t SymbolFileBreakpad::ParseBlocksRecursive(Function &func) {
+ std::lock_guard<std::recursive_mutex> guard(GetModuleMutex());
+ CompileUnit *comp_unit = func.GetCompileUnit();
+ lldbassert(comp_unit);
+ ParseInlineOriginRecords();
+ // A vector of each level's parent block. For example, when parsing
+ // "INLINE 0 ...", the current level is 0 and its parent block is the
+ // function block at index 0.
+ std::vector<Block *> blocks;
+ Block &block = func.GetBlock(false);
+ block.AddRange(Block::Range(0, func.GetAddressRange().GetByteSize()));
+ blocks.push_back(&block);
+
+ size_t blocks_added = 0;
+ addr_t func_base = func.GetAddressRange().GetBaseAddress().GetOffset();
+ CompUnitData &data = m_cu_data->GetEntryRef(comp_unit->GetID()).data;
+ LineIterator It(*m_objfile_sp, Record::Func, data.bookmark),
+ End(*m_objfile_sp);
+ ++It; // Skip the FUNC record.
+ size_t last_added_nest_level = 0;
+ while (It != End && Record::classify(*It) == Record::Inline) {
+ if (auto record = InlineRecord::parse(*It)) {
+ if (record->InlineNestLevel == 0 ||
+ record->InlineNestLevel <= last_added_nest_level + 1) {
+ last_added_nest_level = record->InlineNestLevel;
+ BlockSP block_sp = std::make_shared<Block>(It.GetBookmark().offset);
+ FileSpec callsite_file;
+ if (record->CallSiteFileNum < m_files->size())
+ callsite_file = (*m_files)[record->CallSiteFileNum];
+ llvm::StringRef name;
+ if (record->OriginNum < m_inline_origins->size())
+ name = (*m_inline_origins)[record->OriginNum];
+
+ Declaration callsite(callsite_file, record->CallSiteLineNum);
+ block_sp->SetInlinedFunctionInfo(name.str().c_str(),
+ /*mangled=*/nullptr,
+ /*decl_ptr=*/nullptr, &callsite);
+ for (const auto &range : record->Ranges) {
+ block_sp->AddRange(
+ Block::Range(range.first - func_base, range.second));
+ }
+ block_sp->FinalizeRanges();
+
+ blocks[record->InlineNestLevel]->AddChild(block_sp);
+ if (record->InlineNestLevel + 1 >= blocks.size()) {
+ blocks.resize(blocks.size() + 1);
+ }
+ blocks[record->InlineNestLevel + 1] = block_sp.get();
+ ++blocks_added;
+ }
+ }
+ ++It;
+ }
+ return blocks_added;
+}
+
+void SymbolFileBreakpad::ParseInlineOriginRecords() {
+ if (m_inline_origins)
+ return;
+ m_inline_origins.emplace();
+
+ Log *log = GetLogIfAllCategoriesSet(LIBLLDB_LOG_SYMBOLS);
+ for (llvm::StringRef line : lines(Record::InlineOrigin)) {
+ auto record = InlineOriginRecord::parse(line);
+ if (!record) {
+ LLDB_LOG(log, "Failed to parse: {0}. Skipping record.", line);
+ continue;
+ }
+
+ if (record->Number >= m_inline_origins->size())
+ m_inline_origins->resize(record->Number + 1);
+ (*m_inline_origins)[record->Number] = record->Name;
+ }
+}
+
uint32_t
SymbolFileBreakpad::ResolveSymbolContext(const Address &so_addr,
SymbolContextItem resolve_scope,
SymbolContext &sc) {
std::lock_guard<std::recursive_mutex> guard(GetModuleMutex());
- if (!(resolve_scope & (eSymbolContextCompUnit | eSymbolContextLineEntry)))
+ if (!(resolve_scope & (eSymbolContextCompUnit | eSymbolContextLineEntry |
+ eSymbolContextFunction | eSymbolContextBlock)))
return 0;
ParseCUData();
@@ -274,6 +385,22 @@ SymbolFileBreakpad::ResolveSymbolContext(const Address &so_addr,
}
}
+ if (resolve_scope & (eSymbolContextFunction | eSymbolContextBlock)) {
+ FunctionSP func_sp = GetOrCreateFunction(*sc.comp_unit);
+ if (func_sp) {
+ sc.function = func_sp.get();
+ result |= eSymbolContextFunction;
+ if (resolve_scope & eSymbolContextBlock) {
+ Block &block = func_sp->GetBlock(true);
+ sc.block = block.FindInnermostBlockByOffset(
+ so_addr.GetFileAddress() -
+ sc.function->GetAddressRange().GetBaseAddress().GetFileAddress());
+ if (sc.block)
+ result |= eSymbolContextBlock;
+ }
+ }
+ }
+
return result;
}
@@ -296,7 +423,20 @@ void SymbolFileBreakpad::FindFunctions(
ConstString name, const CompilerDeclContext &parent_decl_ctx,
FunctionNameType name_type_mask, bool include_inlines,
SymbolContextList &sc_list) {
- // TODO
+ std::lock_guard<std::recursive_mutex> guard(GetModuleMutex());
+ // TODO: Implement this with supported FunctionNameType.
+
+ for (uint32_t i = 0; i < GetNumCompileUnits(); ++i) {
+ CompUnitSP cu_sp = GetCompileUnitAtIndex(i);
+ FunctionSP func_sp = GetOrCreateFunction(*cu_sp);
+ if (func_sp && name == func_sp->GetNameNoArguments()) {
+ SymbolContext sc;
+ sc.comp_unit = cu_sp.get();
+ sc.function = func_sp.get();
+ sc.module_sp = func_sp->CalculateSymbolContextModule();
+ sc_list.Append(sc);
+ }
+ }
}
void SymbolFileBreakpad::FindFunctions(const RegularExpression &regex,
@@ -351,11 +491,6 @@ void SymbolFileBreakpad::AddSymbols(Symtab &symtab) {
size.hasValue(), /*contains_linker_annotations*/ false, /*flags*/ 0);
};
- for (llvm::StringRef line : lines(Record::Func)) {
- if (auto record = FuncRecord::parse(line))
- add_symbol(record->Address, record->Size, record->Name);
- }
-
for (llvm::StringRef line : lines(Record::Public)) {
if (auto record = PublicRecord::parse(line))
add_symbol(record->Address, llvm::None, record->Name);
@@ -727,6 +862,10 @@ void SymbolFileBreakpad::ParseLineTableAndSupportFiles(CompileUnit &cu,
End(*m_objfile_sp);
assert(Record::classify(*It) == Record::Func);
for (++It; It != End; ++It) {
+ // Skip INLINE records
+ if (Record::classify(*It) == Record::Inline)
+ continue;
+
auto record = LineRecord::parse(*It);
if (!record)
break;
@@ -783,3 +922,9 @@ void SymbolFileBreakpad::ParseUnwindData() {
}
m_unwind_data->win.Sort();
}
+
+uint64_t SymbolFileBreakpad::GetDebugInfoSize() {
+ // Breakpad files are all debug info.
+ return m_objfile_sp->GetByteSize();
+}
+
diff --git a/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.h b/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.h
index b0a35fa11de4..bf3e25c1a63e 100644
--- a/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.h
+++ b/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.h
@@ -37,9 +37,9 @@ public:
static void Initialize();
static void Terminate();
static void DebuggerInitialize(Debugger &debugger) {}
- static ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "breakpad"; }
- static const char *GetPluginDescriptionStatic() {
+ static llvm::StringRef GetPluginDescriptionStatic() {
return "Breakpad debug symbol file reader.";
}
@@ -63,6 +63,8 @@ public:
return lldb::eLanguageTypeUnknown;
}
+ lldb::FunctionSP GetOrCreateFunction(CompileUnit &comp_unit);
+
size_t ParseFunctions(CompileUnit &comp_unit) override;
bool ParseLineTable(CompileUnit &comp_unit) override;
@@ -79,7 +81,7 @@ public:
return false;
}
- size_t ParseBlocksRecursive(Function &func) override { return 0; }
+ size_t ParseBlocksRecursive(Function &func) override;
void FindGlobalVariables(ConstString name,
const CompilerDeclContext &parent_decl_ctx,
@@ -146,8 +148,9 @@ public:
GetUnwindPlan(const Address &address,
const RegisterInfoResolver &resolver) override;
- ConstString GetPluginName() override { return GetPluginNameStatic(); }
- uint32_t GetPluginVersion() override { return 1; }
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
+
+ uint64_t GetDebugInfoSize() override;
private:
// A class representing a position in the breakpad file. Useful for
@@ -220,11 +223,13 @@ private:
UnwindPlan::Row &row);
lldb::UnwindPlanSP ParseWinUnwindPlan(const Bookmark &bookmark,
const RegisterInfoResolver &resolver);
+ void ParseInlineOriginRecords();
using CompUnitMap = RangeDataVector<lldb::addr_t, lldb::addr_t, CompUnitData>;
llvm::Optional<std::vector<FileSpec>> m_files;
llvm::Optional<CompUnitMap> m_cu_data;
+ llvm::Optional<std::vector<llvm::StringRef>> m_inline_origins;
using UnwindMap = RangeDataVector<lldb::addr_t, lldb::addr_t, Bookmark>;
struct UnwindData {
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.cpp
index 60b6b726f6c0..4e09b523b778 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.cpp
@@ -75,12 +75,15 @@ void AppleDWARFIndex::GetGlobalVariables(
}
void AppleDWARFIndex::GetGlobalVariables(
- const DWARFUnit &cu, llvm::function_ref<bool(DWARFDIE die)> callback) {
+ DWARFUnit &cu, llvm::function_ref<bool(DWARFDIE die)> callback) {
if (!m_apple_names_up)
return;
+ lldbassert(!cu.GetSymbolFileDWARF().GetDwoNum());
+ const DWARFUnit &non_skeleton_cu = cu.GetNonSkeletonUnit();
DWARFMappedHash::DIEInfoArray hash_data;
- m_apple_names_up->AppendAllDIEsInRange(cu.GetOffset(), cu.GetNextUnitOffset(),
+ m_apple_names_up->AppendAllDIEsInRange(non_skeleton_cu.GetOffset(),
+ non_skeleton_cu.GetNextUnitOffset(),
hash_data);
DWARFMappedHash::ExtractDIEArray(hash_data, DIERefCallback(callback));
}
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.h b/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.h
index a7032f50e590..ef3cb5dee035 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.h
@@ -39,7 +39,7 @@ public:
GetGlobalVariables(const RegularExpression &regex,
llvm::function_ref<bool(DWARFDIE die)> callback) override;
void
- GetGlobalVariables(const DWARFUnit &cu,
+ GetGlobalVariables(DWARFUnit &cu,
llvm::function_ref<bool(DWARFDIE die)> callback) override;
void GetObjCMethods(ConstString class_name,
llvm::function_ref<bool(DWARFDIE die)> callback) override;
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h
index ffe24836955f..00123a4b9216 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h
@@ -32,7 +32,8 @@ public:
virtual lldb_private::Function *
ParseFunctionFromDWARF(lldb_private::CompileUnit &comp_unit,
- const DWARFDIE &die) = 0;
+ const DWARFDIE &die,
+ const lldb_private::AddressRange &range) = 0;
virtual bool
CompleteTypeFromDWARF(const DWARFDIE &die, lldb_private::Type *type,
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
index 46015f7b43b1..4ac6e165dda3 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
@@ -843,7 +843,8 @@ TypeSP DWARFASTParserClang::ParseEnum(const SymbolContext &sc,
}
clang_type = m_ast.CreateEnumerationType(
- attrs.name.GetCString(), GetClangDeclContextContainingDIE(die, nullptr),
+ attrs.name.GetStringRef(),
+ GetClangDeclContextContainingDIE(die, nullptr),
GetOwningClangModule(die), attrs.decl, enumerator_clang_type,
attrs.is_scoped_enum);
} else {
@@ -876,6 +877,37 @@ TypeSP DWARFASTParserClang::ParseEnum(const SymbolContext &sc,
return type_sp;
}
+static clang::CallingConv
+ConvertDWARFCallingConventionToClang(const ParsedDWARFTypeAttributes &attrs) {
+ switch (attrs.calling_convention) {
+ case llvm::dwarf::DW_CC_normal:
+ return clang::CC_C;
+ case llvm::dwarf::DW_CC_BORLAND_stdcall:
+ return clang::CC_X86StdCall;
+ case llvm::dwarf::DW_CC_BORLAND_msfastcall:
+ return clang::CC_X86FastCall;
+ case llvm::dwarf::DW_CC_LLVM_vectorcall:
+ return clang::CC_X86VectorCall;
+ case llvm::dwarf::DW_CC_BORLAND_pascal:
+ return clang::CC_X86Pascal;
+ case llvm::dwarf::DW_CC_LLVM_Win64:
+ return clang::CC_Win64;
+ case llvm::dwarf::DW_CC_LLVM_X86_64SysV:
+ return clang::CC_X86_64SysV;
+ case llvm::dwarf::DW_CC_LLVM_X86RegCall:
+ return clang::CC_X86RegCall;
+ default:
+ break;
+ }
+
+ Log *log(LogChannelDWARF::GetLogIfAny(DWARF_LOG_TYPE_COMPLETION |
+ DWARF_LOG_LOOKUPS));
+ LLDB_LOG(log, "Unsupported DW_AT_calling_convention value: {0}",
+ attrs.calling_convention);
+ // Use the default calling convention as a fallback.
+ return clang::CC_C;
+}
+
TypeSP DWARFASTParserClang::ParseSubroutine(const DWARFDIE &die,
ParsedDWARFTypeAttributes &attrs) {
Log *log(LogChannelDWARF::GetLogIfAny(DWARF_LOG_TYPE_COMPLETION |
@@ -954,11 +986,14 @@ TypeSP DWARFASTParserClang::ParseSubroutine(const DWARFDIE &die,
is_cxx_method = false;
}
+ clang::CallingConv calling_convention =
+ ConvertDWARFCallingConventionToClang(attrs);
+
// clang_type will get the function prototype clang type after this
// call
CompilerType clang_type = m_ast.CreateFunctionType(
return_clang_type, function_param_types.data(),
- function_param_types.size(), is_variadic, type_quals);
+ function_param_types.size(), is_variadic, type_quals, calling_convention);
if (attrs.name) {
bool type_handled = false;
@@ -1372,6 +1407,123 @@ TypeSP DWARFASTParserClang::ParsePointerToMemberType(
return nullptr;
}
+void DWARFASTParserClang::ParseInheritance(
+ const DWARFDIE &die, const DWARFDIE &parent_die,
+ const CompilerType class_clang_type, const AccessType default_accessibility,
+ const lldb::ModuleSP &module_sp,
+ std::vector<std::unique_ptr<clang::CXXBaseSpecifier>> &base_classes,
+ ClangASTImporter::LayoutInfo &layout_info) {
+
+ TypeSystemClang *ast =
+ llvm::dyn_cast_or_null<TypeSystemClang>(class_clang_type.GetTypeSystem());
+ if (ast == nullptr)
+ return;
+
+ // TODO: implement DW_TAG_inheritance type parsing.
+ DWARFAttributes attributes;
+ const size_t num_attributes = die.GetAttributes(attributes);
+ if (num_attributes == 0)
+ return;
+
+ DWARFFormValue encoding_form;
+ AccessType accessibility = default_accessibility;
+ bool is_virtual = false;
+ bool is_base_of_class = true;
+ off_t member_byte_offset = 0;
+
+ for (uint32_t i = 0; i < num_attributes; ++i) {
+ const dw_attr_t attr = attributes.AttributeAtIndex(i);
+ DWARFFormValue form_value;
+ if (attributes.ExtractFormValueAtIndex(i, form_value)) {
+ switch (attr) {
+ case DW_AT_type:
+ encoding_form = form_value;
+ break;
+ case DW_AT_data_member_location:
+ if (form_value.BlockData()) {
+ Value initialValue(0);
+ Value memberOffset(0);
+ const DWARFDataExtractor &debug_info_data = die.GetData();
+ uint32_t block_length = form_value.Unsigned();
+ uint32_t block_offset =
+ form_value.BlockData() - debug_info_data.GetDataStart();
+ if (DWARFExpression::Evaluate(
+ nullptr, nullptr, module_sp,
+ DataExtractor(debug_info_data, block_offset, block_length),
+ die.GetCU(), eRegisterKindDWARF, &initialValue, nullptr,
+ memberOffset, nullptr)) {
+ member_byte_offset = memberOffset.ResolveValue(nullptr).UInt();
+ }
+ } else {
+ // With DWARF 3 and later, if the value is an integer constant,
+ // this form value is the offset in bytes from the beginning of
+ // the containing entity.
+ member_byte_offset = form_value.Unsigned();
+ }
+ break;
+
+ case DW_AT_accessibility:
+ accessibility = DW_ACCESS_to_AccessType(form_value.Unsigned());
+ break;
+
+ case DW_AT_virtuality:
+ is_virtual = form_value.Boolean();
+ break;
+
+ default:
+ break;
+ }
+ }
+ }
+
+ Type *base_class_type = die.ResolveTypeUID(encoding_form.Reference());
+ if (base_class_type == nullptr) {
+ module_sp->ReportError("0x%8.8x: DW_TAG_inheritance failed to "
+ "resolve the base class at 0x%8.8x"
+ " from enclosing type 0x%8.8x. \nPlease file "
+ "a bug and attach the file at the start of "
+ "this error message",
+ die.GetOffset(),
+ encoding_form.Reference().GetOffset(),
+ parent_die.GetOffset());
+ return;
+ }
+
+ CompilerType base_class_clang_type = base_class_type->GetFullCompilerType();
+ assert(base_class_clang_type);
+ if (TypeSystemClang::IsObjCObjectOrInterfaceType(class_clang_type)) {
+ ast->SetObjCSuperClass(class_clang_type, base_class_clang_type);
+ return;
+ }
+ std::unique_ptr<clang::CXXBaseSpecifier> result =
+ ast->CreateBaseClassSpecifier(base_class_clang_type.GetOpaqueQualType(),
+ accessibility, is_virtual,
+ is_base_of_class);
+ if (!result)
+ return;
+
+ base_classes.push_back(std::move(result));
+
+ if (is_virtual) {
+ // Do not specify any offset for virtual inheritance. The DWARF
+ // produced by clang doesn't give us a constant offset, but gives
+ // us a DWARF expression that requires an actual object in memory.
+ // The DW_AT_data_member_location for a virtual base class looks
+ // like:
+ // DW_AT_data_member_location( DW_OP_dup, DW_OP_deref,
+ // DW_OP_constu(0x00000018), DW_OP_minus, DW_OP_deref,
+ // DW_OP_plus )
+ // Given this, there is really no valid response we can give to
+ // clang for virtual base class offsets, and this should eventually
+ // be removed from LayoutRecordType() in the external
+ // AST source in clang.
+ } else {
+ layout_info.base_offsets.insert(std::make_pair(
+ ast->GetAsCXXRecordDecl(base_class_clang_type.GetOpaqueQualType()),
+ clang::CharUnits::fromQuantity(member_byte_offset)));
+ }
+}
+
TypeSP DWARFASTParserClang::UpdateSymbolContextScopeForType(
const SymbolContext &sc, const DWARFDIE &die, TypeSP type_sp) {
if (!type_sp)
@@ -1584,6 +1736,7 @@ DWARFASTParserClang::ParseStructureLikeDIE(const SymbolContext &sc,
}
}
assert(tag_decl_kind != -1);
+ (void)tag_decl_kind;
bool clang_type_was_created = false;
clang_type.SetCompilerType(
&m_ast, dwarf->GetForwardDeclDieToClangType().lookup(die.GetDIE()));
@@ -1763,11 +1916,10 @@ public:
const CompilerType &property_opaque_type, // The property type is only
// required if you don't have an
// ivar decl
- clang::ObjCIvarDecl *ivar_decl, const char *property_setter_name,
- const char *property_getter_name, uint32_t property_attributes,
- const ClangASTMetadata *metadata)
+ const char *property_setter_name, const char *property_getter_name,
+ uint32_t property_attributes, const ClangASTMetadata *metadata)
: m_class_opaque_type(class_opaque_type), m_property_name(property_name),
- m_property_opaque_type(property_opaque_type), m_ivar_decl(ivar_decl),
+ m_property_opaque_type(property_opaque_type),
m_property_setter_name(property_setter_name),
m_property_getter_name(property_getter_name),
m_property_attributes(property_attributes) {
@@ -1786,7 +1938,6 @@ public:
m_class_opaque_type = rhs.m_class_opaque_type;
m_property_name = rhs.m_property_name;
m_property_opaque_type = rhs.m_property_opaque_type;
- m_ivar_decl = rhs.m_ivar_decl;
m_property_setter_name = rhs.m_property_setter_name;
m_property_getter_name = rhs.m_property_getter_name;
m_property_attributes = rhs.m_property_attributes;
@@ -1801,7 +1952,7 @@ public:
bool Finalize() {
return TypeSystemClang::AddObjCClassProperty(
m_class_opaque_type, m_property_name, m_property_opaque_type,
- m_ivar_decl, m_property_setter_name, m_property_getter_name,
+ /*ivar_decl=*/nullptr, m_property_setter_name, m_property_getter_name,
m_property_attributes, m_metadata_up.get());
}
@@ -1809,7 +1960,6 @@ private:
CompilerType m_class_opaque_type;
const char *m_property_name;
CompilerType m_property_opaque_type;
- clang::ObjCIvarDecl *m_ivar_decl;
const char *m_property_setter_name;
const char *m_property_getter_name;
uint32_t m_property_attributes;
@@ -1966,16 +2116,12 @@ bool DWARFASTParserClang::CompleteRecordType(const DWARFDIE &die,
TypeSystemClang::StartTagDeclarationDefinition(clang_type);
}
- int tag_decl_kind = -1;
AccessType default_accessibility = eAccessNone;
if (tag == DW_TAG_structure_type) {
- tag_decl_kind = clang::TTK_Struct;
default_accessibility = eAccessPublic;
} else if (tag == DW_TAG_union_type) {
- tag_decl_kind = clang::TTK_Union;
default_accessibility = eAccessPublic;
} else if (tag == DW_TAG_class_type) {
- tag_decl_kind = clang::TTK_Class;
default_accessibility = eAccessPrivate;
}
@@ -1999,10 +2145,8 @@ bool DWARFASTParserClang::CompleteRecordType(const DWARFDIE &die,
return true;
});
- for (DelayedPropertyList::iterator pi = delayed_properties.begin(),
- pe = delayed_properties.end();
- pi != pe; ++pi)
- pi->Finalize();
+ for (DelayedAddObjCClassProperty &property : delayed_properties)
+ property.Finalize();
}
}
@@ -2199,8 +2343,11 @@ size_t DWARFASTParserClang::ParseChildEnumerators(
return enumerators_added;
}
-Function *DWARFASTParserClang::ParseFunctionFromDWARF(CompileUnit &comp_unit,
- const DWARFDIE &die) {
+Function *
+DWARFASTParserClang::ParseFunctionFromDWARF(CompileUnit &comp_unit,
+ const DWARFDIE &die,
+ const AddressRange &func_range) {
+ assert(func_range.GetBaseAddress().IsValid());
DWARFRangeList func_ranges;
const char *name = nullptr;
const char *mangled = nullptr;
@@ -2220,149 +2367,126 @@ Function *DWARFASTParserClang::ParseFunctionFromDWARF(CompileUnit &comp_unit,
if (die.GetDIENamesAndRanges(name, mangled, func_ranges, decl_file, decl_line,
decl_column, call_file, call_line, call_column,
&frame_base)) {
+ Mangled func_name;
+ if (mangled)
+ func_name.SetValue(ConstString(mangled), true);
+ else if ((die.GetParent().Tag() == DW_TAG_compile_unit ||
+ die.GetParent().Tag() == DW_TAG_partial_unit) &&
+ Language::LanguageIsCPlusPlus(
+ SymbolFileDWARF::GetLanguage(*die.GetCU())) &&
+ !Language::LanguageIsObjC(
+ SymbolFileDWARF::GetLanguage(*die.GetCU())) &&
+ name && strcmp(name, "main") != 0) {
+ // If the mangled name is not present in the DWARF, generate the
+ // demangled name using the decl context. We skip if the function is
+ // "main" as its name is never mangled.
+ bool is_static = false;
+ bool is_variadic = false;
+ bool has_template_params = false;
+ unsigned type_quals = 0;
+ std::vector<CompilerType> param_types;
+ std::vector<clang::ParmVarDecl *> param_decls;
+ StreamString sstr;
+
+ DWARFDeclContext decl_ctx = SymbolFileDWARF::GetDWARFDeclContext(die);
+ sstr << decl_ctx.GetQualifiedName();
- // Union of all ranges in the function DIE (if the function is
- // discontiguous)
- AddressRange func_range;
- lldb::addr_t lowest_func_addr = func_ranges.GetMinRangeBase(0);
- lldb::addr_t highest_func_addr = func_ranges.GetMaxRangeEnd(0);
- if (lowest_func_addr != LLDB_INVALID_ADDRESS &&
- lowest_func_addr <= highest_func_addr) {
- ModuleSP module_sp(die.GetModule());
- func_range.GetBaseAddress().ResolveAddressUsingFileSections(
- lowest_func_addr, module_sp->GetSectionList());
- if (func_range.GetBaseAddress().IsValid())
- func_range.SetByteSize(highest_func_addr - lowest_func_addr);
- }
-
- if (func_range.GetBaseAddress().IsValid()) {
- Mangled func_name;
- if (mangled)
- func_name.SetValue(ConstString(mangled), true);
- else if ((die.GetParent().Tag() == DW_TAG_compile_unit ||
- die.GetParent().Tag() == DW_TAG_partial_unit) &&
- Language::LanguageIsCPlusPlus(
- SymbolFileDWARF::GetLanguage(*die.GetCU())) &&
- !Language::LanguageIsObjC(
- SymbolFileDWARF::GetLanguage(*die.GetCU())) &&
- name && strcmp(name, "main") != 0) {
- // If the mangled name is not present in the DWARF, generate the
- // demangled name using the decl context. We skip if the function is
- // "main" as its name is never mangled.
- bool is_static = false;
- bool is_variadic = false;
- bool has_template_params = false;
- unsigned type_quals = 0;
- std::vector<CompilerType> param_types;
- std::vector<clang::ParmVarDecl *> param_decls;
- StreamString sstr;
-
- DWARFDeclContext decl_ctx = SymbolFileDWARF::GetDWARFDeclContext(die);
- sstr << decl_ctx.GetQualifiedName();
-
- clang::DeclContext *containing_decl_ctx =
- GetClangDeclContextContainingDIE(die, nullptr);
- ParseChildParameters(containing_decl_ctx, die, true, is_static,
- is_variadic, has_template_params, param_types,
- param_decls, type_quals);
- sstr << "(";
- for (size_t i = 0; i < param_types.size(); i++) {
- if (i > 0)
- sstr << ", ";
- sstr << param_types[i].GetTypeName();
- }
- if (is_variadic)
- sstr << ", ...";
- sstr << ")";
- if (type_quals & clang::Qualifiers::Const)
- sstr << " const";
-
- func_name.SetValue(ConstString(sstr.GetString()), false);
- } else
- func_name.SetValue(ConstString(name), false);
-
- FunctionSP func_sp;
- std::unique_ptr<Declaration> decl_up;
- if (decl_file != 0 || decl_line != 0 || decl_column != 0)
- decl_up = std::make_unique<Declaration>(die.GetCU()->GetFile(decl_file),
- decl_line, decl_column);
-
- SymbolFileDWARF *dwarf = die.GetDWARF();
- // Supply the type _only_ if it has already been parsed
- Type *func_type = dwarf->GetDIEToType().lookup(die.GetDIE());
-
- assert(func_type == nullptr || func_type != DIE_IS_BEING_PARSED);
-
- if (dwarf->FixupAddress(func_range.GetBaseAddress())) {
- const user_id_t func_user_id = die.GetID();
- func_sp =
- std::make_shared<Function>(&comp_unit,
+ clang::DeclContext *containing_decl_ctx =
+ GetClangDeclContextContainingDIE(die, nullptr);
+ ParseChildParameters(containing_decl_ctx, die, true, is_static,
+ is_variadic, has_template_params, param_types,
+ param_decls, type_quals);
+ sstr << "(";
+ for (size_t i = 0; i < param_types.size(); i++) {
+ if (i > 0)
+ sstr << ", ";
+ sstr << param_types[i].GetTypeName();
+ }
+ if (is_variadic)
+ sstr << ", ...";
+ sstr << ")";
+ if (type_quals & clang::Qualifiers::Const)
+ sstr << " const";
+
+ func_name.SetValue(ConstString(sstr.GetString()), false);
+ } else
+ func_name.SetValue(ConstString(name), false);
+
+ FunctionSP func_sp;
+ std::unique_ptr<Declaration> decl_up;
+ if (decl_file != 0 || decl_line != 0 || decl_column != 0)
+ decl_up = std::make_unique<Declaration>(die.GetCU()->GetFile(decl_file),
+ decl_line, decl_column);
+
+ SymbolFileDWARF *dwarf = die.GetDWARF();
+ // Supply the type _only_ if it has already been parsed
+ Type *func_type = dwarf->GetDIEToType().lookup(die.GetDIE());
+
+ assert(func_type == nullptr || func_type != DIE_IS_BEING_PARSED);
+
+ const user_id_t func_user_id = die.GetID();
+ func_sp =
+ std::make_shared<Function>(&comp_unit,
func_user_id, // UserID is the DIE offset
func_user_id, func_name, func_type,
- func_range); // first address range
+ func_range); // first address range
- if (func_sp.get() != nullptr) {
- if (frame_base.IsValid())
- func_sp->GetFrameBaseExpression() = frame_base;
- comp_unit.AddFunction(func_sp);
- return func_sp.get();
- }
- }
+ if (func_sp.get() != nullptr) {
+ if (frame_base.IsValid())
+ func_sp->GetFrameBaseExpression() = frame_base;
+ comp_unit.AddFunction(func_sp);
+ return func_sp.get();
}
}
return nullptr;
}
-void DWARFASTParserClang::ParseSingleMember(
- const DWARFDIE &die, const DWARFDIE &parent_die,
- const lldb_private::CompilerType &class_clang_type,
- lldb::AccessType default_accessibility,
- DelayedPropertyList &delayed_properties,
- lldb_private::ClangASTImporter::LayoutInfo &layout_info,
- FieldInfo &last_field_info) {
- ModuleSP module_sp = parent_die.GetDWARF()->GetObjectFile()->GetModule();
- const dw_tag_t tag = die.Tag();
- // Get the parent byte size so we can verify any members will fit
- const uint64_t parent_byte_size =
- parent_die.GetAttributeValueAsUnsigned(DW_AT_byte_size, UINT64_MAX);
- const uint64_t parent_bit_size =
- parent_byte_size == UINT64_MAX ? UINT64_MAX : parent_byte_size * 8;
-
- DWARFAttributes attributes;
- const size_t num_attributes = die.GetAttributes(attributes);
- if (num_attributes == 0)
- return;
-
+namespace {
+/// Parsed form of all attributes that are relevant for parsing type members.
+struct MemberAttributes {
+ explicit MemberAttributes(const DWARFDIE &die, const DWARFDIE &parent_die,
+ ModuleSP module_sp);
const char *name = nullptr;
+ /// Indicates how many bits into the word (according to the host endianness)
+ /// the low-order bit of the field starts. Can be negative.
+ int64_t bit_offset = 0;
+ /// Indicates the size of the field in bits.
+ size_t bit_size = 0;
+ uint64_t data_bit_offset = UINT64_MAX;
+ AccessType accessibility = eAccessNone;
+ llvm::Optional<uint64_t> byte_size;
+ DWARFFormValue encoding_form;
+ /// Indicates the byte offset of the word from the base address of the
+ /// structure.
+ uint32_t member_byte_offset;
+ bool is_artificial = false;
+ /// On DW_TAG_member, this means the member is static.
+ bool is_external = false;
+};
+
+/// Parsed form of all attributes that are relevant for parsing Objective-C
+/// properties.
+struct PropertyAttributes {
+ explicit PropertyAttributes(const DWARFDIE &die);
const char *prop_name = nullptr;
const char *prop_getter_name = nullptr;
const char *prop_setter_name = nullptr;
+ /// \see clang::ObjCPropertyAttribute
uint32_t prop_attributes = 0;
+};
+} // namespace
- bool is_artificial = false;
- DWARFFormValue encoding_form;
- AccessType accessibility = eAccessNone;
- uint32_t member_byte_offset =
- (parent_die.Tag() == DW_TAG_union_type) ? 0 : UINT32_MAX;
- llvm::Optional<uint64_t> byte_size;
- int64_t bit_offset = 0;
- uint64_t data_bit_offset = UINT64_MAX;
- size_t bit_size = 0;
- bool is_external =
- false; // On DW_TAG_members, this means the member is static
- uint32_t i;
- for (i = 0; i < num_attributes && !is_artificial; ++i) {
+MemberAttributes::MemberAttributes(const DWARFDIE &die,
+ const DWARFDIE &parent_die,
+ ModuleSP module_sp) {
+ member_byte_offset = (parent_die.Tag() == DW_TAG_union_type) ? 0 : UINT32_MAX;
+
+ DWARFAttributes attributes;
+ const size_t num_attributes = die.GetAttributes(attributes);
+ for (std::size_t i = 0; i < num_attributes; ++i) {
const dw_attr_t attr = attributes.AttributeAtIndex(i);
DWARFFormValue form_value;
if (attributes.ExtractFormValueAtIndex(i, form_value)) {
- // DW_AT_data_member_location indicates the byte offset of the
- // word from the base address of the structure.
- //
- // DW_AT_bit_offset indicates how many bits into the word
- // (according to the host endianness) the low-order bit of the
- // field starts. AT_bit_offset can be negative.
- //
- // DW_AT_bit_size indicates the size of the field in bits.
switch (attr) {
case DW_AT_name:
name = form_value.AsCString();
@@ -2413,6 +2537,42 @@ void DWARFASTParserClang::ParseSingleMember(
case DW_AT_artificial:
is_artificial = form_value.Boolean();
break;
+ case DW_AT_external:
+ is_external = form_value.Boolean();
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
+ // Clang has a DWARF generation bug where sometimes it represents
+ // fields that are references with bad byte size and bit size/offset
+ // information such as:
+ //
+ // DW_AT_byte_size( 0x00 )
+ // DW_AT_bit_size( 0x40 )
+ // DW_AT_bit_offset( 0xffffffffffffffc0 )
+ //
+ // So check the bit offset to make sure it is sane, and if the values
+ // are not sane, remove them. If we don't do this then we will end up
+ // with a crash if we try to use this type in an expression when clang
+ // becomes unhappy with its recycled debug info.
+ if (byte_size.getValueOr(0) == 0 && bit_offset < 0) {
+ bit_size = 0;
+ bit_offset = 0;
+ }
+}
+
+PropertyAttributes::PropertyAttributes(const DWARFDIE &die) {
+
+ DWARFAttributes attributes;
+ const size_t num_attributes = die.GetAttributes(attributes);
+ for (size_t i = 0; i < num_attributes; ++i) {
+ const dw_attr_t attr = attributes.AttributeAtIndex(i);
+ DWARFFormValue form_value;
+ if (attributes.ExtractFormValueAtIndex(i, form_value)) {
+ switch (attr) {
case DW_AT_APPLE_property_name:
prop_name = form_value.AsCString();
break;
@@ -2425,315 +2585,330 @@ void DWARFASTParserClang::ParseSingleMember(
case DW_AT_APPLE_property_attribute:
prop_attributes = form_value.Unsigned();
break;
- case DW_AT_external:
- is_external = form_value.Boolean();
- break;
-
default:
- case DW_AT_declaration:
- case DW_AT_description:
- case DW_AT_mutable:
- case DW_AT_visibility:
- case DW_AT_sibling:
break;
}
}
}
- if (prop_name) {
- ConstString fixed_setter;
+ if (!prop_name)
+ return;
+ ConstString fixed_setter;
- // Check if the property getter/setter were provided as full names.
- // We want basenames, so we extract them.
+ // Check if the property getter/setter were provided as full names.
+ // We want basenames, so we extract them.
+ if (prop_getter_name && prop_getter_name[0] == '-') {
+ ObjCLanguage::MethodName prop_getter_method(prop_getter_name, true);
+ prop_getter_name = prop_getter_method.GetSelector().GetCString();
+ }
- if (prop_getter_name && prop_getter_name[0] == '-') {
- ObjCLanguage::MethodName prop_getter_method(prop_getter_name, true);
- prop_getter_name = prop_getter_method.GetSelector().GetCString();
- }
+ if (prop_setter_name && prop_setter_name[0] == '-') {
+ ObjCLanguage::MethodName prop_setter_method(prop_setter_name, true);
+ prop_setter_name = prop_setter_method.GetSelector().GetCString();
+ }
- if (prop_setter_name && prop_setter_name[0] == '-') {
- ObjCLanguage::MethodName prop_setter_method(prop_setter_name, true);
- prop_setter_name = prop_setter_method.GetSelector().GetCString();
- }
+ // If the names haven't been provided, they need to be filled in.
+ if (!prop_getter_name)
+ prop_getter_name = prop_name;
+ if (!prop_setter_name && prop_name[0] &&
+ !(prop_attributes & DW_APPLE_PROPERTY_readonly)) {
+ StreamString ss;
- // If the names haven't been provided, they need to be filled in.
+ ss.Printf("set%c%s:", toupper(prop_name[0]), &prop_name[1]);
- if (!prop_getter_name) {
- prop_getter_name = prop_name;
- }
- if (!prop_setter_name && prop_name[0] &&
- !(prop_attributes & DW_APPLE_PROPERTY_readonly)) {
- StreamString ss;
+ fixed_setter.SetString(ss.GetString());
+ prop_setter_name = fixed_setter.GetCString();
+ }
+}
- ss.Printf("set%c%s:", toupper(prop_name[0]), &prop_name[1]);
+void DWARFASTParserClang::ParseObjCProperty(
+ const DWARFDIE &die, const DWARFDIE &parent_die,
+ const lldb_private::CompilerType &class_clang_type,
+ DelayedPropertyList &delayed_properties) {
+ // This function can only parse DW_TAG_APPLE_property.
+ assert(die.Tag() == DW_TAG_APPLE_property);
- fixed_setter.SetString(ss.GetString());
- prop_setter_name = fixed_setter.GetCString();
- }
- }
+ ModuleSP module_sp = parent_die.GetDWARF()->GetObjectFile()->GetModule();
- // Clang has a DWARF generation bug where sometimes it represents
- // fields that are references with bad byte size and bit size/offset
- // information such as:
- //
- // DW_AT_byte_size( 0x00 )
- // DW_AT_bit_size( 0x40 )
- // DW_AT_bit_offset( 0xffffffffffffffc0 )
- //
- // So check the bit offset to make sure it is sane, and if the values
- // are not sane, remove them. If we don't do this then we will end up
- // with a crash if we try to use this type in an expression when clang
- // becomes unhappy with its recycled debug info.
+ const MemberAttributes attrs(die, parent_die, module_sp);
+ const PropertyAttributes propAttrs(die);
- if (byte_size.getValueOr(0) == 0 && bit_offset < 0) {
- bit_size = 0;
- bit_offset = 0;
+ if (!propAttrs.prop_name) {
+ module_sp->ReportError(
+ "0x%8.8" PRIx64 ": DW_TAG_APPLE_property has no name.", die.GetID());
+ return;
+ }
+
+ Type *member_type = die.ResolveTypeUID(attrs.encoding_form.Reference());
+ if (!member_type) {
+ module_sp->ReportError("0x%8.8" PRIx64
+ ": DW_TAG_APPLE_property '%s' refers to type 0x%8.8x"
+ " which was unable to be parsed",
+ die.GetID(), propAttrs.prop_name,
+ attrs.encoding_form.Reference().GetOffset());
+ return;
}
+ ClangASTMetadata metadata;
+ metadata.SetUserID(die.GetID());
+ delayed_properties.push_back(DelayedAddObjCClassProperty(
+ class_clang_type, propAttrs.prop_name,
+ member_type->GetLayoutCompilerType(), propAttrs.prop_setter_name,
+ propAttrs.prop_getter_name, propAttrs.prop_attributes, &metadata));
+}
+
+void DWARFASTParserClang::ParseSingleMember(
+ const DWARFDIE &die, const DWARFDIE &parent_die,
+ const lldb_private::CompilerType &class_clang_type,
+ lldb::AccessType default_accessibility,
+ lldb_private::ClangASTImporter::LayoutInfo &layout_info,
+ FieldInfo &last_field_info) {
+ // This function can only parse DW_TAG_member.
+ assert(die.Tag() == DW_TAG_member);
+
+ ModuleSP module_sp = parent_die.GetDWARF()->GetObjectFile()->GetModule();
+ const dw_tag_t tag = die.Tag();
+ // Get the parent byte size so we can verify any members will fit
+ const uint64_t parent_byte_size =
+ parent_die.GetAttributeValueAsUnsigned(DW_AT_byte_size, UINT64_MAX);
+ const uint64_t parent_bit_size =
+ parent_byte_size == UINT64_MAX ? UINT64_MAX : parent_byte_size * 8;
+
+ // FIXME: Remove the workarounds below and make this const.
+ MemberAttributes attrs(die, parent_die, module_sp);
+
const bool class_is_objc_object_or_interface =
TypeSystemClang::IsObjCObjectOrInterfaceType(class_clang_type);
// FIXME: Make Clang ignore Objective-C accessibility for expressions
if (class_is_objc_object_or_interface)
- accessibility = eAccessNone;
+ attrs.accessibility = eAccessNone;
// Handle static members
- if (is_external && member_byte_offset == UINT32_MAX) {
- Type *var_type = die.ResolveTypeUID(encoding_form.Reference());
+ if (attrs.is_external && attrs.member_byte_offset == UINT32_MAX) {
+ Type *var_type = die.ResolveTypeUID(attrs.encoding_form.Reference());
if (var_type) {
- if (accessibility == eAccessNone)
- accessibility = eAccessPublic;
+ if (attrs.accessibility == eAccessNone)
+ attrs.accessibility = eAccessPublic;
TypeSystemClang::AddVariableToRecordType(
- class_clang_type, name, var_type->GetForwardCompilerType(),
- accessibility);
+ class_clang_type, attrs.name, var_type->GetForwardCompilerType(),
+ attrs.accessibility);
}
return;
}
- if (!is_artificial) {
- Type *member_type = die.ResolveTypeUID(encoding_form.Reference());
-
- clang::FieldDecl *field_decl = nullptr;
- const uint64_t character_width = 8;
- const uint64_t word_width = 32;
- if (tag == DW_TAG_member) {
- if (member_type) {
- CompilerType member_clang_type = member_type->GetLayoutCompilerType();
-
- if (accessibility == eAccessNone)
- accessibility = default_accessibility;
-
- uint64_t field_bit_offset =
- (member_byte_offset == UINT32_MAX ? 0 : (member_byte_offset * 8));
-
- if (bit_size > 0) {
- FieldInfo this_field_info;
- this_field_info.bit_offset = field_bit_offset;
- this_field_info.bit_size = bit_size;
+ Type *member_type = die.ResolveTypeUID(attrs.encoding_form.Reference());
+ if (!member_type) {
+ if (attrs.name)
+ module_sp->ReportError(
+ "0x%8.8" PRIx64 ": DW_TAG_member '%s' refers to type 0x%8.8x"
+ " which was unable to be parsed",
+ die.GetID(), attrs.name, attrs.encoding_form.Reference().GetOffset());
+ else
+ module_sp->ReportError(
+ "0x%8.8" PRIx64 ": DW_TAG_member refers to type 0x%8.8x"
+ " which was unable to be parsed",
+ die.GetID(), attrs.encoding_form.Reference().GetOffset());
+ return;
+ }
- if (data_bit_offset != UINT64_MAX) {
- this_field_info.bit_offset = data_bit_offset;
- } else {
- if (!byte_size)
- byte_size = member_type->GetByteSize(nullptr);
+ const uint64_t character_width = 8;
+ const uint64_t word_width = 32;
+ CompilerType member_clang_type = member_type->GetLayoutCompilerType();
- ObjectFile *objfile = die.GetDWARF()->GetObjectFile();
- if (objfile->GetByteOrder() == eByteOrderLittle) {
- this_field_info.bit_offset += byte_size.getValueOr(0) * 8;
- this_field_info.bit_offset -= (bit_offset + bit_size);
- } else {
- this_field_info.bit_offset += bit_offset;
- }
- }
+ if (attrs.accessibility == eAccessNone)
+ attrs.accessibility = default_accessibility;
- // The ObjC runtime knows the byte offset but we still need to provide
- // the bit-offset in the layout. It just means something different then
- // what it does in C and C++. So we skip this check for ObjC types.
- //
- // We also skip this for fields of a union since they will all have a
- // zero offset.
- if (!TypeSystemClang::IsObjCObjectOrInterfaceType(class_clang_type) &&
- !(parent_die.Tag() == DW_TAG_union_type && this_field_info.bit_offset == 0) &&
- ((this_field_info.bit_offset >= parent_bit_size) ||
- (last_field_info.IsBitfield() &&
- !last_field_info.NextBitfieldOffsetIsValid(
- this_field_info.bit_offset)))) {
- ObjectFile *objfile = die.GetDWARF()->GetObjectFile();
- objfile->GetModule()->ReportWarning(
- "0x%8.8" PRIx64 ": %s bitfield named \"%s\" has invalid "
- "bit offset (0x%8.8" PRIx64
- ") member will be ignored. Please file a bug against the "
- "compiler and include the preprocessed output for %s\n",
- die.GetID(), DW_TAG_value_to_name(tag), name,
- this_field_info.bit_offset, GetUnitName(parent_die).c_str());
- return;
- }
+ uint64_t field_bit_offset = (attrs.member_byte_offset == UINT32_MAX
+ ? 0
+ : (attrs.member_byte_offset * 8));
- // Update the field bit offset we will report for layout
- field_bit_offset = this_field_info.bit_offset;
+ if (attrs.bit_size > 0) {
+ FieldInfo this_field_info;
+ this_field_info.bit_offset = field_bit_offset;
+ this_field_info.bit_size = attrs.bit_size;
- // Objective-C has invalid DW_AT_bit_offset values in older
- // versions of clang, so we have to be careful and only insert
- // unnamed bitfields if we have a new enough clang.
- bool detect_unnamed_bitfields = true;
+ if (attrs.data_bit_offset != UINT64_MAX) {
+ this_field_info.bit_offset = attrs.data_bit_offset;
+ } else {
+ if (!attrs.byte_size)
+ attrs.byte_size = member_type->GetByteSize(nullptr);
- if (class_is_objc_object_or_interface)
- detect_unnamed_bitfields =
- die.GetCU()->Supports_unnamed_objc_bitfields();
+ ObjectFile *objfile = die.GetDWARF()->GetObjectFile();
+ if (objfile->GetByteOrder() == eByteOrderLittle) {
+ this_field_info.bit_offset += attrs.byte_size.getValueOr(0) * 8;
+ this_field_info.bit_offset -= (attrs.bit_offset + attrs.bit_size);
+ } else {
+ this_field_info.bit_offset += attrs.bit_offset;
+ }
+ }
- if (detect_unnamed_bitfields) {
- clang::Optional<FieldInfo> unnamed_field_info;
- uint64_t last_field_end = 0;
+ // The ObjC runtime knows the byte offset but we still need to provide
+ // the bit-offset in the layout. It just means something different than
+ // what it does in C and C++. So we skip this check for ObjC types.
+ //
+ // We also skip this for fields of a union since they will all have a
+ // zero offset.
+ if (!TypeSystemClang::IsObjCObjectOrInterfaceType(class_clang_type) &&
+ !(parent_die.Tag() == DW_TAG_union_type &&
+ this_field_info.bit_offset == 0) &&
+ ((this_field_info.bit_offset >= parent_bit_size) ||
+ (last_field_info.IsBitfield() &&
+ !last_field_info.NextBitfieldOffsetIsValid(
+ this_field_info.bit_offset)))) {
+ ObjectFile *objfile = die.GetDWARF()->GetObjectFile();
+ objfile->GetModule()->ReportWarning(
+ "0x%8.8" PRIx64 ": %s bitfield named \"%s\" has invalid "
+ "bit offset (0x%8.8" PRIx64
+ ") member will be ignored. Please file a bug against the "
+ "compiler and include the preprocessed output for %s\n",
+ die.GetID(), DW_TAG_value_to_name(tag), attrs.name,
+ this_field_info.bit_offset, GetUnitName(parent_die).c_str());
+ return;
+ }
- last_field_end =
- last_field_info.bit_offset + last_field_info.bit_size;
+ // Update the field bit offset we will report for layout
+ field_bit_offset = this_field_info.bit_offset;
- if (!last_field_info.IsBitfield()) {
- // The last field was not a bit-field...
- // but if it did take up the entire word then we need to extend
- // last_field_end so the bit-field does not step into the last
- // fields padding.
- if (last_field_end != 0 && ((last_field_end % word_width) != 0))
- last_field_end += word_width - (last_field_end % word_width);
- }
+ // Objective-C has invalid DW_AT_bit_offset values in older
+ // versions of clang, so we have to be careful and only insert
+ // unnamed bitfields if we have a new enough clang.
+ bool detect_unnamed_bitfields = true;
- // If we have a gap between the last_field_end and the current
- // field we have an unnamed bit-field.
- // If we have a base class, we assume there is no unnamed
- // bit-field if this is the first field since the gap can be
- // attributed to the members from the base class. This assumption
- // is not correct if the first field of the derived class is
- // indeed an unnamed bit-field. We currently do not have the
- // machinary to track the offset of the last field of classes we
- // have seen before, so we are not handling this case.
- if (this_field_info.bit_offset != last_field_end &&
- this_field_info.bit_offset > last_field_end &&
- !(last_field_info.bit_offset == 0 &&
- last_field_info.bit_size == 0 &&
- layout_info.base_offsets.size() != 0)) {
- unnamed_field_info = FieldInfo{};
- unnamed_field_info->bit_size =
- this_field_info.bit_offset - last_field_end;
- unnamed_field_info->bit_offset = last_field_end;
- }
+ if (class_is_objc_object_or_interface)
+ detect_unnamed_bitfields =
+ die.GetCU()->Supports_unnamed_objc_bitfields();
- if (unnamed_field_info) {
- clang::FieldDecl *unnamed_bitfield_decl =
- TypeSystemClang::AddFieldToRecordType(
- class_clang_type, llvm::StringRef(),
- m_ast.GetBuiltinTypeForEncodingAndBitSize(eEncodingSint,
- word_width),
- accessibility, unnamed_field_info->bit_size);
+ if (detect_unnamed_bitfields) {
+ llvm::Optional<FieldInfo> unnamed_field_info;
+ uint64_t last_field_end = 0;
- layout_info.field_offsets.insert(std::make_pair(
- unnamed_bitfield_decl, unnamed_field_info->bit_offset));
- }
- }
+ last_field_end = last_field_info.bit_offset + last_field_info.bit_size;
- last_field_info = this_field_info;
- last_field_info.SetIsBitfield(true);
- } else {
- last_field_info.bit_offset = field_bit_offset;
+ if (!last_field_info.IsBitfield()) {
+ // The last field was not a bit-field...
+ // but if it did take up the entire word then we need to extend
+ // last_field_end so the bit-field does not step into the last
+ // fields padding.
+ if (last_field_end != 0 && ((last_field_end % word_width) != 0))
+ last_field_end += word_width - (last_field_end % word_width);
+ }
- if (llvm::Optional<uint64_t> clang_type_size =
- member_type->GetByteSize(nullptr)) {
- last_field_info.bit_size = *clang_type_size * character_width;
- }
+ // If we have a gap between the last_field_end and the current
+ // field we have an unnamed bit-field.
+ // If we have a base class, we assume there is no unnamed
+ // bit-field if this is the first field since the gap can be
+ // attributed to the members from the base class. This assumption
+ // is not correct if the first field of the derived class is
+ // indeed an unnamed bit-field. We currently do not have the
+ // machinery to track the offset of the last field of classes we
+ // have seen before, so we are not handling this case.
+ if (this_field_info.bit_offset != last_field_end &&
+ this_field_info.bit_offset > last_field_end &&
+ !(last_field_info.bit_offset == 0 &&
+ last_field_info.bit_size == 0 &&
+ layout_info.base_offsets.size() != 0)) {
+ unnamed_field_info = FieldInfo{};
+ unnamed_field_info->bit_size =
+ this_field_info.bit_offset - last_field_end;
+ unnamed_field_info->bit_offset = last_field_end;
+ }
- last_field_info.SetIsBitfield(false);
- }
+ if (unnamed_field_info) {
+ clang::FieldDecl *unnamed_bitfield_decl =
+ TypeSystemClang::AddFieldToRecordType(
+ class_clang_type, llvm::StringRef(),
+ m_ast.GetBuiltinTypeForEncodingAndBitSize(eEncodingSint,
+ word_width),
+ attrs.accessibility, unnamed_field_info->bit_size);
- if (!member_clang_type.IsCompleteType())
- member_clang_type.GetCompleteType();
-
- {
- // Older versions of clang emit array[0] and array[1] in the
- // same way (<rdar://problem/12566646>). If the current field
- // is at the end of the structure, then there is definitely no
- // room for extra elements and we override the type to
- // array[0].
-
- CompilerType member_array_element_type;
- uint64_t member_array_size;
- bool member_array_is_incomplete;
-
- if (member_clang_type.IsArrayType(&member_array_element_type,
- &member_array_size,
- &member_array_is_incomplete) &&
- !member_array_is_incomplete) {
- uint64_t parent_byte_size =
- parent_die.GetAttributeValueAsUnsigned(DW_AT_byte_size,
- UINT64_MAX);
-
- if (member_byte_offset >= parent_byte_size) {
- if (member_array_size != 1 &&
- (member_array_size != 0 ||
- member_byte_offset > parent_byte_size)) {
- module_sp->ReportError(
- "0x%8.8" PRIx64
- ": DW_TAG_member '%s' refers to type 0x%8.8x"
- " which extends beyond the bounds of 0x%8.8" PRIx64,
- die.GetID(), name, encoding_form.Reference().GetOffset(),
- parent_die.GetID());
- }
+ layout_info.field_offsets.insert(std::make_pair(
+ unnamed_bitfield_decl, unnamed_field_info->bit_offset));
+ }
+ }
- member_clang_type =
- m_ast.CreateArrayType(member_array_element_type, 0, false);
- }
- }
- }
+ last_field_info = this_field_info;
+ last_field_info.SetIsBitfield(true);
+ } else {
+ last_field_info.bit_offset = field_bit_offset;
- RequireCompleteType(member_clang_type);
+ if (llvm::Optional<uint64_t> clang_type_size =
+ member_type->GetByteSize(nullptr)) {
+ last_field_info.bit_size = *clang_type_size * character_width;
+ }
- field_decl = TypeSystemClang::AddFieldToRecordType(
- class_clang_type, name, member_clang_type, accessibility,
- bit_size);
+ last_field_info.SetIsBitfield(false);
+ }
- m_ast.SetMetadataAsUserID(field_decl, die.GetID());
+ // Don't turn artificial members such as vtable pointers into real FieldDecls
+ // in our AST. Clang will re-create those artificial members and they would
+ // otherwise just overlap in the layout with the FieldDecls we add here.
+ // This needs to be done after updating FieldInfo, which keeps track of where
+ // fields start/end, so we don't later try to fill the space of this
+ // artificial member with (unnamed bitfield) padding.
+ // FIXME: This check should verify that this is indeed an artificial member
+ // we are supposed to ignore.
+ if (attrs.is_artificial)
+ return;
- layout_info.field_offsets.insert(
- std::make_pair(field_decl, field_bit_offset));
- } else {
- if (name)
+ if (!member_clang_type.IsCompleteType())
+ member_clang_type.GetCompleteType();
+
+ {
+ // Older versions of clang emit array[0] and array[1] in the
+ // same way (<rdar://problem/12566646>). If the current field
+ // is at the end of the structure, then there is definitely no
+ // room for extra elements and we override the type to
+ // array[0].
+
+ CompilerType member_array_element_type;
+ uint64_t member_array_size;
+ bool member_array_is_incomplete;
+
+ if (member_clang_type.IsArrayType(&member_array_element_type,
+ &member_array_size,
+ &member_array_is_incomplete) &&
+ !member_array_is_incomplete) {
+ uint64_t parent_byte_size =
+ parent_die.GetAttributeValueAsUnsigned(DW_AT_byte_size, UINT64_MAX);
+
+ if (attrs.member_byte_offset >= parent_byte_size) {
+ if (member_array_size != 1 &&
+ (member_array_size != 0 ||
+ attrs.member_byte_offset > parent_byte_size)) {
module_sp->ReportError(
"0x%8.8" PRIx64 ": DW_TAG_member '%s' refers to type 0x%8.8x"
- " which was unable to be parsed",
- die.GetID(), name, encoding_form.Reference().GetOffset());
- else
- module_sp->ReportError(
- "0x%8.8" PRIx64 ": DW_TAG_member refers to type 0x%8.8x"
- " which was unable to be parsed",
- die.GetID(), encoding_form.Reference().GetOffset());
+ " which extends beyond the bounds of 0x%8.8" PRIx64,
+ die.GetID(), attrs.name,
+ attrs.encoding_form.Reference().GetOffset(),
+ parent_die.GetID());
+ }
+
+ member_clang_type =
+ m_ast.CreateArrayType(member_array_element_type, 0, false);
}
}
+ }
- if (prop_name != nullptr && member_type) {
- clang::ObjCIvarDecl *ivar_decl = nullptr;
+ RequireCompleteType(member_clang_type);
- if (field_decl) {
- ivar_decl = clang::dyn_cast<clang::ObjCIvarDecl>(field_decl);
- assert(ivar_decl != nullptr);
- }
+ clang::FieldDecl *field_decl = TypeSystemClang::AddFieldToRecordType(
+ class_clang_type, attrs.name, member_clang_type, attrs.accessibility,
+ attrs.bit_size);
- ClangASTMetadata metadata;
- metadata.SetUserID(die.GetID());
- delayed_properties.push_back(DelayedAddObjCClassProperty(
- class_clang_type, prop_name, member_type->GetLayoutCompilerType(),
- ivar_decl, prop_setter_name, prop_getter_name, prop_attributes,
- &metadata));
+ m_ast.SetMetadataAsUserID(field_decl, die.GetID());
- if (ivar_decl)
- m_ast.SetMetadataAsUserID(ivar_decl, die.GetID());
- }
- }
+ layout_info.field_offsets.insert(
+ std::make_pair(field_decl, field_bit_offset));
}
bool DWARFASTParserClang::ParseChildMembers(
const DWARFDIE &parent_die, CompilerType &class_clang_type,
std::vector<std::unique_ptr<clang::CXXBaseSpecifier>> &base_classes,
std::vector<DWARFDIE> &member_function_dies,
- DelayedPropertyList &delayed_properties, AccessType &default_accessibility,
+ DelayedPropertyList &delayed_properties,
+ const AccessType default_accessibility,
ClangASTImporter::LayoutInfo &layout_info) {
if (!parent_die)
return false;
@@ -2750,11 +2925,13 @@ bool DWARFASTParserClang::ParseChildMembers(
dw_tag_t tag = die.Tag();
switch (tag) {
- case DW_TAG_member:
case DW_TAG_APPLE_property:
+ ParseObjCProperty(die, parent_die, class_clang_type, delayed_properties);
+ break;
+
+ case DW_TAG_member:
ParseSingleMember(die, parent_die, class_clang_type,
- default_accessibility, delayed_properties, layout_info,
- last_field_info);
+ default_accessibility, layout_info, last_field_info);
break;
case DW_TAG_subprogram:
@@ -2762,117 +2939,10 @@ bool DWARFASTParserClang::ParseChildMembers(
member_function_dies.push_back(die);
break;
- case DW_TAG_inheritance: {
- // TODO: implement DW_TAG_inheritance type parsing
- DWARFAttributes attributes;
- const size_t num_attributes = die.GetAttributes(attributes);
- if (num_attributes > 0) {
- DWARFFormValue encoding_form;
- AccessType accessibility = default_accessibility;
- bool is_virtual = false;
- bool is_base_of_class = true;
- off_t member_byte_offset = 0;
- uint32_t i;
- for (i = 0; i < num_attributes; ++i) {
- const dw_attr_t attr = attributes.AttributeAtIndex(i);
- DWARFFormValue form_value;
- if (attributes.ExtractFormValueAtIndex(i, form_value)) {
- switch (attr) {
- case DW_AT_type:
- encoding_form = form_value;
- break;
- case DW_AT_data_member_location:
- if (form_value.BlockData()) {
- Value initialValue(0);
- Value memberOffset(0);
- const DWARFDataExtractor &debug_info_data = die.GetData();
- uint32_t block_length = form_value.Unsigned();
- uint32_t block_offset =
- form_value.BlockData() - debug_info_data.GetDataStart();
- if (DWARFExpression::Evaluate(
- nullptr, nullptr, module_sp,
- DataExtractor(debug_info_data, block_offset,
- block_length),
- die.GetCU(), eRegisterKindDWARF, &initialValue, nullptr,
- memberOffset, nullptr)) {
- member_byte_offset =
- memberOffset.ResolveValue(nullptr).UInt();
- }
- } else {
- // With DWARF 3 and later, if the value is an integer constant,
- // this form value is the offset in bytes from the beginning of
- // the containing entity.
- member_byte_offset = form_value.Unsigned();
- }
- break;
-
- case DW_AT_accessibility:
- accessibility = DW_ACCESS_to_AccessType(form_value.Unsigned());
- break;
-
- case DW_AT_virtuality:
- is_virtual = form_value.Boolean();
- break;
-
- case DW_AT_sibling:
- break;
-
- default:
- break;
- }
- }
- }
-
- Type *base_class_type = die.ResolveTypeUID(encoding_form.Reference());
- if (base_class_type == nullptr) {
- module_sp->ReportError("0x%8.8x: DW_TAG_inheritance failed to "
- "resolve the base class at 0x%8.8x"
- " from enclosing type 0x%8.8x. \nPlease file "
- "a bug and attach the file at the start of "
- "this error message",
- die.GetOffset(),
- encoding_form.Reference().GetOffset(),
- parent_die.GetOffset());
- break;
- }
-
- CompilerType base_class_clang_type =
- base_class_type->GetFullCompilerType();
- assert(base_class_clang_type);
- if (TypeSystemClang::IsObjCObjectOrInterfaceType(class_clang_type)) {
- ast->SetObjCSuperClass(class_clang_type, base_class_clang_type);
- } else {
- std::unique_ptr<clang::CXXBaseSpecifier> result =
- ast->CreateBaseClassSpecifier(
- base_class_clang_type.GetOpaqueQualType(), accessibility,
- is_virtual, is_base_of_class);
- if (!result)
- break;
-
- base_classes.push_back(std::move(result));
-
- if (is_virtual) {
- // Do not specify any offset for virtual inheritance. The DWARF
- // produced by clang doesn't give us a constant offset, but gives
- // us a DWARF expressions that requires an actual object in memory.
- // the DW_AT_data_member_location for a virtual base class looks
- // like:
- // DW_AT_data_member_location( DW_OP_dup, DW_OP_deref,
- // DW_OP_constu(0x00000018), DW_OP_minus, DW_OP_deref,
- // DW_OP_plus )
- // Given this, there is really no valid response we can give to
- // clang for virtual base class offsets, and this should eventually
- // be removed from LayoutRecordType() in the external
- // AST source in clang.
- } else {
- layout_info.base_offsets.insert(std::make_pair(
- ast->GetAsCXXRecordDecl(
- base_class_clang_type.GetOpaqueQualType()),
- clang::CharUnits::fromQuantity(member_byte_offset)));
- }
- }
- }
- } break;
+ case DW_TAG_inheritance:
+ ParseInheritance(die, parent_die, class_clang_type, default_accessibility,
+ module_sp, base_classes, layout_info);
+ break;
default:
break;
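The DW_TAG_inheritance handling removed above (now factored into ParseInheritance) reads DW_AT_data_member_location in two forms: an expression block that has to be evaluated, or, since DWARF 3, an integer constant giving the byte offset from the start of the containing entity. A minimal self-contained sketch of that decision; FormValue and EvaluateExprBlock are hypothetical stand-ins, not LLDB's DWARFFormValue or DWARFExpression:

#include <cstdint>
#include <optional>
#include <vector>

// Stand-in for a DWARF attribute value: either expression bytes or a constant.
struct FormValue {
  std::vector<uint8_t> block; // non-empty => DW_FORM_block* expression bytes
  uint64_t constant = 0;      // otherwise an integer constant
};

std::optional<uint64_t> EvaluateExprBlock(const std::vector<uint8_t> &bytes) {
  // Placeholder: a real implementation would run the DWARF expression; LLDB
  // delegates this to DWARFExpression::Evaluate.
  (void)bytes;
  return std::nullopt;
}

std::optional<uint64_t> DecodeMemberByteOffset(const FormValue &v) {
  if (!v.block.empty())
    return EvaluateExprBlock(v.block); // expression form
  return v.constant;                   // constant form (DWARF 3 and later)
}

int main() {
  FormValue constant_form;
  constant_form.constant = 8;
  return DecodeMemberByteOffset(constant_form).value_or(0) == 8 ? 0 : 1;
}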
@@ -3140,7 +3210,6 @@ clang::Decl *DWARFASTParserClang::GetClangDeclForDIE(const DWARFDIE &die) {
if (DWARFDIE spec_die = die.GetReferencedDIE(DW_AT_specification)) {
clang::Decl *decl = GetClangDeclForDIE(spec_die);
m_die_to_decl[die.GetDIE()] = decl;
- m_decl_to_die[decl].insert(die.GetDIE());
return decl;
}
@@ -3148,7 +3217,6 @@ clang::Decl *DWARFASTParserClang::GetClangDeclForDIE(const DWARFDIE &die) {
die.GetReferencedDIE(DW_AT_abstract_origin)) {
clang::Decl *decl = GetClangDeclForDIE(abstract_origin_die);
m_die_to_decl[die.GetDIE()] = decl;
- m_decl_to_die[decl].insert(die.GetDIE());
return decl;
}
@@ -3213,7 +3281,6 @@ clang::Decl *DWARFASTParserClang::GetClangDeclForDIE(const DWARFDIE &die) {
}
m_die_to_decl[die.GetDIE()] = decl;
- m_decl_to_die[decl].insert(die.GetDIE());
return decl;
}
@@ -3477,7 +3544,8 @@ bool DWARFASTParserClang::CopyUniqueClassMethodTypes(
UniqueCStringMap<DWARFDIE> dst_name_to_die;
UniqueCStringMap<DWARFDIE> src_name_to_die_artificial;
UniqueCStringMap<DWARFDIE> dst_name_to_die_artificial;
- for (DWARFDIE src_die : src_class_die.children()) {
+ for (src_die = src_class_die.GetFirstChild(); src_die.IsValid();
+ src_die = src_die.GetSibling()) {
if (src_die.Tag() == DW_TAG_subprogram) {
// Make sure this is a declaration and not a concrete instance by looking
// for DW_AT_declaration set to 1. Sometimes concrete function instances
@@ -3495,7 +3563,8 @@ bool DWARFASTParserClang::CopyUniqueClassMethodTypes(
}
}
}
- for (DWARFDIE dst_die : dst_class_die.children()) {
+ for (dst_die = dst_class_die.GetFirstChild(); dst_die.IsValid();
+ dst_die = dst_die.GetSibling()) {
if (dst_die.Tag() == DW_TAG_subprogram) {
// Make sure this is a declaration and not a concrete instance by looking
// for DW_AT_declaration set to 1. Sometimes concrete function instances
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h
index 9bf6240b7554..f97c0c470ab0 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h
@@ -47,7 +47,8 @@ public:
lldb_private::Function *
ParseFunctionFromDWARF(lldb_private::CompileUnit &comp_unit,
- const DWARFDIE &die) override;
+ const DWARFDIE &die,
+ const lldb_private::AddressRange &func_range) override;
bool
CompleteTypeFromDWARF(const DWARFDIE &die, lldb_private::Type *type,
@@ -73,7 +74,6 @@ protected:
class DelayedAddObjCClassProperty;
typedef std::vector<DelayedAddObjCClassProperty> DelayedPropertyList;
- typedef llvm::SmallPtrSet<const DWARFDebugInfoEntry *, 4> DIEPointerSet;
typedef llvm::DenseMap<const DWARFDebugInfoEntry *, clang::DeclContext *>
DIEToDeclContextMap;
typedef std::multimap<const clang::DeclContext *, const DWARFDIE>
@@ -83,11 +83,9 @@ protected:
DIEToModuleMap;
typedef llvm::DenseMap<const DWARFDebugInfoEntry *, clang::Decl *>
DIEToDeclMap;
- typedef llvm::DenseMap<const clang::Decl *, DIEPointerSet> DeclToDIEMap;
lldb_private::TypeSystemClang &m_ast;
DIEToDeclMap m_die_to_decl;
- DeclToDIEMap m_decl_to_die;
DIEToDeclContextMap m_die_to_decl_ctx;
DeclContextToDIEMap m_decl_ctx_to_die;
DIEToModuleMap m_die_to_module;
@@ -113,7 +111,7 @@ protected:
std::vector<std::unique_ptr<clang::CXXBaseSpecifier>> &base_classes,
std::vector<DWARFDIE> &member_function_dies,
DelayedPropertyList &delayed_properties,
- lldb::AccessType &default_accessibility,
+ const lldb::AccessType default_accessibility,
lldb_private::ClangASTImporter::LayoutInfo &layout_info);
size_t
@@ -190,11 +188,26 @@ private:
}
};
+ /// Parses a DW_TAG_APPLE_property DIE and appends the parsed data to the
+ /// list of delayed Objective-C properties.
+ ///
+ /// Note: The delayed property needs to be finalized to actually create the
+ /// property declarations in the module AST.
+ ///
+ /// \param die The DW_TAG_APPLE_property DIE that will be parsed.
+ /// \param parent_die The parent DIE.
+ /// \param class_clang_type The Objective-C class that will contain the
+ /// created property.
+ /// \param delayed_properties The list of delayed properties that the result
+ /// will be appended to.
+ void ParseObjCProperty(const DWARFDIE &die, const DWARFDIE &parent_die,
+ const lldb_private::CompilerType &class_clang_type,
+ DelayedPropertyList &delayed_properties);
+
void
ParseSingleMember(const DWARFDIE &die, const DWARFDIE &parent_die,
const lldb_private::CompilerType &class_clang_type,
lldb::AccessType default_accessibility,
- DelayedPropertyList &delayed_properties,
lldb_private::ClangASTImporter::LayoutInfo &layout_info,
FieldInfo &last_field_info);
@@ -215,6 +228,28 @@ private:
ParsedDWARFTypeAttributes &attrs);
lldb::TypeSP ParsePointerToMemberType(const DWARFDIE &die,
const ParsedDWARFTypeAttributes &attrs);
+
+ /// Parses a DW_TAG_inheritance DIE into a base/super class.
+ ///
+ /// \param die The DW_TAG_inheritance DIE to parse.
+ /// \param parent_die The parent DIE of the given DIE.
+ /// \param class_clang_type The C++/Objective-C class representing parent_die.
+ /// For an Objective-C class this method sets the super class on success. For
+ /// a C++ class this will *not* add the result as a base class.
+ /// \param default_accessibility The default accessibility that is given to
+ /// base classes if they don't have an explicit accessibility set.
+ /// \param module_sp The current Module.
+ /// \param base_classes The list of C++ base classes that will be appended
+ /// with the parsed base class on success.
+ /// \param layout_info The layout information that will be updated for C++
+ /// base classes with the base offset.
+ void ParseInheritance(
+ const DWARFDIE &die, const DWARFDIE &parent_die,
+ const lldb_private::CompilerType class_clang_type,
+ const lldb::AccessType default_accessibility,
+ const lldb::ModuleSP &module_sp,
+ std::vector<std::unique_ptr<clang::CXXBaseSpecifier>> &base_classes,
+ lldb_private::ClangASTImporter::LayoutInfo &layout_info);
};
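As background for the default_accessibility parameters documented above: when a DW_TAG_inheritance entry carries no DW_AT_accessibility attribute, the parser falls back to the language default, which in C++ depends on the class-key of the derived type. A small non-LLDB reminder of that rule:

struct Base { int value = 0; };

// With the `class` key an unannotated base is private by default...
class PrivateDerived : Base {};
// ...with the `struct` key it is public.
struct PublicDerived : Base {};

int main() {
  PublicDerived pub;
  int ok = pub.value;      // fine: Base is a public base here
  // PrivateDerived priv;
  // int bad = priv.value; // would not compile: Base is a private base
  return ok;
}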
/// Parsed form of all attributes that are relevant for type reconstruction.
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp
index dda691eecacc..529007e31b9e 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.cpp
@@ -448,3 +448,7 @@ bool DWARFDIE::GetDIENamesAndRanges(
} else
return false;
}
+
+llvm::iterator_range<DWARFDIE::child_iterator> DWARFDIE::children() const {
+ return llvm::make_range(child_iterator(*this), child_iterator());
+}
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.h
index 56154055c44d..5ee44a763204 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.h
@@ -90,14 +90,9 @@ public:
int &decl_line, int &decl_column, int &call_file,
int &call_line, int &call_column,
lldb_private::DWARFExpression *frame_base) const;
+
/// The range of all the children of this DIE.
- ///
- /// This is a template just because child_iterator is not completely defined
- /// at this point.
- template <typename T = child_iterator>
- llvm::iterator_range<T> children() const {
- return llvm::make_range(T(*this), T());
- }
+ llvm::iterator_range<child_iterator> children() const;
};
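The de-templated children() above pairs with DWARFDIE::child_iterator: a begin iterator is built from the parent, a default-constructed iterator is the end sentinel, and advancing follows the same first-child/next-sibling links that the explicit GetFirstChild()/GetSibling() loops in CopyUniqueClassMethodTypes walk. A self-contained sketch of the idiom with plain stand-in types, not the LLDB classes:

#include <iostream>

// Each node knows its first child and its next sibling, the two links the
// explicit loops above use.
struct Node {
  const char *name = "";
  Node *first_child = nullptr;
  Node *next_sibling = nullptr;
};

// Forward iterator over the children of one node; default-constructed is the
// end sentinel, mirroring DWARFDIE::child_iterator.
class ChildIterator {
  Node *cur = nullptr;
public:
  ChildIterator() = default;
  explicit ChildIterator(Node &parent) : cur(parent.first_child) {}
  Node &operator*() const { return *cur; }
  ChildIterator &operator++() { cur = cur->next_sibling; return *this; }
  bool operator!=(const ChildIterator &rhs) const { return cur != rhs.cur; }
};

struct ChildRange {
  Node *parent;
  ChildIterator begin() const { return ChildIterator(*parent); }
  ChildIterator end() const { return ChildIterator(); }
};

// children() packages the sibling walk as a range for range-based for loops.
ChildRange children(Node &parent) { return ChildRange{&parent}; }

int main() {
  Node a{"a"}, b{"b"}, parent{"parent"};
  parent.first_child = &a;
  a.next_sibling = &b;
  for (Node &child : children(parent))
    std::cout << child.name << '\n'; // prints "a" then "b"
}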
class DWARFDIE::child_iterator
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.cpp
index e43afa104413..b72c7406ece1 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.cpp
@@ -174,15 +174,6 @@ bool DWARFDebugInfo::ContainsTypeUnits() {
return !m_type_hash_to_unit_index.empty();
}
-DWARFDIE
-DWARFDebugInfo::GetDIEForDIEOffset(DIERef::Section section,
- dw_offset_t die_offset) {
- DWARFUnit *cu = GetUnitContainingDIEOffset(section, die_offset);
- if (cu)
- return cu->GetDIE(die_offset);
- return DWARFDIE();
-}
-
// GetDIE()
//
// Get the DIE (Debug Information Entry) with the specified offset.
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.h
index 46c04d749c46..c990ac9fbe58 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.h
@@ -43,8 +43,6 @@ public:
DWARFUnit *GetUnit(const DIERef &die_ref);
DWARFTypeUnit *GetTypeUnitForHash(uint64_t hash);
bool ContainsTypeUnits();
- DWARFDIE GetDIEForDIEOffset(DIERef::Section section,
- dw_offset_t die_offset);
DWARFDIE GetDIE(const DIERef &die_ref);
enum {
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp
index 683033d0ee4c..6707d471e09b 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp
@@ -41,7 +41,7 @@ bool DWARFIndex::ProcessFunctionDIE(
return true;
// In case of a full match, we just insert everything we find.
- if (name_type_mask & eFunctionNameTypeFull)
+ if (name_type_mask & eFunctionNameTypeFull && die.GetMangledName() == name)
return callback(die);
// If looking for ObjC selectors, we need to also check if the name is a
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.h
index ecf82a910b66..1d3d70dfef01 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.h
@@ -13,6 +13,8 @@
#include "Plugins/SymbolFile/DWARF/DWARFDIE.h"
#include "Plugins/SymbolFile/DWARF/DWARFFormValue.h"
+#include "lldb/Target/Statistics.h"
+
class DWARFDeclContext;
class DWARFDIE;
@@ -34,8 +36,9 @@ public:
virtual void
GetGlobalVariables(const RegularExpression &regex,
llvm::function_ref<bool(DWARFDIE die)> callback) = 0;
+ /// \a cu must be the skeleton unit if possible, not GetNonSkeletonUnit().
virtual void
- GetGlobalVariables(const DWARFUnit &cu,
+ GetGlobalVariables(DWARFUnit &cu,
llvm::function_ref<bool(DWARFDIE die)> callback) = 0;
virtual void
GetObjCMethods(ConstString class_name,
@@ -61,8 +64,11 @@ public:
virtual void Dump(Stream &s) = 0;
+ StatsDuration GetIndexTime() { return m_index_time; }
+
protected:
Module &m_module;
+ StatsDuration m_index_time{0.0};
/// Helper function implementing common logic for processing function dies. If
/// the function given by "ref" matches search criteria given by
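GetIndexTime() and m_index_time added above are filled in by scope-based timing; the ManualDWARFIndex change later in this diff constructs ElapsedTime elapsed(m_index_time) at the top of Index(). A generic sketch of that accumulate-on-scope-exit pattern using std::chrono; the real StatsDuration/ElapsedTime types come from lldb/Target/Statistics.h, and the names below are stand-ins:

#include <chrono>
#include <iostream>
#include <thread>

using Seconds = std::chrono::duration<double>;

class ScopedElapsed {
  Seconds &total;
  std::chrono::steady_clock::time_point start;
public:
  explicit ScopedElapsed(Seconds &accumulator)
      : total(accumulator), start(std::chrono::steady_clock::now()) {}
  // The destructor adds this scope's wall-clock time to the accumulator.
  ~ScopedElapsed() { total += std::chrono::steady_clock::now() - start; }
};

Seconds g_index_time{0.0};

void IndexOneUnit() {
  ScopedElapsed elapsed(g_index_time);
  std::this_thread::sleep_for(std::chrono::milliseconds(5)); // stand-in work
}

int main() {
  for (int i = 0; i < 3; ++i)
    IndexOneUnit();
  std::cout << "indexing took " << g_index_time.count() << " s\n";
}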
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
index 824e43872269..5487f709d223 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
@@ -9,7 +9,6 @@
#include "DWARFUnit.h"
#include "lldb/Core/Module.h"
-#include "lldb/Host/StringConvert.h"
#include "lldb/Symbol/ObjectFile.h"
#include "lldb/Utility/LLDBAssert.h"
#include "lldb/Utility/StreamString.h"
@@ -35,12 +34,12 @@ DWARFUnit::DWARFUnit(SymbolFileDWARF &dwarf, lldb::user_id_t uid,
DIERef::Section section, bool is_dwo)
: UserID(uid), m_dwarf(dwarf), m_header(header), m_abbrevs(&abbrevs),
m_cancel_scopes(false), m_section(section), m_is_dwo(is_dwo),
- m_dwo_id(header.GetDWOId()) {}
+ m_has_parsed_non_skeleton_unit(false), m_dwo_id(header.GetDWOId()) {}
DWARFUnit::~DWARFUnit() = default;
-// Parses first DIE of a compile unit.
-void DWARFUnit::ExtractUnitDIEIfNeeded() {
+// Parses first DIE of a compile unit, excluding DWO.
+void DWARFUnit::ExtractUnitDIENoDwoIfNeeded() {
{
llvm::sys::ScopedReader lock(m_first_die_mutex);
if (m_first_die)
@@ -50,7 +49,9 @@ void DWARFUnit::ExtractUnitDIEIfNeeded() {
if (m_first_die)
return; // Already parsed
- LLDB_SCOPED_TIMERF("%8.8x: DWARFUnit::ExtractUnitDIEIfNeeded()", GetOffset());
+ ElapsedTime elapsed(m_dwarf.GetDebugInfoParseTimeRef());
+ LLDB_SCOPED_TIMERF("%8.8x: DWARFUnit::ExtractUnitDIENoDwoIfNeeded()",
+ GetOffset());
// Set the offset to that of the first DIE and calculate the start of the
// next compilation unit header.
@@ -66,6 +67,58 @@ void DWARFUnit::ExtractUnitDIEIfNeeded() {
}
}
+// Parses first DIE of a compile unit including DWO.
+void DWARFUnit::ExtractUnitDIEIfNeeded() {
+ ExtractUnitDIENoDwoIfNeeded();
+
+ if (m_has_parsed_non_skeleton_unit)
+ return;
+
+ m_has_parsed_non_skeleton_unit = true;
+
+ std::shared_ptr<SymbolFileDWARFDwo> dwo_symbol_file =
+ m_dwarf.GetDwoSymbolFileForCompileUnit(*this, m_first_die);
+ if (!dwo_symbol_file)
+ return;
+
+ DWARFUnit *dwo_cu = dwo_symbol_file->GetDWOCompileUnitForHash(m_dwo_id);
+
+ if (!dwo_cu)
+ return; // Can't fetch the compile unit from the dwo file.
+ dwo_cu->SetUserData(this);
+
+ DWARFBaseDIE dwo_cu_die = dwo_cu->GetUnitDIEOnly();
+ if (!dwo_cu_die.IsValid())
+ return; // Can't fetch the compile unit DIE from the dwo file.
+
+ // Here for DWO CU we want to use the address base set in the skeleton unit
+ // (DW_AT_addr_base) if it is available and use the DW_AT_GNU_addr_base
+ // otherwise. We do that because pre-DWARF v5 could use the DW_AT_GNU_*
+ // attributes which were applicable to the DWO units. The corresponding
+ // DW_AT_* attributes standardized in DWARF v5 are also applicable to the
+ // main unit in contrast.
+ if (m_addr_base)
+ dwo_cu->SetAddrBase(*m_addr_base);
+ else if (m_gnu_addr_base)
+ dwo_cu->SetAddrBase(*m_gnu_addr_base);
+
+ if (GetVersion() <= 4 && m_gnu_ranges_base)
+ dwo_cu->SetRangesBase(*m_gnu_ranges_base);
+ else if (dwo_symbol_file->GetDWARFContext()
+ .getOrLoadRngListsData()
+ .GetByteSize() > 0)
+ dwo_cu->SetRangesBase(llvm::DWARFListTableHeader::getHeaderSize(DWARF32));
+
+ if (GetVersion() >= 5 &&
+ dwo_symbol_file->GetDWARFContext().getOrLoadLocListsData().GetByteSize() >
+ 0)
+ dwo_cu->SetLoclistsBase(llvm::DWARFListTableHeader::getHeaderSize(DWARF32));
+
+ dwo_cu->SetBaseAddress(GetBaseAddress());
+
+ m_dwo = std::shared_ptr<DWARFUnit>(std::move(dwo_symbol_file), dwo_cu);
+}
+
// Parses a compile unit and indexes its DIEs if it hasn't already been done.
// It will leave this compile unit extracted forever.
void DWARFUnit::ExtractDIEsIfNeeded() {
@@ -144,6 +197,7 @@ DWARFUnit::ScopedExtractDIEs &DWARFUnit::ScopedExtractDIEs::operator=(
void DWARFUnit::ExtractDIEsRWLocked() {
llvm::sys::ScopedWriter first_die_lock(m_first_die_mutex);
+ ElapsedTime elapsed(m_dwarf.GetDebugInfoParseTimeRef());
LLDB_SCOPED_TIMERF("%8.8x: DWARFUnit::ExtractDIEsIfNeeded()", GetOffset());
// Set the offset to that of the first DIE and calculate the start of the
@@ -291,14 +345,12 @@ void DWARFUnit::SetDwoStrOffsetsBase() {
}
uint64_t DWARFUnit::GetDWOId() {
- ExtractUnitDIEIfNeeded();
+ ExtractUnitDIENoDwoIfNeeded();
return m_dwo_id;
}
// m_die_array_mutex must be already held as read/write.
void DWARFUnit::AddUnitDIE(const DWARFDebugInfoEntry &cu_die) {
- llvm::Optional<uint64_t> addr_base, gnu_addr_base, gnu_ranges_base;
-
DWARFAttributes attributes;
size_t num_attributes = cu_die.GetAttributes(this, attributes);
@@ -308,8 +360,7 @@ void DWARFUnit::AddUnitDIE(const DWARFDebugInfoEntry &cu_die) {
continue;
DWARFFormValue form_value;
if (attributes.ExtractFormValueAtIndex(i, form_value)) {
- addr_base = form_value.Unsigned();
- SetAddrBase(*addr_base);
+ SetAddrBase(form_value.Unsigned());
break;
}
}
@@ -341,10 +392,10 @@ void DWARFUnit::AddUnitDIE(const DWARFDebugInfoEntry &cu_die) {
m_line_table_offset = form_value.Unsigned();
break;
case DW_AT_GNU_addr_base:
- gnu_addr_base = form_value.Unsigned();
+ m_gnu_addr_base = form_value.Unsigned();
break;
case DW_AT_GNU_ranges_base:
- gnu_ranges_base = form_value.Unsigned();
+ m_gnu_ranges_base = form_value.Unsigned();
break;
case DW_AT_GNU_dwo_id:
m_dwo_id = form_value.Unsigned();
@@ -353,50 +404,10 @@ void DWARFUnit::AddUnitDIE(const DWARFDebugInfoEntry &cu_die) {
}
if (m_is_dwo) {
+ m_has_parsed_non_skeleton_unit = true;
SetDwoStrOffsetsBase();
return;
}
-
- std::shared_ptr<SymbolFileDWARFDwo> dwo_symbol_file =
- m_dwarf.GetDwoSymbolFileForCompileUnit(*this, cu_die);
- if (!dwo_symbol_file)
- return;
-
- DWARFUnit *dwo_cu = dwo_symbol_file->GetDWOCompileUnitForHash(m_dwo_id);
-
- if (!dwo_cu)
- return; // Can't fetch the compile unit from the dwo file.
- dwo_cu->SetUserData(this);
-
- DWARFBaseDIE dwo_cu_die = dwo_cu->GetUnitDIEOnly();
- if (!dwo_cu_die.IsValid())
- return; // Can't fetch the compile unit DIE from the dwo file.
-
- // Here for DWO CU we want to use the address base set in the skeleton unit
- // (DW_AT_addr_base) if it is available and use the DW_AT_GNU_addr_base
- // otherwise. We do that because pre-DWARF v5 could use the DW_AT_GNU_*
- // attributes which were applicable to the DWO units. The corresponding
- // DW_AT_* attributes standardized in DWARF v5 are also applicable to the main
- // unit in contrast.
- if (addr_base)
- dwo_cu->SetAddrBase(*addr_base);
- else if (gnu_addr_base)
- dwo_cu->SetAddrBase(*gnu_addr_base);
-
- if (GetVersion() <= 4 && gnu_ranges_base)
- dwo_cu->SetRangesBase(*gnu_ranges_base);
- else if (dwo_symbol_file->GetDWARFContext()
- .getOrLoadRngListsData()
- .GetByteSize() > 0)
- dwo_cu->SetRangesBase(llvm::DWARFListTableHeader::getHeaderSize(DWARF32));
-
- if (GetVersion() >= 5 &&
- dwo_symbol_file->GetDWARFContext().getOrLoadLocListsData().GetByteSize() >
- 0)
- dwo_cu->SetLoclistsBase(llvm::DWARFListTableHeader::getHeaderSize(DWARF32));
- dwo_cu->SetBaseAddress(GetBaseAddress());
-
- m_dwo = std::shared_ptr<DWARFUnit>(std::move(dwo_symbol_file), dwo_cu);
}
size_t DWARFUnit::GetDebugInfoSize() const {
@@ -412,7 +423,7 @@ dw_offset_t DWARFUnit::GetAbbrevOffset() const {
}
dw_offset_t DWARFUnit::GetLineTableOffset() {
- ExtractUnitDIEIfNeeded();
+ ExtractUnitDIENoDwoIfNeeded();
return m_line_table_offset;
}
@@ -427,15 +438,20 @@ ParseListTableHeader(const llvm::DWARFDataExtractor &data, uint64_t offset,
// We are expected to be called with Offset 0 or pointing just past the table
// header. Correct Offset in the latter case so that it points to the start
// of the header.
- if (offset > 0) {
- uint64_t HeaderSize = llvm::DWARFListTableHeader::getHeaderSize(format);
- if (offset < HeaderSize)
- return llvm::createStringError(errc::invalid_argument,
- "did not detect a valid"
- " list table with base = 0x%" PRIx64 "\n",
- offset);
- offset -= HeaderSize;
+ if (offset == 0) {
+ // This means DW_AT_rnglists_base is missing and therefore DW_FORM_rnglistx
+ // cannot be handled. Returning a default-constructed ListTableType allows
+ // DW_FORM_sec_offset to be supported.
+ return ListTableType();
}
+
+ uint64_t HeaderSize = llvm::DWARFListTableHeader::getHeaderSize(format);
+ if (offset < HeaderSize)
+ return llvm::createStringError(errc::invalid_argument,
+ "did not detect a valid"
+ " list table with base = 0x%" PRIx64 "\n",
+ offset);
+ offset -= HeaderSize;
ListTableType Table;
if (llvm::Error E = Table.extractHeaderAndOffsets(data, &offset))
return std::move(E);
@@ -443,6 +459,18 @@ ParseListTableHeader(const llvm::DWARFDataExtractor &data, uint64_t offset,
}
void DWARFUnit::SetLoclistsBase(dw_addr_t loclists_base) {
+ uint64_t offset = 0;
+ if (const llvm::DWARFUnitIndex::Entry *entry = m_header.GetIndexEntry()) {
+ const auto *contribution = entry->getContribution(llvm::DW_SECT_LOCLISTS);
+ if (!contribution) {
+ GetSymbolFileDWARF().GetObjectFile()->GetModule()->ReportError(
+ "Failed to find location list contribution for CU with DWO Id "
+ "0x%" PRIx64,
+ this->GetDWOId());
+ return;
+ }
+ offset += contribution->Offset;
+ }
m_loclists_base = loclists_base;
uint64_t header_size = llvm::DWARFListTableHeader::getHeaderSize(DWARF32);
@@ -450,13 +478,14 @@ void DWARFUnit::SetLoclistsBase(dw_addr_t loclists_base) {
return;
m_loclist_table_header.emplace(".debug_loclists", "locations");
- uint64_t offset = loclists_base - header_size;
+ offset += loclists_base - header_size;
if (llvm::Error E = m_loclist_table_header->extract(
m_dwarf.GetDWARFContext().getOrLoadLocListsData().GetAsLLVM(),
&offset)) {
GetSymbolFileDWARF().GetObjectFile()->GetModule()->ReportError(
- "Failed to extract location list table at offset 0x%" PRIx64 ": %s",
- loclists_base, toString(std::move(E)).c_str());
+ "Failed to extract location list table at offset 0x%" PRIx64
+ " (location list base: 0x%" PRIx64 "): %s",
+ offset, loclists_base, toString(std::move(E)).c_str());
}
}
@@ -476,9 +505,28 @@ DWARFDataExtractor DWARFUnit::GetLocationData() const {
const DWARFDataExtractor &data =
GetVersion() >= 5 ? Ctx.getOrLoadLocListsData() : Ctx.getOrLoadLocData();
if (const llvm::DWARFUnitIndex::Entry *entry = m_header.GetIndexEntry()) {
- if (const auto *contribution = entry->getContribution(llvm::DW_SECT_EXT_LOC))
+ if (const auto *contribution = entry->getContribution(
+ GetVersion() >= 5 ? llvm::DW_SECT_LOCLISTS : llvm::DW_SECT_EXT_LOC))
+ return DWARFDataExtractor(data, contribution->Offset,
+ contribution->Length);
+ return DWARFDataExtractor();
+ }
+ return data;
+}
+
+DWARFDataExtractor DWARFUnit::GetRnglistData() const {
+ DWARFContext &Ctx = GetSymbolFileDWARF().GetDWARFContext();
+ const DWARFDataExtractor &data = Ctx.getOrLoadRngListsData();
+ if (const llvm::DWARFUnitIndex::Entry *entry = m_header.GetIndexEntry()) {
+ if (const auto *contribution =
+ entry->getContribution(llvm::DW_SECT_RNGLISTS))
return DWARFDataExtractor(data, contribution->Offset,
contribution->Length);
+ GetSymbolFileDWARF().GetObjectFile()->GetModule()->ReportError(
+ "Failed to find range list contribution for CU with signature "
+ "0x%" PRIx64,
+ entry->getSignature());
+
return DWARFDataExtractor();
}
return data;
@@ -496,8 +544,7 @@ DWARFUnit::GetRnglistTable() {
m_rnglist_table_done = true;
if (auto table_or_error =
ParseListTableHeader<llvm::DWARFDebugRnglistTable>(
- m_dwarf.GetDWARFContext().getOrLoadRngListsData().GetAsLLVM(),
- m_ranges_base, DWARF32))
+ GetRnglistData().GetAsLLVM(), m_ranges_base, DWARF32))
m_rnglist_table = std::move(table_or_error.get());
else
GetSymbolFileDWARF().GetObjectFile()->GetModule()->ReportError(
@@ -518,7 +565,7 @@ llvm::Expected<uint64_t> DWARFUnit::GetRnglistOffset(uint32_t Index) {
"DW_AT_rnglists_base for CU at 0x%8.8x",
GetOffset());
if (llvm::Optional<uint64_t> off = GetRnglistTable()->getOffsetEntry(
- m_dwarf.GetDWARFContext().getOrLoadRngListsData().GetAsLLVM(), Index))
+ GetRnglistData().GetAsLLVM(), Index))
return *off + m_ranges_base;
return llvm::createStringError(
errc::invalid_argument,
@@ -609,52 +656,45 @@ bool DWARFUnit::DW_AT_decl_file_attributes_are_invalid() {
}
bool DWARFUnit::Supports_unnamed_objc_bitfields() {
- if (GetProducer() == eProducerClang) {
- const uint32_t major_version = GetProducerVersionMajor();
- return major_version > 425 ||
- (major_version == 425 && GetProducerVersionUpdate() >= 13);
- }
- return true; // Assume all other compilers didn't have incorrect ObjC bitfield
- // info
+ if (GetProducer() == eProducerClang)
+ return GetProducerVersion() >= llvm::VersionTuple(425, 0, 13);
+ // Assume all other compilers didn't have incorrect ObjC bitfield info.
+ return true;
}
void DWARFUnit::ParseProducerInfo() {
- m_producer_version_major = UINT32_MAX;
- m_producer_version_minor = UINT32_MAX;
- m_producer_version_update = UINT32_MAX;
-
+ m_producer = eProducerOther;
const DWARFDebugInfoEntry *die = GetUnitDIEPtrOnly();
- if (die) {
-
- const char *producer_cstr =
- die->GetAttributeValueAsString(this, DW_AT_producer, nullptr);
- if (producer_cstr) {
- RegularExpression llvm_gcc_regex(
- llvm::StringRef("^4\\.[012]\\.[01] \\(Based on Apple "
- "Inc\\. build [0-9]+\\) \\(LLVM build "
- "[\\.0-9]+\\)$"));
- if (llvm_gcc_regex.Execute(llvm::StringRef(producer_cstr))) {
- m_producer = eProducerLLVMGCC;
- } else if (strstr(producer_cstr, "clang")) {
- static RegularExpression g_clang_version_regex(
- llvm::StringRef("clang-([0-9]+)\\.([0-9]+)\\.([0-9]+)"));
- llvm::SmallVector<llvm::StringRef, 4> matches;
- if (g_clang_version_regex.Execute(llvm::StringRef(producer_cstr),
- &matches)) {
- m_producer_version_major =
- StringConvert::ToUInt32(matches[1].str().c_str(), UINT32_MAX, 10);
- m_producer_version_minor =
- StringConvert::ToUInt32(matches[2].str().c_str(), UINT32_MAX, 10);
- m_producer_version_update =
- StringConvert::ToUInt32(matches[3].str().c_str(), UINT32_MAX, 10);
- }
- m_producer = eProducerClang;
- } else if (strstr(producer_cstr, "GNU"))
- m_producer = eProducerGCC;
- }
+ if (!die)
+ return;
+
+ llvm::StringRef producer(
+ die->GetAttributeValueAsString(this, DW_AT_producer, nullptr));
+ if (producer.empty())
+ return;
+
+ static const RegularExpression g_swiftlang_version_regex(
+ llvm::StringRef(R"(swiftlang-([0-9]+\.[0-9]+\.[0-9]+(\.[0-9]+)?))"));
+ static const RegularExpression g_clang_version_regex(
+ llvm::StringRef(R"(clang-([0-9]+\.[0-9]+\.[0-9]+(\.[0-9]+)?))"));
+ static const RegularExpression g_llvm_gcc_regex(
+ llvm::StringRef(R"(4\.[012]\.[01] )"
+ R"(\(Based on Apple Inc\. build [0-9]+\) )"
+ R"(\(LLVM build [\.0-9]+\)$)"));
+
+ llvm::SmallVector<llvm::StringRef, 3> matches;
+ if (g_swiftlang_version_regex.Execute(producer, &matches)) {
+ m_producer_version.tryParse(matches[1]);
+ m_producer = eProducerSwift;
+ } else if (producer.contains("clang")) {
+ if (g_clang_version_regex.Execute(producer, &matches))
+ m_producer_version.tryParse(matches[1]);
+ m_producer = eProducerClang;
+ } else if (producer.contains("GNU")) {
+ m_producer = eProducerGCC;
+ } else if (g_llvm_gcc_regex.Execute(producer)) {
+ m_producer = eProducerLLVMGCC;
}
- if (m_producer == eProducerInvalid)
- m_producer = eProcucerOther;
}
DWARFProducer DWARFUnit::GetProducer() {
@@ -663,22 +703,10 @@ DWARFProducer DWARFUnit::GetProducer() {
return m_producer;
}
-uint32_t DWARFUnit::GetProducerVersionMajor() {
- if (m_producer_version_major == 0)
+llvm::VersionTuple DWARFUnit::GetProducerVersion() {
+ if (m_producer_version.empty())
ParseProducerInfo();
- return m_producer_version_major;
-}
-
-uint32_t DWARFUnit::GetProducerVersionMinor() {
- if (m_producer_version_minor == 0)
- ParseProducerInfo();
- return m_producer_version_minor;
-}
-
-uint32_t DWARFUnit::GetProducerVersionUpdate() {
- if (m_producer_version_update == 0)
- ParseProducerInfo();
- return m_producer_version_update;
+ return m_producer_version;
}
uint64_t DWARFUnit::GetDWARFLanguageType() {
@@ -972,8 +1000,11 @@ DWARFUnit::FindRnglistFromOffset(dw_offset_t offset) {
return llvm::createStringError(errc::invalid_argument,
"missing or invalid range list table");
- auto range_list_or_error = GetRnglistTable()->findList(
- m_dwarf.GetDWARFContext().getOrLoadRngListsData().GetAsLLVM(), offset);
+ llvm::DWARFDataExtractor data = GetRnglistData().GetAsLLVM();
+
+ // As DW_AT_rnglists_base may be missing we need to call setAddressSize.
+ data.setAddressSize(m_header.GetAddressByteSize());
+ auto range_list_or_error = GetRnglistTable()->findList(data, offset);
if (!range_list_or_error)
return range_list_or_error.takeError();
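The ParseProducerInfo/Supports_unnamed_objc_bitfields rewrite above replaces the three separate major/minor/update fields with a single version value, so the old "major > 425 || (major == 425 && update >= 13)" test collapses into one ordered comparison. A self-contained sketch of the same idea, with std::regex and std::tuple standing in for LLDB's RegularExpression and llvm::VersionTuple:

#include <cassert>
#include <regex>
#include <string>
#include <tuple>

using Version = std::tuple<unsigned long, unsigned long, unsigned long>;

Version ParseClangProducerVersion(const std::string &producer) {
  static const std::regex re(R"(clang-([0-9]+)\.([0-9]+)\.([0-9]+))");
  std::smatch m;
  if (std::regex_search(producer, m, re))
    return {std::stoul(m[1].str()), std::stoul(m[2].str()),
            std::stoul(m[3].str())};
  return {0, 0, 0}; // version unknown
}

bool SupportsUnnamedObjCBitfields(const Version &v) {
  // Lexicographic tuple comparison, like llvm::VersionTuple's operator>=.
  return v >= Version{425, 0, 13};
}

int main() {
  assert(SupportsUnnamedObjCBitfields(
      ParseClangProducerVersion("Apple LLVM (clang-425.0.13)")));
  assert(!SupportsUnnamedObjCBitfields(
      ParseClangProducerVersion("Apple LLVM (clang-425.0.12)")));
  assert(SupportsUnnamedObjCBitfields(
      ParseClangProducerVersion("Apple LLVM (clang-1300.0.29)")));
}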
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h
index da79a6aaf64e..cece29dcf9ac 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h
@@ -29,7 +29,8 @@ enum DWARFProducer {
eProducerClang,
eProducerGCC,
eProducerLLVMGCC,
- eProcucerOther
+ eProducerSwift,
+ eProducerOther
};
/// Base class describing the header of any kind of "unit." Some information
@@ -92,6 +93,7 @@ public:
uint64_t GetDWOId();
void ExtractUnitDIEIfNeeded();
+ void ExtractUnitDIENoDwoIfNeeded();
void ExtractDIEsIfNeeded();
class ScopedExtractDIEs {
@@ -151,7 +153,7 @@ public:
const DWARFAbbreviationDeclarationSet *GetAbbreviations() const;
dw_offset_t GetAbbrevOffset() const;
uint8_t GetAddressByteSize() const { return m_header.GetAddressByteSize(); }
- dw_addr_t GetAddrBase() const { return m_addr_base; }
+ dw_addr_t GetAddrBase() const { return m_addr_base ? *m_addr_base : 0; }
dw_addr_t GetBaseAddress() const { return m_base_addr; }
dw_offset_t GetLineTableOffset();
dw_addr_t GetRangesBase() const { return m_ranges_base; }
@@ -194,11 +196,7 @@ public:
DWARFProducer GetProducer();
- uint32_t GetProducerVersionMajor();
-
- uint32_t GetProducerVersionMinor();
-
- uint32_t GetProducerVersionUpdate();
+ llvm::VersionTuple GetProducerVersion();
uint64_t GetDWARFLanguageType();
@@ -268,7 +266,7 @@ protected:
// Get the DWARF unit DWARF debug information entry. Parse the single DIE
// if needed.
const DWARFDebugInfoEntry *GetUnitDIEPtrOnly() {
- ExtractUnitDIEIfNeeded();
+ ExtractUnitDIENoDwoIfNeeded();
// m_first_die_mutex is not required as m_first_die is never cleared.
if (!m_first_die)
return NULL;
@@ -285,6 +283,8 @@ protected:
const llvm::Optional<llvm::DWARFDebugRnglistTable> &GetRnglistTable();
+ lldb_private::DWARFDataExtractor GetRnglistData() const;
+
SymbolFileDWARF &m_dwarf;
std::shared_ptr<DWARFUnit> m_dwo;
DWARFUnitHeader m_header;
@@ -308,16 +308,16 @@ protected:
std::unique_ptr<DWARFDebugAranges> m_func_aranges_up;
dw_addr_t m_base_addr = 0;
DWARFProducer m_producer = eProducerInvalid;
- uint32_t m_producer_version_major = 0;
- uint32_t m_producer_version_minor = 0;
- uint32_t m_producer_version_update = 0;
+ llvm::VersionTuple m_producer_version;
llvm::Optional<uint64_t> m_language_type;
lldb_private::LazyBool m_is_optimized = lldb_private::eLazyBoolCalculate;
llvm::Optional<lldb_private::FileSpec> m_comp_dir;
llvm::Optional<lldb_private::FileSpec> m_file_spec;
- dw_addr_t m_addr_base = 0; ///< Value of DW_AT_addr_base.
- dw_addr_t m_loclists_base = 0; ///< Value of DW_AT_loclists_base.
- dw_addr_t m_ranges_base = 0; ///< Value of DW_AT_rnglists_base.
+ llvm::Optional<dw_addr_t> m_addr_base; ///< Value of DW_AT_addr_base.
+ dw_addr_t m_loclists_base = 0; ///< Value of DW_AT_loclists_base.
+ dw_addr_t m_ranges_base = 0; ///< Value of DW_AT_rnglists_base.
+ llvm::Optional<uint64_t> m_gnu_addr_base;
+ llvm::Optional<uint64_t> m_gnu_ranges_base;
/// Value of DW_AT_stmt_list.
dw_offset_t m_line_table_offset = DW_INVALID_OFFSET;
@@ -330,6 +330,7 @@ protected:
const DIERef::Section m_section;
bool m_is_dwo;
+ bool m_has_parsed_non_skeleton_unit;
/// Value of DW_AT_GNU_dwo_id (v4) or dwo_id from CU header (v5).
uint64_t m_dwo_id;
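Turning m_addr_base into an llvm::Optional above lets the DWO setup in ExtractUnitDIEIfNeeded prefer the skeleton unit's DW_AT_addr_base and fall back to DW_AT_GNU_addr_base only when the standard attribute is absent, while GetAddrBase() still reads as 0 when neither is set. A small stand-alone illustration using std::optional rather than the LLDB types:

#include <cstdint>
#include <optional>

struct UnitBases {
  std::optional<uint64_t> addr_base;     // DW_AT_addr_base
  std::optional<uint64_t> gnu_addr_base; // DW_AT_GNU_addr_base (pre-v5)

  uint64_t GetAddrBase() const { return addr_base.value_or(0); }

  // Pick the base to propagate to the split (DWO) unit, if any.
  std::optional<uint64_t> BaseForDwoUnit() const {
    if (addr_base)
      return addr_base;
    return gnu_addr_base;
  }
};

int main() {
  UnitBases skeleton;
  skeleton.gnu_addr_base = 0x10;
  // The standard attribute is missing, so the GNU fallback is used.
  return skeleton.BaseForDwoUnit().value_or(0) == 0x10 ? 0 : 1;
}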
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp
index cb3e662a6cdf..4a148e7744bb 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp
@@ -123,8 +123,10 @@ void DebugNamesDWARFIndex::GetGlobalVariables(
}
void DebugNamesDWARFIndex::GetGlobalVariables(
- const DWARFUnit &cu, llvm::function_ref<bool(DWARFDIE die)> callback) {
+ DWARFUnit &cu, llvm::function_ref<bool(DWARFDIE die)> callback) {
+ lldbassert(!cu.GetSymbolFileDWARF().GetDwoNum());
uint64_t cu_offset = cu.GetOffset();
+ bool found_entry_for_cu = false;
for (const DebugNames::NameIndex &ni: *m_debug_names_up) {
for (DebugNames::NameTableEntry nte: ni) {
uint64_t entry_offset = nte.getEntryOffset();
@@ -135,6 +137,7 @@ void DebugNamesDWARFIndex::GetGlobalVariables(
if (entry_or->getCUOffset() != cu_offset)
continue;
+ found_entry_for_cu = true;
if (!ProcessEntry(*entry_or, callback,
llvm::StringRef(nte.getString())))
return;
@@ -142,8 +145,10 @@ void DebugNamesDWARFIndex::GetGlobalVariables(
MaybeLogLookupError(entry_or.takeError(), ni, nte.getString());
}
}
-
- m_fallback.GetGlobalVariables(cu, callback);
+ // If no name index for that particular CU was found, fall back to
+ // creating the manual index.
+ if (!found_entry_for_cu)
+ m_fallback.GetGlobalVariables(cu, callback);
}
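The found_entry_for_cu flag above changes the fallback policy: the manual index is consulted (and therefore lazily built) only when the .debug_names tables contain no entry at all for the requested unit. A generic sketch of that two-level lookup; the maps below are stand-ins, not the DWARF accelerator tables:

#include <cstdint>
#include <functional>
#include <iostream>
#include <map>

using Callback = std::function<bool(uint64_t die_offset)>;

struct TwoLevelIndex {
  std::multimap<uint64_t, uint64_t> name_index;   // cu_offset -> die_offset
  std::multimap<uint64_t, uint64_t> manual_index; // expensive fallback

  void ForEachDieInUnit(uint64_t cu_offset, const Callback &callback) {
    bool found_entry_for_cu = false;
    auto [begin, end] = name_index.equal_range(cu_offset);
    for (auto it = begin; it != end; ++it) {
      found_entry_for_cu = true;
      if (!callback(it->second))
        return;
    }
    if (found_entry_for_cu)
      return; // the name index covered this unit; no fallback needed
    auto [fb_begin, fb_end] = manual_index.equal_range(cu_offset);
    for (auto it = fb_begin; it != fb_end; ++it)
      if (!callback(it->second))
        return;
  }
};

int main() {
  TwoLevelIndex index;
  index.manual_index.insert({0x30, 0x42}); // unit 0x30 only in the fallback
  index.ForEachDieInUnit(0x30, [](uint64_t die) {
    std::cout << std::hex << die << '\n'; // prints 42
    return true;
  });
}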
void DebugNamesDWARFIndex::GetCompleteObjCClass(
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.h b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.h
index 5d041c36c8f2..c451ccd4857f 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.h
@@ -32,7 +32,7 @@ public:
GetGlobalVariables(const RegularExpression &regex,
llvm::function_ref<bool(DWARFDIE die)> callback) override;
void
- GetGlobalVariables(const DWARFUnit &cu,
+ GetGlobalVariables(DWARFUnit &cu,
llvm::function_ref<bool(DWARFDIE die)> callback) override;
void
GetObjCMethods(ConstString class_name,
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp
index 1f40d880ea34..ab10e9ca98f9 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp
@@ -30,6 +30,7 @@ void ManualDWARFIndex::Index() {
SymbolFileDWARF &main_dwarf = *m_dwarf;
m_dwarf = nullptr;
+ ElapsedTime elapsed(m_index_time);
LLDB_SCOPED_TIMERF("%p", static_cast<void *>(&main_dwarf));
DWARFDebugInfo &main_info = main_dwarf.DebugInfo();
@@ -358,7 +359,8 @@ void ManualDWARFIndex::GetGlobalVariables(
}
void ManualDWARFIndex::GetGlobalVariables(
- const DWARFUnit &unit, llvm::function_ref<bool(DWARFDIE die)> callback) {
+ DWARFUnit &unit, llvm::function_ref<bool(DWARFDIE die)> callback) {
+ lldbassert(!unit.GetSymbolFileDWARF().GetDwoNum());
Index();
m_set.globals.FindAllEntriesForUnit(unit, DIERefCallback(callback));
}
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h
index baff989eecca..36f371402b90 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h
@@ -33,7 +33,7 @@ public:
GetGlobalVariables(const RegularExpression &regex,
llvm::function_ref<bool(DWARFDIE die)> callback) override;
void
- GetGlobalVariables(const DWARFUnit &unit,
+ GetGlobalVariables(DWARFUnit &unit,
llvm::function_ref<bool(DWARFDIE die)> callback) override;
void GetObjCMethods(ConstString class_name,
llvm::function_ref<bool(DWARFDIE die)> callback) override;
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.cpp b/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.cpp
index 42e96af84a96..493d1b4a2702 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.cpp
@@ -45,15 +45,16 @@ bool NameToDIE::Find(const RegularExpression &regex,
}
void NameToDIE::FindAllEntriesForUnit(
- const DWARFUnit &unit,
- llvm::function_ref<bool(DIERef ref)> callback) const {
+ DWARFUnit &s_unit, llvm::function_ref<bool(DIERef ref)> callback) const {
+ lldbassert(!s_unit.GetSymbolFileDWARF().GetDwoNum());
+ const DWARFUnit &ns_unit = s_unit.GetNonSkeletonUnit();
const uint32_t size = m_map.GetSize();
for (uint32_t i = 0; i < size; ++i) {
const DIERef &die_ref = m_map.GetValueAtIndexUnchecked(i);
- if (unit.GetSymbolFileDWARF().GetDwoNum() == die_ref.dwo_num() &&
- unit.GetDebugSection() == die_ref.section() &&
- unit.GetOffset() <= die_ref.die_offset() &&
- die_ref.die_offset() < unit.GetNextUnitOffset()) {
+ if (ns_unit.GetSymbolFileDWARF().GetDwoNum() == die_ref.dwo_num() &&
+ ns_unit.GetDebugSection() == die_ref.section() &&
+ ns_unit.GetOffset() <= die_ref.die_offset() &&
+ die_ref.die_offset() < ns_unit.GetNextUnitOffset()) {
if (!callback(die_ref))
return;
}
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.h b/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.h
index a6863f6c9549..994af07189f8 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.h
@@ -38,8 +38,9 @@ public:
bool Find(const lldb_private::RegularExpression &regex,
llvm::function_ref<bool(DIERef ref)> callback) const;
+ /// \a unit must be the skeleton unit if possible, not GetNonSkeletonUnit().
void
- FindAllEntriesForUnit(const DWARFUnit &unit,
+ FindAllEntriesForUnit(DWARFUnit &unit,
llvm::function_ref<bool(DIERef ref)> callback) const;
void
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
index ccaf31317d75..2dd7ae60b231 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
@@ -100,24 +100,6 @@ LLDB_PLUGIN_DEFINE(SymbolFileDWARF)
char SymbolFileDWARF::ID;
-// static inline bool
-// child_requires_parent_class_union_or_struct_to_be_completed (dw_tag_t tag)
-//{
-// switch (tag)
-// {
-// default:
-// break;
-// case DW_TAG_subprogram:
-// case DW_TAG_inlined_subroutine:
-// case DW_TAG_class_type:
-// case DW_TAG_structure_type:
-// case DW_TAG_union_type:
-// return true;
-// }
-// return false;
-//}
-//
-
namespace {
#define LLDB_PROPERTIES_symbolfiledwarf
@@ -131,7 +113,7 @@ enum {
class PluginProperties : public Properties {
public:
static ConstString GetSettingName() {
- return SymbolFileDWARF::GetPluginNameStatic();
+ return ConstString(SymbolFileDWARF::GetPluginNameStatic());
}
PluginProperties() {
@@ -145,11 +127,9 @@ public:
}
};
-typedef std::shared_ptr<PluginProperties> SymbolFileDWARFPropertiesSP;
-
-static const SymbolFileDWARFPropertiesSP &GetGlobalPluginProperties() {
- static const auto g_settings_sp(std::make_shared<PluginProperties>());
- return g_settings_sp;
+static PluginProperties &GetGlobalPluginProperties() {
+ static PluginProperties g_settings;
+ return g_settings;
}
} // namespace
@@ -267,7 +247,7 @@ void SymbolFileDWARF::DebuggerInitialize(Debugger &debugger) {
debugger, PluginProperties::GetSettingName())) {
const bool is_global_setting = true;
PluginManager::CreateSettingForSymbolFilePlugin(
- debugger, GetGlobalPluginProperties()->GetValueProperties(),
+ debugger, GetGlobalPluginProperties().GetValueProperties(),
ConstString("Properties for the dwarf symbol-file plug-in."),
is_global_setting);
}
@@ -279,12 +259,7 @@ void SymbolFileDWARF::Terminate() {
LogChannelDWARF::Terminate();
}
-lldb_private::ConstString SymbolFileDWARF::GetPluginNameStatic() {
- static ConstString g_name("dwarf");
- return g_name;
-}
-
-const char *SymbolFileDWARF::GetPluginDescriptionStatic() {
+llvm::StringRef SymbolFileDWARF::GetPluginDescriptionStatic() {
return "DWARF and DWARF3 debug symbol file reader.";
}
@@ -470,7 +445,9 @@ SymbolFileDWARF::GetTypeSystemForLanguage(LanguageType language) {
void SymbolFileDWARF::InitializeObject() {
Log *log = LogChannelDWARF::GetLogIfAll(DWARF_LOG_DEBUG_INFO);
- if (!GetGlobalPluginProperties()->IgnoreFileIndexes()) {
+ InitializeFirstCodeAddress();
+
+ if (!GetGlobalPluginProperties().IgnoreFileIndexes()) {
StreamString module_desc;
GetObjectFile()->GetModule()->GetDescription(module_desc.AsRawOstream(),
lldb::eDescriptionLevelBrief);
@@ -514,6 +491,25 @@ void SymbolFileDWARF::InitializeObject() {
std::make_unique<ManualDWARFIndex>(*GetObjectFile()->GetModule(), *this);
}
+void SymbolFileDWARF::InitializeFirstCodeAddress() {
+ InitializeFirstCodeAddressRecursive(
+ *m_objfile_sp->GetModule()->GetSectionList());
+ if (m_first_code_address == LLDB_INVALID_ADDRESS)
+ m_first_code_address = 0;
+}
+
+void SymbolFileDWARF::InitializeFirstCodeAddressRecursive(
+ const lldb_private::SectionList &section_list) {
+ for (SectionSP section_sp : section_list) {
+ if (section_sp->GetChildren().GetSize() > 0) {
+ InitializeFirstCodeAddressRecursive(section_sp->GetChildren());
+ } else if (section_sp->GetType() == eSectionTypeCode) {
+ m_first_code_address =
+ std::min(m_first_code_address, section_sp->GetFileAddress());
+ }
+ }
+}
+
bool SymbolFileDWARF::SupportedVersion(uint16_t version) {
return version >= 2 && version <= 5;
}
@@ -687,6 +683,17 @@ static void MakeAbsoluteAndRemap(FileSpec &file_spec, DWARFUnit &dwarf_cu,
file_spec.SetFile(*remapped_file, FileSpec::Style::native);
}
+/// Return the DW_AT_(GNU_)dwo_name.
+static const char *GetDWOName(DWARFCompileUnit &dwarf_cu,
+ const DWARFDebugInfoEntry &cu_die) {
+ const char *dwo_name =
+ cu_die.GetAttributeValueAsString(&dwarf_cu, DW_AT_GNU_dwo_name, nullptr);
+ if (!dwo_name)
+ dwo_name =
+ cu_die.GetAttributeValueAsString(&dwarf_cu, DW_AT_dwo_name, nullptr);
+ return dwo_name;
+}
+
lldb::CompUnitSP SymbolFileDWARF::ParseCompileUnit(DWARFCompileUnit &dwarf_cu) {
CompUnitSP cu_sp;
CompileUnit *comp_unit = (CompileUnit *)dwarf_cu.GetUserData();
@@ -701,25 +708,66 @@ lldb::CompUnitSP SymbolFileDWARF::ParseCompileUnit(DWARFCompileUnit &dwarf_cu) {
} else {
ModuleSP module_sp(m_objfile_sp->GetModule());
if (module_sp) {
- const DWARFBaseDIE cu_die =
- dwarf_cu.GetNonSkeletonUnit().GetUnitDIEOnly();
- if (cu_die) {
- FileSpec cu_file_spec(cu_die.GetName(), dwarf_cu.GetPathStyle());
- MakeAbsoluteAndRemap(cu_file_spec, dwarf_cu, module_sp);
-
- LanguageType cu_language = SymbolFileDWARF::LanguageTypeFromDWARF(
- cu_die.GetAttributeValueAsUnsigned(DW_AT_language, 0));
-
- bool is_optimized = dwarf_cu.GetNonSkeletonUnit().GetIsOptimized();
+ auto initialize_cu = [&](const FileSpec &file_spec,
+ LanguageType cu_language) {
BuildCuTranslationTable();
cu_sp = std::make_shared<CompileUnit>(
- module_sp, &dwarf_cu, cu_file_spec,
+ module_sp, &dwarf_cu, file_spec,
*GetDWARFUnitIndex(dwarf_cu.GetID()), cu_language,
- is_optimized ? eLazyBoolYes : eLazyBoolNo);
+ eLazyBoolCalculate);
dwarf_cu.SetUserData(cu_sp.get());
SetCompileUnitAtIndex(dwarf_cu.GetID(), cu_sp);
+ };
+
+ auto lazy_initialize_cu = [&]() {
+ // If the version is < 5, we can't do lazy initialization.
+ if (dwarf_cu.GetVersion() < 5)
+ return false;
+
+ // If there is no DWO, there is no reason to initialize
+ // lazily; we will do eager initialization in that case.
+ if (GetDebugMapSymfile())
+ return false;
+ const DWARFBaseDIE cu_die = dwarf_cu.GetUnitDIEOnly();
+ if (!cu_die)
+ return false;
+ if (!GetDWOName(dwarf_cu, *cu_die.GetDIE()))
+ return false;
+
+ // With DWARFv5 we can assume that the first support
+ // file is also the name of the compile unit. This
+ // allows us to avoid loading the non-skeleton unit,
+ // which may be in a separate DWO file.
+ FileSpecList support_files;
+ if (!ParseSupportFiles(dwarf_cu, module_sp, support_files))
+ return false;
+ if (support_files.GetSize() == 0)
+ return false;
+
+ initialize_cu(support_files.GetFileSpecAtIndex(0),
+ eLanguageTypeUnknown);
+ cu_sp->SetSupportFiles(std::move(support_files));
+ return true;
+ };
+
+ if (!lazy_initialize_cu()) {
+ // Eagerly initialize compile unit
+ const DWARFBaseDIE cu_die =
+ dwarf_cu.GetNonSkeletonUnit().GetUnitDIEOnly();
+ if (cu_die) {
+ LanguageType cu_language = SymbolFileDWARF::LanguageTypeFromDWARF(
+ dwarf_cu.GetDWARFLanguageType());
+
+ FileSpec cu_file_spec(cu_die.GetName(), dwarf_cu.GetPathStyle());
+
+ // Path needs to be remapped in this case. In the support files
+ // case ParseSupportFiles takes care of the remapping.
+ MakeAbsoluteAndRemap(cu_file_spec, dwarf_cu, module_sp);
+
+ initialize_cu(cu_file_spec, cu_language);
+ }
}
}
}
@@ -785,7 +833,32 @@ Function *SymbolFileDWARF::ParseFunction(CompileUnit &comp_unit,
if (!dwarf_ast)
return nullptr;
- return dwarf_ast->ParseFunctionFromDWARF(comp_unit, die);
+ DWARFRangeList ranges;
+ if (die.GetDIE()->GetAttributeAddressRanges(die.GetCU(), ranges,
+ /*check_hi_lo_pc=*/true) == 0)
+ return nullptr;
+
+ // Union of all ranges in the function DIE (if the function is
+ // discontiguous)
+ lldb::addr_t lowest_func_addr = ranges.GetMinRangeBase(0);
+ lldb::addr_t highest_func_addr = ranges.GetMaxRangeEnd(0);
+ if (lowest_func_addr == LLDB_INVALID_ADDRESS ||
+ lowest_func_addr >= highest_func_addr ||
+ lowest_func_addr < m_first_code_address)
+ return nullptr;
+
+ ModuleSP module_sp(die.GetModule());
+ AddressRange func_range;
+ func_range.GetBaseAddress().ResolveAddressUsingFileSections(
+ lowest_func_addr, module_sp->GetSectionList());
+ if (!func_range.GetBaseAddress().IsValid())
+ return nullptr;
+
+ func_range.SetByteSize(highest_func_addr - lowest_func_addr);
+ if (!FixupAddress(func_range.GetBaseAddress()))
+ return nullptr;
+
+ return dwarf_ast->ParseFunctionFromDWARF(comp_unit, die, func_range);
}
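ParseFunction above now derives the function's AddressRange itself: it takes the smallest base and largest end across the DIE's address ranges (the covering span of a possibly discontiguous function) and rejects the result if it is empty or starts below m_first_code_address. A minimal sketch of computing such a covering range, with plain structs in place of DWARFRangeList:

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

struct Range {
  uint64_t base; // inclusive start file address
  uint64_t end;  // exclusive end file address
};

constexpr uint64_t kInvalidAddr = UINT64_MAX;

// Returns {kInvalidAddr, 0} when there are no ranges, mirroring the
// LLDB_INVALID_ADDRESS check before the Function object is created.
Range CoveringRange(const std::vector<Range> &ranges) {
  Range cover{kInvalidAddr, 0};
  for (const Range &r : ranges) {
    cover.base = std::min(cover.base, r.base);
    cover.end = std::max(cover.end, r.end);
  }
  return cover;
}

int main() {
  Range cover = CoveringRange({{0x1100, 0x1180}, {0x1000, 0x1040}});
  std::cout << std::hex << cover.base << "-" << cover.end << "\n"; // 1000-1180
  // The byte size handed to the address range would be cover.end - cover.base.
}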
lldb::addr_t SymbolFileDWARF::FixupAddress(lldb::addr_t file_addr) {
@@ -807,7 +880,7 @@ lldb::LanguageType SymbolFileDWARF::ParseLanguage(CompileUnit &comp_unit) {
std::lock_guard<std::recursive_mutex> guard(GetModuleMutex());
DWARFUnit *dwarf_cu = GetDWARFCompileUnit(&comp_unit);
if (dwarf_cu)
- return GetLanguage(*dwarf_cu);
+ return GetLanguage(dwarf_cu->GetNonSkeletonUnit());
else
return eLanguageTypeUnknown;
}
@@ -898,18 +971,30 @@ bool SymbolFileDWARF::ParseSupportFiles(CompileUnit &comp_unit,
if (!dwarf_cu)
return false;
- dw_offset_t offset = dwarf_cu->GetLineTableOffset();
+ if (!ParseSupportFiles(*dwarf_cu, comp_unit.GetModule(), support_files))
+ return false;
+
+ comp_unit.SetSupportFiles(support_files);
+ return true;
+}
+
+bool SymbolFileDWARF::ParseSupportFiles(DWARFUnit &dwarf_cu,
+ const ModuleSP &module,
+ FileSpecList &support_files) {
+
+ dw_offset_t offset = dwarf_cu.GetLineTableOffset();
if (offset == DW_INVALID_OFFSET)
return false;
+ ElapsedTime elapsed(m_parse_time);
llvm::DWARFDebugLine::Prologue prologue;
if (!ParseLLVMLineTablePrologue(m_context, prologue, offset,
- dwarf_cu->GetOffset()))
+ dwarf_cu.GetOffset()))
return false;
- comp_unit.SetSupportFiles(ParseSupportFilesFromPrologue(
- comp_unit.GetModule(), prologue, dwarf_cu->GetPathStyle(),
- dwarf_cu->GetCompilationDirectory().GetCString()));
+ support_files = ParseSupportFilesFromPrologue(
+ module, prologue, dwarf_cu.GetPathStyle(),
+ dwarf_cu.GetCompilationDirectory().GetCString());
return true;
}
@@ -950,6 +1035,7 @@ SymbolFileDWARF::GetTypeUnitSupportFiles(DWARFTypeUnit &tu) {
"SymbolFileDWARF::GetTypeUnitSupportFiles failed to parse "
"the line table prologue");
};
+ ElapsedTime elapsed(m_parse_time);
llvm::Error error = prologue.parse(data, &line_table_offset, report, ctx);
if (error) {
report(std::move(error));
@@ -965,7 +1051,7 @@ bool SymbolFileDWARF::ParseIsOptimized(CompileUnit &comp_unit) {
std::lock_guard<std::recursive_mutex> guard(GetModuleMutex());
DWARFUnit *dwarf_cu = GetDWARFCompileUnit(&comp_unit);
if (dwarf_cu)
- return dwarf_cu->GetIsOptimized();
+ return dwarf_cu->GetNonSkeletonUnit().GetIsOptimized();
return false;
}
@@ -1036,6 +1122,7 @@ bool SymbolFileDWARF::ParseLineTable(CompileUnit &comp_unit) {
if (offset == DW_INVALID_OFFSET)
return false;
+ ElapsedTime elapsed(m_parse_time);
llvm::DWARFDebugLine line;
const llvm::DWARFDebugLine::LineTable *line_table =
ParseLLVMLineTable(m_context, line, offset, dwarf_cu->GetOffset());
@@ -1050,6 +1137,12 @@ bool SymbolFileDWARF::ParseLineTable(CompileUnit &comp_unit) {
// The Sequences view contains only valid line sequences. Don't iterate over
// the Rows directly.
for (const llvm::DWARFDebugLine::Sequence &seq : line_table->Sequences) {
+ // Ignore line sequences that do not start after the first code address.
+ // All addresses generated in a sequence are incremental so we only need
+ // to check the first one of the sequence. Check the comment at the
+ // m_first_code_address declaration for more details on this.
+ if (seq.LowPC < m_first_code_address)
+ continue;
std::unique_ptr<LineSequence> sequence =
LineTable::CreateLineSequenceContainer();
for (unsigned idx = seq.FirstRowIndex; idx < seq.LastRowIndex; ++idx) {
@@ -1084,6 +1177,7 @@ SymbolFileDWARF::ParseDebugMacros(lldb::offset_t *offset) {
if (iter != m_debug_macros_map.end())
return iter->second;
+ ElapsedTime elapsed(m_parse_time);
const DWARFDataExtractor &debug_macro_data = m_context.getOrLoadMacroData();
if (debug_macro_data.GetByteSize() == 0)
return DebugMacrosSP();
@@ -1585,17 +1679,6 @@ SymbolFileDWARF::GetDIE(const DIERef &die_ref) {
return DebugInfo().GetDIE(die_ref);
}
-/// Return the DW_AT_(GNU_)dwo_name.
-static const char *GetDWOName(DWARFCompileUnit &dwarf_cu,
- const DWARFDebugInfoEntry &cu_die) {
- const char *dwo_name =
- cu_die.GetAttributeValueAsString(&dwarf_cu, DW_AT_GNU_dwo_name, nullptr);
- if (!dwo_name)
- dwo_name =
- cu_die.GetAttributeValueAsString(&dwarf_cu, DW_AT_dwo_name, nullptr);
- return dwo_name;
-}
-
/// Return the DW_AT_(GNU_)dwo_id.
/// FIXME: Technically 0 is a valid hash.
static uint64_t GetDWOId(DWARFCompileUnit &dwarf_cu,
@@ -1623,7 +1706,7 @@ SymbolFileDWARF::GetDwoSymbolFileForCompileUnit(
DWARFUnit &unit, const DWARFDebugInfoEntry &cu_die) {
// If this is a Darwin-style debug map (non-.dSYM) symbol file,
// never attempt to load ELF-style DWO files since the -gmodules
- // support uses the same DWO machanism to specify full debug info
+ // support uses the same DWO mechanism to specify full debug info
// files for modules. This is handled in
// UpdateExternalModuleListIfNeeded().
if (GetDebugMapSymfile())
@@ -2083,7 +2166,7 @@ void SymbolFileDWARF::FindGlobalVariables(
}
}
- ParseVariables(sc, die, LLDB_INVALID_ADDRESS, false, false, &variables);
+ ParseAndAppendGlobalVariable(sc, die, variables);
while (pruned_idx < variables.GetSize()) {
VariableSP var_sp = variables.GetVariableAtIndex(pruned_idx);
if (name_is_mangled ||
@@ -2136,7 +2219,7 @@ void SymbolFileDWARF::FindGlobalVariables(const RegularExpression &regex,
return true;
sc.comp_unit = GetCompUnitForDWARFCompUnit(*dwarf_cu);
- ParseVariables(sc, die, LLDB_INVALID_ADDRESS, false, false, &variables);
+ ParseAndAppendGlobalVariable(sc, die, variables);
return variables.GetSize() - original_size < max_matches;
});
@@ -2186,13 +2269,8 @@ bool SymbolFileDWARF::ResolveFunction(const DWARFDIE &orig_die,
addr = sc.function->GetAddressRange().GetBaseAddress();
}
-
- if (auto section_sp = addr.GetSection()) {
- if (section_sp->GetPermissions() & ePermissionsExecutable) {
- sc_list.Append(sc);
- return true;
- }
- }
+ sc_list.Append(sc);
+ return true;
}
return false;
@@ -2507,7 +2585,7 @@ TypeSP SymbolFileDWARF::GetTypeForDIE(const DWARFDIE &die,
type_sp = ParseType(sc, die, nullptr);
} else if (type_ptr != DIE_IS_BEING_PARSED) {
- // Grab the existing type from the master types lists
+ // Get the original shared pointer for this type
type_sp = type_ptr->shared_from_this();
}
}
@@ -2793,7 +2871,7 @@ TypeSP SymbolFileDWARF::FindDefinitionTypeForDWARFDeclContext(
}
m_index->GetTypes(dwarf_decl_ctx, [&](DWARFDIE type_die) {
- // Make sure type_die's langauge matches the type system we are
+ // Make sure type_die's language matches the type system we are
// looking for. We don't want to find a "Foo" type from Java if we
// are looking for a "Foo" type for C, C++, ObjC, or ObjC++.
if (type_system &&
@@ -2997,8 +3075,8 @@ size_t SymbolFileDWARF::ParseVariablesForContext(const SymbolContext &sc) {
/*check_hi_lo_pc=*/true))
func_lo_pc = ranges.GetMinRangeBase(0);
if (func_lo_pc != LLDB_INVALID_ADDRESS) {
- const size_t num_variables = ParseVariables(
- sc, function_die.GetFirstChild(), func_lo_pc, true, true);
+ const size_t num_variables =
+ ParseVariablesInFunctionContext(sc, function_die, func_lo_pc);
// Let all blocks know they have parse all their variables
sc.function->GetBlock(false).SetDidParseVariables(true, true);
@@ -3017,16 +3095,14 @@ size_t SymbolFileDWARF::ParseVariablesForContext(const SymbolContext &sc) {
variables = std::make_shared<VariableList>();
sc.comp_unit->SetVariableList(variables);
- m_index->GetGlobalVariables(
- dwarf_cu->GetNonSkeletonUnit(), [&](DWARFDIE die) {
- VariableSP var_sp(
- ParseVariableDIE(sc, die, LLDB_INVALID_ADDRESS));
- if (var_sp) {
- variables->AddVariableIfUnique(var_sp);
- ++vars_added;
- }
- return true;
- });
+ m_index->GetGlobalVariables(*dwarf_cu, [&](DWARFDIE die) {
+ VariableSP var_sp(ParseVariableDIECached(sc, die));
+ if (var_sp) {
+ variables->AddVariableIfUnique(var_sp);
+ ++vars_added;
+ }
+ return true;
+ });
}
return vars_added;
}
@@ -3034,6 +3110,26 @@ size_t SymbolFileDWARF::ParseVariablesForContext(const SymbolContext &sc) {
return 0;
}
+VariableSP SymbolFileDWARF::ParseVariableDIECached(const SymbolContext &sc,
+ const DWARFDIE &die) {
+ if (!die)
+ return nullptr;
+
+ DIEToVariableSP &die_to_variable = die.GetDWARF()->GetDIEToVariable();
+
+ VariableSP var_sp = die_to_variable[die.GetDIE()];
+ if (var_sp)
+ return var_sp;
+
+ var_sp = ParseVariableDIE(sc, die, LLDB_INVALID_ADDRESS);
+ if (var_sp) {
+ die_to_variable[die.GetDIE()] = var_sp;
+ if (DWARFDIE spec_die = die.GetReferencedDIE(DW_AT_specification))
+ die_to_variable[spec_die.GetDIE()] = var_sp;
+ }
+ return var_sp;
+}
+
VariableSP SymbolFileDWARF::ParseVariableDIE(const SymbolContext &sc,
const DWARFDIE &die,
const lldb::addr_t func_low_pc) {
@@ -3043,9 +3139,6 @@ VariableSP SymbolFileDWARF::ParseVariableDIE(const SymbolContext &sc,
if (!die)
return nullptr;
- if (VariableSP var_sp = GetDIEToVariable()[die.GetDIE()])
- return var_sp; // Already been parsed!
-
const dw_tag_t tag = die.Tag();
ModuleSP module = GetObjectFile()->GetModule();
@@ -3055,8 +3148,6 @@ VariableSP SymbolFileDWARF::ParseVariableDIE(const SymbolContext &sc,
DWARFAttributes attributes;
const size_t num_attributes = die.GetAttributes(attributes);
- DWARFDIE spec_die;
- VariableSP var_sp;
const char *name = nullptr;
const char *mangled = nullptr;
Declaration decl;
@@ -3103,9 +3194,6 @@ VariableSP SymbolFileDWARF::ParseVariableDIE(const SymbolContext &sc,
case DW_AT_location:
location_form = form_value;
break;
- case DW_AT_specification:
- spec_die = form_value.Reference();
- break;
case DW_AT_start_scope:
// TODO: Implement this.
break;
@@ -3116,6 +3204,7 @@ VariableSP SymbolFileDWARF::ParseVariableDIE(const SymbolContext &sc,
case DW_AT_description:
case DW_AT_endianity:
case DW_AT_segment:
+ case DW_AT_specification:
case DW_AT_visibility:
default:
case DW_AT_abstract_origin:
@@ -3244,7 +3333,7 @@ VariableSP SymbolFileDWARF::ParseVariableDIE(const SymbolContext &sc,
SymbolFileDWARFDebugMap *debug_map_symfile = GetDebugMapSymfile();
if (debug_map_symfile)
// Set the module of the expression to the linked module
- // instead of the oject file so the relocated address can be
+ // instead of the object file so the relocated address can be
// found there.
location.SetModule(debug_map_symfile->GetObjectFile()->GetModule());
@@ -3308,7 +3397,7 @@ VariableSP SymbolFileDWARF::ParseVariableDIE(const SymbolContext &sc,
location.Update_DW_OP_addr(exe_file_addr);
} else {
// Variable didn't make it into the final executable
- return var_sp;
+ return nullptr;
}
}
}
@@ -3354,35 +3443,25 @@ VariableSP SymbolFileDWARF::ParseVariableDIE(const SymbolContext &sc,
}
}
- if (symbol_context_scope) {
- auto type_sp = std::make_shared<SymbolFileType>(
- *this, GetUID(type_die_form.Reference()));
-
- if (use_type_size_for_value && type_sp->GetType())
- location.UpdateValue(
- const_value_form.Unsigned(),
- type_sp->GetType()->GetByteSize(nullptr).getValueOr(0),
- die.GetCU()->GetAddressByteSize());
-
- var_sp = std::make_shared<Variable>(
- die.GetID(), name, mangled, type_sp, scope, symbol_context_scope,
- scope_ranges, &decl, location, is_external, is_artificial,
- location_is_const_value_data, is_static_member);
- } else {
+ if (!symbol_context_scope) {
// Not ready to parse this variable yet. It might be a global or static
// variable that is in a function scope and the function in the symbol
// context wasn't filled in yet
- return var_sp;
+ return nullptr;
}
- // Cache var_sp even if NULL (the variable was just a specification or was
- // missing vital information to be able to be displayed in the debugger
- // (missing location due to optimization, etc)) so we don't re-parse this
- // DIE over and over later...
- GetDIEToVariable()[die.GetDIE()] = var_sp;
- if (spec_die)
- GetDIEToVariable()[spec_die.GetDIE()] = var_sp;
- return var_sp;
+ auto type_sp = std::make_shared<SymbolFileType>(
+ *this, GetUID(type_die_form.Reference()));
+
+ if (use_type_size_for_value && type_sp->GetType())
+ location.UpdateValue(const_value_form.Unsigned(),
+ type_sp->GetType()->GetByteSize(nullptr).getValueOr(0),
+ die.GetCU()->GetAddressByteSize());
+
+ return std::make_shared<Variable>(
+ die.GetID(), name, mangled, type_sp, scope, symbol_context_scope,
+ scope_ranges, &decl, location, is_external, is_artificial,
+ location_is_const_value_data, is_static_member);
}
DWARFDIE
@@ -3429,120 +3508,234 @@ SymbolFileDWARF::FindBlockContainingSpecification(
return DWARFDIE();
}
-size_t SymbolFileDWARF::ParseVariables(const SymbolContext &sc,
- const DWARFDIE &orig_die,
- const lldb::addr_t func_low_pc,
- bool parse_siblings, bool parse_children,
- VariableList *cc_variable_list) {
- if (!orig_die)
- return 0;
+void SymbolFileDWARF::ParseAndAppendGlobalVariable(
+ const SymbolContext &sc, const DWARFDIE &die,
+ VariableList &cc_variable_list) {
+ if (!die)
+ return;
- VariableListSP variable_list_sp;
+ dw_tag_t tag = die.Tag();
+ if (tag != DW_TAG_variable && tag != DW_TAG_constant)
+ return;
- size_t vars_added = 0;
- DWARFDIE die = orig_die;
- while (die) {
- dw_tag_t tag = die.Tag();
+ // Check to see if we have already parsed this variable or constant.
+ VariableSP var_sp = GetDIEToVariable()[die.GetDIE()];
+ if (var_sp) {
+ cc_variable_list.AddVariableIfUnique(var_sp);
+ return;
+ }
- // Check to see if we have already parsed this variable or constant?
- VariableSP var_sp = GetDIEToVariable()[die.GetDIE()];
- if (var_sp) {
- if (cc_variable_list)
- cc_variable_list->AddVariableIfUnique(var_sp);
+ // We haven't parsed the variable yet, so let's do that now. Also include the
+ // variable in the relevant compilation unit's variable list, if it exists.
+ VariableListSP variable_list_sp;
+ DWARFDIE sc_parent_die = GetParentSymbolContextDIE(die);
+ dw_tag_t parent_tag = sc_parent_die.Tag();
+ switch (parent_tag) {
+ case DW_TAG_compile_unit:
+ case DW_TAG_partial_unit:
+ if (sc.comp_unit != nullptr) {
+ variable_list_sp = sc.comp_unit->GetVariableList(false);
} else {
- // We haven't already parsed it, lets do that now.
- if ((tag == DW_TAG_variable) || (tag == DW_TAG_constant) ||
- (tag == DW_TAG_formal_parameter && sc.function)) {
- if (variable_list_sp.get() == nullptr) {
- DWARFDIE sc_parent_die = GetParentSymbolContextDIE(orig_die);
- dw_tag_t parent_tag = sc_parent_die.Tag();
- switch (parent_tag) {
- case DW_TAG_compile_unit:
- case DW_TAG_partial_unit:
- if (sc.comp_unit != nullptr) {
- variable_list_sp = sc.comp_unit->GetVariableList(false);
- if (variable_list_sp.get() == nullptr) {
- variable_list_sp = std::make_shared<VariableList>();
- }
- } else {
- GetObjectFile()->GetModule()->ReportError(
- "parent 0x%8.8" PRIx64 " %s with no valid compile unit in "
- "symbol context for 0x%8.8" PRIx64 " %s.\n",
- sc_parent_die.GetID(), sc_parent_die.GetTagAsCString(),
- orig_die.GetID(), orig_die.GetTagAsCString());
- }
- break;
+ GetObjectFile()->GetModule()->ReportError(
+ "parent 0x%8.8" PRIx64 " %s with no valid compile unit in "
+ "symbol context for 0x%8.8" PRIx64 " %s.\n",
+ sc_parent_die.GetID(), sc_parent_die.GetTagAsCString(), die.GetID(),
+ die.GetTagAsCString());
+ return;
+ }
+ break;
- case DW_TAG_subprogram:
- case DW_TAG_inlined_subroutine:
- case DW_TAG_lexical_block:
- if (sc.function != nullptr) {
- // Check to see if we already have parsed the variables for the
- // given scope
-
- Block *block = sc.function->GetBlock(true).FindBlockByID(
- sc_parent_die.GetID());
- if (block == nullptr) {
- // This must be a specification or abstract origin with a
- // concrete block counterpart in the current function. We need
- // to find the concrete block so we can correctly add the
- // variable to it
- const DWARFDIE concrete_block_die =
- FindBlockContainingSpecification(
- GetDIE(sc.function->GetID()),
- sc_parent_die.GetOffset());
- if (concrete_block_die)
- block = sc.function->GetBlock(true).FindBlockByID(
- concrete_block_die.GetID());
- }
+ default:
+ GetObjectFile()->GetModule()->ReportError(
+ "didn't find appropriate parent DIE for variable list for "
+ "0x%8.8" PRIx64 " %s.\n",
+ die.GetID(), die.GetTagAsCString());
+ return;
+ }
- if (block != nullptr) {
- const bool can_create = false;
- variable_list_sp = block->GetBlockVariableList(can_create);
- if (variable_list_sp.get() == nullptr) {
- variable_list_sp = std::make_shared<VariableList>();
- block->SetVariableList(variable_list_sp);
- }
- }
- }
- break;
+ var_sp = ParseVariableDIECached(sc, die);
+ if (!var_sp)
+ return;
- default:
- GetObjectFile()->GetModule()->ReportError(
- "didn't find appropriate parent DIE for variable list for "
- "0x%8.8" PRIx64 " %s.\n",
- orig_die.GetID(), orig_die.GetTagAsCString());
- break;
- }
- }
+ cc_variable_list.AddVariableIfUnique(var_sp);
+ if (variable_list_sp)
+ variable_list_sp->AddVariableIfUnique(var_sp);
+}
+
+DIEArray
+SymbolFileDWARF::MergeBlockAbstractParameters(const DWARFDIE &block_die,
+ DIEArray &&variable_dies) {
+ // DW_TAG_inlined_subroutine objects may omit DW_TAG_formal_parameter in
+ // instances of the function when they are unused (i.e., the parameter's
+ // location list would be empty). The current DW_TAG_inlined_subroutine may
+ // refer to another DW_TAG_subprogram that actually has the definitions of
+ // the parameters, and we need to include these so they show up in the
+ // variables for this function (for example, in a stack trace). Try to find
+ // the abstract subprogram that might contain the parameter definitions and
+ // merge them with the concrete parameters.
+
+ // Nothing to merge if the block is not an inlined function.
+ if (block_die.Tag() != DW_TAG_inlined_subroutine) {
+ return std::move(variable_dies);
+ }
+
+ // Nothing to merge if the block does not have abstract parameters.
+ DWARFDIE abs_die = block_die.GetReferencedDIE(DW_AT_abstract_origin);
+ if (!abs_die || abs_die.Tag() != DW_TAG_subprogram ||
+ !abs_die.HasChildren()) {
+ return std::move(variable_dies);
+ }
+
+ // For each abstract parameter, if we have its concrete counterpart, insert
+ // it. Otherwise, insert the abstract parameter.
+ DIEArray::iterator concrete_it = variable_dies.begin();
+ DWARFDIE abstract_child = abs_die.GetFirstChild();
+ DIEArray merged;
+ bool did_merge_abstract = false;
+ for (; abstract_child; abstract_child = abstract_child.GetSibling()) {
+ if (abstract_child.Tag() == DW_TAG_formal_parameter) {
+ if (concrete_it == variable_dies.end() ||
+ GetDIE(*concrete_it).Tag() != DW_TAG_formal_parameter) {
+ // We arrived at the end of the concrete parameter list, so all
+ // the remaining abstract parameters must have been omitted.
+ // Insert them into the merged list here.
+ merged.push_back(*abstract_child.GetDIERef());
+ did_merge_abstract = true;
+ continue;
+ }
- if (variable_list_sp) {
- VariableSP var_sp(ParseVariableDIE(sc, die, func_low_pc));
- if (var_sp) {
- variable_list_sp->AddVariableIfUnique(var_sp);
- if (cc_variable_list)
- cc_variable_list->AddVariableIfUnique(var_sp);
- ++vars_added;
- }
- }
+ DWARFDIE origin_of_concrete =
+ GetDIE(*concrete_it).GetReferencedDIE(DW_AT_abstract_origin);
+ if (origin_of_concrete == abstract_child) {
+ // The current abstract parameter is the origin of the current
+ // concrete parameter, just push the concrete parameter.
+ merged.push_back(*concrete_it);
+ ++concrete_it;
+ } else {
+ // Otherwise, the parameter must have been omitted from the concrete
+ // function, so insert the abstract one.
+ merged.push_back(*abstract_child.GetDIERef());
+ did_merge_abstract = true;
}
}
+ }
- bool skip_children = (sc.function == nullptr && tag == DW_TAG_subprogram);
+ // Shortcut if no merging happened.
+ if (!did_merge_abstract)
+ return std::move(variable_dies);
- if (!skip_children && parse_children && die.HasChildren()) {
- vars_added += ParseVariables(sc, die.GetFirstChild(), func_low_pc, true,
- true, cc_variable_list);
+ // We inserted all the abstract parameters (or their concrete counterparts).
+ // Now insert all the remaining concrete variables into the merged list.
+ // While doing so, check that no concrete formal parameters remain; if one
+ // is found, just bail out of the merge -- the variable list is malformed.
+ for (; concrete_it != variable_dies.end(); ++concrete_it) {
+ if (GetDIE(*concrete_it).Tag() == DW_TAG_formal_parameter) {
+ return std::move(variable_dies);
}
+ merged.push_back(*concrete_it);
+ }
+ return merged;
+}
- if (parse_siblings)
- die = die.GetSibling();
- else
- die.Clear();
+size_t SymbolFileDWARF::ParseVariablesInFunctionContext(
+ const SymbolContext &sc, const DWARFDIE &die,
+ const lldb::addr_t func_low_pc) {
+ if (!die || !sc.function)
+ return 0;
+
+ DIEArray dummy_block_variables; // The recursive call should not add anything
+ // to this vector because |die| should be a
+ // subprogram, so all variables will be added
+ // to the subprogram's list.
+ return ParseVariablesInFunctionContextRecursive(sc, die, func_low_pc,
+ dummy_block_variables);
+}
+
+// This method parses all the variables in the blocks in the subtree of |die|
+// and inserts them into the variable lists of the nested blocks.
+// The uninserted variables for the current block are accumulated in
+// |accumulator|.
+size_t SymbolFileDWARF::ParseVariablesInFunctionContextRecursive(
+ const lldb_private::SymbolContext &sc, const DWARFDIE &die,
+ lldb::addr_t func_low_pc, DIEArray &accumulator) {
+ size_t vars_added = 0;
+ dw_tag_t tag = die.Tag();
+
+ if ((tag == DW_TAG_variable) || (tag == DW_TAG_constant) ||
+ (tag == DW_TAG_formal_parameter)) {
+ accumulator.push_back(*die.GetDIERef());
+ }
+
+ switch (tag) {
+ case DW_TAG_subprogram:
+ case DW_TAG_inlined_subroutine:
+ case DW_TAG_lexical_block: {
+ // If we start a new block, compute a new block variable list and recurse.
+ Block *block =
+ sc.function->GetBlock(/*can_create=*/true).FindBlockByID(die.GetID());
+ if (block == nullptr) {
+ // This must be a specification or abstract origin with a
+ // concrete block counterpart in the current function. We need
+ // to find the concrete block so we can correctly add the
+ // variable to it.
+ const DWARFDIE concrete_block_die = FindBlockContainingSpecification(
+ GetDIE(sc.function->GetID()), die.GetOffset());
+ if (concrete_block_die)
+ block = sc.function->GetBlock(/*can_create=*/true)
+ .FindBlockByID(concrete_block_die.GetID());
+ }
+
+ if (block == nullptr)
+ return 0;
+
+ const bool can_create = false;
+ VariableListSP block_variable_list_sp =
+ block->GetBlockVariableList(can_create);
+ if (block_variable_list_sp.get() == nullptr) {
+ block_variable_list_sp = std::make_shared<VariableList>();
+ block->SetVariableList(block_variable_list_sp);
+ }
+
+ DIEArray block_variables;
+ for (DWARFDIE child = die.GetFirstChild(); child;
+ child = child.GetSibling()) {
+ vars_added += ParseVariablesInFunctionContextRecursive(
+ sc, child, func_low_pc, block_variables);
+ }
+ block_variables =
+ MergeBlockAbstractParameters(die, std::move(block_variables));
+ vars_added += PopulateBlockVariableList(*block_variable_list_sp, sc,
+ block_variables, func_low_pc);
+ break;
+ }
+
+ default:
+ // Recurse to children with the same variable accumulator.
+ for (DWARFDIE child = die.GetFirstChild(); child;
+ child = child.GetSibling()) {
+ vars_added += ParseVariablesInFunctionContextRecursive(
+ sc, child, func_low_pc, accumulator);
+ }
+ break;
}
+
return vars_added;
}
+size_t SymbolFileDWARF::PopulateBlockVariableList(
+ VariableList &variable_list, const lldb_private::SymbolContext &sc,
+ llvm::ArrayRef<DIERef> variable_dies, lldb::addr_t func_low_pc) {
+ // Parse the variable DIEs and insert them to the list.
+ for (auto &die : variable_dies) {
+ if (VariableSP var_sp = ParseVariableDIE(sc, GetDIE(die), func_low_pc)) {
+ variable_list.AddVariableIfUnique(var_sp);
+ }
+ }
+ return variable_dies.size();
+}
+
/// Collect call site parameters in a DW_TAG_call_site DIE.
static CallSiteParameterArray
CollectCallSiteParameters(ModuleSP module, DWARFDIE call_site_die) {
@@ -3766,11 +3959,6 @@ SymbolFileDWARF::ParseCallEdgesInFunction(UserID func_id) {
return {};
}
-// PluginInterface protocol
-ConstString SymbolFileDWARF::GetPluginName() { return GetPluginNameStatic(); }
-
-uint32_t SymbolFileDWARF::GetPluginVersion() { return 1; }
-
void SymbolFileDWARF::Dump(lldb_private::Stream &s) {
SymbolFile::Dump(s);
m_index->Dump(s);
@@ -3784,7 +3972,7 @@ void SymbolFileDWARF::DumpClangAST(Stream &s) {
llvm::dyn_cast_or_null<TypeSystemClang>(&ts_or_err.get());
if (!clang)
return;
- clang->Dump(s);
+ clang->Dump(s.AsRawOstream());
}
SymbolFileDWARFDebugMap *SymbolFileDWARF::GetDebugMapSymfile() {
@@ -3889,3 +4077,9 @@ LanguageType SymbolFileDWARF::GetLanguageFamily(DWARFUnit &unit) {
lang = DW_LANG_C_plus_plus;
return LanguageTypeFromDWARF(lang);
}
+
+StatsDuration SymbolFileDWARF::GetDebugInfoIndexTime() {
+ if (m_index)
+ return m_index->GetIndexTime();
+ return StatsDuration(0.0);
+}
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h
index d9feeef549ed..271ce7be1eea 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h
@@ -24,6 +24,7 @@
#include "lldb/Symbol/DebugMacros.h"
#include "lldb/Symbol/SymbolContext.h"
#include "lldb/Symbol/SymbolFile.h"
+#include "lldb/Target/Statistics.h"
#include "lldb/Utility/ConstString.h"
#include "lldb/Utility/Flags.h"
#include "lldb/Utility/RangeMap.h"
@@ -83,9 +84,9 @@ public:
static void DebuggerInitialize(lldb_private::Debugger &debugger);
- static lldb_private::ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "dwarf"; }
- static const char *GetPluginDescriptionStatic();
+ static llvm::StringRef GetPluginDescriptionStatic();
static lldb_private::SymbolFile *
CreateInstance(lldb::ObjectFileSP objfile_sp);
@@ -218,9 +219,7 @@ public:
std::recursive_mutex &GetModuleMutex() const override;
// PluginInterface protocol
- lldb_private::ConstString GetPluginName() override;
-
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
DWARFDebugAbbrev *DebugAbbrev();
@@ -320,6 +319,16 @@ public:
/// Same as GetLanguage() but reports all C++ versions as C++ (no version).
static lldb::LanguageType GetLanguageFamily(DWARFUnit &unit);
+ lldb_private::StatsDuration GetDebugInfoParseTime() override {
+ return m_parse_time;
+ }
+ lldb_private::StatsDuration GetDebugInfoIndexTime() override;
+
+ lldb_private::StatsDuration &GetDebugInfoParseTimeRef() {
+ return m_parse_time;
+ }
+
+
protected:
typedef llvm::DenseMap<const DWARFDebugInfoEntry *, lldb_private::Type *>
DIEToTypePtr;
@@ -368,6 +377,9 @@ protected:
lldb::TypeSP ParseType(const lldb_private::SymbolContext &sc,
const DWARFDIE &die, bool *type_is_new);
+ bool ParseSupportFiles(DWARFUnit &dwarf_cu, const lldb::ModuleSP &module,
+ lldb_private::FileSpecList &support_files);
+
lldb_private::Type *ResolveTypeUID(const DWARFDIE &die,
bool assert_not_being_parsed);
@@ -376,12 +388,29 @@ protected:
lldb::VariableSP ParseVariableDIE(const lldb_private::SymbolContext &sc,
const DWARFDIE &die,
const lldb::addr_t func_low_pc);
+ lldb::VariableSP ParseVariableDIECached(const lldb_private::SymbolContext &sc,
+ const DWARFDIE &die);
- size_t ParseVariables(const lldb_private::SymbolContext &sc,
- const DWARFDIE &orig_die,
- const lldb::addr_t func_low_pc, bool parse_siblings,
- bool parse_children,
- lldb_private::VariableList *cc_variable_list = nullptr);
+ void
+ ParseAndAppendGlobalVariable(const lldb_private::SymbolContext &sc,
+ const DWARFDIE &die,
+ lldb_private::VariableList &cc_variable_list);
+
+ size_t ParseVariablesInFunctionContext(const lldb_private::SymbolContext &sc,
+ const DWARFDIE &die,
+ const lldb::addr_t func_low_pc);
+
+ size_t ParseVariablesInFunctionContextRecursive(
+ const lldb_private::SymbolContext &sc, const DWARFDIE &die,
+ lldb::addr_t func_low_pc, DIEArray &accumulator);
+
+ size_t PopulateBlockVariableList(lldb_private::VariableList &variable_list,
+ const lldb_private::SymbolContext &sc,
+ llvm::ArrayRef<DIERef> variable_dies,
+ lldb::addr_t func_low_pc);
+
+ DIEArray MergeBlockAbstractParameters(const DWARFDIE &block_die,
+ DIEArray &&variable_dies);
bool ClassOrStructIsVirtual(const DWARFDIE &die);
@@ -483,6 +512,11 @@ protected:
const lldb_private::FileSpecList &GetTypeUnitSupportFiles(DWARFTypeUnit &tu);
+ void InitializeFirstCodeAddressRecursive(
+ const lldb_private::SectionList &section_list);
+
+ void InitializeFirstCodeAddress();
+
lldb::ModuleWP m_debug_map_module_wp;
SymbolFileDWARFDebugMap *m_debug_map_symfile;
@@ -518,6 +552,14 @@ protected:
llvm::DenseMap<dw_offset_t, lldb_private::FileSpecList>
m_type_unit_support_files;
std::vector<uint32_t> m_lldb_cu_to_dwarf_unit;
+ /// DWARF does not provide a good way for traditional (concatenating) linkers
+ /// to invalidate debug info describing dead-stripped code. These linkers will
+ /// keep the debug info but resolve any addresses referring to such code as
+ /// zero (BFD) or a small positive integer (zero + relocation addend -- GOLD).
+ /// Try to filter out this debug info by comparing it to the lowest code
+ /// address in the module.
+ lldb::addr_t m_first_code_address = LLDB_INVALID_ADDRESS;
+ lldb_private::StatsDuration m_parse_time{0.0};
};
#endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_SYMBOLFILEDWARF_H
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp
index 4e2e5e16637b..2491f6af8c19 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp
@@ -16,6 +16,7 @@
#include "lldb/Host/FileSystem.h"
#include "lldb/Utility/RangeMap.h"
#include "lldb/Utility/RegularExpression.h"
+#include "lldb/Utility/Timer.h"
//#define DEBUG_OSO_DMAP // DO NOT CHECKIN WITH THIS NOT COMMENTED OUT
#if defined(DEBUG_OSO_DMAP)
@@ -33,9 +34,6 @@
#include "LogChannelDWARF.h"
#include "SymbolFileDWARF.h"
-// Work around the fact that Timer.h pulls in the system Mach-O headers.
-#include "lldb/Utility/Timer.h"
-
#include <memory>
using namespace lldb;
@@ -230,12 +228,7 @@ void SymbolFileDWARFDebugMap::Terminate() {
PluginManager::UnregisterPlugin(CreateInstance);
}
-lldb_private::ConstString SymbolFileDWARFDebugMap::GetPluginNameStatic() {
- static ConstString g_name("dwarf-debugmap");
- return g_name;
-}
-
-const char *SymbolFileDWARFDebugMap::GetPluginDescriptionStatic() {
+llvm::StringRef SymbolFileDWARFDebugMap::GetPluginDescriptionStatic() {
return "DWARF and DWARF3 debug symbol file reader (debug map).";
}
@@ -1236,13 +1229,6 @@ void SymbolFileDWARFDebugMap::DumpClangAST(Stream &s) {
});
}
-// PluginInterface protocol
-lldb_private::ConstString SymbolFileDWARFDebugMap::GetPluginName() {
- return GetPluginNameStatic();
-}
-
-uint32_t SymbolFileDWARFDebugMap::GetPluginVersion() { return 1; }
-
lldb::CompUnitSP
SymbolFileDWARFDebugMap::GetCompileUnit(SymbolFileDWARF *oso_dwarf) {
if (oso_dwarf) {
@@ -1443,3 +1429,54 @@ SymbolFileDWARFDebugMap::AddOSOARanges(SymbolFileDWARF *dwarf2Data,
}
return num_line_entries_added;
}
+
+uint64_t SymbolFileDWARFDebugMap::GetDebugInfoSize() {
+ uint64_t debug_info_size = 0;
+ ForEachSymbolFile([&](SymbolFileDWARF *oso_dwarf) -> bool {
+ ObjectFile *oso_objfile = oso_dwarf->GetObjectFile();
+ if (!oso_objfile)
+ return false; // Keep iterating
+ ModuleSP module_sp = oso_objfile->GetModule();
+ if (!module_sp)
+ return false; // Keep iterating
+ SectionList *section_list = module_sp->GetSectionList();
+ if (section_list)
+ debug_info_size += section_list->GetDebugInfoSize();
+ return false; // Keep iterating
+ });
+ return debug_info_size;
+}
+
+StatsDuration SymbolFileDWARFDebugMap::GetDebugInfoParseTime() {
+ StatsDuration elapsed(0.0);
+ ForEachSymbolFile([&](SymbolFileDWARF *oso_dwarf) -> bool {
+ ObjectFile *oso_objfile = oso_dwarf->GetObjectFile();
+ if (oso_objfile) {
+ ModuleSP module_sp = oso_objfile->GetModule();
+ if (module_sp) {
+ SymbolFile *symfile = module_sp->GetSymbolFile();
+ if (symfile)
+ elapsed += symfile->GetDebugInfoParseTime();
+ }
+ }
+ return false; // Keep iterating
+ });
+ return elapsed;
+}
+
+StatsDuration SymbolFileDWARFDebugMap::GetDebugInfoIndexTime() {
+ StatsDuration elapsed(0.0);
+ ForEachSymbolFile([&](SymbolFileDWARF *oso_dwarf) -> bool {
+ ObjectFile *oso_objfile = oso_dwarf->GetObjectFile();
+ if (oso_objfile) {
+ ModuleSP module_sp = oso_objfile->GetModule();
+ if (module_sp) {
+ SymbolFile *symfile = module_sp->GetSymbolFile();
+ if (symfile)
+ elapsed += symfile->GetDebugInfoIndexTime();
+ }
+ }
+ return false; // Keep iterating
+ });
+ return elapsed;
+}
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h
index 8b6624e70869..74f32442de2f 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h
@@ -40,9 +40,9 @@ public:
static void Terminate();
- static lldb_private::ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "dwarf-debugmap"; }
- static const char *GetPluginDescriptionStatic();
+ static llvm::StringRef GetPluginDescriptionStatic();
static lldb_private::SymbolFile *
CreateInstance(lldb::ObjectFileSP objfile_sp);
@@ -140,9 +140,11 @@ public:
void DumpClangAST(lldb_private::Stream &s) override;
// PluginInterface protocol
- lldb_private::ConstString GetPluginName() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
- uint32_t GetPluginVersion() override;
+ uint64_t GetDebugInfoSize() override;
+ lldb_private::StatsDuration GetDebugInfoParseTime() override;
+ lldb_private::StatsDuration GetDebugInfoIndexTime() override;
protected:
enum { kHaveInitializedOSOs = (1 << 0), kNumFlags };
diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp
index 43cf262016c2..c29fc2230a67 100644
--- a/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp
+++ b/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp
@@ -1105,7 +1105,7 @@ clang::QualType PdbAstBuilder::CreateEnumType(PdbTypeSymId id,
Declaration declaration;
CompilerType enum_ct = m_clang.CreateEnumerationType(
- uname.c_str(), decl_context, OptionalClangModuleID(), declaration,
+ uname, decl_context, OptionalClangModuleID(), declaration,
ToCompilerType(underlying_type), er.isScoped());
TypeSystemClang::StartTagDeclarationDefinition(enum_ct);
@@ -1358,4 +1358,6 @@ PdbAstBuilder::FromCompilerDeclContext(CompilerDeclContext context) {
return static_cast<clang::DeclContext *>(context.GetOpaqueDeclContext());
}
-void PdbAstBuilder::Dump(Stream &stream) { m_clang.Dump(stream); }
+void PdbAstBuilder::Dump(Stream &stream) {
+ m_clang.Dump(stream.AsRawOstream());
+}
diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp
index b9b075d83b6a..8af90cb66e87 100644
--- a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp
+++ b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp
@@ -240,12 +240,7 @@ void SymbolFileNativePDB::Terminate() {
void SymbolFileNativePDB::DebuggerInitialize(Debugger &debugger) {}
-ConstString SymbolFileNativePDB::GetPluginNameStatic() {
- static ConstString g_name("native-pdb");
- return g_name;
-}
-
-const char *SymbolFileNativePDB::GetPluginDescriptionStatic() {
+llvm::StringRef SymbolFileNativePDB::GetPluginDescriptionStatic() {
return "Microsoft PDB debug symbol cross-platform file reader.";
}
@@ -950,11 +945,11 @@ uint32_t SymbolFileNativePDB::ResolveSymbolContext(
llvm::Optional<uint16_t> modi = m_index->GetModuleIndexForVa(file_addr);
if (!modi)
return 0;
- CompilandIndexItem *cci = m_index->compilands().GetCompiland(*modi);
- if (!cci)
+ CompUnitSP cu_sp = GetCompileUnitAtIndex(modi.getValue());
+ if (!cu_sp)
return 0;
- sc.comp_unit = GetOrCreateCompileUnit(*cci).get();
+ sc.comp_unit = cu_sp.get();
resolved_flags |= eSymbolContextCompUnit;
}
@@ -1567,9 +1562,8 @@ SymbolFileNativePDB::GetTypeSystemForLanguage(lldb::LanguageType language) {
return type_system_or_err;
}
-ConstString SymbolFileNativePDB::GetPluginName() {
- static ConstString g_name("pdb");
- return g_name;
+uint64_t SymbolFileNativePDB::GetDebugInfoSize() {
+ // The PDB is a separate file that contains all the debug info.
+ return m_index->pdb().getFileSize();
}
-uint32_t SymbolFileNativePDB::GetPluginVersion() { return 1; }
diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h
index def0995065ca..56a5ec0a464d 100644
--- a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h
+++ b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h
@@ -60,9 +60,9 @@ public:
static void DebuggerInitialize(Debugger &debugger);
- static ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "native-pdb"; }
- static const char *GetPluginDescriptionStatic();
+ static llvm::StringRef GetPluginDescriptionStatic();
static SymbolFile *CreateInstance(lldb::ObjectFileSP objfile_sp);
@@ -75,6 +75,8 @@ public:
void InitializeObject() override;
+ uint64_t GetDebugInfoSize() override;
+
// Compile Unit function calls
void
@@ -151,9 +153,7 @@ public:
FindNamespace(ConstString name,
const CompilerDeclContext &parent_decl_ctx) override;
- ConstString GetPluginName() override;
-
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
llvm::pdb::PDBFile &GetPDBFile() { return m_index->pdb(); }
const llvm::pdb::PDBFile &GetPDBFile() const { return m_index->pdb(); }
diff --git a/lldb/source/Plugins/SymbolFile/PDB/PDBASTParser.cpp b/lldb/source/Plugins/SymbolFile/PDB/PDBASTParser.cpp
index 78a0d09a681a..f45287fd0fff 100644
--- a/lldb/source/Plugins/SymbolFile/PDB/PDBASTParser.cpp
+++ b/lldb/source/Plugins/SymbolFile/PDB/PDBASTParser.cpp
@@ -497,7 +497,7 @@ lldb::TypeSP PDBASTParser::CreateLLDBTypeFromPDBType(const PDBSymbol &type) {
// Class). Set it false for now.
bool isScoped = false;
- ast_enum = m_ast.CreateEnumerationType(name.c_str(), decl_context,
+ ast_enum = m_ast.CreateEnumerationType(name, decl_context,
OptionalClangModuleID(), decl,
builtin_type, isScoped);
diff --git a/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp b/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp
index 6b30ad26dca7..45dfc4b9a152 100644
--- a/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp
+++ b/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp
@@ -52,6 +52,10 @@
#include "Plugins/Language/CPlusPlus/MSVCUndecoratedNameParser.h"
#include "Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h"
+#if defined(_WIN32)
+#include "llvm/Config/config.h"
+#endif
+
using namespace lldb;
using namespace lldb_private;
using namespace llvm::pdb;
@@ -83,14 +87,16 @@ bool ShouldAddLine(uint32_t requested_line, uint32_t actual_line,
static bool ShouldUseNativeReader() {
#if defined(_WIN32)
+#if LLVM_ENABLE_DIA_SDK
llvm::StringRef use_native = ::getenv("LLDB_USE_NATIVE_PDB_READER");
- return use_native.equals_insensitive("on") ||
- use_native.equals_insensitive("yes") ||
- use_native.equals_insensitive("1") ||
- use_native.equals_insensitive("true");
-#else
- return true;
+ if (!use_native.equals_insensitive("on") &&
+ !use_native.equals_insensitive("yes") &&
+ !use_native.equals_insensitive("1") &&
+ !use_native.equals_insensitive("true"))
+ return false;
+#endif
#endif
+ return true;
}
void SymbolFilePDB::Initialize() {
@@ -113,12 +119,7 @@ void SymbolFilePDB::Terminate() {
void SymbolFilePDB::DebuggerInitialize(lldb_private::Debugger &debugger) {}
-lldb_private::ConstString SymbolFilePDB::GetPluginNameStatic() {
- static ConstString g_name("pdb");
- return g_name;
-}
-
-const char *SymbolFilePDB::GetPluginDescriptionStatic() {
+llvm::StringRef SymbolFilePDB::GetPluginDescriptionStatic() {
return "Microsoft PDB debug symbol file reader.";
}
@@ -1455,7 +1456,7 @@ void SymbolFilePDB::DumpClangAST(Stream &s) {
llvm::dyn_cast_or_null<TypeSystemClang>(&type_system_or_err.get());
if (!clang_type_system)
return;
- clang_type_system->Dump(s);
+ clang_type_system->Dump(s.AsRawOstream());
}
void SymbolFilePDB::FindTypesByRegex(
@@ -1707,13 +1708,6 @@ SymbolFilePDB::FindNamespace(lldb_private::ConstString name,
return clang_type_system->CreateDeclContext(namespace_decl);
}
-lldb_private::ConstString SymbolFilePDB::GetPluginName() {
- static ConstString g_name("pdb");
- return g_name;
-}
-
-uint32_t SymbolFilePDB::GetPluginVersion() { return 1; }
-
IPDBSession &SymbolFilePDB::GetPDBSession() { return *m_session_up; }
const IPDBSession &SymbolFilePDB::GetPDBSession() const {
diff --git a/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.h b/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.h
index 2171b7f686cc..69f1d268edfd 100644
--- a/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.h
+++ b/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.h
@@ -41,9 +41,9 @@ public:
static void DebuggerInitialize(lldb_private::Debugger &debugger);
- static lldb_private::ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "pdb"; }
- static const char *GetPluginDescriptionStatic();
+ static llvm::StringRef GetPluginDescriptionStatic();
static lldb_private::SymbolFile *
CreateInstance(lldb::ObjectFileSP objfile_sp);
@@ -161,9 +161,7 @@ public:
lldb_private::ConstString name,
const lldb_private::CompilerDeclContext &parent_decl_ctx) override;
- lldb_private::ConstString GetPluginName() override;
-
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
llvm::pdb::IPDBSession &GetPDBSession();
diff --git a/lldb/source/Plugins/SymbolFile/Symtab/SymbolFileSymtab.cpp b/lldb/source/Plugins/SymbolFile/Symtab/SymbolFileSymtab.cpp
index 3a5e02d4fb86..d95cfea5e872 100644
--- a/lldb/source/Plugins/SymbolFile/Symtab/SymbolFileSymtab.cpp
+++ b/lldb/source/Plugins/SymbolFile/Symtab/SymbolFileSymtab.cpp
@@ -38,12 +38,7 @@ void SymbolFileSymtab::Terminate() {
PluginManager::UnregisterPlugin(CreateInstance);
}
-lldb_private::ConstString SymbolFileSymtab::GetPluginNameStatic() {
- static ConstString g_name("symtab");
- return g_name;
-}
-
-const char *SymbolFileSymtab::GetPluginDescriptionStatic() {
+llvm::StringRef SymbolFileSymtab::GetPluginDescriptionStatic() {
return "Reads debug symbols from an object file's symbol table.";
}
@@ -260,10 +255,3 @@ uint32_t SymbolFileSymtab::ResolveSymbolContext(const Address &so_addr,
}
return resolved_flags;
}
-
-// PluginInterface protocol
-lldb_private::ConstString SymbolFileSymtab::GetPluginName() {
- return GetPluginNameStatic();
-}
-
-uint32_t SymbolFileSymtab::GetPluginVersion() { return 1; }
diff --git a/lldb/source/Plugins/SymbolFile/Symtab/SymbolFileSymtab.h b/lldb/source/Plugins/SymbolFile/Symtab/SymbolFileSymtab.h
index 377c41e50770..2dad12baac6f 100644
--- a/lldb/source/Plugins/SymbolFile/Symtab/SymbolFileSymtab.h
+++ b/lldb/source/Plugins/SymbolFile/Symtab/SymbolFileSymtab.h
@@ -36,9 +36,9 @@ public:
static void Terminate();
- static lldb_private::ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "symtab"; }
- static const char *GetPluginDescriptionStatic();
+ static llvm::StringRef GetPluginDescriptionStatic();
static lldb_private::SymbolFile *
CreateInstance(lldb::ObjectFileSP objfile_sp);
@@ -85,9 +85,7 @@ public:
lldb_private::TypeList &type_list) override;
// PluginInterface protocol
- lldb_private::ConstString GetPluginName() override;
-
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
protected:
uint32_t CalculateNumCompileUnits() override;
diff --git a/lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.cpp b/lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.cpp
index 9130eed63e43..0a98f5032b77 100644
--- a/lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.cpp
+++ b/lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.cpp
@@ -40,12 +40,7 @@ void SymbolVendorELF::Terminate() {
PluginManager::UnregisterPlugin(CreateInstance);
}
-lldb_private::ConstString SymbolVendorELF::GetPluginNameStatic() {
- static ConstString g_name("ELF");
- return g_name;
-}
-
-const char *SymbolVendorELF::GetPluginDescriptionStatic() {
+llvm::StringRef SymbolVendorELF::GetPluginDescriptionStatic() {
return "Symbol vendor for ELF that looks for dSYM files that match "
"executables.";
}
@@ -144,8 +139,3 @@ SymbolVendorELF::CreateInstance(const lldb::ModuleSP &module_sp,
symbol_vendor->AddSymbolFileRepresentation(dsym_objfile_sp);
return symbol_vendor;
}
-
-// PluginInterface protocol
-ConstString SymbolVendorELF::GetPluginName() { return GetPluginNameStatic(); }
-
-uint32_t SymbolVendorELF::GetPluginVersion() { return 1; }
diff --git a/lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.h b/lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.h
index 2080084a15b8..13a1071e38ab 100644
--- a/lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.h
+++ b/lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.h
@@ -22,18 +22,16 @@ public:
static void Terminate();
- static lldb_private::ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "ELF"; }
- static const char *GetPluginDescriptionStatic();
+ static llvm::StringRef GetPluginDescriptionStatic();
static lldb_private::SymbolVendor *
CreateInstance(const lldb::ModuleSP &module_sp,
lldb_private::Stream *feedback_strm);
// PluginInterface protocol
- lldb_private::ConstString GetPluginName() override;
-
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
};
#endif // LLDB_SOURCE_PLUGINS_SYMBOLVENDOR_ELF_SYMBOLVENDORELF_H
diff --git a/lldb/source/Plugins/SymbolVendor/wasm/SymbolVendorWasm.cpp b/lldb/source/Plugins/SymbolVendor/wasm/SymbolVendorWasm.cpp
index 2b2840796579..9da5b0133d37 100644
--- a/lldb/source/Plugins/SymbolVendor/wasm/SymbolVendorWasm.cpp
+++ b/lldb/source/Plugins/SymbolVendor/wasm/SymbolVendorWasm.cpp
@@ -41,12 +41,7 @@ void SymbolVendorWasm::Terminate() {
PluginManager::UnregisterPlugin(CreateInstance);
}
-lldb_private::ConstString SymbolVendorWasm::GetPluginNameStatic() {
- static ConstString g_name("WASM");
- return g_name;
-}
-
-const char *SymbolVendorWasm::GetPluginDescriptionStatic() {
+llvm::StringRef SymbolVendorWasm::GetPluginDescriptionStatic() {
return "Symbol vendor for WASM that looks for dwo files that match "
"executables.";
}
@@ -139,8 +134,3 @@ SymbolVendorWasm::CreateInstance(const lldb::ModuleSP &module_sp,
symbol_vendor->AddSymbolFileRepresentation(sym_objfile_sp);
return symbol_vendor;
}
-
-// PluginInterface protocol
-ConstString SymbolVendorWasm::GetPluginName() { return GetPluginNameStatic(); }
-
-uint32_t SymbolVendorWasm::GetPluginVersion() { return 1; }
diff --git a/lldb/source/Plugins/SymbolVendor/wasm/SymbolVendorWasm.h b/lldb/source/Plugins/SymbolVendor/wasm/SymbolVendorWasm.h
index b212337e0549..af692c6e9e00 100644
--- a/lldb/source/Plugins/SymbolVendor/wasm/SymbolVendorWasm.h
+++ b/lldb/source/Plugins/SymbolVendor/wasm/SymbolVendorWasm.h
@@ -21,8 +21,8 @@ public:
static void Initialize();
static void Terminate();
- static lldb_private::ConstString GetPluginNameStatic();
- static const char *GetPluginDescriptionStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "WASM"; }
+ static llvm::StringRef GetPluginDescriptionStatic();
static lldb_private::SymbolVendor *
CreateInstance(const lldb::ModuleSP &module_sp,
@@ -30,8 +30,7 @@ public:
/// PluginInterface protocol.
/// \{
- lldb_private::ConstString GetPluginName() override;
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
/// \}
};
diff --git a/lldb/source/Plugins/Trace/common/TraceJSONStructs.cpp b/lldb/source/Plugins/Trace/common/TraceJSONStructs.cpp
new file mode 100644
index 000000000000..1773a6003701
--- /dev/null
+++ b/lldb/source/Plugins/Trace/common/TraceJSONStructs.cpp
@@ -0,0 +1,106 @@
+//===-- TraceJSONStructs.cpp ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "TraceJSONStructs.h"
+#include "ThreadPostMortemTrace.h"
+#include "lldb/Core/Debugger.h"
+#include "lldb/Core/Module.h"
+#include "lldb/Target/Process.h"
+#include "lldb/Target/Target.h"
+#include <sstream>
+
+using namespace lldb_private;
+namespace llvm {
+namespace json {
+
+llvm::json::Value toJSON(const JSONModule &module) {
+ llvm::json::Object json_module;
+ json_module["systemPath"] = module.system_path;
+ if (module.file)
+ json_module["file"] = *module.file;
+ std::ostringstream oss;
+ oss << "0x" << std::hex << module.load_address.value;
+ json_module["loadAddress"] = oss.str();
+ if (module.uuid)
+ json_module["uuid"] = *module.uuid;
+ return std::move(json_module);
+}
+
+llvm::json::Value toJSON(const JSONThread &thread) {
+ return Value(Object{{"tid", thread.tid}, {"traceFile", thread.trace_file}});
+}
+
+llvm::json::Value toJSON(const JSONProcess &process) {
+ llvm::json::Object json_process;
+ json_process["pid"] = process.pid;
+ json_process["triple"] = process.triple;
+
+ llvm::json::Array threads_arr;
+ for (JSONThread e : process.threads)
+ threads_arr.push_back(toJSON(e));
+
+ json_process["threads"] = llvm::json::Value(std::move(threads_arr));
+
+ llvm::json::Array modules_arr;
+ for (JSONModule e : process.modules)
+ modules_arr.push_back(toJSON(e));
+
+ json_process["modules"] = llvm::json::Value(std::move(modules_arr));
+
+ return std::move(json_process);
+}
+
+llvm::json::Value toJSON(const JSONTraceSessionBase &session) {
+ llvm::json::Array arr;
+ for (JSONProcess e : session.processes)
+ arr.push_back(toJSON(e));
+
+ return std::move(arr);
+}
+
+bool fromJSON(const Value &value, JSONAddress &address, Path path) {
+ Optional<StringRef> s = value.getAsString();
+ if (s.hasValue() && !s->getAsInteger(0, address.value))
+ return true;
+
+ path.report("expected numeric string");
+ return false;
+}
+
+bool fromJSON(const Value &value, JSONModule &module, Path path) {
+ ObjectMapper o(value, path);
+ return o && o.map("systemPath", module.system_path) &&
+ o.map("file", module.file) &&
+ o.map("loadAddress", module.load_address) &&
+ o.map("uuid", module.uuid);
+}
+
+bool fromJSON(const Value &value, JSONThread &thread, Path path) {
+ ObjectMapper o(value, path);
+ return o && o.map("tid", thread.tid) && o.map("traceFile", thread.trace_file);
+}
+
+bool fromJSON(const Value &value, JSONProcess &process, Path path) {
+ ObjectMapper o(value, path);
+ return o && o.map("pid", process.pid) && o.map("triple", process.triple) &&
+ o.map("threads", process.threads) && o.map("modules", process.modules);
+}
+
+bool fromJSON(const Value &value, JSONTracePluginSettings &plugin_settings,
+ Path path) {
+ ObjectMapper o(value, path);
+ return o && o.map("type", plugin_settings.type);
+}
+
+bool fromJSON(const Value &value, JSONTraceSessionBase &session, Path path) {
+ ObjectMapper o(value, path);
+ return o && o.map("processes", session.processes);
+}
+
+} // namespace json
+} // namespace llvm
diff --git a/lldb/source/Plugins/Trace/common/TraceJSONStructs.h b/lldb/source/Plugins/Trace/common/TraceJSONStructs.h
new file mode 100644
index 000000000000..e01c33bf0d6a
--- /dev/null
+++ b/lldb/source/Plugins/Trace/common/TraceJSONStructs.h
@@ -0,0 +1,98 @@
+//===-- TraceJSONStructs.h --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLDB_TARGET_TRACEJSONSTRUCTS_H
+#define LLDB_TARGET_TRACEJSONSTRUCTS_H
+
+#include "lldb/lldb-types.h"
+#include "llvm/Support/JSON.h"
+
+namespace lldb_private {
+
+struct JSONAddress {
+ lldb::addr_t value;
+};
+
+struct JSONModule {
+ std::string system_path;
+ llvm::Optional<std::string> file;
+ JSONAddress load_address;
+ llvm::Optional<std::string> uuid;
+};
+
+struct JSONThread {
+ int64_t tid;
+ std::string trace_file;
+};
+
+struct JSONProcess {
+ int64_t pid;
+ std::string triple;
+ std::vector<JSONThread> threads;
+ std::vector<JSONModule> modules;
+};
+
+struct JSONTracePluginSettings {
+ std::string type;
+};
+
+struct JSONTraceSessionBase {
+ std::vector<JSONProcess> processes;
+};
+
+/// The trace plug-in implementation should provide its own TPluginSettings,
+/// which corresponds to the "trace" section of the schema.
+template <class TPluginSettings>
+struct JSONTraceSession : JSONTraceSessionBase {
+ TPluginSettings trace;
+};
+
+} // namespace lldb_private
+
+namespace llvm {
+namespace json {
+
+llvm::json::Value toJSON(const lldb_private::JSONModule &module);
+
+llvm::json::Value toJSON(const lldb_private::JSONThread &thread);
+
+llvm::json::Value toJSON(const lldb_private::JSONProcess &process);
+
+llvm::json::Value
+toJSON(const lldb_private::JSONTraceSessionBase &session_base);
+
+bool fromJSON(const Value &value, lldb_private::JSONAddress &address,
+ Path path);
+
+bool fromJSON(const Value &value, lldb_private::JSONModule &module, Path path);
+
+bool fromJSON(const Value &value, lldb_private::JSONThread &thread, Path path);
+
+bool fromJSON(const Value &value, lldb_private::JSONProcess &process,
+ Path path);
+
+bool fromJSON(const Value &value,
+ lldb_private::JSONTracePluginSettings &plugin_settings,
+ Path path);
+
+bool fromJSON(const Value &value, lldb_private::JSONTraceSessionBase &session,
+ Path path);
+
+template <class TPluginSettings>
+bool fromJSON(const Value &value,
+ lldb_private::JSONTraceSession<TPluginSettings> &session,
+ Path path) {
+ ObjectMapper o(value, path);
+ return o && o.map("trace", session.trace) &&
+ fromJSON(value, (lldb_private::JSONTraceSessionBase &)session, path);
+}
+
+} // namespace json
+} // namespace llvm
+
+#endif // LLDB_TARGET_TRACEJSONSTRUCTS_H
diff --git a/lldb/source/Plugins/Trace/common/TraceSessionFileParser.cpp b/lldb/source/Plugins/Trace/common/TraceSessionFileParser.cpp
index c88ad9dc6a59..b26704ca34be 100644
--- a/lldb/source/Plugins/Trace/common/TraceSessionFileParser.cpp
+++ b/lldb/source/Plugins/Trace/common/TraceSessionFileParser.cpp
@@ -170,55 +170,3 @@ TraceSessionFileParser::ParseCommonSessionFile(
}
return parsed_processes;
}
-
-namespace llvm {
-namespace json {
-
-bool fromJSON(const Value &value, TraceSessionFileParser::JSONAddress &address,
- Path path) {
- Optional<StringRef> s = value.getAsString();
- if (s.hasValue() && !s->getAsInteger(0, address.value))
- return true;
-
- path.report("expected numeric string");
- return false;
-}
-
-bool fromJSON(const Value &value, TraceSessionFileParser::JSONModule &module,
- Path path) {
- ObjectMapper o(value, path);
- return o && o.map("systemPath", module.system_path) &&
- o.map("file", module.file) &&
- o.map("loadAddress", module.load_address) &&
- o.map("uuid", module.uuid);
-}
-
-bool fromJSON(const Value &value, TraceSessionFileParser::JSONThread &thread,
- Path path) {
- ObjectMapper o(value, path);
- return o && o.map("tid", thread.tid) && o.map("traceFile", thread.trace_file);
-}
-
-bool fromJSON(const Value &value, TraceSessionFileParser::JSONProcess &process,
- Path path) {
- ObjectMapper o(value, path);
- return o && o.map("pid", process.pid) && o.map("triple", process.triple) &&
- o.map("threads", process.threads) && o.map("modules", process.modules);
-}
-
-bool fromJSON(const Value &value,
- TraceSessionFileParser::JSONTracePluginSettings &plugin_settings,
- Path path) {
- ObjectMapper o(value, path);
- return o && o.map("type", plugin_settings.type);
-}
-
-bool fromJSON(const Value &value,
- TraceSessionFileParser::JSONTraceSessionBase &session,
- Path path) {
- ObjectMapper o(value, path);
- return o && o.map("processes", session.processes);
-}
-
-} // namespace json
-} // namespace llvm
diff --git a/lldb/source/Plugins/Trace/common/TraceSessionFileParser.h b/lldb/source/Plugins/Trace/common/TraceSessionFileParser.h
index 6abaffcecd3a..19cc2f59ded7 100644
--- a/lldb/source/Plugins/Trace/common/TraceSessionFileParser.h
+++ b/lldb/source/Plugins/Trace/common/TraceSessionFileParser.h
@@ -9,9 +9,8 @@
#ifndef LLDB_TARGET_TRACESESSIONPARSER_H
#define LLDB_TARGET_TRACESESSIONPARSER_H
-#include "llvm/Support/JSON.h"
-
#include "ThreadPostMortemTrace.h"
+#include "TraceJSONStructs.h"
namespace lldb_private {
@@ -24,46 +23,6 @@ namespace lldb_private {
/// See \a Trace::FindPlugin for more information regarding these JSON files.
class TraceSessionFileParser {
public:
- /// C++ structs representing the JSON trace session.
- /// \{
- struct JSONAddress {
- lldb::addr_t value;
- };
-
- struct JSONModule {
- std::string system_path;
- llvm::Optional<std::string> file;
- JSONAddress load_address;
- llvm::Optional<std::string> uuid;
- };
-
- struct JSONThread {
- int64_t tid;
- std::string trace_file;
- };
-
- struct JSONProcess {
- int64_t pid;
- std::string triple;
- std::vector<JSONThread> threads;
- std::vector<JSONModule> modules;
- };
-
- struct JSONTracePluginSettings {
- std::string type;
- };
-
- struct JSONTraceSessionBase {
- std::vector<JSONProcess> processes;
- };
-
- /// The trace plug-in implementation should provide its own TPluginSettings,
- /// which corresponds to the "trace" section of the schema.
- template <class TPluginSettings>
- struct JSONTraceSession : JSONTraceSessionBase {
- TPluginSettings trace;
- };
- /// \}
/// Helper struct holding the objects created when parsing a process
struct ParsedProcess {
@@ -130,50 +89,5 @@ protected:
};
} // namespace lldb_private
-namespace llvm {
-namespace json {
-
-bool fromJSON(const Value &value,
- lldb_private::TraceSessionFileParser::JSONAddress &address,
- Path path);
-
-bool fromJSON(const Value &value,
- lldb_private::TraceSessionFileParser::JSONModule &module,
- Path path);
-
-bool fromJSON(const Value &value,
- lldb_private::TraceSessionFileParser::JSONThread &thread,
- Path path);
-
-bool fromJSON(const Value &value,
- lldb_private::TraceSessionFileParser::JSONProcess &process,
- Path path);
-
-bool fromJSON(const Value &value,
- lldb_private::TraceSessionFileParser::JSONTracePluginSettings
- &plugin_settings,
- Path path);
-
-bool fromJSON(
- const Value &value,
- lldb_private::TraceSessionFileParser::JSONTraceSessionBase &session,
- Path path);
-
-template <class TPluginSettings>
-bool fromJSON(
- const Value &value,
- lldb_private::TraceSessionFileParser::JSONTraceSession<TPluginSettings>
- &session,
- Path path) {
- ObjectMapper o(value, path);
- return o && o.map("trace", session.trace) &&
- fromJSON(value,
- (lldb_private::TraceSessionFileParser::JSONTraceSessionBase &)
- session,
- path);
-}
-
-} // namespace json
-} // namespace llvm
#endif // LLDB_TARGET_TRACESESSIONPARSER_H
diff --git a/lldb/source/Plugins/Trace/common/TraceSessionSaver.cpp b/lldb/source/Plugins/Trace/common/TraceSessionSaver.cpp
new file mode 100644
index 000000000000..8c20a3b158cd
--- /dev/null
+++ b/lldb/source/Plugins/Trace/common/TraceSessionSaver.cpp
@@ -0,0 +1,149 @@
+//===-- TraceSessionSaver.cpp ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "TraceSessionSaver.h"
+
+#include "lldb/Core/Module.h"
+#include "lldb/Core/Value.h"
+#include "lldb/Target/Process.h"
+#include "lldb/Target/SectionLoadList.h"
+#include "lldb/Target/Target.h"
+#include "lldb/lldb-types.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/JSON.h"
+
+#include <fstream>
+
+using namespace lldb;
+using namespace lldb_private;
+using namespace llvm;
+
+llvm::Error TraceSessionSaver::WriteSessionToFile(
+ const llvm::json::Value &trace_session_description, FileSpec directory) {
+
+ FileSpec trace_path = directory;
+ trace_path.AppendPathComponent("trace.json");
+ std::ofstream os(trace_path.GetPath());
+ os << std::string(formatv("{0:2}", trace_session_description));
+ os.close();
+ if (!os)
+ return createStringError(inconvertibleErrorCode(),
+ formatv("couldn't write to the file {0}",
+ trace_path.GetPath().c_str()));
+ return Error::success();
+}
+
+llvm::Expected<JSONTraceSessionBase> TraceSessionSaver::BuildProcessesSection(
+ Process &live_process,
+ std::function<
+ llvm::Expected<llvm::Optional<std::vector<uint8_t>>>(lldb::tid_t tid)>
+ raw_trace_fetcher,
+ FileSpec directory) {
+
+ JSONTraceSessionBase json_session_description;
+ Expected<std::vector<JSONThread>> json_threads =
+ BuildThreadsSection(live_process, raw_trace_fetcher, directory);
+ if (!json_threads)
+ return json_threads.takeError();
+
+ Expected<std::vector<JSONModule>> json_modules =
+ BuildModulesSection(live_process, directory);
+ if (!json_modules)
+ return json_modules.takeError();
+
+ json_session_description.processes.push_back(JSONProcess{
+ static_cast<int64_t>(live_process.GetID()),
+ live_process.GetTarget().GetArchitecture().GetTriple().getTriple(),
+ json_threads.get(), json_modules.get()});
+ return json_session_description;
+}
+
+llvm::Expected<std::vector<JSONThread>> TraceSessionSaver::BuildThreadsSection(
+ Process &live_process,
+ std::function<
+ llvm::Expected<llvm::Optional<std::vector<uint8_t>>>(lldb::tid_t tid)>
+ raw_trace_fetcher,
+ FileSpec directory) {
+ std::vector<JSONThread> json_threads;
+ for (ThreadSP thread_sp : live_process.Threads()) {
+ // resolve the directory just in case
+ FileSystem::Instance().Resolve(directory);
+ FileSpec raw_trace_path = directory;
+ raw_trace_path.AppendPathComponent(std::to_string(thread_sp->GetID()) +
+ ".trace");
+ json_threads.push_back(JSONThread{static_cast<int64_t>(thread_sp->GetID()),
+ raw_trace_path.GetPath().c_str()});
+
+ llvm::Expected<llvm::Optional<std::vector<uint8_t>>> raw_trace =
+ raw_trace_fetcher(thread_sp->GetID());
+
+ if (!raw_trace)
+ return raw_trace.takeError();
+ if (!raw_trace.get())
+ continue;
+
+ std::basic_fstream<char> raw_trace_fs = std::fstream(
+ raw_trace_path.GetPath().c_str(), std::ios::out | std::ios::binary);
+ raw_trace_fs.write(reinterpret_cast<const char *>(&raw_trace.get()->at(0)),
+ raw_trace.get()->size() * sizeof(uint8_t));
+ raw_trace_fs.close();
+ if (!raw_trace_fs) {
+ return createStringError(inconvertibleErrorCode(),
+ formatv("couldn't write to the file {0}",
+ raw_trace_path.GetPath().c_str()));
+ }
+ }
+ return json_threads;
+}
+
+llvm::Expected<std::vector<JSONModule>>
+TraceSessionSaver::BuildModulesSection(Process &live_process,
+ FileSpec directory) {
+ std::vector<JSONModule> json_modules;
+ ModuleList module_list = live_process.GetTarget().GetImages();
+ for (size_t i = 0; i < module_list.GetSize(); ++i) {
+ ModuleSP module_sp(module_list.GetModuleAtIndex(i));
+ if (!module_sp)
+ continue;
+ std::string system_path = module_sp->GetPlatformFileSpec().GetPath();
+ // TODO: support memory-only libraries like [vdso]
+ if (!module_sp->GetFileSpec().IsAbsolute())
+ continue;
+
+ std::string file = module_sp->GetFileSpec().GetPath();
+ ObjectFile *objfile = module_sp->GetObjectFile();
+ if (objfile == nullptr)
+ continue;
+
+ lldb::addr_t load_addr = LLDB_INVALID_ADDRESS;
+ Address base_addr(objfile->GetBaseAddress());
+ if (base_addr.IsValid() &&
+ !live_process.GetTarget().GetSectionLoadList().IsEmpty())
+ load_addr = base_addr.GetLoadAddress(&live_process.GetTarget());
+
+ if (load_addr == LLDB_INVALID_ADDRESS)
+ continue;
+
+ FileSystem::Instance().Resolve(directory);
+ FileSpec path_to_copy_module = directory;
+ path_to_copy_module.AppendPathComponent("modules");
+ path_to_copy_module.AppendPathComponent(system_path);
+ sys::fs::create_directories(path_to_copy_module.GetDirectory().AsCString());
+
+ if (std::error_code ec = llvm::sys::fs::copy_file(
+ system_path, path_to_copy_module.GetPath()))
+ return createStringError(
+ inconvertibleErrorCode(),
+ formatv("couldn't write to the file. {0}", ec.message()));
+
+ json_modules.push_back(
+ JSONModule{system_path, path_to_copy_module.GetPath(),
+ JSONAddress{load_addr}, module_sp->GetUUID().GetAsString()});
+ }
+ return json_modules;
+}
diff --git a/lldb/source/Plugins/Trace/common/TraceSessionSaver.h b/lldb/source/Plugins/Trace/common/TraceSessionSaver.h
new file mode 100644
index 000000000000..9a1a75c167dc
--- /dev/null
+++ b/lldb/source/Plugins/Trace/common/TraceSessionSaver.h
@@ -0,0 +1,112 @@
+//===-- TraceSessionSaver.h -------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLDB_TARGET_TRACESESSIONSAVER_H
+#define LLDB_TARGET_TRACESESSIONSAVER_H
+
+#include "TraceJSONStructs.h"
+
+namespace lldb_private {
+
+class TraceSessionSaver {
+
+public:
+ /// Save the trace session description JSON object inside the given directory
+ /// as a file named \a trace.json.
+ ///
+ /// \param[in] trace_session_description
+ /// The trace session description, as a JSON object.
+ ///
+ /// \param[in] directory
+ /// The directory where the JSON file will be saved.
+ ///
+ /// \return
+ /// \a llvm::Error::success() if the operation was successful, or an
+ /// \a llvm::Error otherwise.
+ static llvm::Error
+ WriteSessionToFile(const llvm::json::Value &trace_session_description,
+ FileSpec directory);
+
+ /// Build the processes section of the trace session description file. Besides
+ /// returning the processes' information, this method saves to disk all modules
+ /// and raw traces corresponding to the traced threads of the given process.
+ ///
+ /// \param[in] live_process
+ /// The process being traced.
+ ///
+ /// \param[in] raw_trace_fetcher
+ /// Callback function that receives a thread ID and returns its raw trace.
+ /// This callback should return \a None if the thread is not being traced.
+ /// Otherwise, it should return the raw trace in bytes or an
+ /// \a llvm::Error in case of failures.
+ ///
+ /// \param[in] directory
+ /// The directory where files will be saved when building the processes
+ /// section.
+ ///
+ /// \return
+ /// The processes section or \a llvm::Error in case of failures.
+ static llvm::Expected<JSONTraceSessionBase> BuildProcessesSection(
+ Process &live_process,
+ std::function<
+ llvm::Expected<llvm::Optional<std::vector<uint8_t>>>(lldb::tid_t tid)>
+ raw_trace_fetcher,
+ FileSpec directory);
+
+ /// Build the threads sub-section of the trace session description file.
+ /// For each traced thread, its raw trace is also written to a file named
+ /// <thread id>.trace inside the given directory.
+ ///
+ /// \param[in] live_process
+ /// The process being traced.
+ ///
+ /// \param[in] raw_trace_fetcher
+ /// Callback function that receives a thread ID and returns its raw trace.
+ /// This callback should return \a None if the thread is not being traced.
+ /// Otherwise, it should return the raw trace in bytes or an
+ /// \a llvm::Error in case of failures.
+ ///
+ /// \param[in] directory
+ /// The directory where files will be saved when building the threads
+ /// section.
+ ///
+ /// \return
+ /// The threads section or \a llvm::Error in case of failures.
+ static llvm::Expected<std::vector<JSONThread>> BuildThreadsSection(
+ Process &live_process,
+ std::function<
+ llvm::Expected<llvm::Optional<std::vector<uint8_t>>>(lldb::tid_t tid)>
+ raw_trace_fetcher,
+ FileSpec directory);
+
+ /// Build the modules sub-section of the trace session description file. The
+ /// original modules will be copied over to the \a <directory>/modules folder.
+ /// Invalid modules are skipped.
+ /// Copying the modules has the benefit of making these trace session
+ /// directories self-contained, as the raw traces and modules are part of the
+ /// output directory and can be sent to another machine, where lldb can load
+ /// them and replicate exactly the same trace session.
+ ///
+ /// \param[in] live_process
+ /// The process being traced.
+ ///
+ /// \param[in] directory
+ /// The directory where the modules files will be saved when building
+ /// the modules section.
+ /// Example: If a module \a libbar.so exists in the path
+ /// \a /usr/lib/foo/libbar.so, then it will be copied to
+ /// \a <directory>/modules/usr/lib/foo/libbar.so.
+ ///
+ /// \return
+ /// The modules section or \a llvm::Error in case of failures.
+ static llvm::Expected<std::vector<JSONModule>>
+ BuildModulesSection(Process &live_process, FileSpec directory);
+};
+} // namespace lldb_private
+
+#endif // LLDB_TARGET_TRACESESSIONSAVER_H
diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp
index c12bcd3523e3..6ec957771e47 100644
--- a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp
+++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp
@@ -13,9 +13,11 @@
#include "DecodedThread.h"
#include "TraceIntelPTConstants.h"
#include "TraceIntelPTSessionFileParser.h"
+#include "TraceIntelPTSessionSaver.h"
#include "lldb/Core/PluginManager.h"
#include "lldb/Target/Process.h"
#include "lldb/Target/Target.h"
+#include "llvm/ADT/None.h"
using namespace lldb;
using namespace lldb_private;
@@ -47,25 +49,17 @@ void TraceIntelPT::Terminate() {
PluginManager::UnregisterPlugin(CreateInstanceForSessionFile);
}
-ConstString TraceIntelPT::GetPluginNameStatic() {
- static ConstString g_name("intel-pt");
- return g_name;
-}
-
StringRef TraceIntelPT::GetSchema() {
return TraceIntelPTSessionFileParser::GetSchema();
}
-//------------------------------------------------------------------
-// PluginInterface protocol
-//------------------------------------------------------------------
-
-ConstString TraceIntelPT::GetPluginName() { return GetPluginNameStatic(); }
-
-uint32_t TraceIntelPT::GetPluginVersion() { return 1; }
-
void TraceIntelPT::Dump(Stream *s) const {}
+llvm::Error TraceIntelPT::SaveLiveTraceToDisk(FileSpec directory) {
+ RefreshLiveProcessState();
+ return TraceIntelPTSessionSaver().SaveToDisk(*this, directory);
+}
+
Expected<TraceSP> TraceIntelPT::CreateInstanceForSessionFile(
const json::Value &trace_session_file, StringRef session_file_dir,
Debugger &debugger) {
@@ -86,7 +80,8 @@ TraceIntelPT::TraceIntelPT(
: m_cpu_info(cpu_info) {
for (const ThreadPostMortemTraceSP &thread : traced_threads)
m_thread_decoders.emplace(
- thread.get(), std::make_unique<PostMortemThreadDecoder>(thread, *this));
+ thread->GetID(),
+ std::make_unique<PostMortemThreadDecoder>(thread, *this));
}
DecodedThreadSP TraceIntelPT::Decode(Thread &thread) {
@@ -96,7 +91,7 @@ DecodedThreadSP TraceIntelPT::Decode(Thread &thread) {
thread.shared_from_this(),
createStringError(inconvertibleErrorCode(), *m_live_refresh_error));
- auto it = m_thread_decoders.find(&thread);
+ auto it = m_thread_decoders.find(thread.GetID());
if (it == m_thread_decoders.end())
return std::make_shared<DecodedThread>(
thread.shared_from_this(),
@@ -120,7 +115,7 @@ void TraceIntelPT::DumpTraceInfo(Thread &thread, Stream &s, bool verbose) {
}
Optional<size_t> TraceIntelPT::GetRawTraceSize(Thread &thread) {
- if (IsTraced(thread))
+ if (IsTraced(thread.GetID()))
return Decode(thread)->GetRawTraceSize();
else
return None;
@@ -188,6 +183,8 @@ Expected<pt_cpu> TraceIntelPT::GetCPUInfo() {
return *m_cpu_info;
}
+Process *TraceIntelPT::GetLiveProcess() { return m_live_process; }
+
void TraceIntelPT::DoRefreshLiveProcessState(
Expected<TraceGetStateResponse> state) {
m_thread_decoders.clear();
@@ -201,13 +198,13 @@ void TraceIntelPT::DoRefreshLiveProcessState(
Thread &thread =
*m_live_process->GetThreadList().FindThreadByID(thread_state.tid);
m_thread_decoders.emplace(
- &thread, std::make_unique<LiveThreadDecoder>(thread, *this));
+ thread_state.tid, std::make_unique<LiveThreadDecoder>(thread, *this));
}
}
-bool TraceIntelPT::IsTraced(const Thread &thread) {
+bool TraceIntelPT::IsTraced(lldb::tid_t tid) {
RefreshLiveProcessState();
- return m_thread_decoders.count(&thread);
+ return m_thread_decoders.count(tid);
}
// The information here should match the description of the intel-pt section
@@ -276,7 +273,7 @@ Error TraceIntelPT::Start(size_t thread_buffer_size,
request.processBufferSizeLimit = total_buffer_size_limit;
request.enableTsc = enable_tsc;
request.psbPeriod = psb_period.map([](size_t val) { return (int64_t)val; });
- request.type = GetPluginName().AsCString();
+ request.type = GetPluginName().str();
return Trace::Start(toJSON(request));
}
@@ -310,7 +307,7 @@ llvm::Error TraceIntelPT::Start(llvm::ArrayRef<lldb::tid_t> tids,
request.threadBufferSize = thread_buffer_size;
request.enableTsc = enable_tsc;
request.psbPeriod = psb_period.map([](size_t val) { return (int64_t)val; });
- request.type = GetPluginName().AsCString();
+ request.type = GetPluginName().str();
request.tids.emplace();
for (lldb::tid_t tid : tids)
request.tids->push_back(tid);
diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.h b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.h
index e3b247112ae1..a6ecf6f906b2 100644
--- a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.h
+++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.h
@@ -11,6 +11,9 @@
#include "IntelPTDecoder.h"
#include "TraceIntelPTSessionFileParser.h"
+#include "lldb/Utility/FileSpec.h"
+#include "lldb/lldb-types.h"
+#include "llvm/Support/raw_ostream.h"
namespace lldb_private {
namespace trace_intel_pt {
@@ -19,11 +22,13 @@ class TraceIntelPT : public Trace {
public:
void Dump(Stream *s) const override;
+ llvm::Error SaveLiveTraceToDisk(FileSpec directory) override;
+
~TraceIntelPT() override = default;
/// PluginInterface protocol
/// \{
- ConstString GetPluginName() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
static void Initialize();
@@ -52,9 +57,7 @@ public:
static llvm::Expected<lldb::TraceSP>
CreateInstanceForLiveProcess(Process &process);
- static ConstString GetPluginNameStatic();
-
- uint32_t GetPluginVersion() override;
+ static llvm::StringRef GetPluginNameStatic() { return "intel-pt"; }
/// \}
lldb::CommandObjectSP
@@ -74,7 +77,7 @@ public:
void DoRefreshLiveProcessState(
llvm::Expected<TraceGetStateResponse> state) override;
- bool IsTraced(const Thread &thread) override;
+ bool IsTraced(lldb::tid_t tid) override;
const char *GetStartConfigurationHelp() override;
@@ -139,6 +142,13 @@ public:
llvm::Expected<pt_cpu> GetCPUInfo();
+ /// Get the current traced live process.
+ ///
+ /// \return
+ /// The current traced live process. If it's not a live process,
+ /// return \a nullptr.
+ Process *GetLiveProcess();
+
private:
friend class TraceIntelPTSessionFileParser;
@@ -170,7 +180,7 @@ private:
/// It is provided by either a session file or a live process' "cpuInfo"
/// binary data.
llvm::Optional<pt_cpu> m_cpu_info;
- std::map<const Thread *, std::unique_ptr<ThreadDecoder>> m_thread_decoders;
+ std::map<lldb::tid_t, std::unique_ptr<ThreadDecoder>> m_thread_decoders;
/// Error gotten after a failed live process update, if any.
llvm::Optional<std::string> m_live_refresh_error;
};
diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTJSONStructs.cpp b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTJSONStructs.cpp
new file mode 100644
index 000000000000..e36751e235dc
--- /dev/null
+++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTJSONStructs.cpp
@@ -0,0 +1,59 @@
+//===-- TraceIntelPTJSONStructs.cpp ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "TraceIntelPTJSONStructs.h"
+
+#include "llvm/Support/JSON.h"
+#include <string>
+
+using namespace lldb;
+using namespace lldb_private;
+using namespace lldb_private::trace_intel_pt;
+using namespace llvm;
+
+namespace llvm {
+namespace json {
+
+bool fromJSON(const Value &value, JSONTraceIntelPTSettings &plugin_settings,
+ Path path) {
+ ObjectMapper o(value, path);
+ return o && o.map("cpuInfo", plugin_settings.cpuInfo) &&
+ fromJSON(value, (JSONTracePluginSettings &)plugin_settings, path);
+}
+
+bool fromJSON(const json::Value &value, JSONTraceIntelPTCPUInfo &cpu_info,
+ Path path) {
+ ObjectMapper o(value, path);
+ return o && o.map("vendor", cpu_info.vendor) &&
+ o.map("family", cpu_info.family) && o.map("model", cpu_info.model) &&
+ o.map("stepping", cpu_info.stepping);
+}
+
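+// Illustrative shape of the cpuInfo JSON handled by these functions (the
+// field values here are hypothetical):
+//   {"vendor": "intel", "family": 6, "model": 79, "stepping": 1}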
+Value toJSON(const JSONTraceIntelPTCPUInfo &cpu_info) {
+ return Value(Object{{"family", cpu_info.family},
+ {"model", cpu_info.model},
+ {"stepping", cpu_info.stepping},
+ {"vendor", cpu_info.vendor}});
+}
+
+llvm::json::Value toJSON(const JSONTraceIntelPTTrace &trace) {
+ llvm::json::Object json_trace;
+ json_trace["type"] = trace.type;
+ json_trace["cpuInfo"] = toJSON(trace.cpuInfo);
+ return std::move(json_trace);
+}
+
+llvm::json::Value toJSON(const JSONTraceIntelPTSession &session) {
+ llvm::json::Object json_session;
+ json_session["trace"] = toJSON(session.ipt_trace);
+ json_session["processes"] = toJSON(session.session_base);
+ return std::move(json_session);
+}
+
+} // namespace json
+} // namespace llvm
diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTJSONStructs.h b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTJSONStructs.h
new file mode 100644
index 000000000000..ec024f27b8c9
--- /dev/null
+++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTJSONStructs.h
@@ -0,0 +1,75 @@
+//===-- TraceIntelPTJSONStructs.h -----------------------------*- C++ //-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLDB_SOURCE_PLUGINS_TRACE_INTEL_PT_TRACEINTELPTJSONSTRUCTS_H
+#define LLDB_SOURCE_PLUGINS_TRACE_INTEL_PT_TRACEINTELPTJSONSTRUCTS_H
+
+#include "../common/TraceJSONStructs.h"
+#include <intel-pt.h>
+
+namespace lldb_private {
+namespace trace_intel_pt {
+
+struct JSONTraceIntelPTCPUInfo {
+ JSONTraceIntelPTCPUInfo() = default;
+
+ JSONTraceIntelPTCPUInfo(pt_cpu cpu_info) {
+ family = static_cast<int64_t>(cpu_info.family);
+ model = static_cast<int64_t>(cpu_info.model);
+ stepping = static_cast<int64_t>(cpu_info.stepping);
+ vendor = cpu_info.vendor == pcv_intel ? "intel" : "Unknown";
+ }
+
+ int64_t family;
+ int64_t model;
+ int64_t stepping;
+ std::string vendor;
+};
+
+struct JSONTraceIntelPTTrace {
+ std::string type;
+ JSONTraceIntelPTCPUInfo cpuInfo;
+};
+
+struct JSONTraceIntelPTSession {
+ JSONTraceIntelPTTrace ipt_trace;
+ JSONTraceSessionBase session_base;
+};
+
+struct JSONTraceIntelPTSettings : JSONTracePluginSettings {
+ JSONTraceIntelPTCPUInfo cpuInfo;
+};
+
+} // namespace trace_intel_pt
+} // namespace lldb_private
+
+namespace llvm {
+namespace json {
+
+bool fromJSON(
+ const Value &value,
+ lldb_private::trace_intel_pt::JSONTraceIntelPTSettings &plugin_settings,
+ Path path);
+
+bool fromJSON(const llvm::json::Value &value,
+ lldb_private::trace_intel_pt::JSONTraceIntelPTCPUInfo &packet,
+ llvm::json::Path path);
+
+llvm::json::Value
+toJSON(const lldb_private::trace_intel_pt::JSONTraceIntelPTCPUInfo &cpu_info);
+
+llvm::json::Value
+toJSON(const lldb_private::trace_intel_pt::JSONTraceIntelPTTrace &trace);
+
+llvm::json::Value
+toJSON(const lldb_private::trace_intel_pt::JSONTraceIntelPTSession &session);
+
+} // namespace json
+} // namespace llvm
+
+#endif // LLDB_SOURCE_PLUGINS_TRACE_INTEL_PT_TRACEINTELPTJSONSTRUCTS_H
diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTOptions.td b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTOptions.td
index 9e8cab1ee5c4..714a13b6e5e0 100644
--- a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTOptions.td
+++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTOptions.td
@@ -72,3 +72,13 @@ let Command = "process trace start intel pt" in {
"packets as: 2 ^ (value + 11), e.g. value 3 means 16KiB between PSB "
"packets. Defaults to 0 if supported.">;
}
+
+let Command = "process trace save intel pt" in {
+ def process_trace_save_intel_directory: Option<"directory", "d">,
+ Group<1>,
+ Arg<"Value">, Required,
+ Desc<"This value defines the directory where the trace will be saved."
+ "It will be created if it does not exist. It will also create a "
+ "trace files with the trace data and a trace.json with the main "
+ "properties of the trace session.">;
+}
diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTSessionFileParser.cpp b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTSessionFileParser.cpp
index 5af7c269d0cb..7e2c39a20255 100644
--- a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTSessionFileParser.cpp
+++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTSessionFileParser.cpp
@@ -9,10 +9,7 @@
#include "TraceIntelPTSessionFileParser.h"
#include "../common/ThreadPostMortemTrace.h"
-#include "lldb/Core/Debugger.h"
-#include "lldb/Target/Process.h"
-#include "lldb/Target/Target.h"
-#include "lldb/Target/ThreadList.h"
+#include "TraceIntelPT.h"
using namespace lldb;
using namespace lldb_private;
@@ -59,7 +56,7 @@ TraceSP TraceIntelPTSessionFileParser::CreateTraceIntelPTInstance(
Expected<TraceSP> TraceIntelPTSessionFileParser::Parse() {
json::Path::Root root("traceSession");
- TraceSessionFileParser::JSONTraceSession<JSONTraceIntelPTSettings> session;
+ JSONTraceSession<JSONTraceIntelPTSettings> session;
if (!json::fromJSON(m_trace_session_file, session, root))
return CreateJSONError(root, m_trace_session_file);
@@ -70,38 +67,3 @@ Expected<TraceSP> TraceIntelPTSessionFileParser::Parse() {
else
return parsed_processes.takeError();
}
-
-namespace llvm {
-namespace json {
-
-bool fromJSON(
- const Value &value,
- TraceIntelPTSessionFileParser::JSONTraceIntelPTSettings &plugin_settings,
- Path path) {
- ObjectMapper o(value, path);
- return o && o.map("cpuInfo", plugin_settings.cpuInfo) &&
- fromJSON(
- value,
- (TraceSessionFileParser::JSONTracePluginSettings &)plugin_settings,
- path);
-}
-
-bool fromJSON(const json::Value &value,
- TraceIntelPTSessionFileParser::JSONTraceIntelPTCPUInfo &cpu_info,
- Path path) {
- ObjectMapper o(value, path);
- return o && o.map("vendor", cpu_info.vendor) &&
- o.map("family", cpu_info.family) && o.map("model", cpu_info.model) &&
- o.map("stepping", cpu_info.stepping);
-}
-
-Value toJSON(
- const TraceIntelPTSessionFileParser::JSONTraceIntelPTCPUInfo &cpu_info) {
- return Value(Object{{"family", cpu_info.family},
- {"model", cpu_info.model},
- {"stepping", cpu_info.stepping},
- {"vendor", cpu_info.vendor}});
-}
-
-} // namespace json
-} // namespace llvm
diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTSessionFileParser.h b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTSessionFileParser.h
index b2667a882222..34883d3cf300 100644
--- a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTSessionFileParser.h
+++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTSessionFileParser.h
@@ -9,9 +9,9 @@
#ifndef LLDB_SOURCE_PLUGINS_TRACE_INTEL_PT_TRACEINTELPTSESSIONFILEPARSER_H
#define LLDB_SOURCE_PLUGINS_TRACE_INTEL_PT_TRACEINTELPTSESSIONFILEPARSER_H
-#include "TraceIntelPT.h"
-
#include "../common/TraceSessionFileParser.h"
+#include "TraceIntelPT.h"
+#include "TraceIntelPTJSONStructs.h"
namespace lldb_private {
namespace trace_intel_pt {
@@ -20,17 +20,6 @@ class TraceIntelPT;
class TraceIntelPTSessionFileParser : public TraceSessionFileParser {
public:
- struct JSONTraceIntelPTCPUInfo {
- int64_t family;
- int64_t model;
- int64_t stepping;
- std::string vendor;
- };
-
- struct JSONTraceIntelPTSettings
- : TraceSessionFileParser::JSONTracePluginSettings {
- JSONTraceIntelPTCPUInfo cpuInfo;
- };
/// See \a TraceSessionFileParser::TraceSessionFileParser for the description
/// of these fields.
@@ -65,24 +54,5 @@ private:
} // namespace trace_intel_pt
} // namespace lldb_private
-namespace llvm {
-namespace json {
-
-bool fromJSON(const Value &value,
- lldb_private::trace_intel_pt::TraceIntelPTSessionFileParser::
- JSONTraceIntelPTSettings &plugin_settings,
- Path path);
-
-bool fromJSON(const llvm::json::Value &value,
- lldb_private::trace_intel_pt::TraceIntelPTSessionFileParser::
- JSONTraceIntelPTCPUInfo &packet,
- llvm::json::Path path);
-
-llvm::json::Value
-toJSON(const lldb_private::trace_intel_pt::TraceIntelPTSessionFileParser::
- JSONTraceIntelPTCPUInfo &packet);
-
-} // namespace json
-} // namespace llvm
#endif // LLDB_SOURCE_PLUGINS_TRACE_INTEL_PT_TRACEINTELPTSESSIONFILEPARSER_H
diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTSessionSaver.cpp b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTSessionSaver.cpp
new file mode 100644
index 000000000000..a8d03db1c25c
--- /dev/null
+++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTSessionSaver.cpp
@@ -0,0 +1,79 @@
+//===-- TraceIntelPTSessionSaver.cpp --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "TraceIntelPTSessionSaver.h"
+#include "../common/TraceSessionSaver.h"
+#include "TraceIntelPT.h"
+#include "TraceIntelPTJSONStructs.h"
+#include "lldb/Core/Module.h"
+#include "lldb/Core/ModuleList.h"
+#include "lldb/Target/Process.h"
+#include "lldb/Target/SectionLoadList.h"
+#include "lldb/Target/Target.h"
+#include "lldb/Target/ThreadList.h"
+#include "lldb/lldb-types.h"
+#include "llvm/ADT/None.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/JSON.h"
+
+#include <fstream>
+#include <iostream>
+#include <sstream>
+#include <string>
+
+using namespace lldb;
+using namespace lldb_private;
+using namespace lldb_private::trace_intel_pt;
+using namespace llvm;
+
+llvm::Error TraceIntelPTSessionSaver::SaveToDisk(TraceIntelPT &trace_ipt,
+ FileSpec directory) {
+ Process *live_process = trace_ipt.GetLiveProcess();
+ if (live_process == nullptr)
+ return createStringError(inconvertibleErrorCode(),
+ "Saving a trace requires a live process.");
+
+ if (std::error_code ec =
+ sys::fs::create_directories(directory.GetPath().c_str()))
+ return llvm::errorCodeToError(ec);
+
+ llvm::Expected<JSONTraceIntelPTTrace> json_intel_pt_trace =
+ BuildTraceSection(trace_ipt);
+ if (!json_intel_pt_trace)
+ return json_intel_pt_trace.takeError();
+
+ llvm::Expected<JSONTraceSessionBase> json_session_description =
+ TraceSessionSaver::BuildProcessesSection(
+ *live_process,
+ [&](lldb::tid_t tid)
+ -> llvm::Expected<llvm::Optional<std::vector<uint8_t>>> {
+ if (!trace_ipt.IsTraced(tid))
+ return None;
+ return trace_ipt.GetLiveThreadBuffer(tid);
+ },
+ directory);
+
+ if (!json_session_description)
+ return json_session_description.takeError();
+
+ JSONTraceIntelPTSession json_intel_pt_session{json_intel_pt_trace.get(),
+ json_session_description.get()};
+
+ return TraceSessionSaver::WriteSessionToFile(
+ llvm::json::toJSON(json_intel_pt_session), directory);
+}
+
+llvm::Expected<JSONTraceIntelPTTrace>
+TraceIntelPTSessionSaver::BuildTraceSection(TraceIntelPT &trace_ipt) {
+ llvm::Expected<pt_cpu> cpu_info = trace_ipt.GetCPUInfo();
+ if (!cpu_info)
+ return cpu_info.takeError();
+
+ return JSONTraceIntelPTTrace{"intel-pt",
+ JSONTraceIntelPTCPUInfo(cpu_info.get())};
+}
diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTSessionSaver.h b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTSessionSaver.h
new file mode 100644
index 000000000000..943519f959e9
--- /dev/null
+++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTSessionSaver.h
@@ -0,0 +1,57 @@
+//===-- TraceIntelPTSessionSaver.h ---------------------------*- C++ //-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLDB_SOURCE_PLUGINS_TRACE_INTEL_PT_TRACEINTELPTSESSIONSAVER_H
+#define LLDB_SOURCE_PLUGINS_TRACE_INTEL_PT_TRACEINTELPTSESSIONSAVER_H
+
+#include "TraceIntelPT.h"
+
+#include "../common/TraceJSONStructs.h"
+
+namespace lldb_private {
+namespace trace_intel_pt {
+
+class TraceIntelPT;
+
+class TraceIntelPTSessionSaver {
+
+public:
+ /// Save the Intel PT trace of a live process to the specified directory,
+ /// which will be created if needed. This will also create a file
+ /// \a <directory>/trace.json with the main properties of the trace
+ /// session, along with other files which contain the actual trace data.
+ /// The trace.json file can be used later as input for the "trace load"
+ /// command to load the trace in LLDB.
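+ ///
+ /// As a rough sketch (the exact file names depend on the traced process),
+ /// saving to a directory /tmp/mytrace could produce a layout like:
+ /// /tmp/mytrace/trace.json
+ /// /tmp/mytrace/<thread_id>.trace
+ /// /tmp/mytrace/modules/usr/lib/foo/libbar.so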
+ ///
+ /// \param[in] trace_ipt
+ /// The Intel PT trace to be saved to disk.
+ ///
+ /// \param[in] directory
+ /// The directory where the trace files will be saved.
+ ///
+ /// \return
+ /// \a llvm::Error::success() if the operation was successful, or an
+ /// \a llvm::Error otherwise.
+ llvm::Error SaveToDisk(TraceIntelPT &trace_ipt, FileSpec directory);
+
+private:
+ /// Build the trace section of the intel-pt trace session description file.
+ ///
+ /// \param[in] trace_ipt
+ /// The Intel PT trace.
+ ///
+ /// \return
+ /// The trace section, or an \a llvm::Error in case of failures.
+ llvm::Expected<JSONTraceIntelPTTrace>
+ BuildTraceSection(TraceIntelPT &trace_ipt);
+};
+
+} // namespace trace_intel_pt
+} // namespace lldb_private
+
+#endif // LLDB_SOURCE_PLUGINS_TRACE_INTEL_PT_TRACEINTELPTSESSIONSAVER_H
diff --git a/lldb/source/Plugins/TraceExporter/common/TraceHTR.cpp b/lldb/source/Plugins/TraceExporter/common/TraceHTR.cpp
new file mode 100644
index 000000000000..d29445cc004f
--- /dev/null
+++ b/lldb/source/Plugins/TraceExporter/common/TraceHTR.cpp
@@ -0,0 +1,485 @@
+//===-- TraceHTR.cpp ------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "TraceHTR.h"
+
+#include "lldb/Symbol/Function.h"
+#include "lldb/Target/Process.h"
+#include "lldb/Target/Target.h"
+#include "llvm/Support/JSON.h"
+#include <sstream>
+#include <string>
+
+using namespace lldb_private;
+using namespace lldb;
+
+size_t HTRBlockMetadata::GetNumInstructions() const {
+ return m_num_instructions;
+}
+
+llvm::Optional<llvm::StringRef>
+HTRBlockMetadata::GetMostFrequentlyCalledFunction() const {
+ size_t max_ncalls = 0;
+ llvm::Optional<llvm::StringRef> max_name = llvm::None;
+ for (const auto &it : m_func_calls) {
+ ConstString name = it.first;
+ size_t ncalls = it.second;
+ if (ncalls > max_ncalls) {
+ max_ncalls = ncalls;
+ max_name = name.GetStringRef();
+ }
+ }
+ return max_name;
+}
+
+llvm::DenseMap<ConstString, size_t> const &
+HTRBlockMetadata::GetFunctionCalls() const {
+ return m_func_calls;
+}
+
+lldb::addr_t HTRBlockMetadata::GetFirstInstructionLoadAddress() const {
+ return m_first_instruction_load_address;
+}
+
+size_t HTRBlock::GetOffset() const { return m_offset; }
+
+size_t HTRBlock::GetSize() const { return m_size; }
+
+HTRBlockMetadata const &HTRBlock::GetMetadata() const { return m_metadata; }
+
+llvm::ArrayRef<HTRBlockLayerUP> TraceHTR::GetBlockLayers() const {
+ return m_block_layer_ups;
+}
+
+HTRInstructionLayer const &TraceHTR::GetInstructionLayer() const {
+ return *m_instruction_layer_up;
+}
+
+void TraceHTR::AddNewBlockLayer(HTRBlockLayerUP &&block_layer) {
+ m_block_layer_ups.emplace_back(std::move(block_layer));
+}
+
+size_t IHTRLayer::GetLayerId() const { return m_layer_id; }
+
+void HTRBlockLayer::AppendNewBlock(size_t block_id, HTRBlock &&block) {
+ m_block_id_trace.emplace_back(block_id);
+ m_block_defs.emplace(block_id, block);
+}
+
+void HTRBlockLayer::AppendRepeatedBlock(size_t block_id) {
+ m_block_id_trace.emplace_back(block_id);
+}
+
+llvm::ArrayRef<lldb::addr_t> HTRInstructionLayer::GetInstructionTrace() const {
+ return m_instruction_trace;
+}
+
+void HTRInstructionLayer::AddCallInstructionMetadata(
+ lldb::addr_t load_addr, llvm::Optional<ConstString> func_name) {
+ m_call_isns.emplace(load_addr, func_name);
+}
+
+void HTRInstructionLayer::AppendInstruction(lldb::addr_t load_addr) {
+ m_instruction_trace.emplace_back(load_addr);
+}
+
+HTRBlock const *HTRBlockLayer::GetBlockById(size_t block_id) const {
+ auto block_it = m_block_defs.find(block_id);
+ if (block_it == m_block_defs.end())
+ return nullptr;
+ else
+ return &block_it->second;
+}
+
+llvm::ArrayRef<size_t> HTRBlockLayer::GetBlockIdTrace() const {
+ return m_block_id_trace;
+}
+
+size_t HTRBlockLayer::GetNumUnits() const { return m_block_id_trace.size(); }
+
+HTRBlockMetadata HTRInstructionLayer::GetMetadataByIndex(size_t index) const {
+ lldb::addr_t instruction_load_address = m_instruction_trace[index];
+ llvm::DenseMap<ConstString, size_t> func_calls;
+
+ auto func_name_it = m_call_isns.find(instruction_load_address);
+ if (func_name_it != m_call_isns.end()) {
+ if (llvm::Optional<ConstString> func_name = func_name_it->second) {
+ func_calls[*func_name] = 1;
+ }
+ }
+ return {instruction_load_address, 1, std::move(func_calls)};
+}
+
+size_t HTRInstructionLayer::GetNumUnits() const {
+ return m_instruction_trace.size();
+}
+
+HTRBlockMetadata HTRBlockLayer::GetMetadataByIndex(size_t index) const {
+ size_t block_id = m_block_id_trace[index];
+ HTRBlock block = m_block_defs.find(block_id)->second;
+ return block.GetMetadata();
+}
+
+TraceHTR::TraceHTR(Thread &thread, TraceCursor &cursor)
+ : m_instruction_layer_up(std::make_unique<HTRInstructionLayer>(0)) {
+
+ // Move cursor to the first instruction in the trace
+ cursor.SetForwards(true);
+ cursor.Seek(0, TraceCursor::SeekType::Set);
+
+ Target &target = thread.GetProcess()->GetTarget();
+ auto function_name_from_load_address =
+ [&](lldb::addr_t load_address) -> llvm::Optional<ConstString> {
+ lldb_private::Address pc_addr;
+ SymbolContext sc;
+ if (target.ResolveLoadAddress(load_address, pc_addr) &&
+ pc_addr.CalculateSymbolContext(&sc))
+ return sc.GetFunctionName()
+ ? llvm::Optional<ConstString>(sc.GetFunctionName())
+ : llvm::None;
+ else
+ return llvm::None;
+ };
+
+ bool more_data_in_trace = true;
+ while (more_data_in_trace) {
+ if (cursor.IsError()) {
+ // Append a load address of 0 for all instructions for which an error
+ // occurred while decoding.
+ // TODO: Make distinction between errors by storing the error messages.
+ // Currently, all errors are treated the same.
+ m_instruction_layer_up->AppendInstruction(0);
+ more_data_in_trace = cursor.Next();
+ } else {
+ lldb::addr_t current_instruction_load_address = cursor.GetLoadAddress();
+ lldb::TraceInstructionControlFlowType current_instruction_type =
+ cursor.GetInstructionControlFlowType();
+
+ m_instruction_layer_up->AppendInstruction(
+ current_instruction_load_address);
+ more_data_in_trace = cursor.Next();
+ if (current_instruction_type &
+ lldb::eTraceInstructionControlFlowTypeCall) {
+ if (more_data_in_trace && !cursor.IsError()) {
+ m_instruction_layer_up->AddCallInstructionMetadata(
+ current_instruction_load_address,
+ function_name_from_load_address(cursor.GetLoadAddress()));
+ } else {
+ // Next instruction is not known - pass None to indicate the name
+ // of the function being called is not known
+ m_instruction_layer_up->AddCallInstructionMetadata(
+ current_instruction_load_address, llvm::None);
+ }
+ }
+ }
+ }
+}
+
+void HTRBlockMetadata::MergeMetadata(
+ HTRBlockMetadata &merged_metadata,
+ HTRBlockMetadata const &metadata_to_merge) {
+ merged_metadata.m_num_instructions += metadata_to_merge.m_num_instructions;
+ for (const auto &it : metadata_to_merge.m_func_calls) {
+ ConstString name = it.first;
+ size_t num_calls = it.second;
+ merged_metadata.m_func_calls[name] += num_calls;
+ }
+}
+
+HTRBlock IHTRLayer::MergeUnits(size_t start_unit_index, size_t num_units) {
+ // TODO: make this function take `end_unit_index` as a parameter instead of
+ // `num_units` and merge the range [start_unit_index, end_unit_index] inclusive.
+ HTRBlockMetadata merged_metadata = GetMetadataByIndex(start_unit_index);
+ for (size_t i = start_unit_index + 1; i < start_unit_index + num_units; i++) {
+ // merge the new metadata into merged_metadata
+ HTRBlockMetadata::MergeMetadata(merged_metadata, GetMetadataByIndex(i));
+ }
+ return {start_unit_index, num_units, merged_metadata};
+}
+
+void TraceHTR::ExecutePasses() {
+ auto are_passes_done = [](IHTRLayer &l1, IHTRLayer &l2) {
+ return l1.GetNumUnits() == l2.GetNumUnits();
+ };
+ HTRBlockLayerUP current_block_layer_up =
+ BasicSuperBlockMerge(*m_instruction_layer_up);
+ HTRBlockLayer &current_block_layer = *current_block_layer_up;
+ if (are_passes_done(*m_instruction_layer_up, *current_block_layer_up))
+ return;
+
+ AddNewBlockLayer(std::move(current_block_layer_up));
+ while (true) {
+ HTRBlockLayerUP new_block_layer_up =
+ BasicSuperBlockMerge(current_block_layer);
+ if (are_passes_done(current_block_layer, *new_block_layer_up))
+ return;
+
+ current_block_layer = *new_block_layer_up;
+ AddNewBlockLayer(std::move(new_block_layer_up));
+ }
+}
+
+llvm::Error TraceHTR::Export(std::string outfile) {
+ std::error_code ec;
+ llvm::raw_fd_ostream os(outfile, ec, llvm::sys::fs::OF_Text);
+ if (ec) {
+ return llvm::make_error<llvm::StringError>(
+ "unable to open destination file: " + outfile, os.error());
+ } else {
+ os << toJSON(*this);
+ os.close();
+ if (os.has_error()) {
+ return llvm::make_error<llvm::StringError>(
+ "unable to write to destination file: " + outfile, os.error());
+ }
+ }
+ return llvm::Error::success();
+}
+
+HTRBlockLayerUP lldb_private::BasicSuperBlockMerge(IHTRLayer &layer) {
+ std::unique_ptr<HTRBlockLayer> new_block_layer =
+ std::make_unique<HTRBlockLayer>(layer.GetLayerId() + 1);
+
+ if (layer.GetNumUnits()) {
+ // Future Improvement: split this into two functions - one for finding heads
+ // and tails, one for merging/creating the next layer.
+ // A 'head' is defined to be a block whose occurrences in the trace do not
+ // have a unique preceding block.
+ std::unordered_set<size_t> heads;
+
+ // The load address of the first instruction of a block is the unique ID for
+ // that block (i.e. blocks with the same first instruction load address are
+ // the same block)
+
+ // Future Improvement: no need to store all its preceding block ids, all we
+ // care about is that there is more than one preceding block id, so an enum
+ // could be used
+ std::unordered_map<lldb::addr_t, std::unordered_set<lldb::addr_t>> head_map;
+ lldb::addr_t prev_id =
+ layer.GetMetadataByIndex(0).GetFirstInstructionLoadAddress();
+ size_t num_units = layer.GetNumUnits();
+ // This excludes the first unit since it has no previous unit
+ for (size_t i = 1; i < num_units; i++) {
+ lldb::addr_t current_id =
+ layer.GetMetadataByIndex(i).GetFirstInstructionLoadAddress();
+ head_map[current_id].insert(prev_id);
+ prev_id = current_id;
+ }
+ for (const auto &it : head_map) {
+ // ID of 0 represents an error - errors can't be heads or tails
+ lldb::addr_t id = it.first;
+ const std::unordered_set<lldb::addr_t> predecessor_set = it.second;
+ if (id && predecessor_set.size() > 1)
+ heads.insert(id);
+ }
+
+ // Future Improvement: identify heads and tails in the same loop
+ // A 'tail' is defined to be a block whose occurrences in the trace do
+ // not have a unique succeeding block.
+ std::unordered_set<lldb::addr_t> tails;
+ std::unordered_map<lldb::addr_t, std::unordered_set<lldb::addr_t>> tail_map;
+
+ // This excludes the last unit since it has no next unit
+ for (size_t i = 0; i < num_units - 1; i++) {
+ lldb::addr_t current_id =
+ layer.GetMetadataByIndex(i).GetFirstInstructionLoadAddress();
+ lldb::addr_t next_id =
+ layer.GetMetadataByIndex(i + 1).GetFirstInstructionLoadAddress();
+ tail_map[current_id].insert(next_id);
+ }
+
+ // Mark last block as tail so the algorithm stops gracefully
+ lldb::addr_t last_id = layer.GetMetadataByIndex(num_units - 1)
+ .GetFirstInstructionLoadAddress();
+ tails.insert(last_id);
+ for (const auto &it : tail_map) {
+ lldb::addr_t id = it.first;
+ const std::unordered_set<lldb::addr_t> successor_set = it.second;
+ // ID of 0 represents an error - errors can't be heads or tails
+ if (id && successor_set.size() > 1)
+ tails.insert(id);
+ }
+
+ // Keep track of the size of the current super block since the sequences we
+ // merge have variable length.
+ size_t superblock_size = 0;
+ // Each super block always has the same first unit (we call this the
+ // super block head). This guarantee allows us to use the super block head
+ // as the unique key mapping to the super block it begins.
+ llvm::Optional<size_t> superblock_head = llvm::None;
+ auto construct_next_layer = [&](size_t merge_start, size_t n) -> void {
+ if (!superblock_head)
+ return;
+ if (new_block_layer->GetBlockById(*superblock_head)) {
+ new_block_layer->AppendRepeatedBlock(*superblock_head);
+ } else {
+ HTRBlock new_block = layer.MergeUnits(merge_start, n);
+ new_block_layer->AppendNewBlock(*superblock_head, std::move(new_block));
+ }
+ };
+
+ for (size_t i = 0; i < num_units; i++) {
+ lldb::addr_t unit_id =
+ layer.GetMetadataByIndex(i).GetFirstInstructionLoadAddress();
+ auto isHead = heads.count(unit_id) > 0;
+ auto isTail = tails.count(unit_id) > 0;
+
+ if (isHead && isTail) {
+ // Head logic
+ if (superblock_size) { // this handles (tail, head) adjacency -
+ // otherwise an empty
+ // block is created
+ // End previous super block
+ construct_next_layer(i - superblock_size, superblock_size);
+ }
+ // Current id is first in next super block since it's a head
+ superblock_head = unit_id;
+ superblock_size = 1;
+
+ // Tail logic
+ construct_next_layer(i - superblock_size + 1, superblock_size);
+ // Reset the block_head since the prev super block has come to an end.
+ superblock_head = llvm::None;
+ superblock_size = 0;
+ } else if (isHead) {
+ if (superblock_size) { // this handles (tail, head) adjacency -
+ // otherwise an empty
+ // block is created
+ // End previous super block
+ construct_next_layer(i - superblock_size, superblock_size);
+ }
+ // Current id is first in next super block since it's a head
+ superblock_head = unit_id;
+ superblock_size = 1;
+ } else if (isTail) {
+ if (!superblock_head)
+ superblock_head = unit_id;
+ superblock_size++;
+
+ // End previous super block
+ construct_next_layer(i - superblock_size + 1, superblock_size);
+ // Reset the block_head since the prev super block has come to an end.
+ superblock_head = llvm::None;
+ superblock_size = 0;
+ } else {
+ if (!superblock_head)
+ superblock_head = unit_id;
+ superblock_size++;
+ }
+ }
+ }
+ return new_block_layer;
+}
+
+llvm::json::Value lldb_private::toJSON(const TraceHTR &htr) {
+ std::vector<llvm::json::Value> layers_as_json;
+ for (size_t i = 0; i < htr.GetInstructionLayer().GetInstructionTrace().size();
+ i++) {
+ size_t layer_id = htr.GetInstructionLayer().GetLayerId();
+ HTRBlockMetadata metadata = htr.GetInstructionLayer().GetMetadataByIndex(i);
+ lldb::addr_t load_address = metadata.GetFirstInstructionLoadAddress();
+
+ std::string display_name;
+
+ std::stringstream stream;
+ stream << "0x" << std::hex << load_address;
+ std::string load_address_hex_string(stream.str());
+ display_name.assign(load_address_hex_string);
+
+ // name: load address of the first instruction of the block and the name
+ // of the most frequently called function from the block (if applicable)
+
+ // ph: the event type - 'X' for Complete events (see link to documentation
+ // below)
+
+ // Since trace timestamps aren't yet supported in HTR, the ts (timestamp) is
+ // based on the instruction's offset in the trace and the dur (duration) is
+ // 1 since this layer contains single instructions. Using the instruction
+ // offset and a duration of 1 oversimplifies the true timing information of
+ // the trace; nonetheless, these approximate timestamps/durations provide a
+ // clear visualization of the trace.
+
+ // ts: offset from the beginning of the trace for the first instruction in
+ // the block
+
+ // dur: 1 since this layer contains single instructions.
+
+ // pid: the ID of the HTR layer the blocks belong to
+
+ // See
+ // https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview#heading=h.j75x71ritcoy
+ // for documentation on the Trace Event Format
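+ //
+ // As an illustration (the load address here is hypothetical), the event
+ // emitted for the instruction at index 3 of layer 0 would look roughly like:
+ // {"name": "0x400512", "ph": "X", "ts": 3, "dur": 1, "pid": 0}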
+ layers_as_json.emplace_back(llvm::json::Object{
+ {"name", display_name},
+ {"ph", "X"},
+ {"ts", (int64_t)i},
+ {"dur", 1},
+ {"pid", (int64_t)layer_id},
+ });
+ }
+
+ for (const auto &layer : htr.GetBlockLayers()) {
+ size_t start_ts = 0;
+ std::vector<size_t> block_id_trace = layer->GetBlockIdTrace();
+ for (size_t i = 0; i < block_id_trace.size(); i++) {
+ size_t id = block_id_trace[i];
+ // Guaranteed that this ID is valid, so it is safe to dereference here.
+ HTRBlock block = *layer->GetBlockById(id);
+ llvm::json::Value block_json = toJSON(block);
+ size_t layer_id = layer->GetLayerId();
+
+ HTRBlockMetadata metadata = block.GetMetadata();
+
+ llvm::Optional<llvm::StringRef> most_freq_func =
+ metadata.GetMostFrequentlyCalledFunction();
+ std::stringstream stream;
+ stream << "0x" << std::hex << metadata.GetFirstInstructionLoadAddress();
+ std::string offset_hex_string(stream.str());
+ std::string display_name =
+ most_freq_func ? offset_hex_string + ": " + most_freq_func->str()
+ : offset_hex_string;
+
+ // Since trace timestamps aren't yet supported in HTR, the ts (timestamp)
+ // and dur (duration) are based on the block's offset in the trace and
+ // number of instructions in the block, respectively. Using the block
+ // offset and the number of instructions oversimplifies the true timing
+ // information of the trace; nonetheless, these approximate
+ // timestamps/durations provide an understandable visualization of the
+ // trace.
+ auto duration = metadata.GetNumInstructions();
+ layers_as_json.emplace_back(llvm::json::Object{
+ {"name", display_name},
+ {"ph", "X"},
+ {"ts", (int64_t)start_ts},
+ {"dur", (int64_t)duration},
+ {"pid", (int64_t)layer_id},
+ {"args", block_json},
+ });
+ start_ts += duration;
+ }
+ }
+ return layers_as_json;
+}
+
+llvm::json::Value lldb_private::toJSON(const HTRBlock &block) {
+ return llvm::json::Value(
+ llvm::json::Object{{"Metadata", block.GetMetadata()}});
+}
+
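+// Illustrative output shape of the metadata serialization below (the function
+// names and call counts are hypothetical):
+//   {"Number of Instructions": 12, "Functions": ["(foo: 2)", "(bar: 1)"]}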
+llvm::json::Value lldb_private::toJSON(const HTRBlockMetadata &metadata) {
+ std::vector<llvm::json::Value> function_calls;
+ for (const auto &it : metadata.GetFunctionCalls()) {
+ ConstString name = it.first;
+ size_t n_calls = it.second;
+ function_calls.emplace_back(llvm::formatv("({0}: {1})", name, n_calls));
+ }
+
+ return llvm::json::Value(llvm::json::Object{
+ {"Number of Instructions", (ssize_t)metadata.GetNumInstructions()},
+ {"Functions", function_calls}});
+}
diff --git a/lldb/source/Plugins/TraceExporter/common/TraceHTR.h b/lldb/source/Plugins/TraceExporter/common/TraceHTR.h
new file mode 100644
index 000000000000..03babc5a36ab
--- /dev/null
+++ b/lldb/source/Plugins/TraceExporter/common/TraceHTR.h
@@ -0,0 +1,409 @@
+//===-- TraceHTR.h --------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLDB_TARGET_TRACE_HTR_H
+#define LLDB_TARGET_TRACE_HTR_H
+
+#include "lldb/Target/Thread.h"
+#include "lldb/Target/Trace.h"
+
+#include <unordered_map>
+#include <unordered_set>
+
+namespace lldb_private {
+
+/// Metadata associated with an HTR block
+/// See lldb/docs/htr.rst for comprehensive HTR documentation
+class HTRBlockMetadata {
+public:
+ /// Constructor for a block's metadata.
+ ///
+ /// \param[in] first_instruction_load_address
+ /// The load address of the block's first instruction.
+ ///
+ /// \param[in] num_instructions
+ /// The total number of instructions in the block.
+ ///
+ /// \param[in] func_calls
+ /// The map of a function name to the number of times it is called from
+ /// the block.
+ HTRBlockMetadata(lldb::addr_t first_instruction_load_address,
+ size_t num_instructions,
+ llvm::DenseMap<ConstString, size_t> &&func_calls)
+ : m_first_instruction_load_address(first_instruction_load_address),
+ m_num_instructions(num_instructions), m_func_calls(func_calls) {}
+
+ /// Merge two \a HTRBlockMetadata in place.
+ ///
+ /// \param[in][out] merged_metadata
+ /// Metadata that metadata_to_merge will be merged into.
+ ///
+ /// \param[in] metadata_to_merge
+ /// Metadata to merge into merged_metadata.
+ static void MergeMetadata(HTRBlockMetadata &merged_metadata,
+ HTRBlockMetadata const &metadata_to_merge);
+ /// Get the number of instructions in the block.
+ ///
+ /// \return
+ /// The number of instructions in the block.
+ size_t GetNumInstructions() const;
+
+ /// Get the name of the most frequently called function from the block.
+ ///
+ /// \return
+ /// The name of the function that is called the most from this block or
+ /// None if no function is called from this block.
+ llvm::Optional<llvm::StringRef> GetMostFrequentlyCalledFunction() const;
+
+ /// Get the load address of the first instruction in the block.
+ ///
+ /// \return
+ /// The load address of the first instruction in the block.
+ lldb::addr_t GetFirstInstructionLoadAddress() const;
+
+ /// Get the function calls map for the block.
+ /// Function calls are identified in the instruction layer by finding 'call'
+ /// instructions and determining the function they are calling. As these
+ /// instructions are merged into blocks, we merge these different function
+ /// calls into a single map containing the function names to the number of
+ /// times it is called from this block.
+ ///
+ /// \return
+ /// The mapping of function name to the number of times it is called from
+ /// this block.
+ llvm::DenseMap<ConstString, size_t> const &GetFunctionCalls() const;
+
+private:
+ lldb::addr_t m_first_instruction_load_address;
+ size_t m_num_instructions;
+ llvm::DenseMap<ConstString, size_t> m_func_calls;
+};
+
+/// Block structure representing a sequence of trace "units" (i.e.
+/// instructions). Sequences of blocks are merged to create a new, single
+/// block. See lldb/docs/htr.rst for comprehensive HTR documentation
+class HTRBlock {
+public:
+ /// Constructor for a block of an HTR layer.
+ ///
+ /// \param[in] offset
+ /// The offset of the start of this block in the previous layer.
+ ///
+ /// \param[in] size
+ /// Number of blocks/instructions that make up this block in the previous
+ /// layer.
+ ///
+ /// \param[in] metadata
+ /// General metadata for this block.
+ HTRBlock(size_t offset, size_t size, HTRBlockMetadata metadata)
+ : m_offset(offset), m_size(size), m_metadata(metadata) {}
+
+ /// Get the offset of the start of this block in the previous layer.
+ ///
+ /// \return
+ /// The offset of the block.
+ size_t GetOffset() const;
+
+ /// Get the number of blocks/instructions that make up this block in the
+ /// previous layer.
+ ///
+ /// \return
+ /// The size of the block.
+ size_t GetSize() const;
+
+ /// Get the metadata for this block.
+ ///
+ /// \return
+ /// The metadata of the block.
+ HTRBlockMetadata const &GetMetadata() const;
+
+private:
+ /// Offset in the previous layer
+ size_t m_offset;
+ /// Number of blocks/instructions that make up this block in the previous
+ /// layer
+ size_t m_size;
+ /// General metadata for this block
+ HTRBlockMetadata m_metadata;
+};
+
+/// HTR layer interface
+/// See lldb/docs/htr.rst for comprehensive HTR documentation
+class IHTRLayer {
+public:
+ /// Construct new HTR layer.
+ //
+ /// \param[in] id
+ /// The layer's id.
+ IHTRLayer(size_t id) : m_layer_id(id) {}
+
+ /// Get the ID of the layer.
+ ///
+ /// \return
+ /// The layer ID of this layer.
+ size_t GetLayerId() const;
+
+ /// Get the metadata of a unit (instruction or block) in the layer.
+ ///
+ /// \param[in] index
+ /// The position of the unit in the layer.
+ ///
+ /// \return
+ /// The metadata of the unit in the layer.
+ virtual HTRBlockMetadata GetMetadataByIndex(size_t index) const = 0;
+
+ /// Get the total number of units (instruction or block) in this layer.
+ ///
+ /// \return
+ /// The total number of units in the layer.
+ virtual size_t GetNumUnits() const = 0;
+
+ /// Creates a new block from the result of merging a contiguous sequence of
+ /// "units" (instructions or blocks depending on layer type) in this layer
+ /// This allows the implementation class to decide how to store/generate this
+ /// metadata. For example, in the case of the instruction layer we want to
+ /// lazily generate this metadata instead of storing it for each instruction.
+ ///
+ /// \param[in] start_unit_index
+ /// The index of the first unit to be merged.
+ ///
+ /// \param[in] num_units
+ /// The number of units to be merged. Must be >= 1, since merging 0 blocks
+ /// does not make sense.
+ ///
+ /// \return
+ /// A new block instance representing the merge of the specified units.
+ HTRBlock MergeUnits(size_t start_unit_index, size_t num_units);
+
+ virtual ~IHTRLayer() = default;
+
+protected:
+ /// ID of the layer.
+ size_t m_layer_id;
+};
+
+/// "Base" layer of HTR representing the dynamic instructions of the trace.
+/// See lldb/docs/htr.rst for comprehensive HTR documentation
+class HTRInstructionLayer : public IHTRLayer {
+public:
+ /// Construct new instruction layer.
+ //
+ /// \param[in] id
+ /// The layer's id.
+ HTRInstructionLayer(size_t id) : IHTRLayer(id) {}
+
+ size_t GetNumUnits() const override;
+
+ HTRBlockMetadata GetMetadataByIndex(size_t index) const override;
+
+ /// Get the dynamic instruction trace.
+ ///
+ /// \return
+ /// The dynamic instruction trace.
+ llvm::ArrayRef<lldb::addr_t> GetInstructionTrace() const;
+
+ /// Add metadata for a 'call' instruction of the trace.
+ ///
+ /// \param[in] load_addr
+ /// The load address of the 'call' instruction.
+ ///
+ /// \param[in] func_name
+ /// The name of the function the 'call' instruction is calling if it can
+ /// be determined, None otherwise.
+ void AddCallInstructionMetadata(lldb::addr_t load_addr,
+ llvm::Optional<ConstString> func_name);
+
+ /// Append the load address of an instruction to the dynamic instruction
+ /// trace.
+ ///
+ /// \param[in] load_addr
+ /// The load address of the instruction.
+ void AppendInstruction(lldb::addr_t load_addr);
+
+private:
+ // Dynamic instructions of trace are stored in chronological order.
+ std::vector<lldb::addr_t> m_instruction_trace;
+ // Only store metadata for instructions of interest (call instructions)
+ // If we stored metadata for each instruction this would be wasteful since
+ // most instructions don't contain useful metadata
+
+ // This map contains the load address of all the call instructions.
+ // Each load address maps to the name of the function it calls (None if the
+ // function name can't be determined).
+ std::unordered_map<lldb::addr_t, llvm::Optional<ConstString>> m_call_isns;
+};
+
+/// HTR layer composed of blocks of the trace.
+/// See lldb/docs/htr.rst for comprehensive HTR documentation
+class HTRBlockLayer : public IHTRLayer {
+public:
+ /// Construct new block layer.
+ //
+ /// \param[in] id
+ /// The layer's id.
+ HTRBlockLayer(size_t id) : IHTRLayer(id) {}
+
+ size_t GetNumUnits() const override;
+
+ HTRBlockMetadata GetMetadataByIndex(size_t index) const override;
+
+ /// Get an \a HTRBlock from its block id.
+ ///
+ /// \param[in] block_id
+ /// The id of the block to retrieve.
+ ///
+ /// \return
+ /// The \a HTRBlock with the specified id, or nullptr if there is no block
+ /// in the layer with the specified block id.
+ HTRBlock const *GetBlockById(size_t block_id) const;
+
+ /// Get the block ID trace for this layer.
+ /// This block ID trace stores the block ID of each block that occurred in the
+ /// trace and the block defs map maps block ID to the corresponding \a
+ /// HTRBlock.
+ ///
+ /// \return
+ /// The block ID trace for this layer.
+ llvm::ArrayRef<size_t> GetBlockIdTrace() const;
+
+ /// Appends a new block to the layer.
+ ///
+ /// \param[in] block_id
+ /// The block id of the new block.
+ ///
+ /// \param[in] block
+ /// The new \a HTRBlock to be appended to the layer. This block is moved
+ /// into the layer.
+ void AppendNewBlock(size_t block_id, HTRBlock &&block);
+
+ /// Appends a repeated block to the layer.
+ ///
+ /// \param[in] block_id
+ /// The block id of the repeated block.
+ void AppendRepeatedBlock(size_t block_id);
+
+private:
+ /// Maps a unique Block ID to the corresponding HTRBlock
+ std::unordered_map<size_t, HTRBlock> m_block_defs;
+ /// Reduce memory footprint by just storing a trace of block IDs and use
+ /// m_block_defs to map a block_id to its corresponding HTRBlock
+ std::vector<size_t> m_block_id_trace;
+};
+
+typedef std::unique_ptr<lldb_private::HTRBlockLayer> HTRBlockLayerUP;
+typedef std::unique_ptr<lldb_private::HTRInstructionLayer>
+ HTRInstructionLayerUP;
+
+/// Top-level HTR class
+/// See lldb/docs/htr.rst for comprehensive HTR documentation
+class TraceHTR {
+
+public:
+ /// Constructor for a trace's HTR.
+ ///
+ /// \param[in] thread
+ /// The thread the trace belongs to.
+ ///
+ /// \param[in] cursor
+ /// The trace cursor that gives access to the trace's contents.
+ TraceHTR(Thread &thread, TraceCursor &cursor);
+
+ /// Executes passes on the HTR layers until no further
+ /// summarization/compression is achieved
+ void ExecutePasses();
+
+ /// Export HTR layers to the specified format and outfile.
+ ///
+ /// \param[in] outfile
+ /// The file that the exported HTR data will be written to.
+ ///
+ /// \return
+ /// Success if the export is successful, Error otherwise.
+ llvm::Error Export(std::string outfile);
+
+ /// Get the block layers of this HTR.
+ ///
+ /// \return
+ /// The block layers of this HTR.
+ llvm::ArrayRef<HTRBlockLayerUP> GetBlockLayers() const;
+
+ /// Get the instruction layer of this HTR.
+ ///
+ /// \return
+ /// The instruction layer of this HTR.
+ HTRInstructionLayer const &GetInstructionLayer() const;
+
+ /// Add a new block layer to this HTR.
+ ///
+ /// \param[in] block_layer
+ /// The new block layer to be added.
+ void AddNewBlockLayer(HTRBlockLayerUP &&block_layer);
+
+private:
+ // There is a single instruction layer per HTR
+ HTRInstructionLayerUP m_instruction_layer_up;
+ // There are one or more block layers per HTR
+ std::vector<HTRBlockLayerUP> m_block_layer_ups;
+};
+
+// Serialization functions for exporting HTR to Chrome Trace Format
+llvm::json::Value toJSON(const TraceHTR &htr);
+llvm::json::Value toJSON(const HTRBlock &block);
+llvm::json::Value toJSON(const HTRBlockMetadata &metadata);
+
+/// The HTR passes are defined below:
+
+/// Creates a new layer by merging the "basic super blocks" in the current layer
+///
+/// A "basic super block" is the longest sequence of blocks that always occur in
+/// the same order. (The concept is akin to "Basic Block" in compiler theory,
+/// but refers to dynamic occurrences rather than CFG nodes.)
+///
+/// Procedure to find all basic super blocks:
+///
+/// - For each block, compute the number of distinct predecessor and
+/// successor blocks.
+/// Predecessor - the block that occurs directly before (to the left of)
+/// the current block.
+/// Successor - the block that occurs directly after (to the right of)
+/// the current block.
+/// - A block with more than one distinct predecessor is always the start of a
+/// super block; the super block will continue until the next block with
+/// more than one distinct predecessor or successor.
+///
+/// The implementation makes use of two terms - 'heads' and 'tails' known as
+/// the 'endpoints' of a basic super block:
+/// A 'head' is defined to be a block in the trace that doesn't have a
+/// unique predecessor
+/// A 'tail' is defined to be a block in the trace that doesn't have a
+/// unique successor
+///
+/// A basic super block is defined to be a sequence of blocks between two
+/// endpoints
+///
+/// A head represents the start of the next group, so the current group
+/// ends at the block preceding the head and the next group begins with
+/// this head block
+///
+/// A tail represents the end of the current group, so the current group
+/// ends with the tail block and the next group begins with the
+/// following block.
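+///
+/// Worked example (an illustrative sketch, not output of the code): for the
+/// block trace A B C B C D, B is a head (its predecessors are A and C) and C
+/// is a tail (its successors are B and D); the last block D is also marked as
+/// a tail. The resulting super blocks are [A], [B C], [B C], [D].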
+///
+/// See lldb/docs/htr.rst for comprehensive HTR documentation
+///
+/// \param[in] layer
+/// The layer to execute the pass on.
+///
+/// \return
+/// A new layer instance representing the merge of blocks in the
+/// previous layer
+HTRBlockLayerUP BasicSuperBlockMerge(IHTRLayer &layer);
+
+} // namespace lldb_private
+
+#endif // LLDB_TARGET_TRACE_HTR_H
diff --git a/lldb/source/Plugins/TraceExporter/ctf/CommandObjectThreadTraceExportCTF.cpp b/lldb/source/Plugins/TraceExporter/ctf/CommandObjectThreadTraceExportCTF.cpp
index 3dd4c89e2777..919cdf46a5c0 100644
--- a/lldb/source/Plugins/TraceExporter/ctf/CommandObjectThreadTraceExportCTF.cpp
+++ b/lldb/source/Plugins/TraceExporter/ctf/CommandObjectThreadTraceExportCTF.cpp
@@ -8,7 +8,10 @@
#include "CommandObjectThreadTraceExportCTF.h"
+#include "../common/TraceHTR.h"
#include "lldb/Host/OptionParser.h"
+#include "lldb/Target/Process.h"
+#include "lldb/Target/Trace.h"
using namespace lldb;
using namespace lldb_private;
@@ -27,6 +30,10 @@ Status CommandObjectThreadTraceExportCTF::CommandOptions::SetOptionValue(
const int short_option = m_getopt_table[option_idx].val;
switch (short_option) {
+ case 'f': {
+ m_file.assign(std::string(option_arg));
+ break;
+ }
case 't': {
int64_t thread_index;
if (option_arg.empty() || option_arg.getAsInteger(0, thread_index) ||
@@ -45,6 +52,7 @@ Status CommandObjectThreadTraceExportCTF::CommandOptions::SetOptionValue(
void CommandObjectThreadTraceExportCTF::CommandOptions::OptionParsingStarting(
ExecutionContext *execution_context) {
+ m_file.clear();
m_thread_index = None;
}
@@ -55,12 +63,30 @@ CommandObjectThreadTraceExportCTF::CommandOptions::GetDefinitions() {
bool CommandObjectThreadTraceExportCTF::DoExecute(Args &command,
CommandReturnObject &result) {
- Stream &s = result.GetOutputStream();
- // TODO: create an actual instance of the exporter and invoke it
- if (m_options.m_thread_index)
- s.Printf("got thread index %d\n", (int)m_options.m_thread_index.getValue());
- else
- s.Printf("didn't get a thread index\n");
+ const TraceSP &trace_sp = m_exe_ctx.GetTargetSP()->GetTrace();
+ Process *process = m_exe_ctx.GetProcessPtr();
+ Thread *thread = m_options.m_thread_index
+ ? process->GetThreadList()
+ .FindThreadByIndexID(*m_options.m_thread_index)
+ .get()
+ : GetDefaultThread();
- return result.Succeeded();
+ if (thread == nullptr) {
+ const uint32_t num_threads = process->GetThreadList().GetSize();
+ size_t tid = m_options.m_thread_index ? *m_options.m_thread_index
+ : LLDB_INVALID_THREAD_ID;
+ result.AppendErrorWithFormatv(
+ "Thread index {0} is out of range (valid values are 1 - {1}).\n", tid,
+ num_threads);
+ return false;
+ } else {
+ TraceHTR htr(*thread, *trace_sp->GetCursor(*thread));
+ htr.ExecutePasses();
+ if (llvm::Error err = htr.Export(m_options.m_file)) {
+ result.AppendErrorWithFormat("%s\n", toString(std::move(err)).c_str());
+ return false;
+ } else {
+ return true;
+ }
+ }
}
diff --git a/lldb/source/Plugins/TraceExporter/ctf/CommandObjectThreadTraceExportCTF.h b/lldb/source/Plugins/TraceExporter/ctf/CommandObjectThreadTraceExportCTF.h
index 26b068a8f8c5..c6364cfe027d 100644
--- a/lldb/source/Plugins/TraceExporter/ctf/CommandObjectThreadTraceExportCTF.h
+++ b/lldb/source/Plugins/TraceExporter/ctf/CommandObjectThreadTraceExportCTF.h
@@ -30,6 +30,7 @@ public:
llvm::ArrayRef<OptionDefinition> GetDefinitions() override;
llvm::Optional<size_t> m_thread_index;
+ std::string m_file;
};
CommandObjectThreadTraceExportCTF(CommandInterpreter &interpreter)
@@ -39,7 +40,8 @@ public:
"thread trace export ctf [<ctf-options>]",
lldb::eCommandRequiresProcess | lldb::eCommandTryTargetAPILock |
lldb::eCommandProcessMustBeLaunched |
- lldb::eCommandProcessMustBePaused),
+ lldb::eCommandProcessMustBePaused |
+ lldb::eCommandProcessMustBeTraced),
m_options() {}
Options *GetOptions() override { return &m_options; }
diff --git a/lldb/source/Plugins/TraceExporter/ctf/TraceExporterCTF.cpp b/lldb/source/Plugins/TraceExporter/ctf/TraceExporterCTF.cpp
index 08bc03d78303..e98e2c83e649 100644
--- a/lldb/source/Plugins/TraceExporter/ctf/TraceExporterCTF.cpp
+++ b/lldb/source/Plugins/TraceExporter/ctf/TraceExporterCTF.cpp
@@ -39,15 +39,6 @@ void TraceExporterCTF::Terminate() {
PluginManager::UnregisterPlugin(CreateInstance);
}
-ConstString TraceExporterCTF::GetPluginNameStatic() {
- static ConstString g_name("ctf");
- return g_name;
-}
-
-ConstString TraceExporterCTF::GetPluginName() { return GetPluginNameStatic(); }
-
-uint32_t TraceExporterCTF::GetPluginVersion() { return 1; }
-
Expected<TraceExporterUP> TraceExporterCTF::CreateInstance() {
return std::make_unique<TraceExporterCTF>();
}
diff --git a/lldb/source/Plugins/TraceExporter/ctf/TraceExporterCTF.h b/lldb/source/Plugins/TraceExporter/ctf/TraceExporterCTF.h
index 8f9e354ab0dd..74f14fc354d1 100644
--- a/lldb/source/Plugins/TraceExporter/ctf/TraceExporterCTF.h
+++ b/lldb/source/Plugins/TraceExporter/ctf/TraceExporterCTF.h
@@ -24,15 +24,15 @@ public:
/// \{
static llvm::Expected<lldb::TraceExporterUP> CreateInstance();
- ConstString GetPluginName() override;
+ llvm::StringRef GetPluginName() override {
+ return GetPluginNameStatic();
+ }
static void Initialize();
static void Terminate();
- static ConstString GetPluginNameStatic();
-
- uint32_t GetPluginVersion() override;
+ static llvm::StringRef GetPluginNameStatic() { return "ctf"; }
/// \}
};
diff --git a/lldb/source/Plugins/TraceExporter/ctf/TraceExporterCTFOptions.td b/lldb/source/Plugins/TraceExporter/ctf/TraceExporterCTFOptions.td
index ce751f148d9f..1919e7184f9b 100644
--- a/lldb/source/Plugins/TraceExporter/ctf/TraceExporterCTFOptions.td
+++ b/lldb/source/Plugins/TraceExporter/ctf/TraceExporterCTFOptions.td
@@ -6,4 +6,8 @@ let Command = "thread trace export ctf" in {
Arg<"ThreadIndex">,
Desc<"Export the trace for the specified thread index. Otherwise, the "
"currently selected thread will be used.">;
+ def thread_trace_export_file: Option<"file", "f">, Required,
+ Group<1>,
+ Arg<"Filename">,
+ Desc<"Path of the file to export the trace data">;
}
diff --git a/lldb/source/Plugins/TraceExporter/docs/htr.rst b/lldb/source/Plugins/TraceExporter/docs/htr.rst
new file mode 100644
index 000000000000..1341cf5f0c80
--- /dev/null
+++ b/lldb/source/Plugins/TraceExporter/docs/htr.rst
@@ -0,0 +1,48 @@
+Hierarchical Trace Representation (HTR)
+========================================
+Processor traces, such as the ones obtained with Intel PT, contain a humongous amount of data that is not digestible to humans in its raw form. It is therefore useful to summarize these massive traces by extracting useful information. Hierarchical Trace Representation (HTR) is the way lldb represents a summarized trace internally. HTR stores trace data efficiently and allows it to be transformed in a way akin to compiler passes.
+
+Concepts
+--------
+**Block:** One or more contiguous units of the trace. At minimum, the unit of a trace is the load address of an instruction.
+
+**Block Metadata:** Metadata associated with each *block*. For processor traces, some metadata examples are the number of instructions in the block or information on what functions are called in the block.
+
+**Layer:** The representation of trace data between passes. For Intel PT there are two types of layers:
+
+ **Instruction Layer:** Composed of the load addresses of the instructions in the trace. In an effort to save space,
+ metadata is only stored for instructions that are of interest, not every instruction in the trace. HTR contains a
+ single instruction layer.
+
+ **Block Layer:** Composed of blocks - a block in *layer n* refers to a sequence of blocks in *layer n - 1*. A block in
+ *layer 1* refers to a sequence of instructions in *layer 0* (the instruction layer). Metadata is stored for each block in
+ a block layer. HTR contains one or more block layers.
+
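+For example (purely illustrative), a trace whose instruction layer (*layer 0*)
+is the sequence ``a, b, c, a, b, c`` could be summarized by a block layer
+(*layer 1*) of the form ``A, A``, where ``A`` is a block standing for the
+instruction sequence ``a, b, c``.
+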
+**Pass:** A transformation applied to a *layer* that generates a new *layer* that is a more summarized, consolidated representation of the trace data.
+A pass merges instructions/blocks based on its specific purpose - for example, a pass designed to summarize a processor trace by function calls would merge all the blocks of a function into a single block representing the entire function.
+
+The image below illustrates the transformation of a trace's representation (HTR).
+
+.. image:: media/htr-example.png
+
+
+Passes
+------
+A *pass* is applied to a *layer* to extract useful information (summarization) and compress the trace representation into a new *layer*. The idea is to have a series of passes, each specializing in extracting certain information about the trace. Examples of potential passes include identifying functions, identifying loops, or more general-purpose passes such as identifying long sequences of instructions that are repeated (i.e. Basic Super Blocks). Below you will find a description of each pass currently implemented in lldb.
+
+**Basic Super Block Reduction**
+
+A “basic super block” is the longest sequence of blocks that always occur in the same order. (The concept is akin to “Basic Block” in compiler theory, but refers to dynamic occurrences rather than CFG nodes.)
+
+The image below shows the "basic super blocks" of the sequence. Each unique "basic super block" is marked with a different color.
+
+.. image:: media/basic_super_block_pass.png
+
+*Procedure to find all super blocks:*
+
+- For each block, compute the number of distinct predecessor and successor blocks.
+
+ - **Predecessor** - the block that occurs directly before (to the left of) the current block
+ - **Successor** - the block that occurs directly after (to the right of) the current block
+
+- A block with more than one distinct successor is always the start of a super block; the super block continues until the next block with more than one distinct predecessor or successor (a sketch of this rule is shown below).
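+
+The following sketch applies the rule above literally; it is only an
+illustration and not necessarily the exact implementation used by lldb:
+
+.. code-block:: c++
+
+  #include <cstddef>
+  #include <unordered_map>
+  #include <unordered_set>
+  #include <vector>
+
+  // Given the sequence of block ids in a layer, return the indices at which
+  // a new super block starts.
+  std::vector<size_t> FindSuperBlockStarts(const std::vector<int> &blocks) {
+    // Count the distinct predecessors and successors of every block id.
+    std::unordered_map<int, std::unordered_set<int>> preds, succs;
+    for (size_t i = 0; i + 1 < blocks.size(); ++i) {
+      succs[blocks[i]].insert(blocks[i + 1]);
+      preds[blocks[i + 1]].insert(blocks[i]);
+    }
+    // A block with more than one distinct predecessor or successor begins a
+    // new super block; the first block of the trace always does.
+    std::vector<size_t> starts;
+    for (size_t i = 0; i < blocks.size(); ++i) {
+      if (i == 0 || preds[blocks[i]].size() > 1 || succs[blocks[i]].size() > 1)
+        starts.push_back(i);
+    }
+    return starts;
+  }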
diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
index 7150fdc78476..b20ae32a08ac 100644
--- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
+++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
@@ -478,7 +478,7 @@ static void ParseLangArgs(LangOptions &Opts, InputKind IK, const char *triple) {
LangStd = LangStandard::lang_opencl10;
break;
case clang::Language::OpenCLCXX:
- LangStd = LangStandard::lang_openclcpp;
+ LangStd = LangStandard::lang_openclcpp10;
break;
case clang::Language::CUDA:
LangStd = LangStandard::lang_cuda;
@@ -570,16 +570,6 @@ TypeSystemClang::TypeSystemClang(llvm::StringRef name,
// Destructor
TypeSystemClang::~TypeSystemClang() { Finalize(); }
-ConstString TypeSystemClang::GetPluginNameStatic() {
- return ConstString("clang");
-}
-
-ConstString TypeSystemClang::GetPluginName() {
- return TypeSystemClang::GetPluginNameStatic();
-}
-
-uint32_t TypeSystemClang::GetPluginVersion() { return 1; }
-
lldb::TypeSystemSP TypeSystemClang::CreateInstance(lldb::LanguageType language,
lldb_private::Module *module,
Target *target) {
@@ -983,21 +973,25 @@ CompilerType TypeSystemClang::GetBuiltinTypeForDWARFEncodingAndBitSize(
}
break;
- case DW_ATE_complex_float:
- if (QualTypeMatchesBitSize(bit_size, ast, ast.FloatComplexTy))
- return GetType(ast.FloatComplexTy);
- else if (QualTypeMatchesBitSize(bit_size, ast, ast.DoubleComplexTy))
- return GetType(ast.DoubleComplexTy);
- else if (QualTypeMatchesBitSize(bit_size, ast, ast.LongDoubleComplexTy))
- return GetType(ast.LongDoubleComplexTy);
- else {
- CompilerType complex_float_clang_type =
- GetBuiltinTypeForDWARFEncodingAndBitSize("float", DW_ATE_float,
- bit_size / 2);
- return GetType(
- ast.getComplexType(ClangUtil::GetQualType(complex_float_clang_type)));
- }
- break;
+ case DW_ATE_complex_float: {
+ CanQualType FloatComplexTy = ast.getComplexType(ast.FloatTy);
+ if (QualTypeMatchesBitSize(bit_size, ast, FloatComplexTy))
+ return GetType(FloatComplexTy);
+
+ CanQualType DoubleComplexTy = ast.getComplexType(ast.DoubleTy);
+ if (QualTypeMatchesBitSize(bit_size, ast, DoubleComplexTy))
+ return GetType(DoubleComplexTy);
+
+ CanQualType LongDoubleComplexTy = ast.getComplexType(ast.LongDoubleTy);
+ if (QualTypeMatchesBitSize(bit_size, ast, LongDoubleComplexTy))
+ return GetType(LongDoubleComplexTy);
+
+ CompilerType complex_float_clang_type =
+ GetBuiltinTypeForDWARFEncodingAndBitSize("float", DW_ATE_float,
+ bit_size / 2);
+ return GetType(
+ ast.getComplexType(ClangUtil::GetQualType(complex_float_clang_type)));
+ }
case DW_ATE_float:
if (type_name == "float" &&
@@ -1153,20 +1147,12 @@ CompilerType TypeSystemClang::GetBuiltinTypeForDWARFEncodingAndBitSize(
}
break;
}
- // This assert should fire for anything that we don't catch above so we know
- // to fix any issues we run into.
- if (!type_name.empty()) {
- std::string type_name_str = type_name.str();
- Host::SystemLog(Host::eSystemLogError,
- "error: need to add support for DW_TAG_base_type '%s' "
- "encoded with DW_ATE = 0x%x, bit_size = %u\n",
- type_name_str.c_str(), dw_ate, bit_size);
- } else {
- Host::SystemLog(Host::eSystemLogError, "error: need to add support for "
- "DW_TAG_base_type encoded with "
- "DW_ATE = 0x%x, bit_size = %u\n",
- dw_ate, bit_size);
- }
+
+ Log *log = lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_TYPES);
+ LLDB_LOG(log,
+ "error: need to add support for DW_TAG_base_type '{0}' "
+ "encoded with DW_ATE = {1:x}, bit_size = {2}",
+ type_name, dw_ate, bit_size);
return CompilerType();
}
@@ -1341,19 +1327,16 @@ CompilerType TypeSystemClang::CreateRecordType(
decl->setAnonymousStructOrUnion(true);
}
- if (decl) {
- if (metadata)
- SetMetadata(decl, *metadata);
+ if (metadata)
+ SetMetadata(decl, *metadata);
- if (access_type != eAccessNone)
- decl->setAccess(ConvertAccessTypeToAccessSpecifier(access_type));
+ if (access_type != eAccessNone)
+ decl->setAccess(ConvertAccessTypeToAccessSpecifier(access_type));
- if (decl_ctx)
- decl_ctx->addDecl(decl);
+ if (decl_ctx)
+ decl_ctx->addDecl(decl);
- return GetType(ast.getTagDeclType(decl));
- }
- return CompilerType();
+ return GetType(ast.getTagDeclType(decl));
}
namespace {
@@ -1560,7 +1543,7 @@ static bool ClassTemplateAllowsToInstantiationArgs(
ClassTemplateDecl *TypeSystemClang::CreateClassTemplateDecl(
DeclContext *decl_ctx, OptionalClangModuleID owning_module,
- lldb::AccessType access_type, const char *class_name, int kind,
+ lldb::AccessType access_type, llvm::StringRef class_name, int kind,
const TemplateParameterInfos &template_param_infos) {
ASTContext &ast = getASTContext();
@@ -1619,15 +1602,13 @@ ClassTemplateDecl *TypeSystemClang::CreateClassTemplateDecl(
template_cxx_decl->setDescribedClassTemplate(class_template_decl);
SetOwningModule(class_template_decl, owning_module);
- if (class_template_decl) {
- if (access_type != eAccessNone)
- class_template_decl->setAccess(
- ConvertAccessTypeToAccessSpecifier(access_type));
+ if (access_type != eAccessNone)
+ class_template_decl->setAccess(
+ ConvertAccessTypeToAccessSpecifier(access_type));
- decl_ctx->addDecl(class_template_decl);
+ decl_ctx->addDecl(class_template_decl);
- VerifyDecl(class_template_decl);
- }
+ VerifyDecl(class_template_decl);
return class_template_decl;
}
@@ -1817,7 +1798,7 @@ CompilerType TypeSystemClang::CreateObjCClass(
decl->setImplicit(isInternal);
SetOwningModule(decl, owning_module);
- if (decl && metadata)
+ if (metadata)
SetMetadata(decl, *metadata);
return GetType(ast.getObjCInterfaceType(decl));
@@ -2053,11 +2034,11 @@ TypeSystemClang::GetOpaqueCompilerType(clang::ASTContext *ast,
case eBasicTypeLongDouble:
return ast->LongDoubleTy.getAsOpaquePtr();
case eBasicTypeFloatComplex:
- return ast->FloatComplexTy.getAsOpaquePtr();
+ return ast->getComplexType(ast->FloatTy).getAsOpaquePtr();
case eBasicTypeDoubleComplex:
- return ast->DoubleComplexTy.getAsOpaquePtr();
+ return ast->getComplexType(ast->DoubleTy).getAsOpaquePtr();
case eBasicTypeLongDoubleComplex:
- return ast->LongDoubleComplexTy.getAsOpaquePtr();
+ return ast->getComplexType(ast->LongDoubleTy).getAsOpaquePtr();
case eBasicTypeObjCID:
return ast->getObjCIdType().getAsOpaquePtr();
case eBasicTypeObjCClass:
@@ -2155,8 +2136,7 @@ FunctionDecl *TypeSystemClang::CreateFunctionDeclaration(
? ConstexprSpecKind::Constexpr
: ConstexprSpecKind::Unspecified);
SetOwningModule(func_decl, owning_module);
- if (func_decl)
- decl_ctx->addDecl(func_decl);
+ decl_ctx->addDecl(func_decl);
VerifyDecl(func_decl);
@@ -2302,7 +2282,7 @@ CompilerType TypeSystemClang::GetOrCreateStructForIdentifier(
#pragma mark Enumeration Types
CompilerType TypeSystemClang::CreateEnumerationType(
- const char *name, clang::DeclContext *decl_ctx,
+ llvm::StringRef name, clang::DeclContext *decl_ctx,
OptionalClangModuleID owning_module, const Declaration &decl,
const CompilerType &integer_clang_type, bool is_scoped) {
// TODO: Do something intelligent with the Declaration object passed in
@@ -2313,24 +2293,21 @@ CompilerType TypeSystemClang::CreateEnumerationType(
// const bool IsFixed = false;
EnumDecl *enum_decl = EnumDecl::CreateDeserialized(ast, 0);
enum_decl->setDeclContext(decl_ctx);
- if (name && name[0])
+ if (!name.empty())
enum_decl->setDeclName(&ast.Idents.get(name));
enum_decl->setScoped(is_scoped);
enum_decl->setScopedUsingClassTag(is_scoped);
enum_decl->setFixed(false);
SetOwningModule(enum_decl, owning_module);
- if (enum_decl) {
- if (decl_ctx)
- decl_ctx->addDecl(enum_decl);
+ if (decl_ctx)
+ decl_ctx->addDecl(enum_decl);
- // TODO: check if we should be setting the promotion type too?
- enum_decl->setIntegerType(ClangUtil::GetQualType(integer_clang_type));
+ // TODO: check if we should be setting the promotion type too?
+ enum_decl->setIntegerType(ClangUtil::GetQualType(integer_clang_type));
- enum_decl->setAccess(AS_public); // TODO respect what's in the debug info
+ enum_decl->setAccess(AS_public); // TODO respect what's in the debug info
- return GetType(ast.getTagDeclType(enum_decl));
- }
- return CompilerType();
+ return GetType(ast.getTagDeclType(enum_decl));
}
CompilerType TypeSystemClang::GetIntTypeFromBitSize(size_t bit_size,
@@ -2950,7 +2927,12 @@ bool TypeSystemClang::IsCharType(lldb::opaque_compiler_type_t type) {
}
bool TypeSystemClang::IsCompleteType(lldb::opaque_compiler_type_t type) {
- const bool allow_completion = false;
+ // If the type hasn't been lazily completed yet, complete it now so that we
+ // can give the caller an accurate answer whether the type actually has a
+ // definition. Without completing the type now we would just tell the user
+ // the current (internal) completeness state of the type and most users don't
+ // care (or even know) about this behavior.
+ const bool allow_completion = true;
return GetCompleteQualType(&getASTContext(), GetQualType(type),
allow_completion);
}
@@ -4248,7 +4230,13 @@ static clang::QualType GetFullyUnqualifiedType_Impl(clang::ASTContext *ast,
if (qual_type->isPointerType())
qual_type = ast->getPointerType(
GetFullyUnqualifiedType_Impl(ast, qual_type->getPointeeType()));
- else
+ else if (const ConstantArrayType *arr =
+ ast->getAsConstantArrayType(qual_type)) {
+ qual_type = ast->getConstantArrayType(
+ GetFullyUnqualifiedType_Impl(ast, arr->getElementType()),
+ arr->getSize(), arr->getSizeExpr(), arr->getSizeModifier(),
+ arr->getIndexTypeQualifiers().getAsOpaqueValue());
+ } else
qual_type = qual_type.getUnqualifiedType();
qual_type.removeLocalConst();
qual_type.removeLocalRestrict();
@@ -4824,6 +4812,7 @@ lldb::Encoding TypeSystemClang::GetEncoding(lldb::opaque_compiler_type_t type,
case clang::BuiltinType::Double:
case clang::BuiltinType::LongDouble:
case clang::BuiltinType::BFloat16:
+ case clang::BuiltinType::Ibm128:
return lldb::eEncodingIEEE754;
case clang::BuiltinType::ObjCClass:
@@ -6503,7 +6492,8 @@ CompilerType TypeSystemClang::GetChildCompilerTypeAtIndex(
case clang::Type::RValueReference:
if (idx_is_valid) {
const clang::ReferenceType *reference_type =
- llvm::cast<clang::ReferenceType>(GetQualType(type).getTypePtr());
+ llvm::cast<clang::ReferenceType>(
+ RemoveWrappingTypes(GetQualType(type)).getTypePtr());
CompilerType pointee_clang_type =
GetType(reference_type->getPointeeType());
if (transparent_pointers && pointee_clang_type.IsAggregateType()) {
@@ -8367,9 +8357,8 @@ TypeSystemClang::dump(lldb::opaque_compiler_type_t type) const {
}
#endif
-void TypeSystemClang::Dump(Stream &s) {
- Decl *tu = Decl::castFromDeclContext(GetTranslationUnitDecl());
- tu->dump(s.AsRawOstream());
+void TypeSystemClang::Dump(llvm::raw_ostream &output) {
+ GetTranslationUnitDecl()->dump(output);
}
void TypeSystemClang::DumpFromSymbolFile(Stream &s,
@@ -9755,6 +9744,41 @@ ScratchTypeSystemClang::GetForTarget(Target &target,
return &scratch_ast.GetIsolatedAST(*ast_kind);
}
+/// Returns a human-readable name that uniquely identifies the sub-AST kind.
+static llvm::StringRef
+GetNameForIsolatedASTKind(ScratchTypeSystemClang::IsolatedASTKind kind) {
+ switch (kind) {
+ case ScratchTypeSystemClang::IsolatedASTKind::CppModules:
+ return "C++ modules";
+ }
+ llvm_unreachable("Unimplemented IsolatedASTKind?");
+}
+
+void ScratchTypeSystemClang::Dump(llvm::raw_ostream &output) {
+ // First dump the main scratch AST.
+ output << "State of scratch Clang type system:\n";
+ TypeSystemClang::Dump(output);
+
+ // Now sort the isolated sub-ASTs.
+ typedef std::pair<IsolatedASTKey, TypeSystem *> KeyAndTS;
+ std::vector<KeyAndTS> sorted_typesystems;
+ for (const auto &a : m_isolated_asts)
+ sorted_typesystems.emplace_back(a.first, a.second.get());
+ llvm::stable_sort(sorted_typesystems,
+ [](const KeyAndTS &lhs, const KeyAndTS &rhs) {
+ return lhs.first < rhs.first;
+ });
+
+ // Dump each sub-AST too.
+ for (const auto &a : sorted_typesystems) {
+ IsolatedASTKind kind =
+ static_cast<ScratchTypeSystemClang::IsolatedASTKind>(a.first);
+ output << "State of scratch Clang type subsystem "
+ << GetNameForIsolatedASTKind(kind) << ":\n";
+ a.second->Dump(output);
+ }
+}
+
UserExpression *ScratchTypeSystemClang::GetUserExpression(
llvm::StringRef expr, llvm::StringRef prefix, lldb::LanguageType language,
Expression::ResultType desired_type,
diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h
index 701e4ca42e39..f3a07397ec44 100644
--- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h
+++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h
@@ -138,11 +138,9 @@ public:
void Finalize() override;
// PluginInterface functions
- ConstString GetPluginName() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
- uint32_t GetPluginVersion() override;
-
- static ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "clang"; }
static lldb::TypeSystemSP CreateInstance(lldb::LanguageType language,
Module *module, Target *target);
@@ -345,11 +343,10 @@ public:
clang::FunctionDecl *func_decl, clang::FunctionTemplateDecl *Template,
const TemplateParameterInfos &infos);
- clang::ClassTemplateDecl *
- CreateClassTemplateDecl(clang::DeclContext *decl_ctx,
- OptionalClangModuleID owning_module,
- lldb::AccessType access_type, const char *class_name,
- int kind, const TemplateParameterInfos &infos);
+ clang::ClassTemplateDecl *CreateClassTemplateDecl(
+ clang::DeclContext *decl_ctx, OptionalClangModuleID owning_module,
+ lldb::AccessType access_type, llvm::StringRef class_name, int kind,
+ const TemplateParameterInfos &infos);
clang::TemplateTemplateParmDecl *
CreateTemplateTemplateParmDecl(const char *template_name);
@@ -400,14 +397,7 @@ public:
CompilerType CreateFunctionType(const CompilerType &result_type,
const CompilerType *args, unsigned num_args,
bool is_variadic, unsigned type_quals,
- clang::CallingConv cc);
-
- CompilerType CreateFunctionType(const CompilerType &result_type,
- const CompilerType *args, unsigned num_args,
- bool is_variadic, unsigned type_quals) {
- return CreateFunctionType(result_type, args, num_args, is_variadic,
- type_quals, clang::CC_C);
- }
+ clang::CallingConv cc = clang::CC_C);
clang::ParmVarDecl *
CreateParameterDeclaration(clang::DeclContext *decl_ctx,
@@ -426,7 +416,7 @@ public:
size_t element_count, bool is_vector);
// Enumeration Types
- CompilerType CreateEnumerationType(const char *name,
+ CompilerType CreateEnumerationType(llvm::StringRef name,
clang::DeclContext *decl_ctx,
OptionalClangModuleID owning_module,
const Declaration &decl,
@@ -945,7 +935,8 @@ public:
LLVM_DUMP_METHOD void dump(lldb::opaque_compiler_type_t type) const override;
#endif
- void Dump(Stream &s);
+ /// \see lldb_private::TypeSystem::Dump
+ void Dump(llvm::raw_ostream &output) override;
/// Dump clang AST types from the symbol file.
///
@@ -1169,6 +1160,9 @@ public:
return GetForTarget(target, InferIsolatedASTKindFromLangOpts(lang_opts));
}
+ /// \see lldb_private::TypeSystem::Dump
+ void Dump(llvm::raw_ostream &output) override;
+
UserExpression *
GetUserExpression(llvm::StringRef expr, llvm::StringRef prefix,
lldb::LanguageType language,
diff --git a/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.cpp b/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.cpp
index 65947c5f833b..ccaac687ed7a 100644
--- a/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.cpp
+++ b/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.cpp
@@ -333,13 +333,6 @@ UnwindAssemblyInstEmulation::CreateInstance(const ArchSpec &arch) {
return nullptr;
}
-// PluginInterface protocol in UnwindAssemblyParser_x86
-ConstString UnwindAssemblyInstEmulation::GetPluginName() {
- return GetPluginNameStatic();
-}
-
-uint32_t UnwindAssemblyInstEmulation::GetPluginVersion() { return 1; }
-
void UnwindAssemblyInstEmulation::Initialize() {
PluginManager::RegisterPlugin(GetPluginNameStatic(),
GetPluginDescriptionStatic(), CreateInstance);
@@ -349,12 +342,7 @@ void UnwindAssemblyInstEmulation::Terminate() {
PluginManager::UnregisterPlugin(CreateInstance);
}
-ConstString UnwindAssemblyInstEmulation::GetPluginNameStatic() {
- static ConstString g_name("inst-emulation");
- return g_name;
-}
-
-const char *UnwindAssemblyInstEmulation::GetPluginDescriptionStatic() {
+llvm::StringRef UnwindAssemblyInstEmulation::GetPluginDescriptionStatic() {
return "Instruction emulation based unwind information.";
}
diff --git a/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.h b/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.h
index 5784a42a8269..97cb04e51408 100644
--- a/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.h
+++ b/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.h
@@ -52,13 +52,11 @@ public:
static void Terminate();
- static lldb_private::ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "inst-emulation"; }
- static const char *GetPluginDescriptionStatic();
+ static llvm::StringRef GetPluginDescriptionStatic();
- lldb_private::ConstString GetPluginName() override;
-
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
private:
// Call CreateInstance to get an instance of this class
diff --git a/lldb/source/Plugins/UnwindAssembly/x86/UnwindAssembly-x86.cpp b/lldb/source/Plugins/UnwindAssembly/x86/UnwindAssembly-x86.cpp
index 402a70cd025f..eca78a9b3a04 100644
--- a/lldb/source/Plugins/UnwindAssembly/x86/UnwindAssembly-x86.cpp
+++ b/lldb/source/Plugins/UnwindAssembly/x86/UnwindAssembly-x86.cpp
@@ -239,14 +239,6 @@ UnwindAssembly *UnwindAssembly_x86::CreateInstance(const ArchSpec &arch) {
return nullptr;
}
-// PluginInterface protocol in UnwindAssemblyParser_x86
-
-ConstString UnwindAssembly_x86::GetPluginName() {
- return GetPluginNameStatic();
-}
-
-uint32_t UnwindAssembly_x86::GetPluginVersion() { return 1; }
-
void UnwindAssembly_x86::Initialize() {
PluginManager::RegisterPlugin(GetPluginNameStatic(),
GetPluginDescriptionStatic(), CreateInstance);
@@ -256,11 +248,6 @@ void UnwindAssembly_x86::Terminate() {
PluginManager::UnregisterPlugin(CreateInstance);
}
-lldb_private::ConstString UnwindAssembly_x86::GetPluginNameStatic() {
- static ConstString g_name("x86");
- return g_name;
-}
-
-const char *UnwindAssembly_x86::GetPluginDescriptionStatic() {
+llvm::StringRef UnwindAssembly_x86::GetPluginDescriptionStatic() {
return "i386 and x86_64 assembly language profiler plugin.";
}
diff --git a/lldb/source/Plugins/UnwindAssembly/x86/UnwindAssembly-x86.h b/lldb/source/Plugins/UnwindAssembly/x86/UnwindAssembly-x86.h
index 3e1588f2065c..3857c8776b02 100644
--- a/lldb/source/Plugins/UnwindAssembly/x86/UnwindAssembly-x86.h
+++ b/lldb/source/Plugins/UnwindAssembly/x86/UnwindAssembly-x86.h
@@ -46,13 +46,11 @@ public:
static void Terminate();
- static lldb_private::ConstString GetPluginNameStatic();
+ static llvm::StringRef GetPluginNameStatic() { return "x86"; }
- static const char *GetPluginDescriptionStatic();
+ static llvm::StringRef GetPluginDescriptionStatic();
- lldb_private::ConstString GetPluginName() override;
-
- uint32_t GetPluginVersion() override;
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
private:
UnwindAssembly_x86(const lldb_private::ArchSpec &arch);
diff --git a/lldb/source/Symbol/Block.cpp b/lldb/source/Symbol/Block.cpp
index fc246ac8575d..4aafef34bd2d 100644
--- a/lldb/source/Symbol/Block.cpp
+++ b/lldb/source/Symbol/Block.cpp
@@ -122,6 +122,16 @@ Block *Block::FindBlockByID(user_id_t block_id) {
return matching_block;
}
+Block *Block::FindInnermostBlockByOffset(const lldb::addr_t offset) {
+ if (!Contains(offset))
+ return nullptr;
+ for (const BlockSP &block_sp : m_children) {
+ if (Block *block = block_sp->FindInnermostBlockByOffset(offset))
+ return block;
+ }
+ return this;
+}
+
void Block::CalculateSymbolContext(SymbolContext *sc) {
if (m_parent_scope)
m_parent_scope->CalculateSymbolContext(sc);
diff --git a/lldb/source/Symbol/CompileUnit.cpp b/lldb/source/Symbol/CompileUnit.cpp
index 588ed4976d65..7c840d8bb064 100644
--- a/lldb/source/Symbol/CompileUnit.cpp
+++ b/lldb/source/Symbol/CompileUnit.cpp
@@ -52,7 +52,7 @@ void CompileUnit::DumpSymbolContext(Stream *s) {
void CompileUnit::GetDescription(Stream *s,
lldb::DescriptionLevel level) const {
- const char *language = Language::GetNameForLanguageType(m_language);
+ const char *language = GetCachedLanguage();
*s << "id = " << (const UserID &)*this << ", file = \""
<< this->GetPrimaryFile() << "\", language = \"" << language << '"';
}
@@ -97,12 +97,18 @@ lldb::FunctionSP CompileUnit::FindFunction(
return {};
}
+const char *CompileUnit::GetCachedLanguage() const {
+ if (m_flags.IsClear(flagsParsedLanguage))
+ return "<not loaded>";
+ return Language::GetNameForLanguageType(m_language);
+}
+
// Dump the current contents of this object. No functions that cause on demand
// parsing of functions, globals, statics are called, so this is a good
// function to call to get an idea of the current contents of the CompileUnit
// object.
void CompileUnit::Dump(Stream *s, bool show_context) const {
- const char *language = Language::GetNameForLanguageType(m_language);
+ const char *language = GetCachedLanguage();
s->Printf("%p: ", static_cast<const void *>(this));
s->Indent();
@@ -175,6 +181,10 @@ void CompileUnit::SetSupportFiles(const FileSpecList &support_files) {
m_support_files = support_files;
}
+void CompileUnit::SetSupportFiles(FileSpecList &&support_files) {
+ m_support_files = std::move(support_files);
+}
+
DebugMacros *CompileUnit::GetDebugMacros() {
if (m_debug_macros_sp.get() == nullptr) {
if (m_flags.IsClear(flagsParsedDebugMacros)) {
@@ -309,8 +319,13 @@ void CompileUnit::ResolveSymbolContext(
// subsequent line exact matches below.
const bool inlines = false;
const bool exact = true;
- SourceLocationSpec found_entry(line_entry.file, line_entry.line,
- line_entry.column, inlines, exact);
+ const llvm::Optional<uint16_t> column =
+ src_location_spec.GetColumn().hasValue()
+ ? llvm::Optional<uint16_t>(line_entry.column)
+ : llvm::None;
+
+ SourceLocationSpec found_entry(line_entry.file, line_entry.line, column,
+ inlines, exact);
while (line_idx != UINT32_MAX) {
// If they only asked for the line entry, then we're done, we can
diff --git a/lldb/source/Symbol/DeclVendor.cpp b/lldb/source/Symbol/DeclVendor.cpp
index cf87f4f879b1..e99ebfee4cff 100644
--- a/lldb/source/Symbol/DeclVendor.cpp
+++ b/lldb/source/Symbol/DeclVendor.cpp
@@ -17,8 +17,6 @@ using namespace lldb_private;
std::vector<CompilerType> DeclVendor::FindTypes(ConstString name,
uint32_t max_matches) {
- // FIXME: This depends on clang, but should be able to support any
- // TypeSystem.
std::vector<CompilerType> ret;
std::vector<CompilerDecl> decls;
if (FindDecls(name, /*append*/ true, max_matches, decls))
diff --git a/lldb/source/Symbol/LocateSymbolFileMacOSX.cpp b/lldb/source/Symbol/LocateSymbolFileMacOSX.cpp
index 2655e4de9063..a13b4a7a54f2 100644
--- a/lldb/source/Symbol/LocateSymbolFileMacOSX.cpp
+++ b/lldb/source/Symbol/LocateSymbolFileMacOSX.cpp
@@ -414,9 +414,8 @@ static bool GetModuleSpecInfoFromUUIDDictionary(CFDictionaryRef uuid_dict,
// last two filename parts from the source remapping and get a more
// general source remapping that still works. Add this as another
// option in addition to the full source path remap.
- module_spec.GetSourceMappingList().Append(
- ConstString(DBGBuildSourcePath.c_str()),
- ConstString(DBGSourcePath.c_str()), true);
+ module_spec.GetSourceMappingList().Append(DBGBuildSourcePath,
+ DBGSourcePath, true);
if (do_truncate_remapping_names) {
FileSpec build_path(DBGBuildSourcePath.c_str());
FileSpec source_path(DBGSourcePath.c_str());
@@ -425,8 +424,7 @@ static bool GetModuleSpecInfoFromUUIDDictionary(CFDictionaryRef uuid_dict,
source_path.RemoveLastPathComponent();
source_path.RemoveLastPathComponent();
module_spec.GetSourceMappingList().Append(
- ConstString(build_path.GetPath().c_str()),
- ConstString(source_path.GetPath().c_str()), true);
+ build_path.GetPath(), source_path.GetPath(), true);
}
}
}
@@ -458,9 +456,8 @@ static bool GetModuleSpecInfoFromUUIDDictionary(CFDictionaryRef uuid_dict,
FileSystem::Instance().Resolve(resolved_source_path);
DBGSourcePath = resolved_source_path.GetPath();
}
- module_spec.GetSourceMappingList().Append(
- ConstString(DBGBuildSourcePath.c_str()),
- ConstString(DBGSourcePath.c_str()), true);
+ module_spec.GetSourceMappingList().Append(DBGBuildSourcePath,
+ DBGSourcePath, true);
}
}
return success;
diff --git a/lldb/source/Symbol/Symbol.cpp b/lldb/source/Symbol/Symbol.cpp
index 251f9104ad54..a8c81ee3082f 100644
--- a/lldb/source/Symbol/Symbol.cpp
+++ b/lldb/source/Symbol/Symbol.cpp
@@ -115,7 +115,8 @@ void Symbol::Clear() {
}
bool Symbol::ValueIsAddress() const {
- return m_addr_range.GetBaseAddress().GetSection().get() != nullptr;
+ return m_addr_range.GetBaseAddress().GetSection().get() != nullptr ||
+ m_type == eSymbolTypeAbsolute;
}
ConstString Symbol::GetDisplayName() const {
diff --git a/lldb/source/Symbol/SymbolContext.cpp b/lldb/source/Symbol/SymbolContext.cpp
index 2e8fe1cec30e..f1c3a9e5b4e0 100644
--- a/lldb/source/Symbol/SymbolContext.cpp
+++ b/lldb/source/Symbol/SymbolContext.cpp
@@ -11,7 +11,6 @@
#include "lldb/Core/Module.h"
#include "lldb/Core/ModuleSpec.h"
#include "lldb/Host/Host.h"
-#include "lldb/Host/StringConvert.h"
#include "lldb/Symbol/Block.h"
#include "lldb/Symbol/CompileUnit.h"
#include "lldb/Symbol/ObjectFile.h"
@@ -977,13 +976,11 @@ bool SymbolContextSpecifier::AddSpecification(const char *spec_string,
m_type |= eFileSpecified;
break;
case eLineStartSpecified:
- m_start_line = StringConvert::ToSInt32(spec_string, 0, 0, &return_value);
- if (return_value)
+ if ((return_value = llvm::to_integer(spec_string, m_start_line)))
m_type |= eLineStartSpecified;
break;
case eLineEndSpecified:
- m_end_line = StringConvert::ToSInt32(spec_string, 0, 0, &return_value);
- if (return_value)
+ if ((return_value = llvm::to_integer(spec_string, m_end_line)))
m_type |= eLineEndSpecified;
break;
case eFunctionSpecified:
diff --git a/lldb/source/Symbol/SymbolFile.cpp b/lldb/source/Symbol/SymbolFile.cpp
index 152bbe8de6cb..53f8dd68c8b7 100644
--- a/lldb/source/Symbol/SymbolFile.cpp
+++ b/lldb/source/Symbol/SymbolFile.cpp
@@ -236,3 +236,15 @@ void SymbolFile::Dump(Stream &s) {
}
SymbolFile::RegisterInfoResolver::~RegisterInfoResolver() = default;
+
+uint64_t SymbolFile::GetDebugInfoSize() {
+ if (!m_objfile_sp)
+ return 0;
+ ModuleSP module_sp(m_objfile_sp->GetModule());
+ if (!module_sp)
+ return 0;
+ const SectionList *section_list = module_sp->GetSectionList();
+ if (section_list)
+ return section_list->GetDebugInfoSize();
+ return 0;
+}
diff --git a/lldb/source/Symbol/SymbolVendor.cpp b/lldb/source/Symbol/SymbolVendor.cpp
index 0ef332a5813f..d26363130e2d 100644
--- a/lldb/source/Symbol/SymbolVendor.cpp
+++ b/lldb/source/Symbol/SymbolVendor.cpp
@@ -69,11 +69,3 @@ void SymbolVendor::AddSymbolFileRepresentation(const ObjectFileSP &objfile_sp) {
m_sym_file_up.reset(SymbolFile::FindPlugin(objfile_sp));
}
}
-
-// PluginInterface protocol
-lldb_private::ConstString SymbolVendor::GetPluginName() {
- static ConstString g_name("vendor-default");
- return g_name;
-}
-
-uint32_t SymbolVendor::GetPluginVersion() { return 1; }
diff --git a/lldb/source/Symbol/Symtab.cpp b/lldb/source/Symbol/Symtab.cpp
index 313b451601ae..19c1fee2bb38 100644
--- a/lldb/source/Symbol/Symtab.cpp
+++ b/lldb/source/Symbol/Symtab.cpp
@@ -248,10 +248,8 @@ static bool lldb_skip_name(llvm::StringRef mangled,
// No filters for this scheme yet. Include all names in indexing.
case Mangled::eManglingSchemeMSVC:
- return false;
-
- // No filters for this scheme yet. Include all names in indexing.
case Mangled::eManglingSchemeRustV0:
+ case Mangled::eManglingSchemeD:
return false;
// Don't try and demangle things we can't categorize.
@@ -265,6 +263,7 @@ void Symtab::InitNameIndexes() {
// Protected function, no need to lock mutex...
if (!m_name_indexes_computed) {
m_name_indexes_computed = true;
+ ElapsedTime elapsed(m_objfile->GetModule()->GetSymtabIndexTime());
LLDB_SCOPED_TIMER();
// Collect all loaded language plugins.
@@ -1099,6 +1098,7 @@ void Symtab::FindFunctionSymbols(ConstString name, uint32_t name_type_mask,
case eSymbolTypeCode:
case eSymbolTypeResolver:
case eSymbolTypeReExported:
+ case eSymbolTypeAbsolute:
symbol_indexes.push_back(temp_symbol_indexes[i]);
break;
default:
diff --git a/lldb/source/Symbol/TypeSystem.cpp b/lldb/source/Symbol/TypeSystem.cpp
index 252b06e269d6..0b3f7e4f3bd4 100644
--- a/lldb/source/Symbol/TypeSystem.cpp
+++ b/lldb/source/Symbol/TypeSystem.cpp
@@ -223,62 +223,32 @@ void TypeSystemMap::ForEach(std::function<bool(TypeSystem *)> const &callback) {
llvm::Expected<TypeSystem &> TypeSystemMap::GetTypeSystemForLanguage(
lldb::LanguageType language,
llvm::Optional<CreateCallback> create_callback) {
- llvm::Error error = llvm::Error::success();
- assert(!error); // Check the success value when assertions are enabled
std::lock_guard<std::mutex> guard(m_mutex);
- if (m_clear_in_progress) {
- error = llvm::make_error<llvm::StringError>(
+ if (m_clear_in_progress)
+ return llvm::make_error<llvm::StringError>(
"Unable to get TypeSystem because TypeSystemMap is being cleared",
llvm::inconvertibleErrorCode());
- } else {
- collection::iterator pos = m_map.find(language);
- if (pos != m_map.end()) {
- auto *type_system = pos->second.get();
- if (type_system) {
- llvm::consumeError(std::move(error));
- return *type_system;
- }
- error = llvm::make_error<llvm::StringError>(
- "TypeSystem for language " +
- llvm::StringRef(Language::GetNameForLanguageType(language)) +
- " doesn't exist",
- llvm::inconvertibleErrorCode());
- return std::move(error);
- }
- for (const auto &pair : m_map) {
- if (pair.second && pair.second->SupportsLanguage(language)) {
- // Add a new mapping for "language" to point to an already existing
- // TypeSystem that supports this language
- m_map[language] = pair.second;
- if (pair.second.get()) {
- llvm::consumeError(std::move(error));
- return *pair.second.get();
- }
- error = llvm::make_error<llvm::StringError>(
- "TypeSystem for language " +
- llvm::StringRef(Language::GetNameForLanguageType(language)) +
- " doesn't exist",
- llvm::inconvertibleErrorCode());
- return std::move(error);
- }
- }
+ collection::iterator pos = m_map.find(language);
+ if (pos != m_map.end()) {
+ auto *type_system = pos->second.get();
+ if (type_system)
+ return *type_system;
+ return llvm::make_error<llvm::StringError>(
+ "TypeSystem for language " +
+ llvm::StringRef(Language::GetNameForLanguageType(language)) +
+ " doesn't exist",
+ llvm::inconvertibleErrorCode());
+ }
- if (!create_callback) {
- error = llvm::make_error<llvm::StringError>(
- "Unable to find type system for language " +
- llvm::StringRef(Language::GetNameForLanguageType(language)),
- llvm::inconvertibleErrorCode());
- } else {
- // Cache even if we get a shared pointer that contains a null type system
- // back
- TypeSystemSP type_system_sp = (*create_callback)();
- m_map[language] = type_system_sp;
- if (type_system_sp.get()) {
- llvm::consumeError(std::move(error));
- return *type_system_sp.get();
- }
- error = llvm::make_error<llvm::StringError>(
+ for (const auto &pair : m_map) {
+ if (pair.second && pair.second->SupportsLanguage(language)) {
+ // Add a new mapping for "language" to point to an already existing
+ // TypeSystem that supports this language
+ m_map[language] = pair.second;
+ if (pair.second.get())
+ return *pair.second.get();
+ return llvm::make_error<llvm::StringError>(
"TypeSystem for language " +
llvm::StringRef(Language::GetNameForLanguageType(language)) +
" doesn't exist",
@@ -286,7 +256,23 @@ llvm::Expected<TypeSystem &> TypeSystemMap::GetTypeSystemForLanguage(
}
}
- return std::move(error);
+ if (!create_callback)
+ return llvm::make_error<llvm::StringError>(
+ "Unable to find type system for language " +
+ llvm::StringRef(Language::GetNameForLanguageType(language)),
+ llvm::inconvertibleErrorCode());
+
+ // Cache even if we get a shared pointer that contains a null type system
+ // back
+ TypeSystemSP type_system_sp = (*create_callback)();
+ m_map[language] = type_system_sp;
+ if (type_system_sp.get())
+ return *type_system_sp.get();
+ return llvm::make_error<llvm::StringError>(
+ "TypeSystem for language " +
+ llvm::StringRef(Language::GetNameForLanguageType(language)) +
+ " doesn't exist",
+ llvm::inconvertibleErrorCode());
}
llvm::Expected<TypeSystem &>
diff --git a/lldb/source/Symbol/UnwindPlan.cpp b/lldb/source/Symbol/UnwindPlan.cpp
index 41bd8cd46ad8..5547998691db 100644
--- a/lldb/source/Symbol/UnwindPlan.cpp
+++ b/lldb/source/Symbol/UnwindPlan.cpp
@@ -8,7 +8,6 @@
#include "lldb/Symbol/UnwindPlan.h"
-#include "lldb/Expression/DWARFExpression.h"
#include "lldb/Target/Process.h"
#include "lldb/Target/RegisterContext.h"
#include "lldb/Target/Target.h"
diff --git a/lldb/source/Target/ABI.cpp b/lldb/source/Target/ABI.cpp
index c3342caf8742..6e8772cbd142 100644
--- a/lldb/source/Target/ABI.cpp
+++ b/lldb/source/Target/ABI.cpp
@@ -16,7 +16,7 @@
#include "lldb/Target/Target.h"
#include "lldb/Target/Thread.h"
#include "lldb/Utility/Log.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
#include <cctype>
using namespace lldb;
@@ -214,33 +214,39 @@ std::unique_ptr<llvm::MCRegisterInfo> ABI::MakeMCRegisterInfo(const ArchSpec &ar
return info_up;
}
-void RegInfoBasedABI::AugmentRegisterInfo(RegisterInfo &info) {
- if (info.kinds[eRegisterKindEHFrame] != LLDB_INVALID_REGNUM &&
- info.kinds[eRegisterKindDWARF] != LLDB_INVALID_REGNUM)
- return;
-
- RegisterInfo abi_info;
- if (!GetRegisterInfoByName(info.name, abi_info))
- return;
-
- if (info.kinds[eRegisterKindEHFrame] == LLDB_INVALID_REGNUM)
- info.kinds[eRegisterKindEHFrame] = abi_info.kinds[eRegisterKindEHFrame];
- if (info.kinds[eRegisterKindDWARF] == LLDB_INVALID_REGNUM)
- info.kinds[eRegisterKindDWARF] = abi_info.kinds[eRegisterKindDWARF];
- if (info.kinds[eRegisterKindGeneric] == LLDB_INVALID_REGNUM)
- info.kinds[eRegisterKindGeneric] = abi_info.kinds[eRegisterKindGeneric];
+void RegInfoBasedABI::AugmentRegisterInfo(
+ std::vector<DynamicRegisterInfo::Register> &regs) {
+ for (DynamicRegisterInfo::Register &info : regs) {
+ if (info.regnum_ehframe != LLDB_INVALID_REGNUM &&
+ info.regnum_dwarf != LLDB_INVALID_REGNUM)
+ continue;
+
+ RegisterInfo abi_info;
+ if (!GetRegisterInfoByName(info.name.GetStringRef(), abi_info))
+ continue;
+
+ if (info.regnum_ehframe == LLDB_INVALID_REGNUM)
+ info.regnum_ehframe = abi_info.kinds[eRegisterKindEHFrame];
+ if (info.regnum_dwarf == LLDB_INVALID_REGNUM)
+ info.regnum_dwarf = abi_info.kinds[eRegisterKindDWARF];
+ if (info.regnum_generic == LLDB_INVALID_REGNUM)
+ info.regnum_generic = abi_info.kinds[eRegisterKindGeneric];
+ }
}
-void MCBasedABI::AugmentRegisterInfo(RegisterInfo &info) {
- uint32_t eh, dwarf;
- std::tie(eh, dwarf) = GetEHAndDWARFNums(info.name);
-
- if (info.kinds[eRegisterKindEHFrame] == LLDB_INVALID_REGNUM)
- info.kinds[eRegisterKindEHFrame] = eh;
- if (info.kinds[eRegisterKindDWARF] == LLDB_INVALID_REGNUM)
- info.kinds[eRegisterKindDWARF] = dwarf;
- if (info.kinds[eRegisterKindGeneric] == LLDB_INVALID_REGNUM)
- info.kinds[eRegisterKindGeneric] = GetGenericNum(info.name);
+void MCBasedABI::AugmentRegisterInfo(
+ std::vector<DynamicRegisterInfo::Register> &regs) {
+ for (DynamicRegisterInfo::Register &info : regs) {
+ uint32_t eh, dwarf;
+ std::tie(eh, dwarf) = GetEHAndDWARFNums(info.name.GetStringRef());
+
+ if (info.regnum_ehframe == LLDB_INVALID_REGNUM)
+ info.regnum_ehframe = eh;
+ if (info.regnum_dwarf == LLDB_INVALID_REGNUM)
+ info.regnum_dwarf = dwarf;
+ if (info.regnum_generic == LLDB_INVALID_REGNUM)
+ info.regnum_generic = GetGenericNum(info.name.GetStringRef());
+ }
}
std::pair<uint32_t, uint32_t>
diff --git a/lldb/source/Plugins/Process/Utility/DynamicRegisterInfo.cpp b/lldb/source/Target/DynamicRegisterInfo.cpp
index a85d7bd6f525..9f894f86aea8 100644
--- a/lldb/source/Plugins/Process/Utility/DynamicRegisterInfo.cpp
+++ b/lldb/source/Target/DynamicRegisterInfo.cpp
@@ -6,13 +6,12 @@
//
//===----------------------------------------------------------------------===//
-#include "DynamicRegisterInfo.h"
-
+#include "lldb/Target/DynamicRegisterInfo.h"
#include "lldb/Core/StreamFile.h"
#include "lldb/DataFormatters/FormatManager.h"
-#include "lldb/Host/StringConvert.h"
#include "lldb/Interpreter/OptionArgParser.h"
#include "lldb/Utility/ArchSpec.h"
+#include "lldb/Utility/Log.h"
#include "lldb/Utility/RegularExpression.h"
#include "lldb/Utility/StringExtractor.h"
#include "lldb/Utility/StructuredData.h"
@@ -43,7 +42,6 @@ void DynamicRegisterInfo::MoveFrom(DynamicRegisterInfo &&info) {
m_set_names = std::move(info.m_set_names);
m_value_regs_map = std::move(info.m_value_regs_map);
m_invalidate_regs_map = std::move(info.m_invalidate_regs_map);
- m_dynamic_reg_size_map = std::move(info.m_dynamic_reg_size_map);
m_reg_data_byte_size = info.m_reg_data_byte_size;
m_finalized = info.m_finalized;
@@ -57,9 +55,144 @@ void DynamicRegisterInfo::MoveFrom(DynamicRegisterInfo &&info) {
info.Clear();
}
+llvm::Expected<uint32_t> DynamicRegisterInfo::ByteOffsetFromSlice(
+ uint32_t index, llvm::StringRef slice_str, lldb::ByteOrder byte_order) {
+ // Slices use the following format:
+ // REGNAME[MSBIT:LSBIT]
+ // REGNAME - name of the register to grab a slice of
+ // MSBIT - the most significant bit at which the current register value
+ // starts
+ // LSBIT - the least significant bit at which the current register value
+ // ends
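+ // For example, "rax[31:0]" states that the current register's value is
+ // in bits 31:0 of the concrete register "rax".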
+ static llvm::Regex g_bitfield_regex(
+ "([A-Za-z_][A-Za-z0-9_]*)\\[([0-9]+):([0-9]+)\\]");
+ llvm::SmallVector<llvm::StringRef, 4> matches;
+ if (!g_bitfield_regex.match(slice_str, &matches))
+ return llvm::createStringError(
+ llvm::inconvertibleErrorCode(),
+ "failed to match against register bitfield regex (slice: %s)",
+ slice_str.str().c_str());
+
+ llvm::StringRef reg_name_str = matches[1];
+ llvm::StringRef msbit_str = matches[2];
+ llvm::StringRef lsbit_str = matches[3];
+ uint32_t msbit;
+ uint32_t lsbit;
+ if (!llvm::to_integer(msbit_str, msbit) ||
+ !llvm::to_integer(lsbit_str, lsbit))
+ return llvm::createStringError(
+ llvm::inconvertibleErrorCode(), "msbit (%s) or lsbit (%s) are invalid",
+ msbit_str.str().c_str(), lsbit_str.str().c_str());
+
+ if (msbit <= lsbit)
+ return llvm::createStringError(llvm::inconvertibleErrorCode(),
+ "msbit (%u) must be greater than lsbit (%u)",
+ msbit, lsbit);
+
+ const uint32_t msbyte = msbit / 8;
+ const uint32_t lsbyte = lsbit / 8;
+
+ const RegisterInfo *containing_reg_info = GetRegisterInfo(reg_name_str);
+ if (!containing_reg_info)
+ return llvm::createStringError(llvm::inconvertibleErrorCode(),
+ "invalid concrete register \"%s\"",
+ reg_name_str.str().c_str());
+
+ const uint32_t max_bit = containing_reg_info->byte_size * 8;
+
+ if (msbit > max_bit)
+ return llvm::createStringError(
+ llvm::inconvertibleErrorCode(),
+ "msbit (%u) must be less than the bitsize of the register \"%s\" (%u)",
+ msbit, reg_name_str.str().c_str(), max_bit);
+ if (lsbit > max_bit)
+ return llvm::createStringError(
+ llvm::inconvertibleErrorCode(),
+ "lsbit (%u) must be less than the bitsize of the register \"%s\" (%u)",
+ lsbit, reg_name_str.str().c_str(), max_bit);
+
+ m_invalidate_regs_map[containing_reg_info->kinds[eRegisterKindLLDB]]
+ .push_back(index);
+ m_value_regs_map[index].push_back(
+ containing_reg_info->kinds[eRegisterKindLLDB]);
+ m_invalidate_regs_map[index].push_back(
+ containing_reg_info->kinds[eRegisterKindLLDB]);
+
+ if (byte_order == eByteOrderLittle)
+ return containing_reg_info->byte_offset + lsbyte;
+ if (byte_order == eByteOrderBig)
+ return containing_reg_info->byte_offset + msbyte;
+ llvm_unreachable("Invalid byte order");
+}
+
+llvm::Expected<uint32_t> DynamicRegisterInfo::ByteOffsetFromComposite(
+ uint32_t index, StructuredData::Array &composite_reg_list,
+ lldb::ByteOrder byte_order) {
+ const size_t num_composite_regs = composite_reg_list.GetSize();
+ if (num_composite_regs == 0)
+ return llvm::createStringError(llvm::inconvertibleErrorCode(),
+ "\"composite\" list is empty");
+
+ uint32_t composite_offset = UINT32_MAX;
+ for (uint32_t composite_idx = 0; composite_idx < num_composite_regs;
+ ++composite_idx) {
+ ConstString composite_reg_name;
+ if (!composite_reg_list.GetItemAtIndexAsString(composite_idx,
+ composite_reg_name, nullptr))
+ return llvm::createStringError(
+ llvm::inconvertibleErrorCode(),
+ "\"composite\" list value is not a Python string at index %d",
+ composite_idx);
+
+ const RegisterInfo *composite_reg_info =
+ GetRegisterInfo(composite_reg_name.GetStringRef());
+ if (!composite_reg_info)
+ return llvm::createStringError(
+ llvm::inconvertibleErrorCode(),
+ "failed to find composite register by name: \"%s\"",
+ composite_reg_name.GetCString());
+
+ composite_offset =
+ std::min(composite_offset, composite_reg_info->byte_offset);
+ m_value_regs_map[index].push_back(
+ composite_reg_info->kinds[eRegisterKindLLDB]);
+ m_invalidate_regs_map[composite_reg_info->kinds[eRegisterKindLLDB]]
+ .push_back(index);
+ m_invalidate_regs_map[index].push_back(
+ composite_reg_info->kinds[eRegisterKindLLDB]);
+ }
+
+ return composite_offset;
+}
+
+llvm::Expected<uint32_t> DynamicRegisterInfo::ByteOffsetFromRegInfoDict(
+ uint32_t index, StructuredData::Dictionary &reg_info_dict,
+ lldb::ByteOrder byte_order) {
+ uint32_t byte_offset;
+ if (reg_info_dict.GetValueForKeyAsInteger("offset", byte_offset))
+ return byte_offset;
+
+ // No offset for this register, see if the register has a value
+ // expression which indicates this register is part of another register.
+ // Value expressions are things like "rax[31:0]" which state that the
+ // current register's value is in a concrete register "rax" in bits 31:0.
+ // If there is a value expression we can calculate the offset
+ llvm::StringRef slice_str;
+ if (reg_info_dict.GetValueForKeyAsString("slice", slice_str, nullptr))
+ return ByteOffsetFromSlice(index, slice_str, byte_order);
+
+ StructuredData::Array *composite_reg_list;
+ if (reg_info_dict.GetValueForKeyAsArray("composite", composite_reg_list))
+ return ByteOffsetFromComposite(index, *composite_reg_list, byte_order);
+
+ return llvm::createStringError(llvm::inconvertibleErrorCode(),
+ "insufficient data to calculate byte offset");
+}
+
size_t
DynamicRegisterInfo::SetRegisterInfo(const StructuredData::Dictionary &dict,
const ArchSpec &arch) {
+ Log *log = lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_OBJECT);
assert(!m_finalized);
StructuredData::Array *sets = nullptr;
if (dict.GetValueForKeyAsArray("sets", sets)) {
@@ -81,6 +214,8 @@ DynamicRegisterInfo::SetRegisterInfo(const StructuredData::Dictionary &dict,
if (!dict.GetValueForKeyAsArray("registers", regs))
return 0;
+ const ByteOrder byte_order = arch.GetByteOrder();
+
const uint32_t num_regs = regs->GetSize();
// typedef std::map<std::string, std::vector<std::string> >
// InvalidateNameMap;
@@ -114,148 +249,16 @@ DynamicRegisterInfo::SetRegisterInfo(const StructuredData::Dictionary &dict,
reg_info_dict->GetValueForKeyAsString("alt-name", alt_name_val, nullptr);
reg_info.alt_name = alt_name_val.GetCString();
- reg_info_dict->GetValueForKeyAsInteger("offset", reg_info.byte_offset,
- UINT32_MAX);
-
- const ByteOrder byte_order = arch.GetByteOrder();
-
- if (reg_info.byte_offset == UINT32_MAX) {
- // No offset for this register, see if the register has a value
- // expression which indicates this register is part of another register.
- // Value expressions are things like "rax[31:0]" which state that the
- // current register's value is in a concrete register "rax" in bits 31:0.
- // If there is a value expression we can calculate the offset
- bool success = false;
- llvm::StringRef slice_str;
- if (reg_info_dict->GetValueForKeyAsString("slice", slice_str, nullptr)) {
- // Slices use the following format:
- // REGNAME[MSBIT:LSBIT]
- // REGNAME - name of the register to grab a slice of
- // MSBIT - the most significant bit at which the current register value
- // starts at
- // LSBIT - the least significant bit at which the current register value
- // ends at
- static RegularExpression g_bitfield_regex(
- llvm::StringRef("([A-Za-z_][A-Za-z0-9_]*)\\[([0-9]+):([0-9]+)\\]"));
- llvm::SmallVector<llvm::StringRef, 4> matches;
- if (g_bitfield_regex.Execute(slice_str, &matches)) {
- std::string reg_name_str = matches[1].str();
- std::string msbit_str = matches[2].str();
- std::string lsbit_str = matches[3].str();
- const uint32_t msbit =
- StringConvert::ToUInt32(msbit_str.c_str(), UINT32_MAX);
- const uint32_t lsbit =
- StringConvert::ToUInt32(lsbit_str.c_str(), UINT32_MAX);
- if (msbit != UINT32_MAX && lsbit != UINT32_MAX) {
- if (msbit > lsbit) {
- const uint32_t msbyte = msbit / 8;
- const uint32_t lsbyte = lsbit / 8;
-
- const RegisterInfo *containing_reg_info =
- GetRegisterInfo(reg_name_str);
- if (containing_reg_info) {
- const uint32_t max_bit = containing_reg_info->byte_size * 8;
- if (msbit < max_bit && lsbit < max_bit) {
- m_invalidate_regs_map[containing_reg_info
- ->kinds[eRegisterKindLLDB]]
- .push_back(i);
- m_value_regs_map[i].push_back(
- containing_reg_info->kinds[eRegisterKindLLDB]);
- m_invalidate_regs_map[i].push_back(
- containing_reg_info->kinds[eRegisterKindLLDB]);
-
- if (byte_order == eByteOrderLittle) {
- success = true;
- reg_info.byte_offset =
- containing_reg_info->byte_offset + lsbyte;
- } else if (byte_order == eByteOrderBig) {
- success = true;
- reg_info.byte_offset =
- containing_reg_info->byte_offset + msbyte;
- } else {
- llvm_unreachable("Invalid byte order");
- }
- } else {
- if (msbit > max_bit)
- printf("error: msbit (%u) must be less than the bitsize "
- "of the register (%u)\n",
- msbit, max_bit);
- else
- printf("error: lsbit (%u) must be less than the bitsize "
- "of the register (%u)\n",
- lsbit, max_bit);
- }
- } else {
- printf("error: invalid concrete register \"%s\"\n",
- reg_name_str.c_str());
- }
- } else {
- printf("error: msbit (%u) must be greater than lsbit (%u)\n",
- msbit, lsbit);
- }
- } else {
- printf("error: msbit (%u) and lsbit (%u) must be valid\n", msbit,
- lsbit);
- }
- } else {
- // TODO: print error invalid slice string that doesn't follow the
- // format
- printf("error: failed to match against register bitfield regex\n");
- }
- } else {
- StructuredData::Array *composite_reg_list = nullptr;
- if (reg_info_dict->GetValueForKeyAsArray("composite",
- composite_reg_list)) {
- const size_t num_composite_regs = composite_reg_list->GetSize();
- if (num_composite_regs > 0) {
- uint32_t composite_offset = UINT32_MAX;
- for (uint32_t composite_idx = 0; composite_idx < num_composite_regs;
- ++composite_idx) {
- ConstString composite_reg_name;
- if (composite_reg_list->GetItemAtIndexAsString(
- composite_idx, composite_reg_name, nullptr)) {
- const RegisterInfo *composite_reg_info =
- GetRegisterInfo(composite_reg_name.GetStringRef());
- if (composite_reg_info) {
- composite_offset = std::min(composite_offset,
- composite_reg_info->byte_offset);
- m_value_regs_map[i].push_back(
- composite_reg_info->kinds[eRegisterKindLLDB]);
- m_invalidate_regs_map[composite_reg_info
- ->kinds[eRegisterKindLLDB]]
- .push_back(i);
- m_invalidate_regs_map[i].push_back(
- composite_reg_info->kinds[eRegisterKindLLDB]);
- } else {
- // TODO: print error invalid slice string that doesn't follow
- // the format
- printf("error: failed to find composite register by name: "
- "\"%s\"\n",
- composite_reg_name.GetCString());
- }
- } else {
- printf(
- "error: 'composite' list value wasn't a python string\n");
- }
- }
- if (composite_offset != UINT32_MAX) {
- reg_info.byte_offset = composite_offset;
- success = m_value_regs_map.find(i) != m_value_regs_map.end();
- } else {
- printf("error: 'composite' registers must specify at least one "
- "real register\n");
- }
- } else {
- printf("error: 'composite' list was empty\n");
- }
- }
- }
-
- if (!success) {
- Clear();
- reg_info_dict->DumpToStdout();
- return 0;
- }
+ llvm::Expected<uint32_t> byte_offset =
+ ByteOffsetFromRegInfoDict(i, *reg_info_dict, byte_order);
+ if (byte_offset)
+ reg_info.byte_offset = byte_offset.get();
+ else {
+ LLDB_LOG_ERROR(log, byte_offset.takeError(),
+ "error while parsing register {1}: {0}", reg_info.name);
+ Clear();
+ reg_info_dict->DumpToStdout();
+ return 0;
}
int64_t bitsize = 0;
@@ -269,25 +272,6 @@ DynamicRegisterInfo::SetRegisterInfo(const StructuredData::Dictionary &dict,
reg_info.byte_size = bitsize / 8;
- llvm::StringRef dwarf_opcode_string;
- if (reg_info_dict->GetValueForKeyAsString("dynamic_size_dwarf_expr_bytes",
- dwarf_opcode_string)) {
- reg_info.dynamic_size_dwarf_len = dwarf_opcode_string.size() / 2;
- assert(reg_info.dynamic_size_dwarf_len > 0);
-
- std::vector<uint8_t> dwarf_opcode_bytes(reg_info.dynamic_size_dwarf_len);
- uint32_t j;
- StringExtractor opcode_extractor(dwarf_opcode_string);
- uint32_t ret_val = opcode_extractor.GetHexBytesAvail(dwarf_opcode_bytes);
- UNUSED_IF_ASSERT_DISABLED(ret_val);
- assert(ret_val == reg_info.dynamic_size_dwarf_len);
-
- for (j = 0; j < reg_info.dynamic_size_dwarf_len; ++j)
- m_dynamic_reg_size_map[i].push_back(dwarf_opcode_bytes[j]);
-
- reg_info.dynamic_size_dwarf_expr_bytes = m_dynamic_reg_size_map[i].data();
- }
-
llvm::StringRef format_str;
if (reg_info_dict->GetValueForKeyAsString("format", format_str, nullptr)) {
if (OptionArgParser::ToFormat(format_str.str().c_str(), reg_info.format,
@@ -395,39 +379,47 @@ DynamicRegisterInfo::SetRegisterInfo(const StructuredData::Dictionary &dict,
return m_regs.size();
}
-void DynamicRegisterInfo::AddRegister(RegisterInfo &reg_info,
- ConstString &reg_name,
- ConstString &reg_alt_name,
- ConstString &set_name) {
+size_t DynamicRegisterInfo::SetRegisterInfo(
+ std::vector<DynamicRegisterInfo::Register> &&regs,
+ const ArchSpec &arch) {
assert(!m_finalized);
- const uint32_t reg_num = m_regs.size();
- reg_info.name = reg_name.AsCString();
- assert(reg_info.name);
- reg_info.alt_name = reg_alt_name.AsCString(nullptr);
- uint32_t i;
- if (reg_info.value_regs) {
- for (i = 0; reg_info.value_regs[i] != LLDB_INVALID_REGNUM; ++i)
- m_value_regs_map[reg_num].push_back(reg_info.value_regs[i]);
- }
- if (reg_info.invalidate_regs) {
- for (i = 0; reg_info.invalidate_regs[i] != LLDB_INVALID_REGNUM; ++i)
- m_invalidate_regs_map[reg_num].push_back(reg_info.invalidate_regs[i]);
- }
- if (reg_info.dynamic_size_dwarf_expr_bytes) {
- for (i = 0; i < reg_info.dynamic_size_dwarf_len; ++i)
- m_dynamic_reg_size_map[reg_num].push_back(
- reg_info.dynamic_size_dwarf_expr_bytes[i]);
- reg_info.dynamic_size_dwarf_expr_bytes =
- m_dynamic_reg_size_map[reg_num].data();
- }
+ for (auto it : llvm::enumerate(regs)) {
+ uint32_t local_regnum = it.index();
+ const DynamicRegisterInfo::Register &reg = it.value();
+
+ assert(reg.name);
+ assert(reg.set_name);
+
+ if (!reg.value_regs.empty())
+ m_value_regs_map[local_regnum] = std::move(reg.value_regs);
+ if (!reg.invalidate_regs.empty())
+ m_invalidate_regs_map[local_regnum] = std::move(reg.invalidate_regs);
+ if (reg.value_reg_offset != 0) {
+ assert(reg.value_regs.size() == 1);
+ m_value_reg_offset_map[local_regnum] = reg.value_reg_offset;
+ }
+
+ struct RegisterInfo reg_info {
+ reg.name.AsCString(), reg.alt_name.AsCString(), reg.byte_size,
+ reg.byte_offset, reg.encoding, reg.format,
+ {reg.regnum_ehframe, reg.regnum_dwarf, reg.regnum_generic,
+ reg.regnum_remote, local_regnum},
+ // value_regs and invalidate_regs are filled by Finalize()
+ nullptr, nullptr
+ };
- m_regs.push_back(reg_info);
- uint32_t set = GetRegisterSetIndexByName(set_name, true);
- assert(set < m_sets.size());
- assert(set < m_set_reg_nums.size());
- assert(set < m_set_names.size());
- m_set_reg_nums[set].push_back(reg_num);
+ m_regs.push_back(reg_info);
+
+ uint32_t set = GetRegisterSetIndexByName(reg.set_name, true);
+ assert(set < m_sets.size());
+ assert(set < m_set_reg_nums.size());
+ assert(set < m_set_names.size());
+ m_set_reg_nums[set].push_back(local_regnum);
+ };
+
+ Finalize(arch);
+ return m_regs.size();
}
void DynamicRegisterInfo::Finalize(const ArchSpec &arch) {
@@ -442,20 +434,11 @@ void DynamicRegisterInfo::Finalize(const ArchSpec &arch) {
m_sets[set].registers = m_set_reg_nums[set].data();
}
- // sort and unique all value registers and make sure each is terminated with
- // LLDB_INVALID_REGNUM
+ // make sure value_regs are terminated with LLDB_INVALID_REGNUM
for (reg_to_regs_map::iterator pos = m_value_regs_map.begin(),
end = m_value_regs_map.end();
pos != end; ++pos) {
- if (pos->second.size() > 1) {
- llvm::sort(pos->second.begin(), pos->second.end());
- reg_num_collection::iterator unique_end =
- std::unique(pos->second.begin(), pos->second.end());
- if (unique_end != pos->second.end())
- pos->second.erase(unique_end, pos->second.end());
- }
- assert(!pos->second.empty());
if (pos->second.back() != LLDB_INVALID_REGNUM)
pos->second.push_back(LLDB_INVALID_REGNUM);
}
@@ -603,6 +586,7 @@ void DynamicRegisterInfo::Finalize(const ArchSpec &arch) {
(strcmp(reg.name, "fp") == 0))
reg.kinds[eRegisterKindGeneric] = LLDB_REGNUM_GENERIC_FP;
else if ((strcmp(reg.name, "rflags") == 0) ||
+ (strcmp(reg.name, "eflags") == 0) ||
(strcmp(reg.name, "flags") == 0))
reg.kinds[eRegisterKindGeneric] = LLDB_REGNUM_GENERIC_FLAGS;
}
@@ -656,15 +640,20 @@ void DynamicRegisterInfo::ConfigureOffsets() {
// Now update all value_regs with each register info as needed
for (auto &reg : m_regs) {
if (reg.value_regs != nullptr) {
- // Assign a valid offset to all pseudo registers if not assigned by stub.
- // Pseudo registers with value_regs list populated will share same offset
- // as that of their corresponding primary register in value_regs list.
+ // Assign a valid offset to all pseudo registers that have only a single
+ // parent register in value_regs list, if not assigned by stub. Pseudo
+ // registers with value_regs list populated will share same offset as
+ // that of their corresponding parent register.
if (reg.byte_offset == LLDB_INVALID_INDEX32) {
uint32_t value_regnum = reg.value_regs[0];
- if (value_regnum != LLDB_INVALID_INDEX32)
+ if (value_regnum != LLDB_INVALID_INDEX32 &&
+ reg.value_regs[1] == LLDB_INVALID_INDEX32) {
reg.byte_offset =
- GetRegisterInfoAtIndex(remote_to_local_regnum_map[value_regnum])
- ->byte_offset;
+ GetRegisterInfoAtIndex(value_regnum)->byte_offset;
+ auto it = m_value_reg_offset_map.find(reg.kinds[eRegisterKindLLDB]);
+ if (it != m_value_reg_offset_map.end())
+ reg.byte_offset += it->second;
+ }
}
}
@@ -691,12 +680,6 @@ DynamicRegisterInfo::GetRegisterInfoAtIndex(uint32_t i) const {
return nullptr;
}
-RegisterInfo *DynamicRegisterInfo::GetRegisterInfoAtIndex(uint32_t i) {
- if (i < m_regs.size())
- return &m_regs[i];
- return nullptr;
-}
-
const RegisterInfo *DynamicRegisterInfo::GetRegisterInfo(uint32_t kind,
uint32_t num) const {
uint32_t reg_index = ConvertRegisterKindToRegisterNumber(kind, num);
@@ -711,8 +694,9 @@ const RegisterSet *DynamicRegisterInfo::GetRegisterSet(uint32_t i) const {
return nullptr;
}
-uint32_t DynamicRegisterInfo::GetRegisterSetIndexByName(ConstString &set_name,
- bool can_create) {
+uint32_t
+DynamicRegisterInfo::GetRegisterSetIndexByName(const ConstString &set_name,
+ bool can_create) {
name_collection::iterator pos, end = m_set_names.end();
for (pos = m_set_names.begin(); pos != end; ++pos) {
if (*pos == set_name)
@@ -745,7 +729,6 @@ void DynamicRegisterInfo::Clear() {
m_set_names.clear();
m_value_regs_map.clear();
m_invalidate_regs_map.clear();
- m_dynamic_reg_size_map.clear();
m_reg_data_byte_size = 0;
m_finalized = false;
}
@@ -809,3 +792,28 @@ DynamicRegisterInfo::GetRegisterInfo(llvm::StringRef reg_name) const {
return &reg_info;
return nullptr;
}
+
+void lldb_private::addSupplementaryRegister(
+ std::vector<DynamicRegisterInfo::Register> &regs,
+ DynamicRegisterInfo::Register new_reg_info) {
+ assert(!new_reg_info.value_regs.empty());
+ const uint32_t reg_num = regs.size();
+ regs.push_back(new_reg_info);
+
+ std::map<uint32_t, std::vector<uint32_t>> new_invalidates;
+ for (uint32_t value_reg : new_reg_info.value_regs) {
+ // copy value_regs to invalidate_regs
+ new_invalidates[reg_num].push_back(value_reg);
+
+ // copy invalidate_regs from the parent register
+ llvm::append_range(new_invalidates[reg_num],
+ regs[value_reg].invalidate_regs);
+
+ // add reverse invalidate entries
+ for (uint32_t x : new_invalidates[reg_num])
+ new_invalidates[x].push_back(reg_num);
+ }
+
+ for (const auto &x : new_invalidates)
+ llvm::append_range(regs[x.first].invalidate_regs, x.second);
+}
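For illustration, a minimal self-contained sketch of the invalidation bookkeeping that addSupplementaryRegister performs above, using plain standard-library types; the Reg struct and the AddSupplementary name are made up for the example and are not LLDB API:

#include <cstdint>
#include <map>
#include <string>
#include <vector>

struct Reg {
  std::string name;
  std::vector<uint32_t> value_regs;      // parent registers this one aliases
  std::vector<uint32_t> invalidate_regs; // registers clobbered together
};

// Mirrors the logic above: the new register invalidates its parents and
// whatever those parents already invalidate, and each of those registers
// gains a reverse entry pointing back at the new register.
void AddSupplementary(std::vector<Reg> &regs, Reg new_reg) {
  const uint32_t new_num = static_cast<uint32_t>(regs.size());
  regs.push_back(new_reg);

  std::map<uint32_t, std::vector<uint32_t>> new_inv;
  for (uint32_t parent : new_reg.value_regs) {
    new_inv[new_num].push_back(parent);
    for (uint32_t r : regs[parent].invalidate_regs)
      new_inv[new_num].push_back(r);
    for (uint32_t r : new_inv[new_num])
      new_inv[r].push_back(new_num);
  }
  for (const auto &entry : new_inv) {
    auto &dst = regs[entry.first].invalidate_regs;
    dst.insert(dst.end(), entry.second.begin(), entry.second.end());
  }
}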
diff --git a/lldb/source/Target/Language.cpp b/lldb/source/Target/Language.cpp
index 7b35da5028ac..eee1ff1512d9 100644
--- a/lldb/source/Target/Language.cpp
+++ b/lldb/source/Target/Language.cpp
@@ -108,10 +108,21 @@ void Language::ForEach(std::function<bool(Language *)> callback) {
}
});
- std::lock_guard<std::mutex> guard(GetLanguagesMutex());
- LanguagesMap &map(GetLanguagesMap());
- for (const auto &entry : map) {
- if (!callback(entry.second.get()))
+ // callback may call a method in Language that attempts to acquire the same
+ // lock (such as Language::ForEach or Language::FindPlugin). To avoid a
+ // deadlock, we do not use callback while holding the lock.
+ std::vector<Language *> loaded_plugins;
+ {
+ std::lock_guard<std::mutex> guard(GetLanguagesMutex());
+ LanguagesMap &map(GetLanguagesMap());
+ for (const auto &entry : map) {
+ if (entry.second)
+ loaded_plugins.push_back(entry.second.get());
+ }
+ }
+
+ for (auto *lang : loaded_plugins) {
+ if (!callback(lang))
break;
}
}
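The deadlock-avoidance comment above boils down to a copy-then-invoke pattern: snapshot the registry under the mutex, release it, and only then run the user callback. A self-contained sketch with std::mutex and a plain vector standing in for the plugin map (Registry and its members are hypothetical):

#include <functional>
#include <mutex>
#include <vector>

struct Registry {
  std::mutex mutex;
  std::vector<int *> entries; // stand-in for the loaded-plugin map

  void ForEach(const std::function<bool(int *)> &callback) {
    // Snapshot while holding the lock...
    std::vector<int *> snapshot;
    {
      std::lock_guard<std::mutex> guard(mutex);
      snapshot = entries;
    }
    // ...then invoke without it, so a re-entrant callback (one that calls
    // ForEach or another locking method again) cannot self-deadlock.
    for (int *entry : snapshot)
      if (!callback(entry))
        break;
  }
};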
diff --git a/lldb/source/Target/ModuleCache.cpp b/lldb/source/Target/ModuleCache.cpp
index dcdc0772b31a..7143fcd2707c 100644
--- a/lldb/source/Target/ModuleCache.cpp
+++ b/lldb/source/Target/ModuleCache.cpp
@@ -159,7 +159,7 @@ ModuleLock::ModuleLock(const FileSpec &root_dir_spec, const UUID &uuid,
m_file_spec = JoinPath(lock_dir_spec, uuid.GetAsString().c_str());
auto file = FileSystem::Instance().Open(
- m_file_spec, File::eOpenOptionWrite | File::eOpenOptionCanCreate |
+ m_file_spec, File::eOpenOptionWriteOnly | File::eOpenOptionCanCreate |
File::eOpenOptionCloseOnExec);
if (file)
m_file_up = std::move(file.get());
diff --git a/lldb/source/Target/OperatingSystem.cpp b/lldb/source/Target/OperatingSystem.cpp
index 033a806460da..75762c05151d 100644
--- a/lldb/source/Target/OperatingSystem.cpp
+++ b/lldb/source/Target/OperatingSystem.cpp
@@ -17,10 +17,9 @@ OperatingSystem *OperatingSystem::FindPlugin(Process *process,
const char *plugin_name) {
OperatingSystemCreateInstance create_callback = nullptr;
if (plugin_name) {
- ConstString const_plugin_name(plugin_name);
create_callback =
PluginManager::GetOperatingSystemCreateCallbackForPluginName(
- const_plugin_name);
+ plugin_name);
if (create_callback) {
std::unique_ptr<OperatingSystem> instance_up(
create_callback(process, true));
diff --git a/lldb/source/Target/PathMappingList.cpp b/lldb/source/Target/PathMappingList.cpp
index b660c310ef31..e49f6213cf27 100644
--- a/lldb/source/Target/PathMappingList.cpp
+++ b/lldb/source/Target/PathMappingList.cpp
@@ -30,11 +30,11 @@ namespace {
// with the raw path pair, which doesn't work anymore because the paths have
// been normalized when the debug info was loaded. So we need to store
// normalized path pairs to ensure things match up.
- ConstString NormalizePath(ConstString path) {
- // If we use "path" to construct a FileSpec, it will normalize the path for
- // us. We then grab the string and turn it back into a ConstString.
- return ConstString(FileSpec(path.GetStringRef()).GetPath());
- }
+std::string NormalizePath(llvm::StringRef path) {
+ // If we use "path" to construct a FileSpec, it will normalize the path for
+ // us. We then grab the string.
+ return FileSpec(path).GetPath();
+}
}
// PathMappingList constructor
PathMappingList::PathMappingList() : m_pairs() {}
@@ -59,8 +59,8 @@ const PathMappingList &PathMappingList::operator=(const PathMappingList &rhs) {
PathMappingList::~PathMappingList() = default;
-void PathMappingList::Append(ConstString path,
- ConstString replacement, bool notify) {
+void PathMappingList::Append(llvm::StringRef path, llvm::StringRef replacement,
+ bool notify) {
++m_mod_id;
m_pairs.emplace_back(pair(NormalizePath(path), NormalizePath(replacement)));
if (notify && m_callback)
@@ -78,9 +78,8 @@ void PathMappingList::Append(const PathMappingList &rhs, bool notify) {
}
}
-void PathMappingList::Insert(ConstString path,
- ConstString replacement, uint32_t index,
- bool notify) {
+void PathMappingList::Insert(llvm::StringRef path, llvm::StringRef replacement,
+ uint32_t index, bool notify) {
++m_mod_id;
iterator insert_iter;
if (index >= m_pairs.size())
@@ -93,9 +92,8 @@ void PathMappingList::Insert(ConstString path,
m_callback(*this, m_callback_baton);
}
-bool PathMappingList::Replace(ConstString path,
- ConstString replacement, uint32_t index,
- bool notify) {
+bool PathMappingList::Replace(llvm::StringRef path, llvm::StringRef replacement,
+ uint32_t index, bool notify) {
if (index >= m_pairs.size())
return false;
++m_mod_id;
@@ -218,18 +216,22 @@ bool PathMappingList::ReverseRemapPath(const FileSpec &file, FileSpec &fixed) co
}
llvm::Optional<FileSpec> PathMappingList::FindFile(const FileSpec &orig_spec) const {
- if (auto remapped = RemapPath(orig_spec.GetPath(), /*only_if_exists=*/true))
+ // We must normalize the orig_spec again using the host's path style,
+ // otherwise there will be mismatch between the host and remote platform
+ // if they use different path styles.
+ if (auto remapped = RemapPath(NormalizePath(orig_spec.GetPath()),
+ /*only_if_exists=*/true))
return remapped;
return {};
}
-bool PathMappingList::Replace(ConstString path,
- ConstString new_path, bool notify) {
+bool PathMappingList::Replace(llvm::StringRef path, llvm::StringRef new_path,
+ bool notify) {
uint32_t idx = FindIndexForPath(path);
if (idx < m_pairs.size()) {
++m_mod_id;
- m_pairs[idx].second = new_path;
+ m_pairs[idx].second = ConstString(new_path);
if (notify && m_callback)
m_callback(*this, m_callback_baton);
return true;
@@ -285,8 +287,8 @@ bool PathMappingList::GetPathsAtIndex(uint32_t idx, ConstString &path,
return false;
}
-uint32_t PathMappingList::FindIndexForPath(ConstString orig_path) const {
- const ConstString path = NormalizePath(orig_path);
+uint32_t PathMappingList::FindIndexForPath(llvm::StringRef orig_path) const {
+ const ConstString path = ConstString(NormalizePath(orig_path));
const_iterator pos;
const_iterator begin = m_pairs.begin();
const_iterator end = m_pairs.end();
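A hypothetical caller of the new StringRef overloads above (the directory names are invented; assumes the lldb_private PathMappingList header is in scope). Both arguments are run through NormalizePath via FileSpec before being stored, so later lookups match even if the debug info spelled the prefix slightly differently:

PathMappingList map;
map.Append("/buildbot/src", "/home/user/src", /*notify=*/false);
// RemapPath("/buildbot/src/lib/foo.c") would now yield /home/user/src/lib/foo.c.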
diff --git a/lldb/source/Target/Platform.cpp b/lldb/source/Target/Platform.cpp
index a77ecddfbab6..bd455310f08e 100644
--- a/lldb/source/Target/Platform.cpp
+++ b/lldb/source/Target/Platform.cpp
@@ -155,9 +155,9 @@ void Platform::Terminate() {
}
}
-const PlatformPropertiesSP &Platform::GetGlobalPlatformProperties() {
- static const auto g_settings_sp(std::make_shared<PlatformProperties>());
- return g_settings_sp;
+PlatformProperties &Platform::GetGlobalPlatformProperties() {
+ static PlatformProperties g_settings;
+ return g_settings;
}
void Platform::SetHostPlatform(const lldb::PlatformSP &platform_sp) {
@@ -294,8 +294,8 @@ PlatformSP Platform::Create(ConstString name, Status &error) {
if (name == g_host_platform_name)
return GetHostPlatform();
- create_callback =
- PluginManager::GetPlatformCreateCallbackForPluginName(name);
+ create_callback = PluginManager::GetPlatformCreateCallbackForPluginName(
+ name.GetStringRef());
if (create_callback)
platform_sp = create_callback(true, nullptr);
else
@@ -395,18 +395,10 @@ Platform::Platform(bool is_host)
LLDB_LOGF(log, "%p Platform::Platform()", static_cast<void *>(this));
}
-/// Destructor.
-///
-/// The destructor is virtual since this class is designed to be
-/// inherited from by the plug-in instance.
-Platform::~Platform() {
- Log *log(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_OBJECT));
- LLDB_LOGF(log, "%p Platform::~Platform()", static_cast<void *>(this));
-}
+Platform::~Platform() = default;
void Platform::GetStatus(Stream &strm) {
- std::string s;
- strm.Printf(" Platform: %s\n", GetPluginName().GetCString());
+ strm.Format(" Platform: {0}\n", GetPluginName());
ArchSpec arch(GetSystemArchitecture());
if (arch.IsValid()) {
@@ -421,8 +413,8 @@ void Platform::GetStatus(Stream &strm) {
if (!os_version.empty()) {
strm.Format("OS Version: {0}", os_version.getAsString());
- if (GetOSBuildString(s))
- strm.Printf(" (%s)", s.c_str());
+ if (llvm::Optional<std::string> s = GetOSBuildString())
+ strm.Format(" ({0})", *s);
strm.EOL();
}
@@ -447,8 +439,8 @@ void Platform::GetStatus(Stream &strm) {
if (!specific_info.empty())
strm.Printf("Platform-specific connection: %s\n", specific_info.c_str());
- if (GetOSKernelDescription(s))
- strm.Printf(" Kernel: %s\n", s.c_str());
+ if (llvm::Optional<std::string> s = GetOSKernelDescription())
+ strm.Format(" Kernel: {0}\n", *s);
}
llvm::VersionTuple Platform::GetOSVersion(Process *process) {
@@ -493,28 +485,16 @@ llvm::VersionTuple Platform::GetOSVersion(Process *process) {
return llvm::VersionTuple();
}
-bool Platform::GetOSBuildString(std::string &s) {
- s.clear();
-
+llvm::Optional<std::string> Platform::GetOSBuildString() {
if (IsHost())
-#if !defined(__linux__)
- return HostInfo::GetOSBuildString(s);
-#else
- return false;
-#endif
- else
- return GetRemoteOSBuildString(s);
+ return HostInfo::GetOSBuildString();
+ return GetRemoteOSBuildString();
}
-bool Platform::GetOSKernelDescription(std::string &s) {
+llvm::Optional<std::string> Platform::GetOSKernelDescription() {
if (IsHost())
-#if !defined(__linux__)
- return HostInfo::GetOSKernelDescription(s);
-#else
- return false;
-#endif
- else
- return GetRemoteOSKernelDescription(s);
+ return HostInfo::GetOSKernelDescription();
+ return GetRemoteOSKernelDescription();
}
void Platform::AddClangModuleCompilationOptions(
@@ -769,9 +749,8 @@ Status Platform::MakeDirectory(const FileSpec &file_spec,
return llvm::sys::fs::create_directory(file_spec.GetPath(), permissions);
else {
Status error;
- error.SetErrorStringWithFormat("remote platform %s doesn't support %s",
- GetPluginName().GetCString(),
- LLVM_PRETTY_FUNCTION);
+ error.SetErrorStringWithFormatv("remote platform {0} doesn't support {1}",
+ GetPluginName(), LLVM_PRETTY_FUNCTION);
return error;
}
}
@@ -785,9 +764,8 @@ Status Platform::GetFilePermissions(const FileSpec &file_spec,
return Status(Value.getError());
} else {
Status error;
- error.SetErrorStringWithFormat("remote platform %s doesn't support %s",
- GetPluginName().GetCString(),
- LLVM_PRETTY_FUNCTION);
+ error.SetErrorStringWithFormatv("remote platform {0} doesn't support {1}",
+ GetPluginName(), LLVM_PRETTY_FUNCTION);
return error;
}
}
@@ -799,14 +777,13 @@ Status Platform::SetFilePermissions(const FileSpec &file_spec,
return llvm::sys::fs::setPermissions(file_spec.GetPath(), Perms);
} else {
Status error;
- error.SetErrorStringWithFormat("remote platform %s doesn't support %s",
- GetPluginName().GetCString(),
- LLVM_PRETTY_FUNCTION);
+ error.SetErrorStringWithFormatv("remote platform {0} doesn't support {1}",
+ GetPluginName(), LLVM_PRETTY_FUNCTION);
return error;
}
}
-ConstString Platform::GetName() { return GetPluginName(); }
+ConstString Platform::GetName() { return ConstString(GetPluginName()); }
const char *Platform::GetHostname() {
if (IsHost())
@@ -856,6 +833,7 @@ Platform::ResolveExecutable(const ModuleSpec &module_spec,
lldb::ModuleSP &exe_module_sp,
const FileSpecList *module_search_paths_ptr) {
Status error;
+
if (FileSystem::Instance().Exists(module_spec.GetFileSpec())) {
if (module_spec.GetArchitecture().IsValid()) {
error = ModuleList::GetSharedModule(module_spec, exe_module_sp,
@@ -866,9 +844,8 @@ Platform::ResolveExecutable(const ModuleSpec &module_spec,
// architectures that we should be using (in the correct order) and see
// if we can find a match that way
ModuleSpec arch_module_spec(module_spec);
- for (uint32_t idx = 0; GetSupportedArchitectureAtIndex(
- idx, arch_module_spec.GetArchitecture());
- ++idx) {
+ for (const ArchSpec &arch : GetSupportedArchitectures()) {
+ arch_module_spec.GetArchitecture() = arch;
error = ModuleList::GetSharedModule(arch_module_spec, exe_module_sp,
module_search_paths_ptr, nullptr,
nullptr);
@@ -878,9 +855,74 @@ Platform::ResolveExecutable(const ModuleSpec &module_spec,
}
}
} else {
- error.SetErrorStringWithFormat("'%s' does not exist",
- module_spec.GetFileSpec().GetPath().c_str());
+ error.SetErrorStringWithFormat(
+ "'%s' does not exist", module_spec.GetFileSpec().GetPath().c_str());
+ }
+ return error;
+}
+
+Status
+Platform::ResolveRemoteExecutable(const ModuleSpec &module_spec,
+ lldb::ModuleSP &exe_module_sp,
+ const FileSpecList *module_search_paths_ptr) {
+ Status error;
+
+ // We may connect to a process and use the provided executable (Don't use
+ // local $PATH).
+ ModuleSpec resolved_module_spec(module_spec);
+
+ // Resolve any executable within a bundle on MacOSX
+ Host::ResolveExecutableInBundle(resolved_module_spec.GetFileSpec());
+
+ if (FileSystem::Instance().Exists(resolved_module_spec.GetFileSpec()) ||
+ module_spec.GetUUID().IsValid()) {
+ if (resolved_module_spec.GetArchitecture().IsValid() ||
+ resolved_module_spec.GetUUID().IsValid()) {
+ error = ModuleList::GetSharedModule(resolved_module_spec, exe_module_sp,
+ module_search_paths_ptr, nullptr,
+ nullptr);
+
+ if (exe_module_sp && exe_module_sp->GetObjectFile())
+ return error;
+ exe_module_sp.reset();
+ }
+ // No valid architecture was specified or the exact arch wasn't found so
+ // ask the platform for the architectures that we should be using (in the
+ // correct order) and see if we can find a match that way
+ StreamString arch_names;
+ llvm::ListSeparator LS;
+ for (const ArchSpec &arch : GetSupportedArchitectures()) {
+ resolved_module_spec.GetArchitecture() = arch;
+ error = ModuleList::GetSharedModule(resolved_module_spec, exe_module_sp,
+ module_search_paths_ptr, nullptr,
+ nullptr);
+      // Did we find an executable using one of the supported architectures?
+ if (error.Success()) {
+ if (exe_module_sp && exe_module_sp->GetObjectFile())
+ break;
+ else
+ error.SetErrorToGenericError();
+ }
+
+ arch_names << LS << arch.GetArchitectureName();
+ }
+
+ if (error.Fail() || !exe_module_sp) {
+ if (FileSystem::Instance().Readable(resolved_module_spec.GetFileSpec())) {
+ error.SetErrorStringWithFormatv(
+ "'{0}' doesn't contain any '{1}' platform architectures: {2}",
+ resolved_module_spec.GetFileSpec(), GetPluginName(),
+ arch_names.GetData());
+ } else {
+ error.SetErrorStringWithFormatv("'{0}' is not readable",
+ resolved_module_spec.GetFileSpec());
+ }
+ }
+ } else {
+ error.SetErrorStringWithFormatv("'{0}' does not exist",
+ resolved_module_spec.GetFileSpec());
}
+
return error;
}
@@ -966,26 +1008,27 @@ ArchSpec Platform::GetAugmentedArchSpec(llvm::StringRef triple) {
Status Platform::ConnectRemote(Args &args) {
Status error;
if (IsHost())
- error.SetErrorStringWithFormat("The currently selected platform (%s) is "
- "the host platform and is always connected.",
- GetPluginName().GetCString());
+ error.SetErrorStringWithFormatv(
+ "The currently selected platform ({0}) is "
+ "the host platform and is always connected.",
+ GetPluginName());
else
- error.SetErrorStringWithFormat(
- "Platform::ConnectRemote() is not supported by %s",
- GetPluginName().GetCString());
+ error.SetErrorStringWithFormatv(
+ "Platform::ConnectRemote() is not supported by {0}", GetPluginName());
return error;
}
Status Platform::DisconnectRemote() {
Status error;
if (IsHost())
- error.SetErrorStringWithFormat("The currently selected platform (%s) is "
- "the host platform and is always connected.",
- GetPluginName().GetCString());
+ error.SetErrorStringWithFormatv(
+ "The currently selected platform ({0}) is "
+ "the host platform and is always connected.",
+ GetPluginName());
else
- error.SetErrorStringWithFormat(
- "Platform::DisconnectRemote() is not supported by %s",
- GetPluginName().GetCString());
+ error.SetErrorStringWithFormatv(
+ "Platform::DisconnectRemote() is not supported by {0}",
+ GetPluginName());
return error;
}
@@ -1066,36 +1109,19 @@ Status Platform::KillProcess(const lldb::pid_t pid) {
Log *log(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_PLATFORM));
LLDB_LOGF(log, "Platform::%s, pid %" PRIu64, __FUNCTION__, pid);
- // Try to find a process plugin to handle this Kill request. If we can't,
- // fall back to the default OS implementation.
- size_t num_debuggers = Debugger::GetNumDebuggers();
- for (size_t didx = 0; didx < num_debuggers; ++didx) {
- DebuggerSP debugger = Debugger::GetDebuggerAtIndex(didx);
- lldb_private::TargetList &targets = debugger->GetTargetList();
- for (int tidx = 0; tidx < targets.GetNumTargets(); ++tidx) {
- ProcessSP process = targets.GetTargetAtIndex(tidx)->GetProcessSP();
- if (process->GetID() == pid)
- return process->Destroy(true);
- }
- }
-
if (!IsHost()) {
return Status(
- "base lldb_private::Platform class can't kill remote processes unless "
- "they are controlled by a process plugin");
+ "base lldb_private::Platform class can't kill remote processes");
}
- Host::Kill(pid, SIGTERM);
+ Host::Kill(pid, SIGKILL);
return Status();
}
-lldb::ProcessSP
-Platform::DebugProcess(ProcessLaunchInfo &launch_info, Debugger &debugger,
- Target *target, // Can be nullptr, if nullptr create a
- // new target, else use existing one
- Status &error) {
+lldb::ProcessSP Platform::DebugProcess(ProcessLaunchInfo &launch_info,
+ Debugger &debugger, Target &target,
+ Status &error) {
Log *log(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_PLATFORM));
- LLDB_LOGF(log, "Platform::%s entered (target %p)", __FUNCTION__,
- static_cast<void *>(target));
+  LLDB_LOG(log, "target = {0}", &target);
ProcessSP process_sp;
// Make sure we stop at the entry point
@@ -1117,7 +1143,7 @@ Platform::DebugProcess(ProcessLaunchInfo &launch_info, Debugger &debugger,
filter_callback = get_filter_func(++i, iteration_complete)) {
if (filter_callback) {
// Give this ProcessLaunchInfo filter a chance to adjust the launch info.
- error = (*filter_callback)(launch_info, target);
+ error = (*filter_callback)(launch_info, &target);
if (!error.Success()) {
LLDB_LOGF(log,
"Platform::%s() StructuredDataPlugin launch "
@@ -1135,10 +1161,10 @@ Platform::DebugProcess(ProcessLaunchInfo &launch_info, Debugger &debugger,
__FUNCTION__, launch_info.GetProcessID());
if (launch_info.GetProcessID() != LLDB_INVALID_PROCESS_ID) {
ProcessAttachInfo attach_info(launch_info);
- process_sp = Attach(attach_info, debugger, target, error);
+ process_sp = Attach(attach_info, debugger, &target, error);
if (process_sp) {
- LLDB_LOGF(log, "Platform::%s Attach() succeeded, Process plugin: %s",
- __FUNCTION__, process_sp->GetPluginName().AsCString());
+ LLDB_LOG(log, "Attach() succeeded, Process plugin: {0}",
+ process_sp->GetPluginName());
launch_info.SetHijackListener(attach_info.GetHijackListener());
// Since we attached to the process, it will think it needs to detach
@@ -1149,7 +1175,7 @@ Platform::DebugProcess(ProcessLaunchInfo &launch_info, Debugger &debugger,
// If we didn't have any file actions, the pseudo terminal might have
// been used where the secondary side was given as the file to open for
- // stdin/out/err after we have already opened the master so we can
+ // stdin/out/err after we have already opened the primary so we can
// read/write stdin/out/err.
int pty_fd = launch_info.GetPTY().ReleasePrimaryFileDescriptor();
if (pty_fd != PseudoTerminal::invalid_fd) {
@@ -1183,6 +1209,35 @@ Platform::GetPlatformForArchitecture(const ArchSpec &arch,
return platform_sp;
}
+std::vector<ArchSpec>
+Platform::CreateArchList(llvm::ArrayRef<llvm::Triple::ArchType> archs,
+ llvm::Triple::OSType os) {
+ std::vector<ArchSpec> list;
+  for (auto arch : archs) {
+ llvm::Triple triple;
+ triple.setArch(arch);
+ triple.setOS(os);
+ list.push_back(ArchSpec(triple));
+ }
+ return list;
+}
+
+bool Platform::GetSupportedArchitectureAtIndex(uint32_t idx, ArchSpec &arch) {
+ const auto &archs = GetSupportedArchitectures();
+ if (idx >= archs.size())
+ return false;
+ arch = archs[idx];
+ return true;
+}
+
+std::vector<ArchSpec> Platform::GetSupportedArchitectures() {
+ std::vector<ArchSpec> result;
+ ArchSpec arch;
+ for (uint32_t idx = 0; GetSupportedArchitectureAtIndex(idx, arch); ++idx)
+ result.push_back(arch);
+ return result;
+}
+
/// Lets a platform answer if it is compatible with a given
/// architecture and the target triple contained within.
bool Platform::IsCompatibleArchitecture(const ArchSpec &arch,
@@ -1191,26 +1246,13 @@ bool Platform::IsCompatibleArchitecture(const ArchSpec &arch,
// If the architecture is invalid, we must answer true...
if (arch.IsValid()) {
ArchSpec platform_arch;
- // Try for an exact architecture match first.
- if (exact_arch_match) {
- for (uint32_t arch_idx = 0;
- GetSupportedArchitectureAtIndex(arch_idx, platform_arch);
- ++arch_idx) {
- if (arch.IsExactMatch(platform_arch)) {
- if (compatible_arch_ptr)
- *compatible_arch_ptr = platform_arch;
- return true;
- }
- }
- } else {
- for (uint32_t arch_idx = 0;
- GetSupportedArchitectureAtIndex(arch_idx, platform_arch);
- ++arch_idx) {
- if (arch.IsCompatibleMatch(platform_arch)) {
- if (compatible_arch_ptr)
- *compatible_arch_ptr = platform_arch;
- return true;
- }
+ auto match = exact_arch_match ? &ArchSpec::IsExactMatch
+ : &ArchSpec::IsCompatibleMatch;
+ for (const ArchSpec &platform_arch : GetSupportedArchitectures()) {
+ if ((arch.*match)(platform_arch)) {
+ if (compatible_arch_ptr)
+ *compatible_arch_ptr = platform_arch;
+ return true;
}
}
}
@@ -1225,7 +1267,7 @@ Status Platform::PutFile(const FileSpec &source, const FileSpec &destination,
LLDB_LOGF(log, "[PutFile] Using block by block transfer....\n");
auto source_open_options =
- File::eOpenOptionRead | File::eOpenOptionCloseOnExec;
+ File::eOpenOptionReadOnly | File::eOpenOptionCloseOnExec;
namespace fs = llvm::sys::fs;
if (fs::is_symlink_file(source.GetPath()))
source_open_options |= File::eOpenOptionDontFollowSymlinks;
@@ -1240,7 +1282,7 @@ Status Platform::PutFile(const FileSpec &source, const FileSpec &destination,
permissions = lldb::eFilePermissionsFileDefault;
lldb::user_id_t dest_file = OpenFile(
- destination, File::eOpenOptionCanCreate | File::eOpenOptionWrite |
+ destination, File::eOpenOptionCanCreate | File::eOpenOptionWriteOnly |
File::eOpenOptionTruncate | File::eOpenOptionCloseOnExec,
permissions, error);
LLDB_LOGF(log, "dest_file = %" PRIu64 "\n", dest_file);
@@ -1517,12 +1559,13 @@ const std::vector<ConstString> &Platform::GetTrapHandlerSymbolNames() {
return m_trap_handlers;
}
-Status Platform::GetCachedExecutable(
- ModuleSpec &module_spec, lldb::ModuleSP &module_sp,
- const FileSpecList *module_search_paths_ptr, Platform &remote_platform) {
+Status
+Platform::GetCachedExecutable(ModuleSpec &module_spec,
+ lldb::ModuleSP &module_sp,
+ const FileSpecList *module_search_paths_ptr) {
const auto platform_spec = module_spec.GetFileSpec();
- const auto error = LoadCachedExecutable(
- module_spec, module_sp, module_search_paths_ptr, remote_platform);
+ const auto error =
+ LoadCachedExecutable(module_spec, module_sp, module_search_paths_ptr);
if (error.Success()) {
module_spec.GetFileSpec() = module_sp->GetFileSpec();
module_spec.GetPlatformFileSpec() = platform_spec;
@@ -1531,15 +1574,17 @@ Status Platform::GetCachedExecutable(
return error;
}
-Status Platform::LoadCachedExecutable(
- const ModuleSpec &module_spec, lldb::ModuleSP &module_sp,
- const FileSpecList *module_search_paths_ptr, Platform &remote_platform) {
- return GetRemoteSharedModule(module_spec, nullptr, module_sp,
- [&](const ModuleSpec &spec) {
- return remote_platform.ResolveExecutable(
- spec, module_sp, module_search_paths_ptr);
- },
- nullptr);
+Status
+Platform::LoadCachedExecutable(const ModuleSpec &module_spec,
+ lldb::ModuleSP &module_sp,
+ const FileSpecList *module_search_paths_ptr) {
+ return GetRemoteSharedModule(
+ module_spec, nullptr, module_sp,
+ [&](const ModuleSpec &spec) {
+ return ResolveRemoteExecutable(spec, module_sp,
+ module_search_paths_ptr);
+ },
+ nullptr);
}
Status Platform::GetRemoteSharedModule(const ModuleSpec &module_spec,
@@ -1568,9 +1613,8 @@ Status Platform::GetRemoteSharedModule(const ModuleSpec &module_spec,
// architectures that we should be using (in the correct order) and see if
// we can find a match that way
ModuleSpec arch_module_spec(module_spec);
- for (uint32_t idx = 0; GetSupportedArchitectureAtIndex(
- idx, arch_module_spec.GetArchitecture());
- ++idx) {
+ for (const ArchSpec &arch : GetSupportedArchitectures()) {
+ arch_module_spec.GetArchitecture() = arch;
error = ModuleList::GetSharedModule(arch_module_spec, module_sp, nullptr,
nullptr, nullptr);
// Did we find an executable using one of the
@@ -1619,8 +1663,8 @@ Status Platform::GetRemoteSharedModule(const ModuleSpec &module_spec,
bool Platform::GetCachedSharedModule(const ModuleSpec &module_spec,
lldb::ModuleSP &module_sp,
bool *did_create_ptr) {
- if (IsHost() || !GetGlobalPlatformProperties()->GetUseModuleCache() ||
- !GetGlobalPlatformProperties()->GetModuleCacheDirectory())
+ if (IsHost() || !GetGlobalPlatformProperties().GetUseModuleCache() ||
+ !GetGlobalPlatformProperties().GetModuleCacheDirectory())
return false;
Log *log = GetLogIfAnyCategoriesSet(LIBLLDB_LOG_PLATFORM);
@@ -1663,7 +1707,7 @@ Status Platform::DownloadModuleSlice(const FileSpec &src_file_spec,
return error;
}
- auto src_fd = OpenFile(src_file_spec, File::eOpenOptionRead,
+ auto src_fd = OpenFile(src_file_spec, File::eOpenOptionReadOnly,
lldb::eFilePermissionsFileDefault, error);
if (error.Fail()) {
@@ -1704,7 +1748,7 @@ Status Platform::DownloadSymbolFile(const lldb::ModuleSP &module_sp,
}
FileSpec Platform::GetModuleCacheRoot() {
- auto dir_spec = GetGlobalPlatformProperties()->GetModuleCacheDirectory();
+ auto dir_spec = GetGlobalPlatformProperties().GetModuleCacheDirectory();
dir_spec.AppendPathComponent(GetName().AsCString());
return dir_spec;
}
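The IsCompatibleArchitecture rewrite above selects between two const member functions with a pointer-to-member and calls it through .*, a pattern worth seeing in isolation. A self-contained example with a made-up Box type (nothing here is LLDB API):

#include <iostream>

struct Box {
  int value;
  bool IsExactMatch(const Box &other) const { return value == other.value; }
  bool IsCompatibleMatch(const Box &other) const {
    return (value & ~0xf) == (other.value & ~0xf);
  }
};

bool Matches(const Box &a, const Box &b, bool exact) {
  // Both candidates share the type bool (Box::*)(const Box &) const, so the
  // conditional picks one and the call goes through the selected member.
  auto match = exact ? &Box::IsExactMatch : &Box::IsCompatibleMatch;
  return (a.*match)(b);
}

int main() {
  Box a{0x12}, b{0x13};
  std::cout << Matches(a, b, /*exact=*/true) << ' '
            << Matches(a, b, /*exact=*/false) << '\n'; // prints: 0 1
}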
diff --git a/lldb/source/Target/Process.cpp b/lldb/source/Target/Process.cpp
index 8ecc66b592ea..84dc2b94a0eb 100644
--- a/lldb/source/Target/Process.cpp
+++ b/lldb/source/Target/Process.cpp
@@ -110,6 +110,19 @@ public:
}
};
+static constexpr OptionEnumValueElement g_follow_fork_mode_values[] = {
+ {
+ eFollowParent,
+ "parent",
+ "Continue tracing the parent process and detach the child.",
+ },
+ {
+ eFollowChild,
+ "child",
+ "Trace the child process and detach the parent.",
+ },
+};
+
#define LLDB_PROPERTIES_process
#include "TargetProperties.inc"
@@ -153,10 +166,10 @@ ProcessProperties::ProcessProperties(lldb_private::Process *process)
m_collection_sp->Initialize(g_process_properties);
m_collection_sp->AppendProperty(
ConstString("thread"), ConstString("Settings specific to threads."),
- true, Thread::GetGlobalProperties()->GetValueProperties());
+ true, Thread::GetGlobalProperties().GetValueProperties());
} else {
m_collection_sp =
- OptionValueProperties::CreateLocalCopy(*Process::GetGlobalProperties());
+ OptionValueProperties::CreateLocalCopy(Process::GetGlobalProperties());
m_collection_sp->SetValueChangedCallback(
ePropertyPythonOSPluginPath,
[this] { m_process->LoadOperatingSystemPlugin(true); });
@@ -334,6 +347,12 @@ void ProcessProperties::SetOSPluginReportsAllThreads(bool does_report) {
nullptr, ePropertyOSPluginReportsAllThreads, does_report);
}
+FollowForkMode ProcessProperties::GetFollowForkMode() const {
+ const uint32_t idx = ePropertyFollowForkMode;
+ return (FollowForkMode)m_collection_sp->GetPropertyAtIndexAsEnumeration(
+ nullptr, idx, g_process_properties[idx].default_uint_value);
+}
+
ProcessSP Process::FindPlugin(lldb::TargetSP target_sp,
llvm::StringRef plugin_name,
ListenerSP listener_sp,
@@ -344,9 +363,8 @@ ProcessSP Process::FindPlugin(lldb::TargetSP target_sp,
ProcessSP process_sp;
ProcessCreateInstance create_callback = nullptr;
if (!plugin_name.empty()) {
- ConstString const_plugin_name(plugin_name);
create_callback =
- PluginManager::GetProcessCreateCallbackForPluginName(const_plugin_name);
+ PluginManager::GetProcessCreateCallbackForPluginName(plugin_name);
if (create_callback) {
process_sp = create_callback(target_sp, listener_sp, crash_file_path,
can_connect);
@@ -481,12 +499,12 @@ Process::~Process() {
m_thread_list.Clear();
}
-const ProcessPropertiesSP &Process::GetGlobalProperties() {
+ProcessProperties &Process::GetGlobalProperties() {
// NOTE: intentional leak so we don't crash if global destructor chain gets
// called as other threads still use the result of this function
- static ProcessPropertiesSP *g_settings_sp_ptr =
- new ProcessPropertiesSP(new ProcessProperties(nullptr));
- return *g_settings_sp_ptr;
+ static ProcessProperties *g_settings_ptr =
+ new ProcessProperties(nullptr);
+ return *g_settings_ptr;
}
void Process::Finalize() {
@@ -1278,6 +1296,17 @@ StateType Process::GetState() {
}
void Process::SetPublicState(StateType new_state, bool restarted) {
+ const bool new_state_is_stopped = StateIsStoppedState(new_state, false);
+ if (new_state_is_stopped) {
+ // This will only set the time if the public stop time has no value, so
+ // it is ok to call this multiple times. With a public stop we can't look
+ // at the stop ID because many private stops might have happened, so we
+ // can't check for a stop ID of zero. This allows the "statistics" command
+ // to dump the time it takes to reach somewhere in your code, like a
+ // breakpoint you set.
+ GetTarget().GetStatistics().SetFirstPublicStopTime();
+ }
+
Log *log(lldb_private::GetLogIfAnyCategoriesSet(LIBLLDB_LOG_STATE |
LIBLLDB_LOG_PROCESS));
LLDB_LOGF(log, "Process::SetPublicState (state = %s, restarted = %i)",
@@ -1296,7 +1325,6 @@ void Process::SetPublicState(StateType new_state, bool restarted) {
m_public_run_lock.SetStopped();
} else {
const bool old_state_is_stopped = StateIsStoppedState(old_state, false);
- const bool new_state_is_stopped = StateIsStoppedState(new_state, false);
if ((old_state_is_stopped != new_state_is_stopped)) {
if (new_state_is_stopped && !restarted) {
LLDB_LOGF(log, "Process::SetPublicState (%s) -- unlocking run lock",
@@ -1427,7 +1455,9 @@ void Process::SetPrivateState(StateType new_state) {
// before we get here.
m_thread_list.DidStop();
- m_mod_id.BumpStopID();
+ if (m_mod_id.BumpStopID() == 0)
+ GetTarget().GetStatistics().SetFirstPrivateStopTime();
+
if (!m_mod_id.IsLastResumeForUserExpression())
m_mod_id.SetStopEventForLastNaturalStopID(event_sp);
m_memory_cache.Clear();
@@ -1953,57 +1983,6 @@ size_t Process::ReadCStringFromMemory(addr_t addr, std::string &out_str,
return out_str.size();
}
-size_t Process::ReadStringFromMemory(addr_t addr, char *dst, size_t max_bytes,
- Status &error, size_t type_width) {
- size_t total_bytes_read = 0;
- if (dst && max_bytes && type_width && max_bytes >= type_width) {
- // Ensure a null terminator independent of the number of bytes that is
- // read.
- memset(dst, 0, max_bytes);
- size_t bytes_left = max_bytes - type_width;
-
- const char terminator[4] = {'\0', '\0', '\0', '\0'};
- assert(sizeof(terminator) >= type_width && "Attempting to validate a "
- "string with more than 4 bytes "
- "per character!");
-
- addr_t curr_addr = addr;
- const size_t cache_line_size = m_memory_cache.GetMemoryCacheLineSize();
- char *curr_dst = dst;
-
- error.Clear();
- while (bytes_left > 0 && error.Success()) {
- addr_t cache_line_bytes_left =
- cache_line_size - (curr_addr % cache_line_size);
- addr_t bytes_to_read =
- std::min<addr_t>(bytes_left, cache_line_bytes_left);
- size_t bytes_read = ReadMemory(curr_addr, curr_dst, bytes_to_read, error);
-
- if (bytes_read == 0)
- break;
-
- // Search for a null terminator of correct size and alignment in
- // bytes_read
- size_t aligned_start = total_bytes_read - total_bytes_read % type_width;
- for (size_t i = aligned_start;
- i + type_width <= total_bytes_read + bytes_read; i += type_width)
- if (::memcmp(&dst[i], terminator, type_width) == 0) {
- error.Clear();
- return i;
- }
-
- total_bytes_read += bytes_read;
- curr_dst += bytes_read;
- curr_addr += bytes_read;
- bytes_left -= bytes_read;
- }
- } else {
- if (max_bytes)
- error.SetErrorString("invalid arguments");
- }
- return total_bytes_read;
-}
-
// Deprecated in favor of ReadStringFromMemory which has wchar support and
// correct code to find null terminators.
size_t Process::ReadCStringFromMemory(addr_t addr, char *dst,
@@ -2463,115 +2442,125 @@ Status Process::Launch(ProcessLaunchInfo &launch_info) {
m_process_input_reader.reset();
Module *exe_module = GetTarget().GetExecutableModulePointer();
- if (!exe_module) {
- error.SetErrorString("executable module does not exist");
- return error;
- }
- char local_exec_file_path[PATH_MAX];
- char platform_exec_file_path[PATH_MAX];
- exe_module->GetFileSpec().GetPath(local_exec_file_path,
- sizeof(local_exec_file_path));
- exe_module->GetPlatformFileSpec().GetPath(platform_exec_file_path,
- sizeof(platform_exec_file_path));
- if (FileSystem::Instance().Exists(exe_module->GetFileSpec())) {
+ // The "remote executable path" is hooked up to the local Executable
+ // module. But we should be able to debug a remote process even if the
+ // executable module only exists on the remote. However, there needs to
+ // be a way to express this path, without actually having a module.
+ // The way to do that is to set the ExecutableFile in the LaunchInfo.
+ // Figure that out here:
+
+ FileSpec exe_spec_to_use;
+ if (!exe_module) {
+ if (!launch_info.GetExecutableFile()) {
+ error.SetErrorString("executable module does not exist");
+ return error;
+ }
+ exe_spec_to_use = launch_info.GetExecutableFile();
+ } else
+ exe_spec_to_use = exe_module->GetFileSpec();
+
+ if (exe_module && FileSystem::Instance().Exists(exe_module->GetFileSpec())) {
// Install anything that might need to be installed prior to launching.
// For host systems, this will do nothing, but if we are connected to a
// remote platform it will install any needed binaries
error = GetTarget().Install(&launch_info);
if (error.Fail())
return error;
+ }
+ // Listen and queue events that are broadcasted during the process launch.
+ ListenerSP listener_sp(Listener::MakeListener("LaunchEventHijack"));
+ HijackProcessEvents(listener_sp);
+ auto on_exit = llvm::make_scope_exit([this]() { RestoreProcessEvents(); });
- // Listen and queue events that are broadcasted during the process launch.
- ListenerSP listener_sp(Listener::MakeListener("LaunchEventHijack"));
- HijackProcessEvents(listener_sp);
- auto on_exit = llvm::make_scope_exit([this]() { RestoreProcessEvents(); });
+ if (PrivateStateThreadIsValid())
+ PausePrivateStateThread();
- if (PrivateStateThreadIsValid())
- PausePrivateStateThread();
+ error = WillLaunch(exe_module);
+ if (error.Success()) {
+ const bool restarted = false;
+ SetPublicState(eStateLaunching, restarted);
+ m_should_detach = false;
- error = WillLaunch(exe_module);
- if (error.Success()) {
- const bool restarted = false;
- SetPublicState(eStateLaunching, restarted);
- m_should_detach = false;
+ if (m_public_run_lock.TrySetRunning()) {
+ // Now launch using these arguments.
+ error = DoLaunch(exe_module, launch_info);
+ } else {
+ // This shouldn't happen
+ error.SetErrorString("failed to acquire process run lock");
+ }
- if (m_public_run_lock.TrySetRunning()) {
- // Now launch using these arguments.
- error = DoLaunch(exe_module, launch_info);
- } else {
- // This shouldn't happen
- error.SetErrorString("failed to acquire process run lock");
+ if (error.Fail()) {
+ if (GetID() != LLDB_INVALID_PROCESS_ID) {
+ SetID(LLDB_INVALID_PROCESS_ID);
+ const char *error_string = error.AsCString();
+ if (error_string == nullptr)
+ error_string = "launch failed";
+ SetExitStatus(-1, error_string);
}
+ } else {
+ EventSP event_sp;
- if (error.Fail()) {
- if (GetID() != LLDB_INVALID_PROCESS_ID) {
- SetID(LLDB_INVALID_PROCESS_ID);
- const char *error_string = error.AsCString();
- if (error_string == nullptr)
- error_string = "launch failed";
- SetExitStatus(-1, error_string);
- }
- } else {
- EventSP event_sp;
-
- // Now wait for the process to launch and return control to us, and then
- // call DidLaunch:
- StateType state = WaitForProcessStopPrivate(event_sp, seconds(10));
-
- if (state == eStateInvalid || !event_sp) {
- // We were able to launch the process, but we failed to catch the
- // initial stop.
- error.SetErrorString("failed to catch stop after launch");
- SetExitStatus(0, "failed to catch stop after launch");
- Destroy(false);
- } else if (state == eStateStopped || state == eStateCrashed) {
- DidLaunch();
-
- DynamicLoader *dyld = GetDynamicLoader();
- if (dyld)
- dyld->DidLaunch();
-
- GetJITLoaders().DidLaunch();
-
- SystemRuntime *system_runtime = GetSystemRuntime();
- if (system_runtime)
- system_runtime->DidLaunch();
-
- if (!m_os_up)
- LoadOperatingSystemPlugin(false);
-
- // We successfully launched the process and stopped, now it the
- // right time to set up signal filters before resuming.
- UpdateAutomaticSignalFiltering();
-
- // Note, the stop event was consumed above, but not handled. This
- // was done to give DidLaunch a chance to run. The target is either
- // stopped or crashed. Directly set the state. This is done to
- // prevent a stop message with a bunch of spurious output on thread
- // status, as well as not pop a ProcessIOHandler.
- SetPublicState(state, false);
-
- if (PrivateStateThreadIsValid())
- ResumePrivateStateThread();
- else
- StartPrivateStateThread();
+ // Now wait for the process to launch and return control to us, and then
+ // call DidLaunch:
+ StateType state = WaitForProcessStopPrivate(event_sp, seconds(10));
+
+ if (state == eStateInvalid || !event_sp) {
+ // We were able to launch the process, but we failed to catch the
+ // initial stop.
+ error.SetErrorString("failed to catch stop after launch");
+ SetExitStatus(0, "failed to catch stop after launch");
+ Destroy(false);
+ } else if (state == eStateStopped || state == eStateCrashed) {
+ DidLaunch();
+
+ DynamicLoader *dyld = GetDynamicLoader();
+ if (dyld)
+ dyld->DidLaunch();
+
+ GetJITLoaders().DidLaunch();
+
+ SystemRuntime *system_runtime = GetSystemRuntime();
+ if (system_runtime)
+ system_runtime->DidLaunch();
+
+ if (!m_os_up)
+ LoadOperatingSystemPlugin(false);
+
+      // We successfully launched the process and stopped, now is the
+ // right time to set up signal filters before resuming.
+ UpdateAutomaticSignalFiltering();
+
+ // Note, the stop event was consumed above, but not handled. This
+ // was done to give DidLaunch a chance to run. The target is either
+ // stopped or crashed. Directly set the state. This is done to
+ // prevent a stop message with a bunch of spurious output on thread
+ // status, as well as not pop a ProcessIOHandler.
+ // We are done with the launch hijack listener, and this stop should
+ // go to the public state listener:
+ RestoreProcessEvents();
+ SetPublicState(state, false);
+
+ if (PrivateStateThreadIsValid())
+ ResumePrivateStateThread();
+ else
+ StartPrivateStateThread();
- // Target was stopped at entry as was intended. Need to notify the
- // listeners about it.
- if (state == eStateStopped &&
- launch_info.GetFlags().Test(eLaunchFlagStopAtEntry))
- HandlePrivateEvent(event_sp);
- } else if (state == eStateExited) {
- // We exited while trying to launch somehow. Don't call DidLaunch
- // as that's not likely to work, and return an invalid pid.
+ // Target was stopped at entry as was intended. Need to notify the
+ // listeners about it.
+ if (state == eStateStopped &&
+ launch_info.GetFlags().Test(eLaunchFlagStopAtEntry))
HandlePrivateEvent(event_sp);
- }
+ } else if (state == eStateExited) {
+ // We exited while trying to launch somehow. Don't call DidLaunch
+ // as that's not likely to work, and return an invalid pid.
+ HandlePrivateEvent(event_sp);
}
}
} else {
+ std::string local_exec_file_path = exe_spec_to_use.GetPath();
error.SetErrorStringWithFormat("file doesn't exist: '%s'",
- local_exec_file_path);
+ local_exec_file_path.c_str());
}
return error;
@@ -2625,12 +2614,16 @@ Status Process::LoadCore() {
DynamicLoader *Process::GetDynamicLoader() {
if (!m_dyld_up)
- m_dyld_up.reset(DynamicLoader::FindPlugin(this, nullptr));
+ m_dyld_up.reset(DynamicLoader::FindPlugin(this, ""));
return m_dyld_up.get();
}
DataExtractor Process::GetAuxvData() { return DataExtractor(); }
+llvm::Expected<bool> Process::SaveCore(llvm::StringRef outfile) {
+ return false;
+}
+
JITLoaderList &Process::GetJITLoaders() {
if (!m_jit_loaders_up) {
m_jit_loaders_up = std::make_unique<JITLoaderList>();
@@ -2916,13 +2909,11 @@ void Process::CompleteAttach() {
dyld->DidAttach();
if (log) {
ModuleSP exe_module_sp = GetTarget().GetExecutableModule();
- LLDB_LOGF(log,
- "Process::%s after DynamicLoader::DidAttach(), target "
- "executable is %s (using %s plugin)",
- __FUNCTION__,
- exe_module_sp ? exe_module_sp->GetFileSpec().GetPath().c_str()
- : "<none>",
- dyld->GetPluginName().AsCString("<unnamed>"));
+ LLDB_LOG(log,
+ "after DynamicLoader::DidAttach(), target "
+ "executable is {0} (using {1} plugin)",
+ exe_module_sp ? exe_module_sp->GetFileSpec() : FileSpec(),
+ dyld->GetPluginName());
}
}
@@ -2933,13 +2924,11 @@ void Process::CompleteAttach() {
system_runtime->DidAttach();
if (log) {
ModuleSP exe_module_sp = GetTarget().GetExecutableModule();
- LLDB_LOGF(log,
- "Process::%s after SystemRuntime::DidAttach(), target "
- "executable is %s (using %s plugin)",
- __FUNCTION__,
- exe_module_sp ? exe_module_sp->GetFileSpec().GetPath().c_str()
- : "<none>",
- system_runtime->GetPluginName().AsCString("<unnamed>"));
+ LLDB_LOG(log,
+ "after SystemRuntime::DidAttach(), target "
+ "executable is {0} (using {1} plugin)",
+ exe_module_sp ? exe_module_sp->GetFileSpec() : FileSpec(),
+ system_runtime->GetPluginName());
}
}
@@ -4310,8 +4299,8 @@ public:
: IOHandler(process->GetTarget().GetDebugger(),
IOHandler::Type::ProcessIO),
m_process(process),
- m_read_file(GetInputFD(), File::eOpenOptionRead, false),
- m_write_file(write_fd, File::eOpenOptionWrite, false) {
+ m_read_file(GetInputFD(), File::eOpenOptionReadOnly, false),
+ m_write_file(write_fd, File::eOpenOptionWriteOnly, false) {
m_pipe.CreateNew(false);
}
@@ -4328,11 +4317,11 @@ public:
SetIsDone(false);
const int read_fd = m_read_file.GetDescriptor();
- TerminalState terminal_state;
- terminal_state.Save(read_fd, false);
Terminal terminal(read_fd);
- terminal.SetCanonical(false);
- terminal.SetEcho(false);
+ TerminalState terminal_state(terminal, false);
+ // FIXME: error handling?
+ llvm::consumeError(terminal.SetCanonical(false));
+ llvm::consumeError(terminal.SetEcho(false));
// FD_ZERO, FD_SET are not supported on windows
#ifndef _WIN32
const int pipe_read_fd = m_pipe.GetReadFileDescriptor();
@@ -4376,7 +4365,6 @@ public:
}
m_is_running = false;
#endif
- terminal_state.Restore();
}
void Cancel() override {
@@ -4433,7 +4421,7 @@ public:
protected:
Process *m_process;
NativeFile m_read_file; // Read from this file (usually actual STDIN for LLDB
- NativeFile m_write_file; // Write to this file (usually the master pty for
+ NativeFile m_write_file; // Write to this file (usually the primary pty for
// getting io to debuggee)
Pipe m_pipe;
std::atomic<bool> m_is_running{false};
@@ -4494,7 +4482,8 @@ void Process::SettingsInitialize() { Thread::SettingsInitialize(); }
void Process::SettingsTerminate() { Thread::SettingsTerminate(); }
namespace {
-// RestorePlanState is used to record the "is private", "is master" and "okay
+// RestorePlanState is used to record the "is private", "is controlling" and
+// "okay
// to discard" fields of the plan we are running, and reset it on Clean or on
// destruction. It will only reset the state once, so you can call Clean and
// then monkey with the state and it won't get reset on you again.
@@ -4505,7 +4494,7 @@ public:
: m_thread_plan_sp(thread_plan_sp), m_already_reset(false) {
if (m_thread_plan_sp) {
m_private = m_thread_plan_sp->GetPrivate();
- m_is_master = m_thread_plan_sp->IsMasterPlan();
+ m_is_controlling = m_thread_plan_sp->IsControllingPlan();
m_okay_to_discard = m_thread_plan_sp->OkayToDiscard();
}
}
@@ -4516,7 +4505,7 @@ public:
if (!m_already_reset && m_thread_plan_sp) {
m_already_reset = true;
m_thread_plan_sp->SetPrivate(m_private);
- m_thread_plan_sp->SetIsMasterPlan(m_is_master);
+ m_thread_plan_sp->SetIsControllingPlan(m_is_controlling);
m_thread_plan_sp->SetOkayToDiscard(m_okay_to_discard);
}
}
@@ -4525,7 +4514,7 @@ private:
lldb::ThreadPlanSP m_thread_plan_sp;
bool m_already_reset;
bool m_private;
- bool m_is_master;
+ bool m_is_controlling;
bool m_okay_to_discard;
};
} // anonymous namespace
@@ -4676,11 +4665,11 @@ Process::RunThreadPlan(ExecutionContext &exe_ctx,
thread_plan_sp->SetPrivate(false);
- // The plans run with RunThreadPlan also need to be terminal master plans or
- // when they are done we will end up asking the plan above us whether we
+ // The plans run with RunThreadPlan also need to be terminal controlling plans
+ // or when they are done we will end up asking the plan above us whether we
// should stop, which may give the wrong answer.
- thread_plan_sp->SetIsMasterPlan(true);
+ thread_plan_sp->SetIsControllingPlan(true);
thread_plan_sp->SetOkayToDiscard(false);
// If we are running some utility expression for LLDB, we now have to mark
@@ -5864,6 +5853,13 @@ Process::AdvanceAddressToNextBranchInstruction(Address default_stop_addr,
return retval;
}
+Status Process::GetMemoryRegionInfo(lldb::addr_t load_addr,
+ MemoryRegionInfo &range_info) {
+ if (auto abi = GetABI())
+ load_addr = abi->FixDataAddress(load_addr);
+ return DoGetMemoryRegionInfo(load_addr, range_info);
+}
+
Status
Process::GetMemoryRegions(lldb_private::MemoryRegionInfos &region_list) {
@@ -5963,11 +5959,8 @@ void Process::MapSupportedStructuredDataPlugins(
m_structured_data_plugin_map.insert(
std::make_pair(type_name, plugin_sp));
names_to_remove.push_back(type_name);
- LLDB_LOGF(log,
- "Process::%s(): using plugin %s for type name "
- "%s",
- __FUNCTION__, plugin_sp->GetPluginName().GetCString(),
- type_name.GetCString());
+ LLDB_LOG(log, "using plugin {0} for type name {1}",
+ plugin_sp->GetPluginName(), type_name);
}
}
@@ -6091,8 +6084,7 @@ llvm::Expected<const MemoryTagManager *> Process::GetMemoryTagManager() {
if (!arch || !tag_manager) {
return llvm::createStringError(
llvm::inconvertibleErrorCode(),
- "This architecture does not support memory tagging",
- GetPluginName().GetCString());
+ "This architecture does not support memory tagging");
}
if (!SupportsMemoryTagging()) {
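The new Process::GetMemoryRegionInfo wrapper above runs the address through ABI::FixDataAddress before calling DoGetMemoryRegionInfo. A rough, self-contained illustration of why: on targets where the top bits of a pointer carry metadata (for example AArch64 top-byte-ignore), the raw value would miss the region lookup. The mask below is an assumption for the example, not LLDB's ABI code:

#include <cstdint>
#include <cstdio>

// Hypothetical stand-in for an ABI's FixDataAddress on an AArch64-style
// target: drop the non-address top byte so region lookups see the real
// virtual address.
uint64_t FixDataAddress(uint64_t addr) {
  return addr & 0x00ffffffffffffffULL;
}

int main() {
  uint64_t tagged = 0x3a00ffffa2c4b000ULL; // pointer whose top byte is a tag
  std::printf("%llx\n", (unsigned long long)FixDataAddress(tagged));
  // prints ffffa2c4b000 -- the value actually mapped by the MMU
}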
diff --git a/lldb/source/Target/ProcessTrace.cpp b/lldb/source/Target/ProcessTrace.cpp
index c878a2ac4eb9..41d5b01b61d8 100644
--- a/lldb/source/Target/ProcessTrace.cpp
+++ b/lldb/source/Target/ProcessTrace.cpp
@@ -19,12 +19,7 @@
using namespace lldb;
using namespace lldb_private;
-ConstString ProcessTrace::GetPluginNameStatic() {
- static ConstString g_name("trace");
- return g_name;
-}
-
-const char *ProcessTrace::GetPluginDescriptionStatic() {
+llvm::StringRef ProcessTrace::GetPluginDescriptionStatic() {
return "Trace process plug-in.";
}
@@ -57,10 +52,6 @@ ProcessTrace::~ProcessTrace() {
Finalize();
}
-ConstString ProcessTrace::GetPluginName() { return GetPluginNameStatic(); }
-
-uint32_t ProcessTrace::GetPluginVersion() { return 1; }
-
void ProcessTrace::DidAttach(ArchSpec &process_arch) {
ListenerSP listener_sp(
Listener::MakeListener("lldb.process_trace.did_attach_listener"));
diff --git a/lldb/source/Target/RegisterContext.cpp b/lldb/source/Target/RegisterContext.cpp
index bd50a9486ef3..7364660650e8 100644
--- a/lldb/source/Target/RegisterContext.cpp
+++ b/lldb/source/Target/RegisterContext.cpp
@@ -54,6 +54,17 @@ RegisterContext::GetRegisterInfoByName(llvm::StringRef reg_name,
if (reg_name.empty())
return nullptr;
+ // Generic register names take precedence over specific register names.
+ // For example, on x86 we want "sp" to refer to the complete RSP/ESP register
+ // rather than the 16-bit SP pseudo-register.
+ uint32_t generic_reg = Args::StringToGenericRegister(reg_name);
+ if (generic_reg != LLDB_INVALID_REGNUM) {
+ const RegisterInfo *reg_info =
+ GetRegisterInfo(eRegisterKindGeneric, generic_reg);
+ if (reg_info)
+ return reg_info;
+ }
+
const uint32_t num_registers = GetRegisterCount();
for (uint32_t reg = start_idx; reg < num_registers; ++reg) {
const RegisterInfo *reg_info = GetRegisterInfoAtIndex(reg);
@@ -62,45 +73,8 @@ RegisterContext::GetRegisterInfoByName(llvm::StringRef reg_name,
reg_name.equals_insensitive(reg_info->alt_name))
return reg_info;
}
- return nullptr;
-}
-uint32_t
-RegisterContext::UpdateDynamicRegisterSize(const lldb_private::ArchSpec &arch,
- RegisterInfo *reg_info) {
- ExecutionContext exe_ctx(CalculateThread());
-
- // In MIPS, the floating point registers size is depends on FR bit of SR
- // register. if SR.FR == 1 then all floating point registers are 64 bits.
- // else they are all 32 bits.
-
- int expr_result;
- uint32_t addr_size = arch.GetAddressByteSize();
- const uint8_t *dwarf_opcode_ptr = reg_info->dynamic_size_dwarf_expr_bytes;
- const size_t dwarf_opcode_len = reg_info->dynamic_size_dwarf_len;
-
- DataExtractor dwarf_data(dwarf_opcode_ptr, dwarf_opcode_len,
- arch.GetByteOrder(), addr_size);
- ModuleSP opcode_ctx;
- DWARFExpression dwarf_expr(opcode_ctx, dwarf_data, nullptr);
- Value result;
- Status error;
- if (dwarf_expr.Evaluate(&exe_ctx, this, opcode_ctx, dwarf_data, nullptr,
- eRegisterKindDWARF, nullptr, nullptr, result,
- &error)) {
- expr_result = result.GetScalar().SInt(-1);
- switch (expr_result) {
- case 0:
- return 4;
- case 1:
- return 8;
- default:
- return reg_info->byte_size;
- }
- } else {
- printf("Error executing DwarfExpression::Evaluate %s\n", error.AsCString());
- return reg_info->byte_size;
- }
+ return nullptr;
}
const RegisterInfo *RegisterContext::GetRegisterInfo(lldb::RegisterKind kind,
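A sketch of the effect of the generic-name precedence added to GetRegisterInfoByName above (assumes an x86-64 register context is in scope; the variable names are illustrative):

// Before: "sp" could resolve to the 16-bit legacy SP pseudo-register.
// After: Args::StringToGenericRegister recognizes "sp", so the lookup goes
// through eRegisterKindGeneric first and returns the full-width rsp entry.
const RegisterInfo *info = reg_ctx.GetRegisterInfoByName("sp");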
diff --git a/lldb/source/Target/RegisterContextUnwind.cpp b/lldb/source/Target/RegisterContextUnwind.cpp
index 1ce21e6306e0..96b69640a3a3 100644
--- a/lldb/source/Target/RegisterContextUnwind.cpp
+++ b/lldb/source/Target/RegisterContextUnwind.cpp
@@ -893,13 +893,22 @@ UnwindPlanSP RegisterContextUnwind::GetFullUnwindPlanForFrame() {
return arch_default_unwind_plan_sp;
}
- // If we're in _sigtramp(), unwinding past this frame requires special
- // knowledge. On Mac OS X this knowledge is properly encoded in the eh_frame
- // section, so prefer that if available. On other platforms we may need to
- // provide a platform-specific UnwindPlan which encodes the details of how to
- // unwind out of sigtramp.
if (m_frame_type == eTrapHandlerFrame && process) {
m_fast_unwind_plan_sp.reset();
+
+ // On some platforms the unwind information for signal handlers is not
+  // present or correct. Give the platform plugins a chance to provide a
+  // substitute plan. Otherwise, use eh_frame.
+ if (m_sym_ctx_valid) {
+ lldb::PlatformSP platform = process->GetTarget().GetPlatform();
+ unwind_plan_sp = platform->GetTrapHandlerUnwindPlan(
+ process->GetTarget().GetArchitecture().GetTriple(),
+ GetSymbolOrFunctionName(m_sym_ctx));
+
+ if (unwind_plan_sp)
+ return unwind_plan_sp;
+ }
+
unwind_plan_sp =
func_unwinders_sp->GetEHFrameUnwindPlan(process->GetTarget());
if (!unwind_plan_sp)
diff --git a/lldb/source/Target/RemoteAwarePlatform.cpp b/lldb/source/Target/RemoteAwarePlatform.cpp
index b0c43ffa839e..eb39fc6db304 100644
--- a/lldb/source/Target/RemoteAwarePlatform.cpp
+++ b/lldb/source/Target/RemoteAwarePlatform.cpp
@@ -72,8 +72,7 @@ Status RemoteAwarePlatform::ResolveExecutable(
} else {
if (m_remote_platform_sp) {
return GetCachedExecutable(resolved_module_spec, exe_module_sp,
- module_search_paths_ptr,
- *m_remote_platform_sp);
+ module_search_paths_ptr);
}
// We may connect to a process and use the provided executable (Don't use
@@ -154,10 +153,10 @@ Status RemoteAwarePlatform::ResolveExecutable(
if (error.Fail() || !exe_module_sp) {
if (FileSystem::Instance().Readable(
resolved_module_spec.GetFileSpec())) {
- error.SetErrorStringWithFormat(
- "'%s' doesn't contain any '%s' platform architectures: %s",
- resolved_module_spec.GetFileSpec().GetPath().c_str(),
- GetPluginName().GetCString(), arch_names.GetData());
+ error.SetErrorStringWithFormatv(
+ "'{0}' doesn't contain any '{1}' platform architectures: {2}",
+ resolved_module_spec.GetFileSpec(), GetPluginName(),
+ arch_names.GetData());
} else {
error.SetErrorStringWithFormat(
"'%s' is not readable",
@@ -332,18 +331,16 @@ bool RemoteAwarePlatform::GetRemoteOSVersion() {
return false;
}
-bool RemoteAwarePlatform::GetRemoteOSBuildString(std::string &s) {
+llvm::Optional<std::string> RemoteAwarePlatform::GetRemoteOSBuildString() {
if (m_remote_platform_sp)
- return m_remote_platform_sp->GetRemoteOSBuildString(s);
- s.clear();
- return false;
+ return m_remote_platform_sp->GetRemoteOSBuildString();
+ return llvm::None;
}
-bool RemoteAwarePlatform::GetRemoteOSKernelDescription(std::string &s) {
+llvm::Optional<std::string> RemoteAwarePlatform::GetRemoteOSKernelDescription() {
if (m_remote_platform_sp)
- return m_remote_platform_sp->GetRemoteOSKernelDescription(s);
- s.clear();
- return false;
+ return m_remote_platform_sp->GetRemoteOSKernelDescription();
+ return llvm::None;
}
ArchSpec RemoteAwarePlatform::GetRemoteSystemArchitecture() {
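The GetRemoteOSBuildString/GetRemoteOSKernelDescription hunks above replace the bool-plus-out-parameter convention with an optional return value. The following standalone sketch (not part of the diff) illustrates the same shape, with std::optional standing in for llvm::Optional; the platform class and the build string are invented for illustration.

```cpp
// Sketch of the out-parameter -> optional-return migration shown above.
#include <iostream>
#include <optional>
#include <string>

struct FakeRemotePlatform {
  bool connected = true;
  std::optional<std::string> GetRemoteOSBuildString() const {
    if (!connected)
      return std::nullopt;        // was: s.clear(); return false;
    return std::string("20G224"); // was: s = "20G224"; return true;
  }
};

int main() {
  FakeRemotePlatform platform;
  if (std::optional<std::string> build = platform.GetRemoteOSBuildString())
    std::cout << "remote build: " << *build << '\n';
  else
    std::cout << "remote build unavailable\n";
}
```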
diff --git a/lldb/source/Target/Statistics.cpp b/lldb/source/Target/Statistics.cpp
new file mode 100644
index 000000000000..1b205c533519
--- /dev/null
+++ b/lldb/source/Target/Statistics.cpp
@@ -0,0 +1,196 @@
+//===-- Statistics.cpp ----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "lldb/Target/Statistics.h"
+
+#include "lldb/Core/Debugger.h"
+#include "lldb/Core/Module.h"
+#include "lldb/Symbol/SymbolFile.h"
+#include "lldb/Target/Process.h"
+#include "lldb/Target/Target.h"
+#include "lldb/Target/UnixSignals.h"
+
+using namespace lldb;
+using namespace lldb_private;
+using namespace llvm;
+
+static void EmplaceSafeString(llvm::json::Object &obj, llvm::StringRef key,
+ const std::string &str) {
+ if (str.empty())
+ return;
+ if (LLVM_LIKELY(llvm::json::isUTF8(str)))
+ obj.try_emplace(key, str);
+ else
+ obj.try_emplace(key, llvm::json::fixUTF8(str));
+}
+
+json::Value StatsSuccessFail::ToJSON() const {
+ return json::Object{{"successes", successes}, {"failures", failures}};
+}
+
+static double elapsed(const StatsTimepoint &start, const StatsTimepoint &end) {
+ StatsDuration elapsed = end.time_since_epoch() - start.time_since_epoch();
+ return elapsed.count();
+}
+
+void TargetStats::CollectStats(Target &target) {
+ m_module_identifiers.clear();
+ for (ModuleSP module_sp : target.GetImages().Modules())
+ m_module_identifiers.emplace_back((intptr_t)module_sp.get());
+}
+
+json::Value ModuleStats::ToJSON() const {
+ json::Object module;
+ EmplaceSafeString(module, "path", path);
+ EmplaceSafeString(module, "uuid", uuid);
+ EmplaceSafeString(module, "triple", triple);
+ module.try_emplace("identifier", identifier);
+ module.try_emplace("symbolTableParseTime", symtab_parse_time);
+ module.try_emplace("symbolTableIndexTime", symtab_index_time);
+ module.try_emplace("debugInfoParseTime", debug_parse_time);
+ module.try_emplace("debugInfoIndexTime", debug_index_time);
+ module.try_emplace("debugInfoByteSize", (int64_t)debug_info_size);
+ return module;
+}
+
+json::Value TargetStats::ToJSON(Target &target) {
+ CollectStats(target);
+
+ json::Array json_module_uuid_array;
+ for (auto module_identifier : m_module_identifiers)
+ json_module_uuid_array.emplace_back(module_identifier);
+
+ json::Object target_metrics_json{
+ {m_expr_eval.name, m_expr_eval.ToJSON()},
+ {m_frame_var.name, m_frame_var.ToJSON()},
+ {"moduleIdentifiers", std::move(json_module_uuid_array)}};
+
+ if (m_launch_or_attach_time && m_first_private_stop_time) {
+ double elapsed_time =
+ elapsed(*m_launch_or_attach_time, *m_first_private_stop_time);
+ target_metrics_json.try_emplace("launchOrAttachTime", elapsed_time);
+ }
+ if (m_launch_or_attach_time && m_first_public_stop_time) {
+ double elapsed_time =
+ elapsed(*m_launch_or_attach_time, *m_first_public_stop_time);
+ target_metrics_json.try_emplace("firstStopTime", elapsed_time);
+ }
+ target_metrics_json.try_emplace("targetCreateTime", m_create_time.count());
+
+ json::Array breakpoints_array;
+ double totalBreakpointResolveTime = 0.0;
+ // Report both the normal breakpoint list and the internal breakpoint list.
+ for (int i = 0; i < 2; ++i) {
+ BreakpointList &breakpoints = target.GetBreakpointList(i == 1);
+ std::unique_lock<std::recursive_mutex> lock;
+ breakpoints.GetListMutex(lock);
+ size_t num_breakpoints = breakpoints.GetSize();
+ for (size_t i = 0; i < num_breakpoints; i++) {
+ Breakpoint *bp = breakpoints.GetBreakpointAtIndex(i).get();
+ breakpoints_array.push_back(bp->GetStatistics());
+ totalBreakpointResolveTime += bp->GetResolveTime().count();
+ }
+ }
+
+ ProcessSP process_sp = target.GetProcessSP();
+ if (process_sp) {
+ UnixSignalsSP unix_signals_sp = process_sp->GetUnixSignals();
+ if (unix_signals_sp)
+ target_metrics_json.try_emplace("signals",
+ unix_signals_sp->GetHitCountStatistics());
+ uint32_t stop_id = process_sp->GetStopID();
+ target_metrics_json.try_emplace("stopCount", stop_id);
+ }
+ target_metrics_json.try_emplace("breakpoints", std::move(breakpoints_array));
+ target_metrics_json.try_emplace("totalBreakpointResolveTime",
+ totalBreakpointResolveTime);
+
+ return target_metrics_json;
+}
+
+void TargetStats::SetLaunchOrAttachTime() {
+ m_launch_or_attach_time = StatsClock::now();
+ m_first_private_stop_time = llvm::None;
+}
+
+void TargetStats::SetFirstPrivateStopTime() {
+ // Launching and attaching have many paths depending on whether synchronous
+ // mode was used or if we are stopping at the entry point or not. Only set
+ // the first stop time if it hasn't already been set.
+ if (!m_first_private_stop_time)
+ m_first_private_stop_time = StatsClock::now();
+}
+
+void TargetStats::SetFirstPublicStopTime() {
+ // Launching and attaching have many paths depending on whether synchronous
+ // mode was used or if we are stopping at the entry point or not. Only set
+ // the first stop time if it hasn't already been set.
+ if (!m_first_public_stop_time)
+ m_first_public_stop_time = StatsClock::now();
+}
+
+bool DebuggerStats::g_collecting_stats = false;
+
+llvm::json::Value DebuggerStats::ReportStatistics(Debugger &debugger,
+ Target *target) {
+ json::Array json_targets;
+ json::Array json_modules;
+ double symtab_parse_time = 0.0;
+ double symtab_index_time = 0.0;
+ double debug_parse_time = 0.0;
+ double debug_index_time = 0.0;
+ uint64_t debug_info_size = 0;
+ if (target) {
+ json_targets.emplace_back(target->ReportStatistics());
+ } else {
+ for (const auto &target : debugger.GetTargetList().Targets())
+ json_targets.emplace_back(target->ReportStatistics());
+ }
+ std::vector<ModuleStats> modules;
+ std::lock_guard<std::recursive_mutex> guard(
+ Module::GetAllocationModuleCollectionMutex());
+ const size_t num_modules = Module::GetNumberAllocatedModules();
+ for (size_t image_idx = 0; image_idx < num_modules; ++image_idx) {
+ Module *module = Module::GetAllocatedModuleAtIndex(image_idx);
+ ModuleStats module_stat;
+ module_stat.identifier = (intptr_t)module;
+ module_stat.path = module->GetFileSpec().GetPath();
+ if (ConstString object_name = module->GetObjectName()) {
+ module_stat.path.append(1, '(');
+ module_stat.path.append(object_name.GetStringRef().str());
+ module_stat.path.append(1, ')');
+ }
+ module_stat.uuid = module->GetUUID().GetAsString();
+ module_stat.triple = module->GetArchitecture().GetTriple().str();
+ module_stat.symtab_parse_time = module->GetSymtabParseTime().count();
+ module_stat.symtab_index_time = module->GetSymtabIndexTime().count();
+ SymbolFile *sym_file = module->GetSymbolFile();
+ if (sym_file) {
+ module_stat.debug_index_time = sym_file->GetDebugInfoIndexTime().count();
+ module_stat.debug_parse_time = sym_file->GetDebugInfoParseTime().count();
+ module_stat.debug_info_size = sym_file->GetDebugInfoSize();
+ }
+ symtab_parse_time += module_stat.symtab_parse_time;
+ symtab_index_time += module_stat.symtab_index_time;
+ debug_parse_time += module_stat.debug_parse_time;
+ debug_index_time += module_stat.debug_index_time;
+ debug_info_size += module_stat.debug_info_size;
+ json_modules.emplace_back(module_stat.ToJSON());
+ }
+
+ json::Object global_stats{
+ {"targets", std::move(json_targets)},
+ {"modules", std::move(json_modules)},
+ {"totalSymbolTableParseTime", symtab_parse_time},
+ {"totalSymbolTableIndexTime", symtab_index_time},
+ {"totalDebugInfoParseTime", debug_parse_time},
+ {"totalDebugInfoIndexTime", debug_index_time},
+ {"totalDebugInfoByteSize", debug_info_size},
+ };
+ return std::move(global_stats);
+}
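The new Statistics.cpp above assembles its report with llvm/Support/JSON.h. A minimal sketch of that pattern, separate from the diff, follows; the values are invented and only the key names are taken from the code above.

```cpp
// Build nested json Objects/Arrays and stream them out, as Statistics.cpp does.
#include "llvm/Support/JSON.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>

int main() {
  llvm::json::Object module{{"path", "/tmp/a.out"},
                            {"symbolTableParseTime", 0.012},
                            {"debugInfoByteSize", int64_t(4096)}};

  llvm::json::Array modules;
  modules.push_back(std::move(module));

  llvm::json::Object stats{{"modules", std::move(modules)},
                           {"totalDebugInfoByteSize", int64_t(4096)}};

  // json::Value knows how to print itself to a raw_ostream.
  llvm::outs() << llvm::json::Value(std::move(stats)) << '\n';
}
```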
diff --git a/lldb/source/Target/StopInfo.cpp b/lldb/source/Target/StopInfo.cpp
index aeb97f1919eb..1de281b1761f 100644
--- a/lldb/source/Target/StopInfo.cpp
+++ b/lldb/source/Target/StopInfo.cpp
@@ -307,7 +307,7 @@ protected:
// There's one other complication here. We may have run an async
// breakpoint callback that said we should stop. We only want to
- // override that if another breakpoint action says we shouldn't
+ // override that if another breakpoint action says we shouldn't
// stop. If nobody else has an opinion, then we should stop if the
// async callback says we should. An example of this is the async
// shared library load notification breakpoint and the setting
@@ -425,7 +425,7 @@ protected:
}
internal_breakpoint = bp_loc_sp->GetBreakpoint().IsInternal();
-
+
// First run the precondition, but since the precondition is per
// breakpoint, only run it once per breakpoint.
std::pair<std::unordered_set<break_id_t>::iterator, bool> result =
@@ -535,7 +535,7 @@ protected:
else
actually_said_continue = true;
}
-
+
// If we are going to stop for this breakpoint, then remove the
// breakpoint.
if (callback_says_stop && bp_loc_sp &&
@@ -579,7 +579,7 @@ protected:
// Override should_stop decision when we have completed step plan
// additionally to the breakpoint
m_should_stop = true;
-
+
// We know we're stopping for a completed plan and we don't want to
// show the breakpoint stop, so compute the public stop info immediately
// here.
@@ -615,7 +615,7 @@ public:
// performing watchpoint actions.
class WatchpointSentry {
public:
- WatchpointSentry(ProcessSP p_sp, WatchpointSP w_sp) : process_sp(p_sp),
+ WatchpointSentry(ProcessSP p_sp, WatchpointSP w_sp) : process_sp(p_sp),
watchpoint_sp(w_sp) {
if (process_sp && watchpoint_sp) {
const bool notify = false;
@@ -624,7 +624,7 @@ public:
process_sp->AddPreResumeAction(SentryPreResumeAction, this);
}
}
-
+
void DoReenable() {
if (process_sp && watchpoint_sp) {
bool was_disabled = watchpoint_sp->IsDisabledDuringEphemeralMode();
@@ -637,13 +637,13 @@ public:
}
}
}
-
+
~WatchpointSentry() {
DoReenable();
if (process_sp)
process_sp->ClearPreResumeAction(SentryPreResumeAction, this);
}
-
+
static bool SentryPreResumeAction(void *sentry_void) {
WatchpointSentry *sentry = (WatchpointSentry *) sentry_void;
sentry->DoReenable();
@@ -724,14 +724,14 @@ protected:
// course of this code. Also by default we're going to stop, so set that
// here.
m_should_stop = true;
-
+
ThreadSP thread_sp(m_thread_wp.lock());
if (thread_sp) {
WatchpointSP wp_sp(
thread_sp->CalculateTarget()->GetWatchpointList().FindByID(
- GetValue()));
+ GetValue()));
if (wp_sp) {
ExecutionContext exe_ctx(thread_sp->GetStackFrameAtIndex(0));
ProcessSP process_sp = exe_ctx.GetProcessSP();
@@ -764,7 +764,7 @@ protected:
true, // stop_other_threads
new_plan_status));
if (new_plan_sp && new_plan_status.Success()) {
- new_plan_sp->SetIsMasterPlan(true);
+ new_plan_sp->SetIsControllingPlan(true);
new_plan_sp->SetOkayToDiscard(false);
new_plan_sp->SetPrivate(true);
}
@@ -889,12 +889,12 @@ protected:
bool old_async = debugger.GetAsyncExecution();
debugger.SetAsyncExecution(true);
-
+
StoppointCallbackContext context(event_ptr, exe_ctx, false);
bool stop_requested = wp_sp->InvokeCallback(&context);
-
+
debugger.SetAsyncExecution(old_async);
-
+
// Also make sure that the callback hasn't continued the target. If
// it did, when we'll set m_should_stop to false and get out of here.
if (HasTargetRunSinceMe())
@@ -1154,6 +1154,103 @@ protected:
bool m_performed_action;
};
+// StopInfoFork
+
+class StopInfoFork : public StopInfo {
+public:
+ StopInfoFork(Thread &thread, lldb::pid_t child_pid, lldb::tid_t child_tid)
+ : StopInfo(thread, child_pid), m_performed_action(false),
+ m_child_pid(child_pid), m_child_tid(child_tid) {}
+
+ ~StopInfoFork() override = default;
+
+ bool ShouldStop(Event *event_ptr) override { return false; }
+
+ StopReason GetStopReason() const override { return eStopReasonFork; }
+
+ const char *GetDescription() override { return "fork"; }
+
+protected:
+ void PerformAction(Event *event_ptr) override {
+ // Only perform the action once
+ if (m_performed_action)
+ return;
+ m_performed_action = true;
+ ThreadSP thread_sp(m_thread_wp.lock());
+ if (thread_sp)
+ thread_sp->GetProcess()->DidFork(m_child_pid, m_child_tid);
+ }
+
+ bool m_performed_action;
+
+private:
+ lldb::pid_t m_child_pid;
+ lldb::tid_t m_child_tid;
+};
+
+// StopInfoVFork
+
+class StopInfoVFork : public StopInfo {
+public:
+ StopInfoVFork(Thread &thread, lldb::pid_t child_pid, lldb::tid_t child_tid)
+ : StopInfo(thread, child_pid), m_performed_action(false),
+ m_child_pid(child_pid), m_child_tid(child_tid) {}
+
+ ~StopInfoVFork() override = default;
+
+ bool ShouldStop(Event *event_ptr) override { return false; }
+
+ StopReason GetStopReason() const override { return eStopReasonVFork; }
+
+ const char *GetDescription() override { return "vfork"; }
+
+protected:
+ void PerformAction(Event *event_ptr) override {
+ // Only perform the action once
+ if (m_performed_action)
+ return;
+ m_performed_action = true;
+ ThreadSP thread_sp(m_thread_wp.lock());
+ if (thread_sp)
+ thread_sp->GetProcess()->DidVFork(m_child_pid, m_child_tid);
+ }
+
+ bool m_performed_action;
+
+private:
+ lldb::pid_t m_child_pid;
+ lldb::tid_t m_child_tid;
+};
+
+// StopInfoVForkDone
+
+class StopInfoVForkDone : public StopInfo {
+public:
+ StopInfoVForkDone(Thread &thread)
+ : StopInfo(thread, 0), m_performed_action(false) {}
+
+ ~StopInfoVForkDone() override = default;
+
+ bool ShouldStop(Event *event_ptr) override { return false; }
+
+ StopReason GetStopReason() const override { return eStopReasonVForkDone; }
+
+ const char *GetDescription() override { return "vforkdone"; }
+
+protected:
+ void PerformAction(Event *event_ptr) override {
+ // Only perform the action once
+ if (m_performed_action)
+ return;
+ m_performed_action = true;
+ ThreadSP thread_sp(m_thread_wp.lock());
+ if (thread_sp)
+ thread_sp->GetProcess()->DidVForkDone();
+ }
+
+ bool m_performed_action;
+};
+
} // namespace lldb_private
StopInfoSP StopInfo::CreateStopReasonWithBreakpointSiteID(Thread &thread,
@@ -1175,6 +1272,7 @@ StopInfo::CreateStopReasonWithWatchpointID(Thread &thread, break_id_t watch_id,
StopInfoSP StopInfo::CreateStopReasonWithSignal(Thread &thread, int signo,
const char *description) {
+ thread.GetProcess()->GetUnixSignals()->IncrementSignalHitCount(signo);
return StopInfoSP(new StopInfoUnixSignal(thread, signo, description));
}
@@ -1203,6 +1301,23 @@ StopInfoSP StopInfo::CreateStopReasonWithExec(Thread &thread) {
return StopInfoSP(new StopInfoExec(thread));
}
+StopInfoSP StopInfo::CreateStopReasonFork(Thread &thread,
+ lldb::pid_t child_pid,
+ lldb::tid_t child_tid) {
+ return StopInfoSP(new StopInfoFork(thread, child_pid, child_tid));
+}
+
+
+StopInfoSP StopInfo::CreateStopReasonVFork(Thread &thread,
+ lldb::pid_t child_pid,
+ lldb::tid_t child_tid) {
+ return StopInfoSP(new StopInfoVFork(thread, child_pid, child_tid));
+}
+
+StopInfoSP StopInfo::CreateStopReasonVForkDone(Thread &thread) {
+ return StopInfoSP(new StopInfoVForkDone(thread));
+}
+
ValueObjectSP StopInfo::GetReturnValueObject(StopInfoSP &stop_info_sp) {
if (stop_info_sp &&
stop_info_sp->GetStopReason() == eStopReasonPlanComplete) {
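The three new stop-info classes above share one shape: they never halt the target on their own (ShouldStop returns false) and they perform a one-shot notification to the process. A standalone analogue of that pattern is sketched below; every name in it is an invented stand-in, no lldb headers are used.

```cpp
// One-shot "perform action" stop reason, mirroring StopInfoFork's
// m_performed_action guard in the diff above.
#include <cstdint>
#include <iostream>

class FakeStopInfo {
public:
  virtual ~FakeStopInfo() = default;
  virtual bool ShouldStop() { return true; }
  virtual void PerformAction() {}
};

class FakeStopInfoFork : public FakeStopInfo {
public:
  FakeStopInfoFork(uint64_t child_pid, uint64_t child_tid)
      : m_child_pid(child_pid), m_child_tid(child_tid) {}

  bool ShouldStop() override { return false; } // a fork alone doesn't halt
  void PerformAction() override {
    if (m_performed_action)
      return; // notify the process only once
    m_performed_action = true;
    std::cout << "DidFork(pid=" << m_child_pid << ", tid=" << m_child_tid
              << ")\n";
  }

private:
  bool m_performed_action = false;
  uint64_t m_child_pid;
  uint64_t m_child_tid;
};

int main() {
  FakeStopInfoFork info(1234, 1235);
  info.PerformAction();
  info.PerformAction(); // second call is a no-op
  std::cout << "should stop: " << std::boolalpha << info.ShouldStop() << '\n';
}
```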
diff --git a/lldb/source/Target/Target.cpp b/lldb/source/Target/Target.cpp
index 1f8e8c54fa9e..28575b50cf96 100644
--- a/lldb/source/Target/Target.cpp
+++ b/lldb/source/Target/Target.cpp
@@ -60,6 +60,7 @@
#include "lldb/Utility/Timer.h"
#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/SetVector.h"
#include <memory>
#include <mutex>
@@ -95,14 +96,10 @@ Target::Target(Debugger &debugger, const ArchSpec &target_arch,
m_watchpoint_list(), m_process_sp(), m_search_filter_sp(),
m_image_search_paths(ImageSearchPathsChanged, this),
m_source_manager_up(), m_stop_hooks(), m_stop_hook_next_id(0),
- m_latest_stop_hook_id(0),
- m_valid(true), m_suppress_stop_hooks(false),
+ m_latest_stop_hook_id(0), m_valid(true), m_suppress_stop_hooks(false),
m_is_dummy_target(is_dummy_target),
m_frame_recognizer_manager_up(
- std::make_unique<StackFrameRecognizerManager>()),
- m_stats_storage(static_cast<int>(StatisticKind::StatisticMax))
-
-{
+ std::make_unique<StackFrameRecognizerManager>()) {
SetEventName(eBroadcastBitBreakpointChanged, "breakpoint-changed");
SetEventName(eBroadcastBitModulesLoaded, "modules-loaded");
SetEventName(eBroadcastBitModulesUnloaded, "modules-unloaded");
@@ -1022,7 +1019,7 @@ Status Target::SerializeBreakpointsToFile(const FileSpec &file,
}
StreamFile out_file(path.c_str(),
- File::eOpenOptionTruncate | File::eOpenOptionWrite |
+ File::eOpenOptionTruncate | File::eOpenOptionWriteOnly |
File::eOpenOptionCanCreate |
File::eOpenOptionCloseOnExec,
lldb::eFilePermissionsFileDefault);
@@ -1400,6 +1397,7 @@ void Target::SetExecutableModule(ModuleSP &executable_sp,
ClearModules(false);
if (executable_sp) {
+ ElapsedTime elapsed(m_stats.GetCreateTime());
LLDB_SCOPED_TIMERF("Target::SetExecutableModule (executable = '%s')",
executable_sp->GetFileSpec().GetPath().c_str());
@@ -1906,6 +1904,68 @@ size_t Target::ReadCStringFromMemory(const Address &addr, char *dst,
return total_cstr_len;
}
+addr_t Target::GetReasonableReadSize(const Address &addr) {
+ addr_t load_addr = addr.GetLoadAddress(this);
+ if (load_addr != LLDB_INVALID_ADDRESS && m_process_sp) {
+ // Avoid crossing cache line boundaries.
+ addr_t cache_line_size = m_process_sp->GetMemoryCacheLineSize();
+ return cache_line_size - (load_addr % cache_line_size);
+ }
+
+ // The read is going to go to the file cache, so we can just pick a largish
+ // value.
+ return 0x1000;
+}
+
+size_t Target::ReadStringFromMemory(const Address &addr, char *dst,
+ size_t max_bytes, Status &error,
+ size_t type_width, bool force_live_memory) {
+ if (!dst || !max_bytes || !type_width || max_bytes < type_width)
+ return 0;
+
+ size_t total_bytes_read = 0;
+
+ // Ensure a null terminator independent of the number of bytes that is
+ // read.
+ memset(dst, 0, max_bytes);
+ size_t bytes_left = max_bytes - type_width;
+
+ const char terminator[4] = {'\0', '\0', '\0', '\0'};
+ assert(sizeof(terminator) >= type_width && "Attempting to validate a "
+ "string with more than 4 bytes "
+ "per character!");
+
+ Address address = addr;
+ char *curr_dst = dst;
+
+ error.Clear();
+ while (bytes_left > 0 && error.Success()) {
+ addr_t bytes_to_read =
+ std::min<addr_t>(bytes_left, GetReasonableReadSize(address));
+ size_t bytes_read =
+ ReadMemory(address, curr_dst, bytes_to_read, error, force_live_memory);
+
+ if (bytes_read == 0)
+ break;
+
+ // Search for a null terminator of correct size and alignment in
+ // bytes_read
+ size_t aligned_start = total_bytes_read - total_bytes_read % type_width;
+ for (size_t i = aligned_start;
+ i + type_width <= total_bytes_read + bytes_read; i += type_width)
+ if (::memcmp(&dst[i], terminator, type_width) == 0) {
+ error.Clear();
+ return i;
+ }
+
+ total_bytes_read += bytes_read;
+ curr_dst += bytes_read;
+ address.Slide(bytes_read);
+ bytes_left -= bytes_read;
+ }
+ return total_bytes_read;
+}
+
size_t Target::ReadScalarIntegerFromMemory(const Address &addr, uint32_t byte_size,
bool is_signed, Scalar &scalar,
Status &error,
@@ -2231,7 +2291,10 @@ std::vector<TypeSystem *> Target::GetScratchTypeSystems(bool create_on_demand) {
if (!m_valid)
return {};
- std::vector<TypeSystem *> scratch_type_systems;
+ // Some TypeSystem instances are associated with several LanguageTypes so
+ // they will show up several times in the loop below. The SetVector filters
+ // out all duplicates as they serve no use for the caller.
+ llvm::SetVector<TypeSystem *> scratch_type_systems;
LanguageSet languages_for_expressions =
Language::GetLanguagesSupportingTypeSystemsForExpressions();
@@ -2247,10 +2310,10 @@ std::vector<TypeSystem *> Target::GetScratchTypeSystems(bool create_on_demand) {
"system available",
Language::GetNameForLanguageType(language));
else
- scratch_type_systems.emplace_back(&type_system_or_err.get());
+ scratch_type_systems.insert(&type_system_or_err.get());
}
- return scratch_type_systems;
+ return scratch_type_systems.takeVector();
}
PersistentExpressionState *
@@ -2345,35 +2408,22 @@ void Target::SettingsInitialize() { Process::SettingsInitialize(); }
void Target::SettingsTerminate() { Process::SettingsTerminate(); }
FileSpecList Target::GetDefaultExecutableSearchPaths() {
- TargetPropertiesSP properties_sp(Target::GetGlobalProperties());
- if (properties_sp)
- return properties_sp->GetExecutableSearchPaths();
- return FileSpecList();
+ return Target::GetGlobalProperties().GetExecutableSearchPaths();
}
FileSpecList Target::GetDefaultDebugFileSearchPaths() {
- TargetPropertiesSP properties_sp(Target::GetGlobalProperties());
- if (properties_sp)
- return properties_sp->GetDebugFileSearchPaths();
- return FileSpecList();
+ return Target::GetGlobalProperties().GetDebugFileSearchPaths();
}
ArchSpec Target::GetDefaultArchitecture() {
- TargetPropertiesSP properties_sp(Target::GetGlobalProperties());
- if (properties_sp)
- return properties_sp->GetDefaultArchitecture();
- return ArchSpec();
+ return Target::GetGlobalProperties().GetDefaultArchitecture();
}
void Target::SetDefaultArchitecture(const ArchSpec &arch) {
- TargetPropertiesSP properties_sp(Target::GetGlobalProperties());
- if (properties_sp) {
- LLDB_LOG(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_TARGET),
- "Target::SetDefaultArchitecture setting target's "
- "default architecture to {0} ({1})",
- arch.GetArchitectureName(), arch.GetTriple().getTriple());
- return properties_sp->SetDefaultArchitecture(arch);
- }
+ LLDB_LOG(GetLogIfAllCategoriesSet(LIBLLDB_LOG_TARGET),
+ "setting target's default architecture to {0} ({1})",
+ arch.GetArchitectureName(), arch.GetTriple().getTriple());
+ Target::GetGlobalProperties().SetDefaultArchitecture(arch);
}
Target *Target::GetTargetFromContexts(const ExecutionContext *exe_ctx_ptr,
@@ -2399,8 +2449,10 @@ ExpressionResults Target::EvaluateExpression(
ExpressionResults execution_results = eExpressionSetupError;
- if (expr.empty())
+ if (expr.empty()) {
+ m_stats.GetExpressionStats().NotifyFailure();
return execution_results;
+ }
// We shouldn't run stop hooks in expressions.
bool old_suppress_value = m_suppress_stop_hooks;
@@ -2445,6 +2497,10 @@ ExpressionResults Target::EvaluateExpression(
fixed_expression, ctx_obj);
}
+ if (execution_results == eExpressionCompleted)
+ m_stats.GetExpressionStats().NotifySuccess();
+ else
+ m_stats.GetExpressionStats().NotifyFailure();
return execution_results;
}
@@ -2768,12 +2824,12 @@ bool Target::RunStopHooks() {
return false;
}
-const TargetPropertiesSP &Target::GetGlobalProperties() {
+TargetProperties &Target::GetGlobalProperties() {
// NOTE: intentional leak so we don't crash if global destructor chain gets
// called as other threads still use the result of this function
- static TargetPropertiesSP *g_settings_sp_ptr =
- new TargetPropertiesSP(new TargetProperties(nullptr));
- return *g_settings_sp_ptr;
+ static TargetProperties *g_settings_ptr =
+ new TargetProperties(nullptr);
+ return *g_settings_ptr;
}
Status Target::Install(ProcessLaunchInfo *launch_info) {
@@ -2908,6 +2964,7 @@ bool Target::SetSectionUnloaded(const lldb::SectionSP &section_sp,
void Target::ClearAllLoadedSections() { m_section_load_history.Clear(); }
Status Target::Launch(ProcessLaunchInfo &launch_info, Stream *stream) {
+ m_stats.SetLaunchOrAttachTime();
Status error;
Log *log(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_TARGET));
@@ -2936,17 +2993,9 @@ Status Target::Launch(ProcessLaunchInfo &launch_info, Stream *stream) {
launch_info.GetFlags().Set(eLaunchFlagDebug);
if (launch_info.IsScriptedProcess()) {
- TargetPropertiesSP properties_sp = GetGlobalProperties();
-
- if (!properties_sp) {
- LLDB_LOGF(log, "Target::%s Couldn't fetch target global properties.",
- __FUNCTION__);
- return error;
- }
-
// Only copy scripted process launch options.
- ProcessLaunchInfo &default_launch_info =
- const_cast<ProcessLaunchInfo &>(properties_sp->GetProcessLaunchInfo());
+ ProcessLaunchInfo &default_launch_info = const_cast<ProcessLaunchInfo &>(
+ GetGlobalProperties().GetProcessLaunchInfo());
default_launch_info.SetProcessPluginName("ScriptedProcess");
default_launch_info.SetScriptedProcessClassName(
@@ -2993,7 +3042,7 @@ Status Target::Launch(ProcessLaunchInfo &launch_info, Stream *stream) {
DeleteCurrentProcess();
m_process_sp =
- GetPlatform()->DebugProcess(launch_info, debugger, this, error);
+ GetPlatform()->DebugProcess(launch_info, debugger, *this, error);
} else {
LLDB_LOGF(log,
@@ -3119,6 +3168,7 @@ llvm::Expected<TraceSP> Target::GetTraceOrCreate() {
}
Status Target::Attach(ProcessAttachInfo &attach_info, Stream *stream) {
+ m_stats.SetLaunchOrAttachTime();
auto state = eStateInvalid;
auto process_sp = GetProcessSP();
if (process_sp) {
@@ -3731,7 +3781,7 @@ TargetProperties::TargetProperties(Target *target)
: Properties(), m_launch_info(), m_target(target) {
if (target) {
m_collection_sp =
- OptionValueProperties::CreateLocalCopy(*Target::GetGlobalProperties());
+ OptionValueProperties::CreateLocalCopy(Target::GetGlobalProperties());
// Set callbacks to update launch_info whenever "settins set" updated any
// of these properties
@@ -3781,7 +3831,7 @@ TargetProperties::TargetProperties(Target *target)
true, m_experimental_properties_up->GetValueProperties());
m_collection_sp->AppendProperty(
ConstString("process"), ConstString("Settings specific to processes."),
- true, Process::GetGlobalProperties()->GetValueProperties());
+ true, Process::GetGlobalProperties().GetValueProperties());
}
}
@@ -3985,6 +4035,45 @@ Environment TargetProperties::GetEnvironment() const {
return ComputeEnvironment();
}
+Environment TargetProperties::GetInheritedEnvironment() const {
+ Environment environment;
+
+ if (m_target == nullptr)
+ return environment;
+
+ if (!m_collection_sp->GetPropertyAtIndexAsBoolean(
+ nullptr, ePropertyInheritEnv,
+ g_target_properties[ePropertyInheritEnv].default_uint_value != 0))
+ return environment;
+
+ PlatformSP platform_sp = m_target->GetPlatform();
+ if (platform_sp == nullptr)
+ return environment;
+
+ Environment platform_environment = platform_sp->GetEnvironment();
+ for (const auto &KV : platform_environment)
+ environment[KV.first()] = KV.second;
+
+ Args property_unset_environment;
+ m_collection_sp->GetPropertyAtIndexAsArgs(nullptr, ePropertyUnsetEnvVars,
+ property_unset_environment);
+ for (const auto &var : property_unset_environment)
+ environment.erase(var.ref());
+
+ return environment;
+}
+
+Environment TargetProperties::GetTargetEnvironment() const {
+ Args property_environment;
+ m_collection_sp->GetPropertyAtIndexAsArgs(nullptr, ePropertyEnvVars,
+ property_environment);
+ Environment environment;
+ for (const auto &KV : Environment(property_environment))
+ environment[KV.first()] = KV.second;
+
+ return environment;
+}
+
void TargetProperties::SetEnvironment(Environment env) {
// TODO: Get rid of the Args intermediate step
const uint32_t idx = ePropertyEnvVars;
@@ -4249,16 +4338,6 @@ void TargetProperties::SetDisplayRecognizedArguments(bool b) {
m_collection_sp->SetPropertyAtIndexAsBoolean(nullptr, idx, b);
}
-bool TargetProperties::GetNonStopModeEnabled() const {
- const uint32_t idx = ePropertyNonStopModeEnabled;
- return m_collection_sp->GetPropertyAtIndexAsBoolean(nullptr, idx, false);
-}
-
-void TargetProperties::SetNonStopModeEnabled(bool b) {
- const uint32_t idx = ePropertyNonStopModeEnabled;
- m_collection_sp->SetPropertyAtIndexAsBoolean(nullptr, idx, b);
-}
-
const ProcessLaunchInfo &TargetProperties::GetProcessLaunchInfo() const {
return m_launch_info;
}
@@ -4435,3 +4514,6 @@ std::recursive_mutex &Target::GetAPIMutex() {
else
return m_mutex;
}
+
+/// Get metrics associated with this target in JSON format.
+llvm::json::Value Target::ReportStatistics() { return m_stats.ToJSON(*this); }
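Among the Target.cpp changes above, GetScratchTypeSystems switches to llvm::SetVector so a TypeSystem shared by several languages is reported once, in first-seen order. A small sketch of that container's behavior (the pointers and values are invented):

```cpp
// SetVector drops duplicates while preserving insertion order; takeVector()
// hands back a plain std::vector, as in GetScratchTypeSystems above.
#include "llvm/ADT/SetVector.h"
#include "llvm/Support/raw_ostream.h"
#include <vector>

int main() {
  int a = 1, b = 2;
  llvm::SetVector<int *> unique;
  for (int *p : {&a, &b, &a, &b, &a}) // same pointer seen several times
    unique.insert(p);

  std::vector<int *> result = unique.takeVector();
  llvm::outs() << "kept " << result.size() << " unique entries\n"; // kept 2
}
```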
diff --git a/lldb/source/Target/TargetProperties.td b/lldb/source/Target/TargetProperties.td
index 8f627ad0f1a8..063ba0a6c25a 100644
--- a/lldb/source/Target/TargetProperties.td
+++ b/lldb/source/Target/TargetProperties.td
@@ -163,9 +163,6 @@ let Definition = "target" in {
def DisplayRecognizedArguments: Property<"display-recognized-arguments", "Boolean">,
DefaultFalse,
Desc<"Show recognized arguments in variable listings by default.">;
- def NonStopModeEnabled: Property<"non-stop-mode", "Boolean">,
- DefaultFalse,
- Desc<"Disable lock-step debugging, instead control threads independently.">;
def RequireHardwareBreakpoints: Property<"require-hardware-breakpoint", "Boolean">,
DefaultFalse,
Desc<"Require all breakpoints to be hardware breakpoints.">;
@@ -239,6 +236,10 @@ let Definition = "process" in {
def VirtualAddressableBits: Property<"virtual-addressable-bits", "UInt64">,
DefaultUnsignedValue<0>,
Desc<"The number of bits used for addressing. If the value is 39, then bits 0..38 are used for addressing. The default value of 0 means unspecified.">;
+ def FollowForkMode: Property<"follow-fork-mode", "Enum">,
+ DefaultEnumValue<"eFollowParent">,
+ EnumValues<"OptionEnumValues(g_follow_fork_mode_values)">,
+ Desc<"Debugger's behavior upon fork or vfork.">;
}
let Definition = "platform" in {
diff --git a/lldb/source/Target/Thread.cpp b/lldb/source/Target/Thread.cpp
index b423f1b5f1fe..1b32331d98f7 100644
--- a/lldb/source/Target/Thread.cpp
+++ b/lldb/source/Target/Thread.cpp
@@ -55,12 +55,11 @@
using namespace lldb;
using namespace lldb_private;
-const ThreadPropertiesSP &Thread::GetGlobalProperties() {
+ThreadProperties &Thread::GetGlobalProperties() {
// NOTE: intentional leak so we don't crash if global destructor chain gets
// called as other threads still use the result of this function
- static ThreadPropertiesSP *g_settings_sp_ptr =
- new ThreadPropertiesSP(new ThreadProperties(true));
- return *g_settings_sp_ptr;
+ static ThreadProperties *g_settings_ptr = new ThreadProperties(true);
+ return *g_settings_ptr;
}
#define LLDB_PROPERTIES_thread
@@ -103,7 +102,7 @@ ThreadProperties::ThreadProperties(bool is_global) : Properties() {
m_collection_sp->Initialize(g_thread_properties);
} else
m_collection_sp =
- OptionValueProperties::CreateLocalCopy(*Thread::GetGlobalProperties());
+ OptionValueProperties::CreateLocalCopy(Thread::GetGlobalProperties());
}
ThreadProperties::~ThreadProperties() = default;
@@ -845,7 +844,7 @@ bool Thread::ShouldStop(Event *event_ptr) {
// we're done, otherwise we forward this to the next plan in the
// stack below.
done_processing_current_plan =
- (plan_ptr->IsMasterPlan() && !plan_ptr->OkayToDiscard());
+ (plan_ptr->IsControllingPlan() && !plan_ptr->OkayToDiscard());
} else
done_processing_current_plan = true;
@@ -883,11 +882,11 @@ bool Thread::ShouldStop(Event *event_ptr) {
current_plan->GetName());
}
- // If a Master Plan wants to stop, we let it. Otherwise, see if the
- // plan's parent wants to stop.
+ // If a Controlling Plan wants to stop, we let it. Otherwise, see if
+ // the plan's parent wants to stop.
PopPlan();
- if (should_stop && current_plan->IsMasterPlan() &&
+ if (should_stop && current_plan->IsControllingPlan() &&
!current_plan->OkayToDiscard()) {
break;
}
@@ -906,8 +905,8 @@ bool Thread::ShouldStop(Event *event_ptr) {
should_stop = false;
}
- // One other potential problem is that we set up a master plan, then stop in
- // before it is complete - for instance by hitting a breakpoint during a
+ // One other potential problem is that we set up a controlling plan, then stop
+ // in before it is complete - for instance by hitting a breakpoint during a
// step-over - then do some step/finish/etc operations that wind up past the
// end point condition of the initial plan. We don't want to strand the
// original plan on the stack, This code clears stale plans off the stack.
@@ -1215,7 +1214,7 @@ void Thread::DiscardThreadPlans(bool force) {
GetPlans().DiscardAllPlans();
return;
}
- GetPlans().DiscardConsultingMasterPlans();
+ GetPlans().DiscardConsultingControllingPlans();
}
Status Thread::UnwindInnermostExpression() {
@@ -1915,7 +1914,7 @@ Status Thread::StepIn(bool source_step,
false, abort_other_plans, run_mode, error);
}
- new_plan_sp->SetIsMasterPlan(true);
+ new_plan_sp->SetIsControllingPlan(true);
new_plan_sp->SetOkayToDiscard(false);
// Why do we need to set the current thread by ID here???
@@ -1948,7 +1947,7 @@ Status Thread::StepOver(bool source_step,
true, abort_other_plans, run_mode, error);
}
- new_plan_sp->SetIsMasterPlan(true);
+ new_plan_sp->SetIsControllingPlan(true);
new_plan_sp->SetOkayToDiscard(false);
// Why do we need to set the current thread by ID here???
@@ -1972,7 +1971,7 @@ Status Thread::StepOut() {
abort_other_plans, nullptr, first_instruction, stop_other_threads,
eVoteYes, eVoteNoOpinion, 0, error));
- new_plan_sp->SetIsMasterPlan(true);
+ new_plan_sp->SetIsControllingPlan(true);
new_plan_sp->SetOkayToDiscard(false);
// Why do we need to set the current thread by ID here???
diff --git a/lldb/source/Target/ThreadPlan.cpp b/lldb/source/Target/ThreadPlan.cpp
index 6b55f3912d11..3b42831f1fbf 100644
--- a/lldb/source/Target/ThreadPlan.cpp
+++ b/lldb/source/Target/ThreadPlan.cpp
@@ -26,8 +26,8 @@ ThreadPlan::ThreadPlan(ThreadPlanKind kind, const char *name, Thread &thread,
m_takes_iteration_count(false), m_could_not_resolve_hw_bp(false),
m_thread(&thread), m_kind(kind), m_name(name), m_plan_complete_mutex(),
m_cached_plan_explains_stop(eLazyBoolCalculate), m_plan_complete(false),
- m_plan_private(false), m_okay_to_discard(true), m_is_master_plan(false),
- m_plan_succeeded(true) {
+ m_plan_private(false), m_okay_to_discard(true),
+ m_is_controlling_plan(false), m_plan_succeeded(true) {
SetID(GetNextID());
}
@@ -149,10 +149,10 @@ lldb::user_id_t ThreadPlan::GetNextID() {
void ThreadPlan::DidPush() {}
-void ThreadPlan::WillPop() {}
+void ThreadPlan::DidPop() {}
bool ThreadPlan::OkayToDiscard() {
- return IsMasterPlan() ? m_okay_to_discard : true;
+ return IsControllingPlan() ? m_okay_to_discard : true;
}
lldb::StateType ThreadPlan::RunState() {
diff --git a/lldb/source/Target/ThreadPlanBase.cpp b/lldb/source/Target/ThreadPlanBase.cpp
index c6c4d97c1655..46ae9c32a0de 100644
--- a/lldb/source/Target/ThreadPlanBase.cpp
+++ b/lldb/source/Target/ThreadPlanBase.cpp
@@ -40,7 +40,7 @@ ThreadPlanBase::ThreadPlanBase(Thread &thread)
#endif
new_tracer_sp->EnableTracing(thread.GetTraceEnabledState());
SetThreadPlanTracer(new_tracer_sp);
- SetIsMasterPlan(true);
+ SetIsControllingPlan(true);
}
ThreadPlanBase::~ThreadPlanBase() = default;
@@ -90,8 +90,8 @@ bool ThreadPlanBase::ShouldStop(Event *event_ptr) {
case eStopReasonWatchpoint:
if (stop_info_sp->ShouldStopSynchronous(event_ptr)) {
// If we are going to stop for a breakpoint, then unship the other
- // plans at this point. Don't force the discard, however, so Master
- // plans can stay in place if they want to.
+ // plans at this point. Don't force the discard, however, so
+ // Controlling plans can stay in place if they want to.
LLDB_LOGF(
log,
"Base plan discarding thread plans for thread tid = 0x%4.4" PRIx64
diff --git a/lldb/source/Target/ThreadPlanCallFunction.cpp b/lldb/source/Target/ThreadPlanCallFunction.cpp
index 3699a507d058..0336a9daf10a 100644
--- a/lldb/source/Target/ThreadPlanCallFunction.cpp
+++ b/lldb/source/Target/ThreadPlanCallFunction.cpp
@@ -33,7 +33,7 @@ using namespace lldb_private;
bool ThreadPlanCallFunction::ConstructorSetup(
Thread &thread, ABI *&abi, lldb::addr_t &start_load_addr,
lldb::addr_t &function_load_addr) {
- SetIsMasterPlan(true);
+ SetIsControllingPlan(true);
SetOkayToDiscard(false);
SetPrivate(true);
@@ -209,7 +209,7 @@ void ThreadPlanCallFunction::DoTakedown(bool success) {
}
}
-void ThreadPlanCallFunction::WillPop() { DoTakedown(PlanSucceeded()); }
+void ThreadPlanCallFunction::DidPop() { DoTakedown(PlanSucceeded()); }
void ThreadPlanCallFunction::GetDescription(Stream *s, DescriptionLevel level) {
if (level == eDescriptionLevelBrief) {
diff --git a/lldb/source/Target/ThreadPlanCallOnFunctionExit.cpp b/lldb/source/Target/ThreadPlanCallOnFunctionExit.cpp
index 7471e9b3d7ac..4bccf96d721b 100644
--- a/lldb/source/Target/ThreadPlanCallOnFunctionExit.cpp
+++ b/lldb/source/Target/ThreadPlanCallOnFunctionExit.cpp
@@ -18,7 +18,7 @@ ThreadPlanCallOnFunctionExit::ThreadPlanCallOnFunctionExit(
),
m_callback(callback) {
// We are not a user-generated plan.
- SetIsMasterPlan(false);
+ SetIsControllingPlan(false);
}
void ThreadPlanCallOnFunctionExit::DidPush() {
diff --git a/lldb/source/Target/ThreadPlanCallUserExpression.cpp b/lldb/source/Target/ThreadPlanCallUserExpression.cpp
index 9dddd850b6ab..d833a4d7ed27 100644
--- a/lldb/source/Target/ThreadPlanCallUserExpression.cpp
+++ b/lldb/source/Target/ThreadPlanCallUserExpression.cpp
@@ -39,7 +39,7 @@ ThreadPlanCallUserExpression::ThreadPlanCallUserExpression(
m_user_expression_sp(user_expression_sp) {
// User expressions are generally "User generated" so we should set them up
// to stop when done.
- SetIsMasterPlan(true);
+ SetIsControllingPlan(true);
SetOkayToDiscard(false);
}
@@ -59,8 +59,8 @@ void ThreadPlanCallUserExpression::DidPush() {
m_user_expression_sp->WillStartExecuting();
}
-void ThreadPlanCallUserExpression::WillPop() {
- ThreadPlanCallFunction::WillPop();
+void ThreadPlanCallUserExpression::DidPop() {
+ ThreadPlanCallFunction::DidPop();
if (m_user_expression_sp)
m_user_expression_sp.reset();
}
diff --git a/lldb/source/Target/ThreadPlanPython.cpp b/lldb/source/Target/ThreadPlanPython.cpp
index e83f0e9e715e..cd63d28a3934 100644
--- a/lldb/source/Target/ThreadPlanPython.cpp
+++ b/lldb/source/Target/ThreadPlanPython.cpp
@@ -31,7 +31,7 @@ ThreadPlanPython::ThreadPlanPython(Thread &thread, const char *class_name,
eVoteNoOpinion, eVoteNoOpinion),
m_class_name(class_name), m_args_data(args_data), m_did_push(false),
m_stop_others(false) {
- SetIsMasterPlan(true);
+ SetIsControllingPlan(true);
SetOkayToDiscard(true);
SetPrivate(false);
}
diff --git a/lldb/source/Target/ThreadPlanStack.cpp b/lldb/source/Target/ThreadPlanStack.cpp
index d25602d25b91..f09583cc50cc 100644
--- a/lldb/source/Target/ThreadPlanStack.cpp
+++ b/lldb/source/Target/ThreadPlanStack.cpp
@@ -150,10 +150,13 @@ lldb::ThreadPlanSP ThreadPlanStack::PopPlan() {
std::lock_guard<std::recursive_mutex> guard(m_stack_mutex);
assert(m_plans.size() > 1 && "Can't pop the base thread plan");
- lldb::ThreadPlanSP plan_sp = std::move(m_plans.back());
- m_completed_plans.push_back(plan_sp);
- plan_sp->WillPop();
+ // Note that moving the top element of the vector would leave it in an
+ // undefined state, and break the guarantee that the stack's thread plans are
+ // all valid.
+ lldb::ThreadPlanSP plan_sp = m_plans.back();
m_plans.pop_back();
+ m_completed_plans.push_back(plan_sp);
+ plan_sp->DidPop();
return plan_sp;
}
@@ -161,10 +164,13 @@ lldb::ThreadPlanSP ThreadPlanStack::DiscardPlan() {
std::lock_guard<std::recursive_mutex> guard(m_stack_mutex);
assert(m_plans.size() > 1 && "Can't discard the base thread plan");
- lldb::ThreadPlanSP plan_sp = std::move(m_plans.back());
- m_discarded_plans.push_back(plan_sp);
- plan_sp->WillPop();
+ // Note that moving the top element of the vector would leave it in an
+ // undefined state, and break the guarantee that the stack's thread plans are
+ // all valid.
+ lldb::ThreadPlanSP plan_sp = m_plans.back();
m_plans.pop_back();
+ m_discarded_plans.push_back(plan_sp);
+ plan_sp->DidPop();
return plan_sp;
}
@@ -207,35 +213,35 @@ void ThreadPlanStack::DiscardAllPlans() {
return;
}
-void ThreadPlanStack::DiscardConsultingMasterPlans() {
+void ThreadPlanStack::DiscardConsultingControllingPlans() {
std::lock_guard<std::recursive_mutex> guard(m_stack_mutex);
while (true) {
- int master_plan_idx;
+ int controlling_plan_idx;
bool discard = true;
- // Find the first master plan, see if it wants discarding, and if yes
+ // Find the first controlling plan, see if it wants discarding, and if yes
// discard up to it.
- for (master_plan_idx = m_plans.size() - 1; master_plan_idx >= 0;
- master_plan_idx--) {
- if (m_plans[master_plan_idx]->IsMasterPlan()) {
- discard = m_plans[master_plan_idx]->OkayToDiscard();
+ for (controlling_plan_idx = m_plans.size() - 1; controlling_plan_idx >= 0;
+ controlling_plan_idx--) {
+ if (m_plans[controlling_plan_idx]->IsControllingPlan()) {
+ discard = m_plans[controlling_plan_idx]->OkayToDiscard();
break;
}
}
- // If the master plan doesn't want to get discarded, then we're done.
+ // If the controlling plan doesn't want to get discarded, then we're done.
if (!discard)
return;
// First pop all the dependent plans:
- for (int i = m_plans.size() - 1; i > master_plan_idx; i--) {
+ for (int i = m_plans.size() - 1; i > controlling_plan_idx; i--) {
DiscardPlan();
}
- // Now discard the master plan itself.
+ // Now discard the controlling plan itself.
// The bottom-most plan never gets discarded. "OkayToDiscard" for it
// means discard it's dependent plans, but not it...
- if (master_plan_idx > 0) {
+ if (controlling_plan_idx > 0) {
DiscardPlan();
}
}
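The PopPlan/DiscardPlan hunks above replace a move from the vector's back element with a copy followed by pop_back, as the new comments explain. This standalone snippet (plain shared_ptr<int> values instead of thread plans) shows the window the old order left open:

```cpp
// Copy-then-pop keeps every element of the stack valid at all times; moving
// from the back element leaves a null shared_ptr in place until pop_back().
#include <cassert>
#include <memory>
#include <vector>

int main() {
  std::vector<std::shared_ptr<int>> plans;
  plans.push_back(std::make_shared<int>(1));
  plans.push_back(std::make_shared<int>(2));

  // What the new code does: copy the back element, then pop it.
  std::shared_ptr<int> popped = plans.back();
  plans.pop_back();
  assert(popped && *popped == 2);
  assert(plans.back() && *plans.back() == 1); // stack still fully valid

  // What the old order risked: after std::move the slot holds a null pointer
  // until pop_back(), so anything walking the stack in that window sees it.
  std::shared_ptr<int> moved = std::move(plans.back());
  assert(plans.back() == nullptr);
  plans.pop_back();
  assert(moved && *moved == 1);
}
```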
diff --git a/lldb/source/Target/ThreadPlanStepOverBreakpoint.cpp b/lldb/source/Target/ThreadPlanStepOverBreakpoint.cpp
index 965a7b3a9960..f007b0fa9371 100644
--- a/lldb/source/Target/ThreadPlanStepOverBreakpoint.cpp
+++ b/lldb/source/Target/ThreadPlanStepOverBreakpoint.cpp
@@ -124,9 +124,7 @@ bool ThreadPlanStepOverBreakpoint::WillStop() {
return true;
}
-void ThreadPlanStepOverBreakpoint::WillPop() {
- ReenableBreakpointSite();
-}
+void ThreadPlanStepOverBreakpoint::DidPop() { ReenableBreakpointSite(); }
bool ThreadPlanStepOverBreakpoint::MischiefManaged() {
lldb::addr_t pc_addr = GetThread().GetRegisterContext()->GetPC();
diff --git a/lldb/source/Target/Trace.cpp b/lldb/source/Target/Trace.cpp
index 827f3264c096..38b3a7cb006d 100644
--- a/lldb/source/Target/Trace.cpp
+++ b/lldb/source/Target/Trace.cpp
@@ -67,30 +67,28 @@ Trace::FindPluginForPostMortemProcess(Debugger &debugger,
if (!json::fromJSON(trace_session_file, json_session, root))
return root.getError();
- ConstString plugin_name(json_session.trace.type);
- if (auto create_callback = PluginManager::GetTraceCreateCallback(plugin_name))
+ if (auto create_callback =
+ PluginManager::GetTraceCreateCallback(json_session.trace.type))
return create_callback(trace_session_file, session_file_dir, debugger);
return createInvalidPlugInError(json_session.trace.type);
}
-Expected<lldb::TraceSP>
-Trace::FindPluginForLiveProcess(llvm::StringRef plugin_name, Process &process) {
+Expected<lldb::TraceSP> Trace::FindPluginForLiveProcess(llvm::StringRef name,
+ Process &process) {
if (!process.IsLiveDebugSession())
return createStringError(inconvertibleErrorCode(),
"Can't trace non-live processes");
- ConstString name(plugin_name);
if (auto create_callback =
PluginManager::GetTraceCreateCallbackForLiveProcess(name))
return create_callback(process);
- return createInvalidPlugInError(plugin_name);
+ return createInvalidPlugInError(name);
}
Expected<StringRef> Trace::FindPluginSchema(StringRef name) {
- ConstString plugin_name(name);
- StringRef schema = PluginManager::GetTraceSchema(plugin_name);
+ StringRef schema = PluginManager::GetTraceSchema(name);
if (!schema.empty())
return schema;
@@ -108,23 +106,21 @@ Error Trace::Stop() {
if (!m_live_process)
return createStringError(inconvertibleErrorCode(),
"Tracing requires a live process.");
- return m_live_process->TraceStop(
- TraceStopRequest(GetPluginName().AsCString()));
+ return m_live_process->TraceStop(TraceStopRequest(GetPluginName()));
}
Error Trace::Stop(llvm::ArrayRef<lldb::tid_t> tids) {
if (!m_live_process)
return createStringError(inconvertibleErrorCode(),
"Tracing requires a live process.");
- return m_live_process->TraceStop(
- TraceStopRequest(GetPluginName().AsCString(), tids));
+ return m_live_process->TraceStop(TraceStopRequest(GetPluginName(), tids));
}
Expected<std::string> Trace::GetLiveProcessState() {
if (!m_live_process)
return createStringError(inconvertibleErrorCode(),
"Tracing requires a live process.");
- return m_live_process->TraceGetState(GetPluginName().AsCString());
+ return m_live_process->TraceGetState(GetPluginName());
}
Optional<size_t> Trace::GetLiveThreadBinaryDataSize(lldb::tid_t tid,
@@ -158,7 +154,7 @@ Trace::GetLiveThreadBinaryData(lldb::tid_t tid, llvm::StringRef kind) {
"Tracing data \"%s\" is not available for thread %" PRIu64 ".",
kind.data(), tid);
- TraceGetBinaryDataRequest request{GetPluginName().AsCString(), kind.str(),
+ TraceGetBinaryDataRequest request{GetPluginName().str(), kind.str(),
static_cast<int64_t>(tid), 0,
static_cast<int64_t>(*size)};
return m_live_process->TraceGetBinaryData(request);
@@ -175,8 +171,8 @@ Trace::GetLiveProcessBinaryData(llvm::StringRef kind) {
inconvertibleErrorCode(),
"Tracing data \"%s\" is not available for the process.", kind.data());
- TraceGetBinaryDataRequest request{GetPluginName().AsCString(), kind.str(),
- None, 0, static_cast<int64_t>(*size)};
+ TraceGetBinaryDataRequest request{GetPluginName().str(), kind.str(), None, 0,
+ static_cast<int64_t>(*size)};
return m_live_process->TraceGetBinaryData(request);
}
diff --git a/lldb/source/Target/TraceExporter.cpp b/lldb/source/Target/TraceExporter.cpp
index 1a6571dba4a0..8c925aa495b0 100644
--- a/lldb/source/Target/TraceExporter.cpp
+++ b/lldb/source/Target/TraceExporter.cpp
@@ -22,11 +22,10 @@ static Error createInvalidPlugInError(StringRef plugin_name) {
}
Expected<lldb::TraceExporterUP>
-TraceExporter::FindPlugin(llvm::StringRef plugin_name) {
- ConstString name(plugin_name);
+TraceExporter::FindPlugin(llvm::StringRef name) {
if (auto create_callback =
PluginManager::GetTraceExporterCreateCallback(name))
return create_callback();
- return createInvalidPlugInError(plugin_name);
+ return createInvalidPlugInError(name);
}
diff --git a/lldb/source/Target/UnixSignals.cpp b/lldb/source/Target/UnixSignals.cpp
index 4ec2e25c7e3b..26ff0bbd3825 100644
--- a/lldb/source/Target/UnixSignals.cpp
+++ b/lldb/source/Target/UnixSignals.cpp
@@ -12,10 +12,10 @@
#include "Plugins/Process/Utility/MipsLinuxSignals.h"
#include "Plugins/Process/Utility/NetBSDSignals.h"
#include "lldb/Host/HostInfo.h"
-#include "lldb/Host/StringConvert.h"
#include "lldb/Utility/ArchSpec.h"
using namespace lldb_private;
+using namespace llvm;
UnixSignals::Signal::Signal(const char *name, bool default_suppress,
bool default_stop, bool default_notify,
@@ -156,9 +156,8 @@ int32_t UnixSignals::GetSignalNumberFromName(const char *name) const {
return pos->first;
}
- const int32_t signo =
- StringConvert::ToSInt32(name, LLDB_INVALID_SIGNAL_NUMBER, 0);
- if (signo != LLDB_INVALID_SIGNAL_NUMBER)
+ int32_t signo;
+ if (llvm::to_integer(name, signo))
return signo;
return LLDB_INVALID_SIGNAL_NUMBER;
}
@@ -314,3 +313,20 @@ UnixSignals::GetFilteredSignals(llvm::Optional<bool> should_suppress,
return result;
}
+
+void UnixSignals::IncrementSignalHitCount(int signo) {
+ collection::iterator pos = m_signals.find(signo);
+ if (pos != m_signals.end())
+ pos->second.m_hit_count += 1;
+}
+
+json::Value UnixSignals::GetHitCountStatistics() const {
+ json::Array json_signals;
+ for (const auto &pair: m_signals) {
+ if (pair.second.m_hit_count > 0)
+ json_signals.emplace_back(json::Object{
+ { pair.second.m_name.GetCString(), pair.second.m_hit_count }
+ });
+ }
+ return std::move(json_signals);
+}
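GetSignalNumberFromName above now parses numeric signal names with llvm::to_integer instead of StringConvert. A tiny sketch of that call, with invented inputs:

```cpp
// llvm::to_integer reports failure via its return value instead of a
// sentinel, which is what the UnixSignals change above relies on.
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>

int main() {
  int32_t signo;
  if (llvm::to_integer("15", signo))
    llvm::outs() << "parsed signal " << signo << '\n';
  if (!llvm::to_integer("SIGTERM", signo))
    llvm::outs() << "not a number, fall back to name lookup\n";
}
```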
diff --git a/lldb/source/Utility/Environment.cpp b/lldb/source/Utility/Environment.cpp
index 5666c2c12ffd..419e0745671a 100644
--- a/lldb/source/Utility/Environment.cpp
+++ b/lldb/source/Utility/Environment.cpp
@@ -41,7 +41,7 @@ Environment::Environment(const char *const *Env) {
insert(*Env++);
}
-void Environment::insert(const_iterator first, const_iterator last) {
+void Environment::insert(iterator first, iterator last) {
while (first != last) {
try_emplace(first->first(), first->second);
++first;
diff --git a/lldb/source/Utility/FileSpec.cpp b/lldb/source/Utility/FileSpec.cpp
index bea3c6d6268b..601edb86c1b0 100644
--- a/lldb/source/Utility/FileSpec.cpp
+++ b/lldb/source/Utility/FileSpec.cpp
@@ -43,9 +43,7 @@ static constexpr FileSpec::Style GetNativeStyle() {
}
bool PathStyleIsPosix(FileSpec::Style style) {
- return (style == FileSpec::Style::posix ||
- (style == FileSpec::Style::native &&
- GetNativeStyle() == FileSpec::Style::posix));
+ return llvm::sys::path::is_style_posix(style);
}
const char *GetPathSeparators(FileSpec::Style style) {
diff --git a/lldb/source/Utility/ReproducerInstrumentation.cpp b/lldb/source/Utility/ReproducerInstrumentation.cpp
index e5bd2ba4b625..b3285f4b3776 100644
--- a/lldb/source/Utility/ReproducerInstrumentation.cpp
+++ b/lldb/source/Utility/ReproducerInstrumentation.cpp
@@ -16,6 +16,9 @@
using namespace lldb_private;
using namespace lldb_private::repro;
+// Whether we're currently across the API boundary.
+static thread_local bool g_global_boundary = false;
+
void *IndexToObject::GetObjectForIndexImpl(unsigned idx) {
return m_mapping.lookup(idx);
}
@@ -227,6 +230,13 @@ unsigned Recorder::GetSequenceNumber() const {
return m_sequence;
}
+void Recorder::PrivateThread() { g_global_boundary = true; }
+
+void Recorder::UpdateBoundary() {
+ if (m_local_boundary)
+ g_global_boundary = false;
+}
+
void InstrumentationData::Initialize(Serializer &serializer,
Registry &registry) {
InstanceImpl().emplace(serializer, registry);
@@ -248,6 +258,5 @@ llvm::Optional<InstrumentationData> &InstrumentationData::InstanceImpl() {
return g_instrumentation_data;
}
-thread_local bool lldb_private::repro::Recorder::g_global_boundary = false;
std::atomic<unsigned> lldb_private::repro::Recorder::g_sequence;
std::mutex lldb_private::repro::Recorder::g_mutex;
diff --git a/lldb/source/Utility/Scalar.cpp b/lldb/source/Utility/Scalar.cpp
index e0b26e89f3c1..19e00e111be5 100644
--- a/lldb/source/Utility/Scalar.cpp
+++ b/lldb/source/Utility/Scalar.cpp
@@ -714,7 +714,7 @@ Status Scalar::SetValueFromData(const DataExtractor &data,
return Status("insufficient data");
m_type = e_int;
m_integer =
- APSInt(APInt::getNullValue(8 * byte_size), encoding == eEncodingUint);
+ APSInt(APInt::getZero(8 * byte_size), encoding == eEncodingUint);
if (data.GetByteOrder() == endian::InlHostByteOrder()) {
llvm::LoadIntFromMemory(m_integer, data.GetDataStart(), byte_size);
} else {
diff --git a/lldb/source/Utility/Status.cpp b/lldb/source/Utility/Status.cpp
index 72fd087decc0..e6d381421f28 100644
--- a/lldb/source/Utility/Status.cpp
+++ b/lldb/source/Utility/Status.cpp
@@ -287,10 +287,6 @@ int Status::SetErrorStringWithVarArg(const char *format, va_list args) {
// return value.
bool Status::Success() const { return m_code == 0; }
-bool Status::WasInterrupted() const {
- return (m_type == eErrorTypePOSIX && m_code == EINTR);
-}
-
void llvm::format_provider<lldb_private::Status>::format(
const lldb_private::Status &error, llvm::raw_ostream &OS,
llvm::StringRef Options) {
diff --git a/lldb/source/Utility/StringExtractorGDBRemote.cpp b/lldb/source/Utility/StringExtractorGDBRemote.cpp
index 29cf585bea56..d6bbf7171916 100644
--- a/lldb/source/Utility/StringExtractorGDBRemote.cpp
+++ b/lldb/source/Utility/StringExtractorGDBRemote.cpp
@@ -260,6 +260,8 @@ StringExtractorGDBRemote::GetServerPacketType() const {
break;
case 'S':
+ if (PACKET_STARTS_WITH("qSaveCore"))
+ return eServerPacketType_qLLDBSaveCore;
if (PACKET_STARTS_WITH("qSpeedTest:"))
return eServerPacketType_qSpeedTest;
if (PACKET_MATCHES("qShlibInfoAddr"))
@@ -337,6 +339,8 @@ StringExtractorGDBRemote::GetServerPacketType() const {
return eServerPacketType_vFile_size;
else if (PACKET_STARTS_WITH("vFile:exists"))
return eServerPacketType_vFile_exists;
+ else if (PACKET_STARTS_WITH("vFile:fstat"))
+ return eServerPacketType_vFile_fstat;
else if (PACKET_STARTS_WITH("vFile:stat"))
return eServerPacketType_vFile_stat;
else if (PACKET_STARTS_WITH("vFile:mode"))
@@ -361,6 +365,8 @@ StringExtractorGDBRemote::GetServerPacketType() const {
return eServerPacketType_vCont;
if (PACKET_MATCHES("vCont?"))
return eServerPacketType_vCont_actions;
+ if (PACKET_STARTS_WITH("vRun;"))
+ return eServerPacketType_vRun;
}
break;
case '_':
diff --git a/lldb/source/Utility/Timer.cpp b/lldb/source/Utility/Timer.cpp
index b59ce3b9f556..2f3afe4c8703 100644
--- a/lldb/source/Utility/Timer.cpp
+++ b/lldb/source/Utility/Timer.cpp
@@ -33,8 +33,6 @@ static std::atomic<Timer::Category *> g_categories;
/// Allows llvm::Timer to emit signposts when supported.
static llvm::ManagedStatic<llvm::SignpostEmitter> Signposts;
-llvm::SignpostEmitter &lldb_private::GetSignposts() { return *Signposts; }
-
std::atomic<bool> Timer::g_quiet(true);
std::atomic<unsigned> Timer::g_display_depth(0);
static std::mutex &GetFileMutex() {
@@ -61,6 +59,7 @@ void Timer::SetQuiet(bool value) { g_quiet = value; }
Timer::Timer(Timer::Category &category, const char *format, ...)
: m_category(category), m_total_start(std::chrono::steady_clock::now()) {
+ Signposts->startInterval(this, m_category.GetName());
TimerStack &stack = GetTimerStackForCurrentThread();
stack.push_back(this);
@@ -87,6 +86,8 @@ Timer::~Timer() {
auto total_dur = stop_time - m_total_start;
auto timer_dur = total_dur - m_child_duration;
+ Signposts->endInterval(this, m_category.GetName());
+
TimerStack &stack = GetTimerStackForCurrentThread();
if (g_quiet && stack.size() <= g_display_depth) {
std::lock_guard<std::mutex> lock(GetFileMutex());
diff --git a/lldb/source/Utility/UriParser.cpp b/lldb/source/Utility/UriParser.cpp
index c6ed24985896..cfb9009898d2 100644
--- a/lldb/source/Utility/UriParser.cpp
+++ b/lldb/source/Utility/UriParser.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "lldb/Utility/UriParser.h"
+#include "llvm/Support/raw_ostream.h"
#include <string>
@@ -15,25 +16,30 @@
using namespace lldb_private;
-// UriParser::Parse
-bool UriParser::Parse(llvm::StringRef uri, llvm::StringRef &scheme,
- llvm::StringRef &hostname, int &port,
- llvm::StringRef &path) {
- llvm::StringRef tmp_scheme, tmp_hostname, tmp_path;
+llvm::raw_ostream &lldb_private::operator<<(llvm::raw_ostream &OS,
+ const URI &U) {
+ OS << U.scheme << "://[" << U.hostname << ']';
+ if (U.port)
+ OS << ':' << U.port.getValue();
+ return OS << U.path;
+}
+
+llvm::Optional<URI> URI::Parse(llvm::StringRef uri) {
+ URI ret;
const llvm::StringRef kSchemeSep("://");
auto pos = uri.find(kSchemeSep);
if (pos == std::string::npos)
- return false;
+ return llvm::None;
// Extract path.
- tmp_scheme = uri.substr(0, pos);
+ ret.scheme = uri.substr(0, pos);
auto host_pos = pos + kSchemeSep.size();
auto path_pos = uri.find('/', host_pos);
if (path_pos != std::string::npos)
- tmp_path = uri.substr(path_pos);
+ ret.path = uri.substr(path_pos);
else
- tmp_path = "/";
+ ret.path = "/";
auto host_port = uri.substr(
host_pos,
@@ -44,27 +50,24 @@ bool UriParser::Parse(llvm::StringRef uri, llvm::StringRef &scheme,
// hostname is enclosed with square brackets.
pos = host_port.rfind(']');
if (pos == std::string::npos)
- return false;
+ return llvm::None;
- tmp_hostname = host_port.substr(1, pos - 1);
+ ret.hostname = host_port.substr(1, pos - 1);
host_port = host_port.drop_front(pos + 1);
if (!host_port.empty() && !host_port.consume_front(":"))
- return false;
+ return llvm::None;
} else {
- std::tie(tmp_hostname, host_port) = host_port.split(':');
+ std::tie(ret.hostname, host_port) = host_port.split(':');
}
// Extract port
if (!host_port.empty()) {
uint16_t port_value = 0;
if (host_port.getAsInteger(0, port_value))
- return false;
- port = port_value;
+ return llvm::None;
+ ret.port = port_value;
} else
- port = -1;
+ ret.port = llvm::None;
- scheme = tmp_scheme;
- hostname = tmp_hostname;
- path = tmp_path;
- return true;
+ return ret;
}
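URI::Parse above now returns llvm::Optional<URI> instead of filling four out-parameters. Below is a self-contained analogue using std::optional and std::string_view; the bracketed-IPv6 branch is omitted and every name is invented for illustration.

```cpp
// Parsing either succeeds and yields all fields, or fails as a whole.
#include <cstdint>
#include <iostream>
#include <optional>
#include <string>
#include <string_view>

struct MiniURI {
  std::string_view scheme, hostname, path;
  std::optional<uint16_t> port;
};

static std::optional<MiniURI> ParseMiniURI(std::string_view uri) {
  MiniURI ret;
  size_t pos = uri.find("://");
  if (pos == std::string_view::npos)
    return std::nullopt;
  ret.scheme = uri.substr(0, pos);

  std::string_view rest = uri.substr(pos + 3);
  size_t slash = rest.find('/');
  ret.path = slash == std::string_view::npos ? "/" : rest.substr(slash);

  std::string_view host_port = rest.substr(0, slash);
  size_t colon = host_port.find(':');
  ret.hostname = host_port.substr(0, colon);
  if (colon != std::string_view::npos) {
    int port = std::stoi(std::string(host_port.substr(colon + 1)));
    if (port < 0 || port > 65535)
      return std::nullopt;
    ret.port = static_cast<uint16_t>(port);
  }
  return ret;
}

int main() {
  if (auto uri = ParseMiniURI("connect://localhost:5432/session"))
    std::cout << uri->scheme << " host=" << uri->hostname
              << " port=" << uri->port.value_or(0) << " path=" << uri->path
              << '\n';
}
```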
diff --git a/lldb/source/Utility/VMRange.cpp b/lldb/source/Utility/VMRange.cpp
index 184531b4bb27..ddd2a67c29b2 100644
--- a/lldb/source/Utility/VMRange.cpp
+++ b/lldb/source/Utility/VMRange.cpp
@@ -23,16 +23,14 @@ using namespace lldb_private;
bool VMRange::ContainsValue(const VMRange::collection &coll,
lldb::addr_t value) {
- return llvm::find_if(coll, [&](const VMRange &r) {
- return r.Contains(value);
- }) != coll.end();
+ return llvm::any_of(coll,
+ [&](const VMRange &r) { return r.Contains(value); });
}
bool VMRange::ContainsRange(const VMRange::collection &coll,
const VMRange &range) {
- return llvm::find_if(coll, [&](const VMRange &r) {
- return r.Contains(range);
- }) != coll.end();
+ return llvm::any_of(coll,
+ [&](const VMRange &r) { return r.Contains(range); });
}
void VMRange::Dump(llvm::raw_ostream &s, lldb::addr_t offset,
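The VMRange hunks above swap the find_if(...) != end() idiom for llvm::any_of without changing behavior. A one-line sketch of the helper, with invented values:

```cpp
// llvm::any_of returns true as soon as the predicate matches any element.
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <vector>

int main() {
  std::vector<int> ranges = {10, 20, 30};
  bool hit = llvm::any_of(ranges, [](int r) { return r > 25; });
  llvm::outs() << (hit ? "contained\n" : "not contained\n");
}
```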
diff --git a/lldb/tools/argdumper/argdumper.exports b/lldb/tools/argdumper/argdumper.exports
deleted file mode 100644
index e69de29bb2d1..000000000000
--- a/lldb/tools/argdumper/argdumper.exports
+++ /dev/null
diff --git a/lldb/tools/compact-unwind/compact-unwind-dumper.c b/lldb/tools/compact-unwind/compact-unwind-dumper.c
index d4706eaf5386..1551ed92597d 100644
--- a/lldb/tools/compact-unwind/compact-unwind-dumper.c
+++ b/lldb/tools/compact-unwind/compact-unwind-dumper.c
@@ -14,49 +14,6 @@
#include <sys/stat.h>
#include <sys/types.h>
-enum {
- UNWIND_ARM64_MODE_MASK = 0x0F000000,
- UNWIND_ARM64_MODE_FRAMELESS = 0x02000000,
- UNWIND_ARM64_MODE_DWARF = 0x03000000,
- UNWIND_ARM64_MODE_FRAME = 0x04000000,
-
- UNWIND_ARM64_FRAME_X19_X20_PAIR = 0x00000001,
- UNWIND_ARM64_FRAME_X21_X22_PAIR = 0x00000002,
- UNWIND_ARM64_FRAME_X23_X24_PAIR = 0x00000004,
- UNWIND_ARM64_FRAME_X25_X26_PAIR = 0x00000008,
- UNWIND_ARM64_FRAME_X27_X28_PAIR = 0x00000010,
- UNWIND_ARM64_FRAME_D8_D9_PAIR = 0x00000100,
- UNWIND_ARM64_FRAME_D10_D11_PAIR = 0x00000200,
- UNWIND_ARM64_FRAME_D12_D13_PAIR = 0x00000400,
- UNWIND_ARM64_FRAME_D14_D15_PAIR = 0x00000800,
-
- UNWIND_ARM64_FRAMELESS_STACK_SIZE_MASK = 0x00FFF000,
- UNWIND_ARM64_DWARF_SECTION_OFFSET = 0x00FFFFFF,
-};
-
-enum {
- UNWIND_ARM_MODE_MASK = 0x0F000000,
- UNWIND_ARM_MODE_FRAME = 0x01000000,
- UNWIND_ARM_MODE_FRAME_D = 0x02000000,
- UNWIND_ARM_MODE_DWARF = 0x04000000,
-
- UNWIND_ARM_FRAME_STACK_ADJUST_MASK = 0x00C00000,
-
- UNWIND_ARM_FRAME_FIRST_PUSH_R4 = 0x00000001,
- UNWIND_ARM_FRAME_FIRST_PUSH_R5 = 0x00000002,
- UNWIND_ARM_FRAME_FIRST_PUSH_R6 = 0x00000004,
-
- UNWIND_ARM_FRAME_SECOND_PUSH_R8 = 0x00000008,
- UNWIND_ARM_FRAME_SECOND_PUSH_R9 = 0x00000010,
- UNWIND_ARM_FRAME_SECOND_PUSH_R10 = 0x00000020,
- UNWIND_ARM_FRAME_SECOND_PUSH_R11 = 0x00000040,
- UNWIND_ARM_FRAME_SECOND_PUSH_R12 = 0x00000080,
-
- UNWIND_ARM_FRAME_D_REG_COUNT_MASK = 0x00000700,
-
- UNWIND_ARM_DWARF_SECTION_OFFSET = 0x00FFFFFF,
-};
-
#define EXTRACT_BITS(value, mask) \
((value >> __builtin_ctz(mask)) & (((1 << __builtin_popcount(mask))) - 1))
diff --git a/lldb/tools/driver/Driver.cpp b/lldb/tools/driver/Driver.cpp
index a6a4a2a1b80b..a51c124f9615 100644
--- a/lldb/tools/driver/Driver.cpp
+++ b/lldb/tools/driver/Driver.cpp
@@ -18,6 +18,7 @@
#include "lldb/API/SBReproducer.h"
#include "lldb/API/SBStream.h"
#include "lldb/API/SBStringList.h"
+#include "lldb/API/SBStructuredData.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Format.h"
@@ -201,6 +202,9 @@ SBError Driver::ProcessArgs(const opt::InputArgList &args, bool &exiting) {
if (args.hasArg(OPT_python_path)) {
m_option_data.m_print_python_path = true;
}
+ if (args.hasArg(OPT_print_script_interpreter_info)) {
+ m_option_data.m_print_script_interpreter_info = true;
+ }
if (args.hasArg(OPT_batch)) {
m_option_data.m_batch = true;
@@ -398,6 +402,22 @@ SBError Driver::ProcessArgs(const opt::InputArgList &args, bool &exiting) {
return error;
}
+ if (m_option_data.m_print_script_interpreter_info) {
+ SBStructuredData info =
+ m_debugger.GetScriptInterpreterInfo(m_debugger.GetScriptLanguage());
+ if (!info) {
+ error.SetErrorString("no script interpreter.");
+ } else {
+ SBStream stream;
+ error = info.GetAsJSON(stream);
+ if (error.Success()) {
+ llvm::outs() << stream.GetData() << '\n';
+ }
+ }
+ exiting = true;
+ return error;
+ }
+
return error;
}
@@ -609,6 +629,7 @@ int Driver::MainLoop() {
options.SetSpawnThread(false);
options.SetStopOnError(true);
options.SetStopOnCrash(m_option_data.m_batch);
+ options.SetEchoCommands(!m_option_data.m_source_quietly);
SBCommandInterpreterRunResult results =
m_debugger.RunCommandInterpreter(options);
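
The new --print-script-interpreter-info path above amounts to one SB API call plus JSON serialization. A hedged, standalone sketch of the same pattern (not part of the patch; it follows the calls visible in ProcessArgs above):

  #include "lldb/API/SBDebugger.h"
  #include "lldb/API/SBError.h"
  #include "lldb/API/SBStream.h"
  #include "lldb/API/SBStructuredData.h"
  #include <cstdio>

  int main() {
    lldb::SBDebugger::Initialize();
    lldb::SBDebugger dbg = lldb::SBDebugger::Create();
    // Ask for the interpreter info and serialize it to JSON, as the driver does.
    lldb::SBStructuredData info =
        dbg.GetScriptInterpreterInfo(dbg.GetScriptLanguage());
    lldb::SBStream stream;
    if (info.IsValid() && info.GetAsJSON(stream).Success())
      std::printf("%s\n", stream.GetData());
    lldb::SBDebugger::Destroy(dbg);
    lldb::SBDebugger::Terminate();
    return 0;
  }
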
diff --git a/lldb/tools/driver/Driver.h b/lldb/tools/driver/Driver.h
index 2d91491a2540..d5779b3c2c91 100644
--- a/lldb/tools/driver/Driver.h
+++ b/lldb/tools/driver/Driver.h
@@ -79,6 +79,7 @@ public:
bool m_source_quietly = false;
bool m_print_version = false;
bool m_print_python_path = false;
+ bool m_print_script_interpreter_info = false;
bool m_wait_for = false;
bool m_repl = false;
bool m_batch = false;
diff --git a/lldb/tools/driver/Options.td b/lldb/tools/driver/Options.td
index 8bcb0e7bc52e..d59ac314d594 100644
--- a/lldb/tools/driver/Options.td
+++ b/lldb/tools/driver/Options.td
@@ -48,6 +48,10 @@ def: Flag<["-"], "P">,
HelpText<"Alias for --python-path">,
Group<grp_scripting>;
+def print_script_interpreter_info: F<"print-script-interpreter-info">,
+ HelpText<"Prints out a json dictionary with information about the scripting language interpreter.">,
+ Group<grp_scripting>;
+
def script_language: Separate<["--", "-"], "script-language">,
MetaVarName<"<language>">,
HelpText<"Tells the debugger to use the specified scripting language for user-defined scripts.">,
@@ -110,7 +114,7 @@ def: Flag<["-"], "b">,
Group<grp_command>;
def source_quietly: F<"source-quietly">,
- HelpText<"Tells the debugger to execute this one-line lldb command before any file has been loaded.">,
+ HelpText<"Tells the debugger not to echo commands while sourcing files or one-line commands provided on the command line.">,
Group<grp_command>;
def: Flag<["-"], "Q">,
Alias<source_quietly>,
diff --git a/lldb/tools/driver/Platform.h b/lldb/tools/driver/Platform.h
index d7573b75bf32..ff017c4422b1 100644
--- a/lldb/tools/driver/Platform.h
+++ b/lldb/tools/driver/Platform.h
@@ -9,19 +9,16 @@
#ifndef LLDB_TOOLS_DRIVER_PLATFORM_H
#define LLDB_TOOLS_DRIVER_PLATFORM_H
-#include "lldb/Host/Config.h"
-
#if defined(_WIN32)
#include <io.h>
#if defined(_MSC_VER)
#include <csignal>
#endif
-#if HAVE_SYS_TYPES_H
-#include <sys/types.h>
-#endif
+
#include "lldb/Host/windows/windows.h"
#include <cinttypes>
+#include <sys/types.h>
struct winsize {
long ws_col;
diff --git a/lldb/tools/lldb-server/Acceptor.cpp b/lldb/tools/lldb-server/Acceptor.cpp
index b8be9c5c2661..4714252011aa 100644
--- a/lldb/tools/lldb-server/Acceptor.cpp
+++ b/lldb/tools/lldb-server/Acceptor.cpp
@@ -84,21 +84,16 @@ std::unique_ptr<Acceptor> Acceptor::Create(StringRef name,
error.Clear();
Socket::SocketProtocol socket_protocol = Socket::ProtocolUnixDomain;
- int port;
- StringRef scheme, host, path;
// Try to match socket name as URL - e.g., tcp://localhost:5555
- if (UriParser::Parse(name, scheme, host, port, path)) {
- if (!FindProtocolByScheme(scheme.str().c_str(), socket_protocol))
+ if (llvm::Optional<URI> res = URI::Parse(name)) {
+ if (!FindProtocolByScheme(res->scheme.str().c_str(), socket_protocol))
error.SetErrorStringWithFormat("Unknown protocol scheme \"%s\"",
- scheme.str().c_str());
+ res->scheme.str().c_str());
else
- name = name.drop_front(scheme.size() + strlen("://"));
+ name = name.drop_front(res->scheme.size() + strlen("://"));
} else {
- std::string host_str;
- std::string port_str;
- int32_t port = INT32_MIN;
// Try to match socket name as $host:port - e.g., localhost:5555
- if (Socket::DecodeHostAndPort(name, host_str, port_str, port, nullptr))
+ if (!llvm::errorToBool(Socket::DecodeHostAndPort(name).takeError()))
socket_protocol = Socket::ProtocolTcp;
}
diff --git a/lldb/tools/lldb-server/lldb-gdbserver.cpp b/lldb/tools/lldb-server/lldb-gdbserver.cpp
index 888ba728fa91..906ae4c378b6 100644
--- a/lldb/tools/lldb-server/lldb-gdbserver.cpp
+++ b/lldb/tools/lldb-server/lldb-gdbserver.cpp
@@ -17,7 +17,6 @@
#include <unistd.h>
#endif
-#include "Acceptor.h"
#include "LLDBServerUtilities.h"
#include "Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.h"
#include "Plugins/Process/gdb-remote/ProcessGDBRemoteLog.h"
@@ -26,7 +25,6 @@
#include "lldb/Host/FileSystem.h"
#include "lldb/Host/Pipe.h"
#include "lldb/Host/Socket.h"
-#include "lldb/Host/StringConvert.h"
#include "lldb/Host/common/NativeProcessProtocol.h"
#include "lldb/Target/Process.h"
#include "lldb/Utility/Status.h"
@@ -165,15 +163,14 @@ void handle_launch(GDBRemoteCommunicationServerLLGS &gdb_server,
}
}
-Status writeSocketIdToPipe(Pipe &port_pipe, const std::string &socket_id) {
+Status writeSocketIdToPipe(Pipe &port_pipe, llvm::StringRef socket_id) {
size_t bytes_written = 0;
// Write the port number as a C string with the NULL terminator.
- return port_pipe.Write(socket_id.c_str(), socket_id.size() + 1,
- bytes_written);
+ return port_pipe.Write(socket_id.data(), socket_id.size() + 1, bytes_written);
}
Status writeSocketIdToPipe(const char *const named_pipe_path,
- const std::string &socket_id) {
+ llvm::StringRef socket_id) {
Pipe port_name_pipe;
// Wait for 10 seconds for pipe to be opened.
auto error = port_name_pipe.OpenAsWriterWithTimeout(named_pipe_path, false,
@@ -184,7 +181,7 @@ Status writeSocketIdToPipe(const char *const named_pipe_path,
}
Status writeSocketIdToPipe(lldb::pipe_t unnamed_pipe,
- const std::string &socket_id) {
+ llvm::StringRef socket_id) {
Pipe port_pipe{LLDB_INVALID_PIPE, unnamed_pipe};
return writeSocketIdToPipe(port_pipe, socket_id);
}
@@ -198,129 +195,76 @@ void ConnectToRemote(MainLoop &mainloop,
Status error;
std::unique_ptr<Connection> connection_up;
+ std::string url;
+
if (connection_fd != -1) {
- // Build the connection string.
- char connection_url[512];
- snprintf(connection_url, sizeof(connection_url), "fd://%d", connection_fd);
+ url = llvm::formatv("fd://{0}", connection_fd).str();
// Create the connection.
#if LLDB_ENABLE_POSIX && !defined _WIN32
::fcntl(connection_fd, F_SETFD, FD_CLOEXEC);
#endif
- connection_up.reset(new ConnectionFileDescriptor);
- auto connection_result = connection_up->Connect(connection_url, &error);
- if (connection_result != eConnectionStatusSuccess) {
- fprintf(stderr, "error: failed to connect to client at '%s' "
- "(connection status: %d)\n",
- connection_url, static_cast<int>(connection_result));
- exit(-1);
- }
- if (error.Fail()) {
- fprintf(stderr, "error: failed to connect to client at '%s': %s\n",
- connection_url, error.AsCString());
- exit(-1);
- }
} else if (!host_and_port.empty()) {
- // Parse out host and port.
- std::string final_host_and_port;
- std::string connection_host;
- std::string connection_port;
- uint32_t connection_portno = 0;
-
- // If host_and_port starts with ':', default the host to be "localhost" and
- // expect the remainder to be the port.
- if (host_and_port[0] == ':')
- final_host_and_port.append("localhost");
- final_host_and_port.append(host_and_port.str());
-
- // Note: use rfind, because the host/port may look like "[::1]:12345".
- const std::string::size_type colon_pos = final_host_and_port.rfind(':');
- if (colon_pos != std::string::npos) {
- connection_host = final_host_and_port.substr(0, colon_pos);
- connection_port = final_host_and_port.substr(colon_pos + 1);
- connection_portno = StringConvert::ToUInt32(connection_port.c_str(), 0);
+ llvm::Expected<std::string> url_exp =
+ LLGSArgToURL(host_and_port, reverse_connect);
+ if (!url_exp) {
+ llvm::errs() << llvm::formatv("error: invalid host:port or URL '{0}': "
+ "{1}\n",
+ host_and_port,
+ llvm::toString(url_exp.takeError()));
+ exit(-1);
}
+ url = std::move(url_exp.get());
+ }
- if (reverse_connect) {
- // llgs will connect to the gdb-remote client.
-
- // Ensure we have a port number for the connection.
- if (connection_portno == 0) {
- fprintf(stderr, "error: port number must be specified on when using "
- "reverse connect\n");
- exit(1);
- }
-
- // Build the connection string.
- char connection_url[512];
- snprintf(connection_url, sizeof(connection_url), "connect://%s",
- final_host_and_port.c_str());
-
- // Create the connection.
- connection_up.reset(new ConnectionFileDescriptor);
- auto connection_result = connection_up->Connect(connection_url, &error);
- if (connection_result != eConnectionStatusSuccess) {
- fprintf(stderr, "error: failed to connect to client at '%s' "
- "(connection status: %d)\n",
- connection_url, static_cast<int>(connection_result));
- exit(-1);
- }
- if (error.Fail()) {
- fprintf(stderr, "error: failed to connect to client at '%s': %s\n",
- connection_url, error.AsCString());
- exit(-1);
- }
- } else {
- std::unique_ptr<Acceptor> acceptor_up(
- Acceptor::Create(final_host_and_port, false, error));
- if (error.Fail()) {
- fprintf(stderr, "failed to create acceptor: %s\n", error.AsCString());
- exit(1);
- }
- error = acceptor_up->Listen(1);
- if (error.Fail()) {
- fprintf(stderr, "failed to listen: %s\n", error.AsCString());
- exit(1);
- }
- const std::string socket_id = acceptor_up->GetLocalSocketId();
- if (!socket_id.empty()) {
- // If we have a named pipe to write the socket id back to, do that now.
- if (named_pipe_path && named_pipe_path[0]) {
- error = writeSocketIdToPipe(named_pipe_path, socket_id);
- if (error.Fail())
- fprintf(stderr, "failed to write to the named pipe \'%s\': %s\n",
- named_pipe_path, error.AsCString());
- }
- // If we have an unnamed pipe to write the socket id back to, do that
- // now.
- else if (unnamed_pipe != LLDB_INVALID_PIPE) {
- error = writeSocketIdToPipe(unnamed_pipe, socket_id);
- if (error.Fail())
- fprintf(stderr, "failed to write to the unnamed pipe: %s\n",
- error.AsCString());
- }
- } else {
- fprintf(stderr,
- "unable to get the socket id for the listening connection\n");
- }
+ if (!url.empty()) {
+ // Create the connection or server.
+ std::unique_ptr<ConnectionFileDescriptor> conn_fd_up{
+ new ConnectionFileDescriptor};
+ auto connection_result = conn_fd_up->Connect(
+ url,
+ [named_pipe_path, unnamed_pipe](llvm::StringRef socket_id) {
+ // If we have a named pipe to write the socket id back to, do that
+ // now.
+ if (named_pipe_path && named_pipe_path[0]) {
+ Status error = writeSocketIdToPipe(named_pipe_path, socket_id);
+ if (error.Fail())
+ llvm::errs() << llvm::formatv(
+ "failed to write to the named peipe '{0}': {1}\n",
+ named_pipe_path, error.AsCString());
+ }
+ // If we have an unnamed pipe to write the socket id back to, do
+ // that now.
+ else if (unnamed_pipe != LLDB_INVALID_PIPE) {
+ Status error = writeSocketIdToPipe(unnamed_pipe, socket_id);
+ if (error.Fail())
+ llvm::errs() << llvm::formatv(
+ "failed to write to the unnamed pipe: {0}\n", error);
+ }
+ },
+ &error);
- Connection *conn = nullptr;
- error = acceptor_up->Accept(false, conn);
- if (error.Fail()) {
- printf("failed to accept new connection: %s\n", error.AsCString());
- exit(1);
- }
- connection_up.reset(conn);
+ if (error.Fail()) {
+ llvm::errs() << llvm::formatv(
+ "error: failed to connect to client at '{0}': {1}\n", url, error);
+ exit(-1);
+ }
+ if (connection_result != eConnectionStatusSuccess) {
+ llvm::errs() << llvm::formatv(
+ "error: failed to connect to client at '{0}' "
+ "(connection status: {1})\n",
+ url, static_cast<int>(connection_result));
+ exit(-1);
}
+ connection_up = std::move(conn_fd_up);
}
error = gdb_server.InitializeConnection(std::move(connection_up));
if (error.Fail()) {
- fprintf(stderr, "Failed to initialize connection: %s\n",
- error.AsCString());
+ llvm::errs() << llvm::formatv("failed to initialize connection\n", error);
exit(-1);
}
- printf("Connection established.\n");
+ llvm::outs() << "Connection established.\n";
}
namespace {
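
The rewritten ConnectToRemote funnels every URL through a single ConnectionFileDescriptor::Connect overload that takes a socket-id callback. A rough sketch of that call shape (assumes the same includes as lldb-gdbserver.cpp; the helper name is illustrative only):

  using namespace lldb_private;

  static std::unique_ptr<Connection> MakeConnection(llvm::StringRef url,
                                                    Status &error) {
    auto conn = std::make_unique<ConnectionFileDescriptor>();
    // The callback receives the bound socket id (e.g. the TCP port) so it can
    // be reported back, mirroring the pipe write-back in ConnectToRemote.
    auto result = conn->Connect(
        url,
        [](llvm::StringRef socket_id) {
          llvm::outs() << "socket id: " << socket_id << '\n';
        },
        &error);
    if (result != lldb::eConnectionStatusSuccess || error.Fail())
      return nullptr;
    return conn;
  }
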
diff --git a/lldb/tools/lldb-server/lldb-server.exports b/lldb/tools/lldb-server/lldb-server.exports
deleted file mode 100644
index e69de29bb2d1..000000000000
--- a/lldb/tools/lldb-server/lldb-server.exports
+++ /dev/null
diff --git a/llvm/include/llvm-c/Comdat.h b/llvm/include/llvm-c/Comdat.h
index 81cde1107fa4..8002bc0581af 100644
--- a/llvm/include/llvm-c/Comdat.h
+++ b/llvm/include/llvm-c/Comdat.h
@@ -19,6 +19,13 @@
LLVM_C_EXTERN_C_BEGIN
+/**
+ * @defgroup LLVMCCoreComdat Comdats
+ * @ingroup LLVMCCore
+ *
+ * @{
+ */
+
typedef enum {
LLVMAnyComdatSelectionKind, ///< The linker may choose any COMDAT.
LLVMExactMatchComdatSelectionKind, ///< The data referenced by the COMDAT must
@@ -66,6 +73,10 @@ LLVMComdatSelectionKind LLVMGetComdatSelectionKind(LLVMComdatRef C);
*/
void LLVMSetComdatSelectionKind(LLVMComdatRef C, LLVMComdatSelectionKind Kind);
+/**
+ * @}
+ */
+
LLVM_C_EXTERN_C_END
#endif
diff --git a/llvm/include/llvm-c/Core.h b/llvm/include/llvm-c/Core.h
index 1a5e763cfc60..d170eff17951 100644
--- a/llvm/include/llvm-c/Core.h
+++ b/llvm/include/llvm-c/Core.h
@@ -1580,10 +1580,10 @@ LLVMTypeRef LLVMX86AMXType(void);
macro(ConstantVector) \
macro(GlobalValue) \
macro(GlobalAlias) \
- macro(GlobalIFunc) \
macro(GlobalObject) \
macro(Function) \
macro(GlobalVariable) \
+ macro(GlobalIFunc) \
macro(UndefValue) \
macro(PoisonValue) \
macro(Instruction) \
@@ -3287,7 +3287,7 @@ void LLVMSetInstructionCallConv(LLVMValueRef Instr, unsigned CC);
*/
unsigned LLVMGetInstructionCallConv(LLVMValueRef Instr);
-void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index,
+void LLVMSetInstrParamAlignment(LLVMValueRef Instr, LLVMAttributeIndex Idx,
unsigned Align);
void LLVMAddCallSiteAttribute(LLVMValueRef C, LLVMAttributeIndex Idx,
@@ -3611,11 +3611,21 @@ void LLVMSetCurrentDebugLocation2(LLVMBuilderRef Builder, LLVMMetadataRef Loc);
* current debug location for the given builder. If the builder has no current
* debug location, this function is a no-op.
*
+ * @deprecated LLVMSetInstDebugLocation is deprecated in favor of the more general
+ * LLVMAddMetadataToInst.
+ *
* @see llvm::IRBuilder::SetInstDebugLocation()
*/
void LLVMSetInstDebugLocation(LLVMBuilderRef Builder, LLVMValueRef Inst);
/**
+ * Adds the metadata registered with the given builder to the given instruction.
+ *
+ * @see llvm::IRBuilder::AddMetadataToInst()
+ */
+void LLVMAddMetadataToInst(LLVMBuilderRef Builder, LLVMValueRef Inst);
+
+/**
* Get the default floating-point math metadata for a given builder.
*
* @see llvm::IRBuilder::getDefaultFPMathTag()
@@ -4081,6 +4091,7 @@ void LLVMDisposeMemoryBuffer(LLVMMemoryBufferRef MemBuf);
/**
* @defgroup LLVMCCorePassRegistry Pass Registry
+ * @ingroup LLVMCCore
*
* @{
*/
@@ -4095,6 +4106,7 @@ LLVMPassRegistryRef LLVMGetGlobalPassRegistry(void);
/**
* @defgroup LLVMCCorePassManagers Pass Managers
+ * @ingroup LLVMCCore
*
* @{
*/
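
LLVMAddMetadataToInst is documented above as the more general replacement for the now-deprecated LLVMSetInstDebugLocation. A minimal sketch of a migrated call site (Builder and Inst are assumed to be created elsewhere):

  #include "llvm-c/Core.h"

  static void attachBuilderMetadata(LLVMBuilderRef Builder, LLVMValueRef Inst) {
    // Previously: LLVMSetInstDebugLocation(Builder, Inst);  // debug loc only
    // Now: copies all metadata registered with Builder onto Inst.
    LLVMAddMetadataToInst(Builder, Inst);
  }
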
diff --git a/llvm/include/llvm-c/DebugInfo.h b/llvm/include/llvm-c/DebugInfo.h
index 8c085807914b..d7fb898b60d2 100644
--- a/llvm/include/llvm-c/DebugInfo.h
+++ b/llvm/include/llvm-c/DebugInfo.h
@@ -22,6 +22,13 @@
LLVM_C_EXTERN_C_BEGIN
/**
+ * @defgroup LLVMCCoreDebugInfo Debug Information
+ * @ingroup LLVMCCore
+ *
+ * @{
+ */
+
+/**
* Debug info flags.
*/
typedef enum {
@@ -227,6 +234,13 @@ void LLVMDisposeDIBuilder(LLVMDIBuilderRef Builder);
void LLVMDIBuilderFinalize(LLVMDIBuilderRef Builder);
/**
+ * Finalize a specific subprogram.
+ * No new variables may be added to this subprogram afterwards.
+ */
+void LLVMDIBuilderFinalizeSubprogram(LLVMDIBuilderRef Builder,
+ LLVMMetadataRef Subprogram);
+
+/**
* A CompileUnit provides an anchor for all debugging
* information generated during this instance of compilation.
* \param Lang Source programming language, eg.
@@ -389,48 +403,48 @@ LLVMDIBuilderCreateImportedModuleFromNamespace(LLVMDIBuilderRef Builder,
* \param ImportedEntity Previous imported entity to alias.
* \param File File where the declaration is located.
* \param Line Line number of the declaration.
+ * \param Elements Renamed elements.
+ * \param NumElements Number of renamed elements.
*/
-LLVMMetadataRef
-LLVMDIBuilderCreateImportedModuleFromAlias(LLVMDIBuilderRef Builder,
- LLVMMetadataRef Scope,
- LLVMMetadataRef ImportedEntity,
- LLVMMetadataRef File,
- unsigned Line);
+LLVMMetadataRef LLVMDIBuilderCreateImportedModuleFromAlias(
+ LLVMDIBuilderRef Builder, LLVMMetadataRef Scope,
+ LLVMMetadataRef ImportedEntity, LLVMMetadataRef File, unsigned Line,
+ LLVMMetadataRef *Elements, unsigned NumElements);
/**
* Create a descriptor for an imported module.
- * \param Builder The \c DIBuilder.
- * \param Scope The scope this module is imported into
- * \param M The module being imported here
- * \param File File where the declaration is located.
- * \param Line Line number of the declaration.
+ * \param Builder The \c DIBuilder.
+ * \param Scope The scope this module is imported into
+ * \param M The module being imported here
+ * \param File File where the declaration is located.
+ * \param Line Line number of the declaration.
+ * \param Elements Renamed elements.
+ * \param NumElements Number of renamed elements.
*/
-LLVMMetadataRef
-LLVMDIBuilderCreateImportedModuleFromModule(LLVMDIBuilderRef Builder,
- LLVMMetadataRef Scope,
- LLVMMetadataRef M,
- LLVMMetadataRef File,
- unsigned Line);
+LLVMMetadataRef LLVMDIBuilderCreateImportedModuleFromModule(
+ LLVMDIBuilderRef Builder, LLVMMetadataRef Scope, LLVMMetadataRef M,
+ LLVMMetadataRef File, unsigned Line, LLVMMetadataRef *Elements,
+ unsigned NumElements);
/**
* Create a descriptor for an imported function, type, or variable. Suitable
* for e.g. FORTRAN-style USE declarations.
- * \param Builder The DIBuilder.
- * \param Scope The scope this module is imported into.
- * \param Decl The declaration (or definition) of a function, type,
- or variable.
- * \param File File where the declaration is located.
- * \param Line Line number of the declaration.
- * \param Name A name that uniquely identifies this imported declaration.
- * \param NameLen The length of the C string passed to \c Name.
+ * \param Builder The DIBuilder.
+ * \param Scope The scope this module is imported into.
+ * \param Decl The declaration (or definition) of a function, type,
+ or variable.
+ * \param File File where the declaration is located.
+ * \param Line Line number of the declaration.
+ * \param Name A name that uniquely identifies this imported
+ declaration.
+ * \param NameLen The length of the C string passed to \c Name.
+ * \param Elements Renamed elements.
+ * \param NumElements Number of renamed elements.
*/
-LLVMMetadataRef
-LLVMDIBuilderCreateImportedDeclaration(LLVMDIBuilderRef Builder,
- LLVMMetadataRef Scope,
- LLVMMetadataRef Decl,
- LLVMMetadataRef File,
- unsigned Line,
- const char *Name, size_t NameLen);
+LLVMMetadataRef LLVMDIBuilderCreateImportedDeclaration(
+ LLVMDIBuilderRef Builder, LLVMMetadataRef Scope, LLVMMetadataRef Decl,
+ LLVMMetadataRef File, unsigned Line, const char *Name, size_t NameLen,
+ LLVMMetadataRef *Elements, unsigned NumElements);
/**
* Creates a new DebugLocation that describes a source location.
@@ -1360,6 +1374,10 @@ void LLVMInstructionSetDebugLoc(LLVMValueRef Inst, LLVMMetadataRef Loc);
*/
LLVMMetadataKind LLVMGetMetadataKind(LLVMMetadataRef Metadata);
+/**
+ * @}
+ */
+
LLVM_C_EXTERN_C_END
#endif
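
The imported-entity creators above now take Elements/NumElements, and LLVMDIBuilderFinalizeSubprogram closes out a single subprogram. A hedged sketch using the new signatures with no renamed elements (all metadata arguments are assumed to come from earlier LLVMDIBuilder* calls):

  #include "llvm-c/DebugInfo.h"

  static void finishImport(LLVMDIBuilderRef DIB, LLVMMetadataRef Scope,
                           LLVMMetadataRef Decl, LLVMMetadataRef File,
                           LLVMMetadataRef Subprogram) {
    // Passing nullptr/0 for Elements/NumElements means no renamed elements.
    LLVMMetadataRef Imported = LLVMDIBuilderCreateImportedDeclaration(
        DIB, Scope, Decl, File, /*Line=*/1, "alias", /*NameLen=*/5,
        /*Elements=*/nullptr, /*NumElements=*/0);
    (void)Imported;
    // No further variables may be added to this subprogram afterwards.
    LLVMDIBuilderFinalizeSubprogram(DIB, Subprogram);
  }
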
diff --git a/llvm/include/llvm-c/DisassemblerTypes.h b/llvm/include/llvm-c/DisassemblerTypes.h
index ae5c68227594..53baaef11033 100644
--- a/llvm/include/llvm-c/DisassemblerTypes.h
+++ b/llvm/include/llvm-c/DisassemblerTypes.h
@@ -18,6 +18,12 @@
#endif
/**
+ * @addtogroup LLVMCDisassembler
+ *
+ * @{
+ */
+
+/**
* An opaque reference to a disassembler context.
*/
typedef void *LLVMDisasmContextRef;
@@ -157,4 +163,8 @@ typedef const char *(*LLVMSymbolLookupCallback)(void *DisInfo,
/* The output reference is to a C++ symbol name. */
#define LLVMDisassembler_ReferenceType_DeMangled_Name 9
+/**
+ * @}
+ */
+
#endif
diff --git a/llvm/include/llvm-c/Error.h b/llvm/include/llvm-c/Error.h
index bc702ac7a1bf..c3baaf65186a 100644
--- a/llvm/include/llvm-c/Error.h
+++ b/llvm/include/llvm-c/Error.h
@@ -18,6 +18,13 @@
LLVM_C_EXTERN_C_BEGIN
+/**
+ * @defgroup LLVMCError Error Handling
+ * @ingroup LLVMC
+ *
+ * @{
+ */
+
#define LLVMErrorSuccess 0
/**
@@ -67,6 +74,10 @@ LLVMErrorTypeId LLVMGetStringErrorTypeId(void);
*/
LLVMErrorRef LLVMCreateStringError(const char *ErrMsg);
+/**
+ * @}
+ */
+
LLVM_C_EXTERN_C_END
#endif
diff --git a/llvm/include/llvm-c/ErrorHandling.h b/llvm/include/llvm-c/ErrorHandling.h
index 5ba099c209c0..d9b9f22752b8 100644
--- a/llvm/include/llvm-c/ErrorHandling.h
+++ b/llvm/include/llvm-c/ErrorHandling.h
@@ -18,6 +18,12 @@
LLVM_C_EXTERN_C_BEGIN
+/**
+ * @addtogroup LLVMCError
+ *
+ * @{
+ */
+
typedef void (*LLVMFatalErrorHandler)(const char *Reason);
/**
@@ -42,6 +48,10 @@ void LLVMResetFatalErrorHandler(void);
*/
void LLVMEnablePrettyStackTrace(void);
+/**
+ * @}
+ */
+
LLVM_C_EXTERN_C_END
#endif
diff --git a/llvm/include/llvm-c/IRReader.h b/llvm/include/llvm-c/IRReader.h
index 5a3f633c3d91..905b84fa5a86 100644
--- a/llvm/include/llvm-c/IRReader.h
+++ b/llvm/include/llvm-c/IRReader.h
@@ -20,6 +20,13 @@
LLVM_C_EXTERN_C_BEGIN
/**
+ * @defgroup LLVMCCoreIRReader IR Reader
+ * @ingroup LLVMCCore
+ *
+ * @{
+ */
+
+/**
* Read LLVM IR from a memory buffer and convert it into an in-memory Module
* object. Returns 0 on success.
* Optionally returns a human-readable description of any errors that
@@ -32,6 +39,10 @@ LLVMBool LLVMParseIRInContext(LLVMContextRef ContextRef,
LLVMMemoryBufferRef MemBuf, LLVMModuleRef *OutM,
char **OutMessage);
+/**
+ * @}
+ */
+
LLVM_C_EXTERN_C_END
#endif
diff --git a/llvm/include/llvm-c/LLJIT.h b/llvm/include/llvm-c/LLJIT.h
index f689ca0f1cf0..a06133aac4fb 100644
--- a/llvm/include/llvm-c/LLJIT.h
+++ b/llvm/include/llvm-c/LLJIT.h
@@ -32,6 +32,13 @@
LLVM_C_EXTERN_C_BEGIN
/**
+ * @defgroup LLVMCExecutionEngineLLJIT LLJIT
+ * @ingroup LLVMCExecutionEngine
+ *
+ * @{
+ */
+
+/**
* A function for constructing an ObjectLinkingLayer instance to be used
* by an LLJIT instance.
*
@@ -235,6 +242,10 @@ LLVMOrcIRTransformLayerRef LLVMOrcLLJITGetIRTransformLayer(LLVMOrcLLJITRef J);
*/
const char *LLVMOrcLLJITGetDataLayoutStr(LLVMOrcLLJITRef J);
+/**
+ * @}
+ */
+
LLVM_C_EXTERN_C_END
#endif /* LLVM_C_LLJIT_H */
diff --git a/llvm/include/llvm-c/Linker.h b/llvm/include/llvm-c/Linker.h
index 1ad9cc958753..acff5d5e2225 100644
--- a/llvm/include/llvm-c/Linker.h
+++ b/llvm/include/llvm-c/Linker.h
@@ -19,6 +19,13 @@
LLVM_C_EXTERN_C_BEGIN
+/**
+ * @defgroup LLVMCCoreLinker Linker
+ * @ingroup LLVMCCore
+ *
+ * @{
+ */
+
/* This enum is provided for backwards-compatibility only. It has no effect. */
typedef enum {
LLVMLinkerDestroySource = 0, /* This is the default behavior. */
@@ -35,4 +42,8 @@ LLVMBool LLVMLinkModules2(LLVMModuleRef Dest, LLVMModuleRef Src);
LLVM_C_EXTERN_C_END
+/**
+ * @}
+ */
+
#endif
diff --git a/llvm/include/llvm-c/Orc.h b/llvm/include/llvm-c/Orc.h
index 1790afbcecc7..e2f30b7cdf45 100644
--- a/llvm/include/llvm-c/Orc.h
+++ b/llvm/include/llvm-c/Orc.h
@@ -34,6 +34,13 @@
LLVM_C_EXTERN_C_BEGIN
/**
+ * @defgroup LLVMCExecutionEngineORC On-Request-Compilation
+ * @ingroup LLVMCExecutionEngine
+ *
+ * @{
+ */
+
+/**
* Represents an address in the executor process.
*/
typedef uint64_t LLVMOrcJITTargetAddress;
@@ -921,6 +928,49 @@ LLVMErrorRef LLVMOrcCreateDynamicLibrarySearchGeneratorForProcess(
LLVMOrcSymbolPredicate Filter, void *FilterCtx);
/**
+ * Get a LLVMOrcCreateDynamicLibrarySearchGeneratorForPath that will reflect
+ * library symbols into the JITDylib. On success the resulting generator is
+ * owned by the client. Ownership is typically transferred by adding the
+ * instance to a JITDylib using LLVMOrcJITDylibAddGenerator,
+ *
+ * The GlobalPrefix argument specifies the character that appears on the front
+ * of linker-mangled symbols for the target platform (e.g. '_' on MachO).
+ * If non-null, this character will be stripped from the start of all symbol
+ * strings before passing the remaining substring to dlsym.
+ *
+ * The optional Filter and Ctx arguments can be used to supply a symbol name
+ * filter: Only symbols for which the filter returns true will be visible to
+ * JIT'd code. If the Filter argument is null then all library symbols will
+ * be visible to JIT'd code. Note that the symbol name passed to the Filter
+ * function is the full mangled symbol: The client is responsible for stripping
+ * the global prefix if present.
+ *
+ * THIS API IS EXPERIMENTAL AND LIKELY TO CHANGE IN THE NEAR FUTURE!
+ *
+ */
+LLVMErrorRef LLVMOrcCreateDynamicLibrarySearchGeneratorForPath(
+ LLVMOrcDefinitionGeneratorRef *Result, const char *FileName,
+ char GlobalPrefix, LLVMOrcSymbolPredicate Filter, void *FilterCtx);
+
+/**
+ * Get a LLVMOrcCreateStaticLibrarySearchGeneratorForPath that will reflect
+ * static library symbols into the JITDylib. On success the resulting
+ * generator is owned by the client. Ownership is typically transferred by
+ * adding the instance to a JITDylib using LLVMOrcJITDylibAddGenerator.
+ *
+ * Call with the optional TargetTriple argument will succeed if the file at
+ * the given path is a static library or a MachO universal binary containing a
+ * static library that is compatible with the given triple. Otherwise it will
+ * return an error.
+ *
+ * THIS API IS EXPERIMENTAL AND LIKELY TO CHANGE IN THE NEAR FUTURE!
+ *
+ */
+LLVMErrorRef LLVMOrcCreateStaticLibrarySearchGeneratorForPath(
+ LLVMOrcDefinitionGeneratorRef *Result, LLVMOrcObjectLayerRef ObjLayer,
+ const char *FileName, const char *TargetTriple);
+
+/**
* Create a ThreadSafeContext containing a new LLVMContext.
*
* Ownership of the underlying ThreadSafeContext data is shared: Clients
@@ -1133,6 +1183,10 @@ void LLVMOrcDisposeDumpObjects(LLVMOrcDumpObjectsRef DumpObjects);
LLVMErrorRef LLVMOrcDumpObjects_CallOperator(LLVMOrcDumpObjectsRef DumpObjects,
LLVMMemoryBufferRef *ObjBuffer);
+/**
+ * @}
+ */
+
LLVM_C_EXTERN_C_END
#endif /* LLVM_C_ORC_H */
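
A hedged sketch of the new dynamic-library generator: the GlobalPrefix, main-JITDylib and AddGenerator calls are pre-existing ORC C API entry points referenced by the comment above; the helper name is hypothetical and LLVMErrorRef handling is left to the caller:

  #include "llvm-c/LLJIT.h"
  #include "llvm-c/Orc.h"

  static LLVMErrorRef addLibraryToMainDylib(LLVMOrcLLJITRef J, const char *Path) {
    LLVMOrcDefinitionGeneratorRef Gen = nullptr;
    if (LLVMErrorRef Err = LLVMOrcCreateDynamicLibrarySearchGeneratorForPath(
            &Gen, Path, LLVMOrcLLJITGetGlobalPrefix(J), /*Filter=*/nullptr,
            /*FilterCtx=*/nullptr))
      return Err;
    // Ownership of Gen transfers to the JITDylib, as described above.
    LLVMOrcJITDylibAddGenerator(LLVMOrcLLJITGetMainJITDylib(J), Gen);
    return nullptr;
  }
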
diff --git a/llvm/include/llvm-c/OrcEE.h b/llvm/include/llvm-c/OrcEE.h
index 2435e7421a42..e7ae0f5e6be2 100644
--- a/llvm/include/llvm-c/OrcEE.h
+++ b/llvm/include/llvm-c/OrcEE.h
@@ -33,6 +33,13 @@
LLVM_C_EXTERN_C_BEGIN
/**
+ * @defgroup LLVMCExecutionEngineORCEE ExecutionEngine-based ORC Utils
+ * @ingroup LLVMCExecutionEngine
+ *
+ * @{
+ */
+
+/**
* Create a RTDyldObjectLinkingLayer instance using the standard
* SectionMemoryManager for memory management.
*/
@@ -50,6 +57,10 @@ void LLVMOrcRTDyldObjectLinkingLayerRegisterJITEventListener(
LLVMOrcObjectLayerRef RTDyldObjLinkingLayer,
LLVMJITEventListenerRef Listener);
+/**
+ * @}
+ */
+
LLVM_C_EXTERN_C_END
#endif /* LLVM_C_ORCEE_H */
diff --git a/llvm/include/llvm-c/Support.h b/llvm/include/llvm-c/Support.h
index 866df32efa98..17657861b32b 100644
--- a/llvm/include/llvm-c/Support.h
+++ b/llvm/include/llvm-c/Support.h
@@ -21,6 +21,12 @@
LLVM_C_EXTERN_C_BEGIN
/**
+ * @addtogroup LLVMCCore
+ *
+ * @{
+ */
+
+/**
* This function permanently loads the dynamic library at the given path.
* It is safe to call this function multiple times for the same library.
*
@@ -57,6 +63,10 @@ void *LLVMSearchForAddressOfSymbol(const char *symbolName);
*/
void LLVMAddSymbol(const char *symbolName, void *symbolValue);
+/**
+ * @}
+ */
+
LLVM_C_EXTERN_C_END
#endif
diff --git a/llvm/include/llvm-c/TargetMachine.h b/llvm/include/llvm-c/TargetMachine.h
index f82edd948b59..23c8c63ff0b4 100644
--- a/llvm/include/llvm-c/TargetMachine.h
+++ b/llvm/include/llvm-c/TargetMachine.h
@@ -25,6 +25,12 @@
LLVM_C_EXTERN_C_BEGIN
+/**
+ * @addtogroup LLVMCTarget
+ *
+ * @{
+ */
+
typedef struct LLVMOpaqueTargetMachine *LLVMTargetMachineRef;
typedef struct LLVMTarget *LLVMTargetRef;
@@ -156,6 +162,10 @@ char* LLVMGetHostCPUFeatures(void);
/** Adds the target-specific analysis passes to the pass manager. */
void LLVMAddAnalysisPasses(LLVMTargetMachineRef T, LLVMPassManagerRef PM);
+/**
+ * @}
+ */
+
LLVM_C_EXTERN_C_END
#endif
diff --git a/llvm/include/llvm-c/Transforms/PassBuilder.h b/llvm/include/llvm-c/Transforms/PassBuilder.h
index 5635f10d6877..6d9f1b45c707 100644
--- a/llvm/include/llvm-c/Transforms/PassBuilder.h
+++ b/llvm/include/llvm-c/Transforms/PassBuilder.h
@@ -18,6 +18,13 @@
#include "llvm-c/TargetMachine.h"
#include "llvm-c/Types.h"
+/**
+ * @defgroup LLVMCCoreNewPM New Pass Manager
+ * @ingroup LLVMCCore
+ *
+ * @{
+ */
+
LLVM_C_EXTERN_C_BEGIN
/**
@@ -50,7 +57,7 @@ LLVMErrorRef LLVMRunPasses(LLVMModuleRef M, const char *Passes,
* responsible for it. The client should call LLVMDisposePassBuilderOptions
* to free the pass builder options.
*/
-LLVMPassBuilderOptionsRef LLVMCreatePassBuilderOptions();
+LLVMPassBuilderOptionsRef LLVMCreatePassBuilderOptions(void);
/**
* Toggle adding the VerifierPass for the PassBuilder, ensuring all functions
@@ -97,6 +104,10 @@ void LLVMPassBuilderOptionsSetMergeFunctions(LLVMPassBuilderOptionsRef Options,
*/
void LLVMDisposePassBuilderOptions(LLVMPassBuilderOptionsRef Options);
+/**
+ * @}
+ */
+
LLVM_C_EXTERN_C_END
#endif // LLVM_C_TRANSFORMS_PASSBUILDER_H
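
LLVMCreatePassBuilderOptions now has an explicit (void) prototype. A short sketch of the usual create/run/dispose sequence (the LLVMRunPasses signature is taken from the surrounding header; "default<O2>" is only an example pipeline string, and TM may be null):

  #include "llvm-c/TargetMachine.h"
  #include "llvm-c/Transforms/PassBuilder.h"

  static LLVMErrorRef runDefaultO2(LLVMModuleRef M, LLVMTargetMachineRef TM) {
    LLVMPassBuilderOptionsRef Opts = LLVMCreatePassBuilderOptions();
    LLVMErrorRef Err = LLVMRunPasses(M, "default<O2>", TM, Opts);
    LLVMDisposePassBuilderOptions(Opts);
    return Err;
  }
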
diff --git a/llvm/include/llvm-c/lto.h b/llvm/include/llvm-c/lto.h
index f6fc8588f5f7..5ceb02224d2b 100644
--- a/llvm/include/llvm-c/lto.h
+++ b/llvm/include/llvm-c/lto.h
@@ -46,7 +46,7 @@ typedef bool lto_bool_t;
* @{
*/
-#define LTO_API_VERSION 28
+#define LTO_API_VERSION 29
/**
* \since prior to LTO_API_VERSION=3
@@ -313,6 +313,16 @@ extern lto_bool_t lto_module_get_macho_cputype(lto_module_t mod,
unsigned int *out_cpusubtype);
/**
+ * This function can be used by the linker to check if a given module has
+ * any constructor or destructor functions.
+ *
+ * Returns true if the module has either the @llvm.global_ctors or the
+ * @llvm.global_dtors symbol. Otherwise returns false.
+ *
+ * \since LTO_API_VERSION=29
+ */
+extern lto_bool_t lto_module_has_ctor_dtor(lto_module_t mod);
+/**
* Diagnostic severity.
*
* \since LTO_API_VERSION=7
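
A small sketch of the new libLTO query (lto_module_create and lto_module_dispose are long-standing entry points; only lto_module_has_ctor_dtor is new at LTO_API_VERSION=29):

  #include "llvm-c/lto.h"
  #include <cstdio>

  static void reportCtorDtor(const char *path) {
    lto_module_t mod = lto_module_create(path);
    if (!mod)
      return;
    if (lto_module_has_ctor_dtor(mod))
      std::printf("%s defines @llvm.global_ctors or @llvm.global_dtors\n", path);
    lto_module_dispose(mod);
  }
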
diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h
index f493a03b4b87..40e0e32c77a8 100644
--- a/llvm/include/llvm/ADT/APFloat.h
+++ b/llvm/include/llvm/ADT/APFloat.h
@@ -961,9 +961,7 @@ public:
/// Returns a float which is bitcasted from an all one value int.
///
/// \param Semantics - type float semantics
- /// \param BitWidth - Select float type
- static APFloat getAllOnesValue(const fltSemantics &Semantics,
- unsigned BitWidth);
+ static APFloat getAllOnesValue(const fltSemantics &Semantics);
/// Used to insert APFloat objects, or objects that contain APFloat objects,
/// into FoldingSets.
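
With the BitWidth parameter gone, the float semantics alone determine the width. A one-line sketch of the updated call (IEEEsingle() picked arbitrarily):

  #include "llvm/ADT/APFloat.h"

  static llvm::APFloat allOnesSingle() {
    return llvm::APFloat::getAllOnesValue(llvm::APFloat::IEEEsingle());
  }
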
diff --git a/llvm/include/llvm/ADT/APInt.h b/llvm/include/llvm/ADT/APInt.h
index ff586f763e82..595cd94b6b8f 100644
--- a/llvm/include/llvm/ADT/APInt.h
+++ b/llvm/include/llvm/ADT/APInt.h
@@ -31,7 +31,7 @@ class raw_ostream;
template <typename T> class SmallVectorImpl;
template <typename T> class ArrayRef;
template <typename T> class Optional;
-template <typename T> struct DenseMapInfo;
+template <typename T, typename Enable> struct DenseMapInfo;
class APInt;
@@ -66,6 +66,11 @@ inline APInt operator-(APInt);
/// not.
/// * In general, the class tries to follow the style of computation that LLVM
/// uses in its IR. This simplifies its use for LLVM.
+/// * APInt supports zero-bit-width values, but operations that require bits
+/// are not defined on it (e.g. you cannot ask for the sign of a zero-bit
+/// integer). This means that operations like zero extension and logical
+/// shifts are defined, but sign extension and ashr are not. Zero bit values
+/// compare and hash equal to themselves, and countLeadingZeros returns 0.
///
class LLVM_NODISCARD APInt {
public:
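
A short sketch of the zero-bit-width behaviour described in the comment above (getZeroWidth() is introduced further down in this header):

  #include "llvm/ADT/APInt.h"
  #include <cassert>

  static void zeroWidthExample() {
    // Zero-bit values compare equal to themselves and countLeadingZeros()
    // returns 0; bit-dependent queries such as the sign are not defined.
    llvm::APInt Z = llvm::APInt::getZeroWidth();
    assert(Z == llvm::APInt::getZeroWidth());
    assert(Z.countLeadingZeros() == 0);
  }
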
@@ -87,176 +92,6 @@ public:
static constexpr WordType WORDTYPE_MAX = ~WordType(0);
-private:
- /// This union is used to store the integer value. When the
- /// integer bit-width <= 64, it uses VAL, otherwise it uses pVal.
- union {
- uint64_t VAL; ///< Used to store the <= 64 bits integer value.
- uint64_t *pVal; ///< Used to store the >64 bits integer value.
- } U;
-
- unsigned BitWidth; ///< The number of bits in this APInt.
-
- friend struct DenseMapInfo<APInt>;
-
- friend class APSInt;
-
- /// Fast internal constructor
- ///
- /// This constructor is used only internally for speed of construction of
- /// temporaries. It is unsafe for general use so it is not public.
- APInt(uint64_t *val, unsigned bits) : BitWidth(bits) {
- U.pVal = val;
- }
-
- /// Determine which word a bit is in.
- ///
- /// \returns the word position for the specified bit position.
- static unsigned whichWord(unsigned bitPosition) {
- return bitPosition / APINT_BITS_PER_WORD;
- }
-
- /// Determine which bit in a word a bit is in.
- ///
- /// \returns the bit position in a word for the specified bit position
- /// in the APInt.
- static unsigned whichBit(unsigned bitPosition) {
- return bitPosition % APINT_BITS_PER_WORD;
- }
-
- /// Get a single bit mask.
- ///
- /// \returns a uint64_t with only bit at "whichBit(bitPosition)" set
- /// This method generates and returns a uint64_t (word) mask for a single
- /// bit at a specific bit position. This is used to mask the bit in the
- /// corresponding word.
- static uint64_t maskBit(unsigned bitPosition) {
- return 1ULL << whichBit(bitPosition);
- }
-
- /// Clear unused high order bits
- ///
- /// This method is used internally to clear the top "N" bits in the high order
- /// word that are not used by the APInt. This is needed after the most
- /// significant word is assigned a value to ensure that those bits are
- /// zero'd out.
- APInt &clearUnusedBits() {
- // Compute how many bits are used in the final word
- unsigned WordBits = ((BitWidth-1) % APINT_BITS_PER_WORD) + 1;
-
- // Mask out the high bits.
- uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - WordBits);
- if (isSingleWord())
- U.VAL &= mask;
- else
- U.pVal[getNumWords() - 1] &= mask;
- return *this;
- }
-
- /// Get the word corresponding to a bit position
- /// \returns the corresponding word for the specified bit position.
- uint64_t getWord(unsigned bitPosition) const {
- return isSingleWord() ? U.VAL : U.pVal[whichWord(bitPosition)];
- }
-
- /// Utility method to change the bit width of this APInt to new bit width,
- /// allocating and/or deallocating as necessary. There is no guarantee on the
- /// value of any bits upon return. Caller should populate the bits after.
- void reallocate(unsigned NewBitWidth);
-
- /// Convert a char array into an APInt
- ///
- /// \param radix 2, 8, 10, 16, or 36
- /// Converts a string into a number. The string must be non-empty
- /// and well-formed as a number of the given base. The bit-width
- /// must be sufficient to hold the result.
- ///
- /// This is used by the constructors that take string arguments.
- ///
- /// StringRef::getAsInteger is superficially similar but (1) does
- /// not assume that the string is well-formed and (2) grows the
- /// result to hold the input.
- void fromString(unsigned numBits, StringRef str, uint8_t radix);
-
- /// An internal division function for dividing APInts.
- ///
- /// This is used by the toString method to divide by the radix. It simply
- /// provides a more convenient form of divide for internal use since KnuthDiv
- /// has specific constraints on its inputs. If those constraints are not met
- /// then it provides a simpler form of divide.
- static void divide(const WordType *LHS, unsigned lhsWords,
- const WordType *RHS, unsigned rhsWords, WordType *Quotient,
- WordType *Remainder);
-
- /// out-of-line slow case for inline constructor
- void initSlowCase(uint64_t val, bool isSigned);
-
- /// shared code between two array constructors
- void initFromArray(ArrayRef<uint64_t> array);
-
- /// out-of-line slow case for inline copy constructor
- void initSlowCase(const APInt &that);
-
- /// out-of-line slow case for shl
- void shlSlowCase(unsigned ShiftAmt);
-
- /// out-of-line slow case for lshr.
- void lshrSlowCase(unsigned ShiftAmt);
-
- /// out-of-line slow case for ashr.
- void ashrSlowCase(unsigned ShiftAmt);
-
- /// out-of-line slow case for operator=
- void AssignSlowCase(const APInt &RHS);
-
- /// out-of-line slow case for operator==
- bool EqualSlowCase(const APInt &RHS) const LLVM_READONLY;
-
- /// out-of-line slow case for countLeadingZeros
- unsigned countLeadingZerosSlowCase() const LLVM_READONLY;
-
- /// out-of-line slow case for countLeadingOnes.
- unsigned countLeadingOnesSlowCase() const LLVM_READONLY;
-
- /// out-of-line slow case for countTrailingZeros.
- unsigned countTrailingZerosSlowCase() const LLVM_READONLY;
-
- /// out-of-line slow case for countTrailingOnes
- unsigned countTrailingOnesSlowCase() const LLVM_READONLY;
-
- /// out-of-line slow case for countPopulation
- unsigned countPopulationSlowCase() const LLVM_READONLY;
-
- /// out-of-line slow case for intersects.
- bool intersectsSlowCase(const APInt &RHS) const LLVM_READONLY;
-
- /// out-of-line slow case for isSubsetOf.
- bool isSubsetOfSlowCase(const APInt &RHS) const LLVM_READONLY;
-
- /// out-of-line slow case for setBits.
- void setBitsSlowCase(unsigned loBit, unsigned hiBit);
-
- /// out-of-line slow case for flipAllBits.
- void flipAllBitsSlowCase();
-
- /// out-of-line slow case for operator&=.
- void AndAssignSlowCase(const APInt& RHS);
-
- /// out-of-line slow case for operator|=.
- void OrAssignSlowCase(const APInt& RHS);
-
- /// out-of-line slow case for operator^=.
- void XorAssignSlowCase(const APInt& RHS);
-
- /// Unsigned comparison. Returns -1, 0, or 1 if this APInt is less than, equal
- /// to, or greater than RHS.
- int compare(const APInt &RHS) const LLVM_READONLY;
-
- /// Signed comparison. Returns -1, 0, or 1 if this APInt is less than, equal
- /// to, or greater than RHS.
- int compareSigned(const APInt &RHS) const LLVM_READONLY;
-
-public:
/// \name Constructors
/// @{
@@ -272,7 +107,6 @@ public:
/// \param isSigned how to treat signedness of val
APInt(unsigned numBits, uint64_t val, bool isSigned = false)
: BitWidth(numBits) {
- assert(BitWidth && "bitwidth too small");
if (isSingleWord()) {
U.VAL = val;
clearUnusedBits();
@@ -312,7 +146,9 @@ public:
/// \param radix the radix to use for the conversion
APInt(unsigned numBits, StringRef str, uint8_t radix);
- /// Simply makes *this a copy of that.
+ /// Default constructor that creates an APInt with a 1-bit zero value.
+ explicit APInt() : BitWidth(1) { U.VAL = 0; }
+
/// Copy Constructor.
APInt(const APInt &that) : BitWidth(that.BitWidth) {
if (isSingleWord())
@@ -333,19 +169,131 @@ public:
delete[] U.pVal;
}
- /// Default constructor that creates an uninteresting APInt
- /// representing a 1-bit zero value.
+ /// @}
+ /// \name Value Generators
+ /// @{
+
+ /// Get the '0' value for the specified bit-width.
+ static APInt getZero(unsigned numBits) { return APInt(numBits, 0); }
+
+ /// NOTE: This is soft-deprecated. Please use `getZero()` instead.
+ static APInt getNullValue(unsigned numBits) { return getZero(numBits); }
+
+ /// Return an APInt zero bits wide.
+ static APInt getZeroWidth() { return getZero(0); }
+
+ /// Gets maximum unsigned value of APInt for specific bit width.
+ static APInt getMaxValue(unsigned numBits) { return getAllOnes(numBits); }
+
+ /// Gets maximum signed value of APInt for a specific bit width.
+ static APInt getSignedMaxValue(unsigned numBits) {
+ APInt API = getAllOnes(numBits);
+ API.clearBit(numBits - 1);
+ return API;
+ }
+
+ /// Gets minimum unsigned value of APInt for a specific bit width.
+ static APInt getMinValue(unsigned numBits) { return APInt(numBits, 0); }
+
+ /// Gets minimum signed value of APInt for a specific bit width.
+ static APInt getSignedMinValue(unsigned numBits) {
+ APInt API(numBits, 0);
+ API.setBit(numBits - 1);
+ return API;
+ }
+
+ /// Get the SignMask for a specific bit width.
///
- /// This is useful for object deserialization (pair this with the static
- /// method Read).
- explicit APInt() : BitWidth(1) { U.VAL = 0; }
+ /// This is just a wrapper function of getSignedMinValue(), and it helps code
+ /// readability when we want to get a SignMask.
+ static APInt getSignMask(unsigned BitWidth) {
+ return getSignedMinValue(BitWidth);
+ }
- /// Returns whether this instance allocated memory.
- bool needsCleanup() const { return !isSingleWord(); }
+ /// Return an APInt of a specified width with all bits set.
+ static APInt getAllOnes(unsigned numBits) {
+ return APInt(numBits, WORDTYPE_MAX, true);
+ }
- /// Used to insert APInt objects, or objects that contain APInt objects, into
- /// FoldingSets.
- void Profile(FoldingSetNodeID &id) const;
+ /// NOTE: This is soft-deprecated. Please use `getAllOnes()` instead.
+ static APInt getAllOnesValue(unsigned numBits) { return getAllOnes(numBits); }
+
+ /// Return an APInt with exactly one bit set in the result.
+ static APInt getOneBitSet(unsigned numBits, unsigned BitNo) {
+ APInt Res(numBits, 0);
+ Res.setBit(BitNo);
+ return Res;
+ }
+
+ /// Get a value with a block of bits set.
+ ///
+ /// Constructs an APInt value that has a contiguous range of bits set. The
+ /// bits from loBit (inclusive) to hiBit (exclusive) will be set. All other
+ /// bits will be zero. For example, with parameters(32, 0, 16) you would get
+ /// 0x0000FFFF. Please call getBitsSetWithWrap if \p loBit may be greater than
+ /// \p hiBit.
+ ///
+ /// \param numBits the intended bit width of the result
+ /// \param loBit the index of the lowest bit set.
+ /// \param hiBit the index of the highest bit set.
+ ///
+ /// \returns An APInt value with the requested bits set.
+ static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit) {
+ APInt Res(numBits, 0);
+ Res.setBits(loBit, hiBit);
+ return Res;
+ }
+
+ /// Wrap version of getBitsSet.
+ /// If \p hiBit is bigger than \p loBit, this is the same as getBitsSet.
+ /// If \p hiBit is not bigger than \p loBit, the set bits "wrap". For example,
+ /// with parameters (32, 28, 4), you would get 0xF000000F.
+ /// If \p hiBit is equal to \p loBit, you would get a result with all bits
+ /// set.
+ static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit,
+ unsigned hiBit) {
+ APInt Res(numBits, 0);
+ Res.setBitsWithWrap(loBit, hiBit);
+ return Res;
+ }
+
+ /// Constructs an APInt value that has a contiguous range of bits set. The
+ /// bits from loBit (inclusive) to numBits (exclusive) will be set. All other
+ /// bits will be zero. For example, with parameters(32, 12) you would get
+ /// 0xFFFFF000.
+ ///
+ /// \param numBits the intended bit width of the result
+ /// \param loBit the index of the lowest bit to set.
+ ///
+ /// \returns An APInt value with the requested bits set.
+ static APInt getBitsSetFrom(unsigned numBits, unsigned loBit) {
+ APInt Res(numBits, 0);
+ Res.setBitsFrom(loBit);
+ return Res;
+ }
+
+ /// Constructs an APInt value that has the top hiBitsSet bits set.
+ ///
+ /// \param numBits the bitwidth of the result
+ /// \param hiBitsSet the number of high-order bits set in the result.
+ static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet) {
+ APInt Res(numBits, 0);
+ Res.setHighBits(hiBitsSet);
+ return Res;
+ }
+
+ /// Constructs an APInt value that has the bottom loBitsSet bits set.
+ ///
+ /// \param numBits the bitwidth of the result
+ /// \param loBitsSet the number of low-order bits set in the result.
+ static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet) {
+ APInt Res(numBits, 0);
+ Res.setLowBits(loBitsSet);
+ return Res;
+ }
+
+ /// Return a value containing V broadcasted over NewLen bits.
+ static APInt getSplat(unsigned NewLen, const APInt &V);
/// @}
/// \name Value Tests
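
The relocated value generators keep the behaviour stated in their comments. A few spot checks, with the expected values taken from the documentation above:

  #include "llvm/ADT/APInt.h"
  #include <cassert>

  static void valueGeneratorExamples() {
    using llvm::APInt;
    assert(APInt::getZero(32).isZero());
    assert(APInt::getAllOnes(8) == APInt(8, 0xFF));
    // Bits [0, 16) set in a 32-bit value -> 0x0000FFFF.
    assert(APInt::getBitsSet(32, 0, 16) == APInt(32, 0x0000FFFF));
    // Wrapping variant: (32, 28, 4) -> 0xF000000F.
    assert(APInt::getBitsSetWithWrap(32, 28, 4) == APInt(32, 0xF000000F));
    // Top four bits set.
    assert(APInt::getHighBitsSet(32, 4) == APInt(32, 0xF0000000));
  }
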
@@ -373,7 +321,7 @@ public:
/// This tests the high bit of this APInt to determine if it is set.
///
/// \returns true if this APInt has its sign bit set, false otherwise.
- bool isSignBitSet() const { return (*this)[BitWidth-1]; }
+ bool isSignBitSet() const { return (*this)[BitWidth - 1]; }
/// Determine if sign bit of this APInt is clear.
///
@@ -388,50 +336,62 @@ public:
/// that 0 is not a positive value.
///
/// \returns true if this APInt is positive.
- bool isStrictlyPositive() const { return isNonNegative() && !isNullValue(); }
+ bool isStrictlyPositive() const { return isNonNegative() && !isZero(); }
/// Determine if this APInt Value is non-positive (<= 0).
///
/// \returns true if this APInt is non-positive.
bool isNonPositive() const { return !isStrictlyPositive(); }
- /// Determine if all bits are set
- ///
- /// This checks to see if the value has all bits of the APInt are set or not.
- bool isAllOnesValue() const {
+ /// Determine if all bits are set. This is true for zero-width values.
+ bool isAllOnes() const {
+ if (BitWidth == 0)
+ return true;
if (isSingleWord())
return U.VAL == WORDTYPE_MAX >> (APINT_BITS_PER_WORD - BitWidth);
return countTrailingOnesSlowCase() == BitWidth;
}
- /// Determine if all bits are clear
- ///
- /// This checks to see if the value has all bits of the APInt are clear or
- /// not.
- bool isNullValue() const { return !*this; }
+ /// NOTE: This is soft-deprecated. Please use `isAllOnes()` instead.
+ bool isAllOnesValue() const { return isAllOnes(); }
+
+ /// Determine if this value is zero, i.e. all bits are clear.
+ bool isZero() const {
+ if (isSingleWord())
+ return U.VAL == 0;
+ return countLeadingZerosSlowCase() == BitWidth;
+ }
+
+ /// NOTE: This is soft-deprecated. Please use `isZero()` instead.
+ bool isNullValue() const { return isZero(); }
/// Determine if this is a value of 1.
///
/// This checks to see if the value of this APInt is one.
- bool isOneValue() const {
+ bool isOne() const {
if (isSingleWord())
return U.VAL == 1;
return countLeadingZerosSlowCase() == BitWidth - 1;
}
+ /// NOTE: This is soft-deprecated. Please use `isOne()` instead.
+ bool isOneValue() const { return isOne(); }
+
/// Determine if this is the largest unsigned value.
///
/// This checks to see if the value of this APInt is the maximum unsigned
/// value for the APInt's bit width.
- bool isMaxValue() const { return isAllOnesValue(); }
+ bool isMaxValue() const { return isAllOnes(); }
/// Determine if this is the largest signed value.
///
/// This checks to see if the value of this APInt is the maximum signed
/// value for the APInt's bit width.
bool isMaxSignedValue() const {
- if (isSingleWord())
+ if (isSingleWord()) {
+ assert(BitWidth && "zero width values not allowed");
return U.VAL == ((WordType(1) << (BitWidth - 1)) - 1);
+ }
return !isNegative() && countTrailingOnesSlowCase() == BitWidth - 1;
}
@@ -439,39 +399,48 @@ public:
///
/// This checks to see if the value of this APInt is the minimum unsigned
/// value for the APInt's bit width.
- bool isMinValue() const { return isNullValue(); }
+ bool isMinValue() const { return isZero(); }
/// Determine if this is the smallest signed value.
///
/// This checks to see if the value of this APInt is the minimum signed
/// value for the APInt's bit width.
bool isMinSignedValue() const {
- if (isSingleWord())
+ if (isSingleWord()) {
+ assert(BitWidth && "zero width values not allowed");
return U.VAL == (WordType(1) << (BitWidth - 1));
+ }
return isNegative() && countTrailingZerosSlowCase() == BitWidth - 1;
}
/// Check if this APInt has an N-bits unsigned integer value.
- bool isIntN(unsigned N) const {
- assert(N && "N == 0 ???");
- return getActiveBits() <= N;
- }
+ bool isIntN(unsigned N) const { return getActiveBits() <= N; }
/// Check if this APInt has an N-bits signed integer value.
- bool isSignedIntN(unsigned N) const {
- assert(N && "N == 0 ???");
- return getMinSignedBits() <= N;
- }
+ bool isSignedIntN(unsigned N) const { return getMinSignedBits() <= N; }
/// Check if this APInt's value is a power of two greater than zero.
///
/// \returns true if the argument APInt value is a power of two > 0.
bool isPowerOf2() const {
- if (isSingleWord())
+ if (isSingleWord()) {
+ assert(BitWidth && "zero width values not allowed");
return isPowerOf2_64(U.VAL);
+ }
return countPopulationSlowCase() == 1;
}
+ /// Check if this APInt's negated value is a power of two greater than zero.
+ bool isNegatedPowerOf2() const {
+ assert(BitWidth && "zero width values not allowed");
+ if (isNonNegative())
+ return false;
+ // NegatedPowerOf2 - shifted mask in the top bits.
+ unsigned LO = countLeadingOnes();
+ unsigned TZ = countTrailingZeros();
+ return (LO + TZ) == BitWidth;
+ }
+
/// Check if the APInt's value is returned by getSignMask.
///
/// \returns true if this is the value returned by getSignMask.
@@ -480,7 +449,7 @@ public:
/// Convert APInt to a boolean value.
///
/// This converts the APInt to a boolean value as a test against zero.
- bool getBoolValue() const { return !!*this; }
+ bool getBoolValue() const { return !isZero(); }
/// If this value is smaller than the specified limit, return it, otherwise
/// return the limit value. This causes the value to saturate to the limit.
@@ -527,152 +496,22 @@ public:
return (Ones + LeadZ + countTrailingZeros()) == BitWidth;
}
- /// @}
- /// \name Value Generators
- /// @{
-
- /// Gets maximum unsigned value of APInt for specific bit width.
- static APInt getMaxValue(unsigned numBits) {
- return getAllOnesValue(numBits);
- }
-
- /// Gets maximum signed value of APInt for a specific bit width.
- static APInt getSignedMaxValue(unsigned numBits) {
- APInt API = getAllOnesValue(numBits);
- API.clearBit(numBits - 1);
- return API;
- }
-
- /// Gets minimum unsigned value of APInt for a specific bit width.
- static APInt getMinValue(unsigned numBits) { return APInt(numBits, 0); }
-
- /// Gets minimum signed value of APInt for a specific bit width.
- static APInt getSignedMinValue(unsigned numBits) {
- APInt API(numBits, 0);
- API.setBit(numBits - 1);
- return API;
- }
-
- /// Get the SignMask for a specific bit width.
- ///
- /// This is just a wrapper function of getSignedMinValue(), and it helps code
- /// readability when we want to get a SignMask.
- static APInt getSignMask(unsigned BitWidth) {
- return getSignedMinValue(BitWidth);
- }
-
- /// Get the all-ones value.
- ///
- /// \returns the all-ones value for an APInt of the specified bit-width.
- static APInt getAllOnesValue(unsigned numBits) {
- return APInt(numBits, WORDTYPE_MAX, true);
- }
-
- /// Get the '0' value.
- ///
- /// \returns the '0' value for an APInt of the specified bit-width.
- static APInt getNullValue(unsigned numBits) { return APInt(numBits, 0); }
-
/// Compute an APInt containing numBits highbits from this APInt.
///
- /// Get an APInt with the same BitWidth as this APInt, just zero mask
- /// the low bits and right shift to the least significant bit.
+ /// Get an APInt with the same BitWidth as this APInt, just zero mask the low
+ /// bits and right shift to the least significant bit.
///
/// \returns the high "numBits" bits of this APInt.
APInt getHiBits(unsigned numBits) const;
/// Compute an APInt containing numBits lowbits from this APInt.
///
- /// Get an APInt with the same BitWidth as this APInt, just zero mask
- /// the high bits.
+ /// Get an APInt with the same BitWidth as this APInt, just zero mask the high
+ /// bits.
///
/// \returns the low "numBits" bits of this APInt.
APInt getLoBits(unsigned numBits) const;
- /// Return an APInt with exactly one bit set in the result.
- static APInt getOneBitSet(unsigned numBits, unsigned BitNo) {
- APInt Res(numBits, 0);
- Res.setBit(BitNo);
- return Res;
- }
-
- /// Get a value with a block of bits set.
- ///
- /// Constructs an APInt value that has a contiguous range of bits set. The
- /// bits from loBit (inclusive) to hiBit (exclusive) will be set. All other
- /// bits will be zero. For example, with parameters(32, 0, 16) you would get
- /// 0x0000FFFF. Please call getBitsSetWithWrap if \p loBit may be greater than
- /// \p hiBit.
- ///
- /// \param numBits the intended bit width of the result
- /// \param loBit the index of the lowest bit set.
- /// \param hiBit the index of the highest bit set.
- ///
- /// \returns An APInt value with the requested bits set.
- static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit) {
- assert(loBit <= hiBit && "loBit greater than hiBit");
- APInt Res(numBits, 0);
- Res.setBits(loBit, hiBit);
- return Res;
- }
-
- /// Wrap version of getBitsSet.
- /// If \p hiBit is bigger than \p loBit, this is same with getBitsSet.
- /// If \p hiBit is not bigger than \p loBit, the set bits "wrap". For example,
- /// with parameters (32, 28, 4), you would get 0xF000000F.
- /// If \p hiBit is equal to \p loBit, you would get a result with all bits
- /// set.
- static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit,
- unsigned hiBit) {
- APInt Res(numBits, 0);
- Res.setBitsWithWrap(loBit, hiBit);
- return Res;
- }
-
- /// Get a value with upper bits starting at loBit set.
- ///
- /// Constructs an APInt value that has a contiguous range of bits set. The
- /// bits from loBit (inclusive) to numBits (exclusive) will be set. All other
- /// bits will be zero. For example, with parameters(32, 12) you would get
- /// 0xFFFFF000.
- ///
- /// \param numBits the intended bit width of the result
- /// \param loBit the index of the lowest bit to set.
- ///
- /// \returns An APInt value with the requested bits set.
- static APInt getBitsSetFrom(unsigned numBits, unsigned loBit) {
- APInt Res(numBits, 0);
- Res.setBitsFrom(loBit);
- return Res;
- }
-
- /// Get a value with high bits set
- ///
- /// Constructs an APInt value that has the top hiBitsSet bits set.
- ///
- /// \param numBits the bitwidth of the result
- /// \param hiBitsSet the number of high-order bits set in the result.
- static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet) {
- APInt Res(numBits, 0);
- Res.setHighBits(hiBitsSet);
- return Res;
- }
-
- /// Get a value with low bits set
- ///
- /// Constructs an APInt value that has the bottom loBitsSet bits set.
- ///
- /// \param numBits the bitwidth of the result
- /// \param loBitsSet the number of low-order bits set in the result.
- static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet) {
- APInt Res(numBits, 0);
- Res.setLowBits(loBitsSet);
- return Res;
- }
-
- /// Return a value containing V broadcasted over NewLen bits.
- static APInt getSplat(unsigned NewLen, const APInt &V);
-
/// Determine if two APInts have the same value, after zero-extending
/// one of them (if needed!) to ensure that the bit-widths match.
static bool isSameValue(const APInt &I1, const APInt &I2) {
@@ -701,12 +540,10 @@ public:
/// \name Unary Operators
/// @{
- /// Postfix increment operator.
- ///
- /// Increments *this by 1.
+ /// Postfix increment operator. Increment *this by 1.
///
/// \returns a new APInt value representing the original value of *this.
- const APInt operator++(int) {
+ APInt operator++(int) {
APInt API(*this);
++(*this);
return API;
@@ -717,12 +554,10 @@ public:
/// \returns *this incremented by one
APInt &operator++();
- /// Postfix decrement operator.
- ///
- /// Decrements *this by 1.
+ /// Postfix decrement operator. Decrement *this by 1.
///
/// \returns a new APInt value representing the original value of *this.
- const APInt operator--(int) {
+ APInt operator--(int) {
APInt API(*this);
--(*this);
return API;
@@ -733,16 +568,9 @@ public:
/// \returns *this decremented by one.
APInt &operator--();
- /// Logical negation operator.
- ///
- /// Performs logical negation operation on this APInt.
- ///
- /// \returns true if *this is zero, false otherwise.
- bool operator!() const {
- if (isSingleWord())
- return U.VAL == 0;
- return countLeadingZerosSlowCase() == BitWidth;
- }
+ /// Logical negation operator. Returns true if *this is zero, like a normal
+ /// integer.
+ bool operator!() const { return isZero(); }
/// @}
/// \name Assignment Operators
@@ -752,14 +580,15 @@ public:
///
/// \returns *this after assignment of RHS.
APInt &operator=(const APInt &RHS) {
- // If the bitwidths are the same, we can avoid mucking with memory
+ // The common case (both source and dest being inline) doesn't require
+ // allocation or deallocation.
if (isSingleWord() && RHS.isSingleWord()) {
U.VAL = RHS.U.VAL;
BitWidth = RHS.BitWidth;
- return clearUnusedBits();
+ return *this;
}
- AssignSlowCase(RHS);
+ assignSlowCase(RHS);
return *this;
}
@@ -780,7 +609,6 @@ public:
BitWidth = that.BitWidth;
that.BitWidth = 0;
-
return *this;
}
@@ -812,7 +640,7 @@ public:
if (isSingleWord())
U.VAL &= RHS.U.VAL;
else
- AndAssignSlowCase(RHS);
+ andAssignSlowCase(RHS);
return *this;
}
@@ -827,7 +655,7 @@ public:
return *this;
}
U.pVal[0] &= RHS;
- memset(U.pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
+ memset(U.pVal + 1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
return *this;
}
@@ -842,7 +670,7 @@ public:
if (isSingleWord())
U.VAL |= RHS.U.VAL;
else
- OrAssignSlowCase(RHS);
+ orAssignSlowCase(RHS);
return *this;
}
@@ -871,7 +699,7 @@ public:
if (isSingleWord())
U.VAL ^= RHS.U.VAL;
else
- XorAssignSlowCase(RHS);
+ xorAssignSlowCase(RHS);
return *this;
}
@@ -1057,6 +885,17 @@ public:
/// Rotate right by rotateAmt.
APInt rotr(const APInt &rotateAmt) const;
+ /// Concatenate the bits from "NewLSB" onto the bottom of *this. This is
+ /// equivalent to:
+ /// (this->zext(NewWidth) << NewLSB.getBitWidth()) | NewLSB.zext(NewWidth)
+ APInt concat(const APInt &NewLSB) const {
+ // If the result will be small, then both of the merged values are small.
+ unsigned NewWidth = getBitWidth() + NewLSB.getBitWidth();
+ if (NewWidth <= APINT_BITS_PER_WORD)
+ return APInt(NewWidth, (U.VAL << NewLSB.getBitWidth()) | NewLSB.U.VAL);
+ return concatSlowCase(NewLSB);
+ }
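A hypothetical usage sketch of the new concat() helper (illustrative values, not part of the patch):

    APInt Hi(4, 0xA), Lo(4, 0x3);
    APInt Joined = Hi.concat(Lo);                       // 8-bit value 0xA3
    assert(Joined == (Hi.zext(8).shl(4) | Lo.zext(8))); // the documented equivalence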
+
/// Unsigned division operation.
///
/// Perform an unsigned divide operation on this APInt by RHS. Both this and
@@ -1151,7 +990,7 @@ public:
assert(BitWidth == RHS.BitWidth && "Comparison requires equal bit widths");
if (isSingleWord())
return U.VAL == RHS.U.VAL;
- return EqualSlowCase(RHS);
+ return equalSlowCase(RHS);
}
/// Equality operator.
@@ -1436,8 +1275,6 @@ public:
clearUnusedBits();
}
- /// Set a given bit to 1.
- ///
/// Set the given bit to 1 whose position is given as "bitPosition".
void setBit(unsigned BitPosition) {
assert(BitPosition < BitWidth && "BitPosition out of range");
@@ -1449,9 +1286,7 @@ public:
}
/// Set the sign bit to 1.
- void setSignBit() {
- setBit(BitWidth - 1);
- }
+ void setSignBit() { setBit(BitWidth - 1); }
/// Set a given bit to a given value.
void setBitVal(unsigned BitPosition, bool BitValue) {
@@ -1497,14 +1332,10 @@ public:
}
/// Set the top bits starting from loBit.
- void setBitsFrom(unsigned loBit) {
- return setBits(loBit, BitWidth);
- }
+ void setBitsFrom(unsigned loBit) { return setBits(loBit, BitWidth); }
/// Set the bottom loBits bits.
- void setLowBits(unsigned loBits) {
- return setBits(0, loBits);
- }
+ void setLowBits(unsigned loBits) { return setBits(0, loBits); }
/// Set the top hiBits bits.
void setHighBits(unsigned hiBits) {
@@ -1539,9 +1370,7 @@ public:
}
/// Set the sign bit to 0.
- void clearSignBit() {
- clearBit(BitWidth - 1);
- }
+ void clearSignBit() { clearBit(BitWidth - 1); }
/// Toggle every bit to its opposite value.
void flipAllBits() {
@@ -1629,8 +1458,10 @@ public:
/// uint64_t. The bitwidth must be <= 64 or the value must fit within a
/// uint64_t. Otherwise an assertion will result.
uint64_t getZExtValue() const {
- if (isSingleWord())
+ if (isSingleWord()) {
+ assert(BitWidth && "zero width values not allowed");
return U.VAL;
+ }
assert(getActiveBits() <= 64 && "Too many bits for uint64_t");
return U.pVal[0];
}
@@ -1678,8 +1509,11 @@ public:
/// \returns 0 if the high order bit is not set, otherwise returns the number
/// of 1 bits from the most significant to the least
unsigned countLeadingOnes() const {
- if (isSingleWord())
+ if (isSingleWord()) {
+ if (LLVM_UNLIKELY(BitWidth == 0))
+ return 0;
return llvm::countLeadingOnes(U.VAL << (APINT_BITS_PER_WORD - BitWidth));
+ }
return countLeadingOnesSlowCase();
}
@@ -1774,9 +1608,7 @@ public:
/// The conversion does not do a translation from integer to double, it just
/// re-interprets the bits as a double. Note that it is valid to do this on
/// any bit width. Exactly 64 bits will be translated.
- double bitsToDouble() const {
- return BitsToDouble(getWord(0));
- }
+ double bitsToDouble() const { return BitsToDouble(getWord(0)); }
/// Converts APInt bits to a float
///
@@ -1808,7 +1640,7 @@ public:
/// @{
/// \returns the floor log base 2 of this APInt.
- unsigned logBase2() const { return getActiveBits() - 1; }
+ unsigned logBase2() const { return getActiveBits() - 1; }
/// \returns the ceil log base 2 of this APInt.
unsigned ceilLogBase2() const {
@@ -1826,25 +1658,7 @@ public:
///
/// to get around any mathematical concerns resulting from
/// referencing 2 in a space where 2 does not exist.
- unsigned nearestLogBase2() const {
- // Special case when we have a bitwidth of 1. If VAL is 1, then we
- // get 0. If VAL is 0, we get WORDTYPE_MAX which gets truncated to
- // UINT32_MAX.
- if (BitWidth == 1)
- return U.VAL - 1;
-
- // Handle the zero case.
- if (isNullValue())
- return UINT32_MAX;
-
- // The non-zero case is handled by computing:
- //
- // nearestLogBase2(x) = logBase2(x) + x[logBase2(x)-1].
- //
- // where x[i] is referring to the value of the ith bit of x.
- unsigned lg = logBase2();
- return lg + unsigned((*this)[lg - 1]);
- }
+ unsigned nearestLogBase2() const;
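A short worked example of the formula above (illustrative values):

    APInt Five(8, 5); // 0b101: logBase2() == 2, bit 1 is 0, so nearestLogBase2() == 2 (5 is closer to 4)
    APInt Six(8, 6);  // 0b110: logBase2() == 2, bit 1 is 1, so nearestLogBase2() == 3 (6 is closer to 8)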
/// \returns the log base 2 of this APInt if it is an exact power of two, -1
/// otherwise
@@ -1854,12 +1668,12 @@ public:
return logBase2();
}
- /// Compute the square root
+ /// Compute the square root.
APInt sqrt() const;
- /// Get the absolute value;
- ///
- /// If *this is < 0 then return -(*this), otherwise *this;
+ /// Get the absolute value. If *this is < 0 then return -(*this), otherwise
+ /// *this. Note that the "most negative" signed number (e.g. -128 for 8 bit
+ /// wide APInt) is unchanged due to how negation works.
APInt abs() const {
if (isNegative())
return -(*this);
@@ -1870,18 +1684,6 @@ public:
APInt multiplicativeInverse(const APInt &modulo) const;
/// @}
- /// \name Support for division by constant
- /// @{
-
- /// Calculate the magic number for signed division by a constant.
- struct ms;
- ms magic() const;
-
- /// Calculate the magic number for unsigned division by a constant.
- struct mu;
- mu magicu(unsigned LeadingZeros = 0) const;
-
- /// @}
/// \name Building-block Operations for APInt and APFloat
/// @{
@@ -1908,9 +1710,8 @@ public:
/// DST, of dstCOUNT parts, such that the bit srcLSB becomes the least
/// significant bit of DST. All high bits above srcBITS in DST are
/// zero-filled.
- static void tcExtract(WordType *, unsigned dstCount,
- const WordType *, unsigned srcBits,
- unsigned srcLSB);
+ static void tcExtract(WordType *, unsigned dstCount, const WordType *,
+ unsigned srcBits, unsigned srcLSB);
/// Set the given bit of a bignum. Zero-based.
static void tcSetBit(WordType *, unsigned bit);
@@ -1927,14 +1728,13 @@ public:
static void tcNegate(WordType *, unsigned);
/// DST += RHS + CARRY where CARRY is zero or one. Returns the carry flag.
- static WordType tcAdd(WordType *, const WordType *,
- WordType carry, unsigned);
+ static WordType tcAdd(WordType *, const WordType *, WordType carry, unsigned);
/// DST += RHS. Returns the carry flag.
static WordType tcAddPart(WordType *, WordType, unsigned);
/// DST -= RHS + CARRY where CARRY is zero or one. Returns the carry flag.
- static WordType tcSubtract(WordType *, const WordType *,
- WordType carry, unsigned);
+ static WordType tcSubtract(WordType *, const WordType *, WordType carry,
+ unsigned);
/// DST -= RHS. Returns the carry flag.
static WordType tcSubtractPart(WordType *, WordType, unsigned);
@@ -1950,8 +1750,7 @@ public:
/// otherwise overflow occurred and return one.
static int tcMultiplyPart(WordType *dst, const WordType *src,
WordType multiplier, WordType carry,
- unsigned srcParts, unsigned dstParts,
- bool add);
+ unsigned srcParts, unsigned dstParts, bool add);
/// DST = LHS * RHS, where DST has the same width as the operands and is
/// filled with the least significant parts of the result. Returns one if
@@ -1962,8 +1761,8 @@ public:
/// DST = LHS * RHS, where DST has width the sum of the widths of the
/// operands. No overflow occurs. DST must be disjoint from both operands.
- static void tcFullMultiply(WordType *, const WordType *,
- const WordType *, unsigned, unsigned);
+ static void tcFullMultiply(WordType *, const WordType *, const WordType *,
+ unsigned, unsigned);
/// If RHS is zero LHS and REMAINDER are left unchanged, return one.
/// Otherwise set LHS to LHS / RHS with the fractional part discarded, set
@@ -1974,9 +1773,8 @@ public:
/// SCRATCH is a bignum of the same size as the operands and result for use by
/// the routine; its contents need not be initialized and are destroyed. LHS,
/// REMAINDER and SCRATCH must be distinct.
- static int tcDivide(WordType *lhs, const WordType *rhs,
- WordType *remainder, WordType *scratch,
- unsigned parts);
+ static int tcDivide(WordType *lhs, const WordType *rhs, WordType *remainder,
+ WordType *scratch, unsigned parts);
/// Shift a bignum left Count bits. Shifted in bits are zero. There are no
/// restrictions on Count.
@@ -1986,12 +1784,6 @@ public:
/// restrictions on Count.
static void tcShiftRight(WordType *, unsigned Words, unsigned Count);
- /// The obvious AND, OR and XOR and complement operations.
- static void tcAnd(WordType *, const WordType *, unsigned);
- static void tcOr(WordType *, const WordType *, unsigned);
- static void tcXor(WordType *, const WordType *, unsigned);
- static void tcComplement(WordType *, unsigned);
-
/// Comparison (unsigned) of two bignums.
static int tcCompare(const WordType *, const WordType *, unsigned);
@@ -2005,26 +1797,185 @@ public:
return tcSubtractPart(dst, 1, parts);
}
- /// Set the least significant BITS and clear the rest.
- static void tcSetLeastSignificantBits(WordType *, unsigned, unsigned bits);
+ /// Used to insert APInt objects, or objects that contain APInt objects, into
+ /// FoldingSets.
+ void Profile(FoldingSetNodeID &id) const;
/// debug method
void dump() const;
- /// @}
-};
+ /// Returns whether this instance allocated memory.
+ bool needsCleanup() const { return !isSingleWord(); }
-/// Magic data for optimising signed division by a constant.
-struct APInt::ms {
- APInt m; ///< magic number
- unsigned s; ///< shift amount
-};
+private:
+ /// This union is used to store the integer value. When the
+ /// integer bit-width <= 64, it uses VAL, otherwise it uses pVal.
+ union {
+ uint64_t VAL; ///< Used to store the <= 64 bits integer value.
+ uint64_t *pVal; ///< Used to store the >64 bits integer value.
+ } U;
+
+ unsigned BitWidth; ///< The number of bits in this APInt.
+
+ friend struct DenseMapInfo<APInt, void>;
+ friend class APSInt;
+
+ /// This constructor is used only internally for speed of construction of
+ /// temporaries. It is unsafe since it takes ownership of the pointer, so it
+ /// is not public.
+ APInt(uint64_t *val, unsigned bits) : BitWidth(bits) { U.pVal = val; }
+
+ /// Determine which word a bit is in.
+ ///
+ /// \returns the word position for the specified bit position.
+ static unsigned whichWord(unsigned bitPosition) {
+ return bitPosition / APINT_BITS_PER_WORD;
+ }
+
+ /// Determine which bit in a word the specified bit position is in.
+ static unsigned whichBit(unsigned bitPosition) {
+ return bitPosition % APINT_BITS_PER_WORD;
+ }
+
+ /// Get a single bit mask.
+ ///
+ /// \returns a uint64_t with only bit at "whichBit(bitPosition)" set
+ /// This method generates and returns a uint64_t (word) mask for a single
+ /// bit at a specific bit position. This is used to mask the bit in the
+ /// corresponding word.
+ static uint64_t maskBit(unsigned bitPosition) {
+ return 1ULL << whichBit(bitPosition);
+ }
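A worked example of the index math above (a sketch assuming 64-bit words):

    // bitPosition = 70:
    //   whichWord(70) == 1          (70 / 64)
    //   whichBit(70)  == 6          (70 % 64)
    //   maskBit(70)   == 1ULL << 6  (0x40, applied to word 1)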
+
+ /// Clear unused high order bits
+ ///
+ /// This method is used internally to clear the top "N" bits in the high order
+ /// word that are not used by the APInt. This is needed after the most
+ /// significant word is assigned a value to ensure that those bits are
+ /// zero'd out.
+ APInt &clearUnusedBits() {
+ // Compute how many bits are used in the final word.
+ unsigned WordBits = ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1;
+
+ // Mask out the high bits.
+ uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - WordBits);
+ if (LLVM_UNLIKELY(BitWidth == 0))
+ mask = 0;
+
+ if (isSingleWord())
+ U.VAL &= mask;
+ else
+ U.pVal[getNumWords() - 1] &= mask;
+ return *this;
+ }
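For example (a sketch assuming 64-bit words): with BitWidth = 70, WordBits = ((70 - 1) % 64) + 1 = 6, so the top word is masked with WORDTYPE_MAX >> (64 - 6) = 0x3F, zeroing its 58 unused high bits.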
+
+ /// Get the word corresponding to a bit position
+ /// \returns the corresponding word for the specified bit position.
+ uint64_t getWord(unsigned bitPosition) const {
+ return isSingleWord() ? U.VAL : U.pVal[whichWord(bitPosition)];
+ }
+
+ /// Utility method to change the bit width of this APInt to new bit width,
+ /// allocating and/or deallocating as necessary. There is no guarantee on the
+ /// value of any bits upon return. Caller should populate the bits after.
+ void reallocate(unsigned NewBitWidth);
+
+ /// Convert a char array into an APInt
+ ///
+ /// \param radix 2, 8, 10, 16, or 36
+ /// Converts a string into a number. The string must be non-empty
+ /// and well-formed as a number of the given base. The bit-width
+ /// must be sufficient to hold the result.
+ ///
+ /// This is used by the constructors that take string arguments.
+ ///
+ /// StringRef::getAsInteger is superficially similar but (1) does
+ /// not assume that the string is well-formed and (2) grows the
+ /// result to hold the input.
+ void fromString(unsigned numBits, StringRef str, uint8_t radix);
+
+ /// An internal division function for dividing APInts.
+ ///
+ /// This is used by the toString method to divide by the radix. It simply
+ /// provides a more convenient form of divide for internal use since KnuthDiv
+ /// has specific constraints on its inputs. If those constraints are not met
+ /// then it provides a simpler form of divide.
+ static void divide(const WordType *LHS, unsigned lhsWords,
+ const WordType *RHS, unsigned rhsWords, WordType *Quotient,
+ WordType *Remainder);
+
+ /// out-of-line slow case for inline constructor
+ void initSlowCase(uint64_t val, bool isSigned);
+
+ /// shared code between two array constructors
+ void initFromArray(ArrayRef<uint64_t> array);
+
+ /// out-of-line slow case for inline copy constructor
+ void initSlowCase(const APInt &that);
+
+ /// out-of-line slow case for shl
+ void shlSlowCase(unsigned ShiftAmt);
+
+ /// out-of-line slow case for lshr.
+ void lshrSlowCase(unsigned ShiftAmt);
+
+ /// out-of-line slow case for ashr.
+ void ashrSlowCase(unsigned ShiftAmt);
+
+ /// out-of-line slow case for operator=
+ void assignSlowCase(const APInt &RHS);
+
+ /// out-of-line slow case for operator==
+ bool equalSlowCase(const APInt &RHS) const LLVM_READONLY;
+
+ /// out-of-line slow case for countLeadingZeros
+ unsigned countLeadingZerosSlowCase() const LLVM_READONLY;
+
+ /// out-of-line slow case for countLeadingOnes.
+ unsigned countLeadingOnesSlowCase() const LLVM_READONLY;
+
+ /// out-of-line slow case for countTrailingZeros.
+ unsigned countTrailingZerosSlowCase() const LLVM_READONLY;
+
+ /// out-of-line slow case for countTrailingOnes
+ unsigned countTrailingOnesSlowCase() const LLVM_READONLY;
+
+ /// out-of-line slow case for countPopulation
+ unsigned countPopulationSlowCase() const LLVM_READONLY;
+
+ /// out-of-line slow case for intersects.
+ bool intersectsSlowCase(const APInt &RHS) const LLVM_READONLY;
+
+ /// out-of-line slow case for isSubsetOf.
+ bool isSubsetOfSlowCase(const APInt &RHS) const LLVM_READONLY;
+
+ /// out-of-line slow case for setBits.
+ void setBitsSlowCase(unsigned loBit, unsigned hiBit);
+
+ /// out-of-line slow case for flipAllBits.
+ void flipAllBitsSlowCase();
-/// Magic data for optimising unsigned division by a constant.
-struct APInt::mu {
- APInt m; ///< magic number
- bool a; ///< add indicator
- unsigned s; ///< shift amount
+ /// out-of-line slow case for concat.
+ APInt concatSlowCase(const APInt &NewLSB) const;
+
+ /// out-of-line slow case for operator&=.
+ void andAssignSlowCase(const APInt &RHS);
+
+ /// out-of-line slow case for operator|=.
+ void orAssignSlowCase(const APInt &RHS);
+
+ /// out-of-line slow case for operator^=.
+ void xorAssignSlowCase(const APInt &RHS);
+
+ /// Unsigned comparison. Returns -1, 0, or 1 if this APInt is less than, equal
+ /// to, or greater than RHS.
+ int compare(const APInt &RHS) const LLVM_READONLY;
+
+ /// Signed comparison. Returns -1, 0, or 1 if this APInt is less than, equal
+ /// to, or greater than RHS.
+ int compareSigned(const APInt &RHS) const LLVM_READONLY;
+
+ /// @}
};
inline bool operator==(uint64_t V1, const APInt &V2) { return V2 == V1; }
@@ -2161,7 +2112,6 @@ inline APInt operator*(uint64_t LHS, APInt b) {
return b;
}
-
namespace APIntOps {
/// Determine the smaller of two APInts considered to be signed.
@@ -2277,7 +2227,16 @@ Optional<APInt> SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
Optional<unsigned> GetMostSignificantDifferentBit(const APInt &A,
const APInt &B);
-} // End of APIntOps namespace
+/// Splat/Merge neighboring bits to widen/narrow the bitmask represented
+/// by \p A to \p NewBitWidth bits.
+///
+/// e.g. ScaleBitMask(0b0101, 8) -> 0b00110011
+/// e.g. ScaleBitMask(0b00011011, 4) -> 0b0111
+/// A.getBitWidth() or NewBitWidth must be a whole multiple of the other.
+///
+/// TODO: Do we need a mode where all bits must be set when merging down?
+APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth);
+} // namespace APIntOps
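A hypothetical usage sketch of ScaleBitMask, assuming the splat/merge behaviour described in the comment above (merging ORs neighbouring bits together):

    APInt Mask(4, 0b0101);
    APInt Wide = APIntOps::ScaleBitMask(Mask, 8);   // 0b00110011: each bit splatted to two bits
    APInt Narrow = APIntOps::ScaleBitMask(Wide, 4); // 0b0101 again: pairs of bits merged back down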
// See friend declaration above. This additional declaration is required in
// order to compile LLVM with IBM xlC compiler.
@@ -2292,7 +2251,7 @@ void StoreIntToMemory(const APInt &IntVal, uint8_t *Dst, unsigned StoreBytes);
void LoadIntFromMemory(APInt &IntVal, const uint8_t *Src, unsigned LoadBytes);
/// Provide DenseMapInfo for APInt.
-template <> struct DenseMapInfo<APInt> {
+template <> struct DenseMapInfo<APInt, void> {
static inline APInt getEmptyKey() {
APInt V(nullptr, 0);
V.U.VAL = 0;
diff --git a/llvm/include/llvm/ADT/APSInt.h b/llvm/include/llvm/ADT/APSInt.h
index 1509d472f131..c1cf3c546070 100644
--- a/llvm/include/llvm/ADT/APSInt.h
+++ b/llvm/include/llvm/ADT/APSInt.h
@@ -58,7 +58,7 @@ public:
/// that 0 is not a positive value.
///
/// \returns true if this APSInt is positive.
- bool isStrictlyPositive() const { return isNonNegative() && !isNullValue(); }
+ bool isStrictlyPositive() const { return isNonNegative() && !isZero(); }
APSInt &operator=(APInt RHS) {
// Retain our current sign.
@@ -344,17 +344,17 @@ inline raw_ostream &operator<<(raw_ostream &OS, const APSInt &I) {
}
/// Provide DenseMapInfo for APSInt, using the DenseMapInfo for APInt.
-template <> struct DenseMapInfo<APSInt> {
+template <> struct DenseMapInfo<APSInt, void> {
static inline APSInt getEmptyKey() {
- return APSInt(DenseMapInfo<APInt>::getEmptyKey());
+ return APSInt(DenseMapInfo<APInt, void>::getEmptyKey());
}
static inline APSInt getTombstoneKey() {
- return APSInt(DenseMapInfo<APInt>::getTombstoneKey());
+ return APSInt(DenseMapInfo<APInt, void>::getTombstoneKey());
}
static unsigned getHashValue(const APSInt &Key) {
- return DenseMapInfo<APInt>::getHashValue(Key);
+ return DenseMapInfo<APInt, void>::getHashValue(Key);
}
static bool isEqual(const APSInt &LHS, const APSInt &RHS) {
diff --git a/llvm/include/llvm/ADT/ArrayRef.h b/llvm/include/llvm/ADT/ArrayRef.h
index 2df49223c987..61f85cfc812b 100644
--- a/llvm/include/llvm/ADT/ArrayRef.h
+++ b/llvm/include/llvm/ADT/ArrayRef.h
@@ -26,8 +26,6 @@
namespace llvm {
- template<typename T> struct DenseMapInfo;
-
/// ArrayRef - Represent a constant reference to an array (0 or more elements
/// consecutively in memory), i.e. a start pointer and a length. It allows
/// various APIs to take consecutive elements easily and conveniently.
@@ -572,7 +570,7 @@ namespace llvm {
}
// Provide DenseMapInfo for ArrayRefs.
- template <typename T> struct DenseMapInfo<ArrayRef<T>> {
+ template <typename T> struct DenseMapInfo<ArrayRef<T>, void> {
static inline ArrayRef<T> getEmptyKey() {
return ArrayRef<T>(
reinterpret_cast<const T *>(~static_cast<uintptr_t>(0)), size_t(0));
diff --git a/llvm/include/llvm/ADT/BitVector.h b/llvm/include/llvm/ADT/BitVector.h
index 31d388073633..cd1964cbdd98 100644
--- a/llvm/include/llvm/ADT/BitVector.h
+++ b/llvm/include/llvm/ADT/BitVector.h
@@ -85,7 +85,7 @@ class BitVector {
unsigned Size; // Size of bitvector in bits.
public:
- typedef unsigned size_type;
+ using size_type = unsigned;
// Encapsulation of a single bit.
class reference {
@@ -536,8 +536,8 @@ public:
[&Arg](auto const &BV) { return Arg.size() == BV; }) &&
"consistent sizes");
Out.resize(Arg.size());
- for (size_t i = 0, e = Arg.Bits.size(); i != e; ++i)
- Out.Bits[i] = f(Arg.Bits[i], Args.Bits[i]...);
+ for (size_type I = 0, E = Arg.Bits.size(); I != E; ++I)
+ Out.Bits[I] = f(Arg.Bits[I], Args.Bits[I]...);
Out.clear_unused_bits();
return Out;
}
@@ -545,16 +545,16 @@ public:
BitVector &operator|=(const BitVector &RHS) {
if (size() < RHS.size())
resize(RHS.size());
- for (size_t i = 0, e = RHS.Bits.size(); i != e; ++i)
- Bits[i] |= RHS.Bits[i];
+ for (size_type I = 0, E = RHS.Bits.size(); I != E; ++I)
+ Bits[I] |= RHS.Bits[I];
return *this;
}
BitVector &operator^=(const BitVector &RHS) {
if (size() < RHS.size())
resize(RHS.size());
- for (size_t i = 0, e = RHS.Bits.size(); i != e; ++i)
- Bits[i] ^= RHS.Bits[i];
+ for (size_type I = 0, E = RHS.Bits.size(); I != E; ++I)
+ Bits[I] ^= RHS.Bits[I];
return *this;
}
@@ -808,11 +808,11 @@ private:
public:
/// Return the size (in bytes) of the bit vector.
- size_t getMemorySize() const { return Bits.size() * sizeof(BitWord); }
- size_t getBitCapacity() const { return Bits.size() * BITWORD_SIZE; }
+ size_type getMemorySize() const { return Bits.size() * sizeof(BitWord); }
+ size_type getBitCapacity() const { return Bits.size() * BITWORD_SIZE; }
};
-inline size_t capacity_in_bytes(const BitVector &X) {
+inline BitVector::size_type capacity_in_bytes(const BitVector &X) {
return X.getMemorySize();
}
@@ -824,8 +824,8 @@ template <> struct DenseMapInfo<BitVector> {
return V;
}
static unsigned getHashValue(const BitVector &V) {
- return DenseMapInfo<std::pair<unsigned, ArrayRef<uintptr_t>>>::getHashValue(
- std::make_pair(V.size(), V.getData()));
+ return DenseMapInfo<std::pair<BitVector::size_type, ArrayRef<uintptr_t>>>::
+ getHashValue(std::make_pair(V.size(), V.getData()));
}
static bool isEqual(const BitVector &LHS, const BitVector &RHS) {
if (LHS.isInvalid() || RHS.isInvalid())
diff --git a/llvm/include/llvm/ADT/CombinationGenerator.h b/llvm/include/llvm/ADT/CombinationGenerator.h
new file mode 100644
index 000000000000..ab6afd555726
--- /dev/null
+++ b/llvm/include/llvm/ADT/CombinationGenerator.h
@@ -0,0 +1,148 @@
+//===-- llvm/ADT/CombinationGenerator.h ------------------------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Combination generator.
+///
+/// Example: given input {{0, 1}, {2}, {3, 4}} it will produce the following
+/// combinations: {0, 2, 3}, {0, 2, 4}, {1, 2, 3}, {1, 2, 4}.
+///
+/// It is useful to think of input as vector-of-vectors, where the
+/// outer vector is the variable space, and inner vector is choice space.
+/// The number of choices for each variable can be different.
+///
+/// As for implementation, it is useful to think of this as a weird number,
+/// where each digit (==variable) may have a different base (==number of choices).
+/// Thus modelling 'produce next combination' is exactly analogous to
+/// incrementing a number: increment the lowest digit (pick the next choice for
+/// the variable), and if it wrapped to the beginning then increment the next digit.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_COMBINATIONGENERATOR_H
+#define LLVM_ADT_COMBINATIONGENERATOR_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include <cassert>
+#include <cstring>
+
+namespace llvm {
+
+template <typename choice_type, typename choices_storage_type,
+ int variable_smallsize>
+class CombinationGenerator {
+ template <typename T> struct WrappingIterator {
+ using value_type = T;
+
+ const ArrayRef<value_type> Range;
+ typename decltype(Range)::const_iterator Position;
+
+ // Rewind the tape so that the position again points at the beginning.
+ void rewind() { Position = Range.begin(); }
+
+ // Advance position forward, possibly wrapping to the beginning.
+ // Returns whether the wrap happened.
+ bool advance() {
+ ++Position;
+ bool Wrapped = Position == Range.end();
+ if (Wrapped)
+ rewind();
+ return Wrapped;
+ }
+
+ // Get the value at which we are currently pointing.
+ const value_type &operator*() const { return *Position; }
+
+ WrappingIterator(ArrayRef<value_type> Range_) : Range(Range_) {
+ assert(!Range.empty() && "The range must not be empty.");
+ rewind();
+ }
+ };
+
+ const ArrayRef<choices_storage_type> VariablesChoices;
+
+ void performGeneration(
+ const function_ref<bool(ArrayRef<choice_type>)> Callback) const {
+ SmallVector<WrappingIterator<choice_type>, variable_smallsize>
+ VariablesState;
+
+ // 'increment' of the whole VariablesState is defined identically to the
+ // increment of a number: starting from the least significant element,
+ // increment it, and if it wrapped, then propagate that carry by also
+ // incrementing next (more significant) element.
+ auto IncrementState =
+ [](MutableArrayRef<WrappingIterator<choice_type>> VariablesState)
+ -> bool {
+ for (WrappingIterator<choice_type> &Variable :
+ llvm::reverse(VariablesState)) {
+ bool Wrapped = Variable.advance();
+ if (!Wrapped)
+ return false; // There you go, next combination is ready.
+ // We have a carry - increment the more significant variable next.
+ }
+ return true; // MSB variable wrapped, no more unique combinations.
+ };
+
+ // Initialize the per-variable state to refer to the possible choices for
+ // that variable.
+ VariablesState.reserve(VariablesChoices.size());
+ for (ArrayRef<choice_type> VC : VariablesChoices)
+ VariablesState.emplace_back(VC);
+
+ // Temporary buffer to store each combination before performing Callback.
+ SmallVector<choice_type, variable_smallsize> CurrentCombination;
+ CurrentCombination.resize(VariablesState.size());
+
+ while (true) {
+ // Gather the currently-selected variable choices into a vector.
+ for (auto I : llvm::zip(VariablesState, CurrentCombination))
+ std::get<1>(I) = *std::get<0>(I);
+ // And pass the new combination into callback, as intended.
+ if (/*Abort=*/Callback(CurrentCombination))
+ return;
+ // And tick the state to next combination, which will be unique.
+ if (IncrementState(VariablesState))
+ return; // All combinations produced.
+ }
+ };
+
+public:
+ CombinationGenerator(ArrayRef<choices_storage_type> VariablesChoices_)
+ : VariablesChoices(VariablesChoices_) {
+#ifndef NDEBUG
+ assert(!VariablesChoices.empty() && "There should be some variables.");
+ llvm::for_each(VariablesChoices, [](ArrayRef<choice_type> VariableChoices) {
+ assert(!VariableChoices.empty() &&
+ "There must always be some choice, at least a placeholder one.");
+ });
+#endif
+ }
+
+ // How many combinations can we produce, max?
+ // This is at most how many times the callback will be called.
+ size_t numCombinations() const {
+ size_t NumVariants = 1;
+ for (ArrayRef<choice_type> VariableChoices : VariablesChoices)
+ NumVariants *= VariableChoices.size();
+ assert(NumVariants >= 1 &&
+ "We should always end up producing at least one combination");
+ return NumVariants;
+ }
+
+ // Actually perform exhaustive combination generation.
+ // Each result will be passed into the callback.
+ void generate(const function_ref<bool(ArrayRef<choice_type>)> Callback) {
+ performGeneration(Callback);
+ }
+};
+
+} // namespace llvm
+
+#endif
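A hypothetical usage sketch of CombinationGenerator (types and sizes are illustrative):

    SmallVector<SmallVector<int, 2>, 3> Choices = {{0, 1}, {2}, {3, 4}};
    CombinationGenerator<int, SmallVector<int, 2>, 3> G(Choices);
    G.generate([](ArrayRef<int> Combination) {
      // Receives {0, 2, 3}, {0, 2, 4}, {1, 2, 3}, {1, 2, 4} in turn.
      return false; // keep going; returning true aborts early
    });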
diff --git a/llvm/include/llvm/ADT/DenseMapInfo.h b/llvm/include/llvm/ADT/DenseMapInfo.h
index d276acbfa6a6..75b7371a3683 100644
--- a/llvm/include/llvm/ADT/DenseMapInfo.h
+++ b/llvm/include/llvm/ADT/DenseMapInfo.h
@@ -13,10 +13,10 @@
#ifndef LLVM_ADT_DENSEMAPINFO_H
#define LLVM_ADT_DENSEMAPINFO_H
-#include "llvm/ADT/Hashing.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
+#include <tuple>
#include <utility>
namespace llvm {
@@ -39,7 +39,12 @@ static inline unsigned combineHashValue(unsigned a, unsigned b) {
} // end namespace detail
-template<typename T>
+/// An information struct used to provide DenseMap with the various necessary
+/// components for a given value type `T`. `Enable` is an optional additional
+/// parameter that is used to support SFINAE (generally using std::enable_if_t)
+/// in derived DenseMapInfo specializations; in non-SFINAE use cases this should
+/// just be `void`.
+template<typename T, typename Enable = void>
struct DenseMapInfo {
//static inline T getEmptyKey();
//static inline T getTombstoneKey();
@@ -282,13 +287,6 @@ template <typename... Ts> struct DenseMapInfo<std::tuple<Ts...>> {
}
};
-template <> struct DenseMapInfo<hash_code> {
- static inline hash_code getEmptyKey() { return hash_code(-1); }
- static inline hash_code getTombstoneKey() { return hash_code(-2); }
- static unsigned getHashValue(hash_code val) { return val; }
- static bool isEqual(hash_code LHS, hash_code RHS) { return LHS == RHS; }
-};
-
} // end namespace llvm
#endif // LLVM_ADT_DENSEMAPINFO_H
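As a sketch of what the new `Enable` parameter permits (hypothetical code, not part of this patch), a specialization can now be constrained with SFINAE, e.g. one DenseMapInfo covering every enum type by deferring to its underlying integer type:

    template <typename T>
    struct DenseMapInfo<T, std::enable_if_t<std::is_enum<T>::value>> {
      using U = std::underlying_type_t<T>;
      static inline T getEmptyKey() { return static_cast<T>(DenseMapInfo<U>::getEmptyKey()); }
      static inline T getTombstoneKey() { return static_cast<T>(DenseMapInfo<U>::getTombstoneKey()); }
      static unsigned getHashValue(const T &Val) {
        return DenseMapInfo<U>::getHashValue(static_cast<U>(Val));
      }
      static bool isEqual(const T &LHS, const T &RHS) { return LHS == RHS; }
    };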
diff --git a/llvm/include/llvm/ADT/EquivalenceClasses.h b/llvm/include/llvm/ADT/EquivalenceClasses.h
index 273b00f99d5d..de6bb3bca7e3 100644
--- a/llvm/include/llvm/ADT/EquivalenceClasses.h
+++ b/llvm/include/llvm/ADT/EquivalenceClasses.h
@@ -30,7 +30,8 @@ namespace llvm {
///
/// This implementation is an efficient implementation that only stores one copy
/// of the element being indexed per entry in the set, and allows any arbitrary
-/// type to be indexed (as long as it can be ordered with operator<).
+/// type to be indexed (as long as it can be ordered with operator< or a
+/// comparator is provided).
///
/// Here is a simple example using integers:
///
@@ -54,7 +55,7 @@ namespace llvm {
/// 4
/// 5 1 2
///
-template <class ElemTy>
+template <class ElemTy, class Compare = std::less<ElemTy>>
class EquivalenceClasses {
/// ECValue - The EquivalenceClasses data structure is just a set of these.
/// Each of these represents a relation for a value. First it stores the
@@ -101,22 +102,40 @@ class EquivalenceClasses {
assert(RHS.isLeader() && RHS.getNext() == nullptr && "Not a singleton!");
}
- bool operator<(const ECValue &UFN) const { return Data < UFN.Data; }
-
bool isLeader() const { return (intptr_t)Next & 1; }
const ElemTy &getData() const { return Data; }
const ECValue *getNext() const {
return (ECValue*)((intptr_t)Next & ~(intptr_t)1);
}
+ };
+
+ /// A wrapper of the comparator, to be passed to the set.
+ struct ECValueComparator {
+ using is_transparent = void;
+
+ ECValueComparator() : compare(Compare()) {}
+
+ bool operator()(const ECValue &lhs, const ECValue &rhs) const {
+ return compare(lhs.Data, rhs.Data);
+ }
+
+ template <typename T>
+ bool operator()(const T &lhs, const ECValue &rhs) const {
+ return compare(lhs, rhs.Data);
+ }
+
+ template <typename T>
+ bool operator()(const ECValue &lhs, const T &rhs) const {
+ return compare(lhs.Data, rhs);
+ }
- template<typename T>
- bool operator<(const T &Val) const { return Data < Val; }
+ const Compare compare;
};
/// TheMapping - This implicitly provides a mapping from ElemTy values to the
/// ECValues, it just keeps the key as part of the value.
- std::set<ECValue> TheMapping;
+ std::set<ECValue, ECValueComparator> TheMapping;
public:
EquivalenceClasses() = default;
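A small hypothetical example of the new comparator parameter:

    // Orders elements with std::greater instead of the default std::less.
    EquivalenceClasses<int, std::greater<int>> EC;
    EC.unionSets(1, 2);
    EC.unionSets(2, 3);
    // 1, 2 and 3 now belong to the same equivalence class.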
diff --git a/llvm/include/llvm/ADT/FunctionExtras.h b/llvm/include/llvm/ADT/FunctionExtras.h
index e67ef7377c88..5a37417ddde5 100644
--- a/llvm/include/llvm/ADT/FunctionExtras.h
+++ b/llvm/include/llvm/ADT/FunctionExtras.h
@@ -37,6 +37,7 @@
#include "llvm/ADT/STLForwardCompat.h"
#include "llvm/Support/MemAlloc.h"
#include "llvm/Support/type_traits.h"
+#include <cstring>
#include <memory>
#include <type_traits>
@@ -64,11 +65,16 @@ template <typename CallableT, typename ThisT>
using EnableUnlessSameType =
std::enable_if_t<!std::is_same<remove_cvref_t<CallableT>, ThisT>::value>;
template <typename CallableT, typename Ret, typename... Params>
-using EnableIfCallable =
- std::enable_if_t<std::is_void<Ret>::value ||
- std::is_convertible<decltype(std::declval<CallableT>()(
- std::declval<Params>()...)),
- Ret>::value>;
+using EnableIfCallable = std::enable_if_t<llvm::disjunction<
+ std::is_void<Ret>,
+ std::is_same<decltype(std::declval<CallableT>()(std::declval<Params>()...)),
+ Ret>,
+ std::is_same<const decltype(std::declval<CallableT>()(
+ std::declval<Params>()...)),
+ Ret>,
+ std::is_convertible<decltype(std::declval<CallableT>()(
+ std::declval<Params>()...)),
+ Ret>>::value>;
template <typename ReturnT, typename... ParamTs> class UniqueFunctionBase {
protected:
diff --git a/llvm/include/llvm/ADT/Hashing.h b/llvm/include/llvm/ADT/Hashing.h
index e296c1c53ebd..74a87a3d8dbb 100644
--- a/llvm/include/llvm/ADT/Hashing.h
+++ b/llvm/include/llvm/ADT/Hashing.h
@@ -56,6 +56,7 @@
#include <utility>
namespace llvm {
+template <typename T, typename Enable> struct DenseMapInfo;
/// An opaque object representing a hash code.
///
@@ -677,6 +678,13 @@ hash_code hash_value(const std::basic_string<T> &arg) {
return hash_combine_range(arg.begin(), arg.end());
}
+template <> struct DenseMapInfo<hash_code, void> {
+ static inline hash_code getEmptyKey() { return hash_code(-1); }
+ static inline hash_code getTombstoneKey() { return hash_code(-2); }
+ static unsigned getHashValue(hash_code val) { return val; }
+ static bool isEqual(hash_code LHS, hash_code RHS) { return LHS == RHS; }
+};
+
} // namespace llvm
#endif
diff --git a/llvm/include/llvm/ADT/ImmutableList.h b/llvm/include/llvm/ADT/ImmutableList.h
index c9ee494734e7..cf27c5a16d28 100644
--- a/llvm/include/llvm/ADT/ImmutableList.h
+++ b/llvm/include/llvm/ADT/ImmutableList.h
@@ -220,8 +220,7 @@ public:
// Partially-specialized Traits.
//===----------------------------------------------------------------------===//
-template<typename T> struct DenseMapInfo;
-template<typename T> struct DenseMapInfo<ImmutableList<T>> {
+template <typename T> struct DenseMapInfo<ImmutableList<T>, void> {
static inline ImmutableList<T> getEmptyKey() {
return reinterpret_cast<ImmutableListImpl<T>*>(-1);
}
diff --git a/llvm/include/llvm/ADT/IntervalMap.h b/llvm/include/llvm/ADT/IntervalMap.h
index 26a7ed0cd333..3c107a3622a9 100644
--- a/llvm/include/llvm/ADT/IntervalMap.h
+++ b/llvm/include/llvm/ADT/IntervalMap.h
@@ -1137,7 +1137,7 @@ public:
/// overlaps(a, b) - Return true if the intervals in this map overlap with the
/// interval [a;b].
- bool overlaps(KeyT a, KeyT b) {
+ bool overlaps(KeyT a, KeyT b) const {
assert(Traits::nonEmpty(a, b));
const_iterator I = find(a);
if (!I.valid())
diff --git a/llvm/include/llvm/ADT/MapVector.h b/llvm/include/llvm/ADT/MapVector.h
index 1de1124f4ea2..f9540999381a 100644
--- a/llvm/include/llvm/ADT/MapVector.h
+++ b/llvm/include/llvm/ADT/MapVector.h
@@ -43,6 +43,7 @@ class MapVector {
"The mapped_type of the specified Map must be an integral type");
public:
+ using key_type = KeyT;
using value_type = typename VectorType::value_type;
using size_type = typename VectorType::size_type;
diff --git a/llvm/include/llvm/ADT/PointerIntPair.h b/llvm/include/llvm/ADT/PointerIntPair.h
index cb8b202c48b7..393ace6b70fc 100644
--- a/llvm/include/llvm/ADT/PointerIntPair.h
+++ b/llvm/include/llvm/ADT/PointerIntPair.h
@@ -22,7 +22,7 @@
namespace llvm {
-template <typename T> struct DenseMapInfo;
+template <typename T, typename Enable> struct DenseMapInfo;
template <typename PointerT, unsigned IntBits, typename PtrTraits>
struct PointerIntPairInfo;
@@ -192,7 +192,7 @@ struct PointerIntPairInfo {
// Provide specialization of DenseMapInfo for PointerIntPair.
template <typename PointerTy, unsigned IntBits, typename IntType>
-struct DenseMapInfo<PointerIntPair<PointerTy, IntBits, IntType>> {
+struct DenseMapInfo<PointerIntPair<PointerTy, IntBits, IntType>, void> {
using Ty = PointerIntPair<PointerTy, IntBits, IntType>;
static Ty getEmptyKey() {
diff --git a/llvm/include/llvm/ADT/PointerUnion.h b/llvm/include/llvm/ADT/PointerUnion.h
index c39691061b72..0874f67db3fe 100644
--- a/llvm/include/llvm/ADT/PointerUnion.h
+++ b/llvm/include/llvm/ADT/PointerUnion.h
@@ -17,42 +17,13 @@
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/Support/PointerLikeTypeTraits.h"
+#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
namespace llvm {
-template <typename T> struct PointerUnionTypeSelectorReturn {
- using Return = T;
-};
-
-/// Get a type based on whether two types are the same or not.
-///
-/// For:
-///
-/// \code
-/// using Ret = typename PointerUnionTypeSelector<T1, T2, EQ, NE>::Return;
-/// \endcode
-///
-/// Ret will be EQ type if T1 is same as T2 or NE type otherwise.
-template <typename T1, typename T2, typename RET_EQ, typename RET_NE>
-struct PointerUnionTypeSelector {
- using Return = typename PointerUnionTypeSelectorReturn<RET_NE>::Return;
-};
-
-template <typename T, typename RET_EQ, typename RET_NE>
-struct PointerUnionTypeSelector<T, T, RET_EQ, RET_NE> {
- using Return = typename PointerUnionTypeSelectorReturn<RET_EQ>::Return;
-};
-
-template <typename T1, typename T2, typename RET_EQ, typename RET_NE>
-struct PointerUnionTypeSelectorReturn<
- PointerUnionTypeSelector<T1, T2, RET_EQ, RET_NE>> {
- using Return =
- typename PointerUnionTypeSelector<T1, T2, RET_EQ, RET_NE>::Return;
-};
-
namespace pointer_union_detail {
/// Determine the number of bits required to store integers with values < n.
/// This is ceil(log2(n)).
diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h
index eb001346b609..48f15b02283a 100644
--- a/llvm/include/llvm/ADT/STLExtras.h
+++ b/llvm/include/llvm/ADT/STLExtras.h
@@ -272,20 +272,24 @@ template <typename T> auto drop_begin(T &&RangeOrContainer, size_t N = 1) {
// be applied whenever operator* is invoked on the iterator.
template <typename ItTy, typename FuncTy,
- typename FuncReturnTy =
- decltype(std::declval<FuncTy>()(*std::declval<ItTy>()))>
+ typename ReferenceTy =
+ decltype(std::declval<FuncTy>()(*std::declval<ItTy>()))>
class mapped_iterator
: public iterator_adaptor_base<
- mapped_iterator<ItTy, FuncTy>, ItTy,
- typename std::iterator_traits<ItTy>::iterator_category,
- typename std::remove_reference<FuncReturnTy>::type> {
+ mapped_iterator<ItTy, FuncTy>, ItTy,
+ typename std::iterator_traits<ItTy>::iterator_category,
+ std::remove_reference_t<ReferenceTy>,
+ typename std::iterator_traits<ItTy>::difference_type,
+ std::remove_reference_t<ReferenceTy> *, ReferenceTy> {
public:
mapped_iterator(ItTy U, FuncTy F)
: mapped_iterator::iterator_adaptor_base(std::move(U)), F(std::move(F)) {}
ItTy getCurrent() { return this->I; }
- FuncReturnTy operator*() const { return F(*this->I); }
+ const FuncTy &getFunction() const { return F; }
+
+ ReferenceTy operator*() const { return F(*this->I); }
private:
FuncTy F;
@@ -303,6 +307,32 @@ auto map_range(ContainerTy &&C, FuncTy F) {
return make_range(map_iterator(C.begin(), F), map_iterator(C.end(), F));
}
+/// A base type of mapped iterator that is useful for building derived
+/// iterators that do not need/want to store the map function (as in
+/// mapped_iterator). These iterators must simply provide a `mapElement` method
+/// that defines how to map a value of the iterator to the provided reference
+/// type.
+template <typename DerivedT, typename ItTy, typename ReferenceTy>
+class mapped_iterator_base
+ : public iterator_adaptor_base<
+ DerivedT, ItTy,
+ typename std::iterator_traits<ItTy>::iterator_category,
+ std::remove_reference_t<ReferenceTy>,
+ typename std::iterator_traits<ItTy>::difference_type,
+ std::remove_reference_t<ReferenceTy> *, ReferenceTy> {
+public:
+ using BaseT = mapped_iterator_base;
+
+ mapped_iterator_base(ItTy U)
+ : mapped_iterator_base::iterator_adaptor_base(std::move(U)) {}
+
+ ItTy getCurrent() { return this->I; }
+
+ ReferenceTy operator*() const {
+ return static_cast<const DerivedT &>(*this).mapElement(*this->I);
+ }
+};
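A sketch of the intended use (hypothetical types, not part of the patch); a derived iterator only has to supply mapElement():

    // Iterates a range of std::pair<int, std::string> but yields only the string.
    using PairIt = std::vector<std::pair<int, std::string>>::iterator;
    struct SecondIterator
        : mapped_iterator_base<SecondIterator, PairIt, std::string &> {
      using BaseT::BaseT; // inherit the wrapping constructor
      std::string &mapElement(std::pair<int, std::string> &Elt) const {
        return Elt.second;
      }
    };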
+
/// Helper to determine if type T has a member called rbegin().
template <typename Ty> class has_rbegin_impl {
using yes = char[1];
@@ -371,12 +401,7 @@ class filter_iterator_base
typename std::common_type<
IterTag, typename std::iterator_traits<
WrappedIteratorT>::iterator_category>::type> {
- using BaseT = iterator_adaptor_base<
- filter_iterator_base<WrappedIteratorT, PredicateT, IterTag>,
- WrappedIteratorT,
- typename std::common_type<
- IterTag, typename std::iterator_traits<
- WrappedIteratorT>::iterator_category>::type>;
+ using BaseT = typename filter_iterator_base::iterator_adaptor_base;
protected:
WrappedIteratorT End;
@@ -411,12 +436,10 @@ template <typename WrappedIteratorT, typename PredicateT,
typename IterTag = std::forward_iterator_tag>
class filter_iterator_impl
: public filter_iterator_base<WrappedIteratorT, PredicateT, IterTag> {
- using BaseT = filter_iterator_base<WrappedIteratorT, PredicateT, IterTag>;
-
public:
filter_iterator_impl(WrappedIteratorT Begin, WrappedIteratorT End,
PredicateT Pred)
- : BaseT(Begin, End, Pred) {}
+ : filter_iterator_impl::filter_iterator_base(Begin, End, Pred) {}
};
/// Specialization of filter_iterator_base for bidirectional iteration.
@@ -425,8 +448,8 @@ class filter_iterator_impl<WrappedIteratorT, PredicateT,
std::bidirectional_iterator_tag>
: public filter_iterator_base<WrappedIteratorT, PredicateT,
std::bidirectional_iterator_tag> {
- using BaseT = filter_iterator_base<WrappedIteratorT, PredicateT,
- std::bidirectional_iterator_tag>;
+ using BaseT = typename filter_iterator_impl::filter_iterator_base;
+
void findPrevValid() {
while (!this->Pred(*this->I))
BaseT::operator--();
@@ -514,9 +537,7 @@ template <typename WrappedIteratorT>
class early_inc_iterator_impl
: public iterator_adaptor_base<early_inc_iterator_impl<WrappedIteratorT>,
WrappedIteratorT, std::input_iterator_tag> {
- using BaseT =
- iterator_adaptor_base<early_inc_iterator_impl<WrappedIteratorT>,
- WrappedIteratorT, std::input_iterator_tag>;
+ using BaseT = typename early_inc_iterator_impl::iterator_adaptor_base;
using PointerT = typename std::iterator_traits<WrappedIteratorT>::pointer;
@@ -630,12 +651,18 @@ protected:
return std::tuple<Iters...>(std::prev(std::get<Ns>(iterators))...);
}
+ template <size_t... Ns>
+ bool test_all_equals(const zip_common &other,
+ std::index_sequence<Ns...>) const {
+ return all_of(std::initializer_list<bool>{std::get<Ns>(this->iterators) ==
+ std::get<Ns>(other.iterators)...},
+ identity<bool>{});
+ }
+
public:
zip_common(Iters &&... ts) : iterators(std::forward<Iters>(ts)...) {}
- value_type operator*() { return deref(std::index_sequence_for<Iters...>{}); }
-
- const value_type operator*() const {
+ value_type operator*() const {
return deref(std::index_sequence_for<Iters...>{});
}
@@ -650,6 +677,11 @@ public:
iterators = tup_dec(std::index_sequence_for<Iters...>{});
return *reinterpret_cast<ZipType *>(this);
}
+
+ /// Return true if all the iterators match `other`'s iterators.
+ bool all_equals(zip_common &other) {
+ return test_all_equals(other, std::index_sequence_for<Iters...>{});
+ }
};
template <typename... Iters>
@@ -801,8 +833,6 @@ public:
: iterators(std::forward<Iters>(ts.first)...),
end_iterators(std::forward<Iters>(ts.second)...) {}
- value_type operator*() { return deref(std::index_sequence_for<Iters...>{}); }
-
value_type operator*() const {
return deref(std::index_sequence_for<Iters...>{});
}
@@ -1073,8 +1103,7 @@ template <typename DerivedT, typename BaseT, typename T,
typename PointerT = T *, typename ReferenceT = T &>
class indexed_accessor_range_base {
public:
- using RangeBaseT =
- indexed_accessor_range_base<DerivedT, BaseT, T, PointerT, ReferenceT>;
+ using RangeBaseT = indexed_accessor_range_base;
/// An iterator element of this range.
class iterator : public indexed_accessor_iterator<iterator, BaseT, T,
@@ -1087,8 +1116,7 @@ public:
private:
iterator(BaseT owner, ptrdiff_t curIndex)
- : indexed_accessor_iterator<iterator, BaseT, T, PointerT, ReferenceT>(
- owner, curIndex) {}
+ : iterator::indexed_accessor_iterator(owner, curIndex) {}
/// Allow access to the constructor.
friend indexed_accessor_range_base<DerivedT, BaseT, T, PointerT,
@@ -1234,20 +1262,39 @@ public:
}
};
+namespace detail {
+/// Return a reference to the first or second member of a reference. Otherwise,
+/// return a copy of the member of a temporary.
+///
+/// When passing a range whose iterators return values instead of references,
+/// the reference must be dropped from `decltype((elt.first))`, which will
+/// always be a reference, to avoid returning a reference to a temporary.
+template <typename EltTy, typename FirstTy> class first_or_second_type {
+public:
+ using type =
+ typename std::conditional_t<std::is_reference<EltTy>::value, FirstTy,
+ std::remove_reference_t<FirstTy>>;
+};
+} // end namespace detail
+
/// Given a container of pairs, return a range over the first elements.
template <typename ContainerTy> auto make_first_range(ContainerTy &&c) {
- return llvm::map_range(
- std::forward<ContainerTy>(c),
- [](decltype((*std::begin(c))) elt) -> decltype((elt.first)) {
- return elt.first;
- });
+ using EltTy = decltype((*std::begin(c)));
+ return llvm::map_range(std::forward<ContainerTy>(c),
+ [](EltTy elt) -> typename detail::first_or_second_type<
+ EltTy, decltype((elt.first))>::type {
+ return elt.first;
+ });
}
/// Given a container of pairs, return a range over the second elements.
template <typename ContainerTy> auto make_second_range(ContainerTy &&c) {
+ using EltTy = decltype((*std::begin(c)));
return llvm::map_range(
std::forward<ContainerTy>(c),
- [](decltype((*std::begin(c))) elt) -> decltype((elt.second)) {
+ [](EltTy elt) ->
+ typename detail::first_or_second_type<EltTy,
+ decltype((elt.second))>::type {
return elt.second;
});
}
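A brief usage sketch (hypothetical container), exercising the copy-vs-reference behaviour that first_or_second_type handles above:

    std::vector<std::pair<int, std::string>> Pairs = {{1, "a"}, {2, "b"}};
    for (int Key : make_first_range(Pairs))
      errs() << Key << " ";      // prints "1 2 "
    for (std::string &Value : make_second_range(Pairs))
      Value += "!";              // modifies the elements of Pairs in place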
@@ -1260,7 +1307,7 @@ template <typename ContainerTy> auto make_second_range(ContainerTy &&c) {
/// compares less than the first component of another std::pair.
struct less_first {
template <typename T> bool operator()(const T &lhs, const T &rhs) const {
- return lhs.first < rhs.first;
+ return std::less<>()(lhs.first, rhs.first);
}
};
@@ -1268,7 +1315,7 @@ struct less_first {
/// compares less than the second component of another std::pair.
struct less_second {
template <typename T> bool operator()(const T &lhs, const T &rhs) const {
- return lhs.second < rhs.second;
+ return std::less<>()(lhs.second, rhs.second);
}
};
@@ -1877,8 +1924,7 @@ template <typename R> struct result_pair {
}
std::size_t index() const { return Index; }
- const value_reference value() const { return *Iter; }
- value_reference value() { return *Iter; }
+ value_reference value() const { return *Iter; }
private:
std::size_t Index = std::numeric_limits<std::size_t>::max();
@@ -1887,11 +1933,8 @@ private:
template <typename R>
class enumerator_iter
- : public iterator_facade_base<
- enumerator_iter<R>, std::forward_iterator_tag, result_pair<R>,
- typename std::iterator_traits<IterOfRange<R>>::difference_type,
- typename std::iterator_traits<IterOfRange<R>>::pointer,
- typename std::iterator_traits<IterOfRange<R>>::reference> {
+ : public iterator_facade_base<enumerator_iter<R>, std::forward_iterator_tag,
+ const result_pair<R>> {
using result_type = result_pair<R>;
public:
@@ -1901,7 +1944,6 @@ public:
enumerator_iter(std::size_t Index, IterOfRange<R> Iter)
: Result(Index, Iter) {}
- result_type &operator*() { return Result; }
const result_type &operator*() const { return Result; }
enumerator_iter &operator++() {
@@ -1986,6 +2028,45 @@ decltype(auto) apply_tuple(F &&f, Tuple &&t) {
Indices{});
}
+namespace detail {
+
+template <typename Predicate, typename... Args>
+bool all_of_zip_predicate_first(Predicate &&P, Args &&...args) {
+ auto z = zip(args...);
+ auto it = z.begin();
+ auto end = z.end();
+ while (it != end) {
+ if (!apply_tuple([&](auto &&...args) { return P(args...); }, *it))
+ return false;
+ ++it;
+ }
+ return it.all_equals(end);
+}
+
+// Just an adaptor to switch the order of arguments and have the predicate before
+// the zipped inputs.
+template <typename... ArgsThenPredicate, size_t... InputIndexes>
+bool all_of_zip_predicate_last(
+ std::tuple<ArgsThenPredicate...> argsThenPredicate,
+ std::index_sequence<InputIndexes...>) {
+ auto constexpr OutputIndex =
+ std::tuple_size<decltype(argsThenPredicate)>::value - 1;
+ return all_of_zip_predicate_first(std::get<OutputIndex>(argsThenPredicate),
+ std::get<InputIndexes>(argsThenPredicate)...);
+}
+
+} // end namespace detail
+
+/// Compare two zipped ranges using the provided predicate (as last argument).
+/// Return true if all elements satisfy the predicate and false otherwise.
+/// Return false if the zipped iterators aren't all at the end (size mismatch).
+template <typename... ArgsAndPredicate>
+bool all_of_zip(ArgsAndPredicate &&...argsAndPredicate) {
+ return detail::all_of_zip_predicate_last(
+ std::forward_as_tuple(argsAndPredicate...),
+ std::make_index_sequence<sizeof...(argsAndPredicate) - 1>{});
+}
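A minimal usage sketch of all_of_zip (illustrative values):

    SmallVector<int, 3> A = {1, 2, 3};
    SmallVector<int, 3> B = {1, 2, 3};
    bool AllEqual = all_of_zip(A, B, [](int X, int Y) { return X == Y; }); // true
    // With ranges of different lengths it returns false, as noted above.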
+
/// Return true if the sequence [Begin, End) has exactly N items. Runs in O(N)
/// time. Not meant for use with random-access iterators.
/// Can optionally take a predicate to filter lazily some items.
diff --git a/llvm/include/llvm/ADT/Sequence.h b/llvm/include/llvm/ADT/Sequence.h
index 3e4bf0932222..fdbf397984d0 100644
--- a/llvm/include/llvm/ADT/Sequence.h
+++ b/llvm/include/llvm/ADT/Sequence.h
@@ -6,9 +6,74 @@
//
//===----------------------------------------------------------------------===//
/// \file
-/// This routine provides some synthesis utilities to produce sequences of
-/// values. The names are intentionally kept very short as they tend to occur
-/// in common and widely used contexts.
+/// Provides some synthesis utilities to produce sequences of values. The names
+/// are intentionally kept very short as they tend to occur in common and
+/// widely used contexts.
+///
+/// The `seq(A, B)` function produces a sequence of values from `A` to up to
+/// (but not including) `B`, i.e., [`A`, `B`), that can be safely iterated over.
+/// `seq` supports both integral (e.g., `int`, `char`, `uint32_t`) and enum
+/// types. `seq_inclusive(A, B)` produces a sequence of values from `A` to `B`,
+/// including `B`.
+///
+/// Examples with integral types:
+/// ```
+/// for (int x : seq(0, 3))
+/// outs() << x << " ";
+/// ```
+///
+/// Prints: `0 1 2 `.
+///
+/// ```
+/// for (int x : seq_inclusive(0, 3))
+/// outs() << x << " ";
+/// ```
+///
+/// Prints: `0 1 2 3 `.
+///
+/// Similar to `seq` and `seq_inclusive`, the `enum_seq` and
+/// `enum_seq_inclusive` functions produce sequences of enum values that can be
+/// iterated over.
+/// To enable iteration with enum types, you need to either mark enums as safe
+/// to iterate on by specializing `enum_iteration_traits`, or opt into
+/// potentially unsafe iteration at every callsite by passing
+/// `force_iteration_on_noniterable_enum`.
+///
+/// Examples with enum types:
+/// ```
+/// namespace X {
+/// enum class MyEnum : unsigned {A = 0, B, C};
+/// } // namespace X
+///
+/// template <> struct enum_iteration_traits<X::MyEnum> {
+/// static constexpr bool is_iterable = true;
+/// };
+///
+/// class MyClass {
+/// public:
+/// enum Safe { D = 3, E, F };
+/// enum MaybeUnsafe { G = 1, H = 2, I = 4 };
+/// };
+///
+/// template <> struct enum_iteration_traits<MyClass::Safe> {
+/// static constexpr bool is_iterable = true;
+/// };
+/// ```
+///
+/// ```
+/// for (auto v : enum_seq(MyClass::Safe::D, MyClass::Safe::F))
+/// outs() << int(v) << " ";
+/// ```
+///
+/// Prints: `3 4 `.
+///
+/// ```
+/// for (auto v : enum_seq(MyClass::MaybeUnsafe::H, MyClass::MaybeUnsafe::I,
+/// force_iteration_on_noniterable_enum))
+/// outs() << int(v) << " ";
+/// ```
+///
+/// Prints: `2 3 `.
///
//===----------------------------------------------------------------------===//
@@ -18,12 +83,31 @@
#include <cassert> // assert
#include <iterator> // std::random_access_iterator_tag
#include <limits> // std::numeric_limits
-#include <type_traits> // std::underlying_type, std::is_enum
+#include <type_traits> // std::is_integral, std::is_enum, std::underlying_type,
+ // std::enable_if
#include "llvm/Support/MathExtras.h" // AddOverflow / SubOverflow
namespace llvm {
+// Enum traits that mark enums as safe or unsafe to iterate over.
+// By default, enum types are *not* considered safe for iteration.
+// To allow iteration for your enum type, provide a specialization with
+// `is_iterable` set to `true` in the `llvm` namespace.
+// Alternatively, you can pass the `force_iteration_on_noniterable_enum` tag
+// to `enum_seq` or `enum_seq_inclusive`.
+template <typename EnumT> struct enum_iteration_traits {
+ static constexpr bool is_iterable = false;
+};
+
+struct force_iteration_on_noniterable_enum_t {
+ explicit force_iteration_on_noniterable_enum_t() = default;
+};
+
+// TODO: Make this `inline` once we update to C++17 to avoid ODR violations.
+constexpr force_iteration_on_noniterable_enum_t
+ force_iteration_on_noniterable_enum;
+
namespace detail {
// Returns whether a value of type U can be represented with type T.
@@ -213,27 +297,81 @@ private:
iterator PastEndValue;
};
-/// Iterate over an integral/enum type from Begin up to - but not including -
-/// End.
-/// Note on enum iteration: `seq` will generate each consecutive value, even if
-/// no enumerator with that value exists.
+/// Iterate over an integral type from Begin up to - but not including - End.
/// Note: Begin and End values have to be within [INTMAX_MIN, INTMAX_MAX] for
/// forward iteration (resp. [INTMAX_MIN + 1, INTMAX_MAX] for reverse
/// iteration).
-template <typename T> auto seq(T Begin, T End) {
+template <typename T, typename = std::enable_if_t<std::is_integral<T>::value &&
+ !std::is_enum<T>::value>>
+auto seq(T Begin, T End) {
return iota_range<T>(Begin, End, false);
}
-/// Iterate over an integral/enum type from Begin to End inclusive.
-/// Note on enum iteration: `seq_inclusive` will generate each consecutive
-/// value, even if no enumerator with that value exists.
+/// Iterate over an integral type from Begin to End inclusive.
/// Note: Begin and End values have to be within [INTMAX_MIN, INTMAX_MAX - 1]
/// for forward iteration (resp. [INTMAX_MIN + 1, INTMAX_MAX - 1] for reverse
/// iteration).
-template <typename T> auto seq_inclusive(T Begin, T End) {
+template <typename T, typename = std::enable_if_t<std::is_integral<T>::value &&
+ !std::is_enum<T>::value>>
+auto seq_inclusive(T Begin, T End) {
return iota_range<T>(Begin, End, true);
}
+/// Iterate over an enum type from Begin up to - but not including - End.
+/// Note: `enum_seq` will generate each consecutive value, even if no
+/// enumerator with that value exists.
+/// Note: Begin and End values have to be within [INTMAX_MIN, INTMAX_MAX] for
+/// forward iteration (resp. [INTMAX_MIN + 1, INTMAX_MAX] for reverse
+/// iteration).
+template <typename EnumT,
+ typename = std::enable_if_t<std::is_enum<EnumT>::value>>
+auto enum_seq(EnumT Begin, EnumT End) {
+ static_assert(enum_iteration_traits<EnumT>::is_iterable,
+ "Enum type is not marked as iterable.");
+ return iota_range<EnumT>(Begin, End, false);
+}
+
+/// Iterate over an enum type from Begin up to - but not including - End, even
+/// when `EnumT` is not marked as safely iterable by `enum_iteration_traits`.
+/// Note: `enum_seq` will generate each consecutive value, even if no
+/// enumerator with that value exists.
+/// Note: Begin and End values have to be within [INTMAX_MIN, INTMAX_MAX] for
+/// forward iteration (resp. [INTMAX_MIN + 1, INTMAX_MAX] for reverse
+/// iteration).
+template <typename EnumT,
+ typename = std::enable_if_t<std::is_enum<EnumT>::value>>
+auto enum_seq(EnumT Begin, EnumT End, force_iteration_on_noniterable_enum_t) {
+ return iota_range<EnumT>(Begin, End, false);
+}
+
+/// Iterate over an enum type from Begin to End inclusive.
+/// Note: `enum_seq_inclusive` will generate each consecutive value, even if no
+/// enumerator with that value exists.
+/// Note: Begin and End values have to be within [INTMAX_MIN, INTMAX_MAX - 1]
+/// for forward iteration (resp. [INTMAX_MIN + 1, INTMAX_MAX - 1] for reverse
+/// iteration).
+template <typename EnumT,
+ typename = std::enable_if_t<std::is_enum<EnumT>::value>>
+auto enum_seq_inclusive(EnumT Begin, EnumT End) {
+ static_assert(enum_iteration_traits<EnumT>::is_iterable,
+ "Enum type is not marked as iterable.");
+ return iota_range<EnumT>(Begin, End, true);
+}
+
+/// Iterate over an enum type from Begin to End inclusive, even when `EnumT`
+/// is not marked as safely iterable by `enum_iteration_traits`.
+/// Note: `enum_seq_inclusive` will generate each consecutive value, even if no
+/// enumerator with that value exists.
+/// Note: Begin and End values have to be within [INTMAX_MIN, INTMAX_MAX - 1]
+/// for forward iteration (resp. [INTMAX_MIN + 1, INTMAX_MAX - 1] for reverse
+/// iteration).
+template <typename EnumT,
+ typename = std::enable_if_t<std::is_enum<EnumT>::value>>
+auto enum_seq_inclusive(EnumT Begin, EnumT End,
+ force_iteration_on_noniterable_enum_t) {
+ return iota_range<EnumT>(Begin, End, true);
+}
+
} // end namespace llvm
#endif // LLVM_ADT_SEQUENCE_H
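For illustration, a minimal sketch of using the new `enum_seq_inclusive` entry point; the `Color` enum is hypothetical, and the trait specialization follows the pattern documented in the header comment above.

```
#include "llvm/ADT/Sequence.h"
#include "llvm/Support/raw_ostream.h"

enum class Color { Red = 0, Green, Blue };

// Opt the enum into iteration, as described above.
namespace llvm {
template <> struct enum_iteration_traits<Color> {
  static constexpr bool is_iterable = true;
};
} // namespace llvm

void printAllColors() {
  // Inclusive of both endpoints: prints "0 1 2 ".
  for (Color C : llvm::enum_seq_inclusive(Color::Red, Color::Blue))
    llvm::outs() << static_cast<int>(C) << " ";
}
```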
diff --git a/llvm/include/llvm/ADT/SetOperations.h b/llvm/include/llvm/ADT/SetOperations.h
index 62f1d26dc1c2..3e30b6bb83d3 100644
--- a/llvm/include/llvm/ADT/SetOperations.h
+++ b/llvm/include/llvm/ADT/SetOperations.h
@@ -77,15 +77,6 @@ bool set_is_subset(const S1Ty &S1, const S2Ty &S2) {
return true;
}
-/// set_is_strict_subset(A, B) - Return true iff A in B and and A != B
-///
-template <class S1Ty, class S2Ty>
-bool set_is_strict_subset(const S1Ty &S1, const S2Ty &S2) {
- if (S1.size() >= S2.size())
- return false;
- return set_is_subset(S1, S2);
-}
-
} // End llvm namespace
#endif
diff --git a/llvm/include/llvm/ADT/SmallBitVector.h b/llvm/include/llvm/ADT/SmallBitVector.h
index f570bac23ad5..51ee5dbbce05 100644
--- a/llvm/include/llvm/ADT/SmallBitVector.h
+++ b/llvm/include/llvm/ADT/SmallBitVector.h
@@ -60,7 +60,7 @@ class SmallBitVector {
"Unsupported word size");
public:
- using size_type = unsigned;
+ using size_type = uintptr_t;
// Encapsulation of a single bit.
class reference {
@@ -96,7 +96,7 @@ private:
return reinterpret_cast<BitVector *>(X);
}
- void switchToSmall(uintptr_t NewSmallBits, size_t NewSize) {
+ void switchToSmall(uintptr_t NewSmallBits, size_type NewSize) {
X = 1;
setSmallSize(NewSize);
setSmallBits(NewSmallBits);
@@ -120,9 +120,11 @@ private:
}
// Return the size.
- size_t getSmallSize() const { return getSmallRawBits() >> SmallNumDataBits; }
+ size_type getSmallSize() const {
+ return getSmallRawBits() >> SmallNumDataBits;
+ }
- void setSmallSize(size_t Size) {
+ void setSmallSize(size_type Size) {
setSmallRawBits(getSmallBits() | (Size << SmallNumDataBits));
}
@@ -189,7 +191,7 @@ public:
}
/// Returns the number of bits in this bitvector.
- size_t size() const {
+ size_type size() const {
return isSmall() ? getSmallSize() : getPointer()->size();
}
@@ -336,8 +338,8 @@ public:
} else {
BitVector *BV = new BitVector(N, t);
uintptr_t OldBits = getSmallBits();
- for (size_t i = 0, e = getSmallSize(); i != e; ++i)
- (*BV)[i] = (OldBits >> i) & 1;
+ for (size_type I = 0, E = getSmallSize(); I != E; ++I)
+ (*BV)[I] = (OldBits >> I) & 1;
switchToLarge(BV);
}
}
@@ -346,11 +348,11 @@ public:
if (isSmall()) {
if (N > SmallNumDataBits) {
uintptr_t OldBits = getSmallRawBits();
- size_t SmallSize = getSmallSize();
+ size_type SmallSize = getSmallSize();
BitVector *BV = new BitVector(SmallSize);
- for (size_t i = 0; i < SmallSize; ++i)
- if ((OldBits >> i) & 1)
- BV->set(i);
+ for (size_type I = 0; I < SmallSize; ++I)
+ if ((OldBits >> I) & 1)
+ BV->set(I);
BV->reserve(N);
switchToLarge(BV);
}
@@ -491,8 +493,8 @@ public:
else if (!isSmall() && !RHS.isSmall())
return *getPointer() == *RHS.getPointer();
else {
- for (size_t i = 0, e = size(); i != e; ++i) {
- if ((*this)[i] != RHS[i])
+ for (size_type I = 0, E = size(); I != E; ++I) {
+ if ((*this)[I] != RHS[I])
return false;
}
return true;
@@ -512,11 +514,11 @@ public:
else if (!isSmall() && !RHS.isSmall())
getPointer()->operator&=(*RHS.getPointer());
else {
- size_t i, e;
- for (i = 0, e = std::min(size(), RHS.size()); i != e; ++i)
- (*this)[i] = test(i) && RHS.test(i);
- for (e = size(); i != e; ++i)
- reset(i);
+ size_type I, E;
+ for (I = 0, E = std::min(size(), RHS.size()); I != E; ++I)
+ (*this)[I] = test(I) && RHS.test(I);
+ for (E = size(); I != E; ++I)
+ reset(I);
}
return *this;
}
@@ -561,8 +563,8 @@ public:
else if (!isSmall() && !RHS.isSmall())
getPointer()->operator|=(*RHS.getPointer());
else {
- for (size_t i = 0, e = RHS.size(); i != e; ++i)
- (*this)[i] = test(i) || RHS.test(i);
+ for (size_type I = 0, E = RHS.size(); I != E; ++I)
+ (*this)[I] = test(I) || RHS.test(I);
}
return *this;
}
@@ -574,8 +576,8 @@ public:
else if (!isSmall() && !RHS.isSmall())
getPointer()->operator^=(*RHS.getPointer());
else {
- for (size_t i = 0, e = RHS.size(); i != e; ++i)
- (*this)[i] = test(i) != RHS.test(i);
+ for (size_type I = 0, E = RHS.size(); I != E; ++I)
+ (*this)[I] = test(I) != RHS.test(I);
}
return *this;
}
@@ -721,8 +723,9 @@ template <> struct DenseMapInfo<SmallBitVector> {
}
static unsigned getHashValue(const SmallBitVector &V) {
uintptr_t Store;
- return DenseMapInfo<std::pair<unsigned, ArrayRef<uintptr_t>>>::getHashValue(
- std::make_pair(V.size(), V.getData(Store)));
+ return DenseMapInfo<
+ std::pair<SmallBitVector::size_type, ArrayRef<uintptr_t>>>::
+ getHashValue(std::make_pair(V.size(), V.getData(Store)));
}
static bool isEqual(const SmallBitVector &LHS, const SmallBitVector &RHS) {
if (LHS.isInvalid() || RHS.isInvalid())
diff --git a/llvm/include/llvm/ADT/SmallVector.h b/llvm/include/llvm/ADT/SmallVector.h
index b8a11030fc33..0d13524f25ce 100644
--- a/llvm/include/llvm/ADT/SmallVector.h
+++ b/llvm/include/llvm/ADT/SmallVector.h
@@ -1239,13 +1239,22 @@ inline size_t capacity_in_bytes(const SmallVector<T, N> &X) {
return X.capacity_in_bytes();
}
+template <typename RangeType>
+using ValueTypeFromRangeType =
+ typename std::remove_const<typename std::remove_reference<
+ decltype(*std::begin(std::declval<RangeType &>()))>::type>::type;
+
/// Given a range of type R, iterate the entire range and return a
/// SmallVector with elements of the vector. This is useful, for example,
/// when you want to iterate a range and then sort the results.
template <unsigned Size, typename R>
-SmallVector<typename std::remove_const<typename std::remove_reference<
- decltype(*std::begin(std::declval<R &>()))>::type>::type,
- Size>
+SmallVector<ValueTypeFromRangeType<R>, Size> to_vector(R &&Range) {
+ return {std::begin(Range), std::end(Range)};
+}
+template <typename R>
+SmallVector<ValueTypeFromRangeType<R>,
+ CalculateSmallVectorDefaultInlinedElements<
+ ValueTypeFromRangeType<R>>::value>
to_vector(R &&Range) {
return {std::begin(Range), std::end(Range)};
}
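A small usage sketch of the new `to_vector` overload that deduces the number of inline elements; the input range here is just an illustrative `std::array`.

```
#include "llvm/ADT/SmallVector.h"
#include <array>

void copyRange() {
  std::array<int, 4> Src = {1, 2, 3, 4};
  // Explicit inline capacity, as before.
  llvm::SmallVector<int, 8> A = llvm::to_vector<8>(Src);
  // New overload: the inline capacity defaults to
  // CalculateSmallVectorDefaultInlinedElements<int>::value.
  llvm::SmallVector<int> B = llvm::to_vector(Src);
  (void)A;
  (void)B;
}
```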
diff --git a/llvm/include/llvm/ADT/StringExtras.h b/llvm/include/llvm/ADT/StringExtras.h
index 6bda25b85313..2ca672e7855b 100644
--- a/llvm/include/llvm/ADT/StringExtras.h
+++ b/llvm/include/llvm/ADT/StringExtras.h
@@ -67,22 +67,27 @@ inline ArrayRef<uint8_t> arrayRefFromStringRef(StringRef Input) {
///
/// If \p C is not a valid hex digit, -1U is returned.
inline unsigned hexDigitValue(char C) {
- struct HexTable {
- unsigned LUT[255] = {};
- constexpr HexTable() {
- // Default initialize everything to invalid.
- for (int i = 0; i < 255; ++i)
- LUT[i] = ~0U;
- // Initialize `0`-`9`.
- for (int i = 0; i < 10; ++i)
- LUT['0' + i] = i;
- // Initialize `A`-`F` and `a`-`f`.
- for (int i = 0; i < 6; ++i)
- LUT['A' + i] = LUT['a' + i] = 10 + i;
- }
+ /* clang-format off */
+ static const int16_t LUT[256] = {
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // '0'..'9'
+ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 'A'..'F'
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 'a'..'f'
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
- constexpr HexTable Table;
- return Table.LUT[static_cast<unsigned char>(C)];
+ /* clang-format on */
+ return LUT[static_cast<unsigned char>(C)];
}
/// Checks if character \p C is one of the 10 decimal digits.
@@ -210,24 +215,31 @@ inline bool tryGetFromHex(StringRef Input, std::string &Output) {
if (Input.empty())
return true;
- Output.reserve((Input.size() + 1) / 2);
+ // If the input string is not properly aligned on 2 nibbles, we pad out the
+ // front with a 0 prefix; e.g. `ABC` -> `0ABC`.
+ Output.resize((Input.size() + 1) / 2);
+ char *OutputPtr = const_cast<char *>(Output.data());
if (Input.size() % 2 == 1) {
uint8_t Hex = 0;
if (!tryGetHexFromNibbles('0', Input.front(), Hex))
return false;
-
- Output.push_back(Hex);
+ *OutputPtr++ = Hex;
Input = Input.drop_front();
}
- assert(Input.size() % 2 == 0);
- while (!Input.empty()) {
+ // Convert the nibble pairs (e.g. `9C`) into bytes (0x9C).
+ // With the padding above we know the input is aligned and the output expects
+ // exactly half as many bytes as nibbles in the input.
+ size_t InputSize = Input.size();
+ assert(InputSize % 2 == 0);
+ const char *InputPtr = Input.data();
+ for (size_t OutputIndex = 0; OutputIndex < InputSize / 2; ++OutputIndex) {
uint8_t Hex = 0;
- if (!tryGetHexFromNibbles(Input[0], Input[1], Hex))
+ if (!tryGetHexFromNibbles(InputPtr[OutputIndex * 2 + 0], // MSB
+ InputPtr[OutputIndex * 2 + 1], // LSB
+ Hex))
return false;
-
- Output.push_back(Hex);
- Input = Input.drop_front(2);
+ OutputPtr[OutputIndex] = Hex;
}
return true;
}
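A brief sketch of the padding behaviour described in the comments above; the byte values shown are what the odd-length handling implies.

```
#include "llvm/ADT/StringExtras.h"
#include <string>

void decodeHex() {
  std::string Out;
  // Even number of nibbles: "1A2B" decodes to the bytes 0x1A 0x2B.
  bool OkEven = llvm::tryGetFromHex("1A2B", Out);
  // Odd number of nibbles: "ABC" is treated as "0ABC", i.e. 0x0A 0xBC.
  bool OkOdd = llvm::tryGetFromHex("ABC", Out);
  (void)OkEven;
  (void)OkOdd;
}
```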
@@ -501,6 +513,83 @@ public:
}
};
+/// A forward iterator over the partitions of a string around a separator.
+class SplittingIterator
+ : public iterator_facade_base<SplittingIterator, std::forward_iterator_tag,
+ StringRef> {
+ char SeparatorStorage;
+ StringRef Current;
+ StringRef Next;
+ StringRef Separator;
+
+public:
+ SplittingIterator(StringRef Str, StringRef Separator)
+ : Next(Str), Separator(Separator) {
+ ++*this;
+ }
+
+ SplittingIterator(StringRef Str, char Separator)
+ : SeparatorStorage(Separator), Next(Str),
+ Separator(&SeparatorStorage, 1) {
+ ++*this;
+ }
+
+ SplittingIterator(const SplittingIterator &R)
+ : SeparatorStorage(R.SeparatorStorage), Current(R.Current), Next(R.Next),
+ Separator(R.Separator) {
+ if (R.Separator.data() == &R.SeparatorStorage)
+ Separator = StringRef(&SeparatorStorage, 1);
+ }
+
+ SplittingIterator &operator=(const SplittingIterator &R) {
+ if (this == &R)
+ return *this;
+
+ SeparatorStorage = R.SeparatorStorage;
+ Current = R.Current;
+ Next = R.Next;
+ Separator = R.Separator;
+ if (R.Separator.data() == &R.SeparatorStorage)
+ Separator = StringRef(&SeparatorStorage, 1);
+ return *this;
+ }
+
+ bool operator==(const SplittingIterator &R) const {
+ assert(Separator == R.Separator);
+ return Current.data() == R.Current.data();
+ }
+
+ const StringRef &operator*() const { return Current; }
+
+ StringRef &operator*() { return Current; }
+
+ SplittingIterator &operator++() {
+ std::tie(Current, Next) = Next.split(Separator);
+ return *this;
+ }
+};
+
+/// Split the specified string over a separator and return a range-compatible
+/// iterable over its partitions. Used to permit conveniently iterating
+/// over separated strings like so:
+///
+/// \code
+/// for (StringRef x : llvm::split("foo,bar,baz", ","))
+/// ...;
+/// \endcode
+///
+/// Note that the passed string must remain valid throughout the lifetime
+/// of the iterators.
+inline iterator_range<SplittingIterator> split(StringRef Str, StringRef Separator) {
+ return {SplittingIterator(Str, Separator),
+ SplittingIterator(StringRef(), Separator)};
+}
+
+inline iterator_range<SplittingIterator> split(StringRef Str, char Separator) {
+ return {SplittingIterator(Str, Separator),
+ SplittingIterator(StringRef(), Separator)};
+}
+
} // end namespace llvm
#endif // LLVM_ADT_STRINGEXTRAS_H
diff --git a/llvm/include/llvm/ADT/StringMap.h b/llvm/include/llvm/ADT/StringMap.h
index a82afc9a817c..669956d41e0c 100644
--- a/llvm/include/llvm/ADT/StringMap.h
+++ b/llvm/include/llvm/ADT/StringMap.h
@@ -126,9 +126,7 @@ public:
StringMap(std::initializer_list<std::pair<StringRef, ValueTy>> List)
: StringMapImpl(List.size(), static_cast<unsigned>(sizeof(MapEntryTy))) {
- for (const auto &P : List) {
- insert(P);
- }
+ insert(List);
}
StringMap(StringMap &&RHS)
@@ -297,6 +295,21 @@ public:
return try_emplace(KV.first, std::move(KV.second));
}
+ /// Inserts elements from range [first, last). If multiple elements in the
+ /// range have keys that compare equivalent, it is unspecified which element
+ /// is inserted.
+ template <typename InputIt> void insert(InputIt First, InputIt Last) {
+ for (InputIt It = First; It != Last; ++It)
+ insert(*It);
+ }
+
+ /// Inserts elements from the initializer list \p List. If multiple elements
+ /// in the range have keys that compare equivalent, it is unspecified which
+ /// element is inserted.
+ void insert(std::initializer_list<std::pair<StringRef, ValueTy>> List) {
+ insert(List.begin(), List.end());
+ }
+
/// Inserts an element or assigns to the current element if the key already
/// exists. The return type is the same as try_emplace.
template <typename V>
@@ -465,13 +478,7 @@ public:
explicit StringMapKeyIterator(StringMapConstIterator<ValueTy> Iter)
: base(std::move(Iter)) {}
- StringRef &operator*() {
- Key = this->wrapped()->getKey();
- return Key;
- }
-
-private:
- StringRef Key;
+ StringRef operator*() const { return this->wrapped()->getKey(); }
};
} // end namespace llvm
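A short sketch of the new `insert` overloads; the keys and values are illustrative.

```
#include "llvm/ADT/StringMap.h"
#include <utility>
#include <vector>

void fillMap() {
  llvm::StringMap<int> Map;
  // Initializer-list insert.
  Map.insert({{"one", 1}, {"two", 2}});
  // Range insert; for duplicate keys it is unspecified which element wins.
  std::vector<std::pair<llvm::StringRef, int>> More = {{"three", 3},
                                                       {"two", 22}};
  Map.insert(More.begin(), More.end());
}
```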
diff --git a/llvm/include/llvm/ADT/StringRef.h b/llvm/include/llvm/ADT/StringRef.h
index 17e64f7f81bb..9f4b89218042 100644
--- a/llvm/include/llvm/ADT/StringRef.h
+++ b/llvm/include/llvm/ADT/StringRef.h
@@ -35,7 +35,6 @@ namespace llvm {
class APInt;
class hash_code;
template <typename T> class SmallVectorImpl;
- template <typename T> struct DenseMapInfo;
class StringRef;
/// Helper functions for StringRef::getAsInteger.
@@ -949,7 +948,7 @@ namespace llvm {
hash_code hash_value(StringRef S);
// Provide DenseMapInfo for StringRefs.
- template <> struct DenseMapInfo<StringRef> {
+ template <> struct DenseMapInfo<StringRef, void> {
static inline StringRef getEmptyKey() {
return StringRef(
reinterpret_cast<const char *>(~static_cast<uintptr_t>(0)), 0);
diff --git a/llvm/include/llvm/ADT/Triple.h b/llvm/include/llvm/ADT/Triple.h
index 76f3514050f0..2fd3047acbfd 100644
--- a/llvm/include/llvm/ADT/Triple.h
+++ b/llvm/include/llvm/ADT/Triple.h
@@ -93,6 +93,8 @@ public:
hsail64, // AMD HSAIL with 64-bit pointers
spir, // SPIR: standard portable IR for OpenCL 32-bit version
spir64, // SPIR: standard portable IR for OpenCL 64-bit version
+ spirv32, // SPIR-V with 32-bit pointers
+ spirv64, // SPIR-V with 64-bit pointers
kalimba, // Kalimba: generic kalimba
shave, // SHAVE: Movidius vector VLIW processors
lanai, // Lanai: Lanai 32-bit
@@ -106,6 +108,9 @@ public:
enum SubArchType {
NoSubArch,
+ ARMSubArch_v9_2a,
+ ARMSubArch_v9_1a,
+ ARMSubArch_v9,
ARMSubArch_v8_7a,
ARMSubArch_v8_6a,
ARMSubArch_v8_5a,
@@ -290,10 +295,10 @@ public:
/// @name Normalization
/// @{
- /// normalize - Turn an arbitrary machine specification into the canonical
- /// triple form (or something sensible that the Triple class understands if
- /// nothing better can reasonably be done). In particular, it handles the
- /// common case in which otherwise valid components are in the wrong order.
+ /// Turn an arbitrary machine specification into the canonical triple form (or
+ /// something sensible that the Triple class understands if nothing better can
+ /// reasonably be done). In particular, it handles the common case in which
+ /// otherwise valid components are in the wrong order.
static std::string normalize(StringRef Str);
/// Return the normalized form of this triple's string.
@@ -303,25 +308,24 @@ public:
/// @name Typed Component Access
/// @{
- /// getArch - Get the parsed architecture type of this triple.
+ /// Get the parsed architecture type of this triple.
ArchType getArch() const { return Arch; }
- /// getSubArch - get the parsed subarchitecture type for this triple.
+ /// Get the parsed subarchitecture type for this triple.
SubArchType getSubArch() const { return SubArch; }
- /// getVendor - Get the parsed vendor type of this triple.
+ /// Get the parsed vendor type of this triple.
VendorType getVendor() const { return Vendor; }
- /// getOS - Get the parsed operating system type of this triple.
+ /// Get the parsed operating system type of this triple.
OSType getOS() const { return OS; }
- /// hasEnvironment - Does this triple have the optional environment
- /// (fourth) component?
+ /// Does this triple have the optional environment (fourth) component?
bool hasEnvironment() const {
return getEnvironmentName() != "";
}
- /// getEnvironment - Get the parsed environment type of this triple.
+ /// Get the parsed environment type of this triple.
EnvironmentType getEnvironment() const { return Environment; }
/// Parse the version number from the OS name component of the
@@ -333,39 +337,39 @@ public:
void getEnvironmentVersion(unsigned &Major, unsigned &Minor,
unsigned &Micro) const;
- /// getFormat - Get the object format for this triple.
+ /// Get the object format for this triple.
ObjectFormatType getObjectFormat() const { return ObjectFormat; }
- /// getOSVersion - Parse the version number from the OS name component of the
- /// triple, if present.
+ /// Parse the version number from the OS name component of the triple, if
+ /// present.
///
/// For example, "fooos1.2.3" would return (1, 2, 3).
///
/// If an entry is not defined, it will be returned as 0.
void getOSVersion(unsigned &Major, unsigned &Minor, unsigned &Micro) const;
- /// getOSMajorVersion - Return just the major version number, this is
- /// specialized because it is a common query.
+ /// Return just the major version number, this is specialized because it is a
+ /// common query.
unsigned getOSMajorVersion() const {
unsigned Maj, Min, Micro;
getOSVersion(Maj, Min, Micro);
return Maj;
}
- /// getMacOSXVersion - Parse the version number as with getOSVersion and then
- /// translate generic "darwin" versions to the corresponding OS X versions.
- /// This may also be called with IOS triples but the OS X version number is
- /// just set to a constant 10.4.0 in that case. Returns true if successful.
+ /// Parse the version number as with getOSVersion and then translate generic
+ /// "darwin" versions to the corresponding OS X versions. This may also be
+ /// called with IOS triples but the OS X version number is just set to a
+ /// constant 10.4.0 in that case. Returns true if successful.
bool getMacOSXVersion(unsigned &Major, unsigned &Minor,
unsigned &Micro) const;
- /// getiOSVersion - Parse the version number as with getOSVersion. This should
- /// only be called with IOS or generic triples.
+ /// Parse the version number as with getOSVersion. This should only be called
+ /// with IOS or generic triples.
void getiOSVersion(unsigned &Major, unsigned &Minor,
unsigned &Micro) const;
- /// getWatchOSVersion - Parse the version number as with getOSVersion. This
- /// should only be called with WatchOS or generic triples.
+ /// Parse the version number as with getOSVersion. This should only be called
+ /// with WatchOS or generic triples.
void getWatchOSVersion(unsigned &Major, unsigned &Minor,
unsigned &Micro) const;
@@ -377,24 +381,24 @@ public:
const std::string &getTriple() const { return Data; }
- /// getArchName - Get the architecture (first) component of the
- /// triple.
+ /// Get the architecture (first) component of the triple.
StringRef getArchName() const;
- /// getVendorName - Get the vendor (second) component of the triple.
+ /// Get the architecture name based on Kind and SubArch.
+ StringRef getArchName(ArchType Kind, SubArchType SubArch = NoSubArch) const;
+
+ /// Get the vendor (second) component of the triple.
StringRef getVendorName() const;
- /// getOSName - Get the operating system (third) component of the
- /// triple.
+ /// Get the operating system (third) component of the triple.
StringRef getOSName() const;
- /// getEnvironmentName - Get the optional environment (fourth)
- /// component of the triple, or "" if empty.
+ /// Get the optional environment (fourth) component of the triple, or "" if
+ /// empty.
StringRef getEnvironmentName() const;
- /// getOSAndEnvironmentName - Get the operating system and optional
- /// environment components as a single string (separated by a '-'
- /// if the environment component is present).
+ /// Get the operating system and optional environment components as a single
+ /// string (separated by a '-' if the environment component is present).
StringRef getOSAndEnvironmentName() const;
/// @}
@@ -420,8 +424,8 @@ public:
/// Note that this tests for 16-bit pointer width, and nothing else.
bool isArch16Bit() const;
- /// isOSVersionLT - Helper function for doing comparisons against version
- /// numbers included in the target triple.
+ /// Helper function for doing comparisons against version numbers included in
+ /// the target triple.
bool isOSVersionLT(unsigned Major, unsigned Minor = 0,
unsigned Micro = 0) const {
unsigned LHS[3];
@@ -443,14 +447,13 @@ public:
return isOSVersionLT(RHS[0], RHS[1], RHS[2]);
}
- /// isMacOSXVersionLT - Comparison function for checking OS X version
- /// compatibility, which handles supporting skewed version numbering schemes
- /// used by the "darwin" triples.
+ /// Comparison function for checking OS X version compatibility, which handles
+ /// supporting skewed version numbering schemes used by the "darwin" triples.
bool isMacOSXVersionLT(unsigned Major, unsigned Minor = 0,
unsigned Micro = 0) const;
- /// isMacOSX - Is this a Mac OS X triple. For legacy reasons, we support both
- /// "darwin" and "osx" as OS X triples.
+ /// Is this a Mac OS X triple. For legacy reasons, we support both "darwin"
+ /// and "osx" as OS X triples.
bool isMacOSX() const {
return getOS() == Triple::Darwin || getOS() == Triple::MacOSX;
}
@@ -480,7 +483,7 @@ public:
bool isOSzOS() const { return getOS() == Triple::ZOS; }
- /// isOSDarwin - Is this a "Darwin" OS (macOS, iOS, tvOS or watchOS).
+ /// Is this a "Darwin" OS (macOS, iOS, tvOS or watchOS).
bool isOSDarwin() const {
return isMacOSX() || isiOS() || isWatchOS();
}
@@ -698,6 +701,11 @@ public:
return getArch() == Triple::spir || getArch() == Triple::spir64;
}
+ /// Tests whether the target is SPIR-V (32/64-bit).
+ bool isSPIRV() const {
+ return getArch() == Triple::spirv32 || getArch() == Triple::spirv64;
+ }
+
/// Tests whether the target is NVPTX (32- or 64-bit).
bool isNVPTX() const {
return getArch() == Triple::nvptx || getArch() == Triple::nvptx64;
@@ -720,6 +728,19 @@ public:
return getArch() == Triple::arm || getArch() == Triple::armeb;
}
+ /// Tests whether the target supports the EHABI exception
+ /// handling standard.
+ bool isTargetEHABICompatible() const {
+ return (isARM() || isThumb()) &&
+ (getEnvironment() == Triple::EABI ||
+ getEnvironment() == Triple::GNUEABI ||
+ getEnvironment() == Triple::MuslEABI ||
+ getEnvironment() == Triple::EABIHF ||
+ getEnvironment() == Triple::GNUEABIHF ||
+ getEnvironment() == Triple::MuslEABIHF || isAndroid()) &&
+ isOSBinFormatELF();
+ }
+
/// Tests whether the target is AArch64 (little and big endian).
bool isAArch64() const {
return getArch() == Triple::aarch64 || getArch() == Triple::aarch64_be ||
@@ -833,46 +854,38 @@ public:
/// @name Mutators
/// @{
- /// setArch - Set the architecture (first) component of the triple
- /// to a known type.
- void setArch(ArchType Kind);
+ /// Set the architecture (first) component of the triple to a known type.
+ void setArch(ArchType Kind, SubArchType SubArch = NoSubArch);
- /// setVendor - Set the vendor (second) component of the triple to a
- /// known type.
+ /// Set the vendor (second) component of the triple to a known type.
void setVendor(VendorType Kind);
- /// setOS - Set the operating system (third) component of the triple
- /// to a known type.
+ /// Set the operating system (third) component of the triple to a known type.
void setOS(OSType Kind);
- /// setEnvironment - Set the environment (fourth) component of the triple
- /// to a known type.
+ /// Set the environment (fourth) component of the triple to a known type.
void setEnvironment(EnvironmentType Kind);
- /// setObjectFormat - Set the object file format
+ /// Set the object file format.
void setObjectFormat(ObjectFormatType Kind);
- /// setTriple - Set all components to the new triple \p Str.
+ /// Set all components to the new triple \p Str.
void setTriple(const Twine &Str);
- /// setArchName - Set the architecture (first) component of the
- /// triple by name.
+ /// Set the architecture (first) component of the triple by name.
void setArchName(StringRef Str);
- /// setVendorName - Set the vendor (second) component of the triple
- /// by name.
+ /// Set the vendor (second) component of the triple by name.
void setVendorName(StringRef Str);
- /// setOSName - Set the operating system (third) component of the
- /// triple by name.
+ /// Set the operating system (third) component of the triple by name.
void setOSName(StringRef Str);
- /// setEnvironmentName - Set the optional environment (fourth)
- /// component of the triple by name.
+ /// Set the optional environment (fourth) component of the triple by name.
void setEnvironmentName(StringRef Str);
- /// setOSAndEnvironmentName - Set the operating system and optional
- /// environment components with a single string.
+ /// Set the operating system and optional environment components with a single
+ /// string.
void setOSAndEnvironmentName(StringRef Str);
/// @}
@@ -938,33 +951,30 @@ public:
/// @name Static helpers for IDs.
/// @{
- /// getArchTypeName - Get the canonical name for the \p Kind architecture.
+ /// Get the canonical name for the \p Kind architecture.
static StringRef getArchTypeName(ArchType Kind);
- /// getArchTypePrefix - Get the "prefix" canonical name for the \p Kind
- /// architecture. This is the prefix used by the architecture specific
- /// builtins, and is suitable for passing to \see
- /// Intrinsic::getIntrinsicForGCCBuiltin().
+ /// Get the "prefix" canonical name for the \p Kind architecture. This is the
+ /// prefix used by the architecture specific builtins, and is suitable for
+ /// passing to \see Intrinsic::getIntrinsicForGCCBuiltin().
///
/// \return - The architecture prefix, or 0 if none is defined.
static StringRef getArchTypePrefix(ArchType Kind);
- /// getVendorTypeName - Get the canonical name for the \p Kind vendor.
+ /// Get the canonical name for the \p Kind vendor.
static StringRef getVendorTypeName(VendorType Kind);
- /// getOSTypeName - Get the canonical name for the \p Kind operating system.
+ /// Get the canonical name for the \p Kind operating system.
static StringRef getOSTypeName(OSType Kind);
- /// getEnvironmentTypeName - Get the canonical name for the \p Kind
- /// environment.
+ /// Get the canonical name for the \p Kind environment.
static StringRef getEnvironmentTypeName(EnvironmentType Kind);
/// @}
/// @name Static helpers for converting alternate architecture names.
/// @{
- /// getArchTypeForLLVMName - The canonical type for the given LLVM
- /// architecture name (e.g., "x86").
+ /// The canonical type for the given LLVM architecture name (e.g., "x86").
static ArchType getArchTypeForLLVMName(StringRef Str);
/// @}
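A hedged sketch of the new SPIR-V query and the SubArch-aware setArch; the triple string is illustrative and assumes the parser recognizes the new spirv64 architecture.

```
#include "llvm/ADT/Triple.h"
#include "llvm/Support/raw_ostream.h"

void inspectTriple() {
  llvm::Triple T("spirv64-unknown-unknown");
  if (T.isSPIRV())
    llvm::outs() << "SPIR-V triple: " << T.str() << "\n";
  // setArch now optionally takes a sub-architecture.
  T.setArch(llvm::Triple::aarch64, llvm::Triple::NoSubArch);
}
```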
diff --git a/llvm/include/llvm/ADT/TypeSwitch.h b/llvm/include/llvm/ADT/TypeSwitch.h
index 815b9a40afaf..3b7598f3251d 100644
--- a/llvm/include/llvm/ADT/TypeSwitch.h
+++ b/llvm/include/llvm/ADT/TypeSwitch.h
@@ -35,7 +35,12 @@ public:
/// Invoke a case on the derived class with multiple case types.
template <typename CaseT, typename CaseT2, typename... CaseTs,
typename CallableT>
- DerivedT &Case(CallableT &&caseFn) {
+ // This is marked always_inline and nodebug so it doesn't show up in stack
+ // traces at -O0 (or other optimization levels). Large TypeSwitches are
+ // common, are equivalent to a switch, and don't add any value to stack
+ // traces.
+ LLVM_ATTRIBUTE_ALWAYS_INLINE LLVM_ATTRIBUTE_NODEBUG DerivedT &
+ Case(CallableT &&caseFn) {
DerivedT &derived = static_cast<DerivedT &>(*this);
return derived.template Case<CaseT>(caseFn)
.template Case<CaseT2, CaseTs...>(caseFn);
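For context, a minimal TypeSwitch usage sketch over IR values; the classification strings are arbitrary.

```
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/IR/Instructions.h"

llvm::StringRef classify(llvm::Value *V) {
  return llvm::TypeSwitch<llvm::Value *, llvm::StringRef>(V)
      .Case<llvm::LoadInst>([](llvm::LoadInst *) { return "load"; })
      .Case<llvm::StoreInst, llvm::CallInst>(
          [](auto *) { return "store-or-call"; })
      .Default([](llvm::Value *) { return "other"; });
}
```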
diff --git a/llvm/include/llvm/ADT/iterator.h b/llvm/include/llvm/ADT/iterator.h
index b3c6608e9b6e..6f0c42fe08be 100644
--- a/llvm/include/llvm/ADT/iterator.h
+++ b/llvm/include/llvm/ADT/iterator.h
@@ -35,6 +35,21 @@ namespace llvm {
/// terms of addition of one. These aren't equivalent for all iterator
/// categories, and respecting that adds a lot of complexity for little gain.
///
+/// Iterators are expected to have const rules analogous to pointers, with a
+/// single, const-qualified operator*() that returns ReferenceT. This matches
+/// the second and third pointers in the following example:
+/// \code
+/// int Value;
+/// { int *I = &Value; } // ReferenceT 'int&'
+/// { int *const I = &Value; } // ReferenceT 'int&'; const
+/// { const int *I = &Value; } // ReferenceT 'const int&'
+/// { const int *const I = &Value; } // ReferenceT 'const int&'; const
+/// \endcode
+/// If an iterator facade returns a handle to its own state, then T (and
+/// PointerT and ReferenceT) should usually be const-qualified. Otherwise, if
+/// clients are expected to modify the handle itself, the field can be declared
+/// mutable or use const_cast.
+///
/// Classes wishing to use `iterator_facade_base` should implement the following
/// methods:
///
@@ -42,8 +57,7 @@ namespace llvm {
/// (All of the following methods)
/// - DerivedT &operator=(const DerivedT &R);
/// - bool operator==(const DerivedT &R) const;
-/// - const T &operator*() const;
-/// - T &operator*();
+/// - T &operator*() const;
/// - DerivedT &operator++();
///
/// Bidirectional Iterators:
@@ -95,6 +109,22 @@ protected:
operator ReferenceT() const { return *I; }
};
+ /// A proxy object for computing a pointer via indirecting a copy of a
+ /// reference. This is used in APIs which need to produce a pointer but for
+ /// which the reference might be a temporary. The proxy preserves the
+ /// reference internally and exposes the pointer via an arrow operator.
+ class PointerProxy {
+ friend iterator_facade_base;
+
+ ReferenceT R;
+
+ template <typename RefT>
+ PointerProxy(RefT &&R) : R(std::forward<RefT>(R)) {}
+
+ public:
+ PointerT operator->() const { return &R; }
+ };
+
public:
DerivedT operator+(DifferenceTypeT n) const {
static_assert(std::is_base_of<iterator_facade_base, DerivedT>::value,
@@ -172,19 +202,13 @@ public:
return !(static_cast<const DerivedT &>(*this) < RHS);
}
- PointerT operator->() { return &static_cast<DerivedT *>(this)->operator*(); }
- PointerT operator->() const {
- return &static_cast<const DerivedT *>(this)->operator*();
- }
- ReferenceProxy operator[](DifferenceTypeT n) {
- static_assert(IsRandomAccess,
- "Subscripting is only defined for random access iterators.");
- return ReferenceProxy(static_cast<DerivedT *>(this)->operator+(n));
+ PointerProxy operator->() const {
+ return static_cast<const DerivedT *>(this)->operator*();
}
ReferenceProxy operator[](DifferenceTypeT n) const {
static_assert(IsRandomAccess,
"Subscripting is only defined for random access iterators.");
- return ReferenceProxy(static_cast<const DerivedT *>(this)->operator+(n));
+ return static_cast<const DerivedT *>(this)->operator+(n);
}
};
@@ -330,8 +354,7 @@ public:
explicit pointer_iterator(WrappedIteratorT u)
: pointer_iterator::iterator_adaptor_base(std::move(u)) {}
- T &operator*() { return Ptr = &*this->I; }
- const T &operator*() const { return Ptr = &*this->I; }
+ T &operator*() const { return Ptr = &*this->I; }
};
template <typename RangeT, typename WrappedIteratorT =
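A minimal iterator sketch against the updated iterator_facade_base contract (a single const-qualified operator* and a const-qualified value type); the counting iterator itself is only illustrative.

```
#include "llvm/ADT/iterator.h"
#include <iterator>

// A trivial forward iterator over consecutive ints.
class CountingIterator
    : public llvm::iterator_facade_base<CountingIterator,
                                        std::forward_iterator_tag, const int> {
  int Value = 0;

public:
  CountingIterator() = default;
  explicit CountingIterator(int V) : Value(V) {}
  bool operator==(const CountingIterator &R) const { return Value == R.Value; }
  // Single, const-qualified dereference, as the updated contract requires.
  const int &operator*() const { return Value; }
  CountingIterator &operator++() {
    ++Value;
    return *this;
  }
};
```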
diff --git a/llvm/include/llvm/Analysis/AliasAnalysis.h b/llvm/include/llvm/Analysis/AliasAnalysis.h
index 7fec0feb09d5..2770a1a9b277 100644
--- a/llvm/include/llvm/Analysis/AliasAnalysis.h
+++ b/llvm/include/llvm/Analysis/AliasAnalysis.h
@@ -61,6 +61,7 @@ class DominatorTree;
class FenceInst;
class Function;
class InvokeInst;
+class LoopInfo;
class PreservedAnalyses;
class TargetLibraryInfo;
class Value;
@@ -378,6 +379,50 @@ createModRefInfo(const FunctionModRefBehavior FMRB) {
return ModRefInfo(FMRB & static_cast<int>(ModRefInfo::ModRef));
}
+/// Virtual base class for providers of capture information.
+struct CaptureInfo {
+ virtual ~CaptureInfo() = 0;
+ virtual bool isNotCapturedBeforeOrAt(const Value *Object,
+ const Instruction *I) = 0;
+};
+
+/// Context-free CaptureInfo provider, which computes and caches whether an
+/// object is captured in the function at all, but does not distinguish whether
+/// it was captured before or after the context instruction.
+class SimpleCaptureInfo final : public CaptureInfo {
+ SmallDenseMap<const Value *, bool, 8> IsCapturedCache;
+
+public:
+ bool isNotCapturedBeforeOrAt(const Value *Object,
+ const Instruction *I) override;
+};
+
+/// Context-sensitive CaptureInfo provider, which computes and caches the
+/// earliest common dominator closure of all captures. It provides a good
+/// approximation to a precise "captures before" analysis.
+class EarliestEscapeInfo final : public CaptureInfo {
+ DominatorTree &DT;
+ const LoopInfo &LI;
+
+ /// Map from identified local object to an instruction before which it does
+ /// not escape, or nullptr if it never escapes. The "earliest" instruction
+ /// may be a conservative approximation, e.g. the first instruction in the
+ /// function is always a legal choice.
+ DenseMap<const Value *, Instruction *> EarliestEscapes;
+
+ /// Reverse map from instruction to the objects it is the earliest escape for.
+ /// This is used for cache invalidation purposes.
+ DenseMap<Instruction *, TinyPtrVector<const Value *>> Inst2Obj;
+
+public:
+ EarliestEscapeInfo(DominatorTree &DT, const LoopInfo &LI) : DT(DT), LI(LI) {}
+
+ bool isNotCapturedBeforeOrAt(const Value *Object,
+ const Instruction *I) override;
+
+ void removeInstruction(Instruction *I);
+};
+
/// Reduced version of MemoryLocation that only stores a pointer and size.
/// Used for caching AATags independent BasicAA results.
struct AACacheLoc {
@@ -425,8 +470,7 @@ public:
using AliasCacheT = SmallDenseMap<LocPair, CacheEntry, 8>;
AliasCacheT AliasCache;
- using IsCapturedCacheT = SmallDenseMap<const Value *, bool, 8>;
- IsCapturedCacheT IsCapturedCache;
+ CaptureInfo *CI;
/// Query depth used to distinguish recursive queries.
unsigned Depth = 0;
@@ -439,18 +483,26 @@ public:
/// assumption is disproven.
SmallVector<AAQueryInfo::LocPair, 4> AssumptionBasedResults;
- AAQueryInfo() : AliasCache(), IsCapturedCache() {}
+ AAQueryInfo(CaptureInfo *CI) : CI(CI) {}
/// Create a new AAQueryInfo based on this one, but with the cache cleared.
/// This is used for recursive queries across phis, where cache results may
/// not be valid.
AAQueryInfo withEmptyCache() {
- AAQueryInfo NewAAQI;
+ AAQueryInfo NewAAQI(CI);
NewAAQI.Depth = Depth;
return NewAAQI;
}
};
+/// AAQueryInfo that uses SimpleCaptureInfo.
+class SimpleAAQueryInfo : public AAQueryInfo {
+ SimpleCaptureInfo CI;
+
+public:
+ SimpleAAQueryInfo() : AAQueryInfo(&CI) {}
+};
+
class BatchAAResults;
class AAResults {
@@ -770,7 +822,7 @@ public:
/// helpers above.
ModRefInfo getModRefInfo(const Instruction *I,
const Optional<MemoryLocation> &OptLoc) {
- AAQueryInfo AAQIP;
+ SimpleAAQueryInfo AAQIP;
return getModRefInfo(I, OptLoc, AAQIP);
}
@@ -797,7 +849,7 @@ public:
ModRefInfo callCapturesBefore(const Instruction *I,
const MemoryLocation &MemLoc,
DominatorTree *DT) {
- AAQueryInfo AAQIP;
+ SimpleAAQueryInfo AAQIP;
return callCapturesBefore(I, MemLoc, DT, AAQIP);
}
@@ -896,9 +948,12 @@ private:
class BatchAAResults {
AAResults &AA;
AAQueryInfo AAQI;
+ SimpleCaptureInfo SimpleCI;
public:
- BatchAAResults(AAResults &AAR) : AA(AAR), AAQI() {}
+ BatchAAResults(AAResults &AAR) : AA(AAR), AAQI(&SimpleCI) {}
+ BatchAAResults(AAResults &AAR, CaptureInfo *CI) : AA(AAR), AAQI(CI) {}
+
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB) {
return AA.alias(LocA, LocB, AAQI);
}
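A hedged sketch of plugging the new capture-info providers into a batched query; AA, DT, LI and the memory locations are assumed to come from the enclosing pass.

```
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Dominators.h"

bool neverAlias(llvm::AAResults &AA, llvm::DominatorTree &DT,
                const llvm::LoopInfo &LI, const llvm::MemoryLocation &LocA,
                const llvm::MemoryLocation &LocB) {
  // Use context-sensitive "earliest escape" capture information instead of
  // the default SimpleCaptureInfo.
  llvm::EarliestEscapeInfo EI(DT, LI);
  llvm::BatchAAResults BatchAA(AA, &EI);
  return BatchAA.alias(LocA, LocB) == llvm::AliasResult::NoAlias;
}
```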
diff --git a/llvm/include/llvm/Analysis/AssumeBundleQueries.h b/llvm/include/llvm/Analysis/AssumeBundleQueries.h
index 49c0cd89a4db..77da19110246 100644
--- a/llvm/include/llvm/Analysis/AssumeBundleQueries.h
+++ b/llvm/include/llvm/Analysis/AssumeBundleQueries.h
@@ -20,7 +20,6 @@
#include "llvm/ADT/DenseMap.h"
namespace llvm {
-class IntrinsicInst;
class AssumptionCache;
class DominatorTree;
@@ -70,15 +69,15 @@ template<> struct DenseMapInfo<Attribute::AttrKind> {
using RetainedKnowledgeKey = std::pair<Value *, Attribute::AttrKind>;
struct MinMax {
- unsigned Min;
- unsigned Max;
+ uint64_t Min;
+ uint64_t Max;
};
/// A mapping from intrinsics (=`llvm.assume` calls) to a value range
/// (=knowledge) that is encoded in them. How the value range is interpreted
/// depends on the RetainedKnowledgeKey that was used to get this out of the
/// RetainedKnowledgeMap.
-using Assume2KnowledgeMap = DenseMap<IntrinsicInst *, MinMax>;
+using Assume2KnowledgeMap = DenseMap<AssumeInst *, MinMax>;
using RetainedKnowledgeMap =
DenseMap<RetainedKnowledgeKey, Assume2KnowledgeMap>;
@@ -100,7 +99,7 @@ void fillMapFromAssume(AssumeInst &Assume, RetainedKnowledgeMap &Result);
/// - ArgValue will be 4.
struct RetainedKnowledge {
Attribute::AttrKind AttrKind = Attribute::None;
- unsigned ArgValue = 0;
+ uint64_t ArgValue = 0;
Value *WasOn = nullptr;
bool operator==(RetainedKnowledge Other) const {
return AttrKind == Other.AttrKind && WasOn == Other.WasOn &&
diff --git a/llvm/include/llvm/Analysis/AssumptionCache.h b/llvm/include/llvm/Analysis/AssumptionCache.h
index 51d04bd8cf02..12dd9b04c932 100644
--- a/llvm/include/llvm/Analysis/AssumptionCache.h
+++ b/llvm/include/llvm/Analysis/AssumptionCache.h
@@ -29,6 +29,7 @@ namespace llvm {
class AssumeInst;
class Function;
class raw_ostream;
+class TargetTransformInfo;
class Value;
/// A cache of \@llvm.assume calls within a function.
@@ -59,6 +60,8 @@ private:
/// We track this to lazily populate our assumptions.
Function &F;
+ TargetTransformInfo *TTI;
+
/// Vector of weak value handles to calls of the \@llvm.assume
/// intrinsic.
SmallVector<ResultElem, 4> AssumeHandles;
@@ -103,7 +106,8 @@ private:
public:
/// Construct an AssumptionCache from a function by scanning all of
/// its instructions.
- AssumptionCache(Function &F) : F(F) {}
+ AssumptionCache(Function &F, TargetTransformInfo *TTI = nullptr)
+ : F(F), TTI(TTI) {}
/// This cache is designed to be self-updating and so it should never be
/// invalidated.
@@ -174,9 +178,7 @@ class AssumptionAnalysis : public AnalysisInfoMixin<AssumptionAnalysis> {
public:
using Result = AssumptionCache;
- AssumptionCache run(Function &F, FunctionAnalysisManager &) {
- return AssumptionCache(F);
- }
+ AssumptionCache run(Function &F, FunctionAnalysisManager &);
};
/// Printer pass for the \c AssumptionAnalysis results.
diff --git a/llvm/include/llvm/Analysis/BasicAliasAnalysis.h b/llvm/include/llvm/Analysis/BasicAliasAnalysis.h
index 991c0cbb642a..ed9d1ba4c5a7 100644
--- a/llvm/include/llvm/Analysis/BasicAliasAnalysis.h
+++ b/llvm/include/llvm/Analysis/BasicAliasAnalysis.h
@@ -13,10 +13,8 @@
#ifndef LLVM_ANALYSIS_BASICALIASANALYSIS_H
#define LLVM_ANALYSIS_BASICALIASANALYSIS_H
-#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
@@ -28,7 +26,6 @@
namespace llvm {
struct AAMDNodes;
-class APInt;
class AssumptionCache;
class BasicBlock;
class DataLayout;
@@ -98,71 +95,7 @@ public:
FunctionModRefBehavior getModRefBehavior(const Function *Fn);
private:
- // A linear transformation of a Value; this class represents ZExt(SExt(V,
- // SExtBits), ZExtBits) * Scale + Offset.
- struct VariableGEPIndex {
- // An opaque Value - we can't decompose this further.
- const Value *V;
-
- // We need to track what extensions we've done as we consider the same Value
- // with different extensions as different variables in a GEP's linear
- // expression;
- // e.g.: if V == -1, then sext(x) != zext(x).
- unsigned ZExtBits;
- unsigned SExtBits;
-
- APInt Scale;
-
- // Context instruction to use when querying information about this index.
- const Instruction *CxtI;
-
- /// True if all operations in this expression are NSW.
- bool IsNSW;
-
- void dump() const {
- print(dbgs());
- dbgs() << "\n";
- }
- void print(raw_ostream &OS) const {
- OS << "(V=" << V->getName()
- << ", zextbits=" << ZExtBits
- << ", sextbits=" << SExtBits
- << ", scale=" << Scale << ")";
- }
- };
-
- // Represents the internal structure of a GEP, decomposed into a base pointer,
- // constant offsets, and variable scaled indices.
- struct DecomposedGEP {
- // Base pointer of the GEP
- const Value *Base;
- // Total constant offset from base.
- APInt Offset;
- // Scaled variable (non-constant) indices.
- SmallVector<VariableGEPIndex, 4> VarIndices;
- // Is GEP index scale compile-time constant.
- bool HasCompileTimeConstantScale;
- // Are all operations inbounds GEPs or non-indexing operations?
- // (None iff expression doesn't involve any geps)
- Optional<bool> InBounds;
-
- void dump() const {
- print(dbgs());
- dbgs() << "\n";
- }
- void print(raw_ostream &OS) const {
- OS << "(DecomposedGEP Base=" << Base->getName()
- << ", Offset=" << Offset
- << ", VarIndices=[";
- for (size_t i = 0; i < VarIndices.size(); i++) {
- if (i != 0)
- OS << ", ";
- VarIndices[i].print(OS);
- }
- OS << "], HasCompileTimeConstantScale=" << HasCompileTimeConstantScale
- << ")";
- }
- };
+ struct DecomposedGEP;
/// Tracks phi nodes we have visited.
///
@@ -187,10 +120,6 @@ private:
DecomposeGEPExpression(const Value *V, const DataLayout &DL,
AssumptionCache *AC, DominatorTree *DT);
- static bool isGEPBaseAtNegativeOffset(const GEPOperator *GEPOp,
- const DecomposedGEP &DecompGEP, const DecomposedGEP &DecompObject,
- LocationSize ObjectAccessSize);
-
/// A Heuristic for aliasGEP that searches for a constant offset
/// between the variables.
///
@@ -200,15 +129,14 @@ private:
/// However, we know that, for all %x, zext(%x) != zext(%x + 1), even if
/// the addition overflows.
bool
- constantOffsetHeuristic(const SmallVectorImpl<VariableGEPIndex> &VarIndices,
- LocationSize V1Size, LocationSize V2Size,
- const APInt &BaseOffset, AssumptionCache *AC,
+ constantOffsetHeuristic(const DecomposedGEP &GEP, LocationSize V1Size,
+ LocationSize V2Size, AssumptionCache *AC,
DominatorTree *DT);
bool isValueEqualInPotentialCycles(const Value *V1, const Value *V2);
- void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest,
- const SmallVectorImpl<VariableGEPIndex> &Src);
+ void subtractDecomposedGEPs(DecomposedGEP &DestGEP,
+ const DecomposedGEP &SrcGEP);
AliasResult aliasGEP(const GEPOperator *V1, LocationSize V1Size,
const Value *V2, LocationSize V2Size,
diff --git a/llvm/include/llvm/Analysis/CGSCCPassManager.h b/llvm/include/llvm/Analysis/CGSCCPassManager.h
index e361cccef960..7cf172dc1dd1 100644
--- a/llvm/include/llvm/Analysis/CGSCCPassManager.h
+++ b/llvm/include/llvm/Analysis/CGSCCPassManager.h
@@ -20,7 +20,7 @@
/// A secondary more general goal is to be able to isolate optimization on
/// unrelated parts of the IR module. This is useful to ensure our
/// optimizations are principled and don't miss opportunities where refinement
-/// of one part of the module influence transformations in another part of the
+/// of one part of the module influences transformations in another part of the
/// module. But this is also useful if we want to parallelize the optimizations
/// across common large module graph shapes which tend to be very wide and have
/// large regions of unrelated cliques.
@@ -161,6 +161,12 @@ struct RequireAnalysisPass<AnalysisT, LazyCallGraph::SCC, CGSCCAnalysisManager,
(void)AM.template getResult<AnalysisT>(C, CG);
return PreservedAnalyses::all();
}
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ auto ClassName = AnalysisT::name();
+ auto PassName = MapClassName2PassName(ClassName);
+ OS << "require<" << PassName << ">";
+ }
};
/// A proxy from a \c CGSCCAnalysisManager to a \c Module.
@@ -215,7 +221,7 @@ using ModuleAnalysisManagerCGSCCProxy =
LazyCallGraph &>;
/// Support structure for SCC passes to communicate updates the call graph back
-/// to the CGSCC pass manager infrsatructure.
+/// to the CGSCC pass manager infrastructure.
///
/// The CGSCC pass manager runs SCC passes which are allowed to update the call
/// graph and SCC structures. This means the structure the pass manager works
@@ -274,22 +280,22 @@ struct CGSCCUpdateResult {
/// If non-null, the updated current \c RefSCC being processed.
///
- /// This is set when a graph refinement takes place an the "current" point in
- /// the graph moves "down" or earlier in the post-order walk. This will often
- /// cause the "current" RefSCC to be a newly created RefSCC object and the
- /// old one to be added to the above worklist. When that happens, this
+ /// This is set when a graph refinement takes place and the "current" point
+ /// in the graph moves "down" or earlier in the post-order walk. This will
+ /// often cause the "current" RefSCC to be a newly created RefSCC object and
+ /// the old one to be added to the above worklist. When that happens, this
/// pointer is non-null and can be used to continue processing the "top" of
/// the post-order walk.
LazyCallGraph::RefSCC *UpdatedRC;
/// If non-null, the updated current \c SCC being processed.
///
- /// This is set when a graph refinement takes place an the "current" point in
- /// the graph moves "down" or earlier in the post-order walk. This will often
- /// cause the "current" SCC to be a newly created SCC object and the old one
- /// to be added to the above worklist. When that happens, this pointer is
- /// non-null and can be used to continue processing the "top" of the
- /// post-order walk.
+ /// This is set when a graph refinement takes place and the "current" point
+ /// in the graph moves "down" or earlier in the post-order walk. This will
+ /// often cause the "current" SCC to be a newly created SCC object and the
+ /// old one to be added to the above worklist. When that happens, this
+ /// pointer is non-null and can be used to continue processing the "top" of
+ /// the post-order walk.
LazyCallGraph::SCC *UpdatedC;
/// Preserved analyses across SCCs.
@@ -298,7 +304,7 @@ struct CGSCCUpdateResult {
/// (changing both the CG structure and the function IR itself). However,
/// this means we need to take special care to correctly mark what analyses
/// are preserved *across* SCCs. We have to track this out-of-band here
- /// because within the main `PassManeger` infrastructure we need to mark
+ /// because within the main `PassManager` infrastructure we need to mark
/// everything within an SCC as preserved in order to avoid repeatedly
/// invalidating the same analyses as we unnest pass managers and adaptors.
/// So we track the cross-SCC version of the preserved analyses here from any
@@ -363,6 +369,13 @@ public:
/// Runs the CGSCC pass across every SCC in the module.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ OS << "cgscc(";
+ Pass->printPipeline(OS, MapClassName2PassName);
+ OS << ")";
+ }
+
static bool isRequired() { return true; }
private:
@@ -377,8 +390,11 @@ createModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT &&Pass) {
using PassModelT = detail::PassModel<LazyCallGraph::SCC, CGSCCPassT,
PreservedAnalyses, CGSCCAnalysisManager,
LazyCallGraph &, CGSCCUpdateResult &>;
+ // Do not use make_unique; it causes too many template instantiations,
+ // causing terrible compile times.
return ModuleToPostOrderCGSCCPassAdaptor(
- std::make_unique<PassModelT>(std::forward<CGSCCPassT>(Pass)));
+ std::unique_ptr<ModuleToPostOrderCGSCCPassAdaptor::PassConceptT>(
+ new PassModelT(std::forward<CGSCCPassT>(Pass))));
}
/// A proxy from a \c FunctionAnalysisManager to an \c SCC.
@@ -461,11 +477,14 @@ class CGSCCToFunctionPassAdaptor
public:
using PassConceptT = detail::PassConcept<Function, FunctionAnalysisManager>;
- explicit CGSCCToFunctionPassAdaptor(std::unique_ptr<PassConceptT> Pass)
- : Pass(std::move(Pass)) {}
+ explicit CGSCCToFunctionPassAdaptor(std::unique_ptr<PassConceptT> Pass,
+ bool EagerlyInvalidate, bool NoRerun)
+ : Pass(std::move(Pass)), EagerlyInvalidate(EagerlyInvalidate),
+ NoRerun(NoRerun) {}
CGSCCToFunctionPassAdaptor(CGSCCToFunctionPassAdaptor &&Arg)
- : Pass(std::move(Arg.Pass)) {}
+ : Pass(std::move(Arg.Pass)), EagerlyInvalidate(Arg.EagerlyInvalidate),
+ NoRerun(Arg.NoRerun) {}
friend void swap(CGSCCToFunctionPassAdaptor &LHS,
CGSCCToFunctionPassAdaptor &RHS) {
@@ -481,24 +500,56 @@ public:
PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
LazyCallGraph &CG, CGSCCUpdateResult &UR);
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ OS << "function";
+ if (EagerlyInvalidate)
+ OS << "<eager-inv>";
+ OS << "(";
+ Pass->printPipeline(OS, MapClassName2PassName);
+ OS << ")";
+ }
+
static bool isRequired() { return true; }
private:
std::unique_ptr<PassConceptT> Pass;
+ bool EagerlyInvalidate;
+ bool NoRerun;
};
/// A function to deduce a function pass type and wrap it in the
/// templated adaptor.
template <typename FunctionPassT>
CGSCCToFunctionPassAdaptor
-createCGSCCToFunctionPassAdaptor(FunctionPassT &&Pass) {
+createCGSCCToFunctionPassAdaptor(FunctionPassT &&Pass,
+ bool EagerlyInvalidate = false,
+ bool NoRerun = false) {
using PassModelT =
detail::PassModel<Function, FunctionPassT, PreservedAnalyses,
FunctionAnalysisManager>;
+ // Do not use make_unique; it causes too many template instantiations,
+ // causing terrible compile times.
return CGSCCToFunctionPassAdaptor(
- std::make_unique<PassModelT>(std::forward<FunctionPassT>(Pass)));
+ std::unique_ptr<CGSCCToFunctionPassAdaptor::PassConceptT>(
+ new PassModelT(std::forward<FunctionPassT>(Pass))),
+ EagerlyInvalidate, NoRerun);
}
+// A marker to determine if function passes should be run on a function within a
+// CGSCCToFunctionPassAdaptor. This is used to prevent running an expensive
+// function pass (manager) on a function multiple times if SCC mutations cause a
+// function to be visited multiple times and the function is not modified by
+// other SCC passes.
+class ShouldNotRunFunctionPassesAnalysis
+ : public AnalysisInfoMixin<ShouldNotRunFunctionPassesAnalysis> {
+public:
+ static AnalysisKey Key;
+ struct Result {};
+
+ Result run(Function &F, FunctionAnalysisManager &FAM) { return Result(); }
+};
+
/// A helper that repeats an SCC pass each time an indirect call is refined to
/// a direct call by that pass.
///
@@ -528,6 +579,13 @@ public:
PreservedAnalyses run(LazyCallGraph::SCC &InitialC, CGSCCAnalysisManager &AM,
LazyCallGraph &CG, CGSCCUpdateResult &UR);
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ OS << "devirt<" << MaxIterations << ">(";
+ Pass->printPipeline(OS, MapClassName2PassName);
+ OS << ")";
+ }
+
private:
std::unique_ptr<PassConceptT> Pass;
int MaxIterations;
@@ -541,8 +599,11 @@ DevirtSCCRepeatedPass createDevirtSCCRepeatedPass(CGSCCPassT &&Pass,
using PassModelT = detail::PassModel<LazyCallGraph::SCC, CGSCCPassT,
PreservedAnalyses, CGSCCAnalysisManager,
LazyCallGraph &, CGSCCUpdateResult &>;
+ // Do not use make_unique; it causes too many template instantiations,
+ // causing terrible compile times.
return DevirtSCCRepeatedPass(
- std::make_unique<PassModelT>(std::forward<CGSCCPassT>(Pass)),
+ std::unique_ptr<DevirtSCCRepeatedPass::PassConceptT>(
+ new PassModelT(std::forward<CGSCCPassT>(Pass))),
MaxIterations);
}
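A short sketch of composing these adaptors into a module pipeline; InstCombinePass is just an example function pass.

```
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/InstCombine/InstCombine.h"

void buildPipeline(llvm::ModulePassManager &MPM) {
  // Run a function pass over every function of each SCC, eagerly invalidating
  // analyses, while walking the SCCs of the module in post order.
  MPM.addPass(llvm::createModuleToPostOrderCGSCCPassAdaptor(
      llvm::createCGSCCToFunctionPassAdaptor(llvm::InstCombinePass(),
                                             /*EagerlyInvalidate=*/true)));
}
```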
diff --git a/llvm/include/llvm/Analysis/CaptureTracking.h b/llvm/include/llvm/Analysis/CaptureTracking.h
index 9da5f18e944b..50d12db7a1c3 100644
--- a/llvm/include/llvm/Analysis/CaptureTracking.h
+++ b/llvm/include/llvm/Analysis/CaptureTracking.h
@@ -22,6 +22,8 @@ namespace llvm {
class DataLayout;
class Instruction;
class DominatorTree;
+ class LoopInfo;
+ class Function;
/// getDefaultMaxUsesToExploreForCaptureTracking - Return default value of
/// the maximal number of uses to explore before giving up. It is used by
@@ -55,10 +57,25 @@ namespace llvm {
/// MaxUsesToExplore specifies how many uses the analysis should explore for
/// one value before giving up due to "too many uses". If MaxUsesToExplore
/// is zero, a default value is assumed.
- bool PointerMayBeCapturedBefore(
- const Value *V, bool ReturnCaptures, bool StoreCaptures,
- const Instruction *I, const DominatorTree *DT, bool IncludeI = false,
- unsigned MaxUsesToExplore = 0);
+ bool PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures,
+ bool StoreCaptures, const Instruction *I,
+ const DominatorTree *DT,
+ bool IncludeI = false,
+ unsigned MaxUsesToExplore = 0,
+ const LoopInfo *LI = nullptr);
+
+ // Returns the 'earliest' instruction that captures \p V in \p F. An
+ // instruction A is considered earlier than B if A dominates B. If two escapes
+ // do not dominate each other, the terminator of the common dominator is
+ // chosen. If not all uses can be analyzed, the earliest escape is set to
+ // the first instruction in the function entry block. If \p V does not escape,
+ // nullptr is returned. Note that the caller of the function has to ensure
+ // that the instruction the result value is compared against is not in a
+ // cycle.
+ Instruction *FindEarliestCapture(const Value *V, Function &F,
+ bool ReturnCaptures, bool StoreCaptures,
+ const DominatorTree &DT,
+ unsigned MaxUsesToExplore = 0);
/// This callback is used in conjunction with PointerMayBeCaptured. In
/// addition to the interface here, you'll need to provide your own getters
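A hedged sketch of the new FindEarliestCapture entry point; Obj, F, DT and I are assumed to come from the caller, and (as noted above) the caller must ensure the instruction compared against is not in a cycle.

```
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"

bool escapesBefore(const llvm::Value *Obj, llvm::Function &F,
                   const llvm::DominatorTree &DT, const llvm::Instruction &I) {
  llvm::Instruction *Earliest = llvm::FindEarliestCapture(
      Obj, F, /*ReturnCaptures=*/false, /*StoreCaptures=*/true, DT);
  // A null result means the object never escapes.
  return Earliest && DT.dominates(Earliest, &I);
}
```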
diff --git a/llvm/include/llvm/Analysis/ConstantFolding.h b/llvm/include/llvm/Analysis/ConstantFolding.h
index 62742fdf9a91..45fb879f0c1f 100644
--- a/llvm/include/llvm/Analysis/ConstantFolding.h
+++ b/llvm/include/llvm/Analysis/ConstantFolding.h
@@ -128,10 +128,25 @@ Constant *ConstantFoldExtractElementInstruction(Constant *Val, Constant *Idx);
Constant *ConstantFoldShuffleVectorInstruction(Constant *V1, Constant *V2,
ArrayRef<int> Mask);
-/// ConstantFoldLoadFromConstPtr - Return the value that a load from C would
-/// produce if it is constant and determinable. If this is not determinable,
-/// return null.
-Constant *ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty, const DataLayout &DL);
+/// Extract value of C at the given Offset reinterpreted as Ty. If bits past
+/// the end of C are accessed, they are assumed to be poison.
+Constant *ConstantFoldLoadFromConst(Constant *C, Type *Ty, const APInt &Offset,
+ const DataLayout &DL);
+
+/// Extract value of C reinterpreted as Ty. Same as previous API with zero
+/// offset.
+Constant *ConstantFoldLoadFromConst(Constant *C, Type *Ty,
+ const DataLayout &DL);
+
+/// Return the value that a load from C with offset Offset would produce if it
+/// is constant and determinable. If this is not determinable, return null.
+Constant *ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty, APInt Offset,
+ const DataLayout &DL);
+
+/// Return the value that a load from C would produce if it is constant and
+/// determinable. If this is not determinable, return null.
+Constant *ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty,
+ const DataLayout &DL);
/// ConstantFoldLoadThroughGEPConstantExpr - Given a constant and a
/// getelementptr constantexpr, return the constant value being addressed by the
@@ -140,13 +155,6 @@ Constant *ConstantFoldLoadThroughGEPConstantExpr(Constant *C, ConstantExpr *CE,
Type *Ty,
const DataLayout &DL);
-/// ConstantFoldLoadThroughGEPIndices - Given a constant and getelementptr
-/// indices (with an *implied* zero pointer index that is not in the list),
-/// return the constant value being addressed by a virtual load, or null if
-/// something is funny and we can't decide.
-Constant *ConstantFoldLoadThroughGEPIndices(Constant *C,
- ArrayRef<Constant *> Indices);
-
/// canConstantFoldCallTo - Return true if it's even possible to fold a call to
/// the specified function.
bool canConstantFoldCallTo(const CallBase *Call, const Function *F);
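A small sketch of the new offset-based folding overloads; the constant pointer, data layout and context are assumed to come from the caller, and the offset/type choice is only an example:

#include "llvm/ADT/APInt.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Type.h"

using namespace llvm;

// Sketch: fold a load of an i32 at byte offset 8 through the constant
// pointer C. Returns null when the loaded value cannot be determined.
static Constant *foldI32LoadAtOffset8(Constant *C, const DataLayout &DL,
                                      LLVMContext &Ctx) {
  Type *Int32Ty = Type::getInt32Ty(Ctx);
  APInt Offset(DL.getIndexTypeSizeInBits(C->getType()), 8);
  return ConstantFoldLoadFromConstPtr(C, Int32Ty, Offset, DL);
}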
diff --git a/llvm/include/llvm/Analysis/CostModel.h b/llvm/include/llvm/Analysis/CostModel.h
new file mode 100644
index 000000000000..649168050cec
--- /dev/null
+++ b/llvm/include/llvm/Analysis/CostModel.h
@@ -0,0 +1,26 @@
+//===- CostModel.h - --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_COSTMODEL_H
+#define LLVM_ANALYSIS_COSTMODEL_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+/// Printer pass for cost modeling results.
+class CostModelPrinterPass : public PassInfoMixin<CostModelPrinterPass> {
+ raw_ostream &OS;
+
+public:
+ explicit CostModelPrinterPass(raw_ostream &OS) : OS(OS) {}
+
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+} // end namespace llvm
+
+#endif // LLVM_ANALYSIS_COSTMODEL_H
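A sketch of wiring the new printer pass into a function pipeline; only the constructor declared above and the standard new-pass-manager addPass API are assumed:

#include "llvm/Analysis/CostModel.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

static void addCostModelPrinter(FunctionPassManager &FPM) {
  // Print cost modeling results for each function to stderr.
  FPM.addPass(CostModelPrinterPass(errs()));
}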
diff --git a/llvm/include/llvm/Analysis/Delinearization.h b/llvm/include/llvm/Analysis/Delinearization.h
index 2658b6bbc80c..6e942530f253 100644
--- a/llvm/include/llvm/Analysis/Delinearization.h
+++ b/llvm/include/llvm/Analysis/Delinearization.h
@@ -16,10 +16,115 @@
#ifndef LLVM_ANALYSIS_DELINEARIZATION_H
#define LLVM_ANALYSIS_DELINEARIZATION_H
+#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/raw_ostream.h"
namespace llvm {
+class GetElementPtrInst;
+class ScalarEvolution;
+class SCEV;
+
+/// Compute the array dimensions Sizes from the set of Terms extracted from
+/// the memory access function of this SCEVAddRecExpr (second step of
+/// delinearization).
+void findArrayDimensions(ScalarEvolution &SE,
+ SmallVectorImpl<const SCEV *> &Terms,
+ SmallVectorImpl<const SCEV *> &Sizes,
+ const SCEV *ElementSize);
+
+/// Collect parametric terms occurring in step expressions (first step of
+/// delinearization).
+void collectParametricTerms(ScalarEvolution &SE, const SCEV *Expr,
+ SmallVectorImpl<const SCEV *> &Terms);
+
+/// Return in Subscripts the access functions for each dimension in Sizes
+/// (third step of delinearization).
+void computeAccessFunctions(ScalarEvolution &SE, const SCEV *Expr,
+ SmallVectorImpl<const SCEV *> &Subscripts,
+ SmallVectorImpl<const SCEV *> &Sizes);
+/// Split this SCEVAddRecExpr into two vectors of SCEVs representing the
+/// subscripts and sizes of an array access.
+///
+/// The delinearization is a 3 step process: the first two steps compute the
+/// sizes of each subscript and the third step computes the access functions
+/// for the delinearized array:
+///
+/// 1. Find the terms in the step functions
+/// 2. Compute the array size
+/// 3. Compute the access function: divide the SCEV by the array size
+/// starting with the innermost dimensions found in step 2. The Quotient
+/// is the SCEV to be divided in the next step of the recursion. The
+/// Remainder is the subscript of the innermost dimension. Loop over all
+/// array dimensions computed in step 2.
+///
+/// To compute a uniform array size for several memory accesses to the same
+/// object, one can collect in step 1 all the step terms for all the memory
+/// accesses, and compute in step 2 a unique array shape. This guarantees
+/// that the array shape will be the same across all memory accesses.
+///
+/// FIXME: We could derive the result of steps 1 and 2 from a description of
+/// the array shape given in metadata.
+///
+/// Example:
+///
+/// A[][n][m]
+///
+/// for i
+/// for j
+/// for k
+/// A[j+k][2i][5i] =
+///
+/// The initial SCEV:
+///
+/// A[{{{0,+,2*m+5}_i, +, n*m}_j, +, n*m}_k]
+///
+/// 1. Find the different terms in the step functions:
+/// -> [2*m, 5, n*m, n*m]
+///
+/// 2. Compute the array size: sort and unique them
+/// -> [n*m, 2*m, 5]
+/// find the GCD of all the terms = 1
+/// divide by the GCD and erase constant terms
+/// -> [n*m, 2*m]
+/// GCD = m
+/// divide by GCD -> [n, 2]
+/// remove constant terms
+/// -> [n]
+/// size of the array is A[unknown][n][m]
+///
+/// 3. Compute the access function
+/// a. Divide {{{0,+,2*m+5}_i, +, n*m}_j, +, n*m}_k by the innermost size m
+/// Quotient: {{{0,+,2}_i, +, n}_j, +, n}_k
+/// Remainder: {{{0,+,5}_i, +, 0}_j, +, 0}_k
+/// The remainder is the subscript of the innermost array dimension: [5i].
+///
+/// b. Divide Quotient: {{{0,+,2}_i, +, n}_j, +, n}_k by next outer size n
+/// Quotient: {{{0,+,0}_i, +, 1}_j, +, 1}_k
+/// Remainder: {{{0,+,2}_i, +, 0}_j, +, 0}_k
+/// The Remainder is the subscript of the next array dimension: [2i].
+///
+/// The subscript of the outermost dimension is the Quotient: [j+k].
+///
+/// Overall, we have: A[][n][m], and the access function: A[j+k][2i][5i].
+void delinearize(ScalarEvolution &SE, const SCEV *Expr,
+ SmallVectorImpl<const SCEV *> &Subscripts,
+ SmallVectorImpl<const SCEV *> &Sizes, const SCEV *ElementSize);
+
+/// Gathers the individual index expressions from a GEP instruction.
+///
+/// This function optimistically assumes the GEP references into a fixed size
+/// array. If this is actually true, this function returns a list of array
+/// subscript expressions in \p Subscripts and a list of integers describing
+/// the size of the individual array dimensions in \p Sizes. Both lists have
+/// either equal length or the size list is one element shorter in case there
+/// is no known size available for the outermost array dimension. Returns true
+/// if successful and false otherwise.
+bool getIndexExpressionsFromGEP(ScalarEvolution &SE,
+ const GetElementPtrInst *GEP,
+ SmallVectorImpl<const SCEV *> &Subscripts,
+ SmallVectorImpl<int> &Sizes);
+
struct DelinearizationPrinterPass
: public PassInfoMixin<DelinearizationPrinterPass> {
explicit DelinearizationPrinterPass(raw_ostream &OS);
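A sketch of driving the free-standing delinearize entry point documented above; the access expression and element size are assumed to be produced by ScalarEvolution in the caller (for a memory instruction, typically the SCEV of its pointer operand and SE.getElementSize):

#include "llvm/Analysis/Delinearization.h"
#include "llvm/Analysis/ScalarEvolution.h"

using namespace llvm;

// Sketch: recover subscripts and dimension sizes for one access function.
static void delinearizeAccess(ScalarEvolution &SE, const SCEV *AccessFn,
                              const SCEV *ElementSize) {
  SmallVector<const SCEV *, 4> Subscripts, Sizes;
  delinearize(SE, AccessFn, Subscripts, Sizes, ElementSize);
  if (Subscripts.empty() || Sizes.empty())
    return; // Delinearization failed; fall back to a linear view.
  // Subscripts[i] is now the access function for dimension i and Sizes
  // holds the recovered array dimension sizes.
}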
diff --git a/llvm/include/llvm/Analysis/HeatUtils.h b/llvm/include/llvm/Analysis/HeatUtils.h
index b665e211c6ac..9ecbbaf318da 100644
--- a/llvm/include/llvm/Analysis/HeatUtils.h
+++ b/llvm/include/llvm/Analysis/HeatUtils.h
@@ -1,9 +1,8 @@
//===-- HeatUtils.h - Utility for printing heat colors ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
index b623b9ca58d8..51c5c620230b 100644
--- a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
+++ b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
@@ -110,7 +110,8 @@ enum InstrType { Legal, Illegal, Invisible };
/// by \ref isSameOperationAs.
/// TODO: Handle GetElementPtrInsts, as some of the operands have to be the
/// exact same, and some do not.
-struct IRInstructionData : ilist_node<IRInstructionData> {
+struct IRInstructionData
+ : ilist_node<IRInstructionData, ilist_sentinel_tracking<true>> {
/// The source Instruction that is being wrapped.
Instruction *Inst = nullptr;
@@ -127,12 +128,41 @@ struct IRInstructionData : ilist_node<IRInstructionData> {
/// to a less than form. It is None otherwise.
Optional<CmpInst::Predicate> RevisedPredicate;
+ /// This structure holds the distances of how far "ahead of" or "behind" the
+ /// target blocks of a branch, or the incoming blocks of a phi node are.
+ /// If the value is negative, it means that the block was registered before
+ /// the block of this instruction in terms of blocks in the function.
+ /// Code Example:
+ /// \code
+ /// block_1:
+ /// br i1 %0, label %block_2, label %block_3
+ /// block_2:
+ /// br i1 %1, label %block_1, label %block_2
+ /// block_3:
+ /// br i1 %2, label %block_2, label %block_1
+ /// ; Replacing the labels with relative values, this becomes:
+ /// block_1:
+ /// br i1 %0, distance 1, distance 2
+ /// block_2:
+ /// br i1 %1, distance -1, distance 0
+ /// block_3:
+ /// br i1 %2, distance -1, distance -2
+ /// \endcode
+ /// Taking block_2 as our example, block_1 is "behind" block_2, and block_2 is
+ /// "ahead" of block_2.
+ SmallVector<int, 4> RelativeBlockLocations;
+
/// Gather the information that is difficult to gather for an Instruction, or
/// is changed. i.e. the operands of an Instruction and the Types of those
/// operands. This extra information allows for similarity matching to make
/// assertions that allow for more flexibility when checking for whether an
/// Instruction performs the same operation.
IRInstructionData(Instruction &I, bool Legality, IRInstructionDataList &IDL);
+ IRInstructionData(IRInstructionDataList &IDL);
+
+ /// Fills data structures for IRInstructionData when it is constructed from a
+ /// reference or a pointer.
+ void initializeInstruction();
/// Get the predicate that the compare instruction is using for hashing the
/// instruction. the IRInstructionData must be wrapping a CmpInst.
@@ -145,6 +175,16 @@ struct IRInstructionData : ilist_node<IRInstructionData> {
/// \return the consistent comparison predicate.
static CmpInst::Predicate predicateForConsistency(CmpInst *CI);
+ /// For an IRInstructionData containing a branch, finds the
+ /// relative distances from the source basic block to the target by taking
+ /// the difference of the number assigned to the current basic block and the
+ /// target basic block of the branch.
+ ///
+ /// \param BasicBlockToInteger - The mapping of basic blocks to their location
+ /// in the module.
+ void
+ setBranchSuccessors(DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger);
+
/// Hashes \p Value based on its opcode, types, and operand types.
/// Two IRInstructionData instances produce the same hash when they perform
/// the same operation.
@@ -198,7 +238,8 @@ struct IRInstructionData : ilist_node<IRInstructionData> {
IRInstructionDataList *IDL = nullptr;
};
-struct IRInstructionDataList : simple_ilist<IRInstructionData> {};
+struct IRInstructionDataList
+ : simple_ilist<IRInstructionData, ilist_sentinel_tracking<true>> {};
/// Compare one IRInstructionData class to another IRInstructionData class for
/// whether they are performing the same operation, and can be mapped to the
@@ -288,6 +329,10 @@ struct IRInstructionMapper {
DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>
InstructionIntegerMap;
+ /// A mapping for a basic block in a module to its assigned number/location
+ /// in the module.
+ DenseMap<BasicBlock *, unsigned> BasicBlockToInteger;
+
/// Set if we added an illegal number in the previous step.
/// Since each illegal number is unique, we only need one of them between
/// each range of legal numbers. This lets us make sure we don't add more
@@ -322,6 +367,14 @@ struct IRInstructionMapper {
IRInstructionData *allocateIRInstructionData(Instruction &I, bool Legality,
IRInstructionDataList &IDL);
+ /// Get an empty allocated IRInstructionData struct using the
+ /// InstDataAllocator.
+ ///
+ /// \param IDL - The InstructionDataList that the IRInstructionData is
+ /// inserted into.
+ /// \returns An allocated IRInstructionData struct.
+ IRInstructionData *allocateIRInstructionData(IRInstructionDataList &IDL);
+
/// Get an allocated IRInstructionDataList object using the IDLAllocator.
///
/// \returns An allocated IRInstructionDataList object.
@@ -329,6 +382,24 @@ struct IRInstructionMapper {
IRInstructionDataList *IDL = nullptr;
+ /// Assigns values to all the basic blocks in function \p F starting from
+ /// integer \p BBNumber.
+ ///
+ /// \param F - The function containing the basic blocks to assign numbers to.
+ /// \param BBNumber - The number to start from.
+ void initializeForBBs(Function &F, unsigned &BBNumber) {
+ for (BasicBlock &BB : F)
+ BasicBlockToInteger.insert(std::make_pair(&BB, BBNumber++));
+ }
+
+ /// Assigns values to all the basic blocks in Module \p M.
+ /// \param M - The module containing the basic blocks to assign numbers to.
+ void initializeForBBs(Module &M) {
+ unsigned BBNumber = 0;
+ for (Function &F : M)
+ initializeForBBs(F, BBNumber);
+ }
+
/// Maps the Instructions in a BasicBlock \p BB to legal or illegal integers
/// determined by \p InstrType. Two Instructions are mapped to the same value
/// if they are close as defined by the InstructionData class above.
@@ -386,7 +457,11 @@ struct IRInstructionMapper {
InstructionClassification() {}
// TODO: Determine a scheme to resolve when the label is similar enough.
- InstrType visitBranchInst(BranchInst &BI) { return Illegal; }
+ InstrType visitBranchInst(BranchInst &BI) {
+ if (EnableBranches)
+ return Legal;
+ return Illegal;
+ }
// TODO: Determine a scheme to resolve when the labels are similar enough.
InstrType visitPHINode(PHINode &PN) { return Illegal; }
// TODO: Handle allocas.
@@ -419,6 +494,10 @@ struct IRInstructionMapper {
// TODO: Handle interblock similarity.
InstrType visitTerminator(Instruction &I) { return Illegal; }
InstrType visitInstruction(Instruction &I) { return Legal; }
+
+ // The flag variable that lets the classifier know whether we should
+ // allow branches to be checked for similarity.
+ bool EnableBranches = false;
};
/// Maps an Instruction to a member of InstrType.
@@ -488,6 +567,12 @@ private:
DenseMap<Value *, unsigned> ValueToNumber;
/// Stores the mapping of the number to the value assigned this number.
DenseMap<unsigned, Value *> NumberToValue;
+ /// Stores the mapping of a value's number to canonical numbering in the
+ /// candidate's respective similarity group.
+ DenseMap<unsigned, unsigned> NumberToCanonNum;
+ /// Stores the mapping of canonical number in the candidate's respective
+ /// similarity group to a value number.
+ DenseMap<unsigned, unsigned> CanonNumToNumber;
/// @}
public:
@@ -506,13 +591,27 @@ public:
static bool isSimilar(const IRSimilarityCandidate &A,
const IRSimilarityCandidate &B);
- /// \param A - The first IRInstructionCandidate to compare.
- /// \param B - The second IRInstructionCandidate to compare.
+ /// \param [in] A - The first IRInstructionCandidate to compare.
+ /// \param [in] B - The second IRInstructionCandidate to compare.
/// \returns True when every IRInstructionData in \p A is structurally similar
/// to \p B.
static bool compareStructure(const IRSimilarityCandidate &A,
const IRSimilarityCandidate &B);
+ /// \param [in] A - The first IRInstructionCandidate to compare.
+ /// \param [in] B - The second IRInstructionCandidate to compare.
+ /// \param [in,out] ValueNumberMappingA - A mapping of value numbers from
+ /// candidate \p A to candidate \p B.
+ /// \param [in,out] ValueNumberMappingB - A mapping of value numbers from
+ /// candidate \p B to candidate \p A.
+ /// \returns True when every IRInstructionData in \p A is structurally similar
+ /// to \p B.
+ static bool
+ compareStructure(const IRSimilarityCandidate &A,
+ const IRSimilarityCandidate &B,
+ DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA,
+ DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB);
+
struct OperandMapping {
/// The IRSimilarityCandidate that holds the instruction the OperVals were
/// pulled from.
@@ -526,6 +625,21 @@ public:
DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMapping;
};
+ /// A helper struct to hold the candidate, for a branch instruction, the
+ /// relative location of a label, and the label itself. This is mostly to
+ /// group the values together before passing them as a bundle to a function.
+ struct RelativeLocMapping {
+ /// The IRSimilarityCandidate that holds the instruction the relative
+ /// location was pulled from.
+ const IRSimilarityCandidate &IRSC;
+
+ /// The relative location to be analyzed.
+ int RelativeLocation;
+
+ /// The corresponding value.
+ Value *OperVal;
+ };
+
/// Compare the operands in \p A and \p B and check that the current mapping
/// of global value numbers from \p A to \p B and \p B to \p A is consistent.
///
@@ -549,6 +663,94 @@ public:
static bool compareCommutativeOperandMapping(OperandMapping A,
OperandMapping B);
+ /// Compare the relative locations in \p A and \p B and check that the
+ /// distances match if both locations are contained in the region, and that
+ /// the branches both point outside the region if they do not.
+ /// Example Region:
+ /// \code
+ /// entry:
+ /// br i1 %0, label %block_1, label %block_3
+ /// block_0:
+ /// br i1 %0, label %block_1, label %block_2
+ /// block_1:
+ /// br i1 %0, label %block_2, label %block_3
+ /// block_2:
+ /// br i1 %1, label %block_1, label %block_4
+ /// block_3:
+ /// br i1 %2, label %block_2, label %block_5
+ /// \endcode
+ /// If we compare the branches in block_0 and block_1 the relative values are
+ /// 1 and 2 for both, so we consider this a match.
+ ///
+ /// If we compare the branches in entry and block_0 the relative values are
+ /// 2 and 3, and 1 and 2 respectively. Since these are not the same we do not
+ /// consider them a match.
+ ///
+ /// If we compare the branches in block_1 and block_2 the relative values are
+ /// 1 and 2, and -1 and None respectively. As a result we do not consider
+ /// these to be the same.
+ ///
+ /// If we compare the branches in block_2 and block_3 the relative values are
+ /// -1 and None for both. We do consider these to be a match.
+ ///
+ /// \param A - The first IRInstructionCandidate, relative location value,
+ /// and incoming block.
+ /// \param B - The second IRInstructionCandidate, relative location value,
+ /// and incoming block.
+ /// \returns true if the relative locations match.
+ static bool checkRelativeLocations(RelativeLocMapping A,
+ RelativeLocMapping B);
+
+ /// Create a mapping from the value numbering to a different separate set of
+ /// numbers. This will serve as a guide for relating one candidate to another.
+ /// The canonical number gives us the ability to identify which global value
+ /// number in one candidate relates to the global value number in the other.
+ ///
+ /// \param [in, out] CurrCand - The IRSimilarityCandidate to create a
+ /// canonical numbering for.
+ static void createCanonicalMappingFor(IRSimilarityCandidate &CurrCand);
+
+ /// Create a mapping for the value numbering of the calling
+ /// IRSimilarityCandidate, to a different separate set of numbers, based on
+ /// the canonical ordering in \p SourceCand. These are defined based on the
+ /// found mappings in \p ToSourceMapping and \p FromSourceMapping. Both of
+ /// these relationships should have the same information, just in opposite
+ /// directions.
+ ///
+ /// \param [in, out] SourceCand - The IRSimilarityCandidate to create a
+ /// canonical numbering from.
+ /// \param ToSourceMapping - The mapping of value numbers from this candidate
+ /// to \p SourceCand.
+ /// \param FromSourceMapping - The mapping of value numbers from \p SourceCand
+ /// to this candidate.
+ void createCanonicalRelationFrom(
+ IRSimilarityCandidate &SourceCand,
+ DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping,
+ DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping);
+
+ /// \param [in,out] BBSet - The set to track the basic blocks.
+ void getBasicBlocks(DenseSet<BasicBlock *> &BBSet) const {
+ for (IRInstructionData &ID : *this) {
+ BasicBlock *BB = ID.Inst->getParent();
+ if (BBSet.contains(BB))
+ continue;
+ BBSet.insert(BB);
+ }
+ }
+
+ /// \param [in,out] BBSet - The set to track the basic blocks.
+ /// \param [in,out] BBList - A list in order of use to track the basic blocks.
+ void getBasicBlocks(DenseSet<BasicBlock *> &BBSet,
+ SmallVector<BasicBlock *> &BBList) const {
+ for (IRInstructionData &ID : *this) {
+ BasicBlock *BB = ID.Inst->getParent();
+ if (BBSet.contains(BB))
+ continue;
+ BBSet.insert(BB);
+ BBList.push_back(BB);
+ }
+ }
+
/// Compare the start and end indices of the two IRSimilarityCandidates for
/// whether they overlap. If the start instruction of one
/// IRSimilarityCandidate is less than the end instruction of the other, and
@@ -611,6 +813,32 @@ public:
return VNIt->second;
}
+ /// Find the canonical number from the global value number \p N stored in the
+ /// candidate.
+ ///
+ /// \param N - The global value number to find the canonical number for.
+ /// \returns An optional containing the value, and None if it could not be
+ /// found.
+ Optional<unsigned> getCanonicalNum(unsigned N) {
+ DenseMap<unsigned, unsigned>::iterator NCIt = NumberToCanonNum.find(N);
+ if (NCIt == NumberToCanonNum.end())
+ return None;
+ return NCIt->second;
+ }
+
+ /// Find the global value number from the canonical number \p N stored in the
+ /// candidate.
+ ///
+ /// \param N - The canonical number to find the global value number for.
+ /// \returns An optional containing the value, and None if it could not be
+ /// found.
+ Optional<unsigned> fromCanonicalNum(unsigned N) {
+ DenseMap<unsigned, unsigned>::iterator CNIt = CanonNumToNumber.find(N);
+ if (CNIt == CanonNumToNumber.end())
+ return None;
+ return CNIt->second;
+ }
+
/// \param RHS - The IRSimilarityCandidate to compare against
/// \returns true if the IRSimilarityCandidate occurs after the
/// IRSimilarityCandidate in the program.
@@ -623,6 +851,9 @@ public:
iterator end() const { return std::next(iterator(back())); }
};
+typedef DenseMap<IRSimilarityCandidate *,
+ DenseMap<unsigned, DenseSet<unsigned>>>
+ CandidateGVNMapping;
typedef std::vector<IRSimilarityCandidate> SimilarityGroup;
typedef std::vector<SimilarityGroup> SimilarityGroupList;
@@ -651,8 +882,9 @@ typedef std::vector<SimilarityGroup> SimilarityGroupList;
/// analyzing the module.
class IRSimilarityIdentifier {
public:
- IRSimilarityIdentifier()
- : Mapper(&InstDataAllocator, &InstDataListAllocator) {}
+ IRSimilarityIdentifier(bool MatchBranches = true)
+ : Mapper(&InstDataAllocator, &InstDataListAllocator),
+ EnableBranches(MatchBranches) {}
private:
/// Map the instructions in the module to unsigned integers, using mapping
@@ -728,6 +960,10 @@ private:
/// instance of IRInstructionData.
IRInstructionMapper Mapper;
+ /// The flag variable that marks whether we should check branches for
+ /// similarity, or only look within basic blocks.
+ bool EnableBranches = true;
+
/// The SimilarityGroups found with the most recent run of \ref
/// findSimilarity. None if there is no recent run.
Optional<SimilarityGroupList> SimilarityCandidates;
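A sketch of constructing the identifier with the new branch-matching flag; findSimilarity and the IRSimilarity namespace are part of the existing interface and are assumed here rather than shown in this hunk:

#include "llvm/Analysis/IRSimilarityIdentifier.h"
#include "llvm/IR/Module.h"

using namespace llvm;
using namespace llvm::IRSimilarity;

// Sketch: find structurally similar regions, also treating branches as
// legal instructions via the new MatchBranches constructor parameter.
static void findSimilarRegions(Module &M) {
  IRSimilarityIdentifier Identifier(/*MatchBranches=*/true);
  SimilarityGroupList &Groups = Identifier.findSimilarity(M);
  (void)Groups; // Each group holds structurally similar candidates.
}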
diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h
index 82e1b14960bd..c26dbc457949 100644
--- a/llvm/include/llvm/Analysis/IVDescriptors.h
+++ b/llvm/include/llvm/Analysis/IVDescriptors.h
@@ -36,20 +36,24 @@ class DominatorTree;
/// These are the kinds of recurrences that we support.
enum class RecurKind {
- None, ///< Not a recurrence.
- Add, ///< Sum of integers.
- Mul, ///< Product of integers.
- Or, ///< Bitwise or logical OR of integers.
- And, ///< Bitwise or logical AND of integers.
- Xor, ///< Bitwise or logical XOR of integers.
- SMin, ///< Signed integer min implemented in terms of select(cmp()).
- SMax, ///< Signed integer max implemented in terms of select(cmp()).
- UMin, ///< Unisgned integer min implemented in terms of select(cmp()).
- UMax, ///< Unsigned integer max implemented in terms of select(cmp()).
- FAdd, ///< Sum of floats.
- FMul, ///< Product of floats.
- FMin, ///< FP min implemented in terms of select(cmp()).
- FMax ///< FP max implemented in terms of select(cmp()).
+ None, ///< Not a recurrence.
+ Add, ///< Sum of integers.
+ Mul, ///< Product of integers.
+ Or, ///< Bitwise or logical OR of integers.
+ And, ///< Bitwise or logical AND of integers.
+ Xor, ///< Bitwise or logical XOR of integers.
+ SMin, ///< Signed integer min implemented in terms of select(cmp()).
+ SMax, ///< Signed integer max implemented in terms of select(cmp()).
+ UMin, ///< Unsigned integer min implemented in terms of select(cmp()).
+ UMax, ///< Unsigned integer max implemented in terms of select(cmp()).
+ FAdd, ///< Sum of floats.
+ FMul, ///< Product of floats.
+ FMin, ///< FP min implemented in terms of select(cmp()).
+ FMax, ///< FP max implemented in terms of select(cmp()).
+ SelectICmp, ///< Integer select(icmp(),x,y) where one of (x,y) is loop
+ ///< invariant
+ SelectFCmp ///< Integer select(fcmp(),x,y) where one of (x,y) is loop
+ ///< invariant
};
/// The RecurrenceDescriptor is used to identify recurrences variables in a
@@ -112,12 +116,14 @@ public:
};
/// Returns a struct describing if the instruction 'I' can be a recurrence
- /// variable of type 'Kind'. If the recurrence is a min/max pattern of
- /// select(icmp()) this function advances the instruction pointer 'I' from the
- /// compare instruction to the select instruction and stores this pointer in
- /// 'PatternLastInst' member of the returned struct.
- static InstDesc isRecurrenceInstr(Instruction *I, RecurKind Kind,
- InstDesc &Prev, FastMathFlags FMF);
+ /// variable of type 'Kind' for a Loop \p L and reduction PHI \p Phi.
+ /// If the recurrence is a min/max pattern of select(icmp()) this function
+ /// advances the instruction pointer 'I' from the compare instruction to the
+ /// select instruction and stores this pointer in 'PatternLastInst' member of
+ /// the returned struct.
+ static InstDesc isRecurrenceInstr(Loop *L, PHINode *Phi, Instruction *I,
+ RecurKind Kind, InstDesc &Prev,
+ FastMathFlags FuncFMF);
/// Returns true if instruction I has multiple uses in Insts
static bool hasMultipleUsesOf(Instruction *I,
@@ -127,20 +133,29 @@ public:
/// Returns true if all uses of the instruction I is within the Set.
static bool areAllUsesIn(Instruction *I, SmallPtrSetImpl<Instruction *> &Set);
- /// Returns a struct describing if the instruction is a
- /// Select(ICmp(X, Y), X, Y) instruction pattern corresponding to a min(X, Y)
- /// or max(X, Y). \p Prev specifies the description of an already processed
- /// select instruction, so its corresponding cmp can be matched to it.
- static InstDesc isMinMaxSelectCmpPattern(Instruction *I,
- const InstDesc &Prev);
+ /// Returns a struct describing if the instruction is a llvm.(s/u)(min/max),
+ /// llvm.minnum/maxnum or a Select(ICmp(X, Y), X, Y) pair of instructions
+ /// corresponding to a min(X, Y) or max(X, Y), matching the recurrence kind \p
+ /// Kind. \p Prev specifies the description of an already processed select
+ /// instruction, so its corresponding cmp can be matched to it.
+ static InstDesc isMinMaxPattern(Instruction *I, RecurKind Kind,
+ const InstDesc &Prev);
+
+ /// Returns a struct describing whether the instruction is either a
+ /// Select(ICmp(A, B), X, Y), or
+ /// Select(FCmp(A, B), X, Y)
+ /// where one of (X, Y) is a loop invariant integer and the other is a PHI
+ /// value. \p Prev specifies the description of an already processed select
+ /// instruction, so its corresponding cmp can be matched to it.
+ static InstDesc isSelectCmpPattern(Loop *Loop, PHINode *OrigPhi,
+ Instruction *I, InstDesc &Prev);
/// Returns a struct describing if the instruction is a
/// Select(FCmp(X, Y), (Z = X op PHINode), PHINode) instruction pattern.
static InstDesc isConditionalRdxPattern(RecurKind Kind, Instruction *I);
/// Returns identity corresponding to the RecurrenceKind.
- static Constant *getRecurrenceIdentity(RecurKind K, Type *Tp,
- FastMathFlags FMF);
+ Value *getRecurrenceIdentity(RecurKind K, Type *Tp, FastMathFlags FMF);
/// Returns the opcode corresponding to the RecurrenceKind.
static unsigned getOpcode(RecurKind Kind);
@@ -150,7 +165,7 @@ public:
/// non-null, the minimal bit width needed to compute the reduction will be
/// computed.
static bool AddReductionVar(PHINode *Phi, RecurKind Kind, Loop *TheLoop,
- FastMathFlags FMF,
+ FastMathFlags FuncFMF,
RecurrenceDescriptor &RedDes,
DemandedBits *DB = nullptr,
AssumptionCache *AC = nullptr,
@@ -220,6 +235,12 @@ public:
return isIntMinMaxRecurrenceKind(Kind) || isFPMinMaxRecurrenceKind(Kind);
}
+ /// Returns true if the recurrence kind is of the form
+ /// select(cmp(),x,y) where one of (x,y) is loop invariant.
+ static bool isSelectCmpRecurrenceKind(RecurKind Kind) {
+ return Kind == RecurKind::SelectICmp || Kind == RecurKind::SelectFCmp;
+ }
+
/// Returns the type of the recurrence. This type can be narrower than the
/// actual type of the Phi if the recurrence has been type-promoted.
Type *getRecurrenceType() const { return RecurrenceType; }
@@ -329,6 +350,11 @@ public:
: Instruction::BinaryOpsEnd;
}
+ Type *getElementType() const {
+ assert(IK == IK_PtrInduction && "Only pointer induction has element type");
+ return ElementType;
+ }
+
/// Returns a reference to the type cast instructions in the induction
/// update chain, that are redundant when guarded with a runtime
/// SCEV overflow check.
@@ -340,6 +366,7 @@ private:
/// Private constructor - used by \c isInductionPHI.
InductionDescriptor(Value *Start, InductionKind K, const SCEV *Step,
BinaryOperator *InductionBinOp = nullptr,
+ Type *ElementType = nullptr,
SmallVectorImpl<Instruction *> *Casts = nullptr);
/// Start value.
@@ -350,6 +377,9 @@ private:
const SCEV *Step = nullptr;
// Instruction that advances induction variable.
BinaryOperator *InductionBinOp = nullptr;
+ // Element type for pointer induction variables.
+ // TODO: This can be dropped once support for typed pointers is removed.
+ Type *ElementType = nullptr;
// Instructions used for type-casts of the induction variable,
// that are redundant when guarded with a runtime SCEV overflow check.
SmallVector<Instruction *, 2> RedundantCasts;
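A sketch illustrating the new select-compare recurrence kinds; the loop shape in the comment is the pattern the descriptor is meant to recognize, and the helper only uses the isSelectCmpRecurrenceKind predicate declared above:

#include "llvm/Analysis/IVDescriptors.h"

using namespace llvm;

// The new kinds cover reductions of the shape (C-like pseudocode):
//   int r = init;
//   for (...)
//     r = (a[i] > b[i]) ? r : loop_invariant;
// where one select arm is the recurrence PHI and the other is loop invariant.
static bool isSelectCmpReduction(const RecurrenceDescriptor &RD) {
  return RecurrenceDescriptor::isSelectCmpRecurrenceKind(
      RD.getRecurrenceKind());
}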
diff --git a/llvm/include/llvm/Analysis/IVUsers.h b/llvm/include/llvm/Analysis/IVUsers.h
index f8ea3bcca229..e2026a4d5875 100644
--- a/llvm/include/llvm/Analysis/IVUsers.h
+++ b/llvm/include/llvm/Analysis/IVUsers.h
@@ -157,9 +157,6 @@ public:
/// dump - This method is used for debugging.
void dump() const;
-
-protected:
- bool AddUsersImpl(Instruction *I, SmallPtrSetImpl<Loop*> &SimpleLoopNests);
};
Pass *createIVUsersPass();
diff --git a/llvm/include/llvm/Analysis/InlineAdvisor.h b/llvm/include/llvm/Analysis/InlineAdvisor.h
index c27aaf0db8f2..9f9bc3a5e71b 100644
--- a/llvm/include/llvm/Analysis/InlineAdvisor.h
+++ b/llvm/include/llvm/Analysis/InlineAdvisor.h
@@ -22,6 +22,7 @@ class CallBase;
class Function;
class Module;
class OptimizationRemarkEmitter;
+struct ReplayInlinerSettings;
/// There are 3 scenarios we can use the InlineAdvisor:
/// - Default - use manual heuristics.
@@ -143,7 +144,11 @@ public:
/// be up-to-date wrt previous inlining decisions. \p MandatoryOnly indicates
/// only mandatory (always-inline) call sites should be recommended - this
/// allows the InlineAdvisor to track such inlinings.
- /// Returns an InlineAdvice with the inlining recommendation.
+ /// Returns:
+ /// - An InlineAdvice with the inlining recommendation.
+ /// - Null when no recommendation is made (https://reviews.llvm.org/D110658).
+ /// TODO: Consider removing the Null return scenario by incorporating the
+ /// SampleProfile inliner into an InlineAdvisor
std::unique_ptr<InlineAdvice> getAdvice(CallBase &CB,
bool MandatoryOnly = false);
@@ -157,6 +162,12 @@ public:
/// to prepare for a partial update.
virtual void onPassExit() {}
+ /// Called when the module is invalidated. We let the advisor implementation
+ /// decide what to refresh - in the case of the development mode
+ /// implementation, for example, we wouldn't want to delete the whole object
+ /// and need to re-load the model evaluator.
+ virtual void onModuleInvalidated() {}
+
protected:
InlineAdvisor(Module &M, FunctionAnalysisManager &FAM);
virtual std::unique_ptr<InlineAdvice> getAdviceImpl(CallBase &CB) = 0;
@@ -219,15 +230,18 @@ public:
InlineAdvisorAnalysis() = default;
struct Result {
Result(Module &M, ModuleAnalysisManager &MAM) : M(M), MAM(MAM) {}
- bool invalidate(Module &, const PreservedAnalyses &,
+ bool invalidate(Module &, const PreservedAnalyses &PA,
ModuleAnalysisManager::Invalidator &) {
- // InlineAdvisor must be preserved across analysis invalidations.
- return false;
+ if (Advisor && !PA.areAllPreserved())
+ Advisor->onModuleInvalidated();
+ // Check whether the analysis has been explicitly invalidated. Otherwise,
+ // it's stateless and remains preserved.
+ auto PAC = PA.getChecker<InlineAdvisorAnalysis>();
+ return !PAC.preservedWhenStateless();
}
bool tryCreate(InlineParams Params, InliningAdvisorMode Mode,
- StringRef ReplayFile);
+ const ReplayInlinerSettings &ReplaySettings);
InlineAdvisor *getAdvisor() const { return Advisor.get(); }
- void clear() { Advisor.reset(); }
private:
Module &M;
@@ -263,12 +277,16 @@ shouldInline(CallBase &CB, function_ref<InlineCost(CallBase &CB)> GetInlineCost,
/// Emit ORE message.
void emitInlinedInto(OptimizationRemarkEmitter &ORE, DebugLoc DLoc,
const BasicBlock *Block, const Function &Callee,
- const Function &Caller, const InlineCost &IC,
- bool ForProfileContext = false,
+ const Function &Caller, bool IsMandatory,
+ function_ref<void(OptimizationRemark &)> ExtraContext = {},
const char *PassName = nullptr);
-/// get call site location as string
-std::string getCallSiteLocation(DebugLoc DLoc);
+/// Emit ORE message based in cost (default heuristic).
+void emitInlinedIntoBasedOnCost(OptimizationRemarkEmitter &ORE, DebugLoc DLoc,
+ const BasicBlock *Block, const Function &Callee,
+ const Function &Caller, const InlineCost &IC,
+ bool ForProfileContext = false,
+ const char *PassName = nullptr);
/// Add location info to ORE message.
void addLocationToRemarks(OptimizationRemark &Remark, DebugLoc DLoc);
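Because the analysis result is no longer unconditionally preserved, a module pass that wants to keep the advisor alive must say so explicitly; a minimal sketch with a hypothetical pass, using only the standard PreservedAnalyses API:

#include "llvm/Analysis/InlineAdvisor.h"
#include "llvm/IR/PassManager.h"

using namespace llvm;

// Hypothetical module pass that does not disturb inlining state.
struct KeepAdvisorPass : PassInfoMixin<KeepAdvisorPass> {
  PreservedAnalyses run(Module &M, ModuleAnalysisManager &) {
    PreservedAnalyses PA = PreservedAnalyses::none();
    // Explicitly preserving the analysis makes Result::invalidate() return
    // false, so the advisor object itself is kept; onModuleInvalidated()
    // still gives the implementation a chance to refresh its state.
    PA.preserve<InlineAdvisorAnalysis>();
    return PA;
  }
};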
diff --git a/llvm/include/llvm/Analysis/InlineCost.h b/llvm/include/llvm/Analysis/InlineCost.h
index 4e1b28d4633f..b22841343b1a 100644
--- a/llvm/include/llvm/Analysis/InlineCost.h
+++ b/llvm/include/llvm/Analysis/InlineCost.h
@@ -213,6 +213,9 @@ struct InlineParams {
/// Indicate whether we should allow inline deferral.
Optional<bool> EnableDeferral = true;
+
+ /// Indicate whether we allow inlining for recursive calls.
+ Optional<bool> AllowRecursiveCall = false;
};
/// Generate the parameters to tune the inline cost analysis based only on the
diff --git a/llvm/include/llvm/Analysis/InlineOrder.h b/llvm/include/llvm/Analysis/InlineOrder.h
new file mode 100644
index 000000000000..def3192356f4
--- /dev/null
+++ b/llvm/include/llvm/Analysis/InlineOrder.h
@@ -0,0 +1,172 @@
+//===- InlineOrder.h - Inlining order abstraction -*- C++ ---*-------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+#ifndef LLVM_ANALYSIS_INLINEORDER_H
+#define LLVM_ANALYSIS_INLINEORDER_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include <algorithm>
+#include <utility>
+
+namespace llvm {
+class CallBase;
+class Function;
+class Module;
+
+template <typename T> class InlineOrder {
+public:
+ using reference = T &;
+ using const_reference = const T &;
+
+ virtual ~InlineOrder() {}
+
+ virtual size_t size() = 0;
+
+ virtual void push(const T &Elt) = 0;
+
+ virtual T pop() = 0;
+
+ virtual const_reference front() = 0;
+
+ virtual void erase_if(function_ref<bool(T)> Pred) = 0;
+
+ bool empty() { return !size(); }
+};
+
+template <typename T, typename Container = SmallVector<T, 16>>
+class DefaultInlineOrder : public InlineOrder<T> {
+ using reference = T &;
+ using const_reference = const T &;
+
+public:
+ size_t size() override { return Calls.size() - FirstIndex; }
+
+ void push(const T &Elt) override { Calls.push_back(Elt); }
+
+ T pop() override {
+ assert(size() > 0);
+ return Calls[FirstIndex++];
+ }
+
+ const_reference front() override {
+ assert(size() > 0);
+ return Calls[FirstIndex];
+ }
+
+ void erase_if(function_ref<bool(T)> Pred) override {
+ Calls.erase(std::remove_if(Calls.begin() + FirstIndex, Calls.end(), Pred),
+ Calls.end());
+ }
+
+private:
+ Container Calls;
+ size_t FirstIndex = 0;
+};
+
+class InlineSizePriority {
+public:
+ InlineSizePriority(int Size) : Size(Size) {}
+
+ static bool isMoreDesirable(const InlineSizePriority &S1,
+ const InlineSizePriority &S2) {
+ return S1.Size < S2.Size;
+ }
+
+ static InlineSizePriority evaluate(CallBase *CB) {
+ Function *Callee = CB->getCalledFunction();
+ return InlineSizePriority(Callee->getInstructionCount());
+ }
+
+ int Size;
+};
+
+template <typename PriorityT>
+class PriorityInlineOrder : public InlineOrder<std::pair<CallBase *, int>> {
+ using T = std::pair<CallBase *, int>;
+ using HeapT = std::pair<CallBase *, PriorityT>;
+ using reference = T &;
+ using const_reference = const T &;
+
+ static bool cmp(const HeapT &P1, const HeapT &P2) {
+ return PriorityT::isMoreDesirable(P2.second, P1.second);
+ }
+
+ // A call site could become less desirable for inlining because of the size
+ // growth from prior inlining into the callee. This method is used to lazily
+ // update the desirability of a call site if it's decreasing. It is only
+ // called on pop() or front(), not every time the desirability changes. When
+ // the desirability of the front call site decreases, an updated one would be
+ // pushed right back into the heap. For simplicity, those cases where
+ // the desirability of a call site increases are ignored here.
+ void adjust() {
+ bool Changed = false;
+ do {
+ CallBase *CB = Heap.front().first;
+ const PriorityT PreviousGoodness = Heap.front().second;
+ const PriorityT CurrentGoodness = PriorityT::evaluate(CB);
+ Changed = PriorityT::isMoreDesirable(PreviousGoodness, CurrentGoodness);
+ if (Changed) {
+ std::pop_heap(Heap.begin(), Heap.end(), cmp);
+ Heap.pop_back();
+ Heap.push_back({CB, CurrentGoodness});
+ std::push_heap(Heap.begin(), Heap.end(), cmp);
+ }
+ } while (Changed);
+ }
+
+public:
+ size_t size() override { return Heap.size(); }
+
+ void push(const T &Elt) override {
+ CallBase *CB = Elt.first;
+ const int InlineHistoryID = Elt.second;
+ const PriorityT Goodness = PriorityT::evaluate(CB);
+
+ Heap.push_back({CB, Goodness});
+ std::push_heap(Heap.begin(), Heap.end(), cmp);
+ InlineHistoryMap[CB] = InlineHistoryID;
+ }
+
+ T pop() override {
+ assert(size() > 0);
+ adjust();
+
+ CallBase *CB = Heap.front().first;
+ T Result = std::make_pair(CB, InlineHistoryMap[CB]);
+ InlineHistoryMap.erase(CB);
+ std::pop_heap(Heap.begin(), Heap.end(), cmp);
+ Heap.pop_back();
+ return Result;
+ }
+
+ const_reference front() override {
+ assert(size() > 0);
+ adjust();
+
+ CallBase *CB = Heap.front().first;
+ return *InlineHistoryMap.find(CB);
+ }
+
+ void erase_if(function_ref<bool(T)> Pred) override {
+ auto PredWrapper = [=](HeapT P) -> bool {
+ return Pred(std::make_pair(P.first, 0));
+ };
+ llvm::erase_if(Heap, PredWrapper);
+ std::make_heap(Heap.begin(), Heap.end(), cmp);
+ }
+
+private:
+ SmallVector<HeapT, 16> Heap;
+ DenseMap<CallBase *, int> InlineHistoryMap;
+};
+} // namespace llvm
+#endif // LLVM_ANALYSIS_INLINEORDER_H
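A sketch of using the priority-queue flavor declared above inside an inliner-style worklist loop; collecting the call sites is assumed to happen elsewhere, and InlineSizePriority requires direct calls since it inspects the callee:

#include "llvm/Analysis/InlineOrder.h"

using namespace llvm;

// Sketch: visit call sites smallest-callee-first. The int in each pair is
// the caller's inline history id (0 here purely for illustration).
static void drainWorklist(SmallVectorImpl<CallBase *> &CallSites) {
  PriorityInlineOrder<InlineSizePriority> Order;
  for (CallBase *CB : CallSites)
    Order.push({CB, 0});
  while (!Order.empty()) {
    std::pair<CallBase *, int> Front = Order.pop();
    (void)Front;
    // ... attempt to inline *Front.first here ...
  }
}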
diff --git a/llvm/include/llvm/Analysis/InstructionSimplify.h b/llvm/include/llvm/Analysis/InstructionSimplify.h
index efaf1847276b..f0f8e4bc9175 100644
--- a/llvm/include/llvm/Analysis/InstructionSimplify.h
+++ b/llvm/include/llvm/Analysis/InstructionSimplify.h
@@ -248,7 +248,7 @@ Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
const SimplifyQuery &Q);
/// Given operands for a GetElementPtrInst, fold the result or return null.
-Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
+Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops, bool InBounds,
const SimplifyQuery &Q);
/// Given operands for an InsertValueInst, fold the result or return null.
diff --git a/llvm/include/llvm/Analysis/LazyCallGraph.h b/llvm/include/llvm/Analysis/LazyCallGraph.h
index ca276d2f3cf8..0580f4d7b226 100644
--- a/llvm/include/llvm/Analysis/LazyCallGraph.h
+++ b/llvm/include/llvm/Analysis/LazyCallGraph.h
@@ -145,7 +145,7 @@ public:
/// around but clear them.
explicit operator bool() const;
- /// Returnss the \c Kind of the edge.
+ /// Returns the \c Kind of the edge.
Kind getKind() const;
/// Test whether the edge represents a direct call to a function.
@@ -307,9 +307,9 @@ public:
/// A node in the call graph.
///
- /// This represents a single node. It's primary roles are to cache the list of
- /// callees, de-duplicate and provide fast testing of whether a function is
- /// a callee, and facilitate iteration of child nodes in the graph.
+ /// This represents a single node. Its primary roles are to cache the list of
+ /// callees, de-duplicate and provide fast testing of whether a function is a
+ /// callee, and facilitate iteration of child nodes in the graph.
///
/// The node works much like an optional in order to lazily populate the
/// edges of each node. Until populated, there are no edges. Once populated,
@@ -392,7 +392,7 @@ public:
/// Internal helper to directly replace the function with a new one.
///
- /// This is used to facilitate tranfsormations which need to replace the
+ /// This is used to facilitate transformations which need to replace the
/// formal Function object but directly move the body and users from one to
/// the other.
void replaceFunction(Function &NewF);
@@ -419,7 +419,7 @@ public:
/// outer structure. SCCs do not support mutation of the call graph, that
/// must be done through the containing \c RefSCC in order to fully reason
/// about the ordering and connections of the graph.
- class SCC {
+ class LLVM_EXTERNAL_VISIBILITY SCC {
friend class LazyCallGraph;
friend class LazyCallGraph::Node;
@@ -435,7 +435,7 @@ public:
Nodes.clear();
}
- /// Print a short descrtiption useful for debugging or logging.
+ /// Print a short description useful for debugging or logging.
///
/// We print the function names in the SCC wrapped in '()'s and skipping
/// the middle functions if there are a large number.
@@ -467,9 +467,10 @@ public:
/// Verify invariants about the SCC.
///
/// This will attempt to validate all of the basic invariants within an
- /// SCC, but not that it is a strongly connected componet per-se. Primarily
- /// useful while building and updating the graph to check that basic
- /// properties are in place rather than having inexplicable crashes later.
+ /// SCC, but not that it is a strongly connected component per se.
+ /// Primarily useful while building and updating the graph to check that
+ /// basic properties are in place rather than having inexplicable crashes
+ /// later.
void verify();
#endif
@@ -511,7 +512,7 @@ public:
/// Provide a short name by printing this SCC to a std::string.
///
- /// This copes with the fact that we don't have a name per-se for an SCC
+ /// This copes with the fact that we don't have a name per se for an SCC
/// while still making the use of this in debugging and logging useful.
std::string getName() const {
std::string Name;
@@ -644,7 +645,7 @@ public:
/// Provide a short name by printing this RefSCC to a std::string.
///
- /// This copes with the fact that we don't have a name per-se for an RefSCC
+ /// This copes with the fact that we don't have a name per se for an RefSCC
/// while still making the use of this in debugging and logging useful.
std::string getName() const {
std::string Name;
@@ -1085,47 +1086,9 @@ public:
/// updates that set with every constant visited.
///
/// For each defined function, calls \p Callback with that function.
- template <typename CallbackT>
static void visitReferences(SmallVectorImpl<Constant *> &Worklist,
SmallPtrSetImpl<Constant *> &Visited,
- CallbackT Callback) {
- while (!Worklist.empty()) {
- Constant *C = Worklist.pop_back_val();
-
- if (Function *F = dyn_cast<Function>(C)) {
- if (!F->isDeclaration())
- Callback(*F);
- continue;
- }
-
- // The blockaddress constant expression is a weird special case, we can't
- // generically walk its operands the way we do for all other constants.
- if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) {
- // If we've already visited the function referred to by the block
- // address, we don't need to revisit it.
- if (Visited.count(BA->getFunction()))
- continue;
-
- // If all of the blockaddress' users are instructions within the
- // referred to function, we don't need to insert a cycle.
- if (llvm::all_of(BA->users(), [&](User *U) {
- if (Instruction *I = dyn_cast<Instruction>(U))
- return I->getFunction() == BA->getFunction();
- return false;
- }))
- continue;
-
- // Otherwise we should go visit the referred to function.
- Visited.insert(BA->getFunction());
- Worklist.push_back(BA->getFunction());
- continue;
- }
-
- for (Value *Op : C->operand_values())
- if (Visited.insert(cast<Constant>(Op)).second)
- Worklist.push_back(cast<Constant>(Op));
- }
- }
+ function_ref<void(Function &)> Callback);
///@}
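With the body out-lined into a function_ref based overload, callers still pass a callable that receives each defined function reachable from the constant worklist; a sketch, with the worklist seeded from a global's initializer as an example:

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/IR/GlobalVariable.h"

using namespace llvm;

// Sketch: visit every defined function referenced (transitively) by the
// initializer of GV.
static void visitFunctionsReferencedBy(GlobalVariable &GV) {
  SmallVector<Constant *, 16> Worklist;
  SmallPtrSet<Constant *, 16> Visited;
  if (GV.hasInitializer()) {
    Constant *Init = GV.getInitializer();
    Worklist.push_back(Init);
    Visited.insert(Init);
  }
  LazyCallGraph::visitReferences(Worklist, Visited, [](Function &F) {
    // F is a defined function referenced from the initializer.
  });
}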
diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index 0a0ef1536caf..2b4edfac61fc 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -177,21 +177,11 @@ public:
/// Register the location (instructions are given increasing numbers)
/// of a write access.
- void addAccess(StoreInst *SI) {
- Value *Ptr = SI->getPointerOperand();
- Accesses[MemAccessInfo(Ptr, true)].push_back(AccessIdx);
- InstMap.push_back(SI);
- ++AccessIdx;
- }
+ void addAccess(StoreInst *SI);
/// Register the location (instructions are given increasing numbers)
/// of a write access.
- void addAccess(LoadInst *LI) {
- Value *Ptr = LI->getPointerOperand();
- Accesses[MemAccessInfo(Ptr, false)].push_back(AccessIdx);
- InstMap.push_back(LI);
- ++AccessIdx;
- }
+ void addAccess(LoadInst *LI);
/// Check whether the dependencies between the accesses are safe.
///
@@ -664,15 +654,14 @@ Value *stripIntegerCast(Value *V);
/// If necessary this method will version the stride of the pointer according
/// to \p PtrToStride and therefore add further predicates to \p PSE.
///
-/// If \p OrigPtr is not null, use it to look up the stride value instead of \p
-/// Ptr. \p PtrToStride provides the mapping between the pointer value and its
+/// \p PtrToStride provides the mapping between the pointer value and its
/// stride as collected by LoopVectorizationLegality::collectStridedAccess.
const SCEV *replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
const ValueToValueMap &PtrToStride,
- Value *Ptr, Value *OrigPtr = nullptr);
+ Value *Ptr);
-/// If the pointer has a constant stride return it in units of its
-/// element size. Otherwise return zero.
+/// If the pointer has a constant stride return it in units of the access type
+/// size. Otherwise return zero.
///
/// Ensure that it does not wrap in the address space, assuming the predicate
/// associated with \p PSE is true.
@@ -681,7 +670,8 @@ const SCEV *replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
/// to \p PtrToStride and therefore add further predicates to \p PSE.
/// The \p Assume parameter indicates if we are allowed to make additional
/// run-time assumptions.
-int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp,
+int64_t getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
+ const Loop *Lp,
const ValueToValueMap &StridesMap = ValueToValueMap(),
bool Assume = false, bool ShouldCheckWrap = true);
diff --git a/llvm/include/llvm/Analysis/LoopAnalysisManager.h b/llvm/include/llvm/Analysis/LoopAnalysisManager.h
index 92db1d67fc4e..bc8a1e74e447 100644
--- a/llvm/include/llvm/Analysis/LoopAnalysisManager.h
+++ b/llvm/include/llvm/Analysis/LoopAnalysisManager.h
@@ -58,6 +58,7 @@ struct LoopStandardAnalysisResults {
TargetLibraryInfo &TLI;
TargetTransformInfo &TTI;
BlockFrequencyInfo *BFI;
+ BranchProbabilityInfo *BPI;
MemorySSA *MSSA;
};
diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h
index 164ec50e47bc..15c9d911ab80 100644
--- a/llvm/include/llvm/Analysis/LoopInfo.h
+++ b/llvm/include/llvm/Analysis/LoopInfo.h
@@ -527,7 +527,7 @@ extern template class LoopBase<BasicBlock, Loop>;
/// Represents a single loop in the control flow graph. Note that not all SCCs
/// in the CFG are necessarily loops.
-class Loop : public LoopBase<BasicBlock, Loop> {
+class LLVM_EXTERNAL_VISIBILITY Loop : public LoopBase<BasicBlock, Loop> {
public:
/// A range representing the start and end location of a loop.
class LocRange {
@@ -950,7 +950,7 @@ public:
///
/// Note that because loops form a forest of trees, preorder is equivalent to
/// reverse postorder.
- SmallVector<LoopT *, 4> getLoopsInPreorder();
+ SmallVector<LoopT *, 4> getLoopsInPreorder() const;
/// Return all of the loops in the function in preorder across the loop
/// nests, with siblings in *reverse* program order.
@@ -960,7 +960,7 @@ public:
///
/// Also note that this is *not* a reverse preorder. Only the siblings are in
/// reverse program order.
- SmallVector<LoopT *, 4> getLoopsInReverseSiblingPreorder();
+ SmallVector<LoopT *, 4> getLoopsInReverseSiblingPreorder() const;
/// Return the inner most loop that BB lives in. If a basic block is in no
/// loop (for example the entry node), null is returned.
@@ -1213,6 +1213,13 @@ public:
};
+/// Enable verification of loop info.
+///
+/// The flag enables checks which are expensive and are disabled by default
+/// unless the `EXPENSIVE_CHECKS` macro is defined. The `-verify-loop-info`
+/// flag allows the checks to be enabled selectively without re-compilation.
+extern bool VerifyLoopInfo;
+
// Allow clients to walk the list of nested loops...
template <> struct GraphTraits<const Loop *> {
typedef const Loop *NodeRef;
@@ -1305,6 +1312,10 @@ bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name);
llvm::Optional<int>
getOptionalIntLoopAttribute(const Loop *TheLoop, StringRef Name);
+/// Find named metadata for a loop with an integer value. Return \p Default if
+/// not set.
+int getIntLoopAttribute(const Loop *TheLoop, StringRef Name, int Default = 0);
+
/// Find string metadata for loop
///
/// If it has a value (e.g. {"llvm.distribute", 1} return the value as an
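A sketch of the new integer-attribute helper; the metadata name below is a placeholder used only for illustration, not an existing loop hint:

#include "llvm/Analysis/LoopInfo.h"

using namespace llvm;

// Sketch: read a numeric loop hint, falling back to a default of 1.
// "llvm.loop.my.hint" is a hypothetical metadata name.
static int getMyLoopHint(const Loop *L) {
  return getIntLoopAttribute(L, "llvm.loop.my.hint", /*Default=*/1);
}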
diff --git a/llvm/include/llvm/Analysis/LoopInfoImpl.h b/llvm/include/llvm/Analysis/LoopInfoImpl.h
index 2cc9afb7c2cd..b8b8330d0fe1 100644
--- a/llvm/include/llvm/Analysis/LoopInfoImpl.h
+++ b/llvm/include/llvm/Analysis/LoopInfoImpl.h
@@ -574,7 +574,8 @@ void LoopInfoBase<BlockT, LoopT>::analyze(const DomTreeBase<BlockT> &DomTree) {
}
template <class BlockT, class LoopT>
-SmallVector<LoopT *, 4> LoopInfoBase<BlockT, LoopT>::getLoopsInPreorder() {
+SmallVector<LoopT *, 4>
+LoopInfoBase<BlockT, LoopT>::getLoopsInPreorder() const {
SmallVector<LoopT *, 4> PreOrderLoops, PreOrderWorklist;
// The outer-most loop actually goes into the result in the same relative
// order as we walk it. But LoopInfo stores the top level loops in reverse
@@ -592,7 +593,7 @@ SmallVector<LoopT *, 4> LoopInfoBase<BlockT, LoopT>::getLoopsInPreorder() {
template <class BlockT, class LoopT>
SmallVector<LoopT *, 4>
-LoopInfoBase<BlockT, LoopT>::getLoopsInReverseSiblingPreorder() {
+LoopInfoBase<BlockT, LoopT>::getLoopsInReverseSiblingPreorder() const {
SmallVector<LoopT *, 4> PreOrderLoops, PreOrderWorklist;
// The outer-most loop actually goes into the result in the same relative
// order as we walk it. LoopInfo stores the top level loops in reverse
diff --git a/llvm/include/llvm/Analysis/LoopNestAnalysis.h b/llvm/include/llvm/Analysis/LoopNestAnalysis.h
index 9a749a1c8eae..3d4a064cf7e3 100644
--- a/llvm/include/llvm/Analysis/LoopNestAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopNestAnalysis.h
@@ -21,11 +21,14 @@
namespace llvm {
using LoopVectorTy = SmallVector<Loop *, 8>;
+
class LPMUpdater;
/// This class represents a loop nest and can be used to query its properties.
-class LoopNest {
+class LLVM_EXTERNAL_VISIBILITY LoopNest {
public:
+ using InstrVectorTy = SmallVector<const Instruction *>;
+
/// Construct a loop nest rooted by loop \p Root.
LoopNest(Loop &Root, ScalarEvolution &SE);
@@ -48,6 +51,12 @@ public:
static bool arePerfectlyNested(const Loop &OuterLoop, const Loop &InnerLoop,
ScalarEvolution &SE);
+ /// Return a vector of instructions that prevent the LoopNest given
+ /// by loops \p OuterLoop and \p InnerLoop from being perfect.
+ static InstrVectorTy getInterveningInstructions(const Loop &OuterLoop,
+ const Loop &InnerLoop,
+ ScalarEvolution &SE);
+
/// Return the maximum nesting depth of the loop nest rooted by loop \p Root.
/// For example given the loop nest:
/// \code
@@ -150,6 +159,17 @@ public:
protected:
const unsigned MaxPerfectDepth; // maximum perfect nesting depth level.
LoopVectorTy Loops; // the loops in the nest (in breadth first order).
+
+private:
+ enum LoopNestEnum {
+ PerfectLoopNest,
+ ImperfectLoopNest,
+ InvalidLoopStructure,
+ OuterLoopLowerBoundUnknown
+ };
+ static LoopNestEnum analyzeLoopNestForPerfectNest(const Loop &OuterLoop,
+ const Loop &InnerLoop,
+ ScalarEvolution &SE);
};
raw_ostream &operator<<(raw_ostream &, const LoopNest &);
diff --git a/llvm/include/llvm/Analysis/MLInlineAdvisor.h b/llvm/include/llvm/Analysis/MLInlineAdvisor.h
index 54edbb823263..a218561e61c7 100644
--- a/llvm/include/llvm/Analysis/MLInlineAdvisor.h
+++ b/llvm/include/llvm/Analysis/MLInlineAdvisor.h
@@ -38,6 +38,7 @@ public:
bool isForcedToStop() const { return ForceStop; }
int64_t getLocalCalls(Function &F);
const MLModelRunner &getModelRunner() const { return *ModelRunner.get(); }
+ void onModuleInvalidated() override { Invalid = true; }
protected:
std::unique_ptr<InlineAdvice> getAdviceImpl(CallBase &CB) override;
@@ -55,6 +56,7 @@ protected:
private:
int64_t getModuleIRSize() const;
+ bool Invalid = true;
std::unique_ptr<CallGraph> CG;
int64_t NodeCount = 0;
diff --git a/llvm/include/llvm/Analysis/MemorySSA.h b/llvm/include/llvm/Analysis/MemorySSA.h
index f40b99968fd3..48aeef371e3d 100644
--- a/llvm/include/llvm/Analysis/MemorySSA.h
+++ b/llvm/include/llvm/Analysis/MemorySSA.h
@@ -106,9 +106,6 @@
namespace llvm {
-/// Enables memory ssa as a dependency for loop passes.
-extern cl::opt<bool> EnableMSSALoopDependency;
-
class AllocaInst;
class Function;
class Instruction;
@@ -786,21 +783,22 @@ public:
/// dominates Use \p B.
bool dominates(const MemoryAccess *A, const Use &B) const;
+ enum class VerificationLevel { Fast, Full };
/// Verify that MemorySSA is self consistent (IE definitions dominate
/// all uses, uses appear in the right places). This is used by unit tests.
- void verifyMemorySSA() const;
+ void verifyMemorySSA(VerificationLevel = VerificationLevel::Fast) const;
/// Used in various insertion functions to specify whether we are talking
/// about the beginning or end of a block.
enum InsertionPlace { Beginning, End, BeforeTerminator };
protected:
- // Used by Memory SSA annotater, dumpers, and wrapper pass
- friend class MemorySSAAnnotatedWriter;
+ // Used by Memory SSA dumpers and wrapper pass
friend class MemorySSAPrinterLegacyPass;
friend class MemorySSAUpdater;
- void verifyOrderingDominationAndDefUses(Function &F) const;
+ void verifyOrderingDominationAndDefUses(
+ Function &F, VerificationLevel = VerificationLevel::Fast) const;
void verifyDominationNumbers(const Function &F) const;
void verifyPrevDefInPhis(Function &F) const;
@@ -898,6 +896,13 @@ private:
unsigned NextID;
};
+/// Enables verification of MemorySSA.
+///
+/// The checks which this flag enables are expensive and disabled by default
+/// unless `EXPENSIVE_CHECKS` is defined. The flag `-verify-memoryssa` can be
+/// used to selectively enable the verification without re-compilation.
+extern bool VerifyMemorySSA;
+
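
As an illustrative sketch (not prescribed by this header), the new verification level might be driven from the flag like so; MSSA is a hypothetical, already-computed MemorySSA result.

    // Hypothetical sketch; MSSA is a placeholder for an existing analysis result.
    MSSA.verifyMemorySSA(MemorySSA::VerificationLevel::Fast); // cheap structural checks (default)
    if (VerifyMemorySSA)                                      // EXPENSIVE_CHECKS / -verify-memoryssa
      MSSA.verifyMemorySSA(MemorySSA::VerificationLevel::Full);
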
// Internal MemorySSA utils, for use by MemorySSA classes and walkers
class MemorySSAUtil {
protected:
@@ -956,6 +961,17 @@ public:
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
+/// Printer pass for \c MemorySSA via the walker.
+class MemorySSAWalkerPrinterPass
+ : public PassInfoMixin<MemorySSAWalkerPrinterPass> {
+ raw_ostream &OS;
+
+public:
+ explicit MemorySSAWalkerPrinterPass(raw_ostream &OS) : OS(OS) {}
+
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
/// Verifier pass for \c MemorySSA.
struct MemorySSAVerifierPass : PassInfoMixin<MemorySSAVerifierPass> {
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
diff --git a/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h b/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h
index 62bdade95d96..17062ab907a6 100644
--- a/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h
+++ b/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h
@@ -78,14 +78,17 @@ inline const Value *GetUnderlyingObjCPtr(const Value *V) {
}
/// A wrapper for GetUnderlyingObjCPtr used for results memoization.
-inline const Value *
-GetUnderlyingObjCPtrCached(const Value *V,
- DenseMap<const Value *, WeakTrackingVH> &Cache) {
- if (auto InCache = Cache.lookup(V))
- return InCache;
+inline const Value *GetUnderlyingObjCPtrCached(
+ const Value *V,
+ DenseMap<const Value *, std::pair<WeakVH, WeakTrackingVH>> &Cache) {
+ // The entry is invalid if either value handle is null.
+ auto InCache = Cache.lookup(V);
+ if (InCache.first && InCache.second)
+ return InCache.second;
const Value *Computed = GetUnderlyingObjCPtr(V);
- Cache[V] = const_cast<Value *>(Computed);
+ Cache[V] =
+ std::make_pair(const_cast<Value *>(V), const_cast<Value *>(Computed));
return Computed;
}
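
A minimal usage sketch of the updated signature, assuming a caller-owned memoization cache and a hypothetical value V.

    // The cache now pairs a WeakVH for the key with a WeakTrackingVH for the result.
    DenseMap<const Value *, std::pair<WeakVH, WeakTrackingVH>> Cache;
    const Value *Underlying = GetUnderlyingObjCPtrCached(V, Cache);
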
@@ -168,8 +171,8 @@ bool IsPotentialRetainableObjPtr(const Value *Op, AAResults &AA);
/// Helper for GetARCInstKind. Determines what kind of construct CS
/// is.
inline ARCInstKind GetCallSiteClass(const CallBase &CB) {
- for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I)
- if (IsPotentialRetainableObjPtr(*I))
+ for (const Use &U : CB.args())
+ if (IsPotentialRetainableObjPtr(U))
return CB.onlyReadsMemory() ? ARCInstKind::User : ARCInstKind::CallOrUser;
return CB.onlyReadsMemory() ? ARCInstKind::None : ARCInstKind::Call;
@@ -204,11 +207,10 @@ inline bool IsObjCIdentifiedObject(const Value *V) {
return true;
StringRef Section = GV->getSection();
- if (Section.find("__message_refs") != StringRef::npos ||
- Section.find("__objc_classrefs") != StringRef::npos ||
- Section.find("__objc_superrefs") != StringRef::npos ||
- Section.find("__objc_methname") != StringRef::npos ||
- Section.find("__cstring") != StringRef::npos)
+ if (Section.contains("__message_refs") ||
+ Section.contains("__objc_classrefs") ||
+ Section.contains("__objc_superrefs") ||
+ Section.contains("__objc_methname") || Section.contains("__cstring"))
return true;
}
}
diff --git a/llvm/include/llvm/Analysis/ObjCARCUtil.h b/llvm/include/llvm/Analysis/ObjCARCUtil.h
index 2566bfbcf61c..362dd6c29992 100644
--- a/llvm/include/llvm/Analysis/ObjCARCUtil.h
+++ b/llvm/include/llvm/Analysis/ObjCARCUtil.h
@@ -11,9 +11,11 @@
///
//===----------------------------------------------------------------------===//
-#ifndef LLVM_IR_OBJCARCUTIL_H
-#define LLVM_IR_OBJCARCUTIL_H
+#ifndef LLVM_ANALYSIS_OBJCARCUTIL_H
+#define LLVM_ANALYSIS_OBJCARCUTIL_H
+#include "llvm/Analysis/ObjCARCInstKind.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/LLVMContext.h"
@@ -24,13 +26,6 @@ inline const char *getRVMarkerModuleFlagStr() {
return "clang.arc.retainAutoreleasedReturnValueMarker";
}
-enum AttachedCallOperandBundle : unsigned { RVOB_Retain, RVOB_Claim };
-
-inline AttachedCallOperandBundle
-getAttachedCallOperandBundleEnum(bool IsRetain) {
- return IsRetain ? RVOB_Retain : RVOB_Claim;
-}
-
inline bool hasAttachedCallOpBundle(const CallBase *CB) {
// Ignore the bundle if the return type is void. Global optimization passes
// can turn the called function's return type to void. That should happen only
@@ -43,14 +38,32 @@ inline bool hasAttachedCallOpBundle(const CallBase *CB) {
.hasValue();
}
-inline bool hasAttachedCallOpBundle(const CallBase *CB, bool IsRetain) {
- assert(hasAttachedCallOpBundle(CB) &&
- "call doesn't have operand bundle clang_arc_attachedcall");
+/// This function returns operand bundle clang_arc_attachedcall's argument,
+/// which is the address of the ARC runtime function.
+inline Optional<Function *> getAttachedARCFunction(const CallBase *CB) {
auto B = CB->getOperandBundle(LLVMContext::OB_clang_arc_attachedcall);
- if (!B.hasValue())
- return false;
- return cast<ConstantInt>(B->Inputs[0])->getZExtValue() ==
- getAttachedCallOperandBundleEnum(IsRetain);
+ if (!B.hasValue() || B->Inputs.size() == 0)
+ return None;
+
+ return cast<Function>(B->Inputs[0]);
+}
+
+/// Check whether the function is retainRV/claimRV.
+inline bool isRetainOrClaimRV(ARCInstKind Kind) {
+ return Kind == ARCInstKind::RetainRV || Kind == ARCInstKind::ClaimRV;
+}
+
+/// This function returns the ARCInstKind of the function attached to operand
+/// bundle clang_arc_attachedcall. It returns None if the call doesn't have the
+/// operand bundle or the operand is null. Otherwise it returns either RetainRV
+/// or ClaimRV.
+inline ARCInstKind getAttachedARCFunctionKind(const CallBase *CB) {
+ Optional<Function *> Fn = getAttachedARCFunction(CB);
+ if (!Fn.hasValue())
+ return ARCInstKind::None;
+ auto FnClass = GetFunctionClass(*Fn);
+ assert(isRetainOrClaimRV(FnClass) && "unexpected ARC runtime function");
+ return FnClass;
}
} // end namespace objcarc
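
Illustrative sketch only, assuming a hypothetical call site CB that carries a clang_arc_attachedcall operand bundle.

    // Query the kind of the attached ARC runtime function, if any.
    objcarc::ARCInstKind Kind = objcarc::getAttachedARCFunctionKind(CB);
    if (objcarc::isRetainOrClaimRV(Kind)) {
      // The bundle argument is the runtime function itself (retainRV/claimRV).
      Function *RVFn = *objcarc::getAttachedARCFunction(CB);
      (void)RVFn;
    }
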
diff --git a/llvm/include/llvm/Analysis/ProfileSummaryInfo.h b/llvm/include/llvm/Analysis/ProfileSummaryInfo.h
index c95404d96f4e..886800d8a0f5 100644
--- a/llvm/include/llvm/Analysis/ProfileSummaryInfo.h
+++ b/llvm/include/llvm/Analysis/ProfileSummaryInfo.h
@@ -134,9 +134,13 @@ public:
bool isColdCount(uint64_t C) const;
/// Returns true if count \p C is considered hot with regard to a given
/// hot percentile cutoff value.
+ /// PercentileCutoff is encoded as a 6 digit decimal fixed point number, where
+ /// the first two digits are the whole part. E.g. 995000 for 99.5 percentile.
bool isHotCountNthPercentile(int PercentileCutoff, uint64_t C) const;
/// Returns true if count \p C is considered cold with regard to a given
/// cold percentile cutoff value.
+ /// PercentileCutoff is encoded as a 6 digit decimal fixed point number, where
+ /// the first two digits are the whole part. E.g. 995000 for 99.5 percentile.
bool isColdCountNthPercentile(int PercentileCutoff, uint64_t C) const;
/// Returns true if BasicBlock \p BB is considered hot.
bool isHotBlock(const BasicBlock *BB, BlockFrequencyInfo *BFI) const;
@@ -144,10 +148,14 @@ public:
bool isColdBlock(const BasicBlock *BB, BlockFrequencyInfo *BFI) const;
/// Returns true if BasicBlock \p BB is considered hot with regard to a given
/// hot percentile cutoff value.
+ /// PercentileCutoff is encoded as a 6 digit decimal fixed point number, where
+ /// the first two digits are the whole part. E.g. 995000 for 99.5 percentile.
bool isHotBlockNthPercentile(int PercentileCutoff, const BasicBlock *BB,
BlockFrequencyInfo *BFI) const;
/// Returns true if BasicBlock \p BB is considered cold with regard to a given
/// cold percentile cutoff value.
+ /// PercentileCutoff is encoded as a 6 digit decimal fixed point number, where
+ /// the first two digits are the whole part. E.g. 995000 for 99.5 percentile.
bool isColdBlockNthPercentile(int PercentileCutoff, const BasicBlock *BB,
BlockFrequencyInfo *BFI) const;
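
For concreteness, a minimal sketch of the cutoff encoding documented above; PSI, BB and BFI are hypothetical.

    // 990000 encodes the 99th percentile; 995000 encodes the 99.5th percentile.
    if (PSI.isHotBlockNthPercentile(990000, BB, BFI)) {
      // BB's count clears the 99th-percentile hotness cutoff.
    }
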
/// Returns true if the call site \p CB is considered hot.
@@ -162,11 +170,11 @@ public:
uint64_t getOrCompColdCountThreshold() const;
/// Returns HotCountThreshold if set.
uint64_t getHotCountThreshold() const {
- return HotCountThreshold ? HotCountThreshold.getValue() : 0;
+ return HotCountThreshold.getValueOr(0);
}
/// Returns ColdCountThreshold if set.
uint64_t getColdCountThreshold() const {
- return ColdCountThreshold ? ColdCountThreshold.getValue() : 0;
+ return ColdCountThreshold.getValueOr(0);
}
private:
diff --git a/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h b/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h
index 3018bcc241d8..a0eb9af62205 100644
--- a/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h
+++ b/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h
@@ -20,6 +20,46 @@ class Function;
class Module;
class OptimizationRemarkEmitter;
+struct CallSiteFormat {
+ enum class Format : int {
+ Line,
+ LineColumn,
+ LineDiscriminator,
+ LineColumnDiscriminator
+ };
+
+ bool outputColumn() const {
+ return OutputFormat == Format::LineColumn ||
+ OutputFormat == Format::LineColumnDiscriminator;
+ }
+
+ bool outputDiscriminator() const {
+ return OutputFormat == Format::LineDiscriminator ||
+ OutputFormat == Format::LineColumnDiscriminator;
+ }
+
+ Format OutputFormat;
+};
+
+/// Replay Inliner Setup
+struct ReplayInlinerSettings {
+ enum class Scope : int { Function, Module };
+ enum class Fallback : int { Original, AlwaysInline, NeverInline };
+
+ StringRef ReplayFile;
+ Scope ReplayScope;
+ Fallback ReplayFallback;
+ CallSiteFormat ReplayFormat;
+};
+
+/// Get call site location as a string with the given format
+std::string formatCallSiteLocation(DebugLoc DLoc, const CallSiteFormat &Format);
+
+std::unique_ptr<InlineAdvisor> getReplayInlineAdvisor(
+ Module &M, FunctionAnalysisManager &FAM, LLVMContext &Context,
+ std::unique_ptr<InlineAdvisor> OriginalAdvisor,
+ const ReplayInlinerSettings &ReplaySettings, bool EmitRemarks);
+
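
A minimal wiring sketch for the new replay settings; the remarks path, M, FAM, Ctx and OriginalAdvisor are hypothetical and not mandated by this header.

    ReplayInlinerSettings Settings;
    Settings.ReplayFile = "inline-replay.txt"; // placeholder path
    Settings.ReplayScope = ReplayInlinerSettings::Scope::Function;
    Settings.ReplayFallback = ReplayInlinerSettings::Fallback::Original;
    Settings.ReplayFormat.OutputFormat = CallSiteFormat::Format::LineColumnDiscriminator;
    std::unique_ptr<InlineAdvisor> Advisor = getReplayInlineAdvisor(
        M, FAM, Ctx, std::move(OriginalAdvisor), Settings, /*EmitRemarks=*/true);
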
/// Replay inline advisor that uses optimization remarks from inlining of
/// previous build to guide current inlining. This is useful for inliner tuning.
class ReplayInlineAdvisor : public InlineAdvisor {
@@ -27,15 +67,24 @@ public:
ReplayInlineAdvisor(Module &M, FunctionAnalysisManager &FAM,
LLVMContext &Context,
std::unique_ptr<InlineAdvisor> OriginalAdvisor,
- StringRef RemarksFile, bool EmitRemarks);
+ const ReplayInlinerSettings &ReplaySettings,
+ bool EmitRemarks);
std::unique_ptr<InlineAdvice> getAdviceImpl(CallBase &CB) override;
bool areReplayRemarksLoaded() const { return HasReplayRemarks; }
private:
- StringSet<> InlineSitesFromRemarks;
+ bool hasInlineAdvice(Function &F) const {
+ return (ReplaySettings.ReplayScope ==
+ ReplayInlinerSettings::Scope::Module) ||
+ CallersToReplay.contains(F.getName());
+ }
std::unique_ptr<InlineAdvisor> OriginalAdvisor;
bool HasReplayRemarks = false;
+ const ReplayInlinerSettings ReplaySettings;
bool EmitRemarks = false;
+
+ StringMap<bool> InlineSitesFromRemarks;
+ StringSet<> CallersToReplay;
};
} // namespace llvm
#endif // LLVM_ANALYSIS_REPLAYINLINEADVISOR_H
diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index ae9c73fede96..a2260688e3d6 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -25,7 +25,6 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/FoldingSet.h"
-#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/SetVector.h"
@@ -112,6 +111,24 @@ public:
/// Note that NUW and NSW are also valid properties of a recurrence, and
/// either implies NW. For convenience, NW will be set for a recurrence
/// whenever either NUW or NSW are set.
+ ///
+ /// We require that the flag on a SCEV apply to the entire scope in which
+  /// that SCEV is defined. A SCEV's scope is the set of locations dominated by
+ /// a defining location, which is in turn described by the following rules:
+ /// * A SCEVUnknown is at the point of definition of the Value.
+ /// * A SCEVConstant is defined at all points.
+ /// * A SCEVAddRec is defined starting with the header of the associated
+ /// loop.
+  /// * All other SCEVs are defined at the earliest point all operands are
+ /// defined.
+ ///
+  /// The above rules describe a maximally hoisted form (without regard to
+ /// potential control dependence). A SCEV is defined anywhere a
+ /// corresponding instruction could be defined in said maximally hoisted
+ /// form. Note that SCEVUDivExpr (currently the only expression type which
+ /// can trap) can be defined per these rules in regions where it would trap
+ /// at runtime. A SCEV being defined does not require the existence of any
+ /// instruction within the defined scope.
enum NoWrapFlags {
FlagAnyWrap = 0, // No guarantee.
FlagNW = (1 << 0), // No self-wrap.
@@ -472,6 +489,10 @@ public:
clearFlags(SCEV::NoWrapFlags Flags, SCEV::NoWrapFlags OffFlags) {
return (SCEV::NoWrapFlags)(Flags & ~OffFlags);
}
+ LLVM_NODISCARD static bool hasFlags(SCEV::NoWrapFlags Flags,
+ SCEV::NoWrapFlags TestFlags) {
+ return TestFlags == maskFlags(Flags, TestFlags);
+ };
ScalarEvolution(Function &F, TargetLibraryInfo &TLI, AssumptionCache &AC,
DominatorTree &DT, LoopInfo &LI);
@@ -498,13 +519,26 @@ public:
// Returns a wider type among {Ty1, Ty2}.
Type *getWiderType(Type *Ty1, Type *Ty2) const;
+ /// Return true if there exists a point in the program at which both
+ /// A and B could be operands to the same instruction.
+ /// SCEV expressions are generally assumed to correspond to instructions
+  /// which could exist in IR. In general, this requires that there exists
+ /// a use point in the program where all operands dominate the use.
+ ///
+ /// Example:
+ /// loop {
+ /// if
+ /// loop { v1 = load @global1; }
+ /// else
+ /// loop { v2 = load @global2; }
+ /// }
+  /// No SCEV with operands v1 and v2 can exist in this program.
+ bool instructionCouldExistWitthOperands(const SCEV *A, const SCEV *B);
+
/// Return true if the SCEV is a scAddRecExpr or it contains
/// scAddRecExpr. The result will be cached in HasRecMap.
bool containsAddRecurrence(const SCEV *S);
- /// Erase Value from ValueExprMap and ExprValueMap.
- void eraseValueFromMap(Value *V);
-
/// Is operation \p BinOp between \p LHS and \p RHS provably does not have
/// a signed/unsigned overflow (\p Signed)?
bool willNotOverflow(Instruction::BinaryOps BinOp, bool Signed,
@@ -516,6 +550,12 @@ public:
std::pair<SCEV::NoWrapFlags, bool /*Deduced*/>
getStrengthenedNoWrapFlagsFromBinOp(const OverflowingBinaryOperator *OBO);
+ /// Notify this ScalarEvolution that \p User directly uses SCEVs in \p Ops.
+ void registerUser(const SCEV *User, ArrayRef<const SCEV *> Ops);
+
+ /// Return true if the SCEV expression contains an undef value.
+ bool containsUndefs(const SCEV *S) const;
+
/// Return a SCEV expression for the full generality of the specified
/// expression.
const SCEV *getSCEV(Value *V);
@@ -700,6 +740,9 @@ public:
/// cases do exist.
const SCEV *getPointerBase(const SCEV *V);
+ /// Compute an expression equivalent to S - getPointerBase(S).
+ const SCEV *removePointerBase(const SCEV *S);
+
/// Return a SCEV expression for the specified value at the specified scope
/// in the program. The L value specifies a loop nest to evaluate the
/// expression at, where null is the top-level or a specified loop is
@@ -735,9 +778,13 @@ public:
/// Convert from an "exit count" (i.e. "backedge taken count") to a "trip
/// count". A "trip count" is the number of times the header of the loop
/// will execute if an exit is taken after the specified number of backedges
- /// have been taken. (e.g. TripCount = ExitCount + 1) A zero result
- /// must be interpreted as a loop having an unknown trip count.
- const SCEV *getTripCountFromExitCount(const SCEV *ExitCount);
+ /// have been taken. (e.g. TripCount = ExitCount + 1). Note that the
+ /// expression can overflow if ExitCount = UINT_MAX. \p Extend controls
+ /// how potential overflow is handled. If true, a wider result type is
+  /// returned, e.g. EC = 255 (i8), TC = 256 (i9). If false, the result wraps
+  /// with unsigned 2s-complement semantics, e.g. EC = 255 (i8), TC = 0 (i8).
+ const SCEV *getTripCountFromExitCount(const SCEV *ExitCount,
+ bool Extend = true);
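
A worked illustration of the Extend parameter described above; SE and an i8 exit count EC equal to 255 are hypothetical.

    const SCEV *TC1 = SE.getTripCountFromExitCount(EC);                   // 256 as i9 (widened)
    const SCEV *TC2 = SE.getTripCountFromExitCount(EC, /*Extend=*/false); // wraps to 0 as i8
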
/// Returns the exact trip count of the loop if we can compute it, and
/// the result is a small constant. '0' is used to represent an unknown
@@ -762,6 +809,13 @@ public:
/// Returns 0 if the trip count is unknown or not constant.
unsigned getSmallConstantMaxTripCount(const Loop *L);
+  /// Returns the upper bound of the loop trip count inferred from array size.
+  /// The loop cannot access bytes starting outside the statically allocated
+  /// size without triggering immediate UB.
+  /// Returns SCEVCouldNotCompute if the trip count could not be inferred
+  /// from array accesses.
+ const SCEV *getConstantMaxTripCountFromArray(const Loop *L);
+
/// Returns the largest constant divisor of the trip count as a normal
/// unsigned value, if possible. This means that the actual trip count is
/// always a multiple of the returned value. Returns 1 if the trip count is
@@ -988,14 +1042,13 @@ public:
/// Test if the given expression is known to satisfy the condition described
/// by Pred, LHS, and RHS in the given Context.
bool isKnownPredicateAt(ICmpInst::Predicate Pred, const SCEV *LHS,
- const SCEV *RHS, const Instruction *Context);
+ const SCEV *RHS, const Instruction *CtxI);
/// Check whether the condition described by Pred, LHS, and RHS is true or
/// false in the given \p Context. If we know it, return the evaluation of
/// this condition. If neither is proved, return None.
Optional<bool> evaluatePredicateAt(ICmpInst::Predicate Pred, const SCEV *LHS,
- const SCEV *RHS,
- const Instruction *Context);
+ const SCEV *RHS, const Instruction *CtxI);
/// Test if the condition described by Pred, LHS, RHS is known to be true on
/// every iteration of the loop of the recurrency LHS.
@@ -1045,7 +1098,7 @@ public:
getLoopInvariantExitCondDuringFirstIterations(ICmpInst::Predicate Pred,
const SCEV *LHS,
const SCEV *RHS, const Loop *L,
- const Instruction *Context,
+ const Instruction *CtxI,
const SCEV *MaxIter);
/// Simplify LHS and RHS in a comparison with predicate Pred. Return true
@@ -1092,110 +1145,11 @@ public:
/// Return the size of an element read or written by Inst.
const SCEV *getElementSize(Instruction *Inst);
- /// Compute the array dimensions Sizes from the set of Terms extracted from
- /// the memory access function of this SCEVAddRecExpr (second step of
- /// delinearization).
- void findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms,
- SmallVectorImpl<const SCEV *> &Sizes,
- const SCEV *ElementSize);
-
void print(raw_ostream &OS) const;
void verify() const;
bool invalidate(Function &F, const PreservedAnalyses &PA,
FunctionAnalysisManager::Invalidator &Inv);
- /// Collect parametric terms occurring in step expressions (first step of
- /// delinearization).
- void collectParametricTerms(const SCEV *Expr,
- SmallVectorImpl<const SCEV *> &Terms);
-
- /// Return in Subscripts the access functions for each dimension in Sizes
- /// (third step of delinearization).
- void computeAccessFunctions(const SCEV *Expr,
- SmallVectorImpl<const SCEV *> &Subscripts,
- SmallVectorImpl<const SCEV *> &Sizes);
-
- /// Gathers the individual index expressions from a GEP instruction.
- ///
- /// This function optimistically assumes the GEP references into a fixed size
- /// array. If this is actually true, this function returns a list of array
- /// subscript expressions in \p Subscripts and a list of integers describing
- /// the size of the individual array dimensions in \p Sizes. Both lists have
- /// either equal length or the size list is one element shorter in case there
- /// is no known size available for the outermost array dimension. Returns true
- /// if successful and false otherwise.
- bool getIndexExpressionsFromGEP(const GetElementPtrInst *GEP,
- SmallVectorImpl<const SCEV *> &Subscripts,
- SmallVectorImpl<int> &Sizes);
-
- /// Split this SCEVAddRecExpr into two vectors of SCEVs representing the
- /// subscripts and sizes of an array access.
- ///
- /// The delinearization is a 3 step process: the first two steps compute the
- /// sizes of each subscript and the third step computes the access functions
- /// for the delinearized array:
- ///
- /// 1. Find the terms in the step functions
- /// 2. Compute the array size
- /// 3. Compute the access function: divide the SCEV by the array size
- /// starting with the innermost dimensions found in step 2. The Quotient
- /// is the SCEV to be divided in the next step of the recursion. The
- /// Remainder is the subscript of the innermost dimension. Loop over all
- /// array dimensions computed in step 2.
- ///
- /// To compute a uniform array size for several memory accesses to the same
- /// object, one can collect in step 1 all the step terms for all the memory
- /// accesses, and compute in step 2 a unique array shape. This guarantees
- /// that the array shape will be the same across all memory accesses.
- ///
- /// FIXME: We could derive the result of steps 1 and 2 from a description of
- /// the array shape given in metadata.
- ///
- /// Example:
- ///
- /// A[][n][m]
- ///
- /// for i
- /// for j
- /// for k
- /// A[j+k][2i][5i] =
- ///
- /// The initial SCEV:
- ///
- /// A[{{{0,+,2*m+5}_i, +, n*m}_j, +, n*m}_k]
- ///
- /// 1. Find the different terms in the step functions:
- /// -> [2*m, 5, n*m, n*m]
- ///
- /// 2. Compute the array size: sort and unique them
- /// -> [n*m, 2*m, 5]
- /// find the GCD of all the terms = 1
- /// divide by the GCD and erase constant terms
- /// -> [n*m, 2*m]
- /// GCD = m
- /// divide by GCD -> [n, 2]
- /// remove constant terms
- /// -> [n]
- /// size of the array is A[unknown][n][m]
- ///
- /// 3. Compute the access function
- /// a. Divide {{{0,+,2*m+5}_i, +, n*m}_j, +, n*m}_k by the innermost size m
- /// Quotient: {{{0,+,2}_i, +, n}_j, +, n}_k
- /// Remainder: {{{0,+,5}_i, +, 0}_j, +, 0}_k
- /// The remainder is the subscript of the innermost array dimension: [5i].
- ///
- /// b. Divide Quotient: {{{0,+,2}_i, +, n}_j, +, n}_k by next outer size n
- /// Quotient: {{{0,+,0}_i, +, 1}_j, +, 1}_k
- /// Remainder: {{{0,+,2}_i, +, 0}_j, +, 0}_k
- /// The Remainder is the subscript of the next array dimension: [2i].
- ///
- /// The subscript of the outermost dimension is the Quotient: [j+k].
- ///
- /// Overall, we have: A[][n][m], and the access function: A[j+k][2i][5i].
- void delinearize(const SCEV *Expr, SmallVectorImpl<const SCEV *> &Subscripts,
- SmallVectorImpl<const SCEV *> &Sizes,
- const SCEV *ElementSize);
-
/// Return the DataLayout associated with the module this SCEV instance is
/// operating on.
const DataLayout &getDataLayout() const {
@@ -1234,6 +1188,18 @@ public:
/// Try to apply information from loop guards for \p L to \p Expr.
const SCEV *applyLoopGuards(const SCEV *Expr, const Loop *L);
+ /// Return true if the loop has no abnormal exits. That is, if the loop
+ /// is not infinite, it must exit through an explicit edge in the CFG.
+ /// (As opposed to either a) throwing out of the function or b) entering a
+ /// well defined infinite loop in some callee.)
+ bool loopHasNoAbnormalExits(const Loop *L) {
+ return getLoopProperties(L).HasNoAbnormalExits;
+ }
+
+ /// Return true if this loop is finite by assumption. That is,
+ /// to be infinite, it must also be undefined.
+ bool loopIsFiniteByAssumption(const Loop *L);
+
private:
/// A CallbackVH to arrange for ScalarEvolution to be notified whenever a
/// Value is deleted.
@@ -1532,15 +1498,15 @@ private:
LoopDispositions;
struct LoopProperties {
- /// Set to true if the loop contains no instruction that can have side
- /// effects (i.e. via throwing an exception, volatile or atomic access).
- bool HasNoAbnormalExits;
-
/// Set to true if the loop contains no instruction that can abnormally exit
/// the loop (i.e. via throwing an exception, by terminating the thread
/// cleanly or by infinite looping in a called function). Strictly
/// speaking, the last one is not leaving the loop, but is identical to
/// leaving the loop for reasoning about undefined behavior.
+ bool HasNoAbnormalExits;
+
+ /// Set to true if the loop contains no instruction that can have side
+ /// effects (i.e. via throwing an exception, volatile or atomic access).
bool HasNoSideEffects;
};
@@ -1554,14 +1520,6 @@ private:
return getLoopProperties(L).HasNoSideEffects;
}
- bool loopHasNoAbnormalExits(const Loop *L) {
- return getLoopProperties(L).HasNoAbnormalExits;
- }
-
- /// Return true if this loop is finite by assumption. That is,
- /// to be infinite, it must also be undefined.
- bool loopIsFiniteByAssumption(const Loop *L);
-
/// Compute a LoopDisposition value.
LoopDisposition computeLoopDisposition(const SCEV *S, const Loop *L);
@@ -1574,6 +1532,9 @@ private:
/// Compute a BlockDisposition value.
BlockDisposition computeBlockDisposition(const SCEV *S, const BasicBlock *BB);
+ /// Stores all SCEV that use a given SCEV as its direct operand.
+ DenseMap<const SCEV *, SmallPtrSet<const SCEV *, 8> > SCEVUsers;
+
/// Memoized results from getRange
DenseMap<const SCEV *, ConstantRange> UnsignedRanges;
@@ -1600,22 +1561,22 @@ private:
/// copied if its needed for longer.
const ConstantRange &getRangeRef(const SCEV *S, RangeSignHint Hint);
- /// Determines the range for the affine SCEVAddRecExpr {\p Start,+,\p Stop}.
+ /// Determines the range for the affine SCEVAddRecExpr {\p Start,+,\p Step}.
/// Helper for \c getRange.
- ConstantRange getRangeForAffineAR(const SCEV *Start, const SCEV *Stop,
+ ConstantRange getRangeForAffineAR(const SCEV *Start, const SCEV *Step,
const SCEV *MaxBECount, unsigned BitWidth);
/// Determines the range for the affine non-self-wrapping SCEVAddRecExpr {\p
- /// Start,+,\p Stop}<nw>.
+ /// Start,+,\p Step}<nw>.
ConstantRange getRangeForAffineNoSelfWrappingAR(const SCEVAddRecExpr *AddRec,
const SCEV *MaxBECount,
unsigned BitWidth,
RangeSignHint SignHint);
/// Try to compute a range for the affine SCEVAddRecExpr {\p Start,+,\p
- /// Stop} by "factoring out" a ternary expression from the add recurrence.
+ /// Step} by "factoring out" a ternary expression from the add recurrence.
/// Helper called by \c getRange.
- ConstantRange getRangeViaFactoring(const SCEV *Start, const SCEV *Stop,
+ ConstantRange getRangeViaFactoring(const SCEV *Start, const SCEV *Step,
const SCEV *MaxBECount, unsigned BitWidth);
/// If the unknown expression U corresponds to a simple recurrence, return
@@ -1761,12 +1722,6 @@ private:
BasicBlock *ExitingBB,
bool IsSubExpr);
- /// Given an exit condition of 'icmp op load X, cst', try to see if we can
- /// compute the backedge-taken count.
- ExitLimit computeLoadConstantCompareExitLimit(LoadInst *LI, Constant *RHS,
- const Loop *L,
- ICmpInst::Predicate p);
-
/// Compute the exit limit of a loop that is controlled by a
/// "(IV >> 1) != 0" type comparison. We cannot compute the exact trip
/// count in these cases (since SCEV has no way of expressing them), but we
@@ -1839,7 +1794,7 @@ private:
const SCEV *RHS,
ICmpInst::Predicate FoundPred,
const SCEV *FoundLHS, const SCEV *FoundRHS,
- const Instruction *Context);
+ const Instruction *CtxI);
/// Test whether the condition described by Pred, LHS, and RHS is true
/// whenever the condition described by FoundPred, FoundLHS, FoundRHS is
@@ -1914,7 +1869,7 @@ private:
const SCEV *LHS, const SCEV *RHS,
const SCEV *FoundLHS,
const SCEV *FoundRHS,
- const Instruction *Context);
+ const Instruction *CtxI);
/// Test whether the condition described by Pred, LHS, and RHS is true
/// whenever the condition described by Pred, FoundLHS, and FoundRHS is
@@ -1956,12 +1911,18 @@ private:
bool splitBinaryAdd(const SCEV *Expr, const SCEV *&L, const SCEV *&R,
SCEV::NoWrapFlags &Flags);
- /// Drop memoized information computed for S.
- void forgetMemoizedResults(const SCEV *S);
+ /// Drop memoized information for all \p SCEVs.
+ void forgetMemoizedResults(ArrayRef<const SCEV *> SCEVs);
+
+ /// Helper for forgetMemoizedResults.
+ void forgetMemoizedResultsImpl(const SCEV *S);
/// Return an existing SCEV for V if there is one, otherwise return nullptr.
const SCEV *getExistingSCEV(Value *V);
+ /// Erase Value from ValueExprMap and ExprValueMap.
+ void eraseValueFromMap(Value *V);
+
/// Return false iff given SCEV contains a SCEVUnknown with NULL value-
/// pointer.
bool checkValidity(const SCEV *S) const;
@@ -1995,6 +1956,27 @@ private:
/// would trigger undefined behavior on overflow.
SCEV::NoWrapFlags getNoWrapFlagsFromUB(const Value *V);
+ /// Return a scope which provides an upper bound on the defining scope of
+ /// 'S'. Specifically, return the first instruction in said bounding scope.
+ /// Return nullptr if the scope is trivial (function entry).
+ /// (See scope definition rules associated with flag discussion above)
+ const Instruction *getNonTrivialDefiningScopeBound(const SCEV *S);
+
+ /// Return a scope which provides an upper bound on the defining scope for
+ /// a SCEV with the operands in Ops. The outparam Precise is set if the
+ /// bound found is a precise bound (i.e. must be the defining scope.)
+ const Instruction *getDefiningScopeBound(ArrayRef<const SCEV *> Ops,
+ bool &Precise);
+
+ /// Wrapper around the above for cases which don't care if the bound
+ /// is precise.
+ const Instruction *getDefiningScopeBound(ArrayRef<const SCEV *> Ops);
+
+ /// Given two instructions in the same function, return true if we can
+ /// prove B must execute given A executes.
+ bool isGuaranteedToTransferExecutionTo(const Instruction *A,
+ const Instruction *B);
+
/// Return true if the SCEV corresponding to \p I is never poison. Proving
/// this is more complex than proving that just \p I is never poison, since
/// SCEV commons expressions across control flow, and you can have cases
@@ -2036,8 +2018,11 @@ private:
/// permitted by Start, End, and Stride. This is for loops of the form
/// {Start, +, Stride} LT End.
///
- /// Precondition: the induction variable is known to be positive. We *don't*
- /// assert these preconditions so please be careful.
+ /// Preconditions:
+ /// * the induction variable is known to be positive.
+ /// * the induction variable is assumed not to overflow (i.e. either it
+ /// actually doesn't, or we'd have to immediately execute UB)
+ /// We *don't* assert these preconditions so please be careful.
const SCEV *computeMaxBECountForLT(const SCEV *Start, const SCEV *Stride,
const SCEV *End, unsigned BitWidth,
bool IsSigned);
@@ -2072,31 +2057,20 @@ private:
/// an add rec on said loop.
void getUsedLoops(const SCEV *S, SmallPtrSetImpl<const Loop *> &LoopsUsed);
- /// Find all of the loops transitively used in \p S, and update \c LoopUsers
- /// accordingly.
- void addToLoopUseLists(const SCEV *S);
-
/// Try to match the pattern generated by getURemExpr(A, B). If successful,
/// Assign A and B to LHS and RHS, respectively.
bool matchURem(const SCEV *Expr, const SCEV *&LHS, const SCEV *&RHS);
/// Look for a SCEV expression with type `SCEVType` and operands `Ops` in
- /// `UniqueSCEVs`.
- ///
- /// The first component of the returned tuple is the SCEV if found and null
- /// otherwise. The second component is the `FoldingSetNodeID` that was
- /// constructed to look up the SCEV and the third component is the insertion
- /// point.
- std::tuple<SCEV *, FoldingSetNodeID, void *>
- findExistingSCEVInCache(SCEVTypes SCEVType, ArrayRef<const SCEV *> Ops);
+  /// `UniqueSCEVs`. Return it if found, else nullptr.
+ SCEV *findExistingSCEVInCache(SCEVTypes SCEVType, ArrayRef<const SCEV *> Ops);
FoldingSet<SCEV> UniqueSCEVs;
FoldingSet<SCEVPredicate> UniquePreds;
BumpPtrAllocator SCEVAllocator;
- /// This maps loops to a list of SCEV expressions that (transitively) use said
- /// loop.
- DenseMap<const Loop *, SmallVector<const SCEV *, 4>> LoopUsers;
+ /// This maps loops to a list of addrecs that directly use said loop.
+ DenseMap<const Loop *, SmallVector<const SCEVAddRecExpr *, 4>> LoopUsers;
/// Cache tentative mappings from UnknownSCEVs in a Loop, to a SCEV expression
/// they can be rewritten into under certain predicates.
diff --git a/llvm/include/llvm/Analysis/StackLifetime.h b/llvm/include/llvm/Analysis/StackLifetime.h
index df342a9533ee..239aec4e258b 100644
--- a/llvm/include/llvm/Analysis/StackLifetime.h
+++ b/llvm/include/llvm/Analysis/StackLifetime.h
@@ -191,6 +191,8 @@ public:
StackLifetimePrinterPass(raw_ostream &OS, StackLifetime::LivenessType Type)
: Type(Type), OS(OS) {}
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName);
};
} // end namespace llvm
diff --git a/llvm/include/llvm/Analysis/StackSafetyAnalysis.h b/llvm/include/llvm/Analysis/StackSafetyAnalysis.h
index 59c1e3e3bd56..751735f3e59f 100644
--- a/llvm/include/llvm/Analysis/StackSafetyAnalysis.h
+++ b/llvm/include/llvm/Analysis/StackSafetyAnalysis.h
@@ -75,7 +75,15 @@ public:
StackSafetyGlobalInfo &operator=(StackSafetyGlobalInfo &&);
~StackSafetyGlobalInfo();
+ // Whether we can prove that all accesses to this Alloca are in-range and
+ // during its lifetime.
bool isSafe(const AllocaInst &AI) const;
+
+ // Returns true if the instruction can be proven to do only two types of
+ // memory accesses:
+ // (1) live stack locations in-bounds or
+ // (2) non-stack locations.
+ bool stackAccessIsSafe(const Instruction &I) const;
void print(raw_ostream &O) const;
void dump() const;
};
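
Illustrative only: how a sanitizer-style client might consult the new query; SSGI, AI and I are hypothetical.

    // Skip instrumentation when safety can be proven statically.
    bool SkipAlloca = SSGI.isSafe(AI);           // all accesses in-range and within lifetime
    bool SkipAccess = SSGI.stackAccessIsSafe(I); // only safe stack or non-stack accesses
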
diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/llvm/include/llvm/Analysis/TargetLibraryInfo.h
index 22bfeda0efd0..6e3e1380535e 100644
--- a/llvm/include/llvm/Analysis/TargetLibraryInfo.h
+++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.h
@@ -76,7 +76,7 @@ class TargetLibraryInfoImpl {
/// Return true if the function type FTy is valid for the library function
/// F, regardless of whether the function is available.
bool isValidProtoForLibFunc(const FunctionType &FTy, LibFunc F,
- const DataLayout *DL) const;
+ const Module &M) const;
public:
/// List of known vector-functions libraries.
@@ -115,6 +115,8 @@ public:
///
/// If it is one of the known library functions, return true and set F to the
/// corresponding value.
+ ///
+ /// FDecl is assumed to have a parent Module when using this function.
bool getLibFunc(const Function &FDecl, LibFunc &F) const;
/// Forces a function to be marked as unavailable.
@@ -238,7 +240,7 @@ public:
else {
// Disable individual libc/libm calls in TargetLibraryInfo.
LibFunc LF;
- AttributeSet FnAttrs = (*F)->getAttributes().getFnAttributes();
+ AttributeSet FnAttrs = (*F)->getAttributes().getFnAttrs();
for (const Attribute &Attr : FnAttrs) {
if (!Attr.isStringAttribute())
continue;
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 628058142e48..170d6b8f35ff 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -21,7 +21,6 @@
#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
-#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
@@ -31,6 +30,7 @@
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/InstructionCost.h"
#include <functional>
+#include <utility>
namespace llvm {
@@ -47,12 +47,14 @@ class ExtractElementInst;
class Function;
class GlobalValue;
class InstCombiner;
+class OptimizationRemarkEmitter;
class IntrinsicInst;
class LoadInst;
class LoopAccessInfo;
class Loop;
class LoopInfo;
class ProfileSummaryInfo;
+class RecurrenceDescriptor;
class SCEV;
class ScalarEvolution;
class StoreInst;
@@ -97,7 +99,7 @@ struct HardwareLoopInfo {
Loop *L = nullptr;
BasicBlock *ExitBlock = nullptr;
BranchInst *ExitBranch = nullptr;
- const SCEV *TripCount = nullptr;
+ const SCEV *ExitCount = nullptr;
IntegerType *CountType = nullptr;
Value *LoopDecrement = nullptr; // Decrement the loop counter by this
// value in every iteration.
@@ -382,8 +384,15 @@ public:
bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const;
+ /// Return true if globals in this address space can have initializers other
+ /// than `undef`.
+ bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const;
+
unsigned getAssumedAddrSpace(const Value *V) const;
+ std::pair<const Value *, unsigned>
+ getPredicatedAddrSpace(const Value *V) const;
+
/// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p
/// NewV, which has a different address space. This should happen for every
/// operand index that collectFlatAddressOperands returned for the intrinsic.
@@ -506,7 +515,8 @@ public:
/// transformation. The caller will initialize UP with the current
/// target-independent defaults.
void getUnrollingPreferences(Loop *L, ScalarEvolution &,
- UnrollingPreferences &UP) const;
+ UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE) const;
/// Query the target whether it would be profitable to convert the given loop
/// into a hardware loop.
@@ -660,6 +670,9 @@ public:
/// Return true if the target supports masked expand load.
bool isLegalMaskedExpandLoad(Type *DataType) const;
+ /// Return true if we should be enabling ordered reductions for the target.
+ bool enableOrderedReductions() const;
+
/// Return true if the target has a unified operation to calculate division
/// and remainder. If so, the additional implicit multiplication and
/// subtraction required to calculate a remainder from division are free. This
@@ -907,6 +920,9 @@ public:
/// architectural maximum vector length, and None otherwise.
Optional<unsigned> getMaxVScale() const;
+ /// \return the value of vscale to tune the cost model for.
+ Optional<unsigned> getVScaleForTuning() const;
+
/// \return True if the vectorization factor should be chosen to
/// make the vector of the smallest element type match the size of a
/// vector register. For wider element types, this could result in
@@ -1094,8 +1110,8 @@ public:
/// is using a compare with the specified predicate as condition. When vector
/// types are passed, \p VecPred must be used for all lanes.
InstructionCost
- getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy = nullptr,
- CmpInst::Predicate VecPred = CmpInst::BAD_ICMP_PREDICATE,
+ getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ CmpInst::Predicate VecPred,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
const Instruction *I = nullptr) const;
@@ -1104,6 +1120,16 @@ public:
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
unsigned Index = -1) const;
+ /// \return The cost of replication shuffle of \p VF elements typed \p EltTy
+ /// \p ReplicationFactor times.
+ ///
+ /// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:
+ /// <0,0,0,1,1,1,2,2,2,3,3,3>
+ InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
+ int VF,
+ const APInt &DemandedDstElts,
+ TTI::TargetCostKind CostKind);
+
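
A minimal cost-query sketch matching the mask example above; TTI, Ctx and CostKind are hypothetical.

    // ReplicationFactor=3, VF=4 -> 12 destination lanes, all demanded here.
    APInt DemandedDstElts = APInt::getNullValue(12);
    DemandedDstElts.setAllBits();
    InstructionCost Cost = TTI.getReplicationShuffleCost(
        Type::getInt32Ty(Ctx), /*ReplicationFactor=*/3, /*VF=*/4,
        DemandedDstElts, CostKind);
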
/// \return The cost of Load and Store instructions.
InstructionCost
getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
@@ -1452,13 +1478,18 @@ public:
virtual bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
Intrinsic::ID IID) const = 0;
virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;
+ virtual bool
+ canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const = 0;
virtual unsigned getAssumedAddrSpace(const Value *V) const = 0;
+ virtual std::pair<const Value *, unsigned>
+ getPredicatedAddrSpace(const Value *V) const = 0;
virtual Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
Value *OldV,
Value *NewV) const = 0;
virtual bool isLoweredToCall(const Function *F) = 0;
virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
- UnrollingPreferences &UP) = 0;
+ UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE) = 0;
virtual void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
PeelingPreferences &PP) = 0;
virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
@@ -1505,6 +1536,7 @@ public:
virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) = 0;
virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;
virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0;
+ virtual bool enableOrderedReductions() = 0;
virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
virtual bool prefersVectorizedAddressing() = 0;
@@ -1563,6 +1595,7 @@ public:
virtual TypeSize getRegisterBitWidth(RegisterKind K) const = 0;
virtual unsigned getMinVectorRegisterBitWidth() const = 0;
virtual Optional<unsigned> getMaxVScale() const = 0;
+ virtual Optional<unsigned> getVScaleForTuning() const = 0;
virtual bool shouldMaximizeVectorBandwidth() const = 0;
virtual ElementCount getMinimumVF(unsigned ElemWidth,
bool IsScalable) const = 0;
@@ -1623,6 +1656,12 @@ public:
const Instruction *I) = 0;
virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
unsigned Index) = 0;
+
+ virtual InstructionCost
+ getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
+ const APInt &DemandedDstElts,
+ TTI::TargetCostKind CostKind) = 0;
+
virtual InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
Align Alignment,
unsigned AddressSpace,
@@ -1730,8 +1769,8 @@ public:
InstructionCost
getGEPCost(Type *PointeeType, const Value *Ptr,
ArrayRef<const Value *> Operands,
- enum TargetTransformInfo::TargetCostKind CostKind) override {
- return Impl.getGEPCost(PointeeType, Ptr, Operands);
+ TargetTransformInfo::TargetCostKind CostKind) override {
+ return Impl.getGEPCost(PointeeType, Ptr, Operands, CostKind);
}
unsigned getInliningThresholdMultiplier() override {
return Impl.getInliningThresholdMultiplier();
@@ -1775,10 +1814,20 @@ public:
return Impl.isNoopAddrSpaceCast(FromAS, ToAS);
}
+ bool
+ canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override {
+ return Impl.canHaveNonUndefGlobalInitializerInAddressSpace(AS);
+ }
+
unsigned getAssumedAddrSpace(const Value *V) const override {
return Impl.getAssumedAddrSpace(V);
}
+ std::pair<const Value *, unsigned>
+ getPredicatedAddrSpace(const Value *V) const override {
+ return Impl.getPredicatedAddrSpace(V);
+ }
+
Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
Value *NewV) const override {
return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
@@ -1788,8 +1837,9 @@ public:
return Impl.isLoweredToCall(F);
}
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
- UnrollingPreferences &UP) override {
- return Impl.getUnrollingPreferences(L, SE, UP);
+ UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE) override {
+ return Impl.getUnrollingPreferences(L, SE, UP, ORE);
}
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
PeelingPreferences &PP) override {
@@ -1886,6 +1936,9 @@ public:
bool isLegalMaskedExpandLoad(Type *DataType) override {
return Impl.isLegalMaskedExpandLoad(DataType);
}
+ bool enableOrderedReductions() override {
+ return Impl.enableOrderedReductions();
+ }
bool hasDivRemOp(Type *DataType, bool IsSigned) override {
return Impl.hasDivRemOp(DataType, IsSigned);
}
@@ -2015,6 +2068,9 @@ public:
Optional<unsigned> getMaxVScale() const override {
return Impl.getMaxVScale();
}
+ Optional<unsigned> getVScaleForTuning() const override {
+ return Impl.getVScaleForTuning();
+ }
bool shouldMaximizeVectorBandwidth() const override {
return Impl.shouldMaximizeVectorBandwidth();
}
@@ -2115,6 +2171,13 @@ public:
unsigned Index) override {
return Impl.getVectorInstrCost(Opcode, Val, Index);
}
+ InstructionCost
+ getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
+ const APInt &DemandedDstElts,
+ TTI::TargetCostKind CostKind) override {
+ return Impl.getReplicationShuffleCost(EltTy, ReplicationFactor, VF,
+ DemandedDstElts, CostKind);
+ }
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
unsigned AddressSpace,
TTI::TargetCostKind CostKind,
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index c07a33c9f155..05ef2495475f 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -24,6 +24,7 @@
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
+#include <utility>
using namespace llvm::PatternMatch;
@@ -47,10 +48,9 @@ public:
const DataLayout &getDataLayout() const { return DL; }
- InstructionCost
- getGEPCost(Type *PointeeType, const Value *Ptr,
- ArrayRef<const Value *> Operands,
- TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const {
+ InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
+ ArrayRef<const Value *> Operands,
+ TTI::TargetCostKind CostKind) const {
// In the basic model, we just assume that all-constant GEPs will be folded
// into their uses via addressing modes.
for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx)
@@ -105,9 +105,17 @@ public:
}
bool isNoopAddrSpaceCast(unsigned, unsigned) const { return false; }
+ bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const {
+ return AS == 0;
+ };
unsigned getAssumedAddrSpace(const Value *V) const { return -1; }
+ std::pair<const Value *, unsigned>
+ getPredicatedAddrSpace(const Value *V) const {
+ return std::make_pair(nullptr, -1);
+ }
+
Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
Value *NewV) const {
return nullptr;
@@ -187,7 +195,8 @@ public:
}
void getUnrollingPreferences(Loop *, ScalarEvolution &,
- TTI::UnrollingPreferences &) const {}
+ TTI::UnrollingPreferences &,
+ OptimizationRemarkEmitter *) const {}
void getPeelingPreferences(Loop *, ScalarEvolution &,
TTI::PeelingPreferences &) const {}
@@ -262,6 +271,8 @@ public:
bool isLegalMaskedExpandLoad(Type *DataType) const { return false; }
+ bool enableOrderedReductions() const { return false; }
+
bool hasDivRemOp(Type *DataType, bool IsSigned) const { return false; }
bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const {
@@ -394,6 +405,7 @@ public:
unsigned getMinVectorRegisterBitWidth() const { return 128; }
Optional<unsigned> getMaxVScale() const { return None; }
+ Optional<unsigned> getVScaleForTuning() const { return None; }
bool shouldMaximizeVectorBandwidth() const { return false; }
@@ -539,6 +551,12 @@ public:
return 1;
}
+ unsigned getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
+ const APInt &DemandedDstElts,
+ TTI::TargetCostKind CostKind) {
+ return 1;
+ }
+
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
unsigned AddressSpace,
TTI::TargetCostKind CostKind,
@@ -614,7 +632,8 @@ public:
return 1;
}
- unsigned getNumberOfParts(Type *Tp) const { return 0; }
+ // Assume that we have a register of the right size for the type.
+ unsigned getNumberOfParts(Type *Tp) const { return 1; }
InstructionCost getAddressComputationCost(Type *Tp, ScalarEvolution *,
const SCEV *) const {
@@ -632,9 +651,10 @@ public:
return 1;
}
- InstructionCost getExtendedAddReductionCost(
- bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty,
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const {
+ InstructionCost
+ getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned, Type *ResTy,
+ VectorType *Ty,
+ TTI::TargetCostKind CostKind) const {
return 1;
}
@@ -856,10 +876,9 @@ protected:
public:
using BaseT::getGEPCost;
- InstructionCost
- getGEPCost(Type *PointeeType, const Value *Ptr,
- ArrayRef<const Value *> Operands,
- TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) {
+ InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
+ ArrayRef<const Value *> Operands,
+ TTI::TargetCostKind CostKind) {
assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
assert(cast<PointerType>(Ptr->getType()->getScalarType())
->isOpaqueOrPointeeTypeMatches(PointeeType) &&
@@ -964,10 +983,10 @@ public:
return TTI::TCC_Free;
break;
case Instruction::GetElementPtr: {
- const GEPOperator *GEP = cast<GEPOperator>(U);
+ const auto *GEP = cast<GEPOperator>(U);
return TargetTTI->getGEPCost(GEP->getSourceElementType(),
GEP->getPointerOperand(),
- Operands.drop_front());
+ Operands.drop_front(), CostKind);
}
case Instruction::Add:
case Instruction::FAdd:
@@ -1063,58 +1082,94 @@ public:
auto *IE = dyn_cast<InsertElementInst>(U);
if (!IE)
return TTI::TCC_Basic; // FIXME
- auto *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
- unsigned Idx = CI ? CI->getZExtValue() : -1;
+ unsigned Idx = -1;
+ if (auto *CI = dyn_cast<ConstantInt>(IE->getOperand(2)))
+ if (CI->getValue().getActiveBits() <= 32)
+ Idx = CI->getZExtValue();
return TargetTTI->getVectorInstrCost(Opcode, Ty, Idx);
}
case Instruction::ShuffleVector: {
auto *Shuffle = dyn_cast<ShuffleVectorInst>(U);
if (!Shuffle)
return TTI::TCC_Basic; // FIXME
+
auto *VecTy = cast<VectorType>(U->getType());
auto *VecSrcTy = cast<VectorType>(U->getOperand(0)->getType());
+ int NumSubElts, SubIndex;
+
+ if (Shuffle->changesLength()) {
+ // Treat a 'subvector widening' as a free shuffle.
+ if (Shuffle->increasesLength() && Shuffle->isIdentityWithPadding())
+ return 0;
+
+ if (Shuffle->isExtractSubvectorMask(SubIndex))
+ return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy,
+ Shuffle->getShuffleMask(), SubIndex,
+ VecTy);
+
+ if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
+ return TargetTTI->getShuffleCost(
+ TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(),
+ SubIndex,
+ FixedVectorType::get(VecTy->getScalarType(), NumSubElts));
+
+ int ReplicationFactor, VF;
+ if (Shuffle->isReplicationMask(ReplicationFactor, VF)) {
+ APInt DemandedDstElts =
+ APInt::getNullValue(Shuffle->getShuffleMask().size());
+ for (auto I : enumerate(Shuffle->getShuffleMask())) {
+ if (I.value() != UndefMaskElem)
+ DemandedDstElts.setBit(I.index());
+ }
+ return TargetTTI->getReplicationShuffleCost(
+ VecSrcTy->getElementType(), ReplicationFactor, VF,
+ DemandedDstElts, CostKind);
+ }
- // TODO: Identify and add costs for insert subvector, etc.
- int SubIndex;
- if (Shuffle->isExtractSubvectorMask(SubIndex))
- return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy,
- Shuffle->getShuffleMask(), SubIndex,
- VecTy);
- else if (Shuffle->changesLength())
return CostKind == TTI::TCK_RecipThroughput ? -1 : 1;
- else if (Shuffle->isIdentity())
+ }
+
+ if (Shuffle->isIdentity())
return 0;
- else if (Shuffle->isReverse())
+
+ if (Shuffle->isReverse())
return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy,
Shuffle->getShuffleMask(), 0, nullptr);
- else if (Shuffle->isSelect())
+
+ if (Shuffle->isSelect())
return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy,
Shuffle->getShuffleMask(), 0, nullptr);
- else if (Shuffle->isTranspose())
+
+ if (Shuffle->isTranspose())
return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy,
Shuffle->getShuffleMask(), 0, nullptr);
- else if (Shuffle->isZeroEltSplat())
+
+ if (Shuffle->isZeroEltSplat())
return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy,
Shuffle->getShuffleMask(), 0, nullptr);
- else if (Shuffle->isSingleSource())
+
+ if (Shuffle->isSingleSource())
return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy,
Shuffle->getShuffleMask(), 0, nullptr);
+ if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
+ return TargetTTI->getShuffleCost(
+ TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(), SubIndex,
+ FixedVectorType::get(VecTy->getScalarType(), NumSubElts));
+
return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy,
Shuffle->getShuffleMask(), 0, nullptr);
}
case Instruction::ExtractElement: {
- unsigned Idx = -1;
auto *EEI = dyn_cast<ExtractElementInst>(U);
if (!EEI)
return TTI::TCC_Basic; // FIXME
-
- auto *CI = dyn_cast<ConstantInt>(EEI->getOperand(1));
- if (CI)
- Idx = CI->getZExtValue();
-
- return TargetTTI->getVectorInstrCost(Opcode, U->getOperand(0)->getType(),
- Idx);
+ unsigned Idx = -1;
+ if (auto *CI = dyn_cast<ConstantInt>(EEI->getOperand(1)))
+ if (CI->getValue().getActiveBits() <= 32)
+ Idx = CI->getZExtValue();
+ Type *DstTy = U->getOperand(0)->getType();
+ return TargetTTI->getVectorInstrCost(Opcode, DstTy, Idx);
}
}
// By default, just classify everything as 'basic'.
diff --git a/llvm/include/llvm/Analysis/TypeMetadataUtils.h b/llvm/include/llvm/Analysis/TypeMetadataUtils.h
index 3f7603142900..074c40942b06 100644
--- a/llvm/include/llvm/Analysis/TypeMetadataUtils.h
+++ b/llvm/include/llvm/Analysis/TypeMetadataUtils.h
@@ -22,6 +22,7 @@ namespace llvm {
class CallBase;
class CallInst;
class Constant;
+class Function;
class DominatorTree;
class Instruction;
class Module;
@@ -56,7 +57,30 @@ void findDevirtualizableCallsForTypeCheckedLoad(
SmallVectorImpl<Instruction *> &Preds, bool &HasNonCallUses,
const CallInst *CI, DominatorTree &DT);
-Constant *getPointerAtOffset(Constant *I, uint64_t Offset, Module &M);
-}
+/// Processes a Constant recursively looking into elements of arrays, structs
+/// and expressions to find a trivial pointer element that is located at the
+/// given offset (relative to the beginning of the whole outer Constant).
+///
+/// Used for example from GlobalDCE to find an entry in a C++ vtable that
+/// matches a vcall offset.
+///
+/// To support Swift vtables, getPointerAtOffset can see through "relative
+/// pointers", i.e. (sub-)expressions of the form of:
+///
+/// @symbol = ... {
+/// i32 trunc (i64 sub (
+/// i64 ptrtoint (<type> @target to i64), i64 ptrtoint (... @symbol to i64)
+/// ) to i32)
+/// }
+///
+/// For such (sub-)expressions, getPointerAtOffset returns the @target pointer.
+Constant *getPointerAtOffset(Constant *I, uint64_t Offset, Module &M,
+ Constant *TopLevelGlobal = nullptr);
+
+/// Finds the same "relative pointer" pattern as described above, where the
+/// target is `F`, and replaces the entire pattern with a constant zero.
+void replaceRelativePointerUsersWithZero(Function *F);
+
+} // namespace llvm
#endif
diff --git a/llvm/include/llvm/Analysis/Utils/TFUtils.h b/llvm/include/llvm/Analysis/Utils/TFUtils.h
index 47ee23e06000..1f6be0e60eb9 100644
--- a/llvm/include/llvm/Analysis/Utils/TFUtils.h
+++ b/llvm/include/llvm/Analysis/Utils/TFUtils.h
@@ -104,6 +104,9 @@ Optional<TensorSpec> getTensorSpecFromJSON(LLVMContext &Ctx,
struct LoggedFeatureSpec {
TensorSpec Spec;
Optional<std::string> LoggingName;
+ const std::string &getLoggingName() const {
+ return LoggingName ? *LoggingName : Spec.name();
+ }
};
/// Load the output specs. If SpecFileOverride is not empty, that path is used.
@@ -170,7 +173,9 @@ public:
// we can consider using bytes.
char *addEntryAndGetFloatOrInt64Buffer(size_t FeatureID);
- void print(raw_ostream &OS);
+ // Flush the content of the log to the stream, clearing the stored data in the
+ // process.
+ void flush(raw_ostream &OS);
private:
std::vector<LoggedFeatureSpec> FeatureSpecs;
diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h
index 90ec742f18e6..b4f38a3e976f 100644
--- a/llvm/include/llvm/Analysis/ValueTracking.h
+++ b/llvm/include/llvm/Analysis/ValueTracking.h
@@ -203,6 +203,15 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6;
const DominatorTree *DT = nullptr,
bool UseInstrInfo = true);
+ /// Get the minimum bit size for this Value \p Op as a signed integer.
+ /// i.e. x == sext(trunc(x to MinSignedBits) to bitwidth(x)).
+ /// Similar to the APInt::getMinSignedBits function.
+ unsigned ComputeMinSignedBits(const Value *Op, const DataLayout &DL,
+ unsigned Depth = 0,
+ AssumptionCache *AC = nullptr,
+ const Instruction *CxtI = nullptr,
+ const DominatorTree *DT = nullptr);
+
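
A small worked example of the relation quoted above; V and DL are hypothetical.

    // If V is an i32 known to lie in [-128, 127], the result is 8:
    // sext(trunc(V to i8) to i32) == V.
    unsigned MinBits = ComputeMinSignedBits(V, DL);
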
/// This function computes the integer multiple of Base that equals V. If
/// successful, it returns true and returns the multiple in Multiple. If
/// unsuccessful, it returns false. Also, if V can be simplified to an
@@ -549,6 +558,7 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6;
ConstantRange computeConstantRange(const Value *V, bool UseInstrInfo = true,
AssumptionCache *AC = nullptr,
const Instruction *CtxI = nullptr,
+ const DominatorTree *DT = nullptr,
unsigned Depth = 0);
/// Return true if this function can prove that the instruction I will
@@ -573,6 +583,18 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6;
/// instruction variant of this function.
bool isGuaranteedToTransferExecutionToSuccessor(const BasicBlock *BB);
+ /// Return true if every instruction in the range (Begin, End) is
+ /// guaranteed to transfer execution to its static successor. \p ScanLimit
+ /// bounds the search to avoid scanning huge blocks.
+ bool isGuaranteedToTransferExecutionToSuccessor(
+ BasicBlock::const_iterator Begin, BasicBlock::const_iterator End,
+ unsigned ScanLimit = 32);
+
+ /// Same as previous, but with range expressed via iterator_range.
+ bool isGuaranteedToTransferExecutionToSuccessor(
+ iterator_range<BasicBlock::const_iterator> Range,
+ unsigned ScanLimit = 32);
+
/// Return true if this function can prove that the instruction I
/// is executed for every iteration of the loop L.
///
@@ -624,10 +646,16 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6;
/// true. If Op raises immediate UB but never creates poison or undef
/// (e.g. sdiv I, 0), canCreatePoison returns false.
///
+ /// \p ConsiderFlags controls whether poison-producing flags on the
+ /// instruction are considered. This can be used to check whether the
+ /// instruction could still introduce undef or poison even without the
+ /// poison-generating flags that might be on it (i.e. whether the result of
+ /// Op->dropPoisonGeneratingFlags() could still create poison or undef).
+ ///
/// canCreatePoison returns true if Op can create poison from non-poison
/// operands.
- bool canCreateUndefOrPoison(const Operator *Op);
- bool canCreatePoison(const Operator *Op);
+ bool canCreateUndefOrPoison(const Operator *Op, bool ConsiderFlags = true);
+ bool canCreatePoison(const Operator *Op, bool ConsiderFlags = true);
/// Return true if V is poison given that ValAssumedPoison is already poison.
/// For example, if ValAssumedPoison is `icmp X, 10` and V is `icmp X, 5`,
@@ -744,6 +772,10 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6;
/// minimum/maximum flavor.
CmpInst::Predicate getInverseMinMaxPred(SelectPatternFlavor SPF);
+ /// Return the minimum or maximum constant value for the specified integer
+ /// min/max flavor and type.
+ APInt getMinMaxLimit(SelectPatternFlavor SPF, unsigned BitWidth);
+
/// Check if the values in \p VL are select instructions that can be converted
/// to a min or max (vector) intrinsic. Returns the intrinsic ID, if such a
/// conversion is possible, together with a bool indicating whether all select
diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h
index c890216c9e01..24e2318de48b 100644
--- a/llvm/include/llvm/Analysis/VectorUtils.h
+++ b/llvm/include/llvm/Analysis/VectorUtils.h
@@ -533,6 +533,12 @@ llvm::SmallVector<int, 16> createStrideMask(unsigned Start, unsigned Stride,
llvm::SmallVector<int, 16>
createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs);
+/// Given a shuffle mask for a binary shuffle, create the equivalent shuffle
+/// mask assuming both operands are identical. This assumes that the unary
+/// shuffle will use elements from operand 0 (operand 1 will be unused).
+llvm::SmallVector<int, 16> createUnaryMask(ArrayRef<int> Mask,
+ unsigned NumElts);
+
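The implied remapping can be sketched as follows (an illustration under the assumption that lanes taken from operand 1, i.e. mask values >= NumElts, simply re-point to the same element of operand 0, and that undef lanes, conventionally -1, pass through unchanged):

    #include <vector>
    std::vector<int> unaryMaskSketch(const std::vector<int> &Mask, int NumElts) {
      std::vector<int> Out;
      Out.reserve(Mask.size());
      for (int M : Mask)
        // A lane selecting element i of operand 1 becomes lane i of operand 0.
        Out.push_back(M < NumElts ? M : M - NumElts);
      return Out;
    }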
/// Concatenate a list of vectors.
///
/// This function generates code that concatenates the vectors in \p Vecs into a
@@ -686,10 +692,8 @@ public:
if (getMember(getFactor() - 1))
return false;
- // We have a group with gaps. It therefore cannot be a group of stores,
- // and it can't be a reversed access, because such groups get invalidated.
- assert(!getMember(0)->mayWriteToMemory() &&
- "Group should have been invalidated");
+ // We have a group with gaps. It therefore can't be a reversed access,
+ // because such groups get invalidated (TODO).
assert(!isReverse() && "Group should have been invalidated");
// This is a group of loads, with gaps, and without a last-member
diff --git a/llvm/include/llvm/AsmParser/LLLexer.h b/llvm/include/llvm/AsmParser/LLLexer.h
index c97d9781c33b..c30165e4a97b 100644
--- a/llvm/include/llvm/AsmParser/LLLexer.h
+++ b/llvm/include/llvm/AsmParser/LLLexer.h
@@ -10,8 +10,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_ASMPARSER_LLLEXER_H
-#define LLVM_LIB_ASMPARSER_LLLEXER_H
+#ifndef LLVM_ASMPARSER_LLLEXER_H
+#define LLVM_ASMPARSER_LLLEXER_H
#include "LLToken.h"
#include "llvm/ADT/APFloat.h"
diff --git a/llvm/include/llvm/AsmParser/LLParser.h b/llvm/include/llvm/AsmParser/LLParser.h
index 70db9218fa3d..d621c232378c 100644
--- a/llvm/include/llvm/AsmParser/LLParser.h
+++ b/llvm/include/llvm/AsmParser/LLParser.h
@@ -10,8 +10,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_ASMPARSER_LLPARSER_H
-#define LLVM_LIB_ASMPARSER_LLPARSER_H
+#ifndef LLVM_ASMPARSER_LLPARSER_H
+#define LLVM_ASMPARSER_LLPARSER_H
#include "LLLexer.h"
#include "llvm/ADT/Optional.h"
@@ -172,9 +172,8 @@ namespace llvm {
/// getGlobalVal - Get a value with the specified name or ID, creating a
/// forward reference record if needed. This can return null if the value
/// exists but does not have the right type.
- GlobalValue *getGlobalVal(const std::string &N, Type *Ty, LocTy Loc,
- bool IsCall);
- GlobalValue *getGlobalVal(unsigned ID, Type *Ty, LocTy Loc, bool IsCall);
+ GlobalValue *getGlobalVal(const std::string &N, Type *Ty, LocTy Loc);
+ GlobalValue *getGlobalVal(unsigned ID, Type *Ty, LocTy Loc);
/// Get a Comdat with the specified name, creating a forward reference
/// record if needed.
@@ -270,7 +269,6 @@ namespace llvm {
bool parseOptionalCommaAlign(MaybeAlign &Alignment, bool &AteExtraComma);
bool parseOptionalCommaAddrSpace(unsigned &AddrSpace, LocTy &Loc,
bool &AteExtraComma);
- bool parseOptionalCommaInAlloca(bool &IsInAlloca);
bool parseAllocSizeArguments(unsigned &BaseSizeArg,
Optional<unsigned> &HowManyArg);
bool parseVScaleRangeArguments(unsigned &MinValue, unsigned &MaxValue);
@@ -306,11 +304,10 @@ namespace llvm {
unsigned DLLStorageClass, bool DSOLocal,
GlobalVariable::ThreadLocalMode TLM,
GlobalVariable::UnnamedAddr UnnamedAddr);
- bool parseIndirectSymbol(const std::string &Name, LocTy NameLoc,
- unsigned L, unsigned Visibility,
- unsigned DLLStorageClass, bool DSOLocal,
- GlobalVariable::ThreadLocalMode TLM,
- GlobalVariable::UnnamedAddr UnnamedAddr);
+ bool parseAliasOrIFunc(const std::string &Name, LocTy NameLoc, unsigned L,
+ unsigned Visibility, unsigned DLLStorageClass,
+ bool DSOLocal, GlobalVariable::ThreadLocalMode TLM,
+ GlobalVariable::UnnamedAddr UnnamedAddr);
bool parseComdat();
bool parseStandaloneMetadata();
bool parseNamedMetadata();
@@ -424,8 +421,8 @@ namespace llvm {
/// GetVal - Get a value with the specified name or ID, creating a
/// forward reference record if needed. This can return null if the value
/// exists but does not have the right type.
- Value *getVal(const std::string &Name, Type *Ty, LocTy Loc, bool IsCall);
- Value *getVal(unsigned ID, Type *Ty, LocTy Loc, bool IsCall);
+ Value *getVal(const std::string &Name, Type *Ty, LocTy Loc);
+ Value *getVal(unsigned ID, Type *Ty, LocTy Loc);
/// setInstName - After an instruction is parsed and inserted into its
/// basic block, this installs its name.
@@ -447,10 +444,10 @@ namespace llvm {
};
bool convertValIDToValue(Type *Ty, ValID &ID, Value *&V,
- PerFunctionState *PFS, bool IsCall);
+ PerFunctionState *PFS);
Value *checkValidVariableType(LocTy Loc, const Twine &Name, Type *Ty,
- Value *Val, bool IsCall);
+ Value *Val);
bool parseConstantValue(Type *Ty, Constant *&C);
bool parseValue(Type *Ty, Value *&V, PerFunctionState *PFS);
diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h
index aa49c68fe924..f8ca054863ac 100644
--- a/llvm/include/llvm/AsmParser/LLToken.h
+++ b/llvm/include/llvm/AsmParser/LLToken.h
@@ -10,8 +10,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_ASMPARSER_LLTOKEN_H
-#define LLVM_LIB_ASMPARSER_LLTOKEN_H
+#ifndef LLVM_ASMPARSER_LLTOKEN_H
+#define LLVM_ASMPARSER_LLTOKEN_H
namespace llvm {
namespace lltok {
@@ -190,6 +190,7 @@ enum Kind {
kw_convergent,
kw_dereferenceable,
kw_dereferenceable_or_null,
+ kw_disable_sanitizer_instrumentation,
kw_elementtype,
kw_inaccessiblememonly,
kw_inaccessiblemem_or_argmemonly,
@@ -403,6 +404,9 @@ enum Kind {
kw_returnDoesNotAlias,
kw_noInline,
kw_alwaysInline,
+ kw_noUnwind,
+ kw_mayThrow,
+ kw_hasUnknownCall,
kw_calls,
kw_callee,
kw_params,
diff --git a/llvm/include/llvm/BinaryFormat/Dwarf.def b/llvm/include/llvm/BinaryFormat/Dwarf.def
index 34f124b5779a..61f3f27ebb47 100644
--- a/llvm/include/llvm/BinaryFormat/Dwarf.def
+++ b/llvm/include/llvm/BinaryFormat/Dwarf.def
@@ -248,6 +248,9 @@ HANDLE_DW_TAG(0x5103, ALTIUM_rev_carry_type, 0, ALTIUM, DW_KIND_NONE)
// M16 __rom qualifier
HANDLE_DW_TAG(0x5111, ALTIUM_rom, 0, ALTIUM, DW_KIND_NONE)
+// LLVM
+HANDLE_DW_TAG(0x6000, LLVM_annotation, 0, LLVM, DW_KIND_NONE)
+
// Green Hills.
HANDLE_DW_TAG(0x8004, GHS_namespace, 0, GHS, DW_KIND_NONE)
HANDLE_DW_TAG(0x8005, GHS_using_namespace, 0, GHS, DW_KIND_NONE)
diff --git a/llvm/include/llvm/BinaryFormat/DynamicTags.def b/llvm/include/llvm/BinaryFormat/DynamicTags.def
index c08f8a53bdb5..814d8b113ec4 100644
--- a/llvm/include/llvm/BinaryFormat/DynamicTags.def
+++ b/llvm/include/llvm/BinaryFormat/DynamicTags.def
@@ -31,6 +31,11 @@
#define PPC64_DYNAMIC_TAG_DEFINED
#endif
+#ifndef RISCV_DYNAMIC_TAG
+#define RISCV_DYNAMIC_TAG(name, value) DYNAMIC_TAG(name, value)
+#define RISCV_DYNAMIC_TAG_DEFINED
+#endif
+
#ifndef DYNAMIC_TAG_MARKER
#define DYNAMIC_TAG_MARKER(name, value) DYNAMIC_TAG(name, value)
#define DYNAMIC_TAG_MARKER_DEFINED
@@ -213,6 +218,9 @@ PPC_DYNAMIC_TAG(PPC_OPT, 0x70000001) // Has TLS optimization.
PPC64_DYNAMIC_TAG(PPC64_GLINK, 0x70000000) // Address of 32 bytes before the
// first glink lazy resolver stub.
+// RISC-V specific dynamic array tags.
+RISCV_DYNAMIC_TAG(RISCV_VARIANT_CC, 0x70000001)
+
// Sun machine-independent extensions.
DYNAMIC_TAG(AUXILIARY, 0x7FFFFFFD) // Shared object to load before self
DYNAMIC_TAG(USED, 0x7FFFFFFE) // Same as DT_NEEDED
@@ -243,3 +251,7 @@ DYNAMIC_TAG(FILTER, 0x7FFFFFFF) // Shared object to get values from
#undef PPC64_DYNAMIC_TAG
#undef PPC64_DYNAMIC_TAG_DEFINED
#endif
+#ifdef RISCV_DYNAMIC_TAG_DEFINED
+#undef RISCV_DYNAMIC_TAG
+#undef RISCV_DYNAMIC_TAG_DEFINED
+#endif
diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h
index 6148f968cdba..a270fd399aeb 100644
--- a/llvm/include/llvm/BinaryFormat/ELF.h
+++ b/llvm/include/llvm/BinaryFormat/ELF.h
@@ -660,6 +660,12 @@ enum {
#include "ELFRelocs/RISCV.def"
};
+enum {
+ // Symbol may follow a different calling convention than the standard calling
+ // convention.
+ STO_RISCV_VARIANT_CC = 0x80
+};
+
// ELF Relocation types for S390/zSeries
enum {
#include "ELFRelocs/SystemZ.def"
@@ -1596,6 +1602,16 @@ enum {
NT_FREEBSD_PROCSTAT_AUXV = 16,
};
+// OpenBSD core note types.
+enum {
+ NT_OPENBSD_PROCINFO = 10,
+ NT_OPENBSD_AUXV = 11,
+ NT_OPENBSD_REGS = 20,
+ NT_OPENBSD_FPREGS = 21,
+ NT_OPENBSD_XFPREGS = 22,
+ NT_OPENBSD_WCOOKIE = 23,
+};
+
// AMDGPU-specific section indices.
enum {
SHN_AMDGPU_LDS = 0xff00, // Variable in LDS; symbol encoded like SHN_COMMON
@@ -1618,6 +1634,13 @@ enum {
NT_AMDGPU_METADATA = 32
};
+// LLVMOMPOFFLOAD specific notes.
+enum : unsigned {
+ NT_LLVM_OPENMP_OFFLOAD_VERSION = 1,
+ NT_LLVM_OPENMP_OFFLOAD_PRODUCER = 2,
+ NT_LLVM_OPENMP_OFFLOAD_PRODUCER_VERSION = 3
+};
+
enum {
GNU_ABI_TAG_LINUX = 0,
GNU_ABI_TAG_HURD = 1,
diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def
index 9f2f0540bcbd..454450950444 100644
--- a/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def
+++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def
@@ -46,10 +46,6 @@ ELF_RELOC(R_RISCV_ALIGN, 43)
ELF_RELOC(R_RISCV_RVC_BRANCH, 44)
ELF_RELOC(R_RISCV_RVC_JUMP, 45)
ELF_RELOC(R_RISCV_RVC_LUI, 46)
-ELF_RELOC(R_RISCV_GPREL_I, 47)
-ELF_RELOC(R_RISCV_GPREL_S, 48)
-ELF_RELOC(R_RISCV_TPREL_I, 49)
-ELF_RELOC(R_RISCV_TPREL_S, 50)
ELF_RELOC(R_RISCV_RELAX, 51)
ELF_RELOC(R_RISCV_SUB6, 52)
ELF_RELOC(R_RISCV_SET6, 53)
diff --git a/llvm/include/llvm/BinaryFormat/MachO.def b/llvm/include/llvm/BinaryFormat/MachO.def
index 76dcc58ba048..f68ecefa6c9e 100644
--- a/llvm/include/llvm/BinaryFormat/MachO.def
+++ b/llvm/include/llvm/BinaryFormat/MachO.def
@@ -74,6 +74,8 @@ HANDLE_LOAD_COMMAND(LC_VERSION_MIN_TVOS, 0x0000002Fu, version_min_command)
HANDLE_LOAD_COMMAND(LC_VERSION_MIN_WATCHOS, 0x00000030u, version_min_command)
HANDLE_LOAD_COMMAND(LC_NOTE, 0x00000031u, note_command)
HANDLE_LOAD_COMMAND(LC_BUILD_VERSION, 0x00000032u, build_version_command)
+HANDLE_LOAD_COMMAND(LC_DYLD_EXPORTS_TRIE, 0x80000033u, linkedit_data_command)
+HANDLE_LOAD_COMMAND(LC_DYLD_CHAINED_FIXUPS, 0x80000034u, linkedit_data_command)
#endif
diff --git a/llvm/include/llvm/BinaryFormat/Wasm.h b/llvm/include/llvm/BinaryFormat/Wasm.h
index c38e64928521..0bc8c4e167d8 100644
--- a/llvm/include/llvm/BinaryFormat/Wasm.h
+++ b/llvm/include/llvm/BinaryFormat/Wasm.h
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
//
// This file defines manifest constants for the wasm object file format.
-// See: https://github.com/WebAssembly/design/blob/master/BinaryEncoding.md
+// See: https://github.com/WebAssembly/design/blob/main/BinaryEncoding.md
//
//===----------------------------------------------------------------------===//
@@ -36,12 +36,25 @@ struct WasmObjectHeader {
uint32_t Version;
};
+struct WasmDylinkImportInfo {
+ StringRef Module;
+ StringRef Field;
+ uint32_t Flags;
+};
+
+struct WasmDylinkExportInfo {
+ StringRef Name;
+ uint32_t Flags;
+};
+
struct WasmDylinkInfo {
uint32_t MemorySize; // Memory size in bytes
uint32_t MemoryAlignment; // P2 alignment of memory
uint32_t TableSize; // Table size in elements
uint32_t TableAlignment; // P2 alignment of table
std::vector<StringRef> Needed; // Shared library dependencies
+ std::vector<WasmDylinkImportInfo> ImportInfo;
+ std::vector<WasmDylinkExportInfo> ExportInfo;
};
struct WasmProducerInfo {
@@ -101,15 +114,9 @@ struct WasmGlobal {
StringRef SymbolName; // from the "linking" section
};
-struct WasmTagType {
- // Kind of tag. Currently only WASM_TAG_ATTRIBUTE_EXCEPTION is possible.
- uint8_t Attribute;
- uint32_t SigIndex;
-};
-
struct WasmTag {
uint32_t Index;
- WasmTagType Type;
+ uint32_t SigIndex;
StringRef SymbolName; // from the "linking" section
};
@@ -122,7 +129,6 @@ struct WasmImport {
WasmGlobalType Global;
WasmTableType Table;
WasmLimits Memory;
- WasmTagType Tag;
};
};
@@ -133,6 +139,7 @@ struct WasmLocalDecl {
struct WasmFunction {
uint32_t Index;
+ uint32_t SigIndex;
std::vector<WasmLocalDecl> Locals;
ArrayRef<uint8_t> Body;
uint32_t CodeSectionOffset;
@@ -284,11 +291,14 @@ enum : unsigned {
// Opcodes used in synthetic functions.
enum : unsigned {
- WASM_OPCODE_IF = 0x04,
- WASM_OPCODE_ELSE = 0x05,
+ WASM_OPCODE_BLOCK = 0x02,
+ WASM_OPCODE_BR = 0x0c,
+ WASM_OPCODE_BR_TABLE = 0x0e,
+ WASM_OPCODE_RETURN = 0x0f,
WASM_OPCODE_DROP = 0x1a,
WASM_OPCODE_MISC_PREFIX = 0xfc,
WASM_OPCODE_MEMORY_INIT = 0x08,
+ WASM_OPCODE_MEMORY_FILL = 0x0b,
WASM_OPCODE_DATA_DROP = 0x09,
WASM_OPCODE_ATOMICS_PREFIX = 0xfe,
WASM_OPCODE_ATOMIC_NOTIFY = 0x00,
@@ -339,6 +349,14 @@ enum : unsigned {
WASM_SYMBOL_TABLE = 0x8,
};
+// Kind codes used in the custom "dylink" section
+enum : unsigned {
+ WASM_DYLINK_MEM_INFO = 0x1,
+ WASM_DYLINK_NEEDED = 0x2,
+ WASM_DYLINK_EXPORT_INFO = 0x3,
+ WASM_DYLINK_IMPORT_INFO = 0x4,
+};
+
// Kind codes used in the custom "linking" section in the WASM_COMDAT_INFO
enum : unsigned {
WASM_COMDAT_DATA = 0x0,
@@ -379,6 +397,7 @@ const unsigned WASM_SYMBOL_UNDEFINED = 0x10;
const unsigned WASM_SYMBOL_EXPORTED = 0x20;
const unsigned WASM_SYMBOL_EXPLICIT_NAME = 0x40;
const unsigned WASM_SYMBOL_NO_STRIP = 0x80;
+const unsigned WASM_SYMBOL_TLS = 0x100;
#define WASM_RELOC(name, value) name = value,
diff --git a/llvm/include/llvm/BinaryFormat/WasmTraits.h b/llvm/include/llvm/BinaryFormat/WasmTraits.h
index 930ee690bcc0..bef9dd3291ca 100644
--- a/llvm/include/llvm/BinaryFormat/WasmTraits.h
+++ b/llvm/include/llvm/BinaryFormat/WasmTraits.h
@@ -18,10 +18,8 @@
namespace llvm {
-template <typename T> struct DenseMapInfo;
-
// Traits for using WasmSignature in a DenseMap.
-template <> struct DenseMapInfo<wasm::WasmSignature> {
+template <> struct DenseMapInfo<wasm::WasmSignature, void> {
static wasm::WasmSignature getEmptyKey() {
wasm::WasmSignature Sig;
Sig.State = wasm::WasmSignature::Empty;
@@ -47,7 +45,7 @@ template <> struct DenseMapInfo<wasm::WasmSignature> {
};
// Traits for using WasmGlobalType in a DenseMap
-template <> struct DenseMapInfo<wasm::WasmGlobalType> {
+template <> struct DenseMapInfo<wasm::WasmGlobalType, void> {
static wasm::WasmGlobalType getEmptyKey() {
return wasm::WasmGlobalType{1, true};
}
@@ -64,7 +62,7 @@ template <> struct DenseMapInfo<wasm::WasmGlobalType> {
};
// Traits for using WasmLimits in a DenseMap
-template <> struct DenseMapInfo<wasm::WasmLimits> {
+template <> struct DenseMapInfo<wasm::WasmLimits, void> {
static wasm::WasmLimits getEmptyKey() {
return wasm::WasmLimits{0xff, 0xff, 0xff};
}
@@ -86,19 +84,19 @@ template <> struct DenseMapInfo<wasm::WasmLimits> {
};
// Traits for using WasmTableType in a DenseMap
-template <> struct DenseMapInfo<wasm::WasmTableType> {
+template <> struct DenseMapInfo<wasm::WasmTableType, void> {
static wasm::WasmTableType getEmptyKey() {
- return wasm::WasmTableType{0,
- DenseMapInfo<wasm::WasmLimits>::getEmptyKey()};
+ return wasm::WasmTableType{
+ 0, DenseMapInfo<wasm::WasmLimits, void>::getEmptyKey()};
}
static wasm::WasmTableType getTombstoneKey() {
return wasm::WasmTableType{
- 1, DenseMapInfo<wasm::WasmLimits>::getTombstoneKey()};
+ 1, DenseMapInfo<wasm::WasmLimits, void>::getTombstoneKey()};
}
static unsigned getHashValue(const wasm::WasmTableType &TableType) {
return hash_combine(
TableType.ElemType,
- DenseMapInfo<wasm::WasmLimits>::getHashValue(TableType.Limits));
+ DenseMapInfo<wasm::WasmLimits, void>::getHashValue(TableType.Limits));
}
static bool isEqual(const wasm::WasmTableType &LHS,
const wasm::WasmTableType &RHS) {
diff --git a/llvm/include/llvm/BinaryFormat/XCOFF.h b/llvm/include/llvm/BinaryFormat/XCOFF.h
index 8a42d26f3f4a..cffd8618f1e3 100644
--- a/llvm/include/llvm/BinaryFormat/XCOFF.h
+++ b/llvm/include/llvm/BinaryFormat/XCOFF.h
@@ -28,9 +28,14 @@ namespace XCOFF {
constexpr size_t FileNamePadSize = 6;
constexpr size_t NameSize = 8;
constexpr size_t FileHeaderSize32 = 20;
+constexpr size_t FileHeaderSize64 = 24;
+constexpr size_t AuxFileHeaderSize32 = 72;
+constexpr size_t AuxFileHeaderSize64 = 110;
constexpr size_t SectionHeaderSize32 = 40;
+constexpr size_t SectionHeaderSize64 = 72;
constexpr size_t SymbolTableEntrySize = 18;
constexpr size_t RelocationSerializationSize32 = 10;
+constexpr size_t RelocationSerializationSize64 = 14;
constexpr uint16_t RelocOverflow = 65535;
constexpr uint8_t AllocRegNo = 31;
@@ -38,6 +43,17 @@ enum ReservedSectionNum : int16_t { N_DEBUG = -2, N_ABS = -1, N_UNDEF = 0 };
enum MagicNumber : uint16_t { XCOFF32 = 0x01DF, XCOFF64 = 0x01F7 };
+// This field only exists in the XCOFF64 definition.
+enum AuxHeaderFlags64 : uint16_t {
+ SHR_SYMTAB = 0x8000, ///< At exec time, create shared symbol table for program
+ ///< (main program only).
+ FORK_POLICY = 0x4000, ///< Forktree policy specified (main program only).
+ FORK_COR = 0x2000 ///< If _AOUT_FORK_POLICY is set, specify copy-on-reference
+ ///< if this bit is set. Specify copy-on-write otherwise.
+ ///< If _AOUT_FORK_POLICY is 0, this bit is reserved for
+ ///< future use and should be set to 0.
+};
+
// x_smclas field of x_csect from system header: /usr/include/syms.h
/// Storage Mapping Class definitions.
enum StorageMappingClass : uint8_t {
diff --git a/llvm/include/llvm/Bitcode/BitcodeAnalyzer.h b/llvm/include/llvm/Bitcode/BitcodeAnalyzer.h
index de828be3bf1b..f6fc284da33f 100644
--- a/llvm/include/llvm/Bitcode/BitcodeAnalyzer.h
+++ b/llvm/include/llvm/Bitcode/BitcodeAnalyzer.h
@@ -42,6 +42,8 @@ struct BCDumpOptions {
bool Symbolic = false;
/// Print binary blobs using hex escapes.
bool ShowBinaryBlobs = false;
+ /// Print BLOCKINFO block details.
+ bool DumpBlockinfo = false;
BCDumpOptions(raw_ostream &OS) : OS(OS) {}
};
diff --git a/llvm/include/llvm/Bitcode/BitcodeCommon.h b/llvm/include/llvm/Bitcode/BitcodeCommon.h
index 6a3e74550bc4..22d1872fe49c 100644
--- a/llvm/include/llvm/Bitcode/BitcodeCommon.h
+++ b/llvm/include/llvm/Bitcode/BitcodeCommon.h
@@ -19,10 +19,14 @@
namespace llvm {
struct AllocaPackedValues {
- using Align = Bitfield::Element<unsigned, 0, 5>;
- using UsedWithInAlloca = Bitfield::Element<bool, Align::NextBit, 1>;
+ // We increased the number of bits needed to represent alignment to be more
+ // than 5, but to preserve backward compatibility we store the upper bits
+ // separately.
+ using AlignLower = Bitfield::Element<unsigned, 0, 5>;
+ using UsedWithInAlloca = Bitfield::Element<bool, AlignLower::NextBit, 1>;
using ExplicitType = Bitfield::Element<bool, UsedWithInAlloca::NextBit, 1>;
using SwiftError = Bitfield::Element<bool, ExplicitType::NextBit, 1>;
+ using AlignUpper = Bitfield::Element<unsigned, SwiftError::NextBit, 3>;
};
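To make the split concrete, a small sketch (illustrative only, not the bitcode reader or writer) of how a log2(alignment) value wider than 5 bits can be divided across the two fields and reassembled:

    #include <cstdint>
    // Assumed encoding for illustration: the low 5 bits travel in AlignLower and
    // the remaining high bits in AlignUpper, so older readers still find the
    // original 5-bit field where they expect it.
    inline uint32_t alignLowerBits(uint32_t AlignLog2) { return AlignLog2 & 0x1F; }
    inline uint32_t alignUpperBits(uint32_t AlignLog2) { return AlignLog2 >> 5; }
    inline uint32_t decodeAlignLog2(uint32_t Lower, uint32_t Upper) {
      return (Upper << 5) | Lower; // recombine into the full log2(alignment)
    }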
} // namespace llvm
diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
index 28870afb2fcb..04eb2739cbd5 100644
--- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
@@ -671,6 +671,7 @@ enum AttributeKindCodes {
ATTR_KIND_SWIFT_ASYNC = 75,
ATTR_KIND_NO_SANITIZE_COVERAGE = 76,
ATTR_KIND_ELEMENTTYPE = 77,
+ ATTR_KIND_DISABLE_SANITIZER_INSTRUMENTATION = 78,
};
enum ComdatSelectionKindCodes {
diff --git a/llvm/include/llvm/CodeGen/Analysis.h b/llvm/include/llvm/CodeGen/Analysis.h
index bdfb416d9bd9..60442326d6c7 100644
--- a/llvm/include/llvm/CodeGen/Analysis.h
+++ b/llvm/include/llvm/CodeGen/Analysis.h
@@ -104,9 +104,12 @@ ISD::CondCode getFCmpCodeWithoutNaN(ISD::CondCode CC);
/// getICmpCondCode - Return the ISD condition code corresponding to
/// the given LLVM IR integer condition code.
-///
ISD::CondCode getICmpCondCode(ICmpInst::Predicate Pred);
+/// getICmpCondCode - Return the LLVM IR integer condition code
+/// corresponding to the given ISD integer condition code.
+ICmpInst::Predicate getICmpCondCode(ISD::CondCode Pred);
+
/// Test if the given instruction is in a position to be optimized
/// with a tail-call. This roughly means that it's in a block with
/// a return and there's nothing that needs to be scheduled
diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h
index 5dea86e67d64..d7d3692877de 100644
--- a/llvm/include/llvm/CodeGen/AsmPrinter.h
+++ b/llvm/include/llvm/CodeGen/AsmPrinter.h
@@ -41,7 +41,6 @@ class DIEAbbrev;
class DwarfDebug;
class GCMetadataPrinter;
class GCStrategy;
-class GlobalIndirectSymbol;
class GlobalObject;
class GlobalValue;
class GlobalVariable;
@@ -708,7 +707,7 @@ public:
/// ${:comment}. Targets can override this to add support for their own
/// strange codes.
virtual void PrintSpecial(const MachineInstr *MI, raw_ostream &OS,
- const char *Code) const;
+ StringRef Code) const;
/// Print the MachineOperand as a symbol. Targets with complex handling of
/// symbol references should override the base implementation.
@@ -795,8 +794,8 @@ private:
void emitModuleCommandLines(Module &M);
GCMetadataPrinter *GetOrCreateGCPrinter(GCStrategy &S);
- /// Emit GlobalAlias or GlobalIFunc.
- void emitGlobalIndirectSymbol(Module &M, const GlobalIndirectSymbol &GIS);
+ void emitGlobalAlias(Module &M, const GlobalAlias &GA);
+ void emitGlobalIFunc(Module &M, const GlobalIFunc &GI);
/// This method decides whether the specified basic block requires a label.
bool shouldEmitLabelForBasicBlock(const MachineBasicBlock &MBB) const;
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index e3b834ec42c3..324b7dcfb3ac 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -22,6 +22,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/TargetTransformInfoImpl.h"
#include "llvm/CodeGen/ISDOpcodes.h"
@@ -282,6 +283,11 @@ public:
return getTLI()->getTargetMachine().getAssumedAddrSpace(V);
}
+ std::pair<const Value *, unsigned>
+ getPredicatedAddrSpace(const Value *V) const {
+ return getTLI()->getTargetMachine().getPredicatedAddrSpace(V);
+ }
+
Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
Value *NewV) const {
return nullptr;
@@ -363,8 +369,9 @@ public:
}
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
- ArrayRef<const Value *> Operands) {
- return BaseT::getGEPCost(PointeeType, Ptr, Operands);
+ ArrayRef<const Value *> Operands,
+ TTI::TargetCostKind CostKind) {
+ return BaseT::getGEPCost(PointeeType, Ptr, Operands, CostKind);
}
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
@@ -484,7 +491,8 @@ public:
int getInlinerVectorBonusPercent() { return 150; }
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
- TTI::UnrollingPreferences &UP) {
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE) {
// This unrolling functionality is target independent, but to provide some
// motivation for its intended use, for x86:
@@ -526,6 +534,15 @@ public:
continue;
}
+ if (ORE) {
+ ORE->emit([&]() {
+ return OptimizationRemark("TTI", "DontUnroll", L->getStartLoc(),
+ L->getHeader())
+ << "advising against unrolling the loop because it "
+ "contains a "
+ << ore::NV("Call", &I);
+ });
+ }
return;
}
}
@@ -653,6 +670,7 @@ public:
}
Optional<unsigned> getMaxVScale() const { return None; }
+ Optional<unsigned> getVScaleForTuning() const { return None; }
/// Estimate the overhead of scalarizing an instruction. Insert and Extract
/// are set if the demanded result elements need to be inserted and/or
@@ -686,7 +704,7 @@ public:
bool Extract) {
auto *Ty = cast<FixedVectorType>(InTy);
- APInt DemandedElts = APInt::getAllOnesValue(Ty->getNumElements());
+ APInt DemandedElts = APInt::getAllOnes(Ty->getNumElements());
return thisT()->getScalarizationOverhead(Ty, DemandedElts, Insert, Extract);
}
@@ -737,8 +755,7 @@ public:
unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
InstructionCost getArithmeticInstrCost(
- unsigned Opcode, Type *Ty,
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+ unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
@@ -1102,6 +1119,39 @@ public:
return LT.first;
}
+ InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
+ int VF,
+ const APInt &DemandedDstElts,
+ TTI::TargetCostKind CostKind) {
+ assert(DemandedDstElts.getBitWidth() == (unsigned)VF * ReplicationFactor &&
+ "Unexpected size of DemandedDstElts.");
+
+ InstructionCost Cost;
+
+ auto *SrcVT = FixedVectorType::get(EltTy, VF);
+ auto *ReplicatedVT = FixedVectorType::get(EltTy, VF * ReplicationFactor);
+
+ // The Mask shuffling cost is to extract all the elements of the Mask
+ // and insert each of them Factor times into the wide vector:
+ //
+ // E.g. an interleaved group with factor 3:
+ // %mask = icmp ult <8 x i32> %vec1, %vec2
+ // %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef,
+ // <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7>
+ // The cost is estimated as extract all mask elements from the <8xi1> mask
+ // vector and insert them factor times into the <24xi1> shuffled mask
+ // vector.
+ APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedDstElts, VF);
+ Cost += thisT()->getScalarizationOverhead(SrcVT, DemandedSrcElts,
+ /*Insert*/ false,
+ /*Extract*/ true);
+ Cost +=
+ thisT()->getScalarizationOverhead(ReplicatedVT, DemandedDstElts,
+ /*Insert*/ true, /*Extract*/ false);
+
+ return Cost;
+ }
+
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
MaybeAlign Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind,
@@ -1201,9 +1251,9 @@ public:
// used (those corresponding to elements [0:1] and [8:9] of the unlegalized
// type). The other loads are unused.
//
- // We only scale the cost of loads since interleaved store groups aren't
- // allowed to have gaps.
- if (Opcode == Instruction::Load && VecTySize > VecTyLTSize) {
+ // TODO: Note that legalization can turn masked loads/stores into unmasked
+ // (legalized) loads/stores. This can be reflected in the cost.
+ if (Cost.isValid() && VecTySize > VecTyLTSize) {
// The number of loads of a legal type it will take to represent a load
// of the unlegalized vector type.
unsigned NumLegalInsts = divideCeil(VecTySize, VecTyLTSize);
@@ -1220,10 +1270,24 @@ public:
// Scale the cost of the load by the fraction of legal instructions that
// will be used.
- Cost *= UsedInsts.count() / NumLegalInsts;
+ Cost = divideCeil(UsedInsts.count() * Cost.getValue().getValue(),
+ NumLegalInsts);
}
// Then plus the cost of interleave operation.
+ assert(Indices.size() <= Factor &&
+ "Interleaved memory op has too many members");
+
+ const APInt DemandedAllSubElts = APInt::getAllOnes(NumSubElts);
+ const APInt DemandedAllResultElts = APInt::getAllOnes(NumElts);
+
+ APInt DemandedLoadStoreElts = APInt::getZero(NumElts);
+ for (unsigned Index : Indices) {
+ assert(Index < Factor && "Invalid index for interleaved memory op");
+ for (unsigned Elm = 0; Elm < NumSubElts; Elm++)
+ DemandedLoadStoreElts.setBit(Index + Elm * Factor);
+ }
+
if (Opcode == Instruction::Load) {
// The interleave cost is similar to extract sub vectors' elements
// from the wide vector, and insert them into sub vectors.
@@ -1233,79 +1297,56 @@ public:
// %v0 = shuffle %vec, undef, <0, 2, 4, 6> ; Index 0
// The cost is estimated as extract elements at 0, 2, 4, 6 from the
// <8 x i32> vector and insert them into a <4 x i32> vector.
-
- assert(Indices.size() <= Factor &&
- "Interleaved memory op has too many members");
-
- for (unsigned Index : Indices) {
- assert(Index < Factor && "Invalid index for interleaved memory op");
-
- // Extract elements from loaded vector for each sub vector.
- for (unsigned i = 0; i < NumSubElts; i++)
- Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VT,
- Index + i * Factor);
- }
-
- InstructionCost InsSubCost = 0;
- for (unsigned i = 0; i < NumSubElts; i++)
- InsSubCost +=
- thisT()->getVectorInstrCost(Instruction::InsertElement, SubVT, i);
-
+ InstructionCost InsSubCost =
+ thisT()->getScalarizationOverhead(SubVT, DemandedAllSubElts,
+ /*Insert*/ true, /*Extract*/ false);
Cost += Indices.size() * InsSubCost;
+ Cost +=
+ thisT()->getScalarizationOverhead(VT, DemandedLoadStoreElts,
+ /*Insert*/ false, /*Extract*/ true);
} else {
- // The interleave cost is extract all elements from sub vectors, and
+ // The interleave cost is to extract elements from sub vectors, and
// insert them into the wide vector.
//
- // E.g. An interleaved store of factor 2:
- // %v0_v1 = shuffle %v0, %v1, <0, 4, 1, 5, 2, 6, 3, 7>
- // store <8 x i32> %interleaved.vec, <8 x i32>* %ptr
- // The cost is estimated as extract all elements from both <4 x i32>
- // vectors and insert into the <8 x i32> vector.
-
- InstructionCost ExtSubCost = 0;
- for (unsigned i = 0; i < NumSubElts; i++)
- ExtSubCost +=
- thisT()->getVectorInstrCost(Instruction::ExtractElement, SubVT, i);
- Cost += ExtSubCost * Factor;
-
- for (unsigned i = 0; i < NumElts; i++)
- Cost += static_cast<T *>(this)
- ->getVectorInstrCost(Instruction::InsertElement, VT, i);
+ // E.g. An interleaved store of factor 3 with 2 members at indices 0,1:
+ // (using VF=4):
+ // %v0_v1 = shuffle %v0, %v1, <0,4,undef,1,5,undef,2,6,undef,3,7,undef>
+ // %gaps.mask = <true, true, false, true, true, false,
+ // true, true, false, true, true, false>
+ // call llvm.masked.store <12 x i32> %v0_v1, <12 x i32>* %ptr,
+ // i32 Align, <12 x i1> %gaps.mask
+ // The cost is estimated as extract all elements (of actual members,
+ // excluding gaps) from both <4 x i32> vectors and insert into the <12 x
+ // i32> vector.
+ InstructionCost ExtSubCost =
+ thisT()->getScalarizationOverhead(SubVT, DemandedAllSubElts,
+ /*Insert*/ false, /*Extract*/ true);
+ Cost += ExtSubCost * Indices.size();
+ Cost += thisT()->getScalarizationOverhead(VT, DemandedLoadStoreElts,
+ /*Insert*/ true,
+ /*Extract*/ false);
}
if (!UseMaskForCond)
return Cost;
Type *I8Type = Type::getInt8Ty(VT->getContext());
- auto *MaskVT = FixedVectorType::get(I8Type, NumElts);
- SubVT = FixedVectorType::get(I8Type, NumSubElts);
-
- // The Mask shuffling cost is extract all the elements of the Mask
- // and insert each of them Factor times into the wide vector:
- //
- // E.g. an interleaved group with factor 3:
- // %mask = icmp ult <8 x i32> %vec1, %vec2
- // %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef,
- // <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7>
- // The cost is estimated as extract all mask elements from the <8xi1> mask
- // vector and insert them factor times into the <24xi1> shuffled mask
- // vector.
- for (unsigned i = 0; i < NumSubElts; i++)
- Cost +=
- thisT()->getVectorInstrCost(Instruction::ExtractElement, SubVT, i);
- for (unsigned i = 0; i < NumElts; i++)
- Cost +=
- thisT()->getVectorInstrCost(Instruction::InsertElement, MaskVT, i);
+ Cost += thisT()->getReplicationShuffleCost(
+ I8Type, Factor, NumSubElts,
+ UseMaskForGaps ? DemandedLoadStoreElts : DemandedAllResultElts,
+ CostKind);
// The Gaps mask is invariant and created outside the loop, therefore the
// cost of creating it is not accounted for here. However if we have both
// a MaskForGaps and some other mask that guards the execution of the
// memory access, we need to account for the cost of And-ing the two masks
// inside the loop.
- if (UseMaskForGaps)
+ if (UseMaskForGaps) {
+ auto *MaskVT = FixedVectorType::get(I8Type, NumElts);
Cost += thisT()->getArithmeticInstrCost(BinaryOperator::And, MaskVT,
CostKind);
+ }
return Cost;
}
@@ -1460,10 +1501,10 @@ public:
Type *CondTy = RetTy->getWithNewBitWidth(1);
Cost +=
thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
- CmpInst::BAD_ICMP_PREDICATE, CostKind);
+ CmpInst::ICMP_EQ, CostKind);
Cost +=
thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
- CmpInst::BAD_ICMP_PREDICATE, CostKind);
+ CmpInst::ICMP_EQ, CostKind);
}
return Cost;
}
@@ -1689,26 +1730,34 @@ public:
return thisT()->getMinMaxReductionCost(
VecOpTy, cast<VectorType>(CmpInst::makeCmpResultType(VecOpTy)),
/*IsUnsigned=*/true, CostKind);
- case Intrinsic::abs:
+ case Intrinsic::abs: {
+ // abs(X) = select(icmp(X,0),X,sub(0,X))
+ Type *CondTy = RetTy->getWithNewBitWidth(1);
+ CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
+ InstructionCost Cost = 0;
+ Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
+ Pred, CostKind);
+ Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
+ Pred, CostKind);
+ // TODO: Should we add an OperandValueProperties::OP_Zero property?
+ Cost += thisT()->getArithmeticInstrCost(
+ BinaryOperator::Sub, RetTy, CostKind, TTI::OK_UniformConstantValue);
+ return Cost;
+ }
case Intrinsic::smax:
case Intrinsic::smin:
case Intrinsic::umax:
case Intrinsic::umin: {
- // abs(X) = select(icmp(X,0),X,sub(0,X))
// minmax(X,Y) = select(icmp(X,Y),X,Y)
Type *CondTy = RetTy->getWithNewBitWidth(1);
+ bool IsUnsigned = IID == Intrinsic::umax || IID == Intrinsic::umin;
+ CmpInst::Predicate Pred =
+ IsUnsigned ? CmpInst::ICMP_UGT : CmpInst::ICMP_SGT;
InstructionCost Cost = 0;
- // TODO: Ideally getCmpSelInstrCost would accept an icmp condition code.
- Cost +=
- thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
- CmpInst::BAD_ICMP_PREDICATE, CostKind);
- Cost +=
- thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
- CmpInst::BAD_ICMP_PREDICATE, CostKind);
- // TODO: Should we add an OperandValueProperties::OP_Zero property?
- if (IID == Intrinsic::abs)
- Cost += thisT()->getArithmeticInstrCost(
- BinaryOperator::Sub, RetTy, CostKind, TTI::OK_UniformConstantValue);
+ Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
+ Pred, CostKind);
+ Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
+ Pred, CostKind);
return Cost;
}
case Intrinsic::sadd_sat:
@@ -1719,6 +1768,7 @@ public:
Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat
? Intrinsic::sadd_with_overflow
: Intrinsic::ssub_with_overflow;
+ CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
// SatMax -> Overflow && SumDiff < 0
// SatMin -> Overflow && SumDiff >= 0
@@ -1726,12 +1776,10 @@ public:
IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF,
nullptr, ScalarizationCostPassed);
Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind);
- Cost +=
- thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
- CmpInst::BAD_ICMP_PREDICATE, CostKind);
- Cost += 2 * thisT()->getCmpSelInstrCost(
- BinaryOperator::Select, RetTy, CondTy,
- CmpInst::BAD_ICMP_PREDICATE, CostKind);
+ Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
+ Pred, CostKind);
+ Cost += 2 * thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
+ CondTy, Pred, CostKind);
return Cost;
}
case Intrinsic::uadd_sat:
@@ -1784,23 +1832,16 @@ public:
? BinaryOperator::Add
: BinaryOperator::Sub;
- // LHSSign -> LHS >= 0
- // RHSSign -> RHS >= 0
- // SumSign -> Sum >= 0
- //
// Add:
- // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
+ // Overflow -> (Result < LHS) ^ (RHS < 0)
// Sub:
- // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
+ // Overflow -> (Result < LHS) ^ (RHS > 0)
InstructionCost Cost = 0;
Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
- Cost += 3 * thisT()->getCmpSelInstrCost(
- Instruction::ICmp, SumTy, OverflowTy,
- CmpInst::BAD_ICMP_PREDICATE, CostKind);
Cost += 2 * thisT()->getCmpSelInstrCost(
- Instruction::Select, OverflowTy, OverflowTy,
- CmpInst::BAD_ICMP_PREDICATE, CostKind);
- Cost += thisT()->getArithmeticInstrCost(BinaryOperator::And, OverflowTy,
+ Instruction::ICmp, SumTy, OverflowTy,
+ CmpInst::ICMP_SGT, CostKind);
+ Cost += thisT()->getArithmeticInstrCost(BinaryOperator::Xor, OverflowTy,
CostKind);
return Cost;
}
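The rewritten predicate can be sanity-checked with a short standalone snippet (an illustration assuming wrapping two's-complement arithmetic; it is not code from this patch):

    #include <cstdint>
    // Overflow check being priced for sadd_with_overflow:
    //   Overflow -> (Result < LHS) ^ (RHS < 0)
    bool saddOverflows(int32_t LHS, int32_t RHS) {
      // Wrapping add via unsigned arithmetic to avoid signed-overflow UB.
      int32_t Result = static_cast<int32_t>(static_cast<uint32_t>(LHS) +
                                            static_cast<uint32_t>(RHS));
      return (Result < LHS) ^ (RHS < 0);
    }
    // e.g. saddOverflows(INT32_MAX, 1) == true, saddOverflows(-1, 1) == false.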
@@ -1811,12 +1852,15 @@ public:
unsigned Opcode = IID == Intrinsic::uadd_with_overflow
? BinaryOperator::Add
: BinaryOperator::Sub;
+ CmpInst::Predicate Pred = IID == Intrinsic::uadd_with_overflow
+ ? CmpInst::ICMP_ULT
+ : CmpInst::ICMP_UGT;
InstructionCost Cost = 0;
Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
Cost +=
thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, OverflowTy,
- CmpInst::BAD_ICMP_PREDICATE, CostKind);
+ Pred, CostKind);
return Cost;
}
case Intrinsic::smul_with_overflow:
@@ -1825,9 +1869,9 @@ public:
Type *OverflowTy = RetTy->getContainedType(1);
unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize);
+ bool IsSigned = IID == Intrinsic::smul_with_overflow;
- unsigned ExtOp =
- IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
+ unsigned ExtOp = IsSigned ? Instruction::SExt : Instruction::ZExt;
TTI::CastContextHint CCH = TTI::CastContextHint::None;
InstructionCost Cost = 0;
@@ -1836,18 +1880,17 @@ public:
thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy,
CCH, CostKind);
- Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, MulTy,
+ Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, ExtTy,
CostKind, TTI::OK_AnyValue,
TTI::OK_UniformConstantValue);
- if (IID == Intrinsic::smul_with_overflow)
+ if (IsSigned)
Cost += thisT()->getArithmeticInstrCost(Instruction::AShr, MulTy,
CostKind, TTI::OK_AnyValue,
TTI::OK_UniformConstantValue);
- Cost +=
- thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy, OverflowTy,
- CmpInst::BAD_ICMP_PREDICATE, CostKind);
+ Cost += thisT()->getCmpSelInstrCost(
+ BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind);
return Cost;
}
case Intrinsic::ctpop:
@@ -1974,16 +2017,16 @@ public:
/// \param RetTy Return value types.
/// \param Tys Argument types.
/// \returns The cost of Call instruction.
- InstructionCost
- getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys,
- TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) {
+ InstructionCost getCallInstrCost(Function *F, Type *RetTy,
+ ArrayRef<Type *> Tys,
+ TTI::TargetCostKind CostKind) {
return 10;
}
unsigned getNumberOfParts(Type *Tp) {
std::pair<InstructionCost, MVT> LT =
getTLI()->getTypeLegalizationCost(DL, Tp);
- return *LT.first.getValue();
+ return LT.first.isValid() ? *LT.first.getValue() : 0;
}
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *,
@@ -2060,7 +2103,8 @@ public:
// By default reductions need one shuffle per reduction level.
ShuffleCost += NumReduxLevels * thisT()->getShuffleCost(
TTI::SK_PermuteSingleSrc, Ty, None, 0, Ty);
- ArithCost += NumReduxLevels * thisT()->getArithmeticInstrCost(Opcode, Ty);
+ ArithCost +=
+ NumReduxLevels * thisT()->getArithmeticInstrCost(Opcode, Ty, CostKind);
return ShuffleCost + ArithCost +
thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
}
diff --git a/llvm/include/llvm/CodeGen/CodeGenCommonISel.h b/llvm/include/llvm/CodeGen/CodeGenCommonISel.h
new file mode 100644
index 000000000000..270f935b6738
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/CodeGenCommonISel.h
@@ -0,0 +1,219 @@
+//===- CodeGenCommonISel.h - Common code between ISels ---------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares common utilities that are shared between SelectionDAG and
+// GlobalISel frameworks.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_CODEGENCOMMONISEL_H
+#define LLVM_CODEGEN_CODEGENCOMMONISEL_H
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include <cassert>
+namespace llvm {
+
+class BasicBlock;
+class MachineBasicBlock;
+/// Encapsulates all of the information needed to generate a stack protector
+/// check, and signals to isel when initialized that one needs to be generated.
+///
+/// *NOTE* The following is high-level documentation of SelectionDAG Stack
+/// Protector Generation. This is now also ported to be shared with GlobalISel,
+/// but without any significant changes.
+///
+/// High Level Overview of ISel Stack Protector Generation:
+///
+/// Previously, the "stack protector" IR pass handled stack protector
+/// generation. This necessitated splitting basic blocks at the IR level to
+/// create the success/failure basic blocks in the tail of the basic block in
+/// question. As a result of this, calls that would have qualified for the
+/// sibling call optimization were no longer eligible for optimization since
+/// said calls were no longer right in the "tail position" (i.e. the immediate
+/// predecessor of a ReturnInst instruction).
+///
+/// Since the sibling call optimization causes the callee to reuse the caller's
+/// stack, if we could delay the generation of the stack protector check until
+/// later in CodeGen after the sibling call decision was made, we get both the
+/// tail call optimization and the stack protector check!
+///
+/// A few goals in solving this problem were:
+///
+/// 1. Preserve the architecture independence of stack protector generation.
+///
+/// 2. Preserve the normal IR level stack protector check for platforms like
+/// OpenBSD for which we support platform-specific stack protector
+/// generation.
+///
+/// The main problem that guided the present solution is that one cannot
+/// solve this problem in an architecture-independent manner at the IR level
+/// only. This is because:
+///
+/// 1. The decision on whether or not to perform a sibling call on certain
+/// platforms (for instance i386) requires lower level information
+/// related to available registers that can not be known at the IR level.
+///
+/// 2. Even if the previous point were not true, the decision on whether to
+/// perform a tail call is done in LowerCallTo in SelectionDAG (or
+/// CallLowering in GlobalISel) which occurs after the Stack Protector
+/// Pass. As a result, one would need to put the relevant callinst into the
+/// stack protector check success basic block (where the return inst is
+/// placed) and then move it back later at ISel/MI time before the
+/// stack protector check if the tail call optimization failed. The MI
+/// level option was nixed immediately since it would require
+/// platform-specific pattern matching. The ISel level option was
+/// nixed because SelectionDAG only processes one IR level basic block at a
+/// time, implying one could not create a DAG Combine to move the callinst.
+///
+/// To get around this problem:
+///
+/// 1. Although SelectionDAG can only process one block at a time, we can
+/// generate multiple machine basic blocks for one IR level basic block.
+/// This is how we handle bit tests and switches.
+///
+/// 2. At the MI level, tail calls are represented via a special return
+/// MIInst called "tcreturn". Thus if we know the basic block in which we
+/// wish to insert the stack protector check, we get the correct behavior
+/// by always inserting the stack protector check right before the return
+/// statement. This is a "magical transformation" since no matter where
+/// the stack protector check intrinsic is, we always insert the stack
+/// protector check code at the end of the BB.
+///
+/// Given the aforementioned constraints, the following solution was devised:
+///
+/// 1. On platforms that do not support ISel stack protector check
+/// generation, allow for the normal IR level stack protector check
+/// generation to continue.
+///
+/// 2. On platforms that do support ISel stack protector check
+/// generation:
+///
+/// a. Use the IR level stack protector pass to decide if a stack
+/// protector is required/which BB we insert the stack protector check
+/// in by reusing the logic already therein.
+///
+/// b. After we finish selecting the basic block, we produce the validation
+/// code with one of these techniques:
+/// 1) with a call to a guard check function
+/// 2) with inlined instrumentation
+///
+/// 1) We insert a call to the check function before the terminator.
+///
+/// 2) We first find a splice point in the parent basic block
+/// before the terminator and then splice the terminator of said basic
+/// block into the success basic block. Then we code-gen a new tail for
+/// the parent basic block consisting of the two loads, the comparison,
+/// and finally two branches to the success/failure basic blocks. We
+/// conclude by code-gening the failure basic block if we have not
+/// code-gened it already (all stack protector checks we generate in
+/// the same function, use the same failure basic block).
+class StackProtectorDescriptor {
+public:
+ StackProtectorDescriptor() = default;
+
+ /// Returns true if all fields of the stack protector descriptor are
+ /// initialized implying that we should/are ready to emit a stack protector.
+ bool shouldEmitStackProtector() const {
+ return ParentMBB && SuccessMBB && FailureMBB;
+ }
+
+ bool shouldEmitFunctionBasedCheckStackProtector() const {
+ return ParentMBB && !SuccessMBB && !FailureMBB;
+ }
+
+ /// Initialize the stack protector descriptor structure for a new basic
+ /// block.
+ void initialize(const BasicBlock *BB, MachineBasicBlock *MBB,
+ bool FunctionBasedInstrumentation) {
+ // Make sure we are not initialized yet.
+ assert(!shouldEmitStackProtector() && "Stack Protector Descriptor is "
+ "already initialized!");
+ ParentMBB = MBB;
+ if (!FunctionBasedInstrumentation) {
+ SuccessMBB = addSuccessorMBB(BB, MBB, /* IsLikely */ true);
+ FailureMBB = addSuccessorMBB(BB, MBB, /* IsLikely */ false, FailureMBB);
+ }
+ }
+
+ /// Reset state that changes when we handle different basic blocks.
+ ///
+ /// This currently includes:
+ ///
+ /// 1. The specific basic block we are generating a
+ /// stack protector for (ParentMBB).
+ ///
+ /// 2. The successor machine basic block that will contain the tail of
+ /// parent mbb after we create the stack protector check (SuccessMBB). This
+ /// BB is visited only on stack protector check success.
+ void resetPerBBState() {
+ ParentMBB = nullptr;
+ SuccessMBB = nullptr;
+ }
+
+ /// Reset state that only changes when we switch functions.
+ ///
+ /// This currently includes:
+ ///
+ /// 1. FailureMBB since we reuse the failure code path for all stack
+ /// protector checks created in an individual function.
+ ///
+ /// 2. The guard variable since the guard variable we are checking against is
+ /// always the same.
+ void resetPerFunctionState() { FailureMBB = nullptr; }
+
+ MachineBasicBlock *getParentMBB() { return ParentMBB; }
+ MachineBasicBlock *getSuccessMBB() { return SuccessMBB; }
+ MachineBasicBlock *getFailureMBB() { return FailureMBB; }
+
+private:
+ /// The basic block for which we are generating the stack protector.
+ ///
+ /// As a result of stack protector generation, we will splice the
+ /// terminators of this basic block into the successor mbb SuccessMBB and
+ /// replace it with a compare/branch to the successor mbbs
+ /// SuccessMBB/FailureMBB depending on whether or not the stack protector
+ /// was violated.
+ MachineBasicBlock *ParentMBB = nullptr;
+
+ /// A basic block visited on stack protector check success that contains the
+ /// terminators of ParentMBB.
+ MachineBasicBlock *SuccessMBB = nullptr;
+
+ /// This basic block visited on stack protector check failure that will
+ /// contain a call to __stack_chk_fail().
+ MachineBasicBlock *FailureMBB = nullptr;
+
+ /// Add a successor machine basic block to ParentMBB. If the successor mbb
+ /// has not been created yet (i.e. if SuccMBB = 0), then the machine basic
+ /// block will be created. Assign a large weight if IsLikely is true.
+ MachineBasicBlock *addSuccessorMBB(const BasicBlock *BB,
+ MachineBasicBlock *ParentMBB,
+ bool IsLikely,
+ MachineBasicBlock *SuccMBB = nullptr);
+};
+
+/// Find the split point at which to splice the end of BB into its success stack
+/// protector check machine basic block.
+///
+/// On many platforms, due to ABI constraints, terminators, even before register
+/// allocation, use physical registers. This creates an issue for us since
+/// physical registers at this point cannot travel across basic
+/// blocks. Luckily, SelectionDAG always moves physical registers into vregs
+/// when they enter functions and moves them through a sequence of copies back
+/// into the physical registers right before the terminator creating a
+/// ``Terminator Sequence''. This function is searching for the beginning of the
+/// terminator sequence so that we can ensure that we splice off not just the
+/// terminator, but additionally the copies that move the vregs into the
+/// physical registers.
+MachineBasicBlock::iterator
+findSplitPointForStackProtector(MachineBasicBlock *BB,
+ const TargetInstrInfo &TII);
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_CODEGENCOMMONISEL_H
diff --git a/llvm/include/llvm/CodeGen/CommandFlags.h b/llvm/include/llvm/CodeGen/CommandFlags.h
index 5a4351756297..ed3cd54df272 100644
--- a/llvm/include/llvm/CodeGen/CommandFlags.h
+++ b/llvm/include/llvm/CodeGen/CommandFlags.h
@@ -48,7 +48,6 @@ Optional<CodeModel::Model> getExplicitCodeModel();
llvm::ExceptionHandling getExceptionModel();
-CodeGenFileType getFileType();
Optional<CodeGenFileType> getExplicitFileType();
CodeGenFileType getFileType();
@@ -74,6 +73,8 @@ llvm::FloatABI::ABIType getFloatABIForCalls();
llvm::FPOpFusion::FPOpFusionMode getFuseFPOps();
+SwiftAsyncFramePointerMode getSwiftAsyncFramePointer();
+
bool getDontPlaceZerosInBSS();
bool getEnableGuaranteedTailCallOpt();
@@ -128,8 +129,6 @@ bool getEnableMachineFunctionSplitter();
bool getEnableDebugEntryValues();
-bool getPseudoProbeForProfiling();
-
bool getValueTrackingVariableLocations();
bool getForceDwarfFrameSection();
@@ -138,6 +137,8 @@ bool getXRayOmitFunctionIndex();
bool getDebugStrictDwarf();
+unsigned getAlignLoops();
+
/// Create this object with static storage to register codegen-related command
/// line options.
struct RegisterCodeGenFlags {
diff --git a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
index b6bde0249f88..524730d53694 100644
--- a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
+++ b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
@@ -17,7 +17,6 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IndexedMap.h"
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/ISDOpcodes.h"
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
index 6bdaddd9c6f5..9c878d4b087b 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
@@ -116,6 +116,9 @@ public:
/// vreg that the swifterror should be copied into after the call.
Register SwiftErrorVReg;
+ /// Original IR callsite corresponding to this call, if available.
+ const CallBase *CB = nullptr;
+
MDNode *KnownCallees = nullptr;
/// True if the call must be tail call optimized.
@@ -259,7 +262,7 @@ public:
/// handle the appropriate COPY (either to or from) and mark any
/// relevant uses/defines as needed.
virtual void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign &VA) = 0;
+ CCValAssign VA) = 0;
/// The specified value has been assigned to a stack
/// location. Load or store it there, with appropriate extension
@@ -279,11 +282,14 @@ public:
}
/// Handle custom values, which may be passed into one or more of \p VAs.
+ /// If the handler wants the assignments to be delayed until after
+ /// mem loc assignments, then it sets \p Thunk to the thunk to do the
+ /// assignment.
/// \return The number of \p VAs that have been assigned after the first
/// one, and which should therefore be skipped from further
/// processing.
- virtual unsigned assignCustomValue(ArgInfo &Arg,
- ArrayRef<CCValAssign> VAs) {
+ virtual unsigned assignCustomValue(ArgInfo &Arg, ArrayRef<CCValAssign> VAs,
+ std::function<void()> *Thunk = nullptr) {
// This is not a pure virtual method because not all targets need to worry
// about custom values.
llvm_unreachable("Custom values not supported");
@@ -315,7 +321,7 @@ public:
/// Provides a default implementation for argument handling.
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign &VA) override;
+ CCValAssign VA) override;
};
/// Base class for ValueHandlers used for arguments passed to a function call,
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 56459b68dce0..ff4ad4b72636 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -36,7 +36,10 @@ class GISelKnownBits;
class MachineDominatorTree;
class LegalizerInfo;
struct LegalityQuery;
+class RegisterBank;
+class RegisterBankInfo;
class TargetLowering;
+class TargetRegisterInfo;
struct PreferredTuple {
LLT Ty; // The result type of the extend.
@@ -54,6 +57,7 @@ struct IndexedLoadStoreMatchInfo {
struct PtrAddChain {
int64_t Imm;
Register Base;
+ const RegisterBank *Bank;
};
struct RegisterImmPair {
@@ -68,6 +72,16 @@ struct ShiftOfShiftedLogic {
uint64_t ValSum;
};
+using BuildFnTy = std::function<void(MachineIRBuilder &)>;
+
+struct MergeTruncStoresInfo {
+ SmallVector<GStore *> FoundStores;
+ GStore *LowestIdxStore = nullptr;
+ Register WideSrcVal;
+ bool NeedBSwap = false;
+ bool NeedRotate = false;
+};
+
using OperandBuildSteps =
SmallVector<std::function<void(MachineInstrBuilder &)>, 4>;
struct InstructionBuildSteps {
@@ -95,6 +109,8 @@ protected:
GISelKnownBits *KB;
MachineDominatorTree *MDT;
const LegalizerInfo *LI;
+ const RegisterBankInfo *RBI;
+ const TargetRegisterInfo *TRI;
public:
CombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B,
@@ -120,6 +136,22 @@ public:
void replaceRegOpWith(MachineRegisterInfo &MRI, MachineOperand &FromRegOp,
Register ToReg) const;
+ /// Replace the opcode in instruction with a new opcode and inform the
+ /// observer of the changes.
+ void replaceOpcodeWith(MachineInstr &FromMI, unsigned ToOpcode) const;
+
+ /// Get the register bank of \p Reg.
+ /// If Reg has not been assigned a register, a register class,
+ /// or a register bank, then this returns nullptr.
+ ///
+ /// \pre Reg.isValid()
+ const RegisterBank *getRegBank(Register Reg) const;
+
+ /// Set the register bank of \p Reg.
+ /// Does nothing if the RegBank is null.
+ /// This is the counterpart to getRegBank.
+ void setRegBank(Register Reg, const RegisterBank *RegBank);
+
/// If \p MI is COPY, try to combine it.
/// Returns true if MI changed.
bool tryCombineCopy(MachineInstr &MI);
@@ -144,6 +176,9 @@ public:
bool matchCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo);
void applyCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo);
+ /// Match (and (load x), mask) -> zextload x
+ bool matchCombineLoadWithAndMask(MachineInstr &MI, BuildFnTy &MatchInfo);
+
/// Combine \p MI into a pre-indexed or post-indexed load/store operation if
/// legal and the surrounding code makes it useful.
bool tryCombineIndexedLoadStore(MachineInstr &MI);
@@ -341,6 +376,9 @@ public:
bool matchCombineFAbsOfFAbs(MachineInstr &MI, Register &Src);
void applyCombineFAbsOfFAbs(MachineInstr &MI, Register &Src);
+ /// Transform fabs(fneg(x)) to fabs(x).
+ bool matchCombineFAbsOfFNeg(MachineInstr &MI, BuildFnTy &MatchInfo);
+
/// Transform trunc ([asz]ext x) to x or ([asz]ext x) or (trunc x).
bool matchCombineTruncOfExt(MachineInstr &MI,
std::pair<Register, unsigned> &MatchInfo);
@@ -445,7 +483,7 @@ public:
/// Fold and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0
bool matchOverlappingAnd(MachineInstr &MI,
- std::function<void(MachineIRBuilder &)> &MatchInfo);
+ BuildFnTy &MatchInfo);
/// \return true if \p MI is a G_AND instruction whose operands are x and y
/// where x & y == x or x & y == y. (E.g., one of operands is all-ones value.)
@@ -501,8 +539,10 @@ public:
///
/// And check if the tree can be replaced with a M-bit load + possibly a
/// bswap.
- bool matchLoadOrCombine(MachineInstr &MI,
- std::function<void(MachineIRBuilder &)> &MatchInfo);
+ bool matchLoadOrCombine(MachineInstr &MI, BuildFnTy &MatchInfo);
+
+ bool matchTruncStoreMerge(MachineInstr &MI, MergeTruncStoresInfo &MatchInfo);
+ void applyTruncStoreMerge(MachineInstr &MI, MergeTruncStoresInfo &MatchInfo);
bool matchExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI);
void applyExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI);
@@ -519,12 +559,10 @@ public:
/// Use a function which takes in a MachineIRBuilder to perform a combine.
/// By default, it erases the instruction \p MI from the function.
- void applyBuildFn(MachineInstr &MI,
- std::function<void(MachineIRBuilder &)> &MatchInfo);
+ void applyBuildFn(MachineInstr &MI, BuildFnTy &MatchInfo);
/// Use a function which takes in a MachineIRBuilder to perform a combine.
/// This variant does not erase \p MI after calling the build function.
- void applyBuildFnNoErase(MachineInstr &MI,
- std::function<void(MachineIRBuilder &)> &MatchInfo);
+ void applyBuildFnNoErase(MachineInstr &MI, BuildFnTy &MatchInfo);
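
For context, applyBuildFn/applyBuildFnNoErase are the generic "apply" half of the match/apply combine pattern: a match function records the rewrite as a BuildFnTy closure, and applyBuildFn later replays it with a MachineIRBuilder positioned at \p MI (and then erases \p MI). A minimal sketch of a hypothetical matcher written against this interface; the combine itself is illustrative and the MIPatternMatch helpers (m_GXor, m_AllOnesInt, m_Reg) are assumed to be in scope, they are not part of this patch:

  // Hypothetical matcher: fold (xor (xor x, -1), -1) -> x by recording the
  // rewrite in a BuildFnTy; applyBuildFn runs the closure and erases MI.
  bool matchDoubleNot(MachineInstr &MI, MachineRegisterInfo &MRI,
                      BuildFnTy &MatchInfo) {
    Register Dst = MI.getOperand(0).getReg();
    Register Src;
    if (!mi_match(Dst, MRI,
                  m_GXor(m_GXor(m_Reg(Src), m_AllOnesInt()), m_AllOnesInt())))
      return false;
    MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(Dst, Src); };
    return true;
  }
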
bool matchFunnelShiftToRotate(MachineInstr &MI);
void applyFunnelShiftToRotate(MachineInstr &MI);
@@ -535,21 +573,57 @@ public:
/// or false constant based off of KnownBits information.
bool matchICmpToTrueFalseKnownBits(MachineInstr &MI, int64_t &MatchInfo);
- bool matchBitfieldExtractFromSExtInReg(
- MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo);
- /// Match: and (lshr x, cst), mask -> ubfx x, cst, width
- bool matchBitfieldExtractFromAnd(
- MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo);
+ /// \returns true if a G_ICMP \p MI can be replaced with its LHS based off of
+ /// KnownBits information.
+ bool
+ matchICmpToLHSKnownBits(MachineInstr &MI,
+ BuildFnTy &MatchInfo);
+
+ /// \returns true if (and (or x, c1), c2) can be replaced with (and x, c2)
+ bool matchAndOrDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo);
+ bool matchBitfieldExtractFromSExtInReg(MachineInstr &MI,
+ BuildFnTy &MatchInfo);
+ /// Match: and (lshr x, cst), mask -> ubfx x, cst, width
+ bool matchBitfieldExtractFromAnd(MachineInstr &MI, BuildFnTy &MatchInfo);
+
+ /// Match: shr (shl x, n), k -> sbfx/ubfx x, pos, width
+ bool matchBitfieldExtractFromShr(MachineInstr &MI, BuildFnTy &MatchInfo);
+
+ /// Match: shr (and x, n), k -> ubfx x, pos, width
+ bool matchBitfieldExtractFromShrAnd(MachineInstr &MI, BuildFnTy &MatchInfo);
+
+ // Helpers for reassociation:
+ bool matchReassocConstantInnerRHS(GPtrAdd &MI, MachineInstr *RHS,
+ BuildFnTy &MatchInfo);
+ bool matchReassocFoldConstantsInSubTree(GPtrAdd &MI, MachineInstr *LHS,
+ MachineInstr *RHS,
+ BuildFnTy &MatchInfo);
+ bool matchReassocConstantInnerLHS(GPtrAdd &MI, MachineInstr *LHS,
+ MachineInstr *RHS, BuildFnTy &MatchInfo);
/// Reassociate pointer calculations with G_ADD involved, to allow better
/// addressing mode usage.
- bool matchReassocPtrAdd(MachineInstr &MI,
- std::function<void(MachineIRBuilder &)> &MatchInfo);
-
+ bool matchReassocPtrAdd(MachineInstr &MI, BuildFnTy &MatchInfo);
/// Do constant folding when opportunities are exposed after MIR building.
bool matchConstantFold(MachineInstr &MI, APInt &MatchInfo);
+ /// \returns true if it is possible to narrow the width of a scalar binop
+ /// feeding a G_AND instruction \p MI.
+ bool matchNarrowBinopFeedingAnd(MachineInstr &MI, BuildFnTy &MatchInfo);
+
+ /// Given a G_UDIV \p MI expressing a divide by constant, return an
+ /// expression that implements it by multiplying by a magic number.
+ /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
+ MachineInstr *buildUDivUsingMul(MachineInstr &MI);
+ /// Combine G_UDIV by constant into a multiply by magic constant.
+ bool matchUDivByConst(MachineInstr &MI);
+ void applyUDivByConst(MachineInstr &MI);
+
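
As a worked illustration of the "magic number" idea referenced above (the constants below are for one specific case and are not what buildUDivUsingMul emits in general), an unsigned 32-bit divide by 3 reduces to a 64-bit multiply and a shift:

  #include <cassert>
  #include <cstdint>

  // x / 3 == (x * 0xAAAAAAAB) >> 33 for every 32-bit unsigned x, because
  // 0xAAAAAAAB == ceil(2^33 / 3). The G_UDIV-by-constant combine uses the
  // same multiply-high-plus-shift structure with divisor-specific constants.
  uint32_t udiv3(uint32_t X) {
    return static_cast<uint32_t>((X * 0xAAAAAAABULL) >> 33);
  }

  int main() {
    for (uint64_t X = 0; X <= 0xFFFFFFFFULL; X += 12345)
      assert(udiv3(static_cast<uint32_t>(X)) == static_cast<uint32_t>(X) / 3);
    return 0;
  }
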
+ // G_UMULH x, (1 << c)) -> x >> (bitwidth - c)
+ bool matchUMulHToLShr(MachineInstr &MI);
+ void applyUMulHToLShr(MachineInstr &MI);
+
/// Try to transform \p MI by using all of the above
/// combine functions. Returns true if changed.
bool tryCombine(MachineInstr &MI);
@@ -560,20 +634,21 @@ public:
/// and rename: s/bool tryEmit/void emit/
bool tryEmitMemcpyInline(MachineInstr &MI);
-private:
- // Memcpy family optimization helpers.
- bool tryEmitMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
- uint64_t KnownLen, Align DstAlign, Align SrcAlign,
- bool IsVolatile);
- bool optimizeMemcpy(MachineInstr &MI, Register Dst, Register Src,
- uint64_t KnownLen, uint64_t Limit, Align DstAlign,
- Align SrcAlign, bool IsVolatile);
- bool optimizeMemmove(MachineInstr &MI, Register Dst, Register Src,
- uint64_t KnownLen, Align DstAlign, Align SrcAlign,
- bool IsVolatile);
- bool optimizeMemset(MachineInstr &MI, Register Dst, Register Val,
- uint64_t KnownLen, Align DstAlign, bool IsVolatile);
+ /// Match:
+ /// (G_UMULO x, 2) -> (G_UADDO x, x)
+ /// (G_SMULO x, 2) -> (G_SADDO x, x)
+ bool matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo);
+ /// Transform (fadd x, fneg(y)) -> (fsub x, y)
+ /// (fadd fneg(x), y) -> (fsub y, x)
+ /// (fsub x, fneg(y)) -> (fadd x, y)
+ /// (fmul fneg(x), fneg(y)) -> (fmul x, y)
+ /// (fdiv fneg(x), fneg(y)) -> (fdiv x, y)
+ /// (fmad fneg(x), fneg(y), z) -> (fmad x, y, z)
+ /// (fma fneg(x), fneg(y), z) -> (fma x, y, z)
+ bool matchRedundantNegOperands(MachineInstr &MI, BuildFnTy &MatchInfo);
+
+private:
/// Given a non-indexed load or store instruction \p MI, find an offset that
/// can be usefully and legally folded into it as a post-indexing operation.
///
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index 1162134b2ad2..7103656365b1 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -57,9 +57,9 @@ public:
bool isUnordered() const { return getMMO().isUnordered(); }
/// Returns the size in bytes of the memory access.
- uint64_t getMemSize() { return getMMO().getSize(); }
+ uint64_t getMemSize() const { return getMMO().getSize(); }
/// Returns the size in bits of the memory access.
- uint64_t getMemSizeInBits() { return getMMO().getSizeInBits(); }
+ uint64_t getMemSizeInBits() const { return getMMO().getSizeInBits(); }
static bool classof(const MachineInstr *MI) {
switch (MI->getOpcode()) {
@@ -195,6 +195,37 @@ public:
}
};
+/// Represents a G_PTR_ADD.
+class GPtrAdd : public GenericMachineInstr {
+public:
+ Register getBaseReg() const { return getReg(1); }
+ Register getOffsetReg() const { return getReg(2); }
+
+ static bool classof(const MachineInstr *MI) {
+ return MI->getOpcode() == TargetOpcode::G_PTR_ADD;
+ }
+};
+
+/// Represents a G_IMPLICIT_DEF.
+class GImplicitDef : public GenericMachineInstr {
+public:
+ static bool classof(const MachineInstr *MI) {
+ return MI->getOpcode() == TargetOpcode::G_IMPLICIT_DEF;
+ }
+};
+
+/// Represents a G_SELECT.
+class GSelect : public GenericMachineInstr {
+public:
+ Register getCondReg() const { return getReg(1); }
+ Register getTrueReg() const { return getReg(2); }
+ Register getFalseReg() const { return getReg(3); }
+
+ static bool classof(const MachineInstr *MI) {
+ return MI->getOpcode() == TargetOpcode::G_SELECT;
+ }
+};
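
These thin wrappers let GlobalISel code use the usual isa/dyn_cast machinery instead of raw opcode checks. A small hypothetical helper showing the intended style (getIConstantVRegSExtVal is the utility declared in GlobalISel/Utils.h):

  // Hypothetical: if MI is a G_PTR_ADD with a constant offset, return the
  // offset, otherwise None.
  Optional<int64_t> getConstantPtrAddOffset(const MachineInstr &MI,
                                            const MachineRegisterInfo &MRI) {
    if (const auto *PtrAdd = dyn_cast<GPtrAdd>(&MI))
      return getIConstantVRegSExtVal(PtrAdd->getOffsetReg(), MRI);
    return None;
  }
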
+
} // namespace llvm
-#endif // LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H
\ No newline at end of file
+#endif // LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
index 8eab8a5846a7..ebe16cd4f58c 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
@@ -20,6 +20,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -466,9 +467,8 @@ private:
bool translateSIToFP(const User &U, MachineIRBuilder &MIRBuilder) {
return translateCast(TargetOpcode::G_SITOFP, U, MIRBuilder);
}
- bool translateUnreachable(const User &U, MachineIRBuilder &MIRBuilder) {
- return true;
- }
+ bool translateUnreachable(const User &U, MachineIRBuilder &MIRBuilder);
+
bool translateSExt(const User &U, MachineIRBuilder &MIRBuilder) {
return translateCast(TargetOpcode::G_SEXT, U, MIRBuilder);
}
@@ -586,6 +586,8 @@ private:
/// stop translating such blocks early.
bool HasTailCall = false;
+ StackProtectorDescriptor SPDescriptor;
+
/// Switch analysis and optimization.
class GISelSwitchLowering : public SwitchCG::SwitchLowering {
public:
@@ -614,8 +616,34 @@ private:
// * Clear the different maps.
void finalizeFunction();
- // Handle emitting jump tables for each basic block.
- void finalizeBasicBlock();
+ // Processing steps done per block. E.g. emitting jump tables, stack
+ // protectors etc. Returns true if no errors, false if there was a problem
+ // that caused an abort.
+ bool finalizeBasicBlock(const BasicBlock &BB, MachineBasicBlock &MBB);
+
+ /// Codegen a new tail for a stack protector check ParentMBB which has had its
+ /// tail spliced into a stack protector check success bb.
+ ///
+ /// For a high level explanation of how this fits into the stack protector
+ /// generation see the comment on the declaration of class
+ /// StackProtectorDescriptor.
+ ///
+ /// \return true if there were no problems.
+ bool emitSPDescriptorParent(StackProtectorDescriptor &SPD,
+ MachineBasicBlock *ParentBB);
+
+ /// Codegen the failure basic block for a stack protector check.
+ ///
+ /// A failure stack protector machine basic block consists simply of a call to
+ /// __stack_chk_fail().
+ ///
+ /// For a high level explanation of how this fits into the stack protector
+ /// generation see the comment on the declaration of class
+ /// StackProtectorDescriptor.
+ ///
+ /// \return true if there were no problems.
+ bool emitSPDescriptorFailure(StackProtectorDescriptor &SPD,
+ MachineBasicBlock *FailureBB);
/// Get the VRegs that represent \p Val.
/// Non-aggregate types have just one corresponding VReg and the list can be
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegacyLegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegacyLegalizerInfo.h
index b1f2103da309..f6704df3f49d 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegacyLegalizerInfo.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegacyLegalizerInfo.h
@@ -478,4 +478,4 @@ private:
} // end namespace llvm
-#endif // define LLVM_CODEGEN_GLOBALISEL_LEGACYLEGALIZERINFO_H
+#endif // LLVM_CODEGEN_GLOBALISEL_LEGACYLEGALIZERINFO_H
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
index 44a48927d35a..8a603de2f91d 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
@@ -15,6 +15,7 @@
#define LLVM_CODEGEN_GLOBALISEL_LEGALIZATIONARTIFACTCOMBINER_H
#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
@@ -22,6 +23,7 @@
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Register.h"
#include "llvm/Support/Debug.h"
#define DEBUG_TYPE "legalizer"
@@ -52,7 +54,8 @@ public:
bool tryCombineAnyExt(MachineInstr &MI,
SmallVectorImpl<MachineInstr *> &DeadInsts,
- SmallVectorImpl<Register> &UpdatedDefs) {
+ SmallVectorImpl<Register> &UpdatedDefs,
+ GISelObserverWrapper &Observer) {
assert(MI.getOpcode() == TargetOpcode::G_ANYEXT);
Builder.setInstrAndDebugLoc(MI);
@@ -63,7 +66,11 @@ public:
Register TruncSrc;
if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
- Builder.buildAnyExtOrTrunc(DstReg, TruncSrc);
+ if (MRI.getType(DstReg) == MRI.getType(TruncSrc))
+ replaceRegOrBuildCopy(DstReg, TruncSrc, MRI, Builder, UpdatedDefs,
+ Observer);
+ else
+ Builder.buildAnyExtOrTrunc(DstReg, TruncSrc);
UpdatedDefs.push_back(DstReg);
markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
return true;
@@ -120,12 +127,14 @@ public:
return false;
LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
LLT SrcTy = MRI.getType(SrcReg);
- APInt MaskVal = APInt::getAllOnesValue(SrcTy.getScalarSizeInBits());
+ APInt MaskVal = APInt::getAllOnes(SrcTy.getScalarSizeInBits());
auto Mask = Builder.buildConstant(
DstTy, MaskVal.zext(DstTy.getScalarSizeInBits()));
- auto Extended = SextSrc ? Builder.buildSExtOrTrunc(DstTy, SextSrc) :
- Builder.buildAnyExtOrTrunc(DstTy, TruncSrc);
- Builder.buildAnd(DstReg, Extended, Mask);
+ if (SextSrc && (DstTy != MRI.getType(SextSrc)))
+ SextSrc = Builder.buildSExtOrTrunc(DstTy, SextSrc).getReg(0);
+ if (TruncSrc && (DstTy != MRI.getType(TruncSrc)))
+ TruncSrc = Builder.buildAnyExtOrTrunc(DstTy, TruncSrc).getReg(0);
+ Builder.buildAnd(DstReg, SextSrc ? SextSrc : TruncSrc, Mask);
markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
return true;
}
@@ -176,9 +185,9 @@ public:
LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
LLT SrcTy = MRI.getType(SrcReg);
uint64_t SizeInBits = SrcTy.getScalarSizeInBits();
- Builder.buildInstr(
- TargetOpcode::G_SEXT_INREG, {DstReg},
- {Builder.buildAnyExtOrTrunc(DstTy, TruncSrc), SizeInBits});
+ if (DstTy != MRI.getType(TruncSrc))
+ TruncSrc = Builder.buildAnyExtOrTrunc(DstTy, TruncSrc).getReg(0);
+ Builder.buildSExtInReg(DstReg, TruncSrc, SizeInBits);
markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
return true;
}
@@ -544,12 +553,14 @@ public:
MachineIRBuilder &MIB;
const LegalizerInfo &LI;
- private:
+ // Stores the best register found in the current query so far.
+ Register CurrentBest = Register();
+
/// Given a concat_vector op \p Concat and a start bit and size, try to
/// find the origin of the value defined by that start position and size.
///
- /// \returns A register if a value can be found, otherwise an empty
- /// Register.
+ /// \returns a register with the requested size, or the current best
+ /// register found during the current query.
Register findValueFromConcat(GConcatVectors &Concat, unsigned StartBit,
unsigned Size) {
assert(Size > 0);
@@ -566,22 +577,22 @@ public:
// FIXME: we might be able return multiple sources? Or create an
// appropriate concat to make it fit.
if (InRegOffset + Size > SrcSize)
- return Register();
+ return CurrentBest;
- // If the bits exactly cover a single source, then return the operand as
- // our value reg.
Register SrcReg = Concat.getReg(StartSrcIdx);
- if (InRegOffset == 0 && Size == SrcSize)
- return SrcReg; // A source operand matches exactly.
+ if (InRegOffset == 0 && Size == SrcSize) {
+ CurrentBest = SrcReg;
+ return findValueFromDefImpl(SrcReg, 0, Size);
+ }
- return findValueFromDef(SrcReg, InRegOffset, Size);
+ return findValueFromDefImpl(SrcReg, InRegOffset, Size);
}
/// Given a build_vector op \p BV and a start bit and size, try to find
/// the origin of the value defined by that start position and size.
///
- /// \returns A register if a value can be found, otherwise an empty
- /// Register.
+ /// \returns a register with the requested size, or the current best
+ /// register found during the current query.
Register findValueFromBuildVector(GBuildVector &BV, unsigned StartBit,
unsigned Size) {
assert(Size > 0);
@@ -596,17 +607,21 @@ public:
unsigned InRegOffset = StartBit % SrcSize;
if (InRegOffset != 0)
- return Register(); // Give up, bits don't start at a scalar source.
+ return CurrentBest; // Give up, bits don't start at a scalar source.
if (Size < SrcSize)
- return Register(); // Scalar source is too large for requested bits.
+ return CurrentBest; // Scalar source is too large for requested bits.
// If the bits cover multiple sources evenly, then create a new
// build_vector to synthesize the required size, if that's been requested.
if (Size > SrcSize) {
if (Size % SrcSize > 0)
- return Register(); // Isn't covered exactly by sources.
+ return CurrentBest; // Isn't covered exactly by sources.
unsigned NumSrcsUsed = Size / SrcSize;
+ // If we're requesting all of the sources, just return this def.
+ if (NumSrcsUsed == BV.getNumSources())
+ return BV.getReg(0);
+
LLT SrcTy = MRI.getType(Src1Reg);
LLT NewBVTy = LLT::fixed_vector(NumSrcsUsed, SrcTy);
@@ -614,7 +629,7 @@ public:
LegalizeActionStep ActionStep =
LI.getAction({TargetOpcode::G_BUILD_VECTOR, {NewBVTy, SrcTy}});
if (ActionStep.Action != LegalizeActions::Legal)
- return Register();
+ return CurrentBest;
SmallVector<Register> NewSrcs;
for (unsigned SrcIdx = StartSrcIdx; SrcIdx < StartSrcIdx + NumSrcsUsed;
@@ -630,8 +645,8 @@ public:
/// Given a G_INSERT op \p MI and a start bit and size, try to find
/// the origin of the value defined by that start position and size.
///
- /// \returns A register if a value can be found, otherwise an empty
- /// Register.
+ /// \returns a register with the requested size, or the current best
+ /// register found during the current query.
Register findValueFromInsert(MachineInstr &MI, unsigned StartBit,
unsigned Size) {
assert(MI.getOpcode() == TargetOpcode::G_INSERT);
@@ -685,28 +700,25 @@ public:
if (EndBit <= InsertOffset || InsertedEndBit <= StartBit) {
SrcRegToUse = ContainerSrcReg;
NewStartBit = StartBit;
- return findValueFromDef(SrcRegToUse, NewStartBit, Size);
+ return findValueFromDefImpl(SrcRegToUse, NewStartBit, Size);
}
if (InsertOffset <= StartBit && EndBit <= InsertedEndBit) {
SrcRegToUse = InsertedReg;
NewStartBit = StartBit - InsertOffset;
- return findValueFromDef(SrcRegToUse, NewStartBit, Size);
+ if (NewStartBit == 0 &&
+ Size == MRI.getType(SrcRegToUse).getSizeInBits())
+ CurrentBest = SrcRegToUse;
+ return findValueFromDefImpl(SrcRegToUse, NewStartBit, Size);
}
// The bit range spans both the inserted and container regions.
return Register();
}
- public:
- ArtifactValueFinder(MachineRegisterInfo &Mri, MachineIRBuilder &Builder,
- const LegalizerInfo &Info)
- : MRI(Mri), MIB(Builder), LI(Info) {}
-
- /// Try to find a source of the value defined in the def \p DefReg, starting
- /// at position \p StartBit with size \p Size.
- /// \returns an empty Register if no value could be found, or \p DefReg if
- /// if that was the best we could do.
- Register findValueFromDef(Register DefReg, unsigned StartBit,
- unsigned Size) {
+ /// Internal implementation for findValueFromDef(). findValueFromDef()
+ /// initializes some data like the CurrentBest register, which this method
+ /// and its callees rely upon.
+ Register findValueFromDefImpl(Register DefReg, unsigned StartBit,
+ unsigned Size) {
MachineInstr *Def = getDefIgnoringCopies(DefReg, MRI);
// If the instruction has a single def, then simply delegate the search.
// For unmerge however with multiple defs, we need to compute the offset
@@ -724,7 +736,7 @@ public:
}
Register SrcReg = Def->getOperand(Def->getNumOperands() - 1).getReg();
Register SrcOriginReg =
- findValueFromDef(SrcReg, StartBit + DefStartBit, Size);
+ findValueFromDefImpl(SrcReg, StartBit + DefStartBit, Size);
if (SrcOriginReg)
return SrcOriginReg;
// Failed to find a further value. If the StartBit and Size perfectly
@@ -732,7 +744,7 @@ public:
// nothing.
if (StartBit == 0 && Size == DefSize)
return DefReg;
- return Register();
+ return CurrentBest;
}
case TargetOpcode::G_BUILD_VECTOR:
return findValueFromBuildVector(cast<GBuildVector>(*Def), StartBit,
@@ -740,41 +752,48 @@ public:
case TargetOpcode::G_INSERT:
return findValueFromInsert(*Def, StartBit, Size);
default:
- return Register();
+ return CurrentBest;
}
}
- };
- bool tryCombineUnmergeValues(GUnmerge &MI,
- SmallVectorImpl<MachineInstr *> &DeadInsts,
- SmallVectorImpl<Register> &UpdatedDefs,
- GISelChangeObserver &Observer) {
- unsigned NumDefs = MI.getNumDefs();
- Register SrcReg = MI.getSourceReg();
- MachineInstr *SrcDef = getDefIgnoringCopies(SrcReg, MRI);
- if (!SrcDef)
- return false;
-
- LLT OpTy = MRI.getType(SrcReg);
- LLT DestTy = MRI.getType(MI.getReg(0));
- unsigned SrcDefIdx = getDefIndex(*SrcDef, SrcReg);
+ public:
+ ArtifactValueFinder(MachineRegisterInfo &Mri, MachineIRBuilder &Builder,
+ const LegalizerInfo &Info)
+ : MRI(Mri), MIB(Builder), LI(Info) {}
- Builder.setInstrAndDebugLoc(MI);
+ /// Try to find a source of the value defined in the def \p DefReg, starting
+ /// at position \p StartBit with size \p Size.
+ /// \returns a register with the requested size, or an empty Register if no
+ /// better value could be found.
+ Register findValueFromDef(Register DefReg, unsigned StartBit,
+ unsigned Size) {
+ CurrentBest = Register();
+ Register FoundReg = findValueFromDefImpl(DefReg, StartBit, Size);
+ return FoundReg != DefReg ? FoundReg : Register();
+ }
- auto tryCombineViaValueFinder = [&]() {
- ArtifactValueFinder ValueFinder(MRI, Builder, LI);
+ /// Try to combine the defs of an unmerge \p MI by attempting to find
+ /// values that provide the bits for each def reg.
+ /// \returns true if all the defs of the unmerge have been made dead.
+ bool tryCombineUnmergeDefs(GUnmerge &MI, GISelChangeObserver &Observer,
+ SmallVectorImpl<Register> &UpdatedDefs) {
+ unsigned NumDefs = MI.getNumDefs();
+ LLT DestTy = MRI.getType(MI.getReg(0));
SmallBitVector DeadDefs(NumDefs);
for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) {
Register DefReg = MI.getReg(DefIdx);
- Register FoundVal =
- ValueFinder.findValueFromDef(DefReg, 0, DestTy.getSizeInBits());
- if (!FoundVal || FoundVal == DefReg)
+ if (MRI.use_nodbg_empty(DefReg)) {
+ DeadDefs[DefIdx] = true;
+ continue;
+ }
+ Register FoundVal = findValueFromDef(DefReg, 0, DestTy.getSizeInBits());
+ if (!FoundVal)
continue;
if (MRI.getType(FoundVal) != DestTy)
continue;
- replaceRegOrBuildCopy(DefReg, FoundVal, MRI, Builder, UpdatedDefs,
+ replaceRegOrBuildCopy(DefReg, FoundVal, MRI, MIB, UpdatedDefs,
Observer);
// We only want to replace the uses, not the def of the old reg.
Observer.changingInstr(MI);
@@ -782,12 +801,31 @@ public:
Observer.changedInstr(MI);
DeadDefs[DefIdx] = true;
}
- if (DeadDefs.all()) {
- markInstAndDefDead(MI, *SrcDef, DeadInsts, SrcDefIdx);
- return true;
- }
+ return DeadDefs.all();
+ }
+ };
+
+ bool tryCombineUnmergeValues(GUnmerge &MI,
+ SmallVectorImpl<MachineInstr *> &DeadInsts,
+ SmallVectorImpl<Register> &UpdatedDefs,
+ GISelChangeObserver &Observer) {
+ unsigned NumDefs = MI.getNumDefs();
+ Register SrcReg = MI.getSourceReg();
+ MachineInstr *SrcDef = getDefIgnoringCopies(SrcReg, MRI);
+ if (!SrcDef)
return false;
- };
+
+ LLT OpTy = MRI.getType(SrcReg);
+ LLT DestTy = MRI.getType(MI.getReg(0));
+ unsigned SrcDefIdx = getDefIndex(*SrcDef, SrcReg);
+
+ Builder.setInstrAndDebugLoc(MI);
+
+ ArtifactValueFinder Finder(MRI, Builder, LI);
+ if (Finder.tryCombineUnmergeDefs(MI, Observer, UpdatedDefs)) {
+ markInstAndDefDead(MI, *SrcDef, DeadInsts, SrcDefIdx);
+ return true;
+ }
if (auto *SrcUnmerge = dyn_cast<GUnmerge>(SrcDef)) {
// %0:_(<4 x s16>) = G_FOO
@@ -813,7 +851,7 @@ public:
return false;
break;
default:
- return tryCombineViaValueFinder();
+ return false;
}
auto NewUnmerge = Builder.buildUnmerge(DestTy, SrcUnmergeSrc);
@@ -845,11 +883,7 @@ public:
ConvertOp, OpTy, DestTy)) {
// We might have a chance to combine later by trying to combine
// unmerge(cast) first
- if (tryFoldUnmergeCast(MI, *SrcDef, DeadInsts, UpdatedDefs))
- return true;
-
- // Try using the value finder.
- return tryCombineViaValueFinder();
+ return tryFoldUnmergeCast(MI, *SrcDef, DeadInsts, UpdatedDefs);
}
const unsigned NumMergeRegs = MergeI->getNumOperands() - 1;
@@ -1042,7 +1076,7 @@ public:
default:
return false;
case TargetOpcode::G_ANYEXT:
- Changed = tryCombineAnyExt(MI, DeadInsts, UpdatedDefs);
+ Changed = tryCombineAnyExt(MI, DeadInsts, UpdatedDefs, WrapperObserver);
break;
case TargetOpcode::G_ZEXT:
Changed = tryCombineZExt(MI, DeadInsts, UpdatedDefs, WrapperObserver);
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index 67141f3a6326..74615c73741a 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -256,6 +256,20 @@ private:
LLT SrcTy, LLT NarrowTy,
unsigned ScalarOpc);
+ // Memcpy family legalization helpers.
+ LegalizeResult lowerMemset(MachineInstr &MI, Register Dst, Register Val,
+ uint64_t KnownLen, Align Alignment,
+ bool IsVolatile);
+ LegalizeResult lowerMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
+ uint64_t KnownLen, Align DstAlign,
+ Align SrcAlign, bool IsVolatile);
+ LegalizeResult lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
+ uint64_t KnownLen, uint64_t Limit, Align DstAlign,
+ Align SrcAlign, bool IsVolatile);
+ LegalizeResult lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
+ uint64_t KnownLen, Align DstAlign, Align SrcAlign,
+ bool IsVolatile);
+
public:
/// Return the alignment to use for a stack temporary object with the given
/// type.
@@ -402,6 +416,9 @@ public:
LegalizeResult lowerDIVREM(MachineInstr &MI);
LegalizeResult lowerAbsToAddXor(MachineInstr &MI);
LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI);
+ LegalizeResult lowerVectorReduction(MachineInstr &MI);
+ LegalizeResult lowerMemcpyInline(MachineInstr &MI);
+ LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen = 0);
};
/// Helper function that creates a libcall to the given \p Name using the given
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
index 4fdfabbfb161..68c14240ebc7 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
@@ -15,8 +15,6 @@
#define LLVM_CODEGEN_GLOBALISEL_LEGALIZERINFO_H
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/None.h"
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallVector.h"
@@ -113,6 +111,14 @@ struct LegalityQuery {
LLT MemoryTy;
uint64_t AlignInBits;
AtomicOrdering Ordering;
+
+ MemDesc() = default;
+ MemDesc(LLT MemoryTy, uint64_t AlignInBits, AtomicOrdering Ordering)
+ : MemoryTy(MemoryTy), AlignInBits(AlignInBits), Ordering(Ordering) {}
+ MemDesc(const MachineMemOperand &MMO)
+ : MemoryTy(MMO.getMemoryType()),
+ AlignInBits(MMO.getAlign().value() * 8),
+ Ordering(MMO.getSuccessOrdering()) {}
};
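
The new constructors mostly remove boilerplate when a legality query is built straight from an instruction's memory operand; a sketch of the intended use (the helper name is invented):

  // Sketch: describe a load's memory access for a LegalityQuery. Equivalent to
  // MemDesc(MMO.getMemoryType(), MMO.getAlign().value() * 8,
  //         MMO.getSuccessOrdering()).
  LegalityQuery::MemDesc describeLoad(const GLoad &Load) {
    return LegalityQuery::MemDesc(Load.getMMO());
  }
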
/// Operations which require memory can use this to place requirements on the
@@ -293,6 +299,10 @@ LegalityPredicate scalarOrEltNarrowerThan(unsigned TypeIdx, unsigned Size);
/// type that's wider than the given size.
LegalityPredicate scalarOrEltWiderThan(unsigned TypeIdx, unsigned Size);
+/// True iff the specified type index is a scalar whose size is not a multiple
+/// of Size.
+LegalityPredicate sizeNotMultipleOf(unsigned TypeIdx, unsigned Size);
+
/// True iff the specified type index is a scalar whose size is not a power of
/// 2.
LegalityPredicate sizeNotPow2(unsigned TypeIdx);
@@ -348,6 +358,11 @@ LegalizeMutation changeElementSizeTo(unsigned TypeIdx, unsigned FromTypeIdx);
/// next power of 2.
LegalizeMutation widenScalarOrEltToNextPow2(unsigned TypeIdx, unsigned Min = 0);
+/// Widen the scalar type or vector element type for the given type index to
+/// next multiple of \p Size.
+LegalizeMutation widenScalarOrEltToNextMultipleOf(unsigned TypeIdx,
+ unsigned Size);
+
/// Add more elements to the type for the given type index to the next power of
/// 2.
LegalizeMutation moreElementsToNextPow2(unsigned TypeIdx, unsigned Min = 0);
@@ -828,6 +843,16 @@ public:
LegalizeMutations::widenScalarOrEltToNextPow2(TypeIdx, MinSize));
}
+ /// Widen the scalar to the next multiple of Size. No effect if the
+ /// type is not a scalar or is a multiple of Size.
+ LegalizeRuleSet &widenScalarToNextMultipleOf(unsigned TypeIdx,
+ unsigned Size) {
+ using namespace LegalityPredicates;
+ return actionIf(
+ LegalizeAction::WidenScalar, sizeNotMultipleOf(typeIdx(TypeIdx), Size),
+ LegalizeMutations::widenScalarOrEltToNextMultipleOf(TypeIdx, Size));
+ }
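
A sketch of how a target might use the new rule when building its ruleset; the opcode and sizes are illustrative (s32/s64 standing for LLT::scalar(32)/LLT::scalar(64)), not a recommendation for any particular target:

  // Inside a target's LegalizerInfo constructor: widen odd-sized scalar
  // multiplies up to the next multiple of 32 bits, then clamp to [s32, s64].
  getActionDefinitionsBuilder(TargetOpcode::G_MUL)
      .legalFor({s32, s64})
      .widenScalarToNextMultipleOf(0, 32)
      .clampScalar(0, s32, s64);
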
+
/// Widen the scalar or vector element type to the next power of two that is
/// at least MinSize. No effect if the scalar size is a power of two.
LegalizeRuleSet &widenScalarOrEltToNextPow2(unsigned TypeIdx,
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h b/llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h
new file mode 100644
index 000000000000..29575f386d7a
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h
@@ -0,0 +1,165 @@
+//== llvm/CodeGen/GlobalISel/LoadStoreOpt.h - LoadStoreOpt -------*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// This is an optimization pass for GlobalISel generic memory operations.
+/// Specifically, it focuses on merging stores and loads to consecutive
+/// addresses.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_GLOBALISEL_LOADSTOREOPT_H
+#define LLVM_CODEGEN_GLOBALISEL_LOADSTOREOPT_H
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+
+namespace llvm {
+// Forward declarations.
+class MachineRegisterInfo;
+class TargetTransformInfo;
+namespace GISelAddressing {
+/// Helper struct to store a base, index and offset that forms an address
+struct BaseIndexOffset {
+ Register BaseReg;
+ Register IndexReg;
+ int64_t Offset = 0;
+ bool IsIndexSignExt = false;
+};
+
+/// Returns a BaseIndexOffset which describes the pointer in \p Ptr.
+BaseIndexOffset getPointerInfo(Register Ptr, MachineRegisterInfo &MRI);
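
A sketch of how this decomposition can be consumed when reasoning about two stores; the helper is hypothetical and deliberately conservative (it bails out whenever an index register is involved):

  // Hypothetical: two stores with the same base register and disjoint
  // [Offset, Offset + Size) byte ranges cannot alias.
  bool provablyDisjointStores(GStore &A, GStore &B, MachineRegisterInfo &MRI) {
    BaseIndexOffset IA = getPointerInfo(A.getPointerReg(), MRI);
    BaseIndexOffset IB = getPointerInfo(B.getPointerReg(), MRI);
    if (!IA.BaseReg.isValid() || IA.BaseReg != IB.BaseReg ||
        IA.IndexReg.isValid() || IB.IndexReg.isValid())
      return false;
    int64_t SizeA = static_cast<int64_t>(A.getMemSize());
    int64_t SizeB = static_cast<int64_t>(B.getMemSize());
    return IA.Offset + SizeA <= IB.Offset || IB.Offset + SizeB <= IA.Offset;
  }
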
+
+/// Compute whether or not a memory access at \p MI1 aliases with an access at
+/// \p MI2. \returns true if either alias/no-alias is known. Sets \p IsAlias
+/// accordingly.
+bool aliasIsKnownForLoadStore(const MachineInstr &MI1, const MachineInstr &MI2,
+ bool &IsAlias, MachineRegisterInfo &MRI);
+
+/// Returns true if the instruction \p MI may alias \p Other.
+/// This function uses multiple strategies to detect aliasing, whereas
+/// aliasIsKnownForLoadStore just looks at the addresses of load/stores and
+/// tries to reason about base/index/offsets.
+bool instMayAlias(const MachineInstr &MI, const MachineInstr &Other,
+ MachineRegisterInfo &MRI, AliasAnalysis *AA);
+} // namespace GISelAddressing
+
+using namespace GISelAddressing;
+
+class LoadStoreOpt : public MachineFunctionPass {
+public:
+ static char ID;
+
+private:
+ /// An input function to decide if the pass should run or not
+ /// on the given MachineFunction.
+ std::function<bool(const MachineFunction &)> DoNotRunPass;
+
+ MachineRegisterInfo *MRI;
+ const TargetLowering *TLI;
+ MachineFunction *MF;
+ AliasAnalysis *AA;
+ const LegalizerInfo *LI;
+
+ MachineIRBuilder Builder;
+
+ /// Initialize the field members using \p MF.
+ void init(MachineFunction &MF);
+
+ class StoreMergeCandidate {
+ public:
+ // The base pointer used as the base for all stores in this candidate.
+ Register BasePtr;
+ // Our algorithm is very simple at the moment. We assume that in instruction
+ // order stores are writing to incrementing consecutive addresses. So when
+ // we walk the block in reverse order, the next eligible store must write to
+ // an offset one store width lower than CurrentLowestOffset.
+ uint64_t CurrentLowestOffset;
+ SmallVector<GStore *> Stores;
+ // A vector of MachineInstr/unsigned pairs to denote potential aliases that
+ // need to be checked before the candidate is considered safe to merge. The
+ // unsigned value is an index into the Stores vector. The indexed store is
+ // the highest-indexed store that has already been checked to not have an
+ // alias with the instruction. We record this so we don't have to repeat
+ // alias checks that have already been done; we only check against stores
+ // added after the potential alias is recorded.
+ SmallVector<std::pair<MachineInstr *, unsigned>> PotentialAliases;
+
+ void addPotentialAlias(MachineInstr &MI);
+
+ /// Reset this candidate back to an empty one.
+ void reset() {
+ Stores.clear();
+ PotentialAliases.clear();
+ CurrentLowestOffset = 0;
+ BasePtr = Register();
+ }
+ };
+
+ bool isLegalOrBeforeLegalizer(const LegalityQuery &Query,
+ MachineFunction &MF) const;
+ /// If the given store is valid to be a member of the candidate, add it and
+ /// return true. Otherwise, returns false.
+ bool addStoreToCandidate(GStore &MI, StoreMergeCandidate &C);
+ /// Returns true if the instruction \p MI would potentially alias with any
+ /// stores in the candidate \p C.
+ bool operationAliasesWithCandidate(MachineInstr &MI, StoreMergeCandidate &C);
+ /// Merges the stores in the given vector into a wide store.
+ /// \returns true if at least some of the stores were merged.
+ /// This may decide not to merge stores if heuristics predict it will not be
+ /// worth it.
+ bool mergeStores(SmallVectorImpl<GStore *> &StoresToMerge);
+ /// Perform a merge of all the stores in \p Stores into a single store.
+ /// Erases the old stores from the block when finished.
+ /// \returns true if merging was done. It may fail to perform a merge if
+ /// there are issues with materializing legal wide values.
+ bool doSingleStoreMerge(SmallVectorImpl<GStore *> &Stores);
+ bool processMergeCandidate(StoreMergeCandidate &C);
+ bool mergeBlockStores(MachineBasicBlock &MBB);
+ bool mergeFunctionStores(MachineFunction &MF);
+
+ /// Initialize some target-specific data structures for the store merging
+ /// optimization. \p AddrSpace indicates which address space to use when
+ /// probing the legalizer info for legal stores.
+ void initializeStoreMergeTargetInfo(unsigned AddrSpace = 0);
+ /// A map between address space numbers and a bitvector of supported stores
+ /// sizes. Each bit in the bitvector represents whether a store size of
+ /// that bit's value is legal. E.g. if bit 64 is set, then 64 bit scalar
+ /// stores are legal.
+ DenseMap<unsigned, BitVector> LegalStoreSizes;
+ bool IsPreLegalizer;
+ /// Contains instructions to be erased at the end of a block scan.
+ SmallSet<MachineInstr *, 16> InstsToErase;
+
+public:
+ LoadStoreOpt();
+ LoadStoreOpt(std::function<bool(const MachineFunction &)>);
+
+ StringRef getPassName() const override { return "LoadStoreOpt"; }
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties()
+ .set(MachineFunctionProperties::Property::IsSSA);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
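
The std::function constructor lets a target pipeline disable the pass per function. A sketch, assuming (from the DoNotRunPass field name) that the predicate returns true when the pass should be skipped:

  // In a target's pass configuration: skip store merging for optnone functions.
  addPass(new LoadStoreOpt(
      [](const MachineFunction &MF) { return MF.getFunction().hasOptNone(); }));
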
+
+} // End namespace llvm.
+
+#endif
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
index 4c6b47ab9bc8..e813d030eec3 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
@@ -63,7 +63,7 @@ struct ConstantMatch {
int64_t &CR;
ConstantMatch(int64_t &C) : CR(C) {}
bool match(const MachineRegisterInfo &MRI, Register Reg) {
- if (auto MaybeCst = getConstantVRegSExtVal(Reg, MRI)) {
+ if (auto MaybeCst = getIConstantVRegSExtVal(Reg, MRI)) {
CR = *MaybeCst;
return true;
}
@@ -73,21 +73,46 @@ struct ConstantMatch {
inline ConstantMatch m_ICst(int64_t &Cst) { return ConstantMatch(Cst); }
-struct ICstRegMatch {
- Register &CR;
- ICstRegMatch(Register &C) : CR(C) {}
+struct GCstAndRegMatch {
+ Optional<ValueAndVReg> &ValReg;
+ GCstAndRegMatch(Optional<ValueAndVReg> &ValReg) : ValReg(ValReg) {}
bool match(const MachineRegisterInfo &MRI, Register Reg) {
- if (auto MaybeCst = getConstantVRegValWithLookThrough(
- Reg, MRI, /*LookThroughInstrs*/ true,
- /*HandleFConstants*/ false)) {
- CR = MaybeCst->VReg;
- return true;
- }
- return false;
+ ValReg = getIConstantVRegValWithLookThrough(Reg, MRI);
+ return ValReg ? true : false;
}
};
-inline ICstRegMatch m_ICst(Register &Reg) { return ICstRegMatch(Reg); }
+inline GCstAndRegMatch m_GCst(Optional<ValueAndVReg> &ValReg) {
+ return GCstAndRegMatch(ValReg);
+}
+
+struct GFCstAndRegMatch {
+ Optional<FPValueAndVReg> &FPValReg;
+ GFCstAndRegMatch(Optional<FPValueAndVReg> &FPValReg) : FPValReg(FPValReg) {}
+ bool match(const MachineRegisterInfo &MRI, Register Reg) {
+ FPValReg = getFConstantVRegValWithLookThrough(Reg, MRI);
+ return FPValReg ? true : false;
+ }
+};
+
+inline GFCstAndRegMatch m_GFCst(Optional<FPValueAndVReg> &FPValReg) {
+ return GFCstAndRegMatch(FPValReg);
+}
+
+struct GFCstOrSplatGFCstMatch {
+ Optional<FPValueAndVReg> &FPValReg;
+ GFCstOrSplatGFCstMatch(Optional<FPValueAndVReg> &FPValReg)
+ : FPValReg(FPValReg) {}
+ bool match(const MachineRegisterInfo &MRI, Register Reg) {
+ return (FPValReg = getFConstantSplat(Reg, MRI)) ||
+ (FPValReg = getFConstantVRegValWithLookThrough(Reg, MRI));
+ };
+};
+
+inline GFCstOrSplatGFCstMatch
+m_GFCstOrSplat(Optional<FPValueAndVReg> &FPValReg) {
+ return GFCstOrSplatGFCstMatch(FPValReg);
+}
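
A sketch of the new matchers in use; the surrounding check is hypothetical and assumes `using namespace MIPatternMatch;` is in scope:

  // Hypothetical: is MI's second source operand a floating-point constant
  // (scalar G_FCONSTANT or a splat build_vector of one) equal to +1.0?
  bool isFPOneOperand(const MachineInstr &MI, const MachineRegisterInfo &MRI) {
    Optional<FPValueAndVReg> FPValReg;
    if (!mi_match(MI.getOperand(2).getReg(), MRI, m_GFCstOrSplat(FPValReg)))
      return false;
    return FPValReg->Value.isExactlyValue(1.0);
  }
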
/// Matcher for a specific constant value.
struct SpecificConstantMatch {
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
index 9b652d8e16bc..069f71b54328 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -1537,6 +1537,14 @@ public:
return buildInstr(TargetOpcode::G_XOR, {Dst}, {Src0, NegOne});
}
+ /// Build and insert integer negation
+ /// \p Zero = G_CONSTANT 0
+ /// \p Res = G_SUB Zero, \p Op0
+ MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0) {
+ auto Zero = buildConstant(Dst.getLLTTy(*getMRI()), 0);
+ return buildInstr(TargetOpcode::G_SUB, {Dst}, {Zero, Src0});
+ }
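
For example, the abs-as-max lowering mentioned elsewhere in this patch (lowerAbsToMaxNeg) can be expressed roughly as follows; the helper is a sketch, not the actual LegalizerHelper code:

  // Sketch: abs(x) -> smax(x, 0 - x) using the new buildNeg convenience.
  void buildAbsViaMaxNeg(MachineIRBuilder &B, Register Dst, Register Src) {
    auto Neg = B.buildNeg(B.getMRI()->getType(Src), Src);
    B.buildSMax(Dst, Src, Neg);
  }
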
+
/// Build and insert \p Res = G_CTPOP \p Op0, \p Src0
MachineInstrBuilder buildCTPOP(const DstOp &Dst, const SrcOp &Src0) {
return buildInstr(TargetOpcode::G_CTPOP, {Dst}, {Src0});
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
index 818475a48abb..86545b976b8d 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -14,6 +14,9 @@
#ifndef LLVM_CODEGEN_GLOBALISEL_UTILS_H
#define LLVM_CODEGEN_GLOBALISEL_UTILS_H
+#include "GISelWorkList.h"
+#include "LostDebugLocObserver.h"
+#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/Register.h"
@@ -44,6 +47,7 @@ class TargetRegisterClass;
class ConstantInt;
class ConstantFP;
class APFloat;
+class MachineIRBuilder;
// Convenience macros for dealing with vector reduction opcodes.
#define GISEL_VECREDUCE_CASES_ALL \
@@ -162,13 +166,12 @@ void reportGISelWarning(MachineFunction &MF, const TargetPassConfig &TPC,
MachineOptimizationRemarkMissed &R);
/// If \p VReg is defined by a G_CONSTANT, return the corresponding value.
-Optional<APInt> getConstantVRegVal(Register VReg,
- const MachineRegisterInfo &MRI);
+Optional<APInt> getIConstantVRegVal(Register VReg,
+ const MachineRegisterInfo &MRI);
-/// If \p VReg is defined by a G_CONSTANT fits in int64_t
-/// returns it.
-Optional<int64_t> getConstantVRegSExtVal(Register VReg,
- const MachineRegisterInfo &MRI);
+/// If \p VReg is defined by a G_CONSTANT that fits in int64_t, returns it.
+Optional<int64_t> getIConstantVRegSExtVal(Register VReg,
+ const MachineRegisterInfo &MRI);
/// Simple struct used to hold a constant integer value and a virtual
/// register.
@@ -176,22 +179,32 @@ struct ValueAndVReg {
APInt Value;
Register VReg;
};
-/// If \p VReg is defined by a statically evaluable chain of
-/// instructions rooted on a G_F/CONSTANT (\p LookThroughInstrs == true)
-/// and that constant fits in int64_t, returns its value as well as the
-/// virtual register defined by this G_F/CONSTANT.
-/// When \p LookThroughInstrs == false this function behaves like
-/// getConstantVRegVal.
-/// When \p HandleFConstants == false the function bails on G_FCONSTANTs.
-/// When \p LookThroughAnyExt == true the function treats G_ANYEXT same as
-/// G_SEXT.
+
+/// If \p VReg is defined by a statically evaluable chain of instructions rooted
+/// on a G_CONSTANT returns its APInt value and def register.
Optional<ValueAndVReg>
-getConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI,
- bool LookThroughInstrs = true,
- bool HandleFConstants = true,
- bool LookThroughAnyExt = false);
-const ConstantInt *getConstantIntVRegVal(Register VReg,
- const MachineRegisterInfo &MRI);
+getIConstantVRegValWithLookThrough(Register VReg,
+ const MachineRegisterInfo &MRI,
+ bool LookThroughInstrs = true);
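
A sketch of the renamed lookup in use, following the common pattern in combines that need both the constant value and the vreg that defines it:

  // Sketch: detect a G_AND whose second source is a constant of all ones.
  bool isAndWithAllOnesRHS(const MachineInstr &MI,
                           const MachineRegisterInfo &MRI) {
    if (MI.getOpcode() != TargetOpcode::G_AND)
      return false;
    auto ValAndVReg =
        getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
    return ValAndVReg && ValAndVReg->Value.isAllOnes();
  }
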
+
+/// If \p VReg is defined by a statically evaluable chain of instructions rooted
+/// on a G_CONSTANT or G_FCONSTANT returns its value as APInt and def register.
+Optional<ValueAndVReg> getAnyConstantVRegValWithLookThrough(
+ Register VReg, const MachineRegisterInfo &MRI,
+ bool LookThroughInstrs = true, bool LookThroughAnyExt = false);
+
+struct FPValueAndVReg {
+ APFloat Value;
+ Register VReg;
+};
+
+/// If \p VReg is defined by a statically evaluable chain of instructions rooted
+/// on a G_FCONSTANT returns its APFloat value and def register.
+Optional<FPValueAndVReg>
+getFConstantVRegValWithLookThrough(Register VReg,
+ const MachineRegisterInfo &MRI,
+ bool LookThroughInstrs = true);
+
const ConstantFP* getConstantFPVRegVal(Register VReg,
const MachineRegisterInfo &MRI);
@@ -254,6 +267,14 @@ Optional<APFloat> ConstantFoldFPBinOp(unsigned Opcode, const Register Op1,
const Register Op2,
const MachineRegisterInfo &MRI);
+/// Tries to constant fold a vector binop with sources \p Op1 and \p Op2.
+/// If successful, returns the G_BUILD_VECTOR representing the folded vector
+/// constant. \p MIB should have an insertion point already set to create new
+/// G_CONSTANT instructions as needed.
+Optional<MachineInstr *>
+ConstantFoldVectorBinop(unsigned Opcode, const Register Op1, const Register Op2,
+ const MachineRegisterInfo &MRI, MachineIRBuilder &MIB);
+
Optional<APInt> ConstantFoldExtOp(unsigned Opcode, const Register Op1,
uint64_t Imm, const MachineRegisterInfo &MRI);
@@ -261,6 +282,11 @@ Optional<APFloat> ConstantFoldIntToFloat(unsigned Opcode, LLT DstTy,
Register Src,
const MachineRegisterInfo &MRI);
+/// Tries to constant fold a G_CTLZ operation on \p Src. If \p Src is a vector
+/// then it tries to do an element-wise constant fold.
+Optional<SmallVector<unsigned>>
+ConstantFoldCTLZ(Register Src, const MachineRegisterInfo &MRI);
+
/// Test if the given value is known to have exactly one bit set. This differs
/// from computeKnownBits in that it doesn't necessarily determine which bit is
/// set.
@@ -346,15 +372,23 @@ Optional<int> getSplatIndex(MachineInstr &MI);
Optional<int64_t> getBuildVectorConstantSplat(const MachineInstr &MI,
const MachineRegisterInfo &MRI);
+/// Returns a floating point scalar constant of a build vector splat if it
+/// exists. When \p AllowUndef == true some elements can be undef but not all.
+Optional<FPValueAndVReg> getFConstantSplat(Register VReg,
+ const MachineRegisterInfo &MRI,
+ bool AllowUndef = true);
+
/// Return true if the specified instruction is a G_BUILD_VECTOR or
/// G_BUILD_VECTOR_TRUNC where all of the elements are 0 or undef.
bool isBuildVectorAllZeros(const MachineInstr &MI,
- const MachineRegisterInfo &MRI);
+ const MachineRegisterInfo &MRI,
+ bool AllowUndef = false);
/// Return true if the specified instruction is a G_BUILD_VECTOR or
/// G_BUILD_VECTOR_TRUNC where all of the elements are ~0 or undef.
bool isBuildVectorAllOnes(const MachineInstr &MI,
- const MachineRegisterInfo &MRI);
+ const MachineRegisterInfo &MRI,
+ bool AllowUndef = false);
/// \returns a value when \p MI is a vector splat. The splat can be either a
/// Register or a constant.
@@ -378,6 +412,17 @@ bool isBuildVectorAllOnes(const MachineInstr &MI,
Optional<RegOrConstant> getVectorSplat(const MachineInstr &MI,
const MachineRegisterInfo &MRI);
+/// Determines if \p MI defines a constant integer or a build vector of
+/// constant integers. Treats undef values as constants.
+bool isConstantOrConstantVector(MachineInstr &MI,
+ const MachineRegisterInfo &MRI);
+
+/// Determines if \p MI defines a constant integer or a splat vector of
+/// constant integers.
+/// \returns the scalar constant or None.
+Optional<APInt> isConstantOrConstantSplatVector(MachineInstr &MI,
+ const MachineRegisterInfo &MRI);
+
/// Attempt to match a unary predicate against a scalar/splat constant or every
/// element of a constant G_BUILD_VECTOR. If \p ConstVal is null, the source
/// value was undef.
@@ -398,5 +443,14 @@ int64_t getICmpTrueVal(const TargetLowering &TLI, bool IsVector, bool IsFP);
bool shouldOptForSize(const MachineBasicBlock &MBB, ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI);
+using SmallInstListTy = GISelWorkList<4>;
+void saveUsesAndErase(MachineInstr &MI, MachineRegisterInfo &MRI,
+ LostDebugLocObserver *LocObserver,
+ SmallInstListTy &DeadInstChain);
+void eraseInstrs(ArrayRef<MachineInstr *> DeadInstrs, MachineRegisterInfo &MRI,
+ LostDebugLocObserver *LocObserver = nullptr);
+void eraseInstr(MachineInstr &MI, MachineRegisterInfo &MRI,
+ LostDebugLocObserver *LocObserver = nullptr);
+
} // End namespace llvm.
#endif
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 6803f4d76cf0..fd106f55a43d 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -1260,6 +1260,11 @@ static const int FIRST_TARGET_STRICTFP_OPCODE = BUILTIN_OP_END + 400;
/// be used with SelectionDAG::getMemIntrinsicNode.
static const int FIRST_TARGET_MEMORY_OPCODE = BUILTIN_OP_END + 500;
+/// Whether this is bitwise logic opcode.
+inline bool isBitwiseLogicOp(unsigned Opcode) {
+ return Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR;
+}
+
/// Get underlying scalar opcode for VECREDUCE opcode.
/// For example ISD::AND for ISD::VECREDUCE_AND.
NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode);
@@ -1267,6 +1272,12 @@ NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode);
/// Whether this is a vector-predicated Opcode.
bool isVPOpcode(unsigned Opcode);
+/// Whether this is a vector-predicated binary operation opcode.
+bool isVPBinaryOp(unsigned Opcode);
+
+/// Whether this is a vector-predicated reduction opcode.
+bool isVPReduction(unsigned Opcode);
+
/// The operand position of the vector mask.
Optional<unsigned> getVPMaskIdx(unsigned Opcode);
diff --git a/llvm/include/llvm/CodeGen/IndirectThunks.h b/llvm/include/llvm/CodeGen/IndirectThunks.h
index 74973f38bc79..90f9912f0ee0 100644
--- a/llvm/include/llvm/CodeGen/IndirectThunks.h
+++ b/llvm/include/llvm/CodeGen/IndirectThunks.h
@@ -62,7 +62,7 @@ void ThunkInserter<Derived>::createThunkFunction(MachineModuleInfo &MMI,
AttrBuilder B;
B.addAttribute(llvm::Attribute::NoUnwind);
B.addAttribute(llvm::Attribute::Naked);
- F->addAttributes(llvm::AttributeList::FunctionIndex, B);
+ F->addFnAttrs(B);
// Populate our function a bit so that we can verify.
BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F);
diff --git a/llvm/include/llvm/CodeGen/LinkAllAsmWriterComponents.h b/llvm/include/llvm/CodeGen/LinkAllAsmWriterComponents.h
index 81b0025fdddc..c22f9d49f374 100644
--- a/llvm/include/llvm/CodeGen/LinkAllAsmWriterComponents.h
+++ b/llvm/include/llvm/CodeGen/LinkAllAsmWriterComponents.h
@@ -24,6 +24,9 @@ namespace {
// delete it all as dead code, even with whole program optimization,
// yet is effectively a NO-OP. As the compiler isn't smart enough
// to know that getenv() never returns -1, this will do the job.
+ // This is so that globals in the translation units where these functions
+ // are defined are forced to be initialized, populating various
+ // registries.
if (std::getenv("bar") != (char*) -1)
return;
diff --git a/llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h b/llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h
index 1b13ff53ac85..d615a5db4504 100644
--- a/llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h
+++ b/llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h
@@ -27,6 +27,9 @@ namespace {
// delete it all as dead code, even with whole program optimization,
// yet is effectively a NO-OP. As the compiler isn't smart enough
// to know that getenv() never returns -1, this will do the job.
+ // This is so that globals in the translation units where these functions
+ // are defined are forced to be initialized, populating various
+ // registries.
if (std::getenv("bar") != (char*) -1)
return;
diff --git a/llvm/include/llvm/CodeGen/LiveInterval.h b/llvm/include/llvm/CodeGen/LiveInterval.h
index c2b158ac1b7f..923a45821dd4 100644
--- a/llvm/include/llvm/CodeGen/LiveInterval.h
+++ b/llvm/include/llvm/CodeGen/LiveInterval.h
@@ -521,11 +521,11 @@ namespace llvm {
removeSegment(S.start, S.end, RemoveDeadValNo);
}
- /// Remove segment pointed to by iterator @p I from this range. This does
- /// not remove dead value numbers.
- iterator removeSegment(iterator I) {
- return segments.erase(I);
- }
+ /// Remove segment pointed to by iterator @p I from this range.
+ iterator removeSegment(iterator I, bool RemoveDeadValNo = false);
+
+ /// Mark \p ValNo for deletion if no segments in this range use it.
+ void removeValNoIfDead(VNInfo *ValNo);
/// Query Liveness at Idx.
/// The sub-instruction slot of Idx doesn't matter, only the instruction
diff --git a/llvm/include/llvm/CodeGen/LiveIntervalUnion.h b/llvm/include/llvm/CodeGen/LiveIntervalUnion.h
index 4ebe0f2dcfd8..3b6a4a379d72 100644
--- a/llvm/include/llvm/CodeGen/LiveIntervalUnion.h
+++ b/llvm/include/llvm/CodeGen/LiveIntervalUnion.h
@@ -114,12 +114,19 @@ public:
const LiveRange *LR = nullptr;
LiveRange::const_iterator LRI; ///< current position in LR
ConstSegmentIter LiveUnionI; ///< current position in LiveUnion
- Optional<SmallVector<LiveInterval *, 4>> InterferingVRegs;
+ SmallVector<LiveInterval *, 4> InterferingVRegs;
bool CheckedFirstInterference = false;
bool SeenAllInterferences = false;
unsigned Tag = 0;
unsigned UserTag = 0;
+ // Count the virtual registers in this union that interfere with this
+ // query's live virtual register, up to maxInterferingRegs.
+ unsigned collectInterferingVRegs(unsigned MaxInterferingRegs);
+
+ // Was this virtual register visited during collectInterferingVRegs?
+ bool isSeenInterference(LiveInterval *VirtReg) const;
+
public:
Query() = default;
Query(const LiveRange &LR, const LiveIntervalUnion &LIU)
@@ -131,7 +138,7 @@ public:
const LiveIntervalUnion &NewLiveUnion) {
LiveUnion = &NewLiveUnion;
LR = &NewLR;
- InterferingVRegs = None;
+ InterferingVRegs.clear();
CheckedFirstInterference = false;
SeenAllInterferences = false;
Tag = NewLiveUnion.getTag();
@@ -151,20 +158,12 @@ public:
// Does this live virtual register interfere with the union?
bool checkInterference() { return collectInterferingVRegs(1); }
- // Count the virtual registers in this union that interfere with this
- // query's live virtual register, up to maxInterferingRegs.
- unsigned collectInterferingVRegs(
- unsigned MaxInterferingRegs = std::numeric_limits<unsigned>::max());
-
- // Was this virtual register visited during collectInterferingVRegs?
- bool isSeenInterference(LiveInterval *VirtReg) const;
-
- // Did collectInterferingVRegs collect all interferences?
- bool seenAllInterferences() const { return SeenAllInterferences; }
-
// Vector generated by collectInterferingVRegs.
- const SmallVectorImpl<LiveInterval*> &interferingVRegs() const {
- return *InterferingVRegs;
+ const SmallVectorImpl<LiveInterval *> &interferingVRegs(
+ unsigned MaxInterferingRegs = std::numeric_limits<unsigned>::max()) {
+ if (!SeenAllInterferences || MaxInterferingRegs < InterferingVRegs.size())
+ collectInterferingVRegs(MaxInterferingRegs);
+ return InterferingVRegs;
}
};
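
With this change the collection happens lazily inside interferingVRegs(), so callers that previously called collectInterferingVRegs() explicitly now look roughly like the following sketch (Matrix, VirtReg and Unit are assumed to come from the surrounding register allocator code):

  // Sketch: enumerate the live intervals in one union that conflict with VirtReg.
  LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, Unit);
  if (Q.checkInterference())
    for (LiveInterval *Intf : Q.interferingVRegs())
      dbgs() << printReg(Intf->reg()) << " interferes\n";
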
diff --git a/llvm/include/llvm/CodeGen/LiveVariables.h b/llvm/include/llvm/CodeGen/LiveVariables.h
index 9b0667bbbeb0..dee316677b25 100644
--- a/llvm/include/llvm/CodeGen/LiveVariables.h
+++ b/llvm/include/llvm/CodeGen/LiveVariables.h
@@ -188,6 +188,12 @@ public:
//===--------------------------------------------------------------------===//
// API to update live variable information
+ /// Recompute liveness from scratch for a virtual register \p Reg that is
+ /// known to have a single def that dominates all uses. This can be useful
+ /// after removing some uses of \p Reg. It is not necessary for the whole
+ /// machine function to be in SSA form.
+ void recomputeForSingleDefVirtReg(Register Reg);
+
/// replaceKillInstruction - Update register kill info by replacing a kill
/// instruction with a new one.
void replaceKillInstruction(Register Reg, MachineInstr &OldMI,
diff --git a/llvm/include/llvm/CodeGen/LowLevelType.h b/llvm/include/llvm/CodeGen/LowLevelType.h
index 40985e16b37a..922f93d2e598 100644
--- a/llvm/include/llvm/CodeGen/LowLevelType.h
+++ b/llvm/include/llvm/CodeGen/LowLevelType.h
@@ -16,8 +16,8 @@
#ifndef LLVM_CODEGEN_LOWLEVELTYPE_H
#define LLVM_CODEGEN_LOWLEVELTYPE_H
+#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/Support/LowLevelTypeImpl.h"
-#include "llvm/Support/MachineValueType.h"
namespace llvm {
@@ -31,6 +31,7 @@ LLT getLLTForType(Type &Ty, const DataLayout &DL);
/// Get a rough equivalent of an MVT for a given LLT. MVT can't distinguish
/// pointers, so these will convert to a plain integer.
MVT getMVTForLLT(LLT Ty);
+EVT getApproximateEVTForLLT(LLT Ty, const DataLayout &DL, LLVMContext &Ctx);
/// Get a rough equivalent of an LLT for a given MVT. LLT does not yet support
/// scalarable vector types, and will assert if used.
diff --git a/llvm/include/llvm/CodeGen/MIRFSDiscriminator.h b/llvm/include/llvm/CodeGen/MIRFSDiscriminator.h
index 6137411b6dba..deb6b37a9bcf 100644
--- a/llvm/include/llvm/CodeGen/MIRFSDiscriminator.h
+++ b/llvm/include/llvm/CodeGen/MIRFSDiscriminator.h
@@ -57,6 +57,10 @@ public:
assert(LowBit < HighBit && "HighBit needs to be greater than Lowbit");
}
+ StringRef getPassName() const override {
+ return "Add FS discriminators in MIR";
+ }
+
/// getNumFSBBs() - Return the number of machine BBs that have FS samples.
unsigned getNumFSBBs();
diff --git a/llvm/include/llvm/CodeGen/MIRFormatter.h b/llvm/include/llvm/CodeGen/MIRFormatter.h
index 9cb92091db50..12c90600f6df 100644
--- a/llvm/include/llvm/CodeGen/MIRFormatter.h
+++ b/llvm/include/llvm/CodeGen/MIRFormatter.h
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/MIRFormatter.h -----------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/llvm/include/llvm/CodeGen/MIRSampleProfile.h b/llvm/include/llvm/CodeGen/MIRSampleProfile.h
new file mode 100644
index 000000000000..2503524ccfdf
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/MIRSampleProfile.h
@@ -0,0 +1,76 @@
+//===----- MIRSampleProfile.h: SampleFDO Support in MIR ---*- c++ -*-------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the supporting functions for the machine-level Sample FDO
+// loader. This is used in flow-sensitive SampleFDO.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MIRSAMPLEPROFILE_H
+#define LLVM_CODEGEN_MIRSAMPLEPROFILE_H
+
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/ProfileData/SampleProf.h"
+#include "llvm/ProfileData/SampleProfReader.h"
+
+#include <cassert>
+
+namespace llvm {
+
+using namespace sampleprof;
+
+class MIRProfileLoader;
+class MIRProfileLoaderPass : public MachineFunctionPass {
+ MachineFunction *MF;
+ std::string ProfileFileName;
+ FSDiscriminatorPass P;
+ unsigned LowBit;
+ unsigned HighBit;
+
+public:
+ static char ID;
+ /// FS bits will only use the '1' bits in the Mask.
+ MIRProfileLoaderPass(std::string FileName = "",
+ std::string RemappingFileName = "",
+ FSDiscriminatorPass P = FSDiscriminatorPass::Pass1);
+
+ /// getMachineFunction - Return the last machine function computed.
+ const MachineFunction *getMachineFunction() const { return MF; }
+
+ StringRef getPassName() const override { return "SampleFDO loader in MIR"; }
+
+private:
+ void init(MachineFunction &MF);
+ bool runOnMachineFunction(MachineFunction &) override;
+ bool doInitialization(Module &M) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ std::unique_ptr<MIRProfileLoader> MIRSampleLoader;
+ /// Hold the information of the basic block frequency.
+ MachineBlockFrequencyInfo *MBFI;
+};
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_MIRSAMPLEPROFILE_H
diff --git a/llvm/include/llvm/CodeGen/MIRYamlMapping.h b/llvm/include/llvm/CodeGen/MIRYamlMapping.h
index e7428e7ad260..b6d7c2487126 100644
--- a/llvm/include/llvm/CodeGen/MIRYamlMapping.h
+++ b/llvm/include/llvm/CodeGen/MIRYamlMapping.h
@@ -694,6 +694,7 @@ struct MachineFunction {
// Register information
bool TracksRegLiveness = false;
bool HasWinCFI = false;
+ bool FailsVerification = false;
std::vector<VirtualRegisterDefinition> VirtualRegisters;
std::vector<MachineFunctionLiveIn> LiveIns;
Optional<std::vector<FlowStringValue>> CalleeSavedRegisters;
@@ -722,6 +723,7 @@ template <> struct MappingTraits<MachineFunction> {
YamlIO.mapOptional("failedISel", MF.FailedISel, false);
YamlIO.mapOptional("tracksRegLiveness", MF.TracksRegLiveness, false);
YamlIO.mapOptional("hasWinCFI", MF.HasWinCFI, false);
+ YamlIO.mapOptional("failsVerification", MF.FailsVerification, false);
YamlIO.mapOptional("registers", MF.VirtualRegisters,
std::vector<VirtualRegisterDefinition>());
YamlIO.mapOptional("liveins", MF.LiveIns,
diff --git a/llvm/include/llvm/CodeGen/MachineCombinerPattern.h b/llvm/include/llvm/CodeGen/MachineCombinerPattern.h
index ac0cc70744d1..67544779f34c 100644
--- a/llvm/include/llvm/CodeGen/MachineCombinerPattern.h
+++ b/llvm/include/llvm/CodeGen/MachineCombinerPattern.h
@@ -153,7 +153,18 @@ enum class MachineCombinerPattern {
FMLSv4f32_OP1,
FMLSv4f32_OP2,
FMLSv4i32_indexed_OP1,
- FMLSv4i32_indexed_OP2
+ FMLSv4i32_indexed_OP2,
+
+ FMULv2i32_indexed_OP1,
+ FMULv2i32_indexed_OP2,
+ FMULv2i64_indexed_OP1,
+ FMULv2i64_indexed_OP2,
+ FMULv4i16_indexed_OP1,
+ FMULv4i16_indexed_OP2,
+ FMULv4i32_indexed_OP1,
+ FMULv4i32_indexed_OP2,
+ FMULv8i16_indexed_OP1,
+ FMULv8i16_indexed_OP2,
};
} // end namespace llvm
diff --git a/llvm/include/llvm/CodeGen/MachineDominators.h b/llvm/include/llvm/CodeGen/MachineDominators.h
index 46bf73cdd7b6..f749e9ff7e0a 100644
--- a/llvm/include/llvm/CodeGen/MachineDominators.h
+++ b/llvm/include/llvm/CodeGen/MachineDominators.h
@@ -36,6 +36,7 @@ extern template class DomTreeNodeBase<MachineBasicBlock>;
extern template class DominatorTreeBase<MachineBasicBlock, false>; // DomTree
extern template class DominatorTreeBase<MachineBasicBlock, true>; // PostDomTree
+using MachineDomTree = DomTreeBase<MachineBasicBlock>;
using MachineDomTreeNode = DomTreeNodeBase<MachineBasicBlock>;
//===-------------------------------------
@@ -43,8 +44,6 @@ using MachineDomTreeNode = DomTreeNodeBase<MachineBasicBlock>;
/// compute a normal dominator tree.
///
class MachineDominatorTree : public MachineFunctionPass {
- using DomTreeT = DomTreeBase<MachineBasicBlock>;
-
/// Helper structure used to hold all the basic blocks
/// involved in the split of a critical edge.
struct CriticalEdge {
@@ -67,7 +66,7 @@ class MachineDominatorTree : public MachineFunctionPass {
mutable SmallSet<MachineBasicBlock *, 32> NewBBs;
/// The DominatorTreeBase that is used to compute a normal dominator tree.
- std::unique_ptr<DomTreeT> DT;
+ std::unique_ptr<MachineDomTree> DT;
/// Apply all the recorded critical edges to the DT.
/// This updates the underlying DT information in a way that uses
@@ -84,8 +83,9 @@ public:
calculate(MF);
}
- DomTreeT &getBase() {
- if (!DT) DT.reset(new DomTreeT());
+ MachineDomTree &getBase() {
+ if (!DT)
+ DT.reset(new MachineDomTree());
applySplitCriticalEdges();
return *DT;
}
@@ -112,6 +112,12 @@ public:
return DT->dominates(A, B);
}
+ void getDescendants(MachineBasicBlock *A,
+ SmallVectorImpl<MachineBasicBlock *> &Result) {
+ applySplitCriticalEdges();
+ DT->getDescendants(A, Result);
+ }
+
bool dominates(const MachineBasicBlock *A, const MachineBasicBlock *B) const {
applySplitCriticalEdges();
return DT->dominates(A, B);
diff --git a/llvm/include/llvm/CodeGen/MachineFrameInfo.h b/llvm/include/llvm/CodeGen/MachineFrameInfo.h
index 28a59703dc60..5df468102a8a 100644
--- a/llvm/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineFrameInfo.h
@@ -342,6 +342,8 @@ public:
: StackAlignment(assumeAligned(StackAlignment)),
StackRealignable(StackRealignable), ForcedRealign(ForcedRealign) {}
+ MachineFrameInfo(const MachineFrameInfo &) = delete;
+
/// Return true if there are any stack objects in this function.
bool hasStackObjects() const { return !Objects.empty(); }
diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h
index 786fe908f68f..dcbd19ac6b5a 100644
--- a/llvm/include/llvm/CodeGen/MachineFunction.h
+++ b/llvm/include/llvm/CodeGen/MachineFunction.h
@@ -149,6 +149,9 @@ public:
// all sizes attached to them have been eliminated.
// TiedOpsRewritten: The twoaddressinstruction pass will set this flag, it
// means that tied-def have been rewritten to meet the RegConstraint.
+ // FailsVerification: Means that the function is not expected to pass machine
+ // verification. This can be set by passes that introduce known problems that
+ // have not been fixed yet.
enum class Property : unsigned {
IsSSA,
NoPHIs,
@@ -159,7 +162,8 @@ public:
RegBankSelected,
Selected,
TiedOpsRewritten,
- LastProperty = TiedOpsRewritten,
+ FailsVerification,
+ LastProperty = FailsVerification,
};
bool hasProperty(Property P) const {
@@ -227,7 +231,7 @@ struct LandingPadInfo {
: LandingPadBlock(MBB) {}
};
-class MachineFunction {
+class LLVM_EXTERNAL_VISIBILITY MachineFunction {
Function &F;
const LLVMTargetMachine &Target;
const TargetSubtargetInfo *STI;
@@ -536,6 +540,14 @@ public:
/// (or DBG_PHI).
void finalizeDebugInstrRefs();
+ /// Returns true if the function's variable locations should be tracked with
+ /// instruction referencing.
+ bool useDebugInstrRef() const;
+
+ /// A reserved operand number representing the instruction's memory operand,
+ /// for instructions that have a stack spill fused into them.
+ const static unsigned int DebugOperandMemNumber;
+
MachineFunction(Function &F, const LLVMTargetMachine &Target,
const TargetSubtargetInfo &STI, unsigned FunctionNum,
MachineModuleInfo &MMI);
diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h
index 757907f6d887..0ac934e208b6 100644
--- a/llvm/include/llvm/CodeGen/MachineInstr.h
+++ b/llvm/include/llvm/CodeGen/MachineInstr.h
@@ -517,7 +517,7 @@ public:
SmallSet<Register, 4> getUsedDebugRegs() const {
assert(isDebugValue() && "not a DBG_VALUE*");
SmallSet<Register, 4> UsedRegs;
- for (auto MO : debug_operands())
+ for (const auto &MO : debug_operands())
if (MO.isReg() && MO.getReg())
UsedRegs.insert(MO.getReg());
return UsedRegs;
@@ -1331,6 +1331,7 @@ public:
case TargetOpcode::LIFETIME_START:
case TargetOpcode::LIFETIME_END:
case TargetOpcode::PSEUDO_PROBE:
+ case TargetOpcode::ARITH_FENCE:
return true;
}
}
@@ -1859,17 +1860,6 @@ public:
}
}
- PseudoProbeAttributes getPseudoProbeAttribute() const {
- assert(isPseudoProbe() && "Must be a pseudo probe instruction");
- return (PseudoProbeAttributes)getOperand(3).getImm();
- }
-
- void addPseudoProbeAttribute(PseudoProbeAttributes Attr) {
- assert(isPseudoProbe() && "Must be a pseudo probe instruction");
- MachineOperand &AttrOperand = getOperand(3);
- AttrOperand.setImm(AttrOperand.getImm() | (uint32_t)Attr);
- }
-
private:
/// If this instruction is embedded into a MachineFunction, return the
/// MachineRegisterInfo object for the current function, otherwise
diff --git a/llvm/include/llvm/CodeGen/MachineMemOperand.h b/llvm/include/llvm/CodeGen/MachineMemOperand.h
index 07b8e5ebcc1d..00080b171974 100644
--- a/llvm/include/llvm/CodeGen/MachineMemOperand.h
+++ b/llvm/include/llvm/CodeGen/MachineMemOperand.h
@@ -282,17 +282,7 @@ public:
/// success and failure orderings for an atomic operation. (For operations
/// other than cmpxchg, this is equivalent to getSuccessOrdering().)
AtomicOrdering getMergedOrdering() const {
- AtomicOrdering Ordering = getSuccessOrdering();
- AtomicOrdering FailureOrdering = getFailureOrdering();
- if (FailureOrdering == AtomicOrdering::SequentiallyConsistent)
- return AtomicOrdering::SequentiallyConsistent;
- if (FailureOrdering == AtomicOrdering::Acquire) {
- if (Ordering == AtomicOrdering::Monotonic)
- return AtomicOrdering::Acquire;
- if (Ordering == AtomicOrdering::Release)
- return AtomicOrdering::AcquireRelease;
- }
- return Ordering;
+ return getMergedAtomicOrdering(getSuccessOrdering(), getFailureOrdering());
}
bool isLoad() const { return FlagVals & MOLoad; }
diff --git a/llvm/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h b/llvm/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h
index 8cc5909c40b7..285b858c96cb 100644
--- a/llvm/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h
+++ b/llvm/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h
@@ -118,6 +118,12 @@ public:
: DiagnosticInfoMIROptimization(DK_MachineOptimizationRemarkAnalysis,
PassName, RemarkName, Loc, MBB) {}
+ MachineOptimizationRemarkAnalysis(const char *PassName, StringRef RemarkName,
+ const MachineInstr *MI)
+ : DiagnosticInfoMIROptimization(DK_MachineOptimizationRemarkAnalysis,
+ PassName, RemarkName, MI->getDebugLoc(),
+ MI->getParent()) {}
+
static bool classof(const DiagnosticInfo *DI) {
return DI->getKind() == DK_MachineOptimizationRemarkAnalysis;
}
diff --git a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
index ca3dd992bbd5..dbabfe5f0f32 100644
--- a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -821,7 +821,7 @@ public:
/// deleted during LiveDebugVariables analysis.
void markUsesInDebugValueAsUndef(Register Reg) const;
- /// updateDbgUsersToReg - Update a collection of DBG_VALUE instructions
+ /// updateDbgUsersToReg - Update a collection of debug instructions
/// to refer to the designated register.
void updateDbgUsersToReg(MCRegister OldReg, MCRegister NewReg,
ArrayRef<MachineInstr *> Users) const {
@@ -829,21 +829,34 @@ public:
for (MCRegUnitIterator RUI(OldReg, getTargetRegisterInfo()); RUI.isValid();
++RUI)
OldRegUnits.insert(*RUI);
- for (MachineInstr *MI : Users) {
- assert(MI->isDebugValue());
- for (auto &Op : MI->debug_operands()) {
- if (Op.isReg()) {
- for (MCRegUnitIterator RUI(OldReg, getTargetRegisterInfo());
- RUI.isValid(); ++RUI) {
- if (OldRegUnits.contains(*RUI)) {
- Op.setReg(NewReg);
- break;
- }
+
+ // If this operand is a register, check whether it overlaps with OldReg.
+ // If it does, replace with NewReg.
+ auto UpdateOp = [this, &NewReg, &OldReg, &OldRegUnits](MachineOperand &Op) {
+ if (Op.isReg()) {
+ for (MCRegUnitIterator RUI(OldReg, getTargetRegisterInfo());
+ RUI.isValid(); ++RUI) {
+ if (OldRegUnits.contains(*RUI)) {
+ Op.setReg(NewReg);
+ break;
}
}
}
- assert(MI->hasDebugOperandForReg(NewReg) &&
- "Expected debug value to have some overlap with OldReg");
+ };
+
+ // Iterate through (possibly several) operands to DBG_VALUEs and update
+ // each. For DBG_PHIs, only one operand will be present.
+ for (MachineInstr *MI : Users) {
+ if (MI->isDebugValue()) {
+ for (auto &Op : MI->debug_operands())
+ UpdateOp(Op);
+ assert(MI->hasDebugOperandForReg(NewReg) &&
+ "Expected debug value to have some overlap with OldReg");
+ } else if (MI->isDebugPHI()) {
+ UpdateOp(MI->getOperand(0));
+ } else {
+ llvm_unreachable("Non-DBG_VALUE, Non-DBG_PHI debug instr updated");
+ }
}
}
@@ -964,7 +977,7 @@ public:
MCRegister getLiveInPhysReg(Register VReg) const;
/// getLiveInVirtReg - If PReg is a live-in physical register, return the
- /// corresponding live-in physical register.
+ /// corresponding live-in virtual register.
Register getLiveInVirtReg(MCRegister PReg) const;
/// EmitLiveInCopies - Emit copies to initialize livein virtual registers
diff --git a/llvm/include/llvm/CodeGen/MacroFusion.h b/llvm/include/llvm/CodeGen/MacroFusion.h
index 3a140fe63fde..ea2c7a5faae3 100644
--- a/llvm/include/llvm/CodeGen/MacroFusion.h
+++ b/llvm/include/llvm/CodeGen/MacroFusion.h
@@ -23,6 +23,8 @@ class MachineInstr;
class ScheduleDAGMutation;
class TargetInstrInfo;
class TargetSubtargetInfo;
+class ScheduleDAGInstrs;
+class SUnit;
/// Check if the instr pair, FirstMI and SecondMI, should be fused
/// together. Given SecondMI, when FirstMI is unspecified, then check if
@@ -32,6 +34,18 @@ using ShouldSchedulePredTy = std::function<bool(const TargetInstrInfo &TII,
const MachineInstr *FirstMI,
const MachineInstr &SecondMI)>;
+/// Checks if the number of cluster edges between SU and its predecessors is
+/// less than FuseLimit
+bool hasLessThanNumFused(const SUnit &SU, unsigned FuseLimit);
+
+/// Create an artificial edge between FirstSU and SecondSU.
+/// Make data dependencies from the FirstSU also dependent on the SecondSU to
+/// prevent them from being scheduled between the FirstSU and the SecondSU
+/// and vice-versa.
+/// Fusing more than 2 instructions is not currently supported.
+bool fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU,
+ SUnit &SecondSU);
+
/// Create a DAG scheduling mutation to pair instructions back to back
/// for instructions that benefit according to the target-specific
/// shouldScheduleAdjacent predicate function.
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index da1bab718948..d5ad12fadfa0 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -37,6 +37,10 @@ class raw_ostream;
// List of target independent CodeGen pass IDs.
namespace llvm {
+
+ /// AtomicExpandPass - At IR level this pass replaces atomic instructions with
+ /// __atomic_* library calls, or target-specific instructions which implement
+ /// the same semantics in a way that better fits the target backend.
FunctionPass *createAtomicExpandPass();
/// createUnreachableBlockEliminationPass - The LLVM code generator does not
@@ -171,6 +175,9 @@ namespace llvm {
/// This pass adds flow sensitive discriminators.
extern char &MIRAddFSDiscriminatorsID;
+ /// This pass reads the flow-sensitive profile.
+ extern char &MIRProfileLoaderPassID;
+
/// FastRegisterAllocation Pass - This pass register allocates as fast as
/// possible. It is best suited for debug code where live ranges are short.
///
@@ -513,6 +520,11 @@ namespace llvm {
FunctionPass *
createMIRAddFSDiscriminatorsPass(sampleprof::FSDiscriminatorPass P);
+ /// Read Flow Sensitive Profile.
+ FunctionPass *createMIRProfileLoaderPass(std::string File,
+ std::string RemappingFile,
+ sampleprof::FSDiscriminatorPass P);
+
/// Creates MIR Debugify pass. \see MachineDebugify.cpp
ModulePass *createDebugifyMachineModulePass();
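
The declarations above expose the flow-sensitive profile loader introduced in MIRSampleProfile.h earlier in this patch. A minimal sketch of creating the pass and handing it to a legacy pass manager; the profile file name, the empty remapping file, and the discriminator stage are placeholder choices, and the include paths are assumed:

    #include "llvm/CodeGen/Passes.h"
    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/Support/Discriminator.h"

    void addFSProfileLoader(llvm::legacy::PassManagerBase &PM) {
      PM.add(llvm::createMIRProfileLoaderPass(
          /*File=*/"fs.prof", /*RemappingFile=*/"",
          llvm::sampleprof::FSDiscriminatorPass::Pass1));
    }
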
diff --git a/llvm/include/llvm/CodeGen/RegAllocCommon.h b/llvm/include/llvm/CodeGen/RegAllocCommon.h
index 39b77d919370..757ca8e112ee 100644
--- a/llvm/include/llvm/CodeGen/RegAllocCommon.h
+++ b/llvm/include/llvm/CodeGen/RegAllocCommon.h
@@ -1,9 +1,8 @@
//===- RegAllocCommon.h - Utilities shared between allocators ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/CodeGen/RegisterScavenging.h b/llvm/include/llvm/CodeGen/RegisterScavenging.h
index 4f48ea2dc8e8..218e05f6eb6b 100644
--- a/llvm/include/llvm/CodeGen/RegisterScavenging.h
+++ b/llvm/include/llvm/CodeGen/RegisterScavenging.h
@@ -211,9 +211,6 @@ private:
/// Initialize RegisterScavenger.
void init(MachineBasicBlock &MBB);
- /// Mark live-in registers of basic block as used.
- void setLiveInsUsed(const MachineBasicBlock &MBB);
-
/// Spill a register after position \p After and reload it before position
/// \p UseMI.
ScavengedInfo &spill(Register Reg, const TargetRegisterClass &RC, int SPAdj,
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index 948a4763b872..5a3f4e9a23ff 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -531,7 +531,7 @@ public:
}
#ifndef NDEBUG
- void VerifyDAGDiverence();
+ void VerifyDAGDivergence();
#endif
/// This iterates over the nodes in the SelectionDAG, folding
@@ -621,8 +621,8 @@ public:
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget = false,
bool IsOpaque = false) {
- return getConstant(APInt::getAllOnesValue(VT.getScalarSizeInBits()), DL,
- VT, IsTarget, IsOpaque);
+ return getConstant(APInt::getAllOnes(VT.getScalarSizeInBits()), DL, VT,
+ IsTarget, IsOpaque);
}
SDValue getConstant(const ConstantInt &Val, const SDLoc &DL, EVT VT,
@@ -1307,6 +1307,74 @@ public:
SDValue getIndexedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base,
SDValue Offset, ISD::MemIndexedMode AM);
+ SDValue getLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT,
+ const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset,
+ SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo,
+ EVT MemVT, Align Alignment,
+ MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo,
+ const MDNode *Ranges = nullptr, bool IsExpanding = false);
+ inline SDValue
+ getLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT,
+ const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset,
+ SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo, EVT MemVT,
+ MaybeAlign Alignment = MaybeAlign(),
+ MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone,
+ const AAMDNodes &AAInfo = AAMDNodes(),
+ const MDNode *Ranges = nullptr, bool IsExpanding = false) {
+ // Ensures that codegen never sees a None Alignment.
+ return getLoadVP(AM, ExtType, VT, dl, Chain, Ptr, Offset, Mask, EVL,
+ PtrInfo, MemVT, Alignment.getValueOr(getEVTAlign(MemVT)),
+ MMOFlags, AAInfo, Ranges, IsExpanding);
+ }
+ SDValue getLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT,
+ const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset,
+ SDValue Mask, SDValue EVL, EVT MemVT,
+ MachineMemOperand *MMO, bool IsExpanding = false);
+ SDValue getLoadVP(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr,
+ SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo,
+ MaybeAlign Alignment, MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo, const MDNode *Ranges = nullptr,
+ bool IsExpanding = false);
+ SDValue getLoadVP(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr,
+ SDValue Mask, SDValue EVL, MachineMemOperand *MMO,
+ bool IsExpanding = false);
+ SDValue getExtLoadVP(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT,
+ SDValue Chain, SDValue Ptr, SDValue Mask, SDValue EVL,
+ MachinePointerInfo PtrInfo, EVT MemVT,
+ MaybeAlign Alignment, MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo, bool IsExpanding = false);
+ SDValue getExtLoadVP(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT,
+ SDValue Chain, SDValue Ptr, SDValue Mask, SDValue EVL,
+ EVT MemVT, MachineMemOperand *MMO,
+ bool IsExpanding = false);
+ SDValue getIndexedLoadVP(SDValue OrigLoad, const SDLoc &dl, SDValue Base,
+ SDValue Offset, ISD::MemIndexedMode AM);
+ SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr,
+ SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo,
+ Align Alignment, MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo = AAMDNodes(),
+ bool IsCompressing = false);
+ SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr,
+ SDValue Mask, SDValue EVL, MachineMemOperand *MMO,
+ bool IsCompressing = false);
+ SDValue getTruncStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val,
+ SDValue Ptr, SDValue Mask, SDValue EVL,
+ MachinePointerInfo PtrInfo, EVT SVT, Align Alignment,
+ MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo, bool IsCompressing = false);
+ SDValue getTruncStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val,
+ SDValue Ptr, SDValue Mask, SDValue EVL, EVT SVT,
+ MachineMemOperand *MMO, bool IsCompressing = false);
+ SDValue getIndexedStoreVP(SDValue OrigStore, const SDLoc &dl, SDValue Base,
+ SDValue Offset, ISD::MemIndexedMode AM);
+
+ SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl,
+ ArrayRef<SDValue> Ops, MachineMemOperand *MMO,
+ ISD::MemIndexType IndexType);
+ SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl,
+ ArrayRef<SDValue> Ops, MachineMemOperand *MMO,
+ ISD::MemIndexType IndexType);
+
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base,
SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT,
MachineMemOperand *MMO, ISD::MemIndexedMode AM,
@@ -1664,10 +1732,6 @@ public:
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT,
ArrayRef<SDValue> Ops);
- SDValue FoldConstantVectorArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT,
- ArrayRef<SDValue> Ops,
- const SDNodeFlags Flags = SDNodeFlags());
-
/// Fold floating-point operations with 2 operands when both operands are
/// constants and/or undefined.
SDValue foldConstantFPMath(unsigned Opcode, const SDLoc &DL, EVT VT,
@@ -1769,6 +1833,19 @@ public:
unsigned ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
unsigned Depth = 0) const;
+ /// Get the minimum bit size for this Value \p Op as a signed integer.
+ /// i.e. x == sext(trunc(x to MinSignedBits) to bitwidth(x)).
+ /// Similar to the APInt::getMinSignedBits function.
+ /// Helper wrapper to ComputeNumSignBits.
+ unsigned ComputeMinSignedBits(SDValue Op, unsigned Depth = 0) const;
+
+ /// Get the minimum bit size for this Value \p Op as a signed integer.
+ /// i.e. x == sext(trunc(x to MinSignedBits) to bitwidth(x)).
+ /// Similar to the APInt::getMinSignedBits function.
+ /// Helper wrapper to ComputeNumSignBits.
+ unsigned ComputeMinSignedBits(SDValue Op, const APInt &DemandedElts,
+ unsigned Depth = 0) const;
+
/// Return true if this function can prove that \p Op is never poison
/// and, if \p PoisonOnly is false, does not have undef bits.
bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly = false,
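
The block of getLoadVP/getStoreVP/getGatherVP/getScatterVP builders above adds SelectionDAG support for vector-predicated (VP) memory nodes. A minimal sketch of constructing an unindexed, non-extending VP load in target lowering code, assuming Chain, Ptr, Mask and EVL were produced earlier; the builder name is illustrative only:

    #include "llvm/CodeGen/SelectionDAG.h"

    // Illustrative builder for an unindexed, non-extending VP load.
    llvm::SDValue buildVPLoad(llvm::SelectionDAG &DAG, const llvm::SDLoc &DL,
                              llvm::EVT VT, llvm::SDValue Chain,
                              llvm::SDValue Ptr, llvm::SDValue Mask,
                              llvm::SDValue EVL,
                              llvm::MachinePointerInfo PtrInfo) {
      // MaybeAlign() lets the DAG fall back to a default alignment for VT.
      return DAG.getLoadVP(VT, DL, Chain, Ptr, Mask, EVL, PtrInfo,
                           llvm::MaybeAlign(),
                           llvm::MachineMemOperand::MOLoad,
                           llvm::AAMDNodes());
    }
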
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h b/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h
index 4ee58333495b..6a3d76be0ed6 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h
@@ -50,6 +50,7 @@ public:
SDValue getIndex() { return Index; }
SDValue getIndex() const { return Index; }
bool hasValidOffset() const { return Offset.hasValue(); }
+ int64_t getOffset() const { return *Offset; }
// Returns true if `Other` and `*this` are both some offset from the same base
// pointer. In that case, `Off` is set to the offset between `*this` and
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index deeca98af3f3..2855e1f1e587 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -58,7 +58,6 @@ namespace llvm {
class APInt;
class Constant;
-template <typename T> struct DenseMapInfo;
class GlobalValue;
class MachineBasicBlock;
class MachineConstantPoolValue;
@@ -509,15 +508,19 @@ BEGIN_TWO_BYTE_PACK()
class LSBaseSDNodeBitfields {
friend class LSBaseSDNode;
+ friend class VPLoadStoreSDNode;
friend class MaskedLoadStoreSDNode;
friend class MaskedGatherScatterSDNode;
+ friend class VPGatherScatterSDNode;
uint16_t : NumMemSDNodeBits;
// This storage is shared between disparate class hierarchies to hold an
// enumeration specific to the class hierarchy in use.
// LSBaseSDNode => enum ISD::MemIndexedMode
+ // VPLoadStoreBaseSDNode => enum ISD::MemIndexedMode
// MaskedLoadStoreBaseSDNode => enum ISD::MemIndexedMode
+ // VPGatherScatterSDNode => enum ISD::MemIndexType
// MaskedGatherScatterSDNode => enum ISD::MemIndexType
uint16_t AddressingMode : 3;
};
@@ -525,8 +528,10 @@ BEGIN_TWO_BYTE_PACK()
class LoadSDNodeBitfields {
friend class LoadSDNode;
+ friend class VPLoadSDNode;
friend class MaskedLoadSDNode;
friend class MaskedGatherSDNode;
+ friend class VPGatherSDNode;
uint16_t : NumLSBaseSDNodeBits;
@@ -536,8 +541,10 @@ BEGIN_TWO_BYTE_PACK()
class StoreSDNodeBitfields {
friend class StoreSDNode;
+ friend class VPStoreSDNode;
friend class MaskedStoreSDNode;
friend class MaskedScatterSDNode;
+ friend class VPScatterSDNode;
uint16_t : NumLSBaseSDNodeBits;
@@ -1353,7 +1360,9 @@ public:
const SDValue &getBasePtr() const {
switch (getOpcode()) {
case ISD::STORE:
+ case ISD::VP_STORE:
case ISD::MSTORE:
+ case ISD::VP_SCATTER:
return getOperand(2);
case ISD::MGATHER:
case ISD::MSCATTER:
@@ -1393,6 +1402,10 @@ public:
case ISD::MSTORE:
case ISD::MGATHER:
case ISD::MSCATTER:
+ case ISD::VP_LOAD:
+ case ISD::VP_STORE:
+ case ISD::VP_GATHER:
+ case ISD::VP_SCATTER:
return true;
default:
return N->isMemIntrinsic() || N->isTargetMemoryOpcode();
@@ -1563,8 +1576,12 @@ public:
Align getAlignValue() const { return Value->getAlignValue(); }
bool isOne() const { return Value->isOne(); }
- bool isNullValue() const { return Value->isZero(); }
- bool isAllOnesValue() const { return Value->isMinusOne(); }
+ bool isZero() const { return Value->isZero(); }
+ // NOTE: This is soft-deprecated. Please use `isZero()` instead.
+ bool isNullValue() const { return isZero(); }
+ bool isAllOnes() const { return Value->isMinusOne(); }
+ // NOTE: This is soft-deprecated. Please use `isAllOnes()` instead.
+ bool isAllOnesValue() const { return isAllOnes(); }
bool isMaxSignedValue() const { return Value->isMaxValue(true); }
bool isMinSignedValue() const { return Value->isMinValue(true); }
@@ -2031,8 +2048,25 @@ public:
int32_t getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements,
uint32_t BitWidth) const;
+ /// Extract the raw bit data from a build vector of Undef, Constant or
+ /// ConstantFP node elements. Each raw bit element will be \p
+ /// DstEltSizeInBits wide, undef elements are treated as zero, and entirely
+ /// undefined elements are flagged in \p UndefElements.
+ bool getConstantRawBits(bool IsLittleEndian, unsigned DstEltSizeInBits,
+ SmallVectorImpl<APInt> &RawBitElements,
+ BitVector &UndefElements) const;
+
bool isConstant() const;
+ /// Recast bit data \p SrcBitElements to \p DstEltSizeInBits wide elements.
+ /// Undef elements are treated as zero, and entirely undefined elements are
+ /// flagged in \p DstUndefElements.
+ static void recastRawBits(bool IsLittleEndian, unsigned DstEltSizeInBits,
+ SmallVectorImpl<APInt> &DstBitElements,
+ ArrayRef<APInt> SrcBitElements,
+ BitVector &DstUndefElements,
+ const BitVector &SrcUndefElements);
+
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::BUILD_VECTOR;
}
@@ -2318,6 +2352,116 @@ public:
}
};
+/// This base class is used to represent VP_LOAD and VP_STORE nodes
+class VPLoadStoreSDNode : public MemSDNode {
+public:
+ friend class SelectionDAG;
+
+ VPLoadStoreSDNode(ISD::NodeType NodeTy, unsigned Order, const DebugLoc &dl,
+ SDVTList VTs, ISD::MemIndexedMode AM, EVT MemVT,
+ MachineMemOperand *MMO)
+ : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
+ LSBaseSDNodeBits.AddressingMode = AM;
+ assert(getAddressingMode() == AM && "Value truncated");
+ }
+
+ // VPLoadSDNode (Chain, Ptr, Offset, Mask, EVL)
+ // VPStoreSDNode (Chain, Data, Ptr, Offset, Mask, EVL)
+ // Mask is a vector of i1 elements;
+ // the type of EVL is TLI.getVPExplicitVectorLengthTy().
+ const SDValue &getOffset() const {
+ return getOperand(getOpcode() == ISD::VP_LOAD ? 2 : 3);
+ }
+ const SDValue &getBasePtr() const {
+ return getOperand(getOpcode() == ISD::VP_LOAD ? 1 : 2);
+ }
+ const SDValue &getMask() const {
+ return getOperand(getOpcode() == ISD::VP_LOAD ? 3 : 4);
+ }
+ const SDValue &getVectorLength() const {
+ return getOperand(getOpcode() == ISD::VP_LOAD ? 4 : 5);
+ }
+
+ /// Return the addressing mode for this load or store:
+ /// unindexed, pre-inc, pre-dec, post-inc, or post-dec.
+ ISD::MemIndexedMode getAddressingMode() const {
+ return static_cast<ISD::MemIndexedMode>(LSBaseSDNodeBits.AddressingMode);
+ }
+
+ /// Return true if this is a pre/post inc/dec load/store.
+ bool isIndexed() const { return getAddressingMode() != ISD::UNINDEXED; }
+
+ /// Return true if this is NOT a pre/post inc/dec load/store.
+ bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; }
+
+ static bool classof(const SDNode *N) {
+ return N->getOpcode() == ISD::VP_LOAD || N->getOpcode() == ISD::VP_STORE;
+ }
+};
+
+/// This class is used to represent a VP_LOAD node
+class VPLoadSDNode : public VPLoadStoreSDNode {
+public:
+ friend class SelectionDAG;
+
+ VPLoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
+ ISD::MemIndexedMode AM, ISD::LoadExtType ETy, bool isExpanding,
+ EVT MemVT, MachineMemOperand *MMO)
+ : VPLoadStoreSDNode(ISD::VP_LOAD, Order, dl, VTs, AM, MemVT, MMO) {
+ LoadSDNodeBits.ExtTy = ETy;
+ LoadSDNodeBits.IsExpanding = isExpanding;
+ }
+
+ ISD::LoadExtType getExtensionType() const {
+ return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy);
+ }
+
+ const SDValue &getBasePtr() const { return getOperand(1); }
+ const SDValue &getOffset() const { return getOperand(2); }
+ const SDValue &getMask() const { return getOperand(3); }
+ const SDValue &getVectorLength() const { return getOperand(4); }
+
+ static bool classof(const SDNode *N) {
+ return N->getOpcode() == ISD::VP_LOAD;
+ }
+ bool isExpandingLoad() const { return LoadSDNodeBits.IsExpanding; }
+};
+
+/// This class is used to represent a VP_STORE node
+class VPStoreSDNode : public VPLoadStoreSDNode {
+public:
+ friend class SelectionDAG;
+
+ VPStoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
+ ISD::MemIndexedMode AM, bool isTrunc, bool isCompressing,
+ EVT MemVT, MachineMemOperand *MMO)
+ : VPLoadStoreSDNode(ISD::VP_STORE, Order, dl, VTs, AM, MemVT, MMO) {
+ StoreSDNodeBits.IsTruncating = isTrunc;
+ StoreSDNodeBits.IsCompressing = isCompressing;
+ }
+
+ /// Return true if this is a truncating store.
+ /// For integers this is the same as doing a TRUNCATE and storing the result.
+ /// For floats, it is the same as doing an FP_ROUND and storing the result.
+ bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
+
+ /// Returns true if the op does a compression to the vector before storing.
+ /// The node contiguously stores the active elements (integers or floats)
+ /// in src (those with their respective bit set in writemask k) to unaligned
+ /// memory at base_addr.
+ bool isCompressingStore() const { return StoreSDNodeBits.IsCompressing; }
+
+ const SDValue &getValue() const { return getOperand(1); }
+ const SDValue &getBasePtr() const { return getOperand(2); }
+ const SDValue &getOffset() const { return getOperand(3); }
+ const SDValue &getMask() const { return getOperand(4); }
+ const SDValue &getVectorLength() const { return getOperand(5); }
+
+ static bool classof(const SDNode *N) {
+ return N->getOpcode() == ISD::VP_STORE;
+ }
+};
+
/// This base class is used to represent MLOAD and MSTORE nodes
class MaskedLoadStoreSDNode : public MemSDNode {
public:
@@ -2424,6 +2568,94 @@ public:
};
/// This is a base class used to represent
+/// VP_GATHER and VP_SCATTER nodes
+///
+class VPGatherScatterSDNode : public MemSDNode {
+public:
+ friend class SelectionDAG;
+
+ VPGatherScatterSDNode(ISD::NodeType NodeTy, unsigned Order,
+ const DebugLoc &dl, SDVTList VTs, EVT MemVT,
+ MachineMemOperand *MMO, ISD::MemIndexType IndexType)
+ : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
+ LSBaseSDNodeBits.AddressingMode = IndexType;
+ assert(getIndexType() == IndexType && "Value truncated");
+ }
+
+ /// How is Index applied to BasePtr when computing addresses.
+ ISD::MemIndexType getIndexType() const {
+ return static_cast<ISD::MemIndexType>(LSBaseSDNodeBits.AddressingMode);
+ }
+ bool isIndexScaled() const {
+ return (getIndexType() == ISD::SIGNED_SCALED) ||
+ (getIndexType() == ISD::UNSIGNED_SCALED);
+ }
+ bool isIndexSigned() const {
+ return (getIndexType() == ISD::SIGNED_SCALED) ||
+ (getIndexType() == ISD::SIGNED_UNSCALED);
+ }
+
+ // The operands of the two nodes are laid out as follows:
+ // VPGatherSDNode (Chain, base, index, scale, mask, vlen)
+ // VPScatterSDNode (Chain, value, base, index, scale, mask, vlen)
+ // Mask is a vector of i1 elements
+ const SDValue &getBasePtr() const {
+ return getOperand((getOpcode() == ISD::VP_GATHER) ? 1 : 2);
+ }
+ const SDValue &getIndex() const {
+ return getOperand((getOpcode() == ISD::VP_GATHER) ? 2 : 3);
+ }
+ const SDValue &getScale() const {
+ return getOperand((getOpcode() == ISD::VP_GATHER) ? 3 : 4);
+ }
+ const SDValue &getMask() const {
+ return getOperand((getOpcode() == ISD::VP_GATHER) ? 4 : 5);
+ }
+ const SDValue &getVectorLength() const {
+ return getOperand((getOpcode() == ISD::VP_GATHER) ? 5 : 6);
+ }
+
+ static bool classof(const SDNode *N) {
+ return N->getOpcode() == ISD::VP_GATHER ||
+ N->getOpcode() == ISD::VP_SCATTER;
+ }
+};
+
+/// This class is used to represent a VP_GATHER node
+///
+class VPGatherSDNode : public VPGatherScatterSDNode {
+public:
+ friend class SelectionDAG;
+
+ VPGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, EVT MemVT,
+ MachineMemOperand *MMO, ISD::MemIndexType IndexType)
+ : VPGatherScatterSDNode(ISD::VP_GATHER, Order, dl, VTs, MemVT, MMO,
+ IndexType) {}
+
+ static bool classof(const SDNode *N) {
+ return N->getOpcode() == ISD::VP_GATHER;
+ }
+};
+
+/// This class is used to represent a VP_SCATTER node
+///
+class VPScatterSDNode : public VPGatherScatterSDNode {
+public:
+ friend class SelectionDAG;
+
+ VPScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, EVT MemVT,
+ MachineMemOperand *MMO, ISD::MemIndexType IndexType)
+ : VPGatherScatterSDNode(ISD::VP_SCATTER, Order, dl, VTs, MemVT, MMO,
+ IndexType) {}
+
+ const SDValue &getValue() const { return getOperand(1); }
+
+ static bool classof(const SDNode *N) {
+ return N->getOpcode() == ISD::VP_SCATTER;
+ }
+};
+
+/// This is a base class used to represent
/// MGATHER and MSCATTER nodes
///
class MaskedGatherScatterSDNode : public MemSDNode {
diff --git a/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h b/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h
index 51f1d7d6fd21..bc22d7789856 100644
--- a/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h
+++ b/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h
@@ -183,12 +183,12 @@ struct JumpTableHeader {
const Value *SValue;
MachineBasicBlock *HeaderBB;
bool Emitted;
- bool OmitRangeCheck;
+ bool FallthroughUnreachable;
JumpTableHeader(APInt F, APInt L, const Value *SV, MachineBasicBlock *H,
bool E = false)
: First(std::move(F)), Last(std::move(L)), SValue(SV), HeaderBB(H),
- Emitted(E), OmitRangeCheck(false) {}
+ Emitted(E), FallthroughUnreachable(false) {}
};
using JumpTableBlock = std::pair<JumpTableHeader, JumpTable>;
@@ -218,14 +218,14 @@ struct BitTestBlock {
BitTestInfo Cases;
BranchProbability Prob;
BranchProbability DefaultProb;
- bool OmitRangeCheck;
+ bool FallthroughUnreachable;
BitTestBlock(APInt F, APInt R, const Value *SV, unsigned Rg, MVT RgVT, bool E,
bool CR, MachineBasicBlock *P, MachineBasicBlock *D,
BitTestInfo C, BranchProbability Pr)
: First(std::move(F)), Range(std::move(R)), SValue(SV), Reg(Rg),
RegVT(RgVT), Emitted(E), ContiguousRange(CR), Parent(P), Default(D),
- Cases(std::move(C)), Prob(Pr), OmitRangeCheck(false) {}
+ Cases(std::move(C)), Prob(Pr), FallthroughUnreachable(false) {}
};
/// Return the range of values within a range.
diff --git a/llvm/include/llvm/CodeGen/TargetCallingConv.h b/llvm/include/llvm/CodeGen/TargetCallingConv.h
index 29e644898f6b..7713dd0800c0 100644
--- a/llvm/include/llvm/CodeGen/TargetCallingConv.h
+++ b/llvm/include/llvm/CodeGen/TargetCallingConv.h
@@ -247,11 +247,11 @@ namespace ISD {
unsigned PartOffset;
OutputArg() = default;
- OutputArg(ArgFlagsTy flags, EVT vt, EVT argvt, bool isfixed,
+ OutputArg(ArgFlagsTy flags, MVT vt, EVT argvt, bool isfixed,
unsigned origIdx, unsigned partOffs)
- : Flags(flags), IsFixed(isfixed), OrigArgIndex(origIdx),
- PartOffset(partOffs) {
- VT = vt.getSimpleVT();
+ : Flags(flags), IsFixed(isfixed), OrigArgIndex(origIdx),
+ PartOffset(partOffs) {
+ VT = vt;
ArgVT = argvt;
}
};
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 05d0591f1e5d..8bc730a3eda5 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -411,9 +411,12 @@ public:
/// This method returns a null pointer if the transformation cannot be
/// performed, otherwise it returns the last new instruction.
///
- virtual MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI,
- MachineInstr &MI,
- LiveVariables *LV) const {
+ /// If \p LIS is not nullptr, the LiveIntervals info should be updated for
+ /// replacing \p MI with new instructions, even though this function does not
+ /// remove MI.
+ virtual MachineInstr *convertToThreeAddress(MachineInstr &MI,
+ LiveVariables *LV,
+ LiveIntervals *LIS) const {
return nullptr;
}
@@ -583,15 +586,14 @@ public:
}
/// Insert an unconditional indirect branch at the end of \p MBB to \p
- /// NewDestBB. \p BrOffset indicates the offset of \p NewDestBB relative to
+ /// NewDestBB. Optionally, insert code to restore clobbered registers in \p
+ /// RestoreBB. \p BrOffset indicates the offset of \p NewDestBB relative to
/// the offset of the position to insert the new branch.
- ///
- /// \returns The number of bytes added to the block.
- virtual unsigned insertIndirectBranch(MachineBasicBlock &MBB,
- MachineBasicBlock &NewDestBB,
- const DebugLoc &DL,
- int64_t BrOffset = 0,
- RegScavenger *RS = nullptr) const {
+ virtual void insertIndirectBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock &NewDestBB,
+ MachineBasicBlock &RestoreBB,
+ const DebugLoc &DL, int64_t BrOffset = 0,
+ RegScavenger *RS = nullptr) const {
llvm_unreachable("target did not implement");
}
@@ -1537,7 +1539,8 @@ public:
/// compares against in CmpValue. Return true if the comparison instruction
/// can be analyzed.
virtual bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
- Register &SrcReg2, int &Mask, int &Value) const {
+ Register &SrcReg2, int64_t &Mask,
+ int64_t &Value) const {
return false;
}
@@ -1545,7 +1548,8 @@ public:
/// into something more efficient. E.g., on ARM most instructions can set the
/// flags register, obviating the need for a separate CMP.
virtual bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
- Register SrcReg2, int Mask, int Value,
+ Register SrcReg2, int64_t Mask,
+ int64_t Value,
const MachineRegisterInfo *MRI) const {
return false;
}
@@ -1624,9 +1628,6 @@ public:
unsigned defaultDefLatency(const MCSchedModel &SchedModel,
const MachineInstr &DefMI) const;
- int computeDefOperandLatency(const InstrItineraryData *ItinData,
- const MachineInstr &DefMI) const;
-
/// Return true if this opcode has high latency to its result.
virtual bool isHighLatencyDef(int opc) const { return false; }
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 692dc4d7d4cf..87f5168ec48f 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -30,6 +30,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -371,10 +372,18 @@ public:
return getPointerTy(DL);
}
- /// EVT is not used in-tree, but is used by out-of-tree target.
- /// A documentation for this function would be nice...
+ /// Return the type to use for a scalar shift opcode, given the shifted amount
+ /// type. Targets should return a legal type if the input type is legal.
+ /// Targets can return a type that is too small if the input type is illegal.
virtual MVT getScalarShiftAmountTy(const DataLayout &, EVT) const;
+ /// Returns the type for the shift amount of a shift opcode. For vectors,
+ /// returns the input type. For scalars, behavior depends on \p LegalTypes. If
+ /// \p LegalTypes is true, calls getScalarShiftAmountTy, otherwise uses
+ /// pointer type. If getScalarShiftAmountTy or pointer type cannot represent
+ /// all possible shift amounts, returns MVT::i32. In general, \p LegalTypes
+ /// should be set to true for calls during type legalization and after type
+ /// legalization has been completed.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL,
bool LegalTypes = true) const;
@@ -591,7 +600,7 @@ public:
/// Returns if it's reasonable to merge stores to MemVT size.
virtual bool canMergeStoresTo(unsigned AS, EVT MemVT,
- const SelectionDAG &DAG) const {
+ const MachineFunction &MF) const {
return true;
}
@@ -1396,6 +1405,11 @@ public:
return NVT;
}
+ virtual EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty,
+ bool AllowUnknown = false) const {
+ return getValueType(DL, Ty, AllowUnknown);
+ }
+
/// Return the EVT corresponding to this LLVM type. This is fixed by the LLVM
/// operations except for the pointer size. If AllowUnknown is true, this
/// will return MVT::Other for types with no EVT counterpart (e.g. structs),
@@ -1448,7 +1462,7 @@ public:
/// Return the desired alignment for ByVal or InAlloca aggregate function
/// arguments in the caller parameter area. This is the actual alignment, not
/// its logarithm.
- virtual unsigned getByValTypeAlignment(Type *Ty, const DataLayout &DL) const;
+ virtual uint64_t getByValTypeAlignment(Type *Ty, const DataLayout &DL) const;
/// Return the type of registers that this ValueType will eventually require.
MVT getRegisterType(MVT VT) const {
@@ -1763,9 +1777,7 @@ public:
Align getPrefFunctionAlignment() const { return PrefFunctionAlignment; }
/// Return the preferred loop alignment.
- virtual Align getPrefLoopAlignment(MachineLoop *ML = nullptr) const {
- return PrefLoopAlignment;
- }
+ virtual Align getPrefLoopAlignment(MachineLoop *ML = nullptr) const;
/// Should loops be aligned even when the function is marked OptSize (but not
/// MinSize).
@@ -2077,6 +2089,20 @@ public:
return false;
}
+ /// Return true if it may be profitable to transform
+ /// (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
+ /// This may not be true if c1 and c2 can be represented as immediates but
+ /// c1*c2 cannot, for example.
+ /// The target should check if c1, c2 and c1*c2 can be represented as
+ /// immediates, or whether they have to be materialized into registers. If the
+ /// target is unsure about some cases, it can return true by default and let
+ /// the DAGCombiner decide.
+ /// AddNode is (add x, c1), and ConstNode is c2.
+ virtual bool isMulAddWithConstProfitable(const SDValue &AddNode,
+ const SDValue &ConstNode) const {
+ return true;
+ }
+
/// Return true if it is more correct/profitable to use strict FP_TO_INT
/// conversion operations - canonicalizing the FP source value instead of
/// converting all cases and then selecting based on value.
@@ -2177,8 +2203,7 @@ protected:
/// Indicate that the specified operation does not work with the specified
/// type and indicate what to do about it. Note that VT may refer to either
/// the type of a result or that of an operand of Op.
- void setOperationAction(unsigned Op, MVT VT,
- LegalizeAction Action) {
+ void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action) {
assert(Op < array_lengthof(OpActions[0]) && "Table isn't big enough!");
OpActions[(unsigned)VT.SimpleTy][Op] = Action;
}
@@ -2197,8 +2222,7 @@ protected:
/// Indicate that the specified truncating store does not work with the
/// specified type and indicate what to do about it.
- void setTruncStoreAction(MVT ValVT, MVT MemVT,
- LegalizeAction Action) {
+ void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action) {
assert(ValVT.isValid() && MemVT.isValid() && "Table isn't big enough!");
TruncStoreActions[(unsigned)ValVT.SimpleTy][MemVT.SimpleTy] = Action;
}
@@ -2506,8 +2530,11 @@ public:
return false;
}
- virtual bool isTruncateFree(EVT FromVT, EVT ToVT) const {
- return false;
+ virtual bool isTruncateFree(EVT FromVT, EVT ToVT) const { return false; }
+ virtual bool isTruncateFree(LLT FromTy, LLT ToTy, const DataLayout &DL,
+ LLVMContext &Ctx) const {
+ return isTruncateFree(getApproximateEVTForLLT(FromTy, DL, Ctx),
+ getApproximateEVTForLLT(ToTy, DL, Ctx));
}
virtual bool isProfitableToHoist(Instruction *I) const { return true; }
@@ -2583,8 +2610,11 @@ public:
return false;
}
- virtual bool isZExtFree(EVT FromTy, EVT ToTy) const {
- return false;
+ virtual bool isZExtFree(EVT FromTy, EVT ToTy) const { return false; }
+ virtual bool isZExtFree(LLT FromTy, LLT ToTy, const DataLayout &DL,
+ LLVMContext &Ctx) const {
+ return isZExtFree(getApproximateEVTForLLT(FromTy, DL, Ctx),
+ getApproximateEVTForLLT(ToTy, DL, Ctx));
}
/// Return true if sign-extension from FromTy to ToTy is cheaper than
@@ -3807,7 +3837,7 @@ public:
RetSExt = Call.hasRetAttr(Attribute::SExt);
RetZExt = Call.hasRetAttr(Attribute::ZExt);
NoMerge = Call.hasFnAttr(Attribute::NoMerge);
-
+
Callee = Target;
CallConv = Call.getCallingConv();
@@ -4424,33 +4454,29 @@ public:
/// Expand CTPOP nodes. Expands vector/scalar CTPOP nodes,
/// vector nodes can only succeed if all operations are legal/custom.
/// \param N Node to expand
- /// \param Result output after conversion
- /// \returns True, if the expansion was successful, false otherwise
- bool expandCTPOP(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
+ /// \returns The expansion result or SDValue() if it fails.
+ SDValue expandCTPOP(SDNode *N, SelectionDAG &DAG) const;
/// Expand CTLZ/CTLZ_ZERO_UNDEF nodes. Expands vector/scalar CTLZ nodes,
/// vector nodes can only succeed if all operations are legal/custom.
/// \param N Node to expand
- /// \param Result output after conversion
- /// \returns True, if the expansion was successful, false otherwise
- bool expandCTLZ(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
+ /// \returns The expansion result or SDValue() if it fails.
+ SDValue expandCTLZ(SDNode *N, SelectionDAG &DAG) const;
/// Expand CTTZ/CTTZ_ZERO_UNDEF nodes. Expands vector/scalar CTTZ nodes,
/// vector nodes can only succeed if all operations are legal/custom.
/// \param N Node to expand
- /// \param Result output after conversion
- /// \returns True, if the expansion was successful, false otherwise
- bool expandCTTZ(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
+ /// \returns The expansion result or SDValue() if it fails.
+ SDValue expandCTTZ(SDNode *N, SelectionDAG &DAG) const;
/// Expand ABS nodes. Expands vector/scalar ABS nodes,
/// vector nodes can only succeed if all operations are legal/custom.
/// (ABS x) -> (XOR (ADD x, (SRA x, type_size)), (SRA x, type_size))
/// \param N Node to expand
- /// \param Result output after conversion
/// \param IsNegative indicate negated abs
- /// \returns True, if the expansion was successful, false otherwise
- bool expandABS(SDNode *N, SDValue &Result, SelectionDAG &DAG,
- bool IsNegative = false) const;
+ /// \returns The expansion result or SDValue() if it fails.
+ SDValue expandABS(SDNode *N, SelectionDAG &DAG,
+ bool IsNegative = false) const;
/// Expand BSWAP nodes. Expands scalar/vector BSWAP nodes with i16/i32/i64
/// scalar types. Returns SDValue() if expand fails.
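
Among the TargetLowering changes above, the new isMulAddWithConstProfitable hook lets a target veto the (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2) reassociation when the folded constant no longer fits an immediate. A hedged sketch of one possible override; MyTargetLowering and the 12-bit immediate width are assumptions, not part of this patch:

    // Illustrative override; the class and immediate width are assumed.
    bool MyTargetLowering::isMulAddWithConstProfitable(
        const llvm::SDValue &AddNode, const llvm::SDValue &ConstNode) const {
      using namespace llvm;
      auto *C1 = dyn_cast<ConstantSDNode>(AddNode.getOperand(1));
      auto *C2 = dyn_cast<ConstantSDNode>(ConstNode);
      if (!C1 || !C2)
        return true; // When in doubt, let the DAGCombiner decide.
      // Only profitable if the folded constant c1*c2 still fits the target's
      // (assumed) 12-bit signed immediate field.
      APInt Product = C1->getAPIntValue() * C2->getAPIntValue();
      return Product.isSignedIntN(12);
    }
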
diff --git a/llvm/include/llvm/CodeGen/TargetPassConfig.h b/llvm/include/llvm/CodeGen/TargetPassConfig.h
index 11138039a3c5..9b13b61fc9de 100644
--- a/llvm/include/llvm/CodeGen/TargetPassConfig.h
+++ b/llvm/include/llvm/CodeGen/TargetPassConfig.h
@@ -187,8 +187,7 @@ public:
void substitutePass(AnalysisID StandardID, IdentifyingPassPtr TargetID);
/// Insert InsertedPassID pass after TargetPassID pass.
- void insertPass(AnalysisID TargetPassID, IdentifyingPassPtr InsertedPassID,
- bool VerifyAfter = true);
+ void insertPass(AnalysisID TargetPassID, IdentifyingPassPtr InsertedPassID);
/// Allow the target to enable a specific standard pass by default.
void enablePass(AnalysisID PassID) { substitutePass(PassID, PassID); }
@@ -323,8 +322,7 @@ public:
/// Add standard passes after a pass that has just been added. For example,
/// the MachineVerifier if it is enabled.
- void addMachinePostPasses(const std::string &Banner, bool AllowVerify = true,
- bool AllowStrip = true);
+ void addMachinePostPasses(const std::string &Banner);
/// Check whether or not GlobalISel should abort on error.
/// When this is disabled, GlobalISel will fall back on SDISel instead of
@@ -449,16 +447,12 @@ protected:
/// Add a CodeGen pass at this point in the pipeline after checking overrides.
/// Return the pass that was added, or zero if no pass was added.
- /// @p verifyAfter if true and adding a machine function pass add an extra
- /// machine verification pass afterwards.
- AnalysisID addPass(AnalysisID PassID, bool verifyAfter = true);
+ AnalysisID addPass(AnalysisID PassID);
/// Add a pass to the PassManager if that pass is supposed to be run, as
/// determined by the StartAfter and StopAfter options. Takes ownership of the
/// pass.
- /// @p verifyAfter if true and adding a machine function pass add an extra
- /// machine verification pass afterwards.
- void addPass(Pass *P, bool verifyAfter = true);
+ void addPass(Pass *P);
/// addMachinePasses helper to create the target-selected or overriden
/// regalloc pass.
diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
index 92ce5b737090..8483d078ca74 100644
--- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
@@ -57,6 +57,8 @@ public:
/// Classes with a higher priority value are assigned first by register
/// allocators using a greedy heuristic. The value is in the range [0,63].
const uint8_t AllocationPriority;
+ /// Configurable target specific flags.
+ const uint8_t TSFlags;
/// Whether the class supports two (or more) disjunct subregister indices.
const bool HasDisjunctSubRegs;
/// Whether a combination of subregisters can cover every register in the
@@ -871,10 +873,6 @@ public:
/// (3) Bottom-up allocation is no longer guaranteed to optimally color.
virtual bool reverseLocalAssignment() const { return false; }
- /// Add the allocation priority to global and split ranges as well as the
- /// local ranges when registers are added to the queue.
- virtual bool addAllocPriorityToGlobalRanges() const { return false; }
-
/// Allow the target to override the cost of using a callee-saved register for
/// the first time. Default value of 0 means we will use a callee-saved
/// register if it is available.
diff --git a/llvm/include/llvm/CodeGen/TargetSchedule.h b/llvm/include/llvm/CodeGen/TargetSchedule.h
index aa6b82e14aa6..049ede89ab46 100644
--- a/llvm/include/llvm/CodeGen/TargetSchedule.h
+++ b/llvm/include/llvm/CodeGen/TargetSchedule.h
@@ -15,7 +15,6 @@
#ifndef LLVM_CODEGEN_TARGETSCHEDULE_H
#define LLVM_CODEGEN_TARGETSCHEDULE_H
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
diff --git a/llvm/include/llvm/CodeGen/ValueTypes.td b/llvm/include/llvm/CodeGen/ValueTypes.td
index 0e88e705e16b..7f989e08e9bf 100644
--- a/llvm/include/llvm/CodeGen/ValueTypes.td
+++ b/llvm/include/llvm/CodeGen/ValueTypes.td
@@ -216,6 +216,7 @@ def untyped : ValueType<8, 174>; // Produces an untyped value
def funcref : ValueType<0, 175>; // WebAssembly's funcref type
def externref : ValueType<0, 176>; // WebAssembly's externref type
def x86amx : ValueType<8192, 177>; // X86 AMX value
+def i64x8 : ValueType<512, 178>; // 8 Consecutive GPRs (AArch64)
def token : ValueType<0, 248>; // TokenTy
@@ -243,7 +244,7 @@ def Any : ValueType<0, 255>;
/// This class is for targets that want to use pointer types in patterns
/// with the GlobalISelEmitter. Targets must define their own pointer
/// derived from this class. The scalar argument should be an
-/// integer type with the same bit size as the ponter.
+/// integer type with the same bit size as the pointer.
/// e.g. def p0 : PtrValueType <i64, 0>;
class PtrValueType <ValueType scalar, int addrspace> :
diff --git a/llvm/include/llvm/DWARFLinker/DWARFLinker.h b/llvm/include/llvm/DWARFLinker/DWARFLinker.h
index 7b89c9f66f86..1c6d0b1ead86 100644
--- a/llvm/include/llvm/DWARFLinker/DWARFLinker.h
+++ b/llvm/include/llvm/DWARFLinker/DWARFLinker.h
@@ -80,7 +80,7 @@ public:
CompileUnit::DIEInfo &Info) = 0;
/// Apply the valid relocations to the buffer \p Data, taking into
- /// account that Data is at \p BaseOffset in the debug_info section.
+ /// account that Data is at \p BaseOffset in the .debug_info section.
///
/// \returns true whether any reloc has been applied.
virtual bool applyValidRelocs(MutableArrayRef<char> Data, uint64_t BaseOffset,
@@ -109,7 +109,7 @@ public:
/// Emit section named SecName with data SecData.
virtual void emitSectionContents(StringRef SecData, StringRef SecName) = 0;
- /// Emit the abbreviation table \p Abbrevs to the debug_abbrev section.
+ /// Emit the abbreviation table \p Abbrevs to the .debug_abbrev section.
virtual void
emitAbbrevs(const std::vector<std::unique_ptr<DIEAbbrev>> &Abbrevs,
unsigned DwarfVersion) = 0;
@@ -137,7 +137,7 @@ public:
virtual void
emitAppleTypes(AccelTable<AppleAccelTableStaticTypeData> &Table) = 0;
- /// Emit debug_ranges for \p FuncRange by translating the
+ /// Emit .debug_ranges for \p FuncRange by translating the
/// original \p Entries.
virtual void emitRangesEntries(
int64_t UnitPcOffset, uint64_t OrigLowPc,
@@ -145,17 +145,17 @@ public:
const std::vector<DWARFDebugRangeList::RangeListEntry> &Entries,
unsigned AddressSize) = 0;
- /// Emit debug_aranges entries for \p Unit and if \p DoRangesSection is true,
- /// also emit the debug_ranges entries for the DW_TAG_compile_unit's
+ /// Emit .debug_aranges entries for \p Unit and if \p DoRangesSection is true,
+ /// also emit the .debug_ranges entries for the DW_TAG_compile_unit's
/// DW_AT_ranges attribute.
virtual void emitUnitRangesEntries(CompileUnit &Unit,
bool DoRangesSection) = 0;
- /// Copy the debug_line over to the updated binary while unobfuscating the
+ /// Copy the .debug_line over to the updated binary while unobfuscating the
/// file names and directories.
virtual void translateLineTable(DataExtractor LineData, uint64_t Offset) = 0;
- /// Emit the line table described in \p Rows into the debug_line section.
+ /// Emit the line table described in \p Rows into the .debug_line section.
virtual void emitLineTableForUnit(MCDwarfLineTableParams Params,
StringRef PrologueBytes,
unsigned MinInstLength,
@@ -175,7 +175,7 @@ public:
virtual void emitFDE(uint32_t CIEOffset, uint32_t AddreSize, uint32_t Address,
StringRef Bytes) = 0;
- /// Emit the debug_loc contribution for \p Unit by copying the entries from
+ /// Emit the .debug_loc contribution for \p Unit by copying the entries from
/// \p Dwarf and offsetting them. Update the location attributes to point to
/// the new entries.
virtual void emitLocationsForUnit(
@@ -184,7 +184,7 @@ public:
ProcessExpr) = 0;
/// Emit the compilation unit header for \p Unit in the
- /// debug_info section.
+ /// .debug_info section.
///
/// As a side effect, this also switches the current Dwarf version
/// of the MC layer to the one of U.getOrigUnit().
@@ -695,7 +695,7 @@ private:
/// Assign an abbreviation number to \p Abbrev
void assignAbbrev(DIEAbbrev &Abbrev);
- /// Compute and emit debug_ranges section for \p Unit, and
+ /// Compute and emit .debug_ranges section for \p Unit, and
/// patch the attributes referencing it.
void patchRangesForUnit(const CompileUnit &Unit, DWARFContext &Dwarf,
const DWARFFile &File) const;
@@ -706,7 +706,7 @@ private:
/// Extract the line tables from the original dwarf, extract the relevant
/// parts according to the linked function ranges and emit the result in the
- /// debug_line section.
+ /// .debug_line section.
void patchLineTableForUnit(CompileUnit &Unit, DWARFContext &OrigDwarf,
const DWARFFile &File);
@@ -753,7 +753,7 @@ private:
StringMap<uint32_t> EmittedCIEs;
/// Offset of the last CIE that has been emitted in the output
- /// debug_frame section.
+ /// .debug_frame section.
uint32_t LastCIEOffset = 0;
/// Apple accelerator tables.
diff --git a/llvm/include/llvm/DebugInfo/CodeView/CVRecord.h b/llvm/include/llvm/DebugInfo/CodeView/CVRecord.h
index 18392e3608e7..99de8ebef812 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/CVRecord.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/CVRecord.h
@@ -10,7 +10,6 @@
#define LLVM_DEBUGINFO_CODEVIEW_CVRECORD_H
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/Optional.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/CodeViewError.h"
#include "llvm/DebugInfo/CodeView/RecordSerialization.h"
diff --git a/llvm/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def b/llvm/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def
index 48ea7e52c172..4cee3abdde87 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def
+++ b/llvm/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def
@@ -373,7 +373,7 @@ CV_REGISTER(AMD64_K7, 765)
CV_REGISTER(ARM_NOREG, 0)
-// General purpose 32-bit integer regisers
+// General purpose 32-bit integer registers
CV_REGISTER(ARM_R0, 10)
CV_REGISTER(ARM_R1, 11)
diff --git a/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h b/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h
index bdc6cf46509b..226a436c0930 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h
@@ -18,6 +18,7 @@
namespace llvm {
class ScopedPrinter;
+class StringRef;
namespace codeview {
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h
index dcb26f12b13e..cdf3f60f88be 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h
@@ -144,6 +144,27 @@ public:
const dwarf::Attribute Attr,
const DWARFUnit &U) const;
+ /// Compute an offset from a DIE specified by DIE offset and attribute index.
+ ///
+ /// \param AttrIndex an index of a DWARF attribute.
+ /// \param DIEOffset the DIE offset that points to the ULEB128 abbreviation
+ /// code in the .debug_info data.
+ /// \param U the DWARFUnit that contains the DIE.
+ /// \returns an offset of the attribute.
+ uint64_t getAttributeOffsetFromIndex(uint32_t AttrIndex, uint64_t DIEOffset,
+ const DWARFUnit &U) const;
+
+ /// Extract a DWARF form value from a DIE specified by attribute index and
+ /// its offset.
+ ///
+ /// \param AttrIndex an index of a DWARF attribute.
+ /// \param Offset offset of the attribute.
+ /// \param U the DWARFUnit that contains the DIE.
+ /// \returns Optional DWARF form value if the attribute was extracted.
+ Optional<DWARFFormValue>
+ getAttributeValueFromOffset(uint32_t AttrIndex, uint64_t Offset,
+ const DWARFUnit &U) const;
+
bool extract(DataExtractor Data, uint64_t* OffsetPtr);
void dump(raw_ostream &OS) const;
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFAddressRange.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFAddressRange.h
index 154f7893aa17..537a03ec11fc 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFAddressRange.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFAddressRange.h
@@ -39,6 +39,8 @@ struct DWARFAddressRange {
/// Returns true if [LowPC, HighPC) intersects with [RHS.LowPC, RHS.HighPC).
bool intersects(const DWARFAddressRange &RHS) const {
assert(valid() && RHS.valid());
+ if (SectionIndex != RHS.SectionIndex)
+ return false;
// Empty ranges can't intersect.
if (LowPC == HighPC || RHS.LowPC == RHS.HighPC)
return false;
@@ -69,12 +71,12 @@ struct DWARFAddressRange {
inline bool operator<(const DWARFAddressRange &LHS,
const DWARFAddressRange &RHS) {
- return std::tie(LHS.LowPC, LHS.HighPC) < std::tie(RHS.LowPC, RHS.HighPC);
+ return std::tie(LHS.SectionIndex, LHS.LowPC, LHS.HighPC) < std::tie(RHS.SectionIndex, RHS.LowPC, RHS.HighPC);
}
inline bool operator==(const DWARFAddressRange &LHS,
const DWARFAddressRange &RHS) {
- return std::tie(LHS.LowPC, LHS.HighPC) == std::tie(RHS.LowPC, RHS.HighPC);
+ return std::tie(LHS.SectionIndex, LHS.LowPC, LHS.HighPC) == std::tie(RHS.SectionIndex, RHS.LowPC, RHS.HighPC);
}
raw_ostream &operator<<(raw_ostream &OS, const DWARFAddressRange &R);
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
index 75b2280658f1..902973ff5722 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
@@ -243,6 +243,7 @@ public:
}
DWARFCompileUnit *getDWOCompileUnitForHash(uint64_t Hash);
+ DWARFTypeUnit *getTypeUnitForHash(uint16_t Version, uint64_t Hash, bool IsDWO);
/// Return the compile unit that includes an offset (relative to .debug_info).
DWARFCompileUnit *getCompileUnitForOffset(uint64_t Offset);
@@ -373,8 +374,24 @@ public:
return {2, 4, 8};
}
static bool isAddressSizeSupported(unsigned AddressSize) {
- return llvm::any_of(getSupportedAddressSizes(),
- [=](auto Elem) { return Elem == AddressSize; });
+ return llvm::is_contained(getSupportedAddressSizes(), AddressSize);
+ }
+ template <typename... Ts>
+ static Error checkAddressSizeSupported(unsigned AddressSize,
+ std::error_code EC, char const *Fmt,
+ const Ts &...Vals) {
+ if (isAddressSizeSupported(AddressSize))
+ return Error::success();
+ std::string Buffer;
+ raw_string_ostream Stream(Buffer);
+ Stream << format(Fmt, Vals...)
+ << " has unsupported address size: " << AddressSize
+ << " (supported are ";
+ ListSeparator LS;
+ for (unsigned Size : DWARFContext::getSupportedAddressSizes())
+ Stream << LS << Size;
+ Stream << ')';
+ return make_error<StringError>(Stream.str(), EC);
}
std::shared_ptr<DWARFContext> getDWOContext(StringRef AbsolutePath);
@@ -387,9 +404,12 @@ public:
function_ref<void(Error)> getWarningHandler() { return WarningHandler; }
+ enum class ProcessDebugRelocations { Process, Ignore };
+
static std::unique_ptr<DWARFContext>
- create(const object::ObjectFile &Obj, const LoadedObjectInfo *L = nullptr,
- std::string DWPName = "",
+ create(const object::ObjectFile &Obj,
+ ProcessDebugRelocations RelocAction = ProcessDebugRelocations::Process,
+ const LoadedObjectInfo *L = nullptr, std::string DWPName = "",
std::function<void(Error)> RecoverableErrorHandler =
WithColor::defaultErrorHandler,
std::function<void(Error)> WarningHandler =
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h
index 0bfe9f376f46..c4370cb54113 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h
@@ -24,9 +24,11 @@ class DWARFDebugInfoEntry {
/// Offset within the .debug_info of the start of this entry.
uint64_t Offset = 0;
- /// The integer depth of this DIE within the compile unit DIEs where the
- /// compile/type unit DIE has a depth of zero.
- uint32_t Depth = 0;
+ /// Index of the parent die. UINT32_MAX if there is no parent.
+ uint32_t ParentIdx = UINT32_MAX;
+
+ /// Index of the sibling die. Zero if there is no sibling.
+ uint32_t SiblingIdx = 0;
const DWARFAbbreviationDeclaration *AbbrevDecl = nullptr;
@@ -36,15 +38,31 @@ public:
/// Extracts a debug info entry, which is a child of a given unit,
/// starting at a given offset. If DIE can't be extracted, returns false and
/// doesn't change OffsetPtr.
- bool extractFast(const DWARFUnit &U, uint64_t *OffsetPtr);
-
/// High performance extraction should use this call.
bool extractFast(const DWARFUnit &U, uint64_t *OffsetPtr,
const DWARFDataExtractor &DebugInfoData, uint64_t UEndOffset,
- uint32_t Depth);
+ uint32_t ParentIdx);
uint64_t getOffset() const { return Offset; }
- uint32_t getDepth() const { return Depth; }
+
+ /// Returns index of the parent die.
+ Optional<uint32_t> getParentIdx() const {
+ if (ParentIdx == UINT32_MAX)
+ return None;
+
+ return ParentIdx;
+ }
+
+ /// Returns index of the sibling die.
+ Optional<uint32_t> getSiblingIdx() const {
+ if (SiblingIdx == 0)
+ return None;
+
+ return SiblingIdx;
+ }
+
+ /// Set index of sibling.
+ void setSiblingIdx(uint32_t Idx) { SiblingIdx = Idx; }
dwarf::Tag getTag() const {
return AbbrevDecl ? AbbrevDecl->getTag() : dwarf::DW_TAG_null;
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
index d1d65372740b..ee15b6d4112d 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
@@ -110,10 +110,6 @@ public:
/// Length of the prologue in bytes.
uint64_t getLength() const;
- int32_t getMaxLineIncrementForSpecialOpcode() const {
- return LineBase + (int8_t)LineRange - 1;
- }
-
/// Get DWARF-version aware access to the file name entry at the provided
/// index.
const llvm::DWARFDebugLine::FileNameEntry &
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h
index 2f72c642a2d5..0d9f37c5610b 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h
@@ -49,12 +49,7 @@ public:
/// 2. An address, which defines the appropriate base address for
/// use in interpreting the beginning and ending address offsets of
/// subsequent entries of the location list.
- bool isBaseAddressSelectionEntry(uint8_t AddressSize) const {
- assert(AddressSize == 4 || AddressSize == 8);
- if (AddressSize == 4)
- return StartAddress == -1U;
- return StartAddress == -1ULL;
- }
+ bool isBaseAddressSelectionEntry(uint8_t AddressSize) const;
};
private:
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h
index 1903bab5e73f..8f93ebc4ebc0 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h
@@ -182,6 +182,8 @@ public:
DWARFDie getAttributeValueAsReferencedDie(dwarf::Attribute Attr) const;
DWARFDie getAttributeValueAsReferencedDie(const DWARFFormValue &V) const;
+ DWARFDie resolveTypeUnitReference() const;
+
/// Extract the range base attribute from this DIE as absolute section offset.
///
/// This is a utility function that checks for either the DW_AT_rnglists_base
@@ -220,16 +222,6 @@ public:
/// information is available.
Expected<DWARFAddressRangesVector> getAddressRanges() const;
- /// Get all address ranges for any DW_TAG_subprogram DIEs in this DIE or any
- /// of its children.
- ///
- /// Get the hi/low PC range if both attributes are available or exrtracts the
- /// non-contiguous address ranges from the DW_AT_ranges attribute for this DIE
- /// and all children.
- ///
- /// \param Ranges the addres range vector to fill in.
- void collectChildrenAddressRanges(DWARFAddressRangesVector &Ranges) const;
-
bool addressRangeContainsAddress(const uint64_t Address) const;
Expected<DWARFLocationExpressionsVector>
@@ -246,6 +238,8 @@ public:
/// for ShortName if LinkageName is not found.
/// Returns null if no name is found.
const char *getName(DINameKind Kind) const;
+ void getFullName(raw_string_ostream &,
+ std::string *OriginalFullName = nullptr) const;
/// Return the DIE short name resolving DW_AT_specification or
/// DW_AT_abstract_origin references if necessary. Returns null if no name
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h
index 794e859bfe72..b694eeacfd9d 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h
@@ -86,24 +86,30 @@ public:
uint64_t OperandEndOffsets[2];
public:
- Description &getDescription() { return Desc; }
- uint8_t getCode() { return Opcode; }
- uint64_t getRawOperand(unsigned Idx) { return Operands[Idx]; }
- uint64_t getOperandEndOffset(unsigned Idx) { return OperandEndOffsets[Idx]; }
- uint64_t getEndOffset() { return EndOffset; }
- bool extract(DataExtractor Data, uint8_t AddressSize, uint64_t Offset,
- Optional<dwarf::DwarfFormat> Format);
- bool isError() { return Error; }
+ const Description &getDescription() const { return Desc; }
+ uint8_t getCode() const { return Opcode; }
+ uint64_t getRawOperand(unsigned Idx) const { return Operands[Idx]; }
+ uint64_t getOperandEndOffset(unsigned Idx) const {
+ return OperandEndOffsets[Idx];
+ }
+ uint64_t getEndOffset() const { return EndOffset; }
+ bool isError() const { return Error; }
bool print(raw_ostream &OS, DIDumpOptions DumpOpts,
const DWARFExpression *Expr, const MCRegisterInfo *RegInfo,
- DWARFUnit *U, bool isEH);
- bool verify(DWARFUnit *U);
+ DWARFUnit *U, bool isEH) const;
+
+ /// Verify \p Op. Does not affect the return of \a isError().
+ static bool verify(const Operation &Op, DWARFUnit *U);
+
+ private:
+ bool extract(DataExtractor Data, uint8_t AddressSize, uint64_t Offset,
+ Optional<dwarf::DwarfFormat> Format);
};
/// An iterator to go through the expression operations.
class iterator
: public iterator_facade_base<iterator, std::forward_iterator_tag,
- Operation> {
+ const Operation> {
friend class DWARFExpression;
const DWARFExpression *Expr;
uint64_t Offset;
@@ -116,19 +122,17 @@ public:
}
public:
- class Operation &operator++() {
+ iterator &operator++() {
Offset = Op.isError() ? Expr->Data.getData().size() : Op.EndOffset;
Op.Error =
Offset >= Expr->Data.getData().size() ||
!Op.extract(Expr->Data, Expr->AddressSize, Offset, Expr->Format);
- return Op;
+ return *this;
}
- class Operation &operator*() {
- return Op;
- }
+ const Operation &operator*() const { return Op; }
- iterator skipBytes(uint64_t Add) {
+ iterator skipBytes(uint64_t Add) const {
return iterator(Expr, Op.EndOffset + Add);
}
@@ -159,6 +163,8 @@ public:
bool operator==(const DWARFExpression &RHS) const;
+ StringRef getData() const { return Data.getData(); }
+
private:
DataExtractor Data;
uint8_t AddressSize;
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
index 43be024f0d37..3c051c3ea018 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
@@ -102,10 +102,6 @@ public:
return extractValue(Data, OffsetPtr, FormParams, nullptr, U);
}
- bool isInlinedCStr() const {
- return Value.data != nullptr && Value.data == (const uint8_t *)Value.cstr;
- }
-
/// getAsFoo functions below return the extracted value as Foo if only
/// DWARFFormValue has form class is suitable for representing Foo.
Optional<uint64_t> getAsReference() const;
@@ -123,6 +119,19 @@ public:
Optional<ArrayRef<uint8_t>> getAsBlock() const;
Optional<uint64_t> getAsCStringOffset() const;
Optional<uint64_t> getAsReferenceUVal() const;
+ /// Correctly extract any file paths from a form value.
+ ///
+ /// These attributes can be in the form of DW_AT_decl_file or DW_AT_call_file
+ /// attributes. We need to use the file index in the correct DWARFUnit's line
+ /// table prologue, and each DWARFFormValue has the DWARFUnit the form value
+ /// was extracted from.
+ ///
+ /// \param Kind The kind of path to extract.
+ ///
+ /// \returns A valid string value on success, or llvm::None if the form class
+ /// is not FC_Constant, or if the file index is not valid.
+ Optional<std::string>
+ getAsFile(DILineInfoSpecifier::FileLineInfoKind Kind) const;
/// Skip a form's value in \p DebugInfoData at the offset specified by
/// \p OffsetPtr.
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
index 93d7e2b563fd..d471b80c7fe1 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
@@ -49,8 +49,6 @@ public:
DieRangeInfo(std::vector<DWARFAddressRange> Ranges)
: Ranges(std::move(Ranges)) {}
- typedef std::vector<DWARFAddressRange>::const_iterator
- address_range_iterator;
typedef std::set<DieRangeInfo>::const_iterator die_range_info_iterator;
/// Inserts the address range. If the range overlaps with an existing
@@ -62,16 +60,6 @@ public:
/// children address ranges must all be contained in.
Optional<DWARFAddressRange> insert(const DWARFAddressRange &R);
- /// Finds an address range in the sorted vector of ranges.
- address_range_iterator findRange(const DWARFAddressRange &R) const {
- auto Begin = Ranges.begin();
- auto End = Ranges.end();
- auto Iter = std::upper_bound(Begin, End, R);
- if (Iter != Begin)
- --Iter;
- return Iter;
- }
-
/// Inserts the address range info. If any of its ranges overlaps with a
/// range in an existing range info, the range info is *not* added and an
/// iterator to the overlapping range info.
@@ -91,14 +79,11 @@ private:
raw_ostream &OS;
DWARFContext &DCtx;
DIDumpOptions DumpOpts;
- /// A map that tracks all references (converted absolute references) so we
- /// can verify each reference points to a valid DIE and not an offset that
- /// lies between to valid DIEs.
- std::map<uint64_t, std::set<uint64_t>> ReferenceToDIEOffsets;
uint32_t NumDebugLineErrors = 0;
// Used to relax some checks that do not currently work portably
bool IsObjectFile;
bool IsMachOObject;
+ using ReferenceMap = std::map<uint64_t, std::set<uint64_t>>;
raw_ostream &error() const;
raw_ostream &warn() const;
@@ -140,6 +125,7 @@ private:
bool verifyUnitHeader(const DWARFDataExtractor DebugInfoData,
uint64_t *Offset, unsigned UnitIndex, uint8_t &UnitType,
bool &isUnitDWARF64);
+ bool verifyName(const DWARFDie &Die);
/// Verifies the header of a unit in a .debug_info or .debug_types section.
///
@@ -156,7 +142,9 @@ private:
/// \param Unit The DWARF Unit to verify.
///
/// \returns The number of errors that occurred during verification.
- unsigned verifyUnitContents(DWARFUnit &Unit);
+ unsigned verifyUnitContents(DWARFUnit &Unit,
+ ReferenceMap &UnitLocalReferences,
+ ReferenceMap &CrossUnitReferences);
/// Verifies the unit headers and contents in a .debug_info or .debug_types
/// section.
@@ -208,7 +196,9 @@ private:
///
/// \returns NumErrors The number of errors occurred during verification of
/// attributes' forms in a unit
- unsigned verifyDebugInfoForm(const DWARFDie &Die, DWARFAttribute &AttrValue);
+ unsigned verifyDebugInfoForm(const DWARFDie &Die, DWARFAttribute &AttrValue,
+ ReferenceMap &UnitLocalReferences,
+ ReferenceMap &CrossUnitReferences);
/// Verifies the all valid references that were found when iterating through
/// all of the DIE attributes.
@@ -220,7 +210,9 @@ private:
///
/// \returns NumErrors The number of errors occurred during verification of
/// references for the .debug_info and .debug_types sections
- unsigned verifyDebugInfoReferences();
+ unsigned verifyDebugInfoReferences(
+ const ReferenceMap &,
+ llvm::function_ref<DWARFUnit *(uint64_t)> GetUnitForDieOffset);
/// Verify the DW_AT_stmt_list encoding and value and ensure that no
/// compile units that have the same DW_AT_stmt_list value.
diff --git a/llvm/include/llvm/DebugInfo/GSYM/StringTable.h b/llvm/include/llvm/DebugInfo/GSYM/StringTable.h
index f7f800d01647..045c9e3f3ebd 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/StringTable.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/StringTable.h
@@ -9,7 +9,6 @@
#ifndef LLVM_DEBUGINFO_GSYM_STRINGTABLE_H
#define LLVM_DEBUGINFO_GSYM_STRINGTABLE_H
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/GSYM/Range.h"
#include <stdint.h>
diff --git a/llvm/include/llvm/DebugInfo/MSF/MSFCommon.h b/llvm/include/llvm/DebugInfo/MSF/MSFCommon.h
index 83331b14b8af..a922839a999d 100644
--- a/llvm/include/llvm/DebugInfo/MSF/MSFCommon.h
+++ b/llvm/include/llvm/DebugInfo/MSF/MSFCommon.h
@@ -93,6 +93,9 @@ inline bool isValidBlockSize(uint32_t Size) {
case 1024:
case 2048:
case 4096:
+ case 8192:
+ case 16384:
+ case 32768:
return true;
}
return false;
diff --git a/llvm/include/llvm/DebugInfo/MSF/MappedBlockStream.h b/llvm/include/llvm/DebugInfo/MSF/MappedBlockStream.h
index 473c89e8106f..296a4840b779 100644
--- a/llvm/include/llvm/DebugInfo/MSF/MappedBlockStream.h
+++ b/llvm/include/llvm/DebugInfo/MSF/MappedBlockStream.h
@@ -58,12 +58,12 @@ public:
return support::little;
}
- Error readBytes(uint32_t Offset, uint32_t Size,
+ Error readBytes(uint64_t Offset, uint64_t Size,
ArrayRef<uint8_t> &Buffer) override;
- Error readLongestContiguousChunk(uint32_t Offset,
+ Error readLongestContiguousChunk(uint64_t Offset,
ArrayRef<uint8_t> &Buffer) override;
- uint32_t getLength() override;
+ uint64_t getLength() override;
BumpPtrAllocator &getAllocator() { return Allocator; }
@@ -79,10 +79,10 @@ protected:
private:
const MSFStreamLayout &getStreamLayout() const { return StreamLayout; }
- void fixCacheAfterWrite(uint32_t Offset, ArrayRef<uint8_t> Data) const;
+ void fixCacheAfterWrite(uint64_t Offset, ArrayRef<uint8_t> Data) const;
- Error readBytes(uint32_t Offset, MutableArrayRef<uint8_t> Buffer);
- bool tryReadContiguously(uint32_t Offset, uint32_t Size,
+ Error readBytes(uint64_t Offset, MutableArrayRef<uint8_t> Buffer);
+ bool tryReadContiguously(uint64_t Offset, uint64_t Size,
ArrayRef<uint8_t> &Buffer);
const uint32_t BlockSize;
@@ -125,13 +125,13 @@ public:
return support::little;
}
- Error readBytes(uint32_t Offset, uint32_t Size,
+ Error readBytes(uint64_t Offset, uint64_t Size,
ArrayRef<uint8_t> &Buffer) override;
- Error readLongestContiguousChunk(uint32_t Offset,
+ Error readLongestContiguousChunk(uint64_t Offset,
ArrayRef<uint8_t> &Buffer) override;
- uint32_t getLength() override;
+ uint64_t getLength() override;
- Error writeBytes(uint32_t Offset, ArrayRef<uint8_t> Buffer) override;
+ Error writeBytes(uint64_t Offset, ArrayRef<uint8_t> Buffer) override;
Error commit() override;
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleList.h b/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleList.h
index 5fb13ad30e91..de5b46f21672 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleList.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleList.h
@@ -31,9 +31,7 @@ struct FileInfoSubstreamHeader;
class DbiModuleSourceFilesIterator
: public iterator_facade_base<DbiModuleSourceFilesIterator,
std::random_access_iterator_tag, StringRef> {
- using BaseType =
- iterator_facade_base<DbiModuleSourceFilesIterator,
- std::random_access_iterator_tag, StringRef>;
+ using BaseType = typename DbiModuleSourceFilesIterator::iterator_facade_base;
public:
DbiModuleSourceFilesIterator(const DbiModuleList &Modules, uint32_t Modi,
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h b/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h
index 95c0a89551ed..474bd796b2b3 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h
@@ -38,6 +38,7 @@ class HashTableIterator
: public iterator_facade_base<HashTableIterator<ValueT>,
std::forward_iterator_tag,
const std::pair<uint32_t, ValueT>> {
+ using BaseT = typename HashTableIterator::iterator_facade_base;
friend HashTable<ValueT>;
HashTableIterator(const HashTable<ValueT> &Map, uint32_t Index,
@@ -76,9 +77,7 @@ public:
// Implement postfix op++ in terms of prefix op++ by using the superclass
// implementation.
- using iterator_facade_base<HashTableIterator<ValueT>,
- std::forward_iterator_tag,
- const std::pair<uint32_t, ValueT>>::operator++;
+ using BaseT::operator++;
HashTableIterator &operator++() {
while (Index < Map->Buckets.size()) {
++Index;
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h b/llvm/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h
index 1df059ffa9fd..f110e90b3f90 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h
@@ -9,7 +9,6 @@
#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NAMEDSTREAMMAP_H
#define LLVM_DEBUGINFO_PDB_NATIVE_NAMEDSTREAMMAP_H
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeLineNumber.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeLineNumber.h
index 5dedc70f11ba..be0ddf0a063a 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/NativeLineNumber.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeLineNumber.h
@@ -9,7 +9,6 @@
#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVELINENUMBER_H
#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVELINENUMBER_H
-#include "llvm/ADT/Optional.h"
#include "llvm/DebugInfo/CodeView/Line.h"
#include "llvm/DebugInfo/PDB/IPDBLineNumber.h"
#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h
index 8f1834d0a2c2..90b5d8068959 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h
@@ -9,7 +9,6 @@
#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEFUNCTIONSIG_H
#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEFUNCTIONSIG_H
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h
index 4ae8f1471781..21995ca665c1 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h
@@ -9,7 +9,6 @@
#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEVTSHAPE_H
#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEVTSHAPE_H
-#include "llvm/ADT/Optional.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h b/llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h
index 3c414e7a9005..004d005280d4 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h
@@ -10,7 +10,6 @@
#define LLVM_DEBUGINFO_PDB_NATIVE_PDBFILEBUILDER_H
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/DebugInfo/PDB/Native/NamedStreamMap.h"
#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
diff --git a/llvm/include/llvm/Demangle/Demangle.h b/llvm/include/llvm/Demangle/Demangle.h
index c396a1dc5dd3..3150e049320b 100644
--- a/llvm/include/llvm/Demangle/Demangle.h
+++ b/llvm/include/llvm/Demangle/Demangle.h
@@ -31,7 +31,6 @@ enum : int {
char *itaniumDemangle(const char *mangled_name, char *buf, size_t *n,
int *status);
-
enum MSDemangleFlags {
MSDF_None = 0,
MSDF_DumpBackrefs = 1 << 0,
@@ -39,6 +38,7 @@ enum MSDemangleFlags {
MSDF_NoCallingConvention = 1 << 2,
MSDF_NoReturnType = 1 << 3,
MSDF_NoMemberType = 1 << 4,
+ MSDF_NoVariableType = 1 << 5,
};
/// Demangles the Microsoft symbol pointed at by mangled_name and returns it.
@@ -53,13 +53,16 @@ enum MSDemangleFlags {
/// receives the size of the demangled string on output if n_buf is not nullptr.
/// status receives one of the demangle_ enum entries above if it's not nullptr.
/// Flags controls various details of the demangled representation.
-char *microsoftDemangle(const char *mangled_name, size_t *n_read,
- char *buf, size_t *n_buf,
- int *status, MSDemangleFlags Flags = MSDF_None);
+char *microsoftDemangle(const char *mangled_name, size_t *n_read, char *buf,
+ size_t *n_buf, int *status,
+ MSDemangleFlags Flags = MSDF_None);
// Demangles a Rust v0 mangled symbol. The API follows that of __cxa_demangle.
char *rustDemangle(const char *MangledName, char *Buf, size_t *N, int *Status);
+// Demangles a D mangled symbol.
+char *dlangDemangle(const char *MangledName);
+
/// Attempt to demangle a string using different demangling schemes.
/// The function uses heuristics to determine which demangling scheme to use.
/// \param MangledName - reference to string to demangle.
@@ -67,6 +70,8 @@ char *rustDemangle(const char *MangledName, char *Buf, size_t *N, int *Status);
/// demangling occurred.
std::string demangle(const std::string &MangledName);
+bool nonMicrosoftDemangle(const char *MangledName, std::string &Result);
+
/// "Partial" demangler. This supports demangling a string into an AST
/// (typically an intermediate stage in itaniumDemangle) and querying certain
/// properties or partially printing the demangled name.
@@ -118,6 +123,7 @@ struct ItaniumPartialDemangler {
bool isSpecialName() const;
~ItaniumPartialDemangler();
+
private:
void *RootNode;
void *Context;
diff --git a/llvm/include/llvm/Demangle/ItaniumDemangle.h b/llvm/include/llvm/Demangle/ItaniumDemangle.h
index 9163b713d118..86f5c992b63d 100644
--- a/llvm/include/llvm/Demangle/ItaniumDemangle.h
+++ b/llvm/include/llvm/Demangle/ItaniumDemangle.h
@@ -57,6 +57,7 @@
X(LocalName) \
X(VectorType) \
X(PixelVectorType) \
+ X(BinaryFPType) \
X(SyntheticTemplateParamName) \
X(TypeTemplateParamDecl) \
X(NonTypeTemplateParamDecl) \
@@ -109,6 +110,126 @@
DEMANGLE_NAMESPACE_BEGIN
+template <class T, size_t N> class PODSmallVector {
+ static_assert(std::is_pod<T>::value,
+ "T is required to be a plain old data type");
+
+ T *First = nullptr;
+ T *Last = nullptr;
+ T *Cap = nullptr;
+ T Inline[N] = {0};
+
+ bool isInline() const { return First == Inline; }
+
+ void clearInline() {
+ First = Inline;
+ Last = Inline;
+ Cap = Inline + N;
+ }
+
+ void reserve(size_t NewCap) {
+ size_t S = size();
+ if (isInline()) {
+ auto *Tmp = static_cast<T *>(std::malloc(NewCap * sizeof(T)));
+ if (Tmp == nullptr)
+ std::terminate();
+ std::copy(First, Last, Tmp);
+ First = Tmp;
+ } else {
+ First = static_cast<T *>(std::realloc(First, NewCap * sizeof(T)));
+ if (First == nullptr)
+ std::terminate();
+ }
+ Last = First + S;
+ Cap = First + NewCap;
+ }
+
+public:
+ PODSmallVector() : First(Inline), Last(First), Cap(Inline + N) {}
+
+ PODSmallVector(const PODSmallVector &) = delete;
+ PODSmallVector &operator=(const PODSmallVector &) = delete;
+
+ PODSmallVector(PODSmallVector &&Other) : PODSmallVector() {
+ if (Other.isInline()) {
+ std::copy(Other.begin(), Other.end(), First);
+ Last = First + Other.size();
+ Other.clear();
+ return;
+ }
+
+ First = Other.First;
+ Last = Other.Last;
+ Cap = Other.Cap;
+ Other.clearInline();
+ }
+
+ PODSmallVector &operator=(PODSmallVector &&Other) {
+ if (Other.isInline()) {
+ if (!isInline()) {
+ std::free(First);
+ clearInline();
+ }
+ std::copy(Other.begin(), Other.end(), First);
+ Last = First + Other.size();
+ Other.clear();
+ return *this;
+ }
+
+ if (isInline()) {
+ First = Other.First;
+ Last = Other.Last;
+ Cap = Other.Cap;
+ Other.clearInline();
+ return *this;
+ }
+
+ std::swap(First, Other.First);
+ std::swap(Last, Other.Last);
+ std::swap(Cap, Other.Cap);
+ Other.clear();
+ return *this;
+ }
+
+ // NOLINTNEXTLINE(readability-identifier-naming)
+ void push_back(const T &Elem) {
+ if (Last == Cap)
+ reserve(size() * 2);
+ *Last++ = Elem;
+ }
+
+ // NOLINTNEXTLINE(readability-identifier-naming)
+ void pop_back() {
+ assert(Last != First && "Popping empty vector!");
+ --Last;
+ }
+
+ void dropBack(size_t Index) {
+ assert(Index <= size() && "dropBack() can't expand!");
+ Last = First + Index;
+ }
+
+ T *begin() { return First; }
+ T *end() { return Last; }
+
+ bool empty() const { return First == Last; }
+ size_t size() const { return static_cast<size_t>(Last - First); }
+ T &back() {
+ assert(Last != First && "Calling back() on empty vector!");
+ return *(Last - 1);
+ }
+ T &operator[](size_t Index) {
+ assert(Index < size() && "Invalid access!");
+ return *(begin() + Index);
+ }
+ void clear() { Last = First; }
+
+ ~PODSmallVector() {
+ if (!isInline())
+ std::free(First);
+ }
+};
+
// Base class of all AST nodes. The AST is built by the parser, then is
// traversed by the printLeft/Right functions to produce a demangled string.
class Node {
@@ -155,50 +276,48 @@ public:
// would construct an equivalent node.
//template<typename Fn> void match(Fn F) const;
- bool hasRHSComponent(OutputStream &S) const {
+ bool hasRHSComponent(OutputBuffer &OB) const {
if (RHSComponentCache != Cache::Unknown)
return RHSComponentCache == Cache::Yes;
- return hasRHSComponentSlow(S);
+ return hasRHSComponentSlow(OB);
}
- bool hasArray(OutputStream &S) const {
+ bool hasArray(OutputBuffer &OB) const {
if (ArrayCache != Cache::Unknown)
return ArrayCache == Cache::Yes;
- return hasArraySlow(S);
+ return hasArraySlow(OB);
}
- bool hasFunction(OutputStream &S) const {
+ bool hasFunction(OutputBuffer &OB) const {
if (FunctionCache != Cache::Unknown)
return FunctionCache == Cache::Yes;
- return hasFunctionSlow(S);
+ return hasFunctionSlow(OB);
}
Kind getKind() const { return K; }
- virtual bool hasRHSComponentSlow(OutputStream &) const { return false; }
- virtual bool hasArraySlow(OutputStream &) const { return false; }
- virtual bool hasFunctionSlow(OutputStream &) const { return false; }
+ virtual bool hasRHSComponentSlow(OutputBuffer &) const { return false; }
+ virtual bool hasArraySlow(OutputBuffer &) const { return false; }
+ virtual bool hasFunctionSlow(OutputBuffer &) const { return false; }
// Dig through "glue" nodes like ParameterPack and ForwardTemplateReference to
// get at a node that actually represents some concrete syntax.
- virtual const Node *getSyntaxNode(OutputStream &) const {
- return this;
- }
+ virtual const Node *getSyntaxNode(OutputBuffer &) const { return this; }
- void print(OutputStream &S) const {
- printLeft(S);
+ void print(OutputBuffer &OB) const {
+ printLeft(OB);
if (RHSComponentCache != Cache::No)
- printRight(S);
+ printRight(OB);
}
- // Print the "left" side of this Node into OutputStream.
- virtual void printLeft(OutputStream &) const = 0;
+ // Print the "left" side of this Node into OutputBuffer.
+ virtual void printLeft(OutputBuffer &) const = 0;
// Print the "right". This distinction is necessary to represent C++ types
// that appear on the RHS of their subtype, such as arrays or functions.
// Since most types don't have such a component, provide a default
// implementation.
- virtual void printRight(OutputStream &) const {}
+ virtual void printRight(OutputBuffer &) const {}
virtual StringView getBaseName() const { return StringView(); }
@@ -227,19 +346,19 @@ public:
Node *operator[](size_t Idx) const { return Elements[Idx]; }
- void printWithComma(OutputStream &S) const {
+ void printWithComma(OutputBuffer &OB) const {
bool FirstElement = true;
for (size_t Idx = 0; Idx != NumElements; ++Idx) {
- size_t BeforeComma = S.getCurrentPosition();
+ size_t BeforeComma = OB.getCurrentPosition();
if (!FirstElement)
- S += ", ";
- size_t AfterComma = S.getCurrentPosition();
- Elements[Idx]->print(S);
+ OB += ", ";
+ size_t AfterComma = OB.getCurrentPosition();
+ Elements[Idx]->print(OB);
// Elements[Idx] is an empty parameter pack expansion, we should erase the
// comma we just printed.
- if (AfterComma == S.getCurrentPosition()) {
- S.setCurrentPosition(BeforeComma);
+ if (AfterComma == OB.getCurrentPosition()) {
+ OB.setCurrentPosition(BeforeComma);
continue;
}
@@ -254,9 +373,7 @@ struct NodeArrayNode : Node {
template<typename Fn> void match(Fn F) const { F(Array); }
- void printLeft(OutputStream &S) const override {
- Array.printWithComma(S);
- }
+ void printLeft(OutputBuffer &OB) const override { Array.printWithComma(OB); }
};
class DotSuffix final : public Node {
@@ -269,11 +386,11 @@ public:
template<typename Fn> void match(Fn F) const { F(Prefix, Suffix); }
- void printLeft(OutputStream &s) const override {
- Prefix->print(s);
- s += " (";
- s += Suffix;
- s += ")";
+ void printLeft(OutputBuffer &OB) const override {
+ Prefix->print(OB);
+ OB += " (";
+ OB += Suffix;
+ OB += ")";
}
};
@@ -288,12 +405,12 @@ public:
template <typename Fn> void match(Fn F) const { F(Ty, Ext, TA); }
- void printLeft(OutputStream &S) const override {
- Ty->print(S);
- S += " ";
- S += Ext;
+ void printLeft(OutputBuffer &OB) const override {
+ Ty->print(OB);
+ OB += " ";
+ OB += Ext;
if (TA != nullptr)
- TA->print(S);
+ TA->print(OB);
}
};
@@ -319,13 +436,13 @@ protected:
const Qualifiers Quals;
const Node *Child;
- void printQuals(OutputStream &S) const {
+ void printQuals(OutputBuffer &OB) const {
if (Quals & QualConst)
- S += " const";
+ OB += " const";
if (Quals & QualVolatile)
- S += " volatile";
+ OB += " volatile";
if (Quals & QualRestrict)
- S += " restrict";
+ OB += " restrict";
}
public:
@@ -336,22 +453,22 @@ public:
template<typename Fn> void match(Fn F) const { F(Child, Quals); }
- bool hasRHSComponentSlow(OutputStream &S) const override {
- return Child->hasRHSComponent(S);
+ bool hasRHSComponentSlow(OutputBuffer &OB) const override {
+ return Child->hasRHSComponent(OB);
}
- bool hasArraySlow(OutputStream &S) const override {
- return Child->hasArray(S);
+ bool hasArraySlow(OutputBuffer &OB) const override {
+ return Child->hasArray(OB);
}
- bool hasFunctionSlow(OutputStream &S) const override {
- return Child->hasFunction(S);
+ bool hasFunctionSlow(OutputBuffer &OB) const override {
+ return Child->hasFunction(OB);
}
- void printLeft(OutputStream &S) const override {
- Child->printLeft(S);
- printQuals(S);
+ void printLeft(OutputBuffer &OB) const override {
+ Child->printLeft(OB);
+ printQuals(OB);
}
- void printRight(OutputStream &S) const override { Child->printRight(S); }
+ void printRight(OutputBuffer &OB) const override { Child->printRight(OB); }
};
class ConversionOperatorType final : public Node {
@@ -363,9 +480,9 @@ public:
template<typename Fn> void match(Fn F) const { F(Ty); }
- void printLeft(OutputStream &S) const override {
- S += "operator ";
- Ty->print(S);
+ void printLeft(OutputBuffer &OB) const override {
+ OB += "operator ";
+ Ty->print(OB);
}
};
@@ -379,9 +496,9 @@ public:
template<typename Fn> void match(Fn F) const { F(Ty, Postfix); }
- void printLeft(OutputStream &s) const override {
- Ty->printLeft(s);
- s += Postfix;
+ void printLeft(OutputBuffer &OB) const override {
+ Ty->printLeft(OB);
+ OB += Postfix;
}
};
@@ -396,7 +513,7 @@ public:
StringView getName() const { return Name; }
StringView getBaseName() const override { return Name; }
- void printLeft(OutputStream &s) const override { s += Name; }
+ void printLeft(OutputBuffer &OB) const override { OB += Name; }
};
class ElaboratedTypeSpefType : public Node {
@@ -408,10 +525,10 @@ public:
template<typename Fn> void match(Fn F) const { F(Kind, Child); }
- void printLeft(OutputStream &S) const override {
- S += Kind;
- S += ' ';
- Child->print(S);
+ void printLeft(OutputBuffer &OB) const override {
+ OB += Kind;
+ OB += ' ';
+ Child->print(OB);
}
};
@@ -426,11 +543,11 @@ struct AbiTagAttr : Node {
template<typename Fn> void match(Fn F) const { F(Base, Tag); }
- void printLeft(OutputStream &S) const override {
- Base->printLeft(S);
- S += "[abi:";
- S += Tag;
- S += "]";
+ void printLeft(OutputBuffer &OB) const override {
+ Base->printLeft(OB);
+ OB += "[abi:";
+ OB += Tag;
+ OB += "]";
}
};
@@ -442,10 +559,10 @@ public:
template<typename Fn> void match(Fn F) const { F(Conditions); }
- void printLeft(OutputStream &S) const override {
- S += " [enable_if:";
- Conditions.printWithComma(S);
- S += ']';
+ void printLeft(OutputBuffer &OB) const override {
+ OB += " [enable_if:";
+ Conditions.printWithComma(OB);
+ OB += ']';
}
};
@@ -466,11 +583,11 @@ public:
static_cast<const NameType *>(Ty)->getName() == "objc_object";
}
- void printLeft(OutputStream &S) const override {
- Ty->print(S);
- S += "<";
- S += Protocol;
- S += ">";
+ void printLeft(OutputBuffer &OB) const override {
+ Ty->print(OB);
+ OB += "<";
+ OB += Protocol;
+ OB += ">";
}
};
@@ -484,34 +601,34 @@ public:
template<typename Fn> void match(Fn F) const { F(Pointee); }
- bool hasRHSComponentSlow(OutputStream &S) const override {
- return Pointee->hasRHSComponent(S);
+ bool hasRHSComponentSlow(OutputBuffer &OB) const override {
+ return Pointee->hasRHSComponent(OB);
}
- void printLeft(OutputStream &s) const override {
+ void printLeft(OutputBuffer &OB) const override {
// We rewrite objc_object<SomeProtocol>* into id<SomeProtocol>.
if (Pointee->getKind() != KObjCProtoName ||
!static_cast<const ObjCProtoName *>(Pointee)->isObjCObject()) {
- Pointee->printLeft(s);
- if (Pointee->hasArray(s))
- s += " ";
- if (Pointee->hasArray(s) || Pointee->hasFunction(s))
- s += "(";
- s += "*";
+ Pointee->printLeft(OB);
+ if (Pointee->hasArray(OB))
+ OB += " ";
+ if (Pointee->hasArray(OB) || Pointee->hasFunction(OB))
+ OB += "(";
+ OB += "*";
} else {
const auto *objcProto = static_cast<const ObjCProtoName *>(Pointee);
- s += "id<";
- s += objcProto->Protocol;
- s += ">";
+ OB += "id<";
+ OB += objcProto->Protocol;
+ OB += ">";
}
}
- void printRight(OutputStream &s) const override {
+ void printRight(OutputBuffer &OB) const override {
if (Pointee->getKind() != KObjCProtoName ||
!static_cast<const ObjCProtoName *>(Pointee)->isObjCObject()) {
- if (Pointee->hasArray(s) || Pointee->hasFunction(s))
- s += ")";
- Pointee->printRight(s);
+ if (Pointee->hasArray(OB) || Pointee->hasFunction(OB))
+ OB += ")";
+ Pointee->printRight(OB);
}
}
};
@@ -531,15 +648,30 @@ class ReferenceType : public Node {
// Dig through any refs to refs, collapsing the ReferenceTypes as we go. The
// rule here is rvalue ref to rvalue ref collapses to a rvalue ref, and any
// other combination collapses to a lvalue ref.
- std::pair<ReferenceKind, const Node *> collapse(OutputStream &S) const {
+ //
+ // A combination of a TemplateForwardReference and a back-ref Substitution
+ // from an ill-formed string may have created a cycle; use cycle detection to
+ // avoid looping forever.
+ std::pair<ReferenceKind, const Node *> collapse(OutputBuffer &OB) const {
auto SoFar = std::make_pair(RK, Pointee);
+ // Track the chain of nodes for Floyd's 'tortoise and hare'
+ // cycle-detection algorithm, since getSyntaxNode(S) is impure.
+ PODSmallVector<const Node *, 8> Prev;
for (;;) {
- const Node *SN = SoFar.second->getSyntaxNode(S);
+ const Node *SN = SoFar.second->getSyntaxNode(OB);
if (SN->getKind() != KReferenceType)
break;
auto *RT = static_cast<const ReferenceType *>(SN);
SoFar.second = RT->Pointee;
SoFar.first = std::min(SoFar.first, RT->RK);
+
+ // The middle of Prev is the 'slow' pointer moving at half speed
+ Prev.push_back(SoFar.second);
+ if (Prev.size() > 1 && SoFar.second == Prev[(Prev.size() - 1) / 2]) {
+ // Cycle detected
+ SoFar.second = nullptr;
+ break;
+ }
}
return SoFar;
}
@@ -551,31 +683,35 @@ public:
template<typename Fn> void match(Fn F) const { F(Pointee, RK); }
- bool hasRHSComponentSlow(OutputStream &S) const override {
- return Pointee->hasRHSComponent(S);
+ bool hasRHSComponentSlow(OutputBuffer &OB) const override {
+ return Pointee->hasRHSComponent(OB);
}
- void printLeft(OutputStream &s) const override {
+ void printLeft(OutputBuffer &OB) const override {
if (Printing)
return;
SwapAndRestore<bool> SavePrinting(Printing, true);
- std::pair<ReferenceKind, const Node *> Collapsed = collapse(s);
- Collapsed.second->printLeft(s);
- if (Collapsed.second->hasArray(s))
- s += " ";
- if (Collapsed.second->hasArray(s) || Collapsed.second->hasFunction(s))
- s += "(";
+ std::pair<ReferenceKind, const Node *> Collapsed = collapse(OB);
+ if (!Collapsed.second)
+ return;
+ Collapsed.second->printLeft(OB);
+ if (Collapsed.second->hasArray(OB))
+ OB += " ";
+ if (Collapsed.second->hasArray(OB) || Collapsed.second->hasFunction(OB))
+ OB += "(";
- s += (Collapsed.first == ReferenceKind::LValue ? "&" : "&&");
+ OB += (Collapsed.first == ReferenceKind::LValue ? "&" : "&&");
}
- void printRight(OutputStream &s) const override {
+ void printRight(OutputBuffer &OB) const override {
if (Printing)
return;
SwapAndRestore<bool> SavePrinting(Printing, true);
- std::pair<ReferenceKind, const Node *> Collapsed = collapse(s);
- if (Collapsed.second->hasArray(s) || Collapsed.second->hasFunction(s))
- s += ")";
- Collapsed.second->printRight(s);
+ std::pair<ReferenceKind, const Node *> Collapsed = collapse(OB);
+ if (!Collapsed.second)
+ return;
+ if (Collapsed.second->hasArray(OB) || Collapsed.second->hasFunction(OB))
+ OB += ")";
+ Collapsed.second->printRight(OB);
}
};
@@ -590,24 +726,24 @@ public:
template<typename Fn> void match(Fn F) const { F(ClassType, MemberType); }
- bool hasRHSComponentSlow(OutputStream &S) const override {
- return MemberType->hasRHSComponent(S);
+ bool hasRHSComponentSlow(OutputBuffer &OB) const override {
+ return MemberType->hasRHSComponent(OB);
}
- void printLeft(OutputStream &s) const override {
- MemberType->printLeft(s);
- if (MemberType->hasArray(s) || MemberType->hasFunction(s))
- s += "(";
+ void printLeft(OutputBuffer &OB) const override {
+ MemberType->printLeft(OB);
+ if (MemberType->hasArray(OB) || MemberType->hasFunction(OB))
+ OB += "(";
else
- s += " ";
- ClassType->print(s);
- s += "::*";
+ OB += " ";
+ ClassType->print(OB);
+ OB += "::*";
}
- void printRight(OutputStream &s) const override {
- if (MemberType->hasArray(s) || MemberType->hasFunction(s))
- s += ")";
- MemberType->printRight(s);
+ void printRight(OutputBuffer &OB) const override {
+ if (MemberType->hasArray(OB) || MemberType->hasFunction(OB))
+ OB += ")";
+ MemberType->printRight(OB);
}
};
@@ -624,19 +760,19 @@ public:
template<typename Fn> void match(Fn F) const { F(Base, Dimension); }
- bool hasRHSComponentSlow(OutputStream &) const override { return true; }
- bool hasArraySlow(OutputStream &) const override { return true; }
+ bool hasRHSComponentSlow(OutputBuffer &) const override { return true; }
+ bool hasArraySlow(OutputBuffer &) const override { return true; }
- void printLeft(OutputStream &S) const override { Base->printLeft(S); }
+ void printLeft(OutputBuffer &OB) const override { Base->printLeft(OB); }
- void printRight(OutputStream &S) const override {
- if (S.back() != ']')
- S += " ";
- S += "[";
+ void printRight(OutputBuffer &OB) const override {
+ if (OB.back() != ']')
+ OB += " ";
+ OB += "[";
if (Dimension)
- Dimension->print(S);
- S += "]";
- Base->printRight(S);
+ Dimension->print(OB);
+ OB += "]";
+ Base->printRight(OB);
}
};
@@ -660,8 +796,8 @@ public:
F(Ret, Params, CVQuals, RefQual, ExceptionSpec);
}
- bool hasRHSComponentSlow(OutputStream &) const override { return true; }
- bool hasFunctionSlow(OutputStream &) const override { return true; }
+ bool hasRHSComponentSlow(OutputBuffer &) const override { return true; }
+ bool hasFunctionSlow(OutputBuffer &) const override { return true; }
// Handle C++'s ... quirky decl grammar by using the left & right
// distinction. Consider:
@@ -670,32 +806,32 @@ public:
// that takes a char and returns an int. If we're trying to print f, start
// by printing out the return types's left, then print our parameters, then
// finally print right of the return type.
- void printLeft(OutputStream &S) const override {
- Ret->printLeft(S);
- S += " ";
+ void printLeft(OutputBuffer &OB) const override {
+ Ret->printLeft(OB);
+ OB += " ";
}
- void printRight(OutputStream &S) const override {
- S += "(";
- Params.printWithComma(S);
- S += ")";
- Ret->printRight(S);
+ void printRight(OutputBuffer &OB) const override {
+ OB += "(";
+ Params.printWithComma(OB);
+ OB += ")";
+ Ret->printRight(OB);
if (CVQuals & QualConst)
- S += " const";
+ OB += " const";
if (CVQuals & QualVolatile)
- S += " volatile";
+ OB += " volatile";
if (CVQuals & QualRestrict)
- S += " restrict";
+ OB += " restrict";
if (RefQual == FrefQualLValue)
- S += " &";
+ OB += " &";
else if (RefQual == FrefQualRValue)
- S += " &&";
+ OB += " &&";
if (ExceptionSpec != nullptr) {
- S += ' ';
- ExceptionSpec->print(S);
+ OB += ' ';
+ ExceptionSpec->print(OB);
}
}
};
@@ -707,10 +843,10 @@ public:
template<typename Fn> void match(Fn F) const { F(E); }
- void printLeft(OutputStream &S) const override {
- S += "noexcept(";
- E->print(S);
- S += ")";
+ void printLeft(OutputBuffer &OB) const override {
+ OB += "noexcept(";
+ E->print(OB);
+ OB += ")";
}
};
@@ -722,10 +858,10 @@ public:
template<typename Fn> void match(Fn F) const { F(Types); }
- void printLeft(OutputStream &S) const override {
- S += "throw(";
- Types.printWithComma(S);
- S += ')';
+ void printLeft(OutputBuffer &OB) const override {
+ OB += "throw(";
+ Types.printWithComma(OB);
+ OB += ')';
}
};
@@ -756,41 +892,41 @@ public:
NodeArray getParams() const { return Params; }
const Node *getReturnType() const { return Ret; }
- bool hasRHSComponentSlow(OutputStream &) const override { return true; }
- bool hasFunctionSlow(OutputStream &) const override { return true; }
+ bool hasRHSComponentSlow(OutputBuffer &) const override { return true; }
+ bool hasFunctionSlow(OutputBuffer &) const override { return true; }
const Node *getName() const { return Name; }
- void printLeft(OutputStream &S) const override {
+ void printLeft(OutputBuffer &OB) const override {
if (Ret) {
- Ret->printLeft(S);
- if (!Ret->hasRHSComponent(S))
- S += " ";
+ Ret->printLeft(OB);
+ if (!Ret->hasRHSComponent(OB))
+ OB += " ";
}
- Name->print(S);
+ Name->print(OB);
}
- void printRight(OutputStream &S) const override {
- S += "(";
- Params.printWithComma(S);
- S += ")";
+ void printRight(OutputBuffer &OB) const override {
+ OB += "(";
+ Params.printWithComma(OB);
+ OB += ")";
if (Ret)
- Ret->printRight(S);
+ Ret->printRight(OB);
if (CVQuals & QualConst)
- S += " const";
+ OB += " const";
if (CVQuals & QualVolatile)
- S += " volatile";
+ OB += " volatile";
if (CVQuals & QualRestrict)
- S += " restrict";
+ OB += " restrict";
if (RefQual == FrefQualLValue)
- S += " &";
+ OB += " &";
else if (RefQual == FrefQualRValue)
- S += " &&";
+ OB += " &&";
if (Attrs != nullptr)
- Attrs->print(S);
+ Attrs->print(OB);
}
};
@@ -803,9 +939,9 @@ public:
template<typename Fn> void match(Fn F) const { F(OpName); }
- void printLeft(OutputStream &S) const override {
- S += "operator\"\" ";
- OpName->print(S);
+ void printLeft(OutputBuffer &OB) const override {
+ OB += "operator\"\" ";
+ OpName->print(OB);
}
};
@@ -819,9 +955,9 @@ public:
template<typename Fn> void match(Fn F) const { F(Special, Child); }
- void printLeft(OutputStream &S) const override {
- S += Special;
- Child->print(S);
+ void printLeft(OutputBuffer &OB) const override {
+ OB += Special;
+ Child->print(OB);
}
};
@@ -836,11 +972,11 @@ public:
template<typename Fn> void match(Fn F) const { F(FirstType, SecondType); }
- void printLeft(OutputStream &S) const override {
- S += "construction vtable for ";
- FirstType->print(S);
- S += "-in-";
- SecondType->print(S);
+ void printLeft(OutputBuffer &OB) const override {
+ OB += "construction vtable for ";
+ FirstType->print(OB);
+ OB += "-in-";
+ SecondType->print(OB);
}
};
@@ -855,10 +991,10 @@ struct NestedName : Node {
StringView getBaseName() const override { return Name->getBaseName(); }
- void printLeft(OutputStream &S) const override {
- Qual->print(S);
- S += "::";
- Name->print(S);
+ void printLeft(OutputBuffer &OB) const override {
+ Qual->print(OB);
+ OB += "::";
+ Name->print(OB);
}
};
@@ -871,10 +1007,10 @@ struct LocalName : Node {
template<typename Fn> void match(Fn F) const { F(Encoding, Entity); }
- void printLeft(OutputStream &S) const override {
- Encoding->print(S);
- S += "::";
- Entity->print(S);
+ void printLeft(OutputBuffer &OB) const override {
+ Encoding->print(OB);
+ OB += "::";
+ Entity->print(OB);
}
};
@@ -891,10 +1027,10 @@ public:
StringView getBaseName() const override { return Name->getBaseName(); }
- void printLeft(OutputStream &S) const override {
- Qualifier->print(S);
- S += "::";
- Name->print(S);
+ void printLeft(OutputBuffer &OB) const override {
+ Qualifier->print(OB);
+ OB += "::";
+ Name->print(OB);
}
};
@@ -909,12 +1045,12 @@ public:
template<typename Fn> void match(Fn F) const { F(BaseType, Dimension); }
- void printLeft(OutputStream &S) const override {
- BaseType->print(S);
- S += " vector[";
+ void printLeft(OutputBuffer &OB) const override {
+ BaseType->print(OB);
+ OB += " vector[";
if (Dimension)
- Dimension->print(S);
- S += "]";
+ Dimension->print(OB);
+ OB += "]";
}
};
@@ -927,11 +1063,26 @@ public:
template<typename Fn> void match(Fn F) const { F(Dimension); }
- void printLeft(OutputStream &S) const override {
+ void printLeft(OutputBuffer &OB) const override {
// FIXME: This should demangle as "vector pixel".
- S += "pixel vector[";
- Dimension->print(S);
- S += "]";
+ OB += "pixel vector[";
+ Dimension->print(OB);
+ OB += "]";
+ }
+};
+
+class BinaryFPType final : public Node {
+ const Node *Dimension;
+
+public:
+ BinaryFPType(const Node *Dimension_)
+ : Node(KBinaryFPType), Dimension(Dimension_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Dimension); }
+
+ void printLeft(OutputBuffer &OB) const override {
+ OB += "_Float";
+ Dimension->print(OB);
}
};
@@ -953,20 +1104,20 @@ public:
template<typename Fn> void match(Fn F) const { F(Kind, Index); }
- void printLeft(OutputStream &S) const override {
+ void printLeft(OutputBuffer &OB) const override {
switch (Kind) {
case TemplateParamKind::Type:
- S += "$T";
+ OB += "$T";
break;
case TemplateParamKind::NonType:
- S += "$N";
+ OB += "$N";
break;
case TemplateParamKind::Template:
- S += "$TT";
+ OB += "$TT";
break;
}
if (Index > 0)
- S << Index - 1;
+ OB << Index - 1;
}
};
@@ -980,13 +1131,9 @@ public:
template<typename Fn> void match(Fn F) const { F(Name); }
- void printLeft(OutputStream &S) const override {
- S += "typename ";
- }
+ void printLeft(OutputBuffer &OB) const override { OB += "typename "; }
- void printRight(OutputStream &S) const override {
- Name->print(S);
- }
+ void printRight(OutputBuffer &OB) const override { Name->print(OB); }
};
/// A non-type template parameter declaration, 'int N'.
@@ -1000,15 +1147,15 @@ public:
template<typename Fn> void match(Fn F) const { F(Name, Type); }
- void printLeft(OutputStream &S) const override {
- Type->printLeft(S);
- if (!Type->hasRHSComponent(S))
- S += " ";
+ void printLeft(OutputBuffer &OB) const override {
+ Type->printLeft(OB);
+ if (!Type->hasRHSComponent(OB))
+ OB += " ";
}
- void printRight(OutputStream &S) const override {
- Name->print(S);
- Type->printRight(S);
+ void printRight(OutputBuffer &OB) const override {
+ Name->print(OB);
+ Type->printRight(OB);
}
};
@@ -1025,15 +1172,13 @@ public:
template<typename Fn> void match(Fn F) const { F(Name, Params); }
- void printLeft(OutputStream &S) const override {
- S += "template<";
- Params.printWithComma(S);
- S += "> typename ";
+ void printLeft(OutputBuffer &OB) const override {
+ OB += "template<";
+ Params.printWithComma(OB);
+ OB += "> typename ";
}
- void printRight(OutputStream &S) const override {
- Name->print(S);
- }
+ void printRight(OutputBuffer &OB) const override { Name->print(OB); }
};
/// A template parameter pack declaration, 'typename ...T'.
@@ -1046,14 +1191,12 @@ public:
template<typename Fn> void match(Fn F) const { F(Param); }
- void printLeft(OutputStream &S) const override {
- Param->printLeft(S);
- S += "...";
+ void printLeft(OutputBuffer &OB) const override {
+ Param->printLeft(OB);
+ OB += "...";
}
- void printRight(OutputStream &S) const override {
- Param->printRight(S);
- }
+ void printRight(OutputBuffer &OB) const override { Param->printRight(OB); }
};
/// An unexpanded parameter pack (either in the expression or type context). If
@@ -1067,11 +1210,11 @@ public:
class ParameterPack final : public Node {
NodeArray Data;
- // Setup OutputStream for a pack expansion unless we're already expanding one.
- void initializePackExpansion(OutputStream &S) const {
- if (S.CurrentPackMax == std::numeric_limits<unsigned>::max()) {
- S.CurrentPackMax = static_cast<unsigned>(Data.size());
- S.CurrentPackIndex = 0;
+ // Setup OutputBuffer for a pack expansion unless we're already expanding one.
+ void initializePackExpansion(OutputBuffer &OB) const {
+ if (OB.CurrentPackMax == std::numeric_limits<unsigned>::max()) {
+ OB.CurrentPackMax = static_cast<unsigned>(Data.size());
+ OB.CurrentPackIndex = 0;
}
}
@@ -1094,38 +1237,38 @@ public:
template<typename Fn> void match(Fn F) const { F(Data); }
- bool hasRHSComponentSlow(OutputStream &S) const override {
- initializePackExpansion(S);
- size_t Idx = S.CurrentPackIndex;
- return Idx < Data.size() && Data[Idx]->hasRHSComponent(S);
+ bool hasRHSComponentSlow(OutputBuffer &OB) const override {
+ initializePackExpansion(OB);
+ size_t Idx = OB.CurrentPackIndex;
+ return Idx < Data.size() && Data[Idx]->hasRHSComponent(OB);
}
- bool hasArraySlow(OutputStream &S) const override {
- initializePackExpansion(S);
- size_t Idx = S.CurrentPackIndex;
- return Idx < Data.size() && Data[Idx]->hasArray(S);
+ bool hasArraySlow(OutputBuffer &OB) const override {
+ initializePackExpansion(OB);
+ size_t Idx = OB.CurrentPackIndex;
+ return Idx < Data.size() && Data[Idx]->hasArray(OB);
}
- bool hasFunctionSlow(OutputStream &S) const override {
- initializePackExpansion(S);
- size_t Idx = S.CurrentPackIndex;
- return Idx < Data.size() && Data[Idx]->hasFunction(S);
+ bool hasFunctionSlow(OutputBuffer &OB) const override {
+ initializePackExpansion(OB);
+ size_t Idx = OB.CurrentPackIndex;
+ return Idx < Data.size() && Data[Idx]->hasFunction(OB);
}
- const Node *getSyntaxNode(OutputStream &S) const override {
- initializePackExpansion(S);
- size_t Idx = S.CurrentPackIndex;
- return Idx < Data.size() ? Data[Idx]->getSyntaxNode(S) : this;
+ const Node *getSyntaxNode(OutputBuffer &OB) const override {
+ initializePackExpansion(OB);
+ size_t Idx = OB.CurrentPackIndex;
+ return Idx < Data.size() ? Data[Idx]->getSyntaxNode(OB) : this;
}
- void printLeft(OutputStream &S) const override {
- initializePackExpansion(S);
- size_t Idx = S.CurrentPackIndex;
+ void printLeft(OutputBuffer &OB) const override {
+ initializePackExpansion(OB);
+ size_t Idx = OB.CurrentPackIndex;
if (Idx < Data.size())
- Data[Idx]->printLeft(S);
+ Data[Idx]->printLeft(OB);
}
- void printRight(OutputStream &S) const override {
- initializePackExpansion(S);
- size_t Idx = S.CurrentPackIndex;
+ void printRight(OutputBuffer &OB) const override {
+ initializePackExpansion(OB);
+ size_t Idx = OB.CurrentPackIndex;
if (Idx < Data.size())
- Data[Idx]->printRight(S);
+ Data[Idx]->printRight(OB);
}
};
@@ -1144,8 +1287,8 @@ public:
NodeArray getElements() const { return Elements; }
- void printLeft(OutputStream &S) const override {
- Elements.printWithComma(S);
+ void printLeft(OutputBuffer &OB) const override {
+ Elements.printWithComma(OB);
}
};
@@ -1162,35 +1305,35 @@ public:
const Node *getChild() const { return Child; }
- void printLeft(OutputStream &S) const override {
+ void printLeft(OutputBuffer &OB) const override {
constexpr unsigned Max = std::numeric_limits<unsigned>::max();
- SwapAndRestore<unsigned> SavePackIdx(S.CurrentPackIndex, Max);
- SwapAndRestore<unsigned> SavePackMax(S.CurrentPackMax, Max);
- size_t StreamPos = S.getCurrentPosition();
+ SwapAndRestore<unsigned> SavePackIdx(OB.CurrentPackIndex, Max);
+ SwapAndRestore<unsigned> SavePackMax(OB.CurrentPackMax, Max);
+ size_t StreamPos = OB.getCurrentPosition();
// Print the first element in the pack. If Child contains a ParameterPack,
// it will set up OB.CurrentPackMax and print the first element.
- Child->print(S);
+ Child->print(OB);
// No ParameterPack was found in Child. This can occur if we've found a pack
// expansion on a <function-param>.
- if (S.CurrentPackMax == Max) {
- S += "...";
+ if (OB.CurrentPackMax == Max) {
+ OB += "...";
return;
}
// We found a ParameterPack, but it has no elements. Erase whatever we may
// have printed.
- if (S.CurrentPackMax == 0) {
- S.setCurrentPosition(StreamPos);
+ if (OB.CurrentPackMax == 0) {
+ OB.setCurrentPosition(StreamPos);
return;
}
// Else, iterate through the rest of the elements in the pack.
- for (unsigned I = 1, E = S.CurrentPackMax; I < E; ++I) {
- S += ", ";
- S.CurrentPackIndex = I;
- Child->print(S);
+ for (unsigned I = 1, E = OB.CurrentPackMax; I < E; ++I) {
+ OB += ", ";
+ OB.CurrentPackIndex = I;
+ Child->print(OB);
}
}
};
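ParameterPack and ParameterPackExpansion cooperate through the CurrentPackIndex/CurrentPackMax fields saved and restored above: the expansion re-prints its child once per pack element, inserting ", " between iterations. A hedged sketch of the visible effect, using LLVM's generic demangle() entry point (assumed available from llvm/Demangle/Demangle.h in this tree):

#include "llvm/Demangle/Demangle.h"
#include <iostream>

int main() {
  // "_Z1gIJifEEvDpT_" is the instantiation void g<int, float>(int, float);
  // the J...E template-argument pack and the DpT_ expansion exercise the
  // two nodes above.
  std::cout << llvm::demangle("_Z1gIJifEEvDpT_") << "\n";
  return 0;
}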
@@ -1205,12 +1348,12 @@ public:
NodeArray getParams() { return Params; }
- void printLeft(OutputStream &S) const override {
- S += "<";
- Params.printWithComma(S);
- if (S.back() == '>')
- S += " ";
- S += ">";
+ void printLeft(OutputBuffer &OB) const override {
+ OB += "<";
+ Params.printWithComma(OB);
+ if (OB.back() == '>')
+ OB += " ";
+ OB += ">";
}
};
@@ -1252,42 +1395,42 @@ struct ForwardTemplateReference : Node {
// special handling.
template<typename Fn> void match(Fn F) const = delete;
- bool hasRHSComponentSlow(OutputStream &S) const override {
+ bool hasRHSComponentSlow(OutputBuffer &OB) const override {
if (Printing)
return false;
SwapAndRestore<bool> SavePrinting(Printing, true);
- return Ref->hasRHSComponent(S);
+ return Ref->hasRHSComponent(OB);
}
- bool hasArraySlow(OutputStream &S) const override {
+ bool hasArraySlow(OutputBuffer &OB) const override {
if (Printing)
return false;
SwapAndRestore<bool> SavePrinting(Printing, true);
- return Ref->hasArray(S);
+ return Ref->hasArray(OB);
}
- bool hasFunctionSlow(OutputStream &S) const override {
+ bool hasFunctionSlow(OutputBuffer &OB) const override {
if (Printing)
return false;
SwapAndRestore<bool> SavePrinting(Printing, true);
- return Ref->hasFunction(S);
+ return Ref->hasFunction(OB);
}
- const Node *getSyntaxNode(OutputStream &S) const override {
+ const Node *getSyntaxNode(OutputBuffer &OB) const override {
if (Printing)
return this;
SwapAndRestore<bool> SavePrinting(Printing, true);
- return Ref->getSyntaxNode(S);
+ return Ref->getSyntaxNode(OB);
}
- void printLeft(OutputStream &S) const override {
+ void printLeft(OutputBuffer &OB) const override {
if (Printing)
return;
SwapAndRestore<bool> SavePrinting(Printing, true);
- Ref->printLeft(S);
+ Ref->printLeft(OB);
}
- void printRight(OutputStream &S) const override {
+ void printRight(OutputBuffer &OB) const override {
if (Printing)
return;
SwapAndRestore<bool> SavePrinting(Printing, true);
- Ref->printRight(S);
+ Ref->printRight(OB);
}
};
@@ -1303,9 +1446,9 @@ struct NameWithTemplateArgs : Node {
StringView getBaseName() const override { return Name->getBaseName(); }
- void printLeft(OutputStream &S) const override {
- Name->print(S);
- TemplateArgs->print(S);
+ void printLeft(OutputBuffer &OB) const override {
+ Name->print(OB);
+ TemplateArgs->print(OB);
}
};
@@ -1320,9 +1463,9 @@ public:
StringView getBaseName() const override { return Child->getBaseName(); }
- void printLeft(OutputStream &S) const override {
- S += "::";
- Child->print(S);
+ void printLeft(OutputBuffer &OB) const override {
+ OB += "::";
+ Child->print(OB);
}
};
@@ -1335,9 +1478,9 @@ struct StdQualifiedName : Node {
StringView getBaseName() const override { return Child->getBaseName(); }
- void printLeft(OutputStream &S) const override {
- S += "std::";
- Child->print(S);
+ void printLeft(OutputBuffer &OB) const override {
+ OB += "std::";
+ Child->print(OB);
}
};
@@ -1377,26 +1520,26 @@ public:
DEMANGLE_UNREACHABLE;
}
- void printLeft(OutputStream &S) const override {
+ void printLeft(OutputBuffer &OB) const override {
switch (SSK) {
case SpecialSubKind::allocator:
- S += "std::allocator";
+ OB += "std::allocator";
break;
case SpecialSubKind::basic_string:
- S += "std::basic_string";
+ OB += "std::basic_string";
break;
case SpecialSubKind::string:
- S += "std::basic_string<char, std::char_traits<char>, "
- "std::allocator<char> >";
+ OB += "std::basic_string<char, std::char_traits<char>, "
+ "std::allocator<char> >";
break;
case SpecialSubKind::istream:
- S += "std::basic_istream<char, std::char_traits<char> >";
+ OB += "std::basic_istream<char, std::char_traits<char> >";
break;
case SpecialSubKind::ostream:
- S += "std::basic_ostream<char, std::char_traits<char> >";
+ OB += "std::basic_ostream<char, std::char_traits<char> >";
break;
case SpecialSubKind::iostream:
- S += "std::basic_iostream<char, std::char_traits<char> >";
+ OB += "std::basic_iostream<char, std::char_traits<char> >";
break;
}
}
@@ -1429,25 +1572,25 @@ public:
DEMANGLE_UNREACHABLE;
}
- void printLeft(OutputStream &S) const override {
+ void printLeft(OutputBuffer &OB) const override {
switch (SSK) {
case SpecialSubKind::allocator:
- S += "std::allocator";
+ OB += "std::allocator";
break;
case SpecialSubKind::basic_string:
- S += "std::basic_string";
+ OB += "std::basic_string";
break;
case SpecialSubKind::string:
- S += "std::string";
+ OB += "std::string";
break;
case SpecialSubKind::istream:
- S += "std::istream";
+ OB += "std::istream";
break;
case SpecialSubKind::ostream:
- S += "std::ostream";
+ OB += "std::ostream";
break;
case SpecialSubKind::iostream:
- S += "std::iostream";
+ OB += "std::iostream";
break;
}
}
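Both switches above are driven by the same SpecialSubKind values; per the Itanium ABI these correspond to the two-letter substitutions below (a hedged summary: the short spelling is what the class just above prints, the expanded spelling is what the earlier class prints):

// Sa -> std::allocator
// Sb -> std::basic_string
// Ss -> std::string      (expanded: std::basic_string<char, std::char_traits<char>, std::allocator<char> >)
// Si -> std::istream     (expanded: std::basic_istream<char, std::char_traits<char> >)
// So -> std::ostream     (expanded: std::basic_ostream<char, std::char_traits<char> >)
// Sd -> std::iostream    (expanded: std::basic_iostream<char, std::char_traits<char> >)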
@@ -1465,10 +1608,10 @@ public:
template<typename Fn> void match(Fn F) const { F(Basename, IsDtor, Variant); }
- void printLeft(OutputStream &S) const override {
+ void printLeft(OutputBuffer &OB) const override {
if (IsDtor)
- S += "~";
- S += Basename->getBaseName();
+ OB += "~";
+ OB += Basename->getBaseName();
}
};
@@ -1480,9 +1623,9 @@ public:
template<typename Fn> void match(Fn F) const { F(Base); }
- void printLeft(OutputStream &S) const override {
- S += "~";
- Base->printLeft(S);
+ void printLeft(OutputBuffer &OB) const override {
+ OB += "~";
+ Base->printLeft(OB);
}
};
@@ -1494,10 +1637,10 @@ public:
template<typename Fn> void match(Fn F) const { F(Count); }
- void printLeft(OutputStream &S) const override {
- S += "'unnamed";
- S += Count;
- S += "\'";
+ void printLeft(OutputBuffer &OB) const override {
+ OB += "'unnamed";
+ OB += Count;
+ OB += "\'";
}
};
@@ -1516,22 +1659,22 @@ public:
F(TemplateParams, Params, Count);
}
- void printDeclarator(OutputStream &S) const {
+ void printDeclarator(OutputBuffer &OB) const {
if (!TemplateParams.empty()) {
- S += "<";
- TemplateParams.printWithComma(S);
- S += ">";
+ OB += "<";
+ TemplateParams.printWithComma(OB);
+ OB += ">";
}
- S += "(";
- Params.printWithComma(S);
- S += ")";
+ OB += "(";
+ Params.printWithComma(OB);
+ OB += ")";
}
- void printLeft(OutputStream &S) const override {
- S += "\'lambda";
- S += Count;
- S += "\'";
- printDeclarator(S);
+ void printLeft(OutputBuffer &OB) const override {
+ OB += "\'lambda";
+ OB += Count;
+ OB += "\'";
+ printDeclarator(OB);
}
};
@@ -1543,10 +1686,10 @@ public:
template<typename Fn> void match(Fn F) const { F(Bindings); }
- void printLeft(OutputStream &S) const override {
- S += '[';
- Bindings.printWithComma(S);
- S += ']';
+ void printLeft(OutputBuffer &OB) const override {
+ OB += '[';
+ Bindings.printWithComma(OB);
+ OB += ']';
}
};
@@ -1564,22 +1707,22 @@ public:
template<typename Fn> void match(Fn F) const { F(LHS, InfixOperator, RHS); }
- void printLeft(OutputStream &S) const override {
+ void printLeft(OutputBuffer &OB) const override {
// This might be a template argument expression; if so, we need to
// disambiguate with parens.
if (InfixOperator == ">")
- S += "(";
+ OB += "(";
- S += "(";
- LHS->print(S);
- S += ") ";
- S += InfixOperator;
- S += " (";
- RHS->print(S);
- S += ")";
+ OB += "(";
+ LHS->print(OB);
+ OB += ") ";
+ OB += InfixOperator;
+ OB += " (";
+ RHS->print(OB);
+ OB += ")";
if (InfixOperator == ">")
- S += ")";
+ OB += ")";
}
};
@@ -1593,12 +1736,12 @@ public:
template<typename Fn> void match(Fn F) const { F(Op1, Op2); }
- void printLeft(OutputStream &S) const override {
- S += "(";
- Op1->print(S);
- S += ")[";
- Op2->print(S);
- S += "]";
+ void printLeft(OutputBuffer &OB) const override {
+ OB += "(";
+ Op1->print(OB);
+ OB += ")[";
+ Op2->print(OB);
+ OB += "]";
}
};
@@ -1612,11 +1755,11 @@ public:
template<typename Fn> void match(Fn F) const { F(Child, Operator); }
- void printLeft(OutputStream &S) const override {
- S += "(";
- Child->print(S);
- S += ")";
- S += Operator;
+ void printLeft(OutputBuffer &OB) const override {
+ OB += "(";
+ Child->print(OB);
+ OB += ")";
+ OB += Operator;
}
};
@@ -1631,14 +1774,14 @@ public:
template<typename Fn> void match(Fn F) const { F(Cond, Then, Else); }
- void printLeft(OutputStream &S) const override {
- S += "(";
- Cond->print(S);
- S += ") ? (";
- Then->print(S);
- S += ") : (";
- Else->print(S);
- S += ")";
+ void printLeft(OutputBuffer &OB) const override {
+ OB += "(";
+ Cond->print(OB);
+ OB += ") ? (";
+ Then->print(OB);
+ OB += ") : (";
+ Else->print(OB);
+ OB += ")";
}
};
@@ -1653,10 +1796,10 @@ public:
template<typename Fn> void match(Fn F) const { F(LHS, Kind, RHS); }
- void printLeft(OutputStream &S) const override {
- LHS->print(S);
- S += Kind;
- RHS->print(S);
+ void printLeft(OutputBuffer &OB) const override {
+ LHS->print(OB);
+ OB += Kind;
+ RHS->print(OB);
}
};
@@ -1677,20 +1820,20 @@ public:
F(Type, SubExpr, Offset, UnionSelectors, OnePastTheEnd);
}
- void printLeft(OutputStream &S) const override {
- SubExpr->print(S);
- S += ".<";
- Type->print(S);
- S += " at offset ";
+ void printLeft(OutputBuffer &OB) const override {
+ SubExpr->print(OB);
+ OB += ".<";
+ Type->print(OB);
+ OB += " at offset ";
if (Offset.empty()) {
- S += "0";
+ OB += "0";
} else if (Offset[0] == 'n') {
- S += "-";
- S += Offset.dropFront();
+ OB += "-";
+ OB += Offset.dropFront();
} else {
- S += Offset;
+ OB += Offset;
}
- S += ">";
+ OB += ">";
}
};
@@ -1706,10 +1849,10 @@ public:
template<typename Fn> void match(Fn F) const { F(Prefix, Infix, Postfix); }
- void printLeft(OutputStream &S) const override {
- S += Prefix;
- Infix->print(S);
- S += Postfix;
+ void printLeft(OutputBuffer &OB) const override {
+ OB += Prefix;
+ Infix->print(OB);
+ OB += Postfix;
}
};
@@ -1725,13 +1868,13 @@ public:
template<typename Fn> void match(Fn F) const { F(CastKind, To, From); }
- void printLeft(OutputStream &S) const override {
- S += CastKind;
- S += "<";
- To->printLeft(S);
- S += ">(";
- From->printLeft(S);
- S += ")";
+ void printLeft(OutputBuffer &OB) const override {
+ OB += CastKind;
+ OB += "<";
+ To->printLeft(OB);
+ OB += ">(";
+ From->printLeft(OB);
+ OB += ")";
}
};
@@ -1744,11 +1887,11 @@ public:
template<typename Fn> void match(Fn F) const { F(Pack); }
- void printLeft(OutputStream &S) const override {
- S += "sizeof...(";
+ void printLeft(OutputBuffer &OB) const override {
+ OB += "sizeof...(";
ParameterPackExpansion PPE(Pack);
- PPE.printLeft(S);
- S += ")";
+ PPE.printLeft(OB);
+ OB += ")";
}
};
@@ -1762,11 +1905,11 @@ public:
template<typename Fn> void match(Fn F) const { F(Callee, Args); }
- void printLeft(OutputStream &S) const override {
- Callee->print(S);
- S += "(";
- Args.printWithComma(S);
- S += ")";
+ void printLeft(OutputBuffer &OB) const override {
+ Callee->print(OB);
+ OB += "(";
+ Args.printWithComma(OB);
+ OB += ")";
}
};
@@ -1787,25 +1930,24 @@ public:
F(ExprList, Type, InitList, IsGlobal, IsArray);
}
- void printLeft(OutputStream &S) const override {
+ void printLeft(OutputBuffer &OB) const override {
if (IsGlobal)
- S += "::operator ";
- S += "new";
+ OB += "::operator ";
+ OB += "new";
if (IsArray)
- S += "[]";
- S += ' ';
+ OB += "[]";
+ OB += ' ';
if (!ExprList.empty()) {
- S += "(";
- ExprList.printWithComma(S);
- S += ")";
+ OB += "(";
+ ExprList.printWithComma(OB);
+ OB += ")";
}
- Type->print(S);
+ Type->print(OB);
if (!InitList.empty()) {
- S += "(";
- InitList.printWithComma(S);
- S += ")";
+ OB += "(";
+ InitList.printWithComma(OB);
+ OB += ")";
}
-
}
};
@@ -1820,13 +1962,13 @@ public:
template<typename Fn> void match(Fn F) const { F(Op, IsGlobal, IsArray); }
- void printLeft(OutputStream &S) const override {
+ void printLeft(OutputBuffer &OB) const override {
if (IsGlobal)
- S += "::";
- S += "delete";
+ OB += "::";
+ OB += "delete";
if (IsArray)
- S += "[] ";
- Op->print(S);
+ OB += "[] ";
+ Op->print(OB);
}
};
@@ -1840,11 +1982,11 @@ public:
template<typename Fn> void match(Fn F) const { F(Prefix, Child); }
- void printLeft(OutputStream &S) const override {
- S += Prefix;
- S += "(";
- Child->print(S);
- S += ")";
+ void printLeft(OutputBuffer &OB) const override {
+ OB += Prefix;
+ OB += "(";
+ Child->print(OB);
+ OB += ")";
}
};
@@ -1856,9 +1998,9 @@ public:
template<typename Fn> void match(Fn F) const { F(Number); }
- void printLeft(OutputStream &S) const override {
- S += "fp";
- S += Number;
+ void printLeft(OutputBuffer &OB) const override {
+ OB += "fp";
+ OB += Number;
}
};
@@ -1872,12 +2014,12 @@ public:
template<typename Fn> void match(Fn F) const { F(Type, Expressions); }
- void printLeft(OutputStream &S) const override {
- S += "(";
- Type->print(S);
- S += ")(";
- Expressions.printWithComma(S);
- S += ")";
+ void printLeft(OutputBuffer &OB) const override {
+ OB += "(";
+ Type->print(OB);
+ OB += ")(";
+ Expressions.printWithComma(OB);
+ OB += ")";
}
};
@@ -1894,12 +2036,12 @@ public:
template<typename Fn> void match(Fn F) const { F(Type, SubExpr, Offset); }
- void printLeft(OutputStream &S) const override {
- S += "(";
- Type->print(S);
- S += ")(";
- SubExpr->print(S);
- S += ")";
+ void printLeft(OutputBuffer &OB) const override {
+ OB += "(";
+ Type->print(OB);
+ OB += ")(";
+ SubExpr->print(OB);
+ OB += ")";
}
};
@@ -1912,12 +2054,12 @@ public:
template<typename Fn> void match(Fn F) const { F(Ty, Inits); }
- void printLeft(OutputStream &S) const override {
+ void printLeft(OutputBuffer &OB) const override {
if (Ty)
- Ty->print(S);
- S += '{';
- Inits.printWithComma(S);
- S += '}';
+ Ty->print(OB);
+ OB += '{';
+ Inits.printWithComma(OB);
+ OB += '}';
}
};
@@ -1931,18 +2073,18 @@ public:
template<typename Fn> void match(Fn F) const { F(Elem, Init, IsArray); }
- void printLeft(OutputStream &S) const override {
+ void printLeft(OutputBuffer &OB) const override {
if (IsArray) {
- S += '[';
- Elem->print(S);
- S += ']';
+ OB += '[';
+ Elem->print(OB);
+ OB += ']';
} else {
- S += '.';
- Elem->print(S);
+ OB += '.';
+ Elem->print(OB);
}
if (Init->getKind() != KBracedExpr && Init->getKind() != KBracedRangeExpr)
- S += " = ";
- Init->print(S);
+ OB += " = ";
+ Init->print(OB);
}
};
@@ -1956,15 +2098,15 @@ public:
template<typename Fn> void match(Fn F) const { F(First, Last, Init); }
- void printLeft(OutputStream &S) const override {
- S += '[';
- First->print(S);
- S += " ... ";
- Last->print(S);
- S += ']';
+ void printLeft(OutputBuffer &OB) const override {
+ OB += '[';
+ First->print(OB);
+ OB += " ... ";
+ Last->print(OB);
+ OB += ']';
if (Init->getKind() != KBracedExpr && Init->getKind() != KBracedRangeExpr)
- S += " = ";
- Init->print(S);
+ OB += " = ";
+ Init->print(OB);
}
};
@@ -1983,43 +2125,43 @@ public:
F(IsLeftFold, OperatorName, Pack, Init);
}
- void printLeft(OutputStream &S) const override {
+ void printLeft(OutputBuffer &OB) const override {
auto PrintPack = [&] {
- S += '(';
- ParameterPackExpansion(Pack).print(S);
- S += ')';
+ OB += '(';
+ ParameterPackExpansion(Pack).print(OB);
+ OB += ')';
};
- S += '(';
+ OB += '(';
if (IsLeftFold) {
// init op ... op pack
if (Init != nullptr) {
- Init->print(S);
- S += ' ';
- S += OperatorName;
- S += ' ';
+ Init->print(OB);
+ OB += ' ';
+ OB += OperatorName;
+ OB += ' ';
}
// ... op pack
- S += "... ";
- S += OperatorName;
- S += ' ';
+ OB += "... ";
+ OB += OperatorName;
+ OB += ' ';
PrintPack();
} else { // !IsLeftFold
// pack op ...
PrintPack();
- S += ' ';
- S += OperatorName;
- S += " ...";
+ OB += ' ';
+ OB += OperatorName;
+ OB += " ...";
// pack op ... op init
if (Init != nullptr) {
- S += ' ';
- S += OperatorName;
- S += ' ';
- Init->print(S);
+ OB += ' ';
+ OB += OperatorName;
+ OB += ' ';
+ Init->print(OB);
}
}
- S += ')';
+ OB += ')';
}
};
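With '+' as OperatorName, the four branches above render roughly as follows (the extra parentheses around the pack come from the PrintPack lambda):

// (... + (pack))           unary left fold:    ... op pack
// (init + ... + (pack))    binary left fold:   init op ... op pack
// ((pack) + ...)           unary right fold:   pack op ...
// ((pack) + ... + init)    binary right fold:  pack op ... op init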
@@ -2031,9 +2173,9 @@ public:
template<typename Fn> void match(Fn F) const { F(Op); }
- void printLeft(OutputStream &S) const override {
- S += "throw ";
- Op->print(S);
+ void printLeft(OutputBuffer &OB) const override {
+ OB += "throw ";
+ Op->print(OB);
}
};
@@ -2045,8 +2187,8 @@ public:
template<typename Fn> void match(Fn F) const { F(Value); }
- void printLeft(OutputStream &S) const override {
- S += Value ? StringView("true") : StringView("false");
+ void printLeft(OutputBuffer &OB) const override {
+ OB += Value ? StringView("true") : StringView("false");
}
};
@@ -2058,10 +2200,10 @@ public:
template<typename Fn> void match(Fn F) const { F(Type); }
- void printLeft(OutputStream &S) const override {
- S += "\"<";
- Type->print(S);
- S += ">\"";
+ void printLeft(OutputBuffer &OB) const override {
+ OB += "\"<";
+ Type->print(OB);
+ OB += ">\"";
}
};
@@ -2073,11 +2215,11 @@ public:
template<typename Fn> void match(Fn F) const { F(Type); }
- void printLeft(OutputStream &S) const override {
- S += "[]";
+ void printLeft(OutputBuffer &OB) const override {
+ OB += "[]";
if (Type->getKind() == KClosureTypeName)
- static_cast<const ClosureTypeName *>(Type)->printDeclarator(S);
- S += "{...}";
+ static_cast<const ClosureTypeName *>(Type)->printDeclarator(OB);
+ OB += "{...}";
}
};
@@ -2092,15 +2234,15 @@ public:
template<typename Fn> void match(Fn F) const { F(Ty, Integer); }
- void printLeft(OutputStream &S) const override {
- S << "(";
- Ty->print(S);
- S << ")";
+ void printLeft(OutputBuffer &OB) const override {
+ OB << "(";
+ Ty->print(OB);
+ OB << ")";
if (Integer[0] == 'n')
- S << "-" << Integer.dropFront(1);
+ OB << "-" << Integer.dropFront(1);
else
- S << Integer;
+ OB << Integer;
}
};
@@ -2114,21 +2256,21 @@ public:
template<typename Fn> void match(Fn F) const { F(Type, Value); }
- void printLeft(OutputStream &S) const override {
+ void printLeft(OutputBuffer &OB) const override {
if (Type.size() > 3) {
- S += "(";
- S += Type;
- S += ")";
+ OB += "(";
+ OB += Type;
+ OB += ")";
}
if (Value[0] == 'n') {
- S += "-";
- S += Value.dropFront(1);
+ OB += "-";
+ OB += Value.dropFront(1);
} else
- S += Value;
+ OB += Value;
if (Type.size() <= 3)
- S += Type;
+ OB += Type;
}
};
@@ -2158,7 +2300,7 @@ public:
template<typename Fn> void match(Fn F) const { F(Contents); }
- void printLeft(OutputStream &s) const override {
+ void printLeft(OutputBuffer &OB) const override {
const char *first = Contents.begin();
const char *last = Contents.end() + 1;
@@ -2184,7 +2326,7 @@ public:
#endif
char num[FloatData<Float>::max_demangled_size] = {0};
int n = snprintf(num, sizeof(num), FloatData<Float>::spec, value);
- s += StringView(num, num + n);
+ OB += StringView(num, num + n);
}
}
};
@@ -2217,125 +2359,6 @@ FOR_EACH_NODE_KIND(SPECIALIZATION)
#undef FOR_EACH_NODE_KIND
-template <class T, size_t N>
-class PODSmallVector {
- static_assert(std::is_pod<T>::value,
- "T is required to be a plain old data type");
-
- T* First = nullptr;
- T* Last = nullptr;
- T* Cap = nullptr;
- T Inline[N] = {0};
-
- bool isInline() const { return First == Inline; }
-
- void clearInline() {
- First = Inline;
- Last = Inline;
- Cap = Inline + N;
- }
-
- void reserve(size_t NewCap) {
- size_t S = size();
- if (isInline()) {
- auto* Tmp = static_cast<T*>(std::malloc(NewCap * sizeof(T)));
- if (Tmp == nullptr)
- std::terminate();
- std::copy(First, Last, Tmp);
- First = Tmp;
- } else {
- First = static_cast<T*>(std::realloc(First, NewCap * sizeof(T)));
- if (First == nullptr)
- std::terminate();
- }
- Last = First + S;
- Cap = First + NewCap;
- }
-
-public:
- PODSmallVector() : First(Inline), Last(First), Cap(Inline + N) {}
-
- PODSmallVector(const PODSmallVector&) = delete;
- PODSmallVector& operator=(const PODSmallVector&) = delete;
-
- PODSmallVector(PODSmallVector&& Other) : PODSmallVector() {
- if (Other.isInline()) {
- std::copy(Other.begin(), Other.end(), First);
- Last = First + Other.size();
- Other.clear();
- return;
- }
-
- First = Other.First;
- Last = Other.Last;
- Cap = Other.Cap;
- Other.clearInline();
- }
-
- PODSmallVector& operator=(PODSmallVector&& Other) {
- if (Other.isInline()) {
- if (!isInline()) {
- std::free(First);
- clearInline();
- }
- std::copy(Other.begin(), Other.end(), First);
- Last = First + Other.size();
- Other.clear();
- return *this;
- }
-
- if (isInline()) {
- First = Other.First;
- Last = Other.Last;
- Cap = Other.Cap;
- Other.clearInline();
- return *this;
- }
-
- std::swap(First, Other.First);
- std::swap(Last, Other.Last);
- std::swap(Cap, Other.Cap);
- Other.clear();
- return *this;
- }
-
- void push_back(const T& Elem) {
- if (Last == Cap)
- reserve(size() * 2);
- *Last++ = Elem;
- }
-
- void pop_back() {
- assert(Last != First && "Popping empty vector!");
- --Last;
- }
-
- void dropBack(size_t Index) {
- assert(Index <= size() && "dropBack() can't expand!");
- Last = First + Index;
- }
-
- T* begin() { return First; }
- T* end() { return Last; }
-
- bool empty() const { return First == Last; }
- size_t size() const { return static_cast<size_t>(Last - First); }
- T& back() {
- assert(Last != First && "Calling back() on empty vector!");
- return *(Last - 1);
- }
- T& operator[](size_t Index) {
- assert(Index < size() && "Invalid access!");
- return *(begin() + Index);
- }
- void clear() { Last = First; }
-
- ~PODSmallVector() {
- if (!isInline())
- std::free(First);
- }
-};
-
template <typename Derived, typename Alloc> struct AbstractManglingParser {
const char *First;
const char *Last;
@@ -3884,6 +3907,16 @@ Node *AbstractManglingParser<Derived, Alloc>::parseType() {
case 'h':
First += 2;
return make<NameType>("half");
+ // ::= DF <number> _ # ISO/IEC TS 18661 binary floating point (N bits)
+ case 'F': {
+ First += 2;
+ Node *DimensionNumber = make<NameType>(parseNumber());
+ if (!DimensionNumber)
+ return nullptr;
+ if (!consumeIf('_'))
+ return nullptr;
+ return make<BinaryFPType>(DimensionNumber);
+ }
// ::= Di # char32_t
case 'i':
First += 2;
diff --git a/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h b/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h
index 77446e9b0f07..46daa3885a06 100644
--- a/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h
+++ b/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h
@@ -21,11 +21,11 @@
namespace llvm {
namespace itanium_demangle {
-class OutputStream;
+class OutputBuffer;
}
}
-using llvm::itanium_demangle::OutputStream;
+using llvm::itanium_demangle::OutputBuffer;
using llvm::itanium_demangle::StringView;
namespace llvm {
@@ -80,6 +80,7 @@ enum OutputFlags {
OF_NoAccessSpecifier = 4,
OF_NoMemberType = 8,
OF_NoReturnType = 16,
+ OF_NoVariableType = 32,
};
// Types
@@ -261,7 +262,7 @@ struct Node {
NodeKind kind() const { return Kind; }
- virtual void output(OutputStream &OS, OutputFlags Flags) const = 0;
+ virtual void output(OutputBuffer &OB, OutputFlags Flags) const = 0;
std::string toString(OutputFlags Flags = OF_Default) const;
@@ -300,12 +301,12 @@ struct SpecialTableSymbolNode;
struct TypeNode : public Node {
explicit TypeNode(NodeKind K) : Node(K) {}
- virtual void outputPre(OutputStream &OS, OutputFlags Flags) const = 0;
- virtual void outputPost(OutputStream &OS, OutputFlags Flags) const = 0;
+ virtual void outputPre(OutputBuffer &OB, OutputFlags Flags) const = 0;
+ virtual void outputPost(OutputBuffer &OB, OutputFlags Flags) const = 0;
- void output(OutputStream &OS, OutputFlags Flags) const override {
- outputPre(OS, Flags);
- outputPost(OS, Flags);
+ void output(OutputBuffer &OB, OutputFlags Flags) const override {
+ outputPre(OB, Flags);
+ outputPost(OB, Flags);
}
Qualifiers Quals = Q_None;
@@ -315,8 +316,8 @@ struct PrimitiveTypeNode : public TypeNode {
explicit PrimitiveTypeNode(PrimitiveKind K)
: TypeNode(NodeKind::PrimitiveType), PrimKind(K) {}
- void outputPre(OutputStream &OS, OutputFlags Flags) const override;
- void outputPost(OutputStream &OS, OutputFlags Flags) const override {}
+ void outputPre(OutputBuffer &OB, OutputFlags Flags) const override;
+ void outputPost(OutputBuffer &OB, OutputFlags Flags) const override {}
PrimitiveKind PrimKind;
};
@@ -325,8 +326,8 @@ struct FunctionSignatureNode : public TypeNode {
explicit FunctionSignatureNode(NodeKind K) : TypeNode(K) {}
FunctionSignatureNode() : TypeNode(NodeKind::FunctionSignature) {}
- void outputPre(OutputStream &OS, OutputFlags Flags) const override;
- void outputPost(OutputStream &OS, OutputFlags Flags) const override;
+ void outputPre(OutputBuffer &OB, OutputFlags Flags) const override;
+ void outputPost(OutputBuffer &OB, OutputFlags Flags) const override;
// Valid if this FunctionTypeNode is the Pointee of a PointerType or
// MemberPointerType.
@@ -359,13 +360,13 @@ struct IdentifierNode : public Node {
NodeArrayNode *TemplateParams = nullptr;
protected:
- void outputTemplateParameters(OutputStream &OS, OutputFlags Flags) const;
+ void outputTemplateParameters(OutputBuffer &OB, OutputFlags Flags) const;
};
struct VcallThunkIdentifierNode : public IdentifierNode {
VcallThunkIdentifierNode() : IdentifierNode(NodeKind::VcallThunkIdentifier) {}
- void output(OutputStream &OS, OutputFlags Flags) const override;
+ void output(OutputBuffer &OB, OutputFlags Flags) const override;
uint64_t OffsetInVTable = 0;
};
@@ -374,7 +375,7 @@ struct DynamicStructorIdentifierNode : public IdentifierNode {
DynamicStructorIdentifierNode()
: IdentifierNode(NodeKind::DynamicStructorIdentifier) {}
- void output(OutputStream &OS, OutputFlags Flags) const override;
+ void output(OutputBuffer &OB, OutputFlags Flags) const override;
VariableSymbolNode *Variable = nullptr;
QualifiedNameNode *Name = nullptr;
@@ -384,7 +385,7 @@ struct DynamicStructorIdentifierNode : public IdentifierNode {
struct NamedIdentifierNode : public IdentifierNode {
NamedIdentifierNode() : IdentifierNode(NodeKind::NamedIdentifier) {}
- void output(OutputStream &OS, OutputFlags Flags) const override;
+ void output(OutputBuffer &OB, OutputFlags Flags) const override;
StringView Name;
};
@@ -394,7 +395,7 @@ struct IntrinsicFunctionIdentifierNode : public IdentifierNode {
: IdentifierNode(NodeKind::IntrinsicFunctionIdentifier),
Operator(Operator) {}
- void output(OutputStream &OS, OutputFlags Flags) const override;
+ void output(OutputBuffer &OB, OutputFlags Flags) const override;
IntrinsicFunctionKind Operator;
};
@@ -403,7 +404,7 @@ struct LiteralOperatorIdentifierNode : public IdentifierNode {
LiteralOperatorIdentifierNode()
: IdentifierNode(NodeKind::LiteralOperatorIdentifier) {}
- void output(OutputStream &OS, OutputFlags Flags) const override;
+ void output(OutputBuffer &OB, OutputFlags Flags) const override;
StringView Name;
};
@@ -412,7 +413,7 @@ struct LocalStaticGuardIdentifierNode : public IdentifierNode {
LocalStaticGuardIdentifierNode()
: IdentifierNode(NodeKind::LocalStaticGuardIdentifier) {}
- void output(OutputStream &OS, OutputFlags Flags) const override;
+ void output(OutputBuffer &OB, OutputFlags Flags) const override;
bool IsThread = false;
uint32_t ScopeIndex = 0;
@@ -422,7 +423,7 @@ struct ConversionOperatorIdentifierNode : public IdentifierNode {
ConversionOperatorIdentifierNode()
: IdentifierNode(NodeKind::ConversionOperatorIdentifier) {}
- void output(OutputStream &OS, OutputFlags Flags) const override;
+ void output(OutputBuffer &OB, OutputFlags Flags) const override;
// The type that this operator converts to.
TypeNode *TargetType = nullptr;
@@ -434,7 +435,7 @@ struct StructorIdentifierNode : public IdentifierNode {
: IdentifierNode(NodeKind::StructorIdentifier),
IsDestructor(IsDestructor) {}
- void output(OutputStream &OS, OutputFlags Flags) const override;
+ void output(OutputBuffer &OB, OutputFlags Flags) const override;
// The name of the class that this is a structor of.
IdentifierNode *Class = nullptr;
@@ -444,8 +445,8 @@ struct StructorIdentifierNode : public IdentifierNode {
struct ThunkSignatureNode : public FunctionSignatureNode {
ThunkSignatureNode() : FunctionSignatureNode(NodeKind::ThunkSignature) {}
- void outputPre(OutputStream &OS, OutputFlags Flags) const override;
- void outputPost(OutputStream &OS, OutputFlags Flags) const override;
+ void outputPre(OutputBuffer &OB, OutputFlags Flags) const override;
+ void outputPost(OutputBuffer &OB, OutputFlags Flags) const override;
struct ThisAdjustor {
uint32_t StaticOffset = 0;
@@ -459,8 +460,8 @@ struct ThunkSignatureNode : public FunctionSignatureNode {
struct PointerTypeNode : public TypeNode {
PointerTypeNode() : TypeNode(NodeKind::PointerType) {}
- void outputPre(OutputStream &OS, OutputFlags Flags) const override;
- void outputPost(OutputStream &OS, OutputFlags Flags) const override;
+ void outputPre(OutputBuffer &OB, OutputFlags Flags) const override;
+ void outputPost(OutputBuffer &OB, OutputFlags Flags) const override;
// Is this a pointer, reference, or rvalue-reference?
PointerAffinity Affinity = PointerAffinity::None;
@@ -476,8 +477,8 @@ struct PointerTypeNode : public TypeNode {
struct TagTypeNode : public TypeNode {
explicit TagTypeNode(TagKind Tag) : TypeNode(NodeKind::TagType), Tag(Tag) {}
- void outputPre(OutputStream &OS, OutputFlags Flags) const override;
- void outputPost(OutputStream &OS, OutputFlags Flags) const override;
+ void outputPre(OutputBuffer &OB, OutputFlags Flags) const override;
+ void outputPost(OutputBuffer &OB, OutputFlags Flags) const override;
QualifiedNameNode *QualifiedName = nullptr;
TagKind Tag;
@@ -486,11 +487,11 @@ struct TagTypeNode : public TypeNode {
struct ArrayTypeNode : public TypeNode {
ArrayTypeNode() : TypeNode(NodeKind::ArrayType) {}
- void outputPre(OutputStream &OS, OutputFlags Flags) const override;
- void outputPost(OutputStream &OS, OutputFlags Flags) const override;
+ void outputPre(OutputBuffer &OB, OutputFlags Flags) const override;
+ void outputPost(OutputBuffer &OB, OutputFlags Flags) const override;
- void outputDimensionsImpl(OutputStream &OS, OutputFlags Flags) const;
- void outputOneDimension(OutputStream &OS, OutputFlags Flags, Node *N) const;
+ void outputDimensionsImpl(OutputBuffer &OB, OutputFlags Flags) const;
+ void outputOneDimension(OutputBuffer &OB, OutputFlags Flags, Node *N) const;
// A list of array dimensions. e.g. [3,4,5] in `int Foo[3][4][5]`
NodeArrayNode *Dimensions = nullptr;
@@ -501,14 +502,14 @@ struct ArrayTypeNode : public TypeNode {
struct IntrinsicNode : public TypeNode {
IntrinsicNode() : TypeNode(NodeKind::IntrinsicType) {}
- void output(OutputStream &OS, OutputFlags Flags) const override {}
+ void output(OutputBuffer &OB, OutputFlags Flags) const override {}
};
struct CustomTypeNode : public TypeNode {
CustomTypeNode() : TypeNode(NodeKind::Custom) {}
- void outputPre(OutputStream &OS, OutputFlags Flags) const override;
- void outputPost(OutputStream &OS, OutputFlags Flags) const override;
+ void outputPre(OutputBuffer &OB, OutputFlags Flags) const override;
+ void outputPost(OutputBuffer &OB, OutputFlags Flags) const override;
IdentifierNode *Identifier = nullptr;
};
@@ -516,9 +517,9 @@ struct CustomTypeNode : public TypeNode {
struct NodeArrayNode : public Node {
NodeArrayNode() : Node(NodeKind::NodeArray) {}
- void output(OutputStream &OS, OutputFlags Flags) const override;
+ void output(OutputBuffer &OB, OutputFlags Flags) const override;
- void output(OutputStream &OS, OutputFlags Flags, StringView Separator) const;
+ void output(OutputBuffer &OB, OutputFlags Flags, StringView Separator) const;
Node **Nodes = nullptr;
size_t Count = 0;
@@ -527,7 +528,7 @@ struct NodeArrayNode : public Node {
struct QualifiedNameNode : public Node {
QualifiedNameNode() : Node(NodeKind::QualifiedName) {}
- void output(OutputStream &OS, OutputFlags Flags) const override;
+ void output(OutputBuffer &OB, OutputFlags Flags) const override;
NodeArrayNode *Components = nullptr;
@@ -541,7 +542,7 @@ struct TemplateParameterReferenceNode : public Node {
TemplateParameterReferenceNode()
: Node(NodeKind::TemplateParameterReference) {}
- void output(OutputStream &OS, OutputFlags Flags) const override;
+ void output(OutputBuffer &OB, OutputFlags Flags) const override;
SymbolNode *Symbol = nullptr;
@@ -556,7 +557,7 @@ struct IntegerLiteralNode : public Node {
IntegerLiteralNode(uint64_t Value, bool IsNegative)
: Node(NodeKind::IntegerLiteral), Value(Value), IsNegative(IsNegative) {}
- void output(OutputStream &OS, OutputFlags Flags) const override;
+ void output(OutputBuffer &OB, OutputFlags Flags) const override;
uint64_t Value = 0;
bool IsNegative = false;
@@ -566,7 +567,7 @@ struct RttiBaseClassDescriptorNode : public IdentifierNode {
RttiBaseClassDescriptorNode()
: IdentifierNode(NodeKind::RttiBaseClassDescriptor) {}
- void output(OutputStream &OS, OutputFlags Flags) const override;
+ void output(OutputBuffer &OB, OutputFlags Flags) const override;
uint32_t NVOffset = 0;
int32_t VBPtrOffset = 0;
@@ -576,7 +577,7 @@ struct RttiBaseClassDescriptorNode : public IdentifierNode {
struct SymbolNode : public Node {
explicit SymbolNode(NodeKind K) : Node(K) {}
- void output(OutputStream &OS, OutputFlags Flags) const override;
+ void output(OutputBuffer &OB, OutputFlags Flags) const override;
QualifiedNameNode *Name = nullptr;
};
@@ -584,7 +585,7 @@ struct SpecialTableSymbolNode : public SymbolNode {
explicit SpecialTableSymbolNode()
: SymbolNode(NodeKind::SpecialTableSymbol) {}
- void output(OutputStream &OS, OutputFlags Flags) const override;
+ void output(OutputBuffer &OB, OutputFlags Flags) const override;
QualifiedNameNode *TargetName = nullptr;
Qualifiers Quals = Qualifiers::Q_None;
};
@@ -593,7 +594,7 @@ struct LocalStaticGuardVariableNode : public SymbolNode {
LocalStaticGuardVariableNode()
: SymbolNode(NodeKind::LocalStaticGuardVariable) {}
- void output(OutputStream &OS, OutputFlags Flags) const override;
+ void output(OutputBuffer &OB, OutputFlags Flags) const override;
bool IsVisible = false;
};
@@ -601,7 +602,7 @@ struct LocalStaticGuardVariableNode : public SymbolNode {
struct EncodedStringLiteralNode : public SymbolNode {
EncodedStringLiteralNode() : SymbolNode(NodeKind::EncodedStringLiteral) {}
- void output(OutputStream &OS, OutputFlags Flags) const override;
+ void output(OutputBuffer &OB, OutputFlags Flags) const override;
StringView DecodedString;
bool IsTruncated = false;
@@ -611,7 +612,7 @@ struct EncodedStringLiteralNode : public SymbolNode {
struct VariableSymbolNode : public SymbolNode {
VariableSymbolNode() : SymbolNode(NodeKind::VariableSymbol) {}
- void output(OutputStream &OS, OutputFlags Flags) const override;
+ void output(OutputBuffer &OB, OutputFlags Flags) const override;
StorageClass SC = StorageClass::None;
TypeNode *Type = nullptr;
@@ -620,7 +621,7 @@ struct VariableSymbolNode : public SymbolNode {
struct FunctionSymbolNode : public SymbolNode {
FunctionSymbolNode() : SymbolNode(NodeKind::FunctionSymbol) {}
- void output(OutputStream &OS, OutputFlags Flags) const override;
+ void output(OutputBuffer &OB, OutputFlags Flags) const override;
FunctionSignatureNode *Signature = nullptr;
};
diff --git a/llvm/include/llvm/Demangle/Utility.h b/llvm/include/llvm/Demangle/Utility.h
index 04ff65a35aed..4fea9351a4bf 100644
--- a/llvm/include/llvm/Demangle/Utility.h
+++ b/llvm/include/llvm/Demangle/Utility.h
@@ -24,7 +24,7 @@ DEMANGLE_NAMESPACE_BEGIN
// Stream that AST nodes write their string representation into after the AST
// has been parsed.
-class OutputStream {
+class OutputBuffer {
char *Buffer = nullptr;
size_t CurrentPosition = 0;
size_t BufferCapacity = 0;
@@ -63,9 +63,9 @@ class OutputStream {
}
public:
- OutputStream(char *StartBuf, size_t Size)
+ OutputBuffer(char *StartBuf, size_t Size)
: Buffer(StartBuf), CurrentPosition(0), BufferCapacity(Size) {}
- OutputStream() = default;
+ OutputBuffer() = default;
void reset(char *Buffer_, size_t BufferCapacity_) {
CurrentPosition = 0;
Buffer = Buffer_;
@@ -77,7 +77,7 @@ public:
unsigned CurrentPackIndex = std::numeric_limits<unsigned>::max();
unsigned CurrentPackMax = std::numeric_limits<unsigned>::max();
- OutputStream &operator+=(StringView R) {
+ OutputBuffer &operator+=(StringView R) {
size_t Size = R.size();
if (Size == 0)
return *this;
@@ -87,17 +87,28 @@ public:
return *this;
}
- OutputStream &operator+=(char C) {
+ OutputBuffer &operator+=(char C) {
grow(1);
Buffer[CurrentPosition++] = C;
return *this;
}
- OutputStream &operator<<(StringView R) { return (*this += R); }
+ OutputBuffer &operator<<(StringView R) { return (*this += R); }
- OutputStream &operator<<(char C) { return (*this += C); }
+ OutputBuffer prepend(StringView R) {
+ size_t Size = R.size();
+
+ grow(Size);
+ std::memmove(Buffer + Size, Buffer, CurrentPosition);
+ std::memcpy(Buffer, R.begin(), Size);
+ CurrentPosition += Size;
- OutputStream &operator<<(long long N) {
+ return *this;
+ }
+
+ OutputBuffer &operator<<(char C) { return (*this += C); }
+
+ OutputBuffer &operator<<(long long N) {
if (N < 0)
writeUnsigned(static_cast<unsigned long long>(-N), true);
else
@@ -105,27 +116,37 @@ public:
return *this;
}
- OutputStream &operator<<(unsigned long long N) {
+ OutputBuffer &operator<<(unsigned long long N) {
writeUnsigned(N, false);
return *this;
}
- OutputStream &operator<<(long N) {
+ OutputBuffer &operator<<(long N) {
return this->operator<<(static_cast<long long>(N));
}
- OutputStream &operator<<(unsigned long N) {
+ OutputBuffer &operator<<(unsigned long N) {
return this->operator<<(static_cast<unsigned long long>(N));
}
- OutputStream &operator<<(int N) {
+ OutputBuffer &operator<<(int N) {
return this->operator<<(static_cast<long long>(N));
}
- OutputStream &operator<<(unsigned int N) {
+ OutputBuffer &operator<<(unsigned int N) {
return this->operator<<(static_cast<unsigned long long>(N));
}
+ void insert(size_t Pos, const char *S, size_t N) {
+ assert(Pos <= CurrentPosition);
+ if (N == 0)
+ return;
+ grow(N);
+ std::memmove(Buffer + Pos + N, Buffer + Pos, CurrentPosition - Pos);
+ std::memcpy(Buffer + Pos, S, N);
+ CurrentPosition += N;
+ }
+
size_t getCurrentPosition() const { return CurrentPosition; }
void setCurrentPosition(size_t NewPos) { CurrentPosition = NewPos; }
@@ -171,7 +192,7 @@ public:
SwapAndRestore &operator=(const SwapAndRestore &) = delete;
};
-inline bool initializeOutputStream(char *Buf, size_t *N, OutputStream &S,
+inline bool initializeOutputBuffer(char *Buf, size_t *N, OutputBuffer &OB,
size_t InitSize) {
size_t BufferSize;
if (Buf == nullptr) {
@@ -182,7 +203,7 @@ inline bool initializeOutputStream(char *Buf, size_t *N, OutputStream &S,
} else
BufferSize = *N;
- S.reset(Buf, BufferSize);
+ OB.reset(Buf, BufferSize);
return true;
}
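A minimal usage sketch of the renamed buffer; the getBuffer() accessor and the namespace qualifiers are assumed from elsewhere in this header and are not shown in the hunk:

#include <cstdio>
#include <cstdlib>

void demo() {
  size_t Size = 0;
  OutputBuffer OB;
  if (!initializeOutputBuffer(nullptr, &Size, OB, 128)) // malloc a 128-byte buffer
    return;
  OB << "_Float" << 16;       // appends "_Float16"
  OB.insert(0, "type: ", 6);  // the new insert() splices text at any position
  OB += '\0';                 // NUL-terminate before reading the C string out
  std::puts(OB.getBuffer());  // "type: _Float16"
  std::free(OB.getBuffer());  // the caller owns the underlying storage
}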
diff --git a/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h b/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h
index 2e386518f0bf..43c91fb5f988 100644
--- a/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h
+++ b/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h
@@ -21,7 +21,6 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ExecutionEngine/JITSymbol.h"
-#include "llvm/ExecutionEngine/OrcV1Deprecation.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Module.h"
#include "llvm/Object/Binary.h"
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_aarch64.h b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_aarch64.h
new file mode 100644
index 000000000000..50eb598139ea
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_aarch64.h
@@ -0,0 +1,39 @@
+//===--- ELF_aarch64.h - JIT link functions for ELF/aarch64 --*- C++ -*----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+//
+// jit-link functions for ELF/aarch64.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_JITLINK_ELF_AARCH64_H
+#define LLVM_EXECUTIONENGINE_JITLINK_ELF_AARCH64_H
+
+#include "llvm/ExecutionEngine/JITLink/JITLink.h"
+
+namespace llvm {
+namespace jitlink {
+
+/// Create a LinkGraph from an ELF/aarch64 relocatable object
+///
+/// Note: The graph does not take ownership of the underlying buffer, nor copy
+/// its contents. The caller is responsible for ensuring that the object buffer
+/// outlives the graph.
+Expected<std::unique_ptr<LinkGraph>>
+createLinkGraphFromELFObject_aarch64(MemoryBufferRef ObjectBuffer);
+
+/// jit-link the given object buffer, which must be an ELF aarch64 relocatable
+/// object file.
+void link_ELF_aarch64(std::unique_ptr<LinkGraph> G,
+ std::unique_ptr<JITLinkContext> Ctx);
+
+} // end namespace jitlink
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_JITLINK_ELF_AARCH64_H
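A hedged usage sketch for the two new entry points; the file path and error handling are illustrative only, and a real client would go on to call link_ELF_aarch64 with a JITLinkContext:

#include "llvm/ExecutionEngine/JITLink/ELF_aarch64.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"

void dumpAArch64Object(const char *Path) {
  auto BufOrErr = llvm::MemoryBuffer::getFile(Path);
  if (!BufOrErr)
    return; // real code would report BufOrErr.getError()
  auto G = llvm::jitlink::createLinkGraphFromELFObject_aarch64(
      (*BufOrErr)->getMemBufferRef());
  if (!G) {
    llvm::logAllUnhandledErrors(G.takeError(), llvm::errs(), "jitlink: ");
    return;
  }
  (*G)->dump(llvm::errs()); // LinkGraph::dump(raw_ostream &) from JITLink.h
}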
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_riscv.h b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_riscv.h
index 1339ab51cbb9..5a8b186a2c3e 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_riscv.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_riscv.h
@@ -35,4 +35,4 @@ void link_ELF_riscv(std::unique_ptr<LinkGraph> G,
} // end namespace jitlink
} // end namespace llvm
-#endif // LLVM_EXECUTIONENGINE_JITLINK_ELF_RISCV64_H
+#endif // LLVM_EXECUTIONENGINE_JITLINK_ELF_RISCV_H
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_x86_64.h b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_x86_64.h
index d8ed953363e6..f5fa9e96c594 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_x86_64.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_x86_64.h
@@ -21,29 +21,17 @@ namespace jitlink {
namespace ELF_x86_64_Edges {
enum ELFX86RelocationKind : Edge::Kind {
Branch32 = Edge::FirstRelocation,
- Branch32ToStub,
- Pointer32,
+ Pointer32Signed,
Pointer64,
- Pointer64Anon,
PCRel32,
- PCRel64,
- PCRel32Minus1,
- PCRel32Minus2,
- PCRel32Minus4,
- PCRel32Anon,
- PCRel32Minus1Anon,
- PCRel32Minus2Anon,
- PCRel32Minus4Anon,
PCRel32GOTLoad,
- PCRel32GOT,
+ PCRel32GOTLoadRelaxable,
+ PCRel32REXGOTLoadRelaxable,
+ PCRel32TLV,
PCRel64GOT,
GOTOFF64,
GOT64,
- PCRel32TLV,
- Delta32,
Delta64,
- NegDelta32,
- NegDelta64,
};
} // end namespace ELF_x86_64_Edges
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h
index 6162a675ec12..83d85953fce6 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h
@@ -13,19 +13,19 @@
#ifndef LLVM_EXECUTIONENGINE_JITLINK_JITLINK_H
#define LLVM_EXECUTIONENGINE_JITLINK_JITLINK_H
-#include "JITLinkMemoryManager.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h"
+#include "llvm/ExecutionEngine/JITLink/MemoryFlags.h"
#include "llvm/ExecutionEngine/JITSymbol.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/Memory.h"
#include "llvm/Support/MemoryBuffer.h"
#include <map>
@@ -225,7 +225,7 @@ public:
/// Get the content for this block. Block must not be a zero-fill block.
ArrayRef<char> getContent() const {
- assert(Data && "Section does not contain content");
+ assert(Data && "Block does not contain content");
return ArrayRef<char>(Data, Size);
}
@@ -233,6 +233,7 @@ public:
/// Caller is responsible for ensuring the underlying bytes are not
/// deallocated while pointed to by this block.
void setContent(ArrayRef<char> Content) {
+ assert(Content.data() && "Setting null content");
Data = Content.data();
Size = Content.size();
ContentMutable = false;
@@ -251,6 +252,7 @@ public:
/// to call this on a block with immutable content -- consider using
/// getMutableContent instead.
MutableArrayRef<char> getAlreadyMutableContent() {
+ assert(Data && "Block does not contain content");
assert(ContentMutable && "Content is not mutable");
return MutableArrayRef<char>(const_cast<char *>(Data), Size);
}
@@ -260,6 +262,7 @@ public:
/// The caller is responsible for ensuring that the memory pointed to by
/// MutableContent is not deallocated while pointed to by this block.
void setMutableContent(MutableArrayRef<char> MutableContent) {
+ assert(MutableContent.data() && "Setting null content");
Data = MutableContent.data();
Size = MutableContent.size();
ContentMutable = true;
@@ -295,6 +298,7 @@ public:
/// Add an edge to this block.
void addEdge(Edge::Kind K, Edge::OffsetT Offset, Symbol &Target,
Edge::AddendT Addend) {
+ assert(!isZeroFill() && "Adding edge to zero-fill block?");
Edges.push_back(Edge(K, Offset, Target, Addend));
}
@@ -339,6 +343,12 @@ private:
std::vector<Edge> Edges;
};
+// Align a JITTargetAddress to conform with block alignment requirements.
+inline JITTargetAddress alignToBlock(JITTargetAddress Addr, Block &B) {
+ uint64_t Delta = (B.getAlignmentOffset() - Addr) % B.getAlignment();
+ return Addr + Delta;
+}
+
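A worked instance of the modular arithmetic, with hypothetical values: a block whose getAlignment() is 16 and whose getAlignmentOffset() is 4 must start at an address congruent to 4 (mod 16).

//   Addr  = 0x1003
//   Delta = (4 - 0x1003) % 16 = 1          (unsigned wrap-around, then modulo)
//   alignToBlock(0x1003, B) == 0x1004      and 0x1004 % 16 == 4, as required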
/// Describes symbol linkage. This can be used to resolve definition
/// clashes.
enum class Linkage : uint8_t {
@@ -640,8 +650,7 @@ class Section {
friend class LinkGraph;
private:
- Section(StringRef Name, sys::Memory::ProtectionFlags Prot,
- SectionOrdinal SecOrdinal)
+ Section(StringRef Name, MemProt Prot, SectionOrdinal SecOrdinal)
: Name(Name), Prot(Prot), SecOrdinal(SecOrdinal) {}
using SymbolSet = DenseSet<Symbol *>;
@@ -666,12 +675,16 @@ public:
StringRef getName() const { return Name; }
/// Returns the protection flags for this section.
- sys::Memory::ProtectionFlags getProtectionFlags() const { return Prot; }
+ MemProt getMemProt() const { return Prot; }
/// Set the protection flags for this section.
- void setProtectionFlags(sys::Memory::ProtectionFlags Prot) {
- this->Prot = Prot;
- }
+ void setMemProt(MemProt Prot) { this->Prot = Prot; }
+
+ /// Get the deallocation policy for this section.
+ MemDeallocPolicy getMemDeallocPolicy() const { return MDP; }
+
+ /// Set the deallocation policy for this section.
+ void setMemDeallocPolicy(MemDeallocPolicy MDP) { this->MDP = MDP; }
/// Returns the ordinal for this section.
SectionOrdinal getOrdinal() const { return SecOrdinal; }
@@ -686,6 +699,7 @@ public:
return make_range(Blocks.begin(), Blocks.end());
}
+ /// Returns the number of blocks in this section.
BlockSet::size_type blocks_size() const { return Blocks.size(); }
/// Returns an iterator over the symbols defined in this section.
@@ -734,7 +748,8 @@ private:
}
StringRef Name;
- sys::Memory::ProtectionFlags Prot;
+ MemProt Prot;
+ MemDeallocPolicy MDP = MemDeallocPolicy::Standard;
SectionOrdinal SecOrdinal = 0;
BlockSet Blocks;
SymbolSet Symbols;
@@ -916,6 +931,11 @@ public:
: Name(std::move(Name)), TT(TT), PointerSize(PointerSize),
Endianness(Endianness), GetEdgeKindName(std::move(GetEdgeKindName)) {}
+ LinkGraph(const LinkGraph &) = delete;
+ LinkGraph &operator=(const LinkGraph &) = delete;
+ LinkGraph(LinkGraph &&) = delete;
+ LinkGraph &operator=(LinkGraph &&) = delete;
+
/// Returns the name of this graph (usually the name of the original
/// underlying MemoryBuffer).
const std::string &getName() const { return Name; }
@@ -962,7 +982,7 @@ public:
}
/// Create a section with the given name, protection flags, and alignment.
- Section &createSection(StringRef Name, sys::Memory::ProtectionFlags Prot) {
+ Section &createSection(StringRef Name, MemProt Prot) {
assert(llvm::find_if(Sections,
[&](std::unique_ptr<Section> &Sec) {
return Sec->getName() == Name;
@@ -1100,10 +1120,10 @@ public:
Symbol &addDefinedSymbol(Block &Content, JITTargetAddress Offset,
StringRef Name, JITTargetAddress Size, Linkage L,
Scope S, bool IsCallable, bool IsLive) {
- assert(llvm::count_if(defined_symbols(),
- [&](const Symbol *Sym) {
- return Sym->getName() == Name;
- }) == 0 &&
+ assert((S == Scope::Local || llvm::count_if(defined_symbols(),
+ [&](const Symbol *Sym) {
+ return Sym->getName() == Name;
+ }) == 0) &&
"Duplicate defined symbol");
auto &Sym =
Symbol::constructNamedDef(Allocator.Allocate<Symbol>(), Content, Offset,
@@ -1237,6 +1257,7 @@ public:
void transferDefinedSymbol(Symbol &Sym, Block &DestBlock,
JITTargetAddress NewOffset,
Optional<JITTargetAddress> ExplicitNewSize) {
+ auto &OldSection = Sym.getBlock().getSection();
Sym.setBlock(DestBlock);
Sym.setOffset(NewOffset);
if (ExplicitNewSize)
@@ -1246,6 +1267,10 @@ public:
if (Sym.getSize() > RemainingBlockSize)
Sym.setSize(RemainingBlockSize);
}
+ if (&DestBlock.getSection() != &OldSection) {
+ OldSection.removeSymbol(Sym);
+ DestBlock.getSection().addSymbol(Sym);
+ }
}
/// Transfers the given Block and all Symbols pointing to it to the given
@@ -1280,6 +1305,8 @@ public:
bool PreserveSrcSection = false) {
if (&DstSection == &SrcSection)
return;
+ for (auto *B : SrcSection.blocks())
+ B->setSection(DstSection);
SrcSection.transferContentTo(DstSection);
if (!PreserveSrcSection)
removeSection(SrcSection);
@@ -1345,6 +1372,13 @@ public:
Sections.erase(I);
}
+ /// Accessor for the AllocActions object for this graph. This can be used to
+ /// register allocation action calls prior to finalization.
+ ///
+ /// Accessing this object after finalization will result in undefined
+ /// behavior.
+ JITLinkMemoryManager::AllocActions &allocActions() { return AAs; }
+
/// Dump the graph.
void dump(raw_ostream &OS);
@@ -1361,6 +1395,7 @@ private:
SectionList Sections;
ExternalSymbolSet ExternalSymbols;
ExternalSymbolSet AbsoluteSymbols;
+ JITLinkMemoryManager::AllocActions AAs;
};
inline MutableArrayRef<char> Block::getMutableContent(LinkGraph &G) {
@@ -1650,8 +1685,7 @@ public:
/// finalized (i.e. emitted to memory and memory permissions set). If all of
/// this object's dependencies have also been finalized then the code is ready
/// to run.
- virtual void
- notifyFinalized(std::unique_ptr<JITLinkMemoryManager::Allocation> A) = 0;
+ virtual void notifyFinalized(JITLinkMemoryManager::FinalizedAlloc Alloc) = 0;
/// Called by JITLink prior to linking to determine whether default passes for
/// the target should be added. The default implementation returns true.
@@ -1683,6 +1717,36 @@ Error markAllSymbolsLive(LinkGraph &G);
Error makeTargetOutOfRangeError(const LinkGraph &G, const Block &B,
const Edge &E);
+/// Base case for edge-visitors where the visitor-list is empty.
+inline void visitEdge(LinkGraph &G, Block *B, Edge &E) {}
+
+/// Applies the first visitor in the list to the given edge. If the visitor's
+/// visitEdge method returns true then we return immediately, otherwise we
+/// apply the next visitor.
+template <typename VisitorT, typename... VisitorTs>
+void visitEdge(LinkGraph &G, Block *B, Edge &E, VisitorT &&V,
+ VisitorTs &&...Vs) {
+ if (!V.visitEdge(G, B, E))
+ visitEdge(G, B, E, std::forward<VisitorTs>(Vs)...);
+}
+
+/// For each edge in the given graph, apply a list of visitors to the edge,
+/// stopping when the first visitor's visitEdge method returns true.
+///
+/// Only visits edges that were in the graph at call time: if any visitor
+/// adds new edges those will not be visited. Visitors are not allowed to
+/// remove edges (though they can change their kind, target, and addend).
+template <typename... VisitorTs>
+void visitExistingEdges(LinkGraph &G, VisitorTs &&...Vs) {
+ // We may add new blocks during this process, but we don't want to iterate
+ // over them, so build a worklist.
+ std::vector<Block *> Worklist(G.blocks().begin(), G.blocks().end());
+
+ for (auto *B : Worklist)
+ for (auto &E : B->edges())
+ visitEdge(G, B, E, std::forward<VisitorTs>(Vs)...);
+}
+
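A hedged sketch of the visitor protocol: each visitor exposes a visitEdge(LinkGraph &, Block *, Edge &) member returning true once it has handled the edge. The struct name and the edge kind below are illustrative only:

struct BranchCounter {
  unsigned Count = 0;
  bool visitEdge(llvm::jitlink::LinkGraph &G, llvm::jitlink::Block *B,
                 llvm::jitlink::Edge &E) {
    if (E.getKind() == llvm::jitlink::ELF_x86_64_Edges::Branch32) {
      ++Count;
      return true; // handled: later visitors are not consulted for this edge
    }
    return false;  // not handled: fall through to the next visitor
  }
};

// Typical use inside a JITLink pass, where G is the LinkGraph being linked:
//   BranchCounter BC;
//   visitExistingEdges(G, BC);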
/// Create a LinkGraph from the given object buffer.
///
/// Note: The graph does not take ownership of the underlying buffer, nor copy
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h b/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h
index cee7d6b09c48..62c271dfc0b2 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h
@@ -13,106 +13,416 @@
#ifndef LLVM_EXECUTIONENGINE_JITLINK_JITLINKMEMORYMANAGER_H
#define LLVM_EXECUTIONENGINE_JITLINK_JITLINKMEMORYMANAGER_H
-#include "llvm/ADT/DenseMap.h"
#include "llvm/ExecutionEngine/JITLink/JITLinkDylib.h"
+#include "llvm/ExecutionEngine/JITLink/MemoryFlags.h"
#include "llvm/ExecutionEngine/JITSymbol.h"
+#include "llvm/Support/Allocator.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MSVCErrorWorkarounds.h"
#include "llvm/Support/Memory.h"
+#include "llvm/Support/RecyclingAllocator.h"
#include <cstdint>
#include <future>
+#include <mutex>
namespace llvm {
namespace jitlink {
+class Block;
+class LinkGraph;
+class Section;
+
/// Manages allocations of JIT memory.
///
/// Instances of this class may be accessed concurrently from multiple threads
/// and their implementations should include any necessary synchronization.
class JITLinkMemoryManager {
public:
- using ProtectionFlags = sys::Memory::ProtectionFlags;
+ /// Represents a call to a graph-memory-management support function in the
+ /// executor.
+ ///
+ /// Support functions are called as:
+ ///
+ /// auto *Result =
+ /// ((char*(*)(const void*, size_t))FnAddr)(
+ /// (const void*)CtxAddr, (size_t)CtxSize)
+ ///
+ /// A null result is interpreted as success.
+ ///
+ /// A non-null result is interpreted as a heap-allocated string containing
+ /// an error message to report to the allocator (the allocator's
+ /// executor-side implementation code is responsible for freeing the error
+ /// string).
+ struct AllocActionCall {
+ JITTargetAddress FnAddr = 0;
+ JITTargetAddress CtxAddr = 0;
+ JITTargetAddress CtxSize = 0;
+ };
+
+ /// A pair of AllocActionCalls, one to be run at finalization time, one to be
+ /// run at deallocation time.
+ ///
+ /// AllocActionCallPairs should be constructed for paired operations (e.g.
+ /// __register_ehframe and __deregister_ehframe for eh-frame registration).
+ /// See comments for AllocActions for execution ordering.
+ ///
+ /// For unpaired operations one or the other member can be left unused, as
+ /// AllocActionCalls with an FnAddr of zero will be skipped.
+ struct AllocActionCallPair {
+ AllocActionCall Finalize;
+ AllocActionCall Dealloc;
+ };
+
+ /// A vector of allocation actions to be run for this allocation.
+ ///
+ /// Finalize allocations will be run in order at finalize time. Dealloc
+ /// actions will be run in reverse order at deallocation time.
+ using AllocActions = std::vector<AllocActionCallPair>;
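As an illustrative sketch (not part of this patch; myRegisterFn, myDeregisterFn, CtxAddr and CtxSize are hypothetical), an executor-side support function following the convention above, and the registration of a paired action on a LinkGraph G, might look like:

    extern "C" char *myRegisterFn(const void *Ctx, size_t CtxSize) {
      // ... process the CtxSize bytes at Ctx ...
      return nullptr; // a null result signals success
    }

    JITLinkMemoryManager::AllocActionCallPair P;
    P.Finalize = {pointerToJITTargetAddress(&myRegisterFn), CtxAddr, CtxSize};
    P.Dealloc = {pointerToJITTargetAddress(&myDeregisterFn), CtxAddr, CtxSize};
    G.allocActions().push_back(std::move(P));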
+
+ /// Represents a finalized allocation.
+ ///
+ /// Finalized allocations must be passed to the
+ /// JITLinkMemoryManager::deallocate method prior to being destroyed.
+ ///
+ /// The interpretation of the Address associated with the finalized allocation
+ /// is up to the memory manager implementation. Common options are using the
+ /// base address of the allocation, or the address of a memory management
+ /// object that tracks the allocation.
+ class FinalizedAlloc {
+ friend class JITLinkMemoryManager;
- class SegmentRequest {
public:
- SegmentRequest() = default;
- SegmentRequest(uint64_t Alignment, size_t ContentSize,
- uint64_t ZeroFillSize)
- : Alignment(Alignment), ContentSize(ContentSize),
- ZeroFillSize(ZeroFillSize) {
- assert(isPowerOf2_32(Alignment) && "Alignment must be power of 2");
+ static constexpr JITTargetAddress InvalidAddr = ~JITTargetAddress(0);
+
+ FinalizedAlloc() = default;
+ explicit FinalizedAlloc(JITTargetAddress A) : A(A) {
+ assert(A != 0 && "Explicitly creating an invalid allocation?");
+ }
+ FinalizedAlloc(const FinalizedAlloc &) = delete;
+ FinalizedAlloc(FinalizedAlloc &&Other) : A(Other.A) {
+ Other.A = InvalidAddr;
+ }
+ FinalizedAlloc &operator=(const FinalizedAlloc &) = delete;
+ FinalizedAlloc &operator=(FinalizedAlloc &&Other) {
+ assert(A == InvalidAddr &&
+ "Cannot overwrite active finalized allocation");
+ std::swap(A, Other.A);
+ return *this;
+ }
+ ~FinalizedAlloc() {
+ assert(A == InvalidAddr && "Finalized allocation was not deallocated");
+ }
+
+ /// FinalizedAllocs convert to false when default-constructed, and
+ /// true otherwise. Default-constructed allocs need not be deallocated.
+ explicit operator bool() const { return A != InvalidAddr; }
+
+ /// Returns the address associated with this finalized allocation.
+ /// The allocation is unmodified.
+ JITTargetAddress getAddress() const { return A; }
+
+ /// Returns the address associated with this finalized allocation and
+ /// resets this object to the default state.
+ /// This should only be used by allocators when deallocating memory.
+ JITTargetAddress release() {
+ JITTargetAddress Tmp = A;
+ A = InvalidAddr;
+ return Tmp;
}
- uint64_t getAlignment() const { return Alignment; }
- size_t getContentSize() const { return ContentSize; }
- uint64_t getZeroFillSize() const { return ZeroFillSize; }
+
private:
- uint64_t Alignment = 0;
- size_t ContentSize = 0;
- uint64_t ZeroFillSize = 0;
+ JITTargetAddress A = InvalidAddr;
};
- using SegmentsRequestMap = DenseMap<unsigned, SegmentRequest>;
-
- /// Represents an allocation created by the memory manager.
+ /// Represents an allocation which has not been finalized yet.
///
- /// An allocation object is responsible for allocating and owning jit-linker
- /// working and target memory, and for transfering from working to target
- /// memory.
+ /// InFlightAllocs manage both executor memory allocations and working
+ /// memory allocations.
///
- class Allocation {
+ /// On finalization, the InFlightAlloc should transfer the content of
+ /// working memory into executor memory, apply memory protections, and
+ /// run any finalization functions.
+ ///
+ /// Working memory should be kept alive at least until one of the following
+ /// happens: (1) the InFlightAlloc instance is destroyed, (2) the
+ /// InFlightAlloc is abandoned, (3) finalized target memory is destroyed.
+ ///
+ /// If abandon is called then working memory and executor memory should both
+ /// be freed.
+ class InFlightAlloc {
public:
- using FinalizeContinuation = std::function<void(Error)>;
-
- virtual ~Allocation();
+ using OnFinalizedFunction = unique_function<void(Expected<FinalizedAlloc>)>;
+ using OnAbandonedFunction = unique_function<void(Error)>;
- /// Should return the address of linker working memory for the segment with
- /// the given protection flags.
- virtual MutableArrayRef<char> getWorkingMemory(ProtectionFlags Seg) = 0;
+ virtual ~InFlightAlloc();
- /// Should return the final address in the target process where the segment
- /// will reside.
- virtual JITTargetAddress getTargetMemory(ProtectionFlags Seg) = 0;
+ /// Called prior to finalization if the allocation should be abandoned.
+ virtual void abandon(OnAbandonedFunction OnAbandoned) = 0;
- /// Should transfer from working memory to target memory, and release
- /// working memory.
- virtual void finalizeAsync(FinalizeContinuation OnFinalize) = 0;
+ /// Called to transfer working memory to the target and apply finalization.
+ virtual void finalize(OnFinalizedFunction OnFinalized) = 0;
- /// Calls finalizeAsync and waits for completion.
- Error finalize() {
- std::promise<MSVCPError> FinalizeResultP;
+ /// Synchronous convenience version of finalize.
+ Expected<FinalizedAlloc> finalize() {
+ std::promise<MSVCPExpected<FinalizedAlloc>> FinalizeResultP;
auto FinalizeResultF = FinalizeResultP.get_future();
- finalizeAsync(
- [&](Error Err) { FinalizeResultP.set_value(std::move(Err)); });
+ finalize([&](Expected<FinalizedAlloc> Result) {
+ FinalizeResultP.set_value(std::move(Result));
+ });
return FinalizeResultF.get();
}
-
- /// Should deallocate target memory.
- virtual Error deallocate() = 0;
};
+ /// Typedef for the argument to be passed to OnAllocatedFunction.
+ using AllocResult = Expected<std::unique_ptr<InFlightAlloc>>;
+
+ /// Called when allocation has been completed.
+ using OnAllocatedFunction = unique_function<void(AllocResult)>;
+
+ /// Called when deallocation has completed.
+ using OnDeallocatedFunction = unique_function<void(Error)>;
+
virtual ~JITLinkMemoryManager();
- /// Create an Allocation object.
+ /// Start the allocation process.
///
- /// The JD argument represents the target JITLinkDylib, and can be used by
- /// JITLinkMemoryManager implementers to manage per-dylib allocation pools
- /// (e.g. one pre-reserved address space slab per dylib to ensure that all
- /// allocations for the dylib are within a certain range). The JD argument
- /// may be null (representing an allocation not associated with any
- /// JITDylib.
+ /// If the initial allocation is successful then the OnAllocated function will
+ /// be called with a std::unique_ptr<InFlightAlloc> value. If the allocation
+ /// is unsuccessful then the OnAllocated function will be called with an
+ /// Error.
+ virtual void allocate(const JITLinkDylib *JD, LinkGraph &G,
+ OnAllocatedFunction OnAllocated) = 0;
+
+ /// Convenience function for blocking allocation.
+ AllocResult allocate(const JITLinkDylib *JD, LinkGraph &G) {
+ std::promise<MSVCPExpected<std::unique_ptr<InFlightAlloc>>> AllocResultP;
+ auto AllocResultF = AllocResultP.get_future();
+ allocate(JD, G, [&](AllocResult Alloc) {
+ AllocResultP.set_value(std::move(Alloc));
+ });
+ return AllocResultF.get();
+ }
+
+ /// Deallocate a list of allocation objects.
///
- /// The request argument describes the segment sizes and permisssions being
- /// requested.
- virtual Expected<std::unique_ptr<Allocation>>
- allocate(const JITLinkDylib *JD, const SegmentsRequestMap &Request) = 0;
+ /// Dealloc actions will be run in reverse order (from the end of the vector
+ /// to the start).
+ virtual void deallocate(std::vector<FinalizedAlloc> Allocs,
+ OnDeallocatedFunction OnDeallocated) = 0;
+
+ /// Convenience function for deallocation of a single alloc.
+ void deallocate(FinalizedAlloc Alloc, OnDeallocatedFunction OnDeallocated) {
+ std::vector<FinalizedAlloc> Allocs;
+ Allocs.push_back(std::move(Alloc));
+ deallocate(std::move(Allocs), std::move(OnDeallocated));
+ }
+
+ /// Convenience function for blocking deallocation.
+ Error deallocate(std::vector<FinalizedAlloc> Allocs) {
+ std::promise<MSVCPError> DeallocResultP;
+ auto DeallocResultF = DeallocResultP.get_future();
+ deallocate(std::move(Allocs),
+ [&](Error Err) { DeallocResultP.set_value(std::move(Err)); });
+ return DeallocResultF.get();
+ }
+
+ /// Convenience function for blocking deallocation of a single alloc.
+ Error deallocate(FinalizedAlloc Alloc) {
+ std::vector<FinalizedAlloc> Allocs;
+ Allocs.push_back(std::move(Alloc));
+ return deallocate(std::move(Allocs));
+ }
+};
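Taken together, the intended client flow looks roughly like the following sketch (using the InProcessMemoryManager declared later in this header and a pre-built LinkGraph G; error handling is abbreviated):

    auto MemMgr = cantFail(InProcessMemoryManager::Create());
    auto InFlight = MemMgr->allocate(/*JD=*/nullptr, G);
    if (!InFlight)
      return InFlight.takeError();
    // ... populate the graph's working memory ...
    auto FA = (*InFlight)->finalize(); // blocking convenience overload
    if (!FA)
      return FA.takeError();
    // ... run the finalized code ...
    if (auto Err = MemMgr->deallocate(std::move(*FA)))
      return Err;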
+
+/// BasicLayout simplifies the implementation of JITLinkMemoryManagers.
+///
+/// BasicLayout groups Sections into Segments based on their memory protection
+/// and deallocation policies. JITLinkMemoryManagers can construct a BasicLayout
+/// from a Graph, and then assign working memory and addresses to each of the
+ /// Segments. These addresses will be mapped back onto the Graph blocks in
+/// the apply method.
+class BasicLayout {
+public:
+ /// The Alignment, ContentSize and ZeroFillSize of each segment will be
+ /// pre-filled from the Graph. Clients must set the Addr and WorkingMem fields
+ /// prior to calling apply.
+ //
+ // FIXME: The C++98 initializer is an attempt to work around compile failures
+ // due to http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1397.
+ // We should be able to switch this back to member initialization once that
+ // issue is fixed.
+ class Segment {
+ friend class BasicLayout;
+
+ public:
+ Segment()
+ : ContentSize(0), ZeroFillSize(0), Addr(0), WorkingMem(nullptr),
+ NextWorkingMemOffset(0) {}
+ Align Alignment;
+ size_t ContentSize;
+ uint64_t ZeroFillSize;
+ JITTargetAddress Addr;
+ char *WorkingMem = nullptr;
+
+ private:
+ size_t NextWorkingMemOffset;
+ std::vector<Block *> ContentBlocks, ZeroFillBlocks;
+ };
+
+ /// A convenience class that further groups segments based on memory
+ /// deallocation policy. This allows clients to make two slab allocations:
+ /// one for all standard segments, and one for all finalize segments.
+ struct ContiguousPageBasedLayoutSizes {
+ uint64_t StandardSegs = 0;
+ uint64_t FinalizeSegs = 0;
+
+ uint64_t total() const { return StandardSegs + FinalizeSegs; }
+ };
+
+private:
+ using SegmentMap = AllocGroupSmallMap<Segment>;
+
+public:
+ BasicLayout(LinkGraph &G);
+
+ /// Return a reference to the graph this allocation was created from.
+ LinkGraph &getGraph() { return G; }
+
+ /// Returns the total number of bytes required to allocate all segments (with each
+ /// segment padded out to page size) for all standard segments, and all
+ /// finalize segments.
+ ///
+ /// This is a convenience function for the common case where the segments will
+ /// be allocated contiguously.
+ ///
+ /// This function will return an error if any segment has an alignment that
+ /// is higher than a page.
+ Expected<ContiguousPageBasedLayoutSizes>
+ getContiguousPageBasedLayoutSizes(uint64_t PageSize);
+
+ /// Returns an iterator over the segments of the layout.
+ iterator_range<SegmentMap::iterator> segments() {
+ return {Segments.begin(), Segments.end()};
+ }
+
+ /// Apply the layout to the graph.
+ Error apply();
+
+ /// Returns a reference to the AllocActions in the graph.
+ /// This convenience function saves callers from having to #include
+ /// LinkGraph.h if all they need are allocation actions.
+ JITLinkMemoryManager::AllocActions &graphAllocActions();
+
+private:
+ LinkGraph &G;
+ SegmentMap Segments;
+};
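A sketch of how a memory manager implementation might drive BasicLayout (PageSize, SlabBase and SlabWorkingMem are hypothetical values owned by the allocator):

    BasicLayout BL(G);
    auto SegsSize = BL.getContiguousPageBasedLayoutSizes(PageSize);
    if (!SegsSize)
      return SegsSize.takeError();
    // Reserve SegsSize->total() bytes, then hand each segment its slice.
    JITTargetAddress NextAddr = SlabBase;
    char *NextWorkingMem = SlabWorkingMem;
    for (auto &KV : BL.segments()) {
      auto &Seg = KV.second;
      Seg.Addr = NextAddr;
      Seg.WorkingMem = NextWorkingMem;
      uint64_t SegSize = alignTo(Seg.ContentSize + Seg.ZeroFillSize, PageSize);
      NextAddr += SegSize;
      NextWorkingMem += SegSize;
    }
    if (auto Err = BL.apply()) // copies addresses back onto the graph's blocks
      return Err;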
+
+/// A utility class for making simple allocations using JITLinkMemoryManager.
+///
+/// SimpleSegmentAlloc takes a mapping of AllocGroups to Segments and uses
+/// this to create a LinkGraph with one Section (containing one Block) per
+/// Segment. Clients can obtain a pointer to the working memory and executor
+/// address of that block using the Segment's AllocGroup. Once memory has been
+/// populated, clients can call finalize to finalize the memory.
+class SimpleSegmentAlloc {
+public:
+ /// Describes a segment to be allocated.
+ struct Segment {
+ Segment() = default;
+ Segment(size_t ContentSize, Align ContentAlign)
+ : ContentSize(ContentSize), ContentAlign(ContentAlign) {}
+
+ size_t ContentSize = 0;
+ Align ContentAlign;
+ };
+
+ /// Describes the segment working memory and executor address.
+ struct SegmentInfo {
+ JITTargetAddress Addr = 0;
+ MutableArrayRef<char> WorkingMem;
+ };
+
+ using SegmentMap = AllocGroupSmallMap<Segment>;
+
+ using OnCreatedFunction = unique_function<void(Expected<SimpleSegmentAlloc>)>;
+
+ using OnFinalizedFunction =
+ JITLinkMemoryManager::InFlightAlloc::OnFinalizedFunction;
+
+ static void Create(JITLinkMemoryManager &MemMgr, const JITLinkDylib *JD,
+ SegmentMap Segments, OnCreatedFunction OnCreated);
+
+ static Expected<SimpleSegmentAlloc> Create(JITLinkMemoryManager &MemMgr,
+ const JITLinkDylib *JD,
+ SegmentMap Segments);
+
+ SimpleSegmentAlloc(SimpleSegmentAlloc &&);
+ SimpleSegmentAlloc &operator=(SimpleSegmentAlloc &&);
+ ~SimpleSegmentAlloc();
+
+ /// Returns the SegmentInfo for the given group.
+ SegmentInfo getSegInfo(AllocGroup AG);
+
+ /// Finalize all groups (async version).
+ void finalize(OnFinalizedFunction OnFinalized) {
+ Alloc->finalize(std::move(OnFinalized));
+ }
+
+ /// Finalize all groups.
+ Expected<JITLinkMemoryManager::FinalizedAlloc> finalize() {
+ return Alloc->finalize();
+ }
+
+private:
+ SimpleSegmentAlloc(
+ std::unique_ptr<LinkGraph> G, AllocGroupSmallMap<Block *> ContentBlocks,
+ std::unique_ptr<JITLinkMemoryManager::InFlightAlloc> Alloc);
+
+ std::unique_ptr<LinkGraph> G;
+ AllocGroupSmallMap<Block *> ContentBlocks;
+ std::unique_ptr<JITLinkMemoryManager::InFlightAlloc> Alloc;
};
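A rough usage sketch (Code and CodeSize are hypothetical; error handling is elided via cantFail):

    AllocGroup RX(MemProt::Read | MemProt::Exec);
    auto SSA = cantFail(SimpleSegmentAlloc::Create(
        *MemMgr, /*JD=*/nullptr, {{RX, {CodeSize, Align(16)}}}));
    auto SegInfo = SSA.getSegInfo(RX);
    memcpy(SegInfo.WorkingMem.data(), Code, CodeSize); // populate working memory
    auto FA = cantFail(SSA.finalize());                // copy to SegInfo.Addr and protect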
/// A JITLinkMemoryManager that allocates in-process memory.
class InProcessMemoryManager : public JITLinkMemoryManager {
public:
- Expected<std::unique_ptr<Allocation>>
- allocate(const JITLinkDylib *JD, const SegmentsRequestMap &Request) override;
+ class IPInFlightAlloc;
+
+ /// Attempts to auto-detect the host page size.
+ static Expected<std::unique_ptr<InProcessMemoryManager>> Create();
+
+ /// Create an instance using the given page size.
+ InProcessMemoryManager(uint64_t PageSize) : PageSize(PageSize) {}
+
+ void allocate(const JITLinkDylib *JD, LinkGraph &G,
+ OnAllocatedFunction OnAllocated) override;
+
+ // Use overloads from base class.
+ using JITLinkMemoryManager::allocate;
+
+ void deallocate(std::vector<FinalizedAlloc> Alloc,
+ OnDeallocatedFunction OnDeallocated) override;
+
+ // Use overloads from base class.
+ using JITLinkMemoryManager::deallocate;
+
+private:
+ // FIXME: Use an in-place array instead of a vector for DeallocActions.
+ // There shouldn't need to be a heap alloc for this.
+ struct FinalizedAllocInfo {
+ sys::MemoryBlock StandardSegments;
+ std::vector<AllocActionCall> DeallocActions;
+ };
+
+ FinalizedAlloc
+ createFinalizedAlloc(sys::MemoryBlock StandardSegments,
+ std::vector<AllocActionCall> DeallocActions);
+
+ uint64_t PageSize;
+ std::mutex FinalizedAllocsMutex;
+ RecyclingAllocator<BumpPtrAllocator, FinalizedAllocInfo> FinalizedAllocInfos;
};
} // end namespace jitlink
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h b/llvm/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h
index ecbc93e1467d..aee14c0d1fe5 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h
@@ -29,6 +29,8 @@ enum MachOARM64RelocationKind : Edge::Kind {
PageOffset12,
GOTPage21,
GOTPageOffset12,
+ TLVPage21,
+ TLVPageOffset12,
PointerToGOT,
PairedAddend,
LDRLiteral19,
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/MemoryFlags.h b/llvm/include/llvm/ExecutionEngine/JITLink/MemoryFlags.h
new file mode 100644
index 000000000000..8fdce93ebc56
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/MemoryFlags.h
@@ -0,0 +1,225 @@
+//===-------- MemoryFlags.h - Memory allocation flags -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines types and operations related to memory protection and allocation
+// lifetimes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_JITLINK_MEMORYFLAGS_H
+#define LLVM_EXECUTIONENGINE_JITLINK_MEMORYFLAGS_H
+
+#include "llvm/ADT/BitmaskEnum.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/Support/Memory.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace jitlink {
+
+/// Describes Read/Write/Exec permissions for memory.
+enum class MemProt {
+ None = 0,
+ Read = 1U << 0,
+ Write = 1U << 1,
+ Exec = 1U << 2,
+ LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ Exec)
+};
+
+/// Print a MemProt as an RWX triple.
+raw_ostream &operator<<(raw_ostream &OS, MemProt MP);
+
+/// Convert a MemProt value to a corresponding sys::Memory::ProtectionFlags
+/// value.
+inline sys::Memory::ProtectionFlags toSysMemoryProtectionFlags(MemProt MP) {
+ std::underlying_type_t<sys::Memory::ProtectionFlags> PF = 0;
+ if ((MP & MemProt::Read) != MemProt::None)
+ PF |= sys::Memory::MF_READ;
+ if ((MP & MemProt::Write) != MemProt::None)
+ PF |= sys::Memory::MF_WRITE;
+ if ((MP & MemProt::Exec) != MemProt::None)
+ PF |= sys::Memory::MF_EXEC;
+ return static_cast<sys::Memory::ProtectionFlags>(PF);
+}
+
+/// Convert a sys::Memory::ProtectionFlags value to a corresponding MemProt
+/// value.
+inline MemProt fromSysMemoryProtectionFlags(sys::Memory::ProtectionFlags PF) {
+ MemProt MP = MemProt::None;
+ if (PF & sys::Memory::MF_READ)
+ MP |= MemProt::Read;
+ if (PF & sys::Memory::MF_WRITE)
+ MP |= MemProt::Write;
+ if (PF & sys::Memory::MF_EXEC)
+ MP |= MemProt::Exec;
+ return MP;
+}
+
+/// Describes a memory deallocation policy for memory to be allocated by a
+/// JITLinkMemoryManager.
+///
+/// All memory allocated by a call to JITLinkMemoryManager::allocate should be
+/// deallocated if a call is made to
+/// JITLinkMemoryManager::InFlightAllocation::abandon. The policies below apply
+/// to finalized allocations.
+enum class MemDeallocPolicy {
+ /// Standard memory should be deallocated when the deallocate method is called
+ /// for the finalized allocation.
+ Standard,
+
+ /// Finalize memory should be overwritten and then deallocated after all
+ /// finalization functions have been run.
+ Finalize
+};
+
+/// Print a MemDeallocPolicy.
+raw_ostream &operator<<(raw_ostream &OS, MemDeallocPolicy MDP);
+
+/// A pair of memory protections and allocation policies.
+///
+/// Optimized for use as a small map key.
+class AllocGroup {
+ friend struct llvm::DenseMapInfo<AllocGroup>;
+
+ using underlying_type = uint8_t;
+ static constexpr unsigned BitsForProt = 3;
+ static constexpr unsigned BitsForDeallocPolicy = 1;
+ static constexpr unsigned MaxIdentifiers =
+ 1U << (BitsForProt + BitsForDeallocPolicy);
+
+public:
+ static constexpr unsigned NumGroups = MaxIdentifiers;
+
+ /// Create a default AllocGroup. No memory protections, standard
+ /// deallocation policy.
+ AllocGroup() = default;
+
+ /// Create an AllocGroup from a MemProt only -- uses
+ /// MemDeallocPolicy::Standard.
+ AllocGroup(MemProt MP) : Id(static_cast<underlying_type>(MP)) {}
+
+ /// Create an AllocGroup from a MemProt and a MemDeallocPolicy.
+ AllocGroup(MemProt MP, MemDeallocPolicy MDP)
+ : Id(static_cast<underlying_type>(MP) |
+ (static_cast<underlying_type>(MDP) << BitsForProt)) {}
+
+ /// Returns the MemProt for this group.
+ MemProt getMemProt() const {
+ return static_cast<MemProt>(Id & ((1U << BitsForProt) - 1));
+ }
+
+ /// Returns the MemDeallocPolicy for this group.
+ MemDeallocPolicy getMemDeallocPolicy() const {
+ return static_cast<MemDeallocPolicy>(Id >> BitsForProt);
+ }
+
+ friend bool operator==(const AllocGroup &LHS, const AllocGroup &RHS) {
+ return LHS.Id == RHS.Id;
+ }
+
+ friend bool operator!=(const AllocGroup &LHS, const AllocGroup &RHS) {
+ return !(LHS == RHS);
+ }
+
+ friend bool operator<(const AllocGroup &LHS, const AllocGroup &RHS) {
+ return LHS.Id < RHS.Id;
+ }
+
+private:
+ AllocGroup(underlying_type RawId) : Id(RawId) {}
+ underlying_type Id = 0;
+};
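For example, a group for read/execute code with the standard lifetime, and a group for initializer data that may be discarded once finalization actions have run:

    AllocGroup CodeGroup(MemProt::Read | MemProt::Exec);
    AllocGroup InitGroup(MemProt::Read | MemProt::Write, MemDeallocPolicy::Finalize);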
+
+/// A specialized small-map for AllocGroups.
+///
+/// Iteration order is guaranteed to match key ordering.
+template <typename T> class AllocGroupSmallMap {
+private:
+ using ElemT = std::pair<AllocGroup, T>;
+ using VectorTy = SmallVector<ElemT, 4>;
+
+ static bool compareKey(const ElemT &E, const AllocGroup &G) {
+ return E.first < G;
+ }
+
+public:
+ using iterator = typename VectorTy::iterator;
+
+ AllocGroupSmallMap() = default;
+ AllocGroupSmallMap(std::initializer_list<std::pair<AllocGroup, T>> Inits) {
+ Elems.reserve(Inits.size());
+ for (const auto &E : Inits)
+ Elems.push_back(E);
+ llvm::sort(Elems, [](const ElemT &LHS, const ElemT &RHS) {
+ return LHS.first < RHS.first;
+ });
+ }
+
+ iterator begin() { return Elems.begin(); }
+ iterator end() { return Elems.end(); }
+ iterator find(AllocGroup G) {
+ auto I = lower_bound(Elems, G, compareKey);
+ return (I != end() && I->first == G) ? I : end();
+ }
+
+ bool empty() const { return Elems.empty(); }
+ size_t size() const { return Elems.size(); }
+
+ T &operator[](AllocGroup G) {
+ auto I = lower_bound(Elems, G, compareKey);
+ if (I == Elems.end() || I->first != G)
+ I = Elems.insert(I, std::make_pair(G, T()));
+ return I->second;
+ }
+
+private:
+ VectorTy Elems;
+};
+
+/// Print an AllocGroup.
+raw_ostream &operator<<(raw_ostream &OS, AllocGroup AG);
+
+} // end namespace jitlink
+
+template <> struct DenseMapInfo<jitlink::MemProt> {
+ static inline jitlink::MemProt getEmptyKey() {
+ return jitlink::MemProt(~uint8_t(0));
+ }
+ static inline jitlink::MemProt getTombstoneKey() {
+ return jitlink::MemProt(~uint8_t(0) - 1);
+ }
+ static unsigned getHashValue(const jitlink::MemProt &Val) {
+ using UT = std::underlying_type_t<jitlink::MemProt>;
+ return DenseMapInfo<UT>::getHashValue(static_cast<UT>(Val));
+ }
+ static bool isEqual(const jitlink::MemProt &LHS,
+ const jitlink::MemProt &RHS) {
+ return LHS == RHS;
+ }
+};
+
+template <> struct DenseMapInfo<jitlink::AllocGroup> {
+ static inline jitlink::AllocGroup getEmptyKey() {
+ return jitlink::AllocGroup(~uint8_t(0));
+ }
+ static inline jitlink::AllocGroup getTombstoneKey() {
+ return jitlink::AllocGroup(~uint8_t(0) - 1);
+ }
+ static unsigned getHashValue(const jitlink::AllocGroup &Val) {
+ return DenseMapInfo<jitlink::AllocGroup::underlying_type>::getHashValue(
+ Val.Id);
+ }
+ static bool isEqual(const jitlink::AllocGroup &LHS,
+ const jitlink::AllocGroup &RHS) {
+ return LHS == RHS;
+ }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_JITLINK_MEMORYFLAGS_H
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/TableManager.h b/llvm/include/llvm/ExecutionEngine/JITLink/TableManager.h
new file mode 100644
index 000000000000..c20f62d515ec
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/TableManager.h
@@ -0,0 +1,63 @@
+//===---------------------- TableManager.h ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Fixes up edges that need a table entry in order to reference their target symbol.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_JITLINK_TABLEMANAGER_H
+#define LLVM_EXECUTIONENGINE_JITLINK_TABLEMANAGER_H
+
+#include "llvm/ExecutionEngine/JITLink/JITLink.h"
+#include "llvm/Support/Debug.h"
+
+namespace llvm {
+namespace jitlink {
+
+/// A CRTP base for tables that are built on demand, e.g. Global Offset Tables
+/// and Procedure Linkage Tables.
+/// The getEntryForTarget function returns the table entry corresponding to the
+/// given target, calling down to the implementation class to build an entry if
+/// one does not already exist.
+template <typename TableManagerImplT> class TableManager {
+public:
+ /// Returns the table entry for the given target, creating a new entry in
+ /// graph G if one does not already exist.
+ Symbol &getEntryForTarget(LinkGraph &G, Symbol &Target) {
+ assert(Target.hasName() && "Edge cannot point to anonymous target");
+
+ auto EntryI = Entries.find(Target.getName());
+
+ // Build the entry if it doesn't exist.
+ if (EntryI == Entries.end()) {
+ auto &Entry = impl().createEntry(G, Target);
+ DEBUG_WITH_TYPE("jitlink", {
+ dbgs() << " Created" << impl().getSectionName() << "entry for "
+ << Target.getName() << ": " << Entry << "\n";
+ });
+ EntryI = Entries.insert(std::make_pair(Target.getName(), &Entry)).first;
+ }
+
+ assert(EntryI != Entries.end() && "Could not get entry symbol");
+ DEBUG_WITH_TYPE("jitlink", {
+ dbgs() << " Using " << impl().getSectionName() << " entry "
+ << *EntryI->second << "\n";
+ });
+ return *EntryI->second;
+ }
+
+private:
+ TableManagerImplT &impl() { return static_cast<TableManagerImplT &>(*this); }
+ DenseMap<StringRef, Symbol *> Entries;
+};
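A minimal sketch of an implementation class for this CRTP base (the section name and edge kinds below are hypothetical; the GOTTableManager and PLTTableManager added to x86_64.h later in this patch are complete examples):

    class MyTableManager : public TableManager<MyTableManager> {
    public:
      static StringRef getSectionName() { return "$__MY_TABLE"; }

      // Claim edges that need a table entry; called via visitExistingEdges.
      bool visitEdge(LinkGraph &G, Block *B, Edge &E) {
        if (E.getKind() != RequestEntryKind) // hypothetical request kind
          return false;
        E.setKind(FixedUpKind);              // hypothetical fixed-up kind
        E.setTarget(getEntryForTarget(G, E.getTarget()));
        return true;
      }

      // Build one table entry block/symbol for Target in graph G.
      Symbol &createEntry(LinkGraph &G, Symbol &Target);
    };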
+
+} // namespace jitlink
+} // namespace llvm
+
+#endif
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/aarch64.h b/llvm/include/llvm/ExecutionEngine/JITLink/aarch64.h
new file mode 100644
index 000000000000..994ce783b058
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/aarch64.h
@@ -0,0 +1,38 @@
+//=== aarch64.h - Generic JITLink aarch64 edge kinds, utilities -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Generic utilities for graphs representing aarch64 objects.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_JITLINK_AARCH64_H
+#define LLVM_EXECUTIONENGINE_JITLINK_AARCH64_H
+
+#include "llvm/ExecutionEngine/JITLink/JITLink.h"
+
+namespace llvm {
+namespace jitlink {
+namespace aarch64 {
+
+/// Represents aarch64 fixups.
+enum EdgeKind_aarch64 : Edge::Kind {
+
+ /// Set a CALL immediate field to bits [27:2] of X = Target - Fixup + Addend
+ R_AARCH64_CALL26 = Edge::FirstRelocation,
+
+};
+
+/// Returns a string name for the given aarch64 edge. For debugging purposes
+/// only.
+const char *getEdgeKindName(Edge::Kind K);
+
+} // namespace aarch64
+} // namespace jitlink
+} // namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_JITLINK_AARCH64_H
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h b/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h
index a4509f3888a4..b8d08d88c1c9 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h
@@ -70,7 +70,19 @@ enum EdgeKind_riscv : Edge::Kind {
///
/// Fixup expression:
/// Fixup <- (Target - Fixup + Addend)
- R_RISCV_CALL
+ R_RISCV_CALL,
+
+ /// PC relative GOT offset
+ ///
+ /// Fixup expression:
+ /// Fixup <- (GOT - Fixup + Addend) >> 12
+ R_RISCV_GOT_HI20,
+
+ /// PC relative call by PLT
+ ///
+ /// Fixup expression:
+ /// Fixup <- (Target - Fixup + Addend)
+ R_RISCV_CALL_PLT
};
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h b/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h
index 006d983537e9..3130ea381534 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h
@@ -14,6 +14,7 @@
#define LLVM_EXECUTIONENGINE_JITLINK_X86_64_H
#include "llvm/ExecutionEngine/JITLink/JITLink.h"
+#include "llvm/ExecutionEngine/JITLink/TableManager.h"
#include <limits>
@@ -42,6 +43,16 @@ enum EdgeKind_x86_64 : Edge::Kind {
///
Pointer32,
+ /// A signed 32-bit pointer value relocation
+ ///
+ /// Fixup expression:
+ /// Fixup <- Target + Addend : int32
+ ///
+ /// Errors:
+ /// - The target must reside in the signed 32-bit range ([-2**31, 2**31 - 1]) of
+ /// the address space, otherwise an out-of-range error will be returned.
+ Pointer32Signed,
+
/// A 64-bit delta.
///
/// Delta from the fixup to the target.
@@ -85,6 +96,18 @@ enum EdgeKind_x86_64 : Edge::Kind {
/// an out-of-range error will be returned.
NegDelta32,
+ /// A 64-bit GOT delta.
+ ///
+ /// Delta from the global offset table to the target
+ ///
+ /// Fixup expression:
+ /// Fixup <- Target - GOTSymbol + Addend : int64
+ ///
+ /// Errors:
+ /// - *ASSERTION* Fails if GOTSymbol is a null pointer, i.e. the GOT section
+ /// symbol has not been defined.
+ Delta64FromGOT,
+
/// A 32-bit PC-relative branch.
///
/// Represents a PC-relative call or branch to a target. This can be used to
@@ -120,7 +143,7 @@ enum EdgeKind_x86_64 : Edge::Kind {
/// This edge kind has the same fixup expression as BranchPCRel32, but further
/// identifies the call/branch as being to a pointer jump stub. For edges of
/// this kind the jump stub should not be bypassed (use
- /// BranchPCRel32ToPtrJumpStubRelaxable for that), but the pointer location
+ /// BranchPCRel32ToPtrJumpStubBypassable for that), but the pointer location
/// target may be recorded to allow manipulation at runtime.
///
/// Fixup expression:
@@ -136,7 +159,8 @@ enum EdgeKind_x86_64 : Edge::Kind {
///
/// The edge kind has the same fixup expression as BranchPCRel32ToPtrJumpStub,
/// but identifies the call/branch as being to a pointer jump stub that may be
- /// bypassed if the ultimate target is within range of the fixup location.
+ /// bypassed with a direct jump to the ultimate target if the ultimate target
+ /// is within range of the fixup location.
///
/// Fixup expression:
/// Fixup <- Target - Fixup + Addend - 4: int32
@@ -145,7 +169,7 @@ enum EdgeKind_x86_64 : Edge::Kind {
/// - The result of the fixup expression must fit into an int32, otherwise
/// an out-of-range error will be returned.
///
- BranchPCRel32ToPtrJumpStubRelaxable,
+ BranchPCRel32ToPtrJumpStubBypassable,
/// A GOT entry getter/constructor, transformed to Delta32 pointing at the GOT
/// entry for the original target.
@@ -167,7 +191,62 @@ enum EdgeKind_x86_64 : Edge::Kind {
///
RequestGOTAndTransformToDelta32,
- /// A PC-relative reference to a GOT entry, relaxable if GOT entry target
+ /// A GOT entry getter/constructor, transformed to Delta64 pointing at the GOT
+ /// entry for the original target.
+ ///
+ /// Indicates that this edge should be transformed into a Delta64 targeting
+ /// the GOT entry for the edge's current target, maintaining the same addend.
+ /// A GOT entry for the target should be created if one does not already
+ /// exist.
+ ///
+ /// Edges of this kind are usually handled by a GOT builder pass inserted by
+ /// default.
+ ///
+ /// Fixup expression:
+ /// NONE
+ ///
+ /// Errors:
+ /// - *ASSERTION* Failure to handle edges of this kind prior to the fixup
+ /// phase will result in an assert/unreachable during the fixup phase.
+ ///
+ RequestGOTAndTransformToDelta64,
+
+ /// A GOT entry offset within GOT getter/constructor, transformed to
+ /// Delta64FromGOT pointing at the GOT entry for the original target.
+ ///
+ /// Indicates that this edge should be transformed into a Delta64FromGOT
+ /// targeting the GOT entry for the edge's current target, maintaining the
+ /// same addend. A GOT entry for the target should be created if one does
+ /// not already exist.
+ ///
+ /// Edges of this kind are usually handled by a GOT builder pass inserted by
+ /// default.
+ ///
+ /// Fixup expression:
+ /// NONE
+ ///
+ /// Errors:
+ /// - *ASSERTION* Failure to handle edges of this kind prior to the fixup
+ /// phase will result in an assert/unreachable during the fixup phase
+ RequestGOTAndTransformToDelta64FromGOT,
+
+ /// A PC-relative load of a GOT entry, relaxable if the GOT entry target is
+ /// in-range of the fixup.
+ ///
+ /// TODO: Explain the optimization
+ ///
+ /// Fixup expression:
+ /// Fixup <- Target - (Fixup + 4) + Addend : int32
+ ///
+ /// Errors:
+ /// - The result of the fixup expression must fit into an int32, otherwise
+ /// an out-of-range error will be returned.
+ ///
+ PCRel32GOTLoadRelaxable,
+
+ /// A PC-relative REX load of a GOT entry, relaxable if GOT entry target
/// is in-range of the fixup.
///
/// If the GOT entry target is in-range of the fixup then the load from the
@@ -180,17 +259,39 @@ enum EdgeKind_x86_64 : Edge::Kind {
/// - The result of the fixup expression must fit into an int32, otherwise
/// an out-of-range error will be returned.
///
- PCRel32GOTLoadRelaxable,
+ PCRel32GOTLoadREXRelaxable,
- /// A GOT entry getter/constructor, transformed to PCRel32ToGOTLoadRelaxable
- /// pointing at the GOT entry for the original target.
+ /// A GOT entry getter/constructor, transformed to
+ /// PCRel32ToGOTLoadREXRelaxable pointing at the GOT entry for the original
+ /// target.
///
- /// Indicates that this edge should be transformed into a
- /// PC32ToGOTLoadRelaxable targeting the GOT entry for the edge's current
- /// target, maintaining the same addend. A GOT entry for the target should be
- /// created if one does not already exist.
+ /// Indicates that this edge should be lowered to a PC32ToGOTLoadREXRelaxable
+ /// targeting the GOT entry for the edge's current target, maintaining the
+ /// same addend. A GOT entry for the target should be created if one does not
+ /// already exist.
///
- /// Edges of this kind are usually handled by a GOT builder pass inserted by
+ /// Edges of this kind are usually lowered by a GOT builder pass inserted by
+ /// default.
+ ///
+ /// Fixup expression:
+ /// NONE
+ ///
+ /// Errors:
+ /// - *ASSERTION* Failure to handle edges of this kind prior to the fixup
+ /// phase will result in an assert/unreachable during the fixup phase.
+ ///
+ RequestGOTAndTransformToPCRel32GOTLoadREXRelaxable,
+
+ /// A GOT entry getter/constructor, transformed to
+ /// PCRel32ToGOTLoadRelaxable pointing at the GOT entry for the original
+ /// target.
+ ///
+ /// Indicates that this edge should be lowered to a PC32ToGOTLoadRelaxable
+ /// targeting the GOT entry for the edge's current target, maintaining the
+ /// same addend. A GOT entry for the target should be created if one does not
+ /// already exist.
+ ///
+ /// Edges of this kind are usually lowered by a GOT builder pass inserted by
/// default.
///
/// Fixup expression:
@@ -202,10 +303,10 @@ enum EdgeKind_x86_64 : Edge::Kind {
///
RequestGOTAndTransformToPCRel32GOTLoadRelaxable,
- /// A PC-relative reference to a Thread Local Variable Pointer (TLVP) entry,
+ /// A PC-relative REX load of a Thread Local Variable Pointer (TLVP) entry,
/// relaxable if the TLVP entry target is in-range of the fixup.
///
- /// If the TLVP entry target is in-range of the fixup then the load frmo the
+ /// If the TLVP entry target is in-range of the fixup then the load from the
/// TLVP may be replaced with a direct memory address calculation.
///
/// The target of this edge must be a thread local variable entry of the form
@@ -222,15 +323,18 @@ enum EdgeKind_x86_64 : Edge::Kind {
/// - The target must be either external, or a TLV entry of the required
/// form, otherwise a malformed TLV entry error will be returned.
///
- PCRel32TLVPLoadRelaxable,
+ PCRel32TLVPLoadREXRelaxable,
+
+ /// TODO: Explain the generic edge kind
+ RequestTLSDescInGOTAndTransformToDelta32,
/// A TLVP entry getter/constructor, transformed to
- /// Delta32ToTLVPLoadRelaxable.
+ /// Delta32ToTLVPLoadREXRelaxable.
///
/// Indicates that this edge should be transformed into a
- /// Delta32ToTLVPLoadRelaxable targeting the TLVP entry for the edge's current
- /// target. A TLVP entry for the target should be created if one does not
- /// already exist.
+ /// Delta32ToTLVPLoadREXRelaxable targeting the TLVP entry for the edge's
+ /// current target. A TLVP entry for the target should be created if one does
+ /// not already exist.
///
/// Fixup expression:
/// NONE
@@ -239,7 +343,7 @@ enum EdgeKind_x86_64 : Edge::Kind {
/// - *ASSERTION* Failure to handle edges of this kind prior to the fixup
/// phase will result in an assert/unreachable during the fixup phase.
///
- RequestTLVPAndTransformToPCRel32TLVPLoadRelaxable
+ RequestTLVPAndTransformToPCRel32TLVPLoadREXRelaxable
};
/// Returns a string name for the given x86-64 edge. For debugging purposes
@@ -258,7 +362,8 @@ inline bool isInRangeForImmS32(int64_t Value) {
}
/// Apply fixup expression for edge to block content.
-inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E) {
+inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E,
+ const Symbol *GOTSymbol) {
using namespace support;
char *BlockWorkingMem = B.getAlreadyMutableContent().data();
@@ -281,12 +386,21 @@ inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E) {
return makeTargetOutOfRangeError(G, B, E);
break;
}
+ case Pointer32Signed: {
+ int64_t Value = E.getTarget().getAddress() + E.getAddend();
+ if (LLVM_LIKELY(isInRangeForImmS32(Value)))
+ *(little32_t *)FixupPtr = Value;
+ else
+ return makeTargetOutOfRangeError(G, B, E);
+ break;
+ }
case BranchPCRel32:
case BranchPCRel32ToPtrJumpStub:
- case BranchPCRel32ToPtrJumpStubRelaxable:
+ case BranchPCRel32ToPtrJumpStubBypassable:
case PCRel32GOTLoadRelaxable:
- case PCRel32TLVPLoadRelaxable: {
+ case PCRel32GOTLoadREXRelaxable:
+ case PCRel32TLVPLoadREXRelaxable: {
int64_t Value =
E.getTarget().getAddress() - (FixupAddress + 4) + E.getAddend();
if (LLVM_LIKELY(isInRangeForImmS32(Value)))
@@ -325,6 +439,13 @@ inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E) {
return makeTargetOutOfRangeError(G, B, E);
break;
}
+ case Delta64FromGOT: {
+ assert(GOTSymbol && "No GOT section symbol");
+ int64_t Value =
+ E.getTarget().getAddress() - GOTSymbol->getAddress() + E.getAddend();
+ *(little64_t *)FixupPtr = Value;
+ break;
+ }
default: {
// If you hit this you should check that *constructor and other non-fixup
@@ -395,6 +516,114 @@ inline Symbol &createAnonymousPointerJumpStub(LinkGraph &G,
false);
}
+/// Global Offset Table Builder.
+class GOTTableManager : public TableManager<GOTTableManager> {
+public:
+ static StringRef getSectionName() { return "$__GOT"; }
+
+ bool visitEdge(LinkGraph &G, Block *B, Edge &E) {
+ Edge::Kind KindToSet = Edge::Invalid;
+ switch (E.getKind()) {
+ case x86_64::Delta64FromGOT: {
+ // we need to make sure that the GOT section exists, but don't otherwise
+ // need to fix up this edge
+ getGOTSection(G);
+ return false;
+ }
+ case x86_64::RequestGOTAndTransformToPCRel32GOTLoadREXRelaxable:
+ KindToSet = x86_64::PCRel32GOTLoadREXRelaxable;
+ break;
+ case x86_64::RequestGOTAndTransformToPCRel32GOTLoadRelaxable:
+ KindToSet = x86_64::PCRel32GOTLoadRelaxable;
+ break;
+ case x86_64::RequestGOTAndTransformToDelta64:
+ KindToSet = x86_64::Delta64;
+ break;
+ case x86_64::RequestGOTAndTransformToDelta64FromGOT:
+ KindToSet = x86_64::Delta64FromGOT;
+ break;
+ case x86_64::RequestGOTAndTransformToDelta32:
+ KindToSet = x86_64::Delta32;
+ break;
+ default:
+ return false;
+ }
+ assert(KindToSet != Edge::Invalid &&
+ "Fell through switch, but no new kind to set");
+ DEBUG_WITH_TYPE("jitlink", {
+ dbgs() << " Fixing " << G.getEdgeKindName(E.getKind()) << " edge at "
+ << formatv("{0:x}", B->getFixupAddress(E)) << " ("
+ << formatv("{0:x}", B->getAddress()) << " + "
+ << formatv("{0:x}", E.getOffset()) << ")\n";
+ });
+ E.setKind(KindToSet);
+ E.setTarget(getEntryForTarget(G, E.getTarget()));
+ return true;
+ }
+
+ Symbol &createEntry(LinkGraph &G, Symbol &Target) {
+ return createAnonymousPointer(G, getGOTSection(G), &Target);
+ }
+
+private:
+ Section &getGOTSection(LinkGraph &G) {
+ if (!GOTSection)
+ GOTSection = &G.createSection(getSectionName(), MemProt::Read);
+ return *GOTSection;
+ }
+
+ Section *GOTSection = nullptr;
+};
+
+/// Procedure Linkage Table Builder.
+class PLTTableManager : public TableManager<PLTTableManager> {
+public:
+ PLTTableManager(GOTTableManager &GOT) : GOT(GOT) {}
+
+ static StringRef getSectionName() { return "$__STUBS"; }
+
+ bool visitEdge(LinkGraph &G, Block *B, Edge &E) {
+ if (E.getKind() == x86_64::BranchPCRel32 && !E.getTarget().isDefined()) {
+ DEBUG_WITH_TYPE("jitlink", {
+ dbgs() << " Fixing " << G.getEdgeKindName(E.getKind()) << " edge at "
+ << formatv("{0:x}", B->getFixupAddress(E)) << " ("
+ << formatv("{0:x}", B->getAddress()) << " + "
+ << formatv("{0:x}", E.getOffset()) << ")\n";
+ });
+ // Set the edge kind to Branch32ToPtrJumpStubBypassable to enable it to
+ // be optimized when the target is in-range.
+ E.setKind(x86_64::BranchPCRel32ToPtrJumpStubBypassable);
+ E.setTarget(getEntryForTarget(G, E.getTarget()));
+ return true;
+ }
+ return false;
+ }
+
+ Symbol &createEntry(LinkGraph &G, Symbol &Target) {
+ return createAnonymousPointerJumpStub(G, getStubsSection(G),
+ GOT.getEntryForTarget(G, Target));
+ }
+
+public:
+ Section &getStubsSection(LinkGraph &G) {
+ if (!PLTSection)
+ PLTSection =
+ &G.createSection(getSectionName(), MemProt::Read | MemProt::Exec);
+ return *PLTSection;
+ }
+
+ GOTTableManager &GOT;
+ Section *PLTSection = nullptr;
+};
+
+/// Optimize the GOT and stub relocations if the edge target address is in range:
+/// 1. PCRel32GOTLoadRelaxable. For this edge kind, if the target is in range,
+/// then replace the GOT load with a lea.
+/// 2. BranchPCRel32ToPtrJumpStubBypassable. For this edge kind, if the target is
+/// in range, replace the indirect jump via the PLT stub with a direct jump to
+/// the target.
+Error optimizeGOTAndStubAccesses(LinkGraph &G);
+
} // namespace x86_64
} // end namespace jitlink
} // end namespace llvm
diff --git a/llvm/include/llvm/ExecutionEngine/MCJIT.h b/llvm/include/llvm/ExecutionEngine/MCJIT.h
index 8253bf98963b..adce98f380c5 100644
--- a/llvm/include/llvm/ExecutionEngine/MCJIT.h
+++ b/llvm/include/llvm/ExecutionEngine/MCJIT.h
@@ -26,6 +26,9 @@ namespace {
// delete it all as dead code, even with whole program optimization,
// yet is effectively a NO-OP. As the compiler isn't smart enough
// to know that getenv() never returns -1, this will do the job.
+ // This is so that globals in the translation units where these functions
+ // are defined are forced to be initialized, populating various
+ // registries.
if (std::getenv("bar") != (char*) -1)
return;
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Core.h b/llvm/include/llvm/ExecutionEngine/Orc/Core.h
index e832d8d57dfa..5cac65b49a05 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/Core.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Core.h
@@ -21,7 +21,7 @@
#include "llvm/ExecutionEngine/JITSymbol.h"
#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h"
#include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h"
-#include "llvm/ExecutionEngine/OrcV1Deprecation.h"
+#include "llvm/ExecutionEngine/Orc/TaskDispatch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ExtensibleRTTI.h"
@@ -434,13 +434,16 @@ class SymbolsNotFound : public ErrorInfo<SymbolsNotFound> {
public:
static char ID;
- SymbolsNotFound(SymbolNameSet Symbols);
- SymbolsNotFound(SymbolNameVector Symbols);
+ SymbolsNotFound(std::shared_ptr<SymbolStringPool> SSP, SymbolNameSet Symbols);
+ SymbolsNotFound(std::shared_ptr<SymbolStringPool> SSP,
+ SymbolNameVector Symbols);
std::error_code convertToErrorCode() const override;
void log(raw_ostream &OS) const override;
+ std::shared_ptr<SymbolStringPool> getSymbolStringPool() { return SSP; }
const SymbolNameVector &getSymbols() const { return Symbols; }
private:
+ std::shared_ptr<SymbolStringPool> SSP;
SymbolNameVector Symbols;
};
@@ -449,12 +452,15 @@ class SymbolsCouldNotBeRemoved : public ErrorInfo<SymbolsCouldNotBeRemoved> {
public:
static char ID;
- SymbolsCouldNotBeRemoved(SymbolNameSet Symbols);
+ SymbolsCouldNotBeRemoved(std::shared_ptr<SymbolStringPool> SSP,
+ SymbolNameSet Symbols);
std::error_code convertToErrorCode() const override;
void log(raw_ostream &OS) const override;
+ std::shared_ptr<SymbolStringPool> getSymbolStringPool() { return SSP; }
const SymbolNameSet &getSymbols() const { return Symbols; }
private:
+ std::shared_ptr<SymbolStringPool> SSP;
SymbolNameSet Symbols;
};
@@ -466,13 +472,17 @@ class MissingSymbolDefinitions : public ErrorInfo<MissingSymbolDefinitions> {
public:
static char ID;
- MissingSymbolDefinitions(std::string ModuleName, SymbolNameVector Symbols)
- : ModuleName(std::move(ModuleName)), Symbols(std::move(Symbols)) {}
+ MissingSymbolDefinitions(std::shared_ptr<SymbolStringPool> SSP,
+ std::string ModuleName, SymbolNameVector Symbols)
+ : SSP(std::move(SSP)), ModuleName(std::move(ModuleName)),
+ Symbols(std::move(Symbols)) {}
std::error_code convertToErrorCode() const override;
void log(raw_ostream &OS) const override;
+ std::shared_ptr<SymbolStringPool> getSymbolStringPool() { return SSP; }
const std::string &getModuleName() const { return ModuleName; }
const SymbolNameVector &getSymbols() const { return Symbols; }
private:
+ std::shared_ptr<SymbolStringPool> SSP;
std::string ModuleName;
SymbolNameVector Symbols;
};
@@ -485,13 +495,17 @@ class UnexpectedSymbolDefinitions : public ErrorInfo<UnexpectedSymbolDefinitions
public:
static char ID;
- UnexpectedSymbolDefinitions(std::string ModuleName, SymbolNameVector Symbols)
- : ModuleName(std::move(ModuleName)), Symbols(std::move(Symbols)) {}
+ UnexpectedSymbolDefinitions(std::shared_ptr<SymbolStringPool> SSP,
+ std::string ModuleName, SymbolNameVector Symbols)
+ : SSP(std::move(SSP)), ModuleName(std::move(ModuleName)),
+ Symbols(std::move(Symbols)) {}
std::error_code convertToErrorCode() const override;
void log(raw_ostream &OS) const override;
+ std::shared_ptr<SymbolStringPool> getSymbolStringPool() { return SSP; }
const std::string &getModuleName() const { return ModuleName; }
const SymbolNameVector &getSymbols() const { return Symbols; }
private:
+ std::shared_ptr<SymbolStringPool> SSP;
std::string ModuleName;
SymbolNameVector Symbols;
};
@@ -1241,21 +1255,6 @@ public:
const DenseMap<JITDylib *, SymbolLookupSet> &InitSyms);
};
-/// Represents an abstract task for ORC to run.
-class Task : public RTTIExtends<Task, RTTIRoot> {
-public:
- static char ID;
-
- /// Description of the task to be performed. Used for logging.
- virtual void printDescription(raw_ostream &OS) = 0;
-
- /// Run the task.
- virtual void run() = 0;
-
-private:
- void anchor() override;
-};
-
/// A materialization task.
class MaterializationTask : public RTTIExtends<MaterializationTask, Task> {
public:
@@ -1285,13 +1284,16 @@ public:
/// For reporting errors.
using ErrorReporter = std::function<void(Error)>;
+ /// Send a result to the remote.
+ using SendResultFunction = unique_function<void(shared::WrapperFunctionResult)>;
+
/// For dispatching ORC tasks (typically materialization tasks).
using DispatchTaskFunction = unique_function<void(std::unique_ptr<Task> T)>;
/// An asynchronous wrapper-function callable from the executor via
/// jit-dispatch.
using JITDispatchHandlerFunction = unique_function<void(
- ExecutorProcessControl::SendResultFunction SendResult,
+ SendResultFunction SendResult,
const char *ArgData, size_t ArgSize)>;
/// A map associating tag names with asynchronous wrapper function
@@ -1303,13 +1305,19 @@ public:
/// object.
ExecutionSession(std::unique_ptr<ExecutorProcessControl> EPC);
- /// End the session. Closes all JITDylibs.
+ /// End the session. Closes all JITDylibs and disconnects from the
+ /// executor.
Error endSession();
/// Get the ExecutorProcessControl object associated with this
/// ExecutionSession.
ExecutorProcessControl &getExecutorProcessControl() { return *EPC; }
+ /// Get the SymbolStringPool for this instance.
+ std::shared_ptr<SymbolStringPool> getSymbolStringPool() {
+ return EPC->getSymbolStringPool();
+ }
+
/// Add a symbol name to the SymbolStringPool and return a pointer to it.
SymbolStringPtr intern(StringRef SymName) { return EPC->intern(SymName); }
@@ -1462,10 +1470,9 @@ public:
/// \endcode{.cpp}
///
/// The given OnComplete function will be called to return the result.
- void callWrapperAsync(ExecutorProcessControl::SendResultFunction OnComplete,
- JITTargetAddress WrapperFnAddr,
- ArrayRef<char> ArgBuffer) {
- EPC->callWrapperAsync(std::move(OnComplete), WrapperFnAddr, ArgBuffer);
+ template <typename... ArgTs>
+ void callWrapperAsync(ArgTs &&... Args) {
+ EPC->callWrapperAsync(std::forward<ArgTs>(Args)...);
}
/// Run a wrapper function in the executor. The wrapper function should be
@@ -1474,30 +1481,18 @@ public:
/// \code{.cpp}
/// CWrapperFunctionResult fn(uint8_t *Data, uint64_t Size);
/// \endcode{.cpp}
- shared::WrapperFunctionResult callWrapper(JITTargetAddress WrapperFnAddr,
+ shared::WrapperFunctionResult callWrapper(ExecutorAddr WrapperFnAddr,
ArrayRef<char> ArgBuffer) {
- std::promise<shared::WrapperFunctionResult> RP;
- auto RF = RP.get_future();
- callWrapperAsync(
- [&](shared::WrapperFunctionResult R) { RP.set_value(std::move(R)); },
- WrapperFnAddr, ArgBuffer);
- return RF.get();
+ return EPC->callWrapper(WrapperFnAddr, ArgBuffer);
}
/// Run a wrapper function using SPS to serialize the arguments and
/// deserialize the results.
template <typename SPSSignature, typename SendResultT, typename... ArgTs>
- void callSPSWrapperAsync(SendResultT &&SendResult,
- JITTargetAddress WrapperFnAddr,
+ void callSPSWrapperAsync(ExecutorAddr WrapperFnAddr, SendResultT &&SendResult,
const ArgTs &...Args) {
- shared::WrapperFunction<SPSSignature>::callAsync(
- [this,
- WrapperFnAddr](ExecutorProcessControl::SendResultFunction SendResult,
- const char *ArgData, size_t ArgSize) {
- callWrapperAsync(std::move(SendResult), WrapperFnAddr,
- ArrayRef<char>(ArgData, ArgSize));
- },
- std::move(SendResult), Args...);
+ EPC->callSPSWrapperAsync<SPSSignature, SendResultT, ArgTs...>(
+ WrapperFnAddr, std::forward<SendResultT>(SendResult), Args...);
}
/// Run a wrapper function using SPS to serialize the arguments and
@@ -1506,13 +1501,10 @@ public:
/// If SPSSignature is a non-void function signature then the second argument
/// (the first in the Args list) should be a reference to a return value.
template <typename SPSSignature, typename... WrapperCallArgTs>
- Error callSPSWrapper(JITTargetAddress WrapperFnAddr,
+ Error callSPSWrapper(ExecutorAddr WrapperFnAddr,
WrapperCallArgTs &&...WrapperCallArgs) {
- return shared::WrapperFunction<SPSSignature>::call(
- [this, WrapperFnAddr](const char *ArgData, size_t ArgSize) {
- return callWrapper(WrapperFnAddr, ArrayRef<char>(ArgData, ArgSize));
- },
- std::forward<WrapperCallArgTs>(WrapperCallArgs)...);
+ return EPC->callSPSWrapper<SPSSignature, WrapperCallArgTs...>(
+ WrapperFnAddr, std::forward<WrapperCallArgTs>(WrapperCallArgs)...);
}
/// Wrap a handler that takes concrete argument types (and a sender for a
@@ -1525,7 +1517,7 @@ public:
template <typename SPSSignature, typename HandlerT>
static JITDispatchHandlerFunction wrapAsyncWithSPS(HandlerT &&H) {
return [H = std::forward<HandlerT>(H)](
- ExecutorProcessControl::SendResultFunction SendResult,
+ SendResultFunction SendResult,
const char *ArgData, size_t ArgSize) mutable {
shared::WrapperFunction<SPSSignature>::handleAsync(ArgData, ArgSize, H,
std::move(SendResult));
@@ -1564,7 +1556,7 @@ public:
/// This should be called by the ExecutorProcessControl instance in response
/// to incoming jit-dispatch requests from the executor.
void
- runJITDispatchHandler(ExecutorProcessControl::SendResultFunction SendResult,
+ runJITDispatchHandler(SendResultFunction SendResult,
JITTargetAddress HandlerFnTagAddr,
ArrayRef<char> ArgBuffer);
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/DebuggerSupportPlugin.h b/llvm/include/llvm/ExecutionEngine/Orc/DebuggerSupportPlugin.h
new file mode 100644
index 000000000000..af092b3287d3
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/Orc/DebuggerSupportPlugin.h
@@ -0,0 +1,64 @@
+//===--- DebuggerSupportPlugin.h -- Utils for debugger support --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Generates debug objects and registers them using the jit-loader-gdb protocol.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_DEBUGGERSUPPORT_H
+#define LLVM_EXECUTIONENGINE_ORC_DEBUGGERSUPPORT_H
+
+#include "llvm/ExecutionEngine/Orc/Core.h"
+#include "llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h"
+#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
+
+namespace llvm {
+namespace orc {
+
+/// For each object containing debug info, installs JITLink passes to synthesize
+/// a debug object and then register it via the GDB JIT-registration interface.
+///
+/// Currently MachO only. For ELF use DebugObjectManagerPlugin. These two
+/// plugins will be merged in the near future.
+class GDBJITDebugInfoRegistrationPlugin : public ObjectLinkingLayer::Plugin {
+public:
+ class DebugSectionSynthesizer {
+ public:
+ virtual ~DebugSectionSynthesizer() {}
+ virtual Error startSynthesis() = 0;
+ virtual Error completeSynthesisAndRegister() = 0;
+ };
+
+ static Expected<std::unique_ptr<GDBJITDebugInfoRegistrationPlugin>>
+ Create(ExecutionSession &ES, JITDylib &ProcessJD, const Triple &TT);
+
+ GDBJITDebugInfoRegistrationPlugin(ExecutorAddr RegisterActionAddr)
+ : RegisterActionAddr(RegisterActionAddr) {}
+
+ Error notifyFailed(MaterializationResponsibility &MR) override;
+ Error notifyRemovingResources(ResourceKey K) override;
+
+ void notifyTransferringResources(ResourceKey DstKey,
+ ResourceKey SrcKey) override;
+
+ void modifyPassConfig(MaterializationResponsibility &MR,
+ jitlink::LinkGraph &LG,
+ jitlink::PassConfiguration &PassConfig) override;
+
+private:
+ void modifyPassConfigForMachO(MaterializationResponsibility &MR,
+ jitlink::LinkGraph &LG,
+ jitlink::PassConfiguration &PassConfig);
+
+ ExecutorAddr RegisterActionAddr;
+};
+
+} // namespace orc
+} // namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_DEBUGGERSUPPORT_H
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ELFNixPlatform.h b/llvm/include/llvm/ExecutionEngine/Orc/ELFNixPlatform.h
new file mode 100644
index 000000000000..20da3e3b89eb
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/Orc/ELFNixPlatform.h
@@ -0,0 +1,330 @@
+//===-- ELFNixPlatform.h -- Utilities for executing ELF in Orc --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Linux/BSD support for executing JIT'd ELF in Orc.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_ELFNIXPLATFORM_H
+#define LLVM_EXECUTIONENGINE_ORC_ELFNIXPLATFORM_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ExecutionEngine/Orc/Core.h"
+#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h"
+#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
+#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h"
+
+#include <future>
+#include <thread>
+#include <vector>
+
+namespace llvm {
+namespace orc {
+
+struct ELFPerObjectSectionsToRegister {
+ ExecutorAddrRange EHFrameSection;
+ ExecutorAddrRange ThreadDataSection;
+};
+
+struct ELFNixJITDylibInitializers {
+ using SectionList = std::vector<ExecutorAddrRange>;
+
+ ELFNixJITDylibInitializers(std::string Name, ExecutorAddr DSOHandleAddress)
+ : Name(std::move(Name)), DSOHandleAddress(std::move(DSOHandleAddress)) {}
+
+ std::string Name;
+ ExecutorAddr DSOHandleAddress;
+
+ StringMap<SectionList> InitSections;
+};
+
+class ELFNixJITDylibDeinitializers {};
+
+using ELFNixJITDylibInitializerSequence =
+ std::vector<ELFNixJITDylibInitializers>;
+
+using ELFNixJITDylibDeinitializerSequence =
+ std::vector<ELFNixJITDylibDeinitializers>;
+
+/// Mediates between ELFNix initialization and ExecutionSession state.
+class ELFNixPlatform : public Platform {
+public:
+  /// Try to create an ELFNixPlatform instance, adding the ORC runtime to the
+ /// given JITDylib.
+ ///
+ /// The ORC runtime requires access to a number of symbols in
+  /// libc++. It is up to the caller to ensure that the required
+ /// symbols can be referenced by code added to PlatformJD. The
+ /// standard way to achieve this is to first attach dynamic library
+ /// search generators for either the given process, or for the
+ /// specific required libraries, to PlatformJD, then to create the
+ /// platform instance:
+ ///
+ /// \code{.cpp}
+ /// auto &PlatformJD = ES.createBareJITDylib("stdlib");
+ /// PlatformJD.addGenerator(
+ /// ExitOnErr(EPCDynamicLibrarySearchGenerator
+ /// ::GetForTargetProcess(EPC)));
+ /// ES.setPlatform(
+ /// ExitOnErr(ELFNixPlatform::Create(ES, ObjLayer, EPC, PlatformJD,
+ /// "/path/to/orc/runtime")));
+ /// \endcode
+ ///
+ /// Alternatively, these symbols could be added to another JITDylib that
+ /// PlatformJD links against.
+ ///
+ /// Clients are also responsible for ensuring that any JIT'd code that
+ /// depends on runtime functions (including any code using TLV or static
+ /// destructors) can reference the runtime symbols. This is usually achieved
+ /// by linking any JITDylibs containing regular code against
+ /// PlatformJD.
+ ///
+ /// By default, ELFNixPlatform will add the set of aliases returned by the
+ /// standardPlatformAliases function. This includes both required aliases
+ /// (e.g. __cxa_atexit -> __orc_rt_elf_cxa_atexit for static destructor
+ /// support), and optional aliases that provide JIT versions of common
+ /// functions (e.g. dlopen -> __orc_rt_elf_jit_dlopen). Clients can
+ /// override these defaults by passing a non-None value for the
+ /// RuntimeAliases function, in which case the client is responsible for
+ /// setting up all aliases (including the required ones).
+ static Expected<std::unique_ptr<ELFNixPlatform>>
+ Create(ExecutionSession &ES, ObjectLinkingLayer &ObjLinkingLayer,
+ JITDylib &PlatformJD, const char *OrcRuntimePath,
+ Optional<SymbolAliasMap> RuntimeAliases = None);
+
+ ExecutionSession &getExecutionSession() const { return ES; }
+ ObjectLinkingLayer &getObjectLinkingLayer() const { return ObjLinkingLayer; }
+
+ Error setupJITDylib(JITDylib &JD) override;
+ Error notifyAdding(ResourceTracker &RT,
+ const MaterializationUnit &MU) override;
+ Error notifyRemoving(ResourceTracker &RT) override;
+
+ /// Returns an AliasMap containing the default aliases for the ELFNixPlatform.
+ /// This can be modified by clients when constructing the platform to add
+ /// or remove aliases.
+ static SymbolAliasMap standardPlatformAliases(ExecutionSession &ES);
+
+ /// Returns the array of required CXX aliases.
+ static ArrayRef<std::pair<const char *, const char *>> requiredCXXAliases();
+
+ /// Returns the array of standard runtime utility aliases for ELF.
+ static ArrayRef<std::pair<const char *, const char *>>
+ standardRuntimeUtilityAliases();
+
+ /// Returns true if the given section name is an initializer section.
+ static bool isInitializerSection(StringRef SecName);
+
+private:
+ // The ELFNixPlatformPlugin scans/modifies LinkGraphs to support ELF
+ // platform features including initializers, exceptions, TLV, and language
+ // runtime registration.
+ class ELFNixPlatformPlugin : public ObjectLinkingLayer::Plugin {
+ public:
+ ELFNixPlatformPlugin(ELFNixPlatform &MP) : MP(MP) {}
+
+ void modifyPassConfig(MaterializationResponsibility &MR,
+ jitlink::LinkGraph &G,
+ jitlink::PassConfiguration &Config) override;
+
+ SyntheticSymbolDependenciesMap
+ getSyntheticSymbolDependencies(MaterializationResponsibility &MR) override;
+
+ // FIXME: We should be tentatively tracking scraped sections and discarding
+ // if the MR fails.
+ Error notifyFailed(MaterializationResponsibility &MR) override {
+ return Error::success();
+ }
+
+ Error notifyRemovingResources(ResourceKey K) override {
+ return Error::success();
+ }
+
+ void notifyTransferringResources(ResourceKey DstKey,
+ ResourceKey SrcKey) override {}
+
+ private:
+ using InitSymbolDepMap =
+ DenseMap<MaterializationResponsibility *, JITLinkSymbolSet>;
+
+ void addInitializerSupportPasses(MaterializationResponsibility &MR,
+ jitlink::PassConfiguration &Config);
+
+ void addDSOHandleSupportPasses(MaterializationResponsibility &MR,
+ jitlink::PassConfiguration &Config);
+
+ void addEHAndTLVSupportPasses(MaterializationResponsibility &MR,
+ jitlink::PassConfiguration &Config);
+
+ Error preserveInitSections(jitlink::LinkGraph &G,
+ MaterializationResponsibility &MR);
+
+ Error registerInitSections(jitlink::LinkGraph &G, JITDylib &JD);
+
+ Error fixTLVSectionsAndEdges(jitlink::LinkGraph &G, JITDylib &JD);
+
+ std::mutex PluginMutex;
+ ELFNixPlatform &MP;
+ InitSymbolDepMap InitSymbolDeps;
+ };
+
+ using SendInitializerSequenceFn =
+ unique_function<void(Expected<ELFNixJITDylibInitializerSequence>)>;
+
+ using SendDeinitializerSequenceFn =
+ unique_function<void(Expected<ELFNixJITDylibDeinitializerSequence>)>;
+
+ using SendSymbolAddressFn = unique_function<void(Expected<ExecutorAddr>)>;
+
+ static bool supportedTarget(const Triple &TT);
+
+ ELFNixPlatform(ExecutionSession &ES, ObjectLinkingLayer &ObjLinkingLayer,
+ JITDylib &PlatformJD,
+ std::unique_ptr<DefinitionGenerator> OrcRuntimeGenerator,
+ Error &Err);
+
+ // Associate ELFNixPlatform JIT-side runtime support functions with handlers.
+ Error associateRuntimeSupportFunctions(JITDylib &PlatformJD);
+
+ void getInitializersBuildSequencePhase(SendInitializerSequenceFn SendResult,
+ JITDylib &JD,
+ std::vector<JITDylibSP> DFSLinkOrder);
+
+ void getInitializersLookupPhase(SendInitializerSequenceFn SendResult,
+ JITDylib &JD);
+
+ void rt_getInitializers(SendInitializerSequenceFn SendResult,
+ StringRef JDName);
+
+ void rt_getDeinitializers(SendDeinitializerSequenceFn SendResult,
+ ExecutorAddr Handle);
+
+ void rt_lookupSymbol(SendSymbolAddressFn SendResult, ExecutorAddr Handle,
+ StringRef SymbolName);
+
+ // Records the addresses of runtime symbols used by the platform.
+ Error bootstrapELFNixRuntime(JITDylib &PlatformJD);
+
+ Error registerInitInfo(JITDylib &JD,
+ ArrayRef<jitlink::Section *> InitSections);
+
+ Error registerPerObjectSections(const ELFPerObjectSectionsToRegister &POSR);
+
+ Expected<uint64_t> createPThreadKey();
+
+ ExecutionSession &ES;
+ ObjectLinkingLayer &ObjLinkingLayer;
+
+ SymbolStringPtr DSOHandleSymbol;
+ std::atomic<bool> RuntimeBootstrapped{false};
+
+ ExecutorAddr orc_rt_elfnix_platform_bootstrap;
+ ExecutorAddr orc_rt_elfnix_platform_shutdown;
+ ExecutorAddr orc_rt_elfnix_register_object_sections;
+ ExecutorAddr orc_rt_elfnix_create_pthread_key;
+
+ DenseMap<JITDylib *, SymbolLookupSet> RegisteredInitSymbols;
+
+ // InitSeqs gets its own mutex to avoid locking the whole session when
+ // aggregating data from the jitlink.
+ std::mutex PlatformMutex;
+ DenseMap<JITDylib *, ELFNixJITDylibInitializers> InitSeqs;
+ std::vector<ELFPerObjectSectionsToRegister> BootstrapPOSRs;
+
+ DenseMap<JITTargetAddress, JITDylib *> HandleAddrToJITDylib;
+ DenseMap<JITDylib *, uint64_t> JITDylibToPThreadKey;
+};
+
+namespace shared {
+
+using SPSELFPerObjectSectionsToRegister =
+ SPSTuple<SPSExecutorAddrRange, SPSExecutorAddrRange>;
+
+template <>
+class SPSSerializationTraits<SPSELFPerObjectSectionsToRegister,
+ ELFPerObjectSectionsToRegister> {
+
+public:
+ static size_t size(const ELFPerObjectSectionsToRegister &MOPOSR) {
+ return SPSELFPerObjectSectionsToRegister::AsArgList::size(
+ MOPOSR.EHFrameSection, MOPOSR.ThreadDataSection);
+ }
+
+ static bool serialize(SPSOutputBuffer &OB,
+ const ELFPerObjectSectionsToRegister &MOPOSR) {
+ return SPSELFPerObjectSectionsToRegister::AsArgList::serialize(
+ OB, MOPOSR.EHFrameSection, MOPOSR.ThreadDataSection);
+ }
+
+ static bool deserialize(SPSInputBuffer &IB,
+ ELFPerObjectSectionsToRegister &MOPOSR) {
+ return SPSELFPerObjectSectionsToRegister::AsArgList::deserialize(
+ IB, MOPOSR.EHFrameSection, MOPOSR.ThreadDataSection);
+ }
+};
+
+using SPSNamedExecutorAddrRangeSequenceMap =
+ SPSSequence<SPSTuple<SPSString, SPSExecutorAddrRangeSequence>>;
+
+using SPSELFNixJITDylibInitializers =
+ SPSTuple<SPSString, SPSExecutorAddr, SPSNamedExecutorAddrRangeSequenceMap>;
+
+using SPSELFNixJITDylibInitializerSequence =
+ SPSSequence<SPSELFNixJITDylibInitializers>;
+
+/// Serialization traits for ELFNixJITDylibInitializers.
+template <>
+class SPSSerializationTraits<SPSELFNixJITDylibInitializers,
+ ELFNixJITDylibInitializers> {
+public:
+ static size_t size(const ELFNixJITDylibInitializers &MOJDIs) {
+ return SPSELFNixJITDylibInitializers::AsArgList::size(
+ MOJDIs.Name, MOJDIs.DSOHandleAddress, MOJDIs.InitSections);
+ }
+
+ static bool serialize(SPSOutputBuffer &OB,
+ const ELFNixJITDylibInitializers &MOJDIs) {
+ return SPSELFNixJITDylibInitializers::AsArgList::serialize(
+ OB, MOJDIs.Name, MOJDIs.DSOHandleAddress, MOJDIs.InitSections);
+ }
+
+ static bool deserialize(SPSInputBuffer &IB,
+ ELFNixJITDylibInitializers &MOJDIs) {
+ return SPSELFNixJITDylibInitializers::AsArgList::deserialize(
+ IB, MOJDIs.Name, MOJDIs.DSOHandleAddress, MOJDIs.InitSections);
+ }
+};
+
+using SPSELFJITDylibDeinitializers = SPSEmpty;
+
+using SPSELFJITDylibDeinitializerSequence =
+ SPSSequence<SPSELFJITDylibDeinitializers>;
+
+template <>
+class SPSSerializationTraits<SPSELFJITDylibDeinitializers,
+ ELFNixJITDylibDeinitializers> {
+public:
+ static size_t size(const ELFNixJITDylibDeinitializers &MOJDDs) { return 0; }
+
+ static bool serialize(SPSOutputBuffer &OB,
+ const ELFNixJITDylibDeinitializers &MOJDDs) {
+ return true;
+ }
+
+ static bool deserialize(SPSInputBuffer &IB,
+ ELFNixJITDylibDeinitializers &MOJDDs) {
+ MOJDDs = ELFNixJITDylibDeinitializers();
+ return true;
+ }
+};
+
+} // end namespace shared
+} // end namespace orc
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_ELFNIXPLATFORM_H
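
The RuntimeAliases override described in Create()'s documentation can be sketched as follows (ES, ObjLayer and PlatformJD are assumed to exist, "/path/to/orc/runtime" is the usual placeholder, and "my_jit_dlopen_shim" is a purely hypothetical replacement symbol):

    llvm::ExitOnError ExitOnErr;
    // Start from the standard aliases, then redirect dlopen to a custom shim.
    auto Aliases = llvm::orc::ELFNixPlatform::standardPlatformAliases(ES);
    Aliases[ES.intern("dlopen")] = {ES.intern("my_jit_dlopen_shim"),
                                    llvm::JITSymbolFlags::Exported};
    ES.setPlatform(ExitOnErr(llvm::orc::ELFNixPlatform::Create(
        ES, ObjLayer, PlatformJD, "/path/to/orc/runtime", std::move(Aliases))));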
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h
index 410a202b3296..940d0d28ae83 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h
@@ -14,6 +14,7 @@
#define LLVM_EXECUTIONENGINE_ORC_EPCDEBUGOBJECTREGISTRAR_H
#include "llvm/ExecutionEngine/JITSymbol.h"
+#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h"
#include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Memory.h"
@@ -32,7 +33,7 @@ class ExecutionSession;
/// Abstract interface for registering debug objects in the executor process.
class DebugObjectRegistrar {
public:
- virtual Error registerDebugObject(sys::MemoryBlock) = 0;
+ virtual Error registerDebugObject(ExecutorAddrRange TargetMem) = 0;
virtual ~DebugObjectRegistrar() {}
};
@@ -40,14 +41,14 @@ public:
/// executor process.
class EPCDebugObjectRegistrar : public DebugObjectRegistrar {
public:
- EPCDebugObjectRegistrar(ExecutionSession &ES, JITTargetAddress RegisterFn)
+ EPCDebugObjectRegistrar(ExecutionSession &ES, ExecutorAddr RegisterFn)
: ES(ES), RegisterFn(RegisterFn) {}
- Error registerDebugObject(sys::MemoryBlock TargetMem) override;
+ Error registerDebugObject(ExecutorAddrRange TargetMem) override;
private:
ExecutionSession &ES;
- JITTargetAddress RegisterFn;
+ ExecutorAddr RegisterFn;
};
/// Create a ExecutorProcessControl-based DebugObjectRegistrar that emits debug
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h
index 8cd6e9319a28..6d113a7bdf1a 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h
@@ -14,6 +14,7 @@
#define LLVM_EXECUTIONENGINE_ORC_EPCEHFRAMEREGISTRAR_H
#include "llvm/ExecutionEngine/JITLink/EHFrameSupport.h"
+#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h"
namespace llvm {
namespace orc {
@@ -33,8 +34,8 @@ public:
/// Create a EPCEHFrameRegistrar with the given ExecutorProcessControl
/// object and registration/deregistration function addresses.
EPCEHFrameRegistrar(ExecutionSession &ES,
- JITTargetAddress RegisterEHFrameWrapperFnAddr,
- JITTargetAddress DeregisterEHFRameWrapperFnAddr)
+ ExecutorAddr RegisterEHFrameWrapperFnAddr,
+ ExecutorAddr DeregisterEHFRameWrapperFnAddr)
: ES(ES), RegisterEHFrameWrapperFnAddr(RegisterEHFrameWrapperFnAddr),
DeregisterEHFrameWrapperFnAddr(DeregisterEHFRameWrapperFnAddr) {}
@@ -45,8 +46,8 @@ public:
private:
ExecutionSession &ES;
- JITTargetAddress RegisterEHFrameWrapperFnAddr;
- JITTargetAddress DeregisterEHFrameWrapperFnAddr;
+ ExecutorAddr RegisterEHFrameWrapperFnAddr;
+ ExecutorAddr DeregisterEHFrameWrapperFnAddr;
};
} // end namespace orc
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericDylibManager.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericDylibManager.h
new file mode 100644
index 000000000000..02e580c86f54
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericDylibManager.h
@@ -0,0 +1,67 @@
+//===- EPCGenericDylibManager.h -- Generic EPC Dylib management -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements dylib loading and searching by making calls to
+// ExecutorProcessControl::callWrapper.
+//
+// This simplifies the implementation of new ExecutorProcessControl instances,
+// as this implementation will always work (at the cost of some performance
+// overhead for the calls).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_EPCGENERICDYLIBMANAGER_H
+#define LLVM_EXECUTIONENGINE_ORC_EPCGENERICDYLIBMANAGER_H
+
+#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h"
+#include "llvm/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.h"
+
+namespace llvm {
+namespace orc {
+
+class SymbolLookupSet;
+
+class EPCGenericDylibManager {
+public:
+  /// Function addresses for dylib management.
+ struct SymbolAddrs {
+ ExecutorAddr Instance;
+ ExecutorAddr Open;
+ ExecutorAddr Lookup;
+ };
+
+  /// Create an EPCGenericDylibManager using the given EPC, looking up the
+  /// default dylib-management symbols in its bootstrap symbol set.
+ static Expected<EPCGenericDylibManager>
+ CreateWithDefaultBootstrapSymbols(ExecutorProcessControl &EPC);
+
+  /// Create an EPCGenericDylibManager instance from a given set of
+  /// function addrs.
+ EPCGenericDylibManager(ExecutorProcessControl &EPC, SymbolAddrs SAs)
+ : EPC(EPC), SAs(SAs) {}
+
+ /// Loads the dylib with the given name.
+ Expected<tpctypes::DylibHandle> open(StringRef Path, uint64_t Mode);
+
+ /// Looks up symbols within the given dylib.
+ Expected<std::vector<ExecutorAddr>> lookup(tpctypes::DylibHandle H,
+ const SymbolLookupSet &Lookup);
+
+ /// Looks up symbols within the given dylib.
+ Expected<std::vector<ExecutorAddr>>
+ lookup(tpctypes::DylibHandle H, const RemoteSymbolLookupSet &Lookup);
+
+private:
+ ExecutorProcessControl &EPC;
+ SymbolAddrs SAs;
+};
+
+} // end namespace orc
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_EPCGENERICDYLIBMANAGER_H
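
A minimal sketch of the call sequence (EPC and ES are assumed to exist; the library path and Mode value are illustrative):

    llvm::ExitOnError ExitOnErr;
    auto DylibMgr = ExitOnErr(
        llvm::orc::EPCGenericDylibManager::CreateWithDefaultBootstrapSymbols(EPC));
    // Open a dylib in the executor, then look up one symbol in it.
    auto H = ExitOnErr(DylibMgr.open("/usr/lib/libm.so", 0));
    llvm::orc::SymbolLookupSet Syms(ES.intern("sin"));
    auto Addrs = ExitOnErr(DylibMgr.lookup(H, Syms));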
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.h
new file mode 100644
index 000000000000..b9825f17ec17
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.h
@@ -0,0 +1,97 @@
+//===- EPCGenericJITLinkMemoryManager.h - EPC-based mem manager -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements JITLinkMemoryManager by making remote calls via
+// ExecutorProcessControl::callWrapperAsync.
+//
+// This simplifies the implementation of new ExecutorProcessControl instances,
+// as this implementation will always work (at the cost of some performance
+// overhead for the calls).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_EPCGENERICJITLINKMEMORYMANAGER_H
+#define LLVM_EXECUTIONENGINE_ORC_EPCGENERICJITLINKMEMORYMANAGER_H
+
+#include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h"
+#include "llvm/ExecutionEngine/Orc/Core.h"
+
+namespace llvm {
+namespace orc {
+
+class EPCGenericJITLinkMemoryManager : public jitlink::JITLinkMemoryManager {
+public:
+  /// Function addresses for memory management in the executor.
+ struct SymbolAddrs {
+ ExecutorAddr Allocator;
+ ExecutorAddr Reserve;
+ ExecutorAddr Finalize;
+ ExecutorAddr Deallocate;
+ };
+
+ /// Create an EPCGenericJITLinkMemoryManager instance from a given set of
+ /// function addrs.
+ EPCGenericJITLinkMemoryManager(ExecutorProcessControl &EPC, SymbolAddrs SAs)
+ : EPC(EPC), SAs(SAs) {}
+
+ void allocate(const jitlink::JITLinkDylib *JD, jitlink::LinkGraph &G,
+ OnAllocatedFunction OnAllocated) override;
+
+ // Use overloads from base class.
+ using JITLinkMemoryManager::allocate;
+
+ void deallocate(std::vector<FinalizedAlloc> Allocs,
+ OnDeallocatedFunction OnDeallocated) override;
+
+ // Use overloads from base class.
+ using JITLinkMemoryManager::deallocate;
+
+private:
+ class InFlightAlloc;
+
+ void completeAllocation(ExecutorAddr AllocAddr, jitlink::BasicLayout BL,
+ OnAllocatedFunction OnAllocated);
+
+ ExecutorProcessControl &EPC;
+ SymbolAddrs SAs;
+};
+
+namespace shared {
+
+/// FIXME: This specialization should be moved into TargetProcessControlTypes.h
+/// (or wherever those types get merged to) once ORC depends on JITLink.
+template <>
+class SPSSerializationTraits<SPSExecutorAddr,
+ jitlink::JITLinkMemoryManager::FinalizedAlloc> {
+public:
+ static size_t size(const jitlink::JITLinkMemoryManager::FinalizedAlloc &FA) {
+ return SPSArgList<SPSExecutorAddr>::size(ExecutorAddr(FA.getAddress()));
+ }
+
+ static bool
+ serialize(SPSOutputBuffer &OB,
+ const jitlink::JITLinkMemoryManager::FinalizedAlloc &FA) {
+ return SPSArgList<SPSExecutorAddr>::serialize(
+ OB, ExecutorAddr(FA.getAddress()));
+ }
+
+ static bool deserialize(SPSInputBuffer &IB,
+ jitlink::JITLinkMemoryManager::FinalizedAlloc &FA) {
+ ExecutorAddr A;
+ if (!SPSArgList<SPSExecutorAddr>::deserialize(IB, A))
+ return false;
+ FA = jitlink::JITLinkMemoryManager::FinalizedAlloc(A.getValue());
+ return true;
+ }
+};
+
+} // end namespace shared
+} // end namespace orc
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_EPCGENERICJITLINKMEMORYMANAGER_H
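
One way to wire the manager up is to pull its SymbolAddrs out of the EPC bootstrap symbol map also introduced in this import (the bootstrap symbol names below are illustrative, not the actual ORC runtime names):

    llvm::ExitOnError ExitOnErr;
    llvm::orc::EPCGenericJITLinkMemoryManager::SymbolAddrs SAs;
    ExitOnErr(EPC.getBootstrapSymbols(
        {{SAs.Allocator, "__orc_rt_memory_manager_instance"},
         {SAs.Reserve, "__orc_rt_memory_manager_reserve"},
         {SAs.Finalize, "__orc_rt_memory_manager_finalize"},
         {SAs.Deallocate, "__orc_rt_memory_manager_deallocate"}}));
    auto MemMgr =
        std::make_unique<llvm::orc::EPCGenericJITLinkMemoryManager>(EPC, SAs);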
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericMemoryAccess.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericMemoryAccess.h
new file mode 100644
index 000000000000..8c1d457d06ab
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericMemoryAccess.h
@@ -0,0 +1,85 @@
+//===- EPCGenericMemoryAccess.h - Generic EPC MemoryAccess impl -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements ExecutorProcessControl::MemoryAccess by making calls to
+// ExecutorProcessControl::callWrapperAsync.
+//
+// This simplifies the implementation of new ExecutorProcessControl instances,
+// as this implementation will always work (at the cost of some performance
+// overhead for the calls).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_EPCGENERICMEMORYACCESS_H
+#define LLVM_EXECUTIONENGINE_ORC_EPCGENERICMEMORYACCESS_H
+
+#include "llvm/ExecutionEngine/Orc/Core.h"
+
+namespace llvm {
+namespace orc {
+
+class EPCGenericMemoryAccess : public ExecutorProcessControl::MemoryAccess {
+public:
+ /// Function addresses for memory access.
+ struct FuncAddrs {
+ ExecutorAddr WriteUInt8s;
+ ExecutorAddr WriteUInt16s;
+ ExecutorAddr WriteUInt32s;
+ ExecutorAddr WriteUInt64s;
+ ExecutorAddr WriteBuffers;
+ };
+
+ /// Create an EPCGenericMemoryAccess instance from a given set of
+ /// function addrs.
+ EPCGenericMemoryAccess(ExecutorProcessControl &EPC, FuncAddrs FAs)
+ : EPC(EPC), FAs(FAs) {}
+
+ void writeUInt8sAsync(ArrayRef<tpctypes::UInt8Write> Ws,
+ WriteResultFn OnWriteComplete) override {
+ using namespace shared;
+ EPC.callSPSWrapperAsync<void(SPSSequence<SPSMemoryAccessUInt8Write>)>(
+ FAs.WriteUInt8s, std::move(OnWriteComplete), Ws);
+ }
+
+ void writeUInt16sAsync(ArrayRef<tpctypes::UInt16Write> Ws,
+ WriteResultFn OnWriteComplete) override {
+ using namespace shared;
+ EPC.callSPSWrapperAsync<void(SPSSequence<SPSMemoryAccessUInt16Write>)>(
+ FAs.WriteUInt16s, std::move(OnWriteComplete), Ws);
+ }
+
+ void writeUInt32sAsync(ArrayRef<tpctypes::UInt32Write> Ws,
+ WriteResultFn OnWriteComplete) override {
+ using namespace shared;
+ EPC.callSPSWrapperAsync<void(SPSSequence<SPSMemoryAccessUInt32Write>)>(
+ FAs.WriteUInt32s, std::move(OnWriteComplete), Ws);
+ }
+
+ void writeUInt64sAsync(ArrayRef<tpctypes::UInt64Write> Ws,
+ WriteResultFn OnWriteComplete) override {
+ using namespace shared;
+ EPC.callSPSWrapperAsync<void(SPSSequence<SPSMemoryAccessUInt64Write>)>(
+ FAs.WriteUInt64s, std::move(OnWriteComplete), Ws);
+ }
+
+ void writeBuffersAsync(ArrayRef<tpctypes::BufferWrite> Ws,
+ WriteResultFn OnWriteComplete) override {
+ using namespace shared;
+ EPC.callSPSWrapperAsync<void(SPSSequence<SPSMemoryAccessBufferWrite>)>(
+ FAs.WriteBuffers, std::move(OnWriteComplete), Ws);
+ }
+
+private:
+ ExecutorProcessControl &EPC;
+ FuncAddrs FAs;
+};
+
+} // end namespace orc
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_EPCGENERICMEMORYACCESS_H
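
Construction follows the same bootstrap-symbol pattern (EPC assumed, symbol names illustrative); the blocking writeUInt*s/writeBuffers helpers inherited from MemoryAccess then wrap the *Async calls shown above:

    llvm::ExitOnError ExitOnErr;
    llvm::orc::EPCGenericMemoryAccess::FuncAddrs FAs;
    ExitOnErr(EPC.getBootstrapSymbols(
        {{FAs.WriteUInt8s, "__orc_rt_write_uint8s_wrapper"},
         {FAs.WriteUInt16s, "__orc_rt_write_uint16s_wrapper"},
         {FAs.WriteUInt32s, "__orc_rt_write_uint32s_wrapper"},
         {FAs.WriteUInt64s, "__orc_rt_write_uint64s_wrapper"},
         {FAs.WriteBuffers, "__orc_rt_write_buffers_wrapper"}}));
    auto MemAccess =
        std::make_unique<llvm::orc::EPCGenericMemoryAccess>(EPC, FAs);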
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.h
new file mode 100644
index 000000000000..b6fdfb92ced3
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.h
@@ -0,0 +1,133 @@
+//===---- EPCGenericRTDyldMemoryManager.h - EPC-based MemMgr ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines a RuntimeDyld::MemoryManager that uses EPC and the ORC runtime
+// bootstrap functions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_EPCGENERICRTDYLDMEMORYMANAGER_H
+#define LLVM_EXECUTIONENGINE_ORC_EPCGENERICRTDYLDMEMORYMANAGER_H
+
+#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h"
+#include "llvm/ExecutionEngine/RuntimeDyld.h"
+
+#define DEBUG_TYPE "orc"
+
+namespace llvm {
+namespace orc {
+
+/// Remote-mapped RuntimeDyld-compatible memory manager.
+class EPCGenericRTDyldMemoryManager : public RuntimeDyld::MemoryManager {
+public:
+  /// Symbol addresses for memory management and eh-frame registration.
+ struct SymbolAddrs {
+ ExecutorAddr Instance;
+ ExecutorAddr Reserve;
+ ExecutorAddr Finalize;
+ ExecutorAddr Deallocate;
+ ExecutorAddr RegisterEHFrame;
+ ExecutorAddr DeregisterEHFrame;
+ };
+
+ /// Create an EPCGenericRTDyldMemoryManager using the given EPC, looking up
+ /// the default symbol names in the bootstrap symbol set.
+ static Expected<std::unique_ptr<EPCGenericRTDyldMemoryManager>>
+ CreateWithDefaultBootstrapSymbols(ExecutorProcessControl &EPC);
+
+ /// Create an EPCGenericRTDyldMemoryManager using the given EPC and symbol
+ /// addrs.
+ EPCGenericRTDyldMemoryManager(ExecutorProcessControl &EPC, SymbolAddrs SAs);
+
+ EPCGenericRTDyldMemoryManager(const EPCGenericRTDyldMemoryManager &) = delete;
+ EPCGenericRTDyldMemoryManager &
+ operator=(const EPCGenericRTDyldMemoryManager &) = delete;
+ EPCGenericRTDyldMemoryManager(EPCGenericRTDyldMemoryManager &&) = delete;
+ EPCGenericRTDyldMemoryManager &
+ operator=(EPCGenericRTDyldMemoryManager &&) = delete;
+ ~EPCGenericRTDyldMemoryManager();
+
+ uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
+ unsigned SectionID,
+ StringRef SectionName) override;
+
+ uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
+ unsigned SectionID, StringRef SectionName,
+ bool IsReadOnly) override;
+
+ void reserveAllocationSpace(uintptr_t CodeSize, uint32_t CodeAlign,
+ uintptr_t RODataSize, uint32_t RODataAlign,
+ uintptr_t RWDataSize,
+ uint32_t RWDataAlign) override;
+
+ bool needsToReserveAllocationSpace() override;
+
+ void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr, size_t Size) override;
+
+ void deregisterEHFrames() override;
+
+ void notifyObjectLoaded(RuntimeDyld &Dyld,
+ const object::ObjectFile &Obj) override;
+
+ bool finalizeMemory(std::string *ErrMsg = nullptr) override;
+
+private:
+ struct Alloc {
+ public:
+ Alloc(uint64_t Size, unsigned Align)
+ : Size(Size), Align(Align),
+ Contents(std::make_unique<uint8_t[]>(Size + Align - 1)) {}
+
+ uint64_t Size;
+ unsigned Align;
+ std::unique_ptr<uint8_t[]> Contents;
+ ExecutorAddr RemoteAddr;
+ };
+
+ struct EHFrame {
+ ExecutorAddr Addr;
+ uint64_t Size;
+ };
+
+ // Group of section allocations to be allocated together in the executor. The
+  // RemoteCode address will stand in as the id of the group for deallocation
+ // purposes.
+ struct AllocGroup {
+ AllocGroup() = default;
+ AllocGroup(const AllocGroup &) = delete;
+ AllocGroup &operator=(const AllocGroup &) = delete;
+ AllocGroup(AllocGroup &&) = default;
+ AllocGroup &operator=(AllocGroup &&) = default;
+
+ ExecutorAddrRange RemoteCode;
+ ExecutorAddrRange RemoteROData;
+ ExecutorAddrRange RemoteRWData;
+ std::vector<EHFrame> UnfinalizedEHFrames;
+ std::vector<Alloc> CodeAllocs, RODataAllocs, RWDataAllocs;
+ };
+
+ // Maps all allocations in Allocs to aligned blocks
+ void mapAllocsToRemoteAddrs(RuntimeDyld &Dyld, std::vector<Alloc> &Allocs,
+ ExecutorAddr NextAddr);
+
+ ExecutorProcessControl &EPC;
+ SymbolAddrs SAs;
+
+ std::mutex M;
+ std::vector<AllocGroup> Unmapped;
+ std::vector<AllocGroup> Unfinalized;
+ std::vector<ExecutorAddr> FinalizedAllocs;
+ std::string ErrMsg;
+};
+
+} // end namespace orc
+} // end namespace llvm
+
+#undef DEBUG_TYPE
+
+#endif // LLVM_EXECUTIONENGINE_ORC_EPCGENERICRTDYLDMEMORYMANAGER_H
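
A sketch of plugging the manager into an RTDyldObjectLinkingLayer (ES and EPC assumed to exist; cantFail is used only to keep the sketch short):

    llvm::orc::RTDyldObjectLinkingLayer ObjLayer(ES, [&EPC]() {
      // One remote-backed memory manager per emitted object.
      return llvm::cantFail(
          llvm::orc::EPCGenericRTDyldMemoryManager::
              CreateWithDefaultBootstrapSymbols(EPC));
    });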
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h
index 64f16d507c97..92de5882bafe 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h
@@ -126,7 +126,7 @@ public:
}
private:
- using Allocation = jitlink::JITLinkMemoryManager::Allocation;
+ using FinalizedAlloc = jitlink::JITLinkMemoryManager::FinalizedAlloc;
struct IndirectStubInfo {
IndirectStubInfo() = default;
@@ -149,12 +149,12 @@ private:
ExecutorProcessControl &EPC;
std::unique_ptr<ABISupport> ABI;
JITTargetAddress ResolverBlockAddr;
- std::unique_ptr<jitlink::JITLinkMemoryManager::Allocation> ResolverBlock;
+ FinalizedAlloc ResolverBlock;
std::unique_ptr<TrampolinePool> TP;
std::unique_ptr<LazyCallThroughManager> LCTM;
std::vector<IndirectStubInfo> AvailableIndirectStubs;
- std::vector<std::unique_ptr<Allocation>> IndirectStubAllocs;
+ std::vector<FinalizedAlloc> IndirectStubAllocs;
};
/// This will call writeResolver on the given EPCIndirectionUtils instance
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ExecutorProcessControl.h b/llvm/include/llvm/ExecutionEngine/Orc/ExecutorProcessControl.h
index d540d0cd0608..105dac8e8d04 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/ExecutorProcessControl.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/ExecutorProcessControl.h
@@ -13,7 +13,6 @@
#ifndef LLVM_EXECUTIONENGINE_ORC_EXECUTORPROCESSCONTROL_H
#define LLVM_EXECUTIONENGINE_ORC_EXECUTORPROCESSCONTROL_H
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h"
@@ -21,6 +20,7 @@
#include "llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h"
#include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h"
#include "llvm/ExecutionEngine/Orc/SymbolStringPool.h"
+#include "llvm/ExecutionEngine/Orc/TaskDispatch.h"
#include "llvm/Support/DynamicLibrary.h"
#include "llvm/Support/MSVCErrorWorkarounds.h"
@@ -37,11 +37,65 @@ class SymbolLookupSet;
/// ExecutorProcessControl supports interaction with a JIT target process.
class ExecutorProcessControl {
friend class ExecutionSession;
-
public:
- /// Sender to return the result of a WrapperFunction executed in the JIT.
- using SendResultFunction =
- unique_function<void(shared::WrapperFunctionResult)>;
+
+  /// A handler for incoming WrapperFunctionResults -- either return values from
+ /// callWrapper* calls, or incoming JIT-dispatch requests.
+ ///
+ /// IncomingWFRHandlers are constructible from
+  /// unique_function<void(shared::WrapperFunctionResult)>s using a
+  /// RunInPlace or RunAsTask object.
+ class IncomingWFRHandler {
+ friend class ExecutorProcessControl;
+ public:
+ IncomingWFRHandler() = default;
+ explicit operator bool() const { return !!H; }
+ void operator()(shared::WrapperFunctionResult WFR) { H(std::move(WFR)); }
+ private:
+ template <typename FnT> IncomingWFRHandler(FnT &&Fn)
+ : H(std::forward<FnT>(Fn)) {}
+
+ unique_function<void(shared::WrapperFunctionResult)> H;
+ };
+
+ /// Constructs an IncomingWFRHandler from a function object that is callable
+ /// as void(shared::WrapperFunctionResult). The function object will be called
+ /// directly. This should be used with care as it may block listener threads
+ /// in remote EPCs. It is only suitable for simple tasks (e.g. setting a
+ /// future), or for performing some quick analysis before dispatching "real"
+ /// work as a Task.
+ class RunInPlace {
+ public:
+ template <typename FnT>
+ IncomingWFRHandler operator()(FnT &&Fn) {
+ return IncomingWFRHandler(std::forward<FnT>(Fn));
+ }
+ };
+
+ /// Constructs an IncomingWFRHandler from a function object by creating a new
+ /// function object that dispatches the original using a TaskDispatcher,
+ /// wrapping the original as a GenericNamedTask.
+ ///
+ /// This is the default approach for running WFR handlers.
+ class RunAsTask {
+ public:
+ RunAsTask(TaskDispatcher &D) : D(D) {}
+
+ template <typename FnT>
+ IncomingWFRHandler operator()(FnT &&Fn) {
+ return IncomingWFRHandler(
+ [&D = this->D, Fn = std::move(Fn)]
+ (shared::WrapperFunctionResult WFR) mutable {
+ D.dispatch(
+ makeGenericNamedTask(
+ [Fn = std::move(Fn), WFR = std::move(WFR)]() mutable {
+ Fn(std::move(WFR));
+ }, "WFR handler task"));
+ });
+ }
+ private:
+ TaskDispatcher &D;
+ };
/// APIs for manipulating memory in the target process.
class MemoryAccess {
@@ -51,53 +105,58 @@ public:
virtual ~MemoryAccess();
- virtual void writeUInt8s(ArrayRef<tpctypes::UInt8Write> Ws,
- WriteResultFn OnWriteComplete) = 0;
+ virtual void writeUInt8sAsync(ArrayRef<tpctypes::UInt8Write> Ws,
+ WriteResultFn OnWriteComplete) = 0;
- virtual void writeUInt16s(ArrayRef<tpctypes::UInt16Write> Ws,
- WriteResultFn OnWriteComplete) = 0;
+ virtual void writeUInt16sAsync(ArrayRef<tpctypes::UInt16Write> Ws,
+ WriteResultFn OnWriteComplete) = 0;
- virtual void writeUInt32s(ArrayRef<tpctypes::UInt32Write> Ws,
- WriteResultFn OnWriteComplete) = 0;
+ virtual void writeUInt32sAsync(ArrayRef<tpctypes::UInt32Write> Ws,
+ WriteResultFn OnWriteComplete) = 0;
- virtual void writeUInt64s(ArrayRef<tpctypes::UInt64Write> Ws,
- WriteResultFn OnWriteComplete) = 0;
+ virtual void writeUInt64sAsync(ArrayRef<tpctypes::UInt64Write> Ws,
+ WriteResultFn OnWriteComplete) = 0;
- virtual void writeBuffers(ArrayRef<tpctypes::BufferWrite> Ws,
- WriteResultFn OnWriteComplete) = 0;
+ virtual void writeBuffersAsync(ArrayRef<tpctypes::BufferWrite> Ws,
+ WriteResultFn OnWriteComplete) = 0;
Error writeUInt8s(ArrayRef<tpctypes::UInt8Write> Ws) {
std::promise<MSVCPError> ResultP;
auto ResultF = ResultP.get_future();
- writeUInt8s(Ws, [&](Error Err) { ResultP.set_value(std::move(Err)); });
+ writeUInt8sAsync(Ws,
+ [&](Error Err) { ResultP.set_value(std::move(Err)); });
return ResultF.get();
}
Error writeUInt16s(ArrayRef<tpctypes::UInt16Write> Ws) {
std::promise<MSVCPError> ResultP;
auto ResultF = ResultP.get_future();
- writeUInt16s(Ws, [&](Error Err) { ResultP.set_value(std::move(Err)); });
+ writeUInt16sAsync(Ws,
+ [&](Error Err) { ResultP.set_value(std::move(Err)); });
return ResultF.get();
}
Error writeUInt32s(ArrayRef<tpctypes::UInt32Write> Ws) {
std::promise<MSVCPError> ResultP;
auto ResultF = ResultP.get_future();
- writeUInt32s(Ws, [&](Error Err) { ResultP.set_value(std::move(Err)); });
+ writeUInt32sAsync(Ws,
+ [&](Error Err) { ResultP.set_value(std::move(Err)); });
return ResultF.get();
}
Error writeUInt64s(ArrayRef<tpctypes::UInt64Write> Ws) {
std::promise<MSVCPError> ResultP;
auto ResultF = ResultP.get_future();
- writeUInt64s(Ws, [&](Error Err) { ResultP.set_value(std::move(Err)); });
+ writeUInt64sAsync(Ws,
+ [&](Error Err) { ResultP.set_value(std::move(Err)); });
return ResultF.get();
}
Error writeBuffers(ArrayRef<tpctypes::BufferWrite> Ws) {
std::promise<MSVCPError> ResultP;
auto ResultF = ResultP.get_future();
- writeBuffers(Ws, [&](Error Err) { ResultP.set_value(std::move(Err)); });
+ writeBuffersAsync(Ws,
+ [&](Error Err) { ResultP.set_value(std::move(Err)); });
return ResultF.get();
}
};
@@ -113,10 +172,14 @@ public:
/// Contains the address of the dispatch function and context that the ORC
/// runtime can use to call functions in the JIT.
struct JITDispatchInfo {
- ExecutorAddress JITDispatchFunctionAddress;
- ExecutorAddress JITDispatchContextAddress;
+ ExecutorAddr JITDispatchFunction;
+ ExecutorAddr JITDispatchContext;
};
+ ExecutorProcessControl(std::shared_ptr<SymbolStringPool> SSP,
+ std::unique_ptr<TaskDispatcher> D)
+ : SSP(std::move(SSP)), D(std::move(D)) {}
+
virtual ~ExecutorProcessControl();
/// Return the ExecutionSession associated with this instance.
@@ -132,6 +195,8 @@ public:
/// Return a shared pointer to the SymbolStringPool for this instance.
std::shared_ptr<SymbolStringPool> getSymbolStringPool() const { return SSP; }
+ TaskDispatcher &getDispatcher() { return *D; }
+
/// Return the Triple for the target process.
const Triple &getTargetTriple() const { return TargetTriple; }
@@ -153,6 +218,29 @@ public:
return *MemMgr;
}
+ /// Returns the bootstrap symbol map.
+ const StringMap<ExecutorAddr> &getBootstrapSymbolsMap() const {
+ return BootstrapSymbols;
+ }
+
+ /// For each (ExecutorAddr&, StringRef) pair, looks up the string in the
+ /// bootstrap symbols map and writes its address to the ExecutorAddr if
+ /// found. If any symbol is not found then the function returns an error.
+ Error getBootstrapSymbols(
+ ArrayRef<std::pair<ExecutorAddr &, StringRef>> Pairs) const {
+ for (auto &KV : Pairs) {
+ auto I = BootstrapSymbols.find(KV.second);
+ if (I == BootstrapSymbols.end())
+ return make_error<StringError>("Symbol \"" + KV.second +
+ "\" not found "
+ "in bootstrap symbols map",
+ inconvertibleErrorCode());
+
+ KV.first = I->second;
+ }
+ return Error::success();
+ }
+
/// Load the dynamic library at the given path and return a handle to it.
/// If LibraryPath is null this function will return the global handle for
/// the target process.
@@ -163,44 +251,119 @@ public:
  /// The result of the lookup is a 2-dimensional array of target addresses
/// that correspond to the lookup order. If a required symbol is not
/// found then this method will return an error. If a weakly referenced
- /// symbol is not found then it be assigned a '0' value in the result.
- /// that correspond to the lookup order.
+  /// symbol is not found then it will be assigned a '0' value.
virtual Expected<std::vector<tpctypes::LookupResult>>
lookupSymbols(ArrayRef<LookupRequest> Request) = 0;
/// Run function with a main-like signature.
- virtual Expected<int32_t> runAsMain(JITTargetAddress MainFnAddr,
+ virtual Expected<int32_t> runAsMain(ExecutorAddr MainFnAddr,
ArrayRef<std::string> Args) = 0;
- /// Run a wrapper function in the executor.
+ /// Run a wrapper function in the executor. The given WFRHandler will be
+ /// called on the result when it is returned.
///
/// The wrapper function should be callable as:
///
/// \code{.cpp}
/// CWrapperFunctionResult fn(uint8_t *Data, uint64_t Size);
/// \endcode{.cpp}
- ///
- /// The given OnComplete function will be called to return the result.
- virtual void callWrapperAsync(SendResultFunction OnComplete,
- JITTargetAddress WrapperFnAddr,
+ virtual void callWrapperAsync(ExecutorAddr WrapperFnAddr,
+ IncomingWFRHandler OnComplete,
ArrayRef<char> ArgBuffer) = 0;
+ /// Run a wrapper function in the executor using the given Runner to dispatch
+ /// OnComplete when the result is ready.
+ template <typename RunPolicyT, typename FnT>
+ void callWrapperAsync(RunPolicyT &&Runner, ExecutorAddr WrapperFnAddr,
+ FnT &&OnComplete, ArrayRef<char> ArgBuffer) {
+ callWrapperAsync(
+ WrapperFnAddr, Runner(std::forward<FnT>(OnComplete)), ArgBuffer);
+ }
+
+ /// Run a wrapper function in the executor. OnComplete will be dispatched
+ /// as a GenericNamedTask using this instance's TaskDispatch object.
+ template <typename FnT>
+ void callWrapperAsync(ExecutorAddr WrapperFnAddr, FnT &&OnComplete,
+ ArrayRef<char> ArgBuffer) {
+ callWrapperAsync(RunAsTask(*D), WrapperFnAddr,
+ std::forward<FnT>(OnComplete), ArgBuffer);
+ }
+
+ /// Run a wrapper function in the executor. The wrapper function should be
+ /// callable as:
+ ///
+ /// \code{.cpp}
+ /// CWrapperFunctionResult fn(uint8_t *Data, uint64_t Size);
+ /// \endcode{.cpp}
+ shared::WrapperFunctionResult callWrapper(ExecutorAddr WrapperFnAddr,
+ ArrayRef<char> ArgBuffer) {
+ std::promise<shared::WrapperFunctionResult> RP;
+ auto RF = RP.get_future();
+ callWrapperAsync(
+ RunInPlace(), WrapperFnAddr,
+ [&](shared::WrapperFunctionResult R) {
+ RP.set_value(std::move(R));
+ }, ArgBuffer);
+ return RF.get();
+ }
+
+ /// Run a wrapper function using SPS to serialize the arguments and
+ /// deserialize the results.
+ template <typename SPSSignature, typename RunPolicyT, typename SendResultT,
+ typename... ArgTs>
+ void callSPSWrapperAsync(RunPolicyT &&Runner, ExecutorAddr WrapperFnAddr,
+ SendResultT &&SendResult, const ArgTs &...Args) {
+ shared::WrapperFunction<SPSSignature>::callAsync(
+ [this, WrapperFnAddr, Runner = std::move(Runner)]
+ (auto &&SendResult, const char *ArgData, size_t ArgSize) mutable {
+ this->callWrapperAsync(std::move(Runner), WrapperFnAddr,
+ std::move(SendResult),
+ ArrayRef<char>(ArgData, ArgSize));
+ },
+ std::forward<SendResultT>(SendResult), Args...);
+ }
+
+ /// Run a wrapper function using SPS to serialize the arguments and
+ /// deserialize the results.
+ template <typename SPSSignature, typename SendResultT, typename... ArgTs>
+ void callSPSWrapperAsync(ExecutorAddr WrapperFnAddr, SendResultT &&SendResult,
+ const ArgTs &...Args) {
+ callSPSWrapperAsync<SPSSignature>(RunAsTask(*D), WrapperFnAddr,
+ std::forward<SendResultT>(SendResult),
+ Args...);
+ }
+
+ /// Run a wrapper function using SPS to serialize the arguments and
+ /// deserialize the results.
+ ///
+ /// If SPSSignature is a non-void function signature then the second argument
+ /// (the first in the Args list) should be a reference to a return value.
+ template <typename SPSSignature, typename... WrapperCallArgTs>
+ Error callSPSWrapper(ExecutorAddr WrapperFnAddr,
+ WrapperCallArgTs &&...WrapperCallArgs) {
+ return shared::WrapperFunction<SPSSignature>::call(
+ [this, WrapperFnAddr](const char *ArgData, size_t ArgSize) {
+ return callWrapper(WrapperFnAddr, ArrayRef<char>(ArgData, ArgSize));
+ },
+ std::forward<WrapperCallArgTs>(WrapperCallArgs)...);
+ }
+
/// Disconnect from the target process.
///
/// This should be called after the JIT session is shut down.
virtual Error disconnect() = 0;
protected:
- ExecutorProcessControl(std::shared_ptr<SymbolStringPool> SSP)
- : SSP(std::move(SSP)) {}
std::shared_ptr<SymbolStringPool> SSP;
+ std::unique_ptr<TaskDispatcher> D;
ExecutionSession *ES = nullptr;
Triple TargetTriple;
unsigned PageSize = 0;
JITDispatchInfo JDI;
MemoryAccess *MemAccess = nullptr;
jitlink::JITLinkMemoryManager *MemMgr = nullptr;
+ StringMap<ExecutorAddr> BootstrapSymbols;
};
 /// An ExecutorProcessControl instance that asserts if any of its methods are
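
The new callWrapper/callSPSWrapper entry points above replace the old SendResultFunction-based API. A minimal blocking call might look like this (WrapperFnAddr is assumed to come from an earlier lookup, and the int32_t(int32_t) SPS signature is purely illustrative):

    llvm::ExitOnError ExitOnErr;
    int32_t Result = 0;
    // Serializes 42 with SPS, runs the wrapper in the executor, and
    // deserializes the return value into Result.
    ExitOnErr(EPC.callSPSWrapper<int32_t(int32_t)>(WrapperFnAddr, Result, 42));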
@@ -210,9 +373,12 @@ class UnsupportedExecutorProcessControl : public ExecutorProcessControl {
public:
UnsupportedExecutorProcessControl(
std::shared_ptr<SymbolStringPool> SSP = nullptr,
+ std::unique_ptr<TaskDispatcher> D = nullptr,
const std::string &TT = "", unsigned PageSize = 0)
: ExecutorProcessControl(SSP ? std::move(SSP)
- : std::make_shared<SymbolStringPool>()) {
+ : std::make_shared<SymbolStringPool>(),
+ D ? std::move(D)
+ : std::make_unique<InPlaceTaskDispatcher>()) {
this->TargetTriple = Triple(TT);
this->PageSize = PageSize;
}
@@ -226,13 +392,13 @@ public:
llvm_unreachable("Unsupported");
}
- Expected<int32_t> runAsMain(JITTargetAddress MainFnAddr,
+ Expected<int32_t> runAsMain(ExecutorAddr MainFnAddr,
ArrayRef<std::string> Args) override {
llvm_unreachable("Unsupported");
}
- void callWrapperAsync(SendResultFunction OnComplete,
- JITTargetAddress WrapperFnAddr,
+ void callWrapperAsync(ExecutorAddr WrapperFnAddr,
+ IncomingWFRHandler OnComplete,
ArrayRef<char> ArgBuffer) override {
llvm_unreachable("Unsupported");
}
@@ -246,8 +412,9 @@ class SelfExecutorProcessControl
private ExecutorProcessControl::MemoryAccess {
public:
SelfExecutorProcessControl(
- std::shared_ptr<SymbolStringPool> SSP, Triple TargetTriple,
- unsigned PageSize, std::unique_ptr<jitlink::JITLinkMemoryManager> MemMgr);
+ std::shared_ptr<SymbolStringPool> SSP, std::unique_ptr<TaskDispatcher> D,
+ Triple TargetTriple, unsigned PageSize,
+ std::unique_ptr<jitlink::JITLinkMemoryManager> MemMgr);
/// Create a SelfExecutorProcessControl with the given symbol string pool and
/// memory manager.
@@ -256,6 +423,7 @@ public:
/// be created and used by default.
static Expected<std::unique_ptr<SelfExecutorProcessControl>>
Create(std::shared_ptr<SymbolStringPool> SSP = nullptr,
+ std::unique_ptr<TaskDispatcher> D = nullptr,
std::unique_ptr<jitlink::JITLinkMemoryManager> MemMgr = nullptr);
Expected<tpctypes::DylibHandle> loadDylib(const char *DylibPath) override;
@@ -263,32 +431,32 @@ public:
Expected<std::vector<tpctypes::LookupResult>>
lookupSymbols(ArrayRef<LookupRequest> Request) override;
- Expected<int32_t> runAsMain(JITTargetAddress MainFnAddr,
+ Expected<int32_t> runAsMain(ExecutorAddr MainFnAddr,
ArrayRef<std::string> Args) override;
- void callWrapperAsync(SendResultFunction OnComplete,
- JITTargetAddress WrapperFnAddr,
+ void callWrapperAsync(ExecutorAddr WrapperFnAddr,
+ IncomingWFRHandler OnComplete,
ArrayRef<char> ArgBuffer) override;
Error disconnect() override;
private:
- void writeUInt8s(ArrayRef<tpctypes::UInt8Write> Ws,
- WriteResultFn OnWriteComplete) override;
+ void writeUInt8sAsync(ArrayRef<tpctypes::UInt8Write> Ws,
+ WriteResultFn OnWriteComplete) override;
- void writeUInt16s(ArrayRef<tpctypes::UInt16Write> Ws,
- WriteResultFn OnWriteComplete) override;
+ void writeUInt16sAsync(ArrayRef<tpctypes::UInt16Write> Ws,
+ WriteResultFn OnWriteComplete) override;
- void writeUInt32s(ArrayRef<tpctypes::UInt32Write> Ws,
- WriteResultFn OnWriteComplete) override;
+ void writeUInt32sAsync(ArrayRef<tpctypes::UInt32Write> Ws,
+ WriteResultFn OnWriteComplete) override;
- void writeUInt64s(ArrayRef<tpctypes::UInt64Write> Ws,
- WriteResultFn OnWriteComplete) override;
+ void writeUInt64sAsync(ArrayRef<tpctypes::UInt64Write> Ws,
+ WriteResultFn OnWriteComplete) override;
- void writeBuffers(ArrayRef<tpctypes::BufferWrite> Ws,
- WriteResultFn OnWriteComplete) override;
+ void writeBuffersAsync(ArrayRef<tpctypes::BufferWrite> Ws,
+ WriteResultFn OnWriteComplete) override;
- static shared::detail::CWrapperFunctionResult
+ static shared::CWrapperFunctionResult
jitDispatchViaWrapperFunctionManager(void *Ctx, const void *FnTag,
const char *Data, size_t Size);
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h
index 78e3ceef50e2..4d6d46595fc3 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h
@@ -45,6 +45,13 @@ class PointerType;
class Triple;
class Twine;
class Value;
+class MCDisassembler;
+class MCInstrAnalysis;
+
+namespace jitlink {
+class LinkGraph;
+class Symbol;
+} // namespace jitlink
namespace orc {
@@ -557,6 +564,33 @@ GlobalAlias *cloneGlobalAliasDecl(Module &Dst, const GlobalAlias &OrigA,
void cloneModuleFlagsMetadata(Module &Dst, const Module &Src,
ValueToValueMapTy &VMap);
+/// Introduce relocations to \p Sym in its own definition if there are any
+/// pointers formed via PC-relative address that do not already have a
+/// relocation.
+///
+/// This is useful when introducing indirection via a stub function at link time
+/// without compiler support. If a function pointer is formed without a
+/// relocation, e.g. in the definition of \c foo
+///
+/// \code
+/// _foo:
+/// leaq -7(%rip), rax # form pointer to _foo without relocation
+///     leaq -7(%rip), %rax   # form pointer to _foo without relocation
+/// leaq (%rip), %rax # uses X86_64_RELOC_SIGNED to '_foo'
+/// \endcode
+///
+/// the pointer to \c _foo computed by \c _foo and \c _bar may differ if we
+/// introduce a stub for _foo. If the pointer is used as a key, this may be
+/// observable to the program. This pass will attempt to introduce the missing
+/// "self-relocation" on the leaq instruction.
+///
+/// This is based on disassembly and should be considered "best effort". It may
+/// silently fail to add relocations.
+Error addFunctionPointerRelocationsToCurrentSymbol(jitlink::Symbol &Sym,
+ jitlink::LinkGraph &G,
+ MCDisassembler &Disassembler,
+ MCInstrAnalysis &MIA);
+
} // end namespace orc
} // end namespace llvm
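
A sketch of invoking the new utility from a JITLink pass list (Config, Disassembler and MIA are assumed to have been created for the graph's triple elsewhere; the choice of PreFixupPasses is an assumption, the point is only that the extra edges must exist before fixups are applied):

    Config.PreFixupPasses.push_back(
        [&Disassembler, &MIA](llvm::jitlink::LinkGraph &G) -> llvm::Error {
          // Best effort: try to add missing self-relocations to every
          // callable defined symbol in the graph.
          for (auto *Sym : G.defined_symbols())
            if (Sym->isCallable())
              if (auto Err =
                      llvm::orc::addFunctionPointerRelocationsToCurrentSymbol(
                          *Sym, G, Disassembler, MIA))
                return Err;
          return llvm::Error::success();
        });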
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/LLVMSPSSerializers.h b/llvm/include/llvm/ExecutionEngine/Orc/LLVMSPSSerializers.h
deleted file mode 100644
index f3d616deae8f..000000000000
--- a/llvm/include/llvm/ExecutionEngine/Orc/LLVMSPSSerializers.h
+++ /dev/null
@@ -1,69 +0,0 @@
-//===-- LLVMSPSSerializers.h - SPS serialization for LLVM types -*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// SPS Serialization for common LLVM types.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_EXECUTIONENGINE_ORC_LLVMSPSSERIALIZERS_H
-#define LLVM_EXECUTIONENGINE_ORC_LLVMSPSSERIALIZERS_H
-
-#include "llvm/ADT/StringMap.h"
-#include "llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h"
-
-namespace llvm {
-namespace orc {
-namespace shared {
-
-template <typename SPSValueT, typename ValueT>
-class SPSSerializationTraits<SPSSequence<SPSTuple<SPSString, SPSValueT>>,
- StringMap<ValueT>> {
-public:
- static size_t size(const StringMap<ValueT> &M) {
- size_t Sz = SPSArgList<uint64_t>::size(static_cast<uint64_t>(M.size()));
- for (auto &E : M)
- Sz += SPSArgList<SPSString, SPSValueT>::size(E.first(), E.second);
- return Sz;
- }
-
- static bool serialize(SPSOutputBuffer &OB, const StringMap<ValueT> &M) {
- if (!SPSArgList<uint64_t>::serialize(OB, static_cast<uint64_t>(M.size())))
- return false;
-
- for (auto &E : M)
- if (!SPSArgList<SPSString, SPSValueT>::serialize(OB, E.first(), E.second))
- return false;
-
- return true;
- }
-
- static bool deserialize(SPSInputBuffer &IB, StringMap<ValueT> &M) {
- uint64_t Size;
- assert(M.empty() && "M already contains elements");
-
- if (!SPSArgList<uint64_t>::deserialize(IB, Size))
- return false;
-
- while (Size--) {
- StringRef S;
- ValueT V;
- if (!SPSArgList<SPSString, SPSValueT>::deserialize(IB, S, V))
- return false;
- if (!M.insert(std::make_pair(S, V)).second)
- return false;
- }
-
- return true;
- }
-};
-
-} // end namespace shared
-} // end namespace orc
-} // end namespace llvm
-
-#endif // LLVM_EXECUTIONENGINE_ORC_LLVMSPSSERIALIZERS_H
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/LookupAndRecordAddrs.h b/llvm/include/llvm/ExecutionEngine/Orc/LookupAndRecordAddrs.h
new file mode 100644
index 000000000000..a598405ee4f6
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/Orc/LookupAndRecordAddrs.h
@@ -0,0 +1,70 @@
+//===-- LookupAndRecordAddrs.h - Symbol lookup support utility --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Record the addresses of a set of symbols into ExecutorAddr objects.
+//
+// This can be used to avoid repeated lookup (via ExecutionSession::lookup) of
+// the given symbols.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_LOOKUPANDRECORDADDRS_H
+#define LLVM_EXECUTIONENGINE_ORC_LOOKUPANDRECORDADDRS_H
+
+#include "llvm/ADT/FunctionExtras.h"
+#include "llvm/ExecutionEngine/Orc/Core.h"
+#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h"
+
+#include <vector>
+
+namespace llvm {
+namespace orc {
+
+/// Record addresses of the given symbols in the given ExecutorAddrs.
+///
+/// Useful for making permanent records of symbol addresses to call or
+/// access in the executor (e.g. runtime support functions in Platform
+/// subclasses).
+///
+/// By default the symbols are looked up using
+/// SymbolLookupFlags::RequiredSymbol, and an error will be generated if any of
+/// the requested symbols are not defined.
+///
+/// If SymbolLookupFlags::WeaklyReferencedSymbol is used then any missing
+/// symbols will have their corresponding address objects set to zero, and
+/// this function will never generate an error (the caller will need to check
+/// addresses before using them).
+///
+/// Asynchronous version.
+void lookupAndRecordAddrs(
+ unique_function<void(Error)> OnRecorded, ExecutionSession &ES, LookupKind K,
+ const JITDylibSearchOrder &SearchOrder,
+ std::vector<std::pair<SymbolStringPtr, ExecutorAddr *>> Pairs,
+ SymbolLookupFlags LookupFlags = SymbolLookupFlags::RequiredSymbol);
+
+/// Record addresses of the given symbols in the given ExecutorAddrs.
+///
+/// Blocking version.
+Error lookupAndRecordAddrs(
+ ExecutionSession &ES, LookupKind K, const JITDylibSearchOrder &SearchOrder,
+ std::vector<std::pair<SymbolStringPtr, ExecutorAddr *>> Pairs,
+ SymbolLookupFlags LookupFlags = SymbolLookupFlags::RequiredSymbol);
+
+/// Record addresses of given symbols in the given ExecutorAddrs.
+///
+/// ExecutorProcessControl lookup version. Lookups are always implicitly
+/// weak.
+Error lookupAndRecordAddrs(
+ ExecutorProcessControl &EPC, tpctypes::DylibHandle H,
+ std::vector<std::pair<SymbolStringPtr, ExecutorAddr *>> Pairs,
+ SymbolLookupFlags LookupFlags = SymbolLookupFlags::RequiredSymbol);
+
+} // End namespace orc
+} // End namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_LOOKUPANDRECORDADDRS_H
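
A sketch of the blocking variant, recording two runtime entry points into ExecutorAddr variables (ES and PlatformJD assumed to exist; the symbol names are illustrative):

    llvm::ExitOnError ExitOnErr;
    llvm::orc::ExecutorAddr Bootstrap, Shutdown;
    ExitOnErr(llvm::orc::lookupAndRecordAddrs(
        ES, llvm::orc::LookupKind::Static,
        llvm::orc::makeJITDylibSearchOrder(&PlatformJD),
        {{ES.intern("__orc_rt_elfnix_platform_bootstrap"), &Bootstrap},
         {ES.intern("__orc_rt_elfnix_platform_shutdown"), &Shutdown}}));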
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h b/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h
index f77dfd208413..d7b5e2eda6ee 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h
@@ -16,7 +16,6 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ExecutionEngine/Orc/Core.h"
#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h"
-#include "llvm/ExecutionEngine/Orc/LLVMSPSSerializers.h"
#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h"
@@ -27,22 +26,16 @@
namespace llvm {
namespace orc {
-struct MachOPerObjectSectionsToRegister {
- ExecutorAddressRange EHFrameSection;
- ExecutorAddressRange ThreadDataSection;
-};
-
struct MachOJITDylibInitializers {
- using SectionList = std::vector<ExecutorAddressRange>;
+ using SectionList = std::vector<ExecutorAddrRange>;
- MachOJITDylibInitializers(std::string Name,
- ExecutorAddress MachOHeaderAddress)
+ MachOJITDylibInitializers(std::string Name, ExecutorAddr MachOHeaderAddress)
: Name(std::move(Name)),
MachOHeaderAddress(std::move(MachOHeaderAddress)) {}
std::string Name;
- ExecutorAddress MachOHeaderAddress;
- ExecutorAddress ObjCImageInfoAddress;
+ ExecutorAddr MachOHeaderAddress;
+ ExecutorAddr ObjCImageInfoAddress;
StringMap<SectionList> InitSections;
};
@@ -155,15 +148,12 @@ private:
using InitSymbolDepMap =
DenseMap<MaterializationResponsibility *, JITLinkSymbolSet>;
- void addInitializerSupportPasses(MaterializationResponsibility &MR,
- jitlink::PassConfiguration &Config);
-
- void addMachOHeaderSupportPasses(MaterializationResponsibility &MR,
- jitlink::PassConfiguration &Config);
-
void addEHAndTLVSupportPasses(MaterializationResponsibility &MR,
jitlink::PassConfiguration &Config);
+ Error associateJITDylibHeaderSymbol(jitlink::LinkGraph &G,
+ MaterializationResponsibility &MR);
+
Error preserveInitSections(jitlink::LinkGraph &G,
MaterializationResponsibility &MR);
@@ -174,6 +164,10 @@ private:
Error fixTLVSectionsAndEdges(jitlink::LinkGraph &G, JITDylib &JD);
+ Error registerEHAndTLVSections(jitlink::LinkGraph &G);
+
+ Error registerEHSectionsPhase1(jitlink::LinkGraph &G);
+
std::mutex PluginMutex;
MachOPlatform &MP;
DenseMap<JITDylib *, std::pair<uint32_t, uint32_t>> ObjCImageInfos;
@@ -186,7 +180,7 @@ private:
using SendDeinitializerSequenceFn =
unique_function<void(Expected<MachOJITDylibDeinitializerSequence>)>;
- using SendSymbolAddressFn = unique_function<void(Expected<ExecutorAddress>)>;
+ using SendSymbolAddressFn = unique_function<void(Expected<ExecutorAddr>)>;
static bool supportedTarget(const Triple &TT);
@@ -209,31 +203,34 @@ private:
StringRef JDName);
void rt_getDeinitializers(SendDeinitializerSequenceFn SendResult,
- ExecutorAddress Handle);
+ ExecutorAddr Handle);
- void rt_lookupSymbol(SendSymbolAddressFn SendResult, ExecutorAddress Handle,
+ void rt_lookupSymbol(SendSymbolAddressFn SendResult, ExecutorAddr Handle,
StringRef SymbolName);
// Records the addresses of runtime symbols used by the platform.
Error bootstrapMachORuntime(JITDylib &PlatformJD);
- Error registerInitInfo(JITDylib &JD, ExecutorAddress ObjCImageInfoAddr,
+ Error registerInitInfo(JITDylib &JD, ExecutorAddr ObjCImageInfoAddr,
ArrayRef<jitlink::Section *> InitSections);
- Error registerPerObjectSections(const MachOPerObjectSectionsToRegister &POSR);
-
Expected<uint64_t> createPThreadKey();
+ enum PlatformState { BootstrapPhase1, BootstrapPhase2, Initialized };
+
ExecutionSession &ES;
ObjectLinkingLayer &ObjLinkingLayer;
SymbolStringPtr MachOHeaderStartSymbol;
- std::atomic<bool> RuntimeBootstrapped{false};
+ std::atomic<PlatformState> State{BootstrapPhase1};
- ExecutorAddress orc_rt_macho_platform_bootstrap;
- ExecutorAddress orc_rt_macho_platform_shutdown;
- ExecutorAddress orc_rt_macho_register_object_sections;
- ExecutorAddress orc_rt_macho_create_pthread_key;
+ ExecutorAddr orc_rt_macho_platform_bootstrap;
+ ExecutorAddr orc_rt_macho_platform_shutdown;
+ ExecutorAddr orc_rt_macho_register_ehframe_section;
+ ExecutorAddr orc_rt_macho_deregister_ehframe_section;
+ ExecutorAddr orc_rt_macho_register_thread_data_section;
+ ExecutorAddr orc_rt_macho_deregister_thread_data_section;
+ ExecutorAddr orc_rt_macho_create_pthread_key;
DenseMap<JITDylib *, SymbolLookupSet> RegisteredInitSymbols;
@@ -241,7 +238,6 @@ private:
// aggregating data from the jitlink.
std::mutex PlatformMutex;
DenseMap<JITDylib *, MachOJITDylibInitializers> InitSeqs;
- std::vector<MachOPerObjectSectionsToRegister> BootstrapPOSRs;
DenseMap<JITTargetAddress, JITDylib *> HeaderAddrToJITDylib;
DenseMap<JITDylib *, uint64_t> JITDylibToPThreadKey;
@@ -249,38 +245,12 @@ private:
namespace shared {
-using SPSMachOPerObjectSectionsToRegister =
- SPSTuple<SPSExecutorAddressRange, SPSExecutorAddressRange>;
-
-template <>
-class SPSSerializationTraits<SPSMachOPerObjectSectionsToRegister,
- MachOPerObjectSectionsToRegister> {
-
-public:
- static size_t size(const MachOPerObjectSectionsToRegister &MOPOSR) {
- return SPSMachOPerObjectSectionsToRegister::AsArgList::size(
- MOPOSR.EHFrameSection, MOPOSR.ThreadDataSection);
- }
-
- static bool serialize(SPSOutputBuffer &OB,
- const MachOPerObjectSectionsToRegister &MOPOSR) {
- return SPSMachOPerObjectSectionsToRegister::AsArgList::serialize(
- OB, MOPOSR.EHFrameSection, MOPOSR.ThreadDataSection);
- }
-
- static bool deserialize(SPSInputBuffer &IB,
- MachOPerObjectSectionsToRegister &MOPOSR) {
- return SPSMachOPerObjectSectionsToRegister::AsArgList::deserialize(
- IB, MOPOSR.EHFrameSection, MOPOSR.ThreadDataSection);
- }
-};
-
-using SPSNamedExecutorAddressRangeSequenceMap =
- SPSSequence<SPSTuple<SPSString, SPSExecutorAddressRangeSequence>>;
+using SPSNamedExecutorAddrRangeSequenceMap =
+ SPSSequence<SPSTuple<SPSString, SPSExecutorAddrRangeSequence>>;
using SPSMachOJITDylibInitializers =
- SPSTuple<SPSString, SPSExecutorAddress, SPSExecutorAddress,
- SPSNamedExecutorAddressRangeSequenceMap>;
+ SPSTuple<SPSString, SPSExecutorAddr, SPSExecutorAddr,
+ SPSNamedExecutorAddrRangeSequenceMap>;
using SPSMachOJITDylibInitializerSequence =
SPSSequence<SPSMachOJITDylibInitializers>;
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h b/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h
index 5632118eee4e..109922a46e26 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h
@@ -184,13 +184,13 @@ public:
}
private:
- using AllocPtr = std::unique_ptr<jitlink::JITLinkMemoryManager::Allocation>;
+ using FinalizedAlloc = jitlink::JITLinkMemoryManager::FinalizedAlloc;
void modifyPassConfig(MaterializationResponsibility &MR,
jitlink::LinkGraph &G,
jitlink::PassConfiguration &PassConfig);
void notifyLoaded(MaterializationResponsibility &MR);
- Error notifyEmitted(MaterializationResponsibility &MR, AllocPtr Alloc);
+ Error notifyEmitted(MaterializationResponsibility &MR, FinalizedAlloc FA);
Error handleRemoveResources(ResourceKey K) override;
void handleTransferResources(ResourceKey DstKey, ResourceKey SrcKey) override;
@@ -201,7 +201,7 @@ private:
bool OverrideObjectFlags = false;
bool AutoClaimObjectSymbols = false;
ReturnObjectBufferFunction ReturnObjectBuffer;
- DenseMap<ResourceKey, std::vector<AllocPtr>> Allocs;
+ DenseMap<ResourceKey, std::vector<FinalizedAlloc>> Allocs;
std::vector<std::unique_ptr<Plugin>> Plugins;
};
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/OrcRPCExecutorProcessControl.h b/llvm/include/llvm/ExecutionEngine/Orc/OrcRPCExecutorProcessControl.h
deleted file mode 100644
index 4310ba9ce9e0..000000000000
--- a/llvm/include/llvm/ExecutionEngine/Orc/OrcRPCExecutorProcessControl.h
+++ /dev/null
@@ -1,436 +0,0 @@
-//===-- OrcRPCExecutorProcessControl.h - Remote target control --*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Executor control via ORC RPC.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_EXECUTIONENGINE_ORC_ORCRPCEXECUTORPROCESSCONTROL_H
-#define LLVM_EXECUTIONENGINE_ORC_ORCRPCEXECUTORPROCESSCONTROL_H
-
-#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h"
-#include "llvm/ExecutionEngine/Orc/Shared/RPCUtils.h"
-#include "llvm/ExecutionEngine/Orc/Shared/RawByteChannel.h"
-#include "llvm/ExecutionEngine/Orc/TargetProcess/OrcRPCTPCServer.h"
-#include "llvm/Support/MSVCErrorWorkarounds.h"
-
-namespace llvm {
-namespace orc {
-
-/// JITLinkMemoryManager implementation for a process connected via an ORC RPC
-/// endpoint.
-template <typename OrcRPCEPCImplT>
-class OrcRPCEPCJITLinkMemoryManager : public jitlink::JITLinkMemoryManager {
-private:
- struct HostAlloc {
- std::unique_ptr<char[]> Mem;
- uint64_t Size;
- };
-
- struct TargetAlloc {
- JITTargetAddress Address = 0;
- uint64_t AllocatedSize = 0;
- };
-
- using HostAllocMap = DenseMap<int, HostAlloc>;
- using TargetAllocMap = DenseMap<int, TargetAlloc>;
-
-public:
- class OrcRPCAllocation : public Allocation {
- public:
- OrcRPCAllocation(OrcRPCEPCJITLinkMemoryManager<OrcRPCEPCImplT> &Parent,
- HostAllocMap HostAllocs, TargetAllocMap TargetAllocs)
- : Parent(Parent), HostAllocs(std::move(HostAllocs)),
- TargetAllocs(std::move(TargetAllocs)) {
- assert(HostAllocs.size() == TargetAllocs.size() &&
- "HostAllocs size should match TargetAllocs");
- }
-
- ~OrcRPCAllocation() override {
- assert(TargetAllocs.empty() && "failed to deallocate");
- }
-
- MutableArrayRef<char> getWorkingMemory(ProtectionFlags Seg) override {
- auto I = HostAllocs.find(Seg);
- assert(I != HostAllocs.end() && "No host allocation for segment");
- auto &HA = I->second;
- return {HA.Mem.get(), static_cast<size_t>(HA.Size)};
- }
-
- JITTargetAddress getTargetMemory(ProtectionFlags Seg) override {
- auto I = TargetAllocs.find(Seg);
- assert(I != TargetAllocs.end() && "No target allocation for segment");
- return I->second.Address;
- }
-
- void finalizeAsync(FinalizeContinuation OnFinalize) override {
-
- std::vector<tpctypes::BufferWrite> BufferWrites;
- orcrpctpc::ReleaseOrFinalizeMemRequest FMR;
-
- for (auto &KV : HostAllocs) {
- assert(TargetAllocs.count(KV.first) &&
- "No target allocation for buffer");
- auto &HA = KV.second;
- auto &TA = TargetAllocs[KV.first];
- BufferWrites.push_back({TA.Address, StringRef(HA.Mem.get(), HA.Size)});
- FMR.push_back({orcrpctpc::toWireProtectionFlags(
- static_cast<sys::Memory::ProtectionFlags>(KV.first)),
- TA.Address, TA.AllocatedSize});
- }
-
- DEBUG_WITH_TYPE("orc", {
- dbgs() << "finalizeAsync " << (void *)this << ":\n";
- auto FMRI = FMR.begin();
- for (auto &B : BufferWrites) {
- auto Prot = FMRI->Prot;
- ++FMRI;
- dbgs() << " Writing " << formatv("{0:x16}", B.Buffer.size())
- << " bytes to " << ((Prot & orcrpctpc::WPF_Read) ? 'R' : '-')
- << ((Prot & orcrpctpc::WPF_Write) ? 'W' : '-')
- << ((Prot & orcrpctpc::WPF_Exec) ? 'X' : '-')
- << " segment: local " << (const void *)B.Buffer.data()
- << " -> target " << formatv("{0:x16}", B.Address) << "\n";
- }
- });
- if (auto Err =
- Parent.Parent.getMemoryAccess().writeBuffers(BufferWrites)) {
- OnFinalize(std::move(Err));
- return;
- }
-
- DEBUG_WITH_TYPE("orc", dbgs() << " Applying permissions...\n");
- if (auto Err =
- Parent.getEndpoint().template callAsync<orcrpctpc::FinalizeMem>(
- [OF = std::move(OnFinalize)](Error Err2) {
- // FIXME: Dispatch to work queue.
- std::thread([OF = std::move(OF),
- Err3 = std::move(Err2)]() mutable {
- DEBUG_WITH_TYPE(
- "orc", { dbgs() << " finalizeAsync complete\n"; });
- OF(std::move(Err3));
- }).detach();
- return Error::success();
- },
- FMR)) {
- DEBUG_WITH_TYPE("orc", dbgs() << " failed.\n");
- Parent.getEndpoint().abandonPendingResponses();
- Parent.reportError(std::move(Err));
- }
- DEBUG_WITH_TYPE("orc", {
- dbgs() << "Leaving finalizeAsync (finalization may continue in "
- "background)\n";
- });
- }
-
- Error deallocate() override {
- orcrpctpc::ReleaseOrFinalizeMemRequest RMR;
- for (auto &KV : TargetAllocs)
- RMR.push_back({orcrpctpc::toWireProtectionFlags(
- static_cast<sys::Memory::ProtectionFlags>(KV.first)),
- KV.second.Address, KV.second.AllocatedSize});
- TargetAllocs.clear();
-
- return Parent.getEndpoint().template callB<orcrpctpc::ReleaseMem>(RMR);
- }
-
- private:
- OrcRPCEPCJITLinkMemoryManager<OrcRPCEPCImplT> &Parent;
- HostAllocMap HostAllocs;
- TargetAllocMap TargetAllocs;
- };
-
- OrcRPCEPCJITLinkMemoryManager(OrcRPCEPCImplT &Parent) : Parent(Parent) {}
-
- Expected<std::unique_ptr<Allocation>>
- allocate(const jitlink::JITLinkDylib *JD,
- const SegmentsRequestMap &Request) override {
- orcrpctpc::ReserveMemRequest RMR;
- HostAllocMap HostAllocs;
-
- for (auto &KV : Request) {
- assert(KV.second.getContentSize() <= std::numeric_limits<size_t>::max() &&
- "Content size is out-of-range for host");
-
- RMR.push_back({orcrpctpc::toWireProtectionFlags(
- static_cast<sys::Memory::ProtectionFlags>(KV.first)),
- KV.second.getContentSize() + KV.second.getZeroFillSize(),
- KV.second.getAlignment()});
- HostAllocs[KV.first] = {
- std::make_unique<char[]>(KV.second.getContentSize()),
- KV.second.getContentSize()};
- }
-
- DEBUG_WITH_TYPE("orc", {
- dbgs() << "Orc remote memmgr got request:\n";
- for (auto &KV : Request)
- dbgs() << " permissions: "
- << ((KV.first & sys::Memory::MF_READ) ? 'R' : '-')
- << ((KV.first & sys::Memory::MF_WRITE) ? 'W' : '-')
- << ((KV.first & sys::Memory::MF_EXEC) ? 'X' : '-')
- << ", content size: "
- << formatv("{0:x16}", KV.second.getContentSize())
- << " + zero-fill-size: "
- << formatv("{0:x16}", KV.second.getZeroFillSize())
- << ", align: " << KV.second.getAlignment() << "\n";
- });
-
- // FIXME: LLVM RPC needs to be fixed to support alt
- // serialization/deserialization on return types. For now just
- // translate from std::map to DenseMap manually.
- auto TmpTargetAllocs =
- Parent.getEndpoint().template callB<orcrpctpc::ReserveMem>(RMR);
- if (!TmpTargetAllocs)
- return TmpTargetAllocs.takeError();
-
- if (TmpTargetAllocs->size() != RMR.size())
- return make_error<StringError>(
- "Number of target allocations does not match request",
- inconvertibleErrorCode());
-
- TargetAllocMap TargetAllocs;
- for (auto &E : *TmpTargetAllocs)
- TargetAllocs[orcrpctpc::fromWireProtectionFlags(E.Prot)] = {
- E.Address, E.AllocatedSize};
-
- DEBUG_WITH_TYPE("orc", {
- auto HAI = HostAllocs.begin();
- for (auto &KV : TargetAllocs)
- dbgs() << " permissions: "
- << ((KV.first & sys::Memory::MF_READ) ? 'R' : '-')
- << ((KV.first & sys::Memory::MF_WRITE) ? 'W' : '-')
- << ((KV.first & sys::Memory::MF_EXEC) ? 'X' : '-')
- << " assigned local " << (void *)HAI->second.Mem.get()
- << ", target " << formatv("{0:x16}", KV.second.Address) << "\n";
- });
-
- return std::make_unique<OrcRPCAllocation>(*this, std::move(HostAllocs),
- std::move(TargetAllocs));
- }
-
-private:
- void reportError(Error Err) { Parent.reportError(std::move(Err)); }
-
- decltype(std::declval<OrcRPCEPCImplT>().getEndpoint()) getEndpoint() {
- return Parent.getEndpoint();
- }
-
- OrcRPCEPCImplT &Parent;
-};
-
-/// ExecutorProcessControl::MemoryAccess implementation for a process connected
-/// via an ORC RPC endpoint.
-template <typename OrcRPCEPCImplT>
-class OrcRPCEPCMemoryAccess : public ExecutorProcessControl::MemoryAccess {
-public:
- OrcRPCEPCMemoryAccess(OrcRPCEPCImplT &Parent) : Parent(Parent) {}
-
- void writeUInt8s(ArrayRef<tpctypes::UInt8Write> Ws,
- WriteResultFn OnWriteComplete) override {
- writeViaRPC<orcrpctpc::WriteUInt8s>(Ws, std::move(OnWriteComplete));
- }
-
- void writeUInt16s(ArrayRef<tpctypes::UInt16Write> Ws,
- WriteResultFn OnWriteComplete) override {
- writeViaRPC<orcrpctpc::WriteUInt16s>(Ws, std::move(OnWriteComplete));
- }
-
- void writeUInt32s(ArrayRef<tpctypes::UInt32Write> Ws,
- WriteResultFn OnWriteComplete) override {
- writeViaRPC<orcrpctpc::WriteUInt32s>(Ws, std::move(OnWriteComplete));
- }
-
- void writeUInt64s(ArrayRef<tpctypes::UInt64Write> Ws,
- WriteResultFn OnWriteComplete) override {
- writeViaRPC<orcrpctpc::WriteUInt64s>(Ws, std::move(OnWriteComplete));
- }
-
- void writeBuffers(ArrayRef<tpctypes::BufferWrite> Ws,
- WriteResultFn OnWriteComplete) override {
- writeViaRPC<orcrpctpc::WriteBuffers>(Ws, std::move(OnWriteComplete));
- }
-
-private:
- template <typename WriteRPCFunction, typename WriteElementT>
- void writeViaRPC(ArrayRef<WriteElementT> Ws, WriteResultFn OnWriteComplete) {
- if (auto Err = Parent.getEndpoint().template callAsync<WriteRPCFunction>(
- [OWC = std::move(OnWriteComplete)](Error Err2) mutable -> Error {
- OWC(std::move(Err2));
- return Error::success();
- },
- Ws)) {
- Parent.reportError(std::move(Err));
- Parent.getEndpoint().abandonPendingResponses();
- }
- }
-
- OrcRPCEPCImplT &Parent;
-};
-
-// ExecutorProcessControl for a process connected via an ORC RPC Endpoint.
-template <typename RPCEndpointT>
-class OrcRPCExecutorProcessControlBase : public ExecutorProcessControl {
-public:
- using ErrorReporter = unique_function<void(Error)>;
-
- using OnCloseConnectionFunction = unique_function<Error(Error)>;
-
- OrcRPCExecutorProcessControlBase(std::shared_ptr<SymbolStringPool> SSP,
- RPCEndpointT &EP, ErrorReporter ReportError)
- : ExecutorProcessControl(std::move(SSP)),
- ReportError(std::move(ReportError)), EP(EP) {
- using ThisT = OrcRPCExecutorProcessControlBase<RPCEndpointT>;
- EP.template addAsyncHandler<orcrpctpc::RunWrapper>(*this,
- &ThisT::runWrapperInJIT);
- }
-
- void reportError(Error Err) { ReportError(std::move(Err)); }
-
- RPCEndpointT &getEndpoint() { return EP; }
-
- Expected<tpctypes::DylibHandle> loadDylib(const char *DylibPath) override {
- DEBUG_WITH_TYPE("orc", {
- dbgs() << "Loading dylib \"" << (DylibPath ? DylibPath : "") << "\" ";
- if (!DylibPath)
- dbgs() << "(process symbols)";
- dbgs() << "\n";
- });
- if (!DylibPath)
- DylibPath = "";
- auto H = EP.template callB<orcrpctpc::LoadDylib>(DylibPath);
- DEBUG_WITH_TYPE("orc", {
- if (H)
- dbgs() << " got handle " << formatv("{0:x16}", *H) << "\n";
- else
- dbgs() << " error, unable to load\n";
- });
- return H;
- }
-
- Expected<std::vector<tpctypes::LookupResult>>
- lookupSymbols(ArrayRef<LookupRequest> Request) override {
- std::vector<orcrpctpc::RemoteLookupRequest> RR;
- for (auto &E : Request) {
- RR.push_back({});
- RR.back().first = E.Handle;
- for (auto &KV : E.Symbols)
- RR.back().second.push_back(
- {(*KV.first).str(),
- KV.second == SymbolLookupFlags::WeaklyReferencedSymbol});
- }
- DEBUG_WITH_TYPE("orc", {
- dbgs() << "Compound lookup:\n";
- for (auto &R : Request) {
- dbgs() << " In " << formatv("{0:x16}", R.Handle) << ": {";
- bool First = true;
- for (auto &KV : R.Symbols) {
- dbgs() << (First ? "" : ",") << " " << *KV.first;
- First = false;
- }
- dbgs() << " }\n";
- }
- });
- return EP.template callB<orcrpctpc::LookupSymbols>(RR);
- }
-
- Expected<int32_t> runAsMain(JITTargetAddress MainFnAddr,
- ArrayRef<std::string> Args) override {
- DEBUG_WITH_TYPE("orc", {
- dbgs() << "Running as main: " << formatv("{0:x16}", MainFnAddr)
- << ", args = [";
- for (unsigned I = 0; I != Args.size(); ++I)
- dbgs() << (I ? "," : "") << " \"" << Args[I] << "\"";
- dbgs() << "]\n";
- });
- auto Result = EP.template callB<orcrpctpc::RunMain>(MainFnAddr, Args);
- DEBUG_WITH_TYPE("orc", {
- dbgs() << " call to " << formatv("{0:x16}", MainFnAddr);
- if (Result)
- dbgs() << " returned result " << *Result << "\n";
- else
- dbgs() << " failed\n";
- });
- return Result;
- }
-
- void callWrapperAsync(SendResultFunction OnComplete,
- JITTargetAddress WrapperFnAddr,
- ArrayRef<char> ArgBuffer) override {
- DEBUG_WITH_TYPE("orc", {
- dbgs() << "Running as wrapper function "
- << formatv("{0:x16}", WrapperFnAddr) << " with "
- << formatv("{0:x16}", ArgBuffer.size()) << " argument buffer\n";
- });
- auto Result = EP.template callB<orcrpctpc::RunWrapper>(
- WrapperFnAddr,
- ArrayRef<uint8_t>(reinterpret_cast<const uint8_t *>(ArgBuffer.data()),
- ArgBuffer.size()));
-
- if (!Result)
- OnComplete(shared::WrapperFunctionResult::createOutOfBandError(
- toString(Result.takeError())));
- OnComplete(std::move(*Result));
- }
-
- Error closeConnection(OnCloseConnectionFunction OnCloseConnection) {
- DEBUG_WITH_TYPE("orc", dbgs() << "Closing connection to remote\n");
- return EP.template callAsync<orcrpctpc::CloseConnection>(
- std::move(OnCloseConnection));
- }
-
- Error closeConnectionAndWait() {
- std::promise<MSVCPError> P;
- auto F = P.get_future();
- if (auto Err = closeConnection([&](Error Err2) -> Error {
- P.set_value(std::move(Err2));
- return Error::success();
- })) {
- EP.abandonAllPendingResponses();
- return joinErrors(std::move(Err), F.get());
- }
- return F.get();
- }
-
-protected:
- /// Subclasses must call this during construction to initialize the
- /// TargetTriple and PageSize members.
- Error initializeORCRPCEPCBase() {
- if (auto EPI = EP.template callB<orcrpctpc::GetExecutorProcessInfo>()) {
- this->TargetTriple = Triple(EPI->Triple);
- this->PageSize = PageSize;
- this->JDI = {ExecutorAddress(EPI->DispatchFuncAddr),
- ExecutorAddress(EPI->DispatchCtxAddr)};
- return Error::success();
- } else
- return EPI.takeError();
- }
-
-private:
- Error runWrapperInJIT(
- std::function<Error(Expected<shared::WrapperFunctionResult>)> SendResult,
- JITTargetAddress FunctionTag, std::vector<uint8_t> ArgBuffer) {
-
- getExecutionSession().runJITDispatchHandler(
- [this, SendResult = std::move(SendResult)](
- Expected<shared::WrapperFunctionResult> R) {
- if (auto Err = SendResult(std::move(R)))
- ReportError(std::move(Err));
- },
- FunctionTag,
- {reinterpret_cast<const char *>(ArgBuffer.data()), ArgBuffer.size()});
- return Error::success();
- }
-
- ErrorReporter ReportError;
- RPCEndpointT &EP;
-};
-
-} // end namespace orc
-} // end namespace llvm
-
-#endif // LLVM_EXECUTIONENGINE_ORC_ORCRPCEXECUTORPROCESSCONTROL_H
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h b/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h
deleted file mode 100644
index 3d139740d677..000000000000
--- a/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h
+++ /dev/null
@@ -1,925 +0,0 @@
-//===- OrcRemoteTargetClient.h - Orc Remote-target Client -------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the OrcRemoteTargetClient class and helpers. This class
-// can be used to communicate over an RawByteChannel with an
-// OrcRemoteTargetServer instance to support remote-JITing.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETCLIENT_H
-#define LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETCLIENT_H
-
-#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/ExecutionEngine/JITSymbol.h"
-#include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h"
-#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h"
-#include "llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h"
-#include "llvm/ExecutionEngine/RuntimeDyld.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/Error.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/Memory.h"
-#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
-#include <cassert>
-#include <cstdint>
-#include <memory>
-#include <string>
-#include <tuple>
-#include <utility>
-#include <vector>
-
-#define DEBUG_TYPE "orc-remote"
-
-namespace llvm {
-namespace orc {
-namespace remote {
-
-/// This class provides utilities (including memory manager, indirect stubs
-/// manager, and compile callback manager types) that support remote JITing
-/// in ORC.
-///
-/// Each of the utility classes talks to a JIT server (an instance of the
-/// OrcRemoteTargetServer class) via an RPC system (see RPCUtils.h) to carry out
-/// its actions.
-class OrcRemoteTargetClient
- : public shared::SingleThreadedRPCEndpoint<shared::RawByteChannel> {
-public:
- /// Remote-mapped RuntimeDyld-compatible memory manager.
- class RemoteRTDyldMemoryManager : public RuntimeDyld::MemoryManager {
- friend class OrcRemoteTargetClient;
-
- public:
- ~RemoteRTDyldMemoryManager() {
- Client.destroyRemoteAllocator(Id);
- LLVM_DEBUG(dbgs() << "Destroyed remote allocator " << Id << "\n");
- }
-
- RemoteRTDyldMemoryManager(const RemoteRTDyldMemoryManager &) = delete;
- RemoteRTDyldMemoryManager &
- operator=(const RemoteRTDyldMemoryManager &) = delete;
- RemoteRTDyldMemoryManager(RemoteRTDyldMemoryManager &&) = default;
- RemoteRTDyldMemoryManager &operator=(RemoteRTDyldMemoryManager &&) = delete;
-
- uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
- unsigned SectionID,
- StringRef SectionName) override {
- Unmapped.back().CodeAllocs.emplace_back(Size, Alignment);
- uint8_t *Alloc = reinterpret_cast<uint8_t *>(
- Unmapped.back().CodeAllocs.back().getLocalAddress());
- LLVM_DEBUG(dbgs() << "Allocator " << Id << " allocated code for "
- << SectionName << ": " << Alloc << " (" << Size
- << " bytes, alignment " << Alignment << ")\n");
- return Alloc;
- }
-
- uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
- unsigned SectionID, StringRef SectionName,
- bool IsReadOnly) override {
- if (IsReadOnly) {
- Unmapped.back().RODataAllocs.emplace_back(Size, Alignment);
- uint8_t *Alloc = reinterpret_cast<uint8_t *>(
- Unmapped.back().RODataAllocs.back().getLocalAddress());
- LLVM_DEBUG(dbgs() << "Allocator " << Id << " allocated ro-data for "
- << SectionName << ": " << Alloc << " (" << Size
- << " bytes, alignment " << Alignment << ")\n");
- return Alloc;
- } // else...
-
- Unmapped.back().RWDataAllocs.emplace_back(Size, Alignment);
- uint8_t *Alloc = reinterpret_cast<uint8_t *>(
- Unmapped.back().RWDataAllocs.back().getLocalAddress());
- LLVM_DEBUG(dbgs() << "Allocator " << Id << " allocated rw-data for "
- << SectionName << ": " << Alloc << " (" << Size
- << " bytes, alignment " << Alignment << ")\n");
- return Alloc;
- }
-
- void reserveAllocationSpace(uintptr_t CodeSize, uint32_t CodeAlign,
- uintptr_t RODataSize, uint32_t RODataAlign,
- uintptr_t RWDataSize,
- uint32_t RWDataAlign) override {
- Unmapped.push_back(ObjectAllocs());
-
- LLVM_DEBUG(dbgs() << "Allocator " << Id << " reserved:\n");
-
- if (CodeSize != 0) {
- Unmapped.back().RemoteCodeAddr =
- Client.reserveMem(Id, CodeSize, CodeAlign);
-
- LLVM_DEBUG(
- dbgs() << " code: "
- << format("0x%016" PRIx64, Unmapped.back().RemoteCodeAddr)
- << " (" << CodeSize << " bytes, alignment " << CodeAlign
- << ")\n");
- }
-
- if (RODataSize != 0) {
- Unmapped.back().RemoteRODataAddr =
- Client.reserveMem(Id, RODataSize, RODataAlign);
-
- LLVM_DEBUG(
- dbgs() << " ro-data: "
- << format("0x%016" PRIx64, Unmapped.back().RemoteRODataAddr)
- << " (" << RODataSize << " bytes, alignment " << RODataAlign
- << ")\n");
- }
-
- if (RWDataSize != 0) {
- Unmapped.back().RemoteRWDataAddr =
- Client.reserveMem(Id, RWDataSize, RWDataAlign);
-
- LLVM_DEBUG(
- dbgs() << " rw-data: "
- << format("0x%016" PRIx64, Unmapped.back().RemoteRWDataAddr)
- << " (" << RWDataSize << " bytes, alignment " << RWDataAlign
- << ")\n");
- }
- }
-
- bool needsToReserveAllocationSpace() override { return true; }
-
- void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr,
- size_t Size) override {
- UnfinalizedEHFrames.push_back({LoadAddr, Size});
- }
-
- void deregisterEHFrames() override {
- for (auto &Frame : RegisteredEHFrames) {
- // FIXME: Add error poll.
- Client.deregisterEHFrames(Frame.Addr, Frame.Size);
- }
- }
-
- void notifyObjectLoaded(RuntimeDyld &Dyld,
- const object::ObjectFile &Obj) override {
- LLVM_DEBUG(dbgs() << "Allocator " << Id << " applied mappings:\n");
- for (auto &ObjAllocs : Unmapped) {
- mapAllocsToRemoteAddrs(Dyld, ObjAllocs.CodeAllocs,
- ObjAllocs.RemoteCodeAddr);
- mapAllocsToRemoteAddrs(Dyld, ObjAllocs.RODataAllocs,
- ObjAllocs.RemoteRODataAddr);
- mapAllocsToRemoteAddrs(Dyld, ObjAllocs.RWDataAllocs,
- ObjAllocs.RemoteRWDataAddr);
- Unfinalized.push_back(std::move(ObjAllocs));
- }
- Unmapped.clear();
- }
-
- bool finalizeMemory(std::string *ErrMsg = nullptr) override {
- LLVM_DEBUG(dbgs() << "Allocator " << Id << " finalizing:\n");
-
- for (auto &ObjAllocs : Unfinalized) {
- if (copyAndProtect(ObjAllocs.CodeAllocs, ObjAllocs.RemoteCodeAddr,
- sys::Memory::MF_READ | sys::Memory::MF_EXEC))
- return true;
-
- if (copyAndProtect(ObjAllocs.RODataAllocs, ObjAllocs.RemoteRODataAddr,
- sys::Memory::MF_READ))
- return true;
-
- if (copyAndProtect(ObjAllocs.RWDataAllocs, ObjAllocs.RemoteRWDataAddr,
- sys::Memory::MF_READ | sys::Memory::MF_WRITE))
- return true;
- }
- Unfinalized.clear();
-
- for (auto &EHFrame : UnfinalizedEHFrames) {
- if (auto Err = Client.registerEHFrames(EHFrame.Addr, EHFrame.Size)) {
- // FIXME: Replace this once finalizeMemory can return an Error.
- handleAllErrors(std::move(Err), [&](ErrorInfoBase &EIB) {
- if (ErrMsg) {
- raw_string_ostream ErrOut(*ErrMsg);
- EIB.log(ErrOut);
- }
- });
- return false;
- }
- }
- RegisteredEHFrames = std::move(UnfinalizedEHFrames);
- UnfinalizedEHFrames = {};
-
- return false;
- }
-
- private:
- class Alloc {
- public:
- Alloc(uint64_t Size, unsigned Align)
- : Size(Size), Align(Align), Contents(new char[Size + Align - 1]) {}
-
- Alloc(const Alloc &) = delete;
- Alloc &operator=(const Alloc &) = delete;
- Alloc(Alloc &&) = default;
- Alloc &operator=(Alloc &&) = default;
-
- uint64_t getSize() const { return Size; }
-
- unsigned getAlign() const { return Align; }
-
- char *getLocalAddress() const {
- uintptr_t LocalAddr = reinterpret_cast<uintptr_t>(Contents.get());
- LocalAddr = alignTo(LocalAddr, Align);
- return reinterpret_cast<char *>(LocalAddr);
- }
-
- void setRemoteAddress(JITTargetAddress RemoteAddr) {
- this->RemoteAddr = RemoteAddr;
- }
-
- JITTargetAddress getRemoteAddress() const { return RemoteAddr; }
-
- private:
- uint64_t Size;
- unsigned Align;
- std::unique_ptr<char[]> Contents;
- JITTargetAddress RemoteAddr = 0;
- };
-
- struct ObjectAllocs {
- ObjectAllocs() = default;
- ObjectAllocs(const ObjectAllocs &) = delete;
- ObjectAllocs &operator=(const ObjectAllocs &) = delete;
- ObjectAllocs(ObjectAllocs &&) = default;
- ObjectAllocs &operator=(ObjectAllocs &&) = default;
-
- JITTargetAddress RemoteCodeAddr = 0;
- JITTargetAddress RemoteRODataAddr = 0;
- JITTargetAddress RemoteRWDataAddr = 0;
- std::vector<Alloc> CodeAllocs, RODataAllocs, RWDataAllocs;
- };
-
- RemoteRTDyldMemoryManager(OrcRemoteTargetClient &Client,
- ResourceIdMgr::ResourceId Id)
- : Client(Client), Id(Id) {
- LLVM_DEBUG(dbgs() << "Created remote allocator " << Id << "\n");
- }
-
- // Maps all allocations in Allocs to aligned blocks
- void mapAllocsToRemoteAddrs(RuntimeDyld &Dyld, std::vector<Alloc> &Allocs,
- JITTargetAddress NextAddr) {
- for (auto &Alloc : Allocs) {
- NextAddr = alignTo(NextAddr, Alloc.getAlign());
- Dyld.mapSectionAddress(Alloc.getLocalAddress(), NextAddr);
- LLVM_DEBUG(
- dbgs() << " " << static_cast<void *>(Alloc.getLocalAddress())
- << " -> " << format("0x%016" PRIx64, NextAddr) << "\n");
- Alloc.setRemoteAddress(NextAddr);
-
- // Only advance NextAddr if it was non-null to begin with,
- // otherwise leave it as null.
- if (NextAddr)
- NextAddr += Alloc.getSize();
- }
- }
-
- // Copies data for each alloc in the list, then set permissions on the
- // segment.
- bool copyAndProtect(const std::vector<Alloc> &Allocs,
- JITTargetAddress RemoteSegmentAddr,
- unsigned Permissions) {
- if (RemoteSegmentAddr) {
- assert(!Allocs.empty() && "No sections in allocated segment");
-
- for (auto &Alloc : Allocs) {
- LLVM_DEBUG(dbgs() << " copying section: "
- << static_cast<void *>(Alloc.getLocalAddress())
- << " -> "
- << format("0x%016" PRIx64, Alloc.getRemoteAddress())
- << " (" << Alloc.getSize() << " bytes)\n";);
-
- if (Client.writeMem(Alloc.getRemoteAddress(), Alloc.getLocalAddress(),
- Alloc.getSize()))
- return true;
- }
-
- LLVM_DEBUG(dbgs() << " setting "
- << (Permissions & sys::Memory::MF_READ ? 'R' : '-')
- << (Permissions & sys::Memory::MF_WRITE ? 'W' : '-')
- << (Permissions & sys::Memory::MF_EXEC ? 'X' : '-')
- << " permissions on block: "
- << format("0x%016" PRIx64, RemoteSegmentAddr)
- << "\n");
- if (Client.setProtections(Id, RemoteSegmentAddr, Permissions))
- return true;
- }
- return false;
- }
-
- OrcRemoteTargetClient &Client;
- ResourceIdMgr::ResourceId Id;
- std::vector<ObjectAllocs> Unmapped;
- std::vector<ObjectAllocs> Unfinalized;
-
- struct EHFrame {
- JITTargetAddress Addr;
- uint64_t Size;
- };
- std::vector<EHFrame> UnfinalizedEHFrames;
- std::vector<EHFrame> RegisteredEHFrames;
- };
-
- class RPCMMAlloc : public jitlink::JITLinkMemoryManager::Allocation {
- using AllocationMap = DenseMap<unsigned, sys::MemoryBlock>;
- using FinalizeContinuation =
- jitlink::JITLinkMemoryManager::Allocation::FinalizeContinuation;
- using ProtectionFlags = sys::Memory::ProtectionFlags;
- using SegmentsRequestMap =
- DenseMap<unsigned, jitlink::JITLinkMemoryManager::SegmentRequest>;
-
- RPCMMAlloc(OrcRemoteTargetClient &Client, ResourceIdMgr::ResourceId Id)
- : Client(Client), Id(Id) {}
-
- public:
- static Expected<std::unique_ptr<RPCMMAlloc>>
- Create(OrcRemoteTargetClient &Client, ResourceIdMgr::ResourceId Id,
- const SegmentsRequestMap &Request) {
- auto *MM = new RPCMMAlloc(Client, Id);
-
- if (Error Err = MM->allocateHostBlocks(Request))
- return std::move(Err);
-
- if (Error Err = MM->allocateTargetBlocks())
- return std::move(Err);
-
- return std::unique_ptr<RPCMMAlloc>(MM);
- }
-
- MutableArrayRef<char> getWorkingMemory(ProtectionFlags Seg) override {
- assert(HostSegBlocks.count(Seg) && "No allocation for segment");
- return {static_cast<char *>(HostSegBlocks[Seg].base()),
- HostSegBlocks[Seg].allocatedSize()};
- }
-
- JITTargetAddress getTargetMemory(ProtectionFlags Seg) override {
- assert(TargetSegBlocks.count(Seg) && "No allocation for segment");
- return pointerToJITTargetAddress(TargetSegBlocks[Seg].base());
- }
-
- void finalizeAsync(FinalizeContinuation OnFinalize) override {
- // Host allocations (working memory) remain ReadWrite.
- OnFinalize(copyAndProtect());
- }
-
- Error deallocate() override {
- // TODO: Cannot release target allocation. RPCAPI has no function
- // symmetric to reserveMem(). Add RPC call like freeMem()?
- return errorCodeToError(sys::Memory::releaseMappedMemory(HostAllocation));
- }
-
- private:
- OrcRemoteTargetClient &Client;
- ResourceIdMgr::ResourceId Id;
- AllocationMap HostSegBlocks;
- AllocationMap TargetSegBlocks;
- JITTargetAddress TargetSegmentAddr;
- sys::MemoryBlock HostAllocation;
-
- Error allocateHostBlocks(const SegmentsRequestMap &Request) {
- unsigned TargetPageSize = Client.getPageSize();
-
- if (!isPowerOf2_64(static_cast<uint64_t>(TargetPageSize)))
- return make_error<StringError>("Host page size is not a power of 2",
- inconvertibleErrorCode());
-
- auto TotalSize = calcTotalAllocSize(Request, TargetPageSize);
- if (!TotalSize)
- return TotalSize.takeError();
-
- // Allocate one slab to cover all the segments.
- const sys::Memory::ProtectionFlags ReadWrite =
- static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
- sys::Memory::MF_WRITE);
- std::error_code EC;
- HostAllocation =
- sys::Memory::allocateMappedMemory(*TotalSize, nullptr, ReadWrite, EC);
- if (EC)
- return errorCodeToError(EC);
-
- char *SlabAddr = static_cast<char *>(HostAllocation.base());
-#ifndef NDEBUG
- char *SlabAddrEnd = SlabAddr + HostAllocation.allocatedSize();
-#endif
-
- // Allocate segment memory from the slab.
- for (auto &KV : Request) {
- const auto &Seg = KV.second;
-
- uint64_t SegmentSize = Seg.getContentSize() + Seg.getZeroFillSize();
- uint64_t AlignedSegmentSize = alignTo(SegmentSize, TargetPageSize);
-
- // Zero out zero-fill memory.
- char *ZeroFillBegin = SlabAddr + Seg.getContentSize();
- memset(ZeroFillBegin, 0, Seg.getZeroFillSize());
-
- // Record the block for this segment.
- HostSegBlocks[KV.first] =
- sys::MemoryBlock(SlabAddr, AlignedSegmentSize);
-
- SlabAddr += AlignedSegmentSize;
- assert(SlabAddr <= SlabAddrEnd && "Out of range");
- }
-
- return Error::success();
- }
-
- Error allocateTargetBlocks() {
- // Reserve memory for all blocks on the target. We need as much space on
- // the target as we allocated on the host.
- TargetSegmentAddr = Client.reserveMem(Id, HostAllocation.allocatedSize(),
- Client.getPageSize());
- if (!TargetSegmentAddr)
- return make_error<StringError>("Failed to reserve memory on the target",
- inconvertibleErrorCode());
-
- // Map memory blocks into the allocation, that match the host allocation.
- JITTargetAddress TargetAllocAddr = TargetSegmentAddr;
- for (const auto &KV : HostSegBlocks) {
- size_t TargetAllocSize = KV.second.allocatedSize();
-
- TargetSegBlocks[KV.first] =
- sys::MemoryBlock(jitTargetAddressToPointer<void *>(TargetAllocAddr),
- TargetAllocSize);
-
- TargetAllocAddr += TargetAllocSize;
- assert(TargetAllocAddr - TargetSegmentAddr <=
- HostAllocation.allocatedSize() &&
- "Out of range on target");
- }
-
- return Error::success();
- }
-
- Error copyAndProtect() {
- unsigned Permissions = 0u;
-
- // Copy segments one by one.
- for (auto &KV : TargetSegBlocks) {
- Permissions |= KV.first;
-
- const sys::MemoryBlock &TargetBlock = KV.second;
- const sys::MemoryBlock &HostBlock = HostSegBlocks.lookup(KV.first);
-
- size_t TargetAllocSize = TargetBlock.allocatedSize();
- auto TargetAllocAddr = pointerToJITTargetAddress(TargetBlock.base());
- auto *HostAllocBegin = static_cast<const char *>(HostBlock.base());
-
- bool CopyErr =
- Client.writeMem(TargetAllocAddr, HostAllocBegin, TargetAllocSize);
- if (CopyErr)
- return createStringError(inconvertibleErrorCode(),
- "Failed to copy %d segment to the target",
- KV.first);
- }
-
- // Set permission flags for all segments at once.
- bool ProtectErr =
- Client.setProtections(Id, TargetSegmentAddr, Permissions);
- if (ProtectErr)
- return createStringError(inconvertibleErrorCode(),
- "Failed to apply permissions for %d segment "
- "on the target",
- Permissions);
- return Error::success();
- }
-
- static Expected<size_t>
- calcTotalAllocSize(const SegmentsRequestMap &Request,
- unsigned TargetPageSize) {
- size_t TotalSize = 0;
- for (const auto &KV : Request) {
- const auto &Seg = KV.second;
-
- if (Seg.getAlignment() > TargetPageSize)
- return make_error<StringError>("Cannot request alignment higher than "
- "page alignment on target",
- inconvertibleErrorCode());
-
- TotalSize = alignTo(TotalSize, TargetPageSize);
- TotalSize += Seg.getContentSize();
- TotalSize += Seg.getZeroFillSize();
- }
-
- return TotalSize;
- }
- };
-
- class RemoteJITLinkMemoryManager : public jitlink::JITLinkMemoryManager {
- public:
- RemoteJITLinkMemoryManager(OrcRemoteTargetClient &Client,
- ResourceIdMgr::ResourceId Id)
- : Client(Client), Id(Id) {}
-
- RemoteJITLinkMemoryManager(const RemoteJITLinkMemoryManager &) = delete;
- RemoteJITLinkMemoryManager(RemoteJITLinkMemoryManager &&) = default;
-
- RemoteJITLinkMemoryManager &
- operator=(const RemoteJITLinkMemoryManager &) = delete;
- RemoteJITLinkMemoryManager &
- operator=(RemoteJITLinkMemoryManager &&) = delete;
-
- ~RemoteJITLinkMemoryManager() {
- Client.destroyRemoteAllocator(Id);
- LLVM_DEBUG(dbgs() << "Destroyed remote allocator " << Id << "\n");
- }
-
- Expected<std::unique_ptr<Allocation>>
- allocate(const jitlink::JITLinkDylib *JD,
- const SegmentsRequestMap &Request) override {
- return RPCMMAlloc::Create(Client, Id, Request);
- }
-
- private:
- OrcRemoteTargetClient &Client;
- ResourceIdMgr::ResourceId Id;
- };
-
- /// Remote indirect stubs manager.
- class RemoteIndirectStubsManager : public IndirectStubsManager {
- public:
- RemoteIndirectStubsManager(OrcRemoteTargetClient &Client,
- ResourceIdMgr::ResourceId Id)
- : Client(Client), Id(Id) {}
-
- ~RemoteIndirectStubsManager() override {
- Client.destroyIndirectStubsManager(Id);
- }
-
- Error createStub(StringRef StubName, JITTargetAddress StubAddr,
- JITSymbolFlags StubFlags) override {
- if (auto Err = reserveStubs(1))
- return Err;
-
- return createStubInternal(StubName, StubAddr, StubFlags);
- }
-
- Error createStubs(const StubInitsMap &StubInits) override {
- if (auto Err = reserveStubs(StubInits.size()))
- return Err;
-
- for (auto &Entry : StubInits)
- if (auto Err = createStubInternal(Entry.first(), Entry.second.first,
- Entry.second.second))
- return Err;
-
- return Error::success();
- }
-
- JITEvaluatedSymbol findStub(StringRef Name, bool ExportedStubsOnly) override {
- auto I = StubIndexes.find(Name);
- if (I == StubIndexes.end())
- return nullptr;
- auto Key = I->second.first;
- auto Flags = I->second.second;
- auto StubSymbol = JITEvaluatedSymbol(getStubAddr(Key), Flags);
- if (ExportedStubsOnly && !StubSymbol.getFlags().isExported())
- return nullptr;
- return StubSymbol;
- }
-
- JITEvaluatedSymbol findPointer(StringRef Name) override {
- auto I = StubIndexes.find(Name);
- if (I == StubIndexes.end())
- return nullptr;
- auto Key = I->second.first;
- auto Flags = I->second.second;
- return JITEvaluatedSymbol(getPtrAddr(Key), Flags);
- }
-
- Error updatePointer(StringRef Name, JITTargetAddress NewAddr) override {
- auto I = StubIndexes.find(Name);
- assert(I != StubIndexes.end() && "No stub pointer for symbol");
- auto Key = I->second.first;
- return Client.writePointer(getPtrAddr(Key), NewAddr);
- }
-
- private:
- struct RemoteIndirectStubsInfo {
- JITTargetAddress StubBase;
- JITTargetAddress PtrBase;
- unsigned NumStubs;
- };
-
- using StubKey = std::pair<uint16_t, uint16_t>;
-
- Error reserveStubs(unsigned NumStubs) {
- if (NumStubs <= FreeStubs.size())
- return Error::success();
-
- unsigned NewStubsRequired = NumStubs - FreeStubs.size();
- JITTargetAddress StubBase;
- JITTargetAddress PtrBase;
- unsigned NumStubsEmitted;
-
- if (auto StubInfoOrErr = Client.emitIndirectStubs(Id, NewStubsRequired))
- std::tie(StubBase, PtrBase, NumStubsEmitted) = *StubInfoOrErr;
- else
- return StubInfoOrErr.takeError();
-
- unsigned NewBlockId = RemoteIndirectStubsInfos.size();
- RemoteIndirectStubsInfos.push_back({StubBase, PtrBase, NumStubsEmitted});
-
- for (unsigned I = 0; I < NumStubsEmitted; ++I)
- FreeStubs.push_back(std::make_pair(NewBlockId, I));
-
- return Error::success();
- }
-
- Error createStubInternal(StringRef StubName, JITTargetAddress InitAddr,
- JITSymbolFlags StubFlags) {
- auto Key = FreeStubs.back();
- FreeStubs.pop_back();
- StubIndexes[StubName] = std::make_pair(Key, StubFlags);
- return Client.writePointer(getPtrAddr(Key), InitAddr);
- }
-
- JITTargetAddress getStubAddr(StubKey K) {
- assert(RemoteIndirectStubsInfos[K.first].StubBase != 0 &&
- "Missing stub address");
- return RemoteIndirectStubsInfos[K.first].StubBase +
- K.second * Client.getIndirectStubSize();
- }
-
- JITTargetAddress getPtrAddr(StubKey K) {
- assert(RemoteIndirectStubsInfos[K.first].PtrBase != 0 &&
- "Missing pointer address");
- return RemoteIndirectStubsInfos[K.first].PtrBase +
- K.second * Client.getPointerSize();
- }
-
- OrcRemoteTargetClient &Client;
- ResourceIdMgr::ResourceId Id;
- std::vector<RemoteIndirectStubsInfo> RemoteIndirectStubsInfos;
- std::vector<StubKey> FreeStubs;
- StringMap<std::pair<StubKey, JITSymbolFlags>> StubIndexes;
- };
-
- class RemoteTrampolinePool : public TrampolinePool {
- public:
- RemoteTrampolinePool(OrcRemoteTargetClient &Client) : Client(Client) {}
-
- private:
- Error grow() override {
- JITTargetAddress BlockAddr = 0;
- uint32_t NumTrampolines = 0;
- if (auto TrampolineInfoOrErr = Client.emitTrampolineBlock())
- std::tie(BlockAddr, NumTrampolines) = *TrampolineInfoOrErr;
- else
- return TrampolineInfoOrErr.takeError();
-
- uint32_t TrampolineSize = Client.getTrampolineSize();
- for (unsigned I = 0; I < NumTrampolines; ++I)
- AvailableTrampolines.push_back(BlockAddr + (I * TrampolineSize));
-
- return Error::success();
- }
-
- OrcRemoteTargetClient &Client;
- };
-
- /// Remote compile callback manager.
- class RemoteCompileCallbackManager : public JITCompileCallbackManager {
- public:
- RemoteCompileCallbackManager(OrcRemoteTargetClient &Client,
- ExecutionSession &ES,
- JITTargetAddress ErrorHandlerAddress)
- : JITCompileCallbackManager(
- std::make_unique<RemoteTrampolinePool>(Client), ES,
- ErrorHandlerAddress) {}
- };
-
- /// Create an OrcRemoteTargetClient.
- /// Channel is the ChannelT instance to communicate on. It is assumed that
- /// the channel is ready to be read from and written to.
- static Expected<std::unique_ptr<OrcRemoteTargetClient>>
- Create(shared::RawByteChannel &Channel, ExecutionSession &ES) {
- Error Err = Error::success();
- auto Client = std::unique_ptr<OrcRemoteTargetClient>(
- new OrcRemoteTargetClient(Channel, ES, Err));
- if (Err)
- return std::move(Err);
- return std::move(Client);
- }
-
- /// Call the int(void) function at the given address in the target and return
- /// its result.
- Expected<int> callIntVoid(JITTargetAddress Addr) {
- LLVM_DEBUG(dbgs() << "Calling int(*)(void) "
- << format("0x%016" PRIx64, Addr) << "\n");
- return callB<exec::CallIntVoid>(Addr);
- }
-
- /// Call the int(int) function at the given address in the target and return
- /// its result.
- Expected<int> callIntInt(JITTargetAddress Addr, int Arg) {
- LLVM_DEBUG(dbgs() << "Calling int(*)(int) " << format("0x%016" PRIx64, Addr)
- << "\n");
- return callB<exec::CallIntInt>(Addr, Arg);
- }
-
- /// Call the int(int, char*[]) function at the given address in the target and
- /// return its result.
- Expected<int> callMain(JITTargetAddress Addr,
- const std::vector<std::string> &Args) {
- LLVM_DEBUG(dbgs() << "Calling int(*)(int, char*[]) "
- << format("0x%016" PRIx64, Addr) << "\n");
- return callB<exec::CallMain>(Addr, Args);
- }
-
- /// Call the void() function at the given address in the target and wait for
- /// it to finish.
- Error callVoidVoid(JITTargetAddress Addr) {
- LLVM_DEBUG(dbgs() << "Calling void(*)(void) "
- << format("0x%016" PRIx64, Addr) << "\n");
- return callB<exec::CallVoidVoid>(Addr);
- }
-
- /// Create an RCMemoryManager which will allocate its memory on the remote
- /// target.
- Expected<std::unique_ptr<RemoteRTDyldMemoryManager>>
- createRemoteMemoryManager() {
- auto Id = AllocatorIds.getNext();
- if (auto Err = callB<mem::CreateRemoteAllocator>(Id))
- return std::move(Err);
- return std::unique_ptr<RemoteRTDyldMemoryManager>(
- new RemoteRTDyldMemoryManager(*this, Id));
- }
-
- /// Create a JITLink-compatible memory manager which will allocate working
- /// memory on the host and target memory on the remote target.
- Expected<std::unique_ptr<RemoteJITLinkMemoryManager>>
- createRemoteJITLinkMemoryManager() {
- auto Id = AllocatorIds.getNext();
- if (auto Err = callB<mem::CreateRemoteAllocator>(Id))
- return std::move(Err);
- LLVM_DEBUG(dbgs() << "Created remote allocator " << Id << "\n");
- return std::unique_ptr<RemoteJITLinkMemoryManager>(
- new RemoteJITLinkMemoryManager(*this, Id));
- }
-
- /// Create an RCIndirectStubsManager that will allocate stubs on the remote
- /// target.
- Expected<std::unique_ptr<RemoteIndirectStubsManager>>
- createIndirectStubsManager() {
- auto Id = IndirectStubOwnerIds.getNext();
- if (auto Err = callB<stubs::CreateIndirectStubsOwner>(Id))
- return std::move(Err);
- return std::make_unique<RemoteIndirectStubsManager>(*this, Id);
- }
-
- Expected<RemoteCompileCallbackManager &>
- enableCompileCallbacks(JITTargetAddress ErrorHandlerAddress) {
- assert(!CallbackManager && "CallbackManager already obtained");
-
- // Emit the resolver block on the JIT server.
- if (auto Err = callB<stubs::EmitResolverBlock>())
- return std::move(Err);
-
- // Create the callback manager.
- CallbackManager.emplace(*this, ES, ErrorHandlerAddress);
- RemoteCompileCallbackManager &Mgr = *CallbackManager;
- return Mgr;
- }
-
- /// Search for symbols in the remote process. Note: This should be used by
- /// symbol resolvers *after* they've searched the local symbol table in the
- /// JIT stack.
- Expected<JITTargetAddress> getSymbolAddress(StringRef Name) {
- return callB<utils::GetSymbolAddress>(Name);
- }
-
- /// Get the triple for the remote target.
- const std::string &getTargetTriple() const { return RemoteTargetTriple; }
-
- Error terminateSession() { return callB<utils::TerminateSession>(); }
-
-private:
- OrcRemoteTargetClient(shared::RawByteChannel &Channel, ExecutionSession &ES,
- Error &Err)
- : shared::SingleThreadedRPCEndpoint<shared::RawByteChannel>(Channel,
- true),
- ES(ES) {
- ErrorAsOutParameter EAO(&Err);
-
- addHandler<utils::RequestCompile>(
- [this](JITTargetAddress Addr) -> JITTargetAddress {
- if (CallbackManager)
- return CallbackManager->executeCompileCallback(Addr);
- return 0;
- });
-
- if (auto RIOrErr = callB<utils::GetRemoteInfo>()) {
- std::tie(RemoteTargetTriple, RemotePointerSize, RemotePageSize,
- RemoteTrampolineSize, RemoteIndirectStubSize) = *RIOrErr;
- Err = Error::success();
- } else
- Err = RIOrErr.takeError();
- }
-
- void deregisterEHFrames(JITTargetAddress Addr, uint32_t Size) {
- if (auto Err = callB<eh::RegisterEHFrames>(Addr, Size))
- ES.reportError(std::move(Err));
- }
-
- void destroyRemoteAllocator(ResourceIdMgr::ResourceId Id) {
- if (auto Err = callB<mem::DestroyRemoteAllocator>(Id)) {
- // FIXME: This will be triggered by a removeModuleSet call: Propagate
- // error return up through that.
- llvm_unreachable("Failed to destroy remote allocator.");
- AllocatorIds.release(Id);
- }
- }
-
- void destroyIndirectStubsManager(ResourceIdMgr::ResourceId Id) {
- IndirectStubOwnerIds.release(Id);
- if (auto Err = callB<stubs::DestroyIndirectStubsOwner>(Id))
- ES.reportError(std::move(Err));
- }
-
- Expected<std::tuple<JITTargetAddress, JITTargetAddress, uint32_t>>
- emitIndirectStubs(ResourceIdMgr::ResourceId Id, uint32_t NumStubsRequired) {
- return callB<stubs::EmitIndirectStubs>(Id, NumStubsRequired);
- }
-
- Expected<std::tuple<JITTargetAddress, uint32_t>> emitTrampolineBlock() {
- return callB<stubs::EmitTrampolineBlock>();
- }
-
- uint32_t getIndirectStubSize() const { return RemoteIndirectStubSize; }
- uint32_t getPageSize() const { return RemotePageSize; }
- uint32_t getPointerSize() const { return RemotePointerSize; }
-
- uint32_t getTrampolineSize() const { return RemoteTrampolineSize; }
-
- Expected<std::vector<uint8_t>> readMem(char *Dst, JITTargetAddress Src,
- uint64_t Size) {
- return callB<mem::ReadMem>(Src, Size);
- }
-
- Error registerEHFrames(JITTargetAddress &RAddr, uint32_t Size) {
- // FIXME: Duplicate error and report it via ReportError too?
- return callB<eh::RegisterEHFrames>(RAddr, Size);
- }
-
- JITTargetAddress reserveMem(ResourceIdMgr::ResourceId Id, uint64_t Size,
- uint32_t Align) {
- if (auto AddrOrErr = callB<mem::ReserveMem>(Id, Size, Align))
- return *AddrOrErr;
- else {
- ES.reportError(AddrOrErr.takeError());
- return 0;
- }
- }
-
- bool setProtections(ResourceIdMgr::ResourceId Id,
- JITTargetAddress RemoteSegAddr, unsigned ProtFlags) {
- if (auto Err = callB<mem::SetProtections>(Id, RemoteSegAddr, ProtFlags)) {
- ES.reportError(std::move(Err));
- return true;
- } else
- return false;
- }
-
- bool writeMem(JITTargetAddress Addr, const char *Src, uint64_t Size) {
- if (auto Err = callB<mem::WriteMem>(DirectBufferWriter(Src, Addr, Size))) {
- ES.reportError(std::move(Err));
- return true;
- } else
- return false;
- }
-
- Error writePointer(JITTargetAddress Addr, JITTargetAddress PtrVal) {
- return callB<mem::WritePtr>(Addr, PtrVal);
- }
-
- static Error doNothing() { return Error::success(); }
-
- ExecutionSession &ES;
- std::function<void(Error)> ReportError;
- std::string RemoteTargetTriple;
- uint32_t RemotePointerSize = 0;
- uint32_t RemotePageSize = 0;
- uint32_t RemoteTrampolineSize = 0;
- uint32_t RemoteIndirectStubSize = 0;
- ResourceIdMgr AllocatorIds, IndirectStubOwnerIds;
- Optional<RemoteCompileCallbackManager> CallbackManager;
-};
-
-} // end namespace remote
-} // end namespace orc
-} // end namespace llvm
-
-#undef DEBUG_TYPE
-
-#endif // LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETCLIENT_H
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h b/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h
deleted file mode 100644
index 367bfb369191..000000000000
--- a/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h
+++ /dev/null
@@ -1,386 +0,0 @@
-//===- OrcRemoteTargetRPCAPI.h - Orc Remote-target RPC API ------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the Orc remote-target RPC API. It should not be used
-// directly, but is used by the RemoteTargetClient and RemoteTargetServer
-// classes.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETRPCAPI_H
-#define LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETRPCAPI_H
-
-#include "llvm/ExecutionEngine/JITSymbol.h"
-#include "llvm/ExecutionEngine/Orc/Shared/RPCUtils.h"
-#include "llvm/ExecutionEngine/Orc/Shared/RawByteChannel.h"
-
-namespace llvm {
-namespace orc {
-
-namespace remote {
-
-/// Template error for missing resources.
-template <typename ResourceIdT>
-class ResourceNotFound
- : public ErrorInfo<ResourceNotFound<ResourceIdT>> {
-public:
- static char ID;
-
- ResourceNotFound(ResourceIdT ResourceId,
- std::string ResourceDescription = "")
- : ResourceId(std::move(ResourceId)),
- ResourceDescription(std::move(ResourceDescription)) {}
-
- std::error_code convertToErrorCode() const override {
- return orcError(OrcErrorCode::UnknownResourceHandle);
- }
-
- void log(raw_ostream &OS) const override {
- OS << (ResourceDescription.empty()
- ? "Remote resource with id "
- : ResourceDescription)
- << " " << ResourceId << " not found";
- }
-
-private:
- ResourceIdT ResourceId;
- std::string ResourceDescription;
-};
-
-template <typename ResourceIdT>
-char ResourceNotFound<ResourceIdT>::ID = 0;
-
-class DirectBufferWriter {
-public:
- DirectBufferWriter() = default;
- DirectBufferWriter(const char *Src, JITTargetAddress Dst, uint64_t Size)
- : Src(Src), Dst(Dst), Size(Size) {}
-
- const char *getSrc() const { return Src; }
- JITTargetAddress getDst() const { return Dst; }
- uint64_t getSize() const { return Size; }
-
-private:
- const char *Src;
- JITTargetAddress Dst;
- uint64_t Size;
-};
-
-} // end namespace remote
-
-namespace shared {
-
-template <> class SerializationTypeName<JITSymbolFlags> {
-public:
- static const char *getName() { return "JITSymbolFlags"; }
-};
-
-template <typename ChannelT>
-class SerializationTraits<ChannelT, JITSymbolFlags> {
-public:
-
- static Error serialize(ChannelT &C, const JITSymbolFlags &Flags) {
- return serializeSeq(C, Flags.getRawFlagsValue(), Flags.getTargetFlags());
- }
-
- static Error deserialize(ChannelT &C, JITSymbolFlags &Flags) {
- JITSymbolFlags::UnderlyingType JITFlags;
- JITSymbolFlags::TargetFlagsType TargetFlags;
- if (auto Err = deserializeSeq(C, JITFlags, TargetFlags))
- return Err;
- Flags = JITSymbolFlags(static_cast<JITSymbolFlags::FlagNames>(JITFlags),
- TargetFlags);
- return Error::success();
- }
-};
-
-template <> class SerializationTypeName<remote::DirectBufferWriter> {
-public:
- static const char *getName() { return "DirectBufferWriter"; }
-};
-
-template <typename ChannelT>
-class SerializationTraits<
- ChannelT, remote::DirectBufferWriter, remote::DirectBufferWriter,
- std::enable_if_t<std::is_base_of<RawByteChannel, ChannelT>::value>> {
-public:
- static Error serialize(ChannelT &C, const remote::DirectBufferWriter &DBW) {
- if (auto EC = serializeSeq(C, DBW.getDst()))
- return EC;
- if (auto EC = serializeSeq(C, DBW.getSize()))
- return EC;
- return C.appendBytes(DBW.getSrc(), DBW.getSize());
- }
-
- static Error deserialize(ChannelT &C, remote::DirectBufferWriter &DBW) {
- JITTargetAddress Dst;
- if (auto EC = deserializeSeq(C, Dst))
- return EC;
- uint64_t Size;
- if (auto EC = deserializeSeq(C, Size))
- return EC;
- char *Addr = reinterpret_cast<char *>(static_cast<uintptr_t>(Dst));
-
- DBW = remote::DirectBufferWriter(nullptr, Dst, Size);
-
- return C.readBytes(Addr, Size);
- }
-};
-
-} // end namespace shared
-
-namespace remote {
-
-class ResourceIdMgr {
-public:
- using ResourceId = uint64_t;
- static const ResourceId InvalidId = ~0U;
-
- ResourceIdMgr() = default;
- explicit ResourceIdMgr(ResourceId FirstValidId)
- : NextId(std::move(FirstValidId)) {}
-
- ResourceId getNext() {
- if (!FreeIds.empty()) {
- ResourceId I = FreeIds.back();
- FreeIds.pop_back();
- return I;
- }
- assert(NextId + 1 != ~0ULL && "All ids allocated");
- return NextId++;
- }
-
- void release(ResourceId I) { FreeIds.push_back(I); }
-
-private:
- ResourceId NextId = 1;
- std::vector<ResourceId> FreeIds;
-};
-
-/// Registers EH frames on the remote.
-namespace eh {
-
- /// Registers EH frames on the remote.
-class RegisterEHFrames
- : public shared::RPCFunction<RegisterEHFrames,
- void(JITTargetAddress Addr, uint32_t Size)> {
-public:
- static const char *getName() { return "RegisterEHFrames"; }
-};
-
- /// Deregisters EH frames on the remote.
-class DeregisterEHFrames
- : public shared::RPCFunction<DeregisterEHFrames,
- void(JITTargetAddress Addr, uint32_t Size)> {
-public:
- static const char *getName() { return "DeregisterEHFrames"; }
-};
-
-} // end namespace eh
-
-/// RPC functions for executing remote code.
-namespace exec {
-
- /// Call an 'int32_t()'-type function on the remote, returns the called
- /// function's return value.
-class CallIntVoid
- : public shared::RPCFunction<CallIntVoid, int32_t(JITTargetAddress Addr)> {
-public:
- static const char *getName() { return "CallIntVoid"; }
-};
-
- /// Call an 'int32_t(int32_t)'-type function on the remote, returns the called
- /// function's return value.
-class CallIntInt
- : public shared::RPCFunction<CallIntInt,
- int32_t(JITTargetAddress Addr, int)> {
-public:
- static const char *getName() { return "CallIntInt"; }
-};
-
- /// Call an 'int32_t(int32_t, char**)'-type function on the remote, returns the
- /// called function's return value.
-class CallMain
- : public shared::RPCFunction<CallMain,
- int32_t(JITTargetAddress Addr,
- std::vector<std::string> Args)> {
-public:
- static const char *getName() { return "CallMain"; }
-};
-
- /// Calls a 'void()'-type function on the remote, returns when the called
- /// function completes.
-class CallVoidVoid
- : public shared::RPCFunction<CallVoidVoid, void(JITTargetAddress FnAddr)> {
-public:
- static const char *getName() { return "CallVoidVoid"; }
-};
-
-} // end namespace exec
-
-/// RPC functions for remote memory management / inspection / modification.
-namespace mem {
-
- /// Creates a memory allocator on the remote.
-class CreateRemoteAllocator
- : public shared::RPCFunction<CreateRemoteAllocator,
- void(ResourceIdMgr::ResourceId AllocatorID)> {
-public:
- static const char *getName() { return "CreateRemoteAllocator"; }
-};
-
- /// Destroys a remote allocator, freeing any memory allocated by it.
-class DestroyRemoteAllocator
- : public shared::RPCFunction<DestroyRemoteAllocator,
- void(ResourceIdMgr::ResourceId AllocatorID)> {
-public:
- static const char *getName() { return "DestroyRemoteAllocator"; }
-};
-
- /// Read a remote memory block.
-class ReadMem
- : public shared::RPCFunction<
- ReadMem, std::vector<uint8_t>(JITTargetAddress Src, uint64_t Size)> {
-public:
- static const char *getName() { return "ReadMem"; }
-};
-
- /// Reserve a block of memory on the remote via the given allocator.
-class ReserveMem
- : public shared::RPCFunction<
- ReserveMem, JITTargetAddress(ResourceIdMgr::ResourceId AllocID,
- uint64_t Size, uint32_t Align)> {
-public:
- static const char *getName() { return "ReserveMem"; }
-};
-
- /// Set the memory protection on a memory block.
-class SetProtections
- : public shared::RPCFunction<
- SetProtections, void(ResourceIdMgr::ResourceId AllocID,
- JITTargetAddress Dst, uint32_t ProtFlags)> {
-public:
- static const char *getName() { return "SetProtections"; }
-};
-
- /// Write to a remote memory block.
-class WriteMem
- : public shared::RPCFunction<WriteMem,
- void(remote::DirectBufferWriter DB)> {
-public:
- static const char *getName() { return "WriteMem"; }
-};
-
- /// Write to a remote pointer.
-class WritePtr
- : public shared::RPCFunction<WritePtr, void(JITTargetAddress Dst,
- JITTargetAddress Val)> {
-public:
- static const char *getName() { return "WritePtr"; }
-};
-
-} // end namespace mem
-
-/// RPC functions for remote stub and trampoline management.
-namespace stubs {
-
- /// Creates an indirect stub owner on the remote.
-class CreateIndirectStubsOwner
- : public shared::RPCFunction<CreateIndirectStubsOwner,
- void(ResourceIdMgr::ResourceId StubOwnerID)> {
-public:
- static const char *getName() { return "CreateIndirectStubsOwner"; }
-};
-
- /// RPC function for destroying an indirect stubs owner.
-class DestroyIndirectStubsOwner
- : public shared::RPCFunction<DestroyIndirectStubsOwner,
- void(ResourceIdMgr::ResourceId StubsOwnerID)> {
-public:
- static const char *getName() { return "DestroyIndirectStubsOwner"; }
-};
-
- /// EmitIndirectStubs result is (StubsBase, PtrsBase, NumStubsEmitted).
-class EmitIndirectStubs
- : public shared::RPCFunction<
- EmitIndirectStubs,
- std::tuple<JITTargetAddress, JITTargetAddress, uint32_t>(
- ResourceIdMgr::ResourceId StubsOwnerID,
- uint32_t NumStubsRequired)> {
-public:
- static const char *getName() { return "EmitIndirectStubs"; }
-};
-
- /// RPC function to emit the resolver block and return its address.
-class EmitResolverBlock
- : public shared::RPCFunction<EmitResolverBlock, void()> {
-public:
- static const char *getName() { return "EmitResolverBlock"; }
-};
-
- /// EmitTrampolineBlock result is (BlockAddr, NumTrampolines).
-class EmitTrampolineBlock
- : public shared::RPCFunction<EmitTrampolineBlock,
- std::tuple<JITTargetAddress, uint32_t>()> {
-public:
- static const char *getName() { return "EmitTrampolineBlock"; }
-};
-
-} // end namespace stubs
-
-/// Miscellaneous RPC functions for dealing with remotes.
-namespace utils {
-
- /// GetRemoteInfo result is (Triple, PointerSize, PageSize, TrampolineSize,
- /// IndirectStubsSize).
-class GetRemoteInfo
- : public shared::RPCFunction<
- GetRemoteInfo,
- std::tuple<std::string, uint32_t, uint32_t, uint32_t, uint32_t>()> {
-public:
- static const char *getName() { return "GetRemoteInfo"; }
-};
-
- /// Get the address of a remote symbol.
-class GetSymbolAddress
- : public shared::RPCFunction<GetSymbolAddress,
- JITTargetAddress(std::string SymbolName)> {
-public:
- static const char *getName() { return "GetSymbolAddress"; }
-};
-
- /// Request that the host execute a compile callback.
-class RequestCompile
- : public shared::RPCFunction<
- RequestCompile, JITTargetAddress(JITTargetAddress TrampolineAddr)> {
-public:
- static const char *getName() { return "RequestCompile"; }
-};
-
- /// Notify the remote and terminate the session.
-class TerminateSession : public shared::RPCFunction<TerminateSession, void()> {
-public:
- static const char *getName() { return "TerminateSession"; }
-};
-
-} // namespace utils
-
-class OrcRemoteTargetRPCAPI
- : public shared::SingleThreadedRPCEndpoint<shared::RawByteChannel> {
-public:
- // FIXME: Remove constructors once MSVC supports synthesizing move-ops.
- OrcRemoteTargetRPCAPI(shared::RawByteChannel &C)
- : shared::SingleThreadedRPCEndpoint<shared::RawByteChannel>(C, true) {}
-};
-
-} // end namespace remote
-
-} // end namespace orc
-} // end namespace llvm
-
-#endif // LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETRPCAPI_H
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h b/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h
deleted file mode 100644
index ce9bf064303d..000000000000
--- a/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h
+++ /dev/null
@@ -1,464 +0,0 @@
-//===- OrcRemoteTargetServer.h - Orc Remote-target Server -------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the OrcRemoteTargetServer class. It can be used to build a
-// JIT server that can execute code sent from an OrcRemoteTargetClient.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETSERVER_H
-#define LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETSERVER_H
-
-#include "llvm/ExecutionEngine/JITSymbol.h"
-#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h"
-#include "llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h"
-#include "llvm/ExecutionEngine/Orc/Shared/OrcError.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/Error.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/Host.h"
-#include "llvm/Support/Memory.h"
-#include "llvm/Support/Process.h"
-#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
-#include <cassert>
-#include <cstddef>
-#include <cstdint>
-#include <functional>
-#include <map>
-#include <memory>
-#include <string>
-#include <system_error>
-#include <tuple>
-#include <type_traits>
-#include <vector>
-
-#define DEBUG_TYPE "orc-remote"
-
-namespace llvm {
-namespace orc {
-namespace remote {
-
-template <typename ChannelT, typename TargetT>
-class OrcRemoteTargetServer
- : public shared::SingleThreadedRPCEndpoint<shared::RawByteChannel> {
-public:
- using SymbolLookupFtor =
- std::function<JITTargetAddress(const std::string &Name)>;
-
- using EHFrameRegistrationFtor =
- std::function<void(uint8_t *Addr, uint32_t Size)>;
-
- OrcRemoteTargetServer(ChannelT &Channel, SymbolLookupFtor SymbolLookup,
- EHFrameRegistrationFtor EHFramesRegister,
- EHFrameRegistrationFtor EHFramesDeregister)
- : shared::SingleThreadedRPCEndpoint<shared::RawByteChannel>(Channel,
- true),
- SymbolLookup(std::move(SymbolLookup)),
- EHFramesRegister(std::move(EHFramesRegister)),
- EHFramesDeregister(std::move(EHFramesDeregister)) {
- using ThisT = std::remove_reference_t<decltype(*this)>;
- addHandler<exec::CallIntVoid>(*this, &ThisT::handleCallIntVoid);
- addHandler<exec::CallIntInt>(*this, &ThisT::handleCallIntInt);
- addHandler<exec::CallMain>(*this, &ThisT::handleCallMain);
- addHandler<exec::CallVoidVoid>(*this, &ThisT::handleCallVoidVoid);
- addHandler<mem::CreateRemoteAllocator>(*this,
- &ThisT::handleCreateRemoteAllocator);
- addHandler<mem::DestroyRemoteAllocator>(
- *this, &ThisT::handleDestroyRemoteAllocator);
- addHandler<mem::ReadMem>(*this, &ThisT::handleReadMem);
- addHandler<mem::ReserveMem>(*this, &ThisT::handleReserveMem);
- addHandler<mem::SetProtections>(*this, &ThisT::handleSetProtections);
- addHandler<mem::WriteMem>(*this, &ThisT::handleWriteMem);
- addHandler<mem::WritePtr>(*this, &ThisT::handleWritePtr);
- addHandler<eh::RegisterEHFrames>(*this, &ThisT::handleRegisterEHFrames);
- addHandler<eh::DeregisterEHFrames>(*this, &ThisT::handleDeregisterEHFrames);
- addHandler<stubs::CreateIndirectStubsOwner>(
- *this, &ThisT::handleCreateIndirectStubsOwner);
- addHandler<stubs::DestroyIndirectStubsOwner>(
- *this, &ThisT::handleDestroyIndirectStubsOwner);
- addHandler<stubs::EmitIndirectStubs>(*this,
- &ThisT::handleEmitIndirectStubs);
- addHandler<stubs::EmitResolverBlock>(*this,
- &ThisT::handleEmitResolverBlock);
- addHandler<stubs::EmitTrampolineBlock>(*this,
- &ThisT::handleEmitTrampolineBlock);
- addHandler<utils::GetSymbolAddress>(*this, &ThisT::handleGetSymbolAddress);
- addHandler<utils::GetRemoteInfo>(*this, &ThisT::handleGetRemoteInfo);
- addHandler<utils::TerminateSession>(*this, &ThisT::handleTerminateSession);
- }
-
- // FIXME: Remove move/copy ops once MSVC supports synthesizing move ops.
- OrcRemoteTargetServer(const OrcRemoteTargetServer &) = delete;
- OrcRemoteTargetServer &operator=(const OrcRemoteTargetServer &) = delete;
-
- OrcRemoteTargetServer(OrcRemoteTargetServer &&Other) = default;
- OrcRemoteTargetServer &operator=(OrcRemoteTargetServer &&) = delete;
-
- Expected<JITTargetAddress> requestCompile(JITTargetAddress TrampolineAddr) {
- return callB<utils::RequestCompile>(TrampolineAddr);
- }
-
- bool receivedTerminate() const { return TerminateFlag; }
-
-private:
- struct Allocator {
- Allocator() = default;
- Allocator(Allocator &&Other) : Allocs(std::move(Other.Allocs)) {}
-
- Allocator &operator=(Allocator &&Other) {
- Allocs = std::move(Other.Allocs);
- return *this;
- }
-
- ~Allocator() {
- for (auto &Alloc : Allocs)
- sys::Memory::releaseMappedMemory(Alloc.second);
- }
-
- Error allocate(void *&Addr, size_t Size, uint32_t Align) {
- std::error_code EC;
- sys::MemoryBlock MB = sys::Memory::allocateMappedMemory(
- Size, nullptr, sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC);
- if (EC)
- return errorCodeToError(EC);
-
- Addr = MB.base();
- assert(Allocs.find(MB.base()) == Allocs.end() && "Duplicate alloc");
- Allocs[MB.base()] = std::move(MB);
- return Error::success();
- }
-
- Error setProtections(void *block, unsigned Flags) {
- auto I = Allocs.find(block);
- if (I == Allocs.end())
- return errorCodeToError(orcError(OrcErrorCode::RemoteMProtectAddrUnrecognized));
- return errorCodeToError(
- sys::Memory::protectMappedMemory(I->second, Flags));
- }
-
- private:
- std::map<void *, sys::MemoryBlock> Allocs;
- };
-
- static Error doNothing() { return Error::success(); }
-
- static JITTargetAddress reenter(void *JITTargetAddr, void *TrampolineAddr) {
- auto T = static_cast<OrcRemoteTargetServer *>(JITTargetAddr);
- auto AddrOrErr = T->requestCompile(static_cast<JITTargetAddress>(
- reinterpret_cast<uintptr_t>(TrampolineAddr)));
- // FIXME: Allow customizable failure substitution functions.
- assert(AddrOrErr && "Compile request failed");
- return *AddrOrErr;
- }
-
- Expected<int32_t> handleCallIntVoid(JITTargetAddress Addr) {
- using IntVoidFnTy = int (*)();
-
- IntVoidFnTy Fn =
- reinterpret_cast<IntVoidFnTy>(static_cast<uintptr_t>(Addr));
-
- LLVM_DEBUG(dbgs() << " Calling " << format("0x%016x", Addr) << "\n");
- int Result = Fn();
- LLVM_DEBUG(dbgs() << " Result = " << Result << "\n");
-
- return Result;
- }
-
- Expected<int32_t> handleCallIntInt(JITTargetAddress Addr, int Arg) {
- using IntIntFnTy = int (*)(int);
-
- IntIntFnTy Fn = reinterpret_cast<IntIntFnTy>(static_cast<uintptr_t>(Addr));
-
- LLVM_DEBUG(dbgs() << " Calling " << format("0x%016x", Addr)
- << " with argument " << Arg << "\n");
- int Result = Fn(Arg);
- LLVM_DEBUG(dbgs() << " Result = " << Result << "\n");
-
- return Result;
- }
-
- Expected<int32_t> handleCallMain(JITTargetAddress Addr,
- std::vector<std::string> Args) {
- using MainFnTy = int (*)(int, const char *[]);
-
- MainFnTy Fn = reinterpret_cast<MainFnTy>(static_cast<uintptr_t>(Addr));
- int ArgC = Args.size() + 1;
- int Idx = 1;
- std::unique_ptr<const char *[]> ArgV(new const char *[ArgC + 1]);
- ArgV[0] = "<jit process>";
- for (auto &Arg : Args)
- ArgV[Idx++] = Arg.c_str();
- ArgV[ArgC] = 0;
- LLVM_DEBUG(for (int Idx = 0; Idx < ArgC; ++Idx) {
- llvm::dbgs() << "Arg " << Idx << ": " << ArgV[Idx] << "\n";
- });
-
- LLVM_DEBUG(dbgs() << " Calling " << format("0x%016x", Addr) << "\n");
- int Result = Fn(ArgC, ArgV.get());
- LLVM_DEBUG(dbgs() << " Result = " << Result << "\n");
-
- return Result;
- }
-
- Error handleCallVoidVoid(JITTargetAddress Addr) {
- using VoidVoidFnTy = void (*)();
-
- VoidVoidFnTy Fn =
- reinterpret_cast<VoidVoidFnTy>(static_cast<uintptr_t>(Addr));
-
- LLVM_DEBUG(dbgs() << " Calling " << format("0x%016x", Addr) << "\n");
- Fn();
- LLVM_DEBUG(dbgs() << " Complete.\n");
-
- return Error::success();
- }
-
- Error handleCreateRemoteAllocator(ResourceIdMgr::ResourceId Id) {
- auto I = Allocators.find(Id);
- if (I != Allocators.end())
- return errorCodeToError(
- orcError(OrcErrorCode::RemoteAllocatorIdAlreadyInUse));
- LLVM_DEBUG(dbgs() << " Created allocator " << Id << "\n");
- Allocators[Id] = Allocator();
- return Error::success();
- }
-
- Error handleCreateIndirectStubsOwner(ResourceIdMgr::ResourceId Id) {
- auto I = IndirectStubsOwners.find(Id);
- if (I != IndirectStubsOwners.end())
- return errorCodeToError(
- orcError(OrcErrorCode::RemoteIndirectStubsOwnerIdAlreadyInUse));
- LLVM_DEBUG(dbgs() << " Create indirect stubs owner " << Id << "\n");
- IndirectStubsOwners[Id] = ISBlockOwnerList();
- return Error::success();
- }
-
- Error handleDeregisterEHFrames(JITTargetAddress TAddr, uint32_t Size) {
- uint8_t *Addr = reinterpret_cast<uint8_t *>(static_cast<uintptr_t>(TAddr));
-    LLVM_DEBUG(dbgs() << "  Deregistering EH frames at "
- << format("0x%016x", TAddr) << ", Size = " << Size
- << " bytes\n");
- EHFramesDeregister(Addr, Size);
- return Error::success();
- }
-
- Error handleDestroyRemoteAllocator(ResourceIdMgr::ResourceId Id) {
- auto I = Allocators.find(Id);
- if (I == Allocators.end())
- return errorCodeToError(
- orcError(OrcErrorCode::RemoteAllocatorDoesNotExist));
- Allocators.erase(I);
- LLVM_DEBUG(dbgs() << " Destroyed allocator " << Id << "\n");
- return Error::success();
- }
-
- Error handleDestroyIndirectStubsOwner(ResourceIdMgr::ResourceId Id) {
- auto I = IndirectStubsOwners.find(Id);
- if (I == IndirectStubsOwners.end())
- return errorCodeToError(
- orcError(OrcErrorCode::RemoteIndirectStubsOwnerDoesNotExist));
- IndirectStubsOwners.erase(I);
- return Error::success();
- }
-
- Expected<std::tuple<JITTargetAddress, JITTargetAddress, uint32_t>>
- handleEmitIndirectStubs(ResourceIdMgr::ResourceId Id,
- uint32_t NumStubsRequired) {
- LLVM_DEBUG(dbgs() << " ISMgr " << Id << " request " << NumStubsRequired
- << " stubs.\n");
-
- auto StubOwnerItr = IndirectStubsOwners.find(Id);
- if (StubOwnerItr == IndirectStubsOwners.end())
- return errorCodeToError(
- orcError(OrcErrorCode::RemoteIndirectStubsOwnerDoesNotExist));
-
- auto IS = LocalIndirectStubsInfo<TargetT>::create(
- NumStubsRequired, sys::Process::getPageSizeEstimate());
- if (!IS)
- return IS.takeError();
-
- JITTargetAddress StubsBase = pointerToJITTargetAddress(IS->getStub(0));
- JITTargetAddress PtrsBase = pointerToJITTargetAddress(IS->getPtr(0));
- uint32_t NumStubsEmitted = IS->getNumStubs();
-
- auto &BlockList = StubOwnerItr->second;
- BlockList.push_back(std::move(*IS));
-
- return std::make_tuple(StubsBase, PtrsBase, NumStubsEmitted);
- }
-
- Error handleEmitResolverBlock() {
- std::error_code EC;
- ResolverBlock = sys::OwningMemoryBlock(sys::Memory::allocateMappedMemory(
- TargetT::ResolverCodeSize, nullptr,
- sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC));
- if (EC)
- return errorCodeToError(EC);
-
- TargetT::writeResolverCode(static_cast<char *>(ResolverBlock.base()),
- pointerToJITTargetAddress(ResolverBlock.base()),
- pointerToJITTargetAddress(&reenter),
- pointerToJITTargetAddress(this));
-
- return errorCodeToError(sys::Memory::protectMappedMemory(
- ResolverBlock.getMemoryBlock(),
- sys::Memory::MF_READ | sys::Memory::MF_EXEC));
- }
-
- Expected<std::tuple<JITTargetAddress, uint32_t>> handleEmitTrampolineBlock() {
- std::error_code EC;
- auto TrampolineBlock =
- sys::OwningMemoryBlock(sys::Memory::allocateMappedMemory(
- sys::Process::getPageSizeEstimate(), nullptr,
- sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC));
- if (EC)
- return errorCodeToError(EC);
-
- uint32_t NumTrampolines =
- (sys::Process::getPageSizeEstimate() - TargetT::PointerSize) /
- TargetT::TrampolineSize;
-
- char *TrampolineMem = static_cast<char *>(TrampolineBlock.base());
- TargetT::writeTrampolines(
- TrampolineMem, pointerToJITTargetAddress(TrampolineMem),
- pointerToJITTargetAddress(ResolverBlock.base()), NumTrampolines);
-
- EC = sys::Memory::protectMappedMemory(TrampolineBlock.getMemoryBlock(),
- sys::Memory::MF_READ |
- sys::Memory::MF_EXEC);
-
- TrampolineBlocks.push_back(std::move(TrampolineBlock));
-
- return std::make_tuple(pointerToJITTargetAddress(TrampolineMem),
- NumTrampolines);
- }
-
- Expected<JITTargetAddress> handleGetSymbolAddress(const std::string &Name) {
- JITTargetAddress Addr = SymbolLookup(Name);
- LLVM_DEBUG(dbgs() << " Symbol '" << Name
- << "' = " << format("0x%016x", Addr) << "\n");
- return Addr;
- }
-
- Expected<std::tuple<std::string, uint32_t, uint32_t, uint32_t, uint32_t>>
- handleGetRemoteInfo() {
- std::string ProcessTriple = sys::getProcessTriple();
- uint32_t PointerSize = TargetT::PointerSize;
- uint32_t PageSize = sys::Process::getPageSizeEstimate();
- uint32_t TrampolineSize = TargetT::TrampolineSize;
- uint32_t IndirectStubSize = TargetT::StubSize;
- LLVM_DEBUG(dbgs() << " Remote info:\n"
- << " triple = '" << ProcessTriple << "'\n"
- << " pointer size = " << PointerSize << "\n"
- << " page size = " << PageSize << "\n"
- << " trampoline size = " << TrampolineSize << "\n"
- << " indirect stub size = " << IndirectStubSize
- << "\n");
- return std::make_tuple(ProcessTriple, PointerSize, PageSize, TrampolineSize,
- IndirectStubSize);
- }
-
- Expected<std::vector<uint8_t>> handleReadMem(JITTargetAddress RSrc,
- uint64_t Size) {
- uint8_t *Src = reinterpret_cast<uint8_t *>(static_cast<uintptr_t>(RSrc));
-
- LLVM_DEBUG(dbgs() << " Reading " << Size << " bytes from "
- << format("0x%016x", RSrc) << "\n");
-
- std::vector<uint8_t> Buffer;
-    Buffer.reserve(Size);
- for (uint8_t *P = Src; Size != 0; --Size)
- Buffer.push_back(*P++);
-
- return Buffer;
- }
-
- Error handleRegisterEHFrames(JITTargetAddress TAddr, uint32_t Size) {
- uint8_t *Addr = reinterpret_cast<uint8_t *>(static_cast<uintptr_t>(TAddr));
- LLVM_DEBUG(dbgs() << " Registering EH frames at "
- << format("0x%016x", TAddr) << ", Size = " << Size
- << " bytes\n");
- EHFramesRegister(Addr, Size);
- return Error::success();
- }
-
- Expected<JITTargetAddress> handleReserveMem(ResourceIdMgr::ResourceId Id,
- uint64_t Size, uint32_t Align) {
- auto I = Allocators.find(Id);
- if (I == Allocators.end())
- return errorCodeToError(
- orcError(OrcErrorCode::RemoteAllocatorDoesNotExist));
- auto &Allocator = I->second;
- void *LocalAllocAddr = nullptr;
- if (auto Err = Allocator.allocate(LocalAllocAddr, Size, Align))
- return std::move(Err);
-
- LLVM_DEBUG(dbgs() << " Allocator " << Id << " reserved " << LocalAllocAddr
- << " (" << Size << " bytes, alignment " << Align
- << ")\n");
-
- JITTargetAddress AllocAddr = static_cast<JITTargetAddress>(
- reinterpret_cast<uintptr_t>(LocalAllocAddr));
-
- return AllocAddr;
- }
-
- Error handleSetProtections(ResourceIdMgr::ResourceId Id,
- JITTargetAddress Addr, uint32_t Flags) {
- auto I = Allocators.find(Id);
- if (I == Allocators.end())
- return errorCodeToError(
- orcError(OrcErrorCode::RemoteAllocatorDoesNotExist));
- auto &Allocator = I->second;
- void *LocalAddr = reinterpret_cast<void *>(static_cast<uintptr_t>(Addr));
- LLVM_DEBUG(dbgs() << " Allocator " << Id << " set permissions on "
- << LocalAddr << " to "
- << (Flags & sys::Memory::MF_READ ? 'R' : '-')
- << (Flags & sys::Memory::MF_WRITE ? 'W' : '-')
- << (Flags & sys::Memory::MF_EXEC ? 'X' : '-') << "\n");
- return Allocator.setProtections(LocalAddr, Flags);
- }
-
- Error handleTerminateSession() {
- TerminateFlag = true;
- return Error::success();
- }
-
- Error handleWriteMem(DirectBufferWriter DBW) {
- LLVM_DEBUG(dbgs() << " Writing " << DBW.getSize() << " bytes to "
- << format("0x%016x", DBW.getDst()) << "\n");
- return Error::success();
- }
-
- Error handleWritePtr(JITTargetAddress Addr, JITTargetAddress PtrVal) {
- LLVM_DEBUG(dbgs() << " Writing pointer *" << format("0x%016x", Addr)
- << " = " << format("0x%016x", PtrVal) << "\n");
- uintptr_t *Ptr =
- reinterpret_cast<uintptr_t *>(static_cast<uintptr_t>(Addr));
- *Ptr = static_cast<uintptr_t>(PtrVal);
- return Error::success();
- }
-
- SymbolLookupFtor SymbolLookup;
- EHFrameRegistrationFtor EHFramesRegister, EHFramesDeregister;
- std::map<ResourceIdMgr::ResourceId, Allocator> Allocators;
- using ISBlockOwnerList = std::vector<LocalIndirectStubsInfo<TargetT>>;
- std::map<ResourceIdMgr::ResourceId, ISBlockOwnerList> IndirectStubsOwners;
- sys::OwningMemoryBlock ResolverBlock;
- std::vector<sys::OwningMemoryBlock> TrampolineBlocks;
- bool TerminateFlag = false;
-};
-
-} // end namespace remote
-} // end namespace orc
-} // end namespace llvm
-
-#undef DEBUG_TYPE
-
-#endif // LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETSERVER_H
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h
index 78a6623d7594..3c0b2b9edd52 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h
@@ -34,25 +34,26 @@ private:
};
/// Represents an address in the executor process.
-class ExecutorAddress {
+class ExecutorAddr {
public:
- ExecutorAddress() = default;
- explicit ExecutorAddress(uint64_t Addr) : Addr(Addr) {}
+ ExecutorAddr() = default;
- /// Create an ExecutorAddress from the given pointer.
+ /// Create an ExecutorAddr from the given value.
+ explicit ExecutorAddr(uint64_t Addr) : Addr(Addr) {}
+
+ /// Create an ExecutorAddr from the given pointer.
/// Warning: This should only be used when JITing in-process.
- template <typename T> static ExecutorAddress fromPtr(T *Value) {
- return ExecutorAddress(
+ template <typename T> static ExecutorAddr fromPtr(T *Value) {
+ return ExecutorAddr(
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(Value)));
}
- /// Cast this ExecutorAddress to a pointer of the given type.
- /// Warning: This should only be esude when JITing in-process.
+ /// Cast this ExecutorAddr to a pointer of the given type.
+ /// Warning: This should only be used when JITing in-process.
template <typename T> T toPtr() const {
static_assert(std::is_pointer<T>::value, "T must be a pointer type");
uintptr_t IntPtr = static_cast<uintptr_t>(Addr);
- assert(IntPtr == Addr &&
- "JITTargetAddress value out of range for uintptr_t");
+ assert(IntPtr == Addr && "ExecutorAddr value out of range for uintptr_t");
return reinterpret_cast<T>(IntPtr);
}
@@ -62,53 +63,47 @@ public:
explicit operator bool() const { return Addr != 0; }
- friend bool operator==(const ExecutorAddress &LHS,
- const ExecutorAddress &RHS) {
+ friend bool operator==(const ExecutorAddr &LHS, const ExecutorAddr &RHS) {
return LHS.Addr == RHS.Addr;
}
- friend bool operator!=(const ExecutorAddress &LHS,
- const ExecutorAddress &RHS) {
+ friend bool operator!=(const ExecutorAddr &LHS, const ExecutorAddr &RHS) {
return LHS.Addr != RHS.Addr;
}
- friend bool operator<(const ExecutorAddress &LHS,
- const ExecutorAddress &RHS) {
+ friend bool operator<(const ExecutorAddr &LHS, const ExecutorAddr &RHS) {
return LHS.Addr < RHS.Addr;
}
- friend bool operator<=(const ExecutorAddress &LHS,
- const ExecutorAddress &RHS) {
+ friend bool operator<=(const ExecutorAddr &LHS, const ExecutorAddr &RHS) {
return LHS.Addr <= RHS.Addr;
}
- friend bool operator>(const ExecutorAddress &LHS,
- const ExecutorAddress &RHS) {
+ friend bool operator>(const ExecutorAddr &LHS, const ExecutorAddr &RHS) {
return LHS.Addr > RHS.Addr;
}
- friend bool operator>=(const ExecutorAddress &LHS,
- const ExecutorAddress &RHS) {
+ friend bool operator>=(const ExecutorAddr &LHS, const ExecutorAddr &RHS) {
return LHS.Addr >= RHS.Addr;
}
- ExecutorAddress &operator++() {
+ ExecutorAddr &operator++() {
++Addr;
return *this;
}
- ExecutorAddress &operator--() {
+ ExecutorAddr &operator--() {
--Addr;
return *this;
}
- ExecutorAddress operator++(int) { return ExecutorAddress(Addr++); }
- ExecutorAddress operator--(int) { return ExecutorAddress(Addr++); }
+ ExecutorAddr operator++(int) { return ExecutorAddr(Addr++); }
+ ExecutorAddr operator--(int) { return ExecutorAddr(Addr--); }
- ExecutorAddress &operator+=(const ExecutorAddrDiff Delta) {
+ ExecutorAddr &operator+=(const ExecutorAddrDiff Delta) {
Addr += Delta.getValue();
return *this;
}
- ExecutorAddress &operator-=(const ExecutorAddrDiff Delta) {
+ ExecutorAddr &operator-=(const ExecutorAddrDiff Delta) {
Addr -= Delta.getValue();
return *this;
}
@@ -118,83 +113,98 @@ private:
};
/// Subtracting two addresses yields an offset.
-inline ExecutorAddrDiff operator-(const ExecutorAddress &LHS,
- const ExecutorAddress &RHS) {
+inline ExecutorAddrDiff operator-(const ExecutorAddr &LHS,
+ const ExecutorAddr &RHS) {
return ExecutorAddrDiff(LHS.getValue() - RHS.getValue());
}
/// Adding an offset and an address yields an address.
-inline ExecutorAddress operator+(const ExecutorAddress &LHS,
- const ExecutorAddrDiff &RHS) {
- return ExecutorAddress(LHS.getValue() + RHS.getValue());
+inline ExecutorAddr operator+(const ExecutorAddr &LHS,
+ const ExecutorAddrDiff &RHS) {
+ return ExecutorAddr(LHS.getValue() + RHS.getValue());
}
/// Adding an address and an offset yields an address.
-inline ExecutorAddress operator+(const ExecutorAddrDiff &LHS,
- const ExecutorAddress &RHS) {
- return ExecutorAddress(LHS.getValue() + RHS.getValue());
+inline ExecutorAddr operator+(const ExecutorAddrDiff &LHS,
+ const ExecutorAddr &RHS) {
+ return ExecutorAddr(LHS.getValue() + RHS.getValue());
}
/// Represents an address range in the exceutor process.
-struct ExecutorAddressRange {
- ExecutorAddressRange() = default;
- ExecutorAddressRange(ExecutorAddress StartAddress, ExecutorAddress EndAddress)
- : StartAddress(StartAddress), EndAddress(EndAddress) {}
+struct ExecutorAddrRange {
+ ExecutorAddrRange() = default;
+ ExecutorAddrRange(ExecutorAddr Start, ExecutorAddr End)
+ : Start(Start), End(End) {}
+ ExecutorAddrRange(ExecutorAddr Start, ExecutorAddrDiff Size)
+ : Start(Start), End(Start + Size) {}
- bool empty() const { return StartAddress == EndAddress; }
- ExecutorAddrDiff size() const { return EndAddress - StartAddress; }
+ bool empty() const { return Start == End; }
+ ExecutorAddrDiff size() const { return End - Start; }
- ExecutorAddress StartAddress;
- ExecutorAddress EndAddress;
+ friend bool operator==(const ExecutorAddrRange &LHS,
+ const ExecutorAddrRange &RHS) {
+ return LHS.Start == RHS.Start && LHS.End == RHS.End;
+ }
+ friend bool operator!=(const ExecutorAddrRange &LHS,
+ const ExecutorAddrRange &RHS) {
+ return !(LHS == RHS);
+ }
+ bool contains(ExecutorAddr Addr) const { return Start <= Addr && Addr < End; }
+ bool overlaps(const ExecutorAddrRange &Other) {
+ return !(Other.End <= Start || End <= Other.Start);
+ }
+
+ ExecutorAddr Start;
+ ExecutorAddr End;
};
namespace shared {
-/// SPS serializatior for ExecutorAddress.
-template <> class SPSSerializationTraits<SPSExecutorAddress, ExecutorAddress> {
+class SPSExecutorAddr {};
+
+/// SPS serializatior for ExecutorAddr.
+template <> class SPSSerializationTraits<SPSExecutorAddr, ExecutorAddr> {
public:
- static size_t size(const ExecutorAddress &EA) {
+ static size_t size(const ExecutorAddr &EA) {
return SPSArgList<uint64_t>::size(EA.getValue());
}
- static bool serialize(SPSOutputBuffer &BOB, const ExecutorAddress &EA) {
+ static bool serialize(SPSOutputBuffer &BOB, const ExecutorAddr &EA) {
return SPSArgList<uint64_t>::serialize(BOB, EA.getValue());
}
- static bool deserialize(SPSInputBuffer &BIB, ExecutorAddress &EA) {
+ static bool deserialize(SPSInputBuffer &BIB, ExecutorAddr &EA) {
uint64_t Tmp;
if (!SPSArgList<uint64_t>::deserialize(BIB, Tmp))
return false;
- EA = ExecutorAddress(Tmp);
+ EA = ExecutorAddr(Tmp);
return true;
}
};
-using SPSExecutorAddressRange =
- SPSTuple<SPSExecutorAddress, SPSExecutorAddress>;
+using SPSExecutorAddrRange = SPSTuple<SPSExecutorAddr, SPSExecutorAddr>;
/// Serialization traits for address ranges.
template <>
-class SPSSerializationTraits<SPSExecutorAddressRange, ExecutorAddressRange> {
+class SPSSerializationTraits<SPSExecutorAddrRange, ExecutorAddrRange> {
public:
- static size_t size(const ExecutorAddressRange &Value) {
- return SPSArgList<SPSExecutorAddress, SPSExecutorAddress>::size(
- Value.StartAddress, Value.EndAddress);
+ static size_t size(const ExecutorAddrRange &Value) {
+ return SPSArgList<SPSExecutorAddr, SPSExecutorAddr>::size(Value.Start,
+ Value.End);
}
- static bool serialize(SPSOutputBuffer &BOB,
- const ExecutorAddressRange &Value) {
- return SPSArgList<SPSExecutorAddress, SPSExecutorAddress>::serialize(
- BOB, Value.StartAddress, Value.EndAddress);
+ static bool serialize(SPSOutputBuffer &BOB, const ExecutorAddrRange &Value) {
+ return SPSArgList<SPSExecutorAddr, SPSExecutorAddr>::serialize(
+ BOB, Value.Start, Value.End);
}
- static bool deserialize(SPSInputBuffer &BIB, ExecutorAddressRange &Value) {
- return SPSArgList<SPSExecutorAddress, SPSExecutorAddress>::deserialize(
- BIB, Value.StartAddress, Value.EndAddress);
+ static bool deserialize(SPSInputBuffer &BIB, ExecutorAddrRange &Value) {
+ return SPSArgList<SPSExecutorAddr, SPSExecutorAddr>::deserialize(
+ BIB, Value.Start, Value.End);
}
};
-using SPSExecutorAddressRangeSequence = SPSSequence<SPSExecutorAddressRange>;
+using SPSExecutorAddrRangeSequence = SPSSequence<SPSExecutorAddrRange>;
} // End namespace shared.
} // End namespace orc.
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/FDRawByteChannel.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/FDRawByteChannel.h
deleted file mode 100644
index 3f96fe3da49d..000000000000
--- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/FDRawByteChannel.h
+++ /dev/null
@@ -1,79 +0,0 @@
-//===- FDRawByteChannel.h - File descriptor based byte-channel -*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// File descriptor based RawByteChannel.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_EXECUTIONENGINE_ORC_SHARED_FDRAWBYTECHANNEL_H
-#define LLVM_EXECUTIONENGINE_ORC_SHARED_FDRAWBYTECHANNEL_H
-
-#include "llvm/ExecutionEngine/Orc/Shared/RawByteChannel.h"
-
-#if !defined(_MSC_VER) && !defined(__MINGW32__)
-#include <unistd.h>
-#else
-#include <io.h>
-#endif
-
-namespace llvm {
-namespace orc {
-namespace shared {
-
-/// Serialization channel that reads from and writes to file descriptors.
-class FDRawByteChannel final : public RawByteChannel {
-public:
- FDRawByteChannel(int InFD, int OutFD) : InFD(InFD), OutFD(OutFD) {}
-
- llvm::Error readBytes(char *Dst, unsigned Size) override {
- assert(Dst && "Attempt to read into null.");
- ssize_t Completed = 0;
- while (Completed < static_cast<ssize_t>(Size)) {
- ssize_t Read = ::read(InFD, Dst + Completed, Size - Completed);
- if (Read <= 0) {
- auto ErrNo = errno;
- if (ErrNo == EAGAIN || ErrNo == EINTR)
- continue;
- else
- return llvm::errorCodeToError(
- std::error_code(errno, std::generic_category()));
- }
- Completed += Read;
- }
- return llvm::Error::success();
- }
-
- llvm::Error appendBytes(const char *Src, unsigned Size) override {
- assert(Src && "Attempt to append from null.");
- ssize_t Completed = 0;
- while (Completed < static_cast<ssize_t>(Size)) {
- ssize_t Written = ::write(OutFD, Src + Completed, Size - Completed);
- if (Written < 0) {
- auto ErrNo = errno;
- if (ErrNo == EAGAIN || ErrNo == EINTR)
- continue;
- else
- return llvm::errorCodeToError(
- std::error_code(errno, std::generic_category()));
- }
- Completed += Written;
- }
- return llvm::Error::success();
- }
-
- llvm::Error send() override { return llvm::Error::success(); }
-
-private:
- int InFD, OutFD;
-};
-
-} // namespace shared
-} // namespace orc
-} // namespace llvm
-
-#endif // LLVM_EXECUTIONENGINE_ORC_SHARED_FDRAWBYTECHANNEL_H
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h
new file mode 100644
index 000000000000..3ef43f33d84c
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h
@@ -0,0 +1,68 @@
+//===---- OrcRTBridge.h -- Utils for interacting with orc-rt ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Declares types and symbol names provided by the ORC runtime.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_SHARED_ORCRTBRIDGE_H
+#define LLVM_EXECUTIONENGINE_ORC_SHARED_ORCRTBRIDGE_H
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h"
+#include "llvm/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.h"
+#include "llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h"
+
+namespace llvm {
+namespace orc {
+namespace rt {
+
+extern const char *SimpleExecutorDylibManagerInstanceName;
+extern const char *SimpleExecutorDylibManagerOpenWrapperName;
+extern const char *SimpleExecutorDylibManagerLookupWrapperName;
+
+extern const char *SimpleExecutorMemoryManagerInstanceName;
+extern const char *SimpleExecutorMemoryManagerReserveWrapperName;
+extern const char *SimpleExecutorMemoryManagerFinalizeWrapperName;
+extern const char *SimpleExecutorMemoryManagerDeallocateWrapperName;
+
+extern const char *MemoryWriteUInt8sWrapperName;
+extern const char *MemoryWriteUInt16sWrapperName;
+extern const char *MemoryWriteUInt32sWrapperName;
+extern const char *MemoryWriteUInt64sWrapperName;
+extern const char *MemoryWriteBuffersWrapperName;
+
+extern const char *RegisterEHFrameSectionCustomDirectWrapperName;
+extern const char *DeregisterEHFrameSectionCustomDirectWrapperName;
+
+extern const char *RunAsMainWrapperName;
+
+using SPSSimpleExecutorDylibManagerOpenSignature =
+ shared::SPSExpected<uint64_t>(shared::SPSExecutorAddr, shared::SPSString,
+ uint64_t);
+
+using SPSSimpleExecutorDylibManagerLookupSignature =
+ shared::SPSExpected<shared::SPSSequence<shared::SPSExecutorAddr>>(
+ shared::SPSExecutorAddr, uint64_t, shared::SPSRemoteSymbolLookupSet);
+
+using SPSSimpleExecutorMemoryManagerReserveSignature =
+ shared::SPSExpected<shared::SPSExecutorAddr>(shared::SPSExecutorAddr,
+ uint64_t);
+using SPSSimpleExecutorMemoryManagerFinalizeSignature =
+ shared::SPSError(shared::SPSExecutorAddr, shared::SPSFinalizeRequest);
+using SPSSimpleExecutorMemoryManagerDeallocateSignature = shared::SPSError(
+ shared::SPSExecutorAddr, shared::SPSSequence<shared::SPSExecutorAddr>);
+
+using SPSRunAsMainSignature = int64_t(shared::SPSExecutorAddr,
+ shared::SPSSequence<shared::SPSString>);
+
+} // end namespace rt
+} // end namespace orc
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_SHARED_ORCRTBRIDGE_H
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/RPCUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/RPCUtils.h
deleted file mode 100644
index 1ff47ce42758..000000000000
--- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/RPCUtils.h
+++ /dev/null
@@ -1,1659 +0,0 @@
-//===- RPCUtils.h - Utilities for building RPC APIs -------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Utilities to support construction of simple RPC APIs.
-//
-// The RPC utilities aim for ease of use (minimal conceptual overhead) for C++
-// programmers, high performance, low memory overhead, and efficient use of the
-// communications channel.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_EXECUTIONENGINE_ORC_SHARED_RPCUTILS_H
-#define LLVM_EXECUTIONENGINE_ORC_SHARED_RPCUTILS_H
-
-#include <map>
-#include <thread>
-#include <vector>
-
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ExecutionEngine/Orc/Shared/OrcError.h"
-#include "llvm/ExecutionEngine/Orc/Shared/Serialization.h"
-#include "llvm/Support/MSVCErrorWorkarounds.h"
-
-#include <future>
-
-namespace llvm {
-namespace orc {
-namespace shared {
-
-/// Base class of all fatal RPC errors (those that necessarily result in the
-/// termination of the RPC session).
-class RPCFatalError : public ErrorInfo<RPCFatalError> {
-public:
- static char ID;
-};
-
-/// RPCConnectionClosed is returned from RPC operations if the RPC connection
-/// has already been closed due to either an error or graceful disconnection.
-class ConnectionClosed : public ErrorInfo<ConnectionClosed> {
-public:
- static char ID;
- std::error_code convertToErrorCode() const override;
- void log(raw_ostream &OS) const override;
-};
-
-/// BadFunctionCall is returned from handleOne when the remote makes a call with
-/// an unrecognized function id.
-///
-/// This error is fatal because Orc RPC needs to know how to parse a function
-/// call to know where the next call starts, and if it doesn't recognize the
-/// function id it cannot parse the call.
-template <typename FnIdT, typename SeqNoT>
-class BadFunctionCall
- : public ErrorInfo<BadFunctionCall<FnIdT, SeqNoT>, RPCFatalError> {
-public:
- static char ID;
-
- BadFunctionCall(FnIdT FnId, SeqNoT SeqNo)
- : FnId(std::move(FnId)), SeqNo(std::move(SeqNo)) {}
-
- std::error_code convertToErrorCode() const override {
- return orcError(OrcErrorCode::UnexpectedRPCCall);
- }
-
- void log(raw_ostream &OS) const override {
- OS << "Call to invalid RPC function id '" << FnId
- << "' with "
- "sequence number "
- << SeqNo;
- }
-
-private:
- FnIdT FnId;
- SeqNoT SeqNo;
-};
-
-template <typename FnIdT, typename SeqNoT>
-char BadFunctionCall<FnIdT, SeqNoT>::ID = 0;
-
-/// InvalidSequenceNumberForResponse is returned from handleOne when a response
-/// call arrives with a sequence number that doesn't correspond to any in-flight
-/// function call.
-///
-/// This error is fatal because Orc RPC needs to know how to parse the rest of
-/// the response call to know where the next call starts, and if it doesn't have
-/// a result parser for this sequence number it can't do that.
-template <typename SeqNoT>
-class InvalidSequenceNumberForResponse
- : public ErrorInfo<InvalidSequenceNumberForResponse<SeqNoT>,
- RPCFatalError> {
-public:
- static char ID;
-
- InvalidSequenceNumberForResponse(SeqNoT SeqNo) : SeqNo(std::move(SeqNo)) {}
-
- std::error_code convertToErrorCode() const override {
- return orcError(OrcErrorCode::UnexpectedRPCCall);
- };
-
- void log(raw_ostream &OS) const override {
- OS << "Response has unknown sequence number " << SeqNo;
- }
-
-private:
- SeqNoT SeqNo;
-};
-
-template <typename SeqNoT>
-char InvalidSequenceNumberForResponse<SeqNoT>::ID = 0;
-
-/// This non-fatal error will be passed to asynchronous result handlers in place
-/// of a result if the connection goes down before a result returns, or if the
-/// function to be called cannot be negotiated with the remote.
-class ResponseAbandoned : public ErrorInfo<ResponseAbandoned> {
-public:
- static char ID;
-
- std::error_code convertToErrorCode() const override;
- void log(raw_ostream &OS) const override;
-};
-
-/// This error is returned if the remote does not have a handler installed for
-/// the given RPC function.
-class CouldNotNegotiate : public ErrorInfo<CouldNotNegotiate> {
-public:
- static char ID;
-
- CouldNotNegotiate(std::string Signature);
- std::error_code convertToErrorCode() const override;
- void log(raw_ostream &OS) const override;
- const std::string &getSignature() const { return Signature; }
-
-private:
- std::string Signature;
-};
-
-template <typename DerivedFunc, typename FnT> class RPCFunction;
-
-// RPC Function class.
-// DerivedFunc should be a user defined class with a static 'getName()' method
-// returning a const char* representing the function's name.
-template <typename DerivedFunc, typename RetT, typename... ArgTs>
-class RPCFunction<DerivedFunc, RetT(ArgTs...)> {
-public:
- /// User defined function type.
- using Type = RetT(ArgTs...);
-
- /// Return type.
- using ReturnType = RetT;
-
- /// Returns the full function prototype as a string.
- static const char *getPrototype() {
- static std::string Name = [] {
- std::string Name;
- raw_string_ostream(Name)
- << SerializationTypeName<RetT>::getName() << " "
- << DerivedFunc::getName() << "("
- << SerializationTypeNameSequence<ArgTs...>() << ")";
- return Name;
- }();
- return Name.data();
- }
-};
-
-/// Allocates RPC function ids during autonegotiation.
-/// Specializations of this class must provide four members:
-///
-/// static T getInvalidId():
-/// Should return a reserved id that will be used to represent missing
-/// functions during autonegotiation.
-///
-/// static T getResponseId():
-/// Should return a reserved id that will be used to send function responses
-/// (return values).
-///
-/// static T getNegotiateId():
-/// Should return a reserved id for the negotiate function, which will be used
-/// to negotiate ids for user defined functions.
-///
-/// template <typename Func> T allocate():
-/// Allocate a unique id for function Func.
-template <typename T, typename = void> class RPCFunctionIdAllocator;
-
-/// This specialization of RPCFunctionIdAllocator provides a default
-/// implementation for integral types.
-template <typename T>
-class RPCFunctionIdAllocator<T, std::enable_if_t<std::is_integral<T>::value>> {
-public:
- static T getInvalidId() { return T(0); }
- static T getResponseId() { return T(1); }
- static T getNegotiateId() { return T(2); }
-
- template <typename Func> T allocate() { return NextId++; }
-
-private:
- T NextId = 3;
-};
-
-namespace detail {
-
-/// Provides a typedef for a tuple containing the decayed argument types.
-template <typename T> class RPCFunctionArgsTuple;
-
-template <typename RetT, typename... ArgTs>
-class RPCFunctionArgsTuple<RetT(ArgTs...)> {
-public:
- using Type = std::tuple<std::decay_t<std::remove_reference_t<ArgTs>>...>;
-};
-
-// ResultTraits provides typedefs and utilities specific to the return type
-// of functions.
-template <typename RetT> class ResultTraits {
-public:
- // The return type wrapped in llvm::Expected.
- using ErrorReturnType = Expected<RetT>;
-
-#ifdef _MSC_VER
- // The ErrorReturnType wrapped in a std::promise.
- using ReturnPromiseType = std::promise<MSVCPExpected<RetT>>;
-
- // The ErrorReturnType wrapped in a std::future.
- using ReturnFutureType = std::future<MSVCPExpected<RetT>>;
-#else
- // The ErrorReturnType wrapped in a std::promise.
- using ReturnPromiseType = std::promise<ErrorReturnType>;
-
- // The ErrorReturnType wrapped in a std::future.
- using ReturnFutureType = std::future<ErrorReturnType>;
-#endif
-
- // Create a 'blank' value of the ErrorReturnType, ready and safe to
- // overwrite.
- static ErrorReturnType createBlankErrorReturnValue() {
- return ErrorReturnType(RetT());
- }
-
- // Consume an abandoned ErrorReturnType.
- static void consumeAbandoned(ErrorReturnType RetOrErr) {
- consumeError(RetOrErr.takeError());
- }
-
- static ErrorReturnType returnError(Error Err) { return std::move(Err); }
-};
-
-// ResultTraits specialization for void functions.
-template <> class ResultTraits<void> {
-public:
- // For void functions, ErrorReturnType is llvm::Error.
- using ErrorReturnType = Error;
-
-#ifdef _MSC_VER
- // The ErrorReturnType wrapped in a std::promise.
- using ReturnPromiseType = std::promise<MSVCPError>;
-
- // The ErrorReturnType wrapped in a std::future.
- using ReturnFutureType = std::future<MSVCPError>;
-#else
- // The ErrorReturnType wrapped in a std::promise.
- using ReturnPromiseType = std::promise<ErrorReturnType>;
-
- // The ErrorReturnType wrapped in a std::future.
- using ReturnFutureType = std::future<ErrorReturnType>;
-#endif
-
- // Create a 'blank' value of the ErrorReturnType, ready and safe to
- // overwrite.
- static ErrorReturnType createBlankErrorReturnValue() {
- return ErrorReturnType::success();
- }
-
- // Consume an abandoned ErrorReturnType.
- static void consumeAbandoned(ErrorReturnType Err) {
- consumeError(std::move(Err));
- }
-
- static ErrorReturnType returnError(Error Err) { return Err; }
-};
-
-// ResultTraits<Error> is equivalent to ResultTraits<void>. This allows
-// handlers for void RPC functions to return either void (in which case they
-// implicitly succeed) or Error (in which case their error return is
-// propagated). See usage in HandlerTraits::runHandlerHelper.
-template <> class ResultTraits<Error> : public ResultTraits<void> {};
-
-// ResultTraits<Expected<T>> is equivalent to ResultTraits<T>. This allows
-// handlers for RPC functions returning a T to return either a T (in which
-// case they implicitly succeed) or Expected<T> (in which case their error
-// return is propagated). See usage in HandlerTraits::runHandlerHelper.
-template <typename RetT>
-class ResultTraits<Expected<RetT>> : public ResultTraits<RetT> {};
-
-// Determines whether an RPC function's defined error return type supports
-// error return value.
-template <typename T> class SupportsErrorReturn {
-public:
- static const bool value = false;
-};
-
-template <> class SupportsErrorReturn<Error> {
-public:
- static const bool value = true;
-};
-
-template <typename T> class SupportsErrorReturn<Expected<T>> {
-public:
- static const bool value = true;
-};
-
-// RespondHelper packages return values based on whether or not the declared
-// RPC function return type supports error returns.
-template <bool FuncSupportsErrorReturn> class RespondHelper;
-
-// RespondHelper specialization for functions that support error returns.
-template <> class RespondHelper<true> {
-public:
- // Send Expected<T>.
- template <typename WireRetT, typename HandlerRetT, typename ChannelT,
- typename FunctionIdT, typename SequenceNumberT>
- static Error sendResult(ChannelT &C, const FunctionIdT &ResponseId,
- SequenceNumberT SeqNo,
- Expected<HandlerRetT> ResultOrErr) {
- if (!ResultOrErr && ResultOrErr.template errorIsA<RPCFatalError>())
- return ResultOrErr.takeError();
-
- // Open the response message.
- if (auto Err = C.startSendMessage(ResponseId, SeqNo))
- return Err;
-
- // Serialize the result.
- if (auto Err =
- SerializationTraits<ChannelT, WireRetT, Expected<HandlerRetT>>::
- serialize(C, std::move(ResultOrErr)))
- return Err;
-
- // Close the response message.
- if (auto Err = C.endSendMessage())
- return Err;
- return C.send();
- }
-
- template <typename ChannelT, typename FunctionIdT, typename SequenceNumberT>
- static Error sendResult(ChannelT &C, const FunctionIdT &ResponseId,
- SequenceNumberT SeqNo, Error Err) {
- if (Err && Err.isA<RPCFatalError>())
- return Err;
- if (auto Err2 = C.startSendMessage(ResponseId, SeqNo))
- return Err2;
- if (auto Err2 = serializeSeq(C, std::move(Err)))
- return Err2;
- if (auto Err2 = C.endSendMessage())
- return Err2;
- return C.send();
- }
-};
-
-// RespondHelper specialization for functions that do not support error returns.
-template <> class RespondHelper<false> {
-public:
- template <typename WireRetT, typename HandlerRetT, typename ChannelT,
- typename FunctionIdT, typename SequenceNumberT>
- static Error sendResult(ChannelT &C, const FunctionIdT &ResponseId,
- SequenceNumberT SeqNo,
- Expected<HandlerRetT> ResultOrErr) {
- if (auto Err = ResultOrErr.takeError())
- return Err;
-
- // Open the response message.
- if (auto Err = C.startSendMessage(ResponseId, SeqNo))
- return Err;
-
- // Serialize the result.
- if (auto Err =
- SerializationTraits<ChannelT, WireRetT, HandlerRetT>::serialize(
- C, *ResultOrErr))
- return Err;
-
- // End the response message.
- if (auto Err = C.endSendMessage())
- return Err;
-
- return C.send();
- }
-
- template <typename ChannelT, typename FunctionIdT, typename SequenceNumberT>
- static Error sendResult(ChannelT &C, const FunctionIdT &ResponseId,
- SequenceNumberT SeqNo, Error Err) {
- if (Err)
- return Err;
- if (auto Err2 = C.startSendMessage(ResponseId, SeqNo))
- return Err2;
- if (auto Err2 = C.endSendMessage())
- return Err2;
- return C.send();
- }
-};
-
-// Send a response of the given wire return type (WireRetT) over the
-// channel, with the given sequence number.
-template <typename WireRetT, typename HandlerRetT, typename ChannelT,
- typename FunctionIdT, typename SequenceNumberT>
-Error respond(ChannelT &C, const FunctionIdT &ResponseId, SequenceNumberT SeqNo,
- Expected<HandlerRetT> ResultOrErr) {
- return RespondHelper<SupportsErrorReturn<WireRetT>::value>::
- template sendResult<WireRetT>(C, ResponseId, SeqNo,
- std::move(ResultOrErr));
-}
-
-// Send an empty response message on the given channel to indicate that
-// the handler ran.
-template <typename WireRetT, typename ChannelT, typename FunctionIdT,
- typename SequenceNumberT>
-Error respond(ChannelT &C, const FunctionIdT &ResponseId, SequenceNumberT SeqNo,
- Error Err) {
- return RespondHelper<SupportsErrorReturn<WireRetT>::value>::sendResult(
- C, ResponseId, SeqNo, std::move(Err));
-}
-
-// Converts a given type to the equivalent error return type.
-template <typename T> class WrappedHandlerReturn {
-public:
- using Type = Expected<T>;
-};
-
-template <typename T> class WrappedHandlerReturn<Expected<T>> {
-public:
- using Type = Expected<T>;
-};
-
-template <> class WrappedHandlerReturn<void> {
-public:
- using Type = Error;
-};
-
-template <> class WrappedHandlerReturn<Error> {
-public:
- using Type = Error;
-};
-
-template <> class WrappedHandlerReturn<ErrorSuccess> {
-public:
- using Type = Error;
-};
-
-// Traits class that strips the response function from the list of handler
-// arguments.
-template <typename FnT> class AsyncHandlerTraits;
-
-template <typename ResultT, typename... ArgTs>
-class AsyncHandlerTraits<Error(std::function<Error(Expected<ResultT>)>,
- ArgTs...)> {
-public:
- using Type = Error(ArgTs...);
- using ResultType = Expected<ResultT>;
-};
-
-template <typename... ArgTs>
-class AsyncHandlerTraits<Error(std::function<Error(Error)>, ArgTs...)> {
-public:
- using Type = Error(ArgTs...);
- using ResultType = Error;
-};
-
-template <typename... ArgTs>
-class AsyncHandlerTraits<ErrorSuccess(std::function<Error(Error)>, ArgTs...)> {
-public:
- using Type = Error(ArgTs...);
- using ResultType = Error;
-};
-
-template <typename... ArgTs>
-class AsyncHandlerTraits<void(std::function<Error(Error)>, ArgTs...)> {
-public:
- using Type = Error(ArgTs...);
- using ResultType = Error;
-};
-
-template <typename ResponseHandlerT, typename... ArgTs>
-class AsyncHandlerTraits<Error(ResponseHandlerT, ArgTs...)>
- : public AsyncHandlerTraits<Error(std::decay_t<ResponseHandlerT>,
- ArgTs...)> {};
-
-// This template class provides utilities related to RPC function handlers.
-// The base case applies to non-function types (the template class is
-// specialized for function types) and inherits from the appropriate
-// specialization for the given non-function type's call operator.
-template <typename HandlerT>
-class HandlerTraits
- : public HandlerTraits<
- decltype(&std::remove_reference<HandlerT>::type::operator())> {};
-
-// Traits for handlers with a given function type.
-template <typename RetT, typename... ArgTs>
-class HandlerTraits<RetT(ArgTs...)> {
-public:
- // Function type of the handler.
- using Type = RetT(ArgTs...);
-
- // Return type of the handler.
- using ReturnType = RetT;
-
- // Call the given handler with the given arguments.
- template <typename HandlerT, typename... TArgTs>
- static typename WrappedHandlerReturn<RetT>::Type
- unpackAndRun(HandlerT &Handler, std::tuple<TArgTs...> &Args) {
- return unpackAndRunHelper(Handler, Args,
- std::index_sequence_for<TArgTs...>());
- }
-
- // Call the given handler with the given arguments.
- template <typename HandlerT, typename ResponderT, typename... TArgTs>
- static Error unpackAndRunAsync(HandlerT &Handler, ResponderT &Responder,
- std::tuple<TArgTs...> &Args) {
- return unpackAndRunAsyncHelper(Handler, Responder, Args,
- std::index_sequence_for<TArgTs...>());
- }
-
- // Call the given handler with the given arguments.
- template <typename HandlerT>
- static std::enable_if_t<
- std::is_void<typename HandlerTraits<HandlerT>::ReturnType>::value, Error>
- run(HandlerT &Handler, ArgTs &&...Args) {
- Handler(std::move(Args)...);
- return Error::success();
- }
-
- template <typename HandlerT, typename... TArgTs>
- static std::enable_if_t<
- !std::is_void<typename HandlerTraits<HandlerT>::ReturnType>::value,
- typename HandlerTraits<HandlerT>::ReturnType>
- run(HandlerT &Handler, TArgTs... Args) {
- return Handler(std::move(Args)...);
- }
-
- // Serialize arguments to the channel.
- template <typename ChannelT, typename... CArgTs>
- static Error serializeArgs(ChannelT &C, const CArgTs... CArgs) {
- return SequenceSerialization<ChannelT, ArgTs...>::serialize(C, CArgs...);
- }
-
- // Deserialize arguments from the channel.
- template <typename ChannelT, typename... CArgTs>
- static Error deserializeArgs(ChannelT &C, std::tuple<CArgTs...> &Args) {
- return deserializeArgsHelper(C, Args, std::index_sequence_for<CArgTs...>());
- }
-
-private:
- template <typename ChannelT, typename... CArgTs, size_t... Indexes>
- static Error deserializeArgsHelper(ChannelT &C, std::tuple<CArgTs...> &Args,
- std::index_sequence<Indexes...> _) {
- return SequenceSerialization<ChannelT, ArgTs...>::deserialize(
- C, std::get<Indexes>(Args)...);
- }
-
- template <typename HandlerT, typename ArgTuple, size_t... Indexes>
- static typename WrappedHandlerReturn<
- typename HandlerTraits<HandlerT>::ReturnType>::Type
- unpackAndRunHelper(HandlerT &Handler, ArgTuple &Args,
- std::index_sequence<Indexes...>) {
- return run(Handler, std::move(std::get<Indexes>(Args))...);
- }
-
- template <typename HandlerT, typename ResponderT, typename ArgTuple,
- size_t... Indexes>
- static typename WrappedHandlerReturn<
- typename HandlerTraits<HandlerT>::ReturnType>::Type
- unpackAndRunAsyncHelper(HandlerT &Handler, ResponderT &Responder,
- ArgTuple &Args, std::index_sequence<Indexes...>) {
- return run(Handler, Responder, std::move(std::get<Indexes>(Args))...);
- }
-};
-
-// Handler traits for free functions.
-template <typename RetT, typename... ArgTs>
-class HandlerTraits<RetT (*)(ArgTs...)> : public HandlerTraits<RetT(ArgTs...)> {
-};
-
-// Handler traits for class methods (especially call operators for lambdas).
-template <typename Class, typename RetT, typename... ArgTs>
-class HandlerTraits<RetT (Class::*)(ArgTs...)>
- : public HandlerTraits<RetT(ArgTs...)> {};
-
-// Handler traits for const class methods (especially call operators for
-// lambdas).
-template <typename Class, typename RetT, typename... ArgTs>
-class HandlerTraits<RetT (Class::*)(ArgTs...) const>
- : public HandlerTraits<RetT(ArgTs...)> {};
-
-// Utility to peel the Expected wrapper off a response handler error type.
-template <typename HandlerT> class ResponseHandlerArg;
-
-template <typename ArgT> class ResponseHandlerArg<Error(Expected<ArgT>)> {
-public:
- using ArgType = Expected<ArgT>;
- using UnwrappedArgType = ArgT;
-};
-
-template <typename ArgT>
-class ResponseHandlerArg<ErrorSuccess(Expected<ArgT>)> {
-public:
- using ArgType = Expected<ArgT>;
- using UnwrappedArgType = ArgT;
-};
-
-template <> class ResponseHandlerArg<Error(Error)> {
-public:
- using ArgType = Error;
-};
-
-template <> class ResponseHandlerArg<ErrorSuccess(Error)> {
-public:
- using ArgType = Error;
-};
-
-// ResponseHandler represents a handler for a not-yet-received function call
-// result.
-template <typename ChannelT> class ResponseHandler {
-public:
- virtual ~ResponseHandler() {}
-
- // Reads the function result off the wire and acts on it. The meaning of
- // "act" will depend on how this method is implemented in any given
- // ResponseHandler subclass but could, for example, mean running a
- // user-specified handler or setting a promise value.
- virtual Error handleResponse(ChannelT &C) = 0;
-
- // Abandons this outstanding result.
- virtual void abandon() = 0;
-
- // Create an error instance representing an abandoned response.
- static Error createAbandonedResponseError() {
- return make_error<ResponseAbandoned>();
- }
-};
-
-// ResponseHandler subclass for RPC functions with non-void returns.
-template <typename ChannelT, typename FuncRetT, typename HandlerT>
-class ResponseHandlerImpl : public ResponseHandler<ChannelT> {
-public:
- ResponseHandlerImpl(HandlerT Handler) : Handler(std::move(Handler)) {}
-
- // Handle the result by deserializing it from the channel then passing it
- // to the user defined handler.
- Error handleResponse(ChannelT &C) override {
- using UnwrappedArgType = typename ResponseHandlerArg<
- typename HandlerTraits<HandlerT>::Type>::UnwrappedArgType;
- UnwrappedArgType Result;
- if (auto Err =
- SerializationTraits<ChannelT, FuncRetT,
- UnwrappedArgType>::deserialize(C, Result))
- return Err;
- if (auto Err = C.endReceiveMessage())
- return Err;
- return Handler(std::move(Result));
- }
-
- // Abandon this response by calling the handler with an 'abandoned response'
- // error.
- void abandon() override {
- if (auto Err = Handler(this->createAbandonedResponseError())) {
- // Handlers should not fail when passed an abandoned response error.
- report_fatal_error(std::move(Err));
- }
- }
-
-private:
- HandlerT Handler;
-};
-
-// ResponseHandler subclass for RPC functions with void returns.
-template <typename ChannelT, typename HandlerT>
-class ResponseHandlerImpl<ChannelT, void, HandlerT>
- : public ResponseHandler<ChannelT> {
-public:
- ResponseHandlerImpl(HandlerT Handler) : Handler(std::move(Handler)) {}
-
- // Handle the result (no actual value, just a notification that the function
- // has completed on the remote end) by calling the user-defined handler with
- // Error::success().
- Error handleResponse(ChannelT &C) override {
- if (auto Err = C.endReceiveMessage())
- return Err;
- return Handler(Error::success());
- }
-
- // Abandon this response by calling the handler with an 'abandoned response'
- // error.
- void abandon() override {
- if (auto Err = Handler(this->createAbandonedResponseError())) {
- // Handlers should not fail when passed an abandoned response error.
- report_fatal_error(std::move(Err));
- }
- }
-
-private:
- HandlerT Handler;
-};
-
-template <typename ChannelT, typename FuncRetT, typename HandlerT>
-class ResponseHandlerImpl<ChannelT, Expected<FuncRetT>, HandlerT>
- : public ResponseHandler<ChannelT> {
-public:
- ResponseHandlerImpl(HandlerT Handler) : Handler(std::move(Handler)) {}
-
- // Handle the result by deserializing it from the channel then passing it
- // to the user defined handler.
- Error handleResponse(ChannelT &C) override {
- using HandlerArgType = typename ResponseHandlerArg<
- typename HandlerTraits<HandlerT>::Type>::ArgType;
- HandlerArgType Result((typename HandlerArgType::value_type()));
-
- if (auto Err = SerializationTraits<ChannelT, Expected<FuncRetT>,
- HandlerArgType>::deserialize(C, Result))
- return Err;
- if (auto Err = C.endReceiveMessage())
- return Err;
- return Handler(std::move(Result));
- }
-
- // Abandon this response by calling the handler with an 'abandoned response'
- // error.
- void abandon() override {
- if (auto Err = Handler(this->createAbandonedResponseError())) {
- // Handlers should not fail when passed an abandoned response error.
- report_fatal_error(std::move(Err));
- }
- }
-
-private:
- HandlerT Handler;
-};
-
-template <typename ChannelT, typename HandlerT>
-class ResponseHandlerImpl<ChannelT, Error, HandlerT>
- : public ResponseHandler<ChannelT> {
-public:
- ResponseHandlerImpl(HandlerT Handler) : Handler(std::move(Handler)) {}
-
- // Handle the result by deserializing it from the channel then passing it
- // to the user defined handler.
- Error handleResponse(ChannelT &C) override {
- Error Result = Error::success();
- if (auto Err = SerializationTraits<ChannelT, Error, Error>::deserialize(
- C, Result)) {
- consumeError(std::move(Result));
- return Err;
- }
- if (auto Err = C.endReceiveMessage()) {
- consumeError(std::move(Result));
- return Err;
- }
- return Handler(std::move(Result));
- }
-
- // Abandon this response by calling the handler with an 'abandoned response'
- // error.
- void abandon() override {
- if (auto Err = Handler(this->createAbandonedResponseError())) {
- // Handlers should not fail when passed an abandoned response error.
- report_fatal_error(std::move(Err));
- }
- }
-
-private:
- HandlerT Handler;
-};
-
-// Create a ResponseHandler from a given user handler.
-template <typename ChannelT, typename FuncRetT, typename HandlerT>
-std::unique_ptr<ResponseHandler<ChannelT>> createResponseHandler(HandlerT H) {
- return std::make_unique<ResponseHandlerImpl<ChannelT, FuncRetT, HandlerT>>(
- std::move(H));
-}
-
-// Helper for wrapping member functions up as functors. This is useful for
-// installing methods as result handlers.
-template <typename ClassT, typename RetT, typename... ArgTs>
-class MemberFnWrapper {
-public:
- using MethodT = RetT (ClassT::*)(ArgTs...);
- MemberFnWrapper(ClassT &Instance, MethodT Method)
- : Instance(Instance), Method(Method) {}
- RetT operator()(ArgTs &&...Args) {
- return (Instance.*Method)(std::move(Args)...);
- }
-
-private:
- ClassT &Instance;
- MethodT Method;
-};
-
-// Helper that provides a Functor for deserializing arguments.
-template <typename... ArgTs> class ReadArgs {
-public:
- Error operator()() { return Error::success(); }
-};
-
-template <typename ArgT, typename... ArgTs>
-class ReadArgs<ArgT, ArgTs...> : public ReadArgs<ArgTs...> {
-public:
- ReadArgs(ArgT &Arg, ArgTs &...Args) : ReadArgs<ArgTs...>(Args...), Arg(Arg) {}
-
- Error operator()(ArgT &ArgVal, ArgTs &...ArgVals) {
- this->Arg = std::move(ArgVal);
- return ReadArgs<ArgTs...>::operator()(ArgVals...);
- }
-
-private:
- ArgT &Arg;
-};
-
-// Manage sequence numbers.
-template <typename SequenceNumberT> class SequenceNumberManager {
-public:
- // Reset, making all sequence numbers available.
- void reset() {
- std::lock_guard<std::mutex> Lock(SeqNoLock);
- NextSequenceNumber = 0;
- FreeSequenceNumbers.clear();
- }
-
- // Get the next available sequence number. Will re-use numbers that have
- // been released.
- SequenceNumberT getSequenceNumber() {
- std::lock_guard<std::mutex> Lock(SeqNoLock);
- if (FreeSequenceNumbers.empty())
- return NextSequenceNumber++;
- auto SequenceNumber = FreeSequenceNumbers.back();
- FreeSequenceNumbers.pop_back();
- return SequenceNumber;
- }
-
- // Release a sequence number, making it available for re-use.
- void releaseSequenceNumber(SequenceNumberT SequenceNumber) {
- std::lock_guard<std::mutex> Lock(SeqNoLock);
- FreeSequenceNumbers.push_back(SequenceNumber);
- }
-
-private:
- std::mutex SeqNoLock;
- SequenceNumberT NextSequenceNumber = 0;
- std::vector<SequenceNumberT> FreeSequenceNumbers;
-};
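A minimal usage sketch of the manager above: numbers are handed out in increasing order and become available again once released.

    SequenceNumberManager<uint32_t> Mgr;
    uint32_t A = Mgr.getSequenceNumber();  // 0
    uint32_t B = Mgr.getSequenceNumber();  // 1
    Mgr.releaseSequenceNumber(A);          // 0 is free again
    uint32_t C2 = Mgr.getSequenceNumber(); // 0 (re-used before 2)
    (void)B; (void)C2;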
-
-// Checks that predicate P holds for each corresponding pair of type arguments
-// from T1 and T2 tuple.
-template <template <class, class> class P, typename T1Tuple, typename T2Tuple>
-class RPCArgTypeCheckHelper;
-
-template <template <class, class> class P>
-class RPCArgTypeCheckHelper<P, std::tuple<>, std::tuple<>> {
-public:
- static const bool value = true;
-};
-
-template <template <class, class> class P, typename T, typename... Ts,
- typename U, typename... Us>
-class RPCArgTypeCheckHelper<P, std::tuple<T, Ts...>, std::tuple<U, Us...>> {
-public:
- static const bool value =
- P<T, U>::value &&
- RPCArgTypeCheckHelper<P, std::tuple<Ts...>, std::tuple<Us...>>::value;
-};
-
-template <template <class, class> class P, typename T1Sig, typename T2Sig>
-class RPCArgTypeCheck {
-public:
- using T1Tuple = typename RPCFunctionArgsTuple<T1Sig>::Type;
- using T2Tuple = typename RPCFunctionArgsTuple<T2Sig>::Type;
-
- static_assert(std::tuple_size<T1Tuple>::value >=
- std::tuple_size<T2Tuple>::value,
- "Too many arguments to RPC call");
- static_assert(std::tuple_size<T1Tuple>::value <=
- std::tuple_size<T2Tuple>::value,
- "Too few arguments to RPC call");
-
- static const bool value = RPCArgTypeCheckHelper<P, T1Tuple, T2Tuple>::value;
-};
-
-template <typename ChannelT, typename WireT, typename ConcreteT>
-class CanSerialize {
-private:
- using S = SerializationTraits<ChannelT, WireT, ConcreteT>;
-
- template <typename T>
- static std::true_type check(
- std::enable_if_t<std::is_same<decltype(T::serialize(
- std::declval<ChannelT &>(),
- std::declval<const ConcreteT &>())),
- Error>::value,
- void *>);
-
- template <typename> static std::false_type check(...);
-
-public:
- static const bool value = decltype(check<S>(0))::value;
-};
-
-template <typename ChannelT, typename WireT, typename ConcreteT>
-class CanDeserialize {
-private:
- using S = SerializationTraits<ChannelT, WireT, ConcreteT>;
-
- template <typename T>
- static std::true_type
- check(std::enable_if_t<
- std::is_same<decltype(T::deserialize(std::declval<ChannelT &>(),
- std::declval<ConcreteT &>())),
- Error>::value,
- void *>);
-
- template <typename> static std::false_type check(...);
-
-public:
- static const bool value = decltype(check<S>(0))::value;
-};
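These detectors are typically consumed via static_assert; a sketch, assuming a hypothetical RawByteChannel-derived channel type MyChannel:

    static_assert(CanSerialize<MyChannel, uint32_t, uint32_t>::value,
                  "uint32_t must be serializable over MyChannel");
    static_assert(CanDeserialize<MyChannel, std::string, std::string>::value,
                  "std::string must be deserializable over MyChannel");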
-
-/// Contains primitive utilities for defining, calling and handling calls to
-/// remote procedures. ChannelT is a bidirectional stream conforming to the
-/// RPCChannel interface (see RPCChannel.h), FunctionIdT is a procedure
-/// identifier type that must be serializable on ChannelT, and SequenceNumberT
-/// is an integral type that will be used to number in-flight function calls.
-///
-/// These utilities support the construction of very primitive RPC utilities.
-/// Their intent is to ensure correct serialization and deserialization of
-/// procedure arguments, and to keep the client and server's view of the API in
-/// sync.
-template <typename ImplT, typename ChannelT, typename FunctionIdT,
- typename SequenceNumberT>
-class RPCEndpointBase {
-protected:
- class OrcRPCInvalid : public RPCFunction<OrcRPCInvalid, void()> {
- public:
- static const char *getName() { return "__orc_rpc$invalid"; }
- };
-
- class OrcRPCResponse : public RPCFunction<OrcRPCResponse, void()> {
- public:
- static const char *getName() { return "__orc_rpc$response"; }
- };
-
- class OrcRPCNegotiate
- : public RPCFunction<OrcRPCNegotiate, FunctionIdT(std::string)> {
- public:
- static const char *getName() { return "__orc_rpc$negotiate"; }
- };
-
- // Helper predicate for testing for the presence of SerializationTraits
- // serializers.
- template <typename WireT, typename ConcreteT>
- class CanSerializeCheck : detail::CanSerialize<ChannelT, WireT, ConcreteT> {
- public:
- using detail::CanSerialize<ChannelT, WireT, ConcreteT>::value;
-
- static_assert(value, "Missing serializer for argument (Can't serialize the "
- "first template type argument of CanSerializeCheck "
- "from the second)");
- };
-
- // Helper predicate for testing for the presence of SerializationTraits
- // deserializers.
- template <typename WireT, typename ConcreteT>
- class CanDeserializeCheck
- : detail::CanDeserialize<ChannelT, WireT, ConcreteT> {
- public:
- using detail::CanDeserialize<ChannelT, WireT, ConcreteT>::value;
-
- static_assert(value, "Missing deserializer for argument (Can't deserialize "
- "the second template type argument of "
- "CanDeserializeCheck from the first)");
- };
-
-public:
- /// Construct an RPC instance on a channel.
- RPCEndpointBase(ChannelT &C, bool LazyAutoNegotiation)
- : C(C), LazyAutoNegotiation(LazyAutoNegotiation) {
- // Hold ResponseId in a special variable, since we expect Response to be
- // called relatively frequently, and want to avoid the map lookup.
- ResponseId = FnIdAllocator.getResponseId();
- RemoteFunctionIds[OrcRPCResponse::getPrototype()] = ResponseId;
-
- // Register the negotiate function id and handler.
- auto NegotiateId = FnIdAllocator.getNegotiateId();
- RemoteFunctionIds[OrcRPCNegotiate::getPrototype()] = NegotiateId;
- Handlers[NegotiateId] = wrapHandler<OrcRPCNegotiate>(
- [this](const std::string &Name) { return handleNegotiate(Name); });
- }
-
- /// Negotiate a function id for Func with the other end of the channel.
- template <typename Func> Error negotiateFunction(bool Retry = false) {
- return getRemoteFunctionId<Func>(true, Retry).takeError();
- }
-
- /// Append a call to Func; this does not call send on the channel.
- /// The first argument specifies a user-defined handler to be run when the
- /// function returns. The handler should take an Expected<Func::ReturnType>,
- /// or an Error (if Func::ReturnType is void). The handler will be called
- /// with an error if the return value is abandoned due to a channel error.
- template <typename Func, typename HandlerT, typename... ArgTs>
- Error appendCallAsync(HandlerT Handler, const ArgTs &...Args) {
-
- static_assert(
- detail::RPCArgTypeCheck<CanSerializeCheck, typename Func::Type,
- void(ArgTs...)>::value,
- "");
-
- // Look up the function ID.
- FunctionIdT FnId;
- if (auto FnIdOrErr = getRemoteFunctionId<Func>(LazyAutoNegotiation, false))
- FnId = *FnIdOrErr;
- else {
- // Negotiation failed. Notify the handler then return the negotiate-failed
- // error.
- cantFail(Handler(make_error<ResponseAbandoned>()));
- return FnIdOrErr.takeError();
- }
-
- SequenceNumberT SeqNo; // initialized in locked scope below.
- {
- // Lock the pending responses map and sequence number manager.
- std::lock_guard<std::mutex> Lock(ResponsesMutex);
-
- // Allocate a sequence number.
- SeqNo = SequenceNumberMgr.getSequenceNumber();
- assert(!PendingResponses.count(SeqNo) &&
- "Sequence number already allocated");
-
- // Install the user handler.
- PendingResponses[SeqNo] =
- detail::createResponseHandler<ChannelT, typename Func::ReturnType>(
- std::move(Handler));
- }
-
- // Open the function call message.
- if (auto Err = C.startSendMessage(FnId, SeqNo)) {
- abandonPendingResponses();
- return Err;
- }
-
- // Serialize the call arguments.
- if (auto Err = detail::HandlerTraits<typename Func::Type>::serializeArgs(
- C, Args...)) {
- abandonPendingResponses();
- return Err;
- }
-
- // Close the function call message.
- if (auto Err = C.endSendMessage()) {
- abandonPendingResponses();
- return Err;
- }
-
- return Error::success();
- }
-
- Error sendAppendedCalls() { return C.send(); }
-
- template <typename Func, typename HandlerT, typename... ArgTs>
- Error callAsync(HandlerT Handler, const ArgTs &...Args) {
- if (auto Err = appendCallAsync<Func>(std::move(Handler), Args...))
- return Err;
- return C.send();
- }
-
- /// Handle one incoming call.
- Error handleOne() {
- FunctionIdT FnId;
- SequenceNumberT SeqNo;
- if (auto Err = C.startReceiveMessage(FnId, SeqNo)) {
- abandonPendingResponses();
- return Err;
- }
- if (FnId == ResponseId)
- return handleResponse(SeqNo);
- auto I = Handlers.find(FnId);
- if (I != Handlers.end())
- return I->second(C, SeqNo);
-
- // else: No handler found. Report error to client?
- return make_error<BadFunctionCall<FunctionIdT, SequenceNumberT>>(FnId,
- SeqNo);
- }
-
- /// Helper for handling setter procedures - this method returns a functor that
- /// sets the variables referred to by Args... to values deserialized from the
- /// channel.
- /// E.g.
- ///
- /// typedef Function<0, bool, int> Func1;
- ///
- /// ...
- /// bool B;
- /// int I;
- /// if (auto Err = expect<Func1>(Channel, readArgs(B, I)))
- /// /* Handle Args */ ;
- ///
- template <typename... ArgTs>
- static detail::ReadArgs<ArgTs...> readArgs(ArgTs &...Args) {
- return detail::ReadArgs<ArgTs...>(Args...);
- }
-
- /// Abandon all outstanding result handlers.
- ///
- /// This will call all currently registered result handlers to receive an
- /// "abandoned" error as their argument. This is used internally by the RPC
- /// in error situations, but can also be called directly by clients who are
- /// disconnecting from the remote and don't or can't expect responses to their
- /// outstanding calls. (Especially for outstanding blocking calls, calling
- /// this function may be necessary to avoid dead threads).
- void abandonPendingResponses() {
- // Lock the pending responses map and sequence number manager.
- std::lock_guard<std::mutex> Lock(ResponsesMutex);
-
- for (auto &KV : PendingResponses)
- KV.second->abandon();
- PendingResponses.clear();
- SequenceNumberMgr.reset();
- }
-
- /// Remove the handler for the given function.
- /// A handler must currently be registered for this function.
- template <typename Func> void removeHandler() {
- auto IdItr = LocalFunctionIds.find(Func::getPrototype());
- assert(IdItr != LocalFunctionIds.end() &&
- "Function does not have a registered handler");
- auto HandlerItr = Handlers.find(IdItr->second);
- assert(HandlerItr != Handlers.end() &&
- "Function does not have a registered handler");
- Handlers.erase(HandlerItr);
- }
-
- /// Clear all handlers.
- void clearHandlers() { Handlers.clear(); }
-
-protected:
- FunctionIdT getInvalidFunctionId() const {
- return FnIdAllocator.getInvalidId();
- }
-
- /// Add the given handler to the handler map and make it available for
- /// autonegotiation and execution.
- template <typename Func, typename HandlerT>
- void addHandlerImpl(HandlerT Handler) {
-
- static_assert(detail::RPCArgTypeCheck<
- CanDeserializeCheck, typename Func::Type,
- typename detail::HandlerTraits<HandlerT>::Type>::value,
- "");
-
- FunctionIdT NewFnId = FnIdAllocator.template allocate<Func>();
- LocalFunctionIds[Func::getPrototype()] = NewFnId;
- Handlers[NewFnId] = wrapHandler<Func>(std::move(Handler));
- }
-
- template <typename Func, typename HandlerT>
- void addAsyncHandlerImpl(HandlerT Handler) {
-
- static_assert(
- detail::RPCArgTypeCheck<
- CanDeserializeCheck, typename Func::Type,
- typename detail::AsyncHandlerTraits<
- typename detail::HandlerTraits<HandlerT>::Type>::Type>::value,
- "");
-
- FunctionIdT NewFnId = FnIdAllocator.template allocate<Func>();
- LocalFunctionIds[Func::getPrototype()] = NewFnId;
- Handlers[NewFnId] = wrapAsyncHandler<Func>(std::move(Handler));
- }
-
- Error handleResponse(SequenceNumberT SeqNo) {
- using Handler = typename decltype(PendingResponses)::mapped_type;
- Handler PRHandler;
-
- {
- // Lock the pending responses map and sequence number manager.
- std::unique_lock<std::mutex> Lock(ResponsesMutex);
- auto I = PendingResponses.find(SeqNo);
-
- if (I != PendingResponses.end()) {
- PRHandler = std::move(I->second);
- PendingResponses.erase(I);
- SequenceNumberMgr.releaseSequenceNumber(SeqNo);
- } else {
- // Unlock the pending results map to prevent recursive lock.
- Lock.unlock();
- abandonPendingResponses();
- return make_error<InvalidSequenceNumberForResponse<SequenceNumberT>>(
- SeqNo);
- }
- }
-
- assert(PRHandler &&
- "If we didn't find a response handler we should have bailed out");
-
- if (auto Err = PRHandler->handleResponse(C)) {
- abandonPendingResponses();
- return Err;
- }
-
- return Error::success();
- }
-
- FunctionIdT handleNegotiate(const std::string &Name) {
- auto I = LocalFunctionIds.find(Name);
- if (I == LocalFunctionIds.end())
- return getInvalidFunctionId();
- return I->second;
- }
-
- // Find the remote FunctionId for the given function.
- template <typename Func>
- Expected<FunctionIdT> getRemoteFunctionId(bool NegotiateIfNotInMap,
- bool NegotiateIfInvalid) {
- bool DoNegotiate;
-
- // Check if we already have a function id...
- auto I = RemoteFunctionIds.find(Func::getPrototype());
- if (I != RemoteFunctionIds.end()) {
- // If it's valid there's nothing left to do.
- if (I->second != getInvalidFunctionId())
- return I->second;
- DoNegotiate = NegotiateIfInvalid;
- } else
- DoNegotiate = NegotiateIfNotInMap;
-
- // We don't have a function id for Func yet, but we're allowed to try to
- // negotiate one.
- if (DoNegotiate) {
- auto &Impl = static_cast<ImplT &>(*this);
- if (auto RemoteIdOrErr =
- Impl.template callB<OrcRPCNegotiate>(Func::getPrototype())) {
- RemoteFunctionIds[Func::getPrototype()] = *RemoteIdOrErr;
- if (*RemoteIdOrErr == getInvalidFunctionId())
- return make_error<CouldNotNegotiate>(Func::getPrototype());
- return *RemoteIdOrErr;
- } else
- return RemoteIdOrErr.takeError();
- }
-
- // No key was available in the map and we weren't allowed to try to
- // negotiate one, so return an unknown function error.
- return make_error<CouldNotNegotiate>(Func::getPrototype());
- }
-
- using WrappedHandlerFn = std::function<Error(ChannelT &, SequenceNumberT)>;
-
- // Wrap the given user handler in the necessary argument-deserialization code,
- // result-serialization code, and call to the launch policy (if present).
- template <typename Func, typename HandlerT>
- WrappedHandlerFn wrapHandler(HandlerT Handler) {
- return [this, Handler](ChannelT &Channel,
- SequenceNumberT SeqNo) mutable -> Error {
- // Start by deserializing the arguments.
- using ArgsTuple = typename detail::RPCFunctionArgsTuple<
- typename detail::HandlerTraits<HandlerT>::Type>::Type;
- auto Args = std::make_shared<ArgsTuple>();
-
- if (auto Err =
- detail::HandlerTraits<typename Func::Type>::deserializeArgs(
- Channel, *Args))
- return Err;
-
- // GCC 4.7 and 4.8 incorrectly issue a -Wunused-but-set-variable warning
- // for Args. Void-cast Args to work around this for now.
- // FIXME: Remove this workaround once we can assume a working GCC version.
- (void)Args;
-
- // End receive message, unlocking the channel for reading.
- if (auto Err = Channel.endReceiveMessage())
- return Err;
-
- using HTraits = detail::HandlerTraits<HandlerT>;
- using FuncReturn = typename Func::ReturnType;
- return detail::respond<FuncReturn>(Channel, ResponseId, SeqNo,
- HTraits::unpackAndRun(Handler, *Args));
- };
- }
-
- // Wrap the given user handler in the necessary argument-deserialization code,
- // result-serialization code, and call to the launch policy (if present).
- template <typename Func, typename HandlerT>
- WrappedHandlerFn wrapAsyncHandler(HandlerT Handler) {
- return [this, Handler](ChannelT &Channel,
- SequenceNumberT SeqNo) mutable -> Error {
- // Start by deserializing the arguments.
- using AHTraits = detail::AsyncHandlerTraits<
- typename detail::HandlerTraits<HandlerT>::Type>;
- using ArgsTuple =
- typename detail::RPCFunctionArgsTuple<typename AHTraits::Type>::Type;
- auto Args = std::make_shared<ArgsTuple>();
-
- if (auto Err =
- detail::HandlerTraits<typename Func::Type>::deserializeArgs(
- Channel, *Args))
- return Err;
-
- // GCC 4.7 and 4.8 incorrectly issue a -Wunused-but-set-variable warning
- // for Args. Void-cast Args to work around this for now.
- // FIXME: Remove this workaround once we can assume a working GCC version.
- (void)Args;
-
- // End receive message, unlocking the channel for reading.
- if (auto Err = Channel.endReceiveMessage())
- return Err;
-
- using HTraits = detail::HandlerTraits<HandlerT>;
- using FuncReturn = typename Func::ReturnType;
- auto Responder = [this,
- SeqNo](typename AHTraits::ResultType RetVal) -> Error {
- return detail::respond<FuncReturn>(C, ResponseId, SeqNo,
- std::move(RetVal));
- };
-
- return HTraits::unpackAndRunAsync(Handler, Responder, *Args);
- };
- }
-
- ChannelT &C;
-
- bool LazyAutoNegotiation;
-
- RPCFunctionIdAllocator<FunctionIdT> FnIdAllocator;
-
- FunctionIdT ResponseId;
- std::map<std::string, FunctionIdT> LocalFunctionIds;
- std::map<const char *, FunctionIdT> RemoteFunctionIds;
-
- std::map<FunctionIdT, WrappedHandlerFn> Handlers;
-
- std::mutex ResponsesMutex;
- detail::SequenceNumberManager<SequenceNumberT> SequenceNumberMgr;
- std::map<SequenceNumberT, std::unique_ptr<detail::ResponseHandler<ChannelT>>>
- PendingResponses;
-};
-
-} // end namespace detail
-
-template <typename ChannelT, typename FunctionIdT = uint32_t,
- typename SequenceNumberT = uint32_t>
-class MultiThreadedRPCEndpoint
- : public detail::RPCEndpointBase<
- MultiThreadedRPCEndpoint<ChannelT, FunctionIdT, SequenceNumberT>,
- ChannelT, FunctionIdT, SequenceNumberT> {
-private:
- using BaseClass = detail::RPCEndpointBase<
- MultiThreadedRPCEndpoint<ChannelT, FunctionIdT, SequenceNumberT>,
- ChannelT, FunctionIdT, SequenceNumberT>;
-
-public:
- MultiThreadedRPCEndpoint(ChannelT &C, bool LazyAutoNegotiation)
- : BaseClass(C, LazyAutoNegotiation) {}
-
- /// Add a handler for the given RPC function.
- /// This installs the given handler functor for the given RPCFunction, and
- /// makes the RPC function available for negotiation/calling from the remote.
- template <typename Func, typename HandlerT>
- void addHandler(HandlerT Handler) {
- return this->template addHandlerImpl<Func>(std::move(Handler));
- }
-
- /// Add a class-method as a handler.
- template <typename Func, typename ClassT, typename RetT, typename... ArgTs>
- void addHandler(ClassT &Object, RetT (ClassT::*Method)(ArgTs...)) {
- addHandler<Func>(
- detail::MemberFnWrapper<ClassT, RetT, ArgTs...>(Object, Method));
- }
-
- template <typename Func, typename HandlerT>
- void addAsyncHandler(HandlerT Handler) {
- return this->template addAsyncHandlerImpl<Func>(std::move(Handler));
- }
-
- /// Add a class-method as a handler.
- template <typename Func, typename ClassT, typename RetT, typename... ArgTs>
- void addAsyncHandler(ClassT &Object, RetT (ClassT::*Method)(ArgTs...)) {
- addAsyncHandler<Func>(
- detail::MemberFnWrapper<ClassT, RetT, ArgTs...>(Object, Method));
- }
-
- /// Return type for non-blocking call primitives.
- template <typename Func>
- using NonBlockingCallResult = typename detail::ResultTraits<
- typename Func::ReturnType>::ReturnFutureType;
-
- /// Call Func on Channel C. Does not block, does not call send. Returns a
- /// future for the result.
- ///
- /// This appends the call and installs a handler that fulfils the returned
- /// future when the corresponding response is handled (or when the call is
- /// abandoned). Use callNB below to also flush the channel after appending
- /// the call.
- template <typename Func, typename... ArgTs>
- Expected<NonBlockingCallResult<Func>> appendCallNB(const ArgTs &...Args) {
- using RTraits = detail::ResultTraits<typename Func::ReturnType>;
- using ErrorReturn = typename RTraits::ErrorReturnType;
- using ErrorReturnPromise = typename RTraits::ReturnPromiseType;
-
- ErrorReturnPromise Promise;
- auto FutureResult = Promise.get_future();
-
- if (auto Err = this->template appendCallAsync<Func>(
- [Promise = std::move(Promise)](ErrorReturn RetOrErr) mutable {
- Promise.set_value(std::move(RetOrErr));
- return Error::success();
- },
- Args...)) {
- RTraits::consumeAbandoned(FutureResult.get());
- return std::move(Err);
- }
- return std::move(FutureResult);
- }
-
- /// The same as appendCallNB, except that it also calls C.send() to
- /// flush the channel after serializing the call.
- template <typename Func, typename... ArgTs>
- Expected<NonBlockingCallResult<Func>> callNB(const ArgTs &...Args) {
- auto Result = appendCallNB<Func>(Args...);
- if (!Result)
- return Result;
- if (auto Err = this->C.send()) {
- this->abandonPendingResponses();
- detail::ResultTraits<typename Func::ReturnType>::consumeAbandoned(
- std::move(Result->get()));
- return std::move(Err);
- }
- return Result;
- }
-
- /// Call Func on Channel C. Blocks waiting for a result. Returns an Error
- /// for void functions or an Expected<T> for functions returning a T.
- ///
- /// This function is for use in threaded code where another thread is
- /// handling responses and incoming calls.
- template <typename Func, typename... ArgTs,
- typename AltRetT = typename Func::ReturnType>
- typename detail::ResultTraits<AltRetT>::ErrorReturnType
- callB(const ArgTs &...Args) {
- if (auto FutureResOrErr = callNB<Func>(Args...))
- return FutureResOrErr->get();
- else
- return FutureResOrErr.takeError();
- }
-
- /// Handle incoming RPC calls.
- Error handlerLoop() {
- while (true)
- if (auto Err = this->handleOne())
- return Err;
- return Error::success();
- }
-};
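A usage sketch, assuming a hypothetical RPC function Add and channel type MyChannel (illustrative names only): the server registers a handler and runs the handler loop on a dedicated thread, while a client thread issues a blocking call and a separate client listener thread processes responses.

    class Add : public RPCFunction<Add, int32_t(int32_t, int32_t)> {
    public:
      static const char *getName() { return "Add"; }
    };

    // Server thread:
    //   MultiThreadedRPCEndpoint<MyChannel> Server(ServerChan, true);
    //   Server.addHandler<Add>([](int32_t X, int32_t Y) { return X + Y; });
    //   cantFail(Server.handlerLoop());

    // Client (with another thread running Client.handlerLoop()):
    //   MultiThreadedRPCEndpoint<MyChannel> Client(ClientChan, true);
    //   Expected<int32_t> Sum = Client.callB<Add>(2, 3);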
-
-template <typename ChannelT, typename FunctionIdT = uint32_t,
- typename SequenceNumberT = uint32_t>
-class SingleThreadedRPCEndpoint
- : public detail::RPCEndpointBase<
- SingleThreadedRPCEndpoint<ChannelT, FunctionIdT, SequenceNumberT>,
- ChannelT, FunctionIdT, SequenceNumberT> {
-private:
- using BaseClass = detail::RPCEndpointBase<
- SingleThreadedRPCEndpoint<ChannelT, FunctionIdT, SequenceNumberT>,
- ChannelT, FunctionIdT, SequenceNumberT>;
-
-public:
- SingleThreadedRPCEndpoint(ChannelT &C, bool LazyAutoNegotiation)
- : BaseClass(C, LazyAutoNegotiation) {}
-
- template <typename Func, typename HandlerT>
- void addHandler(HandlerT Handler) {
- return this->template addHandlerImpl<Func>(std::move(Handler));
- }
-
- template <typename Func, typename ClassT, typename RetT, typename... ArgTs>
- void addHandler(ClassT &Object, RetT (ClassT::*Method)(ArgTs...)) {
- addHandler<Func>(
- detail::MemberFnWrapper<ClassT, RetT, ArgTs...>(Object, Method));
- }
-
- template <typename Func, typename HandlerT>
- void addAsyncHandler(HandlerT Handler) {
- return this->template addAsyncHandlerImpl<Func>(std::move(Handler));
- }
-
- /// Add a class-method as a handler.
- template <typename Func, typename ClassT, typename RetT, typename... ArgTs>
- void addAsyncHandler(ClassT &Object, RetT (ClassT::*Method)(ArgTs...)) {
- addAsyncHandler<Func>(
- detail::MemberFnWrapper<ClassT, RetT, ArgTs...>(Object, Method));
- }
-
- template <typename Func, typename... ArgTs,
- typename AltRetT = typename Func::ReturnType>
- typename detail::ResultTraits<AltRetT>::ErrorReturnType
- callB(const ArgTs &...Args) {
- bool ReceivedResponse = false;
- using AltRetTraits = detail::ResultTraits<AltRetT>;
- using ResultType = typename AltRetTraits::ErrorReturnType;
- ResultType Result = AltRetTraits::createBlankErrorReturnValue();
-
- // We have to mark Result as checked (we know it is in a success state at
- // this point) so that it can be overwritten in the async handler.
- (void)!!Result;
-
- if (Error Err = this->template appendCallAsync<Func>(
- [&](ResultType R) {
- Result = std::move(R);
- ReceivedResponse = true;
- return Error::success();
- },
- Args...)) {
- AltRetTraits::consumeAbandoned(std::move(Result));
- return AltRetTraits::returnError(std::move(Err));
- }
-
- if (Error Err = this->C.send()) {
- AltRetTraits::consumeAbandoned(std::move(Result));
- return AltRetTraits::returnError(std::move(Err));
- }
-
- while (!ReceivedResponse) {
- if (Error Err = this->handleOne()) {
- AltRetTraits::consumeAbandoned(std::move(Result));
- return AltRetTraits::returnError(std::move(Err));
- }
- }
-
- return Result;
- }
-};
-
-/// Asynchronous dispatch for a function on an RPC endpoint.
-template <typename RPCClass, typename Func> class RPCAsyncDispatch {
-public:
- RPCAsyncDispatch(RPCClass &Endpoint) : Endpoint(Endpoint) {}
-
- template <typename HandlerT, typename... ArgTs>
- Error operator()(HandlerT Handler, const ArgTs &...Args) const {
- return Endpoint.template appendCallAsync<Func>(std::move(Handler), Args...);
- }
-
-private:
- RPCClass &Endpoint;
-};
-
-/// Construct an asynchronous dispatcher from an RPC endpoint and a Func.
-template <typename Func, typename RPCEndpointT>
-RPCAsyncDispatch<RPCEndpointT, Func> rpcAsyncDispatch(RPCEndpointT &Endpoint) {
- return RPCAsyncDispatch<RPCEndpointT, Func>(Endpoint);
-}
-
- /// Allows a set of asynchronous calls to be dispatched, and then
-/// waited on as a group.
-class ParallelCallGroup {
-public:
- ParallelCallGroup() = default;
- ParallelCallGroup(const ParallelCallGroup &) = delete;
- ParallelCallGroup &operator=(const ParallelCallGroup &) = delete;
-
- /// Make an asynchronous call.
- template <typename AsyncDispatcher, typename HandlerT, typename... ArgTs>
- Error call(const AsyncDispatcher &AsyncDispatch, HandlerT Handler,
- const ArgTs &...Args) {
- // Increment the count of outstanding calls. This has to happen before
- // we invoke the call, as the handler may (depending on scheduling)
- // be run immediately on another thread, and we don't want the decrement
- // in the wrapped handler below to run before the increment.
- {
- std::unique_lock<std::mutex> Lock(M);
- ++NumOutstandingCalls;
- }
-
- // Wrap the user handler in a lambda that will decrement the
- // outstanding calls count, then poke the condition variable.
- using ArgType = typename detail::ResponseHandlerArg<
- typename detail::HandlerTraits<HandlerT>::Type>::ArgType;
- auto WrappedHandler = [this, Handler = std::move(Handler)](ArgType Arg) {
- auto Err = Handler(std::move(Arg));
- std::unique_lock<std::mutex> Lock(M);
- --NumOutstandingCalls;
- CV.notify_all();
- return Err;
- };
-
- return AsyncDispatch(std::move(WrappedHandler), Args...);
- }
-
- /// Blocks until all calls have been completed and their return value
- /// handlers run.
- void wait() {
- std::unique_lock<std::mutex> Lock(M);
- while (NumOutstandingCalls > 0)
- CV.wait(Lock);
- }
-
-private:
- std::mutex M;
- std::condition_variable CV;
- uint32_t NumOutstandingCalls = 0;
-};
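A sketch of grouped asynchronous calls, reusing the hypothetical Add function and Client endpoint from the sketch above (responses are still processed by the client's listener thread):

    ParallelCallGroup PCG;
    auto AsyncAdd = rpcAsyncDispatch<Add>(Client);
    for (int32_t I = 0; I != 4; ++I)
      cantFail(PCG.call(AsyncAdd,
                        [](Expected<int32_t> R) -> Error {
                          if (!R)
                            return R.takeError();
                          // Use *R here.
                          return Error::success();
                        },
                        I, I));
    cantFail(Client.sendAppendedCalls());
    PCG.wait();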
-
-/// Convenience class for grouping RPCFunctions into APIs that can be
-/// negotiated as a block.
-///
-template <typename... Funcs> class APICalls {
-public:
- /// Test whether this API contains Function F.
- template <typename F> class Contains {
- public:
- static const bool value = false;
- };
-
- /// Negotiate all functions in this API.
- template <typename RPCEndpoint> static Error negotiate(RPCEndpoint &R) {
- return Error::success();
- }
-};
-
-template <typename Func, typename... Funcs> class APICalls<Func, Funcs...> {
-public:
- template <typename F> class Contains {
- public:
- static const bool value = std::is_same<F, Func>::value |
- APICalls<Funcs...>::template Contains<F>::value;
- };
-
- template <typename RPCEndpoint> static Error negotiate(RPCEndpoint &R) {
- if (auto Err = R.template negotiateFunction<Func>())
- return Err;
- return APICalls<Funcs...>::negotiate(R);
- }
-};
-
-template <typename... InnerFuncs, typename... Funcs>
-class APICalls<APICalls<InnerFuncs...>, Funcs...> {
-public:
- template <typename F> class Contains {
- public:
- static const bool value =
- APICalls<InnerFuncs...>::template Contains<F>::value |
- APICalls<Funcs...>::template Contains<F>::value;
- };
-
- template <typename RPCEndpoint> static Error negotiate(RPCEndpoint &R) {
- if (auto Err = APICalls<InnerFuncs...>::negotiate(R))
- return Err;
- return APICalls<Funcs...>::negotiate(R);
- }
-};
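A sketch of block negotiation, assuming the hypothetical Add above and an analogous Sub function:

    using CalculatorAPI = APICalls<Add, Sub>;
    static_assert(CalculatorAPI::Contains<Add>::value, "Add is part of the API");
    if (Error Err = CalculatorAPI::negotiate(Client))
      /* handle the negotiation failure */;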
-
-} // end namespace shared
-} // end namespace orc
-} // end namespace llvm
-
-#endif // LLVM_EXECUTIONENGINE_ORC_SHARED_RPCUTILS_H
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/RawByteChannel.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/RawByteChannel.h
deleted file mode 100644
index 2ee471939251..000000000000
--- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/RawByteChannel.h
+++ /dev/null
@@ -1,183 +0,0 @@
-//===- RawByteChannel.h -----------------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_EXECUTIONENGINE_ORC_SHARED_RAWBYTECHANNEL_H
-#define LLVM_EXECUTIONENGINE_ORC_SHARED_RAWBYTECHANNEL_H
-
-#include "llvm/ADT/StringRef.h"
-#include "llvm/ExecutionEngine/Orc/Shared/Serialization.h"
-#include "llvm/Support/Endian.h"
-#include "llvm/Support/Error.h"
-#include <cstdint>
-#include <mutex>
-#include <string>
-#include <type_traits>
-
-namespace llvm {
-namespace orc {
-namespace shared {
-
-/// Interface for byte-streams to be used with ORC Serialization.
-class RawByteChannel {
-public:
- virtual ~RawByteChannel() = default;
-
- /// Read Size bytes from the stream into *Dst.
- virtual Error readBytes(char *Dst, unsigned Size) = 0;
-
- /// Append Size bytes from *Src to the stream.
- virtual Error appendBytes(const char *Src, unsigned Size) = 0;
-
- /// Flush the stream if possible.
- virtual Error send() = 0;
-
- /// Notify the channel that we're starting a message send.
- /// Locks the channel for writing.
- template <typename FunctionIdT, typename SequenceIdT>
- Error startSendMessage(const FunctionIdT &FnId, const SequenceIdT &SeqNo) {
- writeLock.lock();
- if (auto Err = serializeSeq(*this, FnId, SeqNo)) {
- writeLock.unlock();
- return Err;
- }
- return Error::success();
- }
-
- /// Notify the channel that we're ending a message send.
- /// Unlocks the channel for writing.
- Error endSendMessage() {
- writeLock.unlock();
- return Error::success();
- }
-
- /// Notify the channel that we're starting a message receive.
- /// Locks the channel for reading.
- template <typename FunctionIdT, typename SequenceNumberT>
- Error startReceiveMessage(FunctionIdT &FnId, SequenceNumberT &SeqNo) {
- readLock.lock();
- if (auto Err = deserializeSeq(*this, FnId, SeqNo)) {
- readLock.unlock();
- return Err;
- }
- return Error::success();
- }
-
- /// Notify the channel that we're ending a message receive.
- /// Unlocks the channel for reading.
- Error endReceiveMessage() {
- readLock.unlock();
- return Error::success();
- }
-
- /// Get the lock for stream reading.
- std::mutex &getReadLock() { return readLock; }
-
- /// Get the lock for stream writing.
- std::mutex &getWriteLock() { return writeLock; }
-
-private:
- std::mutex readLock, writeLock;
-};
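A minimal loopback channel sketch implementing only the interface above (a single in-memory buffer, suitable for same-process tests; a real channel would wrap a socket or pipe and block on short reads; needs <vector> and <algorithm>):

    class BufferChannel : public RawByteChannel {
    public:
      Error readBytes(char *Dst, unsigned Size) override {
        if (Buffer.size() < Size)
          return make_error<StringError>("short read", inconvertibleErrorCode());
        std::copy(Buffer.begin(), Buffer.begin() + Size, Dst);
        Buffer.erase(Buffer.begin(), Buffer.begin() + Size);
        return Error::success();
      }
      Error appendBytes(const char *Src, unsigned Size) override {
        Buffer.insert(Buffer.end(), Src, Src + Size);
        return Error::success();
      }
      Error send() override { return Error::success(); }

    private:
      std::vector<char> Buffer;
    };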
-
-template <typename ChannelT, typename T>
-class SerializationTraits<
- ChannelT, T, T,
- std::enable_if_t<
- std::is_base_of<RawByteChannel, ChannelT>::value &&
- (std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value ||
- std::is_same<T, uint16_t>::value || std::is_same<T, int16_t>::value ||
- std::is_same<T, uint32_t>::value || std::is_same<T, int32_t>::value ||
- std::is_same<T, uint64_t>::value || std::is_same<T, int64_t>::value ||
- std::is_same<T, char>::value)>> {
-public:
- static Error serialize(ChannelT &C, T V) {
- V = support::endian::byte_swap<T, support::big>(V);
- return C.appendBytes(reinterpret_cast<const char *>(&V), sizeof(T));
- };
-
- static Error deserialize(ChannelT &C, T &V) {
- if (auto Err = C.readBytes(reinterpret_cast<char *>(&V), sizeof(T)))
- return Err;
- V = support::endian::byte_swap<T, support::big>(V);
- return Error::success();
- };
-};
-
-template <typename ChannelT>
-class SerializationTraits<
- ChannelT, bool, bool,
- std::enable_if_t<std::is_base_of<RawByteChannel, ChannelT>::value>> {
-public:
- static Error serialize(ChannelT &C, bool V) {
- uint8_t Tmp = V ? 1 : 0;
- if (auto Err = C.appendBytes(reinterpret_cast<const char *>(&Tmp), 1))
- return Err;
- return Error::success();
- }
-
- static Error deserialize(ChannelT &C, bool &V) {
- uint8_t Tmp = 0;
- if (auto Err = C.readBytes(reinterpret_cast<char *>(&Tmp), 1))
- return Err;
- V = Tmp != 0;
- return Error::success();
- }
-};
-
-template <typename ChannelT>
-class SerializationTraits<
- ChannelT, std::string, StringRef,
- std::enable_if_t<std::is_base_of<RawByteChannel, ChannelT>::value>> {
-public:
- /// Serialize a StringRef to the channel as a std::string.
- static Error serialize(RawByteChannel &C, StringRef S) {
- if (auto Err = serializeSeq(C, static_cast<uint64_t>(S.size())))
- return Err;
- return C.appendBytes((const char *)S.data(), S.size());
- }
-};
-
-template <typename ChannelT, typename T>
-class SerializationTraits<
- ChannelT, std::string, T,
- std::enable_if_t<std::is_base_of<RawByteChannel, ChannelT>::value &&
- (std::is_same<T, const char *>::value ||
- std::is_same<T, char *>::value)>> {
-public:
- static Error serialize(RawByteChannel &C, const char *S) {
- return SerializationTraits<ChannelT, std::string, StringRef>::serialize(C,
- S);
- }
-};
-
-template <typename ChannelT>
-class SerializationTraits<
- ChannelT, std::string, std::string,
- std::enable_if_t<std::is_base_of<RawByteChannel, ChannelT>::value>> {
-public:
- /// Serialize a std::string to the channel.
- static Error serialize(RawByteChannel &C, const std::string &S) {
- return SerializationTraits<ChannelT, std::string, StringRef>::serialize(C,
- S);
- }
-
- /// Deserialize a std::string from the channel.
- static Error deserialize(RawByteChannel &C, std::string &S) {
- uint64_t Count = 0;
- if (auto Err = deserializeSeq(C, Count))
- return Err;
- S.resize(Count);
- return C.readBytes(&S[0], Count);
- }
-};
-
-} // end namespace shared
-} // end namespace orc
-} // end namespace llvm
-
-#endif // LLVM_EXECUTIONENGINE_ORC_SHARED_RAWBYTECHANNEL_H
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/Serialization.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/Serialization.h
deleted file mode 100644
index 0ea483ba2abb..000000000000
--- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/Serialization.h
+++ /dev/null
@@ -1,769 +0,0 @@
-//===- Serialization.h ------------------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_EXECUTIONENGINE_ORC_SHARED_SERIALIZATION_H
-#define LLVM_EXECUTIONENGINE_ORC_SHARED_SERIALIZATION_H
-
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ExecutionEngine/Orc/Shared/OrcError.h"
-#include "llvm/Support/thread.h"
-#include <map>
-#include <mutex>
-#include <set>
-#include <sstream>
-#include <string>
-#include <vector>
-
-namespace llvm {
-namespace orc {
-namespace shared {
-
-template <typename T> class SerializationTypeName;
-
-/// SerializationTypeNameSequence is a utility for rendering sequences of
-/// types to a string by rendering each type, separated by ", ".
-template <typename... ArgTs> class SerializationTypeNameSequence {};
-
-/// Render an empty TypeNameSequence to an ostream.
-template <typename OStream>
-OStream &operator<<(OStream &OS, const SerializationTypeNameSequence<> &V) {
- return OS;
-}
-
-/// Render a TypeNameSequence of a single type to an ostream.
-template <typename OStream, typename ArgT>
-OStream &operator<<(OStream &OS, const SerializationTypeNameSequence<ArgT> &V) {
- OS << SerializationTypeName<ArgT>::getName();
- return OS;
-}
-
-/// Render a TypeNameSequence of more than one type to an ostream.
-template <typename OStream, typename ArgT1, typename ArgT2, typename... ArgTs>
-OStream &
-operator<<(OStream &OS,
- const SerializationTypeNameSequence<ArgT1, ArgT2, ArgTs...> &V) {
- OS << SerializationTypeName<ArgT1>::getName() << ", "
- << SerializationTypeNameSequence<ArgT2, ArgTs...>();
- return OS;
-}
-
-template <> class SerializationTypeName<void> {
-public:
- static const char *getName() { return "void"; }
-};
-
-template <> class SerializationTypeName<int8_t> {
-public:
- static const char *getName() { return "int8_t"; }
-};
-
-template <> class SerializationTypeName<uint8_t> {
-public:
- static const char *getName() { return "uint8_t"; }
-};
-
-template <> class SerializationTypeName<int16_t> {
-public:
- static const char *getName() { return "int16_t"; }
-};
-
-template <> class SerializationTypeName<uint16_t> {
-public:
- static const char *getName() { return "uint16_t"; }
-};
-
-template <> class SerializationTypeName<int32_t> {
-public:
- static const char *getName() { return "int32_t"; }
-};
-
-template <> class SerializationTypeName<uint32_t> {
-public:
- static const char *getName() { return "uint32_t"; }
-};
-
-template <> class SerializationTypeName<int64_t> {
-public:
- static const char *getName() { return "int64_t"; }
-};
-
-template <> class SerializationTypeName<uint64_t> {
-public:
- static const char *getName() { return "uint64_t"; }
-};
-
-template <> class SerializationTypeName<bool> {
-public:
- static const char *getName() { return "bool"; }
-};
-
-template <> class SerializationTypeName<std::string> {
-public:
- static const char *getName() { return "std::string"; }
-};
-
-template <> class SerializationTypeName<Error> {
-public:
- static const char *getName() { return "Error"; }
-};
-
-template <typename T> class SerializationTypeName<Expected<T>> {
-public:
- static const char *getName() {
- static std::string Name = [] {
- std::string Name;
- raw_string_ostream(Name)
- << "Expected<" << SerializationTypeNameSequence<T>() << ">";
- return Name;
- }();
- return Name.data();
- }
-};
-
-template <typename T1, typename T2>
-class SerializationTypeName<std::pair<T1, T2>> {
-public:
- static const char *getName() {
- static std::string Name = [] {
- std::string Name;
- raw_string_ostream(Name)
- << "std::pair<" << SerializationTypeNameSequence<T1, T2>() << ">";
- return Name;
- }();
- return Name.data();
- }
-};
-
-template <typename... ArgTs> class SerializationTypeName<std::tuple<ArgTs...>> {
-public:
- static const char *getName() {
- static std::string Name = [] {
- std::string Name;
- raw_string_ostream(Name)
- << "std::tuple<" << SerializationTypeNameSequence<ArgTs...>() << ">";
- return Name;
- }();
- return Name.data();
- }
-};
-
-template <typename T> class SerializationTypeName<Optional<T>> {
-public:
- static const char *getName() {
- static std::string Name = [] {
- std::string Name;
- raw_string_ostream(Name)
- << "Optional<" << SerializationTypeName<T>::getName() << ">";
- return Name;
- }();
- return Name.data();
- }
-};
-
-template <typename T> class SerializationTypeName<std::vector<T>> {
-public:
- static const char *getName() {
- static std::string Name = [] {
- std::string Name;
- raw_string_ostream(Name)
- << "std::vector<" << SerializationTypeName<T>::getName() << ">";
- return Name;
- }();
- return Name.data();
- }
-};
-
-template <typename T> class SerializationTypeName<std::set<T>> {
-public:
- static const char *getName() {
- static std::string Name = [] {
- std::string Name;
- raw_string_ostream(Name)
- << "std::set<" << SerializationTypeName<T>::getName() << ">";
- return Name;
- }();
- return Name.data();
- }
-};
-
-template <typename K, typename V> class SerializationTypeName<std::map<K, V>> {
-public:
- static const char *getName() {
- static std::string Name = [] {
- std::string Name;
- raw_string_ostream(Name)
- << "std::map<" << SerializationTypeNameSequence<K, V>() << ">";
- return Name;
- }();
- return Name.data();
- }
-};
-
-/// The SerializationTraits<ChannelT, T> class describes how to serialize and
-/// deserialize an instance of type T to/from an abstract channel of type
-/// ChannelT. It also provides a representation of the type's name via the
-/// getName method.
-///
-/// Specializations of this class should provide the following functions:
-///
-/// @code{.cpp}
-///
-/// static const char* getName();
-/// static Error serialize(ChannelT&, const T&);
-/// static Error deserialize(ChannelT&, T&);
-///
-/// @endcode
-///
-/// The third argument of SerializationTraits is intended to support SFINAE.
-/// E.g.:
-///
-/// @code{.cpp}
-///
-/// class MyVirtualChannel { ... };
-///
- /// template <typename DerivedChannelT>
-/// class SerializationTraits<DerivedChannelT, bool,
-/// std::enable_if_t<
- /// std::is_base_of<MyVirtualChannel, DerivedChannelT>::value
-/// >> {
-/// public:
-/// static const char* getName() { ... };
-/// }
-///
-/// @endcode
-template <typename ChannelT, typename WireType,
- typename ConcreteType = WireType, typename = void>
-class SerializationTraits;
-
-template <typename ChannelT> class SequenceTraits {
-public:
- static Error emitSeparator(ChannelT &C) { return Error::success(); }
- static Error consumeSeparator(ChannelT &C) { return Error::success(); }
-};
-
-/// Utility class for serializing sequences of values of varying types.
-/// Specializations of this class contain 'serialize' and 'deserialize' methods
-/// for the given channel. The ArgTs... list will determine the "over-the-wire"
-/// types to be serialized. The serialize and deserialize methods take a list
-/// CArgTs... ("caller arg types") which must be the same length as ArgTs...,
-/// but may be different types from ArgTs, provided that for each CArgT there
-/// is a SerializationTraits specialization
- /// SerializationTraits<ChannelT, ArgT, CArgT> with methods that can serialize
- /// the caller argument to an over-the-wire value.
-template <typename ChannelT, typename... ArgTs> class SequenceSerialization;
-
-template <typename ChannelT> class SequenceSerialization<ChannelT> {
-public:
- static Error serialize(ChannelT &C) { return Error::success(); }
- static Error deserialize(ChannelT &C) { return Error::success(); }
-};
-
-template <typename ChannelT, typename ArgT>
-class SequenceSerialization<ChannelT, ArgT> {
-public:
- template <typename CArgT> static Error serialize(ChannelT &C, CArgT &&CArg) {
- return SerializationTraits<ChannelT, ArgT, std::decay_t<CArgT>>::serialize(
- C, std::forward<CArgT>(CArg));
- }
-
- template <typename CArgT> static Error deserialize(ChannelT &C, CArgT &CArg) {
- return SerializationTraits<ChannelT, ArgT, CArgT>::deserialize(C, CArg);
- }
-};
-
-template <typename ChannelT, typename ArgT, typename... ArgTs>
-class SequenceSerialization<ChannelT, ArgT, ArgTs...> {
-public:
- template <typename CArgT, typename... CArgTs>
- static Error serialize(ChannelT &C, CArgT &&CArg, CArgTs &&...CArgs) {
- if (auto Err =
- SerializationTraits<ChannelT, ArgT, std::decay_t<CArgT>>::serialize(
- C, std::forward<CArgT>(CArg)))
- return Err;
- if (auto Err = SequenceTraits<ChannelT>::emitSeparator(C))
- return Err;
- return SequenceSerialization<ChannelT, ArgTs...>::serialize(
- C, std::forward<CArgTs>(CArgs)...);
- }
-
- template <typename CArgT, typename... CArgTs>
- static Error deserialize(ChannelT &C, CArgT &CArg, CArgTs &...CArgs) {
- if (auto Err =
- SerializationTraits<ChannelT, ArgT, CArgT>::deserialize(C, CArg))
- return Err;
- if (auto Err = SequenceTraits<ChannelT>::consumeSeparator(C))
- return Err;
- return SequenceSerialization<ChannelT, ArgTs...>::deserialize(C, CArgs...);
- }
-};
-
-template <typename ChannelT, typename... ArgTs>
-Error serializeSeq(ChannelT &C, ArgTs &&...Args) {
- return SequenceSerialization<ChannelT, std::decay_t<ArgTs>...>::serialize(
- C, std::forward<ArgTs>(Args)...);
-}
-
-template <typename ChannelT, typename... ArgTs>
-Error deserializeSeq(ChannelT &C, ArgTs &...Args) {
- return SequenceSerialization<ChannelT, ArgTs...>::deserialize(C, Args...);
-}
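A round-trip sketch, assuming a channel C whose type provides the scalar and std::string SerializationTraits (as RawByteChannel-derived channels do):

    uint32_t Id = 42;
    std::string Name = "example";
    if (Error Err = serializeSeq(C, Id, Name))
      return Err;

    uint32_t IdIn = 0;
    std::string NameIn;
    if (Error Err = deserializeSeq(C, IdIn, NameIn))
      return Err;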
-
-template <typename ChannelT> class SerializationTraits<ChannelT, Error> {
-public:
- using WrappedErrorSerializer =
- std::function<Error(ChannelT &C, const ErrorInfoBase &)>;
-
- using WrappedErrorDeserializer =
- std::function<Error(ChannelT &C, Error &Err)>;
-
- template <typename ErrorInfoT, typename SerializeFtor,
- typename DeserializeFtor>
- static void registerErrorType(std::string Name, SerializeFtor Serialize,
- DeserializeFtor Deserialize) {
- assert(!Name.empty() &&
- "The empty string is reserved for the Success value");
-
- const std::string *KeyName = nullptr;
- {
- // We're abusing the stability of std::map here: We take a reference to
- // the key of the deserializers map to save us from duplicating the string
- // in the serializer. This should be changed to use a stringpool if we
- // switch to a map type that may move keys in memory.
- std::lock_guard<std::recursive_mutex> Lock(DeserializersMutex);
- auto I = Deserializers.insert(
- Deserializers.begin(),
- std::make_pair(std::move(Name), std::move(Deserialize)));
- KeyName = &I->first;
- }
-
- {
- assert(KeyName != nullptr && "No keyname pointer");
- std::lock_guard<std::recursive_mutex> Lock(SerializersMutex);
- Serializers[ErrorInfoT::classID()] =
- [KeyName, Serialize = std::move(Serialize)](
- ChannelT &C, const ErrorInfoBase &EIB) -> Error {
- assert(EIB.dynamicClassID() == ErrorInfoT::classID() &&
- "Serializer called for wrong error type");
- if (auto Err = serializeSeq(C, *KeyName))
- return Err;
- return Serialize(C, static_cast<const ErrorInfoT &>(EIB));
- };
- }
- }
-
- static Error serialize(ChannelT &C, Error &&Err) {
- std::lock_guard<std::recursive_mutex> Lock(SerializersMutex);
-
- if (!Err)
- return serializeSeq(C, std::string());
-
- return handleErrors(std::move(Err), [&C](const ErrorInfoBase &EIB) {
- auto SI = Serializers.find(EIB.dynamicClassID());
- if (SI == Serializers.end())
- return serializeAsStringError(C, EIB);
- return (SI->second)(C, EIB);
- });
- }
-
- static Error deserialize(ChannelT &C, Error &Err) {
- std::lock_guard<std::recursive_mutex> Lock(DeserializersMutex);
-
- std::string Key;
- if (auto Err = deserializeSeq(C, Key))
- return Err;
-
- if (Key.empty()) {
- ErrorAsOutParameter EAO(&Err);
- Err = Error::success();
- return Error::success();
- }
-
- auto DI = Deserializers.find(Key);
- assert(DI != Deserializers.end() && "No deserializer for error type");
- return (DI->second)(C, Err);
- }
-
-private:
- static Error serializeAsStringError(ChannelT &C, const ErrorInfoBase &EIB) {
- std::string ErrMsg;
- {
- raw_string_ostream ErrMsgStream(ErrMsg);
- EIB.log(ErrMsgStream);
- }
- return serialize(C, make_error<StringError>(std::move(ErrMsg),
- inconvertibleErrorCode()));
- }
-
- static std::recursive_mutex SerializersMutex;
- static std::recursive_mutex DeserializersMutex;
- static std::map<const void *, WrappedErrorSerializer> Serializers;
- static std::map<std::string, WrappedErrorDeserializer> Deserializers;
-};
-
-template <typename ChannelT>
-std::recursive_mutex SerializationTraits<ChannelT, Error>::SerializersMutex;
-
-template <typename ChannelT>
-std::recursive_mutex SerializationTraits<ChannelT, Error>::DeserializersMutex;
-
-template <typename ChannelT>
-std::map<const void *,
- typename SerializationTraits<ChannelT, Error>::WrappedErrorSerializer>
- SerializationTraits<ChannelT, Error>::Serializers;
-
-template <typename ChannelT>
-std::map<std::string, typename SerializationTraits<
- ChannelT, Error>::WrappedErrorDeserializer>
- SerializationTraits<ChannelT, Error>::Deserializers;
-
-/// Registers a serializer and deserializer for the given error type on the
-/// given channel type.
-template <typename ChannelT, typename ErrorInfoT, typename SerializeFtor,
- typename DeserializeFtor>
-void registerErrorSerialization(std::string Name, SerializeFtor &&Serialize,
- DeserializeFtor &&Deserialize) {
- SerializationTraits<ChannelT, Error>::template registerErrorType<ErrorInfoT>(
- std::move(Name), std::forward<SerializeFtor>(Serialize),
- std::forward<DeserializeFtor>(Deserialize));
-}
-
-/// Registers serialization/deserialization for StringError.
-template <typename ChannelT> void registerStringError() {
- static bool AlreadyRegistered = false;
- if (!AlreadyRegistered) {
- registerErrorSerialization<ChannelT, StringError>(
- "StringError",
- [](ChannelT &C, const StringError &SE) {
- return serializeSeq(C, SE.getMessage());
- },
- [](ChannelT &C, Error &Err) -> Error {
- ErrorAsOutParameter EAO(&Err);
- std::string Msg;
- if (auto E2 = deserializeSeq(C, Msg))
- return E2;
- Err = make_error<StringError>(
- std::move(Msg),
- orcError(OrcErrorCode::UnknownErrorCodeFromRemote));
- return Error::success();
- });
- AlreadyRegistered = true;
- }
-}
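A sketch of registering a custom error type, assuming a hypothetical ErrorInfo<MyError> subclass exposing getValue() and constructible from uint32_t, plus a channel type MyChannel:

    registerErrorSerialization<MyChannel, MyError>(
        "MyError",
        [](MyChannel &C, const MyError &E) {
          return serializeSeq(C, E.getValue());
        },
        [](MyChannel &C, Error &Err) -> Error {
          ErrorAsOutParameter EAO(&Err);
          uint32_t Val = 0;
          if (auto E2 = deserializeSeq(C, Val))
            return E2;
          Err = make_error<MyError>(Val);
          return Error::success();
        });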
-
-/// SerializationTraits for Expected<T1> from an Expected<T2>.
-template <typename ChannelT, typename T1, typename T2>
-class SerializationTraits<ChannelT, Expected<T1>, Expected<T2>> {
-public:
- static Error serialize(ChannelT &C, Expected<T2> &&ValOrErr) {
- if (ValOrErr) {
- if (auto Err = serializeSeq(C, true))
- return Err;
- return SerializationTraits<ChannelT, T1, T2>::serialize(C, *ValOrErr);
- }
- if (auto Err = serializeSeq(C, false))
- return Err;
- return serializeSeq(C, ValOrErr.takeError());
- }
-
- static Error deserialize(ChannelT &C, Expected<T2> &ValOrErr) {
- ExpectedAsOutParameter<T2> EAO(&ValOrErr);
- bool HasValue;
- if (auto Err = deserializeSeq(C, HasValue))
- return Err;
- if (HasValue)
- return SerializationTraits<ChannelT, T1, T2>::deserialize(C, *ValOrErr);
- Error Err = Error::success();
- if (auto E2 = deserializeSeq(C, Err))
- return E2;
- ValOrErr = std::move(Err);
- return Error::success();
- }
-};
-
-/// SerializationTraits for Expected<T1> from a T2.
-template <typename ChannelT, typename T1, typename T2>
-class SerializationTraits<ChannelT, Expected<T1>, T2> {
-public:
- static Error serialize(ChannelT &C, T2 &&Val) {
- return serializeSeq(C, Expected<T2>(std::forward<T2>(Val)));
- }
-};
-
-/// SerializationTraits for Expected<T1> from an Error.
-template <typename ChannelT, typename T>
-class SerializationTraits<ChannelT, Expected<T>, Error> {
-public:
- static Error serialize(ChannelT &C, Error &&Err) {
- return serializeSeq(C, Expected<T>(std::move(Err)));
- }
-};
-
-/// SerializationTraits default specialization for std::pair.
-template <typename ChannelT, typename T1, typename T2, typename T3, typename T4>
-class SerializationTraits<ChannelT, std::pair<T1, T2>, std::pair<T3, T4>> {
-public:
- static Error serialize(ChannelT &C, const std::pair<T3, T4> &V) {
- if (auto Err = SerializationTraits<ChannelT, T1, T3>::serialize(C, V.first))
- return Err;
- return SerializationTraits<ChannelT, T2, T4>::serialize(C, V.second);
- }
-
- static Error deserialize(ChannelT &C, std::pair<T3, T4> &V) {
- if (auto Err =
- SerializationTraits<ChannelT, T1, T3>::deserialize(C, V.first))
- return Err;
- return SerializationTraits<ChannelT, T2, T4>::deserialize(C, V.second);
- }
-};
-
-/// SerializationTraits default specialization for std::tuple.
-template <typename ChannelT, typename... ArgTs>
-class SerializationTraits<ChannelT, std::tuple<ArgTs...>> {
-public:
- /// RPC channel serialization for std::tuple.
- static Error serialize(ChannelT &C, const std::tuple<ArgTs...> &V) {
- return serializeTupleHelper(C, V, std::index_sequence_for<ArgTs...>());
- }
-
- /// RPC channel deserialization for std::tuple.
- static Error deserialize(ChannelT &C, std::tuple<ArgTs...> &V) {
- return deserializeTupleHelper(C, V, std::index_sequence_for<ArgTs...>());
- }
-
-private:
- // Serialization helper for std::tuple.
- template <size_t... Is>
- static Error serializeTupleHelper(ChannelT &C, const std::tuple<ArgTs...> &V,
- std::index_sequence<Is...> _) {
- return serializeSeq(C, std::get<Is>(V)...);
- }
-
- // Deserialization helper for std::tuple.
- template <size_t... Is>
- static Error deserializeTupleHelper(ChannelT &C, std::tuple<ArgTs...> &V,
- std::index_sequence<Is...> _) {
- return deserializeSeq(C, std::get<Is>(V)...);
- }
-};
-
-template <typename ChannelT, typename T>
-class SerializationTraits<ChannelT, Optional<T>> {
-public:
- /// Serialize an Optional<T>.
- static Error serialize(ChannelT &C, const Optional<T> &O) {
- if (auto Err = serializeSeq(C, O != None))
- return Err;
- if (O)
- if (auto Err = serializeSeq(C, *O))
- return Err;
- return Error::success();
- }
-
- /// Deserialize an Optional<T>.
- static Error deserialize(ChannelT &C, Optional<T> &O) {
- bool HasValue = false;
- if (auto Err = deserializeSeq(C, HasValue))
- return Err;
- if (HasValue) {
- O = T();
- if (auto Err = deserializeSeq(C, *O))
- return Err;
- }
- return Error::success();
- };
-};
-
-/// SerializationTraits default specialization for std::vector.
-template <typename ChannelT, typename T>
-class SerializationTraits<ChannelT, std::vector<T>> {
-public:
- /// Serialize a std::vector<T> from std::vector<T>.
- static Error serialize(ChannelT &C, const std::vector<T> &V) {
- if (auto Err = serializeSeq(C, static_cast<uint64_t>(V.size())))
- return Err;
-
- for (const auto &E : V)
- if (auto Err = serializeSeq(C, E))
- return Err;
-
- return Error::success();
- }
-
- /// Deserialize a std::vector<T> to a std::vector<T>.
- static Error deserialize(ChannelT &C, std::vector<T> &V) {
- assert(V.empty() &&
- "Expected default-constructed vector to deserialize into");
-
- uint64_t Count = 0;
- if (auto Err = deserializeSeq(C, Count))
- return Err;
-
- V.resize(Count);
- for (auto &E : V)
- if (auto Err = deserializeSeq(C, E))
- return Err;
-
- return Error::success();
- }
-};
-
-/// Enable vector serialization from an ArrayRef.
-template <typename ChannelT, typename T>
-class SerializationTraits<ChannelT, std::vector<T>, ArrayRef<T>> {
-public:
- static Error serialize(ChannelT &C, ArrayRef<T> V) {
- if (auto Err = serializeSeq(C, static_cast<uint64_t>(V.size())))
- return Err;
-
- for (const auto &E : V)
- if (auto Err = serializeSeq(C, E))
- return Err;
-
- return Error::success();
- }
-};
-
-template <typename ChannelT, typename T, typename T2>
-class SerializationTraits<ChannelT, std::set<T>, std::set<T2>> {
-public:
- /// Serialize a std::set<T> from std::set<T2>.
- static Error serialize(ChannelT &C, const std::set<T2> &S) {
- if (auto Err = serializeSeq(C, static_cast<uint64_t>(S.size())))
- return Err;
-
- for (const auto &E : S)
- if (auto Err = SerializationTraits<ChannelT, T, T2>::serialize(C, E))
- return Err;
-
- return Error::success();
- }
-
- /// Deserialize a std::set<T> into a std::set<T2>.
- static Error deserialize(ChannelT &C, std::set<T2> &S) {
- assert(S.empty() && "Expected default-constructed set to deserialize into");
-
- uint64_t Count = 0;
- if (auto Err = deserializeSeq(C, Count))
- return Err;
-
- while (Count-- != 0) {
- T2 Val;
- if (auto Err = SerializationTraits<ChannelT, T, T2>::deserialize(C, Val))
- return Err;
-
- auto Added = S.insert(Val).second;
- if (!Added)
- return make_error<StringError>("Duplicate element in deserialized set",
- orcError(OrcErrorCode::UnknownORCError));
- }
-
- return Error::success();
- }
-};
-
-template <typename ChannelT, typename K, typename V, typename K2, typename V2>
-class SerializationTraits<ChannelT, std::map<K, V>, std::map<K2, V2>> {
-public:
- /// Serialize a std::map<K, V> from std::map<K2, V2>.
- static Error serialize(ChannelT &C, const std::map<K2, V2> &M) {
- if (auto Err = serializeSeq(C, static_cast<uint64_t>(M.size())))
- return Err;
-
- for (const auto &E : M) {
- if (auto Err =
- SerializationTraits<ChannelT, K, K2>::serialize(C, E.first))
- return Err;
- if (auto Err =
- SerializationTraits<ChannelT, V, V2>::serialize(C, E.second))
- return Err;
- }
-
- return Error::success();
- }
-
- /// Deserialize a std::map<K, V> into a std::map<K2, V2>.
- static Error deserialize(ChannelT &C, std::map<K2, V2> &M) {
- assert(M.empty() && "Expected default-constructed map to deserialize into");
-
- uint64_t Count = 0;
- if (auto Err = deserializeSeq(C, Count))
- return Err;
-
- while (Count-- != 0) {
- std::pair<K2, V2> Val;
- if (auto Err =
- SerializationTraits<ChannelT, K, K2>::deserialize(C, Val.first))
- return Err;
-
- if (auto Err =
- SerializationTraits<ChannelT, V, V2>::deserialize(C, Val.second))
- return Err;
-
- auto Added = M.insert(Val).second;
- if (!Added)
- return make_error<StringError>("Duplicate element in deserialized map",
- orcError(OrcErrorCode::UnknownORCError));
- }
-
- return Error::success();
- }
-};
-
-template <typename ChannelT, typename K, typename V, typename K2, typename V2>
-class SerializationTraits<ChannelT, std::map<K, V>, DenseMap<K2, V2>> {
-public:
- /// Serialize a std::map<K, V> from DenseMap<K2, V2>.
- static Error serialize(ChannelT &C, const DenseMap<K2, V2> &M) {
- if (auto Err = serializeSeq(C, static_cast<uint64_t>(M.size())))
- return Err;
-
- for (auto &E : M) {
- if (auto Err =
- SerializationTraits<ChannelT, K, K2>::serialize(C, E.first))
- return Err;
-
- if (auto Err =
- SerializationTraits<ChannelT, V, V2>::serialize(C, E.second))
- return Err;
- }
-
- return Error::success();
- }
-
- /// Deserialize a std::map<K, V> into a DenseMap<K2, V2>.
- static Error deserialize(ChannelT &C, DenseMap<K2, V2> &M) {
- assert(M.empty() && "Expected default-constructed map to deserialize into");
-
- uint64_t Count = 0;
- if (auto Err = deserializeSeq(C, Count))
- return Err;
-
- while (Count-- != 0) {
- std::pair<K2, V2> Val;
- if (auto Err =
- SerializationTraits<ChannelT, K, K2>::deserialize(C, Val.first))
- return Err;
-
- if (auto Err =
- SerializationTraits<ChannelT, V, V2>::deserialize(C, Val.second))
- return Err;
-
- auto Added = M.insert(Val).second;
- if (!Added)
- return make_error<StringError>("Duplicate element in deserialized map",
- orcError(OrcErrorCode::UnknownORCError));
- }
-
- return Error::success();
- }
-};
-
-} // namespace shared
-} // end namespace orc
-} // end namespace llvm
-
-#endif // LLVM_EXECUTIONENGINE_ORC_SHARED_SERIALIZATION_H
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h
index 854f1098d5af..9ac13a493e9d 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h
@@ -33,10 +33,12 @@
#define LLVM_EXECUTIONENGINE_ORC_SHARED_SIMPLEPACKEDSERIALIZATION_H
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/SwapByteOrder.h"
+#include <limits>
#include <string>
#include <tuple>
#include <type_traits>
@@ -193,13 +195,6 @@ template <typename SPSElementTagT> class SPSSequence;
/// SPS tag type for strings, which are equivalent to sequences of chars.
using SPSString = SPSSequence<char>;
-/// SPS tag type for executor addresseses.
-class SPSExecutorAddress {};
-
-template <>
-class SPSSerializationTraits<SPSExecutorAddress, uint64_t>
- : public SPSSerializationTraits<uint64_t, uint64_t> {};
-
/// SPS tag type for maps.
///
/// SPS maps are just sequences of (Key, Value) tuples.
@@ -289,6 +284,40 @@ public:
}
};
+/// Trivial ArrayRef<T> -> SPSSequence<SPSElementTagT> serialization.
+template <typename SPSElementTagT, typename T>
+class TrivialSPSSequenceSerialization<SPSElementTagT, ArrayRef<T>> {
+public:
+ static constexpr bool available = true;
+};
+
+/// Specialized SPSSequence<char> -> ArrayRef<char> serialization.
+///
+/// On deserialize, points directly into the input buffer.
+template <> class SPSSerializationTraits<SPSSequence<char>, ArrayRef<char>> {
+public:
+ static size_t size(const ArrayRef<char> &A) {
+ return SPSArgList<uint64_t>::size(static_cast<uint64_t>(A.size())) +
+ A.size();
+ }
+
+ static bool serialize(SPSOutputBuffer &OB, const ArrayRef<char> &A) {
+ if (!SPSArgList<uint64_t>::serialize(OB, static_cast<uint64_t>(A.size())))
+ return false;
+ return OB.write(A.data(), A.size());
+ }
+
+ static bool deserialize(SPSInputBuffer &IB, ArrayRef<char> &A) {
+ uint64_t Size;
+ if (!SPSArgList<uint64_t>::deserialize(IB, Size))
+ return false;
+ if (Size > std::numeric_limits<size_t>::max())
+ return false;
+ A = {IB.data(), static_cast<size_t>(Size)};
+ return IB.skip(Size);
+ }
+};
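+
+// Example (sketch, not part of the API): with this specialization a char
+// buffer can be sent under the SPSSequence<char> tag without copying on the
+// read side. The Buffer value below is a hypothetical stand-in:
+//
+//   ArrayRef<char> Buffer = /* ... */;
+//   if (!SPSArgList<SPSSequence<char>>::serialize(OB, Buffer))
+//     /* handle serialization failure */;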
+
/// 'Trivial' sequence serialization: Sequence is serialized as a uint64_t size
/// followed by a for-each loop over the elements of the sequence to serialize
/// each of them.
@@ -330,6 +359,44 @@ public:
}
};
+/// SPSTuple serialization for std::tuple.
+template <typename... SPSTagTs, typename... Ts>
+class SPSSerializationTraits<SPSTuple<SPSTagTs...>, std::tuple<Ts...>> {
+private:
+ using TupleArgList = typename SPSTuple<SPSTagTs...>::AsArgList;
+ using ArgIndices = std::make_index_sequence<sizeof...(Ts)>;
+
+ template <std::size_t... I>
+ static size_t size(const std::tuple<Ts...> &T, std::index_sequence<I...>) {
+ return TupleArgList::size(std::get<I>(T)...);
+ }
+
+ template <std::size_t... I>
+ static bool serialize(SPSOutputBuffer &OB, const std::tuple<Ts...> &T,
+ std::index_sequence<I...>) {
+ return TupleArgList::serialize(OB, std::get<I>(T)...);
+ }
+
+ template <std::size_t... I>
+ static bool deserialize(SPSInputBuffer &IB, std::tuple<Ts...> &T,
+ std::index_sequence<I...>) {
+ return TupleArgList::deserialize(IB, std::get<I>(T)...);
+ }
+
+public:
+ static size_t size(const std::tuple<Ts...> &T) {
+ return size(T, ArgIndices{});
+ }
+
+ static bool serialize(SPSOutputBuffer &OB, const std::tuple<Ts...> &T) {
+ return serialize(OB, T, ArgIndices{});
+ }
+
+ static bool deserialize(SPSInputBuffer &IB, std::tuple<Ts...> &T) {
+ return deserialize(IB, T, ArgIndices{});
+ }
+};
+
/// SPSTuple serialization for std::pair.
template <typename SPSTagT1, typename SPSTagT2, typename T1, typename T2>
class SPSSerializationTraits<SPSTuple<SPSTagT1, SPSTagT2>, std::pair<T1, T2>> {
@@ -380,6 +447,49 @@ public:
}
};
+/// Serialization for StringMap<ValueT>s.
+template <typename SPSValueT, typename ValueT>
+class SPSSerializationTraits<SPSSequence<SPSTuple<SPSString, SPSValueT>>,
+ StringMap<ValueT>> {
+public:
+ static size_t size(const StringMap<ValueT> &M) {
+ size_t Sz = SPSArgList<uint64_t>::size(static_cast<uint64_t>(M.size()));
+ for (auto &E : M)
+ Sz += SPSArgList<SPSString, SPSValueT>::size(E.first(), E.second);
+ return Sz;
+ }
+
+ static bool serialize(SPSOutputBuffer &OB, const StringMap<ValueT> &M) {
+ if (!SPSArgList<uint64_t>::serialize(OB, static_cast<uint64_t>(M.size())))
+ return false;
+
+ for (auto &E : M)
+ if (!SPSArgList<SPSString, SPSValueT>::serialize(OB, E.first(), E.second))
+ return false;
+
+ return true;
+ }
+
+ static bool deserialize(SPSInputBuffer &IB, StringMap<ValueT> &M) {
+ uint64_t Size;
+ assert(M.empty() && "M already contains elements");
+
+ if (!SPSArgList<uint64_t>::deserialize(IB, Size))
+ return false;
+
+ while (Size--) {
+ StringRef S;
+ ValueT V;
+ if (!SPSArgList<SPSString, SPSValueT>::deserialize(IB, S, V))
+ return false;
+ if (!M.insert(std::make_pair(S, V)).second)
+ return false;
+ }
+
+ return true;
+ }
+};
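+
+// Example (sketch, not part of the API): with this specialization a
+// StringMap<uint64_t> can be serialized under the tag
+// SPSSequence<SPSTuple<SPSString, uint64_t>>:
+//
+//   StringMap<uint64_t> M;
+//   using SPSTag = SPSSequence<SPSTuple<SPSString, uint64_t>>;
+//   size_t Bytes = SPSArgList<SPSTag>::size(M);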
+
/// SPS tag type for errors.
class SPSError;
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.h
new file mode 100644
index 000000000000..9e074ed1f931
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.h
@@ -0,0 +1,235 @@
+//===--- SimpleRemoteEPCUtils.h - Utils for Simple Remote EPC ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Message definitions and other utilities for SimpleRemoteEPC and
+// SimpleRemoteEPCServer.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_SHARED_SIMPLEREMOTEEPCUTILS_H
+#define LLVM_EXECUTIONENGINE_ORC_SHARED_SIMPLEREMOTEEPCUTILS_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h"
+#include "llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h"
+#include "llvm/Support/Error.h"
+
+#include <atomic>
+#include <mutex>
+#include <string>
+#include <thread>
+
+namespace llvm {
+namespace orc {
+
+namespace SimpleRemoteEPCDefaultBootstrapSymbolNames {
+extern const char *ExecutorSessionObjectName;
+extern const char *DispatchFnName;
+} // end namespace SimpleRemoteEPCDefaultBootstrapSymbolNames
+
+enum class SimpleRemoteEPCOpcode : uint8_t {
+ Setup,
+ Hangup,
+ Result,
+ CallWrapper,
+ LastOpC = CallWrapper
+};
+
+struct SimpleRemoteEPCExecutorInfo {
+ std::string TargetTriple;
+ uint64_t PageSize;
+ StringMap<ExecutorAddr> BootstrapSymbols;
+};
+
+using SimpleRemoteEPCArgBytesVector = SmallVector<char, 128>;
+
+class SimpleRemoteEPCTransportClient {
+public:
+ enum HandleMessageAction { ContinueSession, EndSession };
+
+ virtual ~SimpleRemoteEPCTransportClient();
+
+ /// Handle receipt of a message.
+ ///
+ /// Returns an Error if the message cannot be handled, 'EndSession' if the
+ /// client will not accept any further messages, and 'ContinueSession'
+ /// otherwise.
+ virtual Expected<HandleMessageAction>
+ handleMessage(SimpleRemoteEPCOpcode OpC, uint64_t SeqNo, ExecutorAddr TagAddr,
+ SimpleRemoteEPCArgBytesVector ArgBytes) = 0;
+
+ /// Handle a disconnection from the underlying transport. No further messages
+ /// should be sent to handleMessage after this is called.
+ /// Err may contain an Error value indicating unexpected disconnection. This
+ /// allows clients to log such errors, but no attempt should be made at
+ /// recovery (which should be handled inside the transport class, if it is
+ /// supported at all).
+ virtual void handleDisconnect(Error Err) = 0;
+};
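+
+// Example (illustrative sketch; MyClient is a hypothetical subclass): a
+// minimal client might end the session on Hangup and continue otherwise:
+//
+//   Expected<HandleMessageAction>
+//   MyClient::handleMessage(SimpleRemoteEPCOpcode OpC, uint64_t SeqNo,
+//                           ExecutorAddr TagAddr,
+//                           SimpleRemoteEPCArgBytesVector ArgBytes) {
+//     if (OpC == SimpleRemoteEPCOpcode::Hangup)
+//       return EndSession;
+//     return ContinueSession;
+//   }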
+
+class SimpleRemoteEPCTransport {
+public:
+ virtual ~SimpleRemoteEPCTransport();
+
+ /// Called during setup of the client to indicate that the client is ready
+ /// to receive messages.
+ ///
+ /// Transport objects should not access the client until this method is
+ /// called.
+ virtual Error start() = 0;
+
+ /// Send a SimpleRemoteEPC message.
+ ///
+ /// This function may be called concurrently. Subclasses should implement
+ /// locking if required for the underlying transport.
+ virtual Error sendMessage(SimpleRemoteEPCOpcode OpC, uint64_t SeqNo,
+ ExecutorAddr TagAddr, ArrayRef<char> ArgBytes) = 0;
+
+ /// Trigger disconnection from the transport. The implementation should
+ /// respond by calling handleDisconnect on the client once disconnection
+ /// is complete. May be called more than once and from different threads.
+ virtual void disconnect() = 0;
+};
+
+/// Uses read/write on FileDescriptors for transport.
+class FDSimpleRemoteEPCTransport : public SimpleRemoteEPCTransport {
+public:
+ /// Create an FDSimpleRemoteEPCTransport using the given FDs for
+ /// reading (InFD) and writing (OutFD).
+ static Expected<std::unique_ptr<FDSimpleRemoteEPCTransport>>
+ Create(SimpleRemoteEPCTransportClient &C, int InFD, int OutFD);
+
+ /// Create an FDSimpleRemoteEPCTransport using the given FD for both
+ /// reading and writing.
+ static Expected<std::unique_ptr<FDSimpleRemoteEPCTransport>>
+ Create(SimpleRemoteEPCTransportClient &C, int FD) {
+ return Create(C, FD, FD);
+ }
+
+ ~FDSimpleRemoteEPCTransport() override;
+
+ Error start() override;
+
+ Error sendMessage(SimpleRemoteEPCOpcode OpC, uint64_t SeqNo,
+ ExecutorAddr TagAddr, ArrayRef<char> ArgBytes) override;
+
+ void disconnect() override;
+
+private:
+ FDSimpleRemoteEPCTransport(SimpleRemoteEPCTransportClient &C, int InFD,
+ int OutFD)
+ : C(C), InFD(InFD), OutFD(OutFD) {}
+
+ Error readBytes(char *Dst, size_t Size, bool *IsEOF = nullptr);
+ int writeBytes(const char *Src, size_t Size);
+ void listenLoop();
+
+ std::mutex M;
+ SimpleRemoteEPCTransportClient &C;
+ std::thread ListenerThread;
+ int InFD, OutFD;
+ std::atomic<bool> Disconnected{false};
+};
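+
+// Example (sketch; the client object and file descriptors are assumptions):
+//
+//   MyTransportClient Client;
+//   auto T = FDSimpleRemoteEPCTransport::Create(Client, InFD, OutFD);
+//   if (!T)
+//     /* handle T.takeError() */;
+//   else if (auto Err = (*T)->start())
+//     /* handle Err */;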
+
+struct RemoteSymbolLookupSetElement {
+ std::string Name;
+ bool Required;
+};
+
+using RemoteSymbolLookupSet = std::vector<RemoteSymbolLookupSetElement>;
+
+struct RemoteSymbolLookup {
+ uint64_t H;
+ RemoteSymbolLookupSet Symbols;
+};
+
+namespace shared {
+
+using SPSRemoteSymbolLookupSetElement = SPSTuple<SPSString, bool>;
+
+using SPSRemoteSymbolLookupSet = SPSSequence<SPSRemoteSymbolLookupSetElement>;
+
+using SPSRemoteSymbolLookup = SPSTuple<uint64_t, SPSRemoteSymbolLookupSet>;
+
+/// Tuple containing target triple, page size, and bootstrap symbols.
+using SPSSimpleRemoteEPCExecutorInfo =
+ SPSTuple<SPSString, uint64_t,
+ SPSSequence<SPSTuple<SPSString, SPSExecutorAddr>>>;
+
+template <>
+class SPSSerializationTraits<SPSRemoteSymbolLookupSetElement,
+ RemoteSymbolLookupSetElement> {
+public:
+ static size_t size(const RemoteSymbolLookupSetElement &V) {
+ return SPSArgList<SPSString, bool>::size(V.Name, V.Required);
+ }
+
+ static size_t serialize(SPSOutputBuffer &OB,
+ const RemoteSymbolLookupSetElement &V) {
+ return SPSArgList<SPSString, bool>::serialize(OB, V.Name, V.Required);
+ }
+
+ static size_t deserialize(SPSInputBuffer &IB,
+ RemoteSymbolLookupSetElement &V) {
+ return SPSArgList<SPSString, bool>::deserialize(IB, V.Name, V.Required);
+ }
+};
+
+template <>
+class SPSSerializationTraits<SPSRemoteSymbolLookup, RemoteSymbolLookup> {
+public:
+ static size_t size(const RemoteSymbolLookup &V) {
+ return SPSArgList<uint64_t, SPSRemoteSymbolLookupSet>::size(V.H, V.Symbols);
+ }
+
+ static size_t serialize(SPSOutputBuffer &OB, const RemoteSymbolLookup &V) {
+ return SPSArgList<uint64_t, SPSRemoteSymbolLookupSet>::serialize(OB, V.H,
+ V.Symbols);
+ }
+
+ static size_t deserialize(SPSInputBuffer &IB, RemoteSymbolLookup &V) {
+ return SPSArgList<uint64_t, SPSRemoteSymbolLookupSet>::deserialize(
+ IB, V.H, V.Symbols);
+ }
+};
+
+template <>
+class SPSSerializationTraits<SPSSimpleRemoteEPCExecutorInfo,
+ SimpleRemoteEPCExecutorInfo> {
+public:
+ static size_t size(const SimpleRemoteEPCExecutorInfo &SI) {
+ return SPSSimpleRemoteEPCExecutorInfo::AsArgList ::size(
+ SI.TargetTriple, SI.PageSize, SI.BootstrapSymbols);
+ }
+
+ static bool serialize(SPSOutputBuffer &OB,
+ const SimpleRemoteEPCExecutorInfo &SI) {
+ return SPSSimpleRemoteEPCExecutorInfo::AsArgList ::serialize(
+ OB, SI.TargetTriple, SI.PageSize, SI.BootstrapSymbols);
+ }
+
+ static bool deserialize(SPSInputBuffer &IB, SimpleRemoteEPCExecutorInfo &SI) {
+ return SPSSimpleRemoteEPCExecutorInfo::AsArgList ::deserialize(
+ IB, SI.TargetTriple, SI.PageSize, SI.BootstrapSymbols);
+ }
+};
+
+using SPSLoadDylibSignature = SPSExpected<SPSExecutorAddr>(SPSExecutorAddr,
+ SPSString, uint64_t);
+
+using SPSLookupSymbolsSignature =
+ SPSExpected<SPSSequence<SPSSequence<SPSExecutorAddr>>>(
+ SPSExecutorAddr, SPSSequence<SPSRemoteSymbolLookup>);
+
+} // end namespace shared
+} // end namespace orc
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_SHARED_SIMPLEREMOTEEPCUTILS_H
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h
index a44bcd4c8064..0e8b7e7d345a 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h
@@ -17,6 +17,10 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ExecutionEngine/JITSymbol.h"
+#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h"
+#include "llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h"
+#include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h"
+#include "llvm/Support/Memory.h"
#include <vector>
@@ -24,12 +28,108 @@ namespace llvm {
namespace orc {
namespace tpctypes {
+enum WireProtectionFlags : uint8_t {
+ WPF_None = 0,
+ WPF_Read = 1U << 0,
+ WPF_Write = 1U << 1,
+ WPF_Exec = 1U << 2,
+ LLVM_MARK_AS_BITMASK_ENUM(WPF_Exec)
+};
+
+/// Convert from sys::Memory::ProtectionFlags
+inline WireProtectionFlags
+toWireProtectionFlags(sys::Memory::ProtectionFlags PF) {
+ WireProtectionFlags WPF = WPF_None;
+ if (PF & sys::Memory::MF_READ)
+ WPF |= WPF_Read;
+ if (PF & sys::Memory::MF_WRITE)
+ WPF |= WPF_Write;
+ if (PF & sys::Memory::MF_EXEC)
+ WPF |= WPF_Exec;
+ return WPF;
+}
+
+inline sys::Memory::ProtectionFlags
+fromWireProtectionFlags(WireProtectionFlags WPF) {
+ int PF = 0;
+ if (WPF & WPF_Read)
+ PF |= sys::Memory::MF_READ;
+ if (WPF & WPF_Write)
+ PF |= sys::Memory::MF_WRITE;
+ if (WPF & WPF_Exec)
+ PF |= sys::Memory::MF_EXEC;
+ return static_cast<sys::Memory::ProtectionFlags>(PF);
+}
+
+inline std::string getWireProtectionFlagsStr(WireProtectionFlags WPF) {
+ std::string Result;
+ Result += (WPF & WPF_Read) ? 'R' : '-';
+ Result += (WPF & WPF_Write) ? 'W' : '-';
+ Result += (WPF & WPF_Exec) ? 'X' : '-';
+ return Result;
+}
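+
+// For example, getWireProtectionFlagsStr(WPF_Read | WPF_Exec) yields "R-X".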
+
+struct WrapperFunctionCall {
+ ExecutorAddr Func;
+ ExecutorAddrRange ArgData;
+
+ WrapperFunctionCall() = default;
+ WrapperFunctionCall(ExecutorAddr Func, ExecutorAddr ArgData,
+ ExecutorAddrDiff ArgSize)
+ : Func(Func), ArgData(ArgData, ArgSize) {}
+ WrapperFunctionCall(ExecutorAddr Func, ExecutorAddrRange ArgData)
+ : Func(Func), ArgData(ArgData) {}
+
+ shared::WrapperFunctionResult run() {
+ using FnTy =
+ shared::CWrapperFunctionResult(const char *ArgData, size_t ArgSize);
+ return shared::WrapperFunctionResult(
+ Func.toPtr<FnTy *>()(ArgData.Start.toPtr<const char *>(),
+ static_cast<size_t>(ArgData.size().getValue())));
+ }
+
+ /// Run call and deserialize result using SPS.
+ template <typename SPSRetT, typename RetT> Error runWithSPSRet(RetT &RetVal) {
+ auto WFR = run();
+ if (const char *ErrMsg = WFR.getOutOfBandError())
+ return make_error<StringError>(ErrMsg, inconvertibleErrorCode());
+ shared::SPSInputBuffer IB(WFR.data(), WFR.size());
+ if (!shared::SPSSerializationTraits<SPSRetT, RetT>::deserialize(IB, RetVal))
+ return make_error<StringError>("Could not deserialize result from "
+ "serialized wrapper function call",
+ inconvertibleErrorCode());
+ return Error::success();
+ }
+
+ /// Overload for SPS functions returning void.
+ Error runWithSPSRet() {
+ shared::SPSEmpty E;
+ return runWithSPSRet<shared::SPSEmpty>(E);
+ }
+};
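+
+// Example (sketch; the addresses and size are assumptions): run a wrapper
+// function whose SPS signature returns an int32_t:
+//
+//   WrapperFunctionCall Call(FnAddr, ArgDataAddr, ArgSize);
+//   int32_t Result = 0;
+//   if (auto Err = Call.runWithSPSRet<int32_t>(Result))
+//     /* handle Err */;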
+
+struct AllocationActionsPair {
+ WrapperFunctionCall Finalize;
+ WrapperFunctionCall Deallocate;
+};
+
+struct SegFinalizeRequest {
+ WireProtectionFlags Prot;
+ ExecutorAddr Addr;
+ uint64_t Size;
+ ArrayRef<char> Content;
+};
+
+struct FinalizeRequest {
+ std::vector<SegFinalizeRequest> Segments;
+ std::vector<AllocationActionsPair> Actions;
+};
+
template <typename T> struct UIntWrite {
UIntWrite() = default;
- UIntWrite(JITTargetAddress Address, T Value)
- : Address(Address), Value(Value) {}
+ UIntWrite(ExecutorAddr Addr, T Value) : Addr(Addr), Value(Value) {}
- JITTargetAddress Address = 0;
+ ExecutorAddr Addr;
T Value = 0;
};
@@ -49,10 +149,10 @@ using UInt64Write = UIntWrite<uint64_t>;
/// For use with TargetProcessControl::MemoryAccess objects.
struct BufferWrite {
BufferWrite() = default;
- BufferWrite(JITTargetAddress Address, StringRef Buffer)
- : Address(Address), Buffer(Buffer) {}
+ BufferWrite(ExecutorAddr Addr, StringRef Buffer)
+ : Addr(Addr), Buffer(Buffer) {}
- JITTargetAddress Address = 0;
+ ExecutorAddr Addr;
StringRef Buffer;
};
@@ -62,6 +162,180 @@ using DylibHandle = JITTargetAddress;
using LookupResult = std::vector<JITTargetAddress>;
} // end namespace tpctypes
+
+namespace shared {
+
+class SPSMemoryProtectionFlags {};
+
+using SPSWrapperFunctionCall = SPSTuple<SPSExecutorAddr, SPSExecutorAddrRange>;
+
+using SPSSegFinalizeRequest =
+ SPSTuple<SPSMemoryProtectionFlags, SPSExecutorAddr, uint64_t,
+ SPSSequence<char>>;
+
+using SPSAllocationActionsPair =
+ SPSTuple<SPSWrapperFunctionCall, SPSWrapperFunctionCall>;
+
+using SPSFinalizeRequest = SPSTuple<SPSSequence<SPSSegFinalizeRequest>,
+ SPSSequence<SPSAllocationActionsPair>>;
+
+template <typename T>
+using SPSMemoryAccessUIntWrite = SPSTuple<SPSExecutorAddr, T>;
+
+using SPSMemoryAccessUInt8Write = SPSMemoryAccessUIntWrite<uint8_t>;
+using SPSMemoryAccessUInt16Write = SPSMemoryAccessUIntWrite<uint16_t>;
+using SPSMemoryAccessUInt32Write = SPSMemoryAccessUIntWrite<uint32_t>;
+using SPSMemoryAccessUInt64Write = SPSMemoryAccessUIntWrite<uint64_t>;
+
+using SPSMemoryAccessBufferWrite = SPSTuple<SPSExecutorAddr, SPSSequence<char>>;
+
+template <>
+class SPSSerializationTraits<SPSMemoryProtectionFlags,
+ tpctypes::WireProtectionFlags> {
+public:
+ static size_t size(const tpctypes::WireProtectionFlags &WPF) {
+ return SPSArgList<uint8_t>::size(static_cast<uint8_t>(WPF));
+ }
+
+ static bool serialize(SPSOutputBuffer &OB,
+ const tpctypes::WireProtectionFlags &WPF) {
+ return SPSArgList<uint8_t>::serialize(OB, static_cast<uint8_t>(WPF));
+ }
+
+ static bool deserialize(SPSInputBuffer &IB,
+ tpctypes::WireProtectionFlags &WPF) {
+ uint8_t Val;
+ if (!SPSArgList<uint8_t>::deserialize(IB, Val))
+ return false;
+ WPF = static_cast<tpctypes::WireProtectionFlags>(Val);
+ return true;
+ }
+};
+
+template <>
+class SPSSerializationTraits<SPSWrapperFunctionCall,
+ tpctypes::WrapperFunctionCall> {
+ using AL = SPSWrapperFunctionCall::AsArgList;
+
+public:
+ static size_t size(const tpctypes::WrapperFunctionCall &WFC) {
+ return AL::size(WFC.Func, WFC.ArgData);
+ }
+
+ static bool serialize(SPSOutputBuffer &OB,
+ const tpctypes::WrapperFunctionCall &WFC) {
+ return AL::serialize(OB, WFC.Func, WFC.ArgData);
+ }
+
+ static bool deserialize(SPSInputBuffer &IB,
+ tpctypes::WrapperFunctionCall &WFC) {
+ return AL::deserialize(IB, WFC.Func, WFC.ArgData);
+ }
+};
+
+template <>
+class SPSSerializationTraits<SPSAllocationActionsPair,
+ tpctypes::AllocationActionsPair> {
+ using AL = SPSAllocationActionsPair::AsArgList;
+
+public:
+ static size_t size(const tpctypes::AllocationActionsPair &AAP) {
+ return AL::size(AAP.Finalize, AAP.Deallocate);
+ }
+
+ static bool serialize(SPSOutputBuffer &OB,
+ const tpctypes::AllocationActionsPair &AAP) {
+ return AL::serialize(OB, AAP.Finalize, AAP.Deallocate);
+ }
+
+ static bool deserialize(SPSInputBuffer &IB,
+ tpctypes::AllocationActionsPair &AAP) {
+ return AL::deserialize(IB, AAP.Finalize, AAP.Deallocate);
+ }
+};
+
+template <>
+class SPSSerializationTraits<SPSSegFinalizeRequest,
+ tpctypes::SegFinalizeRequest> {
+ using SFRAL = SPSSegFinalizeRequest::AsArgList;
+
+public:
+ static size_t size(const tpctypes::SegFinalizeRequest &SFR) {
+ return SFRAL::size(SFR.Prot, SFR.Addr, SFR.Size, SFR.Content);
+ }
+
+ static bool serialize(SPSOutputBuffer &OB,
+ const tpctypes::SegFinalizeRequest &SFR) {
+ return SFRAL::serialize(OB, SFR.Prot, SFR.Addr, SFR.Size, SFR.Content);
+ }
+
+ static bool deserialize(SPSInputBuffer &IB,
+ tpctypes::SegFinalizeRequest &SFR) {
+ return SFRAL::deserialize(IB, SFR.Prot, SFR.Addr, SFR.Size, SFR.Content);
+ }
+};
+
+template <>
+class SPSSerializationTraits<SPSFinalizeRequest, tpctypes::FinalizeRequest> {
+ using FRAL = SPSFinalizeRequest::AsArgList;
+
+public:
+ static size_t size(const tpctypes::FinalizeRequest &FR) {
+ return FRAL::size(FR.Segments, FR.Actions);
+ }
+
+ static bool serialize(SPSOutputBuffer &OB,
+ const tpctypes::FinalizeRequest &FR) {
+ return FRAL::serialize(OB, FR.Segments, FR.Actions);
+ }
+
+ static bool deserialize(SPSInputBuffer &IB, tpctypes::FinalizeRequest &FR) {
+ return FRAL::deserialize(IB, FR.Segments, FR.Actions);
+ }
+};
+
+template <typename T>
+class SPSSerializationTraits<SPSMemoryAccessUIntWrite<T>,
+ tpctypes::UIntWrite<T>> {
+public:
+ static size_t size(const tpctypes::UIntWrite<T> &W) {
+ return SPSTuple<SPSExecutorAddr, T>::AsArgList::size(W.Addr, W.Value);
+ }
+
+ static bool serialize(SPSOutputBuffer &OB, const tpctypes::UIntWrite<T> &W) {
+ return SPSTuple<SPSExecutorAddr, T>::AsArgList::serialize(OB, W.Addr,
+ W.Value);
+ }
+
+ static bool deserialize(SPSInputBuffer &IB, tpctypes::UIntWrite<T> &W) {
+ return SPSTuple<SPSExecutorAddr, T>::AsArgList::deserialize(IB, W.Addr,
+ W.Value);
+ }
+};
+
+template <>
+class SPSSerializationTraits<SPSMemoryAccessBufferWrite,
+ tpctypes::BufferWrite> {
+public:
+ static size_t size(const tpctypes::BufferWrite &W) {
+ return SPSTuple<SPSExecutorAddr, SPSSequence<char>>::AsArgList::size(
+ W.Addr, W.Buffer);
+ }
+
+ static bool serialize(SPSOutputBuffer &OB, const tpctypes::BufferWrite &W) {
+ return SPSTuple<SPSExecutorAddr, SPSSequence<char>>::AsArgList ::serialize(
+ OB, W.Addr, W.Buffer);
+ }
+
+ static bool deserialize(SPSInputBuffer &IB, tpctypes::BufferWrite &W) {
+ return SPSTuple<SPSExecutorAddr,
+ SPSSequence<char>>::AsArgList ::deserialize(IB, W.Addr,
+ W.Buffer);
+ }
+};
+
+} // end namespace shared
} // end namespace orc
} // end namespace llvm
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h
index 2f14a1c76332..bf841b1f706b 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h
@@ -10,9 +10,10 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_EXECUTIONENGINE_ORC_WRAPPERFUNCTIONUTILS_H
-#define LLVM_EXECUTIONENGINE_ORC_WRAPPERFUNCTIONUTILS_H
+#ifndef LLVM_EXECUTIONENGINE_ORC_SHARED_WRAPPERFUNCTIONUTILS_H
+#define LLVM_EXECUTIONENGINE_ORC_SHARED_WRAPPERFUNCTIONUTILS_H
+#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h"
#include "llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h"
#include "llvm/Support/Error.h"
@@ -22,24 +23,18 @@ namespace llvm {
namespace orc {
namespace shared {
-namespace detail {
-
-// DO NOT USE DIRECTLY.
// Must be kept in-sync with compiler-rt/lib/orc/c-api.h.
union CWrapperFunctionResultDataUnion {
char *ValuePtr;
char Value[sizeof(ValuePtr)];
};
-// DO NOT USE DIRECTLY.
// Must be kept in-sync with compiler-rt/lib/orc/c-api.h.
typedef struct {
CWrapperFunctionResultDataUnion Data;
size_t Size;
} CWrapperFunctionResult;
-} // end namespace detail
-
/// C++ wrapper function result: Same as CWrapperFunctionResult but
/// auto-releases memory.
class WrapperFunctionResult {
@@ -48,11 +43,11 @@ public:
WrapperFunctionResult() { init(R); }
/// Create a WrapperFunctionResult by taking ownership of a
- /// detail::CWrapperFunctionResult.
+ /// CWrapperFunctionResult.
///
/// Warning: This should only be used by clients writing wrapper-function
/// caller utilities (like TargetProcessControl).
- WrapperFunctionResult(detail::CWrapperFunctionResult R) : R(R) {
+ WrapperFunctionResult(CWrapperFunctionResult R) : R(R) {
// Reset R.
init(R);
}
@@ -77,18 +72,25 @@ public:
free(R.Data.ValuePtr);
}
- /// Release ownership of the contained detail::CWrapperFunctionResult.
+ /// Release ownership of the contained CWrapperFunctionResult.
/// Warning: Do not use -- this method will be removed in the future. It only
/// exists to temporarily support some code that will eventually be moved to
/// the ORC runtime.
- detail::CWrapperFunctionResult release() {
- detail::CWrapperFunctionResult Tmp;
+ CWrapperFunctionResult release() {
+ CWrapperFunctionResult Tmp;
init(Tmp);
std::swap(R, Tmp);
return Tmp;
}
/// Get a pointer to the data contained in this instance.
+ char *data() {
+ assert((R.Size != 0 || R.Data.ValuePtr == nullptr) &&
+ "Cannot get data for out-of-band error value");
+ return R.Size > sizeof(R.Data.Value) ? R.Data.ValuePtr : R.Data.Value;
+ }
+
+ /// Get a const pointer to the data contained in this instance.
const char *data() const {
assert((R.Size != 0 || R.Data.ValuePtr == nullptr) &&
"Cannot get data for out-of-band error value");
@@ -108,24 +110,19 @@ public:
/// Create a WrapperFunctionResult with the given size and return a pointer
/// to the underlying memory.
- static char *allocate(WrapperFunctionResult &WFR, size_t Size) {
+ static WrapperFunctionResult allocate(size_t Size) {
// Reset.
- WFR = WrapperFunctionResult();
+ WrapperFunctionResult WFR;
WFR.R.Size = Size;
- char *DataPtr;
- if (WFR.R.Size > sizeof(WFR.R.Data.Value)) {
- DataPtr = (char *)malloc(WFR.R.Size);
- WFR.R.Data.ValuePtr = DataPtr;
- } else
- DataPtr = WFR.R.Data.Value;
- return DataPtr;
+ if (WFR.R.Size > sizeof(WFR.R.Data.Value))
+ WFR.R.Data.ValuePtr = (char *)malloc(WFR.R.Size);
+ return WFR;
}
/// Copy from the given char range.
static WrapperFunctionResult copyFrom(const char *Source, size_t Size) {
- WrapperFunctionResult WFR;
- char *DataPtr = allocate(WFR, Size);
- memcpy(DataPtr, Source, Size);
+ auto WFR = allocate(Size);
+ memcpy(WFR.data(), Source, Size);
return WFR;
}
@@ -161,12 +158,12 @@ public:
}
private:
- static void init(detail::CWrapperFunctionResult &R) {
+ static void init(CWrapperFunctionResult &R) {
R.Data.ValuePtr = nullptr;
R.Size = 0;
}
- detail::CWrapperFunctionResult R;
+ CWrapperFunctionResult R;
};
namespace detail {
@@ -174,10 +171,8 @@ namespace detail {
template <typename SPSArgListT, typename... ArgTs>
WrapperFunctionResult
serializeViaSPSToWrapperFunctionResult(const ArgTs &...Args) {
- WrapperFunctionResult Result;
- char *DataPtr =
- WrapperFunctionResult::allocate(Result, SPSArgListT::size(Args...));
- SPSOutputBuffer OB(DataPtr, Result.size());
+ auto Result = WrapperFunctionResult::allocate(SPSArgListT::size(Args...));
+ SPSOutputBuffer OB(Result.data(), Result.size());
if (!SPSArgListT::serialize(OB, Args...))
return WrapperFunctionResult::createOutOfBandError(
"Error serializing arguments to blob in call");
@@ -315,6 +310,7 @@ private:
static void callAsync(HandlerT &&H,
SerializeAndSendResultT &&SerializeAndSendResult,
ArgTupleT Args, std::index_sequence<I...>) {
+ (void)Args; // Silence a buggy GCC warning.
return std::forward<HandlerT>(H)(std::move(SerializeAndSendResult),
std::move(std::get<I>(Args))...);
}
@@ -486,10 +482,16 @@ public:
}
auto SendSerializedResult = [SDR = std::move(SendDeserializedResult)](
- WrapperFunctionResult R) {
+ WrapperFunctionResult R) mutable {
RetT RetVal = detail::ResultDeserializer<SPSRetTagT, RetT>::makeValue();
detail::ResultDeserializer<SPSRetTagT, RetT>::makeSafe(RetVal);
+ if (auto *ErrMsg = R.getOutOfBandError()) {
+ SDR(make_error<StringError>(ErrMsg, inconvertibleErrorCode()),
+ std::move(RetVal));
+ return;
+ }
+
SPSInputBuffer IB(R.data(), R.size());
if (auto Err = detail::ResultDeserializer<SPSRetTagT, RetT>::deserialize(
RetVal, R.data(), R.size()))
@@ -547,12 +549,68 @@ public:
return WrapperFunction<SPSEmpty(SPSTagTs...)>::call(Caller, BE, Args...);
}
+ template <typename AsyncCallerFn, typename SendDeserializedResultFn,
+ typename... ArgTs>
+ static void callAsync(AsyncCallerFn &&Caller,
+ SendDeserializedResultFn &&SendDeserializedResult,
+ const ArgTs &...Args) {
+ WrapperFunction<SPSEmpty(SPSTagTs...)>::callAsync(
+ std::forward<AsyncCallerFn>(Caller),
+ [SDR = std::move(SendDeserializedResult)](Error SerializeErr,
+ SPSEmpty E) mutable {
+ SDR(std::move(SerializeErr));
+ },
+ Args...);
+ }
+
using WrapperFunction<SPSEmpty(SPSTagTs...)>::handle;
using WrapperFunction<SPSEmpty(SPSTagTs...)>::handleAsync;
};
+/// A function object that takes an ExecutorAddr as its first argument,
+/// casts that address to a ClassT*, then calls the given method on that
+/// pointer passing in the remaining function arguments. This utility
+/// removes some of the boilerplate from writing wrappers for method calls.
+///
+/// @code{.cpp}
+/// class MyClass {
+/// public:
+/// void myMethod(uint32_t, bool) { ... }
+/// };
+///
+/// // SPS Method signature -- note MyClass object address as first argument.
+/// using SPSMyMethodWrapperSignature =
+/// SPSTuple<SPSExecutorAddr, uint32_t, bool>;
+///
+/// WrapperFunctionResult
+/// myMethodCallWrapper(const char *ArgData, size_t ArgSize) {
+/// return WrapperFunction<SPSMyMethodWrapperSignature>::handle(
+/// ArgData, ArgSize, makeMethodWrapperHandler(&MyClass::myMethod));
+/// }
+/// @endcode
+///
+template <typename RetT, typename ClassT, typename... ArgTs>
+class MethodWrapperHandler {
+public:
+ using MethodT = RetT (ClassT::*)(ArgTs...);
+ MethodWrapperHandler(MethodT M) : M(M) {}
+ RetT operator()(ExecutorAddr ObjAddr, ArgTs &...Args) {
+ return (ObjAddr.toPtr<ClassT*>()->*M)(std::forward<ArgTs>(Args)...);
+ }
+
+private:
+ MethodT M;
+};
+
+/// Create a MethodWrapperHandler object from the given method pointer.
+template <typename RetT, typename ClassT, typename... ArgTs>
+MethodWrapperHandler<RetT, ClassT, ArgTs...>
+makeMethodWrapperHandler(RetT (ClassT::*Method)(ArgTs...)) {
+ return MethodWrapperHandler<RetT, ClassT, ArgTs...>(Method);
+}
+
} // end namespace shared
} // end namespace orc
} // end namespace llvm
-#endif // LLVM_EXECUTIONENGINE_ORC_WRAPPERFUNCTIONUTILS_H
+#endif // LLVM_EXECUTIONENGINE_ORC_SHARED_WRAPPERFUNCTIONUTILS_H
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/SimpleRemoteEPC.h b/llvm/include/llvm/ExecutionEngine/Orc/SimpleRemoteEPC.h
new file mode 100644
index 000000000000..bd72e4535325
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/Orc/SimpleRemoteEPC.h
@@ -0,0 +1,140 @@
+//===---- SimpleRemoteEPC.h - Simple remote executor control ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Simple remote executor process control.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_SIMPLEREMOTEEPC_H
+#define LLVM_EXECUTIONENGINE_ORC_SIMPLEREMOTEEPC_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/FunctionExtras.h"
+#include "llvm/ExecutionEngine/Orc/EPCGenericDylibManager.h"
+#include "llvm/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.h"
+#include "llvm/ExecutionEngine/Orc/EPCGenericMemoryAccess.h"
+#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h"
+#include "llvm/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MSVCErrorWorkarounds.h"
+
+#include <future>
+
+namespace llvm {
+namespace orc {
+
+class SimpleRemoteEPC : public ExecutorProcessControl,
+ public SimpleRemoteEPCTransportClient {
+public:
+ /// A setup object containing callbacks to construct a memory manager and
+ /// memory access object. Both are optional. If not specified,
+ /// EPCGenericJITLinkMemoryManager and EPCGenericMemoryAccess will be used.
+ struct Setup {
+ using CreateMemoryManagerFn =
+ Expected<std::unique_ptr<jitlink::JITLinkMemoryManager>>(
+ SimpleRemoteEPC &);
+ using CreateMemoryAccessFn =
+ Expected<std::unique_ptr<MemoryAccess>>(SimpleRemoteEPC &);
+
+ unique_function<CreateMemoryManagerFn> CreateMemoryManager;
+ unique_function<CreateMemoryAccessFn> CreateMemoryAccess;
+ };
+
+ /// Create a SimpleRemoteEPC using the given transport type and args.
+ template <typename TransportT, typename... TransportTCtorArgTs>
+ static Expected<std::unique_ptr<SimpleRemoteEPC>>
+ Create(std::unique_ptr<TaskDispatcher> D, Setup S,
+ TransportTCtorArgTs &&...TransportTCtorArgs) {
+ std::unique_ptr<SimpleRemoteEPC> SREPC(
+ new SimpleRemoteEPC(std::make_shared<SymbolStringPool>(),
+ std::move(D)));
+ auto T = TransportT::Create(
+ *SREPC, std::forward<TransportTCtorArgTs>(TransportTCtorArgs)...);
+ if (!T)
+ return T.takeError();
+ SREPC->T = std::move(*T);
+ if (auto Err = SREPC->setup(std::move(S)))
+ return joinErrors(std::move(Err), SREPC->disconnect());
+ return std::move(SREPC);
+ }
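+
+ // Example (sketch; Dispatcher, InFD and OutFD are assumptions):
+ //
+ //   auto EPC = SimpleRemoteEPC::Create<FDSimpleRemoteEPCTransport>(
+ //       std::move(Dispatcher), SimpleRemoteEPC::Setup(), InFD, OutFD);
+ //   if (!EPC)
+ //     /* handle EPC.takeError() */;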
+
+ SimpleRemoteEPC(const SimpleRemoteEPC &) = delete;
+ SimpleRemoteEPC &operator=(const SimpleRemoteEPC &) = delete;
+ SimpleRemoteEPC(SimpleRemoteEPC &&) = delete;
+ SimpleRemoteEPC &operator=(SimpleRemoteEPC &&) = delete;
+ ~SimpleRemoteEPC();
+
+ Expected<tpctypes::DylibHandle> loadDylib(const char *DylibPath) override;
+
+ Expected<std::vector<tpctypes::LookupResult>>
+ lookupSymbols(ArrayRef<LookupRequest> Request) override;
+
+ Expected<int32_t> runAsMain(ExecutorAddr MainFnAddr,
+ ArrayRef<std::string> Args) override;
+
+ void callWrapperAsync(ExecutorAddr WrapperFnAddr,
+ IncomingWFRHandler OnComplete,
+ ArrayRef<char> ArgBuffer) override;
+
+ Error disconnect() override;
+
+ Expected<HandleMessageAction>
+ handleMessage(SimpleRemoteEPCOpcode OpC, uint64_t SeqNo, ExecutorAddr TagAddr,
+ SimpleRemoteEPCArgBytesVector ArgBytes) override;
+
+ void handleDisconnect(Error Err) override;
+
+private:
+ SimpleRemoteEPC(std::shared_ptr<SymbolStringPool> SSP,
+ std::unique_ptr<TaskDispatcher> D)
+ : ExecutorProcessControl(std::move(SSP), std::move(D)) {}
+
+ static Expected<std::unique_ptr<jitlink::JITLinkMemoryManager>>
+ createDefaultMemoryManager(SimpleRemoteEPC &SREPC);
+ static Expected<std::unique_ptr<MemoryAccess>>
+ createDefaultMemoryAccess(SimpleRemoteEPC &SREPC);
+
+ Error sendMessage(SimpleRemoteEPCOpcode OpC, uint64_t SeqNo,
+ ExecutorAddr TagAddr, ArrayRef<char> ArgBytes);
+
+ Error handleSetup(uint64_t SeqNo, ExecutorAddr TagAddr,
+ SimpleRemoteEPCArgBytesVector ArgBytes);
+ Error setup(Setup S);
+
+ Error handleResult(uint64_t SeqNo, ExecutorAddr TagAddr,
+ SimpleRemoteEPCArgBytesVector ArgBytes);
+ void handleCallWrapper(uint64_t RemoteSeqNo, ExecutorAddr TagAddr,
+ SimpleRemoteEPCArgBytesVector ArgBytes);
+ Error handleHangup(SimpleRemoteEPCArgBytesVector ArgBytes);
+
+ uint64_t getNextSeqNo() { return NextSeqNo++; }
+ void releaseSeqNo(uint64_t SeqNo) {}
+
+ using PendingCallWrapperResultsMap =
+ DenseMap<uint64_t, IncomingWFRHandler>;
+
+ std::mutex SimpleRemoteEPCMutex;
+ std::condition_variable DisconnectCV;
+ bool Disconnected = false;
+ Error DisconnectErr = Error::success();
+
+ std::unique_ptr<SimpleRemoteEPCTransport> T;
+ std::unique_ptr<jitlink::JITLinkMemoryManager> OwnedMemMgr;
+ std::unique_ptr<MemoryAccess> OwnedMemAccess;
+
+ std::unique_ptr<EPCGenericDylibManager> DylibMgr;
+ ExecutorAddr RunAsMainAddr;
+
+ uint64_t NextSeqNo = 0;
+ PendingCallWrapperResultsMap PendingCallWrapperResults;
+};
+
+} // end namespace orc
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_SIMPLEREMOTEEPC_H
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/ExecutorBootstrapService.h b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/ExecutorBootstrapService.h
new file mode 100644
index 000000000000..32c127634b25
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/ExecutorBootstrapService.h
@@ -0,0 +1,36 @@
+//===- ExecutorBootstrapService.h - Bootstrap symbols for session -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Provides a service by supplying some set of bootstrap symbols.
+//
+// FIXME: The functionality in this file should be moved to the ORC runtime.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_EXECUTORBOOTSTRAPSERVICE_H
+#define LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_EXECUTORBOOTSTRAPSERVICE_H
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h"
+
+namespace llvm {
+namespace orc {
+
+class ExecutorBootstrapService {
+public:
+ virtual ~ExecutorBootstrapService();
+
+ virtual void
+ addBootstrapSymbols(StringMap<ExecutorAddr> &BootstrapSymbols) = 0;
+ virtual Error shutdown() = 0;
+};
+
+} // end namespace orc
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_EXECUTORBOOTSTRAPSERVICE_H
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h
index 3fad98b5f178..cfb951178da6 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h
@@ -16,7 +16,7 @@
#include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h"
#include <cstdint>
-extern "C" llvm::orc::shared::detail::CWrapperFunctionResult
+extern "C" llvm::orc::shared::CWrapperFunctionResult
llvm_orc_registerJITLoaderGDBWrapper(const char *Data, uint64_t Size);
#endif // LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_JITLOADERGDB_H
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/OrcRPCTPCServer.h b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/OrcRPCTPCServer.h
deleted file mode 100644
index 96e4341fce68..000000000000
--- a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/OrcRPCTPCServer.h
+++ /dev/null
@@ -1,660 +0,0 @@
-//===-- OrcRPCTPCServer.h -- OrcRPCTargetProcessControl Server --*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// OrcRPCTargetProcessControl server class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_ORCRPCTPCSERVER_H
-#define LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_ORCRPCTPCSERVER_H
-
-#include "llvm/ADT/BitmaskEnum.h"
-#include "llvm/ExecutionEngine/Orc/Shared/RPCUtils.h"
-#include "llvm/ExecutionEngine/Orc/Shared/RawByteChannel.h"
-#include "llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h"
-#include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h"
-#include "llvm/ExecutionEngine/Orc/TargetProcess/TargetExecutionUtils.h"
-#include "llvm/Support/DynamicLibrary.h"
-#include "llvm/Support/FormatVariadic.h"
-#include "llvm/Support/Host.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/Memory.h"
-#include "llvm/Support/Process.h"
-
-#include <atomic>
-
-namespace llvm {
-namespace orc {
-
-namespace orcrpctpc {
-
-enum WireProtectionFlags : uint8_t {
- WPF_None = 0,
- WPF_Read = 1U << 0,
- WPF_Write = 1U << 1,
- WPF_Exec = 1U << 2,
- LLVM_MARK_AS_BITMASK_ENUM(WPF_Exec)
-};
-
-struct ExecutorProcessInfo {
- std::string Triple;
- unsigned PageSize;
- JITTargetAddress DispatchFuncAddr;
- JITTargetAddress DispatchCtxAddr;
-};
-
-/// Convert from sys::Memory::ProtectionFlags
-inline WireProtectionFlags
-toWireProtectionFlags(sys::Memory::ProtectionFlags PF) {
- WireProtectionFlags WPF = WPF_None;
- if (PF & sys::Memory::MF_READ)
- WPF |= WPF_Read;
- if (PF & sys::Memory::MF_WRITE)
- WPF |= WPF_Write;
- if (PF & sys::Memory::MF_EXEC)
- WPF |= WPF_Exec;
- return WPF;
-}
-
-inline sys::Memory::ProtectionFlags
-fromWireProtectionFlags(WireProtectionFlags WPF) {
- int PF = 0;
- if (WPF & WPF_Read)
- PF |= sys::Memory::MF_READ;
- if (WPF & WPF_Write)
- PF |= sys::Memory::MF_WRITE;
- if (WPF & WPF_Exec)
- PF |= sys::Memory::MF_EXEC;
- return static_cast<sys::Memory::ProtectionFlags>(PF);
-}
-
-struct ReserveMemRequestElement {
- WireProtectionFlags Prot = WPF_None;
- uint64_t Size = 0;
- uint64_t Alignment = 0;
-};
-
-using ReserveMemRequest = std::vector<ReserveMemRequestElement>;
-
-struct ReserveMemResultElement {
- WireProtectionFlags Prot = WPF_None;
- JITTargetAddress Address = 0;
- uint64_t AllocatedSize = 0;
-};
-
-using ReserveMemResult = std::vector<ReserveMemResultElement>;
-
-struct ReleaseOrFinalizeMemRequestElement {
- WireProtectionFlags Prot = WPF_None;
- JITTargetAddress Address = 0;
- uint64_t Size = 0;
-};
-
-using ReleaseOrFinalizeMemRequest =
- std::vector<ReleaseOrFinalizeMemRequestElement>;
-
-} // end namespace orcrpctpc
-
-namespace shared {
-
-template <> class SerializationTypeName<WrapperFunctionResult> {
-public:
- static const char *getName() { return "WrapperFunctionResult"; }
-};
-
-template <typename ChannelT>
-class SerializationTraits<
- ChannelT, WrapperFunctionResult, WrapperFunctionResult,
- std::enable_if_t<std::is_base_of<RawByteChannel, ChannelT>::value>> {
-public:
- static Error serialize(ChannelT &C, const WrapperFunctionResult &E) {
- if (auto Err = serializeSeq(C, static_cast<uint64_t>(E.size())))
- return Err;
- if (E.size() == 0)
- return Error::success();
- return C.appendBytes(E.data(), E.size());
- }
-
- static Error deserialize(ChannelT &C, WrapperFunctionResult &E) {
- uint64_t Size;
- if (auto Err = deserializeSeq(C, Size))
- return Err;
-
- WrapperFunctionResult Tmp;
- char *Data = WrapperFunctionResult::allocate(Tmp, Size);
-
- if (auto Err = C.readBytes(Data, Size))
- return Err;
-
- E = std::move(Tmp);
-
- return Error::success();
- }
-};
-
-template <> class SerializationTypeName<tpctypes::UInt8Write> {
-public:
- static const char *getName() { return "UInt8Write"; }
-};
-
-template <> class SerializationTypeName<tpctypes::UInt16Write> {
-public:
- static const char *getName() { return "UInt16Write"; }
-};
-
-template <> class SerializationTypeName<tpctypes::UInt32Write> {
-public:
- static const char *getName() { return "UInt32Write"; }
-};
-
-template <> class SerializationTypeName<tpctypes::UInt64Write> {
-public:
- static const char *getName() { return "UInt64Write"; }
-};
-
-template <> class SerializationTypeName<tpctypes::BufferWrite> {
-public:
- static const char *getName() { return "BufferWrite"; }
-};
-
-template <> class SerializationTypeName<orcrpctpc::ReserveMemRequestElement> {
-public:
- static const char *getName() { return "ReserveMemRequestElement"; }
-};
-
-template <> class SerializationTypeName<orcrpctpc::ReserveMemResultElement> {
-public:
- static const char *getName() { return "ReserveMemResultElement"; }
-};
-
-template <>
-class SerializationTypeName<orcrpctpc::ReleaseOrFinalizeMemRequestElement> {
-public:
- static const char *getName() { return "ReleaseOrFinalizeMemRequestElement"; }
-};
-
-template <> class SerializationTypeName<orcrpctpc::ExecutorProcessInfo> {
-public:
- static const char *getName() { return "ExecutorProcessInfo"; }
-};
-
-template <typename ChannelT, typename WriteT>
-class SerializationTraits<
- ChannelT, WriteT, WriteT,
- std::enable_if_t<std::is_same<WriteT, tpctypes::UInt8Write>::value ||
- std::is_same<WriteT, tpctypes::UInt16Write>::value ||
- std::is_same<WriteT, tpctypes::UInt32Write>::value ||
- std::is_same<WriteT, tpctypes::UInt64Write>::value>> {
-public:
- static Error serialize(ChannelT &C, const WriteT &W) {
- return serializeSeq(C, W.Address, W.Value);
- }
- static Error deserialize(ChannelT &C, WriteT &W) {
- return deserializeSeq(C, W.Address, W.Value);
- }
-};
-
-template <typename ChannelT>
-class SerializationTraits<
- ChannelT, tpctypes::BufferWrite, tpctypes::BufferWrite,
- std::enable_if_t<std::is_base_of<RawByteChannel, ChannelT>::value>> {
-public:
- static Error serialize(ChannelT &C, const tpctypes::BufferWrite &W) {
- uint64_t Size = W.Buffer.size();
- if (auto Err = serializeSeq(C, W.Address, Size))
- return Err;
-
- return C.appendBytes(W.Buffer.data(), Size);
- }
- static Error deserialize(ChannelT &C, tpctypes::BufferWrite &W) {
- JITTargetAddress Address;
- uint64_t Size;
-
- if (auto Err = deserializeSeq(C, Address, Size))
- return Err;
-
- char *Buffer = jitTargetAddressToPointer<char *>(Address);
-
- if (auto Err = C.readBytes(Buffer, Size))
- return Err;
-
- W = {Address, StringRef(Buffer, Size)};
- return Error::success();
- }
-};
-
-template <typename ChannelT>
-class SerializationTraits<ChannelT, orcrpctpc::ReserveMemRequestElement> {
-public:
- static Error serialize(ChannelT &C,
- const orcrpctpc::ReserveMemRequestElement &E) {
- return serializeSeq(C, static_cast<uint8_t>(E.Prot), E.Size, E.Alignment);
- }
-
- static Error deserialize(ChannelT &C,
- orcrpctpc::ReserveMemRequestElement &E) {
- return deserializeSeq(C, *reinterpret_cast<uint8_t *>(&E.Prot), E.Size,
- E.Alignment);
- }
-};
-
-template <typename ChannelT>
-class SerializationTraits<ChannelT, orcrpctpc::ReserveMemResultElement> {
-public:
- static Error serialize(ChannelT &C,
- const orcrpctpc::ReserveMemResultElement &E) {
- return serializeSeq(C, static_cast<uint8_t>(E.Prot), E.Address,
- E.AllocatedSize);
- }
-
- static Error deserialize(ChannelT &C, orcrpctpc::ReserveMemResultElement &E) {
- return deserializeSeq(C, *reinterpret_cast<uint8_t *>(&E.Prot), E.Address,
- E.AllocatedSize);
- }
-};
-
-template <typename ChannelT>
-class SerializationTraits<ChannelT,
- orcrpctpc::ReleaseOrFinalizeMemRequestElement> {
-public:
- static Error
- serialize(ChannelT &C,
- const orcrpctpc::ReleaseOrFinalizeMemRequestElement &E) {
- return serializeSeq(C, static_cast<uint8_t>(E.Prot), E.Address, E.Size);
- }
-
- static Error deserialize(ChannelT &C,
- orcrpctpc::ReleaseOrFinalizeMemRequestElement &E) {
- return deserializeSeq(C, *reinterpret_cast<uint8_t *>(&E.Prot), E.Address,
- E.Size);
- }
-};
-
-template <typename ChannelT>
-class SerializationTraits<ChannelT, orcrpctpc::ExecutorProcessInfo> {
-public:
- static Error serialize(ChannelT &C,
- const orcrpctpc::ExecutorProcessInfo &EPI) {
- return serializeSeq(C, EPI.Triple, EPI.PageSize, EPI.DispatchFuncAddr,
- EPI.DispatchCtxAddr);
- }
-
- static Error deserialize(ChannelT &C, orcrpctpc::ExecutorProcessInfo &EPI) {
- return deserializeSeq(C, EPI.Triple, EPI.PageSize, EPI.DispatchFuncAddr,
- EPI.DispatchCtxAddr);
- }
-};
-
-} // end namespace shared
-
-namespace orcrpctpc {
-
-using RemoteSymbolLookupSet = std::vector<std::pair<std::string, bool>>;
-using RemoteLookupRequest =
- std::pair<tpctypes::DylibHandle, RemoteSymbolLookupSet>;
-
-class GetExecutorProcessInfo
- : public shared::RPCFunction<GetExecutorProcessInfo,
- orcrpctpc::ExecutorProcessInfo()> {
-public:
- static const char *getName() { return "GetJITDispatchInfo"; }
-};
-
-class ReserveMem
- : public shared::RPCFunction<ReserveMem, Expected<ReserveMemResult>(
- ReserveMemRequest)> {
-public:
- static const char *getName() { return "ReserveMem"; }
-};
-
-class FinalizeMem
- : public shared::RPCFunction<FinalizeMem,
- Error(ReleaseOrFinalizeMemRequest)> {
-public:
- static const char *getName() { return "FinalizeMem"; }
-};
-
-class ReleaseMem
- : public shared::RPCFunction<ReleaseMem,
- Error(ReleaseOrFinalizeMemRequest)> {
-public:
- static const char *getName() { return "ReleaseMem"; }
-};
-
-class WriteUInt8s
- : public shared::RPCFunction<WriteUInt8s,
- Error(std::vector<tpctypes::UInt8Write>)> {
-public:
- static const char *getName() { return "WriteUInt8s"; }
-};
-
-class WriteUInt16s
- : public shared::RPCFunction<WriteUInt16s,
- Error(std::vector<tpctypes::UInt16Write>)> {
-public:
- static const char *getName() { return "WriteUInt16s"; }
-};
-
-class WriteUInt32s
- : public shared::RPCFunction<WriteUInt32s,
- Error(std::vector<tpctypes::UInt32Write>)> {
-public:
- static const char *getName() { return "WriteUInt32s"; }
-};
-
-class WriteUInt64s
- : public shared::RPCFunction<WriteUInt64s,
- Error(std::vector<tpctypes::UInt64Write>)> {
-public:
- static const char *getName() { return "WriteUInt64s"; }
-};
-
-class WriteBuffers
- : public shared::RPCFunction<WriteBuffers,
- Error(std::vector<tpctypes::BufferWrite>)> {
-public:
- static const char *getName() { return "WriteBuffers"; }
-};
-
-class LoadDylib
- : public shared::RPCFunction<LoadDylib, Expected<tpctypes::DylibHandle>(
- std::string DylibPath)> {
-public:
- static const char *getName() { return "LoadDylib"; }
-};
-
-class LookupSymbols
- : public shared::RPCFunction<LookupSymbols,
- Expected<std::vector<tpctypes::LookupResult>>(
- std::vector<RemoteLookupRequest>)> {
-public:
- static const char *getName() { return "LookupSymbols"; }
-};
-
-class RunMain
- : public shared::RPCFunction<RunMain,
- int64_t(JITTargetAddress MainAddr,
- std::vector<std::string> Args)> {
-public:
- static const char *getName() { return "RunMain"; }
-};
-
-class RunWrapper
- : public shared::RPCFunction<RunWrapper,
- shared::WrapperFunctionResult(
- JITTargetAddress, std::vector<uint8_t>)> {
-public:
- static const char *getName() { return "RunWrapper"; }
-};
-
-class CloseConnection : public shared::RPCFunction<CloseConnection, void()> {
-public:
- static const char *getName() { return "CloseConnection"; }
-};
-
-} // end namespace orcrpctpc
-
-/// TargetProcessControl for a process connected via an ORC RPC Endpoint.
-template <typename RPCEndpointT> class OrcRPCTPCServer {
-private:
- using ThisT = OrcRPCTPCServer<RPCEndpointT>;
-
-public:
- /// Create an OrcRPCTPCServer from the given endpoint.
- OrcRPCTPCServer(RPCEndpointT &EP) : EP(EP) {
-
- TripleStr = sys::getProcessTriple();
- PageSize = sys::Process::getPageSizeEstimate();
-
- EP.template addHandler<orcrpctpc::GetExecutorProcessInfo>(
- *this, &ThisT::getExecutorProcessInfo);
- EP.template addHandler<orcrpctpc::ReserveMem>(*this, &ThisT::reserveMemory);
- EP.template addHandler<orcrpctpc::FinalizeMem>(*this,
- &ThisT::finalizeMemory);
- EP.template addHandler<orcrpctpc::ReleaseMem>(*this, &ThisT::releaseMemory);
-
- EP.template addHandler<orcrpctpc::WriteUInt8s>(
- handleWriteUInt<tpctypes::UInt8Write>);
- EP.template addHandler<orcrpctpc::WriteUInt16s>(
- handleWriteUInt<tpctypes::UInt16Write>);
- EP.template addHandler<orcrpctpc::WriteUInt32s>(
- handleWriteUInt<tpctypes::UInt32Write>);
- EP.template addHandler<orcrpctpc::WriteUInt64s>(
- handleWriteUInt<tpctypes::UInt64Write>);
- EP.template addHandler<orcrpctpc::WriteBuffers>(handleWriteBuffer);
-
- EP.template addHandler<orcrpctpc::LoadDylib>(*this, &ThisT::loadDylib);
- EP.template addHandler<orcrpctpc::LookupSymbols>(*this,
- &ThisT::lookupSymbols);
-
- EP.template addHandler<orcrpctpc::RunMain>(*this, &ThisT::runMain);
- EP.template addHandler<orcrpctpc::RunWrapper>(*this, &ThisT::runWrapper);
-
- EP.template addHandler<orcrpctpc::CloseConnection>(*this,
- &ThisT::closeConnection);
- }
-
- /// Set the ProgramName to be used as the first argv element when running
- /// functions via runAsMain.
- void setProgramName(Optional<std::string> ProgramName = None) {
- this->ProgramName = std::move(ProgramName);
- }
-
- /// Get the RPC endpoint for this server.
- RPCEndpointT &getEndpoint() { return EP; }
-
- /// Run the server loop.
- Error run() {
- while (!Finished) {
- if (auto Err = EP.handleOne())
- return Err;
- }
- return Error::success();
- }
-
- Expected<shared::WrapperFunctionResult>
- runWrapperInJIT(JITTargetAddress FunctionId, ArrayRef<char> ArgBuffer) {
- return EP.template callB<orcrpctpc::RunWrapper>(
- FunctionId,
- ArrayRef<uint8_t>(reinterpret_cast<const uint8_t *>(ArgBuffer.data()),
- ArgBuffer.size()));
- }
-
-private:
- static shared::detail::CWrapperFunctionResult
- jitDispatchViaOrcRPCTPCServer(void *Ctx, const void *FnTag, const char *Data,
- size_t Size) {
- assert(Ctx && "Attempt to dispatch with null context ptr");
- auto R = static_cast<ThisT *>(Ctx)->runWrapperInJIT(
- pointerToJITTargetAddress(FnTag), {Data, Size});
- if (!R) {
- auto ErrMsg = toString(R.takeError());
- return shared::WrapperFunctionResult::createOutOfBandError(ErrMsg.data())
- .release();
- }
- return R->release();
- }
-
- orcrpctpc::ExecutorProcessInfo getExecutorProcessInfo() {
- return {TripleStr, static_cast<uint32_t>(PageSize),
- pointerToJITTargetAddress(jitDispatchViaOrcRPCTPCServer),
- pointerToJITTargetAddress(this)};
- }
-
- template <typename WriteT>
- static void handleWriteUInt(const std::vector<WriteT> &Ws) {
- using ValueT = decltype(std::declval<WriteT>().Value);
- for (auto &W : Ws)
- *jitTargetAddressToPointer<ValueT *>(W.Address) = W.Value;
- }
-
- std::string getProtStr(orcrpctpc::WireProtectionFlags WPF) {
- std::string Result;
- Result += (WPF & orcrpctpc::WPF_Read) ? 'R' : '-';
- Result += (WPF & orcrpctpc::WPF_Write) ? 'W' : '-';
- Result += (WPF & orcrpctpc::WPF_Exec) ? 'X' : '-';
- return Result;
- }
-
- static void handleWriteBuffer(const std::vector<tpctypes::BufferWrite> &Ws) {
- for (auto &W : Ws) {
- memcpy(jitTargetAddressToPointer<char *>(W.Address), W.Buffer.data(),
- W.Buffer.size());
- }
- }
-
- Expected<orcrpctpc::ReserveMemResult>
- reserveMemory(const orcrpctpc::ReserveMemRequest &Request) {
- orcrpctpc::ReserveMemResult Allocs;
- auto PF = sys::Memory::MF_READ | sys::Memory::MF_WRITE;
-
- uint64_t TotalSize = 0;
-
- for (const auto &E : Request) {
- uint64_t Size = alignTo(E.Size, PageSize);
- uint16_t Align = E.Alignment;
-
- if ((Align > PageSize) || (PageSize % Align))
- return make_error<StringError>(
- "Page alignmen does not satisfy requested alignment",
- inconvertibleErrorCode());
-
- TotalSize += Size;
- }
-
- // Allocate memory slab.
- std::error_code EC;
- auto MB = sys::Memory::allocateMappedMemory(TotalSize, nullptr, PF, EC);
- if (EC)
- return make_error<StringError>("Unable to allocate memory: " +
- EC.message(),
- inconvertibleErrorCode());
-
- // Zero-fill the whole thing.
- memset(MB.base(), 0, MB.allocatedSize());
-
- // Carve up sections to return.
- uint64_t SectionBase = 0;
- for (const auto &E : Request) {
- uint64_t SectionSize = alignTo(E.Size, PageSize);
- Allocs.push_back({E.Prot,
- pointerToJITTargetAddress(MB.base()) + SectionBase,
- SectionSize});
- SectionBase += SectionSize;
- }
-
- return Allocs;
- }
-
- Error finalizeMemory(const orcrpctpc::ReleaseOrFinalizeMemRequest &FMR) {
- for (const auto &E : FMR) {
- sys::MemoryBlock MB(jitTargetAddressToPointer<void *>(E.Address), E.Size);
-
- auto PF = orcrpctpc::fromWireProtectionFlags(E.Prot);
- if (auto EC =
- sys::Memory::protectMappedMemory(MB, static_cast<unsigned>(PF)))
- return make_error<StringError>("error protecting memory: " +
- EC.message(),
- inconvertibleErrorCode());
- }
- return Error::success();
- }
-
- Error releaseMemory(const orcrpctpc::ReleaseOrFinalizeMemRequest &RMR) {
- for (const auto &E : RMR) {
- sys::MemoryBlock MB(jitTargetAddressToPointer<void *>(E.Address), E.Size);
-
- if (auto EC = sys::Memory::releaseMappedMemory(MB))
- return make_error<StringError>("error release memory: " + EC.message(),
- inconvertibleErrorCode());
- }
- return Error::success();
- }
-
- Expected<tpctypes::DylibHandle> loadDylib(const std::string &Path) {
- std::string ErrMsg;
- const char *DLPath = !Path.empty() ? Path.c_str() : nullptr;
- auto DL = sys::DynamicLibrary::getPermanentLibrary(DLPath, &ErrMsg);
- if (!DL.isValid())
- return make_error<StringError>(std::move(ErrMsg),
- inconvertibleErrorCode());
-
- tpctypes::DylibHandle H = Dylibs.size();
- Dylibs[H] = std::move(DL);
- return H;
- }
-
- Expected<std::vector<tpctypes::LookupResult>>
- lookupSymbols(const std::vector<orcrpctpc::RemoteLookupRequest> &Request) {
- std::vector<tpctypes::LookupResult> Result;
-
- for (const auto &E : Request) {
- auto I = Dylibs.find(E.first);
- if (I == Dylibs.end())
- return make_error<StringError>("Unrecognized handle",
- inconvertibleErrorCode());
- auto &DL = I->second;
- Result.push_back({});
-
- for (const auto &KV : E.second) {
- auto &SymString = KV.first;
- bool WeakReference = KV.second;
-
- const char *Sym = SymString.c_str();
-#ifdef __APPLE__
- if (*Sym == '_')
- ++Sym;
-#endif
-
- void *Addr = DL.getAddressOfSymbol(Sym);
- if (!Addr && !WeakReference)
- return make_error<StringError>(Twine("Missing definition for ") + Sym,
- inconvertibleErrorCode());
-
- Result.back().push_back(pointerToJITTargetAddress(Addr));
- }
- }
-
- return Result;
- }
-
- int64_t runMain(JITTargetAddress MainFnAddr,
- const std::vector<std::string> &Args) {
- Optional<StringRef> ProgramNameOverride;
- if (ProgramName)
- ProgramNameOverride = *ProgramName;
-
- return runAsMain(
- jitTargetAddressToFunction<int (*)(int, char *[])>(MainFnAddr), Args,
- ProgramNameOverride);
- }
-
- shared::WrapperFunctionResult
- runWrapper(JITTargetAddress WrapperFnAddr,
- const std::vector<uint8_t> &ArgBuffer) {
- using WrapperFnTy = shared::detail::CWrapperFunctionResult (*)(
- const char *Data, uint64_t Size);
- auto *WrapperFn = jitTargetAddressToFunction<WrapperFnTy>(WrapperFnAddr);
- return WrapperFn(reinterpret_cast<const char *>(ArgBuffer.data()),
- ArgBuffer.size());
- }
-
- void closeConnection() { Finished = true; }
-
- std::string TripleStr;
- uint64_t PageSize = 0;
- Optional<std::string> ProgramName;
- RPCEndpointT &EP;
- std::atomic<bool> Finished{false};
- DenseMap<tpctypes::DylibHandle, sys::DynamicLibrary> Dylibs;
-};
-
-} // end namespace orc
-} // end namespace llvm
-
-#endif // LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_ORCRPCTPCSERVER_H
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.h b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.h
index 3b4aabb90371..735aa53e41fd 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.h
@@ -33,10 +33,26 @@ Error deregisterEHFrameSection(const void *EHFrameSectionAddr,
} // end namespace orc
} // end namespace llvm
-extern "C" llvm::orc::shared::detail::CWrapperFunctionResult
+/// An eh-frame registration utility suitable for use as a support function
+/// call. This function expects the direct address and size of the eh-frame
+/// section to register as its arguments (it does not treat its arguments as
+/// pointers to an SPS-serialized arg buffer).
+extern "C" llvm::orc::shared::CWrapperFunctionResult
+llvm_orc_registerEHFrameSectionCustomDirectWrapper(
+ const char *EHFrameSectionAddr, uint64_t Size);
+
+/// An eh-frame deregistration utility suitable for use as a support function
+/// call. This function expects the direct address and size of the eh-frame
+/// section to deregister as its arguments (it does not treat its arguments as
+/// pointers to an SPS-serialized arg buffer).
+extern "C" llvm::orc::shared::CWrapperFunctionResult
+llvm_orc_deregisterEHFrameSectionCustomDirectWrapper(
+ const char *EHFrameSectionAddr, uint64_t Size);
+
+extern "C" llvm::orc::shared::CWrapperFunctionResult
llvm_orc_registerEHFrameSectionWrapper(const char *Data, uint64_t Size);
-extern "C" llvm::orc::shared::detail::CWrapperFunctionResult
+extern "C" llvm::orc::shared::CWrapperFunctionResult
llvm_orc_deregisterEHFrameSectionWrapper(const char *Data, uint64_t Size);
#endif // LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_REGISTEREHFRAMES_H
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorDylibManager.h b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorDylibManager.h
new file mode 100644
index 000000000000..cbab234f8a2d
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorDylibManager.h
@@ -0,0 +1,64 @@
+//===--------------- SimpleExecutorDylibManager.h ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// A simple dynamic library management class. Allows dynamic libraries to be
+// loaded and searched.
+//
+// FIXME: The functionality in this file should be moved to the ORC runtime.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_SIMPLEEXECUTORDYLIBMANAGER_H
+#define LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_SIMPLEEXECUTORDYLIBMANAGER_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h"
+#include "llvm/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.h"
+#include "llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h"
+#include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h"
+#include "llvm/ExecutionEngine/Orc/TargetProcess/ExecutorBootstrapService.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/Error.h"
+
+#include <mutex>
+
+namespace llvm {
+namespace orc {
+namespace rt_bootstrap {
+
+/// Simple dynamic library manager: opens dylibs and looks up symbols in them.
+class SimpleExecutorDylibManager : public ExecutorBootstrapService {
+public:
+ virtual ~SimpleExecutorDylibManager();
+
+ Expected<tpctypes::DylibHandle> open(const std::string &Path, uint64_t Mode);
+ Expected<std::vector<ExecutorAddr>> lookup(tpctypes::DylibHandle H,
+ const RemoteSymbolLookupSet &L);
+
+ Error shutdown() override;
+ void addBootstrapSymbols(StringMap<ExecutorAddr> &M) override;
+
+private:
+ using DylibsMap = DenseMap<uint64_t, sys::DynamicLibrary>;
+
+ static llvm::orc::shared::CWrapperFunctionResult
+ openWrapper(const char *ArgData, size_t ArgSize);
+
+ static llvm::orc::shared::CWrapperFunctionResult
+ lookupWrapper(const char *ArgData, size_t ArgSize);
+
+ std::mutex M;
+ uint64_t NextId = 0;
+ DylibsMap Dylibs;
+};
+
+} // end namespace rt_bootstrap
+} // end namespace orc
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_SIMPLEEXECUTORDYLIBMANAGER_H
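As a rough illustration of the interface declared above, a minimal in-process sketch follows. It only exercises open() and shutdown(); the library name is purely illustrative, and in practice the manager is driven through the wrapper functions it registers via addBootstrapSymbols() rather than called directly.

#include "llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorDylibManager.h"

using namespace llvm;
using namespace llvm::orc;

static Error openExampleDylib() {
  rt_bootstrap::SimpleExecutorDylibManager DM;
  // "libexample.so" is a placeholder; any loadable library path would do.
  auto H = DM.open("libexample.so", /*Mode=*/0);
  if (!H)
    return H.takeError();
  // Symbols would be resolved via DM.lookup(*H, ...) before shutting down.
  return DM.shutdown();
}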
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.h b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.h
new file mode 100644
index 000000000000..6858f6d4db6e
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.h
@@ -0,0 +1,70 @@
+//===---------------- SimpleExecutorMemoryManager.h -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// A simple allocator class suitable for basic remote-JIT use.
+//
+// FIXME: The functionality in this file should be moved to the ORC runtime.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_SIMPLEEXECUTORMEMORYMANAGER_H
+#define LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_SIMPLEEXECUTORMEMORYMANAGER_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h"
+#include "llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h"
+#include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h"
+#include "llvm/ExecutionEngine/Orc/TargetProcess/ExecutorBootstrapService.h"
+#include "llvm/Support/Error.h"
+
+#include <mutex>
+
+namespace llvm {
+namespace orc {
+namespace rt_bootstrap {
+
+/// Simple page-based allocator.
+class SimpleExecutorMemoryManager : public ExecutorBootstrapService {
+public:
+ virtual ~SimpleExecutorMemoryManager();
+
+ Expected<ExecutorAddr> allocate(uint64_t Size);
+ Error finalize(tpctypes::FinalizeRequest &FR);
+ Error deallocate(const std::vector<ExecutorAddr> &Bases);
+
+ Error shutdown() override;
+ void addBootstrapSymbols(StringMap<ExecutorAddr> &M) override;
+
+private:
+ struct Allocation {
+ size_t Size = 0;
+ std::vector<tpctypes::WrapperFunctionCall> DeallocationActions;
+ };
+
+ using AllocationsMap = DenseMap<void *, Allocation>;
+
+ Error deallocateImpl(void *Base, Allocation &A);
+
+ static llvm::orc::shared::CWrapperFunctionResult
+ reserveWrapper(const char *ArgData, size_t ArgSize);
+
+ static llvm::orc::shared::CWrapperFunctionResult
+ finalizeWrapper(const char *ArgData, size_t ArgSize);
+
+ static llvm::orc::shared::CWrapperFunctionResult
+ deallocateWrapper(const char *ArgData, size_t ArgSize);
+
+ std::mutex M;
+ AllocationsMap Allocations;
+};
+
+} // end namespace rt_bootstrap
+} // end namespace orc
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_SIMPLEEXECUTORMEMORYMANAGER_H
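A minimal in-process sketch of the allocator above, assuming direct use (the class is normally reached through the reserve/finalize/deallocate wrapper functions it publishes as bootstrap symbols). The finalize step, which needs a populated tpctypes::FinalizeRequest, is only indicated in a comment.

#include "llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.h"

using namespace llvm;
using namespace llvm::orc;

static Error allocateAndRelease() {
  rt_bootstrap::SimpleExecutorMemoryManager MemMgr;
  auto Addr = MemMgr.allocate(4096); // reserve (at least) one page
  if (!Addr)
    return Addr.takeError();
  // A tpctypes::FinalizeRequest describing contents and protections would be
  // applied here via MemMgr.finalize(...) before the memory is used.
  if (Error Err = MemMgr.deallocate({*Addr}))
    return Err;
  return MemMgr.shutdown();
}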
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.h b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.h
new file mode 100644
index 000000000000..afd3d39dbb53
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.h
@@ -0,0 +1,182 @@
+//===---- SimpleRemoteEPCServer.h - EPC over abstract channel ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// EPC over simple abstract channel.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_SIMPLEREMOTEEPCSERVER_H
+#define LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_SIMPLEREMOTEEPCSERVER_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/FunctionExtras.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.h"
+#include "llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h"
+#include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h"
+#include "llvm/ExecutionEngine/Orc/TargetProcess/ExecutorBootstrapService.h"
+#include "llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorDylibManager.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/Error.h"
+
+#include <condition_variable>
+#include <future>
+#include <memory>
+#include <mutex>
+
+namespace llvm {
+namespace orc {
+
+/// A simple EPC server implementation.
+class SimpleRemoteEPCServer : public SimpleRemoteEPCTransportClient {
+public:
+ using ReportErrorFunction = unique_function<void(Error)>;
+
+ /// Dispatches calls to runWrapper.
+ class Dispatcher {
+ public:
+ virtual ~Dispatcher();
+ virtual void dispatch(unique_function<void()> Work) = 0;
+ virtual void shutdown() = 0;
+ };
+
+#if LLVM_ENABLE_THREADS
+ class ThreadDispatcher : public Dispatcher {
+ public:
+ void dispatch(unique_function<void()> Work) override;
+ void shutdown() override;
+
+ private:
+ std::mutex DispatchMutex;
+ bool Running = true;
+ size_t Outstanding = 0;
+ std::condition_variable OutstandingCV;
+ };
+#endif
+
+ class Setup {
+ friend class SimpleRemoteEPCServer;
+
+ public:
+ SimpleRemoteEPCServer &server() { return S; }
+ StringMap<ExecutorAddr> &bootstrapSymbols() { return BootstrapSymbols; }
+ std::vector<std::unique_ptr<ExecutorBootstrapService>> &services() {
+ return Services;
+ }
+ void setDispatcher(std::unique_ptr<Dispatcher> D) { S.D = std::move(D); }
+ void setErrorReporter(unique_function<void(Error)> ReportError) {
+ S.ReportError = std::move(ReportError);
+ }
+
+ private:
+ Setup(SimpleRemoteEPCServer &S) : S(S) {}
+ SimpleRemoteEPCServer &S;
+ StringMap<ExecutorAddr> BootstrapSymbols;
+ std::vector<std::unique_ptr<ExecutorBootstrapService>> Services;
+ };
+
+ static StringMap<ExecutorAddr> defaultBootstrapSymbols();
+
+ template <typename TransportT, typename... TransportTCtorArgTs>
+ static Expected<std::unique_ptr<SimpleRemoteEPCServer>>
+ Create(unique_function<Error(Setup &S)> SetupFunction,
+ TransportTCtorArgTs &&...TransportTCtorArgs) {
+ auto Server = std::make_unique<SimpleRemoteEPCServer>();
+ Setup S(*Server);
+ if (auto Err = SetupFunction(S))
+ return std::move(Err);
+
+ // Set ReportError up-front so that it can be used if the construction
+ // process fails.
+ if (!Server->ReportError)
+ Server->ReportError = [](Error Err) {
+ logAllUnhandledErrors(std::move(Err), errs(), "SimpleRemoteEPCServer ");
+ };
+
+ // Attempt to create transport.
+ auto T = TransportT::Create(
+ *Server, std::forward<TransportTCtorArgTs>(TransportTCtorArgs)...);
+ if (!T)
+ return T.takeError();
+ Server->T = std::move(*T);
+ if (auto Err = Server->T->start())
+ return std::move(Err);
+
+ // If transport creation succeeds then start up services.
+ Server->Services = std::move(S.services());
+ Server->Services.push_back(
+ std::make_unique<rt_bootstrap::SimpleExecutorDylibManager>());
+ for (auto &Service : Server->Services)
+ Service->addBootstrapSymbols(S.bootstrapSymbols());
+
+ if (auto Err = Server->sendSetupMessage(std::move(S.BootstrapSymbols)))
+ return std::move(Err);
+ return std::move(Server);
+ }
+
+ /// Set an error reporter for this server.
+ void setErrorReporter(ReportErrorFunction ReportError) {
+ this->ReportError = std::move(ReportError);
+ }
+
+ /// Call to handle an incoming message.
+ ///
+ /// Returns 'Disconnect' if the message is a 'detach' message from the remote,
+ /// otherwise returns 'Continue'. If the server has moved to an error state,
+ /// returns an error, which should be reported and treated as a 'Disconnect'.
+ Expected<HandleMessageAction>
+ handleMessage(SimpleRemoteEPCOpcode OpC, uint64_t SeqNo, ExecutorAddr TagAddr,
+ SimpleRemoteEPCArgBytesVector ArgBytes) override;
+
+ Error waitForDisconnect();
+
+ void handleDisconnect(Error Err) override;
+
+private:
+ Error sendMessage(SimpleRemoteEPCOpcode OpC, uint64_t SeqNo,
+ ExecutorAddr TagAddr, ArrayRef<char> ArgBytes);
+
+ Error sendSetupMessage(StringMap<ExecutorAddr> BootstrapSymbols);
+
+ Error handleResult(uint64_t SeqNo, ExecutorAddr TagAddr,
+ SimpleRemoteEPCArgBytesVector ArgBytes);
+ void handleCallWrapper(uint64_t RemoteSeqNo, ExecutorAddr TagAddr,
+ SimpleRemoteEPCArgBytesVector ArgBytes);
+
+ shared::WrapperFunctionResult
+ doJITDispatch(const void *FnTag, const char *ArgData, size_t ArgSize);
+
+ static shared::CWrapperFunctionResult jitDispatchEntry(void *DispatchCtx,
+ const void *FnTag,
+ const char *ArgData,
+ size_t ArgSize);
+
+ uint64_t getNextSeqNo() { return NextSeqNo++; }
+ void releaseSeqNo(uint64_t) {}
+
+ using PendingJITDispatchResultsMap =
+ DenseMap<uint64_t, std::promise<shared::WrapperFunctionResult> *>;
+
+ std::mutex ServerStateMutex;
+ std::condition_variable ShutdownCV;
+ enum { ServerRunning, ServerShuttingDown, ServerShutDown } RunState;
+ Error ShutdownErr = Error::success();
+ std::unique_ptr<SimpleRemoteEPCTransport> T;
+ std::unique_ptr<Dispatcher> D;
+ std::vector<std::unique_ptr<ExecutorBootstrapService>> Services;
+ ReportErrorFunction ReportError;
+
+ uint64_t NextSeqNo = 0;
+ PendingJITDispatchResultsMap PendingJITDispatchResults;
+ std::vector<sys::DynamicLibrary> Dylibs;
+};
+
+} // end namespace orc
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_SIMPLEREMOTEEPCSERVER_H
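For orientation, a sketch of how an executor process might stand up this server. It assumes an FD-based transport such as FDSimpleRemoteEPCTransport (declared in SimpleRemoteEPCUtils.h, which is included above) and that LLVM_ENABLE_THREADS is set so ThreadDispatcher is available. The setup callback wires in the default bootstrap symbols and a memory-manager service; the dylib-manager service is added by Create() itself.

#include "llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.h"
#include "llvm/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.h"

using namespace llvm;
using namespace llvm::orc;

static Error runExecutorOverFDs(int InFD, int OutFD) {
  auto Server = SimpleRemoteEPCServer::Create<FDSimpleRemoteEPCTransport>(
      [](SimpleRemoteEPCServer::Setup &S) -> Error {
        S.setDispatcher(
            std::make_unique<SimpleRemoteEPCServer::ThreadDispatcher>());
        S.bootstrapSymbols() = SimpleRemoteEPCServer::defaultBootstrapSymbols();
        S.services().push_back(
            std::make_unique<rt_bootstrap::SimpleExecutorMemoryManager>());
        return Error::success();
      },
      InFD, OutFD);
  if (!Server)
    return Server.takeError();
  // Block until the controller detaches or an error is reported.
  return (*Server)->waitForDisconnect();
}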
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TaskDispatch.h b/llvm/include/llvm/ExecutionEngine/Orc/TaskDispatch.h
new file mode 100644
index 000000000000..c57264e59655
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/Orc/TaskDispatch.h
@@ -0,0 +1,131 @@
+//===--------- TaskDispatch.h - ORC task dispatch utils ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Task and TaskDispatch classes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_TASKDISPATCH_H
+#define LLVM_EXECUTIONENGINE_ORC_TASKDISPATCH_H
+
+#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ExtensibleRTTI.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <cassert>
+#include <string>
+
+#if LLVM_ENABLE_THREADS
+#include <condition_variable>
+#include <mutex>
+#include <thread>
+#endif
+
+namespace llvm {
+namespace orc {
+
+/// Represents an abstract task for ORC to run.
+class Task : public RTTIExtends<Task, RTTIRoot> {
+public:
+ static char ID;
+
+ virtual ~Task() {}
+
+ /// Description of the task to be performed. Used for logging.
+ virtual void printDescription(raw_ostream &OS) = 0;
+
+ /// Run the task.
+ virtual void run() = 0;
+
+private:
+ void anchor() override;
+};
+
+/// Base class for generic tasks.
+class GenericNamedTask : public RTTIExtends<GenericNamedTask, Task> {
+public:
+ static char ID;
+ static const char *DefaultDescription;
+};
+
+/// Generic task implementation.
+template <typename FnT> class GenericNamedTaskImpl : public GenericNamedTask {
+public:
+ GenericNamedTaskImpl(FnT &&Fn, std::string DescBuffer)
+ : Fn(std::forward<FnT>(Fn)), Desc(DescBuffer.c_str()),
+ DescBuffer(std::move(DescBuffer)) {}
+ GenericNamedTaskImpl(FnT &&Fn, const char *Desc)
+ : Fn(std::forward<FnT>(Fn)), Desc(Desc) {
+ assert(Desc && "Description cannot be null");
+ }
+ void printDescription(raw_ostream &OS) override { OS << Desc; }
+ void run() override { Fn(); }
+
+private:
+ FnT Fn;
+ const char *Desc;
+ std::string DescBuffer;
+};
+
+/// Create a generic named task from a std::string description.
+template <typename FnT>
+std::unique_ptr<GenericNamedTask> makeGenericNamedTask(FnT &&Fn,
+ std::string Desc) {
+ return std::make_unique<GenericNamedTaskImpl<FnT>>(std::forward<FnT>(Fn),
+ std::move(Desc));
+}
+
+/// Create a generic named task from a const char * description.
+template <typename FnT>
+std::unique_ptr<GenericNamedTask>
+makeGenericNamedTask(FnT &&Fn, const char *Desc = nullptr) {
+ if (!Desc)
+ Desc = GenericNamedTask::DefaultDescription;
+ return std::make_unique<GenericNamedTaskImpl<FnT>>(std::forward<FnT>(Fn),
+ Desc);
+}
+
+/// Abstract base for classes that dispatch ORC Tasks.
+class TaskDispatcher {
+public:
+ virtual ~TaskDispatcher();
+
+ /// Run the given task.
+ virtual void dispatch(std::unique_ptr<Task> T) = 0;
+
+ /// Called by ExecutionSession. Waits until all tasks have completed.
+ virtual void shutdown() = 0;
+};
+
+/// Runs all tasks on the current thread.
+class InPlaceTaskDispatcher : public TaskDispatcher {
+public:
+ void dispatch(std::unique_ptr<Task> T) override;
+ void shutdown() override;
+};
+
+#if LLVM_ENABLE_THREADS
+
+class DynamicThreadPoolTaskDispatcher : public TaskDispatcher {
+public:
+ void dispatch(std::unique_ptr<Task> T) override;
+ void shutdown() override;
+private:
+ std::mutex DispatchMutex;
+ bool Running = true;
+ size_t Outstanding = 0;
+ std::condition_variable OutstandingCV;
+};
+
+#endif // LLVM_ENABLE_THREADS
+
+} // End namespace orc
+} // End namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_TASKDISPATCH_H
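A small sketch of the task API above: wrap a callable in a GenericNamedTask and run it through the in-place dispatcher, which executes tasks synchronously on the calling thread.

#include "llvm/ExecutionEngine/Orc/TaskDispatch.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::orc;

static void dispatchExample() {
  InPlaceTaskDispatcher D;
  // makeGenericNamedTask attaches a description that is used when logging.
  D.dispatch(makeGenericNamedTask(
      [] { outs() << "hello from an ORC task\n"; }, "greeting task"));
  D.shutdown(); // waits until all dispatched tasks have completed
}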
diff --git a/llvm/include/llvm/ExecutionEngine/OrcMCJITReplacement.h b/llvm/include/llvm/ExecutionEngine/OrcMCJITReplacement.h
deleted file mode 100644
index 6cca1933f39f..000000000000
--- a/llvm/include/llvm/ExecutionEngine/OrcMCJITReplacement.h
+++ /dev/null
@@ -1,37 +0,0 @@
-//===---- OrcMCJITReplacement.h - Orc-based MCJIT replacement ---*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file forces OrcMCJITReplacement to link in on certain operating systems.
-// (Windows).
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_EXECUTIONENGINE_ORCMCJITREPLACEMENT_H
-#define LLVM_EXECUTIONENGINE_ORCMCJITREPLACEMENT_H
-
-#include "llvm/ExecutionEngine/ExecutionEngine.h"
-#include <cstdlib>
-
-extern "C" void LLVMLinkInOrcMCJITReplacement();
-
-namespace {
- struct ForceOrcMCJITReplacementLinking {
- ForceOrcMCJITReplacementLinking() {
- // We must reference OrcMCJITReplacement in such a way that compilers will
- // not delete it all as dead code, even with whole program optimization,
- // yet is effectively a NO-OP. As the compiler isn't smart enough to know
- // that getenv() never returns -1, this will do the job.
- if (std::getenv("bar") != (char*) -1)
- return;
-
- LLVMLinkInOrcMCJITReplacement();
- }
- } ForceOrcMCJITReplacementLinking;
-}
-
-#endif
diff --git a/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h b/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h
index 128c9967a596..c434b45077a3 100644
--- a/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h
+++ b/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h
@@ -112,6 +112,20 @@ public:
StringRef SectionName,
bool IsReadOnly) = 0;
+ /// An allocated TLS section
+ struct TLSSection {
+ /// The pointer to the initialization image
+ uint8_t *InitializationImage;
+ /// The TLS offset
+ intptr_t Offset;
+ };
+
+ /// Allocate a memory block of (at least) the given size to be used for
+ /// thread-local storage (TLS).
+ virtual TLSSection allocateTLSSection(uintptr_t Size, unsigned Alignment,
+ unsigned SectionID,
+ StringRef SectionName);
+
/// Inform the memory manager about the total amount of memory required to
/// allocate all sections to be loaded:
/// \p CodeSize - the total size of all code sections
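A sketch of how a client might implement the new TLS hook, using SectionMemoryManager as a convenient base class (an assumption; any RuntimeDyld::MemoryManager subclass works). Backing the initialization image with an ordinary data section and returning offset 0 is only a placeholder; a real implementation would register the image with the platform's TLS runtime.

#include "llvm/ExecutionEngine/SectionMemoryManager.h"

using namespace llvm;

class TLSAwareMemoryManager : public SectionMemoryManager {
public:
  TLSSection allocateTLSSection(uintptr_t Size, unsigned Alignment,
                                unsigned SectionID,
                                StringRef SectionName) override {
    // Placeholder: back the TLS initialization image with a plain data
    // section and report an offset of 0.
    uint8_t *Image = allocateDataSection(Size, Alignment, SectionID,
                                         SectionName, /*IsReadOnly=*/false);
    return {Image, /*Offset=*/0};
  }
};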
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td
index 3dc6194c7830..5ee379b7fcad 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMP.td
+++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td
@@ -144,6 +144,26 @@ def OMPC_Schedule : Clause<"schedule"> {
];
}
+def OMP_MEMORY_ORDER_SeqCst : ClauseVal<"seq_cst", 1, 1> {}
+def OMP_MEMORY_ORDER_AcqRel : ClauseVal<"acq_rel", 2, 1> {}
+def OMP_MEMORY_ORDER_Acquire : ClauseVal<"acquire", 3, 1> {}
+def OMP_MEMORY_ORDER_Release : ClauseVal<"release", 4, 1> {}
+def OMP_MEMORY_ORDER_Relaxed : ClauseVal<"relaxed", 5, 1> {}
+def OMP_MEMORY_ORDER_Default : ClauseVal<"default", 6, 0> {
+ let isDefault = 1;
+}
+def OMPC_MemoryOrder : Clause<"memory_order"> {
+ let enumClauseValue = "MemoryOrderKind";
+ let allowedClauseValues = [
+ OMP_MEMORY_ORDER_SeqCst,
+ OMP_MEMORY_ORDER_AcqRel,
+ OMP_MEMORY_ORDER_Acquire,
+ OMP_MEMORY_ORDER_Release,
+ OMP_MEMORY_ORDER_Relaxed,
+ OMP_MEMORY_ORDER_Default
+ ];
+}
+
def OMPC_Ordered : Clause<"ordered"> {
let clangClass = "OMPOrderedClause";
let flangClass = "ScalarIntConstantExpr";
@@ -261,13 +281,17 @@ def OMPC_Allocate : Clause<"allocate"> {
}
def OMPC_NonTemporal : Clause<"nontemporal"> {
let clangClass = "OMPNontemporalClause";
+ let flangClass = "Name";
+ let isValueList = true;
}
-def OMP_ORDER_concurrent : ClauseVal<"default",2,0> { let isDefault = 1; }
+def OMP_ORDER_concurrent : ClauseVal<"concurrent",1,1> {}
+def OMP_ORDER_unknown : ClauseVal<"unknown",2,0> { let isDefault = 1; }
def OMPC_Order : Clause<"order"> {
let clangClass = "OMPOrderClause";
let enumClauseValue = "OrderKind";
let allowedClauseValues = [
+ OMP_ORDER_unknown,
OMP_ORDER_concurrent
];
}
@@ -312,6 +336,8 @@ def OMPC_Uniform : Clause<"uniform"> {
}
def OMPC_DeviceType : Clause<"device_type"> {}
def OMPC_Match : Clause<"match"> {}
+def OMPC_AdjustArgs : Clause<"adjust_args"> { }
+def OMPC_AppendArgs : Clause<"append_args"> { }
def OMPC_Depobj : Clause<"depobj"> {
let clangClass = "OMPDepobjClause";
let isImplicit = true;
@@ -337,6 +363,14 @@ def OMPC_Filter : Clause<"filter"> {
let clangClass = "OMPFilterClause";
let flangClass = "ScalarIntExpr";
}
+def OMPC_Align : Clause<"align"> {
+ let clangClass = "OMPAlignClause";
+}
+def OMPC_When: Clause<"when"> {}
+
+def OMPC_Bind : Clause<"bind"> {
+ let clangClass = "OMPBindClause";
+}
//===----------------------------------------------------------------------===//
// Definition of OpenMP directives
@@ -473,8 +507,8 @@ def OMP_TaskWait : Directive<"taskwait"> {
}
def OMP_TaskGroup : Directive<"taskgroup"> {
let allowedClauses = [
- VersionedClause<OMPC_TaskReduction>,
- VersionedClause<OMPC_Allocate>
+ VersionedClause<OMPC_TaskReduction, 50>,
+ VersionedClause<OMPC_Allocate, 50>
];
}
def OMP_Flush : Directive<"flush"> {
@@ -489,10 +523,12 @@ def OMP_Flush : Directive<"flush"> {
}
def OMP_Ordered : Directive<"ordered"> {
let allowedClauses = [
- VersionedClause<OMPC_Threads>,
- VersionedClause<OMPC_Simd>,
VersionedClause<OMPC_Depend>
];
+ let allowedOnceClauses = [
+ VersionedClause<OMPC_Threads>,
+ VersionedClause<OMPC_Simd>
+ ];
}
def OMP_Atomic : Directive<"atomic"> {
let allowedClauses = [
@@ -1506,13 +1542,18 @@ def OMP_TargetTeamsDistributeSimd :
}
def OMP_Allocate : Directive<"allocate"> {
let allowedOnceClauses = [
- VersionedClause<OMPC_Allocator>
+ VersionedClause<OMPC_Allocator>,
+ VersionedClause<OMPC_Align, 51>
];
}
def OMP_DeclareVariant : Directive<"declare variant"> {
let allowedClauses = [
VersionedClause<OMPC_Match>
];
+ let allowedExclusiveClauses = [
+ VersionedClause<OMPC_AdjustArgs, 51>,
+ VersionedClause<OMPC_AppendArgs, 51>
+ ];
}
def OMP_MasterTaskloop : Directive<"master taskloop"> {
let allowedClauses = [
@@ -1699,6 +1740,22 @@ def OMP_masked : Directive<"masked"> {
VersionedClause<OMPC_Filter>
];
}
+def OMP_loop : Directive<"loop"> {
+ let allowedClauses = [
+ VersionedClause<OMPC_LastPrivate>,
+ VersionedClause<OMPC_Private>,
+ VersionedClause<OMPC_Reduction>,
+ ];
+ let allowedOnceClauses = [
+ VersionedClause<OMPC_Bind, 50>,
+ VersionedClause<OMPC_Collapse>,
+ VersionedClause<OMPC_Order>,
+ ];
+}
+def OMP_Metadirective : Directive<"metadirective"> {
+ let allowedClauses = [VersionedClause<OMPC_When>];
+ let allowedOnceClauses = [VersionedClause<OMPC_Default>];
+}
def OMP_Unknown : Directive<"unknown"> {
let isDefault = true;
}
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
index d174cc8992dd..2fec3e7e4230 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
@@ -128,6 +128,14 @@ enum class OMPScheduleType {
LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue */ ModifierMask)
};
+enum OMPTgtExecModeFlags : int8_t {
+ OMP_TGT_EXEC_MODE_GENERIC = 1 << 0,
+ OMP_TGT_EXEC_MODE_SPMD = 1 << 1,
+ OMP_TGT_EXEC_MODE_GENERIC_SPMD =
+ OMP_TGT_EXEC_MODE_GENERIC | OMP_TGT_EXEC_MODE_SPMD,
+ LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue */ OMP_TGT_EXEC_MODE_GENERIC_SPMD)
+};
+
} // end namespace omp
} // end namespace llvm
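Since the new flags form a bitmask, combined generic+SPMD kernels can be tested with simple bit operations, for example:

#include "llvm/Frontend/OpenMP/OMPConstants.h"

using namespace llvm::omp;

// True for SPMD and generic-SPMD kernels (both have the SPMD bit set).
static bool hasSPMDMode(OMPTgtExecModeFlags Mode) {
  return (Mode & OMP_TGT_EXEC_MODE_SPMD) != 0;
}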
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h b/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
index 0b6aed1e9e12..89f5de229b3b 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
@@ -29,100 +29,89 @@ namespace omp {
///
/// Example usage in clang:
/// const unsigned slot_size =
-/// ctx.GetTargetInfo().getGridValue(llvm::omp::GVIDX::GV_Warp_Size);
+/// ctx.GetTargetInfo().getGridValue().GV_Warp_Size;
///
/// Example usage in libomptarget/deviceRTLs:
/// #include "llvm/Frontend/OpenMP/OMPGridValues.h"
/// #ifdef __AMDGPU__
-/// #define GRIDVAL AMDGPUGpuGridValues
+/// #define GRIDVAL AMDGPUGridValues
/// #else
-/// #define GRIDVAL NVPTXGpuGridValues
+/// #define GRIDVAL NVPTXGridValues
/// #endif
/// ... Then use this reference for GV_Warp_Size in the deviceRTL source.
-/// llvm::omp::GRIDVAL[llvm::omp::GVIDX::GV_Warp_Size]
+/// llvm::omp::GRIDVAL().GV_Warp_Size
///
/// Example usage in libomptarget hsa plugin:
/// #include "llvm/Frontend/OpenMP/OMPGridValues.h"
-/// #define GRIDVAL AMDGPUGpuGridValues
+/// #define GRIDVAL AMDGPUGridValues
/// ... Then use this reference to access GV_Warp_Size in the hsa plugin.
-/// llvm::omp::GRIDVAL[llvm::omp::GVIDX::GV_Warp_Size]
+/// llvm::omp::GRIDVAL().GV_Warp_Size
///
/// Example usage in libomptarget cuda plugin:
/// #include "llvm/Frontend/OpenMP/OMPGridValues.h"
-/// #define GRIDVAL NVPTXGpuGridValues
+/// #define GRIDVAL NVPTXGridValues
/// ... Then use this reference to access GV_Warp_Size in the cuda plugin.
-/// llvm::omp::GRIDVAL[llvm::omp::GVIDX::GV_Warp_Size]
+/// llvm::omp::GRIDVAL().GV_Warp_Size
///
-enum GVIDX {
- /// The maximum number of workers in a kernel.
- /// (THREAD_ABSOLUTE_LIMIT) - (GV_Warp_Size), might be issue for blockDim.z
- GV_Threads,
+
+struct GV {
/// The size reserved for data in a shared memory slot.
- GV_Slot_Size,
+ const unsigned GV_Slot_Size;
/// The default value of maximum number of threads in a worker warp.
- GV_Warp_Size,
- /// Alternate warp size for some AMDGCN architectures. Same as GV_Warp_Size
- /// for NVPTX.
- GV_Warp_Size_32,
- /// The number of bits required to represent the max number of threads in warp
- GV_Warp_Size_Log2,
- /// GV_Warp_Size * GV_Slot_Size,
- GV_Warp_Slot_Size,
+ const unsigned GV_Warp_Size;
+
+ constexpr unsigned warpSlotSize() const {
+ return GV_Warp_Size * GV_Slot_Size;
+ }
+
/// the maximum number of teams.
- GV_Max_Teams,
- /// Global Memory Alignment
- GV_Mem_Align,
- /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
- GV_Warp_Size_Log2_Mask,
+ const unsigned GV_Max_Teams;
// An alternative to the heavy data sharing infrastructure that uses global
// memory is one that uses device __shared__ memory. The amount of such space
// (in bytes) reserved by the OpenMP runtime is noted here.
- GV_SimpleBufferSize,
+ const unsigned GV_SimpleBufferSize;
// The absolute maximum team size for a working group
- GV_Max_WG_Size,
+ const unsigned GV_Max_WG_Size;
// The default maximum team size for a working group
- GV_Default_WG_Size,
- // This is GV_Max_WG_Size / GV_WarpSize. 32 for NVPTX and 16 for AMDGCN.
- GV_Max_Warp_Number,
- /// The slot size that should be reserved for a working warp.
- /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
- GV_Warp_Size_Log2_MaskL
+ const unsigned GV_Default_WG_Size;
+
+ constexpr unsigned maxWarpNumber() const {
+ return GV_Max_WG_Size / GV_Warp_Size;
+ }
};
/// For AMDGPU GPUs
-static constexpr unsigned AMDGPUGpuGridValues[] = {
- 448, // GV_Threads
- 256, // GV_Slot_Size
- 64, // GV_Warp_Size
- 32, // GV_Warp_Size_32
- 6, // GV_Warp_Size_Log2
- 64 * 256, // GV_Warp_Slot_Size
- 128, // GV_Max_Teams
- 256, // GV_Mem_Align
- 63, // GV_Warp_Size_Log2_Mask
- 896, // GV_SimpleBufferSize
- 1024, // GV_Max_WG_Size,
- 256, // GV_Defaut_WG_Size
- 1024 / 64, // GV_Max_WG_Size / GV_WarpSize
- 63 // GV_Warp_Size_Log2_MaskL
+static constexpr GV AMDGPUGridValues64 = {
+ 256, // GV_Slot_Size
+ 64, // GV_Warp_Size
+ 128, // GV_Max_Teams
+ 896, // GV_SimpleBufferSize
+ 1024, // GV_Max_WG_Size,
+ 256, // GV_Default_WG_Size
};
+static constexpr GV AMDGPUGridValues32 = {
+ 256, // GV_Slot_Size
+ 32, // GV_Warp_Size
+ 128, // GV_Max_Teams
+ 896, // GV_SimpleBufferSize
+ 1024, // GV_Max_WG_Size,
+ 256, // GV_Default_WG_Size
+};
+
+template <unsigned wavesize> constexpr const GV &getAMDGPUGridValues() {
+ static_assert(wavesize == 32 || wavesize == 64, "");
+ return wavesize == 32 ? AMDGPUGridValues32 : AMDGPUGridValues64;
+}
+
/// For Nvidia GPUs
-static constexpr unsigned NVPTXGpuGridValues[] = {
- 992, // GV_Threads
- 256, // GV_Slot_Size
- 32, // GV_Warp_Size
- 32, // GV_Warp_Size_32
- 5, // GV_Warp_Size_Log2
- 32 * 256, // GV_Warp_Slot_Size
- 1024, // GV_Max_Teams
- 256, // GV_Mem_Align
- (~0u >> (32 - 5)), // GV_Warp_Size_Log2_Mask
- 896, // GV_SimpleBufferSize
- 1024, // GV_Max_WG_Size
- 128, // GV_Defaut_WG_Size
- 1024 / 32, // GV_Max_WG_Size / GV_WarpSize
- 31 // GV_Warp_Size_Log2_MaskL
+static constexpr GV NVPTXGridValues = {
+ 256, // GV_Slot_Size
+ 32, // GV_Warp_Size
+ 1024, // GV_Max_Teams
+ 896, // GV_SimpleBufferSize
+ 1024, // GV_Max_WG_Size
+ 128, // GV_Default_WG_Size
};
} // namespace omp
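With the index-based table replaced by a struct, derived quantities are now computed from the fundamental values; a couple of compile-time checks illustrate the accessors:

#include "llvm/Frontend/OpenMP/OMPGridValues.h"

using namespace llvm::omp;

static_assert(getAMDGPUGridValues<64>().warpSlotSize() == 64 * 256,
              "warp slot size = warp size * slot size");
static_assert(NVPTXGridValues.maxWarpNumber() == 1024 / 32,
              "max warps per work-group = max WG size / warp size");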
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 8144f1527a06..563e0eed1762 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -257,18 +257,17 @@ public:
///
/// * Sign of the step and the comparison operator might disagree:
///
- /// for (int i = 0; i < 42; --i)
+ /// for (int i = 0; i < 42; i -= 1u)
///
//
/// \param Loc The insert and source location description.
/// \param BodyGenCB Callback that will generate the loop body code.
/// \param Start Value of the loop counter for the first iterations.
- /// \param Stop Loop counter values past this will stop the the
- /// iterations.
+ /// \param Stop Loop counter values past this will stop the loop.
/// \param Step Loop counter increment after each iteration; negative
- /// means counting down. \param IsSigned Whether Start, Stop
- /// and Stop are signed integers.
- /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop
+ /// means counting down.
+ /// \param IsSigned Whether Start, Stop and Step are signed integers.
+ /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop
/// counter.
/// \param ComputeIP Insertion point for instructions computing the trip
/// count. Can be used to ensure the trip count is available
@@ -335,7 +334,7 @@ public:
/// has a trip count of 0). This is permitted by the OpenMP specification.
///
/// \param DL Debug location for instructions added for collapsing,
- /// such as instructions to compute derive the input loop's
+ /// such as instructions to compute/derive the input loop's
/// induction variables.
/// \param Loops Loops in the loop nest to collapse. Loops are specified
/// from outermost-to-innermost and every control flow of a
@@ -358,8 +357,16 @@ public:
/// the current thread, updates the relevant instructions in the canonical
/// loop and calls to an OpenMP runtime finalization function after the loop.
///
- /// \param Loc The source location description, the insertion location
- /// is not used.
+ /// TODO: Workshare loops with static scheduling may contain up to two loops
+ /// that fulfill the requirements of an OpenMP canonical loop. One for
+ /// iterating over all iterations of a chunk and another one for iterating
+ /// over all chunks that are executed on the same thread. Returning
+ /// CanonicalLoopInfo objects representing them may eventually be useful for
+ /// the apply clause planned in OpenMP 6.0, but currently whether these are
+ /// canonical loops is irrelevant.
+ ///
+ /// \param DL Debug location for instructions added for the
+ /// workshare-loop construct itself.
/// \param CLI A descriptor of the canonical loop to workshare.
/// \param AllocaIP An insertion point for Alloca instructions usable in the
/// preheader of the loop.
@@ -368,12 +375,11 @@ public:
/// \param Chunk The size of loop chunk considered as a unit when
/// scheduling. If \p nullptr, defaults to 1.
///
- /// \returns Updated CanonicalLoopInfo.
- CanonicalLoopInfo *createStaticWorkshareLoop(const LocationDescription &Loc,
- CanonicalLoopInfo *CLI,
- InsertPointTy AllocaIP,
- bool NeedsBarrier,
- Value *Chunk = nullptr);
+ /// \returns Point where to insert code after the workshare construct.
+ InsertPointTy applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
+ InsertPointTy AllocaIP,
+ bool NeedsBarrier,
+ Value *Chunk = nullptr);
/// Modifies the canonical loop to be a dynamically-scheduled workshare loop.
///
@@ -382,8 +388,9 @@ public:
/// turn it into a workshare loop. In particular, it calls to an OpenMP
/// runtime function in the preheader to obtain, and then in each iteration
/// to update the loop counter.
- /// \param Loc The source location description, the insertion location
- /// is not used.
+ ///
+ /// \param DL Debug location for instructions added for the
+ /// workshare-loop construct itself.
/// \param CLI A descriptor of the canonical loop to workshare.
/// \param AllocaIP An insertion point for Alloca instructions usable in the
/// preheader of the loop.
@@ -393,13 +400,12 @@ public:
/// \param Chunk The size of loop chunk considered as a unit when
/// scheduling. If \p nullptr, defaults to 1.
///
- /// \returns Point where to insert code after the loop.
- InsertPointTy createDynamicWorkshareLoop(const LocationDescription &Loc,
- CanonicalLoopInfo *CLI,
- InsertPointTy AllocaIP,
- omp::OMPScheduleType SchedType,
- bool NeedsBarrier,
- Value *Chunk = nullptr);
+ /// \returns Point where to insert code after the workshare construct.
+ InsertPointTy applyDynamicWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
+ InsertPointTy AllocaIP,
+ omp::OMPScheduleType SchedType,
+ bool NeedsBarrier,
+ Value *Chunk = nullptr);
/// Modifies the canonical loop to be a workshare loop.
///
@@ -410,19 +416,17 @@ public:
/// the current thread, updates the relevant instructions in the canonical
/// loop and calls to an OpenMP runtime finalization function after the loop.
///
- /// \param Loc The source location description, the insertion location
- /// is not used.
+ /// \param DL Debug location for instructions added for the
+ /// workshare-loop construct itself.
/// \param CLI A descriptor of the canonical loop to workshare.
/// \param AllocaIP An insertion point for Alloca instructions usable in the
/// preheader of the loop.
/// \param NeedsBarrier Indicates whether a barrier must be insterted after
/// the loop.
///
- /// \returns Updated CanonicalLoopInfo.
- CanonicalLoopInfo *createWorkshareLoop(const LocationDescription &Loc,
- CanonicalLoopInfo *CLI,
- InsertPointTy AllocaIP,
- bool NeedsBarrier);
+ /// \returns Point where to insert code after the workshare construct.
+ InsertPointTy applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
+ InsertPointTy AllocaIP, bool NeedsBarrier);
/// Tile a loop nest.
///
@@ -471,6 +475,48 @@ public:
tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
ArrayRef<Value *> TileSizes);
+ /// Fully unroll a loop.
+ ///
+ /// Instead of unrolling the loop immediately (and duplicating its body
+ /// instructions), it is deferred to LLVM's LoopUnrollPass by adding loop
+ /// metadata.
+ ///
+ /// \param DL Debug location for instructions added by unrolling.
+ /// \param Loop The loop to unroll. The loop will be invalidated.
+ void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop);
+
+ /// Fully or partially unroll a loop. How the loop is unrolled is determined
+ /// using LLVM's LoopUnrollPass.
+ ///
+ /// \param DL Debug location for instructions added by unrolling.
+ /// \param Loop The loop to unroll. The loop will be invalidated.
+ void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop);
+
+ /// Partially unroll a loop.
+ ///
+ /// The CanonicalLoopInfo of the unrolled loop for use with a chained
+ /// loop-associated directive can be requested using \p UnrolledCLI. Not
+ /// needing the CanonicalLoopInfo allows more efficient code generation by
+ /// deferring the actual unrolling to the LoopUnrollPass using loop metadata.
+ /// A loop-associated directive applied to the unrolled loop needs to know the
+ /// new trip count which means that if using a heuristically determined unroll
+ /// factor (\p Factor == 0), that factor must be computed immediately. We are
+ /// using the same logic as the LoopUnrollPass to derive the unroll factor,
+ /// which assumes that some canonicalization has taken place (e.g.
+ /// Mem2Reg, LICM, GVN, Inlining, etc.). That is, the heuristic will perform
+ /// better when the unrolled loop's CanonicalLoopInfo is not needed.
+ ///
+ /// \param DL Debug location for instructions added by unrolling.
+ /// \param Loop The loop to unroll. The loop will be invalidated.
+ /// \param Factor The factor to unroll the loop by. A factor of 0
+ /// indicates that a heuristic should be used to determine
+ /// the unroll-factor.
+ /// \param UnrolledCLI If non-null, receives the CanonicalLoopInfo of the
+ /// partially unrolled loop. Otherwise, uses loop metadata
+ /// to defer unrolling to the LoopUnrollPass.
+ void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor,
+ CanonicalLoopInfo **UnrolledCLI);
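For example, a frontend that needs the unrolled loop for a chained loop-associated construct might call it roughly like this (a sketch; the builder, debug location and input loop come from the surrounding code-generation context):

#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"

using namespace llvm;

// Partially unroll 'Loop' by a fixed factor of 4 and hand back the
// CanonicalLoopInfo of the unrolled loop for a later construct. Passing
// /*UnrolledCLI=*/nullptr instead would defer the actual unrolling to the
// LoopUnrollPass via loop metadata.
static CanonicalLoopInfo *unrollByFour(OpenMPIRBuilder &OMPBuilder, DebugLoc DL,
                                       CanonicalLoopInfo *Loop) {
  CanonicalLoopInfo *UnrolledCLI = nullptr;
  OMPBuilder.unrollLoopPartial(DL, Loop, /*Factor=*/4, &UnrolledCLI);
  return UnrolledCLI;
}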
+
/// Generator for '#omp flush'
///
/// \param Loc The location where the flush directive was encountered
@@ -486,6 +532,115 @@ public:
/// \param Loc The location where the taskyield directive was encountered.
void createTaskyield(const LocationDescription &Loc);
+ /// Functions used to generate reductions. Such functions take two Values
+ /// representing LHS and RHS of the reduction, respectively, and a reference
+ /// to the value that is updated to refer to the reduction result.
+ using ReductionGenTy =
+ function_ref<InsertPointTy(InsertPointTy, Value *, Value *, Value *&)>;
+
+ /// Functions used to generate atomic reductions. Such functions take two
+ /// Values representing pointers to LHS and RHS of the reduction. They are
+ /// expected to atomically update the LHS to the reduced value.
+ using AtomicReductionGenTy =
+ function_ref<InsertPointTy(InsertPointTy, Value *, Value *)>;
+
+ /// Information about an OpenMP reduction.
+ struct ReductionInfo {
+ ReductionInfo(Value *Variable, Value *PrivateVariable,
+ ReductionGenTy ReductionGen,
+ AtomicReductionGenTy AtomicReductionGen)
+ : Variable(Variable), PrivateVariable(PrivateVariable),
+ ReductionGen(ReductionGen), AtomicReductionGen(AtomicReductionGen) {}
+
+ /// Returns the type of the element being reduced.
+ Type *getElementType() const {
+ return Variable->getType()->getPointerElementType();
+ }
+
+ /// Reduction variable of pointer type.
+ Value *Variable;
+
+ /// Thread-private partial reduction variable.
+ Value *PrivateVariable;
+
+ /// Callback for generating the reduction body. The IR produced by this will
+ /// be used to combine two values in a thread-safe context, e.g., under
+ /// lock or within the same thread, and therefore need not be atomic.
+ ReductionGenTy ReductionGen;
+
+ /// Callback for generating the atomic reduction body, may be null. The IR
+ /// produced by this will be used to atomically combine two values during
+ /// reduction. If null, the implementation will use the non-atomic version
+ /// along with the appropriate synchronization mechanisms.
+ AtomicReductionGenTy AtomicReductionGen;
+ };
+
+ // TODO: provide atomic and non-atomic reduction generators for reduction
+ // operators defined by the OpenMP specification.
+
+ /// Generator for '#omp reduction'.
+ ///
+ /// Emits the IR instructing the runtime to perform the specific kind of
+ /// reductions. Expects reduction variables to have been privatized and
+ /// initialized to reduction-neutral values separately. Emits the calls to
+ /// runtime functions as well as the reduction function and the basic blocks
+ /// performing the reduction atomically and non-atomically.
+ ///
+ /// The code emitted for the following:
+ ///
+ /// \code
+ /// type var_1;
+ /// type var_2;
+ /// #pragma omp <directive> reduction(reduction-op:var_1,var_2)
+ /// /* body */;
+ /// \endcode
+ ///
+ /// corresponds to the following sketch.
+ ///
+ /// \code
+ /// void _outlined_par() {
+ /// // N is the number of different reductions.
+ /// void *red_array[] = {privatized_var_1, privatized_var_2, ...};
+ /// switch(__kmpc_reduce(..., N, /*size of data in red array*/, red_array,
+ /// _omp_reduction_func,
+ /// _gomp_critical_user.reduction.var)) {
+ /// case 1: {
+ /// var_1 = var_1 <reduction-op> privatized_var_1;
+ /// var_2 = var_2 <reduction-op> privatized_var_2;
+ /// // ...
+ /// __kmpc_end_reduce(...);
+ /// break;
+ /// }
+ /// case 2: {
+ /// _Atomic<ReductionOp>(var_1, privatized_var_1);
+ /// _Atomic<ReductionOp>(var_2, privatized_var_2);
+ /// // ...
+ /// break;
+ /// }
+ /// default: break;
+ /// }
+ /// }
+ ///
+ /// void _omp_reduction_func(void **lhs, void **rhs) {
+ /// *(type *)lhs[0] = *(type *)lhs[0] <reduction-op> *(type *)rhs[0];
+ /// *(type *)lhs[1] = *(type *)lhs[1] <reduction-op> *(type *)rhs[1];
+ /// // ...
+ /// }
+ /// \endcode
+ ///
+ /// \param Loc The location where the reduction was
+ /// encountered. Must be within the associate
+ /// directive and after the last local access to the
+ /// reduction variables.
+ /// \param AllocaIP An insertion point suitable for allocas usable
+ /// in reductions.
+ /// \param ReductionInfos A list of info on each reduction variable.
+ /// \param IsNoWait A flag set if the reduction is marked as nowait.
+ InsertPointTy createReductions(const LocationDescription &Loc,
+ InsertPointTy AllocaIP,
+ ArrayRef<ReductionInfo> ReductionInfos,
+ bool IsNoWait = false);
+
///}
/// Return the insertion point used by the underlying IRBuilder.
@@ -515,6 +670,10 @@ public:
Constant *getOrCreateSrcLocStr(StringRef FunctionName, StringRef FileName,
unsigned Line, unsigned Column);
+ /// Return the (LLVM-IR) string describing the DebugLoc \p DL. Use \p F as
+ /// fallback if \p DL does not specify the function name.
+ Constant *getOrCreateSrcLocStr(DebugLoc DL, Function *F = nullptr);
+
/// Return the (LLVM-IR) string describing the source location \p Loc.
Constant *getOrCreateSrcLocStr(const LocationDescription &Loc);
@@ -524,8 +683,8 @@ public:
omp::IdentFlag Flags = omp::IdentFlag(0),
unsigned Reserve2Flags = 0);
- // Get the type corresponding to __kmpc_impl_lanemask_t from the deviceRTL
- Type *getLanemaskType();
+ /// Create a global flag \p Name in the module with initial value \p Value.
+ GlobalValue *createGlobalFlag(unsigned Value, StringRef Name);
/// Generate control flow and cleanup for cancellation.
///
@@ -651,11 +810,11 @@ public:
/// \param Loc The source location description.
/// \param MapperFunc Function to be called.
/// \param SrcLocInfo Source location information global.
- /// \param MaptypesArgs
- /// \param MapnamesArg
+ /// \param MaptypesArg The map types array argument.
+ /// \param MapnamesArg The map names array argument.
/// \param MapperAllocas The AllocaInst used for the call.
/// \param DeviceID Device ID for the call.
- /// \param TotalNbOperand Number of operand in the call.
+ /// \param NumOperands Number of operands in the call.
void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc,
Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg,
struct MapperAllocas &MapperAllocas, int64_t DeviceID,
@@ -705,7 +864,7 @@ public:
/// \param BodyGenCB Callback that will generate the region code.
/// \param FiniCB Callback to finialize variable copies.
///
- /// \returns The insertion position *after* the master.
+ /// \returns The insertion position *after* the masked.
InsertPointTy createMasked(const LocationDescription &Loc,
BodyGenCallbackTy BodyGenCB,
FinalizeCallbackTy FiniCB, Value *Filter);
@@ -718,12 +877,41 @@ public:
/// \param CriticalName name of the lock used by the critical directive
/// \param HintInst Hint Instruction for hint clause associated with critical
///
- /// \returns The insertion position *after* the master.
+ /// \returns The insertion position *after* the critical.
InsertPointTy createCritical(const LocationDescription &Loc,
BodyGenCallbackTy BodyGenCB,
FinalizeCallbackTy FiniCB,
StringRef CriticalName, Value *HintInst);
+ /// Generator for '#omp ordered depend (source | sink)'
+ ///
+ /// \param Loc The insert and source location description.
+ /// \param AllocaIP The insertion point to be used for alloca instructions.
+ /// \param NumLoops The number of loops in depend clause.
+ /// \param StoreValues The value will be stored in vector address.
+ /// \param Name The name of alloca instruction.
+ /// \param IsDependSource If true, depend source; otherwise, depend sink.
+ ///
+ /// \return The insertion position *after* the ordered.
+ InsertPointTy createOrderedDepend(const LocationDescription &Loc,
+ InsertPointTy AllocaIP, unsigned NumLoops,
+ ArrayRef<llvm::Value *> StoreValues,
+ const Twine &Name, bool IsDependSource);
+
+ /// Generator for '#omp ordered [threads | simd]'
+ ///
+ /// \param Loc The insert and source location description.
+ /// \param BodyGenCB Callback that will generate the region code.
+ /// \param FiniCB Callback to finalize variable copies.
+ /// \param IsThreads If true, the 'threads' clause (or no clause) applies;
+ /// otherwise, the 'simd' clause applies.
+ ///
+ /// \returns The insertion position *after* the ordered.
+ InsertPointTy createOrderedThreadsSimd(const LocationDescription &Loc,
+ BodyGenCallbackTy BodyGenCB,
+ FinalizeCallbackTy FiniCB,
+ bool IsThreads);
+
/// Generator for '#omp sections'
///
/// \param Loc The insert and source location description.
@@ -816,14 +1004,16 @@ public:
/// \param Loc The insert and source location description.
/// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
/// \param RequiresFullRuntime Indicate if a full device runtime is necessary.
- InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD, bool RequiresFullRuntime);
+ InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD,
+ bool RequiresFullRuntime);
/// Create a runtime call for kmpc_target_deinit
///
/// \param Loc The insert and source location description.
/// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
/// \param RequiresFullRuntime Indicate if a full device runtime is necessary.
- void createTargetDeinit(const LocationDescription &Loc, bool IsSPMD, bool RequiresFullRuntime);
+ void createTargetDeinit(const LocationDescription &Loc, bool IsSPMD,
+ bool RequiresFullRuntime);
///}
@@ -1121,7 +1311,25 @@ public:
/// The control-flow structure is standardized for easy consumption by
/// directives associated with loops. For instance, the worksharing-loop
/// construct may change this control flow such that each loop iteration is
-/// executed on only one thread.
+/// executed on only one thread. The constraints of a canonical loop in brief
+/// are:
+///
+/// * The number of loop iterations must have been computed before entering the
+/// loop.
+///
+/// * Has an (unsigned) logical induction variable that starts at zero and
+/// increments by one.
+///
+/// * The loop's CFG itself has no side-effects. The OpenMP specification
+/// itself allows side-effects, but the order in which they happen, including
+/// how often or whether at all, is unspecified. We expect that the frontend
+/// will emit those side-effect instructions somewhere (e.g. before the loop)
+/// such that the CanonicalLoopInfo itself can be side-effect free.
+///
+/// Keep in mind that CanonicalLoopInfo is meant to only describe a repeated
+/// execution of a loop body that satisfies these constraints. It does NOT
+/// represent arbitrary SESE regions that happen to contain a loop. Do not use
+/// CanonicalLoopInfo for such purposes.
///
/// The control flow can be described as follows:
///
@@ -1141,73 +1349,149 @@ public:
/// |
/// After
///
-/// Code in the header, condition block, latch and exit block must not have any
-/// side-effect. The body block is the single entry point into the loop body,
-/// which may contain arbitrary control flow as long as all control paths
-/// eventually branch to the latch block.
+/// The loop is thought to start at PreheaderIP (at the Preheader's terminator,
+/// inclusive) and end at AfterIP (at the After block's first instruction,
+/// exclusive).
+/// That is, instructions in the Preheader and After blocks (except the
+/// Preheader's terminator) are out of CanonicalLoopInfo's control and may have
+/// side-effects. Typically, the Preheader is used to compute the loop's trip
+/// count. The instructions from BodyIP (at the Body block's first instruction,
+/// excluding) until the Latch are also considered outside CanonicalLoopInfo's
+/// control and thus can have side-effects. The body block is the single entry
+/// point into the loop body, which may contain arbitrary control flow as long
+/// as all control paths eventually branch to the Latch block.
+///
+/// TODO: Consider adding another standardized BasicBlock between Body CFG and
+/// Latch to guarantee that there is only a single edge to the latch. It would
+/// make loop transformations easier by not needing to consider multiple
+/// predecessors of the latch (see redirectAllPredecessorsTo) and would give us
+/// an equivalent to PreheaderIP, AfterIP and BodyIP for inserting code that
+/// executes after each body iteration.
+///
+/// There must be no loop-carried dependencies through llvm::Values. This is
+/// equivalent to requiring that the Latch has no PHINode and that the Header's
+/// only PHINode is for the induction variable.
+///
+/// All code in Header, Cond, Latch and Exit (plus the terminator of the
+/// Preheader) is CanonicalLoopInfo's responsibility and its build-up is checked
+/// by assertOK(). These blocks are expected not to be modified unless explicitly
+/// modifying the CanonicalLoopInfo through a method that applies an OpenMP
+/// loop-associated construct such as applyWorkshareLoop, tileLoops, unrollLoop,
+/// etc. These methods usually invalidate the CanonicalLoopInfo and re-use its
+/// basic blocks. After invalidation, the CanonicalLoopInfo must not be used
+/// anymore as its underlying control flow may no longer exist.
+/// Loop-transformation methods such as tileLoops, collapseLoops and unrollLoop
+/// may also return a new CanonicalLoopInfo that can be passed to other methods
+/// implementing loop-associated constructs. These loop-transforming methods may
+/// either create a new CanonicalLoopInfo (usually using createLoopSkeleton) and
+/// invalidate the input CanonicalLoopInfo, or reuse and modify one of the input
+/// CanonicalLoopInfos and return it as representing the modified loop. Which of
+/// these happens is an implementation detail of the transformation-implementing
+/// method, and callers should always assume that the CanonicalLoopInfo passed to
+/// it is invalidated and that a new object is returned. A returned
+/// CanonicalLoopInfo has the same structure and guarantees as one created by
+/// createCanonicalLoop, so that transforming methods do not have to special-case
+/// where the CanonicalLoopInfo originated from.
+///
+/// Generally, methods consuming CanonicalLoopInfo do not need an
+/// OpenMPIRBuilder::InsertPointTy as argument, but use the locations of the
+/// CanonicalLoopInfo to insert new or modify existing instructions. Unless
+/// documented otherwise, methods consuming CanonicalLoopInfo do not invalidate
+/// any InsertPoint that is outside CanonicalLoopInfo's control. Specifically,
+/// any InsertPoint in the Preheader, Body or After blocks can still be used after
+/// calling such a method.
///
-/// Defined outside OpenMPIRBuilder because one cannot forward-declare nested
-/// classes.
+/// TODO: Provide mechanisms for exception handling and cancellation points.
+///
+/// Defined outside OpenMPIRBuilder because nested classes cannot be
+/// forward-declared, e.g. to avoid having to include the entire OMPIRBuilder.h.
class CanonicalLoopInfo {
friend class OpenMPIRBuilder;
private:
- /// Whether this object currently represents a loop.
- bool IsValid = false;
-
- BasicBlock *Preheader;
- BasicBlock *Header;
- BasicBlock *Cond;
- BasicBlock *Body;
- BasicBlock *Latch;
- BasicBlock *Exit;
- BasicBlock *After;
+ BasicBlock *Preheader = nullptr;
+ BasicBlock *Header = nullptr;
+ BasicBlock *Cond = nullptr;
+ BasicBlock *Body = nullptr;
+ BasicBlock *Latch = nullptr;
+ BasicBlock *Exit = nullptr;
+ BasicBlock *After = nullptr;
/// Add the control blocks of this loop to \p BBs.
///
/// This does not include any block from the body, including the one returned
/// by getBody().
+ ///
+ /// FIXME: This currently includes the Preheader and After blocks even though
+ /// their content is (mostly) not under CanonicalLoopInfo's control.
+  /// Re-evaluate whether this makes sense.
void collectControlBlocks(SmallVectorImpl<BasicBlock *> &BBs);
public:
+ /// Returns whether this object currently represents the IR of a loop. If
+  /// returning false, it may have been consumed by a loop transformation or may
+  /// never have been initialized. Do not use it in this case.
+ bool isValid() const { return Header; }
+
/// The preheader ensures that there is only a single edge entering the loop.
/// Code that must be executed before any loop iteration can be emitted here,
/// such as computing the loop trip count and begin lifetime markers. Code in
/// the preheader is not considered part of the canonical loop.
- BasicBlock *getPreheader() const { return Preheader; }
+ BasicBlock *getPreheader() const {
+ assert(isValid() && "Requires a valid canonical loop");
+ return Preheader;
+ }
/// The header is the entry for each iteration. In the canonical control flow,
/// it only contains the PHINode for the induction variable.
- BasicBlock *getHeader() const { return Header; }
+ BasicBlock *getHeader() const {
+ assert(isValid() && "Requires a valid canonical loop");
+ return Header;
+ }
/// The condition block computes whether there is another loop iteration. If
/// yes, branches to the body; otherwise to the exit block.
- BasicBlock *getCond() const { return Cond; }
+ BasicBlock *getCond() const {
+ assert(isValid() && "Requires a valid canonical loop");
+ return Cond;
+ }
/// The body block is the single entry for a loop iteration and not controlled
/// by CanonicalLoopInfo. It can contain arbitrary control flow but must
/// eventually branch to the \p Latch block.
- BasicBlock *getBody() const { return Body; }
+ BasicBlock *getBody() const {
+ assert(isValid() && "Requires a valid canonical loop");
+ return Body;
+ }
/// Reaching the latch indicates the end of the loop body code. In the
/// canonical control flow, it only contains the increment of the induction
/// variable.
- BasicBlock *getLatch() const { return Latch; }
+ BasicBlock *getLatch() const {
+ assert(isValid() && "Requires a valid canonical loop");
+ return Latch;
+ }
/// Reaching the exit indicates no more iterations are being executed.
- BasicBlock *getExit() const { return Exit; }
+ BasicBlock *getExit() const {
+ assert(isValid() && "Requires a valid canonical loop");
+ return Exit;
+ }
/// The after block is intended for clean-up code such as lifetime end
/// markers. It is separate from the exit block to ensure that, analogous to the
/// preheader, it has just a single entry edge and is free from PHI nodes should
/// there be multiple loop exits (such as from break
/// statements/cancellations).
- BasicBlock *getAfter() const { return After; }
+ BasicBlock *getAfter() const {
+ assert(isValid() && "Requires a valid canonical loop");
+ return After;
+ }
/// Returns the llvm::Value containing the number of loop iterations. It must
/// be valid in the preheader and always interpreted as an unsigned integer of
/// any bit-width.
Value *getTripCount() const {
+ assert(isValid() && "Requires a valid canonical loop");
Instruction *CmpI = &Cond->front();
assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount");
return CmpI->getOperand(1);
@@ -1216,33 +1500,47 @@ public:
/// Returns the instruction representing the current logical induction
/// variable. Always unsigned, always starting at 0 with an increment of one.
Instruction *getIndVar() const {
+ assert(isValid() && "Requires a valid canonical loop");
Instruction *IndVarPHI = &Header->front();
assert(isa<PHINode>(IndVarPHI) && "First inst must be the IV PHI");
return IndVarPHI;
}
/// Return the type of the induction variable (and the trip count).
- Type *getIndVarType() const { return getIndVar()->getType(); }
+ Type *getIndVarType() const {
+ assert(isValid() && "Requires a valid canonical loop");
+ return getIndVar()->getType();
+ }
/// Return the insertion point for user code before the loop.
OpenMPIRBuilder::InsertPointTy getPreheaderIP() const {
+ assert(isValid() && "Requires a valid canonical loop");
return {Preheader, std::prev(Preheader->end())};
};
/// Return the insertion point for user code in the body.
OpenMPIRBuilder::InsertPointTy getBodyIP() const {
+ assert(isValid() && "Requires a valid canonical loop");
return {Body, Body->begin()};
};
/// Return the insertion point for user code after the loop.
OpenMPIRBuilder::InsertPointTy getAfterIP() const {
+ assert(isValid() && "Requires a valid canonical loop");
return {After, After->begin()};
};
- Function *getFunction() const { return Header->getParent(); }
+ Function *getFunction() const {
+ assert(isValid() && "Requires a valid canonical loop");
+ return Header->getParent();
+ }
/// Consistency self-check.
void assertOK() const;
+
+ /// Invalidate this loop. That is, the underlying IR does not fulfill the
+ /// requirements of an OpenMP canonical loop anymore.
+ void invalidate();
};
} // end namespace llvm
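// Hedged sketch (editor illustration) of the invalidation contract documented
// for CanonicalLoopInfo above. createCanonicalLoop and tileLoops are only
// referenced by the comment, so the exact calls below are assumptions;
// `OMPBuilder`, `Loc`, `LoopBodyGenCB`, `TripCount`, `DL` and `TileSize` are
// likewise assumed to be provided by the caller.
llvm::CanonicalLoopInfo *CLI =
    OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, TripCount);
assert(CLI->isValid() && "freshly created canonical loop must be valid");
llvm::OpenMPIRBuilder::InsertPointTy AfterIP = CLI->getAfterIP(); // stays usable

// Loop transformations consume their input; keep only the returned objects.
std::vector<llvm::CanonicalLoopInfo *> Tiled =
    OMPBuilder.tileLoops(DL, {CLI}, {TileSize});
// CLI must now be assumed invalidated; query the objects in Tiled instead.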
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
index eb673b199fc4..8e4f7568fb9c 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -39,7 +39,6 @@ __OMP_TYPE(Int32Ptr)
__OMP_TYPE(Int64Ptr)
OMP_TYPE(SizeTy, M.getDataLayout().getIntPtrType(Ctx))
-OMP_TYPE(LanemaskTy, getLanemaskType())
#define __OMP_PTR_TYPE(NAME, BASE) OMP_TYPE(NAME, BASE->getPointerTo())
@@ -272,6 +271,15 @@ __OMP_RTL(__kmpc_for_static_init_8, false, Void, IdentPtr, Int32, Int32,
__OMP_RTL(__kmpc_for_static_init_8u, false, Void, IdentPtr, Int32, Int32,
Int32Ptr, Int64Ptr, Int64Ptr, Int64Ptr, Int64, Int64)
__OMP_RTL(__kmpc_for_static_fini, false, Void, IdentPtr, Int32)
+__OMP_RTL(__kmpc_distribute_static_init_4, false, Void, IdentPtr, Int32, Int32,
+ Int32Ptr, Int32Ptr, Int32Ptr, Int32Ptr, Int32, Int32)
+__OMP_RTL(__kmpc_distribute_static_init_4u, false, Void, IdentPtr, Int32, Int32,
+ Int32Ptr, Int32Ptr, Int32Ptr, Int32Ptr, Int32, Int32)
+__OMP_RTL(__kmpc_distribute_static_init_8, false, Void, IdentPtr, Int32, Int32,
+ Int32Ptr, Int64Ptr, Int64Ptr, Int64Ptr, Int64, Int64)
+__OMP_RTL(__kmpc_distribute_static_init_8u, false, Void, IdentPtr, Int32, Int32,
+ Int32Ptr, Int64Ptr, Int64Ptr, Int64Ptr, Int64, Int64)
+__OMP_RTL(__kmpc_distribute_static_fini, false, Void, IdentPtr, Int32)
__OMP_RTL(__kmpc_dist_dispatch_init_4, false, Void, IdentPtr, Int32, Int32,
Int32Ptr, Int32, Int32, Int32, Int32)
__OMP_RTL(__kmpc_dist_dispatch_init_4u, false, Void, IdentPtr, Int32, Int32,
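// Editor's note, hedged: each __OMP_RTL(Name, IsVarArg, ReturnType, Params...)
// entry declares an OpenMP runtime function. The __kmpc_distribute_static_init_4
// entry added in the hunk above therefore corresponds roughly to the following
// prototype; parameter names are illustrative, and ident_t is the runtime's
// source-location descriptor type (assumed, not declared here).
extern "C" void __kmpc_distribute_static_init_4(
    ident_t *loc, int32_t gtid, int32_t schedtype, int32_t *plastiter,
    int32_t *plower, int32_t *pupper, int32_t *pupper_dist, int32_t incr,
    int32_t chunk);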
@@ -415,8 +423,8 @@ __OMP_RTL(__kmpc_task_allow_completion_event, false, VoidPtr, IdentPtr,
/* Int */ Int32, /* kmp_task_t */ VoidPtr)
/// OpenMP Device runtime functions
-__OMP_RTL(__kmpc_target_init, false, Int32, IdentPtr, Int1, Int1, Int1)
-__OMP_RTL(__kmpc_target_deinit, false, Void, IdentPtr, Int1, Int1)
+__OMP_RTL(__kmpc_target_init, false, Int32, IdentPtr, Int8, Int1, Int1)
+__OMP_RTL(__kmpc_target_deinit, false, Void, IdentPtr, Int8, Int1)
__OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr)
__OMP_RTL(__kmpc_parallel_51, false, Void, IdentPtr, Int32, Int32, Int32, Int32,
VoidPtr, VoidPtr, VoidPtrPtr, SizeTy)
@@ -442,9 +450,12 @@ __OMP_RTL(__kmpc_get_shared_variables, false, Void, VoidPtrPtrPtr)
__OMP_RTL(__kmpc_parallel_level, false, Int8, )
__OMP_RTL(__kmpc_is_spmd_exec_mode, false, Int8, )
__OMP_RTL(__kmpc_barrier_simple_spmd, false, Void, IdentPtr, Int32)
+__OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32)
-__OMP_RTL(__kmpc_warp_active_thread_mask, false, LanemaskTy,)
-__OMP_RTL(__kmpc_syncwarp, false, Void, LanemaskTy)
+__OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,)
+__OMP_RTL(__kmpc_syncwarp, false, Void, Int64)
+
+__OMP_RTL(__kmpc_get_warp_size, false, Int32, )
__OMP_RTL(__kmpc_is_generic_main_thread_id, false, Int8, Int32)
@@ -510,6 +521,11 @@ __OMP_ATTRS_SET(NoCaptureAttrs,
? AttributeSet(EnumAttr(NoCapture))
: AttributeSet(EnumAttr(NoCapture)))
+__OMP_ATTRS_SET(AlwaysInlineAttrs,
+ OptimisticAttributes
+ ? AttributeSet(EnumAttr(AlwaysInline))
+ : AttributeSet(EnumAttr(AlwaysInline)))
+
#if 0
__OMP_ATTRS_SET(InaccessibleOnlyAttrs,
OptimisticAttributes
@@ -535,6 +551,11 @@ __OMP_ATTRS_SET(ReadOnlyPtrAttrs,
EnumAttr(NoCapture))
: AttributeSet())
+__OMP_ATTRS_SET(DeviceAllocAttrs,
+ OptimisticAttributes
+ ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync))
+ : AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync)))
+
#if 0
__OMP_ATTRS_SET(WriteOnlyPtrAttrs,
OptimisticAttributes
@@ -575,6 +596,8 @@ __OMP_RTL_ATTRS(__kmpc_barrier, BarrierAttrs, AttributeSet(),
ParamAttrs(ReadOnlyPtrAttrs))
__OMP_RTL_ATTRS(__kmpc_barrier_simple_spmd, BarrierAttrs, AttributeSet(),
ParamAttrs(ReadOnlyPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_barrier_simple_generic, BarrierAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs))
__OMP_RTL_ATTRS(__kmpc_warp_active_thread_mask, BarrierAttrs, AttributeSet(),
ParamAttrs())
__OMP_RTL_ATTRS(__kmpc_syncwarp, BarrierAttrs, AttributeSet(), ParamAttrs())
@@ -703,6 +726,28 @@ __OMP_RTL_ATTRS(__kmpc_for_static_init_8u, GetterArgWriteAttrs, AttributeSet(),
AttributeSet(), AttributeSet()))
__OMP_RTL_ATTRS(__kmpc_for_static_fini, InaccessibleArgOnlyAttrs,
AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_distribute_static_init_4, GetterArgWriteAttrs,
+ AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(),
+ ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs,
+ AttributeSet(), AttributeSet()))
+__OMP_RTL_ATTRS(__kmpc_distribute_static_init_4u, GetterArgWriteAttrs,
+ AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(),
+ ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs,
+ AttributeSet(), AttributeSet()))
+__OMP_RTL_ATTRS(__kmpc_distribute_static_init_8, GetterArgWriteAttrs,
+ AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(),
+ ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs,
+ AttributeSet(), AttributeSet()))
+__OMP_RTL_ATTRS(__kmpc_distribute_static_init_8u, GetterArgWriteAttrs,
+ AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(),
+ ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs,
+ AttributeSet(), AttributeSet()))
+__OMP_RTL_ATTRS(__kmpc_distribute_static_fini, InaccessibleArgOnlyAttrs,
+ AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs))
__OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_4, GetterArgWriteAttrs,
AttributeSet(),
ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(),
@@ -854,9 +899,9 @@ __OMP_RTL_ATTRS(__kmpc_doacross_wait, BarrierAttrs, AttributeSet(),
__OMP_RTL_ATTRS(__kmpc_doacross_fini, BarrierAttrs, AttributeSet(),
ParamAttrs(ReadOnlyPtrAttrs))
-__OMP_RTL_ATTRS(__kmpc_alloc_shared, DefaultAttrs, ReturnPtrAttrs,
+__OMP_RTL_ATTRS(__kmpc_alloc_shared, DeviceAllocAttrs, ReturnPtrAttrs,
ParamAttrs())
-__OMP_RTL_ATTRS(__kmpc_free_shared, AllocAttrs, AttributeSet(),
+__OMP_RTL_ATTRS(__kmpc_free_shared, DeviceAllocAttrs, AttributeSet(),
ParamAttrs(NoCaptureAttrs))
__OMP_RTL_ATTRS(__kmpc_alloc, DefaultAttrs, ReturnPtrAttrs, ParamAttrs())
@@ -897,6 +942,9 @@ __OMP_RTL_ATTRS(__tgt_push_mapper_component, ForkAttrs, AttributeSet(),
__OMP_RTL_ATTRS(__kmpc_task_allow_completion_event, DefaultAttrs,
ReturnPtrAttrs, ParamAttrs(ReadOnlyPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_parallel_51, AlwaysInlineAttrs, AttributeSet(),
+ ParamAttrs())
+
#undef __OMP_RTL_ATTRS
#undef OMP_RTL_ATTRS
#undef AttributeSet
@@ -920,6 +968,7 @@ __OMP_RTL_ATTRS(__kmpc_task_allow_completion_event, DefaultAttrs,
OMP_IDENT_FLAG(OMP_IDENT_FLAG_##Name, #Name, Value)
__OMP_IDENT_FLAG(KMPC, 0x02)
+__OMP_IDENT_FLAG(ATOMIC_REDUCE, 0x10)
__OMP_IDENT_FLAG(BARRIER_EXPL, 0x20)
__OMP_IDENT_FLAG(BARRIER_IMPL, 0x0040)
__OMP_IDENT_FLAG(BARRIER_IMPL_MASK, 0x01C0)
diff --git a/llvm/include/llvm/IR/AbstractCallSite.h b/llvm/include/llvm/IR/AbstractCallSite.h
index e8cf05001542..31df4c75b6e7 100644
--- a/llvm/include/llvm/IR/AbstractCallSite.h
+++ b/llvm/include/llvm/IR/AbstractCallSite.h
@@ -153,7 +153,7 @@ public:
/// Return the number of parameters of the callee.
unsigned getNumArgOperands() const {
if (isDirectCall())
- return CB->getNumArgOperands();
+ return CB->arg_size();
// Subtract 1 for the callee encoding.
return CI.ParameterEncoding.size() - 1;
}
diff --git a/llvm/include/llvm/IR/Argument.h b/llvm/include/llvm/IR/Argument.h
index dcf658f439b4..396ab6a9d01d 100644
--- a/llvm/include/llvm/IR/Argument.h
+++ b/llvm/include/llvm/IR/Argument.h
@@ -97,7 +97,7 @@ public:
/// If this is a byval or inalloca argument, return its alignment.
/// FIXME: Remove this function once transition to Align is over.
/// Use getParamAlign() instead.
- unsigned getParamAlignment() const;
+ uint64_t getParamAlignment() const;
/// If this is a byval or inalloca argument, return its alignment.
MaybeAlign getParamAlign() const;
diff --git a/llvm/include/llvm/IR/Assumptions.h b/llvm/include/llvm/IR/Assumptions.h
index f64616c25d87..08e6c8b6f1e0 100644
--- a/llvm/include/llvm/IR/Assumptions.h
+++ b/llvm/include/llvm/IR/Assumptions.h
@@ -15,12 +15,14 @@
#ifndef LLVM_IR_ASSUMPTIONS_H
#define LLVM_IR_ASSUMPTIONS_H
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
namespace llvm {
class Function;
+class CallBase;
/// The key we use for assumption attributes.
constexpr StringRef AssumptionAttrKey = "llvm.assume";
@@ -43,7 +45,25 @@ private:
};
/// Return true if \p F has the assumption \p AssumptionStr attached.
-bool hasAssumption(Function &F, const KnownAssumptionString &AssumptionStr);
+bool hasAssumption(const Function &F,
+ const KnownAssumptionString &AssumptionStr);
+
+/// Return true if \p CB or the callee has the assumption \p AssumptionStr
+/// attached.
+bool hasAssumption(const CallBase &CB,
+ const KnownAssumptionString &AssumptionStr);
+
+/// Return the set of all assumptions for the function \p F.
+DenseSet<StringRef> getAssumptions(const Function &F);
+
+/// Return the set of all assumptions for the call \p CB.
+DenseSet<StringRef> getAssumptions(const CallBase &CB);
+
+/// Appends the set of assumptions \p Assumptions to \p F.
+bool addAssumptions(Function &F, const DenseSet<StringRef> &Assumptions);
+
+/// Appends the set of assumptions \p Assumptions to \p CB.
+bool addAssumptions(CallBase &CB, const DenseSet<StringRef> &Assumptions);
} // namespace llvm
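// Hedged sketch (editor illustration) of the extended assumption API declared
// above; `F`, `CB` and `Caller` are assumed llvm::Function / llvm::CallBase
// references, and "omp_no_openmp" is just an example assumption string.
if (llvm::hasAssumption(F, llvm::KnownAssumptionString("omp_no_openmp"))) {
  // F carries the assumption, e.g. via its "llvm.assume" string attribute.
}
// Collect every assumption known for a call site and append it to a function.
llvm::DenseSet<llvm::StringRef> Assumed = llvm::getAssumptions(CB);
bool Changed = llvm::addAssumptions(Caller, Assumed);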
diff --git a/llvm/include/llvm/IR/Attributes.h b/llvm/include/llvm/IR/Attributes.h
index d7bd3edb3d4c..282be640d8be 100644
--- a/llvm/include/llvm/IR/Attributes.h
+++ b/llvm/include/llvm/IR/Attributes.h
@@ -37,7 +37,6 @@ class AttrBuilder;
class AttributeImpl;
class AttributeListImpl;
class AttributeSetNode;
-template<typename T> struct DenseMapInfo;
class FoldingSetNodeID;
class Function;
class LLVMContext;
@@ -78,6 +77,7 @@ public:
TombstoneKey, ///< Use as Tombstone key for DenseMap of AttrKind
};
+ static const unsigned NumIntAttrKinds = LastIntAttr - FirstIntAttr + 1;
static const unsigned NumTypeAttrKinds = LastTypeAttr - FirstTypeAttr + 1;
static bool isEnumAttrKind(AttrKind Kind) {
@@ -265,7 +265,7 @@ inline Attribute unwrap(LLVMAttributeRef Attr) {
/// and removing string or integer attributes involves a FoldingSet lookup.
class AttributeSet {
friend AttributeListImpl;
- template <typename Ty> friend struct DenseMapInfo;
+ template <typename Ty, typename Enable> friend struct DenseMapInfo;
// TODO: Extract AvailableAttrs from AttributeSetNode and store them here.
// This will allow an efficient implementation of addAttribute and
@@ -366,7 +366,7 @@ public:
//===----------------------------------------------------------------------===//
/// \class
/// Provide DenseMapInfo for AttributeSet.
-template <> struct DenseMapInfo<AttributeSet> {
+template <> struct DenseMapInfo<AttributeSet, void> {
static AttributeSet getEmptyKey() {
auto Val = static_cast<uintptr_t>(-1);
Val <<= PointerLikeTypeTraits<void *>::NumLowBitsAvailable;
@@ -408,7 +408,7 @@ private:
friend class AttributeListImpl;
friend class AttributeSet;
friend class AttributeSetNode;
- template <typename Ty> friend struct DenseMapInfo;
+ template <typename Ty, typename Enable> friend struct DenseMapInfo;
/// The attributes that we are managing. This can be null to represent
/// the empty attributes list.
@@ -432,8 +432,8 @@ private:
static AttributeList getImpl(LLVMContext &C, ArrayRef<AttributeSet> AttrSets);
- AttributeList setAttributes(LLVMContext &C, unsigned Index,
- AttributeSet Attrs) const;
+ AttributeList setAttributesAtIndex(LLVMContext &C, unsigned Index,
+ AttributeSet Attrs) const;
public:
AttributeList() = default;
@@ -454,32 +454,84 @@ public:
static AttributeList get(LLVMContext &C, unsigned Index,
const AttrBuilder &B);
+ // TODO: remove non-AtIndex versions of these methods.
/// Add an attribute to the attribute set at the given index.
/// Returns a new list because attribute lists are immutable.
- LLVM_NODISCARD AttributeList addAttribute(LLVMContext &C, unsigned Index,
- Attribute::AttrKind Kind) const;
+ LLVM_NODISCARD AttributeList addAttributeAtIndex(
+ LLVMContext &C, unsigned Index, Attribute::AttrKind Kind) const;
/// Add an attribute to the attribute set at the given index.
/// Returns a new list because attribute lists are immutable.
LLVM_NODISCARD AttributeList
- addAttribute(LLVMContext &C, unsigned Index, StringRef Kind,
- StringRef Value = StringRef()) const;
+ addAttributeAtIndex(LLVMContext &C, unsigned Index, StringRef Kind,
+ StringRef Value = StringRef()) const;
/// Add an attribute to the attribute set at the given index.
/// Returns a new list because attribute lists are immutable.
- LLVM_NODISCARD AttributeList addAttribute(LLVMContext &C, unsigned Index,
- Attribute A) const;
+ LLVM_NODISCARD AttributeList addAttributeAtIndex(LLVMContext &C,
+ unsigned Index,
+ Attribute A) const;
/// Add attributes to the attribute set at the given index.
/// Returns a new list because attribute lists are immutable.
- LLVM_NODISCARD AttributeList addAttributes(LLVMContext &C, unsigned Index,
- const AttrBuilder &B) const;
+ LLVM_NODISCARD AttributeList addAttributesAtIndex(LLVMContext &C,
+ unsigned Index,
+ const AttrBuilder &B) const;
+
+ /// Add a function attribute to the list. Returns a new list because
+ /// attribute lists are immutable.
+ LLVM_NODISCARD AttributeList addFnAttribute(LLVMContext &C,
+ Attribute::AttrKind Kind) const {
+ return addAttributeAtIndex(C, FunctionIndex, Kind);
+ }
+
+ /// Add a function attribute to the list. Returns a new list because
+ /// attribute lists are immutable.
+ LLVM_NODISCARD AttributeList addFnAttribute(LLVMContext &C,
+ Attribute Attr) const {
+ return addAttributeAtIndex(C, FunctionIndex, Attr);
+ }
+
+ /// Add a function attribute to the list. Returns a new list because
+ /// attribute lists are immutable.
+ LLVM_NODISCARD AttributeList addFnAttribute(
+ LLVMContext &C, StringRef Kind, StringRef Value = StringRef()) const {
+ return addAttributeAtIndex(C, FunctionIndex, Kind, Value);
+ }
+
+  /// Add function attributes to the list. Returns a new list because
+ /// attribute lists are immutable.
+ LLVM_NODISCARD AttributeList addFnAttributes(LLVMContext &C,
+ const AttrBuilder &B) const {
+ return addAttributesAtIndex(C, FunctionIndex, B);
+ }
+
+ /// Add a return value attribute to the list. Returns a new list because
+ /// attribute lists are immutable.
+ LLVM_NODISCARD AttributeList addRetAttribute(LLVMContext &C,
+ Attribute::AttrKind Kind) const {
+ return addAttributeAtIndex(C, ReturnIndex, Kind);
+ }
+
+ /// Add a return value attribute to the list. Returns a new list because
+ /// attribute lists are immutable.
+ LLVM_NODISCARD AttributeList addRetAttribute(LLVMContext &C,
+ Attribute Attr) const {
+ return addAttributeAtIndex(C, ReturnIndex, Attr);
+ }
+
+ /// Add a return value attribute to the list. Returns a new list because
+ /// attribute lists are immutable.
+ LLVM_NODISCARD AttributeList addRetAttributes(LLVMContext &C,
+ const AttrBuilder &B) const {
+ return addAttributesAtIndex(C, ReturnIndex, B);
+ }
/// Add an argument attribute to the list. Returns a new list because
/// attribute lists are immutable.
LLVM_NODISCARD AttributeList addParamAttribute(
LLVMContext &C, unsigned ArgNo, Attribute::AttrKind Kind) const {
- return addAttribute(C, ArgNo + FirstArgIndex, Kind);
+ return addAttributeAtIndex(C, ArgNo + FirstArgIndex, Kind);
}
/// Add an argument attribute to the list. Returns a new list because
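// Hedged example (editor illustration) of the new position-free helpers added
// in the hunk above, next to the index-based spelling they wrap; `F` is an
// assumed llvm::Function and `Ctx` its LLVMContext.
llvm::AttributeList AL = F.getAttributes();
AL = AL.addFnAttribute(Ctx, llvm::Attribute::NoUnwind);  // function attribute
AL = AL.addRetAttribute(Ctx, llvm::Attribute::NonNull);  // return-value attribute
// Equivalent older spelling, now routed through the *AtIndex methods:
AL = AL.addAttributeAtIndex(Ctx, llvm::AttributeList::FunctionIndex,
                            llvm::Attribute::NoUnwind);
F.setAttributes(AL);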
@@ -487,7 +539,7 @@ public:
LLVM_NODISCARD AttributeList
addParamAttribute(LLVMContext &C, unsigned ArgNo, StringRef Kind,
StringRef Value = StringRef()) const {
- return addAttribute(C, ArgNo + FirstArgIndex, Kind, Value);
+ return addAttributeAtIndex(C, ArgNo + FirstArgIndex, Kind, Value);
}
/// Add an attribute to the attribute list at the given arg indices. Returns a
@@ -501,34 +553,87 @@ public:
LLVM_NODISCARD AttributeList addParamAttributes(LLVMContext &C,
unsigned ArgNo,
const AttrBuilder &B) const {
- return addAttributes(C, ArgNo + FirstArgIndex, B);
+ return addAttributesAtIndex(C, ArgNo + FirstArgIndex, B);
}
/// Remove the specified attribute at the specified index from this
/// attribute list. Returns a new list because attribute lists are immutable.
- LLVM_NODISCARD AttributeList removeAttribute(LLVMContext &C, unsigned Index,
- Attribute::AttrKind Kind) const;
+ LLVM_NODISCARD AttributeList removeAttributeAtIndex(
+ LLVMContext &C, unsigned Index, Attribute::AttrKind Kind) const;
/// Remove the specified attribute at the specified index from this
/// attribute list. Returns a new list because attribute lists are immutable.
+ LLVM_NODISCARD AttributeList removeAttributeAtIndex(LLVMContext &C,
+ unsigned Index,
+ StringRef Kind) const;
LLVM_NODISCARD AttributeList removeAttribute(LLVMContext &C, unsigned Index,
- StringRef Kind) const;
+ StringRef Kind) const {
+ return removeAttributeAtIndex(C, Index, Kind);
+ }
/// Remove the specified attributes at the specified index from this
/// attribute list. Returns a new list because attribute lists are immutable.
- LLVM_NODISCARD AttributeList removeAttributes(
+ LLVM_NODISCARD AttributeList removeAttributesAtIndex(
LLVMContext &C, unsigned Index, const AttrBuilder &AttrsToRemove) const;
/// Remove all attributes at the specified index from this
/// attribute list. Returns a new list because attribute lists are immutable.
- LLVM_NODISCARD AttributeList removeAttributes(LLVMContext &C,
- unsigned Index) const;
+ LLVM_NODISCARD AttributeList removeAttributesAtIndex(LLVMContext &C,
+ unsigned Index) const;
+
+ /// Remove the specified attribute at the function index from this
+ /// attribute list. Returns a new list because attribute lists are immutable.
+ LLVM_NODISCARD AttributeList
+ removeFnAttribute(LLVMContext &C, Attribute::AttrKind Kind) const {
+ return removeAttributeAtIndex(C, FunctionIndex, Kind);
+ }
+
+ /// Remove the specified attribute at the function index from this
+ /// attribute list. Returns a new list because attribute lists are immutable.
+ LLVM_NODISCARD AttributeList removeFnAttribute(LLVMContext &C,
+ StringRef Kind) const {
+ return removeAttributeAtIndex(C, FunctionIndex, Kind);
+ }
+
+ /// Remove the specified attribute at the function index from this
+ /// attribute list. Returns a new list because attribute lists are immutable.
+ LLVM_NODISCARD AttributeList
+ removeFnAttributes(LLVMContext &C, const AttrBuilder &AttrsToRemove) const {
+ return removeAttributesAtIndex(C, FunctionIndex, AttrsToRemove);
+ }
+
+ /// Remove the attributes at the function index from this
+ /// attribute list. Returns a new list because attribute lists are immutable.
+ LLVM_NODISCARD AttributeList removeFnAttributes(LLVMContext &C) const {
+ return removeAttributesAtIndex(C, FunctionIndex);
+ }
+
+ /// Remove the specified attribute at the return value index from this
+ /// attribute list. Returns a new list because attribute lists are immutable.
+ LLVM_NODISCARD AttributeList
+ removeRetAttribute(LLVMContext &C, Attribute::AttrKind Kind) const {
+ return removeAttributeAtIndex(C, ReturnIndex, Kind);
+ }
+
+ /// Remove the specified attribute at the return value index from this
+ /// attribute list. Returns a new list because attribute lists are immutable.
+ LLVM_NODISCARD AttributeList removeRetAttribute(LLVMContext &C,
+ StringRef Kind) const {
+ return removeAttributeAtIndex(C, ReturnIndex, Kind);
+ }
+
+ /// Remove the specified attribute at the return value index from this
+ /// attribute list. Returns a new list because attribute lists are immutable.
+ LLVM_NODISCARD AttributeList
+ removeRetAttributes(LLVMContext &C, const AttrBuilder &AttrsToRemove) const {
+ return removeAttributesAtIndex(C, ReturnIndex, AttrsToRemove);
+ }
/// Remove the specified attribute at the specified arg index from this
/// attribute list. Returns a new list because attribute lists are immutable.
LLVM_NODISCARD AttributeList removeParamAttribute(
LLVMContext &C, unsigned ArgNo, Attribute::AttrKind Kind) const {
- return removeAttribute(C, ArgNo + FirstArgIndex, Kind);
+ return removeAttributeAtIndex(C, ArgNo + FirstArgIndex, Kind);
}
/// Remove the specified attribute at the specified arg index from this
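// Hedged counterpart (editor illustration) for the removal helpers above;
// `CB` is an assumed llvm::CallBase and `Ctx` its LLVMContext.
llvm::AttributeList AL = CB.getAttributes();
AL = AL.removeFnAttribute(Ctx, llvm::Attribute::AlwaysInline);
llvm::AttrBuilder NoAliasB;
NoAliasB.addAttribute(llvm::Attribute::NoAlias);
AL = AL.removeRetAttributes(Ctx, NoAliasB);
CB.setAttributes(AL);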
@@ -536,80 +641,55 @@ public:
LLVM_NODISCARD AttributeList removeParamAttribute(LLVMContext &C,
unsigned ArgNo,
StringRef Kind) const {
- return removeAttribute(C, ArgNo + FirstArgIndex, Kind);
+ return removeAttributeAtIndex(C, ArgNo + FirstArgIndex, Kind);
}
/// Remove the specified attribute at the specified arg index from this
/// attribute list. Returns a new list because attribute lists are immutable.
LLVM_NODISCARD AttributeList removeParamAttributes(
LLVMContext &C, unsigned ArgNo, const AttrBuilder &AttrsToRemove) const {
- return removeAttributes(C, ArgNo + FirstArgIndex, AttrsToRemove);
+ return removeAttributesAtIndex(C, ArgNo + FirstArgIndex, AttrsToRemove);
}
/// Remove all attributes at the specified arg index from this
/// attribute list. Returns a new list because attribute lists are immutable.
LLVM_NODISCARD AttributeList removeParamAttributes(LLVMContext &C,
unsigned ArgNo) const {
- return removeAttributes(C, ArgNo + FirstArgIndex);
+ return removeAttributesAtIndex(C, ArgNo + FirstArgIndex);
}
/// Replace the type contained by attribute \p AttrKind at index \p ArgNo with
/// \p ReplacementTy, preserving all other attributes.
- LLVM_NODISCARD AttributeList replaceAttributeType(LLVMContext &C,
- unsigned ArgNo,
- Attribute::AttrKind Kind,
- Type *ReplacementTy) const {
- Attribute Attr = getAttribute(ArgNo, Kind);
- auto Attrs = removeAttribute(C, ArgNo, Kind);
- return Attrs.addAttribute(C, ArgNo, Attr.getWithNewType(C, ReplacementTy));
+ LLVM_NODISCARD AttributeList replaceAttributeTypeAtIndex(
+ LLVMContext &C, unsigned ArgNo, Attribute::AttrKind Kind,
+ Type *ReplacementTy) const {
+ Attribute Attr = getAttributeAtIndex(ArgNo, Kind);
+ auto Attrs = removeAttributeAtIndex(C, ArgNo, Kind);
+ return Attrs.addAttributeAtIndex(C, ArgNo,
+ Attr.getWithNewType(C, ReplacementTy));
}
/// \brief Add the dereferenceable attribute to the attribute set at the given
/// index. Returns a new list because attribute lists are immutable.
- LLVM_NODISCARD AttributeList addDereferenceableAttr(LLVMContext &C,
- unsigned Index,
- uint64_t Bytes) const;
+ LLVM_NODISCARD AttributeList addDereferenceableRetAttr(LLVMContext &C,
+ uint64_t Bytes) const;
/// \brief Add the dereferenceable attribute to the attribute set at the given
/// arg index. Returns a new list because attribute lists are immutable.
LLVM_NODISCARD AttributeList addDereferenceableParamAttr(
- LLVMContext &C, unsigned ArgNo, uint64_t Bytes) const {
- return addDereferenceableAttr(C, ArgNo + FirstArgIndex, Bytes);
- }
-
- /// Add the dereferenceable_or_null attribute to the attribute set at
- /// the given index. Returns a new list because attribute lists are immutable.
- LLVM_NODISCARD AttributeList addDereferenceableOrNullAttr(
- LLVMContext &C, unsigned Index, uint64_t Bytes) const;
+ LLVMContext &C, unsigned ArgNo, uint64_t Bytes) const;
/// Add the dereferenceable_or_null attribute to the attribute set at
/// the given arg index. Returns a new list because attribute lists are
/// immutable.
LLVM_NODISCARD AttributeList addDereferenceableOrNullParamAttr(
- LLVMContext &C, unsigned ArgNo, uint64_t Bytes) const {
- return addDereferenceableOrNullAttr(C, ArgNo + FirstArgIndex, Bytes);
- }
-
- /// Add the allocsize attribute to the attribute set at the given index.
- /// Returns a new list because attribute lists are immutable.
- LLVM_NODISCARD AttributeList
- addAllocSizeAttr(LLVMContext &C, unsigned Index, unsigned ElemSizeArg,
- const Optional<unsigned> &NumElemsArg);
+ LLVMContext &C, unsigned ArgNo, uint64_t Bytes) const;
/// Add the allocsize attribute to the attribute set at the given arg index.
/// Returns a new list because attribute lists are immutable.
LLVM_NODISCARD AttributeList
addAllocSizeParamAttr(LLVMContext &C, unsigned ArgNo, unsigned ElemSizeArg,
- const Optional<unsigned> &NumElemsArg) {
- return addAllocSizeAttr(C, ArgNo + FirstArgIndex, ElemSizeArg, NumElemsArg);
- }
-
- /// Add the vscale_range attribute to the attribute set at the given index.
- /// Returns a new list because attribute lists are immutable.
- LLVM_NODISCARD AttributeList addVScaleRangeAttr(LLVMContext &C,
- unsigned Index,
- unsigned MinValue,
- unsigned MaxValue);
+ const Optional<unsigned> &NumElemsArg);
//===--------------------------------------------------------------------===//
// AttributeList Accessors
@@ -620,48 +700,59 @@ public:
/// The attributes for the argument or parameter at the given index are
/// returned.
- AttributeSet getParamAttributes(unsigned ArgNo) const;
+ AttributeSet getParamAttrs(unsigned ArgNo) const;
/// The attributes for the ret value are returned.
- AttributeSet getRetAttributes() const;
+ AttributeSet getRetAttrs() const;
/// The function attributes are returned.
- AttributeSet getFnAttributes() const;
+ AttributeSet getFnAttrs() const;
/// Return true if the attribute exists at the given index.
- bool hasAttribute(unsigned Index, Attribute::AttrKind Kind) const;
+ bool hasAttributeAtIndex(unsigned Index, Attribute::AttrKind Kind) const;
/// Return true if the attribute exists at the given index.
- bool hasAttribute(unsigned Index, StringRef Kind) const;
+ bool hasAttributeAtIndex(unsigned Index, StringRef Kind) const;
/// Return true if attribute exists at the given index.
- bool hasAttributes(unsigned Index) const;
+ bool hasAttributesAtIndex(unsigned Index) const;
/// Return true if the attribute exists for the given argument
bool hasParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) const {
- return hasAttribute(ArgNo + FirstArgIndex, Kind);
+ return hasAttributeAtIndex(ArgNo + FirstArgIndex, Kind);
}
/// Return true if the attribute exists for the given argument
bool hasParamAttr(unsigned ArgNo, StringRef Kind) const {
- return hasAttribute(ArgNo + FirstArgIndex, Kind);
+ return hasAttributeAtIndex(ArgNo + FirstArgIndex, Kind);
}
/// Return true if attributes exists for the given argument
bool hasParamAttrs(unsigned ArgNo) const {
- return hasAttributes(ArgNo + FirstArgIndex);
+ return hasAttributesAtIndex(ArgNo + FirstArgIndex);
+ }
+
+ /// Return true if the attribute exists for the return value.
+ bool hasRetAttr(Attribute::AttrKind Kind) const {
+ return hasAttributeAtIndex(ReturnIndex, Kind);
}
- /// Equivalent to hasAttribute(AttributeList::FunctionIndex, Kind) but
- /// may be faster.
- bool hasFnAttribute(Attribute::AttrKind Kind) const;
+ /// Return true if the attribute exists for the return value.
+ bool hasRetAttr(StringRef Kind) const {
+ return hasAttributeAtIndex(ReturnIndex, Kind);
+ }
+
+ /// Return true if attributes exist for the return value.
+ bool hasRetAttrs() const { return hasAttributesAtIndex(ReturnIndex); }
+
+ /// Return true if the attribute exists for the function.
+ bool hasFnAttr(Attribute::AttrKind Kind) const;
- /// Equivalent to hasAttribute(AttributeList::FunctionIndex, Kind) but
- /// may be faster.
- bool hasFnAttribute(StringRef Kind) const;
+ /// Return true if the attribute exists for the function.
+ bool hasFnAttr(StringRef Kind) const;
- /// Equivalent to hasAttribute(ArgNo + FirstArgIndex, Kind).
- bool hasParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const;
+  /// Return true if attributes exist for the function.
+ bool hasFnAttrs() const { return hasAttributesAtIndex(FunctionIndex); }
/// Return true if the specified attribute is set for at least one
/// parameter or for the return value. If Index is not nullptr, the index
@@ -670,19 +761,29 @@ public:
unsigned *Index = nullptr) const;
/// Return the attribute object that exists at the given index.
- Attribute getAttribute(unsigned Index, Attribute::AttrKind Kind) const;
+ Attribute getAttributeAtIndex(unsigned Index, Attribute::AttrKind Kind) const;
/// Return the attribute object that exists at the given index.
- Attribute getAttribute(unsigned Index, StringRef Kind) const;
+ Attribute getAttributeAtIndex(unsigned Index, StringRef Kind) const;
/// Return the attribute object that exists at the arg index.
Attribute getParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) const {
- return getAttribute(ArgNo + FirstArgIndex, Kind);
+ return getAttributeAtIndex(ArgNo + FirstArgIndex, Kind);
}
/// Return the attribute object that exists at the given index.
Attribute getParamAttr(unsigned ArgNo, StringRef Kind) const {
- return getAttribute(ArgNo + FirstArgIndex, Kind);
+ return getAttributeAtIndex(ArgNo + FirstArgIndex, Kind);
+ }
+
+ /// Return the attribute object that exists for the function.
+ Attribute getFnAttr(Attribute::AttrKind Kind) const {
+ return getAttributeAtIndex(FunctionIndex, Kind);
+ }
+
+ /// Return the attribute object that exists for the function.
+ Attribute getFnAttr(StringRef Kind) const {
+ return getAttributeAtIndex(FunctionIndex, Kind);
}
/// Return the alignment of the return value.
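// Hedged query-side example (editor illustration) of the renamed accessors
// above; `F` is an assumed llvm::Function.
llvm::AttributeList AL = F.getAttributes();
bool NoInline = AL.hasFnAttr(llvm::Attribute::NoInline);   // was hasFnAttribute
bool RetNonNull = AL.hasRetAttr(llvm::Attribute::NonNull); // new return-value query
llvm::Attribute TargetCPU = AL.getFnAttr("target-cpu");    // string attribute lookup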
@@ -712,34 +813,26 @@ public:
/// Return the elementtype type for the specified function parameter.
Type *getParamElementType(unsigned ArgNo) const;
- /// Get the stack alignment.
- MaybeAlign getStackAlignment(unsigned Index) const;
+ /// Get the stack alignment of the function.
+ MaybeAlign getFnStackAlignment() const;
- /// Get the number of dereferenceable bytes (or zero if unknown).
- uint64_t getDereferenceableBytes(unsigned Index) const;
+ /// Get the stack alignment of the return value.
+ MaybeAlign getRetStackAlignment() const;
- /// Get the number of dereferenceable bytes (or zero if unknown) of an
- /// arg.
- uint64_t getParamDereferenceableBytes(unsigned ArgNo) const {
- return getDereferenceableBytes(ArgNo + FirstArgIndex);
- }
+ /// Get the number of dereferenceable bytes (or zero if unknown) of the return
+ /// value.
+ uint64_t getRetDereferenceableBytes() const;
- /// Get the number of dereferenceable_or_null bytes (or zero if
- /// unknown).
- uint64_t getDereferenceableOrNullBytes(unsigned Index) const;
+ /// Get the number of dereferenceable bytes (or zero if unknown) of an arg.
+ uint64_t getParamDereferenceableBytes(unsigned Index) const;
- /// Get the number of dereferenceable_or_null bytes (or zero if
- /// unknown) of an arg.
- uint64_t getParamDereferenceableOrNullBytes(unsigned ArgNo) const {
- return getDereferenceableOrNullBytes(ArgNo + FirstArgIndex);
- }
+ /// Get the number of dereferenceable_or_null bytes (or zero if unknown) of
+ /// the return value.
+ uint64_t getRetDereferenceableOrNullBytes() const;
- /// Get the allocsize argument numbers (or pair(0, 0) if unknown).
- std::pair<unsigned, Optional<unsigned>>
- getAllocSizeArgs(unsigned Index) const;
-
- /// Get the vscale_range argument numbers (or pair(0, 0) if unknown).
- std::pair<unsigned, unsigned> getVScaleRangeArgs(unsigned Index) const;
+ /// Get the number of dereferenceable_or_null bytes (or zero if unknown) of an
+ /// arg.
+ uint64_t getParamDereferenceableOrNullBytes(unsigned ArgNo) const;
/// Return the attributes at the index as a string.
std::string getAsString(unsigned Index, bool InAttrGrp = false) const;
@@ -758,9 +851,32 @@ public:
unsigned getNumAttrSets() const;
- /// Use these to iterate over the valid attribute indices.
- unsigned index_begin() const { return AttributeList::FunctionIndex; }
- unsigned index_end() const { return getNumAttrSets() - 1; }
+ // Implementation of indexes(). Produces iterators that wrap an index. Mostly
+ // to hide the awkwardness of unsigned wrapping when iterating over valid
+ // indexes.
+ struct index_iterator {
+ unsigned NumAttrSets;
+ index_iterator(int NumAttrSets) : NumAttrSets(NumAttrSets) {}
+ struct int_wrapper {
+ int_wrapper(unsigned i) : i(i) {}
+ unsigned i;
+ unsigned operator*() { return i; }
+ bool operator!=(const int_wrapper &Other) { return i != Other.i; }
+ int_wrapper &operator++() {
+ // This is expected to undergo unsigned wrapping since FunctionIndex is
+ // ~0 and that's where we start.
+ ++i;
+ return *this;
+ }
+ };
+
+ int_wrapper begin() { return int_wrapper(AttributeList::FunctionIndex); }
+
+ int_wrapper end() { return int_wrapper(NumAttrSets - 1); }
+ };
+
+ /// Use this to iterate over the valid attribute indexes.
+ index_iterator indexes() const { return index_iterator(getNumAttrSets()); }
/// operator==/!= - Provide equality predicates.
bool operator==(const AttributeList &RHS) const { return pImpl == RHS.pImpl; }
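// Hedged example (editor illustration) of iterating over attribute indexes
// with the new indexes() helper above, replacing the removed
// index_begin()/index_end() pair; `AL` is an assumed AttributeList.
for (unsigned Index : AL.indexes())
  llvm::errs() << Index << ": " << AL.getAsString(Index) << "\n";
// Iteration starts at FunctionIndex (~0u), wraps to ReturnIndex (0) and then
// visits the argument indexes, mirroring the int_wrapper increment above.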
@@ -782,7 +898,7 @@ public:
//===----------------------------------------------------------------------===//
/// \class
/// Provide DenseMapInfo for AttributeList.
-template <> struct DenseMapInfo<AttributeList> {
+template <> struct DenseMapInfo<AttributeList, void> {
static AttributeList getEmptyKey() {
auto Val = static_cast<uintptr_t>(-1);
Val <<= PointerLikeTypeTraits<void*>::NumLowBitsAvailable;
@@ -814,14 +930,10 @@ template <> struct DenseMapInfo<AttributeList> {
class AttrBuilder {
std::bitset<Attribute::EndAttrKinds> Attrs;
std::map<SmallString<32>, SmallString<32>, std::less<>> TargetDepAttrs;
- MaybeAlign Alignment;
- MaybeAlign StackAlignment;
- uint64_t DerefBytes = 0;
- uint64_t DerefOrNullBytes = 0;
- uint64_t AllocSizeArgs = 0;
- uint64_t VScaleRangeArgs = 0;
+ std::array<uint64_t, Attribute::NumIntAttrKinds> IntAttrs = {};
std::array<Type *, Attribute::NumTypeAttrKinds> TypeAttrs = {};
+ Optional<unsigned> kindToIntIndex(Attribute::AttrKind Kind) const;
Optional<unsigned> kindToTypeIndex(Attribute::AttrKind Kind) const;
public:
@@ -891,19 +1003,31 @@ public:
/// Return true if the builder has an alignment attribute.
bool hasAlignmentAttr() const;
+ /// Return raw (possibly packed/encoded) value of integer attribute or 0 if
+ /// not set.
+ uint64_t getRawIntAttr(Attribute::AttrKind Kind) const;
+
/// Retrieve the alignment attribute, if it exists.
- MaybeAlign getAlignment() const { return Alignment; }
+ MaybeAlign getAlignment() const {
+ return MaybeAlign(getRawIntAttr(Attribute::Alignment));
+ }
/// Retrieve the stack alignment attribute, if it exists.
- MaybeAlign getStackAlignment() const { return StackAlignment; }
+ MaybeAlign getStackAlignment() const {
+ return MaybeAlign(getRawIntAttr(Attribute::StackAlignment));
+ }
/// Retrieve the number of dereferenceable bytes, if the
/// dereferenceable attribute exists (zero is returned otherwise).
- uint64_t getDereferenceableBytes() const { return DerefBytes; }
+ uint64_t getDereferenceableBytes() const {
+ return getRawIntAttr(Attribute::Dereferenceable);
+ }
/// Retrieve the number of dereferenceable_or_null bytes, if the
/// dereferenceable_or_null attribute exists (zero is returned otherwise).
- uint64_t getDereferenceableOrNullBytes() const { return DerefOrNullBytes; }
+ uint64_t getDereferenceableOrNullBytes() const {
+ return getRawIntAttr(Attribute::DereferenceableOrNull);
+ }
/// Retrieve type for the given type attribute.
Type *getTypeAttr(Attribute::AttrKind Kind) const;
@@ -933,6 +1057,9 @@ public:
/// it doesn't exist, pair(0, 0) is returned.
std::pair<unsigned, unsigned> getVScaleRangeArgs() const;
+ /// Add integer attribute with raw value (packed/encoded if necessary).
+ AttrBuilder &addRawIntAttr(Attribute::AttrKind Kind, uint64_t Value);
+
/// This turns an alignment into the form used internally in Attribute.
/// This call has no effect if Align is not set.
AttrBuilder &addAlignmentAttr(MaybeAlign Align);
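// Hedged illustration (editor addition) of the unified integer-attribute
// storage above: the typed getters now decode the packed raw value.
// addDereferenceableAttr() is assumed from the existing AttrBuilder API.
llvm::AttrBuilder B;
B.addAlignmentAttr(llvm::MaybeAlign(16));
B.addDereferenceableAttr(64);
llvm::MaybeAlign A = B.getAlignment();  // decodes Attribute::Alignment
uint64_t DerefBytes = B.getRawIntAttr(llvm::Attribute::Dereferenceable); // 64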
diff --git a/llvm/include/llvm/IR/Attributes.td b/llvm/include/llvm/IR/Attributes.td
index 99b474161df7..de25b51a6292 100644
--- a/llvm/include/llvm/IR/Attributes.td
+++ b/llvm/include/llvm/IR/Attributes.td
@@ -86,6 +86,9 @@ def Dereferenceable : IntAttr<"dereferenceable", [ParamAttr, RetAttr]>;
def DereferenceableOrNull : IntAttr<"dereferenceable_or_null",
[ParamAttr, RetAttr]>;
+/// Do not instrument function with sanitizers.
+def DisableSanitizerInstrumentation: EnumAttr<"disable_sanitizer_instrumentation", [FnAttr]>;
+
/// Provide pointer element type to intrinsic.
def ElementType : TypeAttr<"elementtype", [ParamAttr]>;
diff --git a/llvm/include/llvm/IR/BasicBlock.h b/llvm/include/llvm/IR/BasicBlock.h
index 0af4ec4ef138..184ddfc01c29 100644
--- a/llvm/include/llvm/IR/BasicBlock.h
+++ b/llvm/include/llvm/IR/BasicBlock.h
@@ -167,8 +167,8 @@ public:
/// Returns a pointer to the first instruction in this block that is not a
/// PHINode or a debug intrinsic, or any pseudo operation if \c SkipPseudoOp
/// is true.
- const Instruction *getFirstNonPHIOrDbg(bool SkipPseudoOp = false) const;
- Instruction *getFirstNonPHIOrDbg(bool SkipPseudoOp = false) {
+ const Instruction *getFirstNonPHIOrDbg(bool SkipPseudoOp = true) const;
+ Instruction *getFirstNonPHIOrDbg(bool SkipPseudoOp = true) {
return const_cast<Instruction *>(
static_cast<const BasicBlock *>(this)->getFirstNonPHIOrDbg(
SkipPseudoOp));
@@ -178,8 +178,8 @@ public:
/// PHINode, a debug intrinsic, or a lifetime intrinsic, or any pseudo
/// operation if \c SkipPseudoOp is true.
const Instruction *
- getFirstNonPHIOrDbgOrLifetime(bool SkipPseudoOp = false) const;
- Instruction *getFirstNonPHIOrDbgOrLifetime(bool SkipPseudoOp = false) {
+ getFirstNonPHIOrDbgOrLifetime(bool SkipPseudoOp = true) const;
+ Instruction *getFirstNonPHIOrDbgOrLifetime(bool SkipPseudoOp = true) {
return const_cast<Instruction *>(
static_cast<const BasicBlock *>(this)->getFirstNonPHIOrDbgOrLifetime(
SkipPseudoOp));
@@ -200,14 +200,14 @@ public:
/// SkipPseudoOp is true.
iterator_range<filter_iterator<BasicBlock::const_iterator,
std::function<bool(const Instruction &)>>>
- instructionsWithoutDebug(bool SkipPseudoOp = false) const;
+ instructionsWithoutDebug(bool SkipPseudoOp = true) const;
/// Return an iterator range over the instructions in the block, skipping any
/// debug instructions. Skip and any pseudo operations as well if \c
/// SkipPseudoOp is true.
iterator_range<
filter_iterator<BasicBlock::iterator, std::function<bool(Instruction &)>>>
- instructionsWithoutDebug(bool SkipPseudoOp = false);
+ instructionsWithoutDebug(bool SkipPseudoOp = true);
/// Return the size of the basic block ignoring debug instructions
filter_iterator<BasicBlock::const_iterator,
diff --git a/llvm/include/llvm/IR/Constant.h b/llvm/include/llvm/IR/Constant.h
index 4e2022b36e30..c8999b71f3d1 100644
--- a/llvm/include/llvm/IR/Constant.h
+++ b/llvm/include/llvm/IR/Constant.h
@@ -198,6 +198,12 @@ public:
/// hanging off of the globals.
void removeDeadConstantUsers() const;
+ /// Return true if the constant has exactly one live use.
+ ///
+ /// This returns the same result as calling Value::hasOneUse after
+ /// Constant::removeDeadConstantUsers, but doesn't remove dead constants.
+ bool hasOneLiveUse() const;
+
const Constant *stripPointerCasts() const {
return cast<Constant>(Value::stripPointerCasts());
}
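// Hedged note (editor illustration) on the new query above: unlike
// Value::hasOneUse(), it ignores dead constant users and does not mutate the
// constant; `GV` is an assumed llvm::GlobalVariable *.
if (GV->hasOneLiveUse()) {
  // Exactly one live use remains; dangling ConstantExpr users are not counted.
}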
diff --git a/llvm/include/llvm/IR/ConstantRange.h b/llvm/include/llvm/IR/ConstantRange.h
index 44b8c395c89e..fea4d0da1d0d 100644
--- a/llvm/include/llvm/IR/ConstantRange.h
+++ b/llvm/include/llvm/IR/ConstantRange.h
@@ -128,6 +128,28 @@ public:
/// NOTE: false does not mean that inverse predicate holds!
bool icmp(CmpInst::Predicate Pred, const ConstantRange &Other) const;
+ /// Return true iff CR1 ult CR2 is equivalent to CR1 slt CR2.
+ /// Does not depend on strictness/direction of the predicate.
+ static bool
+ areInsensitiveToSignednessOfICmpPredicate(const ConstantRange &CR1,
+ const ConstantRange &CR2);
+
+ /// Return true iff CR1 ult CR2 is equivalent to CR1 sge CR2.
+ /// Does not depend on strictness/direction of the predicate.
+ static bool
+ areInsensitiveToSignednessOfInvertedICmpPredicate(const ConstantRange &CR1,
+ const ConstantRange &CR2);
+
+ /// If the comparison between constant ranges this and Other
+ /// is insensitive to the signedness of the comparison predicate,
+ /// return a predicate equivalent to \p Pred, with flipped signedness
+ /// (i.e. unsigned instead of signed or vice versa), and maybe inverted,
+ /// otherwise returns CmpInst::Predicate::BAD_ICMP_PREDICATE.
+ static CmpInst::Predicate
+ getEquivalentPredWithFlippedSignedness(CmpInst::Predicate Pred,
+ const ConstantRange &CR1,
+ const ConstantRange &CR2);
+
/// Produce the largest range containing all X such that "X BinOp Y" is
/// guaranteed not to wrap (overflow) for *all* Y in Other. However, there may
/// be *some* Y in Other for which additional X not contained in the result
@@ -167,6 +189,11 @@ public:
/// successful.
bool getEquivalentICmp(CmpInst::Predicate &Pred, APInt &RHS) const;
+ /// Set up \p Pred, \p RHS and \p Offset such that (V + Offset) Pred RHS
+ /// is true iff V is in the range. Prefers using Offset == 0 if possible.
+ void
+ getEquivalentICmp(CmpInst::Predicate &Pred, APInt &RHS, APInt &Offset) const;
+
/// Return the lower value for this range.
const APInt &getLower() const { return Lower; }
@@ -305,6 +332,14 @@ public:
ConstantRange unionWith(const ConstantRange &CR,
PreferredRangeType Type = Smallest) const;
+ /// Intersect the two ranges and return the result if it can be represented
+ /// exactly, otherwise return None.
+ Optional<ConstantRange> exactIntersectWith(const ConstantRange &CR) const;
+
+ /// Union the two ranges and return the result if it can be represented
+ /// exactly, otherwise return None.
+ Optional<ConstantRange> exactUnionWith(const ConstantRange &CR) const;
+
/// Return a new range representing the possible values resulting
/// from an application of the specified cast operator to this range. \p
/// BitWidth is the target bitwidth of the cast. For casts which don't
@@ -383,6 +418,11 @@ public:
/// treating both this and \p Other as unsigned ranges.
ConstantRange multiply(const ConstantRange &Other) const;
+ /// Return range of possible values for a signed multiplication of this and
+ /// \p Other. However, if overflow is possible always return a full range
+ /// rather than trying to determine a more precise result.
+ ConstantRange smul_fast(const ConstantRange &Other) const;
+
/// Return a new range representing the possible values resulting
/// from a signed maximum of a value in this range and a value in \p Other.
ConstantRange smax(const ConstantRange &Other) const;
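// Hedged example (editor illustration) of the exact set operations declared
// above; the concrete ranges are only illustrative.
llvm::ConstantRange A(llvm::APInt(8, 0), llvm::APInt(8, 10)); // [0, 10)
llvm::ConstantRange B(llvm::APInt(8, 5), llvm::APInt(8, 20)); // [5, 20)
// The intersection [5, 10) is exactly representable, so a value is returned.
llvm::Optional<llvm::ConstantRange> I = A.exactIntersectWith(B);
// The union [0, 20) is contiguous here and therefore also exact; for disjoint
// inputs exactUnionWith() would return None instead of over-approximating.
llvm::Optional<llvm::ConstantRange> U = A.exactUnionWith(B);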
diff --git a/llvm/include/llvm/IR/Constants.h b/llvm/include/llvm/IR/Constants.h
index 1f716a45b70f..71414d95d9a3 100644
--- a/llvm/include/llvm/IR/Constants.h
+++ b/llvm/include/llvm/IR/Constants.h
@@ -191,19 +191,19 @@ public:
/// This is just a convenience method to make client code smaller for a
/// common code. It also correctly performs the comparison without the
/// potential for an assertion from getZExtValue().
- bool isZero() const { return Val.isNullValue(); }
+ bool isZero() const { return Val.isZero(); }
/// This is just a convenience method to make client code smaller for a
/// common case. It also correctly performs the comparison without the
/// potential for an assertion from getZExtValue().
/// Determine if the value is one.
- bool isOne() const { return Val.isOneValue(); }
+ bool isOne() const { return Val.isOne(); }
/// This function will return true iff every bit in this constant is set
/// to true.
/// @returns true iff this constant's bits are all set to true.
/// Determine if the value is all ones.
- bool isMinusOne() const { return Val.isAllOnesValue(); }
+ bool isMinusOne() const { return Val.isAllOnes(); }
/// This function will return true iff this constant represents the largest
/// value that may be represented by the constant's type.
@@ -1287,10 +1287,6 @@ public:
/// Return a string representation for an opcode.
const char *getOpcodeName() const;
- /// Return a constant expression identical to this one, but with the specified
- /// operand set to the specified value.
- Constant *getWithOperandReplaced(unsigned OpNo, Constant *Op) const;
-
/// This returns the current constant expression with the operands replaced
/// with the specified values. The specified array must have the same number
/// of operands as our current one.
@@ -1312,13 +1308,14 @@ public:
Type *SrcTy = nullptr) const;
/// Returns an Instruction which implements the same operation as this
- /// ConstantExpr. The instruction is not linked to any basic block.
+ /// ConstantExpr. If \p InsertBefore is not null, the new instruction is
+ /// inserted before it, otherwise it is not inserted into any basic block.
///
/// A better approach to this could be to have a constructor for Instruction
/// which would take a ConstantExpr parameter, but that would have spread
/// implementation details of ConstantExpr outside of Constants.cpp, which
/// would make it harder to remove ConstantExprs altogether.
- Instruction *getAsInstruction() const;
+ Instruction *getAsInstruction(Instruction *InsertBefore = nullptr) const;
/// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const Value *V) {
diff --git a/llvm/include/llvm/IR/DIBuilder.h b/llvm/include/llvm/IR/DIBuilder.h
index 23ac47ca4d81..61c6dd885980 100644
--- a/llvm/include/llvm/IR/DIBuilder.h
+++ b/llvm/include/llvm/IR/DIBuilder.h
@@ -181,7 +181,7 @@ namespace llvm {
DIFile *File);
/// Create a single enumerator value.
- DIEnumerator *createEnumerator(StringRef Name, APSInt Value);
+ DIEnumerator *createEnumerator(StringRef Name, const APSInt &Value);
DIEnumerator *createEnumerator(StringRef Name, uint64_t Val,
bool IsUnsigned = false);
@@ -219,11 +219,12 @@ namespace llvm {
/// \param AlignInBits Alignment. (optional)
/// \param DWARFAddressSpace DWARF address space. (optional)
/// \param Name Pointer type name. (optional)
- DIDerivedType *createPointerType(DIType *PointeeTy, uint64_t SizeInBits,
- uint32_t AlignInBits = 0,
- Optional<unsigned> DWARFAddressSpace =
- None,
- StringRef Name = "");
+ /// \param Annotations Member annotations.
+ DIDerivedType *
+ createPointerType(DIType *PointeeTy, uint64_t SizeInBits,
+ uint32_t AlignInBits = 0,
+ Optional<unsigned> DWARFAddressSpace = None,
+ StringRef Name = "", DINodeArray Annotations = nullptr);
/// Create debugging information entry for a pointer to member.
/// \param PointeeTy Type pointed to by this pointer.
@@ -250,9 +251,11 @@ namespace llvm {
/// \param LineNo Line number.
/// \param Context The surrounding context for the typedef.
/// \param AlignInBits Alignment. (optional)
+ /// \param Annotations Annotations. (optional)
DIDerivedType *createTypedef(DIType *Ty, StringRef Name, DIFile *File,
unsigned LineNo, DIScope *Context,
- uint32_t AlignInBits = 0);
+ uint32_t AlignInBits = 0,
+ DINodeArray Annotations = nullptr);
/// Create debugging information entry for a 'friend'.
DIDerivedType *createFriend(DIType *Ty, DIType *FriendTy);
@@ -279,12 +282,13 @@ namespace llvm {
/// \param OffsetInBits Member offset.
/// \param Flags Flags to encode member attribute, e.g. private
/// \param Ty Parent type.
+ /// \param Annotations Member annotations.
DIDerivedType *createMemberType(DIScope *Scope, StringRef Name,
DIFile *File, unsigned LineNo,
- uint64_t SizeInBits,
- uint32_t AlignInBits,
+ uint64_t SizeInBits, uint32_t AlignInBits,
uint64_t OffsetInBits,
- DINode::DIFlags Flags, DIType *Ty);
+ DINode::DIFlags Flags, DIType *Ty,
+ DINodeArray Annotations = nullptr);
/// Create debugging information entry for a variant. A variant
/// normally should be a member of a variant part.
@@ -317,10 +321,14 @@ namespace llvm {
/// \param StorageOffsetInBits Member storage offset.
/// \param Flags Flags to encode member attribute.
/// \param Ty Parent type.
- DIDerivedType *createBitFieldMemberType(
- DIScope *Scope, StringRef Name, DIFile *File, unsigned LineNo,
- uint64_t SizeInBits, uint64_t OffsetInBits,
- uint64_t StorageOffsetInBits, DINode::DIFlags Flags, DIType *Ty);
+ /// \param Annotations Member annotations.
+ DIDerivedType *createBitFieldMemberType(DIScope *Scope, StringRef Name,
+ DIFile *File, unsigned LineNo,
+ uint64_t SizeInBits,
+ uint64_t OffsetInBits,
+ uint64_t StorageOffsetInBits,
+ DINode::DIFlags Flags, DIType *Ty,
+ DINodeArray Annotations = nullptr);
/// Create debugging information entry for a
/// C++ static data member.
@@ -586,7 +594,7 @@ namespace llvm {
unsigned Tag, StringRef Name, DIScope *Scope, DIFile *F, unsigned Line,
unsigned RuntimeLang = 0, uint64_t SizeInBits = 0,
uint32_t AlignInBits = 0, DINode::DIFlags Flags = DINode::FlagFwdDecl,
- StringRef UniqueIdentifier = "");
+ StringRef UniqueIdentifier = "", DINodeArray Annotations = nullptr);
/// Retain DIScope* in a module even if it is not referenced
/// through debug info anchors.
@@ -636,7 +644,8 @@ namespace llvm {
DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *File,
unsigned LineNo, DIType *Ty, bool IsLocalToUnit, bool isDefined = true,
DIExpression *Expr = nullptr, MDNode *Decl = nullptr,
- MDTuple *TemplateParams = nullptr, uint32_t AlignInBits = 0);
+ MDTuple *TemplateParams = nullptr, uint32_t AlignInBits = 0,
+ DINodeArray Annotations = nullptr);
/// Identical to createGlobalVariable
/// except that the resulting DbgNode is temporary and meant to be RAUWed.
@@ -682,7 +691,8 @@ namespace llvm {
createParameterVariable(DIScope *Scope, StringRef Name, unsigned ArgNo,
DIFile *File, unsigned LineNo, DIType *Ty,
bool AlwaysPreserve = false,
- DINode::DIFlags Flags = DINode::FlagZero);
+ DINode::DIFlags Flags = DINode::FlagZero,
+ DINodeArray Annotations = nullptr);
/// Create a new descriptor for the specified
/// variable which has a complex address expression for its address.
@@ -711,6 +721,7 @@ namespace llvm {
/// \param SPFlags Additional flags specific to subprograms.
/// \param TParams Function template parameters.
/// \param ThrownTypes Exception types this function may throw.
+ /// \param Annotations Attribute Annotations.
DISubprogram *
createFunction(DIScope *Scope, StringRef Name, StringRef LinkageName,
DIFile *File, unsigned LineNo, DISubroutineType *Ty,
@@ -718,7 +729,8 @@ namespace llvm {
DISubprogram::DISPFlags SPFlags = DISubprogram::SPFlagZero,
DITemplateParameterArray TParams = nullptr,
DISubprogram *Decl = nullptr,
- DITypeArray ThrownTypes = nullptr);
+ DITypeArray ThrownTypes = nullptr,
+ DINodeArray Annotations = nullptr);
/// Identical to createFunction,
/// except that the resulting DbgNode is meant to be RAUWed.
@@ -818,29 +830,35 @@ namespace llvm {
unsigned Line, unsigned Col);
/// Create a descriptor for an imported module.
- /// \param Context The scope this module is imported into
- /// \param NS The namespace being imported here.
- /// \param File File where the declaration is located.
- /// \param Line Line number of the declaration.
+ /// \param Context The scope this module is imported into
+ /// \param NS The namespace being imported here.
+ /// \param File File where the declaration is located.
+ /// \param Line Line number of the declaration.
+ /// \param Elements Renamed elements.
DIImportedEntity *createImportedModule(DIScope *Context, DINamespace *NS,
- DIFile *File, unsigned Line);
+ DIFile *File, unsigned Line,
+ DINodeArray Elements = nullptr);
/// Create a descriptor for an imported module.
/// \param Context The scope this module is imported into.
/// \param NS An aliased namespace.
/// \param File File where the declaration is located.
/// \param Line Line number of the declaration.
+ /// \param Elements Renamed elements.
DIImportedEntity *createImportedModule(DIScope *Context,
DIImportedEntity *NS, DIFile *File,
- unsigned Line);
+ unsigned Line,
+ DINodeArray Elements = nullptr);
/// Create a descriptor for an imported module.
- /// \param Context The scope this module is imported into.
- /// \param M The module being imported here
- /// \param File File where the declaration is located.
- /// \param Line Line number of the declaration.
+ /// \param Context The scope this module is imported into.
+ /// \param M The module being imported here
+ /// \param File File where the declaration is located.
+ /// \param Line Line number of the declaration.
+ /// \param Elements Renamed elements.
DIImportedEntity *createImportedModule(DIScope *Context, DIModule *M,
- DIFile *File, unsigned Line);
+ DIFile *File, unsigned Line,
+ DINodeArray Elements = nullptr);
/// Create a descriptor for an imported function.
/// \param Context The scope this module is imported into.
@@ -848,9 +866,11 @@ namespace llvm {
/// variable.
/// \param File File where the declaration is located.
/// \param Line Line number of the declaration.
+ /// \param Elements Renamed elements.
DIImportedEntity *createImportedDeclaration(DIScope *Context, DINode *Decl,
DIFile *File, unsigned Line,
- StringRef Name = "");
+ StringRef Name = "",
+ DINodeArray Elements = nullptr);
/// Insert a new llvm.dbg.declare intrinsic call.
/// \param Storage llvm::Value of the variable
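
A hedged sketch of building an annotated member type through the new DINodeArray parameters. The {"btf_decl_tag", value} tuple layout mirrors how clang emits attribute annotations, but that layout, the field name, and the sizes are assumptions of this example.

#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/Metadata.h"

using namespace llvm;

DIDerivedType *makeAnnotatedMember(DIBuilder &DIB, LLVMContext &Ctx,
                                   DIScope *Scope, DIFile *File, DIType *Ty) {
  // One annotation node of the assumed form {"btf_decl_tag", "user_tag"}.
  Metadata *Ops[] = {MDString::get(Ctx, "btf_decl_tag"),
                     MDString::get(Ctx, "user_tag")};
  Metadata *Ann = MDNode::get(Ctx, Ops);
  DINodeArray Annotations = DIB.getOrCreateArray(Ann);
  // The trailing Annotations argument is the parameter added by this change.
  return DIB.createMemberType(Scope, "field", File, /*LineNo=*/1,
                              /*SizeInBits=*/32, /*AlignInBits=*/32,
                              /*OffsetInBits=*/0, DINode::FlagZero, Ty,
                              Annotations);
}
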
diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h
index 300f73c12df0..46acd403bef1 100644
--- a/llvm/include/llvm/IR/DataLayout.h
+++ b/llvm/include/llvm/IR/DataLayout.h
@@ -19,6 +19,7 @@
#ifndef LLVM_IR_DATALAYOUT_H
#define LLVM_IR_DATALAYOUT_H
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
@@ -135,6 +136,7 @@ private:
MM_MachO,
MM_WinCOFF,
MM_WinCOFFX86,
+ MM_GOFF,
MM_Mips,
MM_XCOFF
};
@@ -316,6 +318,7 @@ public:
switch (ManglingMode) {
case MM_None:
case MM_ELF:
+ case MM_GOFF:
case MM_Mips:
case MM_WinCOFF:
case MM_XCOFF:
@@ -334,6 +337,8 @@ public:
case MM_ELF:
case MM_WinCOFF:
return ".L";
+ case MM_GOFF:
+ return "@";
case MM_Mips:
return "$";
case MM_MachO:
@@ -372,8 +377,8 @@ public:
/// the backends/clients are updated.
unsigned getPointerSize(unsigned AS = 0) const;
- /// Returns the maximum pointer size over all address spaces.
- unsigned getMaxPointerSize() const;
+ /// Returns the maximum index size over all address spaces.
+ unsigned getMaxIndexSize() const;
// Index size used for address calculation.
unsigned getIndexSize(unsigned AS) const;
@@ -405,9 +410,9 @@ public:
return getPointerSize(AS) * 8;
}
- /// Returns the maximum pointer size over all address spaces.
- unsigned getMaxPointerSizeInBits() const {
- return getMaxPointerSize() * 8;
+ /// Returns the maximum index size over all address spaces.
+ unsigned getMaxIndexSizeInBits() const {
+ return getMaxIndexSize() * 8;
}
/// Size in bits of index used for address calculation in getelementptr.
@@ -514,7 +519,7 @@ public:
/// Returns the minimum ABI-required alignment for the specified type.
/// FIXME: Deprecate this function once migration to Align is over.
- unsigned getABITypeAlignment(Type *Ty) const;
+ uint64_t getABITypeAlignment(Type *Ty) const;
/// Returns the minimum ABI-required alignment for the specified type.
Align getABITypeAlign(Type *Ty) const;
@@ -537,7 +542,7 @@ public:
///
/// This is always at least as good as the ABI alignment.
/// FIXME: Deprecate this function once migration to Align is over.
- unsigned getPrefTypeAlignment(Type *Ty) const;
+ uint64_t getPrefTypeAlignment(Type *Ty) const;
/// Returns the preferred stack/global alignment for the specified
/// type.
@@ -579,6 +584,10 @@ public:
/// This is used to implement getelementptr.
int64_t getIndexedOffsetInType(Type *ElemTy, ArrayRef<Value *> Indices) const;
+ /// Get GEP indices to access Offset inside ElemTy. ElemTy is updated to be
+ /// the result element type and Offset to be the residual offset.
+ SmallVector<APInt> getGEPIndicesForOffset(Type *&ElemTy, APInt &Offset) const;
+
/// Returns a StructLayout object, indicating the alignment of the
/// struct, its size, and the offsets of its fields.
///
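
A sketch of the new getGEPIndicesForOffset helper. The struct layout, the 64-bit offset width, and the expected {1, 1} result are illustrative assumptions drawn from the documented contract, not from the patch itself.

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"

using namespace llvm;

void gepIndicesExample(const DataLayout &DL, LLVMContext &Ctx) {
  // struct { i32, [4 x i16] }; byte offset 6 lands in the second array element.
  Type *ElemTy = StructType::get(
      Ctx, {Type::getInt32Ty(Ctx), ArrayType::get(Type::getInt16Ty(Ctx), 4)});
  APInt Offset(/*numBits=*/64, /*val=*/6);
  SmallVector<APInt> Indices = DL.getGEPIndicesForOffset(ElemTy, Offset);
  (void)Indices;
  // Per the documented contract this should yield indices {1, 1}, leave
  // ElemTy pointing at i16, and reduce Offset to the residual 0.
}
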
diff --git a/llvm/include/llvm/IR/DebugInfo.h b/llvm/include/llvm/IR/DebugInfo.h
index eba422a9fde6..730c69d0c622 100644
--- a/llvm/include/llvm/IR/DebugInfo.h
+++ b/llvm/include/llvm/IR/DebugInfo.h
@@ -106,8 +106,6 @@ public:
void reset();
private:
- void InitializeTypeMap(const Module &M);
-
void processCompileUnit(DICompileUnit *CU);
void processScope(DIScope *Scope);
void processType(DIType *DT);
diff --git a/llvm/include/llvm/IR/DebugInfoMetadata.h b/llvm/include/llvm/IR/DebugInfoMetadata.h
index 20a032f04909..c04f07c534af 100644
--- a/llvm/include/llvm/IR/DebugInfoMetadata.h
+++ b/llvm/include/llvm/IR/DebugInfoMetadata.h
@@ -256,11 +256,13 @@ class GenericDINode : public DINode {
public:
unsigned getHash() const { return SubclassData32; }
- DEFINE_MDNODE_GET(GenericDINode, (unsigned Tag, StringRef Header,
- ArrayRef<Metadata *> DwarfOps),
+ DEFINE_MDNODE_GET(GenericDINode,
+ (unsigned Tag, StringRef Header,
+ ArrayRef<Metadata *> DwarfOps),
(Tag, Header, DwarfOps))
- DEFINE_MDNODE_GET(GenericDINode, (unsigned Tag, MDString *Header,
- ArrayRef<Metadata *> DwarfOps),
+ DEFINE_MDNODE_GET(GenericDINode,
+ (unsigned Tag, MDString *Header,
+ ArrayRef<Metadata *> DwarfOps),
(Tag, Header, DwarfOps))
/// Return a (temporary) clone of this.
@@ -324,7 +326,7 @@ public:
DEFINE_MDNODE_GET(DISubrange, (int64_t Count, int64_t LowerBound = 0),
(Count, LowerBound))
- DEFINE_MDNODE_GET(DISubrange, (Metadata *CountNode, int64_t LowerBound = 0),
+ DEFINE_MDNODE_GET(DISubrange, (Metadata * CountNode, int64_t LowerBound = 0),
(CountNode, LowerBound))
DEFINE_MDNODE_GET(DISubrange,
@@ -334,9 +336,7 @@ public:
TempDISubrange clone() const { return cloneImpl(); }
- Metadata *getRawCountNode() const {
- return getOperand(0).get();
- }
+ Metadata *getRawCountNode() const { return getOperand(0).get(); }
Metadata *getRawLowerBound() const { return getOperand(1).get(); }
@@ -548,14 +548,13 @@ public:
};
/// A single checksum, represented by a \a Kind and a \a Value (a string).
- template <typename T>
- struct ChecksumInfo {
+ template <typename T> struct ChecksumInfo {
/// The kind of checksum which \a Value encodes.
ChecksumKind Kind;
/// The string value of the checksum.
T Value;
- ChecksumInfo(ChecksumKind Kind, T Value) : Kind(Kind), Value(Value) { }
+ ChecksumInfo(ChecksumKind Kind, T Value) : Kind(Kind), Value(Value) {}
~ChecksumInfo() = default;
bool operator==(const ChecksumInfo<T> &X) const {
return Kind == X.Kind && Value == X.Value;
@@ -578,15 +577,17 @@ private:
static DIFile *getImpl(LLVMContext &Context, StringRef Filename,
StringRef Directory,
Optional<ChecksumInfo<StringRef>> CS,
- Optional<StringRef> Source,
- StorageType Storage, bool ShouldCreate = true) {
+ Optional<StringRef> Source, StorageType Storage,
+ bool ShouldCreate = true) {
Optional<ChecksumInfo<MDString *>> MDChecksum;
if (CS)
MDChecksum.emplace(CS->Kind, getCanonicalMDString(Context, CS->Value));
- return getImpl(Context, getCanonicalMDString(Context, Filename),
- getCanonicalMDString(Context, Directory), MDChecksum,
- Source ? Optional<MDString *>(getCanonicalMDString(Context, *Source)) : None,
- Storage, ShouldCreate);
+ return getImpl(
+ Context, getCanonicalMDString(Context, Filename),
+ getCanonicalMDString(Context, Directory), MDChecksum,
+ Source ? Optional<MDString *>(getCanonicalMDString(Context, *Source))
+ : None,
+ Storage, ShouldCreate);
}
static DIFile *getImpl(LLVMContext &Context, MDString *Filename,
MDString *Directory,
@@ -600,13 +601,15 @@ private:
}
public:
- DEFINE_MDNODE_GET(DIFile, (StringRef Filename, StringRef Directory,
- Optional<ChecksumInfo<StringRef>> CS = None,
- Optional<StringRef> Source = None),
+ DEFINE_MDNODE_GET(DIFile,
+ (StringRef Filename, StringRef Directory,
+ Optional<ChecksumInfo<StringRef>> CS = None,
+ Optional<StringRef> Source = None),
(Filename, Directory, CS, Source))
- DEFINE_MDNODE_GET(DIFile, (MDString * Filename, MDString *Directory,
- Optional<ChecksumInfo<MDString *>> CS = None,
- Optional<MDString *> Source = None),
+ DEFINE_MDNODE_GET(DIFile,
+ (MDString * Filename, MDString *Directory,
+ Optional<ChecksumInfo<MDString *>> CS = None,
+ Optional<MDString *> Source = None),
(Filename, Directory, CS, Source))
TempDIFile clone() const { return cloneImpl(); }
@@ -707,7 +710,6 @@ public:
DIScope *getScope() const { return cast_or_null<DIScope>(getRawScope()); }
StringRef getName() const { return getStringOperand(2); }
-
Metadata *getRawScope() const { return getOperand(1); }
MDString *getRawName() const { return getOperandAs<MDString>(2); }
@@ -936,47 +938,48 @@ class DIDerivedType : public DIType {
unsigned Line, DIScope *Scope, DIType *BaseType, uint64_t SizeInBits,
uint32_t AlignInBits, uint64_t OffsetInBits,
Optional<unsigned> DWARFAddressSpace, DIFlags Flags,
- Metadata *ExtraData, StorageType Storage, bool ShouldCreate = true) {
+ Metadata *ExtraData, DINodeArray Annotations, StorageType Storage,
+ bool ShouldCreate = true) {
return getImpl(Context, Tag, getCanonicalMDString(Context, Name), File,
Line, Scope, BaseType, SizeInBits, AlignInBits, OffsetInBits,
- DWARFAddressSpace, Flags, ExtraData, Storage, ShouldCreate);
- }
- static DIDerivedType *getImpl(LLVMContext &Context, unsigned Tag,
- MDString *Name, Metadata *File, unsigned Line,
- Metadata *Scope, Metadata *BaseType,
- uint64_t SizeInBits, uint32_t AlignInBits,
- uint64_t OffsetInBits,
- Optional<unsigned> DWARFAddressSpace,
- DIFlags Flags, Metadata *ExtraData,
- StorageType Storage, bool ShouldCreate = true);
+ DWARFAddressSpace, Flags, ExtraData, Annotations.get(),
+ Storage, ShouldCreate);
+ }
+ static DIDerivedType *
+ getImpl(LLVMContext &Context, unsigned Tag, MDString *Name, Metadata *File,
+ unsigned Line, Metadata *Scope, Metadata *BaseType,
+ uint64_t SizeInBits, uint32_t AlignInBits, uint64_t OffsetInBits,
+ Optional<unsigned> DWARFAddressSpace, DIFlags Flags,
+ Metadata *ExtraData, Metadata *Annotations, StorageType Storage,
+ bool ShouldCreate = true);
TempDIDerivedType cloneImpl() const {
- return getTemporary(getContext(), getTag(), getName(), getFile(), getLine(),
- getScope(), getBaseType(), getSizeInBits(),
- getAlignInBits(), getOffsetInBits(),
- getDWARFAddressSpace(), getFlags(), getExtraData());
+ return getTemporary(
+ getContext(), getTag(), getName(), getFile(), getLine(), getScope(),
+ getBaseType(), getSizeInBits(), getAlignInBits(), getOffsetInBits(),
+ getDWARFAddressSpace(), getFlags(), getExtraData(), getAnnotations());
}
public:
- DEFINE_MDNODE_GET(DIDerivedType,
- (unsigned Tag, MDString *Name, Metadata *File,
- unsigned Line, Metadata *Scope, Metadata *BaseType,
- uint64_t SizeInBits, uint32_t AlignInBits,
- uint64_t OffsetInBits,
- Optional<unsigned> DWARFAddressSpace, DIFlags Flags,
- Metadata *ExtraData = nullptr),
- (Tag, Name, File, Line, Scope, BaseType, SizeInBits,
- AlignInBits, OffsetInBits, DWARFAddressSpace, Flags,
- ExtraData))
+ DEFINE_MDNODE_GET(
+ DIDerivedType,
+ (unsigned Tag, MDString *Name, Metadata *File, unsigned Line,
+ Metadata *Scope, Metadata *BaseType, uint64_t SizeInBits,
+ uint32_t AlignInBits, uint64_t OffsetInBits,
+ Optional<unsigned> DWARFAddressSpace, DIFlags Flags,
+ Metadata *ExtraData = nullptr, Metadata *Annotations = nullptr),
+ (Tag, Name, File, Line, Scope, BaseType, SizeInBits, AlignInBits,
+ OffsetInBits, DWARFAddressSpace, Flags, ExtraData, Annotations))
DEFINE_MDNODE_GET(DIDerivedType,
(unsigned Tag, StringRef Name, DIFile *File, unsigned Line,
DIScope *Scope, DIType *BaseType, uint64_t SizeInBits,
uint32_t AlignInBits, uint64_t OffsetInBits,
Optional<unsigned> DWARFAddressSpace, DIFlags Flags,
- Metadata *ExtraData = nullptr),
+ Metadata *ExtraData = nullptr,
+ DINodeArray Annotations = nullptr),
(Tag, Name, File, Line, Scope, BaseType, SizeInBits,
AlignInBits, OffsetInBits, DWARFAddressSpace, Flags,
- ExtraData))
+ ExtraData, Annotations))
TempDIDerivedType clone() const { return cloneImpl(); }
@@ -999,6 +1002,12 @@ public:
Metadata *getExtraData() const { return getRawExtraData(); }
Metadata *getRawExtraData() const { return getOperand(4); }
+ /// Get annotations associated with this derived type.
+ DINodeArray getAnnotations() const {
+ return cast_or_null<MDTuple>(getRawAnnotations());
+ }
+ Metadata *getRawAnnotations() const { return getOperand(5); }
+
/// Get casted version of extra data.
/// @{
DIType *getClassType() const {
@@ -1065,8 +1074,8 @@ class DICompositeType : public DIType {
/// Change fields in place.
void mutate(unsigned Tag, unsigned Line, unsigned RuntimeLang,
- uint64_t SizeInBits, uint32_t AlignInBits,
- uint64_t OffsetInBits, DIFlags Flags) {
+ uint64_t SizeInBits, uint32_t AlignInBits, uint64_t OffsetInBits,
+ DIFlags Flags) {
assert(isDistinct() && "Only distinct nodes can mutate");
assert(getRawIdentifier() && "Only ODR-uniqued nodes should mutate");
this->RuntimeLang = RuntimeLang;
@@ -1081,13 +1090,14 @@ class DICompositeType : public DIType {
DITemplateParameterArray TemplateParams, StringRef Identifier,
DIDerivedType *Discriminator, Metadata *DataLocation,
Metadata *Associated, Metadata *Allocated, Metadata *Rank,
- StorageType Storage, bool ShouldCreate = true) {
+ DINodeArray Annotations, StorageType Storage,
+ bool ShouldCreate = true) {
return getImpl(
Context, Tag, getCanonicalMDString(Context, Name), File, Line, Scope,
BaseType, SizeInBits, AlignInBits, OffsetInBits, Flags, Elements.get(),
RuntimeLang, VTableHolder, TemplateParams.get(),
getCanonicalMDString(Context, Identifier), Discriminator, DataLocation,
- Associated, Allocated, Rank, Storage, ShouldCreate);
+ Associated, Allocated, Rank, Annotations.get(), Storage, ShouldCreate);
}
static DICompositeType *
getImpl(LLVMContext &Context, unsigned Tag, MDString *Name, Metadata *File,
@@ -1097,16 +1107,16 @@ class DICompositeType : public DIType {
Metadata *VTableHolder, Metadata *TemplateParams,
MDString *Identifier, Metadata *Discriminator, Metadata *DataLocation,
Metadata *Associated, Metadata *Allocated, Metadata *Rank,
- StorageType Storage, bool ShouldCreate = true);
+ Metadata *Annotations, StorageType Storage, bool ShouldCreate = true);
TempDICompositeType cloneImpl() const {
- return getTemporary(getContext(), getTag(), getName(), getFile(), getLine(),
- getScope(), getBaseType(), getSizeInBits(),
- getAlignInBits(), getOffsetInBits(), getFlags(),
- getElements(), getRuntimeLang(), getVTableHolder(),
- getTemplateParams(), getIdentifier(),
- getDiscriminator(), getRawDataLocation(),
- getRawAssociated(), getRawAllocated(), getRawRank());
+ return getTemporary(
+ getContext(), getTag(), getName(), getFile(), getLine(), getScope(),
+ getBaseType(), getSizeInBits(), getAlignInBits(), getOffsetInBits(),
+ getFlags(), getElements(), getRuntimeLang(), getVTableHolder(),
+ getTemplateParams(), getIdentifier(), getDiscriminator(),
+ getRawDataLocation(), getRawAssociated(), getRawAllocated(),
+ getRawRank(), getAnnotations());
}
public:
@@ -1119,10 +1129,12 @@ public:
DITemplateParameterArray TemplateParams = nullptr,
StringRef Identifier = "", DIDerivedType *Discriminator = nullptr,
Metadata *DataLocation = nullptr, Metadata *Associated = nullptr,
- Metadata *Allocated = nullptr, Metadata *Rank = nullptr),
+ Metadata *Allocated = nullptr, Metadata *Rank = nullptr,
+ DINodeArray Annotations = nullptr),
(Tag, Name, File, Line, Scope, BaseType, SizeInBits, AlignInBits,
OffsetInBits, Flags, Elements, RuntimeLang, VTableHolder, TemplateParams,
- Identifier, Discriminator, DataLocation, Associated, Allocated, Rank))
+ Identifier, Discriminator, DataLocation, Associated, Allocated, Rank,
+ Annotations))
DEFINE_MDNODE_GET(
DICompositeType,
(unsigned Tag, MDString *Name, Metadata *File, unsigned Line,
@@ -1132,10 +1144,11 @@ public:
Metadata *TemplateParams = nullptr, MDString *Identifier = nullptr,
Metadata *Discriminator = nullptr, Metadata *DataLocation = nullptr,
Metadata *Associated = nullptr, Metadata *Allocated = nullptr,
- Metadata *Rank = nullptr),
+ Metadata *Rank = nullptr, Metadata *Annotations = nullptr),
(Tag, Name, File, Line, Scope, BaseType, SizeInBits, AlignInBits,
OffsetInBits, Flags, Elements, RuntimeLang, VTableHolder, TemplateParams,
- Identifier, Discriminator, DataLocation, Associated, Allocated, Rank))
+ Identifier, Discriminator, DataLocation, Associated, Allocated, Rank,
+ Annotations))
TempDICompositeType clone() const { return cloneImpl(); }
@@ -1154,7 +1167,7 @@ public:
unsigned RuntimeLang, Metadata *VTableHolder,
Metadata *TemplateParams, Metadata *Discriminator,
Metadata *DataLocation, Metadata *Associated, Metadata *Allocated,
- Metadata *Rank);
+ Metadata *Rank, Metadata *Annotations);
static DICompositeType *getODRTypeIfExists(LLVMContext &Context,
MDString &Identifier);
@@ -1175,7 +1188,7 @@ public:
unsigned RuntimeLang, Metadata *VTableHolder,
Metadata *TemplateParams, Metadata *Discriminator,
Metadata *DataLocation, Metadata *Associated,
- Metadata *Allocated, Metadata *Rank);
+ Metadata *Allocated, Metadata *Rank, Metadata *Annotations);
DIType *getBaseType() const { return cast_or_null<DIType>(getRawBaseType()); }
DINodeArray getElements() const {
@@ -1196,7 +1209,9 @@ public:
Metadata *getRawTemplateParams() const { return getOperand(6); }
MDString *getRawIdentifier() const { return getOperandAs<MDString>(7); }
Metadata *getRawDiscriminator() const { return getOperand(8); }
- DIDerivedType *getDiscriminator() const { return getOperandAs<DIDerivedType>(8); }
+ DIDerivedType *getDiscriminator() const {
+ return getOperandAs<DIDerivedType>(8);
+ }
Metadata *getRawDataLocation() const { return getOperand(9); }
DIVariable *getDataLocation() const {
return dyn_cast_or_null<DIVariable>(getRawDataLocation());
@@ -1228,6 +1243,11 @@ public:
return dyn_cast_or_null<DIExpression>(getRawRank());
}
+ Metadata *getRawAnnotations() const { return getOperand(13); }
+ DINodeArray getAnnotations() const {
+ return cast_or_null<MDTuple>(getRawAnnotations());
+ }
+
/// Replace operands.
///
/// If this \a isUniqued() and not \a isResolved(), on a uniquing collision
@@ -1507,9 +1527,7 @@ public:
void replaceEnumTypes(DICompositeTypeArray N) {
replaceOperandWith(4, N.get());
}
- void replaceRetainedTypes(DITypeArray N) {
- replaceOperandWith(5, N.get());
- }
+ void replaceRetainedTypes(DITypeArray N) { replaceOperandWith(5, N.get()); }
void replaceGlobalVariables(DIGlobalVariableExpressionArray N) {
replaceOperandWith(6, N.get());
}
@@ -1691,7 +1709,8 @@ public:
/// base discriminator is set in the new DILocation, the other encoded values
/// are elided.
/// If the discriminator cannot be encoded, the function returns None.
- inline Optional<const DILocation *> cloneWithBaseDiscriminator(unsigned BD) const;
+ inline Optional<const DILocation *>
+ cloneWithBaseDiscriminator(unsigned BD) const;
/// Returns the duplication factor stored in the discriminator, or 1 if no
/// duplication factor (or 0) is encoded.
@@ -1707,7 +1726,8 @@ public:
/// duplication factor encoded in the discriminator. The current duplication
/// factor is as defined by getDuplicationFactor().
/// Returns None if encoding failed.
- inline Optional<const DILocation *> cloneByMultiplyingDuplicationFactor(unsigned DF) const;
+ inline Optional<const DILocation *>
+ cloneByMultiplyingDuplicationFactor(unsigned DF) const;
/// When two instructions are combined into a single instruction we also
/// need to combine the original locations into a single location.
@@ -1730,8 +1750,8 @@ public:
/// This function applies getMergedLocation() repeatedly left-to-right.
///
/// \p Locs: The locations to be merged.
- static
- const DILocation *getMergedLocations(ArrayRef<const DILocation *> Locs);
+ static const DILocation *
+ getMergedLocations(ArrayRef<const DILocation *> Locs);
/// Return the masked discriminator value for an input discriminator value D
/// (i.e. zero out the (B+1)-th and above bits for D; B is 0-based).
@@ -1755,13 +1775,18 @@ public:
/// Raw encoding of the discriminator. APIs such as cloneWithDuplicationFactor
/// have certain special case behavior (e.g. treating empty duplication factor
/// as the value '1').
- /// This API, in conjunction with cloneWithDiscriminator, may be used to encode
- /// the raw values provided. \p BD: base discriminator \p DF: duplication factor
+ /// This API, in conjunction with cloneWithDiscriminator, may be used to
+ /// encode the raw values provided.
+ ///
+ /// \p BD: base discriminator
+ /// \p DF: duplication factor
/// \p CI: copy index
+ ///
/// The return is None if the values cannot be encoded in 32 bits - for
- /// example, values for BD or DF larger than 12 bits. Otherwise, the return
- /// is the encoded value.
- static Optional<unsigned> encodeDiscriminator(unsigned BD, unsigned DF, unsigned CI);
+ /// example, values for BD or DF larger than 12 bits. Otherwise, the return is
+ /// the encoded value.
+ static Optional<unsigned> encodeDiscriminator(unsigned BD, unsigned DF,
+ unsigned CI);
/// Raw decoder for values in an encoded discriminator D.
static void decodeDiscriminator(unsigned D, unsigned &BD, unsigned &DF,
@@ -1781,11 +1806,10 @@ public:
/// Returns the copy identifier for a given encoded discriminator \p D.
static unsigned getCopyIdentifierFromDiscriminator(unsigned D) {
- return getUnsignedFromPrefixEncoding(getNextComponentInDiscriminator(
- getNextComponentInDiscriminator(D)));
+ return getUnsignedFromPrefixEncoding(
+ getNextComponentInDiscriminator(getNextComponentInDiscriminator(D)));
}
-
Metadata *getRawScope() const { return getOperand(0); }
Metadata *getRawInlinedAt() const {
if (getNumOperands() == 2)
@@ -1839,10 +1863,10 @@ public:
unsigned Virtuality = SPFlagNonvirtual,
bool IsMainSubprogram = false) {
// We're assuming virtuality is the low-order field.
- static_assert(
- int(SPFlagVirtual) == int(dwarf::DW_VIRTUALITY_virtual) &&
- int(SPFlagPureVirtual) == int(dwarf::DW_VIRTUALITY_pure_virtual),
- "Virtuality constant mismatch");
+ static_assert(int(SPFlagVirtual) == int(dwarf::DW_VIRTUALITY_virtual) &&
+ int(SPFlagPureVirtual) ==
+ int(dwarf::DW_VIRTUALITY_pure_virtual),
+ "Virtuality constant mismatch");
return static_cast<DISPFlags>(
(Virtuality & SPFlagVirtuality) |
(IsLocalToUnit ? SPFlagLocalToUnit : SPFlagZero) |
@@ -1874,23 +1898,23 @@ private:
DISPFlags SPFlags, DICompileUnit *Unit,
DITemplateParameterArray TemplateParams, DISubprogram *Declaration,
DINodeArray RetainedNodes, DITypeArray ThrownTypes,
- StorageType Storage, bool ShouldCreate = true) {
+ DINodeArray Annotations, StorageType Storage,
+ bool ShouldCreate = true) {
return getImpl(Context, Scope, getCanonicalMDString(Context, Name),
getCanonicalMDString(Context, LinkageName), File, Line, Type,
ScopeLine, ContainingType, VirtualIndex, ThisAdjustment,
Flags, SPFlags, Unit, TemplateParams.get(), Declaration,
- RetainedNodes.get(), ThrownTypes.get(), Storage,
- ShouldCreate);
+ RetainedNodes.get(), ThrownTypes.get(), Annotations.get(),
+ Storage, ShouldCreate);
}
- static DISubprogram *getImpl(LLVMContext &Context, Metadata *Scope,
- MDString *Name, MDString *LinkageName,
- Metadata *File, unsigned Line, Metadata *Type,
- unsigned ScopeLine, Metadata *ContainingType,
- unsigned VirtualIndex, int ThisAdjustment,
- DIFlags Flags, DISPFlags SPFlags, Metadata *Unit,
- Metadata *TemplateParams, Metadata *Declaration,
- Metadata *RetainedNodes, Metadata *ThrownTypes,
- StorageType Storage, bool ShouldCreate = true);
+ static DISubprogram *
+ getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name,
+ MDString *LinkageName, Metadata *File, unsigned Line, Metadata *Type,
+ unsigned ScopeLine, Metadata *ContainingType, unsigned VirtualIndex,
+ int ThisAdjustment, DIFlags Flags, DISPFlags SPFlags, Metadata *Unit,
+ Metadata *TemplateParams, Metadata *Declaration,
+ Metadata *RetainedNodes, Metadata *ThrownTypes, Metadata *Annotations,
+ StorageType Storage, bool ShouldCreate = true);
TempDISubprogram cloneImpl() const {
return getTemporary(getContext(), getScope(), getName(), getLinkageName(),
@@ -1898,7 +1922,7 @@ private:
getContainingType(), getVirtualIndex(),
getThisAdjustment(), getFlags(), getSPFlags(),
getUnit(), getTemplateParams(), getDeclaration(),
- getRetainedNodes(), getThrownTypes());
+ getRetainedNodes(), getThrownTypes(), getAnnotations());
}
public:
@@ -1910,10 +1934,10 @@ public:
DIFlags Flags, DISPFlags SPFlags, DICompileUnit *Unit,
DITemplateParameterArray TemplateParams = nullptr,
DISubprogram *Declaration = nullptr, DINodeArray RetainedNodes = nullptr,
- DITypeArray ThrownTypes = nullptr),
+ DITypeArray ThrownTypes = nullptr, DINodeArray Annotations = nullptr),
(Scope, Name, LinkageName, File, Line, Type, ScopeLine, ContainingType,
VirtualIndex, ThisAdjustment, Flags, SPFlags, Unit, TemplateParams,
- Declaration, RetainedNodes, ThrownTypes))
+ Declaration, RetainedNodes, ThrownTypes, Annotations))
DEFINE_MDNODE_GET(
DISubprogram,
@@ -1922,10 +1946,11 @@ public:
Metadata *ContainingType, unsigned VirtualIndex, int ThisAdjustment,
DIFlags Flags, DISPFlags SPFlags, Metadata *Unit,
Metadata *TemplateParams = nullptr, Metadata *Declaration = nullptr,
- Metadata *RetainedNodes = nullptr, Metadata *ThrownTypes = nullptr),
+ Metadata *RetainedNodes = nullptr, Metadata *ThrownTypes = nullptr,
+ Metadata *Annotations = nullptr),
(Scope, Name, LinkageName, File, Line, Type, ScopeLine, ContainingType,
VirtualIndex, ThisAdjustment, Flags, SPFlags, Unit, TemplateParams,
- Declaration, RetainedNodes, ThrownTypes))
+ Declaration, RetainedNodes, ThrownTypes, Annotations))
TempDISubprogram clone() const { return cloneImpl(); }
@@ -1942,7 +1967,10 @@ public:
unsigned getVirtualIndex() const { return VirtualIndex; }
int getThisAdjustment() const { return ThisAdjustment; }
unsigned getScopeLine() const { return ScopeLine; }
- void setScopeLine(unsigned L) { assert(isDistinct()); ScopeLine = L; }
+ void setScopeLine(unsigned L) {
+ assert(isDistinct());
+ ScopeLine = L;
+ }
DIFlags getFlags() const { return Flags; }
DISPFlags getSPFlags() const { return SPFlags; }
bool isLocalToUnit() const { return getSPFlags() & SPFlagLocalToUnit; }
@@ -2028,6 +2056,9 @@ public:
DITypeArray getThrownTypes() const {
return cast_or_null<MDTuple>(getRawThrownTypes());
}
+ DINodeArray getAnnotations() const {
+ return cast_or_null<MDTuple>(getRawAnnotations());
+ }
Metadata *getRawScope() const { return getOperand(1); }
MDString *getRawName() const { return getOperandAs<MDString>(2); }
@@ -2045,6 +2076,9 @@ public:
Metadata *getRawThrownTypes() const {
return getNumOperands() > 10 ? getOperandAs<Metadata>(10) : nullptr;
}
+ Metadata *getRawAnnotations() const {
+ return getNumOperands() > 11 ? getOperandAs<Metadata>(11) : nullptr;
+ }
void replaceRawLinkageName(MDString *LinkageName) {
replaceOperandWith(3, LinkageName);
@@ -2112,11 +2146,13 @@ class DILexicalBlock : public DILexicalBlockBase {
}
public:
- DEFINE_MDNODE_GET(DILexicalBlock, (DILocalScope * Scope, DIFile *File,
- unsigned Line, unsigned Column),
+ DEFINE_MDNODE_GET(DILexicalBlock,
+ (DILocalScope * Scope, DIFile *File, unsigned Line,
+ unsigned Column),
(Scope, File, Line, Column))
- DEFINE_MDNODE_GET(DILexicalBlock, (Metadata * Scope, Metadata *File,
- unsigned Line, unsigned Column),
+ DEFINE_MDNODE_GET(DILexicalBlock,
+ (Metadata * Scope, Metadata *File, unsigned Line,
+ unsigned Column),
(Scope, File, Line, Column))
TempDILexicalBlock clone() const { return cloneImpl(); }
@@ -2161,8 +2197,9 @@ class DILexicalBlockFile : public DILexicalBlockBase {
}
public:
- DEFINE_MDNODE_GET(DILexicalBlockFile, (DILocalScope * Scope, DIFile *File,
- unsigned Discriminator),
+ DEFINE_MDNODE_GET(DILexicalBlockFile,
+ (DILocalScope * Scope, DIFile *File,
+ unsigned Discriminator),
(Scope, File, Discriminator))
DEFINE_MDNODE_GET(DILexicalBlockFile,
(Metadata * Scope, Metadata *File, unsigned Discriminator),
@@ -2212,7 +2249,8 @@ unsigned DILocation::getCopyIdentifier() const {
return getCopyIdentifierFromDiscriminator(getDiscriminator());
}
-Optional<const DILocation *> DILocation::cloneWithBaseDiscriminator(unsigned D) const {
+Optional<const DILocation *>
+DILocation::cloneWithBaseDiscriminator(unsigned D) const {
unsigned BD, DF, CI;
if (EnableFSDiscriminator) {
@@ -2230,7 +2268,8 @@ Optional<const DILocation *> DILocation::cloneWithBaseDiscriminator(unsigned D)
return None;
}
-Optional<const DILocation *> DILocation::cloneByMultiplyingDuplicationFactor(unsigned DF) const {
+Optional<const DILocation *>
+DILocation::cloneByMultiplyingDuplicationFactor(unsigned DF) const {
assert(!EnableFSDiscriminator && "FSDiscriminator should not call this.");
DF *= getDuplicationFactor();
@@ -2274,10 +2313,10 @@ class DINamespace : public DIScope {
public:
DEFINE_MDNODE_GET(DINamespace,
- (DIScope *Scope, StringRef Name, bool ExportSymbols),
+ (DIScope * Scope, StringRef Name, bool ExportSymbols),
(Scope, Name, ExportSymbols))
DEFINE_MDNODE_GET(DINamespace,
- (Metadata *Scope, MDString *Name, bool ExportSymbols),
+ (Metadata * Scope, MDString *Name, bool ExportSymbols),
(Scope, Name, ExportSymbols))
TempDINamespace clone() const { return cloneImpl(); }
@@ -2426,7 +2465,7 @@ public:
(StringRef Name, DIType *Type, bool IsDefault),
(Name, Type, IsDefault))
DEFINE_MDNODE_GET(DITemplateTypeParameter,
- (MDString *Name, Metadata *Type, bool IsDefault),
+ (MDString * Name, Metadata *Type, bool IsDefault),
(Name, Type, IsDefault))
TempDITemplateTypeParameter clone() const { return cloneImpl(); }
@@ -2819,7 +2858,8 @@ public:
/// \param OffsetInBits Offset of the piece in bits.
/// \param SizeInBits Size of the piece in bits.
/// \return Creating a fragment expression may fail if \c Expr
- /// contains arithmetic operations that would be truncated.
+ /// contains arithmetic operations that would be
+ /// truncated.
static Optional<DIExpression *>
createFragmentExpression(const DIExpression *Expr, unsigned OffsetInBits,
unsigned SizeInBits);
@@ -2876,6 +2916,12 @@ public:
return getNumElements() > 0 &&
getElement(0) == dwarf::DW_OP_LLVM_entry_value;
}
+
+ /// Try to shorten an expression with an initial constant operand.
+ /// Returns a new expression and constant on success, or the original
+ /// expression and constant on failure.
+ std::pair<DIExpression *, const ConstantInt *>
+ constantFold(const ConstantInt *CI);
};
inline bool operator==(const DIExpression::FragmentInfo &A,
@@ -2927,46 +2973,47 @@ class DIGlobalVariable : public DIVariable {
StringRef LinkageName, DIFile *File, unsigned Line, DIType *Type,
bool IsLocalToUnit, bool IsDefinition,
DIDerivedType *StaticDataMemberDeclaration, MDTuple *TemplateParams,
- uint32_t AlignInBits, StorageType Storage, bool ShouldCreate = true) {
+ uint32_t AlignInBits, DINodeArray Annotations, StorageType Storage,
+ bool ShouldCreate = true) {
return getImpl(Context, Scope, getCanonicalMDString(Context, Name),
getCanonicalMDString(Context, LinkageName), File, Line, Type,
IsLocalToUnit, IsDefinition, StaticDataMemberDeclaration,
- cast_or_null<Metadata>(TemplateParams), AlignInBits, Storage,
- ShouldCreate);
+ cast_or_null<Metadata>(TemplateParams), AlignInBits,
+ Annotations.get(), Storage, ShouldCreate);
}
static DIGlobalVariable *
getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name,
MDString *LinkageName, Metadata *File, unsigned Line, Metadata *Type,
bool IsLocalToUnit, bool IsDefinition,
Metadata *StaticDataMemberDeclaration, Metadata *TemplateParams,
- uint32_t AlignInBits, StorageType Storage, bool ShouldCreate = true);
+ uint32_t AlignInBits, Metadata *Annotations, StorageType Storage,
+ bool ShouldCreate = true);
TempDIGlobalVariable cloneImpl() const {
return getTemporary(getContext(), getScope(), getName(), getLinkageName(),
getFile(), getLine(), getType(), isLocalToUnit(),
isDefinition(), getStaticDataMemberDeclaration(),
- getTemplateParams(), getAlignInBits());
+ getTemplateParams(), getAlignInBits(),
+ getAnnotations());
}
public:
- DEFINE_MDNODE_GET(DIGlobalVariable,
- (DIScope * Scope, StringRef Name, StringRef LinkageName,
- DIFile *File, unsigned Line, DIType *Type,
- bool IsLocalToUnit, bool IsDefinition,
- DIDerivedType *StaticDataMemberDeclaration,
- MDTuple *TemplateParams, uint32_t AlignInBits),
- (Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit,
- IsDefinition, StaticDataMemberDeclaration, TemplateParams,
- AlignInBits))
- DEFINE_MDNODE_GET(DIGlobalVariable,
- (Metadata * Scope, MDString *Name, MDString *LinkageName,
- Metadata *File, unsigned Line, Metadata *Type,
- bool IsLocalToUnit, bool IsDefinition,
- Metadata *StaticDataMemberDeclaration,
- Metadata *TemplateParams, uint32_t AlignInBits),
- (Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit,
- IsDefinition, StaticDataMemberDeclaration, TemplateParams,
- AlignInBits))
+ DEFINE_MDNODE_GET(
+ DIGlobalVariable,
+ (DIScope * Scope, StringRef Name, StringRef LinkageName, DIFile *File,
+ unsigned Line, DIType *Type, bool IsLocalToUnit, bool IsDefinition,
+ DIDerivedType *StaticDataMemberDeclaration, MDTuple *TemplateParams,
+ uint32_t AlignInBits, DINodeArray Annotations),
+ (Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit, IsDefinition,
+ StaticDataMemberDeclaration, TemplateParams, AlignInBits, Annotations))
+ DEFINE_MDNODE_GET(
+ DIGlobalVariable,
+ (Metadata * Scope, MDString *Name, MDString *LinkageName, Metadata *File,
+ unsigned Line, Metadata *Type, bool IsLocalToUnit, bool IsDefinition,
+ Metadata *StaticDataMemberDeclaration, Metadata *TemplateParams,
+ uint32_t AlignInBits, Metadata *Annotations),
+ (Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit, IsDefinition,
+ StaticDataMemberDeclaration, TemplateParams, AlignInBits, Annotations))
TempDIGlobalVariable clone() const { return cloneImpl(); }
@@ -2977,11 +3024,15 @@ public:
DIDerivedType *getStaticDataMemberDeclaration() const {
return cast_or_null<DIDerivedType>(getRawStaticDataMemberDeclaration());
}
+ DINodeArray getAnnotations() const {
+ return cast_or_null<MDTuple>(getRawAnnotations());
+ }
MDString *getRawLinkageName() const { return getOperandAs<MDString>(5); }
Metadata *getRawStaticDataMemberDeclaration() const { return getOperand(6); }
Metadata *getRawTemplateParams() const { return getOperand(7); }
MDTuple *getTemplateParams() const { return getOperandAs<MDTuple>(7); }
+ Metadata *getRawAnnotations() const { return getOperand(8); }
static bool classof(const Metadata *MD) {
return MD->getMetadataID() == DIGlobalVariableKind;
@@ -2997,20 +3048,20 @@ class DICommonBlock : public DIScope {
DICommonBlock(LLVMContext &Context, StorageType Storage, unsigned LineNo,
ArrayRef<Metadata *> Ops)
: DIScope(Context, DICommonBlockKind, Storage, dwarf::DW_TAG_common_block,
- Ops), LineNo(LineNo) {}
+ Ops),
+ LineNo(LineNo) {}
static DICommonBlock *getImpl(LLVMContext &Context, DIScope *Scope,
DIGlobalVariable *Decl, StringRef Name,
DIFile *File, unsigned LineNo,
- StorageType Storage,
- bool ShouldCreate = true) {
+ StorageType Storage, bool ShouldCreate = true) {
return getImpl(Context, Scope, Decl, getCanonicalMDString(Context, Name),
File, LineNo, Storage, ShouldCreate);
}
static DICommonBlock *getImpl(LLVMContext &Context, Metadata *Scope,
Metadata *Decl, MDString *Name, Metadata *File,
- unsigned LineNo,
- StorageType Storage, bool ShouldCreate = true);
+ unsigned LineNo, StorageType Storage,
+ bool ShouldCreate = true);
TempDICommonBlock cloneImpl() const {
return getTemporary(getContext(), getScope(), getDecl(), getName(),
@@ -3019,11 +3070,11 @@ class DICommonBlock : public DIScope {
public:
DEFINE_MDNODE_GET(DICommonBlock,
- (DIScope *Scope, DIGlobalVariable *Decl, StringRef Name,
+ (DIScope * Scope, DIGlobalVariable *Decl, StringRef Name,
DIFile *File, unsigned LineNo),
(Scope, Decl, Name, File, LineNo))
DEFINE_MDNODE_GET(DICommonBlock,
- (Metadata *Scope, Metadata *Decl, MDString *Name,
+ (Metadata * Scope, Metadata *Decl, MDString *Name,
Metadata *File, unsigned LineNo),
(Scope, Decl, Name, File, LineNo))
@@ -3069,34 +3120,39 @@ class DILocalVariable : public DIVariable {
static DILocalVariable *getImpl(LLVMContext &Context, DIScope *Scope,
StringRef Name, DIFile *File, unsigned Line,
DIType *Type, unsigned Arg, DIFlags Flags,
- uint32_t AlignInBits, StorageType Storage,
+ uint32_t AlignInBits, DINodeArray Annotations,
+ StorageType Storage,
bool ShouldCreate = true) {
return getImpl(Context, Scope, getCanonicalMDString(Context, Name), File,
- Line, Type, Arg, Flags, AlignInBits, Storage, ShouldCreate);
+ Line, Type, Arg, Flags, AlignInBits, Annotations.get(),
+ Storage, ShouldCreate);
}
static DILocalVariable *getImpl(LLVMContext &Context, Metadata *Scope,
MDString *Name, Metadata *File, unsigned Line,
Metadata *Type, unsigned Arg, DIFlags Flags,
- uint32_t AlignInBits, StorageType Storage,
+ uint32_t AlignInBits, Metadata *Annotations,
+ StorageType Storage,
bool ShouldCreate = true);
TempDILocalVariable cloneImpl() const {
return getTemporary(getContext(), getScope(), getName(), getFile(),
getLine(), getType(), getArg(), getFlags(),
- getAlignInBits());
+ getAlignInBits(), getAnnotations());
}
public:
DEFINE_MDNODE_GET(DILocalVariable,
(DILocalScope * Scope, StringRef Name, DIFile *File,
unsigned Line, DIType *Type, unsigned Arg, DIFlags Flags,
- uint32_t AlignInBits),
- (Scope, Name, File, Line, Type, Arg, Flags, AlignInBits))
+ uint32_t AlignInBits, DINodeArray Annotations),
+ (Scope, Name, File, Line, Type, Arg, Flags, AlignInBits,
+ Annotations))
DEFINE_MDNODE_GET(DILocalVariable,
(Metadata * Scope, MDString *Name, Metadata *File,
- unsigned Line, Metadata *Type, unsigned Arg,
- DIFlags Flags, uint32_t AlignInBits),
- (Scope, Name, File, Line, Type, Arg, Flags, AlignInBits))
+ unsigned Line, Metadata *Type, unsigned Arg, DIFlags Flags,
+ uint32_t AlignInBits, Metadata *Annotations),
+ (Scope, Name, File, Line, Type, Arg, Flags, AlignInBits,
+ Annotations))
TempDILocalVariable clone() const { return cloneImpl(); }
@@ -3111,6 +3167,11 @@ public:
unsigned getArg() const { return Arg; }
DIFlags getFlags() const { return Flags; }
+ DINodeArray getAnnotations() const {
+ return cast_or_null<MDTuple>(getRawAnnotations());
+ }
+ Metadata *getRawAnnotations() const { return getOperand(4); }
+
bool isArtificial() const { return getFlags() & FlagArtificial; }
bool isObjectPointer() const { return getFlags() & FlagObjectPointer; }
@@ -3141,16 +3202,14 @@ class DILabel : public DINode {
: DINode(C, DILabelKind, Storage, dwarf::DW_TAG_label, Ops), Line(Line) {}
~DILabel() = default;
- static DILabel *getImpl(LLVMContext &Context, DIScope *Scope,
- StringRef Name, DIFile *File, unsigned Line,
- StorageType Storage,
+ static DILabel *getImpl(LLVMContext &Context, DIScope *Scope, StringRef Name,
+ DIFile *File, unsigned Line, StorageType Storage,
bool ShouldCreate = true) {
return getImpl(Context, Scope, getCanonicalMDString(Context, Name), File,
Line, Storage, ShouldCreate);
}
- static DILabel *getImpl(LLVMContext &Context, Metadata *Scope,
- MDString *Name, Metadata *File, unsigned Line,
- StorageType Storage,
+ static DILabel *getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name,
+ Metadata *File, unsigned Line, StorageType Storage,
bool ShouldCreate = true);
TempDILabel cloneImpl() const {
@@ -3295,31 +3354,33 @@ class DIImportedEntity : public DINode {
static DIImportedEntity *getImpl(LLVMContext &Context, unsigned Tag,
DIScope *Scope, DINode *Entity, DIFile *File,
unsigned Line, StringRef Name,
- StorageType Storage,
+ DINodeArray Elements, StorageType Storage,
bool ShouldCreate = true) {
return getImpl(Context, Tag, Scope, Entity, File, Line,
- getCanonicalMDString(Context, Name), Storage, ShouldCreate);
+ getCanonicalMDString(Context, Name), Elements.get(), Storage,
+ ShouldCreate);
}
- static DIImportedEntity *getImpl(LLVMContext &Context, unsigned Tag,
- Metadata *Scope, Metadata *Entity,
- Metadata *File, unsigned Line,
- MDString *Name, StorageType Storage,
- bool ShouldCreate = true);
+ static DIImportedEntity *
+ getImpl(LLVMContext &Context, unsigned Tag, Metadata *Scope, Metadata *Entity,
+ Metadata *File, unsigned Line, MDString *Name, Metadata *Elements,
+ StorageType Storage, bool ShouldCreate = true);
TempDIImportedEntity cloneImpl() const {
return getTemporary(getContext(), getTag(), getScope(), getEntity(),
- getFile(), getLine(), getName());
+ getFile(), getLine(), getName(), getElements());
}
public:
DEFINE_MDNODE_GET(DIImportedEntity,
(unsigned Tag, DIScope *Scope, DINode *Entity, DIFile *File,
- unsigned Line, StringRef Name = ""),
- (Tag, Scope, Entity, File, Line, Name))
+ unsigned Line, StringRef Name = "",
+ DINodeArray Elements = nullptr),
+ (Tag, Scope, Entity, File, Line, Name, Elements))
DEFINE_MDNODE_GET(DIImportedEntity,
(unsigned Tag, Metadata *Scope, Metadata *Entity,
- Metadata *File, unsigned Line, MDString *Name),
- (Tag, Scope, Entity, File, Line, Name))
+ Metadata *File, unsigned Line, MDString *Name,
+ Metadata *Elements = nullptr),
+ (Tag, Scope, Entity, File, Line, Name, Elements))
TempDIImportedEntity clone() const { return cloneImpl(); }
@@ -3328,11 +3389,15 @@ public:
DINode *getEntity() const { return cast_or_null<DINode>(getRawEntity()); }
StringRef getName() const { return getStringOperand(2); }
DIFile *getFile() const { return cast_or_null<DIFile>(getRawFile()); }
+ DINodeArray getElements() const {
+ return cast_or_null<MDTuple>(getRawElements());
+ }
Metadata *getRawScope() const { return getOperand(0); }
Metadata *getRawEntity() const { return getOperand(1); }
MDString *getRawName() const { return getOperandAs<MDString>(2); }
Metadata *getRawFile() const { return getOperand(3); }
+ Metadata *getRawElements() const { return getOperand(4); }
static bool classof(const Metadata *MD) {
return MD->getMetadataID() == DIImportedEntityKind;
@@ -3457,11 +3522,13 @@ class DIMacro : public DIMacroNode {
}
public:
- DEFINE_MDNODE_GET(DIMacro, (unsigned MIType, unsigned Line, StringRef Name,
- StringRef Value = ""),
+ DEFINE_MDNODE_GET(DIMacro,
+ (unsigned MIType, unsigned Line, StringRef Name,
+ StringRef Value = ""),
(MIType, Line, Name, Value))
- DEFINE_MDNODE_GET(DIMacro, (unsigned MIType, unsigned Line, MDString *Name,
- MDString *Value),
+ DEFINE_MDNODE_GET(DIMacro,
+ (unsigned MIType, unsigned Line, MDString *Name,
+ MDString *Value),
(MIType, Line, Name, Value))
TempDIMacro clone() const { return cloneImpl(); }
@@ -3508,11 +3575,13 @@ class DIMacroFile : public DIMacroNode {
}
public:
- DEFINE_MDNODE_GET(DIMacroFile, (unsigned MIType, unsigned Line, DIFile *File,
- DIMacroNodeArray Elements),
+ DEFINE_MDNODE_GET(DIMacroFile,
+ (unsigned MIType, unsigned Line, DIFile *File,
+ DIMacroNodeArray Elements),
(MIType, Line, File, Elements))
- DEFINE_MDNODE_GET(DIMacroFile, (unsigned MIType, unsigned Line,
- Metadata *File, Metadata *Elements),
+ DEFINE_MDNODE_GET(DIMacroFile,
+ (unsigned MIType, unsigned Line, Metadata *File,
+ Metadata *Elements),
(MIType, Line, File, Elements))
TempDIMacroFile clone() const { return cloneImpl(); }
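
A small, hedged sketch of consuming the new annotations operand from a DISubprogram. The element layout of each annotation node is not specified by this header, so the loop only prints the raw nodes; the function name is hypothetical.

#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

void dumpSubprogramAnnotations(const DISubprogram *SP) {
  DINodeArray Annotations = SP->getAnnotations();
  if (!Annotations)
    return; // Older metadata without the extra operand yields null here.
  for (const Metadata *Ann : Annotations->operands())
    Ann->print(errs());
}
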
diff --git a/llvm/include/llvm/IR/DerivedTypes.h b/llvm/include/llvm/IR/DerivedTypes.h
index b68a912b5f70..8a1b26e699e3 100644
--- a/llvm/include/llvm/IR/DerivedTypes.h
+++ b/llvm/include/llvm/IR/DerivedTypes.h
@@ -49,10 +49,11 @@ public:
/// This enum is just used to hold constants we need for IntegerType.
enum {
MIN_INT_BITS = 1, ///< Minimum number of bits that can be specified
- MAX_INT_BITS = (1<<24)-1 ///< Maximum number of bits that can be specified
+ MAX_INT_BITS = (1<<23) ///< Maximum number of bits that can be specified
///< Note that bit width is stored in the Type classes SubclassData field
- ///< which has 24 bits. This yields a maximum bit width of 16,777,215
- ///< bits.
+ ///< which has 24 bits. SelectionDAG type legalization can require a
+ ///< power of 2 IntegerType, so limit to the largest representable power
+ ///< of 2, 8388608.
};
/// This static method is the primary way of constructing an IntegerType.
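
For illustration, the widest integer type that can be requested after this change; the helper name is hypothetical.

#include "llvm/IR/DerivedTypes.h"

using namespace llvm;

IntegerType *widestIntegerType(LLVMContext &Ctx) {
  // MAX_INT_BITS is now 1 << 23, so i8388608 is the widest representable
  // IntegerType; asking for more trips the assertion in IntegerType::get.
  return IntegerType::get(Ctx, IntegerType::MAX_INT_BITS);
}
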
diff --git a/llvm/include/llvm/IR/DiagnosticInfo.h b/llvm/include/llvm/IR/DiagnosticInfo.h
index 5064f4f4edf7..73b0be43e136 100644
--- a/llvm/include/llvm/IR/DiagnosticInfo.h
+++ b/llvm/include/llvm/IR/DiagnosticInfo.h
@@ -33,6 +33,7 @@ namespace llvm {
// Forward declarations.
class DiagnosticPrinter;
+class CallInst;
class Function;
class Instruction;
class InstructionCost;
@@ -79,6 +80,7 @@ enum DiagnosticKind {
DK_PGOProfile,
DK_Unsupported,
DK_SrcMgr,
+ DK_DontCall,
DK_FirstPluginKind // Must be last value to work with
// getNextAvailablePluginDiagnosticKind
};
@@ -194,10 +196,9 @@ public:
/// \p The function that is concerned by this stack size diagnostic.
/// \p The computed stack size.
DiagnosticInfoResourceLimit(const Function &Fn, const char *ResourceName,
- uint64_t ResourceSize,
+ uint64_t ResourceSize, uint64_t ResourceLimit,
DiagnosticSeverity Severity = DS_Warning,
- DiagnosticKind Kind = DK_ResourceLimit,
- uint64_t ResourceLimit = 0)
+ DiagnosticKind Kind = DK_ResourceLimit)
: DiagnosticInfo(Kind, Severity), Fn(Fn), ResourceName(ResourceName),
ResourceSize(ResourceSize), ResourceLimit(ResourceLimit) {}
@@ -218,10 +219,10 @@ class DiagnosticInfoStackSize : public DiagnosticInfoResourceLimit {
void anchor() override;
public:
DiagnosticInfoStackSize(const Function &Fn, uint64_t StackSize,
- DiagnosticSeverity Severity = DS_Warning,
- uint64_t StackLimit = 0)
- : DiagnosticInfoResourceLimit(Fn, "stack frame size", StackSize, Severity,
- DK_StackSize, StackLimit) {}
+ uint64_t StackLimit,
+ DiagnosticSeverity Severity = DS_Warning)
+ : DiagnosticInfoResourceLimit(Fn, "stack frame size", StackSize,
+ StackLimit, Severity, DK_StackSize) {}
uint64_t getStackSize() const { return getResourceSize(); }
uint64_t getStackLimit() const { return getResourceLimit(); }
@@ -1070,6 +1071,27 @@ public:
}
};
+void diagnoseDontCall(const CallInst &CI);
+
+class DiagnosticInfoDontCall : public DiagnosticInfo {
+ StringRef CalleeName;
+ StringRef Note;
+ unsigned LocCookie;
+
+public:
+ DiagnosticInfoDontCall(StringRef CalleeName, StringRef Note,
+ DiagnosticSeverity DS, unsigned LocCookie)
+ : DiagnosticInfo(DK_DontCall, DS), CalleeName(CalleeName), Note(Note),
+ LocCookie(LocCookie) {}
+ StringRef getFunctionName() const { return CalleeName; }
+ StringRef getNote() const { return Note; }
+ unsigned getLocCookie() const { return LocCookie; }
+ void print(DiagnosticPrinter &DP) const override;
+ static bool classof(const DiagnosticInfo *DI) {
+ return DI->getKind() == DK_DontCall;
+ }
+};
+
} // end namespace llvm
#endif // LLVM_IR_DIAGNOSTICINFO_H
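
A hedged sketch of reacting to the new DK_DontCall diagnostic from a diagnostic handler; the handler itself is hypothetical, only the accessors come from the class above.

#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

void handleDontCall(const DiagnosticInfo &DI) {
  const auto *DC = dyn_cast<DiagnosticInfoDontCall>(&DI);
  if (!DC)
    return;
  errs() << "call to '" << DC->getFunctionName() << "' flagged";
  if (!DC->getNote().empty())
    errs() << ": " << DC->getNote();
  errs() << "\n";
}
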
diff --git a/llvm/include/llvm/IR/DiagnosticPrinter.h b/llvm/include/llvm/IR/DiagnosticPrinter.h
index 102932ceefa5..2df6fc3dfe73 100644
--- a/llvm/include/llvm/IR/DiagnosticPrinter.h
+++ b/llvm/include/llvm/IR/DiagnosticPrinter.h
@@ -1,4 +1,4 @@
-//===- llvm/Support/DiagnosticPrinter.h - Diagnostic Printer ----*- C++ -*-===//
+//===- llvm/IR/DiagnosticPrinter.h - Diagnostic Printer ---------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/include/llvm/IR/Dominators.h b/llvm/include/llvm/IR/Dominators.h
index 4d140c3ad0f2..475355af5647 100644
--- a/llvm/include/llvm/IR/Dominators.h
+++ b/llvm/include/llvm/IR/Dominators.h
@@ -277,6 +277,12 @@ struct DominatorTreeVerifierPass : PassInfoMixin<DominatorTreeVerifierPass> {
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
+/// Enables verification of dominator trees.
+///
+/// This check is expensive and is disabled by default. `-verify-dom-info`
+/// allows selectively enabling the check without needing to recompile.
+extern bool VerifyDomInfo;
+
/// Legacy analysis pass which computes a \c DominatorTree.
class DominatorTreeWrapperPass : public FunctionPass {
DominatorTree DT;
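
A tiny sketch of guarding an explicit verification on the newly exported flag; the caller context is assumed.

#include "llvm/IR/Dominators.h"

using namespace llvm;

void maybeVerify(DominatorTree &DT) {
  // Honors -verify-dom-info without forcing the expensive check on by default.
  if (VerifyDomInfo)
    DT.verify();
}
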
diff --git a/llvm/include/llvm/IR/FPEnv.h b/llvm/include/llvm/IR/FPEnv.h
index 621540000b5c..bf435ec6d109 100644
--- a/llvm/include/llvm/IR/FPEnv.h
+++ b/llvm/include/llvm/IR/FPEnv.h
@@ -39,24 +39,30 @@ enum ExceptionBehavior : uint8_t {
/// Returns a valid RoundingMode enumerator when given a string
/// that is valid as input in constrained intrinsic rounding mode
/// metadata.
-Optional<RoundingMode> StrToRoundingMode(StringRef);
+Optional<RoundingMode> convertStrToRoundingMode(StringRef);
/// For any RoundingMode enumerator, returns a string valid as input in
/// constrained intrinsic rounding mode metadata.
-Optional<StringRef> RoundingModeToStr(RoundingMode);
+Optional<StringRef> convertRoundingModeToStr(RoundingMode);
/// Returns a valid ExceptionBehavior enumerator when given a string
/// valid as input in constrained intrinsic exception behavior metadata.
-Optional<fp::ExceptionBehavior> StrToExceptionBehavior(StringRef);
+Optional<fp::ExceptionBehavior> convertStrToExceptionBehavior(StringRef);
/// For any ExceptionBehavior enumerator, returns a string valid as
/// input in constrained intrinsic exception behavior metadata.
-Optional<StringRef> ExceptionBehaviorToStr(fp::ExceptionBehavior);
+Optional<StringRef> convertExceptionBehaviorToStr(fp::ExceptionBehavior);
/// Returns true if the exception handling behavior and rounding mode
/// match what is used in the default floating point environment.
inline bool isDefaultFPEnvironment(fp::ExceptionBehavior EB, RoundingMode RM) {
return EB == fp::ebIgnore && RM == RoundingMode::NearestTiesToEven;
}
+
+/// Returns true if the rounding mode RM may be QRM at compile time or
+/// at run time.
+inline bool canRoundingModeBe(RoundingMode RM, RoundingMode QRM) {
+ return RM == QRM || RM == RoundingMode::Dynamic;
+}
}
#endif
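
A short sketch with the renamed helpers; treating an unrecognized metadata string as "not this mode" is a choice of this example, not of the header.

#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/FPEnv.h"

using namespace llvm;

// True if the rounding-mode metadata string permits round-to-nearest-even,
// either exactly or because the mode is dynamic at run time.
bool mayRoundToNearest(StringRef MDStr) {
  if (Optional<RoundingMode> RM = convertStrToRoundingMode(MDStr))
    return canRoundingModeBe(*RM, RoundingMode::NearestTiesToEven);
  return false;
}
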
diff --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h
index e0094e2afff2..669418eacbb0 100644
--- a/llvm/include/llvm/IR/Function.h
+++ b/llvm/include/llvm/IR/Function.h
@@ -48,6 +48,7 @@ typedef unsigned ID;
class AssemblyAnnotationWriter;
class Constant;
+struct DenormalMode;
class DISubprogram;
class LLVMContext;
class Module;
@@ -58,7 +59,8 @@ class User;
class BranchProbabilityInfo;
class BlockFrequencyInfo;
-class Function : public GlobalObject, public ilist_node<Function> {
+class LLVM_EXTERNAL_VISIBILITY Function : public GlobalObject,
+ public ilist_node<Function> {
public:
using BasicBlockListType = SymbolTableList<BasicBlock>;
@@ -245,72 +247,22 @@ public:
setValueSubclassData((getSubclassDataFromValue() & 0xc00f) | (ID << 4));
}
- /// Return the attribute list for this Function.
- AttributeList getAttributes() const { return AttributeSets; }
-
- /// Set the attribute list for this Function.
- void setAttributes(AttributeList Attrs) { AttributeSets = Attrs; }
-
- /// Add function attributes to this function.
- void addFnAttr(Attribute::AttrKind Kind) {
- addAttribute(AttributeList::FunctionIndex, Kind);
- }
-
- /// Add function attributes to this function.
- void addFnAttr(StringRef Kind, StringRef Val = StringRef()) {
- addAttribute(AttributeList::FunctionIndex,
- Attribute::get(getContext(), Kind, Val));
- }
-
- /// Add function attributes to this function.
- void addFnAttr(Attribute Attr) {
- addAttribute(AttributeList::FunctionIndex, Attr);
- }
-
- /// Remove function attributes from this function.
- void removeFnAttr(Attribute::AttrKind Kind) {
- removeAttribute(AttributeList::FunctionIndex, Kind);
- }
-
- /// Remove function attribute from this function.
- void removeFnAttr(StringRef Kind) {
- setAttributes(getAttributes().removeAttribute(
- getContext(), AttributeList::FunctionIndex, Kind));
- }
-
- /// A function will have the "coroutine.presplit" attribute if it's
- /// a coroutine and has not gone through full CoroSplit pass.
- bool isPresplitCoroutine() const {
- return hasFnAttribute("coroutine.presplit");
- }
-
- enum ProfileCountType { PCT_Invalid, PCT_Real, PCT_Synthetic };
+ enum ProfileCountType { PCT_Real, PCT_Synthetic };
/// Class to represent profile counts.
///
/// This class represents both real and synthetic profile counts.
class ProfileCount {
private:
- uint64_t Count;
- ProfileCountType PCT;
- static ProfileCount Invalid;
+ uint64_t Count = 0;
+ ProfileCountType PCT = PCT_Real;
public:
- ProfileCount() : Count(-1), PCT(PCT_Invalid) {}
ProfileCount(uint64_t Count, ProfileCountType PCT)
: Count(Count), PCT(PCT) {}
- bool hasValue() const { return PCT != PCT_Invalid; }
uint64_t getCount() const { return Count; }
ProfileCountType getType() const { return PCT; }
bool isSynthetic() const { return PCT == PCT_Synthetic; }
- explicit operator bool() { return hasValue(); }
- bool operator!() const { return !hasValue(); }
- // Update the count retaining the same profile count type.
- ProfileCount &setCount(uint64_t C) {
- Count = C;
- return *this;
- }
- static ProfileCount getInvalid() { return ProfileCount(-1, PCT_Invalid); }
};
/// Set the entry count for this function.
@@ -330,7 +282,7 @@ public:
///
/// Entry count is the number of times the function was executed.
/// When AllowSynthetic is false, only pgo_data will be returned.
- ProfileCount getEntryCount(bool AllowSynthetic = false) const;
+ Optional<ProfileCount> getEntryCount(bool AllowSynthetic = false) const;
/// Return true if the function is annotated with profile data.
///
@@ -351,43 +303,6 @@ public:
/// Get the section prefix for this function.
Optional<StringRef> getSectionPrefix() const;
- /// Return true if the function has the attribute.
- bool hasFnAttribute(Attribute::AttrKind Kind) const {
- return AttributeSets.hasFnAttribute(Kind);
- }
-
- /// Return true if the function has the attribute.
- bool hasFnAttribute(StringRef Kind) const {
- return AttributeSets.hasFnAttribute(Kind);
- }
-
- /// Return the attribute for the given attribute kind.
- Attribute getFnAttribute(Attribute::AttrKind Kind) const {
- return getAttribute(AttributeList::FunctionIndex, Kind);
- }
-
- /// Return the attribute for the given attribute kind.
- Attribute getFnAttribute(StringRef Kind) const {
- return getAttribute(AttributeList::FunctionIndex, Kind);
- }
-
- /// Return the stack alignment for the function.
- unsigned getFnStackAlignment() const {
- if (!hasFnAttribute(Attribute::StackAlignment))
- return 0;
- if (const auto MA =
- AttributeSets.getStackAlignment(AttributeList::FunctionIndex))
- return MA->value();
- return 0;
- }
-
- /// Return the stack alignment for the function.
- MaybeAlign getFnStackAlign() const {
- if (!hasFnAttribute(Attribute::StackAlignment))
- return None;
- return AttributeSets.getStackAlignment(AttributeList::FunctionIndex);
- }
-
/// hasGC/getGC/setGC/clearGC - The name of the garbage collection algorithm
/// to use during code generation.
bool hasGC() const {
@@ -397,17 +312,36 @@ public:
void setGC(std::string Str);
void clearGC();
- /// Returns true if the function has ssp, sspstrong, or sspreq fn attrs.
- bool hasStackProtectorFnAttr() const;
+ /// Return the attribute list for this Function.
+ AttributeList getAttributes() const { return AttributeSets; }
- /// adds the attribute to the list of attributes.
- void addAttribute(unsigned i, Attribute::AttrKind Kind);
+ /// Set the attribute list for this Function.
+ void setAttributes(AttributeList Attrs) { AttributeSets = Attrs; }
+ // TODO: remove non-AtIndex versions of these methods.
/// adds the attribute to the list of attributes.
- void addAttribute(unsigned i, Attribute Attr);
+ void addAttributeAtIndex(unsigned i, Attribute Attr);
+
+ /// Add function attributes to this function.
+ void addFnAttr(Attribute::AttrKind Kind);
+
+ /// Add function attributes to this function.
+ void addFnAttr(StringRef Kind, StringRef Val = StringRef());
+
+ /// Add function attributes to this function.
+ void addFnAttr(Attribute Attr);
+
+ /// Add function attributes to this function.
+ void addFnAttrs(const AttrBuilder &Attrs);
- /// adds the attributes to the list of attributes.
- void addAttributes(unsigned i, const AttrBuilder &Attrs);
+ /// Add return value attributes to this function.
+ void addRetAttr(Attribute::AttrKind Kind);
+
+ /// Add return value attributes to this function.
+ void addRetAttr(Attribute Attr);
+
+ /// Add return value attributes to this function.
+ void addRetAttrs(const AttrBuilder &Attrs);
/// adds the attribute to the list of attributes for the given arg.
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind);
@@ -419,13 +353,27 @@ public:
void addParamAttrs(unsigned ArgNo, const AttrBuilder &Attrs);
/// removes the attribute from the list of attributes.
- void removeAttribute(unsigned i, Attribute::AttrKind Kind);
+ void removeAttributeAtIndex(unsigned i, Attribute::AttrKind Kind);
/// removes the attribute from the list of attributes.
- void removeAttribute(unsigned i, StringRef Kind);
+ void removeAttributeAtIndex(unsigned i, StringRef Kind);
+
+ /// Remove function attributes from this function.
+ void removeFnAttr(Attribute::AttrKind Kind);
+
+ /// Remove function attribute from this function.
+ void removeFnAttr(StringRef Kind);
+
+ void removeFnAttrs(const AttrBuilder &Attrs);
- /// removes the attributes from the list of attributes.
- void removeAttributes(unsigned i, const AttrBuilder &Attrs);
+ /// removes the attribute from the return value list of attributes.
+ void removeRetAttr(Attribute::AttrKind Kind);
+
+ /// removes the attribute from the return value list of attributes.
+ void removeRetAttr(StringRef Kind);
+
+ /// removes the attributes from the return value list of attributes.
+ void removeRetAttrs(const AttrBuilder &Attrs);
/// removes the attribute from the list of attributes.
void removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind);
@@ -436,54 +384,57 @@ public:
/// removes the attribute from the list of attributes.
void removeParamAttrs(unsigned ArgNo, const AttrBuilder &Attrs);
- /// removes noundef and other attributes that imply undefined behavior if a
- /// `undef` or `poison` value is passed from the list of attributes.
- void removeParamUndefImplyingAttrs(unsigned ArgNo);
+ /// Return true if the function has the attribute.
+ bool hasFnAttribute(Attribute::AttrKind Kind) const;
- /// check if an attributes is in the list of attributes.
- bool hasAttribute(unsigned i, Attribute::AttrKind Kind) const {
- return getAttributes().hasAttribute(i, Kind);
- }
+ /// Return true if the function has the attribute.
+ bool hasFnAttribute(StringRef Kind) const;
- /// check if an attributes is in the list of attributes.
- bool hasParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const {
- return getAttributes().hasParamAttribute(ArgNo, Kind);
- }
+ /// check if an attribute is in the list of attributes for the return value.
+ bool hasRetAttribute(Attribute::AttrKind Kind) const;
- /// gets the specified attribute from the list of attributes.
- Attribute getParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const {
- return getAttributes().getParamAttr(ArgNo, Kind);
- }
+ /// check if an attribute is in the list of attributes.
+ bool hasParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const;
/// gets the attribute from the list of attributes.
- Attribute getAttribute(unsigned i, Attribute::AttrKind Kind) const {
- return AttributeSets.getAttribute(i, Kind);
- }
+ Attribute getAttributeAtIndex(unsigned i, Attribute::AttrKind Kind) const;
/// gets the attribute from the list of attributes.
- Attribute getAttribute(unsigned i, StringRef Kind) const {
- return AttributeSets.getAttribute(i, Kind);
+ Attribute getAttributeAtIndex(unsigned i, StringRef Kind) const;
+
+ /// Return the attribute for the given attribute kind.
+ Attribute getFnAttribute(Attribute::AttrKind Kind) const;
+
+ /// Return the attribute for the given attribute kind.
+ Attribute getFnAttribute(StringRef Kind) const;
+
+ /// gets the specified attribute from the list of attributes.
+ Attribute getParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const;
+
+ /// removes noundef and other attributes that imply undefined behavior if a
+ /// `undef` or `poison` value is passed from the list of attributes.
+ void removeParamUndefImplyingAttrs(unsigned ArgNo);
+
+ /// Return the stack alignment for the function.
+ MaybeAlign getFnStackAlign() const {
+ return AttributeSets.getFnStackAlignment();
}
- /// adds the dereferenceable attribute to the list of attributes.
- void addDereferenceableAttr(unsigned i, uint64_t Bytes);
+ /// Returns true if the function has ssp, sspstrong, or sspreq fn attrs.
+ bool hasStackProtectorFnAttr() const;
/// adds the dereferenceable attribute to the list of attributes for
/// the given arg.
void addDereferenceableParamAttr(unsigned ArgNo, uint64_t Bytes);
/// adds the dereferenceable_or_null attribute to the list of
- /// attributes.
- void addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes);
-
- /// adds the dereferenceable_or_null attribute to the list of
/// attributes for the given arg.
void addDereferenceableOrNullParamAttr(unsigned ArgNo, uint64_t Bytes);
/// Extract the alignment for a call or parameter (0=unknown).
/// FIXME: Remove this function once transition to Align is over.
/// Use getParamAlign() instead.
- unsigned getParamAlignment(unsigned ArgNo) const {
+ uint64_t getParamAlignment(unsigned ArgNo) const {
if (const auto MA = getParamAlign(ArgNo))
return MA->value();
return 0;
@@ -517,11 +468,9 @@ public:
return AttributeSets.getParamByRefType(ArgNo);
}
- /// Extract the number of dereferenceable bytes for a call or
- /// parameter (0=unknown).
- /// @param i AttributeList index, referring to a return value or argument.
- uint64_t getDereferenceableBytes(unsigned i) const {
- return AttributeSets.getDereferenceableBytes(i);
+ /// Extract the preallocated type for a parameter.
+ Type *getParamPreallocatedType(unsigned ArgNo) const {
+ return AttributeSets.getParamPreallocatedType(ArgNo);
}
/// Extract the number of dereferenceable bytes for a parameter.
@@ -530,13 +479,6 @@ public:
return AttributeSets.getParamDereferenceableBytes(ArgNo);
}
- /// Extract the number of dereferenceable_or_null bytes for a call or
- /// parameter (0=unknown).
- /// @param i AttributeList index, referring to a return value or argument.
- uint64_t getDereferenceableOrNullBytes(unsigned i) const {
- return AttributeSets.getDereferenceableOrNullBytes(i);
- }
-
/// Extract the number of dereferenceable_or_null bytes for a
/// parameter.
/// @param ArgNo AttributeList ArgNo, referring to an argument.
@@ -544,6 +486,12 @@ public:
return AttributeSets.getParamDereferenceableOrNullBytes(ArgNo);
}
+ /// A function will have the "coroutine.presplit" attribute if it's
+ /// a coroutine and has not gone through the full CoroSplit pass.
+ bool isPresplitCoroutine() const {
+ return hasFnAttribute("coroutine.presplit");
+ }
+
/// Determine if the function does not access memory.
bool doesNotAccessMemory() const {
return hasFnAttribute(Attribute::ReadNone);
@@ -692,19 +640,16 @@ public:
/// Determine if the function returns a structure through first
/// or second pointer argument.
bool hasStructRetAttr() const {
- return AttributeSets.hasParamAttribute(0, Attribute::StructRet) ||
- AttributeSets.hasParamAttribute(1, Attribute::StructRet);
+ return AttributeSets.hasParamAttr(0, Attribute::StructRet) ||
+ AttributeSets.hasParamAttr(1, Attribute::StructRet);
}
/// Determine if the parameter or return value is marked with NoAlias
/// attribute.
bool returnDoesNotAlias() const {
- return AttributeSets.hasAttribute(AttributeList::ReturnIndex,
- Attribute::NoAlias);
- }
- void setReturnDoesNotAlias() {
- addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
+ return AttributeSets.hasRetAttr(Attribute::NoAlias);
}
+ void setReturnDoesNotAlias() { addRetAttr(Attribute::NoAlias); }
/// Do not optimize this function (-O0).
bool hasOptNone() const { return hasFnAttribute(Attribute::OptimizeNone); }
@@ -904,13 +849,14 @@ public:
/// hasAddressTaken - returns true if there are any uses of this function
/// other than direct calls or invokes to it, or blockaddress expressions.
/// Optionally passes back an offending user for diagnostic purposes,
- /// ignores callback uses, assume like pointer annotation calls, and
- /// references in llvm.used and llvm.compiler.used variables.
- ///
+ /// ignores callback uses, assume-like pointer annotation calls, references in
+ /// llvm.used and llvm.compiler.used variables, and operand bundle
+ /// "clang.arc.attachedcall".
bool hasAddressTaken(const User ** = nullptr,
bool IgnoreCallbackUses = false,
bool IgnoreAssumeLikeCalls = true,
- bool IngoreLLVMUsed = false) const;
+ bool IngoreLLVMUsed = false,
+ bool IgnoreARCAttachedCall = false) const;
/// isDefTriviallyDead - Return true if it is trivially safe to remove
/// this function definition from the module (because it isn't externally
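A hedged migration sketch for the Function.h changes above: getEntryCount now returns Optional<ProfileCount> (PCT_Invalid is gone) and the index-based attribute calls give way to the Fn/Ret spellings; the helper bumpEntryCount is hypothetical.

#include "llvm/IR/Function.h"
using namespace llvm;

void bumpEntryCount(Function &F) {
  // Before: ProfileCount PC = F.getEntryCount(); if (PC.hasValue()) ...
  if (Optional<Function::ProfileCount> PC = F.getEntryCount())
    F.setEntryCount(Function::ProfileCount(PC->getCount() + 1, PC->getType()));
  // Before: F.addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind);
  F.addFnAttr(Attribute::NoUnwind);
  // Before: F.addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
  F.addRetAttr(Attribute::NoAlias);
}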
diff --git a/llvm/include/llvm/IR/GCStrategy.h b/llvm/include/llvm/IR/GCStrategy.h
index a69958d596c6..4fa8e3a8dcf4 100644
--- a/llvm/include/llvm/IR/GCStrategy.h
+++ b/llvm/include/llvm/IR/GCStrategy.h
@@ -131,6 +131,9 @@ public:
/// GCMetadataPrinterRegistery as well.
using GCRegistry = Registry<GCStrategy>;
+/// Lookup the GCStrategy object associated with the given gc name.
+std::unique_ptr<GCStrategy> getGCStrategy(const StringRef Name);
+
} // end namespace llvm
#endif // LLVM_IR_GCSTRATEGY_H
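A small sketch of the new getGCStrategy lookup declared above; the helper usesStatepoints is hypothetical, useStatepoints() is assumed to be the existing GCStrategy query, and error handling is omitted.

#include "llvm/IR/Function.h"
#include "llvm/IR/GCStrategy.h"
using namespace llvm;

bool usesStatepoints(const Function &F) {
  if (!F.hasGC())
    return false;
  // Look up the registered strategy named by the function's "gc" attribute.
  std::unique_ptr<GCStrategy> S = getGCStrategy(F.getGC());
  return S && S->useStatepoints();
}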
diff --git a/llvm/include/llvm/IR/GlobalAlias.h b/llvm/include/llvm/IR/GlobalAlias.h
index f2d9b9676ec9..01134448a8fa 100644
--- a/llvm/include/llvm/IR/GlobalAlias.h
+++ b/llvm/include/llvm/IR/GlobalAlias.h
@@ -15,7 +15,8 @@
#define LLVM_IR_GLOBALALIAS_H
#include "llvm/ADT/ilist_node.h"
-#include "llvm/IR/GlobalIndirectSymbol.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/OperandTraits.h"
#include "llvm/IR/Value.h"
namespace llvm {
@@ -24,8 +25,7 @@ class Twine;
class Module;
template <typename ValueSubClass> class SymbolTableListTraits;
-class GlobalAlias : public GlobalIndirectSymbol,
- public ilist_node<GlobalAlias> {
+class GlobalAlias : public GlobalValue, public ilist_node<GlobalAlias> {
friend class SymbolTableListTraits<GlobalAlias>;
GlobalAlias(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage,
@@ -58,6 +58,17 @@ public:
// Linkage, Type, Parent and AddressSpace taken from the Aliasee.
static GlobalAlias *create(const Twine &Name, GlobalValue *Aliasee);
+ // allocate space for exactly one operand
+ void *operator new(size_t S) { return User::operator new(S, 1); }
+ void operator delete(void *Ptr) { User::operator delete(Ptr); }
+
+ /// Provide fast operand accessors
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant);
+
+ void copyAttributesFrom(const GlobalAlias *Src) {
+ GlobalValue::copyAttributesFrom(Src);
+ }
+
/// removeFromParent - This method unlinks 'this' from the containing module,
/// but does not delete it.
///
@@ -71,10 +82,14 @@ public:
/// These methods retrieve and set alias target.
void setAliasee(Constant *Aliasee);
const Constant *getAliasee() const {
- return getIndirectSymbol();
+ return static_cast<Constant *>(Op<0>().get());
}
- Constant *getAliasee() {
- return getIndirectSymbol();
+ Constant *getAliasee() { return static_cast<Constant *>(Op<0>().get()); }
+
+ const GlobalObject *getAliaseeObject() const;
+ GlobalObject *getAliaseeObject() {
+ return const_cast<GlobalObject *>(
+ static_cast<const GlobalAlias *>(this)->getAliaseeObject());
}
static bool isValidLinkage(LinkageTypes L) {
@@ -88,6 +103,12 @@ public:
}
};
+template <>
+struct OperandTraits<GlobalAlias>
+ : public FixedNumOperandTraits<GlobalAlias, 1> {};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GlobalAlias, Constant)
+
} // end namespace llvm
#endif // LLVM_IR_GLOBALALIAS_H
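A hedged sketch of the GlobalAlias API after the GlobalIndirectSymbol removal above: the aliasee is now operand 0 of the alias itself and getBaseObject becomes getAliaseeObject; retargetAlias is a hypothetical helper.

#include "llvm/IR/GlobalAlias.h"
using namespace llvm;

void retargetAlias(GlobalAlias &GA, Constant *NewAliasee) {
  // Before (GlobalIndirectSymbol): GA.setIndirectSymbol(NewAliasee);
  GA.setAliasee(NewAliasee);
  // getAliaseeObject() replaces the old getBaseObject() and looks through
  // the aliasee constant to the underlying GlobalObject, if any.
  if (const GlobalObject *GO = GA.getAliaseeObject())
    (void)GO;
}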
diff --git a/llvm/include/llvm/IR/GlobalIFunc.h b/llvm/include/llvm/IR/GlobalIFunc.h
index ddd29c8a4a19..10088ee2fff4 100644
--- a/llvm/include/llvm/IR/GlobalIFunc.h
+++ b/llvm/include/llvm/IR/GlobalIFunc.h
@@ -18,7 +18,9 @@
#define LLVM_IR_GLOBALIFUNC_H
#include "llvm/ADT/ilist_node.h"
-#include "llvm/IR/GlobalIndirectSymbol.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/GlobalObject.h"
+#include "llvm/IR/OperandTraits.h"
#include "llvm/IR/Value.h"
namespace llvm {
@@ -29,8 +31,7 @@ class Module;
// Traits class for using GlobalIFunc in symbol table in Module.
template <typename ValueSubClass> class SymbolTableListTraits;
-class GlobalIFunc final : public GlobalIndirectSymbol,
- public ilist_node<GlobalIFunc> {
+class GlobalIFunc final : public GlobalObject, public ilist_node<GlobalIFunc> {
friend class SymbolTableListTraits<GlobalIFunc>;
GlobalIFunc(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage,
@@ -46,6 +47,17 @@ public:
LinkageTypes Linkage, const Twine &Name,
Constant *Resolver, Module *Parent);
+ // allocate space for exactly one operand
+ void *operator new(size_t S) { return User::operator new(S, 1); }
+ void operator delete(void *Ptr) { User::operator delete(Ptr); }
+
+ /// Provide fast operand accessors
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant);
+
+ void copyAttributesFrom(const GlobalIFunc *Src) {
+ GlobalObject::copyAttributesFrom(Src);
+ }
+
/// This method unlinks 'this' from the containing module, but does not
/// delete it.
void removeFromParent();
@@ -54,14 +66,22 @@ public:
void eraseFromParent();
/// These methods retrieve and set ifunc resolver function.
- void setResolver(Constant *Resolver) {
- setIndirectSymbol(Resolver);
- }
+ void setResolver(Constant *Resolver) { Op<0>().set(Resolver); }
const Constant *getResolver() const {
- return getIndirectSymbol();
+ return static_cast<Constant *>(Op<0>().get());
}
- Constant *getResolver() {
- return getIndirectSymbol();
+ Constant *getResolver() { return static_cast<Constant *>(Op<0>().get()); }
+
+ // Return the resolver function after peeling off potential ConstantExpr
+ // indirection.
+ const Function *getResolverFunction() const;
+ Function *getResolverFunction() {
+ return const_cast<Function *>(
+ static_cast<const GlobalIFunc *>(this)->getResolverFunction());
+ }
+
+ static FunctionType *getResolverFunctionType(Type *IFuncValTy) {
+ return FunctionType::get(IFuncValTy->getPointerTo(), false);
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
@@ -70,6 +90,12 @@ public:
}
};
+template <>
+struct OperandTraits<GlobalIFunc>
+ : public FixedNumOperandTraits<GlobalIFunc, 1> {};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GlobalIFunc, Constant)
+
} // end namespace llvm
#endif // LLVM_IR_GLOBALIFUNC_H
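A brief sketch of the new GlobalIFunc helpers above; inspectIFunc is hypothetical.

#include "llvm/IR/GlobalIFunc.h"
using namespace llvm;

const Function *inspectIFunc(const GlobalIFunc &GI) {
  // getResolverFunction() peels off any ConstantExpr wrapped around the
  // resolver operand; getResolver() still returns the raw Constant.
  const Function *Resolver = GI.getResolverFunction();
  // The expected resolver signature: returns a pointer to the ifunc's type.
  (void)GlobalIFunc::getResolverFunctionType(GI.getValueType());
  return Resolver;
}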
diff --git a/llvm/include/llvm/IR/GlobalIndirectSymbol.h b/llvm/include/llvm/IR/GlobalIndirectSymbol.h
deleted file mode 100644
index e45c7529885d..000000000000
--- a/llvm/include/llvm/IR/GlobalIndirectSymbol.h
+++ /dev/null
@@ -1,93 +0,0 @@
-//===- llvm/GlobalIndirectSymbol.h - GlobalIndirectSymbol class -*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the declaration of the GlobalIndirectSymbol class, which
-// is a base class for GlobalAlias and GlobalIFunc. It contains all common code
-// for aliases and ifuncs.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_IR_GLOBALINDIRECTSYMBOL_H
-#define LLVM_IR_GLOBALINDIRECTSYMBOL_H
-
-#include "llvm/IR/GlobalObject.h"
-#include "llvm/IR/GlobalValue.h"
-#include "llvm/IR/OperandTraits.h"
-#include "llvm/IR/User.h"
-#include "llvm/IR/Value.h"
-#include "llvm/Support/Casting.h"
-#include <cstddef>
-
-namespace llvm {
-
-class GlobalIndirectSymbol : public GlobalValue {
-protected:
- GlobalIndirectSymbol(Type *Ty, ValueTy VTy, unsigned AddressSpace,
- LinkageTypes Linkage, const Twine &Name, Constant *Symbol);
-
-public:
- GlobalIndirectSymbol(const GlobalIndirectSymbol &) = delete;
- GlobalIndirectSymbol &operator=(const GlobalIndirectSymbol &) = delete;
-
- // allocate space for exactly one operand
- void *operator new(size_t S) { return User::operator new(S, 1); }
- void operator delete(void *Ptr) { User::operator delete(Ptr); }
-
- /// Provide fast operand accessors
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant);
-
- void copyAttributesFrom(const GlobalValue *Src) {
- GlobalValue::copyAttributesFrom(Src);
- }
-
- /// These methods set and retrieve indirect symbol.
- void setIndirectSymbol(Constant *Symbol) {
- setOperand(0, Symbol);
- }
- const Constant *getIndirectSymbol() const {
- return getOperand(0);
- }
- Constant *getIndirectSymbol() {
- return const_cast<Constant *>(
- static_cast<const GlobalIndirectSymbol *>(this)->getIndirectSymbol());
- }
-
- const GlobalObject *getBaseObject() const;
- GlobalObject *getBaseObject() {
- return const_cast<GlobalObject *>(
- static_cast<const GlobalIndirectSymbol *>(this)->getBaseObject());
- }
-
- const GlobalObject *getBaseObject(const DataLayout &DL, APInt &Offset) const {
- return dyn_cast<GlobalObject>(
- getIndirectSymbol()->stripAndAccumulateInBoundsConstantOffsets(DL,
- Offset));
- }
- GlobalObject *getBaseObject(const DataLayout &DL, APInt &Offset) {
- return const_cast<GlobalObject *>(
- static_cast<const GlobalIndirectSymbol *>(this)
- ->getBaseObject(DL, Offset));
- }
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static bool classof(const Value *V) {
- return V->getValueID() == Value::GlobalAliasVal ||
- V->getValueID() == Value::GlobalIFuncVal;
- }
-};
-
-template <>
-struct OperandTraits<GlobalIndirectSymbol> :
- public FixedNumOperandTraits<GlobalIndirectSymbol, 1> {
-};
-
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GlobalIndirectSymbol, Constant)
-
-} // end namespace llvm
-
-#endif // LLVM_IR_GLOBALINDIRECTSYMBOL_H
diff --git a/llvm/include/llvm/IR/GlobalObject.h b/llvm/include/llvm/IR/GlobalObject.h
index 341fbec66080..e15cf718bb10 100644
--- a/llvm/include/llvm/IR/GlobalObject.h
+++ b/llvm/include/llvm/IR/GlobalObject.h
@@ -51,7 +51,7 @@ protected:
Comdat *ObjComdat;
enum {
- LastAlignmentBit = 4,
+ LastAlignmentBit = 5,
HasSectionHashEntryBit,
GlobalObjectBits,
@@ -68,7 +68,7 @@ public:
GlobalObject(const GlobalObject &) = delete;
/// FIXME: Remove this function once transition to Align is over.
- unsigned getAlignment() const {
+ uint64_t getAlignment() const {
MaybeAlign Align = getAlign();
return Align ? Align->value() : 0;
}
@@ -153,7 +153,8 @@ public:
// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const Value *V) {
return V->getValueID() == Value::FunctionVal ||
- V->getValueID() == Value::GlobalVariableVal;
+ V->getValueID() == Value::GlobalVariableVal ||
+ V->getValueID() == Value::GlobalIFuncVal;
}
private:
diff --git a/llvm/include/llvm/IR/GlobalValue.h b/llvm/include/llvm/IR/GlobalValue.h
index cf704d1f2374..1818f2a8f3cc 100644
--- a/llvm/include/llvm/IR/GlobalValue.h
+++ b/llvm/include/llvm/IR/GlobalValue.h
@@ -302,11 +302,14 @@ public:
static bool isAvailableExternallyLinkage(LinkageTypes Linkage) {
return Linkage == AvailableExternallyLinkage;
}
+ static bool isLinkOnceAnyLinkage(LinkageTypes Linkage) {
+ return Linkage == LinkOnceAnyLinkage;
+ }
static bool isLinkOnceODRLinkage(LinkageTypes Linkage) {
return Linkage == LinkOnceODRLinkage;
}
static bool isLinkOnceLinkage(LinkageTypes Linkage) {
- return Linkage == LinkOnceAnyLinkage || Linkage == LinkOnceODRLinkage;
+ return isLinkOnceAnyLinkage(Linkage) || isLinkOnceODRLinkage(Linkage);
}
static bool isWeakAnyLinkage(LinkageTypes Linkage) {
return Linkage == WeakAnyLinkage;
@@ -433,6 +436,9 @@ public:
return isAvailableExternallyLinkage(getLinkage());
}
bool hasLinkOnceLinkage() const { return isLinkOnceLinkage(getLinkage()); }
+ bool hasLinkOnceAnyLinkage() const {
+ return isLinkOnceAnyLinkage(getLinkage());
+ }
bool hasLinkOnceODRLinkage() const {
return isLinkOnceODRLinkage(getLinkage());
}
@@ -548,10 +554,10 @@ public:
return !(isDeclarationForLinker() || isWeakForLinker());
}
- const GlobalObject *getBaseObject() const;
- GlobalObject *getBaseObject() {
+ const GlobalObject *getAliaseeObject() const;
+ GlobalObject *getAliaseeObject() {
return const_cast<GlobalObject *>(
- static_cast<const GlobalValue *>(this)->getBaseObject());
+ static_cast<const GlobalValue *>(this)->getAliaseeObject());
}
/// Returns whether this is a reference to an absolute symbol.
diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h
index 8998ad0f94a9..b4e099e4ec20 100644
--- a/llvm/include/llvm/IR/IRBuilder.h
+++ b/llvm/include/llvm/IR/IRBuilder.h
@@ -316,7 +316,7 @@ public:
/// Set the exception handling to be used with constrained floating point
void setDefaultConstrainedExcept(fp::ExceptionBehavior NewExcept) {
#ifndef NDEBUG
- Optional<StringRef> ExceptStr = ExceptionBehaviorToStr(NewExcept);
+ Optional<StringRef> ExceptStr = convertExceptionBehaviorToStr(NewExcept);
assert(ExceptStr.hasValue() && "Garbage strict exception behavior!");
#endif
DefaultConstrainedExcept = NewExcept;
@@ -325,7 +325,7 @@ public:
/// Set the rounding mode handling to be used with constrained floating point
void setDefaultConstrainedRounding(RoundingMode NewRounding) {
#ifndef NDEBUG
- Optional<StringRef> RoundingStr = RoundingModeToStr(NewRounding);
+ Optional<StringRef> RoundingStr = convertRoundingModeToStr(NewRounding);
assert(RoundingStr.hasValue() && "Garbage strict rounding mode!");
#endif
DefaultConstrainedRounding = NewRounding;
@@ -351,7 +351,7 @@ public:
}
void setConstrainedFPCallAttr(CallBase *I) {
- I->addAttribute(AttributeList::FunctionIndex, Attribute::StrictFP);
+ I->addFnAttr(Attribute::StrictFP);
}
void setDefaultOperandBundles(ArrayRef<OperandBundleDef> OpBundles) {
@@ -697,12 +697,16 @@ public:
MDNode *TBAAStructTag = nullptr, MDNode *ScopeTag = nullptr,
MDNode *NoAliasTag = nullptr);
- /// Create a vector fadd reduction intrinsic of the source vector.
- /// The first parameter is a scalar accumulator value for ordered reductions.
+ /// Create a sequential vector fadd reduction intrinsic of the source vector.
+ /// The first parameter is a scalar accumulator value. An unordered reduction
+ /// can be created by adding the reassoc fast-math flag to the resulting
+ /// sequential reduction.
CallInst *CreateFAddReduce(Value *Acc, Value *Src);
- /// Create a vector fmul reduction intrinsic of the source vector.
- /// The first parameter is a scalar accumulator value for ordered reductions.
+ /// Create a sequential vector fmul reduction intrinsic of the source vector.
+ /// The first parameter is a scalar accumulator value. An unordered reduction
+ /// can be created by adding the reassoc fast-math flag to the resulting
+ /// sequential reduction.
CallInst *CreateFMulReduce(Value *Acc, Value *Src);
/// Create a vector int add reduction intrinsic of the source vector.
@@ -1172,7 +1176,7 @@ private:
if (Rounding.hasValue())
UseRounding = Rounding.getValue();
- Optional<StringRef> RoundingStr = RoundingModeToStr(UseRounding);
+ Optional<StringRef> RoundingStr = convertRoundingModeToStr(UseRounding);
assert(RoundingStr.hasValue() && "Garbage strict rounding mode!");
auto *RoundingMDS = MDString::get(Context, RoundingStr.getValue());
@@ -1185,7 +1189,7 @@ private:
if (Except.hasValue())
UseExcept = Except.getValue();
- Optional<StringRef> ExceptStr = ExceptionBehaviorToStr(UseExcept);
+ Optional<StringRef> ExceptStr = convertExceptionBehaviorToStr(UseExcept);
assert(ExceptStr.hasValue() && "Garbage strict exception behavior!");
auto *ExceptMDS = MDString::get(Context, ExceptStr.getValue());
@@ -2448,6 +2452,16 @@ public:
return CreateExtractElement(Vec, getInt64(Idx), Name);
}
+ Value *CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx,
+ const Twine &Name = "") {
+ return CreateInsertElement(PoisonValue::get(VecTy), NewElt, Idx, Name);
+ }
+
+ Value *CreateInsertElement(Type *VecTy, Value *NewElt, uint64_t Idx,
+ const Twine &Name = "") {
+ return CreateInsertElement(PoisonValue::get(VecTy), NewElt, Idx, Name);
+ }
+
Value *CreateInsertElement(Value *Vec, Value *NewElt, Value *Idx,
const Twine &Name = "") {
if (auto *VC = dyn_cast<Constant>(Vec))
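A short sketch of the new type-taking CreateInsertElement overloads above, which start from a poison vector of the given type; insertIntoFreshVector is hypothetical.

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

Value *insertIntoFreshVector(IRBuilder<> &B, Value *Val) {
  auto *VecTy = FixedVectorType::get(B.getInt32Ty(), 4);
  // Equivalent to CreateInsertElement(PoisonValue::get(VecTy), Val, 0):
  // lane 0 holds Val, the remaining lanes stay poison.
  return B.CreateInsertElement(VecTy, Val, uint64_t(0));
}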
diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h
index ef2c279ed455..143a87f4997d 100644
--- a/llvm/include/llvm/IR/InstrTypes.h
+++ b/llvm/include/llvm/IR/InstrTypes.h
@@ -19,6 +19,7 @@
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
@@ -755,6 +756,20 @@ public:
using PredicateField =
Bitfield::Element<Predicate, 0, 6, LAST_ICMP_PREDICATE>;
+ /// Returns the sequence of all FCmp predicates.
+ static auto FCmpPredicates() {
+ return enum_seq_inclusive(Predicate::FIRST_FCMP_PREDICATE,
+ Predicate::LAST_FCMP_PREDICATE,
+ force_iteration_on_noniterable_enum);
+ }
+
+ /// Returns the sequence of all ICmp predicates.
+ static auto ICmpPredicates() {
+ return enum_seq_inclusive(Predicate::FIRST_ICMP_PREDICATE,
+ Predicate::LAST_ICMP_PREDICATE,
+ force_iteration_on_noniterable_enum);
+ }
+
protected:
CmpInst(Type *ty, Instruction::OtherOps op, Predicate pred,
Value *LHS, Value *RHS, const Twine &Name = "",
@@ -1325,33 +1340,23 @@ public:
bool arg_empty() const { return arg_end() == arg_begin(); }
unsigned arg_size() const { return arg_end() - arg_begin(); }
- // Legacy API names that duplicate the above and will be removed once users
- // are migrated.
- iterator_range<User::op_iterator> arg_operands() {
- return make_range(arg_begin(), arg_end());
- }
- iterator_range<User::const_op_iterator> arg_operands() const {
- return make_range(arg_begin(), arg_end());
- }
- unsigned getNumArgOperands() const { return arg_size(); }
-
Value *getArgOperand(unsigned i) const {
- assert(i < getNumArgOperands() && "Out of bounds!");
+ assert(i < arg_size() && "Out of bounds!");
return getOperand(i);
}
void setArgOperand(unsigned i, Value *v) {
- assert(i < getNumArgOperands() && "Out of bounds!");
+ assert(i < arg_size() && "Out of bounds!");
setOperand(i, v);
}
/// Wrappers for getting the \c Use of a call argument.
const Use &getArgOperandUse(unsigned i) const {
- assert(i < getNumArgOperands() && "Out of bounds!");
+ assert(i < arg_size() && "Out of bounds!");
return User::getOperandUse(i);
}
Use &getArgOperandUse(unsigned i) {
- assert(i < getNumArgOperands() && "Out of bounds!");
+ assert(i < arg_size() && "Out of bounds!");
return User::getOperandUse(i);
}
@@ -1485,92 +1490,104 @@ public:
/// the attribute is allowed for the call.
bool hasFnAttr(StringRef Kind) const { return hasFnAttrImpl(Kind); }
+ // TODO: remove non-AtIndex versions of these methods.
/// adds the attribute to the list of attributes.
- void addAttribute(unsigned i, Attribute::AttrKind Kind) {
- AttributeList PAL = getAttributes();
- PAL = PAL.addAttribute(getContext(), i, Kind);
- setAttributes(PAL);
+ void addAttributeAtIndex(unsigned i, Attribute::AttrKind Kind) {
+ Attrs = Attrs.addAttributeAtIndex(getContext(), i, Kind);
}
/// adds the attribute to the list of attributes.
- void addAttribute(unsigned i, Attribute Attr) {
- AttributeList PAL = getAttributes();
- PAL = PAL.addAttribute(getContext(), i, Attr);
- setAttributes(PAL);
+ void addAttributeAtIndex(unsigned i, Attribute Attr) {
+ Attrs = Attrs.addAttributeAtIndex(getContext(), i, Attr);
+ }
+
+ /// Adds the attribute to the function.
+ void addFnAttr(Attribute::AttrKind Kind) {
+ Attrs = Attrs.addFnAttribute(getContext(), Kind);
+ }
+
+ /// Adds the attribute to the function.
+ void addFnAttr(Attribute Attr) {
+ Attrs = Attrs.addFnAttribute(getContext(), Attr);
+ }
+
+ /// Adds the attribute to the return value.
+ void addRetAttr(Attribute::AttrKind Kind) {
+ Attrs = Attrs.addRetAttribute(getContext(), Kind);
+ }
+
+ /// Adds the attribute to the return value.
+ void addRetAttr(Attribute Attr) {
+ Attrs = Attrs.addRetAttribute(getContext(), Attr);
}
/// Adds the attribute to the indicated argument
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) {
- assert(ArgNo < getNumArgOperands() && "Out of bounds");
- AttributeList PAL = getAttributes();
- PAL = PAL.addParamAttribute(getContext(), ArgNo, Kind);
- setAttributes(PAL);
+ assert(ArgNo < arg_size() && "Out of bounds");
+ Attrs = Attrs.addParamAttribute(getContext(), ArgNo, Kind);
}
/// Adds the attribute to the indicated argument
void addParamAttr(unsigned ArgNo, Attribute Attr) {
- assert(ArgNo < getNumArgOperands() && "Out of bounds");
- AttributeList PAL = getAttributes();
- PAL = PAL.addParamAttribute(getContext(), ArgNo, Attr);
- setAttributes(PAL);
+ assert(ArgNo < arg_size() && "Out of bounds");
+ Attrs = Attrs.addParamAttribute(getContext(), ArgNo, Attr);
}
/// removes the attribute from the list of attributes.
- void removeAttribute(unsigned i, Attribute::AttrKind Kind) {
- AttributeList PAL = getAttributes();
- PAL = PAL.removeAttribute(getContext(), i, Kind);
- setAttributes(PAL);
+ void removeAttributeAtIndex(unsigned i, Attribute::AttrKind Kind) {
+ Attrs = Attrs.removeAttributeAtIndex(getContext(), i, Kind);
}
/// removes the attribute from the list of attributes.
- void removeAttribute(unsigned i, StringRef Kind) {
- AttributeList PAL = getAttributes();
- PAL = PAL.removeAttribute(getContext(), i, Kind);
- setAttributes(PAL);
+ void removeAttributeAtIndex(unsigned i, StringRef Kind) {
+ Attrs = Attrs.removeAttributeAtIndex(getContext(), i, Kind);
+ }
+
+ /// Removes the attributes from the function
+ void removeFnAttrs(const AttrBuilder &AttrsToRemove) {
+ Attrs = Attrs.removeFnAttributes(getContext(), AttrsToRemove);
+ }
+
+ /// Removes the attribute from the function
+ void removeFnAttr(Attribute::AttrKind Kind) {
+ Attrs = Attrs.removeFnAttribute(getContext(), Kind);
}
- void removeAttributes(unsigned i, const AttrBuilder &Attrs) {
- AttributeList PAL = getAttributes();
- PAL = PAL.removeAttributes(getContext(), i, Attrs);
- setAttributes(PAL);
+ /// Removes the attribute from the return value
+ void removeRetAttr(Attribute::AttrKind Kind) {
+ Attrs = Attrs.removeRetAttribute(getContext(), Kind);
+ }
+
+ /// Removes the attributes from the return value
+ void removeRetAttrs(const AttrBuilder &AttrsToRemove) {
+ Attrs = Attrs.removeRetAttributes(getContext(), AttrsToRemove);
}
/// Removes the attribute from the given argument
void removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) {
- assert(ArgNo < getNumArgOperands() && "Out of bounds");
- AttributeList PAL = getAttributes();
- PAL = PAL.removeParamAttribute(getContext(), ArgNo, Kind);
- setAttributes(PAL);
+ assert(ArgNo < arg_size() && "Out of bounds");
+ Attrs = Attrs.removeParamAttribute(getContext(), ArgNo, Kind);
}
/// Removes the attribute from the given argument
void removeParamAttr(unsigned ArgNo, StringRef Kind) {
- assert(ArgNo < getNumArgOperands() && "Out of bounds");
- AttributeList PAL = getAttributes();
- PAL = PAL.removeParamAttribute(getContext(), ArgNo, Kind);
- setAttributes(PAL);
+ assert(ArgNo < arg_size() && "Out of bounds");
+ Attrs = Attrs.removeParamAttribute(getContext(), ArgNo, Kind);
}
/// Removes the attributes from the given argument
- void removeParamAttrs(unsigned ArgNo, const AttrBuilder &Attrs) {
- AttributeList PAL = getAttributes();
- PAL = PAL.removeParamAttributes(getContext(), ArgNo, Attrs);
- setAttributes(PAL);
+ void removeParamAttrs(unsigned ArgNo, const AttrBuilder &AttrsToRemove) {
+ Attrs = Attrs.removeParamAttributes(getContext(), ArgNo, AttrsToRemove);
}
/// adds the dereferenceable attribute to the list of attributes.
- void addDereferenceableAttr(unsigned i, uint64_t Bytes) {
- AttributeList PAL = getAttributes();
- PAL = PAL.addDereferenceableAttr(getContext(), i, Bytes);
- setAttributes(PAL);
+ void addDereferenceableParamAttr(unsigned i, uint64_t Bytes) {
+ Attrs = Attrs.addDereferenceableParamAttr(getContext(), i, Bytes);
}
- /// adds the dereferenceable_or_null attribute to the list of
- /// attributes.
- void addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes) {
- AttributeList PAL = getAttributes();
- PAL = PAL.addDereferenceableOrNullAttr(getContext(), i, Bytes);
- setAttributes(PAL);
+ /// adds the dereferenceable attribute to the list of attributes.
+ void addDereferenceableRetAttr(uint64_t Bytes) {
+ Attrs = Attrs.addDereferenceableRetAttr(getContext(), Bytes);
}
/// Determine whether the return value has the given attribute.
@@ -1584,24 +1601,34 @@ public:
bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const;
/// Get the attribute of a given kind at a position.
- Attribute getAttribute(unsigned i, Attribute::AttrKind Kind) const {
- return getAttributes().getAttribute(i, Kind);
+ Attribute getAttributeAtIndex(unsigned i, Attribute::AttrKind Kind) const {
+ return getAttributes().getAttributeAtIndex(i, Kind);
}
/// Get the attribute of a given kind at a position.
- Attribute getAttribute(unsigned i, StringRef Kind) const {
- return getAttributes().getAttribute(i, Kind);
+ Attribute getAttributeAtIndex(unsigned i, StringRef Kind) const {
+ return getAttributes().getAttributeAtIndex(i, Kind);
+ }
+
+ /// Get the attribute of a given kind for the function.
+ Attribute getFnAttr(StringRef Kind) const {
+ return getAttributes().getFnAttr(Kind);
+ }
+
+ /// Get the attribute of a given kind for the function.
+ Attribute getFnAttr(Attribute::AttrKind Kind) const {
+ return getAttributes().getFnAttr(Kind);
}
/// Get the attribute of a given kind from a given arg
Attribute getParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) const {
- assert(ArgNo < getNumArgOperands() && "Out of bounds");
+ assert(ArgNo < arg_size() && "Out of bounds");
return getAttributes().getParamAttr(ArgNo, Kind);
}
/// Get the attribute of a given kind from a given arg
Attribute getParamAttr(unsigned ArgNo, StringRef Kind) const {
- assert(ArgNo < getNumArgOperands() && "Out of bounds");
+ assert(ArgNo < arg_size() && "Out of bounds");
return getAttributes().getParamAttr(ArgNo, Kind);
}
@@ -1609,42 +1636,35 @@ public:
/// A.
///
/// Data operands include call arguments and values used in operand bundles,
- /// but does not include the callee operand. This routine dispatches to the
- /// underlying AttributeList or the OperandBundleUser as appropriate.
+ /// but does not include the callee operand.
///
/// The index \p i is interpreted as
///
- /// \p i == Attribute::ReturnIndex -> the return value
- /// \p i in [1, arg_size + 1) -> argument number (\p i - 1)
- /// \p i in [arg_size + 1, data_operand_size + 1) -> bundle operand at index
- /// (\p i - 1) in the operand list.
+ /// \p i in [0, arg_size) -> argument number (\p i)
+ /// \p i in [arg_size, data_operand_size) -> bundle operand at index
+ /// (\p i) in the operand list.
bool dataOperandHasImpliedAttr(unsigned i, Attribute::AttrKind Kind) const {
// Note that we have to add one because `i` isn't zero-indexed.
- assert(i < (getNumArgOperands() + getNumTotalBundleOperands() + 1) &&
+ assert(i < arg_size() + getNumTotalBundleOperands() &&
"Data operand index out of bounds!");
// The attribute A can either be directly specified, if the operand in
// question is a call argument; or be indirectly implied by the kind of its
// containing operand bundle, if the operand is a bundle operand.
- if (i == AttributeList::ReturnIndex)
- return hasRetAttr(Kind);
-
- // FIXME: Avoid these i - 1 calculations and update the API to use
- // zero-based indices.
- if (i < (getNumArgOperands() + 1))
- return paramHasAttr(i - 1, Kind);
+ if (i < arg_size())
+ return paramHasAttr(i, Kind);
- assert(hasOperandBundles() && i >= (getBundleOperandsStartIndex() + 1) &&
+ assert(hasOperandBundles() && i >= getBundleOperandsStartIndex() &&
"Must be either a call argument or an operand bundle!");
- return bundleOperandHasAttr(i - 1, Kind);
+ return bundleOperandHasAttr(i, Kind);
}
/// Determine whether this data operand is not captured.
// FIXME: Once this API is no longer duplicated in `CallSite`, rename this to
// better indicate that this may return a conservative answer.
bool doesNotCapture(unsigned OpNo) const {
- return dataOperandHasImpliedAttr(OpNo + 1, Attribute::NoCapture);
+ return dataOperandHasImpliedAttr(OpNo, Attribute::NoCapture);
}
/// Determine whether this argument is passed by value.
@@ -1685,21 +1705,21 @@ public:
// FIXME: Once this API is no longer duplicated in `CallSite`, rename this to
// better indicate that this may return a conservative answer.
bool doesNotAccessMemory(unsigned OpNo) const {
- return dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadNone);
+ return dataOperandHasImpliedAttr(OpNo, Attribute::ReadNone);
}
// FIXME: Once this API is no longer duplicated in `CallSite`, rename this to
// better indicate that this may return a conservative answer.
bool onlyReadsMemory(unsigned OpNo) const {
- return dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadOnly) ||
- dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadNone);
+ return dataOperandHasImpliedAttr(OpNo, Attribute::ReadOnly) ||
+ dataOperandHasImpliedAttr(OpNo, Attribute::ReadNone);
}
// FIXME: Once this API is no longer duplicated in `CallSite`, rename this to
// better indicate that this may return a conservative answer.
bool doesNotReadMemory(unsigned OpNo) const {
- return dataOperandHasImpliedAttr(OpNo + 1, Attribute::WriteOnly) ||
- dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadNone);
+ return dataOperandHasImpliedAttr(OpNo, Attribute::WriteOnly) ||
+ dataOperandHasImpliedAttr(OpNo, Attribute::ReadNone);
}
/// Extract the alignment of the return value.
@@ -1743,14 +1763,26 @@ public:
/// Extract the number of dereferenceable bytes for a call or
/// parameter (0=unknown).
- uint64_t getDereferenceableBytes(unsigned i) const {
- return Attrs.getDereferenceableBytes(i);
+ uint64_t getRetDereferenceableBytes() const {
+ return Attrs.getRetDereferenceableBytes();
+ }
+
+ /// Extract the number of dereferenceable bytes for a call or
+ /// parameter (0=unknown).
+ uint64_t getParamDereferenceableBytes(unsigned i) const {
+ return Attrs.getParamDereferenceableBytes(i);
}
- /// Extract the number of dereferenceable_or_null bytes for a call or
+ /// Extract the number of dereferenceable_or_null bytes for a call
+ /// (0=unknown).
+ uint64_t getRetDereferenceableOrNullBytes() const {
+ return Attrs.getRetDereferenceableOrNullBytes();
+ }
+
+ /// Extract the number of dereferenceable_or_null bytes for a
/// parameter (0=unknown).
- uint64_t getDereferenceableOrNullBytes(unsigned i) const {
- return Attrs.getDereferenceableOrNullBytes(i);
+ uint64_t getParamDereferenceableOrNullBytes(unsigned i) const {
+ return Attrs.getParamDereferenceableOrNullBytes(i);
}
/// Return true if the return value is known to be not null.
@@ -1760,7 +1792,7 @@ public:
/// Determine if the return value is marked with NoAlias attribute.
bool returnDoesNotAlias() const {
- return Attrs.hasAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
+ return Attrs.hasRetAttr(Attribute::NoAlias);
}
/// If one of the arguments has the 'returned' attribute, returns its
@@ -1779,40 +1811,30 @@ public:
/// Return true if the call should not be inlined.
bool isNoInline() const { return hasFnAttr(Attribute::NoInline); }
- void setIsNoInline() {
- addAttribute(AttributeList::FunctionIndex, Attribute::NoInline);
- }
+ void setIsNoInline() { addFnAttr(Attribute::NoInline); }
/// Determine if the call does not access memory.
bool doesNotAccessMemory() const { return hasFnAttr(Attribute::ReadNone); }
- void setDoesNotAccessMemory() {
- addAttribute(AttributeList::FunctionIndex, Attribute::ReadNone);
- }
+ void setDoesNotAccessMemory() { addFnAttr(Attribute::ReadNone); }
/// Determine if the call does not access or only reads memory.
bool onlyReadsMemory() const {
return doesNotAccessMemory() || hasFnAttr(Attribute::ReadOnly);
}
- void setOnlyReadsMemory() {
- addAttribute(AttributeList::FunctionIndex, Attribute::ReadOnly);
- }
+ void setOnlyReadsMemory() { addFnAttr(Attribute::ReadOnly); }
/// Determine if the call does not access or only writes memory.
bool doesNotReadMemory() const {
return doesNotAccessMemory() || hasFnAttr(Attribute::WriteOnly);
}
- void setDoesNotReadMemory() {
- addAttribute(AttributeList::FunctionIndex, Attribute::WriteOnly);
- }
+ void setDoesNotReadMemory() { addFnAttr(Attribute::WriteOnly); }
/// Determine if the call can access memory only using pointers based
/// on its arguments.
bool onlyAccessesArgMemory() const {
return hasFnAttr(Attribute::ArgMemOnly);
}
- void setOnlyAccessesArgMemory() {
- addAttribute(AttributeList::FunctionIndex, Attribute::ArgMemOnly);
- }
+ void setOnlyAccessesArgMemory() { addFnAttr(Attribute::ArgMemOnly); }
/// Determine if the function may only access memory that is
/// inaccessible from the IR.
@@ -1820,7 +1842,7 @@ public:
return hasFnAttr(Attribute::InaccessibleMemOnly);
}
void setOnlyAccessesInaccessibleMemory() {
- addAttribute(AttributeList::FunctionIndex, Attribute::InaccessibleMemOnly);
+ addFnAttr(Attribute::InaccessibleMemOnly);
}
/// Determine if the function may only access memory that is
@@ -1829,49 +1851,36 @@ public:
return hasFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
}
void setOnlyAccessesInaccessibleMemOrArgMem() {
- addAttribute(AttributeList::FunctionIndex,
- Attribute::InaccessibleMemOrArgMemOnly);
+ addFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
}
/// Determine if the call cannot return.
bool doesNotReturn() const { return hasFnAttr(Attribute::NoReturn); }
- void setDoesNotReturn() {
- addAttribute(AttributeList::FunctionIndex, Attribute::NoReturn);
- }
+ void setDoesNotReturn() { addFnAttr(Attribute::NoReturn); }
/// Determine if the call should not perform indirect branch tracking.
bool doesNoCfCheck() const { return hasFnAttr(Attribute::NoCfCheck); }
/// Determine if the call cannot unwind.
bool doesNotThrow() const { return hasFnAttr(Attribute::NoUnwind); }
- void setDoesNotThrow() {
- addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind);
- }
+ void setDoesNotThrow() { addFnAttr(Attribute::NoUnwind); }
/// Determine if the invoke cannot be duplicated.
bool cannotDuplicate() const { return hasFnAttr(Attribute::NoDuplicate); }
- void setCannotDuplicate() {
- addAttribute(AttributeList::FunctionIndex, Attribute::NoDuplicate);
- }
+ void setCannotDuplicate() { addFnAttr(Attribute::NoDuplicate); }
/// Determine if the call cannot be tail merged.
bool cannotMerge() const { return hasFnAttr(Attribute::NoMerge); }
- void setCannotMerge() {
- addAttribute(AttributeList::FunctionIndex, Attribute::NoMerge);
- }
+ void setCannotMerge() { addFnAttr(Attribute::NoMerge); }
/// Determine if the invoke is convergent
bool isConvergent() const { return hasFnAttr(Attribute::Convergent); }
- void setConvergent() {
- addAttribute(AttributeList::FunctionIndex, Attribute::Convergent);
- }
- void setNotConvergent() {
- removeAttribute(AttributeList::FunctionIndex, Attribute::Convergent);
- }
+ void setConvergent() { addFnAttr(Attribute::Convergent); }
+ void setNotConvergent() { removeFnAttr(Attribute::Convergent); }
/// Determine if the call returns a structure through first
/// pointer argument.
bool hasStructRetAttr() const {
- if (getNumArgOperands() == 0)
+ if (arg_empty())
return false;
// Be friendly and also check the callee.
@@ -1918,6 +1927,13 @@ public:
Idx < getBundleOperandsEndIndex();
}
+ /// Return true if the operand at index \p Idx is a bundle operand that has
+ /// tag ID \p ID.
+ bool isOperandBundleOfType(uint32_t ID, unsigned Idx) const {
+ return isBundleOperand(Idx) &&
+ getOperandBundleForOperand(Idx).getTagID() == ID;
+ }
+
/// Returns true if the use is a bundle operand.
bool isBundleOperand(const Use *U) const {
assert(this == U->getUser() &&
@@ -2258,7 +2274,7 @@ private:
bool hasFnAttrOnCalledFunction(StringRef Kind) const;
template <typename AttrKind> bool hasFnAttrImpl(AttrKind Kind) const {
- if (Attrs.hasFnAttribute(Kind))
+ if (Attrs.hasFnAttr(Kind))
return true;
// Operand bundles override attributes on the called function, but don't
@@ -2272,12 +2288,12 @@ private:
/// Determine whether the return value has the given attribute. Supports
/// Attribute::AttrKind and StringRef as \p AttrKind types.
template <typename AttrKind> bool hasRetAttrImpl(AttrKind Kind) const {
- if (Attrs.hasAttribute(AttributeList::ReturnIndex, Kind))
+ if (Attrs.hasRetAttr(Kind))
return true;
// Look at the callee, if available.
if (const Function *F = getCalledFunction())
- return F->getAttributes().hasAttribute(AttributeList::ReturnIndex, Kind);
+ return F->getAttributes().hasRetAttr(Kind);
return false;
}
};
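A hedged migration sketch for the CallBase attribute API above: the index-based addAttribute/removeAttribute calls become the *AtIndex, addFnAttr, addRetAttr and addParamAttr spellings, and getNumArgOperands() gives way to arg_size(); markCallSite is hypothetical.

#include "llvm/IR/InstrTypes.h"
using namespace llvm;

void markCallSite(CallBase &CB) {
  // Before: CB.addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind);
  CB.addFnAttr(Attribute::NoUnwind);
  // Before: CB.addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
  CB.addRetAttr(Attribute::NonNull);
  // Before: for (unsigned I = 0; I < CB.getNumArgOperands(); ++I) ...
  for (unsigned I = 0, E = CB.arg_size(); I != E; ++I)
    if (!CB.paramHasAttr(I, Attribute::NoUndef))
      CB.addParamAttr(I, Attribute::NoUndef);
}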
diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h
index deb85cf277fe..9878082ffffa 100644
--- a/llvm/include/llvm/IR/Instruction.h
+++ b/llvm/include/llvm/IR/Instruction.h
@@ -59,11 +59,11 @@ protected:
// Template alias so that all Instruction storing alignment use the same
// definition.
// Valid alignments are powers of two from 2^0 to 2^MaxAlignmentExponent =
- // 2^29. We store them as Log2(Alignment), so we need 5 bits to encode the 30
+ // 2^32. We store them as Log2(Alignment), so we need 6 bits to encode the 33
// possible values.
template <unsigned Offset>
using AlignmentBitfieldElementT =
- typename Bitfield::Element<unsigned, Offset, 5,
+ typename Bitfield::Element<unsigned, Offset, 6,
Value::MaxAlignmentExponent>;
template <unsigned Offset>
@@ -307,11 +307,6 @@ public:
Value::getAllMetadata(MDs);
}
- /// Fills the AAMDNodes structure with AA metadata from this instruction.
- /// When Merge is true, the existing AA metadata is merged with that from this
- /// instruction providing the most-general result.
- void getAAMetadata(AAMDNodes &N, bool Merge = false) const;
-
/// Set the metadata of the specified kind to the specified node. This updates
/// or replaces metadata if already present, or removes it if Node is null.
void setMetadata(unsigned KindID, MDNode *Node);
@@ -352,7 +347,10 @@ public:
/// to the existing node.
void addAnnotationMetadata(StringRef Annotation);
- /// Sets the metadata on this instruction from the AAMDNodes structure.
+ /// Returns the AA metadata for this instruction.
+ AAMDNodes getAAMetadata() const;
+
+ /// Sets the AA metadata on this instruction from the AAMDNodes structure.
void setAAMetadata(const AAMDNodes &N);
/// Retrieve the raw weight values of a conditional branch or select.
@@ -389,6 +387,10 @@ public:
/// Determine whether the no signed wrap flag is set.
bool hasNoSignedWrap() const;
+ /// Return true if this operator has flags which may cause this instruction
+ /// to evaluate to poison despite having non-poison inputs.
+ bool hasPoisonGeneratingFlags() const;
+
/// Drops flags that may cause this instruction to evaluate to poison despite
/// having non-poison inputs.
void dropPoisonGeneratingFlags();
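A small sketch of the Instruction.h changes above: getAAMetadata is now a getter returning AAMDNodes instead of filling an out-parameter, and hasPoisonGeneratingFlags pairs with the existing dropPoisonGeneratingFlags; copyAAMetadata is hypothetical.

#include "llvm/IR/Instruction.h"
using namespace llvm;

void copyAAMetadata(const Instruction &From, Instruction &To) {
  // Before: AAMDNodes N; From.getAAMetadata(N); To.setAAMetadata(N);
  To.setAAMetadata(From.getAAMetadata());
  // New query for flags (nuw/nsw, exact, inbounds, certain fast-math flags)
  // that could make the instruction yield poison from non-poison inputs.
  if (To.hasPoisonGeneratingFlags())
    To.dropPoisonGeneratingFlags();
}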
diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h
index 0c43a56daa33..6d32a898b668 100644
--- a/llvm/include/llvm/IR/Instructions.h
+++ b/llvm/include/llvm/IR/Instructions.h
@@ -126,7 +126,7 @@ public:
}
// FIXME: Remove this once the transition to Align is over.
- unsigned getAlignment() const { return getAlign().value(); }
+ uint64_t getAlignment() const { return getAlign().value(); }
/// Return true if this alloca is in the entry block of the function and is a
/// constant size. If so, the code generator will fold it into the
@@ -217,7 +217,7 @@ public:
/// Return the alignment of the access that is being performed.
/// FIXME: Remove this function once transition to Align is over.
/// Use getAlign() instead.
- unsigned getAlignment() const { return getAlign().value(); }
+ uint64_t getAlignment() const { return getAlign().value(); }
/// Return the alignment of the access that is being performed.
Align getAlign() const {
@@ -348,7 +348,7 @@ public:
/// Return the alignment of the access that is being performed
/// FIXME: Remove this function once transition to Align is over.
/// Use getAlign() instead.
- unsigned getAlignment() const { return getAlign().value(); }
+ uint64_t getAlignment() const { return getAlign().value(); }
Align getAlign() const {
return Align(1ULL << (getSubclassData<AlignmentField>()));
@@ -1339,6 +1339,10 @@ public:
return P == ICMP_SLE || P == ICMP_ULE;
}
+ /// Returns the sequence of all ICmp predicates.
+ ///
+ static auto predicates() { return ICmpPredicates(); }
+
/// Exchange the two operands to this instruction in such a way that it does
/// not modify the semantics of the instruction. The predicate value may be
/// changed to retain the same result if the predicate is order dependent
@@ -1349,6 +1353,10 @@ public:
Op<0>().swap(Op<1>());
}
+ /// Return result of `LHS Pred RHS` comparison.
+ static bool compare(const APInt &LHS, const APInt &RHS,
+ ICmpInst::Predicate Pred);
+
// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::ICmp;
@@ -1457,6 +1465,10 @@ public:
Op<0>().swap(Op<1>());
}
+ /// Returns the sequence of all FCmp predicates.
+ ///
+ static auto predicates() { return FCmpPredicates(); }
+
/// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::FCmp;
@@ -1685,9 +1697,7 @@ public:
/// Return true if the call can return twice
bool canReturnTwice() const { return hasFnAttr(Attribute::ReturnsTwice); }
- void setCanReturnTwice() {
- addAttribute(AttributeList::FunctionIndex, Attribute::ReturnsTwice);
- }
+ void setCanReturnTwice() { addFnAttr(Attribute::ReturnsTwice); }
// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const Instruction *I) {
@@ -2019,6 +2029,14 @@ protected:
ShuffleVectorInst *cloneImpl() const;
public:
+ ShuffleVectorInst(Value *V1, Value *Mask, const Twine &NameStr = "",
+ Instruction *InsertBefore = nullptr);
+ ShuffleVectorInst(Value *V1, Value *Mask, const Twine &NameStr,
+ BasicBlock *InsertAtEnd);
+ ShuffleVectorInst(Value *V1, ArrayRef<int> Mask, const Twine &NameStr = "",
+ Instruction *InsertBefore = nullptr);
+ ShuffleVectorInst(Value *V1, ArrayRef<int> Mask, const Twine &NameStr,
+ BasicBlock *InsertAtEnd);
ShuffleVectorInst(Value *V1, Value *V2, Value *Mask,
const Twine &NameStr = "",
Instruction *InsertBefor = nullptr);
@@ -2306,6 +2324,57 @@ public:
return isExtractSubvectorMask(ShuffleMask, NumSrcElts, Index);
}
+ /// Return true if this shuffle mask is an insert subvector mask.
+ /// A valid insert subvector mask inserts the lowest elements of a second
+ /// source operand into an in-place first source operand.
+ /// Both the subvector width and the insertion index are returned.
+ static bool isInsertSubvectorMask(ArrayRef<int> Mask, int NumSrcElts,
+ int &NumSubElts, int &Index);
+ static bool isInsertSubvectorMask(const Constant *Mask, int NumSrcElts,
+ int &NumSubElts, int &Index) {
+ assert(Mask->getType()->isVectorTy() && "Shuffle needs vector constant.");
+ // Not possible to express a shuffle mask for a scalable vector for this
+ // case.
+ if (isa<ScalableVectorType>(Mask->getType()))
+ return false;
+ SmallVector<int, 16> MaskAsInts;
+ getShuffleMask(Mask, MaskAsInts);
+ return isInsertSubvectorMask(MaskAsInts, NumSrcElts, NumSubElts, Index);
+ }
+
+ /// Return true if this shuffle mask is an insert subvector mask.
+ bool isInsertSubvectorMask(int &NumSubElts, int &Index) const {
+ // Not possible to express a shuffle mask for a scalable vector for this
+ // case.
+ if (isa<ScalableVectorType>(getType()))
+ return false;
+
+ int NumSrcElts =
+ cast<FixedVectorType>(Op<0>()->getType())->getNumElements();
+ return isInsertSubvectorMask(ShuffleMask, NumSrcElts, NumSubElts, Index);
+ }
+
+ /// Return true if this shuffle mask replicates each of the \p VF elements
+ /// in a vector \p ReplicationFactor times.
+ /// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:
+ /// <0,0,0,1,1,1,2,2,2,3,3,3>
+ static bool isReplicationMask(ArrayRef<int> Mask, int &ReplicationFactor,
+ int &VF);
+ static bool isReplicationMask(const Constant *Mask, int &ReplicationFactor,
+ int &VF) {
+ assert(Mask->getType()->isVectorTy() && "Shuffle needs vector constant.");
+ // Not possible to express a shuffle mask for a scalable vector for this
+ // case.
+ if (isa<ScalableVectorType>(Mask->getType()))
+ return false;
+ SmallVector<int, 16> MaskAsInts;
+ getShuffleMask(Mask, MaskAsInts);
+ return isReplicationMask(MaskAsInts, ReplicationFactor, VF);
+ }
+
+ /// Return true if this shuffle mask is a replication mask.
+ bool isReplicationMask(int &ReplicationFactor, int &VF) const;
+
/// Change values in a shuffle permute mask assuming the two vector operands
/// of length InVecNumElts have swapped position.
static void commuteShuffleMask(MutableArrayRef<int> Mask,
@@ -3281,14 +3350,14 @@ public:
CaseHandle(SwitchInst *SI, ptrdiff_t Index) : CaseHandleImpl(SI, Index) {}
/// Sets the new value for current case.
- void setValue(ConstantInt *V) {
+ void setValue(ConstantInt *V) const {
assert((unsigned)Index < SI->getNumCases() &&
"Index out the number of cases.");
SI->setOperand(2 + Index*2, reinterpret_cast<Value*>(V));
}
/// Sets the new successor for current case.
- void setSuccessor(BasicBlock *S) {
+ void setSuccessor(BasicBlock *S) const {
SI->setSuccessor(getSuccessorIndex(), S);
}
};
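With both setters now const, a case can be retargeted through the handle obtained while iterating cases(), which is what the iterator changes below rely on. A minimal sketch (illustrative only; the helper name is made up):

#include "llvm/IR/Instructions.h"
using namespace llvm;

// Illustrative helper: point every case that targets OldBB at NewBB instead.
static void retargetCases(SwitchInst *SI, BasicBlock *OldBB, BasicBlock *NewBB) {
  for (auto Case : SI->cases())
    if (Case.getCaseSuccessor() == OldBB)
      Case.setSuccessor(NewBB); // legal on the const handle after this change
}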
@@ -3297,7 +3366,7 @@ public:
class CaseIteratorImpl
: public iterator_facade_base<CaseIteratorImpl<CaseHandleT>,
std::random_access_iterator_tag,
- CaseHandleT> {
+ const CaseHandleT> {
using SwitchInstT = typename CaseHandleT::SwitchInstType;
CaseHandleT Case;
@@ -3356,7 +3425,6 @@ public:
assert(Case.SI == RHS.Case.SI && "Incompatible operators.");
return Case.Index < RHS.Case.Index;
}
- CaseHandleT &operator*() { return Case; }
const CaseHandleT &operator*() const { return Case; }
};
@@ -3446,15 +3514,12 @@ public:
/// default case iterator to indicate that it is handled by the default
/// handler.
CaseIt findCaseValue(const ConstantInt *C) {
- CaseIt I = llvm::find_if(
- cases(), [C](CaseHandle &Case) { return Case.getCaseValue() == C; });
- if (I != case_end())
- return I;
-
- return case_default();
+ return CaseIt(
+ this,
+ const_cast<const SwitchInst *>(this)->findCaseValue(C)->getCaseIndex());
}
ConstCaseIt findCaseValue(const ConstantInt *C) const {
- ConstCaseIt I = llvm::find_if(cases(), [C](ConstCaseHandle &Case) {
+ ConstCaseIt I = llvm::find_if(cases(), [C](const ConstCaseHandle &Case) {
return Case.getCaseValue() == C;
});
if (I != case_end())
@@ -4069,14 +4134,12 @@ public:
///
Value *getIndirectDestLabel(unsigned i) const {
assert(i < getNumIndirectDests() && "Out of bounds!");
- return getOperand(i + getNumArgOperands() + getNumTotalBundleOperands() +
- 1);
+ return getOperand(i + arg_size() + getNumTotalBundleOperands() + 1);
}
Value *getIndirectDestLabelUse(unsigned i) const {
assert(i < getNumIndirectDests() && "Out of bounds!");
- return getOperandUse(i + getNumArgOperands() + getNumTotalBundleOperands() +
- 1);
+ return getOperandUse(i + arg_size() + getNumTotalBundleOperands() + 1);
}
// Return the destination basic blocks...
diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h
index 6b42cb949050..d186029db8cf 100644
--- a/llvm/include/llvm/IR/IntrinsicInst.h
+++ b/llvm/include/llvm/IR/IntrinsicInst.h
@@ -448,6 +448,28 @@ public:
static Optional<unsigned> getFunctionalOpcodeForVP(Intrinsic::ID ID);
};
+/// This represents vector predication reduction intrinsics.
+class VPReductionIntrinsic : public VPIntrinsic {
+public:
+ static bool isVPReduction(Intrinsic::ID ID);
+
+ unsigned getStartParamPos() const;
+ unsigned getVectorParamPos() const;
+
+ static Optional<unsigned> getStartParamPos(Intrinsic::ID ID);
+ static Optional<unsigned> getVectorParamPos(Intrinsic::ID ID);
+
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
+ /// @{
+ static bool classof(const IntrinsicInst *I) {
+ return VPReductionIntrinsic::isVPReduction(I->getIntrinsicID());
+ }
+ static bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+ /// @}
+};
+
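Like the other IntrinsicInst subclasses, the new class is meant to be reached through dyn_cast, after which the parameter-position queries avoid hard-coding operand indices. A minimal sketch (illustrative only; the helper name is made up):

#include "llvm/IR/IntrinsicInst.h"
using namespace llvm;

// Illustrative helper: pull the start value and vector operand out of a
// vector-predicated reduction call, if the instruction is one.
static void inspectVPReduction(Instruction &I) {
  if (auto *VPRed = dyn_cast<VPReductionIntrinsic>(&I)) {
    Value *Start = VPRed->getArgOperand(VPRed->getStartParamPos());
    Value *Vec = VPRed->getArgOperand(VPRed->getVectorParamPos());
    (void)Start;
    (void)Vec;
  }
}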
/// This is the common base class for constrained floating point intrinsics.
class ConstrainedFPIntrinsic : public IntrinsicInst {
public:
diff --git a/llvm/include/llvm/IR/Intrinsics.h b/llvm/include/llvm/IR/Intrinsics.h
index 80a2f5a8cd3e..2ff48380ac28 100644
--- a/llvm/include/llvm/IR/Intrinsics.h
+++ b/llvm/include/llvm/IR/Intrinsics.h
@@ -140,7 +140,8 @@ namespace Intrinsic {
Subdivide2Argument,
Subdivide4Argument,
VecOfBitcastsToInt,
- AMX
+ AMX,
+ PPCQuad,
} Kind;
union {
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 28fcc13266b1..637e6d8f6cf5 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -312,6 +312,8 @@ def llvm_v1i128_ty : LLVMType<v1i128>; // 1 x i128
def llvm_v2f16_ty : LLVMType<v2f16>; // 2 x half (__fp16)
def llvm_v4f16_ty : LLVMType<v4f16>; // 4 x half (__fp16)
def llvm_v8f16_ty : LLVMType<v8f16>; // 8 x half (__fp16)
+def llvm_v16f16_ty : LLVMType<v16f16>; // 16 x half (__fp16)
+def llvm_v32f16_ty : LLVMType<v32f16>; // 32 x half (__fp16)
def llvm_v2bf16_ty : LLVMType<v2bf16>; // 2 x bfloat (__bf16)
def llvm_v4bf16_ty : LLVMType<v4bf16>; // 4 x bfloat (__bf16)
def llvm_v8bf16_ty : LLVMType<v8bf16>; // 8 x bfloat (__bf16)
@@ -1329,10 +1331,10 @@ def int_donothing : DefaultAttrsIntrinsic<[], [], [IntrNoMem, IntrWillReturn]>;
def int_sideeffect : DefaultAttrsIntrinsic<[], [], [IntrInaccessibleMemOnly, IntrWillReturn]>;
// The pseudoprobe intrinsic works as a place holder to the block it probes.
-// Like the sideeffect intrinsic defined above, this intrinsic is treated by the
-// optimizer as having opaque side effects so that it won't be get rid of or moved
+// Like the sideeffect intrinsic defined above, this intrinsic is treated by the
+// optimizer as having opaque side effects so that it won't be removed or moved
// out of the block it probes.
-def int_pseudoprobe : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i64_ty],
+def int_pseudoprobe : DefaultAttrsIntrinsic<[], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i64_ty],
[IntrInaccessibleMemOnly, IntrWillReturn]>;
// Arithmetic fence intrinsic.
@@ -1497,12 +1499,96 @@ let IntrProperties =
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
}
+// Shuffles.
+def int_vp_select : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+ [ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ llvm_i32_ty]>;
+
+// Reductions
+let IntrProperties = [IntrSpeculatable, IntrNoMem, IntrNoSync, IntrWillReturn] in {
+ def int_vp_reduce_fadd : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
+ [LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_reduce_fmul : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
+ [LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_reduce_add : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
+ [LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_reduce_mul : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
+ [LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_reduce_and : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
+ [LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_reduce_or : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
+ [LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_reduce_xor : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
+ [LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_reduce_smax : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
+ [LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_reduce_smin : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
+ [LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_reduce_umax : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
+ [LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_reduce_umin : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
+ [LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_reduce_fmax : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
+ [LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_reduce_fmin : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
+ [LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+}
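Each of these takes a scalar start value, the vector being reduced, a mask, and an i32 explicit vector length, with the vector type as the only overloaded type. A minimal sketch of emitting one from C++ (illustrative only; the helper name is made up, and Start/Vec/Mask/EVL are assumed to already have the matching types):

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Illustrative helper: emit llvm.vp.reduce.fadd(Start, Vec, Mask, EVL).
static Value *emitVPReduceFAdd(IRBuilder<> &Builder, Module &M, Value *Start,
                               Value *Vec, Value *Mask, Value *EVL) {
  Function *F = Intrinsic::getDeclaration(&M, Intrinsic::vp_reduce_fadd,
                                          {Vec->getType()});
  return Builder.CreateCall(F, {Start, Vec, Mask, EVL});
}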
def int_get_active_lane_mask:
DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[llvm_anyint_ty, LLVMMatchType<1>],
[IntrNoMem, IntrNoSync, IntrWillReturn]>;
+def int_experimental_vp_splice:
+ DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ llvm_i32_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<2>>]>;
+
//===-------------------------- Masked Intrinsics -------------------------===//
//
def int_masked_load:
@@ -1558,12 +1644,15 @@ def int_icall_branch_funnel : DefaultAttrsIntrinsic<[], [llvm_vararg_ty], []>;
def int_load_relative: DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_anyint_ty],
[IntrReadMem, IntrArgMemOnly]>;
+def int_asan_check_memaccess :
+ Intrinsic<[],[llvm_ptr_ty, llvm_i32_ty], [ImmArg<ArgIndex<1>>]>;
+
def int_hwasan_check_memaccess :
Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty, llvm_i32_ty],
- [IntrInaccessibleMemOnly, ImmArg<ArgIndex<2>>]>;
+ [ImmArg<ArgIndex<2>>]>;
def int_hwasan_check_memaccess_shortgranules :
Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty, llvm_i32_ty],
- [IntrInaccessibleMemOnly, ImmArg<ArgIndex<2>>]>;
+ [ImmArg<ArgIndex<2>>]>;
// Xray intrinsics
//===----------------------------------------------------------------------===//
@@ -1658,7 +1747,7 @@ def int_matrix_multiply
def int_matrix_column_major_load
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
- [LLVMPointerToElt<0>, llvm_i64_ty, llvm_i1_ty,
+ [LLVMPointerToElt<0>, llvm_anyint_ty, llvm_i1_ty,
llvm_i32_ty, llvm_i32_ty],
[IntrNoSync, IntrWillReturn, IntrArgMemOnly, IntrReadMem,
NoCapture<ArgIndex<0>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>,
@@ -1667,7 +1756,7 @@ def int_matrix_column_major_load
def int_matrix_column_major_store
: DefaultAttrsIntrinsic<[],
[llvm_anyvector_ty, LLVMPointerToElt<0>,
- llvm_i64_ty, llvm_i1_ty, llvm_i32_ty, llvm_i32_ty],
+ llvm_anyint_ty, llvm_i1_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoSync, IntrWillReturn, IntrArgMemOnly, IntrWriteMem,
WriteOnly<ArgIndex<1>>, NoCapture<ArgIndex<1>>,
ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
@@ -1761,6 +1850,61 @@ def int_experimental_vector_splice : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
+
+//===----------------- Pointer Authentication Intrinsics ------------------===//
+//
+
+// Sign an unauthenticated pointer using the specified key and discriminator,
+// passed in that order.
+// Returns the first argument, with some known bits replaced with a signature.
+def int_ptrauth_sign : Intrinsic<[llvm_i64_ty],
+ [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+
+// Authenticate a signed pointer, using the specified key and discriminator.
+// Returns the first argument, with the signature bits removed.
+// The signature must be valid.
+def int_ptrauth_auth : Intrinsic<[llvm_i64_ty],
+ [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty],
+ [IntrNoMem,ImmArg<ArgIndex<1>>]>;
+
+// Authenticate a signed pointer and resign it.
+// The second (key) and third (discriminator) arguments specify the signing
+// schema used for authenticating.
+// The fourth and fifth arguments specify the schema used for signing.
+// The signature must be valid.
+// This is a combined form of @llvm.ptrauth.sign and @llvm.ptrauth.auth, with
+// an additional integrity guarantee on the intermediate value.
+def int_ptrauth_resign : Intrinsic<[llvm_i64_ty],
+ [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty,
+ llvm_i32_ty, llvm_i64_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>,
+ ImmArg<ArgIndex<3>>]>;
+
+// Strip the embedded signature out of a signed pointer.
+// The second argument specifies the key.
+// This behaves like @llvm.ptrauth.auth, but doesn't require the signature to
+// be valid.
+def int_ptrauth_strip : Intrinsic<[llvm_i64_ty],
+ [llvm_i64_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+
+// Blend a small integer discriminator with an address discriminator, producing
+// a new discriminator value.
+def int_ptrauth_blend : Intrinsic<[llvm_i64_ty],
+ [llvm_i64_ty, llvm_i64_ty],
+ [IntrNoMem]>;
+
+// Compute the signature of a value, using a given discriminator.
+// This differs from @llvm.ptrauth.sign in that it doesn't embed the computed
+// signature in the pointer, but instead returns the signature as a value.
+// That allows it to be used to sign non-pointer data: in that sense, it is
+// generic. There is no generic @llvm.ptrauth.auth: instead, the signature
+// can be computed using @llvm.ptrauth.sign_generic, and compared with icmp.
+def int_ptrauth_sign_generic : Intrinsic<[llvm_i64_ty],
+ [llvm_i64_ty, llvm_i64_ty],
+ [IntrNoMem]>;
+
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
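All of these operate on fixed i64/i32 types, so none of them is overloaded and emitting one only needs the plain declaration lookup. A minimal sketch for @llvm.ptrauth.sign (illustrative only; the helper name is made up, and the key value 0 is an arbitrary placeholder for the immediate key operand required by ImmArg above):

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Illustrative helper: sign RawPtr (an i64) with key 0 and discriminator Disc.
static Value *emitPtrAuthSign(IRBuilder<> &Builder, Module &M, Value *RawPtr,
                              Value *Disc) {
  Function *F = Intrinsic::getDeclaration(&M, Intrinsic::ptrauth_sign);
  Value *Key = Builder.getInt32(0); // must be a constant: ImmArg<ArgIndex<1>>
  return Builder.CreateCall(F, {RawPtr, Key, Disc});
}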
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 87e0f83f85b7..c586af45f34d 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -962,6 +962,25 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
LLVMPointerToElt<0>],
[IntrReadMem, IntrArgMemOnly]>;
+ class AdvSIMD_2Vec_PredLoad_Intrinsic
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
+ [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ LLVMPointerToElt<0>],
+ [IntrReadMem, IntrArgMemOnly]>;
+
+ class AdvSIMD_3Vec_PredLoad_Intrinsic
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>],
+ [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ LLVMPointerToElt<0>],
+ [IntrReadMem, IntrArgMemOnly]>;
+
+ class AdvSIMD_4Vec_PredLoad_Intrinsic
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>,
+ LLVMMatchType<0>],
+ [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ LLVMPointerToElt<0>],
+ [IntrReadMem, IntrArgMemOnly]>;
+
class AdvSIMD_1Vec_PredLoad_WriteFFR_Intrinsic
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
@@ -1365,7 +1384,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
// This class of intrinsics is not intended to be useful within LLVM IR but
// is instead here to support some of the more rigid parts of the ACLE.
- class Builtin_SVCVT<string name, LLVMType OUT, LLVMType PRED, LLVMType IN>
+ class Builtin_SVCVT<LLVMType OUT, LLVMType PRED, LLVMType IN>
: DefaultAttrsIntrinsic<[OUT], [OUT, PRED, IN], [IntrNoMem]>;
}
@@ -1535,6 +1554,10 @@ def int_aarch64_sve_ld2 : AdvSIMD_ManyVec_PredLoad_Intrinsic;
def int_aarch64_sve_ld3 : AdvSIMD_ManyVec_PredLoad_Intrinsic;
def int_aarch64_sve_ld4 : AdvSIMD_ManyVec_PredLoad_Intrinsic;
+def int_aarch64_sve_ld2_sret : AdvSIMD_2Vec_PredLoad_Intrinsic;
+def int_aarch64_sve_ld3_sret : AdvSIMD_3Vec_PredLoad_Intrinsic;
+def int_aarch64_sve_ld4_sret : AdvSIMD_4Vec_PredLoad_Intrinsic;
+
def int_aarch64_sve_ldnt1 : AdvSIMD_1Vec_PredLoad_Intrinsic;
def int_aarch64_sve_ldnf1 : AdvSIMD_1Vec_PredLoad_WriteFFR_Intrinsic;
def int_aarch64_sve_ldff1 : AdvSIMD_1Vec_PredLoad_WriteFFR_Intrinsic;
@@ -1957,44 +1980,44 @@ def int_aarch64_sve_fcmpgt : AdvSIMD_SVE_Compare_Intrinsic;
def int_aarch64_sve_fcmpne : AdvSIMD_SVE_Compare_Intrinsic;
def int_aarch64_sve_fcmpuo : AdvSIMD_SVE_Compare_Intrinsic;
-def int_aarch64_sve_fcvtzs_i32f16 : Builtin_SVCVT<"svcvt_s32_f16_m", llvm_nxv4i32_ty, llvm_nxv4i1_ty, llvm_nxv8f16_ty>;
-def int_aarch64_sve_fcvtzs_i32f64 : Builtin_SVCVT<"svcvt_s32_f64_m", llvm_nxv4i32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
-def int_aarch64_sve_fcvtzs_i64f16 : Builtin_SVCVT<"svcvt_s64_f16_m", llvm_nxv2i64_ty, llvm_nxv2i1_ty, llvm_nxv8f16_ty>;
-def int_aarch64_sve_fcvtzs_i64f32 : Builtin_SVCVT<"svcvt_s64_f32_m", llvm_nxv2i64_ty, llvm_nxv2i1_ty, llvm_nxv4f32_ty>;
+def int_aarch64_sve_fcvtzs_i32f16 : Builtin_SVCVT<llvm_nxv4i32_ty, llvm_nxv4i1_ty, llvm_nxv8f16_ty>;
+def int_aarch64_sve_fcvtzs_i32f64 : Builtin_SVCVT<llvm_nxv4i32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
+def int_aarch64_sve_fcvtzs_i64f16 : Builtin_SVCVT<llvm_nxv2i64_ty, llvm_nxv2i1_ty, llvm_nxv8f16_ty>;
+def int_aarch64_sve_fcvtzs_i64f32 : Builtin_SVCVT<llvm_nxv2i64_ty, llvm_nxv2i1_ty, llvm_nxv4f32_ty>;
-def int_aarch64_sve_fcvt_bf16f32 : Builtin_SVCVT<"svcvt_bf16_f32_m", llvm_nxv8bf16_ty, llvm_nxv8i1_ty, llvm_nxv4f32_ty>;
-def int_aarch64_sve_fcvtnt_bf16f32 : Builtin_SVCVT<"svcvtnt_bf16_f32_m", llvm_nxv8bf16_ty, llvm_nxv8i1_ty, llvm_nxv4f32_ty>;
+def int_aarch64_sve_fcvt_bf16f32 : Builtin_SVCVT<llvm_nxv8bf16_ty, llvm_nxv8i1_ty, llvm_nxv4f32_ty>;
+def int_aarch64_sve_fcvtnt_bf16f32 : Builtin_SVCVT<llvm_nxv8bf16_ty, llvm_nxv8i1_ty, llvm_nxv4f32_ty>;
-def int_aarch64_sve_fcvtzu_i32f16 : Builtin_SVCVT<"svcvt_u32_f16_m", llvm_nxv4i32_ty, llvm_nxv4i1_ty, llvm_nxv8f16_ty>;
-def int_aarch64_sve_fcvtzu_i32f64 : Builtin_SVCVT<"svcvt_u32_f64_m", llvm_nxv4i32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
-def int_aarch64_sve_fcvtzu_i64f16 : Builtin_SVCVT<"svcvt_u64_f16_m", llvm_nxv2i64_ty, llvm_nxv2i1_ty, llvm_nxv8f16_ty>;
-def int_aarch64_sve_fcvtzu_i64f32 : Builtin_SVCVT<"svcvt_u64_f32_m", llvm_nxv2i64_ty, llvm_nxv2i1_ty, llvm_nxv4f32_ty>;
+def int_aarch64_sve_fcvtzu_i32f16 : Builtin_SVCVT<llvm_nxv4i32_ty, llvm_nxv4i1_ty, llvm_nxv8f16_ty>;
+def int_aarch64_sve_fcvtzu_i32f64 : Builtin_SVCVT<llvm_nxv4i32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
+def int_aarch64_sve_fcvtzu_i64f16 : Builtin_SVCVT<llvm_nxv2i64_ty, llvm_nxv2i1_ty, llvm_nxv8f16_ty>;
+def int_aarch64_sve_fcvtzu_i64f32 : Builtin_SVCVT<llvm_nxv2i64_ty, llvm_nxv2i1_ty, llvm_nxv4f32_ty>;
-def int_aarch64_sve_fcvt_f16f32 : Builtin_SVCVT<"svcvt_f16_f32_m", llvm_nxv8f16_ty, llvm_nxv4i1_ty, llvm_nxv4f32_ty>;
-def int_aarch64_sve_fcvt_f16f64 : Builtin_SVCVT<"svcvt_f16_f64_m", llvm_nxv8f16_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
-def int_aarch64_sve_fcvt_f32f64 : Builtin_SVCVT<"svcvt_f32_f64_m", llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
+def int_aarch64_sve_fcvt_f16f32 : Builtin_SVCVT<llvm_nxv8f16_ty, llvm_nxv4i1_ty, llvm_nxv4f32_ty>;
+def int_aarch64_sve_fcvt_f16f64 : Builtin_SVCVT<llvm_nxv8f16_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
+def int_aarch64_sve_fcvt_f32f64 : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
-def int_aarch64_sve_fcvt_f32f16 : Builtin_SVCVT<"svcvt_f32_f16_m", llvm_nxv4f32_ty, llvm_nxv4i1_ty, llvm_nxv8f16_ty>;
-def int_aarch64_sve_fcvt_f64f16 : Builtin_SVCVT<"svcvt_f64_f16_m", llvm_nxv2f64_ty, llvm_nxv2i1_ty, llvm_nxv8f16_ty>;
-def int_aarch64_sve_fcvt_f64f32 : Builtin_SVCVT<"svcvt_f64_f32_m", llvm_nxv2f64_ty, llvm_nxv2i1_ty, llvm_nxv4f32_ty>;
+def int_aarch64_sve_fcvt_f32f16 : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv4i1_ty, llvm_nxv8f16_ty>;
+def int_aarch64_sve_fcvt_f64f16 : Builtin_SVCVT<llvm_nxv2f64_ty, llvm_nxv2i1_ty, llvm_nxv8f16_ty>;
+def int_aarch64_sve_fcvt_f64f32 : Builtin_SVCVT<llvm_nxv2f64_ty, llvm_nxv2i1_ty, llvm_nxv4f32_ty>;
-def int_aarch64_sve_fcvtlt_f32f16 : Builtin_SVCVT<"svcvtlt_f32_f16_m", llvm_nxv4f32_ty, llvm_nxv4i1_ty, llvm_nxv8f16_ty>;
-def int_aarch64_sve_fcvtlt_f64f32 : Builtin_SVCVT<"svcvtlt_f64_f32_m", llvm_nxv2f64_ty, llvm_nxv2i1_ty, llvm_nxv4f32_ty>;
-def int_aarch64_sve_fcvtnt_f16f32 : Builtin_SVCVT<"svcvtnt_f16_f32_m", llvm_nxv8f16_ty, llvm_nxv4i1_ty, llvm_nxv4f32_ty>;
-def int_aarch64_sve_fcvtnt_f32f64 : Builtin_SVCVT<"svcvtnt_f32_f64_m", llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
+def int_aarch64_sve_fcvtlt_f32f16 : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv4i1_ty, llvm_nxv8f16_ty>;
+def int_aarch64_sve_fcvtlt_f64f32 : Builtin_SVCVT<llvm_nxv2f64_ty, llvm_nxv2i1_ty, llvm_nxv4f32_ty>;
+def int_aarch64_sve_fcvtnt_f16f32 : Builtin_SVCVT<llvm_nxv8f16_ty, llvm_nxv4i1_ty, llvm_nxv4f32_ty>;
+def int_aarch64_sve_fcvtnt_f32f64 : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
-def int_aarch64_sve_fcvtx_f32f64 : Builtin_SVCVT<"svcvtx_f32_f64_m", llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
-def int_aarch64_sve_fcvtxnt_f32f64 : Builtin_SVCVT<"svcvtxnt_f32_f64_m", llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
+def int_aarch64_sve_fcvtx_f32f64 : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
+def int_aarch64_sve_fcvtxnt_f32f64 : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
-def int_aarch64_sve_scvtf_f16i32 : Builtin_SVCVT<"svcvt_f16_s32_m", llvm_nxv8f16_ty, llvm_nxv4i1_ty, llvm_nxv4i32_ty>;
-def int_aarch64_sve_scvtf_f16i64 : Builtin_SVCVT<"svcvt_f16_s64_m", llvm_nxv8f16_ty, llvm_nxv2i1_ty, llvm_nxv2i64_ty>;
-def int_aarch64_sve_scvtf_f32i64 : Builtin_SVCVT<"svcvt_f32_s64_m", llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2i64_ty>;
-def int_aarch64_sve_scvtf_f64i32 : Builtin_SVCVT<"svcvt_f64_s32_m", llvm_nxv2f64_ty, llvm_nxv2i1_ty, llvm_nxv4i32_ty>;
+def int_aarch64_sve_scvtf_f16i32 : Builtin_SVCVT<llvm_nxv8f16_ty, llvm_nxv4i1_ty, llvm_nxv4i32_ty>;
+def int_aarch64_sve_scvtf_f16i64 : Builtin_SVCVT<llvm_nxv8f16_ty, llvm_nxv2i1_ty, llvm_nxv2i64_ty>;
+def int_aarch64_sve_scvtf_f32i64 : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2i64_ty>;
+def int_aarch64_sve_scvtf_f64i32 : Builtin_SVCVT<llvm_nxv2f64_ty, llvm_nxv2i1_ty, llvm_nxv4i32_ty>;
-def int_aarch64_sve_ucvtf_f16i32 : Builtin_SVCVT<"svcvt_f16_u32_m", llvm_nxv8f16_ty, llvm_nxv4i1_ty, llvm_nxv4i32_ty>;
-def int_aarch64_sve_ucvtf_f16i64 : Builtin_SVCVT<"svcvt_f16_u64_m", llvm_nxv8f16_ty, llvm_nxv2i1_ty, llvm_nxv2i64_ty>;
-def int_aarch64_sve_ucvtf_f32i64 : Builtin_SVCVT<"svcvt_f32_u64_m", llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2i64_ty>;
-def int_aarch64_sve_ucvtf_f64i32 : Builtin_SVCVT<"svcvt_f64_u32_m", llvm_nxv2f64_ty, llvm_nxv2i1_ty, llvm_nxv4i32_ty>;
+def int_aarch64_sve_ucvtf_f16i32 : Builtin_SVCVT<llvm_nxv8f16_ty, llvm_nxv4i1_ty, llvm_nxv4i32_ty>;
+def int_aarch64_sve_ucvtf_f16i64 : Builtin_SVCVT<llvm_nxv8f16_ty, llvm_nxv2i1_ty, llvm_nxv2i64_ty>;
+def int_aarch64_sve_ucvtf_f32i64 : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2i64_ty>;
+def int_aarch64_sve_ucvtf_f64i32 : Builtin_SVCVT<llvm_nxv2f64_ty, llvm_nxv2i1_ty, llvm_nxv4i32_ty>;
//
// Predicate creation
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 46a7aeb39c9a..0a44670de76e 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -684,7 +684,14 @@ class AMDGPUDimAtomicProfile<string opmod,
let IsAtomic = true;
}
-class AMDGPUDimGetResInfoProfile<AMDGPUDimProps dim> : AMDGPUDimProfile<"GET_RESINFO", dim> {
+class AMDGPUDimAtomicFloatProfile<string opmod, AMDGPUDimProps dim,
+ list<AMDGPUArg> dataargs>
+ : AMDGPUDimAtomicProfile<opmod, dim, dataargs> {
+ let RetTypes = [llvm_anyfloat_ty];
+}
+
+class AMDGPUDimGetResInfoProfile<AMDGPUDimProps dim>
+ : AMDGPUDimProfile<"GET_RESINFO", dim> {
let RetTypes = [llvm_anyfloat_ty];
let DataArgs = [];
let AddrArgs = [AMDGPUArg<llvm_anyint_ty, "mip">];
@@ -860,17 +867,24 @@ defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimIntrinsics = {
// atomic intrinsics
//////////////////////////////////////////////////////////////////////////
defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimAtomicIntrinsics = {
- multiclass AMDGPUImageDimAtomicX<string opmod, list<AMDGPUArg> dataargs> {
- foreach dim = AMDGPUDims.All in {
- def !strconcat(NAME, "_", dim.Name)
- : AMDGPUImageDimIntrinsic<
- AMDGPUDimAtomicProfile<opmod, dim, dataargs>,
- [], [SDNPMemOperand]>;
- }
+ multiclass AMDGPUImageDimAtomicX<string opmod, list<AMDGPUArg> dataargs,
+ int isFloat = 0> {
+ foreach dim = AMDGPUDims.All in {
+ def !strconcat(NAME, "_", dim.Name): AMDGPUImageDimIntrinsic<
+ !if (isFloat, AMDGPUDimAtomicFloatProfile<opmod, dim, dataargs>,
+ AMDGPUDimAtomicProfile<opmod, dim, dataargs>),
+ [], [SDNPMemOperand]>;
+ }
+ }
+
+ multiclass AMDGPUImageDimAtomic<string opmod, int isFloat = 0> {
+ defm ""
+ : AMDGPUImageDimAtomicX<opmod, [AMDGPUArg<LLVMMatchType<0>, "vdata">],
+ isFloat>;
}
- multiclass AMDGPUImageDimAtomic<string opmod> {
- defm "" : AMDGPUImageDimAtomicX<opmod, [AMDGPUArg<LLVMMatchType<0>, "vdata">]>;
+ multiclass AMDGPUImageDimFloatAtomic<string opmod> {
+ defm "" : AMDGPUImageDimAtomic<opmod, 1 /*isFloat*/>;
}
defm int_amdgcn_image_atomic_swap : AMDGPUImageDimAtomic<"ATOMIC_SWAP">;
@@ -878,8 +892,10 @@ defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimAtomicIntrinsics = {
defm int_amdgcn_image_atomic_sub : AMDGPUImageDimAtomic<"ATOMIC_SUB">;
defm int_amdgcn_image_atomic_smin : AMDGPUImageDimAtomic<"ATOMIC_SMIN">;
defm int_amdgcn_image_atomic_umin : AMDGPUImageDimAtomic<"ATOMIC_UMIN">;
+ defm int_amdgcn_image_atomic_fmin : AMDGPUImageDimFloatAtomic<"ATOMIC_FMIN">;
defm int_amdgcn_image_atomic_smax : AMDGPUImageDimAtomic<"ATOMIC_SMAX">;
defm int_amdgcn_image_atomic_umax : AMDGPUImageDimAtomic<"ATOMIC_UMAX">;
+ defm int_amdgcn_image_atomic_fmax : AMDGPUImageDimFloatAtomic<"ATOMIC_FMAX">;
defm int_amdgcn_image_atomic_and : AMDGPUImageDimAtomic<"ATOMIC_AND">;
defm int_amdgcn_image_atomic_or : AMDGPUImageDimAtomic<"ATOMIC_OR">;
defm int_amdgcn_image_atomic_xor : AMDGPUImageDimAtomic<"ATOMIC_XOR">;
@@ -1015,8 +1031,10 @@ def int_amdgcn_raw_buffer_atomic_add : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_sub : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_smin : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_umin : AMDGPURawBufferAtomic;
+def int_amdgcn_raw_buffer_atomic_fmin : AMDGPURawBufferAtomic<llvm_anyfloat_ty>;
def int_amdgcn_raw_buffer_atomic_smax : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_umax : AMDGPURawBufferAtomic;
+def int_amdgcn_raw_buffer_atomic_fmax : AMDGPURawBufferAtomic<llvm_anyfloat_ty>;
def int_amdgcn_raw_buffer_atomic_and : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_or : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_xor : AMDGPURawBufferAtomic;
@@ -1036,10 +1054,6 @@ def int_amdgcn_raw_buffer_atomic_cmpswap : Intrinsic<
// gfx908 intrinsic
def int_amdgcn_raw_buffer_atomic_fadd : AMDGPURawBufferAtomic<llvm_anyfloat_ty>;
-// gfx90a intrinsics
-def int_amdgcn_raw_buffer_atomic_fmin : AMDGPURawBufferAtomic<llvm_anyfloat_ty>;
-def int_amdgcn_raw_buffer_atomic_fmax : AMDGPURawBufferAtomic<llvm_anyfloat_ty>;
-
class AMDGPUStructBufferAtomic<LLVMType data_ty = llvm_any_ty, bit NoRtn = false> : Intrinsic <
!if(NoRtn, [], [data_ty]),
[!if(NoRtn, data_ty, LLVMMatchType<0>), // vdata(VGPR)
@@ -1521,6 +1535,16 @@ def int_amdgcn_mul_u24 : Intrinsic<[llvm_i32_ty],
[IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;
+def int_amdgcn_mulhi_i24 : Intrinsic<[llvm_i32_ty],
+ [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable, IntrWillReturn]
+>;
+
+def int_amdgcn_mulhi_u24 : Intrinsic<[llvm_i32_ty],
+ [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable, IntrWillReturn]
+>;
+
// llvm.amdgcn.ds.gws.init(i32 bar_val, i32 resource_id)
//
// bar_val is the total number of waves that will wait on this
diff --git a/llvm/include/llvm/IR/IntrinsicsBPF.td b/llvm/include/llvm/IR/IntrinsicsBPF.td
index 4b4dd94b1599..a6bd6f841aab 100644
--- a/llvm/include/llvm/IR/IntrinsicsBPF.td
+++ b/llvm/include/llvm/IR/IntrinsicsBPF.td
@@ -34,4 +34,7 @@ let TargetPrefix = "bpf" in { // All intrinsics start with "llvm.bpf."
[IntrNoMem]>;
def int_bpf_passthrough : GCCBuiltin<"__builtin_bpf_passthrough">,
Intrinsic<[llvm_any_ty], [llvm_i32_ty, llvm_any_ty], [IntrNoMem]>;
+ def int_bpf_compare : GCCBuiltin<"__builtin_bpf_compare">,
+ Intrinsic<[llvm_i1_ty], [llvm_i32_ty, llvm_anyint_ty, llvm_anyint_ty],
+ [IntrNoMem]>;
}
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index cc43d23bec1c..6f55d1ef730e 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -43,7 +43,7 @@ def llvm_shared_i64ptr_ty : LLVMQualPointerType<llvm_i64_ty, 3>; // (shared)i64*
// Helper class that represents a 'fragment' of an NVPTX *MMA instruction.
// Geom: m<M>n<N>k<K>. E.g. m8n32k16
-// Frag: [abcd]
+// Frag: [a|b|c|d] ([x1|x2|x4] for ldmatrix)
// PtxEltType: PTX type for the element.
class WMMA_REGS<string Geom, string Frag, string PtxEltType> {
string geom = Geom;
@@ -190,6 +190,11 @@ class WMMA_REGS<string Geom, string Frag, string PtxEltType> {
!eq(gft,"m16n8k256:b:b1") : !listsplat(llvm_i32_ty, 2),
!eq(gft,"m16n8k256:c:s32") : !listsplat(llvm_i32_ty, 4),
!eq(gft,"m16n8k256:d:s32") : !listsplat(llvm_i32_ty, 4),
+
+ // ldmatrix b16 -> s32 @ m8n8
+ !eq(gft,"m8n8:x1:b16") : !listsplat(llvm_i32_ty, 1),
+ !eq(gft,"m8n8:x2:b16") : !listsplat(llvm_i32_ty, 2),
+ !eq(gft,"m8n8:x4:b16") : !listsplat(llvm_i32_ty, 4),
);
}
@@ -256,6 +261,17 @@ class MMA_NAME<string ALayout, string BLayout, int Satfinite, string b1op,
!subst("llvm.", "int_", llvm));
}
+class LDMATRIX_NAME<WMMA_REGS Frag, int Trans> {
+ string intr = "llvm.nvvm.ldmatrix.sync.aligned"
+ # "." # Frag.geom
+ # "." # Frag.frag
+ # !if(Trans, ".trans", "")
+ # "." # Frag.ptx_elt_type
+ ;
+ string record = !subst(".", "_",
+ !subst("llvm.", "int_", intr));
+}
+
// Generates list of 4-tuples of WMMA_REGS representing a valid MMA op.
// Geom: list of supported geometries.
// TypeN: PTX type of the corresponding fragment's element.
@@ -286,9 +302,19 @@ class MMA_LDST_OPS<list<string> Geom, list<string> Frags, list<string> Types> {
list<string> ops = !foreach(x, ret, x.gft);
}
-// Creates list of valid combinations of fragments. This is the master list that
+class LDMATRIX_OPS<list<string> Geom, list<string> Frags, list<string> Types> {
+ list<WMMA_REGS> ret =
+ !foldl([]<WMMA_REGS>, Geom, t1, geom, !listconcat(t1,
+ !foldl([]<WMMA_REGS>, Frags, t2, frag, !listconcat(t2,
+ !foldl([]<WMMA_REGS>, Types, t3, type, !listconcat(t3,
+ [WMMA_REGS<geom, frag, type>]))))));
+ // Debugging aid for readable representation of the list above.
+ list<string> ops = !foreach(x, ret, x.gft);
+}
+
+// Creates list of valid combinations of fragments. This is the main list that
// drives generation of corresponding intrinsics and instructions.
-class NVVM_MMA_OPS<int _ = 0> {
+class NVVM_MMA_OPS {
list<list<WMMA_REGS>> tf32_wmma_ops = MMA_OPS<
["m16n16k8"],
["tf32"], [], ["f32"], []>.ret;
@@ -370,11 +396,14 @@ class NVVM_MMA_OPS<int _ = 0> {
// Separate A/B/C fragments (loads) from D (stores).
list<WMMA_REGS> all_ld_ops = !filter(op, all_ldst_ops, !ne(op.frag, "d"));
list<WMMA_REGS> all_st_ops = !filter(op, all_ldst_ops, !eq(op.frag, "d"));
+
+ list<WMMA_REGS> ldmatrix_b16_ops = LDMATRIX_OPS<
+ ["m8n8"], ["x1", "x2", "x4"], ["b16"]>.ret;
+ list<WMMA_REGS> all_ldmatrix_ops = ldmatrix_b16_ops;
}
def NVVM_MMA_OPS : NVVM_MMA_OPS;
-
// Returns true if this combination of fragment and layout for WMMA load/store
// ops is supported; false otherwise.
// E.g.
@@ -489,6 +518,23 @@ class NVVM_MMA_SUPPORTED<list<WMMA_REGS> frags, string layout_a, string layout_b
);
}
+// Returns true if the fragment is valid for ldmatrix ops;
+// false otherwise.
+// E.g.
+// if NVVM_LDMATRIX_SUPPORTED<...>.ret then
+// def : FOO<>; // The record will only be defined for supported ops.
+//
+class NVVM_LDMATRIX_SUPPORTED<WMMA_REGS frag> {
+ string g = frag.geom;
+ string t = frag.ptx_elt_type;
+
+ bit ret = !cond(
+ // Currently only m8n8 with b16 is supported
+ !and(!eq(g, "m8n8"), !eq(t, "b16")): true,
+ true: false
+ );
+}
+
class SHFL_INFO<bit sync, string mode, string type, bit return_pred> {
string Suffix = !if(sync, "sync_", "")
# mode # "_"
@@ -511,7 +557,7 @@ class SHFL_INFO<bit sync, string mode, string type, bit return_pred> {
let TargetPrefix = "nvvm" in {
def int_nvvm_prmt : GCCBuiltin<"__nvvm_prmt">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, Commutative]>;
//
@@ -519,150 +565,150 @@ let TargetPrefix = "nvvm" in {
//
def int_nvvm_fmin_f : GCCBuiltin<"__nvvm_fmin_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_fmin_ftz_f : GCCBuiltin<"__nvvm_fmin_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_fmax_f : GCCBuiltin<"__nvvm_fmax_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty]
- , [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty]
+ , [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_fmax_ftz_f : GCCBuiltin<"__nvvm_fmax_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_fmin_d : GCCBuiltin<"__nvvm_fmin_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_fmax_d : GCCBuiltin<"__nvvm_fmax_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
//
// Multiplication
//
def int_nvvm_mulhi_i : GCCBuiltin<"__nvvm_mulhi_i">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_mulhi_ui : GCCBuiltin<"__nvvm_mulhi_ui">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_mulhi_ll : GCCBuiltin<"__nvvm_mulhi_ll">,
- Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_mulhi_ull : GCCBuiltin<"__nvvm_mulhi_ull">,
- Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_mul_rn_ftz_f : GCCBuiltin<"__nvvm_mul_rn_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_mul_rn_f : GCCBuiltin<"__nvvm_mul_rn_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_mul_rz_ftz_f : GCCBuiltin<"__nvvm_mul_rz_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_mul_rz_f : GCCBuiltin<"__nvvm_mul_rz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_mul_rm_ftz_f : GCCBuiltin<"__nvvm_mul_rm_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_mul_rm_f : GCCBuiltin<"__nvvm_mul_rm_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_mul_rp_ftz_f : GCCBuiltin<"__nvvm_mul_rp_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_mul_rp_f : GCCBuiltin<"__nvvm_mul_rp_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_mul_rn_d : GCCBuiltin<"__nvvm_mul_rn_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_mul_rz_d : GCCBuiltin<"__nvvm_mul_rz_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_mul_rm_d : GCCBuiltin<"__nvvm_mul_rm_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_mul_rp_d : GCCBuiltin<"__nvvm_mul_rp_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_mul24_i : GCCBuiltin<"__nvvm_mul24_i">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_mul24_ui : GCCBuiltin<"__nvvm_mul24_ui">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
//
// Div
//
def int_nvvm_div_approx_ftz_f : GCCBuiltin<"__nvvm_div_approx_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem]>;
def int_nvvm_div_approx_f : GCCBuiltin<"__nvvm_div_approx_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem]>;
def int_nvvm_div_rn_ftz_f : GCCBuiltin<"__nvvm_div_rn_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem]>;
def int_nvvm_div_rn_f : GCCBuiltin<"__nvvm_div_rn_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem]>;
def int_nvvm_div_rz_ftz_f : GCCBuiltin<"__nvvm_div_rz_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem]>;
def int_nvvm_div_rz_f : GCCBuiltin<"__nvvm_div_rz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem]>;
def int_nvvm_div_rm_ftz_f : GCCBuiltin<"__nvvm_div_rm_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem]>;
def int_nvvm_div_rm_f : GCCBuiltin<"__nvvm_div_rm_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem]>;
def int_nvvm_div_rp_ftz_f : GCCBuiltin<"__nvvm_div_rp_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem]>;
def int_nvvm_div_rp_f : GCCBuiltin<"__nvvm_div_rp_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem]>;
def int_nvvm_div_rn_d : GCCBuiltin<"__nvvm_div_rn_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem]>;
def int_nvvm_div_rz_d : GCCBuiltin<"__nvvm_div_rz_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem]>;
def int_nvvm_div_rm_d : GCCBuiltin<"__nvvm_div_rm_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem]>;
def int_nvvm_div_rp_d : GCCBuiltin<"__nvvm_div_rp_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem]>;
//
// Sad
//
def int_nvvm_sad_i : GCCBuiltin<"__nvvm_sad_i">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_sad_ui : GCCBuiltin<"__nvvm_sad_ui">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, Commutative]>;
//
@@ -670,493 +716,493 @@ let TargetPrefix = "nvvm" in {
//
def int_nvvm_floor_ftz_f : GCCBuiltin<"__nvvm_floor_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_floor_f : GCCBuiltin<"__nvvm_floor_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_floor_d : GCCBuiltin<"__nvvm_floor_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ceil_ftz_f : GCCBuiltin<"__nvvm_ceil_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ceil_f : GCCBuiltin<"__nvvm_ceil_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ceil_d : GCCBuiltin<"__nvvm_ceil_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
//
// Abs
//
def int_nvvm_fabs_ftz_f : GCCBuiltin<"__nvvm_fabs_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_fabs_f : GCCBuiltin<"__nvvm_fabs_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_fabs_d : GCCBuiltin<"__nvvm_fabs_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
//
// Round
//
def int_nvvm_round_ftz_f : GCCBuiltin<"__nvvm_round_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_round_f : GCCBuiltin<"__nvvm_round_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_round_d : GCCBuiltin<"__nvvm_round_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
//
// Trunc
//
def int_nvvm_trunc_ftz_f : GCCBuiltin<"__nvvm_trunc_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_trunc_f : GCCBuiltin<"__nvvm_trunc_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_trunc_d : GCCBuiltin<"__nvvm_trunc_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
//
// Saturate
//
def int_nvvm_saturate_ftz_f : GCCBuiltin<"__nvvm_saturate_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_saturate_f : GCCBuiltin<"__nvvm_saturate_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_saturate_d : GCCBuiltin<"__nvvm_saturate_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
//
// Exp2 Log2
//
def int_nvvm_ex2_approx_ftz_f : GCCBuiltin<"__nvvm_ex2_approx_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_ex2_approx_f : GCCBuiltin<"__nvvm_ex2_approx_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_ex2_approx_d : GCCBuiltin<"__nvvm_ex2_approx_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
def int_nvvm_lg2_approx_ftz_f : GCCBuiltin<"__nvvm_lg2_approx_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_lg2_approx_f : GCCBuiltin<"__nvvm_lg2_approx_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_lg2_approx_d : GCCBuiltin<"__nvvm_lg2_approx_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
//
// Sin Cos
//
def int_nvvm_sin_approx_ftz_f : GCCBuiltin<"__nvvm_sin_approx_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_sin_approx_f : GCCBuiltin<"__nvvm_sin_approx_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_cos_approx_ftz_f : GCCBuiltin<"__nvvm_cos_approx_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_cos_approx_f : GCCBuiltin<"__nvvm_cos_approx_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
//
// Fma
//
def int_nvvm_fma_rn_ftz_f : GCCBuiltin<"__nvvm_fma_rn_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_fma_rn_f : GCCBuiltin<"__nvvm_fma_rn_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_fma_rz_ftz_f : GCCBuiltin<"__nvvm_fma_rz_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_fma_rz_f : GCCBuiltin<"__nvvm_fma_rz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_fma_rm_ftz_f : GCCBuiltin<"__nvvm_fma_rm_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_fma_rm_f : GCCBuiltin<"__nvvm_fma_rm_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_fma_rp_ftz_f : GCCBuiltin<"__nvvm_fma_rp_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_fma_rp_f : GCCBuiltin<"__nvvm_fma_rp_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_fma_rn_d : GCCBuiltin<"__nvvm_fma_rn_d">,
- Intrinsic<[llvm_double_ty],
+ DefaultAttrsIntrinsic<[llvm_double_ty],
[llvm_double_ty, llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
+ [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_fma_rz_d : GCCBuiltin<"__nvvm_fma_rz_d">,
- Intrinsic<[llvm_double_ty],
+ DefaultAttrsIntrinsic<[llvm_double_ty],
[llvm_double_ty, llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
+ [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_fma_rm_d : GCCBuiltin<"__nvvm_fma_rm_d">,
- Intrinsic<[llvm_double_ty],
+ DefaultAttrsIntrinsic<[llvm_double_ty],
[llvm_double_ty, llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
+ [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_fma_rp_d : GCCBuiltin<"__nvvm_fma_rp_d">,
- Intrinsic<[llvm_double_ty],
+ DefaultAttrsIntrinsic<[llvm_double_ty],
[llvm_double_ty, llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
+ [IntrNoMem, IntrSpeculatable]>;
//
// Rcp
//
def int_nvvm_rcp_rn_ftz_f : GCCBuiltin<"__nvvm_rcp_rn_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_rcp_rn_f : GCCBuiltin<"__nvvm_rcp_rn_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_rcp_rz_ftz_f : GCCBuiltin<"__nvvm_rcp_rz_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_rcp_rz_f : GCCBuiltin<"__nvvm_rcp_rz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_rcp_rm_ftz_f : GCCBuiltin<"__nvvm_rcp_rm_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_rcp_rm_f : GCCBuiltin<"__nvvm_rcp_rm_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_rcp_rp_ftz_f : GCCBuiltin<"__nvvm_rcp_rp_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_rcp_rp_f : GCCBuiltin<"__nvvm_rcp_rp_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_rcp_rn_d : GCCBuiltin<"__nvvm_rcp_rn_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
def int_nvvm_rcp_rz_d : GCCBuiltin<"__nvvm_rcp_rz_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
def int_nvvm_rcp_rm_d : GCCBuiltin<"__nvvm_rcp_rm_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
def int_nvvm_rcp_rp_d : GCCBuiltin<"__nvvm_rcp_rp_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
def int_nvvm_rcp_approx_ftz_d : GCCBuiltin<"__nvvm_rcp_approx_ftz_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
//
// Sqrt
//
def int_nvvm_sqrt_f : GCCBuiltin<"__nvvm_sqrt_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_sqrt_rn_ftz_f : GCCBuiltin<"__nvvm_sqrt_rn_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_sqrt_rn_f : GCCBuiltin<"__nvvm_sqrt_rn_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_sqrt_rz_ftz_f : GCCBuiltin<"__nvvm_sqrt_rz_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_sqrt_rz_f : GCCBuiltin<"__nvvm_sqrt_rz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_sqrt_rm_ftz_f : GCCBuiltin<"__nvvm_sqrt_rm_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_sqrt_rm_f : GCCBuiltin<"__nvvm_sqrt_rm_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_sqrt_rp_ftz_f : GCCBuiltin<"__nvvm_sqrt_rp_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_sqrt_rp_f : GCCBuiltin<"__nvvm_sqrt_rp_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_sqrt_approx_ftz_f : GCCBuiltin<"__nvvm_sqrt_approx_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_sqrt_approx_f : GCCBuiltin<"__nvvm_sqrt_approx_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_sqrt_rn_d : GCCBuiltin<"__nvvm_sqrt_rn_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
def int_nvvm_sqrt_rz_d : GCCBuiltin<"__nvvm_sqrt_rz_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
def int_nvvm_sqrt_rm_d : GCCBuiltin<"__nvvm_sqrt_rm_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
def int_nvvm_sqrt_rp_d : GCCBuiltin<"__nvvm_sqrt_rp_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
//
// Rsqrt
//
def int_nvvm_rsqrt_approx_ftz_f : GCCBuiltin<"__nvvm_rsqrt_approx_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_rsqrt_approx_f : GCCBuiltin<"__nvvm_rsqrt_approx_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_rsqrt_approx_d : GCCBuiltin<"__nvvm_rsqrt_approx_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
//
// Add
//
def int_nvvm_add_rn_ftz_f : GCCBuiltin<"__nvvm_add_rn_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_add_rn_f : GCCBuiltin<"__nvvm_add_rn_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_add_rz_ftz_f : GCCBuiltin<"__nvvm_add_rz_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_add_rz_f : GCCBuiltin<"__nvvm_add_rz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_add_rm_ftz_f : GCCBuiltin<"__nvvm_add_rm_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_add_rm_f : GCCBuiltin<"__nvvm_add_rm_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_add_rp_ftz_f : GCCBuiltin<"__nvvm_add_rp_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_add_rp_f : GCCBuiltin<"__nvvm_add_rp_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_add_rn_d : GCCBuiltin<"__nvvm_add_rn_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_add_rz_d : GCCBuiltin<"__nvvm_add_rz_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_add_rm_d : GCCBuiltin<"__nvvm_add_rm_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_add_rp_d : GCCBuiltin<"__nvvm_add_rp_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
//
// Convert
//
def int_nvvm_d2f_rn_ftz : GCCBuiltin<"__nvvm_d2f_rn_ftz">,
- Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_d2f_rn : GCCBuiltin<"__nvvm_d2f_rn">,
- Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_d2f_rz_ftz : GCCBuiltin<"__nvvm_d2f_rz_ftz">,
- Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_d2f_rz : GCCBuiltin<"__nvvm_d2f_rz">,
- Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_d2f_rm_ftz : GCCBuiltin<"__nvvm_d2f_rm_ftz">,
- Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_d2f_rm : GCCBuiltin<"__nvvm_d2f_rm">,
- Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_d2f_rp_ftz : GCCBuiltin<"__nvvm_d2f_rp_ftz">,
- Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_d2f_rp : GCCBuiltin<"__nvvm_d2f_rp">,
- Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_d2i_rn : GCCBuiltin<"__nvvm_d2i_rn">,
- Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_d2i_rz : GCCBuiltin<"__nvvm_d2i_rz">,
- Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_d2i_rm : GCCBuiltin<"__nvvm_d2i_rm">,
- Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_d2i_rp : GCCBuiltin<"__nvvm_d2i_rp">,
- Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_d2ui_rn : GCCBuiltin<"__nvvm_d2ui_rn">,
- Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_d2ui_rz : GCCBuiltin<"__nvvm_d2ui_rz">,
- Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_d2ui_rm : GCCBuiltin<"__nvvm_d2ui_rm">,
- Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_d2ui_rp : GCCBuiltin<"__nvvm_d2ui_rp">,
- Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_i2d_rn : GCCBuiltin<"__nvvm_i2d_rn">,
- Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_i2d_rz : GCCBuiltin<"__nvvm_i2d_rz">,
- Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_i2d_rm : GCCBuiltin<"__nvvm_i2d_rm">,
- Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_i2d_rp : GCCBuiltin<"__nvvm_i2d_rp">,
- Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ui2d_rn : GCCBuiltin<"__nvvm_ui2d_rn">,
- Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ui2d_rz : GCCBuiltin<"__nvvm_ui2d_rz">,
- Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ui2d_rm : GCCBuiltin<"__nvvm_ui2d_rm">,
- Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ui2d_rp : GCCBuiltin<"__nvvm_ui2d_rp">,
- Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2i_rn_ftz : GCCBuiltin<"__nvvm_f2i_rn_ftz">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2i_rn : GCCBuiltin<"__nvvm_f2i_rn">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2i_rz_ftz : GCCBuiltin<"__nvvm_f2i_rz_ftz">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2i_rz : GCCBuiltin<"__nvvm_f2i_rz">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2i_rm_ftz : GCCBuiltin<"__nvvm_f2i_rm_ftz">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2i_rm : GCCBuiltin<"__nvvm_f2i_rm">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2i_rp_ftz : GCCBuiltin<"__nvvm_f2i_rp_ftz">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2i_rp : GCCBuiltin<"__nvvm_f2i_rp">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2ui_rn_ftz : GCCBuiltin<"__nvvm_f2ui_rn_ftz">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2ui_rn : GCCBuiltin<"__nvvm_f2ui_rn">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2ui_rz_ftz : GCCBuiltin<"__nvvm_f2ui_rz_ftz">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2ui_rz : GCCBuiltin<"__nvvm_f2ui_rz">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2ui_rm_ftz : GCCBuiltin<"__nvvm_f2ui_rm_ftz">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2ui_rm : GCCBuiltin<"__nvvm_f2ui_rm">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2ui_rp_ftz : GCCBuiltin<"__nvvm_f2ui_rp_ftz">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2ui_rp : GCCBuiltin<"__nvvm_f2ui_rp">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_i2f_rn : GCCBuiltin<"__nvvm_i2f_rn">,
- Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_i2f_rz : GCCBuiltin<"__nvvm_i2f_rz">,
- Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_i2f_rm : GCCBuiltin<"__nvvm_i2f_rm">,
- Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_i2f_rp : GCCBuiltin<"__nvvm_i2f_rp">,
- Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ui2f_rn : GCCBuiltin<"__nvvm_ui2f_rn">,
- Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ui2f_rz : GCCBuiltin<"__nvvm_ui2f_rz">,
- Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ui2f_rm : GCCBuiltin<"__nvvm_ui2f_rm">,
- Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ui2f_rp : GCCBuiltin<"__nvvm_ui2f_rp">,
- Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_lohi_i2d : GCCBuiltin<"__nvvm_lohi_i2d">,
- Intrinsic<[llvm_double_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_d2i_lo : GCCBuiltin<"__nvvm_d2i_lo">,
- Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_d2i_hi : GCCBuiltin<"__nvvm_d2i_hi">,
- Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2ll_rn_ftz : GCCBuiltin<"__nvvm_f2ll_rn_ftz">,
- Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2ll_rn : GCCBuiltin<"__nvvm_f2ll_rn">,
- Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2ll_rz_ftz : GCCBuiltin<"__nvvm_f2ll_rz_ftz">,
- Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2ll_rz : GCCBuiltin<"__nvvm_f2ll_rz">,
- Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2ll_rm_ftz : GCCBuiltin<"__nvvm_f2ll_rm_ftz">,
- Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2ll_rm : GCCBuiltin<"__nvvm_f2ll_rm">,
- Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2ll_rp_ftz : GCCBuiltin<"__nvvm_f2ll_rp_ftz">,
- Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2ll_rp : GCCBuiltin<"__nvvm_f2ll_rp">,
- Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2ull_rn_ftz : GCCBuiltin<"__nvvm_f2ull_rn_ftz">,
- Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2ull_rn : GCCBuiltin<"__nvvm_f2ull_rn">,
- Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2ull_rz_ftz : GCCBuiltin<"__nvvm_f2ull_rz_ftz">,
- Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2ull_rz : GCCBuiltin<"__nvvm_f2ull_rz">,
- Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2ull_rm_ftz : GCCBuiltin<"__nvvm_f2ull_rm_ftz">,
- Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2ull_rm : GCCBuiltin<"__nvvm_f2ull_rm">,
- Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2ull_rp_ftz : GCCBuiltin<"__nvvm_f2ull_rp_ftz">,
- Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2ull_rp : GCCBuiltin<"__nvvm_f2ull_rp">,
- Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_d2ll_rn : GCCBuiltin<"__nvvm_d2ll_rn">,
- Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_d2ll_rz : GCCBuiltin<"__nvvm_d2ll_rz">,
- Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_d2ll_rm : GCCBuiltin<"__nvvm_d2ll_rm">,
- Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_d2ll_rp : GCCBuiltin<"__nvvm_d2ll_rp">,
- Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_d2ull_rn : GCCBuiltin<"__nvvm_d2ull_rn">,
- Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_d2ull_rz : GCCBuiltin<"__nvvm_d2ull_rz">,
- Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_d2ull_rm : GCCBuiltin<"__nvvm_d2ull_rm">,
- Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_d2ull_rp : GCCBuiltin<"__nvvm_d2ull_rp">,
- Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ll2f_rn : GCCBuiltin<"__nvvm_ll2f_rn">,
- Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ll2f_rz : GCCBuiltin<"__nvvm_ll2f_rz">,
- Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ll2f_rm : GCCBuiltin<"__nvvm_ll2f_rm">,
- Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ll2f_rp : GCCBuiltin<"__nvvm_ll2f_rp">,
- Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ull2f_rn : GCCBuiltin<"__nvvm_ull2f_rn">,
- Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ull2f_rz : GCCBuiltin<"__nvvm_ull2f_rz">,
- Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ull2f_rm : GCCBuiltin<"__nvvm_ull2f_rm">,
- Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ull2f_rp : GCCBuiltin<"__nvvm_ull2f_rp">,
- Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ll2d_rn : GCCBuiltin<"__nvvm_ll2d_rn">,
- Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ll2d_rz : GCCBuiltin<"__nvvm_ll2d_rz">,
- Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ll2d_rm : GCCBuiltin<"__nvvm_ll2d_rm">,
- Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ll2d_rp : GCCBuiltin<"__nvvm_ll2d_rp">,
- Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ull2d_rn : GCCBuiltin<"__nvvm_ull2d_rn">,
- Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ull2d_rz : GCCBuiltin<"__nvvm_ull2d_rz">,
- Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ull2d_rm : GCCBuiltin<"__nvvm_ull2d_rm">,
- Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ull2d_rp : GCCBuiltin<"__nvvm_ull2d_rp">,
- Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2h_rn_ftz : GCCBuiltin<"__nvvm_f2h_rn_ftz">,
- Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2h_rn : GCCBuiltin<"__nvvm_f2h_rn">,
- Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
//
// Bitcast
//
def int_nvvm_bitcast_f2i : GCCBuiltin<"__nvvm_bitcast_f2i">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_bitcast_i2f : GCCBuiltin<"__nvvm_bitcast_i2f">,
- Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_bitcast_ll2d : GCCBuiltin<"__nvvm_bitcast_ll2d">,
- Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_bitcast_d2ll : GCCBuiltin<"__nvvm_bitcast_d2ll">,
- Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
// FNS
def int_nvvm_fns : GCCBuiltin<"__nvvm_fns">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
// Atomics not available as llvm intrinsics.
@@ -1385,37 +1431,37 @@ def int_nvvm_ldg_global_p : Intrinsic<[llvm_anyptr_ty],
// - This complements the llvm bitcast, which can be used to cast one type
// of pointer to another type of pointer, while the address space remains
// the same.
-def int_nvvm_ptr_local_to_gen: Intrinsic<[llvm_anyptr_ty],
- [llvm_anyptr_ty], [IntrNoMem],
+def int_nvvm_ptr_local_to_gen: DefaultAttrsIntrinsic<[llvm_anyptr_ty],
+ [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.ptr.local.to.gen">;
-def int_nvvm_ptr_shared_to_gen: Intrinsic<[llvm_anyptr_ty],
- [llvm_anyptr_ty], [IntrNoMem],
+def int_nvvm_ptr_shared_to_gen: DefaultAttrsIntrinsic<[llvm_anyptr_ty],
+ [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.ptr.shared.to.gen">;
-def int_nvvm_ptr_global_to_gen: Intrinsic<[llvm_anyptr_ty],
- [llvm_anyptr_ty], [IntrNoMem],
+def int_nvvm_ptr_global_to_gen: DefaultAttrsIntrinsic<[llvm_anyptr_ty],
+ [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.ptr.global.to.gen">;
-def int_nvvm_ptr_constant_to_gen: Intrinsic<[llvm_anyptr_ty],
- [llvm_anyptr_ty], [IntrNoMem],
+def int_nvvm_ptr_constant_to_gen: DefaultAttrsIntrinsic<[llvm_anyptr_ty],
+ [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.ptr.constant.to.gen">;
-def int_nvvm_ptr_gen_to_global: Intrinsic<[llvm_anyptr_ty],
- [llvm_anyptr_ty], [IntrNoMem],
+def int_nvvm_ptr_gen_to_global: DefaultAttrsIntrinsic<[llvm_anyptr_ty],
+ [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.ptr.gen.to.global">;
-def int_nvvm_ptr_gen_to_shared: Intrinsic<[llvm_anyptr_ty],
- [llvm_anyptr_ty], [IntrNoMem],
+def int_nvvm_ptr_gen_to_shared: DefaultAttrsIntrinsic<[llvm_anyptr_ty],
+ [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.ptr.gen.to.shared">;
-def int_nvvm_ptr_gen_to_local: Intrinsic<[llvm_anyptr_ty],
- [llvm_anyptr_ty], [IntrNoMem],
+def int_nvvm_ptr_gen_to_local: DefaultAttrsIntrinsic<[llvm_anyptr_ty],
+ [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.ptr.gen.to.local">;
-def int_nvvm_ptr_gen_to_constant: Intrinsic<[llvm_anyptr_ty],
- [llvm_anyptr_ty], [IntrNoMem],
+def int_nvvm_ptr_gen_to_constant: DefaultAttrsIntrinsic<[llvm_anyptr_ty],
+ [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.ptr.gen.to.constant">;
// Used in nvvm internally to help address space opt and ptx code generation
// This is for params that are passed to kernel functions by pointer by-val.
def int_nvvm_ptr_gen_to_param: Intrinsic<[llvm_anyptr_ty],
[llvm_anyptr_ty],
- [IntrNoMem],
+ [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.ptr.gen.to.param">;
// Move intrinsics, used in nvvm internally
@@ -1453,149 +1499,149 @@ def int_nvvm_reflect :
// isspacep.{const, global, local, shared}
def int_nvvm_isspacep_const
- : Intrinsic<[llvm_i1_ty], [llvm_ptr_ty], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.isspacep.const">,
GCCBuiltin<"__nvvm_isspacep_const">;
def int_nvvm_isspacep_global
- : Intrinsic<[llvm_i1_ty], [llvm_ptr_ty], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.isspacep.global">,
GCCBuiltin<"__nvvm_isspacep_global">;
def int_nvvm_isspacep_local
- : Intrinsic<[llvm_i1_ty], [llvm_ptr_ty], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.isspacep.local">,
GCCBuiltin<"__nvvm_isspacep_local">;
def int_nvvm_isspacep_shared
- : Intrinsic<[llvm_i1_ty], [llvm_ptr_ty], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.isspacep.shared">,
GCCBuiltin<"__nvvm_isspacep_shared">;
// Environment register read
def int_nvvm_read_ptx_sreg_envreg0
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg0">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg0">;
def int_nvvm_read_ptx_sreg_envreg1
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg1">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg1">;
def int_nvvm_read_ptx_sreg_envreg2
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg2">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg2">;
def int_nvvm_read_ptx_sreg_envreg3
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg3">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg3">;
def int_nvvm_read_ptx_sreg_envreg4
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg4">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg4">;
def int_nvvm_read_ptx_sreg_envreg5
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg5">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg5">;
def int_nvvm_read_ptx_sreg_envreg6
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg6">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg6">;
def int_nvvm_read_ptx_sreg_envreg7
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg7">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg7">;
def int_nvvm_read_ptx_sreg_envreg8
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg8">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg8">;
def int_nvvm_read_ptx_sreg_envreg9
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg9">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg9">;
def int_nvvm_read_ptx_sreg_envreg10
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg10">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg10">;
def int_nvvm_read_ptx_sreg_envreg11
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg11">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg11">;
def int_nvvm_read_ptx_sreg_envreg12
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg12">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg12">;
def int_nvvm_read_ptx_sreg_envreg13
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg13">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg13">;
def int_nvvm_read_ptx_sreg_envreg14
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg14">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg14">;
def int_nvvm_read_ptx_sreg_envreg15
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg15">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg15">;
def int_nvvm_read_ptx_sreg_envreg16
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg16">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg16">;
def int_nvvm_read_ptx_sreg_envreg17
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg17">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg17">;
def int_nvvm_read_ptx_sreg_envreg18
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg18">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg18">;
def int_nvvm_read_ptx_sreg_envreg19
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg19">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg19">;
def int_nvvm_read_ptx_sreg_envreg20
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg20">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg20">;
def int_nvvm_read_ptx_sreg_envreg21
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg21">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg21">;
def int_nvvm_read_ptx_sreg_envreg22
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg22">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg22">;
def int_nvvm_read_ptx_sreg_envreg23
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg23">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg23">;
def int_nvvm_read_ptx_sreg_envreg24
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg24">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg24">;
def int_nvvm_read_ptx_sreg_envreg25
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg25">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg25">;
def int_nvvm_read_ptx_sreg_envreg26
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg26">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg26">;
def int_nvvm_read_ptx_sreg_envreg27
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg27">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg27">;
def int_nvvm_read_ptx_sreg_envreg28
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg28">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg28">;
def int_nvvm_read_ptx_sreg_envreg29
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg29">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg29">;
def int_nvvm_read_ptx_sreg_envreg30
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg30">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg30">;
def int_nvvm_read_ptx_sreg_envreg31
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg31">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg31">;
@@ -4200,49 +4246,49 @@ def int_nvvm_sust_p_3d_v4i32_trap
def int_nvvm_rotate_b32
- : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem], "llvm.nvvm.rotate.b32">,
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable], "llvm.nvvm.rotate.b32">,
GCCBuiltin<"__nvvm_rotate_b32">;
def int_nvvm_rotate_b64
- :Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty],
- [IntrNoMem], "llvm.nvvm.rotate.b64">,
+ : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable], "llvm.nvvm.rotate.b64">,
GCCBuiltin<"__nvvm_rotate_b64">;
def int_nvvm_rotate_right_b64
- : Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty],
- [IntrNoMem], "llvm.nvvm.rotate.right.b64">,
+ : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable], "llvm.nvvm.rotate.right.b64">,
GCCBuiltin<"__nvvm_rotate_right_b64">;
def int_nvvm_swap_lo_hi_b64
- : Intrinsic<[llvm_i64_ty], [llvm_i64_ty],
- [IntrNoMem], "llvm.nvvm.swap.lo.hi.b64">,
+ : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty],
+ [IntrNoMem, IntrSpeculatable], "llvm.nvvm.swap.lo.hi.b64">,
GCCBuiltin<"__nvvm_swap_lo_hi_b64">;
// Accessing special registers.
multiclass PTXReadSRegIntrinsic_v4i32<string regname> {
// FIXME: Do we need the 128-bit integer type version?
-// def _r64 : Intrinsic<[llvm_i128_ty], [], [IntrNoMem]>;
+// def _r64 : Intrinsic<[llvm_i128_ty], [], [IntrNoMem, IntrSpeculatable]>;
// FIXME: Enable this once v4i32 support is enabled in back-end.
-// def _v4i16 : Intrinsic<[llvm_v4i32_ty], [], [IntrNoMem]>;
+// def _v4i16 : Intrinsic<[llvm_v4i32_ty], [], [IntrNoMem, IntrSpeculatable]>;
- def _x : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+ def _x : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>,
GCCBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_x">;
- def _y : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+ def _y : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>,
GCCBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_y">;
- def _z : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+ def _z : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>,
GCCBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_z">;
- def _w : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+ def _w : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>,
GCCBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_w">;
}
class PTXReadSRegIntrinsic_r32<string name>
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>,
GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>;
class PTXReadSRegIntrinsic_r64<string name>
- : Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>,
+ : DefaultAttrsIntrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable]>,
GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>;
// Intrinsics to read registers with non-constant values. E.g. the values that
@@ -4519,4 +4565,20 @@ foreach layout_a = ["row", "col"] in {
} // layout_b
} // layout_a
+// LDMATRIX
+class NVVM_LDMATRIX<WMMA_REGS Frag, int Transposed>
+ : Intrinsic<Frag.regs, [llvm_anyptr_ty],
+ [IntrReadMem, IntrArgMemOnly, ReadOnly<ArgIndex<0>>,
+ NoCapture<ArgIndex<0>>],
+ LDMATRIX_NAME<Frag, Transposed>.intr>;
+
+foreach transposed = [0, 1] in {
+ foreach frag = NVVM_MMA_OPS.all_ldmatrix_ops in {
+ if NVVM_LDMATRIX_SUPPORTED<frag>.ret then {
+ def LDMATRIX_NAME<frag, transposed>.record
+ : NVVM_LDMATRIX<frag, transposed>;
+ }
+ }
+}
+
} // let TargetPrefix = "nvvm"
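Note: the pattern repeated through the IntrinsicsNVVM.td hunks above is a mechanical rewrite of plain Intrinsic<...> records into DefaultAttrsIntrinsic<...>, usually while adding IntrSpeculatable alongside IntrNoMem. As a hedged illustration only (the record name int_nvvm_example is hypothetical and not part of this commit), the shape of the rewrite is:

// Before: only the explicitly listed properties apply to the intrinsic.
def int_nvvm_example : GCCBuiltin<"__nvvm_example">,
    Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;

// After: DefaultAttrsIntrinsic layers the standard default attribute set from
// Intrinsics.td on top of the listed properties, and IntrSpeculatable further
// marks the call as safe to execute speculatively (no trap, no side effects).
def int_nvvm_example : GCCBuiltin<"__nvvm_example">,
    DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty],
                          [IntrNoMem, IntrSpeculatable]>;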
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index 92d3bdea37ed..8290342c0d51 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -31,10 +31,12 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
// Get content from current FPSCR register
def int_ppc_readflm : GCCBuiltin<"__builtin_readflm">,
- Intrinsic<[llvm_double_ty], [], [IntrNoMem]>;
+ Intrinsic<[llvm_double_ty], [],
+ [IntrNoMerge, IntrHasSideEffects]>;
// Set FPSCR register, and return previous content
def int_ppc_setflm : GCCBuiltin<"__builtin_setflm">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], []>;
+ Intrinsic<[llvm_double_ty], [llvm_double_ty],
+ [IntrHasSideEffects]>;
// Intrinsics for [double]word extended forms of divide instructions
def int_ppc_divwe : GCCBuiltin<"__builtin_divwe">,
@@ -50,6 +52,15 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
[IntrNoMem]>;
+ def int_ppc_unpack_longdouble : GCCBuiltin<"__builtin_unpack_longdouble">,
+ Intrinsic<[llvm_double_ty],
+ [llvm_ppcf128_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+ def int_ppc_pack_longdouble : GCCBuiltin<"__builtin_pack_longdouble">,
+ Intrinsic<[llvm_ppcf128_ty],
+ [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem]>;
+
// Generate a random number
def int_ppc_darn : GCCBuiltin<"__builtin_darn">,
Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>;
@@ -1042,6 +1053,9 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
def int_ppc_altivec_vbpermq : GCCBuiltin<"__builtin_altivec_vbpermq">,
Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
+ def int_ppc_altivec_vbpermd : GCCBuiltin<"__builtin_altivec_vbpermd">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
}
def int_ppc_altivec_vexptefp : PowerPC_Vec_FF_Intrinsic<"vexptefp">;
@@ -1626,8 +1640,7 @@ let TargetPrefix = "ppc" in {
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
// load
def int_ppc_load2r
- : GCCBuiltin<"__builtin_ppc_load2r">,
- Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>;
+ : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>;
def int_ppc_load4r
: GCCBuiltin<"__builtin_ppc_load4r">,
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>;
@@ -1706,7 +1719,10 @@ let TargetPrefix = "ppc" in {
def int_ppc_fres
: GCCBuiltin<"__builtin_ppc_fres">,
Intrinsic <[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
-
+ def int_ppc_addex
+ : GCCBuiltin<"__builtin_ppc_addex">,
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty],
+ [IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<2>>]>;
def int_ppc_fsel : GCCBuiltin<"__builtin_ppc_fsel">,
Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty,
llvm_double_ty], [IntrNoMem]>;
@@ -1717,6 +1733,33 @@ let TargetPrefix = "ppc" in {
Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
def int_ppc_frsqrtes : GCCBuiltin<"__builtin_ppc_frsqrtes">,
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_ppc_compare_exp_uo : GCCBuiltin<"__builtin_ppc_compare_exp_uo">,
+ Intrinsic<[llvm_i32_ty],
+ [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem]>;
+ def int_ppc_compare_exp_lt : GCCBuiltin<"__builtin_ppc_compare_exp_lt">,
+ Intrinsic<[llvm_i32_ty],
+ [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem]>;
+ def int_ppc_compare_exp_gt : GCCBuiltin<"__builtin_ppc_compare_exp_gt">,
+ Intrinsic<[llvm_i32_ty],
+ [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem]>;
+ def int_ppc_compare_exp_eq : GCCBuiltin<"__builtin_ppc_compare_exp_eq">,
+ Intrinsic<[llvm_i32_ty],
+ [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem]>;
+ def int_ppc_test_data_class_d : Intrinsic<[llvm_i32_ty],
+ [llvm_double_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+ def int_ppc_test_data_class_f : Intrinsic<[llvm_i32_ty],
+ [llvm_float_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+
+ def int_ppc_convert_f128_to_ppcf128
+ : Intrinsic<[llvm_ppcf128_ty], [llvm_f128_ty], [IntrNoMem]>;
+ def int_ppc_convert_ppcf128_to_f128
+ : Intrinsic<[llvm_f128_ty], [llvm_ppcf128_ty], [IntrNoMem]>;
}
//===----------------------------------------------------------------------===//
@@ -1738,4 +1781,11 @@ let TargetPrefix = "ppc" in {
llvm_i64_ty, llvm_i64_ty,
llvm_i64_ty, llvm_i64_ty],
[IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
+ def int_ppc_atomic_load_i128 :
+ Intrinsic<[llvm_i64_ty, llvm_i64_ty],
+ [llvm_ptr_ty],
+ [IntrArgMemOnly, IntrReadMem, NoCapture<ArgIndex<0>>]>;
+ def int_ppc_atomic_store_i128 :
+ Intrinsic<[], [llvm_i64_ty, llvm_i64_ty, llvm_ptr_ty],
+ [IntrArgMemOnly, IntrWriteMem, NoCapture<ArgIndex<2>>]>;
}
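Note: several of the PowerPC additions above (int_ppc_addex, int_ppc_test_data_class_d/f) use ImmArg<ArgIndex<N>> to require that operand N be a compile-time immediate, and int_ppc_atomic_load_i128 models a 128-bit load as an intrinsic with two i64 results. A minimal sketch of the same two idioms, with hypothetical names that are not part of this commit:

// Operand 1 must be an immediate constant; the intrinsic reads no memory.
def int_ppc_example_imm
    : Intrinsic<[llvm_i32_ty], [llvm_double_ty, llvm_i32_ty],
                [IntrNoMem, ImmArg<ArgIndex<1>>]>;

// Two scalar results expressed as a multi-value return; the pointer operand
// is only read, only its pointee is accessed, and it is never captured.
def int_ppc_example_load_pair
    : Intrinsic<[llvm_i64_ty, llvm_i64_ty], [llvm_ptr_ty],
                [IntrArgMemOnly, IntrReadMem, NoCapture<ArgIndex<0>>]>;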
diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index a46709bf09d1..3ceb347e97bf 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -159,16 +159,17 @@ let TargetPrefix = "riscv" in {
[NoCapture<ArgIndex<0>>]>,
RISCVVIntrinsic;
// For unit stride load with mask
- // Input: (maskedoff, pointer, mask, vl)
+ // Input: (maskedoff, pointer, mask, vl, ta)
class RISCVUSLoadMask
: Intrinsic<[llvm_anyvector_ty ],
[LLVMMatchType<0>,
LLVMPointerType<LLVMMatchType<0>>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_anyint_ty],
- [NoCapture<ArgIndex<1>>, IntrReadMem]>, RISCVVIntrinsic;
+ llvm_anyint_ty, LLVMMatchType<1>],
+ [NoCapture<ArgIndex<1>>, ImmArg<ArgIndex<4>>, IntrReadMem]>,
+ RISCVVIntrinsic;
// For unit stride fault-only-first load with mask
- // Input: (maskedoff, pointer, mask, vl)
+ // Input: (maskedoff, pointer, mask, vl, ta)
// Output: (data, vl)
// NOTE: We model this with default memory properties since we model writing
// VL as a side effect. IntrReadMem, IntrHasSideEffects does not work.
@@ -177,8 +178,8 @@ let TargetPrefix = "riscv" in {
[LLVMMatchType<0>,
LLVMPointerType<LLVMMatchType<0>>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- LLVMMatchType<1>],
- [NoCapture<ArgIndex<1>>]>, RISCVVIntrinsic;
+ LLVMMatchType<1>, LLVMMatchType<1>],
+ [NoCapture<ArgIndex<1>>, ImmArg<ArgIndex<4>>]>, RISCVVIntrinsic;
// For strided load
// Input: (pointer, stride, vl)
class RISCVSLoad
@@ -187,13 +188,15 @@ let TargetPrefix = "riscv" in {
llvm_anyint_ty, LLVMMatchType<1>],
[NoCapture<ArgIndex<0>>, IntrReadMem]>, RISCVVIntrinsic;
// For strided load with mask
- // Input: (maskedoff, pointer, stride, mask, vl)
+ // Input: (maskedoff, pointer, stride, mask, vl, ta)
class RISCVSLoadMask
: Intrinsic<[llvm_anyvector_ty ],
[LLVMMatchType<0>,
LLVMPointerType<LLVMMatchType<0>>, llvm_anyint_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<1>],
- [NoCapture<ArgIndex<1>>, IntrReadMem]>, RISCVVIntrinsic;
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<1>,
+ LLVMMatchType<1>],
+ [NoCapture<ArgIndex<1>>, ImmArg<ArgIndex<5>>, IntrReadMem]>,
+ RISCVVIntrinsic;
// For indexed load
// Input: (pointer, index, vl)
class RISCVILoad
@@ -202,13 +205,15 @@ let TargetPrefix = "riscv" in {
llvm_anyvector_ty, llvm_anyint_ty],
[NoCapture<ArgIndex<0>>, IntrReadMem]>, RISCVVIntrinsic;
// For indexed load with mask
- // Input: (maskedoff, pointer, index, mask, vl)
+ // Input: (maskedoff, pointer, index, mask, vl, ta)
class RISCVILoadMask
: Intrinsic<[llvm_anyvector_ty ],
[LLVMMatchType<0>,
LLVMPointerType<LLVMMatchType<0>>, llvm_anyvector_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
- [NoCapture<ArgIndex<1>>, IntrReadMem]>, RISCVVIntrinsic;
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty,
+ LLVMMatchType<2>],
+ [NoCapture<ArgIndex<1>>, ImmArg<ArgIndex<5>>, IntrReadMem]>,
+ RISCVVIntrinsic;
// For unit stride store
// Input: (vector_in, pointer, vl)
class RISCVUSStore
@@ -265,10 +270,16 @@ let TargetPrefix = "riscv" in {
[LLVMMatchType<0>, llvm_anyint_ty],
[IntrNoMem]>, RISCVVIntrinsic;
// For destination vector type is the same as first source vector (with mask).
- // Input: (vector_in, mask, vl)
+ // Input: (vector_in, mask, vl, ta)
class RISCVUnaryAAMask
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty,
+ LLVMMatchType<1>],
+ [ImmArg<ArgIndex<4>>, IntrNoMem]>, RISCVVIntrinsic;
+ class RISCVUnaryAAMaskNoTA
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
[IntrNoMem]>, RISCVVIntrinsic;
// For destination vector type is the same as first and second source vector.
@@ -284,12 +295,13 @@ let TargetPrefix = "riscv" in {
[LLVMMatchType<0>, LLVMVectorOfBitcastsToInt<0>, llvm_anyint_ty],
[IntrNoMem]>, RISCVVIntrinsic;
// For destination vector type is the same as first and second source vector.
- // Input: (vector_in, vector_in, int_vector_in, vl)
+ // Input: (vector_in, vector_in, int_vector_in, vl, ta)
class RISCVRGatherVVMask
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMVectorOfBitcastsToInt<0>,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
- [IntrNoMem]>, RISCVVIntrinsic;
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty,
+ LLVMMatchType<1>],
+ [ImmArg<ArgIndex<5>>, IntrNoMem]>, RISCVVIntrinsic;
// Input: (vector_in, int16_vector_in, vl)
class RISCVRGatherEI16VVNoMask
: Intrinsic<[llvm_anyvector_ty],
@@ -297,13 +309,14 @@ let TargetPrefix = "riscv" in {
llvm_anyint_ty],
[IntrNoMem]>, RISCVVIntrinsic;
// For destination vector type is the same as first and second source vector.
- // Input: (vector_in, vector_in, int16_vector_in, vl)
+ // Input: (vector_in, vector_in, int16_vector_in, vl, ta)
class RISCVRGatherEI16VVMask
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i16_ty>,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
- [IntrNoMem]>, RISCVVIntrinsic;
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty,
+ LLVMMatchType<1>],
+ [ImmArg<ArgIndex<5>>, IntrNoMem]>, RISCVVIntrinsic;
// For destination vector type is the same as first source vector, and the
// second operand is XLen.
// Input: (vector_in, xlen_in, vl)
@@ -314,12 +327,13 @@ let TargetPrefix = "riscv" in {
}
// For destination vector type is the same as first source vector (with mask).
// Second operand is XLen.
- // Input: (maskedoff, vector_in, xlen_in, mask, vl)
+ // Input: (maskedoff, vector_in, xlen_in, mask, vl, ta)
class RISCVGatherVXMask
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyint_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<1>],
- [IntrNoMem]>, RISCVVIntrinsic {
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<1>,
+ LLVMMatchType<1>],
+ [ImmArg<ArgIndex<5>>, IntrNoMem]>, RISCVVIntrinsic {
}
// For destination vector type is the same as first source vector.
// Input: (vector_in, vector_in/scalar_in, vl)
@@ -330,12 +344,13 @@ let TargetPrefix = "riscv" in {
let SplatOperand = 2;
}
// For destination vector type is the same as first source vector (with mask).
- // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl)
+ // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, ta)
class RISCVBinaryAAXMask
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
- [IntrNoMem]>, RISCVVIntrinsic {
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty,
+ LLVMMatchType<2>],
+ [ImmArg<ArgIndex<5>>, IntrNoMem]>, RISCVVIntrinsic {
let SplatOperand = 3;
}
// For destination vector type is the same as first source vector. The
@@ -347,12 +362,13 @@ let TargetPrefix = "riscv" in {
[IntrNoMem]>, RISCVVIntrinsic;
// For destination vector type is the same as first source vector (with mask).
// The second source operand must match the destination type or be an XLen scalar.
- // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl)
+ // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, ta)
class RISCVBinaryAAShiftMask
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
- [IntrNoMem]>, RISCVVIntrinsic;
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty,
+ LLVMMatchType<2>],
+ [ImmArg<ArgIndex<5>>, IntrNoMem]>, RISCVVIntrinsic;
// For destination vector type is NOT the same as first source vector.
// Input: (vector_in, vector_in/scalar_in, vl)
class RISCVBinaryABXNoMask
@@ -362,12 +378,13 @@ let TargetPrefix = "riscv" in {
let SplatOperand = 2;
}
// For destination vector type is NOT the same as first source vector (with mask).
- // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl)
+ // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, ta)
class RISCVBinaryABXMask
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, llvm_anyvector_ty, llvm_any_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
- [IntrNoMem]>, RISCVVIntrinsic {
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty,
+ LLVMMatchType<3>],
+ [ImmArg<ArgIndex<5>>, IntrNoMem]>, RISCVVIntrinsic {
let SplatOperand = 3;
}
// For destination vector type is NOT the same as first source vector. The
@@ -379,12 +396,13 @@ let TargetPrefix = "riscv" in {
[IntrNoMem]>, RISCVVIntrinsic;
// For destination vector type is NOT the same as first source vector (with mask).
// The second source operand must match the destination type or be an XLen scalar.
- // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl)
+ // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, ta)
class RISCVBinaryABShiftMask
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, llvm_anyvector_ty, llvm_any_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
- [IntrNoMem]>, RISCVVIntrinsic;
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty,
+ LLVMMatchType<3>],
+ [ImmArg<ArgIndex<5>>, IntrNoMem]>, RISCVVIntrinsic;
// For binary operations with V0 as input.
// Input: (vector_in, vector_in/scalar_in, V0, vl)
class RISCVBinaryWithV0
@@ -461,12 +479,13 @@ let TargetPrefix = "riscv" in {
}
// For Saturating binary operations with mask.
// The destination vector type is the same as first source vector.
- // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl)
+ // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, ta)
class RISCVSaturatingBinaryAAXMask
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
- [IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic {
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty,
+ LLVMMatchType<2>],
+ [ImmArg<ArgIndex<5>>, IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic {
let SplatOperand = 3;
}
// For Saturating binary operations.
@@ -480,12 +499,13 @@ let TargetPrefix = "riscv" in {
// For Saturating binary operations with mask.
// The destination vector type is the same as first source vector.
// The second source operand matches the destination type or is an XLen scalar.
- // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl)
+ // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, ta)
class RISCVSaturatingBinaryAAShiftMask
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
- [IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic;
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty,
+ LLVMMatchType<2>],
+ [ImmArg<ArgIndex<5>>, IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic;
// For Saturating binary operations.
// The destination vector type is NOT the same as first source vector.
// The second source operand matches the destination type or is an XLen scalar.
@@ -497,12 +517,13 @@ let TargetPrefix = "riscv" in {
// For Saturating binary operations with mask.
// The destination vector type is NOT the same as first source vector (with mask).
// The second source operand matches the destination type or is an XLen scalar.
- // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl)
+ // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, ta)
class RISCVSaturatingBinaryABShiftMask
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, llvm_anyvector_ty, llvm_any_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
- [IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic;
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty,
+ LLVMMatchType<3>],
+ [ImmArg<ArgIndex<5>>, IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic;
class RISCVTernaryAAAXNoMask
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyint_ty,
@@ -579,13 +600,13 @@ let TargetPrefix = "riscv" in {
[llvm_anyvector_ty, llvm_anyint_ty],
[IntrNoMem]>, RISCVVIntrinsic;
// For destination vector type is NOT the same as source vector (with mask).
- // Input: (maskedoff, vector_in, mask, vl)
+ // Input: (maskedoff, vector_in, mask, vl, ta)
class RISCVUnaryABMask
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, llvm_anyvector_ty,
LLVMScalarOrSameVectorWidth<1, llvm_i1_ty>,
- llvm_anyint_ty],
- [IntrNoMem]>, RISCVVIntrinsic;
+ llvm_anyint_ty, LLVMMatchType<2>],
+ [ImmArg<ArgIndex<4>>, IntrNoMem]>, RISCVVIntrinsic;
// For unary operations with the same vector type in/out without mask
// Output: (vector)
// Input: (vector_in, vl)
@@ -614,12 +635,13 @@ let TargetPrefix = "riscv" in {
[llvm_anyvector_ty, llvm_anyint_ty],
[IntrNoMem]>, RISCVVIntrinsic;
// For Conversion unary operations with mask.
- // Input: (maskedoff, vector_in, mask, vl)
+ // Input: (maskedoff, vector_in, mask, vl, ta)
class RISCVConversionMask
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, llvm_anyvector_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
- [IntrNoMem]>, RISCVVIntrinsic;
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty,
+ LLVMMatchType<2>],
+ [ImmArg<ArgIndex<4>>, IntrNoMem]>, RISCVVIntrinsic;
// For atomic operations without mask
// Input: (base, index, value, vl)
class RISCVAMONoMask
@@ -643,15 +665,16 @@ let TargetPrefix = "riscv" in {
[LLVMPointerToElt<0>, llvm_anyint_ty],
[NoCapture<ArgIndex<0>>, IntrReadMem]>, RISCVVIntrinsic;
// For unit stride segment load with mask
- // Input: (maskedoff, pointer, mask, vl)
+ // Input: (maskedoff, pointer, mask, vl, ta)
class RISCVUSSegLoadMask<int nf>
: Intrinsic<!listconcat([llvm_anyvector_ty], !listsplat(LLVMMatchType<0>,
!add(nf, -1))),
!listconcat(!listsplat(LLVMMatchType<0>, nf),
[LLVMPointerToElt<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_anyint_ty]),
- [NoCapture<ArgIndex<nf>>, IntrReadMem]>, RISCVVIntrinsic;
+ llvm_anyint_ty, LLVMMatchType<1>]),
+ [ImmArg<ArgIndex<!add(nf, 3)>>, NoCapture<ArgIndex<nf>>, IntrReadMem]>,
+ RISCVVIntrinsic;
// For unit stride fault-only-first segment load
// Input: (pointer, vl)
@@ -664,7 +687,7 @@ let TargetPrefix = "riscv" in {
[LLVMPointerToElt<0>, LLVMMatchType<1>],
[NoCapture<ArgIndex<0>>]>, RISCVVIntrinsic;
// For unit stride fault-only-first segment load with mask
- // Input: (maskedoff, pointer, mask, vl)
+ // Input: (maskedoff, pointer, mask, vl, ta)
// Output: (data, vl)
// NOTE: We model this with default memory properties since we model writing
// VL as a side effect. IntrReadMem, IntrHasSideEffects does not work.
@@ -674,8 +697,9 @@ let TargetPrefix = "riscv" in {
!listconcat(!listsplat(LLVMMatchType<0>, nf),
[LLVMPointerToElt<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- LLVMMatchType<1>]),
- [NoCapture<ArgIndex<nf>>]>, RISCVVIntrinsic;
+ LLVMMatchType<1>, LLVMMatchType<1>]),
+ [ImmArg<ArgIndex<!add(nf, 3)>>, NoCapture<ArgIndex<nf>>]>,
+ RISCVVIntrinsic;
// For stride segment load
// Input: (pointer, offset, vl)
@@ -685,7 +709,7 @@ let TargetPrefix = "riscv" in {
[LLVMPointerToElt<0>, llvm_anyint_ty, LLVMMatchType<1>],
[NoCapture<ArgIndex<0>>, IntrReadMem]>, RISCVVIntrinsic;
// For stride segment load with mask
- // Input: (maskedoff, pointer, offset, mask, vl)
+ // Input: (maskedoff, pointer, offset, mask, vl, ta)
class RISCVSSegLoadMask<int nf>
: Intrinsic<!listconcat([llvm_anyvector_ty], !listsplat(LLVMMatchType<0>,
!add(nf, -1))),
@@ -693,8 +717,9 @@ let TargetPrefix = "riscv" in {
[LLVMPointerToElt<0>,
llvm_anyint_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- LLVMMatchType<1>]),
- [NoCapture<ArgIndex<nf>>, IntrReadMem]>, RISCVVIntrinsic;
+ LLVMMatchType<1>, LLVMMatchType<1>]),
+ [ImmArg<ArgIndex<!add(nf, 4)>>, NoCapture<ArgIndex<nf>>, IntrReadMem]>,
+ RISCVVIntrinsic;
// For indexed segment load
// Input: (pointer, index, vl)
@@ -704,7 +729,7 @@ let TargetPrefix = "riscv" in {
[LLVMPointerToElt<0>, llvm_anyvector_ty, llvm_anyint_ty],
[NoCapture<ArgIndex<0>>, IntrReadMem]>, RISCVVIntrinsic;
// For indexed segment load with mask
- // Input: (maskedoff, pointer, index, mask, vl)
+ // Input: (maskedoff, pointer, index, mask, vl, ta)
class RISCVISegLoadMask<int nf>
: Intrinsic<!listconcat([llvm_anyvector_ty], !listsplat(LLVMMatchType<0>,
!add(nf, -1))),
@@ -712,8 +737,9 @@ let TargetPrefix = "riscv" in {
[LLVMPointerToElt<0>,
llvm_anyvector_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_anyint_ty]),
- [NoCapture<ArgIndex<nf>>, IntrReadMem]>, RISCVVIntrinsic;
+ llvm_anyint_ty, LLVMMatchType<2>]),
+ [ImmArg<ArgIndex<!add(nf, 4)>>, NoCapture<ArgIndex<nf>>, IntrReadMem]>,
+ RISCVVIntrinsic;
// For unit stride segment store
// Input: (value, pointer, vl)
@@ -947,8 +973,8 @@ let TargetPrefix = "riscv" in {
defm vsoxei : RISCVIStore;
defm vsuxei : RISCVIStore;
- def int_riscv_vle1 : RISCVUSLoad;
- def int_riscv_vse1 : RISCVUSStore;
+ def int_riscv_vlm : RISCVUSLoad;
+ def int_riscv_vsm : RISCVUSStore;
defm vamoswap : RISCVAMO;
defm vamoadd : RISCVAMO;
@@ -1049,7 +1075,7 @@ let TargetPrefix = "riscv" in {
defm vssubu : RISCVSaturatingBinaryAAX;
defm vssub : RISCVSaturatingBinaryAAX;
- def int_riscv_vmerge : RISCVBinaryWithV0;
+ defm vmerge : RISCVBinaryWithV0;
def int_riscv_vmv_v_v : Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, llvm_anyint_ty],
@@ -1124,7 +1150,7 @@ let TargetPrefix = "riscv" in {
defm vrgather_vx : RISCVRGatherVX;
defm vrgatherei16_vv : RISCVRGatherEI16VV;
- def "int_riscv_vcompress" : RISCVUnaryAAMask;
+ def "int_riscv_vcompress" : RISCVUnaryAAMaskNoTA;
defm vaaddu : RISCVSaturatingBinaryAAX;
defm vaadd : RISCVSaturatingBinaryAAX;
@@ -1159,25 +1185,25 @@ let TargetPrefix = "riscv" in {
defm vwredsum : RISCVReduction;
defm vfredosum : RISCVReduction;
- defm vfredsum : RISCVReduction;
+ defm vfredusum : RISCVReduction;
defm vfredmin : RISCVReduction;
defm vfredmax : RISCVReduction;
- defm vfwredsum : RISCVReduction;
+ defm vfwredusum : RISCVReduction;
defm vfwredosum : RISCVReduction;
def int_riscv_vmand: RISCVBinaryAAANoMask;
def int_riscv_vmnand: RISCVBinaryAAANoMask;
- def int_riscv_vmandnot: RISCVBinaryAAANoMask;
+ def int_riscv_vmandn: RISCVBinaryAAANoMask;
def int_riscv_vmxor: RISCVBinaryAAANoMask;
def int_riscv_vmor: RISCVBinaryAAANoMask;
def int_riscv_vmnor: RISCVBinaryAAANoMask;
- def int_riscv_vmornot: RISCVBinaryAAANoMask;
+ def int_riscv_vmorn: RISCVBinaryAAANoMask;
def int_riscv_vmxnor: RISCVBinaryAAANoMask;
def int_riscv_vmclr : RISCVNullaryIntrinsic;
def int_riscv_vmset : RISCVNullaryIntrinsic;
- defm vpopc : RISCVMaskUnarySOut;
+ defm vcpop : RISCVMaskUnarySOut;
defm vfirst : RISCVMaskUnarySOut;
defm vmsbf : RISCVMaskUnaryMOut;
defm vmsof : RISCVMaskUnaryMOut;
@@ -1245,4 +1271,15 @@ let TargetPrefix = "riscv" in {
defm vsuxseg # nf : RISCVISegStore<nf>;
}
+ // Strided loads/stores for fixed vectors.
+ def int_riscv_masked_strided_load
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_anyptr_ty,
+ llvm_anyint_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
+ [NoCapture<ArgIndex<1>>, IntrReadMem]>;
+ def int_riscv_masked_strided_store
+ : Intrinsic<[],
+ [llvm_anyvector_ty, llvm_anyptr_ty,
+ llvm_anyint_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
+ [NoCapture<ArgIndex<1>>, IntrWriteMem]>;
} // TargetPrefix = "riscv"
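
The llvm.riscv.masked.strided.load / llvm.riscv.masked.strided.store intrinsics added at the end of this file are overloaded on the vector type, the pointer type and the stride type, and take (passthru, pointer, stride, mask) respectively (value, pointer, stride, mask) operands; the mask type is derived from the vector type, so it is not part of the overload list. A minimal C++ sketch of emitting the load through IRBuilder, with illustrative value names and assuming an LLVM tree that already contains these definitions:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Masked strided load: lane i is read from Ptr + i * Stride when Mask[i] is
// set and taken from PassThru otherwise.
static Value *emitMaskedStridedLoad(IRBuilder<> &B, Module &M, Value *PassThru,
                                    Value *Ptr, Value *Stride, Value *Mask) {
  // Overloaded types: result vector, pointer, stride (in that order).
  Function *Fn = Intrinsic::getDeclaration(
      &M, Intrinsic::riscv_masked_strided_load,
      {PassThru->getType(), Ptr->getType(), Stride->getType()});
  return B.CreateCall(Fn, {PassThru, Ptr, Stride, Mask});
}
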
diff --git a/llvm/include/llvm/IR/IntrinsicsSystemZ.td b/llvm/include/llvm/IR/IntrinsicsSystemZ.td
index 81435e98bea0..a149b571072c 100644
--- a/llvm/include/llvm/IR/IntrinsicsSystemZ.td
+++ b/llvm/include/llvm/IR/IntrinsicsSystemZ.td
@@ -144,7 +144,7 @@ multiclass SystemZBinaryCCBHF {
def fs : SystemZBinaryCC<llvm_v4i32_ty>;
}
-multiclass SystemZCompareBHFG<string name> {
+multiclass SystemZCompareBHFG {
def bs : SystemZBinaryCC<llvm_v16i8_ty>;
def hs : SystemZBinaryCC<llvm_v8i16_ty>;
def fs : SystemZBinaryCC<llvm_v4i32_ty>;
@@ -341,9 +341,9 @@ let TargetPrefix = "s390" in {
def int_s390_vtm : SystemZBinaryConv<"vtm", llvm_i32_ty, llvm_v16i8_ty>;
- defm int_s390_vceq : SystemZCompareBHFG<"vceq">;
- defm int_s390_vch : SystemZCompareBHFG<"vch">;
- defm int_s390_vchl : SystemZCompareBHFG<"vchl">;
+ defm int_s390_vceq : SystemZCompareBHFG;
+ defm int_s390_vch : SystemZCompareBHFG;
+ defm int_s390_vchl : SystemZCompareBHFG;
defm int_s390_vfae : SystemZTernaryIntBHF<"vfae">;
defm int_s390_vfae : SystemZTernaryIntCCBHF;
diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
index 11990554037d..6a8e6c797f85 100644
--- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
+++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
@@ -50,7 +50,8 @@ def int_wasm_trunc_saturate_unsigned : Intrinsic<[llvm_anyint_ty],
//===----------------------------------------------------------------------===//
// throw / rethrow
-// The immediate argument is an index to a tag, which is 0 for C++.
+// The first immediate argument is an index to a tag, which is 0 for C++
+// exceptions. The second argument is the thrown exception pointer.
def int_wasm_throw : Intrinsic<[], [llvm_i32_ty, llvm_ptr_ty],
[Throws, IntrNoReturn, ImmArg<ArgIndex<0>>]>;
def int_wasm_rethrow : Intrinsic<[], [], [Throws, IntrNoReturn]>;
@@ -63,8 +64,9 @@ def int_wasm_get_ehselector : Intrinsic<[llvm_i32_ty], [llvm_token_ty],
[IntrHasSideEffects]>;
// wasm.catch returns the pointer to the exception object caught by wasm 'catch'
-// instruction. This returns a single pointer, which is sufficient for C++
-// support. The immediate argument is an index to for a tag, which is 0 for C++.
+// instruction. This returns a single pointer, which is the case for C++
+// exceptions. The immediate argument is an index to a tag, which is 0 for
+// C++ exceptions.
def int_wasm_catch : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty],
[IntrHasSideEffects, ImmArg<ArgIndex<0>>]>;
@@ -162,6 +164,15 @@ def int_wasm_q15mulr_sat_signed :
[llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem, IntrSpeculatable]>;
+def int_wasm_pmin :
+ Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem, IntrSpeculatable]>;
+def int_wasm_pmax :
+ Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem, IntrSpeculatable]>;
+
def int_wasm_extadd_pairwise_signed :
Intrinsic<[llvm_anyvector_ty],
[LLVMSubdivide2VectorType<0>],
@@ -172,6 +183,59 @@ def int_wasm_extadd_pairwise_unsigned :
[IntrNoMem, IntrSpeculatable]>;
//===----------------------------------------------------------------------===//
+// Relaxed SIMD intrinsics (experimental)
+//===----------------------------------------------------------------------===//
+
+def int_wasm_fma :
+ Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem, IntrSpeculatable]>;
+def int_wasm_fms :
+ Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem, IntrSpeculatable]>;
+
+def int_wasm_laneselect :
+ Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem, IntrSpeculatable]>;
+
+def int_wasm_relaxed_swizzle :
+ Intrinsic<[llvm_v16i8_ty],
+ [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem, IntrSpeculatable]>;
+
+def int_wasm_relaxed_min :
+ Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem, IntrSpeculatable]>;
+def int_wasm_relaxed_max :
+ Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem, IntrSpeculatable]>;
+
+def int_wasm_relaxed_trunc_signed:
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v4f32_ty],
+ [IntrNoMem, IntrSpeculatable]>;
+
+def int_wasm_relaxed_trunc_unsigned:
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v4f32_ty],
+ [IntrNoMem, IntrSpeculatable]>;
+
+def int_wasm_relaxed_trunc_zero_signed:
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v2f64_ty],
+ [IntrNoMem, IntrSpeculatable]>;
+
+def int_wasm_relaxed_trunc_zero_unsigned:
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v2f64_ty],
+ [IntrNoMem, IntrSpeculatable]>;
+
+
+//===----------------------------------------------------------------------===//
// Thread-local storage intrinsics
//===----------------------------------------------------------------------===//
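
The reworded comments above spell out the llvm.wasm.throw operands: an immediate tag index (0 for the C++ exception tag) followed by the pointer to the thrown exception object. A minimal sketch of emitting the throw through IRBuilder, assuming the exception object has already been allocated elsewhere and is held in ExnPtr:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Throw an exception object via wasm exception handling. The intrinsic is
// marked IntrNoReturn, so the block is terminated with unreachable.
static void emitWasmThrow(IRBuilder<> &B, Module &M, Value *ExnPtr) {
  Function *ThrowFn = Intrinsic::getDeclaration(&M, Intrinsic::wasm_throw);
  B.CreateCall(ThrowFn, {B.getInt32(0), ExnPtr}); // tag 0: C++ exceptions
  B.CreateUnreachable();
}
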
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 5848356b5b1a..8de737a1c7a5 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -792,7 +792,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse41_mpsadbw : GCCBuiltin<"__builtin_ia32_mpsadbw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty,llvm_i8_ty],
- [IntrNoMem, Commutative, ImmArg<ArgIndex<2>>]>;
+ [IntrNoMem, ImmArg<ArgIndex<2>>]>;
}
// Test instruction with bitwise comparison.
@@ -1779,7 +1779,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
llvm_v32i8_ty], [IntrNoMem]>;
def int_x86_avx2_mpsadbw : GCCBuiltin<"__builtin_ia32_mpsadbw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
- llvm_i8_ty], [IntrNoMem, Commutative, ImmArg<ArgIndex<2>>]>;
+ llvm_i8_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
}
//===----------------------------------------------------------------------===//
@@ -5093,6 +5093,10 @@ let TargetPrefix = "x86" in {
[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty,
llvm_x86amx_ty, llvm_x86amx_ty,
llvm_x86amx_ty], []>;
+ def int_x86_cast_vector_to_tile:
+ Intrinsic<[llvm_x86amx_ty], [llvm_anyvector_ty], [IntrNoMem]>;
+ def int_x86_cast_tile_to_vector:
+ Intrinsic<[llvm_anyvector_ty], [llvm_x86amx_ty], [IntrNoMem]>;
}
//===----------------------------------------------------------------------===//
@@ -5108,3 +5112,757 @@ let TargetPrefix = "x86" in {
def int_x86_senduipi : GCCBuiltin<"__builtin_ia32_senduipi">,
Intrinsic<[], [llvm_i64_ty], []>;
}
+
+//===----------------------------------------------------------------------===//
+// avx512_fp16: vaddph
+let TargetPrefix = "x86" in {
+ def int_x86_avx512fp16_add_ph_512
+ : GCCBuiltin<"__builtin_ia32_addph512">,
+ Intrinsic<[ llvm_v32f16_ty ],
+ [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<2>> ]>;
+ def int_x86_avx512fp16_sub_ph_512
+ : GCCBuiltin<"__builtin_ia32_subph512">,
+ Intrinsic<[ llvm_v32f16_ty ],
+ [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<2>> ]>;
+ def int_x86_avx512fp16_mul_ph_512
+ : GCCBuiltin<"__builtin_ia32_mulph512">,
+ Intrinsic<[ llvm_v32f16_ty ],
+ [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<2>> ]>;
+ def int_x86_avx512fp16_div_ph_512
+ : GCCBuiltin<"__builtin_ia32_divph512">,
+ Intrinsic<[ llvm_v32f16_ty ],
+ [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<2>> ]>;
+ def int_x86_avx512fp16_max_ph_128
+ : GCCBuiltin<"__builtin_ia32_maxph128">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_v8f16_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_max_ph_256
+ : GCCBuiltin<"__builtin_ia32_maxph256">,
+ Intrinsic<[ llvm_v16f16_ty ],
+ [ llvm_v16f16_ty, llvm_v16f16_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_max_ph_512
+ : GCCBuiltin<"__builtin_ia32_maxph512">,
+ Intrinsic<[ llvm_v32f16_ty ],
+ [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<2>> ]>;
+ def int_x86_avx512fp16_min_ph_128
+ : GCCBuiltin<"__builtin_ia32_minph128">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_v8f16_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_min_ph_256
+ : GCCBuiltin<"__builtin_ia32_minph256">,
+ Intrinsic<[ llvm_v16f16_ty ],
+ [ llvm_v16f16_ty, llvm_v16f16_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_min_ph_512
+ : GCCBuiltin<"__builtin_ia32_minph512">,
+ Intrinsic<[ llvm_v32f16_ty ],
+ [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<2>> ]>;
+
+ def int_x86_avx512fp16_mask_cmp_ph_512
+ : Intrinsic<[ llvm_v32i1_ty ],
+ [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty, llvm_v32i1_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_mask_cmp_ph_256
+ : Intrinsic<[ llvm_v16i1_ty ],
+ [ llvm_v16f16_ty, llvm_v16f16_ty, llvm_i32_ty, llvm_v16i1_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<2>> ]>;
+ def int_x86_avx512fp16_mask_cmp_ph_128
+ : Intrinsic<[ llvm_v8i1_ty ],
+ [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i32_ty, llvm_v8i1_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<2>> ]>;
+
+ def int_x86_avx512fp16_mask_add_sh_round
+ : GCCBuiltin<"__builtin_ia32_addsh_round_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_mask_sub_sh_round
+ : GCCBuiltin<"__builtin_ia32_subsh_round_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_mask_mul_sh_round
+ : GCCBuiltin<"__builtin_ia32_mulsh_round_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_mask_div_sh_round
+ : GCCBuiltin<"__builtin_ia32_divsh_round_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_mask_min_sh_round
+ : GCCBuiltin<"__builtin_ia32_minsh_round_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_mask_max_sh_round
+ : GCCBuiltin<"__builtin_ia32_maxsh_round_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_mask_cmp_sh
+ : GCCBuiltin<"__builtin_ia32_cmpsh_mask">,
+ Intrinsic<[ llvm_i8_ty ],
+ [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i32_ty, llvm_i8_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_vcomi_sh
+ : GCCBuiltin<"__builtin_ia32_vcomish">,
+ Intrinsic<[ llvm_i32_ty ],
+ [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i32_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>> ]>;
+
+ def int_x86_avx512fp16_mask_vcvtph2psx_128
+ : GCCBuiltin<"__builtin_ia32_vcvtph2psx128_mask">,
+ Intrinsic<[ llvm_v4f32_ty ],
+ [ llvm_v8f16_ty, llvm_v4f32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2psx_256
+ : GCCBuiltin<"__builtin_ia32_vcvtph2psx256_mask">,
+ Intrinsic<[ llvm_v8f32_ty ],
+ [ llvm_v8f16_ty, llvm_v8f32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2psx_512
+ : GCCBuiltin<"__builtin_ia32_vcvtph2psx512_mask">,
+ Intrinsic<[ llvm_v16f32_ty ],
+ [ llvm_v16f16_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+ def int_x86_avx512fp16_mask_vcvtps2phx_128
+ : GCCBuiltin<"__builtin_ia32_vcvtps2phx128_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v4f32_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtps2phx_256
+ : GCCBuiltin<"__builtin_ia32_vcvtps2phx256_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f32_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtps2phx_512
+ : GCCBuiltin<"__builtin_ia32_vcvtps2phx512_mask">,
+ Intrinsic<[ llvm_v16f16_ty ],
+ [ llvm_v16f32_ty, llvm_v16f16_ty, llvm_i16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+ def int_x86_avx512fp16_mask_vcvtpd2ph_128
+ : GCCBuiltin<"__builtin_ia32_vcvtpd2ph128_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v2f64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtpd2ph_256
+ : GCCBuiltin<"__builtin_ia32_vcvtpd2ph256_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v4f64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtpd2ph_512
+ : GCCBuiltin<"__builtin_ia32_vcvtpd2ph512_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f64_ty, llvm_v8f16_ty, llvm_i8_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+ def int_x86_avx512fp16_mask_vcvtph2pd_128
+ : GCCBuiltin<"__builtin_ia32_vcvtph2pd128_mask">,
+ Intrinsic<[ llvm_v2f64_ty ],
+ [ llvm_v8f16_ty, llvm_v2f64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2pd_256
+ : GCCBuiltin<"__builtin_ia32_vcvtph2pd256_mask">,
+ Intrinsic<[ llvm_v4f64_ty ],
+ [ llvm_v8f16_ty, llvm_v4f64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2pd_512
+ : GCCBuiltin<"__builtin_ia32_vcvtph2pd512_mask">,
+ Intrinsic<[ llvm_v8f64_ty ],
+ [ llvm_v8f16_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+ def int_x86_avx512fp16_mask_vcvtsh2ss_round
+ : GCCBuiltin<"__builtin_ia32_vcvtsh2ss_round_mask">,
+ Intrinsic<[ llvm_v4f32_ty ],
+ [ llvm_v4f32_ty, llvm_v8f16_ty, llvm_v4f32_ty, llvm_i8_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_mask_vcvtss2sh_round
+ : GCCBuiltin<"__builtin_ia32_vcvtss2sh_round_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_v4f32_ty, llvm_v8f16_ty, llvm_i8_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_mask_vcvtsd2sh_round
+ : GCCBuiltin<"__builtin_ia32_vcvtsd2sh_round_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_v2f64_ty, llvm_v8f16_ty, llvm_i8_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_mask_vcvtsh2sd_round
+ : GCCBuiltin<"__builtin_ia32_vcvtsh2sd_round_mask">,
+ Intrinsic<[ llvm_v2f64_ty ],
+ [ llvm_v2f64_ty, llvm_v8f16_ty, llvm_v2f64_ty, llvm_i8_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+
+ def int_x86_avx512fp16_mask_vcvtph2w_128
+ : GCCBuiltin<"__builtin_ia32_vcvtph2w128_mask">,
+ Intrinsic<[ llvm_v8i16_ty ],
+ [ llvm_v8f16_ty, llvm_v8i16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2w_256
+ : GCCBuiltin<"__builtin_ia32_vcvtph2w256_mask">,
+ Intrinsic<[ llvm_v16i16_ty ],
+ [ llvm_v16f16_ty, llvm_v16i16_ty, llvm_i16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2w_512
+ : GCCBuiltin<"__builtin_ia32_vcvtph2w512_mask">,
+ Intrinsic<[ llvm_v32i16_ty ],
+ [ llvm_v32f16_ty, llvm_v32i16_ty, llvm_i32_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+ def int_x86_avx512fp16_mask_vcvttph2w_128
+ : GCCBuiltin<"__builtin_ia32_vcvttph2w128_mask">,
+ Intrinsic<[ llvm_v8i16_ty ],
+ [ llvm_v8f16_ty, llvm_v8i16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvttph2w_256
+ : GCCBuiltin<"__builtin_ia32_vcvttph2w256_mask">,
+ Intrinsic<[ llvm_v16i16_ty ],
+ [ llvm_v16f16_ty, llvm_v16i16_ty, llvm_i16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvttph2w_512
+ : GCCBuiltin<"__builtin_ia32_vcvttph2w512_mask">,
+ Intrinsic<[ llvm_v32i16_ty ],
+ [ llvm_v32f16_ty, llvm_v32i16_ty, llvm_i32_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+ def int_x86_avx512fp16_mask_vcvtph2uw_128
+ : GCCBuiltin<"__builtin_ia32_vcvtph2uw128_mask">,
+ Intrinsic<[ llvm_v8i16_ty ],
+ [ llvm_v8f16_ty, llvm_v8i16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2uw_256
+ : GCCBuiltin<"__builtin_ia32_vcvtph2uw256_mask">,
+ Intrinsic<[ llvm_v16i16_ty ],
+ [ llvm_v16f16_ty, llvm_v16i16_ty, llvm_i16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2uw_512
+ : GCCBuiltin<"__builtin_ia32_vcvtph2uw512_mask">,
+ Intrinsic<[ llvm_v32i16_ty ],
+ [ llvm_v32f16_ty, llvm_v32i16_ty, llvm_i32_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+ def int_x86_avx512fp16_mask_vcvttph2uw_128
+ : GCCBuiltin<"__builtin_ia32_vcvttph2uw128_mask">,
+ Intrinsic<[ llvm_v8i16_ty ],
+ [ llvm_v8f16_ty, llvm_v8i16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvttph2uw_256
+ : GCCBuiltin<"__builtin_ia32_vcvttph2uw256_mask">,
+ Intrinsic<[ llvm_v16i16_ty ],
+ [ llvm_v16f16_ty, llvm_v16i16_ty, llvm_i16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvttph2uw_512
+ : GCCBuiltin<"__builtin_ia32_vcvttph2uw512_mask">,
+ Intrinsic<[ llvm_v32i16_ty ],
+ [ llvm_v32f16_ty, llvm_v32i16_ty, llvm_i32_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+
+ def int_x86_avx512fp16_mask_vcvtph2dq_128
+ : GCCBuiltin<"__builtin_ia32_vcvtph2dq128_mask">,
+ Intrinsic<[ llvm_v4i32_ty ],
+ [ llvm_v8f16_ty, llvm_v4i32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2dq_256
+ : GCCBuiltin<"__builtin_ia32_vcvtph2dq256_mask">,
+ Intrinsic<[ llvm_v8i32_ty ],
+ [ llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2dq_512
+ : GCCBuiltin<"__builtin_ia32_vcvtph2dq512_mask">,
+ Intrinsic<[ llvm_v16i32_ty ],
+ [ llvm_v16f16_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+ def int_x86_avx512fp16_mask_vcvtph2udq_128
+ : GCCBuiltin<"__builtin_ia32_vcvtph2udq128_mask">,
+ Intrinsic<[ llvm_v4i32_ty ],
+ [ llvm_v8f16_ty, llvm_v4i32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2udq_256
+ : GCCBuiltin<"__builtin_ia32_vcvtph2udq256_mask">,
+ Intrinsic<[ llvm_v8i32_ty ],
+ [ llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2udq_512
+ : GCCBuiltin<"__builtin_ia32_vcvtph2udq512_mask">,
+ Intrinsic<[ llvm_v16i32_ty ],
+ [ llvm_v16f16_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+ def int_x86_avx512fp16_mask_vcvtdq2ph_128
+ : GCCBuiltin<"__builtin_ia32_vcvtdq2ph128_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v4i32_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtudq2ph_128
+ : GCCBuiltin<"__builtin_ia32_vcvtudq2ph128_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v4i32_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvttph2dq_128
+ : GCCBuiltin<"__builtin_ia32_vcvttph2dq128_mask">,
+ Intrinsic<[ llvm_v4i32_ty ],
+ [ llvm_v8f16_ty, llvm_v4i32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvttph2dq_256
+ : GCCBuiltin<"__builtin_ia32_vcvttph2dq256_mask">,
+ Intrinsic<[ llvm_v8i32_ty ],
+ [ llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvttph2dq_512
+ : GCCBuiltin<"__builtin_ia32_vcvttph2dq512_mask">,
+ Intrinsic<[ llvm_v16i32_ty ],
+ [ llvm_v16f16_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+ def int_x86_avx512fp16_mask_vcvttph2udq_128
+ : GCCBuiltin<"__builtin_ia32_vcvttph2udq128_mask">,
+ Intrinsic<[ llvm_v4i32_ty ],
+ [ llvm_v8f16_ty, llvm_v4i32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvttph2udq_256
+ : GCCBuiltin<"__builtin_ia32_vcvttph2udq256_mask">,
+ Intrinsic<[ llvm_v8i32_ty ],
+ [ llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvttph2udq_512
+ : GCCBuiltin<"__builtin_ia32_vcvttph2udq512_mask">,
+ Intrinsic<[ llvm_v16i32_ty ],
+ [ llvm_v16f16_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+
+ def int_x86_avx512fp16_mask_vcvtqq2ph_128
+ : GCCBuiltin<"__builtin_ia32_vcvtqq2ph128_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v2i64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtqq2ph_256
+ : GCCBuiltin<"__builtin_ia32_vcvtqq2ph256_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v4i64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2qq_128
+ : GCCBuiltin<"__builtin_ia32_vcvtph2qq128_mask">,
+ Intrinsic<[ llvm_v2i64_ty ],
+ [ llvm_v8f16_ty, llvm_v2i64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2qq_256
+ : GCCBuiltin<"__builtin_ia32_vcvtph2qq256_mask">,
+ Intrinsic<[ llvm_v4i64_ty ],
+ [ llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2qq_512
+ : GCCBuiltin<"__builtin_ia32_vcvtph2qq512_mask">,
+ Intrinsic<[ llvm_v8i64_ty ],
+ [ llvm_v8f16_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+ def int_x86_avx512fp16_mask_vcvtuqq2ph_128
+ : GCCBuiltin<"__builtin_ia32_vcvtuqq2ph128_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v2i64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtuqq2ph_256
+ : GCCBuiltin<"__builtin_ia32_vcvtuqq2ph256_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v4i64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2uqq_128
+ : GCCBuiltin<"__builtin_ia32_vcvtph2uqq128_mask">,
+ Intrinsic<[ llvm_v2i64_ty ],
+ [ llvm_v8f16_ty, llvm_v2i64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2uqq_256
+ : GCCBuiltin<"__builtin_ia32_vcvtph2uqq256_mask">,
+ Intrinsic<[ llvm_v4i64_ty ],
+ [ llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2uqq_512
+ : GCCBuiltin<"__builtin_ia32_vcvtph2uqq512_mask">,
+ Intrinsic<[ llvm_v8i64_ty ],
+ [ llvm_v8f16_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+ def int_x86_avx512fp16_mask_vcvttph2qq_128
+ : GCCBuiltin<"__builtin_ia32_vcvttph2qq128_mask">,
+ Intrinsic<[ llvm_v2i64_ty ],
+ [ llvm_v8f16_ty, llvm_v2i64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvttph2qq_256
+ : GCCBuiltin<"__builtin_ia32_vcvttph2qq256_mask">,
+ Intrinsic<[ llvm_v4i64_ty ],
+ [ llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvttph2qq_512
+ : GCCBuiltin<"__builtin_ia32_vcvttph2qq512_mask">,
+ Intrinsic<[ llvm_v8i64_ty ],
+ [ llvm_v8f16_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+ def int_x86_avx512fp16_mask_vcvttph2uqq_128
+ : GCCBuiltin<"__builtin_ia32_vcvttph2uqq128_mask">,
+ Intrinsic<[ llvm_v2i64_ty ],
+ [ llvm_v8f16_ty, llvm_v2i64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvttph2uqq_256
+ : GCCBuiltin<"__builtin_ia32_vcvttph2uqq256_mask">,
+ Intrinsic<[ llvm_v4i64_ty ],
+ [ llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvttph2uqq_512
+ : GCCBuiltin<"__builtin_ia32_vcvttph2uqq512_mask">,
+ Intrinsic<[ llvm_v8i64_ty ],
+ [ llvm_v8f16_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+
+ def int_x86_avx512fp16_vcvtsh2si32
+ : GCCBuiltin<"__builtin_ia32_vcvtsh2si32">,
+ Intrinsic<[ llvm_i32_ty ], [ llvm_v8f16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx512fp16_vcvtsh2usi32
+ : GCCBuiltin<"__builtin_ia32_vcvtsh2usi32">,
+ Intrinsic<[ llvm_i32_ty ], [ llvm_v8f16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx512fp16_vcvtsh2si64
+ : GCCBuiltin<"__builtin_ia32_vcvtsh2si64">,
+ Intrinsic<[ llvm_i64_ty ], [ llvm_v8f16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx512fp16_vcvtsh2usi64
+ : GCCBuiltin<"__builtin_ia32_vcvtsh2usi64">,
+ Intrinsic<[ llvm_i64_ty ], [ llvm_v8f16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx512fp16_vcvtusi2sh
+ : GCCBuiltin<"__builtin_ia32_vcvtusi2sh">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_i32_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<2>> ]>;
+ def int_x86_avx512fp16_vcvtusi642sh
+ : GCCBuiltin<"__builtin_ia32_vcvtusi642sh">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_i64_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<2>> ]>;
+ def int_x86_avx512fp16_vcvtsi2sh
+ : GCCBuiltin<"__builtin_ia32_vcvtsi2sh">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_i32_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<2>> ]>;
+ def int_x86_avx512fp16_vcvtsi642sh
+ : GCCBuiltin<"__builtin_ia32_vcvtsi642sh">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_i64_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<2>> ]>;
+ def int_x86_avx512fp16_vcvttsh2si32
+ : GCCBuiltin<"__builtin_ia32_vcvttsh2si32">,
+ Intrinsic<[ llvm_i32_ty ], [ llvm_v8f16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx512fp16_vcvttsh2si64
+ : GCCBuiltin<"__builtin_ia32_vcvttsh2si64">,
+ Intrinsic<[ llvm_i64_ty ], [ llvm_v8f16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx512fp16_vcvttsh2usi32
+ : GCCBuiltin<"__builtin_ia32_vcvttsh2usi32">,
+ Intrinsic<[ llvm_i32_ty ], [ llvm_v8f16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx512fp16_vcvttsh2usi64
+ : GCCBuiltin<"__builtin_ia32_vcvttsh2usi64">,
+ Intrinsic<[ llvm_i64_ty ], [ llvm_v8f16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+
+ def int_x86_avx512fp16_sqrt_ph_512
+ : Intrinsic<[ llvm_v32f16_ty ], [ llvm_v32f16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx512fp16_mask_sqrt_sh
+ : Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_mask_rsqrt_ph_128
+ : GCCBuiltin<"__builtin_ia32_rsqrtph128_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_rsqrt_ph_256
+ : GCCBuiltin<"__builtin_ia32_rsqrtph256_mask">,
+ Intrinsic<[ llvm_v16f16_ty ],
+ [ llvm_v16f16_ty, llvm_v16f16_ty, llvm_i16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_rsqrt_ph_512
+ : GCCBuiltin<"__builtin_ia32_rsqrtph512_mask">,
+ Intrinsic<[ llvm_v32f16_ty ],
+ [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_rsqrt_sh
+ : GCCBuiltin<"__builtin_ia32_rsqrtsh_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_rcp_ph_128
+ : GCCBuiltin<"__builtin_ia32_rcpph128_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_rcp_ph_256
+ : GCCBuiltin<"__builtin_ia32_rcpph256_mask">,
+ Intrinsic<[ llvm_v16f16_ty ],
+ [ llvm_v16f16_ty, llvm_v16f16_ty, llvm_i16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_rcp_ph_512
+ : GCCBuiltin<"__builtin_ia32_rcpph512_mask">,
+ Intrinsic<[ llvm_v32f16_ty ],
+ [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_rcp_sh
+ : GCCBuiltin<"__builtin_ia32_rcpsh_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_reduce_ph_128
+ : GCCBuiltin<"__builtin_ia32_reduceph128_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_i32_ty, llvm_v8f16_ty, llvm_i8_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx512fp16_mask_reduce_ph_256
+ : GCCBuiltin<"__builtin_ia32_reduceph256_mask">,
+ Intrinsic<[ llvm_v16f16_ty ],
+ [ llvm_v16f16_ty, llvm_i32_ty, llvm_v16f16_ty, llvm_i16_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx512fp16_mask_reduce_ph_512
+ : GCCBuiltin<"__builtin_ia32_reduceph512_mask">,
+ Intrinsic<[ llvm_v32f16_ty ],
+ [ llvm_v32f16_ty, llvm_i32_ty, llvm_v32f16_ty, llvm_i32_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_mask_reduce_sh
+ : GCCBuiltin<"__builtin_ia32_reducesh_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty,
+ llvm_i32_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>> ]>;
+ def int_x86_avx512fp16_fpclass_ph_128
+ : Intrinsic<[ llvm_v8i1_ty ], [ llvm_v8f16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx512fp16_fpclass_ph_256
+ : Intrinsic<[ llvm_v16i1_ty ], [ llvm_v16f16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx512fp16_fpclass_ph_512
+ : Intrinsic<[ llvm_v32i1_ty ], [ llvm_v32f16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx512fp16_mask_fpclass_sh
+ : GCCBuiltin<"__builtin_ia32_fpclasssh_mask">,
+ Intrinsic<[ llvm_i8_ty ], [ llvm_v8f16_ty, llvm_i32_ty, llvm_i8_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx512fp16_mask_getexp_ph_128
+ : GCCBuiltin<"__builtin_ia32_getexpph128_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_getexp_ph_256
+ : GCCBuiltin<"__builtin_ia32_getexpph256_mask">,
+ Intrinsic<[ llvm_v16f16_ty ],
+ [ llvm_v16f16_ty, llvm_v16f16_ty, llvm_i16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_getexp_ph_512
+ : GCCBuiltin<"__builtin_ia32_getexpph512_mask">,
+ Intrinsic<[ llvm_v32f16_ty ],
+ [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+ def int_x86_avx512fp16_mask_getexp_sh
+ : GCCBuiltin<"__builtin_ia32_getexpsh128_round_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_mask_getmant_ph_128
+ : GCCBuiltin<"__builtin_ia32_getmantph128_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_i32_ty, llvm_v8f16_ty, llvm_i8_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx512fp16_mask_getmant_ph_256
+ : GCCBuiltin<"__builtin_ia32_getmantph256_mask">,
+ Intrinsic<[ llvm_v16f16_ty ],
+ [ llvm_v16f16_ty, llvm_i32_ty, llvm_v16f16_ty, llvm_i16_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx512fp16_mask_getmant_ph_512
+ : GCCBuiltin<"__builtin_ia32_getmantph512_mask">,
+ Intrinsic<[ llvm_v32f16_ty ],
+ [ llvm_v32f16_ty, llvm_i32_ty, llvm_v32f16_ty, llvm_i32_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_mask_getmant_sh
+ : GCCBuiltin<"__builtin_ia32_getmantsh_round_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i32_ty, llvm_v8f16_ty,
+ llvm_i8_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<5>> ]>;
+ def int_x86_avx512fp16_mask_rndscale_ph_128
+ : GCCBuiltin<"__builtin_ia32_rndscaleph_128_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_i32_ty, llvm_v8f16_ty, llvm_i8_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx512fp16_mask_rndscale_ph_256
+ : GCCBuiltin<"__builtin_ia32_rndscaleph_256_mask">,
+ Intrinsic<[ llvm_v16f16_ty ],
+ [ llvm_v16f16_ty, llvm_i32_ty, llvm_v16f16_ty, llvm_i16_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx512fp16_mask_rndscale_ph_512
+ : GCCBuiltin<"__builtin_ia32_rndscaleph_mask">,
+ Intrinsic<[ llvm_v32f16_ty ],
+ [ llvm_v32f16_ty, llvm_i32_ty, llvm_v32f16_ty, llvm_i32_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_mask_rndscale_sh
+ : GCCBuiltin<"__builtin_ia32_rndscalesh_round_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty,
+ llvm_i32_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>> ]>;
+ def int_x86_avx512fp16_mask_scalef_ph_128
+ : GCCBuiltin<"__builtin_ia32_scalefph128_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_scalef_ph_256
+ : GCCBuiltin<"__builtin_ia32_scalefph256_mask">,
+ Intrinsic<[ llvm_v16f16_ty ],
+ [ llvm_v16f16_ty, llvm_v16f16_ty, llvm_v16f16_ty, llvm_i16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_scalef_ph_512
+ : GCCBuiltin<"__builtin_ia32_scalefph512_mask">,
+ Intrinsic<[ llvm_v32f16_ty ],
+ [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_mask_scalef_sh
+ : GCCBuiltin<"__builtin_ia32_scalefsh_round_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+
+ def int_x86_avx512fp16_vfmadd_ph_512
+ : Intrinsic<[ llvm_v32f16_ty ],
+ [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+ def int_x86_avx512fp16_vfmaddsub_ph_128
+ : GCCBuiltin<"__builtin_ia32_vfmaddsubph">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_vfmaddsub_ph_256
+ : GCCBuiltin<"__builtin_ia32_vfmaddsubph256">,
+ Intrinsic<[ llvm_v16f16_ty ],
+ [ llvm_v16f16_ty, llvm_v16f16_ty, llvm_v16f16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_vfmaddsub_ph_512
+ : Intrinsic<[ llvm_v32f16_ty ],
+ [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+ def int_x86_avx512fp16_vfmadd_f16
+ : Intrinsic<[ llvm_half_ty ],
+ [ llvm_half_ty, llvm_half_ty, llvm_half_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+
+ def int_x86_avx512fp16_mask_vfcmadd_cph_128
+ : GCCBuiltin<"__builtin_ia32_vfcmaddcph128_mask">,
+ Intrinsic<[ llvm_v4f32_ty ],
+ [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_maskz_vfcmadd_cph_128
+ : GCCBuiltin<"__builtin_ia32_vfcmaddcph128_maskz">,
+ Intrinsic<[ llvm_v4f32_ty ],
+ [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vfcmadd_cph_256
+ : GCCBuiltin<"__builtin_ia32_vfcmaddcph256_mask">,
+ Intrinsic<[ llvm_v8f32_ty ],
+ [ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_maskz_vfcmadd_cph_256
+ : GCCBuiltin<"__builtin_ia32_vfcmaddcph256_maskz">,
+ Intrinsic<[ llvm_v8f32_ty ],
+ [ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vfcmadd_cph_512
+ : GCCBuiltin<"__builtin_ia32_vfcmaddcph512_mask3">,
+ Intrinsic<[ llvm_v16f32_ty ],
+ [ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_maskz_vfcmadd_cph_512
+ : GCCBuiltin<"__builtin_ia32_vfcmaddcph512_maskz">,
+ Intrinsic<[ llvm_v16f32_ty ],
+ [ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_mask_vfmadd_cph_128
+ : GCCBuiltin<"__builtin_ia32_vfmaddcph128_mask">,
+ Intrinsic<[ llvm_v4f32_ty ],
+ [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_maskz_vfmadd_cph_128
+ : GCCBuiltin<"__builtin_ia32_vfmaddcph128_maskz">,
+ Intrinsic<[ llvm_v4f32_ty ],
+ [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vfmadd_cph_256
+ : GCCBuiltin<"__builtin_ia32_vfmaddcph256_mask">,
+ Intrinsic<[ llvm_v8f32_ty ],
+ [ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_maskz_vfmadd_cph_256
+ : GCCBuiltin<"__builtin_ia32_vfmaddcph256_maskz">,
+ Intrinsic<[ llvm_v8f32_ty ],
+ [ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vfmadd_cph_512
+ : GCCBuiltin<"__builtin_ia32_vfmaddcph512_mask3">,
+ Intrinsic<[ llvm_v16f32_ty ],
+ [ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_maskz_vfmadd_cph_512
+ : GCCBuiltin<"__builtin_ia32_vfmaddcph512_maskz">,
+ Intrinsic<[ llvm_v16f32_ty ],
+ [ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_mask_vfmadd_csh
+ : GCCBuiltin<"__builtin_ia32_vfmaddcsh_mask">,
+ Intrinsic<[ llvm_v4f32_ty ],
+ [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_maskz_vfmadd_csh
+ : GCCBuiltin<"__builtin_ia32_vfmaddcsh_maskz">,
+ Intrinsic<[ llvm_v4f32_ty ],
+ [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_mask_vfcmadd_csh
+ : GCCBuiltin<"__builtin_ia32_vfcmaddcsh_mask">,
+ Intrinsic<[ llvm_v4f32_ty ],
+ [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_maskz_vfcmadd_csh
+ : GCCBuiltin<"__builtin_ia32_vfcmaddcsh_maskz">,
+ Intrinsic<[ llvm_v4f32_ty ],
+ [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_mask_vfmul_cph_128
+ : GCCBuiltin<"__builtin_ia32_vfmulcph128_mask">,
+ Intrinsic<[ llvm_v4f32_ty ],
+ [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vfcmul_cph_128
+ : GCCBuiltin<"__builtin_ia32_vfcmulcph128_mask">,
+ Intrinsic<[ llvm_v4f32_ty ],
+ [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vfmul_cph_256
+ : GCCBuiltin<"__builtin_ia32_vfmulcph256_mask">,
+ Intrinsic<[ llvm_v8f32_ty ],
+ [ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vfcmul_cph_256
+ : GCCBuiltin<"__builtin_ia32_vfcmulcph256_mask">,
+ Intrinsic<[ llvm_v8f32_ty ],
+ [ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vfmul_cph_512
+ : GCCBuiltin<"__builtin_ia32_vfmulcph512_mask">,
+ Intrinsic<[ llvm_v16f32_ty ],
+ [ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_mask_vfcmul_cph_512
+ : GCCBuiltin<"__builtin_ia32_vfcmulcph512_mask">,
+ Intrinsic<[ llvm_v16f32_ty ],
+ [ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_mask_vfmul_csh
+ : GCCBuiltin<"__builtin_ia32_vfmulcsh_mask">,
+ Intrinsic<[ llvm_v4f32_ty ],
+ [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_mask_vfcmul_csh
+ : GCCBuiltin<"__builtin_ia32_vfcmulcsh_mask">,
+ Intrinsic<[ llvm_v4f32_ty ],
+ [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+}
diff --git a/llvm/include/llvm/IR/LLVMContext.h b/llvm/include/llvm/IR/LLVMContext.h
index bc605f108340..1c902ebce5ad 100644
--- a/llvm/include/llvm/IR/LLVMContext.h
+++ b/llvm/include/llvm/IR/LLVMContext.h
@@ -305,6 +305,10 @@ public:
/// LLVMContext is used by compilation.
void setOptPassGate(OptPassGate&);
+ /// Enable opaque pointers. Can only be called before creating the first
+ /// pointer type.
+ void enableOpaquePointers() const;
+
/// Whether typed pointers are supported. If false, all pointers are opaque.
bool supportsTypedPointers() const;
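
The new enableOpaquePointers() hook only has an effect if it runs before the first pointer type is created in the context. A minimal sketch of opting a context into opaque pointers (module name is illustrative):

#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  // Must be called before any pointer type is materialized in this context.
  Ctx.enableOpaquePointers();
  Module M("example", Ctx);
  // All pointers created in Ctx are now opaque, so typed pointers are gone.
  return Ctx.supportsTypedPointers() ? 1 : 0;
}
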
diff --git a/llvm/include/llvm/IR/MatrixBuilder.h b/llvm/include/llvm/IR/MatrixBuilder.h
index b14127df2182..6cc5797269e2 100644
--- a/llvm/include/llvm/IR/MatrixBuilder.h
+++ b/llvm/include/llvm/IR/MatrixBuilder.h
@@ -74,7 +74,7 @@ public:
Value *Ops[] = {DataPtr, Stride, B.getInt1(IsVolatile), B.getInt32(Rows),
B.getInt32(Columns)};
- Type *OverloadedTypes[] = {RetType};
+ Type *OverloadedTypes[] = {RetType, Stride->getType()};
Function *TheFn = Intrinsic::getDeclaration(
getModule(), Intrinsic::matrix_column_major_load, OverloadedTypes);
@@ -82,7 +82,7 @@ public:
CallInst *Call = B.CreateCall(TheFn->getFunctionType(), TheFn, Ops, Name);
Attribute AlignAttr =
Attribute::getWithAlignment(Call->getContext(), Alignment);
- Call->addAttribute(1, AlignAttr);
+ Call->addParamAttr(0, AlignAttr);
return Call;
}
@@ -97,7 +97,7 @@ public:
Value *Ops[] = {Matrix, Ptr,
Stride, B.getInt1(IsVolatile),
B.getInt32(Rows), B.getInt32(Columns)};
- Type *OverloadedTypes[] = {Matrix->getType()};
+ Type *OverloadedTypes[] = {Matrix->getType(), Stride->getType()};
Function *TheFn = Intrinsic::getDeclaration(
getModule(), Intrinsic::matrix_column_major_store, OverloadedTypes);
@@ -105,7 +105,7 @@ public:
CallInst *Call = B.CreateCall(TheFn->getFunctionType(), TheFn, Ops, Name);
Attribute AlignAttr =
Attribute::getWithAlignment(Call->getContext(), Alignment);
- Call->addAttribute(2, AlignAttr);
+ Call->addParamAttr(1, AlignAttr);
return Call;
}
@@ -231,9 +231,23 @@ public:
: (IsUnsigned ? B.CreateUDiv(LHS, RHS) : B.CreateSDiv(LHS, RHS));
}
- /// Extracts the element at (\p RowIdx, \p ColumnIdx) from \p Matrix.
- Value *CreateExtractElement(Value *Matrix, Value *RowIdx, Value *ColumnIdx,
- unsigned NumRows, Twine const &Name = "") {
+ /// Create an assumption that \p Idx is less than \p NumElements.
+ void CreateIndexAssumption(Value *Idx, unsigned NumElements,
+ Twine const &Name = "") {
+
+ Value *NumElts =
+ B.getIntN(Idx->getType()->getScalarSizeInBits(), NumElements);
+ auto *Cmp = B.CreateICmpULT(Idx, NumElts);
+ if (auto *ConstCond = dyn_cast<ConstantInt>(Cmp))
+ assert(ConstCond->isOne() && "Index must be valid!");
+ else
+ B.CreateAssumption(Cmp);
+ }
+
+ /// Compute the index to access the element at (\p RowIdx, \p ColumnIdx) from
+ /// a matrix with \p NumRows embedded in a vector.
+ Value *CreateIndex(Value *RowIdx, Value *ColumnIdx, unsigned NumRows,
+ Twine const &Name = "") {
unsigned MaxWidth = std::max(RowIdx->getType()->getScalarSizeInBits(),
ColumnIdx->getType()->getScalarSizeInBits());
@@ -241,9 +255,7 @@ public:
RowIdx = B.CreateZExt(RowIdx, IntTy);
ColumnIdx = B.CreateZExt(ColumnIdx, IntTy);
Value *NumRowsV = B.getIntN(MaxWidth, NumRows);
- return B.CreateExtractElement(
- Matrix, B.CreateAdd(B.CreateMul(ColumnIdx, NumRowsV), RowIdx),
- "matext");
+ return B.CreateAdd(B.CreateMul(ColumnIdx, NumRowsV), RowIdx);
}
};
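
The refactoring above splits the old CreateExtractElement helper into CreateIndexAssumption and CreateIndex: the caller now asserts (or assumes) that the index is in range and then computes it as ColumnIdx * NumRows + RowIdx, since the matrix is stored column-major in a single flat vector. A plain C++ restatement of that addressing scheme (not the builder API itself):

#include <cassert>
#include <cstdio>

// Vector lane of element (Row, Col) in a column-major NumRows x NumColumns
// matrix flattened into one vector, mirroring MatrixBuilder::CreateIndex.
static unsigned flattenedIndex(unsigned Row, unsigned Col, unsigned NumRows,
                               unsigned NumColumns) {
  // In-range per-dimension indices imply the flattened index is below
  // NumRows * NumColumns, the bound CreateIndexAssumption emits as an
  // llvm.assume when the index is not a compile-time constant.
  assert(Row < NumRows && Col < NumColumns && "index must be valid");
  return Col * NumRows + Row;
}

int main() {
  // Element (1, 2) of a 3x4 matrix lives in lane 2 * 3 + 1 = 7.
  std::printf("%u\n", flattenedIndex(1, 2, 3, 4));
  return 0;
}
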
diff --git a/llvm/include/llvm/IR/Metadata.h b/llvm/include/llvm/IR/Metadata.h
index c5840564454e..26d70b4db2d5 100644
--- a/llvm/include/llvm/IR/Metadata.h
+++ b/llvm/include/llvm/IR/Metadata.h
@@ -707,6 +707,15 @@ struct AAMDNodes {
Result.NoAlias = NoAlias;
return Result;
}
+
+ /// Given two sets of AAMDNodes applying to potentially different locations,
+ /// determine the best AAMDNodes that apply to both.
+ AAMDNodes merge(const AAMDNodes &Other) const;
+
+ /// Determine the best AAMDNodes after concatenating two different locations
+ /// together. Different from `merge`, where different locations should
+ /// overlap each other, `concat` puts non-overlapping locations together.
+ AAMDNodes concat(const AAMDNodes &Other) const;
};
// Specialize DenseMapInfo for AAMDNodes.
@@ -897,6 +906,7 @@ struct TempMDNodeDeleter {
class MDNode : public Metadata {
friend class ReplaceableMetadataImpl;
friend class LLVMContextImpl;
+ friend class DIArgList;
unsigned NumOperands;
unsigned NumUnresolved;
@@ -1028,6 +1038,31 @@ public:
return cast<T>(N.release()->replaceWithDistinctImpl());
}
+ /// Print in tree shape.
+ ///
+ /// Prints definition of \c this in tree shape.
+ ///
+ /// If \c M is provided, metadata nodes will be numbered canonically;
+ /// otherwise, pointer addresses are substituted.
+ /// @{
+ void printTree(raw_ostream &OS, const Module *M = nullptr) const;
+ void printTree(raw_ostream &OS, ModuleSlotTracker &MST,
+ const Module *M = nullptr) const;
+ /// @}
+
+ /// User-friendly dump in tree shape.
+ ///
+ /// If \c M is provided, metadata nodes will be numbered canonically;
+ /// otherwise, pointer addresses are substituted.
+ ///
+ /// Note: this uses an explicit overload instead of default arguments so that
+ /// the nullptr version is easy to call from a debugger.
+ ///
+ /// @{
+ void dumpTree() const;
+ void dumpTree(const Module *M) const;
+ /// @}
+
private:
MDNode *replaceWithPermanentImpl();
MDNode *replaceWithUniquedImpl();
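
The new printTree/dumpTree helpers print a metadata node together with the nodes it references; passing the module gives canonical metadata numbering, otherwise pointer addresses are printed. A short sketch, assuming N was reached while walking module M:

#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Print an MDNode and its operands in tree shape with canonical numbering.
static void printMetadataTree(const MDNode *N, const Module &M) {
  N->printTree(errs(), &M);
  errs() << "\n";
}

From a debugger, N->dumpTree() does the same without needing a module pointer, which is the reason for the explicit overloads instead of a default argument.
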
diff --git a/llvm/include/llvm/IR/Module.h b/llvm/include/llvm/IR/Module.h
index 81e29d9b86e8..bd3a196c7181 100644
--- a/llvm/include/llvm/IR/Module.h
+++ b/llvm/include/llvm/IR/Module.h
@@ -64,9 +64,9 @@ class VersionTuple;
/// constant references to global variables in the module. When a global
/// variable is destroyed, it should have no entries in the GlobalValueRefMap.
/// The main container class for the LLVM Intermediate Representation.
-class Module {
-/// @name Types And Enumerations
-/// @{
+class LLVM_EXTERNAL_VISIBILITY Module {
+ /// @name Types And Enumerations
+ /// @{
public:
/// The type for the list of global variables.
using GlobalListType = SymbolTableList<GlobalVariable>;
@@ -324,6 +324,9 @@ public:
/// name is not found.
GlobalValue *getNamedValue(StringRef Name) const;
+ /// Return the number of global values in the module.
+ unsigned getNumNamedValues() const;
+
/// Return a unique non-zero ID for the specified metadata kind. This ID is
/// uniqued across modules in the current LLVMContext.
unsigned getMDKindID(StringRef Name) const;
diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h
index 4b84f6b0408d..e00b78d45c63 100644
--- a/llvm/include/llvm/IR/ModuleSummaryIndex.h
+++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h
@@ -572,6 +572,50 @@ public:
unsigned NoInline : 1;
// Indicate if function should be always inlined.
unsigned AlwaysInline : 1;
+ // Indicate if function never raises an exception. Can be modified during
+ // thinlink function attribute propagation
+ unsigned NoUnwind : 1;
+ // Indicate if function contains instructions that mayThrow
+ unsigned MayThrow : 1;
+
+ // If there are calls to unknown targets (e.g. indirect)
+ unsigned HasUnknownCall : 1;
+
+ FFlags &operator&=(const FFlags &RHS) {
+ this->ReadNone &= RHS.ReadNone;
+ this->ReadOnly &= RHS.ReadOnly;
+ this->NoRecurse &= RHS.NoRecurse;
+ this->ReturnDoesNotAlias &= RHS.ReturnDoesNotAlias;
+ this->NoInline &= RHS.NoInline;
+ this->AlwaysInline &= RHS.AlwaysInline;
+ this->NoUnwind &= RHS.NoUnwind;
+ this->MayThrow &= RHS.MayThrow;
+ this->HasUnknownCall &= RHS.HasUnknownCall;
+ return *this;
+ }
+
+ bool anyFlagSet() {
+ return this->ReadNone | this->ReadOnly | this->NoRecurse |
+ this->ReturnDoesNotAlias | this->NoInline | this->AlwaysInline |
+ this->NoUnwind | this->MayThrow | this->HasUnknownCall;
+ }
+
+ operator std::string() {
+ std::string Output;
+ raw_string_ostream OS(Output);
+ OS << "funcFlags: (";
+ OS << "readNone: " << this->ReadNone;
+ OS << ", readOnly: " << this->ReadOnly;
+ OS << ", noRecurse: " << this->NoRecurse;
+ OS << ", returnDoesNotAlias: " << this->ReturnDoesNotAlias;
+ OS << ", noInline: " << this->NoInline;
+ OS << ", alwaysInline: " << this->AlwaysInline;
+ OS << ", noUnwind: " << this->NoUnwind;
+ OS << ", mayThrow: " << this->MayThrow;
+ OS << ", hasUnknownCall: " << this->HasUnknownCall;
+ OS << ")";
+ return OS.str();
+ }
};
/// Describes the uses of a parameter by the function.
@@ -688,6 +732,10 @@ public:
/// Get function summary flags.
FFlags fflags() const { return FunFlags; }
+ void setNoRecurse() { FunFlags.NoRecurse = true; }
+
+ void setNoUnwind() { FunFlags.NoUnwind = true; }
+
/// Get the instruction count recorded for this function.
unsigned instCount() const { return InstCount; }
@@ -700,6 +748,8 @@ public:
/// Return the list of <CalleeValueInfo, CalleeInfo> pairs.
ArrayRef<EdgeTy> calls() const { return CallGraphEdgeList; }
+ std::vector<EdgeTy> &mutableCalls() { return CallGraphEdgeList; }
+
void addCall(EdgeTy E) { CallGraphEdgeList.push_back(E); }
/// Returns the list of type identifiers used by this function in
diff --git a/llvm/include/llvm/IR/Operator.h b/llvm/include/llvm/IR/Operator.h
index d0bce742cc96..b83d83f0d0ab 100644
--- a/llvm/include/llvm/IR/Operator.h
+++ b/llvm/include/llvm/IR/Operator.h
@@ -59,6 +59,10 @@ public:
static bool classof(const Value *V) {
return isa<Instruction>(V) || isa<ConstantExpr>(V);
}
+
+ /// Return true if this operator has flags which may cause this operator
+ /// to evaluate to poison despite having non-poison inputs.
+ bool hasPoisonGeneratingFlags() const;
};
/// Utility class for integer operators which may exhibit overflow - Add, Sub,
@@ -243,6 +247,9 @@ public:
void operator|=(const FastMathFlags &OtherFlags) {
Flags |= OtherFlags.Flags;
}
+ bool operator!=(const FastMathFlags &OtherFlags) const {
+ return Flags != OtherFlags.Flags;
+ }
};
/// Utility class for floating point operations which can have
diff --git a/llvm/include/llvm/IR/OptBisect.h b/llvm/include/llvm/IR/OptBisect.h
index 6c2a1b01d897..63fd98073b51 100644
--- a/llvm/include/llvm/IR/OptBisect.h
+++ b/llvm/include/llvm/IR/OptBisect.h
@@ -16,6 +16,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/ManagedStatic.h"
+#include <limits>
namespace llvm {
@@ -43,14 +44,12 @@ public:
/// optimization-related problems.
class OptBisect : public OptPassGate {
public:
- /// Default constructor, initializes the OptBisect state based on the
- /// -opt-bisect-limit command line argument.
- ///
- /// By default, bisection is disabled.
- ///
+ /// Default constructor. Initializes the state to "disabled". The bisection
+ /// will be enabled by the cl::opt call-back when the command line option
+ /// is processed.
/// Clients should not instantiate this class directly. All access should go
/// through LLVMContext.
- OptBisect();
+ OptBisect() = default;
virtual ~OptBisect() = default;
@@ -60,7 +59,14 @@ public:
bool shouldRunPass(const Pass *P, StringRef IRDescription) override;
/// isEnabled() should return true before calling shouldRunPass().
- bool isEnabled() const override { return BisectEnabled; }
+ bool isEnabled() const override { return BisectLimit != Disabled; }
+
+ /// Set the new optimization limit and reset the counter. Passing
+ /// OptBisect::Disabled disables the limiting.
+ void setLimit(int Limit) {
+ BisectLimit = Limit;
+ LastBisectNum = 0;
+ }
/// Checks the bisect limit to determine if the specified pass should run.
///
@@ -75,9 +81,11 @@ public:
/// instance, function passes should call FunctionPass::skipFunction().
bool checkPass(const StringRef PassName, const StringRef TargetDesc);
+ static const int Disabled = std::numeric_limits<int>::max();
+
private:
- bool BisectEnabled = false;
- unsigned LastBisectNum = 0;
+ int BisectLimit = Disabled;
+ int LastBisectNum = 0;
};
/// Singleton instance of the OptBisect class, so multiple pass managers don't
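
With this rewrite, bisection counts as enabled whenever BisectLimit differs from OptBisect::Disabled, and the -opt-bisect-limit callback now installs the limit through setLimit() rather than at construction time. A hedged sketch of driving it programmatically, assuming Bisector refers to the process-wide OptBisect instance obtained through the usual LLVMContext plumbing:

#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/OptBisect.h"

using namespace llvm;

// Cap the number of passes that run in this context, mirroring what the
// -opt-bisect-limit callback does; OptBisect::Disabled switches it back off.
static void limitPasses(LLVMContext &Ctx, OptBisect &Bisector, int Limit) {
  Bisector.setLimit(Limit);
  Ctx.setOptPassGate(Bisector); // install as the context's pass gate
}
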
diff --git a/llvm/include/llvm/IR/PassManager.h b/llvm/include/llvm/IR/PassManager.h
index 8e592bfb0c78..e88d2233daba 100644
--- a/llvm/include/llvm/IR/PassManager.h
+++ b/llvm/include/llvm/IR/PassManager.h
@@ -377,10 +377,16 @@ template <typename DerivedT> struct PassInfoMixin {
static_assert(std::is_base_of<PassInfoMixin, DerivedT>::value,
"Must pass the derived type as the template argument!");
StringRef Name = getTypeName<DerivedT>();
- if (Name.startswith("llvm::"))
- Name = Name.drop_front(strlen("llvm::"));
+ Name.consume_front("llvm::");
return Name;
}
+
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ StringRef ClassName = DerivedT::name();
+ auto PassName = MapClassName2PassName(ClassName);
+ OS << PassName;
+ }
};
/// A CRTP mix-in that provides informational APIs needed for analysis passes.
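As a reference for the printPipeline() default added to PassInfoMixin above, a sketch of a hypothetical pass that simply inherits it; NoopFunctionPass is not part of the patch.

    #include "llvm/IR/PassManager.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    // Hypothetical pass: it inherits the default printPipeline() from
    // PassInfoMixin, which prints the (optionally remapped) class name.
    struct NoopFunctionPass : PassInfoMixin<NoopFunctionPass> {
      PreservedAnalyses run(Function &, FunctionAnalysisManager &) {
        return PreservedAnalyses::all();
      }
    };

    static void printPassName() {
      NoopFunctionPass P;
      P.printPipeline(errs(), [](StringRef Name) { return Name; });
    }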
@@ -480,6 +486,16 @@ public:
return *this;
}
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ for (unsigned Idx = 0, Size = Passes.size(); Idx != Size; ++Idx) {
+ auto *P = Passes[Idx].get();
+ P->printPipeline(OS, MapClassName2PassName);
+ if (Idx + 1 < Size)
+ OS << ",";
+ }
+ }
+
/// Run all of the passes in this manager over the given unit of IR.
/// ExtraArgs are passed to each pass.
PreservedAnalyses run(IRUnitT &IR, AnalysisManagerT &AM,
@@ -520,12 +536,6 @@ public:
// Finally, intersect the preserved analyses to compute the aggregate
// preserved set for this pass manager.
PA.intersect(std::move(PassPA));
-
- // FIXME: Historically, the pass managers all called the LLVM context's
- // yield function here. We don't have a generic way to acquire the
- // context and it isn't yet clear what the right pattern is for yielding
- // in the new pass manager so it is currently omitted.
- //IR.getContext().yield();
}
// Invalidation was handled after each pass in the above loop for the
@@ -538,13 +548,16 @@ public:
}
template <typename PassT>
- std::enable_if_t<!std::is_same<PassT, PassManager>::value>
- addPass(PassT &&Pass) {
+ LLVM_ATTRIBUTE_MINSIZE
+ std::enable_if_t<!std::is_same<PassT, PassManager>::value>
+ addPass(PassT &&Pass) {
using PassModelT =
detail::PassModel<IRUnitT, PassT, PreservedAnalyses, AnalysisManagerT,
ExtraArgTs...>;
-
- Passes.emplace_back(new PassModelT(std::forward<PassT>(Pass)));
+ // Do not use make_unique or emplace_back, they cause too many template
+ // instantiations, causing terrible compile times.
+ Passes.push_back(std::unique_ptr<PassConceptT>(
+ new PassModelT(std::forward<PassT>(Pass))));
}
/// When adding a pass manager pass that has the same type as this pass
@@ -553,10 +566,11 @@ public:
/// implementation complexity and avoid potential invalidation issues that may
/// happen with nested pass managers of the same type.
template <typename PassT>
- std::enable_if_t<std::is_same<PassT, PassManager>::value>
- addPass(PassT &&Pass) {
+ LLVM_ATTRIBUTE_MINSIZE
+ std::enable_if_t<std::is_same<PassT, PassManager>::value>
+ addPass(PassT &&Pass) {
for (auto &P : Pass.Passes)
- Passes.emplace_back(std::move(P));
+ Passes.push_back(std::move(P));
}
/// Returns if the pass manager contains any passes.
@@ -1190,29 +1204,37 @@ class ModuleToFunctionPassAdaptor
public:
using PassConceptT = detail::PassConcept<Function, FunctionAnalysisManager>;
- explicit ModuleToFunctionPassAdaptor(std::unique_ptr<PassConceptT> Pass)
- : Pass(std::move(Pass)) {}
+ explicit ModuleToFunctionPassAdaptor(std::unique_ptr<PassConceptT> Pass,
+ bool EagerlyInvalidate)
+ : Pass(std::move(Pass)), EagerlyInvalidate(EagerlyInvalidate) {}
/// Runs the function pass across every function in the module.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName);
static bool isRequired() { return true; }
private:
std::unique_ptr<PassConceptT> Pass;
+ bool EagerlyInvalidate;
};
/// A function to deduce a function pass type and wrap it in the
/// templated adaptor.
template <typename FunctionPassT>
ModuleToFunctionPassAdaptor
-createModuleToFunctionPassAdaptor(FunctionPassT &&Pass) {
+createModuleToFunctionPassAdaptor(FunctionPassT &&Pass,
+ bool EagerlyInvalidate = false) {
using PassModelT =
detail::PassModel<Function, FunctionPassT, PreservedAnalyses,
FunctionAnalysisManager>;
-
+ // Do not use make_unique, it causes too many template instantiations,
+ // causing terrible compile times.
return ModuleToFunctionPassAdaptor(
- std::make_unique<PassModelT>(std::forward<FunctionPassT>(Pass)));
+ std::unique_ptr<ModuleToFunctionPassAdaptor::PassConceptT>(
+ new PassModelT(std::forward<FunctionPassT>(Pass))),
+ EagerlyInvalidate);
}
/// A utility pass template to force an analysis result to be available.
@@ -1243,6 +1265,12 @@ struct RequireAnalysisPass
return PreservedAnalyses::all();
}
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ auto ClassName = AnalysisT::name();
+ auto PassName = MapClassName2PassName(ClassName);
+ OS << "require<" << PassName << ">";
+ }
static bool isRequired() { return true; }
};
@@ -1263,6 +1291,12 @@ struct InvalidateAnalysisPass
PA.abandon<AnalysisT>();
return PA;
}
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ auto ClassName = AnalysisT::name();
+ auto PassName = MapClassName2PassName(ClassName);
+ OS << "invalidate<" << PassName << ">";
+ }
};
/// A utility pass that does nothing, but preserves no analyses.
@@ -1312,6 +1346,13 @@ public:
return PA;
}
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ OS << "repeat<" << Count << ">(";
+ P.printPipeline(OS, MapClassName2PassName);
+ OS << ")";
+ }
+
private:
int Count;
PassT P;
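A sketch of the new EagerlyInvalidate parameter on createModuleToFunctionPassAdaptor, using SimplifyCFGPass purely as an example function pass; the helper is illustrative.

    #include "llvm/IR/PassManager.h"
    #include "llvm/Transforms/Scalar/SimplifyCFG.h"
    using namespace llvm;

    // Illustrative: wrap a function pass for a module pass manager, opting in
    // to the new eager invalidation of function analyses.
    static void addWrappedPass(ModulePassManager &MPM) {
      MPM.addPass(createModuleToFunctionPassAdaptor(SimplifyCFGPass(),
                                                    /*EagerlyInvalidate=*/true));
    }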
diff --git a/llvm/include/llvm/IR/PassManagerInternal.h b/llvm/include/llvm/IR/PassManagerInternal.h
index 8f42e69f3063..29b55a8172e6 100644
--- a/llvm/include/llvm/IR/PassManagerInternal.h
+++ b/llvm/include/llvm/IR/PassManagerInternal.h
@@ -46,6 +46,9 @@ struct PassConcept {
virtual PreservedAnalyses run(IRUnitT &IR, AnalysisManagerT &AM,
ExtraArgTs... ExtraArgs) = 0;
+ virtual void
+ printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName) = 0;
/// Polymorphic method to access the name of a pass.
virtual StringRef name() const = 0;
@@ -85,6 +88,12 @@ struct PassModel : PassConcept<IRUnitT, AnalysisManagerT, ExtraArgTs...> {
return Pass.run(IR, AM, ExtraArgs...);
}
+ void printPipeline(
+ raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName) override {
+ Pass.printPipeline(OS, MapClassName2PassName);
+ }
+
StringRef name() const override { return PassT::name(); }
template <typename T>
diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h
index cbd429f84ee4..b858733530e3 100644
--- a/llvm/include/llvm/IR/PatternMatch.h
+++ b/llvm/include/llvm/IR/PatternMatch.h
@@ -438,7 +438,7 @@ inline cst_pred_ty<is_any_apint> m_AnyIntegralConstant() {
}
struct is_all_ones {
- bool isValue(const APInt &C) { return C.isAllOnesValue(); }
+ bool isValue(const APInt &C) { return C.isAllOnes(); }
};
/// Match an integer or vector with all bits set.
/// For vectors, this includes constants with undefined elements.
@@ -506,7 +506,7 @@ inline cst_pred_ty<is_nonpositive> m_NonPositive() {
inline api_pred_ty<is_nonpositive> m_NonPositive(const APInt *&V) { return V; }
struct is_one {
- bool isValue(const APInt &C) { return C.isOneValue(); }
+ bool isValue(const APInt &C) { return C.isOne(); }
};
/// Match an integer 1 or a vector with all elements equal to 1.
/// For vectors, this includes constants with undefined elements.
@@ -515,7 +515,7 @@ inline cst_pred_ty<is_one> m_One() {
}
struct is_zero_int {
- bool isValue(const APInt &C) { return C.isNullValue(); }
+ bool isValue(const APInt &C) { return C.isZero(); }
};
/// Match an integer 0 or a vector with all elements equal to 0.
/// For vectors, this includes constants with undefined elements.
@@ -549,7 +549,7 @@ inline api_pred_ty<is_power2> m_Power2(const APInt *&V) {
}
struct is_negated_power2 {
- bool isValue(const APInt &C) { return (-C).isPowerOf2(); }
+ bool isValue(const APInt &C) { return C.isNegatedPowerOf2(); }
};
/// Match an integer or vector negated power-of-2.
/// For vectors, this includes constants with undefined elements.
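The renames in the hunks above track the shorter APInt predicate spellings; a quick illustration (the value 0xF0 is arbitrary):

    #include "llvm/ADT/APInt.h"
    using namespace llvm;

    static void apintPredicateExamples() {
      APInt X(/*numBits=*/8, /*val=*/0xF0);
      (void)X.isZero();            // replaces isNullValue()
      (void)X.isOne();             // replaces isOneValue()
      (void)X.isAllOnes();         // replaces isAllOnesValue()
      (void)X.isNegatedPowerOf2(); // true here: 0xF0 is -16 as an i8
    }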
@@ -593,32 +593,7 @@ inline cst_pred_ty<is_lowbit_mask> m_LowBitMask() {
struct icmp_pred_with_threshold {
ICmpInst::Predicate Pred;
const APInt *Thr;
- bool isValue(const APInt &C) {
- switch (Pred) {
- case ICmpInst::Predicate::ICMP_EQ:
- return C.eq(*Thr);
- case ICmpInst::Predicate::ICMP_NE:
- return C.ne(*Thr);
- case ICmpInst::Predicate::ICMP_UGT:
- return C.ugt(*Thr);
- case ICmpInst::Predicate::ICMP_UGE:
- return C.uge(*Thr);
- case ICmpInst::Predicate::ICMP_ULT:
- return C.ult(*Thr);
- case ICmpInst::Predicate::ICMP_ULE:
- return C.ule(*Thr);
- case ICmpInst::Predicate::ICMP_SGT:
- return C.sgt(*Thr);
- case ICmpInst::Predicate::ICMP_SGE:
- return C.sge(*Thr);
- case ICmpInst::Predicate::ICMP_SLT:
- return C.slt(*Thr);
- case ICmpInst::Predicate::ICMP_SLE:
- return C.sle(*Thr);
- default:
- llvm_unreachable("Unhandled ICmp predicate");
- }
- }
+ bool isValue(const APInt &C) { return ICmpInst::compare(C, *Thr, Pred); }
};
/// Match an integer or vector with every element comparing 'pred' (eq/ne/...)
/// to Threshold. For vectors, this includes constants with undefined elements.
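The rewritten matcher above leans on the static ICmpInst::compare() helper; a minimal illustration follows (the constant 8 and the helper name are arbitrary).

    #include "llvm/ADT/APInt.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // Evaluate an integer predicate directly on APInt values, as the matcher
    // above now does instead of the hand-written switch.
    static bool isBelowEight(const APInt &C) {
      return ICmpInst::compare(C, APInt(C.getBitWidth(), 8),
                               ICmpInst::ICMP_ULT);
    }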
@@ -988,20 +963,22 @@ struct BinaryOp_match {
// The LHS is always matched first.
BinaryOp_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {}
- template <typename OpTy> bool match(OpTy *V) {
- if (V->getValueID() == Value::InstructionVal + Opcode) {
+ template <typename OpTy> inline bool match(unsigned Opc, OpTy *V) {
+ if (V->getValueID() == Value::InstructionVal + Opc) {
auto *I = cast<BinaryOperator>(V);
return (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) ||
(Commutable && L.match(I->getOperand(1)) &&
R.match(I->getOperand(0)));
}
if (auto *CE = dyn_cast<ConstantExpr>(V))
- return CE->getOpcode() == Opcode &&
+ return CE->getOpcode() == Opc &&
((L.match(CE->getOperand(0)) && R.match(CE->getOperand(1))) ||
(Commutable && L.match(CE->getOperand(1)) &&
R.match(CE->getOperand(0))));
return false;
}
+
+ template <typename OpTy> bool match(OpTy *V) { return match(Opcode, V); }
};
template <typename LHS, typename RHS>
@@ -1246,6 +1223,26 @@ m_NUWShl(const LHS &L, const RHS &R) {
L, R);
}
+template <typename LHS_t, typename RHS_t, bool Commutable = false>
+struct SpecificBinaryOp_match
+ : public BinaryOp_match<LHS_t, RHS_t, 0, Commutable> {
+ unsigned Opcode;
+
+ SpecificBinaryOp_match(unsigned Opcode, const LHS_t &LHS, const RHS_t &RHS)
+ : BinaryOp_match<LHS_t, RHS_t, 0, Commutable>(LHS, RHS), Opcode(Opcode) {}
+
+ template <typename OpTy> bool match(OpTy *V) {
+ return BinaryOp_match<LHS_t, RHS_t, 0, Commutable>::match(Opcode, V);
+ }
+};
+
+/// Matches a specific opcode.
+template <typename LHS, typename RHS>
+inline SpecificBinaryOp_match<LHS, RHS> m_BinOp(unsigned Opcode, const LHS &L,
+ const RHS &R) {
+ return SpecificBinaryOp_match<LHS, RHS>(Opcode, L, R);
+}
+
//===----------------------------------------------------------------------===//
// Class that matches a group of binary opcodes.
//
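For reference, a sketch of the m_BinOp overload introduced above, which matches a binary operator whose opcode is only known at run time; the helper name is hypothetical.

    #include "llvm/IR/PatternMatch.h"
    using namespace llvm;
    using namespace llvm::PatternMatch;

    // Illustrative: match a binary operator of a dynamically chosen opcode
    // (something m_Add/m_Sub cannot express), with fixed operand order.
    static bool isBinOpOf(Value *V, unsigned Opcode, Value *A, Value *B) {
      return match(V, m_BinOp(Opcode, m_Specific(A), m_Specific(B)));
    }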
@@ -2223,6 +2220,13 @@ m_c_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R) {
R);
}
+/// Matches a specific opcode with LHS and RHS in either order.
+template <typename LHS, typename RHS>
+inline SpecificBinaryOp_match<LHS, RHS, true>
+m_c_BinOp(unsigned Opcode, const LHS &L, const RHS &R) {
+ return SpecificBinaryOp_match<LHS, RHS, true>(Opcode, L, R);
+}
+
/// Matches a Add with LHS and RHS in either order.
template <typename LHS, typename RHS>
inline BinaryOp_match<LHS, RHS, Instruction::Add, true> m_c_Add(const LHS &L,
@@ -2456,7 +2460,7 @@ inline VScaleVal_match m_VScale(const DataLayout &DL) {
return VScaleVal_match(DL);
}
-template <typename LHS, typename RHS, unsigned Opcode>
+template <typename LHS, typename RHS, unsigned Opcode, bool Commutable = false>
struct LogicalOp_match {
LHS L;
RHS R;
@@ -2464,27 +2468,32 @@ struct LogicalOp_match {
LogicalOp_match(const LHS &L, const RHS &R) : L(L), R(R) {}
template <typename T> bool match(T *V) {
- if (auto *I = dyn_cast<Instruction>(V)) {
- if (!I->getType()->isIntOrIntVectorTy(1))
- return false;
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I || !I->getType()->isIntOrIntVectorTy(1))
+ return false;
- if (I->getOpcode() == Opcode && L.match(I->getOperand(0)) &&
- R.match(I->getOperand(1)))
- return true;
+ if (I->getOpcode() == Opcode) {
+ auto *Op0 = I->getOperand(0);
+ auto *Op1 = I->getOperand(1);
+ return (L.match(Op0) && R.match(Op1)) ||
+ (Commutable && L.match(Op1) && R.match(Op0));
+ }
- if (auto *SI = dyn_cast<SelectInst>(I)) {
- if (Opcode == Instruction::And) {
- if (const auto *C = dyn_cast<Constant>(SI->getFalseValue()))
- if (C->isNullValue() && L.match(SI->getCondition()) &&
- R.match(SI->getTrueValue()))
- return true;
- } else {
- assert(Opcode == Instruction::Or);
- if (const auto *C = dyn_cast<Constant>(SI->getTrueValue()))
- if (C->isOneValue() && L.match(SI->getCondition()) &&
- R.match(SI->getFalseValue()))
- return true;
- }
+ if (auto *Select = dyn_cast<SelectInst>(I)) {
+ auto *Cond = Select->getCondition();
+ auto *TVal = Select->getTrueValue();
+ auto *FVal = Select->getFalseValue();
+ if (Opcode == Instruction::And) {
+ auto *C = dyn_cast<Constant>(FVal);
+ if (C && C->isNullValue())
+ return (L.match(Cond) && R.match(TVal)) ||
+ (Commutable && L.match(TVal) && R.match(Cond));
+ } else {
+ assert(Opcode == Instruction::Or);
+ auto *C = dyn_cast<Constant>(TVal);
+ if (C && C->isOneValue())
+ return (L.match(Cond) && R.match(FVal)) ||
+ (Commutable && L.match(FVal) && R.match(Cond));
}
}
@@ -2503,6 +2512,13 @@ m_LogicalAnd(const LHS &L, const RHS &R) {
/// Matches L && R where L and R are arbitrary values.
inline auto m_LogicalAnd() { return m_LogicalAnd(m_Value(), m_Value()); }
+/// Matches L && R with LHS and RHS in either order.
+template <typename LHS, typename RHS>
+inline LogicalOp_match<LHS, RHS, Instruction::And, true>
+m_c_LogicalAnd(const LHS &L, const RHS &R) {
+ return LogicalOp_match<LHS, RHS, Instruction::And, true>(L, R);
+}
+
/// Matches L || R either in the form of L | R or L ? true : R.
/// Note that the latter form is poison-blocking.
template <typename LHS, typename RHS>
@@ -2512,8 +2528,13 @@ m_LogicalOr(const LHS &L, const RHS &R) {
}
/// Matches L || R where L and R are arbitrary values.
-inline auto m_LogicalOr() {
- return m_LogicalOr(m_Value(), m_Value());
+inline auto m_LogicalOr() { return m_LogicalOr(m_Value(), m_Value()); }
+
+/// Matches L || R with LHS and RHS in either order.
+template <typename LHS, typename RHS>
+inline LogicalOp_match<LHS, RHS, Instruction::Or, true>
+m_c_LogicalOr(const LHS &L, const RHS &R) {
+ return LogicalOp_match<LHS, RHS, Instruction::Or, true>(L, R);
}
} // end namespace PatternMatch
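A sketch of the new commutative logical matchers added above; the helper name is hypothetical and the pattern choice is only illustrative.

    #include "llvm/IR/PatternMatch.h"
    using namespace llvm;
    using namespace llvm::PatternMatch;

    // Illustrative: the commutative variants accept the operands in either
    // order, covering both the plain 'and'/'or' forms and the poison-blocking
    // select forms.
    static bool usesCondOnEitherSide(Value *V, Value *Cond) {
      return match(V, m_c_LogicalAnd(m_Specific(Cond), m_Value())) ||
             match(V, m_c_LogicalOr(m_Specific(Cond), m_Value()));
    }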
diff --git a/llvm/include/llvm/IR/ProfileSummary.h b/llvm/include/llvm/IR/ProfileSummary.h
index 889568e7946b..4bb6bb8d4a40 100644
--- a/llvm/include/llvm/IR/ProfileSummary.h
+++ b/llvm/include/llvm/IR/ProfileSummary.h
@@ -31,9 +31,9 @@ class raw_ostream;
// number of counts needed to reach this target and the minimum among these
// counts.
struct ProfileSummaryEntry {
- uint32_t Cutoff; ///< The required percentile of counts.
- uint64_t MinCount; ///< The minimum count for this percentile.
- uint64_t NumCounts; ///< Number of counts >= the minimum count.
+ const uint32_t Cutoff; ///< The required percentile of counts.
+ const uint64_t MinCount; ///< The minimum count for this percentile.
+ const uint64_t NumCounts; ///< Number of counts >= the minimum count.
ProfileSummaryEntry(uint32_t TheCutoff, uint64_t TheMinCount,
uint64_t TheNumCounts)
@@ -48,9 +48,9 @@ public:
private:
const Kind PSK;
- SummaryEntryVector DetailedSummary;
- uint64_t TotalCount, MaxCount, MaxInternalCount, MaxFunctionCount;
- uint32_t NumCounts, NumFunctions;
+ const SummaryEntryVector DetailedSummary;
+ const uint64_t TotalCount, MaxCount, MaxInternalCount, MaxFunctionCount;
+ const uint32_t NumCounts, NumFunctions;
/// If 'Partial' is false, it means the profile being used to optimize
/// a target is collected from the same target.
/// If 'Partial' is true, it means the profile is for common/shared
@@ -61,14 +61,14 @@ private:
/// of the program being built to the number of profile counters in the
/// partial sample profile. When 'Partial' is false, it is undefined. This is
/// currently only available under thin LTO mode.
- double PartialProfileRatio = 0;
+ double PartialProfileRatio = 0.0;
/// Return detailed summary as metadata.
Metadata *getDetailedSummaryMD(LLVMContext &Context);
public:
static const int Scale = 1000000;
- ProfileSummary(Kind K, SummaryEntryVector DetailedSummary,
+ ProfileSummary(Kind K, const SummaryEntryVector &DetailedSummary,
uint64_t TotalCount, uint64_t MaxCount,
uint64_t MaxInternalCount, uint64_t MaxFunctionCount,
uint32_t NumCounts, uint32_t NumFunctions,
@@ -85,22 +85,22 @@ public:
bool AddPartialProfileRatioField = true);
/// Construct profile summary from metadata.
static ProfileSummary *getFromMD(Metadata *MD);
- SummaryEntryVector &getDetailedSummary() { return DetailedSummary; }
- uint32_t getNumFunctions() { return NumFunctions; }
- uint64_t getMaxFunctionCount() { return MaxFunctionCount; }
- uint32_t getNumCounts() { return NumCounts; }
- uint64_t getTotalCount() { return TotalCount; }
- uint64_t getMaxCount() { return MaxCount; }
- uint64_t getMaxInternalCount() { return MaxInternalCount; }
+ const SummaryEntryVector &getDetailedSummary() { return DetailedSummary; }
+ uint32_t getNumFunctions() const { return NumFunctions; }
+ uint64_t getMaxFunctionCount() const { return MaxFunctionCount; }
+ uint32_t getNumCounts() const { return NumCounts; }
+ uint64_t getTotalCount() const { return TotalCount; }
+ uint64_t getMaxCount() const { return MaxCount; }
+ uint64_t getMaxInternalCount() const { return MaxInternalCount; }
void setPartialProfile(bool PP) { Partial = PP; }
- bool isPartialProfile() { return Partial; }
- double getPartialProfileRatio() { return PartialProfileRatio; }
+ bool isPartialProfile() const { return Partial; }
+ double getPartialProfileRatio() const { return PartialProfileRatio; }
void setPartialProfileRatio(double R) {
assert(isPartialProfile() && "Unexpected when not partial profile");
PartialProfileRatio = R;
}
- void printSummary(raw_ostream &OS);
- void printDetailedSummary(raw_ostream &OS);
+ void printSummary(raw_ostream &OS) const;
+ void printDetailedSummary(raw_ostream &OS) const;
};
} // end namespace llvm
diff --git a/llvm/include/llvm/IR/PseudoProbe.h b/llvm/include/llvm/IR/PseudoProbe.h
index 53100f049910..51ba7e675efe 100644
--- a/llvm/include/llvm/IR/PseudoProbe.h
+++ b/llvm/include/llvm/IR/PseudoProbe.h
@@ -27,10 +27,6 @@ constexpr const char *PseudoProbeDescMetadataName = "llvm.pseudo_probe_desc";
enum class PseudoProbeType { Block = 0, IndirectCall, DirectCall };
-enum class PseudoProbeAttributes {
- Reserved = 0x1, // Reserved for future use.
-};
-
// The saturated distribution factor representing 100% for block probes.
constexpr static uint64_t PseudoProbeFullDistributionFactor =
std::numeric_limits<uint64_t>::max();
diff --git a/llvm/include/llvm/IR/ReplaceConstant.h b/llvm/include/llvm/IR/ReplaceConstant.h
index 4d95143a4bd2..5ad1d0a6f920 100644
--- a/llvm/include/llvm/IR/ReplaceConstant.h
+++ b/llvm/include/llvm/IR/ReplaceConstant.h
@@ -21,10 +21,6 @@
namespace llvm {
-/// Create a replacement instruction for constant expression \p CE and insert
-/// it before \p Instr.
-Instruction *createReplacementInstr(ConstantExpr *CE, Instruction *Instr);
-
/// The given instruction \p I contains given constant expression \p CE as one
/// of its operands, possibly nested within constant expression trees. Convert
/// all reachable paths from constant expression operands of \p I to \p CE into
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def
index c73172612b1e..62d67308114f 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.def
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.def
@@ -287,6 +287,7 @@ HANDLE_LIBCALL(FPEXT_F80_F128, "__extendxftf2")
HANDLE_LIBCALL(FPEXT_F64_F128, "__extenddftf2")
HANDLE_LIBCALL(FPEXT_F32_F128, "__extendsftf2")
HANDLE_LIBCALL(FPEXT_F16_F128, "__extendhftf2")
+HANDLE_LIBCALL(FPEXT_F16_F80, "__extendhfxf2")
HANDLE_LIBCALL(FPEXT_F32_F64, "__extendsfdf2")
HANDLE_LIBCALL(FPEXT_F16_F64, "__extendhfdf2")
HANDLE_LIBCALL(FPEXT_F16_F32, "__gnu_h2f_ieee")
@@ -375,6 +376,8 @@ HANDLE_LIBCALL(UINTTOFP_I128_F64, "__floatuntidf")
HANDLE_LIBCALL(UINTTOFP_I128_F80, "__floatuntixf")
HANDLE_LIBCALL(UINTTOFP_I128_F128, "__floatuntitf")
HANDLE_LIBCALL(UINTTOFP_I128_PPCF128, "__floatuntitf")
+HANDLE_LIBCALL(CONVERT_F128_PPCF128, "__extendkftf2")
+HANDLE_LIBCALL(CONVERT_PPCF128_F128, "__trunctfkf2")
// Comparison
HANDLE_LIBCALL(OEQ_F32, "__eqsf2")
@@ -431,6 +434,7 @@ HANDLE_LIBCALL(MEMSET_ELEMENT_UNORDERED_ATOMIC_16, "__llvm_memset_element_unorde
// Exception handling
HANDLE_LIBCALL(UNWIND_RESUME, "_Unwind_Resume")
+HANDLE_LIBCALL(CXA_END_CLEANUP, "__cxa_end_cleanup")
// Note: there are two sets of atomics libcalls; see
// <https://llvm.org/docs/Atomics.html> for more info on the
diff --git a/llvm/include/llvm/IR/Type.h b/llvm/include/llvm/IR/Type.h
index 430bc34a47e7..47431adc6fac 100644
--- a/llvm/include/llvm/IR/Type.h
+++ b/llvm/include/llvm/IR/Type.h
@@ -14,7 +14,6 @@
#ifndef LLVM_IR_TYPE_H
#define LLVM_IR_TYPE_H
-#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/CBindingWrapping.h"
@@ -29,6 +28,7 @@
namespace llvm {
class IntegerType;
+struct fltSemantics;
class LLVMContext;
class PointerType;
class raw_ostream;
@@ -166,18 +166,7 @@ public:
getTypeID() == PPC_FP128TyID;
}
- const fltSemantics &getFltSemantics() const {
- switch (getTypeID()) {
- case HalfTyID: return APFloat::IEEEhalf();
- case BFloatTyID: return APFloat::BFloat();
- case FloatTyID: return APFloat::IEEEsingle();
- case DoubleTyID: return APFloat::IEEEdouble();
- case X86_FP80TyID: return APFloat::x87DoubleExtended();
- case FP128TyID: return APFloat::IEEEquad();
- case PPC_FP128TyID: return APFloat::PPCDoubleDouble();
- default: llvm_unreachable("Invalid floating type");
- }
- }
+ const fltSemantics &getFltSemantics() const;
/// Return true if this is X86 MMX.
bool isX86_MMXTy() const { return getTypeID() == X86_MMXTyID; }
@@ -312,7 +301,7 @@ public:
/// Return whether the type is IEEE compatible, as defined by the eponymous
/// method in APFloat.
- bool isIEEE() const { return APFloat::getZero(getFltSemantics()).isIEEE(); }
+ bool isIEEE() const;
/// If this is a vector type, return the element type, otherwise return
/// 'this'.
@@ -443,26 +432,7 @@ public:
}
llvm_unreachable("Unsupported type in Type::getScalarTy");
}
- static Type *getFloatingPointTy(LLVMContext &C, const fltSemantics &S) {
- Type *Ty;
- if (&S == &APFloat::IEEEhalf())
- Ty = Type::getHalfTy(C);
- else if (&S == &APFloat::BFloat())
- Ty = Type::getBFloatTy(C);
- else if (&S == &APFloat::IEEEsingle())
- Ty = Type::getFloatTy(C);
- else if (&S == &APFloat::IEEEdouble())
- Ty = Type::getDoubleTy(C);
- else if (&S == &APFloat::x87DoubleExtended())
- Ty = Type::getX86_FP80Ty(C);
- else if (&S == &APFloat::IEEEquad())
- Ty = Type::getFP128Ty(C);
- else {
- assert(&S == &APFloat::PPCDoubleDouble() && "Unknown FP format");
- Ty = Type::getPPC_FP128Ty(C);
- }
- return Ty;
- }
+ static Type *getFloatingPointTy(LLVMContext &C, const fltSemantics &S);
//===--------------------------------------------------------------------===//
// Convenience methods for getting pointer types with one of the above builtin
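The two helpers above were only moved out of line (so Type.h no longer pulls in APFloat.h); a sketch of the unchanged round trip between a floating-point type and its semantics:

    #include "llvm/ADT/APFloat.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Type.h"
    using namespace llvm;

    // Illustrative: getFltSemantics() and getFloatingPointTy() remain inverses
    // of each other for floating-point types.
    static Type *sameFloatType(LLVMContext &Ctx, Type *FPTy) {
      const fltSemantics &Sem = FPTy->getFltSemantics();
      return Type::getFloatingPointTy(Ctx, Sem); // == FPTy for FP types
    }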
diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def
index 92e2cd3a2783..361d6357b303 100644
--- a/llvm/include/llvm/IR/VPIntrinsics.def
+++ b/llvm/include/llvm/IR/VPIntrinsics.def
@@ -111,6 +111,21 @@ END_REGISTER_VP_SDNODE(SDOPC)
#define HANDLE_VP_IS_MEMOP(VPID, POINTERPOS, DATAPOS)
#endif
+// Map this VP reduction intrinsic to its reduction operand positions.
+#ifndef HANDLE_VP_REDUCTION
+#define HANDLE_VP_REDUCTION(ID, STARTPOS, VECTORPOS)
+#endif
+
+// A property to infer VP binary-op SDNode opcodes automatically.
+#ifndef PROPERTY_VP_BINARYOP_SDNODE
+#define PROPERTY_VP_BINARYOP_SDNODE(ID)
+#endif
+
+// A property to infer VP reduction SDNode opcodes automatically.
+#ifndef PROPERTY_VP_REDUCTION_SDNODE
+#define PROPERTY_VP_REDUCTION_SDNODE(ID)
+#endif
+
/// } Property Macros
///// Integer Arithmetic {
@@ -122,6 +137,7 @@ END_REGISTER_VP_SDNODE(SDOPC)
#define HELPER_REGISTER_BINARY_INT_VP(INTRIN, SDOPC, OPC) \
BEGIN_REGISTER_VP(INTRIN, 2, 3, SDOPC, -1) \
HANDLE_VP_TO_OPC(OPC) \
+PROPERTY_VP_BINARYOP_SDNODE(SDOPC) \
END_REGISTER_VP(INTRIN, SDOPC)
@@ -181,6 +197,7 @@ HELPER_REGISTER_BINARY_INT_VP(vp_xor, VP_XOR, Xor)
BEGIN_REGISTER_VP(vp_##OPSUFFIX, 2, 3, SDOPC, -1) \
HANDLE_VP_TO_OPC(OPC) \
HANDLE_VP_TO_CONSTRAINEDFP(1, 1, experimental_constrained_##OPSUFFIX) \
+ PROPERTY_VP_BINARYOP_SDNODE(SDOPC) \
END_REGISTER_VP(vp_##OPSUFFIX, SDOPC)
// llvm.vp.fadd(x,y,mask,vlen)
@@ -204,33 +221,146 @@ HELPER_REGISTER_BINARY_FP_VP(frem, VP_FREM, FRem)
///// Memory Operations {
// llvm.vp.store(ptr,val,mask,vlen)
-BEGIN_REGISTER_VP(vp_store, 2, 3, VP_STORE, 0)
+BEGIN_REGISTER_VP_INTRINSIC(vp_store, 2, 3)
+// chain = VP_STORE chain,val,base,offset,mask,evl
+BEGIN_REGISTER_VP_SDNODE(VP_STORE, 0, vp_store, 4, 5)
HANDLE_VP_TO_OPC(Store)
HANDLE_VP_TO_INTRIN(masked_store)
HANDLE_VP_IS_MEMOP(vp_store, 1, 0)
END_REGISTER_VP(vp_store, VP_STORE)
// llvm.vp.scatter(ptr,val,mask,vlen)
-BEGIN_REGISTER_VP(vp_scatter, 2, 3, VP_SCATTER, 0)
+BEGIN_REGISTER_VP_INTRINSIC(vp_scatter, 2, 3)
+// chain = VP_SCATTER chain,val,base,indices,scale,mask,evl
+BEGIN_REGISTER_VP_SDNODE(VP_SCATTER, -1, vp_scatter, 5, 6)
HANDLE_VP_TO_INTRIN(masked_scatter)
HANDLE_VP_IS_MEMOP(vp_scatter, 1, 0)
END_REGISTER_VP(vp_scatter, VP_SCATTER)
// llvm.vp.load(ptr,mask,vlen)
-BEGIN_REGISTER_VP(vp_load, 1, 2, VP_LOAD, -1)
+BEGIN_REGISTER_VP_INTRINSIC(vp_load, 1, 2)
+// val,chain = VP_LOAD chain,base,offset,mask,evl
+BEGIN_REGISTER_VP_SDNODE(VP_LOAD, -1, vp_load, 3, 4)
HANDLE_VP_TO_OPC(Load)
HANDLE_VP_TO_INTRIN(masked_load)
HANDLE_VP_IS_MEMOP(vp_load, 0, None)
END_REGISTER_VP(vp_load, VP_LOAD)
// llvm.vp.gather(ptr,mask,vlen)
-BEGIN_REGISTER_VP(vp_gather, 1, 2, VP_GATHER, -1)
+BEGIN_REGISTER_VP_INTRINSIC(vp_gather, 1, 2)
+// val,chain = VP_GATHER chain,base,indices,scale,mask,evl
+BEGIN_REGISTER_VP_SDNODE(VP_GATHER, -1, vp_gather, 4, 5)
HANDLE_VP_TO_INTRIN(masked_gather)
HANDLE_VP_IS_MEMOP(vp_gather, 0, None)
END_REGISTER_VP(vp_gather, VP_GATHER)
///// } Memory Operations
+///// Reductions {
+
+// Specialized helper macro for VP reductions (%start, %x, %mask, %evl).
+#ifdef HELPER_REGISTER_REDUCTION_VP
+#error "The internal helper macro HELPER_REGISTER_REDUCTION_VP is already defined!"
+#endif
+#define HELPER_REGISTER_REDUCTION_VP(VPINTRIN, SDOPC, INTRIN) \
+BEGIN_REGISTER_VP(VPINTRIN, 2, 3, SDOPC, -1) \
+HANDLE_VP_TO_INTRIN(INTRIN) \
+HANDLE_VP_REDUCTION(VPINTRIN, 0, 1) \
+PROPERTY_VP_REDUCTION_SDNODE(SDOPC) \
+END_REGISTER_VP(VPINTRIN, SDOPC)
+
+// llvm.vp.reduce.add(start,x,mask,vlen)
+HELPER_REGISTER_REDUCTION_VP(vp_reduce_add, VP_REDUCE_ADD,
+ experimental_vector_reduce_add)
+
+// llvm.vp.reduce.mul(start,x,mask,vlen)
+HELPER_REGISTER_REDUCTION_VP(vp_reduce_mul, VP_REDUCE_MUL,
+ experimental_vector_reduce_mul)
+
+// llvm.vp.reduce.and(start,x,mask,vlen)
+HELPER_REGISTER_REDUCTION_VP(vp_reduce_and, VP_REDUCE_AND,
+ experimental_vector_reduce_and)
+
+// llvm.vp.reduce.or(start,x,mask,vlen)
+HELPER_REGISTER_REDUCTION_VP(vp_reduce_or, VP_REDUCE_OR,
+ experimental_vector_reduce_or)
+
+// llvm.vp.reduce.xor(start,x,mask,vlen)
+HELPER_REGISTER_REDUCTION_VP(vp_reduce_xor, VP_REDUCE_XOR,
+ experimental_vector_reduce_xor)
+
+// llvm.vp.reduce.smax(start,x,mask,vlen)
+HELPER_REGISTER_REDUCTION_VP(vp_reduce_smax, VP_REDUCE_SMAX,
+ experimental_vector_reduce_smax)
+
+// llvm.vp.reduce.smin(start,x,mask,vlen)
+HELPER_REGISTER_REDUCTION_VP(vp_reduce_smin, VP_REDUCE_SMIN,
+ experimental_vector_reduce_smin)
+
+// llvm.vp.reduce.umax(start,x,mask,vlen)
+HELPER_REGISTER_REDUCTION_VP(vp_reduce_umax, VP_REDUCE_UMAX,
+ experimental_vector_reduce_umax)
+
+// llvm.vp.reduce.umin(start,x,mask,vlen)
+HELPER_REGISTER_REDUCTION_VP(vp_reduce_umin, VP_REDUCE_UMIN,
+ experimental_vector_reduce_umin)
+
+// llvm.vp.reduce.fmax(start,x,mask,vlen)
+HELPER_REGISTER_REDUCTION_VP(vp_reduce_fmax, VP_REDUCE_FMAX,
+ experimental_vector_reduce_fmax)
+
+// llvm.vp.reduce.fmin(start,x,mask,vlen)
+HELPER_REGISTER_REDUCTION_VP(vp_reduce_fmin, VP_REDUCE_FMIN,
+ experimental_vector_reduce_fmin)
+
+#undef HELPER_REGISTER_REDUCTION_VP
+
+// Specialized helper macro for VP reductions as above but with two forms:
+// sequential and reassociative. These manifest as the presence of 'reassoc'
+// fast-math flags in the IR and as two distinct ISD opcodes in the
+// SelectionDAG.
+#ifdef HELPER_REGISTER_REDUCTION_SEQ_VP
+#error "The internal helper macro HELPER_REGISTER_REDUCTION_SEQ_VP is already defined!"
+#endif
+#define HELPER_REGISTER_REDUCTION_SEQ_VP(VPINTRIN, SDOPC, SEQ_SDOPC, INTRIN) \
+BEGIN_REGISTER_VP_INTRINSIC(VPINTRIN, 2, 3) \
+BEGIN_REGISTER_VP_SDNODE(SDOPC, -1, VPINTRIN, 2, 3) \
+END_REGISTER_VP_SDNODE(SDOPC) \
+BEGIN_REGISTER_VP_SDNODE(SEQ_SDOPC, -1, VPINTRIN, 2, 3) \
+END_REGISTER_VP_SDNODE(SEQ_SDOPC) \
+HANDLE_VP_TO_INTRIN(INTRIN) \
+HANDLE_VP_REDUCTION(VPINTRIN, 0, 1) \
+PROPERTY_VP_REDUCTION_SDNODE(SDOPC) \
+PROPERTY_VP_REDUCTION_SDNODE(SEQ_SDOPC) \
+END_REGISTER_VP_INTRINSIC(VPINTRIN)
+
+// llvm.vp.reduce.fadd(start,x,mask,vlen)
+HELPER_REGISTER_REDUCTION_SEQ_VP(vp_reduce_fadd, VP_REDUCE_FADD,
+ VP_REDUCE_SEQ_FADD,
+ experimental_vector_reduce_fadd)
+
+// llvm.vp.reduce.fmul(start,x,mask,vlen)
+HELPER_REGISTER_REDUCTION_SEQ_VP(vp_reduce_fmul, VP_REDUCE_FMUL,
+ VP_REDUCE_SEQ_FMUL,
+ experimental_vector_reduce_fmul)
+
+#undef HELPER_REGISTER_REDUCTION_SEQ_VP
+
+///// } Reduction
+
+///// Shuffles {
+
+// llvm.vp.select(mask,on_true,on_false,vlen)
+BEGIN_REGISTER_VP_INTRINSIC(vp_select, 0, 3)
+// BEGIN_REGISTER_VP_SDNODE(VP_SELECT, -1, vp_select, 0, 4)
+// END_REGISTER_CASES(vp_select, VP_SELECT)
+END_REGISTER_VP_INTRINSIC(vp_select)
+
+BEGIN_REGISTER_VP(experimental_vp_splice, 3, 5,
+ EXPERIMENTAL_VP_SPLICE, -1)
+END_REGISTER_VP(experimental_vp_splice, EXPERIMENTAL_VP_SPLICE)
+
+///// } Shuffles
#undef BEGIN_REGISTER_VP
#undef BEGIN_REGISTER_VP_INTRINSIC
@@ -242,3 +372,6 @@ END_REGISTER_VP(vp_gather, VP_GATHER)
#undef HANDLE_VP_TO_CONSTRAINEDFP
#undef HANDLE_VP_TO_INTRIN
#undef HANDLE_VP_IS_MEMOP
+#undef HANDLE_VP_REDUCTION
+#undef PROPERTY_VP_BINARYOP_SDNODE
+#undef PROPERTY_VP_REDUCTION_SDNODE
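A sketch of one way a consumer could use the new reduction property macro when including the .def file; the function name is hypothetical and this is illustrative only.

    #include <cstdio>

    // Only the reduction property is given a body; every other macro in the
    // .def file falls back to the empty default it defines for itself.
    #define HANDLE_VP_REDUCTION(VPID, STARTPOS, VECTORPOS)                     \
      std::printf(#VPID ": start operand %d, vector operand %d\n", (STARTPOS), \
                  (VECTORPOS));

    static void dumpVPReductionOperands() {
    #include "llvm/IR/VPIntrinsics.def"
    }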
diff --git a/llvm/include/llvm/IR/Value.h b/llvm/include/llvm/IR/Value.h
index 2ad1c9e8c300..fc2ed00d770f 100644
--- a/llvm/include/llvm/IR/Value.h
+++ b/llvm/include/llvm/IR/Value.h
@@ -37,7 +37,6 @@ class DataLayout;
class Function;
class GlobalAlias;
class GlobalIFunc;
-class GlobalIndirectSymbol;
class GlobalObject;
class GlobalValue;
class GlobalVariable;
@@ -454,14 +453,18 @@ public:
/// Return true if there is exactly one use of this value that cannot be
/// dropped.
- ///
- /// This is specialized because it is a common request and does not require
- /// traversing the whole use list.
Use *getSingleUndroppableUse();
const Use *getSingleUndroppableUse() const {
return const_cast<Value *>(this)->getSingleUndroppableUse();
}
+ /// Return true if there is exactly one unique user of this value that cannot be
+ /// dropped (that user can have multiple uses of this value).
+ User *getUniqueUndroppableUser();
+ const User *getUniqueUndroppableUser() const {
+ return const_cast<Value *>(this)->getUniqueUndroppableUser();
+ }
+
/// Return true if there are exactly N undroppable uses of this value.
///
/// This is specialized because it is a common request and does not require
@@ -690,6 +693,9 @@ public:
/// If \p AllowNonInbounds is true, offsets in GEPs are stripped and
/// accumulated even if the GEP is not "inbounds".
///
+ /// If \p AllowInvariantGroup is true then this method also looks through
+ /// strip.invariant.group and launder.invariant.group intrinsics.
+ ///
/// If \p ExternalAnalysis is provided it will be used to calculate an offset
/// when an operand of GEP is not constant.
/// For example, for a value \p ExternalAnalysis might try to calculate a
@@ -705,13 +711,15 @@ public:
/// is unchanged.
const Value *stripAndAccumulateConstantOffsets(
const DataLayout &DL, APInt &Offset, bool AllowNonInbounds,
+ bool AllowInvariantGroup = false,
function_ref<bool(Value &Value, APInt &Offset)> ExternalAnalysis =
nullptr) const;
Value *stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset,
- bool AllowNonInbounds) {
+ bool AllowNonInbounds,
+ bool AllowInvariantGroup = false) {
return const_cast<Value *>(
static_cast<const Value *>(this)->stripAndAccumulateConstantOffsets(
- DL, Offset, AllowNonInbounds));
+ DL, Offset, AllowNonInbounds, AllowInvariantGroup));
}
/// This is a wrapper around stripAndAccumulateConstantOffsets with the
@@ -781,8 +789,8 @@ public:
///
/// This is the greatest alignment value supported by load, store, and alloca
/// instructions, and global values.
- static const unsigned MaxAlignmentExponent = 29;
- static const unsigned MaximumAlignment = 1u << MaxAlignmentExponent;
+ static constexpr unsigned MaxAlignmentExponent = 32;
+ static constexpr uint64_t MaximumAlignment = 1ULL << MaxAlignmentExponent;
/// Mutate the type of this Value to be of the specified type.
///
@@ -1012,21 +1020,16 @@ template <> struct isa_impl<GlobalIFunc, Value> {
}
};
-template <> struct isa_impl<GlobalIndirectSymbol, Value> {
- static inline bool doit(const Value &Val) {
- return isa<GlobalAlias>(Val) || isa<GlobalIFunc>(Val);
- }
-};
-
template <> struct isa_impl<GlobalValue, Value> {
static inline bool doit(const Value &Val) {
- return isa<GlobalObject>(Val) || isa<GlobalIndirectSymbol>(Val);
+ return isa<GlobalObject>(Val) || isa<GlobalAlias>(Val);
}
};
template <> struct isa_impl<GlobalObject, Value> {
static inline bool doit(const Value &Val) {
- return isa<GlobalVariable>(Val) || isa<Function>(Val);
+ return isa<GlobalVariable>(Val) || isa<Function>(Val) ||
+ isa<GlobalIFunc>(Val);
}
};
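A sketch of the new AllowInvariantGroup parameter on stripAndAccumulateConstantOffsets; the helper name is hypothetical and shown only to illustrate the call.

    #include "llvm/ADT/APInt.h"
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Value.h"
    using namespace llvm;

    // Illustrative: accumulate a constant pointer offset while also looking
    // through llvm.strip/launder.invariant.group, enabled by the new flag.
    static const Value *baseAndOffset(const Value *Ptr, const DataLayout &DL,
                                      APInt &Offset) {
      Offset = APInt(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
      return Ptr->stripAndAccumulateConstantOffsets(
          DL, Offset, /*AllowNonInbounds=*/true, /*AllowInvariantGroup=*/true);
    }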
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 365240de321a..845d7dcdebd2 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -64,6 +64,7 @@ void initializeAAEvalLegacyPassPass(PassRegistry&);
void initializeAAResultsWrapperPassPass(PassRegistry&);
void initializeADCELegacyPassPass(PassRegistry&);
void initializeAddDiscriminatorsLegacyPassPass(PassRegistry&);
+void initializeAddFSDiscriminatorsPass(PassRegistry &);
void initializeModuleAddressSanitizerLegacyPassPass(PassRegistry &);
void initializeASanGlobalsMetadataWrapperPassPass(PassRegistry &);
void initializeAddressSanitizerLegacyPassPass(PassRegistry &);
@@ -183,6 +184,7 @@ void initializeGlobalSplitPass(PassRegistry&);
void initializeGlobalsAAWrapperPassPass(PassRegistry&);
void initializeGuardWideningLegacyPassPass(PassRegistry&);
void initializeHardwareLoopsPass(PassRegistry&);
+void initializeMIRProfileLoaderPassPass(PassRegistry &);
void initializeMemProfilerLegacyPassPass(PassRegistry &);
void initializeHotColdSplittingLegacyPassPass(PassRegistry&);
void initializeHWAddressSanitizerLegacyPassPass(PassRegistry &);
@@ -234,7 +236,8 @@ void initializeLiveIntervalsPass(PassRegistry&);
void initializeLiveRangeShrinkPass(PassRegistry&);
void initializeLiveRegMatrixPass(PassRegistry&);
void initializeLiveStacksPass(PassRegistry&);
-void initializeLiveVariablesPass(PassRegistry&);
+void initializeLiveVariablesPass(PassRegistry &);
+void initializeLoadStoreOptPass(PassRegistry &);
void initializeLoadStoreVectorizerLegacyPassPass(PassRegistry&);
void initializeLoaderPassPass(PassRegistry&);
void initializeLocalStackSlotPassPass(PassRegistry&);
diff --git a/llvm/include/llvm/InterfaceStub/IFSHandler.h b/llvm/include/llvm/InterfaceStub/IFSHandler.h
index de627492366f..6ae6a421318e 100644
--- a/llvm/include/llvm/InterfaceStub/IFSHandler.h
+++ b/llvm/include/llvm/InterfaceStub/IFSHandler.h
@@ -51,6 +51,9 @@ Error validateIFSTarget(IFSStub &Stub, bool ParseTriple);
void stripIFSTarget(IFSStub &Stub, bool StripTriple, bool StripArch,
bool StripEndianness, bool StripBitWidth);
+/// Strips symbols from IFS symbol table that are undefined.
+void stripIFSUndefinedSymbols(IFSStub &Stub);
+
/// Parse llvm triple string into a IFSTarget struct.
IFSTarget parseTriple(StringRef TripleStr);
diff --git a/llvm/include/llvm/LTO/Caching.h b/llvm/include/llvm/LTO/Caching.h
deleted file mode 100644
index 43b978328b74..000000000000
--- a/llvm/include/llvm/LTO/Caching.h
+++ /dev/null
@@ -1,38 +0,0 @@
-//===- Caching.h - LLVM Link Time Optimizer Configuration -----------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the localCache function, which allows clients to add a
-// filesystem cache to ThinLTO.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LTO_CACHING_H
-#define LLVM_LTO_CACHING_H
-
-#include "llvm/LTO/LTO.h"
-
-namespace llvm {
-namespace lto {
-
-/// This type defines the callback to add a pre-existing native object file
-/// (e.g. in a cache).
-///
-/// Buffer callbacks must be thread safe.
-using AddBufferFn =
- std::function<void(unsigned Task, std::unique_ptr<MemoryBuffer> MB)>;
-
-/// Create a local file system cache which uses the given cache directory and
-/// file callback. This function also creates the cache directory if it does not
-/// already exist.
-Expected<NativeObjectCache> localCache(StringRef CacheDirectoryPath,
- AddBufferFn AddBuffer);
-
-} // namespace lto
-} // namespace llvm
-
-#endif
diff --git a/llvm/include/llvm/LTO/Config.h b/llvm/include/llvm/LTO/Config.h
index 5fd3c9f408f3..eb793d62907e 100644
--- a/llvm/include/llvm/LTO/Config.h
+++ b/llvm/include/llvm/LTO/Config.h
@@ -70,6 +70,9 @@ struct Config {
/// Run PGO context sensitive IR instrumentation.
bool RunCSIRInstr = false;
+ /// Turn on/off the warning about a hash mismatch in the PGO profile data.
+ bool PGOWarnMismatch = true;
+
/// Asserts whether we can assume whole program visibility during the LTO
/// link.
bool HasWholeProgramVisibility = false;
diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h
index ea1dea2d6f42..d2b0fef1ca47 100644
--- a/llvm/include/llvm/LTO/LTO.h
+++ b/llvm/include/llvm/LTO/LTO.h
@@ -21,8 +21,10 @@
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/LTO/Config.h"
#include "llvm/Object/IRSymtab.h"
+#include "llvm/Support/Caching.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/thread.h"
+#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/Transforms/IPO/FunctionImport.h"
namespace llvm {
@@ -38,7 +40,7 @@ class ToolOutputFile;
/// Resolve linkage for prevailing symbols in the \p Index. Linkage changes
/// recorded in the index and the ThinLTO backends must apply the changes to
-/// the module via thinLTOResolvePrevailingInModule.
+/// the module via thinLTOFinalizeInModule.
///
/// This is done for correctness (if value exported, ensure we always
/// emit a copy), and compile-time optimization (allow drop of duplicates).
@@ -186,47 +188,13 @@ private:
}
};
-/// This class wraps an output stream for a native object. Most clients should
-/// just be able to return an instance of this base class from the stream
-/// callback, but if a client needs to perform some action after the stream is
-/// written to, that can be done by deriving from this class and overriding the
-/// destructor.
-class NativeObjectStream {
-public:
- NativeObjectStream(std::unique_ptr<raw_pwrite_stream> OS) : OS(std::move(OS)) {}
- std::unique_ptr<raw_pwrite_stream> OS;
- virtual ~NativeObjectStream() = default;
-};
-
-/// This type defines the callback to add a native object that is generated on
-/// the fly.
-///
-/// Stream callbacks must be thread safe.
-using AddStreamFn =
- std::function<std::unique_ptr<NativeObjectStream>(unsigned Task)>;
-
-/// This is the type of a native object cache. To request an item from the
-/// cache, pass a unique string as the Key. For hits, the cached file will be
-/// added to the link and this function will return AddStreamFn(). For misses,
-/// the cache will return a stream callback which must be called at most once to
-/// produce content for the stream. The native object stream produced by the
-/// stream callback will add the file to the link after the stream is written
-/// to.
-///
-/// Clients generally look like this:
-///
-/// if (AddStreamFn AddStream = Cache(Task, Key))
-/// ProduceContent(AddStream);
-using NativeObjectCache =
- std::function<AddStreamFn(unsigned Task, StringRef Key)>;
-
/// A ThinBackend defines what happens after the thin-link phase during ThinLTO.
/// The details of this type definition aren't important; clients can only
/// create a ThinBackend using one of the create*ThinBackend() functions below.
using ThinBackend = std::function<std::unique_ptr<ThinBackendProc>(
const Config &C, ModuleSummaryIndex &CombinedIndex,
StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
- AddStreamFn AddStream, NativeObjectCache Cache)>;
+ AddStreamFn AddStream, FileCache Cache)>;
/// This ThinBackend runs the individual backend jobs in-process.
/// The default value means to use one job per hardware core (not hyper-thread).
@@ -299,7 +267,7 @@ public:
///
/// The client will receive at most one callback (via either AddStream or
/// Cache) for each task identifier.
- Error run(AddStreamFn AddStream, NativeObjectCache Cache = nullptr);
+ Error run(AddStreamFn AddStream, FileCache Cache = nullptr);
/// Static method that returns a list of libcall symbols that can be generated
/// by LTO but might not be visible from bitcode symbol table.
@@ -431,7 +399,7 @@ private:
const SymbolResolution *&ResI, const SymbolResolution *ResE);
Error runRegularLTO(AddStreamFn AddStream);
- Error runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache,
+ Error runThinLTO(AddStreamFn AddStream, FileCache Cache,
const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols);
Error checkPartiallySplit();
@@ -444,6 +412,9 @@ private:
// Identify symbols exported dynamically, and that therefore could be
// referenced by a shared library not visible to the linker.
DenseSet<GlobalValue::GUID> DynamicExportSymbols;
+
+ // Diagnostic optimization remarks file
+ std::unique_ptr<ToolOutputFile> DiagnosticOutputFile;
};
/// The resolution for a symbol. The linker must provide a SymbolResolution for
diff --git a/llvm/include/llvm/LTO/SummaryBasedOptimizations.h b/llvm/include/llvm/LTO/SummaryBasedOptimizations.h
index 6697c821a5ea..508ab2587ac5 100644
--- a/llvm/include/llvm/LTO/SummaryBasedOptimizations.h
+++ b/llvm/include/llvm/LTO/SummaryBasedOptimizations.h
@@ -10,6 +10,8 @@
#define LLVM_LTO_SUMMARYBASEDOPTIMIZATIONS_H
namespace llvm {
class ModuleSummaryIndex;
+
+/// Compute synthetic function entry counts.
void computeSyntheticCounts(ModuleSummaryIndex &Index);
} // namespace llvm
diff --git a/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h b/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h
index 31688e43e174..333f483f29c5 100644
--- a/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h
+++ b/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h
@@ -176,7 +176,7 @@ struct LTOCodeGenerator {
/// created using the \p AddStream callback. Returns true on success.
///
/// Calls \a verifyMergedModuleOnce().
- bool compileOptimized(lto::AddStreamFn AddStream, unsigned ParallelismLevel);
+ bool compileOptimized(AddStreamFn AddStream, unsigned ParallelismLevel);
/// Enable the Freestanding mode: indicate that the optimizer should not
/// assume builtins are present on the target.
diff --git a/llvm/include/llvm/LTO/legacy/LTOModule.h b/llvm/include/llvm/LTO/legacy/LTOModule.h
index 2a25dab58ada..01e63db4bab3 100644
--- a/llvm/include/llvm/LTO/legacy/LTOModule.h
+++ b/llvm/include/llvm/LTO/legacy/LTOModule.h
@@ -167,6 +167,10 @@ public:
Expected<uint32_t> getMachOCPUSubType() const;
+ /// Returns true if the module has either the @llvm.global_ctors or the
+ /// @llvm.global_dtors symbol. Otherwise returns false.
+ bool hasCtorDtor() const;
+
private:
/// Parse metadata from the module
// FIXME: it only parses "llvm.linker.options" metadata at the moment
diff --git a/llvm/include/llvm/LinkAllIR.h b/llvm/include/llvm/LinkAllIR.h
index 4b0aabeee701..ceed784d557d 100644
--- a/llvm/include/llvm/LinkAllIR.h
+++ b/llvm/include/llvm/LinkAllIR.h
@@ -38,6 +38,9 @@ namespace {
// delete it all as dead code, even with whole program optimization,
// yet is effectively a NO-OP. As the compiler isn't smart enough
// to know that getenv() never returns -1, this will do the job.
+ // This is so that globals in the translation units where these functions
+ // are defined are forced to be initialized, populating various
+ // registries.
if (std::getenv("bar") != (char*) -1)
return;
llvm::LLVMContext Context;
diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h
index 45978828a8ce..c8b9aaeed76a 100644
--- a/llvm/include/llvm/LinkAllPasses.h
+++ b/llvm/include/llvm/LinkAllPasses.h
@@ -64,6 +64,9 @@ namespace {
// delete it all as dead code, even with whole program optimization,
// yet is effectively a NO-OP. As the compiler isn't smart enough
// to know that getenv() never returns -1, this will do the job.
+ // This is so that globals in the translation units where these functions
+ // are defined are forced to be initialized, populating various
+ // registries.
if (std::getenv("bar") != (char*) -1)
return;
diff --git a/llvm/include/llvm/MC/MCAsmBackend.h b/llvm/include/llvm/MC/MCAsmBackend.h
index 08739d51f751..bb57c3453d10 100644
--- a/llvm/include/llvm/MC/MCAsmBackend.h
+++ b/llvm/include/llvm/MC/MCAsmBackend.h
@@ -55,7 +55,8 @@ public:
/// Give the target a chance to manipulate state related to instruction
/// alignment (e.g. padding for optimization), instruction relaxability, etc.
/// before and after actually emitting the instruction.
- virtual void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst) {}
+ virtual void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst,
+ const MCSubtargetInfo &STI) {}
virtual void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) {}
/// lifetime management
@@ -185,13 +186,16 @@ public:
/// Returns the maximum size of a nop in bytes on this target.
///
- virtual unsigned getMaximumNopSize() const { return 0; }
+ virtual unsigned getMaximumNopSize(const MCSubtargetInfo &STI) const {
+ return 0;
+ }
/// Write an (optimal) nop sequence of Count bytes to the given output. If the
/// target cannot generate such a sequence, it should return an error.
///
/// \return - True on success.
- virtual bool writeNopData(raw_ostream &OS, uint64_t Count) const = 0;
+ virtual bool writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const = 0;
/// Give backend an opportunity to finish layout after relaxation
virtual void finishLayout(MCAssembler const &Asm,
diff --git a/llvm/include/llvm/MC/MCAsmInfoGOFF.h b/llvm/include/llvm/MC/MCAsmInfoGOFF.h
new file mode 100644
index 000000000000..1f3b26311b37
--- /dev/null
+++ b/llvm/include/llvm/MC/MCAsmInfoGOFF.h
@@ -0,0 +1,29 @@
+//===- MCAsmInfoGOFF.h - GOFF Asm Info Fields -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines certain target specific asm properties for GOFF (z/OS)
+/// based targets.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCASMINFOGOFF_H
+#define LLVM_MC_MCASMINFOGOFF_H
+
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+class MCAsmInfoGOFF : public MCAsmInfo {
+ virtual void anchor();
+
+protected:
+ MCAsmInfoGOFF();
+};
+} // end namespace llvm
+
+#endif // LLVM_MC_MCASMINFOGOFF_H
diff --git a/llvm/include/llvm/MC/MCContext.h b/llvm/include/llvm/MC/MCContext.h
index 877b2dc4ac92..bde750759a0b 100644
--- a/llvm/include/llvm/MC/MCContext.h
+++ b/llvm/include/llvm/MC/MCContext.h
@@ -817,7 +817,7 @@ namespace llvm {
// Unrecoverable error has occurred. Display the best diagnostic we can
// and bail via exit(1). For now, most MC backend errors are unrecoverable.
// FIXME: We should really do something about that.
- LLVM_ATTRIBUTE_NORETURN void reportFatalError(SMLoc L, const Twine &Msg);
+ [[noreturn]] void reportFatalError(SMLoc L, const Twine &Msg);
const MCAsmMacro *lookupMacro(StringRef Name) {
StringMap<MCAsmMacro>::iterator I = MacroMap.find(Name);
diff --git a/llvm/include/llvm/MC/MCDwarf.h b/llvm/include/llvm/MC/MCDwarf.h
index 23efdc70609b..7e72d56f3097 100644
--- a/llvm/include/llvm/MC/MCDwarf.h
+++ b/llvm/include/llvm/MC/MCDwarf.h
@@ -20,6 +20,7 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCSection.h"
+#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MD5.h"
#include <cassert>
@@ -34,7 +35,6 @@ namespace llvm {
template <typename T> class ArrayRef;
class MCAsmBackend;
class MCContext;
-class MCDwarfLineStr;
class MCObjectStreamer;
class MCStreamer;
class MCSymbol;
@@ -47,6 +47,24 @@ namespace mcdwarf {
MCSymbol *emitListsTableHeaderStart(MCStreamer &S);
} // namespace mcdwarf
+/// Manage the .debug_line_str section contents, if we use it.
+class MCDwarfLineStr {
+ MCSymbol *LineStrLabel = nullptr;
+ StringTableBuilder LineStrings{StringTableBuilder::DWARF};
+ bool UseRelocs = false;
+
+public:
+ /// Construct an instance that can emit .debug_line_str (for use in a normal
+ /// v5 line table).
+ explicit MCDwarfLineStr(MCContext &Ctx);
+
+ /// Emit a reference to the string.
+ void emitRef(MCStreamer *MCOS, StringRef Path);
+
+ /// Emit the .debug_line_str section if appropriate.
+ void emitSection(MCStreamer *MCOS);
+};
+
/// Instances of this class represent the name of the dwarf .file directive and
/// its associated dwarf file number in the MC file. MCDwarfFile's are created
/// and uniqued by the MCContext class. In Dwarf 4 file numbers start from 1;
@@ -170,6 +188,15 @@ public:
MCSymbol *getLabel() const { return Label; }
+ // This indicates the line entry is synthesized for an end entry.
+ bool IsEndEntry = false;
+
+ // Override the label with the given EndLabel.
+ void setEndLabel(MCSymbol *EndLabel) {
+ Label = EndLabel;
+ IsEndEntry = true;
+ }
+
// This is called when an instruction is assembled into the specified
// section and if there is information from the last .loc directive that
// has yet to have a line entry made for it is made.
@@ -187,6 +214,10 @@ public:
MCLineDivisions[Sec].push_back(LineEntry);
}
+ // Add an end entry by cloning the last entry, if one exists, for the section
+ // the given EndLabel belongs to. The label is replaced by the given EndLabel.
+ void addEndEntry(MCSymbol *EndLabel);
+
using MCDwarfLineEntryCollection = std::vector<MCDwarfLineEntry>;
using iterator = MCDwarfLineEntryCollection::iterator;
using const_iterator = MCDwarfLineEntryCollection::const_iterator;
@@ -317,6 +348,11 @@ public:
void emitCU(MCStreamer *MCOS, MCDwarfLineTableParams Params,
Optional<MCDwarfLineStr> &LineStr) const;
+ // This emits a single line table associated with a given Section.
+ static void
+ emitOne(MCStreamer *MCOS, MCSection *Section,
+ const MCLineSection::MCDwarfLineEntryCollection &LineEntries);
+
Expected<unsigned> tryGetFile(StringRef &Directory, StringRef &FileName,
Optional<MD5::MD5Result> Checksum,
Optional<StringRef> Source,
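With MCDwarfLineStr now declared in the header, a minimal sketch of how a caller might drive it; this assumes Ctx and Streamer were fully set up elsewhere and is illustrative only.

    #include "llvm/MC/MCDwarf.h"
    using namespace llvm;

    // Sketch: record a path in .debug_line_str and later flush the section.
    static void emitDebugLineStr(MCContext &Ctx, MCStreamer &Streamer,
                                 StringRef Path) {
      MCDwarfLineStr LineStr(Ctx);
      LineStr.emitRef(&Streamer, Path); // reference the string
      LineStr.emitSection(&Streamer);   // emit .debug_line_str if appropriate
    }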
diff --git a/llvm/include/llvm/MC/MCELFObjectWriter.h b/llvm/include/llvm/MC/MCELFObjectWriter.h
index 9f4b8de7947b..fa17759bc21a 100644
--- a/llvm/include/llvm/MC/MCELFObjectWriter.h
+++ b/llvm/include/llvm/MC/MCELFObjectWriter.h
@@ -78,6 +78,8 @@ public:
case Triple::PS4:
case Triple::FreeBSD:
return ELF::ELFOSABI_FREEBSD;
+ case Triple::Solaris:
+ return ELF::ELFOSABI_SOLARIS;
default:
return ELF::ELFOSABI_NONE;
}
diff --git a/llvm/include/llvm/MC/MCELFStreamer.h b/llvm/include/llvm/MC/MCELFStreamer.h
index 8c1e22a14702..8f2b176862c8 100644
--- a/llvm/include/llvm/MC/MCELFStreamer.h
+++ b/llvm/include/llvm/MC/MCELFStreamer.h
@@ -39,7 +39,7 @@ public:
/// \name MCStreamer Interface
/// @{
- void InitSections(bool NoExecStack) override;
+ void initSections(bool NoExecStack, const MCSubtargetInfo &STI) override;
void changeSection(MCSection *Section, const MCExpr *Subsection) override;
void emitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override;
void emitLabelAtPos(MCSymbol *Symbol, SMLoc Loc, MCFragment *F,
diff --git a/llvm/include/llvm/MC/MCExpr.h b/llvm/include/llvm/MC/MCExpr.h
index 38cca2413e1e..bf1f32bb91ba 100644
--- a/llvm/include/llvm/MC/MCExpr.h
+++ b/llvm/include/llvm/MC/MCExpr.h
@@ -200,6 +200,7 @@ public:
VK_GOTREL,
VK_PCREL,
VK_GOTPCREL,
+ VK_GOTPCREL_NORELAX,
VK_GOTTPOFF,
VK_INDNTPOFF,
VK_NTPOFF,
@@ -328,6 +329,7 @@ public:
VK_WASM_TLSREL, // Memory address relative to __tls_base
VK_WASM_MBREL, // Memory address relative to __memory_base
VK_WASM_TBREL, // Table index relative to __table_base
+ VK_WASM_GOT_TLS, // Wasm global index of TLS symbol.
VK_AMDGPU_GOTPCREL32_LO, // symbol@gotpcrel32@lo
VK_AMDGPU_GOTPCREL32_HI, // symbol@gotpcrel32@hi
diff --git a/llvm/include/llvm/MC/MCFragment.h b/llvm/include/llvm/MC/MCFragment.h
index f3a785fb09b7..736fdd992063 100644
--- a/llvm/include/llvm/MC/MCFragment.h
+++ b/llvm/include/llvm/MC/MCFragment.h
@@ -311,6 +311,9 @@ class MCAlignFragment : public MCFragment {
/// cannot be satisfied in this width then this fragment is ignored.
unsigned MaxBytesToEmit;
+ /// When emitting Nops some subtargets have specific nop encodings.
+ const MCSubtargetInfo *STI;
+
public:
MCAlignFragment(unsigned Alignment, int64_t Value, unsigned ValueSize,
unsigned MaxBytesToEmit, MCSection *Sec = nullptr)
@@ -326,7 +329,12 @@ public:
unsigned getMaxBytesToEmit() const { return MaxBytesToEmit; }
bool hasEmitNops() const { return EmitNops; }
- void setEmitNops(bool Value) { EmitNops = Value; }
+ void setEmitNops(bool Value, const MCSubtargetInfo *STI) {
+ EmitNops = Value;
+ this->STI = STI;
+ }
+
+ const MCSubtargetInfo *getSubtargetInfo() const { return STI; }
static bool classof(const MCFragment *F) {
return F->getKind() == MCFragment::FT_Align;
@@ -369,17 +377,22 @@ class MCNopsFragment : public MCFragment {
/// Source location of the directive that this fragment was created for.
SMLoc Loc;
+ /// When emitting Nops some subtargets have specific nop encodings.
+ const MCSubtargetInfo &STI;
+
public:
MCNopsFragment(int64_t NumBytes, int64_t ControlledNopLength, SMLoc L,
- MCSection *Sec = nullptr)
+ const MCSubtargetInfo &STI, MCSection *Sec = nullptr)
: MCFragment(FT_Nops, false, Sec), Size(NumBytes),
- ControlledNopLength(ControlledNopLength), Loc(L) {}
+ ControlledNopLength(ControlledNopLength), Loc(L), STI(STI) {}
int64_t getNumBytes() const { return Size; }
int64_t getControlledNopLength() const { return ControlledNopLength; }
SMLoc getLoc() const { return Loc; }
+ const MCSubtargetInfo *getSubtargetInfo() const { return &STI; }
+
static bool classof(const MCFragment *F) {
return F->getKind() == MCFragment::FT_Nops;
}
@@ -572,10 +585,14 @@ class MCBoundaryAlignFragment : public MCFragment {
/// is not meaningful before that.
uint64_t Size = 0;
+ /// When emitting Nops some subtargets have specific nop encodings.
+ const MCSubtargetInfo &STI;
+
public:
- MCBoundaryAlignFragment(Align AlignBoundary, MCSection *Sec = nullptr)
- : MCFragment(FT_BoundaryAlign, false, Sec), AlignBoundary(AlignBoundary) {
- }
+ MCBoundaryAlignFragment(Align AlignBoundary, const MCSubtargetInfo &STI,
+ MCSection *Sec = nullptr)
+ : MCFragment(FT_BoundaryAlign, false, Sec), AlignBoundary(AlignBoundary),
+ STI(STI) {}
uint64_t getSize() const { return Size; }
void setSize(uint64_t Value) { Size = Value; }
@@ -589,6 +606,8 @@ public:
LastFragment = F;
}
+ const MCSubtargetInfo *getSubtargetInfo() const { return &STI; }
+
static bool classof(const MCFragment *F) {
return F->getKind() == MCFragment::FT_BoundaryAlign;
}
diff --git a/llvm/include/llvm/MC/MCInstrAnalysis.h b/llvm/include/llvm/MC/MCInstrAnalysis.h
index 898ca47b13b8..632a7d8f820e 100644
--- a/llvm/include/llvm/MC/MCInstrAnalysis.h
+++ b/llvm/include/llvm/MC/MCInstrAnalysis.h
@@ -154,9 +154,14 @@ public:
/// Given an instruction, tries to get the address of a memory operand. Returns
/// the address on success.
- virtual Optional<uint64_t> evaluateMemoryOperandAddress(const MCInst &Inst,
- uint64_t Addr,
- uint64_t Size) const;
+ virtual Optional<uint64_t>
+ evaluateMemoryOperandAddress(const MCInst &Inst, const MCSubtargetInfo *STI,
+ uint64_t Addr, uint64_t Size) const;
+
+ /// Given an instruction with a memory operand that could require relocation,
+ /// returns the offset within the instruction of that relocation.
+ virtual Optional<uint64_t>
+ getMemoryOperandRelocationOffset(const MCInst &Inst, uint64_t Size) const;
/// Returns (PLT virtual address, GOT virtual address) pairs for PLT entries.
virtual std::vector<std::pair<uint64_t, uint64_t>>
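
A hedged sketch of a caller adapting to the extra MCSubtargetInfo parameter on evaluateMemoryOperandAddress; printTarget and its arguments are illustrative names, not code from the patch:

#include "llvm/ADT/StringExtras.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Illustrative: resolve and print the target of a memory operand while
// disassembling, now passing the subtarget through.
static void printTarget(const MCInstrAnalysis &MIA, const MCSubtargetInfo &STI,
                        const MCInst &Inst, uint64_t Addr, uint64_t Size) {
  if (Optional<uint64_t> Target =
          MIA.evaluateMemoryOperandAddress(Inst, &STI, Addr, Size))
    outs() << "  # target = 0x" << utohexstr(*Target) << "\n";
}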
diff --git a/llvm/include/llvm/MC/MCInstrDesc.h b/llvm/include/llvm/MC/MCInstrDesc.h
index 0e6b677098e8..e8ffd29170e6 100644
--- a/llvm/include/llvm/MC/MCInstrDesc.h
+++ b/llvm/include/llvm/MC/MCInstrDesc.h
@@ -76,7 +76,7 @@ enum OperandType {
OPERAND_FIRST_TARGET = 13,
};
-}
+} // namespace MCOI
/// This holds information about one operand of a machine instruction,
/// indicating the register class for register operands, etc.
@@ -185,7 +185,7 @@ enum Flag {
VariadicOpsAreDefs,
Authenticated,
};
-}
+} // namespace MCID
/// Describe properties that are true of each instruction in the target
/// description file. This captures information about side effects, register
diff --git a/llvm/include/llvm/MC/MCObjectFileInfo.h b/llvm/include/llvm/MC/MCObjectFileInfo.h
index 8ae86ef2a574..ba7450ac64f1 100644
--- a/llvm/include/llvm/MC/MCObjectFileInfo.h
+++ b/llvm/include/llvm/MC/MCObjectFileInfo.h
@@ -225,10 +225,13 @@ protected:
// XCOFF specific sections
MCSection *TOCBaseSection = nullptr;
+ MCSection *ReadOnly8Section = nullptr;
+ MCSection *ReadOnly16Section = nullptr;
public:
void initMCObjectFileInfo(MCContext &MCCtx, bool PIC,
bool LargeCodeModel = false);
+ virtual ~MCObjectFileInfo();
MCContext &getContext() const { return *Ctx; }
bool getSupportsWeakOmittedEHFrame() const {
@@ -251,6 +254,7 @@ public:
return CompactUnwindDwarfEHFrameOnly;
}
+ virtual unsigned getTextSectionAlignment() const { return 4; }
MCSection *getTextSection() const { return TextSection; }
MCSection *getDataSection() const { return DataSection; }
MCSection *getBSSSection() const { return BSSSection; }
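
With the destructor made virtual and getTextSectionAlignment exposed as a virtual hook, a target can override the default 4-byte text alignment. A minimal sketch with a made-up class name:

#include "llvm/MC/MCObjectFileInfo.h"

namespace {
// Hypothetical target-specific object file info that wants 64-byte aligned
// text sections; everything else is inherited unchanged.
class MyTargetObjectFileInfo : public llvm::MCObjectFileInfo {
public:
  unsigned getTextSectionAlignment() const override { return 64; }
};
} // namespace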
diff --git a/llvm/include/llvm/MC/MCObjectStreamer.h b/llvm/include/llvm/MC/MCObjectStreamer.h
index dcdee2b5774b..9d6416e4a18d 100644
--- a/llvm/include/llvm/MC/MCObjectStreamer.h
+++ b/llvm/include/llvm/MC/MCObjectStreamer.h
@@ -137,7 +137,7 @@ public:
void emitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
unsigned ValueSize = 1,
unsigned MaxBytesToEmit = 0) override;
- void emitCodeAlignment(unsigned ByteAlignment,
+ void emitCodeAlignment(unsigned ByteAlignment, const MCSubtargetInfo *STI,
unsigned MaxBytesToEmit = 0) override;
void emitValueToOffset(const MCExpr *Offset, unsigned char Value,
SMLoc Loc) override;
@@ -181,8 +181,8 @@ public:
SMLoc Loc = SMLoc()) override;
void emitFill(const MCExpr &NumValues, int64_t Size, int64_t Expr,
SMLoc Loc = SMLoc()) override;
- void emitNops(int64_t NumBytes, int64_t ControlledNopLength,
- SMLoc Loc) override;
+ void emitNops(int64_t NumBytes, int64_t ControlledNopLength, SMLoc Loc,
+ const MCSubtargetInfo &STI) override;
void emitFileDirective(StringRef Filename) override;
void emitFileDirective(StringRef Filename, StringRef CompilerVerion,
StringRef TimeStamp, StringRef Description) override;
diff --git a/llvm/include/llvm/MC/MCPseudoProbe.h b/llvm/include/llvm/MC/MCPseudoProbe.h
index 1c6926b9a9e6..abc9705f0851 100644
--- a/llvm/include/llvm/MC/MCPseudoProbe.h
+++ b/llvm/include/llvm/MC/MCPseudoProbe.h
@@ -44,17 +44,26 @@
#ifndef LLVM_MC_MCPSEUDOPROBE_H
#define LLVM_MC_MCPSEUDOPROBE_H
-#include "llvm/ADT/MapVector.h"
-#include "llvm/MC/MCSection.h"
-#include <functional>
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/PseudoProbe.h"
+#include "llvm/Support/ErrorOr.h"
+#include <list>
#include <map>
+#include <memory>
+#include <string>
+#include <tuple>
+#include <type_traits>
+#include <unordered_map>
#include <vector>
namespace llvm {
+class MCSection;
class MCStreamer;
class MCSymbol;
class MCObjectStreamer;
+class raw_ostream;
enum class MCPseudoProbeFlag {
// If set, indicates that the probe is encoded as an address delta
@@ -62,69 +71,211 @@ enum class MCPseudoProbeFlag {
AddressDelta = 0x1,
};
+// Function descriptor decoded from .pseudo_probe_desc section
+struct MCPseudoProbeFuncDesc {
+ uint64_t FuncGUID = 0;
+ uint64_t FuncHash = 0;
+ std::string FuncName;
+
+ MCPseudoProbeFuncDesc(uint64_t GUID, uint64_t Hash, StringRef Name)
+ : FuncGUID(GUID), FuncHash(Hash), FuncName(Name){};
+
+ void print(raw_ostream &OS);
+};
+
+class MCPseudoProbe;
+class MCDecodedPseudoProbe;
+
+// An inline frame has the form <Guid, ProbeID>
+using InlineSite = std::tuple<uint64_t, uint32_t>;
+using MCPseudoProbeInlineStack = SmallVector<InlineSite, 8>;
+// GUID to PseudoProbeFuncDesc map
+using GUIDProbeFunctionMap =
+ std::unordered_map<uint64_t, MCPseudoProbeFuncDesc>;
+// Address to pseudo probes map.
+using AddressProbesMap =
+ std::unordered_map<uint64_t, std::list<MCDecodedPseudoProbe>>;
+
+class MCPseudoProbeInlineTree;
+class MCDecodedPseudoProbeInlineTree;
+
+class MCPseudoProbeBase {
+protected:
+ uint64_t Guid;
+ uint64_t Index;
+ uint8_t Attributes;
+ uint8_t Type;
+ // The value should be equal to PseudoProbeReservedId::Last + 1 which is
+ // defined in SampleProfileProbe.h. The header file is not included here to
+ // reduce the dependency from MC to IPO.
+ const static uint32_t PseudoProbeFirstId = 1;
+
+public:
+ MCPseudoProbeBase(uint64_t G, uint64_t I, uint64_t At, uint8_t T)
+ : Guid(G), Index(I), Attributes(At), Type(T) {}
+
+ bool isEntry() const { return Index == PseudoProbeFirstId; }
+
+ uint64_t getGuid() const { return Guid; }
+
+ uint64_t getIndex() const { return Index; }
+
+ uint8_t getAttributes() const { return Attributes; }
+
+ uint8_t getType() const { return Type; }
+
+ bool isBlock() const {
+ return Type == static_cast<uint8_t>(PseudoProbeType::Block);
+ }
+
+ bool isIndirectCall() const {
+ return Type == static_cast<uint8_t>(PseudoProbeType::IndirectCall);
+ }
+
+ bool isDirectCall() const {
+ return Type == static_cast<uint8_t>(PseudoProbeType::DirectCall);
+ }
+
+ bool isCall() const { return isIndirectCall() || isDirectCall(); }
+
+ void setAttributes(uint8_t Attr) { Attributes = Attr; }
+};
+
/// Instances of this class represent a pseudo probe instance for a pseudo probe
/// table entry, which is created when a machine instruction is assembled and
/// uses an address from a temporary label created at the current address in the
/// current section.
-class MCPseudoProbe {
+class MCPseudoProbe : public MCPseudoProbeBase {
MCSymbol *Label;
- uint64_t Guid;
- uint64_t Index;
- uint8_t Type;
- uint8_t Attributes;
public:
MCPseudoProbe(MCSymbol *Label, uint64_t Guid, uint64_t Index, uint64_t Type,
uint64_t Attributes)
- : Label(Label), Guid(Guid), Index(Index), Type(Type),
- Attributes(Attributes) {
+ : MCPseudoProbeBase(Guid, Index, Attributes, Type), Label(Label) {
assert(Type <= 0xFF && "Probe type too big to encode, exceeding 2^8");
assert(Attributes <= 0xFF &&
"Probe attributes too big to encode, exceeding 2^8");
}
MCSymbol *getLabel() const { return Label; }
+ void emit(MCObjectStreamer *MCOS, const MCPseudoProbe *LastProbe) const;
+};
- uint64_t getGuid() const { return Guid; }
+// Represents a callsite with caller function name and probe id
+using MCPseduoProbeFrameLocation = std::pair<StringRef, uint32_t>;
- uint64_t getIndex() const { return Index; }
+class MCDecodedPseudoProbe : public MCPseudoProbeBase {
+ uint64_t Address;
+ MCDecodedPseudoProbeInlineTree *InlineTree;
- uint8_t getType() const { return Type; }
+public:
+ MCDecodedPseudoProbe(uint64_t Ad, uint64_t G, uint32_t I, PseudoProbeType K,
+ uint8_t At, MCDecodedPseudoProbeInlineTree *Tree)
+ : MCPseudoProbeBase(G, I, At, static_cast<uint8_t>(K)), Address(Ad),
+ InlineTree(Tree){};
- uint8_t getAttributes() const { return Attributes; }
+ uint64_t getAddress() const { return Address; }
- void emit(MCObjectStreamer *MCOS, const MCPseudoProbe *LastProbe) const;
+ void setAddress(uint64_t Addr) { Address = Addr; }
+
+ MCDecodedPseudoProbeInlineTree *getInlineTreeNode() const {
+ return InlineTree;
+ }
+
+ // Get the inlined context by traversing the current inline tree backwards;
+ // each tree node has an InlineSite, which is taken as the context.
+ // \p ContextStack is populated in root to leaf order
+ void
+ getInlineContext(SmallVectorImpl<MCPseduoProbeFrameLocation> &ContextStack,
+ const GUIDProbeFunctionMap &GUID2FuncMAP) const;
+
+ // Helper function to get the string from context stack
+ std::string
+ getInlineContextStr(const GUIDProbeFunctionMap &GUID2FuncMAP) const;
+
+ // Print pseudo probe while disassembling
+ void print(raw_ostream &OS, const GUIDProbeFunctionMap &GUID2FuncMAP,
+ bool ShowName) const;
};
-// An inline frame has the form <Guid, ProbeID>
-using InlineSite = std::tuple<uint64_t, uint32_t>;
-using MCPseudoProbeInlineStack = SmallVector<InlineSite, 8>;
+template <typename ProbeType, typename DerivedProbeInlineTreeType>
+class MCPseudoProbeInlineTreeBase {
+ struct InlineSiteHash {
+ uint64_t operator()(const InlineSite &Site) const {
+ return std::get<0>(Site) ^ std::get<1>(Site);
+ }
+ };
-// A Tri-tree based data structure to group probes by inline stack.
-// A tree is allocated for a standalone .text section. A fake
-// instance is created as the root of a tree.
-// A real instance of this class is created for each function, either an
-// unlined function that has code in .text section or an inlined function.
-class MCPseudoProbeInlineTree {
- uint64_t Guid;
+protected:
+ // Track children (e.g. inlinees) of current context
+ using InlinedProbeTreeMap = std::unordered_map<
+ InlineSite, std::unique_ptr<DerivedProbeInlineTreeType>, InlineSiteHash>;
+ InlinedProbeTreeMap Children;
// Set of probes that come with the function.
- std::vector<MCPseudoProbe> Probes;
- // Use std::map for a deterministic output.
- std::map<InlineSite, MCPseudoProbeInlineTree *> Inlinees;
+ std::vector<ProbeType> Probes;
+ MCPseudoProbeInlineTreeBase() {
+ static_assert(std::is_base_of<MCPseudoProbeInlineTreeBase,
+ DerivedProbeInlineTreeType>::value,
+ "DerivedProbeInlineTreeType must be subclass of "
+ "MCPseudoProbeInlineTreeBase");
+ }
+
+public:
+ uint64_t Guid = 0;
// Root node has a GUID 0.
- bool isRoot() { return Guid == 0; }
- MCPseudoProbeInlineTree *getOrAddNode(InlineSite Site);
+ bool isRoot() const { return Guid == 0; }
+ InlinedProbeTreeMap &getChildren() { return Children; }
+ const InlinedProbeTreeMap &getChildren() const { return Children; }
+ std::vector<ProbeType> &getProbes() { return Probes; }
+ void addProbes(ProbeType Probe) { Probes.push_back(Probe); }
+ // Caller node of the inline site
+ MCPseudoProbeInlineTreeBase<ProbeType, DerivedProbeInlineTreeType> *Parent;
+ DerivedProbeInlineTreeType *getOrAddNode(const InlineSite &Site) {
+ auto Ret = Children.emplace(
+ Site, std::make_unique<DerivedProbeInlineTreeType>(Site));
+ Ret.first->second->Parent = this;
+ return Ret.first->second.get();
+ };
+};
+// A trie-based data structure to group probes by inline stack.
+// A tree is allocated for a standalone .text section. A fake
+// instance is created as the root of a tree.
+// A real instance of this class is created for each function, either an
+// outlined function that has code in the .text section or an inlined function.
+class MCPseudoProbeInlineTree
+ : public MCPseudoProbeInlineTreeBase<MCPseudoProbe,
+ MCPseudoProbeInlineTree> {
public:
MCPseudoProbeInlineTree() = default;
- MCPseudoProbeInlineTree(uint64_t Guid) : Guid(Guid) {}
- ~MCPseudoProbeInlineTree();
+ MCPseudoProbeInlineTree(uint64_t Guid) { this->Guid = Guid; }
+ MCPseudoProbeInlineTree(const InlineSite &Site) {
+ this->Guid = std::get<0>(Site);
+ }
+
+ // MCPseudoProbeInlineTree method based on Inlinees
void addPseudoProbe(const MCPseudoProbe &Probe,
const MCPseudoProbeInlineStack &InlineStack);
void emit(MCObjectStreamer *MCOS, const MCPseudoProbe *&LastProbe);
};
+// inline tree node for the decoded pseudo probe
+class MCDecodedPseudoProbeInlineTree
+ : public MCPseudoProbeInlineTreeBase<MCDecodedPseudoProbe *,
+ MCDecodedPseudoProbeInlineTree> {
+public:
+ InlineSite ISite;
+ // Used for decoding
+ uint32_t ChildrenToProcess = 0;
+
+ MCDecodedPseudoProbeInlineTree(){};
+ MCDecodedPseudoProbeInlineTree(const InlineSite &Site) : ISite(Site){};
+
+ // Return false if it's a dummy inline site
+ bool hasInlineSite() const { return std::get<0>(ISite) != 0; }
+};
+
/// Instances of this class represent the pseudo probes inserted into a compile
/// unit.
class MCPseudoProbeSection {
@@ -172,6 +323,83 @@ public:
static int DdgPrintIndent;
#endif
};
+
+class MCPseudoProbeDecoder {
+ // GUID to PseudoProbeFuncDesc map.
+ GUIDProbeFunctionMap GUID2FuncDescMap;
+
+ // Address to probes map.
+ AddressProbesMap Address2ProbesMap;
+
+ // The dummy root of the inline trie. All the outlined functions are direct
+ // children of the dummy root, and all the inlined functions are children of
+ // their inliner. So the relation looks like:
+ // DummyRoot --> OutlinedFunc --> InlinedFunc1 --> InlinedFunc2
+ MCDecodedPseudoProbeInlineTree DummyInlineRoot;
+
+ /// Points to the current location in the buffer.
+ const uint8_t *Data = nullptr;
+
+ /// Points to the end of the buffer.
+ const uint8_t *End = nullptr;
+
+ // Decoding helper function
+ template <typename T> ErrorOr<T> readUnencodedNumber();
+ template <typename T> ErrorOr<T> readUnsignedNumber();
+ template <typename T> ErrorOr<T> readSignedNumber();
+ ErrorOr<StringRef> readString(uint32_t Size);
+
+public:
+ // Decode pseudo_probe_desc section to build GUID to PseudoProbeFuncDesc map.
+ bool buildGUID2FuncDescMap(const uint8_t *Start, std::size_t Size);
+
+ // Decode pseudo_probe section to build address to probes map.
+ bool buildAddress2ProbeMap(const uint8_t *Start, std::size_t Size);
+
+ // Print pseudo_probe_desc section info
+ void printGUID2FuncDescMap(raw_ostream &OS);
+
+ // Print pseudo_probe section info, used along with show-disassembly
+ void printProbeForAddress(raw_ostream &OS, uint64_t Address);
+
+ // do printProbeForAddress for all addresses
+ void printProbesForAllAddresses(raw_ostream &OS);
+
+ // Look up the probe of a call for the input address
+ const MCDecodedPseudoProbe *getCallProbeForAddr(uint64_t Address) const;
+
+ const MCPseudoProbeFuncDesc *getFuncDescForGUID(uint64_t GUID) const;
+
+ // Helper function to populate one probe's inline stack into
+ // \p InlineContextStack.
+ // Current leaf location info will be added if IncludeLeaf is true
+ // Example:
+ // Current probe(bar:3) inlined at foo:2 then inlined at main:1
+ // IncludeLeaf = true, Output: [main:1, foo:2, bar:3]
+ // IncludeLeaf = false, Output: [main:1, foo:2]
+ void getInlineContextForProbe(
+ const MCDecodedPseudoProbe *Probe,
+ SmallVectorImpl<MCPseduoProbeFrameLocation> &InlineContextStack,
+ bool IncludeLeaf) const;
+
+ const AddressProbesMap &getAddress2ProbesMap() const {
+ return Address2ProbesMap;
+ }
+
+ AddressProbesMap &getAddress2ProbesMap() { return Address2ProbesMap; }
+
+ const GUIDProbeFunctionMap &getGUID2FuncDescMap() const {
+ return GUID2FuncDescMap;
+ }
+
+ const MCPseudoProbeFuncDesc *
+ getInlinerDescForProbe(const MCDecodedPseudoProbe *Probe) const;
+
+ const MCDecodedPseudoProbeInlineTree &getDummyInlineRoot() const {
+ return DummyInlineRoot;
+ }
+};
+
} // end namespace llvm
#endif // LLVM_MC_MCPSEUDOPROBE_H
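
The decoder side of this header is new. A hedged sketch of the intended decode flow, assuming the raw bytes of the .pseudo_probe_desc and .pseudo_probe sections have already been extracted; decodeProbes, DescData, ProbeData and CallAddr are illustrative names. Note that MCPseduoProbeFrameLocation is the identifier exactly as the patch spells it.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/MC/MCPseudoProbe.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Illustrative: build both maps, then print the inline context of the call
// probe covering CallAddr in root-to-leaf order.
static bool decodeProbes(ArrayRef<uint8_t> DescData,
                         ArrayRef<uint8_t> ProbeData, uint64_t CallAddr) {
  MCPseudoProbeDecoder Decoder;
  if (!Decoder.buildGUID2FuncDescMap(DescData.data(), DescData.size()) ||
      !Decoder.buildAddress2ProbeMap(ProbeData.data(), ProbeData.size()))
    return false;

  if (const MCDecodedPseudoProbe *Probe =
          Decoder.getCallProbeForAddr(CallAddr)) {
    SmallVector<MCPseduoProbeFrameLocation, 8> Context;
    Decoder.getInlineContextForProbe(Probe, Context, /*IncludeLeaf=*/true);
    for (const auto &Frame : Context)
      errs() << Frame.first << ":" << Frame.second << "\n";
  }
  return true;
}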
diff --git a/llvm/include/llvm/MC/MCRegister.h b/llvm/include/llvm/MC/MCRegister.h
index 72507b7d8ee4..1e8c747785eb 100644
--- a/llvm/include/llvm/MC/MCRegister.h
+++ b/llvm/include/llvm/MC/MCRegister.h
@@ -10,6 +10,7 @@
#define LLVM_MC_MCREGISTER_H
#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/Hashing.h"
#include <cassert>
#include <limits>
diff --git a/llvm/include/llvm/MC/MCSchedule.h b/llvm/include/llvm/MC/MCSchedule.h
index acfbfd387ff3..6dffc158af50 100644
--- a/llvm/include/llvm/MC/MCSchedule.h
+++ b/llvm/include/llvm/MC/MCSchedule.h
@@ -14,7 +14,6 @@
#ifndef LLVM_MC_MCSCHEDULE_H
#define LLVM_MC_MCSCHEDULE_H
-#include "llvm/ADT/Optional.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/DataTypes.h"
#include <cassert>
diff --git a/llvm/include/llvm/MC/MCStreamer.h b/llvm/include/llvm/MC/MCStreamer.h
index fd326ff18712..e00f50f617fa 100644
--- a/llvm/include/llvm/MC/MCStreamer.h
+++ b/llvm/include/llvm/MC/MCStreamer.h
@@ -123,6 +123,8 @@ public:
/// This is used to emit bytes in \p Data as sequence of .byte directives.
virtual void emitRawBytes(StringRef Data);
+ virtual void emitConstantPools();
+
virtual void finish();
};
@@ -165,7 +167,7 @@ public:
virtual void emitThumbSet(MCSymbol *Symbol, const MCExpr *Value);
- void finish() override;
+ void emitConstantPools() override;
/// Reset any state between object emissions, i.e. the equivalent of
/// MCStreamer's reset method.
@@ -445,7 +447,7 @@ public:
}
/// Create the default sections and set the initial one.
- virtual void InitSections(bool NoExecStack);
+ virtual void initSections(bool NoExecStack, const MCSubtargetInfo &STI);
MCSymbol *endSection(MCSection *Section);
@@ -797,7 +799,7 @@ public:
SMLoc Loc = SMLoc());
virtual void emitNops(int64_t NumBytes, int64_t ControlledNopLength,
- SMLoc Loc);
+ SMLoc Loc, const MCSubtargetInfo& STI);
/// Emit NumBytes worth of zeros.
/// This function properly handles data in virtual sections.
@@ -831,10 +833,12 @@ public:
///
/// \param ByteAlignment - The alignment to reach. This must be a power of
/// two on some targets.
+ /// \param STI - The MCSubtargetInfo in operation when padding is emitted.
/// \param MaxBytesToEmit - The maximum numbers of bytes to emit, or 0. If
/// the alignment cannot be reached in this many bytes, no bytes are
/// emitted.
virtual void emitCodeAlignment(unsigned ByteAlignment,
+ const MCSubtargetInfo *STI,
unsigned MaxBytesToEmit = 0);
/// Emit some number of copies of \p Value until the byte offset \p
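
A minimal caller-side sketch for the updated emitCodeAlignment and emitNops signatures; emitPadding and the constants are illustrative:

#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"

using namespace llvm;

// Illustrative: both padding entry points now need to know which subtarget
// the nops will be encoded for.
static void emitPadding(MCStreamer &Out, const MCSubtargetInfo &STI) {
  Out.emitCodeAlignment(16, &STI);  // 16-byte code alignment.
  Out.emitNops(8, 4, SMLoc(), STI); // 8 bytes of nops, each at most 4 bytes.
}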
diff --git a/llvm/include/llvm/MC/MCSymbolWasm.h b/llvm/include/llvm/MC/MCSymbolWasm.h
index 852ab678e616..5a4852e0e895 100644
--- a/llvm/include/llvm/MC/MCSymbolWasm.h
+++ b/llvm/include/llvm/MC/MCSymbolWasm.h
@@ -27,7 +27,6 @@ class MCSymbolWasm : public MCSymbol {
wasm::WasmSignature *Signature = nullptr;
Optional<wasm::WasmGlobalType> GlobalType;
Optional<wasm::WasmTableType> TableType;
- Optional<wasm::WasmTagType> TagType;
/// An expression describing how to calculate the size of a symbol. If a
/// symbol has no size this field will be NULL.
@@ -67,6 +66,11 @@ public:
modifyFlags(wasm::WASM_SYMBOL_NO_STRIP, wasm::WASM_SYMBOL_NO_STRIP);
}
+ bool isTLS() const { return getFlags() & wasm::WASM_SYMBOL_TLS; }
+ void setTLS() const {
+ modifyFlags(wasm::WASM_SYMBOL_TLS, wasm::WASM_SYMBOL_TLS);
+ }
+
bool isWeak() const { return IsWeak; }
void setWeak(bool isWeak) { IsWeak = isWeak; }
@@ -142,12 +146,6 @@ public:
wasm::WasmLimits Limits = {wasm::WASM_LIMITS_FLAG_NONE, 0, 0};
setTableType({uint8_t(VT), Limits});
}
-
- const wasm::WasmTagType &getTagType() const {
- assert(TagType.hasValue());
- return TagType.getValue();
- }
- void setTagType(wasm::WasmTagType ET) { TagType = ET; }
};
} // end namespace llvm
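
A small hedged sketch of the new TLS flag helpers on MCSymbolWasm; markThreadLocal is an illustrative name:

#include "llvm/MC/MCSymbolWasm.h"
#include <cassert>

using namespace llvm;

// Illustrative: mark a wasm symbol as thread-local and read the flag back.
static void markThreadLocal(MCSymbolWasm &Sym) {
  Sym.setTLS();
  assert(Sym.isTLS() && "TLS flag should round-trip");
}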
diff --git a/llvm/include/llvm/MC/MCWasmStreamer.h b/llvm/include/llvm/MC/MCWasmStreamer.h
index 6651f071f799..818f59e5ab3e 100644
--- a/llvm/include/llvm/MC/MCWasmStreamer.h
+++ b/llvm/include/llvm/MC/MCWasmStreamer.h
@@ -41,6 +41,9 @@ public:
/// @{
void changeSection(MCSection *Section, const MCExpr *Subsection) override;
+ void emitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override;
+ void emitLabelAtPos(MCSymbol *Symbol, SMLoc Loc, MCFragment *F,
+ uint64_t Offset) override;
void emitAssemblerFlag(MCAssemblerFlag Flag) override;
void emitThumbFunc(MCSymbol *Func) override;
void emitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) override;
@@ -68,6 +71,8 @@ private:
void emitInstToFragment(const MCInst &Inst, const MCSubtargetInfo &) override;
void emitInstToData(const MCInst &Inst, const MCSubtargetInfo &) override;
+ void fixSymbolsInTLSFixups(const MCExpr *expr);
+
/// Merge the content of the fragment \p EF into the fragment \p DF.
void mergeFragment(MCDataFragment *, MCDataFragment *);
diff --git a/llvm/include/llvm/MC/MCWinCOFFStreamer.h b/llvm/include/llvm/MC/MCWinCOFFStreamer.h
index 53b2ef0bd96e..af1ed6faf753 100644
--- a/llvm/include/llvm/MC/MCWinCOFFStreamer.h
+++ b/llvm/include/llvm/MC/MCWinCOFFStreamer.h
@@ -39,7 +39,7 @@ public:
/// \name MCStreamer interface
/// \{
- void InitSections(bool NoExecStack) override;
+ void initSections(bool NoExecStack, const MCSubtargetInfo &STI) override;
void emitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override;
void emitAssemblerFlag(MCAssemblerFlag Flag) override;
void emitThumbFunc(MCSymbol *Func) override;
diff --git a/llvm/include/llvm/Support/TargetRegistry.h b/llvm/include/llvm/MC/TargetRegistry.h
index e661ae26cb4e..da9a9269edbf 100644
--- a/llvm/include/llvm/Support/TargetRegistry.h
+++ b/llvm/include/llvm/MC/TargetRegistry.h
@@ -1,4 +1,4 @@
-//===- Support/TargetRegistry.h - Target Registration -----------*- C++ -*-===//
+//===- MC/TargetRegistry.h - Target Registration ----------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -15,8 +15,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_SUPPORT_TARGETREGISTRY_H
-#define LLVM_SUPPORT_TARGETREGISTRY_H
+#ifndef LLVM_MC_TARGETREGISTRY_H
+#define LLVM_MC_TARGETREGISTRY_H
#include "llvm-c/DisassemblerTypes.h"
#include "llvm/ADT/Optional.h"
@@ -59,6 +59,11 @@ class raw_ostream;
class raw_pwrite_stream;
class TargetMachine;
class TargetOptions;
+namespace mca {
+class CustomBehaviour;
+class InstrPostProcess;
+class SourceMgr;
+} // namespace mca
MCStreamer *createNullStreamer(MCContext &Ctx);
// Takes ownership of \p TAB and \p CE.
@@ -114,6 +119,13 @@ MCSymbolizer *createMCSymbolizer(const Triple &TT, LLVMOpInfoCallback GetOpInfo,
void *DisInfo, MCContext *Ctx,
std::unique_ptr<MCRelocationInfo> &&RelInfo);
+mca::CustomBehaviour *createCustomBehaviour(const MCSubtargetInfo &STI,
+ const mca::SourceMgr &SrcMgr,
+ const MCInstrInfo &MCII);
+
+mca::InstrPostProcess *createInstrPostProcess(const MCSubtargetInfo &STI,
+ const MCInstrInfo &MCII);
+
/// Target - Wrapper for Target specific information.
///
/// For registration purposes, this is a POD type so that targets can be
@@ -206,6 +218,15 @@ public:
LLVMSymbolLookupCallback SymbolLookUp, void *DisInfo, MCContext *Ctx,
std::unique_ptr<MCRelocationInfo> &&RelInfo);
+ using CustomBehaviourCtorTy =
+ mca::CustomBehaviour *(*)(const MCSubtargetInfo &STI,
+ const mca::SourceMgr &SrcMgr,
+ const MCInstrInfo &MCII);
+
+ using InstrPostProcessCtorTy =
+ mca::InstrPostProcess *(*)(const MCSubtargetInfo &STI,
+ const MCInstrInfo &MCII);
+
private:
/// Next - The next registered target in the linked list, maintained by the
/// TargetRegistry.
@@ -305,6 +326,14 @@ private:
/// MCSymbolizer, if registered (default = llvm::createMCSymbolizer)
MCSymbolizerCtorTy MCSymbolizerCtorFn = nullptr;
+ /// CustomBehaviourCtorFn - Construction function for this target's
+ /// CustomBehaviour, if registered (default = nullptr).
+ CustomBehaviourCtorTy CustomBehaviourCtorFn = nullptr;
+
+ /// InstrPostProcessCtorFn - Construction function for this target's
+ /// InstrPostProcess, if registered (default = nullptr).
+ InstrPostProcessCtorTy InstrPostProcessCtorFn = nullptr;
+
public:
Target() = default;
@@ -623,6 +652,25 @@ public:
std::move(RelInfo));
}
+ /// createCustomBehaviour - Create a target specific CustomBehaviour.
+ /// This class is used by llvm-mca and requires backend functionality.
+ mca::CustomBehaviour *createCustomBehaviour(const MCSubtargetInfo &STI,
+ const mca::SourceMgr &SrcMgr,
+ const MCInstrInfo &MCII) const {
+ if (CustomBehaviourCtorFn)
+ return CustomBehaviourCtorFn(STI, SrcMgr, MCII);
+ return nullptr;
+ }
+
+ /// createInstrPostProcess - Create a target specific InstrPostProcess.
+ /// This class is used by llvm-mca and requires backend functionality.
+ mca::InstrPostProcess *createInstrPostProcess(const MCSubtargetInfo &STI,
+ const MCInstrInfo &MCII) const {
+ if (InstrPostProcessCtorFn)
+ return InstrPostProcessCtorFn(STI, MCII);
+ return nullptr;
+ }
+
/// @}
};
@@ -959,6 +1007,34 @@ struct TargetRegistry {
T.MCSymbolizerCtorFn = Fn;
}
+ /// RegisterCustomBehaviour - Register a CustomBehaviour
+ /// implementation for the given target.
+ ///
+ /// Clients are responsible for ensuring that registration doesn't occur
+ /// while another thread is attempting to access the registry. Typically
+ /// this is done by initializing all targets at program startup.
+ ///
+ /// @param T - The target being registered.
+ /// @param Fn - A function to construct a CustomBehaviour for the target.
+ static void RegisterCustomBehaviour(Target &T,
+ Target::CustomBehaviourCtorTy Fn) {
+ T.CustomBehaviourCtorFn = Fn;
+ }
+
+ /// RegisterInstrPostProcess - Register an InstrPostProcess
+ /// implementation for the given target.
+ ///
+ /// Clients are responsible for ensuring that registration doesn't occur
+ /// while another thread is attempting to access the registry. Typically
+ /// this is done by initializing all targets at program startup.
+ ///
+ /// @param T - The target being registered.
+ /// @param Fn - A function to construct an InstrPostProcess for the target.
+ static void RegisterInstrPostProcess(Target &T,
+ Target::InstrPostProcessCtorTy Fn) {
+ T.InstrPostProcessCtorFn = Fn;
+ }
+
/// @}
};
@@ -1294,4 +1370,4 @@ private:
} // end namespace llvm
-#endif // LLVM_SUPPORT_TARGETREGISTRY_H
+#endif // LLVM_MC_TARGETREGISTRY_H
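
A hedged sketch of how a backend might use the new registration hooks, typically from its LLVMInitialize<T>TargetMC(). TheMyTarget, registerMyTargetMCA and the factory names are placeholders, and a real target would return its own CustomBehaviour/InstrPostProcess subclasses; the base classes are constructed here only to keep the sketch self-contained, assuming they remain directly constructible.

#include "llvm/MC/TargetRegistry.h"
#include "llvm/MCA/CustomBehaviour.h"

using namespace llvm;

static mca::CustomBehaviour *
createMyCustomBehaviour(const MCSubtargetInfo &STI,
                        const mca::SourceMgr &SrcMgr, const MCInstrInfo &MCII) {
  return new mca::CustomBehaviour(STI, SrcMgr, MCII);
}

static mca::InstrPostProcess *
createMyInstrPostProcess(const MCSubtargetInfo &STI, const MCInstrInfo &MCII) {
  return new mca::InstrPostProcess(STI, MCII);
}

// Register the llvm-mca hooks for a target whose Target singleton is passed in.
void registerMyTargetMCA(Target &TheMyTarget) {
  TargetRegistry::RegisterCustomBehaviour(TheMyTarget, createMyCustomBehaviour);
  TargetRegistry::RegisterInstrPostProcess(TheMyTarget,
                                           createMyInstrPostProcess);
}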
diff --git a/llvm/include/llvm/MCA/CustomBehaviour.h b/llvm/include/llvm/MCA/CustomBehaviour.h
index 655a9c49c599..395b07cf722b 100644
--- a/llvm/include/llvm/MCA/CustomBehaviour.h
+++ b/llvm/include/llvm/MCA/CustomBehaviour.h
@@ -22,6 +22,7 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MCA/SourceMgr.h"
+#include "llvm/MCA/View.h"
namespace llvm {
namespace mca {
@@ -55,29 +56,53 @@ public:
class CustomBehaviour {
protected:
const MCSubtargetInfo &STI;
- const SourceMgr &SrcMgr;
+ const mca::SourceMgr &SrcMgr;
const MCInstrInfo &MCII;
public:
- CustomBehaviour(const MCSubtargetInfo &STI, const SourceMgr &SrcMgr,
+ CustomBehaviour(const MCSubtargetInfo &STI, const mca::SourceMgr &SrcMgr,
const MCInstrInfo &MCII)
: STI(STI), SrcMgr(SrcMgr), MCII(MCII) {}
virtual ~CustomBehaviour();
- // Before the llvm-mca pipeline dispatches an instruction, it first checks
- // for any register or resource dependencies / hazards. If it doesn't find
- // any, this method will be invoked to determine if there are any custom
- // hazards that the instruction needs to wait for.
- // The return value of this method is the number of cycles that the
- // instruction needs to wait for.
- // It's safe to underestimate the number of cycles to wait for since these
- // checks will be invoked again before the intruction gets dispatched.
- // However, it's not safe (accurate) to overestimate the number of cycles
- // to wait for since the instruction will wait for AT LEAST that number of
- // cycles before attempting to be dispatched again.
+ /// Before the llvm-mca pipeline dispatches an instruction, it first checks
+ /// for any register or resource dependencies / hazards. If it doesn't find
+ /// any, this method will be invoked to determine if there are any custom
+ /// hazards that the instruction needs to wait for.
+ /// The return value of this method is the number of cycles that the
+ /// instruction needs to wait for.
+ /// It's safe to underestimate the number of cycles to wait for since these
+ /// checks will be invoked again before the instruction gets dispatched.
+ /// However, it's not safe (accurate) to overestimate the number of cycles
+ /// to wait for since the instruction will wait for AT LEAST that number of
+ /// cycles before attempting to be dispatched again.
virtual unsigned checkCustomHazard(ArrayRef<InstRef> IssuedInst,
const InstRef &IR);
+
+ // Functions that target CBs can override to return a list of
+ // target specific Views that need to live within /lib/Target/ so that
+ // they can benefit from the target CB or from backend functionality that is
+ // not already exposed through MC-layer classes. Keep in mind how this
+ // function is used: it is called from within llvm-mca.cpp, and each
+ // unique_ptr<View> it returns is passed into the PipelinePrinter::addView()
+ // function, which will then std::move the View into its own vector of
+ // Views. So any CB that overrides this function must not rely on the
+ // current address or reference of the View
+ // unique_ptrs. If you do need the CB and View to be able to communicate with
+ // each other, consider giving the View a reference or pointer to the CB when
+ // the View is constructed. Then the View can query the CB for information
+ // when it needs it.
+ /// Return a vector of Views that will be added before all other Views.
+ virtual std::vector<std::unique_ptr<View>>
+ getStartViews(llvm::MCInstPrinter &IP, llvm::ArrayRef<llvm::MCInst> Insts);
+ /// Return a vector of Views that will be added after the InstructionInfoView.
+ virtual std::vector<std::unique_ptr<View>>
+ getPostInstrInfoViews(llvm::MCInstPrinter &IP,
+ llvm::ArrayRef<llvm::MCInst> Insts);
+ /// Return a vector of Views that will be added after all other Views.
+ virtual std::vector<std::unique_ptr<View>>
+ getEndViews(llvm::MCInstPrinter &IP, llvm::ArrayRef<llvm::MCInst> Insts);
};
} // namespace mca
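
The new virtual View hooks are meant to be overridden by target CustomBehaviour subclasses. A minimal hedged sketch with a hypothetical class name; the returned vector is left empty so the example stays self-contained:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MCA/CustomBehaviour.h"
#include <memory>
#include <vector>

using namespace llvm;
using namespace llvm::mca;

class MyTargetCustomBehaviour : public CustomBehaviour {
public:
  using CustomBehaviour::CustomBehaviour;

  // Views returned here are printed before all other llvm-mca Views; a real
  // backend would populate this from code under lib/Target/<T>/MCA/.
  std::vector<std::unique_ptr<View>>
  getStartViews(MCInstPrinter &IP, ArrayRef<MCInst> Insts) override {
    return {};
  }
};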
diff --git a/llvm/include/llvm/MCA/Instruction.h b/llvm/include/llvm/MCA/Instruction.h
index 988cddcbe013..3eb32186d551 100644
--- a/llvm/include/llvm/MCA/Instruction.h
+++ b/llvm/include/llvm/MCA/Instruction.h
@@ -46,7 +46,7 @@ class MCAOperand {
kSFPImmediate, ///< Single-floating-point immediate operand.
kDFPImmediate, ///< Double-Floating-point immediate operand.
};
- MCAOperandType Kind = kInvalid;
+ MCAOperandType Kind;
union {
unsigned RegVal;
@@ -62,7 +62,7 @@ class MCAOperand {
unsigned Index;
public:
- MCAOperand() : FPImmVal(0) {}
+ MCAOperand() : Kind(kInvalid), FPImmVal(), Index() {}
bool isValid() const { return Kind != kInvalid; }
bool isReg() const { return Kind == kRegister; }
diff --git a/llvm/include/llvm/MCA/Stages/InOrderIssueStage.h b/llvm/include/llvm/MCA/Stages/InOrderIssueStage.h
index b7006e761647..42f386a13d85 100644
--- a/llvm/include/llvm/MCA/Stages/InOrderIssueStage.h
+++ b/llvm/include/llvm/MCA/Stages/InOrderIssueStage.h
@@ -21,6 +21,7 @@
namespace llvm {
namespace mca {
+class LSUnit;
class RegisterFile;
struct StallInfo {
@@ -29,6 +30,7 @@ struct StallInfo {
REGISTER_DEPS,
DISPATCH,
DELAY,
+ LOAD_STORE,
CUSTOM_STALL
};
@@ -54,6 +56,7 @@ class InOrderIssueStage final : public Stage {
RegisterFile &PRF;
ResourceManager RM;
CustomBehaviour &CB;
+ LSUnit &LSU;
/// Instructions that were issued, but not executed yet.
SmallVector<InstRef, 4> IssuedInst;
@@ -110,7 +113,7 @@ class InOrderIssueStage final : public Stage {
public:
InOrderIssueStage(const MCSubtargetInfo &STI, RegisterFile &PRF,
- CustomBehaviour &CB);
+ CustomBehaviour &CB, LSUnit &LSU);
unsigned getIssueWidth() const;
bool isAvailable(const InstRef &) const override;
diff --git a/llvm/tools/llvm-mca/Views/View.h b/llvm/include/llvm/MCA/View.h
index c604733d4ec9..ff8fc1ceb3f1 100644
--- a/llvm/tools/llvm-mca/Views/View.h
+++ b/llvm/include/llvm/MCA/View.h
@@ -12,8 +12,8 @@
///
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_LLVM_MCA_VIEW_H
-#define LLVM_TOOLS_LLVM_MCA_VIEW_H
+#ifndef LLVM_MCA_VIEW_H
+#define LLVM_MCA_VIEW_H
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MCA/HWEventListener.h"
diff --git a/llvm/include/llvm/Object/ELF.h b/llvm/include/llvm/Object/ELF.h
index c5f966891bd0..37f23c435ae1 100644
--- a/llvm/include/llvm/Object/ELF.h
+++ b/llvm/include/llvm/Object/ELF.h
@@ -81,10 +81,6 @@ getElfArchType(StringRef Object) {
(uint8_t)Object[ELF::EI_DATA]);
}
-static inline Error createError(const Twine &Err) {
- return make_error<StringError>(Err, object_error::parse_failed);
-}
-
enum PPCInstrMasks : uint64_t {
PADDI_R12_NO_DISP = 0x0610000039800000,
ADDIS_R12_TO_R2_NO_DISP = 0x3D820000,
@@ -392,8 +388,7 @@ public:
Expected<ArrayRef<T>> getSectionContentsAsArray(const Elf_Shdr &Sec) const;
Expected<ArrayRef<uint8_t>> getSectionContents(const Elf_Shdr &Sec) const;
Expected<ArrayRef<uint8_t>> getSegmentContents(const Elf_Phdr &Phdr) const;
- Expected<std::vector<Elf_BBAddrMap>>
- decodeBBAddrMap(const Elf_Shdr &Sec) const;
+ Expected<std::vector<BBAddrMap>> decodeBBAddrMap(const Elf_Shdr &Sec) const;
};
using ELF32LEFile = ELFFile<ELF32LE>;
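
decodeBBAddrMap now returns the ELFT-independent BBAddrMap type, so the decoded entries can flow into generic code. A hedged sketch; dumpBBAddrMap is illustrative, and only the Addr field shown in this hunk is used:

#include "llvm/ADT/StringExtras.h"
#include "llvm/Object/ELF.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::object;

template <class ELFT>
static Error dumpBBAddrMap(const ELFFile<ELFT> &Obj,
                           const typename ELFT::Shdr &Sec) {
  Expected<std::vector<BBAddrMap>> MapsOrErr = Obj.decodeBBAddrMap(Sec);
  if (!MapsOrErr)
    return MapsOrErr.takeError();
  for (const BBAddrMap &Map : *MapsOrErr)
    outs() << "function at 0x" << utohexstr(Map.Addr) << "\n";
  return Error::success();
}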
diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h
index c87a09f86fae..716b94d92d03 100644
--- a/llvm/include/llvm/Object/ELFObjectFile.h
+++ b/llvm/include/llvm/Object/ELFObjectFile.h
@@ -96,6 +96,10 @@ public:
std::vector<std::pair<Optional<DataRefImpl>, uint64_t>>
getPltAddresses() const;
+
+ /// Returns a vector containing a symbol version for each dynamic symbol.
+ /// Returns an empty vector if version sections do not exist.
+ Expected<std::vector<VersionEntry>> readDynsymVersions() const;
};
class ELFSectionRef : public SectionRef {
@@ -407,7 +411,8 @@ public:
const Elf_Shdr *getRelSection(DataRefImpl Rel) const {
auto RelSecOrErr = EF.getSection(Rel.d.a);
if (!RelSecOrErr)
- report_fatal_error(errorToErrorCode(RelSecOrErr.takeError()).message());
+ report_fatal_error(
+ Twine(errorToErrorCode(RelSecOrErr.takeError()).message()));
return *RelSecOrErr;
}
@@ -728,7 +733,8 @@ Expected<uint32_t> ELFObjectFile<ELFT>::getSymbolFlags(DataRefImpl Sym) const {
} else if (EF.getHeader().e_machine == ELF::EM_ARM) {
if (Expected<StringRef> NameOrErr = getSymbolName(Sym)) {
StringRef Name = *NameOrErr;
- if (Name.startswith("$d") || Name.startswith("$t") ||
+ // TODO Investigate why empty name symbols need to be marked.
+ if (Name.empty() || Name.startswith("$d") || Name.startswith("$t") ||
Name.startswith("$a"))
Result |= SymbolRef::SF_FormatSpecific;
} else {
@@ -966,7 +972,8 @@ ELFObjectFile<ELFT>::section_rel_end(DataRefImpl Sec) const {
// Error check sh_link here so that getRelocationSymbol can just use it.
auto SymSecOrErr = EF.getSection(RelSec->sh_link);
if (!SymSecOrErr)
- report_fatal_error(errorToErrorCode(SymSecOrErr.takeError()).message());
+ report_fatal_error(
+ Twine(errorToErrorCode(SymSecOrErr.takeError()).message()));
RelData.d.b += S->sh_size / S->sh_entsize;
return relocation_iterator(RelocationRef(RelData, this));
@@ -1055,7 +1062,7 @@ ELFObjectFile<ELFT>::getRel(DataRefImpl Rel) const {
assert(getRelSection(Rel)->sh_type == ELF::SHT_REL);
auto Ret = EF.template getEntry<Elf_Rel>(Rel.d.a, Rel.d.b);
if (!Ret)
- report_fatal_error(errorToErrorCode(Ret.takeError()).message());
+ report_fatal_error(Twine(errorToErrorCode(Ret.takeError()).message()));
return *Ret;
}
@@ -1065,7 +1072,7 @@ ELFObjectFile<ELFT>::getRela(DataRefImpl Rela) const {
assert(getRelSection(Rela)->sh_type == ELF::SHT_RELA);
auto Ret = EF.template getEntry<Elf_Rela>(Rela.d.a, Rela.d.b);
if (!Ret)
- report_fatal_error(errorToErrorCode(Ret.takeError()).message());
+ report_fatal_error(Twine(errorToErrorCode(Ret.takeError()).message()));
return *Ret;
}
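
A short hedged sketch of the new readDynsymVersions entry point on ELFObjectFileBase; only the vector size is inspected so the example does not assume anything about VersionEntry's fields:

#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::object;

static void dumpVersionCount(const ELFObjectFileBase &Obj) {
  Expected<std::vector<VersionEntry>> VersOrErr = Obj.readDynsymVersions();
  if (!VersOrErr) {
    consumeError(VersOrErr.takeError());
    return;
  }
  outs() << VersOrErr->size() << " dynamic symbol versions\n";
}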
diff --git a/llvm/include/llvm/Object/ELFTypes.h b/llvm/include/llvm/Object/ELFTypes.h
index 54ebd751d8d2..e59a63d93989 100644
--- a/llvm/include/llvm/Object/ELFTypes.h
+++ b/llvm/include/llvm/Object/ELFTypes.h
@@ -44,7 +44,6 @@ template <class ELFT> struct Elf_Nhdr_Impl;
template <class ELFT> class Elf_Note_Impl;
template <class ELFT> class Elf_Note_Iterator_Impl;
template <class ELFT> struct Elf_CGProfile_Impl;
-template <class ELFT> struct Elf_BBAddrMap_Impl;
template <endianness E, bool Is64> struct ELFType {
private:
@@ -76,7 +75,6 @@ public:
using Note = Elf_Note_Impl<ELFType<E, Is64>>;
using NoteIterator = Elf_Note_Iterator_Impl<ELFType<E, Is64>>;
using CGProfile = Elf_CGProfile_Impl<ELFType<E, Is64>>;
- using BBAddrMap = Elf_BBAddrMap_Impl<ELFType<E, Is64>>;
using DynRange = ArrayRef<Dyn>;
using ShdrRange = ArrayRef<Shdr>;
using SymRange = ArrayRef<Sym>;
@@ -131,7 +129,6 @@ using ELF64BE = ELFType<support::big, true>;
using Elf_Note = typename ELFT::Note; \
using Elf_Note_Iterator = typename ELFT::NoteIterator; \
using Elf_CGProfile = typename ELFT::CGProfile; \
- using Elf_BBAddrMap = typename ELFT::BBAddrMap; \
using Elf_Dyn_Range = typename ELFT::DynRange; \
using Elf_Shdr_Range = typename ELFT::ShdrRange; \
using Elf_Sym_Range = typename ELFT::SymRange; \
@@ -797,9 +794,8 @@ template <class ELFT> struct Elf_Mips_ABIFlags {
};
// Struct representing the BBAddrMap for one function.
-template <class ELFT> struct Elf_BBAddrMap_Impl {
- LLVM_ELF_IMPORT_TYPES_ELFT(ELFT)
- uintX_t Addr; // Function address
+struct BBAddrMap {
+ uint64_t Addr; // Function address
// Struct representing the BBAddrMap information for one basic block.
struct BBEntry {
uint32_t Offset; // Offset of basic block relative to function start.
diff --git a/llvm/include/llvm/Object/Error.h b/llvm/include/llvm/Object/Error.h
index 07744188444a..1fc1f6603a36 100644
--- a/llvm/include/llvm/Object/Error.h
+++ b/llvm/include/llvm/Object/Error.h
@@ -82,6 +82,10 @@ private:
/// error() function needs to called on the llvm::Error.
Error isNotObjectErrorInvalidFileType(llvm::Error Err);
+inline Error createError(const Twine &Err) {
+ return make_error<StringError>(Err, object_error::parse_failed);
+}
+
} // end namespace object.
} // end namespace llvm.
diff --git a/llvm/include/llvm/Object/MachO.h b/llvm/include/llvm/Object/MachO.h
index d2ad12e98deb..ca5d63e4074f 100644
--- a/llvm/include/llvm/Object/MachO.h
+++ b/llvm/include/llvm/Object/MachO.h
@@ -311,6 +311,9 @@ public:
bool isSectionBitcode(DataRefImpl Sec) const override;
bool isDebugSection(DataRefImpl Sec) const override;
+ /// Return the raw contents of an entire segment.
+ ArrayRef<uint8_t> getSegmentContents(StringRef SegmentName) const;
+
/// When dsymutil generates the companion file, it strips all unnecessary
/// sections (e.g. everything in the _TEXT segment) by omitting their body
/// and setting the offset in their corresponding load command to zero.
diff --git a/llvm/include/llvm/Object/Wasm.h b/llvm/include/llvm/Object/Wasm.h
index 2cea950fcf25..e4802c087b8b 100644
--- a/llvm/include/llvm/Object/Wasm.h
+++ b/llvm/include/llvm/Object/Wasm.h
@@ -9,7 +9,7 @@
// This file declares the WasmObjectFile class, which implements the ObjectFile
// interface for Wasm files.
//
-// See: https://github.com/WebAssembly/design/blob/master/BinaryEncoding.md
+// See: https://github.com/WebAssembly/design/blob/main/BinaryEncoding.md
//
//===----------------------------------------------------------------------===//
@@ -37,15 +37,13 @@ public:
WasmSymbol(const wasm::WasmSymbolInfo &Info,
const wasm::WasmGlobalType *GlobalType,
const wasm::WasmTableType *TableType,
- const wasm::WasmTagType *TagType,
const wasm::WasmSignature *Signature)
: Info(Info), GlobalType(GlobalType), TableType(TableType),
- TagType(TagType), Signature(Signature) {}
+ Signature(Signature) {}
const wasm::WasmSymbolInfo &Info;
const wasm::WasmGlobalType *GlobalType;
const wasm::WasmTableType *TableType;
- const wasm::WasmTagType *TagType;
const wasm::WasmSignature *Signature;
bool isTypeFunction() const {
@@ -138,7 +136,6 @@ public:
return TargetFeatures;
}
ArrayRef<wasm::WasmSignature> types() const { return Signatures; }
- ArrayRef<uint32_t> functionTypes() const { return FunctionTypes; }
ArrayRef<wasm::WasmImport> imports() const { return Imports; }
ArrayRef<wasm::WasmTable> tables() const { return Tables; }
ArrayRef<wasm::WasmLimits> memories() const { return Memories; }
@@ -260,6 +257,7 @@ private:
// Custom section types
Error parseDylinkSection(ReadContext &Ctx);
+ Error parseDylink0Section(ReadContext &Ctx);
Error parseNameSection(ReadContext &Ctx);
Error parseLinkingSection(ReadContext &Ctx);
Error parseLinkingSectionSymtab(ReadContext &Ctx);
@@ -274,7 +272,6 @@ private:
wasm::WasmProducerInfo ProducerInfo;
std::vector<wasm::WasmFeatureEntry> TargetFeatures;
std::vector<wasm::WasmSignature> Signatures;
- std::vector<uint32_t> FunctionTypes;
std::vector<wasm::WasmTable> Tables;
std::vector<wasm::WasmLimits> Memories;
std::vector<wasm::WasmGlobal> Globals;
diff --git a/llvm/include/llvm/Object/XCOFFObjectFile.h b/llvm/include/llvm/Object/XCOFFObjectFile.h
index 7d024fbc3eae..94136afc45ea 100644
--- a/llvm/include/llvm/Object/XCOFFObjectFile.h
+++ b/llvm/include/llvm/Object/XCOFFObjectFile.h
@@ -51,6 +51,101 @@ struct XCOFFFileHeader64 {
support::ubig32_t NumberOfSymTableEntries;
};
+template <typename T> struct XCOFFAuxiliaryHeader {
+ static constexpr uint8_t AuxiHeaderFlagMask = 0xF0;
+ static constexpr uint8_t AuxiHeaderTDataAlignmentMask = 0x0F;
+
+public:
+ uint8_t getFlag() const {
+ return static_cast<const T *>(this)->FlagAndTDataAlignment &
+ AuxiHeaderFlagMask;
+ }
+ uint8_t getTDataAlignment() const {
+ return static_cast<const T *>(this)->FlagAndTDataAlignment &
+ AuxiHeaderTDataAlignmentMask;
+ }
+};
+
+struct XCOFFAuxiliaryHeader32 : XCOFFAuxiliaryHeader<XCOFFAuxiliaryHeader32> {
+ support::ubig16_t
+ AuxMagic; ///< If the value of the o_vstamp field is greater than 1, the
+ ///< o_mflags field is reserved for future use and it should
+ ///< contain 0. Otherwise, this field is not used.
+ support::ubig16_t
+ Version; ///< The valid values are 1 and 2. When the o_vstamp field is 2
+ ///< in an XCOFF32 file, the new interpretation of the n_type
+ ///< field in the symbol table entry is used.
+ support::ubig32_t TextSize;
+ support::ubig32_t InitDataSize;
+ support::ubig32_t BssDataSize;
+ support::ubig32_t EntryPointAddr;
+ support::ubig32_t TextStartAddr;
+ support::ubig32_t DataStartAddr;
+ support::ubig32_t TOCAnchorAddr;
+ support::ubig16_t SecNumOfEntryPoint;
+ support::ubig16_t SecNumOfText;
+ support::ubig16_t SecNumOfData;
+ support::ubig16_t SecNumOfTOC;
+ support::ubig16_t SecNumOfLoader;
+ support::ubig16_t SecNumOfBSS;
+ support::ubig16_t MaxAlignOfText;
+ support::ubig16_t MaxAlignOfData;
+ support::ubig16_t ModuleType;
+ uint8_t CpuFlag;
+ uint8_t CpuType;
+ support::ubig32_t MaxStackSize; ///< If the value is 0, the system default
+ ///< maximum stack size is used.
+ support::ubig32_t MaxDataSize; ///< If the value is 0, the system default
+ ///< maximum data size is used.
+ support::ubig32_t
+ ReservedForDebugger; ///< This field should contain 0. When a loaded
+ ///< program is being debugged, the memory image of
+ ///< this field may be modified by a debugger to
+ ///< insert a trap instruction.
+ uint8_t TextPageSize; ///< Specifies the size of pages for the exec text. The
+ ///< default value is 0 (system-selected page size).
+ uint8_t DataPageSize; ///< Specifies the size of pages for the exec data. The
+ ///< default value is 0 (system-selected page size).
+ uint8_t StackPageSize; ///< Specifies the size of pages for the stack. The
+ ///< default value is 0 (system-selected page size).
+ uint8_t FlagAndTDataAlignment;
+ support::ubig16_t SecNumOfTData;
+ support::ubig16_t SecNumOfTBSS;
+};
+
+struct XCOFFAuxiliaryHeader64 : XCOFFAuxiliaryHeader<XCOFFAuxiliaryHeader64> {
+ support::ubig16_t AuxMagic;
+ support::ubig16_t Version;
+ support::ubig32_t ReservedForDebugger;
+ support::ubig64_t TextStartAddr;
+ support::ubig64_t DataStartAddr;
+ support::ubig64_t TOCAnchorAddr;
+ support::ubig16_t SecNumOfEntryPoint;
+ support::ubig16_t SecNumOfText;
+ support::ubig16_t SecNumOfData;
+ support::ubig16_t SecNumOfTOC;
+ support::ubig16_t SecNumOfLoader;
+ support::ubig16_t SecNumOfBSS;
+ support::ubig16_t MaxAlignOfText;
+ support::ubig16_t MaxAlignOfData;
+ support::ubig16_t ModuleType;
+ uint8_t CpuFlag;
+ uint8_t CpuType;
+ uint8_t TextPageSize;
+ uint8_t DataPageSize;
+ uint8_t StackPageSize;
+ uint8_t FlagAndTDataAlignment;
+ support::ubig64_t TextSize;
+ support::ubig64_t InitDataSize;
+ support::ubig64_t BssDataSize;
+ support::ubig64_t EntryPointAddr;
+ support::ubig64_t MaxStackSize;
+ support::ubig64_t MaxDataSize;
+ support::ubig16_t SecNumOfTData;
+ support::ubig16_t SecNumOfTBSS;
+ support::ubig16_t XCOFF64Flag;
+};
+
template <typename T> struct XCOFFSectionHeader {
// Least significant 3 bits are reserved.
static constexpr unsigned SectionFlagsReservedMask = 0x7;
@@ -97,6 +192,31 @@ struct XCOFFSectionHeader64 : XCOFFSectionHeader<XCOFFSectionHeader64> {
char Padding[4];
};
+struct LoaderSectionHeader32 {
+ support::ubig32_t Version;
+ support::ubig32_t NumberOfSymTabEnt;
+ support::ubig32_t NumberOfRelTabEnt;
+ support::ubig32_t LengthOfImpidStrTbl;
+ support::ubig32_t NumberOfImpid;
+ support::big32_t OffsetToImpid;
+ support::ubig32_t LengthOfStrTbl;
+ support::big32_t OffsetToStrTbl;
+};
+
+struct LoaderSectionHeader64 {
+ support::ubig32_t Version;
+ support::ubig32_t NumberOfSymTabEnt;
+ support::ubig32_t NumberOfRelTabEnt;
+ support::ubig32_t LengthOfImpidStrTbl;
+ support::ubig32_t NumberOfImpid;
+ support::ubig32_t LengthOfStrTbl;
+ support::big64_t OffsetToImpid;
+ support::big64_t OffsetToStrTbl;
+ support::big64_t OffsetToSymTbl;
+ char Padding[16];
+ support::big32_t OffsetToRelEnt;
+};
+
struct XCOFFStringTable {
uint32_t Size;
const char *Data;
@@ -228,7 +348,7 @@ struct XCOFFSectAuxEntForStat {
uint8_t Pad[10];
}; // 32-bit XCOFF file only.
-struct XCOFFRelocation32 {
+template <typename AddressType> struct XCOFFRelocation {
// Masks for packing/unpacking the r_rsize field of relocations.
// The msb is used to indicate if the bits being relocated are signed or
@@ -244,7 +364,7 @@ struct XCOFFRelocation32 {
static constexpr uint8_t XR_BIASED_LENGTH_MASK = 0x3f;
public:
- support::ubig32_t VirtualAddress;
+ AddressType VirtualAddress;
support::ubig32_t SymbolIndex;
// Packed field, see XR_* masks for details of packing.
@@ -260,11 +380,18 @@ public:
uint8_t getRelocatedLength() const;
};
+extern template struct XCOFFRelocation<llvm::support::ubig32_t>;
+extern template struct XCOFFRelocation<llvm::support::ubig64_t>;
+
+struct XCOFFRelocation32 : XCOFFRelocation<llvm::support::ubig32_t> {};
+struct XCOFFRelocation64 : XCOFFRelocation<llvm::support::ubig64_t> {};
+
class XCOFFSymbolRef;
class XCOFFObjectFile : public ObjectFile {
private:
const void *FileHeader = nullptr;
+ const void *AuxiliaryHeader = nullptr;
const void *SectionHeaderTable = nullptr;
const void *SymbolTblPtr = nullptr;
@@ -275,6 +402,7 @@ private:
const XCOFFSectionHeader32 *sectionHeaderTable32() const;
const XCOFFSectionHeader64 *sectionHeaderTable64() const;
+ template <typename T> const T *sectionHeaderTable() const;
size_t getFileHeaderSize() const;
size_t getSectionHeaderSize() const;
@@ -283,6 +411,7 @@ private:
const XCOFFSectionHeader64 *toSection64(DataRefImpl Ref) const;
uintptr_t getSectionHeaderTableAddress() const;
uintptr_t getEndOfSymbolTableAddress() const;
+ Expected<uintptr_t> getLoaderSectionAddress() const;
// This returns a pointer to the start of the storage for the name field of
// the 32-bit or 64-bit SectionHeader struct. This string is *not* necessarily
@@ -322,6 +451,7 @@ public:
Expected<StringRef> getSymbolName(DataRefImpl Symb) const override;
Expected<uint64_t> getSymbolAddress(DataRefImpl Symb) const override;
uint64_t getSymbolValueImpl(DataRefImpl Symb) const override;
+ uint32_t getSymbolAlignment(DataRefImpl Symb) const override;
uint64_t getCommonSymbolSizeImpl(DataRefImpl Symb) const override;
Expected<SymbolRef::Type> getSymbolType(DataRefImpl Symb) const override;
Expected<section_iterator> getSymbolSection(DataRefImpl Symb) const override;
@@ -368,6 +498,9 @@ public:
// Below here is the non-inherited interface.
bool is64Bit() const;
+ const XCOFFAuxiliaryHeader32 *auxiliaryHeader32() const;
+ const XCOFFAuxiliaryHeader64 *auxiliaryHeader64() const;
+
const void *getPointerToSymbolTable() const { return SymbolTblPtr; }
Expected<StringRef> getSymbolSectionName(XCOFFSymbolRef Ref) const;
@@ -398,6 +531,11 @@ public:
uint32_t getNumberOfSymbolTableEntries() const;
uint32_t getSymbolIndex(uintptr_t SymEntPtr) const;
+ uint64_t getSymbolSize(DataRefImpl Symb) const;
+ uintptr_t getSymbolByIndex(uint32_t Idx) const {
+ return reinterpret_cast<uintptr_t>(SymbolTblPtr) +
+ XCOFF::SymbolTableEntrySize * Idx;
+ }
uintptr_t getSymbolEntryAddressByIndex(uint32_t SymbolTableIndex) const;
Expected<StringRef> getSymbolNameByIndex(uint32_t SymbolTableIndex) const;
@@ -415,11 +553,15 @@ public:
void checkSymbolEntryPointer(uintptr_t SymbolEntPtr) const;
// Relocation-related interfaces.
+ template <typename T>
Expected<uint32_t>
- getLogicalNumberOfRelocationEntries(const XCOFFSectionHeader32 &Sec) const;
+ getNumberOfRelocationEntries(const XCOFFSectionHeader<T> &Sec) const;
- Expected<ArrayRef<XCOFFRelocation32>>
- relocations(const XCOFFSectionHeader32 &) const;
+ template <typename Shdr, typename Reloc>
+ Expected<ArrayRef<Reloc>> relocations(const Shdr &Sec) const;
+
+ // Loader section related interfaces.
+ Expected<StringRef> getImportFileTable() const;
// This function returns string table entry.
Expected<StringRef> getStringTableEntry(uint32_t Offset) const;
@@ -572,6 +714,7 @@ class XCOFFTracebackTable {
Optional<uint8_t> ExtensionTable;
XCOFFTracebackTable(const uint8_t *Ptr, uint64_t &Size, Error &Err);
+
public:
/// Parse an XCOFF Traceback Table from \a Ptr with \a Size bytes.
/// Returns an XCOFFTracebackTable upon successful parsing, otherwise an
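
Relocations are now read through a single templated interface that covers both 32- and 64-bit sections. A hedged sketch of a 64-bit caller; countRelocs is an illustrative name:

#include "llvm/Object/XCOFFObjectFile.h"

using namespace llvm;
using namespace llvm::object;

// Illustrative: count the relocations of one 64-bit XCOFF section.
static Error countRelocs(const XCOFFObjectFile &Obj,
                         const XCOFFSectionHeader64 &Sec, uint64_t &NumRelocs) {
  Expected<ArrayRef<XCOFFRelocation64>> RelsOrErr =
      Obj.relocations<XCOFFSectionHeader64, XCOFFRelocation64>(Sec);
  if (!RelsOrErr)
    return RelsOrErr.takeError();
  NumRelocs = RelsOrErr->size();
  return Error::success();
}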
diff --git a/llvm/include/llvm/ObjectYAML/MachOYAML.h b/llvm/include/llvm/ObjectYAML/MachOYAML.h
index 5d1d3ee23594..ee89f4eac61f 100644
--- a/llvm/include/llvm/ObjectYAML/MachOYAML.h
+++ b/llvm/include/llvm/ObjectYAML/MachOYAML.h
@@ -131,6 +131,7 @@ struct Object {
std::vector<LoadCommand> LoadCommands;
std::vector<Section> Sections;
LinkEditData LinkEdit;
+ Optional<llvm::yaml::BinaryRef> RawLinkEditSegment;
DWARFYAML::Data DWARF;
};
diff --git a/llvm/include/llvm/ObjectYAML/WasmYAML.h b/llvm/include/llvm/ObjectYAML/WasmYAML.h
index 661e06fba8bd..e3a1ba0d58a6 100644
--- a/llvm/include/llvm/ObjectYAML/WasmYAML.h
+++ b/llvm/include/llvm/ObjectYAML/WasmYAML.h
@@ -77,12 +77,6 @@ struct Global {
wasm::WasmInitExpr InitExpr;
};
-struct Tag {
- uint32_t Index;
- uint32_t Attribute;
- uint32_t SigIndex;
-};
-
struct Import {
StringRef Module;
StringRef Field;
@@ -92,7 +86,7 @@ struct Import {
Global GlobalImport;
Table TableImport;
Limits Memory;
- Tag TagImport;
+ uint32_t TagIndex;
};
};
@@ -199,12 +193,23 @@ struct CustomSection : Section {
yaml::BinaryRef Payload;
};
+struct DylinkImportInfo {
+ StringRef Module;
+ StringRef Field;
+ SymbolFlags Flags;
+};
+
+struct DylinkExportInfo {
+ StringRef Name;
+ SymbolFlags Flags;
+};
+
struct DylinkSection : CustomSection {
- DylinkSection() : CustomSection("dylink") {}
+ DylinkSection() : CustomSection("dylink.0") {}
static bool classof(const Section *S) {
auto C = dyn_cast<CustomSection>(S);
- return C && C->Name == "dylink";
+ return C && C->Name == "dylink.0";
}
uint32_t MemorySize;
@@ -212,6 +217,8 @@ struct DylinkSection : CustomSection {
uint32_t TableSize;
uint32_t TableAlignment;
std::vector<StringRef> Needed;
+ std::vector<DylinkImportInfo> ImportInfo;
+ std::vector<DylinkExportInfo> ExportInfo;
};
struct NameSection : CustomSection {
@@ -323,7 +330,7 @@ struct TagSection : Section {
return S->Type == wasm::WASM_SEC_TAG;
}
- std::vector<Tag> Tags;
+ std::vector<uint32_t> TagTypes;
};
struct GlobalSection : Section {
@@ -425,7 +432,8 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::SymbolInfo)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::InitFunction)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::ComdatEntry)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::Comdat)
-LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::Tag)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::DylinkImportInfo)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::DylinkExportInfo)
namespace llvm {
namespace yaml {
@@ -570,8 +578,12 @@ template <> struct ScalarEnumerationTraits<WasmYAML::RelocType> {
static void enumeration(IO &IO, WasmYAML::RelocType &Kind);
};
-template <> struct MappingTraits<WasmYAML::Tag> {
- static void mapping(IO &IO, WasmYAML::Tag &Tag);
+template <> struct MappingTraits<WasmYAML::DylinkImportInfo> {
+ static void mapping(IO &IO, WasmYAML::DylinkImportInfo &Info);
+};
+
+template <> struct MappingTraits<WasmYAML::DylinkExportInfo> {
+ static void mapping(IO &IO, WasmYAML::DylinkExportInfo &Info);
};
} // end namespace yaml
diff --git a/llvm/include/llvm/ObjectYAML/XCOFFYAML.h b/llvm/include/llvm/ObjectYAML/XCOFFYAML.h
index 2630175642c4..aa1bc396f134 100644
--- a/llvm/include/llvm/ObjectYAML/XCOFFYAML.h
+++ b/llvm/include/llvm/ObjectYAML/XCOFFYAML.h
@@ -24,11 +24,43 @@ struct FileHeader {
uint16_t NumberOfSections;
int32_t TimeStamp;
llvm::yaml::Hex64 SymbolTableOffset;
- uint32_t NumberOfSymTableEntries;
+ int32_t NumberOfSymTableEntries;
uint16_t AuxHeaderSize;
llvm::yaml::Hex16 Flags;
};
+struct AuxiliaryHeader {
+ Optional<llvm::yaml::Hex16> Magic;
+ Optional<llvm::yaml::Hex16> Version;
+ Optional<llvm::yaml::Hex64> TextStartAddr;
+ Optional<llvm::yaml::Hex64> DataStartAddr;
+ Optional<llvm::yaml::Hex64> TOCAnchorAddr;
+ Optional<uint16_t> SecNumOfEntryPoint;
+ Optional<uint16_t> SecNumOfText;
+ Optional<uint16_t> SecNumOfData;
+ Optional<uint16_t> SecNumOfTOC;
+ Optional<uint16_t> SecNumOfLoader;
+ Optional<uint16_t> SecNumOfBSS;
+ Optional<llvm::yaml::Hex16> MaxAlignOfText;
+ Optional<llvm::yaml::Hex16> MaxAlignOfData;
+ Optional<llvm::yaml::Hex16> ModuleType;
+ Optional<llvm::yaml::Hex8> CpuFlag;
+ Optional<llvm::yaml::Hex8> CpuType;
+ Optional<llvm::yaml::Hex8> TextPageSize;
+ Optional<llvm::yaml::Hex8> DataPageSize;
+ Optional<llvm::yaml::Hex8> StackPageSize;
+ Optional<llvm::yaml::Hex8> FlagAndTDataAlignment;
+ Optional<llvm::yaml::Hex64> TextSize;
+ Optional<llvm::yaml::Hex64> InitDataSize;
+ Optional<llvm::yaml::Hex64> BssDataSize;
+ Optional<llvm::yaml::Hex64> EntryPointAddr;
+ Optional<llvm::yaml::Hex64> MaxStackSize;
+ Optional<llvm::yaml::Hex64> MaxDataSize;
+ Optional<uint16_t> SecNumOfTData;
+ Optional<uint16_t> SecNumOfTBSS;
+ Optional<llvm::yaml::Hex16> Flag;
+};
+
struct Relocation {
llvm::yaml::Hex64 VirtualAddress;
llvm::yaml::Hex64 SymbolIndex;
@@ -53,16 +85,27 @@ struct Section {
struct Symbol {
StringRef SymbolName;
llvm::yaml::Hex64 Value; // Symbol value; storage class-dependent.
- StringRef SectionName;
+ Optional<StringRef> SectionName;
+ Optional<uint16_t> SectionIndex;
llvm::yaml::Hex16 Type;
XCOFF::StorageClass StorageClass;
uint8_t NumberOfAuxEntries;
};
+struct StringTable {
+ Optional<uint32_t> ContentSize; // The total size of the string table.
+ Optional<uint32_t> Length; // The value of the length field for the first
+ // 4 bytes of the table.
+ Optional<std::vector<StringRef>> Strings;
+ Optional<yaml::BinaryRef> RawContent;
+};
+
struct Object {
FileHeader Header;
+ Optional<AuxiliaryHeader> AuxHeader;
std::vector<Section> Sections;
std::vector<Symbol> Symbols;
+ StringTable StrTbl;
Object();
};
} // namespace XCOFFYAML
@@ -87,6 +130,9 @@ template <> struct MappingTraits<XCOFFYAML::FileHeader> {
static void mapping(IO &IO, XCOFFYAML::FileHeader &H);
};
+template <> struct MappingTraits<XCOFFYAML::AuxiliaryHeader> {
+ static void mapping(IO &IO, XCOFFYAML::AuxiliaryHeader &AuxHdr);
+};
template <> struct MappingTraits<XCOFFYAML::Symbol> {
static void mapping(IO &IO, XCOFFYAML::Symbol &S);
@@ -100,6 +146,10 @@ template <> struct MappingTraits<XCOFFYAML::Section> {
static void mapping(IO &IO, XCOFFYAML::Section &Sec);
};
+template <> struct MappingTraits<XCOFFYAML::StringTable> {
+ static void mapping(IO &IO, XCOFFYAML::StringTable &Str);
+};
+
template <> struct MappingTraits<XCOFFYAML::Object> {
static void mapping(IO &IO, XCOFFYAML::Object &Obj);
};
diff --git a/llvm/include/llvm/Option/Arg.h b/llvm/include/llvm/Option/Arg.h
index 22e2bcf06a6e..4be254ccdab4 100644
--- a/llvm/include/llvm/Option/Arg.h
+++ b/llvm/include/llvm/Option/Arg.h
@@ -118,10 +118,7 @@ public:
const SmallVectorImpl<const char *> &getValues() const { return Values; }
bool containsValue(StringRef Value) const {
- for (unsigned i = 0, e = getNumValues(); i != e; ++i)
- if (Values[i] == Value)
- return true;
- return false;
+ return llvm::is_contained(Values, Value);
}
/// Append the argument onto the given array as strings.
diff --git a/llvm/include/llvm/Option/OptParser.td b/llvm/include/llvm/Option/OptParser.td
index 96014b505d0f..9c73f478db5e 100644
--- a/llvm/include/llvm/Option/OptParser.td
+++ b/llvm/include/llvm/Option/OptParser.td
@@ -214,7 +214,7 @@ class MarshallingInfoBitfieldFlag<KeyPathAndMacro kpm, code value>
}
// Implementation detail of BoolOption.
-class MarshallingInfoBooleanFlag<KeyPathAndMacro kpm, code defaultvalue, code value, code name,
+class MarshallingInfoBooleanFlag<KeyPathAndMacro kpm, code defaultvalue, code value,
code other_value, code other_name>
: MarshallingInfoFlag<kpm, defaultvalue> {
code Normalizer = "makeBooleanOptionNormalizer("#value#", "#other_value#", OPT_"#other_name#")";
diff --git a/llvm/include/llvm/Option/OptTable.h b/llvm/include/llvm/Option/OptTable.h
index ca2013ee6f04..07d9870f71b3 100644
--- a/llvm/include/llvm/Option/OptTable.h
+++ b/llvm/include/llvm/Option/OptTable.h
@@ -64,8 +64,8 @@ private:
bool GroupedShortOptions = false;
const char *EnvVar = nullptr;
- unsigned TheInputOptionID = 0;
- unsigned TheUnknownOptionID = 0;
+ unsigned InputOptionID = 0;
+ unsigned UnknownOptionID = 0;
/// The index of the first option which can be parsed (i.e., is not a
/// special option like 'input' or 'unknown', and is not an option group).
@@ -83,7 +83,8 @@ private:
return OptionInfos[id - 1];
}
- Arg *parseOneArgGrouped(InputArgList &Args, unsigned &Index) const;
+ std::unique_ptr<Arg> parseOneArgGrouped(InputArgList &Args,
+ unsigned &Index) const;
protected:
OptTable(ArrayRef<Info> OptionInfos, bool IgnoreCase = false);
@@ -199,9 +200,9 @@ public:
/// \return The parsed argument, or 0 if the argument is missing values
/// (in which case Index still points at the conceptual next argument string
/// to parse).
- Arg *ParseOneArg(const ArgList &Args, unsigned &Index,
- unsigned FlagsToInclude = 0,
- unsigned FlagsToExclude = 0) const;
+ std::unique_ptr<Arg> ParseOneArg(const ArgList &Args, unsigned &Index,
+ unsigned FlagsToInclude = 0,
+ unsigned FlagsToExclude = 0) const;
/// Parse a list of arguments into an InputArgList.
///
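
ParseOneArg (and the grouped-short-option variant) now returns std::unique_ptr<Arg> instead of a raw owning pointer. A hedged sketch of how a caller loop adapts: the InputArgList continues to own raw Arg pointers via append(), so ownership is released into it. The loop itself is illustrative, not the actual driver code.

    #include "llvm/Option/Arg.h"
    #include "llvm/Option/ArgList.h"
    #include "llvm/Option/OptTable.h"
    #include <memory>

    // Illustrative caller: parse arguments until ParseOneArg reports a
    // missing value (null return), handing each parsed Arg over to the
    // InputArgList, which stores owning raw pointers.
    static void consumeArgs(const llvm::opt::OptTable &Opts,
                            llvm::opt::InputArgList &Args, unsigned End) {
      unsigned Index = 0;
      while (Index < End) {
        std::unique_ptr<llvm::opt::Arg> A = Opts.ParseOneArg(Args, Index);
        if (!A)
          break; // Index already points at the next argument string.
        Args.append(A.release());
      }
    }
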
diff --git a/llvm/include/llvm/Option/Option.h b/llvm/include/llvm/Option/Option.h
index 196cf656355d..106f6863fca1 100644
--- a/llvm/include/llvm/Option/Option.h
+++ b/llvm/include/llvm/Option/Option.h
@@ -205,9 +205,9 @@ public:
/// always be false.
bool matches(OptSpecifier ID) const;
- /// accept - Potentially accept the current argument, returning a
- /// new Arg instance, or 0 if the option does not accept this
- /// argument (or the argument is missing values).
+ /// Potentially accept the current argument, returning a new Arg instance,
+ /// or 0 if the option does not accept this argument (or the argument is
+ /// missing values).
///
/// If the option accepts the current argument, accept() sets
/// Index to the position where argument parsing should resume
@@ -217,12 +217,12 @@ public:
/// underlying storage to represent a Joined argument.
/// \p GroupedShortOption If true, we are handling the fallback case of
/// parsing a prefix of the current argument as a short option.
- Arg *accept(const ArgList &Args, StringRef CurArg, bool GroupedShortOption,
- unsigned &Index) const;
+ std::unique_ptr<Arg> accept(const ArgList &Args, StringRef CurArg,
+ bool GroupedShortOption, unsigned &Index) const;
private:
- Arg *acceptInternal(const ArgList &Args, StringRef CurArg,
- unsigned &Index) const;
+ std::unique_ptr<Arg> acceptInternal(const ArgList &Args, StringRef CurArg,
+ unsigned &Index) const;
public:
void print(raw_ostream &O) const;
diff --git a/llvm/include/llvm/Passes/OptimizationLevel.h b/llvm/include/llvm/Passes/OptimizationLevel.h
new file mode 100644
index 000000000000..d2c3fde4935f
--- /dev/null
+++ b/llvm/include/llvm/Passes/OptimizationLevel.h
@@ -0,0 +1,127 @@
+//===-------- LLVM-provided High-Level Optimization levels -*- C++ -*------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This header enumerates the LLVM-provided high-level optimization levels.
+/// Each level has a specific goal and rationale.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_PASSES_OPTIMIZATIONLEVEL_H
+#define LLVM_PASSES_OPTIMIZATIONLEVEL_H
+
+#include <assert.h>
+
+namespace llvm {
+
+class OptimizationLevel final {
+ unsigned SpeedLevel = 2;
+ unsigned SizeLevel = 0;
+ OptimizationLevel(unsigned SpeedLevel, unsigned SizeLevel)
+ : SpeedLevel(SpeedLevel), SizeLevel(SizeLevel) {
+ // Check that only valid combinations are passed.
+ assert(SpeedLevel <= 3 &&
+ "Optimization level for speed should be 0, 1, 2, or 3");
+ assert(SizeLevel <= 2 &&
+ "Optimization level for size should be 0, 1, or 2");
+ assert((SizeLevel == 0 || SpeedLevel == 2) &&
+ "Optimize for size should be encoded with speedup level == 2");
+ }
+
+public:
+ OptimizationLevel() = default;
+ /// Disable as many optimizations as possible. This doesn't completely
+ /// disable the optimizer in all cases, for example always_inline functions
+ /// can be required to be inlined for correctness.
+ static const OptimizationLevel O0;
+
+ /// Optimize quickly without destroying debuggability.
+ ///
+ /// This level is tuned to produce a result from the optimizer as quickly
+ /// as possible and to avoid destroying debuggability. This tends to result
+ /// in a very good development mode where the compiled code will be
+ /// immediately executed as part of testing. As a consequence, where
+ /// possible, we would like to produce efficient-to-execute code, but not
+ /// if it significantly slows down compilation or would prevent even basic
+ /// debugging of the resulting binary.
+ ///
+ /// As an example, complex loop transformations such as versioning,
+ /// vectorization, or fusion don't make sense here due to the degree to
+ /// which the executed code differs from the source code, and the compile
+ /// time cost.
+ static const OptimizationLevel O1;
+ /// Optimize for fast execution as much as possible without triggering
+ /// significant incremental compile time or code size growth.
+ ///
+ /// The key idea is that optimizations at this level should "pay for
+ /// themselves". So if an optimization increases compile time by 5% or
+ /// increases code size by 5% for a particular benchmark, that benchmark
+ /// should also be one which sees a 5% runtime improvement. If the compile
+ /// time or code size penalties happen on average across a diverse range of
+ /// LLVM users' benchmarks, then the improvements should as well.
+ ///
+ /// And no matter what, the compile time needs to not grow superlinearly
+ /// with the size of input to LLVM so that users can control the runtime of
+ /// the optimizer in this mode.
+ ///
+ /// This is expected to be a good default optimization level for the vast
+ /// majority of users.
+ static const OptimizationLevel O2;
+ /// Optimize for fast execution as much as possible.
+ ///
+ /// This mode is significantly more aggressive in trading off compile time
+ /// and code size to get execution time improvements. The core idea is that
+ /// this mode should include any optimization that helps execution time on
+ /// balance across a diverse collection of benchmarks, even if it increases
+ /// code size or compile time for some benchmarks without corresponding
+ /// improvements to execution time.
+ ///
+ /// Despite being willing to trade more compile time off to get improved
+ /// execution time, this mode still tries to avoid superlinear growth in
+ /// order to make even significantly slower compile times at least scale
+ /// reasonably. This does not preclude very substantial constant factor
+ /// costs though.
+ static const OptimizationLevel O3;
+ /// Similar to \c O2 but tries to optimize for small code size instead of
+ /// fast execution without triggering significant incremental execution
+ /// time slowdowns.
+ ///
+ /// The logic here is exactly the same as \c O2, but with code size and
+ /// execution time metrics swapped.
+ ///
+ /// A consequence of the different core goal is that this should in general
+ /// produce substantially smaller executables that still run in
+ /// a reasonable amount of time.
+ static const OptimizationLevel Os;
+ /// A very specialized mode that will optimize for code size at any and all
+ /// costs.
+ ///
+ /// This is useful primarily when there are absolute size limitations and
+ /// any effort taken to reduce the size is worth it regardless of the
+ /// execution time impact. You should expect this level to produce rather
+ /// slow, but very small, code.
+ static const OptimizationLevel Oz;
+
+ bool isOptimizingForSpeed() const { return SizeLevel == 0 && SpeedLevel > 0; }
+
+ bool isOptimizingForSize() const { return SizeLevel > 0; }
+
+ bool operator==(const OptimizationLevel &Other) const {
+ return SizeLevel == Other.SizeLevel && SpeedLevel == Other.SpeedLevel;
+ }
+ bool operator!=(const OptimizationLevel &Other) const {
+ return SizeLevel != Other.SizeLevel || SpeedLevel != Other.SpeedLevel;
+ }
+
+ unsigned getSpeedupLevel() const { return SpeedLevel; }
+
+ unsigned getSizeLevel() const { return SizeLevel; }
+};
+} // namespace llvm
+
+#endif
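
OptimizationLevel, previously nested inside PassBuilder (see its removal further down), now lives in its own header. Each -O level is encoded as a (SpeedLevel, SizeLevel) pair, and the private constructor asserts that size-optimizing levels keep SpeedLevel == 2. A small sketch of querying the predefined levels, assuming the usual definitions O2 = (2,0), Os = (2,1), Oz = (2,2):

    #include "llvm/Passes/OptimizationLevel.h"
    #include <cassert>

    // Probe the (SpeedLevel, SizeLevel) encoding of the predefined levels.
    static void checkLevelEncoding() {
      using llvm::OptimizationLevel;
      assert(OptimizationLevel::O2.getSpeedupLevel() == 2);
      assert(OptimizationLevel::O2.isOptimizingForSpeed());
      // Os and Oz keep the speedup level at 2 and raise the size level.
      assert(OptimizationLevel::Os.getSizeLevel() == 1);
      assert(OptimizationLevel::Oz.isOptimizingForSize());
      // O0 optimizes for neither speed nor size.
      assert(!OptimizationLevel::O0.isOptimizingForSpeed());
      assert(!OptimizationLevel::O0.isOptimizingForSize());
    }
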
diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h
index fae3e2cd2e0b..7c7883e98183 100644
--- a/llvm/include/llvm/Passes/PassBuilder.h
+++ b/llvm/include/llvm/Passes/PassBuilder.h
@@ -18,9 +18,12 @@
#include "llvm/ADT/Optional.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/Passes/OptimizationLevel.h"
#include "llvm/Support/Error.h"
+#include "llvm/Support/PGOOptions.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO/Inliner.h"
+#include "llvm/Transforms/IPO/ModuleInliner.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include <vector>
@@ -31,57 +34,6 @@ class AAManager;
class TargetMachine;
class ModuleSummaryIndex;
-/// A struct capturing PGO tunables.
-struct PGOOptions {
- enum PGOAction { NoAction, IRInstr, IRUse, SampleUse };
- enum CSPGOAction { NoCSAction, CSIRInstr, CSIRUse };
- PGOOptions(std::string ProfileFile = "", std::string CSProfileGenFile = "",
- std::string ProfileRemappingFile = "", PGOAction Action = NoAction,
- CSPGOAction CSAction = NoCSAction,
- bool DebugInfoForProfiling = false,
- bool PseudoProbeForProfiling = false)
- : ProfileFile(ProfileFile), CSProfileGenFile(CSProfileGenFile),
- ProfileRemappingFile(ProfileRemappingFile), Action(Action),
- CSAction(CSAction), DebugInfoForProfiling(DebugInfoForProfiling ||
- (Action == SampleUse &&
- !PseudoProbeForProfiling)),
- PseudoProbeForProfiling(PseudoProbeForProfiling) {
- // Note, we do allow ProfileFile.empty() for Action=IRUse LTO can
- // callback with IRUse action without ProfileFile.
-
- // If there is a CSAction, PGOAction cannot be IRInstr or SampleUse.
- assert(this->CSAction == NoCSAction ||
- (this->Action != IRInstr && this->Action != SampleUse));
-
- // For CSIRInstr, CSProfileGenFile also needs to be nonempty.
- assert(this->CSAction != CSIRInstr || !this->CSProfileGenFile.empty());
-
- // If CSAction is CSIRUse, PGOAction needs to be IRUse as they share
- // a profile.
- assert(this->CSAction != CSIRUse || this->Action == IRUse);
-
- // If neither Action nor CSAction, DebugInfoForProfiling or
- // PseudoProbeForProfiling needs to be true.
- assert(this->Action != NoAction || this->CSAction != NoCSAction ||
- this->DebugInfoForProfiling || this->PseudoProbeForProfiling);
-
- // Pseudo probe emission does not work with -fdebug-info-for-profiling since
- // they both use the discriminator field of debug lines but for different
- // purposes.
- if (this->DebugInfoForProfiling && this->PseudoProbeForProfiling) {
- report_fatal_error(
- "Pseudo probes cannot be used with -debug-info-for-profiling", false);
- }
- }
- std::string ProfileFile;
- std::string CSProfileGenFile;
- std::string ProfileRemappingFile;
- PGOAction Action;
- CSPGOAction CSAction;
- bool DebugInfoForProfiling;
- bool PseudoProbeForProfiling;
-};
-
/// Tunable parameters for passes in the default pipelines.
class PipelineTuningOptions {
public:
@@ -122,6 +74,15 @@ public:
/// Tuning option to enable/disable function merging. Its default value is
/// false.
bool MergeFunctions;
+
+ // Experimental option to eagerly invalidate more analyses. This has the
+ // potential to decrease max memory usage in exchange for more compile time.
+ // This may affect codegen due to either passes using analyses only when
+ // cached, or invalidating and recalculating an analysis that was
+ // stale/imprecise but still valid. Currently this invalidates all function
+ // analyses after various module->function or cgscc->function adaptors in the
+ // default pipelines.
+ bool EagerlyInvalidateAnalyses;
};
/// This class provides access to building LLVM's passes.
@@ -150,116 +111,6 @@ public:
std::vector<PipelineElement> InnerPipeline;
};
- /// LLVM-provided high-level optimization levels.
- ///
- /// This enumerates the LLVM-provided high-level optimization levels. Each
- /// level has a specific goal and rationale.
- class OptimizationLevel final {
- unsigned SpeedLevel = 2;
- unsigned SizeLevel = 0;
- OptimizationLevel(unsigned SpeedLevel, unsigned SizeLevel)
- : SpeedLevel(SpeedLevel), SizeLevel(SizeLevel) {
- // Check that only valid combinations are passed.
- assert(SpeedLevel <= 3 &&
- "Optimization level for speed should be 0, 1, 2, or 3");
- assert(SizeLevel <= 2 &&
- "Optimization level for size should be 0, 1, or 2");
- assert((SizeLevel == 0 || SpeedLevel == 2) &&
- "Optimize for size should be encoded with speedup level == 2");
- }
-
- public:
- OptimizationLevel() = default;
- /// Disable as many optimizations as possible. This doesn't completely
- /// disable the optimizer in all cases, for example always_inline functions
- /// can be required to be inlined for correctness.
- static const OptimizationLevel O0;
-
- /// Optimize quickly without destroying debuggability.
- ///
- /// This level is tuned to produce a result from the optimizer as quickly
- /// as possible and to avoid destroying debuggability. This tends to result
- /// in a very good development mode where the compiled code will be
- /// immediately executed as part of testing. As a consequence, where
- /// possible, we would like to produce efficient-to-execute code, but not
- /// if it significantly slows down compilation or would prevent even basic
- /// debugging of the resulting binary.
- ///
- /// As an example, complex loop transformations such as versioning,
- /// vectorization, or fusion don't make sense here due to the degree to
- /// which the executed code differs from the source code, and the compile
- /// time cost.
- static const OptimizationLevel O1;
- /// Optimize for fast execution as much as possible without triggering
- /// significant incremental compile time or code size growth.
- ///
- /// The key idea is that optimizations at this level should "pay for
- /// themselves". So if an optimization increases compile time by 5% or
- /// increases code size by 5% for a particular benchmark, that benchmark
- /// should also be one which sees a 5% runtime improvement. If the compile
- /// time or code size penalties happen on average across a diverse range of
- /// LLVM users' benchmarks, then the improvements should as well.
- ///
- /// And no matter what, the compile time needs to not grow superlinearly
- /// with the size of input to LLVM so that users can control the runtime of
- /// the optimizer in this mode.
- ///
- /// This is expected to be a good default optimization level for the vast
- /// majority of users.
- static const OptimizationLevel O2;
- /// Optimize for fast execution as much as possible.
- ///
- /// This mode is significantly more aggressive in trading off compile time
- /// and code size to get execution time improvements. The core idea is that
- /// this mode should include any optimization that helps execution time on
- /// balance across a diverse collection of benchmarks, even if it increases
- /// code size or compile time for some benchmarks without corresponding
- /// improvements to execution time.
- ///
- /// Despite being willing to trade more compile time off to get improved
- /// execution time, this mode still tries to avoid superlinear growth in
- /// order to make even significantly slower compile times at least scale
- /// reasonably. This does not preclude very substantial constant factor
- /// costs though.
- static const OptimizationLevel O3;
- /// Similar to \c O2 but tries to optimize for small code size instead of
- /// fast execution without triggering significant incremental execution
- /// time slowdowns.
- ///
- /// The logic here is exactly the same as \c O2, but with code size and
- /// execution time metrics swapped.
- ///
- /// A consequence of the different core goal is that this should in general
- /// produce substantially smaller executables that still run in
- /// a reasonable amount of time.
- static const OptimizationLevel Os;
- /// A very specialized mode that will optimize for code size at any and all
- /// costs.
- ///
- /// This is useful primarily when there are absolute size limitations and
- /// any effort taken to reduce the size is worth it regardless of the
- /// execution time impact. You should expect this level to produce rather
- /// slow, but very small, code.
- static const OptimizationLevel Oz;
-
- bool isOptimizingForSpeed() const {
- return SizeLevel == 0 && SpeedLevel > 0;
- }
-
- bool isOptimizingForSize() const { return SizeLevel > 0; }
-
- bool operator==(const OptimizationLevel &Other) const {
- return SizeLevel == Other.SizeLevel && SpeedLevel == Other.SpeedLevel;
- }
- bool operator!=(const OptimizationLevel &Other) const {
- return SizeLevel != Other.SizeLevel || SpeedLevel != Other.SpeedLevel;
- }
-
- unsigned getSpeedupLevel() const { return SpeedLevel; }
-
- unsigned getSizeLevel() const { return SizeLevel; }
- };
-
explicit PassBuilder(TargetMachine *TM = nullptr,
PipelineTuningOptions PTO = PipelineTuningOptions(),
Optional<PGOOptions> PGOOpt = None,
@@ -346,6 +197,11 @@ public:
ModuleInlinerWrapperPass buildInlinerPipeline(OptimizationLevel Level,
ThinOrFullLTOPhase Phase);
+ /// Construct the module pipeline that performs inlining with
+ /// module inliner pass.
+ ModuleInlinerPass buildModuleInlinerPipeline(OptimizationLevel Level,
+ ThinOrFullLTOPhase Phase);
+
/// Construct the core LLVM module optimization pipeline.
///
/// This pipeline focuses on optimizing the execution speed of the IR. It
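
With PGOOptions moved to llvm/Support/PGOOptions.h and OptimizationLevel to its own header, PassBuilder itself only gains the module-inliner pipeline hook, while PipelineTuningOptions grows EagerlyInvalidateAnalyses. A hedged sketch of tuning that flag and building a default O2 module pipeline; actually running the result would additionally need the usual analysis-manager registration, omitted here.

    #include "llvm/Passes/OptimizationLevel.h"
    #include "llvm/Passes/PassBuilder.h"

    // Sketch: enable eager analysis invalidation (trading compile time for
    // lower peak memory) and build the default O2 module pipeline.
    static llvm::ModulePassManager buildTunedO2Pipeline() {
      llvm::PipelineTuningOptions PTO;
      PTO.EagerlyInvalidateAnalyses = true;
      llvm::PassBuilder PB(/*TM=*/nullptr, PTO);
      return PB.buildPerModuleDefaultPipeline(llvm::OptimizationLevel::O2);
    }
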
diff --git a/llvm/include/llvm/Passes/StandardInstrumentations.h b/llvm/include/llvm/Passes/StandardInstrumentations.h
index 2f573585e766..6cab4ce7d138 100644
--- a/llvm/include/llvm/Passes/StandardInstrumentations.h
+++ b/llvm/include/llvm/Passes/StandardInstrumentations.h
@@ -215,8 +215,6 @@ protected:
virtual void handleFiltered(StringRef PassID, std::string &Name) = 0;
// Called when an ignored pass is encountered.
virtual void handleIgnored(StringRef PassID, std::string &Name) = 0;
- // Called to compare the before and after representations of the IR.
- virtual bool same(const IRUnitT &Before, const IRUnitT &After) = 0;
// Stack of IRs before passes.
std::vector<IRUnitT> BeforeStack;
@@ -269,50 +267,47 @@ protected:
void handleAfter(StringRef PassID, std::string &Name,
const std::string &Before, const std::string &After,
Any) override;
- // Called to compare the before and after representations of the IR.
- bool same(const std::string &Before, const std::string &After) override;
};
-// The following classes hold a representation of the IR for a change
-// reporter that uses string comparisons of the basic blocks
-// that are created using print (ie, similar to dump()).
-// These classes respect the filtering of passes and functions using
-// -filter-passes and -filter-print-funcs.
-//
// Information that needs to be saved for a basic block in order to compare
// before and after the pass to determine if it was changed by a pass.
-class ChangedBlockData {
+template <typename T> class BlockDataT {
public:
- ChangedBlockData(const BasicBlock &B);
-
- bool operator==(const ChangedBlockData &That) const {
- return Body == That.Body;
- }
- bool operator!=(const ChangedBlockData &That) const {
- return Body != That.Body;
+ BlockDataT(const BasicBlock &B) : Label(B.getName().str()), Data(B) {
+ raw_string_ostream SS(Body);
+ B.print(SS, nullptr, true, true);
}
+ bool operator==(const BlockDataT &That) const { return Body == That.Body; }
+ bool operator!=(const BlockDataT &That) const { return Body != That.Body; }
+
// Return the label of the represented basic block.
StringRef getLabel() const { return Label; }
// Return the string representation of the basic block.
StringRef getBody() const { return Body; }
+ // Return the associated data
+ const T &getData() const { return Data; }
+
protected:
std::string Label;
std::string Body;
+
+ // Extra data associated with a basic block
+ T Data;
};
-template <typename IRData> class OrderedChangedData {
+template <typename T> class OrderedChangedData {
public:
// Return the names in the order they were saved
std::vector<std::string> &getOrder() { return Order; }
const std::vector<std::string> &getOrder() const { return Order; }
// Return a map of names to saved representations
- StringMap<IRData> &getData() { return Data; }
- const StringMap<IRData> &getData() const { return Data; }
+ StringMap<T> &getData() { return Data; }
+ const StringMap<T> &getData() const { return Data; }
- bool operator==(const OrderedChangedData<IRData> &That) const {
+ bool operator==(const OrderedChangedData<T> &That) const {
return Data == That.getData();
}
@@ -321,55 +316,64 @@ public:
// with ones that are only in \p Before interspersed based on where they
// occur in \p Before. This is used to present the output in an order
// based on how the data is ordered in LLVM.
- static void
- report(const OrderedChangedData &Before, const OrderedChangedData &After,
- function_ref<void(const IRData *, const IRData *)> HandlePair);
+ static void report(const OrderedChangedData &Before,
+ const OrderedChangedData &After,
+ function_ref<void(const T *, const T *)> HandlePair);
protected:
std::vector<std::string> Order;
- StringMap<IRData> Data;
+ StringMap<T> Data;
+};
+
+// No extra per-block data is needed for the patch-style change reporter.
+class EmptyData {
+public:
+ EmptyData(const BasicBlock &) {}
};
// The data saved for comparing functions.
-using ChangedFuncData = OrderedChangedData<ChangedBlockData>;
+template <typename T>
+class FuncDataT : public OrderedChangedData<BlockDataT<T>> {
+public:
+ FuncDataT(std::string S) : EntryBlockName(S) {}
+
+ // Return the name of the entry block
+ std::string getEntryBlockName() const { return EntryBlockName; }
+
+protected:
+ std::string EntryBlockName;
+};
-// A map of names to the saved data.
-using ChangedIRData = OrderedChangedData<ChangedFuncData>;
+// The data saved for comparing IRs.
+template <typename T>
+class IRDataT : public OrderedChangedData<FuncDataT<T>> {};
-// A class that compares two IRs and does a diff between them. The
-// added lines are prefixed with a '+', the removed lines are prefixed
-// with a '-' and unchanged lines are prefixed with a space (to have
-// things line up).
-class ChangedIRComparer {
+// Abstract template base class for a class that compares two IRs. The
+// class is created with the 2 IRs to compare and then compare is called.
+// The static function analyzeIR is used to build up the IR representation.
+template <typename T> class IRComparer {
public:
- ChangedIRComparer(raw_ostream &OS, const ChangedIRData &Before,
- const ChangedIRData &After, bool ColourMode)
- : Before(Before), After(After), Out(OS), UseColour(ColourMode) {}
+ IRComparer(const IRDataT<T> &Before, const IRDataT<T> &After)
+ : Before(Before), After(After) {}
- // Compare the 2 IRs.
- void compare(Any IR, StringRef Prefix, StringRef PassID, StringRef Name);
+ // Compare the 2 IRs. \p handleFunctionCompare is called to handle the
+ // compare of a function. When \p InModule is set,
+ // this function is being handled as part of comparing a module.
+ void compare(
+ bool CompareModule,
+ std::function<void(bool InModule, unsigned Minor,
+ const FuncDataT<T> &Before, const FuncDataT<T> &After)>
+ CompareFunc);
// Analyze \p IR and build the IR representation in \p Data.
- static void analyzeIR(Any IR, ChangedIRData &Data);
+ static void analyzeIR(Any IR, IRDataT<T> &Data);
protected:
- // Return the module when that is the appropriate level of
- // comparison for \p IR.
- static const Module *getModuleForComparison(Any IR);
-
// Generate the data for \p F into \p Data.
- static bool generateFunctionData(ChangedIRData &Data, const Function &F);
+ static bool generateFunctionData(IRDataT<T> &Data, const Function &F);
- // Called to handle the compare of a function. When \p InModule is set,
- // this function is being handled as part of comparing a module.
- void handleFunctionCompare(StringRef Name, StringRef Prefix, StringRef PassID,
- bool InModule, const ChangedFuncData &Before,
- const ChangedFuncData &After);
-
- const ChangedIRData &Before;
- const ChangedIRData &After;
- raw_ostream &Out;
- bool UseColour;
+ const IRDataT<T> &Before;
+ const IRDataT<T> &After;
};
// A change printer that prints out in-line differences in the basic
@@ -378,25 +382,28 @@ protected:
// and added, respectively. Changes to the IR that do not affect basic
// blocks are not reported as having changed the IR. The option
// -print-module-scope does not affect this change reporter.
-class InLineChangePrinter : public TextChangeReporter<ChangedIRData> {
+class InLineChangePrinter : public TextChangeReporter<IRDataT<EmptyData>> {
public:
InLineChangePrinter(bool VerboseMode, bool ColourMode)
- : TextChangeReporter<ChangedIRData>(VerboseMode), UseColour(ColourMode) {}
+ : TextChangeReporter<IRDataT<EmptyData>>(VerboseMode),
+ UseColour(ColourMode) {}
~InLineChangePrinter() override;
void registerCallbacks(PassInstrumentationCallbacks &PIC);
protected:
// Create a representation of the IR.
virtual void generateIRRepresentation(Any IR, StringRef PassID,
- ChangedIRData &Output) override;
+ IRDataT<EmptyData> &Output) override;
// Called when an interesting IR has changed.
virtual void handleAfter(StringRef PassID, std::string &Name,
- const ChangedIRData &Before,
- const ChangedIRData &After, Any) override;
- // Called to compare the before and after representations of the IR.
- virtual bool same(const ChangedIRData &Before,
- const ChangedIRData &After) override;
+ const IRDataT<EmptyData> &Before,
+ const IRDataT<EmptyData> &After, Any) override;
+
+ void handleFunctionCompare(StringRef Name, StringRef Prefix, StringRef PassID,
+ StringRef Divider, bool InModule, unsigned Minor,
+ const FuncDataT<EmptyData> &Before,
+ const FuncDataT<EmptyData> &After);
bool UseColour;
};
@@ -409,6 +416,81 @@ public:
void registerCallbacks(PassInstrumentationCallbacks &PIC);
};
+// Class that holds transitions between basic blocks. The transitions
+// are contained in a map of values to names of basic blocks.
+class DCData {
+public:
+ // Fill the map with the transitions from basic block \p B.
+ DCData(const BasicBlock &B);
+
+ // Return an iterator to the names of the successor blocks.
+ StringMap<std::string>::const_iterator begin() const {
+ return Successors.begin();
+ }
+ StringMap<std::string>::const_iterator end() const {
+ return Successors.end();
+ }
+
+ // Return the label of the basic block reached on a transition on \p S.
+ const StringRef getSuccessorLabel(StringRef S) const {
+ assert(Successors.count(S) == 1 && "Expected to find successor.");
+ return Successors.find(S)->getValue();
+ }
+
+protected:
+ // Add a transition to \p Succ on \p Label
+ void addSuccessorLabel(StringRef Succ, StringRef Label) {
+ std::pair<std::string, std::string> SS{Succ.str(), Label.str()};
+ Successors.insert(SS);
+ }
+
+ StringMap<std::string> Successors;
+};
+
+// A change reporter that builds a website with links to pdf files showing
+// dot control flow graphs with changed instructions shown in colour.
+class DotCfgChangeReporter : public ChangeReporter<IRDataT<DCData>> {
+public:
+ DotCfgChangeReporter(bool Verbose);
+ ~DotCfgChangeReporter() override;
+ void registerCallbacks(PassInstrumentationCallbacks &PIC);
+
+protected:
+ // Initialize the HTML file and output the header.
+ bool initializeHTML();
+
+ // Called on the first IR processed.
+ void handleInitialIR(Any IR) override;
+ // Called before and after a pass to get the representation of the IR.
+ void generateIRRepresentation(Any IR, StringRef PassID,
+ IRDataT<DCData> &Output) override;
+ // Called when the pass is not interesting.
+ void omitAfter(StringRef PassID, std::string &Name) override;
+ // Called when an interesting IR has changed.
+ void handleAfter(StringRef PassID, std::string &Name,
+ const IRDataT<DCData> &Before, const IRDataT<DCData> &After,
+ Any) override;
+ // Called when an interesting pass is invalidated.
+ void handleInvalidated(StringRef PassID) override;
+ // Called when the IR or pass is not interesting.
+ void handleFiltered(StringRef PassID, std::string &Name) override;
+ // Called when an ignored pass is encountered.
+ void handleIgnored(StringRef PassID, std::string &Name) override;
+
+ // Generate the pdf file into \p Dir / \p PDFFileName using \p DotFile as
+ // input and return the html <a> tag with \p Text as the content.
+ static std::string genHTML(StringRef Text, StringRef DotFile,
+ StringRef PDFFileName);
+
+ void handleFunctionCompare(StringRef Name, StringRef Prefix, StringRef PassID,
+ StringRef Divider, bool InModule, unsigned Minor,
+ const FuncDataT<DCData> &Before,
+ const FuncDataT<DCData> &After);
+
+ unsigned N = 0;
+ std::unique_ptr<raw_fd_ostream> HTML;
+};
+
/// This class provides an interface to register all the standard pass
/// instrumentations and manages their state (if any).
class StandardInstrumentations {
@@ -421,6 +503,7 @@ class StandardInstrumentations {
IRChangedPrinter PrintChangedIR;
PseudoProbeVerifier PseudoProbeVerification;
InLineChangePrinter PrintChangedDiff;
+ DotCfgChangeReporter WebsiteChangeReporter;
VerifyInstrumentation Verify;
bool VerifyEach;
@@ -440,8 +523,12 @@ public:
extern template class ChangeReporter<std::string>;
extern template class TextChangeReporter<std::string>;
-extern template class ChangeReporter<ChangedIRData>;
-extern template class TextChangeReporter<ChangedIRData>;
+extern template class BlockDataT<EmptyData>;
+extern template class FuncDataT<EmptyData>;
+extern template class IRDataT<EmptyData>;
+extern template class ChangeReporter<IRDataT<EmptyData>>;
+extern template class TextChangeReporter<IRDataT<EmptyData>>;
+extern template class IRComparer<EmptyData>;
} // namespace llvm
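
The change-reporting machinery is now templated on the data kept per basic block: BlockDataT<T> stores the printed block body plus an extra T, with EmptyData for the textual diff printer and DCData (a successor-label map) for the new dot-cfg reporter, and IRComparer<T> drives the per-function comparison through a callback. Below is a hedged sketch of walking that comparison with the EmptyData instantiation; populating Before/After, normally done by IRComparer<T>::analyzeIR, is left out.

    #include "llvm/Passes/StandardInstrumentations.h"
    #include "llvm/Support/raw_ostream.h"

    // Sketch: drive IRComparer over two saved IR representations.  A real
    // reporter would diff the BlockDataT bodies inside the callback; here we
    // only print which function (by entry block name) is being compared.
    static void walkComparison(const llvm::IRDataT<llvm::EmptyData> &Before,
                               const llvm::IRDataT<llvm::EmptyData> &After,
                               bool CompareModule) {
      llvm::IRComparer<llvm::EmptyData> Comparer(Before, After);
      Comparer.compare(
          CompareModule,
          [](bool InModule, unsigned Minor,
             const llvm::FuncDataT<llvm::EmptyData> &,
             const llvm::FuncDataT<llvm::EmptyData> &A) {
            llvm::errs() << (InModule ? "in-module" : "single-function")
                         << " compare #" << Minor << " of "
                         << A.getEntryBlockName() << "\n";
          });
    }
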
diff --git a/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h b/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h
index 8f336c13af61..d3a5d44ce8dd 100644
--- a/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h
+++ b/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h
@@ -693,8 +693,9 @@ public:
/// An iterator over the \c LineCoverageStats objects for lines described by
/// a \c CoverageData instance.
class LineCoverageIterator
- : public iterator_facade_base<
- LineCoverageIterator, std::forward_iterator_tag, LineCoverageStats> {
+ : public iterator_facade_base<LineCoverageIterator,
+ std::forward_iterator_tag,
+ const LineCoverageStats> {
public:
LineCoverageIterator(const CoverageData &CD)
: LineCoverageIterator(CD, CD.begin()->Line) {}
@@ -711,8 +712,6 @@ public:
const LineCoverageStats &operator*() const { return Stats; }
- LineCoverageStats &operator*() { return Stats; }
-
LineCoverageIterator &operator++();
LineCoverageIterator getEnd() const {
diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index 08a934e6985f..4395c2abb33e 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -205,9 +205,9 @@ StringRef getFuncNameWithoutPrefix(StringRef PGOFuncName,
StringRef FileName = "<unknown>");
/// Given a vector of strings (function PGO names) \c NameStrs, the
-/// method generates a combined string \c Result thatis ready to be
+/// method generates a combined string \c Result that is ready to be
/// serialized. The \c Result string is comprised of three fields:
-/// The first field is the legnth of the uncompressed strings, and the
+/// The first field is the length of the uncompressed strings, and the
/// the second field is the length of the zlib-compressed string.
/// Both fields are encoded in ULEB128. If \c doCompress is false, the
/// third field is the uncompressed strings; otherwise it is the
@@ -308,7 +308,8 @@ inline std::error_code make_error_code(instrprof_error E) {
class InstrProfError : public ErrorInfo<InstrProfError> {
public:
- InstrProfError(instrprof_error Err) : Err(Err) {
+ InstrProfError(instrprof_error Err, const Twine &ErrStr = Twine())
+ : Err(Err), Msg(ErrStr.str()) {
assert(Err != instrprof_error::success && "Not an error");
}
@@ -321,6 +322,7 @@ public:
}
instrprof_error get() const { return Err; }
+ const std::string &getMessage() const { return Msg; }
/// Consume an Error and return the raw enum value contained within it. The
/// Error must either be a success value, or contain a single InstrProfError.
@@ -337,6 +339,7 @@ public:
private:
instrprof_error Err;
+ std::string Msg;
};
class SoftInstrProfErrors {
@@ -474,7 +477,8 @@ public:
/// is used by the raw and text profile readers.
Error addFuncName(StringRef FuncName) {
if (FuncName.empty())
- return make_error<InstrProfError>(instrprof_error::malformed);
+ return make_error<InstrProfError>(instrprof_error::malformed,
+ "function name is empty");
auto Ins = NameTab.insert(FuncName);
if (Ins.second) {
MD5NameMap.push_back(std::make_pair(
@@ -1104,6 +1108,8 @@ namespace RawInstrProf {
// Version 5: Bit 60 of FuncHash is reserved for the flag for the context
// sensitive records.
// Version 6: Added binary id.
+// Version 7: Reorder binary id and include version in signature.
+// Version 8: Use relative counter pointer.
const uint64_t Version = INSTR_PROF_RAW_VERSION;
template <class IntPtrT> inline uint64_t getMagic();
@@ -1142,8 +1148,8 @@ void getMemOPSizeRangeFromOption(StringRef Str, int64_t &RangeStart,
// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
// aware this is an ir_level profile so it can set the version flag.
-void createIRLevelProfileFlagVar(Module &M, bool IsCS,
- bool InstrEntryBBEnabled);
+GlobalVariable *createIRLevelProfileFlagVar(Module &M, bool IsCS,
+ bool InstrEntryBBEnabled);
// Create the variable for the profile file name.
void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput);
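
InstrProfError now carries an optional message next to the enum, which is what lets readers attach context such as "function name is empty" above. A small sketch of creating such an error and recovering both pieces on the consumer side, mirroring the handleAllErrors pattern the reader uses:

    #include "llvm/ProfileData/InstrProf.h"
    #include "llvm/Support/Error.h"
    #include "llvm/Support/raw_ostream.h"

    // Create an InstrProfError with an attached message, then pull the enum
    // value and the message back out when handling it.
    static llvm::Error makeMalformedError() {
      return llvm::make_error<llvm::InstrProfError>(
          llvm::instrprof_error::malformed, "function name is empty");
    }

    static void handleIt() {
      llvm::handleAllErrors(
          makeMalformedError(), [](const llvm::InstrProfError &IPE) {
            llvm::errs() << "instrprof error "
                         << static_cast<unsigned>(IPE.get()) << ": "
                         << IPE.getMessage() << "\n";
          });
    }
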
diff --git a/llvm/include/llvm/ProfileData/InstrProfData.inc b/llvm/include/llvm/ProfileData/InstrProfData.inc
index 08a642469627..008b8dde5820 100644
--- a/llvm/include/llvm/ProfileData/InstrProfData.inc
+++ b/llvm/include/llvm/ProfileData/InstrProfData.inc
@@ -75,9 +75,7 @@ INSTR_PROF_DATA(const uint64_t, llvm::Type::getInt64Ty(Ctx), NameRef, \
INSTR_PROF_DATA(const uint64_t, llvm::Type::getInt64Ty(Ctx), FuncHash, \
ConstantInt::get(llvm::Type::getInt64Ty(Ctx), \
Inc->getHash()->getZExtValue()))
-INSTR_PROF_DATA(const IntPtrT, llvm::Type::getInt64PtrTy(Ctx), CounterPtr, \
- ConstantExpr::getBitCast(CounterPtr, \
- llvm::Type::getInt64PtrTy(Ctx)))
+INSTR_PROF_DATA(const IntPtrT, IntPtrTy, CounterPtr, RelativeCounterPtr)
/* This is used to map function pointers for the indirect call targets to
* function name hashes during the conversion from raw to merged profile
* data.
@@ -129,15 +127,16 @@ INSTR_PROF_VALUE_NODE(PtrToNodeT, llvm::Type::getInt8PtrTy(Ctx), Next, \
#endif
INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic())
INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version())
+INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL))
INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize)
INSTR_PROF_RAW_HEADER(uint64_t, PaddingBytesBeforeCounters, PaddingBytesBeforeCounters)
INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize)
INSTR_PROF_RAW_HEADER(uint64_t, PaddingBytesAfterCounters, PaddingBytesAfterCounters)
INSTR_PROF_RAW_HEADER(uint64_t, NamesSize, NamesSize)
-INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta, (uintptr_t)CountersBegin)
+INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta,
+ (uintptr_t)CountersBegin - (uintptr_t)DataBegin)
INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin)
INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last)
-INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL))
#undef INSTR_PROF_RAW_HEADER
/* INSTR_PROF_RAW_HEADER end */
@@ -646,7 +645,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
(uint64_t)'f' << 16 | (uint64_t)'R' << 8 | (uint64_t)129
/* Raw profile format version (start from 1). */
-#define INSTR_PROF_RAW_VERSION 6
+#define INSTR_PROF_RAW_VERSION 8
/* Indexed profile format version (start from 1). */
#define INSTR_PROF_INDEX_VERSION 7
/* Coverage mapping format version (start from 0). */
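
Raw profile version 8 makes CounterPtr an offset rather than an absolute runtime address, and the header's CountersDelta becomes the distance between the counters and data sections. Assuming RelativeCounterPtr is the distance from a record's own data variable to its counters, which is what the new initializer above suggests, a consumer working on its own copy of both sections can locate counters with plain offset arithmetic. The sketch below is that arithmetic with made-up variable names, not the actual reader code.

    #include <cstdint>

    // Hypothetical reconstruction of a record's counter location from the
    // relative fields.  All names are illustrative.
    static uint64_t *countersForRecord(
        uint8_t *CounterSectionStart, // consumer's copy of the counter section
        uint64_t RecordOffsetInData,  // byte offset of this record's data
        int64_t RelativeCounterPtr,   // the record's CounterPtr field
        int64_t CountersDelta) {      // header: CountersBegin - DataBegin
      // All three inputs are differences taken in the producer's address
      // space, so they survive copying the sections intact:
      //   CounterAddr - CountersBegin
      //     = (DataRecordAddr + RelativeCounterPtr) - (DataBegin + CountersDelta)
      //     = RecordOffsetInData + RelativeCounterPtr - CountersDelta
      int64_t OffsetInCounters = static_cast<int64_t>(RecordOffsetInData) +
                                 RelativeCounterPtr - CountersDelta;
      return reinterpret_cast<uint64_t *>(CounterSectionStart +
                                          OffsetInCounters);
    }
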
diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h
index 501c6f011d53..b62d4ff044a3 100644
--- a/llvm/include/llvm/ProfileData/InstrProfReader.h
+++ b/llvm/include/llvm/ProfileData/InstrProfReader.h
@@ -71,6 +71,7 @@ public:
/// format. Provides an iterator over NamedInstrProfRecords.
class InstrProfReader {
instrprof_error LastError = instrprof_error::success;
+ std::string LastErrorMsg;
public:
InstrProfReader() = default;
@@ -114,14 +115,21 @@ protected:
std::unique_ptr<InstrProfSymtab> Symtab;
/// Set the current error and return same.
- Error error(instrprof_error Err) {
+ Error error(instrprof_error Err, const std::string &ErrMsg = "") {
LastError = Err;
+ LastErrorMsg = ErrMsg;
if (Err == instrprof_error::success)
return Error::success();
- return make_error<InstrProfError>(Err);
+ return make_error<InstrProfError>(Err, ErrMsg);
}
- Error error(Error &&E) { return error(InstrProfError::take(std::move(E))); }
+ Error error(Error &&E) {
+ handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
+ LastError = IPE.get();
+ LastErrorMsg = IPE.getMessage();
+ });
+ return make_error<InstrProfError>(LastError, LastErrorMsg);
+ }
/// Clear the current error and return a successful one.
Error success() { return error(instrprof_error::success); }
@@ -136,7 +144,7 @@ public:
/// Get the current error.
Error getError() {
if (hasError())
- return make_error<InstrProfError>(LastError);
+ return make_error<InstrProfError>(LastError, LastErrorMsg);
return Error::success();
}
@@ -197,7 +205,7 @@ public:
/// Reader for the raw instrprof binary format from runtime.
///
-/// This format is a raw memory dump of the instrumentation-baed profiling data
+/// This format is a raw memory dump of the instrumentation-based profiling data
/// from the runtime. It has no index.
///
/// Templated on the unsigned type whose size matches pointers on the platform
diff --git a/llvm/include/llvm/ProfileData/ProfileCommon.h b/llvm/include/llvm/ProfileData/ProfileCommon.h
index f2d9ccc45fdc..ad92af22d92e 100644
--- a/llvm/include/llvm/ProfileData/ProfileCommon.h
+++ b/llvm/include/llvm/ProfileData/ProfileCommon.h
@@ -66,9 +66,9 @@ public:
/// Find the summary entry for a desired percentile of counts.
static const ProfileSummaryEntry &
- getEntryForPercentile(SummaryEntryVector &DS, uint64_t Percentile);
- static uint64_t getHotCountThreshold(SummaryEntryVector &DS);
- static uint64_t getColdCountThreshold(SummaryEntryVector &DS);
+ getEntryForPercentile(const SummaryEntryVector &DS, uint64_t Percentile);
+ static uint64_t getHotCountThreshold(const SummaryEntryVector &DS);
+ static uint64_t getColdCountThreshold(const SummaryEntryVector &DS);
};
class InstrProfSummaryBuilder final : public ProfileSummaryBuilder {
@@ -92,8 +92,8 @@ public:
void addRecord(const sampleprof::FunctionSamples &FS,
bool isCallsiteSample = false);
- std::unique_ptr<ProfileSummary> computeSummaryForProfiles(
- const StringMap<sampleprof::FunctionSamples> &Profiles);
+ std::unique_ptr<ProfileSummary>
+ computeSummaryForProfiles(const sampleprof::SampleProfileMap &Profiles);
std::unique_ptr<ProfileSummary> getSummary();
};
diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index 2f71bbc6bbbe..7ac9eccf8ac2 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -29,10 +29,13 @@
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cstdint>
+#include <list>
#include <map>
#include <set>
+#include <sstream>
#include <string>
#include <system_error>
+#include <unordered_map>
#include <utility>
namespace llvm {
@@ -104,10 +107,10 @@ static inline uint64_t SPMagic(SampleProfileFormat Format = SPF_Binary) {
/// current Format uses MD5 to represent the string.
static inline StringRef getRepInFormat(StringRef Name, bool UseMD5,
std::string &GUIDBuf) {
- if (Name.empty())
+ if (Name.empty() || !UseMD5)
return Name;
GUIDBuf = std::to_string(Function::getGUID(Name));
- return UseMD5 ? StringRef(GUIDBuf) : Name;
+ return GUIDBuf;
}
static inline uint64_t SPVersion() { return 103; }
@@ -122,13 +125,14 @@ enum SecType {
SecProfileSymbolList = 3,
SecFuncOffsetTable = 4,
SecFuncMetadata = 5,
+ SecCSNameTable = 6,
// marker for the first type of profile.
SecFuncProfileFirst = 32,
SecLBRProfile = SecFuncProfileFirst
};
static inline std::string getSecName(SecType Type) {
- switch (Type) {
+ switch ((int)Type) { // Avoid -Wcovered-switch-default
case SecInValid:
return "InvalidSection";
case SecProfSummary:
@@ -141,10 +145,13 @@ static inline std::string getSecName(SecType Type) {
return "FuncOffsetTableSection";
case SecFuncMetadata:
return "FunctionMetadata";
+ case SecCSNameTable:
+ return "CSNameTableSection";
case SecLBRProfile:
return "LBRProfileSection";
+ default:
+ return "UnknownSection";
}
- llvm_unreachable("A SecType has no name for output");
}
// Entry type of section header table used by SampleProfileExtBinaryBaseReader
@@ -202,6 +209,13 @@ enum class SecFuncMetadataFlags : uint32_t {
SecFlagHasAttribute = (1 << 1)
};
+enum class SecFuncOffsetFlags : uint32_t {
+ SecFlagInvalid = 0,
+ // Store function offsets in an order of contexts. The order ensures that
+ // callee contexts of a given context laid out next to it.
+ SecFlagOrdered = (1 << 0),
+};
+
// Verify section specific flag is used for the correct section.
template <class SecFlagType>
static inline void verifySecFlag(SecType Type, SecFlagType Flag) {
@@ -222,6 +236,8 @@ static inline void verifySecFlag(SecType Type, SecFlagType Flag) {
IsFlagLegal = std::is_same<SecFuncMetadataFlags, SecFlagType>();
break;
default:
+ case SecFuncOffsetTable:
+ IsFlagLegal = std::is_same<SecFuncOffsetFlags, SecFlagType>();
break;
}
if (!IsFlagLegal)
@@ -396,54 +412,123 @@ enum ContextAttributeMask {
ContextShouldBeInlined = 0x2, // Leaf of context should be inlined
};
+// Represents a context frame with function name and line location
+struct SampleContextFrame {
+ StringRef FuncName;
+ LineLocation Location;
+
+ SampleContextFrame() : Location(0, 0) {}
+
+ SampleContextFrame(StringRef FuncName, LineLocation Location)
+ : FuncName(FuncName), Location(Location) {}
+
+ bool operator==(const SampleContextFrame &That) const {
+ return Location == That.Location && FuncName == That.FuncName;
+ }
+
+ bool operator!=(const SampleContextFrame &That) const {
+ return !(*this == That);
+ }
+
+ std::string toString(bool OutputLineLocation) const {
+ std::ostringstream OContextStr;
+ OContextStr << FuncName.str();
+ if (OutputLineLocation) {
+ OContextStr << ":" << Location.LineOffset;
+ if (Location.Discriminator)
+ OContextStr << "." << Location.Discriminator;
+ }
+ return OContextStr.str();
+ }
+};
+
+static inline hash_code hash_value(const SampleContextFrame &arg) {
+ return hash_combine(arg.FuncName, arg.Location.LineOffset,
+ arg.Location.Discriminator);
+}
+
+using SampleContextFrameVector = SmallVector<SampleContextFrame, 10>;
+using SampleContextFrames = ArrayRef<SampleContextFrame>;
+
+struct SampleContextFrameHash {
+ uint64_t operator()(const SampleContextFrameVector &S) const {
+ return hash_combine_range(S.begin(), S.end());
+ }
+};
+
// Sample context for FunctionSamples. It consists of the calling context,
// the function name and context state. Internally sample context is represented
-// using StringRef, which is also the input for constructing a `SampleContext`.
+// using ArrayRef, which is also the input for constructing a `SampleContext`.
// It can accept and represent both full context string as well as context-less
// function name.
-// Example of full context string (note the wrapping `[]`):
-// `[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]`
-// Example of context-less function name (same as AutoFDO):
-// `_Z8funcLeafi`
+// For a CS profile, a full context vector can look like:
+// `main:3 _Z5funcAi:1 _Z8funcLeafi`
+// For a base CS profile without calling context, the context vector should only
+// contain the leaf frame name.
+// For a non-CS profile, the context vector should be empty.
class SampleContext {
public:
SampleContext() : State(UnknownContext), Attributes(ContextNone) {}
- SampleContext(StringRef ContextStr, ContextStateMask CState = UnknownContext)
- : Attributes(ContextNone) {
- setContext(ContextStr, CState);
- }
- // Promote context by removing top frames (represented by `ContextStrToRemove`).
- // Note that with string representation of context, the promotion is effectively
- // a substr operation with `ContextStrToRemove` removed from left.
- void promoteOnPath(StringRef ContextStrToRemove) {
- assert(FullContext.startswith(ContextStrToRemove));
+ SampleContext(StringRef Name)
+ : Name(Name), State(UnknownContext), Attributes(ContextNone) {}
- // Remove leading context and frame separator " @ ".
- FullContext = FullContext.substr(ContextStrToRemove.size() + 3);
- CallingContext = CallingContext.substr(ContextStrToRemove.size() + 3);
+ SampleContext(SampleContextFrames Context,
+ ContextStateMask CState = RawContext)
+ : Attributes(ContextNone) {
+ assert(!Context.empty() && "Context is empty");
+ setContext(Context, CState);
}
- // Split the top context frame (left-most substr) from context.
- static std::pair<StringRef, StringRef>
- splitContextString(StringRef ContextStr) {
- return ContextStr.split(" @ ");
+ // Given a context string, decode and populate internal states like
+ // Function name, Calling context and context state. Example of input
+ // `ContextStr`: `[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]`
+ SampleContext(StringRef ContextStr,
+ std::list<SampleContextFrameVector> &CSNameTable,
+ ContextStateMask CState = RawContext)
+ : Attributes(ContextNone) {
+ assert(!ContextStr.empty());
+ // Note that `[]` wrapped input indicates a full context string, otherwise
+ // it's treated as context-less function name only.
+ bool HasContext = ContextStr.startswith("[");
+ if (!HasContext) {
+ State = UnknownContext;
+ Name = ContextStr;
+ } else {
+ CSNameTable.emplace_back();
+ SampleContextFrameVector &Context = CSNameTable.back();
+ createCtxVectorFromStr(ContextStr, Context);
+ setContext(Context, CState);
+ }
}
- // Reconstruct a new context with the last k frames, return the context-less
- // name if K = 1
- StringRef getContextWithLastKFrames(uint32_t K) {
- if (K == 1)
- return getNameWithoutContext();
-
- size_t I = FullContext.size();
- while (K--) {
- I = FullContext.find_last_of(" @ ", I);
- if (I == StringRef::npos)
- return FullContext;
- I -= 2;
+ /// Create a context vector from a given context string and save it in
+ /// `Context`.
+ static void createCtxVectorFromStr(StringRef ContextStr,
+ SampleContextFrameVector &Context) {
+ // Remove encapsulating '[' and ']' if any
+ ContextStr = ContextStr.substr(1, ContextStr.size() - 2);
+ StringRef ContextRemain = ContextStr;
+ StringRef ChildContext;
+ StringRef CalleeName;
+ while (!ContextRemain.empty()) {
+ auto ContextSplit = ContextRemain.split(" @ ");
+ ChildContext = ContextSplit.first;
+ ContextRemain = ContextSplit.second;
+ LineLocation CallSiteLoc(0, 0);
+ decodeContextString(ChildContext, CalleeName, CallSiteLoc);
+ Context.emplace_back(CalleeName, CallSiteLoc);
}
- return FullContext.slice(I + 3, StringRef::npos);
+ }
+
+ // Promote context by removing the leading `ContextFramesToRemove` frames.
+ // Note that with the array representation of context, the promotion is
+ // effectively a slice operation with the first `ContextFramesToRemove`
+ // elements removed from the left.
+ void promoteOnPath(uint32_t ContextFramesToRemove) {
+ assert(ContextFramesToRemove <= FullContext.size() &&
+ "Cannot remove more than the whole context");
+ FullContext = FullContext.drop_front(ContextFramesToRemove);
}
// Decode context string for a frame to get function name and location.
@@ -469,7 +554,7 @@ public:
}
}
- operator StringRef() const { return FullContext; }
+ operator SampleContextFrames() const { return FullContext; }
bool hasAttribute(ContextAttributeMask A) { return Attributes & (uint32_t)A; }
void setAttribute(ContextAttributeMask A) { Attributes |= (uint32_t)A; }
uint32_t getAllAttributes() { return Attributes; }
@@ -478,60 +563,114 @@ public:
void setState(ContextStateMask S) { State |= (uint32_t)S; }
void clearState(ContextStateMask S) { State &= (uint32_t)~S; }
bool hasContext() const { return State != UnknownContext; }
- bool isBaseContext() const { return CallingContext.empty(); }
- StringRef getNameWithoutContext() const { return Name; }
- StringRef getCallingContext() const { return CallingContext; }
- StringRef getNameWithContext() const { return FullContext; }
-
-private:
- // Give a context string, decode and populate internal states like
- // Function name, Calling context and context state. Example of input
- // `ContextStr`: `[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]`
- void setContext(StringRef ContextStr, ContextStateMask CState) {
- assert(!ContextStr.empty());
- // Note that `[]` wrapped input indicates a full context string, otherwise
- // it's treated as context-less function name only.
- bool HasContext = ContextStr.startswith("[");
- if (!HasContext && CState == UnknownContext) {
- State = UnknownContext;
- Name = FullContext = ContextStr;
- } else {
- // Assume raw context profile if unspecified
- if (CState == UnknownContext)
- State = RawContext;
- else
- State = CState;
-
- // Remove encapsulating '[' and ']' if any
- if (HasContext)
- FullContext = ContextStr.substr(1, ContextStr.size() - 2);
- else
- FullContext = ContextStr;
-
- // Caller is to the left of callee in context string
- auto NameContext = FullContext.rsplit(" @ ");
- if (NameContext.second.empty()) {
- Name = NameContext.first;
- CallingContext = NameContext.second;
- } else {
- Name = NameContext.second;
- CallingContext = NameContext.first;
+ bool isBaseContext() const { return FullContext.size() == 1; }
+ StringRef getName() const { return Name; }
+ SampleContextFrames getContextFrames() const { return FullContext; }
+
+ static std::string getContextString(SampleContextFrames Context,
+ bool IncludeLeafLineLocation = false) {
+ std::ostringstream OContextStr;
+ for (uint32_t I = 0; I < Context.size(); I++) {
+ if (OContextStr.str().size()) {
+ OContextStr << " @ ";
}
+ OContextStr << Context[I].toString(I != Context.size() - 1 ||
+ IncludeLeafLineLocation);
}
+ return OContextStr.str();
+ }
+
+ std::string toString() const {
+ if (!hasContext())
+ return Name.str();
+ return getContextString(FullContext, false);
+ }
+
+ uint64_t getHashCode() const {
+ return hasContext() ? hash_value(getContextFrames())
+ : hash_value(getName());
+ }
+
+ /// Set the name of the function.
+ void setName(StringRef FunctionName) {
+ assert(FullContext.empty() &&
+ "setName should only be called for non-CS profile");
+ Name = FunctionName;
+ }
+
+ void setContext(SampleContextFrames Context,
+ ContextStateMask CState = RawContext) {
+ assert(CState != UnknownContext);
+ FullContext = Context;
+ Name = Context.back().FuncName;
+ State = CState;
+ }
+
+ bool operator==(const SampleContext &That) const {
+ return State == That.State && Name == That.Name &&
+ FullContext == That.FullContext;
+ }
+
+ bool operator!=(const SampleContext &That) const { return !(*this == That); }
+
+ bool operator<(const SampleContext &That) const {
+ if (State != That.State)
+ return State < That.State;
+
+ if (!hasContext()) {
+ return (Name.compare(That.Name)) == -1;
+ }
+
+ uint64_t I = 0;
+ while (I < std::min(FullContext.size(), That.FullContext.size())) {
+ auto &Context1 = FullContext[I];
+ auto &Context2 = That.FullContext[I];
+ auto V = Context1.FuncName.compare(Context2.FuncName);
+ if (V)
+ return V == -1;
+ if (Context1.Location != Context2.Location)
+ return Context1.Location < Context2.Location;
+ I++;
+ }
+
+ return FullContext.size() < That.FullContext.size();
+ }
+
+ struct Hash {
+ uint64_t operator()(const SampleContext &Context) const {
+ return Context.getHashCode();
+ }
+ };
+
+ bool IsPrefixOf(const SampleContext &That) const {
+ auto ThisContext = FullContext;
+ auto ThatContext = That.FullContext;
+ if (ThatContext.size() < ThisContext.size())
+ return false;
+ ThatContext = ThatContext.take_front(ThisContext.size());
+ // Compare Leaf frame first
+ if (ThisContext.back().FuncName != ThatContext.back().FuncName)
+ return false;
+ // Compare leading context
+ return ThisContext.drop_back() == ThatContext.drop_back();
}
- // Full context string including calling context and leaf function name
- StringRef FullContext;
- // Function name for the associated sample profile
+private:
+ /// Mangled name of the function.
StringRef Name;
- // Calling context (leaf function excluded) for the associated sample profile
- StringRef CallingContext;
+ // Full context including calling context and leaf function name
+ SampleContextFrames FullContext;
// State of the associated sample profile
uint32_t State;
// Attribute of the associated sample profile
uint32_t Attributes;
};
+static inline hash_code hash_value(const SampleContext &arg) {
+ return arg.hasContext() ? hash_value(arg.getContextFrames())
+ : hash_value(arg.getName());
+}
+
class FunctionSamples;
class SampleProfileReaderItaniumRemapper;
@@ -592,6 +731,20 @@ public:
return BodySamples[LineLocation(Index, 0)].merge(S, Weight);
}
+ // Accumulate all body samples to set total samples.
+ void updateTotalSamples() {
+ setTotalSamples(0);
+ for (const auto &I : BodySamples)
+ addTotalSamples(I.second.getSamples());
+
+ for (auto &I : CallsiteSamples) {
+ for (auto &CS : I.second) {
+ CS.second.updateTotalSamples();
+ addTotalSamples(CS.second.getTotalSamples());
+ }
+ }
+ }
+
/// Return the number of samples collected at the given location.
/// Each location is specified by \p LineOffset and \p Discriminator.
/// If the location is not found in profile, return error.
@@ -709,10 +862,9 @@ public:
/// Optionally scale samples by \p Weight.
sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight = 1) {
sampleprof_error Result = sampleprof_error::success;
- Name = Other.getName();
if (!GUIDToFuncNameMap)
GUIDToFuncNameMap = Other.GUIDToFuncNameMap;
- if (Context.getNameWithContext().empty())
+ if (Context.getName().empty())
Context = Other.getContext();
if (FunctionHash == 0) {
// Set the function hash code for the target profile.
@@ -758,7 +910,7 @@ public:
};
if (isDeclaration(SymbolMap.lookup(getFuncName()))) {
// Add to the import list only when it's defined out of module.
- S.insert(getGUID(Name));
+ S.insert(getGUID(getName()));
}
// Import hot CallTargets, which may not be available in IR because full
// profile annotation cannot be done until backend compilation in ThinLTO.
@@ -775,18 +927,13 @@ public:
}
/// Set the name of the function.
- void setName(StringRef FunctionName) { Name = FunctionName; }
+ void setName(StringRef FunctionName) { Context.setName(FunctionName); }
/// Return the function name.
- StringRef getName() const { return Name; }
-
- /// Return function name with context.
- StringRef getNameWithContext() const {
- return FunctionSamples::ProfileIsCS ? Context.getNameWithContext() : Name;
- }
+ StringRef getName() const { return Context.getName(); }
/// Return the original function name.
- StringRef getFuncName() const { return getFuncName(Name); }
+ StringRef getFuncName() const { return getFuncName(getName()); }
void setFunctionHash(uint64_t Hash) { FunctionHash = Hash; }
@@ -913,9 +1060,6 @@ public:
void findAllNames(DenseSet<StringRef> &NameSet) const;
private:
- /// Mangled name of the function.
- StringRef Name;
-
/// CFG hash value for the function.
uint64_t FunctionHash = 0;
@@ -961,6 +1105,14 @@ private:
raw_ostream &operator<<(raw_ostream &OS, const FunctionSamples &FS);
+using SampleProfileMap =
+ std::unordered_map<SampleContext, FunctionSamples, SampleContext::Hash>;
+
+using NameFunctionSamples = std::pair<SampleContext, const FunctionSamples *>;
+
+void sortFuncProfiles(const SampleProfileMap &ProfileMap,
+ std::vector<NameFunctionSamples> &SortedProfiles);
+
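// Editorial sketch of the declarations above (assumes the SampleProf.h in this
// patch): sort a SampleProfileMap into a stable order, e.g. before emission,
// without copying the FunctionSamples themselves.
#include "llvm/ProfileData/SampleProf.h"
#include <vector>

void sortExample(const llvm::sampleprof::SampleProfileMap &ProfileMap) {
  std::vector<llvm::sampleprof::NameFunctionSamples> Sorted;
  llvm::sampleprof::sortFuncProfiles(ProfileMap, Sorted);
}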
/// Sort a LocationT->SampleT map by LocationT.
///
/// It produces a sorted list of <LocationT, SampleT> records by ascending
@@ -989,18 +1141,24 @@ private:
/// sure ProfileMap's key is consistent with FunctionSample's name/context.
class SampleContextTrimmer {
public:
- SampleContextTrimmer(StringMap<FunctionSamples> &Profiles)
- : ProfileMap(Profiles){};
- // Trim and merge cold context profile when requested.
+ SampleContextTrimmer(SampleProfileMap &Profiles) : ProfileMap(Profiles){};
+ // Trim and merge cold context profile when requested. TrimBaseProfileOnly
+ // should only be effective when TrimColdContext is true. On top of
+ // TrimColdContext, TrimBaseProfileOnly can be used to specify to trim all
+ // cold profiles or only cold base profiles. Trimming base profiles only is
+ // mainly to honor the preinliner decision. Note that when MergeColdContext is
+ // true, the preinliner decision is not honored anyway, so TrimBaseProfileOnly will
+ // be ignored.
void trimAndMergeColdContextProfiles(uint64_t ColdCountThreshold,
bool TrimColdContext,
bool MergeColdContext,
- uint32_t ColdContextFrameLength);
+ uint32_t ColdContextFrameLength,
+ bool TrimBaseProfileOnly);
// Canonicalize context profile name and attributes.
void canonicalizeContextProfiles();
private:
- StringMap<FunctionSamples> &ProfileMap;
+ SampleProfileMap &ProfileMap;
};
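// Editorial sketch illustrating the flag semantics documented above (the
// threshold and frame length are made-up values): trim only cold base
// profiles, keep cold context profiles, and do not merge them, so that a
// preinliner decision is honored.
#include "llvm/ProfileData/SampleProf.h"

void trimExample(llvm::sampleprof::SampleProfileMap &Profiles) {
  llvm::sampleprof::SampleContextTrimmer Trimmer(Profiles);
  Trimmer.trimAndMergeColdContextProfiles(/*ColdCountThreshold=*/100,
                                          /*TrimColdContext=*/true,
                                          /*MergeColdContext=*/false,
                                          /*ColdContextFrameLength=*/2,
                                          /*TrimBaseProfileOnly=*/true);
}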
/// ProfileSymbolList records the list of function symbols shown up
@@ -1045,6 +1203,22 @@ private:
};
} // end namespace sampleprof
+
+using namespace sampleprof;
+// Provide DenseMapInfo for SampleContext.
+template <> struct DenseMapInfo<SampleContext> {
+ static inline SampleContext getEmptyKey() { return SampleContext(); }
+
+ static inline SampleContext getTombstoneKey() { return SampleContext("@"); }
+
+ static unsigned getHashValue(const SampleContext &Val) {
+ return Val.getHashCode();
+ }
+
+ static bool isEqual(const SampleContext &LHS, const SampleContext &RHS) {
+ return LHS == RHS;
+ }
+};
} // end namespace llvm
#endif // LLVM_PROFILEDATA_SAMPLEPROF_H
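// Editorial sketch: with the DenseMapInfo specialization above, SampleContext
// can key a DenseMap directly. The mapped type (uint64_t offsets) mirrors the
// reader's FuncOffsetTable but is only illustrative; "main" is a made-up name.
#include "llvm/ADT/DenseMap.h"
#include "llvm/ProfileData/SampleProf.h"
#include <cstdint>

void denseMapExample() {
  llvm::DenseMap<llvm::sampleprof::SampleContext, uint64_t> Offsets;
  Offsets[llvm::sampleprof::SampleContext("main")] = 0;
}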
diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h
index 2d5925bdb2b4..e6d31f1b9098 100644
--- a/llvm/include/llvm/ProfileData/SampleProfReader.h
+++ b/llvm/include/llvm/ProfileData/SampleProfReader.h
@@ -242,9 +242,11 @@
#include "llvm/Support/SymbolRemappingReader.h"
#include <algorithm>
#include <cstdint>
+#include <list>
#include <memory>
#include <string>
#include <system_error>
+#include <unordered_set>
#include <vector>
namespace llvm {
@@ -380,8 +382,8 @@ public:
/// The implementation to read sample profiles from the associated file.
virtual std::error_code readImpl() = 0;
- /// Print the profile for \p FName on stream \p OS.
- void dumpFunctionProfile(StringRef FName, raw_ostream &OS = dbgs());
+ /// Print the profile for \p FContext on stream \p OS.
+ void dumpFunctionProfile(SampleContext FContext, raw_ostream &OS = dbgs());
/// Collect functions with definitions in Module M. For reader which
/// support loading function profiles on demand, return true when the
@@ -407,6 +409,13 @@ public:
std::string FGUID;
StringRef CanonName = FunctionSamples::getCanonicalFnName(F);
CanonName = getRepInFormat(CanonName, useMD5(), FGUID);
+ auto It = Profiles.find(CanonName);
+ if (It != Profiles.end())
+ return &It->second;
+ if (!FGUID.empty()) {
+ assert(useMD5() && "New name should only be generated for md5 profile");
+ CanonName = *MD5NameBuffer.insert(FGUID).first;
+ }
return &Profiles[CanonName];
}
@@ -429,7 +438,7 @@ public:
}
/// Return all the profiles.
- StringMap<FunctionSamples> &getProfiles() { return Profiles; }
+ SampleProfileMap &getProfiles() { return Profiles; }
/// Report a parse error message.
void reportError(int64_t LineNumber, const Twine &Msg) const {
@@ -495,7 +504,7 @@ protected:
/// The profile of every function executed at runtime is collected
/// in the structure FunctionSamples. This maps function objects
/// to their corresponding profiles.
- StringMap<FunctionSamples> Profiles;
+ SampleProfileMap Profiles;
/// LLVM context used to emit diagnostics.
LLVMContext &Ctx;
@@ -503,6 +512,10 @@ protected:
/// Memory buffer holding the profile file.
std::unique_ptr<MemoryBuffer> Buffer;
+ /// Extra name buffer holding names created on demand.
+ /// This should only be needed for md5 profiles.
+ std::unordered_set<std::string> MD5NameBuffer;
+
/// Profile summary information.
std::unique_ptr<ProfileSummary> Summary;
@@ -555,6 +568,11 @@ public:
/// Return true if \p Buffer is in the format supported by this class.
static bool hasFormat(const MemoryBuffer &Buffer);
+
+private:
+ /// CSNameTable is used to save full context vectors. This serves as an
+ /// underlying immutable buffer for all clients.
+ std::list<SampleContextFrameVector> CSNameTable;
};
class SampleProfileReaderBinary : public SampleProfileReader {
@@ -626,6 +644,7 @@ protected:
/// Read a string indirectly via the name table.
virtual ErrorOr<StringRef> readStringFromTable();
+ virtual ErrorOr<SampleContext> readSampleContextFromTable();
private:
std::error_code readSummaryEntry(std::vector<ProfileSummaryEntry> &Entries);
@@ -683,6 +702,7 @@ protected:
std::error_code readFuncProfiles();
std::error_code readMD5NameTable();
std::error_code readNameTableSec(bool IsMD5);
+ std::error_code readCSNameTableSec();
std::error_code readProfileSymbolList();
virtual std::error_code readHeader() override;
@@ -692,12 +712,19 @@ protected:
// placeholder for subclasses to dispatch their own section readers.
virtual std::error_code readCustomSection(const SecHdrTableEntry &Entry) = 0;
virtual ErrorOr<StringRef> readStringFromTable() override;
+ virtual ErrorOr<SampleContext> readSampleContextFromTable() override;
+ ErrorOr<SampleContextFrames> readContextFromTable();
std::unique_ptr<ProfileSymbolList> ProfSymList;
- /// The table mapping from function name to the offset of its FunctionSample
- /// towards file start.
- DenseMap<StringRef, uint64_t> FuncOffsetTable;
+ /// The table mapping from function context to the offset of its
+ /// FunctionSample towards file start.
+ DenseMap<SampleContext, uint64_t> FuncOffsetTable;
+
+ /// Function offset mapping ordered by contexts.
+ std::unique_ptr<std::vector<std::pair<SampleContext, uint64_t>>>
+ OrderedFuncOffsets;
+
/// The set containing the functions to use when compiling a module.
DenseSet<StringRef> FuncsToUse;
@@ -716,10 +743,16 @@ protected:
/// the lifetime of MD5StringBuf is not shorter than that of NameTable.
std::unique_ptr<std::vector<std::string>> MD5StringBuf;
+ /// CSNameTable is used to save full context vectors. This serves as an
+ /// underlying immutable buffer for all clients.
+ std::unique_ptr<const std::vector<SampleContextFrameVector>> CSNameTable;
+
/// If SkipFlatProf is true, skip the sections with
/// SecFlagFlat flag.
bool SkipFlatProf = false;
+ bool FuncOffsetsOrdered = false;
+
public:
SampleProfileReaderExtBinaryBase(std::unique_ptr<MemoryBuffer> B,
LLVMContext &C, SampleProfileFormat Format)
@@ -753,6 +786,8 @@ private:
virtual std::error_code verifySPMagic(uint64_t Magic) override;
virtual std::error_code
readCustomSection(const SecHdrTableEntry &Entry) override {
+ // Update the data reader pointer to the end of the section.
+ Data = End;
return sampleprof_error::success;
};
diff --git a/llvm/include/llvm/ProfileData/SampleProfWriter.h b/llvm/include/llvm/ProfileData/SampleProfWriter.h
index 107f7a730a3c..773beac24ebc 100644
--- a/llvm/include/llvm/ProfileData/SampleProfWriter.h
+++ b/llvm/include/llvm/ProfileData/SampleProfWriter.h
@@ -52,7 +52,7 @@ public:
/// Write all the sample profiles in the given map of samples.
///
/// \returns status code of the file update operation.
- virtual std::error_code write(const StringMap<FunctionSamples> &ProfileMap);
+ virtual std::error_code write(const SampleProfileMap &ProfileMap);
raw_ostream &getOutputStream() { return *OutputStream; }
@@ -78,12 +78,10 @@ protected:
: OutputStream(std::move(OS)) {}
/// Write a file header for the profile file.
- virtual std::error_code
- writeHeader(const StringMap<FunctionSamples> &ProfileMap) = 0;
+ virtual std::error_code writeHeader(const SampleProfileMap &ProfileMap) = 0;
// Write function profiles to the profile file.
- virtual std::error_code
- writeFuncProfiles(const StringMap<FunctionSamples> &ProfileMap);
+ virtual std::error_code writeFuncProfiles(const SampleProfileMap &ProfileMap);
/// Output stream where to emit the profile to.
std::unique_ptr<raw_ostream> OutputStream;
@@ -92,7 +90,7 @@ protected:
std::unique_ptr<ProfileSummary> Summary;
/// Compute summary for this profile.
- void computeSummary(const StringMap<FunctionSamples> &ProfileMap);
+ void computeSummary(const SampleProfileMap &ProfileMap);
/// Profile format.
SampleProfileFormat Format = SPF_None;
@@ -107,8 +105,7 @@ protected:
SampleProfileWriterText(std::unique_ptr<raw_ostream> &OS)
: SampleProfileWriter(OS), Indent(0) {}
- std::error_code
- writeHeader(const StringMap<FunctionSamples> &ProfileMap) override {
+ std::error_code writeHeader(const SampleProfileMap &ProfileMap) override {
return sampleprof_error::success;
}
@@ -132,19 +129,22 @@ public:
virtual std::error_code writeSample(const FunctionSamples &S) override;
protected:
+ virtual MapVector<StringRef, uint32_t> &getNameTable() { return NameTable; }
virtual std::error_code writeMagicIdent(SampleProfileFormat Format);
virtual std::error_code writeNameTable();
virtual std::error_code
- writeHeader(const StringMap<FunctionSamples> &ProfileMap) override;
+ writeHeader(const SampleProfileMap &ProfileMap) override;
std::error_code writeSummary();
- std::error_code writeNameIdx(StringRef FName, bool IsContextName = false);
+ virtual std::error_code writeContextIdx(const SampleContext &Context);
+ std::error_code writeNameIdx(StringRef FName);
std::error_code writeBody(const FunctionSamples &S);
- inline void stablizeNameTable(std::set<StringRef> &V);
+ inline void stablizeNameTable(MapVector<StringRef, uint32_t> &NameTable,
+ std::set<StringRef> &V);
MapVector<StringRef, uint32_t> NameTable;
- std::unordered_set<std::string> BracketedContextStr;
- void addName(StringRef FName, bool IsContextName = false);
+ void addName(StringRef FName);
+ virtual void addContext(const SampleContext &Context);
void addNames(const FunctionSamples &S);
private:
@@ -168,6 +168,7 @@ const std::array<SmallVector<SecHdrTableEntry, 8>, NumOfLayout>
// DefaultLayout
SmallVector<SecHdrTableEntry, 8>({{SecProfSummary, 0, 0, 0, 0},
{SecNameTable, 0, 0, 0, 0},
+ {SecCSNameTable, 0, 0, 0, 0},
{SecFuncOffsetTable, 0, 0, 0, 0},
{SecLBRProfile, 0, 0, 0, 0},
{SecProfileSymbolList, 0, 0, 0, 0},
@@ -190,8 +191,7 @@ const std::array<SmallVector<SecHdrTableEntry, 8>, NumOfLayout>
class SampleProfileWriterExtBinaryBase : public SampleProfileWriterBinary {
using SampleProfileWriterBinary::SampleProfileWriterBinary;
public:
- virtual std::error_code
- write(const StringMap<FunctionSamples> &ProfileMap) override;
+ virtual std::error_code write(const SampleProfileMap &ProfileMap) override;
virtual void setToCompressAllSections() override;
void setToCompressSection(SecType Type);
@@ -246,29 +246,32 @@ protected:
addSecFlag(SectionHdrLayout[SectionIdx], Flag);
}
+ virtual void addContext(const SampleContext &Context) override;
+
// placeholder for subclasses to dispatch their own section writers.
virtual std::error_code writeCustomSection(SecType Type) = 0;
// Verify the SecLayout is supported by the format.
virtual void verifySecLayout(SectionLayout SL) = 0;
// specify the order to write sections.
- virtual std::error_code
- writeSections(const StringMap<FunctionSamples> &ProfileMap) = 0;
+ virtual std::error_code writeSections(const SampleProfileMap &ProfileMap) = 0;
// Dispatch section writer for each section. \p LayoutIdx is the sequence
// number indicating where the section is located in SectionHdrLayout.
- virtual std::error_code
- writeOneSection(SecType Type, uint32_t LayoutIdx,
- const StringMap<FunctionSamples> &ProfileMap);
+ virtual std::error_code writeOneSection(SecType Type, uint32_t LayoutIdx,
+ const SampleProfileMap &ProfileMap);
// Helper function to write name table.
virtual std::error_code writeNameTable() override;
+ virtual std::error_code
+ writeContextIdx(const SampleContext &Context) override;
+ std::error_code writeCSNameIdx(const SampleContext &Context);
+ std::error_code writeCSNameTableSection();
- std::error_code writeFuncMetadata(const StringMap<FunctionSamples> &Profiles);
+ std::error_code writeFuncMetadata(const SampleProfileMap &Profiles);
// Functions to write various kinds of sections.
- std::error_code
- writeNameTableSection(const StringMap<FunctionSamples> &ProfileMap);
+ std::error_code writeNameTableSection(const SampleProfileMap &ProfileMap);
std::error_code writeFuncOffsetTable();
std::error_code writeProfileSymbolListSection();
@@ -289,7 +292,7 @@ private:
void allocSecHdrTable();
std::error_code writeSecHdrTable();
virtual std::error_code
- writeHeader(const StringMap<FunctionSamples> &ProfileMap) override;
+ writeHeader(const SampleProfileMap &ProfileMap) override;
std::error_code compressAndOutput();
// We will swap the raw_ostream held by LocalBufStream and that
@@ -312,12 +315,16 @@ private:
// be read.
std::vector<SecHdrTableEntry> SecHdrTable;
- // FuncOffsetTable maps function name to its profile offset in SecLBRProfile
- // section. It is used to load function profile on demand.
- MapVector<StringRef, uint64_t> FuncOffsetTable;
+ // FuncOffsetTable maps function context to its profile offset in
+ // SecLBRProfile section. It is used to load function profile on demand.
+ MapVector<SampleContext, uint64_t> FuncOffsetTable;
// Whether to use MD5 to represent string.
bool UseMD5 = false;
+ /// CSNameTable maps function context to its offset in SecCSNameTable section.
+ /// The offset will be used wherever the context is referenced.
+ MapVector<SampleContext, uint32_t> CSNameTable;
+
ProfileSymbolList *ProfSymList = nullptr;
};
@@ -327,13 +334,11 @@ public:
: SampleProfileWriterExtBinaryBase(OS) {}
private:
- std::error_code
- writeDefaultLayout(const StringMap<FunctionSamples> &ProfileMap);
- std::error_code
- writeCtxSplitLayout(const StringMap<FunctionSamples> &ProfileMap);
+ std::error_code writeDefaultLayout(const SampleProfileMap &ProfileMap);
+ std::error_code writeCtxSplitLayout(const SampleProfileMap &ProfileMap);
virtual std::error_code
- writeSections(const StringMap<FunctionSamples> &ProfileMap) override;
+ writeSections(const SampleProfileMap &ProfileMap) override;
virtual std::error_code writeCustomSection(SecType Type) override {
return sampleprof_error::success;
@@ -380,8 +385,7 @@ class SampleProfileWriterCompactBinary : public SampleProfileWriterBinary {
public:
virtual std::error_code writeSample(const FunctionSamples &S) override;
- virtual std::error_code
- write(const StringMap<FunctionSamples> &ProfileMap) override;
+ virtual std::error_code write(const SampleProfileMap &ProfileMap) override;
protected:
/// The table mapping from function name to the offset of its FunctionSample
@@ -392,7 +396,7 @@ protected:
uint64_t TableOffset;
virtual std::error_code writeNameTable() override;
virtual std::error_code
- writeHeader(const StringMap<FunctionSamples> &ProfileMap) override;
+ writeHeader(const SampleProfileMap &ProfileMap) override;
std::error_code writeFuncOffsetTable();
};
diff --git a/llvm/include/llvm/Support/AArch64TargetParser.def b/llvm/include/llvm/Support/AArch64TargetParser.def
index ae2fc673c54e..b3cfb71601f1 100644
--- a/llvm/include/llvm/Support/AArch64TargetParser.def
+++ b/llvm/include/llvm/Support/AArch64TargetParser.def
@@ -58,6 +58,24 @@ AARCH64_ARCH("armv8.7-a", ARMV8_7A, "8.7-A", "v8.7a",
AArch64::AEK_RDM | AArch64::AEK_RCPC | AArch64::AEK_DOTPROD |
AArch64::AEK_SM4 | AArch64::AEK_SHA3 | AArch64::AEK_BF16 |
AArch64::AEK_SHA2 | AArch64::AEK_AES | AArch64::AEK_I8MM))
+AARCH64_ARCH("armv9-a", ARMV9A, "9-A", "v9a",
+ ARMBuildAttrs::CPUArch::v8_A, FK_NEON_FP_ARMV8,
+ (AArch64::AEK_CRC | AArch64::AEK_FP |
+ AArch64::AEK_SIMD | AArch64::AEK_RAS | AArch64::AEK_LSE |
+ AArch64::AEK_RDM | AArch64::AEK_RCPC | AArch64::AEK_DOTPROD |
+ AArch64::AEK_SVE2))
+AARCH64_ARCH("armv9.1-a", ARMV9_1A, "9.1-A", "v9.1a",
+ ARMBuildAttrs::CPUArch::v8_A, FK_NEON_FP_ARMV8,
+ (AArch64::AEK_CRC | AArch64::AEK_FP |
+ AArch64::AEK_SIMD | AArch64::AEK_RAS | AArch64::AEK_LSE |
+ AArch64::AEK_RDM | AArch64::AEK_RCPC | AArch64::AEK_DOTPROD |
+ AArch64::AEK_SVE2))
+AARCH64_ARCH("armv9.2-a", ARMV9_2A, "9.2-A", "v9.2a",
+ ARMBuildAttrs::CPUArch::v8_A, FK_NEON_FP_ARMV8,
+ (AArch64::AEK_CRC | AArch64::AEK_FP |
+ AArch64::AEK_SIMD | AArch64::AEK_RAS | AArch64::AEK_LSE |
+ AArch64::AEK_RDM | AArch64::AEK_RCPC | AArch64::AEK_DOTPROD |
+ AArch64::AEK_SVE2))
// For v8-R, we do not enable crypto and align with GCC that enables a more
// minimal set of optional architecture extensions.
AARCH64_ARCH("armv8-r", ARMV8R, "8-R", "v8r",
@@ -126,6 +144,11 @@ AARCH64_CPU_NAME("cortex-a53", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, true,
(AArch64::AEK_CRC))
AARCH64_CPU_NAME("cortex-a55", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC))
+AARCH64_CPU_NAME("cortex-a510", ARMV9A, FK_NEON_FP_ARMV8, false,
+ (AArch64::AEK_BF16 | AArch64::AEK_I8MM | AArch64::AEK_SB |
+ AArch64::AEK_PAUTH | AArch64::AEK_MTE | AArch64::AEK_SSBS |
+ AArch64::AEK_SVE | AArch64::AEK_SVE2 | AArch64::AEK_SVE2BITPERM |
+ AArch64::AEK_FP16FML))
AARCH64_CPU_NAME("cortex-a57", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_CRC))
AARCH64_CPU_NAME("cortex-a65", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
@@ -155,11 +178,20 @@ AARCH64_CPU_NAME("cortex-a78", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
AARCH64_CPU_NAME("cortex-a78c", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC |
AArch64::AEK_SSBS))
+AARCH64_CPU_NAME("cortex-a710", ARMV9A, FK_NEON_FP_ARMV8, false,
+ (AArch64::AEK_MTE | AArch64::AEK_PAUTH | AArch64::AEK_FLAGM |
+ AArch64::AEK_SB | AArch64::AEK_I8MM | AArch64::AEK_FP16FML |
+ AArch64::AEK_SVE2BITPERM | AArch64::AEK_BF16))
AARCH64_CPU_NAME("cortex-r82", ARMV8R, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_LSE))
AARCH64_CPU_NAME("cortex-x1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC |
AArch64::AEK_SSBS))
+AARCH64_CPU_NAME("cortex-x2", ARMV9A, FK_NEON_FP_ARMV8, false,
+ (AArch64::AEK_MTE | AArch64::AEK_BF16 | AArch64::AEK_I8MM |
+ AArch64::AEK_PAUTH | AArch64::AEK_SSBS | AArch64::AEK_SB |
+ AArch64::AEK_SVE | AArch64::AEK_SVE2 | AArch64::AEK_SVE2BITPERM |
+ AArch64::AEK_FP16FML))
AARCH64_CPU_NAME("neoverse-e1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_DOTPROD | AArch64::AEK_FP16 | AArch64::AEK_RAS |
AArch64::AEK_RCPC | AArch64::AEK_SSBS))
@@ -172,6 +204,10 @@ AARCH64_CPU_NAME("neoverse-n2", ARMV8_5A, FK_CRYPTO_NEON_FP_ARMV8, false,
AArch64::AEK_I8MM | AArch64::AEK_MTE | AArch64::AEK_RAS |
AArch64::AEK_RCPC | AArch64::AEK_SB | AArch64::AEK_SSBS |
AArch64::AEK_SVE | AArch64::AEK_SVE2 | AArch64::AEK_SVE2BITPERM))
+AARCH64_CPU_NAME("neoverse-512tvb", ARMV8_4A, FK_CRYPTO_NEON_FP_ARMV8, false,
+ (AArch64::AEK_RAS | AArch64::AEK_SVE | AArch64::AEK_SSBS |
+ AArch64::AEK_RCPC | AArch64::AEK_FP16 | AArch64::AEK_BF16 |
+ AArch64::AEK_DOTPROD ))
AARCH64_CPU_NAME("neoverse-v1", ARMV8_4A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_RAS | AArch64::AEK_SVE | AArch64::AEK_SSBS |
AArch64::AEK_RCPC | AArch64::AEK_FP16 | AArch64::AEK_BF16 |
diff --git a/llvm/include/llvm/Support/ARMTargetParser.def b/llvm/include/llvm/Support/ARMTargetParser.def
index 14b169a6e111..fd08f3e6960c 100644
--- a/llvm/include/llvm/Support/ARMTargetParser.def
+++ b/llvm/include/llvm/Support/ARMTargetParser.def
@@ -122,6 +122,21 @@ ARM_ARCH("armv8.7-a", ARMV8_7A, "8.7-A", "v8.7a",
(ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS |
ARM::AEK_DOTPROD | ARM::AEK_BF16 | ARM::AEK_I8MM))
+ARM_ARCH("armv9-a", ARMV9A, "9-A", "v9a",
+ ARMBuildAttrs::CPUArch::v8_A, FK_NEON_FP_ARMV8,
+ (ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
+ ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS |
+ ARM::AEK_DOTPROD))
+ARM_ARCH("armv9.1-a", ARMV9_1A, "9.1-A", "v9.1a",
+ ARMBuildAttrs::CPUArch::v8_A, FK_NEON_FP_ARMV8,
+ (ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
+ ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS |
+ ARM::AEK_DOTPROD | ARM::AEK_BF16 | ARM::AEK_I8MM))
+ARM_ARCH("armv9.2-a", ARMV9_2A, "9.2-A", "v9.2a",
+ ARMBuildAttrs::CPUArch::v8_A, FK_NEON_FP_ARMV8,
+ (ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
+ ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS |
+ ARM::AEK_DOTPROD | ARM::AEK_BF16 | ARM::AEK_I8MM))
ARM_ARCH("armv8-r", ARMV8R, "8-R", "v8r", ARMBuildAttrs::CPUArch::v8_R,
FK_NEON_FP_ARMV8,
(ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB |
@@ -296,6 +311,9 @@ ARM_CPU_NAME("cortex-a78", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
(ARM::AEK_FP16 | ARM::AEK_DOTPROD))
ARM_CPU_NAME("cortex-a78c", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
ARM::AEK_FP16 | ARM::AEK_DOTPROD)
+ARM_CPU_NAME("cortex-a710", ARMV9A, FK_NEON_FP_ARMV8, false,
+ (ARM::AEK_DOTPROD | ARM::AEK_FP16FML | ARM::AEK_BF16 | ARM::AEK_SB |
+ ARM::AEK_I8MM))
ARM_CPU_NAME("cortex-x1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
(ARM::AEK_FP16 | ARM::AEK_DOTPROD))
ARM_CPU_NAME("neoverse-n1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
diff --git a/llvm/include/llvm/Support/Allocator.h b/llvm/include/llvm/Support/Allocator.h
index 245432debce6..9e8ce4e36197 100644
--- a/llvm/include/llvm/Support/Allocator.h
+++ b/llvm/include/llvm/Support/Allocator.h
@@ -277,7 +277,7 @@ public:
size_t TotalMemory = 0;
for (auto I = Slabs.begin(), E = Slabs.end(); I != E; ++I)
TotalMemory += computeSlabSize(std::distance(Slabs.begin(), I));
- for (auto &PtrAndSize : CustomSizedSlabs)
+ for (const auto &PtrAndSize : CustomSizedSlabs)
TotalMemory += PtrAndSize.second;
return TotalMemory;
}
diff --git a/llvm/include/llvm/Support/AtomicOrdering.h b/llvm/include/llvm/Support/AtomicOrdering.h
index 27ca825cef46..1a0d108300bc 100644
--- a/llvm/include/llvm/Support/AtomicOrdering.h
+++ b/llvm/include/llvm/Support/AtomicOrdering.h
@@ -133,6 +133,16 @@ inline bool isReleaseOrStronger(AtomicOrdering AO) {
return isAtLeastOrStrongerThan(AO, AtomicOrdering::Release);
}
+/// Return a single atomic ordering that is at least as strong as both the \p AO
+/// and \p Other orderings for an atomic operation.
+inline AtomicOrdering getMergedAtomicOrdering(AtomicOrdering AO,
+ AtomicOrdering Other) {
+ if ((AO == AtomicOrdering::Acquire && Other == AtomicOrdering::Release) ||
+ (AO == AtomicOrdering::Release && Other == AtomicOrdering::Acquire))
+ return AtomicOrdering::AcquireRelease;
+ return isStrongerThan(AO, Other) ? AO : Other;
+}
+
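// Editorial illustration of the merge rule above: Acquire combined with
// Release yields AcquireRelease; otherwise the stronger of the two orderings
// wins.
#include "llvm/Support/AtomicOrdering.h"
#include <cassert>

void mergedOrderingExample() {
  using llvm::AtomicOrdering;
  assert(llvm::getMergedAtomicOrdering(AtomicOrdering::Acquire,
                                       AtomicOrdering::Release) ==
         AtomicOrdering::AcquireRelease);
  assert(llvm::getMergedAtomicOrdering(
             AtomicOrdering::Monotonic,
             AtomicOrdering::SequentiallyConsistent) ==
         AtomicOrdering::SequentiallyConsistent);
}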
inline AtomicOrderingCABI toCABI(AtomicOrdering AO) {
static const AtomicOrderingCABI lookup[8] = {
/* NotAtomic */ AtomicOrderingCABI::relaxed,
diff --git a/llvm/include/llvm/Support/BinaryByteStream.h b/llvm/include/llvm/Support/BinaryByteStream.h
index ca5bb5abecfc..7d8b6d2dc43d 100644
--- a/llvm/include/llvm/Support/BinaryByteStream.h
+++ b/llvm/include/llvm/Support/BinaryByteStream.h
@@ -38,7 +38,7 @@ public:
llvm::support::endianness getEndian() const override { return Endian; }
- Error readBytes(uint32_t Offset, uint32_t Size,
+ Error readBytes(uint64_t Offset, uint64_t Size,
ArrayRef<uint8_t> &Buffer) override {
if (auto EC = checkOffsetForRead(Offset, Size))
return EC;
@@ -46,7 +46,7 @@ public:
return Error::success();
}
- Error readLongestContiguousChunk(uint32_t Offset,
+ Error readLongestContiguousChunk(uint64_t Offset,
ArrayRef<uint8_t> &Buffer) override {
if (auto EC = checkOffsetForRead(Offset, 1))
return EC;
@@ -54,7 +54,7 @@ public:
return Error::success();
}
- uint32_t getLength() override { return Data.size(); }
+ uint64_t getLength() override { return Data.size(); }
ArrayRef<uint8_t> data() const { return Data; }
@@ -97,19 +97,19 @@ public:
return ImmutableStream.getEndian();
}
- Error readBytes(uint32_t Offset, uint32_t Size,
+ Error readBytes(uint64_t Offset, uint64_t Size,
ArrayRef<uint8_t> &Buffer) override {
return ImmutableStream.readBytes(Offset, Size, Buffer);
}
- Error readLongestContiguousChunk(uint32_t Offset,
+ Error readLongestContiguousChunk(uint64_t Offset,
ArrayRef<uint8_t> &Buffer) override {
return ImmutableStream.readLongestContiguousChunk(Offset, Buffer);
}
- uint32_t getLength() override { return ImmutableStream.getLength(); }
+ uint64_t getLength() override { return ImmutableStream.getLength(); }
- Error writeBytes(uint32_t Offset, ArrayRef<uint8_t> Buffer) override {
+ Error writeBytes(uint64_t Offset, ArrayRef<uint8_t> Buffer) override {
if (Buffer.empty())
return Error::success();
@@ -145,7 +145,7 @@ public:
llvm::support::endianness getEndian() const override { return Endian; }
- Error readBytes(uint32_t Offset, uint32_t Size,
+ Error readBytes(uint64_t Offset, uint64_t Size,
ArrayRef<uint8_t> &Buffer) override {
if (auto EC = checkOffsetForWrite(Offset, Buffer.size()))
return EC;
@@ -154,11 +154,11 @@ public:
return Error::success();
}
- void insert(uint32_t Offset, ArrayRef<uint8_t> Bytes) {
+ void insert(uint64_t Offset, ArrayRef<uint8_t> Bytes) {
Data.insert(Data.begin() + Offset, Bytes.begin(), Bytes.end());
}
- Error readLongestContiguousChunk(uint32_t Offset,
+ Error readLongestContiguousChunk(uint64_t Offset,
ArrayRef<uint8_t> &Buffer) override {
if (auto EC = checkOffsetForWrite(Offset, 1))
return EC;
@@ -167,9 +167,9 @@ public:
return Error::success();
}
- uint32_t getLength() override { return Data.size(); }
+ uint64_t getLength() override { return Data.size(); }
- Error writeBytes(uint32_t Offset, ArrayRef<uint8_t> Buffer) override {
+ Error writeBytes(uint64_t Offset, ArrayRef<uint8_t> Buffer) override {
if (Buffer.empty())
return Error::success();
@@ -182,7 +182,7 @@ public:
if (Offset > getLength())
return make_error<BinaryStreamError>(stream_error_code::invalid_offset);
- uint32_t RequiredSize = Offset + Buffer.size();
+ uint64_t RequiredSize = Offset + Buffer.size();
if (RequiredSize > Data.size())
Data.resize(RequiredSize);
@@ -240,19 +240,19 @@ public:
return Impl.getEndian();
}
- Error readBytes(uint32_t Offset, uint32_t Size,
+ Error readBytes(uint64_t Offset, uint64_t Size,
ArrayRef<uint8_t> &Buffer) override {
return Impl.readBytes(Offset, Size, Buffer);
}
- Error readLongestContiguousChunk(uint32_t Offset,
+ Error readLongestContiguousChunk(uint64_t Offset,
ArrayRef<uint8_t> &Buffer) override {
return Impl.readLongestContiguousChunk(Offset, Buffer);
}
- uint32_t getLength() override { return Impl.getLength(); }
+ uint64_t getLength() override { return Impl.getLength(); }
- Error writeBytes(uint32_t Offset, ArrayRef<uint8_t> Data) override {
+ Error writeBytes(uint64_t Offset, ArrayRef<uint8_t> Data) override {
return Impl.writeBytes(Offset, Data);
}
diff --git a/llvm/include/llvm/Support/BinaryItemStream.h b/llvm/include/llvm/Support/BinaryItemStream.h
index 4d27013ce368..eb512bf4721a 100644
--- a/llvm/include/llvm/Support/BinaryItemStream.h
+++ b/llvm/include/llvm/Support/BinaryItemStream.h
@@ -38,7 +38,7 @@ public:
llvm::support::endianness getEndian() const override { return Endian; }
- Error readBytes(uint32_t Offset, uint32_t Size,
+ Error readBytes(uint64_t Offset, uint64_t Size,
ArrayRef<uint8_t> &Buffer) override {
auto ExpectedIndex = translateOffsetIndex(Offset);
if (!ExpectedIndex)
@@ -52,7 +52,7 @@ public:
return Error::success();
}
- Error readLongestContiguousChunk(uint32_t Offset,
+ Error readLongestContiguousChunk(uint64_t Offset,
ArrayRef<uint8_t> &Buffer) override {
auto ExpectedIndex = translateOffsetIndex(Offset);
if (!ExpectedIndex)
@@ -66,7 +66,7 @@ public:
computeItemOffsets();
}
- uint32_t getLength() override {
+ uint64_t getLength() override {
return ItemEndOffsets.empty() ? 0 : ItemEndOffsets.back();
}
@@ -74,16 +74,16 @@ private:
void computeItemOffsets() {
ItemEndOffsets.clear();
ItemEndOffsets.reserve(Items.size());
- uint32_t CurrentOffset = 0;
+ uint64_t CurrentOffset = 0;
for (const auto &Item : Items) {
- uint32_t Len = Traits::length(Item);
+ uint64_t Len = Traits::length(Item);
assert(Len > 0 && "no empty items");
CurrentOffset += Len;
ItemEndOffsets.push_back(CurrentOffset);
}
}
- Expected<uint32_t> translateOffsetIndex(uint32_t Offset) {
+ Expected<uint32_t> translateOffsetIndex(uint64_t Offset) {
// Make sure the offset is somewhere in our items array.
if (Offset >= getLength())
return make_error<BinaryStreamError>(stream_error_code::stream_too_short);
@@ -98,7 +98,7 @@ private:
ArrayRef<T> Items;
// Sorted vector of offsets to accelerate lookup.
- std::vector<uint32_t> ItemEndOffsets;
+ std::vector<uint64_t> ItemEndOffsets;
};
} // end namespace llvm
diff --git a/llvm/include/llvm/Support/BinaryStream.h b/llvm/include/llvm/Support/BinaryStream.h
index fcf4398550ee..e87129d8c201 100644
--- a/llvm/include/llvm/Support/BinaryStream.h
+++ b/llvm/include/llvm/Support/BinaryStream.h
@@ -41,22 +41,22 @@ public:
/// Given an offset into the stream and a number of bytes, attempt to
/// read the bytes and set the output ArrayRef to point to data owned by the
/// stream.
- virtual Error readBytes(uint32_t Offset, uint32_t Size,
+ virtual Error readBytes(uint64_t Offset, uint64_t Size,
ArrayRef<uint8_t> &Buffer) = 0;
/// Given an offset into the stream, read as much as possible without
/// copying any data.
- virtual Error readLongestContiguousChunk(uint32_t Offset,
+ virtual Error readLongestContiguousChunk(uint64_t Offset,
ArrayRef<uint8_t> &Buffer) = 0;
/// Return the number of bytes of data in this stream.
- virtual uint32_t getLength() = 0;
+ virtual uint64_t getLength() = 0;
/// Return the properties of this stream.
virtual BinaryStreamFlags getFlags() const { return BSF_None; }
protected:
- Error checkOffsetForRead(uint32_t Offset, uint32_t DataSize) {
+ Error checkOffsetForRead(uint64_t Offset, uint64_t DataSize) {
if (Offset > getLength())
return make_error<BinaryStreamError>(stream_error_code::invalid_offset);
if (getLength() < DataSize + Offset)
@@ -77,7 +77,7 @@ public:
/// Attempt to write the given bytes into the stream at the desired
/// offset. This will always necessitate a copy. Cannot shrink or grow the
/// stream, only writes into existing allocated space.
- virtual Error writeBytes(uint32_t Offset, ArrayRef<uint8_t> Data) = 0;
+ virtual Error writeBytes(uint64_t Offset, ArrayRef<uint8_t> Data) = 0;
/// For buffered streams, commits changes to the backing store.
virtual Error commit() = 0;
@@ -86,7 +86,7 @@ public:
BinaryStreamFlags getFlags() const override { return BSF_Write; }
protected:
- Error checkOffsetForWrite(uint32_t Offset, uint32_t DataSize) {
+ Error checkOffsetForWrite(uint64_t Offset, uint64_t DataSize) {
if (!(getFlags() & BSF_Append))
return checkOffsetForRead(Offset, DataSize);
diff --git a/llvm/include/llvm/Support/BinaryStreamArray.h b/llvm/include/llvm/Support/BinaryStreamArray.h
index 148ab85169f2..85d29be26ca9 100644
--- a/llvm/include/llvm/Support/BinaryStreamArray.h
+++ b/llvm/include/llvm/Support/BinaryStreamArray.h
@@ -153,7 +153,7 @@ private:
template <typename ValueType, typename Extractor>
class VarStreamArrayIterator
: public iterator_facade_base<VarStreamArrayIterator<ValueType, Extractor>,
- std::forward_iterator_tag, ValueType> {
+ std::forward_iterator_tag, const ValueType> {
typedef VarStreamArrayIterator<ValueType, Extractor> IterType;
typedef VarStreamArray<ValueType, Extractor> ArrayType;
@@ -197,11 +197,6 @@ public:
return ThisValue;
}
- ValueType &operator*() {
- assert(Array && !HasError);
- return ThisValue;
- }
-
IterType &operator+=(unsigned N) {
for (unsigned I = 0; I < N; ++I) {
// We are done with the current record, discard it so that we are
diff --git a/llvm/include/llvm/Support/BinaryStreamReader.h b/llvm/include/llvm/Support/BinaryStreamReader.h
index 9ad98a89aaf9..29b4b09b848c 100644
--- a/llvm/include/llvm/Support/BinaryStreamReader.h
+++ b/llvm/include/llvm/Support/BinaryStreamReader.h
@@ -251,16 +251,16 @@ public:
}
bool empty() const { return bytesRemaining() == 0; }
- void setOffset(uint32_t Off) { Offset = Off; }
- uint32_t getOffset() const { return Offset; }
- uint32_t getLength() const { return Stream.getLength(); }
- uint32_t bytesRemaining() const { return getLength() - getOffset(); }
+ void setOffset(uint64_t Off) { Offset = Off; }
+ uint64_t getOffset() const { return Offset; }
+ uint64_t getLength() const { return Stream.getLength(); }
+ uint64_t bytesRemaining() const { return getLength() - getOffset(); }
/// Advance the stream's offset by \p Amount bytes.
///
/// \returns a success error code if at least \p Amount bytes remain in the
/// stream, otherwise returns an appropriate error code.
- Error skip(uint32_t Amount);
+ Error skip(uint64_t Amount);
/// Examine the next byte of the underlying stream without advancing the
/// stream's offset. If the stream is empty the behavior is undefined.
@@ -271,11 +271,11 @@ public:
Error padToAlignment(uint32_t Align);
std::pair<BinaryStreamReader, BinaryStreamReader>
- split(uint32_t Offset) const;
+ split(uint64_t Offset) const;
private:
BinaryStreamRef Stream;
- uint32_t Offset = 0;
+ uint64_t Offset = 0;
};
} // namespace llvm
diff --git a/llvm/include/llvm/Support/BinaryStreamRef.h b/llvm/include/llvm/Support/BinaryStreamRef.h
index ba4c3873586d..e0aaab82ffab 100644
--- a/llvm/include/llvm/Support/BinaryStreamRef.h
+++ b/llvm/include/llvm/Support/BinaryStreamRef.h
@@ -30,12 +30,12 @@ protected:
Length = BorrowedImpl.getLength();
}
- BinaryStreamRefBase(std::shared_ptr<StreamType> SharedImpl, uint32_t Offset,
- Optional<uint32_t> Length)
+ BinaryStreamRefBase(std::shared_ptr<StreamType> SharedImpl, uint64_t Offset,
+ Optional<uint64_t> Length)
: SharedImpl(SharedImpl), BorrowedImpl(SharedImpl.get()),
ViewOffset(Offset), Length(Length) {}
- BinaryStreamRefBase(StreamType &BorrowedImpl, uint32_t Offset,
- Optional<uint32_t> Length)
+ BinaryStreamRefBase(StreamType &BorrowedImpl, uint64_t Offset,
+ Optional<uint64_t> Length)
: BorrowedImpl(&BorrowedImpl), ViewOffset(Offset), Length(Length) {}
BinaryStreamRefBase(const BinaryStreamRefBase &Other) = default;
BinaryStreamRefBase &operator=(const BinaryStreamRefBase &Other) = default;
@@ -48,7 +48,7 @@ public:
return BorrowedImpl->getEndian();
}
- uint32_t getLength() const {
+ uint64_t getLength() const {
if (Length.hasValue())
return *Length;
@@ -58,7 +58,7 @@ public:
/// Return a new BinaryStreamRef with the first \p N elements removed. If
/// this BinaryStreamRef is length-tracking, then the resulting one will be
/// too.
- RefType drop_front(uint32_t N) const {
+ RefType drop_front(uint64_t N) const {
if (!BorrowedImpl)
return RefType();
@@ -76,7 +76,7 @@ public:
/// Return a new BinaryStreamRef with the last \p N elements removed. If
/// this BinaryStreamRef is length-tracking and \p N is greater than 0, then
/// this BinaryStreamRef will no longer length-track.
- RefType drop_back(uint32_t N) const {
+ RefType drop_back(uint64_t N) const {
if (!BorrowedImpl)
return RefType();
@@ -96,26 +96,26 @@ public:
}
/// Return a new BinaryStreamRef with only the first \p N elements remaining.
- RefType keep_front(uint32_t N) const {
+ RefType keep_front(uint64_t N) const {
assert(N <= getLength());
return drop_back(getLength() - N);
}
/// Return a new BinaryStreamRef with only the last \p N elements remaining.
- RefType keep_back(uint32_t N) const {
+ RefType keep_back(uint64_t N) const {
assert(N <= getLength());
return drop_front(getLength() - N);
}
/// Return a new BinaryStreamRef with the first and last \p N elements
/// removed.
- RefType drop_symmetric(uint32_t N) const {
+ RefType drop_symmetric(uint64_t N) const {
return drop_front(N).drop_back(N);
}
/// Return a new BinaryStreamRef with the first \p Offset elements removed,
/// and retaining exactly \p Len elements.
- RefType slice(uint32_t Offset, uint32_t Len) const {
+ RefType slice(uint64_t Offset, uint64_t Len) const {
return drop_front(Offset).keep_front(Len);
}
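// Editorial sketch of how the view helpers above compose (the byte values are
// arbitrary): slice(Offset, Len) is drop_front(Offset) followed by
// keep_front(Len), so no data is copied, only the view changes.
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/BinaryStreamRef.h"
#include <cassert>
#include <cstdint>

void sliceExample() {
  uint8_t Bytes[] = {0, 1, 2, 3, 4, 5, 6, 7};
  llvm::BinaryStreamRef Ref(llvm::ArrayRef<uint8_t>(Bytes),
                            llvm::support::little);
  assert(Ref.getLength() == 8);
  assert(Ref.slice(2, 4).getLength() == 4);
  assert(Ref.drop_symmetric(1).getLength() == 6);
}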
@@ -132,7 +132,7 @@ public:
}
protected:
- Error checkOffsetForRead(uint32_t Offset, uint32_t DataSize) const {
+ Error checkOffsetForRead(uint64_t Offset, uint64_t DataSize) const {
if (Offset > getLength())
return make_error<BinaryStreamError>(stream_error_code::invalid_offset);
if (getLength() < DataSize + Offset)
@@ -142,8 +142,8 @@ protected:
std::shared_ptr<StreamType> SharedImpl;
StreamType *BorrowedImpl = nullptr;
- uint32_t ViewOffset = 0;
- Optional<uint32_t> Length;
+ uint64_t ViewOffset = 0;
+ Optional<uint64_t> Length;
};
/// BinaryStreamRef is to BinaryStream what ArrayRef is to an Array. It
@@ -157,15 +157,15 @@ class BinaryStreamRef
: public BinaryStreamRefBase<BinaryStreamRef, BinaryStream> {
friend BinaryStreamRefBase<BinaryStreamRef, BinaryStream>;
friend class WritableBinaryStreamRef;
- BinaryStreamRef(std::shared_ptr<BinaryStream> Impl, uint32_t ViewOffset,
- Optional<uint32_t> Length)
+ BinaryStreamRef(std::shared_ptr<BinaryStream> Impl, uint64_t ViewOffset,
+ Optional<uint64_t> Length)
: BinaryStreamRefBase(Impl, ViewOffset, Length) {}
public:
BinaryStreamRef() = default;
BinaryStreamRef(BinaryStream &Stream);
- BinaryStreamRef(BinaryStream &Stream, uint32_t Offset,
- Optional<uint32_t> Length);
+ BinaryStreamRef(BinaryStream &Stream, uint64_t Offset,
+ Optional<uint64_t> Length);
explicit BinaryStreamRef(ArrayRef<uint8_t> Data,
llvm::support::endianness Endian);
explicit BinaryStreamRef(StringRef Data, llvm::support::endianness Endian);
@@ -176,8 +176,8 @@ public:
BinaryStreamRef &operator=(BinaryStreamRef &&Other) = default;
// Use BinaryStreamRef.slice() instead.
- BinaryStreamRef(BinaryStreamRef &S, uint32_t Offset,
- uint32_t Length) = delete;
+ BinaryStreamRef(BinaryStreamRef &S, uint64_t Offset,
+ uint64_t Length) = delete;
/// Given an Offset into this StreamRef and a Size, return a reference to a
/// buffer owned by the stream.
@@ -185,7 +185,7 @@ public:
/// \returns a success error code if the entire range of data is within the
/// bounds of this BinaryStreamRef's view and the implementation could read
/// the data, and an appropriate error code otherwise.
- Error readBytes(uint32_t Offset, uint32_t Size,
+ Error readBytes(uint64_t Offset, uint64_t Size,
ArrayRef<uint8_t> &Buffer) const;
/// Given an Offset into this BinaryStreamRef, return a reference to the
@@ -193,29 +193,28 @@ public:
///
/// \returns a success error code if implementation could read the data,
/// and an appropriate error code otherwise.
- Error readLongestContiguousChunk(uint32_t Offset,
+ Error readLongestContiguousChunk(uint64_t Offset,
ArrayRef<uint8_t> &Buffer) const;
};
struct BinarySubstreamRef {
- uint32_t Offset = 0; // Offset in the parent stream
+ uint64_t Offset = 0; // Offset in the parent stream
BinaryStreamRef StreamData; // Stream Data
- BinarySubstreamRef slice(uint32_t Off, uint32_t Size) const {
+ BinarySubstreamRef slice(uint64_t Off, uint64_t Size) const {
BinaryStreamRef SubSub = StreamData.slice(Off, Size);
return {Off + Offset, SubSub};
}
- BinarySubstreamRef drop_front(uint32_t N) const {
+ BinarySubstreamRef drop_front(uint64_t N) const {
return slice(N, size() - N);
}
- BinarySubstreamRef keep_front(uint32_t N) const { return slice(0, N); }
+ BinarySubstreamRef keep_front(uint64_t N) const { return slice(0, N); }
- std::pair<BinarySubstreamRef, BinarySubstreamRef>
- split(uint32_t Off) const {
+ std::pair<BinarySubstreamRef, BinarySubstreamRef> split(uint64_t Off) const {
return std::make_pair(keep_front(Off), drop_front(Off));
}
- uint32_t size() const { return StreamData.getLength(); }
+ uint64_t size() const { return StreamData.getLength(); }
bool empty() const { return size() == 0; }
};
@@ -224,10 +223,10 @@ class WritableBinaryStreamRef
WritableBinaryStream> {
friend BinaryStreamRefBase<WritableBinaryStreamRef, WritableBinaryStream>;
WritableBinaryStreamRef(std::shared_ptr<WritableBinaryStream> Impl,
- uint32_t ViewOffset, Optional<uint32_t> Length)
+ uint64_t ViewOffset, Optional<uint64_t> Length)
: BinaryStreamRefBase(Impl, ViewOffset, Length) {}
- Error checkOffsetForWrite(uint32_t Offset, uint32_t DataSize) const {
+ Error checkOffsetForWrite(uint64_t Offset, uint64_t DataSize) const {
if (!(BorrowedImpl->getFlags() & BSF_Append))
return checkOffsetForRead(Offset, DataSize);
@@ -239,8 +238,8 @@ class WritableBinaryStreamRef
public:
WritableBinaryStreamRef() = default;
WritableBinaryStreamRef(WritableBinaryStream &Stream);
- WritableBinaryStreamRef(WritableBinaryStream &Stream, uint32_t Offset,
- Optional<uint32_t> Length);
+ WritableBinaryStreamRef(WritableBinaryStream &Stream, uint64_t Offset,
+ Optional<uint64_t> Length);
explicit WritableBinaryStreamRef(MutableArrayRef<uint8_t> Data,
llvm::support::endianness Endian);
WritableBinaryStreamRef(const WritableBinaryStreamRef &Other) = default;
@@ -251,8 +250,8 @@ public:
WritableBinaryStreamRef &operator=(WritableBinaryStreamRef &&Other) = default;
// Use WritableBinaryStreamRef.slice() instead.
- WritableBinaryStreamRef(WritableBinaryStreamRef &S, uint32_t Offset,
- uint32_t Length) = delete;
+ WritableBinaryStreamRef(WritableBinaryStreamRef &S, uint64_t Offset,
+ uint64_t Length) = delete;
/// Given an Offset into this WritableBinaryStreamRef and some input data,
/// writes the data to the underlying stream.
@@ -260,7 +259,7 @@ public:
/// \returns a success error code if the data could fit within the underlying
/// stream at the specified location and the implementation could write the
/// data, and an appropriate error code otherwise.
- Error writeBytes(uint32_t Offset, ArrayRef<uint8_t> Data) const;
+ Error writeBytes(uint64_t Offset, ArrayRef<uint8_t> Data) const;
/// Convert this WritableBinaryStreamRef to a read-only BinaryStreamRef.
operator BinaryStreamRef() const;
diff --git a/llvm/include/llvm/Support/BinaryStreamWriter.h b/llvm/include/llvm/Support/BinaryStreamWriter.h
index ceba792e6b26..3054f4ac7ef0 100644
--- a/llvm/include/llvm/Support/BinaryStreamWriter.h
+++ b/llvm/include/llvm/Support/BinaryStreamWriter.h
@@ -124,7 +124,7 @@ public:
///
/// \returns a success error code if the data was successfully written,
/// otherwise returns an appropriate error code.
- Error writeStreamRef(BinaryStreamRef Ref, uint32_t Size);
+ Error writeStreamRef(BinaryStreamRef Ref, uint64_t Size);
/// Writes the object \p Obj to the underlying stream, as if by using memcpy.
/// It is up to the caller to ensure that type of \p Obj can be safely copied
@@ -178,17 +178,17 @@ public:
}
/// Splits the Writer into two Writers at a given offset.
- std::pair<BinaryStreamWriter, BinaryStreamWriter> split(uint32_t Off) const;
+ std::pair<BinaryStreamWriter, BinaryStreamWriter> split(uint64_t Off) const;
- void setOffset(uint32_t Off) { Offset = Off; }
- uint32_t getOffset() const { return Offset; }
- uint32_t getLength() const { return Stream.getLength(); }
- uint32_t bytesRemaining() const { return getLength() - getOffset(); }
+ void setOffset(uint64_t Off) { Offset = Off; }
+ uint64_t getOffset() const { return Offset; }
+ uint64_t getLength() const { return Stream.getLength(); }
+ uint64_t bytesRemaining() const { return getLength() - getOffset(); }
Error padToAlignment(uint32_t Align);
protected:
WritableBinaryStreamRef Stream;
- uint32_t Offset = 0;
+ uint64_t Offset = 0;
};
} // end namespace llvm
diff --git a/llvm/include/llvm/Support/Caching.h b/llvm/include/llvm/Support/Caching.h
new file mode 100644
index 000000000000..1e5fea17f708
--- /dev/null
+++ b/llvm/include/llvm/Support/Caching.h
@@ -0,0 +1,71 @@
+//===- Caching.h - LLVM Local File Cache ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the CachedFileStream and the localCache function, which
+// simplifies caching files on the local filesystem in a directory whose
+// contents are managed by a CachePruningPolicy.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_CACHING_H
+#define LLVM_SUPPORT_CACHING_H
+
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+
+class MemoryBuffer;
+
+/// This class wraps an output stream for a file. Most clients should just be
+/// able to return an instance of this base class from the stream callback, but
+/// if a client needs to perform some action after the stream is written to,
+/// that can be done by deriving from this class and overriding the destructor.
+class CachedFileStream {
+public:
+ CachedFileStream(std::unique_ptr<raw_pwrite_stream> OS) : OS(std::move(OS)) {}
+ std::unique_ptr<raw_pwrite_stream> OS;
+ virtual ~CachedFileStream() = default;
+};
+
+/// This type defines the callback to add a file that is generated on the fly.
+///
+/// Stream callbacks must be thread safe.
+using AddStreamFn =
+ std::function<Expected<std::unique_ptr<CachedFileStream>>(unsigned Task)>;
+
+/// This is the type of a file cache. To request an item from the cache, pass a
+/// unique string as the Key. For hits, the cached file will be added to the
+/// link and this function will return AddStreamFn(). For misses, the cache will
+/// return a stream callback which must be called at most once to produce
+/// content for the stream. The file stream produced by the stream callback will
+/// add the file to the link after the stream is written to.
+///
+/// Clients generally look like this:
+///
+/// if (AddStreamFn AddStream = Cache(Task, Key))
+/// ProduceContent(AddStream);
+using FileCache =
+ std::function<Expected<AddStreamFn>(unsigned Task, StringRef Key)>;
+
+/// This type defines the callback to add a pre-existing file (e.g. in a cache).
+///
+/// Buffer callbacks must be thread safe.
+using AddBufferFn =
+ std::function<void(unsigned Task, std::unique_ptr<MemoryBuffer> MB)>;
+
+/// Create a local file system cache which uses the given cache name, temporary
+/// file prefix, cache directory and file callback. This function also creates
+/// the cache directory if it does not already exist. The cache name appears in
+/// error messages for errors during caching. The temporary file prefix is used
+/// in the temporary file naming scheme used when writing files atomically.
+Expected<FileCache> localCache(Twine CacheNameRef, Twine TempFilePrefixRef,
+ Twine CacheDirectoryPathRef,
+ AddBufferFn AddBuffer);
+} // namespace llvm
+
+#endif
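// Editorial sketch of the client flow described in the new Caching.h (the
// cache name, prefix, directory, and contents below are made up): a hit hands
// the buffer to AddBuffer, a miss returns a stream callback to be called at
// most once.
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Caching.h"
#include "llvm/Support/MemoryBuffer.h"
#include <memory>

llvm::Error cacheExample(unsigned Task, llvm::StringRef Key) {
  auto AddBuffer = [](unsigned /*Task*/,
                      std::unique_ptr<llvm::MemoryBuffer> /*MB*/) {
    // Consume the cached file here, e.g. add it to a link.
  };
  llvm::Expected<llvm::FileCache> Cache =
      llvm::localCache("ExampleCache", "Tmp", "/tmp/example-cache", AddBuffer);
  if (!Cache)
    return Cache.takeError();
  llvm::Expected<llvm::AddStreamFn> AddStream = (*Cache)(Task, Key);
  if (!AddStream)
    return AddStream.takeError();
  if (*AddStream) {
    // Cache miss: produce the content into the returned stream exactly once.
    llvm::Expected<std::unique_ptr<llvm::CachedFileStream>> FS =
        (*AddStream)(Task);
    if (!FS)
      return FS.takeError();
    *(*FS)->OS << "example contents\n";
  }
  return llvm::Error::success();
}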
diff --git a/llvm/include/llvm/Support/CommandLine.h b/llvm/include/llvm/Support/CommandLine.h
index 14d7e21f78b2..2ee02010ff1d 100644
--- a/llvm/include/llvm/Support/CommandLine.h
+++ b/llvm/include/llvm/Support/CommandLine.h
@@ -316,9 +316,7 @@ public:
}
bool isInAllSubCommands() const {
- return any_of(Subs, [](const SubCommand *SC) {
- return SC == &*AllSubCommands;
- });
+ return llvm::is_contained(Subs, &*AllSubCommands);
}
//-------------------------------------------------------------------------===
@@ -926,6 +924,9 @@ public:
//--------------------------------------------------
// parser<bool>
//
+
+extern template class basic_parser<bool>;
+
template <> class parser<bool> : public basic_parser<bool> {
public:
parser(Option &O) : basic_parser(O) {}
@@ -949,10 +950,11 @@ public:
void anchor() override;
};
-extern template class basic_parser<bool>;
-
//--------------------------------------------------
// parser<boolOrDefault>
+
+extern template class basic_parser<boolOrDefault>;
+
template <> class parser<boolOrDefault> : public basic_parser<boolOrDefault> {
public:
parser(Option &O) : basic_parser(O) {}
@@ -974,11 +976,12 @@ public:
void anchor() override;
};
-extern template class basic_parser<boolOrDefault>;
-
//--------------------------------------------------
// parser<int>
//
+
+extern template class basic_parser<int>;
+
template <> class parser<int> : public basic_parser<int> {
public:
parser(Option &O) : basic_parser(O) {}
@@ -996,11 +999,12 @@ public:
void anchor() override;
};
-extern template class basic_parser<int>;
-
//--------------------------------------------------
// parser<long>
//
+
+extern template class basic_parser<long>;
+
template <> class parser<long> final : public basic_parser<long> {
public:
parser(Option &O) : basic_parser(O) {}
@@ -1018,11 +1022,12 @@ public:
void anchor() override;
};
-extern template class basic_parser<long>;
-
//--------------------------------------------------
// parser<long long>
//
+
+extern template class basic_parser<long long>;
+
template <> class parser<long long> : public basic_parser<long long> {
public:
parser(Option &O) : basic_parser(O) {}
@@ -1040,11 +1045,12 @@ public:
void anchor() override;
};
-extern template class basic_parser<long long>;
-
//--------------------------------------------------
// parser<unsigned>
//
+
+extern template class basic_parser<unsigned>;
+
template <> class parser<unsigned> : public basic_parser<unsigned> {
public:
parser(Option &O) : basic_parser(O) {}
@@ -1062,11 +1068,12 @@ public:
void anchor() override;
};
-extern template class basic_parser<unsigned>;
-
//--------------------------------------------------
// parser<unsigned long>
//
+
+extern template class basic_parser<unsigned long>;
+
template <>
class parser<unsigned long> final : public basic_parser<unsigned long> {
public:
@@ -1085,11 +1092,12 @@ public:
void anchor() override;
};
-extern template class basic_parser<unsigned long>;
-
//--------------------------------------------------
// parser<unsigned long long>
//
+
+extern template class basic_parser<unsigned long long>;
+
template <>
class parser<unsigned long long> : public basic_parser<unsigned long long> {
public:
@@ -1109,11 +1117,12 @@ public:
void anchor() override;
};
-extern template class basic_parser<unsigned long long>;
-
//--------------------------------------------------
// parser<double>
//
+
+extern template class basic_parser<double>;
+
template <> class parser<double> : public basic_parser<double> {
public:
parser(Option &O) : basic_parser(O) {}
@@ -1131,11 +1140,12 @@ public:
void anchor() override;
};
-extern template class basic_parser<double>;
-
//--------------------------------------------------
// parser<float>
//
+
+extern template class basic_parser<float>;
+
template <> class parser<float> : public basic_parser<float> {
public:
parser(Option &O) : basic_parser(O) {}
@@ -1153,11 +1163,12 @@ public:
void anchor() override;
};
-extern template class basic_parser<float>;
-
//--------------------------------------------------
// parser<std::string>
//
+
+extern template class basic_parser<std::string>;
+
template <> class parser<std::string> : public basic_parser<std::string> {
public:
parser(Option &O) : basic_parser(O) {}
@@ -1178,11 +1189,12 @@ public:
void anchor() override;
};
-extern template class basic_parser<std::string>;
-
//--------------------------------------------------
// parser<char>
//
+
+extern template class basic_parser<char>;
+
template <> class parser<char> : public basic_parser<char> {
public:
parser(Option &O) : basic_parser(O) {}
@@ -1203,8 +1215,6 @@ public:
void anchor() override;
};
-extern template class basic_parser<char>;
-
//--------------------------------------------------
// PrintOptionDiff
//
diff --git a/llvm/include/llvm/Support/Compiler.h b/llvm/include/llvm/Support/Compiler.h
index 57052b596edb..c5318137ed3d 100644
--- a/llvm/include/llvm/Support/Compiler.h
+++ b/llvm/include/llvm/Support/Compiler.h
@@ -97,7 +97,7 @@
/// Sadly, this is separate from just rvalue reference support because GCC
/// and MSVC implemented this later than everything else. This appears to be
/// corrected in MSVC 2019 but not MSVC 2017.
-#if __has_feature(cxx_rvalue_references) || LLVM_GNUC_PREREQ(4, 8, 1) || \
+#if __has_feature(cxx_rvalue_references) || defined(__GNUC__) || \
LLVM_MSC_PREREQ(1920)
#define LLVM_HAS_RVALUE_REFERENCE_THIS 1
#else
@@ -123,8 +123,8 @@
/// LLVM_EXTERNAL_VISIBILITY - classes, functions, and variables marked with
/// this attribute will be made public and visible outside of any shared library
/// they are linked in to.
-#if (__has_attribute(visibility) || LLVM_GNUC_PREREQ(4, 0, 0)) && \
- !defined(__MINGW32__) && !defined(__CYGWIN__) && !defined(_WIN32)
+#if __has_attribute(visibility) && !defined(__MINGW32__) && \
+ !defined(__CYGWIN__) && !defined(_WIN32)
#define LLVM_LIBRARY_VISIBILITY __attribute__ ((visibility("hidden")))
#define LLVM_EXTERNAL_VISIBILITY __attribute__ ((visibility("default")))
#else
@@ -138,7 +138,7 @@
#define LLVM_PREFETCH(addr, rw, locality)
#endif
-#if __has_attribute(used) || LLVM_GNUC_PREREQ(3, 1, 0)
+#if __has_attribute(used)
#define LLVM_ATTRIBUTE_USED __attribute__((__used__))
#else
#define LLVM_ATTRIBUTE_USED
@@ -182,15 +182,15 @@
// more portable solution:
// (void)unused_var_name;
// Prefer cast-to-void wherever it is sufficient.
-#if __has_attribute(unused) || LLVM_GNUC_PREREQ(3, 1, 0)
+#if __has_attribute(unused)
#define LLVM_ATTRIBUTE_UNUSED __attribute__((__unused__))
#else
#define LLVM_ATTRIBUTE_UNUSED
#endif
// FIXME: Provide this for PE/COFF targets.
-#if (__has_attribute(weak) || LLVM_GNUC_PREREQ(4, 0, 0)) && \
- (!defined(__MINGW32__) && !defined(__CYGWIN__) && !defined(_WIN32))
+#if __has_attribute(weak) && !defined(__MINGW32__) && !defined(__CYGWIN__) && \
+ !defined(_WIN32)
#define LLVM_ATTRIBUTE_WEAK __attribute__((__weak__))
#else
#define LLVM_ATTRIBUTE_WEAK
@@ -212,7 +212,13 @@
#define LLVM_READONLY
#endif
-#if __has_builtin(__builtin_expect) || LLVM_GNUC_PREREQ(4, 0, 0)
+#if __has_attribute(minsize)
+#define LLVM_ATTRIBUTE_MINSIZE __attribute__((minsize))
+#else
+#define LLVM_ATTRIBUTE_MINSIZE
+#endif
+
+#if __has_builtin(__builtin_expect) || defined(__GNUC__)
#define LLVM_LIKELY(EXPR) __builtin_expect((bool)(EXPR), true)
#define LLVM_UNLIKELY(EXPR) __builtin_expect((bool)(EXPR), false)
#else
@@ -222,7 +228,7 @@
/// LLVM_ATTRIBUTE_NOINLINE - On compilers where we have a directive to do so,
/// mark a method "not for inlining".
-#if __has_attribute(noinline) || LLVM_GNUC_PREREQ(3, 4, 0)
+#if __has_attribute(noinline)
#define LLVM_ATTRIBUTE_NOINLINE __attribute__((noinline))
#elif defined(_MSC_VER)
#define LLVM_ATTRIBUTE_NOINLINE __declspec(noinline)
@@ -231,10 +237,8 @@
#endif
/// LLVM_ATTRIBUTE_ALWAYS_INLINE - On compilers where we have a directive to do
-/// so, mark a method "always inline" because it is performance sensitive. GCC
-/// 3.4 supported this but is buggy in various cases and produces unimplemented
-/// errors, just use it in GCC 4.0 and later.
-#if __has_attribute(always_inline) || LLVM_GNUC_PREREQ(4, 0, 0)
+/// so, mark a method "always inline" because it is performance sensitive.
+#if __has_attribute(always_inline)
#define LLVM_ATTRIBUTE_ALWAYS_INLINE inline __attribute__((always_inline))
#elif defined(_MSC_VER)
#define LLVM_ATTRIBUTE_ALWAYS_INLINE __forceinline
@@ -242,15 +246,16 @@
#define LLVM_ATTRIBUTE_ALWAYS_INLINE inline
#endif
-#ifdef __GNUC__
-#define LLVM_ATTRIBUTE_NORETURN __attribute__((noreturn))
-#elif defined(_MSC_VER)
-#define LLVM_ATTRIBUTE_NORETURN __declspec(noreturn)
+/// LLVM_ATTRIBUTE_NODEBUG - On compilers where we have a directive to do
+/// so, mark a method "no debug" because debug info makes the debugger
+/// experience worse.
+#if __has_attribute(nodebug)
+#define LLVM_ATTRIBUTE_NODEBUG __attribute__((nodebug))
#else
-#define LLVM_ATTRIBUTE_NORETURN
+#define LLVM_ATTRIBUTE_NODEBUG
#endif
-#if __has_attribute(returns_nonnull) || LLVM_GNUC_PREREQ(4, 9, 0)
+#if __has_attribute(returns_nonnull)
#define LLVM_ATTRIBUTE_RETURNS_NONNULL __attribute__((returns_nonnull))
#elif defined(_MSC_VER)
#define LLVM_ATTRIBUTE_RETURNS_NONNULL _Ret_notnull_
@@ -322,15 +327,17 @@
/// LLVM_BUILTIN_UNREACHABLE - On compilers which support it, expands
/// to an expression which states that it is undefined behavior for the
/// compiler to reach this point. Otherwise is not defined.
-#if __has_builtin(__builtin_unreachable) || LLVM_GNUC_PREREQ(4, 5, 0)
+#if __has_builtin(__builtin_unreachable) || defined(__GNUC__)
# define LLVM_BUILTIN_UNREACHABLE __builtin_unreachable()
#elif defined(_MSC_VER)
# define LLVM_BUILTIN_UNREACHABLE __assume(false)
+#else
+# define LLVM_BUILTIN_UNREACHABLE
#endif
/// LLVM_BUILTIN_TRAP - On compilers which support it, expands to an expression
/// which causes the program to exit abnormally.
-#if __has_builtin(__builtin_trap) || LLVM_GNUC_PREREQ(4, 3, 0)
+#if __has_builtin(__builtin_trap) || defined(__GNUC__)
# define LLVM_BUILTIN_TRAP __builtin_trap()
#elif defined(_MSC_VER)
// The __debugbreak intrinsic is supported by MSVC, does not require forward
@@ -361,7 +368,7 @@
/// \macro LLVM_ASSUME_ALIGNED
/// Returns a pointer with an assumed alignment.
-#if __has_builtin(__builtin_assume_aligned) || LLVM_GNUC_PREREQ(4, 7, 0)
+#if __has_builtin(__builtin_assume_aligned) || defined(__GNUC__)
# define LLVM_ASSUME_ALIGNED(p, a) __builtin_assume_aligned(p, a)
#elif defined(LLVM_BUILTIN_UNREACHABLE)
# define LLVM_ASSUME_ALIGNED(p, a) \
@@ -549,4 +556,13 @@ void AnnotateIgnoreWritesEnd(const char *file, int line);
#define LLVM_ENABLE_EXCEPTIONS 1
#endif
+/// \macro LLVM_NO_PROFILE_INSTRUMENT_FUNCTION
+/// Disable the profile instrument for a function.
+#if __has_attribute(no_profile_instrument_function)
+#define LLVM_NO_PROFILE_INSTRUMENT_FUNCTION \
+ __attribute__((no_profile_instrument_function))
+#else
+#define LLVM_NO_PROFILE_INSTRUMENT_FUNCTION
+#endif
+
#endif
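// Usage sketch (illustrative, not part of the patch): the new
// LLVM_ATTRIBUTE_MINSIZE macro and the pre-existing LLVM_UNLIKELY hint are
// applied like this; the function and variable names below are hypothetical.
LLVM_ATTRIBUTE_MINSIZE void emitRareDiagnostic();

int safeDivide(int Num, int Den) {
  if (LLVM_UNLIKELY(Den == 0))
    return 0; // Cold path: tell the compiler to favor Den != 0.
  return Num / Den;
}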
diff --git a/llvm/include/llvm/Support/CrashRecoveryContext.h b/llvm/include/llvm/Support/CrashRecoveryContext.h
index 498690655fd1..2604ccb38431 100644
--- a/llvm/include/llvm/Support/CrashRecoveryContext.h
+++ b/llvm/include/llvm/Support/CrashRecoveryContext.h
@@ -99,8 +99,7 @@ public:
/// Explicitly trigger a crash recovery in the current process, and
/// return failure from RunSafely(). This function does not return.
- LLVM_ATTRIBUTE_NORETURN
- void HandleExit(int RetCode);
+ [[noreturn]] void HandleExit(int RetCode);
/// Throw again a signal or an exception, after it was catched once by a
/// CrashRecoveryContext.
diff --git a/llvm/include/llvm/Support/DOTGraphTraits.h b/llvm/include/llvm/Support/DOTGraphTraits.h
index a73538fa1462..ffa9abe328c8 100644
--- a/llvm/include/llvm/Support/DOTGraphTraits.h
+++ b/llvm/include/llvm/Support/DOTGraphTraits.h
@@ -65,6 +65,11 @@ public:
return false;
}
+ // renderNodesUsingHTML - If the function returns true, nodes will be
+ // rendered using HTML-like labels, which allow colors, etc. in the nodes
+ // and the edge source labels.
+ static bool renderNodesUsingHTML() { return false; }
+
/// getNodeLabel - Given a node and a pointer to the top level graph, return
/// the label to print in the node.
template<typename GraphType>
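// Usage sketch (illustrative, not part of the patch): a graph opts in to
// HTML-like node labels by overriding renderNodesUsingHTML() in its
// DOTGraphTraits specialization; "MyGraph" is a hypothetical graph type.
template <>
struct DOTGraphTraits<MyGraph *> : public DefaultDOTGraphTraits {
  DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {}
  static bool renderNodesUsingHTML() { return true; }
};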
diff --git a/llvm/include/llvm/Support/DataExtractor.h b/llvm/include/llvm/Support/DataExtractor.h
index f9335c161563..f4f5905d4bcc 100644
--- a/llvm/include/llvm/Support/DataExtractor.h
+++ b/llvm/include/llvm/Support/DataExtractor.h
@@ -70,6 +70,9 @@ public:
/// the position of the Cursor before the first error was encountered.
uint64_t tell() const { return Offset; }
+ /// Set the cursor to the new offset. This does not impact the error state.
+ void seek(uint64_t NewOffSet) { Offset = NewOffSet; }
+
/// Return error contained inside this Cursor, if any. Clears the internal
/// Cursor state.
Error takeError() { return std::move(Err); }
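// Usage sketch (illustrative, not part of the patch): the new Cursor::seek()
// repositions the cursor without touching its error state, e.g. to skip a
// block whose size was just read; "Data" is a hypothetical DataExtractor.
DataExtractor::Cursor C(0);
uint64_t BlockSize = Data.getU32(C);
C.seek(C.tell() + BlockSize); // Jump over the block's payload.
uint32_t NextField = Data.getU32(C);
if (Error E = C.takeError())
  consumeError(std::move(E)); // Or propagate it to the caller.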
diff --git a/llvm/include/llvm/Support/Debug.h b/llvm/include/llvm/Support/Debug.h
index 64b730951bda..2ff978476c79 100644
--- a/llvm/include/llvm/Support/Debug.h
+++ b/llvm/include/llvm/Support/Debug.h
@@ -78,27 +78,6 @@ void setCurrentDebugTypes(const char **Types, unsigned Count);
///
extern bool DebugFlag;
-/// \name Verification flags.
-///
-/// These flags turns on/off that are expensive and are turned off by default,
-/// unless macro EXPENSIVE_CHECKS is defined. The flags allow selectively
-/// turning the checks on without need to recompile.
-/// \{
-
-/// Enables verification of dominator trees.
-///
-extern bool VerifyDomInfo;
-
-/// Enables verification of loop info.
-///
-extern bool VerifyLoopInfo;
-
-/// Enables verification of MemorySSA.
-///
-extern bool VerifyMemorySSA;
-
-///\}
-
/// EnableDebugBuffering - This defaults to false. If true, the debug
/// stream will install signal handlers to dump any buffered debug
/// output. It allows clients to selectively allow the debug stream
diff --git a/llvm/include/llvm/Support/DivisionByConstantInfo.h b/llvm/include/llvm/Support/DivisionByConstantInfo.h
new file mode 100644
index 000000000000..5bb326178c3e
--- /dev/null
+++ b/llvm/include/llvm/Support/DivisionByConstantInfo.h
@@ -0,0 +1,38 @@
+//== llvm/Support/DivisionByConstantInfo.h - division by constant -*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// This file implements support for optimizing divisions by a constant
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_DIVISON_BY_CONSTANT_INFO_H
+#define LLVM_SUPPORT_DIVISON_BY_CONSTANT_INFO_H
+
+#include "llvm/ADT/APInt.h"
+
+namespace llvm {
+
+/// Magic data for optimising signed division by a constant.
+struct SignedDivisionByConstantInfo {
+ static SignedDivisionByConstantInfo get(const APInt &D);
+ APInt Magic; ///< magic number
+ unsigned ShiftAmount; ///< shift amount
+};
+
+/// Magic data for optimising unsigned division by a constant.
+struct UnsignedDivisonByConstantInfo {
+ static UnsignedDivisonByConstantInfo get(const APInt &D,
+ unsigned LeadingZeros = 0);
+ APInt Magic; ///< magic number
+ bool IsAdd; ///< add indicator
+ unsigned ShiftAmount; ///< shift amount
+};
+
+} // namespace llvm
+
+#endif
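// Usage sketch (illustrative, not part of the patch): a backend queries the
// magic data for a divisor and lowers the division into a multiply/shift
// sequence; the exact fixups depend on the target.
APInt Divisor(32, 7);
SignedDivisionByConstantInfo Magics = SignedDivisionByConstantInfo::get(Divisor);
// Magics.Magic and Magics.ShiftAmount drive an expansion roughly of the form
//   quotient ~ (dividend * Magic) >> (BitWidth + ShiftAmount), plus fixups.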
diff --git a/llvm/include/llvm/Support/Error.h b/llvm/include/llvm/Support/Error.h
index e8f340e452ef..e2002b89ada2 100644
--- a/llvm/include/llvm/Support/Error.h
+++ b/llvm/include/llvm/Support/Error.h
@@ -257,8 +257,7 @@ private:
// of debug prints can cause the function to be too large for inlining. So
// it's important that we define this function out of line so that it can't be
// inlined.
- LLVM_ATTRIBUTE_NORETURN
- void fatalUncheckedError() const;
+ [[noreturn]] void fatalUncheckedError() const;
#endif
void assertIsChecked() {
@@ -314,7 +313,7 @@ private:
}
friend raw_ostream &operator<<(raw_ostream &OS, const Error &E) {
- if (auto P = E.getPtr())
+ if (auto *P = E.getPtr())
P->log(OS);
else
OS << "success";
@@ -374,7 +373,7 @@ class ErrorList final : public ErrorInfo<ErrorList> {
public:
void log(raw_ostream &OS) const override {
OS << "Multiple errors:\n";
- for (auto &ErrPayload : Payloads) {
+ for (const auto &ErrPayload : Payloads) {
ErrPayload->log(OS);
OS << "\n";
}
@@ -578,6 +577,16 @@ public:
return const_cast<Expected<T> *>(this)->get();
}
+ /// Returns \a takeError() after moving the held T (if any) into \p V.
+ template <class OtherT>
+ Error moveInto(OtherT &Value,
+ std::enable_if_t<std::is_assignable<OtherT &, T &&>::value> * =
+ nullptr) && {
+ if (*this)
+ Value = std::move(get());
+ return takeError();
+ }
+
/// Check that this Expected<T> is an error of type ErrT.
template <typename ErrT> bool errorIsA() const {
return HasError && (*getErrorStorage())->template isA<ErrT>();
@@ -688,9 +697,7 @@ private:
}
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
- LLVM_ATTRIBUTE_NORETURN
- LLVM_ATTRIBUTE_NOINLINE
- void fatalUncheckedExpected() const {
+ [[noreturn]] LLVM_ATTRIBUTE_NOINLINE void fatalUncheckedExpected() const {
dbgs() << "Expected<T> must be checked before access or destruction.\n";
if (HasError) {
dbgs() << "Unchecked Expected<T> contained error:\n";
@@ -722,8 +729,7 @@ private:
/// Report a serious error, calling any installed error handler. See
/// ErrorHandling.h.
-LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err,
- bool gen_crash_diag = true);
+[[noreturn]] void report_fatal_error(Error Err, bool gen_crash_diag = true);
/// Report a fatal error if Err is a failure value.
///
@@ -1159,7 +1165,7 @@ protected:
/// It should only be used in this situation, and should never be used where a
/// sensible conversion to std::error_code is available, as attempts to convert
/// to/from this error will result in a fatal error. (i.e. it is a programmatic
-///error to try to convert such a value).
+/// error to try to convert such a value).
std::error_code inconvertibleErrorCode();
/// Helper for converting an std::error_code to a Error.
@@ -1263,13 +1269,20 @@ class FileError final : public ErrorInfo<FileError> {
public:
void log(raw_ostream &OS) const override {
- assert(Err && !FileName.empty() && "Trying to log after takeError().");
+ assert(Err && "Trying to log after takeError().");
OS << "'" << FileName << "': ";
if (Line.hasValue())
OS << "line " << Line.getValue() << ": ";
Err->log(OS);
}
+ std::string messageWithoutFileInfo() const {
+ std::string Msg;
+ raw_string_ostream OS(Msg);
+ Err->log(OS);
+ return OS.str();
+ }
+
StringRef getFileName() { return FileName; }
Error takeError() { return Error(std::move(Err)); }
@@ -1283,8 +1296,6 @@ private:
FileError(const Twine &F, Optional<size_t> LineNum,
std::unique_ptr<ErrorInfoBase> E) {
assert(E && "Cannot create FileError from Error success value.");
- assert(!F.isTriviallyEmpty() &&
- "The file name provided to FileError must not be empty.");
FileName = F.str();
Err = std::move(E);
Line = std::move(LineNum);
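// Usage sketch (illustrative, not part of the patch): the new
// Expected<T>::moveInto() folds the usual "check, then move the value out"
// pattern into one statement; openFile() is a hypothetical function returning
// Expected<std::unique_ptr<MemoryBuffer>>.
Error loadBuffer(std::unique_ptr<MemoryBuffer> &Buf) {
  return openFile("foo.bin").moveInto(Buf);
  // On success Buf owns the result; on failure the returned Error is set.
}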
diff --git a/llvm/include/llvm/Support/ErrorHandling.h b/llvm/include/llvm/Support/ErrorHandling.h
index 0ec0242d569d..f980510d37f0 100644
--- a/llvm/include/llvm/Support/ErrorHandling.h
+++ b/llvm/include/llvm/Support/ErrorHandling.h
@@ -15,15 +15,14 @@
#define LLVM_SUPPORT_ERRORHANDLING_H
#include "llvm/Support/Compiler.h"
-#include <string>
namespace llvm {
-class StringRef;
+ class StringRef;
class Twine;
/// An error handler callback.
typedef void (*fatal_error_handler_t)(void *user_data,
- const std::string& reason,
+ const char *reason,
bool gen_crash_diag);
/// install_fatal_error_handler - Installs a new error handler to be used
@@ -68,14 +67,13 @@ class StringRef;
/// standard error, followed by a newline.
/// After the error handler is called this function will call abort(), it
/// does not return.
-LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const char *reason,
- bool gen_crash_diag = true);
-LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const std::string &reason,
- bool gen_crash_diag = true);
-LLVM_ATTRIBUTE_NORETURN void report_fatal_error(StringRef reason,
- bool gen_crash_diag = true);
-LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const Twine &reason,
- bool gen_crash_diag = true);
+/// NOTE: The std::string variant was removed to avoid a <string> dependency.
+[[noreturn]] void report_fatal_error(const char *reason,
+ bool gen_crash_diag = true);
+[[noreturn]] void report_fatal_error(StringRef reason,
+ bool gen_crash_diag = true);
+[[noreturn]] void report_fatal_error(const Twine &reason,
+ bool gen_crash_diag = true);
/// Installs a new bad alloc error handler that should be used whenever a
/// bad alloc error, e.g. failing malloc/calloc, is encountered by LLVM.
@@ -113,13 +111,13 @@ void install_out_of_memory_new_handler();
/// If no error handler is installed (default), throws a bad_alloc exception
/// if LLVM is compiled with exception support. Otherwise prints the error
/// to standard error and calls abort().
-LLVM_ATTRIBUTE_NORETURN void report_bad_alloc_error(const char *Reason,
- bool GenCrashDiag = true);
+[[noreturn]] void report_bad_alloc_error(const char *Reason,
+ bool GenCrashDiag = true);
/// This function calls abort(), and prints the optional message to stderr.
/// Use the llvm_unreachable macro (that adds location info), instead of
/// calling this function directly.
-LLVM_ATTRIBUTE_NORETURN void
+[[noreturn]] void
llvm_unreachable_internal(const char *msg = nullptr, const char *file = nullptr,
unsigned line = 0);
}
diff --git a/llvm/include/llvm/Support/ExtensibleRTTI.h b/llvm/include/llvm/Support/ExtensibleRTTI.h
index 6b8510ce759f..21055247e932 100644
--- a/llvm/include/llvm/Support/ExtensibleRTTI.h
+++ b/llvm/include/llvm/Support/ExtensibleRTTI.h
@@ -1,9 +1,8 @@
//===-- llvm/Support/ExtensibleRTTI.h - ExtensibleRTTI support --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/llvm/include/llvm/Support/FileSystem.h b/llvm/include/llvm/Support/FileSystem.h
index 38779ef4a3af..1a049533b82b 100644
--- a/llvm/include/llvm/Support/FileSystem.h
+++ b/llvm/include/llvm/Support/FileSystem.h
@@ -772,7 +772,8 @@ enum OpenFlags : unsigned {
/// The file should be opened in append mode.
OF_Append = 4,
- /// Delete the file on close. Only makes a difference on windows.
+ /// The returned handle can be used for deleting the file. Only makes a
+ /// difference on windows.
OF_Delete = 8,
/// When a child process is launched, this file should remain open in the
@@ -865,6 +866,11 @@ public:
// The open file descriptor.
int FD = -1;
+#ifdef _WIN32
+ // Whether we need to manually remove the file on close.
+ bool RemoveOnClose = false;
+#endif
+
// Keep this with the given name.
Error keep(const Twine &Name);
diff --git a/llvm/include/llvm/Support/FileSystem/UniqueID.h b/llvm/include/llvm/Support/FileSystem/UniqueID.h
index 229410c8292e..0d5367236e8d 100644
--- a/llvm/include/llvm/Support/FileSystem/UniqueID.h
+++ b/llvm/include/llvm/Support/FileSystem/UniqueID.h
@@ -14,7 +14,10 @@
#ifndef LLVM_SUPPORT_FILESYSTEM_UNIQUEID_H
#define LLVM_SUPPORT_FILESYSTEM_UNIQUEID_H
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/Hashing.h"
#include <cstdint>
+#include <utility>
namespace llvm {
namespace sys {
@@ -47,6 +50,30 @@ public:
} // end namespace fs
} // end namespace sys
+
+// Support UniqueIDs as DenseMap keys.
+template <> struct DenseMapInfo<llvm::sys::fs::UniqueID> {
+ static inline llvm::sys::fs::UniqueID getEmptyKey() {
+ auto EmptyKey = DenseMapInfo<std::pair<uint64_t, uint64_t>>::getEmptyKey();
+ return {EmptyKey.first, EmptyKey.second};
+ }
+
+ static inline llvm::sys::fs::UniqueID getTombstoneKey() {
+ auto TombstoneKey =
+ DenseMapInfo<std::pair<uint64_t, uint64_t>>::getTombstoneKey();
+ return {TombstoneKey.first, TombstoneKey.second};
+ }
+
+ static hash_code getHashValue(const llvm::sys::fs::UniqueID &Tag) {
+ return hash_value(std::make_pair(Tag.getDevice(), Tag.getFile()));
+ }
+
+ static bool isEqual(const llvm::sys::fs::UniqueID &LHS,
+ const llvm::sys::fs::UniqueID &RHS) {
+ return LHS == RHS;
+ }
+};
+
} // end namespace llvm
#endif // LLVM_SUPPORT_FILESYSTEM_UNIQUEID_H
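// Usage sketch (illustrative, not part of the patch): with this trait a file's
// UniqueID can key a DenseMap directly, e.g. to de-duplicate paths that refer
// to the same inode; "SeenFiles" is hypothetical.
llvm::DenseMap<llvm::sys::fs::UniqueID, std::string> SeenFiles;
llvm::sys::fs::file_status Status;
if (!llvm::sys::fs::status("foo.txt", Status))
  SeenFiles.try_emplace(Status.getUniqueID(), "foo.txt");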
diff --git a/llvm/include/llvm/Support/FormatVariadic.h b/llvm/include/llvm/Support/FormatVariadic.h
index 094b054f773f..89575f01b717 100644
--- a/llvm/include/llvm/Support/FormatVariadic.h
+++ b/llvm/include/llvm/Support/FormatVariadic.h
@@ -94,7 +94,7 @@ public:
continue;
}
- auto W = Adapters[R.Index];
+ auto *W = Adapters[R.Index];
FmtAlign Align(*W, R.Where, R.Align, R.Pad);
Align.format(S, R.Options);
diff --git a/llvm/include/llvm/Support/GenericDomTreeConstruction.h b/llvm/include/llvm/Support/GenericDomTreeConstruction.h
index d306ebe99bc1..e504a0eddeba 100644
--- a/llvm/include/llvm/Support/GenericDomTreeConstruction.h
+++ b/llvm/include/llvm/Support/GenericDomTreeConstruction.h
@@ -78,7 +78,7 @@ struct SemiNCAInfo {
using UpdateT = typename DomTreeT::UpdateType;
using UpdateKind = typename DomTreeT::UpdateKind;
struct BatchUpdateInfo {
- // Note: Updates inside PreViewCFG are aleady legalized.
+ // Note: Updates inside PreViewCFG are already legalized.
BatchUpdateInfo(GraphDiffT &PreViewCFG, GraphDiffT *PostViewCFG = nullptr)
: PreViewCFG(PreViewCFG), PostViewCFG(PostViewCFG),
NumLegalized(PreViewCFG.getNumLegalizedUpdates()) {}
@@ -430,7 +430,6 @@ struct SemiNCAInfo {
// is unreachable. This is because we are still going to only visit each
// unreachable node once, we may just visit it in two directions,
// depending on how lucky we get.
- SmallPtrSet<NodePtr, 4> ConnectToExitBlock;
for (const NodePtr I : nodes(DT.Parent)) {
if (SNCA.NodeToInfo.count(I) == 0) {
LLVM_DEBUG(dbgs()
@@ -457,7 +456,6 @@ struct SemiNCAInfo {
LLVM_DEBUG(dbgs() << "\t\t\tFound a new furthest away node "
<< "(non-trivial root): "
<< BlockNamePrinter(FurthestAway) << "\n");
- ConnectToExitBlock.insert(FurthestAway);
Roots.push_back(FurthestAway);
LLVM_DEBUG(dbgs() << "\t\t\tPrev DFSNum: " << Num << ", new DFSNum: "
<< NewNum << "\n\t\t\tRemoving DFS info\n");
diff --git a/llvm/include/llvm/Support/GraphWriter.h b/llvm/include/llvm/Support/GraphWriter.h
index b886bf45f474..11a31bf40160 100644
--- a/llvm/include/llvm/Support/GraphWriter.h
+++ b/llvm/include/llvm/Support/GraphWriter.h
@@ -66,6 +66,7 @@ template<typename GraphType>
class GraphWriter {
raw_ostream &O;
const GraphType &G;
+ bool RenderUsingHTML = false;
using DOTTraits = DOTGraphTraits<GraphType>;
using GTraits = GraphTraits<GraphType>;
@@ -86,6 +87,9 @@ class GraphWriter {
child_iterator EE = GTraits::child_end(Node);
bool hasEdgeSourceLabels = false;
+ if (RenderUsingHTML)
+ O << "</tr><tr>";
+
for (unsigned i = 0; EI != EE && i != 64; ++EI, ++i) {
std::string label = DTraits.getEdgeSourceLabel(Node, EI);
@@ -94,14 +98,22 @@ class GraphWriter {
hasEdgeSourceLabels = true;
- if (i)
- O << "|";
+ if (RenderUsingHTML)
+ O << "<td colspan=\"1\" port=\"s" << i << "\">" << label << "</td>";
+ else {
+ if (i)
+ O << "|";
- O << "<s" << i << ">" << DOT::EscapeString(label);
+ O << "<s" << i << ">" << DOT::EscapeString(label);
+ }
}
- if (EI != EE && hasEdgeSourceLabels)
- O << "|<s64>truncated...";
+ if (EI != EE && hasEdgeSourceLabels) {
+ if (RenderUsingHTML)
+ O << "<td colspan=\"1\" port=\"s64\">truncated...</td>";
+ else
+ O << "|<s64>truncated...";
+ }
return hasEdgeSourceLabels;
}
@@ -109,6 +121,7 @@ class GraphWriter {
public:
GraphWriter(raw_ostream &o, const GraphType &g, bool SN) : O(o), G(g) {
DTraits = DOTTraits(SN);
+ RenderUsingHTML = DTraits.renderNodesUsingHTML();
}
void writeGraph(const std::string &Title = "") {
@@ -163,12 +176,39 @@ public:
void writeNode(NodeRef Node) {
std::string NodeAttributes = DTraits.getNodeAttributes(Node, G);
- O << "\tNode" << static_cast<const void*>(Node) << " [shape=record,";
+ O << "\tNode" << static_cast<const void *>(Node) << " [shape=";
+ if (RenderUsingHTML)
+ O << "none,";
+ else
+ O << "record,";
+
if (!NodeAttributes.empty()) O << NodeAttributes << ",";
- O << "label=\"{";
+ O << "label=";
+
+ if (RenderUsingHTML) {
+ // Count the number of edges out of the node to determine how
+ // many columns to span (max 64)
+ unsigned ColSpan = 0;
+ child_iterator EI = GTraits::child_begin(Node);
+ child_iterator EE = GTraits::child_end(Node);
+ for (; EI != EE && ColSpan != 64; ++EI, ++ColSpan)
+ ;
+ if (ColSpan == 0)
+ ColSpan = 1;
+ // Include truncated messages when counting.
+ if (EI != EE)
+ ++ColSpan;
+ O << "<<table border=\"0\" cellborder=\"1\" cellspacing=\"0\""
+ << " cellpadding=\"0\"><tr><td align=\"text\" colspan=\"" << ColSpan
+ << "\">";
+ } else
+ O << "\"{";
if (!DTraits.renderGraphFromBottomUp()) {
- O << DOT::EscapeString(DTraits.getNodeLabel(Node, G));
+ if (RenderUsingHTML)
+ O << DTraits.getNodeLabel(Node, G) << "</td>";
+ else
+ O << DOT::EscapeString(DTraits.getNodeLabel(Node, G));
// If we should include the address of the node in the label, do so now.
std::string Id = DTraits.getNodeIdentifierLabel(Node, G);
@@ -185,15 +225,25 @@ public:
bool hasEdgeSourceLabels = getEdgeSourceLabels(EdgeSourceLabels, Node);
if (hasEdgeSourceLabels) {
- if (!DTraits.renderGraphFromBottomUp()) O << "|";
-
- O << "{" << EdgeSourceLabels.str() << "}";
-
- if (DTraits.renderGraphFromBottomUp()) O << "|";
+ if (!DTraits.renderGraphFromBottomUp())
+ if (!RenderUsingHTML)
+ O << "|";
+
+ if (RenderUsingHTML)
+ O << EdgeSourceLabels.str();
+ else
+ O << "{" << EdgeSourceLabels.str() << "}";
+
+ if (DTraits.renderGraphFromBottomUp())
+ if (!RenderUsingHTML)
+ O << "|";
}
if (DTraits.renderGraphFromBottomUp()) {
- O << DOT::EscapeString(DTraits.getNodeLabel(Node, G));
+ if (RenderUsingHTML)
+ O << DTraits.getNodeLabel(Node, G);
+ else
+ O << DOT::EscapeString(DTraits.getNodeLabel(Node, G));
// If we should include the address of the node in the label, do so now.
std::string Id = DTraits.getNodeIdentifierLabel(Node, G);
@@ -215,12 +265,17 @@ public:
<< DOT::EscapeString(DTraits.getEdgeDestLabel(Node, i));
}
- if (i != e)
- O << "|<d64>truncated...";
- O << "}";
+ if (RenderUsingHTML)
+ O << "<td colspan=\"1\">... truncated</td>";
+ else if (i != e)
+ O << "|<d64>truncated...}";
}
- O << "}\"];\n"; // Finish printing the "node" line
+ if (RenderUsingHTML)
+ O << "</tr></table>>";
+ else
+ O << "}\"";
+ O << "];\n"; // Finish printing the "node" line
// Output all of the edges now
child_iterator EI = GTraits::child_begin(Node);
diff --git a/llvm/include/llvm/Support/HashBuilder.h b/llvm/include/llvm/Support/HashBuilder.h
new file mode 100644
index 000000000000..bf93a0d22da7
--- /dev/null
+++ b/llvm/include/llvm/Support/HashBuilder.h
@@ -0,0 +1,438 @@
+//===- llvm/Support/HashBuilder.h - Convenient hashing interface-*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an interface allowing to conveniently build hashes of
+// various data types, without relying on the underlying hasher type to know
+// about hashed data types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_HASHBUILDER_H
+#define LLVM_SUPPORT_HASHBUILDER_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/type_traits.h"
+
+#include <iterator>
+#include <utility>
+
+namespace llvm {
+
+namespace hashbuilder_detail {
+/// Trait to indicate whether a type's bits can be hashed directly (after
+/// endianness correction).
+template <typename U>
+struct IsHashableData
+ : std::integral_constant<bool, is_integral_or_enum<U>::value> {};
+
+} // namespace hashbuilder_detail
+
+/// Declares the hasher member, and functions forwarding directly to the hasher.
+template <typename HasherT> class HashBuilderBase {
+public:
+ HasherT &getHasher() { return Hasher; }
+
+ /// Forward to `HasherT::update(ArrayRef<uint8_t>)`.
+ ///
+ /// This may not take the size of `Data` into account.
+ /// Users of this function should take care to respect endianness
+ /// constraints.
+ void update(ArrayRef<uint8_t> Data) { this->getHasher().update(Data); }
+
+ /// Forward to `HasherT::update(ArrayRef<uint8_t>)`.
+ ///
+ /// This may not take the size of `Data` into account.
+ /// Users of this function should take care to respect endianness
+ /// constraints.
+ void update(StringRef Data) {
+ update(makeArrayRef(reinterpret_cast<const uint8_t *>(Data.data()),
+ Data.size()));
+ }
+
+ /// Forward to `HasherT::final()` if available.
+ template <typename HasherT_ = HasherT> StringRef final() {
+ return this->getHasher().final();
+ }
+
+ /// Forward to `HasherT::result()` if available.
+ template <typename HasherT_ = HasherT> StringRef result() {
+ return this->getHasher().result();
+ }
+
+protected:
+ explicit HashBuilderBase(HasherT &Hasher) : Hasher(Hasher) {}
+
+ template <typename... ArgTypes>
+ explicit HashBuilderBase(ArgTypes &&...Args)
+ : OptionalHasher(in_place, std::forward<ArgTypes>(Args)...),
+ Hasher(*OptionalHasher) {}
+
+private:
+ Optional<HasherT> OptionalHasher;
+ HasherT &Hasher;
+};
+
+/// Implementation of the `HashBuilder` interface.
+///
+/// `support::endianness::native` is not supported. `HashBuilder` is
+/// expected to canonicalize `support::endianness::native` to one of
+/// `support::endianness::big` or `support::endianness::little`.
+template <typename HasherT, support::endianness Endianness>
+class HashBuilderImpl : public HashBuilderBase<HasherT> {
+ static_assert(Endianness != support::endianness::native,
+ "HashBuilder should canonicalize endianness");
+
+public:
+ explicit HashBuilderImpl(HasherT &Hasher)
+ : HashBuilderBase<HasherT>(Hasher) {}
+ template <typename... ArgTypes>
+ explicit HashBuilderImpl(ArgTypes &&...Args)
+ : HashBuilderBase<HasherT>(Args...) {}
+
+ /// Implement hashing for hashable data types, e.g. integral or enum values.
+ template <typename T>
+ std::enable_if_t<hashbuilder_detail::IsHashableData<T>::value,
+ HashBuilderImpl &>
+ add(T Value) {
+ return adjustForEndiannessAndAdd(Value);
+ }
+
+ /// Support hashing `ArrayRef`.
+ ///
+ /// `Value.size()` is taken into account to ensure cases like
+ /// ```
+ /// builder.add({1});
+ /// builder.add({2, 3});
+ /// ```
+ /// and
+ /// ```
+ /// builder.add({1, 2});
+ /// builder.add({3});
+ /// ```
+ /// do not collide.
+ template <typename T> HashBuilderImpl &add(ArrayRef<T> Value) {
+ // As of implementation time, simply calling `addRange(Value)` would also go
+ // through the `update` fast path. But that would rely on the implementation
+ // details of `ArrayRef::begin()` and `ArrayRef::end()`. Explicitly call
+ // `update` to guarantee the fast path.
+ add(Value.size());
+ if (hashbuilder_detail::IsHashableData<T>::value &&
+ Endianness == support::endian::system_endianness()) {
+ this->update(
+ makeArrayRef(reinterpret_cast<const uint8_t *>(Value.begin()),
+ Value.size() * sizeof(T)));
+ } else {
+ for (auto &V : Value)
+ add(V);
+ }
+ return *this;
+ }
+
+ /// Support hashing `StringRef`.
+ ///
+ /// `Value.size()` is taken into account to ensure cases like
+ /// ```
+ /// builder.add("a");
+ /// builder.add("bc");
+ /// ```
+ /// and
+ /// ```
+ /// builder.add("ab");
+ /// builder.add("c");
+ /// ```
+ /// do not collide.
+ HashBuilderImpl &add(StringRef Value) {
+ // As of implementation time, simply calling `addRange(Value)` would also go
+ // through `update`. But that would rely on the implementation of
+ // `StringRef::begin()` and `StringRef::end()`. Explicitly call `update` to
+ // guarantee the fast path.
+ add(Value.size());
+ this->update(makeArrayRef(reinterpret_cast<const uint8_t *>(Value.begin()),
+ Value.size()));
+ return *this;
+ }
+
+ template <typename T>
+ using HasAddHashT =
+ decltype(addHash(std::declval<HashBuilderImpl &>(), std::declval<T &>()));
+ /// Implement hashing for user-defined `struct`s.
+ ///
+ /// Any user-defined `struct` can participate in hashing via `HashBuilder` by
+ /// providing an `addHash` templated function.
+ ///
+ /// ```
+ /// template <typename HasherT, support::endianness Endianness>
+ /// void addHash(HashBuilder<HasherT, Endianness> &HBuilder,
+ /// const UserDefinedStruct &Value);
+ /// ```
+ ///
+ /// For example:
+ /// ```
+ /// struct SimpleStruct {
+ /// char c;
+ /// int i;
+ /// };
+ ///
+ /// template <typename HasherT, support::endianness Endianness>
+ /// void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder,
+ /// const SimpleStruct &Value) {
+ /// HBuilder.add(Value.c);
+ /// HBuilder.add(Value.i);
+ /// }
+ /// ```
+ ///
+ /// To avoid endianness issues, specializations of `addHash` should
+ /// generally rely on existing `add`, `addRange`, and `addRangeElements`
+ /// functions. If directly using `update`, an implementation must correctly
+ /// handle endianness.
+ ///
+ /// ```
+ /// struct __attribute__ ((packed)) StructWithFastHash {
+ /// int I;
+ /// char C;
+ ///
+ /// // If possible, we want to hash both `I` and `C` in a single
+ /// // `update` call for performance concerns.
+ /// template <typename HasherT, support::endianness Endianness>
+ /// friend void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder,
+ /// const StructWithFastHash &Value) {
+ /// if (Endianness == support::endian::system_endianness()) {
+ /// HBuilder.update(makeArrayRef(
+ /// reinterpret_cast<const uint8_t *>(&Value), sizeof(Value)));
+ /// } else {
+ /// // Rely on existing `add` methods to handle endianness.
+ /// HBuilder.add(Value.I);
+ /// HBuilder.add(Value.C);
+ /// }
+ /// }
+ /// };
+ /// ```
+ ///
+ /// To avoid collisions, specializations of `addHash` for variable-size
+ /// types must take the size into account.
+ ///
+ /// For example:
+ /// ```
+ /// struct CustomContainer {
+ /// private:
+ /// size_t Size;
+ /// int Elements[100];
+ ///
+ /// public:
+ /// CustomContainer(size_t Size) : Size(Size) {
+ /// for (size_t I = 0; I != Size; ++I)
+ /// Elements[I] = I;
+ /// }
+ /// template <typename HasherT, support::endianness Endianness>
+ /// friend void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder,
+ /// const CustomContainer &Value) {
+ /// if (Endianness == support::endian::system_endianness()) {
+ /// HBuilder.update(makeArrayRef(
+ /// reinterpret_cast<const uint8_t *>(&Value.Size),
+ /// sizeof(Value.Size) + Value.Size * sizeof(Value.Elements[0])));
+ /// } else {
+ /// // `addRange` will take care of encoding the size.
+ /// HBuilder.addRange(&Value.Elements[0], &Value.Elements[0] +
+ /// Value.Size);
+ /// }
+ /// }
+ /// };
+ /// ```
+ template <typename T>
+ std::enable_if_t<is_detected<HasAddHashT, T>::value &&
+ !hashbuilder_detail::IsHashableData<T>::value,
+ HashBuilderImpl &>
+ add(const T &Value) {
+ addHash(*this, Value);
+ return *this;
+ }
+
+ template <typename T1, typename T2>
+ HashBuilderImpl &add(const std::pair<T1, T2> &Value) {
+ add(Value.first);
+ add(Value.second);
+ return *this;
+ }
+
+ template <typename... Ts> HashBuilderImpl &add(const std::tuple<Ts...> &Arg) {
+ return addTupleHelper(Arg, typename std::index_sequence_for<Ts...>());
+ }
+
+ /// A convenience variadic helper.
+ /// It simply iterates over its arguments, in order.
+ /// ```
+ /// add(Arg1, Arg2);
+ /// ```
+ /// is equivalent to
+ /// ```
+ /// add(Arg1)
+ /// add(Arg2)
+ /// ```
+ template <typename T, typename... Ts>
+ typename std::enable_if<(sizeof...(Ts) >= 1), HashBuilderImpl &>::type
+ add(const T &FirstArg, const Ts &...Args) {
+ add(FirstArg);
+ add(Args...);
+ return *this;
+ }
+
+ template <typename ForwardIteratorT>
+ HashBuilderImpl &addRange(ForwardIteratorT First, ForwardIteratorT Last) {
+ add(std::distance(First, Last));
+ return addRangeElements(First, Last);
+ }
+
+ template <typename RangeT> HashBuilderImpl &addRange(const RangeT &Range) {
+ return addRange(adl_begin(Range), adl_end(Range));
+ }
+
+ template <typename ForwardIteratorT>
+ HashBuilderImpl &addRangeElements(ForwardIteratorT First,
+ ForwardIteratorT Last) {
+ return addRangeElementsImpl(
+ First, Last,
+ typename std::iterator_traits<ForwardIteratorT>::iterator_category());
+ }
+
+ template <typename RangeT>
+ HashBuilderImpl &addRangeElements(const RangeT &Range) {
+ return addRangeElements(adl_begin(Range), adl_end(Range));
+ }
+
+ template <typename T>
+ using HasByteSwapT = decltype(support::endian::byte_swap(
+ std::declval<T &>(), support::endianness::little));
+ /// Adjust `Value` for the target endianness and add it to the hash.
+ template <typename T>
+ std::enable_if_t<is_detected<HasByteSwapT, T>::value, HashBuilderImpl &>
+ adjustForEndiannessAndAdd(const T &Value) {
+ T SwappedValue = support::endian::byte_swap(Value, Endianness);
+ this->update(makeArrayRef(reinterpret_cast<const uint8_t *>(&SwappedValue),
+ sizeof(SwappedValue)));
+ return *this;
+ }
+
+private:
+ template <typename... Ts, std::size_t... Indices>
+ HashBuilderImpl &addTupleHelper(const std::tuple<Ts...> &Arg,
+ std::index_sequence<Indices...>) {
+ add(std::get<Indices>(Arg)...);
+ return *this;
+ }
+
+ // FIXME: Once available, specialize this function for `contiguous_iterator`s,
+ // and use it for `ArrayRef` and `StringRef`.
+ template <typename ForwardIteratorT>
+ HashBuilderImpl &addRangeElementsImpl(ForwardIteratorT First,
+ ForwardIteratorT Last,
+ std::forward_iterator_tag) {
+ for (auto It = First; It != Last; ++It)
+ add(*It);
+ return *this;
+ }
+
+ template <typename T>
+ std::enable_if_t<hashbuilder_detail::IsHashableData<T>::value &&
+ Endianness == support::endian::system_endianness(),
+ HashBuilderImpl &>
+ addRangeElementsImpl(T *First, T *Last, std::forward_iterator_tag) {
+ this->update(makeArrayRef(reinterpret_cast<const uint8_t *>(First),
+ (Last - First) * sizeof(T)));
+ return *this;
+ }
+};
+
+/// Interface to help hash various types through a hasher type.
+///
+/// Via provided specializations of `add`, `addRange`, and `addRangeElements`
+/// functions, various types (e.g. `ArrayRef`, `StringRef`, etc.) can be hashed
+/// without requiring any knowledge of hashed types from the hasher type.
+///
+/// The only method expected from the templated hasher type `HasherT` is:
+/// * void update(ArrayRef<uint8_t> Data)
+///
+/// Additionally, the following methods will be forwarded to the hasher type:
+/// * decltype(std::declval<HasherT &>().final()) final()
+/// * decltype(std::declval<HasherT &>().result()) result()
+///
+/// From a user point of view, the interface provides the following:
+/// * `template<typename T> add(const T &Value)`
+/// The `add` function implements hashing of various types.
+/// * `template <typename ItT> void addRange(ItT First, ItT Last)`
+/// The `addRange` function is designed to aid hashing a range of values.
+/// It explicitly adds the size of the range in the hash.
+/// * `template <typename ItT> void addRangeElements(ItT First, ItT Last)`
+/// The `addRangeElements` function is also designed to aid hashing a range of
+/// values. In contrast to `addRange`, it **ignores** the size of the range,
+/// behaving as if elements were added one at a time with `add`.
+///
+/// User-defined `struct` types can participate in this interface by providing
+/// an `addHash` templated function. See the associated template specialization
+/// for details.
+///
+/// This interface does not impose requirements on the hasher
+/// `update(ArrayRef<uint8_t> Data)` method. We want to avoid collisions for
+/// variable-size types; for example for
+/// ```
+/// builder.add({1});
+/// builder.add({2, 3});
+/// ```
+/// and
+/// ```
+/// builder.add({1, 2});
+/// builder.add({3});
+/// ```
+/// . Thus, specializations of `add` and `addHash` for variable-size types must
+/// not assume that the hasher type considers the size as part of the hash; they
+/// must explicitly add the size to the hash. See for example specializations
+/// for `ArrayRef` and `StringRef`.
+///
+/// Additionally, since types are eventually forwarded to the hasher's
+/// `void update(ArrayRef<uint8_t>)` method, endianness plays a role in the hash
+/// computation (for example when computing `add((int)123)`).
+/// Specifying a non-`native` `Endianness` template parameter allows computing a
+/// stable hash across platforms with different endianness.
+template <class HasherT, support::endianness Endianness>
+using HashBuilder =
+ HashBuilderImpl<HasherT, (Endianness == support::endianness::native
+ ? support::endian::system_endianness()
+ : Endianness)>;
+
+namespace hashbuilder_detail {
+class HashCodeHasher {
+public:
+ HashCodeHasher() : Code(0) {}
+ void update(ArrayRef<uint8_t> Data) {
+ hash_code DataCode = hash_value(Data);
+ Code = hash_combine(Code, DataCode);
+ }
+ hash_code Code;
+};
+
+using HashCodeHashBuilder = HashBuilder<hashbuilder_detail::HashCodeHasher,
+ support::endianness::native>;
+} // namespace hashbuilder_detail
+
+/// Provide a default implementation of `hash_value` when `addHash(const T &)`
+/// is supported.
+template <typename T>
+std::enable_if_t<
+ is_detected<hashbuilder_detail::HashCodeHashBuilder::HasAddHashT, T>::value,
+ hash_code>
+hash_value(const T &Value) {
+ hashbuilder_detail::HashCodeHashBuilder HBuilder;
+ HBuilder.add(Value);
+ return HBuilder.getHasher().Code;
+}
+} // end namespace llvm
+
+#endif // LLVM_SUPPORT_HASHBUILDER_H
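// Usage sketch (illustrative, not part of the patch, assuming the llvm
// namespace and the MD5 support header): hashing a few values through
// HashBuilder with an explicit endianness so the digest is stable across hosts.
MD5 Hasher;
HashBuilder<MD5, support::endianness::little> HBuilder(Hasher);
HBuilder.add(uint32_t(42), StringRef("foo"));
HBuilder.addRange(std::vector<int>{1, 2, 3});
StringRef Digest = HBuilder.final(); // Forwards to MD5::final().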
diff --git a/llvm/include/llvm/Support/JSON.h b/llvm/include/llvm/Support/JSON.h
index c753cee60ec1..469f50be40e0 100644
--- a/llvm/include/llvm/Support/JSON.h
+++ b/llvm/include/llvm/Support/JSON.h
@@ -234,7 +234,7 @@ inline bool operator!=(const Array &L, const Array &R) { return !(L == R); }
/// Each Value is one of the JSON kinds:
/// null (nullptr_t)
/// boolean (bool)
-/// number (double or int64)
+/// number (double, int64 or uint64)
/// string (StringRef)
/// array (json::Array)
/// object (json::Object)
@@ -342,9 +342,20 @@ public:
Value(T B) : Type(T_Boolean) {
create<bool>(B);
}
- // Integers (except boolean). Must be non-narrowing convertible to int64_t.
+
+ // Unsigned 64-bit long integers.
+ template <typename T,
+ typename = std::enable_if_t<std::is_same<T, uint64_t>::value>,
+ bool = false, bool = false>
+ Value(T V) : Type(T_UINT64) {
+ create<uint64_t>(uint64_t{V});
+ }
+
+ // Integers (except boolean and uint64_t).
+ // Must be non-narrowing convertible to int64_t.
template <typename T, typename = std::enable_if_t<std::is_integral<T>::value>,
- typename = std::enable_if_t<!std::is_same<T, bool>::value>>
+ typename = std::enable_if_t<!std::is_same<T, bool>::value>,
+ typename = std::enable_if_t<!std::is_same<T, uint64_t>::value>>
Value(T I) : Type(T_Integer) {
create<int64_t>(int64_t{I});
}
@@ -382,6 +393,7 @@ public:
return Boolean;
case T_Double:
case T_Integer:
+ case T_UINT64:
return Number;
case T_String:
case T_StringRef:
@@ -410,6 +422,8 @@ public:
return as<double>();
if (LLVM_LIKELY(Type == T_Integer))
return as<int64_t>();
+ if (LLVM_LIKELY(Type == T_UINT64))
+ return as<uint64_t>();
return llvm::None;
}
// Succeeds if the Value is a Number, and exactly representable as int64_t.
@@ -425,6 +439,16 @@ public:
}
return llvm::None;
}
+ llvm::Optional<uint64_t> getAsUINT64() const {
+ if (Type == T_UINT64)
+ return as<uint64_t>();
+ else if (Type == T_Integer) {
+ int64_t N = as<int64_t>();
+ if (N >= 0)
+ return as<uint64_t>();
+ }
+ return llvm::None;
+ }
llvm::Optional<llvm::StringRef> getAsString() const {
if (Type == T_String)
return llvm::StringRef(as<std::string>());
@@ -467,11 +491,12 @@ private:
friend class OStream;
- enum ValueType : char {
+ enum ValueType : char16_t {
T_Null,
T_Boolean,
T_Double,
T_Integer,
+ T_UINT64,
T_StringRef,
T_String,
T_Object,
@@ -479,8 +504,9 @@ private:
};
// All members mutable, see moveFrom().
mutable ValueType Type;
- mutable llvm::AlignedCharArrayUnion<bool, double, int64_t, llvm::StringRef,
- std::string, json::Array, json::Object>
+ mutable llvm::AlignedCharArrayUnion<bool, double, int64_t, uint64_t,
+ llvm::StringRef, std::string, json::Array,
+ json::Object>
Union;
friend bool operator==(const Value &, const Value &);
};
@@ -683,6 +709,14 @@ inline bool fromJSON(const Value &E, bool &Out, Path P) {
P.report("expected boolean");
return false;
}
+inline bool fromJSON(const Value &E, uint64_t &Out, Path P) {
+ if (auto S = E.getAsUINT64()) {
+ Out = *S;
+ return true;
+ }
+ P.report("expected uint64_t");
+ return false;
+}
inline bool fromJSON(const Value &E, std::nullptr_t &Out, Path P) {
if (auto S = E.getAsNull()) {
Out = *S;
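// Usage sketch (illustrative, not part of the patch): values above INT64_MAX
// now survive a round trip through json::Value as unsigned 64-bit numbers.
json::Value Big = uint64_t(1) << 63;            // Stored with the new T_UINT64 kind.
if (Optional<uint64_t> U = Big.getAsUINT64())
  outs() << *U << "\n";
uint64_t Out;
json::Path::Root Root;
bool Ok = fromJSON(Big, Out, json::Path(Root)); // New uint64_t overload.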
diff --git a/llvm/include/llvm/Support/KnownBits.h b/llvm/include/llvm/Support/KnownBits.h
index cfec5796493f..1f32760a6fd1 100644
--- a/llvm/include/llvm/Support/KnownBits.h
+++ b/llvm/include/llvm/Support/KnownBits.h
@@ -60,7 +60,7 @@ public:
}
/// Returns true if we don't know any bits.
- bool isUnknown() const { return Zero.isNullValue() && One.isNullValue(); }
+ bool isUnknown() const { return Zero.isZero() && One.isZero(); }
/// Resets the known state of all bits.
void resetAll() {
@@ -71,13 +71,13 @@ public:
/// Returns true if value is all zero.
bool isZero() const {
assert(!hasConflict() && "KnownBits conflict!");
- return Zero.isAllOnesValue();
+ return Zero.isAllOnes();
}
/// Returns true if value is all one bits.
bool isAllOnes() const {
assert(!hasConflict() && "KnownBits conflict!");
- return One.isAllOnesValue();
+ return One.isAllOnes();
}
/// Make all bits known to be zero and discard any previous information.
@@ -99,10 +99,12 @@ public:
bool isNonNegative() const { return Zero.isSignBitSet(); }
/// Returns true if this value is known to be non-zero.
- bool isNonZero() const { return !One.isNullValue(); }
+ bool isNonZero() const { return !One.isZero(); }
/// Returns true if this value is known to be positive.
- bool isStrictlyPositive() const { return Zero.isSignBitSet() && !One.isNullValue(); }
+ bool isStrictlyPositive() const {
+ return Zero.isSignBitSet() && !One.isZero();
+ }
/// Make this value negative.
void makeNegative() {
@@ -280,6 +282,10 @@ public:
return getBitWidth() - Zero.countPopulation();
}
+ unsigned countMaxActiveBits() const {
+ return getBitWidth() - countMinLeadingZeros();
+ }
+
/// Create known bits from a known constant.
static KnownBits makeConstant(const APInt &C) {
return KnownBits(~C, C);
@@ -292,7 +298,7 @@ public:
/// Return true if LHS and RHS have no common bits set.
static bool haveNoCommonBitsSet(const KnownBits &LHS, const KnownBits &RHS) {
- return (LHS.Zero | RHS.Zero).isAllOnesValue();
+ return (LHS.Zero | RHS.Zero).isAllOnes();
}
/// Compute known bits resulting from adding LHS, RHS and a 1-bit Carry.
@@ -304,7 +310,8 @@ public:
KnownBits RHS);
/// Compute known bits resulting from multiplying LHS and RHS.
- static KnownBits mul(const KnownBits &LHS, const KnownBits &RHS);
+ static KnownBits mul(const KnownBits &LHS, const KnownBits &RHS,
+ bool SelfMultiply = false);
/// Compute known bits from sign-extended multiply-hi.
static KnownBits mulhs(const KnownBits &LHS, const KnownBits &RHS);
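// Usage sketch (illustrative, not part of the patch): the renamed predicates,
// the new countMaxActiveBits(), and the self-multiply hint in action.
KnownBits Known = KnownBits::makeConstant(APInt(8, 0x0f));
bool Determined = !Known.isUnknown();            // All bits known here.
unsigned MaxActive = Known.countMaxActiveBits(); // 4 for the constant 0x0f.
KnownBits Square = KnownBits::mul(Known, Known, /*SelfMultiply=*/true);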
diff --git a/llvm/include/llvm/Support/MD5.h b/llvm/include/llvm/Support/MD5.h
index 3b2d5b974d0b..3b960cd4fd88 100644
--- a/llvm/include/llvm/Support/MD5.h
+++ b/llvm/include/llvm/Support/MD5.h
@@ -39,18 +39,6 @@ template <unsigned N> class SmallString;
template <typename T> class ArrayRef;
class MD5 {
- // Any 32-bit or wider unsigned integer data type will do.
- typedef uint32_t MD5_u32plus;
-
- MD5_u32plus a = 0x67452301;
- MD5_u32plus b = 0xefcdab89;
- MD5_u32plus c = 0x98badcfe;
- MD5_u32plus d = 0x10325476;
- MD5_u32plus hi = 0;
- MD5_u32plus lo = 0;
- uint8_t buffer[64];
- MD5_u32plus block[16];
-
public:
struct MD5Result {
std::array<uint8_t, 16> Bytes;
@@ -90,6 +78,14 @@ public:
/// Finishes off the hash and puts the result in result.
void final(MD5Result &Result);
+ /// Finishes off the hash, and returns a reference to the 16-byte hash data.
+ StringRef final();
+
+ /// Finishes off the hash, and returns a reference to the 16-byte hash data.
+ /// This is suitable for getting the MD5 at any time without invalidating the
+ /// internal state, so that more calls can be made into `update`.
+ StringRef result();
+
/// Translates the bytes in \p Res to a hex string that is
/// deposited into \p Str. The result will be of length 32.
static void stringifyResult(MD5Result &Result, SmallString<32> &Str);
@@ -98,6 +94,23 @@ public:
static std::array<uint8_t, 16> hash(ArrayRef<uint8_t> Data);
private:
+ // Any 32-bit or wider unsigned integer data type will do.
+ typedef uint32_t MD5_u32plus;
+
+ // Internal State
+ struct {
+ MD5_u32plus a = 0x67452301;
+ MD5_u32plus b = 0xefcdab89;
+ MD5_u32plus c = 0x98badcfe;
+ MD5_u32plus d = 0x10325476;
+ MD5_u32plus hi = 0;
+ MD5_u32plus lo = 0;
+ uint8_t buffer[64];
+ MD5_u32plus block[16];
+ } InternalState;
+
+ MD5Result Result;
+
const uint8_t *body(ArrayRef<uint8_t> Data);
};
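// Usage sketch (illustrative, not part of the patch): result() reports the
// digest of the bytes seen so far without invalidating the stream, while
// final() finishes the hash.
MD5 Hash;
Hash.update("partial");
StringRef Intermediate = Hash.result(); // Digest so far; updates may continue.
Hash.update(" and more");
StringRef Full = Hash.final();          // Finalizes the hash.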
diff --git a/llvm/include/llvm/Support/MSP430AttributeParser.h b/llvm/include/llvm/Support/MSP430AttributeParser.h
new file mode 100644
index 000000000000..bc9b21494470
--- /dev/null
+++ b/llvm/include/llvm/Support/MSP430AttributeParser.h
@@ -0,0 +1,44 @@
+//===-- MSP430AttributeParser.h - MSP430 Attribute Parser -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains support routines for parsing MSP430 ELF build attributes.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_MSP430ATTRIBUTEPARSER_H
+#define LLVM_SUPPORT_MSP430ATTRIBUTEPARSER_H
+
+#include "llvm/Support/ELFAttributeParser.h"
+#include "llvm/Support/MSP430Attributes.h"
+
+namespace llvm {
+class MSP430AttributeParser : public ELFAttributeParser {
+ struct DisplayHandler {
+ MSP430Attrs::AttrType Attribute;
+ Error (MSP430AttributeParser::*Routine)(MSP430Attrs::AttrType);
+ };
+ static const std::array<DisplayHandler, 4> DisplayRoutines;
+
+ Error parseISA(MSP430Attrs::AttrType Tag);
+ Error parseCodeModel(MSP430Attrs::AttrType Tag);
+ Error parseDataModel(MSP430Attrs::AttrType Tag);
+ Error parseEnumSize(MSP430Attrs::AttrType Tag);
+
+ Error handler(uint64_t Tag, bool &Handled) override;
+
+public:
+ MSP430AttributeParser(ScopedPrinter *SW)
+ : ELFAttributeParser(SW, MSP430Attrs::getMSP430AttributeTags(),
+ "mspabi") {}
+ MSP430AttributeParser()
+ : ELFAttributeParser(MSP430Attrs::getMSP430AttributeTags(), "mspabi") {}
+};
+} // namespace llvm
+
+#endif
diff --git a/llvm/include/llvm/Support/MSP430Attributes.h b/llvm/include/llvm/Support/MSP430Attributes.h
new file mode 100644
index 000000000000..fccd65e844c3
--- /dev/null
+++ b/llvm/include/llvm/Support/MSP430Attributes.h
@@ -0,0 +1,44 @@
+//===-- MSP430Attributes.h - MSP430 Attributes ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===-----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains enumerations for MSP430 ELF build attributes as
+/// defined in the MSP430 ELF psABI specification.
+///
+/// MSP430 ELF psABI specification
+///
+/// https://www.ti.com/lit/pdf/slaa534
+///
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_SUPPORT_MSP430ATTRIBUTES_H
+#define LLVM_SUPPORT_MSP430ATTRIBUTES_H
+
+#include "llvm/Support/ELFAttributes.h"
+
+namespace llvm {
+namespace MSP430Attrs {
+
+const TagNameMap &getMSP430AttributeTags();
+
+enum AttrType : unsigned {
+ // Attribute types in ELF/.MSP430.attributes.
+ TagISA = 4,
+ TagCodeModel = 6,
+ TagDataModel = 8,
+ TagEnumSize = 10
+};
+
+enum ISA { ISAMSP430 = 1, ISAMSP430X = 2 };
+enum CodeModel { CMSmall = 1, CMLarge = 2 };
+enum DataModel { DMSmall = 1, DMLarge = 2, DMRestricted = 3 };
+enum EnumSize { ESSmall = 1, ESInteger = 2, ESDontCare = 3 };
+
+} // namespace MSP430Attrs
+} // namespace llvm
+
+#endif
diff --git a/llvm/include/llvm/Support/MachineValueType.h b/llvm/include/llvm/Support/MachineValueType.h
index 31f2d5a48183..ce10a4c58dfe 100644
--- a/llvm/include/llvm/Support/MachineValueType.h
+++ b/llvm/include/llvm/Support/MachineValueType.h
@@ -270,9 +270,10 @@ namespace llvm {
funcref = 175, // WebAssembly's funcref type
externref = 176, // WebAssembly's externref type
x86amx = 177, // This is an X86 AMX value
+ i64x8 = 178, // 8 Consecutive GPRs (AArch64)
FIRST_VALUETYPE = 1, // This is always the beginning of the list.
- LAST_VALUETYPE = x86amx, // This always remains at the end of the list.
+ LAST_VALUETYPE = i64x8, // This always remains at the end of the list.
VALUETYPE_SIZE = LAST_VALUETYPE + 1,
// This is the current maximum for LAST_VALUETYPE.
@@ -987,6 +988,7 @@ namespace llvm {
case nxv16f16:
case nxv8f32:
case nxv4f64: return TypeSize::Scalable(256);
+ case i64x8:
case v512i1:
case v64i8:
case v32i16:
@@ -1403,51 +1405,61 @@ namespace llvm {
/// SimpleValueType Iteration
/// @{
static auto all_valuetypes() {
- return seq_inclusive(MVT::FIRST_VALUETYPE, MVT::LAST_VALUETYPE);
+ return enum_seq_inclusive(MVT::FIRST_VALUETYPE, MVT::LAST_VALUETYPE,
+ force_iteration_on_noniterable_enum);
}
static auto integer_valuetypes() {
- return seq_inclusive(MVT::FIRST_INTEGER_VALUETYPE,
- MVT::LAST_INTEGER_VALUETYPE);
+ return enum_seq_inclusive(MVT::FIRST_INTEGER_VALUETYPE,
+ MVT::LAST_INTEGER_VALUETYPE,
+ force_iteration_on_noniterable_enum);
}
static auto fp_valuetypes() {
- return seq_inclusive(MVT::FIRST_FP_VALUETYPE, MVT::LAST_FP_VALUETYPE);
+ return enum_seq_inclusive(MVT::FIRST_FP_VALUETYPE, MVT::LAST_FP_VALUETYPE,
+ force_iteration_on_noniterable_enum);
}
static auto vector_valuetypes() {
- return seq_inclusive(MVT::FIRST_VECTOR_VALUETYPE,
- MVT::LAST_VECTOR_VALUETYPE);
+ return enum_seq_inclusive(MVT::FIRST_VECTOR_VALUETYPE,
+ MVT::LAST_VECTOR_VALUETYPE,
+ force_iteration_on_noniterable_enum);
}
static auto fixedlen_vector_valuetypes() {
- return seq_inclusive(MVT::FIRST_FIXEDLEN_VECTOR_VALUETYPE,
- MVT::LAST_FIXEDLEN_VECTOR_VALUETYPE);
+ return enum_seq_inclusive(MVT::FIRST_FIXEDLEN_VECTOR_VALUETYPE,
+ MVT::LAST_FIXEDLEN_VECTOR_VALUETYPE,
+ force_iteration_on_noniterable_enum);
}
static auto scalable_vector_valuetypes() {
- return seq_inclusive(MVT::FIRST_SCALABLE_VECTOR_VALUETYPE,
- MVT::LAST_SCALABLE_VECTOR_VALUETYPE);
+ return enum_seq_inclusive(MVT::FIRST_SCALABLE_VECTOR_VALUETYPE,
+ MVT::LAST_SCALABLE_VECTOR_VALUETYPE,
+ force_iteration_on_noniterable_enum);
}
static auto integer_fixedlen_vector_valuetypes() {
- return seq_inclusive(MVT::FIRST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE,
- MVT::LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE);
+ return enum_seq_inclusive(MVT::FIRST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE,
+ MVT::LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE,
+ force_iteration_on_noniterable_enum);
}
static auto fp_fixedlen_vector_valuetypes() {
- return seq_inclusive(MVT::FIRST_FP_FIXEDLEN_VECTOR_VALUETYPE,
- MVT::LAST_FP_FIXEDLEN_VECTOR_VALUETYPE);
+ return enum_seq_inclusive(MVT::FIRST_FP_FIXEDLEN_VECTOR_VALUETYPE,
+ MVT::LAST_FP_FIXEDLEN_VECTOR_VALUETYPE,
+ force_iteration_on_noniterable_enum);
}
static auto integer_scalable_vector_valuetypes() {
- return seq_inclusive(MVT::FIRST_INTEGER_SCALABLE_VECTOR_VALUETYPE,
- MVT::LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE);
+ return enum_seq_inclusive(MVT::FIRST_INTEGER_SCALABLE_VECTOR_VALUETYPE,
+ MVT::LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE,
+ force_iteration_on_noniterable_enum);
}
static auto fp_scalable_vector_valuetypes() {
- return seq_inclusive(MVT::FIRST_FP_SCALABLE_VECTOR_VALUETYPE,
- MVT::LAST_FP_SCALABLE_VECTOR_VALUETYPE);
+ return enum_seq_inclusive(MVT::FIRST_FP_SCALABLE_VECTOR_VALUETYPE,
+ MVT::LAST_FP_SCALABLE_VECTOR_VALUETYPE,
+ force_iteration_on_noniterable_enum);
}
/// @}
};
diff --git a/llvm/include/llvm/Support/Memory.h b/llvm/include/llvm/Support/Memory.h
index 31e0abbcdb61..d7d60371d315 100644
--- a/llvm/include/llvm/Support/Memory.h
+++ b/llvm/include/llvm/Support/Memory.h
@@ -37,7 +37,7 @@ namespace sys {
/// The size as it was allocated. This is always greater or equal to the
/// size that was originally requested.
size_t allocatedSize() const { return AllocatedSize; }
-
+
private:
void *Address; ///< Address of first byte of memory area
size_t AllocatedSize; ///< Size, in bytes of the memory area
@@ -148,13 +148,22 @@ namespace sys {
return *this;
}
~OwningMemoryBlock() {
- Memory::releaseMappedMemory(M);
+ if (M.base())
+ Memory::releaseMappedMemory(M);
}
void *base() const { return M.base(); }
/// The size as it was allocated. This is always greater or equal to the
/// size that was originally requested.
size_t allocatedSize() const { return M.allocatedSize(); }
MemoryBlock getMemoryBlock() const { return M; }
+ std::error_code release() {
+ std::error_code EC;
+ if (M.base()) {
+ EC = Memory::releaseMappedMemory(M);
+ M = MemoryBlock();
+ }
+ return EC;
+ }
private:
MemoryBlock M;
};
diff --git a/llvm/include/llvm/Support/PGOOptions.h b/llvm/include/llvm/Support/PGOOptions.h
new file mode 100644
index 000000000000..2141e2159c0c
--- /dev/null
+++ b/llvm/include/llvm/Support/PGOOptions.h
@@ -0,0 +1,65 @@
+//===------ PGOOptions.h -- PGO option tunables ----------------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// Define option tunables for PGO.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_PGOOPTIONS_H
+#define LLVM_SUPPORT_PGOOPTIONS_H
+
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+
+/// A struct capturing PGO tunables.
+struct PGOOptions {
+ enum PGOAction { NoAction, IRInstr, IRUse, SampleUse };
+ enum CSPGOAction { NoCSAction, CSIRInstr, CSIRUse };
+ PGOOptions(std::string ProfileFile = "", std::string CSProfileGenFile = "",
+ std::string ProfileRemappingFile = "", PGOAction Action = NoAction,
+ CSPGOAction CSAction = NoCSAction,
+ bool DebugInfoForProfiling = false,
+ bool PseudoProbeForProfiling = false)
+ : ProfileFile(ProfileFile), CSProfileGenFile(CSProfileGenFile),
+ ProfileRemappingFile(ProfileRemappingFile), Action(Action),
+ CSAction(CSAction), DebugInfoForProfiling(DebugInfoForProfiling ||
+ (Action == SampleUse &&
+ !PseudoProbeForProfiling)),
+ PseudoProbeForProfiling(PseudoProbeForProfiling) {
+ // Note, we do allow ProfileFile.empty() for Action=IRUse; LTO can
+ // call back with the IRUse action without a ProfileFile.
+
+ // If there is a CSAction, PGOAction cannot be IRInstr or SampleUse.
+ assert(this->CSAction == NoCSAction ||
+ (this->Action != IRInstr && this->Action != SampleUse));
+
+ // For CSIRInstr, CSProfileGenFile also needs to be nonempty.
+ assert(this->CSAction != CSIRInstr || !this->CSProfileGenFile.empty());
+
+ // If CSAction is CSIRUse, PGOAction needs to be IRUse as they share
+ // a profile.
+ assert(this->CSAction != CSIRUse || this->Action == IRUse);
+
+ // If neither Action nor CSAction is set, DebugInfoForProfiling or
+ // PseudoProbeForProfiling needs to be true.
+ assert(this->Action != NoAction || this->CSAction != NoCSAction ||
+ this->DebugInfoForProfiling || this->PseudoProbeForProfiling);
+ }
+ std::string ProfileFile;
+ std::string CSProfileGenFile;
+ std::string ProfileRemappingFile;
+ PGOAction Action;
+ CSPGOAction CSAction;
+ bool DebugInfoForProfiling;
+ bool PseudoProbeForProfiling;
+};
+} // namespace llvm
+
+#endif
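// Usage sketch (illustrative, not part of the patch): configuring an IR-level
// profile-use run; the profile file name is hypothetical.
PGOOptions Opts("default.profdata", /*CSProfileGenFile=*/"",
                /*ProfileRemappingFile=*/"", PGOOptions::IRUse);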
diff --git a/llvm/include/llvm/Support/Parallel.h b/llvm/include/llvm/Support/Parallel.h
index 28d171d45256..5c3b26d5754c 100644
--- a/llvm/include/llvm/Support/Parallel.h
+++ b/llvm/include/llvm/Support/Parallel.h
@@ -40,7 +40,10 @@ class Latch {
public:
explicit Latch(uint32_t Count = 0) : Count(Count) {}
- ~Latch() { sync(); }
+ ~Latch() {
+ // Ensure at least that sync() was called.
+ assert(Count == 0);
+ }
void inc() {
std::lock_guard<std::mutex> lock(Mutex);
diff --git a/llvm/include/llvm/Support/Path.h b/llvm/include/llvm/Support/Path.h
index af70e086a1b6..da5095714f48 100644
--- a/llvm/include/llvm/Support/Path.h
+++ b/llvm/include/llvm/Support/Path.h
@@ -25,7 +25,29 @@ namespace llvm {
namespace sys {
namespace path {
-enum class Style { windows, posix, native };
+enum class Style {
+ native,
+ posix,
+ windows_slash,
+ windows_backslash,
+ windows = windows_backslash, // deprecated
+};
+
+/// Check if \p S uses POSIX path rules.
+constexpr bool is_style_posix(Style S) {
+ if (S == Style::posix)
+ return true;
+ if (S != Style::native)
+ return false;
+#if defined(_WIN32)
+ return false;
+#else
+ return true;
+#endif
+}
+
+/// Check if \p S uses Windows path rules.
+constexpr bool is_style_windows(Style S) { return !is_style_posix(S); }
/// @name Lexical Component Iterator
/// @{
@@ -174,6 +196,21 @@ bool replace_path_prefix(SmallVectorImpl<char> &Path, StringRef OldPrefix,
StringRef NewPrefix,
Style style = Style::native);
+/// Remove redundant leading "./" pieces and consecutive separators.
+///
+/// @param path Input path.
+/// @result The cleaned-up \a path.
+StringRef remove_leading_dotslash(StringRef path, Style style = Style::native);
+
+/// In-place remove any './' and optionally '../' components from a path.
+///
+/// @param path processed path
+/// @param remove_dot_dot specify if '../' (except for leading "../") should be
+/// removed
+/// @result True if path was changed
+bool remove_dots(SmallVectorImpl<char> &path, bool remove_dot_dot = false,
+ Style style = Style::native);
+
/// Append to path.
///
/// @code
@@ -212,7 +249,7 @@ void append(SmallVectorImpl<char> &path, const_iterator begin,
/// Convert path to the native form. This is used to give paths to users and
/// operating system calls in the platform's normal way. For example, on Windows
-/// all '/' are converted to '\'.
+/// all '/' are converted to '\'. On Unix, it converts all '\' to '/'.
///
/// @param path A path that is transformed to native format.
/// @param result Holds the result of the transformation.
@@ -226,6 +263,17 @@ void native(const Twine &path, SmallVectorImpl<char> &result,
/// @param path A path that is transformed to native format.
void native(SmallVectorImpl<char> &path, Style style = Style::native);
+/// For Windows path styles, convert path to use the preferred path separators.
+/// For other styles, do nothing.
+///
+/// @param path A path that is transformed to preferred format.
+inline void make_preferred(SmallVectorImpl<char> &path,
+ Style style = Style::native) {
+ if (!is_style_windows(style))
+ return;
+ native(path, style);
+}
+
/// Replaces backslashes with slashes if Windows.
///
/// @param path processed path
@@ -499,21 +547,6 @@ bool is_absolute_gnu(const Twine &path, Style style = Style::native);
/// @result True if the path is relative, false if it is not.
bool is_relative(const Twine &path, Style style = Style::native);
-/// Remove redundant leading "./" pieces and consecutive separators.
-///
-/// @param path Input path.
-/// @result The cleaned-up \a path.
-StringRef remove_leading_dotslash(StringRef path, Style style = Style::native);
-
-/// In-place remove any './' and optionally '../' components from a path.
-///
-/// @param path processed path
-/// @param remove_dot_dot specify if '../' (except for leading "../") should be
-/// removed
-/// @result True if path was changed
-bool remove_dots(SmallVectorImpl<char> &path, bool remove_dot_dot = false,
- Style style = Style::native);
-
} // end namespace path
} // end namespace sys
} // end namespace llvm
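A short sketch of the helpers added or moved above, with behaviour inferred from the declarations rather than from this patch's implementation:

    #include "llvm/ADT/SmallString.h"
    #include "llvm/Support/Path.h"

    void tidyPath(llvm::SmallString<128> &P) {
      namespace path = llvm::sys::path;
      // Collapse "./" pieces and, in this example, "../" components as well.
      path::remove_dots(P, /*remove_dot_dot=*/true);
      // For Windows styles this rewrites '/' to '\'; elsewhere it is a no-op.
      path::make_preferred(P);
      // The style predicates are constexpr, so they can be checked statically.
      static_assert(path::is_style_posix(path::Style::posix), "sanity check");
    }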
diff --git a/llvm/include/llvm/Support/Process.h b/llvm/include/llvm/Support/Process.h
index 6687e5e7ff9a..ee03efeed9b2 100644
--- a/llvm/include/llvm/Support/Process.h
+++ b/llvm/include/llvm/Support/Process.h
@@ -214,12 +214,10 @@ public:
/// In that case, the control flow will resume after RunSafely(), like for a
/// crash, rather than exiting the current process.
/// Use \arg NoCleanup for calling _exit() instead of exit().
- LLVM_ATTRIBUTE_NORETURN
- static void Exit(int RetCode, bool NoCleanup = false);
+ [[noreturn]] static void Exit(int RetCode, bool NoCleanup = false);
private:
- LLVM_ATTRIBUTE_NORETURN
- static void ExitNoCleanup(int RetCode);
+ [[noreturn]] static void ExitNoCleanup(int RetCode);
};
}
diff --git a/llvm/include/llvm/Support/RISCVISAInfo.h b/llvm/include/llvm/Support/RISCVISAInfo.h
new file mode 100644
index 000000000000..7110de601123
--- /dev/null
+++ b/llvm/include/llvm/Support/RISCVISAInfo.h
@@ -0,0 +1,89 @@
+//===-- RISCVISAInfo.h - RISCV ISA Information ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_RISCVISAINFO_H
+#define LLVM_SUPPORT_RISCVISAINFO_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Error.h"
+
+#include <map>
+#include <string>
+#include <vector>
+
+namespace llvm {
+struct RISCVExtensionInfo {
+ std::string ExtName;
+ unsigned MajorVersion;
+ unsigned MinorVersion;
+};
+
+class RISCVISAInfo {
+public:
+ RISCVISAInfo(const RISCVISAInfo &) = delete;
+ RISCVISAInfo &operator=(const RISCVISAInfo &) = delete;
+
+ static bool compareExtension(const std::string &LHS, const std::string &RHS);
+
+ /// Helper class for OrderedExtensionMap.
+ struct ExtensionComparator {
+ bool operator()(const std::string &LHS, const std::string &RHS) const {
+ return compareExtension(LHS, RHS);
+ }
+ };
+
+  /// OrderedExtensionMap is a std::map specialized to keep entries
+  /// in canonical extension order.
+ typedef std::map<std::string, RISCVExtensionInfo, ExtensionComparator>
+ OrderedExtensionMap;
+
+ /// Parse RISCV ISA info from arch string.
+ static llvm::Expected<std::unique_ptr<RISCVISAInfo>>
+ parseArchString(StringRef Arch, bool EnableExperimentalExtension,
+ bool ExperimentalExtensionVersionCheck = true);
+
+ /// Parse RISCV ISA info from feature vector.
+ static llvm::Expected<std::unique_ptr<RISCVISAInfo>>
+ parseFeatures(unsigned XLen, const std::vector<std::string> &Features);
+
+ /// Convert RISCV ISA info to a feature vector.
+ void toFeatures(std::vector<StringRef> &Features,
+ std::function<StringRef(const Twine &)> StrAlloc) const;
+
+ const OrderedExtensionMap &getExtensions() const { return Exts; };
+
+ unsigned getXLen() const { return XLen; };
+ unsigned getFLen() const { return FLen; };
+
+ bool hasExtension(StringRef Ext) const;
+ std::string toString() const;
+
+ static bool isSupportedExtensionFeature(StringRef Ext);
+ static bool isSupportedExtension(StringRef Ext);
+ static bool isSupportedExtension(StringRef Ext, unsigned MajorVersion,
+ unsigned MinorVersion);
+
+private:
+ RISCVISAInfo(unsigned XLen) : XLen(XLen), FLen(0) {}
+
+ unsigned XLen;
+ unsigned FLen;
+
+ OrderedExtensionMap Exts;
+
+ void addExtension(StringRef ExtName, unsigned MajorVersion,
+ unsigned MinorVersion);
+
+ void updateFLen();
+};
+
+} // namespace llvm
+
+#endif
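For illustration, a minimal sketch of parsing an ISA string with the new helper; error handling is abbreviated and the arch string is just an example.

    #include "llvm/Support/RISCVISAInfo.h"

    unsigned xlenOf(llvm::StringRef Arch) {
      auto ISAOrErr = llvm::RISCVISAInfo::parseArchString(
          Arch, /*EnableExperimentalExtension=*/false);
      if (!ISAOrErr) {
        llvm::consumeError(ISAOrErr.takeError());
        return 0;
      }
      return (*ISAOrErr)->getXLen(); // e.g. 64 for "rv64gc"
    }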
diff --git a/llvm/include/llvm/Support/RISCVTargetParser.def b/llvm/include/llvm/Support/RISCVTargetParser.def
index 6a06f9258105..f658cdb91c6b 100644
--- a/llvm/include/llvm/Support/RISCVTargetParser.def
+++ b/llvm/include/llvm/Support/RISCVTargetParser.def
@@ -19,9 +19,17 @@ PROC(ROCKET_RV32, {"rocket-rv32"}, FK_NONE, {""})
PROC(ROCKET_RV64, {"rocket-rv64"}, FK_64BIT, {""})
PROC(SIFIVE_732, {"sifive-7-rv32"}, FK_NONE, {""})
PROC(SIFIVE_764, {"sifive-7-rv64"}, FK_64BIT, {""})
+PROC(SIFIVE_E20, {"sifive-e20"}, FK_NONE, {"rv32imc"})
+PROC(SIFIVE_E21, {"sifive-e21"}, FK_NONE, {"rv32imac"})
+PROC(SIFIVE_E24, {"sifive-e24"}, FK_NONE, {"rv32imafc"})
PROC(SIFIVE_E31, {"sifive-e31"}, FK_NONE, {"rv32imac"})
-PROC(SIFIVE_U54, {"sifive-u54"}, FK_64BIT, {"rv64gc"})
+PROC(SIFIVE_E34, {"sifive-e34"}, FK_NONE, {"rv32imafc"})
PROC(SIFIVE_E76, {"sifive-e76"}, FK_NONE, {"rv32imafc"})
+PROC(SIFIVE_S21, {"sifive-s21"}, FK_64BIT, {"rv64imac"})
+PROC(SIFIVE_S51, {"sifive-s51"}, FK_64BIT, {"rv64imac"})
+PROC(SIFIVE_S54, {"sifive-s54"}, FK_64BIT, {"rv64gc"})
+PROC(SIFIVE_S76, {"sifive-s76"}, FK_64BIT, {"rv64gc"})
+PROC(SIFIVE_U54, {"sifive-u54"}, FK_64BIT, {"rv64gc"})
PROC(SIFIVE_U74, {"sifive-u74"}, FK_64BIT, {"rv64gc"})
#undef PROC
diff --git a/llvm/include/llvm/Support/Signposts.h b/llvm/include/llvm/Support/Signposts.h
index bc6abba0a0e1..dabbba6f89d1 100644
--- a/llvm/include/llvm/Support/Signposts.h
+++ b/llvm/include/llvm/Support/Signposts.h
@@ -1,9 +1,8 @@
//===-- llvm/Support/Signposts.h - Interval debug annotations ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -18,17 +17,8 @@
#define LLVM_SUPPORT_SIGNPOSTS_H
#include "llvm/ADT/StringRef.h"
-#include "llvm/Config/llvm-config.h"
#include <memory>
-#if LLVM_SUPPORT_XCODE_SIGNPOSTS
-#include <Availability.h>
-#include <os/signpost.h>
-#endif
-
-#define SIGNPOSTS_AVAILABLE() \
- __builtin_available(macos 10.14, iOS 12, tvOS 12, watchOS 5, *)
-
namespace llvm {
class SignpostEmitterImpl;
@@ -45,33 +35,8 @@ public:
/// Begin a signposted interval for a given object.
void startInterval(const void *O, StringRef Name);
-
-#if LLVM_SUPPORT_XCODE_SIGNPOSTS
- os_log_t &getLogger() const;
- os_signpost_id_t getSignpostForObject(const void *O);
-#endif
-
- /// A macro to take advantage of the special format string handling
- /// in the os_signpost API. The format string substitution is
- /// deferred to the log consumer and done outside of the
- /// application.
-#if LLVM_SUPPORT_XCODE_SIGNPOSTS
-#define SIGNPOST_EMITTER_START_INTERVAL(SIGNPOST_EMITTER, O, ...) \
- do { \
- if ((SIGNPOST_EMITTER).isEnabled()) \
- if (SIGNPOSTS_AVAILABLE()) \
- os_signpost_interval_begin((SIGNPOST_EMITTER).getLogger(), \
- (SIGNPOST_EMITTER).getSignpostForObject(O), \
- "LLVM Timers", __VA_ARGS__); \
- } while (0)
-#else
-#define SIGNPOST_EMITTER_START_INTERVAL(SIGNPOST_EMITTER, O, ...) \
- do { \
- } while (0)
-#endif
-
/// End a signposted interval for a given object.
- void endInterval(const void *O);
+ void endInterval(const void *O, StringRef Name);
};
} // end namespace llvm
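With the signature change above, the interval name now has to be supplied on both ends. A hedged sketch, assuming the SignpostEmitter type from the surrounding header and using an arbitrary label:

    #include "llvm/Support/Signposts.h"

    llvm::SignpostEmitter Signposts;

    void runOnePass(void *Obj) {
      Signposts.startInterval(Obj, "MyPass");
      // ... pass body ...
      Signposts.endInterval(Obj, "MyPass"); // name must match startInterval
    }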
diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
index fbe0d1a55bfc..b34b885ddc35 100644
--- a/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -652,6 +652,9 @@ HANDLE_TARGET_OPCODE(G_UMAX)
/// Generic integer absolute value.
HANDLE_TARGET_OPCODE(G_ABS)
+HANDLE_TARGET_OPCODE(G_LROUND)
+HANDLE_TARGET_OPCODE(G_LLROUND)
+
/// Generic BRANCH instruction. This is an unconditional branch.
HANDLE_TARGET_OPCODE(G_BR)
diff --git a/llvm/include/llvm/Support/TargetSelect.h b/llvm/include/llvm/Support/TargetSelect.h
index 9ffb84c4a570..e57614cea758 100644
--- a/llvm/include/llvm/Support/TargetSelect.h
+++ b/llvm/include/llvm/Support/TargetSelect.h
@@ -41,6 +41,10 @@ extern "C" {
#define LLVM_DISASSEMBLER(TargetName) \
void LLVMInitialize##TargetName##Disassembler();
#include "llvm/Config/Disassemblers.def"
+
+// Declare all of the available TargetMCA initialization functions.
+#define LLVM_TARGETMCA(TargetName) void LLVMInitialize##TargetName##TargetMCA();
+#include "llvm/Config/TargetMCAs.def"
}
namespace llvm {
@@ -159,6 +163,14 @@ namespace llvm {
return true;
#endif
}
+
+ /// InitializeAllTargetMCAs - The main program should call
+ /// this function to initialize the target CustomBehaviour and
+ /// InstrPostProcess classes.
+ inline void InitializeAllTargetMCAs() {
+#define LLVM_TARGETMCA(TargetName) LLVMInitialize##TargetName##TargetMCA();
+#include "llvm/Config/TargetMCAs.def"
+ }
}
#endif
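A typical llvm-mca-style start-up, sketched for context; only the last call is new in this patch.

    #include "llvm/Support/TargetSelect.h"

    void initTargets() {
      llvm::InitializeAllTargetInfos();
      llvm::InitializeAllTargets();
      llvm::InitializeAllTargetMCs();
      llvm::InitializeAllTargetMCAs(); // registers CustomBehaviour/InstrPostProcess
    }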
diff --git a/llvm/include/llvm/Support/TypeSize.h b/llvm/include/llvm/Support/TypeSize.h
index 30bbbd7db8c9..7d1274735a37 100644
--- a/llvm/include/llvm/Support/TypeSize.h
+++ b/llvm/include/llvm/Support/TypeSize.h
@@ -229,7 +229,6 @@ public:
bool isZero() const { return !Value; }
bool isNonZero() const { return !isZero(); }
explicit operator bool() const { return isNonZero(); }
- ScalarTy getValue() const { return Value; }
ScalarTy getValue(unsigned Dim) const {
return Dim == UnivariateDim ? Value : 0;
}
@@ -250,7 +249,7 @@ public:
//===----------------------------------------------------------------------===//
// LinearPolySize - base class for fixed- or scalable sizes.
-// ^ ^
+// ^ ^
// | |
// | +----- ElementCount - Leaf class to represent an element count
// | (vscale x unsigned)
@@ -294,7 +293,7 @@ public:
static LeafTy getNull() { return get(0, false); }
/// Returns the minimum value this size can represent.
- ScalarTy getKnownMinValue() const { return this->getValue(); }
+ ScalarTy getKnownMinValue() const { return this->Value; }
/// Returns whether the size is scaled by a runtime quantity (vscale).
bool isScalable() const { return this->UnivariateDim == ScalableDim; }
/// A return value of true indicates we know at compile time that the number
@@ -500,8 +499,7 @@ inline raw_ostream &operator<<(raw_ostream &OS,
return OS;
}
-template <typename T> struct DenseMapInfo;
-template <> struct DenseMapInfo<ElementCount> {
+template <> struct DenseMapInfo<ElementCount, void> {
static inline ElementCount getEmptyKey() {
return ElementCount::getScalable(~0U);
}
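Since the dimensionless getValue() accessor is removed above, callers are expected to use the named accessors instead. A small sketch:

    #include "llvm/Support/TypeSize.h"

    unsigned minLanes(llvm::ElementCount EC) {
      if (EC.isScalable())
        return EC.getKnownMinValue(); // a multiple of vscale at run time
      return EC.getFixedValue();
    }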
diff --git a/llvm/include/llvm/Support/VersionTuple.h b/llvm/include/llvm/Support/VersionTuple.h
index a48ae0bf52bd..1a1072d228f1 100644
--- a/llvm/include/llvm/Support/VersionTuple.h
+++ b/llvm/include/llvm/Support/VersionTuple.h
@@ -17,6 +17,7 @@
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/Optional.h"
+#include "llvm/Support/HashBuilder.h"
#include <string>
#include <tuple>
@@ -164,6 +165,12 @@ public:
return llvm::hash_combine(VT.Major, VT.Minor, VT.Subminor, VT.Build);
}
+ template <typename HasherT, llvm::support::endianness Endianness>
+ friend void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder,
+ const VersionTuple &VT) {
+ HBuilder.add(VT.Major, VT.Minor, VT.Subminor, VT.Build);
+ }
+
/// Retrieve a string representation of the version number.
std::string getAsString() const;
diff --git a/llvm/include/llvm/Support/VirtualFileSystem.h b/llvm/include/llvm/Support/VirtualFileSystem.h
index 323e6719645d..10d2389ee079 100644
--- a/llvm/include/llvm/Support/VirtualFileSystem.h
+++ b/llvm/include/llvm/Support/VirtualFileSystem.h
@@ -121,6 +121,14 @@ public:
/// Closes the file.
virtual std::error_code close() = 0;
+
+ // Get the same file with a different path.
+ static ErrorOr<std::unique_ptr<File>>
+ getWithPath(ErrorOr<std::unique_ptr<File>> Result, const Twine &P);
+
+protected:
+ // Set the file's underlying path.
+ virtual void setPath(const Twine &Path) {}
};
/// A member of a directory, yielded by a directory_iterator.
@@ -596,6 +604,17 @@ class RedirectingFileSystemParser;
/// contain multiple path components (e.g. /path/to/file). However, any
/// directory in such a path that contains more than one child must be uniquely
/// represented by a 'directory' entry.
+///
+/// When the 'use-external-name' field is set, calls to \a vfs::File::status()
+/// give the external (remapped) filesystem name instead of the name the file
+/// was accessed by. This is an intentional leak through the \a
+/// RedirectingFileSystem abstraction layer. It enables clients to discover
+/// (and use) the external file location when communicating with users or tools
+/// that don't use the same VFS overlay.
+///
+/// FIXME: 'use-external-name' causes behaviour that's inconsistent with how
+/// "real" filesystems behave. Maybe there should be a separate channel for
+/// this information.
class RedirectingFileSystem : public vfs::FileSystem {
public:
enum EntryKind { EK_Directory, EK_DirectoryRemap, EK_File };
@@ -746,6 +765,12 @@ private:
/// with the given error code on a path associated with the provided Entry.
bool shouldFallBackToExternalFS(std::error_code EC, Entry *E = nullptr) const;
+ /// Get the File status, or error, from the underlying external file system.
+ /// This returns the status with the originally requested name, while looking
+ /// up the entry using the canonical path.
+ ErrorOr<Status> getExternalStatus(const Twine &CanonicalPath,
+ const Twine &OriginalPath) const;
+
// In a RedirectingFileSystem, keys can be specified in Posix or Windows
// style (or even a mixture of both), so this comparison helper allows
// slashes (representing a root) to match backslashes (and vice versa). Note
@@ -777,12 +802,7 @@ private:
/// Whether to perform case-sensitive comparisons.
///
/// Currently, case-insensitive matching only works correctly with ASCII.
- bool CaseSensitive =
-#ifdef _WIN32
- false;
-#else
- true;
-#endif
+ bool CaseSensitive = is_style_posix(sys::path::Style::native);
/// IsRelativeOverlay marks whether a ExternalContentsPrefixDir path must
/// be prefixed in every 'external-contents' when reading from YAML files.
@@ -808,7 +828,8 @@ private:
Entry *From) const;
/// Get the status for a path with the provided \c LookupResult.
- ErrorOr<Status> status(const Twine &Path, const LookupResult &Result);
+ ErrorOr<Status> status(const Twine &CanonicalPath, const Twine &OriginalPath,
+ const LookupResult &Result);
public:
/// Looks up \p Path in \c Roots and returns a LookupResult giving the
diff --git a/llvm/include/llvm/Support/Windows/WindowsSupport.h b/llvm/include/llvm/Support/Windows/WindowsSupport.h
index a45eeaba4ad5..917822678e97 100644
--- a/llvm/include/llvm/Support/Windows/WindowsSupport.h
+++ b/llvm/include/llvm/Support/Windows/WindowsSupport.h
@@ -68,10 +68,10 @@ llvm::VersionTuple GetWindowsOSVersion();
bool MakeErrMsg(std::string *ErrMsg, const std::string &prefix);
// Include GetLastError() in a fatal error message.
-LLVM_ATTRIBUTE_NORETURN inline void ReportLastErrorFatal(const char *Msg) {
+[[noreturn]] inline void ReportLastErrorFatal(const char *Msg) {
std::string ErrMsg;
MakeErrMsg(&ErrMsg, Msg);
- llvm::report_fatal_error(ErrMsg);
+ llvm::report_fatal_error(Twine(ErrMsg));
}
template <typename HandleTraits>
diff --git a/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h b/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h
index 757a3c0c8a71..aca717a9f6cb 100644
--- a/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h
+++ b/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h
@@ -31,6 +31,8 @@ namespace X86Disassembler {
#define XOP9_MAP_SYM x86DisassemblerXOP9Opcodes
#define XOPA_MAP_SYM x86DisassemblerXOPAOpcodes
#define THREEDNOW_MAP_SYM x86Disassembler3DNowOpcodes
+#define MAP5_SYM x86DisassemblerMap5Opcodes
+#define MAP6_SYM x86DisassemblerMap6Opcodes
#define INSTRUCTIONS_STR "x86DisassemblerInstrSpecifiers"
#define CONTEXTS_STR "x86DisassemblerContexts"
@@ -42,6 +44,8 @@ namespace X86Disassembler {
#define XOP9_MAP_STR "x86DisassemblerXOP9Opcodes"
#define XOPA_MAP_STR "x86DisassemblerXOPAOpcodes"
#define THREEDNOW_MAP_STR "x86Disassembler3DNowOpcodes"
+#define MAP5_STR "x86DisassemblerMap5Opcodes"
+#define MAP6_STR "x86DisassemblerMap6Opcodes"
// Attributes of an instruction that must be known before the opcode can be
// processed correctly. Most of these indicate the presence of particular
@@ -292,7 +296,9 @@ enum OpcodeType {
XOP8_MAP = 4,
XOP9_MAP = 5,
XOPA_MAP = 6,
- THREEDNOW_MAP = 7
+ THREEDNOW_MAP = 7,
+ MAP5 = 8,
+ MAP6 = 9
};
// The following structs are used for the hierarchical decode table. After
diff --git a/llvm/include/llvm/Support/X86TargetParser.def b/llvm/include/llvm/Support/X86TargetParser.def
index ffcc2238e3ce..4443d822d3e8 100644
--- a/llvm/include/llvm/Support/X86TargetParser.def
+++ b/llvm/include/llvm/Support/X86TargetParser.def
@@ -91,54 +91,59 @@ X86_CPU_SUBTYPE(AMDFAM19H_ZNVER3, "znver3")
X86_CPU_SUBTYPE(INTEL_COREI7_ROCKETLAKE, "rocketlake")
#undef X86_CPU_SUBTYPE
-
-// This macro is used for cpu types present in compiler-rt/libgcc.
+// This macro is used for cpu types present in compiler-rt/libgcc. The third
+// parameter PRIORITY is the priority required by attribute 'target' checking.
+// Note that not all features are supported/prioritized by GCC, so synchronizing
+// with GCC's implementation may require changing some existing values.
+//
+// We cannot just re-sort the list though because its order is dictated by the
+// order of bits in CodeGenFunction::GetX86CpuSupportsMask.
#ifndef X86_FEATURE_COMPAT
-#define X86_FEATURE_COMPAT(ENUM, STR) X86_FEATURE(ENUM, STR)
+#define X86_FEATURE_COMPAT(ENUM, STR, PRIORITY) X86_FEATURE(ENUM, STR)
#endif
#ifndef X86_FEATURE
#define X86_FEATURE(ENUM, STR)
#endif
-X86_FEATURE_COMPAT(CMOV, "cmov")
-X86_FEATURE_COMPAT(MMX, "mmx")
-X86_FEATURE_COMPAT(POPCNT, "popcnt")
-X86_FEATURE_COMPAT(SSE, "sse")
-X86_FEATURE_COMPAT(SSE2, "sse2")
-X86_FEATURE_COMPAT(SSE3, "sse3")
-X86_FEATURE_COMPAT(SSSE3, "ssse3")
-X86_FEATURE_COMPAT(SSE4_1, "sse4.1")
-X86_FEATURE_COMPAT(SSE4_2, "sse4.2")
-X86_FEATURE_COMPAT(AVX, "avx")
-X86_FEATURE_COMPAT(AVX2, "avx2")
-X86_FEATURE_COMPAT(SSE4_A, "sse4a")
-X86_FEATURE_COMPAT(FMA4, "fma4")
-X86_FEATURE_COMPAT(XOP, "xop")
-X86_FEATURE_COMPAT(FMA, "fma")
-X86_FEATURE_COMPAT(AVX512F, "avx512f")
-X86_FEATURE_COMPAT(BMI, "bmi")
-X86_FEATURE_COMPAT(BMI2, "bmi2")
-X86_FEATURE_COMPAT(AES, "aes")
-X86_FEATURE_COMPAT(PCLMUL, "pclmul")
-X86_FEATURE_COMPAT(AVX512VL, "avx512vl")
-X86_FEATURE_COMPAT(AVX512BW, "avx512bw")
-X86_FEATURE_COMPAT(AVX512DQ, "avx512dq")
-X86_FEATURE_COMPAT(AVX512CD, "avx512cd")
-X86_FEATURE_COMPAT(AVX512ER, "avx512er")
-X86_FEATURE_COMPAT(AVX512PF, "avx512pf")
-X86_FEATURE_COMPAT(AVX512VBMI, "avx512vbmi")
-X86_FEATURE_COMPAT(AVX512IFMA, "avx512ifma")
-X86_FEATURE_COMPAT(AVX5124VNNIW, "avx5124vnniw")
-X86_FEATURE_COMPAT(AVX5124FMAPS, "avx5124fmaps")
-X86_FEATURE_COMPAT(AVX512VPOPCNTDQ, "avx512vpopcntdq")
-X86_FEATURE_COMPAT(AVX512VBMI2, "avx512vbmi2")
-X86_FEATURE_COMPAT(GFNI, "gfni")
-X86_FEATURE_COMPAT(VPCLMULQDQ, "vpclmulqdq")
-X86_FEATURE_COMPAT(AVX512VNNI, "avx512vnni")
-X86_FEATURE_COMPAT(AVX512BITALG, "avx512bitalg")
-X86_FEATURE_COMPAT(AVX512BF16, "avx512bf16")
-X86_FEATURE_COMPAT(AVX512VP2INTERSECT, "avx512vp2intersect")
+X86_FEATURE_COMPAT(CMOV, "cmov", 0)
+X86_FEATURE_COMPAT(MMX, "mmx", 1)
+X86_FEATURE_COMPAT(POPCNT, "popcnt", 9)
+X86_FEATURE_COMPAT(SSE, "sse", 2)
+X86_FEATURE_COMPAT(SSE2, "sse2", 3)
+X86_FEATURE_COMPAT(SSE3, "sse3", 4)
+X86_FEATURE_COMPAT(SSSE3, "ssse3", 5)
+X86_FEATURE_COMPAT(SSE4_1, "sse4.1", 7)
+X86_FEATURE_COMPAT(SSE4_2, "sse4.2", 8)
+X86_FEATURE_COMPAT(AVX, "avx", 12)
+X86_FEATURE_COMPAT(AVX2, "avx2", 18)
+X86_FEATURE_COMPAT(SSE4_A, "sse4a", 6)
+X86_FEATURE_COMPAT(FMA4, "fma4", 14)
+X86_FEATURE_COMPAT(XOP, "xop", 15)
+X86_FEATURE_COMPAT(FMA, "fma", 16)
+X86_FEATURE_COMPAT(AVX512F, "avx512f", 19)
+X86_FEATURE_COMPAT(BMI, "bmi", 13)
+X86_FEATURE_COMPAT(BMI2, "bmi2", 17)
+X86_FEATURE_COMPAT(AES, "aes", 10)
+X86_FEATURE_COMPAT(PCLMUL, "pclmul", 11)
+X86_FEATURE_COMPAT(AVX512VL, "avx512vl", 20)
+X86_FEATURE_COMPAT(AVX512BW, "avx512bw", 21)
+X86_FEATURE_COMPAT(AVX512DQ, "avx512dq", 22)
+X86_FEATURE_COMPAT(AVX512CD, "avx512cd", 23)
+X86_FEATURE_COMPAT(AVX512ER, "avx512er", 24)
+X86_FEATURE_COMPAT(AVX512PF, "avx512pf", 25)
+X86_FEATURE_COMPAT(AVX512VBMI, "avx512vbmi", 26)
+X86_FEATURE_COMPAT(AVX512IFMA, "avx512ifma", 27)
+X86_FEATURE_COMPAT(AVX5124VNNIW, "avx5124vnniw", 28)
+X86_FEATURE_COMPAT(AVX5124FMAPS, "avx5124fmaps", 29)
+X86_FEATURE_COMPAT(AVX512VPOPCNTDQ, "avx512vpopcntdq", 30)
+X86_FEATURE_COMPAT(AVX512VBMI2, "avx512vbmi2", 31)
+X86_FEATURE_COMPAT(GFNI, "gfni", 32)
+X86_FEATURE_COMPAT(VPCLMULQDQ, "vpclmulqdq", 33)
+X86_FEATURE_COMPAT(AVX512VNNI, "avx512vnni", 34)
+X86_FEATURE_COMPAT(AVX512BITALG, "avx512bitalg", 35)
+X86_FEATURE_COMPAT(AVX512BF16, "avx512bf16", 36)
+X86_FEATURE_COMPAT(AVX512VP2INTERSECT, "avx512vp2intersect", 37)
// Features below here are not in libgcc/compiler-rt.
X86_FEATURE (3DNOW, "3dnow")
X86_FEATURE (3DNOWA, "3dnowa")
@@ -153,6 +158,7 @@ X86_FEATURE (CLWB, "clwb")
X86_FEATURE (CLZERO, "clzero")
X86_FEATURE (CMPXCHG16B, "cx16")
X86_FEATURE (CMPXCHG8B, "cx8")
+X86_FEATURE (CRC32, "crc32")
X86_FEATURE (ENQCMD, "enqcmd")
X86_FEATURE (F16C, "f16c")
X86_FEATURE (FSGSBASE, "fsgsbase")
@@ -193,6 +199,7 @@ X86_FEATURE (XSAVEC, "xsavec")
X86_FEATURE (XSAVEOPT, "xsaveopt")
X86_FEATURE (XSAVES, "xsaves")
X86_FEATURE (HRESET, "hreset")
+X86_FEATURE (AVX512FP16, "avx512fp16")
X86_FEATURE (AVXVNNI, "avxvnni")
// These features aren't really CPU features, but the frontend can set them.
X86_FEATURE (RETPOLINE_EXTERNAL_THUNK, "retpoline-external-thunk")
@@ -202,3 +209,49 @@ X86_FEATURE (LVI_CFI, "lvi-cfi")
X86_FEATURE (LVI_LOAD_HARDENING, "lvi-load-hardening")
#undef X86_FEATURE_COMPAT
#undef X86_FEATURE
+
+#ifndef CPU_SPECIFIC
+#define CPU_SPECIFIC(NAME, MANGLING, FEATURES)
+#endif
+
+#ifndef CPU_SPECIFIC_ALIAS
+#define CPU_SPECIFIC_ALIAS(NEW_NAME, NAME)
+#endif
+
+CPU_SPECIFIC("generic", 'A', "")
+CPU_SPECIFIC("pentium", 'B', "")
+CPU_SPECIFIC("pentium_pro", 'C', "+cmov")
+CPU_SPECIFIC("pentium_mmx", 'D', "+mmx")
+CPU_SPECIFIC("pentium_ii", 'E', "+cmov,+mmx")
+CPU_SPECIFIC("pentium_iii", 'H', "+cmov,+mmx,+sse")
+CPU_SPECIFIC_ALIAS("pentium_iii_no_xmm_regs", "pentium_iii")
+CPU_SPECIFIC("pentium_4", 'J', "+cmov,+mmx,+sse,+sse2")
+CPU_SPECIFIC("pentium_m", 'K', "+cmov,+mmx,+sse,+sse2")
+CPU_SPECIFIC("pentium_4_sse3", 'L', "+cmov,+mmx,+sse,+sse2,+sse3")
+CPU_SPECIFIC("core_2_duo_ssse3", 'M', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3")
+CPU_SPECIFIC("core_2_duo_sse4_1", 'N', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1")
+CPU_SPECIFIC("atom", 'O', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+movbe")
+CPU_SPECIFIC("atom_sse4_2", 'c', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt")
+CPU_SPECIFIC("core_i7_sse4_2", 'P', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt")
+CPU_SPECIFIC("core_aes_pclmulqdq", 'Q', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt")
+CPU_SPECIFIC("atom_sse4_2_movbe", 'd', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt")
+CPU_SPECIFIC("goldmont", 'i', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt")
+CPU_SPECIFIC("sandybridge", 'R', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+avx")
+CPU_SPECIFIC_ALIAS("core_2nd_gen_avx", "sandybridge")
+CPU_SPECIFIC("ivybridge", 'S', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+f16c,+avx")
+CPU_SPECIFIC_ALIAS("core_3rd_gen_avx", "ivybridge")
+CPU_SPECIFIC("haswell", 'V', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2")
+CPU_SPECIFIC_ALIAS("core_4th_gen_avx", "haswell")
+CPU_SPECIFIC("core_4th_gen_avx_tsx", 'W', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2")
+CPU_SPECIFIC("broadwell", 'X', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+adx")
+CPU_SPECIFIC_ALIAS("core_5th_gen_avx", "broadwell")
+CPU_SPECIFIC("core_5th_gen_avx_tsx", 'Y', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+adx")
+CPU_SPECIFIC("knl", 'Z', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512f,+adx,+avx512er,+avx512pf,+avx512cd")
+CPU_SPECIFIC_ALIAS("mic_avx512", "knl")
+CPU_SPECIFIC("skylake", 'b', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+adx,+mpx")
+CPU_SPECIFIC( "skylake_avx512", 'a', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512dq,+avx512f,+adx,+avx512cd,+avx512bw,+avx512vl,+clwb")
+CPU_SPECIFIC("cannonlake", 'e', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512dq,+avx512f,+adx,+avx512ifma,+avx512cd,+avx512bw,+avx512vl,+avx512vbmi")
+CPU_SPECIFIC("knm", 'j', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512f,+adx,+avx512er,+avx512pf,+avx512cd,+avx5124fmaps,+avx5124vnniw,+avx512vpopcntdq")
+
+#undef CPU_SPECIFIC_ALIAS
+#undef CPU_SPECIFIC
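For context, .def files like this are consumed via X-macros. A hypothetical consumer of the new three-argument X86_FEATURE_COMPAT entries might build a priority table as below (the table name is made up, and the file's own #ifndef defaults are assumed to cover the macro families not defined here).

    struct CompatFeature { const char *Name; unsigned Priority; };

    static const CompatFeature CompatFeatures[] = {
    #define X86_FEATURE_COMPAT(ENUM, STR, PRIORITY) {STR, PRIORITY},
    #define X86_FEATURE(ENUM, STR) /* not part of the compat table */
    #include "llvm/Support/X86TargetParser.def"
    };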
diff --git a/llvm/include/llvm/Support/X86TargetParser.h b/llvm/include/llvm/Support/X86TargetParser.h
index ed02066933a7..bfa3e23dbd9d 100644
--- a/llvm/include/llvm/Support/X86TargetParser.h
+++ b/llvm/include/llvm/Support/X86TargetParser.h
@@ -13,6 +13,7 @@
#ifndef LLVM_SUPPORT_X86TARGETPARSER_H
#define LLVM_SUPPORT_X86TARGETPARSER_H
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
@@ -154,6 +155,9 @@ void getFeaturesForCPU(StringRef CPU, SmallVectorImpl<StringRef> &Features);
void updateImpliedFeatures(StringRef Feature, bool Enabled,
StringMap<bool> &Features);
+uint64_t getCpuSupportsMask(ArrayRef<StringRef> FeatureStrs);
+unsigned getFeaturePriority(ProcessorFeatures Feat);
+
} // namespace X86
} // namespace llvm
diff --git a/llvm/include/llvm/Support/YAMLTraits.h b/llvm/include/llvm/Support/YAMLTraits.h
index 9ac9eb300983..bea232e6e000 100644
--- a/llvm/include/llvm/Support/YAMLTraits.h
+++ b/llvm/include/llvm/Support/YAMLTraits.h
@@ -1641,7 +1641,7 @@ void IO::processKeyWithDefault(const char *Key, Optional<T> &Val,
// usually None.
bool IsNone = false;
if (!outputting())
- if (auto *Node = dyn_cast<ScalarNode>(((Input *)this)->getCurrentNode()))
+ if (const auto *Node = dyn_cast<ScalarNode>(((Input *)this)->getCurrentNode()))
// We use rtrim to ignore possible white spaces that might exist when a
// comment is present on the same line.
IsNone = Node->getRawValue().rtrim(' ') == "<none>";
diff --git a/llvm/include/llvm/Support/raw_ostream.h b/llvm/include/llvm/Support/raw_ostream.h
index c669c2babad9..98c26ef0b1e5 100644
--- a/llvm/include/llvm/Support/raw_ostream.h
+++ b/llvm/include/llvm/Support/raw_ostream.h
@@ -330,6 +330,8 @@ public:
// changeColor() has no effect until enable_colors(true) is called.
virtual void enable_colors(bool enable) { ColorEnabled = enable; }
+ bool colors_enabled() const { return ColorEnabled; }
+
/// Tie this stream to the specified stream. Replaces any existing tied-to
/// stream. Specifying a nullptr unties the stream.
void tie(raw_ostream *TieTo) { TiedStream = TieTo; }
@@ -719,7 +721,11 @@ class buffer_unique_ostream : public raw_svector_ostream {
public:
buffer_unique_ostream(std::unique_ptr<raw_ostream> OS)
- : raw_svector_ostream(Buffer), OS(std::move(OS)) {}
+ : raw_svector_ostream(Buffer), OS(std::move(OS)) {
+ // Turn off buffering on OS, which we now own, to avoid allocating a buffer
+    // when the destructor writes, only to be immediately flushed again.
+ this->OS->SetUnbuffered();
+ }
~buffer_unique_ostream() override { *OS << str(); }
};
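The new query simply reflects what enable_colors() last set; a short sketch of how a caller might use it:

    #include "llvm/Support/raw_ostream.h"

    void note(llvm::raw_ostream &OS, llvm::StringRef Msg) {
      if (OS.colors_enabled())
        OS.changeColor(llvm::raw_ostream::YELLOW, /*Bold=*/true);
      OS << Msg << '\n';
      if (OS.colors_enabled())
        OS.resetColor();
    }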
diff --git a/llvm/include/llvm/TableGen/DirectiveEmitter.h b/llvm/include/llvm/TableGen/DirectiveEmitter.h
index 5c4a736eb107..d73b9ae49235 100644
--- a/llvm/include/llvm/TableGen/DirectiveEmitter.h
+++ b/llvm/include/llvm/TableGen/DirectiveEmitter.h
@@ -152,7 +152,7 @@ public:
}
return C;
});
- N.erase(std::remove(N.begin(), N.end(), '_'), N.end());
+ llvm::erase_value(N, '_');
return N;
}
diff --git a/llvm/include/llvm/TableGen/Error.h b/llvm/include/llvm/TableGen/Error.h
index a0e23aca211e..da0132b10f4f 100644
--- a/llvm/include/llvm/TableGen/Error.h
+++ b/llvm/include/llvm/TableGen/Error.h
@@ -22,13 +22,10 @@ namespace llvm {
void PrintNote(const Twine &Msg);
void PrintNote(ArrayRef<SMLoc> NoteLoc, const Twine &Msg);
-LLVM_ATTRIBUTE_NORETURN void PrintFatalNote(const Twine &Msg);
-LLVM_ATTRIBUTE_NORETURN void PrintFatalNote(ArrayRef<SMLoc> ErrorLoc,
- const Twine &Msg);
-LLVM_ATTRIBUTE_NORETURN void PrintFatalNote(const Record *Rec,
- const Twine &Msg);
-LLVM_ATTRIBUTE_NORETURN void PrintFatalNote(const RecordVal *RecVal,
- const Twine &Msg);
+[[noreturn]] void PrintFatalNote(const Twine &Msg);
+[[noreturn]] void PrintFatalNote(ArrayRef<SMLoc> ErrorLoc, const Twine &Msg);
+[[noreturn]] void PrintFatalNote(const Record *Rec, const Twine &Msg);
+[[noreturn]] void PrintFatalNote(const RecordVal *RecVal, const Twine &Msg);
void PrintWarning(const Twine &Msg);
void PrintWarning(ArrayRef<SMLoc> WarningLoc, const Twine &Msg);
@@ -40,13 +37,10 @@ void PrintError(const char *Loc, const Twine &Msg);
void PrintError(const Record *Rec, const Twine &Msg);
void PrintError(const RecordVal *RecVal, const Twine &Msg);
-LLVM_ATTRIBUTE_NORETURN void PrintFatalError(const Twine &Msg);
-LLVM_ATTRIBUTE_NORETURN void PrintFatalError(ArrayRef<SMLoc> ErrorLoc,
- const Twine &Msg);
-LLVM_ATTRIBUTE_NORETURN void PrintFatalError(const Record *Rec,
- const Twine &Msg);
-LLVM_ATTRIBUTE_NORETURN void PrintFatalError(const RecordVal *RecVal,
- const Twine &Msg);
+[[noreturn]] void PrintFatalError(const Twine &Msg);
+[[noreturn]] void PrintFatalError(ArrayRef<SMLoc> ErrorLoc, const Twine &Msg);
+[[noreturn]] void PrintFatalError(const Record *Rec, const Twine &Msg);
+[[noreturn]] void PrintFatalError(const RecordVal *RecVal, const Twine &Msg);
void CheckAssert(SMLoc Loc, Init *Condition, Init *Message);
diff --git a/llvm/include/llvm/TableGen/Record.h b/llvm/include/llvm/TableGen/Record.h
index 713d9375448c..5869a5cf0423 100644
--- a/llvm/include/llvm/TableGen/Record.h
+++ b/llvm/include/llvm/TableGen/Record.h
@@ -39,6 +39,9 @@
#include <vector>
namespace llvm {
+namespace detail {
+struct RecordContext;
+} // namespace detail
class ListRecTy;
struct MultiClass;
@@ -100,7 +103,7 @@ inline raw_ostream &operator<<(raw_ostream &OS, const RecTy &Ty) {
/// 'bit' - Represent a single bit
class BitRecTy : public RecTy {
- static BitRecTy Shared;
+ friend detail::RecordContext;
BitRecTy() : RecTy(BitRecTyKind) {}
@@ -109,7 +112,7 @@ public:
return RT->getRecTyKind() == BitRecTyKind;
}
- static BitRecTy *get() { return &Shared; }
+ static BitRecTy *get();
std::string getAsString() const override { return "bit"; }
@@ -140,7 +143,7 @@ public:
/// 'int' - Represent an integer value of no particular size
class IntRecTy : public RecTy {
- static IntRecTy Shared;
+ friend detail::RecordContext;
IntRecTy() : RecTy(IntRecTyKind) {}
@@ -149,7 +152,7 @@ public:
return RT->getRecTyKind() == IntRecTyKind;
}
- static IntRecTy *get() { return &Shared; }
+ static IntRecTy *get();
std::string getAsString() const override { return "int"; }
@@ -158,7 +161,7 @@ public:
/// 'string' - Represent an string value
class StringRecTy : public RecTy {
- static StringRecTy Shared;
+ friend detail::RecordContext;
StringRecTy() : RecTy(StringRecTyKind) {}
@@ -167,7 +170,7 @@ public:
return RT->getRecTyKind() == StringRecTyKind;
}
- static StringRecTy *get() { return &Shared; }
+ static StringRecTy *get();
std::string getAsString() const override;
@@ -200,7 +203,7 @@ public:
/// 'dag' - Represent a dag fragment
class DagRecTy : public RecTy {
- static DagRecTy Shared;
+ friend detail::RecordContext;
DagRecTy() : RecTy(DagRecTyKind) {}
@@ -209,7 +212,7 @@ public:
return RT->getRecTyKind() == DagRecTyKind;
}
- static DagRecTy *get() { return &Shared; }
+ static DagRecTy *get();
std::string getAsString() const override;
};
@@ -221,6 +224,7 @@ public:
class RecordRecTy final : public RecTy, public FoldingSetNode,
public TrailingObjects<RecordRecTy, Record *> {
friend class Record;
+ friend detail::RecordContext;
unsigned NumClasses;
@@ -437,6 +441,8 @@ public:
/// '?' - Represents an uninitialized value.
class UnsetInit : public Init {
+ friend detail::RecordContext;
+
UnsetInit() : Init(IK_UnsetInit) {}
public:
@@ -468,9 +474,11 @@ public:
/// 'true'/'false' - Represent a concrete initializer for a bit.
class BitInit final : public TypedInit {
+ friend detail::RecordContext;
+
bool Value;
- explicit BitInit(bool V) : TypedInit(IK_BitInit, BitRecTy::get()), Value(V) {}
+ explicit BitInit(bool V, RecTy *T) : TypedInit(IK_BitInit, T), Value(V) {}
public:
BitInit(const BitInit &) = delete;
@@ -637,7 +645,7 @@ public:
}
StringRef getValue() const { return Value; }
- StringFormat getFormat() const { return Format; }
+ StringFormat getFormat() const { return Format; }
bool hasCodeFormat() const { return Format == SF_Code; }
Init *convertInitializerTo(RecTy *Ty) const override;
@@ -1414,6 +1422,7 @@ private:
SMLoc Loc; // Source location of definition of name.
PointerIntPair<RecTy *, 2, FieldKind> TyAndKind;
Init *Value;
+ bool IsUsed = false;
public:
RecordVal(Init *N, RecTy *T, FieldKind K);
@@ -1458,6 +1467,11 @@ public:
/// Set the value and source location of the field.
bool setValue(Init *V, SMLoc NewLoc);
+ /// Whether this value is used. Useful for reporting warnings, for example
+ /// when a template argument is unused.
+ void setUsed(bool Used) { IsUsed = Used; }
+ bool isUsed() const { return IsUsed; }
+
void dump() const;
/// Print the value to an output stream, possibly with a semicolon.
@@ -1483,8 +1497,6 @@ public:
};
private:
- static unsigned LastID;
-
Init *Name;
// Location where record was instantiated, followed by the location of
// multiclass prototypes used.
@@ -1515,8 +1527,8 @@ public:
// Constructs a record.
explicit Record(Init *N, ArrayRef<SMLoc> locs, RecordKeeper &records,
bool Anonymous = false, bool Class = false)
- : Name(N), Locs(locs.begin(), locs.end()), TrackedRecords(records),
- ID(LastID++), IsAnonymous(Anonymous), IsClass(Class) {
+ : Name(N), Locs(locs.begin(), locs.end()), TrackedRecords(records),
+ ID(getNewUID()), IsAnonymous(Anonymous), IsClass(Class) {
checkName();
}
@@ -1528,12 +1540,12 @@ public:
// ID number. Don't copy CorrespondingDefInit either, since it's owned by the
// original record. All other fields can be copied normally.
Record(const Record &O)
- : Name(O.Name), Locs(O.Locs), TemplateArgs(O.TemplateArgs),
- Values(O.Values), Assertions(O.Assertions), SuperClasses(O.SuperClasses),
- TrackedRecords(O.TrackedRecords), ID(LastID++),
- IsAnonymous(O.IsAnonymous), IsClass(O.IsClass) { }
+ : Name(O.Name), Locs(O.Locs), TemplateArgs(O.TemplateArgs),
+ Values(O.Values), Assertions(O.Assertions),
+ SuperClasses(O.SuperClasses), TrackedRecords(O.TrackedRecords),
+ ID(getNewUID()), IsAnonymous(O.IsAnonymous), IsClass(O.IsClass) {}
- static unsigned getNewUID() { return LastID++; }
+ static unsigned getNewUID();
unsigned getID() const { return ID; }
@@ -1632,6 +1644,7 @@ public:
}
void checkRecordAssertions();
+ void checkUnusedTemplateArgs();
bool isSubClassOf(const Record *R) const {
for (const auto &SCPair : SuperClasses)
diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td
index e3e1d5fc3c65..72c974834a2f 100644
--- a/llvm/include/llvm/Target/GenericOpcodes.td
+++ b/llvm/include/llvm/Target/GenericOpcodes.td
@@ -225,6 +225,18 @@ def G_FREEZE : GenericInstruction {
let hasSideEffects = false;
}
+def G_LROUND: GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type1:$src);
+ let hasSideEffects = false;
+}
+
+def G_LLROUND: GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type1:$src);
+ let hasSideEffects = false;
+}
+
//------------------------------------------------------------------------------
// Binary ops.
//------------------------------------------------------------------------------
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index f35156d59849..e2d3dbdda88a 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -130,7 +130,13 @@ def extending_loads : GICombineRule<
(match (wip_match_opcode G_LOAD, G_SEXTLOAD, G_ZEXTLOAD):$root,
[{ return Helper.matchCombineExtendingLoads(*${root}, ${matchinfo}); }]),
(apply [{ Helper.applyCombineExtendingLoads(*${root}, ${matchinfo}); }])>;
-def combines_for_extload: GICombineGroup<[extending_loads]>;
+
+def load_and_mask : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_AND):$root,
+ [{ return Helper.matchCombineLoadWithAndMask(*${root}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
+def combines_for_extload: GICombineGroup<[extending_loads, load_and_mask]>;
def sext_trunc_sextload : GICombineRule<
(defs root:$d),
@@ -197,6 +203,12 @@ def reduce_shl_of_extend : GICombineRule<
[{ return Helper.matchCombineShlOfExtend(*${mi}, ${matchinfo}); }]),
(apply [{ Helper.applyCombineShlOfExtend(*${mi}, ${matchinfo}); }])>;
+def narrow_binop_feeding_and : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_AND):$root,
+ [{ return Helper.matchNarrowBinopFeedingAnd(*${root}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>;
+
// [us]itofp(undef) = 0, because the result value is bounded.
def undef_to_fp_zero : GICombineRule<
(defs root:$root),
@@ -275,7 +287,7 @@ def select_constant_cmp: GICombineRule<
def right_identity_zero: GICombineRule<
(defs root:$root),
(match (wip_match_opcode G_SUB, G_ADD, G_OR, G_XOR, G_SHL, G_ASHR, G_LSHR,
- G_PTR_ADD):$root,
+ G_PTR_ADD, G_ROTL, G_ROTR):$root,
[{ return Helper.matchConstantOp(${root}->getOperand(2), 0); }]),
(apply [{ return Helper.replaceSingleDefInstWithOperand(*${root}, 1); }])
>;
@@ -507,6 +519,13 @@ def fabs_fabs_fold: GICombineRule<
(apply [{ return Helper.replaceSingleDefInstWithReg(*${root}, ${matchinfo}); }])
>;
+// Fold (fabs (fneg x)) -> (fabs x).
+def fabs_fneg_fold: GICombineRule <
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_FABS):$root,
+ [{ return Helper.matchCombineFAbsOfFNeg(*${root}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>;
+
// Fold (unmerge cst) -> cst1, cst2, ...
def unmerge_cst_matchinfo : GIDefMatchData<"SmallVector<APInt, 8>">;
def unmerge_cst : GICombineRule<
@@ -588,6 +607,14 @@ def load_or_combine : GICombineRule<
[{ return Helper.matchLoadOrCombine(*${root}, ${info}); }]),
(apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
+
+def truncstore_merge_matcdata : GIDefMatchData<"MergeTruncStoresInfo">;
+def truncstore_merge : GICombineRule<
+ (defs root:$root, truncstore_merge_matcdata:$info),
+ (match (wip_match_opcode G_STORE):$root,
+ [{ return Helper.matchTruncStoreMerge(*${root}, ${info}); }]),
+ (apply [{ Helper.applyTruncStoreMerge(*${root}, ${info}); }])>;
+
def extend_through_phis_matchdata: GIDefMatchData<"MachineInstr*">;
def extend_through_phis : GICombineRule<
(defs root:$root, extend_through_phis_matchdata:$matchinfo),
@@ -638,6 +665,18 @@ def icmp_to_true_false_known_bits : GICombineRule<
[{ return Helper.matchICmpToTrueFalseKnownBits(*${d}, ${matchinfo}); }]),
(apply [{ Helper.replaceInstWithConstant(*${d}, ${matchinfo}); }])>;
+def icmp_to_lhs_known_bits : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$info),
+ (match (wip_match_opcode G_ICMP):$root,
+ [{ return Helper.matchICmpToLHSKnownBits(*${root}, ${info}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
+
+def and_or_disjoint_mask : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$info),
+ (match (wip_match_opcode G_AND):$root,
+ [{ return Helper.matchAndOrDisjointMask(*${root}, ${info}); }]),
+ (apply [{ Helper.applyBuildFnNoErase(*${root}, ${info}); }])>;
+
def bitfield_extract_from_and : GICombineRule<
(defs root:$root, build_fn_matchinfo:$info),
(match (wip_match_opcode G_AND):$root,
@@ -652,8 +691,31 @@ def bitfield_extract_from_sext_inreg : GICombineRule<
[{ return Helper.matchBitfieldExtractFromSExtInReg(*${root}, ${info}); }]),
(apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
+def bitfield_extract_from_shr : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$info),
+ (match (wip_match_opcode G_ASHR, G_LSHR):$root,
+ [{ return Helper.matchBitfieldExtractFromShr(*${root}, ${info}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
+
+def bitfield_extract_from_shr_and : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$info),
+ (match (wip_match_opcode G_ASHR, G_LSHR):$root,
+ [{ return Helper.matchBitfieldExtractFromShrAnd(*${root}, ${info}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
+
def form_bitfield_extract : GICombineGroup<[bitfield_extract_from_sext_inreg,
- bitfield_extract_from_and]>;
+ bitfield_extract_from_and,
+ bitfield_extract_from_shr,
+ bitfield_extract_from_shr_and]>;
+
+def udiv_by_const : GICombineRule<
+ (defs root:$root),
+ (match (wip_match_opcode G_UDIV):$root,
+ [{ return Helper.matchUDivByConst(*${root}); }]),
+ (apply [{ Helper.applyUDivByConst(*${root}); }])>;
+
+def intdiv_combines : GICombineGroup<[udiv_by_const]>;
+
def reassoc_ptradd : GICombineRule<
(defs root:$root, build_fn_matchinfo:$matchinfo),
(match (wip_match_opcode G_PTR_ADD):$root,
@@ -669,6 +731,26 @@ def constant_fold : GICombineRule<
[{ return Helper.matchConstantFold(*${d}, ${matchinfo}); }]),
(apply [{ Helper.replaceInstWithConstant(*${d}, ${matchinfo}); }])>;
+def mulo_by_2: GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_UMULO, G_SMULO):$root,
+ [{ return Helper.matchMulOBy2(*${root}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>;
+
+def mulh_to_lshr : GICombineRule<
+ (defs root:$root),
+ (match (wip_match_opcode G_UMULH):$root,
+ [{ return Helper.matchUMulHToLShr(*${root}); }]),
+ (apply [{ Helper.applyUMulHToLShr(*${root}); }])>;
+
+def mulh_combines : GICombineGroup<[mulh_to_lshr]>;
+
+def redundant_neg_operands: GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMAD, G_FMA):$root,
+ [{ return Helper.matchRedundantNegOperands(*${root}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>;
+
// FIXME: These should use the custom predicate feature once it lands.
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
undef_to_negative_one,
@@ -685,13 +767,14 @@ def identity_combines : GICombineGroup<[select_same_val, right_identity_zero,
fneg_fneg_fold, right_identity_one]>;
def const_combines : GICombineGroup<[constant_fp_op, const_ptradd_to_i2p,
- overlapping_and]>;
+ overlapping_and, mulo_by_2]>;
def known_bits_simplifications : GICombineGroup<[
redundant_and, redundant_sext_inreg, redundant_or, urem_pow2_to_mask,
- zext_trunc_fold, icmp_to_true_false_known_bits]>;
+ zext_trunc_fold, icmp_to_true_false_known_bits, icmp_to_lhs_known_bits]>;
-def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend]>;
+def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend,
+ narrow_binop_feeding_and]>;
def phi_combines : GICombineGroup<[extend_through_phis]>;
@@ -713,8 +796,10 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
unmerge_zext_to_zext, merge_unmerge, trunc_ext_fold, trunc_shl,
const_combines, xor_of_and_with_same_reg, ptr_add_with_zero,
shift_immed_chain, shift_of_shifted_logic_chain, load_or_combine,
- div_rem_to_divrem, funnel_shift_combines, form_bitfield_extract,
- constant_fold]>;
+ truncstore_merge, div_rem_to_divrem, funnel_shift_combines,
+ form_bitfield_extract, constant_fold, fabs_fneg_fold,
+ intdiv_combines, mulh_combines, redundant_neg_operands,
+ and_or_disjoint_mask ]>;
// A combine group used to for prelegalizer combiners at -O0. The combines in
// this group have been selected based on experiments to balance code size and
diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index 8a5052401e9b..12eee24b578f 100644
--- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -144,6 +144,8 @@ def : GINodeEquiv<G_FMAXNUM_IEEE, fmaxnum_ieee>;
def : GINodeEquiv<G_READCYCLECOUNTER, readcyclecounter>;
def : GINodeEquiv<G_ROTR, rotr>;
def : GINodeEquiv<G_ROTL, rotl>;
+def : GINodeEquiv<G_LROUND, lround>;
+def : GINodeEquiv<G_LLROUND, llround>;
def : GINodeEquiv<G_STRICT_FADD, strict_fadd>;
def : GINodeEquiv<G_STRICT_FSUB, strict_fsub>;
diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td
index e9720d765167..7ae690b83770 100644
--- a/llvm/include/llvm/Target/Target.td
+++ b/llvm/include/llvm/Target/Target.td
@@ -306,6 +306,9 @@ class RegisterClass<string namespace, list<ValueType> regTypes, int alignment,
// the assembly matcher will provide a function to map from diagnostic types
// to message strings.
string DiagnosticString = "";
+
+ // Target-specific flags. This becomes the TSFlags field in TargetRegisterClass.
+ bits<8> TSFlags = 0;
}
// The memberList in a RegisterClass is a dag of set operations. TableGen
@@ -650,6 +653,25 @@ class Instruction : InstructionEncoding {
/// instruction selection predicates. FastISel cannot handle such cases, but
/// SelectionDAG can.
bit FastISelShouldIgnore = false;
+
+  /// HasPositionOrder: Indicate to TableGen that the instructions should be
+  /// sorted by record ID, so that an instruction defined earlier is placed
+  /// earlier in the assembly matching table.
+ bit HasPositionOrder = false;
+}
+
+/// Defines a Pat match between compressed and uncompressed instruction.
+/// The relationship and helper function generation are handled by
+/// CompressInstEmitter backend.
+class CompressPat<dag input, dag output, list<Predicate> predicates = []> {
+ /// Uncompressed instruction description.
+ dag Input = input;
+ /// Compressed instruction description.
+ dag Output = output;
+ /// Predicates that must be true for this to match.
+ list<Predicate> Predicates = predicates;
+ /// Duplicate match when tied operand is just different.
+ bit isCompressOnly = false;
}
/// Defines an additional encoding that disassembles to the given instruction
diff --git a/llvm/include/llvm/Target/TargetLoweringObjectFile.h b/llvm/include/llvm/Target/TargetLoweringObjectFile.h
index 93bfdd20e082..752032d3d04d 100644
--- a/llvm/include/llvm/Target/TargetLoweringObjectFile.h
+++ b/llvm/include/llvm/Target/TargetLoweringObjectFile.h
@@ -15,6 +15,7 @@
#define LLVM_TARGET_TARGETLOWERINGOBJECTFILE_H
#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCRegister.h"
#include <cstdint>
namespace llvm {
@@ -219,6 +220,14 @@ public:
return SupportDebugThreadLocalLocation;
}
+ /// Returns the register used as static base in RWPI variants.
+ virtual const MCRegister getStaticBase() const { return MCRegister::NoRegister; }
+
+ /// Get the target specific RWPI relocation.
+ virtual const MCExpr *getIndirectSymViaRWPI(const MCSymbol *Sym) const {
+ return nullptr;
+ }
+
/// Get the target specific PC relative GOT entry relocation
virtual const MCExpr *getIndirectSymViaGOTPCRel(const GlobalValue *GV,
const MCSymbol *Sym,
diff --git a/llvm/include/llvm/Target/TargetMachine.h b/llvm/include/llvm/Target/TargetMachine.h
index dd17af4a642a..acfb265a9ff9 100644
--- a/llvm/include/llvm/Target/TargetMachine.h
+++ b/llvm/include/llvm/Target/TargetMachine.h
@@ -13,6 +13,7 @@
#ifndef LLVM_TARGET_TARGETMACHINE_H
#define LLVM_TARGET_TARGETMACHINE_H
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/DataLayout.h"
@@ -20,9 +21,11 @@
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Error.h"
+#include "llvm/Support/PGOOptions.h"
#include "llvm/Target/CGPassBuilderOption.h"
#include "llvm/Target/TargetOptions.h"
#include <string>
+#include <utility>
namespace llvm {
@@ -110,6 +113,9 @@ protected: // Can only create subclasses.
unsigned RequireStructuredCFG : 1;
unsigned O0WantsFastISel : 1;
+ // PGO related tunables.
+ Optional<PGOOptions> PGOOption = None;
+
public:
const TargetOptions DefaultOptions;
mutable TargetOptions Options;
@@ -303,6 +309,9 @@ public:
return false;
}
+ void setPGOOption(Optional<PGOOptions> PGOOpt) { PGOOption = PGOOpt; }
+ const Optional<PGOOptions> &getPGOOption() const { return PGOOption; }
+
/// If the specified generic pointer could be assumed as a pointer to a
/// specific address space, return that address space.
///
@@ -311,6 +320,18 @@ public:
/// properties.
virtual unsigned getAssumedAddrSpace(const Value *V) const { return -1; }
+ /// If the specified predicate checks whether a generic pointer falls within
+ /// a specified address space, return that generic pointer and the address
+ /// space being queried.
+ ///
+ /// Such predicates could be specified in @llvm.assume intrinsics for the
+ /// optimizer to assume that the given generic pointer always falls within
+ /// the address space based on that predicate.
+ virtual std::pair<const Value *, unsigned>
+ getPredicatedAddrSpace(const Value *V) const {
+ return std::make_pair(nullptr, -1);
+ }
+
/// Get a \c TargetIRAnalysis appropriate for the target.
///
/// This is used to construct the new pass manager's target IR analysis pass,
@@ -464,6 +485,10 @@ public:
virtual bool useIPRA() const {
return false;
}
+
+ /// The default variant to use in unqualified `asm` instructions.
+ /// If this returns 0, `asm "$(foo$|bar$)"` will evaluate to `asm "foo"`.
+ virtual int unqualifiedInlineAsmVariant() const { return 0; }
};
/// Helper method for getting the code model, returning Default if
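A brief sketch of the new hook in use, assuming a valid llvm::TargetMachine and a placeholder profile path, with the PGOOptions constructor arguments passed explicitly as declared in PGOOptions.h:

    void attachProfile(llvm::TargetMachine &TM) {
      TM.setPGOOption(llvm::PGOOptions("code.profdata", "", "",
                                       llvm::PGOOptions::IRUse,
                                       llvm::PGOOptions::NoCSAction,
                                       /*DebugInfoForProfiling=*/false,
                                       /*PseudoProbeForProfiling=*/false));
    }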
diff --git a/llvm/include/llvm/Target/TargetOptions.h b/llvm/include/llvm/Target/TargetOptions.h
index e5bea9041479..912f6d1c153a 100644
--- a/llvm/include/llvm/Target/TargetOptions.h
+++ b/llvm/include/llvm/Target/TargetOptions.h
@@ -110,12 +110,23 @@ namespace llvm {
DisableWithDiag // Disable the abort but emit a diagnostic on failure.
};
+ /// Indicates when and how the Swift async frame pointer bit should be set.
+ enum class SwiftAsyncFramePointerMode {
+ /// Determine whether to set the bit statically or dynamically based
+ /// on the deployment target.
+ DeploymentBased,
+ /// Always set the bit.
+ Always,
+ /// Never set the bit.
+ Never,
+ };
+
class TargetOptions {
public:
TargetOptions()
: UnsafeFPMath(false), NoInfsFPMath(false), NoNaNsFPMath(false),
NoTrappingFPMath(true), NoSignedZerosFPMath(false),
- EnableAIXExtendedAltivecABI(false),
+ ApproxFuncFPMath(false), EnableAIXExtendedAltivecABI(false),
HonorSignDependentRoundingFPMathOption(false), NoZerosInBSS(false),
GuaranteedTailCallOpt(false), StackSymbolOrdering(true),
EnableFastISel(false), EnableGlobalISel(false), UseInitArray(false),
@@ -129,7 +140,7 @@ namespace llvm {
EnableMachineFunctionSplitter(false), SupportsDefaultOutlining(false),
EmitAddrsig(false), EmitCallSiteInfo(false),
SupportsDebugEntryValues(false), EnableDebugEntryValues(false),
- PseudoProbeForProfiling(false), ValueTrackingVariableLocations(false),
+ ValueTrackingVariableLocations(false),
ForceDwarfFrameSection(false), XRayOmitFunctionIndex(false),
DebugStrictDwarf(false),
FPDenormalMode(DenormalMode::IEEE, DenormalMode::IEEE) {}
@@ -172,9 +183,15 @@ namespace llvm {
/// argument or result as insignificant.
unsigned NoSignedZerosFPMath : 1;
+    /// ApproxFuncFPMath - This flag is enabled when the
+    /// -enable-approx-func-fp-math option is specified on the command line.
+    /// It specifies that optimizations are allowed to substitute math functions
+    /// with approximate calculations.
+ unsigned ApproxFuncFPMath : 1;
+
/// EnableAIXExtendedAltivecABI - This flag returns true when -vec-extabi is
/// specified. The code generator is then able to use both volatile and
- /// nonvolitle vector regisers. When false, the code generator only uses
+    /// nonvolatile vector registers. When false, the code generator only uses
/// volatile vector registers which is the default setting on AIX.
unsigned EnableAIXExtendedAltivecABI : 1;
@@ -219,6 +236,11 @@ namespace llvm {
/// selection fails to lower/select an instruction.
GlobalISelAbortMode GlobalISelAbort = GlobalISelAbortMode::Enable;
+ /// Control when and how the Swift async frame pointer bit should
+ /// be set.
+ SwiftAsyncFramePointerMode SwiftAsyncFramePointer =
+ SwiftAsyncFramePointerMode::Always;
+
/// UseInitArray - Use .init_array instead of .ctors for static
/// constructors.
unsigned UseInitArray : 1;
@@ -305,9 +327,6 @@ namespace llvm {
/// production.
bool ShouldEmitDebugEntryValues() const;
- /// Emit pseudo probes into the binary for sample profiling
- unsigned PseudoProbeForProfiling : 1;
-
// When set to true, use experimental new debug variable location tracking,
// which seeks to follow the values of variables rather than their location,
// post isel.
@@ -328,6 +347,9 @@ namespace llvm {
/// passed on the command line.
std::string StackUsageOutput;
+ /// If greater than 0, override TargetLoweringBase::PrefLoopAlignment.
+ unsigned LoopAlignment = 0;
+
/// FloatABIType - This setting is set when the -float-abi=xxx option is specified
/// on the command line. This setting may either be Default, Soft, or Hard.
/// Default selects the target's default behavior. Soft selects the ABI for
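A sketch of driving the knobs introduced above; the values are arbitrary examples.

    #include "llvm/Target/TargetOptions.h"

    llvm::TargetOptions makeOpts() {
      llvm::TargetOptions Opts;
      Opts.ApproxFuncFPMath = 1;   // allow approximate math-function substitution
      Opts.LoopAlignment = 32;     // override PrefLoopAlignment (0 keeps default)
      Opts.SwiftAsyncFramePointer = llvm::SwiftAsyncFramePointerMode::Never;
      return Opts;
    }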
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index 44ec2250a9c5..d8ef7c49a5f9 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -297,10 +297,6 @@ def SDTAtomicLoad : SDTypeProfile<1, 1, [
SDTCisInt<0>, SDTCisPtrTy<1>
]>;
-def SDTConvertOp : SDTypeProfile<1, 5, [ //cvtss, su, us, uu, ff, fs, fu, sf, su
- SDTCisVT<2, OtherVT>, SDTCisVT<3, OtherVT>, SDTCisPtrTy<4>, SDTCisPtrTy<5>
-]>;
-
class SDCallSeqStart<list<SDTypeConstraint> constraints> :
SDTypeProfile<0, 2, constraints>;
class SDCallSeqEnd<list<SDTypeConstraint> constraints> :
@@ -1050,6 +1046,10 @@ def extloadvi32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
let IsLoad = true;
let ScalarMemoryVT = i32;
}
+def extloadvf16 : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
+ let IsLoad = true;
+ let ScalarMemoryVT = f16;
+}
def extloadvf32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
let IsLoad = true;
let ScalarMemoryVT = f32;
@@ -1472,7 +1472,7 @@ def any_fsetccs : PatFrags<(ops node:$lhs, node:$rhs, node:$pred),
[(strict_fsetccs node:$lhs, node:$rhs, node:$pred),
(setcc node:$lhs, node:$rhs, node:$pred)]>;
-multiclass binary_atomic_op_ord<SDNode atomic_op> {
+multiclass binary_atomic_op_ord {
def NAME#_monotonic : PatFrag<(ops node:$ptr, node:$val),
(!cast<SDPatternOperator>(NAME) node:$ptr, node:$val)> {
let IsAtomic = true;
@@ -1500,7 +1500,7 @@ multiclass binary_atomic_op_ord<SDNode atomic_op> {
}
}
-multiclass ternary_atomic_op_ord<SDNode atomic_op> {
+multiclass ternary_atomic_op_ord {
def NAME#_monotonic : PatFrag<(ops node:$ptr, node:$cmp, node:$val),
(!cast<SDPatternOperator>(NAME) node:$ptr, node:$cmp, node:$val)> {
let IsAtomic = true;
@@ -1550,10 +1550,10 @@ multiclass binary_atomic_op<SDNode atomic_op, bit IsInt = 1> {
let MemoryVT = !if(IsInt, i64, f64);
}
- defm NAME#_8 : binary_atomic_op_ord<atomic_op>;
- defm NAME#_16 : binary_atomic_op_ord<atomic_op>;
- defm NAME#_32 : binary_atomic_op_ord<atomic_op>;
- defm NAME#_64 : binary_atomic_op_ord<atomic_op>;
+ defm NAME#_8 : binary_atomic_op_ord;
+ defm NAME#_16 : binary_atomic_op_ord;
+ defm NAME#_32 : binary_atomic_op_ord;
+ defm NAME#_64 : binary_atomic_op_ord;
}
multiclass ternary_atomic_op<SDNode atomic_op> {
@@ -1578,10 +1578,10 @@ multiclass ternary_atomic_op<SDNode atomic_op> {
let MemoryVT = i64;
}
- defm NAME#_8 : ternary_atomic_op_ord<atomic_op>;
- defm NAME#_16 : ternary_atomic_op_ord<atomic_op>;
- defm NAME#_32 : ternary_atomic_op_ord<atomic_op>;
- defm NAME#_64 : ternary_atomic_op_ord<atomic_op>;
+ defm NAME#_8 : ternary_atomic_op_ord;
+ defm NAME#_16 : ternary_atomic_op_ord;
+ defm NAME#_32 : ternary_atomic_op_ord;
+ defm NAME#_64 : ternary_atomic_op_ord;
}
defm atomic_load_add : binary_atomic_op<atomic_load_add>;
diff --git a/llvm/include/llvm/TextAPI/Architecture.h b/llvm/include/llvm/TextAPI/Architecture.h
index 3cd8a3a19e96..978359995074 100644
--- a/llvm/include/llvm/TextAPI/Architecture.h
+++ b/llvm/include/llvm/TextAPI/Architecture.h
@@ -10,8 +10,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TEXTAPI_MACHO_ARCHITECTURE_H
-#define LLVM_TEXTAPI_MACHO_ARCHITECTURE_H
+#ifndef LLVM_TEXTAPI_ARCHITECTURE_H
+#define LLVM_TEXTAPI_ARCHITECTURE_H
#include <cstdint>
#include <utility>
@@ -54,4 +54,4 @@ raw_ostream &operator<<(raw_ostream &OS, Architecture Arch);
} // end namespace MachO.
} // end namespace llvm.
-#endif // LLVM_TEXTAPI_MACHO_ARCHITECTURE_H
+#endif // LLVM_TEXTAPI_ARCHITECTURE_H
diff --git a/llvm/include/llvm/TextAPI/ArchitectureSet.h b/llvm/include/llvm/TextAPI/ArchitectureSet.h
index e9b374e4f69f..f17cb74c9183 100644
--- a/llvm/include/llvm/TextAPI/ArchitectureSet.h
+++ b/llvm/include/llvm/TextAPI/ArchitectureSet.h
@@ -10,8 +10,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TEXTAPI_MACHO_ARCHITECTURESET_H
-#define LLVM_TEXTAPI_MACHO_ARCHITECTURESET_H
+#ifndef LLVM_TEXTAPI_ARCHITECTURESET_H
+#define LLVM_TEXTAPI_ARCHITECTURESET_H
#include "llvm/TextAPI/Architecture.h"
#include <cstddef>
@@ -168,4 +168,4 @@ raw_ostream &operator<<(raw_ostream &OS, ArchitectureSet Set);
} // end namespace MachO.
} // end namespace llvm.
-#endif // LLVM_TEXTAPI_MACHO_ARCHITECTURESET_H
+#endif // LLVM_TEXTAPI_ARCHITECTURESET_H
diff --git a/llvm/include/llvm/TextAPI/InterfaceFile.h b/llvm/include/llvm/TextAPI/InterfaceFile.h
index d17c0c1c5b47..03a541454e1a 100644
--- a/llvm/include/llvm/TextAPI/InterfaceFile.h
+++ b/llvm/include/llvm/TextAPI/InterfaceFile.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TEXTAPI_MACHO_INTERFACEFILE_H
-#define LLVM_TEXTAPI_MACHO_INTERFACEFILE_H
+#ifndef LLVM_TEXTAPI_INTERFACEFILE_H
+#define LLVM_TEXTAPI_INTERFACEFILE_H
#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ADT/DenseMap.h"
@@ -445,7 +445,7 @@ bool operator==(const DenseMapBase<DerivedT, SymbolsMapKey, MachO::Symbol *,
KeyInfoT, BucketT> &RHS) {
if (LHS.size() != RHS.size())
return false;
- for (auto KV : LHS) {
+ for (const auto &KV : LHS) {
auto I = RHS.find(KV.first);
if (I == RHS.end() || *I->second != *KV.second)
return false;
@@ -456,4 +456,4 @@ bool operator==(const DenseMapBase<DerivedT, SymbolsMapKey, MachO::Symbol *,
} // end namespace MachO.
} // end namespace llvm.
-#endif // LLVM_TEXTAPI_MACHO_INTERFACEFILE_H
+#endif // LLVM_TEXTAPI_INTERFACEFILE_H
diff --git a/llvm/include/llvm/TextAPI/PackedVersion.h b/llvm/include/llvm/TextAPI/PackedVersion.h
index e3d2bd5ae2e5..24bec2ebe8fc 100644
--- a/llvm/include/llvm/TextAPI/PackedVersion.h
+++ b/llvm/include/llvm/TextAPI/PackedVersion.h
@@ -10,8 +10,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TEXTAPI_MACHO_PACKEDVERSION_H
-#define LLVM_TEXTAPI_MACHO_PACKEDVERSION_H
+#ifndef LLVM_TEXTAPI_PACKEDVERSION_H
+#define LLVM_TEXTAPI_PACKEDVERSION_H
#include <cstdint>
#include <utility>
@@ -64,4 +64,4 @@ inline raw_ostream &operator<<(raw_ostream &OS, const PackedVersion &Version) {
} // end namespace MachO.
} // end namespace llvm.
-#endif // LLVM_TEXTAPI_MACHO_PACKEDVERSION_H
+#endif // LLVM_TEXTAPI_PACKEDVERSION_H
diff --git a/llvm/include/llvm/TextAPI/Platform.h b/llvm/include/llvm/TextAPI/Platform.h
index 3f052b7b8624..f7affc3ae980 100644
--- a/llvm/include/llvm/TextAPI/Platform.h
+++ b/llvm/include/llvm/TextAPI/Platform.h
@@ -9,8 +9,8 @@
// Defines the Platforms supported by Tapi and helpers.
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TEXTAPI_MACHO_PLATFORM_H
-#define LLVM_TEXTAPI_MACHO_PLATFORM_H
+#ifndef LLVM_TEXTAPI_PLATFORM_H
+#define LLVM_TEXTAPI_PLATFORM_H
#include "llvm/ADT/SmallSet.h"
#include "llvm/BinaryFormat/MachO.h"
@@ -46,4 +46,4 @@ std::string getOSAndEnvironmentName(PlatformKind Platform,
} // end namespace MachO.
} // end namespace llvm.
-#endif // LLVM_TEXTAPI_MACHO_PLATFORM_H
+#endif // LLVM_TEXTAPI_PLATFORM_H
diff --git a/llvm/include/llvm/TextAPI/Symbol.h b/llvm/include/llvm/TextAPI/Symbol.h
index 02f184d2502f..dfc84908bba2 100644
--- a/llvm/include/llvm/TextAPI/Symbol.h
+++ b/llvm/include/llvm/TextAPI/Symbol.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TEXTAPI_MACHO_SYMBOL_H
-#define LLVM_TEXTAPI_MACHO_SYMBOL_H
+#ifndef LLVM_TEXTAPI_SYMBOL_H
+#define LLVM_TEXTAPI_SYMBOL_H
#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ADT/StringRef.h"
@@ -132,4 +132,4 @@ private:
} // end namespace MachO.
} // end namespace llvm.
-#endif // LLVM_TEXTAPI_MACHO_SYMBOL_H
+#endif // LLVM_TEXTAPI_SYMBOL_H
diff --git a/llvm/include/llvm/TextAPI/Target.h b/llvm/include/llvm/TextAPI/Target.h
index 53f56a6ee7b0..c2588b9d5a21 100644
--- a/llvm/include/llvm/TextAPI/Target.h
+++ b/llvm/include/llvm/TextAPI/Target.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TEXTAPI_MACHO_TARGET_H
-#define LLVM_TEXTAPI_MACHO_TARGET_H
+#ifndef LLVM_TEXTAPI_TARGET_H
+#define LLVM_TEXTAPI_TARGET_H
#include "llvm/ADT/Triple.h"
#include "llvm/Support/Error.h"
@@ -67,4 +67,4 @@ raw_ostream &operator<<(raw_ostream &OS, const Target &Target);
} // namespace MachO
} // namespace llvm
-#endif // LLVM_TEXTAPI_MACHO_TARGET_H
+#endif // LLVM_TEXTAPI_TARGET_H
diff --git a/llvm/include/llvm/TextAPI/TextAPIReader.h b/llvm/include/llvm/TextAPI/TextAPIReader.h
index a403bab8465d..389335312a74 100644
--- a/llvm/include/llvm/TextAPI/TextAPIReader.h
+++ b/llvm/include/llvm/TextAPI/TextAPIReader.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TEXTAPI_MACHO_TEXTAPIREADER_H
-#define LLVM_TEXTAPI_MACHO_TEXTAPIREADER_H
+#ifndef LLVM_TEXTAPI_TEXTAPIREADER_H
+#define LLVM_TEXTAPI_TEXTAPIREADER_H
#include "llvm/Support/Error.h"
@@ -30,4 +30,4 @@ public:
} // end namespace MachO.
} // end namespace llvm.
-#endif // LLVM_TEXTAPI_MACHO_TEXTAPIREADER_H
+#endif // LLVM_TEXTAPI_TEXTAPIREADER_H
diff --git a/llvm/include/llvm/TextAPI/TextAPIWriter.h b/llvm/include/llvm/TextAPI/TextAPIWriter.h
index 763805168ae6..f9857a806f60 100644
--- a/llvm/include/llvm/TextAPI/TextAPIWriter.h
+++ b/llvm/include/llvm/TextAPI/TextAPIWriter.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TEXTAPI_MACHO_TEXTAPIWRITER_H
-#define LLVM_TEXTAPI_MACHO_TEXTAPIWRITER_H
+#ifndef LLVM_TEXTAPI_TEXTAPIWRITER_H
+#define LLVM_TEXTAPI_TEXTAPIWRITER_H
namespace llvm {
@@ -28,4 +28,4 @@ public:
} // end namespace MachO.
} // end namespace llvm.
-#endif // LLVM_TEXTAPI_MACHO_TEXTAPIWRITER_H
+#endif // LLVM_TEXTAPI_TEXTAPIWRITER_H
diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index c93b8adcc890..d4cbc9bd20b7 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -101,6 +101,7 @@
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/iterator.h"
@@ -591,7 +592,7 @@ struct IRPosition {
LLVMContext &Ctx = getAnchorValue().getContext();
for (Attribute::AttrKind AK : AKs)
- AttrList = AttrList.removeAttribute(Ctx, getAttrIdx(), AK);
+ AttrList = AttrList.removeAttributeAtIndex(Ctx, getAttrIdx(), AK);
if (CB)
CB->setAttributes(AttrList);
@@ -1150,8 +1151,6 @@ struct Attributor {
/// \param Allowed If not null, a set limiting the attribute opportunities.
/// \param DeleteFns Whether to delete functions.
/// \param RewriteSignatures Whether to rewrite function signatures.
- /// \param MaxFixedPointIterations Maximum number of iterations to run until
- /// fixpoint.
Attributor(SetVector<Function *> &Functions, InformationCache &InfoCache,
CallGraphUpdater &CGUpdater,
DenseSet<const char *> *Allowed = nullptr, bool DeleteFns = true,
@@ -1169,8 +1168,9 @@ struct Attributor {
/// \param CGUpdater Helper to update an underlying call graph.
/// \param Allowed If not null, a set limiting the attribute opportunities.
/// \param DeleteFns Whether to delete functions
- /// \param MaxFixedPointIterations Maximum number of iterations to run until
- /// fixpoint.
+ /// \param RewriteSignatures Whether to rewrite function signatures.
+ /// \param MaxFixpointIterations Maximum number of iterations to run until
+ /// fixpoint.
/// \param OREGetter A callback function that returns an ORE object from a
/// Function pointer.
/// \param PassName The name of the pass emitting remarks.
@@ -1855,6 +1855,10 @@ public:
///
static void createShallowWrapper(Function &F);
+  /// Returns true if the function \p F can be internalized, i.e. it has a
+ /// compatible linkage.
+ static bool isInternalizable(Function &F);
+
/// Make another copy of the function \p F such that the copied version has
/// internal linkage afterwards and can be analysed. Then we replace all uses
/// of the original function to the copied one
@@ -1870,6 +1874,22 @@ public:
/// null pointer.
static Function *internalizeFunction(Function &F, bool Force = false);
+ /// Make copies of each function in the set \p FnSet such that the copied
+ /// version has internal linkage afterwards and can be analysed. Then we
+ /// replace all uses of the original function to the copied one. The map
+ /// \p FnMap contains a mapping of functions to their internalized versions.
+ ///
+ /// Only non-locally linked functions that have `linkonce_odr` or `weak_odr`
+ /// linkage can be internalized because these linkages guarantee that other
+ /// definitions with the same name have the same semantics as this one.
+ ///
+ /// This version will internalize all the functions in the set \p FnSet at
+  /// once and then replace the uses. This prevents internalized functions from
+  /// being called by external functions when there is an internalized version
+  /// in the module.
+ static bool internalizeFunctions(SmallPtrSetImpl<Function *> &FnSet,
+ DenseMap<Function *, Function *> &FnMap);
+
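As a rough illustration of the batch interface described above, the following sketch collects internalizable functions and internalizes them in one shot; the module M, the output stream, and the surrounding pass boilerplate are assumptions for illustration, not part of this patch.

    // Hedged sketch: batch-internalize every function with a compatible
    // linkage so that cross-references are rewritten consistently.
    SmallPtrSet<Function *, 8> FnSet;
    for (Function &F : M)
      if (Attributor::isInternalizable(F))
        FnSet.insert(&F);

    DenseMap<Function *, Function *> InternalizedMap;
    if (Attributor::internalizeFunctions(FnSet, InternalizedMap))
      for (auto &KV : InternalizedMap)
        errs() << KV.first->getName() << " -> " << KV.second->getName() << "\n";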
/// Return the data layout associated with the anchor scope.
const DataLayout &getDataLayout() const { return InfoCache.DL; }
@@ -2492,6 +2512,139 @@ struct IntegerRangeState : public AbstractState {
return *this;
}
};
+
+/// Simple state for a set.
+///
+/// This represents a state containing a set of values. The interface supports
+/// modelling sets that contain all possible elements. The state's internal
+/// value is modified using union or intersection operations.
+template <typename BaseTy> struct SetState : public AbstractState {
+ /// A wrapper around a set that has semantics for handling unions and
+ /// intersections with a "universal" set that contains all elements.
+ struct SetContents {
+    /// Creates either a universal set (with no concrete elements) or an empty
+    /// set, depending on \p Universal.
+ SetContents(bool Universal) : Universal(Universal) {}
+
+ /// Creates a non-universal set with concrete values.
+ SetContents(const DenseSet<BaseTy> &Assumptions)
+ : Universal(false), Set(Assumptions) {}
+
+ SetContents(bool Universal, const DenseSet<BaseTy> &Assumptions)
+ : Universal(Universal), Set(Assumptions) {}
+
+ const DenseSet<BaseTy> &getSet() const { return Set; }
+
+ bool isUniversal() const { return Universal; }
+
+ bool empty() const { return Set.empty() && !Universal; }
+
+ /// Finds A := A ^ B where A or B could be the "Universal" set which
+ /// contains every possible attribute. Returns true if changes were made.
+ bool getIntersection(const SetContents &RHS) {
+ bool IsUniversal = Universal;
+ unsigned Size = Set.size();
+
+ // A := A ^ U = A
+ if (RHS.isUniversal())
+ return false;
+
+ // A := U ^ B = B
+ if (Universal)
+ Set = RHS.getSet();
+ else
+ set_intersect(Set, RHS.getSet());
+
+ Universal &= RHS.isUniversal();
+ return IsUniversal != Universal || Size != Set.size();
+ }
+
+ /// Finds A := A u B where A or B could be the "Universal" set which
+    /// contains every possible attribute. Returns true if changes were made.
+ bool getUnion(const SetContents &RHS) {
+ bool IsUniversal = Universal;
+ unsigned Size = Set.size();
+
+ // A := A u U = U = U u B
+ if (!RHS.isUniversal() && !Universal)
+ set_union(Set, RHS.getSet());
+
+ Universal |= RHS.isUniversal();
+ return IsUniversal != Universal || Size != Set.size();
+ }
+
+ private:
+ /// Indicates if this set is "universal", containing every possible element.
+ bool Universal;
+
+ /// The set of currently active assumptions.
+ DenseSet<BaseTy> Set;
+ };
+
+ SetState() : Known(false), Assumed(true), IsAtFixedpoint(false) {}
+
+ /// Initializes the known state with an initial set and initializes the
+ /// assumed state as universal.
+ SetState(const DenseSet<BaseTy> &Known)
+ : Known(Known), Assumed(true), IsAtFixedpoint(false) {}
+
+ /// See AbstractState::isValidState()
+ bool isValidState() const override { return !Assumed.empty(); }
+
+ /// See AbstractState::isAtFixpoint()
+ bool isAtFixpoint() const override { return IsAtFixedpoint; }
+
+ /// See AbstractState::indicateOptimisticFixpoint(...)
+ ChangeStatus indicateOptimisticFixpoint() override {
+ IsAtFixedpoint = true;
+ Known = Assumed;
+ return ChangeStatus::UNCHANGED;
+ }
+
+ /// See AbstractState::indicatePessimisticFixpoint(...)
+ ChangeStatus indicatePessimisticFixpoint() override {
+ IsAtFixedpoint = true;
+ Assumed = Known;
+ return ChangeStatus::CHANGED;
+ }
+
+ /// Return the known state encoding.
+ const SetContents &getKnown() const { return Known; }
+
+ /// Return the assumed state encoding.
+ const SetContents &getAssumed() const { return Assumed; }
+
+ /// Returns if the set state contains the element.
+ bool setContains(const BaseTy &Elem) const {
+ return Assumed.getSet().contains(Elem) || Known.getSet().contains(Elem);
+ }
+
+ /// Performs the set intersection between this set and \p RHS. Returns true if
+ /// changes were made.
+ bool getIntersection(const SetContents &RHS) {
+ unsigned SizeBefore = Assumed.getSet().size();
+
+ // Get intersection and make sure that the known set is still a proper
+ // subset of the assumed set. A := K u (A ^ R).
+ Assumed.getIntersection(RHS);
+ Assumed.getUnion(Known);
+
+ return SizeBefore != Assumed.getSet().size();
+ }
+
+ /// Performs the set union between this set and \p RHS. Returns true if
+ /// changes were made.
+ bool getUnion(const SetContents &RHS) { return Assumed.getUnion(RHS); }
+
+private:
+ /// The set of values known for this state.
+ SetContents Known;
+
+ /// The set of assumed values for this state.
+ SetContents Assumed;
+
+ bool IsAtFixedpoint;
+};
+
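To make the universal-set semantics above concrete, here is a small hedged sketch of how SetState<StringRef> behaves when an assumed universal state is intersected with concrete sets; the assumption strings are invented for illustration.

    // Illustrative only: the assumed state starts universal, so the first
    // intersection simply adopts the incoming concrete set.
    SetState<StringRef> State;                      // Known = {}, Assumed = universal
    DenseSet<StringRef> Incoming({"no-openmp", "no-syscalls"});
    State.getIntersection(SetState<StringRef>::SetContents(Incoming));
    assert(State.setContains("no-openmp"));

    // A second, narrower intersection drops elements that are no longer assumed.
    DenseSet<StringRef> Narrower({"no-openmp"});
    State.getIntersection(SetState<StringRef>::SetContents(Narrower));
    assert(!State.setContains("no-syscalls"));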
/// Helper struct necessary as the modular build fails if the virtual method
/// IRAttribute::manifest is defined in the Attributor.cpp.
struct IRAttributeManifest {
@@ -3394,7 +3547,7 @@ struct AADereferenceable
};
using AAAlignmentStateType =
- IncIntegerState<uint32_t, Value::MaximumAlignment, 1>;
+ IncIntegerState<uint64_t, Value::MaximumAlignment, 1>;
/// An abstract interface for all align attributes.
struct AAAlign : public IRAttribute<
Attribute::Alignment,
@@ -3402,10 +3555,10 @@ struct AAAlign : public IRAttribute<
AAAlign(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
/// Return assumed alignment.
- unsigned getAssumedAlign() const { return getAssumed(); }
+ uint64_t getAssumedAlign() const { return getAssumed(); }
/// Return known alignment.
- unsigned getKnownAlign() const { return getKnown(); }
+ uint64_t getKnownAlign() const { return getKnown(); }
/// See AbstractAttribute::getName()
const std::string getName() const override { return "AAAlign"; }
@@ -3776,7 +3929,7 @@ struct AAMemoryLocation
/// Return true if we assume that the associated functions has no observable
/// accesses.
bool isAssumedReadNone() const {
- return isAssumed(NO_LOCATIONS) | isAssumedStackOnly();
+ return isAssumed(NO_LOCATIONS) || isAssumedStackOnly();
}
/// Return true if we know that the associated functions has at most
@@ -3920,19 +4073,19 @@ struct AAValueConstantRange
static AAValueConstantRange &createForPosition(const IRPosition &IRP,
Attributor &A);
- /// Return an assumed range for the assocaited value a program point \p CtxI.
+  /// Return an assumed range for the associated value at a program point \p CtxI.
/// If \p I is nullptr, simply return an assumed range.
virtual ConstantRange
getAssumedConstantRange(Attributor &A,
const Instruction *CtxI = nullptr) const = 0;
- /// Return a known range for the assocaited value at a program point \p CtxI.
+ /// Return a known range for the associated value at a program point \p CtxI.
/// If \p I is nullptr, simply return a known range.
virtual ConstantRange
getKnownConstantRange(Attributor &A,
const Instruction *CtxI = nullptr) const = 0;
- /// Return an assumed constant for the assocaited value a program point \p
+  /// Return an assumed constant for the associated value at a program point \p
/// CtxI.
Optional<ConstantInt *>
getAssumedConstantInt(Attributor &A,
@@ -4435,6 +4588,9 @@ struct AAFunctionReachability
  /// If the function represented by this position can reach \p Fn.
virtual bool canReach(Attributor &A, Function *Fn) const = 0;
+ /// Can \p CB reach \p Fn
+ virtual bool canReach(Attributor &A, CallBase &CB, Function *Fn) const = 0;
+
/// Create an abstract attribute view for the position \p IRP.
static AAFunctionReachability &createForPosition(const IRPosition &IRP,
Attributor &A);
@@ -4587,6 +4743,40 @@ struct AAPointerInfo : public AbstractAttribute {
static const char ID;
};
+/// An abstract attribute for getting assumption information.
+struct AAAssumptionInfo
+ : public StateWrapper<SetState<StringRef>, AbstractAttribute,
+ DenseSet<StringRef>> {
+ using Base =
+ StateWrapper<SetState<StringRef>, AbstractAttribute, DenseSet<StringRef>>;
+
+ AAAssumptionInfo(const IRPosition &IRP, Attributor &A,
+ const DenseSet<StringRef> &Known)
+ : Base(IRP, Known) {}
+
+ /// Returns true if the assumption set contains the assumption \p Assumption.
+ virtual bool hasAssumption(const StringRef Assumption) const = 0;
+
+ /// Create an abstract attribute view for the position \p IRP.
+ static AAAssumptionInfo &createForPosition(const IRPosition &IRP,
+ Attributor &A);
+
+ /// See AbstractAttribute::getName()
+ const std::string getName() const override { return "AAAssumptionInfo"; }
+
+ /// See AbstractAttribute::getIdAddr()
+ const char *getIdAddr() const override { return &ID; }
+
+ /// This function should return true if the type of the \p AA is
+ /// AAAssumptionInfo
+ static bool classof(const AbstractAttribute *AA) {
+ return (AA->getIdAddr() == &ID);
+ }
+
+ /// Unique ID (due to the unique address)
+ static const char ID;
+};
+
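A hedged sketch of how a client attribute might query the new AAAssumptionInfo; the querying attribute, the function F, and the assumption string are placeholders, and Attributor::getAAFor is assumed to be available as elsewhere in this header.

    // Illustrative only, e.g. inside some AbstractAttribute::updateImpl(Attributor &A);
    // F is assumed to be the function being analysed.
    const auto &AssumptionAA = A.getAAFor<AAAssumptionInfo>(
        *this, IRPosition::function(F), DepClassTy::REQUIRED);
    if (AssumptionAA.hasAssumption("omp_no_openmp"))
      ; // "omp_no_openmp" is a placeholder assumption name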
raw_ostream &operator<<(raw_ostream &, const AAPointerInfo::Access &);
/// Run options, used by the pass manager.
diff --git a/llvm/include/llvm/Transforms/IPO/FunctionAttrs.h b/llvm/include/llvm/Transforms/IPO/FunctionAttrs.h
index ce61eea05c79..0b6734a3929d 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionAttrs.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionAttrs.h
@@ -17,6 +17,7 @@
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
@@ -38,6 +39,13 @@ enum MemoryAccessKind {
/// Returns the memory access properties of this copy of the function.
MemoryAccessKind computeFunctionBodyMemoryAccess(Function &F, AAResults &AAR);
+/// Propagate function attributes for function summaries along the index's
+/// callgraph during thinlink
+bool thinLTOPropagateFunctionAttrs(
+ ModuleSummaryIndex &Index,
+ function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
+ isPrevailing);
+
/// Computes function attributes in post-order over the call graph.
///
/// By operating in post-order, this pass computes precise attributes for
diff --git a/llvm/include/llvm/Transforms/IPO/FunctionImport.h b/llvm/include/llvm/Transforms/IPO/FunctionImport.h
index aad938d48570..c5bafb89fcb5 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionImport.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionImport.h
@@ -167,16 +167,24 @@ void ComputeCrossModuleImportForModuleFromIndex(
FunctionImporter::ImportMapTy &ImportList);
/// PrevailingType enum used as a return type of callback passed
-/// to computeDeadSymbols. Yes and No values used when status explicitly
-/// set by symbols resolution, otherwise status is Unknown.
+/// to computeDeadSymbolsAndUpdateIndirectCalls. Yes and No values used when
+/// status explicitly set by symbols resolution, otherwise status is Unknown.
enum class PrevailingType { Yes, No, Unknown };
+/// Update call edges for indirect calls to local functions added from
+/// SamplePGO when needed. Normally this is done during
+/// computeDeadSymbolsAndUpdateIndirectCalls, but can be called standalone
+/// when that is not called (e.g. during testing).
+void updateIndirectCalls(ModuleSummaryIndex &Index);
+
/// Compute all the symbols that are "dead": i.e these that can't be reached
/// in the graph from any of the given symbols listed in
/// \p GUIDPreservedSymbols. Non-prevailing symbols are symbols without a
/// prevailing copy anywhere in IR and are normally dead, \p isPrevailing
/// predicate returns status of symbol.
-void computeDeadSymbols(
+/// Also update call edges for indirect calls to local functions added from
+/// SamplePGO when needed.
+void computeDeadSymbolsAndUpdateIndirectCalls(
ModuleSummaryIndex &Index,
const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
function_ref<PrevailingType(GlobalValue::GUID)> isPrevailing);
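A minimal sketch of invoking the renamed dead-symbol computation with a prevailing-symbol callback; Index and GUIDPreservedSymbols are assumed to come from the surrounding ThinLTO driver and are not defined by this patch.

    // Illustrative only: a real driver would consult symbol resolution here.
    auto isPrevailing = [&](GlobalValue::GUID /*G*/) {
      return PrevailingType::Unknown;
    };
    computeDeadSymbolsAndUpdateIndirectCalls(Index, GUIDPreservedSymbols,
                                             isPrevailing);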
@@ -214,12 +222,15 @@ std::error_code EmitImportsFiles(
StringRef ModulePath, StringRef OutputFilename,
const std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex);
-/// Resolve prevailing symbol linkages and constrain visibility (1. CanAutoHide,
-/// 2. consider visibility from other definitions for ELF) in \p TheModule based
-/// on the information recorded in the summaries during global summary-based
-/// analysis.
-void thinLTOResolvePrevailingInModule(Module &TheModule,
- const GVSummaryMapTy &DefinedGlobals);
+/// Based on the information recorded in the summaries during global
+/// summary-based analysis:
+/// 1. Resolve prevailing symbol linkages and constrain visibility (CanAutoHide
+/// and consider visibility from other definitions for ELF) in \p TheModule
+/// 2. (optional) Apply propagated function attributes to \p TheModule if
+/// PropagateAttrs is true
+void thinLTOFinalizeInModule(Module &TheModule,
+ const GVSummaryMapTy &DefinedGlobals,
+ bool PropagateAttrs);
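On the backend side, a correspondingly hedged sketch of the per-module finalization call; TheModule and DefinedGlobals are assumed to be the module and its GVSummaryMapTy from the caller.

    // Resolve prevailing linkages and, when requested, apply the propagated
    // function attributes (illustrative call only).
    thinLTOFinalizeInModule(TheModule, DefinedGlobals, /*PropagateAttrs=*/true);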
/// Internalize \p TheModule based on the information recorded in the summaries
/// during global summary-based analysis.
diff --git a/llvm/include/llvm/Transforms/IPO/IROutliner.h b/llvm/include/llvm/Transforms/IPO/IROutliner.h
index 442a8ec1d2e2..110c0b4dcf16 100644
--- a/llvm/include/llvm/Transforms/IPO/IROutliner.h
+++ b/llvm/include/llvm/Transforms/IPO/IROutliner.h
@@ -86,6 +86,15 @@ struct OutlinableRegion {
DenseMap<unsigned, unsigned> ExtractedArgToAgg;
DenseMap<unsigned, unsigned> AggArgToExtracted;
+ /// Marks whether we need to change the order of the arguments when mapping
+ /// the old extracted function call to the new aggregate outlined function
+ /// call.
+ bool ChangedArgOrder = false;
+
+  /// Marks whether this region ends in a branch; special handling is required
+  /// for the following basic blocks in this case.
+ bool EndsInBranch = false;
+
/// Mapping of the argument number in the deduplicated function
/// to a given constant, which is used when creating the arguments to the call
/// to the newly created deduplicated function. This is handled separately
@@ -147,6 +156,14 @@ struct OutlinableRegion {
/// containing the called function.
void reattachCandidate();
+ /// Find a corresponding value for \p V in similar OutlinableRegion \p Other.
+ ///
+ /// \param Other [in] - The OutlinableRegion to find the corresponding Value
+ /// in.
+ /// \param V [in] - The Value to look for in the other region.
+ /// \return The corresponding Value to \p V if it exists, otherwise nullptr.
+ Value *findCorrespondingValueIn(const OutlinableRegion &Other, Value *V);
+
/// Get the size of the code removed from the region.
///
/// \param [in] TTI - The TargetTransformInfo for the parent function.
@@ -176,6 +193,16 @@ private:
/// \returns The number of Functions created.
unsigned doOutline(Module &M);
+ /// Check whether an OutlinableRegion is incompatible with code already
+  /// outlined. OutlinableRegions are incompatible when they contain overlapping
+  /// instructions, or when code that has not been recorded has been added to
+  /// the instructions.
+ ///
+ /// \param [in] Region - The OutlinableRegion to check for conflicts with
+ /// already outlined code.
+ /// \returns whether the region can safely be outlined.
+ bool isCompatibleWithAlreadyOutlinedCode(const OutlinableRegion &Region);
+
/// Remove all the IRSimilarityCandidates from \p CandidateVec that have
/// instructions contained in a previously outlined region and put the
/// remaining regions in \p CurrentGroup.
@@ -301,8 +328,9 @@ private:
struct InstructionAllowed : public InstVisitor<InstructionAllowed, bool> {
InstructionAllowed() {}
- // TODO: Determine a scheme to resolve when the label is similar enough.
- bool visitBranchInst(BranchInst &BI) { return false; }
+ bool visitBranchInst(BranchInst &BI) {
+ return EnableBranches;
+ }
// TODO: Determine a scheme to resolve when the labels are similar enough.
bool visitPHINode(PHINode &PN) { return false; }
// TODO: Handle allocas.
@@ -341,6 +369,10 @@ private:
// TODO: Handle interblock similarity.
bool visitTerminator(Instruction &I) { return false; }
bool visitInstruction(Instruction &I) { return true; }
+
+ // The flag variable that marks whether we should allow branch instructions
+ // to be outlined.
+ bool EnableBranches = false;
};
/// A InstVisitor used to exclude certain instructions from being outlined.
diff --git a/llvm/include/llvm/Transforms/IPO/Inliner.h b/llvm/include/llvm/Transforms/IPO/Inliner.h
index 23a39d7f2e2b..a7060943c4c0 100644
--- a/llvm/include/llvm/Transforms/IPO/Inliner.h
+++ b/llvm/include/llvm/Transforms/IPO/Inliner.h
@@ -14,7 +14,6 @@
#include "llvm/Analysis/InlineAdvisor.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/LazyCallGraph.h"
-#include "llvm/Analysis/ReplayInlineAdvisor.h"
#include "llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h"
#include "llvm/IR/PassManager.h"
#include <utility>
@@ -103,6 +102,9 @@ public:
PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
LazyCallGraph &CG, CGSCCUpdateResult &UR);
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName);
+
private:
InlineAdvisor &getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM,
FunctionAnalysisManager &FAM, Module &M);
@@ -130,17 +132,27 @@ public:
/// before run is called, as part of pass pipeline building.
CGSCCPassManager &getPM() { return PM; }
- /// Allow adding module-level passes benefiting the contained CGSCC passes.
+ /// Add a module pass that runs before the CGSCC passes.
template <class T> void addModulePass(T Pass) {
MPM.addPass(std::move(Pass));
}
+ /// Add a module pass that runs after the CGSCC passes.
+ template <class T> void addLateModulePass(T Pass) {
+ AfterCGMPM.addPass(std::move(Pass));
+ }
+
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName);
+
private:
const InlineParams Params;
const InliningAdvisorMode Mode;
const unsigned MaxDevirtIterations;
+ // TODO: Clean this up so we only have one ModulePassManager.
CGSCCPassManager PM;
ModulePassManager MPM;
+ ModulePassManager AfterCGMPM;
};
} // end namespace llvm
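A hedged sketch of using the new late-module hook together with the existing one; the enclosing wrapper class (ModuleInlinerWrapperPass in this header) and its constructor defaults are assumed, as is an already-built ModulePassManager MPM.

    // Illustrative only: run one module pass before and one after the CGSCC
    // inliner pipeline.
    ModuleInlinerWrapperPass MIWP;
    MIWP.addModulePass(RequireAnalysisPass<GlobalsAA, Module>());
    MIWP.getPM().addPass(InlinerPass());
    MIWP.addLateModulePass(GlobalDCEPass());
    MPM.addPass(std::move(MIWP));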
diff --git a/llvm/include/llvm/Transforms/IPO/LoopExtractor.h b/llvm/include/llvm/Transforms/IPO/LoopExtractor.h
index def3c5943919..aa697484d0e9 100644
--- a/llvm/include/llvm/Transforms/IPO/LoopExtractor.h
+++ b/llvm/include/llvm/Transforms/IPO/LoopExtractor.h
@@ -23,6 +23,8 @@ namespace llvm {
struct LoopExtractorPass : public PassInfoMixin<LoopExtractorPass> {
LoopExtractorPass(unsigned NumLoops = ~0) : NumLoops(NumLoops) {}
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName);
private:
unsigned NumLoops;
diff --git a/llvm/include/llvm/Transforms/IPO/ModuleInliner.h b/llvm/include/llvm/Transforms/IPO/ModuleInliner.h
new file mode 100644
index 000000000000..963d74d71003
--- /dev/null
+++ b/llvm/include/llvm/Transforms/IPO/ModuleInliner.h
@@ -0,0 +1,51 @@
+//===- ModuleInliner.h - Module level Inliner pass --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_IPO_MODULEINLINER_H
+#define LLVM_TRANSFORMS_IPO_MODULEINLINER_H
+
+#include "llvm/Analysis/InlineAdvisor.h"
+#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/ReplayInlineAdvisor.h"
+#include "llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h"
+#include "llvm/IR/PassManager.h"
+#include <utility>
+
+namespace llvm {
+
+class AssumptionCacheTracker;
+class ProfileSummaryInfo;
+
+/// The module inliner pass for the new pass manager.
+///
+/// This pass wires together the inlining utilities and the inline cost
+/// analysis into a module pass. Unlike the SCC inliner, it considers every
+/// call in every function in the whole module and tries to inline it if
+/// profitable. With this module-level inliner, it is possible to evaluate more
+/// heuristics at the module level, such as PriorityInlineOrder. It can be
+/// tuned with a number of parameters to control what cost model is used and
+/// what tradeoffs are made when making the decision.
+class ModuleInlinerPass : public PassInfoMixin<ModuleInlinerPass> {
+public:
+ ModuleInlinerPass(InlineParams Params = getInlineParams(),
+ InliningAdvisorMode Mode = InliningAdvisorMode::Default)
+ : Params(Params), Mode(Mode){};
+ ModuleInlinerPass(ModuleInlinerPass &&Arg) = default;
+
+ PreservedAnalyses run(Module &, ModuleAnalysisManager &);
+
+private:
+ InlineAdvisor &getAdvisor(const ModuleAnalysisManager &MAM,
+ FunctionAnalysisManager &FAM, Module &M);
+ std::unique_ptr<InlineAdvisor> OwnedAdvisor;
+ const InlineParams Params;
+ const InliningAdvisorMode Mode;
+};
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_IPO_MODULEINLINER_H
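A hedged example of wiring the new module-level inliner into a pipeline; the analysis manager setup is assumed to exist in the caller.

    // Illustrative only: run the module inliner with its default parameters
    // and the default inlining advisor.
    ModulePassManager MPM;
    MPM.addPass(ModuleInlinerPass());
    // MPM.run(M, MAM);  // M and MAM are assumed to be set up elsewhere.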
diff --git a/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h b/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h
index 4f941d26df4c..7f321a688aff 100644
--- a/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h
+++ b/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h
@@ -154,7 +154,6 @@ public:
/// tests.
const ModuleSummaryIndex *ImportSummary = nullptr;
- bool DisableTailCalls;
bool DisableUnrollLoops;
bool CallGraphProfile;
bool SLPVectorize;
diff --git a/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h b/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h
index 0adaa1b16d54..6e45f8f6fb05 100644
--- a/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h
+++ b/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_LLVM_PROFGEN_PROFILEDCALLGRAPH_H
-#define LLVM_TOOLS_LLVM_PROFGEN_PROFILEDCALLGRAPH_H
+#ifndef LLVM_TRANSFORMS_IPO_PROFILEDCALLGRAPH_H
+#define LLVM_TRANSFORMS_IPO_PROFILEDCALLGRAPH_H
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/StringMap.h"
@@ -42,7 +42,7 @@ public:
using iterator = std::set<ProfiledCallGraphNode *>::iterator;
// Constructor for non-CS profile.
- ProfiledCallGraph(StringMap<FunctionSamples> &ProfileMap) {
+ ProfiledCallGraph(SampleProfileMap &ProfileMap) {
assert(!FunctionSamples::ProfileIsCS && "CS profile is not handled here");
for (const auto &Samples : ProfileMap) {
addProfiledCalls(Samples.second);
@@ -56,7 +56,7 @@ public:
std::queue<ContextTrieNode *> Queue;
for (auto &Child : ContextTracker.getRootContext().getAllChildContext()) {
ContextTrieNode *Callee = &Child.second;
- addProfiledFunction(Callee->getFuncName());
+ addProfiledFunction(ContextTracker.getFuncNameFor(Callee));
Queue.push(Callee);
}
@@ -72,9 +72,10 @@ public:
// context-based one, which may in turn block context-based inlining.
for (auto &Child : Caller->getAllChildContext()) {
ContextTrieNode *Callee = &Child.second;
- addProfiledFunction(Callee->getFuncName());
+ addProfiledFunction(ContextTracker.getFuncNameFor(Callee));
Queue.push(Callee);
- addProfiledCall(Caller->getFuncName(), Callee->getFuncName());
+ addProfiledCall(ContextTracker.getFuncNameFor(Caller),
+ ContextTracker.getFuncNameFor(Callee));
}
}
}
diff --git a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h
index 94f7796298db..5d80da407d7e 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h
@@ -42,31 +42,34 @@ public:
: ParentContext(Parent), FuncName(FName), FuncSamples(FSamples),
CallSiteLoc(CallLoc){};
ContextTrieNode *getChildContext(const LineLocation &CallSite,
- StringRef CalleeName);
+ StringRef ChildName);
ContextTrieNode *getHottestChildContext(const LineLocation &CallSite);
ContextTrieNode *getOrCreateChildContext(const LineLocation &CallSite,
- StringRef CalleeName,
+ StringRef ChildName,
bool AllowCreate = true);
ContextTrieNode &moveToChildContext(const LineLocation &CallSite,
ContextTrieNode &&NodeToMove,
- StringRef ContextStrToRemove,
+ uint32_t ContextFramesToRemove,
bool DeleteNode = true);
- void removeChildContext(const LineLocation &CallSite, StringRef CalleeName);
- std::map<uint32_t, ContextTrieNode> &getAllChildContext();
+ void removeChildContext(const LineLocation &CallSite, StringRef ChildName);
+ std::map<uint64_t, ContextTrieNode> &getAllChildContext();
StringRef getFuncName() const;
FunctionSamples *getFunctionSamples() const;
void setFunctionSamples(FunctionSamples *FSamples);
+ Optional<uint32_t> getFunctionSize() const;
+ void addFunctionSize(uint32_t FSize);
LineLocation getCallSiteLoc() const;
ContextTrieNode *getParentContext() const;
void setParentContext(ContextTrieNode *Parent);
- void dump();
+ void dumpNode();
+ void dumpTree();
private:
- static uint32_t nodeHash(StringRef ChildName, const LineLocation &Callsite);
+ static uint64_t nodeHash(StringRef ChildName, const LineLocation &Callsite);
// Map line+discriminator location to child context
- std::map<uint32_t, ContextTrieNode> AllChildContext;
+ std::map<uint64_t, ContextTrieNode> AllChildContext;
// Link to parent context node
ContextTrieNode *ParentContext;
@@ -77,6 +80,9 @@ private:
// Function Samples for current context
FunctionSamples *FuncSamples;
+ // Function size for current context
+ Optional<uint32_t> FuncSize;
+
// Callsite location in parent context
LineLocation CallSiteLoc;
};
@@ -90,9 +96,22 @@ private:
// calling context and the context is identified by path from root to the node.
class SampleContextTracker {
public:
- using ContextSamplesTy = SmallVector<FunctionSamples *, 16>;
-
- SampleContextTracker(StringMap<FunctionSamples> &Profiles);
+ struct ProfileComparer {
+ bool operator()(FunctionSamples *A, FunctionSamples *B) const {
+ // Sort function profiles by the number of total samples and their
+ // contexts.
+ if (A->getTotalSamples() == B->getTotalSamples())
+ return A->getContext() < B->getContext();
+ return A->getTotalSamples() > B->getTotalSamples();
+ }
+ };
+
+ // Keep profiles of a function sorted so that they will be processed/promoted
+ // deterministically.
+ using ContextSamplesTy = std::set<FunctionSamples *, ProfileComparer>;
+
+ SampleContextTracker(SampleProfileMap &Profiles,
+ const DenseMap<uint64_t, StringRef> *GUIDToFuncNameMap);
// Query context profile for a specific callee with given name at a given
// call-site. The full context is identified by location of call instruction.
FunctionSamples *getCalleeContextSamplesFor(const CallBase &Inst,
@@ -116,6 +135,8 @@ public:
FunctionSamples *getBaseSamplesFor(StringRef Name, bool MergeContext = true);
// Retrieve the context trie node for given profile context
ContextTrieNode *getContextFor(const SampleContext &Context);
+ // Get real function name for a given trie node.
+ StringRef getFuncNameFor(ContextTrieNode *Node) const;
// Mark a context profile as inlined when function is inlined.
// This makes sure that inlined context profile will be excluded in
// function's base profile.
@@ -136,14 +157,18 @@ private:
ContextTrieNode &addTopLevelContextNode(StringRef FName);
ContextTrieNode &promoteMergeContextSamplesTree(ContextTrieNode &NodeToPromo);
void mergeContextNode(ContextTrieNode &FromNode, ContextTrieNode &ToNode,
- StringRef ContextStrToRemove);
- ContextTrieNode &promoteMergeContextSamplesTree(ContextTrieNode &FromNode,
- ContextTrieNode &ToNodeParent,
- StringRef ContextStrToRemove);
+ uint32_t ContextFramesToRemove);
+ ContextTrieNode &
+ promoteMergeContextSamplesTree(ContextTrieNode &FromNode,
+ ContextTrieNode &ToNodeParent,
+ uint32_t ContextFramesToRemove);
// Map from function name to context profiles (excluding base profile)
StringMap<ContextSamplesTy> FuncToCtxtProfiles;
+ // Map from function guid to real function names. Only used in md5 mode.
+ const DenseMap<uint64_t, StringRef> *GUIDToFuncNameMap;
+
// Root node for context trie tree
ContextTrieNode RootContext;
};
diff --git a/llvm/include/llvm/Transforms/InstCombine/InstCombine.h b/llvm/include/llvm/Transforms/InstCombine/InstCombine.h
index af6d2a18a25a..6dee38c83b36 100644
--- a/llvm/include/llvm/Transforms/InstCombine/InstCombine.h
+++ b/llvm/include/llvm/Transforms/InstCombine/InstCombine.h
@@ -18,12 +18,14 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
+
+#define DEBUG_TYPE "instcombine"
+#include "llvm/Transforms/Utils/InstructionWorklist.h"
namespace llvm {
class InstCombinePass : public PassInfoMixin<InstCombinePass> {
- InstCombineWorklist Worklist;
+ InstructionWorklist Worklist;
const unsigned MaxIterations;
public:
@@ -38,7 +40,7 @@ public:
/// This is a basic whole-function wrapper around the instcombine utility. It
/// will try to combine all instructions in the function.
class InstructionCombiningPass : public FunctionPass {
- InstCombineWorklist Worklist;
+ InstructionWorklist Worklist;
const unsigned MaxIterations;
public:
@@ -67,4 +69,6 @@ FunctionPass *createInstructionCombiningPass();
FunctionPass *createInstructionCombiningPass(unsigned MaxIterations);
}
+#undef DEBUG_TYPE
+
#endif
diff --git a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
index ba0d41f9b748..c6aee439b5a0 100644
--- a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
+++ b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
@@ -25,10 +25,10 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
-#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
#include <cassert>
#define DEBUG_TYPE "instcombine"
+#include "llvm/Transforms/Utils/InstructionWorklist.h"
namespace llvm {
@@ -43,7 +43,9 @@ class TargetTransformInfo;
/// This class provides both the logic to recursively visit instructions and
/// combine them.
class LLVM_LIBRARY_VISIBILITY InstCombiner {
- /// Only used to call target specific inst combining.
+ /// Only used to call target specific intrinsic combining.
+ /// It must **NOT** be used for any other purpose, as InstCombine is a
+ /// target-independent canonicalization transform.
TargetTransformInfo &TTI;
public:
@@ -57,7 +59,7 @@ public:
protected:
/// A worklist of the instructions that need to be simplified.
- InstCombineWorklist &Worklist;
+ InstructionWorklist &Worklist;
// Mode in which we are running the combiner.
const bool MinimizeSize;
@@ -81,7 +83,7 @@ protected:
bool MadeIRChange = false;
public:
- InstCombiner(InstCombineWorklist &Worklist, BuilderTy &Builder,
+ InstCombiner(InstructionWorklist &Worklist, BuilderTy &Builder,
bool MinimizeSize, AAResults *AA, AssumptionCache &AC,
TargetLibraryInfo &TLI, TargetTransformInfo &TTI,
DominatorTree &DT, OptimizationRemarkEmitter &ORE,
@@ -165,16 +167,16 @@ public:
switch (Pred) {
case ICmpInst::ICMP_SLT: // True if LHS s< 0
TrueIfSigned = true;
- return RHS.isNullValue();
+ return RHS.isZero();
case ICmpInst::ICMP_SLE: // True if LHS s<= -1
TrueIfSigned = true;
- return RHS.isAllOnesValue();
+ return RHS.isAllOnes();
case ICmpInst::ICMP_SGT: // True if LHS s> -1
TrueIfSigned = false;
- return RHS.isAllOnesValue();
+ return RHS.isAllOnes();
case ICmpInst::ICMP_SGE: // True if LHS s>= 0
TrueIfSigned = false;
- return RHS.isNullValue();
+ return RHS.isZero();
case ICmpInst::ICMP_UGT:
// True if LHS u> RHS and RHS == sign-bit-mask - 1
TrueIfSigned = true;
@@ -246,12 +248,13 @@ public:
// If `V` is of the form `A + Constant` then `-1 - V` can be folded into
// `(-1 - Constant) - A` if we are willing to invert all of the uses.
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V))
- if (BO->getOpcode() == Instruction::Add ||
- BO->getOpcode() == Instruction::Sub)
- if (match(BO, PatternMatch::m_c_BinOp(PatternMatch::m_Value(),
- PatternMatch::m_ImmConstant())))
- return WillInvertAllUses;
+ if (match(V, m_Add(PatternMatch::m_Value(), PatternMatch::m_ImmConstant())))
+ return WillInvertAllUses;
+
+ // If `V` is of the form `Constant - A` then `-1 - V` can be folded into
+ // `A + (-1 - Constant)` if we are willing to invert all of the uses.
+ if (match(V, m_Sub(PatternMatch::m_ImmConstant(), PatternMatch::m_Value())))
+ return WillInvertAllUses;
// Selects with invertible operands are freely invertible
if (match(V,
@@ -259,6 +262,12 @@ public:
m_Not(PatternMatch::m_Value()))))
return WillInvertAllUses;
+ // Min/max may be in the form of intrinsics, so handle those identically
+ // to select patterns.
+ if (match(V, m_MaxOrMin(m_Not(PatternMatch::m_Value()),
+ m_Not(PatternMatch::m_Value()))))
+ return WillInvertAllUses;
+
return false;
}
@@ -354,14 +363,6 @@ public:
return ConstantVector::get(Out);
}
- /// Create and insert the idiom we use to indicate a block is unreachable
- /// without having to rewrite the CFG from within InstCombine.
- static void CreateNonTerminatorUnreachable(Instruction *InsertAt) {
- auto &Ctx = InsertAt->getContext();
- new StoreInst(ConstantInt::getTrue(Ctx),
- UndefValue::get(Type::getInt1PtrTy(Ctx)), InsertAt);
- }
-
void addToWorklist(Instruction *I) { Worklist.push(I); }
AssumptionCache &getAssumptionCache() const { return AC; }
@@ -479,6 +480,11 @@ public:
return llvm::ComputeNumSignBits(Op, DL, Depth, &AC, CxtI, &DT);
}
+ unsigned ComputeMinSignedBits(const Value *Op, unsigned Depth = 0,
+ const Instruction *CxtI = nullptr) const {
+ return llvm::ComputeMinSignedBits(Op, DL, Depth, &AC, CxtI, &DT);
+ }
+
OverflowResult computeOverflowForUnsignedMul(const Value *LHS,
const Value *RHS,
const Instruction *CxtI) const {
diff --git a/llvm/include/llvm/Transforms/Instrumentation.h b/llvm/include/llvm/Transforms/Instrumentation.h
index 03108bacb0da..a288a3972c3d 100644
--- a/llvm/include/llvm/Transforms/Instrumentation.h
+++ b/llvm/include/llvm/Transforms/Instrumentation.h
@@ -78,7 +78,7 @@ struct GCOVOptions {
ModulePass *createGCOVProfilerPass(const GCOVOptions &Options =
GCOVOptions::getDefault());
-// PGO Instrumention. Parameter IsCS indicates if this is the context senstive
+// PGO Instrumentation. Parameter IsCS indicates if this is the context sensitive
// instrumentation.
ModulePass *createPGOInstrumentationGenLegacyPass(bool IsCS = false);
ModulePass *
@@ -138,7 +138,7 @@ struct InstrProfOptions {
};
/// Insert frontend instrumentation based profiling. Parameter IsCS indicates if
-// this is the context senstive instrumentation.
+// this is the context sensitive instrumentation.
ModulePass *createInstrProfilingLegacyPass(
const InstrProfOptions &Options = InstrProfOptions(), bool IsCS = false);
@@ -169,6 +169,8 @@ struct SanitizerCoverageOptions {
bool PCTable = false;
bool NoPrune = false;
bool StackDepth = false;
+ bool TraceLoads = false;
+ bool TraceStores = false;
SanitizerCoverageOptions() = default;
};
diff --git a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h
index 3781253d2694..c13407a44091 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h
@@ -1,9 +1,8 @@
//===--------- Definition of the AddressSanitizer class ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -90,6 +89,14 @@ private:
static AnalysisKey Key;
};
+struct AddressSanitizerOptions {
+ bool CompileKernel = false;
+ bool Recover = false;
+ bool UseAfterScope = false;
+ AsanDetectStackUseAfterReturnMode UseAfterReturn =
+ AsanDetectStackUseAfterReturnMode::Runtime;
+};
+
/// Public interface to the address sanitizer pass for instrumenting code to
/// check for various memory errors at runtime.
///
@@ -99,19 +106,15 @@ private:
/// surrounding requested memory to be checked for invalid accesses.
class AddressSanitizerPass : public PassInfoMixin<AddressSanitizerPass> {
public:
- explicit AddressSanitizerPass(
- bool CompileKernel = false, bool Recover = false,
- bool UseAfterScope = false,
- AsanDetectStackUseAfterReturnMode UseAfterReturn =
- AsanDetectStackUseAfterReturnMode::Runtime);
+ AddressSanitizerPass(const AddressSanitizerOptions &Options)
+ : Options(Options){};
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName);
static bool isRequired() { return true; }
private:
- bool CompileKernel;
- bool Recover;
- bool UseAfterScope;
- AsanDetectStackUseAfterReturnMode UseAfterReturn;
+ AddressSanitizerOptions Options;
};
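A small hedged sketch of constructing the function-level ASan pass from the new aggregated options struct; the FunctionPassManager setup is assumed.

    // Illustrative only: enable recovery and use-after-scope checking.
    AddressSanitizerOptions Opts;
    Opts.Recover = true;
    Opts.UseAfterScope = true;
    FunctionPassManager FPM;
    FPM.addPass(AddressSanitizerPass(Opts));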
/// Public interface to the address sanitizer module pass for instrumenting code
@@ -122,16 +125,17 @@ private:
class ModuleAddressSanitizerPass
: public PassInfoMixin<ModuleAddressSanitizerPass> {
public:
- explicit ModuleAddressSanitizerPass(
- bool CompileKernel = false, bool Recover = false, bool UseGlobalGC = true,
+ ModuleAddressSanitizerPass(
+ const AddressSanitizerOptions &Options, bool UseGlobalGC = true,
bool UseOdrIndicator = false,
AsanDtorKind DestructorKind = AsanDtorKind::Global);
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName);
static bool isRequired() { return true; }
private:
- bool CompileKernel;
- bool Recover;
+ AddressSanitizerOptions Options;
bool UseGlobalGC;
bool UseOdrIndicator;
AsanDtorKind DestructorKind;
@@ -148,6 +152,16 @@ ModulePass *createModuleAddressSanitizerLegacyPassPass(
bool UseOdrIndicator = true,
AsanDtorKind DestructorKind = AsanDtorKind::Global);
+struct ASanAccessInfo {
+ const int32_t Packed;
+ const uint8_t AccessSizeIndex;
+ const bool IsWrite;
+ const bool CompileKernel;
+
+ explicit ASanAccessInfo(int32_t Packed);
+ ASanAccessInfo(bool IsWrite, bool CompileKernel, uint8_t AccessSizeIndex);
+};
+
} // namespace llvm
#endif
diff --git a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h
index 0228992af874..6c351e3f8e1f 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h
@@ -1,9 +1,8 @@
//===--------- Definition of the AddressSanitizer class ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -18,6 +17,7 @@
#include "llvm/Analysis/PostDominators.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
namespace llvm {
@@ -26,7 +26,6 @@ class InterestingMemoryOperand {
public:
Use *PtrUse;
bool IsWrite;
- Type *OpType;
uint64_t TypeSize;
MaybeAlign Alignment;
// The mask Value, if we're looking at a masked load/store.
@@ -35,8 +34,7 @@ public:
InterestingMemoryOperand(Instruction *I, unsigned OperandNo, bool IsWrite,
class Type *OpType, MaybeAlign Alignment,
Value *MaybeMask = nullptr)
- : IsWrite(IsWrite), OpType(OpType), Alignment(Alignment),
- MaybeMask(MaybeMask) {
+ : IsWrite(IsWrite), Alignment(Alignment), MaybeMask(MaybeMask) {
const DataLayout &DL = I->getModule()->getDataLayout();
TypeSize = DL.getTypeStoreSizeInBits(OpType);
PtrUse = &I->getOperandUse(OperandNo);
@@ -47,47 +45,56 @@ public:
Value *getPtr() { return PtrUse->get(); }
};
-// For an alloca valid between lifetime markers Start and End, call the
+// For an alloca valid between lifetime markers Start and Ends, call the
// Callback for all possible exits out of the lifetime in the containing
// function, which can return from the instructions in RetVec.
//
-// Returns whether End was the only possible exit. If it wasn't, the caller
-// should remove End to ensure that work done at the other exits does not
-// happen outside of the lifetime.
+// Returns whether Ends covered all possible exits. If they did not,
+// the caller should remove Ends to ensure that work done at the other
+// exits does not happen outside of the lifetime.
template <typename F>
bool forAllReachableExits(const DominatorTree &DT, const PostDominatorTree &PDT,
- const Instruction *Start, Instruction *End,
+ const Instruction *Start,
+ const SmallVectorImpl<IntrinsicInst *> &Ends,
const SmallVectorImpl<Instruction *> &RetVec,
F Callback) {
- // We need to ensure that if we tag some object, we certainly untag it
- // before the function exits.
- if (PDT.dominates(End, Start)) {
- Callback(End);
- } else {
- SmallVector<Instruction *, 8> ReachableRetVec;
- unsigned NumCoveredExits = 0;
- for (auto &RI : RetVec) {
- if (!isPotentiallyReachable(Start, RI, nullptr, &DT))
- continue;
- ReachableRetVec.push_back(RI);
- if (DT.dominates(End, RI))
- ++NumCoveredExits;
- }
- // If there's a mix of covered and non-covered exits, just put the untag
- // on exits, so we avoid the redundancy of untagging twice.
- if (NumCoveredExits == ReachableRetVec.size()) {
+ if (Ends.size() == 1 && PDT.dominates(Ends[0], Start)) {
+ Callback(Ends[0]);
+ return true;
+ }
+ SmallVector<Instruction *, 8> ReachableRetVec;
+ unsigned NumCoveredExits = 0;
+ for (auto &RI : RetVec) {
+ if (!isPotentiallyReachable(Start, RI, nullptr, &DT))
+ continue;
+ ReachableRetVec.push_back(RI);
+ // TODO(fmayer): We don't support diamond shapes, where multiple lifetime
+ // ends together dominate the RI, but none of them does by itself.
+ // Check how often this happens and decide whether to support this here.
+ if (std::any_of(Ends.begin(), Ends.end(),
+ [&](Instruction *End) { return DT.dominates(End, RI); }))
+ ++NumCoveredExits;
+ }
+ // If there's a mix of covered and non-covered exits, just put the untag
+ // on exits, so we avoid the redundancy of untagging twice.
+ if (NumCoveredExits == ReachableRetVec.size()) {
+ for (auto *End : Ends)
Callback(End);
- } else {
- for (auto &RI : ReachableRetVec)
- Callback(RI);
- // We may have inserted untag outside of the lifetime interval.
- // Signal the caller to remove the lifetime end call for this alloca.
- return false;
- }
+ } else {
+ for (auto &RI : ReachableRetVec)
+ Callback(RI);
+ // We may have inserted untag outside of the lifetime interval.
+ // Signal the caller to remove the lifetime end call for this alloca.
+ return false;
}
return true;
}
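A hedged sketch of calling the updated helper with several lifetime-end intrinsics; DT, PDT, LifetimeStart, LifetimeEnds and RetVec are assumed to be gathered by the instrumentation pass, and the untagging call is left as a placeholder.

    // Illustrative only: untag an alloca at every reachable exit of its lifetime.
    bool StandardLifetime = forAllReachableExits(
        DT, PDT, LifetimeStart, LifetimeEnds, RetVec, [&](Instruction *Exit) {
          IRBuilder<> IRB(Exit);
          // IRB.CreateCall(UntagFn, {TaggedAlloca});  // placeholder untag code
        });
    if (!StandardLifetime) {
      // The caller should remove the lifetime.end calls for this alloca.
    }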
+// Get AddressSanitizer parameters.
+void getAddressSanitizerParams(const Triple &TargetTriple, int LongSize,
+ bool IsKasan, uint64_t *ShadowBase,
+ int *MappingScale, bool *OrShadowOffset);
+
} // namespace llvm
#endif
diff --git a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerOptions.h b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerOptions.h
index 029b3fc4b788..f019d1c00a35 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerOptions.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerOptions.h
@@ -1,9 +1,8 @@
//===--------- Definition of the AddressSanitizer options -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This file defines data types used to set Address Sanitizer options.
diff --git a/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h
index 2e4f3338030a..3118a3762935 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h
@@ -1,9 +1,8 @@
//===--------- Definition of the HWAddressSanitizer class -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -19,21 +18,32 @@
namespace llvm {
+struct HWAddressSanitizerOptions {
+ HWAddressSanitizerOptions()
+ : HWAddressSanitizerOptions(false, false, false){};
+ HWAddressSanitizerOptions(bool CompileKernel, bool Recover,
+ bool DisableOptimization)
+ : CompileKernel(CompileKernel), Recover(Recover),
+ DisableOptimization(DisableOptimization){};
+ bool CompileKernel;
+ bool Recover;
+ bool DisableOptimization;
+};
+
/// This is a public interface to the hardware address sanitizer pass for
/// instrumenting code to check for various memory errors at runtime, similar to
/// AddressSanitizer but based on partial hardware assistance.
class HWAddressSanitizerPass : public PassInfoMixin<HWAddressSanitizerPass> {
public:
- explicit HWAddressSanitizerPass(bool CompileKernel = false,
- bool Recover = false,
- bool DisableOptimization = false);
+ explicit HWAddressSanitizerPass(HWAddressSanitizerOptions Options)
+ : Options(Options){};
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
static bool isRequired() { return true; }
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName);
private:
- bool CompileKernel;
- bool Recover;
- bool DisableOptimization;
+ HWAddressSanitizerOptions Options;
};
FunctionPass *
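A minimal sketch of how the new options struct changes construction of the pass under the new pass manager; the surrounding pipeline setup and the helper name are assumptions, not part of this hunk.

#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"

// Hypothetical helper: the caller owns the ModulePassManager.
static void addHWAsan(llvm::ModulePassManager &MPM) {
  // Flags that used to be three constructor parameters now travel together.
  llvm::HWAddressSanitizerOptions Opts(/*CompileKernel=*/false,
                                       /*Recover=*/true,
                                       /*DisableOptimization=*/false);
  MPM.addPass(llvm::HWAddressSanitizerPass(Opts));
}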
diff --git a/llvm/include/llvm/Transforms/Instrumentation/InstrOrderFile.h b/llvm/include/llvm/Transforms/Instrumentation/InstrOrderFile.h
index f0f375e0acf6..e3d75f675c93 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/InstrOrderFile.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/InstrOrderFile.h
@@ -1,9 +1,8 @@
//===- InstrOrderFile.h ---- Late IR instrumentation for order file ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h
index ac6a07d299a6..f4d1b1d90e6f 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h
@@ -1,9 +1,8 @@
//===--------- Definition of the MemProfiler class --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h
index f5f9ec7829bd..d47beb93397e 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h
@@ -40,6 +40,23 @@ struct MemorySanitizerPass : public PassInfoMixin<MemorySanitizerPass> {
MemorySanitizerPass(MemorySanitizerOptions Options) : Options(Options) {}
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName);
+ static bool isRequired() { return true; }
+
+private:
+ MemorySanitizerOptions Options;
+};
+
+/// A module pass for msan instrumentation.
+///
+/// Instruments functions to detect uninitialized reads. This pass
+/// inserts calls to runtime library functions. If the functions aren't declared
+/// yet, the pass inserts the declarations. Otherwise the existing globals are
+/// used.
+struct ModuleMemorySanitizerPass : public PassInfoMixin<ModuleMemorySanitizerPass> {
+ ModuleMemorySanitizerPass(MemorySanitizerOptions Options) : Options(Options) {}
+
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
static bool isRequired() { return true; }
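With the module/function split above, a pipeline now schedules both passes. A hedged sketch follows; the adaptor call is the standard new-PM mechanism and the helper name is hypothetical, neither is shown in this patch.

#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Instrumentation/MemorySanitizer.h"

static void addMSan(llvm::ModulePassManager &MPM) {
  llvm::MemorySanitizerOptions Opts; // defaults; adjust TrackOrigins etc. as needed
  MPM.addPass(llvm::ModuleMemorySanitizerPass(Opts));   // module-level setup
  MPM.addPass(llvm::createModuleToFunctionPassAdaptor(  // per-function work
      llvm::MemorySanitizerPass(Opts)));
}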
diff --git a/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h
index f9c507624e6d..e795043630d5 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h
@@ -27,6 +27,14 @@ FunctionPass *createThreadSanitizerLegacyPassPass();
/// yet, the pass inserts the declarations. Otherwise the existing globals are
struct ThreadSanitizerPass : public PassInfoMixin<ThreadSanitizerPass> {
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
+ static bool isRequired() { return true; }
+};
+
+/// A module pass for tsan instrumentation.
+///
+/// Create ctor and init functions.
+struct ModuleThreadSanitizerPass
+ : public PassInfoMixin<ModuleThreadSanitizerPass> {
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
static bool isRequired() { return true; }
};
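The ThreadSanitizer split mirrors the MemorySanitizer one above; under the same assumptions a pipeline would add both pieces like this.

#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"

static void addTSan(llvm::ModulePassManager &MPM) {
  MPM.addPass(llvm::ModuleThreadSanitizerPass()); // emits ctor/init functions
  MPM.addPass(llvm::createModuleToFunctionPassAdaptor(
      llvm::ThreadSanitizerPass()));              // instruments each function
}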
diff --git a/llvm/include/llvm/Transforms/Scalar/EarlyCSE.h b/llvm/include/llvm/Transforms/Scalar/EarlyCSE.h
index 1e7fd71dcbf4..877d8145e746 100644
--- a/llvm/include/llvm/Transforms/Scalar/EarlyCSE.h
+++ b/llvm/include/llvm/Transforms/Scalar/EarlyCSE.h
@@ -32,6 +32,8 @@ struct EarlyCSEPass : PassInfoMixin<EarlyCSEPass> {
/// Run the pass over the function.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName);
bool UseMemorySSA;
};
diff --git a/llvm/include/llvm/Transforms/Scalar/GVN.h b/llvm/include/llvm/Transforms/Scalar/GVN.h
index 5c29b289d158..cbe5057b9cde 100644
--- a/llvm/include/llvm/Transforms/Scalar/GVN.h
+++ b/llvm/include/llvm/Transforms/Scalar/GVN.h
@@ -115,17 +115,20 @@ struct GVNOptions {
///
/// FIXME: We should have a good summary of the GVN algorithm implemented by
/// this particular pass here.
-class GVN : public PassInfoMixin<GVN> {
+class GVNPass : public PassInfoMixin<GVNPass> {
GVNOptions Options;
public:
struct Expression;
- GVN(GVNOptions Options = {}) : Options(Options) {}
+ GVNPass(GVNOptions Options = {}) : Options(Options) {}
/// Run the pass over the function.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName);
+
/// This removes the specified instruction from
/// our various maps and marks it for deletion.
void markInstructionForDeletion(Instruction *I) {
@@ -179,11 +182,11 @@ public:
Expression createExtractvalueExpr(ExtractValueInst *EI);
uint32_t lookupOrAddCall(CallInst *C);
uint32_t phiTranslateImpl(const BasicBlock *BB, const BasicBlock *PhiBlock,
- uint32_t Num, GVN &Gvn);
+ uint32_t Num, GVNPass &Gvn);
bool areCallValsEqual(uint32_t Num, uint32_t NewNum, const BasicBlock *Pred,
- const BasicBlock *PhiBlock, GVN &Gvn);
+ const BasicBlock *PhiBlock, GVNPass &Gvn);
std::pair<uint32_t, bool> assignExpNewValueNum(Expression &exp);
- bool areAllValsInBB(uint32_t num, const BasicBlock *BB, GVN &Gvn);
+ bool areAllValsInBB(uint32_t num, const BasicBlock *BB, GVNPass &Gvn);
public:
ValueTable();
@@ -197,7 +200,7 @@ public:
uint32_t lookupOrAddCmp(unsigned Opcode, CmpInst::Predicate Pred,
Value *LHS, Value *RHS);
uint32_t phiTranslate(const BasicBlock *BB, const BasicBlock *PhiBlock,
- uint32_t Num, GVN &Gvn);
+ uint32_t Num, GVNPass &Gvn);
void eraseTranslateCacheEntry(uint32_t Num, const BasicBlock &CurrBlock);
bool exists(Value *V) const;
void add(Value *V, uint32_t num);
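Out-of-tree users that spelled the pass type explicitly need the new name after this rename. A minimal sketch follows; the GVNOptions setter used here is assumed to exist unchanged from the options struct earlier in the header, and the helper name is hypothetical.

#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Scalar/GVN.h"

static void addGVN(llvm::FunctionPassManager &FPM) {
  // Same options struct as before; only the pass type was renamed to GVNPass.
  FPM.addPass(llvm::GVNPass(llvm::GVNOptions().setMemDep(false)));
}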
diff --git a/llvm/include/llvm/Transforms/Scalar/JumpThreading.h b/llvm/include/llvm/Transforms/Scalar/JumpThreading.h
index 816ea1071e52..0ac7d7c62b7a 100644
--- a/llvm/include/llvm/Transforms/Scalar/JumpThreading.h
+++ b/llvm/include/llvm/Transforms/Scalar/JumpThreading.h
@@ -44,6 +44,7 @@ class PHINode;
class SelectInst;
class SwitchInst;
class TargetLibraryInfo;
+class TargetTransformInfo;
class Value;
/// A private "module" namespace for types and utilities used by
@@ -78,6 +79,7 @@ enum ConstantPreference { WantInteger, WantBlockAddress };
/// revectored to the false side of the second if.
class JumpThreadingPass : public PassInfoMixin<JumpThreadingPass> {
TargetLibraryInfo *TLI;
+ TargetTransformInfo *TTI;
LazyValueInfo *LVI;
AAResults *AA;
DomTreeUpdater *DTU;
@@ -99,9 +101,9 @@ public:
JumpThreadingPass(bool InsertFreezeWhenUnfoldingSelect = false, int T = -1);
// Glue for old PM.
- bool runImpl(Function &F, TargetLibraryInfo *TLI, LazyValueInfo *LVI,
- AAResults *AA, DomTreeUpdater *DTU, bool HasProfileData,
- std::unique_ptr<BlockFrequencyInfo> BFI,
+ bool runImpl(Function &F, TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
+ LazyValueInfo *LVI, AAResults *AA, DomTreeUpdater *DTU,
+ bool HasProfileData, std::unique_ptr<BlockFrequencyInfo> BFI,
std::unique_ptr<BranchProbabilityInfo> BPI);
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
diff --git a/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h b/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h
index 020cfb9a6c85..419729271a23 100644
--- a/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h
+++ b/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h
@@ -94,6 +94,8 @@ public:
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR, LPMUpdater &U);
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName);
/// Add either a loop pass or a loop-nest pass to the pass manager. Append \p
/// Pass to the list of loop passes if it has a dedicated \fn run() method for
/// loops and to the list of loop-nest passes if the \fn run() method is for
@@ -101,51 +103,65 @@ public:
/// to the end of \var IsLoopNestPass so we can easily identify the types of
/// passes in the pass manager later.
template <typename PassT>
- std::enable_if_t<is_detected<HasRunOnLoopT, PassT>::value>
- addPass(PassT &&Pass) {
+ LLVM_ATTRIBUTE_MINSIZE
+ std::enable_if_t<is_detected<HasRunOnLoopT, PassT>::value>
+ addPass(PassT &&Pass) {
using LoopPassModelT =
detail::PassModel<Loop, PassT, PreservedAnalyses, LoopAnalysisManager,
LoopStandardAnalysisResults &, LPMUpdater &>;
IsLoopNestPass.push_back(false);
- LoopPasses.emplace_back(new LoopPassModelT(std::forward<PassT>(Pass)));
+ // Do not use make_unique or emplace_back, they cause too many template
+ // instantiations, causing terrible compile times.
+ LoopPasses.push_back(std::unique_ptr<LoopPassConceptT>(
+ new LoopPassModelT(std::forward<PassT>(Pass))));
}
template <typename PassT>
- std::enable_if_t<!is_detected<HasRunOnLoopT, PassT>::value>
- addPass(PassT &&Pass) {
+ LLVM_ATTRIBUTE_MINSIZE
+ std::enable_if_t<!is_detected<HasRunOnLoopT, PassT>::value>
+ addPass(PassT &&Pass) {
using LoopNestPassModelT =
detail::PassModel<LoopNest, PassT, PreservedAnalyses,
LoopAnalysisManager, LoopStandardAnalysisResults &,
LPMUpdater &>;
IsLoopNestPass.push_back(true);
- LoopNestPasses.emplace_back(
- new LoopNestPassModelT(std::forward<PassT>(Pass)));
+ // Do not use make_unique or emplace_back, they cause too many template
+ // instantiations, causing terrible compile times.
+ LoopNestPasses.push_back(std::unique_ptr<LoopNestPassConceptT>(
+ new LoopNestPassModelT(std::forward<PassT>(Pass))));
}
// Specializations of `addPass` for `RepeatedPass`. These are necessary since
// `RepeatedPass` has a templated `run` method that will result in incorrect
// detection of `HasRunOnLoopT`.
template <typename PassT>
- std::enable_if_t<is_detected<HasRunOnLoopT, PassT>::value>
- addPass(RepeatedPass<PassT> &&Pass) {
+ LLVM_ATTRIBUTE_MINSIZE
+ std::enable_if_t<is_detected<HasRunOnLoopT, PassT>::value>
+ addPass(RepeatedPass<PassT> &&Pass) {
using RepeatedLoopPassModelT =
detail::PassModel<Loop, RepeatedPass<PassT>, PreservedAnalyses,
LoopAnalysisManager, LoopStandardAnalysisResults &,
LPMUpdater &>;
IsLoopNestPass.push_back(false);
- LoopPasses.emplace_back(new RepeatedLoopPassModelT(std::move(Pass)));
+ // Do not use make_unique or emplace_back, they cause too many template
+ // instantiations, causing terrible compile times.
+ LoopPasses.push_back(std::unique_ptr<LoopPassConceptT>(
+ new RepeatedLoopPassModelT(std::move(Pass))));
}
template <typename PassT>
- std::enable_if_t<!is_detected<HasRunOnLoopT, PassT>::value>
- addPass(RepeatedPass<PassT> &&Pass) {
+ LLVM_ATTRIBUTE_MINSIZE
+ std::enable_if_t<!is_detected<HasRunOnLoopT, PassT>::value>
+ addPass(RepeatedPass<PassT> &&Pass) {
using RepeatedLoopNestPassModelT =
detail::PassModel<LoopNest, RepeatedPass<PassT>, PreservedAnalyses,
LoopAnalysisManager, LoopStandardAnalysisResults &,
LPMUpdater &>;
IsLoopNestPass.push_back(true);
- LoopNestPasses.emplace_back(
- new RepeatedLoopNestPassModelT(std::move(Pass)));
+ // Do not use make_unique or emplace_back, they cause too many template
+ // instantiations, causing terrible compile times.
+ LoopNestPasses.push_back(std::unique_ptr<LoopNestPassConceptT>(
+ new RepeatedLoopNestPassModelT(std::move(Pass))));
}
bool isEmpty() const { return LoopPasses.empty() && LoopNestPasses.empty(); }
@@ -215,6 +231,12 @@ struct RequireAnalysisPass<AnalysisT, Loop, LoopAnalysisManager,
(void)AM.template getResult<AnalysisT>(L, AR);
return PreservedAnalyses::all();
}
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ auto ClassName = AnalysisT::name();
+ auto PassName = MapClassName2PassName(ClassName);
+ OS << "require<" << PassName << ">";
+ }
};
/// An alias template to easily name a require analysis loop pass.
@@ -259,8 +281,6 @@ public:
/// state, this routine will mark that the current loop should be skipped by
/// the rest of the pass management infrastructure.
void markLoopAsDeleted(Loop &L, llvm::StringRef Name) {
- assert((!LoopNestMode || CurrentL == &L) &&
- "L should be a top-level loop in loop-nest mode.");
LAM.clear(L, Name);
assert((&L == CurrentL || CurrentL->contains(&L)) &&
"Cannot delete a loop outside of the "
@@ -413,10 +433,12 @@ public:
explicit FunctionToLoopPassAdaptor(std::unique_ptr<PassConceptT> Pass,
bool UseMemorySSA = false,
bool UseBlockFrequencyInfo = false,
+ bool UseBranchProbabilityInfo = false,
bool LoopNestMode = false)
: Pass(std::move(Pass)), LoopCanonicalizationFPM(),
UseMemorySSA(UseMemorySSA),
UseBlockFrequencyInfo(UseBlockFrequencyInfo),
+ UseBranchProbabilityInfo(UseBranchProbabilityInfo),
LoopNestMode(LoopNestMode) {
LoopCanonicalizationFPM.addPass(LoopSimplifyPass());
LoopCanonicalizationFPM.addPass(LCSSAPass());
@@ -424,6 +446,8 @@ public:
/// Runs the loop passes across every loop in the function.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName);
static bool isRequired() { return true; }
@@ -436,6 +460,7 @@ private:
bool UseMemorySSA = false;
bool UseBlockFrequencyInfo = false;
+ bool UseBranchProbabilityInfo = false;
const bool LoopNestMode;
};
@@ -447,13 +472,17 @@ template <typename LoopPassT>
inline std::enable_if_t<is_detected<HasRunOnLoopT, LoopPassT>::value,
FunctionToLoopPassAdaptor>
createFunctionToLoopPassAdaptor(LoopPassT &&Pass, bool UseMemorySSA = false,
- bool UseBlockFrequencyInfo = false) {
+ bool UseBlockFrequencyInfo = false,
+ bool UseBranchProbabilityInfo = false) {
using PassModelT =
detail::PassModel<Loop, LoopPassT, PreservedAnalyses, LoopAnalysisManager,
LoopStandardAnalysisResults &, LPMUpdater &>;
+ // Do not use make_unique, it causes too many template instantiations,
+ // causing terrible compile times.
return FunctionToLoopPassAdaptor(
- std::make_unique<PassModelT>(std::forward<LoopPassT>(Pass)), UseMemorySSA,
- UseBlockFrequencyInfo, false);
+ std::unique_ptr<FunctionToLoopPassAdaptor::PassConceptT>(
+ new PassModelT(std::forward<LoopPassT>(Pass))),
+ UseMemorySSA, UseBlockFrequencyInfo, UseBranchProbabilityInfo, false);
}
/// If \p Pass is a loop-nest pass, \p Pass will first be wrapped into a
@@ -462,24 +491,29 @@ template <typename LoopNestPassT>
inline std::enable_if_t<!is_detected<HasRunOnLoopT, LoopNestPassT>::value,
FunctionToLoopPassAdaptor>
createFunctionToLoopPassAdaptor(LoopNestPassT &&Pass, bool UseMemorySSA = false,
- bool UseBlockFrequencyInfo = false) {
+ bool UseBlockFrequencyInfo = false,
+ bool UseBranchProbabilityInfo = false) {
LoopPassManager LPM;
LPM.addPass(std::forward<LoopNestPassT>(Pass));
using PassModelT =
detail::PassModel<Loop, LoopPassManager, PreservedAnalyses,
LoopAnalysisManager, LoopStandardAnalysisResults &,
LPMUpdater &>;
- return FunctionToLoopPassAdaptor(std::make_unique<PassModelT>(std::move(LPM)),
- UseMemorySSA, UseBlockFrequencyInfo, true);
+ // Do not use make_unique, it causes too many template instantiations,
+ // causing terrible compile times.
+ return FunctionToLoopPassAdaptor(
+ std::unique_ptr<FunctionToLoopPassAdaptor::PassConceptT>(
+ new PassModelT(std::move(LPM))),
+ UseMemorySSA, UseBlockFrequencyInfo, UseBranchProbabilityInfo, true);
}
/// If \p Pass is an instance of \c LoopPassManager, the returned adaptor will
/// be in loop-nest mode if the pass manager contains only loop-nest passes.
template <>
inline FunctionToLoopPassAdaptor
-createFunctionToLoopPassAdaptor<LoopPassManager>(LoopPassManager &&LPM,
- bool UseMemorySSA,
- bool UseBlockFrequencyInfo) {
+createFunctionToLoopPassAdaptor<LoopPassManager>(
+ LoopPassManager &&LPM, bool UseMemorySSA, bool UseBlockFrequencyInfo,
+ bool UseBranchProbabilityInfo) {
// Check if LPM contains any loop pass and if it does not, returns an adaptor
// in loop-nest mode.
using PassModelT =
@@ -487,9 +521,13 @@ createFunctionToLoopPassAdaptor<LoopPassManager>(LoopPassManager &&LPM,
LoopAnalysisManager, LoopStandardAnalysisResults &,
LPMUpdater &>;
bool LoopNestMode = (LPM.getNumLoopPasses() == 0);
- return FunctionToLoopPassAdaptor(std::make_unique<PassModelT>(std::move(LPM)),
- UseMemorySSA, UseBlockFrequencyInfo,
- LoopNestMode);
+ // Do not use make_unique, it causes too many template instantiations,
+ // causing terrible compile times.
+ return FunctionToLoopPassAdaptor(
+ std::unique_ptr<FunctionToLoopPassAdaptor::PassConceptT>(
+ new PassModelT(std::move(LPM))),
+ UseMemorySSA, UseBlockFrequencyInfo, UseBranchProbabilityInfo,
+ LoopNestMode);
}
/// Pass for printing a loop's contents as textual IR.
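Taken together, the addPass() and createFunctionToLoopPassAdaptor() changes keep call sites the same shape while adding the UseBranchProbabilityInfo knob. A hedged usage sketch follows; LICMPass is only an example client, assumed to be default-constructible, and the helper name is hypothetical.

#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Scalar/LICM.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"

static void addLoopPipeline(llvm::FunctionPassManager &FPM) {
  llvm::LoopPassManager LPM;
  LPM.addPass(llvm::LICMPass());
  // New fourth flag: whether BranchProbabilityInfo is made available to loop
  // passes; for a LoopPassManager the adaptor chooses loop-nest mode itself.
  FPM.addPass(llvm::createFunctionToLoopPassAdaptor(
      std::move(LPM), /*UseMemorySSA=*/true,
      /*UseBlockFrequencyInfo=*/false,
      /*UseBranchProbabilityInfo=*/false));
}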
diff --git a/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h b/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h
index 30cc08cb42ae..6afe7ecd2a5d 100644
--- a/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h
+++ b/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h
@@ -140,6 +140,8 @@ public:
: UnrollOpts(UnrollOpts) {}
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName);
};
} // end namespace llvm
diff --git a/llvm/include/llvm/Transforms/Scalar/LowerMatrixIntrinsics.h b/llvm/include/llvm/Transforms/Scalar/LowerMatrixIntrinsics.h
index dd574e4f32c6..d44d297dd4ff 100644
--- a/llvm/include/llvm/Transforms/Scalar/LowerMatrixIntrinsics.h
+++ b/llvm/include/llvm/Transforms/Scalar/LowerMatrixIntrinsics.h
@@ -23,6 +23,8 @@ class LowerMatrixIntrinsicsPass
public:
LowerMatrixIntrinsicsPass(bool Minimal = false) : Minimal(Minimal) {}
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName);
static bool isRequired() { return true; }
};
} // namespace llvm
diff --git a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
index 635b706d0bef..3a4db13d670a 100644
--- a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
+++ b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
@@ -31,7 +31,6 @@ class Instruction;
class LoadInst;
class MemCpyInst;
class MemMoveInst;
-class MemoryDependenceResults;
class MemorySSA;
class MemorySSAUpdater;
class MemSetInst;
@@ -40,7 +39,6 @@ class TargetLibraryInfo;
class Value;
class MemCpyOptPass : public PassInfoMixin<MemCpyOptPass> {
- MemoryDependenceResults *MD = nullptr;
TargetLibraryInfo *TLI = nullptr;
AAResults *AA = nullptr;
AssumptionCache *AC = nullptr;
@@ -54,9 +52,8 @@ public:
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
// Glue for the old PM.
- bool runImpl(Function &F, MemoryDependenceResults *MD, TargetLibraryInfo *TLI,
- AAResults *AA, AssumptionCache *AC, DominatorTree *DT,
- MemorySSA *MSSA);
+ bool runImpl(Function &F, TargetLibraryInfo *TLI, AAResults *AA,
+ AssumptionCache *AC, DominatorTree *DT, MemorySSA *MSSA);
private:
// Helper functions
@@ -65,7 +62,7 @@ private:
bool processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI);
bool processMemMove(MemMoveInst *M);
bool performCallSlotOptzn(Instruction *cpyLoad, Instruction *cpyStore,
- Value *cpyDst, Value *cpySrc, uint64_t cpyLen,
+ Value *cpyDst, Value *cpySrc, TypeSize cpyLen,
Align cpyAlign, CallInst *C);
bool processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep);
bool processMemSetMemCpyDependence(MemCpyInst *MemCpy, MemSetInst *MemSet);
diff --git a/llvm/include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h b/llvm/include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h
index c5f6d6e0e8bd..256d03675a07 100644
--- a/llvm/include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h
+++ b/llvm/include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h
@@ -48,6 +48,8 @@ public:
MergedLoadStoreMotionPass(const MergedLoadStoreMotionOptions &PassOptions)
: Options(PassOptions) {}
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName);
};
}
diff --git a/llvm/include/llvm/Transforms/Scalar/SROA.h b/llvm/include/llvm/Transforms/Scalar/SROA.h
index 6ef7c6b22c0b..f1a43435d89a 100644
--- a/llvm/include/llvm/Transforms/Scalar/SROA.h
+++ b/llvm/include/llvm/Transforms/Scalar/SROA.h
@@ -62,7 +62,7 @@ class SROALegacyPass;
/// onto insert and extract operations on a vector value, and convert them to
/// this form. By doing so, it will enable promotion of vector aggregates to
/// SSA vector values.
-class SROA : public PassInfoMixin<SROA> {
+class SROAPass : public PassInfoMixin<SROAPass> {
LLVMContext *C = nullptr;
DominatorTree *DT = nullptr;
AssumptionCache *AC = nullptr;
@@ -110,7 +110,7 @@ class SROA : public PassInfoMixin<SROA> {
SetVector<SelectInst *, SmallVector<SelectInst *, 2>> SpeculatableSelects;
public:
- SROA() = default;
+ SROAPass() = default;
/// Run the pass over the function.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
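As with GVNPass above, the SROA rename only affects code that names the type. A one-line sketch of the updated spelling, with a hypothetical helper:

#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Scalar/SROA.h"

static void addSROA(llvm::FunctionPassManager &FPM) {
  FPM.addPass(llvm::SROAPass()); // formerly spelled llvm::SROA()
}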
diff --git a/llvm/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h b/llvm/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h
index c1a9ab475ead..dfb1619c7f2a 100644
--- a/llvm/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h
+++ b/llvm/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h
@@ -69,6 +69,9 @@ public:
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR, LPMUpdater &U);
+
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName);
};
/// Create the legacy pass object for the simple loop unswitcher.
diff --git a/llvm/include/llvm/Transforms/Scalar/SimplifyCFG.h b/llvm/include/llvm/Transforms/Scalar/SimplifyCFG.h
index 7c5393851ae6..67db5031a443 100644
--- a/llvm/include/llvm/Transforms/Scalar/SimplifyCFG.h
+++ b/llvm/include/llvm/Transforms/Scalar/SimplifyCFG.h
@@ -41,6 +41,9 @@ public:
/// Run the pass over the function.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName);
};
}
diff --git a/llvm/include/llvm/Transforms/Utils/ASanStackFrameLayout.h b/llvm/include/llvm/Transforms/Utils/ASanStackFrameLayout.h
index 0b570c0d1342..f87588db4ee2 100644
--- a/llvm/include/llvm/Transforms/Utils/ASanStackFrameLayout.h
+++ b/llvm/include/llvm/Transforms/Utils/ASanStackFrameLayout.h
@@ -33,7 +33,7 @@ struct ASanStackVariableDescription {
uint64_t Size; // Size of the variable in bytes.
size_t LifetimeSize; // Size in bytes to use for lifetime analysis check.
// Will be rounded up to Granularity.
- size_t Alignment; // Alignment of the variable (power of 2).
+ uint64_t Alignment; // Alignment of the variable (power of 2).
AllocaInst *AI; // The actual AllocaInst.
size_t Offset; // Offset from the beginning of the frame;
// set by ComputeASanStackFrameLayout.
@@ -42,20 +42,20 @@ struct ASanStackVariableDescription {
// Output data struct for ComputeASanStackFrameLayout.
struct ASanStackFrameLayout {
- size_t Granularity; // Shadow granularity.
- size_t FrameAlignment; // Alignment for the entire frame.
- size_t FrameSize; // Size of the frame in bytes.
+ uint64_t Granularity; // Shadow granularity.
+ uint64_t FrameAlignment; // Alignment for the entire frame.
+ uint64_t FrameSize; // Size of the frame in bytes.
};
ASanStackFrameLayout ComputeASanStackFrameLayout(
// The array of stack variables. The elements may get reordered and changed.
SmallVectorImpl<ASanStackVariableDescription> &Vars,
// AddressSanitizer's shadow granularity. Usually 8, may also be 16, 32, 64.
- size_t Granularity,
+ uint64_t Granularity,
// The minimal size of the left-most redzone (header).
// At least 4 pointer sizes, power of 2, and >= Granularity.
// The resulting FrameSize should be multiple of MinHeaderSize.
- size_t MinHeaderSize);
+ uint64_t MinHeaderSize);
// Compute frame description, see DescribeAddressIfStack in ASan runtime.
SmallString<64> ComputeASanStackFrameDescription(
diff --git a/llvm/include/llvm/Transforms/Utils/AddDiscriminators.h b/llvm/include/llvm/Transforms/Utils/AddDiscriminators.h
index f512c6c06331..0aee2fe95cad 100644
--- a/llvm/include/llvm/Transforms/Utils/AddDiscriminators.h
+++ b/llvm/include/llvm/Transforms/Utils/AddDiscriminators.h
@@ -24,6 +24,7 @@ class Function;
class AddDiscriminatorsPass : public PassInfoMixin<AddDiscriminatorsPass> {
public:
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+ static bool isRequired() { return true; }
};
} // end namespace llvm
diff --git a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
index b45c1820bb20..8970afb3aeaa 100644
--- a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
@@ -129,6 +129,13 @@ void ReplaceInstWithInst(BasicBlock::InstListType &BIL,
/// To. Copies DebugLoc from BI to I, if I doesn't already have a DebugLoc.
void ReplaceInstWithInst(Instruction *From, Instruction *To);
+/// Check if we can prove that all paths starting from this block converge
+/// to a block that either has a @llvm.experimental.deoptimize call
+/// prior to its terminating return instruction or is terminated by unreachable.
+/// All blocks in the traversed sequence must have a unique successor, except
+/// possibly the last one.
+bool IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB);
+
/// Option class for critical edge splitting.
///
/// This provides a builder interface for overriding the default options used
@@ -214,29 +221,6 @@ BasicBlock *SplitKnownCriticalEdge(Instruction *TI, unsigned SuccNum,
CriticalEdgeSplittingOptions(),
const Twine &BBName = "");
-inline BasicBlock *
-SplitCriticalEdge(BasicBlock *BB, succ_iterator SI,
- const CriticalEdgeSplittingOptions &Options =
- CriticalEdgeSplittingOptions()) {
- return SplitCriticalEdge(BB->getTerminator(), SI.getSuccessorIndex(),
- Options);
-}
-
-/// If the edge from *PI to BB is not critical, return false. Otherwise, split
-/// all edges between the two blocks and return true. This updates all of the
-/// same analyses as the other SplitCriticalEdge function. If P is specified, it
-/// updates the analyses described above.
-inline bool SplitCriticalEdge(BasicBlock *Succ, pred_iterator PI,
- const CriticalEdgeSplittingOptions &Options =
- CriticalEdgeSplittingOptions()) {
- bool MadeChange = false;
- Instruction *TI = (*PI)->getTerminator();
- for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
- if (TI->getSuccessor(i) == Succ)
- MadeChange |= !!SplitCriticalEdge(TI, i, Options);
- return MadeChange;
-}
-
/// If an edge from Src to Dst is critical, split the edge and return true,
/// otherwise return false. This method requires that there be an edge between
/// the two blocks. It updates the analyses passed in the options struct
diff --git a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
index e7d41933a6c9..87d33b9b11b7 100644
--- a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
+++ b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
@@ -54,12 +54,6 @@ namespace llvm {
/// 'i8*' type.
Value *emitStrDup(Value *Ptr, IRBuilderBase &B, const TargetLibraryInfo *TLI);
- /// Emit a call to the strnlen function to the builder, for the specified
- /// pointer. Ptr is required to be some pointer type, MaxLen must be of size_t
- /// type, and the return value has 'intptr_t' type.
- Value *emitStrNLen(Value *Ptr, Value *MaxLen, IRBuilderBase &B,
- const DataLayout &DL, const TargetLibraryInfo *TLI);
-
/// Emit a call to the strchr function to the builder, for the specified
/// pointer and character. Ptr is required to be some pointer type, and the
/// return value has 'i8*' type.
@@ -205,8 +199,8 @@ namespace llvm {
const TargetLibraryInfo *TLI);
/// Emit a call to the calloc function.
- Value *emitCalloc(Value *Num, Value *Size, const AttributeList &Attrs,
- IRBuilderBase &B, const TargetLibraryInfo &TLI);
+ Value *emitCalloc(Value *Num, Value *Size, IRBuilderBase &B,
+ const TargetLibraryInfo &TLI);
}
#endif
diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h
index f4fb265c25e0..5a1f322b2054 100644
--- a/llvm/include/llvm/Transforms/Utils/Cloning.h
+++ b/llvm/include/llvm/Transforms/Utils/Cloning.h
@@ -296,10 +296,10 @@ BasicBlock *DuplicateInstructionsInSplitBetween(BasicBlock *BB,
DomTreeUpdater &DTU);
/// Updates profile information by adjusting the entry count by adding
-/// entryDelta then scaling callsite information by the new count divided by the
+/// EntryDelta then scaling callsite information by the new count divided by the
/// old count. VMap is used during inlining to also update the new clone.
void updateProfileCallee(
- Function *Callee, int64_t entryDelta,
+ Function *Callee, int64_t EntryDelta,
const ValueMap<const Value *, WeakTrackingVH> *VMap = nullptr);
/// Find the 'llvm.experimental.noalias.scope.decl' intrinsics in the specified
diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
index 1d9f2d135488..f08173e45a5b 100644
--- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
+++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
@@ -100,6 +100,10 @@ public:
unsigned NumExitBlocks = std::numeric_limits<unsigned>::max();
Type *RetTy;
+ // Mapping from the original exit blocks, to the new blocks inside
+ // the function.
+ SmallVector<BasicBlock *, 4> OldTargets;
+
// Suffix to use when creating extracted function (appended to the original
// function name + "."). If empty, the default is to use the entry block
// label, if non-empty, otherwise "extracted".
@@ -139,6 +143,20 @@ public:
/// returns false.
Function *extractCodeRegion(const CodeExtractorAnalysisCache &CEAC);
+ /// Perform the extraction, returning the new function and providing an
+ /// interface to see what was categorized as inputs and outputs.
+ ///
+ /// \param CEAC - Cache to speed up operations for the CodeExtractor when
+ /// hoisting, and extracting lifetime values and assumes.
+ /// \param Inputs [out] - filled with values marked as inputs to the
+ /// newly outlined function.
+ /// \param Outputs [out] - filled with values marked as outputs to the
+ /// newly outlined function.
+ /// \returns zero when called on a CodeExtractor instance where isEligible
+ /// returns false.
+ Function *extractCodeRegion(const CodeExtractorAnalysisCache &CEAC,
+ ValueSet &Inputs, ValueSet &Outputs);
+
/// Verify that assumption cache isn't stale after a region is extracted.
/// Returns true when verifier finds errors. AssumptionCache is passed as
/// parameter to make this function stateless.
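A minimal sketch of the new extractCodeRegion() overload; the block selection, eligibility handling, and the helper name are illustrative assumptions, not part of the header.

#include "llvm/Transforms/Utils/CodeExtractor.h"

// Outline 'Blocks' from F and report which values crossed the boundary.
static llvm::Function *
outlineRegion(llvm::Function &F, llvm::ArrayRef<llvm::BasicBlock *> Blocks) {
  llvm::CodeExtractor CE(Blocks);
  if (!CE.isEligible())
    return nullptr;
  llvm::CodeExtractorAnalysisCache CEAC(F);
  llvm::CodeExtractor::ValueSet Inputs, Outputs;
  // New overload: same extraction, but Inputs/Outputs are filled for the caller.
  return CE.extractCodeRegion(CEAC, Inputs, Outputs);
}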
diff --git a/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h b/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h
index 630f936471f2..0f32a97f9fcc 100644
--- a/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h
@@ -40,7 +40,8 @@ bool isControlFlowEquivalent(const BasicBlock &BB0, const BasicBlock &BB1,
bool isSafeToMoveBefore(Instruction &I, Instruction &InsertPoint,
DominatorTree &DT,
const PostDominatorTree *PDT = nullptr,
- DependenceInfo *DI = nullptr);
+ DependenceInfo *DI = nullptr,
+ bool CheckForEntireBlock = false);
/// Return true if all instructions (except the terminator) in \p BB can be
/// safely moved before \p InsertPoint.
@@ -62,6 +63,19 @@ void moveInstructionsToTheEnd(BasicBlock &FromBB, BasicBlock &ToBB,
DominatorTree &DT, const PostDominatorTree &PDT,
DependenceInfo &DI);
+/// If two BBs \p ThisBlock and \p OtherBlock are control flow equivalent but
+/// neither strictly dominates and post-dominates the other, determine whether
+/// \p ThisBlock is reached after \p OtherBlock in the control flow.
+bool nonStrictlyPostDominate(const BasicBlock *ThisBlock,
+ const BasicBlock *OtherBlock,
+ const DominatorTree *DT,
+ const PostDominatorTree *PDT);
+
+// Check if I0 is reached before I1 in the control flow.
+bool isReachedBefore(const Instruction *I0, const Instruction *I1,
+ const DominatorTree *DT, const PostDominatorTree *PDT);
+
} // end namespace llvm
#endif // LLVM_TRANSFORMS_UTILS_CODEMOVERUTILS_H
diff --git a/llvm/include/llvm/Transforms/Utils/EntryExitInstrumenter.h b/llvm/include/llvm/Transforms/Utils/EntryExitInstrumenter.h
index 31c023019272..f2b038494a5d 100644
--- a/llvm/include/llvm/Transforms/Utils/EntryExitInstrumenter.h
+++ b/llvm/include/llvm/Transforms/Utils/EntryExitInstrumenter.h
@@ -27,6 +27,9 @@ struct EntryExitInstrumenterPass
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName);
+
bool PostInlining;
static bool isRequired() { return true; }
diff --git a/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h b/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h
index 024d84a7abc8..749b7b2bb5d8 100644
--- a/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h
@@ -130,9 +130,6 @@ bool renameModuleForThinLTO(
bool ClearDSOLocalOnDeclarations,
SetVector<GlobalValue *> *GlobalsToImport = nullptr);
-/// Compute synthetic function entry counts.
-void computeSyntheticCounts(ModuleSummaryIndex &Index);
-
} // End llvm namespace
#endif
diff --git a/llvm/include/llvm/Transforms/Utils/GlobalStatus.h b/llvm/include/llvm/Transforms/Utils/GlobalStatus.h
index 519593c96766..78d7845c4353 100644
--- a/llvm/include/llvm/Transforms/Utils/GlobalStatus.h
+++ b/llvm/include/llvm/Transforms/Utils/GlobalStatus.h
@@ -9,6 +9,7 @@
#ifndef LLVM_TRANSFORMS_UTILS_GLOBALSTATUS_H
#define LLVM_TRANSFORMS_UTILS_GLOBALSTATUS_H
+#include "llvm/IR/Instructions.h"
#include "llvm/Support/AtomicOrdering.h"
namespace llvm {
@@ -45,7 +46,7 @@ struct GlobalStatus {
/// This global is stored to, but only its initializer and one other value
/// is ever stored to it. If this global isStoredOnce, we track the value
- /// stored to it in StoredOnceValue below. This is only tracked for scalar
+ /// stored to it via StoredOnceStore below. This is only tracked for scalar
/// globals.
StoredOnce,
@@ -55,8 +56,16 @@ struct GlobalStatus {
} StoredType = NotStored;
/// If only one value (besides the initializer constant) is ever stored to
- /// this global, keep track of what value it is.
- Value *StoredOnceValue = nullptr;
+ /// this global, keep track of what value it is via the store instruction.
+ const StoreInst *StoredOnceStore = nullptr;
+
+ /// If only one value (besides the initializer constant) is ever stored to
+ /// this global, return the stored value.
+ Value *getStoredOnceValue() const {
+ return (StoredType == StoredOnce && StoredOnceStore)
+ ? StoredOnceStore->getOperand(0)
+ : nullptr;
+ }
/// These start out null/false. When the first accessing function is noticed,
/// it is recorded. When a second different accessing function is noticed,
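A short sketch of the new accessor; analyzeGlobal() is the existing entry point in this header, and its exact failure semantics (returning true when it gives up) are assumed from documentation outside this hunk. The helper name is hypothetical.

#include "llvm/IR/GlobalVariable.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"

static const llvm::Value *singleStoredValue(const llvm::GlobalVariable &GV) {
  llvm::GlobalStatus GS;
  if (llvm::GlobalStatus::analyzeGlobal(&GV, GS))
    return nullptr; // analysis gave up on this global
  // The stored value is now derived from the recorded StoreInst on demand.
  return GS.getStoredOnceValue();
}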
diff --git a/llvm/include/llvm/Transforms/InstCombine/InstCombineWorklist.h b/llvm/include/llvm/Transforms/Utils/InstructionWorklist.h
index 25aabe199d0f..a318c2cd28bb 100644
--- a/llvm/include/llvm/Transforms/InstCombine/InstCombineWorklist.h
+++ b/llvm/include/llvm/Transforms/Utils/InstructionWorklist.h
@@ -1,4 +1,4 @@
-//===- InstCombineWorklist.h - Worklist for InstCombine pass ----*- C++ -*-===//
+//=== InstructionWorklist.h - Worklist for InstCombine & others -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TRANSFORMS_INSTCOMBINE_INSTCOMBINEWORKLIST_H
-#define LLVM_TRANSFORMS_INSTCOMBINE_INSTCOMBINEWORKLIST_H
+#ifndef LLVM_TRANSFORMS_UTILS_INSTRUCTIONWORKLIST_H
+#define LLVM_TRANSFORMS_UTILS_INSTRUCTIONWORKLIST_H
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
@@ -18,13 +18,11 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#define DEBUG_TYPE "instcombine"
-
namespace llvm {
-/// InstCombineWorklist - This is the worklist management logic for
-/// InstCombine.
-class InstCombineWorklist {
+/// InstructionWorklist - This is the worklist management logic for
+/// InstCombine and other simplification passes.
+class InstructionWorklist {
SmallVector<Instruction *, 256> Worklist;
DenseMap<Instruction *, unsigned> WorklistMap;
/// These instructions will be added in reverse order after the current
@@ -33,10 +31,10 @@ class InstCombineWorklist {
SmallSetVector<Instruction *, 16> Deferred;
public:
- InstCombineWorklist() = default;
+ InstructionWorklist() = default;
- InstCombineWorklist(InstCombineWorklist &&) = default;
- InstCombineWorklist &operator=(InstCombineWorklist &&) = default;
+ InstructionWorklist(InstructionWorklist &&) = default;
+ InstructionWorklist &operator=(InstructionWorklist &&) = default;
bool isEmpty() const { return Worklist.empty() && Deferred.empty(); }
@@ -45,7 +43,7 @@ public:
/// You likely want to use this method.
void add(Instruction *I) {
if (Deferred.insert(I))
- LLVM_DEBUG(dbgs() << "IC: ADD DEFERRED: " << *I << '\n');
+ LLVM_DEBUG(dbgs() << "ADD DEFERRED: " << *I << '\n');
}
/// Add value to the worklist if it is an instruction.
@@ -62,7 +60,7 @@ public:
assert(I->getParent() && "Instruction not inserted yet?");
if (WorklistMap.insert(std::make_pair(I, Worklist.size())).second) {
- LLVM_DEBUG(dbgs() << "IC: ADD: " << *I << '\n');
+ LLVM_DEBUG(dbgs() << "ADD: " << *I << '\n');
Worklist.push_back(I);
}
}
@@ -85,7 +83,7 @@ public:
/// Remove I from the worklist if it exists.
void remove(Instruction *I) {
- DenseMap<Instruction*, unsigned>::iterator It = WorklistMap.find(I);
+ DenseMap<Instruction *, unsigned>::iterator It = WorklistMap.find(I);
if (It != WorklistMap.end()) {
// Don't bother moving everything down, just null out the slot.
Worklist[It->second] = nullptr;
@@ -110,7 +108,6 @@ public:
push(cast<Instruction>(U));
}
-
/// Check that the worklist is empty and nuke the backing store for the map.
void zap() {
assert(WorklistMap.empty() && "Worklist empty, but map not?");
@@ -123,6 +120,4 @@ public:
} // end namespace llvm.
-#undef DEBUG_TYPE
-
#endif
diff --git a/llvm/include/llvm/Transforms/Utils/Local.h b/llvm/include/llvm/Transforms/Utils/Local.h
index 0102aa9ef3cc..72cb606eb51a 100644
--- a/llvm/include/llvm/Transforms/Utils/Local.h
+++ b/llvm/include/llvm/Transforms/Utils/Local.h
@@ -55,6 +55,7 @@ class MDNode;
class MemorySSAUpdater;
class PHINode;
class StoreInst;
+class SwitchInst;
class TargetLibraryInfo;
class TargetTransformInfo;
@@ -78,7 +79,8 @@ bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions = false,
//
/// Return true if the result produced by the instruction is not used, and the
-/// instruction has no side effects.
+/// instruction will return. Certain side-effecting instructions are also
+/// considered dead if there are no uses of the instruction.
bool isInstructionTriviallyDead(Instruction *I,
const TargetLibraryInfo *TLI = nullptr);
@@ -236,6 +238,10 @@ CallInst *createCallMatchingInvoke(InvokeInst *II);
/// This function converts the specified invoke into a normal call.
void changeToCall(InvokeInst *II, DomTreeUpdater *DTU = nullptr);
+/// This function removes the default destination from the specified switch.
+void createUnreachableSwitchDefault(SwitchInst *Switch,
+ DomTreeUpdater *DTU = nullptr);
+
///===---------------------------------------------------------------------===//
/// Dbg Intrinsic utilities
///
@@ -292,14 +298,30 @@ void salvageDebugInfo(Instruction &I);
void salvageDebugInfoForDbgValues(Instruction &I,
ArrayRef<DbgVariableIntrinsic *> Insns);
-/// Given an instruction \p I and DIExpression \p DIExpr operating on it, write
-/// the effects of \p I into the returned DIExpression, or return nullptr if
-/// it cannot be salvaged. \p StackVal: whether DW_OP_stack_value should be
-/// appended to the expression. \p LocNo: the index of the location operand to
-/// which \p I applies, should be 0 for debug info without a DIArgList.
-DIExpression *salvageDebugInfoImpl(Instruction &I, DIExpression *DIExpr,
- bool StackVal, unsigned LocNo,
- SmallVectorImpl<Value *> &AdditionalValues);
+/// Given an instruction \p I and DIExpression \p DIExpr operating on
+/// it, append the effects of \p I to the DIExpression operand list
+/// \p Ops, or return \p nullptr if it cannot be salvaged.
+/// \p CurrentLocOps is the number of SSA values referenced by the
+/// incoming \p Ops. \return the first non-constant operand
+/// implicitly referred to by Ops. If \p I references more than one
+/// non-constant operand, any additional operands are added to
+/// \p AdditionalValues.
+///
+/// \example
+///
+/// I = add %a, i32 1
+///
+/// Return = %a
+/// Ops = llvm::dwarf::DW_OP_lit1 llvm::dwarf::DW_OP_add
+///
+/// I = add %a, %b
+///
+/// Return = %a
+/// Ops = llvm::dwarf::DW_OP_LLVM_arg0 llvm::dwarf::DW_OP_add
+/// AdditionalValues = %b
+Value *salvageDebugInfoImpl(Instruction &I, uint64_t CurrentLocOps,
+ SmallVectorImpl<uint64_t> &Ops,
+ SmallVectorImpl<Value *> &AdditionalValues);
/// Point debug users of \p From to \p To or salvage them. Use this function
/// only when replacing all uses of \p From with \p To, with a guarantee that
diff --git a/llvm/include/llvm/Transforms/Utils/LoopPeel.h b/llvm/include/llvm/Transforms/Utils/LoopPeel.h
index 8f857e1e5c21..6f1b4a880457 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopPeel.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopPeel.h
@@ -32,8 +32,8 @@ gatherPeelingPreferences(Loop *L, ScalarEvolution &SE,
void computePeelCount(Loop *L, unsigned LoopSize,
TargetTransformInfo::PeelingPreferences &PP,
- unsigned &TripCount, ScalarEvolution &SE,
- unsigned Threshold = UINT_MAX);
+ unsigned &TripCount, DominatorTree &DT,
+ ScalarEvolution &SE, unsigned Threshold = UINT_MAX);
} // end namespace llvm
diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index 247b911b7c8f..30c3f71e0947 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -147,11 +147,22 @@ protected:
/// BlockFrequencyInfo, TargetLibraryInfo, Loop, AliasSet information for all
/// instructions of the loop and loop safety information as
/// arguments. Diagnostics is emitted via \p ORE. It returns changed status.
+/// \p CurLoop is a loop to do sinking on. \p OutermostLoop is used only when
+/// this function is called by \p sinkRegionForLoopNest.
bool sinkRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *,
BlockFrequencyInfo *, TargetLibraryInfo *,
- TargetTransformInfo *, Loop *, AliasSetTracker *,
- MemorySSAUpdater *, ICFLoopSafetyInfo *,
- SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *);
+ TargetTransformInfo *, Loop *CurLoop, MemorySSAUpdater *,
+ ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &,
+ OptimizationRemarkEmitter *, Loop *OutermostLoop = nullptr);
+
+/// Call sinkRegion on loops contained within the specified loop
+/// in order from innermost to outermost.
+bool sinkRegionForLoopNest(DomTreeNode *, AAResults *, LoopInfo *,
+ DominatorTree *, BlockFrequencyInfo *,
+ TargetLibraryInfo *, TargetTransformInfo *, Loop *,
+ MemorySSAUpdater *, ICFLoopSafetyInfo *,
+ SinkAndHoistLICMFlags &,
+ OptimizationRemarkEmitter *);
/// Walk the specified region of the CFG (defined by all blocks
/// dominated by the specified block, and that are in the current loop) in depth
@@ -163,9 +174,8 @@ bool sinkRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *,
/// Diagnostics is emitted via \p ORE. It returns changed status.
bool hoistRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *,
BlockFrequencyInfo *, TargetLibraryInfo *, Loop *,
- AliasSetTracker *, MemorySSAUpdater *, ScalarEvolution *,
- ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &,
- OptimizationRemarkEmitter *, bool);
+ MemorySSAUpdater *, ScalarEvolution *, ICFLoopSafetyInfo *,
+ SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *, bool);
/// This function deletes dead loops. The caller of this function needs to
/// guarantee that the loop is in fact dead.
@@ -199,7 +209,7 @@ bool promoteLoopAccessesToScalars(
const SmallSetVector<Value *, 8> &, SmallVectorImpl<BasicBlock *> &,
SmallVectorImpl<Instruction *> &, SmallVectorImpl<MemoryAccess *> &,
PredIteratorCache &, LoopInfo *, DominatorTree *, const TargetLibraryInfo *,
- Loop *, AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *,
+ Loop *, MemorySSAUpdater *, ICFLoopSafetyInfo *,
OptimizationRemarkEmitter *);
/// Does a BFS from a given node to all of its children inside a given loop.
@@ -338,6 +348,18 @@ bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
SinkAndHoistLICMFlags *LICMFlags = nullptr,
OptimizationRemarkEmitter *ORE = nullptr);
+/// Returns the comparison predicate used when expanding a min/max reduction.
+CmpInst::Predicate getMinMaxReductionPredicate(RecurKind RK);
+
+/// See RecurrenceDescriptor::isSelectCmpPattern for a description of the
+/// pattern we are trying to match. In this pattern we are only ever selecting
+/// between two values: 1) an initial PHI start value, and 2) a loop invariant
+/// value. This function uses \p LoopExitInst to determine 2), which we then use
+/// to select between \p Left and \p Right. Any lane value in \p Left that
+/// matches 2) will be merged into \p Right.
+Value *createSelectCmpOp(IRBuilderBase &Builder, Value *StartVal, RecurKind RK,
+ Value *Left, Value *Right);
+
/// Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
/// The Builder's fast-math-flags must be set to propagate the expected values.
Value *createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,
@@ -365,12 +387,22 @@ Value *createSimpleTargetReduction(IRBuilderBase &B,
RecurKind RdxKind,
ArrayRef<Value *> RedOps = None);
+/// Create a target reduction of the given vector \p Src for a reduction of the
+/// kind RecurKind::SelectICmp or RecurKind::SelectFCmp. The reduction operation
+/// is described by \p Desc.
+Value *createSelectCmpTargetReduction(IRBuilderBase &B,
+ const TargetTransformInfo *TTI,
+ Value *Src,
+ const RecurrenceDescriptor &Desc,
+ PHINode *OrigPhi);
+
/// Create a generic target reduction using a recurrence descriptor \p Desc
/// The target is queried to determine if intrinsics or shuffle sequences are
/// required to implement the reduction.
/// Fast-math-flags are propagated using the RecurrenceDescriptor.
Value *createTargetReduction(IRBuilderBase &B, const TargetTransformInfo *TTI,
- const RecurrenceDescriptor &Desc, Value *Src);
+ const RecurrenceDescriptor &Desc, Value *Src,
+ PHINode *OrigPhi = nullptr);
/// Create an ordered reduction intrinsic using the given recurrence
/// descriptor \p Desc.
@@ -463,12 +495,8 @@ Loop *cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,
LoopInfo *LI, LPPassManager *LPM);
/// Add code that checks at runtime if the accessed arrays in \p PointerChecks
-/// overlap.
-///
-/// Returns a pair of instructions where the first element is the first
-/// instruction generated in possibly a sequence of instructions and the
-/// second value is the final comparator value or NULL if no check is needed.
-std::pair<Instruction *, Instruction *>
+/// overlap. Returns the final comparator value or NULL if no check is needed.
+Value *
addRuntimeChecks(Instruction *Loc, Loop *TheLoop,
const SmallVectorImpl<RuntimePointerCheck> &PointerChecks,
SCEVExpander &Expander);
diff --git a/llvm/include/llvm/Transforms/Utils/MemoryOpRemark.h b/llvm/include/llvm/Transforms/Utils/MemoryOpRemark.h
index 7b4a1cdbf4fd..e5f8a46eaf23 100644
--- a/llvm/include/llvm/Transforms/Utils/MemoryOpRemark.h
+++ b/llvm/include/llvm/Transforms/Utils/MemoryOpRemark.h
@@ -1,9 +1,8 @@
//===- MemoryOpRemark.h - Memory operation remark analysis -*- C++ ------*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/llvm/include/llvm/Transforms/Utils/PredicateInfo.h b/llvm/include/llvm/Transforms/Utils/PredicateInfo.h
index c4030735d965..c922476ac79d 100644
--- a/llvm/include/llvm/Transforms/Utils/PredicateInfo.h
+++ b/llvm/include/llvm/Transforms/Utils/PredicateInfo.h
@@ -51,11 +51,13 @@
#define LLVM_TRANSFORMS_UTILS_PREDICATEINFO_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Value.h"
+#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
namespace llvm {
@@ -176,7 +178,7 @@ public:
class PredicateInfo {
public:
PredicateInfo(Function &, DominatorTree &, AssumptionCache &);
- ~PredicateInfo() = default;
+ ~PredicateInfo();
void verifyPredicateInfo() const;
@@ -203,6 +205,8 @@ private:
// the Predicate Info, they belong to the ValueInfo structs in the ValueInfos
// vector.
DenseMap<const Value *, const PredicateBase *> PredicateMap;
+ // The set of ssa_copy declarations we created with our custom mangling.
+ SmallSet<AssertingVH<Function>, 20> CreatedDeclarations;
};
// This pass does eager building and then printing of PredicateInfo. It is used
diff --git a/llvm/include/llvm/Transforms/Utils/SSAUpdaterBulk.h b/llvm/include/llvm/Transforms/Utils/SSAUpdaterBulk.h
index 3a78e22b7e94..5de575aed059 100644
--- a/llvm/include/llvm/Transforms/Utils/SSAUpdaterBulk.h
+++ b/llvm/include/llvm/Transforms/Utils/SSAUpdaterBulk.h
@@ -70,10 +70,6 @@ public:
/// rewritten value when RewriteAllUses is called.
void AddUse(unsigned Var, Use *U);
- /// Return true if the SSAUpdater already has a value for the specified
- /// variable in the specified block.
- bool HasValueForBlock(unsigned Var, BasicBlock *BB);
-
/// Perform all the necessary updates, including new PHI-nodes insertion and
/// the requested uses update.
///
diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
index e0759d359dbe..6a2f0acf46f3 100644
--- a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
+++ b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
@@ -56,27 +56,28 @@ template <> struct IRTraits<BasicBlock> {
using FunctionT = Function;
using BlockFrequencyInfoT = BlockFrequencyInfo;
using LoopT = Loop;
- using LoopInfoT = LoopInfo;
+ using LoopInfoPtrT = std::unique_ptr<LoopInfo>;
+ using DominatorTreePtrT = std::unique_ptr<DominatorTree>;
+ using PostDominatorTreeT = PostDominatorTree;
+ using PostDominatorTreePtrT = std::unique_ptr<PostDominatorTree>;
using OptRemarkEmitterT = OptimizationRemarkEmitter;
using OptRemarkAnalysisT = OptimizationRemarkAnalysis;
- using DominatorTreeT = DominatorTree;
- using PostDominatorTreeT = PostDominatorTree;
+ using PredRangeT = pred_range;
+ using SuccRangeT = succ_range;
static Function &getFunction(Function &F) { return F; }
static const BasicBlock *getEntryBB(const Function *F) {
return &F->getEntryBlock();
}
+ static pred_range getPredecessors(BasicBlock *BB) { return predecessors(BB); }
+ static succ_range getSuccessors(BasicBlock *BB) { return successors(BB); }
};
} // end namespace afdo_detail
-extern cl::opt<unsigned> SampleProfileMaxPropagateIterations;
-extern cl::opt<unsigned> SampleProfileRecordCoverage;
-extern cl::opt<unsigned> SampleProfileSampleCoverage;
-extern cl::opt<bool> NoWarnSampleUnused;
-
template <typename BT> class SampleProfileLoaderBaseImpl {
public:
- SampleProfileLoaderBaseImpl(std::string Name) : Filename(Name) {}
+ SampleProfileLoaderBaseImpl(std::string Name, std::string RemapName)
+ : Filename(Name), RemappingFilename(RemapName) {}
void dump() { Reader->dump(); }
using InstructionT = typename afdo_detail::IRTraits<BT>::InstructionT;
@@ -85,14 +86,19 @@ public:
typename afdo_detail::IRTraits<BT>::BlockFrequencyInfoT;
using FunctionT = typename afdo_detail::IRTraits<BT>::FunctionT;
using LoopT = typename afdo_detail::IRTraits<BT>::LoopT;
- using LoopInfoT = typename afdo_detail::IRTraits<BT>::LoopInfoT;
+ using LoopInfoPtrT = typename afdo_detail::IRTraits<BT>::LoopInfoPtrT;
+ using DominatorTreePtrT =
+ typename afdo_detail::IRTraits<BT>::DominatorTreePtrT;
+ using PostDominatorTreePtrT =
+ typename afdo_detail::IRTraits<BT>::PostDominatorTreePtrT;
+ using PostDominatorTreeT =
+ typename afdo_detail::IRTraits<BT>::PostDominatorTreeT;
using OptRemarkEmitterT =
typename afdo_detail::IRTraits<BT>::OptRemarkEmitterT;
using OptRemarkAnalysisT =
typename afdo_detail::IRTraits<BT>::OptRemarkAnalysisT;
- using DominatorTreeT = typename afdo_detail::IRTraits<BT>::DominatorTreeT;
- using PostDominatorTreeT =
- typename afdo_detail::IRTraits<BT>::PostDominatorTreeT;
+ using PredRangeT = typename afdo_detail::IRTraits<BT>::PredRangeT;
+ using SuccRangeT = typename afdo_detail::IRTraits<BT>::SuccRangeT;
using BlockWeightMap = DenseMap<const BasicBlockT *, uint64_t>;
using EquivalenceClassMap =
@@ -112,6 +118,12 @@ protected:
const BasicBlockT *getEntryBB(const FunctionT *F) {
return afdo_detail::IRTraits<BT>::getEntryBB(F);
}
+ PredRangeT getPredecessors(BasicBlockT *BB) {
+ return afdo_detail::IRTraits<BT>::getPredecessors(BB);
+ }
+ SuccRangeT getSuccessors(BasicBlockT *BB) {
+ return afdo_detail::IRTraits<BT>::getSuccessors(BB);
+ }
unsigned getFunctionLoc(FunctionT &Func);
virtual ErrorOr<uint64_t> getInstWeight(const InstructionT &Inst);
@@ -129,12 +141,11 @@ protected:
void findEquivalencesFor(BasicBlockT *BB1,
ArrayRef<BasicBlockT *> Descendants,
PostDominatorTreeT *DomTree);
-
void propagateWeights(FunctionT &F);
uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
void buildEdges(FunctionT &F);
bool propagateThroughEdges(FunctionT &F, bool UpdateBlockCount);
- void clearFunctionData();
+ void clearFunctionData(bool ResetDT = true);
void computeDominanceAndLoopInfo(FunctionT &F);
bool
computeAndPropagateWeights(FunctionT &F,
@@ -168,9 +179,9 @@ protected:
EquivalenceClassMap EquivalenceClass;
/// Dominance, post-dominance and loop information.
- std::unique_ptr<DominatorTreeT> DT;
- std::unique_ptr<PostDominatorTreeT> PDT;
- std::unique_ptr<LoopInfoT> LI;
+ DominatorTreePtrT DT;
+ PostDominatorTreePtrT PDT;
+ LoopInfoPtrT LI;
/// Predecessors for each basic block in the CFG.
BlockEdgeMap Predecessors;
@@ -190,6 +201,9 @@ protected:
/// Name of the profile file to load.
std::string Filename;
+ /// Name of the profile remapping file to load.
+ std::string RemappingFilename;
+
/// Profile Summary Info computed from sample profile.
ProfileSummaryInfo *PSI = nullptr;
@@ -199,15 +213,17 @@ protected:
/// Clear all the per-function data used to load samples and propagate weights.
template <typename BT>
-void SampleProfileLoaderBaseImpl<BT>::clearFunctionData() {
+void SampleProfileLoaderBaseImpl<BT>::clearFunctionData(bool ResetDT) {
BlockWeights.clear();
EdgeWeights.clear();
VisitedBlocks.clear();
VisitedEdges.clear();
EquivalenceClass.clear();
- DT = nullptr;
- PDT = nullptr;
- LI = nullptr;
+ if (ResetDT) {
+ DT = nullptr;
+ PDT = nullptr;
+ LI = nullptr;
+ }
Predecessors.clear();
Successors.clear();
CoverageTracker.clear();
@@ -475,7 +491,7 @@ void SampleProfileLoaderBaseImpl<BT>::findEquivalenceClasses(FunctionT &F) {
// class by making BB2's equivalence class be BB1.
DominatedBBs.clear();
DT->getDescendants(BB1, DominatedBBs);
- findEquivalencesFor(BB1, DominatedBBs, PDT.get());
+ findEquivalencesFor(BB1, DominatedBBs, &*PDT);
LLVM_DEBUG(printBlockEquivalence(dbgs(), BB1));
}
@@ -692,7 +708,7 @@ void SampleProfileLoaderBaseImpl<BT>::buildEdges(FunctionT &F) {
SmallPtrSet<BasicBlockT *, 16> Visited;
if (!Predecessors[B1].empty())
llvm_unreachable("Found a stale predecessors list in a basic block.");
- for (BasicBlockT *B2 : predecessors(B1))
+ for (auto *B2 : getPredecessors(B1))
if (Visited.insert(B2).second)
Predecessors[B1].push_back(B2);
@@ -700,7 +716,7 @@ void SampleProfileLoaderBaseImpl<BT>::buildEdges(FunctionT &F) {
Visited.clear();
if (!Successors[B1].empty())
llvm_unreachable("Found a stale successors list in a basic block.");
- for (BasicBlockT *B2 : successors(B1))
+ for (auto *B2 : getSuccessors(B1))
if (Visited.insert(B2).second)
Successors[B1].push_back(B2);
}
@@ -911,12 +927,12 @@ unsigned SampleProfileLoaderBaseImpl<BT>::getFunctionLoc(FunctionT &F) {
template <typename BT>
void SampleProfileLoaderBaseImpl<BT>::computeDominanceAndLoopInfo(
FunctionT &F) {
- DT.reset(new DominatorTreeT);
+ DT.reset(new DominatorTree);
DT->recalculate(F);
PDT.reset(new PostDominatorTree(F));
- LI.reset(new LoopInfoT);
+ LI.reset(new LoopInfo);
LI->analyze(*DT);
}
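
The SampleProfileLoaderBaseImpl hunks above funnel every CFG and analysis query through afdo_detail::IRTraits<BT> (the new getPredecessors()/getSuccessors() wrappers and the *PtrT typedefs), so one weight-propagation implementation can be instantiated for both IR and machine IR. A minimal, self-contained sketch of that traits pattern, using invented stand-in names (Block, Traits, LoaderBase) rather than the actual LLVM definitions:

#include <vector>

struct Block {
  std::vector<Block *> Preds, Succs;
};

// A specialization tells the generic loader how to walk the CFG of one
// particular block type.
template <typename BlockT> struct Traits;

template <> struct Traits<Block> {
  using PredRangeT = std::vector<Block *> &;
  using SuccRangeT = std::vector<Block *> &;
  static PredRangeT getPredecessors(Block *BB) { return BB->Preds; }
  static SuccRangeT getSuccessors(Block *BB) { return BB->Succs; }
};

template <typename BT> struct LoaderBase {
  // The generic code never names a concrete block type; every CFG walk goes
  // through the traits, mirroring getPredecessors()/getSuccessors() above.
  unsigned countCFGEdges(BT *BB) {
    unsigned N = 0;
    for (auto *P : Traits<BT>::getPredecessors(BB)) { (void)P; ++N; }
    for (auto *S : Traits<BT>::getSuccessors(BB)) { (void)S; ++N; }
    return N;
  }
};
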
diff --git a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
index 59bf3a342caa..efc3cc775e11 100644
--- a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
+++ b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
@@ -32,8 +32,10 @@ extern cl::opt<unsigned> SCEVCheapExpansionBudget;
/// Return true if the given expression is safe to expand in the sense that
/// all materialized values are safe to speculate anywhere their operands are
-/// defined.
-bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE);
+/// defined, and the expander is capable of expanding the expression.
+/// CanonicalMode indicates whether the expander will be used in canonical mode.
+bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE,
+ bool CanonicalMode = true);
/// Return true if the given expression is safe to expand in the sense that
/// all materialized values are defined and safe to speculate at the specified
@@ -489,9 +491,6 @@ private:
Value *expandIVInc(PHINode *PN, Value *StepV, const Loop *L, Type *ExpandTy,
Type *IntTy, bool useSubtract);
- void hoistBeforePos(DominatorTree *DT, Instruction *InstToHoist,
- Instruction *Pos, PHINode *LoopPhi);
-
void fixupInsertPoints(Instruction *I);
/// If required, create LCSSA PHIs for \p Users' operand \p OpIdx. If new
diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
index 8703434e1696..a88e72fc9ba8 100644
--- a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
+++ b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
@@ -132,8 +132,6 @@ private:
eraseFromParent(I);
}
- Value *foldMallocMemset(CallInst *Memset, IRBuilderBase &B);
-
public:
LibCallSimplifier(
const DataLayout &DL, const TargetLibraryInfo *TLI,
diff --git a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
index d95ead2def3d..320c36b36924 100644
--- a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
+++ b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
@@ -117,7 +117,8 @@ MDNode *GetUnrollMetadata(MDNode *LoopID, StringRef Name);
TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI,
- BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel,
+ BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
+ llvm::OptimizationRemarkEmitter &ORE, int OptLevel,
Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
Optional<bool> UserUpperBound, Optional<unsigned> UserFullUnrollMaxCount);
diff --git a/llvm/include/llvm/Transforms/Utils/ValueMapper.h b/llvm/include/llvm/Transforms/Utils/ValueMapper.h
index 4245f51cc1e2..95fd0b14dd51 100644
--- a/llvm/include/llvm/Transforms/Utils/ValueMapper.h
+++ b/llvm/include/llvm/Transforms/Utils/ValueMapper.h
@@ -22,7 +22,6 @@ namespace llvm {
class Constant;
class Function;
-class GlobalIndirectSymbol;
class GlobalVariable;
class Instruction;
class MDNode;
@@ -122,7 +121,8 @@ inline RemapFlags operator|(RemapFlags LHS, RemapFlags RHS) {
/// instance:
/// - \a scheduleMapGlobalInitializer()
/// - \a scheduleMapAppendingVariable()
-/// - \a scheduleMapGlobalIndirectSymbol()
+/// - \a scheduleMapGlobalAlias()
+/// - \a scheduleMapGlobalIFunc()
/// - \a scheduleRemapFunction()
///
/// Sometimes a callback needs a different mapping context. Such a context can
@@ -182,9 +182,10 @@ public:
bool IsOldCtorDtor,
ArrayRef<Constant *> NewMembers,
unsigned MappingContextID = 0);
- void scheduleMapGlobalIndirectSymbol(GlobalIndirectSymbol &GIS,
- Constant &Target,
- unsigned MappingContextID = 0);
+ void scheduleMapGlobalAlias(GlobalAlias &GA, Constant &Aliasee,
+ unsigned MappingContextID = 0);
+ void scheduleMapGlobalIFunc(GlobalIFunc &GI, Constant &Resolver,
+ unsigned MappingContextID = 0);
void scheduleRemapFunction(Function &F, unsigned MappingContextID = 0);
};
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index e7dcdda8af89..ed9e0beb0339 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -340,7 +340,7 @@ public:
/// -1 - Address is consecutive, and decreasing.
/// NOTE: This method must only be used before modifying the original scalar
/// loop. Do not use after invoking 'createVectorizedLoopSkeleton' (PR34965).
- int isConsecutivePtr(Value *Ptr) const;
+ int isConsecutivePtr(Type *AccessTy, Value *Ptr) const;
/// Returns true if the value V is uniform within the loop.
bool isUniform(Value *V);
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h
index ad6a4b561a9b..d105496ad47f 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h
@@ -153,6 +153,8 @@ public:
ProfileSummaryInfo *PSI;
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName);
// Shim for old PM.
LoopVectorizeResult
diff --git a/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h b/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h
index f416a592d683..cd605aacb52d 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h
@@ -94,9 +94,11 @@ private:
bool tryToVectorizePair(Value *A, Value *B, slpvectorizer::BoUpSLP &R);
/// Try to vectorize a list of operands.
+ /// \param LimitForRegisterSize Vectorize only using maximal allowed register
+ /// size.
/// \returns true if a value was vectorized.
bool tryToVectorizeList(ArrayRef<Value *> VL, slpvectorizer::BoUpSLP &R,
- bool AllowReorder = false);
+ bool LimitForRegisterSize = false);
/// Try to vectorize a chain that may start at the operands of \p I.
bool tryToVectorize(Instruction *I, slpvectorizer::BoUpSLP &R);
diff --git a/llvm/include/llvm/Transforms/Vectorize/VectorCombine.h b/llvm/include/llvm/Transforms/Vectorize/VectorCombine.h
index b7809aa24cae..a32f9fba967f 100644
--- a/llvm/include/llvm/Transforms/Vectorize/VectorCombine.h
+++ b/llvm/include/llvm/Transforms/Vectorize/VectorCombine.h
@@ -20,10 +20,16 @@
namespace llvm {
/// Optimize scalar/vector interactions in IR using target cost models.
-struct VectorCombinePass : public PassInfoMixin<VectorCombinePass> {
+class VectorCombinePass : public PassInfoMixin<VectorCombinePass> {
+ /// If true only perform scalarization combines and do not introduce new
+ /// vector operations.
+ bool ScalarizationOnly;
+
public:
+ VectorCombinePass(bool ScalarizationOnly = false)
+ : ScalarizationOnly(ScalarizationOnly) {}
+
PreservedAnalyses run(Function &F, FunctionAnalysisManager &);
};
-
}
#endif // LLVM_TRANSFORMS_VECTORIZE_VECTORCOMBINE_H
diff --git a/llvm/include/llvm/WindowsManifest/WindowsManifestMerger.h b/llvm/include/llvm/WindowsManifest/WindowsManifestMerger.h
index 31f4daeb7019..2da74bb9dce8 100644
--- a/llvm/include/llvm/WindowsManifest/WindowsManifestMerger.h
+++ b/llvm/include/llvm/WindowsManifest/WindowsManifestMerger.h
@@ -30,6 +30,7 @@
namespace llvm {
class MemoryBuffer;
+class MemoryBufferRef;
namespace windows_manifest {
@@ -49,7 +50,7 @@ class WindowsManifestMerger {
public:
WindowsManifestMerger();
~WindowsManifestMerger();
- Error merge(const MemoryBuffer &Manifest);
+ Error merge(MemoryBufferRef Manifest);
// Returns vector containing merged xml manifest, or uninitialized vector for
// empty manifest.
diff --git a/llvm/include/llvm/module.modulemap b/llvm/include/llvm/module.modulemap
index 848fb266374e..6cbbb9a4028e 100644
--- a/llvm/include/llvm/module.modulemap
+++ b/llvm/include/llvm/module.modulemap
@@ -181,21 +181,9 @@ module LLVM_ExecutionEngine {
// translation unit (or none) and aren't part of this module.
exclude header "ExecutionEngine/MCJIT.h"
exclude header "ExecutionEngine/Interpreter.h"
- exclude header "ExecutionEngine/OrcMCJITReplacement.h"
-
- // FIXME: These exclude directives were added as a workaround for
- // <rdar://problem/29247092> and should be removed once it is fixed.
- exclude header "ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h"
- exclude header "ExecutionEngine/Orc/OrcRemoteTargetClient.h"
- exclude header "ExecutionEngine/Orc/OrcRemoteTargetServer.h"
- exclude header "ExecutionEngine/Orc/RemoteObjectLayer.h"
// Exclude headers from LLVM_OrcSupport.
exclude header "ExecutionEngine/Orc/Shared/OrcError.h"
- exclude header "ExecutionEngine/Orc/RPC/RPCUtils.h"
- exclude header "ExecutionEngine/Orc/RPC/RPCSerialization.h"
- exclude header "ExecutionEngine/Orc/RPC/RawByteChannel.h"
-
}
module LLVM_FileCheck {
@@ -221,9 +209,6 @@ module LLVM_OrcSupport {
requires cplusplus
header "ExecutionEngine/Orc/Shared/OrcError.h"
- header "ExecutionEngine/Orc/Shared/RPCUtils.h"
- header "ExecutionEngine/Orc/Shared/Serialization.h"
- header "ExecutionEngine/Orc/Shared/RawByteChannel.h"
export *
}
@@ -389,6 +374,9 @@ module LLVM_Transforms {
umbrella "Transforms"
module * { export * }
+
+ // Requires DEBUG_TYPE to be defined by including file.
+ exclude header "Transforms/Utils/InstructionWorklist.h"
}
extern module LLVM_Extern_Utils_DataTypes "module.extern.modulemap"
diff --git a/llvm/lib/Analysis/AliasAnalysis.cpp b/llvm/lib/Analysis/AliasAnalysis.cpp
index e7445e225d52..d030f74481cf 100644
--- a/llvm/lib/Analysis/AliasAnalysis.cpp
+++ b/llvm/lib/Analysis/AliasAnalysis.cpp
@@ -119,7 +119,7 @@ bool AAResults::invalidate(Function &F, const PreservedAnalyses &PA,
AliasResult AAResults::alias(const MemoryLocation &LocA,
const MemoryLocation &LocB) {
- AAQueryInfo AAQIP;
+ SimpleAAQueryInfo AAQIP;
return alias(LocA, LocB, AAQIP);
}
@@ -162,7 +162,7 @@ AliasResult AAResults::alias(const MemoryLocation &LocA,
bool AAResults::pointsToConstantMemory(const MemoryLocation &Loc,
bool OrLocal) {
- AAQueryInfo AAQIP;
+ SimpleAAQueryInfo AAQIP;
return pointsToConstantMemory(Loc, AAQIP, OrLocal);
}
@@ -190,7 +190,7 @@ ModRefInfo AAResults::getArgModRefInfo(const CallBase *Call, unsigned ArgIdx) {
}
ModRefInfo AAResults::getModRefInfo(Instruction *I, const CallBase *Call2) {
- AAQueryInfo AAQIP;
+ SimpleAAQueryInfo AAQIP;
return getModRefInfo(I, Call2, AAQIP);
}
@@ -200,25 +200,24 @@ ModRefInfo AAResults::getModRefInfo(Instruction *I, const CallBase *Call2,
if (const auto *Call1 = dyn_cast<CallBase>(I)) {
// Check if the two calls modify the same memory.
return getModRefInfo(Call1, Call2, AAQI);
- } else if (I->isFenceLike()) {
- // If this is a fence, just return ModRef.
- return ModRefInfo::ModRef;
- } else {
- // Otherwise, check if the call modifies or references the
- // location this memory access defines. The best we can say
- // is that if the call references what this instruction
- // defines, it must be clobbered by this location.
- const MemoryLocation DefLoc = MemoryLocation::get(I);
- ModRefInfo MR = getModRefInfo(Call2, DefLoc, AAQI);
- if (isModOrRefSet(MR))
- return setModAndRef(MR);
}
+ // If this is a fence, just return ModRef.
+ if (I->isFenceLike())
+ return ModRefInfo::ModRef;
+ // Otherwise, check if the call modifies or references the
+ // location this memory access defines. The best we can say
+ // is that if the call references what this instruction
+ // defines, it must be clobbered by this location.
+ const MemoryLocation DefLoc = MemoryLocation::get(I);
+ ModRefInfo MR = getModRefInfo(Call2, DefLoc, AAQI);
+ if (isModOrRefSet(MR))
+ return setModAndRef(MR);
return ModRefInfo::NoModRef;
}
ModRefInfo AAResults::getModRefInfo(const CallBase *Call,
const MemoryLocation &Loc) {
- AAQueryInfo AAQIP;
+ SimpleAAQueryInfo AAQIP;
return getModRefInfo(Call, Loc, AAQIP);
}
@@ -285,7 +284,7 @@ ModRefInfo AAResults::getModRefInfo(const CallBase *Call,
ModRefInfo AAResults::getModRefInfo(const CallBase *Call1,
const CallBase *Call2) {
- AAQueryInfo AAQIP;
+ SimpleAAQueryInfo AAQIP;
return getModRefInfo(Call1, Call2, AAQIP);
}
@@ -475,7 +474,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, AliasResult AR) {
ModRefInfo AAResults::getModRefInfo(const LoadInst *L,
const MemoryLocation &Loc) {
- AAQueryInfo AAQIP;
+ SimpleAAQueryInfo AAQIP;
return getModRefInfo(L, Loc, AAQIP);
}
ModRefInfo AAResults::getModRefInfo(const LoadInst *L,
@@ -500,7 +499,7 @@ ModRefInfo AAResults::getModRefInfo(const LoadInst *L,
ModRefInfo AAResults::getModRefInfo(const StoreInst *S,
const MemoryLocation &Loc) {
- AAQueryInfo AAQIP;
+ SimpleAAQueryInfo AAQIP;
return getModRefInfo(S, Loc, AAQIP);
}
ModRefInfo AAResults::getModRefInfo(const StoreInst *S,
@@ -532,7 +531,7 @@ ModRefInfo AAResults::getModRefInfo(const StoreInst *S,
}
ModRefInfo AAResults::getModRefInfo(const FenceInst *S, const MemoryLocation &Loc) {
- AAQueryInfo AAQIP;
+ SimpleAAQueryInfo AAQIP;
return getModRefInfo(S, Loc, AAQIP);
}
@@ -548,7 +547,7 @@ ModRefInfo AAResults::getModRefInfo(const FenceInst *S,
ModRefInfo AAResults::getModRefInfo(const VAArgInst *V,
const MemoryLocation &Loc) {
- AAQueryInfo AAQIP;
+ SimpleAAQueryInfo AAQIP;
return getModRefInfo(V, Loc, AAQIP);
}
@@ -578,7 +577,7 @@ ModRefInfo AAResults::getModRefInfo(const VAArgInst *V,
ModRefInfo AAResults::getModRefInfo(const CatchPadInst *CatchPad,
const MemoryLocation &Loc) {
- AAQueryInfo AAQIP;
+ SimpleAAQueryInfo AAQIP;
return getModRefInfo(CatchPad, Loc, AAQIP);
}
@@ -598,7 +597,7 @@ ModRefInfo AAResults::getModRefInfo(const CatchPadInst *CatchPad,
ModRefInfo AAResults::getModRefInfo(const CatchReturnInst *CatchRet,
const MemoryLocation &Loc) {
- AAQueryInfo AAQIP;
+ SimpleAAQueryInfo AAQIP;
return getModRefInfo(CatchRet, Loc, AAQIP);
}
@@ -618,7 +617,7 @@ ModRefInfo AAResults::getModRefInfo(const CatchReturnInst *CatchRet,
ModRefInfo AAResults::getModRefInfo(const AtomicCmpXchgInst *CX,
const MemoryLocation &Loc) {
- AAQueryInfo AAQIP;
+ SimpleAAQueryInfo AAQIP;
return getModRefInfo(CX, Loc, AAQIP);
}
@@ -646,7 +645,7 @@ ModRefInfo AAResults::getModRefInfo(const AtomicCmpXchgInst *CX,
ModRefInfo AAResults::getModRefInfo(const AtomicRMWInst *RMW,
const MemoryLocation &Loc) {
- AAQueryInfo AAQIP;
+ SimpleAAQueryInfo AAQIP;
return getModRefInfo(RMW, Loc, AAQIP);
}
@@ -746,7 +745,7 @@ ModRefInfo AAResults::callCapturesBefore(const Instruction *I,
// pointer were passed to arguments that were neither of these, then it
// couldn't be no-capture.
if (!(*CI)->getType()->isPointerTy() ||
- (!Call->doesNotCapture(ArgNo) && ArgNo < Call->getNumArgOperands() &&
+ (!Call->doesNotCapture(ArgNo) && ArgNo < Call->arg_size() &&
!Call->isByValArgument(ArgNo)))
continue;
@@ -808,11 +807,6 @@ AAResults::Concept::~Concept() = default;
// Provide a definition for the static object used to identify passes.
AnalysisKey AAManager::Key;
-namespace {
-
-
-} // end anonymous namespace
-
ExternalAAWrapperPass::ExternalAAWrapperPass() : ImmutablePass(ID) {
initializeExternalAAWrapperPassPass(*PassRegistry::getPassRegistry());
}
diff --git a/llvm/lib/Analysis/AssumeBundleQueries.cpp b/llvm/lib/Analysis/AssumeBundleQueries.cpp
index dee044346f02..9d4fe1225b33 100644
--- a/llvm/lib/Analysis/AssumeBundleQueries.cpp
+++ b/llvm/lib/Analysis/AssumeBundleQueries.cpp
@@ -84,7 +84,7 @@ void llvm::fillMapFromAssume(AssumeInst &Assume, RetainedKnowledgeMap &Result) {
getValueFromBundleOpInfo(Assume, Bundles, ABA_Argument));
if (!CI)
continue;
- unsigned Val = CI->getZExtValue();
+ uint64_t Val = CI->getZExtValue();
auto Lookup = Result.find(Key);
if (Lookup == Result.end() || !Lookup->second.count(&Assume)) {
Result[Key][&Assume] = {Val, Val};
@@ -102,7 +102,7 @@ llvm::getKnowledgeFromBundle(AssumeInst &Assume,
Result.AttrKind = Attribute::getAttrKindFromName(BOI.Tag->getKey());
if (bundleHasArgument(BOI, ABA_WasOn))
Result.WasOn = getValueFromBundleOpInfo(Assume, BOI, ABA_WasOn);
- auto GetArgOr1 = [&](unsigned Idx) -> unsigned {
+ auto GetArgOr1 = [&](unsigned Idx) -> uint64_t {
if (auto *ConstInt = dyn_cast<ConstantInt>(
getValueFromBundleOpInfo(Assume, BOI, ABA_Argument + Idx)))
return ConstInt->getZExtValue();
diff --git a/llvm/lib/Analysis/AssumptionCache.cpp b/llvm/lib/Analysis/AssumptionCache.cpp
index 0d95b33601f9..3e0214e21ecd 100644
--- a/llvm/lib/Analysis/AssumptionCache.cpp
+++ b/llvm/lib/Analysis/AssumptionCache.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
@@ -56,7 +57,7 @@ AssumptionCache::getOrInsertAffectedValues(Value *V) {
}
static void
-findAffectedValues(CallBase *CI,
+findAffectedValues(CallBase *CI, TargetTransformInfo *TTI,
SmallVectorImpl<AssumptionCache::ResultElem> &Affected) {
// Note: This code must be kept in-sync with the code in
// computeKnownBitsFromAssume in ValueTracking.
@@ -124,24 +125,32 @@ findAffectedValues(CallBase *CI,
match(B, m_ConstantInt()))
AddAffected(X);
}
+
+ if (TTI) {
+ const Value *Ptr;
+ unsigned AS;
+ std::tie(Ptr, AS) = TTI->getPredicatedAddrSpace(Cond);
+ if (Ptr)
+ AddAffected(const_cast<Value *>(Ptr->stripInBoundsOffsets()));
+ }
}
void AssumptionCache::updateAffectedValues(AssumeInst *CI) {
SmallVector<AssumptionCache::ResultElem, 16> Affected;
- findAffectedValues(CI, Affected);
+ findAffectedValues(CI, TTI, Affected);
for (auto &AV : Affected) {
auto &AVV = getOrInsertAffectedValues(AV.Assume);
- if (std::find_if(AVV.begin(), AVV.end(), [&](ResultElem &Elem) {
+ if (llvm::none_of(AVV, [&](ResultElem &Elem) {
return Elem.Assume == CI && Elem.Index == AV.Index;
- }) == AVV.end())
+ }))
AVV.push_back({CI, AV.Index});
}
}
void AssumptionCache::unregisterAssumption(AssumeInst *CI) {
SmallVector<AssumptionCache::ResultElem, 16> Affected;
- findAffectedValues(CI, Affected);
+ findAffectedValues(CI, TTI, Affected);
for (auto &AV : Affected) {
auto AVI = AffectedValues.find_as(AV.Assume);
@@ -248,6 +257,12 @@ void AssumptionCache::registerAssumption(AssumeInst *CI) {
updateAffectedValues(CI);
}
+AssumptionCache AssumptionAnalysis::run(Function &F,
+ FunctionAnalysisManager &FAM) {
+ auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
+ return AssumptionCache(F, &TTI);
+}
+
AnalysisKey AssumptionAnalysis::Key;
PreservedAnalyses AssumptionPrinterPass::run(Function &F,
@@ -278,10 +293,13 @@ AssumptionCache &AssumptionCacheTracker::getAssumptionCache(Function &F) {
if (I != AssumptionCaches.end())
return *I->second;
+ auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
+ auto *TTI = TTIWP ? &TTIWP->getTTI(F) : nullptr;
+
// Ok, build a new cache by scanning the function, insert it and the value
// handle into our map, and return the newly populated cache.
auto IP = AssumptionCaches.insert(std::make_pair(
- FunctionCallbackVH(&F, this), std::make_unique<AssumptionCache>(F)));
+ FunctionCallbackVH(&F, this), std::make_unique<AssumptionCache>(F, TTI)));
assert(IP.second && "Scanning function already in the map?");
return *IP.first->second;
}
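
The AssumptionCache hunk above also swaps a std::find_if(...) == end() comparison for llvm::none_of when recording an affected value, so each (assumption, operand-index) pair is appended only once. The same append-if-absent idiom, sketched with the standard library's std::none_of and an invented Entry type:

#include <algorithm>
#include <cstddef>
#include <vector>

struct Entry {
  int Assume;        // stand-in for the assumption identity
  std::size_t Index; // stand-in for the affected operand index
};

// Append {Assume, Index} only if no equivalent entry has been recorded yet.
void appendIfAbsent(std::vector<Entry> &Entries, int Assume,
                    std::size_t Index) {
  if (std::none_of(Entries.begin(), Entries.end(), [&](const Entry &E) {
        return E.Assume == Assume && E.Index == Index;
      }))
    Entries.push_back({Assume, Index});
}
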
diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index 357772c9c4f2..88b0f37b1d48 100644
--- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -31,6 +31,7 @@
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constant.h"
+#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
@@ -68,15 +69,6 @@ using namespace llvm;
static cl::opt<bool> EnableRecPhiAnalysis("basic-aa-recphi", cl::Hidden,
cl::init(true));
-/// By default, even on 32-bit architectures we use 64-bit integers for
-/// calculations. This will allow us to more-aggressively decompose indexing
-/// expressions calculated using i64 values (e.g., long long in C) which is
-/// common enough to worry about.
-static cl::opt<bool> ForceAtLeast64Bits("basic-aa-force-at-least-64b",
- cl::Hidden, cl::init(true));
-static cl::opt<bool> DoubleCalcBits("basic-aa-double-calc-bits",
- cl::Hidden, cl::init(false));
-
/// SearchLimitReached / SearchTimes shows how often the limit of
/// to decompose GEPs is reached. It will affect the precision
/// of basic alias analysis.
@@ -91,8 +83,7 @@ STATISTIC(SearchTimes, "Number of times a GEP is decomposed");
const unsigned MaxNumPhiBBsValueReachabilityCheck = 20;
// The max limit of the search depth in DecomposeGEPExpression() and
-// getUnderlyingObject(), both functions need to use the same search
-// depth otherwise the algorithm in aliasGEP will assert.
+// getUnderlyingObject().
static const unsigned MaxLookupSearchDepth = 6;
bool BasicAAResult::invalidate(Function &Fn, const PreservedAnalyses &PA,
@@ -120,9 +111,6 @@ static bool isEscapeSource(const Value *V) {
if (isa<CallBase>(V))
return true;
- if (isa<Argument>(V))
- return true;
-
// The load case works because isNonEscapingLocalObject considers all
// stores to be escapes (it passes true for the StoreCaptures argument
// to PointerMayBeCaptured).
@@ -206,12 +194,12 @@ static uint64_t getMinimalExtentFrom(const Value &V,
bool NullIsValidLoc) {
// If we have dereferenceability information we know a lower bound for the
// extent as accesses for a lower offset would be valid. We need to exclude
- // the "or null" part if null is a valid pointer.
+ // the "or null" part if null is a valid pointer. We can ignore frees, as an
+ // access after free would be undefined behavior.
bool CanBeNull, CanBeFreed;
uint64_t DerefBytes =
V.getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed);
DerefBytes = (CanBeNull && NullIsValidLoc) ? 0 : DerefBytes;
- DerefBytes = CanBeFreed ? 0 : DerefBytes;
// If queried with a precise location size, we assume that location size to be
// accessed, thus valid.
if (LocSize.isPrecise())
@@ -227,82 +215,163 @@ static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL,
}
//===----------------------------------------------------------------------===//
+// CaptureInfo implementations
+//===----------------------------------------------------------------------===//
+
+CaptureInfo::~CaptureInfo() = default;
+
+bool SimpleCaptureInfo::isNotCapturedBeforeOrAt(const Value *Object,
+ const Instruction *I) {
+ return isNonEscapingLocalObject(Object, &IsCapturedCache);
+}
+
+bool EarliestEscapeInfo::isNotCapturedBeforeOrAt(const Value *Object,
+ const Instruction *I) {
+ if (!isIdentifiedFunctionLocal(Object))
+ return false;
+
+ auto Iter = EarliestEscapes.insert({Object, nullptr});
+ if (Iter.second) {
+ Instruction *EarliestCapture = FindEarliestCapture(
+ Object, *const_cast<Function *>(I->getFunction()),
+ /*ReturnCaptures=*/false, /*StoreCaptures=*/true, DT);
+ if (EarliestCapture) {
+ auto Ins = Inst2Obj.insert({EarliestCapture, {}});
+ Ins.first->second.push_back(Object);
+ }
+ Iter.first->second = EarliestCapture;
+ }
+
+ // No capturing instruction.
+ if (!Iter.first->second)
+ return true;
+
+ return I != Iter.first->second &&
+ !isPotentiallyReachable(Iter.first->second, I, nullptr, &DT, &LI);
+}
+
+void EarliestEscapeInfo::removeInstruction(Instruction *I) {
+ auto Iter = Inst2Obj.find(I);
+ if (Iter != Inst2Obj.end()) {
+ for (const Value *Obj : Iter->second)
+ EarliestEscapes.erase(Obj);
+ Inst2Obj.erase(I);
+ }
+}
+
+//===----------------------------------------------------------------------===//
// GetElementPtr Instruction Decomposition and Analysis
//===----------------------------------------------------------------------===//
namespace {
-/// Represents zext(sext(V)).
-struct ExtendedValue {
+/// Represents zext(sext(trunc(V))).
+struct CastedValue {
const Value *V;
- unsigned ZExtBits;
- unsigned SExtBits;
+ unsigned ZExtBits = 0;
+ unsigned SExtBits = 0;
+ unsigned TruncBits = 0;
- explicit ExtendedValue(const Value *V, unsigned ZExtBits = 0,
- unsigned SExtBits = 0)
- : V(V), ZExtBits(ZExtBits), SExtBits(SExtBits) {}
+ explicit CastedValue(const Value *V) : V(V) {}
+ explicit CastedValue(const Value *V, unsigned ZExtBits, unsigned SExtBits,
+ unsigned TruncBits)
+ : V(V), ZExtBits(ZExtBits), SExtBits(SExtBits), TruncBits(TruncBits) {}
unsigned getBitWidth() const {
- return V->getType()->getPrimitiveSizeInBits() + ZExtBits + SExtBits;
+ return V->getType()->getPrimitiveSizeInBits() - TruncBits + ZExtBits +
+ SExtBits;
}
- ExtendedValue withValue(const Value *NewV) const {
- return ExtendedValue(NewV, ZExtBits, SExtBits);
+ CastedValue withValue(const Value *NewV) const {
+ return CastedValue(NewV, ZExtBits, SExtBits, TruncBits);
}
- ExtendedValue withZExtOfValue(const Value *NewV) const {
+ /// Replace V with zext(NewV)
+ CastedValue withZExtOfValue(const Value *NewV) const {
unsigned ExtendBy = V->getType()->getPrimitiveSizeInBits() -
NewV->getType()->getPrimitiveSizeInBits();
+ if (ExtendBy <= TruncBits)
+ return CastedValue(NewV, ZExtBits, SExtBits, TruncBits - ExtendBy);
+
// zext(sext(zext(NewV))) == zext(zext(zext(NewV)))
- return ExtendedValue(NewV, ZExtBits + SExtBits + ExtendBy, 0);
+ ExtendBy -= TruncBits;
+ return CastedValue(NewV, ZExtBits + SExtBits + ExtendBy, 0, 0);
}
- ExtendedValue withSExtOfValue(const Value *NewV) const {
+ /// Replace V with sext(NewV)
+ CastedValue withSExtOfValue(const Value *NewV) const {
unsigned ExtendBy = V->getType()->getPrimitiveSizeInBits() -
NewV->getType()->getPrimitiveSizeInBits();
+ if (ExtendBy <= TruncBits)
+ return CastedValue(NewV, ZExtBits, SExtBits, TruncBits - ExtendBy);
+
// zext(sext(sext(NewV)))
- return ExtendedValue(NewV, ZExtBits, SExtBits + ExtendBy);
+ ExtendBy -= TruncBits;
+ return CastedValue(NewV, ZExtBits, SExtBits + ExtendBy, 0);
}
APInt evaluateWith(APInt N) const {
assert(N.getBitWidth() == V->getType()->getPrimitiveSizeInBits() &&
"Incompatible bit width");
+ if (TruncBits) N = N.trunc(N.getBitWidth() - TruncBits);
if (SExtBits) N = N.sext(N.getBitWidth() + SExtBits);
if (ZExtBits) N = N.zext(N.getBitWidth() + ZExtBits);
return N;
}
+ ConstantRange evaluateWith(ConstantRange N) const {
+ assert(N.getBitWidth() == V->getType()->getPrimitiveSizeInBits() &&
+ "Incompatible bit width");
+ if (TruncBits) N = N.truncate(N.getBitWidth() - TruncBits);
+ if (SExtBits) N = N.signExtend(N.getBitWidth() + SExtBits);
+ if (ZExtBits) N = N.zeroExtend(N.getBitWidth() + ZExtBits);
+ return N;
+ }
+
bool canDistributeOver(bool NUW, bool NSW) const {
// zext(x op<nuw> y) == zext(x) op<nuw> zext(y)
// sext(x op<nsw> y) == sext(x) op<nsw> sext(y)
+ // trunc(x op y) == trunc(x) op trunc(y)
return (!ZExtBits || NUW) && (!SExtBits || NSW);
}
+
+ bool hasSameCastsAs(const CastedValue &Other) const {
+ return ZExtBits == Other.ZExtBits && SExtBits == Other.SExtBits &&
+ TruncBits == Other.TruncBits;
+ }
};
-/// Represents zext(sext(V)) * Scale + Offset.
+/// Represents zext(sext(trunc(V))) * Scale + Offset.
struct LinearExpression {
- ExtendedValue Val;
+ CastedValue Val;
APInt Scale;
APInt Offset;
/// True if all operations in this expression are NSW.
bool IsNSW;
- LinearExpression(const ExtendedValue &Val, const APInt &Scale,
+ LinearExpression(const CastedValue &Val, const APInt &Scale,
const APInt &Offset, bool IsNSW)
: Val(Val), Scale(Scale), Offset(Offset), IsNSW(IsNSW) {}
- LinearExpression(const ExtendedValue &Val) : Val(Val), IsNSW(true) {
+ LinearExpression(const CastedValue &Val) : Val(Val), IsNSW(true) {
unsigned BitWidth = Val.getBitWidth();
Scale = APInt(BitWidth, 1);
Offset = APInt(BitWidth, 0);
}
+
+ LinearExpression mul(const APInt &Other, bool MulIsNSW) const {
+ // The check for zero offset is necessary, because generally
+ // (X +nsw Y) *nsw Z does not imply (X *nsw Z) +nsw (Y *nsw Z).
+ bool NSW = IsNSW && (Other.isOne() || (MulIsNSW && Offset.isZero()));
+ return LinearExpression(Val, Scale * Other, Offset * Other, NSW);
+ }
};
}
/// Analyzes the specified value as a linear expression: "A*V + B", where A and
/// B are constant integers.
static LinearExpression GetLinearExpression(
- const ExtendedValue &Val, const DataLayout &DL, unsigned Depth,
+ const CastedValue &Val, const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, DominatorTree *DT) {
// Limit our recursion depth.
if (Depth == 6)
@@ -325,6 +394,11 @@ static LinearExpression GetLinearExpression(
if (!Val.canDistributeOver(NUW, NSW))
return Val;
+ // While we can distribute over trunc, we cannot preserve nowrap flags
+ // in that case.
+ if (Val.TruncBits)
+ NUW = NSW = false;
+
LinearExpression E(Val);
switch (BOp->getOpcode()) {
default:
@@ -353,14 +427,11 @@ static LinearExpression GetLinearExpression(
E.IsNSW &= NSW;
break;
}
- case Instruction::Mul: {
+ case Instruction::Mul:
E = GetLinearExpression(Val.withValue(BOp->getOperand(0)), DL,
- Depth + 1, AC, DT);
- E.Offset *= RHS;
- E.Scale *= RHS;
- E.IsNSW &= NSW;
+ Depth + 1, AC, DT)
+ .mul(RHS, NSW);
break;
- }
case Instruction::Shl:
// We're trying to linearize an expression of the kind:
// shl i8 -128, 36
@@ -394,25 +465,75 @@ static LinearExpression GetLinearExpression(
return Val;
}
-/// To ensure a pointer offset fits in an integer of size PointerSize
-/// (in bits) when that size is smaller than the maximum pointer size. This is
+/// To ensure a pointer offset fits in an integer of size IndexSize
+/// (in bits) when that size is smaller than the maximum index size. This is
/// an issue, for example, in particular for 32b pointers with negative indices
/// that rely on two's complement wrap-arounds for precise alias information
-/// where the maximum pointer size is 64b.
-static APInt adjustToPointerSize(const APInt &Offset, unsigned PointerSize) {
- assert(PointerSize <= Offset.getBitWidth() && "Invalid PointerSize!");
- unsigned ShiftBits = Offset.getBitWidth() - PointerSize;
+/// where the maximum index size is 64b.
+static APInt adjustToIndexSize(const APInt &Offset, unsigned IndexSize) {
+ assert(IndexSize <= Offset.getBitWidth() && "Invalid IndexSize!");
+ unsigned ShiftBits = Offset.getBitWidth() - IndexSize;
return (Offset << ShiftBits).ashr(ShiftBits);
}
-static unsigned getMaxPointerSize(const DataLayout &DL) {
- unsigned MaxPointerSize = DL.getMaxPointerSizeInBits();
- if (MaxPointerSize < 64 && ForceAtLeast64Bits) MaxPointerSize = 64;
- if (DoubleCalcBits) MaxPointerSize *= 2;
+namespace {
+// A linear transformation of a Value; this class represents
+// ZExt(SExt(Trunc(V, TruncBits), SExtBits), ZExtBits) * Scale.
+struct VariableGEPIndex {
+ CastedValue Val;
+ APInt Scale;
+
+ // Context instruction to use when querying information about this index.
+ const Instruction *CxtI;
+
+ /// True if all operations in this expression are NSW.
+ bool IsNSW;
- return MaxPointerSize;
+ void dump() const {
+ print(dbgs());
+ dbgs() << "\n";
+ }
+ void print(raw_ostream &OS) const {
+ OS << "(V=" << Val.V->getName()
+ << ", zextbits=" << Val.ZExtBits
+ << ", sextbits=" << Val.SExtBits
+ << ", truncbits=" << Val.TruncBits
+ << ", scale=" << Scale << ")";
+ }
+};
}
+// Represents the internal structure of a GEP, decomposed into a base pointer,
+// constant offsets, and variable scaled indices.
+struct BasicAAResult::DecomposedGEP {
+ // Base pointer of the GEP
+ const Value *Base;
+ // Total constant offset from base.
+ APInt Offset;
+ // Scaled variable (non-constant) indices.
+ SmallVector<VariableGEPIndex, 4> VarIndices;
+ // Are all operations inbounds GEPs or non-indexing operations?
+ // (None iff expression doesn't involve any geps)
+ Optional<bool> InBounds;
+
+ void dump() const {
+ print(dbgs());
+ dbgs() << "\n";
+ }
+ void print(raw_ostream &OS) const {
+ OS << "(DecomposedGEP Base=" << Base->getName()
+ << ", Offset=" << Offset
+ << ", VarIndices=[";
+ for (size_t i = 0; i < VarIndices.size(); i++) {
+ if (i != 0)
+ OS << ", ";
+ VarIndices[i].print(OS);
+ }
+ OS << "])";
+ }
+};
+
+
/// If V is a symbolic pointer expression, decompose it into a base pointer
/// with a constant offset and a number of scaled symbolic offsets.
///
@@ -420,11 +541,6 @@ static unsigned getMaxPointerSize(const DataLayout &DL) {
/// in the VarIndices vector) are Value*'s that are known to be scaled by the
/// specified amount, but which may have other unrepresented high bits. As
/// such, the gep cannot necessarily be reconstructed from its decomposed form.
-///
-/// This function is capable of analyzing everything that getUnderlyingObject
-/// can look through. To be able to do that getUnderlyingObject and
-/// DecomposeGEPExpression must use the same search depth
-/// (MaxLookupSearchDepth).
BasicAAResult::DecomposedGEP
BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
AssumptionCache *AC, DominatorTree *DT) {
@@ -433,10 +549,9 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
SearchTimes++;
const Instruction *CxtI = dyn_cast<Instruction>(V);
- unsigned MaxPointerSize = getMaxPointerSize(DL);
+ unsigned MaxIndexSize = DL.getMaxIndexSizeInBits();
DecomposedGEP Decomposed;
- Decomposed.Offset = APInt(MaxPointerSize, 0);
- Decomposed.HasCompileTimeConstantScale = true;
+ Decomposed.Offset = APInt(MaxIndexSize, 0);
do {
// See if this is a bitcast or GEP.
const Operator *Op = dyn_cast<Operator>(V);
@@ -493,24 +608,19 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
else if (!GEPOp->isInBounds())
Decomposed.InBounds = false;
- // Don't attempt to analyze GEPs over unsized objects.
- if (!GEPOp->getSourceElementType()->isSized()) {
- Decomposed.Base = V;
- return Decomposed;
- }
+ assert(GEPOp->getSourceElementType()->isSized() && "GEP must be sized");
// Don't attempt to analyze GEPs if index scale is not a compile-time
// constant.
if (isa<ScalableVectorType>(GEPOp->getSourceElementType())) {
Decomposed.Base = V;
- Decomposed.HasCompileTimeConstantScale = false;
return Decomposed;
}
unsigned AS = GEPOp->getPointerAddressSpace();
// Walk the indices of the GEP, accumulating them into BaseOff/VarIndices.
gep_type_iterator GTI = gep_type_begin(GEPOp);
- unsigned PointerSize = DL.getPointerSizeInBits(AS);
+ unsigned IndexSize = DL.getIndexSizeInBits(AS);
// Assume all GEP operands are constants until proven otherwise.
bool GepHasConstantOffset = true;
for (User::const_op_iterator I = GEPOp->op_begin() + 1, E = GEPOp->op_end();
@@ -533,49 +643,34 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
continue;
Decomposed.Offset +=
DL.getTypeAllocSize(GTI.getIndexedType()).getFixedSize() *
- CIdx->getValue().sextOrTrunc(MaxPointerSize);
+ CIdx->getValue().sextOrTrunc(MaxIndexSize);
continue;
}
GepHasConstantOffset = false;
- APInt Scale(MaxPointerSize,
- DL.getTypeAllocSize(GTI.getIndexedType()).getFixedSize());
- // If the integer type is smaller than the pointer size, it is implicitly
- // sign extended to pointer size.
+ // If the integer type is smaller than the index size, it is implicitly
+ // sign extended or truncated to index size.
unsigned Width = Index->getType()->getIntegerBitWidth();
- unsigned SExtBits = PointerSize > Width ? PointerSize - Width : 0;
+ unsigned SExtBits = IndexSize > Width ? IndexSize - Width : 0;
+ unsigned TruncBits = IndexSize < Width ? Width - IndexSize : 0;
LinearExpression LE = GetLinearExpression(
- ExtendedValue(Index, 0, SExtBits), DL, 0, AC, DT);
-
- // The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale.
- // This gives us an aggregate computation of (C1*Scale)*V + C2*Scale.
-
- // It can be the case that, even through C1*V+C2 does not overflow for
- // relevant values of V, (C2*Scale) can overflow. In that case, we cannot
- // decompose the expression in this way.
- //
- // FIXME: C1*Scale and the other operations in the decomposed
- // (C1*Scale)*V+C2*Scale can also overflow. We should check for this
- // possibility.
- bool Overflow;
- APInt ScaledOffset = LE.Offset.sextOrTrunc(MaxPointerSize)
- .smul_ov(Scale, Overflow);
- if (Overflow) {
- LE = LinearExpression(ExtendedValue(Index, 0, SExtBits));
- } else {
- Decomposed.Offset += ScaledOffset;
- Scale *= LE.Scale.sextOrTrunc(MaxPointerSize);
- }
+ CastedValue(Index, 0, SExtBits, TruncBits), DL, 0, AC, DT);
+
+ // Scale by the type size.
+ unsigned TypeSize =
+ DL.getTypeAllocSize(GTI.getIndexedType()).getFixedSize();
+ LE = LE.mul(APInt(IndexSize, TypeSize), GEPOp->isInBounds());
+ Decomposed.Offset += LE.Offset.sextOrSelf(MaxIndexSize);
+ APInt Scale = LE.Scale.sextOrSelf(MaxIndexSize);
// If we already had an occurrence of this index variable, merge this
// scale into it. For example, we want to handle:
// A[x][x] -> x*16 + x*4 -> x*20
// This also ensures that 'x' only appears in the index list once.
for (unsigned i = 0, e = Decomposed.VarIndices.size(); i != e; ++i) {
- if (Decomposed.VarIndices[i].V == LE.Val.V &&
- Decomposed.VarIndices[i].ZExtBits == LE.Val.ZExtBits &&
- Decomposed.VarIndices[i].SExtBits == LE.Val.SExtBits) {
+ if (Decomposed.VarIndices[i].Val.V == LE.Val.V &&
+ Decomposed.VarIndices[i].Val.hasSameCastsAs(LE.Val)) {
Scale += Decomposed.VarIndices[i].Scale;
Decomposed.VarIndices.erase(Decomposed.VarIndices.begin() + i);
break;
@@ -583,19 +678,18 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
}
// Make sure that we have a scale that makes sense for this target's
- // pointer size.
- Scale = adjustToPointerSize(Scale, PointerSize);
+ // index size.
+ Scale = adjustToIndexSize(Scale, IndexSize);
if (!!Scale) {
- VariableGEPIndex Entry = {
- LE.Val.V, LE.Val.ZExtBits, LE.Val.SExtBits, Scale, CxtI, LE.IsNSW};
+ VariableGEPIndex Entry = {LE.Val, Scale, CxtI, LE.IsNSW};
Decomposed.VarIndices.push_back(Entry);
}
}
// Take care of wrap-arounds
if (GepHasConstantOffset)
- Decomposed.Offset = adjustToPointerSize(Decomposed.Offset, PointerSize);
+ Decomposed.Offset = adjustToIndexSize(Decomposed.Offset, IndexSize);
// Analyze the base pointer next.
V = GEPOp->getOperand(0);
@@ -838,7 +932,7 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call,
// then the call can not mod/ref the pointer unless the call takes the pointer
// as an argument, and itself doesn't capture it.
if (!isa<Constant>(Object) && Call != Object &&
- isNonEscapingLocalObject(Object, &AAQI.IsCapturedCache)) {
+ AAQI.CI->isNotCapturedBeforeOrAt(Object, Call)) {
// Optimistically assume that call doesn't touch Object and check this
// assumption in the following loop.
@@ -852,8 +946,7 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call,
// pointer were passed to arguments that were neither of these, then it
// couldn't be no-capture.
if (!(*CI)->getType()->isPointerTy() ||
- (!Call->doesNotCapture(OperandNo) &&
- OperandNo < Call->getNumArgOperands() &&
+ (!Call->doesNotCapture(OperandNo) && OperandNo < Call->arg_size() &&
!Call->isByValArgument(OperandNo)))
continue;
@@ -1046,20 +1139,13 @@ AliasResult BasicAAResult::aliasGEP(
DecomposedGEP DecompGEP1 = DecomposeGEPExpression(GEP1, DL, &AC, DT);
DecomposedGEP DecompGEP2 = DecomposeGEPExpression(V2, DL, &AC, DT);
- // Don't attempt to analyze the decomposed GEP if index scale is not a
- // compile-time constant.
- if (!DecompGEP1.HasCompileTimeConstantScale ||
- !DecompGEP2.HasCompileTimeConstantScale)
+ // Bail if we were not able to decompose anything.
+ if (DecompGEP1.Base == GEP1 && DecompGEP2.Base == V2)
return AliasResult::MayAlias;
- assert(DecompGEP1.Base == UnderlyingV1 && DecompGEP2.Base == UnderlyingV2 &&
- "DecomposeGEPExpression returned a result different from "
- "getUnderlyingObject");
-
// Subtract the GEP2 pointer from the GEP1 pointer to find out their
// symbolic difference.
- DecompGEP1.Offset -= DecompGEP2.Offset;
- GetIndexDifference(DecompGEP1.VarIndices, DecompGEP2.VarIndices);
+ subtractDecomposedGEPs(DecompGEP1, DecompGEP2);
// If an inbounds GEP would have to start from an out of bounds address
// for the two to alias, then we can assume noalias.
@@ -1079,14 +1165,14 @@ AliasResult BasicAAResult::aliasGEP(
// For GEPs with identical offsets, we can preserve the size and AAInfo
// when performing the alias check on the underlying objects.
if (DecompGEP1.Offset == 0 && DecompGEP1.VarIndices.empty())
- return getBestAAResults().alias(
- MemoryLocation(UnderlyingV1, V1Size),
- MemoryLocation(UnderlyingV2, V2Size), AAQI);
+ return getBestAAResults().alias(MemoryLocation(DecompGEP1.Base, V1Size),
+ MemoryLocation(DecompGEP2.Base, V2Size),
+ AAQI);
// Do the base pointers alias?
AliasResult BaseAlias = getBestAAResults().alias(
- MemoryLocation::getBeforeOrAfter(UnderlyingV1),
- MemoryLocation::getBeforeOrAfter(UnderlyingV2), AAQI);
+ MemoryLocation::getBeforeOrAfter(DecompGEP1.Base),
+ MemoryLocation::getBeforeOrAfter(DecompGEP2.Base), AAQI);
// If we get a No or May, then return it immediately, no amount of analysis
// will improve this situation.
@@ -1100,7 +1186,7 @@ AliasResult BasicAAResult::aliasGEP(
// is less than the size of the associated memory object, then we know
// that the objects are partially overlapping. If the difference is
// greater, we know they do not overlap.
- if (DecompGEP1.Offset != 0 && DecompGEP1.VarIndices.empty()) {
+ if (DecompGEP1.VarIndices.empty()) {
APInt &Off = DecompGEP1.Offset;
// Initialize for Off >= 0 (V2 <= GEP1) case.
@@ -1122,133 +1208,124 @@ AliasResult BasicAAResult::aliasGEP(
Off = -Off;
}
- if (VLeftSize.hasValue()) {
- const uint64_t LSize = VLeftSize.getValue();
- if (Off.ult(LSize)) {
- // Conservatively drop processing if a phi was visited and/or offset is
- // too big.
- AliasResult AR = AliasResult::PartialAlias;
- if (VRightSize.hasValue() && Off.ule(INT32_MAX) &&
- (Off + VRightSize.getValue()).ule(LSize)) {
- // Memory referenced by right pointer is nested. Save the offset in
- // cache. Note that originally offset estimated as GEP1-V2, but
- // AliasResult contains the shift that represents GEP1+Offset=V2.
- AR.setOffset(-Off.getSExtValue());
- AR.swap(Swapped);
- }
- return AR;
+ if (!VLeftSize.hasValue())
+ return AliasResult::MayAlias;
+
+ const uint64_t LSize = VLeftSize.getValue();
+ if (Off.ult(LSize)) {
+ // Conservatively drop processing if a phi was visited and/or offset is
+ // too big.
+ AliasResult AR = AliasResult::PartialAlias;
+ if (VRightSize.hasValue() && Off.ule(INT32_MAX) &&
+ (Off + VRightSize.getValue()).ule(LSize)) {
+ // Memory referenced by right pointer is nested. Save the offset in
+ // cache. Note that originally offset estimated as GEP1-V2, but
+ // AliasResult contains the shift that represents GEP1+Offset=V2.
+ AR.setOffset(-Off.getSExtValue());
+ AR.swap(Swapped);
}
- return AliasResult::NoAlias;
+ return AR;
}
+ return AliasResult::NoAlias;
}
- if (!DecompGEP1.VarIndices.empty()) {
- APInt GCD;
- bool AllNonNegative = DecompGEP1.Offset.isNonNegative();
- bool AllNonPositive = DecompGEP1.Offset.isNonPositive();
- for (unsigned i = 0, e = DecompGEP1.VarIndices.size(); i != e; ++i) {
- APInt Scale = DecompGEP1.VarIndices[i].Scale;
- APInt ScaleForGCD = DecompGEP1.VarIndices[i].Scale;
- if (!DecompGEP1.VarIndices[i].IsNSW)
- ScaleForGCD = APInt::getOneBitSet(Scale.getBitWidth(),
- Scale.countTrailingZeros());
-
- if (i == 0)
- GCD = ScaleForGCD.abs();
- else
- GCD = APIntOps::GreatestCommonDivisor(GCD, ScaleForGCD.abs());
-
- if (AllNonNegative || AllNonPositive) {
- // If the Value could change between cycles, then any reasoning about
- // the Value this cycle may not hold in the next cycle. We'll just
- // give up if we can't determine conditions that hold for every cycle:
- const Value *V = DecompGEP1.VarIndices[i].V;
- const Instruction *CxtI = DecompGEP1.VarIndices[i].CxtI;
-
- KnownBits Known = computeKnownBits(V, DL, 0, &AC, CxtI, DT);
- bool SignKnownZero = Known.isNonNegative();
- bool SignKnownOne = Known.isNegative();
-
- // Zero-extension widens the variable, and so forces the sign
- // bit to zero.
- bool IsZExt = DecompGEP1.VarIndices[i].ZExtBits > 0 || isa<ZExtInst>(V);
- SignKnownZero |= IsZExt;
- SignKnownOne &= !IsZExt;
-
- AllNonNegative &= (SignKnownZero && Scale.isNonNegative()) ||
- (SignKnownOne && Scale.isNonPositive());
- AllNonPositive &= (SignKnownZero && Scale.isNonPositive()) ||
- (SignKnownOne && Scale.isNonNegative());
- }
- }
+ // We need to know both access sizes for all the following heuristics.
+ if (!V1Size.hasValue() || !V2Size.hasValue())
+ return AliasResult::MayAlias;
- // We now have accesses at two offsets from the same base:
- // 1. (...)*GCD + DecompGEP1.Offset with size V1Size
- // 2. 0 with size V2Size
- // Using arithmetic modulo GCD, the accesses are at
- // [ModOffset..ModOffset+V1Size) and [0..V2Size). If the first access fits
- // into the range [V2Size..GCD), then we know they cannot overlap.
- APInt ModOffset = DecompGEP1.Offset.srem(GCD);
- if (ModOffset.isNegative())
- ModOffset += GCD; // We want mod, not rem.
- if (V1Size.hasValue() && V2Size.hasValue() &&
- ModOffset.uge(V2Size.getValue()) &&
- (GCD - ModOffset).uge(V1Size.getValue()))
- return AliasResult::NoAlias;
+ APInt GCD;
+ ConstantRange OffsetRange = ConstantRange(DecompGEP1.Offset);
+ for (unsigned i = 0, e = DecompGEP1.VarIndices.size(); i != e; ++i) {
+ const VariableGEPIndex &Index = DecompGEP1.VarIndices[i];
+ const APInt &Scale = Index.Scale;
+ APInt ScaleForGCD = Scale;
+ if (!Index.IsNSW)
+ ScaleForGCD = APInt::getOneBitSet(Scale.getBitWidth(),
+ Scale.countTrailingZeros());
+
+ if (i == 0)
+ GCD = ScaleForGCD.abs();
+ else
+ GCD = APIntOps::GreatestCommonDivisor(GCD, ScaleForGCD.abs());
+
+ ConstantRange CR =
+ computeConstantRange(Index.Val.V, true, &AC, Index.CxtI);
+ KnownBits Known =
+ computeKnownBits(Index.Val.V, DL, 0, &AC, Index.CxtI, DT);
+ CR = CR.intersectWith(
+ ConstantRange::fromKnownBits(Known, /* Signed */ true),
+ ConstantRange::Signed);
+ CR = Index.Val.evaluateWith(CR).sextOrTrunc(OffsetRange.getBitWidth());
+
+ assert(OffsetRange.getBitWidth() == Scale.getBitWidth() &&
+ "Bit widths are normalized to MaxIndexSize");
+ if (Index.IsNSW)
+ OffsetRange = OffsetRange.add(CR.smul_sat(ConstantRange(Scale)));
+ else
+ OffsetRange = OffsetRange.add(CR.smul_fast(ConstantRange(Scale)));
+ }
- // If we know all the variables are non-negative, then the total offset is
- // also non-negative and >= DecompGEP1.Offset. We have the following layout:
- // [0, V2Size) ... [TotalOffset, TotalOffer+V1Size]
- // If DecompGEP1.Offset >= V2Size, the accesses don't alias.
- if (AllNonNegative && V2Size.hasValue() &&
- DecompGEP1.Offset.uge(V2Size.getValue()))
- return AliasResult::NoAlias;
- // Similarly, if the variables are non-positive, then the total offset is
- // also non-positive and <= DecompGEP1.Offset. We have the following layout:
- // [TotalOffset, TotalOffset+V1Size) ... [0, V2Size)
- // If -DecompGEP1.Offset >= V1Size, the accesses don't alias.
- if (AllNonPositive && V1Size.hasValue() &&
- (-DecompGEP1.Offset).uge(V1Size.getValue()))
- return AliasResult::NoAlias;
+ // We now have accesses at two offsets from the same base:
+ // 1. (...)*GCD + DecompGEP1.Offset with size V1Size
+ // 2. 0 with size V2Size
+ // Using arithmetic modulo GCD, the accesses are at
+ // [ModOffset..ModOffset+V1Size) and [0..V2Size). If the first access fits
+ // into the range [V2Size..GCD), then we know they cannot overlap.
+ APInt ModOffset = DecompGEP1.Offset.srem(GCD);
+ if (ModOffset.isNegative())
+ ModOffset += GCD; // We want mod, not rem.
+ if (ModOffset.uge(V2Size.getValue()) &&
+ (GCD - ModOffset).uge(V1Size.getValue()))
+ return AliasResult::NoAlias;
- if (V1Size.hasValue() && V2Size.hasValue()) {
- // Try to determine whether abs(VarIndex) > 0.
- Optional<APInt> MinAbsVarIndex;
- if (DecompGEP1.VarIndices.size() == 1) {
- // VarIndex = Scale*V. If V != 0 then abs(VarIndex) >= abs(Scale).
- const VariableGEPIndex &Var = DecompGEP1.VarIndices[0];
- if (isKnownNonZero(Var.V, DL, 0, &AC, Var.CxtI, DT))
- MinAbsVarIndex = Var.Scale.abs();
- } else if (DecompGEP1.VarIndices.size() == 2) {
- // VarIndex = Scale*V0 + (-Scale)*V1.
- // If V0 != V1 then abs(VarIndex) >= abs(Scale).
- // Check that VisitedPhiBBs is empty, to avoid reasoning about
- // inequality of values across loop iterations.
- const VariableGEPIndex &Var0 = DecompGEP1.VarIndices[0];
- const VariableGEPIndex &Var1 = DecompGEP1.VarIndices[1];
- if (Var0.Scale == -Var1.Scale && Var0.ZExtBits == Var1.ZExtBits &&
- Var0.SExtBits == Var1.SExtBits && VisitedPhiBBs.empty() &&
- isKnownNonEqual(Var0.V, Var1.V, DL, &AC, /* CxtI */ nullptr, DT))
- MinAbsVarIndex = Var0.Scale.abs();
- }
+ // Compute ranges of potentially accessed bytes for both accesses. If the
+ // intersection is empty, there can be no overlap.
+ unsigned BW = OffsetRange.getBitWidth();
+ ConstantRange Range1 = OffsetRange.add(
+ ConstantRange(APInt(BW, 0), APInt(BW, V1Size.getValue())));
+ ConstantRange Range2 =
+ ConstantRange(APInt(BW, 0), APInt(BW, V2Size.getValue()));
+ if (Range1.intersectWith(Range2).isEmptySet())
+ return AliasResult::NoAlias;
- if (MinAbsVarIndex) {
- // The constant offset will have added at least +/-MinAbsVarIndex to it.
- APInt OffsetLo = DecompGEP1.Offset - *MinAbsVarIndex;
- APInt OffsetHi = DecompGEP1.Offset + *MinAbsVarIndex;
- // Check that an access at OffsetLo or lower, and an access at OffsetHi
- // or higher both do not alias.
- if (OffsetLo.isNegative() && (-OffsetLo).uge(V1Size.getValue()) &&
- OffsetHi.isNonNegative() && OffsetHi.uge(V2Size.getValue()))
- return AliasResult::NoAlias;
- }
+ // Try to determine the range of values for VarIndex such that
+ // VarIndex <= -MinAbsVarIndex || MinAbsVarIndex <= VarIndex.
+ Optional<APInt> MinAbsVarIndex;
+ if (DecompGEP1.VarIndices.size() == 1) {
+ // VarIndex = Scale*V.
+ const VariableGEPIndex &Var = DecompGEP1.VarIndices[0];
+ if (Var.Val.TruncBits == 0 &&
+ isKnownNonZero(Var.Val.V, DL, 0, &AC, Var.CxtI, DT)) {
+ // If V != 0 then abs(VarIndex) >= abs(Scale).
+ MinAbsVarIndex = Var.Scale.abs();
}
+ } else if (DecompGEP1.VarIndices.size() == 2) {
+ // VarIndex = Scale*V0 + (-Scale)*V1.
+ // If V0 != V1 then abs(VarIndex) >= abs(Scale).
+ // Check that VisitedPhiBBs is empty, to avoid reasoning about
+ // inequality of values across loop iterations.
+ const VariableGEPIndex &Var0 = DecompGEP1.VarIndices[0];
+ const VariableGEPIndex &Var1 = DecompGEP1.VarIndices[1];
+ if (Var0.Scale == -Var1.Scale && Var0.Val.TruncBits == 0 &&
+ Var0.Val.hasSameCastsAs(Var1.Val) && VisitedPhiBBs.empty() &&
+ isKnownNonEqual(Var0.Val.V, Var1.Val.V, DL, &AC, /* CxtI */ nullptr,
+ DT))
+ MinAbsVarIndex = Var0.Scale.abs();
+ }
- if (constantOffsetHeuristic(DecompGEP1.VarIndices, V1Size, V2Size,
- DecompGEP1.Offset, &AC, DT))
+ if (MinAbsVarIndex) {
+ // The constant offset will have added at least +/-MinAbsVarIndex to it.
+ APInt OffsetLo = DecompGEP1.Offset - *MinAbsVarIndex;
+ APInt OffsetHi = DecompGEP1.Offset + *MinAbsVarIndex;
+ // We know that Offset <= OffsetLo || Offset >= OffsetHi
+ if (OffsetLo.isNegative() && (-OffsetLo).uge(V1Size.getValue()) &&
+ OffsetHi.isNonNegative() && OffsetHi.uge(V2Size.getValue()))
return AliasResult::NoAlias;
}
+ if (constantOffsetHeuristic(DecompGEP1, V1Size, V2Size, &AC, DT))
+ return AliasResult::NoAlias;
+
// Statically, we can see that the base objects are the same, but the
// pointers have dynamic offsets which we can't resolve. And none of our
// little tricks above worked.
@@ -1517,10 +1594,10 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
// location if that memory location doesn't escape. Or it may pass a
// nocapture value to other functions as long as they don't capture it.
if (isEscapeSource(O1) &&
- isNonEscapingLocalObject(O2, &AAQI.IsCapturedCache))
+ AAQI.CI->isNotCapturedBeforeOrAt(O2, cast<Instruction>(O1)))
return AliasResult::NoAlias;
if (isEscapeSource(O2) &&
- isNonEscapingLocalObject(O1, &AAQI.IsCapturedCache))
+ AAQI.CI->isNotCapturedBeforeOrAt(O1, cast<Instruction>(O2)))
return AliasResult::NoAlias;
}
@@ -1692,62 +1769,54 @@ bool BasicAAResult::isValueEqualInPotentialCycles(const Value *V,
}
/// Computes the symbolic difference between two de-composed GEPs.
-///
-/// Dest and Src are the variable indices from two decomposed GetElementPtr
-/// instructions GEP1 and GEP2 which have common base pointers.
-void BasicAAResult::GetIndexDifference(
- SmallVectorImpl<VariableGEPIndex> &Dest,
- const SmallVectorImpl<VariableGEPIndex> &Src) {
- if (Src.empty())
- return;
-
- for (unsigned i = 0, e = Src.size(); i != e; ++i) {
- const Value *V = Src[i].V;
- unsigned ZExtBits = Src[i].ZExtBits, SExtBits = Src[i].SExtBits;
- APInt Scale = Src[i].Scale;
-
+void BasicAAResult::subtractDecomposedGEPs(DecomposedGEP &DestGEP,
+ const DecomposedGEP &SrcGEP) {
+ DestGEP.Offset -= SrcGEP.Offset;
+ for (const VariableGEPIndex &Src : SrcGEP.VarIndices) {
// Find V in Dest. This is N^2, but pointer indices almost never have more
// than a few variable indexes.
- for (unsigned j = 0, e = Dest.size(); j != e; ++j) {
- if (!isValueEqualInPotentialCycles(Dest[j].V, V) ||
- Dest[j].ZExtBits != ZExtBits || Dest[j].SExtBits != SExtBits)
+ bool Found = false;
+ for (auto I : enumerate(DestGEP.VarIndices)) {
+ VariableGEPIndex &Dest = I.value();
+ if (!isValueEqualInPotentialCycles(Dest.Val.V, Src.Val.V) ||
+ !Dest.Val.hasSameCastsAs(Src.Val))
continue;
// If we found it, subtract off Scale V's from the entry in Dest. If it
// goes to zero, remove the entry.
- if (Dest[j].Scale != Scale) {
- Dest[j].Scale -= Scale;
- Dest[j].IsNSW = false;
- } else
- Dest.erase(Dest.begin() + j);
- Scale = 0;
+ if (Dest.Scale != Src.Scale) {
+ Dest.Scale -= Src.Scale;
+ Dest.IsNSW = false;
+ } else {
+ DestGEP.VarIndices.erase(DestGEP.VarIndices.begin() + I.index());
+ }
+ Found = true;
break;
}
// If we didn't consume this entry, add it to the end of the Dest list.
- if (!!Scale) {
- VariableGEPIndex Entry = {V, ZExtBits, SExtBits,
- -Scale, Src[i].CxtI, Src[i].IsNSW};
- Dest.push_back(Entry);
+ if (!Found) {
+ VariableGEPIndex Entry = {Src.Val, -Src.Scale, Src.CxtI, Src.IsNSW};
+ DestGEP.VarIndices.push_back(Entry);
}
}
}
bool BasicAAResult::constantOffsetHeuristic(
- const SmallVectorImpl<VariableGEPIndex> &VarIndices,
- LocationSize MaybeV1Size, LocationSize MaybeV2Size, const APInt &BaseOffset,
- AssumptionCache *AC, DominatorTree *DT) {
- if (VarIndices.size() != 2 || !MaybeV1Size.hasValue() ||
+ const DecomposedGEP &GEP, LocationSize MaybeV1Size,
+ LocationSize MaybeV2Size, AssumptionCache *AC, DominatorTree *DT) {
+ if (GEP.VarIndices.size() != 2 || !MaybeV1Size.hasValue() ||
!MaybeV2Size.hasValue())
return false;
const uint64_t V1Size = MaybeV1Size.getValue();
const uint64_t V2Size = MaybeV2Size.getValue();
- const VariableGEPIndex &Var0 = VarIndices[0], &Var1 = VarIndices[1];
+ const VariableGEPIndex &Var0 = GEP.VarIndices[0], &Var1 = GEP.VarIndices[1];
- if (Var0.ZExtBits != Var1.ZExtBits || Var0.SExtBits != Var1.SExtBits ||
- Var0.Scale != -Var1.Scale || Var0.V->getType() != Var1.V->getType())
+ if (Var0.Val.TruncBits != 0 || !Var0.Val.hasSameCastsAs(Var1.Val) ||
+ Var0.Scale != -Var1.Scale ||
+ Var0.Val.V->getType() != Var1.Val.V->getType())
return false;
// We'll strip off the Extensions of Var0 and Var1 and do another round
@@ -1755,11 +1824,10 @@ bool BasicAAResult::constantOffsetHeuristic(
// is zext(%x + 1) we should get V1 == %x and V1Offset == 1.
LinearExpression E0 =
- GetLinearExpression(ExtendedValue(Var0.V), DL, 0, AC, DT);
+ GetLinearExpression(CastedValue(Var0.Val.V), DL, 0, AC, DT);
LinearExpression E1 =
- GetLinearExpression(ExtendedValue(Var1.V), DL, 0, AC, DT);
- if (E0.Scale != E1.Scale || E0.Val.ZExtBits != E1.Val.ZExtBits ||
- E0.Val.SExtBits != E1.Val.SExtBits ||
+ GetLinearExpression(CastedValue(Var1.Val.V), DL, 0, AC, DT);
+ if (E0.Scale != E1.Scale || !E0.Val.hasSameCastsAs(E1.Val) ||
!isValueEqualInPotentialCycles(E0.Val.V, E1.Val.V))
return false;
@@ -1779,8 +1847,8 @@ bool BasicAAResult::constantOffsetHeuristic(
// arithmetic (i.e. for some values of GEP1 and V2 GEP1 < V2, and for other
// values GEP1 > V2). We'll therefore only declare NoAlias if both V1Size and
// V2Size can fit in the MinDiffBytes gap.
- return MinDiffBytes.uge(V1Size + BaseOffset.abs()) &&
- MinDiffBytes.uge(V2Size + BaseOffset.abs());
+ return MinDiffBytes.uge(V1Size + GEP.Offset.abs()) &&
+ MinDiffBytes.uge(V2Size + GEP.Offset.abs());
}
//===----------------------------------------------------------------------===//
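For readers following the BasicAA hunk above: subtractDecomposedGEPs now subtracts both the constant offsets and any matching variable indices of two decomposed GEPs. A minimal sketch of that cancellation logic, using hypothetical stand-in types rather than LLVM's DecomposedGEP/VariableGEPIndex:

```cpp
#include <cstddef>
#include <vector>

// Hypothetical stand-ins for LLVM's VariableGEPIndex / DecomposedGEP.
struct VarIndex { int VarId; long long Scale; };
struct Decomposed { long long Offset; std::vector<VarIndex> Vars; };

// Dest -= Src: cancel the constant offsets and any matching variable index;
// indices that only occur in Src are appended with a negated scale.
void subtract(Decomposed &Dest, const Decomposed &Src) {
  Dest.Offset -= Src.Offset;
  for (const VarIndex &S : Src.Vars) {
    bool Found = false;
    for (std::size_t J = 0; J != Dest.Vars.size(); ++J) {
      if (Dest.Vars[J].VarId != S.VarId)
        continue;
      if (Dest.Vars[J].Scale != S.Scale)
        Dest.Vars[J].Scale -= S.Scale;          // partial cancellation
      else
        Dest.Vars.erase(Dest.Vars.begin() + J); // exact cancellation
      Found = true;
      break;
    }
    if (!Found)
      Dest.Vars.push_back({S.VarId, -S.Scale});
  }
}
```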
diff --git a/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp b/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
index e4e45b3076be..2a5e1f65d731 100644
--- a/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
+++ b/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
@@ -602,7 +602,7 @@ BlockFrequencyInfoImplBase::getProfileCountFromFreq(const Function &F,
if (!EntryCount)
return None;
// Use 128 bit APInt to do the arithmetic to avoid overflow.
- APInt BlockCount(128, EntryCount.getCount());
+ APInt BlockCount(128, EntryCount->getCount());
APInt BlockFreq(128, Freq);
APInt EntryFreq(128, getEntryFreq());
BlockCount *= BlockFreq;
diff --git a/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/llvm/lib/Analysis/BranchProbabilityInfo.cpp
index aa6b93fe3f07..33fdc8b628c5 100644
--- a/llvm/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/llvm/lib/Analysis/BranchProbabilityInfo.cpp
@@ -190,7 +190,7 @@ void BranchProbabilityInfo::SccInfo::getSccExitBlocks(
if (isSCCExitingBlock(BB, SccNum))
for (const auto *Succ : successors(BB))
if (getSCCNum(Succ) != SccNum)
- Exits.push_back(const_cast<BasicBlock *>(BB));
+ Exits.push_back(const_cast<BasicBlock *>(Succ));
}
}
diff --git a/llvm/lib/Analysis/CGSCCPassManager.cpp b/llvm/lib/Analysis/CGSCCPassManager.cpp
index 253cc0b0a579..c60b70ae5b69 100644
--- a/llvm/lib/Analysis/CGSCCPassManager.cpp
+++ b/llvm/lib/Analysis/CGSCCPassManager.cpp
@@ -38,12 +38,13 @@ using namespace llvm;
// Explicit template instantiations and specialization definitions for core
// template typedefs.
namespace llvm {
-
static cl::opt<bool> AbortOnMaxDevirtIterationsReached(
"abort-on-max-devirt-iterations-reached",
cl::desc("Abort when the max iterations for devirtualization CGSCC repeat "
"pass is reached"));
+AnalysisKey ShouldNotRunFunctionPassesAnalysis::Key;
+
// Explicit instantiations for the core proxy templates.
template class AllAnalysesOn<LazyCallGraph::SCC>;
template class AnalysisManager<LazyCallGraph::SCC, LazyCallGraph &>;
@@ -119,12 +120,6 @@ PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &,
// Finally, we intersect the final preserved analyses to compute the
// aggregate preserved set for this pass manager.
PA.intersect(std::move(PassPA));
-
- // FIXME: Historically, the pass managers all called the LLVM context's
- // yield function here. We don't have a generic way to acquire the
- // context and it isn't yet clear what the right pattern is for yielding
- // in the new pass manager so it is currently omitted.
- // ...getContext().yield();
}
// Before we mark all of *this* SCC's analyses as preserved below, intersect
@@ -547,6 +542,9 @@ PreservedAnalyses CGSCCToFunctionPassAdaptor::run(LazyCallGraph::SCC &C,
Function &F = N->getFunction();
+ if (NoRerun && FAM.getCachedResult<ShouldNotRunFunctionPassesAnalysis>(F))
+ continue;
+
PassInstrumentation PI = FAM.getResult<PassInstrumentationAnalysis>(F);
if (!PI.runBeforePass<Function>(*Pass, F))
continue;
@@ -562,7 +560,9 @@ PreservedAnalyses CGSCCToFunctionPassAdaptor::run(LazyCallGraph::SCC &C,
// We know that the function pass couldn't have invalidated any other
// function's analyses (that's the contract of a function pass), so
// directly handle the function analysis manager's invalidation here.
- FAM.invalidate(F, PassPA);
+ FAM.invalidate(F, EagerlyInvalidate ? PreservedAnalyses::none() : PassPA);
+ if (NoRerun)
+ (void)FAM.getResult<ShouldNotRunFunctionPassesAnalysis>(F);
// Then intersect the preserved set so that invalidation of module
// analyses will eventually occur when the module pass completes.
@@ -863,7 +863,7 @@ incorporateNewSCCRange(const SCCRangeT &NewSCCRange, LazyCallGraph &G,
// split-off SCCs.
// We know however that this will preserve any FAM proxy so go ahead and mark
// that.
- PreservedAnalyses PA;
+ auto PA = PreservedAnalyses::allInSet<AllAnalysesOn<Function>>();
PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
AM.invalidate(*OldC, PA);
diff --git a/llvm/lib/Analysis/CaptureTracking.cpp b/llvm/lib/Analysis/CaptureTracking.cpp
index 5fe4f9befc86..8955658cb9e7 100644
--- a/llvm/lib/Analysis/CaptureTracking.cpp
+++ b/llvm/lib/Analysis/CaptureTracking.cpp
@@ -98,10 +98,10 @@ namespace {
/// as the given instruction and the use.
struct CapturesBefore : public CaptureTracker {
- CapturesBefore(bool ReturnCaptures, const Instruction *I, const DominatorTree *DT,
- bool IncludeI)
- : BeforeHere(I), DT(DT),
- ReturnCaptures(ReturnCaptures), IncludeI(IncludeI), Captured(false) {}
+ CapturesBefore(bool ReturnCaptures, const Instruction *I,
+ const DominatorTree *DT, bool IncludeI, const LoopInfo *LI)
+ : BeforeHere(I), DT(DT), ReturnCaptures(ReturnCaptures),
+ IncludeI(IncludeI), Captured(false), LI(LI) {}
void tooManyUses() override { Captured = true; }
@@ -115,7 +115,7 @@ namespace {
return true;
// Check whether there is a path from I to BeforeHere.
- return !isPotentiallyReachable(I, BeforeHere, nullptr, DT);
+ return !isPotentiallyReachable(I, BeforeHere, nullptr, DT, LI);
}
bool captured(const Use *U) override {
@@ -140,6 +140,68 @@ namespace {
bool IncludeI;
bool Captured;
+
+ const LoopInfo *LI;
+ };
+
+ /// Find the 'earliest' instruction before which the pointer is known not to
+ /// be captured. Here an instruction A is considered earlier than instruction
+ /// B, if A dominates B. If 2 escapes do not dominate each other, the
+ /// terminator of the common dominator is chosen. If not all uses can be
+ /// analyzed, the earliest escape is set to the first instruction in the
+ /// function entry block.
+ // NOTE: Users have to make sure instructions compared against the earliest
+ // escape are not in a cycle.
+ struct EarliestCaptures : public CaptureTracker {
+
+ EarliestCaptures(bool ReturnCaptures, Function &F, const DominatorTree &DT)
+ : DT(DT), ReturnCaptures(ReturnCaptures), Captured(false), F(F) {}
+
+ void tooManyUses() override {
+ Captured = true;
+ EarliestCapture = &*F.getEntryBlock().begin();
+ }
+
+ bool captured(const Use *U) override {
+ Instruction *I = cast<Instruction>(U->getUser());
+ if (isa<ReturnInst>(I) && !ReturnCaptures)
+ return false;
+
+ if (!EarliestCapture) {
+ EarliestCapture = I;
+ } else if (EarliestCapture->getParent() == I->getParent()) {
+ if (I->comesBefore(EarliestCapture))
+ EarliestCapture = I;
+ } else {
+ BasicBlock *CurrentBB = I->getParent();
+ BasicBlock *EarliestBB = EarliestCapture->getParent();
+ if (DT.dominates(EarliestBB, CurrentBB)) {
+ // EarliestCapture already comes before the current use.
+ } else if (DT.dominates(CurrentBB, EarliestBB)) {
+ EarliestCapture = I;
+ } else {
+ // Otherwise find the nearest common dominator and use its terminator.
+ auto *NearestCommonDom =
+ DT.findNearestCommonDominator(CurrentBB, EarliestBB);
+ EarliestCapture = NearestCommonDom->getTerminator();
+ }
+ }
+ Captured = true;
+
+ // Return false to continue analysis; we need to see all potential
+ // captures.
+ return false;
+ }
+
+ Instruction *EarliestCapture = nullptr;
+
+ const DominatorTree &DT;
+
+ bool ReturnCaptures;
+
+ bool Captured;
+
+ Function &F;
};
}
@@ -183,7 +245,8 @@ bool llvm::PointerMayBeCaptured(const Value *V,
bool llvm::PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures,
bool StoreCaptures, const Instruction *I,
const DominatorTree *DT, bool IncludeI,
- unsigned MaxUsesToExplore) {
+ unsigned MaxUsesToExplore,
+ const LoopInfo *LI) {
assert(!isa<GlobalValue>(V) &&
"It doesn't make sense to ask whether a global is captured.");
@@ -194,7 +257,7 @@ bool llvm::PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures,
// TODO: See comment in PointerMayBeCaptured regarding what could be done
// with StoreCaptures.
- CapturesBefore CB(ReturnCaptures, I, DT, IncludeI);
+ CapturesBefore CB(ReturnCaptures, I, DT, IncludeI, LI);
PointerMayBeCaptured(V, &CB, MaxUsesToExplore);
if (CB.Captured)
++NumCapturedBefore;
@@ -203,6 +266,22 @@ bool llvm::PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures,
return CB.Captured;
}
+Instruction *llvm::FindEarliestCapture(const Value *V, Function &F,
+ bool ReturnCaptures, bool StoreCaptures,
+ const DominatorTree &DT,
+ unsigned MaxUsesToExplore) {
+ assert(!isa<GlobalValue>(V) &&
+ "It doesn't make sense to ask whether a global is captured.");
+
+ EarliestCaptures CB(ReturnCaptures, F, DT);
+ PointerMayBeCaptured(V, &CB, MaxUsesToExplore);
+ if (CB.Captured)
+ ++NumCapturedBefore;
+ else
+ ++NumNotCapturedBefore;
+ return CB.EarliestCapture;
+}
+
void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker,
unsigned MaxUsesToExplore) {
assert(V->getType()->isPointerTy() && "Capture is for pointers only!");
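A usage sketch of the FindEarliestCapture entry point added above; the signature is taken from this hunk, while the wrapper name and the choice to pass 0 for the use limit (assumed to fall back to the tracker's default) are illustrative:

```cpp
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"

using namespace llvm;

// Returns the earliest instruction before which Ptr is known not to escape;
// if nothing in F captures Ptr, the result is nullptr.
static Instruction *earliestEscape(const Value *Ptr, Function &F,
                                   const DominatorTree &DT) {
  return FindEarliestCapture(Ptr, F, /*ReturnCaptures=*/false,
                             /*StoreCaptures=*/true, DT,
                             /*MaxUsesToExplore=*/0);
}
```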
diff --git a/llvm/lib/Analysis/CmpInstAnalysis.cpp b/llvm/lib/Analysis/CmpInstAnalysis.cpp
index a5757be2c4f4..5b951980a0aa 100644
--- a/llvm/lib/Analysis/CmpInstAnalysis.cpp
+++ b/llvm/lib/Analysis/CmpInstAnalysis.cpp
@@ -77,28 +77,28 @@ bool llvm::decomposeBitTestICmp(Value *LHS, Value *RHS,
return false;
case ICmpInst::ICMP_SLT:
// X < 0 is equivalent to (X & SignMask) != 0.
- if (!C->isNullValue())
+ if (!C->isZero())
return false;
Mask = APInt::getSignMask(C->getBitWidth());
Pred = ICmpInst::ICMP_NE;
break;
case ICmpInst::ICMP_SLE:
// X <= -1 is equivalent to (X & SignMask) != 0.
- if (!C->isAllOnesValue())
+ if (!C->isAllOnes())
return false;
Mask = APInt::getSignMask(C->getBitWidth());
Pred = ICmpInst::ICMP_NE;
break;
case ICmpInst::ICMP_SGT:
// X > -1 is equivalent to (X & SignMask) == 0.
- if (!C->isAllOnesValue())
+ if (!C->isAllOnes())
return false;
Mask = APInt::getSignMask(C->getBitWidth());
Pred = ICmpInst::ICMP_EQ;
break;
case ICmpInst::ICMP_SGE:
// X >= 0 is equivalent to (X & SignMask) == 0.
- if (!C->isNullValue())
+ if (!C->isZero())
return false;
Mask = APInt::getSignMask(C->getBitWidth());
Pred = ICmpInst::ICMP_EQ;
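The sign-bit rewrites used in decomposeBitTestICmp above can be sanity-checked with plain integer arithmetic; this standalone snippet (not LLVM code) exercises all four predicates for 8-bit values:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  for (int X = -128; X <= 127; ++X) {
    uint8_t U = static_cast<uint8_t>(X);
    assert((X < 0)   == ((U & 0x80u) != 0)); // X <  0  <=> sign bit set
    assert((X >= 0)  == ((U & 0x80u) == 0)); // X >= 0  <=> sign bit clear
    assert((X <= -1) == ((U & 0x80u) != 0)); // X <= -1 <=> sign bit set
    assert((X > -1)  == ((U & 0x80u) == 0)); // X > -1  <=> sign bit clear
  }
  return 0;
}
```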
diff --git a/llvm/lib/Analysis/CodeMetrics.cpp b/llvm/lib/Analysis/CodeMetrics.cpp
index 8c8e2ee6627f..27c52506352f 100644
--- a/llvm/lib/Analysis/CodeMetrics.cpp
+++ b/llvm/lib/Analysis/CodeMetrics.cpp
@@ -34,8 +34,9 @@ appendSpeculatableOperands(const Value *V,
for (const Value *Operand : U->operands())
if (Visited.insert(Operand).second)
- if (isSafeToSpeculativelyExecute(Operand))
- Worklist.push_back(Operand);
+ if (const auto *I = dyn_cast<Instruction>(Operand))
+ if (!I->mayHaveSideEffects() && !I->isTerminator())
+ Worklist.push_back(I);
}
static void completeEphemeralValues(SmallPtrSetImpl<const Value *> &Visited,
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index b28a0d6c78cd..3ed3b8902343 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -63,11 +63,6 @@
using namespace llvm;
namespace {
-Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
- ArrayRef<Constant *> Ops,
- const DataLayout &DL,
- const TargetLibraryInfo *TLI,
- bool ForLoadOperand);
//===----------------------------------------------------------------------===//
// Constant Folding internal helper functions
@@ -357,9 +352,9 @@ Constant *llvm::ConstantFoldLoadThroughBitcast(Constant *C, Type *DestTy,
const DataLayout &DL) {
do {
Type *SrcTy = C->getType();
- uint64_t DestSize = DL.getTypeSizeInBits(DestTy);
- uint64_t SrcSize = DL.getTypeSizeInBits(SrcTy);
- if (SrcSize < DestSize)
+ TypeSize DestSize = DL.getTypeSizeInBits(DestTy);
+ TypeSize SrcSize = DL.getTypeSizeInBits(SrcTy);
+ if (!TypeSize::isKnownGE(SrcSize, DestSize))
return nullptr;
// Catch the obvious splat cases (since all-zeros can coerce non-integral
@@ -550,19 +545,16 @@ bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, unsigned char *CurPtr,
return false;
}
-Constant *FoldReinterpretLoadFromConstPtr(Constant *C, Type *LoadTy,
- const DataLayout &DL) {
+Constant *FoldReinterpretLoadFromConst(Constant *C, Type *LoadTy,
+ int64_t Offset, const DataLayout &DL) {
// Bail out early. We do not expect to load from a scalable global variable.
if (isa<ScalableVectorType>(LoadTy))
return nullptr;
- auto *PTy = cast<PointerType>(C->getType());
auto *IntType = dyn_cast<IntegerType>(LoadTy);
// If this isn't an integer load we can't fold it directly.
if (!IntType) {
- unsigned AS = PTy->getAddressSpace();
-
// If this is a float/double load, we can try folding it as an int32/64 load
// and then bitcast the result. This can be useful for union cases. Note
// that address spaces don't matter here since we're not going to result in
@@ -580,8 +572,7 @@ Constant *FoldReinterpretLoadFromConstPtr(Constant *C, Type *LoadTy,
} else
return nullptr;
- C = FoldBitCast(C, MapTy->getPointerTo(AS), DL);
- if (Constant *Res = FoldReinterpretLoadFromConstPtr(C, MapTy, DL)) {
+ if (Constant *Res = FoldReinterpretLoadFromConst(C, MapTy, Offset, DL)) {
if (Res->isNullValue() && !LoadTy->isX86_MMXTy() &&
!LoadTy->isX86_AMXTy())
// Materializing a zero can be done trivially without a bitcast
@@ -607,19 +598,7 @@ Constant *FoldReinterpretLoadFromConstPtr(Constant *C, Type *LoadTy,
if (BytesLoaded > 32 || BytesLoaded == 0)
return nullptr;
- GlobalValue *GVal;
- APInt OffsetAI;
- if (!IsConstantOffsetFromGlobal(C, GVal, OffsetAI, DL))
- return nullptr;
-
- auto *GV = dyn_cast<GlobalVariable>(GVal);
- if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() ||
- !GV->getInitializer()->getType()->isSized())
- return nullptr;
-
- int64_t Offset = OffsetAI.getSExtValue();
- int64_t InitializerSize =
- DL.getTypeAllocSize(GV->getInitializer()->getType()).getFixedSize();
+ int64_t InitializerSize = DL.getTypeAllocSize(C->getType()).getFixedSize();
// If we're not accessing anything in this constant, the result is undefined.
if (Offset <= -1 * static_cast<int64_t>(BytesLoaded))
@@ -640,7 +619,7 @@ Constant *FoldReinterpretLoadFromConstPtr(Constant *C, Type *LoadTy,
Offset = 0;
}
- if (!ReadDataFromGlobal(GV->getInitializer(), Offset, CurPtr, BytesLeft, DL))
+ if (!ReadDataFromGlobal(C, Offset, CurPtr, BytesLeft, DL))
return nullptr;
APInt ResultVal = APInt(IntType->getBitWidth(), 0);
@@ -661,111 +640,70 @@ Constant *FoldReinterpretLoadFromConstPtr(Constant *C, Type *LoadTy,
return ConstantInt::get(IntType->getContext(), ResultVal);
}
-Constant *ConstantFoldLoadThroughBitcastExpr(ConstantExpr *CE, Type *DestTy,
- const DataLayout &DL) {
- auto *SrcPtr = CE->getOperand(0);
- if (!SrcPtr->getType()->isPointerTy())
+/// If this Offset points exactly to the start of an aggregate element, return
+/// that element, otherwise return nullptr.
+Constant *getConstantAtOffset(Constant *Base, APInt Offset,
+ const DataLayout &DL) {
+ if (Offset.isZero())
+ return Base;
+
+ if (!isa<ConstantAggregate>(Base) && !isa<ConstantDataSequential>(Base))
+ return nullptr;
+
+ Type *ElemTy = Base->getType();
+ SmallVector<APInt> Indices = DL.getGEPIndicesForOffset(ElemTy, Offset);
+ if (!Offset.isZero() || !Indices[0].isZero())
return nullptr;
- return ConstantFoldLoadFromConstPtr(SrcPtr, DestTy, DL);
+ Constant *C = Base;
+ for (const APInt &Index : drop_begin(Indices)) {
+ if (Index.isNegative() || Index.getActiveBits() >= 32)
+ return nullptr;
+
+ C = C->getAggregateElement(Index.getZExtValue());
+ if (!C)
+ return nullptr;
+ }
+
+ return C;
}
} // end anonymous namespace
-Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty,
- const DataLayout &DL) {
- // First, try the easy cases:
- if (auto *GV = dyn_cast<GlobalVariable>(C))
- if (GV->isConstant() && GV->hasDefinitiveInitializer())
- return ConstantFoldLoadThroughBitcast(GV->getInitializer(), Ty, DL);
+Constant *llvm::ConstantFoldLoadFromConst(Constant *C, Type *Ty,
+ const APInt &Offset,
+ const DataLayout &DL) {
+ if (Constant *AtOffset = getConstantAtOffset(C, Offset, DL))
+ if (Constant *Result = ConstantFoldLoadThroughBitcast(AtOffset, Ty, DL))
+ return Result;
- if (auto *GA = dyn_cast<GlobalAlias>(C))
- if (GA->getAliasee() && !GA->isInterposable())
- return ConstantFoldLoadFromConstPtr(GA->getAliasee(), Ty, DL);
+ // Try hard to fold loads from bitcasted strange and non-type-safe things.
+ if (Offset.getMinSignedBits() <= 64)
+ return FoldReinterpretLoadFromConst(C, Ty, Offset.getSExtValue(), DL);
- // If the loaded value isn't a constant expr, we can't handle it.
- auto *CE = dyn_cast<ConstantExpr>(C);
- if (!CE)
- return nullptr;
+ return nullptr;
+}
- if (CE->getOpcode() == Instruction::GetElementPtr) {
- if (auto *GV = dyn_cast<GlobalVariable>(CE->getOperand(0))) {
- if (GV->isConstant() && GV->hasDefinitiveInitializer()) {
- if (Constant *V = ConstantFoldLoadThroughGEPConstantExpr(
- GV->getInitializer(), CE, Ty, DL))
- return V;
- }
- } else {
- // Try to simplify GEP if the pointer operand wasn't a GlobalVariable.
- // SymbolicallyEvaluateGEP() with `ForLoadOperand = true` can potentially
- // simplify the GEP more than it normally would have been, but should only
- // be used for const folding loads.
- SmallVector<Constant *> Ops;
- for (unsigned I = 0, E = CE->getNumOperands(); I != E; ++I)
- Ops.push_back(cast<Constant>(CE->getOperand(I)));
- if (auto *Simplified = dyn_cast_or_null<ConstantExpr>(
- SymbolicallyEvaluateGEP(cast<GEPOperator>(CE), Ops, DL, nullptr,
- /*ForLoadOperand*/ true))) {
- // If the symbolically evaluated GEP is another GEP, we can only const
- // fold it if the resulting pointer operand is a GlobalValue. Otherwise
- // there is nothing else to simplify since the GEP is already in the
- // most simplified form.
- if (isa<GEPOperator>(Simplified)) {
- if (auto *GV = dyn_cast<GlobalVariable>(Simplified->getOperand(0))) {
- if (GV->isConstant() && GV->hasDefinitiveInitializer()) {
- if (Constant *V = ConstantFoldLoadThroughGEPConstantExpr(
- GV->getInitializer(), Simplified, Ty, DL))
- return V;
- }
- }
- } else {
- return ConstantFoldLoadFromConstPtr(Simplified, Ty, DL);
- }
- }
- }
- }
+Constant *llvm::ConstantFoldLoadFromConst(Constant *C, Type *Ty,
+ const DataLayout &DL) {
+ return ConstantFoldLoadFromConst(C, Ty, APInt(64, 0), DL);
+}
- if (CE->getOpcode() == Instruction::BitCast)
- if (Constant *LoadedC = ConstantFoldLoadThroughBitcastExpr(CE, Ty, DL))
- return LoadedC;
-
- // Instead of loading constant c string, use corresponding integer value
- // directly if string length is small enough.
- StringRef Str;
- if (getConstantStringInfo(CE, Str) && !Str.empty()) {
- size_t StrLen = Str.size();
- unsigned NumBits = Ty->getPrimitiveSizeInBits();
- // Replace load with immediate integer if the result is an integer or fp
- // value.
- if ((NumBits >> 3) == StrLen + 1 && (NumBits & 7) == 0 &&
- (isa<IntegerType>(Ty) || Ty->isFloatingPointTy())) {
- APInt StrVal(NumBits, 0);
- APInt SingleChar(NumBits, 0);
- if (DL.isLittleEndian()) {
- for (unsigned char C : reverse(Str.bytes())) {
- SingleChar = static_cast<uint64_t>(C);
- StrVal = (StrVal << 8) | SingleChar;
- }
- } else {
- for (unsigned char C : Str.bytes()) {
- SingleChar = static_cast<uint64_t>(C);
- StrVal = (StrVal << 8) | SingleChar;
- }
- // Append NULL at the end.
- SingleChar = 0;
- StrVal = (StrVal << 8) | SingleChar;
- }
+Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty,
+ APInt Offset,
+ const DataLayout &DL) {
+ C = cast<Constant>(C->stripAndAccumulateConstantOffsets(
+ DL, Offset, /* AllowNonInbounds */ true));
- Constant *Res = ConstantInt::get(CE->getContext(), StrVal);
- if (Ty->isFloatingPointTy())
- Res = ConstantExpr::getBitCast(Res, Ty);
- return Res;
- }
- }
+ if (auto *GV = dyn_cast<GlobalVariable>(C))
+ if (GV->isConstant() && GV->hasDefinitiveInitializer())
+ if (Constant *Result = ConstantFoldLoadFromConst(GV->getInitializer(), Ty,
+ Offset, DL))
+ return Result;
// If this load comes from anywhere in a constant global, and if the global
// is all undef or zero, we know what it loads.
- if (auto *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(CE))) {
+ if (auto *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(C))) {
if (GV->isConstant() && GV->hasDefinitiveInitializer()) {
if (GV->getInitializer()->isNullValue())
return Constant::getNullValue(Ty);
@@ -774,8 +712,13 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty,
}
}
- // Try hard to fold loads from bitcasted strange and non-type-safe things.
- return FoldReinterpretLoadFromConstPtr(CE, Ty, DL);
+ return nullptr;
+}
+
+Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty,
+ const DataLayout &DL) {
+ APInt Offset(DL.getIndexTypeSizeInBits(C->getType()), 0);
+ return ConstantFoldLoadFromConstPtr(C, Ty, Offset, DL);
}
namespace {
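A sketch of using the offset-based constant load folding introduced in the hunks above; the signatures come from this diff, while the helper name and byte-offset parameter are illustrative:

```cpp
#include "llvm/ADT/APInt.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"

using namespace llvm;

// Try to fold a load of type Ty from Ptr + ByteOffset, where Ptr is a
// constant (e.g. a global's address). Returns nullptr if folding fails.
static Constant *foldLoadAtOffset(Constant *Ptr, Type *Ty, uint64_t ByteOffset,
                                  const DataLayout &DL) {
  APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), ByteOffset);
  return ConstantFoldLoadFromConstPtr(Ptr, Ty, Offset, DL);
}
```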
@@ -795,11 +738,11 @@ Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, Constant *Op1,
if (Opc == Instruction::And) {
KnownBits Known0 = computeKnownBits(Op0, DL);
KnownBits Known1 = computeKnownBits(Op1, DL);
- if ((Known1.One | Known0.Zero).isAllOnesValue()) {
+ if ((Known1.One | Known0.Zero).isAllOnes()) {
// All the bits of Op0 that the 'and' could be masking are already zero.
return Op0;
}
- if ((Known0.One | Known1.Zero).isAllOnesValue()) {
+ if ((Known0.One | Known1.Zero).isAllOnes()) {
// All the bits of Op1 that the 'and' could be masking are already zero.
return Op1;
}
@@ -867,17 +810,10 @@ Constant *CastGEPIndices(Type *SrcElemTy, ArrayRef<Constant *> Ops,
}
/// Strip the pointer casts, but preserve the address space information.
-Constant *StripPtrCastKeepAS(Constant *Ptr, bool ForLoadOperand) {
+Constant *StripPtrCastKeepAS(Constant *Ptr) {
assert(Ptr->getType()->isPointerTy() && "Not a pointer type");
auto *OldPtrTy = cast<PointerType>(Ptr->getType());
Ptr = cast<Constant>(Ptr->stripPointerCasts());
- if (ForLoadOperand) {
- while (isa<GlobalAlias>(Ptr) && !cast<GlobalAlias>(Ptr)->isInterposable() &&
- !cast<GlobalAlias>(Ptr)->getBaseObject()->isInterposable()) {
- Ptr = cast<GlobalAlias>(Ptr)->getAliasee();
- }
- }
-
auto *NewPtrTy = cast<PointerType>(Ptr->getType());
// Preserve the address space number of the pointer.
@@ -893,8 +829,7 @@ Constant *StripPtrCastKeepAS(Constant *Ptr, bool ForLoadOperand) {
Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
ArrayRef<Constant *> Ops,
const DataLayout &DL,
- const TargetLibraryInfo *TLI,
- bool ForLoadOperand) {
+ const TargetLibraryInfo *TLI) {
const GEPOperator *InnermostGEP = GEP;
bool InBounds = GEP->isInBounds();
@@ -939,7 +874,7 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
DL.getIndexedOffsetInType(
SrcElemTy,
makeArrayRef((Value * const *)Ops.data() + 1, Ops.size() - 1)));
- Ptr = StripPtrCastKeepAS(Ptr, ForLoadOperand);
+ Ptr = StripPtrCastKeepAS(Ptr);
// If this is a GEP of a GEP, fold it all into a single GEP.
while (auto *GEP = dyn_cast<GEPOperator>(Ptr)) {
@@ -961,7 +896,7 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
Ptr = cast<Constant>(GEP->getOperand(0));
SrcElemTy = GEP->getSourceElementType();
Offset += APInt(BitWidth, DL.getIndexedOffsetInType(SrcElemTy, NestedOps));
- Ptr = StripPtrCastKeepAS(Ptr, ForLoadOperand);
+ Ptr = StripPtrCastKeepAS(Ptr);
}
// If the base value for this address is a literal integer value, fold the
@@ -985,72 +920,41 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
// we eliminate over-indexing of the notional static type array bounds.
// This makes it easy to determine if the getelementptr is "inbounds".
// Also, this helps GlobalOpt do SROA on GlobalVariables.
- SmallVector<Constant *, 32> NewIdxs;
- Type *Ty = PTy;
- SrcElemTy = PTy->getElementType();
- do {
- if (!Ty->isStructTy()) {
- if (Ty->isPointerTy()) {
- // The only pointer indexing we'll do is on the first index of the GEP.
- if (!NewIdxs.empty())
- break;
+ // For GEPs of GlobalValues, use the value type even for opaque pointers.
+ // Otherwise use an i8 GEP.
+ if (auto *GV = dyn_cast<GlobalValue>(Ptr))
+ SrcElemTy = GV->getValueType();
+ else if (!PTy->isOpaque())
+ SrcElemTy = PTy->getElementType();
+ else
+ SrcElemTy = Type::getInt8Ty(Ptr->getContext());
- Ty = SrcElemTy;
+ if (!SrcElemTy->isSized())
+ return nullptr;
- // Only handle pointers to sized types, not pointers to functions.
- if (!Ty->isSized())
- return nullptr;
- } else {
- Type *NextTy = GetElementPtrInst::getTypeAtIndex(Ty, (uint64_t)0);
- if (!NextTy)
- break;
- Ty = NextTy;
- }
+ Type *ElemTy = SrcElemTy;
+ SmallVector<APInt> Indices = DL.getGEPIndicesForOffset(ElemTy, Offset);
+ if (Offset != 0)
+ return nullptr;
- // Determine which element of the array the offset points into.
- APInt ElemSize(BitWidth, DL.getTypeAllocSize(Ty));
- if (ElemSize == 0) {
- // The element size is 0. This may be [0 x Ty]*, so just use a zero
- // index for this level and proceed to the next level to see if it can
- // accommodate the offset.
- NewIdxs.push_back(ConstantInt::get(IntIdxTy, 0));
- } else {
- // The element size is non-zero divide the offset by the element
- // size (rounding down), to compute the index at this level.
- bool Overflow;
- APInt NewIdx = Offset.sdiv_ov(ElemSize, Overflow);
- if (Overflow)
- break;
- Offset -= NewIdx * ElemSize;
- NewIdxs.push_back(ConstantInt::get(IntIdxTy, NewIdx));
- }
- } else {
- auto *STy = cast<StructType>(Ty);
- // If we end up with an offset that isn't valid for this struct type, we
- // can't re-form this GEP in a regular form, so bail out. The pointer
- // operand likely went through casts that are necessary to make the GEP
- // sensible.
- const StructLayout &SL = *DL.getStructLayout(STy);
- if (Offset.isNegative() || Offset.uge(SL.getSizeInBytes()))
- break;
+ // Try to add additional zero indices to reach the desired result element
+ // type.
+ // TODO: Should we avoid extra zero indices if ResElemTy can't be reached and
+ // we'll have to insert a bitcast anyway?
+ while (ElemTy != ResElemTy) {
+ Type *NextTy = GetElementPtrInst::getTypeAtIndex(ElemTy, (uint64_t)0);
+ if (!NextTy)
+ break;
- // Determine which field of the struct the offset points into. The
- // getZExtValue is fine as we've already ensured that the offset is
- // within the range representable by the StructLayout API.
- unsigned ElIdx = SL.getElementContainingOffset(Offset.getZExtValue());
- NewIdxs.push_back(ConstantInt::get(Type::getInt32Ty(Ty->getContext()),
- ElIdx));
- Offset -= APInt(BitWidth, SL.getElementOffset(ElIdx));
- Ty = STy->getTypeAtIndex(ElIdx);
- }
- } while (Ty != ResElemTy);
+ Indices.push_back(APInt::getZero(isa<StructType>(ElemTy) ? 32 : BitWidth));
+ ElemTy = NextTy;
+ }
- // If we haven't used up the entire offset by descending the static
- // type, then the offset is pointing into the middle of an indivisible
- // member, so we can't simplify it.
- if (Offset != 0)
- return nullptr;
+ SmallVector<Constant *, 32> NewIdxs;
+ for (const APInt &Index : Indices)
+ NewIdxs.push_back(ConstantInt::get(
+ Type::getIntNTy(Ptr->getContext(), Index.getBitWidth()), Index));
// Preserve the inrange index from the innermost GEP if possible. We must
// have calculated the same indices up to and including the inrange index.
@@ -1067,8 +971,9 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
// Create a GEP.
Constant *C = ConstantExpr::getGetElementPtr(SrcElemTy, Ptr, NewIdxs,
InBounds, InRangeIndex);
- assert(C->getType()->getPointerElementType() == Ty &&
- "Computed GetElementPtr has unexpected type!");
+ assert(
+ cast<PointerType>(C->getType())->isOpaqueOrPointeeTypeMatches(ElemTy) &&
+ "Computed GetElementPtr has unexpected type!");
// If we ended up indexing a member with a type that doesn't match
// the type of what the original indices indexed, add a cast.
@@ -1099,8 +1004,7 @@ Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode,
return ConstantFoldCastOperand(Opcode, Ops[0], DestTy, DL);
if (auto *GEP = dyn_cast<GEPOperator>(InstOrCE)) {
- if (Constant *C = SymbolicallyEvaluateGEP(GEP, Ops, DL, TLI,
- /*ForLoadOperand*/ false))
+ if (Constant *C = SymbolicallyEvaluateGEP(GEP, Ops, DL, TLI))
return C;
return ConstantExpr::getGetElementPtr(GEP->getSourceElementType(), Ops[0],
@@ -1375,21 +1279,31 @@ Constant *llvm::ConstantFoldCastOperand(unsigned Opcode, Constant *C,
default:
llvm_unreachable("Missing case");
case Instruction::PtrToInt:
- // If the input is a inttoptr, eliminate the pair. This requires knowing
- // the width of a pointer, so it can't be done in ConstantExpr::getCast.
if (auto *CE = dyn_cast<ConstantExpr>(C)) {
+ Constant *FoldedValue = nullptr;
+ // If the input is a inttoptr, eliminate the pair. This requires knowing
+ // the width of a pointer, so it can't be done in ConstantExpr::getCast.
if (CE->getOpcode() == Instruction::IntToPtr) {
- Constant *Input = CE->getOperand(0);
- unsigned InWidth = Input->getType()->getScalarSizeInBits();
- unsigned PtrWidth = DL.getPointerTypeSizeInBits(CE->getType());
- if (PtrWidth < InWidth) {
- Constant *Mask =
- ConstantInt::get(CE->getContext(),
- APInt::getLowBitsSet(InWidth, PtrWidth));
- Input = ConstantExpr::getAnd(Input, Mask);
+ // zext/trunc the inttoptr to pointer size.
+ FoldedValue = ConstantExpr::getIntegerCast(
+ CE->getOperand(0), DL.getIntPtrType(CE->getType()),
+ /*IsSigned=*/false);
+ } else if (auto *GEP = dyn_cast<GEPOperator>(CE)) {
+ // If we have GEP, we can perform the following folds:
+ // (ptrtoint (gep null, x)) -> x
+ // (ptrtoint (gep (gep null, x), y) -> x + y, etc.
+ unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType());
+ APInt BaseOffset(BitWidth, 0);
+ auto *Base = cast<Constant>(GEP->stripAndAccumulateConstantOffsets(
+ DL, BaseOffset, /*AllowNonInbounds=*/true));
+ if (Base->isNullValue()) {
+ FoldedValue = ConstantInt::get(CE->getContext(), BaseOffset);
}
- // Do a zext or trunc to get to the dest size.
- return ConstantExpr::getIntegerCast(Input, DestTy, false);
+ }
+ if (FoldedValue) {
+ // Do a zext or trunc to get to the ptrtoint dest size.
+ return ConstantExpr::getIntegerCast(FoldedValue, DestTy,
+ /*IsSigned=*/false);
}
}
return ConstantExpr::getCast(Opcode, C, DestTy);
@@ -1446,19 +1360,6 @@ Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C,
return ConstantFoldLoadThroughBitcast(C, Ty, DL);
}
-Constant *
-llvm::ConstantFoldLoadThroughGEPIndices(Constant *C,
- ArrayRef<Constant *> Indices) {
- // Loop over all of the operands, tracking down which value we are
- // addressing.
- for (Constant *Index : Indices) {
- C = C->getAggregateElement(Index);
- if (!C)
- return nullptr;
- }
- return C;
-}
-
//===----------------------------------------------------------------------===//
// Constant Folding for Calls
//
@@ -1879,7 +1780,7 @@ static bool mayFoldConstrained(ConstrainedFPIntrinsic *CI,
// know that its evaluation does not raise exceptions, so side effect
// is absent. To allow removing the call, mark it as not accessing memory.
if (EB && *EB != fp::ExceptionBehavior::ebIgnore)
- CI->addAttribute(AttributeList::FunctionIndex, Attribute::ReadNone);
+ CI->addFnAttr(Attribute::ReadNone);
return true;
}
@@ -2112,7 +2013,7 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
/// the host native double versions. Float versions are not called
/// directly but for all these it is true (float)(f((double)arg)) ==
/// f(arg). Long double not supported yet.
- APFloat APF = Op->getValueAPF();
+ const APFloat &APF = Op->getValueAPF();
switch (IntrinsicID) {
default: break;
@@ -2163,7 +2064,9 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
return nullptr;
LibFunc Func = NotLibFunc;
- TLI->getLibFunc(Name, Func);
+ if (!TLI->getLibFunc(Name, Func))
+ return nullptr;
+
switch (Func) {
default:
break;
@@ -2416,12 +2319,12 @@ static Constant *ConstantFoldScalarCall2(StringRef Name,
if (const auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
if (!Ty->isFloatingPointTy())
return nullptr;
- APFloat Op1V = Op1->getValueAPF();
+ const APFloat &Op1V = Op1->getValueAPF();
if (const auto *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
if (Op2->getType() != Op1->getType())
return nullptr;
- APFloat Op2V = Op2->getValueAPF();
+ const APFloat &Op2V = Op2->getValueAPF();
if (const auto *ConstrIntr = dyn_cast<ConstrainedFPIntrinsic>(Call)) {
RoundingMode RM = getEvaluationRoundingMode(ConstrIntr);
@@ -2487,7 +2390,9 @@ static Constant *ConstantFoldScalarCall2(StringRef Name,
return nullptr;
LibFunc Func = NotLibFunc;
- TLI->getLibFunc(Name, Func);
+ if (!TLI->getLibFunc(Name, Func))
+ return nullptr;
+
switch (Func) {
default:
break;
@@ -2671,7 +2576,7 @@ static Constant *ConstantFoldScalarCall2(StringRef Name,
assert(C1 && "Must be constant int");
// cttz(0, 1) and ctlz(0, 1) are undef.
- if (C1->isOneValue() && (!C0 || C0->isNullValue()))
+ if (C1->isOne() && (!C0 || C0->isZero()))
return UndefValue::get(Ty);
if (!C0)
return Constant::getNullValue(Ty);
@@ -2683,11 +2588,11 @@ static Constant *ConstantFoldScalarCall2(StringRef Name,
case Intrinsic::abs:
// Undef or minimum val operand with poison min --> undef
assert(C1 && "Must be constant int");
- if (C1->isOneValue() && (!C0 || C0->isMinSignedValue()))
+ if (C1->isOne() && (!C0 || C0->isMinSignedValue()))
return UndefValue::get(Ty);
// Undef operand with no poison min --> 0 (sign bit must be clear)
- if (C1->isNullValue() && !C0)
+ if (C1->isZero() && !C0)
return Constant::getNullValue(Ty);
return ConstantInt::get(Ty, C0->abs());
@@ -3191,7 +3096,7 @@ bool llvm::isMathLibCallNoop(const CallBase *Call,
if (!TLI || !TLI->getLibFunc(*F, Func))
return false;
- if (Call->getNumArgOperands() == 1) {
+ if (Call->arg_size() == 1) {
if (ConstantFP *OpC = dyn_cast<ConstantFP>(Call->getArgOperand(0))) {
const APFloat &Op = OpC->getValueAPF();
switch (Func) {
@@ -3280,7 +3185,7 @@ bool llvm::isMathLibCallNoop(const CallBase *Call,
}
}
- if (Call->getNumArgOperands() == 2) {
+ if (Call->arg_size() == 2) {
ConstantFP *Op0C = dyn_cast<ConstantFP>(Call->getArgOperand(0));
ConstantFP *Op1C = dyn_cast<ConstantFP>(Call->getArgOperand(1));
if (Op0C && Op1C) {
diff --git a/llvm/lib/Analysis/CostModel.cpp b/llvm/lib/Analysis/CostModel.cpp
index 83b7d5cbfc3e..f407ec0d017a 100644
--- a/llvm/lib/Analysis/CostModel.cpp
+++ b/llvm/lib/Analysis/CostModel.cpp
@@ -16,10 +16,12 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/CostModel.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
@@ -113,3 +115,23 @@ void CostModelAnalysis::print(raw_ostream &OS, const Module*) const {
}
}
}
+
+PreservedAnalyses CostModelPrinterPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ auto &TTI = AM.getResult<TargetIRAnalysis>(F);
+ OS << "Cost Model for function '" << F.getName() << "'\n";
+ for (BasicBlock &B : F) {
+ for (Instruction &Inst : B) {
+ // TODO: Use a pass parameter instead of cl::opt CostKind to determine
+ // which cost kind to print.
+ InstructionCost Cost = TTI.getInstructionCost(&Inst, CostKind);
+ if (auto CostVal = Cost.getValue())
+ OS << "Cost Model: Found an estimated cost of " << *CostVal;
+ else
+ OS << "Cost Model: Invalid cost";
+
+ OS << " for instruction: " << Inst << "\n";
+ }
+ }
+ return PreservedAnalyses::all();
+}
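A minimal sketch of driving the new CostModelPrinterPass from C++ with the new pass manager. It assumes the pass is declared in llvm/Analysis/CostModel.h (as the include added above suggests) and constructed from a raw_ostream, and that the stock PassBuilder registration provides the required analyses; whether this exact setup builds against this revision is an assumption:

```cpp
#include "llvm/Analysis/CostModel.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Print per-instruction cost estimates for F to stderr.
static void printCosts(Function &F) {
  PassBuilder PB;
  FunctionAnalysisManager FAM;
  PB.registerFunctionAnalyses(FAM); // TargetIRAnalysis, instrumentation, ...
  FunctionPassManager FPM;
  FPM.addPass(CostModelPrinterPass(errs()));
  FPM.run(F, FAM);
}
```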
diff --git a/llvm/lib/Analysis/Delinearization.cpp b/llvm/lib/Analysis/Delinearization.cpp
index 448e970e9bcc..670532c6d9a8 100644
--- a/llvm/lib/Analysis/Delinearization.cpp
+++ b/llvm/lib/Analysis/Delinearization.cpp
@@ -17,6 +17,7 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionDivision.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
@@ -36,6 +37,492 @@ using namespace llvm;
#define DL_NAME "delinearize"
#define DEBUG_TYPE DL_NAME
+// Return true when S contains at least one undef value.
+static inline bool containsUndefs(const SCEV *S) {
+ return SCEVExprContains(S, [](const SCEV *S) {
+ if (const auto *SU = dyn_cast<SCEVUnknown>(S))
+ return isa<UndefValue>(SU->getValue());
+ return false;
+ });
+}
+
+namespace {
+
+// Collect all steps of SCEV expressions.
+struct SCEVCollectStrides {
+ ScalarEvolution &SE;
+ SmallVectorImpl<const SCEV *> &Strides;
+
+ SCEVCollectStrides(ScalarEvolution &SE, SmallVectorImpl<const SCEV *> &S)
+ : SE(SE), Strides(S) {}
+
+ bool follow(const SCEV *S) {
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
+ Strides.push_back(AR->getStepRecurrence(SE));
+ return true;
+ }
+
+ bool isDone() const { return false; }
+};
+
+// Collect all SCEVUnknown and SCEVMulExpr expressions.
+struct SCEVCollectTerms {
+ SmallVectorImpl<const SCEV *> &Terms;
+
+ SCEVCollectTerms(SmallVectorImpl<const SCEV *> &T) : Terms(T) {}
+
+ bool follow(const SCEV *S) {
+ if (isa<SCEVUnknown>(S) || isa<SCEVMulExpr>(S) ||
+ isa<SCEVSignExtendExpr>(S)) {
+ if (!containsUndefs(S))
+ Terms.push_back(S);
+
+ // Stop recursion: once we collected a term, do not walk its operands.
+ return false;
+ }
+
+ // Keep looking.
+ return true;
+ }
+
+ bool isDone() const { return false; }
+};
+
+// Check if a SCEV contains an AddRecExpr.
+struct SCEVHasAddRec {
+ bool &ContainsAddRec;
+
+ SCEVHasAddRec(bool &ContainsAddRec) : ContainsAddRec(ContainsAddRec) {
+ ContainsAddRec = false;
+ }
+
+ bool follow(const SCEV *S) {
+ if (isa<SCEVAddRecExpr>(S)) {
+ ContainsAddRec = true;
+
+ // Stop recursion: once we collected a term, do not walk its operands.
+ return false;
+ }
+
+ // Keep looking.
+ return true;
+ }
+
+ bool isDone() const { return false; }
+};
+
+// Find factors that are multiplied with an expression that (possibly as a
+// subexpression) contains an AddRecExpr. In the expression:
+//
+// 8 * (100 + %p * %q * (%a + {0, +, 1}_loop))
+//
+// "%p * %q" are factors multiplied by the expression "(%a + {0, +, 1}_loop)"
+// that contains the AddRec {0, +, 1}_loop. %p * %q are likely to be array size
+// parameters as they form a product with an induction variable.
+//
+// This collector expects all array size parameters to be in the same MulExpr.
+// It might be necessary to later add support for collecting parameters that are
+// spread over different nested MulExpr.
+struct SCEVCollectAddRecMultiplies {
+ SmallVectorImpl<const SCEV *> &Terms;
+ ScalarEvolution &SE;
+
+ SCEVCollectAddRecMultiplies(SmallVectorImpl<const SCEV *> &T,
+ ScalarEvolution &SE)
+ : Terms(T), SE(SE) {}
+
+ bool follow(const SCEV *S) {
+ if (auto *Mul = dyn_cast<SCEVMulExpr>(S)) {
+ bool HasAddRec = false;
+ SmallVector<const SCEV *, 0> Operands;
+ for (auto Op : Mul->operands()) {
+ const SCEVUnknown *Unknown = dyn_cast<SCEVUnknown>(Op);
+ if (Unknown && !isa<CallInst>(Unknown->getValue())) {
+ Operands.push_back(Op);
+ } else if (Unknown) {
+ HasAddRec = true;
+ } else {
+ bool ContainsAddRec = false;
+ SCEVHasAddRec HasAddRecVisitor(ContainsAddRec);
+ visitAll(Op, HasAddRecVisitor);
+ HasAddRec |= ContainsAddRec;
+ }
+ }
+ if (Operands.size() == 0)
+ return true;
+
+ if (!HasAddRec)
+ return false;
+
+ Terms.push_back(SE.getMulExpr(Operands));
+ // Stop recursion: once we collected a term, do not walk its operands.
+ return false;
+ }
+
+ // Keep looking.
+ return true;
+ }
+
+ bool isDone() const { return false; }
+};
+
+} // end anonymous namespace
+
+/// Find parametric terms in this SCEVAddRecExpr. We first look for parameters in
+/// two places:
+/// 1) The strides of AddRec expressions.
+/// 2) Unknowns that are multiplied with AddRec expressions.
+void llvm::collectParametricTerms(ScalarEvolution &SE, const SCEV *Expr,
+ SmallVectorImpl<const SCEV *> &Terms) {
+ SmallVector<const SCEV *, 4> Strides;
+ SCEVCollectStrides StrideCollector(SE, Strides);
+ visitAll(Expr, StrideCollector);
+
+ LLVM_DEBUG({
+ dbgs() << "Strides:\n";
+ for (const SCEV *S : Strides)
+ dbgs() << *S << "\n";
+ });
+
+ for (const SCEV *S : Strides) {
+ SCEVCollectTerms TermCollector(Terms);
+ visitAll(S, TermCollector);
+ }
+
+ LLVM_DEBUG({
+ dbgs() << "Terms:\n";
+ for (const SCEV *T : Terms)
+ dbgs() << *T << "\n";
+ });
+
+ SCEVCollectAddRecMultiplies MulCollector(Terms, SE);
+ visitAll(Expr, MulCollector);
+}
+
+static bool findArrayDimensionsRec(ScalarEvolution &SE,
+ SmallVectorImpl<const SCEV *> &Terms,
+ SmallVectorImpl<const SCEV *> &Sizes) {
+ int Last = Terms.size() - 1;
+ const SCEV *Step = Terms[Last];
+
+ // End of recursion.
+ if (Last == 0) {
+ if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Step)) {
+ SmallVector<const SCEV *, 2> Qs;
+ for (const SCEV *Op : M->operands())
+ if (!isa<SCEVConstant>(Op))
+ Qs.push_back(Op);
+
+ Step = SE.getMulExpr(Qs);
+ }
+
+ Sizes.push_back(Step);
+ return true;
+ }
+
+ for (const SCEV *&Term : Terms) {
+ // Normalize the terms before the next call to findArrayDimensionsRec.
+ const SCEV *Q, *R;
+ SCEVDivision::divide(SE, Term, Step, &Q, &R);
+
+ // Bail out when GCD does not evenly divide one of the terms.
+ if (!R->isZero())
+ return false;
+
+ Term = Q;
+ }
+
+ // Remove all SCEVConstants.
+ erase_if(Terms, [](const SCEV *E) { return isa<SCEVConstant>(E); });
+
+ if (Terms.size() > 0)
+ if (!findArrayDimensionsRec(SE, Terms, Sizes))
+ return false;
+
+ Sizes.push_back(Step);
+ return true;
+}
+
+// Returns true when one of the SCEVs of Terms contains a SCEVUnknown parameter.
+static inline bool containsParameters(SmallVectorImpl<const SCEV *> &Terms) {
+ for (const SCEV *T : Terms)
+ if (SCEVExprContains(T, [](const SCEV *S) { return isa<SCEVUnknown>(S); }))
+ return true;
+
+ return false;
+}
+
+// Return the number of product terms in S.
+static inline int numberOfTerms(const SCEV *S) {
+ if (const SCEVMulExpr *Expr = dyn_cast<SCEVMulExpr>(S))
+ return Expr->getNumOperands();
+ return 1;
+}
+
+static const SCEV *removeConstantFactors(ScalarEvolution &SE, const SCEV *T) {
+ if (isa<SCEVConstant>(T))
+ return nullptr;
+
+ if (isa<SCEVUnknown>(T))
+ return T;
+
+ if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(T)) {
+ SmallVector<const SCEV *, 2> Factors;
+ for (const SCEV *Op : M->operands())
+ if (!isa<SCEVConstant>(Op))
+ Factors.push_back(Op);
+
+ return SE.getMulExpr(Factors);
+ }
+
+ return T;
+}
+
+void llvm::findArrayDimensions(ScalarEvolution &SE,
+ SmallVectorImpl<const SCEV *> &Terms,
+ SmallVectorImpl<const SCEV *> &Sizes,
+ const SCEV *ElementSize) {
+ if (Terms.size() < 1 || !ElementSize)
+ return;
+
+ // Early return when Terms do not contain parameters: we do not delinearize
+ // non parametric SCEVs.
+ if (!containsParameters(Terms))
+ return;
+
+ LLVM_DEBUG({
+ dbgs() << "Terms:\n";
+ for (const SCEV *T : Terms)
+ dbgs() << *T << "\n";
+ });
+
+ // Remove duplicates.
+ array_pod_sort(Terms.begin(), Terms.end());
+ Terms.erase(std::unique(Terms.begin(), Terms.end()), Terms.end());
+
+ // Put larger terms first.
+ llvm::sort(Terms, [](const SCEV *LHS, const SCEV *RHS) {
+ return numberOfTerms(LHS) > numberOfTerms(RHS);
+ });
+
+ // Try to divide all terms by the element size. If term is not divisible by
+ // element size, proceed with the original term.
+ for (const SCEV *&Term : Terms) {
+ const SCEV *Q, *R;
+ SCEVDivision::divide(SE, Term, ElementSize, &Q, &R);
+ if (!Q->isZero())
+ Term = Q;
+ }
+
+ SmallVector<const SCEV *, 4> NewTerms;
+
+ // Remove constant factors.
+ for (const SCEV *T : Terms)
+ if (const SCEV *NewT = removeConstantFactors(SE, T))
+ NewTerms.push_back(NewT);
+
+ LLVM_DEBUG({
+ dbgs() << "Terms after sorting:\n";
+ for (const SCEV *T : NewTerms)
+ dbgs() << *T << "\n";
+ });
+
+ if (NewTerms.empty() || !findArrayDimensionsRec(SE, NewTerms, Sizes)) {
+ Sizes.clear();
+ return;
+ }
+
+ // The last element to be pushed into Sizes is the size of an element.
+ Sizes.push_back(ElementSize);
+
+ LLVM_DEBUG({
+ dbgs() << "Sizes:\n";
+ for (const SCEV *S : Sizes)
+ dbgs() << *S << "\n";
+ });
+}
+
+void llvm::computeAccessFunctions(ScalarEvolution &SE, const SCEV *Expr,
+ SmallVectorImpl<const SCEV *> &Subscripts,
+ SmallVectorImpl<const SCEV *> &Sizes) {
+ // Early exit in case this SCEV is not an affine multivariate function.
+ if (Sizes.empty())
+ return;
+
+ if (auto *AR = dyn_cast<SCEVAddRecExpr>(Expr))
+ if (!AR->isAffine())
+ return;
+
+ const SCEV *Res = Expr;
+ int Last = Sizes.size() - 1;
+ for (int i = Last; i >= 0; i--) {
+ const SCEV *Q, *R;
+ SCEVDivision::divide(SE, Res, Sizes[i], &Q, &R);
+
+ LLVM_DEBUG({
+ dbgs() << "Res: " << *Res << "\n";
+ dbgs() << "Sizes[i]: " << *Sizes[i] << "\n";
+ dbgs() << "Res divided by Sizes[i]:\n";
+ dbgs() << "Quotient: " << *Q << "\n";
+ dbgs() << "Remainder: " << *R << "\n";
+ });
+
+ Res = Q;
+
+ // Do not record the last subscript corresponding to the size of elements in
+ // the array.
+ if (i == Last) {
+
+ // Bail out if the byte offset is non-zero.
+ if (!R->isZero()) {
+ Subscripts.clear();
+ Sizes.clear();
+ return;
+ }
+
+ continue;
+ }
+
+ // Record the access function for the current subscript.
+ Subscripts.push_back(R);
+ }
+
+ // Also push in last position the remainder of the last division: it will be
+ // the access function of the innermost dimension.
+ Subscripts.push_back(Res);
+
+ std::reverse(Subscripts.begin(), Subscripts.end());
+
+ LLVM_DEBUG({
+ dbgs() << "Subscripts:\n";
+ for (const SCEV *S : Subscripts)
+ dbgs() << *S << "\n";
+ });
+}
+
+/// Splits the SCEV into two vectors of SCEVs representing the subscripts and
+/// sizes of an array access. Returns the remainder of the delinearization that
+/// is the offset start of the array. The SCEV->delinearize algorithm computes
+/// the multiples of SCEV coefficients: that is a pattern matching of sub
+/// expressions in the stride and base of a SCEV corresponding to the
+/// computation of a GCD (greatest common divisor) of base and stride. When
+/// SCEV->delinearize fails, it returns the SCEV unchanged.
+///
+/// For example: when analyzing the memory access A[i][j][k] in this loop nest
+///
+/// void foo(long n, long m, long o, double A[n][m][o]) {
+///
+/// for (long i = 0; i < n; i++)
+/// for (long j = 0; j < m; j++)
+/// for (long k = 0; k < o; k++)
+/// A[i][j][k] = 1.0;
+/// }
+///
+/// the delinearization input is the following AddRec SCEV:
+///
+/// AddRec: {{{%A,+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k>
+///
+/// From this SCEV, we are able to say that the base offset of the access is %A
+/// because it appears as an offset that does not divide any of the strides in
+/// the loops:
+///
+/// CHECK: Base offset: %A
+///
+/// and then SCEV->delinearize determines the size of some of the dimensions of
+/// the array as these are the multiples by which the strides are happening:
+///
+/// CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double)
+/// bytes.
+///
+/// Note that the outermost dimension remains of UnknownSize because there are
+/// no strides that would help identifying the size of the last dimension: when
+/// the array has been statically allocated, one could compute the size of that
+/// dimension by dividing the overall size of the array by the size of the known
+/// dimensions: %m * %o * 8.
+///
+/// Finally delinearize provides the access functions for the array reference
+/// that does correspond to A[i][j][k] of the above C testcase:
+///
+/// CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>]
+///
+/// The testcases are checking the output of a function pass:
+/// DelinearizationPass that walks through all loads and stores of a function
+/// asking for the SCEV of the memory access with respect to all enclosing
+/// loops, calling SCEV->delinearize on that and printing the results.
+void llvm::delinearize(ScalarEvolution &SE, const SCEV *Expr,
+ SmallVectorImpl<const SCEV *> &Subscripts,
+ SmallVectorImpl<const SCEV *> &Sizes,
+ const SCEV *ElementSize) {
+ // First step: collect parametric terms.
+ SmallVector<const SCEV *, 4> Terms;
+ collectParametricTerms(SE, Expr, Terms);
+
+ if (Terms.empty())
+ return;
+
+ // Second step: find subscript sizes.
+ findArrayDimensions(SE, Terms, Sizes, ElementSize);
+
+ if (Sizes.empty())
+ return;
+
+ // Third step: compute the access functions for each subscript.
+ computeAccessFunctions(SE, Expr, Subscripts, Sizes);
+
+ if (Subscripts.empty())
+ return;
+
+ LLVM_DEBUG({
+ dbgs() << "succeeded to delinearize " << *Expr << "\n";
+ dbgs() << "ArrayDecl[UnknownSize]";
+ for (const SCEV *S : Sizes)
+ dbgs() << "[" << *S << "]";
+
+ dbgs() << "\nArrayRef";
+ for (const SCEV *S : Subscripts)
+ dbgs() << "[" << *S << "]";
+ dbgs() << "\n";
+ });
+}
+
+bool llvm::getIndexExpressionsFromGEP(ScalarEvolution &SE,
+ const GetElementPtrInst *GEP,
+ SmallVectorImpl<const SCEV *> &Subscripts,
+ SmallVectorImpl<int> &Sizes) {
+ assert(Subscripts.empty() && Sizes.empty() &&
+ "Expected output lists to be empty on entry to this function.");
+ assert(GEP && "getIndexExpressionsFromGEP called with a null GEP");
+ Type *Ty = nullptr;
+ bool DroppedFirstDim = false;
+ for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
+ const SCEV *Expr = SE.getSCEV(GEP->getOperand(i));
+ if (i == 1) {
+ Ty = GEP->getSourceElementType();
+ if (auto *Const = dyn_cast<SCEVConstant>(Expr))
+ if (Const->getValue()->isZero()) {
+ DroppedFirstDim = true;
+ continue;
+ }
+ Subscripts.push_back(Expr);
+ continue;
+ }
+
+ auto *ArrayTy = dyn_cast<ArrayType>(Ty);
+ if (!ArrayTy) {
+ Subscripts.clear();
+ Sizes.clear();
+ return false;
+ }
+
+ Subscripts.push_back(Expr);
+ if (!(DroppedFirstDim && i == 2))
+ Sizes.push_back(ArrayTy->getNumElements());
+
+ Ty = ArrayTy->getElementType();
+ }
+ return !Subscripts.empty();
+}
+
namespace {
class Delinearization : public FunctionPass {
@@ -84,7 +571,7 @@ void printDelinearization(raw_ostream &O, Function *F, LoopInfo *LI,
O << "AccessFunction: " << *AccessFn << "\n";
SmallVector<const SCEV *, 3> Subscripts, Sizes;
- SE->delinearize(AccessFn, Subscripts, Sizes, SE->getElementSize(&Inst));
+ delinearize(*SE, AccessFn, Subscripts, Sizes, SE->getElementSize(&Inst));
if (Subscripts.size() == 0 || Sizes.size() == 0 ||
Subscripts.size() != Sizes.size()) {
O << "failed to delinearize\n";
diff --git a/llvm/lib/Analysis/DemandedBits.cpp b/llvm/lib/Analysis/DemandedBits.cpp
index ca6d58fac825..117b12fc0701 100644
--- a/llvm/lib/Analysis/DemandedBits.cpp
+++ b/llvm/lib/Analysis/DemandedBits.cpp
@@ -362,7 +362,7 @@ void DemandedBits::performAnalysis() {
if (Instruction *J = dyn_cast<Instruction>(OI)) {
Type *T = J->getType();
if (T->isIntOrIntVectorTy())
- AliveBits[J] = APInt::getAllOnesValue(T->getScalarSizeInBits());
+ AliveBits[J] = APInt::getAllOnes(T->getScalarSizeInBits());
else
Visited.insert(J);
Worklist.insert(J);
@@ -407,7 +407,7 @@ void DemandedBits::performAnalysis() {
Type *T = OI->getType();
if (T->isIntOrIntVectorTy()) {
unsigned BitWidth = T->getScalarSizeInBits();
- APInt AB = APInt::getAllOnesValue(BitWidth);
+ APInt AB = APInt::getAllOnes(BitWidth);
if (InputIsKnownDead) {
AB = APInt(BitWidth, 0);
} else {
@@ -417,7 +417,7 @@ void DemandedBits::performAnalysis() {
Known, Known2, KnownBitsComputed);
// Keep track of uses which have no demanded bits.
- if (AB.isNullValue())
+ if (AB.isZero())
DeadUses.insert(&OI);
else
DeadUses.erase(&OI);
@@ -448,8 +448,7 @@ APInt DemandedBits::getDemandedBits(Instruction *I) {
return Found->second;
const DataLayout &DL = I->getModule()->getDataLayout();
- return APInt::getAllOnesValue(
- DL.getTypeSizeInBits(I->getType()->getScalarType()));
+ return APInt::getAllOnes(DL.getTypeSizeInBits(I->getType()->getScalarType()));
}
APInt DemandedBits::getDemandedBits(Use *U) {
@@ -461,7 +460,7 @@ APInt DemandedBits::getDemandedBits(Use *U) {
// We only track integer uses, everything else produces a mask with all bits
// set
if (!T->isIntOrIntVectorTy())
- return APInt::getAllOnesValue(BitWidth);
+ return APInt::getAllOnes(BitWidth);
if (isUseDead(U))
return APInt(BitWidth, 0);
@@ -469,7 +468,7 @@ APInt DemandedBits::getDemandedBits(Use *U) {
performAnalysis();
APInt AOut = getDemandedBits(UserI);
- APInt AB = APInt::getAllOnesValue(BitWidth);
+ APInt AB = APInt::getAllOnes(BitWidth);
KnownBits Known, Known2;
bool KnownBitsComputed = false;
@@ -504,7 +503,7 @@ bool DemandedBits::isUseDead(Use *U) {
// is dead. These uses might not be explicitly present in the DeadUses map.
if (UserI->getType()->isIntOrIntVectorTy()) {
auto Found = AliveBits.find(UserI);
- if (Found != AliveBits.end() && Found->second.isNullValue())
+ if (Found != AliveBits.end() && Found->second.isZero())
return true;
}
diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp
index 9564cfb2aa45..f827f74d5367 100644
--- a/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -53,6 +53,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Delinearization.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
@@ -119,6 +120,11 @@ static cl::opt<bool> DisableDelinearizationChecks(
"dependence vectors for languages that allow the subscript of one "
"dimension to underflow or overflow into another dimension."));
+static cl::opt<unsigned> MIVMaxLevelThreshold(
+ "da-miv-max-level-threshold", cl::init(7), cl::Hidden, cl::ZeroOrMore,
+ cl::desc("Maximum depth allowed for the recursive algorithm used to "
+ "explore MIV direction vectors."));
+
//===----------------------------------------------------------------------===//
// basics
@@ -2319,7 +2325,7 @@ bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst,
LLVM_DEBUG(dbgs() << "starting gcd\n");
++GCDapplications;
unsigned BitWidth = SE->getTypeSizeInBits(Src->getType());
- APInt RunningGCD = APInt::getNullValue(BitWidth);
+ APInt RunningGCD = APInt::getZero(BitWidth);
// Examine Src coefficients.
// Compute running GCD and record source constant.
@@ -2359,7 +2365,7 @@ bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst,
}
const SCEV *DstConst = Coefficients;
- APInt ExtraGCD = APInt::getNullValue(BitWidth);
+ APInt ExtraGCD = APInt::getZero(BitWidth);
const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
LLVM_DEBUG(dbgs() << " Delta = " << *Delta << "\n");
const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Delta);
@@ -2602,6 +2608,19 @@ unsigned DependenceInfo::exploreDirections(unsigned Level, CoefficientInfo *A,
const SmallBitVector &Loops,
unsigned &DepthExpanded,
const SCEV *Delta) const {
+ // This algorithm has worst case complexity of O(3^n), where 'n' is the number
+ // of common loop levels. To avoid excessive compile-time, pessimize all the
+ // results and immediately return when the number of common levels is beyond
+ // the given threshold.
+ if (CommonLevels > MIVMaxLevelThreshold) {
+ LLVM_DEBUG(dbgs() << "Number of common levels exceeded the threshold. MIV "
+ "direction exploration is terminated.\n");
+ for (unsigned K = 1; K <= CommonLevels; ++K)
+ if (Loops[K])
+ Bound[K].DirSet = Dependence::DVEntry::ALL;
+ return 1;
+ }
+
if (Level > CommonLevels) {
// record result
LLVM_DEBUG(dbgs() << "\t[");
@@ -3320,8 +3339,8 @@ bool DependenceInfo::tryDelinearizeFixedSize(
return false;
SmallVector<int, 4> SrcSizes, DstSizes;
- SE->getIndexExpressionsFromGEP(SrcGEP, SrcSubscripts, SrcSizes);
- SE->getIndexExpressionsFromGEP(DstGEP, DstSubscripts, DstSizes);
+ getIndexExpressionsFromGEP(*SE, SrcGEP, SrcSubscripts, SrcSizes);
+ getIndexExpressionsFromGEP(*SE, DstGEP, DstSubscripts, DstSizes);
// Check that the two size arrays are non-empty and equal in length and
// value.
@@ -3421,16 +3440,16 @@ bool DependenceInfo::tryDelinearizeParametricSize(
// First step: collect parametric terms in both array references.
SmallVector<const SCEV *, 4> Terms;
- SE->collectParametricTerms(SrcAR, Terms);
- SE->collectParametricTerms(DstAR, Terms);
+ collectParametricTerms(*SE, SrcAR, Terms);
+ collectParametricTerms(*SE, DstAR, Terms);
// Second step: find subscript sizes.
SmallVector<const SCEV *, 4> Sizes;
- SE->findArrayDimensions(Terms, Sizes, ElementSize);
+ findArrayDimensions(*SE, Terms, Sizes, ElementSize);
// Third step: compute the access functions for each subscript.
- SE->computeAccessFunctions(SrcAR, SrcSubscripts, Sizes);
- SE->computeAccessFunctions(DstAR, DstSubscripts, Sizes);
+ computeAccessFunctions(*SE, SrcAR, SrcSubscripts, Sizes);
+ computeAccessFunctions(*SE, DstAR, DstSubscripts, Sizes);
// Fail when there is only a subscript: that's a linearized access function.
if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 ||
diff --git a/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp b/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp
index ecfefa36918c..d87fa849d839 100644
--- a/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp
+++ b/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp
@@ -1,9 +1,8 @@
//===- DevelopmentModeInlineAdvisor.cpp - runtime-loadable model runner --===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -228,6 +227,8 @@ private:
(*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore);
getAdvisor()->updateNativeSizeEstimate(Reward);
log(Reward, /*Success=*/true);
+ } else {
+ log(NoReward, /*Success=*/true);
}
}
@@ -377,7 +378,7 @@ void TrainingLogger::logInlineEvent(const InlineEvent &Event,
void TrainingLogger::print() {
std::error_code EC;
raw_fd_ostream OutFile(LogFileName, EC);
- L->print(OutFile);
+ L->flush(OutFile);
}
DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor(
diff --git a/llvm/lib/Analysis/HeatUtils.cpp b/llvm/lib/Analysis/HeatUtils.cpp
index a1a11be5fee3..0057de322cac 100644
--- a/llvm/lib/Analysis/HeatUtils.cpp
+++ b/llvm/lib/Analysis/HeatUtils.cpp
@@ -1,9 +1,8 @@
//===-- HeatUtils.cpp - Utility for printing heat colors --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
index a6298afb66f5..f22c6aa04f5e 100644
--- a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
+++ b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
@@ -23,13 +23,23 @@
using namespace llvm;
using namespace IRSimilarity;
+cl::opt<bool>
+ DisableBranches("no-ir-sim-branch-matching", cl::init(false),
+ cl::ReallyHidden,
+ cl::desc("disable similarity matching, and outlining, "
+ "across branches for debugging purposes."));
+
IRInstructionData::IRInstructionData(Instruction &I, bool Legality,
IRInstructionDataList &IDList)
: Inst(&I), Legal(Legality), IDL(&IDList) {
+ initializeInstruction();
+}
+
+void IRInstructionData::initializeInstruction() {
// We check for whether we have a comparison instruction. If we do, we
// find the "less than" version of the predicate for consistency for
// comparison instructions throughout the program.
- if (CmpInst *C = dyn_cast<CmpInst>(&I)) {
+ if (CmpInst *C = dyn_cast<CmpInst>(Inst)) {
CmpInst::Predicate Predicate = predicateForConsistency(C);
if (Predicate != C->getPredicate())
RevisedPredicate = Predicate;
@@ -37,8 +47,8 @@ IRInstructionData::IRInstructionData(Instruction &I, bool Legality,
// Here we collect the operands and their types for determining whether
// the structure of the operand use matches between two different candidates.
- for (Use &OI : I.operands()) {
- if (isa<CmpInst>(I) && RevisedPredicate.hasValue()) {
+ for (Use &OI : Inst->operands()) {
+ if (isa<CmpInst>(Inst) && RevisedPredicate.hasValue()) {
// If we have a CmpInst where the predicate is reversed, it means the
// operands must be reversed as well.
OperVals.insert(OperVals.begin(), OI.get());
@@ -49,6 +59,33 @@ IRInstructionData::IRInstructionData(Instruction &I, bool Legality,
}
}
+IRInstructionData::IRInstructionData(IRInstructionDataList &IDList)
+ : Inst(nullptr), Legal(false), IDL(&IDList) {}
+
+void IRInstructionData::setBranchSuccessors(
+ DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) {
+ assert(isa<BranchInst>(Inst) && "Instruction must be branch");
+
+ BranchInst *BI = cast<BranchInst>(Inst);
+ DenseMap<BasicBlock *, unsigned>::iterator BBNumIt;
+
+ BBNumIt = BasicBlockToInteger.find(BI->getParent());
+ assert(BBNumIt != BasicBlockToInteger.end() &&
+ "Could not find location for BasicBlock!");
+
+ int CurrentBlockNumber = static_cast<int>(BBNumIt->second);
+
+ for (BasicBlock *Successor : BI->successors()) {
+ BBNumIt = BasicBlockToInteger.find(Successor);
+ assert(BBNumIt != BasicBlockToInteger.end() &&
+ "Could not find number for BasicBlock!");
+ int OtherBlockNumber = static_cast<int>(BBNumIt->second);
+
+ int Relative = OtherBlockNumber - CurrentBlockNumber;
+ RelativeBlockLocations.push_back(Relative);
+ }
+}
+
CmpInst::Predicate IRInstructionData::predicateForConsistency(CmpInst *CI) {
switch (CI->getPredicate()) {
case CmpInst::FCMP_OGT:
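A small illustration, not from the patch, of the encoding setBranchSuccessors stores: each successor is recorded as its block number minus the branch's own block number. The block names and numbers below are invented for the example.

#include <cstdio>
#include <map>
#include <string>
#include <vector>

int main() {
  std::map<std::string, unsigned> BasicBlockToInteger = {
      {"entry", 0}, {"loop", 1}, {"body", 2}, {"exit", 3}};
  // A conditional branch sitting in "loop" with successors "body" and "exit".
  int Current = static_cast<int>(BasicBlockToInteger["loop"]);
  std::vector<int> RelativeBlockLocations;
  for (const char *Succ : {"body", "exit"})
    RelativeBlockLocations.push_back(
        static_cast<int>(BasicBlockToInteger[Succ]) - Current);
  for (int R : RelativeBlockLocations)
    std::printf("%+d ", R); // prints "+1 +2"
  std::printf("\n");
  return 0;
}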
@@ -143,6 +180,10 @@ bool IRSimilarity::isClose(const IRInstructionData &A,
return false;
}
+ if (isa<BranchInst>(A.Inst) && isa<BranchInst>(B.Inst) &&
+ A.RelativeBlockLocations.size() != B.RelativeBlockLocations.size())
+ return false;
+
return true;
}
@@ -156,10 +197,6 @@ void IRInstructionMapper::convertToUnsignedVec(
std::vector<unsigned> IntegerMappingForBB;
std::vector<IRInstructionData *> InstrListForBB;
- HaveLegalRange = false;
- CanCombineWithPrevInstr = false;
- AddedIllegalLastTime = true;
-
for (BasicBlock::iterator Et = BB.end(); It != Et; ++It) {
switch (InstClassifier.visit(*It)) {
case InstrType::Legal:
@@ -175,7 +212,8 @@ void IRInstructionMapper::convertToUnsignedVec(
}
if (HaveLegalRange) {
- mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true);
+ if (AddedIllegalLastTime)
+ mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true);
for (IRInstructionData *ID : InstrListForBB)
this->IDL->push_back(*ID);
llvm::append_range(InstrList, InstrListForBB);
@@ -203,6 +241,9 @@ unsigned IRInstructionMapper::mapToLegalUnsigned(
IRInstructionData *ID = allocateIRInstructionData(*It, true, *IDL);
InstrListForBB.push_back(ID);
+ if (isa<BranchInst>(*It))
+ ID->setBranchSuccessors(BasicBlockToInteger);
+
// Add to the instruction list
bool WasInserted;
DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>::iterator
@@ -235,6 +276,11 @@ IRInstructionMapper::allocateIRInstructionData(Instruction &I, bool Legality,
return new (InstDataAllocator->Allocate()) IRInstructionData(I, Legality, IDL);
}
+IRInstructionData *
+IRInstructionMapper::allocateIRInstructionData(IRInstructionDataList &IDL) {
+ return new (InstDataAllocator->Allocate()) IRInstructionData(IDL);
+}
+
IRInstructionDataList *
IRInstructionMapper::allocateIRInstructionDataList() {
return new (IDLAllocator->Allocate()) IRInstructionDataList();
@@ -255,6 +301,8 @@ unsigned IRInstructionMapper::mapToIllegalUnsigned(
IRInstructionData *ID = nullptr;
if (!End)
ID = allocateIRInstructionData(*It, false, *IDL);
+ else
+ ID = allocateIRInstructionData(*IDL);
InstrListForBB.push_back(ID);
// Remember that we added an illegal number last time.
@@ -563,8 +611,50 @@ bool IRSimilarityCandidate::compareCommutativeOperandMapping(
return true;
}
+bool IRSimilarityCandidate::checkRelativeLocations(RelativeLocMapping A,
+ RelativeLocMapping B) {
+ // Get the basic blocks the label refers to.
+ BasicBlock *ABB = static_cast<BasicBlock *>(A.OperVal);
+ BasicBlock *BBB = static_cast<BasicBlock *>(B.OperVal);
+
+ // Get the basic blocks contained in each region.
+ DenseSet<BasicBlock *> BasicBlockA;
+ DenseSet<BasicBlock *> BasicBlockB;
+ A.IRSC.getBasicBlocks(BasicBlockA);
+ B.IRSC.getBasicBlocks(BasicBlockB);
+
+ // Determine if the block is contained in the region.
+ bool AContained = BasicBlockA.contains(ABB);
+ bool BContained = BasicBlockB.contains(BBB);
+
+ // Both blocks need to be contained in the region, or both need to be outside
+ // the region.
+ if (AContained != BContained)
+ return false;
+
+ // If both are contained, then we need to make sure that the relative
+ // distances to the target blocks are the same.
+ if (AContained)
+ return A.RelativeLocation == B.RelativeLocation;
+ return true;
+}
+
bool IRSimilarityCandidate::compareStructure(const IRSimilarityCandidate &A,
const IRSimilarityCandidate &B) {
+ DenseMap<unsigned, DenseSet<unsigned>> MappingA;
+ DenseMap<unsigned, DenseSet<unsigned>> MappingB;
+ return IRSimilarityCandidate::compareStructure(A, B, MappingA, MappingB);
+}
+
+typedef detail::zippy<detail::zip_shortest, SmallVector<int, 4> &,
+ SmallVector<int, 4> &, ArrayRef<Value *> &,
+ ArrayRef<Value *> &>
+ ZippedRelativeLocationsT;
+
+bool IRSimilarityCandidate::compareStructure(
+ const IRSimilarityCandidate &A, const IRSimilarityCandidate &B,
+ DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA,
+ DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB) {
if (A.getLength() != B.getLength())
return false;
@@ -574,15 +664,12 @@ bool IRSimilarityCandidate::compareStructure(const IRSimilarityCandidate &A,
iterator ItA = A.begin();
iterator ItB = B.begin();
- // These sets create a create a mapping between the values in one candidate
- // to values in the other candidate. If we create a set with one element,
- // and that same element maps to the original element in the candidate
- // we have a good mapping.
- DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingA;
- DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingB;
+ // These ValueNumber Mapping sets create a mapping between the values in one
+ // candidate and the values in the other candidate. If we create a set with
+ // one element, and that same element maps to the original element in the
+ // candidate, we have a good mapping.
DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt;
- bool WasInserted;
// Iterate over the instructions contained in each candidate
unsigned SectionLength = A.getStartIdx() + A.getLength();
@@ -605,6 +692,7 @@ bool IRSimilarityCandidate::compareStructure(const IRSimilarityCandidate &A,
unsigned InstValA = A.ValueToNumber.find(IA)->second;
unsigned InstValB = B.ValueToNumber.find(IB)->second;
+ bool WasInserted;
// Ensure that the mappings for the instructions exists.
std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert(
std::make_pair(InstValA, DenseSet<unsigned>({InstValB})));
@@ -632,6 +720,37 @@ bool IRSimilarityCandidate::compareStructure(const IRSimilarityCandidate &A,
{A, OperValsA, ValueNumberMappingA},
{B, OperValsB, ValueNumberMappingB}))
return false;
+
+ // Here we check that, between two corresponding instructions, when they
+ // refer to a basic block in the same region, the relative locations are the
+ // same, and that the instructions refer to basic blocks outside the region
+ // in the same corresponding locations.
+
+ // We are able to make this assumption about blocks outside of the region
+ // since the target block labels are considered values and will follow the
+ // same number matching that we defined for the other instructions in the
+ // region. So, wherever we target a specific block outside the region, we
+ // are targeting a corresponding block in the analogous location of the
+ // region we are comparing to.
+ if (!(isa<BranchInst>(IA) && isa<BranchInst>(IB)) &&
+ !(isa<PHINode>(IA) && isa<PHINode>(IB)))
+ continue;
+
+ SmallVector<int, 4> &RelBlockLocsA = ItA->RelativeBlockLocations;
+ SmallVector<int, 4> &RelBlockLocsB = ItB->RelativeBlockLocations;
+ if (RelBlockLocsA.size() != RelBlockLocsB.size() &&
+ OperValsA.size() != OperValsB.size())
+ return false;
+
+ ZippedRelativeLocationsT ZippedRelativeLocations =
+ zip(RelBlockLocsA, RelBlockLocsB, OperValsA, OperValsB);
+ if (any_of(ZippedRelativeLocations,
+ [&A, &B](std::tuple<int, int, Value *, Value *> R) {
+ return !checkRelativeLocations(
+ {A, std::get<0>(R), std::get<2>(R)},
+ {B, std::get<1>(R), std::get<3>(R)});
+ }))
+ return false;
}
return true;
}
@@ -657,6 +776,8 @@ void IRSimilarityIdentifier::populateMapper(
std::vector<unsigned> IntegerMappingForModule;
// Iterate over the functions in the module to map each Instruction in each
// BasicBlock to an unsigned integer.
+ Mapper.initializeForBBs(M);
+
for (Function &F : M) {
if (F.empty())
@@ -664,15 +785,18 @@ void IRSimilarityIdentifier::populateMapper(
for (BasicBlock &BB : F) {
- if (BB.sizeWithoutDebug() < 2)
- continue;
-
// BB has potential to have similarity since it has a size greater than 2
// and can therefore match other regions greater than 2. Map it to a list
// of unsigned integers.
Mapper.convertToUnsignedVec(BB, InstrListForModule,
IntegerMappingForModule);
}
+
+ BasicBlock::iterator It = F.begin()->end();
+ Mapper.mapToIllegalUnsigned(It, IntegerMappingForModule, InstrListForModule,
+ true);
+ if (InstrListForModule.size() > 0)
+ Mapper.IDL->push_back(*InstrListForModule.back());
}
// Insert the InstrListForModule at the end of the overall InstrList so that
@@ -707,6 +831,8 @@ static void createCandidatesFromSuffixTree(
std::vector<IRSimilarityCandidate> &CandsForRepSubstring) {
unsigned StringLen = RS.Length;
+ if (StringLen < 2)
+ return;
// Create an IRSimilarityCandidate for instance of this subsequence \p RS.
for (const unsigned &StartIdx : RS.StartIndices) {
@@ -739,6 +865,84 @@ static void createCandidatesFromSuffixTree(
}
}
+void IRSimilarityCandidate::createCanonicalRelationFrom(
+ IRSimilarityCandidate &SourceCand,
+ DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping,
+ DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping) {
+ assert(SourceCand.CanonNumToNumber.size() != 0 &&
+ "Base canonical relationship is empty!");
+ assert(SourceCand.NumberToCanonNum.size() != 0 &&
+ "Base canonical relationship is empty!");
+
+ assert(CanonNumToNumber.size() == 0 && "Canonical Relationship is non-empty");
+ assert(NumberToCanonNum.size() == 0 && "Canonical Relationship is non-empty");
+
+ DenseSet<unsigned> UsedGVNs;
+ // Iterate over the mappings provided from this candidate to SourceCand. We
+ // are then able to map the GVN in this candidate to the same canonical number
+ // given to the corresponding GVN in SourceCand.
+ for (std::pair<unsigned, DenseSet<unsigned>> &GVNMapping : ToSourceMapping) {
+ unsigned SourceGVN = GVNMapping.first;
+
+ assert(GVNMapping.second.size() != 0 && "Possible GVNs is 0!");
+
+ unsigned ResultGVN;
+ // We need special handling if we have more than one potential value. This
+ // means that there are at least two GVNs that could correspond to this GVN.
+ // This could lead to potential swapping later on, so we make a decision
+ // here to ensure a one-to-one mapping.
+ if (GVNMapping.second.size() > 1) {
+ bool Found = false;
+ for (unsigned Val : GVNMapping.second) {
+ // We make sure the target value number hasn't already been reserved.
+ if (UsedGVNs.contains(Val))
+ continue;
+
+ // We make sure that the opposite mapping is still consistent.
+ DenseMap<unsigned, DenseSet<unsigned>>::iterator It =
+ FromSourceMapping.find(Val);
+
+ if (!It->second.contains(SourceGVN))
+ continue;
+
+ // We pick the first item that satisfies these conditions.
+ Found = true;
+ ResultGVN = Val;
+ break;
+ }
+
+ assert(Found && "Could not find matching value for source GVN");
+ (void)Found;
+
+ } else
+ ResultGVN = *GVNMapping.second.begin();
+
+ // Whatever GVN is found, we mark it as used.
+ UsedGVNs.insert(ResultGVN);
+
+ unsigned CanonNum = *SourceCand.getCanonicalNum(ResultGVN);
+ CanonNumToNumber.insert(std::make_pair(CanonNum, SourceGVN));
+ NumberToCanonNum.insert(std::make_pair(SourceGVN, CanonNum));
+ }
+}
+
+void IRSimilarityCandidate::createCanonicalMappingFor(
+ IRSimilarityCandidate &CurrCand) {
+ assert(CurrCand.CanonNumToNumber.size() == 0 &&
+ "Canonical Relationship is non-empty");
+ assert(CurrCand.NumberToCanonNum.size() == 0 &&
+ "Canonical Relationship is non-empty");
+
+ unsigned CanonNum = 0;
+ // Iterate over the value numbers found; the order does not matter in this
+ // case.
+ for (std::pair<unsigned, Value *> &NumToVal : CurrCand.NumberToValue) {
+ CurrCand.NumberToCanonNum.insert(std::make_pair(NumToVal.first, CanonNum));
+ CurrCand.CanonNumToNumber.insert(std::make_pair(CanonNum, NumToVal.first));
+ CanonNum++;
+ }
+}
+
/// From the list of IRSimilarityCandidates, perform a comparison between each
/// IRSimilarityCandidate to determine if there are overlapping
/// IRInstructionData, or if they do not have the same structure.
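A minimal sketch of the canonical numbering idea used by createCanonicalMappingFor and createCanonicalRelationFrom, with made-up value numbers: the first candidate in a group numbers its GVNs 0..N-1, and a later structurally-equivalent candidate reuses those canonical numbers through its GVN-to-GVN mapping.

#include <cstdio>
#include <map>

int main() {
  // Candidate A: value numbers in the order they were encountered.
  unsigned GVNsA[] = {13, 5, 21};
  std::map<unsigned, unsigned> NumberToCanonA;
  unsigned CanonNum = 0;
  for (unsigned GVN : GVNsA)
    NumberToCanonA[GVN] = CanonNum++; // 13 -> 0, 5 -> 1, 21 -> 2

  // Candidate B: each of its GVNs maps onto exactly one GVN of A, so it can
  // borrow A's canonical numbers.
  std::map<unsigned, unsigned> BToA = {{7, 13}, {9, 5}, {4, 21}};
  std::map<unsigned, unsigned> NumberToCanonB;
  for (const auto &P : BToA)
    NumberToCanonB[P.first] = NumberToCanonA[P.second];

  for (const auto &P : NumberToCanonB)
    std::printf("GVN %u in B -> canonical %u\n", P.first, P.second);
  return 0;
}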
@@ -774,6 +978,8 @@ static void findCandidateStructures(
// Iterate over the candidates to determine its structural and overlapping
// compatibility with other instructions
+ DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingA;
+ DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingB;
for (CandIt = CandsForRepSubstring.begin(),
CandEndIt = CandsForRepSubstring.end();
CandIt != CandEndIt; CandIt++) {
@@ -792,9 +998,11 @@ static void findCandidateStructures(
// Check if we already have a list of IRSimilarityCandidates for the current
// structural group. Create one if one does not exist.
CurrentGroupPair = StructuralGroups.find(OuterGroupNum);
- if (CurrentGroupPair == StructuralGroups.end())
+ if (CurrentGroupPair == StructuralGroups.end()) {
+ IRSimilarityCandidate::createCanonicalMappingFor(*CandIt);
std::tie(CurrentGroupPair, Inserted) = StructuralGroups.insert(
std::make_pair(OuterGroupNum, SimilarityGroup({*CandIt})));
+ }
// Iterate over the IRSimilarityCandidates following the current
// IRSimilarityCandidate in the list to determine whether the two
@@ -811,11 +1019,15 @@ static void findCandidateStructures(
// Otherwise we determine if they have the same structure and add it to
// vector if they match.
- SameStructure =
- IRSimilarityCandidate::compareStructure(*CandIt, *InnerCandIt);
+ ValueNumberMappingA.clear();
+ ValueNumberMappingB.clear();
+ SameStructure = IRSimilarityCandidate::compareStructure(
+ *CandIt, *InnerCandIt, ValueNumberMappingA, ValueNumberMappingB);
if (!SameStructure)
continue;
+ InnerCandIt->createCanonicalRelationFrom(*CandIt, ValueNumberMappingA,
+ ValueNumberMappingB);
CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum));
CurrentGroupPair->second.push_back(*InnerCandIt);
}
@@ -862,6 +1074,7 @@ SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(
std::vector<IRInstructionData *> InstrList;
std::vector<unsigned> IntegerMapping;
+ Mapper.InstClassifier.EnableBranches = this->EnableBranches;
populateMapper(Modules, InstrList, IntegerMapping);
findCandidates(InstrList, IntegerMapping);
@@ -871,6 +1084,7 @@ SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(
SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) {
resetSimilarityCandidates();
+ Mapper.InstClassifier.EnableBranches = this->EnableBranches;
std::vector<IRInstructionData *> InstrList;
std::vector<unsigned> IntegerMapping;
@@ -891,7 +1105,7 @@ IRSimilarityIdentifierWrapperPass::IRSimilarityIdentifierWrapperPass()
}
bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) {
- IRSI.reset(new IRSimilarityIdentifier());
+ IRSI.reset(new IRSimilarityIdentifier(!DisableBranches));
return false;
}
@@ -907,9 +1121,9 @@ bool IRSimilarityIdentifierWrapperPass::runOnModule(Module &M) {
AnalysisKey IRSimilarityAnalysis::Key;
IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M,
- ModuleAnalysisManager &) {
+ ModuleAnalysisManager &) {
- auto IRSI = IRSimilarityIdentifier();
+ auto IRSI = IRSimilarityIdentifier(!DisableBranches);
IRSI.findSimilarity(M);
return IRSI;
}
diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index fc6051b35efc..c4b7239b43ab 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -43,8 +43,8 @@ using namespace llvm::PatternMatch;
bool RecurrenceDescriptor::areAllUsesIn(Instruction *I,
SmallPtrSetImpl<Instruction *> &Set) {
- for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use)
- if (!Set.count(dyn_cast<Instruction>(*Use)))
+ for (const Use &Use : I->operands())
+ if (!Set.count(dyn_cast<Instruction>(Use)))
return false;
return true;
}
@@ -62,6 +62,8 @@ bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurKind Kind) {
case RecurKind::SMin:
case RecurKind::UMax:
case RecurKind::UMin:
+ case RecurKind::SelectICmp:
+ case RecurKind::SelectFCmp:
return true;
}
return false;
@@ -144,12 +146,9 @@ static std::pair<Type *, bool> computeRecurrenceType(Instruction *Exit,
// meaning that we will use sext instructions instead of zext
// instructions to restore the original type.
IsSigned = true;
- if (!Bits.isNegative())
- // If the value is not known to be negative, we don't known what the
- // upper bit is, and therefore, we don't know what kind of extend we
- // will need. In this case, just increase the bit width by one bit and
- // use sext.
- ++MaxBitWidth;
+ // Make sure at least one sign bit is included in the result, so it
+ // will get properly sign-extended.
+ ++MaxBitWidth;
}
}
if (!isPowerOf2_64(MaxBitWidth))
@@ -199,7 +198,10 @@ static bool checkOrderedReduction(RecurKind Kind, Instruction *ExactFPMathInst,
if (Kind != RecurKind::FAdd)
return false;
- if (Exit->getOpcode() != Instruction::FAdd || Exit != ExactFPMathInst)
+ // Ensure the exit instruction is an FAdd, and that it only has one user
+ // other than the reduction PHI.
+ if (Exit->getOpcode() != Instruction::FAdd || Exit->hasNUsesOrMore(3) ||
+ Exit != ExactFPMathInst)
return false;
// The only pattern accepted is the one in which the reduction PHI
@@ -272,7 +274,7 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
} else if (RecurrenceType->isIntegerTy()) {
if (!isIntegerRecurrenceKind(Kind))
return false;
- if (isArithmeticRecurrenceKind(Kind))
+ if (!isMinMaxRecurrenceKind(Kind))
Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts);
} else {
// Pointer min/max may exist, but it is not supported as a reduction op.
@@ -327,7 +329,8 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
// the starting value (the Phi or an AND instruction if the Phi has been
// type-promoted).
if (Cur != Start) {
- ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, FuncFMF);
+ ReduxDesc =
+ isRecurrenceInstr(TheLoop, Phi, Cur, Kind, ReduxDesc, FuncFMF);
if (!ReduxDesc.isRecurrence())
return false;
// FIXME: FMF is allowed on phi, but propagation is not handled correctly.
@@ -360,6 +363,7 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
// A reduction operation must only have one use of the reduction value.
if (!IsAPhi && !IsASelect && !isMinMaxRecurrenceKind(Kind) &&
+ !isSelectCmpRecurrenceKind(Kind) &&
hasMultipleUsesOf(Cur, VisitedInsts, 1))
return false;
@@ -367,10 +371,10 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts))
return false;
- if (isIntMinMaxRecurrenceKind(Kind) &&
+ if ((isIntMinMaxRecurrenceKind(Kind) || Kind == RecurKind::SelectICmp) &&
(isa<ICmpInst>(Cur) || isa<SelectInst>(Cur)))
++NumCmpSelectPatternInst;
- if (isFPMinMaxRecurrenceKind(Kind) &&
+ if ((isFPMinMaxRecurrenceKind(Kind) || Kind == RecurKind::SelectFCmp) &&
(isa<FCmpInst>(Cur) || isa<SelectInst>(Cur)))
++NumCmpSelectPatternInst;
@@ -423,7 +427,9 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) &&
!isa<SelectInst>(UI)) ||
(!isConditionalRdxPattern(Kind, UI).isRecurrence() &&
- !isMinMaxSelectCmpPattern(UI, IgnoredVal).isRecurrence())))
+ !isSelectCmpPattern(TheLoop, Phi, UI, IgnoredVal)
+ .isRecurrence() &&
+ !isMinMaxPattern(UI, Kind, IgnoredVal).isRecurrence())))
return false;
// Remember that we completed the cycle.
@@ -435,8 +441,13 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
}
// This means we have seen one but not the other instruction of the
- // pattern or more than just a select and cmp.
- if (isMinMaxRecurrenceKind(Kind) && NumCmpSelectPatternInst != 2)
+ // pattern or more than just a select and cmp. Zero implies that we saw an
+ // llvm.min/max intrinsic, which is always OK.
+ if (isMinMaxRecurrenceKind(Kind) && NumCmpSelectPatternInst != 2 &&
+ NumCmpSelectPatternInst != 0)
+ return false;
+
+ if (isSelectCmpRecurrenceKind(Kind) && NumCmpSelectPatternInst != 1)
return false;
if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
@@ -505,11 +516,70 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
return true;
}
+// We are looking for loops that do something like this:
+// int r = 0;
+// for (int i = 0; i < n; i++) {
+// if (src[i] > 3)
+// r = 3;
+// }
+// where the reduction value (r) only has two states, in this example 0 or 3.
+// The generated LLVM IR for this type of loop will be like this:
+// for.body:
+// %r = phi i32 [ %spec.select, %for.body ], [ 0, %entry ]
+// ...
+// %cmp = icmp sgt i32 %5, 3
+// %spec.select = select i1 %cmp, i32 3, i32 %r
+// ...
+// In general we can support vectorization of loops where 'r' flips between
+// any two non-constants, provided they are loop invariant. The only thing
+// we actually care about at the end of the loop is whether or not any lane
+// in the selected vector is different from the start value. The final
+// across-vector reduction after the loop simply involves choosing the start
+// value if nothing changed (0 in the example above) or the other selected
+// value (3 in the example above).
RecurrenceDescriptor::InstDesc
-RecurrenceDescriptor::isMinMaxSelectCmpPattern(Instruction *I,
- const InstDesc &Prev) {
- assert((isa<CmpInst>(I) || isa<SelectInst>(I)) &&
- "Expected a cmp or select instruction");
+RecurrenceDescriptor::isSelectCmpPattern(Loop *Loop, PHINode *OrigPhi,
+ Instruction *I, InstDesc &Prev) {
+ // We must handle the select(cmp(),x,y) as a single instruction. Advance to
+ // the select.
+ CmpInst::Predicate Pred;
+ if (match(I, m_OneUse(m_Cmp(Pred, m_Value(), m_Value())))) {
+ if (auto *Select = dyn_cast<SelectInst>(*I->user_begin()))
+ return InstDesc(Select, Prev.getRecKind());
+ }
+
+ // Only match select with single use cmp condition.
+ if (!match(I, m_Select(m_OneUse(m_Cmp(Pred, m_Value(), m_Value())), m_Value(),
+ m_Value())))
+ return InstDesc(false, I);
+
+ SelectInst *SI = cast<SelectInst>(I);
+ Value *NonPhi = nullptr;
+
+ if (OrigPhi == dyn_cast<PHINode>(SI->getTrueValue()))
+ NonPhi = SI->getFalseValue();
+ else if (OrigPhi == dyn_cast<PHINode>(SI->getFalseValue()))
+ NonPhi = SI->getTrueValue();
+ else
+ return InstDesc(false, I);
+
+ // We are looking for selects of the form:
+ // select(cmp(), phi, loop_invariant) or
+ // select(cmp(), loop_invariant, phi)
+ if (!Loop->isLoopInvariant(NonPhi))
+ return InstDesc(false, I);
+
+ return InstDesc(I, isa<ICmpInst>(I->getOperand(0)) ? RecurKind::SelectICmp
+ : RecurKind::SelectFCmp);
+}
+
+RecurrenceDescriptor::InstDesc
+RecurrenceDescriptor::isMinMaxPattern(Instruction *I, RecurKind Kind,
+ const InstDesc &Prev) {
+ assert((isa<CmpInst>(I) || isa<SelectInst>(I) || isa<CallInst>(I)) &&
+ "Expected a cmp or select or call instruction");
+ if (!isMinMaxRecurrenceKind(Kind))
+ return InstDesc(false, I);
// We must handle the select(cmp()) as a single instruction. Advance to the
// select.
@@ -519,28 +589,33 @@ RecurrenceDescriptor::isMinMaxSelectCmpPattern(Instruction *I,
return InstDesc(Select, Prev.getRecKind());
}
- // Only match select with single use cmp condition.
- if (!match(I, m_Select(m_OneUse(m_Cmp(Pred, m_Value(), m_Value())), m_Value(),
+ // Only match select with single use cmp condition, or a min/max intrinsic.
+ if (!isa<IntrinsicInst>(I) &&
+ !match(I, m_Select(m_OneUse(m_Cmp(Pred, m_Value(), m_Value())), m_Value(),
m_Value())))
return InstDesc(false, I);
// Look for a min/max pattern.
if (match(I, m_UMin(m_Value(), m_Value())))
- return InstDesc(I, RecurKind::UMin);
+ return InstDesc(Kind == RecurKind::UMin, I);
if (match(I, m_UMax(m_Value(), m_Value())))
- return InstDesc(I, RecurKind::UMax);
+ return InstDesc(Kind == RecurKind::UMax, I);
if (match(I, m_SMax(m_Value(), m_Value())))
- return InstDesc(I, RecurKind::SMax);
+ return InstDesc(Kind == RecurKind::SMax, I);
if (match(I, m_SMin(m_Value(), m_Value())))
- return InstDesc(I, RecurKind::SMin);
+ return InstDesc(Kind == RecurKind::SMin, I);
if (match(I, m_OrdFMin(m_Value(), m_Value())))
- return InstDesc(I, RecurKind::FMin);
+ return InstDesc(Kind == RecurKind::FMin, I);
if (match(I, m_OrdFMax(m_Value(), m_Value())))
- return InstDesc(I, RecurKind::FMax);
+ return InstDesc(Kind == RecurKind::FMax, I);
if (match(I, m_UnordFMin(m_Value(), m_Value())))
- return InstDesc(I, RecurKind::FMin);
+ return InstDesc(Kind == RecurKind::FMin, I);
if (match(I, m_UnordFMax(m_Value(), m_Value())))
- return InstDesc(I, RecurKind::FMax);
+ return InstDesc(Kind == RecurKind::FMax, I);
+ if (match(I, m_Intrinsic<Intrinsic::minnum>(m_Value(), m_Value())))
+ return InstDesc(Kind == RecurKind::FMin, I);
+ if (match(I, m_Intrinsic<Intrinsic::maxnum>(m_Value(), m_Value())))
+ return InstDesc(Kind == RecurKind::FMax, I);
return InstDesc(false, I);
}
@@ -592,8 +667,10 @@ RecurrenceDescriptor::isConditionalRdxPattern(RecurKind Kind, Instruction *I) {
}
RecurrenceDescriptor::InstDesc
-RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurKind Kind,
- InstDesc &Prev, FastMathFlags FMF) {
+RecurrenceDescriptor::isRecurrenceInstr(Loop *L, PHINode *OrigPhi,
+ Instruction *I, RecurKind Kind,
+ InstDesc &Prev, FastMathFlags FuncFMF) {
+ assert(Prev.getRecKind() == RecurKind::None || Prev.getRecKind() == Kind);
switch (I->getOpcode()) {
default:
return InstDesc(false, I);
@@ -624,9 +701,15 @@ RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurKind Kind,
LLVM_FALLTHROUGH;
case Instruction::FCmp:
case Instruction::ICmp:
+ case Instruction::Call:
+ if (isSelectCmpRecurrenceKind(Kind))
+ return isSelectCmpPattern(L, OrigPhi, I, Prev);
if (isIntMinMaxRecurrenceKind(Kind) ||
- (FMF.noNaNs() && FMF.noSignedZeros() && isFPMinMaxRecurrenceKind(Kind)))
- return isMinMaxSelectCmpPattern(I, Prev);
+ (((FuncFMF.noNaNs() && FuncFMF.noSignedZeros()) ||
+ (isa<FPMathOperator>(I) && I->hasNoNaNs() &&
+ I->hasNoSignedZeros())) &&
+ isFPMinMaxRecurrenceKind(Kind)))
+ return isMinMaxPattern(I, Kind, Prev);
return InstDesc(false, I);
}
}
@@ -649,7 +732,6 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
RecurrenceDescriptor &RedDes,
DemandedBits *DB, AssumptionCache *AC,
DominatorTree *DT) {
-
BasicBlock *Header = TheLoop->getHeader();
Function &F = *Header->getParent();
FastMathFlags FMF;
@@ -694,6 +776,12 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
LLVM_DEBUG(dbgs() << "Found a UMIN reduction PHI." << *Phi << "\n");
return true;
}
+ if (AddReductionVar(Phi, RecurKind::SelectICmp, TheLoop, FMF, RedDes, DB, AC,
+ DT)) {
+ LLVM_DEBUG(dbgs() << "Found an integer conditional select reduction PHI."
+ << *Phi << "\n");
+ return true;
+ }
if (AddReductionVar(Phi, RecurKind::FMul, TheLoop, FMF, RedDes, DB, AC, DT)) {
LLVM_DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n");
return true;
@@ -710,6 +798,12 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
LLVM_DEBUG(dbgs() << "Found a float MIN reduction PHI." << *Phi << "\n");
return true;
}
+ if (AddReductionVar(Phi, RecurKind::SelectFCmp, TheLoop, FMF, RedDes, DB, AC,
+ DT)) {
+ LLVM_DEBUG(dbgs() << "Found a float conditional select reduction PHI."
+ << " PHI." << *Phi << "\n");
+ return true;
+ }
// Not a reduction of known type.
return false;
}
@@ -816,8 +910,8 @@ bool RecurrenceDescriptor::isFirstOrderRecurrence(
/// This function returns the identity element (or neutral element) for
/// the operation K.
-Constant *RecurrenceDescriptor::getRecurrenceIdentity(RecurKind K, Type *Tp,
- FastMathFlags FMF) {
+Value *RecurrenceDescriptor::getRecurrenceIdentity(RecurKind K, Type *Tp,
+ FastMathFlags FMF) {
switch (K) {
case RecurKind::Xor:
case RecurKind::Add:
@@ -857,6 +951,10 @@ Constant *RecurrenceDescriptor::getRecurrenceIdentity(RecurKind K, Type *Tp,
return ConstantFP::getInfinity(Tp, true);
case RecurKind::FMax:
return ConstantFP::getInfinity(Tp, false);
+ case RecurKind::SelectICmp:
+ case RecurKind::SelectFCmp:
+ return getRecurrenceStartValue();
+ break;
default:
llvm_unreachable("Unknown recurrence kind");
}
@@ -882,9 +980,11 @@ unsigned RecurrenceDescriptor::getOpcode(RecurKind Kind) {
case RecurKind::SMin:
case RecurKind::UMax:
case RecurKind::UMin:
+ case RecurKind::SelectICmp:
return Instruction::ICmp;
case RecurKind::FMax:
case RecurKind::FMin:
+ case RecurKind::SelectFCmp:
return Instruction::FCmp;
default:
llvm_unreachable("Unknown recurrence operation");
@@ -963,8 +1063,10 @@ RecurrenceDescriptor::getReductionOpChain(PHINode *Phi, Loop *L) const {
InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
const SCEV *Step, BinaryOperator *BOp,
+ Type *ElementType,
SmallVectorImpl<Instruction *> *Casts)
- : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) {
+ : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp),
+ ElementType(ElementType) {
assert(IK != IK_NoInduction && "Not an induction");
// Start value type should match the induction kind and the value
@@ -992,6 +1094,11 @@ InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
InductionBinOp->getOpcode() == Instruction::FSub))) &&
"Binary opcode should be specified for FP induction");
+ if (IK == IK_PtrInduction)
+ assert(ElementType && "Pointer induction must have element type");
+ else
+ assert(!ElementType && "Non-pointer induction cannot have element type");
+
if (Casts) {
for (auto &Inst : *Casts) {
RedundantCasts.push_back(Inst);
@@ -1239,8 +1346,6 @@ bool InductionDescriptor::isInductionPHI(
BasicBlock *Latch = AR->getLoop()->getLoopLatch();
if (!Latch)
return false;
- BinaryOperator *BOp =
- dyn_cast<BinaryOperator>(Phi->getIncomingValueForBlock(Latch));
const SCEV *Step = AR->getStepRecurrence(*SE);
// Calculate the pointer stride and check if it is consecutive.
@@ -1250,8 +1355,10 @@ bool InductionDescriptor::isInductionPHI(
return false;
if (PhiTy->isIntegerTy()) {
+ BinaryOperator *BOp =
+ dyn_cast<BinaryOperator>(Phi->getIncomingValueForBlock(Latch));
D = InductionDescriptor(StartValue, IK_IntInduction, Step, BOp,
- CastsToIgnore);
+ /* ElementType */ nullptr, CastsToIgnore);
return true;
}
@@ -1260,15 +1367,16 @@ bool InductionDescriptor::isInductionPHI(
if (!ConstStep)
return false;
- ConstantInt *CV = ConstStep->getValue();
- Type *PointerElementType = PhiTy->getPointerElementType();
- // The pointer stride cannot be determined if the pointer element type is not
- // sized.
- if (!PointerElementType->isSized())
+ // Always use i8 element type for opaque pointer inductions.
+ PointerType *PtrTy = cast<PointerType>(PhiTy);
+ Type *ElementType = PtrTy->isOpaque() ? Type::getInt8Ty(PtrTy->getContext())
+ : PtrTy->getElementType();
+ if (!ElementType->isSized())
return false;
+ ConstantInt *CV = ConstStep->getValue();
const DataLayout &DL = Phi->getModule()->getDataLayout();
- int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(PointerElementType));
+ int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(ElementType));
if (!Size)
return false;
@@ -1277,6 +1385,7 @@ bool InductionDescriptor::isInductionPHI(
return false;
auto *StepValue =
SE->getConstant(CV->getType(), CVSize / Size, true /* signed */);
- D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue, BOp);
+ D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue,
+ /* BinOp */ nullptr, ElementType);
return true;
}
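A worked-arithmetic sketch, not part of the patch, of the pointer-induction stride computed above: the step in elements is the byte step of the recurrence divided by the element size, and with opaque pointers the element type is pinned to i8 so the step stays in bytes. The numbers below are illustrative.

#include <cstdio>

int main() {
  long long CVSize = 8;      // byte step of the pointer recurrence
  long long TypedSize = 4;   // alloc size of i32 for a typed pointer
  long long OpaqueSize = 1;  // i8 element used for opaque pointers
  std::printf("typed step  = %lld elements\n", CVSize / TypedSize);  // 2
  std::printf("opaque step = %lld elements\n", CVSize / OpaqueSize); // 8
  return 0;
}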
diff --git a/llvm/lib/Analysis/IVUsers.cpp b/llvm/lib/Analysis/IVUsers.cpp
index db6cff720642..d7b202f83189 100644
--- a/llvm/lib/Analysis/IVUsers.cpp
+++ b/llvm/lib/Analysis/IVUsers.cpp
@@ -90,34 +90,6 @@ static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L,
return false;
}
-/// Return true if all loop headers that dominate this block are in simplified
-/// form.
-static bool isSimplifiedLoopNest(BasicBlock *BB, const DominatorTree *DT,
- const LoopInfo *LI,
- SmallPtrSetImpl<Loop*> &SimpleLoopNests) {
- Loop *NearestLoop = nullptr;
- for (DomTreeNode *Rung = DT->getNode(BB);
- Rung; Rung = Rung->getIDom()) {
- BasicBlock *DomBB = Rung->getBlock();
- Loop *DomLoop = LI->getLoopFor(DomBB);
- if (DomLoop && DomLoop->getHeader() == DomBB) {
- // If we have already checked this loop nest, stop checking.
- if (SimpleLoopNests.count(DomLoop))
- break;
- // If the domtree walk reaches a loop with no preheader, return false.
- if (!DomLoop->isLoopSimplifyForm())
- return false;
- // If we have not already checked this loop nest, remember the loop
- // header nearest to BB. The nearest loop may not contain BB.
- if (!NearestLoop)
- NearestLoop = DomLoop;
- }
- }
- if (NearestLoop)
- SimpleLoopNests.insert(NearestLoop);
- return true;
-}
-
/// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression
/// and now we need to decide whether the user should use the preinc or post-inc
/// value. If this user should use the post-inc version of the IV, return true.
@@ -162,11 +134,10 @@ static bool IVUseShouldUsePostIncValue(Instruction *User, Value *Operand,
return true;
}
-/// AddUsersImpl - Inspect the specified instruction. If it is a
-/// reducible SCEV, recursively add its users to the IVUsesByStride set and
-/// return true. Otherwise, return false.
-bool IVUsers::AddUsersImpl(Instruction *I,
- SmallPtrSetImpl<Loop*> &SimpleLoopNests) {
+/// Inspect the specified instruction. If it is a reducible SCEV, recursively
+/// add its users to the IVUsesByStride set and return true. Otherwise, return
+/// false.
+bool IVUsers::AddUsersIfInteresting(Instruction *I) {
const DataLayout &DL = I->getModule()->getDataLayout();
// Add this IV user to the Processed set before returning false to ensure that
@@ -213,18 +184,6 @@ bool IVUsers::AddUsersImpl(Instruction *I,
if (isa<PHINode>(User) && Processed.count(User))
continue;
- // Only consider IVUsers that are dominated by simplified loop
- // headers. Otherwise, SCEVExpander will crash.
- BasicBlock *UseBB = User->getParent();
- // A phi's use is live out of its predecessor block.
- if (PHINode *PHI = dyn_cast<PHINode>(User)) {
- unsigned OperandNo = U.getOperandNo();
- unsigned ValNo = PHINode::getIncomingValueNumForOperand(OperandNo);
- UseBB = PHI->getIncomingBlock(ValNo);
- }
- if (!isSimplifiedLoopNest(UseBB, DT, LI, SimpleLoopNests))
- return false;
-
// Descend recursively, but not into PHI nodes outside the current loop.
// It's important to see the entire expression outside the loop to get
// choices that depend on addressing mode use right, although we won't
@@ -234,12 +193,12 @@ bool IVUsers::AddUsersImpl(Instruction *I,
bool AddUserToIVUsers = false;
if (LI->getLoopFor(User->getParent()) != L) {
if (isa<PHINode>(User) || Processed.count(User) ||
- !AddUsersImpl(User, SimpleLoopNests)) {
+ !AddUsersIfInteresting(User)) {
LLVM_DEBUG(dbgs() << "FOUND USER in other loop: " << *User << '\n'
<< " OF SCEV: " << *ISE << '\n');
AddUserToIVUsers = true;
}
- } else if (Processed.count(User) || !AddUsersImpl(User, SimpleLoopNests)) {
+ } else if (Processed.count(User) || !AddUsersIfInteresting(User)) {
LLVM_DEBUG(dbgs() << "FOUND USER: " << *User << '\n'
<< " OF SCEV: " << *ISE << '\n');
AddUserToIVUsers = true;
@@ -288,15 +247,6 @@ bool IVUsers::AddUsersImpl(Instruction *I,
return true;
}
-bool IVUsers::AddUsersIfInteresting(Instruction *I) {
- // SCEVExpander can only handle users that are dominated by simplified loop
- // entries. Keep track of all loops that are only dominated by other simple
- // loops so we don't traverse the domtree for each user.
- SmallPtrSet<Loop*,16> SimpleLoopNests;
-
- return AddUsersImpl(I, SimpleLoopNests);
-}
-
IVStrideUse &IVUsers::AddUser(Instruction *User, Value *Operand) {
IVUses.push_back(new IVStrideUse(this, User, Operand));
return IVUses.back();
diff --git a/llvm/lib/Analysis/InlineAdvisor.cpp b/llvm/lib/Analysis/InlineAdvisor.cpp
index a8ad2d6696bf..73d1eff1b968 100644
--- a/llvm/lib/Analysis/InlineAdvisor.cpp
+++ b/llvm/lib/Analysis/InlineAdvisor.cpp
@@ -49,6 +49,42 @@ static cl::opt<int>
extern cl::opt<InlinerFunctionImportStatsOpts> InlinerFunctionImportStats;
+namespace {
+using namespace llvm::ore;
+class MandatoryInlineAdvice : public InlineAdvice {
+public:
+ MandatoryInlineAdvice(InlineAdvisor *Advisor, CallBase &CB,
+ OptimizationRemarkEmitter &ORE,
+ bool IsInliningMandatory)
+ : InlineAdvice(Advisor, CB, ORE, IsInliningMandatory) {}
+
+private:
+ void recordInliningWithCalleeDeletedImpl() override { recordInliningImpl(); }
+
+ void recordInliningImpl() override {
+ if (IsInliningRecommended)
+ emitInlinedInto(ORE, DLoc, Block, *Callee, *Caller, IsInliningRecommended,
+ [&](OptimizationRemark &Remark) {
+ Remark << ": always inline attribute";
+ });
+ }
+
+ void recordUnsuccessfulInliningImpl(const InlineResult &Result) override {
+ if (IsInliningRecommended)
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block)
+ << "'" << NV("Callee", Callee) << "' is not AlwaysInline into '"
+ << NV("Caller", Caller)
+ << "': " << NV("Reason", Result.getFailureReason());
+ });
+ }
+
+ void recordUnattemptedInliningImpl() override {
+ assert(!IsInliningRecommended && "Expected to attempt inlining");
+ }
+};
+} // namespace
+
void DefaultInlineAdvice::recordUnsuccessfulInliningImpl(
const InlineResult &Result) {
using namespace ore;
@@ -56,20 +92,20 @@ void DefaultInlineAdvice::recordUnsuccessfulInliningImpl(
"; " + inlineCostStr(*OIC));
ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block)
- << NV("Callee", Callee) << " will not be inlined into "
- << NV("Caller", Caller) << ": "
- << NV("Reason", Result.getFailureReason());
+ << "'" << NV("Callee", Callee) << "' is not inlined into '"
+ << NV("Caller", Caller)
+ << "': " << NV("Reason", Result.getFailureReason());
});
}
void DefaultInlineAdvice::recordInliningWithCalleeDeletedImpl() {
if (EmitRemarks)
- emitInlinedInto(ORE, DLoc, Block, *Callee, *Caller, *OIC);
+ emitInlinedIntoBasedOnCost(ORE, DLoc, Block, *Callee, *Caller, *OIC);
}
void DefaultInlineAdvice::recordInliningImpl() {
if (EmitRemarks)
- emitInlinedInto(ORE, DLoc, Block, *Callee, *Caller, *OIC);
+ emitInlinedIntoBasedOnCost(ORE, DLoc, Block, *Callee, *Caller, *OIC);
}
llvm::Optional<llvm::InlineCost> static getDefaultInlineAdvice(
@@ -151,9 +187,9 @@ void InlineAdvice::recordInliningWithCalleeDeleted() {
AnalysisKey InlineAdvisorAnalysis::Key;
-bool InlineAdvisorAnalysis::Result::tryCreate(InlineParams Params,
- InliningAdvisorMode Mode,
- StringRef ReplayFile) {
+bool InlineAdvisorAnalysis::Result::tryCreate(
+ InlineParams Params, InliningAdvisorMode Mode,
+ const ReplayInlinerSettings &ReplaySettings) {
auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
switch (Mode) {
case InliningAdvisorMode::Default:
@@ -161,10 +197,10 @@ bool InlineAdvisorAnalysis::Result::tryCreate(InlineParams Params,
Advisor.reset(new DefaultInlineAdvisor(M, FAM, Params));
// Restrict replay to default advisor, ML advisors are stateful so
// replay will need augmentations to interleave with them correctly.
- if (!ReplayFile.empty()) {
- Advisor = std::make_unique<ReplayInlineAdvisor>(
- M, FAM, M.getContext(), std::move(Advisor), ReplayFile,
- /* EmitRemarks =*/true);
+ if (!ReplaySettings.ReplayFile.empty()) {
+ Advisor = llvm::getReplayInlineAdvisor(M, FAM, M.getContext(),
+ std::move(Advisor), ReplaySettings,
+ /* EmitRemarks =*/true);
}
break;
case InliningAdvisorMode::Development:
@@ -313,7 +349,7 @@ void llvm::setInlineRemark(CallBase &CB, StringRef Message) {
return;
Attribute Attr = Attribute::get(CB.getContext(), "inline-remark", Message);
- CB.addAttribute(AttributeList::FunctionIndex, Attr);
+ CB.addFnAttr(Attr);
}
/// Return the cost only if the inliner should attempt to inline at the given
@@ -343,15 +379,15 @@ llvm::shouldInline(CallBase &CB,
if (IC.isNever()) {
ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
- << NV("Callee", Callee) << " not inlined into "
- << NV("Caller", Caller) << " because it should never be inlined "
- << IC;
+ << "'" << NV("Callee", Callee) << "' not inlined into '"
+ << NV("Caller", Caller)
+ << "' because it should never be inlined " << IC;
});
} else {
ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "TooCostly", Call)
- << NV("Callee", Callee) << " not inlined into "
- << NV("Caller", Caller) << " because too costly to inline "
+ << "'" << NV("Callee", Callee) << "' not inlined into '"
+ << NV("Caller", Caller) << "' because too costly to inline "
<< IC;
});
}
@@ -368,9 +404,9 @@ llvm::shouldInline(CallBase &CB,
ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "IncreaseCostInOtherContexts",
Call)
- << "Not inlining. Cost of inlining " << NV("Callee", Callee)
- << " increases the cost of inlining " << NV("Caller", Caller)
- << " in other contexts";
+ << "Not inlining. Cost of inlining '" << NV("Callee", Callee)
+ << "' increases the cost of inlining '" << NV("Caller", Caller)
+ << "' in other contexts";
});
setInlineRemark(CB, "deferred");
// IC does not bool() to false, so get an InlineCost that will.
@@ -383,7 +419,8 @@ llvm::shouldInline(CallBase &CB,
return IC;
}
-std::string llvm::getCallSiteLocation(DebugLoc DLoc) {
+std::string llvm::formatCallSiteLocation(DebugLoc DLoc,
+ const CallSiteFormat &Format) {
std::string Buffer;
raw_string_ostream CallSiteLoc(Buffer);
bool First = true;
@@ -399,9 +436,10 @@ std::string llvm::getCallSiteLocation(DebugLoc DLoc) {
StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName();
if (Name.empty())
Name = DIL->getScope()->getSubprogram()->getName();
- CallSiteLoc << Name.str() << ":" << llvm::utostr(Offset) << ":"
- << llvm::utostr(DIL->getColumn());
- if (Discriminator)
+ CallSiteLoc << Name.str() << ":" << llvm::utostr(Offset);
+ if (Format.outputColumn())
+ CallSiteLoc << ":" << llvm::utostr(DIL->getColumn());
+ if (Format.outputDiscriminator() && Discriminator)
CallSiteLoc << "." << llvm::utostr(Discriminator);
First = false;
}
@@ -435,25 +473,38 @@ void llvm::addLocationToRemarks(OptimizationRemark &Remark, DebugLoc DLoc) {
Remark << ";";
}
-void llvm::emitInlinedInto(OptimizationRemarkEmitter &ORE, DebugLoc DLoc,
- const BasicBlock *Block, const Function &Callee,
- const Function &Caller, const InlineCost &IC,
- bool ForProfileContext, const char *PassName) {
+void llvm::emitInlinedInto(
+ OptimizationRemarkEmitter &ORE, DebugLoc DLoc, const BasicBlock *Block,
+ const Function &Callee, const Function &Caller, bool AlwaysInline,
+ function_ref<void(OptimizationRemark &)> ExtraContext,
+ const char *PassName) {
ORE.emit([&]() {
- bool AlwaysInline = IC.isAlways();
StringRef RemarkName = AlwaysInline ? "AlwaysInline" : "Inlined";
OptimizationRemark Remark(PassName ? PassName : DEBUG_TYPE, RemarkName,
DLoc, Block);
- Remark << ore::NV("Callee", &Callee) << " inlined into ";
- Remark << ore::NV("Caller", &Caller);
- if (ForProfileContext)
- Remark << " to match profiling context";
- Remark << " with " << IC;
+ Remark << "'" << ore::NV("Callee", &Callee) << "' inlined into '"
+ << ore::NV("Caller", &Caller) << "'";
+ if (ExtraContext)
+ ExtraContext(Remark);
addLocationToRemarks(Remark, DLoc);
return Remark;
});
}
+void llvm::emitInlinedIntoBasedOnCost(
+ OptimizationRemarkEmitter &ORE, DebugLoc DLoc, const BasicBlock *Block,
+ const Function &Callee, const Function &Caller, const InlineCost &IC,
+ bool ForProfileContext, const char *PassName) {
+ llvm::emitInlinedInto(
+ ORE, DLoc, Block, Callee, Caller, IC.isAlways(),
+ [&](OptimizationRemark &Remark) {
+ if (ForProfileContext)
+ Remark << " to match profiling context";
+ Remark << " with " << IC;
+ },
+ PassName);
+}
+
InlineAdvisor::InlineAdvisor(Module &M, FunctionAnalysisManager &FAM)
: M(M), FAM(FAM) {
if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) {
@@ -475,7 +526,8 @@ InlineAdvisor::~InlineAdvisor() {
std::unique_ptr<InlineAdvice> InlineAdvisor::getMandatoryAdvice(CallBase &CB,
bool Advice) {
- return std::make_unique<InlineAdvice>(this, CB, getCallerORE(CB), Advice);
+ return std::make_unique<MandatoryInlineAdvice>(this, CB, getCallerORE(CB),
+ Advice);
}
InlineAdvisor::MandatoryInliningKind
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index 4c2413e14435..ff31e81aad08 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -135,6 +135,31 @@ static cl::opt<bool> DisableGEPConstOperand(
namespace {
class InlineCostCallAnalyzer;
+/// This function behaves more like CallBase::hasFnAttr: when it looks for the
+/// requested attribute, it checks both the call instruction and the called
+/// function (if it's available and operand bundles don't prohibit that).
+Attribute getFnAttr(CallBase &CB, StringRef AttrKind) {
+ Attribute CallAttr = CB.getFnAttr(AttrKind);
+ if (CallAttr.isValid())
+ return CallAttr;
+
+ // Operand bundles override attributes on the called function, but don't
+ // override attributes directly present on the call instruction.
+ if (!CB.isFnAttrDisallowedByOpBundle(AttrKind))
+ if (const Function *F = CB.getCalledFunction())
+ return F->getFnAttribute(AttrKind);
+
+ return {};
+}
+
+Optional<int> getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind) {
+ Attribute Attr = getFnAttr(CB, AttrKind);
+ int AttrValue;
+ if (Attr.getValueAsString().getAsInteger(10, AttrValue))
+ return None;
+ return AttrValue;
+}
+
// This struct is used to store information about inline cost of a
// particular instruction
struct InstructionCostDetail {
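A hedged sketch of how a client might attach the string attribute that getStringFnAttrAsInt reads back; the helper name is invented, while the attribute key and the Attribute/CallBase calls come from the patch and existing LLVM headers.

#include <string>
#include "llvm/IR/Attributes.h"
#include "llvm/IR/InstrTypes.h"

// Pin the final inline cost computed for one call site; the analyzer later
// reads it back via getStringFnAttrAsInt(CB, "function-inline-cost").
static void pinInlineCost(llvm::CallBase &CB, int Cost) {
  CB.addFnAttr(llvm::Attribute::get(CB.getContext(), "function-inline-cost",
                                    std::to_string(Cost)));
}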
@@ -235,6 +260,10 @@ protected:
/// Called when the analysis engine determines load elimination won't happen.
virtual void onDisableLoadElimination() {}
+ /// Called when we visit a CallBase, before the analysis starts. Return false
+ /// to stop further processing of the instruction.
+ virtual bool onCallBaseVisitStart(CallBase &Call) { return true; }
+
/// Called to account for a call.
virtual void onCallPenalty() {}
@@ -333,6 +362,10 @@ protected:
/// whenever we simplify away the stores that would otherwise cause them to be
/// loads.
bool EnableLoadElimination;
+
+ /// Whether we allow inlining of recursive calls.
+ bool AllowRecursiveCall;
+
SmallPtrSet<Value *, 16> LoadAddrSet;
AllocaInst *getSROAArgForValueOrNull(Value *V) const {
@@ -354,6 +387,7 @@ protected:
bool simplifyCallSite(Function *F, CallBase &Call);
template <typename Callable>
bool simplifyInstruction(Instruction &I, Callable Evaluate);
+ bool simplifyIntrinsicCallIsConstant(CallBase &CB);
ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
/// Return true if the given argument to the function being considered for
@@ -421,7 +455,8 @@ public:
OptimizationRemarkEmitter *ORE = nullptr)
: TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE),
- CandidateCall(Call), EnableLoadElimination(true) {}
+ CandidateCall(Call), EnableLoadElimination(true),
+ AllowRecursiveCall(false) {}
InlineResult analyze();
@@ -510,6 +545,9 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
// sense that it's not weighted by profile counts at all.
int ColdSize = 0;
+ // Whether inlining is decided by cost-threshold analysis.
+ bool DecidedByCostThreshold = false;
+
// Whether inlining is decided by cost-benefit analysis.
bool DecidedByCostBenefit = false;
@@ -558,6 +596,22 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
addCost(LoadEliminationCost);
LoadEliminationCost = 0;
}
+
+ bool onCallBaseVisitStart(CallBase &Call) override {
+ if (Optional<int> AttrCallThresholdBonus =
+ getStringFnAttrAsInt(Call, "call-threshold-bonus"))
+ Threshold += *AttrCallThresholdBonus;
+
+ if (Optional<int> AttrCallCost =
+ getStringFnAttrAsInt(Call, "call-inline-cost")) {
+ addCost(*AttrCallCost);
+ // Prevent further processing of the call since we want to override its
+ // inline cost, not just add to it.
+ return false;
+ }
+ return true;
+ }
+
void onCallPenalty() override { addCost(CallPenalty); }
void onCallArgumentSetup(const CallBase &Call) override {
// Pay the price of the argument setup. We account for the average 1
@@ -717,7 +771,7 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
// Make sure we have a nonzero entry count.
auto EntryCount = F.getEntryCount();
- if (!EntryCount || !EntryCount.getCount())
+ if (!EntryCount || !EntryCount->getCount())
return false;
BlockFrequencyInfo *CalleeBFI = &(GetBFI(F));
@@ -763,7 +817,7 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
if (BranchInst *BI = dyn_cast<BranchInst>(&I)) {
// Count a conditional branch as savings if it becomes unconditional.
if (BI->isConditional() &&
- dyn_cast_or_null<ConstantInt>(
+ isa_and_nonnull<ConstantInt>(
SimplifiedValues.lookup(BI->getCondition()))) {
CurrentSavings += InlineConstants::InstrCost;
}
@@ -783,8 +837,8 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
// Compute the cycle savings per call.
auto EntryProfileCount = F.getEntryCount();
- assert(EntryProfileCount.hasValue() && EntryProfileCount.getCount());
- auto EntryCount = EntryProfileCount.getCount();
+ assert(EntryProfileCount.hasValue() && EntryProfileCount->getCount());
+ auto EntryCount = EntryProfileCount->getCount();
CycleSavings += EntryCount / 2;
CycleSavings = CycleSavings.udiv(EntryCount);
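A quick arithmetic sketch, with invented numbers, of the two lines above: adding EntryCount/2 before the unsigned division turns it into round-to-nearest rather than truncating division.

#include <cstdio>

int main() {
  unsigned long long CycleSavings = 1000, EntryCount = 300;
  unsigned long long PerCall = (CycleSavings + EntryCount / 2) / EntryCount;
  std::printf("%llu\n", PerCall); // 3, since 1000/300 = 3.33 rounds to 3
  return 0;
}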
@@ -847,6 +901,14 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
else if (NumVectorInstructions <= NumInstructions / 2)
Threshold -= VectorBonus / 2;
+ if (Optional<int> AttrCost =
+ getStringFnAttrAsInt(CandidateCall, "function-inline-cost"))
+ Cost = *AttrCost;
+
+ if (Optional<int> AttrThreshold =
+ getStringFnAttrAsInt(CandidateCall, "function-inline-threshold"))
+ Threshold = *AttrThreshold;
+
if (auto Result = costBenefitAnalysis()) {
DecidedByCostBenefit = true;
if (Result.getValue())
@@ -855,14 +917,24 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
return InlineResult::failure("Cost over threshold.");
}
- if (IgnoreThreshold || Cost < std::max(1, Threshold))
+ if (IgnoreThreshold)
return InlineResult::success();
- return InlineResult::failure("Cost over threshold.");
+
+ DecidedByCostThreshold = true;
+ return Cost < std::max(1, Threshold)
+ ? InlineResult::success()
+ : InlineResult::failure("Cost over threshold.");
}
+
bool shouldStop() override {
+ if (IgnoreThreshold || ComputeFullInlineCost)
+ return false;
// Bail out the moment we cross the threshold. This means we'll under-count
// the cost, but only when undercounting doesn't matter.
- return !IgnoreThreshold && Cost >= Threshold && !ComputeFullInlineCost;
+ if (Cost < Threshold)
+ return false;
+ DecidedByCostThreshold = true;
+ return true;
}
void onLoadEliminationOpportunity() override {
@@ -930,7 +1002,9 @@ public:
Params(Params), Threshold(Params.DefaultThreshold),
BoostIndirectCalls(BoostIndirect), IgnoreThreshold(IgnoreThreshold),
CostBenefitAnalysisEnabled(isCostBenefitAnalysisEnabled()),
- Writer(this) {}
+ Writer(this) {
+ AllowRecursiveCall = Params.AllowRecursiveCall.getValue();
+ }
/// Annotation Writer for instruction details
InlineCostAnnotationWriter Writer;
@@ -939,7 +1013,7 @@ public:
// Prints the same analysis as dump(), but its definition is not dependent
// on the build.
- void print();
+ void print(raw_ostream &OS);
Optional<InstructionCostDetail> getCostDetails(const Instruction *I) {
if (InstructionCostDetailMap.find(I) != InstructionCostDetailMap.end())
@@ -952,6 +1026,7 @@ public:
int getCost() const { return Cost; }
Optional<CostBenefitPair> getCostBenefitPair() { return CostBenefit; }
bool wasDecidedByCostBenefit() const { return DecidedByCostBenefit; }
+ bool wasDecidedByCostThreshold() const { return DecidedByCostThreshold; }
};
class InlineCostFeaturesAnalyzer final : public CallAnalyzer {
@@ -1310,7 +1385,7 @@ bool CallAnalyzer::visitPHI(PHINode &I) {
// Or could we skip the getPointerSizeInBits call completely? As far as I can
// see the ZeroOffset is used as a dummy value, so we can probably use any
// bit width for the ZeroOffset?
- APInt ZeroOffset = APInt::getNullValue(DL.getPointerSizeInBits(0));
+ APInt ZeroOffset = APInt::getZero(DL.getPointerSizeInBits(0));
bool CheckSROA = I.getType()->isPointerTy();
// Track the constant or pointer with constant offset we've seen so far.
@@ -1471,6 +1546,27 @@ bool CallAnalyzer::simplifyInstruction(Instruction &I, Callable Evaluate) {
return true;
}
+/// Try to simplify a call to llvm.is.constant.
+///
+/// Duplicate the argument checking from CallAnalyzer::simplifyCallSite since
+/// we expect calls of this specific intrinsic to be infrequent.
+///
+/// FIXME: Given that we know CB's parent (F) caller
+/// (CandidateCall->getParent()->getParent()), we might be able to determine
+/// whether inlining F into F's caller would change how the call to
+/// llvm.is.constant would evaluate.
+bool CallAnalyzer::simplifyIntrinsicCallIsConstant(CallBase &CB) {
+ Value *Arg = CB.getArgOperand(0);
+ auto *C = dyn_cast<Constant>(Arg);
+
+ if (!C)
+ C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(Arg));
+
+ Type *RT = CB.getFunctionType()->getReturnType();
+ SimplifiedValues[&CB] = ConstantInt::get(RT, C ? 1 : 0);
+ return true;
+}
+
bool CallAnalyzer::visitBitCast(BitCastInst &I) {
// Propagate constants through bitcasts.
if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
@@ -1799,8 +1895,8 @@ void InlineCostCallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) {
SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
VectorBonus = Threshold * VectorBonusPercent / 100;
- bool OnlyOneCallAndLocalLinkage =
- F.hasLocalLinkage() && F.hasOneUse() && &F == Call.getCalledFunction();
+ bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneLiveUse() &&
+ &F == Call.getCalledFunction();
// If there is only one call of the function, and it has internal linkage,
// the cost of inlining it drops dramatically. It may seem odd to update
// Cost in updateThreshold, but the bonus depends on the logic in this method.
@@ -2029,6 +2125,9 @@ bool CallAnalyzer::simplifyCallSite(Function *F, CallBase &Call) {
}
bool CallAnalyzer::visitCallBase(CallBase &Call) {
+ if (!onCallBaseVisitStart(Call))
+ return true;
+
if (Call.hasFnAttr(Attribute::ReturnsTwice) &&
!F.hasFnAttribute(Attribute::ReturnsTwice)) {
// This aborts the entire analysis.
@@ -2091,6 +2190,8 @@ bool CallAnalyzer::visitCallBase(CallBase &Call) {
if (auto *SROAArg = getSROAArgForValueOrNull(II->getOperand(0)))
SROAArgValues[II] = SROAArg;
return true;
+ case Intrinsic::is_constant:
+ return simplifyIntrinsicCallIsConstant(Call);
}
}
@@ -2098,7 +2199,8 @@ bool CallAnalyzer::visitCallBase(CallBase &Call) {
// This flag will fully abort the analysis, so don't bother with anything
// else.
IsRecursiveCall = true;
- return false;
+ if (!AllowRecursiveCall)
+ return false;
}
if (TTI.isLoweredToCall(F)) {
@@ -2123,7 +2225,7 @@ bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
// inliner more regular and predictable. Interestingly, conditional branches
// which will fold away are also free.
return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) ||
- dyn_cast_or_null<ConstantInt>(
+ isa_and_nonnull<ConstantInt>(
SimplifiedValues.lookup(BI.getCondition()));
}
@@ -2305,11 +2407,8 @@ CallAnalyzer::analyzeBlock(BasicBlock *BB,
// inlining due to debug symbols. Eventually, the number of unsimplified
// instructions shouldn't factor into the cost computation, but until then,
// hack around it here.
- if (isa<DbgInfoIntrinsic>(I))
- continue;
-
- // Skip pseudo-probes.
- if (isa<PseudoProbeInst>(I))
+ // Similarly, skip pseudo-probes.
+ if (I.isDebugOrPseudoInst())
continue;
// Skip ephemeral values.
@@ -2336,7 +2435,7 @@ CallAnalyzer::analyzeBlock(BasicBlock *BB,
using namespace ore;
// If the visit this instruction detected an uninlinable pattern, abort.
InlineResult IR = InlineResult::success();
- if (IsRecursiveCall)
+ if (IsRecursiveCall && !AllowRecursiveCall)
IR = InlineResult::failure("recursive");
else if (ExposesReturnsTwice)
IR = InlineResult::failure("exposes returns twice");
@@ -2398,7 +2497,7 @@ ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
unsigned AS = V->getType()->getPointerAddressSpace();
unsigned IntPtrWidth = DL.getIndexSizeInBits(AS);
- APInt Offset = APInt::getNullValue(IntPtrWidth);
+ APInt Offset = APInt::getZero(IntPtrWidth);
// Even though we don't look through PHI nodes, we could be called on an
// instruction in an unreachable block, which may be on a cycle.
@@ -2601,7 +2700,7 @@ InlineResult CallAnalyzer::analyze() {
onBlockAnalyzed(BB);
}
- bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneUse() &&
+ bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneLiveUse() &&
&F == CandidateCall.getCalledFunction();
// If this is a noduplicate call, we can still inline as long as
// inlining this would cause the removal of the caller (so the instruction
@@ -2612,10 +2711,10 @@ InlineResult CallAnalyzer::analyze() {
return finalizeAnalysis();
}
-void InlineCostCallAnalyzer::print() {
-#define DEBUG_PRINT_STAT(x) dbgs() << " " #x ": " << x << "\n"
+void InlineCostCallAnalyzer::print(raw_ostream &OS) {
+#define DEBUG_PRINT_STAT(x) OS << " " #x ": " << x << "\n"
if (PrintInstructionComments)
- F.print(dbgs(), &Writer);
+ F.print(OS, &Writer);
DEBUG_PRINT_STAT(NumConstantArgs);
DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
DEBUG_PRINT_STAT(NumAllocaArgs);
@@ -2634,7 +2733,7 @@ void InlineCostCallAnalyzer::print() {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Dump stats about this call's analysis.
-LLVM_DUMP_METHOD void InlineCostCallAnalyzer::dump() { print(); }
+LLVM_DUMP_METHOD void InlineCostCallAnalyzer::dump() { print(dbgs()); }
#endif
/// Test that there are no attribute conflicts between Caller and Callee
@@ -2849,13 +2948,13 @@ InlineCost llvm::getInlineCost(
return InlineCost::getNever("cost over benefit", CA.getCostBenefitPair());
}
- // Check if there was a reason to force inlining or no inlining.
- if (!ShouldInline.isSuccess() && CA.getCost() < CA.getThreshold())
- return InlineCost::getNever(ShouldInline.getFailureReason());
- if (ShouldInline.isSuccess() && CA.getCost() >= CA.getThreshold())
- return InlineCost::getAlways("empty function");
+ if (CA.wasDecidedByCostThreshold())
+ return InlineCost::get(CA.getCost(), CA.getThreshold());
- return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
+ // No details on how the decision was made; simply return always or never.
+ return ShouldInline.isSuccess()
+ ? InlineCost::getAlways("empty function")
+ : InlineCost::getNever(ShouldInline.getFailureReason());
}
InlineResult llvm::isInlineViable(Function &F) {
@@ -3028,7 +3127,8 @@ InlineCostAnnotationPrinterPass::run(Function &F,
ICCA.analyze();
OS << " Analyzing call of " << CalledFunction->getName()
<< "... (caller:" << CI->getCaller()->getName() << ")\n";
- ICCA.print();
+ ICCA.print(OS);
+ OS << "\n";
}
}
}
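A source-level sketch of what the new llvm.is.constant handling above enables; this is illustrative only, not part of the patch, and 'expensive' is a made-up helper. When the cost model analyzes inlining callee into caller, the argument maps to the constant 16 in SimplifiedValues, so simplifyIntrinsicCallIsConstant folds the __builtin_constant_p / llvm.is.constant call to true and the guarded fast path is costed as nearly free.

long expensive(unsigned long); // hypothetical out-of-line helper

static long callee(unsigned long size) {
  if (__builtin_constant_p(size) && size <= 32)
    return size;            // now foldable while the call site is being costed
  return expensive(size);
}

long caller() { return callee(16); } // constant argument at the call site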
diff --git a/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp b/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp
index 3c90e82fb952..a2e231e2d0f4 100644
--- a/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp
+++ b/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp
@@ -1,9 +1,8 @@
//===- InlineSizeEstimatorAnalysis.cpp - IR to native size from ML model --===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp b/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp
index 7d1e630e6e80..9fee57c54b85 100644
--- a/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp
+++ b/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp
@@ -19,11 +19,15 @@
#include "llvm/Analysis/InstructionPrecedenceTracking.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
+#define DEBUG_TYPE "ipt"
+STATISTIC(NumInstScanned, "Number of insts scanned while updating ipt");
+
#ifndef NDEBUG
static cl::opt<bool> ExpensiveAsserts(
"ipt-expensive-asserts",
@@ -64,11 +68,13 @@ bool InstructionPrecedenceTracking::isPreceededBySpecialInstruction(
void InstructionPrecedenceTracking::fill(const BasicBlock *BB) {
FirstSpecialInsts.erase(BB);
- for (auto &I : *BB)
+ for (auto &I : *BB) {
+ NumInstScanned++;
if (isSpecialInstruction(&I)) {
FirstSpecialInsts[BB] = &I;
return;
}
+ }
// Mark this block as having no special instructions.
FirstSpecialInsts[BB] = nullptr;
@@ -107,8 +113,10 @@ void InstructionPrecedenceTracking::insertInstructionTo(const Instruction *Inst,
}
void InstructionPrecedenceTracking::removeInstruction(const Instruction *Inst) {
- if (isSpecialInstruction(Inst))
- FirstSpecialInsts.erase(Inst->getParent());
+ auto *BB = Inst->getParent();
+ assert(BB && "must be called before instruction is actually removed");
+ if (FirstSpecialInsts.count(BB) && FirstSpecialInsts[BB] == Inst)
+ FirstSpecialInsts.erase(BB);
}
void InstructionPrecedenceTracking::removeUsersOf(const Instruction *Inst) {
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 23083bc8178e..864eeea4f8bf 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -70,8 +70,8 @@ static Value *SimplifyOrInst(Value *, Value *, const SimplifyQuery &, unsigned);
static Value *SimplifyXorInst(Value *, Value *, const SimplifyQuery &, unsigned);
static Value *SimplifyCastInst(unsigned, Value *, Type *,
const SimplifyQuery &, unsigned);
-static Value *SimplifyGEPInst(Type *, ArrayRef<Value *>, const SimplifyQuery &,
- unsigned);
+static Value *SimplifyGEPInst(Type *, ArrayRef<Value *>, bool,
+ const SimplifyQuery &, unsigned);
static Value *SimplifySelectInst(Value *, Value *, Value *,
const SimplifyQuery &, unsigned);
@@ -698,13 +698,12 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout &DL, Value *&V,
bool AllowNonInbounds = false) {
assert(V->getType()->isPtrOrPtrVectorTy());
- Type *IntIdxTy = DL.getIndexType(V->getType())->getScalarType();
- APInt Offset = APInt::getNullValue(IntIdxTy->getIntegerBitWidth());
+ APInt Offset = APInt::getZero(DL.getIndexTypeSizeInBits(V->getType()));
V = V->stripAndAccumulateConstantOffsets(DL, Offset, AllowNonInbounds);
// As that strip may trace through `addrspacecast`, need to sext or trunc
// the offset calculated.
- IntIdxTy = DL.getIndexType(V->getType())->getScalarType();
+ Type *IntIdxTy = DL.getIndexType(V->getType())->getScalarType();
Offset = Offset.sextOrTrunc(IntIdxTy->getIntegerBitWidth());
Constant *OffsetIntPtr = ConstantInt::get(IntIdxTy, Offset);
@@ -1407,8 +1406,7 @@ static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
match(Op0, m_c_Or(m_NUWShl(m_Value(X), m_APInt(ShLAmt)), m_Value(Y))) &&
*ShRAmt == *ShLAmt) {
const KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
- const unsigned Width = Op0->getType()->getScalarSizeInBits();
- const unsigned EffWidthY = Width - YKnown.countMinLeadingZeros();
+ const unsigned EffWidthY = YKnown.countMaxActiveBits();
if (ShRAmt->uge(EffWidthY))
return X;
}
@@ -1429,9 +1427,11 @@ static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
MaxRecurse))
return V;
- // all ones >>a X -> -1
+ // -1 >>a X --> -1
+ // (-1 << X) a>> X --> -1
// Do not return Op0 because it may contain undef elements if it's a vector.
- if (match(Op0, m_AllOnes()))
+ if (match(Op0, m_AllOnes()) ||
+ match(Op0, m_Shl(m_AllOnes(), m_Specific(Op1))))
return Constant::getAllOnesValue(Op0->getType());
// (X << A) >> A -> X
@@ -1765,7 +1765,7 @@ static Value *simplifyAndOrOfICmpsWithLimitConst(ICmpInst *Cmp0, ICmpInst *Cmp1,
if (match(Cmp0->getOperand(1), m_APInt(C)))
MinMaxC = HasNotOp ? ~*C : *C;
else if (isa<ConstantPointerNull>(Cmp0->getOperand(1)))
- MinMaxC = APInt::getNullValue(8);
+ MinMaxC = APInt::getZero(8);
else
return nullptr;
@@ -2040,24 +2040,32 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
if (match(Op1, m_c_Or(m_Specific(Op0), m_Value())))
return Op0;
+ // (X | Y) & (X | ~Y) --> X (commuted 8 ways)
+ Value *X, *Y;
+ if (match(Op0, m_c_Or(m_Value(X), m_Not(m_Value(Y)))) &&
+ match(Op1, m_c_Or(m_Deferred(X), m_Deferred(Y))))
+ return X;
+ if (match(Op1, m_c_Or(m_Value(X), m_Not(m_Value(Y)))) &&
+ match(Op0, m_c_Or(m_Deferred(X), m_Deferred(Y))))
+ return X;
+
if (Value *V = simplifyLogicOfAddSub(Op0, Op1, Instruction::And))
return V;
// A mask that only clears known zeros of a shifted value is a no-op.
- Value *X;
const APInt *Mask;
const APInt *ShAmt;
if (match(Op1, m_APInt(Mask))) {
// If all bits in the inverted and shifted mask are clear:
// and (shl X, ShAmt), Mask --> shl X, ShAmt
if (match(Op0, m_Shl(m_Value(X), m_APInt(ShAmt))) &&
- (~(*Mask)).lshr(*ShAmt).isNullValue())
+ (~(*Mask)).lshr(*ShAmt).isZero())
return Op0;
// If all bits in the inverted and shifted mask are clear:
// and (lshr X, ShAmt), Mask --> lshr X, ShAmt
if (match(Op0, m_LShr(m_Value(X), m_APInt(ShAmt))) &&
- (~(*Mask)).shl(*ShAmt).isNullValue())
+ (~(*Mask)).shl(*ShAmt).isZero())
return Op0;
}
@@ -2141,7 +2149,7 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
// if Mask = ((1 << effective_width_of(X)) - 1) << A
// SimplifyDemandedBits in InstCombine can optimize the general case.
// This pattern aims to help other passes for a common case.
- Value *Y, *XShifted;
+ Value *XShifted;
if (match(Op1, m_APInt(Mask)) &&
match(Op0, m_c_Or(m_CombineAnd(m_NUWShl(m_Value(X), m_APInt(ShAmt)),
m_Value(XShifted)),
@@ -2149,11 +2157,11 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
const unsigned Width = Op0->getType()->getScalarSizeInBits();
const unsigned ShftCnt = ShAmt->getLimitedValue(Width);
const KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
- const unsigned EffWidthY = Width - YKnown.countMinLeadingZeros();
+ const unsigned EffWidthY = YKnown.countMaxActiveBits();
if (EffWidthY <= ShftCnt) {
const KnownBits XKnown = computeKnownBits(X, Q.DL, 0, Q.AC, Q.CxtI,
Q.DT);
- const unsigned EffWidthX = Width - XKnown.countMinLeadingZeros();
+ const unsigned EffWidthX = XKnown.countMaxActiveBits();
const APInt EffBitsY = APInt::getLowBitsSet(Width, EffWidthY);
const APInt EffBitsX = APInt::getLowBitsSet(Width, EffWidthX) << ShftCnt;
// If the mask is extracting all bits from X or Y as is, we can skip
@@ -2257,6 +2265,19 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
match(Op0, m_c_Xor(m_Not(m_Specific(A)), m_Specific(B)))))
return Op0;
+ // (A | B) | (A ^ B) --> A | B
+ // (B | A) | (A ^ B) --> B | A
+ if (match(Op1, m_Xor(m_Value(A), m_Value(B))) &&
+ match(Op0, m_c_Or(m_Specific(A), m_Specific(B))))
+ return Op0;
+
+ // Commute the outer 'or' operands.
+ // (A ^ B) | (A | B) --> A | B
+ // (A ^ B) | (B | A) --> B | A
+ if (match(Op0, m_Xor(m_Value(A), m_Value(B))) &&
+ match(Op1, m_c_Or(m_Specific(A), m_Specific(B))))
+ return Op1;
+
// (~A & B) | ~(A | B) --> ~A
// (~A & B) | ~(B | A) --> ~A
// (B & ~A) | ~(A | B) --> ~A
@@ -2276,6 +2297,23 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
match(Op0, m_Not(m_c_Or(m_Specific(A), m_Specific(B)))))
return NotA;
+ // Rotated -1 is still -1:
+ // (-1 << X) | (-1 >> (C - X)) --> -1
+ // (-1 >> X) | (-1 << (C - X)) --> -1
+ // ...with C <= bitwidth (and commuted variants).
+ Value *X, *Y;
+ if ((match(Op0, m_Shl(m_AllOnes(), m_Value(X))) &&
+ match(Op1, m_LShr(m_AllOnes(), m_Value(Y)))) ||
+ (match(Op1, m_Shl(m_AllOnes(), m_Value(X))) &&
+ match(Op0, m_LShr(m_AllOnes(), m_Value(Y))))) {
+ const APInt *C;
+ if ((match(X, m_Sub(m_APInt(C), m_Specific(Y))) ||
+ match(Y, m_Sub(m_APInt(C), m_Specific(X)))) &&
+ C->ule(X->getType()->getScalarSizeInBits())) {
+ return ConstantInt::getAllOnesValue(X->getType());
+ }
+ }
+
if (Value *V = simplifyAndOrOfCmps(Q, Op0, Op1, false))
return V;
@@ -3090,7 +3128,7 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
// - C isn't zero.
if (Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(LBO)) ||
Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(LBO)) ||
- match(LHS, m_Shl(m_One(), m_Value())) || !C->isNullValue()) {
+ match(LHS, m_Shl(m_One(), m_Value())) || !C->isZero()) {
if (Pred == ICmpInst::ICMP_EQ)
return ConstantInt::getFalse(GetCompareTy(RHS));
if (Pred == ICmpInst::ICMP_NE)
@@ -3640,30 +3678,6 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
CRHS->getPointerOperand(), Q))
return C;
- if (GetElementPtrInst *GLHS = dyn_cast<GetElementPtrInst>(LHS)) {
- if (GEPOperator *GRHS = dyn_cast<GEPOperator>(RHS)) {
- if (GLHS->getPointerOperand() == GRHS->getPointerOperand() &&
- GLHS->hasAllConstantIndices() && GRHS->hasAllConstantIndices() &&
- (ICmpInst::isEquality(Pred) ||
- (GLHS->isInBounds() && GRHS->isInBounds() &&
- Pred == ICmpInst::getSignedPredicate(Pred)))) {
- // The bases are equal and the indices are constant. Build a constant
- // expression GEP with the same indices and a null base pointer to see
- // what constant folding can make out of it.
- Constant *Null = Constant::getNullValue(GLHS->getPointerOperandType());
- SmallVector<Value *, 4> IndicesLHS(GLHS->indices());
- Constant *NewLHS = ConstantExpr::getGetElementPtr(
- GLHS->getSourceElementType(), Null, IndicesLHS);
-
- SmallVector<Value *, 4> IndicesRHS(GRHS->idx_begin(), GRHS->idx_end());
- Constant *NewRHS = ConstantExpr::getGetElementPtr(
- GLHS->getSourceElementType(), Null, IndicesRHS);
- Constant *NewICmp = ConstantExpr::getICmp(Pred, NewLHS, NewRHS);
- return ConstantFoldConstant(NewICmp, Q.DL);
- }
- }
- }
-
// If the comparison is with the result of a select instruction, check whether
// comparing with either branch of the select always yields the same value.
if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
@@ -3966,7 +3980,8 @@ static Value *simplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
if (auto *GEP = dyn_cast<GetElementPtrInst>(I))
return PreventSelfSimplify(SimplifyGEPInst(GEP->getSourceElementType(),
- NewOps, Q, MaxRecurse - 1));
+ NewOps, GEP->isInBounds(), Q,
+ MaxRecurse - 1));
if (isa<SelectInst>(I))
return PreventSelfSimplify(
@@ -4080,6 +4095,22 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal,
std::swap(TrueVal, FalseVal);
}
+ // Check for integer min/max with a limit constant:
+ // X > MIN_INT ? X : MIN_INT --> X
+ // X < MAX_INT ? X : MAX_INT --> X
+ if (TrueVal->getType()->isIntOrIntVectorTy()) {
+ Value *X, *Y;
+ SelectPatternFlavor SPF =
+ matchDecomposedSelectPattern(cast<ICmpInst>(CondVal), TrueVal, FalseVal,
+ X, Y).Flavor;
+ if (SelectPatternResult::isMinOrMax(SPF) && Pred == getMinMaxPred(SPF)) {
+ APInt LimitC = getMinMaxLimit(getInverseMinMaxFlavor(SPF),
+ X->getType()->getScalarSizeInBits());
+ if (match(Y, m_SpecificInt(LimitC)))
+ return X;
+ }
+ }
+
if (Pred == ICmpInst::ICMP_EQ && match(CmpRHS, m_Zero())) {
Value *X;
const APInt *Y;
@@ -4210,14 +4241,27 @@ static Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
return FalseVal;
}
- // select i1 Cond, i1 true, i1 false --> i1 Cond
assert(Cond->getType()->isIntOrIntVectorTy(1) &&
"Select must have bool or bool vector condition");
assert(TrueVal->getType() == FalseVal->getType() &&
"Select must have same types for true/false ops");
- if (Cond->getType() == TrueVal->getType() &&
- match(TrueVal, m_One()) && match(FalseVal, m_ZeroInt()))
- return Cond;
+
+ if (Cond->getType() == TrueVal->getType()) {
+ // select i1 Cond, i1 true, i1 false --> i1 Cond
+ if (match(TrueVal, m_One()) && match(FalseVal, m_ZeroInt()))
+ return Cond;
+
+ // (X || Y) && (X || !Y) --> X (commuted 8 ways)
+ Value *X, *Y;
+ if (match(FalseVal, m_ZeroInt())) {
+ if (match(Cond, m_c_LogicalOr(m_Value(X), m_Not(m_Value(Y)))) &&
+ match(TrueVal, m_c_LogicalOr(m_Specific(X), m_Specific(Y))))
+ return X;
+ if (match(TrueVal, m_c_LogicalOr(m_Value(X), m_Not(m_Value(Y)))) &&
+ match(Cond, m_c_LogicalOr(m_Specific(X), m_Specific(Y))))
+ return X;
+ }
+ }
// select ?, X, X -> X
if (TrueVal == FalseVal)
@@ -4295,7 +4339,7 @@ Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
/// Given operands for an GetElementPtrInst, see if we can fold the result.
/// If not, this returns null.
-static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
+static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops, bool InBounds,
const SimplifyQuery &Q, unsigned) {
// The type of the GEP pointer operand.
unsigned AS =
@@ -4396,14 +4440,14 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
// gep (gep V, C), (sub 0, V) -> C
if (match(Ops.back(),
m_Sub(m_Zero(), m_PtrToInt(m_Specific(StrippedBasePtr)))) &&
- !BasePtrOffset.isNullValue()) {
+ !BasePtrOffset.isZero()) {
auto *CI = ConstantInt::get(GEPTy->getContext(), BasePtrOffset);
return ConstantExpr::getIntToPtr(CI, GEPTy);
}
// gep (gep V, C), (xor V, -1) -> C-1
if (match(Ops.back(),
m_Xor(m_PtrToInt(m_Specific(StrippedBasePtr)), m_AllOnes())) &&
- !BasePtrOffset.isOneValue()) {
+ !BasePtrOffset.isOne()) {
auto *CI = ConstantInt::get(GEPTy->getContext(), BasePtrOffset - 1);
return ConstantExpr::getIntToPtr(CI, GEPTy);
}
@@ -4415,13 +4459,13 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
return nullptr;
auto *CE = ConstantExpr::getGetElementPtr(SrcTy, cast<Constant>(Ops[0]),
- Ops.slice(1));
+ Ops.slice(1), InBounds);
return ConstantFoldConstant(CE, Q.DL);
}
-Value *llvm::SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
+Value *llvm::SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops, bool InBounds,
const SimplifyQuery &Q) {
- return ::SimplifyGEPInst(SrcTy, Ops, Q, RecursionLimit);
+ return ::SimplifyGEPInst(SrcTy, Ops, InBounds, Q, RecursionLimit);
}
/// Given operands for an InsertValueInst, see if we can fold the result.
@@ -4891,6 +4935,11 @@ static Constant *simplifyFPOp(ArrayRef<Value *> Ops, FastMathFlags FMF,
return nullptr;
}
+// TODO: Move this out to a header file:
+static inline bool canIgnoreSNaN(fp::ExceptionBehavior EB, FastMathFlags FMF) {
+ return (EB == fp::ebIgnore || FMF.noNaNs());
+}
+
/// Given operands for an FAdd, see if we can fold the result. If not, this
/// returns null.
static Value *
@@ -4905,17 +4954,25 @@ SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
if (Constant *C = simplifyFPOp({Op0, Op1}, FMF, Q, ExBehavior, Rounding))
return C;
- if (!isDefaultFPEnvironment(ExBehavior, Rounding))
- return nullptr;
-
// fadd X, -0 ==> X
- if (match(Op1, m_NegZeroFP()))
- return Op0;
+ // With strict/constrained FP, we have these possible edge cases that do
+ // not simplify to Op0:
+ // fadd SNaN, -0.0 --> QNaN
+ // fadd +0.0, -0.0 --> -0.0 (but only with round toward negative)
+ if (canIgnoreSNaN(ExBehavior, FMF) &&
+ (!canRoundingModeBe(Rounding, RoundingMode::TowardNegative) ||
+ FMF.noSignedZeros()))
+ if (match(Op1, m_NegZeroFP()))
+ return Op0;
// fadd X, 0 ==> X, when we know X is not -0
- if (match(Op1, m_PosZeroFP()) &&
- (FMF.noSignedZeros() || CannotBeNegativeZero(Op0, Q.TLI)))
- return Op0;
+ if (canIgnoreSNaN(ExBehavior, FMF))
+ if (match(Op1, m_PosZeroFP()) &&
+ (FMF.noSignedZeros() || CannotBeNegativeZero(Op0, Q.TLI)))
+ return Op0;
+
+ if (!isDefaultFPEnvironment(ExBehavior, Rounding))
+ return nullptr;
// With nnan: -X + X --> 0.0 (and commuted variant)
// We don't have to explicitly exclude infinities (ninf): INF + -INF == NaN.
@@ -5457,6 +5514,9 @@ static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0,
if (match(Op0,
m_Intrinsic<Intrinsic::experimental_vector_reverse>(m_Value(X))))
return X;
+ // experimental.vector.reverse(splat(X)) -> splat(X)
+ if (isSplatValue(Op0))
+ return Op0;
break;
default:
break;
@@ -5772,13 +5832,32 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1,
static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) {
- // Intrinsics with no operands have some kind of side effect. Don't simplify.
- unsigned NumOperands = Call->getNumArgOperands();
- if (!NumOperands)
- return nullptr;
-
+ unsigned NumOperands = Call->arg_size();
Function *F = cast<Function>(Call->getCalledFunction());
Intrinsic::ID IID = F->getIntrinsicID();
+
+ // Most of the intrinsics with no operands have some kind of side effect.
+ // Don't simplify.
+ if (!NumOperands) {
+ switch (IID) {
+ case Intrinsic::vscale: {
+ // Call may not be inserted into the IR yet at point of calling simplify.
+ if (!Call->getParent() || !Call->getParent()->getParent())
+ return nullptr;
+ auto Attr = Call->getFunction()->getFnAttribute(Attribute::VScaleRange);
+ if (!Attr.isValid())
+ return nullptr;
+ unsigned VScaleMin, VScaleMax;
+ std::tie(VScaleMin, VScaleMax) = Attr.getVScaleRangeArgs();
+ if (VScaleMin == VScaleMax && VScaleMax != 0)
+ return ConstantInt::get(F->getReturnType(), VScaleMin);
+ return nullptr;
+ }
+ default:
+ return nullptr;
+ }
+ }
+
if (NumOperands == 1)
return simplifyUnaryIntrinsic(F, Call->getArgOperand(0), Q);
@@ -5814,9 +5893,18 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) {
if (match(ShAmtArg, m_APInt(ShAmtC))) {
// If there's effectively no shift, return the 1st arg or 2nd arg.
APInt BitWidth = APInt(ShAmtC->getBitWidth(), ShAmtC->getBitWidth());
- if (ShAmtC->urem(BitWidth).isNullValue())
+ if (ShAmtC->urem(BitWidth).isZero())
return Call->getArgOperand(IID == Intrinsic::fshl ? 0 : 1);
}
+
+ // Rotating zero by anything is zero.
+ if (match(Op0, m_Zero()) && match(Op1, m_Zero()))
+ return ConstantInt::getNullValue(F->getReturnType());
+
+ // Rotating -1 by anything is -1.
+ if (match(Op0, m_AllOnes()) && match(Op1, m_AllOnes()))
+ return ConstantInt::getAllOnesValue(F->getReturnType());
+
return nullptr;
}
case Intrinsic::experimental_constrained_fma: {
@@ -5939,7 +6027,7 @@ static Value *tryConstantFoldCall(CallBase *Call, const SimplifyQuery &Q) {
return nullptr;
SmallVector<Constant *, 4> ConstantArgs;
- unsigned NumArgs = Call->getNumArgOperands();
+ unsigned NumArgs = Call->arg_size();
ConstantArgs.reserve(NumArgs);
for (auto &Arg : Call->args()) {
Constant *C = dyn_cast<Constant>(&Arg);
@@ -5990,73 +6078,27 @@ Value *llvm::SimplifyFreezeInst(Value *Op0, const SimplifyQuery &Q) {
return ::SimplifyFreezeInst(Op0, Q);
}
-static Constant *ConstructLoadOperandConstant(Value *Op) {
- SmallVector<Value *, 4> Worklist;
- // Invalid IR in unreachable code may contain self-referential values. Don't infinitely loop.
- SmallPtrSet<Value *, 4> Visited;
- Worklist.push_back(Op);
- while (true) {
- Value *CurOp = Worklist.back();
- if (!Visited.insert(CurOp).second)
- return nullptr;
- if (isa<Constant>(CurOp))
- break;
- if (auto *BC = dyn_cast<BitCastOperator>(CurOp)) {
- Worklist.push_back(BC->getOperand(0));
- } else if (auto *GEP = dyn_cast<GEPOperator>(CurOp)) {
- for (unsigned I = 1; I != GEP->getNumOperands(); ++I) {
- if (!isa<Constant>(GEP->getOperand(I)))
- return nullptr;
- }
- Worklist.push_back(GEP->getOperand(0));
- } else if (auto *II = dyn_cast<IntrinsicInst>(CurOp)) {
- if (II->isLaunderOrStripInvariantGroup())
- Worklist.push_back(II->getOperand(0));
- else
- return nullptr;
- } else {
- return nullptr;
- }
- }
-
- Constant *NewOp = cast<Constant>(Worklist.pop_back_val());
- while (!Worklist.empty()) {
- Value *CurOp = Worklist.pop_back_val();
- if (isa<BitCastOperator>(CurOp)) {
- NewOp = ConstantExpr::getBitCast(NewOp, CurOp->getType());
- } else if (auto *GEP = dyn_cast<GEPOperator>(CurOp)) {
- SmallVector<Constant *> Idxs;
- Idxs.reserve(GEP->getNumOperands() - 1);
- for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I) {
- Idxs.push_back(cast<Constant>(GEP->getOperand(I)));
- }
- NewOp = ConstantExpr::getGetElementPtr(GEP->getSourceElementType(), NewOp,
- Idxs, GEP->isInBounds(),
- GEP->getInRangeIndex());
- } else {
- assert(isa<IntrinsicInst>(CurOp) &&
- cast<IntrinsicInst>(CurOp)->isLaunderOrStripInvariantGroup() &&
- "expected invariant group intrinsic");
- NewOp = ConstantExpr::getBitCast(NewOp, CurOp->getType());
- }
- }
- return NewOp;
-}
-
static Value *SimplifyLoadInst(LoadInst *LI, Value *PtrOp,
const SimplifyQuery &Q) {
if (LI->isVolatile())
return nullptr;
- // Try to make the load operand a constant, specifically handle
- // invariant.group intrinsics.
+ APInt Offset(Q.DL.getIndexTypeSizeInBits(PtrOp->getType()), 0);
auto *PtrOpC = dyn_cast<Constant>(PtrOp);
- if (!PtrOpC)
- PtrOpC = ConstructLoadOperandConstant(PtrOp);
+ // Try to convert operand into a constant by stripping offsets while looking
+ // through invariant.group intrinsics. Don't bother if the underlying object
+ // is not constant, as calculating GEP offsets is expensive.
+ if (!PtrOpC && isa<Constant>(getUnderlyingObject(PtrOp))) {
+ PtrOp = PtrOp->stripAndAccumulateConstantOffsets(
+ Q.DL, Offset, /* AllowNonInbounds */ true,
+ /* AllowInvariantGroup */ true);
+ // Index size may have changed due to address space casts.
+ Offset = Offset.sextOrTrunc(Q.DL.getIndexTypeSizeInBits(PtrOp->getType()));
+ PtrOpC = dyn_cast<Constant>(PtrOp);
+ }
if (PtrOpC)
- return ConstantFoldLoadFromConstPtr(PtrOpC, LI->getType(), Q.DL);
-
+ return ConstantFoldLoadFromConstPtr(PtrOpC, LI->getType(), Offset, Q.DL);
return nullptr;
}
@@ -6156,8 +6198,9 @@ static Value *simplifyInstructionWithOperands(Instruction *I,
Result = SimplifySelectInst(NewOps[0], NewOps[1], NewOps[2], Q);
break;
case Instruction::GetElementPtr: {
- Result = SimplifyGEPInst(cast<GetElementPtrInst>(I)->getSourceElementType(),
- NewOps, Q);
+ auto *GEPI = cast<GetElementPtrInst>(I);
+ Result = SimplifyGEPInst(GEPI->getSourceElementType(), NewOps,
+ GEPI->isInBounds(), Q);
break;
}
case Instruction::InsertValue: {
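A source-level sketch, for illustration only, of two of the folds added in the hunks above (InstSimplify itself works on IR, not C++); the new funnel-shift cases, rotating 0 or -1 by any amount, are analogous at the IR level. Each function below now simplifies to a single value:

unsigned f(unsigned x, unsigned y) { return (x | y) & (x | ~y); } // --> x
unsigned g(unsigned a, unsigned b) { return (a | b) | (a ^ b); }  // --> a | b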
diff --git a/llvm/lib/Analysis/LazyCallGraph.cpp b/llvm/lib/Analysis/LazyCallGraph.cpp
index 8f87552fca1f..0007c54b16d0 100644
--- a/llvm/lib/Analysis/LazyCallGraph.cpp
+++ b/llvm/lib/Analysis/LazyCallGraph.cpp
@@ -220,8 +220,7 @@ bool LazyCallGraph::invalidate(Module &, const PreservedAnalyses &PA,
// Check whether the analysis, all analyses on functions, or the function's
// CFG have been preserved.
auto PAC = PA.getChecker<llvm::LazyCallGraphAnalysis>();
- return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Module>>() ||
- PAC.preservedSet<CFGAnalyses>());
+ return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Module>>());
}
LazyCallGraph &LazyCallGraph::operator=(LazyCallGraph &&G) {
@@ -1962,6 +1961,29 @@ void LazyCallGraph::buildRefSCCs() {
});
}
+void LazyCallGraph::visitReferences(SmallVectorImpl<Constant *> &Worklist,
+ SmallPtrSetImpl<Constant *> &Visited,
+ function_ref<void(Function &)> Callback) {
+ while (!Worklist.empty()) {
+ Constant *C = Worklist.pop_back_val();
+
+ if (Function *F = dyn_cast<Function>(C)) {
+ if (!F->isDeclaration())
+ Callback(*F);
+ continue;
+ }
+
+ // Blockaddresses are weird and don't participate in the call graph anyway,
+ // so skip them.
+ if (isa<BlockAddress>(C))
+ continue;
+
+ for (Value *Op : C->operand_values())
+ if (Visited.insert(cast<Constant>(Op)).second)
+ Worklist.push_back(cast<Constant>(Op));
+ }
+}
+
AnalysisKey LazyCallGraphAnalysis::Key;
LazyCallGraphPrinterPass::LazyCallGraphPrinterPass(raw_ostream &OS) : OS(OS) {}
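A hypothetical usage sketch of the visitReferences helper added above, assuming the header declares it as a static member (GV is a GlobalVariable with an initializer in this sketch): collect every defined function reachable through a constant's operands.

SmallVector<Constant *, 16> Worklist;
SmallPtrSet<Constant *, 16> Visited;
SmallVector<Function *, 8> ReferencedDefs;
if (GV.hasInitializer() && Visited.insert(GV.getInitializer()).second)
  Worklist.push_back(GV.getInitializer());
LazyCallGraph::visitReferences(Worklist, Visited, [&](Function &F) {
  ReferencedDefs.push_back(&F); // only definitions reach the callback
});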
diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp
index 1dababafb8a6..50fa169c2081 100644
--- a/llvm/lib/Analysis/LazyValueInfo.cpp
+++ b/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -126,7 +126,7 @@ static ValueLatticeElement intersect(const ValueLatticeElement &A,
// Note: An empty range is implicitly converted to unknown or undef depending
// on MayIncludeUndef internally.
return ValueLatticeElement::getRange(
- std::move(Range), /*MayIncludeUndef=*/A.isConstantRangeIncludingUndef() |
+ std::move(Range), /*MayIncludeUndef=*/A.isConstantRangeIncludingUndef() ||
B.isConstantRangeIncludingUndef());
}
@@ -832,7 +832,7 @@ Optional<ValueLatticeElement> LazyValueInfoImpl::solveBlockValueSelect(
};
}();
return ValueLatticeElement::getRange(
- ResultCR, TrueVal.isConstantRangeIncludingUndef() |
+ ResultCR, TrueVal.isConstantRangeIncludingUndef() ||
FalseVal.isConstantRangeIncludingUndef());
}
@@ -846,7 +846,7 @@ Optional<ValueLatticeElement> LazyValueInfoImpl::solveBlockValueSelect(
}
if (SPR.Flavor == SPF_NABS) {
- ConstantRange Zero(APInt::getNullValue(TrueCR.getBitWidth()));
+ ConstantRange Zero(APInt::getZero(TrueCR.getBitWidth()));
if (LHS == SI->getTrueValue())
return ValueLatticeElement::getRange(
Zero.sub(TrueCR.abs()), FalseVal.isConstantRangeIncludingUndef());
@@ -1117,12 +1117,11 @@ static ValueLatticeElement getValueFromICmpCondition(Value *Val, ICmpInst *ICI,
}
// If (Val & Mask) != 0 then the value must be larger than the lowest set
// bit of Mask.
- if (EdgePred == ICmpInst::ICMP_NE && !Mask->isNullValue() &&
- C->isNullValue()) {
+ if (EdgePred == ICmpInst::ICMP_NE && !Mask->isZero() && C->isZero()) {
unsigned BitWidth = Ty->getIntegerBitWidth();
return ValueLatticeElement::getRange(ConstantRange::getNonEmpty(
APInt::getOneBitSet(BitWidth, Mask->countTrailingZeros()),
- APInt::getNullValue(BitWidth)));
+ APInt::getZero(BitWidth)));
}
}
@@ -1780,62 +1779,62 @@ LazyValueInfo::getPredicateAt(unsigned Pred, Value *V, Constant *C,
// We could consider extending this to search further backwards through the
// CFG and/or value graph, but there are non-obvious compile time vs quality
// tradeoffs.
- if (CxtI) {
- BasicBlock *BB = CxtI->getParent();
-
- // Function entry or an unreachable block. Bail to avoid confusing
- // analysis below.
- pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
- if (PI == PE)
- return Unknown;
-
- // If V is a PHI node in the same block as the context, we need to ask
- // questions about the predicate as applied to the incoming value along
- // each edge. This is useful for eliminating cases where the predicate is
- // known along all incoming edges.
- if (auto *PHI = dyn_cast<PHINode>(V))
- if (PHI->getParent() == BB) {
- Tristate Baseline = Unknown;
- for (unsigned i = 0, e = PHI->getNumIncomingValues(); i < e; i++) {
- Value *Incoming = PHI->getIncomingValue(i);
- BasicBlock *PredBB = PHI->getIncomingBlock(i);
- // Note that PredBB may be BB itself.
- Tristate Result = getPredicateOnEdge(Pred, Incoming, C, PredBB, BB,
- CxtI);
-
- // Keep going as long as we've seen a consistent known result for
- // all inputs.
- Baseline = (i == 0) ? Result /* First iteration */
- : (Baseline == Result ? Baseline : Unknown); /* All others */
- if (Baseline == Unknown)
- break;
- }
- if (Baseline != Unknown)
- return Baseline;
+ BasicBlock *BB = CxtI->getParent();
+
+ // Function entry or an unreachable block. Bail to avoid confusing
+ // analysis below.
+ pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+ if (PI == PE)
+ return Unknown;
+
+ // If V is a PHI node in the same block as the context, we need to ask
+ // questions about the predicate as applied to the incoming value along
+ // each edge. This is useful for eliminating cases where the predicate is
+ // known along all incoming edges.
+ if (auto *PHI = dyn_cast<PHINode>(V))
+ if (PHI->getParent() == BB) {
+ Tristate Baseline = Unknown;
+ for (unsigned i = 0, e = PHI->getNumIncomingValues(); i < e; i++) {
+ Value *Incoming = PHI->getIncomingValue(i);
+ BasicBlock *PredBB = PHI->getIncomingBlock(i);
+ // Note that PredBB may be BB itself.
+ Tristate Result =
+ getPredicateOnEdge(Pred, Incoming, C, PredBB, BB, CxtI);
+
+ // Keep going as long as we've seen a consistent known result for
+ // all inputs.
+ Baseline = (i == 0) ? Result /* First iteration */
+ : (Baseline == Result ? Baseline
+ : Unknown); /* All others */
+ if (Baseline == Unknown)
+ break;
}
+ if (Baseline != Unknown)
+ return Baseline;
+ }
- // For a comparison where the V is outside this block, it's possible
- // that we've branched on it before. Look to see if the value is known
- // on all incoming edges.
- if (!isa<Instruction>(V) ||
- cast<Instruction>(V)->getParent() != BB) {
- // For predecessor edge, determine if the comparison is true or false
- // on that edge. If they're all true or all false, we can conclude
- // the value of the comparison in this block.
- Tristate Baseline = getPredicateOnEdge(Pred, V, C, *PI, BB, CxtI);
- if (Baseline != Unknown) {
- // Check that all remaining incoming values match the first one.
- while (++PI != PE) {
- Tristate Ret = getPredicateOnEdge(Pred, V, C, *PI, BB, CxtI);
- if (Ret != Baseline) break;
- }
- // If we terminated early, then one of the values didn't match.
- if (PI == PE) {
- return Baseline;
- }
+ // For a comparison where the V is outside this block, it's possible
+ // that we've branched on it before. Look to see if the value is known
+ // on all incoming edges.
+ if (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB) {
+ // For predecessor edge, determine if the comparison is true or false
+ // on that edge. If they're all true or all false, we can conclude
+ // the value of the comparison in this block.
+ Tristate Baseline = getPredicateOnEdge(Pred, V, C, *PI, BB, CxtI);
+ if (Baseline != Unknown) {
+ // Check that all remaining incoming values match the first one.
+ while (++PI != PE) {
+ Tristate Ret = getPredicateOnEdge(Pred, V, C, *PI, BB, CxtI);
+ if (Ret != Baseline)
+ break;
+ }
+ // If we terminated early, then one of the values didn't match.
+ if (PI == PE) {
+ return Baseline;
}
}
}
+
return Unknown;
}
diff --git a/llvm/lib/Analysis/Lint.cpp b/llvm/lib/Analysis/Lint.cpp
index 4de5e1e06c7e..f9a7a5bdf434 100644
--- a/llvm/lib/Analysis/Lint.cpp
+++ b/llvm/lib/Analysis/Lint.cpp
@@ -235,7 +235,7 @@ void Lint::visitCallBase(CallBase &I) {
for (auto BI = I.arg_begin(); BI != AE; ++BI, ++ArgNo) {
// Skip ByVal arguments since they will be memcpy'd to the callee's
// stack so we're not really passing the pointer anyway.
- if (PAL.hasParamAttribute(ArgNo, Attribute::ByVal))
+ if (PAL.hasParamAttr(ArgNo, Attribute::ByVal))
continue;
// If both arguments are readonly, they have no dependence.
if (Formal->onlyReadsMemory() && I.onlyReadsMemory(ArgNo))
@@ -268,7 +268,7 @@ void Lint::visitCallBase(CallBase &I) {
for (Value *Arg : I.args()) {
// Skip ByVal arguments since they will be memcpy'd to the callee's
// stack anyway.
- if (PAL.hasParamAttribute(ArgNo++, Attribute::ByVal))
+ if (PAL.hasParamAttr(ArgNo++, Attribute::ByVal))
continue;
Value *Obj = findValue(Arg, /*OffsetOk=*/true);
Assert(!isa<AllocaInst>(Obj),
@@ -715,6 +715,7 @@ PreservedAnalyses LintPass::run(Function &F, FunctionAnalysisManager &AM) {
return PreservedAnalyses::all();
}
+namespace {
class LintLegacyPass : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid
@@ -733,6 +734,7 @@ public:
}
void print(raw_ostream &O, const Module *M) const override {}
};
+} // namespace
char LintLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(LintLegacyPass, "lint", "Statically lint-checks LLVM IR",
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index 1c55f485aa76..0fbf1db0685d 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -147,7 +147,7 @@ static bool isDereferenceableAndAlignedPointer(
Alignment, Size, DL, CtxI, DT,
TLI, Visited, MaxDepth);
- if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(V))
+ if (const AddrSpaceCastOperator *ASC = dyn_cast<AddrSpaceCastOperator>(V))
return isDereferenceableAndAlignedPointer(ASC->getOperand(0), Alignment,
Size, DL, CtxI, DT, TLI,
Visited, MaxDepth);
@@ -451,8 +451,8 @@ static bool areNonOverlapSameBaseLoadAndStore(const Value *LoadPtr,
const Value *StorePtr,
Type *StoreTy,
const DataLayout &DL) {
- APInt LoadOffset(DL.getTypeSizeInBits(LoadPtr->getType()), 0);
- APInt StoreOffset(DL.getTypeSizeInBits(StorePtr->getType()), 0);
+ APInt LoadOffset(DL.getIndexTypeSizeInBits(LoadPtr->getType()), 0);
+ APInt StoreOffset(DL.getIndexTypeSizeInBits(StorePtr->getType()), 0);
const Value *LoadBase = LoadPtr->stripAndAccumulateConstantOffsets(
DL, LoadOffset, /* AllowNonInbounds */ false);
const Value *StoreBase = StorePtr->stripAndAccumulateConstantOffsets(
@@ -511,8 +511,11 @@ static Value *getAvailableLoadStore(Instruction *Inst, const Value *Ptr,
if (CastInst::isBitOrNoopPointerCastable(Val->getType(), AccessTy, DL))
return Val;
- if (auto *C = dyn_cast<Constant>(Val))
- return ConstantFoldLoadThroughBitcast(C, AccessTy, DL);
+ TypeSize StoreSize = DL.getTypeStoreSize(Val->getType());
+ TypeSize LoadSize = DL.getTypeStoreSize(AccessTy);
+ if (TypeSize::isKnownLE(LoadSize, StoreSize))
+ if (auto *C = dyn_cast<Constant>(Val))
+ return ConstantFoldLoadFromConst(C, AccessTy, DL);
}
return nullptr;
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index a239928ecf38..f9bd7167317f 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -142,13 +142,12 @@ Value *llvm::stripIntegerCast(Value *V) {
const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
const ValueToValueMap &PtrToStride,
- Value *Ptr, Value *OrigPtr) {
+ Value *Ptr) {
const SCEV *OrigSCEV = PSE.getSCEV(Ptr);
// If there is an entry in the map return the SCEV of the pointer with the
// symbolic stride replaced by one.
- ValueToValueMap::const_iterator SI =
- PtrToStride.find(OrigPtr ? OrigPtr : Ptr);
+ ValueToValueMap::const_iterator SI = PtrToStride.find(Ptr);
if (SI == PtrToStride.end())
// For a non-symbolic stride, just return the original expression.
return OrigSCEV;
@@ -659,7 +658,8 @@ static bool isNoWrap(PredicatedScalarEvolution &PSE,
if (PSE.getSE()->isLoopInvariant(PtrScev, L))
return true;
- int64_t Stride = getPtrStride(PSE, Ptr, L, Strides);
+ Type *AccessTy = Ptr->getType()->getPointerElementType();
+ int64_t Stride = getPtrStride(PSE, AccessTy, Ptr, L, Strides);
if (Stride == 1 || PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW))
return true;
@@ -1026,15 +1026,17 @@ static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
}
/// Check whether the access through \p Ptr has a constant stride.
-int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
- const Loop *Lp, const ValueToValueMap &StridesMap,
- bool Assume, bool ShouldCheckWrap) {
+int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy,
+ Value *Ptr, const Loop *Lp,
+ const ValueToValueMap &StridesMap, bool Assume,
+ bool ShouldCheckWrap) {
Type *Ty = Ptr->getType();
assert(Ty->isPointerTy() && "Unexpected non-ptr");
+ unsigned AddrSpace = Ty->getPointerAddressSpace();
- // Make sure that the pointer does not point to aggregate types.
- auto *PtrTy = cast<PointerType>(Ty);
- if (PtrTy->getElementType()->isAggregateType()) {
+ // Make sure we're not accessing an aggregate type.
+ // TODO: Why? This doesn't make any sense.
+ if (AccessTy->isAggregateType()) {
LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type"
<< *Ptr << "\n");
return 0;
@@ -1071,8 +1073,7 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) ||
isNoWrapAddRec(Ptr, AR, PSE, Lp);
if (!IsNoWrapAddRec && !IsInBoundsGEP &&
- NullPointerIsDefined(Lp->getHeader()->getParent(),
- PtrTy->getAddressSpace())) {
+ NullPointerIsDefined(Lp->getHeader()->getParent(), AddrSpace)) {
if (Assume) {
PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
IsNoWrapAddRec = true;
@@ -1100,7 +1101,7 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
}
auto &DL = Lp->getHeader()->getModule()->getDataLayout();
- int64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
+ int64_t Size = DL.getTypeAllocSize(AccessTy);
const APInt &APStepVal = C->getAPInt();
// Huge step value - give up.
@@ -1120,7 +1121,7 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
// zero we know that this won't happen without triggering undefined behavior.
if (!IsNoWrapAddRec && Stride != 1 && Stride != -1 &&
(IsInBoundsGEP || !NullPointerIsDefined(Lp->getHeader()->getParent(),
- PtrTy->getAddressSpace()))) {
+ AddrSpace))) {
if (Assume) {
// We can avoid this case by adding a run-time check.
LLVM_DEBUG(dbgs() << "LAA: Non unit strided pointer which is not either "
@@ -1262,6 +1263,47 @@ bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
return Diff && *Diff == 1;
}
+static void visitPointers(Value *StartPtr, const Loop &InnermostLoop,
+ function_ref<void(Value *)> AddPointer) {
+ SmallPtrSet<Value *, 8> Visited;
+ SmallVector<Value *> WorkList;
+ WorkList.push_back(StartPtr);
+
+ while (!WorkList.empty()) {
+ Value *Ptr = WorkList.pop_back_val();
+ if (!Visited.insert(Ptr).second)
+ continue;
+ auto *PN = dyn_cast<PHINode>(Ptr);
+ // SCEV does not look through non-header PHIs inside the loop. Such phis
+ // can be analyzed by adding separate accesses for each incoming pointer
+ // value.
+ if (PN && InnermostLoop.contains(PN->getParent()) &&
+ PN->getParent() != InnermostLoop.getHeader()) {
+ for (const Use &Inc : PN->incoming_values())
+ WorkList.push_back(Inc);
+ } else
+ AddPointer(Ptr);
+ }
+}
+
+void MemoryDepChecker::addAccess(StoreInst *SI) {
+ visitPointers(SI->getPointerOperand(), *InnermostLoop,
+ [this, SI](Value *Ptr) {
+ Accesses[MemAccessInfo(Ptr, true)].push_back(AccessIdx);
+ InstMap.push_back(SI);
+ ++AccessIdx;
+ });
+}
+
+void MemoryDepChecker::addAccess(LoadInst *LI) {
+ visitPointers(LI->getPointerOperand(), *InnermostLoop,
+ [this, LI](Value *Ptr) {
+ Accesses[MemAccessInfo(Ptr, false)].push_back(AccessIdx);
+ InstMap.push_back(LI);
+ ++AccessIdx;
+ });
+}
+
MemoryDepChecker::VectorizationSafetyStatus
MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
switch (Type) {
@@ -1478,6 +1520,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
Value *BPtr = B.getPointer();
bool AIsWrite = A.getInt();
bool BIsWrite = B.getInt();
+ Type *ATy = APtr->getType()->getPointerElementType();
+ Type *BTy = BPtr->getType()->getPointerElementType();
// Two reads are independent.
if (!AIsWrite && !BIsWrite)
@@ -1488,8 +1532,10 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
BPtr->getType()->getPointerAddressSpace())
return Dependence::Unknown;
- int64_t StrideAPtr = getPtrStride(PSE, APtr, InnermostLoop, Strides, true);
- int64_t StrideBPtr = getPtrStride(PSE, BPtr, InnermostLoop, Strides, true);
+ int64_t StrideAPtr =
+ getPtrStride(PSE, ATy, APtr, InnermostLoop, Strides, true);
+ int64_t StrideBPtr =
+ getPtrStride(PSE, BTy, BPtr, InnermostLoop, Strides, true);
const SCEV *Src = PSE.getSCEV(APtr);
const SCEV *Sink = PSE.getSCEV(BPtr);
@@ -1498,6 +1544,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
// dependence.
if (StrideAPtr < 0) {
std::swap(APtr, BPtr);
+ std::swap(ATy, BTy);
std::swap(Src, Sink);
std::swap(AIsWrite, BIsWrite);
std::swap(AIdx, BIdx);
@@ -1519,8 +1566,6 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
return Dependence::Unknown;
}
- Type *ATy = APtr->getType()->getPointerElementType();
- Type *BTy = BPtr->getType()->getPointerElementType();
auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
uint64_t TypeByteSize = DL.getTypeAllocSize(ATy);
uint64_t Stride = std::abs(StrideAPtr);
@@ -1958,7 +2003,11 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
if (blockNeedsPredication(ST->getParent(), TheLoop, DT))
Loc.AATags.TBAA = nullptr;
- Accesses.addStore(Loc);
+ visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
+ [&Accesses, Loc](Value *Ptr) {
+ MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
+ Accesses.addStore(NewLoc);
+ });
}
}
@@ -1982,7 +2031,7 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
// words may be written to the same address.
bool IsReadOnlyPtr = false;
if (Seen.insert(Ptr).second ||
- !getPtrStride(*PSE, Ptr, TheLoop, SymbolicStrides)) {
+ !getPtrStride(*PSE, LD->getType(), Ptr, TheLoop, SymbolicStrides)) {
++NumReads;
IsReadOnlyPtr = true;
}
@@ -2002,7 +2051,11 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
if (blockNeedsPredication(LD->getParent(), TheLoop, DT))
Loc.AATags.TBAA = nullptr;
- Accesses.addLoad(Loc, IsReadOnlyPtr);
+ visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
+ [&Accesses, Loc, IsReadOnlyPtr](Value *Ptr) {
+ MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
+ Accesses.addLoad(NewLoc, IsReadOnlyPtr);
+ });
}
// If we write (or read-write) to a single destination and there are no
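A minimal sketch of the updated getPtrStride interface used in the hunks above, with the access type now passed explicitly; Load, PSE (a PredicatedScalarEvolution pointer), TheLoop and SymbolicStrides are assumed to be in scope as in LoopAccessInfo::analyzeLoop.

Value *Ptr = Load->getPointerOperand();
int64_t Stride =
    getPtrStride(*PSE, Load->getType(), Ptr, TheLoop, SymbolicStrides);
bool IsConsecutive = (Stride == 1); // unit forward stride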
diff --git a/llvm/lib/Analysis/LoopCacheAnalysis.cpp b/llvm/lib/Analysis/LoopCacheAnalysis.cpp
index 8a613647bbea..7b895d8a5dc2 100644
--- a/llvm/lib/Analysis/LoopCacheAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopCacheAnalysis.cpp
@@ -30,6 +30,7 @@
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Delinearization.h"
#include "llvm/Analysis/DependenceAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
@@ -290,8 +291,8 @@ CacheCostTy IndexedReference::computeRefCost(const Loop &L,
const SCEV *Coeff = getLastCoefficient();
const SCEV *ElemSize = Sizes.back();
const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize);
- const SCEV *CacheLineSize = SE.getConstant(Stride->getType(), CLS);
Type *WiderType = SE.getWiderType(Stride->getType(), TripCount->getType());
+ const SCEV *CacheLineSize = SE.getConstant(WiderType, CLS);
if (SE.isKnownNegative(Stride))
Stride = SE.getNegativeSCEV(Stride);
Stride = SE.getNoopOrAnyExtend(Stride, WiderType);
@@ -344,8 +345,8 @@ bool IndexedReference::delinearize(const LoopInfo &LI) {
LLVM_DEBUG(dbgs().indent(2) << "In Loop '" << L->getName()
<< "', AccessFn: " << *AccessFn << "\n");
- SE.delinearize(AccessFn, Subscripts, Sizes,
- SE.getElementSize(&StoreOrLoadInst));
+ llvm::delinearize(SE, AccessFn, Subscripts, Sizes,
+ SE.getElementSize(&StoreOrLoadInst));
if (Subscripts.empty() || Sizes.empty() ||
Subscripts.size() != Sizes.size()) {
@@ -425,9 +426,7 @@ bool IndexedReference::isConsecutive(const Loop &L, unsigned CLS) const {
const SCEV *IndexedReference::getLastCoefficient() const {
const SCEV *LastSubscript = getLastSubscript();
- assert(isa<SCEVAddRecExpr>(LastSubscript) &&
- "Expecting a SCEV add recurrence expression");
- const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LastSubscript);
+ auto *AR = cast<SCEVAddRecExpr>(LastSubscript);
return AR->getStepRecurrence(SE);
}
@@ -522,10 +521,9 @@ void CacheCost::calculateCacheFootprint() {
LLVM_DEBUG(dbgs() << "COMPUTING LOOP CACHE COSTS\n");
for (const Loop *L : Loops) {
- assert((std::find_if(LoopCosts.begin(), LoopCosts.end(),
- [L](const LoopCacheCostTy &LCC) {
- return LCC.first == L;
- }) == LoopCosts.end()) &&
+ assert(llvm::none_of(
+ LoopCosts,
+ [L](const LoopCacheCostTy &LCC) { return LCC.first == L; }) &&
"Should not add duplicate element");
CacheCostTy LoopCost = computeLoopCacheCost(*L, RefGroups);
LoopCosts.push_back(std::make_pair(L, LoopCost));
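A short sketch of the free-function delinearization entry point the code above now calls (declared in llvm/Analysis/Delinearization.h); SE, AccessFn and MemI are assumed to be in scope.

SmallVector<const SCEV *, 4> Subscripts, Sizes;
llvm::delinearize(SE, AccessFn, Subscripts, Sizes, SE.getElementSize(&MemI));
// The caller above expects matching, non-empty Subscripts/Sizes on success,
// with the element size available as Sizes.back().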
diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp
index 66aab4c195c8..b35fb2a190f6 100644
--- a/llvm/lib/Analysis/LoopInfo.cpp
+++ b/llvm/lib/Analysis/LoopInfo.cpp
@@ -301,15 +301,16 @@ PHINode *Loop::getInductionVariable(ScalarEvolution &SE) const {
if (!CmpInst)
return nullptr;
- Instruction *LatchCmpOp0 = dyn_cast<Instruction>(CmpInst->getOperand(0));
- Instruction *LatchCmpOp1 = dyn_cast<Instruction>(CmpInst->getOperand(1));
+ Value *LatchCmpOp0 = CmpInst->getOperand(0);
+ Value *LatchCmpOp1 = CmpInst->getOperand(1);
for (PHINode &IndVar : Header->phis()) {
InductionDescriptor IndDesc;
if (!InductionDescriptor::isInductionPHI(&IndVar, this, &SE, IndDesc))
continue;
- Instruction *StepInst = IndDesc.getInductionBinOp();
+ BasicBlock *Latch = getLoopLatch();
+ Value *StepInst = IndVar.getIncomingValueForBlock(Latch);
// case 1:
// IndVar = phi[{InitialValue, preheader}, {StepInst, latch}]
@@ -1102,6 +1103,11 @@ llvm::Optional<int> llvm::getOptionalIntLoopAttribute(const Loop *TheLoop,
return IntMD->getSExtValue();
}
+int llvm::getIntLoopAttribute(const Loop *TheLoop, StringRef Name,
+ int Default) {
+ return getOptionalIntLoopAttribute(TheLoop, Name).getValueOr(Default);
+}
+
static const char *LLVMLoopMustProgress = "llvm.loop.mustprogress";
bool llvm::hasMustProgress(const Loop *L) {
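An illustrative call to the new getIntLoopAttribute wrapper (L is a const Loop *; the metadata name is only an example of an integer loop attribute):

int UnrollCount = getIntLoopAttribute(L, "llvm.loop.unroll.count", /*Default=*/0);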
diff --git a/llvm/lib/Analysis/LoopNestAnalysis.cpp b/llvm/lib/Analysis/LoopNestAnalysis.cpp
index 2649ed60f762..675bb7a7749c 100644
--- a/llvm/lib/Analysis/LoopNestAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopNestAnalysis.cpp
@@ -50,8 +50,66 @@ std::unique_ptr<LoopNest> LoopNest::getLoopNest(Loop &Root,
return std::make_unique<LoopNest>(Root, SE);
}
+static CmpInst *getOuterLoopLatchCmp(const Loop &OuterLoop) {
+
+ const BasicBlock *Latch = OuterLoop.getLoopLatch();
+ assert(Latch && "Expecting a valid loop latch");
+
+ const BranchInst *BI = dyn_cast<BranchInst>(Latch->getTerminator());
+ assert(BI && BI->isConditional() &&
+ "Expecting loop latch terminator to be a branch instruction");
+
+ CmpInst *OuterLoopLatchCmp = dyn_cast<CmpInst>(BI->getCondition());
+ DEBUG_WITH_TYPE(
+ VerboseDebug, if (OuterLoopLatchCmp) {
+ dbgs() << "Outer loop latch compare instruction: " << *OuterLoopLatchCmp
+ << "\n";
+ });
+ return OuterLoopLatchCmp;
+}
+
+static CmpInst *getInnerLoopGuardCmp(const Loop &InnerLoop) {
+
+ BranchInst *InnerGuard = InnerLoop.getLoopGuardBranch();
+ CmpInst *InnerLoopGuardCmp =
+ (InnerGuard) ? dyn_cast<CmpInst>(InnerGuard->getCondition()) : nullptr;
+
+ DEBUG_WITH_TYPE(
+ VerboseDebug, if (InnerLoopGuardCmp) {
+ dbgs() << "Inner loop guard compare instruction: " << *InnerLoopGuardCmp
+ << "\n";
+ });
+ return InnerLoopGuardCmp;
+}
+
+static bool checkSafeInstruction(const Instruction &I,
+ const CmpInst *InnerLoopGuardCmp,
+ const CmpInst *OuterLoopLatchCmp,
+ Optional<Loop::LoopBounds> OuterLoopLB) {
+
+ bool IsAllowed =
+ isSafeToSpeculativelyExecute(&I) || isa<PHINode>(I) || isa<BranchInst>(I);
+ if (!IsAllowed)
+ return false;
+ // The only binary instruction allowed is the outer loop step instruction;
+ // the only comparison instructions allowed are the inner loop guard
+ // compare instruction and the outer loop latch compare instruction.
+ if ((isa<BinaryOperator>(I) && &I != &OuterLoopLB->getStepInst()) ||
+ (isa<CmpInst>(I) && &I != OuterLoopLatchCmp && &I != InnerLoopGuardCmp)) {
+ return false;
+ }
+ return true;
+}
+
bool LoopNest::arePerfectlyNested(const Loop &OuterLoop, const Loop &InnerLoop,
ScalarEvolution &SE) {
+ return (analyzeLoopNestForPerfectNest(OuterLoop, InnerLoop, SE) ==
+ PerfectLoopNest);
+}
+
+LoopNest::LoopNestEnum LoopNest::analyzeLoopNestForPerfectNest(
+ const Loop &OuterLoop, const Loop &InnerLoop, ScalarEvolution &SE) {
+
assert(!OuterLoop.isInnermost() && "Outer loop should have subloops");
assert(!InnerLoop.isOutermost() && "Inner loop should have a parent");
LLVM_DEBUG(dbgs() << "Checking whether loop '" << OuterLoop.getName()
@@ -66,7 +124,7 @@ bool LoopNest::arePerfectlyNested(const Loop &OuterLoop, const Loop &InnerLoop,
// the outer loop latch.
if (!checkLoopsStructure(OuterLoop, InnerLoop, SE)) {
LLVM_DEBUG(dbgs() << "Not perfectly nested: invalid loop structure.\n");
- return false;
+ return InvalidLoopStructure;
}
// Bail out if we cannot retrieve the outer loop bounds.
@@ -74,33 +132,11 @@ bool LoopNest::arePerfectlyNested(const Loop &OuterLoop, const Loop &InnerLoop,
if (OuterLoopLB == None) {
LLVM_DEBUG(dbgs() << "Cannot compute loop bounds of OuterLoop: "
<< OuterLoop << "\n";);
- return false;
+ return OuterLoopLowerBoundUnknown;
}
- // Identify the outer loop latch comparison instruction.
- const BasicBlock *Latch = OuterLoop.getLoopLatch();
- assert(Latch && "Expecting a valid loop latch");
- const BranchInst *BI = dyn_cast<BranchInst>(Latch->getTerminator());
- assert(BI && BI->isConditional() &&
- "Expecting loop latch terminator to be a branch instruction");
-
- const CmpInst *OuterLoopLatchCmp = dyn_cast<CmpInst>(BI->getCondition());
- DEBUG_WITH_TYPE(
- VerboseDebug, if (OuterLoopLatchCmp) {
- dbgs() << "Outer loop latch compare instruction: " << *OuterLoopLatchCmp
- << "\n";
- });
-
- // Identify the inner loop guard instruction.
- BranchInst *InnerGuard = InnerLoop.getLoopGuardBranch();
- const CmpInst *InnerLoopGuardCmp =
- (InnerGuard) ? dyn_cast<CmpInst>(InnerGuard->getCondition()) : nullptr;
-
- DEBUG_WITH_TYPE(
- VerboseDebug, if (InnerLoopGuardCmp) {
- dbgs() << "Inner loop guard compare instruction: " << *InnerLoopGuardCmp
- << "\n";
- });
+ CmpInst *OuterLoopLatchCmp = getOuterLoopLatchCmp(OuterLoop);
+ CmpInst *InnerLoopGuardCmp = getInnerLoopGuardCmp(InnerLoop);
// Determine whether instructions in a basic block are one of:
// - the inner loop guard comparison
@@ -109,29 +145,15 @@ bool LoopNest::arePerfectlyNested(const Loop &OuterLoop, const Loop &InnerLoop,
// - a phi node, a cast or a branch
auto containsOnlySafeInstructions = [&](const BasicBlock &BB) {
return llvm::all_of(BB, [&](const Instruction &I) {
- bool isAllowed = isSafeToSpeculativelyExecute(&I) || isa<PHINode>(I) ||
- isa<BranchInst>(I);
- if (!isAllowed) {
- DEBUG_WITH_TYPE(VerboseDebug, {
- dbgs() << "Instruction: " << I << "\nin basic block: " << BB
- << " is considered unsafe.\n";
- });
- return false;
- }
-
- // The only binary instruction allowed is the outer loop step instruction,
- // the only comparison instructions allowed are the inner loop guard
- // compare instruction and the outer loop latch compare instruction.
- if ((isa<BinaryOperator>(I) && &I != &OuterLoopLB->getStepInst()) ||
- (isa<CmpInst>(I) && &I != OuterLoopLatchCmp &&
- &I != InnerLoopGuardCmp)) {
+ bool IsSafeInstr = checkSafeInstruction(I, InnerLoopGuardCmp,
+ OuterLoopLatchCmp, OuterLoopLB);
+      if (!IsSafeInstr) {
DEBUG_WITH_TYPE(VerboseDebug, {
dbgs() << "Instruction: " << I << "\nin basic block:" << BB
<< "is unsafe.\n";
});
- return false;
}
- return true;
+ return IsSafeInstr;
});
};
@@ -148,13 +170,72 @@ bool LoopNest::arePerfectlyNested(const Loop &OuterLoop, const Loop &InnerLoop,
!containsOnlySafeInstructions(*InnerLoop.getExitBlock())) {
LLVM_DEBUG(dbgs() << "Not perfectly nested: code surrounding inner loop is "
"unsafe\n";);
- return false;
+ return ImperfectLoopNest;
}
LLVM_DEBUG(dbgs() << "Loop '" << OuterLoop.getName() << "' and '"
<< InnerLoop.getName() << "' are perfectly nested.\n");
- return true;
+ return PerfectLoopNest;
+}
+
+LoopNest::InstrVectorTy LoopNest::getInterveningInstructions(
+ const Loop &OuterLoop, const Loop &InnerLoop, ScalarEvolution &SE) {
+ InstrVectorTy Instr;
+ switch (analyzeLoopNestForPerfectNest(OuterLoop, InnerLoop, SE)) {
+ case PerfectLoopNest:
+ LLVM_DEBUG(dbgs() << "The loop Nest is Perfect, returning empty "
+ "instruction vector. \n";);
+ return Instr;
+
+ case InvalidLoopStructure:
+ LLVM_DEBUG(dbgs() << "Not perfectly nested: invalid loop structure. "
+ "Instruction vector is empty.\n";);
+ return Instr;
+
+ case OuterLoopLowerBoundUnknown:
+ LLVM_DEBUG(dbgs() << "Cannot compute loop bounds of OuterLoop: "
+ << OuterLoop << "\nInstruction vector is empty.\n";);
+ return Instr;
+
+ case ImperfectLoopNest:
+ break;
+ }
+
+ // Identify the outer loop latch comparison instruction.
+ auto OuterLoopLB = OuterLoop.getBounds(SE);
+
+ CmpInst *OuterLoopLatchCmp = getOuterLoopLatchCmp(OuterLoop);
+ CmpInst *InnerLoopGuardCmp = getInnerLoopGuardCmp(InnerLoop);
+
+ auto GetUnsafeInstructions = [&](const BasicBlock &BB) {
+ for (const Instruction &I : BB) {
+ if (!checkSafeInstruction(I, InnerLoopGuardCmp, OuterLoopLatchCmp,
+ OuterLoopLB)) {
+ Instr.push_back(&I);
+ DEBUG_WITH_TYPE(VerboseDebug, {
+ dbgs() << "Instruction: " << I << "\nin basic block:" << BB
+ << "is unsafe.\n";
+ });
+ }
+ }
+ };
+
+ // Check the code surrounding the inner loop for instructions that are deemed
+ // unsafe.
+ const BasicBlock *OuterLoopHeader = OuterLoop.getHeader();
+ const BasicBlock *OuterLoopLatch = OuterLoop.getLoopLatch();
+ const BasicBlock *InnerLoopPreHeader = InnerLoop.getLoopPreheader();
+ const BasicBlock *InnerLoopExitBlock = InnerLoop.getExitBlock();
+
+ GetUnsafeInstructions(*OuterLoopHeader);
+ GetUnsafeInstructions(*OuterLoopLatch);
+ GetUnsafeInstructions(*InnerLoopExitBlock);
+
+ if (InnerLoopPreHeader != OuterLoopHeader) {
+ GetUnsafeInstructions(*InnerLoopPreHeader);
+ }
+ return Instr;
}
SmallVector<LoopVectorTy, 4>
diff --git a/llvm/lib/Analysis/MLInlineAdvisor.cpp b/llvm/lib/Analysis/MLInlineAdvisor.cpp
index 5b95ed223fd9..6fc4c42bdd71 100644
--- a/llvm/lib/Analysis/MLInlineAdvisor.cpp
+++ b/llvm/lib/Analysis/MLInlineAdvisor.cpp
@@ -116,6 +116,8 @@ MLInlineAdvisor::MLInlineAdvisor(Module &M, ModuleAnalysisManager &MAM,
void MLInlineAdvisor::onPassEntry() {
// Function passes executed between InlinerPass runs may have changed the
// module-wide features.
+ if (!Invalid)
+ return;
NodeCount = 0;
EdgeCount = 0;
for (auto &F : M)
@@ -123,6 +125,7 @@ void MLInlineAdvisor::onPassEntry() {
++NodeCount;
EdgeCount += getLocalCalls(F);
}
+ Invalid = false;
}
int64_t MLInlineAdvisor::getLocalCalls(Function &F) {
diff --git a/llvm/lib/Analysis/MemoryBuiltins.cpp b/llvm/lib/Analysis/MemoryBuiltins.cpp
index 68e997656d84..4f2b5b34304d 100644
--- a/llvm/lib/Analysis/MemoryBuiltins.cpp
+++ b/llvm/lib/Analysis/MemoryBuiltins.cpp
@@ -111,7 +111,7 @@ static const std::pair<LibFunc, AllocFnsTy> AllocationFnData[] = {
{LibFunc_reallocf, {ReallocLike, 2, 1, -1}},
{LibFunc_strdup, {StrDupLike, 1, -1, -1}},
{LibFunc_strndup, {StrDupLike, 2, 1, -1}},
- {LibFunc___kmpc_alloc_shared, {MallocLike, 1, 0, -1}}
+ {LibFunc___kmpc_alloc_shared, {MallocLike, 1, 0, -1}},
// TODO: Handle "int posix_memalign(void **, size_t, size_t)"
};
@@ -135,9 +135,8 @@ static const Function *getCalledFunction(const Value *V, bool LookThroughBitCast
return nullptr;
}
-/// Returns the allocation data for the given value if it's either a call to a
-/// known allocation function, or a call to a function with the allocsize
-/// attribute.
+/// Returns the allocation data for the given value if it's a call to a known
+/// allocation function.
static Optional<AllocFnsTy>
getAllocationDataForFunction(const Function *Callee, AllocType AllocTy,
const TargetLibraryInfo *TLI) {
@@ -610,7 +609,7 @@ ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout &DL,
SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) {
IntTyBits = DL.getIndexTypeSizeInBits(V->getType());
- Zero = APInt::getNullValue(IntTyBits);
+ Zero = APInt::getZero(IntTyBits);
V = V->stripPointerCasts();
if (Instruction *I = dyn_cast<Instruction>(V)) {
diff --git a/llvm/lib/Analysis/MemoryLocation.cpp b/llvm/lib/Analysis/MemoryLocation.cpp
index ef9cda37ce35..7f2d04c49565 100644
--- a/llvm/lib/Analysis/MemoryLocation.cpp
+++ b/llvm/lib/Analysis/MemoryLocation.cpp
@@ -35,54 +35,44 @@ void LocationSize::print(raw_ostream &OS) const {
}
MemoryLocation MemoryLocation::get(const LoadInst *LI) {
- AAMDNodes AATags;
- LI->getAAMetadata(AATags);
const auto &DL = LI->getModule()->getDataLayout();
return MemoryLocation(
LI->getPointerOperand(),
- LocationSize::precise(DL.getTypeStoreSize(LI->getType())), AATags);
+ LocationSize::precise(DL.getTypeStoreSize(LI->getType())),
+ LI->getAAMetadata());
}
MemoryLocation MemoryLocation::get(const StoreInst *SI) {
- AAMDNodes AATags;
- SI->getAAMetadata(AATags);
const auto &DL = SI->getModule()->getDataLayout();
return MemoryLocation(SI->getPointerOperand(),
LocationSize::precise(DL.getTypeStoreSize(
SI->getValueOperand()->getType())),
- AATags);
+ SI->getAAMetadata());
}
MemoryLocation MemoryLocation::get(const VAArgInst *VI) {
- AAMDNodes AATags;
- VI->getAAMetadata(AATags);
-
return MemoryLocation(VI->getPointerOperand(),
- LocationSize::afterPointer(), AATags);
+ LocationSize::afterPointer(), VI->getAAMetadata());
}
MemoryLocation MemoryLocation::get(const AtomicCmpXchgInst *CXI) {
- AAMDNodes AATags;
- CXI->getAAMetadata(AATags);
const auto &DL = CXI->getModule()->getDataLayout();
return MemoryLocation(CXI->getPointerOperand(),
LocationSize::precise(DL.getTypeStoreSize(
CXI->getCompareOperand()->getType())),
- AATags);
+ CXI->getAAMetadata());
}
MemoryLocation MemoryLocation::get(const AtomicRMWInst *RMWI) {
- AAMDNodes AATags;
- RMWI->getAAMetadata(AATags);
const auto &DL = RMWI->getModule()->getDataLayout();
return MemoryLocation(RMWI->getPointerOperand(),
LocationSize::precise(DL.getTypeStoreSize(
RMWI->getValOperand()->getType())),
- AATags);
+ RMWI->getAAMetadata());
}
Optional<MemoryLocation> MemoryLocation::getOrNone(const Instruction *Inst) {
@@ -117,10 +107,7 @@ MemoryLocation MemoryLocation::getForSource(const AnyMemTransferInst *MTI) {
// memcpy/memmove can have AA tags. For memcpy, they apply
// to both the source and the destination.
- AAMDNodes AATags;
- MTI->getAAMetadata(AATags);
-
- return MemoryLocation(MTI->getRawSource(), Size, AATags);
+ return MemoryLocation(MTI->getRawSource(), Size, MTI->getAAMetadata());
}
MemoryLocation MemoryLocation::getForDest(const MemIntrinsic *MI) {
@@ -138,17 +125,13 @@ MemoryLocation MemoryLocation::getForDest(const AnyMemIntrinsic *MI) {
// memcpy/memmove can have AA tags. For memcpy, they apply
// to both the source and the destination.
- AAMDNodes AATags;
- MI->getAAMetadata(AATags);
-
- return MemoryLocation(MI->getRawDest(), Size, AATags);
+ return MemoryLocation(MI->getRawDest(), Size, MI->getAAMetadata());
}
MemoryLocation MemoryLocation::getForArgument(const CallBase *Call,
unsigned ArgIdx,
const TargetLibraryInfo *TLI) {
- AAMDNodes AATags;
- Call->getAAMetadata(AATags);
+ AAMDNodes AATags = Call->getAAMetadata();
const Value *Arg = Call->getArgOperand(ArgIdx);
// We may be able to produce an exact size for known intrinsics.
diff --git a/llvm/lib/Analysis/MemorySSA.cpp b/llvm/lib/Analysis/MemorySSA.cpp
index b402b0467f5d..ac20e20f0c0d 100644
--- a/llvm/lib/Analysis/MemorySSA.cpp
+++ b/llvm/lib/Analysis/MemorySSA.cpp
@@ -90,22 +90,18 @@ bool llvm::VerifyMemorySSA = true;
#else
bool llvm::VerifyMemorySSA = false;
#endif
-/// Enables memory ssa as a dependency for loop passes in legacy pass manager.
-cl::opt<bool> llvm::EnableMSSALoopDependency(
- "enable-mssa-loop-dependency", cl::Hidden, cl::init(true),
- cl::desc("Enable MemorySSA dependency for loop pass manager"));
static cl::opt<bool, true>
VerifyMemorySSAX("verify-memoryssa", cl::location(VerifyMemorySSA),
cl::Hidden, cl::desc("Enable verification of MemorySSA."));
-namespace llvm {
+const static char LiveOnEntryStr[] = "liveOnEntry";
+
+namespace {
/// An assembly annotator class to print Memory SSA information in
/// comments.
class MemorySSAAnnotatedWriter : public AssemblyAnnotationWriter {
- friend class MemorySSA;
-
const MemorySSA *MSSA;
public:
@@ -124,7 +120,34 @@ public:
}
};
-} // end namespace llvm
+/// An assembly annotator class to print Memory SSA information in
+/// comments.
+class MemorySSAWalkerAnnotatedWriter : public AssemblyAnnotationWriter {
+ MemorySSA *MSSA;
+ MemorySSAWalker *Walker;
+
+public:
+ MemorySSAWalkerAnnotatedWriter(MemorySSA *M)
+ : MSSA(M), Walker(M->getWalker()) {}
+
+ void emitInstructionAnnot(const Instruction *I,
+ formatted_raw_ostream &OS) override {
+ if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) {
+ MemoryAccess *Clobber = Walker->getClobberingMemoryAccess(MA);
+ OS << "; " << *MA;
+ if (Clobber) {
+ OS << " - clobbered by ";
+ if (MSSA->isLiveOnEntryDef(Clobber))
+ OS << LiveOnEntryStr;
+ else
+ OS << *Clobber;
+ }
+ OS << "\n";
+ }
+ }
+};
+
+} // namespace
namespace {
@@ -286,6 +309,7 @@ instructionClobbersQuery(const MemoryDef *MD, const MemoryLocation &UseLoc,
case Intrinsic::invariant_end:
case Intrinsic::assume:
case Intrinsic::experimental_noalias_scope_decl:
+ case Intrinsic::pseudoprobe:
return {false, AliasResult(AliasResult::NoAlias)};
case Intrinsic::dbg_addr:
case Intrinsic::dbg_declare:
@@ -1016,7 +1040,8 @@ public:
// updated if a new clobber is found by this SkipSelf search. If this
// additional query becomes heavily used we may decide to cache the result.
// Walker instantiations will decide how to set the SkipSelf bool.
- MemoryAccess *getClobberingMemoryAccessBase(MemoryAccess *, unsigned &, bool);
+ MemoryAccess *getClobberingMemoryAccessBase(MemoryAccess *, unsigned &, bool,
+ bool UseInvariantGroup = true);
};
/// A MemorySSAWalker that does AA walks to disambiguate accesses. It no
@@ -1041,6 +1066,11 @@ public:
unsigned &UWL) {
return Walker->getClobberingMemoryAccessBase(MA, Loc, UWL);
}
+ // This method is not accessible outside of this file.
+ MemoryAccess *getClobberingMemoryAccessWithoutInvariantGroup(MemoryAccess *MA,
+ unsigned &UWL) {
+ return Walker->getClobberingMemoryAccessBase(MA, UWL, false, false);
+ }
MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA) override {
unsigned UpwardWalkLimit = MaxCheckLimit;
@@ -1437,10 +1467,13 @@ void MemorySSA::OptimizeUses::optimizeUsesInBlock(
unsigned UpwardWalkLimit = MaxCheckLimit;
while (UpperBound > LocInfo.LowerBound) {
if (isa<MemoryPhi>(VersionStack[UpperBound])) {
- // For phis, use the walker, see where we ended up, go there
+ // For phis, use the walker, see where we ended up, go there.
+ // The invariant.group handling in MemorySSA is ad-hoc and doesn't
+ // support updates, so don't use it to optimize uses.
MemoryAccess *Result =
- Walker->getClobberingMemoryAccess(MU, UpwardWalkLimit);
- // We are guaranteed to find it or something is wrong
+ Walker->getClobberingMemoryAccessWithoutInvariantGroup(
+ MU, UpwardWalkLimit);
+ // We are guaranteed to find it or something is wrong.
while (VersionStack[UpperBound] != Result) {
assert(UpperBound != 0);
--UpperBound;
@@ -1750,6 +1783,7 @@ MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I,
break;
case Intrinsic::assume:
case Intrinsic::experimental_noalias_scope_decl:
+ case Intrinsic::pseudoprobe:
return nullptr;
}
}
@@ -1864,10 +1898,17 @@ void MemorySSA::print(raw_ostream &OS) const {
LLVM_DUMP_METHOD void MemorySSA::dump() const { print(dbgs()); }
#endif
-void MemorySSA::verifyMemorySSA() const {
- verifyOrderingDominationAndDefUses(F);
+void MemorySSA::verifyMemorySSA(VerificationLevel VL) const {
+#if !defined(NDEBUG) && defined(EXPENSIVE_CHECKS)
+ VL = VerificationLevel::Full;
+#endif
+
+#ifndef NDEBUG
+ verifyOrderingDominationAndDefUses(F, VL);
verifyDominationNumbers(F);
- verifyPrevDefInPhis(F);
+ if (VL == VerificationLevel::Full)
+ verifyPrevDefInPhis(F);
+#endif
// Previously, the verification used to also verify that the clobberingAccess
// cached by MemorySSA is the same as the clobberingAccess found at a later
// query to AA. This does not hold true in general due to the current fragility
@@ -1881,7 +1922,6 @@ void MemorySSA::verifyMemorySSA() const {
}
void MemorySSA::verifyPrevDefInPhis(Function &F) const {
-#if !defined(NDEBUG) && defined(EXPENSIVE_CHECKS)
for (const BasicBlock &BB : F) {
if (MemoryPhi *Phi = getMemoryAccess(&BB)) {
for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
@@ -1896,6 +1936,8 @@ void MemorySSA::verifyPrevDefInPhis(Function &F) const {
auto *LastAcc = &*(--DefList->end());
assert(LastAcc == IncAcc &&
"Incorrect incoming access into phi.");
+ (void)IncAcc;
+ (void)LastAcc;
break;
}
DTNode = DTNode->getIDom();
@@ -1911,13 +1953,11 @@ void MemorySSA::verifyPrevDefInPhis(Function &F) const {
}
}
}
-#endif
}
/// Verify that all of the blocks we believe to have valid domination numbers
/// actually have valid domination numbers.
void MemorySSA::verifyDominationNumbers(const Function &F) const {
-#ifndef NDEBUG
if (BlockNumberingValid.empty())
return;
@@ -1943,13 +1983,13 @@ void MemorySSA::verifyDominationNumbers(const Function &F) const {
unsigned long ThisNumber = ThisNumberIter->second;
assert(ThisNumber > LastNumber &&
"Domination numbers should be strictly increasing!");
+ (void)LastNumber;
LastNumber = ThisNumber;
}
}
assert(ValidBlocks.empty() &&
"All valid BasicBlocks should exist in F -- dangling pointers?");
-#endif
}
/// Verify ordering: the order and existence of MemoryAccesses matches the
@@ -1958,8 +1998,8 @@ void MemorySSA::verifyDominationNumbers(const Function &F) const {
/// Verify def-uses: the immediate use information - walk all the memory
/// accesses and verifying that, for each use, it appears in the appropriate
/// def's use list
-void MemorySSA::verifyOrderingDominationAndDefUses(Function &F) const {
-#if !defined(NDEBUG)
+void MemorySSA::verifyOrderingDominationAndDefUses(Function &F,
+ VerificationLevel VL) const {
// Walk all the blocks, comparing what the lookups think and what the access
// lists think, as well as the order in the blocks vs the order in the access
// lists.
@@ -1974,19 +2014,21 @@ void MemorySSA::verifyOrderingDominationAndDefUses(Function &F) const {
ActualAccesses.push_back(Phi);
ActualDefs.push_back(Phi);
// Verify domination
- for (const Use &U : Phi->uses())
+ for (const Use &U : Phi->uses()) {
assert(dominates(Phi, U) && "Memory PHI does not dominate it's uses");
-#if defined(EXPENSIVE_CHECKS)
- // Verify def-uses.
- assert(Phi->getNumOperands() == static_cast<unsigned>(std::distance(
- pred_begin(&B), pred_end(&B))) &&
- "Incomplete MemoryPhi Node");
- for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
- verifyUseInDefs(Phi->getIncomingValue(I), Phi);
- assert(is_contained(predecessors(&B), Phi->getIncomingBlock(I)) &&
- "Incoming phi block not a block predecessor");
+ (void)U;
+ }
+ // Verify def-uses for full verify.
+ if (VL == VerificationLevel::Full) {
+ assert(Phi->getNumOperands() == static_cast<unsigned>(std::distance(
+ pred_begin(&B), pred_end(&B))) &&
+ "Incomplete MemoryPhi Node");
+ for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
+ verifyUseInDefs(Phi->getIncomingValue(I), Phi);
+ assert(is_contained(predecessors(&B), Phi->getIncomingBlock(I)) &&
+ "Incoming phi block not a block predecessor");
+ }
}
-#endif
}
for (Instruction &I : B) {
@@ -2002,14 +2044,15 @@ void MemorySSA::verifyOrderingDominationAndDefUses(Function &F) const {
// Verify ordering.
ActualDefs.push_back(MA);
// Verify domination.
- for (const Use &U : MD->uses())
+ for (const Use &U : MD->uses()) {
assert(dominates(MD, U) &&
"Memory Def does not dominate it's uses");
+ (void)U;
+ }
}
-#if defined(EXPENSIVE_CHECKS)
- // Verify def-uses.
- verifyUseInDefs(MA->getDefiningAccess(), MA);
-#endif
+ // Verify def-uses for full verify.
+ if (VL == VerificationLevel::Full)
+ verifyUseInDefs(MA->getDefiningAccess(), MA);
}
}
// Either we hit the assert, really have no accesses, or we have both
@@ -2044,13 +2087,11 @@ void MemorySSA::verifyOrderingDominationAndDefUses(Function &F) const {
}
ActualDefs.clear();
}
-#endif
}
/// Verify the def-use lists in MemorySSA, by verifying that \p Use
/// appears in the use list of \p Def.
void MemorySSA::verifyUseInDefs(MemoryAccess *Def, MemoryAccess *Use) const {
-#ifndef NDEBUG
// The live on entry use may cause us to get a NULL def here
if (!Def)
assert(isLiveOnEntryDef(Use) &&
@@ -2058,7 +2099,6 @@ void MemorySSA::verifyUseInDefs(MemoryAccess *Def, MemoryAccess *Use) const {
else
assert(is_contained(Def->users(), Use) &&
"Did not find use in def's use list");
-#endif
}
/// Perform a local numbering on blocks so that instruction ordering can be
@@ -2138,8 +2178,6 @@ bool MemorySSA::dominates(const MemoryAccess *Dominator,
return dominates(Dominator, cast<MemoryAccess>(Dominatee.getUser()));
}
-const static char LiveOnEntryStr[] = "liveOnEntry";
-
void MemoryAccess::print(raw_ostream &OS) const {
switch (getValueID()) {
case MemoryPhiVal: return static_cast<const MemoryPhi *>(this)->print(OS);
@@ -2355,6 +2393,16 @@ PreservedAnalyses MemorySSAPrinterPass::run(Function &F,
return PreservedAnalyses::all();
}
+PreservedAnalyses MemorySSAWalkerPrinterPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ auto &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
+ OS << "MemorySSA (walker) for function: " << F.getName() << "\n";
+ MemorySSAWalkerAnnotatedWriter Writer(&MSSA);
+ F.print(OS, &Writer);
+
+ return PreservedAnalyses::all();
+}
+
PreservedAnalyses MemorySSAVerifierPass::run(Function &F,
FunctionAnalysisManager &AM) {
AM.getResult<MemorySSAAnalysis>(F).getMSSA().verifyMemorySSA();
@@ -2438,15 +2486,88 @@ MemorySSA::ClobberWalkerBase<AliasAnalysisType>::getClobberingMemoryAccessBase(
return Clobber;
}
+static const Instruction *
+getInvariantGroupClobberingInstruction(Instruction &I, DominatorTree &DT) {
+ if (!I.hasMetadata(LLVMContext::MD_invariant_group) || I.isVolatile())
+ return nullptr;
+
+ // We consider bitcasts and zero GEPs to be the same pointer value. Start by
+ // stripping bitcasts and zero GEPs, then we will recursively look at loads
+ // and stores through bitcasts and zero GEPs.
+ Value *PointerOperand = getLoadStorePointerOperand(&I)->stripPointerCasts();
+
+ // It's not safe to walk the use list of a global value because function
+ // passes aren't allowed to look outside their functions.
+ // FIXME: this could be fixed by filtering instructions from outside of
+ // current function.
+ if (isa<Constant>(PointerOperand))
+ return nullptr;
+
+ // Queue to process all pointers that are equivalent to load operand.
+ SmallVector<const Value *, 8> PointerUsesQueue;
+ PointerUsesQueue.push_back(PointerOperand);
+
+ const Instruction *MostDominatingInstruction = &I;
+
+ // FIXME: This loop is O(n^2) because dominates can be O(n) and in worst case
+ // we will see all the instructions. It may not matter in practice. If it
+ // does, we will have to support MemorySSA construction and updates.
+ while (!PointerUsesQueue.empty()) {
+ const Value *Ptr = PointerUsesQueue.pop_back_val();
+ assert(Ptr && !isa<GlobalValue>(Ptr) &&
+ "Null or GlobalValue should not be inserted");
+
+ for (const User *Us : Ptr->users()) {
+ auto *U = dyn_cast<Instruction>(Us);
+ if (!U || U == &I || !DT.dominates(U, MostDominatingInstruction))
+ continue;
+
+ // Add bitcasts and zero GEPs to queue.
+ if (isa<BitCastInst>(U)) {
+ PointerUsesQueue.push_back(U);
+ continue;
+ }
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
+ if (GEP->hasAllZeroIndices())
+ PointerUsesQueue.push_back(U);
+ continue;
+ }
+
+      // If we hit a load/store with invariant.group metadata and the same
+      // pointer operand, we can assume that the value pointed to by the
+      // pointer operand didn't change.
+ if (U->hasMetadata(LLVMContext::MD_invariant_group) &&
+ getLoadStorePointerOperand(U) == Ptr && !U->isVolatile()) {
+ MostDominatingInstruction = U;
+ }
+ }
+ }
+ return MostDominatingInstruction == &I ? nullptr : MostDominatingInstruction;
+}
+
template <typename AliasAnalysisType>
MemoryAccess *
MemorySSA::ClobberWalkerBase<AliasAnalysisType>::getClobberingMemoryAccessBase(
- MemoryAccess *MA, unsigned &UpwardWalkLimit, bool SkipSelf) {
+ MemoryAccess *MA, unsigned &UpwardWalkLimit, bool SkipSelf,
+ bool UseInvariantGroup) {
auto *StartingAccess = dyn_cast<MemoryUseOrDef>(MA);
// If this is a MemoryPhi, we can't do anything.
if (!StartingAccess)
return MA;
+ if (UseInvariantGroup) {
+ if (auto *I = getInvariantGroupClobberingInstruction(
+ *StartingAccess->getMemoryInst(), MSSA->getDomTree())) {
+ assert(isa<LoadInst>(I) || isa<StoreInst>(I));
+
+ auto *ClobberMA = MSSA->getMemoryAccess(I);
+ assert(ClobberMA);
+ if (isa<MemoryUse>(ClobberMA))
+ return ClobberMA->getDefiningAccess();
+ return ClobberMA;
+ }
+ }
+
bool IsOptimized = false;
// If this is an already optimized use or def, return the optimized result.
diff --git a/llvm/lib/Analysis/MemorySSAUpdater.cpp b/llvm/lib/Analysis/MemorySSAUpdater.cpp
index 616864f360bf..9c841883de6d 100644
--- a/llvm/lib/Analysis/MemorySSAUpdater.cpp
+++ b/llvm/lib/Analysis/MemorySSAUpdater.cpp
@@ -296,9 +296,8 @@ static void setMemoryPhiValueForBlock(MemoryPhi *MP, const BasicBlock *BB,
assert(i != -1 && "Should have found the basic block in the phi");
// We can't just compare i against getNumOperands since one is signed and the
// other not. So use it to index into the block iterator.
- for (auto BBIter = MP->block_begin() + i; BBIter != MP->block_end();
- ++BBIter) {
- if (*BBIter != BB)
+ for (const BasicBlock *BlockBB : llvm::drop_begin(MP->blocks(), i)) {
+ if (BlockBB != BB)
break;
MP->setIncomingValue(i, NewDef);
++i;
@@ -491,8 +490,7 @@ void MemorySSAUpdater::fixupDefs(const SmallVectorImpl<WeakVH> &Vars) {
}
while (!Worklist.empty()) {
- const BasicBlock *FixupBlock = Worklist.back();
- Worklist.pop_back();
+ const BasicBlock *FixupBlock = Worklist.pop_back_val();
// Get the first def in the block that isn't a phi node.
if (auto *Defs = MSSA->getWritableBlockDefs(FixupBlock)) {
@@ -822,25 +820,30 @@ void MemorySSAUpdater::applyUpdates(ArrayRef<CFGUpdate> Updates,
}
if (!DeleteUpdates.empty()) {
- if (!UpdateDT) {
- SmallVector<CFGUpdate, 0> Empty;
- // Deletes are reversed applied, because this CFGView is pretending the
- // deletes did not happen yet, hence the edges still exist.
- DT.applyUpdates(Empty, RevDeleteUpdates);
+ if (!InsertUpdates.empty()) {
+ if (!UpdateDT) {
+ SmallVector<CFGUpdate, 0> Empty;
+ // Deletes are reversed applied, because this CFGView is pretending the
+ // deletes did not happen yet, hence the edges still exist.
+ DT.applyUpdates(Empty, RevDeleteUpdates);
+ } else {
+ // Apply all updates, with the RevDeleteUpdates as PostCFGView.
+ DT.applyUpdates(Updates, RevDeleteUpdates);
+ }
+
+ // Note: the MSSA update below doesn't distinguish between a GD with
+ // (RevDelete,false) and (Delete, true), but this matters for the DT
+ // updates above; for "children" purposes they are equivalent; but the
+ // updates themselves convey the desired update, used inside DT only.
+ GraphDiff<BasicBlock *> GD(RevDeleteUpdates);
+ applyInsertUpdates(InsertUpdates, DT, &GD);
+ // Update DT to redelete edges; this matches the real CFG so we can
+ // perform the standard update without a postview of the CFG.
+ DT.applyUpdates(DeleteUpdates);
} else {
- // Apply all updates, with the RevDeleteUpdates as PostCFGView.
- DT.applyUpdates(Updates, RevDeleteUpdates);
+ if (UpdateDT)
+ DT.applyUpdates(DeleteUpdates);
}
-
- // Note: the MSSA update below doesn't distinguish between a GD with
- // (RevDelete,false) and (Delete, true), but this matters for the DT
- // updates above; for "children" purposes they are equivalent; but the
- // updates themselves convey the desired update, used inside DT only.
- GraphDiff<BasicBlock *> GD(RevDeleteUpdates);
- applyInsertUpdates(InsertUpdates, DT, &GD);
- // Update DT to redelete edges; this matches the real CFG so we can perform
- // the standard update without a postview of the CFG.
- DT.applyUpdates(DeleteUpdates);
} else {
if (UpdateDT)
DT.applyUpdates(Updates);
@@ -1131,11 +1134,7 @@ void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates,
if (auto DefsList = MSSA->getWritableBlockDefs(BlockWithDefsToReplace)) {
for (auto &DefToReplaceUses : *DefsList) {
BasicBlock *DominatingBlock = DefToReplaceUses.getBlock();
- Value::use_iterator UI = DefToReplaceUses.use_begin(),
- E = DefToReplaceUses.use_end();
- for (; UI != E;) {
- Use &U = *UI;
- ++UI;
+ for (Use &U : llvm::make_early_inc_range(DefToReplaceUses.uses())) {
MemoryAccess *Usr = cast<MemoryAccess>(U.getUser());
if (MemoryPhi *UsrPhi = dyn_cast<MemoryPhi>(Usr)) {
BasicBlock *DominatedBlock = UsrPhi->getIncomingBlock(U);
diff --git a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
index e43553222128..d80814852e19 100644
--- a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -264,11 +264,27 @@ static void computeFunctionSummary(
std::vector<const Instruction *> NonVolatileStores;
bool HasInlineAsmMaybeReferencingInternal = false;
- for (const BasicBlock &BB : F)
+ bool HasIndirBranchToBlockAddress = false;
+ bool HasUnknownCall = false;
+ bool MayThrow = false;
+ for (const BasicBlock &BB : F) {
+    // We don't allow inlining of a function with an indirect branch to a
+    // blockaddress. If the blockaddress escapes the function, e.g., via a
+    // global variable, inlining may lead to an invalid cross-function
+    // reference. So we shouldn't import such a function either.
+ if (BB.hasAddressTaken()) {
+ for (User *U : BlockAddress::get(const_cast<BasicBlock *>(&BB))->users())
+ if (!isa<CallBrInst>(*U)) {
+ HasIndirBranchToBlockAddress = true;
+ break;
+ }
+ }
+
for (const Instruction &I : BB) {
- if (isa<DbgInfoIntrinsic>(I))
+ if (I.isDebugOrPseudoInst())
continue;
++NumInsts;
+
// Regular LTO module doesn't participate in ThinLTO import,
// so no reference from it can be read/writeonly, since this
// would require importing variable as local copy
@@ -300,8 +316,11 @@ static void computeFunctionSummary(
}
findRefEdges(Index, &I, RefEdges, Visited);
const auto *CB = dyn_cast<CallBase>(&I);
- if (!CB)
+ if (!CB) {
+ if (I.mayThrow())
+ MayThrow = true;
continue;
+ }
const auto *CI = dyn_cast<CallInst>(&I);
// Since we don't know exactly which local values are referenced in inline
@@ -323,7 +342,7 @@ static void computeFunctionSummary(
// called aliasee for the checks below.
if (auto *GA = dyn_cast<GlobalAlias>(CalledValue)) {
assert(!CalledFunction && "Expected null called function in callsite for alias");
- CalledFunction = dyn_cast<Function>(GA->getBaseObject());
+ CalledFunction = dyn_cast<Function>(GA->getAliaseeObject());
}
// Check if this is a direct call to a known function or a known
// intrinsic, or an indirect call with profile data.
@@ -357,6 +376,7 @@ static void computeFunctionSummary(
ValueInfo.updateRelBlockFreq(BBFreq, EntryFreq);
}
} else {
+ HasUnknownCall = true;
// Skip inline assembly calls.
if (CI && CI->isInlineAsm())
continue;
@@ -386,6 +406,7 @@ static void computeFunctionSummary(
.updateHotness(getHotness(Candidate.Count, PSI));
}
}
+ }
Index.addBlockCount(F.size());
std::vector<ValueInfo> Refs;
@@ -452,8 +473,9 @@ static void computeFunctionSummary(
: CalleeInfo::HotnessType::Critical);
bool NonRenamableLocal = isNonRenamableLocal(F);
- bool NotEligibleForImport =
- NonRenamableLocal || HasInlineAsmMaybeReferencingInternal;
+ bool NotEligibleForImport = NonRenamableLocal ||
+ HasInlineAsmMaybeReferencingInternal ||
+ HasIndirBranchToBlockAddress;
GlobalValueSummary::GVFlags Flags(
F.getLinkage(), F.getVisibility(), NotEligibleForImport,
/* Live = */ false, F.isDSOLocal(),
@@ -464,8 +486,9 @@ static void computeFunctionSummary(
F.hasFnAttribute(Attribute::NoRecurse), F.returnDoesNotAlias(),
// FIXME: refactor this to use the same code that inliner is using.
// Don't try to import functions with noinline attribute.
- F.getAttributes().hasFnAttribute(Attribute::NoInline),
- F.hasFnAttribute(Attribute::AlwaysInline)};
+ F.getAttributes().hasFnAttr(Attribute::NoInline),
+ F.hasFnAttribute(Attribute::AlwaysInline),
+ F.hasFnAttribute(Attribute::NoUnwind), MayThrow, HasUnknownCall};
std::vector<FunctionSummary::ParamAccess> ParamAccesses;
if (auto *SSI = GetSSICallback(F))
ParamAccesses = SSI->getParamAccesses(Index);
@@ -622,7 +645,7 @@ computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A,
/* Live = */ false, A.isDSOLocal(),
A.hasLinkOnceODRLinkage() && A.hasGlobalUnnamedAddr());
auto AS = std::make_unique<AliasSummary>(Flags);
- auto *Aliasee = A.getBaseObject();
+ auto *Aliasee = A.getAliaseeObject();
auto AliaseeVI = Index.getValueInfo(Aliasee->getGUID());
assert(AliaseeVI && "Alias expects aliasee summary to be available");
assert(AliaseeVI.getSummaryList().size() == 1 &&
@@ -711,7 +734,10 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
F->hasFnAttribute(Attribute::NoRecurse),
F->returnDoesNotAlias(),
/* NoInline = */ false,
- F->hasFnAttribute(Attribute::AlwaysInline)},
+ F->hasFnAttribute(Attribute::AlwaysInline),
+ F->hasFnAttribute(Attribute::NoUnwind),
+ /* MayThrow */ true,
+ /* HasUnknownCall */ true},
/*EntryCount=*/0, ArrayRef<ValueInfo>{},
ArrayRef<FunctionSummary::EdgeTy>{},
ArrayRef<GlobalValue::GUID>{},
diff --git a/llvm/lib/Analysis/ObjCARCInstKind.cpp b/llvm/lib/Analysis/ObjCARCInstKind.cpp
index 704d15f3280d..f74a9f7f104f 100644
--- a/llvm/lib/Analysis/ObjCARCInstKind.cpp
+++ b/llvm/lib/Analysis/ObjCARCInstKind.cpp
@@ -296,9 +296,8 @@ ARCInstKind llvm::objcarc::GetARCInstKind(const Value *V) {
// operand isn't actually being dereferenced, it is being stored to
// memory where we can no longer track who might read it and dereference
// it, so we have to consider it potentially used.
- for (User::const_op_iterator OI = I->op_begin(), OE = I->op_end();
- OI != OE; ++OI)
- if (IsPotentialRetainableObjPtr(*OI))
+ for (const Use &U : I->operands())
+ if (IsPotentialRetainableObjPtr(U))
return ARCInstKind::User;
}
}
diff --git a/llvm/lib/Analysis/OverflowInstAnalysis.cpp b/llvm/lib/Analysis/OverflowInstAnalysis.cpp
index 9f17d5b2064d..87a85e6a7364 100644
--- a/llvm/lib/Analysis/OverflowInstAnalysis.cpp
+++ b/llvm/lib/Analysis/OverflowInstAnalysis.cpp
@@ -69,4 +69,4 @@ bool llvm::isCheckForZeroAndMulWithOverflow(Value *Op0, Value *Op1,
bool IsAnd) {
Use *Y;
return isCheckForZeroAndMulWithOverflow(Op0, Op1, IsAnd, Y);
-}
\ No newline at end of file
+}
diff --git a/llvm/lib/Analysis/PHITransAddr.cpp b/llvm/lib/Analysis/PHITransAddr.cpp
index 7f77ab146c4c..c73e1fd82915 100644
--- a/llvm/lib/Analysis/PHITransAddr.cpp
+++ b/llvm/lib/Analysis/PHITransAddr.cpp
@@ -226,8 +226,8 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
return GEP;
// Simplify the GEP to handle 'gep x, 0' -> x etc.
- if (Value *V = SimplifyGEPInst(GEP->getSourceElementType(),
- GEPOps, {DL, TLI, DT, AC})) {
+ if (Value *V = SimplifyGEPInst(GEP->getSourceElementType(), GEPOps,
+ GEP->isInBounds(), {DL, TLI, DT, AC})) {
for (unsigned i = 0, e = GEPOps.size(); i != e; ++i)
RemoveInstInputs(GEPOps[i], InstInputs);
diff --git a/llvm/lib/Analysis/ProfileSummaryInfo.cpp b/llvm/lib/Analysis/ProfileSummaryInfo.cpp
index 6dda0bf0a1b4..268ed9d04741 100644
--- a/llvm/lib/Analysis/ProfileSummaryInfo.cpp
+++ b/llvm/lib/Analysis/ProfileSummaryInfo.cpp
@@ -103,7 +103,7 @@ bool ProfileSummaryInfo::isFunctionEntryHot(const Function *F) const {
// FIXME: The heuristic used below for determining hotness is based on
// preliminary SPEC tuning for inliner. This will eventually be a
// convenience method that calls isHotCount.
- return FunctionCount && isHotCount(FunctionCount.getCount());
+ return FunctionCount && isHotCount(FunctionCount->getCount());
}
/// Returns true if the function contains hot code. This can include a hot
@@ -116,7 +116,7 @@ bool ProfileSummaryInfo::isFunctionHotInCallGraph(
if (!F || !hasProfileSummary())
return false;
if (auto FunctionCount = F->getEntryCount())
- if (isHotCount(FunctionCount.getCount()))
+ if (isHotCount(FunctionCount->getCount()))
return true;
if (hasSampleProfile()) {
@@ -145,7 +145,7 @@ bool ProfileSummaryInfo::isFunctionColdInCallGraph(
if (!F || !hasProfileSummary())
return false;
if (auto FunctionCount = F->getEntryCount())
- if (!isColdCount(FunctionCount.getCount()))
+ if (!isColdCount(FunctionCount->getCount()))
return false;
if (hasSampleProfile()) {
@@ -176,10 +176,10 @@ bool ProfileSummaryInfo::isFunctionHotOrColdInCallGraphNthPercentile(
return false;
if (auto FunctionCount = F->getEntryCount()) {
if (isHot &&
- isHotCountNthPercentile(PercentileCutoff, FunctionCount.getCount()))
+ isHotCountNthPercentile(PercentileCutoff, FunctionCount->getCount()))
return true;
if (!isHot &&
- !isColdCountNthPercentile(PercentileCutoff, FunctionCount.getCount()))
+ !isColdCountNthPercentile(PercentileCutoff, FunctionCount->getCount()))
return false;
}
if (hasSampleProfile()) {
@@ -230,7 +230,7 @@ bool ProfileSummaryInfo::isFunctionEntryCold(const Function *F) const {
// FIXME: The heuristic used below for determining coldness is based on
// preliminary SPEC tuning for inliner. This will eventually be a
// convenience method that calls isHotCount.
- return FunctionCount && isColdCount(FunctionCount.getCount());
+ return FunctionCount && isColdCount(FunctionCount->getCount());
}
/// Compute the hot and cold thresholds.
@@ -316,11 +316,11 @@ bool ProfileSummaryInfo::isColdCountNthPercentile(int PercentileCutoff,
}
uint64_t ProfileSummaryInfo::getOrCompHotCountThreshold() const {
- return HotCountThreshold ? HotCountThreshold.getValue() : UINT64_MAX;
+ return HotCountThreshold.getValueOr(UINT64_MAX);
}
uint64_t ProfileSummaryInfo::getOrCompColdCountThreshold() const {
- return ColdCountThreshold ? ColdCountThreshold.getValue() : 0;
+ return ColdCountThreshold.getValueOr(0);
}
bool ProfileSummaryInfo::isHotBlock(const BasicBlock *BB,
diff --git a/llvm/lib/Analysis/ReplayInlineAdvisor.cpp b/llvm/lib/Analysis/ReplayInlineAdvisor.cpp
index b9dac2f3ff11..f83d8b0fd230 100644
--- a/llvm/lib/Analysis/ReplayInlineAdvisor.cpp
+++ b/llvm/lib/Analysis/ReplayInlineAdvisor.cpp
@@ -17,18 +17,21 @@
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/LineIterator.h"
+#include <memory>
using namespace llvm;
-#define DEBUG_TYPE "inline-replay"
+#define DEBUG_TYPE "replay-inline"
ReplayInlineAdvisor::ReplayInlineAdvisor(
Module &M, FunctionAnalysisManager &FAM, LLVMContext &Context,
- std::unique_ptr<InlineAdvisor> OriginalAdvisor, StringRef RemarksFile,
- bool EmitRemarks)
+ std::unique_ptr<InlineAdvisor> OriginalAdvisor,
+ const ReplayInlinerSettings &ReplaySettings, bool EmitRemarks)
: InlineAdvisor(M, FAM), OriginalAdvisor(std::move(OriginalAdvisor)),
- HasReplayRemarks(false), EmitRemarks(EmitRemarks) {
- auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(RemarksFile);
+ HasReplayRemarks(false), ReplaySettings(ReplaySettings),
+ EmitRemarks(EmitRemarks) {
+
+ auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(ReplaySettings.ReplayFile);
std::error_code EC = BufferOrErr.getError();
if (EC) {
Context.emitError("Could not open remarks file: " + EC.message());
@@ -36,47 +39,112 @@ ReplayInlineAdvisor::ReplayInlineAdvisor(
}
// Example for inline remarks to parse:
- // main:3:1.1: _Z3subii inlined into main at callsite sum:1 @ main:3:1.1
+ // main:3:1.1: '_Z3subii' inlined into 'main' at callsite sum:1 @
+ // main:3:1.1;
// We use the callsite string after `at callsite` to replay inlining.
line_iterator LineIt(*BufferOrErr.get(), /*SkipBlanks=*/true);
+ const std::string PositiveRemark = "' inlined into '";
+ const std::string NegativeRemark = "' will not be inlined into '";
+
for (; !LineIt.is_at_eof(); ++LineIt) {
StringRef Line = *LineIt;
auto Pair = Line.split(" at callsite ");
- auto Callee = Pair.first.split(" inlined into").first.rsplit(": ").second;
+ bool IsPositiveRemark = true;
+ if (Pair.first.contains(NegativeRemark))
+ IsPositiveRemark = false;
+
+ auto CalleeCaller =
+ Pair.first.split(IsPositiveRemark ? PositiveRemark : NegativeRemark);
+
+ StringRef Callee = CalleeCaller.first.rsplit(": '").second;
+ StringRef Caller = CalleeCaller.second.rsplit("'").first;
auto CallSite = Pair.second.split(";").first;
- if (Callee.empty() || CallSite.empty())
- continue;
+ if (Callee.empty() || Caller.empty() || CallSite.empty()) {
+ Context.emitError("Invalid remark format: " + Line);
+ return;
+ }
std::string Combined = (Callee + CallSite).str();
- InlineSitesFromRemarks.insert(Combined);
+ InlineSitesFromRemarks[Combined] = IsPositiveRemark;
+ if (ReplaySettings.ReplayScope == ReplayInlinerSettings::Scope::Function)
+ CallersToReplay.insert(Caller);
}
HasReplayRemarks = true;
}
+std::unique_ptr<InlineAdvisor> llvm::getReplayInlineAdvisor(
+ Module &M, FunctionAnalysisManager &FAM, LLVMContext &Context,
+ std::unique_ptr<InlineAdvisor> OriginalAdvisor,
+ const ReplayInlinerSettings &ReplaySettings, bool EmitRemarks) {
+ auto Advisor = std::make_unique<ReplayInlineAdvisor>(
+ M, FAM, Context, std::move(OriginalAdvisor), ReplaySettings, EmitRemarks);
+ if (!Advisor->areReplayRemarksLoaded())
+ Advisor.reset();
+ return Advisor;
+}
+
std::unique_ptr<InlineAdvice> ReplayInlineAdvisor::getAdviceImpl(CallBase &CB) {
assert(HasReplayRemarks);
Function &Caller = *CB.getCaller();
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(Caller);
- if (InlineSitesFromRemarks.empty())
- return std::make_unique<DefaultInlineAdvice>(this, CB, None, ORE,
- EmitRemarks);
+ // Decision not made by replay system
+ if (!hasInlineAdvice(*CB.getFunction())) {
+ // If there's a registered original advisor, return its decision
+ if (OriginalAdvisor)
+ return OriginalAdvisor->getAdvice(CB);
- std::string CallSiteLoc = getCallSiteLocation(CB.getDebugLoc());
+ // If no decision is made above, return non-decision
+ return {};
+ }
+
+ std::string CallSiteLoc =
+ formatCallSiteLocation(CB.getDebugLoc(), ReplaySettings.ReplayFormat);
StringRef Callee = CB.getCalledFunction()->getName();
std::string Combined = (Callee + CallSiteLoc).str();
- auto Iter = InlineSitesFromRemarks.find(Combined);
- Optional<InlineCost> InlineRecommended = None;
+ // Replay decision, if it has one
+ auto Iter = InlineSitesFromRemarks.find(Combined);
if (Iter != InlineSitesFromRemarks.end()) {
- InlineRecommended = llvm::InlineCost::getAlways("found in replay");
+ if (InlineSitesFromRemarks[Combined]) {
+ LLVM_DEBUG(dbgs() << "Replay Inliner: Inlined " << Callee << " @ "
+ << CallSiteLoc << "\n");
+ return std::make_unique<DefaultInlineAdvice>(
+ this, CB, llvm::InlineCost::getAlways("previously inlined"), ORE,
+ EmitRemarks);
+ } else {
+ LLVM_DEBUG(dbgs() << "Replay Inliner: Not Inlined " << Callee << " @ "
+ << CallSiteLoc << "\n");
+ // A negative inline is conveyed by "None" Optional<InlineCost>
+ return std::make_unique<DefaultInlineAdvice>(this, CB, None, ORE,
+ EmitRemarks);
+ }
+ }
+
+ // Fallback decisions
+ if (ReplaySettings.ReplayFallback ==
+ ReplayInlinerSettings::Fallback::AlwaysInline)
+ return std::make_unique<DefaultInlineAdvice>(
+ this, CB, llvm::InlineCost::getAlways("AlwaysInline Fallback"), ORE,
+ EmitRemarks);
+ else if (ReplaySettings.ReplayFallback ==
+ ReplayInlinerSettings::Fallback::NeverInline)
+ // A negative inline is conveyed by "None" Optional<InlineCost>
+ return std::make_unique<DefaultInlineAdvice>(this, CB, None, ORE,
+ EmitRemarks);
+ else {
+ assert(ReplaySettings.ReplayFallback ==
+ ReplayInlinerSettings::Fallback::Original);
+ // If there's a registered original advisor, return its decision
+ if (OriginalAdvisor)
+ return OriginalAdvisor->getAdvice(CB);
}
- return std::make_unique<DefaultInlineAdvice>(this, CB, InlineRecommended, ORE,
- EmitRemarks);
+ // If no decision is made above, return non-decision
+ return {};
}
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index f22d834b5e57..f7c22cfb0310 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -139,8 +139,6 @@ using namespace PatternMatch;
#define DEBUG_TYPE "scalar-evolution"
-STATISTIC(NumArrayLenItCounts,
- "Number of trip counts computed with array length");
STATISTIC(NumTripCountsComputed,
"Number of loops with predictable loop counts");
STATISTIC(NumTripCountsNotComputed,
@@ -1100,7 +1098,7 @@ const SCEV *ScalarEvolution::getLosslessPtrToIntExpr(const SCEV *Op,
SCEV *S = new (SCEVAllocator)
SCEVPtrToIntExpr(ID.Intern(SCEVAllocator), Op, IntPtrTy);
UniqueSCEVs.InsertNode(S, IP);
- addToLoopUseLists(S);
+ registerUser(S, Op);
return S;
}
@@ -1220,7 +1218,7 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, Type *Ty,
SCEV *S =
new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator), Op, Ty);
UniqueSCEVs.InsertNode(S, IP);
- addToLoopUseLists(S);
+ registerUser(S, Op);
return S;
}
@@ -1274,7 +1272,7 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, Type *Ty,
SCEV *S = new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator),
Op, Ty);
UniqueSCEVs.InsertNode(S, IP);
- addToLoopUseLists(S);
+ registerUser(S, Op);
return S;
}
@@ -1603,7 +1601,7 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator),
Op, Ty);
UniqueSCEVs.InsertNode(S, IP);
- addToLoopUseLists(S);
+ registerUser(S, Op);
return S;
}
@@ -1872,7 +1870,7 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator),
Op, Ty);
UniqueSCEVs.InsertNode(S, IP);
- addToLoopUseLists(S);
+ registerUser(S, Op);
return S;
}
@@ -1911,7 +1909,7 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator),
Op, Ty);
UniqueSCEVs.InsertNode(S, IP);
- addToLoopUseLists(S);
+ registerUser(S, Op);
return S;
}
@@ -2108,7 +2106,7 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator),
Op, Ty);
UniqueSCEVs.InsertNode(S, IP);
- addToLoopUseLists(S);
+ registerUser(S, { Op });
return S;
}
@@ -2390,6 +2388,24 @@ StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type,
}
}
+ // <0,+,nonnegative><nw> is also nuw
+ // TODO: Add corresponding nsw case
+ if (Type == scAddRecExpr && ScalarEvolution::hasFlags(Flags, SCEV::FlagNW) &&
+ !ScalarEvolution::hasFlags(Flags, SCEV::FlagNUW) && Ops.size() == 2 &&
+ Ops[0]->isZero() && IsKnownNonNegative(Ops[1]))
+ Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW);
+
+ // both (udiv X, Y) * Y and Y * (udiv X, Y) are always NUW
+ if (Type == scMulExpr && !ScalarEvolution::hasFlags(Flags, SCEV::FlagNUW) &&
+ Ops.size() == 2) {
+ if (auto *UDiv = dyn_cast<SCEVUDivExpr>(Ops[0]))
+ if (UDiv->getOperand(1) == Ops[1])
+ Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW);
+ if (auto *UDiv = dyn_cast<SCEVUDivExpr>(Ops[1]))
+ if (UDiv->getOperand(1) == Ops[0])
+ Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW);
+ }
+
return Flags;
}
@@ -2449,7 +2465,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
if (Depth > MaxArithDepth || hasHugeExpression(Ops))
return getOrCreateAddExpr(Ops, ComputeFlags(Ops));
- if (SCEV *S = std::get<0>(findExistingSCEVInCache(scAddExpr, Ops))) {
+ if (SCEV *S = findExistingSCEVInCache(scAddExpr, Ops)) {
// Don't strengthen flags if we have no new information.
SCEVAddExpr *Add = static_cast<SCEVAddExpr *>(S);
if (Add->getNoWrapFlags(OrigFlags) != OrigFlags)
@@ -2562,8 +2578,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
APInt ConstAdd = C1 + C2;
auto AddFlags = AddExpr->getNoWrapFlags();
// Adding a smaller constant is NUW if the original AddExpr was NUW.
- if (ScalarEvolution::maskFlags(AddFlags, SCEV::FlagNUW) ==
- SCEV::FlagNUW &&
+ if (ScalarEvolution::hasFlags(AddFlags, SCEV::FlagNUW) &&
ConstAdd.ule(C1)) {
PreservedFlags =
ScalarEvolution::setFlags(PreservedFlags, SCEV::FlagNUW);
@@ -2571,8 +2586,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
// Adding a constant with the same sign and small magnitude is NSW, if the
// original AddExpr was NSW.
- if (ScalarEvolution::maskFlags(AddFlags, SCEV::FlagNSW) ==
- SCEV::FlagNSW &&
+ if (ScalarEvolution::hasFlags(AddFlags, SCEV::FlagNSW) &&
C1.isSignBitSet() == ConstAdd.isSignBitSet() &&
ConstAdd.abs().ule(C1.abs())) {
PreservedFlags =
@@ -2580,14 +2594,26 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
}
if (PreservedFlags != SCEV::FlagAnyWrap) {
- SmallVector<const SCEV *, 4> NewOps(AddExpr->op_begin(),
- AddExpr->op_end());
+ SmallVector<const SCEV *, 4> NewOps(AddExpr->operands());
NewOps[0] = getConstant(ConstAdd);
return getAddExpr(NewOps, PreservedFlags);
}
}
}
+ // Canonicalize (-1 * urem X, Y) + X --> (Y * X/Y)
+ if (Ops.size() == 2) {
+ const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[0]);
+ if (Mul && Mul->getNumOperands() == 2 &&
+ Mul->getOperand(0)->isAllOnesValue()) {
+ const SCEV *X;
+ const SCEV *Y;
+ if (matchURem(Mul->getOperand(1), X, Y) && X == Ops[1]) {
+ return getMulExpr(Y, getUDivExpr(X, Y));
+ }
+ }
+ }
+
// Skip past any other cast SCEVs.
while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddExpr)
++Idx;
@@ -2766,7 +2792,8 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
// If we found some loop invariants, fold them into the recurrence.
if (!LIOps.empty()) {
// Compute nowrap flags for the addition of the loop-invariant ops and
- // the addrec. Temporarily push it as an operand for that purpose.
+ // the addrec. Temporarily push it as an operand for that purpose. These
+ // flags are valid in the scope of the addrec only.
LIOps.push_back(AddRec);
SCEV::NoWrapFlags Flags = ComputeFlags(LIOps);
LIOps.pop_back();
@@ -2775,10 +2802,25 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
LIOps.push_back(AddRec->getStart());
SmallVector<const SCEV *, 4> AddRecOps(AddRec->operands());
- // This follows from the fact that the no-wrap flags on the outer add
- // expression are applicable on the 0th iteration, when the add recurrence
- // will be equal to its start value.
- AddRecOps[0] = getAddExpr(LIOps, Flags, Depth + 1);
+
+ // It is not in general safe to propagate flags valid on an add within
+ // the addrec scope to one outside it. We must prove that the inner
+ // scope is guaranteed to execute if the outer one does to be able to
+ // safely propagate. We know the program is undefined if poison is
+ // produced on the inner scoped addrec. We also know that *for this use*
+ // the outer scoped add can't overflow (because of the flags we just
+ // computed for the inner scoped add) without the program being undefined.
+      // Proving that entry to the outer scope necessitates entry to the inner
+      // scope thus proves the program undefined if the flags would be violated
+      // in the outer scope.
+ SCEV::NoWrapFlags AddFlags = Flags;
+ if (AddFlags != SCEV::FlagAnyWrap) {
+ auto *DefI = getDefiningScopeBound(LIOps);
+ auto *ReachI = &*AddRecLoop->getHeader()->begin();
+ if (!isGuaranteedToTransferExecutionTo(DefI, ReachI))
+ AddFlags = SCEV::FlagAnyWrap;
+ }
+ AddRecOps[0] = getAddExpr(LIOps, AddFlags, Depth + 1);
// Build the new addrec. Propagate the NUW and NSW flags if both the
// outer add and the inner addrec are guaranteed to have no overflow.
@@ -2862,7 +2904,7 @@ ScalarEvolution::getOrCreateAddExpr(ArrayRef<const SCEV *> Ops,
S = new (SCEVAllocator)
SCEVAddExpr(ID.Intern(SCEVAllocator), O, Ops.size());
UniqueSCEVs.InsertNode(S, IP);
- addToLoopUseLists(S);
+ registerUser(S, Ops);
}
S->setNoWrapFlags(Flags);
return S;
@@ -2885,7 +2927,8 @@ ScalarEvolution::getOrCreateAddRecExpr(ArrayRef<const SCEV *> Ops,
S = new (SCEVAllocator)
SCEVAddRecExpr(ID.Intern(SCEVAllocator), O, Ops.size(), L);
UniqueSCEVs.InsertNode(S, IP);
- addToLoopUseLists(S);
+ LoopUsers[L].push_back(S);
+ registerUser(S, Ops);
}
setNoWrapFlags(S, Flags);
return S;
@@ -2907,7 +2950,7 @@ ScalarEvolution::getOrCreateMulExpr(ArrayRef<const SCEV *> Ops,
S = new (SCEVAllocator) SCEVMulExpr(ID.Intern(SCEVAllocator),
O, Ops.size());
UniqueSCEVs.InsertNode(S, IP);
- addToLoopUseLists(S);
+ registerUser(S, Ops);
}
S->setNoWrapFlags(Flags);
return S;
@@ -3022,7 +3065,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
if (Depth > MaxArithDepth || hasHugeExpression(Ops))
return getOrCreateMulExpr(Ops, ComputeFlags(Ops));
- if (SCEV *S = std::get<0>(findExistingSCEVInCache(scMulExpr, Ops))) {
+ if (SCEV *S = findExistingSCEVInCache(scMulExpr, Ops)) {
// Don't strengthen flags if we have no new information.
SCEVMulExpr *Mul = static_cast<SCEVMulExpr *>(S);
if (Mul->getNoWrapFlags(OrigFlags) != OrigFlags)
@@ -3416,7 +3459,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
SCEV *S = new (SCEVAllocator) SCEVUDivExpr(ID.Intern(SCEVAllocator),
LHS, RHS);
UniqueSCEVs.InsertNode(S, IP);
- addToLoopUseLists(S);
+ registerUser(S, {LHS, RHS});
return S;
}
@@ -3593,13 +3636,21 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP,
// getSCEV(Base)->getType() has the same address space as Base->getType()
// because SCEV::getType() preserves the address space.
Type *IntIdxTy = getEffectiveSCEVType(BaseExpr->getType());
- // FIXME(PR23527): Don't blindly transfer the inbounds flag from the GEP
- // instruction to its SCEV, because the Instruction may be guarded by control
- // flow and the no-overflow bits may not be valid for the expression in any
- // context. This can be fixed similarly to how these flags are handled for
- // adds.
+ const bool AssumeInBoundsFlags = [&]() {
+ if (!GEP->isInBounds())
+ return false;
+
+ // We'd like to propagate flags from the IR to the corresponding SCEV nodes,
+ // but to do that, we have to ensure that said flag is valid in the entire
+ // defined scope of the SCEV.
+ auto *GEPI = dyn_cast<Instruction>(GEP);
+ // TODO: non-instructions have global scope. We might be able to prove
+ // some global scope cases
+ return GEPI && isSCEVExprNeverPoison(GEPI);
+ }();
+
SCEV::NoWrapFlags OffsetWrap =
- GEP->isInBounds() ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
+ AssumeInBoundsFlags ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
Type *CurTy = GEP->getType();
bool FirstIter = true;
@@ -3645,21 +3696,22 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP,
// Add the base address and the offset. We cannot use the nsw flag, as the
// base address is unsigned. However, if we know that the offset is
// non-negative, we can use nuw.
- SCEV::NoWrapFlags BaseWrap = GEP->isInBounds() && isKnownNonNegative(Offset)
+ SCEV::NoWrapFlags BaseWrap = AssumeInBoundsFlags && isKnownNonNegative(Offset)
? SCEV::FlagNUW : SCEV::FlagAnyWrap;
- return getAddExpr(BaseExpr, Offset, BaseWrap);
+ auto *GEPExpr = getAddExpr(BaseExpr, Offset, BaseWrap);
+ assert(BaseExpr->getType() == GEPExpr->getType() &&
+ "GEP should not change type mid-flight.");
+ return GEPExpr;
}
-std::tuple<SCEV *, FoldingSetNodeID, void *>
-ScalarEvolution::findExistingSCEVInCache(SCEVTypes SCEVType,
- ArrayRef<const SCEV *> Ops) {
+SCEV *ScalarEvolution::findExistingSCEVInCache(SCEVTypes SCEVType,
+ ArrayRef<const SCEV *> Ops) {
FoldingSetNodeID ID;
- void *IP = nullptr;
ID.AddInteger(SCEVType);
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
ID.AddPointer(Ops[i]);
- return std::tuple<SCEV *, FoldingSetNodeID, void *>(
- UniqueSCEVs.FindNodeOrInsertPos(ID, IP), std::move(ID), IP);
+ void *IP = nullptr;
+ return UniqueSCEVs.FindNodeOrInsertPos(ID, IP);
}
const SCEV *ScalarEvolution::getAbsExpr(const SCEV *Op, bool IsNSW) {
@@ -3689,7 +3741,7 @@ const SCEV *ScalarEvolution::getMinMaxExpr(SCEVTypes Kind,
GroupByComplexity(Ops, &LI, DT);
// Check if we have created the same expression before.
- if (const SCEV *S = std::get<0>(findExistingSCEVInCache(Kind, Ops))) {
+ if (const SCEV *S = findExistingSCEVInCache(Kind, Ops)) {
return S;
}
@@ -3787,10 +3839,12 @@ const SCEV *ScalarEvolution::getMinMaxExpr(SCEVTypes Kind,
// Okay, it looks like we really DO need an expr. Check to see if we
// already have one, otherwise create a new one.
- const SCEV *ExistingSCEV;
FoldingSetNodeID ID;
- void *IP;
- std::tie(ExistingSCEV, ID, IP) = findExistingSCEVInCache(Kind, Ops);
+ ID.AddInteger(Kind);
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ ID.AddPointer(Ops[i]);
+ void *IP = nullptr;
+ const SCEV *ExistingSCEV = UniqueSCEVs.FindNodeOrInsertPos(ID, IP);
if (ExistingSCEV)
return ExistingSCEV;
const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
@@ -3799,7 +3853,7 @@ const SCEV *ScalarEvolution::getMinMaxExpr(SCEVTypes Kind,
SCEVMinMaxExpr(ID.Intern(SCEVAllocator), Kind, O, Ops.size());
UniqueSCEVs.InsertNode(S, IP);
- addToLoopUseLists(S);
+ registerUser(S, Ops);
return S;
}
@@ -3943,6 +3997,21 @@ Type *ScalarEvolution::getWiderType(Type *T1, Type *T2) const {
return getTypeSizeInBits(T1) >= getTypeSizeInBits(T2) ? T1 : T2;
}
+bool ScalarEvolution::instructionCouldExistWitthOperands(const SCEV *A,
+ const SCEV *B) {
+ /// For a valid use point to exist, the defining scope of one operand
+ /// must dominate the other.
+ bool PreciseA, PreciseB;
+ auto *ScopeA = getDefiningScopeBound({A}, PreciseA);
+ auto *ScopeB = getDefiningScopeBound({B}, PreciseB);
+ if (!PreciseA || !PreciseB)
+ // Can't tell.
+ return false;
+ return (ScopeA == ScopeB) || DT.dominates(ScopeA, ScopeB) ||
+ DT.dominates(ScopeB, ScopeA);
+}
+
+
const SCEV *ScalarEvolution::getCouldNotCompute() {
return CouldNotCompute.get();
}
@@ -4025,24 +4094,6 @@ void ScalarEvolution::eraseValueFromMap(Value *V) {
}
}
-/// Check whether value has nuw/nsw/exact set but SCEV does not.
-/// TODO: In reality it is better to check the poison recursively
-/// but this is better than nothing.
-static bool SCEVLostPoisonFlags(const SCEV *S, const Value *V) {
- if (auto *I = dyn_cast<Instruction>(V)) {
- if (isa<OverflowingBinaryOperator>(I)) {
- if (auto *NS = dyn_cast<SCEVNAryExpr>(S)) {
- if (I->hasNoSignedWrap() && !NS->hasNoSignedWrap())
- return true;
- if (I->hasNoUnsignedWrap() && !NS->hasNoUnsignedWrap())
- return true;
- }
- } else if (isa<PossiblyExactOperator>(I) && I->isExact())
- return true;
- }
- return false;
-}
-
/// Return an existing SCEV if it exists, otherwise analyze the expression and
/// create a new one.
const SCEV *ScalarEvolution::getSCEV(Value *V) {
@@ -4056,7 +4107,7 @@ const SCEV *ScalarEvolution::getSCEV(Value *V) {
// ValueExprMap before insert S->{V, 0} into ExprValueMap.
std::pair<ValueExprMapType::iterator, bool> Pair =
ValueExprMap.insert({SCEVCallbackVH(V, this), S});
- if (Pair.second && !SCEVLostPoisonFlags(S, V)) {
+ if (Pair.second) {
ExprValueMap[S].insert({V, nullptr});
// If S == Stripped + Offset, add Stripped -> {V, Offset} into
@@ -4120,6 +4171,8 @@ static const SCEV *MatchNotExpr(const SCEV *Expr) {
/// Return a SCEV corresponding to ~V = -1-V
const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) {
+ assert(!V->getType()->isPointerTy() && "Can't negate pointer");
+
if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
return getConstant(
cast<ConstantInt>(ConstantExpr::getNot(VC->getValue())));
@@ -4146,17 +4199,16 @@ const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) {
return getMinusSCEV(getMinusOne(Ty), V);
}
-/// Compute an expression equivalent to S - getPointerBase(S).
-static const SCEV *removePointerBase(ScalarEvolution *SE, const SCEV *P) {
+const SCEV *ScalarEvolution::removePointerBase(const SCEV *P) {
assert(P->getType()->isPointerTy());
if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(P)) {
// The base of an AddRec is the first operand.
SmallVector<const SCEV *> Ops{AddRec->operands()};
- Ops[0] = removePointerBase(SE, Ops[0]);
+ Ops[0] = removePointerBase(Ops[0]);
// Don't try to transfer nowrap flags for now. We could in some cases
// (for example, if pointer operand of the AddRec is a SCEVUnknown).
- return SE->getAddRecExpr(Ops, AddRec->getLoop(), SCEV::FlagAnyWrap);
+ return getAddRecExpr(Ops, AddRec->getLoop(), SCEV::FlagAnyWrap);
}
if (auto *Add = dyn_cast<SCEVAddExpr>(P)) {
// The base of an Add is the pointer operand.
@@ -4164,21 +4216,17 @@ static const SCEV *removePointerBase(ScalarEvolution *SE, const SCEV *P) {
const SCEV **PtrOp = nullptr;
for (const SCEV *&AddOp : Ops) {
if (AddOp->getType()->isPointerTy()) {
- // If we find an Add with multiple pointer operands, treat it as a
- // pointer base to be consistent with getPointerBase. Eventually
- // we should be able to assert this is impossible.
- if (PtrOp)
- return SE->getZero(P->getType());
+ assert(!PtrOp && "Cannot have multiple pointer ops");
PtrOp = &AddOp;
}
}
- *PtrOp = removePointerBase(SE, *PtrOp);
+ *PtrOp = removePointerBase(*PtrOp);
// Don't try to transfer nowrap flags for now. We could in some cases
// (for example, if the pointer operand of the Add is a SCEVUnknown).
- return SE->getAddExpr(Ops);
+ return getAddExpr(Ops);
}
// Any other expression must be a pointer base.
- return SE->getZero(P->getType());
+ return getZero(P->getType());
}
const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
@@ -4195,8 +4243,8 @@ const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
if (!LHS->getType()->isPointerTy() ||
getPointerBase(LHS) != getPointerBase(RHS))
return getCouldNotCompute();
- LHS = removePointerBase(this, LHS);
- RHS = removePointerBase(this, RHS);
+ LHS = removePointerBase(LHS);
+ RHS = removePointerBase(RHS);
}
// We represent LHS - RHS as LHS + (-1)*RHS. This transformation
@@ -4204,7 +4252,7 @@ const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
auto AddFlags = SCEV::FlagAnyWrap;
const bool RHSIsNotMinSigned =
!getSignedRangeMin(RHS).isMinSignedValue();
- if (maskFlags(Flags, SCEV::FlagNSW) == SCEV::FlagNSW) {
+ if (hasFlags(Flags, SCEV::FlagNSW)) {
// Let M be the minimum representable signed value. Then (-1)*RHS
// signed-wraps if and only if RHS is M. That can happen even for
// a NSW subtraction because e.g. (-1)*M signed-wraps even though
@@ -4359,14 +4407,11 @@ const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) {
const SCEV *PtrOp = nullptr;
for (const SCEV *AddOp : Add->operands()) {
if (AddOp->getType()->isPointerTy()) {
- // Cannot find the base of an expression with multiple pointer ops.
- if (PtrOp)
- return V;
+ assert(!PtrOp && "Cannot have multiple pointer ops");
PtrOp = AddOp;
}
}
- if (!PtrOp) // All operands were non-pointer.
- return V;
+ assert(PtrOp && "Must have pointer op");
V = PtrOp;
} else // Not something we can look further into.
return V;
@@ -4374,24 +4419,25 @@ const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) {
}
/// Push users of the given Instruction onto the given Worklist.
-static void
-PushDefUseChildren(Instruction *I,
- SmallVectorImpl<Instruction *> &Worklist) {
+static void PushDefUseChildren(Instruction *I,
+ SmallVectorImpl<Instruction *> &Worklist,
+ SmallPtrSetImpl<Instruction *> &Visited) {
// Push the def-use children onto the Worklist stack.
- for (User *U : I->users())
- Worklist.push_back(cast<Instruction>(U));
+ for (User *U : I->users()) {
+ auto *UserInsn = cast<Instruction>(U);
+ if (Visited.insert(UserInsn).second)
+ Worklist.push_back(UserInsn);
+ }
}
void ScalarEvolution::forgetSymbolicName(Instruction *PN, const SCEV *SymName) {
SmallVector<Instruction *, 16> Worklist;
- PushDefUseChildren(PN, Worklist);
-
SmallPtrSet<Instruction *, 8> Visited;
+ SmallVector<const SCEV *, 8> ToForget;
Visited.insert(PN);
+ Worklist.push_back(PN);
while (!Worklist.empty()) {
Instruction *I = Worklist.pop_back_val();
- if (!Visited.insert(I).second)
- continue;
auto It = ValueExprMap.find_as(static_cast<Value *>(I));
if (It != ValueExprMap.end()) {
@@ -4413,12 +4459,13 @@ void ScalarEvolution::forgetSymbolicName(Instruction *PN, const SCEV *SymName) {
!isa<SCEVUnknown>(Old) ||
(I != PN && Old == SymName)) {
eraseValueFromMap(It->first);
- forgetMemoizedResults(Old);
+ ToForget.push_back(Old);
}
}
- PushDefUseChildren(I, Worklist);
+ PushDefUseChildren(I, Worklist, Visited);
}
+ forgetMemoizedResults(ToForget);
}
namespace {
@@ -6109,7 +6156,7 @@ ScalarEvolution::getRangeRef(const SCEV *S,
// initial value.
if (AddRec->hasNoUnsignedWrap()) {
APInt UnsignedMinValue = getUnsignedRangeMin(AddRec->getStart());
- if (!UnsignedMinValue.isNullValue())
+ if (!UnsignedMinValue.isZero())
ConservativeResult = ConservativeResult.intersectWith(
ConstantRange(UnsignedMinValue, APInt(BitWidth, 0)), RangeType);
}
@@ -6211,9 +6258,9 @@ ScalarEvolution::getRangeRef(const SCEV *S,
if (NS > 1) {
// If we know any of the sign bits, we know all of the sign bits.
- if (!Known.Zero.getHiBits(NS).isNullValue())
+ if (!Known.Zero.getHiBits(NS).isZero())
Known.Zero.setHighBits(NS);
- if (!Known.One.getHiBits(NS).isNullValue())
+ if (!Known.One.getHiBits(NS).isZero())
Known.One.setHighBits(NS);
}
@@ -6549,17 +6596,99 @@ SCEV::NoWrapFlags ScalarEvolution::getNoWrapFlagsFromUB(const Value *V) {
return isSCEVExprNeverPoison(BinOp) ? Flags : SCEV::FlagAnyWrap;
}
-bool ScalarEvolution::isSCEVExprNeverPoison(const Instruction *I) {
- // Here we check that I is in the header of the innermost loop containing I,
- // since we only deal with instructions in the loop header. The actual loop we
- // need to check later will come from an add recurrence, but getting that
- // requires computing the SCEV of the operands, which can be expensive. This
- // check we can do cheaply to rule out some cases early.
- Loop *InnermostContainingLoop = LI.getLoopFor(I->getParent());
- if (InnermostContainingLoop == nullptr ||
- InnermostContainingLoop->getHeader() != I->getParent())
- return false;
+const Instruction *
+ScalarEvolution::getNonTrivialDefiningScopeBound(const SCEV *S) {
+ if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(S))
+ return &*AddRec->getLoop()->getHeader()->begin();
+ if (auto *U = dyn_cast<SCEVUnknown>(S))
+ if (auto *I = dyn_cast<Instruction>(U->getValue()))
+ return I;
+ return nullptr;
+}
+/// Fills \p Ops with unique operands of \p S, if it has operands. If not,
+/// \p Ops remains unmodified.
+static void collectUniqueOps(const SCEV *S,
+ SmallVectorImpl<const SCEV *> &Ops) {
+ SmallPtrSet<const SCEV *, 4> Unique;
+ auto InsertUnique = [&](const SCEV *S) {
+ if (Unique.insert(S).second)
+ Ops.push_back(S);
+ };
+ if (auto *S2 = dyn_cast<SCEVCastExpr>(S))
+ for (auto *Op : S2->operands())
+ InsertUnique(Op);
+ else if (auto *S2 = dyn_cast<SCEVNAryExpr>(S))
+ for (auto *Op : S2->operands())
+ InsertUnique(Op);
+ else if (auto *S2 = dyn_cast<SCEVUDivExpr>(S))
+ for (auto *Op : S2->operands())
+ InsertUnique(Op);
+}
+
+const Instruction *
+ScalarEvolution::getDefiningScopeBound(ArrayRef<const SCEV *> Ops,
+ bool &Precise) {
+ Precise = true;
+ // Do a bounded search of the def relation of the requested SCEVs.
+ SmallSet<const SCEV *, 16> Visited;
+ SmallVector<const SCEV *> Worklist;
+ auto pushOp = [&](const SCEV *S) {
+ if (!Visited.insert(S).second)
+ return;
+ // Threshold of 30 here is arbitrary.
+ if (Visited.size() > 30) {
+ Precise = false;
+ return;
+ }
+ Worklist.push_back(S);
+ };
+
+ for (auto *S : Ops)
+ pushOp(S);
+
+ const Instruction *Bound = nullptr;
+ while (!Worklist.empty()) {
+ auto *S = Worklist.pop_back_val();
+ if (auto *DefI = getNonTrivialDefiningScopeBound(S)) {
+ if (!Bound || DT.dominates(Bound, DefI))
+ Bound = DefI;
+ } else {
+ SmallVector<const SCEV *, 4> Ops;
+ collectUniqueOps(S, Ops);
+ for (auto *Op : Ops)
+ pushOp(Op);
+ }
+ }
+ return Bound ? Bound : &*F.getEntryBlock().begin();
+}
+
+const Instruction *
+ScalarEvolution::getDefiningScopeBound(ArrayRef<const SCEV *> Ops) {
+ bool Discard;
+ return getDefiningScopeBound(Ops, Discard);
+}
+
+bool ScalarEvolution::isGuaranteedToTransferExecutionTo(const Instruction *A,
+ const Instruction *B) {
+ if (A->getParent() == B->getParent() &&
+ isGuaranteedToTransferExecutionToSuccessor(A->getIterator(),
+ B->getIterator()))
+ return true;
+
+ auto *BLoop = LI.getLoopFor(B->getParent());
+ if (BLoop && BLoop->getHeader() == B->getParent() &&
+ BLoop->getLoopPreheader() == A->getParent() &&
+ isGuaranteedToTransferExecutionToSuccessor(A->getIterator(),
+ A->getParent()->end()) &&
+ isGuaranteedToTransferExecutionToSuccessor(B->getParent()->begin(),
+ B->getIterator()))
+ return true;
+ return false;
+}
+
+
+bool ScalarEvolution::isSCEVExprNeverPoison(const Instruction *I) {
// Only proceed if we can prove that I does not yield poison.
if (!programUndefinedIfPoison(I))
return false;
@@ -6570,39 +6699,20 @@ bool ScalarEvolution::isSCEVExprNeverPoison(const Instruction *I) {
// instructions can map to the same SCEV. If we apply NSW or NUW from I to
// the SCEV, we must guarantee no wrapping for that SCEV also when it is
// derived from other instructions that map to the same SCEV. We cannot make
- // that guarantee for cases where I is not executed. So we need to find the
- // loop that I is considered in relation to and prove that I is executed for
- // every iteration of that loop. That implies that the value that I
- // calculates does not wrap anywhere in the loop, so then we can apply the
- // flags to the SCEV.
- //
- // We check isLoopInvariant to disambiguate in case we are adding recurrences
- // from different loops, so that we know which loop to prove that I is
- // executed in.
- for (unsigned OpIndex = 0; OpIndex < I->getNumOperands(); ++OpIndex) {
+  // that guarantee for cases where I is not executed. So we need to find an
+  // upper bound on the defining scope for the SCEV, and prove that I is
+ // executed every time we enter that scope. When the bounding scope is a
+ // loop (the common case), this is equivalent to proving I executes on every
+ // iteration of that loop.
+ SmallVector<const SCEV *> SCEVOps;
+ for (const Use &Op : I->operands()) {
// I could be an extractvalue from a call to an overflow intrinsic.
// TODO: We can do better here in some cases.
- if (!isSCEVable(I->getOperand(OpIndex)->getType()))
- return false;
- const SCEV *Op = getSCEV(I->getOperand(OpIndex));
- if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
- bool AllOtherOpsLoopInvariant = true;
- for (unsigned OtherOpIndex = 0; OtherOpIndex < I->getNumOperands();
- ++OtherOpIndex) {
- if (OtherOpIndex != OpIndex) {
- const SCEV *OtherOp = getSCEV(I->getOperand(OtherOpIndex));
- if (!isLoopInvariant(OtherOp, AddRec->getLoop())) {
- AllOtherOpsLoopInvariant = false;
- break;
- }
- }
- }
- if (AllOtherOpsLoopInvariant &&
- isGuaranteedToExecuteForEveryIteration(I, AddRec->getLoop()))
- return true;
- }
+ if (isSCEVable(Op->getType()))
+ SCEVOps.push_back(getSCEV(Op));
}
- return false;
+ auto *DefI = getDefiningScopeBound(SCEVOps);
+ return isGuaranteedToTransferExecutionTo(DefI, I);
}
bool ScalarEvolution::isAddRecNeverPoison(const Instruction *I, const Loop *L) {
@@ -7144,10 +7254,21 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
// Iteration Count Computation Code
//
-const SCEV *ScalarEvolution::getTripCountFromExitCount(const SCEV *ExitCount) {
- // Get the trip count from the BE count by adding 1. Overflow, results
- // in zero which means "unknown".
- return getAddExpr(ExitCount, getOne(ExitCount->getType()));
+const SCEV *ScalarEvolution::getTripCountFromExitCount(const SCEV *ExitCount,
+ bool Extend) {
+ if (isa<SCEVCouldNotCompute>(ExitCount))
+ return getCouldNotCompute();
+
+ auto *ExitCountType = ExitCount->getType();
+ assert(ExitCountType->isIntegerTy());
+
+ if (!Extend)
+ return getAddExpr(ExitCount, getOne(ExitCountType));
+
+ auto *WiderType = Type::getIntNTy(ExitCountType->getContext(),
+ 1 + ExitCountType->getScalarSizeInBits());
+ return getAddExpr(getNoopOrZeroExtend(ExitCount, WiderType),
+ getOne(WiderType));
}
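A worked instance of why the Extend path widens first (illustration only, not part of the patch; plain fixed-width integers stand in for SCEVs, and 16 bits stands in for the N+1-bit type):

  #include <cstdint>
  uint16_t tripCountNoExtend(uint8_t ExitCount) {
    return uint8_t(ExitCount + 1);   // saturated count 255 wraps to 0, "unknown"
  }
  uint16_t tripCountExtend(uint8_t ExitCount) {
    return uint16_t(ExitCount) + 1;  // widen first: 255 becomes the exact 256
  }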
static unsigned getConstantTripCount(const SCEVConstant *ExitCount) {
@@ -7186,6 +7307,131 @@ unsigned ScalarEvolution::getSmallConstantMaxTripCount(const Loop *L) {
return getConstantTripCount(MaxExitCount);
}
+const SCEV *ScalarEvolution::getConstantMaxTripCountFromArray(const Loop *L) {
+ // We can't infer a trip count from array accesses in an irregular loop.
+ // FIXME: It's hard to infer a loop bound from arrays accessed in nested loops.
+ if (!L->isLoopSimplifyForm() || !L->isInnermost())
+ return getCouldNotCompute();
+
+ // FIXME: To keep to the typical case, we only analyze loops that have a
+ // single exiting block which must be the latch. This makes it easier to
+ // guarantee that any memory access we look at is executed on every
+ // iteration.
+ const BasicBlock *LoopLatch = L->getLoopLatch();
+ assert(LoopLatch && "See definition of simplified form loop.");
+ if (L->getExitingBlock() != LoopLatch)
+ return getCouldNotCompute();
+
+ const DataLayout &DL = getDataLayout();
+ SmallVector<const SCEV *> InferCountColl;
+ for (auto *BB : L->getBlocks()) {
+ // At this point we know the loop is a single-exit loop in simplified form.
+ // Only infer from memory operations that must execute in the loop, so that
+ // the maximum execution count of MemAccessBB bounds the maximum execution
+ // count of the latch.
+ // If MemAccessBB does not dom Latch, skip.
+ // Entry
+ // │
+ // ┌─────▼─────┐
+ // │Loop Header◄─────┐
+ // └──┬──────┬─┘ │
+ // │ │ │
+ // ┌────────▼──┐ ┌─▼─────┐ │
+ // │MemAccessBB│ │OtherBB│ │
+ // └────────┬──┘ └─┬─────┘ │
+ // │ │ │
+ // ┌─▼──────▼─┐ │
+ // │Loop Latch├─────┘
+ // └────┬─────┘
+ // ▼
+ // Exit
+ if (!DT.dominates(BB, LoopLatch))
+ continue;
+
+ for (Instruction &Inst : *BB) {
+ // Find Memory Operation Instruction.
+ auto *GEP = getLoadStorePointerOperand(&Inst);
+ if (!GEP)
+ continue;
+
+ auto *ElemSize = dyn_cast<SCEVConstant>(getElementSize(&Inst));
+ // Do not infer from a scalar type, e.g. "ElemSize = sizeof()".
+ if (!ElemSize)
+ continue;
+
+ // Use an existing polynomial recurrence on the trip count.
+ auto *AddRec = dyn_cast<SCEVAddRecExpr>(getSCEV(GEP));
+ if (!AddRec)
+ continue;
+ auto *ArrBase = dyn_cast<SCEVUnknown>(getPointerBase(AddRec));
+ auto *Step = dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*this));
+ if (!ArrBase || !Step)
+ continue;
+ assert(isLoopInvariant(ArrBase, L) && "See addrec definition");
+
+ // Only handle { %array + step },
+ // FIXME: {(SCEVAddRecExpr) + step } cannot be analyzed here.
+ if (AddRec->getStart() != ArrBase)
+ continue;
+
+ // Skip memory access patterns that have gaps, that repeat the same memory
+ // operation, or whose GEP index wraps around.
+ if (Step->getAPInt().getActiveBits() > 32 ||
+ Step->getAPInt().getZExtValue() !=
+ ElemSize->getAPInt().getZExtValue() ||
+ Step->isZero() || Step->getAPInt().isNegative())
+ continue;
+
+ // Only infer from a stack array with a known constant size, and make sure
+ // the alloca instruction is not executed inside the loop.
+ AllocaInst *AllocateInst = dyn_cast<AllocaInst>(ArrBase->getValue());
+ if (!AllocateInst || L->contains(AllocateInst->getParent()))
+ continue;
+
+ // Make sure we only handle a plain array type.
+ auto *Ty = dyn_cast<ArrayType>(AllocateInst->getAllocatedType());
+ auto *ArrSize = dyn_cast<ConstantInt>(AllocateInst->getArraySize());
+ if (!Ty || !ArrSize || !ArrSize->isOne())
+ continue;
+ // Also make sure the step matches the size of the allocated element
+ // type.
+ const PointerType *GEPT = dyn_cast<PointerType>(GEP->getType());
+ if (Ty->getElementType() != GEPT->getElementType())
+ continue;
+
+ // FIXME: Since GEP indices are silently zext to the indexing type, a
+ // narrow GEP index may wrap around rather than increase strictly; we
+ // should ensure that the step increases strictly with each loop
+ // iteration.
+ // Now we can infer a max execution time by MemLength/StepLength.
+ const SCEV *MemSize =
+ getConstant(Step->getType(), DL.getTypeAllocSize(Ty));
+ auto *MaxExeCount =
+ dyn_cast<SCEVConstant>(getUDivCeilSCEV(MemSize, Step));
+ if (!MaxExeCount || MaxExeCount->getAPInt().getActiveBits() > 32)
+ continue;
+
+ // If the loop reaches its maximum number of executions, accessing bytes
+ // outside the statically allocated size would be immediate UB. But it is
+ // still allowed to enter the loop header one more time.
+ auto *InferCount = dyn_cast<SCEVConstant>(
+ getAddExpr(MaxExeCount, getOne(MaxExeCount->getType())));
+ // Discard inferred counts that do not fit in 32 bits.
+ if (!InferCount || InferCount->getAPInt().getActiveBits() > 32)
+ continue;
+
+ InferCountColl.push_back(InferCount);
+ }
+ }
+
+ if (InferCountColl.size() == 0)
+ return getCouldNotCompute();
+
+ return getUMinFromMismatchedTypes(InferCountColl);
+}
+
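The kind of loop this new analysis is aimed at (illustration only, not part of the patch; f is an assumed opaque predicate): the exact backedge count is unknown, but storing past the 16-byte alloca would be immediate UB, so at most 16 stores execute and the header is entered at most 17 times, which is the constant max trip count returned.

  char a[16];
  int i = 0;
  do {
    a[i] = 1;        // access dominates the latch, step == element size
  } while (f(i++));  // data-dependent exit, not otherwise computable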
unsigned ScalarEvolution::getSmallConstantTripMultiple(const Loop *L) {
SmallVector<BasicBlock *, 8> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
@@ -7287,13 +7533,15 @@ bool ScalarEvolution::isBackedgeTakenCountMaxOrZero(const Loop *L) {
}
/// Push PHI nodes in the header of the given loop onto the given Worklist.
-static void
-PushLoopPHIs(const Loop *L, SmallVectorImpl<Instruction *> &Worklist) {
+static void PushLoopPHIs(const Loop *L,
+ SmallVectorImpl<Instruction *> &Worklist,
+ SmallPtrSetImpl<Instruction *> &Visited) {
BasicBlock *Header = L->getHeader();
// Push all Loop-header PHIs onto the Worklist stack.
for (PHINode &PN : Header->phis())
- Worklist.push_back(&PN);
+ if (Visited.insert(&PN).second)
+ Worklist.push_back(&PN);
}
const ScalarEvolution::BackedgeTakenInfo &
@@ -7354,9 +7602,9 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
// it handles SCEVUnknown PHI nodes specially.
if (Result.hasAnyInfo()) {
SmallVector<Instruction *, 16> Worklist;
- PushLoopPHIs(L, Worklist);
-
SmallPtrSet<Instruction *, 8> Discovered;
+ SmallVector<const SCEV *, 8> ToForget;
+ PushLoopPHIs(L, Worklist, Discovered);
while (!Worklist.empty()) {
Instruction *I = Worklist.pop_back_val();
@@ -7373,7 +7621,7 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
// own when it gets to that point.
if (!isa<PHINode>(I) || !isa<SCEVUnknown>(Old)) {
eraseValueFromMap(It->first);
- forgetMemoizedResults(Old);
+ ToForget.push_back(Old);
}
if (PHINode *PN = dyn_cast<PHINode>(I))
ConstantEvolutionLoopExitValue.erase(PN);
@@ -7405,6 +7653,7 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
Worklist.push_back(I);
}
}
+ forgetMemoizedResults(ToForget);
}
// Re-lookup the insert position, since the call to
@@ -7441,6 +7690,7 @@ void ScalarEvolution::forgetLoop(const Loop *L) {
SmallVector<const Loop *, 16> LoopWorklist(1, L);
SmallVector<Instruction *, 32> Worklist;
SmallPtrSet<Instruction *, 16> Visited;
+ SmallVector<const SCEV *, 16> ToForget;
// Iterate over all the loops and sub-loops to drop SCEV information.
while (!LoopWorklist.empty()) {
@@ -7462,29 +7712,27 @@ void ScalarEvolution::forgetLoop(const Loop *L) {
auto LoopUsersItr = LoopUsers.find(CurrL);
if (LoopUsersItr != LoopUsers.end()) {
- for (auto *S : LoopUsersItr->second)
- forgetMemoizedResults(S);
+ ToForget.insert(ToForget.end(), LoopUsersItr->second.begin(),
+ LoopUsersItr->second.end());
LoopUsers.erase(LoopUsersItr);
}
// Drop information about expressions based on loop-header PHIs.
- PushLoopPHIs(CurrL, Worklist);
+ PushLoopPHIs(CurrL, Worklist, Visited);
while (!Worklist.empty()) {
Instruction *I = Worklist.pop_back_val();
- if (!Visited.insert(I).second)
- continue;
ValueExprMapType::iterator It =
ValueExprMap.find_as(static_cast<Value *>(I));
if (It != ValueExprMap.end()) {
eraseValueFromMap(It->first);
- forgetMemoizedResults(It->second);
+ ToForget.push_back(It->second);
if (PHINode *PN = dyn_cast<PHINode>(I))
ConstantEvolutionLoopExitValue.erase(PN);
}
- PushDefUseChildren(I, Worklist);
+ PushDefUseChildren(I, Worklist, Visited);
}
LoopPropertiesCache.erase(CurrL);
@@ -7492,6 +7740,7 @@ void ScalarEvolution::forgetLoop(const Loop *L) {
// ValuesAtScopes map.
LoopWorklist.append(CurrL->begin(), CurrL->end());
}
+ forgetMemoizedResults(ToForget);
}
void ScalarEvolution::forgetTopmostLoop(const Loop *L) {
@@ -7506,25 +7755,25 @@ void ScalarEvolution::forgetValue(Value *V) {
// Drop information about expressions based on loop-header PHIs.
SmallVector<Instruction *, 16> Worklist;
+ SmallPtrSet<Instruction *, 8> Visited;
+ SmallVector<const SCEV *, 8> ToForget;
Worklist.push_back(I);
+ Visited.insert(I);
- SmallPtrSet<Instruction *, 8> Visited;
while (!Worklist.empty()) {
I = Worklist.pop_back_val();
- if (!Visited.insert(I).second)
- continue;
-
ValueExprMapType::iterator It =
ValueExprMap.find_as(static_cast<Value *>(I));
if (It != ValueExprMap.end()) {
eraseValueFromMap(It->first);
- forgetMemoizedResults(It->second);
+ ToForget.push_back(It->second);
if (PHINode *PN = dyn_cast<PHINode>(I))
ConstantEvolutionLoopExitValue.erase(PN);
}
- PushDefUseChildren(I, Worklist);
+ PushDefUseChildren(I, Worklist, Visited);
}
+ forgetMemoizedResults(ToForget);
}
void ScalarEvolution::forgetLoopDispositions(const Loop *L) {
@@ -7598,7 +7847,7 @@ ScalarEvolution::BackedgeTakenInfo::getConstantMax(ScalarEvolution *SE) const {
return !ENT.hasAlwaysTruePredicate();
};
- if (any_of(ExitNotTaken, PredicateNotAlwaysTrue) || !getConstantMax())
+ if (!getConstantMax() || any_of(ExitNotTaken, PredicateNotAlwaysTrue))
return SE->getCouldNotCompute();
assert((isa<SCEVCouldNotCompute>(getConstantMax()) ||
@@ -7635,6 +7884,12 @@ ScalarEvolution::ExitLimit::ExitLimit(
const SCEV *E, const SCEV *M, bool MaxOrZero,
ArrayRef<const SmallPtrSetImpl<const SCEVPredicate *> *> PredSetList)
: ExactNotTaken(E), MaxNotTaken(M), MaxOrZero(MaxOrZero) {
+ // If we prove the max count is zero, so is the symbolic bound. This happens
+ // in practice due to differences in a) how context sensitive we've chosen
+ // to be and b) how we reason about bounds implied by UB.
+ if (MaxNotTaken->isZero())
+ ExactNotTaken = MaxNotTaken;
+
assert((isa<SCEVCouldNotCompute>(ExactNotTaken) ||
!isa<SCEVCouldNotCompute>(MaxNotTaken)) &&
"Exact is not allowed to be less precise than Max");
@@ -7740,7 +7995,7 @@ ScalarEvolution::computeBackedgeTakenCount(const Loop *L,
if (auto *BI = dyn_cast<BranchInst>(ExitBB->getTerminator()))
if (auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
bool ExitIfTrue = !L->contains(BI->getSuccessor(0));
- if ((ExitIfTrue && CI->isZero()) || (!ExitIfTrue && CI->isOne()))
+ if (ExitIfTrue == CI->isZero())
continue;
}
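A quick check of the equivalence behind the simplified test (illustration only, not part of the patch): for an i1 constant, isOne() is the negation of isZero(), so the old two-clause "exit never taken" form collapses to a single comparison.

  constexpr bool exitNeverTaken(bool ExitIfTrue, bool CondIsZero) {
    return (ExitIfTrue && CondIsZero) || (!ExitIfTrue && !CondIsZero);
  }
  static_assert(exitNeverTaken(false, false) == (false == false), "");
  static_assert(exitNeverTaken(false, true)  == (false == true),  "");
  static_assert(exitNeverTaken(true,  false) == (true  == false), "");
  static_assert(exitNeverTaken(true,  true)  == (true  == true),  "");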
@@ -8030,15 +8285,6 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L,
Pred = ExitCond->getInversePredicate();
const ICmpInst::Predicate OriginalPred = Pred;
- // Handle common loops like: for (X = "string"; *X; ++X)
- if (LoadInst *LI = dyn_cast<LoadInst>(ExitCond->getOperand(0)))
- if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) {
- ExitLimit ItCnt =
- computeLoadConstantCompareExitLimit(LI, RHS, L, Pred);
- if (ItCnt.hasAnyInfo())
- return ItCnt;
- }
-
const SCEV *LHS = getSCEV(ExitCond->getOperand(0));
const SCEV *RHS = getSCEV(ExitCond->getOperand(1));
@@ -8070,6 +8316,32 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L,
if (!isa<SCEVCouldNotCompute>(Ret)) return Ret;
}
+ // If this loop must exit based on this condition (or execute undefined
+ // behaviour), and we can prove the test sequence produced must repeat
+ // the same values on self-wrap of the IV, then we can infer that IV
+ // doesn't self wrap because if it did, we'd have an infinite (undefined)
+ // loop.
+ if (ControlsExit && isLoopInvariant(RHS, L) && loopHasNoAbnormalExits(L) &&
+ loopIsFiniteByAssumption(L)) {
+
+ // TODO: We can peel off any functions which are invertible *in L*. Loop
+ // invariant terms are effectively constants for our purposes here.
+ auto *InnerLHS = LHS;
+ if (auto *ZExt = dyn_cast<SCEVZeroExtendExpr>(LHS))
+ InnerLHS = ZExt->getOperand();
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(InnerLHS)) {
+ auto *StrideC = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this));
+ if (!AR->hasNoSelfWrap() && AR->getLoop() == L && AR->isAffine() &&
+ StrideC && StrideC->getAPInt().isPowerOf2()) {
+ auto Flags = AR->getNoWrapFlags();
+ Flags = setFlags(Flags, SCEV::FlagNW);
+ SmallVector<const SCEV*> Operands{AR->operands()};
+ Flags = StrengthenNoWrapFlags(this, scAddRecExpr, Operands, Flags);
+ setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), Flags);
+ }
+ }
+ }
+
switch (Pred) {
case ICmpInst::ICMP_NE: { // while (X != Y)
// Convert to: while (X-Y != 0)
@@ -8169,85 +8441,6 @@ EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C,
return cast<SCEVConstant>(Val)->getValue();
}
-/// Given an exit condition of 'icmp op load X, cst', try to see if we can
-/// compute the backedge execution count.
-ScalarEvolution::ExitLimit
-ScalarEvolution::computeLoadConstantCompareExitLimit(
- LoadInst *LI,
- Constant *RHS,
- const Loop *L,
- ICmpInst::Predicate predicate) {
- if (LI->isVolatile()) return getCouldNotCompute();
-
- // Check to see if the loaded pointer is a getelementptr of a global.
- // TODO: Use SCEV instead of manually grubbing with GEPs.
- GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0));
- if (!GEP) return getCouldNotCompute();
-
- // Make sure that it is really a constant global we are gepping, with an
- // initializer, and make sure the first IDX is really 0.
- GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0));
- if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() ||
- GEP->getNumOperands() < 3 || !isa<Constant>(GEP->getOperand(1)) ||
- !cast<Constant>(GEP->getOperand(1))->isNullValue())
- return getCouldNotCompute();
-
- // Okay, we allow one non-constant index into the GEP instruction.
- Value *VarIdx = nullptr;
- std::vector<Constant*> Indexes;
- unsigned VarIdxNum = 0;
- for (unsigned i = 2, e = GEP->getNumOperands(); i != e; ++i)
- if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
- Indexes.push_back(CI);
- } else if (!isa<ConstantInt>(GEP->getOperand(i))) {
- if (VarIdx) return getCouldNotCompute(); // Multiple non-constant idx's.
- VarIdx = GEP->getOperand(i);
- VarIdxNum = i-2;
- Indexes.push_back(nullptr);
- }
-
- // Loop-invariant loads may be a byproduct of loop optimization. Skip them.
- if (!VarIdx)
- return getCouldNotCompute();
-
- // Okay, we know we have a (load (gep GV, 0, X)) comparison with a constant.
- // Check to see if X is a loop variant variable value now.
- const SCEV *Idx = getSCEV(VarIdx);
- Idx = getSCEVAtScope(Idx, L);
-
- // We can only recognize very limited forms of loop index expressions, in
- // particular, only affine AddRec's like {C1,+,C2}<L>.
- const SCEVAddRecExpr *IdxExpr = dyn_cast<SCEVAddRecExpr>(Idx);
- if (!IdxExpr || IdxExpr->getLoop() != L || !IdxExpr->isAffine() ||
- isLoopInvariant(IdxExpr, L) ||
- !isa<SCEVConstant>(IdxExpr->getOperand(0)) ||
- !isa<SCEVConstant>(IdxExpr->getOperand(1)))
- return getCouldNotCompute();
-
- unsigned MaxSteps = MaxBruteForceIterations;
- for (unsigned IterationNum = 0; IterationNum != MaxSteps; ++IterationNum) {
- ConstantInt *ItCst = ConstantInt::get(
- cast<IntegerType>(IdxExpr->getType()), IterationNum);
- ConstantInt *Val = EvaluateConstantChrecAtConstant(IdxExpr, ItCst, *this);
-
- // Form the GEP offset.
- Indexes[VarIdxNum] = Val;
-
- Constant *Result = ConstantFoldLoadThroughGEPIndices(GV->getInitializer(),
- Indexes);
- if (!Result) break; // Cannot compute!
-
- // Evaluate the condition for this iteration.
- Result = ConstantExpr::getICmp(predicate, Result, RHS);
- if (!isa<ConstantInt>(Result)) break; // Couldn't decide for sure
- if (cast<ConstantInt>(Result)->getValue().isMinValue()) {
- ++NumArrayLenItCounts;
- return getConstant(ItCst); // Found terminating iteration!
- }
- }
- return getCouldNotCompute();
-}
-
ScalarEvolution::ExitLimit ScalarEvolution::computeShiftCompareExitLimit(
Value *LHS, Value *RHSV, const Loop *L, ICmpInst::Predicate Pred) {
ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV);
@@ -9160,7 +9353,7 @@ GetQuadraticEquation(const SCEVAddRecExpr *AddRec) {
APInt L = LC->getAPInt();
APInt M = MC->getAPInt();
APInt N = NC->getAPInt();
- assert(!N.isNullValue() && "This is not a quadratic addrec");
+ assert(!N.isZero() && "This is not a quadratic addrec");
unsigned BitWidth = LC->getAPInt().getBitWidth();
unsigned NewWidth = BitWidth + 1;
@@ -9486,9 +9679,7 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
// N = Distance (as unsigned)
if (StepC->getValue()->isOne() || StepC->getValue()->isMinusOne()) {
APInt MaxBECount = getUnsignedRangeMax(applyLoopGuards(Distance, L));
- APInt MaxBECountBase = getUnsignedRangeMax(Distance);
- if (MaxBECountBase.ult(MaxBECount))
- MaxBECount = MaxBECountBase;
+ MaxBECount = APIntOps::umin(MaxBECount, getUnsignedRangeMax(Distance));
// When a loop like "for (int i = 0; i != n; ++i) { /* body */ }" is rotated,
// we end up with a loop whose backedge-taken count is n - 1. Detect this
@@ -9521,11 +9712,7 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
const SCEV *Max = getCouldNotCompute();
if (Exact != getCouldNotCompute()) {
APInt MaxInt = getUnsignedRangeMax(applyLoopGuards(Exact, L));
- APInt BaseMaxInt = getUnsignedRangeMax(Exact);
- if (BaseMaxInt.ult(MaxInt))
- Max = getConstant(BaseMaxInt);
- else
- Max = getConstant(MaxInt);
+ Max = getConstant(APIntOps::umin(MaxInt, getUnsignedRangeMax(Exact)));
}
return ExitLimit(Exact, Max, false, Predicates);
}
@@ -9533,9 +9720,12 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
// Solve the general equation.
const SCEV *E = SolveLinEquationWithOverflow(StepC->getAPInt(),
getNegativeSCEV(Start), *this);
- const SCEV *M = E == getCouldNotCompute()
- ? E
- : getConstant(getUnsignedRangeMax(E));
+
+ const SCEV *M = E;
+ if (E != getCouldNotCompute()) {
+ APInt MaxWithGuards = getUnsignedRangeMax(applyLoopGuards(E, L));
+ M = getConstant(APIntOps::umin(MaxWithGuards, getUnsignedRangeMax(E)));
+ }
return ExitLimit(E, M, false, Predicates);
}
@@ -9911,23 +10101,23 @@ Optional<bool> ScalarEvolution::evaluatePredicate(ICmpInst::Predicate Pred,
bool ScalarEvolution::isKnownPredicateAt(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS,
- const Instruction *Context) {
+ const Instruction *CtxI) {
// TODO: Analyze guards and assumes from Context's block.
return isKnownPredicate(Pred, LHS, RHS) ||
- isBasicBlockEntryGuardedByCond(Context->getParent(), Pred, LHS, RHS);
+ isBasicBlockEntryGuardedByCond(CtxI->getParent(), Pred, LHS, RHS);
}
-Optional<bool>
-ScalarEvolution::evaluatePredicateAt(ICmpInst::Predicate Pred, const SCEV *LHS,
- const SCEV *RHS,
- const Instruction *Context) {
+Optional<bool> ScalarEvolution::evaluatePredicateAt(ICmpInst::Predicate Pred,
+ const SCEV *LHS,
+ const SCEV *RHS,
+ const Instruction *CtxI) {
Optional<bool> KnownWithoutContext = evaluatePredicate(Pred, LHS, RHS);
if (KnownWithoutContext)
return KnownWithoutContext;
- if (isBasicBlockEntryGuardedByCond(Context->getParent(), Pred, LHS, RHS))
+ if (isBasicBlockEntryGuardedByCond(CtxI->getParent(), Pred, LHS, RHS))
return true;
- else if (isBasicBlockEntryGuardedByCond(Context->getParent(),
+ else if (isBasicBlockEntryGuardedByCond(CtxI->getParent(),
ICmpInst::getInversePredicate(Pred),
LHS, RHS))
return false;
@@ -10057,7 +10247,7 @@ ScalarEvolution::getLoopInvariantPredicate(ICmpInst::Predicate Pred,
Optional<ScalarEvolution::LoopInvariantPredicate>
ScalarEvolution::getLoopInvariantExitCondDuringFirstIterations(
ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L,
- const Instruction *Context, const SCEV *MaxIter) {
+ const Instruction *CtxI, const SCEV *MaxIter) {
// Try to prove the following set of facts:
// - The predicate is monotonic in the iteration space.
// - If the check does not fail on the 1st iteration:
@@ -10111,7 +10301,7 @@ ScalarEvolution::getLoopInvariantExitCondDuringFirstIterations(
if (Step == MinusOne)
NoOverflowPred = CmpInst::getSwappedPredicate(NoOverflowPred);
const SCEV *Start = AR->getStart();
- if (!isKnownPredicateAt(NoOverflowPred, Start, Last, Context))
+ if (!isKnownPredicateAt(NoOverflowPred, Start, Last, CtxI))
return None;
// Everything is fine.
@@ -10448,12 +10638,12 @@ bool ScalarEvolution::isBasicBlockEntryGuardedByCond(const BasicBlock *BB,
// Try to prove (Pred, LHS, RHS) using isImpliedCond.
auto ProveViaCond = [&](const Value *Condition, bool Inverse) {
- const Instruction *Context = &BB->front();
- if (isImpliedCond(Pred, LHS, RHS, Condition, Inverse, Context))
+ const Instruction *CtxI = &BB->front();
+ if (isImpliedCond(Pred, LHS, RHS, Condition, Inverse, CtxI))
return true;
if (ProvingStrictComparison) {
auto ProofFn = [&](ICmpInst::Predicate P) {
- return isImpliedCond(P, LHS, RHS, Condition, Inverse, Context);
+ return isImpliedCond(P, LHS, RHS, Condition, Inverse, CtxI);
};
if (SplitAndProve(ProofFn))
return true;
@@ -10525,7 +10715,7 @@ bool ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS,
const SCEV *RHS,
const Value *FoundCondValue, bool Inverse,
- const Instruction *Context) {
+ const Instruction *CtxI) {
// False conditions implies anything. Do not bother analyzing it further.
if (FoundCondValue ==
ConstantInt::getBool(FoundCondValue->getContext(), Inverse))
@@ -10541,12 +10731,12 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS,
const Value *Op0, *Op1;
if (match(FoundCondValue, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
if (!Inverse)
- return isImpliedCond(Pred, LHS, RHS, Op0, Inverse, Context) ||
- isImpliedCond(Pred, LHS, RHS, Op1, Inverse, Context);
+ return isImpliedCond(Pred, LHS, RHS, Op0, Inverse, CtxI) ||
+ isImpliedCond(Pred, LHS, RHS, Op1, Inverse, CtxI);
} else if (match(FoundCondValue, m_LogicalOr(m_Value(Op0), m_Value(Op1)))) {
if (Inverse)
- return isImpliedCond(Pred, LHS, RHS, Op0, Inverse, Context) ||
- isImpliedCond(Pred, LHS, RHS, Op1, Inverse, Context);
+ return isImpliedCond(Pred, LHS, RHS, Op0, Inverse, CtxI) ||
+ isImpliedCond(Pred, LHS, RHS, Op1, Inverse, CtxI);
}
const ICmpInst *ICI = dyn_cast<ICmpInst>(FoundCondValue);
@@ -10563,14 +10753,14 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS,
const SCEV *FoundLHS = getSCEV(ICI->getOperand(0));
const SCEV *FoundRHS = getSCEV(ICI->getOperand(1));
- return isImpliedCond(Pred, LHS, RHS, FoundPred, FoundLHS, FoundRHS, Context);
+ return isImpliedCond(Pred, LHS, RHS, FoundPred, FoundLHS, FoundRHS, CtxI);
}
bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS,
const SCEV *RHS,
ICmpInst::Predicate FoundPred,
const SCEV *FoundLHS, const SCEV *FoundRHS,
- const Instruction *Context) {
+ const Instruction *CtxI) {
// Balance the types.
if (getTypeSizeInBits(LHS->getType()) <
getTypeSizeInBits(FoundLHS->getType())) {
@@ -10583,12 +10773,14 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS,
auto BitWidth = getTypeSizeInBits(NarrowType);
const SCEV *MaxValue = getZeroExtendExpr(
getConstant(APInt::getMaxValue(BitWidth)), WideType);
- if (isKnownPredicate(ICmpInst::ICMP_ULE, FoundLHS, MaxValue) &&
- isKnownPredicate(ICmpInst::ICMP_ULE, FoundRHS, MaxValue)) {
+ if (isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_ULE, FoundLHS,
+ MaxValue) &&
+ isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_ULE, FoundRHS,
+ MaxValue)) {
const SCEV *TruncFoundLHS = getTruncateExpr(FoundLHS, NarrowType);
const SCEV *TruncFoundRHS = getTruncateExpr(FoundRHS, NarrowType);
if (isImpliedCondBalancedTypes(Pred, LHS, RHS, FoundPred, TruncFoundLHS,
- TruncFoundRHS, Context))
+ TruncFoundRHS, CtxI))
return true;
}
}
@@ -10615,13 +10807,13 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS,
}
}
return isImpliedCondBalancedTypes(Pred, LHS, RHS, FoundPred, FoundLHS,
- FoundRHS, Context);
+ FoundRHS, CtxI);
}
bool ScalarEvolution::isImpliedCondBalancedTypes(
ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS,
ICmpInst::Predicate FoundPred, const SCEV *FoundLHS, const SCEV *FoundRHS,
- const Instruction *Context) {
+ const Instruction *CtxI) {
assert(getTypeSizeInBits(LHS->getType()) ==
getTypeSizeInBits(FoundLHS->getType()) &&
"Types should be balanced!");
@@ -10647,7 +10839,7 @@ bool ScalarEvolution::isImpliedCondBalancedTypes(
// Check whether the found predicate is the same as the desired predicate.
if (FoundPred == Pred)
- return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, Context);
+ return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, CtxI);
// Check whether swapping the found predicate makes it the same as the
// desired predicate.
@@ -10663,27 +10855,70 @@ bool ScalarEvolution::isImpliedCondBalancedTypes(
// do this if it would break canonical constant/addrec ordering.
if (!isa<SCEVConstant>(RHS) && !isa<SCEVAddRecExpr>(LHS))
return isImpliedCondOperands(FoundPred, RHS, LHS, FoundLHS, FoundRHS,
- Context);
+ CtxI);
if (!isa<SCEVConstant>(FoundRHS) && !isa<SCEVAddRecExpr>(FoundLHS))
- return isImpliedCondOperands(Pred, LHS, RHS, FoundRHS, FoundLHS, Context);
+ return isImpliedCondOperands(Pred, LHS, RHS, FoundRHS, FoundLHS, CtxI);
+
+ // There's no clear preference between forms 3. and 4., try both. Avoid
+ // forming getNotSCEV of pointer values as the resulting subtract is
+ // not legal.
+ if (!LHS->getType()->isPointerTy() && !RHS->getType()->isPointerTy() &&
+ isImpliedCondOperands(FoundPred, getNotSCEV(LHS), getNotSCEV(RHS),
+ FoundLHS, FoundRHS, CtxI))
+ return true;
- // Don't try to getNotSCEV pointers.
- if (LHS->getType()->isPointerTy() || FoundLHS->getType()->isPointerTy())
- return false;
+ if (!FoundLHS->getType()->isPointerTy() &&
+ !FoundRHS->getType()->isPointerTy() &&
+ isImpliedCondOperands(Pred, LHS, RHS, getNotSCEV(FoundLHS),
+ getNotSCEV(FoundRHS), CtxI))
+ return true;
- // There's no clear preference between forms 3. and 4., try both.
- return isImpliedCondOperands(FoundPred, getNotSCEV(LHS), getNotSCEV(RHS),
- FoundLHS, FoundRHS, Context) ||
- isImpliedCondOperands(Pred, LHS, RHS, getNotSCEV(FoundLHS),
- getNotSCEV(FoundRHS), Context);
+ return false;
}
- // Unsigned comparison is the same as signed comparison when both the operands
- // are non-negative.
- if (CmpInst::isUnsigned(FoundPred) &&
- CmpInst::getSignedPredicate(FoundPred) == Pred &&
- isKnownNonNegative(FoundLHS) && isKnownNonNegative(FoundRHS))
- return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, Context);
+ auto IsSignFlippedPredicate = [](CmpInst::Predicate P1,
+ CmpInst::Predicate P2) {
+ assert(P1 != P2 && "Handled earlier!");
+ return CmpInst::isRelational(P2) &&
+ P1 == CmpInst::getFlippedSignednessPredicate(P2);
+ };
+ if (IsSignFlippedPredicate(Pred, FoundPred)) {
+ // Unsigned comparison is the same as signed comparison when both the
+ // operands are non-negative or negative.
+ if ((isKnownNonNegative(FoundLHS) && isKnownNonNegative(FoundRHS)) ||
+ (isKnownNegative(FoundLHS) && isKnownNegative(FoundRHS)))
+ return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, CtxI);
+ // Create local copies that we can freely swap and canonicalize our
+ // conditions to "le/lt".
+ ICmpInst::Predicate CanonicalPred = Pred, CanonicalFoundPred = FoundPred;
+ const SCEV *CanonicalLHS = LHS, *CanonicalRHS = RHS,
+ *CanonicalFoundLHS = FoundLHS, *CanonicalFoundRHS = FoundRHS;
+ if (ICmpInst::isGT(CanonicalPred) || ICmpInst::isGE(CanonicalPred)) {
+ CanonicalPred = ICmpInst::getSwappedPredicate(CanonicalPred);
+ CanonicalFoundPred = ICmpInst::getSwappedPredicate(CanonicalFoundPred);
+ std::swap(CanonicalLHS, CanonicalRHS);
+ std::swap(CanonicalFoundLHS, CanonicalFoundRHS);
+ }
+ assert((ICmpInst::isLT(CanonicalPred) || ICmpInst::isLE(CanonicalPred)) &&
+ "Must be!");
+ assert((ICmpInst::isLT(CanonicalFoundPred) ||
+ ICmpInst::isLE(CanonicalFoundPred)) &&
+ "Must be!");
+ if (ICmpInst::isSigned(CanonicalPred) && isKnownNonNegative(CanonicalRHS))
+ // Use implication:
+ // x <u y && y >=s 0 --> x <s y.
+ // If we can prove the left part, the right part is also proven.
+ return isImpliedCondOperands(CanonicalFoundPred, CanonicalLHS,
+ CanonicalRHS, CanonicalFoundLHS,
+ CanonicalFoundRHS);
+ if (ICmpInst::isUnsigned(CanonicalPred) && isKnownNegative(CanonicalRHS))
+ // Use implication:
+ // x <s y && y <s 0 --> x <u y.
+ // If we can prove the left part, the right part is also proven.
+ return isImpliedCondOperands(CanonicalFoundPred, CanonicalLHS,
+ CanonicalRHS, CanonicalFoundLHS,
+ CanonicalFoundRHS);
+ }
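The first implication used above can be checked exhaustively at a small bit width (illustration only, not part of the patch; 8-bit values stand in for the SCEV operands):

  #include <cassert>
  int main() {
    for (int x = 0; x < 256; ++x)
      for (int y = 0; y < 256; ++y) {
        int sx = x < 128 ? x : x - 256;  // x reinterpreted as signed i8
        int sy = y < 128 ? y : y - 256;  // y reinterpreted as signed i8
        if (x < y && sy >= 0)            // x <u y  &&  y >=s 0
          assert(sx < sy);               //      ==>  x <s y
      }
    return 0;
  }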
// Check if we can make progress by sharpening ranges.
if (FoundPred == ICmpInst::ICMP_NE &&
@@ -10721,7 +10956,7 @@ bool ScalarEvolution::isImpliedCondBalancedTypes(
// We know V `Pred` SharperMin. If this implies LHS `Pred`
// RHS, we're done.
if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(SharperMin),
- Context))
+ CtxI))
return true;
LLVM_FALLTHROUGH;
@@ -10736,8 +10971,7 @@ bool ScalarEvolution::isImpliedCondBalancedTypes(
//
// If V `Pred` Min implies LHS `Pred` RHS, we're done.
- if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(Min),
- Context))
+ if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(Min), CtxI))
return true;
break;
@@ -10745,14 +10979,14 @@ bool ScalarEvolution::isImpliedCondBalancedTypes(
case ICmpInst::ICMP_SLE:
case ICmpInst::ICMP_ULE:
if (isImpliedCondOperands(CmpInst::getSwappedPredicate(Pred), RHS,
- LHS, V, getConstant(SharperMin), Context))
+ LHS, V, getConstant(SharperMin), CtxI))
return true;
LLVM_FALLTHROUGH;
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_ULT:
if (isImpliedCondOperands(CmpInst::getSwappedPredicate(Pred), RHS,
- LHS, V, getConstant(Min), Context))
+ LHS, V, getConstant(Min), CtxI))
return true;
break;
@@ -10766,12 +11000,11 @@ bool ScalarEvolution::isImpliedCondBalancedTypes(
// Check whether the actual condition is beyond sufficient.
if (FoundPred == ICmpInst::ICMP_EQ)
if (ICmpInst::isTrueWhenEqual(Pred))
- if (isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, Context))
+ if (isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, CtxI))
return true;
if (Pred == ICmpInst::ICMP_NE)
if (!ICmpInst::isTrueWhenEqual(FoundPred))
- if (isImpliedCondOperands(FoundPred, LHS, RHS, FoundLHS, FoundRHS,
- Context))
+ if (isImpliedCondOperands(FoundPred, LHS, RHS, FoundLHS, FoundRHS, CtxI))
return true;
// Otherwise assume the worst.
@@ -10852,7 +11085,7 @@ Optional<APInt> ScalarEvolution::computeConstantDifference(const SCEV *More,
bool ScalarEvolution::isImpliedCondOperandsViaAddRecStart(
ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS,
- const SCEV *FoundLHS, const SCEV *FoundRHS, const Instruction *Context) {
+ const SCEV *FoundLHS, const SCEV *FoundRHS, const Instruction *CtxI) {
// Try to recognize the following pattern:
//
// FoundRHS = ...
@@ -10866,9 +11099,9 @@ bool ScalarEvolution::isImpliedCondOperandsViaAddRecStart(
// each iteration of this loop, including the first iteration. Therefore, in
// this case, `FoundLHS Pred FoundRHS` implies `Start Pred FoundRHS`. Try to
// prove the original pred using this fact.
- if (!Context)
+ if (!CtxI)
return false;
- const BasicBlock *ContextBB = Context->getParent();
+ const BasicBlock *ContextBB = CtxI->getParent();
// Make sure AR varies in the context block.
if (auto *AR = dyn_cast<SCEVAddRecExpr>(FoundLHS)) {
const Loop *L = AR->getLoop();
@@ -11090,7 +11323,7 @@ bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS,
const SCEV *FoundLHS,
const SCEV *FoundRHS,
- const Instruction *Context) {
+ const Instruction *CtxI) {
if (isImpliedCondOperandsViaRanges(Pred, LHS, RHS, FoundLHS, FoundRHS))
return true;
@@ -11098,7 +11331,7 @@ bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
return true;
if (isImpliedCondOperandsViaAddRecStart(Pred, LHS, RHS, FoundLHS, FoundRHS,
- Context))
+ CtxI))
return true;
return isImpliedCondOperandsHelper(Pred, LHS, RHS,
@@ -11534,6 +11767,12 @@ const SCEV *ScalarEvolution::computeMaxBECountForLT(const SCEV *Start,
if (IsSigned && BitWidth == 1)
return getZero(Stride->getType());
+ // This code has only been closely audited for negative strides in the
+ // unsigned comparison case; it may be correct for signed comparison, but
+ // that needs to be established.
+ assert((!IsSigned || !isKnownNonPositive(Stride)) &&
+ "Stride is expected strictly positive for signed case!");
+
// Calculate the maximum backedge count based on the range of values
// permitted by Start, End, and Stride.
APInt MinStart =
@@ -11576,6 +11815,80 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);
bool PredicatedIV = false;
+ auto canAssumeNoSelfWrap = [&](const SCEVAddRecExpr *AR) {
+ // Can we prove this loop *must* be UB if overflow of IV occurs?
+ // Reasoning goes as follows:
+ // * Suppose the IV did self wrap.
+ // * If Stride evenly divides the iteration space, then once wrap
+ // occurs, the loop must revisit the same values.
+ // * We know that RHS is invariant, and that none of those values
+ // caused this exit to be taken previously. Thus, this exit is
+ // dynamically dead.
+ // * If this is the sole exit, then a dead exit implies the loop
+ // must be infinite if there are no abnormal exits.
+ // * If the loop were infinite, then it must either not be mustprogress
+ // or have side effects. Otherwise, it must be UB.
+ // * It can't (by assumption), be UB so we have contradicted our
+ // premise and can conclude the IV did not in fact self-wrap.
+ if (!isLoopInvariant(RHS, L))
+ return false;
+
+ auto *StrideC = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this));
+ if (!StrideC || !StrideC->getAPInt().isPowerOf2())
+ return false;
+
+ if (!ControlsExit || !loopHasNoAbnormalExits(L))
+ return false;
+
+ return loopIsFiniteByAssumption(L);
+ };
+
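The shape of loop the hoisted canAssumeNoSelfWrap argument covers (illustration only, not part of the patch; start and n are assumed loop-invariant): the stride 32 is a power of two and so evenly divides the 8-bit iteration space. If i self-wrapped, the loop would revisit the same values forever; in a mustprogress function with no other exits or side effects that infinite loop would be UB, so the IV cannot in fact self-wrap.

  unsigned char i = start;
  while (i != n) {
    i += 32;           // power-of-two stride in an 8-bit space
  }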
+ if (!IV) {
+ if (auto *ZExt = dyn_cast<SCEVZeroExtendExpr>(LHS)) {
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ZExt->getOperand());
+ if (AR && AR->getLoop() == L && AR->isAffine()) {
+ auto canProveNUW = [&]() {
+ if (!isLoopInvariant(RHS, L))
+ return false;
+
+ if (!isKnownNonZero(AR->getStepRecurrence(*this)))
+ // We need the sequence defined by AR to strictly increase in the
+ // unsigned integer domain for the logic below to hold.
+ return false;
+
+ const unsigned InnerBitWidth = getTypeSizeInBits(AR->getType());
+ const unsigned OuterBitWidth = getTypeSizeInBits(RHS->getType());
+ // If RHS <=u Limit, then there must exist a value V in the sequence
+ // defined by AR (e.g. {Start,+,Step}) such that V >u RHS, and
+ // V <=u UINT_MAX. Thus, we must exit the loop before unsigned
+ // overflow occurs. This limit also implies that a signed comparison
+ // (in the wide bitwidth) is equivalent to an unsigned comparison as
+ // the high bits on both sides must be zero.
+ APInt StrideMax = getUnsignedRangeMax(AR->getStepRecurrence(*this));
+ APInt Limit = APInt::getMaxValue(InnerBitWidth) - (StrideMax - 1);
+ Limit = Limit.zext(OuterBitWidth);
+ return getUnsignedRangeMax(applyLoopGuards(RHS, L)).ule(Limit);
+ };
+ auto Flags = AR->getNoWrapFlags();
+ if (!hasFlags(Flags, SCEV::FlagNUW) && canProveNUW())
+ Flags = setFlags(Flags, SCEV::FlagNUW);
+
+ setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), Flags);
+ if (AR->hasNoUnsignedWrap()) {
+ // Emulate what getZeroExtendExpr would have done during construction
+ // if we'd been able to infer the fact just above at that time.
+ const SCEV *Step = AR->getStepRecurrence(*this);
+ Type *Ty = ZExt->getType();
+ auto *S = getAddRecExpr(
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, 0),
+ getZeroExtendExpr(Step, Ty, 0), L, AR->getNoWrapFlags());
+ IV = dyn_cast<SCEVAddRecExpr>(S);
+ }
+ }
+ }
+ }
+
+
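A worked instance of the Limit computed by canProveNUW (illustration only, not part of the patch; an i8 AddRec zero-extended to i32 with a maximum stride of 3 is assumed):

  constexpr unsigned InnerBitWidth = 8, StrideMax = 3;
  constexpr unsigned Limit =
      ((1u << InnerBitWidth) - 1) - (StrideMax - 1);  // 255 - 2 = 253
  static_assert(Limit == 253, "");
  // If every value RHS can take is <= 253, the strictly increasing i8
  // sequence must produce some V with RHS < V <= 255 before it could wrap,
  // so the exit is taken first and the AddRec is NUW.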
if (!IV && AllowPredicates) {
// Try to make this an AddRec using runtime tests, in the first X
// iterations of this loop, where X is the SCEV expression found by the
@@ -11626,32 +11939,29 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
//
// a) IV is either nuw or nsw depending upon signedness (indicated by the
// NoWrap flag).
- // b) loop is single exit with no side effects.
- //
+ // b) the loop is guaranteed to be finite (e.g. is mustprogress and has
+ // no side effects within the loop)
+ // c) loop has a single static exit (with no abnormal exits)
//
// Precondition a) implies that if the stride is negative, this is a single
// trip loop. The backedge taken count formula reduces to zero in this case.
//
- // Precondition b) implies that if rhs is invariant in L, then unknown
- // stride being zero means the backedge can't be taken without UB.
+ // Preconditions b) and c) combine to imply that if rhs is invariant in L,
+ // then a zero stride means the backedge can't be taken without executing
+ // undefined behavior.
//
// The positive stride case is the same as isKnownPositive(Stride) returning
// true (original behavior of the function).
//
- // We want to make sure that the stride is truly unknown as there are edge
- // cases where ScalarEvolution propagates no wrap flags to the
- // post-increment/decrement IV even though the increment/decrement operation
- // itself is wrapping. The computed backedge taken count may be wrong in
- // such cases. This is prevented by checking that the stride is not known to
- // be either positive or non-positive. For example, no wrap flags are
- // propagated to the post-increment IV of this loop with a trip count of 2 -
- //
- // unsigned char i;
- // for(i=127; i<128; i+=129)
- // A[i] = i;
- //
- if (PredicatedIV || !NoWrap || isKnownNonPositive(Stride) ||
- !loopIsFiniteByAssumption(L))
+ if (PredicatedIV || !NoWrap || !loopIsFiniteByAssumption(L) ||
+ !loopHasNoAbnormalExits(L))
+ return getCouldNotCompute();
+
+ // This bailout is protecting the logic in computeMaxBECountForLT, which
+ // has not yet been sufficiently audited or tested with negative strides.
+ // We used to filter out all known-non-positive cases here; we're in the
+ // process of being less restrictive bit by bit.
+ if (IsSigned && isKnownNonPositive(Stride))
return getCouldNotCompute();
if (!isKnownNonZero(Stride)) {
@@ -11687,37 +11997,12 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
}
} else if (!Stride->isOne() && !NoWrap) {
auto isUBOnWrap = [&]() {
- // Can we prove this loop *must* be UB if overflow of IV occurs?
- // Reasoning goes as follows:
- // * Suppose the IV did self wrap.
- // * If Stride evenly divides the iteration space, then once wrap
- // occurs, the loop must revisit the same values.
- // * We know that RHS is invariant, and that none of those values
- // caused this exit to be taken previously. Thus, this exit is
- // dynamically dead.
- // * If this is the sole exit, then a dead exit implies the loop
- // must be infinite if there are no abnormal exits.
- // * If the loop were infinite, then it must either not be mustprogress
- // or have side effects. Otherwise, it must be UB.
- // * It can't (by assumption), be UB so we have contradicted our
- // premise and can conclude the IV did not in fact self-wrap.
// From no-self-wrap, we need to then prove no-(un)signed-wrap. This
// follows trivially from the fact that every (un)signed-wrapped, but
// not self-wrapped value must be LT than the last value before
// (un)signed wrap. Since we know that last value didn't exit, nor
// will any smaller one.
-
- if (!isLoopInvariant(RHS, L))
- return false;
-
- auto *StrideC = dyn_cast<SCEVConstant>(Stride);
- if (!StrideC || !StrideC->getAPInt().isPowerOf2())
- return false;
-
- if (!ControlsExit || !loopHasNoAbnormalExits(L))
- return false;
-
- return loopIsFiniteByAssumption(L);
+ return canAssumeNoSelfWrap(IV);
};
// Avoid proven overflow cases: this will ensure that the backedge taken
@@ -11740,7 +12025,9 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
const SCEV *Start = IV->getStart();
// Preserve pointer-typed Start/RHS to pass to isLoopEntryGuardedByCond.
- // Use integer-typed versions for actual computation.
+ // If we convert to integers, isLoopEntryGuardedByCond will miss some cases.
+ // Use integer-typed versions for actual computation; we can't subtract
+ // pointers in general.
const SCEV *OrigStart = Start;
const SCEV *OrigRHS = RHS;
if (Start->getType()->isPointerTy()) {
@@ -11771,10 +12058,13 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
// is End and so the result is as above, and if not max(End,Start) is Start
// so we get a backedge count of zero.
const SCEV *BECount = nullptr;
- auto *StartMinusStride = getMinusSCEV(OrigStart, Stride);
+ auto *OrigStartMinusStride = getMinusSCEV(OrigStart, Stride);
+ assert(isAvailableAtLoopEntry(OrigStartMinusStride, L) && "Must be!");
+ assert(isAvailableAtLoopEntry(OrigStart, L) && "Must be!");
+ assert(isAvailableAtLoopEntry(OrigRHS, L) && "Must be!");
// Can we prove (max(RHS,Start) > Start - Stride?
- if (isLoopEntryGuardedByCond(L, Cond, StartMinusStride, Start) &&
- isLoopEntryGuardedByCond(L, Cond, StartMinusStride, RHS)) {
+ if (isLoopEntryGuardedByCond(L, Cond, OrigStartMinusStride, OrigStart) &&
+ isLoopEntryGuardedByCond(L, Cond, OrigStartMinusStride, OrigRHS)) {
// In this case, we can use a refined formula for computing backedge taken
// count. The general formula remains:
// "End-Start /uceiling Stride" where "End = max(RHS,Start)"
@@ -11795,10 +12085,8 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
// Our preconditions trivially imply no overflow in that form.
const SCEV *MinusOne = getMinusOne(Stride->getType());
const SCEV *Numerator =
- getMinusSCEV(getAddExpr(RHS, MinusOne), StartMinusStride);
- if (!isa<SCEVCouldNotCompute>(Numerator)) {
- BECount = getUDivExpr(Numerator, Stride);
- }
+ getMinusSCEV(getAddExpr(RHS, MinusOne), getMinusSCEV(Start, Stride));
+ BECount = getUDivExpr(Numerator, Stride);
}
const SCEV *BECountIfBackedgeTaken = nullptr;
@@ -12141,7 +12429,7 @@ SCEVAddRecExpr::getPostIncExpr(ScalarEvolution &SE) const {
}
// Return true when S contains at least an undef value.
-static inline bool containsUndefs(const SCEV *S) {
+bool ScalarEvolution::containsUndefs(const SCEV *S) const {
return SCEVExprContains(S, [](const SCEV *S) {
if (const auto *SU = dyn_cast<SCEVUnknown>(S))
return isa<UndefValue>(SU->getValue());
@@ -12149,237 +12437,6 @@ static inline bool containsUndefs(const SCEV *S) {
});
}
-namespace {
-
-// Collect all steps of SCEV expressions.
-struct SCEVCollectStrides {
- ScalarEvolution &SE;
- SmallVectorImpl<const SCEV *> &Strides;
-
- SCEVCollectStrides(ScalarEvolution &SE, SmallVectorImpl<const SCEV *> &S)
- : SE(SE), Strides(S) {}
-
- bool follow(const SCEV *S) {
- if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
- Strides.push_back(AR->getStepRecurrence(SE));
- return true;
- }
-
- bool isDone() const { return false; }
-};
-
-// Collect all SCEVUnknown and SCEVMulExpr expressions.
-struct SCEVCollectTerms {
- SmallVectorImpl<const SCEV *> &Terms;
-
- SCEVCollectTerms(SmallVectorImpl<const SCEV *> &T) : Terms(T) {}
-
- bool follow(const SCEV *S) {
- if (isa<SCEVUnknown>(S) || isa<SCEVMulExpr>(S) ||
- isa<SCEVSignExtendExpr>(S)) {
- if (!containsUndefs(S))
- Terms.push_back(S);
-
- // Stop recursion: once we collected a term, do not walk its operands.
- return false;
- }
-
- // Keep looking.
- return true;
- }
-
- bool isDone() const { return false; }
-};
-
-// Check if a SCEV contains an AddRecExpr.
-struct SCEVHasAddRec {
- bool &ContainsAddRec;
-
- SCEVHasAddRec(bool &ContainsAddRec) : ContainsAddRec(ContainsAddRec) {
- ContainsAddRec = false;
- }
-
- bool follow(const SCEV *S) {
- if (isa<SCEVAddRecExpr>(S)) {
- ContainsAddRec = true;
-
- // Stop recursion: once we collected a term, do not walk its operands.
- return false;
- }
-
- // Keep looking.
- return true;
- }
-
- bool isDone() const { return false; }
-};
-
-// Find factors that are multiplied with an expression that (possibly as a
-// subexpression) contains an AddRecExpr. In the expression:
-//
-// 8 * (100 + %p * %q * (%a + {0, +, 1}_loop))
-//
-// "%p * %q" are factors multiplied by the expression "(%a + {0, +, 1}_loop)"
-// that contains the AddRec {0, +, 1}_loop. %p * %q are likely to be array size
-// parameters as they form a product with an induction variable.
-//
-// This collector expects all array size parameters to be in the same MulExpr.
-// It might be necessary to later add support for collecting parameters that are
-// spread over different nested MulExpr.
-struct SCEVCollectAddRecMultiplies {
- SmallVectorImpl<const SCEV *> &Terms;
- ScalarEvolution &SE;
-
- SCEVCollectAddRecMultiplies(SmallVectorImpl<const SCEV *> &T, ScalarEvolution &SE)
- : Terms(T), SE(SE) {}
-
- bool follow(const SCEV *S) {
- if (auto *Mul = dyn_cast<SCEVMulExpr>(S)) {
- bool HasAddRec = false;
- SmallVector<const SCEV *, 0> Operands;
- for (auto Op : Mul->operands()) {
- const SCEVUnknown *Unknown = dyn_cast<SCEVUnknown>(Op);
- if (Unknown && !isa<CallInst>(Unknown->getValue())) {
- Operands.push_back(Op);
- } else if (Unknown) {
- HasAddRec = true;
- } else {
- bool ContainsAddRec = false;
- SCEVHasAddRec ContiansAddRec(ContainsAddRec);
- visitAll(Op, ContiansAddRec);
- HasAddRec |= ContainsAddRec;
- }
- }
- if (Operands.size() == 0)
- return true;
-
- if (!HasAddRec)
- return false;
-
- Terms.push_back(SE.getMulExpr(Operands));
- // Stop recursion: once we collected a term, do not walk its operands.
- return false;
- }
-
- // Keep looking.
- return true;
- }
-
- bool isDone() const { return false; }
-};
-
-} // end anonymous namespace
-
-/// Find parametric terms in this SCEVAddRecExpr. We first for parameters in
-/// two places:
-/// 1) The strides of AddRec expressions.
-/// 2) Unknowns that are multiplied with AddRec expressions.
-void ScalarEvolution::collectParametricTerms(const SCEV *Expr,
- SmallVectorImpl<const SCEV *> &Terms) {
- SmallVector<const SCEV *, 4> Strides;
- SCEVCollectStrides StrideCollector(*this, Strides);
- visitAll(Expr, StrideCollector);
-
- LLVM_DEBUG({
- dbgs() << "Strides:\n";
- for (const SCEV *S : Strides)
- dbgs() << *S << "\n";
- });
-
- for (const SCEV *S : Strides) {
- SCEVCollectTerms TermCollector(Terms);
- visitAll(S, TermCollector);
- }
-
- LLVM_DEBUG({
- dbgs() << "Terms:\n";
- for (const SCEV *T : Terms)
- dbgs() << *T << "\n";
- });
-
- SCEVCollectAddRecMultiplies MulCollector(Terms, *this);
- visitAll(Expr, MulCollector);
-}
-
-static bool findArrayDimensionsRec(ScalarEvolution &SE,
- SmallVectorImpl<const SCEV *> &Terms,
- SmallVectorImpl<const SCEV *> &Sizes) {
- int Last = Terms.size() - 1;
- const SCEV *Step = Terms[Last];
-
- // End of recursion.
- if (Last == 0) {
- if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Step)) {
- SmallVector<const SCEV *, 2> Qs;
- for (const SCEV *Op : M->operands())
- if (!isa<SCEVConstant>(Op))
- Qs.push_back(Op);
-
- Step = SE.getMulExpr(Qs);
- }
-
- Sizes.push_back(Step);
- return true;
- }
-
- for (const SCEV *&Term : Terms) {
- // Normalize the terms before the next call to findArrayDimensionsRec.
- const SCEV *Q, *R;
- SCEVDivision::divide(SE, Term, Step, &Q, &R);
-
- // Bail out when GCD does not evenly divide one of the terms.
- if (!R->isZero())
- return false;
-
- Term = Q;
- }
-
- // Remove all SCEVConstants.
- erase_if(Terms, [](const SCEV *E) { return isa<SCEVConstant>(E); });
-
- if (Terms.size() > 0)
- if (!findArrayDimensionsRec(SE, Terms, Sizes))
- return false;
-
- Sizes.push_back(Step);
- return true;
-}
-
-// Returns true when one of the SCEVs of Terms contains a SCEVUnknown parameter.
-static inline bool containsParameters(SmallVectorImpl<const SCEV *> &Terms) {
- for (const SCEV *T : Terms)
- if (SCEVExprContains(T, [](const SCEV *S) { return isa<SCEVUnknown>(S); }))
- return true;
-
- return false;
-}
-
-// Return the number of product terms in S.
-static inline int numberOfTerms(const SCEV *S) {
- if (const SCEVMulExpr *Expr = dyn_cast<SCEVMulExpr>(S))
- return Expr->getNumOperands();
- return 1;
-}
-
-static const SCEV *removeConstantFactors(ScalarEvolution &SE, const SCEV *T) {
- if (isa<SCEVConstant>(T))
- return nullptr;
-
- if (isa<SCEVUnknown>(T))
- return T;
-
- if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(T)) {
- SmallVector<const SCEV *, 2> Factors;
- for (const SCEV *Op : M->operands())
- if (!isa<SCEVConstant>(Op))
- Factors.push_back(Op);
-
- return SE.getMulExpr(Factors);
- }
-
- return T;
-}
-
/// Return the size of an element read or written by Inst.
const SCEV *ScalarEvolution::getElementSize(Instruction *Inst) {
Type *Ty;
@@ -12394,248 +12451,6 @@ const SCEV *ScalarEvolution::getElementSize(Instruction *Inst) {
return getSizeOfExpr(ETy, Ty);
}
-void ScalarEvolution::findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms,
- SmallVectorImpl<const SCEV *> &Sizes,
- const SCEV *ElementSize) {
- if (Terms.size() < 1 || !ElementSize)
- return;
-
- // Early return when Terms do not contain parameters: we do not delinearize
- // non parametric SCEVs.
- if (!containsParameters(Terms))
- return;
-
- LLVM_DEBUG({
- dbgs() << "Terms:\n";
- for (const SCEV *T : Terms)
- dbgs() << *T << "\n";
- });
-
- // Remove duplicates.
- array_pod_sort(Terms.begin(), Terms.end());
- Terms.erase(std::unique(Terms.begin(), Terms.end()), Terms.end());
-
- // Put larger terms first.
- llvm::sort(Terms, [](const SCEV *LHS, const SCEV *RHS) {
- return numberOfTerms(LHS) > numberOfTerms(RHS);
- });
-
- // Try to divide all terms by the element size. If term is not divisible by
- // element size, proceed with the original term.
- for (const SCEV *&Term : Terms) {
- const SCEV *Q, *R;
- SCEVDivision::divide(*this, Term, ElementSize, &Q, &R);
- if (!Q->isZero())
- Term = Q;
- }
-
- SmallVector<const SCEV *, 4> NewTerms;
-
- // Remove constant factors.
- for (const SCEV *T : Terms)
- if (const SCEV *NewT = removeConstantFactors(*this, T))
- NewTerms.push_back(NewT);
-
- LLVM_DEBUG({
- dbgs() << "Terms after sorting:\n";
- for (const SCEV *T : NewTerms)
- dbgs() << *T << "\n";
- });
-
- if (NewTerms.empty() || !findArrayDimensionsRec(*this, NewTerms, Sizes)) {
- Sizes.clear();
- return;
- }
-
- // The last element to be pushed into Sizes is the size of an element.
- Sizes.push_back(ElementSize);
-
- LLVM_DEBUG({
- dbgs() << "Sizes:\n";
- for (const SCEV *S : Sizes)
- dbgs() << *S << "\n";
- });
-}
-
-void ScalarEvolution::computeAccessFunctions(
- const SCEV *Expr, SmallVectorImpl<const SCEV *> &Subscripts,
- SmallVectorImpl<const SCEV *> &Sizes) {
- // Early exit in case this SCEV is not an affine multivariate function.
- if (Sizes.empty())
- return;
-
- if (auto *AR = dyn_cast<SCEVAddRecExpr>(Expr))
- if (!AR->isAffine())
- return;
-
- const SCEV *Res = Expr;
- int Last = Sizes.size() - 1;
- for (int i = Last; i >= 0; i--) {
- const SCEV *Q, *R;
- SCEVDivision::divide(*this, Res, Sizes[i], &Q, &R);
-
- LLVM_DEBUG({
- dbgs() << "Res: " << *Res << "\n";
- dbgs() << "Sizes[i]: " << *Sizes[i] << "\n";
- dbgs() << "Res divided by Sizes[i]:\n";
- dbgs() << "Quotient: " << *Q << "\n";
- dbgs() << "Remainder: " << *R << "\n";
- });
-
- Res = Q;
-
- // Do not record the last subscript corresponding to the size of elements in
- // the array.
- if (i == Last) {
-
- // Bail out if the remainder is too complex.
- if (isa<SCEVAddRecExpr>(R)) {
- Subscripts.clear();
- Sizes.clear();
- return;
- }
-
- continue;
- }
-
- // Record the access function for the current subscript.
- Subscripts.push_back(R);
- }
-
- // Also push in last position the remainder of the last division: it will be
- // the access function of the innermost dimension.
- Subscripts.push_back(Res);
-
- std::reverse(Subscripts.begin(), Subscripts.end());
-
- LLVM_DEBUG({
- dbgs() << "Subscripts:\n";
- for (const SCEV *S : Subscripts)
- dbgs() << *S << "\n";
- });
-}
-
-/// Splits the SCEV into two vectors of SCEVs representing the subscripts and
-/// sizes of an array access. Returns the remainder of the delinearization that
-/// is the offset start of the array. The SCEV->delinearize algorithm computes
-/// the multiples of SCEV coefficients: that is a pattern matching of sub
-/// expressions in the stride and base of a SCEV corresponding to the
-/// computation of a GCD (greatest common divisor) of base and stride. When
-/// SCEV->delinearize fails, it returns the SCEV unchanged.
-///
-/// For example: when analyzing the memory access A[i][j][k] in this loop nest
-///
-/// void foo(long n, long m, long o, double A[n][m][o]) {
-///
-/// for (long i = 0; i < n; i++)
-/// for (long j = 0; j < m; j++)
-/// for (long k = 0; k < o; k++)
-/// A[i][j][k] = 1.0;
-/// }
-///
-/// the delinearization input is the following AddRec SCEV:
-///
-/// AddRec: {{{%A,+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k>
-///
-/// From this SCEV, we are able to say that the base offset of the access is %A
-/// because it appears as an offset that does not divide any of the strides in
-/// the loops:
-///
-/// CHECK: Base offset: %A
-///
-/// and then SCEV->delinearize determines the size of some of the dimensions of
-/// the array as these are the multiples by which the strides are happening:
-///
-/// CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double) bytes.
-///
-/// Note that the outermost dimension remains of UnknownSize because there are
-/// no strides that would help identifying the size of the last dimension: when
-/// the array has been statically allocated, one could compute the size of that
-/// dimension by dividing the overall size of the array by the size of the known
-/// dimensions: %m * %o * 8.
-///
-/// Finally delinearize provides the access functions for the array reference
-/// that does correspond to A[i][j][k] of the above C testcase:
-///
-/// CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>]
-///
-/// The testcases are checking the output of a function pass:
-/// DelinearizationPass that walks through all loads and stores of a function
-/// asking for the SCEV of the memory access with respect to all enclosing
-/// loops, calling SCEV->delinearize on that and printing the results.
-void ScalarEvolution::delinearize(const SCEV *Expr,
- SmallVectorImpl<const SCEV *> &Subscripts,
- SmallVectorImpl<const SCEV *> &Sizes,
- const SCEV *ElementSize) {
- // First step: collect parametric terms.
- SmallVector<const SCEV *, 4> Terms;
- collectParametricTerms(Expr, Terms);
-
- if (Terms.empty())
- return;
-
- // Second step: find subscript sizes.
- findArrayDimensions(Terms, Sizes, ElementSize);
-
- if (Sizes.empty())
- return;
-
- // Third step: compute the access functions for each subscript.
- computeAccessFunctions(Expr, Subscripts, Sizes);
-
- if (Subscripts.empty())
- return;
-
- LLVM_DEBUG({
- dbgs() << "succeeded to delinearize " << *Expr << "\n";
- dbgs() << "ArrayDecl[UnknownSize]";
- for (const SCEV *S : Sizes)
- dbgs() << "[" << *S << "]";
-
- dbgs() << "\nArrayRef";
- for (const SCEV *S : Subscripts)
- dbgs() << "[" << *S << "]";
- dbgs() << "\n";
- });
-}
-
-bool ScalarEvolution::getIndexExpressionsFromGEP(
- const GetElementPtrInst *GEP, SmallVectorImpl<const SCEV *> &Subscripts,
- SmallVectorImpl<int> &Sizes) {
- assert(Subscripts.empty() && Sizes.empty() &&
- "Expected output lists to be empty on entry to this function.");
- assert(GEP && "getIndexExpressionsFromGEP called with a null GEP");
- Type *Ty = nullptr;
- bool DroppedFirstDim = false;
- for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
- const SCEV *Expr = getSCEV(GEP->getOperand(i));
- if (i == 1) {
- Ty = GEP->getSourceElementType();
- if (auto *Const = dyn_cast<SCEVConstant>(Expr))
- if (Const->getValue()->isZero()) {
- DroppedFirstDim = true;
- continue;
- }
- Subscripts.push_back(Expr);
- continue;
- }
-
- auto *ArrayTy = dyn_cast<ArrayType>(Ty);
- if (!ArrayTy) {
- Subscripts.clear();
- Sizes.clear();
- return false;
- }
-
- Subscripts.push_back(Expr);
- if (!(DroppedFirstDim && i == 2))
- Sizes.push_back(ArrayTy->getNumElements());
-
- Ty = ArrayTy->getElementType();
- }
- return !Subscripts.empty();
-}
-
//===----------------------------------------------------------------------===//
// SCEVCallbackVH Class Implementation
//===----------------------------------------------------------------------===//
@@ -12722,6 +12537,7 @@ ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg)
LoopDispositions(std::move(Arg.LoopDispositions)),
LoopPropertiesCache(std::move(Arg.LoopPropertiesCache)),
BlockDispositions(std::move(Arg.BlockDispositions)),
+ SCEVUsers(std::move(Arg.SCEVUsers)),
UnsignedRanges(std::move(Arg.UnsignedRanges)),
SignedRanges(std::move(Arg.SignedRanges)),
UniqueSCEVs(std::move(Arg.UniqueSCEVs)),
@@ -12934,7 +12750,7 @@ ScalarEvolution::getLoopDisposition(const SCEV *S, const Loop *L) {
Values.emplace_back(L, LoopVariant);
LoopDisposition D = computeLoopDisposition(S, L);
auto &Values2 = LoopDispositions[S];
- for (auto &V : make_range(Values2.rbegin(), Values2.rend())) {
+ for (auto &V : llvm::reverse(Values2)) {
if (V.getPointer() == L) {
V.setInt(D);
break;
@@ -13042,7 +12858,7 @@ ScalarEvolution::getBlockDisposition(const SCEV *S, const BasicBlock *BB) {
Values.emplace_back(BB, DoesNotDominateBlock);
BlockDisposition D = computeBlockDisposition(S, BB);
auto &Values2 = BlockDispositions[S];
- for (auto &V : make_range(Values2.rbegin(), Values2.rend())) {
+ for (auto &V : llvm::reverse(Values2)) {
if (V.getPointer() == BB) {
V.setInt(D);
break;
@@ -13130,41 +12946,58 @@ bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const {
return SCEVExprContains(S, [&](const SCEV *Expr) { return Expr == Op; });
}
-void
-ScalarEvolution::forgetMemoizedResults(const SCEV *S) {
- ValuesAtScopes.erase(S);
- LoopDispositions.erase(S);
- BlockDispositions.erase(S);
- UnsignedRanges.erase(S);
- SignedRanges.erase(S);
- ExprValueMap.erase(S);
- HasRecMap.erase(S);
- MinTrailingZerosCache.erase(S);
+void ScalarEvolution::forgetMemoizedResults(ArrayRef<const SCEV *> SCEVs) {
+ SmallPtrSet<const SCEV *, 8> ToForget(SCEVs.begin(), SCEVs.end());
+ SmallVector<const SCEV *, 8> Worklist(ToForget.begin(), ToForget.end());
+
+ while (!Worklist.empty()) {
+ const SCEV *Curr = Worklist.pop_back_val();
+ auto Users = SCEVUsers.find(Curr);
+ if (Users != SCEVUsers.end())
+ for (auto *User : Users->second)
+ if (ToForget.insert(User).second)
+ Worklist.push_back(User);
+ }
+
+ for (auto *S : ToForget)
+ forgetMemoizedResultsImpl(S);
for (auto I = PredicatedSCEVRewrites.begin();
I != PredicatedSCEVRewrites.end();) {
std::pair<const SCEV *, const Loop *> Entry = I->first;
- if (Entry.first == S)
+ if (ToForget.count(Entry.first))
PredicatedSCEVRewrites.erase(I++);
else
++I;
}
- auto RemoveSCEVFromBackedgeMap =
- [S](DenseMap<const Loop *, BackedgeTakenInfo> &Map) {
+ auto RemoveSCEVFromBackedgeMap = [&ToForget](
+ DenseMap<const Loop *, BackedgeTakenInfo> &Map) {
for (auto I = Map.begin(), E = Map.end(); I != E;) {
BackedgeTakenInfo &BEInfo = I->second;
- if (BEInfo.hasOperand(S))
+ if (any_of(ToForget,
+ [&BEInfo](const SCEV *S) { return BEInfo.hasOperand(S); }))
Map.erase(I++);
else
++I;
}
- };
+ };
RemoveSCEVFromBackedgeMap(BackedgeTakenCounts);
RemoveSCEVFromBackedgeMap(PredicatedBackedgeTakenCounts);
}
+void ScalarEvolution::forgetMemoizedResultsImpl(const SCEV *S) {
+ ValuesAtScopes.erase(S);
+ LoopDispositions.erase(S);
+ BlockDispositions.erase(S);
+ UnsignedRanges.erase(S);
+ SignedRanges.erase(S);
+ ExprValueMap.erase(S);
+ HasRecMap.erase(S);
+ MinTrailingZerosCache.erase(S);
+}
+
void
ScalarEvolution::getUsedLoops(const SCEV *S,
SmallPtrSetImpl<const Loop *> &LoopsUsed) {
@@ -13185,13 +13018,6 @@ ScalarEvolution::getUsedLoops(const SCEV *S,
SCEVTraversal<FindUsedLoops>(F).visitAll(S);
}
-void ScalarEvolution::addToLoopUseLists(const SCEV *S) {
- SmallPtrSet<const Loop *, 8> LoopsUsed;
- getUsedLoops(S, LoopsUsed);
- for (auto *L : LoopsUsed)
- LoopUsers[L].push_back(S);
-}
-
void ScalarEvolution::verify() const {
ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
ScalarEvolution SE2(F, TLI, AC, DT, LI);
@@ -13282,6 +13108,23 @@ void ScalarEvolution::verify() const {
assert(ValidLoops.contains(AR->getLoop()) &&
"AddRec references invalid loop");
}
+
+  // Verify integrity of SCEV users.
+ for (const auto &S : UniqueSCEVs) {
+ SmallVector<const SCEV *, 4> Ops;
+ collectUniqueOps(&S, Ops);
+ for (const auto *Op : Ops) {
+ // We do not store dependencies of constants.
+ if (isa<SCEVConstant>(Op))
+ continue;
+ auto It = SCEVUsers.find(Op);
+ if (It != SCEVUsers.end() && It->second.count(&S))
+ continue;
+ dbgs() << "Use of operand " << *Op << " by user " << S
+ << " is not being tracked!\n";
+ std::abort();
+ }
+ }
}
bool ScalarEvolution::invalidate(
@@ -13685,6 +13528,16 @@ PredicatedScalarEvolution::PredicatedScalarEvolution(ScalarEvolution &SE,
Loop &L)
: SE(SE), L(L) {}
+void ScalarEvolution::registerUser(const SCEV *User,
+ ArrayRef<const SCEV *> Ops) {
+ for (auto *Op : Ops)
+ // We do not expect that forgetting cached data for SCEVConstants will ever
+ // open any prospects for sharpening or introduce any correctness issues,
+ // so we don't bother storing their dependencies.
+ if (!isa<SCEVConstant>(Op))
+ SCEVUsers[Op].insert(User);
+}
+
const SCEV *PredicatedScalarEvolution::getSCEV(Value *V) {
const SCEV *Expr = SE.getSCEV(V);
RewriteEntry &Entry = RewriteMap[Expr];
@@ -13897,52 +13750,51 @@ ScalarEvolution::computeSymbolicMaxBackedgeTakenCount(const Loop *L) {
return getUMinFromMismatchedTypes(ExitCounts);
}
-/// This rewriter is similar to SCEVParameterRewriter (it replaces SCEVUnknown
-/// components following the Map (Value -> SCEV)), but skips AddRecExpr because
-/// we cannot guarantee that the replacement is loop invariant in the loop of
-/// the AddRec.
+/// A rewriter to replace SCEV expressions in Map with the corresponding entry
+/// in the map. It skips AddRecExpr because we cannot guarantee that the
+/// replacement is loop invariant in the loop of the AddRec.
+///
+/// At the moment only rewriting SCEVUnknown and SCEVZeroExtendExpr is
+/// supported.
class SCEVLoopGuardRewriter : public SCEVRewriteVisitor<SCEVLoopGuardRewriter> {
- ValueToSCEVMapTy &Map;
+ const DenseMap<const SCEV *, const SCEV *> &Map;
public:
- SCEVLoopGuardRewriter(ScalarEvolution &SE, ValueToSCEVMapTy &M)
+ SCEVLoopGuardRewriter(ScalarEvolution &SE,
+ DenseMap<const SCEV *, const SCEV *> &M)
: SCEVRewriteVisitor(SE), Map(M) {}
const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { return Expr; }
const SCEV *visitUnknown(const SCEVUnknown *Expr) {
- auto I = Map.find(Expr->getValue());
+ auto I = Map.find(Expr);
if (I == Map.end())
return Expr;
return I->second;
}
+
+ const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
+ auto I = Map.find(Expr);
+ if (I == Map.end())
+ return SCEVRewriteVisitor<SCEVLoopGuardRewriter>::visitZeroExtendExpr(
+ Expr);
+ return I->second;
+ }
};
const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) {
+ SmallVector<const SCEV *> ExprsToRewrite;
auto CollectCondition = [&](ICmpInst::Predicate Predicate, const SCEV *LHS,
- const SCEV *RHS, ValueToSCEVMapTy &RewriteMap) {
- // If we have LHS == 0, check if LHS is computing a property of some unknown
- // SCEV %v which we can rewrite %v to express explicitly.
- const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS);
- if (Predicate == CmpInst::ICMP_EQ && RHSC &&
- RHSC->getValue()->isNullValue()) {
- // If LHS is A % B, i.e. A % B == 0, rewrite A to (A /u B) * B to
- // explicitly express that.
- const SCEV *URemLHS = nullptr;
- const SCEV *URemRHS = nullptr;
- if (matchURem(LHS, URemLHS, URemRHS)) {
- if (const SCEVUnknown *LHSUnknown = dyn_cast<SCEVUnknown>(URemLHS)) {
- Value *V = LHSUnknown->getValue();
- auto Multiple =
- getMulExpr(getUDivExpr(URemLHS, URemRHS), URemRHS,
- (SCEV::NoWrapFlags)(SCEV::FlagNUW | SCEV::FlagNSW));
- RewriteMap[V] = Multiple;
- return;
- }
- }
- }
-
- if (!isa<SCEVUnknown>(LHS) && isa<SCEVUnknown>(RHS)) {
+ const SCEV *RHS,
+ DenseMap<const SCEV *, const SCEV *>
+ &RewriteMap) {
+ // WARNING: It is generally unsound to apply any wrap flags to the proposed
+ // replacement SCEV which isn't directly implied by the structure of that
+ // SCEV. In particular, using contextual facts to imply flags is *NOT*
+ // legal. See the scoping rules for flags in the header to understand why.
+
+ // If LHS is a constant, apply information to the other expression.
+ if (isa<SCEVConstant>(LHS)) {
std::swap(LHS, RHS);
Predicate = CmpInst::getSwappedPredicate(Predicate);
}
@@ -13950,7 +13802,8 @@ const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) {
// Check for a condition of the form (-C1 + X < C2). InstCombine will
// create this form when combining two checks of the form (X u< C2 + C1) and
// (X >=u C1).
- auto MatchRangeCheckIdiom = [this, Predicate, LHS, RHS, &RewriteMap]() {
+ auto MatchRangeCheckIdiom = [this, Predicate, LHS, RHS, &RewriteMap,
+ &ExprsToRewrite]() {
auto *AddExpr = dyn_cast<SCEVAddExpr>(LHS);
if (!AddExpr || AddExpr->getNumOperands() != 2)
return false;
@@ -13968,26 +13821,55 @@ const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) {
// Bail out, unless we have a non-wrapping, monotonic range.
if (ExactRegion.isWrappedSet() || ExactRegion.isFullSet())
return false;
- auto I = RewriteMap.find(LHSUnknown->getValue());
- const SCEV *RewrittenLHS = I != RewriteMap.end() ? I->second : LHS;
- RewriteMap[LHSUnknown->getValue()] = getUMaxExpr(
+ auto I = RewriteMap.find(LHSUnknown);
+ const SCEV *RewrittenLHS = I != RewriteMap.end() ? I->second : LHSUnknown;
+ RewriteMap[LHSUnknown] = getUMaxExpr(
getConstant(ExactRegion.getUnsignedMin()),
getUMinExpr(RewrittenLHS, getConstant(ExactRegion.getUnsignedMax())));
+ ExprsToRewrite.push_back(LHSUnknown);
return true;
};
if (MatchRangeCheckIdiom())
return;
- // For now, limit to conditions that provide information about unknown
- // expressions. RHS also cannot contain add recurrences.
- auto *LHSUnknown = dyn_cast<SCEVUnknown>(LHS);
- if (!LHSUnknown || containsAddRecurrence(RHS))
+ // If we have LHS == 0, check if LHS is computing a property of some unknown
+ // SCEV %v which we can rewrite %v to express explicitly.
+ const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS);
+ if (Predicate == CmpInst::ICMP_EQ && RHSC &&
+ RHSC->getValue()->isNullValue()) {
+ // If LHS is A % B, i.e. A % B == 0, rewrite A to (A /u B) * B to
+ // explicitly express that.
+ const SCEV *URemLHS = nullptr;
+ const SCEV *URemRHS = nullptr;
+ if (matchURem(LHS, URemLHS, URemRHS)) {
+ if (const SCEVUnknown *LHSUnknown = dyn_cast<SCEVUnknown>(URemLHS)) {
+ auto Multiple = getMulExpr(getUDivExpr(URemLHS, URemRHS), URemRHS);
+ RewriteMap[LHSUnknown] = Multiple;
+ ExprsToRewrite.push_back(LHSUnknown);
+ return;
+ }
+ }
+ }
+
+ // Do not apply information for constants or if RHS contains an AddRec.
+ if (isa<SCEVConstant>(LHS) || containsAddRecurrence(RHS))
+ return;
+
+ // If RHS is SCEVUnknown, make sure the information is applied to it.
+ if (!isa<SCEVUnknown>(LHS) && isa<SCEVUnknown>(RHS)) {
+ std::swap(LHS, RHS);
+ Predicate = CmpInst::getSwappedPredicate(Predicate);
+ }
+
+ // Limit to expressions that can be rewritten.
+ if (!isa<SCEVUnknown>(LHS) && !isa<SCEVZeroExtendExpr>(LHS))
return;
// Check whether LHS has already been rewritten. In that case we want to
// chain further rewrites onto the already rewritten value.
- auto I = RewriteMap.find(LHSUnknown->getValue());
+ auto I = RewriteMap.find(LHS);
const SCEV *RewrittenLHS = I != RewriteMap.end() ? I->second : LHS;
+
const SCEV *RewrittenRHS = nullptr;
switch (Predicate) {
case CmpInst::ICMP_ULT:
@@ -14031,14 +13913,17 @@ const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) {
break;
}
- if (RewrittenRHS)
- RewriteMap[LHSUnknown->getValue()] = RewrittenRHS;
+ if (RewrittenRHS) {
+ RewriteMap[LHS] = RewrittenRHS;
+ if (LHS == RewrittenLHS)
+ ExprsToRewrite.push_back(LHS);
+ }
};
// Starting at the loop predecessor, climb up the predecessor chain, as long
// as there are predecessors that can be found that have unique successors
// leading to the original header.
// TODO: share this logic with isLoopEntryGuardedByCond.
- ValueToSCEVMapTy RewriteMap;
+ DenseMap<const SCEV *, const SCEV *> RewriteMap;
for (std::pair<const BasicBlock *, const BasicBlock *> Pair(
L->getLoopPredecessor(), L->getHeader());
Pair.first; Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) {
@@ -14088,6 +13973,19 @@ const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) {
if (RewriteMap.empty())
return Expr;
+
+  // Now that all rewrite information is collected, rewrite the collected
+ // expressions with the information in the map. This applies information to
+ // sub-expressions.
+ if (ExprsToRewrite.size() > 1) {
+ for (const SCEV *Expr : ExprsToRewrite) {
+ const SCEV *RewriteTo = RewriteMap[Expr];
+ RewriteMap.erase(Expr);
+ SCEVLoopGuardRewriter Rewriter(*this, RewriteMap);
+ RewriteMap.insert({Expr, Rewriter.visit(RewriteTo)});
+ }
+ }
+
SCEVLoopGuardRewriter Rewriter(*this, RewriteMap);
return Rewriter.visit(Expr);
}
diff --git a/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
index 2262fc9d7913..f4fa159d1ec7 100644
--- a/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -23,6 +23,15 @@
#include "llvm/InitializePasses.h"
using namespace llvm;
+static bool canComputePointerDiff(ScalarEvolution &SE,
+ const SCEV *A, const SCEV *B) {
+ if (SE.getEffectiveSCEVType(A->getType()) !=
+ SE.getEffectiveSCEVType(B->getType()))
+ return false;
+
+ return SE.instructionCouldExistWitthOperands(A, B);
+}
+
AliasResult SCEVAAResult::alias(const MemoryLocation &LocA,
const MemoryLocation &LocB, AAQueryInfo &AAQI) {
// If either of the memory references is empty, it doesn't matter what the
@@ -41,8 +50,7 @@ AliasResult SCEVAAResult::alias(const MemoryLocation &LocA,
// If something is known about the difference between the two addresses,
// see if it's enough to prove a NoAlias.
- if (SE.getEffectiveSCEVType(AS->getType()) ==
- SE.getEffectiveSCEVType(BS->getType())) {
+ if (canComputePointerDiff(SE, AS, BS)) {
unsigned BitWidth = SE.getTypeSizeInBits(AS->getType());
APInt ASizeInt(BitWidth, LocA.Size.hasValue()
? LocA.Size.getValue()
diff --git a/llvm/lib/Analysis/StackLifetime.cpp b/llvm/lib/Analysis/StackLifetime.cpp
index ab5f2db7d1cd..9056cc01484d 100644
--- a/llvm/lib/Analysis/StackLifetime.cpp
+++ b/llvm/lib/Analysis/StackLifetime.cpp
@@ -257,14 +257,12 @@ void StackLifetime::calculateLiveIntervals() {
unsigned AllocaNo = It.second.AllocaNo;
if (IsStart) {
- assert(!Started.test(AllocaNo) || Start[AllocaNo] == BBStart);
if (!Started.test(AllocaNo)) {
Started.set(AllocaNo);
Ended.reset(AllocaNo);
Start[AllocaNo] = InstNo;
}
} else {
- assert(!Ended.test(AllocaNo));
if (Started.test(AllocaNo)) {
LiveRanges[AllocaNo].addRange(Start[AllocaNo], InstNo);
Started.reset(AllocaNo);
@@ -400,3 +398,19 @@ PreservedAnalyses StackLifetimePrinterPass::run(Function &F,
SL.print(OS);
return PreservedAnalyses::all();
}
+
+void StackLifetimePrinterPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<StackLifetimePrinterPass> *>(this)->printPipeline(
+ OS, MapClassName2PassName);
+ OS << "<";
+ switch (Type) {
+ case StackLifetime::LivenessType::May:
+ OS << "may";
+ break;
+ case StackLifetime::LivenessType::Must:
+ OS << "must";
+ break;
+ }
+ OS << ">";
+}
diff --git a/llvm/lib/Analysis/StackSafetyAnalysis.cpp b/llvm/lib/Analysis/StackSafetyAnalysis.cpp
index 76f195fedf31..74cc39b7f2c0 100644
--- a/llvm/lib/Analysis/StackSafetyAnalysis.cpp
+++ b/llvm/lib/Analysis/StackSafetyAnalysis.cpp
@@ -30,6 +30,7 @@
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <memory>
+#include <tuple>
using namespace llvm;
@@ -116,6 +117,7 @@ template <typename CalleeTy> struct UseInfo {
// Access range if the address (alloca or parameters).
// It is allowed to be empty-set when there are no known accesses.
ConstantRange Range;
+ std::map<const Instruction *, ConstantRange> Accesses;
// List of calls which pass address as an argument.
// Value is offset range of address from base address (alloca or calling
@@ -129,6 +131,12 @@ template <typename CalleeTy> struct UseInfo {
UseInfo(unsigned PointerSize) : Range{PointerSize, false} {}
void updateRange(const ConstantRange &R) { Range = unionNoWrap(Range, R); }
+ void addRange(const Instruction *I, const ConstantRange &R) {
+ auto Ins = Accesses.emplace(I, R);
+ if (!Ins.second)
+ Ins.first->second = unionNoWrap(Ins.first->second, R);
+ updateRange(R);
+ }
};
template <typename CalleeTy>
@@ -146,7 +154,7 @@ raw_ostream &operator<<(raw_ostream &OS, const UseInfo<CalleeTy> &U) {
ConstantRange getStaticAllocaSizeRange(const AllocaInst &AI) {
const DataLayout &DL = AI.getModule()->getDataLayout();
TypeSize TS = DL.getTypeAllocSize(AI.getAllocatedType());
- unsigned PointerSize = DL.getMaxPointerSizeInBits();
+ unsigned PointerSize = DL.getPointerTypeSizeInBits(AI.getType());
// Fallback to empty range for alloca size.
ConstantRange R = ConstantRange::getEmpty(PointerSize);
if (TS.isScalable())
@@ -167,7 +175,7 @@ ConstantRange getStaticAllocaSizeRange(const AllocaInst &AI) {
if (Overflow)
return R;
}
- R = ConstantRange(APInt::getNullValue(PointerSize), APSize);
+ R = ConstantRange(APInt::getZero(PointerSize), APSize);
assert(!isUnsafe(R));
return R;
}
@@ -208,7 +216,6 @@ template <typename CalleeTy> struct FunctionInfo {
} else {
assert(Allocas.empty());
}
- O << "\n";
}
};
@@ -223,6 +230,7 @@ struct StackSafetyInfo::InfoTy {
struct StackSafetyGlobalInfo::InfoTy {
GVToSSI Info;
SmallPtrSet<const AllocaInst *, 8> SafeAllocas;
+ std::map<const Instruction *, bool> AccessIsUnsafe;
};
namespace {
@@ -242,7 +250,7 @@ class StackSafetyLocalAnalysis {
ConstantRange getMemIntrinsicAccessRange(const MemIntrinsic *MI, const Use &U,
Value *Base);
- bool analyzeAllUses(Value *Ptr, UseInfo<GlobalValue> &AS,
+ void analyzeAllUses(Value *Ptr, UseInfo<GlobalValue> &AS,
const StackLifetime &SL);
public:
@@ -297,8 +305,8 @@ ConstantRange StackSafetyLocalAnalysis::getAccessRange(Value *Addr, Value *Base,
APInt APSize(PointerSize, Size.getFixedSize(), true);
if (APSize.isNegative())
return UnknownRange;
- return getAccessRange(
- Addr, Base, ConstantRange(APInt::getNullValue(PointerSize), APSize));
+ return getAccessRange(Addr, Base,
+ ConstantRange(APInt::getZero(PointerSize), APSize));
}
ConstantRange StackSafetyLocalAnalysis::getMemIntrinsicAccessRange(
@@ -321,14 +329,13 @@ ConstantRange StackSafetyLocalAnalysis::getMemIntrinsicAccessRange(
if (Sizes.getUpper().isNegative() || isUnsafe(Sizes))
return UnknownRange;
Sizes = Sizes.sextOrTrunc(PointerSize);
- ConstantRange SizeRange(APInt::getNullValue(PointerSize),
- Sizes.getUpper() - 1);
+ ConstantRange SizeRange(APInt::getZero(PointerSize), Sizes.getUpper() - 1);
return getAccessRange(U, Base, SizeRange);
}
/// The function analyzes all local uses of Ptr (alloca or argument) and
/// calculates local access range and all function calls where it was used.
-bool StackSafetyLocalAnalysis::analyzeAllUses(Value *Ptr,
+void StackSafetyLocalAnalysis::analyzeAllUses(Value *Ptr,
UseInfo<GlobalValue> &US,
const StackLifetime &SL) {
SmallPtrSet<const Value *, 16> Visited;
@@ -349,11 +356,11 @@ bool StackSafetyLocalAnalysis::analyzeAllUses(Value *Ptr,
switch (I->getOpcode()) {
case Instruction::Load: {
if (AI && !SL.isAliveAfter(AI, I)) {
- US.updateRange(UnknownRange);
- return false;
+ US.addRange(I, UnknownRange);
+ break;
}
- US.updateRange(
- getAccessRange(UI, Ptr, DL.getTypeStoreSize(I->getType())));
+ US.addRange(I,
+ getAccessRange(UI, Ptr, DL.getTypeStoreSize(I->getType())));
break;
}
@@ -363,15 +370,16 @@ bool StackSafetyLocalAnalysis::analyzeAllUses(Value *Ptr,
case Instruction::Store: {
if (V == I->getOperand(0)) {
// Stored the pointer - conservatively assume it may be unsafe.
- US.updateRange(UnknownRange);
- return false;
+ US.addRange(I, UnknownRange);
+ break;
}
if (AI && !SL.isAliveAfter(AI, I)) {
- US.updateRange(UnknownRange);
- return false;
+ US.addRange(I, UnknownRange);
+ break;
}
- US.updateRange(getAccessRange(
- UI, Ptr, DL.getTypeStoreSize(I->getOperand(0)->getType())));
+ US.addRange(
+ I, getAccessRange(
+ UI, Ptr, DL.getTypeStoreSize(I->getOperand(0)->getType())));
break;
}
@@ -379,8 +387,8 @@ bool StackSafetyLocalAnalysis::analyzeAllUses(Value *Ptr,
// Information leak.
// FIXME: Process parameters correctly. This is a leak only if we return
// alloca.
- US.updateRange(UnknownRange);
- return false;
+ US.addRange(I, UnknownRange);
+ break;
case Instruction::Call:
case Instruction::Invoke: {
@@ -388,25 +396,31 @@ bool StackSafetyLocalAnalysis::analyzeAllUses(Value *Ptr,
break;
if (AI && !SL.isAliveAfter(AI, I)) {
- US.updateRange(UnknownRange);
- return false;
+ US.addRange(I, UnknownRange);
+ break;
}
if (const MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
- US.updateRange(getMemIntrinsicAccessRange(MI, UI, Ptr));
+ US.addRange(I, getMemIntrinsicAccessRange(MI, UI, Ptr));
break;
}
const auto &CB = cast<CallBase>(*I);
+ if (CB.getReturnedArgOperand() == V) {
+ if (Visited.insert(I).second)
+ WorkList.push_back(cast<const Instruction>(I));
+ }
+
if (!CB.isArgOperand(&UI)) {
- US.updateRange(UnknownRange);
- return false;
+ US.addRange(I, UnknownRange);
+ break;
}
unsigned ArgNo = CB.getArgOperandNo(&UI);
if (CB.isByValArgument(ArgNo)) {
- US.updateRange(getAccessRange(
- UI, Ptr, DL.getTypeStoreSize(CB.getParamByValType(ArgNo))));
+ US.addRange(I, getAccessRange(
+ UI, Ptr,
+ DL.getTypeStoreSize(CB.getParamByValType(ArgNo))));
break;
}
@@ -416,8 +430,8 @@ bool StackSafetyLocalAnalysis::analyzeAllUses(Value *Ptr,
const GlobalValue *Callee =
dyn_cast<GlobalValue>(CB.getCalledOperand()->stripPointerCasts());
if (!Callee) {
- US.updateRange(UnknownRange);
- return false;
+ US.addRange(I, UnknownRange);
+ break;
}
assert(isa<Function>(Callee) || isa<GlobalAlias>(Callee));
@@ -435,8 +449,6 @@ bool StackSafetyLocalAnalysis::analyzeAllUses(Value *Ptr,
}
}
}
-
- return true;
}
FunctionInfo<GlobalValue> StackSafetyLocalAnalysis::run() {
@@ -468,7 +480,7 @@ FunctionInfo<GlobalValue> StackSafetyLocalAnalysis::run() {
}
LLVM_DEBUG(Info.print(dbgs(), F.getName(), &F));
- LLVM_DEBUG(dbgs() << "[StackSafety] done\n");
+ LLVM_DEBUG(dbgs() << "\n[StackSafety] done\n");
return Info;
}
@@ -588,8 +600,7 @@ void StackSafetyDataFlowAnalysis<CalleeTy>::runDataFlow() {
updateAllNodes();
while (!WorkList.empty()) {
- const CalleeTy *Callee = WorkList.back();
- WorkList.pop_back();
+ const CalleeTy *Callee = WorkList.pop_back_val();
updateOneNode(Callee);
}
}
@@ -674,7 +685,7 @@ const Function *findCalleeInModule(const GlobalValue *GV) {
const GlobalAlias *A = dyn_cast<GlobalAlias>(GV);
if (!A)
return nullptr;
- GV = A->getBaseObject();
+ GV = A->getAliaseeObject();
if (GV == A)
return nullptr;
}
@@ -741,10 +752,8 @@ GVToSSI createGlobalStackSafetyInfo(
KV.second.Calls.clear();
}
- uint32_t PointerSize = Copy.begin()
- ->first->getParent()
- ->getDataLayout()
- .getMaxPointerSizeInBits();
+ uint32_t PointerSize =
+ Copy.begin()->first->getParent()->getDataLayout().getPointerSizeInBits();
StackSafetyDataFlowAnalysis<GlobalValue> SSDFA(PointerSize, std::move(Copy));
for (auto &F : SSDFA.run()) {
@@ -794,6 +803,7 @@ const StackSafetyInfo::InfoTy &StackSafetyInfo::getInfo() const {
void StackSafetyInfo::print(raw_ostream &O) const {
getInfo().Info.print(O, F->getName(), dyn_cast<Function>(F));
+ O << "\n";
}
const StackSafetyGlobalInfo::InfoTy &StackSafetyGlobalInfo::getInfo() const {
@@ -806,17 +816,22 @@ const StackSafetyGlobalInfo::InfoTy &StackSafetyGlobalInfo::getInfo() const {
}
}
Info.reset(new InfoTy{
- createGlobalStackSafetyInfo(std::move(Functions), Index), {}});
+ createGlobalStackSafetyInfo(std::move(Functions), Index), {}, {}});
+
for (auto &FnKV : Info->Info) {
for (auto &KV : FnKV.second.Allocas) {
++NumAllocaTotal;
const AllocaInst *AI = KV.first;
- if (getStaticAllocaSizeRange(*AI).contains(KV.second.Range)) {
+ auto AIRange = getStaticAllocaSizeRange(*AI);
+ if (AIRange.contains(KV.second.Range)) {
Info->SafeAllocas.insert(AI);
++NumAllocaStackSafe;
}
+ for (const auto &A : KV.second.Accesses)
+ Info->AccessIsUnsafe[A.first] |= !AIRange.contains(A.second);
}
}
+
if (StackSafetyPrint)
print(errs());
}
@@ -886,6 +901,15 @@ bool StackSafetyGlobalInfo::isSafe(const AllocaInst &AI) const {
return Info.SafeAllocas.count(&AI);
}
+bool StackSafetyGlobalInfo::stackAccessIsSafe(const Instruction &I) const {
+ const auto &Info = getInfo();
+ auto It = Info.AccessIsUnsafe.find(&I);
+ if (It == Info.AccessIsUnsafe.end()) {
+ return true;
+ }
+ return !It->second;
+}
+
void StackSafetyGlobalInfo::print(raw_ostream &O) const {
auto &SSI = getInfo().Info;
if (SSI.empty())
@@ -894,6 +918,16 @@ void StackSafetyGlobalInfo::print(raw_ostream &O) const {
for (auto &F : M.functions()) {
if (!F.isDeclaration()) {
SSI.find(&F)->second.print(O, F.getName(), &F);
+ O << " safe accesses:"
+ << "\n";
+ for (const auto &I : instructions(F)) {
+ const CallInst *Call = dyn_cast<CallInst>(&I);
+ if ((isa<StoreInst>(I) || isa<LoadInst>(I) || isa<MemIntrinsic>(I) ||
+ (Call && Call->hasByValArgument())) &&
+ stackAccessIsSafe(I)) {
+ O << " " << I << "\n";
+ }
+ }
O << "\n";
}
}
diff --git a/llvm/lib/Analysis/TFUtils.cpp b/llvm/lib/Analysis/TFUtils.cpp
index e93dc303ae63..3d10479c4544 100644
--- a/llvm/lib/Analysis/TFUtils.cpp
+++ b/llvm/lib/Analysis/TFUtils.cpp
@@ -1,9 +1,8 @@
//===- TFUtils.cpp - tensorflow evaluation utilities ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -262,29 +261,58 @@ private:
class LoggerDataImpl {
const std::vector<LoggedFeatureSpec> LoggedFeatureSpecs;
const TensorSpec RewardSpec;
+ const bool IncludeReward;
+
+ std::vector<tensorflow::FeatureList> FeatureLists;
+ tensorflow::FeatureList Reward;
+
+ bool isSelfConsistent(const tensorflow::SequenceExample &SE,
+ size_t NrRecords) const {
+ bool Ret = true;
+ for (const auto &TSpecs : LoggedFeatureSpecs) {
+ const auto &Name = TSpecs.getLoggingName();
+ const auto &FL = SE.feature_lists().feature_list().at(Name).feature();
+ if (NrRecords != static_cast<size_t>(FL.size())) {
+ dbgs() << "[TF-UTILS]: " << Name << " has missing records. Expected "
+ << NrRecords << " got " << FL.size() << "\n";
+ Ret = false;
+ }
+ }
+ if (IncludeReward && static_cast<size_t>(SE.feature_lists()
+ .feature_list()
+ .at(RewardSpec.name())
+ .feature()
+ .size()) != NrRecords) {
+ dbgs() << "[TF-UTILS]: reward is missing records.\n";
+ Ret = false;
+ }
+ return Ret;
+ }
- tensorflow::SequenceExample SE;
- std::vector<tensorflow::FeatureList *> FeatureLists;
- tensorflow::FeatureList *Reward = nullptr;
-
-public:
- LoggerDataImpl(const std::vector<LoggedFeatureSpec> &LoggedSpecs,
- const TensorSpec &RewardSpec, bool IncludeReward)
- : LoggedFeatureSpecs(LoggedSpecs), RewardSpec(RewardSpec) {
+ void transferLog(tensorflow::SequenceExample &SE) {
auto *FL = SE.mutable_feature_lists()->mutable_feature_list();
if (IncludeReward)
- Reward = &(*FL)[RewardSpec.name()];
- // Allocate first the map entries, then capture their address. We will not
- // mutate the set of features after this (i.e. the pointers won't dangle).
- for (const auto &LFS : LoggedSpecs) {
- (*FL)[LFS.LoggingName ? *LFS.LoggingName : LFS.Spec.name()] = {};
+ (*FL)[RewardSpec.name()] = std::move(Reward);
+ assert(FeatureLists.size() == LoggedFeatureSpecs.size());
+ for (size_t I = 0; I < FeatureLists.size(); ++I) {
+ const auto &LFS = LoggedFeatureSpecs[I];
+ (*FL)[LFS.getLoggingName()] = std::move(FeatureLists[I]);
}
- for (const auto &LFS : LoggedSpecs)
- FeatureLists.push_back(
- &(*FL)[LFS.LoggingName ? *LFS.LoggingName : LFS.Spec.name()]);
}
- void print(raw_ostream &OS) {
+public:
+ LoggerDataImpl(const std::vector<LoggedFeatureSpec> &LoggedSpecs,
+ const TensorSpec &RewardSpec, bool IncludeReward)
+ : LoggedFeatureSpecs(LoggedSpecs), RewardSpec(RewardSpec),
+ IncludeReward(IncludeReward), FeatureLists(LoggedFeatureSpecs.size()) {}
+
+ // flush the logged info to a stream and clear the log contents.
+ void flush(raw_ostream &OS) {
+ size_t NrRecords = getNrRecords();
+ (void)NrRecords;
+ tensorflow::SequenceExample SE;
+ transferLog(SE);
+ assert(isSelfConsistent(SE, NrRecords));
std::string OutStr;
if (ProtobufTextMode)
google::protobuf::TextFormat::PrintToString(SE, &OutStr);
@@ -298,14 +326,14 @@ public:
const auto &Spec = LoggedFeatureSpecs[FeatureID].Spec;
if (Spec.isElementType<float>()) {
auto *RF = FeatureLists[FeatureID]
- ->add_feature()
+ .add_feature()
->mutable_float_list()
->mutable_value();
RF->Resize(Spec.getElementCount(), 0.0);
return reinterpret_cast<char *>(RF->mutable_data());
} else if (Spec.isElementType<int32_t>() || Spec.isElementType<int64_t>()) {
auto *RF = FeatureLists[FeatureID]
- ->add_feature()
+ .add_feature()
->mutable_int64_list()
->mutable_value();
RF->Resize(Spec.getElementCount(), 0);
@@ -315,17 +343,18 @@ public:
}
template <typename T> void logReward(T Value) {
+ assert(IncludeReward);
if (RewardSpec.isElementType<float>())
- Reward->add_feature()->mutable_float_list()->add_value(Value);
+ Reward.add_feature()->mutable_float_list()->add_value(Value);
else if (RewardSpec.isElementType<int32_t>() ||
RewardSpec.isElementType<int64_t>())
- Reward->add_feature()->mutable_int64_list()->add_value(Value);
+ Reward.add_feature()->mutable_int64_list()->add_value(Value);
else
llvm_unreachable("Unsupported tensor type.");
}
size_t getNrRecords() const {
- return FeatureLists.empty() ? 0 : FeatureLists[0]->feature().size();
+ return FeatureLists.empty() ? 0 : FeatureLists[0].feature().size();
}
};
} // namespace llvm
@@ -538,5 +567,5 @@ char *Logger::addEntryAndGetFloatOrInt64Buffer(size_t FeatureID) {
return reinterpret_cast<char *>(LoggerData->addNewTensor(FeatureID));
}
-void Logger::print(raw_ostream &OS) { LoggerData->print(OS); }
+void Logger::flush(raw_ostream &OS) { LoggerData->flush(OS); }
#endif // defined(LLVM_HAVE_TF_API)
diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp
index 4a8818f2e2a8..7326ba74c071 100644
--- a/llvm/lib/Analysis/TargetLibraryInfo.cpp
+++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -123,6 +123,7 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
// Set IO unlocked variants as unavailable
// Set them as available per system below
+ TLI.setUnavailable(LibFunc_getc_unlocked);
TLI.setUnavailable(LibFunc_getchar_unlocked);
TLI.setUnavailable(LibFunc_putc_unlocked);
TLI.setUnavailable(LibFunc_putchar_unlocked);
@@ -156,15 +157,10 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
// isn't true for a target those defaults should be overridden below.
TLI.setIntSize(T.isArch16Bit() ? 16 : 32);
- if (T.isAMDGPU())
- TLI.disableAllFunctions();
-
- // There are no library implementations of memcpy and memset for AMD gpus and
- // these can be difficult to lower in the backend.
+ // There is really no runtime library on AMDGPU, apart from
+ // __kmpc_alloc/free_shared.
if (T.isAMDGPU()) {
- TLI.setUnavailable(LibFunc_memcpy);
- TLI.setUnavailable(LibFunc_memset);
- TLI.setUnavailable(LibFunc_memset_pattern16);
+ TLI.disableAllFunctions();
TLI.setAvailable(llvm::LibFunc___kmpc_alloc_shared);
TLI.setAvailable(llvm::LibFunc___kmpc_free_shared);
return;
@@ -418,6 +414,65 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
TLI.setUnavailable(LibFunc_utimes);
}
+ // Pick just one set of new/delete variants.
+ if (T.isOSMSVCRT()) {
+    // MSVC doesn't have the Itanium new/delete.
+ TLI.setUnavailable(LibFunc_ZdaPv);
+ TLI.setUnavailable(LibFunc_ZdaPvRKSt9nothrow_t);
+ TLI.setUnavailable(LibFunc_ZdaPvSt11align_val_t);
+ TLI.setUnavailable(LibFunc_ZdaPvSt11align_val_tRKSt9nothrow_t);
+ TLI.setUnavailable(LibFunc_ZdaPvj);
+ TLI.setUnavailable(LibFunc_ZdaPvjSt11align_val_t);
+ TLI.setUnavailable(LibFunc_ZdaPvm);
+ TLI.setUnavailable(LibFunc_ZdaPvmSt11align_val_t);
+ TLI.setUnavailable(LibFunc_ZdlPv);
+ TLI.setUnavailable(LibFunc_ZdlPvRKSt9nothrow_t);
+ TLI.setUnavailable(LibFunc_ZdlPvSt11align_val_t);
+ TLI.setUnavailable(LibFunc_ZdlPvSt11align_val_tRKSt9nothrow_t);
+ TLI.setUnavailable(LibFunc_ZdlPvj);
+ TLI.setUnavailable(LibFunc_ZdlPvjSt11align_val_t);
+ TLI.setUnavailable(LibFunc_ZdlPvm);
+ TLI.setUnavailable(LibFunc_ZdlPvmSt11align_val_t);
+ TLI.setUnavailable(LibFunc_Znaj);
+ TLI.setUnavailable(LibFunc_ZnajRKSt9nothrow_t);
+ TLI.setUnavailable(LibFunc_ZnajSt11align_val_t);
+ TLI.setUnavailable(LibFunc_ZnajSt11align_val_tRKSt9nothrow_t);
+ TLI.setUnavailable(LibFunc_Znam);
+ TLI.setUnavailable(LibFunc_ZnamRKSt9nothrow_t);
+ TLI.setUnavailable(LibFunc_ZnamSt11align_val_t);
+ TLI.setUnavailable(LibFunc_ZnamSt11align_val_tRKSt9nothrow_t);
+ TLI.setUnavailable(LibFunc_Znwj);
+ TLI.setUnavailable(LibFunc_ZnwjRKSt9nothrow_t);
+ TLI.setUnavailable(LibFunc_ZnwjSt11align_val_t);
+ TLI.setUnavailable(LibFunc_ZnwjSt11align_val_tRKSt9nothrow_t);
+ TLI.setUnavailable(LibFunc_Znwm);
+ TLI.setUnavailable(LibFunc_ZnwmRKSt9nothrow_t);
+ TLI.setUnavailable(LibFunc_ZnwmSt11align_val_t);
+ TLI.setUnavailable(LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t);
+ } else {
+ // Not MSVC, assume it's Itanium.
+ TLI.setUnavailable(LibFunc_msvc_new_int);
+ TLI.setUnavailable(LibFunc_msvc_new_int_nothrow);
+ TLI.setUnavailable(LibFunc_msvc_new_longlong);
+ TLI.setUnavailable(LibFunc_msvc_new_longlong_nothrow);
+ TLI.setUnavailable(LibFunc_msvc_delete_ptr32);
+ TLI.setUnavailable(LibFunc_msvc_delete_ptr32_nothrow);
+ TLI.setUnavailable(LibFunc_msvc_delete_ptr32_int);
+ TLI.setUnavailable(LibFunc_msvc_delete_ptr64);
+ TLI.setUnavailable(LibFunc_msvc_delete_ptr64_nothrow);
+ TLI.setUnavailable(LibFunc_msvc_delete_ptr64_longlong);
+ TLI.setUnavailable(LibFunc_msvc_new_array_int);
+ TLI.setUnavailable(LibFunc_msvc_new_array_int_nothrow);
+ TLI.setUnavailable(LibFunc_msvc_new_array_longlong);
+ TLI.setUnavailable(LibFunc_msvc_new_array_longlong_nothrow);
+ TLI.setUnavailable(LibFunc_msvc_delete_array_ptr32);
+ TLI.setUnavailable(LibFunc_msvc_delete_array_ptr32_nothrow);
+ TLI.setUnavailable(LibFunc_msvc_delete_array_ptr32_int);
+ TLI.setUnavailable(LibFunc_msvc_delete_array_ptr64);
+ TLI.setUnavailable(LibFunc_msvc_delete_array_ptr64_nothrow);
+ TLI.setUnavailable(LibFunc_msvc_delete_array_ptr64_longlong);
+ }
+
switch (T.getOS()) {
case Triple::MacOSX:
// exp10 and exp10f are not available on OS X until 10.9 and iOS until 7.0
@@ -572,6 +627,9 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
TLI.setUnavailable(LibFunc_sinh_finite);
TLI.setUnavailable(LibFunc_sinhf_finite);
TLI.setUnavailable(LibFunc_sinhl_finite);
+ TLI.setUnavailable(LibFunc_sqrt_finite);
+ TLI.setUnavailable(LibFunc_sqrtf_finite);
+ TLI.setUnavailable(LibFunc_sqrtl_finite);
}
if ((T.isOSLinux() && T.isGNUEnvironment()) ||
@@ -589,6 +647,140 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
TLI.setAvailable(LibFunc_fgets_unlocked);
}
+ if (T.isAndroid() && T.isAndroidVersionLT(21)) {
+ TLI.setUnavailable(LibFunc_stpcpy);
+ TLI.setUnavailable(LibFunc_stpncpy);
+ }
+
+ if (T.isPS4()) {
+ // PS4 does have memalign.
+ TLI.setAvailable(LibFunc_memalign);
+
+ // PS4 does not have new/delete with "unsigned int" size parameter;
+ // it only has the "unsigned long" versions.
+ TLI.setUnavailable(LibFunc_ZdaPvj);
+ TLI.setUnavailable(LibFunc_ZdaPvjSt11align_val_t);
+ TLI.setUnavailable(LibFunc_ZdlPvj);
+ TLI.setUnavailable(LibFunc_ZdlPvjSt11align_val_t);
+ TLI.setUnavailable(LibFunc_Znaj);
+ TLI.setUnavailable(LibFunc_ZnajRKSt9nothrow_t);
+ TLI.setUnavailable(LibFunc_ZnajSt11align_val_t);
+ TLI.setUnavailable(LibFunc_ZnajSt11align_val_tRKSt9nothrow_t);
+ TLI.setUnavailable(LibFunc_Znwj);
+ TLI.setUnavailable(LibFunc_ZnwjRKSt9nothrow_t);
+ TLI.setUnavailable(LibFunc_ZnwjSt11align_val_t);
+ TLI.setUnavailable(LibFunc_ZnwjSt11align_val_tRKSt9nothrow_t);
+
+ // None of the *_chk functions.
+ TLI.setUnavailable(LibFunc_memccpy_chk);
+ TLI.setUnavailable(LibFunc_memcpy_chk);
+ TLI.setUnavailable(LibFunc_memmove_chk);
+ TLI.setUnavailable(LibFunc_mempcpy_chk);
+ TLI.setUnavailable(LibFunc_memset_chk);
+ TLI.setUnavailable(LibFunc_snprintf_chk);
+ TLI.setUnavailable(LibFunc_sprintf_chk);
+ TLI.setUnavailable(LibFunc_stpcpy_chk);
+ TLI.setUnavailable(LibFunc_stpncpy_chk);
+ TLI.setUnavailable(LibFunc_strcat_chk);
+ TLI.setUnavailable(LibFunc_strcpy_chk);
+ TLI.setUnavailable(LibFunc_strlcat_chk);
+ TLI.setUnavailable(LibFunc_strlcat_chk);
+ TLI.setUnavailable(LibFunc_strlcpy_chk);
+ TLI.setUnavailable(LibFunc_strlen_chk);
+ TLI.setUnavailable(LibFunc_strncat_chk);
+ TLI.setUnavailable(LibFunc_strncpy_chk);
+ TLI.setUnavailable(LibFunc_vsnprintf_chk);
+ TLI.setUnavailable(LibFunc_vsprintf_chk);
+
+ // Various Posix system functions.
+ TLI.setUnavailable(LibFunc_access);
+ TLI.setUnavailable(LibFunc_chmod);
+ TLI.setUnavailable(LibFunc_chown);
+ TLI.setUnavailable(LibFunc_closedir);
+ TLI.setUnavailable(LibFunc_ctermid);
+ TLI.setUnavailable(LibFunc_execl);
+ TLI.setUnavailable(LibFunc_execle);
+ TLI.setUnavailable(LibFunc_execlp);
+ TLI.setUnavailable(LibFunc_execv);
+ TLI.setUnavailable(LibFunc_execvP);
+ TLI.setUnavailable(LibFunc_execve);
+ TLI.setUnavailable(LibFunc_execvp);
+ TLI.setUnavailable(LibFunc_execvpe);
+ TLI.setUnavailable(LibFunc_fork);
+ TLI.setUnavailable(LibFunc_fstat);
+ TLI.setUnavailable(LibFunc_fstatvfs);
+ TLI.setUnavailable(LibFunc_getenv);
+ TLI.setUnavailable(LibFunc_getitimer);
+ TLI.setUnavailable(LibFunc_getlogin_r);
+ TLI.setUnavailable(LibFunc_getpwnam);
+ TLI.setUnavailable(LibFunc_gettimeofday);
+ TLI.setUnavailable(LibFunc_lchown);
+ TLI.setUnavailable(LibFunc_lstat);
+ TLI.setUnavailable(LibFunc_mkdir);
+ TLI.setUnavailable(LibFunc_open);
+ TLI.setUnavailable(LibFunc_opendir);
+ TLI.setUnavailable(LibFunc_pclose);
+ TLI.setUnavailable(LibFunc_popen);
+ TLI.setUnavailable(LibFunc_pread);
+ TLI.setUnavailable(LibFunc_pwrite);
+ TLI.setUnavailable(LibFunc_read);
+ TLI.setUnavailable(LibFunc_readlink);
+ TLI.setUnavailable(LibFunc_realpath);
+ TLI.setUnavailable(LibFunc_rename);
+ TLI.setUnavailable(LibFunc_rmdir);
+ TLI.setUnavailable(LibFunc_setitimer);
+ TLI.setUnavailable(LibFunc_stat);
+ TLI.setUnavailable(LibFunc_statvfs);
+ TLI.setUnavailable(LibFunc_system);
+ TLI.setUnavailable(LibFunc_times);
+ TLI.setUnavailable(LibFunc_tmpfile);
+ TLI.setUnavailable(LibFunc_unlink);
+ TLI.setUnavailable(LibFunc_uname);
+ TLI.setUnavailable(LibFunc_unsetenv);
+ TLI.setUnavailable(LibFunc_utime);
+ TLI.setUnavailable(LibFunc_utimes);
+ TLI.setUnavailable(LibFunc_valloc);
+ TLI.setUnavailable(LibFunc_write);
+
+ // Miscellaneous other functions not provided.
+ TLI.setUnavailable(LibFunc_atomic_load);
+ TLI.setUnavailable(LibFunc_atomic_store);
+ TLI.setUnavailable(LibFunc___kmpc_alloc_shared);
+ TLI.setUnavailable(LibFunc___kmpc_free_shared);
+ TLI.setUnavailable(LibFunc_dunder_strndup);
+ TLI.setUnavailable(LibFunc_bcmp);
+ TLI.setUnavailable(LibFunc_bcopy);
+ TLI.setUnavailable(LibFunc_bzero);
+ TLI.setUnavailable(LibFunc_cabs);
+ TLI.setUnavailable(LibFunc_cabsf);
+ TLI.setUnavailable(LibFunc_cabsl);
+ TLI.setUnavailable(LibFunc_ffs);
+ TLI.setUnavailable(LibFunc_flockfile);
+ TLI.setUnavailable(LibFunc_fseeko);
+ TLI.setUnavailable(LibFunc_ftello);
+ TLI.setUnavailable(LibFunc_ftrylockfile);
+ TLI.setUnavailable(LibFunc_funlockfile);
+ TLI.setUnavailable(LibFunc_htonl);
+ TLI.setUnavailable(LibFunc_htons);
+ TLI.setUnavailable(LibFunc_isascii);
+ TLI.setUnavailable(LibFunc_memccpy);
+ TLI.setUnavailable(LibFunc_mempcpy);
+ TLI.setUnavailable(LibFunc_memrchr);
+ TLI.setUnavailable(LibFunc_ntohl);
+ TLI.setUnavailable(LibFunc_ntohs);
+ TLI.setUnavailable(LibFunc_reallocf);
+ TLI.setUnavailable(LibFunc_roundeven);
+ TLI.setUnavailable(LibFunc_roundevenf);
+ TLI.setUnavailable(LibFunc_roundevenl);
+ TLI.setUnavailable(LibFunc_stpcpy);
+ TLI.setUnavailable(LibFunc_stpncpy);
+ TLI.setUnavailable(LibFunc_strlcat);
+ TLI.setUnavailable(LibFunc_strlcpy);
+ TLI.setUnavailable(LibFunc_strndup);
+ TLI.setUnavailable(LibFunc_strnlen);
+ TLI.setUnavailable(LibFunc_toascii);
+ }
+
// As currently implemented in clang, NVPTX code has no standard library to
// speak of. Headers provide a standard-ish library implementation, but many
// of the signatures are wrong -- for example, many libm functions are not
@@ -691,7 +883,7 @@ TargetLibraryInfoImpl &TargetLibraryInfoImpl::operator=(TargetLibraryInfoImpl &&
static StringRef sanitizeFunctionName(StringRef funcName) {
// Filter out empty names and names containing null bytes, those can't be in
// our table.
- if (funcName.empty() || funcName.find('\0') != StringRef::npos)
+ if (funcName.empty() || funcName.contains('\0'))
return StringRef();
// Check for \01 prefix that is used to mangle __asm declarations and
@@ -716,12 +908,12 @@ bool TargetLibraryInfoImpl::getLibFunc(StringRef funcName, LibFunc &F) const {
bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
LibFunc F,
- const DataLayout *DL) const {
- LLVMContext &Ctx = FTy.getContext();
- Type *SizeTTy = DL ? DL->getIntPtrType(Ctx, /*AddressSpace=*/0) : nullptr;
- auto IsSizeTTy = [SizeTTy](Type *Ty) {
- return SizeTTy ? Ty == SizeTTy : Ty->isIntegerTy();
- };
+ const Module &M) const {
+ // FIXME: There is really no guarantee that sizeof(size_t) is equal to
+ // sizeof(int*) for every target. So the assumption used here to derive the
+ // SizeTBits based on the size of an integer pointer in address space zero
+ // isn't always valid.
+ unsigned SizeTBits = M.getDataLayout().getPointerSizeInBits(/*AddrSpace=*/0);
unsigned NumParams = FTy.getNumParams();
switch (F) {
@@ -745,12 +937,12 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
FTy.getReturnType()->isIntegerTy(32));
case LibFunc_strlen_chk:
--NumParams;
- if (!IsSizeTTy(FTy.getParamType(NumParams)))
+ if (!FTy.getParamType(NumParams)->isIntegerTy(SizeTBits))
return false;
LLVM_FALLTHROUGH;
case LibFunc_strlen:
- return (NumParams == 1 && FTy.getParamType(0)->isPointerTy() &&
- FTy.getReturnType()->isIntegerTy());
+ return NumParams == 1 && FTy.getParamType(0)->isPointerTy() &&
+ FTy.getReturnType()->isIntegerTy(SizeTBits);
case LibFunc_strchr:
case LibFunc_strrchr:
@@ -770,7 +962,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
FTy.getParamType(1)->isPointerTy());
case LibFunc_strcat_chk:
--NumParams;
- if (!IsSizeTTy(FTy.getParamType(NumParams)))
+ if (!FTy.getParamType(NumParams)->isIntegerTy(SizeTBits))
return false;
LLVM_FALLTHROUGH;
case LibFunc_strcat:
@@ -780,19 +972,19 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
case LibFunc_strncat_chk:
--NumParams;
- if (!IsSizeTTy(FTy.getParamType(NumParams)))
+ if (!FTy.getParamType(NumParams)->isIntegerTy(SizeTBits))
return false;
LLVM_FALLTHROUGH;
case LibFunc_strncat:
return (NumParams == 3 && FTy.getReturnType()->isPointerTy() &&
FTy.getParamType(0) == FTy.getReturnType() &&
FTy.getParamType(1) == FTy.getReturnType() &&
- IsSizeTTy(FTy.getParamType(2)));
+ FTy.getParamType(2)->isIntegerTy(SizeTBits));
case LibFunc_strcpy_chk:
case LibFunc_stpcpy_chk:
--NumParams;
- if (!IsSizeTTy(FTy.getParamType(NumParams)))
+ if (!FTy.getParamType(NumParams)->isIntegerTy(SizeTBits))
return false;
LLVM_FALLTHROUGH;
case LibFunc_strcpy:
@@ -804,20 +996,20 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
case LibFunc_strlcat_chk:
case LibFunc_strlcpy_chk:
--NumParams;
- if (!IsSizeTTy(FTy.getParamType(NumParams)))
+ if (!FTy.getParamType(NumParams)->isIntegerTy(SizeTBits))
return false;
LLVM_FALLTHROUGH;
case LibFunc_strlcat:
case LibFunc_strlcpy:
- return NumParams == 3 && IsSizeTTy(FTy.getReturnType()) &&
+ return NumParams == 3 && FTy.getReturnType()->isIntegerTy(SizeTBits) &&
FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy() &&
- IsSizeTTy(FTy.getParamType(2));
+ FTy.getParamType(2)->isIntegerTy(SizeTBits);
case LibFunc_strncpy_chk:
case LibFunc_stpncpy_chk:
--NumParams;
- if (!IsSizeTTy(FTy.getParamType(NumParams)))
+ if (!FTy.getParamType(NumParams)->isIntegerTy(SizeTBits))
return false;
LLVM_FALLTHROUGH;
case LibFunc_strncpy:
@@ -825,7 +1017,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
return (NumParams == 3 && FTy.getReturnType() == FTy.getParamType(0) &&
FTy.getParamType(0) == FTy.getParamType(1) &&
FTy.getParamType(0)->isPointerTy() &&
- IsSizeTTy(FTy.getParamType(2)));
+ FTy.getParamType(2)->isIntegerTy(SizeTBits));
case LibFunc_strxfrm:
return (NumParams == 3 && FTy.getParamType(0)->isPointerTy() &&
@@ -840,7 +1032,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
return (NumParams == 3 && FTy.getReturnType()->isIntegerTy(32) &&
FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(0) == FTy.getParamType(1) &&
- IsSizeTTy(FTy.getParamType(2)));
+ FTy.getParamType(2)->isIntegerTy(SizeTBits));
case LibFunc_strspn:
case LibFunc_strcspn:
@@ -888,20 +1080,21 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
case LibFunc_sprintf_chk:
return NumParams == 4 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isIntegerTy(32) &&
- IsSizeTTy(FTy.getParamType(2)) &&
+ FTy.getParamType(2)->isIntegerTy(SizeTBits) &&
FTy.getParamType(3)->isPointerTy() &&
FTy.getReturnType()->isIntegerTy(32);
case LibFunc_snprintf:
- return (NumParams == 3 && FTy.getParamType(0)->isPointerTy() &&
- FTy.getParamType(2)->isPointerTy() &&
- FTy.getReturnType()->isIntegerTy(32));
+ return NumParams == 3 && FTy.getParamType(0)->isPointerTy() &&
+ FTy.getParamType(1)->isIntegerTy(SizeTBits) &&
+ FTy.getParamType(2)->isPointerTy() &&
+ FTy.getReturnType()->isIntegerTy(32);
case LibFunc_snprintf_chk:
return NumParams == 5 && FTy.getParamType(0)->isPointerTy() &&
- IsSizeTTy(FTy.getParamType(1)) &&
+ FTy.getParamType(1)->isIntegerTy(SizeTBits) &&
FTy.getParamType(2)->isIntegerTy(32) &&
- IsSizeTTy(FTy.getParamType(3)) &&
+ FTy.getParamType(3)->isIntegerTy(SizeTBits) &&
FTy.getParamType(4)->isPointerTy() &&
FTy.getReturnType()->isIntegerTy(32);
@@ -915,16 +1108,17 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
case LibFunc_vec_malloc:
return (NumParams == 1 && FTy.getReturnType()->isPointerTy());
case LibFunc_memcmp:
- return (NumParams == 3 && FTy.getReturnType()->isIntegerTy(32) &&
- FTy.getParamType(0)->isPointerTy() &&
- FTy.getParamType(1)->isPointerTy());
+ return NumParams == 3 && FTy.getReturnType()->isIntegerTy(32) &&
+ FTy.getParamType(0)->isPointerTy() &&
+ FTy.getParamType(1)->isPointerTy() &&
+ FTy.getParamType(2)->isIntegerTy(SizeTBits);
case LibFunc_memchr:
case LibFunc_memrchr:
return (NumParams == 3 && FTy.getReturnType()->isPointerTy() &&
FTy.getReturnType() == FTy.getParamType(0) &&
FTy.getParamType(1)->isIntegerTy(32) &&
- IsSizeTTy(FTy.getParamType(2)));
+ FTy.getParamType(2)->isIntegerTy(SizeTBits));
case LibFunc_modf:
case LibFunc_modff:
case LibFunc_modfl:
@@ -934,7 +1128,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
case LibFunc_mempcpy_chk:
case LibFunc_memmove_chk:
--NumParams;
- if (!IsSizeTTy(FTy.getParamType(NumParams)))
+ if (!FTy.getParamType(NumParams)->isIntegerTy(SizeTBits))
return false;
LLVM_FALLTHROUGH;
case LibFunc_memcpy:
@@ -943,22 +1137,22 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
return (NumParams == 3 && FTy.getReturnType() == FTy.getParamType(0) &&
FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy() &&
- IsSizeTTy(FTy.getParamType(2)));
+ FTy.getParamType(2)->isIntegerTy(SizeTBits));
case LibFunc_memset_chk:
--NumParams;
- if (!IsSizeTTy(FTy.getParamType(NumParams)))
+ if (!FTy.getParamType(NumParams)->isIntegerTy(SizeTBits))
return false;
LLVM_FALLTHROUGH;
case LibFunc_memset:
return (NumParams == 3 && FTy.getReturnType() == FTy.getParamType(0) &&
FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isIntegerTy() &&
- IsSizeTTy(FTy.getParamType(2)));
+ FTy.getParamType(2)->isIntegerTy(SizeTBits));
case LibFunc_memccpy_chk:
--NumParams;
- if (!IsSizeTTy(FTy.getParamType(NumParams)))
+ if (!FTy.getParamType(NumParams)->isIntegerTy(SizeTBits))
return false;
LLVM_FALLTHROUGH;
case LibFunc_memccpy:
@@ -970,7 +1164,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
case LibFunc_vec_realloc:
return (NumParams == 2 && FTy.getReturnType()->isPointerTy() &&
FTy.getParamType(0) == FTy.getReturnType() &&
- IsSizeTTy(FTy.getParamType(1)));
+ FTy.getParamType(1)->isIntegerTy(SizeTBits));
case LibFunc_read:
return (NumParams == 3 && FTy.getParamType(1)->isPointerTy());
case LibFunc_rewind:
@@ -1051,7 +1245,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
return (NumParams != 0 && FTy.getParamType(0)->isPointerTy());
case LibFunc___kmpc_free_shared:
return (NumParams == 2 && FTy.getParamType(0)->isPointerTy() &&
- IsSizeTTy(FTy.getParamType(1)));
+ FTy.getParamType(1)->isIntegerTy(SizeTBits));
case LibFunc_fopen:
return (NumParams == 2 && FTy.getReturnType()->isPointerTy() &&
@@ -1141,14 +1335,14 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
case LibFunc_vsprintf_chk:
return NumParams == 5 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isIntegerTy(32) &&
- IsSizeTTy(FTy.getParamType(2)) && FTy.getParamType(3)->isPointerTy();
+ FTy.getParamType(2)->isIntegerTy(SizeTBits) && FTy.getParamType(3)->isPointerTy();
case LibFunc_vsnprintf:
return (NumParams == 4 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(2)->isPointerTy());
case LibFunc_vsnprintf_chk:
return NumParams == 6 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(2)->isIntegerTy(32) &&
- IsSizeTTy(FTy.getParamType(3)) && FTy.getParamType(4)->isPointerTy();
+ FTy.getParamType(3)->isIntegerTy(SizeTBits) && FTy.getParamType(4)->isPointerTy();
case LibFunc_open:
return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy());
case LibFunc_opendir:
@@ -1560,12 +1754,13 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
case LibFunc_strnlen:
return (NumParams == 2 && FTy.getReturnType() == FTy.getParamType(1) &&
FTy.getParamType(0)->isPointerTy() &&
- IsSizeTTy(FTy.getParamType(1)));
+ FTy.getParamType(1)->isIntegerTy(SizeTBits));
case LibFunc_posix_memalign:
return (NumParams == 3 && FTy.getReturnType()->isIntegerTy(32) &&
FTy.getParamType(0)->isPointerTy() &&
- IsSizeTTy(FTy.getParamType(1)) && IsSizeTTy(FTy.getParamType(2)));
+ FTy.getParamType(1)->isIntegerTy(SizeTBits) &&
+ FTy.getParamType(2)->isIntegerTy(SizeTBits));
case LibFunc_wcslen:
return (NumParams == 1 && FTy.getParamType(0)->isPointerTy() &&
@@ -1605,10 +1800,11 @@ bool TargetLibraryInfoImpl::getLibFunc(const Function &FDecl,
// avoid string normalization and comparison.
if (FDecl.isIntrinsic()) return false;
- const DataLayout *DL =
- FDecl.getParent() ? &FDecl.getParent()->getDataLayout() : nullptr;
+ const Module *M = FDecl.getParent();
+ assert(M && "Expecting FDecl to be connected to a Module.");
+
return getLibFunc(FDecl.getName(), F) &&
- isValidProtoForLibFunc(*FDecl.getFunctionType(), F, DL);
+ isValidProtoForLibFunc(*FDecl.getFunctionType(), F, *M);
}
void TargetLibraryInfoImpl::disableAllFunctions() {
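Note on the hunks above: the DataLayout-dependent IsSizeTTy lambda is replaced by a fixed SizeTBits width taken from the module's pointer size in address space 0, so size_t parameters and returns are now checked against an exact integer width. A minimal sketch of the resulting check, assuming M is the module being validated and FTy the function type, on a target whose datalayout uses 64-bit pointers in address space 0:

  // Illustrative only: mirrors the validation pattern now used in
  // isValidProtoForLibFunc.
  unsigned SizeTBits = M.getDataLayout().getPointerSizeInBits(/*AddrSpace=*/0);
  // On such a target "i64 @strlen(i8*)" passes, while "i32 @strlen(i8*)" is
  // now rejected, because the return type must be exactly iSizeTBits.
  bool ValidStrlen = FTy.getNumParams() == 1 &&
                     FTy.getParamType(0)->isPointerTy() &&
                     FTy.getReturnType()->isIntegerTy(SizeTBits);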
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 304d24fe8e4a..5067f493f02d 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -167,11 +167,7 @@ bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE,
// Note that this block may not be the loop latch block, even if the loop
// has a latch block.
ExitBlock = BB;
- TripCount = SE.getAddExpr(EC, SE.getOne(EC->getType()));
-
- if (!EC->getType()->isPointerTy() && EC->getType() != CountType)
- TripCount = SE.getZeroExtendExpr(TripCount, CountType);
-
+ ExitCount = EC;
break;
}
@@ -263,10 +259,20 @@ bool TargetTransformInfo::isNoopAddrSpaceCast(unsigned FromAS,
return TTIImpl->isNoopAddrSpaceCast(FromAS, ToAS);
}
+bool TargetTransformInfo::canHaveNonUndefGlobalInitializerInAddressSpace(
+ unsigned AS) const {
+ return TTIImpl->canHaveNonUndefGlobalInitializerInAddressSpace(AS);
+}
+
unsigned TargetTransformInfo::getAssumedAddrSpace(const Value *V) const {
return TTIImpl->getAssumedAddrSpace(V);
}
+std::pair<const Value *, unsigned>
+TargetTransformInfo::getPredicatedAddrSpace(const Value *V) const {
+ return TTIImpl->getPredicatedAddrSpace(V);
+}
+
Value *TargetTransformInfo::rewriteIntrinsicWithAddressSpace(
IntrinsicInst *II, Value *OldV, Value *NewV) const {
return TTIImpl->rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
@@ -317,8 +323,9 @@ Optional<Value *> TargetTransformInfo::simplifyDemandedVectorEltsIntrinsic(
}
void TargetTransformInfo::getUnrollingPreferences(
- Loop *L, ScalarEvolution &SE, UnrollingPreferences &UP) const {
- return TTIImpl->getUnrollingPreferences(L, SE, UP);
+ Loop *L, ScalarEvolution &SE, UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE) const {
+ return TTIImpl->getUnrollingPreferences(L, SE, UP, ORE);
}
void TargetTransformInfo::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
@@ -409,6 +416,10 @@ bool TargetTransformInfo::isLegalMaskedExpandLoad(Type *DataType) const {
return TTIImpl->isLegalMaskedExpandLoad(DataType);
}
+bool TargetTransformInfo::enableOrderedReductions() const {
+ return TTIImpl->enableOrderedReductions();
+}
+
bool TargetTransformInfo::hasDivRemOp(Type *DataType, bool IsSigned) const {
return TTIImpl->hasDivRemOp(DataType, IsSigned);
}
@@ -598,6 +609,10 @@ Optional<unsigned> TargetTransformInfo::getMaxVScale() const {
return TTIImpl->getMaxVScale();
}
+Optional<unsigned> TargetTransformInfo::getVScaleForTuning() const {
+ return TTIImpl->getVScaleForTuning();
+}
+
bool TargetTransformInfo::shouldMaximizeVectorBandwidth() const {
return TTIImpl->shouldMaximizeVectorBandwidth();
}
@@ -818,6 +833,15 @@ InstructionCost TargetTransformInfo::getVectorInstrCost(unsigned Opcode,
return Cost;
}
+InstructionCost TargetTransformInfo::getReplicationShuffleCost(
+ Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts,
+ TTI::TargetCostKind CostKind) {
+ InstructionCost Cost = TTIImpl->getReplicationShuffleCost(
+ EltTy, ReplicationFactor, VF, DemandedDstElts, CostKind);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
+}
+
InstructionCost TargetTransformInfo::getMemoryOpCost(
unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind, const Instruction *I) const {
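Note on the TargetTransformInfo hunks above: getUnrollingPreferences now threads an OptimizationRemarkEmitter through to the target implementation. A hedged usage sketch, assuming L, SE and ORE come from the calling pass and UP has been pre-seeded with that pass's defaults (as the loop unroller does) before the target adjusts it:

  // The target may consult ORE when tuning unrolling decisions; the other new
  // hooks in this hunk (getReplicationShuffleCost, getVScaleForTuning, ...)
  // follow the same thin-forwarding pattern.
  TargetTransformInfo::UnrollingPreferences UP;
  TTI.getUnrollingPreferences(L, SE, UP, &ORE);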
diff --git a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
index 20d718f4fad3..23dbb32f38de 100644
--- a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -521,21 +521,21 @@ static const MDNode *getLeastCommonType(const MDNode *A, const MDNode *B) {
return Ret;
}
-void Instruction::getAAMetadata(AAMDNodes &N, bool Merge) const {
- if (Merge) {
- N.TBAA =
- MDNode::getMostGenericTBAA(N.TBAA, getMetadata(LLVMContext::MD_tbaa));
- N.TBAAStruct = nullptr;
- N.Scope = MDNode::getMostGenericAliasScope(
- N.Scope, getMetadata(LLVMContext::MD_alias_scope));
- N.NoAlias =
- MDNode::intersect(N.NoAlias, getMetadata(LLVMContext::MD_noalias));
- } else {
- N.TBAA = getMetadata(LLVMContext::MD_tbaa);
- N.TBAAStruct = getMetadata(LLVMContext::MD_tbaa_struct);
- N.Scope = getMetadata(LLVMContext::MD_alias_scope);
- N.NoAlias = getMetadata(LLVMContext::MD_noalias);
- }
+AAMDNodes AAMDNodes::merge(const AAMDNodes &Other) const {
+ AAMDNodes Result;
+ Result.TBAA = MDNode::getMostGenericTBAA(TBAA, Other.TBAA);
+ Result.TBAAStruct = nullptr;
+ Result.Scope = MDNode::getMostGenericAliasScope(Scope, Other.Scope);
+ Result.NoAlias = MDNode::intersect(NoAlias, Other.NoAlias);
+ return Result;
+}
+
+AAMDNodes AAMDNodes::concat(const AAMDNodes &Other) const {
+ AAMDNodes Result;
+ Result.TBAA = Result.TBAAStruct = nullptr;
+ Result.Scope = MDNode::getMostGenericAliasScope(Scope, Other.Scope);
+ Result.NoAlias = MDNode::intersect(NoAlias, Other.NoAlias);
+ return Result;
}
static const MDNode *createAccessTag(const MDNode *AccessType) {
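Note on the hunk above: the old Instruction::getAAMetadata(N, Merge) entry point is replaced by value-returning helpers on AAMDNodes itself. A minimal sketch of combining the alias metadata of two accesses with the new API, assuming A and B are AAMDNodes already read from two instructions:

  // merge(): most generic TBAA, most generic alias scope, intersected noalias;
  // TBAAStruct is intentionally dropped, matching the hunk above.
  AAMDNodes Combined = A.merge(B);
  // concat() additionally clears TBAA entirely, keeping only scope/noalias.
  AAMDNodes Concatenated = A.concat(B);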
diff --git a/llvm/lib/Analysis/TypeMetadataUtils.cpp b/llvm/lib/Analysis/TypeMetadataUtils.cpp
index f015ba9a09ca..80051fd5f7c1 100644
--- a/llvm/lib/Analysis/TypeMetadataUtils.cpp
+++ b/llvm/lib/Analysis/TypeMetadataUtils.cpp
@@ -126,7 +126,8 @@ void llvm::findDevirtualizableCallsForTypeCheckedLoad(
Offset->getZExtValue(), CI, DT);
}
-Constant *llvm::getPointerAtOffset(Constant *I, uint64_t Offset, Module &M) {
+Constant *llvm::getPointerAtOffset(Constant *I, uint64_t Offset, Module &M,
+ Constant *TopLevelGlobal) {
if (I->getType()->isPointerTy()) {
if (Offset == 0)
return I;
@@ -142,7 +143,8 @@ Constant *llvm::getPointerAtOffset(Constant *I, uint64_t Offset, Module &M) {
unsigned Op = SL->getElementContainingOffset(Offset);
return getPointerAtOffset(cast<Constant>(I->getOperand(Op)),
- Offset - SL->getElementOffset(Op), M);
+ Offset - SL->getElementOffset(Op), M,
+ TopLevelGlobal);
}
if (auto *C = dyn_cast<ConstantArray>(I)) {
ArrayType *VTableTy = C->getType();
@@ -153,7 +155,62 @@ Constant *llvm::getPointerAtOffset(Constant *I, uint64_t Offset, Module &M) {
return nullptr;
return getPointerAtOffset(cast<Constant>(I->getOperand(Op)),
- Offset % ElemSize, M);
+ Offset % ElemSize, M, TopLevelGlobal);
+ }
+
+ // (Swift-specific) relative-pointer support starts here.
+ if (auto *CI = dyn_cast<ConstantInt>(I)) {
+ if (Offset == 0 && CI->getZExtValue() == 0) {
+ return I;
+ }
+ }
+ if (auto *C = dyn_cast<ConstantExpr>(I)) {
+ switch (C->getOpcode()) {
+ case Instruction::Trunc:
+ case Instruction::PtrToInt:
+ return getPointerAtOffset(cast<Constant>(C->getOperand(0)), Offset, M,
+ TopLevelGlobal);
+ case Instruction::Sub: {
+ auto *Operand0 = cast<Constant>(C->getOperand(0));
+ auto *Operand1 = cast<Constant>(C->getOperand(1));
+
+ auto StripGEP = [](Constant *C) {
+ auto *CE = dyn_cast<ConstantExpr>(C);
+ if (!CE)
+ return C;
+ if (CE->getOpcode() != Instruction::GetElementPtr)
+ return C;
+ return CE->getOperand(0);
+ };
+ auto *Operand1TargetGlobal = StripGEP(getPointerAtOffset(Operand1, 0, M));
+
+ // Check that in the "sub (@a, @b)" expression, @b points back to the top
+ // level global (or a GEP thereof) that we're processing. Otherwise bail.
+ if (Operand1TargetGlobal != TopLevelGlobal)
+ return nullptr;
+
+ return getPointerAtOffset(Operand0, Offset, M, TopLevelGlobal);
+ }
+ default:
+ return nullptr;
+ }
}
return nullptr;
}
+
+void llvm::replaceRelativePointerUsersWithZero(Function *F) {
+ for (auto *U : F->users()) {
+ auto *PtrExpr = dyn_cast<ConstantExpr>(U);
+ if (!PtrExpr || PtrExpr->getOpcode() != Instruction::PtrToInt)
+ continue;
+
+ for (auto *PtrToIntUser : PtrExpr->users()) {
+ auto *SubExpr = dyn_cast<ConstantExpr>(PtrToIntUser);
+ if (!SubExpr || SubExpr->getOpcode() != Instruction::Sub)
+ continue;
+
+ SubExpr->replaceNonMetadataUsesWith(
+ ConstantInt::get(SubExpr->getType(), 0));
+ }
+ }
+}
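Note on the hunks above: getPointerAtOffset now threads the top-level global through its recursion so it can recognise Swift-style relative pointers, i.e. integer constants of the form trunc(sub(ptrtoint @target, ptrtoint @vtable)), and replaceRelativePointerUsersWithZero rewrites such references. A hedged usage sketch, assuming F is a function referenced only through relative-pointer constant expressions:

  // For every ptrtoint(F) used inside a sub constant expression, the sub is
  // replaced by zero (metadata uses are left alone), per the hunk above.
  llvm::replaceRelativePointerUsersWithZero(F);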
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 522d21812c6a..1c41c77a8cfb 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -84,6 +84,17 @@ using namespace llvm::PatternMatch;
static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
cl::Hidden, cl::init(20));
+// According to the LangRef, branching on a poison condition is absolutely
+// immediate full UB. However, historically we haven't implemented that
+// consistently as we have an important transformation (non-trivial unswitch)
+// which introduces instances of branch on poison/undef to otherwise well
+// defined programs. This flag exists to let us test optimization benefit
+// of exploiting the specified behavior (in combination with enabling the
+// unswitch fix.)
+static cl::opt<bool> BranchOnPoisonAsUB("branch-on-poison-as-ub",
+ cl::Hidden, cl::init(false));
+
+
/// Returns the bitwidth of the given scalar or pointer type. For vector types,
/// returns the element type's bitwidth.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
@@ -165,8 +176,8 @@ static bool getShuffleDemandedElts(const ShuffleVectorInst *Shuf,
int NumElts =
cast<FixedVectorType>(Shuf->getOperand(0)->getType())->getNumElements();
int NumMaskElts = cast<FixedVectorType>(Shuf->getType())->getNumElements();
- DemandedLHS = DemandedRHS = APInt::getNullValue(NumElts);
- if (DemandedElts.isNullValue())
+ DemandedLHS = DemandedRHS = APInt::getZero(NumElts);
+ if (DemandedElts.isZero())
return true;
// Simple case of a shuffle with zeroinitializer.
if (all_of(Shuf->getShuffleMask(), [](int Elt) { return Elt == 0; })) {
@@ -206,7 +217,7 @@ static void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth,
auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
APInt DemandedElts =
- FVTy ? APInt::getAllOnesValue(FVTy->getNumElements()) : APInt(1, 1);
+ FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
computeKnownBits(V, DemandedElts, Known, Depth, Q);
}
@@ -279,16 +290,11 @@ bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS,
return KnownBits::haveNoCommonBitsSet(LHSKnown, RHSKnown);
}
-bool llvm::isOnlyUsedInZeroEqualityComparison(const Instruction *CxtI) {
- for (const User *U : CxtI->users()) {
- if (const ICmpInst *IC = dyn_cast<ICmpInst>(U))
- if (IC->isEquality())
- if (Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
- if (C->isNullValue())
- continue;
- return false;
- }
- return true;
+bool llvm::isOnlyUsedInZeroEqualityComparison(const Instruction *I) {
+ return !I->user_empty() && all_of(I->users(), [](const User *U) {
+ ICmpInst::Predicate P;
+ return match(U, m_ICmp(P, m_Value(), m_Zero())) && ICmpInst::isEquality(P);
+ });
}
static bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
@@ -378,7 +384,7 @@ static unsigned ComputeNumSignBits(const Value *V, unsigned Depth,
auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
APInt DemandedElts =
- FVTy ? APInt::getAllOnesValue(FVTy->getNumElements()) : APInt(1, 1);
+ FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
return ComputeNumSignBits(V, DemandedElts, Depth, Q);
}
@@ -390,6 +396,14 @@ unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL,
V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo));
}
+unsigned llvm::ComputeMinSignedBits(const Value *V, const DataLayout &DL,
+ unsigned Depth, AssumptionCache *AC,
+ const Instruction *CxtI,
+ const DominatorTree *DT) {
+ unsigned SignBits = ComputeNumSignBits(V, DL, Depth, AC, CxtI, DT);
+ return V->getType()->getScalarSizeInBits() - SignBits + 1;
+}
+
static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1,
bool NSW, const APInt &DemandedElts,
KnownBits &KnownOut, KnownBits &Known2,
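Note on the hunk above: ComputeMinSignedBits is the dual of ComputeNumSignBits, reporting how many bits are needed to hold the value as a signed integer. As a worked example, an i32 value known to have at least 24 redundant sign bits fits in 32 - 24 + 1 = 9 signed bits.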
@@ -499,7 +513,9 @@ static bool isEphemeralValueOf(const Instruction *I, const Value *E) {
if (V == E)
return true;
- if (V == I || isSafeToSpeculativelyExecute(V)) {
+ if (V == I || (isa<Instruction>(V) &&
+ !cast<Instruction>(V)->mayHaveSideEffects() &&
+ !cast<Instruction>(V)->isTerminator())) {
EphValues.insert(V);
if (const User *U = dyn_cast<User>(V))
append_range(WorkSet, U->operands());
@@ -547,10 +563,9 @@ bool llvm::isValidAssumeForContext(const Instruction *Inv,
// We limit the scan distance between the assume and its context instruction
// to avoid a compile-time explosion. This limit is chosen arbitrarily, so
// it can be adjusted if needed (could be turned into a cl::opt).
- unsigned ScanLimit = 15;
- for (BasicBlock::const_iterator I(CxtI), IE(Inv); I != IE; ++I)
- if (!isGuaranteedToTransferExecutionToSuccessor(&*I) || --ScanLimit == 0)
- return false;
+ auto Range = make_range(CxtI->getIterator(), Inv->getIterator());
+ if (!isGuaranteedToTransferExecutionToSuccessor(Range, 15))
+ return false;
return !isEphemeralValueOf(Inv, CxtI);
}
@@ -582,7 +597,7 @@ static bool cmpExcludesZero(CmpInst::Predicate Pred, const Value *RHS) {
return false;
ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(Pred, *C);
- return !TrueValues.contains(APInt::getNullValue(C->getBitWidth()));
+ return !TrueValues.contains(APInt::getZero(C->getBitWidth()));
}
static bool isKnownNonZeroFromAssume(const Value *V, const Query &Q) {
@@ -641,7 +656,7 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known,
if (V->getType()->isPointerTy()) {
if (RetainedKnowledge RK = getKnowledgeValidInContext(
V, {Attribute::Alignment}, Q.CxtI, Q.DT, Q.AC)) {
- Known.Zero.setLowBits(Log2_32(RK.ArgValue));
+ Known.Zero.setLowBits(Log2_64(RK.ArgValue));
}
}
@@ -1210,7 +1225,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
// (dependent on endian) to form the full result of known bits.
unsigned NumElts = DemandedElts.getBitWidth();
unsigned SubScale = BitWidth / SubBitWidth;
- APInt SubDemandedElts = APInt::getNullValue(NumElts * SubScale);
+ APInt SubDemandedElts = APInt::getZero(NumElts * SubScale);
for (unsigned i = 0; i != NumElts; ++i) {
if (DemandedElts[i])
SubDemandedElts.setBit(i * SubScale);
@@ -1383,7 +1398,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
Known = KnownBits::computeForAddSub(
/*Add=*/true, /*NSW=*/false, Known, IndexBits);
}
- if (!Known.isUnknown() && !AccConstIndices.isNullValue()) {
+ if (!Known.isUnknown() && !AccConstIndices.isZero()) {
KnownBits Index = KnownBits::makeConstant(AccConstIndices);
Known = KnownBits::computeForAddSub(
/*Add=*/true, /*NSW=*/false, Known, Index);
@@ -1512,7 +1527,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
// taking conservative care to avoid excessive recursion.
if (Depth < MaxAnalysisRecursionDepth - 1 && !Known.Zero && !Known.One) {
// Skip if every incoming value references to ourself.
- if (dyn_cast_or_null<UndefValue>(P->hasConstantValue()))
+ if (isa_and_nonnull<UndefValue>(P->hasConstantValue()))
break;
Known.Zero.setAllBits();
@@ -1689,6 +1704,33 @@ static void computeKnownBitsFromOperator(const Operator *I,
if (BitWidth >= 32)
Known.Zero.setBitsFrom(31);
break;
+ case Intrinsic::vscale: {
+ if (!II->getParent() || !II->getFunction() ||
+ !II->getFunction()->hasFnAttribute(Attribute::VScaleRange))
+ break;
+
+ auto VScaleRange = II->getFunction()
+ ->getFnAttribute(Attribute::VScaleRange)
+ .getVScaleRangeArgs();
+
+ if (VScaleRange.second == 0)
+ break;
+
+ // If vscale min = max then we know the exact value at compile time
+ // and hence we know the exact bits.
+ if (VScaleRange.first == VScaleRange.second) {
+ Known.One = VScaleRange.first;
+ Known.Zero = VScaleRange.first;
+ Known.Zero.flipAllBits();
+ break;
+ }
+
+ unsigned FirstZeroHighBit = 32 - countLeadingZeros(VScaleRange.second);
+ if (FirstZeroHighBit < BitWidth)
+ Known.Zero.setBitsFrom(FirstZeroHighBit);
+
+ break;
+ }
}
}
break;
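Note on the vscale case added above, as a worked example: with vscale_range(2,2) the value is known exactly, so Known.One is set to 2 and every other bit becomes known zero after the flip. With only an upper bound, e.g. a maximum of 16, countLeadingZeros(16) on a 32-bit value is 27, so FirstZeroHighBit is 5 and bits 5 and above of the result are known zero (any vscale up to 16 fits in 5 bits).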
@@ -1763,7 +1805,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
break;
}
unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
- APInt DemandedVecElts = APInt::getAllOnesValue(NumElts);
+ APInt DemandedVecElts = APInt::getAllOnes(NumElts);
if (CIdx && CIdx->getValue().ult(NumElts))
DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue());
computeKnownBits(Vec, DemandedVecElts, Known, Depth + 1, Q);
@@ -2248,7 +2290,7 @@ static bool isNonZeroRecurrence(const PHINode *PN) {
Value *Start = nullptr, *Step = nullptr;
const APInt *StartC, *StepC;
if (!matchSimpleRecurrence(PN, BO, Start, Step) ||
- !match(Start, m_APInt(StartC)) || StartC->isNullValue())
+ !match(Start, m_APInt(StartC)) || StartC->isZero())
return false;
switch (BO->getOpcode()) {
@@ -2260,7 +2302,7 @@ static bool isNonZeroRecurrence(const PHINode *PN) {
StartC->isNegative() == StepC->isNegative());
case Instruction::Mul:
return (BO->hasNoUnsignedWrap() || BO->hasNoSignedWrap()) &&
- match(Step, m_APInt(StepC)) && !StepC->isNullValue();
+ match(Step, m_APInt(StepC)) && !StepC->isZero();
case Instruction::Shl:
return BO->hasNoUnsignedWrap() || BO->hasNoSignedWrap();
case Instruction::AShr:
@@ -2532,7 +2574,7 @@ bool isKnownNonZero(const Value *V, const APInt &DemandedElts, unsigned Depth,
auto *CIdx = dyn_cast<ConstantInt>(Idx);
if (auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType())) {
unsigned NumElts = VecTy->getNumElements();
- APInt DemandedVecElts = APInt::getAllOnesValue(NumElts);
+ APInt DemandedVecElts = APInt::getAllOnes(NumElts);
if (CIdx && CIdx->getValue().ult(NumElts))
DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue());
return isKnownNonZero(Vec, DemandedVecElts, Depth, Q);
@@ -2559,7 +2601,7 @@ bool isKnownNonZero(const Value* V, unsigned Depth, const Query& Q) {
auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
APInt DemandedElts =
- FVTy ? APInt::getAllOnesValue(FVTy->getNumElements()) : APInt(1, 1);
+ FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
return isKnownNonZero(V, DemandedElts, Depth, Q);
}
@@ -2694,8 +2736,7 @@ static bool isNonEqualMul(const Value *V1, const Value *V2, unsigned Depth,
const APInt *C;
return match(OBO, m_Mul(m_Specific(V1), m_APInt(C))) &&
(OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) &&
- !C->isNullValue() && !C->isOneValue() &&
- isKnownNonZero(V1, Depth + 1, Q);
+ !C->isZero() && !C->isOne() && isKnownNonZero(V1, Depth + 1, Q);
}
return false;
}
@@ -2708,7 +2749,7 @@ static bool isNonEqualShl(const Value *V1, const Value *V2, unsigned Depth,
const APInt *C;
return match(OBO, m_Shl(m_Specific(V1), m_APInt(C))) &&
(OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) &&
- !C->isNullValue() && isKnownNonZero(V1, Depth + 1, Q);
+ !C->isZero() && isKnownNonZero(V1, Depth + 1, Q);
}
return false;
}
@@ -3051,7 +3092,7 @@ static unsigned ComputeNumSignBitsImpl(const Value *V,
// If the input is known to be 0 or 1, the output is 0/-1, which is
// all sign bits set.
- if ((Known.Zero | 1).isAllOnesValue())
+ if ((Known.Zero | 1).isAllOnes())
return TyBits;
// If we are subtracting one from a positive number, there is no carry
@@ -3075,7 +3116,7 @@ static unsigned ComputeNumSignBitsImpl(const Value *V,
computeKnownBits(U->getOperand(1), Known, Depth + 1, Q);
// If the input is known to be 0 or 1, the output is 0/-1, which is
// all sign bits set.
- if ((Known.Zero | 1).isAllOnesValue())
+ if ((Known.Zero | 1).isAllOnes())
return TyBits;
// If the input is known to be positive (the sign bit is known clear),
@@ -4533,6 +4574,12 @@ AllocaInst *llvm::findAllocaForValue(Value *V, bool OffsetZero) {
if (OffsetZero && !GEP->hasAllZeroIndices())
return nullptr;
AddWork(GEP->getPointerOperand());
+ } else if (CallBase *CB = dyn_cast<CallBase>(V)) {
+ Value *Returned = CB->getReturnedArgOperand();
+ if (Returned)
+ AddWork(Returned);
+ else
+ return nullptr;
} else {
return nullptr;
}
@@ -4614,7 +4661,7 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
if (*Denominator == 0)
return false;
// It's safe to hoist if the denominator is not 0 or -1.
- if (!Denominator->isAllOnesValue())
+ if (!Denominator->isAllOnes())
return true;
// At this point we know that the denominator is -1. It is safe to hoist as
// long we know that the numerator is not INT_MIN.
@@ -4922,15 +4969,14 @@ bool llvm::isOverflowIntrinsicNoWrap(const WithOverflowInst *WO,
return llvm::any_of(GuardingBranches, AllUsesGuardedByBranch);
}
-static bool canCreateUndefOrPoison(const Operator *Op, bool PoisonOnly) {
- // See whether I has flags that may create poison
- if (const auto *OvOp = dyn_cast<OverflowingBinaryOperator>(Op)) {
- if (OvOp->hasNoSignedWrap() || OvOp->hasNoUnsignedWrap())
- return true;
- }
- if (const auto *ExactOp = dyn_cast<PossiblyExactOperator>(Op))
- if (ExactOp->isExact())
- return true;
+static bool canCreateUndefOrPoison(const Operator *Op, bool PoisonOnly,
+ bool ConsiderFlags) {
+
+ if (ConsiderFlags && Op->hasPoisonGeneratingFlags())
+ return true;
+
+ // TODO: this should really be under the ConsiderFlags block, but currently
+ // these are not dropped by dropPoisonGeneratingFlags
if (const auto *FP = dyn_cast<FPMathOperator>(Op)) {
auto FMF = FP->getFastMathFlags();
if (FMF.noNaNs() || FMF.noInfs())
@@ -5019,10 +5065,10 @@ static bool canCreateUndefOrPoison(const Operator *Op, bool PoisonOnly) {
case Instruction::ICmp:
case Instruction::FCmp:
return false;
- case Instruction::GetElementPtr: {
- const auto *GEP = cast<GEPOperator>(Op);
- return GEP->isInBounds();
- }
+ case Instruction::GetElementPtr:
+ // inbounds is handled above
+ // TODO: what about inrange on constexpr?
+ return false;
default: {
const auto *CE = dyn_cast<ConstantExpr>(Op);
if (isa<CastInst>(Op) || (CE && CE->isCast()))
@@ -5035,12 +5081,12 @@ static bool canCreateUndefOrPoison(const Operator *Op, bool PoisonOnly) {
}
}
-bool llvm::canCreateUndefOrPoison(const Operator *Op) {
- return ::canCreateUndefOrPoison(Op, /*PoisonOnly=*/false);
+bool llvm::canCreateUndefOrPoison(const Operator *Op, bool ConsiderFlags) {
+ return ::canCreateUndefOrPoison(Op, /*PoisonOnly=*/false, ConsiderFlags);
}
-bool llvm::canCreatePoison(const Operator *Op) {
- return ::canCreateUndefOrPoison(Op, /*PoisonOnly=*/true);
+bool llvm::canCreatePoison(const Operator *Op, bool ConsiderFlags) {
+ return ::canCreateUndefOrPoison(Op, /*PoisonOnly=*/true, ConsiderFlags);
}
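Note on the hunks above: canCreateUndefOrPoison and canCreatePoison gain a ConsiderFlags parameter; passing false asks whether the operation could still produce poison after its poison-generating flags (nsw/nuw/exact/inbounds) are stripped. A hedged sketch, assuming Op is an Operator whose flags the caller intends to drop via dropPoisonGeneratingFlags before speculating:

  if (!canCreatePoison(Op, /*ConsiderFlags=*/false)) {
    // With flags ignored, e.g. "add nsw" no longer counts as poison-producing,
    // since nsw is exactly what dropPoisonGeneratingFlags removes. FP
    // fast-math flags are still checked, per the TODO in the hunk above.
  }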
static bool directlyImpliesPoison(const Value *ValAssumedPoison,
@@ -5068,7 +5114,7 @@ static bool directlyImpliesPoison(const Value *ValAssumedPoison,
const WithOverflowInst *II;
if (match(I, m_ExtractValue(m_WithOverflowInst(II))) &&
(match(ValAssumedPoison, m_ExtractValue(m_Specific(II))) ||
- llvm::is_contained(II->arg_operands(), ValAssumedPoison)))
+ llvm::is_contained(II->args(), ValAssumedPoison)))
return true;
}
return false;
@@ -5225,8 +5271,7 @@ static bool isGuaranteedNotToBeUndefOrPoison(const Value *V,
Dominator = Dominator->getIDom();
}
- SmallVector<Attribute::AttrKind, 2> AttrKinds{Attribute::NoUndef};
- if (getKnowledgeValidInContext(V, AttrKinds, CtxI, DT, AC))
+ if (getKnowledgeValidInContext(V, {Attribute::NoUndef}, CtxI, DT, AC))
return true;
return false;
@@ -5304,6 +5349,27 @@ bool llvm::isGuaranteedToTransferExecutionToSuccessor(const BasicBlock *BB) {
return true;
}
+bool llvm::isGuaranteedToTransferExecutionToSuccessor(
+ BasicBlock::const_iterator Begin, BasicBlock::const_iterator End,
+ unsigned ScanLimit) {
+ return isGuaranteedToTransferExecutionToSuccessor(make_range(Begin, End),
+ ScanLimit);
+}
+
+bool llvm::isGuaranteedToTransferExecutionToSuccessor(
+ iterator_range<BasicBlock::const_iterator> Range, unsigned ScanLimit) {
+ assert(ScanLimit && "scan limit must be non-zero");
+ for (const Instruction &I : Range) {
+ if (isa<DbgInfoIntrinsic>(I))
+ continue;
+ if (--ScanLimit == 0)
+ return false;
+ if (!isGuaranteedToTransferExecutionToSuccessor(&I))
+ return false;
+ }
+ return true;
+}
+
bool llvm::isGuaranteedToExecuteForEveryIteration(const Instruction *I,
const Loop *L) {
// The loop header is guaranteed to be executed for every iteration.
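Note on the overloads added above: isValidAssumeForContext now delegates its bounded scan to the range-based isGuaranteedToTransferExecutionToSuccessor, which skips debug intrinsics and gives up once the scan limit is exhausted. A minimal sketch of the same pattern, assuming CxtI and Inv are in one basic block with CxtI preceding Inv:

  // Returns false once ScanLimit non-debug instructions have been inspected,
  // or if any scanned instruction may not transfer execution to its successor.
  auto Range = make_range(CxtI->getIterator(), Inv->getIterator());
  bool Falls = isGuaranteedToTransferExecutionToSuccessor(Range, /*ScanLimit=*/15);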
@@ -5391,7 +5457,10 @@ void llvm::getGuaranteedWellDefinedOps(
}
break;
}
-
+ case Instruction::Ret:
+ if (I->getFunction()->hasRetAttribute(Attribute::NoUndef))
+ Operands.insert(I->getOperand(0));
+ break;
default:
break;
}
@@ -5408,7 +5477,16 @@ void llvm::getGuaranteedNonPoisonOps(const Instruction *I,
case Instruction::SRem:
Operands.insert(I->getOperand(1));
break;
-
+ case Instruction::Switch:
+ if (BranchOnPoisonAsUB)
+ Operands.insert(cast<SwitchInst>(I)->getCondition());
+ break;
+ case Instruction::Br: {
+ auto *BR = cast<BranchInst>(I);
+ if (BranchOnPoisonAsUB && BR->isConditional())
+ Operands.insert(BR->getCondition());
+ break;
+ }
default:
break;
}
@@ -5835,15 +5913,13 @@ static SelectPatternResult matchMinMax(CmpInst::Predicate Pred,
// Is the sign bit set?
// (X <s 0) ? X : MAXVAL ==> (X >u MAXVAL) ? X : MAXVAL ==> UMAX
// (X <s 0) ? MAXVAL : X ==> (X >u MAXVAL) ? MAXVAL : X ==> UMIN
- if (Pred == CmpInst::ICMP_SLT && C1->isNullValue() &&
- C2->isMaxSignedValue())
+ if (Pred == CmpInst::ICMP_SLT && C1->isZero() && C2->isMaxSignedValue())
return {CmpLHS == TrueVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false};
// Is the sign bit clear?
// (X >s -1) ? MINVAL : X ==> (X <u MINVAL) ? MINVAL : X ==> UMAX
// (X >s -1) ? X : MINVAL ==> (X <u MINVAL) ? X : MINVAL ==> UMIN
- if (Pred == CmpInst::ICMP_SGT && C1->isAllOnesValue() &&
- C2->isMinSignedValue())
+ if (Pred == CmpInst::ICMP_SGT && C1->isAllOnes() && C2->isMinSignedValue())
return {CmpLHS == FalseVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false};
}
@@ -6253,6 +6329,16 @@ CmpInst::Predicate llvm::getInverseMinMaxPred(SelectPatternFlavor SPF) {
return getMinMaxPred(getInverseMinMaxFlavor(SPF));
}
+APInt llvm::getMinMaxLimit(SelectPatternFlavor SPF, unsigned BitWidth) {
+ switch (SPF) {
+ case SPF_SMAX: return APInt::getSignedMaxValue(BitWidth);
+ case SPF_SMIN: return APInt::getSignedMinValue(BitWidth);
+ case SPF_UMAX: return APInt::getMaxValue(BitWidth);
+ case SPF_UMIN: return APInt::getMinValue(BitWidth);
+ default: llvm_unreachable("Unexpected flavor");
+ }
+}
+
std::pair<Intrinsic::ID, bool>
llvm::canConvertToMinOrMaxIntrinsic(ArrayRef<Value *> VL) {
// Check if VL contains select instructions that can be folded into a min/max
@@ -6681,7 +6767,7 @@ static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower,
const APInt *C;
switch (BO.getOpcode()) {
case Instruction::Add:
- if (match(BO.getOperand(1), m_APInt(C)) && !C->isNullValue()) {
+ if (match(BO.getOperand(1), m_APInt(C)) && !C->isZero()) {
// FIXME: If we have both nuw and nsw, we should reduce the range further.
if (IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(&BO))) {
// 'add nuw x, C' produces [C, UINT_MAX].
@@ -6719,7 +6805,7 @@ static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower,
Upper = APInt::getSignedMaxValue(Width).ashr(*C) + 1;
} else if (match(BO.getOperand(0), m_APInt(C))) {
unsigned ShiftAmount = Width - 1;
- if (!C->isNullValue() && IIQ.isExact(&BO))
+ if (!C->isZero() && IIQ.isExact(&BO))
ShiftAmount = C->countTrailingZeros();
if (C->isNegative()) {
// 'ashr C, x' produces [C, C >> (Width-1)]
@@ -6736,11 +6822,11 @@ static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower,
case Instruction::LShr:
if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) {
// 'lshr x, C' produces [0, UINT_MAX >> C].
- Upper = APInt::getAllOnesValue(Width).lshr(*C) + 1;
+ Upper = APInt::getAllOnes(Width).lshr(*C) + 1;
} else if (match(BO.getOperand(0), m_APInt(C))) {
// 'lshr C, x' produces [C >> (Width-1), C].
unsigned ShiftAmount = Width - 1;
- if (!C->isNullValue() && IIQ.isExact(&BO))
+ if (!C->isZero() && IIQ.isExact(&BO))
ShiftAmount = C->countTrailingZeros();
Lower = C->lshr(ShiftAmount);
Upper = *C + 1;
@@ -6773,7 +6859,7 @@ static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower,
if (match(BO.getOperand(1), m_APInt(C))) {
APInt IntMin = APInt::getSignedMinValue(Width);
APInt IntMax = APInt::getSignedMaxValue(Width);
- if (C->isAllOnesValue()) {
+ if (C->isAllOnes()) {
// 'sdiv x, -1' produces [INT_MIN + 1, INT_MAX]
// where C != -1 and C != 0 and C != 1
Lower = IntMin + 1;
@@ -6802,7 +6888,7 @@ static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower,
break;
case Instruction::UDiv:
- if (match(BO.getOperand(1), m_APInt(C)) && !C->isNullValue()) {
+ if (match(BO.getOperand(1), m_APInt(C)) && !C->isZero()) {
// 'udiv x, C' produces [0, UINT_MAX / C].
Upper = APInt::getMaxValue(Width).udiv(*C) + 1;
} else if (match(BO.getOperand(0), m_APInt(C))) {
@@ -6946,7 +7032,7 @@ static void setLimitsForSelectPattern(const SelectInst &SI, APInt &Lower,
// If the negation part of the abs (in RHS) has the NSW flag,
// then the result of abs(X) is [0..SIGNED_MAX],
// otherwise it is [0..SIGNED_MIN], as -SIGNED_MIN == SIGNED_MIN.
- Lower = APInt::getNullValue(BitWidth);
+ Lower = APInt::getZero(BitWidth);
if (match(RHS, m_Neg(m_Specific(LHS))) &&
IIQ.hasNoSignedWrap(cast<Instruction>(RHS)))
Upper = APInt::getSignedMaxValue(BitWidth) + 1;
@@ -6986,9 +7072,27 @@ static void setLimitsForSelectPattern(const SelectInst &SI, APInt &Lower,
}
}
+static void setLimitForFPToI(const Instruction *I, APInt &Lower, APInt &Upper) {
+ // The maximum representable value of a half is 65504. For floats the maximum
+ // value is 3.4e38 which requires roughly 129 bits.
+ unsigned BitWidth = I->getType()->getScalarSizeInBits();
+ if (!I->getOperand(0)->getType()->getScalarType()->isHalfTy())
+ return;
+ if (isa<FPToSIInst>(I) && BitWidth >= 17) {
+ Lower = APInt(BitWidth, -65504);
+ Upper = APInt(BitWidth, 65505);
+ }
+
+ if (isa<FPToUIInst>(I) && BitWidth >= 16) {
+ // For a fptoui the lower limit is left as 0.
+ Upper = APInt(BitWidth, 65505);
+ }
+}
+
ConstantRange llvm::computeConstantRange(const Value *V, bool UseInstrInfo,
AssumptionCache *AC,
const Instruction *CtxI,
+ const DominatorTree *DT,
unsigned Depth) {
assert(V->getType()->isIntOrIntVectorTy() && "Expected integer instruction");
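Note on setLimitForFPToI above, as a worked example: it only constrains conversions whose source is a half, whose largest finite value is 65504. An fptosi from half to i32 is therefore bounded to [-65504, 65504] (Upper is stored as the exclusive 65505), and an fptoui to [0, 65504]; conversions from float or double are left unconstrained here.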
@@ -7009,6 +7113,8 @@ ConstantRange llvm::computeConstantRange(const Value *V, bool UseInstrInfo,
setLimitsForIntrinsic(*II, Lower, Upper);
else if (auto *SI = dyn_cast<SelectInst>(V))
setLimitsForSelectPattern(*SI, Lower, Upper, IIQ);
+ else if (isa<FPToUIInst>(V) || isa<FPToSIInst>(V))
+ setLimitForFPToI(cast<Instruction>(V), Lower, Upper);
ConstantRange CR = ConstantRange::getNonEmpty(Lower, Upper);
@@ -7027,7 +7133,7 @@ ConstantRange llvm::computeConstantRange(const Value *V, bool UseInstrInfo,
assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume &&
"must be an assume intrinsic");
- if (!isValidAssumeForContext(I, CtxI, nullptr))
+ if (!isValidAssumeForContext(I, CtxI, DT))
continue;
Value *Arg = I->getArgOperand(0);
ICmpInst *Cmp = dyn_cast<ICmpInst>(Arg);
@@ -7035,9 +7141,9 @@ ConstantRange llvm::computeConstantRange(const Value *V, bool UseInstrInfo,
if (!Cmp || Cmp->getOperand(0) != V)
continue;
ConstantRange RHS = computeConstantRange(Cmp->getOperand(1), UseInstrInfo,
- AC, I, Depth + 1);
+ AC, I, DT, Depth + 1);
CR = CR.intersectWith(
- ConstantRange::makeSatisfyingICmpRegion(Cmp->getPredicate(), RHS));
+ ConstantRange::makeAllowedICmpRegion(Cmp->getPredicate(), RHS));
}
}
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index 0a14a1432934..655c248907f6 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -331,6 +331,12 @@ Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
if (Elt->isNullValue())
return findScalarElement(Val, EltNo);
+ // If the vector is a splat then we can trivially find the scalar element.
+ if (isa<ScalableVectorType>(VTy))
+ if (Value *Splat = getSplatValue(V))
+ if (EltNo < VTy->getElementCount().getKnownMinValue())
+ return Splat;
+
// Otherwise, we don't know.
return nullptr;
}
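Note on the hunk above: findScalarElement can now answer for scalable splats. If V is a splat of a scalable vector and the requested lane is below the known minimum element count, the splat value is returned; for example, a <vscale x 4 x i32> splat of 7 now resolves lanes 0 through 3 to 7 instead of nullptr.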
@@ -824,6 +830,23 @@ llvm::SmallVector<int, 16> llvm::createSequentialMask(unsigned Start,
return Mask;
}
+llvm::SmallVector<int, 16> llvm::createUnaryMask(ArrayRef<int> Mask,
+ unsigned NumElts) {
+ // Avoid casts in the loop and make sure we have a reasonable number.
+ int NumEltsSigned = NumElts;
+ assert(NumEltsSigned > 0 && "Expected smaller or non-zero element count");
+
+ // If the mask chooses an element from operand 1, reduce it to choose from the
+ // corresponding element of operand 0. Undef mask elements are unchanged.
+ SmallVector<int, 16> UnaryMask;
+ for (int MaskElt : Mask) {
+ assert((MaskElt < NumEltsSigned * 2) && "Expected valid shuffle mask");
+ int UnaryElt = MaskElt >= NumEltsSigned ? MaskElt - NumEltsSigned : MaskElt;
+ UnaryMask.push_back(UnaryElt);
+ }
+ return UnaryMask;
+}
+
/// A helper function for concatenating vectors. This function concatenates two
/// vectors having the same element type. If the second vector has fewer
/// elements than the first, it is padded with undefs.
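Note on createUnaryMask added above, as a worked example: it folds a two-operand shuffle mask into an equivalent single-operand one by remapping any index that selects from the second operand onto the corresponding lane of the first. With NumElts = 4, the mask [0, 5, 2, 7] becomes [0, 1, 2, 3]; undef (negative) mask elements pass through unchanged.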
@@ -940,7 +963,7 @@ APInt llvm::possiblyDemandedEltsInMask(Value *Mask) {
const unsigned VWidth =
cast<FixedVectorType>(Mask->getType())->getNumElements();
- APInt DemandedElts = APInt::getAllOnesValue(VWidth);
+ APInt DemandedElts = APInt::getAllOnes(VWidth);
if (auto *CV = dyn_cast<ConstantVector>(Mask))
for (unsigned i = 0; i < VWidth; i++)
if (CV->getAggregateElement(i)->isNullValue())
@@ -980,7 +1003,7 @@ void InterleavedAccessInfo::collectConstStrideAccesses(
// wrap around the address space we would do a memory access at nullptr
// even without the transformation. The wrapping checks are therefore
// deferred until after we've formed the interleaved groups.
- int64_t Stride = getPtrStride(PSE, Ptr, TheLoop, Strides,
+ int64_t Stride = getPtrStride(PSE, ElementTy, Ptr, TheLoop, Strides,
/*Assume=*/true, /*ShouldCheckWrap=*/false);
const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
@@ -1193,15 +1216,24 @@ void InterleavedAccessInfo::analyzeInterleaving(
} // Iteration over A accesses.
} // Iteration over B accesses.
- // Remove interleaved store groups with gaps.
- for (auto *Group : StoreGroups)
- if (Group->getNumMembers() != Group->getFactor()) {
- LLVM_DEBUG(
- dbgs() << "LV: Invalidate candidate interleaved store group due "
- "to gaps.\n");
- releaseGroup(Group);
- }
- // Remove interleaved groups with gaps (currently only loads) whose memory
+ auto InvalidateGroupIfMemberMayWrap = [&](InterleaveGroup<Instruction> *Group,
+ int Index,
+ std::string FirstOrLast) -> bool {
+ Instruction *Member = Group->getMember(Index);
+ assert(Member && "Group member does not exist");
+ Value *MemberPtr = getLoadStorePointerOperand(Member);
+ Type *AccessTy = getLoadStoreType(Member);
+ if (getPtrStride(PSE, AccessTy, MemberPtr, TheLoop, Strides,
+ /*Assume=*/false, /*ShouldCheckWrap=*/true))
+ return false;
+ LLVM_DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to "
+ << FirstOrLast
+ << " group member potentially pointer-wrapping.\n");
+ releaseGroup(Group);
+ return true;
+ };
+
+ // Remove interleaved groups with gaps whose memory
// accesses may wrap around. We have to revisit the getPtrStride analysis,
// this time with ShouldCheckWrap=true, since collectConstStrideAccesses does
// not check wrapping (see documentation there).
@@ -1227,26 +1259,12 @@ void InterleavedAccessInfo::analyzeInterleaving(
// So we check only group member 0 (which is always guaranteed to exist),
// and group member Factor - 1; If the latter doesn't exist we rely on
// peeling (if it is a non-reversed accsess -- see Case 3).
- Value *FirstMemberPtr = getLoadStorePointerOperand(Group->getMember(0));
- if (!getPtrStride(PSE, FirstMemberPtr, TheLoop, Strides, /*Assume=*/false,
- /*ShouldCheckWrap=*/true)) {
- LLVM_DEBUG(
- dbgs() << "LV: Invalidate candidate interleaved group due to "
- "first group member potentially pointer-wrapping.\n");
- releaseGroup(Group);
+ if (InvalidateGroupIfMemberMayWrap(Group, 0, std::string("first")))
continue;
- }
- Instruction *LastMember = Group->getMember(Group->getFactor() - 1);
- if (LastMember) {
- Value *LastMemberPtr = getLoadStorePointerOperand(LastMember);
- if (!getPtrStride(PSE, LastMemberPtr, TheLoop, Strides, /*Assume=*/false,
- /*ShouldCheckWrap=*/true)) {
- LLVM_DEBUG(
- dbgs() << "LV: Invalidate candidate interleaved group due to "
- "last group member potentially pointer-wrapping.\n");
- releaseGroup(Group);
- }
- } else {
+ if (Group->getMember(Group->getFactor() - 1))
+ InvalidateGroupIfMemberMayWrap(Group, Group->getFactor() - 1,
+ std::string("last"));
+ else {
// Case 3: A non-reversed interleaved load group with gaps: We need
// to execute at least one scalar epilogue iteration. This will ensure
// we don't speculatively access memory out-of-bounds. We only need
@@ -1264,6 +1282,39 @@ void InterleavedAccessInfo::analyzeInterleaving(
RequiresScalarEpilogue = true;
}
}
+
+ for (auto *Group : StoreGroups) {
+ // Case 1: A full group. Can Skip the checks; For full groups, if the wide
+ // store would wrap around the address space we would do a memory access at
+ // nullptr even without the transformation.
+ if (Group->getNumMembers() == Group->getFactor())
+ continue;
+
+ // Interleave-store-group with gaps is implemented using masked wide store.
+ // Remove interleaved store groups with gaps if
+ // masked-interleaved-accesses are not enabled by the target.
+ if (!EnablePredicatedInterleavedMemAccesses) {
+ LLVM_DEBUG(
+ dbgs() << "LV: Invalidate candidate interleaved store group due "
+ "to gaps.\n");
+ releaseGroup(Group);
+ continue;
+ }
+
+ // Case 2: If first and last members of the group don't wrap this implies
+ // that all the pointers in the group don't wrap.
+ // So we check only group member 0 (which is always guaranteed to exist),
+ // and the last group member. Case 3 (scalar epilog) is not relevant for
+ // stores with gaps, which are implemented with masked-store (rather than
+ // speculative access, as in loads).
+ if (InvalidateGroupIfMemberMayWrap(Group, 0, std::string("first")))
+ continue;
+ for (int Index = Group->getFactor() - 1; Index > 0; Index--)
+ if (Group->getMember(Index)) {
+ InvalidateGroupIfMemberMayWrap(Group, Index, std::string("last"));
+ break;
+ }
+ }
}
void InterleavedAccessInfo::invalidateGroupsRequiringScalarEpilogue() {
@@ -1325,9 +1376,7 @@ std::string VFABI::mangleTLIVectorName(StringRef VectorName,
void VFABI::getVectorVariantNames(
const CallInst &CI, SmallVectorImpl<std::string> &VariantMappings) {
- const StringRef S =
- CI.getAttribute(AttributeList::FunctionIndex, VFABI::MappingsAttrName)
- .getValueAsString();
+ const StringRef S = CI.getFnAttr(VFABI::MappingsAttrName).getValueAsString();
if (S.empty())
return;
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
index 4f72c6f9921a..41fb0b9008be 100644
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp
@@ -643,6 +643,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(convergent);
KEYWORD(dereferenceable);
KEYWORD(dereferenceable_or_null);
+ KEYWORD(disable_sanitizer_instrumentation);
KEYWORD(elementtype);
KEYWORD(inaccessiblememonly);
KEYWORD(inaccessiblemem_or_argmemonly);
@@ -769,6 +770,9 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(returnDoesNotAlias);
KEYWORD(noInline);
KEYWORD(alwaysInline);
+ KEYWORD(noUnwind);
+ KEYWORD(mayThrow);
+ KEYWORD(hasUnknownCall);
KEYWORD(calls);
KEYWORD(callee);
KEYWORD(params);
@@ -848,7 +852,15 @@ lltok::Kind LLLexer::LexIdentifier() {
TYPEKEYWORD("x86_mmx", Type::getX86_MMXTy(Context));
TYPEKEYWORD("x86_amx", Type::getX86_AMXTy(Context));
TYPEKEYWORD("token", Type::getTokenTy(Context));
- TYPEKEYWORD("ptr", PointerType::getUnqual(Context));
+
+ if (Keyword == "ptr") {
+ if (Context.supportsTypedPointers()) {
+ Warning("ptr type is only supported in -opaque-pointers mode");
+ return lltok::Error;
+ }
+ TyVal = PointerType::getUnqual(Context);
+ return lltok::Type;
+ }
#undef TYPEKEYWORD
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 799cb03c8c8c..5bce1eaa59a0 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -140,8 +140,8 @@ bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) {
if (Function *Fn = dyn_cast<Function>(V)) {
AttributeList AS = Fn->getAttributes();
- AttrBuilder FnAttrs(AS.getFnAttributes());
- AS = AS.removeAttributes(Context, AttributeList::FunctionIndex);
+ AttrBuilder FnAttrs(AS.getFnAttrs());
+ AS = AS.removeFnAttributes(Context);
FnAttrs.merge(B);
@@ -152,32 +152,28 @@ bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) {
FnAttrs.removeAttribute(Attribute::Alignment);
}
- AS = AS.addAttributes(Context, AttributeList::FunctionIndex,
- AttributeSet::get(Context, FnAttrs));
+ AS = AS.addFnAttributes(Context, AttributeSet::get(Context, FnAttrs));
Fn->setAttributes(AS);
} else if (CallInst *CI = dyn_cast<CallInst>(V)) {
AttributeList AS = CI->getAttributes();
- AttrBuilder FnAttrs(AS.getFnAttributes());
- AS = AS.removeAttributes(Context, AttributeList::FunctionIndex);
+ AttrBuilder FnAttrs(AS.getFnAttrs());
+ AS = AS.removeFnAttributes(Context);
FnAttrs.merge(B);
- AS = AS.addAttributes(Context, AttributeList::FunctionIndex,
- AttributeSet::get(Context, FnAttrs));
+ AS = AS.addFnAttributes(Context, AttributeSet::get(Context, FnAttrs));
CI->setAttributes(AS);
} else if (InvokeInst *II = dyn_cast<InvokeInst>(V)) {
AttributeList AS = II->getAttributes();
- AttrBuilder FnAttrs(AS.getFnAttributes());
- AS = AS.removeAttributes(Context, AttributeList::FunctionIndex);
+ AttrBuilder FnAttrs(AS.getFnAttrs());
+ AS = AS.removeFnAttributes(Context);
FnAttrs.merge(B);
- AS = AS.addAttributes(Context, AttributeList::FunctionIndex,
- AttributeSet::get(Context, FnAttrs));
+ AS = AS.addFnAttributes(Context, AttributeSet::get(Context, FnAttrs));
II->setAttributes(AS);
} else if (CallBrInst *CBI = dyn_cast<CallBrInst>(V)) {
AttributeList AS = CBI->getAttributes();
- AttrBuilder FnAttrs(AS.getFnAttributes());
- AS = AS.removeAttributes(Context, AttributeList::FunctionIndex);
+ AttrBuilder FnAttrs(AS.getFnAttrs());
+ AS = AS.removeFnAttributes(Context);
FnAttrs.merge(B);
- AS = AS.addAttributes(Context, AttributeList::FunctionIndex,
- AttributeSet::get(Context, FnAttrs));
+ AS = AS.addFnAttributes(Context, AttributeSet::get(Context, FnAttrs));
CBI->setAttributes(AS);
} else if (auto *GV = dyn_cast<GlobalVariable>(V)) {
AttrBuilder Attrs(GV->getAttributes());
@@ -239,18 +235,18 @@ bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) {
Inst->setMetadata(LLVMContext::MD_tbaa, UpgradedMD);
}
- // Look for intrinsic functions and CallInst that need to be upgraded
- for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; )
- UpgradeCallsToIntrinsic(&*FI++); // must be post-increment, as we remove
+ // Look for intrinsic functions and CallInst that need to be upgraded. We use
+ // make_early_inc_range here because we may remove some functions.
+ for (Function &F : llvm::make_early_inc_range(*M))
+ UpgradeCallsToIntrinsic(&F);
// Some types could be renamed during loading if several modules are
// loaded in the same LLVMContext (LTO scenario). In this case we should
// remangle intrinsics names as well.
- for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ) {
- Function *F = &*FI++;
- if (auto Remangled = Intrinsic::remangleIntrinsicFunction(F)) {
- F->replaceAllUsesWith(Remangled.getValue());
- F->eraseFromParent();
+ for (Function &F : llvm::make_early_inc_range(*M)) {
+ if (auto Remangled = Intrinsic::remangleIntrinsicFunction(&F)) {
+ F.replaceAllUsesWith(Remangled.getValue());
+ F.eraseFromParent();
}
}
@@ -605,12 +601,15 @@ bool LLParser::parseUnnamedGlobal() {
parseOptionalThreadLocal(TLM) || parseOptionalUnnamedAddr(UnnamedAddr))
return true;
- if (Lex.getKind() != lltok::kw_alias && Lex.getKind() != lltok::kw_ifunc)
+ switch (Lex.getKind()) {
+ default:
return parseGlobal(Name, NameLoc, Linkage, HasLinkage, Visibility,
DLLStorageClass, DSOLocal, TLM, UnnamedAddr);
-
- return parseIndirectSymbol(Name, NameLoc, Linkage, Visibility,
+ case lltok::kw_alias:
+ case lltok::kw_ifunc:
+ return parseAliasOrIFunc(Name, NameLoc, Linkage, Visibility,
DLLStorageClass, DSOLocal, TLM, UnnamedAddr);
+ }
}
/// parseNamedGlobal:
@@ -635,12 +634,15 @@ bool LLParser::parseNamedGlobal() {
parseOptionalThreadLocal(TLM) || parseOptionalUnnamedAddr(UnnamedAddr))
return true;
- if (Lex.getKind() != lltok::kw_alias && Lex.getKind() != lltok::kw_ifunc)
+ switch (Lex.getKind()) {
+ default:
return parseGlobal(Name, NameLoc, Linkage, HasLinkage, Visibility,
DLLStorageClass, DSOLocal, TLM, UnnamedAddr);
-
- return parseIndirectSymbol(Name, NameLoc, Linkage, Visibility,
+ case lltok::kw_alias:
+ case lltok::kw_ifunc:
+ return parseAliasOrIFunc(Name, NameLoc, Linkage, Visibility,
DLLStorageClass, DSOLocal, TLM, UnnamedAddr);
+ }
}
bool LLParser::parseComdat() {
@@ -913,25 +915,25 @@ static std::string typeComparisonErrorMessage(StringRef Message, Type *Ty1,
return ErrOS.str();
}
-/// parseIndirectSymbol:
+/// parseAliasOrIFunc:
/// ::= GlobalVar '=' OptionalLinkage OptionalPreemptionSpecifier
/// OptionalVisibility OptionalDLLStorageClass
/// OptionalThreadLocal OptionalUnnamedAddr
-/// 'alias|ifunc' IndirectSymbol IndirectSymbolAttr*
+/// 'alias|ifunc' AliaseeOrResolver SymbolAttrs*
///
-/// IndirectSymbol
+/// AliaseeOrResolver
/// ::= TypeAndValue
///
-/// IndirectSymbolAttr
+/// SymbolAttrs
/// ::= ',' 'partition' StringConstant
///
/// Everything through OptionalUnnamedAddr has already been parsed.
///
-bool LLParser::parseIndirectSymbol(const std::string &Name, LocTy NameLoc,
- unsigned L, unsigned Visibility,
- unsigned DLLStorageClass, bool DSOLocal,
- GlobalVariable::ThreadLocalMode TLM,
- GlobalVariable::UnnamedAddr UnnamedAddr) {
+bool LLParser::parseAliasOrIFunc(const std::string &Name, LocTy NameLoc,
+ unsigned L, unsigned Visibility,
+ unsigned DLLStorageClass, bool DSOLocal,
+ GlobalVariable::ThreadLocalMode TLM,
+ GlobalVariable::UnnamedAddr UnnamedAddr) {
bool IsAlias;
if (Lex.getKind() == lltok::kw_alias)
IsAlias = true;
@@ -1013,21 +1015,26 @@ bool LLParser::parseIndirectSymbol(const std::string &Name, LocTy NameLoc,
}
}
- // Okay, create the alias but do not insert it into the module yet.
- std::unique_ptr<GlobalIndirectSymbol> GA;
- if (IsAlias)
+ // Okay, create the alias/ifunc but do not insert it into the module yet.
+ std::unique_ptr<GlobalAlias> GA;
+ std::unique_ptr<GlobalIFunc> GI;
+ GlobalValue *GV;
+ if (IsAlias) {
GA.reset(GlobalAlias::create(Ty, AddrSpace,
(GlobalValue::LinkageTypes)Linkage, Name,
Aliasee, /*Parent*/ nullptr));
- else
- GA.reset(GlobalIFunc::create(Ty, AddrSpace,
+ GV = GA.get();
+ } else {
+ GI.reset(GlobalIFunc::create(Ty, AddrSpace,
(GlobalValue::LinkageTypes)Linkage, Name,
Aliasee, /*Parent*/ nullptr));
- GA->setThreadLocalMode(TLM);
- GA->setVisibility((GlobalValue::VisibilityTypes)Visibility);
- GA->setDLLStorageClass((GlobalValue::DLLStorageClassTypes)DLLStorageClass);
- GA->setUnnamedAddr(UnnamedAddr);
- maybeSetDSOLocal(DSOLocal, *GA);
+ GV = GI.get();
+ }
+ GV->setThreadLocalMode(TLM);
+ GV->setVisibility((GlobalValue::VisibilityTypes)Visibility);
+ GV->setDLLStorageClass((GlobalValue::DLLStorageClassTypes)DLLStorageClass);
+ GV->setUnnamedAddr(UnnamedAddr);
+ maybeSetDSOLocal(DSOLocal, *GV);
// At this point we've parsed everything except for the IndirectSymbolAttrs.
// Now parse them if there are any.
@@ -1036,7 +1043,7 @@ bool LLParser::parseIndirectSymbol(const std::string &Name, LocTy NameLoc,
if (Lex.getKind() == lltok::kw_partition) {
Lex.Lex();
- GA->setPartition(Lex.getStrVal());
+ GV->setPartition(Lex.getStrVal());
if (parseToken(lltok::StringConstant, "expected partition string"))
return true;
} else {
@@ -1045,30 +1052,27 @@ bool LLParser::parseIndirectSymbol(const std::string &Name, LocTy NameLoc,
}
if (Name.empty())
- NumberedVals.push_back(GA.get());
+ NumberedVals.push_back(GV);
if (GVal) {
// Verify that types agree.
- if (GVal->getType() != GA->getType())
+ if (GVal->getType() != GV->getType())
return error(
ExplicitTypeLoc,
"forward reference and definition of alias have different types");
// If they agree, just RAUW the old value with the alias and remove the
// forward ref info.
- GVal->replaceAllUsesWith(GA.get());
+ GVal->replaceAllUsesWith(GV);
GVal->eraseFromParent();
}
// Insert into the module, we know its name won't collide now.
if (IsAlias)
- M->getAliasList().push_back(cast<GlobalAlias>(GA.get()));
+ M->getAliasList().push_back(GA.release());
else
- M->getIFuncList().push_back(cast<GlobalIFunc>(GA.get()));
- assert(GA->getName() == Name && "Should not be a name conflict!");
-
- // The module owns this now
- GA.release();
+ M->getIFuncList().push_back(GI.release());
+ assert(GV->getName() == Name && "Should not be a name conflict!");
return false;
}
@@ -1408,14 +1412,10 @@ static inline GlobalValue *createGlobalFwdRef(Module *M, PointerType *PTy) {
}
Value *LLParser::checkValidVariableType(LocTy Loc, const Twine &Name, Type *Ty,
- Value *Val, bool IsCall) {
+ Value *Val) {
Type *ValTy = Val->getType();
if (ValTy == Ty)
return Val;
- // For calls, we also allow opaque pointers.
- if (IsCall && ValTy == PointerType::get(Ty->getContext(),
- Ty->getPointerAddressSpace()))
- return Val;
if (Ty->isLabelTy())
error(Loc, "'" + Name + "' is not a basic block");
else
@@ -1429,7 +1429,7 @@ Value *LLParser::checkValidVariableType(LocTy Loc, const Twine &Name, Type *Ty,
/// forward reference record if needed. This can return null if the value
/// exists but does not have the right type.
GlobalValue *LLParser::getGlobalVal(const std::string &Name, Type *Ty,
- LocTy Loc, bool IsCall) {
+ LocTy Loc) {
PointerType *PTy = dyn_cast<PointerType>(Ty);
if (!PTy) {
error(Loc, "global variable reference must have pointer type");
@@ -1451,7 +1451,7 @@ GlobalValue *LLParser::getGlobalVal(const std::string &Name, Type *Ty,
// If we have the value in the symbol table or fwd-ref table, return it.
if (Val)
return cast_or_null<GlobalValue>(
- checkValidVariableType(Loc, "@" + Name, Ty, Val, IsCall));
+ checkValidVariableType(Loc, "@" + Name, Ty, Val));
// Otherwise, create a new forward reference for this value and remember it.
GlobalValue *FwdVal = createGlobalFwdRef(M, PTy);
@@ -1459,8 +1459,7 @@ GlobalValue *LLParser::getGlobalVal(const std::string &Name, Type *Ty,
return FwdVal;
}
-GlobalValue *LLParser::getGlobalVal(unsigned ID, Type *Ty, LocTy Loc,
- bool IsCall) {
+GlobalValue *LLParser::getGlobalVal(unsigned ID, Type *Ty, LocTy Loc) {
PointerType *PTy = dyn_cast<PointerType>(Ty);
if (!PTy) {
error(Loc, "global variable reference must have pointer type");
@@ -1480,7 +1479,7 @@ GlobalValue *LLParser::getGlobalVal(unsigned ID, Type *Ty, LocTy Loc,
// If we have the value in the symbol table or fwd-ref table, return it.
if (Val)
return cast_or_null<GlobalValue>(
- checkValidVariableType(Loc, "@" + Twine(ID), Ty, Val, IsCall));
+ checkValidVariableType(Loc, "@" + Twine(ID), Ty, Val));
// Otherwise, create a new forward reference for this value and remember it.
GlobalValue *FwdVal = createGlobalFwdRef(M, PTy);
@@ -1936,7 +1935,7 @@ bool LLParser::parseOptionalAlignment(MaybeAlign &Alignment, bool AllowParens) {
if (!EatIfPresent(lltok::kw_align))
return false;
LocTy AlignLoc = Lex.getLoc();
- uint32_t Value = 0;
+ uint64_t Value = 0;
LocTy ParenLoc = Lex.getLoc();
bool HaveParens = false;
@@ -1945,13 +1944,13 @@ bool LLParser::parseOptionalAlignment(MaybeAlign &Alignment, bool AllowParens) {
HaveParens = true;
}
- if (parseUInt32(Value))
+ if (parseUInt64(Value))
return true;
if (HaveParens && !EatIfPresent(lltok::rparen))
return error(ParenLoc, "expected ')'");
- if (!isPowerOf2_32(Value))
+ if (!isPowerOf2_64(Value))
return error(AlignLoc, "alignment is not a power of two");
if (Value > Value::MaximumAlignment)
return error(AlignLoc, "huge alignments are not supported yet");
@@ -2221,6 +2220,26 @@ bool LLParser::parseType(Type *&Result, const Twine &Msg, bool AllowVoid) {
// Type ::= 'float' | 'void' (etc)
Result = Lex.getTyVal();
Lex.Lex();
+
+ // Handle "ptr" opaque pointer type.
+ //
+ // Type ::= ptr ('addrspace' '(' uint32 ')')?
+ if (Result->isOpaquePointerTy()) {
+ unsigned AddrSpace;
+ if (parseOptionalAddrSpace(AddrSpace))
+ return true;
+ Result = PointerType::get(getContext(), AddrSpace);
+
+ // Give a nice error for 'ptr*'.
+ if (Lex.getKind() == lltok::star)
+ return tokError("ptr* is invalid - use ptr instead");
+
+ // Fall through to parsing the type suffixes only if this 'ptr' is a
+ // function return. Otherwise, return success, implicitly rejecting other
+ // suffixes.
+ if (Lex.getKind() != lltok::lparen)
+ return false;
+ }
break;
case lltok::lbrace:
// Type ::= StructType
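
The block added above accepts the opaque pointer spelling ptr, an optional addrspace(N) suffix, and gives a targeted error for ptr*. A toy recursive-descent fragment showing the same decision order; the Lexer and Tok types are invented for the sketch and are not the LLParser interfaces:

#include <optional>
#include <string>
#include <vector>

enum class Tok { Ptr, AddrSpace, LParen, RParen, Number, Star, Eof };

struct Lexer {
  std::vector<Tok> Toks;
  size_t I = 0;
  Tok peek() const { return I < Toks.size() ? Toks[I] : Tok::Eof; }
  Tok next() { Tok T = peek(); ++I; return T; }
};

// Parses: 'ptr' ('addrspace' '(' number ')')?   Rejects a trailing '*'.
// Returns the address space on success, std::nullopt on error.
std::optional<unsigned> parsePtrType(Lexer &L, std::string &Err) {
  if (L.next() != Tok::Ptr) { Err = "expected 'ptr'"; return std::nullopt; }
  unsigned AddrSpace = 0;
  if (L.peek() == Tok::AddrSpace) {
    L.next();
    if (L.next() != Tok::LParen || L.next() != Tok::Number ||
        L.next() != Tok::RParen) {
      Err = "malformed addrspace suffix";
      return std::nullopt;
    }
    AddrSpace = 1; // a real parser would read the numeric literal here
  }
  if (L.peek() == Tok::Star) { // give a targeted error, as the hunk above does
    Err = "ptr* is invalid - use ptr instead";
    return std::nullopt;
  }
  return AddrSpace;
}
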
@@ -2274,26 +2293,6 @@ bool LLParser::parseType(Type *&Result, const Twine &Msg, bool AllowVoid) {
}
}
- // Handle (explicit) opaque pointer types (not --force-opaque-pointers).
- //
- // Type ::= ptr ('addrspace' '(' uint32 ')')?
- if (Result->isOpaquePointerTy()) {
- unsigned AddrSpace;
- if (parseOptionalAddrSpace(AddrSpace))
- return true;
- Result = PointerType::get(getContext(), AddrSpace);
-
- // Give a nice error for 'ptr*'.
- if (Lex.getKind() == lltok::star)
- return tokError("ptr* is invalid - use ptr instead");
-
- // Fall through to parsing the type suffixes only if this 'ptr' is a
- // function return. Otherwise, return success, implicitly rejecting other
- // suffixes.
- if (Lex.getKind() != lltok::lparen)
- return false;
- }
-
// parse the type suffixes.
while (true) {
switch (Lex.getKind()) {
@@ -2798,7 +2797,7 @@ bool LLParser::PerFunctionState::finishFunction() {
/// forward reference record if needed. This can return null if the value
/// exists but does not have the right type.
Value *LLParser::PerFunctionState::getVal(const std::string &Name, Type *Ty,
- LocTy Loc, bool IsCall) {
+ LocTy Loc) {
// Look this name up in the normal function symbol table.
Value *Val = F.getValueSymbolTable()->lookup(Name);
@@ -2812,7 +2811,7 @@ Value *LLParser::PerFunctionState::getVal(const std::string &Name, Type *Ty,
// If we have the value in the symbol table or fwd-ref table, return it.
if (Val)
- return P.checkValidVariableType(Loc, "%" + Name, Ty, Val, IsCall);
+ return P.checkValidVariableType(Loc, "%" + Name, Ty, Val);
// Don't make placeholders with invalid type.
if (!Ty->isFirstClassType()) {
@@ -2832,8 +2831,7 @@ Value *LLParser::PerFunctionState::getVal(const std::string &Name, Type *Ty,
return FwdVal;
}
-Value *LLParser::PerFunctionState::getVal(unsigned ID, Type *Ty, LocTy Loc,
- bool IsCall) {
+Value *LLParser::PerFunctionState::getVal(unsigned ID, Type *Ty, LocTy Loc) {
// Look this name up in the normal function symbol table.
Value *Val = ID < NumberedVals.size() ? NumberedVals[ID] : nullptr;
@@ -2847,7 +2845,7 @@ Value *LLParser::PerFunctionState::getVal(unsigned ID, Type *Ty, LocTy Loc,
// If we have the value in the symbol table or fwd-ref table, return it.
if (Val)
- return P.checkValidVariableType(Loc, "%" + Twine(ID), Ty, Val, IsCall);
+ return P.checkValidVariableType(Loc, "%" + Twine(ID), Ty, Val);
if (!Ty->isFirstClassType()) {
P.error(Loc, "invalid use of a non-first-class type");
@@ -2934,12 +2932,12 @@ bool LLParser::PerFunctionState::setInstName(int NameID,
BasicBlock *LLParser::PerFunctionState::getBB(const std::string &Name,
LocTy Loc) {
return dyn_cast_or_null<BasicBlock>(
- getVal(Name, Type::getLabelTy(F.getContext()), Loc, /*IsCall=*/false));
+ getVal(Name, Type::getLabelTy(F.getContext()), Loc));
}
BasicBlock *LLParser::PerFunctionState::getBB(unsigned ID, LocTy Loc) {
return dyn_cast_or_null<BasicBlock>(
- getVal(ID, Type::getLabelTy(F.getContext()), Loc, /*IsCall=*/false));
+ getVal(ID, Type::getLabelTy(F.getContext()), Loc));
}
/// defineBB - Define the specified basic block, which is either named or
@@ -3652,7 +3650,7 @@ bool LLParser::parseGlobalValue(Type *Ty, Constant *&C) {
ValID ID;
Value *V = nullptr;
bool Parsed = parseValID(ID, /*PFS=*/nullptr, Ty) ||
- convertValIDToValue(Ty, ID, V, nullptr, /*IsCall=*/false);
+ convertValIDToValue(Ty, ID, V, nullptr);
if (V && !(C = dyn_cast<Constant>(V)))
return error(ID.Loc, "global values must be constants");
return Parsed;
@@ -3876,10 +3874,6 @@ struct MDField : public MDFieldImpl<Metadata *> {
MDField(bool AllowNull = true) : ImplTy(nullptr), AllowNull(AllowNull) {}
};
-struct MDConstant : public MDFieldImpl<ConstantAsMetadata *> {
- MDConstant() : ImplTy(nullptr) {}
-};
-
struct MDStringField : public MDFieldImpl<MDString *> {
bool AllowEmpty;
MDStringField(bool AllowEmpty = true)
@@ -3914,22 +3908,6 @@ struct MDSignedOrMDField : MDEitherFieldImpl<MDSignedField, MDField> {
}
};
-struct MDSignedOrUnsignedField
- : MDEitherFieldImpl<MDSignedField, MDUnsignedField> {
- MDSignedOrUnsignedField() : ImplTy(MDSignedField(0), MDUnsignedField(0)) {}
-
- bool isMDSignedField() const { return WhatIs == IsTypeA; }
- bool isMDUnsignedField() const { return WhatIs == IsTypeB; }
- int64_t getMDSignedValue() const {
- assert(isMDSignedField() && "Wrong field type");
- return A.Val;
- }
- uint64_t getMDUnsignedValue() const {
- assert(isMDUnsignedField() && "Wrong field type");
- return B.Val;
- }
-};
-
} // end anonymous namespace
namespace llvm {
@@ -4578,7 +4556,8 @@ bool LLParser::parseDIDerivedType(MDNode *&Result, bool IsDistinct) {
OPTIONAL(offset, MDUnsignedField, (0, UINT64_MAX)); \
OPTIONAL(flags, DIFlagField, ); \
OPTIONAL(extraData, MDField, ); \
- OPTIONAL(dwarfAddressSpace, MDUnsignedField, (UINT32_MAX, UINT32_MAX));
+ OPTIONAL(dwarfAddressSpace, MDUnsignedField, (UINT32_MAX, UINT32_MAX)); \
+ OPTIONAL(annotations, MDField, );
PARSE_MD_FIELDS();
#undef VISIT_MD_FIELDS
@@ -4590,7 +4569,7 @@ bool LLParser::parseDIDerivedType(MDNode *&Result, bool IsDistinct) {
(Context, tag.Val, name.Val, file.Val, line.Val,
scope.Val, baseType.Val, size.Val, align.Val,
offset.Val, DWARFAddressSpace, flags.Val,
- extraData.Val));
+ extraData.Val, annotations.Val));
return false;
}
@@ -4615,7 +4594,8 @@ bool LLParser::parseDICompositeType(MDNode *&Result, bool IsDistinct) {
OPTIONAL(dataLocation, MDField, ); \
OPTIONAL(associated, MDField, ); \
OPTIONAL(allocated, MDField, ); \
- OPTIONAL(rank, MDSignedOrMDField, );
+ OPTIONAL(rank, MDSignedOrMDField, ); \
+ OPTIONAL(annotations, MDField, );
PARSE_MD_FIELDS();
#undef VISIT_MD_FIELDS
@@ -4633,7 +4613,7 @@ bool LLParser::parseDICompositeType(MDNode *&Result, bool IsDistinct) {
scope.Val, baseType.Val, size.Val, align.Val, offset.Val, flags.Val,
elements.Val, runtimeLang.Val, vtableHolder.Val, templateParams.Val,
discriminator.Val, dataLocation.Val, associated.Val, allocated.Val,
- Rank)) {
+ Rank, annotations.Val)) {
Result = CT;
return false;
}
@@ -4645,8 +4625,8 @@ bool LLParser::parseDICompositeType(MDNode *&Result, bool IsDistinct) {
(Context, tag.Val, name.Val, file.Val, line.Val, scope.Val, baseType.Val,
size.Val, align.Val, offset.Val, flags.Val, elements.Val,
runtimeLang.Val, vtableHolder.Val, templateParams.Val, identifier.Val,
- discriminator.Val, dataLocation.Val, associated.Val, allocated.Val,
- Rank));
+ discriminator.Val, dataLocation.Val, associated.Val, allocated.Val, Rank,
+ annotations.Val));
return false;
}
@@ -4746,7 +4726,8 @@ bool LLParser::parseDICompileUnit(MDNode *&Result, bool IsDistinct) {
/// virtuality: DW_VIRTUALITY_pure_virtual,
/// virtualIndex: 10, thisAdjustment: 4, flags: 11,
/// spFlags: 10, isOptimized: false, templateParams: !4,
-/// declaration: !5, retainedNodes: !6, thrownTypes: !7)
+/// declaration: !5, retainedNodes: !6, thrownTypes: !7,
+/// annotations: !8)
bool LLParser::parseDISubprogram(MDNode *&Result, bool IsDistinct) {
auto Loc = Lex.getLoc();
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
@@ -4770,7 +4751,8 @@ bool LLParser::parseDISubprogram(MDNode *&Result, bool IsDistinct) {
OPTIONAL(templateParams, MDField, ); \
OPTIONAL(declaration, MDField, ); \
OPTIONAL(retainedNodes, MDField, ); \
- OPTIONAL(thrownTypes, MDField, );
+ OPTIONAL(thrownTypes, MDField, ); \
+ OPTIONAL(annotations, MDField, );
PARSE_MD_FIELDS();
#undef VISIT_MD_FIELDS
@@ -4789,7 +4771,7 @@ bool LLParser::parseDISubprogram(MDNode *&Result, bool IsDistinct) {
(Context, scope.Val, name.Val, linkageName.Val, file.Val, line.Val,
type.Val, scopeLine.Val, containingType.Val, virtualIndex.Val,
thisAdjustment.Val, flags.Val, SPFlags, unit.Val, templateParams.Val,
- declaration.Val, retainedNodes.Val, thrownTypes.Val));
+ declaration.Val, retainedNodes.Val, thrownTypes.Val, annotations.Val));
return false;
}
@@ -4966,7 +4948,8 @@ bool LLParser::parseDIGlobalVariable(MDNode *&Result, bool IsDistinct) {
OPTIONAL(isDefinition, MDBoolField, (true)); \
OPTIONAL(templateParams, MDField, ); \
OPTIONAL(declaration, MDField, ); \
- OPTIONAL(align, MDUnsignedField, (0, UINT32_MAX));
+ OPTIONAL(align, MDUnsignedField, (0, UINT32_MAX)); \
+ OPTIONAL(annotations, MDField, );
PARSE_MD_FIELDS();
#undef VISIT_MD_FIELDS
@@ -4974,7 +4957,8 @@ bool LLParser::parseDIGlobalVariable(MDNode *&Result, bool IsDistinct) {
GET_OR_DISTINCT(DIGlobalVariable,
(Context, scope.Val, name.Val, linkageName.Val, file.Val,
line.Val, type.Val, isLocal.Val, isDefinition.Val,
- declaration.Val, templateParams.Val, align.Val));
+ declaration.Val, templateParams.Val, align.Val,
+ annotations.Val));
return false;
}
@@ -4994,13 +4978,15 @@ bool LLParser::parseDILocalVariable(MDNode *&Result, bool IsDistinct) {
OPTIONAL(line, LineField, ); \
OPTIONAL(type, MDField, ); \
OPTIONAL(flags, DIFlagField, ); \
- OPTIONAL(align, MDUnsignedField, (0, UINT32_MAX));
+ OPTIONAL(align, MDUnsignedField, (0, UINT32_MAX)); \
+ OPTIONAL(annotations, MDField, );
PARSE_MD_FIELDS();
#undef VISIT_MD_FIELDS
Result = GET_OR_DISTINCT(DILocalVariable,
(Context, scope.Val, name.Val, file.Val, line.Val,
- type.Val, arg.Val, flags.Val, align.Val));
+ type.Val, arg.Val, flags.Val, align.Val,
+ annotations.Val));
return false;
}
@@ -5136,7 +5122,7 @@ bool LLParser::parseDIObjCProperty(MDNode *&Result, bool IsDistinct) {
/// parseDIImportedEntity:
/// ::= !DIImportedEntity(tag: DW_TAG_imported_module, scope: !0, entity: !1,
-/// line: 7, name: "foo")
+/// line: 7, name: "foo", elements: !2)
bool LLParser::parseDIImportedEntity(MDNode *&Result, bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
REQUIRED(tag, DwarfTagField, ); \
@@ -5144,13 +5130,14 @@ bool LLParser::parseDIImportedEntity(MDNode *&Result, bool IsDistinct) {
OPTIONAL(entity, MDField, ); \
OPTIONAL(file, MDField, ); \
OPTIONAL(line, LineField, ); \
- OPTIONAL(name, MDStringField, );
+ OPTIONAL(name, MDStringField, ); \
+ OPTIONAL(elements, MDField, );
PARSE_MD_FIELDS();
#undef VISIT_MD_FIELDS
- Result = GET_OR_DISTINCT(
- DIImportedEntity,
- (Context, tag.Val, scope.Val, entity.Val, file.Val, line.Val, name.Val));
+ Result = GET_OR_DISTINCT(DIImportedEntity,
+ (Context, tag.Val, scope.Val, entity.Val, file.Val,
+ line.Val, name.Val, elements.Val));
return false;
}
@@ -5254,7 +5241,7 @@ bool LLParser::parseMetadata(Metadata *&MD, PerFunctionState *PFS) {
//===----------------------------------------------------------------------===//
bool LLParser::convertValIDToValue(Type *Ty, ValID &ID, Value *&V,
- PerFunctionState *PFS, bool IsCall) {
+ PerFunctionState *PFS) {
if (Ty->isFunctionTy())
return error(ID.Loc, "functions are not values, refer to them as pointers");
@@ -5262,12 +5249,12 @@ bool LLParser::convertValIDToValue(Type *Ty, ValID &ID, Value *&V,
case ValID::t_LocalID:
if (!PFS)
return error(ID.Loc, "invalid use of function-local name");
- V = PFS->getVal(ID.UIntVal, Ty, ID.Loc, IsCall);
+ V = PFS->getVal(ID.UIntVal, Ty, ID.Loc);
return V == nullptr;
case ValID::t_LocalName:
if (!PFS)
return error(ID.Loc, "invalid use of function-local name");
- V = PFS->getVal(ID.StrVal, Ty, ID.Loc, IsCall);
+ V = PFS->getVal(ID.StrVal, Ty, ID.Loc);
return V == nullptr;
case ValID::t_InlineAsm: {
if (!ID.FTy || !InlineAsm::Verify(ID.FTy, ID.StrVal2))
@@ -5278,10 +5265,10 @@ bool LLParser::convertValIDToValue(Type *Ty, ValID &ID, Value *&V,
return false;
}
case ValID::t_GlobalName:
- V = getGlobalVal(ID.StrVal, Ty, ID.Loc, IsCall);
+ V = getGlobalVal(ID.StrVal, Ty, ID.Loc);
return V == nullptr;
case ValID::t_GlobalID:
- V = getGlobalVal(ID.UIntVal, Ty, ID.Loc, IsCall);
+ V = getGlobalVal(ID.UIntVal, Ty, ID.Loc);
return V == nullptr;
case ValID::t_APSInt:
if (!Ty->isIntegerTy())
@@ -5405,7 +5392,7 @@ bool LLParser::parseConstantValue(Type *Ty, Constant *&C) {
case ValID::t_ConstantStruct:
case ValID::t_PackedConstantStruct: {
Value *V;
- if (convertValIDToValue(Ty, ID, V, /*PFS=*/nullptr, /*IsCall=*/false))
+ if (convertValIDToValue(Ty, ID, V, /*PFS=*/nullptr))
return true;
assert(isa<Constant>(V) && "Expected a constant value");
C = cast<Constant>(V);
@@ -5423,7 +5410,7 @@ bool LLParser::parseValue(Type *Ty, Value *&V, PerFunctionState *PFS) {
V = nullptr;
ValID ID;
return parseValID(ID, PFS, Ty) ||
- convertValIDToValue(Ty, ID, V, PFS, /*IsCall=*/false);
+ convertValIDToValue(Ty, ID, V, PFS);
}
bool LLParser::parseTypeAndValue(Value *&V, PerFunctionState *PFS) {
@@ -5571,7 +5558,7 @@ bool LLParser::parseFunctionHeader(Function *&Fn, bool IsDefine) {
AttributeList::get(Context, AttributeSet::get(Context, FuncAttrs),
AttributeSet::get(Context, RetAttrs), Attrs);
- if (PAL.hasAttribute(1, Attribute::StructRet) && !RetType->isVoidTy())
+ if (PAL.hasParamAttr(0, Attribute::StructRet) && !RetType->isVoidTy())
return error(RetTypeLoc, "functions with 'sret' argument must return void");
FunctionType *FT = FunctionType::get(RetType, ParamTypeList, IsVarArg);
@@ -5718,7 +5705,7 @@ bool LLParser::PerFunctionState::resolveForwardRefBlockAddresses() {
Value *ResolvedVal = BlockAddress::get(&F, BB);
ResolvedVal = P.checkValidVariableType(BBID.Loc, BBID.StrVal, GV->getType(),
- ResolvedVal, false);
+ ResolvedVal);
if (!ResolvedVal)
return true;
GV->replaceAllUsesWith(ResolvedVal);
@@ -6287,7 +6274,7 @@ bool LLParser::parseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
// Look up the callee.
Value *Callee;
if (convertValIDToValue(PointerType::get(Ty, InvokeAddrSpace), CalleeID,
- Callee, &PFS, /*IsCall=*/true))
+ Callee, &PFS))
return true;
// Set up the Attribute for the function.
@@ -6612,8 +6599,7 @@ bool LLParser::parseCallBr(Instruction *&Inst, PerFunctionState &PFS) {
// Look up the callee.
Value *Callee;
- if (convertValIDToValue(PointerType::getUnqual(Ty), CalleeID, Callee, &PFS,
- /*IsCall=*/true))
+ if (convertValIDToValue(PointerType::getUnqual(Ty), CalleeID, Callee, &PFS))
return true;
// Set up the Attribute for the function.
@@ -7019,7 +7005,7 @@ bool LLParser::parseCall(Instruction *&Inst, PerFunctionState &PFS,
// Look up the callee.
Value *Callee;
if (convertValIDToValue(PointerType::get(Ty, CallAddrSpace), CalleeID, Callee,
- &PFS, /*IsCall=*/true))
+ &PFS))
return true;
// Set up the Attribute for the function.
@@ -8543,12 +8529,15 @@ bool LLParser::parseFlag(unsigned &Val) {
/// [',' 'returnDoesNotAlias' ':' Flag]? ')'
/// [',' 'noInline' ':' Flag]? ')'
/// [',' 'alwaysInline' ':' Flag]? ')'
+/// [',' 'noUnwind' ':' Flag]? ')'
+/// [',' 'mayThrow' ':' Flag]? ')'
+/// [',' 'hasUnknownCall' ':' Flag]? ')'
bool LLParser::parseOptionalFFlags(FunctionSummary::FFlags &FFlags) {
assert(Lex.getKind() == lltok::kw_funcFlags);
Lex.Lex();
- if (parseToken(lltok::colon, "expected ':' in funcFlags") |
+ if (parseToken(lltok::colon, "expected ':' in funcFlags") ||
parseToken(lltok::lparen, "expected '(' in funcFlags"))
return true;
@@ -8591,6 +8580,24 @@ bool LLParser::parseOptionalFFlags(FunctionSummary::FFlags &FFlags) {
return true;
FFlags.AlwaysInline = Val;
break;
+ case lltok::kw_noUnwind:
+ Lex.Lex();
+ if (parseToken(lltok::colon, "expected ':'") || parseFlag(Val))
+ return true;
+ FFlags.NoUnwind = Val;
+ break;
+ case lltok::kw_mayThrow:
+ Lex.Lex();
+ if (parseToken(lltok::colon, "expected ':'") || parseFlag(Val))
+ return true;
+ FFlags.MayThrow = Val;
+ break;
+ case lltok::kw_hasUnknownCall:
+ Lex.Lex();
+ if (parseToken(lltok::colon, "expected ':'") || parseFlag(Val))
+ return true;
+ FFlags.HasUnknownCall = Val;
+ break;
default:
return error(Lex.getLoc(), "expected function flag type");
}
@@ -8610,7 +8617,7 @@ bool LLParser::parseOptionalCalls(std::vector<FunctionSummary::EdgeTy> &Calls) {
assert(Lex.getKind() == lltok::kw_calls);
Lex.Lex();
- if (parseToken(lltok::colon, "expected ':' in calls") |
+ if (parseToken(lltok::colon, "expected ':' in calls") ||
parseToken(lltok::lparen, "expected '(' in calls"))
return true;
@@ -8702,7 +8709,7 @@ bool LLParser::parseOptionalVTableFuncs(VTableFuncList &VTableFuncs) {
assert(Lex.getKind() == lltok::kw_vTableFuncs);
Lex.Lex();
- if (parseToken(lltok::colon, "expected ':' in vTableFuncs") |
+ if (parseToken(lltok::colon, "expected ':' in vTableFuncs") ||
parseToken(lltok::lparen, "expected '(' in vTableFuncs"))
return true;
diff --git a/llvm/lib/BinaryFormat/MsgPackDocumentYAML.cpp b/llvm/lib/BinaryFormat/MsgPackDocumentYAML.cpp
index 1d9c81ef8ebc..3de3dccce0c6 100644
--- a/llvm/lib/BinaryFormat/MsgPackDocumentYAML.cpp
+++ b/llvm/lib/BinaryFormat/MsgPackDocumentYAML.cpp
@@ -1,9 +1,8 @@
//===-- MsgPackDocumentYAML.cpp - MsgPack Document YAML interface -------*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
index f577d3886e01..2723105b092f 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
@@ -529,10 +529,9 @@ Error BitcodeAnalyzer::decodeMetadataStringsBlob(StringRef Indent,
if (R.AtEndOfStream())
return reportError("bad length");
- Expected<uint32_t> MaybeSize = R.ReadVBR(6);
- if (!MaybeSize)
- return MaybeSize.takeError();
- uint32_t Size = MaybeSize.get();
+ uint32_t Size;
+ if (Error E = R.ReadVBR(6).moveInto(Size))
+ return E;
if (Strings.size() < Size)
return reportError("truncated chars");
@@ -555,11 +554,8 @@ BitcodeAnalyzer::BitcodeAnalyzer(StringRef Buffer,
Error BitcodeAnalyzer::analyze(Optional<BCDumpOptions> O,
Optional<StringRef> CheckHash) {
- Expected<CurStreamTypeType> MaybeType = analyzeHeader(O, Stream);
- if (!MaybeType)
- return MaybeType.takeError();
- else
- CurStreamType = *MaybeType;
+ if (Error E = analyzeHeader(O, Stream).moveInto(CurStreamType))
+ return E;
Stream.setBlockInfo(&BlockInfo);
@@ -567,9 +563,8 @@ Error BitcodeAnalyzer::analyze(Optional<BCDumpOptions> O,
// The block info must be a top-level block.
if (BlockInfoStream) {
BitstreamCursor BlockInfoCursor(*BlockInfoStream);
- Expected<CurStreamTypeType> H = analyzeHeader(O, BlockInfoCursor);
- if (!H)
- return H.takeError();
+ if (Error E = analyzeHeader(O, BlockInfoCursor).takeError())
+ return E;
while (!BlockInfoCursor.AtEndOfStream()) {
Expected<unsigned> MaybeCode = BlockInfoCursor.ReadCode();
@@ -582,12 +577,11 @@ Error BitcodeAnalyzer::analyze(Optional<BCDumpOptions> O,
if (!MaybeBlockID)
return MaybeBlockID.takeError();
if (MaybeBlockID.get() == bitc::BLOCKINFO_BLOCK_ID) {
- Expected<Optional<BitstreamBlockInfo>> MaybeNewBlockInfo =
- BlockInfoCursor.ReadBlockInfoBlock(/*ReadBlockInfoNames=*/true);
- if (!MaybeNewBlockInfo)
- return MaybeNewBlockInfo.takeError();
- Optional<BitstreamBlockInfo> NewBlockInfo =
- std::move(MaybeNewBlockInfo.get());
+ Optional<BitstreamBlockInfo> NewBlockInfo;
+ if (Error E =
+ BlockInfoCursor.ReadBlockInfoBlock(/*ReadBlockInfoNames=*/true)
+ .moveInto(NewBlockInfo))
+ return E;
if (!NewBlockInfo)
return reportError("Malformed BlockInfoBlock in block info file");
BlockInfo = std::move(*NewBlockInfo);
@@ -744,22 +738,20 @@ Error BitcodeAnalyzer::parseBlock(unsigned BlockID, unsigned IndentLevel,
// BLOCKINFO is a special part of the stream.
bool DumpRecords = O.hasValue();
if (BlockID == bitc::BLOCKINFO_BLOCK_ID) {
- if (O)
+ if (O && !O->DumpBlockinfo)
O->OS << Indent << "<BLOCKINFO_BLOCK/>\n";
- Expected<Optional<BitstreamBlockInfo>> MaybeNewBlockInfo =
- Stream.ReadBlockInfoBlock(/*ReadBlockInfoNames=*/true);
- if (!MaybeNewBlockInfo)
- return MaybeNewBlockInfo.takeError();
- Optional<BitstreamBlockInfo> NewBlockInfo =
- std::move(MaybeNewBlockInfo.get());
+ Optional<BitstreamBlockInfo> NewBlockInfo;
+ if (Error E = Stream.ReadBlockInfoBlock(/*ReadBlockInfoNames=*/true)
+ .moveInto(NewBlockInfo))
+ return E;
if (!NewBlockInfo)
return reportError("Malformed BlockInfoBlock");
BlockInfo = std::move(*NewBlockInfo);
if (Error Err = Stream.JumpToBit(BlockBitStart))
return Err;
// It's not really interesting to dump the contents of the blockinfo
- // block.
- DumpRecords = false;
+ // block, so only do it if the user explicitly requests it.
+ DumpRecords = O && O->DumpBlockinfo;
}
unsigned NumWords = 0;
@@ -796,11 +788,10 @@ Error BitcodeAnalyzer::parseBlock(unsigned BlockID, unsigned IndentLevel,
uint64_t RecordStartBit = Stream.GetCurrentBitNo();
- Expected<BitstreamEntry> MaybeEntry =
- Stream.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs);
- if (!MaybeEntry)
- return MaybeEntry.takeError();
- BitstreamEntry Entry = MaybeEntry.get();
+ BitstreamEntry Entry;
+ if (Error E = Stream.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs)
+ .moveInto(Entry))
+ return E;
switch (Entry.Kind) {
case BitstreamEntry::Error:
@@ -847,10 +838,9 @@ Error BitcodeAnalyzer::parseBlock(unsigned BlockID, unsigned IndentLevel,
StringRef Blob;
uint64_t CurrentRecordPos = Stream.GetCurrentBitNo();
- Expected<unsigned> MaybeCode = Stream.readRecord(Entry.ID, Record, &Blob);
- if (!MaybeCode)
- return MaybeCode.takeError();
- unsigned Code = MaybeCode.get();
+ unsigned Code;
+ if (Error E = Stream.readRecord(Entry.ID, Record, &Blob).moveInto(Code))
+ return E;
// Increment the # occurrences of this code.
if (BlockStats.CodeFreq.size() <= Code)
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index d5e366c21f7d..c568461e62b0 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -41,7 +41,6 @@
#include "llvm/IR/GVMaterializer.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalIFunc.h"
-#include "llvm/IR/GlobalIndirectSymbol.h"
#include "llvm/IR/GlobalObject.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
@@ -180,10 +179,8 @@ static Expected<std::string> readIdentificationBlock(BitstreamCursor &Stream) {
while (true) {
BitstreamEntry Entry;
- if (Expected<BitstreamEntry> Res = Stream.advance())
- Entry = Res.get();
- else
- return Res.takeError();
+ if (Error E = Stream.advance().moveInto(Entry))
+ return std::move(E);
switch (Entry.Kind) {
default:
@@ -227,10 +224,8 @@ static Expected<std::string> readIdentificationCode(BitstreamCursor &Stream) {
return "";
BitstreamEntry Entry;
- if (Expected<BitstreamEntry> Res = Stream.advance())
- Entry = std::move(Res.get());
- else
- return Res.takeError();
+ if (Error E = Stream.advance().moveInto(Entry))
+ return std::move(E);
switch (Entry.Kind) {
case BitstreamEntry::EndBlock:
@@ -246,10 +241,9 @@ static Expected<std::string> readIdentificationCode(BitstreamCursor &Stream) {
return std::move(Err);
continue;
case BitstreamEntry::Record:
- if (Expected<unsigned> Skipped = Stream.skipRecord(Entry.ID))
- continue;
- else
- return Skipped.takeError();
+ if (Error E = Stream.skipRecord(Entry.ID).takeError())
+ return std::move(E);
+ continue;
}
}
}
@@ -306,10 +300,8 @@ static Expected<bool> hasObjCCategory(BitstreamCursor &Stream) {
// need to understand them all.
while (true) {
BitstreamEntry Entry;
- if (Expected<BitstreamEntry> Res = Stream.advance())
- Entry = std::move(Res.get());
- else
- return Res.takeError();
+ if (Error E = Stream.advance().moveInto(Entry))
+ return std::move(E);
switch (Entry.Kind) {
case BitstreamEntry::Error:
@@ -327,10 +319,9 @@ static Expected<bool> hasObjCCategory(BitstreamCursor &Stream) {
continue;
case BitstreamEntry::Record:
- if (Expected<unsigned> Skipped = Stream.skipRecord(Entry.ID))
- continue;
- else
- return Skipped.takeError();
+ if (Error E = Stream.skipRecord(Entry.ID).takeError())
+ return std::move(E);
+ continue;
}
}
}
@@ -500,10 +491,15 @@ class BitcodeReader : public BitcodeReaderBase, public GVMaterializer {
SmallVector<Instruction *, 64> InstructionList;
std::vector<std::pair<GlobalVariable *, unsigned>> GlobalInits;
- std::vector<std::pair<GlobalIndirectSymbol *, unsigned>> IndirectSymbolInits;
- std::vector<std::pair<Function *, unsigned>> FunctionPrefixes;
- std::vector<std::pair<Function *, unsigned>> FunctionPrologues;
- std::vector<std::pair<Function *, unsigned>> FunctionPersonalityFns;
+ std::vector<std::pair<GlobalValue *, unsigned>> IndirectSymbolInits;
+
+ struct FunctionOperandInfo {
+ Function *F;
+ unsigned PersonalityFn;
+ unsigned Prefix;
+ unsigned Prologue;
+ };
+ std::vector<FunctionOperandInfo> FunctionOperands;
/// The set of attributes by index. Index zero in the file is for null, and
/// is thus not represented here. As such all indices are off by one.
@@ -933,6 +929,9 @@ static FunctionSummary::FFlags getDecodedFFlags(uint64_t RawFlags) {
Flags.ReturnDoesNotAlias = (RawFlags >> 3) & 0x1;
Flags.NoInline = (RawFlags >> 4) & 0x1;
Flags.AlwaysInline = (RawFlags >> 5) & 0x1;
+ Flags.NoUnwind = (RawFlags >> 6) & 0x1;
+ Flags.MayThrow = (RawFlags >> 7) & 0x1;
+ Flags.HasUnknownCall = (RawFlags >> 8) & 0x1;
return Flags;
}
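
The three new summary flags are appended at bit positions 6 through 8 of the packed flag word, so bitcode written before this change decodes them as zero. A self-contained round-trip of that encoding; the FFlags struct below is a stand-in for the sketch, not the FunctionSummary type, and the lower-bit names simply follow the fields already visible above:

#include <cassert>
#include <cstdint>

struct FFlags {
  unsigned ReadNone : 1, ReadOnly : 1, NoRecurse : 1, ReturnDoesNotAlias : 1;
  unsigned NoInline : 1, AlwaysInline : 1;
  unsigned NoUnwind : 1, MayThrow : 1, HasUnknownCall : 1; // new, bits 6-8
};

static uint64_t encode(const FFlags &F) {
  return uint64_t(F.ReadNone) | uint64_t(F.ReadOnly) << 1 |
         uint64_t(F.NoRecurse) << 2 | uint64_t(F.ReturnDoesNotAlias) << 3 |
         uint64_t(F.NoInline) << 4 | uint64_t(F.AlwaysInline) << 5 |
         uint64_t(F.NoUnwind) << 6 | uint64_t(F.MayThrow) << 7 |
         uint64_t(F.HasUnknownCall) << 8;
}

static FFlags decode(uint64_t Raw) {
  FFlags F{};
  F.ReadNone = Raw & 0x1;
  F.ReadOnly = (Raw >> 1) & 0x1;
  F.NoRecurse = (Raw >> 2) & 0x1;
  F.ReturnDoesNotAlias = (Raw >> 3) & 0x1;
  F.NoInline = (Raw >> 4) & 0x1;
  F.AlwaysInline = (Raw >> 5) & 0x1;
  F.NoUnwind = (Raw >> 6) & 0x1;       // reads back as 0 from older bitcode
  F.MayThrow = (Raw >> 7) & 0x1;
  F.HasUnknownCall = (Raw >> 8) & 0x1;
  return F;
}

int main() {
  FFlags F{};
  F.NoUnwind = 1;
  F.HasUnknownCall = 1;
  FFlags R = decode(encode(F));
  assert(R.NoUnwind == 1 && R.MayThrow == 0 && R.HasUnknownCall == 1);
}
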
@@ -1388,6 +1387,8 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) {
return Attribute::Cold;
case bitc::ATTR_KIND_CONVERGENT:
return Attribute::Convergent;
+ case bitc::ATTR_KIND_DISABLE_SANITIZER_INSTRUMENTATION:
+ return Attribute::DisableSanitizerInstrumentation;
case bitc::ATTR_KIND_ELEMENTTYPE:
return Attribute::ElementType;
case bitc::ATTR_KIND_INACCESSIBLEMEM_ONLY:
@@ -1785,6 +1786,9 @@ Error BitcodeReader::parseTypeTableBody() {
case bitc::TYPE_CODE_OPAQUE_POINTER: { // OPAQUE_POINTER: [addrspace]
if (Record.size() != 1)
return error("Invalid record");
+ if (Context.supportsTypedPointers())
+ return error(
+ "Opaque pointers are only supported in -opaque-pointers mode");
unsigned AddressSpace = Record[0];
ResultTy = PointerType::get(Context, AddressSpace);
break;
@@ -1913,7 +1917,7 @@ Error BitcodeReader::parseTypeTableBody() {
if (Record[0] == 0)
return error("Invalid vector length");
ResultTy = getTypeByID(Record[1]);
- if (!ResultTy || !StructType::isValidElementType(ResultTy))
+ if (!ResultTy || !VectorType::isValidElementType(ResultTy))
return error("Invalid type");
bool Scalable = Record.size() > 2 ? Record[2] : false;
ResultTy = VectorType::get(ResultTy, Record[0], Scalable);
@@ -2240,17 +2244,12 @@ uint64_t BitcodeReader::decodeSignRotatedValue(uint64_t V) {
/// Resolve all of the initializers for global values and aliases that we can.
Error BitcodeReader::resolveGlobalAndIndirectSymbolInits() {
std::vector<std::pair<GlobalVariable *, unsigned>> GlobalInitWorklist;
- std::vector<std::pair<GlobalIndirectSymbol *, unsigned>>
- IndirectSymbolInitWorklist;
- std::vector<std::pair<Function *, unsigned>> FunctionPrefixWorklist;
- std::vector<std::pair<Function *, unsigned>> FunctionPrologueWorklist;
- std::vector<std::pair<Function *, unsigned>> FunctionPersonalityFnWorklist;
+ std::vector<std::pair<GlobalValue *, unsigned>> IndirectSymbolInitWorklist;
+ std::vector<FunctionOperandInfo> FunctionOperandWorklist;
GlobalInitWorklist.swap(GlobalInits);
IndirectSymbolInitWorklist.swap(IndirectSymbolInits);
- FunctionPrefixWorklist.swap(FunctionPrefixes);
- FunctionPrologueWorklist.swap(FunctionPrologues);
- FunctionPersonalityFnWorklist.swap(FunctionPersonalityFns);
+ FunctionOperandWorklist.swap(FunctionOperands);
while (!GlobalInitWorklist.empty()) {
unsigned ValID = GlobalInitWorklist.back().second;
@@ -2274,51 +2273,59 @@ Error BitcodeReader::resolveGlobalAndIndirectSymbolInits() {
Constant *C = dyn_cast_or_null<Constant>(ValueList[ValID]);
if (!C)
return error("Expected a constant");
- GlobalIndirectSymbol *GIS = IndirectSymbolInitWorklist.back().first;
- if (isa<GlobalAlias>(GIS) && C->getType() != GIS->getType())
- return error("Alias and aliasee types don't match");
- GIS->setIndirectSymbol(C);
+ GlobalValue *GV = IndirectSymbolInitWorklist.back().first;
+ if (auto *GA = dyn_cast<GlobalAlias>(GV)) {
+ if (C->getType() != GV->getType())
+ return error("Alias and aliasee types don't match");
+ GA->setAliasee(C);
+ } else if (auto *GI = dyn_cast<GlobalIFunc>(GV)) {
+ Type *ResolverFTy =
+ GlobalIFunc::getResolverFunctionType(GI->getValueType());
+ // Transparently fix up the type for compatibility with older bitcode
+ GI->setResolver(
+ ConstantExpr::getBitCast(C, ResolverFTy->getPointerTo()));
+ } else {
+ return error("Expected an alias or an ifunc");
+ }
}
IndirectSymbolInitWorklist.pop_back();
}
- while (!FunctionPrefixWorklist.empty()) {
- unsigned ValID = FunctionPrefixWorklist.back().second;
- if (ValID >= ValueList.size()) {
- FunctionPrefixes.push_back(FunctionPrefixWorklist.back());
- } else {
- if (Constant *C = dyn_cast_or_null<Constant>(ValueList[ValID]))
- FunctionPrefixWorklist.back().first->setPrefixData(C);
- else
- return error("Expected a constant");
+ while (!FunctionOperandWorklist.empty()) {
+ FunctionOperandInfo &Info = FunctionOperandWorklist.back();
+ if (Info.PersonalityFn) {
+ unsigned ValID = Info.PersonalityFn - 1;
+ if (ValID < ValueList.size()) {
+ if (Constant *C = dyn_cast_or_null<Constant>(ValueList[ValID]))
+ Info.F->setPersonalityFn(C);
+ else
+ return error("Expected a constant");
+ Info.PersonalityFn = 0;
+ }
}
- FunctionPrefixWorklist.pop_back();
- }
-
- while (!FunctionPrologueWorklist.empty()) {
- unsigned ValID = FunctionPrologueWorklist.back().second;
- if (ValID >= ValueList.size()) {
- FunctionPrologues.push_back(FunctionPrologueWorklist.back());
- } else {
- if (Constant *C = dyn_cast_or_null<Constant>(ValueList[ValID]))
- FunctionPrologueWorklist.back().first->setPrologueData(C);
- else
- return error("Expected a constant");
+ if (Info.Prefix) {
+ unsigned ValID = Info.Prefix - 1;
+ if (ValID < ValueList.size()) {
+ if (Constant *C = dyn_cast_or_null<Constant>(ValueList[ValID]))
+ Info.F->setPrefixData(C);
+ else
+ return error("Expected a constant");
+ Info.Prefix = 0;
+ }
}
- FunctionPrologueWorklist.pop_back();
- }
-
- while (!FunctionPersonalityFnWorklist.empty()) {
- unsigned ValID = FunctionPersonalityFnWorklist.back().second;
- if (ValID >= ValueList.size()) {
- FunctionPersonalityFns.push_back(FunctionPersonalityFnWorklist.back());
- } else {
- if (Constant *C = dyn_cast_or_null<Constant>(ValueList[ValID]))
- FunctionPersonalityFnWorklist.back().first->setPersonalityFn(C);
- else
- return error("Expected a constant");
+ if (Info.Prologue) {
+ unsigned ValID = Info.Prologue - 1;
+ if (ValID < ValueList.size()) {
+ if (Constant *C = dyn_cast_or_null<Constant>(ValueList[ValID]))
+ Info.F->setPrologueData(C);
+ else
+ return error("Expected a constant");
+ Info.Prologue = 0;
+ }
}
- FunctionPersonalityFnWorklist.pop_back();
+ if (Info.PersonalityFn || Info.Prefix || Info.Prologue)
+ FunctionOperands.push_back(Info);
+ FunctionOperandWorklist.pop_back();
}
return Error::success();
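
The rewritten resolution loop folds the three per-function worklists into one record per function, storing ValID + 1 in each slot so that 0 can mean "operand not present", and re-queues a record only while some operand is still unresolved. A stand-alone sketch of that bookkeeping with hypothetical names:

#include <string>
#include <vector>

struct PendingOps {
  std::string Fn;
  // Each field stores ValID + 1; 0 means "this operand was not present".
  unsigned PersonalityFn = 0, Prefix = 0, Prologue = 0;
};

// Resolve whatever is already available in Values; keep the rest queued.
void resolve(std::vector<PendingOps> &Worklist,
             const std::vector<int> &Values,
             std::vector<PendingOps> &StillPending) {
  while (!Worklist.empty()) {
    PendingOps Info = Worklist.back();
    Worklist.pop_back();
    auto tryResolve = [&](unsigned &Slot) {
      if (Slot && Slot - 1 < Values.size())
        Slot = 0; // a real reader would attach Values[Slot - 1] here
    };
    tryResolve(Info.PersonalityFn);
    tryResolve(Info.Prefix);
    tryResolve(Info.Prologue);
    if (Info.PersonalityFn || Info.Prefix || Info.Prologue)
      StillPending.push_back(Info); // some operand not materialized yet
  }
}
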
@@ -2351,6 +2358,15 @@ Error BitcodeReader::parseConstants() {
unsigned CstNo;
};
std::vector<DelayedShufTy> DelayedShuffles;
+ struct DelayedSelTy {
+ Type *OpTy;
+ uint64_t Op0Idx;
+ uint64_t Op1Idx;
+ uint64_t Op2Idx;
+ unsigned CstNo;
+ };
+ std::vector<DelayedSelTy> DelayedSelectors;
+
while (true) {
Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
if (!MaybeEntry)
@@ -2387,6 +2403,27 @@ Error BitcodeReader::parseConstants() {
Value *V = ConstantExpr::getShuffleVector(Op0, Op1, Mask);
ValueList.assignValue(V, CstNo);
}
+ for (auto &DelayedSelector : DelayedSelectors) {
+ Type *OpTy = DelayedSelector.OpTy;
+ Type *SelectorTy = Type::getInt1Ty(Context);
+ uint64_t Op0Idx = DelayedSelector.Op0Idx;
+ uint64_t Op1Idx = DelayedSelector.Op1Idx;
+ uint64_t Op2Idx = DelayedSelector.Op2Idx;
+ uint64_t CstNo = DelayedSelector.CstNo;
+ Constant *Op1 = ValueList.getConstantFwdRef(Op1Idx, OpTy);
+ Constant *Op2 = ValueList.getConstantFwdRef(Op2Idx, OpTy);
+ // The selector might be an i1 or an <n x i1>
+ // Get the type from the ValueList before getting a forward ref.
+ if (VectorType *VTy = dyn_cast<VectorType>(OpTy)) {
+ Value *V = ValueList[Op0Idx];
+ assert(V);
+ if (SelectorTy != V->getType())
+ SelectorTy = VectorType::get(SelectorTy, VTy->getElementCount());
+ }
+ Constant *Op0 = ValueList.getConstantFwdRef(Op0Idx, SelectorTy);
+ Value *V = ConstantExpr::getSelect(Op0, Op1, Op2);
+ ValueList.assignValue(V, CstNo);
+ }
if (NextCstNo != ValueList.size())
return error("Invalid constant reference");
@@ -2683,21 +2720,11 @@ Error BitcodeReader::parseConstants() {
if (Record.size() < 3)
return error("Invalid record");
- Type *SelectorTy = Type::getInt1Ty(Context);
-
- // The selector might be an i1, an <n x i1>, or a <vscale x n x i1>
- // Get the type from the ValueList before getting a forward ref.
- if (VectorType *VTy = dyn_cast<VectorType>(CurTy))
- if (Value *V = ValueList[Record[0]])
- if (SelectorTy != V->getType())
- SelectorTy = VectorType::get(SelectorTy,
- VTy->getElementCount());
-
- V = ConstantExpr::getSelect(ValueList.getConstantFwdRef(Record[0],
- SelectorTy),
- ValueList.getConstantFwdRef(Record[1],CurTy),
- ValueList.getConstantFwdRef(Record[2],CurTy));
- break;
+ DelayedSelectors.push_back(
+ {CurTy, Record[0], Record[1], Record[2], NextCstNo});
+ (void)ValueList.getConstantFwdRef(NextCstNo, CurTy);
+ ++NextCstNo;
+ continue;
}
case bitc::CST_CODE_CE_EXTRACTELT
: { // CE_EXTRACTELT: [opty, opval, opty, opval]
@@ -3091,8 +3118,7 @@ Error BitcodeReader::globalCleanup() {
// Force deallocation of memory for these vectors to favor the client that
// want lazy deserialization.
std::vector<std::pair<GlobalVariable *, unsigned>>().swap(GlobalInits);
- std::vector<std::pair<GlobalIndirectSymbol *, unsigned>>().swap(
- IndirectSymbolInits);
+ std::vector<std::pair<GlobalValue *, unsigned>>().swap(IndirectSymbolInits);
return Error::success();
}
@@ -3270,7 +3296,7 @@ Error BitcodeReader::parseGlobalVarRecord(ArrayRef<uint64_t> Record) {
}
if (Record.size() > 12) {
- auto AS = getAttributes(Record[12]).getFnAttributes();
+ auto AS = getAttributes(Record[12]).getFnAttrs();
NewGV->setAttributes(AS);
}
@@ -3383,8 +3409,10 @@ Error BitcodeReader::parseFunctionRecord(ArrayRef<uint64_t> Record) {
if (Record.size() > 9)
UnnamedAddr = getDecodedUnnamedAddrType(Record[9]);
Func->setUnnamedAddr(UnnamedAddr);
- if (Record.size() > 10 && Record[10] != 0)
- FunctionPrologues.push_back(std::make_pair(Func, Record[10] - 1));
+
+ FunctionOperandInfo OperandInfo = {Func, 0, 0, 0};
+ if (Record.size() > 10)
+ OperandInfo.Prologue = Record[10];
if (Record.size() > 11)
Func->setDLLStorageClass(getDecodedDLLStorageClass(Record[11]));
@@ -3401,11 +3429,11 @@ Error BitcodeReader::parseFunctionRecord(ArrayRef<uint64_t> Record) {
Func->setComdat(reinterpret_cast<Comdat *>(1));
}
- if (Record.size() > 13 && Record[13] != 0)
- FunctionPrefixes.push_back(std::make_pair(Func, Record[13] - 1));
+ if (Record.size() > 13)
+ OperandInfo.Prefix = Record[13];
- if (Record.size() > 14 && Record[14] != 0)
- FunctionPersonalityFns.push_back(std::make_pair(Func, Record[14] - 1));
+ if (Record.size() > 14)
+ OperandInfo.PersonalityFn = Record[14];
if (Record.size() > 15) {
Func->setDSOLocal(getDecodedDSOLocal(Record[15]));
@@ -3423,6 +3451,9 @@ Error BitcodeReader::parseFunctionRecord(ArrayRef<uint64_t> Record) {
ValueList.push_back(Func);
+ if (OperandInfo.PersonalityFn || OperandInfo.Prefix || OperandInfo.Prologue)
+ FunctionOperands.push_back(OperandInfo);
+
// If this is a function with a body, remember the prototype we are
// creating now, so that we can match up the body with them later.
if (!isProto) {
@@ -3467,7 +3498,7 @@ Error BitcodeReader::parseGlobalIndirectSymbolRecord(
auto Val = Record[OpNum++];
auto Linkage = Record[OpNum++];
- GlobalIndirectSymbol *NewGA;
+ GlobalValue *NewGA;
if (BitCode == bitc::MODULE_CODE_ALIAS ||
BitCode == bitc::MODULE_CODE_ALIAS_OLD)
NewGA = GlobalAlias::create(Ty, AddrSpace, getDecodedLinkage(Linkage), Name,
@@ -4898,8 +4929,10 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
Type *OpTy = getTypeByID(Record[1]);
Value *Size = getFnValueByID(Record[2], OpTy);
MaybeAlign Align;
- if (Error Err =
- parseAlignmentValue(Bitfield::get<APV::Align>(Rec), Align)) {
+ uint64_t AlignExp =
+ Bitfield::get<APV::AlignLower>(Rec) |
+ (Bitfield::get<APV::AlignUpper>(Rec) << APV::AlignLower::Bits);
+ if (Error Err = parseAlignmentValue(AlignExp, Align)) {
return Err;
}
if (!Ty || !Size)
@@ -5505,21 +5538,16 @@ Error BitcodeReader::materialize(GlobalValue *GV) {
// Upgrade any old intrinsic calls in the function.
for (auto &I : UpgradedIntrinsics) {
- for (auto UI = I.first->materialized_user_begin(), UE = I.first->user_end();
- UI != UE;) {
- User *U = *UI;
- ++UI;
+ for (User *U : llvm::make_early_inc_range(I.first->materialized_users()))
if (CallInst *CI = dyn_cast<CallInst>(U))
UpgradeIntrinsicCall(CI, I.second);
- }
}
// Update calls to the remangled intrinsics
for (auto &I : RemangledIntrinsics)
- for (auto UI = I.first->materialized_user_begin(), UE = I.first->user_end();
- UI != UE;)
+ for (User *U : llvm::make_early_inc_range(I.first->materialized_users()))
// Don't expect any other users than call sites
- cast<CallBase>(*UI++)->setCalledFunction(I.second);
+ cast<CallBase>(U)->setCalledFunction(I.second);
// Finish fn->subprogram upgrade for materialized functions.
if (DISubprogram *SP = MDLoader->lookupSubprogramForFunction(F))
@@ -5567,9 +5595,8 @@ Error BitcodeReader::materialize(GlobalValue *GV) {
// Remove incompatible attributes on function calls.
if (auto *CI = dyn_cast<CallBase>(&I)) {
- CI->removeAttributes(AttributeList::ReturnIndex,
- AttributeFuncs::typeIncompatible(
- CI->getFunctionType()->getReturnType()));
+ CI->removeRetAttrs(AttributeFuncs::typeIncompatible(
+ CI->getFunctionType()->getReturnType()));
for (unsigned ArgNo = 0; ArgNo < CI->arg_size(); ++ArgNo)
CI->removeParamAttrs(ArgNo, AttributeFuncs::typeIncompatible(
@@ -6742,10 +6769,9 @@ llvm::getBitcodeFileContents(MemoryBufferRef Buffer) {
continue;
}
case BitstreamEntry::Record:
- if (Expected<unsigned> StreamFailed = Stream.skipRecord(Entry.ID))
- continue;
- else
- return StreamFailed.takeError();
+ if (Error E = Stream.skipRecord(Entry.ID).takeError())
+ return std::move(E);
+ continue;
}
}
}
@@ -6768,12 +6794,9 @@ BitcodeModule::getModuleImpl(LLVMContext &Context, bool MaterializeAll,
if (IdentificationBit != -1ull) {
if (Error JumpFailed = Stream.JumpToBit(IdentificationBit))
return std::move(JumpFailed);
- Expected<std::string> ProducerIdentificationOrErr =
- readIdentificationBlock(Stream);
- if (!ProducerIdentificationOrErr)
- return ProducerIdentificationOrErr.takeError();
-
- ProducerIdentification = *ProducerIdentificationOrErr;
+ if (Error E =
+ readIdentificationBlock(Stream).moveInto(ProducerIdentification))
+ return std::move(E);
}
if (Error JumpFailed = Stream.JumpToBit(ModuleBit))
@@ -6847,10 +6870,9 @@ static Expected<bool> getEnableSplitLTOUnitFlag(BitstreamCursor &Stream,
SmallVector<uint64_t, 64> Record;
while (true) {
- Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
- if (!MaybeEntry)
- return MaybeEntry.takeError();
- BitstreamEntry Entry = MaybeEntry.get();
+ BitstreamEntry Entry;
+ if (Error E = Stream.advanceSkippingSubblocks().moveInto(Entry))
+ return std::move(E);
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
@@ -6895,10 +6917,9 @@ Expected<BitcodeLTOInfo> BitcodeModule::getLTOInfo() {
return std::move(Err);
while (true) {
- Expected<llvm::BitstreamEntry> MaybeEntry = Stream.advance();
- if (!MaybeEntry)
- return MaybeEntry.takeError();
- llvm::BitstreamEntry Entry = MaybeEntry.get();
+ llvm::BitstreamEntry Entry;
+ if (Error E = Stream.advance().moveInto(Entry))
+ return std::move(E);
switch (Entry.Kind) {
case BitstreamEntry::Error:
diff --git a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
index 8493eb7a28b2..6df5a4a64d51 100644
--- a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
+++ b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
@@ -21,8 +21,8 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Bitcode/BitcodeReader.h"
-#include "llvm/Bitstream/BitstreamReader.h"
#include "llvm/Bitcode/LLVMBitCodes.h"
+#include "llvm/Bitstream/BitstreamReader.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/AutoUpgrade.h"
@@ -40,7 +40,6 @@
#include "llvm/IR/GVMaterializer.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalIFunc.h"
-#include "llvm/IR/GlobalIndirectSymbol.h"
#include "llvm/IR/GlobalObject.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
@@ -363,7 +362,8 @@ class PlaceholderQueue {
public:
~PlaceholderQueue() {
- assert(empty() && "PlaceholderQueue hasn't been flushed before being destroyed");
+ assert(empty() &&
+ "PlaceholderQueue hasn't been flushed before being destroyed");
}
bool empty() const { return PHs.empty(); }
DistinctMDOperandPlaceholder &getPlaceholderOp(unsigned ID);
@@ -546,7 +546,7 @@ class MetadataLoader::MetadataLoaderImpl {
if (auto *DDI = dyn_cast<DbgDeclareInst>(&I))
if (auto *DIExpr = DDI->getExpression())
if (DIExpr->startsWithDeref() &&
- dyn_cast_or_null<Argument>(DDI->getAddress())) {
+ isa_and_nonnull<Argument>(DDI->getAddress())) {
SmallVector<uint64_t, 8> Ops;
Ops.append(std::next(DIExpr->elements_begin()),
DIExpr->elements_end());
@@ -604,7 +604,7 @@ class MetadataLoader::MetadataLoaderImpl {
// If the expression is malformed, make sure we don't
// copy more elements than we should.
HistoricSize = std::min(SubExpr.size(), HistoricSize);
- ArrayRef<uint64_t> Args = SubExpr.slice(1, HistoricSize-1);
+ ArrayRef<uint64_t> Args = SubExpr.slice(1, HistoricSize - 1);
switch (SubExpr.front()) {
case dwarf::DW_OP_plus:
@@ -698,11 +698,12 @@ MetadataLoader::MetadataLoaderImpl::lazyLoadModuleMetadataBlock() {
// Get the abbrevs, and preload record positions to make them lazy-loadable.
while (true) {
uint64_t SavedPos = IndexCursor.GetCurrentBitNo();
- Expected<BitstreamEntry> MaybeEntry = IndexCursor.advanceSkippingSubblocks(
- BitstreamCursor::AF_DontPopBlockAtEnd);
- if (!MaybeEntry)
- return MaybeEntry.takeError();
- BitstreamEntry Entry = MaybeEntry.get();
+ BitstreamEntry Entry;
+ if (Error E =
+ IndexCursor
+ .advanceSkippingSubblocks(BitstreamCursor::AF_DontPopBlockAtEnd)
+ .moveInto(Entry))
+ return std::move(E);
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
@@ -715,10 +716,9 @@ MetadataLoader::MetadataLoaderImpl::lazyLoadModuleMetadataBlock() {
// The interesting case.
++NumMDRecordLoaded;
uint64_t CurrentPos = IndexCursor.GetCurrentBitNo();
- Expected<unsigned> MaybeCode = IndexCursor.skipRecord(Entry.ID);
- if (!MaybeCode)
- return MaybeCode.takeError();
- unsigned Code = MaybeCode.get();
+ unsigned Code;
+ if (Error E = IndexCursor.skipRecord(Entry.ID).moveInto(Code))
+ return std::move(E);
switch (Code) {
case bitc::METADATA_STRINGS: {
// Rewind and parse the strings.
@@ -905,11 +905,12 @@ Expected<bool> MetadataLoader::MetadataLoaderImpl::loadGlobalDeclAttachments() {
if (Error Err = TempCursor.JumpToBit(GlobalDeclAttachmentPos))
return std::move(Err);
while (true) {
- Expected<BitstreamEntry> MaybeEntry = TempCursor.advanceSkippingSubblocks(
- BitstreamCursor::AF_DontPopBlockAtEnd);
- if (!MaybeEntry)
- return MaybeEntry.takeError();
- BitstreamEntry Entry = MaybeEntry.get();
+ BitstreamEntry Entry;
+ if (Error E =
+ TempCursor
+ .advanceSkippingSubblocks(BitstreamCursor::AF_DontPopBlockAtEnd)
+ .moveInto(Entry))
+ return std::move(E);
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
@@ -1025,10 +1026,9 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadata(bool ModuleLevel) {
// Read all the records.
while (true) {
- Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
- if (!MaybeEntry)
- return MaybeEntry.takeError();
- BitstreamEntry Entry = MaybeEntry.get();
+ BitstreamEntry Entry;
+ if (Error E = Stream.advanceSkippingSubblocks().moveInto(Entry))
+ return E;
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
@@ -1081,22 +1081,22 @@ void MetadataLoader::MetadataLoaderImpl::lazyLoadOneMetadata(
if (Error Err = IndexCursor.JumpToBit(
GlobalMetadataBitPosIndex[ID - MDStringRef.size()]))
report_fatal_error("lazyLoadOneMetadata failed jumping: " +
- toString(std::move(Err)));
- Expected<BitstreamEntry> MaybeEntry = IndexCursor.advanceSkippingSubblocks();
- if (!MaybeEntry)
+ Twine(toString(std::move(Err))));
+ BitstreamEntry Entry;
+ if (Error E = IndexCursor.advanceSkippingSubblocks().moveInto(Entry))
// FIXME this drops the error on the floor.
report_fatal_error("lazyLoadOneMetadata failed advanceSkippingSubblocks: " +
- toString(MaybeEntry.takeError()));
- BitstreamEntry Entry = MaybeEntry.get();
+ Twine(toString(std::move(E))));
++NumMDRecordLoaded;
if (Expected<unsigned> MaybeCode =
IndexCursor.readRecord(Entry.ID, Record, &Blob)) {
if (Error Err =
parseOneMetadata(Record, MaybeCode.get(), Placeholders, Blob, ID))
report_fatal_error("Can't lazyload MD, parseOneMetadata: " +
- toString(std::move(Err)));
+ Twine(toString(std::move(Err))));
} else
- report_fatal_error("Can't lazyload MD: " + toString(MaybeCode.takeError()));
+ report_fatal_error("Can't lazyload MD: " +
+ Twine(toString(MaybeCode.takeError())));
}
/// Ensure that all forward-references and placeholders are resolved.
@@ -1193,10 +1193,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
// Read name of the named metadata.
SmallString<8> Name(Record.begin(), Record.end());
Record.clear();
- Expected<unsigned> MaybeCode = Stream.ReadCode();
- if (!MaybeCode)
- return MaybeCode.takeError();
- Code = MaybeCode.get();
+ if (Error E = Stream.ReadCode().moveInto(Code))
+ return E;
++NumMDRecordLoaded;
if (Expected<unsigned> MaybeNextBitCode = Stream.readRecord(Code, Record)) {
@@ -1411,8 +1409,9 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
return error("Invalid record");
IsDistinct = Record[0];
- DINode::DIFlags Flags = (Record.size() > 6) ?
- static_cast<DINode::DIFlags>(Record[6]) : DINode::FlagZero;
+ DINode::DIFlags Flags = (Record.size() > 6)
+ ? static_cast<DINode::DIFlags>(Record[6])
+ : DINode::FlagZero;
MetadataList.assignValue(
GET_OR_DISTINCT(DIBasicType,
@@ -1437,7 +1436,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
break;
}
case bitc::METADATA_DERIVED_TYPE: {
- if (Record.size() < 12 || Record.size() > 13)
+ if (Record.size() < 12 || Record.size() > 14)
return error("Invalid record");
// DWARF address space is encoded as N->getDWARFAddressSpace() + 1. 0 means
@@ -1446,6 +1445,10 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
if (Record.size() > 12 && Record[12])
DWARFAddressSpace = Record[12] - 1;
+ Metadata *Annotations = nullptr;
+ if (Record.size() > 13 && Record[13])
+ Annotations = getMDOrNull(Record[13]);
+
IsDistinct = Record[0];
DINode::DIFlags Flags = static_cast<DINode::DIFlags>(Record[10]);
MetadataList.assignValue(
@@ -1455,13 +1458,13 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
getDITypeRefOrNull(Record[5]),
getDITypeRefOrNull(Record[6]), Record[7], Record[8],
Record[9], DWARFAddressSpace, Flags,
- getDITypeRefOrNull(Record[11]))),
+ getDITypeRefOrNull(Record[11]), Annotations)),
NextMetadataNo);
NextMetadataNo++;
break;
}
case bitc::METADATA_COMPOSITE_TYPE: {
- if (Record.size() < 16 || Record.size() > 21)
+ if (Record.size() < 16 || Record.size() > 22)
return error("Invalid record");
// If we have a UUID and this is not a forward declaration, lookup the
@@ -1489,6 +1492,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
Metadata *Associated = nullptr;
Metadata *Allocated = nullptr;
Metadata *Rank = nullptr;
+ Metadata *Annotations = nullptr;
auto *Identifier = getMDString(Record[15]);
// If this module is being parsed so that it can be ThinLTO imported
// into another module, composite types only need to be imported
@@ -1520,6 +1524,9 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
if (Record.size() > 20) {
Rank = getMDOrNull(Record[20]);
}
+ if (Record.size() > 21) {
+ Annotations = getMDOrNull(Record[21]);
+ }
}
DICompositeType *CT = nullptr;
if (Identifier)
@@ -1527,7 +1534,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
Context, *Identifier, Tag, Name, File, Line, Scope, BaseType,
SizeInBits, AlignInBits, OffsetInBits, Flags, Elements, RuntimeLang,
VTableHolder, TemplateParams, Discriminator, DataLocation, Associated,
- Allocated, Rank);
+ Allocated, Rank, Annotations);
// Create a node if we didn't get a lazy ODR type.
if (!CT)
@@ -1536,7 +1543,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
SizeInBits, AlignInBits, OffsetInBits, Flags,
Elements, RuntimeLang, VTableHolder, TemplateParams,
Identifier, Discriminator, DataLocation, Associated,
- Allocated, Rank));
+ Allocated, Rank, Annotations));
if (!IsNotUsedInTypeRef && Identifier)
MetadataList.addTypeRef(*Identifier, *cast<DICompositeType>(CT));
@@ -1665,9 +1672,9 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
SPFlags |= DISubprogram::SPFlagMainSubprogram;
else if (!HasSPFlags)
SPFlags = DISubprogram::toSPFlags(
- /*IsLocalToUnit=*/Record[7], /*IsDefinition=*/Record[8],
- /*IsOptimized=*/Record[14], /*Virtuality=*/Record[11],
- /*DIFlagMainSubprogram*/HasOldMainSubprogramFlag);
+ /*IsLocalToUnit=*/Record[7], /*IsDefinition=*/Record[8],
+ /*IsOptimized=*/Record[14], /*Virtuality=*/Record[11],
+ /*DIFlagMainSubprogram=*/HasOldMainSubprogramFlag);
// All definitions should be distinct.
IsDistinct = (Record[0] & 1) || (SPFlags & DISubprogram::SPFlagDefinition);
@@ -1685,6 +1692,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
bool HasFn = false;
bool HasThisAdj = true;
bool HasThrownTypes = true;
+ bool HasAnnotations = false;
unsigned OffsetA = 0;
unsigned OffsetB = 0;
if (!HasSPFlags) {
@@ -1696,29 +1704,33 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
}
HasThisAdj = Record.size() >= 20;
HasThrownTypes = Record.size() >= 21;
+ } else {
+ HasAnnotations = Record.size() >= 19;
}
Metadata *CUorFn = getMDOrNull(Record[12 + OffsetB]);
DISubprogram *SP = GET_OR_DISTINCT(
DISubprogram,
(Context,
- getDITypeRefOrNull(Record[1]), // scope
- getMDString(Record[2]), // name
- getMDString(Record[3]), // linkageName
- getMDOrNull(Record[4]), // file
- Record[5], // line
- getMDOrNull(Record[6]), // type
- Record[7 + OffsetA], // scopeLine
- getDITypeRefOrNull(Record[8 + OffsetA]), // containingType
- Record[10 + OffsetA], // virtualIndex
- HasThisAdj ? Record[16 + OffsetB] : 0, // thisAdjustment
- Flags, // flags
- SPFlags, // SPFlags
- HasUnit ? CUorFn : nullptr, // unit
- getMDOrNull(Record[13 + OffsetB]), // templateParams
- getMDOrNull(Record[14 + OffsetB]), // declaration
- getMDOrNull(Record[15 + OffsetB]), // retainedNodes
+ getDITypeRefOrNull(Record[1]), // scope
+ getMDString(Record[2]), // name
+ getMDString(Record[3]), // linkageName
+ getMDOrNull(Record[4]), // file
+ Record[5], // line
+ getMDOrNull(Record[6]), // type
+ Record[7 + OffsetA], // scopeLine
+ getDITypeRefOrNull(Record[8 + OffsetA]), // containingType
+ Record[10 + OffsetA], // virtualIndex
+ HasThisAdj ? Record[16 + OffsetB] : 0, // thisAdjustment
+ Flags, // flags
+ SPFlags, // SPFlags
+ HasUnit ? CUorFn : nullptr, // unit
+ getMDOrNull(Record[13 + OffsetB]), // templateParams
+ getMDOrNull(Record[14 + OffsetB]), // declaration
+ getMDOrNull(Record[15 + OffsetB]), // retainedNodes
HasThrownTypes ? getMDOrNull(Record[17 + OffsetB])
- : nullptr // thrownTypes
+ : nullptr, // thrownTypes
+ HasAnnotations ? getMDOrNull(Record[18 + OffsetB])
+ : nullptr // annotations
));
MetadataList.assignValue(SP, NextMetadataNo);
NextMetadataNo++;
@@ -1860,13 +1872,18 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
unsigned Version = Record[0] >> 1;
if (Version == 2) {
+ Metadata *Annotations = nullptr;
+ if (Record.size() > 12)
+ Annotations = getMDOrNull(Record[12]);
+
MetadataList.assignValue(
- GET_OR_DISTINCT(
- DIGlobalVariable,
- (Context, getMDOrNull(Record[1]), getMDString(Record[2]),
- getMDString(Record[3]), getMDOrNull(Record[4]), Record[5],
- getDITypeRefOrNull(Record[6]), Record[7], Record[8],
- getMDOrNull(Record[9]), getMDOrNull(Record[10]), Record[11])),
+ GET_OR_DISTINCT(DIGlobalVariable,
+ (Context, getMDOrNull(Record[1]),
+ getMDString(Record[2]), getMDString(Record[3]),
+ getMDOrNull(Record[4]), Record[5],
+ getDITypeRefOrNull(Record[6]), Record[7], Record[8],
+ getMDOrNull(Record[9]), getMDOrNull(Record[10]),
+ Record[11], Annotations)),
NextMetadataNo);
NextMetadataNo++;
@@ -1874,12 +1891,12 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
// No upgrade necessary. A null field will be introduced to indicate
// that no parameter information is available.
MetadataList.assignValue(
- GET_OR_DISTINCT(DIGlobalVariable,
- (Context, getMDOrNull(Record[1]),
- getMDString(Record[2]), getMDString(Record[3]),
- getMDOrNull(Record[4]), Record[5],
- getDITypeRefOrNull(Record[6]), Record[7], Record[8],
- getMDOrNull(Record[10]), nullptr, Record[11])),
+ GET_OR_DISTINCT(
+ DIGlobalVariable,
+ (Context, getMDOrNull(Record[1]), getMDString(Record[2]),
+ getMDString(Record[3]), getMDOrNull(Record[4]), Record[5],
+ getDITypeRefOrNull(Record[6]), Record[7], Record[8],
+ getMDOrNull(Record[10]), nullptr, Record[11], nullptr)),
NextMetadataNo);
NextMetadataNo++;
@@ -1912,7 +1929,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
(Context, getMDOrNull(Record[1]), getMDString(Record[2]),
getMDString(Record[3]), getMDOrNull(Record[4]), Record[5],
getDITypeRefOrNull(Record[6]), Record[7], Record[8],
- getMDOrNull(Record[10]), nullptr, AlignInBits));
+ getMDOrNull(Record[10]), nullptr, AlignInBits, nullptr));
DIGlobalVariableExpression *DGVE = nullptr;
if (Attach || Expr)
@@ -1942,18 +1959,22 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
bool HasTag = !HasAlignment && Record.size() > 8;
DINode::DIFlags Flags = static_cast<DINode::DIFlags>(Record[7 + HasTag]);
uint32_t AlignInBits = 0;
+ Metadata *Annotations = nullptr;
if (HasAlignment) {
- if (Record[8 + HasTag] > (uint64_t)std::numeric_limits<uint32_t>::max())
+ if (Record[8] > (uint64_t)std::numeric_limits<uint32_t>::max())
return error("Alignment value is too large");
- AlignInBits = Record[8 + HasTag];
+ AlignInBits = Record[8];
+ if (Record.size() > 9)
+ Annotations = getMDOrNull(Record[9]);
}
+
MetadataList.assignValue(
GET_OR_DISTINCT(DILocalVariable,
(Context, getMDOrNull(Record[1 + HasTag]),
getMDString(Record[2 + HasTag]),
getMDOrNull(Record[3 + HasTag]), Record[4 + HasTag],
getDITypeRefOrNull(Record[5 + HasTag]),
- Record[6 + HasTag], Flags, AlignInBits)),
+ Record[6 + HasTag], Flags, AlignInBits, Annotations)),
NextMetadataNo);
NextMetadataNo++;
break;
@@ -1964,10 +1985,9 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
IsDistinct = Record[0] & 1;
MetadataList.assignValue(
- GET_OR_DISTINCT(DILabel,
- (Context, getMDOrNull(Record[1]),
- getMDString(Record[2]),
- getMDOrNull(Record[3]), Record[4])),
+ GET_OR_DISTINCT(DILabel, (Context, getMDOrNull(Record[1]),
+ getMDString(Record[2]),
+ getMDOrNull(Record[3]), Record[4])),
NextMetadataNo);
NextMetadataNo++;
break;
@@ -1984,8 +2004,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
if (Error Err = upgradeDIExpression(Version, Elts, Buffer))
return Err;
- MetadataList.assignValue(
- GET_OR_DISTINCT(DIExpression, (Context, Elts)), NextMetadataNo);
+ MetadataList.assignValue(GET_OR_DISTINCT(DIExpression, (Context, Elts)),
+ NextMetadataNo);
NextMetadataNo++;
break;
}
@@ -2020,17 +2040,19 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
break;
}
case bitc::METADATA_IMPORTED_ENTITY: {
- if (Record.size() != 6 && Record.size() != 7)
+ if (Record.size() < 6 || Record.size() > 8)
return error("Invalid record");
IsDistinct = Record[0];
- bool HasFile = (Record.size() == 7);
+ bool HasFile = (Record.size() >= 7);
+ bool HasElements = (Record.size() >= 8);
MetadataList.assignValue(
GET_OR_DISTINCT(DIImportedEntity,
(Context, Record[1], getMDOrNull(Record[2]),
getDITypeRefOrNull(Record[3]),
HasFile ? getMDOrNull(Record[6]) : nullptr,
- HasFile ? Record[4] : 0, getMDString(Record[5]))),
+ HasFile ? Record[4] : 0, getMDString(Record[5]),
+ HasElements ? getMDOrNull(Record[7]) : nullptr)),
NextMetadataNo);
NextMetadataNo++;
break;
@@ -2121,10 +2143,9 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadataStrings(
if (R.AtEndOfStream())
return error("Invalid record: metadata strings bad length");
- Expected<uint32_t> MaybeSize = R.ReadVBR(6);
- if (!MaybeSize)
- return MaybeSize.takeError();
- uint32_t Size = MaybeSize.get();
+ uint32_t Size;
+ if (Error E = R.ReadVBR(6).moveInto(Size))
+ return E;
if (Strings.size() < Size)
return error("Invalid record: metadata strings truncated chars");
@@ -2161,10 +2182,9 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadataAttachment(
PlaceholderQueue Placeholders;
while (true) {
- Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
- if (!MaybeEntry)
- return MaybeEntry.takeError();
- BitstreamEntry Entry = MaybeEntry.get();
+ BitstreamEntry Entry;
+ if (Error E = Stream.advanceSkippingSubblocks().moveInto(Entry))
+ return E;
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
@@ -2265,10 +2285,9 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadataKinds() {
// Read all the records.
while (true) {
- Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
- if (!MaybeEntry)
- return MaybeEntry.takeError();
- BitstreamEntry Entry = MaybeEntry.get();
+ BitstreamEntry Entry;
+ if (Error E = Stream.advanceSkippingSubblocks().moveInto(Entry))
+ return E;
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 0a202c376981..1e9a9197aed7 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -58,6 +58,7 @@
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/MC/StringTableBuilder.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/IRSymtab.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
@@ -67,7 +68,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SHA1.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
@@ -142,7 +142,6 @@ public:
: Stream(Stream), StrtabBuilder(StrtabBuilder) {}
protected:
- void writeBitcodeHeader();
void writeModuleVersion();
};
@@ -374,7 +373,6 @@ private:
void writeModuleMetadata();
void writeFunctionMetadata(const Function &F);
void writeFunctionMetadataAttachment(const Function &F);
- void writeGlobalVariableMetadataAttachment(const GlobalVariable &GV);
void pushGlobalMetadataAttachment(SmallVectorImpl<uint64_t> &Record,
const GlobalObject &GO);
void writeModuleMetadataKinds();
@@ -628,6 +626,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) {
return bitc::ATTR_KIND_IN_ALLOCA;
case Attribute::Cold:
return bitc::ATTR_KIND_COLD;
+ case Attribute::DisableSanitizerInstrumentation:
+ return bitc::ATTR_KIND_DISABLE_SANITIZER_INSTRUMENTATION;
case Attribute::Hot:
return bitc::ATTR_KIND_HOT;
case Attribute::ElementType:
@@ -835,7 +835,7 @@ void ModuleBitcodeWriter::writeAttributeTable() {
SmallVector<uint64_t, 64> Record;
for (unsigned i = 0, e = Attrs.size(); i != e; ++i) {
AttributeList AL = Attrs[i];
- for (unsigned i = AL.index_begin(), e = AL.index_end(); i != e; ++i) {
+ for (unsigned i : AL.indexes()) {
AttributeSet AS = AL.getAttributes(i);
if (AS.hasAttributes())
Record.push_back(VE.getAttributeGroupID({i, AS}));
@@ -973,9 +973,8 @@ void ModuleBitcodeWriter::writeTypeTable() {
// STRUCT: [ispacked, eltty x N]
TypeVals.push_back(ST->isPacked());
// Output all of the element types.
- for (StructType::element_iterator I = ST->element_begin(),
- E = ST->element_end(); I != E; ++I)
- TypeVals.push_back(VE.getTypeID(*I));
+ for (Type *ET : ST->elements())
+ TypeVals.push_back(VE.getTypeID(ET));
if (ST->isLiteral()) {
Code = bitc::TYPE_CODE_STRUCT_ANON;
@@ -1066,6 +1065,9 @@ static uint64_t getEncodedFFlags(FunctionSummary::FFlags Flags) {
RawFlags |= (Flags.ReturnDoesNotAlias << 3);
RawFlags |= (Flags.NoInline << 4);
RawFlags |= (Flags.AlwaysInline << 5);
+ RawFlags |= (Flags.NoUnwind << 6);
+ RawFlags |= (Flags.MayThrow << 7);
+ RawFlags |= (Flags.HasUnknownCall << 8);
return RawFlags;
}
@@ -1687,6 +1689,8 @@ void ModuleBitcodeWriter::writeDIDerivedType(const DIDerivedType *N,
else
Record.push_back(0);
+ Record.push_back(VE.getMetadataOrNullID(N->getAnnotations().get()));
+
Stream.EmitRecord(bitc::METADATA_DERIVED_TYPE, Record, Abbrev);
Record.clear();
}
@@ -1716,6 +1720,7 @@ void ModuleBitcodeWriter::writeDICompositeType(
Record.push_back(VE.getMetadataOrNullID(N->getRawAssociated()));
Record.push_back(VE.getMetadataOrNullID(N->getRawAllocated()));
Record.push_back(VE.getMetadataOrNullID(N->getRawRank()));
+ Record.push_back(VE.getMetadataOrNullID(N->getAnnotations().get()));
Stream.EmitRecord(bitc::METADATA_COMPOSITE_TYPE, Record, Abbrev);
Record.clear();
@@ -1811,6 +1816,7 @@ void ModuleBitcodeWriter::writeDISubprogram(const DISubprogram *N,
Record.push_back(VE.getMetadataOrNullID(N->getRetainedNodes().get()));
Record.push_back(N->getThisAdjustment());
Record.push_back(VE.getMetadataOrNullID(N->getThrownTypes().get()));
+ Record.push_back(VE.getMetadataOrNullID(N->getAnnotations().get()));
Stream.EmitRecord(bitc::METADATA_SUBPROGRAM, Record, Abbrev);
Record.clear();
@@ -1958,6 +1964,7 @@ void ModuleBitcodeWriter::writeDIGlobalVariable(
Record.push_back(VE.getMetadataOrNullID(N->getStaticDataMemberDeclaration()));
Record.push_back(VE.getMetadataOrNullID(N->getTemplateParams()));
Record.push_back(N->getAlignInBits());
+ Record.push_back(VE.getMetadataOrNullID(N->getAnnotations().get()));
Stream.EmitRecord(bitc::METADATA_GLOBAL_VAR, Record, Abbrev);
Record.clear();
@@ -1989,6 +1996,7 @@ void ModuleBitcodeWriter::writeDILocalVariable(
Record.push_back(N->getArg());
Record.push_back(N->getFlags());
Record.push_back(N->getAlignInBits());
+ Record.push_back(VE.getMetadataOrNullID(N->getAnnotations().get()));
Stream.EmitRecord(bitc::METADATA_LOCAL_VAR, Record, Abbrev);
Record.clear();
@@ -2056,6 +2064,7 @@ void ModuleBitcodeWriter::writeDIImportedEntity(
Record.push_back(N->getLine());
Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
Record.push_back(VE.getMetadataOrNullID(N->getRawFile()));
+ Record.push_back(VE.getMetadataOrNullID(N->getElements().get()));
Stream.EmitRecord(bitc::METADATA_IMPORTED_ENTITY, Record, Abbrev);
Record.clear();
@@ -2907,8 +2916,7 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I,
// Emit type/value pairs for varargs params.
if (FTy->isVarArg()) {
- for (unsigned i = FTy->getNumParams(), e = II->getNumArgOperands();
- i != e; ++i)
+ for (unsigned i = FTy->getNumParams(), e = II->arg_size(); i != e; ++i)
pushValueAndType(I.getOperand(i), InstID, Vals); // vararg
}
break;
@@ -2989,8 +2997,7 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I,
// Emit type/value pairs for varargs params.
if (FTy->isVarArg()) {
- for (unsigned i = FTy->getNumParams(), e = CBI->getNumArgOperands();
- i != e; ++i)
+ for (unsigned i = FTy->getNumParams(), e = CBI->arg_size(); i != e; ++i)
pushValueAndType(I.getOperand(i), InstID, Vals); // vararg
}
break;
@@ -3047,7 +3054,11 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I,
Vals.push_back(VE.getValueID(I.getOperand(0))); // size.
using APV = AllocaPackedValues;
unsigned Record = 0;
- Bitfield::set<APV::Align>(Record, getEncodedAlign(AI.getAlign()));
+ unsigned EncodedAlign = getEncodedAlign(AI.getAlign());
+ Bitfield::set<APV::AlignLower>(
+ Record, EncodedAlign & ((1 << APV::AlignLower::Bits) - 1));
+ Bitfield::set<APV::AlignUpper>(Record,
+ EncodedAlign >> APV::AlignLower::Bits);
Bitfield::set<APV::UsedWithInAlloca>(Record, AI.isUsedWithInAlloca());
Bitfield::set<APV::ExplicitType>(Record, true);
Bitfield::set<APV::SwiftError>(Record, AI.isSwiftError());
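
The alloca hunk above now spreads the encoded alignment across two bitfield slots (a lower and an upper part) rather than one. A small sketch of splitting and reassembling a value across two fixed-width slots; the widths and shifts here are made up and do not reflect the real AllocaPackedValues layout:

#include <cassert>
#include <cstdint>

// Hypothetical layout: a 5-bit lower slot and a 2-bit upper slot.
constexpr unsigned LowerBits = 5;
constexpr unsigned LowerShift = 3;    // where the lower slot starts
constexpr unsigned UpperShift = 16;   // where the upper slot starts

uint64_t packAlign(uint64_t Record, unsigned EncodedAlign) {
  uint64_t Lower = EncodedAlign & ((1u << LowerBits) - 1);
  uint64_t Upper = EncodedAlign >> LowerBits;   // assumed to fit in 2 bits
  Record |= Lower << LowerShift;
  Record |= Upper << UpperShift;
  return Record;
}

unsigned unpackAlign(uint64_t Record) {
  unsigned Lower = (Record >> LowerShift) & ((1u << LowerBits) - 1);
  unsigned Upper = (Record >> UpperShift) & 0x3;
  return (Upper << LowerBits) | Lower;
}

int main() {
  unsigned Encoded = 37;   // needs more than 5 bits, so both slots are used
  assert(unpackAlign(packAlign(0, Encoded)) == Encoded);
}
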
@@ -3154,8 +3165,7 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I,
// Emit type/value pairs for varargs params.
if (FTy->isVarArg()) {
- for (unsigned i = FTy->getNumParams(), e = CI.getNumArgOperands();
- i != e; ++i)
+ for (unsigned i = FTy->getNumParams(), e = CI.arg_size(); i != e; ++i)
pushValueAndType(CI.getArgOperand(i), InstID, Vals); // varargs
}
break;
@@ -4028,7 +4038,7 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
FSModVTableRefsAbbrev);
for (const GlobalAlias &A : M.aliases()) {
- auto *Aliasee = A.getBaseObject();
+ auto *Aliasee = A.getAliaseeObject();
if (!Aliasee->hasName())
// Nameless functions don't have an entry in the summary, skip it.
continue;
@@ -4141,7 +4151,14 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
// For local linkage, we also emit the original name separately
// immediately after the record.
auto MaybeEmitOriginalName = [&](GlobalValueSummary &S) {
- if (!GlobalValue::isLocalLinkage(S.linkage()))
+ // We don't need to emit the original name if we are writing the index for
+ // distributed backends (in which case ModuleToSummariesForIndex is
+ // non-null). The original name is only needed during the thin link, since
+ // for SamplePGO the indirect call targets for local functions have
+ // have the original name annotated in profile.
+ // Continue to emit it when writing out the entire combined index, which is
+ // used in testing the thin link via llvm-lto.
+ if (ModuleToSummariesForIndex || !GlobalValue::isLocalLinkage(S.linkage()))
return;
NameVals.push_back(S.getOriginalName());
Stream.EmitRecord(bitc::FS_COMBINED_ORIGINAL_NAME, NameVals);
@@ -4194,33 +4211,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
}
auto GetValueId = [&](const ValueInfo &VI) -> Optional<unsigned> {
- GlobalValue::GUID GUID = VI.getGUID();
- Optional<unsigned> CallValueId = getValueId(GUID);
- if (CallValueId)
- return CallValueId;
- // For SamplePGO, the indirect call targets for local functions will
- // have its original name annotated in profile. We try to find the
- // corresponding PGOFuncName as the GUID.
- GUID = Index.getGUIDFromOriginalID(GUID);
- if (!GUID)
- return None;
- CallValueId = getValueId(GUID);
- if (!CallValueId)
- return None;
- // The mapping from OriginalId to GUID may return a GUID
- // that corresponds to a static variable. Filter it out here.
- // This can happen when
- // 1) There is a call to a library function which does not have
- // a CallValidId;
- // 2) There is a static variable with the OriginalGUID identical
- // to the GUID of the library function in 1);
- // When this happens, the logic for SamplePGO kicks in and
- // the static variable in 2) will be found, which needs to be
- // filtered out.
- auto *GVSum = Index.getGlobalValueSummary(GUID, false);
- if (GVSum && GVSum->getSummaryKind() == GlobalValueSummary::GlobalVarKind)
- return None;
- return CallValueId;
+ return getValueId(VI.getGUID());
};
auto *FS = cast<FunctionSummary>(S);
diff --git a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
index d86db61ee1f4..9465a3b11c8f 100644
--- a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -229,8 +229,11 @@ static void predictValueUseListOrderImpl(const Value *V, const Function *F,
// have been read (despite having earlier IDs). Rather than awkwardly
// modeling this behaviour here, orderModule() has assigned IDs to
// initializers of GlobalValues before GlobalValues themselves.
- if (OM.isGlobalValue(LID) && OM.isGlobalValue(RID))
+ if (OM.isGlobalValue(LID) && OM.isGlobalValue(RID)) {
+ if (LID == RID)
+ return LU->getOperandNo() > RU->getOperandNo();
return LID < RID;
+ }
// If ID is 4, then expect: 7 6 5 1 2 3.
if (LID < RID) {
@@ -1036,7 +1039,7 @@ void ValueEnumerator::EnumerateAttributes(AttributeList PAL) {
}
// Do lookups for all attribute groups.
- for (unsigned i = PAL.index_begin(), e = PAL.index_end(); i != e; ++i) {
+ for (unsigned i : PAL.indexes()) {
AttributeSet AS = PAL.getAttributes(i);
if (!AS.hasAttributes())
continue;
diff --git a/llvm/lib/CodeGen/Analysis.cpp b/llvm/lib/CodeGen/Analysis.cpp
index e5d576d879b5..7d8a73e12d3a 100644
--- a/llvm/lib/CodeGen/Analysis.cpp
+++ b/llvm/lib/CodeGen/Analysis.cpp
@@ -221,9 +221,6 @@ ISD::CondCode llvm::getFCmpCodeWithoutNaN(ISD::CondCode CC) {
}
}
-/// getICmpCondCode - Return the ISD condition code corresponding to
-/// the given LLVM IR integer condition code.
-///
ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) {
switch (Pred) {
case ICmpInst::ICMP_EQ: return ISD::SETEQ;
@@ -241,6 +238,33 @@ ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) {
}
}
+ICmpInst::Predicate llvm::getICmpCondCode(ISD::CondCode Pred) {
+ switch (Pred) {
+ case ISD::SETEQ:
+ return ICmpInst::ICMP_EQ;
+ case ISD::SETNE:
+ return ICmpInst::ICMP_NE;
+ case ISD::SETLE:
+ return ICmpInst::ICMP_SLE;
+ case ISD::SETULE:
+ return ICmpInst::ICMP_ULE;
+ case ISD::SETGE:
+ return ICmpInst::ICMP_SGE;
+ case ISD::SETUGE:
+ return ICmpInst::ICMP_UGE;
+ case ISD::SETLT:
+ return ICmpInst::ICMP_SLT;
+ case ISD::SETULT:
+ return ICmpInst::ICMP_ULT;
+ case ISD::SETGT:
+ return ICmpInst::ICMP_SGT;
+ case ISD::SETUGT:
+ return ICmpInst::ICMP_UGT;
+ default:
+ llvm_unreachable("Invalid ISD integer condition code!");
+ }
+}
+
static bool isNoopBitcast(Type *T1, Type *T2,
const TargetLoweringBase& TLI) {
return T1 == T2 || (T1->isPointerTy() && T2->isPointerTy()) ||
@@ -524,10 +548,8 @@ bool llvm::isInTailCallPosition(const CallBase &Call, const TargetMachine &TM) {
if (&*BBI == &Call)
break;
// Debug info intrinsics do not get in the way of tail call optimization.
- if (isa<DbgInfoIntrinsic>(BBI))
- continue;
// Pseudo probe intrinsics do not block tail call optimization either.
- if (isa<PseudoProbeInst>(BBI))
+ if (BBI->isDebugOrPseudoInst())
continue;
// A lifetime end, assume or noalias.decl intrinsic should not stop tail
// call optimization.
diff --git a/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
index db4215e92d44..223840c21d8b 100644
--- a/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -75,7 +75,6 @@ void ARMException::endFunction(const MachineFunction *MF) {
// Emit references to personality.
if (Per) {
MCSymbol *PerSym = Asm->getSymbol(Per);
- Asm->OutStreamer->emitSymbolAttribute(PerSym, MCSA_Global);
ATS.emitPersonality(PerSym);
}
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index e528d33b5f8c..cc848d28a9a7 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -71,7 +71,6 @@
#include "llvm/IR/GCStrategy.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalIFunc.h"
-#include "llvm/IR/GlobalIndirectSymbol.h"
#include "llvm/IR/GlobalObject.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
@@ -102,6 +101,7 @@
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/SectionKind.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Pass.h"
#include "llvm/Remarks/Remark.h"
#include "llvm/Remarks/RemarkFormat.h"
@@ -115,7 +115,6 @@
#include "llvm/Support/Format.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Path.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
@@ -275,7 +274,7 @@ bool AsmPrinter::doInitialization(Module &M) {
const_cast<TargetLoweringObjectFile &>(getObjFileLowering())
.getModuleMetadata(M);
- OutStreamer->InitSections(false);
+ OutStreamer->initSections(false, *TM.getMCSubtargetInfo());
if (DisableDebugInfoPrinting)
MMI->setDebugInfoAvailability(false);
@@ -326,16 +325,10 @@ bool AsmPrinter::doInitialization(Module &M) {
// Emit module-level inline asm if it exists.
if (!M.getModuleInlineAsm().empty()) {
- // We're at the module level. Construct MCSubtarget from the default CPU
- // and target triple.
- std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo(
- TM.getTargetTriple().str(), TM.getTargetCPU(),
- TM.getTargetFeatureString()));
- assert(STI && "Unable to create subtarget info");
OutStreamer->AddComment("Start of file scope inline assembly");
OutStreamer->AddBlankLine();
- emitInlineAsm(M.getModuleInlineAsm() + "\n",
- OutContext.getSubtargetCopy(*STI), TM.Options.MCOptions);
+ emitInlineAsm(M.getModuleInlineAsm() + "\n", *TM.getMCSubtargetInfo(),
+ TM.Options.MCOptions);
OutStreamer->AddComment("End of file scope inline assembly");
OutStreamer->AddBlankLine();
}
@@ -1422,7 +1415,7 @@ void AsmPrinter::emitFunctionBody() {
});
R << "BasicBlock: " << ore::NV("BasicBlock", MBB.getName()) << "\n";
for (auto &KV : MnemonicVec) {
- auto Name = (Twine("INST_") + KV.first.trim()).str();
+ auto Name = (Twine("INST_") + getToken(KV.first.trim()).first).str();
R << KV.first << ": " << ore::NV(Name, KV.second) << "\n";
}
ORE->emit(R);
@@ -1610,14 +1603,13 @@ void AsmPrinter::emitGlobalGOTEquivs() {
emitGlobalVariable(GV);
}
-void AsmPrinter::emitGlobalIndirectSymbol(Module &M,
- const GlobalIndirectSymbol& GIS) {
- MCSymbol *Name = getSymbol(&GIS);
- bool IsFunction = GIS.getValueType()->isFunctionTy();
+void AsmPrinter::emitGlobalAlias(Module &M, const GlobalAlias &GA) {
+ MCSymbol *Name = getSymbol(&GA);
+ bool IsFunction = GA.getValueType()->isFunctionTy();
// Treat bitcasts of functions as functions also. This is important at least
// on WebAssembly where object and function addresses can't alias each other.
if (!IsFunction)
- if (auto *CE = dyn_cast<ConstantExpr>(GIS.getIndirectSymbol()))
+ if (auto *CE = dyn_cast<ConstantExpr>(GA.getAliasee()))
if (CE->getOpcode() == Instruction::BitCast)
IsFunction =
CE->getOperand(0)->getType()->getPointerElementType()->isFunctionTy();
@@ -1627,61 +1619,80 @@ void AsmPrinter::emitGlobalIndirectSymbol(Module &M,
// point, all the extra label is emitted, we just have to emit linkage for
// those labels.
if (TM.getTargetTriple().isOSBinFormatXCOFF()) {
- assert(!isa<GlobalIFunc>(GIS) && "IFunc is not supported on AIX.");
assert(MAI->hasVisibilityOnlyWithLinkage() &&
"Visibility should be handled with emitLinkage() on AIX.");
- emitLinkage(&GIS, Name);
+ emitLinkage(&GA, Name);
// If it's a function, also emit linkage for aliases of function entry
// point.
if (IsFunction)
- emitLinkage(&GIS,
- getObjFileLowering().getFunctionEntryPointSymbol(&GIS, TM));
+ emitLinkage(&GA,
+ getObjFileLowering().getFunctionEntryPointSymbol(&GA, TM));
return;
}
- if (GIS.hasExternalLinkage() || !MAI->getWeakRefDirective())
+ if (GA.hasExternalLinkage() || !MAI->getWeakRefDirective())
OutStreamer->emitSymbolAttribute(Name, MCSA_Global);
- else if (GIS.hasWeakLinkage() || GIS.hasLinkOnceLinkage())
+ else if (GA.hasWeakLinkage() || GA.hasLinkOnceLinkage())
OutStreamer->emitSymbolAttribute(Name, MCSA_WeakReference);
else
- assert(GIS.hasLocalLinkage() && "Invalid alias or ifunc linkage");
+ assert(GA.hasLocalLinkage() && "Invalid alias linkage");
// Set the symbol type to function if the alias has a function type.
// This affects codegen when the aliasee is not a function.
if (IsFunction)
- OutStreamer->emitSymbolAttribute(Name, isa<GlobalIFunc>(GIS)
- ? MCSA_ELF_TypeIndFunction
- : MCSA_ELF_TypeFunction);
+ OutStreamer->emitSymbolAttribute(Name, MCSA_ELF_TypeFunction);
- emitVisibility(Name, GIS.getVisibility());
+ emitVisibility(Name, GA.getVisibility());
- const MCExpr *Expr = lowerConstant(GIS.getIndirectSymbol());
+ const MCExpr *Expr = lowerConstant(GA.getAliasee());
- if (isa<GlobalAlias>(&GIS) && MAI->hasAltEntry() && isa<MCBinaryExpr>(Expr))
+ if (MAI->hasAltEntry() && isa<MCBinaryExpr>(Expr))
OutStreamer->emitSymbolAttribute(Name, MCSA_AltEntry);
// Emit the directives as assignments aka .set:
OutStreamer->emitAssignment(Name, Expr);
- MCSymbol *LocalAlias = getSymbolPreferLocal(GIS);
+ MCSymbol *LocalAlias = getSymbolPreferLocal(GA);
if (LocalAlias != Name)
OutStreamer->emitAssignment(LocalAlias, Expr);
- if (auto *GA = dyn_cast<GlobalAlias>(&GIS)) {
- // If the aliasee does not correspond to a symbol in the output, i.e. the
- // alias is not of an object or the aliased object is private, then set the
- // size of the alias symbol from the type of the alias. We don't do this in
- // other situations as the alias and aliasee having differing types but same
- // size may be intentional.
- const GlobalObject *BaseObject = GA->getBaseObject();
- if (MAI->hasDotTypeDotSizeDirective() && GA->getValueType()->isSized() &&
- (!BaseObject || BaseObject->hasPrivateLinkage())) {
- const DataLayout &DL = M.getDataLayout();
- uint64_t Size = DL.getTypeAllocSize(GA->getValueType());
- OutStreamer->emitELFSize(Name, MCConstantExpr::create(Size, OutContext));
- }
+ // If the aliasee does not correspond to a symbol in the output, i.e. the
+ // alias is not of an object or the aliased object is private, then set the
+ // size of the alias symbol from the type of the alias. We don't do this in
+ // other situations as the alias and aliasee having differing types but same
+ // size may be intentional.
+ const GlobalObject *BaseObject = GA.getAliaseeObject();
+ if (MAI->hasDotTypeDotSizeDirective() && GA.getValueType()->isSized() &&
+ (!BaseObject || BaseObject->hasPrivateLinkage())) {
+ const DataLayout &DL = M.getDataLayout();
+ uint64_t Size = DL.getTypeAllocSize(GA.getValueType());
+ OutStreamer->emitELFSize(Name, MCConstantExpr::create(Size, OutContext));
}
}
+void AsmPrinter::emitGlobalIFunc(Module &M, const GlobalIFunc &GI) {
+ assert(!TM.getTargetTriple().isOSBinFormatXCOFF() &&
+ "IFunc is not supported on AIX.");
+
+ MCSymbol *Name = getSymbol(&GI);
+
+ if (GI.hasExternalLinkage() || !MAI->getWeakRefDirective())
+ OutStreamer->emitSymbolAttribute(Name, MCSA_Global);
+ else if (GI.hasWeakLinkage() || GI.hasLinkOnceLinkage())
+ OutStreamer->emitSymbolAttribute(Name, MCSA_WeakReference);
+ else
+ assert(GI.hasLocalLinkage() && "Invalid ifunc linkage");
+
+ OutStreamer->emitSymbolAttribute(Name, MCSA_ELF_TypeIndFunction);
+ emitVisibility(Name, GI.getVisibility());
+
+ // Emit the directives as assignments aka .set:
+ const MCExpr *Expr = lowerConstant(GI.getResolver());
+ OutStreamer->emitAssignment(Name, Expr);
+ MCSymbol *LocalAlias = getSymbolPreferLocal(GI);
+ if (LocalAlias != Name)
+ OutStreamer->emitAssignment(LocalAlias, Expr);
+}
+
void AsmPrinter::emitRemarksSection(remarks::RemarkStreamer &RS) {
if (!RS.needsSection())
return;
@@ -1815,6 +1826,11 @@ bool AsmPrinter::doFinalization(Module &M) {
}
}
+ // This needs to happen before emitting debug information since that can end
+ // arbitrary sections.
+ if (auto *TS = OutStreamer->getTargetStreamer())
+ TS->emitConstantPools();
+
// Finalize debug and EH information.
for (const HandlerInfo &HI : Handlers) {
NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
@@ -1857,11 +1873,11 @@ bool AsmPrinter::doFinalization(Module &M) {
AliasStack.push_back(Cur);
}
for (const GlobalAlias *AncestorAlias : llvm::reverse(AliasStack))
- emitGlobalIndirectSymbol(M, *AncestorAlias);
+ emitGlobalAlias(M, *AncestorAlias);
AliasStack.clear();
}
for (const auto &IFunc : M.ifuncs())
- emitGlobalIndirectSymbol(M, IFunc);
+ emitGlobalIFunc(M, IFunc);
GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
assert(MI && "AsmPrinter didn't require GCModuleInfo?");
@@ -2455,9 +2471,14 @@ void AsmPrinter::emitAlignment(Align Alignment, const GlobalObject *GV) const {
if (Alignment == Align(1))
return; // 1-byte aligned: no need to emit alignment.
- if (getCurrentSection()->getKind().isText())
- OutStreamer->emitCodeAlignment(Alignment.value());
- else
+ if (getCurrentSection()->getKind().isText()) {
+ const MCSubtargetInfo *STI = nullptr;
+ if (this->MF)
+ STI = &getSubtargetInfo();
+ else
+ STI = TM.getMCSubtargetInfo();
+ OutStreamer->emitCodeAlignment(Alignment.value(), STI);
+ } else
OutStreamer->emitValueToAlignment(Alignment.value());
}
@@ -2513,7 +2534,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
OS << "Unsupported expression in static initializer: ";
CE->printAsOperand(OS, /*PrintType=*/false,
!MF ? nullptr : MF->getFunction().getParent());
- report_fatal_error(OS.str());
+ report_fatal_error(Twine(OS.str()));
}
case Instruction::GetElementPtr: {
// Generate a symbolic expression for the byte address
@@ -3265,21 +3286,21 @@ void AsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
// reference the block. It is possible that there is more than one label
// here, because multiple LLVM BB's may have been RAUW'd to this block after
// the references were generated.
+ const BasicBlock *BB = MBB.getBasicBlock();
if (MBB.hasAddressTaken()) {
- const BasicBlock *BB = MBB.getBasicBlock();
if (isVerbose())
OutStreamer->AddComment("Block address taken");
// MBBs can have their address taken as part of CodeGen without having
// their corresponding BB's address taken in IR
- if (BB->hasAddressTaken())
+ if (BB && BB->hasAddressTaken())
for (MCSymbol *Sym : MMI->getAddrLabelSymbolToEmit(BB))
OutStreamer->emitLabel(Sym);
}
// Print some verbose block comments.
if (isVerbose()) {
- if (const BasicBlock *BB = MBB.getBasicBlock()) {
+ if (BB) {
if (BB->hasName()) {
BB->printAsOperand(OutStreamer->GetCommentOS(),
/*PrintType=*/false, BB->getModule());
@@ -3538,7 +3559,7 @@ void AsmPrinter::emitXRayTable() {
// pointers. This should work for both 32-bit and 64-bit platforms.
if (FnSledIndex) {
OutStreamer->SwitchSection(FnSledIndex);
- OutStreamer->emitCodeAlignment(2 * WordSizeBytes);
+ OutStreamer->emitCodeAlignment(2 * WordSizeBytes, &getSubtargetInfo());
OutStreamer->emitSymbolValue(SledsStart, WordSizeBytes, false);
OutStreamer->emitSymbolValue(SledsEnd, WordSizeBytes, false);
OutStreamer->SwitchSection(PrevSection);
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 4a93181f5439..ef1abc47701a 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -30,10 +30,10 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -129,13 +129,16 @@ void AsmPrinter::emitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
}
static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
- MachineModuleInfo *MMI, AsmPrinter *AP,
- uint64_t LocCookie, raw_ostream &OS) {
+ MachineModuleInfo *MMI, const MCAsmInfo *MAI,
+ AsmPrinter *AP, uint64_t LocCookie,
+ raw_ostream &OS) {
// Switch to the inline assembly variant.
OS << "\t.intel_syntax\n\t";
+ int CurVariant = -1; // The number of the {.|.|.} region we are in.
const char *LastEmitted = AsmStr; // One past the last character emitted.
unsigned NumOperands = MI->getNumOperands();
+ int AsmPrinterVariant = 1; // X86MCAsmInfo.cpp's AsmWriterFlavorTy::Intel.
while (*LastEmitted) {
switch (*LastEmitted) {
@@ -145,8 +148,8 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
while (*LiteralEnd && *LiteralEnd != '{' && *LiteralEnd != '|' &&
*LiteralEnd != '}' && *LiteralEnd != '$' && *LiteralEnd != '\n')
++LiteralEnd;
-
- OS.write(LastEmitted, LiteralEnd-LastEmitted);
+ if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
+ OS.write(LastEmitted, LiteralEnd - LastEmitted);
LastEmitted = LiteralEnd;
break;
}
@@ -164,6 +167,27 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
case '$':
++LastEmitted; // Consume second '$' character.
break;
+ case '(': // $( -> same as GCC's { character.
+ ++LastEmitted; // Consume '(' character.
+ if (CurVariant != -1)
+ report_fatal_error("Nested variants found in inline asm string: '" +
+ Twine(AsmStr) + "'");
+ CurVariant = 0; // We're in the first variant now.
+ break;
+ case '|':
+ ++LastEmitted; // Consume '|' character.
+ if (CurVariant == -1)
+ OS << '|'; // This is gcc's behavior for | outside a variant.
+ else
+ ++CurVariant; // We're in the next variant.
+ break;
+ case ')': // $) -> same as GCC's } char.
+ ++LastEmitted; // Consume ')' character.
+ if (CurVariant == -1)
+ OS << '}'; // This is gcc's behavior for } outside a variant.
+ else
+ CurVariant = -1;
+ break;
}
if (Done) break;
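
With these new cases, the MS-dialect inline-asm printer understands the same $( ... $| ... $) variant regions as the GCC-dialect printer: literal text is only copied through when the current region matches the printer's dialect. Below is a simplified, self-contained filter over those markers; it is a toy model only and ignores the operands, modifiers and $$ escapes handled by the real code:

#include <cstdio>
#include <string>

// Keep only the region matching Dialect (0 = first region, 1 = second, ...).
// "$(" opens the regions, "$|" separates them, "$)" closes them.
std::string filterVariants(const std::string &Asm, int Dialect) {
  std::string Out;
  int CurVariant = -1;   // -1 means "outside any $(...$) region"
  for (size_t I = 0; I < Asm.size(); ++I) {
    if (Asm[I] == '$' && I + 1 < Asm.size()) {
      char Next = Asm[I + 1];
      if (Next == '(') { CurVariant = 0; ++I; continue; }
      if (Next == '|') { ++CurVariant; ++I; continue; }
      if (Next == ')') { CurVariant = -1; ++I; continue; }
    }
    if (CurVariant == -1 || CurVariant == Dialect)
      Out += Asm[I];
  }
  return Out;
}

int main() {
  std::string S = "mov $(eax, 1$|1, eax$)";            // made-up example string
  std::printf("%s\n", filterVariants(S, 0).c_str());   // "mov eax, 1"
  std::printf("%s\n", filterVariants(S, 1).c_str());   // "mov 1, eax"
}
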
@@ -176,16 +200,15 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
// If we have ${:foo}, then this is not a real operand reference, it is a
// "magic" string reference, just like in .td files. Arrange to call
// PrintSpecial.
- if (HasCurlyBraces && LastEmitted[0] == ':') {
+ if (HasCurlyBraces && *LastEmitted == ':') {
++LastEmitted;
const char *StrStart = LastEmitted;
const char *StrEnd = strchr(StrStart, '}');
if (!StrEnd)
report_fatal_error("Unterminated ${:foo} operand in inline asm"
" string: '" + Twine(AsmStr) + "'");
-
- std::string Val(StrStart, StrEnd);
- AP->PrintSpecial(MI, OS, Val.c_str());
+ if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
+ AP->PrintSpecial(MI, OS, StringRef(StrStart, StrEnd - StrStart));
LastEmitted = StrEnd+1;
break;
}
@@ -201,7 +224,7 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
Twine(AsmStr) + "'");
LastEmitted = IDEnd;
- if (Val >= NumOperands-1)
+ if (Val >= NumOperands - 1)
report_fatal_error("Invalid $ operand number in inline asm string: '" +
Twine(AsmStr) + "'");
@@ -228,40 +251,50 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
// Okay, we finally have a value number. Ask the target to print this
// operand!
- unsigned OpNo = InlineAsm::MIOp_FirstOperand;
+ if (CurVariant == -1 || CurVariant == AsmPrinterVariant) {
+ unsigned OpNo = InlineAsm::MIOp_FirstOperand;
- bool Error = false;
+ bool Error = false;
- // Scan to find the machine operand number for the operand.
- for (; Val; --Val) {
- if (OpNo >= MI->getNumOperands()) break;
- unsigned OpFlags = MI->getOperand(OpNo).getImm();
- OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1;
- }
+ // Scan to find the machine operand number for the operand.
+ for (; Val; --Val) {
+ if (OpNo >= MI->getNumOperands())
+ break;
+ unsigned OpFlags = MI->getOperand(OpNo).getImm();
+ OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1;
+ }
- // We may have a location metadata attached to the end of the
- // instruction, and at no point should see metadata at any
- // other point while processing. It's an error if so.
- if (OpNo >= MI->getNumOperands() ||
- MI->getOperand(OpNo).isMetadata()) {
- Error = true;
- } else {
- unsigned OpFlags = MI->getOperand(OpNo).getImm();
- ++OpNo; // Skip over the ID number.
-
- if (InlineAsm::isMemKind(OpFlags)) {
- Error = AP->PrintAsmMemoryOperand(
- MI, OpNo, Modifier[0] ? Modifier : nullptr, OS);
+ // We may have a location metadata attached to the end of the
+ // instruction, and at no point should see metadata at any
+ // other point while processing. It's an error if so.
+ if (OpNo >= MI->getNumOperands() || MI->getOperand(OpNo).isMetadata()) {
+ Error = true;
} else {
- Error = AP->PrintAsmOperand(MI, OpNo,
- Modifier[0] ? Modifier : nullptr, OS);
+ unsigned OpFlags = MI->getOperand(OpNo).getImm();
+ ++OpNo; // Skip over the ID number.
+
+ // FIXME: Shouldn't arch-independent output template handling go into
+ // PrintAsmOperand?
+ // Labels are target independent.
+ if (MI->getOperand(OpNo).isBlockAddress()) {
+ const BlockAddress *BA = MI->getOperand(OpNo).getBlockAddress();
+ MCSymbol *Sym = AP->GetBlockAddressSymbol(BA);
+ Sym->print(OS, AP->MAI);
+ MMI->getContext().registerInlineAsmLabel(Sym);
+ } else if (InlineAsm::isMemKind(OpFlags)) {
+ Error = AP->PrintAsmMemoryOperand(
+ MI, OpNo, Modifier[0] ? Modifier : nullptr, OS);
+ } else {
+ Error = AP->PrintAsmOperand(MI, OpNo,
+ Modifier[0] ? Modifier : nullptr, OS);
+ }
+ }
+ if (Error) {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "invalid operand in inline asm: '" << AsmStr << "'";
+ MMI->getModule()->getContext().emitError(LocCookie, Msg.str());
}
- }
- if (Error) {
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "invalid operand in inline asm: '" << AsmStr << "'";
- MMI->getModule()->getContext().emitError(LocCookie, Msg.str());
}
break;
}
@@ -274,10 +307,10 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
MachineModuleInfo *MMI, const MCAsmInfo *MAI,
AsmPrinter *AP, uint64_t LocCookie,
raw_ostream &OS) {
- int CurVariant = -1; // The number of the {.|.|.} region we are in.
+ int CurVariant = -1; // The number of the {.|.|.} region we are in.
const char *LastEmitted = AsmStr; // One past the last character emitted.
unsigned NumOperands = MI->getNumOperands();
- int AsmPrinterVariant = MAI->getAssemblerDialect();
+ int AsmPrinterVariant = MMI->getTarget().unqualifiedInlineAsmVariant();
if (MAI->getEmitGNUAsmStartIndentationMarker())
OS << '\t';
@@ -291,7 +324,7 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
*LiteralEnd != '}' && *LiteralEnd != '$' && *LiteralEnd != '\n')
++LiteralEnd;
if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
- OS.write(LastEmitted, LiteralEnd-LastEmitted);
+ OS.write(LastEmitted, LiteralEnd - LastEmitted);
LastEmitted = LiteralEnd;
break;
}
@@ -311,24 +344,24 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
OS << '$';
++LastEmitted; // Consume second '$' character.
break;
- case '(': // $( -> same as GCC's { character.
- ++LastEmitted; // Consume '(' character.
+ case '(': // $( -> same as GCC's { character.
+ ++LastEmitted; // Consume '(' character.
if (CurVariant != -1)
report_fatal_error("Nested variants found in inline asm string: '" +
Twine(AsmStr) + "'");
- CurVariant = 0; // We're in the first variant now.
+ CurVariant = 0; // We're in the first variant now.
break;
case '|':
- ++LastEmitted; // consume '|' character.
+ ++LastEmitted; // Consume '|' character.
if (CurVariant == -1)
- OS << '|'; // this is gcc's behavior for | outside a variant
+ OS << '|'; // This is gcc's behavior for | outside a variant.
else
- ++CurVariant; // We're in the next variant.
+ ++CurVariant; // We're in the next variant.
break;
- case ')': // $) -> same as GCC's } char.
- ++LastEmitted; // consume ')' character.
+ case ')': // $) -> same as GCC's } char.
+ ++LastEmitted; // Consume ')' character.
if (CurVariant == -1)
- OS << '}'; // this is gcc's behavior for } outside a variant
+ OS << '}'; // This is gcc's behavior for } outside a variant.
else
CurVariant = -1;
break;
@@ -351,9 +384,8 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
if (!StrEnd)
report_fatal_error("Unterminated ${:foo} operand in inline asm"
" string: '" + Twine(AsmStr) + "'");
-
- std::string Val(StrStart, StrEnd);
- AP->PrintSpecial(MI, OS, Val.c_str());
+ if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
+ AP->PrintSpecial(MI, OS, StringRef(StrStart, StrEnd - StrStart));
LastEmitted = StrEnd+1;
break;
}
@@ -369,6 +401,10 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
Twine(AsmStr) + "'");
LastEmitted = IDEnd;
+ if (Val >= NumOperands - 1)
+ report_fatal_error("Invalid $ operand number in inline asm string: '" +
+ Twine(AsmStr) + "'");
+
char Modifier[2] = { 0, 0 };
if (HasCurlyBraces) {
@@ -390,10 +426,6 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
++LastEmitted; // Consume '}' character.
}
- if (Val >= NumOperands-1)
- report_fatal_error("Invalid $ operand number in inline asm string: '" +
- Twine(AsmStr) + "'");
-
// Okay, we finally have a value number. Ask the target to print this
// operand!
if (CurVariant == -1 || CurVariant == AsmPrinterVariant) {
@@ -403,7 +435,8 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
// Scan to find the machine operand number for the operand.
for (; Val; --Val) {
- if (OpNo >= MI->getNumOperands()) break;
+ if (OpNo >= MI->getNumOperands())
+ break;
unsigned OpFlags = MI->getOperand(OpNo).getImm();
OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1;
}
@@ -411,12 +444,11 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
// We may have a location metadata attached to the end of the
// instruction, and at no point should see metadata at any
// other point while processing. It's an error if so.
- if (OpNo >= MI->getNumOperands() ||
- MI->getOperand(OpNo).isMetadata()) {
+ if (OpNo >= MI->getNumOperands() || MI->getOperand(OpNo).isMetadata()) {
Error = true;
} else {
unsigned OpFlags = MI->getOperand(OpNo).getImm();
- ++OpNo; // Skip over the ID number.
+ ++OpNo; // Skip over the ID number.
// FIXME: Shouldn't arch-independent output template handling go into
// PrintAsmOperand?
@@ -429,8 +461,6 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
} else if (MI->getOperand(OpNo).isMBB()) {
const MCSymbol *Sym = MI->getOperand(OpNo).getMBB()->getSymbol();
Sym->print(OS, AP->MAI);
- } else if (Modifier[0] == 'l') {
- Error = true;
} else if (InlineAsm::isMemKind(OpFlags)) {
Error = AP->PrintAsmMemoryOperand(
MI, OpNo, Modifier[0] ? Modifier : nullptr, OS);
@@ -506,7 +536,7 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const {
if (MI->getInlineAsmDialect() == InlineAsm::AD_ATT)
EmitGCCInlineAsmStr(AsmStr, MI, MMI, MAI, AP, LocCookie, OS);
else
- EmitMSInlineAsmStr(AsmStr, MI, MMI, AP, LocCookie, OS);
+ EmitMSInlineAsmStr(AsmStr, MI, MMI, MAI, AP, LocCookie, OS);
// Emit warnings if we use reserved registers on the clobber list, as
// that might lead to undefined behaviour.
@@ -540,7 +570,7 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const {
"preserved across the asm statement, and clobbering them may "
"lead to undefined behaviour.";
MMI->getModule()->getContext().diagnose(DiagnosticInfoInlineAsm(
- LocCookie, Msg.c_str(), DiagnosticSeverity::DS_Warning));
+ LocCookie, Msg, DiagnosticSeverity::DS_Warning));
MMI->getModule()->getContext().diagnose(
DiagnosticInfoInlineAsm(LocCookie, Note, DiagnosticSeverity::DS_Note));
}
@@ -560,13 +590,13 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const {
/// syntax used is ${:comment}. Targets can override this to add support
/// for their own strange codes.
void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS,
- const char *Code) const {
- if (!strcmp(Code, "private")) {
+ StringRef Code) const {
+ if (Code == "private") {
const DataLayout &DL = MF->getDataLayout();
OS << DL.getPrivateGlobalPrefix();
- } else if (!strcmp(Code, "comment")) {
+ } else if (Code == "comment") {
OS << MAI->getCommentString();
- } else if (!strcmp(Code, "uid")) {
+ } else if (Code == "uid") {
// Comparing the address of MI isn't sufficient, because machineinstrs may
// be allocated to the same address across functions.
@@ -582,7 +612,7 @@ void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS,
raw_string_ostream Msg(msg);
Msg << "Unknown special formatter '" << Code
<< "' for machine instr: " << *MI;
- report_fatal_error(Msg.str());
+ report_fatal_error(Twine(Msg.str()));
}
}
diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index bbb0504550c3..85ff84484ced 100644
--- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -341,7 +341,16 @@ std::string CodeViewDebug::getFullyQualifiedName(const DIScope *Ty) {
TypeIndex CodeViewDebug::getScopeIndex(const DIScope *Scope) {
// No scope means global scope and that uses the zero index.
- if (!Scope || isa<DIFile>(Scope))
+ //
+ // We also use zero index when the scope is a DISubprogram
+ // to suppress the emission of LF_STRING_ID for the function,
+ // which can trigger a link-time error with the linker in
+ // VS2019 version 16.11.2 or newer.
+ // Note, however, skipping the debug info emission for the DISubprogram
+ // is a temporary fix. The root issue here is that we need to figure out
+ // the proper way to encode a function nested in another function
+ // (as introduced by the Fortran 'contains' keyword) in CodeView.
+ if (!Scope || isa<DIFile>(Scope) || isa<DISubprogram>(Scope))
return TypeIndex();
assert(!isa<DIType>(Scope) && "shouldn't make a namespace scope for a type");
@@ -561,6 +570,44 @@ void CodeViewDebug::emitCodeViewMagicVersion() {
OS.emitInt32(COFF::DEBUG_SECTION_MAGIC);
}
+static SourceLanguage MapDWLangToCVLang(unsigned DWLang) {
+ switch (DWLang) {
+ case dwarf::DW_LANG_C:
+ case dwarf::DW_LANG_C89:
+ case dwarf::DW_LANG_C99:
+ case dwarf::DW_LANG_C11:
+ case dwarf::DW_LANG_ObjC:
+ return SourceLanguage::C;
+ case dwarf::DW_LANG_C_plus_plus:
+ case dwarf::DW_LANG_C_plus_plus_03:
+ case dwarf::DW_LANG_C_plus_plus_11:
+ case dwarf::DW_LANG_C_plus_plus_14:
+ return SourceLanguage::Cpp;
+ case dwarf::DW_LANG_Fortran77:
+ case dwarf::DW_LANG_Fortran90:
+ case dwarf::DW_LANG_Fortran95:
+ case dwarf::DW_LANG_Fortran03:
+ case dwarf::DW_LANG_Fortran08:
+ return SourceLanguage::Fortran;
+ case dwarf::DW_LANG_Pascal83:
+ return SourceLanguage::Pascal;
+ case dwarf::DW_LANG_Cobol74:
+ case dwarf::DW_LANG_Cobol85:
+ return SourceLanguage::Cobol;
+ case dwarf::DW_LANG_Java:
+ return SourceLanguage::Java;
+ case dwarf::DW_LANG_D:
+ return SourceLanguage::D;
+ case dwarf::DW_LANG_Swift:
+ return SourceLanguage::Swift;
+ default:
+ // There's no CodeView representation for this language, and CV doesn't
+ // have an "unknown" option for the language field, so we'll use MASM,
+ // as it's very low level.
+ return SourceLanguage::Masm;
+ }
+}
+
void CodeViewDebug::beginModule(Module *M) {
// If module doesn't have named metadata anchors or COFF debug section
// is not available, skip any debug info related stuff.
@@ -574,6 +621,13 @@ void CodeViewDebug::beginModule(Module *M) {
TheCPU = mapArchToCVCPUType(Triple(M->getTargetTriple()).getArch());
+ // Get the current source language.
+ NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu");
+ const MDNode *Node = *CUs->operands().begin();
+ const auto *CU = cast<DICompileUnit>(Node);
+
+ CurrentSourceLanguage = MapDWLangToCVLang(CU->getSourceLanguage());
+
collectGlobalVariableInfo();
// Check if we should emit type record hashes.
@@ -731,43 +785,6 @@ void CodeViewDebug::emitTypeGlobalHashes() {
}
}
-static SourceLanguage MapDWLangToCVLang(unsigned DWLang) {
- switch (DWLang) {
- case dwarf::DW_LANG_C:
- case dwarf::DW_LANG_C89:
- case dwarf::DW_LANG_C99:
- case dwarf::DW_LANG_C11:
- case dwarf::DW_LANG_ObjC:
- return SourceLanguage::C;
- case dwarf::DW_LANG_C_plus_plus:
- case dwarf::DW_LANG_C_plus_plus_03:
- case dwarf::DW_LANG_C_plus_plus_11:
- case dwarf::DW_LANG_C_plus_plus_14:
- return SourceLanguage::Cpp;
- case dwarf::DW_LANG_Fortran77:
- case dwarf::DW_LANG_Fortran90:
- case dwarf::DW_LANG_Fortran03:
- case dwarf::DW_LANG_Fortran08:
- return SourceLanguage::Fortran;
- case dwarf::DW_LANG_Pascal83:
- return SourceLanguage::Pascal;
- case dwarf::DW_LANG_Cobol74:
- case dwarf::DW_LANG_Cobol85:
- return SourceLanguage::Cobol;
- case dwarf::DW_LANG_Java:
- return SourceLanguage::Java;
- case dwarf::DW_LANG_D:
- return SourceLanguage::D;
- case dwarf::DW_LANG_Swift:
- return SourceLanguage::Swift;
- default:
- // There's no CodeView representation for this language, and CV doesn't
- // have an "unknown" option for the language field, so we'll use MASM,
- // as it's very low level.
- return SourceLanguage::Masm;
- }
-}
-
namespace {
struct Version {
int Part[4];
@@ -797,12 +814,8 @@ void CodeViewDebug::emitCompilerInformation() {
MCSymbol *CompilerEnd = beginSymbolRecord(SymbolKind::S_COMPILE3);
uint32_t Flags = 0;
- NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu");
- const MDNode *Node = *CUs->operands().begin();
- const auto *CU = cast<DICompileUnit>(Node);
-
// The low byte of the flags indicates the source language.
- Flags = MapDWLangToCVLang(CU->getSourceLanguage());
+ Flags = CurrentSourceLanguage;
// TODO: Figure out which other flags need to be set.
if (MMI->getModule()->getProfileSummary(/*IsCS*/ false) != nullptr) {
Flags |= static_cast<uint32_t>(CompileSym3Flags::PGO);
@@ -814,6 +827,10 @@ void CodeViewDebug::emitCompilerInformation() {
OS.AddComment("CPUType");
OS.emitInt16(static_cast<uint64_t>(TheCPU));
+ NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu");
+ const MDNode *Node = *CUs->operands().begin();
+ const auto *CU = cast<DICompileUnit>(Node);
+
StringRef CompilerVersion = CU->getProducer();
Version FrontVer = parseVersion(CompilerVersion);
OS.AddComment("Frontend version");
@@ -1573,6 +1590,8 @@ TypeIndex CodeViewDebug::lowerType(const DIType *Ty, const DIType *ClassTy) {
return lowerTypeClass(cast<DICompositeType>(Ty));
case dwarf::DW_TAG_union_type:
return lowerTypeUnion(cast<DICompositeType>(Ty));
+ case dwarf::DW_TAG_string_type:
+ return lowerTypeString(cast<DIStringType>(Ty));
case dwarf::DW_TAG_unspecified_type:
if (Ty->getName() == "decltype(nullptr)")
return TypeIndex::NullptrT();
@@ -1617,14 +1636,19 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) {
const DISubrange *Subrange = cast<DISubrange>(Element);
int64_t Count = -1;
- // Calculate the count if either LowerBound is absent or is zero and
- // either of Count or UpperBound are constant.
- auto *LI = Subrange->getLowerBound().dyn_cast<ConstantInt *>();
- if (!Subrange->getRawLowerBound() || (LI && (LI->getSExtValue() == 0))) {
- if (auto *CI = Subrange->getCount().dyn_cast<ConstantInt*>())
- Count = CI->getSExtValue();
- else if (auto *UI = Subrange->getUpperBound().dyn_cast<ConstantInt*>())
- Count = UI->getSExtValue() + 1; // LowerBound is zero
+
+ // If Subrange has a Count field, use it.
+ // Otherwise, if it has an upperbound, use (upperbound - lowerbound + 1),
+ // where lowerbound is from the LowerBound field of the Subrange,
+ // or the language default lowerbound if that field is unspecified.
+ if (auto *CI = Subrange->getCount().dyn_cast<ConstantInt *>())
+ Count = CI->getSExtValue();
+ else if (auto *UI = Subrange->getUpperBound().dyn_cast<ConstantInt *>()) {
+ // Fortran uses 1 as the default lowerbound; other languages use 0.
+ int64_t Lowerbound = (moduleIsInFortran()) ? 1 : 0;
+ auto *LI = Subrange->getLowerBound().dyn_cast<ConstantInt *>();
+ Lowerbound = (LI) ? LI->getSExtValue() : Lowerbound;
+ Count = UI->getSExtValue() - Lowerbound + 1;
}
// Forward declarations of arrays without a size and VLAs use a count of -1.
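
As a concrete check of the new bounds handling: a Fortran dimension declared 2:11 gives Count = 11 - 2 + 1 = 10, and a bare upperbound of 10 also gives 10 - 1 + 1 = 10 because Fortran's default lowerbound is 1. A compact sketch of the same rule with plain integers standing in for the metadata fields:

#include <cassert>
#include <cstdint>
#include <optional>

// Prefer an explicit count; otherwise derive it from the bounds, defaulting
// the lowerbound to 1 for Fortran and 0 elsewhere.
int64_t dimensionCount(std::optional<int64_t> Count,
                       std::optional<int64_t> Upper,
                       std::optional<int64_t> Lower, bool IsFortran) {
  if (Count)
    return *Count;
  if (!Upper)
    return -1;   // unknown size (forward declaration / VLA)
  int64_t Lo = Lower ? *Lower : (IsFortran ? 1 : 0);
  return *Upper - Lo + 1;
}

int main() {
  assert(dimensionCount({}, 11, 2, true) == 10);   // Fortran 2:11
  assert(dimensionCount({}, 10, {}, true) == 10);  // Fortran default lowerbound
  assert(dimensionCount(16, {}, {}, false) == 16); // explicit count wins
}
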
@@ -1650,6 +1674,26 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) {
return ElementTypeIndex;
}
+// This function lowers a Fortran character type (DIStringType).
+// Note that it handles only the character*n variant (using SizeInBits
+// field in DIString to describe the type size) at the moment.
+// Other variants (leveraging the StringLength and StringLengthExp
+// fields in DIStringType) remain TBD.
+TypeIndex CodeViewDebug::lowerTypeString(const DIStringType *Ty) {
+ TypeIndex CharType = TypeIndex(SimpleTypeKind::NarrowCharacter);
+ uint64_t ArraySize = Ty->getSizeInBits() >> 3;
+ StringRef Name = Ty->getName();
+ // IndexType is size_t, which depends on the bitness of the target.
+ TypeIndex IndexType = getPointerSizeInBytes() == 8
+ ? TypeIndex(SimpleTypeKind::UInt64Quad)
+ : TypeIndex(SimpleTypeKind::UInt32Long);
+
+ // Create a type of character array of ArraySize.
+ ArrayRecord AR(CharType, IndexType, ArraySize, Name);
+
+ return TypeTable.writeLeafType(AR);
+}
+
TypeIndex CodeViewDebug::lowerTypeBasic(const DIBasicType *Ty) {
TypeIndex Index;
dwarf::TypeKind Kind;
@@ -1728,9 +1772,14 @@ TypeIndex CodeViewDebug::lowerTypeBasic(const DIBasicType *Ty) {
}
// Apply some fixups based on the source-level type name.
- if (STK == SimpleTypeKind::Int32 && Ty->getName() == "long int")
+ // Include some amount of canonicalization from an old naming scheme Clang
+ // used to use for integer types (in an outdated effort to be compatible with
+ // GCC's debug info/GDB's behavior, which has since been addressed).
+ if (STK == SimpleTypeKind::Int32 &&
+ (Ty->getName() == "long int" || Ty->getName() == "long"))
STK = SimpleTypeKind::Int32Long;
- if (STK == SimpleTypeKind::UInt32 && Ty->getName() == "long unsigned int")
+ if (STK == SimpleTypeKind::UInt32 && (Ty->getName() == "long unsigned int" ||
+ Ty->getName() == "unsigned long"))
STK = SimpleTypeKind::UInt32Long;
if (STK == SimpleTypeKind::UInt16Short &&
(Ty->getName() == "wchar_t" || Ty->getName() == "__wchar_t"))
@@ -2177,6 +2226,7 @@ void CodeViewDebug::clear() {
TypeIndices.clear();
CompleteTypeIndices.clear();
ScopeGlobals.clear();
+ CVGlobalVariableOffsets.clear();
}
void CodeViewDebug::collectMemberInfo(ClassInfo &Info,
@@ -3062,6 +3112,15 @@ void CodeViewDebug::collectGlobalVariableInfo() {
const DIGlobalVariable *DIGV = GVE->getVariable();
const DIExpression *DIE = GVE->getExpression();
+ if ((DIE->getNumElements() == 2) &&
+ (DIE->getElement(0) == dwarf::DW_OP_plus_uconst))
+ // Record the constant offset for the variable.
+ //
+ // A Fortran common block uses this idiom to encode the offset
+ // of a variable from the common block's starting address.
+ CVGlobalVariableOffsets.insert(
+ std::make_pair(DIGV, DIE->getElement(1)));
+
// Emit constant global variables in a global symbol section.
if (GlobalMap.count(GVE) == 0 && DIE->isConstant()) {
CVGlobalVariable CVGV = {DIGV, DIE};
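
The added lines watch for a DIExpression of exactly DW_OP_plus_uconst plus one constant and remember that constant, so the variable's address can later be emitted as the common-block symbol plus an offset. A stripped-down sketch of the detection over a plain operand vector rather than a real DIExpression (the opcode value 0x23 is DW_OP_plus_uconst from the DWARF spec):

#include <cassert>
#include <cstdint>
#include <optional>
#include <vector>

constexpr uint64_t DW_OP_plus_uconst = 0x23;   // DWARF: add an unsigned constant

// If the expression is exactly {DW_OP_plus_uconst, K}, return K; this is the
// shape a Fortran common-block member uses to say "K bytes past the block".
std::optional<uint64_t> commonBlockOffset(const std::vector<uint64_t> &Expr) {
  if (Expr.size() == 2 && Expr[0] == DW_OP_plus_uconst)
    return Expr[1];
  return std::nullopt;
}

int main() {
  assert(commonBlockOffset({DW_OP_plus_uconst, 8}) == 8u);
  assert(!commonBlockOffset({DW_OP_plus_uconst, 8, 0}));
}
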
@@ -3226,7 +3285,11 @@ void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) {
if (const auto *MemberDecl = dyn_cast_or_null<DIDerivedType>(
DIGV->getRawStaticDataMemberDeclaration()))
Scope = MemberDecl->getScope();
- std::string QualifiedName = getFullyQualifiedName(Scope, DIGV->getName());
+ // For Fortran, the scoping portion is elided in its name so that we can
+ // reference the variable in the command line of the VS debugger.
+ std::string QualifiedName =
+ (moduleIsInFortran()) ? std::string(DIGV->getName())
+ : getFullyQualifiedName(Scope, DIGV->getName());
if (const GlobalVariable *GV =
CVGV.GVInfo.dyn_cast<const GlobalVariable *>()) {
@@ -3242,7 +3305,13 @@ void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) {
OS.AddComment("Type");
OS.emitInt32(getCompleteTypeIndex(DIGV->getType()).getIndex());
OS.AddComment("DataOffset");
- OS.EmitCOFFSecRel32(GVSym, /*Offset=*/0);
+
+ uint64_t Offset = 0;
+ if (CVGlobalVariableOffsets.find(DIGV) != CVGlobalVariableOffsets.end())
+ // Use the offset seen while collecting info on globals.
+ Offset = CVGlobalVariableOffsets[DIGV];
+ OS.EmitCOFFSecRel32(GVSym, Offset);
+
OS.AddComment("Segment");
OS.EmitCOFFSectionIndex(GVSym);
OS.AddComment("Name");
diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
index d133474ee5aa..6f88e15ee8fe 100644
--- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
+++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
@@ -186,6 +186,13 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
};
FunctionInfo *CurFn = nullptr;
+ codeview::SourceLanguage CurrentSourceLanguage =
+ codeview::SourceLanguage::Masm;
+
+ // This map records the constant offset in DIExpression of the
+ // DIGlobalVariableExpression referencing the DIGlobalVariable.
+ DenseMap<const DIGlobalVariable *, uint64_t> CVGlobalVariableOffsets;
+
// Map used to separate variables according to the lexical scope they belong
// in. This is populated by recordLocalVariable() before
// collectLexicalBlocks() separates the variables between the FunctionInfo
@@ -400,6 +407,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
codeview::TypeIndex lowerType(const DIType *Ty, const DIType *ClassTy);
codeview::TypeIndex lowerTypeAlias(const DIDerivedType *Ty);
codeview::TypeIndex lowerTypeArray(const DICompositeType *Ty);
+ codeview::TypeIndex lowerTypeString(const DIStringType *Ty);
codeview::TypeIndex lowerTypeBasic(const DIBasicType *Ty);
codeview::TypeIndex lowerTypePointer(
const DIDerivedType *Ty,
@@ -464,6 +472,11 @@ protected:
/// Gather post-function debug information.
void endFunctionImpl(const MachineFunction *) override;
+ /// Check if the current module is in Fortran.
+ bool moduleIsInFortran() {
+ return CurrentSourceLanguage == codeview::SourceLanguage::Fortran;
+ }
+
public:
CodeViewDebug(AsmPrinter *AP);
diff --git a/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
index 802f0e880514..5f4ee747fcca 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
@@ -93,19 +93,15 @@ void DIEHash::addParentContext(const DIE &Parent) {
// Reverse iterate over our list to go from the outermost construct to the
// innermost.
- for (SmallVectorImpl<const DIE *>::reverse_iterator I = Parents.rbegin(),
- E = Parents.rend();
- I != E; ++I) {
- const DIE &Die = **I;
-
+ for (const DIE *Die : llvm::reverse(Parents)) {
// ... Append the letter "C" to the sequence...
addULEB128('C');
// ... Followed by the DWARF tag of the construct...
- addULEB128(Die.getTag());
+ addULEB128(Die->getTag());
// ... Then the name, taken from the DW_AT_name attribute.
- StringRef Name = getDIEStringAttr(Die, dwarf::DW_AT_name);
+ StringRef Name = getDIEStringAttr(*Die, dwarf::DW_AT_name);
LLVM_DEBUG(dbgs() << "... adding context: " << Name << "\n");
if (!Name.empty())
addString(Name);
diff --git a/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp b/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
index bb24f1414ef1..dd795079ac1a 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
@@ -252,8 +252,8 @@ void DbgValueHistoryMap::trimLocationRanges(
// Now actually remove the entries. Iterate backwards so that our remaining
// ToRemove indices are valid after each erase.
- for (auto Itr = ToRemove.rbegin(), End = ToRemove.rend(); Itr != End; ++Itr)
- HistoryMapEntries.erase(HistoryMapEntries.begin() + *Itr);
+ for (EntryIndex Idx : llvm::reverse(ToRemove))
+ HistoryMapEntries.erase(HistoryMapEntries.begin() + Idx);
}
}
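
The trimLocationRanges change keeps the invariant the old loop relied on: erase by index from highest to lowest so the indices still pending stay valid. A minimal illustration with a plain vector, using a reverse loop in place of llvm::reverse:

#include <algorithm>
#include <cassert>
#include <vector>

// Erase the elements at the given positions; walking the indices from the
// back means each erase leaves the not-yet-processed indices untouched.
void eraseIndices(std::vector<int> &Values, std::vector<size_t> ToRemove) {
  std::sort(ToRemove.begin(), ToRemove.end());
  for (auto It = ToRemove.rbegin(); It != ToRemove.rend(); ++It)
    Values.erase(Values.begin() + *It);
}

int main() {
  std::vector<int> V{10, 11, 12, 13, 14};
  eraseIndices(V, {1, 3});
  assert((V == std::vector<int>{10, 12, 14}));
}
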
diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
index c81288c0e460..4df34d2c9402 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
@@ -174,21 +174,26 @@ uint64_t DebugHandlerBase::getBaseTypeSize(const DIType *Ty) {
}
bool DebugHandlerBase::isUnsignedDIType(const DIType *Ty) {
- // SROA may generate dbg value intrinsics to assign an unsigned value to a
- // Fortran CHARACTER(1) type variables. Make them as unsigned.
if (isa<DIStringType>(Ty)) {
- assert((Ty->getSizeInBits()) == 8 && "Not a valid unsigned type!");
+ // Some transformations (e.g. instcombine) may decide to turn a Fortran
+ // character object into an integer, and later ones (e.g. SROA) may
+ // further inject a constant integer in a llvm.dbg.value call to track
+ // the object's value. Here we trust the transformations are doing the
+ // right thing, and treat the constant as unsigned to preserve that value
+ // (i.e. avoid sign extension).
return true;
}
- if (auto *CTy = dyn_cast<DICompositeType>(Ty)) {
- // FIXME: Enums without a fixed underlying type have unknown signedness
- // here, leading to incorrectly emitted constants.
- if (CTy->getTag() == dwarf::DW_TAG_enumeration_type)
- return false;
- // (Pieces of) aggregate types that get hacked apart by SROA may be
- // represented by a constant. Encode them as unsigned bytes.
- return true;
+ if (auto *CTy = dyn_cast<DICompositeType>(Ty)) {
+ if (CTy->getTag() == dwarf::DW_TAG_enumeration_type) {
+ if (!(Ty = CTy->getBaseType()))
+ // FIXME: Enums without a fixed underlying type have unknown signedness
+ // here, leading to incorrectly emitted constants.
+ return false;
+ } else
+ // (Pieces of) aggregate types that get hacked apart by SROA may be
+ // represented by a constant. Encode them as unsigned bytes.
+ return true;
}
if (auto *DTy = dyn_cast<DIDerivedType>(Ty)) {
diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
index 62ebadaf3cbe..d7ab2091967f 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
@@ -158,7 +158,7 @@ public:
friend bool operator<(const DbgValueLoc &, const DbgValueLoc &);
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void dump() const {
- for (DbgValueLocEntry DV : ValueLocEntries)
+ for (const DbgValueLocEntry &DV : ValueLocEntries)
DV.dump();
if (Expression)
Expression->dump();
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index faa14dca1c3f..922c91840520 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -143,8 +143,6 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
auto *GVContext = GV->getScope();
const DIType *GTy = GV->getType();
- // Construct the context before querying for the existence of the DIE in
- // case such construction creates the DIE.
auto *CB = GVContext ? dyn_cast<DICommonBlock>(GVContext) : nullptr;
DIE *ContextDIE = CB ? getOrCreateCommonBlock(CB, GlobalExprs)
: getOrCreateContextDIE(GVContext);
@@ -183,6 +181,8 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
else
addGlobalName(GV->getName(), *VariableDIE, DeclContext);
+ addAnnotation(*VariableDIE, GV->getAnnotations());
+
if (uint32_t AlignInBytes = GV->getAlignInBytes())
addUInt(*VariableDIE, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata,
AlignInBytes);
@@ -260,14 +260,14 @@ void DwarfCompileUnit::addLocationAttribute(
if (Global) {
const MCSymbol *Sym = Asm->getSymbol(Global);
+ unsigned PointerSize = Asm->getDataLayout().getPointerSize();
+ assert((PointerSize == 4 || PointerSize == 8) &&
+ "Add support for other sizes if necessary");
if (Global->isThreadLocal()) {
if (Asm->TM.useEmulatedTLS()) {
// TODO: add debug info for emulated thread local mode.
} else {
// FIXME: Make this work with -gsplit-dwarf.
- unsigned PointerSize = Asm->getDataLayout().getPointerSize();
- assert((PointerSize == 4 || PointerSize == 8) &&
- "Add support for other sizes if necessary");
// Based on GCC's support for TLS:
if (!DD->useSplitDwarf()) {
// 1) Start with a constNu of the appropriate pointer size
@@ -290,6 +290,24 @@ void DwarfCompileUnit::addLocationAttribute(
DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address
: dwarf::DW_OP_form_tls_address);
}
+ } else if (Asm->TM.getRelocationModel() == Reloc::RWPI ||
+ Asm->TM.getRelocationModel() == Reloc::ROPI_RWPI) {
+ // Constant
+ addUInt(*Loc, dwarf::DW_FORM_data1,
+ PointerSize == 4 ? dwarf::DW_OP_const4u
+ : dwarf::DW_OP_const8u);
+ // Relocation offset
+ addExpr(*Loc, PointerSize == 4 ? dwarf::DW_FORM_data4
+ : dwarf::DW_FORM_data8,
+ Asm->getObjFileLowering().getIndirectSymViaRWPI(Sym));
+ // Base register
+ Register BaseReg = Asm->getObjFileLowering().getStaticBase();
+ BaseReg = Asm->TM.getMCRegisterInfo()->getDwarfRegNum(BaseReg, false);
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + BaseReg);
+ // Offset from base register
+ addSInt(*Loc, dwarf::DW_FORM_sdata, 0);
+ // Operation
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
} else {
DD->addArangeLabel(SymbolCU(this, Sym));
addOpAddress(*Loc, Sym);
@@ -331,12 +349,10 @@ void DwarfCompileUnit::addLocationAttribute(
DIE *DwarfCompileUnit::getOrCreateCommonBlock(
const DICommonBlock *CB, ArrayRef<GlobalExpr> GlobalExprs) {
- // Construct the context before querying for the existence of the DIE in case
- // such construction creates the DIE.
- DIE *ContextDIE = getOrCreateContextDIE(CB->getScope());
-
+ // Check for pre-existence.
if (DIE *NDie = getDIE(CB))
return NDie;
+ DIE *ContextDIE = getOrCreateContextDIE(CB->getScope());
DIE &NDie = createAndAddDIE(dwarf::DW_TAG_common_block, *ContextDIE, CB);
StringRef Name = CB->getName().empty() ? "_BLNK_" : CB->getName();
addString(NDie, dwarf::DW_AT_name, Name);
@@ -351,7 +367,8 @@ DIE *DwarfCompileUnit::getOrCreateCommonBlock(
void DwarfCompileUnit::addRange(RangeSpan Range) {
DD->insertSectionLabel(Range.Begin);
- bool SameAsPrevCU = this == DD->getPrevCU();
+ auto *PrevCU = DD->getPrevCU();
+ bool SameAsPrevCU = this == PrevCU;
DD->setPrevCU(this);
// If we have no current ranges just add the range and return, otherwise,
// check the current section and CU against the previous section and CU we
@@ -360,6 +377,9 @@ void DwarfCompileUnit::addRange(RangeSpan Range) {
if (CURanges.empty() || !SameAsPrevCU ||
(&CURanges.back().End->getSection() !=
&Range.End->getSection())) {
+ // Before a new range is added, always terminate the prior line table.
+ if (PrevCU)
+ DD->terminateLineTable(PrevCU);
CURanges.push_back(Range);
return;
}
@@ -470,7 +490,6 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) {
addSInt(*Loc, dwarf::DW_FORM_sdata, TI_GLOBAL_RELOC);
if (!isDwoUnit()) {
addLabel(*Loc, dwarf::DW_FORM_data4, SPSym);
- DD->addArangeLabel(SymbolCU(this, SPSym));
} else {
// FIXME: when writing dwo, we need to avoid relocations. Probably
// the "right" solution is to treat globals the way func and data
@@ -961,9 +980,7 @@ sortLocalVars(SmallVectorImpl<DbgVariable *> &Input) {
bool visitedAllDependencies = Item.getInt();
WorkList.pop_back();
- // Dependency is in a different lexical scope or a global.
- if (!Var)
- continue;
+ assert(Var);
// Already handled.
if (Visited.count(Var))
@@ -987,8 +1004,10 @@ sortLocalVars(SmallVectorImpl<DbgVariable *> &Input) {
// visited again after all of its dependencies are handled.
WorkList.push_back({Var, 1});
for (auto *Dependency : dependencies(Var)) {
- auto Dep = dyn_cast_or_null<const DILocalVariable>(Dependency);
- WorkList.push_back({DbgVar[Dep], 0});
+ // Don't add dependency if it is in a different lexical scope or a global.
+ if (const auto *Dep = dyn_cast<const DILocalVariable>(Dependency))
+ if (DbgVariable *Var = DbgVar.lookup(Dep))
+ WorkList.push_back({Var, 0});
}
}
return Result;
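The sortLocalVars hunk above tightens a worklist-based ordering pass: dependencies that are globals or live in a different lexical scope are now filtered out when pushed, so the pop side can assert a non-null variable. The underlying pattern is a depth-first worklist that re-pushes an item before its dependencies and only emits it on the second visit. A rough standalone sketch of that pattern (plain strings instead of DbgVariable, cycles ignored):

    #include <cstdio>
    #include <map>
    #include <set>
    #include <string>
    #include <utility>
    #include <vector>

    int main() {
      // Variable -> variables it depends on (already filtered to in-scope locals).
      std::map<std::string, std::vector<std::string>> Deps = {
          {"a", {}}, {"b", {"a"}}, {"c", {"a", "b"}}};

      std::vector<std::string> Result;
      std::set<std::string> Visited;
      // Second member: have this item's dependencies been pushed already?
      std::vector<std::pair<std::string, bool>> WorkList = {
          {"c", false}, {"b", false}, {"a", false}};

      while (!WorkList.empty()) {
        auto [Var, DepsPushed] = WorkList.back();
        WorkList.pop_back();
        if (Visited.count(Var))
          continue;                        // already handled
        if (DepsPushed) {
          Visited.insert(Var);
          Result.push_back(Var);           // dependencies done, emit now
          continue;
        }
        WorkList.push_back({Var, true});   // revisit after the dependencies
        for (const std::string &Dep : Deps[Var])
          WorkList.push_back({Dep, false});
      }

      for (const std::string &V : Result)
        std::printf("%s ", V);             // prints: a b c
      std::printf("\n");
    }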
@@ -1103,9 +1122,10 @@ void DwarfCompileUnit::constructAbstractSubprogramScopeDIE(
// shouldn't be found by lookup.
AbsDef = &ContextCU->createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, nullptr);
ContextCU->applySubprogramAttributesToDefinition(SP, *AbsDef);
-
- if (!ContextCU->includeMinimalInlineScopes())
- ContextCU->addUInt(*AbsDef, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined);
+ ContextCU->addSInt(*AbsDef, dwarf::DW_AT_inline,
+ DD->getDwarfVersion() <= 4 ? Optional<dwarf::Form>()
+ : dwarf::DW_FORM_implicit_const,
+ dwarf::DW_INL_inlined);
if (DIE *ObjectPointer = ContextCU->createAndAddScopeChildren(Scope, *AbsDef))
ContextCU->addDIEEntry(*AbsDef, dwarf::DW_AT_object_pointer, *ObjectPointer);
}
@@ -1162,7 +1182,7 @@ DwarfCompileUnit::getDwarf5OrGNULocationAtom(dwarf::LocationAtom Loc) const {
}
DIE &DwarfCompileUnit::constructCallSiteEntryDIE(DIE &ScopeDIE,
- DIE *CalleeDIE,
+ const DISubprogram *CalleeSP,
bool IsTail,
const MCSymbol *PCAddr,
const MCSymbol *CallAddr,
@@ -1176,7 +1196,8 @@ DIE &DwarfCompileUnit::constructCallSiteEntryDIE(DIE &ScopeDIE,
addAddress(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_target),
MachineLocation(CallReg));
} else {
- assert(CalleeDIE && "No DIE for call site entry origin");
+ DIE *CalleeDIE = getOrCreateSubprogramDIE(CalleeSP);
+ assert(CalleeDIE && "Could not create DIE for call site entry origin");
addDIEEntry(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_origin),
*CalleeDIE);
}
@@ -1265,6 +1286,16 @@ DIE *DwarfCompileUnit::constructImportedEntityDIE(
if (!Name.empty())
addString(*IMDie, dwarf::DW_AT_name, Name);
+ // This is for an imported module with renamed entities (such as variables and
+ // subprograms).
+ DINodeArray Elements = Module->getElements();
+ for (const auto *Element : Elements) {
+ if (!Element)
+ continue;
+ IMDie->addChild(
+ constructImportedEntityDIE(cast<DIImportedEntity>(Element)));
+ }
+
return IMDie;
}
@@ -1479,10 +1510,12 @@ void DwarfCompileUnit::applyVariableAttributes(const DbgVariable &Var,
if (!Name.empty())
addString(VariableDie, dwarf::DW_AT_name, Name);
const auto *DIVar = Var.getVariable();
- if (DIVar)
+ if (DIVar) {
if (uint32_t AlignInBytes = DIVar->getAlignInBytes())
addUInt(VariableDie, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata,
AlignInBytes);
+ addAnnotation(VariableDie, DIVar->getAnnotations());
+ }
addSourceLine(VariableDie, DIVar);
addType(VariableDie, Var.getType());
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 6d8186a5ee2b..6e9261087686 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -249,16 +249,14 @@ public:
dwarf::LocationAtom getDwarf5OrGNULocationAtom(dwarf::LocationAtom Loc) const;
/// Construct a call site entry DIE describing a call within \p Scope to a
- /// callee described by \p CalleeDIE.
- /// \p CalleeDIE is a declaration or definition subprogram DIE for the callee.
- /// For indirect calls \p CalleeDIE is set to nullptr.
+ /// callee described by \p CalleeSP.
/// \p IsTail specifies whether the call is a tail call.
/// \p PCAddr points to the PC value after the call instruction.
/// \p CallAddr points to the PC value at the call instruction (or is null).
/// \p CallReg is a register location for an indirect call. For direct calls
/// the \p CallReg is set to 0.
- DIE &constructCallSiteEntryDIE(DIE &ScopeDIE, DIE *CalleeDIE, bool IsTail,
- const MCSymbol *PCAddr,
+ DIE &constructCallSiteEntryDIE(DIE &ScopeDIE, const DISubprogram *CalleeSP,
+ bool IsTail, const MCSymbol *PCAddr,
const MCSymbol *CallAddr, unsigned CallReg);
/// Construct call site parameter DIEs for the \p CallSiteDIE. The \p Params
/// were collected by the \ref collectCallSiteParameters.
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index ee14423ca3d0..047676d4c11e 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -480,7 +480,7 @@ static bool hasObjCCategory(StringRef Name) {
if (!isObjCClass(Name))
return false;
- return Name.find(") ") != StringRef::npos;
+ return Name.contains(") ");
}
static void getObjCClassCategory(StringRef In, StringRef &Class,
@@ -587,14 +587,6 @@ void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU,
}
}
-DIE &DwarfDebug::constructSubprogramDefinitionDIE(const DISubprogram *SP) {
- DICompileUnit *Unit = SP->getUnit();
- assert(SP->isDefinition() && "Subprogram not a definition");
- assert(Unit && "Subprogram definition without parent unit");
- auto &CU = getOrCreateDwarfCompileUnit(Unit);
- return *CU.getOrCreateSubprogramDIE(SP);
-}
-
/// Represents a parameter whose call site value can be described by applying a
/// debug expression to a register in the forwarded register worklist.
struct FwdRegParamInfo {
@@ -945,7 +937,7 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP,
continue;
unsigned CallReg = 0;
- DIE *CalleeDIE = nullptr;
+ const DISubprogram *CalleeSP = nullptr;
const Function *CalleeDecl = nullptr;
if (CalleeOp.isReg()) {
CallReg = CalleeOp.getReg();
@@ -955,19 +947,7 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP,
CalleeDecl = dyn_cast<Function>(CalleeOp.getGlobal());
if (!CalleeDecl || !CalleeDecl->getSubprogram())
continue;
- const DISubprogram *CalleeSP = CalleeDecl->getSubprogram();
-
- if (CalleeSP->isDefinition()) {
- // Ensure that a subprogram DIE for the callee is available in the
- // appropriate CU.
- CalleeDIE = &constructSubprogramDefinitionDIE(CalleeSP);
- } else {
- // Create the declaration DIE if it is missing. This is required to
- // support compilation of old bitcode with an incomplete list of
- // retained metadata.
- CalleeDIE = CU.getOrCreateSubprogramDIE(CalleeSP);
- }
- assert(CalleeDIE && "Must have a DIE for the callee");
+ CalleeSP = CalleeDecl->getSubprogram();
}
// TODO: Omit call site entries for runtime calls (objc_msgSend, etc).
@@ -1004,7 +984,7 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP,
<< (IsTail ? " [IsTail]" : "") << "\n");
DIE &CallSiteDIE = CU.constructCallSiteEntryDIE(
- ScopeDIE, CalleeDIE, IsTail, PCAddr, CallAddr, CallReg);
+ ScopeDIE, CalleeSP, IsTail, PCAddr, CallAddr, CallReg);
// Optionally emit call-site-param debug info.
if (emitDebugEntryValues()) {
@@ -1427,6 +1407,10 @@ void DwarfDebug::finalizeModuleInfo() {
// Emit all Dwarf sections that should come after the content.
void DwarfDebug::endModule() {
+ // Terminate the pending line table.
+ if (PrevCU)
+ terminateLineTable(PrevCU);
+ PrevCU = nullptr;
assert(CurFn == nullptr);
assert(CurMI == nullptr);
@@ -2102,12 +2086,22 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
static DebugLoc findPrologueEndLoc(const MachineFunction *MF) {
// First known non-DBG_VALUE and non-frame setup location marks
// the beginning of the function body.
- for (const auto &MBB : *MF)
- for (const auto &MI : MBB)
+ DebugLoc LineZeroLoc;
+ for (const auto &MBB : *MF) {
+ for (const auto &MI : MBB) {
if (!MI.isMetaInstruction() && !MI.getFlag(MachineInstr::FrameSetup) &&
- MI.getDebugLoc())
- return MI.getDebugLoc();
- return DebugLoc();
+ MI.getDebugLoc()) {
+ // Scan forward to try to find a non-zero line number. The prologue_end
+ // marks the first breakpoint in the function after the frame setup, and
+ // a compiler-generated line 0 location is not a meaningful breakpoint.
+ // If none is found, return the first location after the frame setup.
+ if (MI.getDebugLoc().getLine())
+ return MI.getDebugLoc();
+ LineZeroLoc = MI.getDebugLoc();
+ }
+ }
+ }
+ return LineZeroLoc;
}
/// Register a source line with debug info. Returns the unique label that was
@@ -2162,24 +2156,42 @@ void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) {
DwarfCompileUnit &CU = getOrCreateDwarfCompileUnit(SP->getUnit());
+ Asm->OutStreamer->getContext().setDwarfCompileUnitID(
+ getDwarfCompileUnitIDForLineTable(CU));
+
+ // Record beginning of function.
+ PrologEndLoc = emitInitialLocDirective(
+ *MF, Asm->OutStreamer->getContext().getDwarfCompileUnitID());
+}
+
+unsigned
+DwarfDebug::getDwarfCompileUnitIDForLineTable(const DwarfCompileUnit &CU) {
// Set DwarfDwarfCompileUnitID in MCContext to the Compile Unit this function
// belongs to so that we add to the correct per-cu line table in the
// non-asm case.
if (Asm->OutStreamer->hasRawTextSupport())
// Use a single line table if we are generating assembly.
- Asm->OutStreamer->getContext().setDwarfCompileUnitID(0);
+ return 0;
else
- Asm->OutStreamer->getContext().setDwarfCompileUnitID(CU.getUniqueID());
+ return CU.getUniqueID();
+}
- // Record beginning of function.
- PrologEndLoc = emitInitialLocDirective(
- *MF, Asm->OutStreamer->getContext().getDwarfCompileUnitID());
+void DwarfDebug::terminateLineTable(const DwarfCompileUnit *CU) {
+ const auto &CURanges = CU->getRanges();
+ auto &LineTable = Asm->OutStreamer->getContext().getMCDwarfLineTable(
+ getDwarfCompileUnitIDForLineTable(*CU));
+ // Add the last range label for the given CU.
+ LineTable.getMCLineSections().addEndEntry(
+ const_cast<MCSymbol *>(CURanges.back().End));
}
void DwarfDebug::skippedNonDebugFunction() {
// If we don't have a subprogram for this function then there will be a hole
// in the range information. Keep note of this by setting the previously used
// section to nullptr.
+ // Terminate the pending line table.
+ if (PrevCU)
+ terminateLineTable(PrevCU);
PrevCU = nullptr;
CurFn = nullptr;
}
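The reworked findPrologueEndLoc above no longer stops at the first location after the frame setup: it scans past compiler-generated line-0 locations and only falls back to one of those if no real line number exists. A rough stand-in for that scan, assuming a plain list of line numbers where 0 marks an artificial location:

    #include <cstdio>
    #include <vector>

    // Returns the first non-zero line after the frame setup, 0 if only
    // artificial locations were seen, or -1 if there was no candidate at all.
    static int findPrologueEndLine(const std::vector<int> &Lines) {
      int LineZeroSeen = -1;
      for (int Line : Lines) {
        if (Line != 0)
          return Line;       // a meaningful breakpoint location
        LineZeroSeen = 0;    // remember the artificial fallback
      }
      return LineZeroSeen;
    }

    int main() {
      std::printf("%d\n", findPrologueEndLine({0, 0, 42, 43})); // 42
      std::printf("%d\n", findPrologueEndLine({0, 0}));         // 0
      std::printf("%d\n", findPrologueEndLine({}));             // -1
    }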
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 6356a65b50d3..4e1a1b1e068d 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -65,19 +65,21 @@ class Module;
/// such that it could leverage polymorphism to extract common code for
/// DbgVariable and DbgLabel.
class DbgEntity {
- const DINode *Entity;
- const DILocation *InlinedAt;
- DIE *TheDIE = nullptr;
- unsigned SubclassID;
-
public:
enum DbgEntityKind {
DbgVariableKind,
DbgLabelKind
};
- DbgEntity(const DINode *N, const DILocation *IA, unsigned ID)
- : Entity(N), InlinedAt(IA), SubclassID(ID) {}
+private:
+ const DINode *Entity;
+ const DILocation *InlinedAt;
+ DIE *TheDIE = nullptr;
+ const DbgEntityKind SubclassID;
+
+public:
+ DbgEntity(const DINode *N, const DILocation *IA, DbgEntityKind ID)
+ : Entity(N), InlinedAt(IA), SubclassID(ID) {}
virtual ~DbgEntity() {}
/// Accessors.
@@ -85,19 +87,18 @@ public:
const DINode *getEntity() const { return Entity; }
const DILocation *getInlinedAt() const { return InlinedAt; }
DIE *getDIE() const { return TheDIE; }
- unsigned getDbgEntityID() const { return SubclassID; }
+ DbgEntityKind getDbgEntityID() const { return SubclassID; }
/// @}
void setDIE(DIE &D) { TheDIE = &D; }
static bool classof(const DbgEntity *N) {
switch (N->getDbgEntityID()) {
- default:
- return false;
case DbgVariableKind:
case DbgLabelKind:
return true;
}
+ llvm_unreachable("Invalid DbgEntityKind");
}
};
@@ -471,9 +472,6 @@ private:
/// Construct a DIE for this abstract scope.
void constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU, LexicalScope *Scope);
- /// Construct a DIE for the subprogram definition \p SP and return it.
- DIE &constructSubprogramDefinitionDIE(const DISubprogram *SP);
-
/// Construct DIEs for call site entries describing the calls in \p MF.
void constructCallSiteEntryDIEs(const DISubprogram &SP, DwarfCompileUnit &CU,
DIE &ScopeDIE, const MachineFunction &MF);
@@ -615,7 +613,7 @@ private:
DenseSet<InlinedEntity> &ProcessedVars);
/// Build the location list for all DBG_VALUEs in the
- /// function that describe the same variable. If the resulting
+ /// function that describe the same variable. If the resulting
/// list has only one entry that is valid for entire variable's
/// scope return true.
bool buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
@@ -635,6 +633,9 @@ protected:
/// Gather and emit post-function debug information.
void endFunctionImpl(const MachineFunction *MF) override;
+ /// Get Dwarf compile unit ID for line table.
+ unsigned getDwarfCompileUnitIDForLineTable(const DwarfCompileUnit &CU);
+
void skippedNonDebugFunction() override;
public:
@@ -781,6 +782,9 @@ public:
const DwarfCompileUnit *getPrevCU() const { return PrevCU; }
void setPrevCU(const DwarfCompileUnit *PrevCU) { this->PrevCU = PrevCU; }
+ /// Terminate the line table by adding the last range label.
+ void terminateLineTable(const DwarfCompileUnit *CU);
+
/// Returns the entries for the .debug_loc section.
const DebugLocStream &getDebugLocs() const { return DebugLocs; }
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 344d30fad347..976e35905144 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -186,9 +186,8 @@ int64_t DwarfUnit::getDefaultLowerBound() const {
/// Check whether the DIE for this MDNode can be shared across CUs.
bool DwarfUnit::isShareableAcrossCUs(const DINode *D) const {
- // When the MDNode can be part of the type system (this includes subprogram
- // declarations *and* subprogram definitions, even local definitions), the
- // DIE must be shared across CUs.
+ // When the MDNode can be part of the type system, the DIE can be shared
+ // across CUs.
// Combining type units and cross-CU DIE sharing is lower value (since
// cross-CU DIE sharing is used in LTO and removes type redundancy at that
// level already) but may be implementable for some value in projects
@@ -196,7 +195,9 @@ bool DwarfUnit::isShareableAcrossCUs(const DINode *D) const {
// together.
if (isDwoUnit() && !DD->shareAcrossDWOCUs())
return false;
- return (isa<DIType>(D) || isa<DISubprogram>(D)) && !DD->generateTypeUnits();
+ return (isa<DIType>(D) ||
+ (isa<DISubprogram>(D) && !cast<DISubprogram>(D)->isDefinition())) &&
+ !DD->generateTypeUnits();
}
DIE *DwarfUnit::getDIE(const DINode *D) const {
@@ -671,7 +672,7 @@ std::string DwarfUnit::getParentContextString(const DIScope *Context) const {
// Reverse iterate over our list to go from the outermost construct to the
// innermost.
- for (const DIScope *Ctx : make_range(Parents.rbegin(), Parents.rend())) {
+ for (const DIScope *Ctx : llvm::reverse(Parents)) {
StringRef Name = Ctx->getName();
if (Name.empty() && isa<DINamespace>(Ctx))
Name = "(anonymous namespace)";
@@ -753,6 +754,8 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) {
if (!Name.empty())
addString(Buffer, dwarf::DW_AT_name, Name);
+ addAnnotation(Buffer, DTy->getAnnotations());
+
// If alignment is specified for a typedef, create and insert DW_AT_alignment
// attribute in DW_TAG_typedef DIE.
if (Tag == dwarf::DW_TAG_typedef && DD->getDwarfVersion() >= 5) {
@@ -832,6 +835,23 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DISubroutineType *CTy) {
addFlag(Buffer, dwarf::DW_AT_rvalue_reference);
}
+void DwarfUnit::addAnnotation(DIE &Buffer, DINodeArray Annotations) {
+ if (!Annotations)
+ return;
+
+ for (const Metadata *Annotation : Annotations->operands()) {
+ const MDNode *MD = cast<MDNode>(Annotation);
+ const MDString *Name = cast<MDString>(MD->getOperand(0));
+
+ // Currently, only MDString is supported with btf_decl_tag attribute.
+ const MDString *Value = cast<MDString>(MD->getOperand(1));
+
+ DIE &AnnotationDie = createAndAddDIE(dwarf::DW_TAG_LLVM_annotation, Buffer);
+ addString(AnnotationDie, dwarf::DW_AT_name, Name->getString());
+ addString(AnnotationDie, dwarf::DW_AT_const_value, Value->getString());
+ }
+}
+
void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
// Add name if not anonymous or intermediate type.
StringRef Name = CTy->getName();
@@ -849,7 +869,8 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
case dwarf::DW_TAG_variant_part:
case dwarf::DW_TAG_structure_type:
case dwarf::DW_TAG_union_type:
- case dwarf::DW_TAG_class_type: {
+ case dwarf::DW_TAG_class_type:
+ case dwarf::DW_TAG_namelist: {
// Emit the discriminator for a variant part.
DIDerivedType *Discriminator = nullptr;
if (Tag == dwarf::DW_TAG_variant_part) {
@@ -918,6 +939,13 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
DIE &VariantPart = createAndAddDIE(Composite->getTag(), Buffer);
constructTypeDIE(VariantPart, Composite);
}
+ } else if (Tag == dwarf::DW_TAG_namelist) {
+ auto *Var = dyn_cast<DINode>(Element);
+ auto *VarDIE = getDIE(Var);
+ if (VarDIE) {
+ DIE &ItemDie = createAndAddDIE(dwarf::DW_TAG_namelist_item, Buffer);
+ addDIEEntry(ItemDie, dwarf::DW_AT_namelist_item, *VarDIE);
+ }
}
}
@@ -960,6 +988,8 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
if (!Name.empty())
addString(Buffer, dwarf::DW_AT_name, Name);
+ addAnnotation(Buffer, CTy->getAnnotations());
+
if (Tag == dwarf::DW_TAG_enumeration_type ||
Tag == dwarf::DW_TAG_class_type || Tag == dwarf::DW_TAG_structure_type ||
Tag == dwarf::DW_TAG_union_type) {
@@ -1196,6 +1226,8 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
if (!SP->getName().empty())
addString(SPDie, dwarf::DW_AT_name, SP->getName());
+ addAnnotation(SPDie, SP->getAnnotations());
+
if (!SkipSPSourceLocation)
addSourceLine(SPDie, SP);
@@ -1546,6 +1578,8 @@ DIE &DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) {
if (!Name.empty())
addString(MemberDie, dwarf::DW_AT_name, Name);
+ addAnnotation(MemberDie, DT->getAnnotations());
+
if (DIType *Resolved = DT->getBaseType())
addType(MemberDie, Resolved);
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
index 4d31dd0daf59..8140279adaef 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -294,6 +294,9 @@ public:
void addSectionLabel(DIE &Die, dwarf::Attribute Attribute,
const MCSymbol *Label, const MCSymbol *Sec);
+ /// Add DW_TAG_LLVM_annotation.
+ void addAnnotation(DIE &Buffer, DINodeArray Annotations);
+
/// Get context owner's DIE.
DIE *createTypeDIE(const DICompositeType *Ty);
diff --git a/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
index e589c2e64abd..150f19324834 100644
--- a/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -812,8 +812,7 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel) {
Entry = TypeInfos.size();
}
- for (const GlobalValue *GV : make_range(TypeInfos.rbegin(),
- TypeInfos.rend())) {
+ for (const GlobalValue *GV : llvm::reverse(TypeInfos)) {
if (VerboseAsm)
Asm->OutStreamer->AddComment("TypeInfo " + Twine(Entry--));
Asm->emitTTypeReference(GV, TTypeEncoding);
diff --git a/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp
index 35a830f416f6..9e6f1a537de3 100644
--- a/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp
@@ -20,6 +20,8 @@
using namespace llvm;
+PseudoProbeHandler::~PseudoProbeHandler() = default;
+
void PseudoProbeHandler::emitPseudoProbe(uint64_t Guid, uint64_t Index,
uint64_t Type, uint64_t Attr,
const DILocation *DebugLoc) {
@@ -35,7 +37,10 @@ void PseudoProbeHandler::emitPseudoProbe(uint64_t Guid, uint64_t Index,
auto Name = SP->getLinkageName();
if (Name.empty())
Name = SP->getName();
- uint64_t CallerGuid = Function::getGUID(Name);
+ // Use caching to avoid redundant md5 computation for build speed.
+ uint64_t &CallerGuid = NameGuidMap[Name];
+ if (!CallerGuid)
+ CallerGuid = Function::getGUID(Name);
uint64_t CallerProbeId = PseudoProbeDwarfDiscriminator::extractProbeIndex(
InlinedAt->getDiscriminator());
ReversedInlineStack.emplace_back(CallerGuid, CallerProbeId);
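The PseudoProbeHandler change above memoizes the name-to-GUID computation so the MD5-derived hash is computed only once per caller name. The same caching idiom, sketched with standard containers and a stand-in hash (computeGuid below is hypothetical, not LLVM's Function::getGUID):

    #include <cstdint>
    #include <cstdio>
    #include <string>
    #include <unordered_map>

    // Stand-in for an expensive hash; LLVM derives the GUID from an MD5 digest.
    static uint64_t computeGuid(const std::string &Name) {
      uint64_t H = 1469598103934665603ull;      // FNV-1a, just for illustration
      for (unsigned char C : Name) {
        H ^= C;
        H *= 1099511628211ull;
      }
      return H;
    }

    int main() {
      std::unordered_map<std::string, uint64_t> NameGuidMap;
      const char *Callers[] = {"foo", "bar", "foo"};
      for (const char *Name : Callers) {
        uint64_t &Guid = NameGuidMap[Name];     // value-initialized to 0 on a miss
        if (!Guid)
          Guid = computeGuid(Name);             // compute only on a cache miss
        std::printf("%s -> %llu\n", Name, (unsigned long long)Guid);
      }
    }

As in the patch, a hash that legitimately came out as 0 would simply be recomputed, which is harmless.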
diff --git a/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h b/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h
index f2026a118bf5..7d5e51218693 100644
--- a/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h
+++ b/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h
@@ -26,9 +26,12 @@ class DILocation;
class PseudoProbeHandler : public AsmPrinterHandler {
// Target of pseudo probe emission.
AsmPrinter *Asm;
+ // Name to GUID map, used as caching/memoization for speed.
+ DenseMap<StringRef, uint64_t> NameGuidMap;
public:
PseudoProbeHandler(AsmPrinter *A) : Asm(A){};
+ ~PseudoProbeHandler() override;
void emitPseudoProbe(uint64_t Guid, uint64_t Index, uint64_t Type,
uint64_t Attr, const DILocation *DebugLoc);
diff --git a/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp b/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp
index 352a33e8639d..a17a2ca2790e 100644
--- a/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp
@@ -18,16 +18,25 @@
using namespace llvm;
void WasmException::endModule() {
- // This is the symbol used in 'throw' and 'catch' instruction to denote this
- // is a C++ exception. This symbol has to be emitted somewhere once in the
- // module. Check if the symbol has already been created, i.e., we have at
- // least one 'throw' or 'catch' instruction in the module, and emit the symbol
- // only if so.
- SmallString<60> NameStr;
- Mangler::getNameWithPrefix(NameStr, "__cpp_exception", Asm->getDataLayout());
- if (Asm->OutContext.lookupSymbol(NameStr)) {
- MCSymbol *ExceptionSym = Asm->GetExternalSymbolSymbol("__cpp_exception");
- Asm->OutStreamer->emitLabel(ExceptionSym);
+ // These are symbols used to throw/catch C++ exceptions and C longjmps. These
+ // symbols have to be emitted somewhere once in the module. Check if each of
+ // the symbols has already been created, i.e., we have at least one 'throw' or
+ // 'catch' instruction with the symbol in the module, and emit the symbol only
+ // if so.
+ //
+ // But in dynamic linking, it is in general not possible to come up with a
+ // module instantiating order in which tag-defining modules are loaded before
+ // the importing modules. So we make them undefined symbols here, define tags
+ // in the JS side, and feed them to each importing module.
+ if (!Asm->isPositionIndependent()) {
+ for (const char *SymName : {"__cpp_exception", "__c_longjmp"}) {
+ SmallString<60> NameStr;
+ Mangler::getNameWithPrefix(NameStr, SymName, Asm->getDataLayout());
+ if (Asm->OutContext.lookupSymbol(NameStr)) {
+ MCSymbol *ExceptionSym = Asm->GetExternalSymbolSymbol(SymName);
+ Asm->OutStreamer->emitLabel(ExceptionSym);
+ }
+ }
}
}
diff --git a/llvm/lib/CodeGen/AsmPrinter/WinException.cpp b/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
index b30d9cc12abc..ef57031c7294 100644
--- a/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
@@ -43,6 +43,7 @@ WinException::WinException(AsmPrinter *A) : EHStreamer(A) {
// platforms use an imagerel32 relocation to refer to symbols.
useImageRel32 = (A->getDataLayout().getPointerSizeInBits() == 64);
isAArch64 = Asm->TM.getTargetTriple().isAArch64();
+ isThumb = Asm->TM.getTargetTriple().isThumb();
}
WinException::~WinException() {}
@@ -330,10 +331,12 @@ const MCExpr *WinException::create32bitRef(const GlobalValue *GV) {
}
const MCExpr *WinException::getLabel(const MCSymbol *Label) {
- if (isAArch64)
- return MCSymbolRefExpr::create(Label, MCSymbolRefExpr::VK_COFF_IMGREL32,
- Asm->OutContext);
- return MCBinaryExpr::createAdd(create32bitRef(Label),
+ return MCSymbolRefExpr::create(Label, MCSymbolRefExpr::VK_COFF_IMGREL32,
+ Asm->OutContext);
+}
+
+const MCExpr *WinException::getLabelPlusOne(const MCSymbol *Label) {
+ return MCBinaryExpr::createAdd(getLabel(Label),
MCConstantExpr::create(1, Asm->OutContext),
Asm->OutContext);
}
@@ -561,8 +564,8 @@ InvokeStateChangeIterator &InvokeStateChangeIterator::scan() {
/// struct Table {
/// int NumEntries;
/// struct Entry {
-/// imagerel32 LabelStart;
-/// imagerel32 LabelEnd;
+/// imagerel32 LabelStart; // Inclusive
+/// imagerel32 LabelEnd; // Exclusive
/// imagerel32 FilterOrFinally; // One means catch-all.
/// imagerel32 LabelLPad; // Zero means __finally.
/// } Entries[NumEntries];
@@ -664,7 +667,7 @@ void WinException::emitSEHActionsForRange(const WinEHFuncInfo &FuncInfo,
AddComment("LabelStart");
OS.emitValue(getLabel(BeginLabel), 4);
AddComment("LabelEnd");
- OS.emitValue(getLabel(EndLabel), 4);
+ OS.emitValue(getLabelPlusOne(EndLabel), 4);
AddComment(UME.IsFinally ? "FinallyFunclet" : UME.Filter ? "FilterFunction"
: "CatchAll");
OS.emitValue(FilterOrFinally, 4);
@@ -949,8 +952,15 @@ void WinException::computeIP2StateTable(
if (!ChangeLabel)
ChangeLabel = StateChange.PreviousEndLabel;
// Emit an entry indicating that PCs after 'Label' have this EH state.
+ // NOTE: On ARM architectures, the StateFromIp automatically takes into
+ // account that the return address is after the call instruction (whose EH
+ // state we should be using), but on other platforms we need to add 1 to the
+ // label so that we are using the correct EH state.
+ const MCExpr *LabelExpression = (isAArch64 || isThumb)
+ ? getLabel(ChangeLabel)
+ : getLabelPlusOne(ChangeLabel);
IPToStateTable.push_back(
- std::make_pair(getLabel(ChangeLabel), StateChange.NewState));
+ std::make_pair(LabelExpression, StateChange.NewState));
// FIXME: assert that NewState is between CatchLow and CatchHigh.
}
}
diff --git a/llvm/lib/CodeGen/AsmPrinter/WinException.h b/llvm/lib/CodeGen/AsmPrinter/WinException.h
index feea05ba63ad..638589adf0dd 100644
--- a/llvm/lib/CodeGen/AsmPrinter/WinException.h
+++ b/llvm/lib/CodeGen/AsmPrinter/WinException.h
@@ -39,6 +39,9 @@ class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer {
/// True if we are generating exception handling on Windows for ARM64.
bool isAArch64 = false;
+ /// True if we are generating exception handling on Windows for ARM (Thumb).
+ bool isThumb = false;
+
/// Pointer to the current funclet entry BB.
const MachineBasicBlock *CurrentFuncletEntry = nullptr;
@@ -77,6 +80,7 @@ class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer {
const MCExpr *create32bitRef(const MCSymbol *Value);
const MCExpr *create32bitRef(const GlobalValue *GV);
const MCExpr *getLabel(const MCSymbol *Label);
+ const MCExpr *getLabelPlusOne(const MCSymbol *Label);
const MCExpr *getOffset(const MCSymbol *OffsetOf, const MCSymbol *OffsetFrom);
const MCExpr *getOffsetPlusOne(const MCSymbol *OffsetOf,
const MCSymbol *OffsetFrom);
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 125a3be585cb..4838f6da750d 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/CodeGen/AtomicExpandUtils.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -179,11 +180,9 @@ bool AtomicExpand::runOnFunction(Function &F) {
// Changing control-flow while iterating through it is a bad idea, so gather a
// list of all atomic instructions before we start.
- for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) {
- Instruction *I = &*II;
- if (I->isAtomic() && !isa<FenceInst>(I))
- AtomicInsts.push_back(I);
- }
+ for (Instruction &I : instructions(F))
+ if (I.isAtomic() && !isa<FenceInst>(&I))
+ AtomicInsts.push_back(&I);
bool MadeChange = false;
for (auto I : AtomicInsts) {
@@ -570,7 +569,9 @@ static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
}
bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
- switch (TLI->shouldExpandAtomicRMWInIR(AI)) {
+ LLVMContext &Ctx = AI->getModule()->getContext();
+ TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI);
+ switch (Kind) {
case TargetLoweringBase::AtomicExpansionKind::None:
return false;
case TargetLoweringBase::AtomicExpansionKind::LLSC: {
@@ -600,6 +601,18 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
expandPartwordAtomicRMW(AI,
TargetLoweringBase::AtomicExpansionKind::CmpXChg);
} else {
+ SmallVector<StringRef> SSNs;
+ Ctx.getSyncScopeNames(SSNs);
+ auto MemScope = SSNs[AI->getSyncScopeID()].empty()
+ ? "system"
+ : SSNs[AI->getSyncScopeID()];
+ OptimizationRemarkEmitter ORE(AI->getFunction());
+ ORE.emit([&]() {
+ return OptimizationRemark(DEBUG_TYPE, "Passed", AI)
+ << "A compare and swap loop was generated for an atomic "
+ << AI->getOperationName(AI->getOperation()) << " operation at "
+ << MemScope << " memory scope";
+ });
expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
}
return true;
@@ -1850,7 +1863,7 @@ bool AtomicExpand::expandAtomicOpToLibcall(
// Now, the return type.
if (CASExpected) {
ResultTy = Type::getInt1Ty(Ctx);
- Attr = Attr.addAttribute(Ctx, AttributeList::ReturnIndex, Attribute::ZExt);
+ Attr = Attr.addRetAttribute(Ctx, Attribute::ZExt);
} else if (HasResult && UseSizedLibcall)
ResultTy = SizedIntTy;
else
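The new optimization remark above fires when tryExpandAtomicRMW has to lower an atomic read-modify-write into a compare-and-swap loop because the target has no native instruction for it. The shape of that expansion, sketched at the source level with std::atomic (an atomic multiply, an operation most ISAs cannot do in a single instruction):

    #include <atomic>
    #include <cstdio>

    // Atomically multiply Target by Factor with a CAS loop; returns the old value.
    static int atomicMul(std::atomic<int> &Target, int Factor) {
      int Old = Target.load(std::memory_order_relaxed);
      int New;
      do {
        New = Old * Factor;
        // On failure, Old is refreshed with the current value and we retry.
      } while (!Target.compare_exchange_weak(Old, New, std::memory_order_seq_cst,
                                             std::memory_order_relaxed));
      return Old;
    }

    int main() {
      std::atomic<int> V{6};
      int Prev = atomicMul(V, 7);
      std::printf("old=%d new=%d\n", Prev, V.load()); // old=6 new=42
    }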
diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp
index 1a6eed272ca2..c1901bc46d72 100644
--- a/llvm/lib/CodeGen/BasicBlockSections.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSections.cpp
@@ -21,9 +21,21 @@
// clusters of basic blocks. Every cluster will be emitted into a separate
// section with its basic blocks sequenced in the given order. To get the
// optimized performance, the clusters must form an optimal BB layout for the
-// function. Every cluster's section is labeled with a symbol to allow the
-// linker to reorder the sections in any arbitrary sequence. A global order of
-// these sections would encapsulate the function layout.
+// function. We insert a symbol at the beginning of every cluster's section to
+// allow the linker to reorder the sections in any arbitrary sequence. A global
+// order of these sections would encapsulate the function layout.
+// For example, consider the following clusters for a function foo (consisting
+// of 6 basic blocks 0, 1, ..., 5).
+//
+// 0 2
+// 1 3 5
+//
+// * Basic blocks 0 and 2 are placed in one section with symbol `foo`
+// referencing the beginning of this section.
+// * Basic blocks 1, 3, 5 are placed in a separate section. A new symbol
+// `foo.__part.1` will reference the beginning of this section.
+// * Basic block 4 (note that it is not referenced in the list) is placed in
+// one section, and a new symbol `foo.cold` will point to it.
//
// There are a couple of challenges to be addressed:
//
diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp
index 65e7e92fe152..5ac8f49a9522 100644
--- a/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/llvm/lib/CodeGen/BranchFolding.cpp
@@ -611,7 +611,7 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
// there are fallthroughs, and we don't know until after layout.
if (AfterPlacement && FullBlockTail1 && FullBlockTail2) {
auto BothFallThrough = [](MachineBasicBlock *MBB) {
- if (MBB->succ_size() != 0 && !MBB->canFallThrough())
+ if (!MBB->succ_empty() && !MBB->canFallThrough())
return false;
MachineFunction::iterator I(MBB);
MachineFunction *MF = MBB->getParent();
@@ -1198,14 +1198,13 @@ bool BranchFolder::OptimizeBranches(MachineFunction &MF) {
// Renumbering blocks alters EH scope membership, recalculate it.
EHScopeMembership = getEHScopeMembership(MF);
- for (MachineFunction::iterator I = std::next(MF.begin()), E = MF.end();
- I != E; ) {
- MachineBasicBlock *MBB = &*I++;
- MadeChange |= OptimizeBlock(MBB);
+ for (MachineBasicBlock &MBB :
+ llvm::make_early_inc_range(llvm::drop_begin(MF))) {
+ MadeChange |= OptimizeBlock(&MBB);
// If it is dead, remove it.
- if (MBB->pred_empty()) {
- RemoveDeadBlock(MBB);
+ if (MBB.pred_empty()) {
+ RemoveDeadBlock(&MBB);
MadeChange = true;
++NumDeadBlocks;
}
@@ -1753,10 +1752,8 @@ ReoptimizeBlock:
bool BranchFolder::HoistCommonCode(MachineFunction &MF) {
bool MadeChange = false;
- for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ) {
- MachineBasicBlock *MBB = &*I++;
- MadeChange |= HoistCommonCodeInSuccs(MBB);
- }
+ for (MachineBasicBlock &MBB : llvm::make_early_inc_range(MF))
+ MadeChange |= HoistCommonCodeInSuccs(&MBB);
return MadeChange;
}
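Both BranchFolder loops above switch to llvm::make_early_inc_range, which advances the iterator before the loop body runs so the current block can be erased safely. The hand-written equivalent of that early-increment idiom over a standard container, for reference:

    #include <cstdio>
    #include <list>

    int main() {
      std::list<int> Blocks = {1, 2, 3, 4, 5};
      for (auto I = Blocks.begin(), E = Blocks.end(); I != E;) {
        auto Cur = I++;           // increment first, so erasing *Cur is safe
        if (*Cur % 2 == 0)
          Blocks.erase(Cur);      // drop the element we just examined
      }
      for (int B : Blocks)
        std::printf("%d ", B);    // prints: 1 3 5
      std::printf("\n");
    }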
diff --git a/llvm/lib/CodeGen/BranchRelaxation.cpp b/llvm/lib/CodeGen/BranchRelaxation.cpp
index 366c303614d6..50825ccf9bac 100644
--- a/llvm/lib/CodeGen/BranchRelaxation.cpp
+++ b/llvm/lib/CodeGen/BranchRelaxation.cpp
@@ -463,10 +463,48 @@ bool BranchRelaxation::fixupUnconditionalBranch(MachineInstr &MI) {
DebugLoc DL = MI.getDebugLoc();
MI.eraseFromParent();
- BlockInfo[BranchBB->getNumber()].Size += TII->insertIndirectBranch(
- *BranchBB, *DestBB, DL, DestOffset - SrcOffset, RS.get());
+ // Create the optional restore block and, initially, place it at the end of
+ // function. That block will be placed later if it's used; otherwise, it will
+ // be erased.
+ MachineBasicBlock *RestoreBB = createNewBlockAfter(MF->back());
+
+ TII->insertIndirectBranch(*BranchBB, *DestBB, *RestoreBB, DL,
+ DestOffset - SrcOffset, RS.get());
+
+ BlockInfo[BranchBB->getNumber()].Size = computeBlockSize(*BranchBB);
adjustBlockOffsets(*MBB);
+
+ // If RestoreBB is required, try to place just before DestBB.
+ if (!RestoreBB->empty()) {
+ // TODO: For multiple far branches to the same destination, there are
+ // chances that some restore blocks could be shared if they clobber the
+ // same registers and share the same restore sequence. So far, those
+ // restore blocks are just duplicated for each far branch.
+ assert(!DestBB->isEntryBlock());
+ MachineBasicBlock *PrevBB = &*std::prev(DestBB->getIterator());
+ if (auto *FT = PrevBB->getFallThrough()) {
+ assert(FT == DestBB);
+ TII->insertUnconditionalBranch(*PrevBB, FT, DebugLoc());
+ // Recalculate the block size.
+ BlockInfo[PrevBB->getNumber()].Size = computeBlockSize(*PrevBB);
+ }
+ // Now, RestoreBB could be placed directly before DestBB.
+ MF->splice(DestBB->getIterator(), RestoreBB->getIterator());
+ // Update successors and predecessors.
+ RestoreBB->addSuccessor(DestBB);
+ BranchBB->replaceSuccessor(DestBB, RestoreBB);
+ if (TRI->trackLivenessAfterRegAlloc(*MF))
+ computeAndAddLiveIns(LiveRegs, *RestoreBB);
+ // Compute the restore block size.
+ BlockInfo[RestoreBB->getNumber()].Size = computeBlockSize(*RestoreBB);
+ // Update the offset starting from the previous block.
+ adjustBlockOffsets(*PrevBB);
+ } else {
+ // Remove restore block if it's not required.
+ MF->erase(RestoreBB);
+ }
+
return true;
}
diff --git a/llvm/lib/CodeGen/BreakFalseDeps.cpp b/llvm/lib/CodeGen/BreakFalseDeps.cpp
index b11db3e65770..558700bd9b3b 100644
--- a/llvm/lib/CodeGen/BreakFalseDeps.cpp
+++ b/llvm/lib/CodeGen/BreakFalseDeps.cpp
@@ -244,7 +244,7 @@ void BreakFalseDeps::processUndefReads(MachineBasicBlock *MBB) {
MachineInstr *UndefMI = UndefReads.back().first;
unsigned OpIdx = UndefReads.back().second;
- for (MachineInstr &I : make_range(MBB->rbegin(), MBB->rend())) {
+ for (MachineInstr &I : llvm::reverse(*MBB)) {
// Update liveness, including the current instruction's defs.
LiveRegSet.stepBackward(I);
diff --git a/llvm/lib/CodeGen/CodeGenCommonISel.cpp b/llvm/lib/CodeGen/CodeGenCommonISel.cpp
new file mode 100644
index 000000000000..877aa69c3e58
--- /dev/null
+++ b/llvm/lib/CodeGen/CodeGenCommonISel.cpp
@@ -0,0 +1,169 @@
+//===-- CodeGenCommonISel.cpp ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines common utilities that are shared between SelectionDAG and
+// GlobalISel frameworks.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/CodeGenCommonISel.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+
+using namespace llvm;
+
+/// Add a successor MBB to ParentMBB, creating a new MachineBB for BB if SuccMBB
+/// is 0.
+MachineBasicBlock *
+StackProtectorDescriptor::addSuccessorMBB(
+ const BasicBlock *BB, MachineBasicBlock *ParentMBB, bool IsLikely,
+ MachineBasicBlock *SuccMBB) {
+ // If SuccBB has not been created yet, create it.
+ if (!SuccMBB) {
+ MachineFunction *MF = ParentMBB->getParent();
+ MachineFunction::iterator BBI(ParentMBB);
+ SuccMBB = MF->CreateMachineBasicBlock(BB);
+ MF->insert(++BBI, SuccMBB);
+ }
+ // Add it as a successor of ParentMBB.
+ ParentMBB->addSuccessor(
+ SuccMBB, BranchProbabilityInfo::getBranchProbStackProtector(IsLikely));
+ return SuccMBB;
+}
+
+/// Given that the input MI is before a partial terminator sequence TSeq, return
+/// true if MI + TSeq is also a partial terminator sequence.
+///
+/// A Terminator sequence is a sequence of MachineInstrs which at this point in
+/// lowering copy vregs into physical registers, which are then passed into
+/// terminator instructions so we can satisfy ABI constraints. A partial
+/// terminator sequence is an improper subset of a terminator sequence (i.e. it
+/// may be the whole terminator sequence).
+static bool MIIsInTerminatorSequence(const MachineInstr &MI) {
+ // If we do not have a copy or an implicit def, we return true if and only if
+ // MI is a debug value.
+ if (!MI.isCopy() && !MI.isImplicitDef()) {
+ // Sometimes DBG_VALUE MIs sneak in between the copies from the vregs to the
+ // physical registers if there is debug info associated with the terminator
+ // of our mbb. We want to include said debug info in our terminator
+ // sequence, so we return true in that case.
+ if (MI.isDebugInstr())
+ return true;
+
+ // For GlobalISel, we may have extension instructions for arguments within
+ // copy sequences. Allow these.
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_TRUNC:
+ case TargetOpcode::G_ZEXT:
+ case TargetOpcode::G_ANYEXT:
+ case TargetOpcode::G_SEXT:
+ case TargetOpcode::G_MERGE_VALUES:
+ case TargetOpcode::G_UNMERGE_VALUES:
+ case TargetOpcode::G_CONCAT_VECTORS:
+ case TargetOpcode::G_BUILD_VECTOR:
+ case TargetOpcode::G_EXTRACT:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ // We have left the terminator sequence if we are not doing one of the
+ // following:
+ //
+ // 1. Copying a vreg into a physical register.
+ // 2. Copying a vreg into a vreg.
+ // 3. Defining a register via an implicit def.
+
+ // OPI should always be a register definition...
+ MachineInstr::const_mop_iterator OPI = MI.operands_begin();
+ if (!OPI->isReg() || !OPI->isDef())
+ return false;
+
+ // Defining any register via an implicit def is always ok.
+ if (MI.isImplicitDef())
+ return true;
+
+ // Grab the copy source...
+ MachineInstr::const_mop_iterator OPI2 = OPI;
+ ++OPI2;
+ assert(OPI2 != MI.operands_end()
+ && "Should have a copy implying we should have 2 arguments.");
+
+ // Make sure that the copy dest is not a vreg when the copy source is a
+ // physical register.
+ if (!OPI2->isReg() || (!Register::isPhysicalRegister(OPI->getReg()) &&
+ Register::isPhysicalRegister(OPI2->getReg())))
+ return false;
+
+ return true;
+}
+
+/// Find the split point at which to splice the end of BB into its successor stack
+/// protector check machine basic block.
+///
+/// On many platforms, due to ABI constraints, terminators, even before register
+/// allocation, use physical registers. This creates an issue for us since
+/// physical registers at this point can not travel across basic
+/// blocks. Luckily, selectiondag always moves physical registers into vregs
+/// when they enter functions and moves them through a sequence of copies back
+/// into the physical registers right before the terminator creating a
+/// ``Terminator Sequence''. This function is searching for the beginning of the
+/// terminator sequence so that we can ensure that we splice off not just the
+/// terminator, but additionally the copies that move the vregs into the
+/// physical registers.
+MachineBasicBlock::iterator
+llvm::findSplitPointForStackProtector(MachineBasicBlock *BB,
+ const TargetInstrInfo &TII) {
+ MachineBasicBlock::iterator SplitPoint = BB->getFirstTerminator();
+ if (SplitPoint == BB->begin())
+ return SplitPoint;
+
+ MachineBasicBlock::iterator Start = BB->begin();
+ MachineBasicBlock::iterator Previous = SplitPoint;
+ --Previous;
+
+ if (TII.isTailCall(*SplitPoint) &&
+ Previous->getOpcode() == TII.getCallFrameDestroyOpcode()) {
+ // Call frames cannot be nested, so if this frame is describing the tail
+ // call itself, then we must insert before the sequence even starts. For
+ // example:
+ // <split point>
+ // ADJCALLSTACKDOWN ...
+ // <Moves>
+ // ADJCALLSTACKUP ...
+ // TAILJMP somewhere
+ // On the other hand, it could be an unrelated call in which case this tail
+ // call has to register moves of its own and should be the split point. For
+ // example:
+ // ADJCALLSTACKDOWN
+ // CALL something_else
+ // ADJCALLSTACKUP
+ // <split point>
+ // TAILJMP somewhere
+ do {
+ --Previous;
+ if (Previous->isCall())
+ return SplitPoint;
+ } while(Previous->getOpcode() != TII.getCallFrameSetupOpcode());
+
+ return Previous;
+ }
+
+ while (MIIsInTerminatorSequence(*Previous)) {
+ SplitPoint = Previous;
+ if (Previous == Start)
+ break;
+ --Previous;
+ }
+
+ return SplitPoint;
+}
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 77ce3d2fb563..ac4180c4c3ab 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -530,10 +530,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
while (MadeChange) {
MadeChange = false;
DT.reset();
- for (Function::iterator I = F.begin(); I != F.end(); ) {
- BasicBlock *BB = &*I++;
+ for (BasicBlock &BB : llvm::make_early_inc_range(F)) {
bool ModifiedDTOnIteration = false;
- MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration);
+ MadeChange |= optimizeBlock(BB, ModifiedDTOnIteration);
// Restart BB iteration if the dominator tree of the Function was changed
if (ModifiedDTOnIteration)
@@ -660,12 +659,8 @@ void CodeGenPrepare::removeAllAssertingVHReferences(Value *V) {
return;
auto &GEPVector = VecI->second;
- const auto &I =
- llvm::find_if(GEPVector, [=](auto &Elt) { return Elt.first == GEP; });
- if (I == GEPVector.end())
- return;
+ llvm::erase_if(GEPVector, [=](auto &Elt) { return Elt.first == GEP; });
- GEPVector.erase(I);
if (GEPVector.empty())
LargeOffsetGEPMap.erase(VecI);
}
@@ -2037,7 +2032,7 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros,
// Only handle legal scalar cases. Anything else requires too much work.
Type *Ty = CountZeros->getType();
- unsigned SizeInBits = Ty->getPrimitiveSizeInBits();
+ unsigned SizeInBits = Ty->getScalarSizeInBits();
if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSizeInBits())
return false;
@@ -2108,7 +2103,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
// idea
unsigned MinSize, PrefAlign;
if (TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
- for (auto &Arg : CI->arg_operands()) {
+ for (auto &Arg : CI->args()) {
// We want to align both objects whose address is used directly and
// objects whose address is used in casts and GEPs, though it only makes
// sense for GEPs if the offset is a multiple of the desired alignment and
@@ -2159,7 +2154,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
// into their uses. TODO: generalize this to work over profiling data
if (CI->hasFnAttr(Attribute::Cold) &&
!OptSize && !llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
- for (auto &Arg : CI->arg_operands()) {
+ for (auto &Arg : CI->args()) {
if (!Arg->getType()->isPointerTy())
continue;
unsigned AS = Arg->getType()->getPointerAddressSpace();
@@ -3718,7 +3713,8 @@ private:
// Traverse all Phis until we found equivalent or fail to do that.
bool IsMatched = false;
for (auto &P : PHI->getParent()->phis()) {
- if (&P == PHI)
+ // Skip new Phi nodes.
+ if (PhiNodesToMatch.count(&P))
continue;
if ((IsMatched = MatchPhiNode(PHI, &P, Matched, PhiNodesToMatch)))
break;
@@ -4187,7 +4183,7 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
if (Inst->getOpcode() == Instruction::Xor) {
const ConstantInt *Cst = dyn_cast<ConstantInt>(Inst->getOperand(1));
// Make sure it is not a NOT.
- if (Cst && !Cst->getValue().isAllOnesValue())
+ if (Cst && !Cst->getValue().isAllOnes())
return true;
}
@@ -4858,10 +4854,9 @@ static constexpr int MaxMemoryUsesToScan = 20;
/// Recursively walk all the uses of I until we find a memory use.
/// If we find an obviously non-foldable instruction, return true.
-/// Add the ultimately found memory instructions to MemoryUses.
+/// Add accessed addresses and types to MemoryUses.
static bool FindAllMemoryUses(
- Instruction *I,
- SmallVectorImpl<std::pair<Instruction *, unsigned>> &MemoryUses,
+ Instruction *I, SmallVectorImpl<std::pair<Value *, Type *>> &MemoryUses,
SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI,
const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI, int SeenInsts = 0) {
@@ -4882,31 +4877,28 @@ static bool FindAllMemoryUses(
Instruction *UserI = cast<Instruction>(U.getUser());
if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
- MemoryUses.push_back(std::make_pair(LI, U.getOperandNo()));
+ MemoryUses.push_back({U.get(), LI->getType()});
continue;
}
if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
- unsigned opNo = U.getOperandNo();
- if (opNo != StoreInst::getPointerOperandIndex())
+ if (U.getOperandNo() != StoreInst::getPointerOperandIndex())
return true; // Storing addr, not into addr.
- MemoryUses.push_back(std::make_pair(SI, opNo));
+ MemoryUses.push_back({U.get(), SI->getValueOperand()->getType()});
continue;
}
if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UserI)) {
- unsigned opNo = U.getOperandNo();
- if (opNo != AtomicRMWInst::getPointerOperandIndex())
+ if (U.getOperandNo() != AtomicRMWInst::getPointerOperandIndex())
return true; // Storing addr, not into addr.
- MemoryUses.push_back(std::make_pair(RMW, opNo));
+ MemoryUses.push_back({U.get(), RMW->getValOperand()->getType()});
continue;
}
if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(UserI)) {
- unsigned opNo = U.getOperandNo();
- if (opNo != AtomicCmpXchgInst::getPointerOperandIndex())
+ if (U.getOperandNo() != AtomicCmpXchgInst::getPointerOperandIndex())
return true; // Storing addr, not into addr.
- MemoryUses.push_back(std::make_pair(CmpX, opNo));
+ MemoryUses.push_back({U.get(), CmpX->getCompareOperand()->getType()});
continue;
}
@@ -5016,7 +5008,7 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
// we can remove the addressing mode and effectively trade one live register
// for another (at worst.) In this context, folding an addressing mode into
// the use is just a particularly nice way of sinking it.
- SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
+ SmallVector<std::pair<Value *, Type *>, 16> MemoryUses;
SmallPtrSet<Instruction*, 16> ConsideredInsts;
if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
PSI, BFI))
@@ -5032,18 +5024,10 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
// growth since most architectures have some reasonable small and fast way to
// compute an effective address. (i.e LEA on x86)
SmallVector<Instruction*, 32> MatchedAddrModeInsts;
- for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
- Instruction *User = MemoryUses[i].first;
- unsigned OpNo = MemoryUses[i].second;
-
- // Get the access type of this use. If the use isn't a pointer, we don't
- // know what it accesses.
- Value *Address = User->getOperand(OpNo);
- PointerType *AddrTy = dyn_cast<PointerType>(Address->getType());
- if (!AddrTy)
- return false;
- Type *AddressAccessTy = AddrTy->getElementType();
- unsigned AS = AddrTy->getAddressSpace();
+ for (const std::pair<Value *, Type *> &Pair : MemoryUses) {
+ Value *Address = Pair.first;
+ Type *AddressAccessTy = Pair.second;
+ unsigned AS = Address->getType()->getPointerAddressSpace();
// Do a match against the root of this address, ignoring profitability. This
// will tell us if the addressing mode for the memory operation will
@@ -5124,8 +5108,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
while (!worklist.empty()) {
- Value *V = worklist.back();
- worklist.pop_back();
+ Value *V = worklist.pop_back_val();
// We allow traversing cyclic Phi nodes.
// In case of success after this loop we ensure that traversing through
@@ -6477,8 +6460,7 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
APInt WidestAndBits(BitWidth, 0);
while (!WorkList.empty()) {
- Instruction *I = WorkList.back();
- WorkList.pop_back();
+ Instruction *I = WorkList.pop_back_val();
// Break use-def graph loops.
if (!Visited.insert(I).second)
@@ -6950,16 +6932,26 @@ bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
BasicBlock *TargetBB = I->getParent();
bool Changed = false;
SmallVector<Use *, 4> ToReplace;
+ Instruction *InsertPoint = I;
+ DenseMap<const Instruction *, unsigned long> InstOrdering;
+ unsigned long InstNumber = 0;
+ for (const auto &I : *TargetBB)
+ InstOrdering[&I] = InstNumber++;
+
for (Use *U : reverse(OpsToSink)) {
auto *UI = cast<Instruction>(U->get());
- if (UI->getParent() == TargetBB || isa<PHINode>(UI))
+ if (isa<PHINode>(UI))
continue;
+ if (UI->getParent() == TargetBB) {
+ if (InstOrdering[UI] < InstOrdering[InsertPoint])
+ InsertPoint = UI;
+ continue;
+ }
ToReplace.push_back(U);
}
SetVector<Instruction *> MaybeDead;
DenseMap<Instruction *, Instruction *> NewInstructions;
- Instruction *InsertPoint = I;
for (Use *U : ToReplace) {
auto *UI = cast<Instruction>(U->get());
Instruction *NI = UI->clone();
@@ -7863,8 +7855,9 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
- if (BinOp && (BinOp->getOpcode() == Instruction::And) && EnableAndCmpSinking)
- return sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts);
+ if (BinOp && BinOp->getOpcode() == Instruction::And && EnableAndCmpSinking &&
+ sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts))
+ return true;
// TODO: Move this into the switch on opcode - it handles shifts already.
if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
@@ -8030,9 +8023,8 @@ bool CodeGenPrepare::placeDbgValues(Function &F) {
DominatorTree DT(F);
for (BasicBlock &BB : F) {
- for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
- Instruction *Insn = &*BI++;
- DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn);
+ for (Instruction &Insn : llvm::make_early_inc_range(BB)) {
+ DbgValueInst *DVI = dyn_cast<DbgValueInst>(&Insn);
if (!DVI)
continue;
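[Note, not part of the patch] The tryToSinkFreeOperands hunk above numbers every instruction of the target block once (InstOrdering) and then walks the sinkable uses to move the insertion point up to the earliest use that is already in that block. A minimal standard-C++ sketch of that idea, with illustrative names only (Block, Ordering, Candidate):

#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

int main() {
  // Number the "instructions" of the block once, in program order.
  std::vector<std::string> Block{"i0", "i1", "i2", "i3"};
  std::unordered_map<std::string, unsigned long> Ordering;
  unsigned long N = 0;
  for (const auto &I : Block)
    Ordering[I] = N++;

  // Start at the current instruction, then move the insertion point up to
  // the earliest candidate already living in the block.
  std::string InsertPoint = "i3";
  for (const auto &Candidate : {std::string("i2"), std::string("i1")})
    if (Ordering[Candidate] < Ordering[InsertPoint])
      InsertPoint = Candidate;

  std::cout << InsertPoint << '\n'; // prints: i1
}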
diff --git a/llvm/lib/CodeGen/CommandFlags.cpp b/llvm/lib/CodeGen/CommandFlags.cpp
index f3cba6225107..a1ff02178ffa 100644
--- a/llvm/lib/CodeGen/CommandFlags.cpp
+++ b/llvm/lib/CodeGen/CommandFlags.cpp
@@ -65,6 +65,7 @@ CGOPT(DenormalMode::DenormalModeKind, DenormalFP32Math)
CGOPT(bool, EnableHonorSignDependentRoundingFPMath)
CGOPT(FloatABI::ABIType, FloatABIForCalls)
CGOPT(FPOpFusion::FPOpFusionMode, FuseFPOps)
+CGOPT(SwiftAsyncFramePointerMode, SwiftAsyncFramePointer)
CGOPT(bool, DontPlaceZerosInBSS)
CGOPT(bool, EnableGuaranteedTailCallOpt)
CGOPT(bool, DisableTailCalls)
@@ -89,11 +90,11 @@ CGOPT(bool, EnableAddrsig)
CGOPT(bool, EmitCallSiteInfo)
CGOPT(bool, EnableMachineFunctionSplitter)
CGOPT(bool, EnableDebugEntryValues)
-CGOPT(bool, PseudoProbeForProfiling)
CGOPT(bool, ValueTrackingVariableLocations)
CGOPT(bool, ForceDwarfFrameSection)
CGOPT(bool, XRayOmitFunctionIndex)
CGOPT(bool, DebugStrictDwarf)
+CGOPT(unsigned, AlignLoops)
codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
#define CGBINDOPT(NAME) \
@@ -277,6 +278,18 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
"Only fuse FP ops when the result won't be affected.")));
CGBINDOPT(FuseFPOps);
+ static cl::opt<SwiftAsyncFramePointerMode> SwiftAsyncFramePointer(
+ "swift-async-fp",
+ cl::desc("Determine when the Swift async frame pointer should be set"),
+ cl::init(SwiftAsyncFramePointerMode::Always),
+ cl::values(clEnumValN(SwiftAsyncFramePointerMode::DeploymentBased, "auto",
+ "Determine based on deployment target"),
+ clEnumValN(SwiftAsyncFramePointerMode::Always, "always",
+ "Always set the bit"),
+ clEnumValN(SwiftAsyncFramePointerMode::Never, "never",
+ "Never set the bit")));
+ CGBINDOPT(SwiftAsyncFramePointer);
+
static cl::opt<bool> DontPlaceZerosInBSS(
"nozero-initialized-in-bss",
cl::desc("Don't place zero-initialized symbols into bss section"),
@@ -420,11 +433,6 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
cl::init(false));
CGBINDOPT(EnableDebugEntryValues);
- static cl::opt<bool> PseudoProbeForProfiling(
- "pseudo-probe-for-profiling", cl::desc("Emit pseudo probes for AutoFDO"),
- cl::init(false));
- CGBINDOPT(PseudoProbeForProfiling);
-
static cl::opt<bool> ValueTrackingVariableLocations(
"experimental-debug-variable-locations",
cl::desc("Use experimental new value-tracking variable locations"),
@@ -452,6 +460,10 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
"strict-dwarf", cl::desc("use strict dwarf"), cl::init(false));
CGBINDOPT(DebugStrictDwarf);
+ static cl::opt<unsigned> AlignLoops("align-loops",
+ cl::desc("Default alignment for loops"));
+ CGBINDOPT(AlignLoops);
+
#undef CGBINDOPT
mc::RegisterMCTargetOptionsFlags();
@@ -522,18 +534,18 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) {
Options.EmitAddrsig = getEnableAddrsig();
Options.EmitCallSiteInfo = getEmitCallSiteInfo();
Options.EnableDebugEntryValues = getEnableDebugEntryValues();
- Options.PseudoProbeForProfiling = getPseudoProbeForProfiling();
Options.ValueTrackingVariableLocations = getValueTrackingVariableLocations();
Options.ForceDwarfFrameSection = getForceDwarfFrameSection();
Options.XRayOmitFunctionIndex = getXRayOmitFunctionIndex();
Options.DebugStrictDwarf = getDebugStrictDwarf();
+ Options.LoopAlignment = getAlignLoops();
Options.MCOptions = mc::InitMCTargetOptionsFromFlags();
Options.ThreadModel = getThreadModel();
Options.EABIVersion = getEABIVersion();
Options.DebuggerTuning = getDebuggerTuningOpt();
-
+ Options.SwiftAsyncFramePointer = getSwiftAsyncFramePointer();
return Options;
}
@@ -666,13 +678,11 @@ void codegen::setFunctionAttributes(StringRef CPU, StringRef Features,
if (const auto *F = Call->getCalledFunction())
if (F->getIntrinsicID() == Intrinsic::debugtrap ||
F->getIntrinsicID() == Intrinsic::trap)
- Call->addAttribute(
- AttributeList::FunctionIndex,
+ Call->addFnAttr(
Attribute::get(Ctx, "trap-func-name", getTrapFuncName()));
// Let NewAttrs override Attrs.
- F.setAttributes(
- Attrs.addAttributes(Ctx, AttributeList::FunctionIndex, NewAttrs));
+ F.setAttributes(Attrs.addFnAttributes(Ctx, NewAttrs));
}
/// Set function attributes of functions in Module M based on CPU,
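[Note, not part of the patch] The new -swift-async-fp flag above follows LLVM's usual enum-valued command-line option pattern. A small sketch assuming llvm/Support/CommandLine.h; the -color flag, Color enum, and tool here are hypothetical and only illustrate the pattern:

#include "llvm/Support/CommandLine.h"

namespace {
enum class Color { Auto, Always, Never };

// An enum-valued option: cl::values/clEnumValN map spellings to enumerators.
llvm::cl::opt<Color> ColorOpt(
    "color", llvm::cl::desc("When to colorize output"),
    llvm::cl::init(Color::Auto),
    llvm::cl::values(clEnumValN(Color::Auto, "auto", "Detect from terminal"),
                     clEnumValN(Color::Always, "always", "Always colorize"),
                     clEnumValN(Color::Never, "never", "Never colorize")));
} // namespace

int main(int argc, char **argv) {
  llvm::cl::ParseCommandLineOptions(argc, argv);
  return ColorOpt.getValue() == Color::Never ? 1 : 0;
}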
diff --git a/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
index c56c8c87734f..981f5973fee8 100644
--- a/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -212,6 +212,21 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr &MI) {
if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1))
RegRefs.insert(std::make_pair(Reg, &MO));
+ if (MO.isUse() && Special) {
+ if (!KeepRegs.test(Reg)) {
+ for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
+ SubRegs.isValid(); ++SubRegs)
+ KeepRegs.set(*SubRegs);
+ }
+ }
+ }
+
+ for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = MI.getOperand(I);
+ if (!MO.isReg()) continue;
+ Register Reg = MO.getReg();
+ if (!Reg.isValid())
+ continue;
// If this reg is tied and live (Classes[Reg] is set to -1), we can't change
// it or any of its sub or super regs. We need to use KeepRegs to mark the
// reg because not all uses of the same reg within an instruction are
@@ -222,7 +237,7 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr &MI) {
// of a register? In the above 'xor' example, the uses of %eax are undef, so
// earlier instructions could still replace %eax even though the 'xor'
// itself can't be changed.
- if (MI.isRegTiedToUseOperand(i) &&
+ if (MI.isRegTiedToUseOperand(I) &&
Classes[Reg] == reinterpret_cast<TargetRegisterClass *>(-1)) {
for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
SubRegs.isValid(); ++SubRegs) {
@@ -233,14 +248,6 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr &MI) {
KeepRegs.set(*SuperRegs);
}
}
-
- if (MO.isUse() && Special) {
- if (!KeepRegs.test(Reg)) {
- for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
- SubRegs.isValid(); ++SubRegs)
- KeepRegs.set(*SubRegs);
- }
- }
}
}
diff --git a/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
index 6e7db95b5c2a..c6c0b79cd7e7 100644
--- a/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -138,26 +138,22 @@ bool DeadMachineInstructionElim::eliminateDeadMI(MachineFunction &MF) {
// Now scan the instructions and delete dead ones, tracking physreg
// liveness as we go.
- for (MachineBasicBlock::reverse_iterator MII = MBB->rbegin(),
- MIE = MBB->rend();
- MII != MIE;) {
- MachineInstr *MI = &*MII++;
-
+ for (MachineInstr &MI : llvm::make_early_inc_range(llvm::reverse(*MBB))) {
// If the instruction is dead, delete it!
- if (isDead(MI)) {
- LLVM_DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << *MI);
+ if (isDead(&MI)) {
+ LLVM_DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << MI);
// It is possible that some DBG_VALUE instructions refer to this
// instruction. They get marked as undef and will be deleted
// in the live debug variable analysis.
- MI->eraseFromParentAndMarkDBGValuesForRemoval();
+ MI.eraseFromParentAndMarkDBGValuesForRemoval();
AnyChanges = true;
++NumDeletes;
continue;
}
// Record the physreg defs.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
if (MO.isReg() && MO.isDef()) {
Register Reg = MO.getReg();
if (Register::isPhysicalRegister(Reg)) {
@@ -175,8 +171,8 @@ bool DeadMachineInstructionElim::eliminateDeadMI(MachineFunction &MF) {
}
// Record the physreg uses, after the defs, in case a physreg is
// both defined and used in the same instruction.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
if (MO.isReg() && MO.isUse()) {
Register Reg = MO.getReg();
if (Register::isPhysicalRegister(Reg)) {
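[Note, not part of the patch] Several hunks above replace the hand-rolled "advance the iterator before erasing" loops with llvm::make_early_inc_range (from llvm/ADT/STLExtras.h), which packages that idiom as a range adaptor. A self-contained standard-C++ sketch of the underlying idiom, erasing elements from a std::list while iterating:

#include <iostream>
#include <list>

int main() {
  std::list<int> Worklist{1, 2, 3, 4, 5};
  // Early-increment idiom: save the current position and advance the
  // iterator *before* erasing, so the erase cannot invalidate `It`.
  for (auto It = Worklist.begin(), End = Worklist.end(); It != End;) {
    auto Cur = It++;
    if (*Cur % 2 == 0)
      Worklist.erase(Cur);
  }
  for (int V : Worklist)
    std::cout << V << ' '; // prints: 1 3 5
}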
diff --git a/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/llvm/lib/CodeGen/DwarfEHPrepare.cpp
index 5ca1e91cc5f4..fb8a3e383950 100644
--- a/llvm/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/llvm/lib/CodeGen/DwarfEHPrepare.cpp
@@ -14,6 +14,7 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/EHPersonalities.h"
@@ -54,13 +55,11 @@ namespace {
class DwarfEHPrepare {
CodeGenOpt::Level OptLevel;
- // RewindFunction - _Unwind_Resume or the target equivalent.
- FunctionCallee &RewindFunction;
-
Function &F;
const TargetLowering &TLI;
DomTreeUpdater *DTU;
const TargetTransformInfo *TTI;
+ const Triple &TargetTriple;
/// Return the exception object from the value passed into
/// the 'resume' instruction (typically an aggregate). Clean up any dead
@@ -78,11 +77,11 @@ class DwarfEHPrepare {
bool InsertUnwindResumeCalls();
public:
- DwarfEHPrepare(CodeGenOpt::Level OptLevel_, FunctionCallee &RewindFunction_,
- Function &F_, const TargetLowering &TLI_, DomTreeUpdater *DTU_,
- const TargetTransformInfo *TTI_)
- : OptLevel(OptLevel_), RewindFunction(RewindFunction_), F(F_), TLI(TLI_),
- DTU(DTU_), TTI(TTI_) {}
+ DwarfEHPrepare(CodeGenOpt::Level OptLevel_, Function &F_,
+ const TargetLowering &TLI_, DomTreeUpdater *DTU_,
+ const TargetTransformInfo *TTI_, const Triple &TargetTriple_)
+ : OptLevel(OptLevel_), F(F_), TLI(TLI_), DTU(DTU_), TTI(TTI_),
+ TargetTriple(TargetTriple_) {}
bool run();
};
@@ -211,13 +210,28 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls() {
if (ResumesLeft == 0)
return true; // We pruned them all.
- // Find the rewind function if we didn't already.
- if (!RewindFunction) {
- FunctionType *FTy =
+ // RewindFunction - _Unwind_Resume or the target equivalent.
+ FunctionCallee RewindFunction;
+ CallingConv::ID RewindFunctionCallingConv;
+ FunctionType *FTy;
+ const char *RewindName;
+ bool DoesRewindFunctionNeedExceptionObject;
+
+ if ((Pers == EHPersonality::GNU_CXX || Pers == EHPersonality::GNU_CXX_SjLj) &&
+ TargetTriple.isTargetEHABICompatible()) {
+ RewindName = TLI.getLibcallName(RTLIB::CXA_END_CLEANUP);
+ FTy = FunctionType::get(Type::getVoidTy(Ctx), false);
+ RewindFunctionCallingConv =
+ TLI.getLibcallCallingConv(RTLIB::CXA_END_CLEANUP);
+ DoesRewindFunctionNeedExceptionObject = false;
+ } else {
+ RewindName = TLI.getLibcallName(RTLIB::UNWIND_RESUME);
+ FTy =
FunctionType::get(Type::getVoidTy(Ctx), Type::getInt8PtrTy(Ctx), false);
- const char *RewindName = TLI.getLibcallName(RTLIB::UNWIND_RESUME);
- RewindFunction = F.getParent()->getOrInsertFunction(RewindName, FTy);
+ RewindFunctionCallingConv = TLI.getLibcallCallingConv(RTLIB::UNWIND_RESUME);
+ DoesRewindFunctionNeedExceptionObject = true;
}
+ RewindFunction = F.getParent()->getOrInsertFunction(RewindName, FTy);
// Create the basic block where the _Unwind_Resume call will live.
if (ResumesLeft == 1) {
@@ -226,10 +240,14 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls() {
ResumeInst *RI = Resumes.front();
BasicBlock *UnwindBB = RI->getParent();
Value *ExnObj = GetExceptionObject(RI);
+ llvm::SmallVector<Value *, 1> RewindFunctionArgs;
+ if (DoesRewindFunctionNeedExceptionObject)
+ RewindFunctionArgs.push_back(ExnObj);
- // Call the _Unwind_Resume function.
- CallInst *CI = CallInst::Create(RewindFunction, ExnObj, "", UnwindBB);
- CI->setCallingConv(TLI.getLibcallCallingConv(RTLIB::UNWIND_RESUME));
+ // Call the rewind function.
+ CallInst *CI =
+ CallInst::Create(RewindFunction, RewindFunctionArgs, "", UnwindBB);
+ CI->setCallingConv(RewindFunctionCallingConv);
// We never expect _Unwind_Resume to return.
CI->setDoesNotReturn();
@@ -240,6 +258,8 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls() {
std::vector<DominatorTree::UpdateType> Updates;
Updates.reserve(Resumes.size());
+ llvm::SmallVector<Value *, 1> RewindFunctionArgs;
+
BasicBlock *UnwindBB = BasicBlock::Create(Ctx, "unwind_resume", &F);
PHINode *PN = PHINode::Create(Type::getInt8PtrTy(Ctx), ResumesLeft, "exn.obj",
UnwindBB);
@@ -257,9 +277,13 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls() {
++NumResumesLowered;
}
+ if (DoesRewindFunctionNeedExceptionObject)
+ RewindFunctionArgs.push_back(PN);
+
// Call the function.
- CallInst *CI = CallInst::Create(RewindFunction, PN, "", UnwindBB);
- CI->setCallingConv(TLI.getLibcallCallingConv(RTLIB::UNWIND_RESUME));
+ CallInst *CI =
+ CallInst::Create(RewindFunction, RewindFunctionArgs, "", UnwindBB);
+ CI->setCallingConv(RewindFunctionCallingConv);
// We never expect _Unwind_Resume to return.
CI->setDoesNotReturn();
@@ -277,22 +301,20 @@ bool DwarfEHPrepare::run() {
return Changed;
}
-static bool prepareDwarfEH(CodeGenOpt::Level OptLevel,
- FunctionCallee &RewindFunction, Function &F,
+static bool prepareDwarfEH(CodeGenOpt::Level OptLevel, Function &F,
const TargetLowering &TLI, DominatorTree *DT,
- const TargetTransformInfo *TTI) {
+ const TargetTransformInfo *TTI,
+ const Triple &TargetTriple) {
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
- return DwarfEHPrepare(OptLevel, RewindFunction, F, TLI, DT ? &DTU : nullptr,
- TTI)
+ return DwarfEHPrepare(OptLevel, F, TLI, DT ? &DTU : nullptr, TTI,
+ TargetTriple)
.run();
}
namespace {
class DwarfEHPrepareLegacyPass : public FunctionPass {
- // RewindFunction - _Unwind_Resume or the target equivalent.
- FunctionCallee RewindFunction = nullptr;
CodeGenOpt::Level OptLevel;
@@ -315,7 +337,7 @@ public:
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
}
- return prepareDwarfEH(OptLevel, RewindFunction, F, TLI, DT, TTI);
+ return prepareDwarfEH(OptLevel, F, TLI, DT, TTI, TM.getTargetTriple());
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/CodeGen/ExpandMemCmp.cpp
index 50fdc2114780..d0c2b8c267ff 100644
--- a/llvm/lib/CodeGen/ExpandMemCmp.cpp
+++ b/llvm/lib/CodeGen/ExpandMemCmp.cpp
@@ -348,17 +348,17 @@ void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex,
ConstantInt::get(Diff->getType(), 0));
BranchInst *CmpBr =
BranchInst::Create(EndBlock, LoadCmpBlocks[BlockIndex + 1], Cmp);
+ Builder.Insert(CmpBr);
if (DTU)
DTU->applyUpdates(
{{DominatorTree::Insert, BB, EndBlock},
{DominatorTree::Insert, BB, LoadCmpBlocks[BlockIndex + 1]}});
- Builder.Insert(CmpBr);
} else {
// The last block has an unconditional branch to EndBlock.
BranchInst *CmpBr = BranchInst::Create(EndBlock);
+ Builder.Insert(CmpBr);
if (DTU)
DTU->applyUpdates({{DominatorTree::Insert, BB, EndBlock}});
- Builder.Insert(CmpBr);
}
}
diff --git a/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
index d909d6aa5b0a..7300ea6b50ee 100644
--- a/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -189,12 +189,7 @@ bool ExpandPostRA::runOnMachineFunction(MachineFunction &MF) {
bool MadeChange = false;
for (MachineBasicBlock &MBB : MF) {
- for (MachineBasicBlock::iterator mi = MBB.begin(), me = MBB.end();
- mi != me;) {
- MachineInstr &MI = *mi;
- // Advance iterator here because MI may be erased.
- ++mi;
-
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
// Only expand pseudos.
if (!MI.isPseudo())
continue;
diff --git a/llvm/lib/CodeGen/ExpandVectorPredication.cpp b/llvm/lib/CodeGen/ExpandVectorPredication.cpp
index a8d4d4ebe8bd..bb8d2b3e9a78 100644
--- a/llvm/lib/CodeGen/ExpandVectorPredication.cpp
+++ b/llvm/lib/CodeGen/ExpandVectorPredication.cpp
@@ -158,6 +158,11 @@ struct CachingVPExpander {
Value *expandPredicationInBinaryOperator(IRBuilder<> &Builder,
VPIntrinsic &PI);
+ /// \brief Lower this VP reduction to a call to an unpredicated reduction
+ /// intrinsic.
+ Value *expandPredicationInReduction(IRBuilder<> &Builder,
+ VPReductionIntrinsic &PI);
+
/// \brief Query TTI and expand the vector predication in \p P accordingly.
Value *expandPredication(VPIntrinsic &PI);
@@ -248,6 +253,136 @@ CachingVPExpander::expandPredicationInBinaryOperator(IRBuilder<> &Builder,
return NewBinOp;
}
+static Value *getNeutralReductionElement(const VPReductionIntrinsic &VPI,
+ Type *EltTy) {
+ bool Negative = false;
+ unsigned EltBits = EltTy->getScalarSizeInBits();
+ switch (VPI.getIntrinsicID()) {
+ default:
+ llvm_unreachable("Expecting a VP reduction intrinsic");
+ case Intrinsic::vp_reduce_add:
+ case Intrinsic::vp_reduce_or:
+ case Intrinsic::vp_reduce_xor:
+ case Intrinsic::vp_reduce_umax:
+ return Constant::getNullValue(EltTy);
+ case Intrinsic::vp_reduce_mul:
+ return ConstantInt::get(EltTy, 1, /*IsSigned*/ false);
+ case Intrinsic::vp_reduce_and:
+ case Intrinsic::vp_reduce_umin:
+ return ConstantInt::getAllOnesValue(EltTy);
+ case Intrinsic::vp_reduce_smin:
+ return ConstantInt::get(EltTy->getContext(),
+ APInt::getSignedMaxValue(EltBits));
+ case Intrinsic::vp_reduce_smax:
+ return ConstantInt::get(EltTy->getContext(),
+ APInt::getSignedMinValue(EltBits));
+ case Intrinsic::vp_reduce_fmax:
+ Negative = true;
+ LLVM_FALLTHROUGH;
+ case Intrinsic::vp_reduce_fmin: {
+ FastMathFlags Flags = VPI.getFastMathFlags();
+ const fltSemantics &Semantics = EltTy->getFltSemantics();
+ return !Flags.noNaNs() ? ConstantFP::getQNaN(EltTy, Negative)
+ : !Flags.noInfs()
+ ? ConstantFP::getInfinity(EltTy, Negative)
+ : ConstantFP::get(EltTy,
+ APFloat::getLargest(Semantics, Negative));
+ }
+ case Intrinsic::vp_reduce_fadd:
+ return ConstantFP::getNegativeZero(EltTy);
+ case Intrinsic::vp_reduce_fmul:
+ return ConstantFP::get(EltTy, 1.0);
+ }
+}
+
+Value *
+CachingVPExpander::expandPredicationInReduction(IRBuilder<> &Builder,
+ VPReductionIntrinsic &VPI) {
+ assert((isSafeToSpeculativelyExecute(&VPI) ||
+ VPI.canIgnoreVectorLengthParam()) &&
+ "Implicitly dropping %evl in non-speculatable operator!");
+
+ Value *Mask = VPI.getMaskParam();
+ Value *RedOp = VPI.getOperand(VPI.getVectorParamPos());
+
+ // Insert neutral element in masked-out positions
+ if (Mask && !isAllTrueMask(Mask)) {
+ auto *NeutralElt = getNeutralReductionElement(VPI, VPI.getType());
+ auto *NeutralVector = Builder.CreateVectorSplat(
+ cast<VectorType>(RedOp->getType())->getElementCount(), NeutralElt);
+ RedOp = Builder.CreateSelect(Mask, RedOp, NeutralVector);
+ }
+
+ Value *Reduction;
+ Value *Start = VPI.getOperand(VPI.getStartParamPos());
+
+ switch (VPI.getIntrinsicID()) {
+ default:
+ llvm_unreachable("Impossible reduction kind");
+ case Intrinsic::vp_reduce_add:
+ Reduction = Builder.CreateAddReduce(RedOp);
+ Reduction = Builder.CreateAdd(Reduction, Start);
+ break;
+ case Intrinsic::vp_reduce_mul:
+ Reduction = Builder.CreateMulReduce(RedOp);
+ Reduction = Builder.CreateMul(Reduction, Start);
+ break;
+ case Intrinsic::vp_reduce_and:
+ Reduction = Builder.CreateAndReduce(RedOp);
+ Reduction = Builder.CreateAnd(Reduction, Start);
+ break;
+ case Intrinsic::vp_reduce_or:
+ Reduction = Builder.CreateOrReduce(RedOp);
+ Reduction = Builder.CreateOr(Reduction, Start);
+ break;
+ case Intrinsic::vp_reduce_xor:
+ Reduction = Builder.CreateXorReduce(RedOp);
+ Reduction = Builder.CreateXor(Reduction, Start);
+ break;
+ case Intrinsic::vp_reduce_smax:
+ Reduction = Builder.CreateIntMaxReduce(RedOp, /*IsSigned*/ true);
+ Reduction =
+ Builder.CreateBinaryIntrinsic(Intrinsic::smax, Reduction, Start);
+ break;
+ case Intrinsic::vp_reduce_smin:
+ Reduction = Builder.CreateIntMinReduce(RedOp, /*IsSigned*/ true);
+ Reduction =
+ Builder.CreateBinaryIntrinsic(Intrinsic::smin, Reduction, Start);
+ break;
+ case Intrinsic::vp_reduce_umax:
+ Reduction = Builder.CreateIntMaxReduce(RedOp, /*IsSigned*/ false);
+ Reduction =
+ Builder.CreateBinaryIntrinsic(Intrinsic::umax, Reduction, Start);
+ break;
+ case Intrinsic::vp_reduce_umin:
+ Reduction = Builder.CreateIntMinReduce(RedOp, /*IsSigned*/ false);
+ Reduction =
+ Builder.CreateBinaryIntrinsic(Intrinsic::umin, Reduction, Start);
+ break;
+ case Intrinsic::vp_reduce_fmax:
+ Reduction = Builder.CreateFPMaxReduce(RedOp);
+ transferDecorations(*Reduction, VPI);
+ Reduction =
+ Builder.CreateBinaryIntrinsic(Intrinsic::maxnum, Reduction, Start);
+ break;
+ case Intrinsic::vp_reduce_fmin:
+ Reduction = Builder.CreateFPMinReduce(RedOp);
+ transferDecorations(*Reduction, VPI);
+ Reduction =
+ Builder.CreateBinaryIntrinsic(Intrinsic::minnum, Reduction, Start);
+ break;
+ case Intrinsic::vp_reduce_fadd:
+ Reduction = Builder.CreateFAddReduce(Start, RedOp);
+ break;
+ case Intrinsic::vp_reduce_fmul:
+ Reduction = Builder.CreateFMulReduce(Start, RedOp);
+ break;
+ }
+
+ replaceOperation(*Reduction, VPI);
+ return Reduction;
+}
+
void CachingVPExpander::discardEVLParameter(VPIntrinsic &VPI) {
LLVM_DEBUG(dbgs() << "Discard EVL parameter in " << VPI << "\n");
@@ -321,6 +456,9 @@ Value *CachingVPExpander::expandPredication(VPIntrinsic &VPI) {
if (OC && Instruction::isBinaryOp(*OC))
return expandPredicationInBinaryOperator(Builder, VPI);
+ if (auto *VPRI = dyn_cast<VPReductionIntrinsic>(&VPI))
+ return expandPredicationInReduction(Builder, *VPRI);
+
return &VPI;
}
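[Note, not part of the patch] The VP-reduction expansion above rewrites a predicated reduction as select(mask, x, neutral) followed by an unpredicated reduction, where "neutral" is the identity element of the operation so masked-out lanes cannot change the result. A scalar model in plain C++ (not LLVM code) for an add and a signed-min reduction:

#include <algorithm>
#include <array>
#include <cstdint>
#include <iostream>
#include <limits>

int main() {
  std::array<int32_t, 4> X{7, -3, 9, 2};
  std::array<bool, 4> Mask{true, false, true, false};

  int32_t Sum = 0; // start value; neutral element for add is 0
  for (size_t I = 0; I < X.size(); ++I)
    Sum += Mask[I] ? X[I] : 0;

  int32_t Min = std::numeric_limits<int32_t>::max(); // neutral for smin
  for (size_t I = 0; I < X.size(); ++I)
    Min = std::min(Min, Mask[I] ? X[I] : std::numeric_limits<int32_t>::max());

  std::cout << Sum << ' ' << Min << '\n'; // prints: 16 7
}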
diff --git a/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp b/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
index e3c4e86d203b..ec6bf18b2769 100644
--- a/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
+++ b/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
@@ -1,9 +1,8 @@
//===-- FixupStatepointCallerSaved.cpp - Fixup caller saved registers ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/llvm/lib/CodeGen/GCMetadata.cpp b/llvm/lib/CodeGen/GCMetadata.cpp
index 8fae798b31d9..af5515cc6bfd 100644
--- a/llvm/lib/CodeGen/GCMetadata.cpp
+++ b/llvm/lib/CodeGen/GCMetadata.cpp
@@ -145,24 +145,9 @@ GCStrategy *GCModuleInfo::getGCStrategy(const StringRef Name) {
if (NMI != GCStrategyMap.end())
return NMI->getValue();
- for (auto& Entry : GCRegistry::entries()) {
- if (Name == Entry.getName()) {
- std::unique_ptr<GCStrategy> S = Entry.instantiate();
- S->Name = std::string(Name);
- GCStrategyMap[Name] = S.get();
- GCStrategyList.push_back(std::move(S));
- return GCStrategyList.back().get();
- }
- }
-
- if (GCRegistry::begin() == GCRegistry::end()) {
- // In normal operation, the registry should not be empty. There should
- // be the builtin GCs if nothing else. The most likely scenario here is
- // that we got here without running the initializers used by the Registry
- // itself and it's registration mechanism.
- const std::string error = ("unsupported GC: " + Name).str() +
- " (did you remember to link and initialize the CodeGen library?)";
- report_fatal_error(error);
- } else
- report_fatal_error(std::string("unsupported GC: ") + Name);
+ std::unique_ptr<GCStrategy> S = llvm::getGCStrategy(Name);
+ S->Name = std::string(Name);
+ GCStrategyMap[Name] = S.get();
+ GCStrategyList.push_back(std::move(S));
+ return GCStrategyList.back().get();
}
diff --git a/llvm/lib/CodeGen/GCRootLowering.cpp b/llvm/lib/CodeGen/GCRootLowering.cpp
index 58269e172c57..637a877810a1 100644
--- a/llvm/lib/CodeGen/GCRootLowering.cpp
+++ b/llvm/lib/CodeGen/GCRootLowering.cpp
@@ -193,8 +193,8 @@ bool LowerIntrinsics::DoLowering(Function &F, GCStrategy &S) {
bool MadeChange = false;
for (BasicBlock &BB : F)
- for (BasicBlock::iterator II = BB.begin(), E = BB.end(); II != E;) {
- IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++);
+ for (Instruction &I : llvm::make_early_inc_range(BB)) {
+ IntrinsicInst *CI = dyn_cast<IntrinsicInst>(&I);
if (!CI)
continue;
@@ -271,16 +271,15 @@ void GCMachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) {
void GCMachineCodeAnalysis::FindSafePoints(MachineFunction &MF) {
for (MachineBasicBlock &MBB : MF)
- for (MachineBasicBlock::iterator MI = MBB.begin(), ME = MBB.end();
- MI != ME; ++MI)
- if (MI->isCall()) {
+ for (MachineInstr &MI : MBB)
+ if (MI.isCall()) {
// Do not treat tail or sibling call sites as safe points. This is
// legal since any arguments passed to the callee which live in the
// remnants of the callers frame will be owned and updated by the
// callee if required.
- if (MI->isTerminator())
+ if (MI.isTerminator())
continue;
- VisitCallPoint(MI);
+ VisitCallPoint(&MI);
}
}
diff --git a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
index dd560e8ff145..2676becdd807 100644
--- a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
@@ -13,6 +13,8 @@
#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/IR/DebugInfoMetadata.h"
using namespace llvm;
@@ -187,6 +189,14 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc,
// Try to constant fold these.
assert(SrcOps.size() == 2 && "Invalid sources");
assert(DstOps.size() == 1 && "Invalid dsts");
+ if (SrcOps[0].getLLTTy(*getMRI()).isVector()) {
+ // Try to constant fold vector constants.
+ auto VecCst = ConstantFoldVectorBinop(
+ Opc, SrcOps[0].getReg(), SrcOps[1].getReg(), *getMRI(), *this);
+ if (VecCst)
+ return MachineInstrBuilder(getMF(), *VecCst);
+ break;
+ }
if (Optional<APInt> Cst = ConstantFoldBinOp(Opc, SrcOps[0].getReg(),
SrcOps[1].getReg(), *getMRI()))
return buildConstant(DstOps[0], *Cst);
@@ -213,6 +223,22 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc,
return buildFConstant(DstOps[0], *Cst);
break;
}
+ case TargetOpcode::G_CTLZ: {
+ assert(SrcOps.size() == 1 && "Expected one source");
+ assert(DstOps.size() == 1 && "Expected one dest");
+ auto MaybeCsts = ConstantFoldCTLZ(SrcOps[0].getReg(), *getMRI());
+ if (!MaybeCsts)
+ break;
+ if (MaybeCsts->size() == 1)
+ return buildConstant(DstOps[0], (*MaybeCsts)[0]);
+ // This was a vector constant. Build a G_BUILD_VECTOR for them.
+ SmallVector<Register> ConstantRegs;
+ LLT VecTy = DstOps[0].getLLTTy(*getMRI());
+ for (unsigned Cst : *MaybeCsts)
+ ConstantRegs.emplace_back(
+ buildConstant(VecTy.getScalarType(), Cst).getReg(0));
+ return buildBuildVector(DstOps[0], ConstantRegs);
+ }
}
bool CanCopy = checkCopyToDefsPossible(DstOps);
if (!canPerformCSEForOpc(Opc))
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index d2cda9ece31a..17094a8e44f8 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
@@ -73,7 +74,7 @@ void CallLowering::addArgFlagsFromAttributes(ISD::ArgFlagsTy &Flags,
const AttributeList &Attrs,
unsigned OpIdx) const {
addFlagsUsingAttrFn(Flags, [&Attrs, &OpIdx](Attribute::AttrKind Attr) {
- return Attrs.hasAttribute(OpIdx, Attr);
+ return Attrs.hasAttributeAtIndex(OpIdx, Attr);
});
}
@@ -139,6 +140,7 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
if (!Info.OrigRet.Ty->isVoidTy())
setArgFlags(Info.OrigRet, AttributeList::ReturnIndex, DL, CB);
+ Info.CB = &CB;
Info.KnownCallees = CB.getMetadata(LLVMContext::MD_callees);
Info.CallConv = CallConv;
Info.SwiftErrorVReg = SwiftErrorVReg;
@@ -165,18 +167,21 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx,
Align MemAlign = DL.getABITypeAlign(Arg.Ty);
if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated()) {
assert(OpIdx >= AttributeList::FirstArgIndex);
- Type *ElementTy = PtrTy->getElementType();
+ unsigned ParamIdx = OpIdx - AttributeList::FirstArgIndex;
- auto Ty = Attrs.getAttribute(OpIdx, Attribute::ByVal).getValueAsType();
- Flags.setByValSize(DL.getTypeAllocSize(Ty ? Ty : ElementTy));
+ Type *ElementTy = FuncInfo.getParamByValType(ParamIdx);
+ if (!ElementTy)
+ ElementTy = FuncInfo.getParamInAllocaType(ParamIdx);
+ if (!ElementTy)
+ ElementTy = FuncInfo.getParamPreallocatedType(ParamIdx);
+ assert(ElementTy && "Must have byval, inalloca or preallocated type");
+ Flags.setByValSize(DL.getTypeAllocSize(ElementTy));
// For ByVal, alignment should be passed from FE. BE will guess if
// this info is not there but there are cases it cannot get right.
- if (auto ParamAlign =
- FuncInfo.getParamStackAlign(OpIdx - AttributeList::FirstArgIndex))
+ if (auto ParamAlign = FuncInfo.getParamStackAlign(ParamIdx))
MemAlign = *ParamAlign;
- else if ((ParamAlign =
- FuncInfo.getParamAlign(OpIdx - AttributeList::FirstArgIndex)))
+ else if ((ParamAlign = FuncInfo.getParamAlign(ParamIdx)))
MemAlign = *ParamAlign;
else
MemAlign = Align(getTLI()->getByValTypeAlignment(ElementTy, DL));
@@ -613,14 +618,31 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
const unsigned NumArgs = Args.size();
+ // Stores thunks for outgoing register assignments. This is used so we delay
+ // generating register copies until mem loc assignments are done. We do this
+ // so that if the target is using the delayed stack protector feature, we can
+ // find the split point of the block accurately. E.g. if we have:
+ // G_STORE %val, %memloc
+ // $x0 = COPY %foo
+ // $x1 = COPY %bar
+ // CALL func
+ // ... then the split point for the block will correctly be at, and including,
+ // the copy to $x0. If instead the G_STORE instruction immediately precedes
+ // the CALL, then we'd prematurely choose the CALL as the split point, thus
+ // generating a split block with a CALL that uses undefined physregs.
+ SmallVector<std::function<void()>> DelayedOutgoingRegAssignments;
+
for (unsigned i = 0, j = 0; i != NumArgs; ++i, ++j) {
assert(j < ArgLocs.size() && "Skipped too many arg locs");
CCValAssign &VA = ArgLocs[j];
assert(VA.getValNo() == i && "Location doesn't correspond to current arg");
if (VA.needsCustom()) {
- unsigned NumArgRegs =
- Handler.assignCustomValue(Args[i], makeArrayRef(ArgLocs).slice(j));
+ std::function<void()> Thunk;
+ unsigned NumArgRegs = Handler.assignCustomValue(
+ Args[i], makeArrayRef(ArgLocs).slice(j), &Thunk);
+ if (Thunk)
+ DelayedOutgoingRegAssignments.emplace_back(Thunk);
if (!NumArgRegs)
return false;
j += NumArgRegs;
@@ -739,7 +761,13 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
continue;
}
- Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
+ if (Handler.isIncomingArgumentHandler())
+ Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
+ else {
+ DelayedOutgoingRegAssignments.emplace_back([=, &Handler]() {
+ Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
+ });
+ }
}
// Now that all pieces have been assigned, re-pack the register typed values
@@ -753,6 +781,8 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
j += NumParts - 1;
}
+ for (auto &Fn : DelayedOutgoingRegAssignments)
+ Fn();
return true;
}
@@ -1153,7 +1183,7 @@ static bool isCopyCompatibleType(LLT SrcTy, LLT DstTy) {
void CallLowering::IncomingValueHandler::assignValueToReg(Register ValVReg,
Register PhysReg,
- CCValAssign &VA) {
+ CCValAssign VA) {
const MVT LocVT = VA.getLocVT();
const LLT LocTy(LocVT);
const LLT RegTy = MRI.getType(ValVReg);
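[Note, not part of the patch] The handleAssignments change above defers outgoing register copies by storing them as std::function thunks and running them only after all memory-location assignments, so the copies end up adjacent to the call. A standard-C++ sketch of that deferral pattern; EmitStore/EmitCopy are stand-ins, not LLVM APIs:

#include <functional>
#include <iostream>
#include <vector>

int main() {
  std::vector<std::function<void()>> Delayed;

  auto EmitStore = [](int Slot) { std::cout << "store to slot " << Slot << '\n'; };
  auto EmitCopy = [](int Reg) { std::cout << "copy to reg " << Reg << '\n'; };

  // Mixed argument list: stack stores are emitted immediately, register
  // copies are queued as thunks.
  EmitStore(0);
  Delayed.emplace_back([&EmitCopy] { EmitCopy(0); });
  EmitStore(1);
  Delayed.emplace_back([&EmitCopy] { EmitCopy(1); });

  // Flush the queued register copies after all stores have been emitted.
  for (auto &Fn : Delayed)
    Fn();
}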
diff --git a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
index 6f103bca6892..381c6df5c97a 100644
--- a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
@@ -130,16 +130,15 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF,
WrapperObserver.addObserver(CSEInfo);
RAIIDelegateInstaller DelInstall(MF, &WrapperObserver);
for (MachineBasicBlock *MBB : post_order(&MF)) {
- for (auto MII = MBB->rbegin(), MIE = MBB->rend(); MII != MIE;) {
- MachineInstr *CurMI = &*MII;
- ++MII;
+ for (MachineInstr &CurMI :
+ llvm::make_early_inc_range(llvm::reverse(*MBB))) {
// Erase dead insts before even adding to the list.
- if (isTriviallyDead(*CurMI, *MRI)) {
- LLVM_DEBUG(dbgs() << *CurMI << "Is dead; erasing.\n");
- CurMI->eraseFromParentAndMarkDBGValuesForRemoval();
+ if (isTriviallyDead(CurMI, *MRI)) {
+ LLVM_DEBUG(dbgs() << CurMI << "Is dead; erasing.\n");
+ CurMI.eraseFromParentAndMarkDBGValuesForRemoval();
continue;
}
- WorkList.deferred_insert(CurMI);
+ WorkList.deferred_insert(&CurMI);
}
}
WorkList.finalize();
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 06d827de2e96..3a52959d54bf 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -12,9 +12,11 @@
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -26,8 +28,10 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/DivisionByConstantInfo.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetMachine.h"
#include <tuple>
#define DEBUG_TYPE "gi-combiner"
@@ -46,8 +50,9 @@ CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
MachineIRBuilder &B, GISelKnownBits *KB,
MachineDominatorTree *MDT,
const LegalizerInfo *LI)
- : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer),
- KB(KB), MDT(MDT), LI(LI) {
+ : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), KB(KB),
+ MDT(MDT), LI(LI), RBI(Builder.getMF().getSubtarget().getRegBankInfo()),
+ TRI(Builder.getMF().getSubtarget().getRegisterInfo()) {
(void)this->KB;
}
@@ -64,6 +69,16 @@ static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) {
return I;
}
+/// Determines the LogBase2 value for a non-null input value using the
+/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
+static Register buildLogBase2(Register V, MachineIRBuilder &MIB) {
+ auto &MRI = *MIB.getMRI();
+ LLT Ty = MRI.getType(V);
+ auto Ctlz = MIB.buildCTLZ(Ty, V);
+ auto Base = MIB.buildConstant(Ty, Ty.getScalarSizeInBits() - 1);
+ return MIB.buildSub(Ty, Base, Ctlz).getReg(0);
+}
+
/// \returns The big endian in-memory byte position of byte \p I in a
/// \p ByteWidth bytes wide type.
///
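[Note, not part of the patch] The new buildLogBase2 helper above relies on the identity LogBase2(V) = (EltBits - 1) - ctlz(V) for non-zero V. A quick scalar check for 32-bit values, using C++20's std::countl_zero:

#include <bit>
#include <cstdint>
#include <iostream>

int main() {
  // For a 32-bit power of two V, log2(V) == 31 - countl_zero(V).
  for (uint32_t V : {1u, 2u, 8u, 1024u})
    std::cout << V << " -> " << (31 - std::countl_zero(V)) << '\n';
  // prints: 1 -> 0, 2 -> 1, 8 -> 3, 1024 -> 10
}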
@@ -143,6 +158,24 @@ void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
Observer.changedInstr(*FromRegOp.getParent());
}
+void CombinerHelper::replaceOpcodeWith(MachineInstr &FromMI,
+ unsigned ToOpcode) const {
+ Observer.changingInstr(FromMI);
+
+ FromMI.setDesc(Builder.getTII().get(ToOpcode));
+
+ Observer.changedInstr(FromMI);
+}
+
+const RegisterBank *CombinerHelper::getRegBank(Register Reg) const {
+ return RBI->getRegBank(Reg, MRI, *TRI);
+}
+
+void CombinerHelper::setRegBank(Register Reg, const RegisterBank *RegBank) {
+ if (RegBank)
+ MRI.setRegBank(Reg, *RegBank);
+}
+
bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
if (matchCombineCopy(MI)) {
applyCombineCopy(MI);
@@ -486,10 +519,7 @@ bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
continue;
// Check for legality.
if (LI) {
- LegalityQuery::MemDesc MMDesc;
- MMDesc.MemoryTy = MMO.getMemoryType();
- MMDesc.AlignInBits = MMO.getAlign().value() * 8;
- MMDesc.Ordering = MMO.getSuccessOrdering();
+ LegalityQuery::MemDesc MMDesc(MMO);
LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
LLT SrcTy = MRI.getType(LoadMI->getPointerReg());
if (LI->getAction({LoadMI->getOpcode(), {UseTy, SrcTy}, {MMDesc}})
@@ -623,13 +653,83 @@ void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
Observer.changedInstr(MI);
}
+bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_AND);
+
+ // If we have the following code:
+ // %mask = G_CONSTANT 255
+ // %ld = G_LOAD %ptr, (load s16)
+ // %and = G_AND %ld, %mask
+ //
+ // Try to fold it into
+ // %ld = G_ZEXTLOAD %ptr, (load s8)
+
+ Register Dst = MI.getOperand(0).getReg();
+ if (MRI.getType(Dst).isVector())
+ return false;
+
+ auto MaybeMask =
+ getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
+ if (!MaybeMask)
+ return false;
+
+ APInt MaskVal = MaybeMask->Value;
+
+ if (!MaskVal.isMask())
+ return false;
+
+ Register SrcReg = MI.getOperand(1).getReg();
+ GAnyLoad *LoadMI = getOpcodeDef<GAnyLoad>(SrcReg, MRI);
+ if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()) ||
+ !LoadMI->isSimple())
+ return false;
+
+ Register LoadReg = LoadMI->getDstReg();
+ LLT LoadTy = MRI.getType(LoadReg);
+ Register PtrReg = LoadMI->getPointerReg();
+ uint64_t LoadSizeBits = LoadMI->getMemSizeInBits();
+ unsigned MaskSizeBits = MaskVal.countTrailingOnes();
+
+ // The mask may not be larger than the in-memory type, as it might cover sign
+ // extended bits
+ if (MaskSizeBits > LoadSizeBits)
+ return false;
+
+ // If the mask covers the whole destination register, there's nothing to
+ // extend
+ if (MaskSizeBits >= LoadTy.getSizeInBits())
+ return false;
+
+ // Most targets cannot deal with loads of size < 8 and need to re-legalize to
+ // at least byte loads. Avoid creating such loads here
+ if (MaskSizeBits < 8 || !isPowerOf2_32(MaskSizeBits))
+ return false;
+
+ const MachineMemOperand &MMO = LoadMI->getMMO();
+ LegalityQuery::MemDesc MemDesc(MMO);
+ MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);
+ if (!isLegalOrBeforeLegalizer(
+ {TargetOpcode::G_ZEXTLOAD, {LoadTy, MRI.getType(PtrReg)}, {MemDesc}}))
+ return false;
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.setInstrAndDebugLoc(*LoadMI);
+ auto &MF = B.getMF();
+ auto PtrInfo = MMO.getPointerInfo();
+ auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MaskSizeBits / 8);
+ B.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, Dst, PtrReg, *NewMMO);
+ };
+ return true;
+}
+
bool CombinerHelper::isPredecessor(const MachineInstr &DefMI,
const MachineInstr &UseMI) {
assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
"shouldn't consider debug uses");
assert(DefMI.getParent() == UseMI.getParent());
if (&DefMI == &UseMI)
- return false;
+ return true;
const MachineBasicBlock &MBB = *DefMI.getParent();
auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) {
return &MI == &DefMI || &MI == &UseMI;
@@ -711,6 +811,16 @@ bool CombinerHelper::matchSextInRegOfLoad(
// anyway for most targets.
if (!isPowerOf2_32(NewSizeBits))
return false;
+
+ const MachineMemOperand &MMO = LoadDef->getMMO();
+ LegalityQuery::MemDesc MMDesc(MMO);
+ MMDesc.MemoryTy = LLT::scalar(NewSizeBits);
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SEXTLOAD,
+ {MRI.getType(LoadDef->getDstReg()),
+ MRI.getType(LoadDef->getPointerReg())},
+ {MMDesc}}))
+ return false;
+
MatchInfo = std::make_tuple(LoadDef->getDstReg(), NewSizeBits);
return true;
}
@@ -1093,81 +1203,6 @@ void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI,
Observer.changedInstr(*BrCond);
}
-static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
- // On Darwin, -Os means optimize for size without hurting performance, so
- // only really optimize for size when -Oz (MinSize) is used.
- if (MF.getTarget().getTargetTriple().isOSDarwin())
- return MF.getFunction().hasMinSize();
- return MF.getFunction().hasOptSize();
-}
-
-// Returns a list of types to use for memory op lowering in MemOps. A partial
-// port of findOptimalMemOpLowering in TargetLowering.
-static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
- unsigned Limit, const MemOp &Op,
- unsigned DstAS, unsigned SrcAS,
- const AttributeList &FuncAttributes,
- const TargetLowering &TLI) {
- if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
- return false;
-
- LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
-
- if (Ty == LLT()) {
- // Use the largest scalar type whose alignment constraints are satisfied.
- // We only need to check DstAlign here as SrcAlign is always greater or
- // equal to DstAlign (or zero).
- Ty = LLT::scalar(64);
- if (Op.isFixedDstAlign())
- while (Op.getDstAlign() < Ty.getSizeInBytes() &&
- !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
- Ty = LLT::scalar(Ty.getSizeInBytes());
- assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
- // FIXME: check for the largest legal type we can load/store to.
- }
-
- unsigned NumMemOps = 0;
- uint64_t Size = Op.size();
- while (Size) {
- unsigned TySize = Ty.getSizeInBytes();
- while (TySize > Size) {
- // For now, only use non-vector load / store's for the left-over pieces.
- LLT NewTy = Ty;
- // FIXME: check for mem op safety and legality of the types. Not all of
- // SDAGisms map cleanly to GISel concepts.
- if (NewTy.isVector())
- NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
- NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1));
- unsigned NewTySize = NewTy.getSizeInBytes();
- assert(NewTySize > 0 && "Could not find appropriate type");
-
- // If the new LLT cannot cover all of the remaining bits, then consider
- // issuing a (or a pair of) unaligned and overlapping load / store.
- bool Fast;
- // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
- MVT VT = getMVTForLLT(Ty);
- if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
- TLI.allowsMisalignedMemoryAccesses(
- VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
- MachineMemOperand::MONone, &Fast) &&
- Fast)
- TySize = Size;
- else {
- Ty = NewTy;
- TySize = NewTySize;
- }
- }
-
- if (++NumMemOps > Limit)
- return false;
-
- MemOps.push_back(Ty);
- Size -= TySize;
- }
-
- return true;
-}
-
static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
if (Ty.isVector())
return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
@@ -1175,460 +1210,20 @@ static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
return IntegerType::get(C, Ty.getSizeInBits());
}
-// Get a vectorized representation of the memset value operand, GISel edition.
-static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
- MachineRegisterInfo &MRI = *MIB.getMRI();
- unsigned NumBits = Ty.getScalarSizeInBits();
- auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
- if (!Ty.isVector() && ValVRegAndVal) {
- APInt Scalar = ValVRegAndVal->Value.truncOrSelf(8);
- APInt SplatVal = APInt::getSplat(NumBits, Scalar);
- return MIB.buildConstant(Ty, SplatVal).getReg(0);
- }
-
- // Extend the byte value to the larger type, and then multiply by a magic
- // value 0x010101... in order to replicate it across every byte.
- // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
- if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
- return MIB.buildConstant(Ty, 0).getReg(0);
- }
-
- LLT ExtType = Ty.getScalarType();
- auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
- if (NumBits > 8) {
- APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
- auto MagicMI = MIB.buildConstant(ExtType, Magic);
- Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
- }
-
- // For vector types create a G_BUILD_VECTOR.
- if (Ty.isVector())
- Val = MIB.buildSplatVector(Ty, Val).getReg(0);
-
- return Val;
-}
-
-bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst,
- Register Val, uint64_t KnownLen,
- Align Alignment, bool IsVolatile) {
- auto &MF = *MI.getParent()->getParent();
- const auto &TLI = *MF.getSubtarget().getTargetLowering();
- auto &DL = MF.getDataLayout();
- LLVMContext &C = MF.getFunction().getContext();
-
- assert(KnownLen != 0 && "Have a zero length memset length!");
-
- bool DstAlignCanChange = false;
- MachineFrameInfo &MFI = MF.getFrameInfo();
- bool OptSize = shouldLowerMemFuncForSize(MF);
-
- MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
- if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
- DstAlignCanChange = true;
-
- unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
- std::vector<LLT> MemOps;
-
- const auto &DstMMO = **MI.memoperands_begin();
- MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
-
- auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
- bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
-
- if (!findGISelOptimalMemOpLowering(MemOps, Limit,
- MemOp::Set(KnownLen, DstAlignCanChange,
- Alignment,
- /*IsZeroMemset=*/IsZeroVal,
- /*IsVolatile=*/IsVolatile),
- DstPtrInfo.getAddrSpace(), ~0u,
- MF.getFunction().getAttributes(), TLI))
- return false;
-
- if (DstAlignCanChange) {
- // Get an estimate of the type from the LLT.
- Type *IRTy = getTypeForLLT(MemOps[0], C);
- Align NewAlign = DL.getABITypeAlign(IRTy);
- if (NewAlign > Alignment) {
- Alignment = NewAlign;
- unsigned FI = FIDef->getOperand(1).getIndex();
- // Give the stack frame object a larger alignment if needed.
- if (MFI.getObjectAlign(FI) < Alignment)
- MFI.setObjectAlignment(FI, Alignment);
- }
- }
-
- MachineIRBuilder MIB(MI);
- // Find the largest store and generate the bit pattern for it.
- LLT LargestTy = MemOps[0];
- for (unsigned i = 1; i < MemOps.size(); i++)
- if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
- LargestTy = MemOps[i];
-
- // The memset stored value is always defined as an s8, so in order to make it
- // work with larger store types we need to repeat the bit pattern across the
- // wider type.
- Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
-
- if (!MemSetValue)
- return false;
-
- // Generate the stores. For each store type in the list, we generate the
- // matching store of that type to the destination address.
- LLT PtrTy = MRI.getType(Dst);
- unsigned DstOff = 0;
- unsigned Size = KnownLen;
- for (unsigned I = 0; I < MemOps.size(); I++) {
- LLT Ty = MemOps[I];
- unsigned TySize = Ty.getSizeInBytes();
- if (TySize > Size) {
- // Issuing an unaligned load / store pair that overlaps with the previous
- // pair. Adjust the offset accordingly.
- assert(I == MemOps.size() - 1 && I != 0);
- DstOff -= TySize - Size;
- }
-
- // If this store is smaller than the largest store see whether we can get
- // the smaller value for free with a truncate.
- Register Value = MemSetValue;
- if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
- MVT VT = getMVTForLLT(Ty);
- MVT LargestVT = getMVTForLLT(LargestTy);
- if (!LargestTy.isVector() && !Ty.isVector() &&
- TLI.isTruncateFree(LargestVT, VT))
- Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
- else
- Value = getMemsetValue(Val, Ty, MIB);
- if (!Value)
- return false;
- }
-
- auto *StoreMMO =
- MF.getMachineMemOperand(&DstMMO, DstOff, Ty);
-
- Register Ptr = Dst;
- if (DstOff != 0) {
- auto Offset =
- MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
- Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
- }
-
- MIB.buildStore(Value, Ptr, *StoreMMO);
- DstOff += Ty.getSizeInBytes();
- Size -= TySize;
- }
-
- MI.eraseFromParent();
- return true;
-}
-
bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) {
- assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
-
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- Register Len = MI.getOperand(2).getReg();
-
- const auto *MMOIt = MI.memoperands_begin();
- const MachineMemOperand *MemOp = *MMOIt;
- bool IsVolatile = MemOp->isVolatile();
-
- // See if this is a constant length copy
- auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI);
- // FIXME: support dynamically sized G_MEMCPY_INLINE
- assert(LenVRegAndVal.hasValue() &&
- "inline memcpy with dynamic size is not yet supported");
- uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
- if (KnownLen == 0) {
- MI.eraseFromParent();
- return true;
- }
-
- const auto &DstMMO = **MI.memoperands_begin();
- const auto &SrcMMO = **std::next(MI.memoperands_begin());
- Align DstAlign = DstMMO.getBaseAlign();
- Align SrcAlign = SrcMMO.getBaseAlign();
-
- return tryEmitMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
- IsVolatile);
-}
-
-bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI, Register Dst,
- Register Src, uint64_t KnownLen,
- Align DstAlign, Align SrcAlign,
- bool IsVolatile) {
- assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
- return optimizeMemcpy(MI, Dst, Src, KnownLen,
- std::numeric_limits<uint64_t>::max(), DstAlign,
- SrcAlign, IsVolatile);
-}
-
-bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
- Register Src, uint64_t KnownLen,
- uint64_t Limit, Align DstAlign,
- Align SrcAlign, bool IsVolatile) {
- auto &MF = *MI.getParent()->getParent();
- const auto &TLI = *MF.getSubtarget().getTargetLowering();
- auto &DL = MF.getDataLayout();
- LLVMContext &C = MF.getFunction().getContext();
-
- assert(KnownLen != 0 && "Have a zero length memcpy length!");
-
- bool DstAlignCanChange = false;
- MachineFrameInfo &MFI = MF.getFrameInfo();
- Align Alignment = commonAlignment(DstAlign, SrcAlign);
-
- MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
- if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
- DstAlignCanChange = true;
-
- // FIXME: infer better src pointer alignment like SelectionDAG does here.
- // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
- // if the memcpy is in a tail call position.
-
- std::vector<LLT> MemOps;
-
- const auto &DstMMO = **MI.memoperands_begin();
- const auto &SrcMMO = **std::next(MI.memoperands_begin());
- MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
- MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
-
- if (!findGISelOptimalMemOpLowering(
- MemOps, Limit,
- MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
- IsVolatile),
- DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
- MF.getFunction().getAttributes(), TLI))
- return false;
-
- if (DstAlignCanChange) {
- // Get an estimate of the type from the LLT.
- Type *IRTy = getTypeForLLT(MemOps[0], C);
- Align NewAlign = DL.getABITypeAlign(IRTy);
-
- // Don't promote to an alignment that would require dynamic stack
- // realignment.
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- if (!TRI->hasStackRealignment(MF))
- while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
- NewAlign = NewAlign / 2;
-
- if (NewAlign > Alignment) {
- Alignment = NewAlign;
- unsigned FI = FIDef->getOperand(1).getIndex();
- // Give the stack frame object a larger alignment if needed.
- if (MFI.getObjectAlign(FI) < Alignment)
- MFI.setObjectAlignment(FI, Alignment);
- }
- }
-
- LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
-
- MachineIRBuilder MIB(MI);
- // Now we need to emit a pair of load and stores for each of the types we've
- // collected. I.e. for each type, generate a load from the source pointer of
- // that type width, and then generate a corresponding store to the dest buffer
- // of that value loaded. This can result in a sequence of loads and stores
- // mixed types, depending on what the target specifies as good types to use.
- unsigned CurrOffset = 0;
- LLT PtrTy = MRI.getType(Src);
- unsigned Size = KnownLen;
- for (auto CopyTy : MemOps) {
- // Issuing an unaligned load / store pair that overlaps with the previous
- // pair. Adjust the offset accordingly.
- if (CopyTy.getSizeInBytes() > Size)
- CurrOffset -= CopyTy.getSizeInBytes() - Size;
-
- // Construct MMOs for the accesses.
- auto *LoadMMO =
- MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
- auto *StoreMMO =
- MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
-
- // Create the load.
- Register LoadPtr = Src;
- Register Offset;
- if (CurrOffset != 0) {
- Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset)
- .getReg(0);
- LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
- }
- auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
-
- // Create the store.
- Register StorePtr =
- CurrOffset == 0 ? Dst : MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
- MIB.buildStore(LdVal, StorePtr, *StoreMMO);
- CurrOffset += CopyTy.getSizeInBytes();
- Size -= CopyTy.getSizeInBytes();
- }
-
- MI.eraseFromParent();
- return true;
-}
-
-bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
- Register Src, uint64_t KnownLen,
- Align DstAlign, Align SrcAlign,
- bool IsVolatile) {
- auto &MF = *MI.getParent()->getParent();
- const auto &TLI = *MF.getSubtarget().getTargetLowering();
- auto &DL = MF.getDataLayout();
- LLVMContext &C = MF.getFunction().getContext();
-
- assert(KnownLen != 0 && "Have a zero length memmove length!");
-
- bool DstAlignCanChange = false;
- MachineFrameInfo &MFI = MF.getFrameInfo();
- bool OptSize = shouldLowerMemFuncForSize(MF);
- Align Alignment = commonAlignment(DstAlign, SrcAlign);
-
- MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
- if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
- DstAlignCanChange = true;
-
- unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
- std::vector<LLT> MemOps;
-
- const auto &DstMMO = **MI.memoperands_begin();
- const auto &SrcMMO = **std::next(MI.memoperands_begin());
- MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
- MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
-
- // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
- // to a bug in it's findOptimalMemOpLowering implementation. For now do the
- // same thing here.
- if (!findGISelOptimalMemOpLowering(
- MemOps, Limit,
- MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
- /*IsVolatile*/ true),
- DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
- MF.getFunction().getAttributes(), TLI))
- return false;
-
- if (DstAlignCanChange) {
- // Get an estimate of the type from the LLT.
- Type *IRTy = getTypeForLLT(MemOps[0], C);
- Align NewAlign = DL.getABITypeAlign(IRTy);
-
- // Don't promote to an alignment that would require dynamic stack
- // realignment.
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- if (!TRI->hasStackRealignment(MF))
- while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
- NewAlign = NewAlign / 2;
-
- if (NewAlign > Alignment) {
- Alignment = NewAlign;
- unsigned FI = FIDef->getOperand(1).getIndex();
- // Give the stack frame object a larger alignment if needed.
- if (MFI.getObjectAlign(FI) < Alignment)
- MFI.setObjectAlignment(FI, Alignment);
- }
- }
-
- LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
-
- MachineIRBuilder MIB(MI);
- // Memmove requires that we perform the loads first before issuing the stores.
- // Apart from that, this loop is pretty much doing the same thing as the
- // memcpy codegen function.
- unsigned CurrOffset = 0;
- LLT PtrTy = MRI.getType(Src);
- SmallVector<Register, 16> LoadVals;
- for (auto CopyTy : MemOps) {
- // Construct MMO for the load.
- auto *LoadMMO =
- MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
-
- // Create the load.
- Register LoadPtr = Src;
- if (CurrOffset != 0) {
- auto Offset =
- MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
- LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
- }
- LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
- CurrOffset += CopyTy.getSizeInBytes();
- }
-
- CurrOffset = 0;
- for (unsigned I = 0; I < MemOps.size(); ++I) {
- LLT CopyTy = MemOps[I];
- // Now store the values loaded.
- auto *StoreMMO =
- MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
-
- Register StorePtr = Dst;
- if (CurrOffset != 0) {
- auto Offset =
- MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
- StorePtr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
- }
- MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
- CurrOffset += CopyTy.getSizeInBytes();
- }
- MI.eraseFromParent();
- return true;
+ MachineIRBuilder HelperBuilder(MI);
+ GISelObserverWrapper DummyObserver;
+ LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
+ return Helper.lowerMemcpyInline(MI) ==
+ LegalizerHelper::LegalizeResult::Legalized;
}
bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
- const unsigned Opc = MI.getOpcode();
- // This combine is fairly complex so it's not written with a separate
- // matcher function.
- assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
- Opc == TargetOpcode::G_MEMSET) && "Expected memcpy like instruction");
-
- auto MMOIt = MI.memoperands_begin();
- const MachineMemOperand *MemOp = *MMOIt;
-
- Align DstAlign = MemOp->getBaseAlign();
- Align SrcAlign;
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- Register Len = MI.getOperand(2).getReg();
-
- if (Opc != TargetOpcode::G_MEMSET) {
- assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
- MemOp = *(++MMOIt);
- SrcAlign = MemOp->getBaseAlign();
- }
-
- // See if this is a constant length copy
- auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI);
- if (!LenVRegAndVal)
- return false; // Leave it to the legalizer to lower it to a libcall.
- uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
-
- if (KnownLen == 0) {
- MI.eraseFromParent();
- return true;
- }
-
- bool IsVolatile = MemOp->isVolatile();
- if (Opc == TargetOpcode::G_MEMCPY_INLINE)
- return tryEmitMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
- IsVolatile);
-
- // Don't try to optimize volatile.
- if (IsVolatile)
- return false;
-
- if (MaxLen && KnownLen > MaxLen)
- return false;
-
- if (Opc == TargetOpcode::G_MEMCPY) {
- auto &MF = *MI.getParent()->getParent();
- const auto &TLI = *MF.getSubtarget().getTargetLowering();
- bool OptSize = shouldLowerMemFuncForSize(MF);
- uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
- return optimizeMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
- IsVolatile);
- }
- if (Opc == TargetOpcode::G_MEMMOVE)
- return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
- if (Opc == TargetOpcode::G_MEMSET)
- return optimizeMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
- return false;
+ MachineIRBuilder HelperBuilder(MI);
+ GISelObserverWrapper DummyObserver;
+ LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
+ return Helper.lowerMemCpyFamily(MI, MaxLen) ==
+ LegalizerHelper::LegalizeResult::Legalized;
}
static Optional<APFloat> constantFoldFpUnary(unsigned Opcode, LLT DstTy,
@@ -1706,30 +1301,52 @@ bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
Register Add2 = MI.getOperand(1).getReg();
Register Imm1 = MI.getOperand(2).getReg();
- auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI);
+ auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
if (!MaybeImmVal)
return false;
- // Don't do this combine if there are multiple uses of the first PTR_ADD,
- // since we may be able to compute the second PTR_ADD as an immediate
- // offset anyway. Folding the first offset into the second may cause us
- // to go beyond the bounds of our legal addressing modes.
- if (!MRI.hasOneNonDBGUse(Add2))
- return false;
-
- MachineInstr *Add2Def = MRI.getUniqueVRegDef(Add2);
+ MachineInstr *Add2Def = MRI.getVRegDef(Add2);
if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
return false;
Register Base = Add2Def->getOperand(1).getReg();
Register Imm2 = Add2Def->getOperand(2).getReg();
- auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI);
+ auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
if (!MaybeImm2Val)
return false;
+ // Check if the new combined immediate forms an illegal addressing mode.
+ // Do not combine if it was legal before but would get illegal.
+ // To do so, we need to find a load/store user of the pointer to get
+ // the access type.
+ Type *AccessTy = nullptr;
+ auto &MF = *MI.getMF();
+ for (auto &UseMI : MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) {
+ if (auto *LdSt = dyn_cast<GLoadStore>(&UseMI)) {
+ AccessTy = getTypeForLLT(MRI.getType(LdSt->getReg(0)),
+ MF.getFunction().getContext());
+ break;
+ }
+ }
+ TargetLoweringBase::AddrMode AMNew;
+ APInt CombinedImm = MaybeImmVal->Value + MaybeImm2Val->Value;
+ AMNew.BaseOffs = CombinedImm.getSExtValue();
+ if (AccessTy) {
+ AMNew.HasBaseReg = true;
+ TargetLoweringBase::AddrMode AMOld;
+ AMOld.BaseOffs = MaybeImm2Val->Value.getSExtValue();
+ AMOld.HasBaseReg = true;
+ unsigned AS = MRI.getType(Add2).getAddressSpace();
+ const auto &TLI = *MF.getSubtarget().getTargetLowering();
+ if (TLI.isLegalAddressingMode(MF.getDataLayout(), AMOld, AccessTy, AS) &&
+ !TLI.isLegalAddressingMode(MF.getDataLayout(), AMNew, AccessTy, AS))
+ return false;
+ }
+
// Pass the combined immediate to the apply function.
- MatchInfo.Imm = (MaybeImmVal->Value + MaybeImm2Val->Value).getSExtValue();
+ MatchInfo.Imm = AMNew.BaseOffs;
MatchInfo.Base = Base;
+ MatchInfo.Bank = getRegBank(Imm2);
return true;
}
@@ -1739,6 +1356,7 @@ void CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
MachineIRBuilder MIB(MI);
LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
+ setRegBank(NewOffset.getReg(0), MatchInfo.Bank);
Observer.changingInstr(MI);
MI.getOperand(1).setReg(MatchInfo.Base);
MI.getOperand(2).setReg(NewOffset.getReg(0));
@@ -1762,7 +1380,7 @@ bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI,
Register Shl2 = MI.getOperand(1).getReg();
Register Imm1 = MI.getOperand(2).getReg();
- auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI);
+ auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
if (!MaybeImmVal)
return false;
@@ -1772,7 +1390,7 @@ bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI,
Register Base = Shl2Def->getOperand(1).getReg();
Register Imm2 = Shl2Def->getOperand(2).getReg();
- auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI);
+ auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
if (!MaybeImm2Val)
return false;
@@ -1856,7 +1474,7 @@ bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI,
// Find a matching one-use shift by constant.
const Register C1 = MI.getOperand(2).getReg();
- auto MaybeImmVal = getConstantVRegValWithLookThrough(C1, MRI);
+ auto MaybeImmVal = getIConstantVRegValWithLookThrough(C1, MRI);
if (!MaybeImmVal)
return false;
@@ -1870,7 +1488,7 @@ bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI,
// Must be a constant.
auto MaybeImmVal =
- getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
+ getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
if (!MaybeImmVal)
return false;
@@ -1932,8 +1550,8 @@ void CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,
Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});
// These were one use so it's safe to remove them.
- MatchInfo.Shift2->eraseFromParent();
- MatchInfo.Logic->eraseFromParent();
+ MatchInfo.Shift2->eraseFromParentAndMarkDBGValuesForRemoval();
+ MatchInfo.Logic->eraseFromParentAndMarkDBGValuesForRemoval();
MI.eraseFromParent();
}
@@ -1942,7 +1560,7 @@ bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
unsigned &ShiftVal) {
assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
auto MaybeImmVal =
- getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
+ getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
if (!MaybeImmVal)
return false;
@@ -1977,7 +1595,7 @@ bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
// TODO: Should handle vector splat.
Register RHS = MI.getOperand(2).getReg();
- auto MaybeShiftAmtVal = getConstantVRegValWithLookThrough(RHS, MRI);
+ auto MaybeShiftAmtVal = getIConstantVRegValWithLookThrough(RHS, MRI);
if (!MaybeShiftAmtVal)
return false;
@@ -2045,26 +1663,23 @@ bool CombinerHelper::matchCombineUnmergeMergeToPlainValues(
MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
"Expected an unmerge");
- Register SrcReg =
- peekThroughBitcast(MI.getOperand(MI.getNumOperands() - 1).getReg(), MRI);
+ auto &Unmerge = cast<GUnmerge>(MI);
+ Register SrcReg = peekThroughBitcast(Unmerge.getSourceReg(), MRI);
- MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
- if (SrcInstr->getOpcode() != TargetOpcode::G_MERGE_VALUES &&
- SrcInstr->getOpcode() != TargetOpcode::G_BUILD_VECTOR &&
- SrcInstr->getOpcode() != TargetOpcode::G_CONCAT_VECTORS)
+ auto *SrcInstr = getOpcodeDef<GMergeLikeOp>(SrcReg, MRI);
+ if (!SrcInstr)
return false;
// Check the source type of the merge.
- LLT SrcMergeTy = MRI.getType(SrcInstr->getOperand(1).getReg());
- LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
+ LLT SrcMergeTy = MRI.getType(SrcInstr->getSourceReg(0));
+ LLT Dst0Ty = MRI.getType(Unmerge.getReg(0));
bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits();
if (SrcMergeTy != Dst0Ty && !SameSize)
return false;
// They are the same now (modulo a bitcast).
// We can collect all the src registers.
- for (unsigned Idx = 1, EndIdx = SrcInstr->getNumOperands(); Idx != EndIdx;
- ++Idx)
- Operands.push_back(SrcInstr->getOperand(Idx).getReg());
+ for (unsigned Idx = 0; Idx < SrcInstr->getNumSources(); ++Idx)
+ Operands.push_back(SrcInstr->getSourceReg(Idx));
return true;
}
@@ -2241,7 +1856,7 @@ bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
return false;
auto MaybeImmVal =
- getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
+ getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
if (!MaybeImmVal)
return false;
@@ -2410,12 +2025,12 @@ void CombinerHelper::applyCombineAddP2IToPtrAdd(
bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI,
int64_t &NewCst) {
- assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD");
- Register LHS = MI.getOperand(1).getReg();
- Register RHS = MI.getOperand(2).getReg();
+ auto &PtrAdd = cast<GPtrAdd>(MI);
+ Register LHS = PtrAdd.getBaseReg();
+ Register RHS = PtrAdd.getOffsetReg();
MachineRegisterInfo &MRI = Builder.getMF().getRegInfo();
- if (auto RHSCst = getConstantVRegSExtVal(RHS, MRI)) {
+ if (auto RHSCst = getIConstantVRegSExtVal(RHS, MRI)) {
int64_t Cst;
if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) {
NewCst = Cst + *RHSCst;
@@ -2428,12 +2043,12 @@ bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI,
void CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI,
int64_t &NewCst) {
- assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD");
- Register Dst = MI.getOperand(0).getReg();
+ auto &PtrAdd = cast<GPtrAdd>(MI);
+ Register Dst = PtrAdd.getReg(0);
Builder.setInstrAndDebugLoc(MI);
Builder.buildConstant(Dst, NewCst);
- MI.eraseFromParent();
+ PtrAdd.eraseFromParent();
}
bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) {
@@ -2536,6 +2151,23 @@ bool CombinerHelper::matchCombineFAbsOfFAbs(MachineInstr &MI, Register &Src) {
return mi_match(Src, MRI, m_GFabs(m_Reg(AbsSrc)));
}
+bool CombinerHelper::matchCombineFAbsOfFNeg(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS");
+ Register Src = MI.getOperand(1).getReg();
+ Register NegSrc;
+
+ if (!mi_match(Src, MRI, m_GFNeg(m_Reg(NegSrc))))
+ return false;
+
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(NegSrc);
+ Observer.changedInstr(MI);
+ };
+ return true;
+}
+
bool CombinerHelper::matchCombineTruncOfExt(
MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
@@ -2587,7 +2219,7 @@ bool CombinerHelper::matchCombineTruncOfShl(
{DstTy, getTargetLowering().getPreferredShiftAmountTy(DstTy)}})) {
KnownBits Known = KB->getKnownBits(ShiftAmt);
unsigned Size = DstTy.getSizeInBits();
- if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
+ if (Known.countMaxActiveBits() <= Log2_32(Size)) {
MatchInfo = std::make_pair(ShiftSrc, ShiftAmt);
return true;
}
@@ -2644,13 +2276,13 @@ bool CombinerHelper::matchUndefSelectCmp(MachineInstr &MI) {
}
bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) {
- assert(MI.getOpcode() == TargetOpcode::G_SELECT);
- if (auto MaybeCstCmp =
- getConstantVRegValWithLookThrough(MI.getOperand(1).getReg(), MRI)) {
- OpIdx = MaybeCstCmp->Value.isNullValue() ? 3 : 2;
- return true;
- }
- return false;
+ GSelect &SelMI = cast<GSelect>(MI);
+ auto Cst =
+ isConstantOrConstantSplatVector(*MRI.getVRegDef(SelMI.getCondReg()), MRI);
+ if (!Cst)
+ return false;
+ OpIdx = Cst->isZero() ? 3 : 2;
+ return true;
}
bool CombinerHelper::eraseInst(MachineInstr &MI) {
@@ -2662,12 +2294,14 @@ bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
const MachineOperand &MOP2) {
if (!MOP1.isReg() || !MOP2.isReg())
return false;
- MachineInstr *I1 = getDefIgnoringCopies(MOP1.getReg(), MRI);
- if (!I1)
+ auto InstAndDef1 = getDefSrcRegIgnoringCopies(MOP1.getReg(), MRI);
+ if (!InstAndDef1)
return false;
- MachineInstr *I2 = getDefIgnoringCopies(MOP2.getReg(), MRI);
- if (!I2)
+ auto InstAndDef2 = getDefSrcRegIgnoringCopies(MOP2.getReg(), MRI);
+ if (!InstAndDef2)
return false;
+ MachineInstr *I1 = InstAndDef1->MI;
+ MachineInstr *I2 = InstAndDef2->MI;
// Handle a case like this:
//
@@ -2727,15 +2361,26 @@ bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
//
// On the off-chance that there's some target instruction feeding into the
// instruction, let's use produceSameValue instead of isIdenticalTo.
- return Builder.getTII().produceSameValue(*I1, *I2, &MRI);
+ if (Builder.getTII().produceSameValue(*I1, *I2, &MRI)) {
+ // Handle instructions with multiple defs that produce same values. Values
+ // are same for operands with same index.
+ // %0:_(s8), %1:_(s8), %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
+ // %5:_(s8), %6:_(s8), %7:_(s8), %8:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
+ // I1 and I2 are different instructions but produce same values,
+ // %1 and %6 are same, %1 and %7 are not the same value.
+ return I1->findRegisterDefOperandIdx(InstAndDef1->Reg) ==
+ I2->findRegisterDefOperandIdx(InstAndDef2->Reg);
+ }
+ return false;
}
bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) {
if (!MOP.isReg())
return false;
- // MIPatternMatch doesn't let us look through G_ZEXT etc.
- auto ValAndVReg = getConstantVRegValWithLookThrough(MOP.getReg(), MRI);
- return ValAndVReg && ValAndVReg->Value == C;
+ auto *MI = MRI.getVRegDef(MOP.getReg());
+ auto MaybeCst = isConstantOrConstantSplatVector(*MI, MRI);
+ return MaybeCst.hasValue() && MaybeCst->getBitWidth() <= 64 &&
+ MaybeCst->getSExtValue() == C;
}
bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
@@ -3115,14 +2760,14 @@ bool CombinerHelper::matchRedundantAnd(MachineInstr &MI,
//
// Check if we can replace AndDst with the LHS of the G_AND
if (canReplaceReg(AndDst, LHS, MRI) &&
- (LHSBits.Zero | RHSBits.One).isAllOnesValue()) {
+ (LHSBits.Zero | RHSBits.One).isAllOnes()) {
Replacement = LHS;
return true;
}
// Check if we can replace AndDst with the RHS of the G_AND
if (canReplaceReg(AndDst, RHS, MRI) &&
- (LHSBits.One | RHSBits.Zero).isAllOnesValue()) {
+ (LHSBits.One | RHSBits.Zero).isAllOnes()) {
Replacement = RHS;
return true;
}
@@ -3161,14 +2806,14 @@ bool CombinerHelper::matchRedundantOr(MachineInstr &MI, Register &Replacement) {
//
// Check if we can replace OrDst with the LHS of the G_OR
if (canReplaceReg(OrDst, LHS, MRI) &&
- (LHSBits.One | RHSBits.Zero).isAllOnesValue()) {
+ (LHSBits.One | RHSBits.Zero).isAllOnes()) {
Replacement = LHS;
return true;
}
// Check if we can replace OrDst with the RHS of the G_OR
if (canReplaceReg(OrDst, RHS, MRI) &&
- (LHSBits.Zero | RHSBits.One).isAllOnesValue()) {
+ (LHSBits.Zero | RHSBits.One).isAllOnes()) {
Replacement = RHS;
return true;
}
@@ -3346,7 +2991,8 @@ void CombinerHelper::applyXorOfAndWithSameReg(
}
bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) {
- Register DstReg = MI.getOperand(0).getReg();
+ auto &PtrAdd = cast<GPtrAdd>(MI);
+ Register DstReg = PtrAdd.getReg(0);
LLT Ty = MRI.getType(DstReg);
const DataLayout &DL = Builder.getMF().getDataLayout();
@@ -3354,20 +3000,20 @@ bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) {
return false;
if (Ty.isPointer()) {
- auto ConstVal = getConstantVRegVal(MI.getOperand(1).getReg(), MRI);
+ auto ConstVal = getIConstantVRegVal(PtrAdd.getBaseReg(), MRI);
return ConstVal && *ConstVal == 0;
}
assert(Ty.isVector() && "Expecting a vector type");
- const MachineInstr *VecMI = MRI.getVRegDef(MI.getOperand(1).getReg());
+ const MachineInstr *VecMI = MRI.getVRegDef(PtrAdd.getBaseReg());
return isBuildVectorAllZeros(*VecMI, MRI);
}
void CombinerHelper::applyPtrAddZero(MachineInstr &MI) {
- assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD);
- Builder.setInstrAndDebugLoc(MI);
- Builder.buildIntToPtr(MI.getOperand(0), MI.getOperand(2));
- MI.eraseFromParent();
+ auto &PtrAdd = cast<GPtrAdd>(MI);
+ Builder.setInstrAndDebugLoc(PtrAdd);
+ Builder.buildIntToPtr(PtrAdd.getReg(0), PtrAdd.getOffsetReg());
+ PtrAdd.eraseFromParent();
}
/// The second source operand is known to be a power of 2.
@@ -3704,10 +3350,8 @@ bool CombinerHelper::matchLoadOrCombine(
// may not use index 0.
Register Ptr = LowestIdxLoad->getPointerReg();
const MachineMemOperand &MMO = LowestIdxLoad->getMMO();
- LegalityQuery::MemDesc MMDesc;
+ LegalityQuery::MemDesc MMDesc(MMO);
MMDesc.MemoryTy = Ty;
- MMDesc.AlignInBits = MMO.getAlign().value() * 8;
- MMDesc.Ordering = MMO.getSuccessOrdering();
if (!isLegalOrBeforeLegalizer(
{TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}}))
return false;
@@ -3732,6 +3376,274 @@ bool CombinerHelper::matchLoadOrCombine(
return true;
}
+/// Check if the store \p Store is a truncstore that can be merged. That is,
+/// it's a store of a shifted value of \p SrcVal. If \p SrcVal is an empty
+/// Register then it does not need to match and SrcVal is set to the source
+/// value found.
+/// On match, returns the start byte offset of the \p SrcVal that is being
+/// stored.
+static Optional<int64_t> getTruncStoreByteOffset(GStore &Store, Register &SrcVal,
+ MachineRegisterInfo &MRI) {
+ Register TruncVal;
+ if (!mi_match(Store.getValueReg(), MRI, m_GTrunc(m_Reg(TruncVal))))
+ return None;
+
+ // The shift amount must be a constant multiple of the narrow type.
+ // It is translated to the offset address in the wide source value "y".
+ //
+ // x = G_LSHR y, ShiftAmtC
+ // s8 z = G_TRUNC x
+ // store z, ...
+ Register FoundSrcVal;
+ int64_t ShiftAmt;
+ if (!mi_match(TruncVal, MRI,
+ m_any_of(m_GLShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt)),
+ m_GAShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt))))) {
+ if (!SrcVal.isValid() || TruncVal == SrcVal) {
+ if (!SrcVal.isValid())
+ SrcVal = TruncVal;
+ return 0; // If it's the lowest index store.
+ }
+ return None;
+ }
+
+ unsigned NarrowBits = Store.getMMO().getMemoryType().getScalarSizeInBits();
+ if (ShiftAmt % NarrowBits != 0)
+ return None;
+ const unsigned Offset = ShiftAmt / NarrowBits;
+
+ if (SrcVal.isValid() && FoundSrcVal != SrcVal)
+ return None;
+
+ if (!SrcVal.isValid())
+ SrcVal = FoundSrcVal;
+ else if (MRI.getType(SrcVal) != MRI.getType(FoundSrcVal))
+ return None;
+ return Offset;
+}
+
+/// Match a pattern where a wide type scalar value is stored by several narrow
+/// stores. Fold it into a single store or a BSWAP and a store if the target
+/// supports it.
+///
+/// Assuming little endian target:
+/// i8 *p = ...
+/// i32 val = ...
+/// p[0] = (val >> 0) & 0xFF;
+/// p[1] = (val >> 8) & 0xFF;
+/// p[2] = (val >> 16) & 0xFF;
+/// p[3] = (val >> 24) & 0xFF;
+/// =>
+/// *((i32)p) = val;
+///
+/// i8 *p = ...
+/// i32 val = ...
+/// p[0] = (val >> 24) & 0xFF;
+/// p[1] = (val >> 16) & 0xFF;
+/// p[2] = (val >> 8) & 0xFF;
+/// p[3] = (val >> 0) & 0xFF;
+/// =>
+/// *((i32)p) = BSWAP(val);
+bool CombinerHelper::matchTruncStoreMerge(MachineInstr &MI,
+ MergeTruncStoresInfo &MatchInfo) {
+ auto &StoreMI = cast<GStore>(MI);
+ LLT MemTy = StoreMI.getMMO().getMemoryType();
+
+ // We only handle merging simple stores of 1-4 bytes.
+ if (!MemTy.isScalar())
+ return false;
+ switch (MemTy.getSizeInBits()) {
+ case 8:
+ case 16:
+ case 32:
+ break;
+ default:
+ return false;
+ }
+ if (!StoreMI.isSimple())
+ return false;
+
+ // We do a simple search for mergeable stores prior to this one.
+ // Any potential alias hazard along the way terminates the search.
+ SmallVector<GStore *> FoundStores;
+
+ // We're looking for:
+ // 1) a (store(trunc(...)))
+ // 2) of an LSHR/ASHR of a single wide value, by the appropriate shift to get
+ // the partial value stored.
+ // 3) where the offsets form either a little or big-endian sequence.
+
+ auto &LastStore = StoreMI;
+
+ // The single base pointer that all stores must use.
+ Register BaseReg;
+ int64_t LastOffset;
+ if (!mi_match(LastStore.getPointerReg(), MRI,
+ m_GPtrAdd(m_Reg(BaseReg), m_ICst(LastOffset)))) {
+ BaseReg = LastStore.getPointerReg();
+ LastOffset = 0;
+ }
+
+ GStore *LowestIdxStore = &LastStore;
+ int64_t LowestIdxOffset = LastOffset;
+
+ Register WideSrcVal;
+ auto LowestShiftAmt = getTruncStoreByteOffset(LastStore, WideSrcVal, MRI);
+ if (!LowestShiftAmt)
+ return false; // Didn't match a trunc.
+ assert(WideSrcVal.isValid());
+
+ LLT WideStoreTy = MRI.getType(WideSrcVal);
+ // The wide type might not be a multiple of the memory type, e.g. s48 and s32.
+ if (WideStoreTy.getSizeInBits() % MemTy.getSizeInBits() != 0)
+ return false;
+ const unsigned NumStoresRequired =
+ WideStoreTy.getSizeInBits() / MemTy.getSizeInBits();
+
+ SmallVector<int64_t, 8> OffsetMap(NumStoresRequired, INT64_MAX);
+ OffsetMap[*LowestShiftAmt] = LastOffset;
+ FoundStores.emplace_back(&LastStore);
+
+ // Search the block up for more stores.
+ // We use a search threshold of 10 instructions here because the combiner
+ // works top-down within a block, and we don't want to search an unbounded
+ // number of predecessor instructions trying to find matching stores.
+ // If we moved this optimization into a separate pass then we could probably
+ // use a more efficient search without having a hard-coded threshold.
+ const int MaxInstsToCheck = 10;
+ int NumInstsChecked = 0;
+ for (auto II = ++LastStore.getReverseIterator();
+ II != LastStore.getParent()->rend() && NumInstsChecked < MaxInstsToCheck;
+ ++II) {
+ NumInstsChecked++;
+ GStore *NewStore;
+ if ((NewStore = dyn_cast<GStore>(&*II))) {
+ if (NewStore->getMMO().getMemoryType() != MemTy || !NewStore->isSimple())
+ break;
+ } else if (II->isLoadFoldBarrier() || II->mayLoad()) {
+ break;
+ } else {
+ continue; // This is a safe instruction we can look past.
+ }
+
+ Register NewBaseReg;
+ int64_t MemOffset;
+ // Check we're storing to the same base + some offset.
+ if (!mi_match(NewStore->getPointerReg(), MRI,
+ m_GPtrAdd(m_Reg(NewBaseReg), m_ICst(MemOffset)))) {
+ NewBaseReg = NewStore->getPointerReg();
+ MemOffset = 0;
+ }
+ if (BaseReg != NewBaseReg)
+ break;
+
+ auto ShiftByteOffset = getTruncStoreByteOffset(*NewStore, WideSrcVal, MRI);
+ if (!ShiftByteOffset)
+ break;
+ if (MemOffset < LowestIdxOffset) {
+ LowestIdxOffset = MemOffset;
+ LowestIdxStore = NewStore;
+ }
+
+ // Map the offset in the store and the offset in the combined value, and
+ // early return if it has been set before.
+ if (*ShiftByteOffset < 0 || *ShiftByteOffset >= NumStoresRequired ||
+ OffsetMap[*ShiftByteOffset] != INT64_MAX)
+ break;
+ OffsetMap[*ShiftByteOffset] = MemOffset;
+
+ FoundStores.emplace_back(NewStore);
+ // Reset counter since we've found a matching inst.
+ NumInstsChecked = 0;
+ if (FoundStores.size() == NumStoresRequired)
+ break;
+ }
+
+ if (FoundStores.size() != NumStoresRequired) {
+ return false;
+ }
+
+ const auto &DL = LastStore.getMF()->getDataLayout();
+ auto &C = LastStore.getMF()->getFunction().getContext();
+ // Check that a store of the wide type is both allowed and fast on the target
+ bool Fast = false;
+ bool Allowed = getTargetLowering().allowsMemoryAccess(
+ C, DL, WideStoreTy, LowestIdxStore->getMMO(), &Fast);
+ if (!Allowed || !Fast)
+ return false;
+
+ // Check if the pieces of the value are going to the expected places in memory
+ // to merge the stores.
+ unsigned NarrowBits = MemTy.getScalarSizeInBits();
+ auto checkOffsets = [&](bool MatchLittleEndian) {
+ if (MatchLittleEndian) {
+ for (unsigned i = 0; i != NumStoresRequired; ++i)
+ if (OffsetMap[i] != i * (NarrowBits / 8) + LowestIdxOffset)
+ return false;
+ } else { // MatchBigEndian by reversing loop counter.
+ for (unsigned i = 0, j = NumStoresRequired - 1; i != NumStoresRequired;
+ ++i, --j)
+ if (OffsetMap[j] != i * (NarrowBits / 8) + LowestIdxOffset)
+ return false;
+ }
+ return true;
+ };
+
+ // Check if the offsets line up for the native data layout of this target.
+ bool NeedBswap = false;
+ bool NeedRotate = false;
+ if (!checkOffsets(DL.isLittleEndian())) {
+ // Special-case: check if byte offsets line up for the opposite endian.
+ if (NarrowBits == 8 && checkOffsets(DL.isBigEndian()))
+ NeedBswap = true;
+ else if (NumStoresRequired == 2 && checkOffsets(DL.isBigEndian()))
+ NeedRotate = true;
+ else
+ return false;
+ }
+
+ if (NeedBswap &&
+ !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {WideStoreTy}}))
+ return false;
+ if (NeedRotate &&
+ !isLegalOrBeforeLegalizer({TargetOpcode::G_ROTR, {WideStoreTy}}))
+ return false;
+
+ MatchInfo.NeedBSwap = NeedBswap;
+ MatchInfo.NeedRotate = NeedRotate;
+ MatchInfo.LowestIdxStore = LowestIdxStore;
+ MatchInfo.WideSrcVal = WideSrcVal;
+ MatchInfo.FoundStores = std::move(FoundStores);
+ return true;
+}
+
+void CombinerHelper::applyTruncStoreMerge(MachineInstr &MI,
+ MergeTruncStoresInfo &MatchInfo) {
+
+ Builder.setInstrAndDebugLoc(MI);
+ Register WideSrcVal = MatchInfo.WideSrcVal;
+ LLT WideStoreTy = MRI.getType(WideSrcVal);
+
+ if (MatchInfo.NeedBSwap) {
+ WideSrcVal = Builder.buildBSwap(WideStoreTy, WideSrcVal).getReg(0);
+ } else if (MatchInfo.NeedRotate) {
+ assert(WideStoreTy.getSizeInBits() % 2 == 0 &&
+ "Unexpected type for rotate");
+ auto RotAmt =
+ Builder.buildConstant(WideStoreTy, WideStoreTy.getSizeInBits() / 2);
+ WideSrcVal =
+ Builder.buildRotateRight(WideStoreTy, WideSrcVal, RotAmt).getReg(0);
+ }
+
+ Builder.buildStore(WideSrcVal, MatchInfo.LowestIdxStore->getPointerReg(),
+ MatchInfo.LowestIdxStore->getMMO().getPointerInfo(),
+ MatchInfo.LowestIdxStore->getMMO().getAlign());
+
+ // Erase the old stores.
+ for (auto *ST : MatchInfo.FoundStores)
+ ST->eraseFromParent();
+}
+
bool CombinerHelper::matchExtendThroughPhis(MachineInstr &MI,
MachineInstr *&ExtMI) {
assert(MI.getOpcode() == TargetOpcode::G_PHI);
@@ -3844,7 +3756,7 @@ bool CombinerHelper::matchExtractVecEltBuildVec(MachineInstr &MI,
{TargetOpcode::G_BUILD_VECTOR, {SrcTy, SrcTy.getElementType()}}))
return false;
- auto Cst = getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
+ auto Cst = getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
if (!Cst || Cst->Value.getZExtValue() >= SrcTy.getNumElements())
return false;
@@ -3917,7 +3829,7 @@ bool CombinerHelper::matchExtractAllEltsFromBuildVector(
MRI.use_instr_nodbg_end())) {
if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)
return false;
- auto Cst = getConstantVRegVal(II.getOperand(2).getReg(), MRI);
+ auto Cst = getIConstantVRegVal(II.getOperand(2).getReg(), MRI);
if (!Cst)
return false;
unsigned Idx = Cst.getValue().getZExtValue();
@@ -4064,6 +3976,78 @@ bool CombinerHelper::matchICmpToTrueFalseKnownBits(MachineInstr &MI,
return true;
}
+bool CombinerHelper::matchICmpToLHSKnownBits(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_ICMP);
+ // Given:
+ //
+ // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
+ // %cmp = G_ICMP ne %x, 0
+ //
+ // Or:
+ //
+ // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
+ // %cmp = G_ICMP eq %x, 1
+ //
+ // We can replace %cmp with %x assuming true is 1 on the target.
+ auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
+ if (!CmpInst::isEquality(Pred))
+ return false;
+ Register Dst = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(Dst);
+ if (getICmpTrueVal(getTargetLowering(), DstTy.isVector(),
+ /* IsFP = */ false) != 1)
+ return false;
+ int64_t OneOrZero = Pred == CmpInst::ICMP_EQ;
+ if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(OneOrZero)))
+ return false;
+ Register LHS = MI.getOperand(2).getReg();
+ auto KnownLHS = KB->getKnownBits(LHS);
+ if (KnownLHS.getMinValue() != 0 || KnownLHS.getMaxValue() != 1)
+ return false;
+ // Make sure replacing Dst with the LHS is a legal operation.
+ LLT LHSTy = MRI.getType(LHS);
+ unsigned LHSSize = LHSTy.getSizeInBits();
+ unsigned DstSize = DstTy.getSizeInBits();
+ unsigned Op = TargetOpcode::COPY;
+ if (DstSize != LHSSize)
+ Op = DstSize < LHSSize ? TargetOpcode::G_TRUNC : TargetOpcode::G_ZEXT;
+ if (!isLegalOrBeforeLegalizer({Op, {DstTy, LHSTy}}))
+ return false;
+ MatchInfo = [=](MachineIRBuilder &B) { B.buildInstr(Op, {Dst}, {LHS}); };
+ return true;
+}
+
+// Replace (and (or x, c1), c2) with (and x, c2) iff c1 & c2 == 0
+bool CombinerHelper::matchAndOrDisjointMask(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_AND);
+
+ // Ignore vector types to simplify matching the two constants.
+ // TODO: do this for vectors and scalars via a demanded bits analysis.
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ if (Ty.isVector())
+ return false;
+
+ Register Src;
+ int64_t MaskAnd;
+ int64_t MaskOr;
+ if (!mi_match(MI, MRI,
+ m_GAnd(m_GOr(m_Reg(Src), m_ICst(MaskOr)), m_ICst(MaskAnd))))
+ return false;
+
+ // Check if MaskOr could turn on any bits in Src.
+ if (MaskAnd & MaskOr)
+ return false;
+
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(Src);
+ Observer.changedInstr(MI);
+ };
+ return true;
+}
+
/// Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
bool CombinerHelper::matchBitfieldExtractFromSExtInReg(
MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
@@ -4130,6 +4114,104 @@ bool CombinerHelper::matchBitfieldExtractFromAnd(
return true;
}
+bool CombinerHelper::matchBitfieldExtractFromShr(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ const unsigned Opcode = MI.getOpcode();
+ assert(Opcode == TargetOpcode::G_ASHR || Opcode == TargetOpcode::G_LSHR);
+
+ const Register Dst = MI.getOperand(0).getReg();
+
+ const unsigned ExtrOpcode = Opcode == TargetOpcode::G_ASHR
+ ? TargetOpcode::G_SBFX
+ : TargetOpcode::G_UBFX;
+
+ // Check if the type we would use for the extract is legal
+ LLT Ty = MRI.getType(Dst);
+ LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
+ if (!LI || !LI->isLegalOrCustom({ExtrOpcode, {Ty, ExtractTy}}))
+ return false;
+
+ Register ShlSrc;
+ int64_t ShrAmt;
+ int64_t ShlAmt;
+ const unsigned Size = Ty.getScalarSizeInBits();
+
+ // Try to match shr (shl x, c1), c2
+ if (!mi_match(Dst, MRI,
+ m_BinOp(Opcode,
+ m_OneNonDBGUse(m_GShl(m_Reg(ShlSrc), m_ICst(ShlAmt))),
+ m_ICst(ShrAmt))))
+ return false;
+
+ // Make sure that the shift sizes can fit a bitfield extract
+ if (ShlAmt < 0 || ShlAmt > ShrAmt || ShrAmt >= Size)
+ return false;
+
+ // Skip this combine if the G_SEXT_INREG combine could handle it
+ if (Opcode == TargetOpcode::G_ASHR && ShlAmt == ShrAmt)
+ return false;
+
+ // Calculate start position and width of the extract
+ const int64_t Pos = ShrAmt - ShlAmt;
+ const int64_t Width = Size - ShrAmt;
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto WidthCst = B.buildConstant(ExtractTy, Width);
+ auto PosCst = B.buildConstant(ExtractTy, Pos);
+ B.buildInstr(ExtrOpcode, {Dst}, {ShlSrc, PosCst, WidthCst});
+ };
+ return true;
+}
+
+bool CombinerHelper::matchBitfieldExtractFromShrAnd(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ const unsigned Opcode = MI.getOpcode();
+ assert(Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_ASHR);
+
+ const Register Dst = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(Dst);
+ if (!getTargetLowering().isConstantUnsignedBitfieldExtactLegal(
+ TargetOpcode::G_UBFX, Ty, Ty))
+ return false;
+
+ // Try to match shr (and x, c1), c2
+ Register AndSrc;
+ int64_t ShrAmt;
+ int64_t SMask;
+ if (!mi_match(Dst, MRI,
+ m_BinOp(Opcode,
+ m_OneNonDBGUse(m_GAnd(m_Reg(AndSrc), m_ICst(SMask))),
+ m_ICst(ShrAmt))))
+ return false;
+
+ const unsigned Size = Ty.getScalarSizeInBits();
+ if (ShrAmt < 0 || ShrAmt >= Size)
+ return false;
+
+ // Check that ubfx can do the extraction, with no holes in the mask.
+ uint64_t UMask = SMask;
+ UMask |= maskTrailingOnes<uint64_t>(ShrAmt);
+ UMask &= maskTrailingOnes<uint64_t>(Size);
+ if (!isMask_64(UMask))
+ return false;
+
+ // Calculate start position and width of the extract.
+ const int64_t Pos = ShrAmt;
+ const int64_t Width = countTrailingOnes(UMask) - ShrAmt;
+
+ // It's preferable to keep the shift, rather than form G_SBFX.
+ // TODO: remove the G_AND via demanded bits analysis.
+ if (Opcode == TargetOpcode::G_ASHR && Width + ShrAmt == Size)
+ return false;
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto WidthCst = B.buildConstant(Ty, Width);
+ auto PosCst = B.buildConstant(Ty, Pos);
+ B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {AndSrc, PosCst, WidthCst});
+ };
+ return true;
+}
+
bool CombinerHelper::reassociationCanBreakAddressingModePattern(
MachineInstr &PtrAdd) {
assert(PtrAdd.getOpcode() == TargetOpcode::G_PTR_ADD);
@@ -4144,10 +4226,10 @@ bool CombinerHelper::reassociationCanBreakAddressingModePattern(
if (MRI.hasOneNonDBGUse(Src1Reg))
return false;
- auto C1 = getConstantVRegVal(Src1Def->getOperand(2).getReg(), MRI);
+ auto C1 = getIConstantVRegVal(Src1Def->getOperand(2).getReg(), MRI);
if (!C1)
return false;
- auto C2 = getConstantVRegVal(Src2Reg, MRI);
+ auto C2 = getIConstantVRegVal(Src2Reg, MRI);
if (!C2)
return false;
@@ -4198,9 +4280,91 @@ bool CombinerHelper::reassociationCanBreakAddressingModePattern(
return false;
}
-bool CombinerHelper::matchReassocPtrAdd(
- MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
- assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD);
+bool CombinerHelper::matchReassocConstantInnerRHS(GPtrAdd &MI,
+ MachineInstr *RHS,
+ BuildFnTy &MatchInfo) {
+ // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
+ Register Src1Reg = MI.getOperand(1).getReg();
+ if (RHS->getOpcode() != TargetOpcode::G_ADD)
+ return false;
+ auto C2 = getIConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
+ if (!C2)
+ return false;
+
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
+
+ auto NewBase =
+ Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg());
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(NewBase.getReg(0));
+ MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
+ Observer.changedInstr(MI);
+ };
+ return !reassociationCanBreakAddressingModePattern(MI);
+}
+
+bool CombinerHelper::matchReassocConstantInnerLHS(GPtrAdd &MI,
+ MachineInstr *LHS,
+ MachineInstr *RHS,
+ BuildFnTy &MatchInfo) {
+ // G_PTR_ADD (G_PTR_ADD X, C), Y) -> (G_PTR_ADD (G_PTR_ADD(X, Y), C)
+ // if and only if (G_PTR_ADD X, C) has one use.
+ Register LHSBase;
+ Optional<ValueAndVReg> LHSCstOff;
+ if (!mi_match(MI.getBaseReg(), MRI,
+ m_OneNonDBGUse(m_GPtrAdd(m_Reg(LHSBase), m_GCst(LHSCstOff)))))
+ return false;
+
+ auto *LHSPtrAdd = cast<GPtrAdd>(LHS);
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ // When we change LHSPtrAdd's offset register we might cause it to use a reg
+ // before its def. Sink the instruction so it sits just before the outer
+ // PTR_ADD to ensure this doesn't happen.
+ LHSPtrAdd->moveBefore(&MI);
+ Register RHSReg = MI.getOffsetReg();
+ Observer.changingInstr(MI);
+ MI.getOperand(2).setReg(LHSCstOff->VReg);
+ Observer.changedInstr(MI);
+ Observer.changingInstr(*LHSPtrAdd);
+ LHSPtrAdd->getOperand(2).setReg(RHSReg);
+ Observer.changedInstr(*LHSPtrAdd);
+ };
+ return !reassociationCanBreakAddressingModePattern(MI);
+}
+
+bool CombinerHelper::matchReassocFoldConstantsInSubTree(GPtrAdd &MI,
+ MachineInstr *LHS,
+ MachineInstr *RHS,
+ BuildFnTy &MatchInfo) {
+ // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
+ auto *LHSPtrAdd = dyn_cast<GPtrAdd>(LHS);
+ if (!LHSPtrAdd)
+ return false;
+
+ Register Src2Reg = MI.getOperand(2).getReg();
+ Register LHSSrc1 = LHSPtrAdd->getBaseReg();
+ Register LHSSrc2 = LHSPtrAdd->getOffsetReg();
+ auto C1 = getIConstantVRegVal(LHSSrc2, MRI);
+ if (!C1)
+ return false;
+ auto C2 = getIConstantVRegVal(Src2Reg, MRI);
+ if (!C2)
+ return false;
+
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(LHSSrc1);
+ MI.getOperand(2).setReg(NewCst.getReg(0));
+ Observer.changedInstr(MI);
+ };
+ return !reassociationCanBreakAddressingModePattern(MI);
+}
+
+bool CombinerHelper::matchReassocPtrAdd(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ auto &PtrAdd = cast<GPtrAdd>(MI);
// We're trying to match a few pointer computation patterns here for
// re-association opportunities.
// 1) Isolating a constant operand to be on the RHS, e.g.:
@@ -4209,49 +4373,26 @@ bool CombinerHelper::matchReassocPtrAdd(
// 2) Folding two constants in each sub-tree as long as such folding
// doesn't break a legal addressing mode.
// G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
- Register Src1Reg = MI.getOperand(1).getReg();
- Register Src2Reg = MI.getOperand(2).getReg();
- MachineInstr *LHS = MRI.getVRegDef(Src1Reg);
- MachineInstr *RHS = MRI.getVRegDef(Src2Reg);
-
- if (LHS->getOpcode() != TargetOpcode::G_PTR_ADD) {
- // Try to match example 1).
- if (RHS->getOpcode() != TargetOpcode::G_ADD)
- return false;
- auto C2 = getConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
- if (!C2)
- return false;
+ //
+ // 3) Move a constant from the LHS of an inner op to the RHS of the outer.
+ // G_PTR_ADD (G_PTR_ADD X, C), Y) -> G_PTR_ADD (G_PTR_ADD(X, Y), C)
+ // iff (G_PTR_ADD X, C) has one use.
+ MachineInstr *LHS = MRI.getVRegDef(PtrAdd.getBaseReg());
+ MachineInstr *RHS = MRI.getVRegDef(PtrAdd.getOffsetReg());
+
+ // Try to match example 2.
+ if (matchReassocFoldConstantsInSubTree(PtrAdd, LHS, RHS, MatchInfo))
+ return true;
- MatchInfo = [=,&MI](MachineIRBuilder &B) {
- LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
+ // Try to match example 3.
+ if (matchReassocConstantInnerLHS(PtrAdd, LHS, RHS, MatchInfo))
+ return true;
- auto NewBase =
- Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg());
- Observer.changingInstr(MI);
- MI.getOperand(1).setReg(NewBase.getReg(0));
- MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
- Observer.changedInstr(MI);
- };
- } else {
- // Try to match example 2.
- Register LHSSrc1 = LHS->getOperand(1).getReg();
- Register LHSSrc2 = LHS->getOperand(2).getReg();
- auto C1 = getConstantVRegVal(LHSSrc2, MRI);
- if (!C1)
- return false;
- auto C2 = getConstantVRegVal(Src2Reg, MRI);
- if (!C2)
- return false;
+ // Try to match example 1.
+ if (matchReassocConstantInnerRHS(PtrAdd, RHS, MatchInfo))
+ return true;
- MatchInfo = [=, &MI](MachineIRBuilder &B) {
- auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
- Observer.changingInstr(MI);
- MI.getOperand(1).setReg(LHSSrc1);
- MI.getOperand(2).setReg(NewCst.getReg(0));
- Observer.changedInstr(MI);
- };
- }
- return !reassociationCanBreakAddressingModePattern(MI);
+ return false;
}
bool CombinerHelper::matchConstantFold(MachineInstr &MI, APInt &MatchInfo) {
@@ -4264,6 +4405,361 @@ bool CombinerHelper::matchConstantFold(MachineInstr &MI, APInt &MatchInfo) {
return true;
}
+bool CombinerHelper::matchNarrowBinopFeedingAnd(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ // Look for a binop feeding into an AND with a mask:
+ //
+ // %add = G_ADD %lhs, %rhs
+ // %and = G_AND %add, 000...11111111
+ //
+ // Check if it's possible to perform the binop at a narrower width and zext
+ // back to the original width like so:
+ //
+ // %narrow_lhs = G_TRUNC %lhs
+ // %narrow_rhs = G_TRUNC %rhs
+ // %narrow_add = G_ADD %narrow_lhs, %narrow_rhs
+ // %new_add = G_ZEXT %narrow_add
+ // %and = G_AND %new_add, 000...11111111
+ //
+ // This can allow later combines to eliminate the G_AND if it turns out
+ // that the mask is irrelevant.
+ assert(MI.getOpcode() == TargetOpcode::G_AND);
+ Register Dst = MI.getOperand(0).getReg();
+ Register AndLHS = MI.getOperand(1).getReg();
+ Register AndRHS = MI.getOperand(2).getReg();
+ LLT WideTy = MRI.getType(Dst);
+
+ // If the potential binop has more than one use, then it's possible that one
+ // of those uses will need its full width.
+ if (!WideTy.isScalar() || !MRI.hasOneNonDBGUse(AndLHS))
+ return false;
+
+ // Check if the LHS feeding the AND is impacted by the high bits that we're
+ // masking out.
+ //
+ // e.g. for 64-bit x, y:
+ //
+ // add_64(x, y) & 65535 == zext(add_16(trunc(x), trunc(y))) & 65535
+ MachineInstr *LHSInst = getDefIgnoringCopies(AndLHS, MRI);
+ if (!LHSInst)
+ return false;
+ unsigned LHSOpc = LHSInst->getOpcode();
+ switch (LHSOpc) {
+ default:
+ return false;
+ case TargetOpcode::G_ADD:
+ case TargetOpcode::G_SUB:
+ case TargetOpcode::G_MUL:
+ case TargetOpcode::G_AND:
+ case TargetOpcode::G_OR:
+ case TargetOpcode::G_XOR:
+ break;
+ }
+
+ // Find the mask on the RHS.
+ auto Cst = getIConstantVRegValWithLookThrough(AndRHS, MRI);
+ if (!Cst)
+ return false;
+ auto Mask = Cst->Value;
+ if (!Mask.isMask())
+ return false;
+
+ // No point in combining if there's nothing to truncate.
+ unsigned NarrowWidth = Mask.countTrailingOnes();
+ if (NarrowWidth == WideTy.getSizeInBits())
+ return false;
+ LLT NarrowTy = LLT::scalar(NarrowWidth);
+
+ // Check if adding the zext + truncates could be harmful.
+ auto &MF = *MI.getMF();
+ const auto &TLI = getTargetLowering();
+ LLVMContext &Ctx = MF.getFunction().getContext();
+ auto &DL = MF.getDataLayout();
+ if (!TLI.isTruncateFree(WideTy, NarrowTy, DL, Ctx) ||
+ !TLI.isZExtFree(NarrowTy, WideTy, DL, Ctx))
+ return false;
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {NarrowTy, WideTy}}) ||
+ !isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {WideTy, NarrowTy}}))
+ return false;
+ Register BinOpLHS = LHSInst->getOperand(1).getReg();
+ Register BinOpRHS = LHSInst->getOperand(2).getReg();
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ auto NarrowLHS = Builder.buildTrunc(NarrowTy, BinOpLHS);
+ auto NarrowRHS = Builder.buildTrunc(NarrowTy, BinOpRHS);
+ auto NarrowBinOp =
+ Builder.buildInstr(LHSOpc, {NarrowTy}, {NarrowLHS, NarrowRHS});
+ auto Ext = Builder.buildZExt(WideTy, NarrowBinOp);
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(Ext.getReg(0));
+ Observer.changedInstr(MI);
+ };
+ return true;
+}
+
+bool CombinerHelper::matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo) {
+ unsigned Opc = MI.getOpcode();
+ assert(Opc == TargetOpcode::G_UMULO || Opc == TargetOpcode::G_SMULO);
+ // Check for a constant 2 or a splat of 2 on the RHS.
+ auto RHS = MI.getOperand(3).getReg();
+ bool IsVector = MRI.getType(RHS).isVector();
+ if (!IsVector && !mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(2)))
+ return false;
+ if (IsVector) {
+ // FIXME: There's no mi_match pattern for this yet.
+ auto *RHSDef = getDefIgnoringCopies(RHS, MRI);
+ if (!RHSDef)
+ return false;
+ auto Splat = getBuildVectorConstantSplat(*RHSDef, MRI);
+ if (!Splat || *Splat != 2)
+ return false;
+ }
+
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Observer.changingInstr(MI);
+ unsigned NewOpc = Opc == TargetOpcode::G_UMULO ? TargetOpcode::G_UADDO
+ : TargetOpcode::G_SADDO;
+ MI.setDesc(Builder.getTII().get(NewOpc));
+ MI.getOperand(3).setReg(MI.getOperand(2).getReg());
+ Observer.changedInstr(MI);
+ };
+ return true;
+}
+
+MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_UDIV);
+ auto &UDiv = cast<GenericMachineInstr>(MI);
+ Register Dst = UDiv.getReg(0);
+ Register LHS = UDiv.getReg(1);
+ Register RHS = UDiv.getReg(2);
+ LLT Ty = MRI.getType(Dst);
+ LLT ScalarTy = Ty.getScalarType();
+ const unsigned EltBits = ScalarTy.getScalarSizeInBits();
+ LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
+ LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
+ auto &MIB = Builder;
+ MIB.setInstrAndDebugLoc(MI);
+
+ bool UseNPQ = false;
+ SmallVector<Register, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
+
+ auto BuildUDIVPattern = [&](const Constant *C) {
+ auto *CI = cast<ConstantInt>(C);
+ const APInt &Divisor = CI->getValue();
+ UnsignedDivisonByConstantInfo magics =
+ UnsignedDivisonByConstantInfo::get(Divisor);
+ unsigned PreShift = 0, PostShift = 0;
+
+ // If the divisor is even, we can avoid using the expensive fixup by
+ // shifting the divided value upfront.
+ if (magics.IsAdd != 0 && !Divisor[0]) {
+ PreShift = Divisor.countTrailingZeros();
+ // Get magic number for the shifted divisor.
+ magics =
+ UnsignedDivisonByConstantInfo::get(Divisor.lshr(PreShift), PreShift);
+ assert(magics.IsAdd == 0 && "Should use cheap fixup now");
+ }
+
+ APInt Magic = magics.Magic;
+
+ unsigned SelNPQ;
+ if (magics.IsAdd == 0 || Divisor.isOneValue()) {
+ assert(magics.ShiftAmount < Divisor.getBitWidth() &&
+ "We shouldn't generate an undefined shift!");
+ PostShift = magics.ShiftAmount;
+ SelNPQ = false;
+ } else {
+ PostShift = magics.ShiftAmount - 1;
+ SelNPQ = true;
+ }
+
+ PreShifts.push_back(
+ MIB.buildConstant(ScalarShiftAmtTy, PreShift).getReg(0));
+ MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magic).getReg(0));
+ NPQFactors.push_back(
+ MIB.buildConstant(ScalarTy,
+ SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
+ : APInt::getZero(EltBits))
+ .getReg(0));
+ PostShifts.push_back(
+ MIB.buildConstant(ScalarShiftAmtTy, PostShift).getReg(0));
+ UseNPQ |= SelNPQ;
+ return true;
+ };
+
+ // Collect the shifts/magic values from each element.
+ bool Matched = matchUnaryPredicate(MRI, RHS, BuildUDIVPattern);
+ (void)Matched;
+ assert(Matched && "Expected unary predicate match to succeed");
+
+ Register PreShift, PostShift, MagicFactor, NPQFactor;
+ auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI);
+ if (RHSDef) {
+ PreShift = MIB.buildBuildVector(ShiftAmtTy, PreShifts).getReg(0);
+ MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0);
+ NPQFactor = MIB.buildBuildVector(Ty, NPQFactors).getReg(0);
+ PostShift = MIB.buildBuildVector(ShiftAmtTy, PostShifts).getReg(0);
+ } else {
+ assert(MRI.getType(RHS).isScalar() &&
+ "Non-build_vector operation should have been a scalar");
+ PreShift = PreShifts[0];
+ MagicFactor = MagicFactors[0];
+ PostShift = PostShifts[0];
+ }
+
+ Register Q = LHS;
+ Q = MIB.buildLShr(Ty, Q, PreShift).getReg(0);
+
+ // Multiply the numerator (operand 0) by the magic value.
+ Q = MIB.buildUMulH(Ty, Q, MagicFactor).getReg(0);
+
+ if (UseNPQ) {
+ Register NPQ = MIB.buildSub(Ty, LHS, Q).getReg(0);
+
+ // For vectors we might have a mix of non-NPQ/NPQ paths, so use
+ // G_UMULH to act as a SRL-by-1 for NPQ, else multiply by zero.
+ if (Ty.isVector())
+ NPQ = MIB.buildUMulH(Ty, NPQ, NPQFactor).getReg(0);
+ else
+ NPQ = MIB.buildLShr(Ty, NPQ, MIB.buildConstant(ShiftAmtTy, 1)).getReg(0);
+
+ Q = MIB.buildAdd(Ty, NPQ, Q).getReg(0);
+ }
+
+ Q = MIB.buildLShr(Ty, Q, PostShift).getReg(0);
+ auto One = MIB.buildConstant(Ty, 1);
+ auto IsOne = MIB.buildICmp(
+ CmpInst::Predicate::ICMP_EQ,
+ Ty.isScalar() ? LLT::scalar(1) : Ty.changeElementSize(1), RHS, One);
+ return MIB.buildSelect(Ty, IsOne, LHS, Q);
+}
+
+bool CombinerHelper::matchUDivByConst(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_UDIV);
+ Register Dst = MI.getOperand(0).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ LLT DstTy = MRI.getType(Dst);
+ auto *RHSDef = MRI.getVRegDef(RHS);
+ if (!isConstantOrConstantVector(*RHSDef, MRI))
+ return false;
+
+ auto &MF = *MI.getMF();
+ AttributeList Attr = MF.getFunction().getAttributes();
+ const auto &TLI = getTargetLowering();
+ LLVMContext &Ctx = MF.getFunction().getContext();
+ auto &DL = MF.getDataLayout();
+ if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, DL, Ctx), Attr))
+ return false;
+
+ // Don't do this for minsize because the instruction sequence is usually
+ // larger.
+ if (MF.getFunction().hasMinSize())
+ return false;
+
+ // Don't do this if the types are not going to be legal.
+ if (LI) {
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
+ return false;
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMULH, {DstTy}}))
+ return false;
+ if (!isLegalOrBeforeLegalizer(
+ {TargetOpcode::G_ICMP,
+ {DstTy.isVector() ? DstTy.changeElementSize(1) : LLT::scalar(1),
+ DstTy}}))
+ return false;
+ }
+
+ auto CheckEltValue = [&](const Constant *C) {
+ if (auto *CI = dyn_cast_or_null<ConstantInt>(C))
+ return !CI->isZero();
+ return false;
+ };
+ return matchUnaryPredicate(MRI, RHS, CheckEltValue);
+}
+
+void CombinerHelper::applyUDivByConst(MachineInstr &MI) {
+ auto *NewMI = buildUDivUsingMul(MI);
+ replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
+}
+
+bool CombinerHelper::matchUMulHToLShr(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_UMULH);
+ Register RHS = MI.getOperand(2).getReg();
+ Register Dst = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(Dst);
+ LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
+ auto MatchPow2ExceptOne = [&](const Constant *C) {
+ if (auto *CI = dyn_cast<ConstantInt>(C))
+ return CI->getValue().isPowerOf2() && !CI->getValue().isOne();
+ return false;
+ };
+ if (!matchUnaryPredicate(MRI, RHS, MatchPow2ExceptOne, false))
+ return false;
+ return isLegalOrBeforeLegalizer({TargetOpcode::G_LSHR, {Ty, ShiftAmtTy}});
+}
+
+void CombinerHelper::applyUMulHToLShr(MachineInstr &MI) {
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ Register Dst = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(Dst);
+ LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
+ unsigned NumEltBits = Ty.getScalarSizeInBits();
+
+ Builder.setInstrAndDebugLoc(MI);
+ auto LogBase2 = buildLogBase2(RHS, Builder);
+ auto ShiftAmt =
+ Builder.buildSub(Ty, Builder.buildConstant(Ty, NumEltBits), LogBase2);
+ auto Trunc = Builder.buildZExtOrTrunc(ShiftAmtTy, ShiftAmt);
+ Builder.buildLShr(Dst, LHS, Trunc);
+ MI.eraseFromParent();
+}
+
+bool CombinerHelper::matchRedundantNegOperands(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ unsigned Opc = MI.getOpcode();
+ assert(Opc == TargetOpcode::G_FADD || Opc == TargetOpcode::G_FSUB ||
+ Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
+ Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA);
+
+ Register Dst = MI.getOperand(0).getReg();
+ Register X = MI.getOperand(1).getReg();
+ Register Y = MI.getOperand(2).getReg();
+ LLT Type = MRI.getType(Dst);
+
+ // fold (fadd x, fneg(y)) -> (fsub x, y)
+ // fold (fadd fneg(y), x) -> (fsub x, y)
+ // G_FADD is commutative so both cases are checked by m_GFAdd
+ if (mi_match(Dst, MRI, m_GFAdd(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
+ isLegalOrBeforeLegalizer({TargetOpcode::G_FSUB, {Type}})) {
+ Opc = TargetOpcode::G_FSUB;
+ }
+ /// fold (fsub x, fneg(y)) -> (fadd x, y)
+ else if (mi_match(Dst, MRI, m_GFSub(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
+ isLegalOrBeforeLegalizer({TargetOpcode::G_FADD, {Type}})) {
+ Opc = TargetOpcode::G_FADD;
+ }
+ // fold (fmul fneg(x), fneg(y)) -> (fmul x, y)
+ // fold (fdiv fneg(x), fneg(y)) -> (fdiv x, y)
+ // fold (fmad fneg(x), fneg(y), z) -> (fmad x, y, z)
+ // fold (fma fneg(x), fneg(y), z) -> (fma x, y, z)
+ else if ((Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
+ Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA) &&
+ mi_match(X, MRI, m_GFNeg(m_Reg(X))) &&
+ mi_match(Y, MRI, m_GFNeg(m_Reg(Y)))) {
+ // no opcode change
+ } else
+ return false;
+
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Observer.changingInstr(MI);
+ MI.setDesc(B.getTII().get(Opc));
+ MI.getOperand(1).setReg(X);
+ MI.getOperand(2).setReg(Y);
+ Observer.changedInstr(MI);
+ };
+ return true;
+}
+
bool CombinerHelper::tryCombine(MachineInstr &MI) {
if (tryCombineCopy(MI))
return true;
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
index 8146a67d4dfb..306af808659a 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
@@ -9,7 +9,7 @@
/// Provides analysis for querying information about KnownBits during GISel
/// passes.
//
-//===------------------
+//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
@@ -57,7 +57,7 @@ KnownBits GISelKnownBits::getKnownBits(MachineInstr &MI) {
KnownBits GISelKnownBits::getKnownBits(Register R) {
const LLT Ty = MRI.getType(R);
APInt DemandedElts =
- Ty.isVector() ? APInt::getAllOnesValue(Ty.getNumElements()) : APInt(1, 1);
+ Ty.isVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
return getKnownBits(R, DemandedElts);
}
@@ -198,8 +198,8 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
case TargetOpcode::COPY:
case TargetOpcode::G_PHI:
case TargetOpcode::PHI: {
- Known.One = APInt::getAllOnesValue(BitWidth);
- Known.Zero = APInt::getAllOnesValue(BitWidth);
+ Known.One = APInt::getAllOnes(BitWidth);
+ Known.Zero = APInt::getAllOnes(BitWidth);
// Destination registers should not have subregisters at this
// point of the pipeline, otherwise the main live-range will be
// defined more than once, which is against SSA.
@@ -245,7 +245,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
break;
}
case TargetOpcode::G_CONSTANT: {
- auto CstVal = getConstantVRegVal(R, MRI);
+ auto CstVal = getIConstantVRegVal(R, MRI);
if (!CstVal)
break;
Known = KnownBits::makeConstant(*CstVal);
@@ -510,6 +510,18 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
Known = Known.reverseBits();
break;
}
+ case TargetOpcode::G_CTPOP: {
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
+ Depth + 1);
+ // We can bound the space the count needs. Also, bits known to be zero can't
+ // contribute to the population.
+ unsigned BitsPossiblySet = Known2.countMaxPopulation();
+ unsigned LowBits = Log2_32(BitsPossiblySet)+1;
+ Known.Zero.setBitsFrom(LowBits);
+ // TODO: we could bound Known.One using the lower bound on the number of
+ // bits which might be set provided by popcnt KnownOne2.
+ break;
+ }
case TargetOpcode::G_UBFX: {
KnownBits SrcOpKnown, OffsetKnown, WidthKnown;
computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
@@ -676,9 +688,8 @@ unsigned GISelKnownBits::computeNumSignBits(Register R,
unsigned GISelKnownBits::computeNumSignBits(Register R, unsigned Depth) {
LLT Ty = MRI.getType(R);
- APInt DemandedElts = Ty.isVector()
- ? APInt::getAllOnesValue(Ty.getNumElements())
- : APInt(1, 1);
+ APInt DemandedElts =
+ Ty.isVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
return computeNumSignBits(R, DemandedElts, Depth);
}
diff --git a/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp b/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp
index e0391e6f6467..252b931602c6 100644
--- a/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp
@@ -18,6 +18,7 @@ using namespace llvm;
void llvm::initializeGlobalISel(PassRegistry &Registry) {
initializeIRTranslatorPass(Registry);
initializeLegalizerPass(Registry);
+ initializeLoadStoreOptPass(Registry);
initializeLocalizerPass(Registry);
initializeRegBankSelectPass(Registry);
initializeInstructionSelectPass(Registry);
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 73b763710fdf..87cc60d51bc2 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -32,6 +33,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/StackProtector.h"
#include "llvm/CodeGen/SwitchLoweringUtils.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
@@ -47,6 +49,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/InlineAsm.h"
@@ -114,7 +117,7 @@ static void reportTranslationError(MachineFunction &MF,
R << (" (in function: " + MF.getName() + ")").str();
if (TPC.isGlobalISelAbortEnabled())
- report_fatal_error(R.getMsg());
+ report_fatal_error(Twine(R.getMsg()));
else
ORE.emit(R);
}
@@ -566,7 +569,7 @@ bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
if (BrInst.isUnconditional()) {
// If the unconditional target is the layout successor, fallthrough.
- if (!CurMBB.isLayoutSuccessor(Succ0MBB))
+ if (OptLevel == CodeGenOpt::None || !CurMBB.isLayoutSuccessor(Succ0MBB))
MIRBuilder.buildBr(*Succ0MBB);
// Link successors.
@@ -739,8 +742,7 @@ bool IRTranslator::translateSwitch(const User &U, MachineIRBuilder &MIB) {
// FIXME: At the moment we don't do any splitting optimizations here like
// SelectionDAG does, so this worklist only has one entry.
while (!WorkList.empty()) {
- SwitchWorkListItem W = WorkList.back();
- WorkList.pop_back();
+ SwitchWorkListItem W = WorkList.pop_back_val();
if (!lowerSwitchWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB, MIB))
return false;
}
@@ -784,7 +786,7 @@ bool IRTranslator::emitJumpTableHeader(SwitchCG::JumpTable &JT,
JT.Reg = Sub.getReg(0);
- if (JTH.OmitRangeCheck) {
+ if (JTH.FallthroughUnreachable) {
if (JT.MBB != HeaderBB->getNextNode())
MIB.buildBr(*JT.MBB);
return true;
@@ -936,11 +938,10 @@ bool IRTranslator::lowerJumpTableWorkItem(SwitchCG::SwitchWorkListItem W,
}
}
- // Skip the range check if the fallthrough block is unreachable.
if (FallthroughUnreachable)
- JTH->OmitRangeCheck = true;
+ JTH->FallthroughUnreachable = true;
- if (!JTH->OmitRangeCheck)
+ if (!JTH->FallthroughUnreachable)
addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb);
addSuccessorWithProb(CurMBB, JumpMBB, JumpProb);
CurMBB->normalizeSuccProbs();
@@ -1004,14 +1005,22 @@ void IRTranslator::emitBitTestHeader(SwitchCG::BitTestBlock &B,
Register MinValReg = MIB.buildConstant(SwitchOpTy, B.First).getReg(0);
auto RangeSub = MIB.buildSub(SwitchOpTy, SwitchOpReg, MinValReg);
- // Ensure that the type will fit the mask value.
+ Type *PtrIRTy = Type::getInt8PtrTy(MF->getFunction().getContext());
+ const LLT PtrTy = getLLTForType(*PtrIRTy, *DL);
+
LLT MaskTy = SwitchOpTy;
- for (unsigned I = 0, E = B.Cases.size(); I != E; ++I) {
- if (!isUIntN(SwitchOpTy.getSizeInBits(), B.Cases[I].Mask)) {
- // Switch table case range are encoded into series of masks.
- // Just use pointer type, it's guaranteed to fit.
- MaskTy = LLT::scalar(64);
- break;
+ if (MaskTy.getSizeInBits() > PtrTy.getSizeInBits() ||
+ !isPowerOf2_32(MaskTy.getSizeInBits()))
+ MaskTy = LLT::scalar(PtrTy.getSizeInBits());
+ else {
+ // Ensure that the type will fit the mask value.
+ for (unsigned I = 0, E = B.Cases.size(); I != E; ++I) {
+ if (!isUIntN(SwitchOpTy.getSizeInBits(), B.Cases[I].Mask)) {
+ // Switch table case ranges are encoded into a series of masks.
+ // Just use pointer type, it's guaranteed to fit.
+ MaskTy = LLT::scalar(PtrTy.getSizeInBits());
+ break;
+ }
}
}
Register SubReg = RangeSub.getReg(0);
@@ -1023,13 +1032,13 @@ void IRTranslator::emitBitTestHeader(SwitchCG::BitTestBlock &B,
MachineBasicBlock *MBB = B.Cases[0].ThisBB;
- if (!B.OmitRangeCheck)
+ if (!B.FallthroughUnreachable)
addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);
addSuccessorWithProb(SwitchBB, MBB, B.Prob);
SwitchBB->normalizeSuccProbs();
- if (!B.OmitRangeCheck) {
+ if (!B.FallthroughUnreachable) {
// Conditional branch to the default block.
auto RangeCst = MIB.buildConstant(SwitchOpTy, B.Range);
auto RangeCmp = MIB.buildICmp(CmpInst::Predicate::ICMP_UGT, LLT::scalar(1),
@@ -1129,10 +1138,8 @@ bool IRTranslator::lowerBitTestWorkItem(
BTB->DefaultProb -= DefaultProb / 2;
}
- if (FallthroughUnreachable) {
- // Skip the range check if the fallthrough block is unreachable.
- BTB->OmitRangeCheck = true;
- }
+ if (FallthroughUnreachable)
+ BTB->FallthroughUnreachable = true;
// If we're in the right place, emit the bit test header right now.
if (CurMBB == SwitchMBB) {
@@ -1297,11 +1304,9 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
MachinePointerInfo Ptr(LI.getPointerOperand(), Offsets[i] / 8);
Align BaseAlign = getMemOpAlign(LI);
- AAMDNodes AAMetadata;
- LI.getAAMetadata(AAMetadata);
auto MMO = MF->getMachineMemOperand(
Ptr, Flags, MRI->getType(Regs[i]),
- commonAlignment(BaseAlign, Offsets[i] / 8), AAMetadata, Ranges,
+ commonAlignment(BaseAlign, Offsets[i] / 8), LI.getAAMetadata(), Ranges,
LI.getSyncScopeID(), LI.getOrdering());
MIRBuilder.buildLoad(Regs[i], Addr, *MMO);
}
@@ -1339,11 +1344,9 @@ bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) {
MachinePointerInfo Ptr(SI.getPointerOperand(), Offsets[i] / 8);
Align BaseAlign = getMemOpAlign(SI);
- AAMDNodes AAMetadata;
- SI.getAAMetadata(AAMetadata);
auto MMO = MF->getMachineMemOperand(
Ptr, Flags, MRI->getType(Vals[i]),
- commonAlignment(BaseAlign, Offsets[i] / 8), AAMetadata, nullptr,
+ commonAlignment(BaseAlign, Offsets[i] / 8), SI.getAAMetadata(), nullptr,
SI.getSyncScopeID(), SI.getOrdering());
MIRBuilder.buildStore(Vals[i], Addr, *MMO);
}
@@ -1590,8 +1593,7 @@ bool IRTranslator::translateMemFunc(const CallInst &CI,
Align DstAlign;
Align SrcAlign;
unsigned IsVol =
- cast<ConstantInt>(CI.getArgOperand(CI.getNumArgOperands() - 1))
- ->getZExtValue();
+ cast<ConstantInt>(CI.getArgOperand(CI.arg_size() - 1))->getZExtValue();
if (auto *MCI = dyn_cast<MemCpyInst>(&CI)) {
DstAlign = MCI->getDestAlign().valueOrOne();
@@ -1763,6 +1765,10 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
return TargetOpcode::G_VECREDUCE_UMAX;
case Intrinsic::vector_reduce_umin:
return TargetOpcode::G_VECREDUCE_UMIN;
+ case Intrinsic::lround:
+ return TargetOpcode::G_LROUND;
+ case Intrinsic::llround:
+ return TargetOpcode::G_LLROUND;
}
return Intrinsic::not_intrinsic;
}
@@ -1779,7 +1785,7 @@ bool IRTranslator::translateSimpleIntrinsic(const CallInst &CI,
// Yes. Let's translate it.
SmallVector<llvm::SrcOp, 4> VRegs;
- for (auto &Arg : CI.arg_operands())
+ for (auto &Arg : CI.args())
VRegs.push_back(getOrCreateVReg(*Arg));
MIRBuilder.buildInstr(Op, {getOrCreateVReg(CI)}, VRegs,
@@ -2172,7 +2178,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
// Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission
// is the same on all targets.
- for (unsigned Idx = 0, E = CI.getNumArgOperands(); Idx < E; ++Idx) {
+ for (unsigned Idx = 0, E = CI.arg_size(); Idx < E; ++Idx) {
Value *Arg = CI.getArgOperand(Idx)->stripPointerCasts();
if (isa<ConstantPointerNull>(Arg))
continue; // Skip null pointers. They represent a hole in index space.
@@ -2228,6 +2234,23 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
return true;
}
+ case Intrinsic::trap:
+ case Intrinsic::debugtrap:
+ case Intrinsic::ubsantrap: {
+ StringRef TrapFuncName =
+ CI.getAttributes().getFnAttr("trap-func-name").getValueAsString();
+ if (TrapFuncName.empty())
+ break; // Use the default handling.
+ CallLowering::CallLoweringInfo Info;
+ if (ID == Intrinsic::ubsantrap) {
+ Info.OrigArgs.push_back({getOrCreateVRegs(*CI.getArgOperand(0)),
+ CI.getArgOperand(0)->getType(), 0});
+ }
+ Info.Callee = MachineOperand::CreateES(TrapFuncName.data());
+ Info.CB = &CI;
+ Info.OrigRet = {Register(), Type::getVoidTy(CI.getContext()), 0};
+ return CLI->lowerCall(MIRBuilder, Info);
+ }
#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \
case Intrinsic::INTRINSIC:
#include "llvm/IR/ConstrainedOps.def"
@@ -2321,6 +2344,8 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
if (CI.isInlineAsm())
return translateInlineAsm(CI, MIRBuilder);
+ diagnoseDontCall(CI);
+
Intrinsic::ID ID = Intrinsic::not_intrinsic;
if (F && F->isIntrinsic()) {
ID = F->getIntrinsicID();
@@ -2347,7 +2372,7 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
if (isa<FPMathOperator>(CI))
MIB->copyIRFlags(CI);
- for (auto &Arg : enumerate(CI.arg_operands())) {
+ for (auto &Arg : enumerate(CI.args())) {
// If this is required to be an immediate, don't materialize it in a
// register.
if (CI.paramHasAttr(Arg.index(), Attribute::ImmArg)) {
@@ -2360,10 +2385,15 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
} else {
MIB.addFPImm(cast<ConstantFP>(Arg.value()));
}
- } else if (auto MD = dyn_cast<MetadataAsValue>(Arg.value())) {
- auto *MDN = dyn_cast<MDNode>(MD->getMetadata());
- if (!MDN) // This was probably an MDString.
- return false;
+ } else if (auto *MDVal = dyn_cast<MetadataAsValue>(Arg.value())) {
+ auto *MD = MDVal->getMetadata();
+ auto *MDN = dyn_cast<MDNode>(MD);
+ if (!MDN) {
+ if (auto *ConstMD = dyn_cast<ConstantAsMetadata>(MD))
+ MDN = MDNode::get(MF->getFunction().getContext(), ConstMD);
+ else // This was probably an MDString.
+ return false;
+ }
MIB.addMetadata(MDN);
} else {
ArrayRef<Register> VRegs = getOrCreateVRegs(*Arg.value());
@@ -2472,32 +2502,19 @@ bool IRTranslator::translateInvoke(const User &U,
if (!isa<LandingPadInst>(EHPadBB->getFirstNonPHI()))
return false;
- bool LowerInlineAsm = false;
- if (I.isInlineAsm()) {
- const InlineAsm *IA = cast<InlineAsm>(I.getCalledOperand());
- if (!IA->canThrow()) {
- // Fast path without emitting EH_LABELs.
-
- if (!translateInlineAsm(I, MIRBuilder))
- return false;
-
- MachineBasicBlock *InvokeMBB = &MIRBuilder.getMBB(),
- *ReturnMBB = &getMBB(*ReturnBB);
-
- // Update successor info.
- addSuccessorWithProb(InvokeMBB, ReturnMBB, BranchProbability::getOne());
-
- MIRBuilder.buildBr(*ReturnMBB);
- return true;
- } else {
- LowerInlineAsm = true;
- }
- }
+ bool LowerInlineAsm = I.isInlineAsm();
+ bool NeedEHLabel = true;
+ // If it can't throw then use a fast-path without emitting EH labels.
+ if (LowerInlineAsm)
+ NeedEHLabel = (cast<InlineAsm>(I.getCalledOperand()))->canThrow();
// Emit the actual call, bracketed by EH_LABELs so that the MF knows about
// the region covered by the try.
- MCSymbol *BeginSymbol = Context.createTempSymbol();
- MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(BeginSymbol);
+ MCSymbol *BeginSymbol = nullptr;
+ if (NeedEHLabel) {
+ BeginSymbol = Context.createTempSymbol();
+ MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(BeginSymbol);
+ }
if (LowerInlineAsm) {
if (!translateInlineAsm(I, MIRBuilder))
@@ -2505,8 +2522,11 @@ bool IRTranslator::translateInvoke(const User &U,
} else if (!translateCallBase(I, MIRBuilder))
return false;
- MCSymbol *EndSymbol = Context.createTempSymbol();
- MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(EndSymbol);
+ MCSymbol *EndSymbol = nullptr;
+ if (NeedEHLabel) {
+ EndSymbol = Context.createTempSymbol();
+ MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(EndSymbol);
+ }
SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
BranchProbabilityInfo *BPI = FuncInfo.BPI;
@@ -2528,7 +2548,12 @@ bool IRTranslator::translateInvoke(const User &U,
}
InvokeMBB->normalizeSuccProbs();
- MF->addInvoke(&EHPadMBB, BeginSymbol, EndSymbol);
+ if (NeedEHLabel) {
+ assert(BeginSymbol && "Expected a begin symbol!");
+ assert(EndSymbol && "Expected an end symbol!");
+ MF->addInvoke(&EHPadMBB, BeginSymbol, EndSymbol);
+ }
+
MIRBuilder.buildBr(ReturnMBB);
return true;
}
@@ -2670,6 +2695,28 @@ bool IRTranslator::translateVAArg(const User &U, MachineIRBuilder &MIRBuilder) {
return true;
}
+bool IRTranslator::translateUnreachable(const User &U, MachineIRBuilder &MIRBuilder) {
+ if (!MF->getTarget().Options.TrapUnreachable)
+ return true;
+
+ auto &UI = cast<UnreachableInst>(U);
+ // We may be able to ignore unreachable behind a noreturn call.
+ if (MF->getTarget().Options.NoTrapAfterNoreturn) {
+ const BasicBlock &BB = *UI.getParent();
+ if (&UI != &BB.front()) {
+ BasicBlock::const_iterator PredI =
+ std::prev(BasicBlock::const_iterator(UI));
+ if (const CallInst *Call = dyn_cast<CallInst>(&*PredI)) {
+ if (Call->doesNotReturn())
+ return true;
+ }
+ }
+ }
+
+ MIRBuilder.buildIntrinsic(Intrinsic::trap, ArrayRef<Register>(), true);
+ return true;
+}
+
bool IRTranslator::translateInsertElement(const User &U,
MachineIRBuilder &MIRBuilder) {
// If it is a <1 x Ty> vector, use the scalar as it is
@@ -2757,14 +2804,11 @@ bool IRTranslator::translateAtomicCmpXchg(const User &U,
Register Cmp = getOrCreateVReg(*I.getCompareOperand());
Register NewVal = getOrCreateVReg(*I.getNewValOperand());
- AAMDNodes AAMetadata;
- I.getAAMetadata(AAMetadata);
-
MIRBuilder.buildAtomicCmpXchgWithSuccess(
OldValRes, SuccessRes, Addr, Cmp, NewVal,
*MF->getMachineMemOperand(
MachinePointerInfo(I.getPointerOperand()), Flags, MRI->getType(Cmp),
- getMemOpAlign(I), AAMetadata, nullptr, I.getSyncScopeID(),
+ getMemOpAlign(I), I.getAAMetadata(), nullptr, I.getSyncScopeID(),
I.getSuccessOrdering(), I.getFailureOrdering()));
return true;
}
@@ -2824,14 +2868,11 @@ bool IRTranslator::translateAtomicRMW(const User &U,
break;
}
- AAMDNodes AAMetadata;
- I.getAAMetadata(AAMetadata);
-
MIRBuilder.buildAtomicRMW(
Opcode, Res, Addr, Val,
*MF->getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
Flags, MRI->getType(Val), getMemOpAlign(I),
- AAMetadata, nullptr, I.getSyncScopeID(),
+ I.getAAMetadata(), nullptr, I.getSyncScopeID(),
I.getOrdering()));
return true;
}
@@ -2985,7 +3026,8 @@ bool IRTranslator::translate(const Constant &C, Register Reg) {
return true;
}
-void IRTranslator::finalizeBasicBlock() {
+bool IRTranslator::finalizeBasicBlock(const BasicBlock &BB,
+ MachineBasicBlock &MBB) {
for (auto &BTB : SL->BitTestCases) {
// Emit header first, if it wasn't already emitted.
if (!BTB.Emitted)
@@ -3005,7 +3047,7 @@ void IRTranslator::finalizeBasicBlock() {
// test, and delete the last bit test.
MachineBasicBlock *NextMBB;
- if (BTB.ContiguousRange && j + 2 == ej) {
+ if ((BTB.ContiguousRange || BTB.FallthroughUnreachable) && j + 2 == ej) {
// Second-to-last bit-test with contiguous range: fall through to the
// target of the final bit test.
NextMBB = BTB.Cases[j + 1].TargetBB;
@@ -3019,7 +3061,7 @@ void IRTranslator::finalizeBasicBlock() {
emitBitTestCase(BTB, NextMBB, UnhandledProb, BTB.Reg, BTB.Cases[j], MBB);
- if (BTB.ContiguousRange && j + 2 == ej) {
+ if ((BTB.ContiguousRange || BTB.FallthroughUnreachable) && j + 2 == ej) {
// We need to record the replacement phi edge here that normally
// happens in emitBitTestCase before we delete the case, otherwise the
// phi edge will be lost.
@@ -3054,6 +3096,176 @@ void IRTranslator::finalizeBasicBlock() {
for (auto &SwCase : SL->SwitchCases)
emitSwitchCase(SwCase, &CurBuilder->getMBB(), *CurBuilder);
SL->SwitchCases.clear();
+
+ // Check if we need to generate stack-protector guard checks.
+ StackProtector &SP = getAnalysis<StackProtector>();
+ if (SP.shouldEmitSDCheck(BB)) {
+ const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
+ bool FunctionBasedInstrumentation =
+ TLI.getSSPStackGuardCheck(*MF->getFunction().getParent());
+ SPDescriptor.initialize(&BB, &MBB, FunctionBasedInstrumentation);
+ }
+ // Handle stack protector.
+ if (SPDescriptor.shouldEmitFunctionBasedCheckStackProtector()) {
+ LLVM_DEBUG(dbgs() << "Unimplemented stack protector case\n");
+ return false;
+ } else if (SPDescriptor.shouldEmitStackProtector()) {
+ MachineBasicBlock *ParentMBB = SPDescriptor.getParentMBB();
+ MachineBasicBlock *SuccessMBB = SPDescriptor.getSuccessMBB();
+
+ // Find the split point to split the parent mbb. At the same time copy all
+ // physical registers used in the tail of parent mbb into virtual registers
+ // before the split point and back into physical registers after the split
+ // point. This prevents us needing to deal with Live-ins and many other
+ // register allocation issues caused by us splitting the parent mbb. The
+ // register allocator will clean up said virtual copies later on.
+ MachineBasicBlock::iterator SplitPoint = findSplitPointForStackProtector(
+ ParentMBB, *MF->getSubtarget().getInstrInfo());
+
+ // Splice the terminator of ParentMBB into SuccessMBB.
+ SuccessMBB->splice(SuccessMBB->end(), ParentMBB, SplitPoint,
+ ParentMBB->end());
+
+ // Add compare/jump on neq/jump to the parent BB.
+ if (!emitSPDescriptorParent(SPDescriptor, ParentMBB))
+ return false;
+
+ // CodeGen Failure MBB if we have not codegened it yet.
+ MachineBasicBlock *FailureMBB = SPDescriptor.getFailureMBB();
+ if (FailureMBB->empty()) {
+ if (!emitSPDescriptorFailure(SPDescriptor, FailureMBB))
+ return false;
+ }
+
+ // Clear the Per-BB State.
+ SPDescriptor.resetPerBBState();
+ }
+ return true;
+}
+
+bool IRTranslator::emitSPDescriptorParent(StackProtectorDescriptor &SPD,
+ MachineBasicBlock *ParentBB) {
+ CurBuilder->setInsertPt(*ParentBB, ParentBB->end());
+ // First create the loads to the guard/stack slot for the comparison.
+ const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
+ Type *PtrIRTy = Type::getInt8PtrTy(MF->getFunction().getContext());
+ const LLT PtrTy = getLLTForType(*PtrIRTy, *DL);
+ LLT PtrMemTy = getLLTForMVT(TLI.getPointerMemTy(*DL));
+
+ MachineFrameInfo &MFI = ParentBB->getParent()->getFrameInfo();
+ int FI = MFI.getStackProtectorIndex();
+
+ Register Guard;
+ Register StackSlotPtr = CurBuilder->buildFrameIndex(PtrTy, FI).getReg(0);
+ const Module &M = *ParentBB->getParent()->getFunction().getParent();
+ Align Align = DL->getPrefTypeAlign(Type::getInt8PtrTy(M.getContext()));
+
+ // Generate code to load the content of the guard slot.
+ Register GuardVal =
+ CurBuilder
+ ->buildLoad(PtrMemTy, StackSlotPtr,
+ MachinePointerInfo::getFixedStack(*MF, FI), Align,
+ MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile)
+ .getReg(0);
+
+ if (TLI.useStackGuardXorFP()) {
+ LLVM_DEBUG(dbgs() << "Stack protector xor'ing with FP not yet implemented");
+ return false;
+ }
+
+ // Retrieve guard check function, nullptr if instrumentation is inlined.
+ if (const Function *GuardCheckFn = TLI.getSSPStackGuardCheck(M)) {
+ // This path is currently untestable on GlobalISel, since the only platform
+ // that needs this seems to be Windows, and we fall back on that currently.
+ // The code still lives here in case that changes.
+ // Silence warning about unused variable until the code below that uses
+ // 'GuardCheckFn' is enabled.
+ (void)GuardCheckFn;
+ return false;
+#if 0
+ // The target provides a guard check function to validate the guard value.
+ // Generate a call to that function with the content of the guard slot as
+ // argument.
+ FunctionType *FnTy = GuardCheckFn->getFunctionType();
+ assert(FnTy->getNumParams() == 1 && "Invalid function signature");
+ ISD::ArgFlagsTy Flags;
+ if (GuardCheckFn->hasAttribute(1, Attribute::AttrKind::InReg))
+ Flags.setInReg();
+ CallLowering::ArgInfo GuardArgInfo(
+ {GuardVal, FnTy->getParamType(0), {Flags}});
+
+ CallLowering::CallLoweringInfo Info;
+ Info.OrigArgs.push_back(GuardArgInfo);
+ Info.CallConv = GuardCheckFn->getCallingConv();
+ Info.Callee = MachineOperand::CreateGA(GuardCheckFn, 0);
+ Info.OrigRet = {Register(), FnTy->getReturnType()};
+ if (!CLI->lowerCall(MIRBuilder, Info)) {
+ LLVM_DEBUG(dbgs() << "Failed to lower call to stack protector check\n");
+ return false;
+ }
+ return true;
+#endif
+ }
+
+ // If useLoadStackGuardNode returns true, generate LOAD_STACK_GUARD.
+ // Otherwise, emit a volatile load to retrieve the stack guard value.
+ if (TLI.useLoadStackGuardNode()) {
+ Guard =
+ MRI->createGenericVirtualRegister(LLT::scalar(PtrTy.getSizeInBits()));
+ getStackGuard(Guard, *CurBuilder);
+ } else {
+ // TODO: test using android subtarget when we support @llvm.thread.pointer.
+ const Value *IRGuard = TLI.getSDagStackGuard(M);
+ Register GuardPtr = getOrCreateVReg(*IRGuard);
+
+ Guard = CurBuilder
+ ->buildLoad(PtrMemTy, GuardPtr,
+ MachinePointerInfo::getFixedStack(*MF, FI), Align,
+ MachineMemOperand::MOLoad |
+ MachineMemOperand::MOVolatile)
+ .getReg(0);
+ }
+
+ // Perform the comparison.
+ auto Cmp =
+ CurBuilder->buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), Guard, GuardVal);
+ // If the guard/stackslot do not equal, branch to failure MBB.
+ CurBuilder->buildBrCond(Cmp, *SPD.getFailureMBB());
+ // Otherwise branch to success MBB.
+ CurBuilder->buildBr(*SPD.getSuccessMBB());
+ return true;
+}
+
+bool IRTranslator::emitSPDescriptorFailure(StackProtectorDescriptor &SPD,
+ MachineBasicBlock *FailureBB) {
+ CurBuilder->setInsertPt(*FailureBB, FailureBB->end());
+ const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
+
+ const RTLIB::Libcall Libcall = RTLIB::STACKPROTECTOR_CHECK_FAIL;
+ const char *Name = TLI.getLibcallName(Libcall);
+
+ CallLowering::CallLoweringInfo Info;
+ Info.CallConv = TLI.getLibcallCallingConv(Libcall);
+ Info.Callee = MachineOperand::CreateES(Name);
+ Info.OrigRet = {Register(), Type::getVoidTy(MF->getFunction().getContext()),
+ 0};
+ if (!CLI->lowerCall(*CurBuilder, Info)) {
+ LLVM_DEBUG(dbgs() << "Failed to lower call to stack protector fail\n");
+ return false;
+ }
+
+ // On PS4, the "return address" must still be within the calling function,
+ // even if it's at the very end, so emit an explicit TRAP here.
+ // Passing 'true' for doesNotReturn above won't generate the trap for us.
+ // WebAssembly needs an unreachable instruction after a non-returning call,
+ // because the function return type can be different from __stack_chk_fail's
+ // return type (void).
+ const TargetMachine &TM = MF->getTarget();
+ if (TM.getTargetTriple().isPS4CPU() || TM.getTargetTriple().isWasm()) {
+ LLVM_DEBUG(dbgs() << "Unhandled trap emission for stack protector fail\n");
+ return false;
+ }
+ return true;
}
void IRTranslator::finalizeFunction() {
@@ -3069,6 +3281,7 @@ void IRTranslator::finalizeFunction() {
EntryBuilder.reset();
CurBuilder.reset();
FuncInfo.clear();
+ SPDescriptor.resetPerFunctionState();
}
/// Returns true if a BasicBlock \p BB within a variadic function contains a
@@ -3079,7 +3292,7 @@ static bool checkForMustTailInVarArgFn(bool IsVarArg, const BasicBlock &BB) {
// Walk the block backwards, because tail calls usually only appear at the end
// of a block.
- return std::any_of(BB.rbegin(), BB.rend(), [](const Instruction &I) {
+ return llvm::any_of(llvm::reverse(BB), [](const Instruction &I) {
const auto *CI = dyn_cast<CallInst>(&I);
return CI && CI->isMustTailCall();
});
@@ -3088,8 +3301,6 @@ static bool checkForMustTailInVarArgFn(bool IsVarArg, const BasicBlock &BB) {
bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
MF = &CurMF;
const Function &F = MF->getFunction();
- if (F.empty())
- return false;
GISelCSEAnalysisWrapper &Wrapper =
getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
// Set the CSEConfig and run the analysis.
@@ -3257,7 +3468,8 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
return false;
}
- finalizeBasicBlock();
+ if (!finalizeBasicBlock(*BB, MBB))
+ return false;
}
#ifndef NDEBUG
WrapperObserver.removeObserver(&Verifier);
diff --git a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
index bb4d41cfd69f..4ae427484945 100644
--- a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
@@ -325,7 +325,8 @@ bool InlineAsmLowering::lowerInlineAsm(
return false;
}
- OpInfo.ConstraintVT = TLI->getValueType(DL, OpTy, true).getSimpleVT();
+ OpInfo.ConstraintVT =
+ TLI->getAsmOperandValueType(DL, OpTy, true).getSimpleVT();
} else if (OpInfo.Type == InlineAsm::isOutput && !OpInfo.isIndirect) {
assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
@@ -334,13 +335,17 @@ bool InlineAsmLowering::lowerInlineAsm(
TLI->getSimpleValueType(DL, STy->getElementType(ResNo));
} else {
assert(ResNo == 0 && "Asm only has one result!");
- OpInfo.ConstraintVT = TLI->getSimpleValueType(DL, Call.getType());
+ OpInfo.ConstraintVT =
+ TLI->getAsmOperandValueType(DL, Call.getType()).getSimpleVT();
}
++ResNo;
} else {
OpInfo.ConstraintVT = MVT::Other;
}
+ if (OpInfo.ConstraintVT == MVT::i64x8)
+ return false;
+
// Compute the constraint code and ConstraintType to use.
computeConstraintToUse(TLI, OpInfo);
diff --git a/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
index 75a8f03fcb3f..9b2692486384 100644
--- a/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
@@ -20,8 +20,8 @@
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
-#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -30,9 +30,9 @@
#include "llvm/Config/config.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"
#define DEBUG_TYPE "instruction-select"
@@ -130,9 +130,12 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
// Until then, keep track of the number of blocks to assert that we don't.
const size_t NumBlocks = MF.size();
#endif
+ // Keep track of selected blocks, so we can delete unreachable ones later.
+ DenseSet<MachineBasicBlock *> SelectedBlocks;
for (MachineBasicBlock *MBB : post_order(&MF)) {
ISel->CurMBB = MBB;
+ SelectedBlocks.insert(MBB);
if (MBB->empty())
continue;
@@ -205,6 +208,15 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
if (MBB.empty())
continue;
+ if (!SelectedBlocks.contains(&MBB)) {
+ // This is an unreachable block and therefore hasn't been selected, since
+ // the main selection loop above uses a postorder block traversal.
+ // We delete all the instructions in this block since it's unreachable.
+ MBB.clear();
+ // Don't delete the block in case the block has its address taken or is
+ // still being referenced by a phi somewhere.
+ continue;
+ }
// Try to find redundant copies b/w vregs of the same register class.
bool ReachedBegin = false;
for (auto MII = std::prev(MBB.end()), Begin = MBB.begin(); !ReachedBegin;) {
diff --git a/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
index 4fec9e628ddb..dc5a4d8f85aa 100644
--- a/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
@@ -37,7 +37,7 @@ bool InstructionSelector::isOperandImmEqual(
const MachineOperand &MO, int64_t Value,
const MachineRegisterInfo &MRI) const {
if (MO.isReg() && MO.getReg())
- if (auto VRegVal = getConstantVRegValWithLookThrough(MO.getReg(), MRI))
+ if (auto VRegVal = getIConstantVRegValWithLookThrough(MO.getReg(), MRI))
return VRegVal->Value.getSExtValue() == Value;
return false;
}
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
index 7c5e4e52ca3e..1f0738a8d9d2 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
@@ -153,6 +153,14 @@ LegalityPredicate LegalityPredicates::scalarOrEltSizeNotPow2(unsigned TypeIdx) {
};
}
+LegalityPredicate LegalityPredicates::sizeNotMultipleOf(unsigned TypeIdx,
+ unsigned Size) {
+ return [=](const LegalityQuery &Query) {
+ const LLT QueryTy = Query.Types[TypeIdx];
+ return QueryTy.isScalar() && QueryTy.getSizeInBits() % Size != 0;
+ };
+}
+
LegalityPredicate LegalityPredicates::sizeNotPow2(unsigned TypeIdx) {
return [=](const LegalityQuery &Query) {
const LLT QueryTy = Query.Types[TypeIdx];
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
index fc2570ae4b8e..75b7fcb5663a 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
@@ -63,6 +63,16 @@ LegalizeMutation LegalizeMutations::widenScalarOrEltToNextPow2(unsigned TypeIdx,
};
}
+LegalizeMutation
+LegalizeMutations::widenScalarOrEltToNextMultipleOf(unsigned TypeIdx,
+ unsigned Size) {
+ return [=](const LegalityQuery &Query) {
+ const LLT Ty = Query.Types[TypeIdx];
+ unsigned NewEltSizeInBits = alignTo(Ty.getScalarSizeInBits(), Size);
+ return std::make_pair(TypeIdx, Ty.changeElementSize(NewEltSizeInBits));
+ };
+}
+
LegalizeMutation LegalizeMutations::moreElementsToNextPow2(unsigned TypeIdx,
unsigned Min) {
return [=](const LegalityQuery &Query) {
diff --git a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
index 635b1445ee07..0ab4a7f64840 100644
--- a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
@@ -218,9 +218,6 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,
RAIIMFObsDelInstaller Installer(MF, WrapperObserver);
LegalizerHelper Helper(MF, LI, WrapperObserver, MIRBuilder);
LegalizationArtifactCombiner ArtCombiner(MIRBuilder, MRI, LI);
- auto RemoveDeadInstFromLists = [&WrapperObserver](MachineInstr *DeadMI) {
- WrapperObserver.erasingInstr(*DeadMI);
- };
bool Changed = false;
SmallVector<MachineInstr *, 128> RetryList;
do {
@@ -232,9 +229,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,
assert(isPreISelGenericOpcode(MI.getOpcode()) &&
"Expecting generic opcode");
if (isTriviallyDead(MI, MRI)) {
- LLVM_DEBUG(dbgs() << MI << "Is dead; erasing.\n");
- MI.eraseFromParentAndMarkDBGValuesForRemoval();
- LocObserver.checkpoint(false);
+ eraseInstr(MI, MRI, &LocObserver);
continue;
}
@@ -281,10 +276,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,
assert(isPreISelGenericOpcode(MI.getOpcode()) &&
"Expecting generic opcode");
if (isTriviallyDead(MI, MRI)) {
- LLVM_DEBUG(dbgs() << MI << "Is dead\n");
- RemoveDeadInstFromLists(&MI);
- MI.eraseFromParentAndMarkDBGValuesForRemoval();
- LocObserver.checkpoint(false);
+ eraseInstr(MI, MRI, &LocObserver);
continue;
}
SmallVector<MachineInstr *, 4> DeadInstructions;
@@ -292,11 +284,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,
if (ArtCombiner.tryCombineInstruction(MI, DeadInstructions,
WrapperObserver)) {
WorkListObserver.printNewInstrs();
- for (auto *DeadMI : DeadInstructions) {
- LLVM_DEBUG(dbgs() << "Is dead: " << *DeadMI);
- RemoveDeadInstFromLists(DeadMI);
- DeadMI->eraseFromParentAndMarkDBGValuesForRemoval();
- }
+ eraseInstrs(DeadInstructions, MRI, &LocObserver);
LocObserver.checkpoint(
VerifyDebugLocs ==
DebugLocVerifyLevel::LegalizationsAndArtifactCombiners);
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index c1e0d2549c42..c74bec7dfc0d 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -29,6 +29,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
#define DEBUG_TYPE "legalizer"
@@ -497,8 +498,8 @@ static bool isLibCallInTailPosition(MachineInstr &MI,
return false;
// It's not safe to eliminate the sign / zero extension of the return value.
- if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
- CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
+ if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
+ CallerAttrs.hasRetAttr(Attribute::SExt))
return false;
// Only tail call if the following instruction is a standard return or if we
@@ -2051,10 +2052,15 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
Register SrcReg = MI.getOperand(1).getReg();
- // First ZEXT the input.
- auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg);
+ // First extend the input.
+ unsigned ExtOpc = MI.getOpcode() == TargetOpcode::G_CTTZ ||
+ MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF
+ ? TargetOpcode::G_ANYEXT
+ : TargetOpcode::G_ZEXT;
+ auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
LLT CurTy = MRI.getType(SrcReg);
- if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
+ unsigned NewOpc = MI.getOpcode();
+ if (NewOpc == TargetOpcode::G_CTTZ) {
// The count is the same in the larger type except if the original
// value was zero. This can be handled by setting the bit just off
// the top of the original type.
@@ -2062,10 +2068,12 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
MIBSrc = MIRBuilder.buildOr(
WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
+ // Now we know the operand is non-zero, use the more relaxed opcode.
+ NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
}
// Perform the operation at the larger size.
- auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
+ auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
// This is already the correct result for CTPOP and CTTZs
if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
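In the widened G_CTTZ above, OR-ing in the bit just past the original width means the wide count can never exceed the narrow bit width, and a zero input yields exactly CurTy.getSizeInBits(), which is the result the narrow G_CTTZ would have produced. A standalone sketch of the same trick for i16 widened to i32, assuming the GCC/Clang __builtin_ctz builtin (illustrative only, not part of the patch):

#include <cassert>
#include <cstdint>

// Widen a 16-bit cttz to 32 bits: set bit 16 so the builtin never sees zero
// and a zero input reports 16 trailing zeros, matching the narrow semantics.
static unsigned cttz16ViaWide(uint16_t X) {
  uint32_t Wide = static_cast<uint32_t>(X) | (1u << 16);
  return static_cast<unsigned>(__builtin_ctz(Wide));
}

int main() {
  assert(cttz16ViaWide(0x0001) == 0);
  assert(cttz16ViaWide(0x0008) == 3);
  assert(cttz16ViaWide(0x8000) == 15);
  assert(cttz16ViaWide(0x0000) == 16); // zero input: full narrow width
  return 0;
}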
@@ -2427,7 +2435,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
widenScalarSrc(
MI, LLT::vector(VecTy.getElementCount(), WideTy.getSizeInBits()), 1,
- TargetOpcode::G_SEXT);
+ TargetOpcode::G_ANYEXT);
widenScalarDst(MI, WideTy, 0);
Observer.changedInstr(MI);
@@ -2662,7 +2670,7 @@ static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
// Now figure out the amount we need to shift to get the target bits.
auto OffsetMask = B.buildConstant(
- IdxTy, ~(APInt::getAllOnesValue(IdxTy.getSizeInBits()) << Log2EltRatio));
+ IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
return B.buildShl(IdxTy, OffsetIdx,
B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
@@ -2886,13 +2894,14 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
MachineMemOperand &MMO = LoadMI.getMMO();
LLT MemTy = MMO.getMemoryType();
MachineFunction &MF = MIRBuilder.getMF();
- if (MemTy.isVector())
- return UnableToLegalize;
unsigned MemSizeInBits = MemTy.getSizeInBits();
unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();
if (MemSizeInBits != MemStoreSizeInBits) {
+ if (MemTy.isVector())
+ return UnableToLegalize;
+
// Promote to a byte-sized load if not loading an integral number of
// bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
@@ -2928,16 +2937,12 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
return Legalized;
}
- // This load needs splitting into power of 2 sized loads.
- if (DstTy.isVector())
- return UnableToLegalize;
- if (isPowerOf2_32(MemSizeInBits))
- return UnableToLegalize; // Don't know what we're being asked to do.
-
// Big endian lowering not implemented.
if (MIRBuilder.getDataLayout().isBigEndian())
return UnableToLegalize;
+ // This load needs splitting into power of 2 sized loads.
+ //
// Our strategy here is to generate anyextending loads for the smaller
// types up to next power-2 result type, and then combine the two larger
// result values together, before truncating back down to the non-pow-2
@@ -2950,8 +2955,34 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
// v1 = i24 trunc v5
// By doing this we generate the correct truncate which should get
// combined away as an artifact with a matching extend.
- uint64_t LargeSplitSize = PowerOf2Floor(MemSizeInBits);
- uint64_t SmallSplitSize = MemSizeInBits - LargeSplitSize;
+
+ uint64_t LargeSplitSize, SmallSplitSize;
+
+ if (!isPowerOf2_32(MemSizeInBits)) {
+ // This load needs splitting into power of 2 sized loads.
+ LargeSplitSize = PowerOf2Floor(MemSizeInBits);
+ SmallSplitSize = MemSizeInBits - LargeSplitSize;
+ } else {
+ // This is already a power of 2, but we still need to split this in half.
+ //
+ // Assume we're being asked to decompose an unaligned load.
+ // TODO: If this requires multiple splits, handle them all at once.
+ auto &Ctx = MF.getFunction().getContext();
+ if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
+ return UnableToLegalize;
+
+ SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
+ }
+
+ if (MemTy.isVector()) {
+ // TODO: Handle vector extloads
+ if (MemTy != DstTy)
+ return UnableToLegalize;
+
+ // TODO: We can do better than scalarizing the vector and at least split it
+ // in half.
+ return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
+ }
MachineMemOperand *LargeMMO =
MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
@@ -2976,9 +3007,16 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
if (AnyExtTy == DstTy)
MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
- else {
+ else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
MIRBuilder.buildTrunc(DstReg, {Or});
+ } else {
+ assert(DstTy.isPointer() && "expected pointer");
+ auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
+
+ // FIXME: We currently consider this to be illegal for non-integral address
+ // spaces, but we still need a way to reinterpret the bits.
+ MIRBuilder.buildIntToPtr(DstReg, Or);
}
LoadMI.eraseFromParent();
@@ -2999,13 +3037,13 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
MachineMemOperand &MMO = **StoreMI.memoperands_begin();
LLT MemTy = MMO.getMemoryType();
- if (SrcTy.isVector())
- return UnableToLegalize;
-
unsigned StoreWidth = MemTy.getSizeInBits();
unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();
if (StoreWidth != StoreSizeInBits) {
+ if (SrcTy.isVector())
+ return UnableToLegalize;
+
// Promote to a byte-sized store with upper bits zero if not
// storing an integral number of bytes. For example, promote
// TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
@@ -3026,18 +3064,44 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
return Legalized;
}
- if (isPowerOf2_32(MemTy.getSizeInBits()))
- return UnableToLegalize; // Don't know what we're being asked to do.
+ if (MemTy.isVector()) {
+ // TODO: Handle vector trunc stores
+ if (MemTy != SrcTy)
+ return UnableToLegalize;
+
+ // TODO: We can do better than scalarizing the vector and at least split it
+ // in half.
+ return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType());
+ }
+
+ unsigned MemSizeInBits = MemTy.getSizeInBits();
+ uint64_t LargeSplitSize, SmallSplitSize;
+
+ if (!isPowerOf2_32(MemSizeInBits)) {
+ LargeSplitSize = PowerOf2Floor(MemTy.getSizeInBits());
+ SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
+ } else {
+ auto &Ctx = MF.getFunction().getContext();
+ if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
+ return UnableToLegalize; // Don't know what we're being asked to do.
+
+ SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
+ }
// Extend to the next pow-2. If this store was itself the result of lowering,
// e.g. an s56 store being broken into s32 + s24, we might have a stored type
- // that's wider the stored size.
- const LLT NewSrcTy = LLT::scalar(NextPowerOf2(MemTy.getSizeInBits()));
+ // that's wider than the stored size.
+ unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits());
+ const LLT NewSrcTy = LLT::scalar(AnyExtSize);
+
+ if (SrcTy.isPointer()) {
+ const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits());
+ SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
+ }
+
auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
// Obtain the smaller value by shifting away the larger value.
- uint64_t LargeSplitSize = PowerOf2Floor(MemTy.getSizeInBits());
- uint64_t SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
@@ -3045,9 +3109,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
LLT PtrTy = MRI.getType(PtrReg);
auto OffsetCst = MIRBuilder.buildConstant(
LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
- Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
auto SmallPtr =
- MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);
+ MIRBuilder.buildPtrAdd(PtrTy, PtrReg, OffsetCst);
MachineMemOperand *LargeMMO =
MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
@@ -3424,6 +3487,14 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
case G_ROTL:
case G_ROTR:
return lowerRotate(MI);
+ case G_MEMSET:
+ case G_MEMCPY:
+ case G_MEMMOVE:
+ return lowerMemCpyFamily(MI);
+ case G_MEMCPY_INLINE:
+ return lowerMemcpyInline(MI);
+ GISEL_VECREDUCE_CASES_NONSEQ
+ return lowerVectorReduction(MI);
}
}
@@ -4004,9 +4075,7 @@ LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
// If the index is a constant, we can really break this down as you would
// expect, and index into the target size pieces.
int64_t IdxVal;
- auto MaybeCst =
- getConstantVRegValWithLookThrough(Idx, MRI, /*LookThroughInstrs*/ true,
- /*HandleFConstants*/ false);
+ auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
if (MaybeCst) {
IdxVal = MaybeCst->Value.getSExtValue();
// Avoid out of bounds indexing the pieces.
@@ -4363,6 +4432,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_FMAXIMUM:
case G_FSHL:
case G_FSHR:
+ case G_ROTL:
+ case G_ROTR:
case G_FREEZE:
case G_SADDSAT:
case G_SSUBSAT:
@@ -4572,35 +4643,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
return Legalized;
}
-LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
- MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
- unsigned Opc = MI.getOpcode();
- assert(Opc != TargetOpcode::G_VECREDUCE_SEQ_FADD &&
- Opc != TargetOpcode::G_VECREDUCE_SEQ_FMUL &&
- "Sequential reductions not expected");
-
- if (TypeIdx != 1)
- return UnableToLegalize;
-
- // The semantics of the normal non-sequential reductions allow us to freely
- // re-associate the operation.
- Register SrcReg = MI.getOperand(1).getReg();
- LLT SrcTy = MRI.getType(SrcReg);
- Register DstReg = MI.getOperand(0).getReg();
- LLT DstTy = MRI.getType(DstReg);
-
- if (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0)
- return UnableToLegalize;
-
- SmallVector<Register> SplitSrcs;
- const unsigned NumParts = SrcTy.getNumElements() / NarrowTy.getNumElements();
- extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs);
- SmallVector<Register> PartialReductions;
- for (unsigned Part = 0; Part < NumParts; ++Part) {
- PartialReductions.push_back(
- MIRBuilder.buildInstr(Opc, {DstTy}, {SplitSrcs[Part]}).getReg(0));
- }
-
+static unsigned getScalarOpcForReduction(unsigned Opc) {
unsigned ScalarOpc;
switch (Opc) {
case TargetOpcode::G_VECREDUCE_FADD:
@@ -4643,10 +4686,81 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
ScalarOpc = TargetOpcode::G_UMIN;
break;
default:
- LLVM_DEBUG(dbgs() << "Can't legalize: unknown reduction kind.\n");
+ llvm_unreachable("Unhandled reduction");
+ }
+ return ScalarOpc;
+}
+
+LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
+ MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
+ unsigned Opc = MI.getOpcode();
+ assert(Opc != TargetOpcode::G_VECREDUCE_SEQ_FADD &&
+ Opc != TargetOpcode::G_VECREDUCE_SEQ_FMUL &&
+ "Sequential reductions not expected");
+
+ if (TypeIdx != 1)
return UnableToLegalize;
+
+ // The semantics of the normal non-sequential reductions allow us to freely
+ // re-associate the operation.
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+
+ if (NarrowTy.isVector() &&
+ (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
+ return UnableToLegalize;
+
+ unsigned ScalarOpc = getScalarOpcForReduction(Opc);
+ SmallVector<Register> SplitSrcs;
+ // If NarrowTy is a scalar then we're being asked to scalarize.
+ const unsigned NumParts =
+ NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
+ : SrcTy.getNumElements();
+
+ extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs);
+ if (NarrowTy.isScalar()) {
+ if (DstTy != NarrowTy)
+ return UnableToLegalize; // FIXME: handle implicit extensions.
+
+ if (isPowerOf2_32(NumParts)) {
+ // Generate a tree of scalar operations to reduce the critical path.
+ SmallVector<Register> PartialResults;
+ unsigned NumPartsLeft = NumParts;
+ while (NumPartsLeft > 1) {
+ for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
+ PartialResults.emplace_back(
+ MIRBuilder
+ .buildInstr(ScalarOpc, {NarrowTy},
+ {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
+ .getReg(0));
+ }
+ SplitSrcs = PartialResults;
+ PartialResults.clear();
+ NumPartsLeft = SplitSrcs.size();
+ }
+ assert(SplitSrcs.size() == 1);
+ MIRBuilder.buildCopy(DstReg, SplitSrcs[0]);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ // If we can't generate a tree, then just do sequential operations.
+ Register Acc = SplitSrcs[0];
+ for (unsigned Idx = 1; Idx < NumParts; ++Idx)
+ Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
+ .getReg(0);
+ MIRBuilder.buildCopy(DstReg, Acc);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ SmallVector<Register> PartialReductions;
+ for (unsigned Part = 0; Part < NumParts; ++Part) {
+ PartialReductions.push_back(
+ MIRBuilder.buildInstr(Opc, {DstTy}, {SplitSrcs[Part]}).getReg(0));
}
+
// If the types involved are powers of 2, we can generate intermediate vector
// ops, before generating a final reduction operation.
if (isPowerOf2_32(SrcTy.getNumElements()) &&
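When NarrowTy is a scalar, the code above combines the extracted elements pairwise in a tree if the part count is a power of two, shortening the critical path from N - 1 dependent ops to log2(N) levels, and otherwise falls back to a sequential chain. A standalone sketch of the two schedules, using integer addition as the scalar op (illustrative only, not part of the patch):

#include <cassert>
#include <cstdint>
#include <vector>

// Pairwise (tree) reduction; assumes a power-of-two part count, as the code
// above does before taking this path.
static uint32_t treeReduceAdd(std::vector<uint32_t> Parts) {
  while (Parts.size() > 1) {
    std::vector<uint32_t> Next;
    for (size_t I = 0; I + 1 < Parts.size(); I += 2)
      Next.push_back(Parts[I] + Parts[I + 1]);
    Parts = Next;
  }
  return Parts[0];
}

// Sequential reduction; the fallback for non-power-of-two part counts.
static uint32_t seqReduceAdd(const std::vector<uint32_t> &Parts) {
  uint32_t Acc = Parts[0];
  for (size_t I = 1; I < Parts.size(); ++I)
    Acc += Parts[I];
  return Acc;
}

int main() {
  std::vector<uint32_t> V = {1, 2, 3, 4, 5, 6, 7, 8};
  assert(treeReduceAdd(V) == 36 && seqReduceAdd(V) == 36);
  return 0;
}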
@@ -4706,7 +4820,7 @@ LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
Register InH = MRI.createGenericVirtualRegister(HalfTy);
MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
- if (Amt.isNullValue()) {
+ if (Amt.isZero()) {
MIRBuilder.buildMerge(MI.getOperand(0), {InL, InH});
MI.eraseFromParent();
return Legalized;
@@ -4815,10 +4929,9 @@ LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
const LLT HalfTy = LLT::scalar(NewBitSize);
const LLT CondTy = LLT::scalar(1);
- if (const MachineInstr *KShiftAmt =
- getOpcodeDef(TargetOpcode::G_CONSTANT, Amt, MRI)) {
- return narrowScalarShiftByConstant(
- MI, KShiftAmt->getOperand(1).getCImm()->getValue(), HalfTy, ShiftAmtTy);
+ if (auto VRegAndVal = getIConstantVRegValWithLookThrough(Amt, MRI)) {
+ return narrowScalarShiftByConstant(MI, VRegAndVal->Value, HalfTy,
+ ShiftAmtTy);
}
// TODO: Expand with known bits.
@@ -5224,26 +5337,23 @@ LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
if (Ty.isVector())
return UnableToLegalize;
- unsigned SrcSize = MRI.getType(Src1).getSizeInBits();
- unsigned DstSize = Ty.getSizeInBits();
+ unsigned Size = Ty.getSizeInBits();
unsigned NarrowSize = NarrowTy.getSizeInBits();
- if (DstSize % NarrowSize != 0 || SrcSize % NarrowSize != 0)
+ if (Size % NarrowSize != 0)
return UnableToLegalize;
- unsigned NumDstParts = DstSize / NarrowSize;
- unsigned NumSrcParts = SrcSize / NarrowSize;
+ unsigned NumParts = Size / NarrowSize;
bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
- unsigned DstTmpParts = NumDstParts * (IsMulHigh ? 2 : 1);
+ unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
SmallVector<Register, 2> Src1Parts, Src2Parts;
SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
- extractParts(Src1, NarrowTy, NumSrcParts, Src1Parts);
- extractParts(Src2, NarrowTy, NumSrcParts, Src2Parts);
+ extractParts(Src1, NarrowTy, NumParts, Src1Parts);
+ extractParts(Src2, NarrowTy, NumParts, Src2Parts);
multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
// Take only high half of registers if this is high mul.
- ArrayRef<Register> DstRegs(
- IsMulHigh ? &DstTmpRegs[DstTmpParts / 2] : &DstTmpRegs[0], NumDstParts);
+ ArrayRef<Register> DstRegs(&DstTmpRegs[DstTmpParts - NumParts], NumParts);
MIRBuilder.buildMerge(DstReg, DstRegs);
MI.eraseFromParent();
return Legalized;
@@ -5951,7 +6061,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
Register Src = MI.getOperand(1).getReg();
Register Amt = MI.getOperand(2).getReg();
LLT DstTy = MRI.getType(Dst);
- LLT SrcTy = MRI.getType(Dst);
+ LLT SrcTy = MRI.getType(Src);
LLT AmtTy = MRI.getType(Amt);
unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
@@ -5965,6 +6075,27 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
isPowerOf2_32(EltSizeInBits))
return lowerRotateWithReverseRotate(MI);
+ // If a funnel shift is supported, use it.
+ unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
+ unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
+ bool IsFShLegal = false;
+ if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
+ LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
+ auto buildFunnelShift = [&](unsigned Opc, Register R1, Register R2,
+ Register R3) {
+ MIRBuilder.buildInstr(Opc, {R1}, {R2, R2, R3});
+ MI.eraseFromParent();
+ return Legalized;
+ };
+ // If a funnel shift in the other direction is supported, use it.
+ if (IsFShLegal) {
+ return buildFunnelShift(FShOpc, Dst, Src, Amt);
+ } else if (isPowerOf2_32(EltSizeInBits)) {
+ Amt = MIRBuilder.buildNeg(DstTy, Amt).getReg(0);
+ return buildFunnelShift(RevFsh, Dst, Src, Amt);
+ }
+ }
+
auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
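The new fast path above rewrites a rotate as a funnel shift with both data inputs equal: rotl(x, s) == fshl(x, x, s), and when the bit width is a power of two the opposite funnel shift works with a negated amount, rotl(x, s) == fshr(x, x, -s). A standalone check of those identities at 32 bits (illustrative only, not part of the patch):

#include <cassert>
#include <cstdint>

static uint32_t fshl32(uint32_t Hi, uint32_t Lo, uint32_t S) {
  S &= 31;
  return S ? (Hi << S) | (Lo >> (32 - S)) : Hi;
}
static uint32_t fshr32(uint32_t Hi, uint32_t Lo, uint32_t S) {
  S &= 31;
  return S ? (Lo >> S) | (Hi << (32 - S)) : Lo;
}
static uint32_t rotl32(uint32_t X, uint32_t S) {
  S &= 31;
  return S ? (X << S) | (X >> (32 - S)) : X;
}

int main() {
  const uint32_t X = 0x12345678u;
  for (uint32_t S = 0; S < 64; ++S) {
    assert(rotl32(X, S) == fshl32(X, X, S));      // rotl as fshl
    assert(rotl32(X, S) == fshr32(X, X, 0u - S)); // rotl as fshr(-amt)
  }
  return 0;
}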
@@ -6150,7 +6281,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) {
APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
: APFloat::IEEEdouble(),
- APInt::getNullValue(SrcTy.getSizeInBits()));
+ APInt::getZero(SrcTy.getSizeInBits()));
TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);
MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
@@ -7293,3 +7424,563 @@ LegalizerHelper::lowerAbsToMaxNeg(MachineInstr &MI) {
MI.eraseFromParent();
return Legalized;
}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerVectorReduction(MachineInstr &MI) {
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+ // The source could be a scalar if the IR type was <1 x sN>.
+ if (SrcTy.isScalar()) {
+ if (DstTy.getSizeInBits() > SrcTy.getSizeInBits())
+ return UnableToLegalize; // FIXME: handle extension.
+ // This can be just a plain copy.
+ Observer.changingInstr(MI);
+ MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::COPY));
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ return UnableToLegalize;
+}
+
+static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
+ // On Darwin, -Os means optimize for size without hurting performance, so
+ // only really optimize for size when -Oz (MinSize) is used.
+ if (MF.getTarget().getTargetTriple().isOSDarwin())
+ return MF.getFunction().hasMinSize();
+ return MF.getFunction().hasOptSize();
+}
+
+// Returns a list of types to use for memory op lowering in MemOps. A partial
+// port of findOptimalMemOpLowering in TargetLowering.
+static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
+ unsigned Limit, const MemOp &Op,
+ unsigned DstAS, unsigned SrcAS,
+ const AttributeList &FuncAttributes,
+ const TargetLowering &TLI) {
+ if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
+ return false;
+
+ LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
+
+ if (Ty == LLT()) {
+ // Use the largest scalar type whose alignment constraints are satisfied.
+ // We only need to check DstAlign here as SrcAlign is always greater or
+ // equal to DstAlign (or zero).
+ Ty = LLT::scalar(64);
+ if (Op.isFixedDstAlign())
+ while (Op.getDstAlign() < Ty.getSizeInBytes() &&
+ !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
+ Ty = LLT::scalar(Ty.getSizeInBytes());
+ assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
+ // FIXME: check for the largest legal type we can load/store to.
+ }
+
+ unsigned NumMemOps = 0;
+ uint64_t Size = Op.size();
+ while (Size) {
+ unsigned TySize = Ty.getSizeInBytes();
+ while (TySize > Size) {
+ // For now, only use non-vector loads / stores for the left-over pieces.
+ LLT NewTy = Ty;
+ // FIXME: check for mem op safety and legality of the types. Not all of
+ // SDAGisms map cleanly to GISel concepts.
+ if (NewTy.isVector())
+ NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
+ NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1));
+ unsigned NewTySize = NewTy.getSizeInBytes();
+ assert(NewTySize > 0 && "Could not find appropriate type");
+
+ // If the new LLT cannot cover all of the remaining bits, then consider
+ // issuing a (or a pair of) unaligned and overlapping load / store.
+ bool Fast;
+ // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
+ MVT VT = getMVTForLLT(Ty);
+ if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
+ TLI.allowsMisalignedMemoryAccesses(
+ VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
+ MachineMemOperand::MONone, &Fast) &&
+ Fast)
+ TySize = Size;
+ else {
+ Ty = NewTy;
+ TySize = NewTySize;
+ }
+ }
+
+ if (++NumMemOps > Limit)
+ return false;
+
+ MemOps.push_back(Ty);
+ Size -= TySize;
+ }
+
+ return true;
+}
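findGISelOptimalMemOpLowering above covers the operation length greedily with the widest type that still fits, dropping to the next smaller power of two for the tail unless the target allows a single overlapping unaligned access. A simplified standalone sketch of the non-overlapping path, assuming a 13-byte operation and an 8-byte preferred type (illustrative only, not part of the patch):

#include <cassert>
#include <cstdint>
#include <vector>

// Greedy split of Size bytes into power-of-two chunks no larger than MaxBytes,
// mirroring the fallback path (no overlapping accesses) of the helper above.
static std::vector<unsigned> splitMemOp(uint64_t Size, unsigned MaxBytes) {
  std::vector<unsigned> Chunks;
  unsigned Cur = MaxBytes; // MaxBytes is assumed to be a power of two
  while (Size) {
    while (Cur > Size)
      Cur /= 2; // shrink to the next smaller power of two
    Chunks.push_back(Cur);
    Size -= Cur;
  }
  return Chunks;
}

int main() {
  const std::vector<unsigned> Expected = {8, 4, 1};
  assert(splitMemOp(13, 8) == Expected); // 13 bytes -> s64 + s32 + s8
  return 0;
}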
+
+static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
+ if (Ty.isVector())
+ return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
+ Ty.getNumElements());
+ return IntegerType::get(C, Ty.getSizeInBits());
+}
+
+// Get a vectorized representation of the memset value operand, GISel edition.
+static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
+ MachineRegisterInfo &MRI = *MIB.getMRI();
+ unsigned NumBits = Ty.getScalarSizeInBits();
+ auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
+ if (!Ty.isVector() && ValVRegAndVal) {
+ APInt Scalar = ValVRegAndVal->Value.truncOrSelf(8);
+ APInt SplatVal = APInt::getSplat(NumBits, Scalar);
+ return MIB.buildConstant(Ty, SplatVal).getReg(0);
+ }
+
+ // Extend the byte value to the larger type, and then multiply by a magic
+ // value 0x010101... in order to replicate it across every byte.
+ // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
+ if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
+ return MIB.buildConstant(Ty, 0).getReg(0);
+ }
+
+ LLT ExtType = Ty.getScalarType();
+ auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
+ if (NumBits > 8) {
+ APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
+ auto MagicMI = MIB.buildConstant(ExtType, Magic);
+ Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
+ }
+
+ // For vector types create a G_BUILD_VECTOR.
+ if (Ty.isVector())
+ Val = MIB.buildSplatVector(Ty, Val).getReg(0);
+
+ return Val;
+}
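getMemsetValue above produces the wide store value either by splatting a known constant byte directly or by zero-extending the byte and multiplying with the 0x0101...01 magic constant; both yield the byte repeated across the wider type. A standalone sketch of that equivalence at 64 bits (illustrative only, not part of the patch):

#include <cassert>
#include <cstdint>

// Replicate an 8-bit value across a 64-bit word by explicit splatting.
static uint64_t splatByte(uint8_t B) {
  uint64_t Out = 0;
  for (int I = 0; I < 8; ++I)
    Out |= static_cast<uint64_t>(B) << (8 * I);
  return Out;
}

int main() {
  const uint64_t Magic = 0x0101010101010101ULL; // APInt::getSplat of 0x01
  for (unsigned V = 0; V <= 0xFF; ++V)
    assert(splatByte(static_cast<uint8_t>(V)) ==
           static_cast<uint64_t>(V) * Magic);
  return 0;
}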
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerMemset(MachineInstr &MI, Register Dst, Register Val,
+ uint64_t KnownLen, Align Alignment,
+ bool IsVolatile) {
+ auto &MF = *MI.getParent()->getParent();
+ const auto &TLI = *MF.getSubtarget().getTargetLowering();
+ auto &DL = MF.getDataLayout();
+ LLVMContext &C = MF.getFunction().getContext();
+
+ assert(KnownLen != 0 && "Have a zero length memset length!");
+
+ bool DstAlignCanChange = false;
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ bool OptSize = shouldLowerMemFuncForSize(MF);
+
+ MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
+ if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
+ DstAlignCanChange = true;
+
+ unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
+ std::vector<LLT> MemOps;
+
+ const auto &DstMMO = **MI.memoperands_begin();
+ MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
+
+ auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
+ bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
+
+ if (!findGISelOptimalMemOpLowering(MemOps, Limit,
+ MemOp::Set(KnownLen, DstAlignCanChange,
+ Alignment,
+ /*IsZeroMemset=*/IsZeroVal,
+ /*IsVolatile=*/IsVolatile),
+ DstPtrInfo.getAddrSpace(), ~0u,
+ MF.getFunction().getAttributes(), TLI))
+ return UnableToLegalize;
+
+ if (DstAlignCanChange) {
+ // Get an estimate of the type from the LLT.
+ Type *IRTy = getTypeForLLT(MemOps[0], C);
+ Align NewAlign = DL.getABITypeAlign(IRTy);
+ if (NewAlign > Alignment) {
+ Alignment = NewAlign;
+ unsigned FI = FIDef->getOperand(1).getIndex();
+ // Give the stack frame object a larger alignment if needed.
+ if (MFI.getObjectAlign(FI) < Alignment)
+ MFI.setObjectAlignment(FI, Alignment);
+ }
+ }
+
+ MachineIRBuilder MIB(MI);
+ // Find the largest store and generate the bit pattern for it.
+ LLT LargestTy = MemOps[0];
+ for (unsigned i = 1; i < MemOps.size(); i++)
+ if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
+ LargestTy = MemOps[i];
+
+ // The memset stored value is always defined as an s8, so in order to make it
+ // work with larger store types we need to repeat the bit pattern across the
+ // wider type.
+ Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
+
+ if (!MemSetValue)
+ return UnableToLegalize;
+
+ // Generate the stores. For each store type in the list, we generate the
+ // matching store of that type to the destination address.
+ LLT PtrTy = MRI.getType(Dst);
+ unsigned DstOff = 0;
+ unsigned Size = KnownLen;
+ for (unsigned I = 0; I < MemOps.size(); I++) {
+ LLT Ty = MemOps[I];
+ unsigned TySize = Ty.getSizeInBytes();
+ if (TySize > Size) {
+ // Issuing an unaligned load / store pair that overlaps with the previous
+ // pair. Adjust the offset accordingly.
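+ // E.g. a 7-byte memset may be lowered as two s32 stores; the second store
+ // is then issued at offset 3 and overlaps the first store by one byte.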
+ assert(I == MemOps.size() - 1 && I != 0);
+ DstOff -= TySize - Size;
+ }
+
+ // If this store is smaller than the largest store see whether we can get
+ // the smaller value for free with a truncate.
+ Register Value = MemSetValue;
+ if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
+ MVT VT = getMVTForLLT(Ty);
+ MVT LargestVT = getMVTForLLT(LargestTy);
+ if (!LargestTy.isVector() && !Ty.isVector() &&
+ TLI.isTruncateFree(LargestVT, VT))
+ Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
+ else
+ Value = getMemsetValue(Val, Ty, MIB);
+ if (!Value)
+ return UnableToLegalize;
+ }
+
+ auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, Ty);
+
+ Register Ptr = Dst;
+ if (DstOff != 0) {
+ auto Offset =
+ MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
+ Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
+ }
+
+ MIB.buildStore(Value, Ptr, *StoreMMO);
+ DstOff += Ty.getSizeInBytes();
+ Size -= TySize;
+ }
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
+
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ Register Len = MI.getOperand(2).getReg();
+
+ const auto *MMOIt = MI.memoperands_begin();
+ const MachineMemOperand *MemOp = *MMOIt;
+ bool IsVolatile = MemOp->isVolatile();
+
+ // See if this is a constant length copy
+ auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
+ // FIXME: support dynamically sized G_MEMCPY_INLINE
+ assert(LenVRegAndVal.hasValue() &&
+ "inline memcpy with dynamic size is not yet supported");
+ uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
+ if (KnownLen == 0) {
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ const auto &DstMMO = **MI.memoperands_begin();
+ const auto &SrcMMO = **std::next(MI.memoperands_begin());
+ Align DstAlign = DstMMO.getBaseAlign();
+ Align SrcAlign = SrcMMO.getBaseAlign();
+
+ return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
+ IsVolatile);
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
+ uint64_t KnownLen, Align DstAlign,
+ Align SrcAlign, bool IsVolatile) {
+ assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
+ return lowerMemcpy(MI, Dst, Src, KnownLen,
+ std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
+ IsVolatile);
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
+ uint64_t KnownLen, uint64_t Limit, Align DstAlign,
+ Align SrcAlign, bool IsVolatile) {
+ auto &MF = *MI.getParent()->getParent();
+ const auto &TLI = *MF.getSubtarget().getTargetLowering();
+ auto &DL = MF.getDataLayout();
+ LLVMContext &C = MF.getFunction().getContext();
+
+ assert(KnownLen != 0 && "Have a zero length memcpy length!");
+
+ bool DstAlignCanChange = false;
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ Align Alignment = commonAlignment(DstAlign, SrcAlign);
+
+ MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
+ if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
+ DstAlignCanChange = true;
+
+ // FIXME: infer better src pointer alignment like SelectionDAG does here.
+ // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
+ // if the memcpy is in a tail call position.
+
+ std::vector<LLT> MemOps;
+
+ const auto &DstMMO = **MI.memoperands_begin();
+ const auto &SrcMMO = **std::next(MI.memoperands_begin());
+ MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
+ MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
+
+ if (!findGISelOptimalMemOpLowering(
+ MemOps, Limit,
+ MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
+ IsVolatile),
+ DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
+ MF.getFunction().getAttributes(), TLI))
+ return UnableToLegalize;
+
+ if (DstAlignCanChange) {
+ // Get an estimate of the type from the LLT.
+ Type *IRTy = getTypeForLLT(MemOps[0], C);
+ Align NewAlign = DL.getABITypeAlign(IRTy);
+
+ // Don't promote to an alignment that would require dynamic stack
+ // realignment.
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ if (!TRI->hasStackRealignment(MF))
+ while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
+ NewAlign = NewAlign / 2;
+
+ if (NewAlign > Alignment) {
+ Alignment = NewAlign;
+ unsigned FI = FIDef->getOperand(1).getIndex();
+ // Give the stack frame object a larger alignment if needed.
+ if (MFI.getObjectAlign(FI) < Alignment)
+ MFI.setObjectAlignment(FI, Alignment);
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
+
+ MachineIRBuilder MIB(MI);
+ // Now we need to emit a load/store pair for each of the types we've
+ // collected. I.e. for each type, generate a load of that width from the
+ // source pointer, and then generate a corresponding store of the loaded
+ // value to the destination buffer. This can result in a sequence of loads
+ // and stores of mixed types, depending on what the target specifies as
+ // good types to use.
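+ // For example, a 12-byte copy might be lowered as an s64 load/store pair
+ // followed by an s32 load/store pair, depending on the target's choices.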
+ unsigned CurrOffset = 0;
+ LLT PtrTy = MRI.getType(Src);
+ unsigned Size = KnownLen;
+ for (auto CopyTy : MemOps) {
+ // Issuing an unaligned load / store pair that overlaps with the previous
+ // pair. Adjust the offset accordingly.
+ if (CopyTy.getSizeInBytes() > Size)
+ CurrOffset -= CopyTy.getSizeInBytes() - Size;
+
+ // Construct MMOs for the accesses.
+ auto *LoadMMO =
+ MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
+ auto *StoreMMO =
+ MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
+
+ // Create the load.
+ Register LoadPtr = Src;
+ Register Offset;
+ if (CurrOffset != 0) {
+ Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset)
+ .getReg(0);
+ LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
+ }
+ auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
+
+ // Create the store.
+ Register StorePtr =
+ CurrOffset == 0 ? Dst : MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
+ MIB.buildStore(LdVal, StorePtr, *StoreMMO);
+ CurrOffset += CopyTy.getSizeInBytes();
+ Size -= CopyTy.getSizeInBytes();
+ }
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
+ uint64_t KnownLen, Align DstAlign, Align SrcAlign,
+ bool IsVolatile) {
+ auto &MF = *MI.getParent()->getParent();
+ const auto &TLI = *MF.getSubtarget().getTargetLowering();
+ auto &DL = MF.getDataLayout();
+ LLVMContext &C = MF.getFunction().getContext();
+
+ assert(KnownLen != 0 && "Have a zero length memmove length!");
+
+ bool DstAlignCanChange = false;
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ bool OptSize = shouldLowerMemFuncForSize(MF);
+ Align Alignment = commonAlignment(DstAlign, SrcAlign);
+
+ MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
+ if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
+ DstAlignCanChange = true;
+
+ unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
+ std::vector<LLT> MemOps;
+
+ const auto &DstMMO = **MI.memoperands_begin();
+ const auto &SrcMMO = **std::next(MI.memoperands_begin());
+ MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
+ MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
+
+ // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
+ // to a bug in its findOptimalMemOpLowering implementation. For now do the
+ // same thing here.
+ if (!findGISelOptimalMemOpLowering(
+ MemOps, Limit,
+ MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
+ /*IsVolatile*/ true),
+ DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
+ MF.getFunction().getAttributes(), TLI))
+ return UnableToLegalize;
+
+ if (DstAlignCanChange) {
+ // Get an estimate of the type from the LLT.
+ Type *IRTy = getTypeForLLT(MemOps[0], C);
+ Align NewAlign = DL.getABITypeAlign(IRTy);
+
+ // Don't promote to an alignment that would require dynamic stack
+ // realignment.
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ if (!TRI->hasStackRealignment(MF))
+ while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
+ NewAlign = NewAlign / 2;
+
+ if (NewAlign > Alignment) {
+ Alignment = NewAlign;
+ unsigned FI = FIDef->getOperand(1).getIndex();
+ // Give the stack frame object a larger alignment if needed.
+ if (MFI.getObjectAlign(FI) < Alignment)
+ MFI.setObjectAlignment(FI, Alignment);
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
+
+ MachineIRBuilder MIB(MI);
+ // Memmove requires that we perform all the loads before issuing the stores.
+ // Apart from that, this loop is pretty much doing the same thing as the
+ // memcpy codegen function.
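+ // E.g. if the destination overlaps the source at src + 1, storing each
+ // chunk right after loading it could clobber bytes a later load still needs.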
+ unsigned CurrOffset = 0;
+ LLT PtrTy = MRI.getType(Src);
+ SmallVector<Register, 16> LoadVals;
+ for (auto CopyTy : MemOps) {
+ // Construct MMO for the load.
+ auto *LoadMMO =
+ MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
+
+ // Create the load.
+ Register LoadPtr = Src;
+ if (CurrOffset != 0) {
+ auto Offset =
+ MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
+ LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
+ }
+ LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
+ CurrOffset += CopyTy.getSizeInBytes();
+ }
+
+ CurrOffset = 0;
+ for (unsigned I = 0; I < MemOps.size(); ++I) {
+ LLT CopyTy = MemOps[I];
+ // Now store the values loaded.
+ auto *StoreMMO =
+ MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
+
+ Register StorePtr = Dst;
+ if (CurrOffset != 0) {
+ auto Offset =
+ MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
+ StorePtr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
+ }
+ MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
+ CurrOffset += CopyTy.getSizeInBytes();
+ }
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
+ const unsigned Opc = MI.getOpcode();
+ // This combine is fairly complex so it's not written with a separate
+ // matcher function.
+ assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
+ Opc == TargetOpcode::G_MEMSET) &&
+ "Expected memcpy like instruction");
+
+ auto MMOIt = MI.memoperands_begin();
+ const MachineMemOperand *MemOp = *MMOIt;
+
+ Align DstAlign = MemOp->getBaseAlign();
+ Align SrcAlign;
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ Register Len = MI.getOperand(2).getReg();
+
+ if (Opc != TargetOpcode::G_MEMSET) {
+ assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
+ MemOp = *(++MMOIt);
+ SrcAlign = MemOp->getBaseAlign();
+ }
+
+ // See if this is a constant length copy
+ auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
+ if (!LenVRegAndVal)
+ return UnableToLegalize;
+ uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
+
+ if (KnownLen == 0) {
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ bool IsVolatile = MemOp->isVolatile();
+ if (Opc == TargetOpcode::G_MEMCPY_INLINE)
+ return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
+ IsVolatile);
+
+ // Don't try to optimize volatile.
+ if (IsVolatile)
+ return UnableToLegalize;
+
+ if (MaxLen && KnownLen > MaxLen)
+ return UnableToLegalize;
+
+ if (Opc == TargetOpcode::G_MEMCPY) {
+ auto &MF = *MI.getParent()->getParent();
+ const auto &TLI = *MF.getSubtarget().getTargetLowering();
+ bool OptSize = shouldLowerMemFuncForSize(MF);
+ uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
+ return lowerMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
+ IsVolatile);
+ }
+ if (Opc == TargetOpcode::G_MEMMOVE)
+ return lowerMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
+ if (Opc == TargetOpcode::G_MEMSET)
+ return lowerMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
+ return UnableToLegalize;
+}
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
index 3e3141657e87..30697913a6a4 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
@@ -352,8 +352,7 @@ LegalizerInfo::getAction(const MachineInstr &MI,
SmallVector<LegalityQuery::MemDesc, 2> MemDescrs;
for (const auto &MMO : MI.memoperands())
- MemDescrs.push_back({MMO->getMemoryType(), 8 * MMO->getAlign().value(),
- MMO->getSuccessOrdering()});
+ MemDescrs.push_back({*MMO});
return getAction({MI.getOpcode(), Types, MemDescrs});
}
diff --git a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
new file mode 100644
index 000000000000..03dda806cb1e
--- /dev/null
+++ b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
@@ -0,0 +1,669 @@
+//===- LoadStoreOpt.cpp ----------- Generic memory optimizations -*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the LoadStoreOpt optimization pass.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/LoadStoreOpt.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Register.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+
+#define DEBUG_TYPE "loadstore-opt"
+
+using namespace llvm;
+using namespace ore;
+using namespace MIPatternMatch;
+
+STATISTIC(NumStoresMerged, "Number of stores merged");
+
+const unsigned MaxStoreSizeToForm = 128;
+
+char LoadStoreOpt::ID = 0;
+INITIALIZE_PASS_BEGIN(LoadStoreOpt, DEBUG_TYPE, "Generic memory optimizations",
+ false, false)
+INITIALIZE_PASS_END(LoadStoreOpt, DEBUG_TYPE, "Generic memory optimizations",
+ false, false)
+
+LoadStoreOpt::LoadStoreOpt(std::function<bool(const MachineFunction &)> F)
+ : MachineFunctionPass(ID), DoNotRunPass(F) {}
+
+LoadStoreOpt::LoadStoreOpt()
+ : LoadStoreOpt([](const MachineFunction &) { return false; }) {}
+
+void LoadStoreOpt::init(MachineFunction &MF) {
+ this->MF = &MF;
+ MRI = &MF.getRegInfo();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ TLI = MF.getSubtarget().getTargetLowering();
+ LI = MF.getSubtarget().getLegalizerInfo();
+ Builder.setMF(MF);
+ IsPreLegalizer = !MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::Legalized);
+ InstsToErase.clear();
+}
+
+void LoadStoreOpt::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AAResultsWrapperPass>();
+ getSelectionDAGFallbackAnalysisUsage(AU);
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+BaseIndexOffset GISelAddressing::getPointerInfo(Register Ptr,
+ MachineRegisterInfo &MRI) {
+ BaseIndexOffset Info;
+ Register PtrAddRHS;
+ if (!mi_match(Ptr, MRI, m_GPtrAdd(m_Reg(Info.BaseReg), m_Reg(PtrAddRHS)))) {
+ Info.BaseReg = Ptr;
+ Info.IndexReg = Register();
+ Info.IsIndexSignExt = false;
+ return Info;
+ }
+
+ auto RHSCst = getIConstantVRegValWithLookThrough(PtrAddRHS, MRI);
+ if (RHSCst)
+ Info.Offset = RHSCst->Value.getSExtValue();
+
+ // Just recognize a simple case for now. In future we'll need to match
+ // indexing patterns for base + index + constant.
+ Info.IndexReg = PtrAddRHS;
+ Info.IsIndexSignExt = false;
+ return Info;
+}
+
+bool GISelAddressing::aliasIsKnownForLoadStore(const MachineInstr &MI1,
+ const MachineInstr &MI2,
+ bool &IsAlias,
+ MachineRegisterInfo &MRI) {
+ auto *LdSt1 = dyn_cast<GLoadStore>(&MI1);
+ auto *LdSt2 = dyn_cast<GLoadStore>(&MI2);
+ if (!LdSt1 || !LdSt2)
+ return false;
+
+ BaseIndexOffset BasePtr0 = getPointerInfo(LdSt1->getPointerReg(), MRI);
+ BaseIndexOffset BasePtr1 = getPointerInfo(LdSt2->getPointerReg(), MRI);
+
+ if (!BasePtr0.BaseReg.isValid() || !BasePtr1.BaseReg.isValid())
+ return false;
+
+ int64_t Size1 = LdSt1->getMemSize();
+ int64_t Size2 = LdSt2->getMemSize();
+
+ int64_t PtrDiff;
+ if (BasePtr0.BaseReg == BasePtr1.BaseReg) {
+ PtrDiff = BasePtr1.Offset - BasePtr0.Offset;
+ // If the size of the memory access is unknown, do not use it in the
+ // analysis. One example of an unknown-size memory access is a load/store
+ // of a scalable vector object on the stack.
+ // BasePtr1 is PtrDiff away from BasePtr0. When the relevant access size is
+ // known, we can decide definitively whether the two accesses alias:
+ if (PtrDiff >= 0 &&
+ Size1 != static_cast<int64_t>(MemoryLocation::UnknownSize)) {
+ // [----BasePtr0----]
+ // [---BasePtr1--]
+ // ========PtrDiff========>
+ IsAlias = !(Size1 <= PtrDiff);
+ return true;
+ }
+ if (PtrDiff < 0 &&
+ Size2 != static_cast<int64_t>(MemoryLocation::UnknownSize)) {
+ // [----BasePtr0----]
+ // [---BasePtr1--]
+ // =====(-PtrDiff)====>
+ IsAlias = !((PtrDiff + Size2) <= 0);
+ return true;
+ }
+ return false;
+ }
+
+ // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
+ // able to calculate their relative offset if at least one arises
+ // from an alloca. However, these allocas cannot overlap and we
+ // can infer there is no alias.
+ auto *Base0Def = getDefIgnoringCopies(BasePtr0.BaseReg, MRI);
+ auto *Base1Def = getDefIgnoringCopies(BasePtr1.BaseReg, MRI);
+ if (!Base0Def || !Base1Def)
+ return false; // Couldn't tell anything.
+
+
+ if (Base0Def->getOpcode() != Base1Def->getOpcode())
+ return false;
+
+ if (Base0Def->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
+ MachineFrameInfo &MFI = Base0Def->getMF()->getFrameInfo();
+ // The bases are different frame indices. If at least one of them is a
+ // non-fixed object (e.g. from an alloca), the stack objects cannot
+ // overlap, so there is no alias.
+ if (Base0Def != Base1Def &&
+ (!MFI.isFixedObjectIndex(Base0Def->getOperand(1).getIndex()) ||
+ !MFI.isFixedObjectIndex(Base1Def->getOperand(1).getIndex()))) {
+ IsAlias = false;
+ return true;
+ }
+ }
+
+ // This implementation is a lot more primitive than the SDAG one for now.
+ // FIXME: what about constant pools?
+ if (Base0Def->getOpcode() == TargetOpcode::G_GLOBAL_VALUE) {
+ auto GV0 = Base0Def->getOperand(1).getGlobal();
+ auto GV1 = Base1Def->getOperand(1).getGlobal();
+ if (GV0 != GV1) {
+ IsAlias = false;
+ return true;
+ }
+ }
+
+ // Can't tell anything about aliasing.
+ return false;
+}
+
+bool GISelAddressing::instMayAlias(const MachineInstr &MI,
+ const MachineInstr &Other,
+ MachineRegisterInfo &MRI,
+ AliasAnalysis *AA) {
+ struct MemUseCharacteristics {
+ bool IsVolatile;
+ bool IsAtomic;
+ Register BasePtr;
+ int64_t Offset;
+ uint64_t NumBytes;
+ MachineMemOperand *MMO;
+ };
+
+ auto getCharacteristics =
+ [&](const MachineInstr *MI) -> MemUseCharacteristics {
+ if (const auto *LS = dyn_cast<GLoadStore>(MI)) {
+ Register BaseReg;
+ int64_t Offset = 0;
+ // No pre/post-inc addressing modes are considered here, unlike in SDAG.
+ if (!mi_match(LS->getPointerReg(), MRI,
+ m_GPtrAdd(m_Reg(BaseReg), m_ICst(Offset)))) {
+ BaseReg = LS->getPointerReg();
+ Offset = 0;
+ }
+
+ uint64_t Size = MemoryLocation::getSizeOrUnknown(
+ LS->getMMO().getMemoryType().getSizeInBytes());
+ return {LS->isVolatile(), LS->isAtomic(), BaseReg,
+ Offset /*base offset*/, Size, &LS->getMMO()};
+ }
+ // FIXME: support recognizing lifetime instructions.
+ // Default.
+ return {false /*isvolatile*/,
+ /*isAtomic*/ false, Register(),
+ (int64_t)0 /*offset*/, 0 /*size*/,
+ (MachineMemOperand *)nullptr};
+ };
+ MemUseCharacteristics MUC0 = getCharacteristics(&MI),
+ MUC1 = getCharacteristics(&Other);
+
+ // If they are to the same address, then they must be aliases.
+ if (MUC0.BasePtr.isValid() && MUC0.BasePtr == MUC1.BasePtr &&
+ MUC0.Offset == MUC1.Offset)
+ return true;
+
+ // If they are both volatile then they cannot be reordered.
+ if (MUC0.IsVolatile && MUC1.IsVolatile)
+ return true;
+
+ // Be conservative about atomics for the moment
+ // TODO: This is way overconservative for unordered atomics (see D66309)
+ if (MUC0.IsAtomic && MUC1.IsAtomic)
+ return true;
+
+ // If one operation reads from invariant memory and the other may store,
+ // they cannot alias.
+ if (MUC0.MMO && MUC1.MMO) {
+ if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
+ (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
+ return false;
+ }
+
+ // Try to prove that there is aliasing, or that there is no aliasing. Either
+ // way, we can return now. If nothing can be proved, proceed with more tests.
+ bool IsAlias;
+ if (GISelAddressing::aliasIsKnownForLoadStore(MI, Other, IsAlias, MRI))
+ return IsAlias;
+
+ // The following all rely on MMO0 and MMO1 being valid.
+ if (!MUC0.MMO || !MUC1.MMO)
+ return true;
+
+ // FIXME: port the alignment based alias analysis from SDAG's isAlias().
+ int64_t SrcValOffset0 = MUC0.MMO->getOffset();
+ int64_t SrcValOffset1 = MUC1.MMO->getOffset();
+ uint64_t Size0 = MUC0.NumBytes;
+ uint64_t Size1 = MUC1.NumBytes;
+ if (AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
+ Size0 != MemoryLocation::UnknownSize &&
+ Size1 != MemoryLocation::UnknownSize) {
+ // Use alias analysis information.
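+ // Conservatively extend each access back to the smaller of the two offsets
+ // so a single AA query over the extended ranges covers both accesses.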
+ int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
+ int64_t Overlap0 = Size0 + SrcValOffset0 - MinOffset;
+ int64_t Overlap1 = Size1 + SrcValOffset1 - MinOffset;
+ if (AA->isNoAlias(MemoryLocation(MUC0.MMO->getValue(), Overlap0,
+ MUC0.MMO->getAAInfo()),
+ MemoryLocation(MUC1.MMO->getValue(), Overlap1,
+ MUC1.MMO->getAAInfo())))
+ return false;
+ }
+
+ // Otherwise we have to assume they alias.
+ return true;
+}
+
+/// Returns true if the instruction creates an unavoidable hazard that
+/// forces a boundary between store merge candidates.
+static bool isInstHardMergeHazard(MachineInstr &MI) {
+ return MI.hasUnmodeledSideEffects() || MI.hasOrderedMemoryRef();
+}
+
+bool LoadStoreOpt::mergeStores(SmallVectorImpl<GStore *> &StoresToMerge) {
+ // Try to merge all the stores in the vector, splitting into separate segments
+ // as necessary.
+ assert(StoresToMerge.size() > 1 && "Expected multiple stores to merge");
+ LLT OrigTy = MRI->getType(StoresToMerge[0]->getValueReg());
+ LLT PtrTy = MRI->getType(StoresToMerge[0]->getPointerReg());
+ unsigned AS = PtrTy.getAddressSpace();
+ // Ensure the legal store info is computed for this address space.
+ initializeStoreMergeTargetInfo(AS);
+ const auto &LegalSizes = LegalStoreSizes[AS];
+
+#ifndef NDEBUG
+ for (auto StoreMI : StoresToMerge)
+ assert(MRI->getType(StoreMI->getValueReg()) == OrigTy);
+#endif
+
+ const auto &DL = MF->getFunction().getParent()->getDataLayout();
+ bool AnyMerged = false;
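+ // Greedily merge a leading group of stores into the widest legal store type
+ // we can form. E.g. six adjacent s32 stores with s64 as the widest legal
+ // store type are merged two at a time, producing three s64 stores.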
+ do {
+ unsigned NumPow2 = PowerOf2Floor(StoresToMerge.size());
+ unsigned MaxSizeBits = NumPow2 * OrigTy.getSizeInBits().getFixedSize();
+ // Compute the biggest store we can generate to handle the number of stores.
+ unsigned MergeSizeBits;
+ for (MergeSizeBits = MaxSizeBits; MergeSizeBits > 1; MergeSizeBits /= 2) {
+ LLT StoreTy = LLT::scalar(MergeSizeBits);
+ EVT StoreEVT =
+ getApproximateEVTForLLT(StoreTy, DL, MF->getFunction().getContext());
+ if (LegalSizes.size() > MergeSizeBits && LegalSizes[MergeSizeBits] &&
+ TLI->canMergeStoresTo(AS, StoreEVT, *MF) &&
+ (TLI->isTypeLegal(StoreEVT)))
+ break; // We can generate a MergeSize bits store.
+ }
+ if (MergeSizeBits <= OrigTy.getSizeInBits())
+ return AnyMerged; // No greater merge.
+
+ unsigned NumStoresToMerge = MergeSizeBits / OrigTy.getSizeInBits();
+ // Perform the actual merging.
+ SmallVector<GStore *, 8> SingleMergeStores(
+ StoresToMerge.begin(), StoresToMerge.begin() + NumStoresToMerge);
+ AnyMerged |= doSingleStoreMerge(SingleMergeStores);
+ StoresToMerge.erase(StoresToMerge.begin(),
+ StoresToMerge.begin() + NumStoresToMerge);
+ } while (StoresToMerge.size() > 1);
+ return AnyMerged;
+}
+
+bool LoadStoreOpt::isLegalOrBeforeLegalizer(const LegalityQuery &Query,
+ MachineFunction &MF) const {
+ auto Action = LI->getAction(Query).Action;
+ // If the instruction is unsupported, it can't be legalized at all.
+ if (Action == LegalizeActions::Unsupported)
+ return false;
+ return IsPreLegalizer || Action == LegalizeAction::Legal;
+}
+
+bool LoadStoreOpt::doSingleStoreMerge(SmallVectorImpl<GStore *> &Stores) {
+ assert(Stores.size() > 1);
+ // We know that all the stores are consecutive and there are no aliasing
+ // operations in the range. However, the values that are being stored may be
+ // generated anywhere before each store. To ensure we have the values
+ // available, we materialize the wide value and new store at the place of the
+ // final store in the merge sequence.
+ GStore *FirstStore = Stores[0];
+ const unsigned NumStores = Stores.size();
+ LLT SmallTy = MRI->getType(FirstStore->getValueReg());
+ LLT WideValueTy =
+ LLT::scalar(NumStores * SmallTy.getSizeInBits().getFixedSize());
+
+ // For each store, compute pairwise merged debug locs.
+ DebugLoc MergedLoc;
+ for (unsigned AIdx = 0, BIdx = 1; BIdx < NumStores; ++AIdx, ++BIdx)
+ MergedLoc = DILocation::getMergedLocation(Stores[AIdx]->getDebugLoc(),
+ Stores[BIdx]->getDebugLoc());
+ Builder.setInstr(*Stores.back());
+ Builder.setDebugLoc(MergedLoc);
+
+ // If all of the store values are constants, then create a wide constant
+ // directly. Otherwise, we need to generate some instructions to merge the
+ // existing values together into a wider type.
+ SmallVector<APInt, 8> ConstantVals;
+ for (auto Store : Stores) {
+ auto MaybeCst =
+ getIConstantVRegValWithLookThrough(Store->getValueReg(), *MRI);
+ if (!MaybeCst) {
+ ConstantVals.clear();
+ break;
+ }
+ ConstantVals.emplace_back(MaybeCst->Value);
+ }
+
+ Register WideReg;
+ auto *WideMMO =
+ MF->getMachineMemOperand(&FirstStore->getMMO(), 0, WideValueTy);
+ if (ConstantVals.empty()) {
+ // Mimic the SDAG behaviour here and don't try to do anything for unknown
+ // values. In future, we should also support the cases of loads and
+ // extracted vector elements.
+ return false;
+ }
+
+ assert(ConstantVals.size() == NumStores);
+ // Check if our wide constant is legal.
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {WideValueTy}}, *MF))
+ return false;
+ APInt WideConst(WideValueTy.getSizeInBits(), 0);
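+ // E.g. merging two s32 stores of 0x11111111 (lower address) and 0x22222222
+ // produces the single wide s64 constant 0x2222222211111111.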
+ for (unsigned Idx = 0; Idx < ConstantVals.size(); ++Idx) {
+ // Insert the smaller constant into the corresponding position in the
+ // wider one.
+ WideConst.insertBits(ConstantVals[Idx], Idx * SmallTy.getSizeInBits());
+ }
+ WideReg = Builder.buildConstant(WideValueTy, WideConst).getReg(0);
+ auto NewStore =
+ Builder.buildStore(WideReg, FirstStore->getPointerReg(), *WideMMO);
+ (void) NewStore;
+ LLVM_DEBUG(dbgs() << "Created merged store: " << *NewStore);
+ NumStoresMerged += Stores.size();
+
+ MachineOptimizationRemarkEmitter MORE(*MF, nullptr);
+ MORE.emit([&]() {
+ MachineOptimizationRemark R(DEBUG_TYPE, "MergedStore",
+ FirstStore->getDebugLoc(),
+ FirstStore->getParent());
+ R << "Merged " << NV("NumMerged", Stores.size()) << " stores of "
+ << NV("OrigWidth", SmallTy.getSizeInBytes())
+ << " bytes into a single store of "
+ << NV("NewWidth", WideValueTy.getSizeInBytes()) << " bytes";
+ return R;
+ });
+
+ for (auto MI : Stores)
+ InstsToErase.insert(MI);
+ return true;
+}
+
+bool LoadStoreOpt::processMergeCandidate(StoreMergeCandidate &C) {
+ if (C.Stores.size() < 2) {
+ C.reset();
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << "Checking store merge candidate with " << C.Stores.size()
+ << " stores, starting with " << *C.Stores[0]);
+ // We know that the stores in the candidate are adjacent.
+ // Now we need to check whether any of the potentially aliasing instructions
+ // recorded during the search alias with the stores that were added to the
+ // candidate after they were recorded.
+ // For example, if we have the candidate:
+ // C.Stores = [ST1, ST2, ST3, ST4]
+ // and after seeing ST2 we saw a load LD1, which did not alias with ST1 or
+ // ST2, then we would have recorded it into the PotentialAliases structure
+ // with the associated index value of "1". Then we see ST3 and ST4 and add
+ // them to the candidate group. We know that LD1 does not alias with ST1 or
+ // ST2, since we already did that check. However we don't yet know if it
+ // may alias ST3 and ST4, so we perform those checks now.
+ SmallVector<GStore *> StoresToMerge;
+
+ auto DoesStoreAliasWithPotential = [&](unsigned Idx, GStore &CheckStore) {
+ for (auto AliasInfo : reverse(C.PotentialAliases)) {
+ MachineInstr *PotentialAliasOp = AliasInfo.first;
+ unsigned PreCheckedIdx = AliasInfo.second;
+ if (static_cast<unsigned>(Idx) > PreCheckedIdx) {
+ // Need to check this alias.
+ if (GISelAddressing::instMayAlias(CheckStore, *PotentialAliasOp, *MRI,
+ AA)) {
+ LLVM_DEBUG(dbgs() << "Potential alias " << *PotentialAliasOp
+ << " detected\n");
+ return true;
+ }
+ } else {
+ // Once our store index is lower than the index associated with the
+ // potential alias, we know that we've already checked for this alias
+ // and all of the earlier potential aliases too.
+ return false;
+ }
+ }
+ return false;
+ };
+ // Start from the last store in the group, and check if it aliases with any
+ // of the potential aliasing operations in the list.
+ for (int StoreIdx = C.Stores.size() - 1; StoreIdx >= 0; --StoreIdx) {
+ auto *CheckStore = C.Stores[StoreIdx];
+ if (DoesStoreAliasWithPotential(StoreIdx, *CheckStore))
+ continue;
+ StoresToMerge.emplace_back(CheckStore);
+ }
+
+ LLVM_DEBUG(dbgs() << StoresToMerge.size()
+ << " stores remaining after alias checks. Merging...\n");
+
+ // Now we've checked for aliasing hazards, merge any stores left.
+ C.reset();
+ if (StoresToMerge.size() < 2)
+ return false;
+ return mergeStores(StoresToMerge);
+}
+
+bool LoadStoreOpt::operationAliasesWithCandidate(MachineInstr &MI,
+ StoreMergeCandidate &C) {
+ if (C.Stores.empty())
+ return false;
+ return llvm::any_of(C.Stores, [&](MachineInstr *OtherMI) {
+ return instMayAlias(MI, *OtherMI, *MRI, AA);
+ });
+}
+
+void LoadStoreOpt::StoreMergeCandidate::addPotentialAlias(MachineInstr &MI) {
+ PotentialAliases.emplace_back(std::make_pair(&MI, Stores.size() - 1));
+}
+
+bool LoadStoreOpt::addStoreToCandidate(GStore &StoreMI,
+ StoreMergeCandidate &C) {
+ // Check whether the given store writes to an adjacent address and meets the
+ // other requirements for being added to the candidate.
+ LLT ValueTy = MRI->getType(StoreMI.getValueReg());
+ LLT PtrTy = MRI->getType(StoreMI.getPointerReg());
+
+ // Only handle scalars.
+ if (!ValueTy.isScalar())
+ return false;
+
+ // Don't allow truncating stores for now.
+ if (StoreMI.getMemSizeInBits() != ValueTy.getSizeInBits())
+ return false;
+
+ Register StoreAddr = StoreMI.getPointerReg();
+ auto BIO = getPointerInfo(StoreAddr, *MRI);
+ Register StoreBase = BIO.BaseReg;
+ uint64_t StoreOffCst = BIO.Offset;
+ if (C.Stores.empty()) {
+ // This is the first store of the candidate.
+ // If the offset can't possibly allow for a lower addressed store with the
+ // same base, don't bother adding it.
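+ // E.g. a 4-byte store at offset 2 from the base cannot have a lower
+ // adjacent store (it would need offset -2), so it's not a useful start.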
+ if (StoreOffCst < ValueTy.getSizeInBytes())
+ return false;
+ C.BasePtr = StoreBase;
+ C.CurrentLowestOffset = StoreOffCst;
+ C.Stores.emplace_back(&StoreMI);
+ LLVM_DEBUG(dbgs() << "Starting a new merge candidate group with: "
+ << StoreMI);
+ return true;
+ }
+
+ // Check the store is the same size as the existing ones in the candidate.
+ if (MRI->getType(C.Stores[0]->getValueReg()).getSizeInBits() !=
+ ValueTy.getSizeInBits())
+ return false;
+
+ if (MRI->getType(C.Stores[0]->getPointerReg()).getAddressSpace() !=
+ PtrTy.getAddressSpace())
+ return false;
+
+ // There are other stores in the candidate. Check that this store writes to
+ // the next lowest adjacent address.
+ if (C.BasePtr != StoreBase)
+ return false;
+ if ((C.CurrentLowestOffset - ValueTy.getSizeInBytes()) !=
+ static_cast<uint64_t>(StoreOffCst))
+ return false;
+
+ // This writes to an adjacent address. Allow it.
+ C.Stores.emplace_back(&StoreMI);
+ C.CurrentLowestOffset = C.CurrentLowestOffset - ValueTy.getSizeInBytes();
+ LLVM_DEBUG(dbgs() << "Candidate added store: " << StoreMI);
+ return true;
+}
+
+bool LoadStoreOpt::mergeBlockStores(MachineBasicBlock &MBB) {
+ bool Changed = false;
+ // Walk through the block bottom-up, looking for merging candidates.
+ StoreMergeCandidate Candidate;
+ for (auto II = MBB.rbegin(), IE = MBB.rend(); II != IE; ++II) {
+ MachineInstr &MI = *II;
+ if (InstsToErase.contains(&MI))
+ continue;
+
+ if (auto StoreMI = dyn_cast<GStore>(&*II)) {
+ // We have a G_STORE. Add it to the candidate if it writes to an adjacent
+ // address.
+ if (!addStoreToCandidate(*StoreMI, Candidate)) {
+ // Store wasn't eligible to be added. May need to record it as a
+ // potential alias.
+ if (operationAliasesWithCandidate(*StoreMI, Candidate)) {
+ Changed |= processMergeCandidate(Candidate);
+ continue;
+ }
+ Candidate.addPotentialAlias(*StoreMI);
+ }
+ continue;
+ }
+
+ // If we don't have any stores yet, this instruction can't pose a problem.
+ if (Candidate.Stores.empty())
+ continue;
+
+ // We're dealing with some other kind of instruction.
+ if (isInstHardMergeHazard(MI)) {
+ Changed |= processMergeCandidate(Candidate);
+ Candidate.Stores.clear();
+ continue;
+ }
+
+ if (!MI.mayLoadOrStore())
+ continue;
+
+ if (operationAliasesWithCandidate(MI, Candidate)) {
+ // We have a potential alias, so process the current candidate if we can
+ // and then continue looking for a new candidate.
+ Changed |= processMergeCandidate(Candidate);
+ continue;
+ }
+
+ // Record this instruction as a potential alias for future stores that are
+ // added to the candidate.
+ Candidate.addPotentialAlias(MI);
+ }
+
+ // Process any candidate left after finishing searching the entire block.
+ Changed |= processMergeCandidate(Candidate);
+
+ // Erase instructions now that we're no longer iterating over the block.
+ for (auto *MI : InstsToErase)
+ MI->eraseFromParent();
+ InstsToErase.clear();
+ return Changed;
+}
+
+bool LoadStoreOpt::mergeFunctionStores(MachineFunction &MF) {
+ bool Changed = false;
+ for (auto &BB : MF) {
+ Changed |= mergeBlockStores(BB);
+ }
+ return Changed;
+}
+
+void LoadStoreOpt::initializeStoreMergeTargetInfo(unsigned AddrSpace) {
+ // Query the legalizer info to record what store types are legal.
+ // We record this because we don't want to bother trying to merge stores into
+ // illegal ones, which would just result in them being split again.
+
+ if (LegalStoreSizes.count(AddrSpace)) {
+ assert(LegalStoreSizes[AddrSpace].any());
+ return; // Already cached sizes for this address space.
+ }
+
+ // Need to reserve at least MaxStoreSizeToForm + 1 bits.
+ BitVector LegalSizes(MaxStoreSizeToForm * 2);
+ const auto &LI = *MF->getSubtarget().getLegalizerInfo();
+ const auto &DL = MF->getFunction().getParent()->getDataLayout();
+ Type *IntPtrIRTy =
+ DL.getIntPtrType(MF->getFunction().getContext(), AddrSpace);
+ LLT PtrTy = getLLTForType(*IntPtrIRTy->getPointerTo(AddrSpace), DL);
+ // We assume that we're not going to be generating any stores wider than
+ // MaxStoreSizeToForm bits for now.
+ for (unsigned Size = 2; Size <= MaxStoreSizeToForm; Size *= 2) {
+ LLT Ty = LLT::scalar(Size);
+ SmallVector<LegalityQuery::MemDesc, 2> MemDescrs(
+ {{Ty, Ty.getSizeInBits(), AtomicOrdering::NotAtomic}});
+ SmallVector<LLT> StoreTys({Ty, PtrTy});
+ LegalityQuery Q(TargetOpcode::G_STORE, StoreTys, MemDescrs);
+ LegalizeActionStep ActionStep = LI.getAction(Q);
+ if (ActionStep.Action == LegalizeActions::Legal)
+ LegalSizes.set(Size);
+ }
+ assert(LegalSizes.any() && "Expected some store sizes to be legal!");
+ LegalStoreSizes[AddrSpace] = LegalSizes;
+}
+
+bool LoadStoreOpt::runOnMachineFunction(MachineFunction &MF) {
+ // If the ISel pipeline failed, do not bother running this pass.
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "Begin memory optimizations for: " << MF.getName()
+ << '\n');
+
+ init(MF);
+ bool Changed = false;
+ Changed |= mergeFunctionStores(MF);
+
+ LegalStoreSizes.clear();
+ return Changed;
+}
diff --git a/llvm/lib/CodeGen/GlobalISel/Localizer.cpp b/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
index d45fdae43f01..a1acc4195840 100644
--- a/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
@@ -92,9 +92,8 @@ bool Localizer::localizeInterBlock(MachineFunction &MF,
// Check if all the users of MI are local.
// We are going to invalidate the list of use operands, so we
// can't use a range iterator.
- for (auto MOIt = MRI->use_begin(Reg), MOItEnd = MRI->use_end();
- MOIt != MOItEnd;) {
- MachineOperand &MOUse = *MOIt++;
+ for (MachineOperand &MOUse :
+ llvm::make_early_inc_range(MRI->use_operands(Reg))) {
// Check if the use is already local.
MachineBasicBlock *InsertMBB;
LLVM_DEBUG(MachineInstr &MIUse = *MOUse.getParent();
diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 54ac62793b08..fb5ed35c1f72 100644
--- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -673,7 +673,8 @@ MachineInstrBuilder MachineIRBuilder::buildShuffleVector(const DstOp &Res,
LLT DstTy = Res.getLLTTy(*getMRI());
LLT Src1Ty = Src1.getLLTTy(*getMRI());
LLT Src2Ty = Src2.getLLTTy(*getMRI());
- assert(Src1Ty.getNumElements() + Src2Ty.getNumElements() >= Mask.size());
+ assert((size_t)(Src1Ty.getNumElements() + Src2Ty.getNumElements()) >=
+ Mask.size());
assert(DstTy.getElementType() == Src1Ty.getElementType() &&
DstTy.getElementType() == Src2Ty.getElementType());
(void)DstTy;
diff --git a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
index 644a81d8021e..937d94764be1 100644
--- a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
@@ -699,11 +699,11 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) {
// Set a sensible insertion point so that subsequent calls to
// MIRBuilder insert into this basic block.
MIRBuilder.setMBB(*MBB);
- for (MachineBasicBlock::iterator MII = MBB->begin(), End = MBB->end();
- MII != End;) {
- // MI might be invalidated by the assignment, so move the
- // iterator before hand.
- MachineInstr &MI = *MII++;
+ SmallVector<MachineInstr *> WorkList(
+ make_pointer_range(reverse(MBB->instrs())));
+
+ while (!WorkList.empty()) {
+ MachineInstr &MI = *WorkList.pop_back_val();
// Ignore target-specific post-isel instructions: they should use proper
// regclasses.
@@ -728,18 +728,6 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) {
"unable to map instruction", MI);
return false;
}
-
- // It's possible the mapping changed control flow, and moved the following
- // instruction to a new block, so figure out the new parent.
- if (MII != End) {
- MachineBasicBlock *NextInstBB = MII->getParent();
- if (NextInstBB != MBB) {
- LLVM_DEBUG(dbgs() << "Instruction mapping changed control flow\n");
- MBB = NextInstBB;
- MIRBuilder.setMBB(*MBB);
- End = MBB->end();
- }
- }
}
}
diff --git a/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
index e2a963747101..1a2102e3ef21 100644
--- a/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
@@ -570,7 +570,7 @@ bool RegisterBankInfo::ValueMapping::verify(unsigned MeaningfulBitWidth) const {
assert((ValueMask & PartMapMask) == PartMapMask &&
"Some partial mappings overlap");
}
- assert(ValueMask.isAllOnesValue() && "Value is not fully mapped");
+ assert(ValueMask.isAllOnes() && "Value is not fully mapped");
return true;
}
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index f64e41b9dccc..1a440c064a59 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -15,7 +15,9 @@
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -60,6 +62,8 @@ Register llvm::constrainOperandRegClass(
if (ConstrainedReg != Reg) {
MachineBasicBlock::iterator InsertIt(&InsertPt);
MachineBasicBlock &MBB = *InsertPt.getParent();
+ // FIXME: The copy needs to have the classes constrained for its operands.
+ // Use operand's regbank to get the class for old register (Reg).
if (RegMO.isUse()) {
BuildMI(MBB, InsertIt, InsertPt.getDebugLoc(),
TII.get(TargetOpcode::COPY), ConstrainedReg)
@@ -99,19 +103,25 @@ Register llvm::constrainOperandRegClass(
// Assume physical registers are properly constrained.
assert(Register::isVirtualRegister(Reg) && "PhysReg not implemented");
- const TargetRegisterClass *RegClass = TII.getRegClass(II, OpIdx, &TRI, MF);
+ const TargetRegisterClass *OpRC = TII.getRegClass(II, OpIdx, &TRI, MF);
// Some of the target independent instructions, like COPY, may not impose any
// register class constraints on some of their operands: If it's a use, we can
// skip constraining as the instruction defining the register would constrain
// it.
- // We can't constrain unallocatable register classes, because we can't create
- // virtual registers for these classes, so we need to let targets handled this
- // case.
- if (RegClass && !RegClass->isAllocatable())
- RegClass = TRI.getConstrainedRegClassForOperand(RegMO, MRI);
+ if (OpRC) {
+ // Obtain the RC from the incoming regbank if it is a proper sub-class.
+ // Operands can have multiple regbanks for a superclass that combines
+ // different register types (e.g., AMDGPU's VGPR and AGPR). The regbank
+ // ambiguity resolved by targets during regbankselect should not be overridden.
+ if (const auto *SubRC = TRI.getCommonSubClass(
+ OpRC, TRI.getConstrainedRegClassForOperand(RegMO, MRI)))
+ OpRC = SubRC;
- if (!RegClass) {
+ OpRC = TRI.getAllocatableClass(OpRC);
+ }
+
+ if (!OpRC) {
assert((!isTargetSpecificOpcode(II.getOpcode()) || RegMO.isUse()) &&
"Register class constraint is required unless either the "
"instruction is target independent or the operand is a use");
@@ -127,7 +137,7 @@ Register llvm::constrainOperandRegClass(
// and they never reach this function.
return Reg;
}
- return constrainOperandRegClass(MF, TRI, MRI, TII, RBI, InsertPt, *RegClass,
+ return constrainOperandRegClass(MF, TRI, MRI, TII, RBI, InsertPt, *OpRC,
RegMO);
}
@@ -236,7 +246,7 @@ static void reportGISelDiagnostic(DiagnosticSeverity Severity,
R << (" (in function: " + MF.getName() + ")").str();
if (IsFatal)
- report_fatal_error(R.getMsg());
+ report_fatal_error(Twine(R.getMsg()));
else
MORE.emit(R);
}
@@ -267,10 +277,10 @@ void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC,
reportGISelFailure(MF, TPC, MORE, R);
}
-Optional<APInt> llvm::getConstantVRegVal(Register VReg,
- const MachineRegisterInfo &MRI) {
- Optional<ValueAndVReg> ValAndVReg =
- getConstantVRegValWithLookThrough(VReg, MRI, /*LookThroughInstrs*/ false);
+Optional<APInt> llvm::getIConstantVRegVal(Register VReg,
+ const MachineRegisterInfo &MRI) {
+ Optional<ValueAndVReg> ValAndVReg = getIConstantVRegValWithLookThrough(
+ VReg, MRI, /*LookThroughInstrs*/ false);
assert((!ValAndVReg || ValAndVReg->VReg == VReg) &&
"Value found while looking through instrs");
if (!ValAndVReg)
@@ -278,41 +288,27 @@ Optional<APInt> llvm::getConstantVRegVal(Register VReg,
return ValAndVReg->Value;
}
-Optional<int64_t> llvm::getConstantVRegSExtVal(Register VReg,
- const MachineRegisterInfo &MRI) {
- Optional<APInt> Val = getConstantVRegVal(VReg, MRI);
+Optional<int64_t>
+llvm::getIConstantVRegSExtVal(Register VReg, const MachineRegisterInfo &MRI) {
+ Optional<APInt> Val = getIConstantVRegVal(VReg, MRI);
if (Val && Val->getBitWidth() <= 64)
return Val->getSExtValue();
return None;
}
-Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough(
- Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs,
- bool HandleFConstant, bool LookThroughAnyExt) {
+namespace {
+
+typedef std::function<bool(const MachineInstr *)> IsOpcodeFn;
+typedef std::function<Optional<APInt>(const MachineInstr *MI)> GetAPCstFn;
+
+Optional<ValueAndVReg> getConstantVRegValWithLookThrough(
+ Register VReg, const MachineRegisterInfo &MRI, IsOpcodeFn IsConstantOpcode,
+ GetAPCstFn getAPCstValue, bool LookThroughInstrs = true,
+ bool LookThroughAnyExt = false) {
SmallVector<std::pair<unsigned, unsigned>, 4> SeenOpcodes;
MachineInstr *MI;
- auto IsConstantOpcode = [HandleFConstant](unsigned Opcode) {
- return Opcode == TargetOpcode::G_CONSTANT ||
- (HandleFConstant && Opcode == TargetOpcode::G_FCONSTANT);
- };
- auto GetImmediateValue = [HandleFConstant,
- &MRI](const MachineInstr &MI) -> Optional<APInt> {
- const MachineOperand &CstVal = MI.getOperand(1);
- if (!CstVal.isImm() && !CstVal.isCImm() &&
- (!HandleFConstant || !CstVal.isFPImm()))
- return None;
- if (!CstVal.isFPImm()) {
- unsigned BitWidth =
- MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
- APInt Val = CstVal.isImm() ? APInt(BitWidth, CstVal.getImm())
- : CstVal.getCImm()->getValue();
- assert(Val.getBitWidth() == BitWidth &&
- "Value bitwidth doesn't match definition type");
- return Val;
- }
- return CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
- };
- while ((MI = MRI.getVRegDef(VReg)) && !IsConstantOpcode(MI->getOpcode()) &&
+
+ while ((MI = MRI.getVRegDef(VReg)) && !IsConstantOpcode(MI) &&
LookThroughInstrs) {
switch (MI->getOpcode()) {
case TargetOpcode::G_ANYEXT:
@@ -339,10 +335,10 @@ Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough(
return None;
}
}
- if (!MI || !IsConstantOpcode(MI->getOpcode()))
+ if (!MI || !IsConstantOpcode(MI))
return None;
- Optional<APInt> MaybeVal = GetImmediateValue(*MI);
+ Optional<APInt> MaybeVal = getAPCstValue(MI);
if (!MaybeVal)
return None;
APInt &Val = *MaybeVal;
@@ -365,12 +361,65 @@ Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough(
return ValueAndVReg{Val, VReg};
}
-const ConstantInt *llvm::getConstantIntVRegVal(Register VReg,
- const MachineRegisterInfo &MRI) {
- MachineInstr *MI = MRI.getVRegDef(VReg);
- if (MI->getOpcode() != TargetOpcode::G_CONSTANT)
- return nullptr;
- return MI->getOperand(1).getCImm();
+bool isIConstant(const MachineInstr *MI) {
+ if (!MI)
+ return false;
+ return MI->getOpcode() == TargetOpcode::G_CONSTANT;
+}
+
+bool isFConstant(const MachineInstr *MI) {
+ if (!MI)
+ return false;
+ return MI->getOpcode() == TargetOpcode::G_FCONSTANT;
+}
+
+bool isAnyConstant(const MachineInstr *MI) {
+ if (!MI)
+ return false;
+ unsigned Opc = MI->getOpcode();
+ return Opc == TargetOpcode::G_CONSTANT || Opc == TargetOpcode::G_FCONSTANT;
+}
+
+Optional<APInt> getCImmAsAPInt(const MachineInstr *MI) {
+ const MachineOperand &CstVal = MI->getOperand(1);
+ if (CstVal.isCImm())
+ return CstVal.getCImm()->getValue();
+ return None;
+}
+
+Optional<APInt> getCImmOrFPImmAsAPInt(const MachineInstr *MI) {
+ const MachineOperand &CstVal = MI->getOperand(1);
+ if (CstVal.isCImm())
+ return CstVal.getCImm()->getValue();
+ if (CstVal.isFPImm())
+ return CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
+ return None;
+}
+
+} // end anonymous namespace
+
+Optional<ValueAndVReg> llvm::getIConstantVRegValWithLookThrough(
+ Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs) {
+ return getConstantVRegValWithLookThrough(VReg, MRI, isIConstant,
+ getCImmAsAPInt, LookThroughInstrs);
+}
+
+Optional<ValueAndVReg> llvm::getAnyConstantVRegValWithLookThrough(
+ Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs,
+ bool LookThroughAnyExt) {
+ return getConstantVRegValWithLookThrough(
+ VReg, MRI, isAnyConstant, getCImmOrFPImmAsAPInt, LookThroughInstrs,
+ LookThroughAnyExt);
+}
+
+Optional<FPValueAndVReg> llvm::getFConstantVRegValWithLookThrough(
+ Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs) {
+ auto Reg = getConstantVRegValWithLookThrough(
+ VReg, MRI, isFConstant, getCImmOrFPImmAsAPInt, LookThroughInstrs);
+ if (!Reg)
+ return None;
+ return FPValueAndVReg{getConstantFPVRegVal(Reg->VReg, MRI)->getValueAPF(),
+ Reg->VReg};
}
const ConstantFP *
@@ -437,16 +486,16 @@ APFloat llvm::getAPFloatFromSize(double Val, unsigned Size) {
Optional<APInt> llvm::ConstantFoldBinOp(unsigned Opcode, const Register Op1,
const Register Op2,
const MachineRegisterInfo &MRI) {
- auto MaybeOp2Cst = getConstantVRegVal(Op2, MRI);
+ auto MaybeOp2Cst = getAnyConstantVRegValWithLookThrough(Op2, MRI, false);
if (!MaybeOp2Cst)
return None;
- auto MaybeOp1Cst = getConstantVRegVal(Op1, MRI);
+ auto MaybeOp1Cst = getAnyConstantVRegValWithLookThrough(Op1, MRI, false);
if (!MaybeOp1Cst)
return None;
- const APInt &C1 = *MaybeOp1Cst;
- const APInt &C2 = *MaybeOp2Cst;
+ const APInt &C1 = MaybeOp1Cst->Value;
+ const APInt &C2 = MaybeOp2Cst->Value;
switch (Opcode) {
default:
break;
@@ -543,6 +592,35 @@ Optional<APFloat> llvm::ConstantFoldFPBinOp(unsigned Opcode, const Register Op1,
return None;
}
+Optional<MachineInstr *>
+llvm::ConstantFoldVectorBinop(unsigned Opcode, const Register Op1,
+ const Register Op2,
+ const MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIB) {
+ auto *SrcVec1 = getOpcodeDef<GBuildVector>(Op1, MRI);
+ if (!SrcVec1)
+ return None;
+ auto *SrcVec2 = getOpcodeDef<GBuildVector>(Op2, MRI);
+ if (!SrcVec2)
+ return None;
+
+ const LLT EltTy = MRI.getType(SrcVec1->getSourceReg(0));
+
+ SmallVector<Register, 16> FoldedElements;
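+ // Fold the operation element-wise over the two build vectors' sources,
+ // bailing out if any element pair fails to fold to a constant.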
+ for (unsigned Idx = 0, E = SrcVec1->getNumSources(); Idx < E; ++Idx) {
+ auto MaybeCst = ConstantFoldBinOp(Opcode, SrcVec1->getSourceReg(Idx),
+ SrcVec2->getSourceReg(Idx), MRI);
+ if (!MaybeCst)
+ return None;
+ auto FoldedCstReg = MIB.buildConstant(EltTy, *MaybeCst).getReg(0);
+ FoldedElements.emplace_back(FoldedCstReg);
+ }
+ // Create the new vector constant.
+ auto CstVec =
+ MIB.buildBuildVector(MRI.getType(SrcVec1->getReg(0)), FoldedElements);
+ return &*CstVec;
+}
+
bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
bool SNaN) {
const MachineInstr *DefMI = MRI.getVRegDef(Val);
@@ -659,7 +737,7 @@ Register llvm::getFunctionLiveInPhysReg(MachineFunction &MF,
Optional<APInt> llvm::ConstantFoldExtOp(unsigned Opcode, const Register Op1,
uint64_t Imm,
const MachineRegisterInfo &MRI) {
- auto MaybeOp1Cst = getConstantVRegVal(Op1, MRI);
+ auto MaybeOp1Cst = getIConstantVRegVal(Op1, MRI);
if (MaybeOp1Cst) {
switch (Opcode) {
default:
@@ -677,7 +755,7 @@ Optional<APFloat> llvm::ConstantFoldIntToFloat(unsigned Opcode, LLT DstTy,
Register Src,
const MachineRegisterInfo &MRI) {
assert(Opcode == TargetOpcode::G_SITOFP || Opcode == TargetOpcode::G_UITOFP);
- if (auto MaybeSrcVal = getConstantVRegVal(Src, MRI)) {
+ if (auto MaybeSrcVal = getIConstantVRegVal(Src, MRI)) {
APFloat DstVal(getFltSemanticForLLT(DstTy));
DstVal.convertFromAPInt(*MaybeSrcVal, Opcode == TargetOpcode::G_SITOFP,
APFloat::rmNearestTiesToEven);
@@ -686,6 +764,37 @@ Optional<APFloat> llvm::ConstantFoldIntToFloat(unsigned Opcode, LLT DstTy,
return None;
}
+Optional<SmallVector<unsigned>>
+llvm::ConstantFoldCTLZ(Register Src, const MachineRegisterInfo &MRI) {
+ LLT Ty = MRI.getType(Src);
+ SmallVector<unsigned> FoldedCTLZs;
+ auto tryFoldScalar = [&](Register R) -> Optional<unsigned> {
+ auto MaybeCst = getIConstantVRegVal(R, MRI);
+ if (!MaybeCst)
+ return None;
+ return MaybeCst->countLeadingZeros();
+ };
+ if (Ty.isVector()) {
+ // Try to constant fold each element.
+ auto *BV = getOpcodeDef<GBuildVector>(Src, MRI);
+ if (!BV)
+ return None;
+ for (unsigned SrcIdx = 0; SrcIdx < BV->getNumSources(); ++SrcIdx) {
+ if (auto MaybeFold = tryFoldScalar(BV->getSourceReg(SrcIdx))) {
+ FoldedCTLZs.emplace_back(*MaybeFold);
+ continue;
+ }
+ return None;
+ }
+ return FoldedCTLZs;
+ }
+ if (auto MaybeCst = tryFoldScalar(Src)) {
+ FoldedCTLZs.emplace_back(*MaybeCst);
+ return FoldedCTLZs;
+ }
+ return None;
+}
+
bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI,
GISelKnownBits *KB) {
Optional<DefinitionAndSourceRegister> DefSrcReg =
@@ -707,7 +816,7 @@ bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI,
// shifting the bit off the end is undefined.
// TODO: Constant splat
- if (auto ConstLHS = getConstantVRegVal(MI.getOperand(1).getReg(), MRI)) {
+ if (auto ConstLHS = getIConstantVRegVal(MI.getOperand(1).getReg(), MRI)) {
if (*ConstLHS == 1)
return true;
}
@@ -715,7 +824,7 @@ bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI,
break;
}
case TargetOpcode::G_LSHR: {
- if (auto ConstLHS = getConstantVRegVal(MI.getOperand(1).getReg(), MRI)) {
+ if (auto ConstLHS = getIConstantVRegVal(MI.getOperand(1).getReg(), MRI)) {
if (ConstLHS->isSignMask())
return true;
}
@@ -737,7 +846,7 @@ bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI,
// zeros is greater than the truncation amount.
const unsigned BitWidth = Ty.getScalarSizeInBits();
for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
- auto Const = getConstantVRegVal(MI.getOperand(I).getReg(), MRI);
+ auto Const = getIConstantVRegVal(MI.getOperand(I).getReg(), MRI);
if (!Const || !Const->zextOrTrunc(BitWidth).isPowerOf2())
return false;
}
@@ -885,53 +994,81 @@ static bool isBuildVectorOp(unsigned Opcode) {
Opcode == TargetOpcode::G_BUILD_VECTOR_TRUNC;
}
-// TODO: Handle mixed undef elements.
-static bool isBuildVectorConstantSplat(const MachineInstr &MI,
- const MachineRegisterInfo &MRI,
- int64_t SplatValue) {
- if (!isBuildVectorOp(MI.getOpcode()))
- return false;
+namespace {
- const unsigned NumOps = MI.getNumOperands();
- for (unsigned I = 1; I != NumOps; ++I) {
- Register Element = MI.getOperand(I).getReg();
- if (!mi_match(Element, MRI, m_SpecificICst(SplatValue)))
- return false;
+Optional<ValueAndVReg> getAnyConstantSplat(Register VReg,
+ const MachineRegisterInfo &MRI,
+ bool AllowUndef) {
+ MachineInstr *MI = getDefIgnoringCopies(VReg, MRI);
+ if (!MI)
+ return None;
+
+ if (!isBuildVectorOp(MI->getOpcode()))
+ return None;
+
+ Optional<ValueAndVReg> SplatValAndReg = None;
+ for (MachineOperand &Op : MI->uses()) {
+ Register Element = Op.getReg();
+ auto ElementValAndReg =
+ getAnyConstantVRegValWithLookThrough(Element, MRI, true, true);
+
+ // If AllowUndef, treat undef as a value that will result in a constant splat.
+ if (!ElementValAndReg) {
+ if (AllowUndef && isa<GImplicitDef>(MRI.getVRegDef(Element)))
+ continue;
+ return None;
+ }
+
+ // Record splat value
+ if (!SplatValAndReg)
+ SplatValAndReg = ElementValAndReg;
+
+ // Different constant than the one already recorded, not a constant splat.
+ if (SplatValAndReg->Value != ElementValAndReg->Value)
+ return None;
}
- return true;
+ return SplatValAndReg;
}
+bool isBuildVectorConstantSplat(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ int64_t SplatValue, bool AllowUndef) {
+ if (auto SplatValAndReg =
+ getAnyConstantSplat(MI.getOperand(0).getReg(), MRI, AllowUndef))
+ return mi_match(SplatValAndReg->VReg, MRI, m_SpecificICst(SplatValue));
+ return false;
+}
+
+} // end anonymous namespace
+
Optional<int64_t>
llvm::getBuildVectorConstantSplat(const MachineInstr &MI,
const MachineRegisterInfo &MRI) {
- if (!isBuildVectorOp(MI.getOpcode()))
- return None;
-
- const unsigned NumOps = MI.getNumOperands();
- Optional<int64_t> Scalar;
- for (unsigned I = 1; I != NumOps; ++I) {
- Register Element = MI.getOperand(I).getReg();
- int64_t ElementValue;
- if (!mi_match(Element, MRI, m_ICst(ElementValue)))
- return None;
- if (!Scalar)
- Scalar = ElementValue;
- else if (*Scalar != ElementValue)
- return None;
- }
+ if (auto SplatValAndReg =
+ getAnyConstantSplat(MI.getOperand(0).getReg(), MRI, false))
+ return getIConstantVRegSExtVal(SplatValAndReg->VReg, MRI);
+ return None;
+}
- return Scalar;
+Optional<FPValueAndVReg> llvm::getFConstantSplat(Register VReg,
+ const MachineRegisterInfo &MRI,
+ bool AllowUndef) {
+ if (auto SplatValAndReg = getAnyConstantSplat(VReg, MRI, AllowUndef))
+ return getFConstantVRegValWithLookThrough(SplatValAndReg->VReg, MRI);
+ return None;
}
bool llvm::isBuildVectorAllZeros(const MachineInstr &MI,
- const MachineRegisterInfo &MRI) {
- return isBuildVectorConstantSplat(MI, MRI, 0);
+ const MachineRegisterInfo &MRI,
+ bool AllowUndef) {
+ return isBuildVectorConstantSplat(MI, MRI, 0, AllowUndef);
}
bool llvm::isBuildVectorAllOnes(const MachineInstr &MI,
- const MachineRegisterInfo &MRI) {
- return isBuildVectorConstantSplat(MI, MRI, -1);
+ const MachineRegisterInfo &MRI,
+ bool AllowUndef) {
+ return isBuildVectorConstantSplat(MI, MRI, -1, AllowUndef);
}
Optional<RegOrConstant> llvm::getVectorSplat(const MachineInstr &MI,
@@ -948,6 +1085,36 @@ Optional<RegOrConstant> llvm::getVectorSplat(const MachineInstr &MI,
return RegOrConstant(Reg);
}
+bool llvm::isConstantOrConstantVector(MachineInstr &MI,
+ const MachineRegisterInfo &MRI) {
+ Register Def = MI.getOperand(0).getReg();
+ if (auto C = getIConstantVRegValWithLookThrough(Def, MRI))
+ return true;
+ GBuildVector *BV = dyn_cast<GBuildVector>(&MI);
+ if (!BV)
+ return false;
+ for (unsigned SrcIdx = 0; SrcIdx < BV->getNumSources(); ++SrcIdx) {
+ if (getIConstantVRegValWithLookThrough(BV->getSourceReg(SrcIdx), MRI) ||
+ getOpcodeDef<GImplicitDef>(BV->getSourceReg(SrcIdx), MRI))
+ continue;
+ return false;
+ }
+ return true;
+}
+
+Optional<APInt>
+llvm::isConstantOrConstantSplatVector(MachineInstr &MI,
+ const MachineRegisterInfo &MRI) {
+ Register Def = MI.getOperand(0).getReg();
+ if (auto C = getIConstantVRegValWithLookThrough(Def, MRI))
+ return C->Value;
+ auto MaybeCst = getBuildVectorConstantSplat(MI, MRI);
+ if (!MaybeCst)
+ return None;
+ const unsigned ScalarSize = MRI.getType(Def).getScalarSizeInBits();
+ return APInt(ScalarSize, *MaybeCst, true);
+}
+
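isConstantOrConstantSplatVector above returns the splat as an APInt sign-extended to the scalar width, so callers can test scalar G_CONSTANTs and splat G_BUILD_VECTORs uniformly. A hedged usage sketch; the helper below is illustrative:

// Sketch: does Reg hold the constant -1, either as a scalar G_CONSTANT or as
// a splat G_BUILD_VECTOR?
static bool isAllOnesSketch(Register Reg, MachineRegisterInfo &MRI) {
  MachineInstr *Def = MRI.getVRegDef(Reg);
  if (!Def)
    return false;
  if (Optional<APInt> Cst = isConstantOrConstantSplatVector(*Def, MRI))
    return Cst->isAllOnes();
  return false;
}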
bool llvm::matchUnaryPredicate(
const MachineRegisterInfo &MRI, Register Reg,
std::function<bool(const Constant *ConstVal)> Match, bool AllowUndefs) {
@@ -1011,3 +1178,59 @@ bool llvm::shouldOptForSize(const MachineBasicBlock &MBB,
return F.hasOptSize() || F.hasMinSize() ||
llvm::shouldOptimizeForSize(MBB.getBasicBlock(), PSI, BFI);
}
+
+/// These artifacts generally don't have any debug users because they don't
+/// directly originate from IR instructions, but instead usually from
+/// legalization. Skipping the check for debug users improves compile time.
+/// Note that truncates or extends aren't included because they have IR
+/// counterparts which can have debug users after translation.
+static bool shouldSkipDbgValueFor(MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_UNMERGE_VALUES:
+ case TargetOpcode::G_MERGE_VALUES:
+ case TargetOpcode::G_CONCAT_VECTORS:
+ case TargetOpcode::G_BUILD_VECTOR:
+ case TargetOpcode::G_EXTRACT:
+ case TargetOpcode::G_INSERT:
+ return true;
+ default:
+ return false;
+ }
+}
+
+void llvm::saveUsesAndErase(MachineInstr &MI, MachineRegisterInfo &MRI,
+ LostDebugLocObserver *LocObserver,
+ SmallInstListTy &DeadInstChain) {
+ for (MachineOperand &Op : MI.uses()) {
+ if (Op.isReg() && Op.getReg().isVirtual())
+ DeadInstChain.insert(MRI.getVRegDef(Op.getReg()));
+ }
+ LLVM_DEBUG(dbgs() << MI << "Is dead; erasing.\n");
+ DeadInstChain.remove(&MI);
+ if (shouldSkipDbgValueFor(MI))
+ MI.eraseFromParent();
+ else
+ MI.eraseFromParentAndMarkDBGValuesForRemoval();
+ if (LocObserver)
+ LocObserver->checkpoint(false);
+}
+
+void llvm::eraseInstrs(ArrayRef<MachineInstr *> DeadInstrs,
+ MachineRegisterInfo &MRI,
+ LostDebugLocObserver *LocObserver) {
+ SmallInstListTy DeadInstChain;
+ for (MachineInstr *MI : DeadInstrs)
+ saveUsesAndErase(*MI, MRI, LocObserver, DeadInstChain);
+
+ while (!DeadInstChain.empty()) {
+ MachineInstr *Inst = DeadInstChain.pop_back_val();
+ if (!isTriviallyDead(*Inst, MRI))
+ continue;
+ saveUsesAndErase(*Inst, MRI, LocObserver, DeadInstChain);
+ }
+}
+
+void llvm::eraseInstr(MachineInstr &MI, MachineRegisterInfo &MRI,
+ LostDebugLocObserver *LocObserver) {
+ return eraseInstrs({&MI}, MRI, LocObserver);
+}
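Taken together, the splat queries above now all funnel through getAnyConstantSplat, and the new AllowUndef flag lets undef lanes take part in an all-zeros or all-ones splat. A hedged sketch of the behavioural difference on a small example; the MIR values are illustrative:

// Given, roughly:
//   %z:_(s32) = G_CONSTANT i32 0
//   %u:_(s32) = G_IMPLICIT_DEF
//   %v:_(<4 x s32>) = G_BUILD_VECTOR %z, %u, %z, %z
// an undef lane no longer defeats the splat query when AllowUndef is set.
static void splatQuerySketch(const MachineInstr &BuildVecV,
                             const MachineRegisterInfo &MRI) {
  bool Strict = isBuildVectorAllZeros(BuildVecV, MRI, /*AllowUndef=*/false);
  bool Lax = isBuildVectorAllZeros(BuildVecV, MRI, /*AllowUndef=*/true);
  // For %v above: Strict is false (the undef lane is not a constant zero),
  // while Lax is true (the undef lane may stand in for the splat value).
  (void)Strict;
  (void)Lax;
}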
diff --git a/llvm/lib/CodeGen/HardwareLoops.cpp b/llvm/lib/CodeGen/HardwareLoops.cpp
index 4316034371a5..83b8c2d0eacb 100644
--- a/llvm/lib/CodeGen/HardwareLoops.cpp
+++ b/llvm/lib/CodeGen/HardwareLoops.cpp
@@ -187,7 +187,7 @@ namespace {
const DataLayout &DL,
OptimizationRemarkEmitter *ORE) :
SE(SE), DL(DL), ORE(ORE), L(Info.L), M(L->getHeader()->getModule()),
- TripCount(Info.TripCount),
+ ExitCount(Info.ExitCount),
CountType(Info.CountType),
ExitBranch(Info.ExitBranch),
LoopDecrement(Info.LoopDecrement),
@@ -202,7 +202,7 @@ namespace {
OptimizationRemarkEmitter *ORE = nullptr;
Loop *L = nullptr;
Module *M = nullptr;
- const SCEV *TripCount = nullptr;
+ const SCEV *ExitCount = nullptr;
Type *CountType = nullptr;
BranchInst *ExitBranch = nullptr;
Value *LoopDecrement = nullptr;
@@ -296,7 +296,7 @@ bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
}
assert(
- (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.TripCount) &&
+ (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.ExitCount) &&
"Hardware Loop must have set exit info.");
BasicBlock *Preheader = L->getLoopPreheader();
@@ -365,7 +365,13 @@ static bool CanGenerateTest(Loop *L, Value *Count) {
return false;
};
- if (!IsCompareZero(ICmp, Count, 0) && !IsCompareZero(ICmp, Count, 1))
+ // Check if Count is a zext.
+ Value *CountBefZext =
+ isa<ZExtInst>(Count) ? cast<ZExtInst>(Count)->getOperand(0) : nullptr;
+
+ if (!IsCompareZero(ICmp, Count, 0) && !IsCompareZero(ICmp, Count, 1) &&
+ !IsCompareZero(ICmp, CountBefZext, 0) &&
+ !IsCompareZero(ICmp, CountBefZext, 1))
return false;
unsigned SuccIdx = ICmp->getPredicate() == ICmpInst::ICMP_NE ? 0 : 1;
@@ -381,13 +387,18 @@ Value *HardwareLoop::InitLoopCount() {
// loop counter and tests that is not zero?
SCEVExpander SCEVE(SE, DL, "loopcnt");
+ if (!ExitCount->getType()->isPointerTy() &&
+ ExitCount->getType() != CountType)
+ ExitCount = SE.getZeroExtendExpr(ExitCount, CountType);
+
+ ExitCount = SE.getAddExpr(ExitCount, SE.getOne(CountType));
// If we're trying to use the 'test and set' form of the intrinsic, we need
// to replace a conditional branch that is controlling entry to the loop. It
// is likely (guaranteed?) that the preheader has an unconditional branch to
// the loop header, so also check if it has a single predecessor.
- if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, TripCount,
- SE.getZero(TripCount->getType()))) {
+ if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, ExitCount,
+ SE.getZero(ExitCount->getType()))) {
LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n");
UseLoopGuard |= ForceGuardLoopEntry;
} else
@@ -399,19 +410,19 @@ Value *HardwareLoop::InitLoopCount() {
BasicBlock *Predecessor = BB->getSinglePredecessor();
// If it's not safe to create a while loop then don't force it and create a
// do-while loop instead
- if (!isSafeToExpandAt(TripCount, Predecessor->getTerminator(), SE))
+ if (!isSafeToExpandAt(ExitCount, Predecessor->getTerminator(), SE))
UseLoopGuard = false;
else
BB = Predecessor;
}
- if (!isSafeToExpandAt(TripCount, BB->getTerminator(), SE)) {
- LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand TripCount " << *TripCount
- << "\n");
+ if (!isSafeToExpandAt(ExitCount, BB->getTerminator(), SE)) {
+ LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand ExitCount "
+ << *ExitCount << "\n");
return nullptr;
}
- Value *Count = SCEVE.expandCodeFor(TripCount, CountType,
+ Value *Count = SCEVE.expandCodeFor(ExitCount, CountType,
BB->getTerminator());
// FIXME: We've expanded Count where we hope to insert the counter setting
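The TripCount to ExitCount rename reflects what ScalarEvolution actually provides here: the exit (back-edge taken) count, which is one less than the number of iterations. InitLoopCount now widens it to CountType when needed and adds one, and CanGenerateTest additionally looks through a zext when matching the compare against zero. A rough sketch of the conversion, reusing the same ScalarEvolution calls as the patch; the wrapper name is illustrative:

#include "llvm/Analysis/ScalarEvolution.h"
using namespace llvm;

// Sketch: turn SCEV's back-edge taken count into the iteration count the
// hardware-loop intrinsics expect. For "for (i = 0; i < n; ++i)" with n > 0,
// ExitCount is (n - 1), possibly in a narrower type; the result is n.
static const SCEV *toIterationCountSketch(const SCEV *ExitCount,
                                          Type *CountType,
                                          ScalarEvolution &SE) {
  if (!ExitCount->getType()->isPointerTy() &&
      ExitCount->getType() != CountType)
    ExitCount = SE.getZeroExtendExpr(ExitCount, CountType);
  return SE.getAddExpr(ExitCount, SE.getOne(CountType));
}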
diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp
index 71e91b445d9a..64e1f4351456 100644
--- a/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -341,9 +341,8 @@ void InlineSpiller::collectRegsToSpill() {
if (Original == Reg)
return;
- for (MachineRegisterInfo::reg_instr_iterator
- RI = MRI.reg_instr_begin(Reg), E = MRI.reg_instr_end(); RI != E; ) {
- MachineInstr &MI = *RI++;
+ for (MachineInstr &MI :
+ llvm::make_early_inc_range(MRI.reg_instructions(Reg))) {
Register SnipReg = isFullCopyOf(MI, Reg);
if (!isSibling(SnipReg))
continue;
@@ -465,10 +464,8 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) {
LLVM_DEBUG(dbgs() << "Merged to stack int: " << *StackInt << '\n');
// Find all spills and copies of VNI.
- for (MachineRegisterInfo::use_instr_nodbg_iterator
- UI = MRI.use_instr_nodbg_begin(Reg), E = MRI.use_instr_nodbg_end();
- UI != E; ) {
- MachineInstr &MI = *UI++;
+ for (MachineInstr &MI :
+ llvm::make_early_inc_range(MRI.use_nodbg_instructions(Reg))) {
if (!MI.isCopy() && !MI.mayStore())
continue;
SlotIndex Idx = LIS.getInstructionIndex(MI);
@@ -676,11 +673,7 @@ void InlineSpiller::reMaterializeAll() {
bool anyRemat = false;
for (Register Reg : RegsToSpill) {
LiveInterval &LI = LIS.getInterval(Reg);
- for (MachineRegisterInfo::reg_bundle_iterator
- RegI = MRI.reg_bundle_begin(Reg), E = MRI.reg_bundle_end();
- RegI != E; ) {
- MachineInstr &MI = *RegI++;
-
+ for (MachineInstr &MI : llvm::make_early_inc_range(MRI.reg_bundles(Reg))) {
// Debug values are not allowed to affect codegen.
if (MI.isDebugValue())
continue;
@@ -928,6 +921,39 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,
// Update the call site info.
if (MI->isCandidateForCallSiteEntry())
MI->getMF()->moveCallSiteInfo(MI, FoldMI);
+
+ // If we've folded a store into an instruction labelled with debug-info,
+ // record a substitution from the old operand to the memory operand. Handle
+ // the simple common case where operand 0 is the one being folded, plus when
+ // the destination operand is also a tied def. More values could be
+ // substituted / preserved with more analysis.
+ if (MI->peekDebugInstrNum() && Ops[0].second == 0) {
+ // Helper lambda.
+ auto MakeSubstitution = [this,FoldMI,MI,&Ops]() {
+ // Substitute old operand zero to the new instruction's memory operand.
+ unsigned OldOperandNum = Ops[0].second;
+ unsigned NewNum = FoldMI->getDebugInstrNum();
+ unsigned OldNum = MI->getDebugInstrNum();
+ MF.makeDebugValueSubstitution({OldNum, OldOperandNum},
+ {NewNum, MachineFunction::DebugOperandMemNumber});
+ };
+
+ const MachineOperand &Op0 = MI->getOperand(Ops[0].second);
+ if (Ops.size() == 1 && Op0.isDef()) {
+ MakeSubstitution();
+ } else if (Ops.size() == 2 && Op0.isDef() && MI->getOperand(1).isTied() &&
+ Op0.getReg() == MI->getOperand(1).getReg()) {
+ MakeSubstitution();
+ }
+ } else if (MI->peekDebugInstrNum()) {
+ // This is a debug-labelled instruction, but the operand being folded isn't
+ // at operand zero. Most likely this means it's a load being folded in.
+ // Substitute any register defs from operand zero up to the one being
+ // folded -- past that point, we don't know what the new operand indexes
+ // will be.
+ MF.substituteDebugValuesForInst(*MI, *FoldMI, Ops[0].second);
+ }
+
MI->eraseFromParent();
// Insert any new instructions other than FoldMI into the LIS maps.
@@ -1038,57 +1064,53 @@ void InlineSpiller::spillAroundUses(Register Reg) {
LiveInterval &OldLI = LIS.getInterval(Reg);
// Iterate over instructions using Reg.
- for (MachineRegisterInfo::reg_bundle_iterator
- RegI = MRI.reg_bundle_begin(Reg), E = MRI.reg_bundle_end();
- RegI != E; ) {
- MachineInstr *MI = &*(RegI++);
-
+ for (MachineInstr &MI : llvm::make_early_inc_range(MRI.reg_bundles(Reg))) {
// Debug values are not allowed to affect codegen.
- if (MI->isDebugValue()) {
+ if (MI.isDebugValue()) {
// Modify DBG_VALUE now that the value is in a spill slot.
- MachineBasicBlock *MBB = MI->getParent();
- LLVM_DEBUG(dbgs() << "Modifying debug info due to spill:\t" << *MI);
- buildDbgValueForSpill(*MBB, MI, *MI, StackSlot, Reg);
+ MachineBasicBlock *MBB = MI.getParent();
+ LLVM_DEBUG(dbgs() << "Modifying debug info due to spill:\t" << MI);
+ buildDbgValueForSpill(*MBB, &MI, MI, StackSlot, Reg);
MBB->erase(MI);
continue;
}
- assert(!MI->isDebugInstr() && "Did not expect to find a use in debug "
+ assert(!MI.isDebugInstr() && "Did not expect to find a use in debug "
"instruction that isn't a DBG_VALUE");
// Ignore copies to/from snippets. We'll delete them.
- if (SnippetCopies.count(MI))
+ if (SnippetCopies.count(&MI))
continue;
// Stack slot accesses may coalesce away.
- if (coalesceStackAccess(MI, Reg))
+ if (coalesceStackAccess(&MI, Reg))
continue;
// Analyze instruction.
SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops;
- VirtRegInfo RI = AnalyzeVirtRegInBundle(*MI, Reg, &Ops);
+ VirtRegInfo RI = AnalyzeVirtRegInBundle(MI, Reg, &Ops);
// Find the slot index where this instruction reads and writes OldLI.
// This is usually the def slot, except for tied early clobbers.
- SlotIndex Idx = LIS.getInstructionIndex(*MI).getRegSlot();
+ SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot();
if (VNInfo *VNI = OldLI.getVNInfoAt(Idx.getRegSlot(true)))
if (SlotIndex::isSameInstr(Idx, VNI->def))
Idx = VNI->def;
// Check for a sibling copy.
- Register SibReg = isFullCopyOf(*MI, Reg);
+ Register SibReg = isFullCopyOf(MI, Reg);
if (SibReg && isSibling(SibReg)) {
// This may actually be a copy between snippets.
if (isRegToSpill(SibReg)) {
- LLVM_DEBUG(dbgs() << "Found new snippet copy: " << *MI);
- SnippetCopies.insert(MI);
+ LLVM_DEBUG(dbgs() << "Found new snippet copy: " << MI);
+ SnippetCopies.insert(&MI);
continue;
}
if (RI.Writes) {
- if (hoistSpillInsideBB(OldLI, *MI)) {
+ if (hoistSpillInsideBB(OldLI, MI)) {
// This COPY is now dead, the value is already in the stack slot.
- MI->getOperand(0).setIsDead();
- DeadDefs.push_back(MI);
+ MI.getOperand(0).setIsDead();
+ DeadDefs.push_back(&MI);
continue;
}
} else {
@@ -1108,7 +1130,7 @@ void InlineSpiller::spillAroundUses(Register Reg) {
Register NewVReg = Edit->createFrom(Reg);
if (RI.Reads)
- insertReload(NewVReg, Idx, MI);
+ insertReload(NewVReg, Idx, &MI);
// Rewrite instruction operands.
bool hasLiveDef = false;
@@ -1123,12 +1145,12 @@ void InlineSpiller::spillAroundUses(Register Reg) {
hasLiveDef = true;
}
}
- LLVM_DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << *MI << '\n');
+ LLVM_DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << MI << '\n');
// FIXME: Use a second vreg if instruction has no tied ops.
if (RI.Writes)
if (hasLiveDef)
- insertSpill(NewVReg, true, MI);
+ insertSpill(NewVReg, true, &MI);
}
}
@@ -1163,10 +1185,8 @@ void InlineSpiller::spillAll() {
// Finally delete the SnippetCopies.
for (Register Reg : RegsToSpill) {
- for (MachineRegisterInfo::reg_instr_iterator
- RI = MRI.reg_instr_begin(Reg), E = MRI.reg_instr_end();
- RI != E; ) {
- MachineInstr &MI = *(RI++);
+ for (MachineInstr &MI :
+ llvm::make_early_inc_range(MRI.reg_instructions(Reg))) {
assert(SnippetCopies.count(&MI) && "Remaining use wasn't a snippet copy");
// FIXME: Do this with a LiveRangeEdit callback.
LIS.RemoveMachineInstrFromMaps(MI);
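The iterator-juggling loops above become llvm::make_early_inc_range loops: the adaptor increments the underlying iterator early, so the instruction just handed out can be erased without breaking the traversal. A generic sketch of the idiom, independent of InlineSpiller; the cleanup it performs is purely illustrative:

#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;

// Sketch: walk all non-debug users of Reg and delete identity copies, erasing
// the current instruction from inside the loop body.
static void eraseIdentityCopiesSketch(Register Reg, MachineRegisterInfo &MRI) {
  for (MachineInstr &MI :
       make_early_inc_range(MRI.use_nodbg_instructions(Reg))) {
    if (MI.isCopy() &&
        MI.getOperand(0).getReg() == MI.getOperand(1).getReg())
      MI.eraseFromParent(); // safe: the iterator has already moved past MI
  }
}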
diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index 24a57cc21c57..5a20580e5479 100644
--- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -95,7 +95,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.setPreservesCFG();
}
private:
diff --git a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
index 71bfb1d87d66..9fabcfb1f326 100644
--- a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
+++ b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
@@ -308,12 +308,12 @@ public:
}
// Multiplying by one is a no-op.
- if (C.isOneValue()) {
+ if (C.isOne()) {
return *this;
}
// Multiplying by zero removes the coefficient B and defines all bits.
- if (C.isNullValue()) {
+ if (C.isZero()) {
ErrorMSBs = 0;
deleteB();
}
@@ -464,7 +464,7 @@ public:
return *this;
}
- if (C.isNullValue())
+ if (C.isZero())
return *this;
// Test if the result will be zero
@@ -571,7 +571,7 @@ public:
bool isProvenEqualTo(const Polynomial &o) {
// Subtract both polynomials and test if it is fully defined and zero.
Polynomial r = *this - o;
- return (r.ErrorMSBs == 0) && (!r.isFirstOrder()) && (r.A.isNullValue());
+ return (r.ErrorMSBs == 0) && (!r.isFirstOrder()) && (r.A.isZero());
}
/// Print the polynomial into a stream.
@@ -1131,6 +1131,7 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad,
InstructionCost InterleavedCost;
InstructionCost InstructionCost = 0;
+ const TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency;
// Get the interleave factor
unsigned Factor = InterleavedLoad.size();
@@ -1158,8 +1159,7 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad,
// be expected. Also sum the cost of the instructions being left dead.
for (auto &I : Is) {
// Compute the old cost
- InstructionCost +=
- TTI.getInstructionCost(I, TargetTransformInfo::TCK_Latency);
+ InstructionCost += TTI.getInstructionCost(I, CostKind);
// The final SVIs are allowed not to be dead, all uses will be replaced
if (SVIs.find(I) != SVIs.end())
@@ -1212,7 +1212,7 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad,
Indices.push_back(i);
InterleavedCost = TTI.getInterleavedMemoryOpCost(
Instruction::Load, ILTy, Factor, Indices, InsertionPoint->getAlign(),
- InsertionPoint->getPointerAddressSpace());
+ InsertionPoint->getPointerAddressSpace(), CostKind);
if (InterleavedCost >= InstructionCost) {
return false;
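Two mechanical updates here: the APInt predicates isNullValue()/isOneValue() become the newer isZero()/isOne() spellings, and both the instructions being replaced and the interleaved load replacing them are now priced with the same TTI::TCK_SizeAndLatency cost kind. A trivial sketch of the predicate rename; the values are illustrative:

#include "llvm/ADT/APInt.h"
using namespace llvm;

static void apintPredicateSketch() {
  APInt C(/*numBits=*/32, /*val=*/1);
  bool IsOne = C.isOne();   // previously spelled C.isOneValue()
  bool IsZero = C.isZero(); // previously spelled C.isNullValue()
  (void)IsOne;
  (void)IsZero;
}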
diff --git a/llvm/lib/CodeGen/IntrinsicLowering.cpp b/llvm/lib/CodeGen/IntrinsicLowering.cpp
index 55089d3b90d0..808a79d9792a 100644
--- a/llvm/lib/CodeGen/IntrinsicLowering.cpp
+++ b/llvm/lib/CodeGen/IntrinsicLowering.cpp
@@ -453,8 +453,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
bool IntrinsicLowering::LowerToByteSwap(CallInst *CI) {
// Verify this is a simple bswap.
- if (CI->getNumArgOperands() != 1 ||
- CI->getType() != CI->getArgOperand(0)->getType() ||
+ if (CI->arg_size() != 1 || CI->getType() != CI->getArgOperand(0)->getType() ||
!CI->getType()->isIntegerTy())
return false;
diff --git a/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/llvm/lib/CodeGen/LLVMTargetMachine.cpp
index 37c0b44ea2b2..0d3685d4141c 100644
--- a/llvm/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/llvm/lib/CodeGen/LLVMTargetMachine.cpp
@@ -25,10 +25,10 @@
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
index dc9907058340..a4eb3094612b 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
+++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
@@ -11,114 +11,48 @@
/// LiveDebugValues.cpp and VarLocBasedImpl.cpp for more information.
///
/// This pass propagates variable locations between basic blocks, resolving
-/// control flow conflicts between them. The problem is much like SSA
-/// construction, where each DBG_VALUE instruction assigns the *value* that
-/// a variable has, and every instruction where the variable is in scope uses
-/// that variable. The resulting map of instruction-to-value is then translated
-/// into a register (or spill) location for each variable over each instruction.
+/// control flow conflicts between them. The problem is SSA construction, where
+/// each debug instruction assigns the *value* that a variable has, and every
+/// instruction where the variable is in scope uses that variable. The resulting
+/// map of instruction-to-value is then translated into a register (or spill)
+/// location for each variable over each instruction.
///
-/// This pass determines which DBG_VALUE dominates which instructions, or if
-/// none do, where values must be merged (like PHI nodes). The added
-/// complication is that because codegen has already finished, a PHI node may
-/// be needed for a variable location to be correct, but no register or spill
-/// slot merges the necessary values. In these circumstances, the variable
-/// location is dropped.
+/// The primary difference from normal SSA construction is that we cannot
+/// _create_ PHI values that contain variable values. CodeGen has already
+/// completed, and we can't alter it just to make debug-info complete. Thus:
+/// we can identify function positions where we would like a PHI value for a
+/// variable, but must search the MachineFunction to see whether such a PHI is
+/// available. If no such PHI exists, the variable location must be dropped.
///
-/// What makes this analysis non-trivial is loops: we cannot tell in advance
-/// whether a variable location is live throughout a loop, or whether its
-/// location is clobbered (or redefined by another DBG_VALUE), without
-/// exploring all the way through.
-///
-/// To make this simpler we perform two kinds of analysis. First, we identify
+/// To achieve this, we perform two kinds of analysis. First, we identify
/// every value defined by every instruction (ignoring those that only move
-/// another value), then compute a map of which values are available for each
-/// instruction. This is stronger than a reaching-def analysis, as we create
-/// PHI values where other values merge.
-///
-/// Secondly, for each variable, we effectively re-construct SSA using each
-/// DBG_VALUE as a def. The DBG_VALUEs read a value-number computed by the
-/// first analysis from the location they refer to. We can then compute the
-/// dominance frontiers of where a variable has a value, and create PHI nodes
-/// where they merge.
-/// This isn't precisely SSA-construction though, because the function shape
-/// is pre-defined. If a variable location requires a PHI node, but no
-/// PHI for the relevant values is present in the function (as computed by the
-/// first analysis), the location must be dropped.
-///
-/// Once both are complete, we can pass back over all instructions knowing:
-/// * What _value_ each variable should contain, either defined by an
-/// instruction or where control flow merges
-/// * What the location of that value is (if any).
-/// Allowing us to create appropriate live-in DBG_VALUEs, and DBG_VALUEs when
-/// a value moves location. After this pass runs, all variable locations within
-/// a block should be specified by DBG_VALUEs within that block, allowing
-/// DbgEntityHistoryCalculator to focus on individual blocks.
-///
-/// This pass is able to go fast because the size of the first
-/// reaching-definition analysis is proportional to the working-set size of
-/// the function, which the compiler tries to keep small. (It's also
-/// proportional to the number of blocks). Additionally, we repeatedly perform
-/// the second reaching-definition analysis with only the variables and blocks
-/// in a single lexical scope, exploiting their locality.
-///
-/// Determining where PHIs happen is trickier with this approach, and it comes
-/// to a head in the major problem for LiveDebugValues: is a value live-through
-/// a loop, or not? Your garden-variety dataflow analysis aims to build a set of
-/// facts about a function, however this analysis needs to generate new value
-/// numbers at joins.
-///
-/// To do this, consider a lattice of all definition values, from instructions
-/// and from PHIs. Each PHI is characterised by the RPO number of the block it
-/// occurs in. Each value pair A, B can be ordered by RPO(A) < RPO(B):
-/// with non-PHI values at the top, and any PHI value in the last block (by RPO
-/// order) at the bottom.
-///
-/// (Awkwardly: lower-down-the _lattice_ means a greater RPO _number_. Below,
-/// "rank" always refers to the former).
-///
-/// At any join, for each register, we consider:
-/// * All incoming values, and
-/// * The PREVIOUS live-in value at this join.
-/// If all incoming values agree: that's the live-in value. If they do not, the
-/// incoming values are ranked according to the partial order, and the NEXT
-/// LOWEST rank after the PREVIOUS live-in value is picked (multiple values of
-/// the same rank are ignored as conflicting). If there are no candidate values,
-/// or if the rank of the live-in would be lower than the rank of the current
-/// blocks PHIs, create a new PHI value.
-///
-/// Intuitively: if it's not immediately obvious what value a join should result
-/// in, we iteratively descend from instruction-definitions down through PHI
-/// values, getting closer to the current block each time. If the current block
-/// is a loop head, this ordering is effectively searching outer levels of
-/// loops, to find a value that's live-through the current loop.
+/// another value), then re-compute an SSA-form representation of the
+/// MachineFunction, using value propagation to eliminate any un-necessary
+/// PHI values. This gives us a map of every value computed in the function,
+/// and its location within the register file / stack.
///
-/// If there is no value that's live-through this loop, a PHI is created for
-/// this location instead. We can't use a lower-ranked PHI because by definition
-/// it doesn't dominate the current block. We can't create a PHI value any
-/// earlier, because we risk creating a PHI value at a location where values do
-/// not in fact merge, thus misrepresenting the truth, and not making the true
-/// live-through value for variable locations.
+/// Secondly, for each variable we perform the same analysis, where each debug
+/// instruction is considered a def, and every instruction where the variable
+/// is in lexical scope as a use. Value propagation is used again to eliminate
+/// any un-necessary PHIs. This gives us a map of each variable to the value
+/// it should have in a block.
///
-/// This algorithm applies to both calculating the availability of values in
-/// the first analysis, and the location of variables in the second. However
-/// for the second we add an extra dimension of pain: creating a variable
-/// location PHI is only valid if, for each incoming edge,
-/// * There is a value for the variable on the incoming edge, and
-/// * All the edges have that value in the same register.
-/// Or put another way: we can only create a variable-location PHI if there is
-/// a matching machine-location PHI, each input to which is the variables value
-/// in the predecessor block.
+/// Once both are complete, we have two maps for each block:
+/// * Variables to the values they should have,
+/// * Values to the register / spill slot they are located in.
+/// After which we can marry-up variable values with a location, and emit
+/// DBG_VALUE instructions specifying those locations. Variable locations may
+/// be dropped in this process due to the desired variable value not being
+/// resident in any machine location, or because there is no PHI value in any
+/// location that accurately represents the desired value. The building of
+/// location lists for each block is left to DbgEntityHistoryCalculator.
///
-/// To accommodate this difference, each point on the lattice is split in
-/// two: a "proposed" PHI and "definite" PHI. Any PHI that can immediately
-/// have a location determined are "definite" PHIs, and no further work is
-/// needed. Otherwise, a location that all non-backedge predecessors agree
-/// on is picked and propagated as a "proposed" PHI value. If that PHI value
-/// is truly live-through, it'll appear on the loop backedges on the next
-/// dataflow iteration, after which the block live-in moves to be a "definite"
-/// PHI. If it's not truly live-through, the variable value will be downgraded
-/// further as we explore the lattice, or remains "proposed" and is considered
-/// invalid once dataflow completes.
+/// This pass is kept efficient because the size of the first SSA problem
+/// is proportional to the working-set size of the function, which the compiler
+/// tries to keep small. (It's also proportional to the number of blocks).
+/// Additionally, we repeatedly perform the second SSA problem analysis with
+/// only the variables and blocks in a single lexical scope, exploiting their
+/// locality.
///
/// ### Terminology
///
@@ -128,15 +62,13 @@
/// contain the appropriate variable value. A value that is a PHI node is
/// occasionally called an mphi.
///
-/// The first dataflow problem is the "machine value location" problem,
+/// The first SSA problem is the "machine value location" problem,
/// because we're determining which machine locations contain which values.
/// The "locations" are constant: what's unknown is what value they contain.
///
-/// The second dataflow problem (the one for variables) is the "variable value
+/// The second SSA problem (the one for variables) is the "variable value
/// problem", because it's determining what values a variable has, rather than
-/// what location those values are placed in. Unfortunately, it's not that
-/// simple, because producing a PHI value always involves picking a location.
-/// This is an imperfection that we just have to accept, at least for now.
+/// what location those values are placed in.
///
/// TODO:
/// Overlapping fragments
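In concrete terms, the rewritten description amounts to two per-block maps that are joined when DBG_VALUEs are emitted: one from variables to the value they should hold, one from values to the machine location holding them; if the second lookup fails, the location is dropped. A heavily simplified, self-contained sketch of that join; the type names below are stand-ins, not the pass's real ValueIDNum/LocIdx machinery:

#include <cstdint>
#include <map>
#include <optional>
#include <string>

// Stand-ins for "which def produced this value", "which register / spill
// slot", and "which source variable".
using ValueNum = uint64_t;
using LocNum = unsigned;
using VarName = std::string;

struct BlockMapsSketch {
  std::map<VarName, ValueNum> VarToValue; // result of the variable-value problem
  std::map<ValueNum, LocNum> ValueToLoc;  // result of the machine-location problem

  // Emit-time join: where (if anywhere) does the variable's desired value live?
  std::optional<LocNum> locationFor(const VarName &Var) const {
    auto ValIt = VarToValue.find(Var);
    if (ValIt == VarToValue.end())
      return std::nullopt; // no known value for this variable here
    auto LocIt = ValueToLoc.find(ValIt->second);
    if (LocIt == ValueToLoc.end())
      return std::nullopt; // value not resident anywhere: drop the location
    return LocIt->second;  // a DBG_VALUE can point at this location
  }
};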
@@ -153,9 +85,10 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/UniqueVector.h"
+#include "llvm/Analysis/IteratedDominanceFrontier.h"
#include "llvm/CodeGen/LexicalScopes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -192,16 +125,18 @@
#include <cassert>
#include <cstdint>
#include <functional>
+#include <limits.h>
+#include <limits>
#include <queue>
#include <tuple>
#include <utility>
#include <vector>
-#include <limits.h>
-#include <limits>
+#include "InstrRefBasedImpl.h"
#include "LiveDebugValues.h"
using namespace llvm;
+using namespace LiveDebugValues;
// SSAUpdaterImpl sets DEBUG_TYPE, change it.
#undef DEBUG_TYPE
@@ -213,730 +148,6 @@ static cl::opt<bool> EmulateOldLDV("emulate-old-livedebugvalues", cl::Hidden,
cl::desc("Act like old LiveDebugValues did"),
cl::init(false));
-namespace {
-
-// The location at which a spilled value resides. It consists of a register and
-// an offset.
-struct SpillLoc {
- unsigned SpillBase;
- StackOffset SpillOffset;
- bool operator==(const SpillLoc &Other) const {
- return std::make_pair(SpillBase, SpillOffset) ==
- std::make_pair(Other.SpillBase, Other.SpillOffset);
- }
- bool operator<(const SpillLoc &Other) const {
- return std::make_tuple(SpillBase, SpillOffset.getFixed(),
- SpillOffset.getScalable()) <
- std::make_tuple(Other.SpillBase, Other.SpillOffset.getFixed(),
- Other.SpillOffset.getScalable());
- }
-};
-
-class LocIdx {
- unsigned Location;
-
- // Default constructor is private, initializing to an illegal location number.
- // Use only for "not an entry" elements in IndexedMaps.
- LocIdx() : Location(UINT_MAX) { }
-
-public:
- #define NUM_LOC_BITS 24
- LocIdx(unsigned L) : Location(L) {
- assert(L < (1 << NUM_LOC_BITS) && "Machine locations must fit in 24 bits");
- }
-
- static LocIdx MakeIllegalLoc() {
- return LocIdx();
- }
-
- bool isIllegal() const {
- return Location == UINT_MAX;
- }
-
- uint64_t asU64() const {
- return Location;
- }
-
- bool operator==(unsigned L) const {
- return Location == L;
- }
-
- bool operator==(const LocIdx &L) const {
- return Location == L.Location;
- }
-
- bool operator!=(unsigned L) const {
- return !(*this == L);
- }
-
- bool operator!=(const LocIdx &L) const {
- return !(*this == L);
- }
-
- bool operator<(const LocIdx &Other) const {
- return Location < Other.Location;
- }
-};
-
-class LocIdxToIndexFunctor {
-public:
- using argument_type = LocIdx;
- unsigned operator()(const LocIdx &L) const {
- return L.asU64();
- }
-};
-
-/// Unique identifier for a value defined by an instruction, as a value type.
-/// Casts back and forth to a uint64_t. Probably replaceable with something less
-/// bit-constrained. Each value identifies the instruction and machine location
-/// where the value is defined, although there may be no corresponding machine
-/// operand for it (ex: regmasks clobbering values). The instructions are
-/// one-based, and definitions that are PHIs have instruction number zero.
-///
-/// The obvious limits of a 1M block function or 1M instruction blocks are
-/// problematic; but by that point we should probably have bailed out of
-/// trying to analyse the function.
-class ValueIDNum {
- uint64_t BlockNo : 20; /// The block where the def happens.
- uint64_t InstNo : 20; /// The Instruction where the def happens.
- /// One based, is distance from start of block.
- uint64_t LocNo : NUM_LOC_BITS; /// The machine location where the def happens.
-
-public:
- // XXX -- temporarily enabled while the live-in / live-out tables are moved
- // to something more type-y
- ValueIDNum() : BlockNo(0xFFFFF),
- InstNo(0xFFFFF),
- LocNo(0xFFFFFF) { }
-
- ValueIDNum(uint64_t Block, uint64_t Inst, uint64_t Loc)
- : BlockNo(Block), InstNo(Inst), LocNo(Loc) { }
-
- ValueIDNum(uint64_t Block, uint64_t Inst, LocIdx Loc)
- : BlockNo(Block), InstNo(Inst), LocNo(Loc.asU64()) { }
-
- uint64_t getBlock() const { return BlockNo; }
- uint64_t getInst() const { return InstNo; }
- uint64_t getLoc() const { return LocNo; }
- bool isPHI() const { return InstNo == 0; }
-
- uint64_t asU64() const {
- uint64_t TmpBlock = BlockNo;
- uint64_t TmpInst = InstNo;
- return TmpBlock << 44ull | TmpInst << NUM_LOC_BITS | LocNo;
- }
-
- static ValueIDNum fromU64(uint64_t v) {
- uint64_t L = (v & 0x3FFF);
- return {v >> 44ull, ((v >> NUM_LOC_BITS) & 0xFFFFF), L};
- }
-
- bool operator<(const ValueIDNum &Other) const {
- return asU64() < Other.asU64();
- }
-
- bool operator==(const ValueIDNum &Other) const {
- return std::tie(BlockNo, InstNo, LocNo) ==
- std::tie(Other.BlockNo, Other.InstNo, Other.LocNo);
- }
-
- bool operator!=(const ValueIDNum &Other) const { return !(*this == Other); }
-
- std::string asString(const std::string &mlocname) const {
- return Twine("Value{bb: ")
- .concat(Twine(BlockNo).concat(
- Twine(", inst: ")
- .concat((InstNo ? Twine(InstNo) : Twine("live-in"))
- .concat(Twine(", loc: ").concat(Twine(mlocname)))
- .concat(Twine("}")))))
- .str();
- }
-
- static ValueIDNum EmptyValue;
-};
-
-} // end anonymous namespace
-
-namespace {
-
-/// Meta qualifiers for a value. Pair of whatever expression is used to qualify
-/// the value, and a Boolean of whether or not it's indirect.
-class DbgValueProperties {
-public:
- DbgValueProperties(const DIExpression *DIExpr, bool Indirect)
- : DIExpr(DIExpr), Indirect(Indirect) {}
-
- /// Extract properties from an existing DBG_VALUE instruction.
- DbgValueProperties(const MachineInstr &MI) {
- assert(MI.isDebugValue());
- DIExpr = MI.getDebugExpression();
- Indirect = MI.getOperand(1).isImm();
- }
-
- bool operator==(const DbgValueProperties &Other) const {
- return std::tie(DIExpr, Indirect) == std::tie(Other.DIExpr, Other.Indirect);
- }
-
- bool operator!=(const DbgValueProperties &Other) const {
- return !(*this == Other);
- }
-
- const DIExpression *DIExpr;
- bool Indirect;
-};
-
-/// Tracker for what values are in machine locations. Listens to the Things
-/// being Done by various instructions, and maintains a table of what machine
-/// locations have what values (as defined by a ValueIDNum).
-///
-/// There are potentially a much larger number of machine locations on the
-/// target machine than the actual working-set size of the function. On x86 for
-/// example, we're extremely unlikely to want to track values through control
-/// or debug registers. To avoid doing so, MLocTracker has several layers of
-/// indirection going on, with two kinds of ``location'':
-/// * A LocID uniquely identifies a register or spill location, with a
-/// predictable value.
-/// * A LocIdx is a key (in the database sense) for a LocID and a ValueIDNum.
-/// Whenever a location is def'd or used by a MachineInstr, we automagically
-/// create a new LocIdx for a location, but not otherwise. This ensures we only
-/// account for locations that are actually used or defined. The cost is another
-/// vector lookup (of LocID -> LocIdx) over any other implementation. This is
-/// fairly cheap, and the compiler tries to reduce the working-set at any one
-/// time in the function anyway.
-///
-/// Register mask operands completely blow this out of the water; I've just
-/// piled hacks on top of hacks to get around that.
-class MLocTracker {
-public:
- MachineFunction &MF;
- const TargetInstrInfo &TII;
- const TargetRegisterInfo &TRI;
- const TargetLowering &TLI;
-
- /// IndexedMap type, mapping from LocIdx to ValueIDNum.
- using LocToValueType = IndexedMap<ValueIDNum, LocIdxToIndexFunctor>;
-
- /// Map of LocIdxes to the ValueIDNums that they store. This is tightly
- /// packed, entries only exist for locations that are being tracked.
- LocToValueType LocIdxToIDNum;
-
- /// "Map" of machine location IDs (i.e., raw register or spill number) to the
- /// LocIdx key / number for that location. There are always at least as many
- /// as the number of registers on the target -- if the value in the register
- /// is not being tracked, then the LocIdx value will be zero. New entries are
- /// appended if a new spill slot begins being tracked.
- /// This, and the corresponding reverse map persist for the analysis of the
- /// whole function, and is necessary for decoding various vectors of
- /// values.
- std::vector<LocIdx> LocIDToLocIdx;
-
- /// Inverse map of LocIDToLocIdx.
- IndexedMap<unsigned, LocIdxToIndexFunctor> LocIdxToLocID;
-
- /// Unique-ification of spill slots. Used to number them -- their LocID
- /// number is the index in SpillLocs minus one plus NumRegs.
- UniqueVector<SpillLoc> SpillLocs;
-
- // If we discover a new machine location, assign it an mphi with this
- // block number.
- unsigned CurBB;
-
- /// Cached local copy of the number of registers the target has.
- unsigned NumRegs;
-
- /// Collection of register mask operands that have been observed. Second part
- /// of pair indicates the instruction that they happened in. Used to
- /// reconstruct where defs happened if we start tracking a location later
- /// on.
- SmallVector<std::pair<const MachineOperand *, unsigned>, 32> Masks;
-
- /// Iterator for locations and the values they contain. Dereferencing
- /// produces a struct/pair containing the LocIdx key for this location,
- /// and a reference to the value currently stored. Simplifies the process
- /// of seeking a particular location.
- class MLocIterator {
- LocToValueType &ValueMap;
- LocIdx Idx;
-
- public:
- class value_type {
- public:
- value_type(LocIdx Idx, ValueIDNum &Value) : Idx(Idx), Value(Value) { }
- const LocIdx Idx; /// Read-only index of this location.
- ValueIDNum &Value; /// Reference to the stored value at this location.
- };
-
- MLocIterator(LocToValueType &ValueMap, LocIdx Idx)
- : ValueMap(ValueMap), Idx(Idx) { }
-
- bool operator==(const MLocIterator &Other) const {
- assert(&ValueMap == &Other.ValueMap);
- return Idx == Other.Idx;
- }
-
- bool operator!=(const MLocIterator &Other) const {
- return !(*this == Other);
- }
-
- void operator++() {
- Idx = LocIdx(Idx.asU64() + 1);
- }
-
- value_type operator*() {
- return value_type(Idx, ValueMap[LocIdx(Idx)]);
- }
- };
-
- MLocTracker(MachineFunction &MF, const TargetInstrInfo &TII,
- const TargetRegisterInfo &TRI, const TargetLowering &TLI)
- : MF(MF), TII(TII), TRI(TRI), TLI(TLI),
- LocIdxToIDNum(ValueIDNum::EmptyValue),
- LocIdxToLocID(0) {
- NumRegs = TRI.getNumRegs();
- reset();
- LocIDToLocIdx.resize(NumRegs, LocIdx::MakeIllegalLoc());
- assert(NumRegs < (1u << NUM_LOC_BITS)); // Detect bit packing failure
-
- // Always track SP. This avoids the implicit clobbering caused by regmasks
- // from affecting its values. (LiveDebugValues disbelieves calls and
- // regmasks that claim to clobber SP).
- Register SP = TLI.getStackPointerRegisterToSaveRestore();
- if (SP) {
- unsigned ID = getLocID(SP, false);
- (void)lookupOrTrackRegister(ID);
- }
- }
-
- /// Produce location ID number for indexing LocIDToLocIdx. Takes the register
- /// or spill number, and flag for whether it's a spill or not.
- unsigned getLocID(Register RegOrSpill, bool isSpill) {
- return (isSpill) ? RegOrSpill.id() + NumRegs - 1 : RegOrSpill.id();
- }
-
- /// Accessor for reading the value at Idx.
- ValueIDNum getNumAtPos(LocIdx Idx) const {
- assert(Idx.asU64() < LocIdxToIDNum.size());
- return LocIdxToIDNum[Idx];
- }
-
- unsigned getNumLocs(void) const { return LocIdxToIDNum.size(); }
-
- /// Reset all locations to contain a PHI value at the designated block. Used
- /// sometimes for actual PHI values, other times to indicate the block entry
- /// value (before any more information is known).
- void setMPhis(unsigned NewCurBB) {
- CurBB = NewCurBB;
- for (auto Location : locations())
- Location.Value = {CurBB, 0, Location.Idx};
- }
-
- /// Load values for each location from array of ValueIDNums. Take current
- /// bbnum just in case we read a value from a hitherto untouched register.
- void loadFromArray(ValueIDNum *Locs, unsigned NewCurBB) {
- CurBB = NewCurBB;
- // Iterate over all tracked locations, and load each locations live-in
- // value into our local index.
- for (auto Location : locations())
- Location.Value = Locs[Location.Idx.asU64()];
- }
-
- /// Wipe any un-necessary location records after traversing a block.
- void reset(void) {
- // We could reset all the location values too; however either loadFromArray
- // or setMPhis should be called before this object is re-used. Just
- // clear Masks, they're definitely not needed.
- Masks.clear();
- }
-
- /// Clear all data. Destroys the LocID <=> LocIdx map, which makes most of
- /// the information in this pass uninterpretable.
- void clear(void) {
- reset();
- LocIDToLocIdx.clear();
- LocIdxToLocID.clear();
- LocIdxToIDNum.clear();
- //SpillLocs.reset(); XXX UniqueVector::reset assumes a SpillLoc casts from 0
- SpillLocs = decltype(SpillLocs)();
-
- LocIDToLocIdx.resize(NumRegs, LocIdx::MakeIllegalLoc());
- }
-
- /// Set a location to a certain value.
- void setMLoc(LocIdx L, ValueIDNum Num) {
- assert(L.asU64() < LocIdxToIDNum.size());
- LocIdxToIDNum[L] = Num;
- }
-
- /// Create a LocIdx for an untracked register ID. Initialize it to either an
- /// mphi value representing a live-in, or a recent register mask clobber.
- LocIdx trackRegister(unsigned ID) {
- assert(ID != 0);
- LocIdx NewIdx = LocIdx(LocIdxToIDNum.size());
- LocIdxToIDNum.grow(NewIdx);
- LocIdxToLocID.grow(NewIdx);
-
- // Default: it's an mphi.
- ValueIDNum ValNum = {CurBB, 0, NewIdx};
- // Was this reg ever touched by a regmask?
- for (const auto &MaskPair : reverse(Masks)) {
- if (MaskPair.first->clobbersPhysReg(ID)) {
- // There was an earlier def we skipped.
- ValNum = {CurBB, MaskPair.second, NewIdx};
- break;
- }
- }
-
- LocIdxToIDNum[NewIdx] = ValNum;
- LocIdxToLocID[NewIdx] = ID;
- return NewIdx;
- }
-
- LocIdx lookupOrTrackRegister(unsigned ID) {
- LocIdx &Index = LocIDToLocIdx[ID];
- if (Index.isIllegal())
- Index = trackRegister(ID);
- return Index;
- }
-
- /// Record a definition of the specified register at the given block / inst.
- /// This doesn't take a ValueIDNum, because the definition and its location
- /// are synonymous.
- void defReg(Register R, unsigned BB, unsigned Inst) {
- unsigned ID = getLocID(R, false);
- LocIdx Idx = lookupOrTrackRegister(ID);
- ValueIDNum ValueID = {BB, Inst, Idx};
- LocIdxToIDNum[Idx] = ValueID;
- }
-
- /// Set a register to a value number. To be used if the value number is
- /// known in advance.
- void setReg(Register R, ValueIDNum ValueID) {
- unsigned ID = getLocID(R, false);
- LocIdx Idx = lookupOrTrackRegister(ID);
- LocIdxToIDNum[Idx] = ValueID;
- }
-
- ValueIDNum readReg(Register R) {
- unsigned ID = getLocID(R, false);
- LocIdx Idx = lookupOrTrackRegister(ID);
- return LocIdxToIDNum[Idx];
- }
-
- /// Reset a register value to zero / empty. Needed to replicate the
- /// VarLoc implementation where a copy to/from a register effectively
- /// clears the contents of the source register. (Values can only have one
- /// machine location in VarLocBasedImpl).
- void wipeRegister(Register R) {
- unsigned ID = getLocID(R, false);
- LocIdx Idx = LocIDToLocIdx[ID];
- LocIdxToIDNum[Idx] = ValueIDNum::EmptyValue;
- }
-
- /// Determine the LocIdx of an existing register.
- LocIdx getRegMLoc(Register R) {
- unsigned ID = getLocID(R, false);
- return LocIDToLocIdx[ID];
- }
-
- /// Record a RegMask operand being executed. Defs any register we currently
- /// track, stores a pointer to the mask in case we have to account for it
- /// later.
- void writeRegMask(const MachineOperand *MO, unsigned CurBB, unsigned InstID) {
- // Ensure SP exists, so that we don't override it later.
- Register SP = TLI.getStackPointerRegisterToSaveRestore();
-
- // Def any register we track that isn't preserved. The regmask
- // terminates the liveness of a register, meaning its value can't be
- // relied upon -- we represent this by giving it a new value.
- for (auto Location : locations()) {
- unsigned ID = LocIdxToLocID[Location.Idx];
- // Don't clobber SP, even if the mask says it's clobbered.
- if (ID < NumRegs && ID != SP && MO->clobbersPhysReg(ID))
- defReg(ID, CurBB, InstID);
- }
- Masks.push_back(std::make_pair(MO, InstID));
- }
-
- /// Find LocIdx for SpillLoc \p L, creating a new one if it's not tracked.
- LocIdx getOrTrackSpillLoc(SpillLoc L) {
- unsigned SpillID = SpillLocs.idFor(L);
- if (SpillID == 0) {
- SpillID = SpillLocs.insert(L);
- unsigned L = getLocID(SpillID, true);
- LocIdx Idx = LocIdx(LocIdxToIDNum.size()); // New idx
- LocIdxToIDNum.grow(Idx);
- LocIdxToLocID.grow(Idx);
- LocIDToLocIdx.push_back(Idx);
- LocIdxToLocID[Idx] = L;
- return Idx;
- } else {
- unsigned L = getLocID(SpillID, true);
- LocIdx Idx = LocIDToLocIdx[L];
- return Idx;
- }
- }
-
- /// Set the value stored in a spill slot.
- void setSpill(SpillLoc L, ValueIDNum ValueID) {
- LocIdx Idx = getOrTrackSpillLoc(L);
- LocIdxToIDNum[Idx] = ValueID;
- }
-
- /// Read whatever value is in a spill slot, or None if it isn't tracked.
- Optional<ValueIDNum> readSpill(SpillLoc L) {
- unsigned SpillID = SpillLocs.idFor(L);
- if (SpillID == 0)
- return None;
-
- unsigned LocID = getLocID(SpillID, true);
- LocIdx Idx = LocIDToLocIdx[LocID];
- return LocIdxToIDNum[Idx];
- }
-
- /// Determine the LocIdx of a spill slot. Return None if it previously
- /// hasn't had a value assigned.
- Optional<LocIdx> getSpillMLoc(SpillLoc L) {
- unsigned SpillID = SpillLocs.idFor(L);
- if (SpillID == 0)
- return None;
- unsigned LocNo = getLocID(SpillID, true);
- return LocIDToLocIdx[LocNo];
- }
-
- /// Return true if Idx is a spill machine location.
- bool isSpill(LocIdx Idx) const {
- return LocIdxToLocID[Idx] >= NumRegs;
- }
-
- MLocIterator begin() {
- return MLocIterator(LocIdxToIDNum, 0);
- }
-
- MLocIterator end() {
- return MLocIterator(LocIdxToIDNum, LocIdxToIDNum.size());
- }
-
- /// Return a range over all locations currently tracked.
- iterator_range<MLocIterator> locations() {
- return llvm::make_range(begin(), end());
- }
-
- std::string LocIdxToName(LocIdx Idx) const {
- unsigned ID = LocIdxToLocID[Idx];
- if (ID >= NumRegs)
- return Twine("slot ").concat(Twine(ID - NumRegs)).str();
- else
- return TRI.getRegAsmName(ID).str();
- }
-
- std::string IDAsString(const ValueIDNum &Num) const {
- std::string DefName = LocIdxToName(Num.getLoc());
- return Num.asString(DefName);
- }
-
- LLVM_DUMP_METHOD
- void dump() {
- for (auto Location : locations()) {
- std::string MLocName = LocIdxToName(Location.Value.getLoc());
- std::string DefName = Location.Value.asString(MLocName);
- dbgs() << LocIdxToName(Location.Idx) << " --> " << DefName << "\n";
- }
- }
-
- LLVM_DUMP_METHOD
- void dump_mloc_map() {
- for (auto Location : locations()) {
- std::string foo = LocIdxToName(Location.Idx);
- dbgs() << "Idx " << Location.Idx.asU64() << " " << foo << "\n";
- }
- }
-
- /// Create a DBG_VALUE based on machine location \p MLoc. Qualify it with the
- /// information in \pProperties, for variable Var. Don't insert it anywhere,
- /// just return the builder for it.
- MachineInstrBuilder emitLoc(Optional<LocIdx> MLoc, const DebugVariable &Var,
- const DbgValueProperties &Properties) {
- DebugLoc DL = DILocation::get(Var.getVariable()->getContext(), 0, 0,
- Var.getVariable()->getScope(),
- const_cast<DILocation *>(Var.getInlinedAt()));
- auto MIB = BuildMI(MF, DL, TII.get(TargetOpcode::DBG_VALUE));
-
- const DIExpression *Expr = Properties.DIExpr;
- if (!MLoc) {
- // No location -> DBG_VALUE $noreg
- MIB.addReg(0, RegState::Debug);
- MIB.addReg(0, RegState::Debug);
- } else if (LocIdxToLocID[*MLoc] >= NumRegs) {
- unsigned LocID = LocIdxToLocID[*MLoc];
- const SpillLoc &Spill = SpillLocs[LocID - NumRegs + 1];
-
- auto *TRI = MF.getSubtarget().getRegisterInfo();
- Expr = TRI->prependOffsetExpression(Expr, DIExpression::ApplyOffset,
- Spill.SpillOffset);
- unsigned Base = Spill.SpillBase;
- MIB.addReg(Base, RegState::Debug);
- MIB.addImm(0);
- } else {
- unsigned LocID = LocIdxToLocID[*MLoc];
- MIB.addReg(LocID, RegState::Debug);
- if (Properties.Indirect)
- MIB.addImm(0);
- else
- MIB.addReg(0, RegState::Debug);
- }
-
- MIB.addMetadata(Var.getVariable());
- MIB.addMetadata(Expr);
- return MIB;
- }
-};
-
-/// Class recording the (high level) _value_ of a variable. Identifies either
-/// the value of the variable as a ValueIDNum, or a constant MachineOperand.
-/// This class also stores meta-information about how the value is qualified.
-/// Used to reason about variable values when performing the second
-/// (DebugVariable specific) dataflow analysis.
-class DbgValue {
-public:
- union {
- /// If Kind is Def, the value number that this value is based on.
- ValueIDNum ID;
- /// If Kind is Const, the MachineOperand defining this value.
- MachineOperand MO;
- /// For a NoVal DbgValue, which block it was generated in.
- unsigned BlockNo;
- };
- /// Qualifiers for the ValueIDNum above.
- DbgValueProperties Properties;
-
- typedef enum {
- Undef, // Represents a DBG_VALUE $noreg in the transfer function only.
- Def, // This value is defined by an inst, or is a PHI value.
- Const, // A constant value contained in the MachineOperand field.
- Proposed, // This is a tentative PHI value, which may be confirmed or
- // invalidated later.
- NoVal // Empty DbgValue, generated during dataflow. BlockNo stores
- // which block this was generated in.
- } KindT;
- /// Discriminator for whether this is a constant or an in-program value.
- KindT Kind;
-
- DbgValue(const ValueIDNum &Val, const DbgValueProperties &Prop, KindT Kind)
- : ID(Val), Properties(Prop), Kind(Kind) {
- assert(Kind == Def || Kind == Proposed);
- }
-
- DbgValue(unsigned BlockNo, const DbgValueProperties &Prop, KindT Kind)
- : BlockNo(BlockNo), Properties(Prop), Kind(Kind) {
- assert(Kind == NoVal);
- }
-
- DbgValue(const MachineOperand &MO, const DbgValueProperties &Prop, KindT Kind)
- : MO(MO), Properties(Prop), Kind(Kind) {
- assert(Kind == Const);
- }
-
- DbgValue(const DbgValueProperties &Prop, KindT Kind)
- : Properties(Prop), Kind(Kind) {
- assert(Kind == Undef &&
- "Empty DbgValue constructor must pass in Undef kind");
- }
-
- void dump(const MLocTracker *MTrack) const {
- if (Kind == Const) {
- MO.dump();
- } else if (Kind == NoVal) {
- dbgs() << "NoVal(" << BlockNo << ")";
- } else if (Kind == Proposed) {
- dbgs() << "VPHI(" << MTrack->IDAsString(ID) << ")";
- } else {
- assert(Kind == Def);
- dbgs() << MTrack->IDAsString(ID);
- }
- if (Properties.Indirect)
- dbgs() << " indir";
- if (Properties.DIExpr)
- dbgs() << " " << *Properties.DIExpr;
- }
-
- bool operator==(const DbgValue &Other) const {
- if (std::tie(Kind, Properties) != std::tie(Other.Kind, Other.Properties))
- return false;
- else if (Kind == Proposed && ID != Other.ID)
- return false;
- else if (Kind == Def && ID != Other.ID)
- return false;
- else if (Kind == NoVal && BlockNo != Other.BlockNo)
- return false;
- else if (Kind == Const)
- return MO.isIdenticalTo(Other.MO);
-
- return true;
- }
-
- bool operator!=(const DbgValue &Other) const { return !(*this == Other); }
-};
-
-/// Types for recording sets of variable fragments that overlap. For a given
-/// local variable, we record all other fragments of that variable that could
-/// overlap it, to reduce search time.
-using FragmentOfVar =
- std::pair<const DILocalVariable *, DIExpression::FragmentInfo>;
-using OverlapMap =
- DenseMap<FragmentOfVar, SmallVector<DIExpression::FragmentInfo, 1>>;
-
-/// Collection of DBG_VALUEs observed when traversing a block. Records each
-/// variable and the value the DBG_VALUE refers to. Requires the machine value
-/// location dataflow algorithm to have run already, so that values can be
-/// identified.
-class VLocTracker {
-public:
- /// Map DebugVariable to the latest Value it's defined to have.
- /// Needs to be a MapVector because we determine order-in-the-input-MIR from
- /// the order in this container.
- /// We only retain the last DbgValue in each block for each variable, to
- /// determine the blocks live-out variable value. The Vars container forms the
- /// transfer function for this block, as part of the dataflow analysis. The
- /// movement of values between locations inside of a block is handled at a
- /// much later stage, in the TransferTracker class.
- MapVector<DebugVariable, DbgValue> Vars;
- DenseMap<DebugVariable, const DILocation *> Scopes;
- MachineBasicBlock *MBB;
-
-public:
- VLocTracker() {}
-
- void defVar(const MachineInstr &MI, const DbgValueProperties &Properties,
- Optional<ValueIDNum> ID) {
- assert(MI.isDebugValue() || MI.isDebugRef());
- DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
- MI.getDebugLoc()->getInlinedAt());
- DbgValue Rec = (ID) ? DbgValue(*ID, Properties, DbgValue::Def)
- : DbgValue(Properties, DbgValue::Undef);
-
- // Attempt insertion; overwrite if it's already mapped.
- auto Result = Vars.insert(std::make_pair(Var, Rec));
- if (!Result.second)
- Result.first->second = Rec;
- Scopes[Var] = MI.getDebugLoc().get();
- }
-
- void defVar(const MachineInstr &MI, const MachineOperand &MO) {
- // Only DBG_VALUEs can define constant-valued variables.
- assert(MI.isDebugValue());
- DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
- MI.getDebugLoc()->getInlinedAt());
- DbgValueProperties Properties(MI);
- DbgValue Rec = DbgValue(MO, Properties, DbgValue::Const);
-
- // Attempt insertion; overwrite if it's already mapped.
- auto Result = Vars.insert(std::make_pair(Var, Rec));
- if (!Result.second)
- Result.first->second = Rec;
- Scopes[Var] = MI.getDebugLoc().get();
- }
-};
-
/// Tracker for converting machine value locations and variable values into
/// variable locations (the output of LiveDebugValues), recorded as DBG_VALUEs
/// specifying block live-in locations and transfers within blocks.
@@ -985,12 +196,12 @@ public:
/// between TransferTrackers view of variable locations and MLocTrackers. For
/// example, MLocTracker observes all clobbers, but TransferTracker lazily
/// does not.
- std::vector<ValueIDNum> VarLocs;
+ SmallVector<ValueIDNum, 32> VarLocs;
/// Map from LocIdxes to which DebugVariables are based on that location.
/// Maintained while stepping through the block. Not accurate if
/// VarLocs[Idx] != MTracker->LocIdxToIDNum[Idx].
- std::map<LocIdx, SmallSet<DebugVariable, 4>> ActiveMLocs;
+ DenseMap<LocIdx, SmallSet<DebugVariable, 4>> ActiveMLocs;
/// Map from DebugVariable to its current location and qualifying meta
/// information. To be used in conjunction with ActiveMLocs to construct
@@ -1062,6 +273,8 @@ public:
// Map of the preferred location for each value.
std::map<ValueIDNum, LocIdx> ValueToLoc;
+ ActiveMLocs.reserve(VLocs.size());
+ ActiveVLocs.reserve(VLocs.size());
// Produce a map of value numbers to the current machine locs they live
// in. When emulating VarLocBasedImpl, there should only be one
@@ -1088,7 +301,7 @@ public:
for (auto Var : VLocs) {
if (Var.second.Kind == DbgValue::Const) {
PendingDbgValues.push_back(
- emitMOLoc(Var.second.MO, Var.first, Var.second.Properties));
+ emitMOLoc(*Var.second.MO, Var.first, Var.second.Properties));
continue;
}
@@ -1142,7 +355,7 @@ public:
// instruction or similar with an instruction number, where it doesn't
// actually define a new value, instead it moves a value. In case this
// happens, discard.
- if (MTracker->LocIdxToIDNum[L] != Use.ID)
+ if (MTracker->readMLoc(L) != Use.ID)
continue;
// If a different debug instruction defined the variable value / location
@@ -1220,7 +433,6 @@ public:
DIExpression::prepend(Prop.DIExpr, DIExpression::EntryValue);
Register Reg = MTracker->LocIdxToLocID[Num.getLoc()];
MachineOperand MO = MachineOperand::CreateReg(Reg, false);
- MO.setIsDebug(true);
PendingDbgValues.push_back(emitMOLoc(MO, Var, {NewExpr, Prop.Indirect}));
return true;
@@ -1274,12 +486,12 @@ public:
// Check whether our local copy of values-by-location in #VarLocs is out of
// date. Wipe old tracking data for the location if it's been clobbered in
// the meantime.
- if (MTracker->getNumAtPos(NewLoc) != VarLocs[NewLoc.asU64()]) {
+ if (MTracker->readMLoc(NewLoc) != VarLocs[NewLoc.asU64()]) {
for (auto &P : ActiveMLocs[NewLoc]) {
ActiveVLocs.erase(P);
}
ActiveMLocs[NewLoc.asU64()].clear();
- VarLocs[NewLoc.asU64()] = MTracker->getNumAtPos(NewLoc);
+ VarLocs[NewLoc.asU64()] = MTracker->readMLoc(NewLoc);
}
ActiveMLocs[NewLoc].insert(Var);
@@ -1358,6 +570,8 @@ public:
flushDbgValues(Pos, nullptr);
+ // Re-find ActiveMLocIt; the iterator could have been invalidated.
+ ActiveMLocIt = ActiveMLocs.find(MLoc);
ActiveMLocIt->second.clear();
}
@@ -1367,21 +581,23 @@ public:
void transferMlocs(LocIdx Src, LocIdx Dst, MachineBasicBlock::iterator Pos) {
// Does Src still contain the value num we expect? If not, it's been
// clobbered in the meantime, and our variable locations are stale.
- if (VarLocs[Src.asU64()] != MTracker->getNumAtPos(Src))
+ if (VarLocs[Src.asU64()] != MTracker->readMLoc(Src))
return;
// assert(ActiveMLocs[Dst].size() == 0);
//^^^ Legitimate scenario on account of un-clobbered slot being assigned to?
- ActiveMLocs[Dst] = ActiveMLocs[Src];
+
+ // Move set of active variables from one location to another.
+ auto MovingVars = ActiveMLocs[Src];
+ ActiveMLocs[Dst] = MovingVars;
VarLocs[Dst.asU64()] = VarLocs[Src.asU64()];
// For each variable based on Src; create a location at Dst.
- for (auto &Var : ActiveMLocs[Src]) {
+ for (auto &Var : MovingVars) {
auto ActiveVLocIt = ActiveVLocs.find(Var);
assert(ActiveVLocIt != ActiveVLocs.end());
ActiveVLocIt->second.Loc = Dst;
- assert(Dst != 0);
MachineInstr *MI =
MTracker->emitLoc(Dst, Var, ActiveVLocIt->second.Properties);
PendingDbgValues.push_back(MI);
@@ -1413,306 +629,245 @@ public:
}
};
-class InstrRefBasedLDV : public LDVImpl {
-private:
- using FragmentInfo = DIExpression::FragmentInfo;
- using OptFragmentInfo = Optional<DIExpression::FragmentInfo>;
-
- // Helper while building OverlapMap, a map of all fragments seen for a given
- // DILocalVariable.
- using VarToFragments =
- DenseMap<const DILocalVariable *, SmallSet<FragmentInfo, 4>>;
-
- /// Machine location/value transfer function, a mapping of which locations
- /// are assigned which new values.
- using MLocTransferMap = std::map<LocIdx, ValueIDNum>;
-
- /// Live in/out structure for the variable values: a per-block map of
- /// variables to their values. XXX, better name?
- using LiveIdxT =
- DenseMap<const MachineBasicBlock *, DenseMap<DebugVariable, DbgValue> *>;
-
- using VarAndLoc = std::pair<DebugVariable, DbgValue>;
-
- /// Type for a live-in value: the predecessor block, and its value.
- using InValueT = std::pair<MachineBasicBlock *, DbgValue *>;
-
- /// Vector (per block) of a collection (inner smallvector) of live-ins.
- /// Used as the result type for the variable value dataflow problem.
- using LiveInsT = SmallVector<SmallVector<VarAndLoc, 8>, 8>;
-
- const TargetRegisterInfo *TRI;
- const TargetInstrInfo *TII;
- const TargetFrameLowering *TFI;
- const MachineFrameInfo *MFI;
- BitVector CalleeSavedRegs;
- LexicalScopes LS;
- TargetPassConfig *TPC;
-
- /// Object to track machine locations as we step through a block. Could
- /// probably be a field rather than a pointer, as it's always used.
- MLocTracker *MTracker;
+//===----------------------------------------------------------------------===//
+// Implementation
+//===----------------------------------------------------------------------===//
- /// Number of the current block LiveDebugValues is stepping through.
- unsigned CurBB;
+ValueIDNum ValueIDNum::EmptyValue = {UINT_MAX, UINT_MAX, UINT_MAX};
+ValueIDNum ValueIDNum::TombstoneValue = {UINT_MAX, UINT_MAX, UINT_MAX - 1};
- /// Number of the current instruction LiveDebugValues is evaluating.
- unsigned CurInst;
+#ifndef NDEBUG
+void DbgValue::dump(const MLocTracker *MTrack) const {
+ if (Kind == Const) {
+ MO->dump();
+ } else if (Kind == NoVal) {
+ dbgs() << "NoVal(" << BlockNo << ")";
+ } else if (Kind == VPHI) {
+ dbgs() << "VPHI(" << BlockNo << "," << MTrack->IDAsString(ID) << ")";
+ } else {
+ assert(Kind == Def);
+ dbgs() << MTrack->IDAsString(ID);
+ }
+ if (Properties.Indirect)
+ dbgs() << " indir";
+ if (Properties.DIExpr)
+ dbgs() << " " << *Properties.DIExpr;
+}
+#endif
- /// Variable tracker -- listens to DBG_VALUEs occurring as InstrRefBasedImpl
- /// steps through a block. Reads the values at each location from the
- /// MLocTracker object.
- VLocTracker *VTracker;
+MLocTracker::MLocTracker(MachineFunction &MF, const TargetInstrInfo &TII,
+ const TargetRegisterInfo &TRI,
+ const TargetLowering &TLI)
+ : MF(MF), TII(TII), TRI(TRI), TLI(TLI),
+ LocIdxToIDNum(ValueIDNum::EmptyValue), LocIdxToLocID(0) {
+ NumRegs = TRI.getNumRegs();
+ reset();
+ LocIDToLocIdx.resize(NumRegs, LocIdx::MakeIllegalLoc());
+ assert(NumRegs < (1u << NUM_LOC_BITS)); // Detect bit packing failure
+
+ // Always track SP. This avoids the implicit clobbering caused by regmasks
+ // from affecting its values. (LiveDebugValues disbelieves calls and
+ // regmasks that claim to clobber SP).
+ Register SP = TLI.getStackPointerRegisterToSaveRestore();
+ if (SP) {
+ unsigned ID = getLocID(SP);
+ (void)lookupOrTrackRegister(ID);
+
+ for (MCRegAliasIterator RAI(SP, &TRI, true); RAI.isValid(); ++RAI)
+ SPAliases.insert(*RAI);
+ }
+
+ // Build some common stack positions -- full registers being spilt to the
+ // stack.
+ StackSlotIdxes.insert({{8, 0}, 0});
+ StackSlotIdxes.insert({{16, 0}, 1});
+ StackSlotIdxes.insert({{32, 0}, 2});
+ StackSlotIdxes.insert({{64, 0}, 3});
+ StackSlotIdxes.insert({{128, 0}, 4});
+ StackSlotIdxes.insert({{256, 0}, 5});
+ StackSlotIdxes.insert({{512, 0}, 6});
+
+ // Traverse all the subregister idxes, and ensure there's an index for them.
+ // Duplicates are no problem: we're interested in their position in the
+ // stack slot; we don't want to type the slot.
+ for (unsigned int I = 1; I < TRI.getNumSubRegIndices(); ++I) {
+ unsigned Size = TRI.getSubRegIdxSize(I);
+ unsigned Offs = TRI.getSubRegIdxOffset(I);
+ unsigned Idx = StackSlotIdxes.size();
+
+ // Some subregs have -1, -2 and so forth fed into their fields, to mean
+ // special backend things. Ignore those.
+ if (Size > 60000 || Offs > 60000)
+ continue;
- /// Tracker for transfers, listens to DBG_VALUEs and transfers of values
- /// between locations during stepping, creates new DBG_VALUEs when values move
- /// location.
- TransferTracker *TTracker;
+ StackSlotIdxes.insert({{Size, Offs}, Idx});
+ }
- /// Blocks which are artificial, i.e. blocks which exclusively contain
- /// instructions without DebugLocs, or with line 0 locations.
- SmallPtrSet<const MachineBasicBlock *, 16> ArtificialBlocks;
+ for (auto &Idx : StackSlotIdxes)
+ StackIdxesToPos[Idx.second] = Idx.first;
- // Mapping of blocks to and from their RPOT order.
- DenseMap<unsigned int, MachineBasicBlock *> OrderToBB;
- DenseMap<MachineBasicBlock *, unsigned int> BBToOrder;
- DenseMap<unsigned, unsigned> BBNumToRPO;
+ NumSlotIdxes = StackSlotIdxes.size();
+}
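
The slot-index scheme built by this constructor can be illustrated with a small, self-contained model (standard C++ containers standing in for the patch's map types; the numbers are illustrative, not taken from any target): each distinct {size-in-bits, offset} pair is interned once, and every tracked spill slot then expands into one machine location per interned index.

#include <cstdio>
#include <map>
#include <utility>

int main() {
  // Model of MLocTracker::StackSlotIdxes: intern {size, offset} pairs.
  std::map<std::pair<unsigned, unsigned>, unsigned> StackSlotIdxes;
  for (unsigned Size : {8u, 16u, 32u, 64u, 128u, 256u, 512u})
    StackSlotIdxes.insert({{Size, 0u}, (unsigned)StackSlotIdxes.size()});
  // A subregister occupying bits [32, 64) of a slot gets its own index too,
  // mirroring the subregister-index traversal above.
  StackSlotIdxes.insert({{32u, 32u}, (unsigned)StackSlotIdxes.size()});

  unsigned NumSlotIdxes = StackSlotIdxes.size();
  // Every tracked spill slot contributes NumSlotIdxes locations, so a 64-bit
  // store and a 32-bit store to the same slot are tracked as distinct values.
  std::printf("indexes per spill slot: %u\n", NumSlotIdxes);
  std::printf("index of {32, 0}: %u\n", StackSlotIdxes.at({32u, 0u}));
  return 0;
}
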
- /// Pair of MachineInstr, and its 1-based offset into the containing block.
- using InstAndNum = std::pair<const MachineInstr *, unsigned>;
- /// Map from debug instruction number to the MachineInstr labelled with that
- /// number, and its location within the function. Used to transform
- /// instruction numbers in DBG_INSTR_REFs into machine value numbers.
- std::map<uint64_t, InstAndNum> DebugInstrNumToInstr;
+LocIdx MLocTracker::trackRegister(unsigned ID) {
+ assert(ID != 0);
+ LocIdx NewIdx = LocIdx(LocIdxToIDNum.size());
+ LocIdxToIDNum.grow(NewIdx);
+ LocIdxToLocID.grow(NewIdx);
+
+ // Default: it's an mphi.
+ ValueIDNum ValNum = {CurBB, 0, NewIdx};
+ // Was this reg ever touched by a regmask?
+ for (const auto &MaskPair : reverse(Masks)) {
+ if (MaskPair.first->clobbersPhysReg(ID)) {
+ // There was an earlier def we skipped.
+ ValNum = {CurBB, MaskPair.second, NewIdx};
+ break;
+ }
+ }
- /// Record of where we observed a DBG_PHI instruction.
- class DebugPHIRecord {
- public:
- uint64_t InstrNum; ///< Instruction number of this DBG_PHI.
- MachineBasicBlock *MBB; ///< Block where DBG_PHI occurred.
- ValueIDNum ValueRead; ///< The value number read by the DBG_PHI.
- LocIdx ReadLoc; ///< Register/Stack location the DBG_PHI reads.
+ LocIdxToIDNum[NewIdx] = ValNum;
+ LocIdxToLocID[NewIdx] = ID;
+ return NewIdx;
+}
- operator unsigned() const { return InstrNum; }
- };
+void MLocTracker::writeRegMask(const MachineOperand *MO, unsigned CurBB,
+ unsigned InstID) {
+ // Def any register we track that isn't preserved. The regmask
+ // terminates the liveness of a register, meaning its value can't be
+ // relied upon -- we represent this by giving it a new value.
+ for (auto Location : locations()) {
+ unsigned ID = LocIdxToLocID[Location.Idx];
+ // Don't clobber SP, even if the mask says it's clobbered.
+ if (ID < NumRegs && !SPAliases.count(ID) && MO->clobbersPhysReg(ID))
+ defReg(ID, CurBB, InstID);
+ }
+ Masks.push_back(std::make_pair(MO, InstID));
+}
- /// Map from instruction numbers defined by DBG_PHIs to a record of what that
- /// DBG_PHI read and where. Populated and edited during the machine value
- /// location problem -- we use LLVMs SSA Updater to fix changes by
- /// optimizations that destroy PHI instructions.
- SmallVector<DebugPHIRecord, 32> DebugPHINumToValue;
-
- // Map of overlapping variable fragments.
- OverlapMap OverlapFragments;
- VarToFragments SeenFragments;
-
- /// Tests whether this instruction is a spill to a stack slot.
- bool isSpillInstruction(const MachineInstr &MI, MachineFunction *MF);
-
- /// Decide if @MI is a spill instruction and return true if it is. We use 2
- /// criteria to make this decision:
- /// - Is this instruction a store to a spill slot?
- /// - Is there a register operand that is both used and killed?
- /// TODO: Store optimization can fold spills into other stores (including
- /// other spills). We do not handle this yet (more than one memory operand).
- bool isLocationSpill(const MachineInstr &MI, MachineFunction *MF,
- unsigned &Reg);
-
- /// If a given instruction is identified as a spill, return the spill slot
- /// and set \p Reg to the spilled register.
- Optional<SpillLoc> isRestoreInstruction(const MachineInstr &MI,
- MachineFunction *MF, unsigned &Reg);
-
- /// Given a spill instruction, extract the register and offset used to
- /// address the spill slot in a target independent way.
- SpillLoc extractSpillBaseRegAndOffset(const MachineInstr &MI);
-
- /// Observe a single instruction while stepping through a block.
- void process(MachineInstr &MI, ValueIDNum **MLiveOuts = nullptr,
- ValueIDNum **MLiveIns = nullptr);
-
- /// Examines whether \p MI is a DBG_VALUE and notifies trackers.
- /// \returns true if MI was recognized and processed.
- bool transferDebugValue(const MachineInstr &MI);
-
- /// Examines whether \p MI is a DBG_INSTR_REF and notifies trackers.
- /// \returns true if MI was recognized and processed.
- bool transferDebugInstrRef(MachineInstr &MI, ValueIDNum **MLiveOuts,
- ValueIDNum **MLiveIns);
-
- /// Stores value-information about where this PHI occurred, and what
- /// instruction number is associated with it.
- /// \returns true if MI was recognized and processed.
- bool transferDebugPHI(MachineInstr &MI);
-
- /// Examines whether \p MI is copy instruction, and notifies trackers.
- /// \returns true if MI was recognized and processed.
- bool transferRegisterCopy(MachineInstr &MI);
-
- /// Examines whether \p MI is stack spill or restore instruction, and
- /// notifies trackers. \returns true if MI was recognized and processed.
- bool transferSpillOrRestoreInst(MachineInstr &MI);
-
- /// Examines \p MI for any registers that it defines, and notifies trackers.
- void transferRegisterDef(MachineInstr &MI);
-
- /// Copy one location to the other, accounting for movement of subregisters
- /// too.
- void performCopy(Register Src, Register Dst);
-
- void accumulateFragmentMap(MachineInstr &MI);
-
- /// Determine the machine value number referred to by (potentially several)
- /// DBG_PHI instructions. Block duplication and tail folding can duplicate
- /// DBG_PHIs, shifting the position where values in registers merge, and
- /// forming another mini-ssa problem to solve.
- /// \p Here the position of a DBG_INSTR_REF seeking a machine value number
- /// \p InstrNum Debug instruction number defined by DBG_PHI instructions.
- /// \returns The machine value number at position Here, or None.
- Optional<ValueIDNum> resolveDbgPHIs(MachineFunction &MF,
- ValueIDNum **MLiveOuts,
- ValueIDNum **MLiveIns, MachineInstr &Here,
- uint64_t InstrNum);
-
- /// Step through the function, recording register definitions and movements
- /// in an MLocTracker. Convert the observations into a per-block transfer
- /// function in \p MLocTransfer, suitable for using with the machine value
- /// location dataflow problem.
- void
- produceMLocTransferFunction(MachineFunction &MF,
- SmallVectorImpl<MLocTransferMap> &MLocTransfer,
- unsigned MaxNumBlocks);
-
- /// Solve the machine value location dataflow problem. Takes as input the
- /// transfer functions in \p MLocTransfer. Writes the output live-in and
- /// live-out arrays to the (initialized to zero) multidimensional arrays in
- /// \p MInLocs and \p MOutLocs. The outer dimension is indexed by block
- /// number, the inner by LocIdx.
- void mlocDataflow(ValueIDNum **MInLocs, ValueIDNum **MOutLocs,
- SmallVectorImpl<MLocTransferMap> &MLocTransfer);
-
- /// Perform a control flow join (lattice value meet) of the values in machine
- /// locations at \p MBB. Follows the algorithm described in the file-comment,
- /// reading live-outs of predecessors from \p OutLocs, the current live ins
- /// from \p InLocs, and assigning the newly computed live ins back into
- /// \p InLocs. \returns two bools -- the first indicates whether a change
- /// was made, the second whether a lattice downgrade occurred. If the latter
- /// is true, revisiting this block is necessary.
- std::tuple<bool, bool>
- mlocJoin(MachineBasicBlock &MBB,
- SmallPtrSet<const MachineBasicBlock *, 16> &Visited,
- ValueIDNum **OutLocs, ValueIDNum *InLocs);
-
- /// Solve the variable value dataflow problem, for a single lexical scope.
- /// Uses the algorithm from the file comment to resolve control flow joins,
- /// although there are extra hacks, see vlocJoin. Reads the
- /// locations of values from the \p MInLocs and \p MOutLocs arrays (see
- /// mlocDataflow) and reads the variable values transfer function from
- /// \p AllTheVlocs. Live-in and Live-out variable values are stored locally,
- /// with the live-ins permanently stored to \p Output once the fixedpoint is
- /// reached.
- /// \p VarsWeCareAbout contains a collection of the variables in \p Scope
- /// that we should be tracking.
- /// \p AssignBlocks contains the set of blocks that aren't in \p Scope, but
- /// which do contain DBG_VALUEs, which VarLocBasedImpl tracks locations
- /// through.
- void vlocDataflow(const LexicalScope *Scope, const DILocation *DILoc,
- const SmallSet<DebugVariable, 4> &VarsWeCareAbout,
- SmallPtrSetImpl<MachineBasicBlock *> &AssignBlocks,
- LiveInsT &Output, ValueIDNum **MOutLocs,
- ValueIDNum **MInLocs,
- SmallVectorImpl<VLocTracker> &AllTheVLocs);
-
- /// Compute the live-ins to a block, considering control flow merges according
- /// to the method in the file comment. Live out and live in variable values
- /// are stored in \p VLOCOutLocs and \p VLOCInLocs. The live-ins for \p MBB
- /// are computed and stored into \p VLOCInLocs. \returns true if the live-ins
- /// are modified.
- /// \p InLocsT Output argument, storage for calculated live-ins.
- /// \returns two bools -- the first indicates whether a change
- /// was made, the second whether a lattice downgrade occurred. If the latter
- /// is true, revisiting this block is necessary.
- std::tuple<bool, bool>
- vlocJoin(MachineBasicBlock &MBB, LiveIdxT &VLOCOutLocs, LiveIdxT &VLOCInLocs,
- SmallPtrSet<const MachineBasicBlock *, 16> *VLOCVisited,
- unsigned BBNum, const SmallSet<DebugVariable, 4> &AllVars,
- ValueIDNum **MOutLocs, ValueIDNum **MInLocs,
- SmallPtrSet<const MachineBasicBlock *, 8> &InScopeBlocks,
- SmallPtrSet<const MachineBasicBlock *, 8> &BlocksToExplore,
- DenseMap<DebugVariable, DbgValue> &InLocsT);
-
- /// Continue exploration of the variable-value lattice, as explained in the
- /// file-level comment. \p OldLiveInLocation contains the current
- /// exploration position, from which we need to descend further. \p Values
- /// contains the set of live-in values, \p CurBlockRPONum the RPO number of
- /// the current block, and \p CandidateLocations a set of locations that
- /// should be considered as PHI locations, if we reach the bottom of the
- /// lattice. \returns true if we should downgrade; the value is the agreeing
- /// value number in a non-backedge predecessor.
- bool vlocDowngradeLattice(const MachineBasicBlock &MBB,
- const DbgValue &OldLiveInLocation,
- const SmallVectorImpl<InValueT> &Values,
- unsigned CurBlockRPONum);
-
- /// For the given block and live-outs feeding into it, try to find a
- /// machine location where they all join. If a solution for all predecessors
- /// can't be found, a location where all non-backedge-predecessors join
- /// will be returned instead. While this method finds a join location, this
- /// says nothing as to whether it should be used.
- /// \returns Pair of value ID if found, and true when the correct value
- /// is available on all predecessor edges, or false if it's only available
- /// for non-backedge predecessors.
- std::tuple<Optional<ValueIDNum>, bool>
- pickVPHILoc(MachineBasicBlock &MBB, const DebugVariable &Var,
- const LiveIdxT &LiveOuts, ValueIDNum **MOutLocs,
- ValueIDNum **MInLocs,
- const SmallVectorImpl<MachineBasicBlock *> &BlockOrders);
-
- /// Given the solutions to the two dataflow problems, machine value locations
- /// in \p MInLocs and live-in variable values in \p SavedLiveIns, runs the
- /// TransferTracker class over the function to produce live-in and transfer
- /// DBG_VALUEs, then inserts them. Groups of DBG_VALUEs are inserted in the
- /// order given by AllVarsNumbering -- this could be any stable order, but
- /// right now "order of appearence in function, when explored in RPO", so
- /// that we can compare explictly against VarLocBasedImpl.
- void emitLocations(MachineFunction &MF, LiveInsT SavedLiveIns,
- ValueIDNum **MOutLocs, ValueIDNum **MInLocs,
- DenseMap<DebugVariable, unsigned> &AllVarsNumbering,
- const TargetPassConfig &TPC);
-
- /// Boilerplate computation of some initial sets, artifical blocks and
- /// RPOT block ordering.
- void initialSetup(MachineFunction &MF);
-
- bool ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) override;
+SpillLocationNo MLocTracker::getOrTrackSpillLoc(SpillLoc L) {
+ SpillLocationNo SpillID(SpillLocs.idFor(L));
+ if (SpillID.id() == 0) {
+ // Spill location is untracked: create record for this one, and all
+ // subregister slots too.
+ SpillID = SpillLocationNo(SpillLocs.insert(L));
+ for (unsigned StackIdx = 0; StackIdx < NumSlotIdxes; ++StackIdx) {
+ unsigned L = getSpillIDWithIdx(SpillID, StackIdx);
+ LocIdx Idx = LocIdx(LocIdxToIDNum.size()); // New idx
+ LocIdxToIDNum.grow(Idx);
+ LocIdxToLocID.grow(Idx);
+ LocIDToLocIdx.push_back(Idx);
+ LocIdxToLocID[Idx] = L;
+ // Initialize to PHI value; corresponds to the location's live-in value
+ // during transfer function construction.
+ LocIdxToIDNum[Idx] = ValueIDNum(CurBB, 0, Idx);
+ }
+ }
+ return SpillID;
+}
-public:
- /// Default construct and initialize the pass.
- InstrRefBasedLDV();
+std::string MLocTracker::LocIdxToName(LocIdx Idx) const {
+ unsigned ID = LocIdxToLocID[Idx];
+ if (ID >= NumRegs) {
+ StackSlotPos Pos = locIDToSpillIdx(ID);
+ ID -= NumRegs;
+ unsigned Slot = ID / NumSlotIdxes;
+ return Twine("slot ")
+ .concat(Twine(Slot).concat(Twine(" sz ").concat(Twine(Pos.first)
+ .concat(Twine(" offs ").concat(Twine(Pos.second))))))
+ .str();
+ } else {
+ return TRI.getRegAsmName(ID).str();
+ }
+}
- LLVM_DUMP_METHOD
- void dump_mloc_transfer(const MLocTransferMap &mloc_transfer) const;
+std::string MLocTracker::IDAsString(const ValueIDNum &Num) const {
+ std::string DefName = LocIdxToName(Num.getLoc());
+ return Num.asString(DefName);
+}
- bool isCalleeSaved(LocIdx L) {
- unsigned Reg = MTracker->LocIdxToLocID[L];
- for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI)
- if (CalleeSavedRegs.test(*RAI))
- return true;
- return false;
+#ifndef NDEBUG
+LLVM_DUMP_METHOD void MLocTracker::dump() {
+ for (auto Location : locations()) {
+ std::string MLocName = LocIdxToName(Location.Value.getLoc());
+ std::string DefName = Location.Value.asString(MLocName);
+ dbgs() << LocIdxToName(Location.Idx) << " --> " << DefName << "\n";
}
-};
+}
-} // end anonymous namespace
+LLVM_DUMP_METHOD void MLocTracker::dump_mloc_map() {
+ for (auto Location : locations()) {
+ std::string foo = LocIdxToName(Location.Idx);
+ dbgs() << "Idx " << Location.Idx.asU64() << " " << foo << "\n";
+ }
+}
+#endif
-//===----------------------------------------------------------------------===//
-// Implementation
-//===----------------------------------------------------------------------===//
+MachineInstrBuilder MLocTracker::emitLoc(Optional<LocIdx> MLoc,
+ const DebugVariable &Var,
+ const DbgValueProperties &Properties) {
+ DebugLoc DL = DILocation::get(Var.getVariable()->getContext(), 0, 0,
+ Var.getVariable()->getScope(),
+ const_cast<DILocation *>(Var.getInlinedAt()));
+ auto MIB = BuildMI(MF, DL, TII.get(TargetOpcode::DBG_VALUE));
+
+ const DIExpression *Expr = Properties.DIExpr;
+ if (!MLoc) {
+ // No location -> DBG_VALUE $noreg
+ MIB.addReg(0);
+ MIB.addReg(0);
+ } else if (LocIdxToLocID[*MLoc] >= NumRegs) {
+ unsigned LocID = LocIdxToLocID[*MLoc];
+ SpillLocationNo SpillID = locIDToSpill(LocID);
+ StackSlotPos StackIdx = locIDToSpillIdx(LocID);
+ unsigned short Offset = StackIdx.second;
+
+ // TODO: support variables that are located in spill slots, with non-zero
+ // offsets from the start of the spill slot. It would require some more
+ // complex DIExpression calculations. This doesn't seem to be produced by
+ // LLVM right now, so don't try and support it.
+ // Accept no-subregister slots and subregisters where the offset is zero.
+ // The consumer should already have type information to work out how large
+ // the variable is.
+ if (Offset == 0) {
+ const SpillLoc &Spill = SpillLocs[SpillID.id()];
+ Expr = TRI.prependOffsetExpression(Expr, DIExpression::ApplyOffset,
+ Spill.SpillOffset);
+ unsigned Base = Spill.SpillBase;
+ MIB.addReg(Base);
+ MIB.addImm(0);
+ } else {
+ // This is a stack location with a weird subregister offset: emit an undef
+ // DBG_VALUE instead.
+ MIB.addReg(0);
+ MIB.addReg(0);
+ }
+ } else {
+ // Non-empty, non-stack slot, must be a plain register.
+ unsigned LocID = LocIdxToLocID[*MLoc];
+ MIB.addReg(LocID);
+ if (Properties.Indirect)
+ MIB.addImm(0);
+ else
+ MIB.addReg(0);
+ }
-ValueIDNum ValueIDNum::EmptyValue = {UINT_MAX, UINT_MAX, UINT_MAX};
+ MIB.addMetadata(Var.getVariable());
+ MIB.addMetadata(Expr);
+ return MIB;
+}
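
The operand-selection logic above reduces to a small decision table. A hedged, self-contained sketch follows (describeDbgValueOperands is an invented name for illustration; it only mirrors the branches above and does not build real MachineInstrs):

#include <cstdio>
#include <string>

// Mirror of the branches in emitLoc: which operands the DBG_VALUE receives.
static std::string describeDbgValueOperands(bool HasLoc, bool IsSpill,
                                            bool OffsetIsZero, bool Indirect) {
  if (!HasLoc)
    return "$noreg, $noreg (undef)";
  if (IsSpill)
    return OffsetIsZero ? "spill base reg, imm 0 (memory location)"
                        : "$noreg, $noreg (unrepresentable subreg offset)";
  return Indirect ? "reg, imm 0 (indirect)" : "reg, $noreg (direct)";
}

int main() {
  std::printf("%s\n", describeDbgValueOperands(false, false, false, false).c_str());
  std::printf("%s\n", describeDbgValueOperands(true, true, true, false).c_str());
  std::printf("%s\n", describeDbgValueOperands(true, false, false, true).c_str());
  return 0;
}
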
/// Default construct and initialize the pass.
InstrRefBasedLDV::InstrRefBasedLDV() {}
+bool InstrRefBasedLDV::isCalleeSaved(LocIdx L) const {
+ unsigned Reg = MTracker->LocIdxToLocID[L];
+ for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI)
+ if (CalleeSavedRegs.test(*RAI))
+ return true;
+ return false;
+}
+
//===----------------------------------------------------------------------===//
// Debug Range Extension Implementation
//===----------------------------------------------------------------------===//
@@ -1722,7 +877,7 @@ InstrRefBasedLDV::InstrRefBasedLDV() {}
// void InstrRefBasedLDV::printVarLocInMBB(..)
#endif
-SpillLoc
+SpillLocationNo
InstrRefBasedLDV::extractSpillBaseRegAndOffset(const MachineInstr &MI) {
assert(MI.hasOneMemOperand() &&
"Spill instruction does not have exactly one memory operand?");
@@ -1734,7 +889,28 @@ InstrRefBasedLDV::extractSpillBaseRegAndOffset(const MachineInstr &MI) {
const MachineBasicBlock *MBB = MI.getParent();
Register Reg;
StackOffset Offset = TFI->getFrameIndexReference(*MBB->getParent(), FI, Reg);
- return {Reg, Offset};
+ return MTracker->getOrTrackSpillLoc({Reg, Offset});
+}
+
+Optional<LocIdx> InstrRefBasedLDV::findLocationForMemOperand(const MachineInstr &MI) {
+ SpillLocationNo SpillLoc = extractSpillBaseRegAndOffset(MI);
+
+ // Where in the stack slot is this value defined -- i.e., what size of value
+ // is this? An important question, because it could be loaded into a register
+ // from the stack at some point. Happily the memory operand will tell us
+ // the size written to the stack.
+ auto *MemOperand = *MI.memoperands_begin();
+ unsigned SizeInBits = MemOperand->getSizeInBits();
+
+ // Find that position in the stack indexes we're tracking.
+ auto IdxIt = MTracker->StackSlotIdxes.find({SizeInBits, 0});
+ if (IdxIt == MTracker->StackSlotIdxes.end())
+ // That index is not tracked. This is surprising, and unlikely to ever
+ // occur, but the safe action is to indicate the variable is optimised out.
+ return None;
+
+ unsigned SpillID = MTracker->getSpillIDWithIdx(SpillLoc, IdxIt->second);
+ return MTracker->getSpillMLoc(SpillID);
}
/// End all previous ranges related to @MI and start a new range from @MI
@@ -1759,6 +935,17 @@ bool InstrRefBasedLDV::transferDebugValue(const MachineInstr &MI) {
if (Scope == nullptr)
return true; // handled it; by doing nothing
+ // For now, ignore DBG_VALUE_LISTs when extending ranges. Allow it to
+ // contribute to locations in this block, but don't propagate further.
+ // Interpret it like a DBG_VALUE $noreg.
+ if (MI.isDebugValueList()) {
+ if (VTracker)
+ VTracker->defVar(MI, Properties, None);
+ if (TTracker)
+ TTracker->redefVar(MI, Properties, None);
+ return true;
+ }
+
const MachineOperand &MO = MI.getOperand(0);
// MLocTracker needs to know that this register is read, even if it's only
@@ -1852,16 +1039,25 @@ bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI,
const MachineInstr &TargetInstr = *InstrIt->second.first;
uint64_t BlockNo = TargetInstr.getParent()->getNumber();
- // Pick out the designated operand.
- assert(OpNo < TargetInstr.getNumOperands());
- const MachineOperand &MO = TargetInstr.getOperand(OpNo);
-
- // Today, this can only be a register.
- assert(MO.isReg() && MO.isDef());
-
- unsigned LocID = MTracker->getLocID(MO.getReg(), false);
- LocIdx L = MTracker->LocIDToLocIdx[LocID];
- NewID = ValueIDNum(BlockNo, InstrIt->second.second, L);
+ // Pick out the designated operand. It might be a memory reference, if
+ // a register def was folded into a stack store.
+ if (OpNo == MachineFunction::DebugOperandMemNumber &&
+ TargetInstr.hasOneMemOperand()) {
+ Optional<LocIdx> L = findLocationForMemOperand(TargetInstr);
+ if (L)
+ NewID = ValueIDNum(BlockNo, InstrIt->second.second, *L);
+ } else if (OpNo != MachineFunction::DebugOperandMemNumber) {
+ assert(OpNo < TargetInstr.getNumOperands());
+ const MachineOperand &MO = TargetInstr.getOperand(OpNo);
+
+ // Today, this can only be a register.
+ assert(MO.isReg() && MO.isDef());
+
+ unsigned LocID = MTracker->getLocID(MO.getReg());
+ LocIdx L = MTracker->LocIDToLocIdx[LocID];
+ NewID = ValueIDNum(BlockNo, InstrIt->second.second, L);
+ }
+ // else: NewID is left as None.
} else if (PHIIt != DebugPHINumToValue.end() && PHIIt->InstrNum == InstNo) {
// It's actually a PHI value. Which value it is might not be obvious, use
// the resolver helper to find out.
@@ -1957,7 +1153,7 @@ bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI,
Optional<LocIdx> FoundLoc = None;
for (auto Location : MTracker->locations()) {
LocIdx CurL = Location.Idx;
- ValueIDNum ID = MTracker->LocIdxToIDNum[CurL];
+ ValueIDNum ID = MTracker->readMLoc(CurL);
if (NewID && ID == NewID) {
// If this is the first location with that value, pick it. Otherwise,
// consider whether it's a "longer term" location.
@@ -2016,6 +1212,10 @@ bool InstrRefBasedLDV::transferDebugPHI(MachineInstr &MI) {
auto PHIRec = DebugPHIRecord(
{InstrNum, MI.getParent(), Num, MTracker->lookupOrTrackRegister(Reg)});
DebugPHINumToValue.push_back(PHIRec);
+
+ // Ensure this register is tracked.
+ for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI)
+ MTracker->lookupOrTrackRegister(*RAI);
} else {
// The value is whatever's in this stack slot.
assert(MO.isFI());
@@ -2026,19 +1226,46 @@ bool InstrRefBasedLDV::transferDebugPHI(MachineInstr &MI) {
if (MFI->isDeadObjectIndex(FI))
return true;
- // Identify this spill slot.
+ // Identify this spill slot, ensure it's tracked.
Register Base;
StackOffset Offs = TFI->getFrameIndexReference(*MI.getMF(), FI, Base);
SpillLoc SL = {Base, Offs};
- Optional<ValueIDNum> Num = MTracker->readSpill(SL);
+ SpillLocationNo SpillNo = MTracker->getOrTrackSpillLoc(SL);
+
+ // Problem: what value should we extract from the stack? LLVM does not
+ // record what size the last store to the slot was, and it would become
+ // sketchy after stack slot colouring anyway. Take a look at what values
+ // are stored on the stack, and pick the largest one that wasn't def'd
+ // by a spill (i.e., the value most likely to have been def'd in a register
+ // and then spilt).
+ std::array<unsigned, 4> CandidateSizes = {64, 32, 16, 8};
+ Optional<ValueIDNum> Result = None;
+ Optional<LocIdx> SpillLoc = None;
+ for (unsigned int I = 0; I < CandidateSizes.size(); ++I) {
+ unsigned SpillID = MTracker->getLocID(SpillNo, {CandidateSizes[I], 0});
+ SpillLoc = MTracker->getSpillMLoc(SpillID);
+ ValueIDNum Val = MTracker->readMLoc(*SpillLoc);
+ // If this value was defined in its own position, then it was probably
+ // an aliasing index of a small value that was spilt.
+ if (Val.getLoc() != SpillLoc->asU64()) {
+ Result = Val;
+ break;
+ }
+ }
- if (!Num)
- // Nothing ever writes to this slot. Curious, but nothing we can do.
- return true;
+ // If we didn't find anything, we're probably looking at a PHI, or a memory
+ // store folded into an instruction. FIXME: Take a guess that it's 64
+ // bits. This isn't ideal, but tracking the size that the spill is
+ // "supposed" to be is more complex, and benefits a small number of
+ // locations.
+ if (!Result) {
+ unsigned SpillID = MTracker->getLocID(SpillNo, {64, 0});
+ SpillLoc = MTracker->getSpillMLoc(SpillID);
+ Result = MTracker->readMLoc(*SpillLoc);
+ }
// Record this DBG_PHI for later analysis.
- auto DbgPHI = DebugPHIRecord(
- {InstrNum, MI.getParent(), *Num, *MTracker->getSpillMLoc(SL)});
+ auto DbgPHI = DebugPHIRecord({InstrNum, MI.getParent(), *Result, *SpillLoc});
DebugPHINumToValue.push_back(DbgPHI);
}
@@ -2061,10 +1288,6 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) {
} else if (MI.isMetaInstruction())
return;
- MachineFunction *MF = MI.getMF();
- const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
- Register SP = TLI->getStackPointerRegisterToSaveRestore();
-
// Find the regs killed by MI, and find regmasks of preserved regs.
// Max out the number of statically allocated elements in `DeadRegs`, as this
// prevents fallback to std::set::count() operations.
@@ -2075,7 +1298,7 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) {
// Determine whether the operand is a register def.
if (MO.isReg() && MO.isDef() && MO.getReg() &&
Register::isPhysicalRegister(MO.getReg()) &&
- !(MI.isCall() && MO.getReg() == SP)) {
+ !(MI.isCall() && MTracker->SPAliases.count(MO.getReg()))) {
// Remove ranges of all aliased registers.
for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI)
// FIXME: Can we break out of this loop early if no insertion occurs?
@@ -2093,6 +1316,16 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) {
for (auto *MO : RegMaskPtrs)
MTracker->writeRegMask(MO, CurBB, CurInst);
+ // If this instruction writes to a spill slot, def that slot.
+ if (hasFoldedStackStore(MI)) {
+ SpillLocationNo SpillNo = extractSpillBaseRegAndOffset(MI);
+ for (unsigned int I = 0; I < MTracker->NumSlotIdxes; ++I) {
+ unsigned SpillID = MTracker->getSpillIDWithIdx(SpillNo, I);
+ LocIdx L = MTracker->getSpillMLoc(SpillID);
+ MTracker->setMLoc(L, ValueIDNum(CurBB, CurInst, L));
+ }
+ }
+
if (!TTracker)
return;
@@ -2118,32 +1351,27 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) {
if (MO->clobbersPhysReg(Reg))
TTracker->clobberMloc(L.Idx, MI.getIterator(), false);
}
+
+ // Tell TTracker about any folded stack store.
+ if (hasFoldedStackStore(MI)) {
+ SpillLocationNo SpillNo = extractSpillBaseRegAndOffset(MI);
+ for (unsigned int I = 0; I < MTracker->NumSlotIdxes; ++I) {
+ unsigned SpillID = MTracker->getSpillIDWithIdx(SpillNo, I);
+ LocIdx L = MTracker->getSpillMLoc(SpillID);
+ TTracker->clobberMloc(L, MI.getIterator(), true);
+ }
+ }
}
void InstrRefBasedLDV::performCopy(Register SrcRegNum, Register DstRegNum) {
- ValueIDNum SrcValue = MTracker->readReg(SrcRegNum);
+ // In all circumstances, re-def all aliases. It's definitely a new value now.
+ for (MCRegAliasIterator RAI(DstRegNum, TRI, true); RAI.isValid(); ++RAI)
+ MTracker->defReg(*RAI, CurBB, CurInst);
+ ValueIDNum SrcValue = MTracker->readReg(SrcRegNum);
MTracker->setReg(DstRegNum, SrcValue);
- // In all circumstances, re-def the super registers. It's definitely a new
- // value now. This doesn't uniquely identify the composition of subregs, for
- // example, two identical values in subregisters composed in different
- // places would not get equal value numbers.
- for (MCSuperRegIterator SRI(DstRegNum, TRI); SRI.isValid(); ++SRI)
- MTracker->defReg(*SRI, CurBB, CurInst);
-
- // If we're emulating VarLocBasedImpl, just define all the subregisters.
- // DBG_VALUEs of them will expect to be tracked from the DBG_VALUE, not
- // through prior copies.
- if (EmulateOldLDV) {
- for (MCSubRegIndexIterator DRI(DstRegNum, TRI); DRI.isValid(); ++DRI)
- MTracker->defReg(DRI.getSubReg(), CurBB, CurInst);
- return;
- }
-
- // Otherwise, actually copy subregisters from one location to another.
- // XXX: in addition, any subregisters of DstRegNum that don't line up with
- // the source register should be def'd.
+ // Copy subregisters from one location to another.
for (MCSubRegIndexIterator SRI(SrcRegNum, TRI); SRI.isValid(); ++SRI) {
unsigned SrcSubReg = SRI.getSubReg();
unsigned SubRegIdx = SRI.getSubRegIndex();
@@ -2154,15 +1382,13 @@ void InstrRefBasedLDV::performCopy(Register SrcRegNum, Register DstRegNum) {
// Do copy. There are two matching subregisters, the source value should
// have been def'd when the super-reg was, the latter might not be tracked
// yet.
- // This will force SrcSubReg to be tracked, if it isn't yet.
- (void)MTracker->readReg(SrcSubReg);
- LocIdx SrcL = MTracker->getRegMLoc(SrcSubReg);
- assert(SrcL.asU64());
- (void)MTracker->readReg(DstSubReg);
- LocIdx DstL = MTracker->getRegMLoc(DstSubReg);
- assert(DstL.asU64());
+ // This will force SrcSubReg to be tracked, if it isn't yet. Will read
+ // mphi values if it wasn't tracked.
+ LocIdx SrcL = MTracker->lookupOrTrackRegister(SrcSubReg);
+ LocIdx DstL = MTracker->lookupOrTrackRegister(DstSubReg);
+ (void)SrcL;
(void)DstL;
- ValueIDNum CpyValue = {SrcValue.getBlock(), SrcValue.getInst(), SrcL};
+ ValueIDNum CpyValue = MTracker->readReg(SrcSubReg);
MTracker->setReg(DstSubReg, CpyValue);
}
@@ -2174,6 +1400,12 @@ bool InstrRefBasedLDV::isSpillInstruction(const MachineInstr &MI,
if (!MI.hasOneMemOperand())
return false;
+ // Reject any memory operand that's aliased -- we can't guarantee its value.
+ auto MMOI = MI.memoperands_begin();
+ const PseudoSourceValue *PVal = (*MMOI)->getPseudoValue();
+ if (PVal->isAliased(MFI))
+ return false;
+
if (!MI.getSpillSize(TII) && !MI.getFoldedSpillSize(TII))
return false; // This is not a spill instruction, since no valid size was
// returned from either function.
@@ -2191,7 +1423,7 @@ bool InstrRefBasedLDV::isLocationSpill(const MachineInstr &MI,
return Reg != 0;
}
-Optional<SpillLoc>
+Optional<SpillLocationNo>
InstrRefBasedLDV::isRestoreInstruction(const MachineInstr &MI,
MachineFunction *MF, unsigned &Reg) {
if (!MI.hasOneMemOperand())
@@ -2213,84 +1445,117 @@ bool InstrRefBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI) {
if (EmulateOldLDV)
return false;
+ // Strictly limit ourselves to plain loads and stores, not all instructions
+ // that can access the stack.
+ int DummyFI = -1;
+ if (!TII->isStoreToStackSlotPostFE(MI, DummyFI) &&
+ !TII->isLoadFromStackSlotPostFE(MI, DummyFI))
+ return false;
+
MachineFunction *MF = MI.getMF();
unsigned Reg;
- Optional<SpillLoc> Loc;
LLVM_DEBUG(dbgs() << "Examining instruction: "; MI.dump(););
// First, if there are any DBG_VALUEs pointing at a spill slot that is
// written to, terminate that variable location. The value in memory
// will have changed. DbgEntityHistoryCalculator doesn't try to detect this.
if (isSpillInstruction(MI, MF)) {
- Loc = extractSpillBaseRegAndOffset(MI);
-
- if (TTracker) {
- Optional<LocIdx> MLoc = MTracker->getSpillMLoc(*Loc);
- if (MLoc) {
- // Un-set this location before clobbering, so that we don't salvage
- // the variable location back to the same place.
- MTracker->setMLoc(*MLoc, ValueIDNum::EmptyValue);
+ SpillLocationNo Loc = extractSpillBaseRegAndOffset(MI);
+
+ // Un-set this location and clobber, so that earlier locations don't
+ // continue past this store.
+ for (unsigned SlotIdx = 0; SlotIdx < MTracker->NumSlotIdxes; ++SlotIdx) {
+ unsigned SpillID = MTracker->getSpillIDWithIdx(Loc, SlotIdx);
+ Optional<LocIdx> MLoc = MTracker->getSpillMLoc(SpillID);
+ if (!MLoc)
+ continue;
+
+ // We need to over-write the stack slot with something (here, a def at
+ // this instruction) to ensure no values are preserved in this stack slot
+ // after the spill. It also prevents TTracker from trying to recover the
+ // location and re-installing it in the same place.
+ ValueIDNum Def(CurBB, CurInst, *MLoc);
+ MTracker->setMLoc(*MLoc, Def);
+ if (TTracker)
TTracker->clobberMloc(*MLoc, MI.getIterator());
- }
}
}
// Try to recognise spill and restore instructions that may transfer a value.
if (isLocationSpill(MI, MF, Reg)) {
- Loc = extractSpillBaseRegAndOffset(MI);
- auto ValueID = MTracker->readReg(Reg);
+ SpillLocationNo Loc = extractSpillBaseRegAndOffset(MI);
- // If the location is empty, produce a phi, signify it's the live-in value.
- if (ValueID.getLoc() == 0)
- ValueID = {CurBB, 0, MTracker->getRegMLoc(Reg)};
+ auto DoTransfer = [&](Register SrcReg, unsigned SpillID) {
+ auto ReadValue = MTracker->readReg(SrcReg);
+ LocIdx DstLoc = MTracker->getSpillMLoc(SpillID);
+ MTracker->setMLoc(DstLoc, ReadValue);
+
+ if (TTracker) {
+ LocIdx SrcLoc = MTracker->getRegMLoc(SrcReg);
+ TTracker->transferMlocs(SrcLoc, DstLoc, MI.getIterator());
+ }
+ };
- MTracker->setSpill(*Loc, ValueID);
- auto OptSpillLocIdx = MTracker->getSpillMLoc(*Loc);
- assert(OptSpillLocIdx && "Spill slot set but has no LocIdx?");
- LocIdx SpillLocIdx = *OptSpillLocIdx;
+ // Then, transfer subreg bits.
+ for (MCSubRegIterator SRI(Reg, TRI, false); SRI.isValid(); ++SRI) {
+ // Ensure this reg is tracked.
+ (void)MTracker->lookupOrTrackRegister(*SRI);
+ unsigned SubregIdx = TRI->getSubRegIndex(Reg, *SRI);
+ unsigned SpillID = MTracker->getLocID(Loc, SubregIdx);
+ DoTransfer(*SRI, SpillID);
+ }
- // Tell TransferTracker about this spill, produce DBG_VALUEs for it.
- if (TTracker)
- TTracker->transferMlocs(MTracker->getRegMLoc(Reg), SpillLocIdx,
- MI.getIterator());
+ // Directly lookup size of main source reg, and transfer.
+ unsigned Size = TRI->getRegSizeInBits(Reg, *MRI);
+ unsigned SpillID = MTracker->getLocID(Loc, {Size, 0});
+ DoTransfer(Reg, SpillID);
} else {
- if (!(Loc = isRestoreInstruction(MI, MF, Reg)))
+ Optional<SpillLocationNo> OptLoc = isRestoreInstruction(MI, MF, Reg);
+ if (!OptLoc)
return false;
+ SpillLocationNo Loc = *OptLoc;
- // Is there a value to be restored?
- auto OptValueID = MTracker->readSpill(*Loc);
- if (OptValueID) {
- ValueIDNum ValueID = *OptValueID;
- LocIdx SpillLocIdx = *MTracker->getSpillMLoc(*Loc);
- // XXX -- can we recover sub-registers of this value? Until we can, first
- // overwrite all defs of the register being restored to.
- for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI)
- MTracker->defReg(*RAI, CurBB, CurInst);
+ // Assumption: we're reading from the base of the stack slot, not some
+ // offset into it. It seems very unlikely LLVM would ever generate
+ // restores where this wasn't true. This then becomes a question of what
+ // subregisters in the destination register line up with positions in the
+ // stack slot.
- // Now override the reg we're restoring to.
- MTracker->setReg(Reg, ValueID);
+ // Def all registers that alias the destination.
+ for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI)
+ MTracker->defReg(*RAI, CurBB, CurInst);
+
+ // Now find subregisters within the destination register, and load values
+ // from stack slot positions.
+ auto DoTransfer = [&](Register DestReg, unsigned SpillID) {
+ LocIdx SrcIdx = MTracker->getSpillMLoc(SpillID);
+ auto ReadValue = MTracker->readMLoc(SrcIdx);
+ MTracker->setReg(DestReg, ReadValue);
+
+ if (TTracker) {
+ LocIdx DstLoc = MTracker->getRegMLoc(DestReg);
+ TTracker->transferMlocs(SrcIdx, DstLoc, MI.getIterator());
+ }
+ };
- // Report this restore to the transfer tracker too.
- if (TTracker)
- TTracker->transferMlocs(SpillLocIdx, MTracker->getRegMLoc(Reg),
- MI.getIterator());
- } else {
- // There isn't anything in the location; not clear if this is a code path
- // that still runs. Def this register anyway just in case.
- for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI)
- MTracker->defReg(*RAI, CurBB, CurInst);
-
- // Force the spill slot to be tracked.
- LocIdx L = MTracker->getOrTrackSpillLoc(*Loc);
-
- // Set the restored value to be a machine phi number, signifying that it's
- // whatever the spills live-in value is in this block. Definitely has
- // a LocIdx due to the setSpill above.
- ValueIDNum ValueID = {CurBB, 0, L};
- MTracker->setReg(Reg, ValueID);
- MTracker->setSpill(*Loc, ValueID);
+ for (MCSubRegIterator SRI(Reg, TRI, false); SRI.isValid(); ++SRI) {
+ unsigned Subreg = TRI->getSubRegIndex(Reg, *SRI);
+ unsigned SpillID = MTracker->getLocID(Loc, Subreg);
+ DoTransfer(*SRI, SpillID);
}
+
+ // Directly look up this register's slot idx by size, and transfer.
+ unsigned Size = TRI->getRegSizeInBits(Reg, *MRI);
+ unsigned SpillID = MTracker->getLocID(Loc, {Size, 0});
+ DoTransfer(Reg, SpillID);
}
return true;
}
@@ -2510,12 +1775,11 @@ void InstrRefBasedLDV::produceMLocTransferFunction(
}
// Compute a bitvector of all the registers that are tracked in this block.
- const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
- Register SP = TLI->getStackPointerRegisterToSaveRestore();
BitVector UsedRegs(TRI->getNumRegs());
for (auto Location : MTracker->locations()) {
unsigned ID = MTracker->LocIdxToLocID[Location.Idx];
- if (ID >= TRI->getNumRegs() || ID == SP)
+ // Ignore stack slots, and aliases of the stack pointer.
+ if (ID >= TRI->getNumRegs() || MTracker->SPAliases.count(ID))
continue;
UsedRegs.set(ID);
}
@@ -2531,7 +1795,7 @@ void InstrRefBasedLDV::produceMLocTransferFunction(
// they're all clobbered or at least set in the designated transfer
// elem.
for (unsigned Bit : BV.set_bits()) {
- unsigned ID = MTracker->getLocID(Bit, false);
+ unsigned ID = MTracker->getLocID(Bit);
LocIdx Idx = MTracker->LocIDToLocIdx[ID];
auto &TransferMap = MLocTransfer[I];
@@ -2553,23 +1817,20 @@ void InstrRefBasedLDV::produceMLocTransferFunction(
}
}
-std::tuple<bool, bool>
-InstrRefBasedLDV::mlocJoin(MachineBasicBlock &MBB,
- SmallPtrSet<const MachineBasicBlock *, 16> &Visited,
- ValueIDNum **OutLocs, ValueIDNum *InLocs) {
+bool InstrRefBasedLDV::mlocJoin(
+ MachineBasicBlock &MBB, SmallPtrSet<const MachineBasicBlock *, 16> &Visited,
+ ValueIDNum **OutLocs, ValueIDNum *InLocs) {
LLVM_DEBUG(dbgs() << "join MBB: " << MBB.getNumber() << "\n");
bool Changed = false;
- bool DowngradeOccurred = false;
- // Collect predecessors that have been visited. Anything that hasn't been
- // visited yet is a backedge on the first iteration, and the meet of it's
- // lattice value for all locations will be unaffected.
+ // Handle value-propagation when control flow merges on entry to a block. For
+ // any location without a PHI already placed, the location has the same value
+ // as its predecessors. If a PHI is placed, test to see whether it's now a
+ // redundant PHI that we can eliminate.
+
SmallVector<const MachineBasicBlock *, 8> BlockOrders;
- for (auto Pred : MBB.predecessors()) {
- if (Visited.count(Pred)) {
- BlockOrders.push_back(Pred);
- }
- }
+ for (auto Pred : MBB.predecessors())
+ BlockOrders.push_back(Pred);
// Visit predecessors in RPOT order.
auto Cmp = [&](const MachineBasicBlock *A, const MachineBasicBlock *B) {
@@ -2579,83 +1840,216 @@ InstrRefBasedLDV::mlocJoin(MachineBasicBlock &MBB,
// Skip entry block.
if (BlockOrders.size() == 0)
- return std::tuple<bool, bool>(false, false);
+ return false;
- // Step through all machine locations, then look at each predecessor and
- // detect disagreements.
- unsigned ThisBlockRPO = BBToOrder.find(&MBB)->second;
+ // Step through all machine locations, look at each predecessor and test
+ // whether we can eliminate redundant PHIs.
for (auto Location : MTracker->locations()) {
LocIdx Idx = Location.Idx;
+
// Pick out the first predecessors live-out value for this location. It's
- // guaranteed to be not a backedge, as we order by RPO.
- ValueIDNum BaseVal = OutLocs[BlockOrders[0]->getNumber()][Idx.asU64()];
+ // guaranteed to not be a backedge, as we order by RPO.
+ ValueIDNum FirstVal = OutLocs[BlockOrders[0]->getNumber()][Idx.asU64()];
+
+ // If we've already eliminated a PHI here, do no further checking, just
+ // propagate the first live-in value into this block.
+ if (InLocs[Idx.asU64()] != ValueIDNum(MBB.getNumber(), 0, Idx)) {
+ if (InLocs[Idx.asU64()] != FirstVal) {
+ InLocs[Idx.asU64()] = FirstVal;
+ Changed |= true;
+ }
+ continue;
+ }
- // Some flags for whether there's a disagreement, and whether it's a
- // disagreement with a backedge or not.
+ // We're now examining a PHI to see whether it's unnecessary. Loop around
+ // the other live-in values and test whether they're all the same.
bool Disagree = false;
- bool NonBackEdgeDisagree = false;
-
- // Loop around everything that wasn't 'base'.
for (unsigned int I = 1; I < BlockOrders.size(); ++I) {
- auto *MBB = BlockOrders[I];
- if (BaseVal != OutLocs[MBB->getNumber()][Idx.asU64()]) {
- // Live-out of a predecessor disagrees with the first predecessor.
- Disagree = true;
-
- // Test whether it's a disagreemnt in the backedges or not.
- if (BBToOrder.find(MBB)->second < ThisBlockRPO) // might be self b/e
- NonBackEdgeDisagree = true;
- }
- }
+ const MachineBasicBlock *PredMBB = BlockOrders[I];
+ const ValueIDNum &PredLiveOut =
+ OutLocs[PredMBB->getNumber()][Idx.asU64()];
- bool OverRide = false;
- if (Disagree && !NonBackEdgeDisagree) {
- // Only the backedges disagree. Consider demoting the livein
- // lattice value, as per the file level comment. The value we consider
- // demoting to is the value that the non-backedge predecessors agree on.
- // The order of values is that non-PHIs are \top, a PHI at this block
- // \bot, and phis between the two are ordered by their RPO number.
- // If there's no agreement, or we've already demoted to this PHI value
- // before, replace with a PHI value at this block.
-
- // Calculate order numbers: zero means normal def, nonzero means RPO
- // number.
- unsigned BaseBlockRPONum = BBNumToRPO[BaseVal.getBlock()] + 1;
- if (!BaseVal.isPHI())
- BaseBlockRPONum = 0;
-
- ValueIDNum &InLocID = InLocs[Idx.asU64()];
- unsigned InLocRPONum = BBNumToRPO[InLocID.getBlock()] + 1;
- if (!InLocID.isPHI())
- InLocRPONum = 0;
-
- // Should we ignore the disagreeing backedges, and override with the
- // value the other predecessors agree on (in "base")?
- unsigned ThisBlockRPONum = BBNumToRPO[MBB.getNumber()] + 1;
- if (BaseBlockRPONum > InLocRPONum && BaseBlockRPONum < ThisBlockRPONum) {
- // Override.
- OverRide = true;
- DowngradeOccurred = true;
- }
+ // Incoming values agree, continue trying to eliminate this PHI.
+ if (FirstVal == PredLiveOut)
+ continue;
+
+ // We can also accept a PHI value that feeds back into itself.
+ if (PredLiveOut == ValueIDNum(MBB.getNumber(), 0, Idx))
+ continue;
+
+ // Live-out of a predecessor disagrees with the first predecessor.
+ Disagree = true;
}
- // else: if we disagree in the non-backedges, then this is definitely
- // a control flow merge where different values merge. Make it a PHI.
- // Generate a phi...
- ValueIDNum PHI = {(uint64_t)MBB.getNumber(), 0, Idx};
- ValueIDNum NewVal = (Disagree && !OverRide) ? PHI : BaseVal;
- if (InLocs[Idx.asU64()] != NewVal) {
+ // No disagreement? No PHI. Otherwise, leave the PHI in live-ins.
+ if (!Disagree) {
+ InLocs[Idx.asU64()] = FirstVal;
Changed |= true;
- InLocs[Idx.asU64()] = NewVal;
}
}
// TODO: Reimplement NumInserted and NumRemoved.
- return std::tuple<bool, bool>(Changed, DowngradeOccurred);
+ return Changed;
+}
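
The join rule implemented above can be demonstrated in isolation. The following is a hedged, self-contained sketch (plain integers stand in for ValueIDNum; joinLocation is an invented name, not part of the patch): a PHI placed at a block survives only if some predecessor live-out genuinely disagrees with the first predecessor and is not the block's own PHI value arriving around a backedge.

#include <cassert>
#include <cstdint>
#include <vector>

// Toy model of the mlocJoin elimination rule for a single location.
static int64_t joinLocation(const std::vector<int64_t> &PredLiveOuts,
                            int64_t PhiHere) {
  int64_t FirstVal = PredLiveOuts.front();
  for (int64_t V : PredLiveOuts) {
    if (V == FirstVal || V == PhiHere)
      continue; // Agreement, or the PHI feeding back around a loop.
    return PhiHere; // Genuine merge of different values: keep the PHI.
  }
  return FirstVal; // All predecessors agree: the PHI was redundant.
}

int main() {
  // Diamond where one arm spills and restores: both arms still carry value 7,
  // so the PHI placed at the join collapses back to 7.
  assert(joinLocation({7, 7}, /*PhiHere=*/100) == 7);
  // A loop whose backedge carries the block's own PHI also collapses.
  assert(joinLocation({7, 100}, /*PhiHere=*/100) == 7);
  // Real disagreement keeps the PHI in the live-ins.
  assert(joinLocation({7, 8}, /*PhiHere=*/100) == 100);
  return 0;
}
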
+
+void InstrRefBasedLDV::findStackIndexInterference(
+ SmallVectorImpl<unsigned> &Slots) {
+ // We could spend a bit of time finding the exact, minimal, set of stack
+ // indexes that interfere with each other, much like reg units. Or, we can
+ // rely on the fact that:
+ // * The smallest / lowest index will interfere with everything at zero
+ // offset, which will be the largest set of registers,
+ // * Most indexes with non-zero offset will end up being interference units
+ // anyway.
+ // So just pick those out and return them.
+
+ // We can rely on a single-byte stack index existing already, because we
+ // initialize them in MLocTracker.
+ auto It = MTracker->StackSlotIdxes.find({8, 0});
+ assert(It != MTracker->StackSlotIdxes.end());
+ Slots.push_back(It->second);
+
+ // Find anything that has a non-zero offset and add that too.
+ for (auto &Pair : MTracker->StackSlotIdxes) {
+ // Is offset zero? If so, ignore.
+ if (!Pair.first.second)
+ continue;
+ Slots.push_back(Pair.second);
+ }
}
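
As a rough illustration of the heuristic above (standard containers only; the map contents are made up rather than taken from any target), the single-byte zero-offset index plus every non-zero-offset index is treated as the set of interference units:

#include <cstdio>
#include <map>
#include <utility>
#include <vector>

int main() {
  // Toy stand-in for MTracker->StackSlotIdxes: {size, offset} -> index.
  std::map<std::pair<unsigned, unsigned>, unsigned> SlotIdxes = {
      {{8, 0}, 0}, {{16, 0}, 1}, {{32, 0}, 2}, {{64, 0}, 3}, {{32, 32}, 4}};

  // Smallest zero-offset index covers all zero-offset indexes; anything with
  // a non-zero offset becomes its own interference unit.
  std::vector<unsigned> Units;
  Units.push_back(SlotIdxes.at({8, 0}));
  for (const auto &P : SlotIdxes)
    if (P.first.second != 0)
      Units.push_back(P.second);

  for (unsigned U : Units)
    std::printf("interference unit: %u\n", U); // Prints 0 and 4 here.
  return 0;
}
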
-void InstrRefBasedLDV::mlocDataflow(
- ValueIDNum **MInLocs, ValueIDNum **MOutLocs,
+void InstrRefBasedLDV::placeMLocPHIs(
+ MachineFunction &MF, SmallPtrSetImpl<MachineBasicBlock *> &AllBlocks,
+ ValueIDNum **MInLocs, SmallVectorImpl<MLocTransferMap> &MLocTransfer) {
+ SmallVector<unsigned, 4> StackUnits;
+ findStackIndexInterference(StackUnits);
+
+ // To avoid repeatedly running the PHI placement algorithm, leverage the
+ // fact that a def of register MUST also def its register units. Find the
+ // units for registers, place PHIs for them, and then replicate them for
+ // aliasing registers. Some inputs that are never def'd (DBG_PHIs of
+ // arguments) don't lead to register units being tracked; just place PHIs for
+ // those registers directly. Stack slots have their own form of "unit",
+ // store them to one side.
+ SmallSet<Register, 32> RegUnitsToPHIUp;
+ SmallSet<LocIdx, 32> NormalLocsToPHI;
+ SmallSet<SpillLocationNo, 32> StackSlots;
+ for (auto Location : MTracker->locations()) {
+ LocIdx L = Location.Idx;
+ if (MTracker->isSpill(L)) {
+ StackSlots.insert(MTracker->locIDToSpill(MTracker->LocIdxToLocID[L]));
+ continue;
+ }
+
+ Register R = MTracker->LocIdxToLocID[L];
+ SmallSet<Register, 8> FoundRegUnits;
+ bool AnyIllegal = false;
+ for (MCRegUnitIterator RUI(R.asMCReg(), TRI); RUI.isValid(); ++RUI) {
+ for (MCRegUnitRootIterator URoot(*RUI, TRI); URoot.isValid(); ++URoot){
+ if (!MTracker->isRegisterTracked(*URoot)) {
+ // Not all roots were loaded into the tracking map: this register
+ // isn't actually def'd anywhere, we only read from it. Generate PHIs
+ // for this reg, but don't iterate units.
+ AnyIllegal = true;
+ } else {
+ FoundRegUnits.insert(*URoot);
+ }
+ }
+ }
+
+ if (AnyIllegal) {
+ NormalLocsToPHI.insert(L);
+ continue;
+ }
+
+ RegUnitsToPHIUp.insert(FoundRegUnits.begin(), FoundRegUnits.end());
+ }
+
+ // Lambda to fetch PHIs for a given location, and write into the PHIBlocks
+ // collection.
+ SmallVector<MachineBasicBlock *, 32> PHIBlocks;
+ auto CollectPHIsForLoc = [&](LocIdx L) {
+ // Collect the set of defs.
+ SmallPtrSet<MachineBasicBlock *, 32> DefBlocks;
+ for (unsigned int I = 0; I < OrderToBB.size(); ++I) {
+ MachineBasicBlock *MBB = OrderToBB[I];
+ const auto &TransferFunc = MLocTransfer[MBB->getNumber()];
+ if (TransferFunc.find(L) != TransferFunc.end())
+ DefBlocks.insert(MBB);
+ }
+
+ // The entry block defs the location too: it's the live-in / argument value.
+ // Only insert if there are other defs though; everything is trivially live
+ // through otherwise.
+ if (!DefBlocks.empty())
+ DefBlocks.insert(&*MF.begin());
+
+ // Ask the SSA construction algorithm where we should put PHIs. Clear
+ // anything that might have been hanging around from earlier.
+ PHIBlocks.clear();
+ BlockPHIPlacement(AllBlocks, DefBlocks, PHIBlocks);
+ };
+
+ auto InstallPHIsAtLoc = [&PHIBlocks, &MInLocs](LocIdx L) {
+ for (const MachineBasicBlock *MBB : PHIBlocks)
+ MInLocs[MBB->getNumber()][L.asU64()] = ValueIDNum(MBB->getNumber(), 0, L);
+ };
+
+ // For locations with no reg units, just place PHIs.
+ for (LocIdx L : NormalLocsToPHI) {
+ CollectPHIsForLoc(L);
+ // Install those PHI values into the live-in value array.
+ InstallPHIsAtLoc(L);
+ }
+
+ // For stack slots, calculate PHIs for the equivalent of the units, then
+ // install for each index.
+ for (SpillLocationNo Slot : StackSlots) {
+ for (unsigned Idx : StackUnits) {
+ unsigned SpillID = MTracker->getSpillIDWithIdx(Slot, Idx);
+ LocIdx L = MTracker->getSpillMLoc(SpillID);
+ CollectPHIsForLoc(L);
+ InstallPHIsAtLoc(L);
+
+ // Find anything that aliases this stack index, install PHIs for it too.
+ unsigned Size, Offset;
+ std::tie(Size, Offset) = MTracker->StackIdxesToPos[Idx];
+ for (auto &Pair : MTracker->StackSlotIdxes) {
+ unsigned ThisSize, ThisOffset;
+ std::tie(ThisSize, ThisOffset) = Pair.first;
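+ // Worked example with illustrative bit ranges: if Idx covers bits [0, 32)
+ // of the slot, the index covering bits [16, 48) overlaps it and has PHIs
+ // installed too, while the index covering bits [32, 64) fails the test
+ // below and is skipped.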
+ if (ThisSize + ThisOffset <= Offset || Size + Offset <= ThisOffset)
+ continue;
+
+ unsigned ThisID = MTracker->getSpillIDWithIdx(Slot, Pair.second);
+ LocIdx ThisL = MTracker->getSpillMLoc(ThisID);
+ InstallPHIsAtLoc(ThisL);
+ }
+ }
+ }
+
+ // For reg units, place PHIs, and then place them for any aliasing registers.
+ for (Register R : RegUnitsToPHIUp) {
+ LocIdx L = MTracker->lookupOrTrackRegister(R);
+ CollectPHIsForLoc(L);
+
+ // Install those PHI values into the live-in value array.
+ InstallPHIsAtLoc(L);
+
+ // Now find aliases and install PHIs for those.
+ for (MCRegAliasIterator RAI(R, TRI, true); RAI.isValid(); ++RAI) {
+ // Super-registers that are "above" the largest register read/written by
+ // the function will alias, but will not be tracked.
+ if (!MTracker->isRegisterTracked(*RAI))
+ continue;
+
+ LocIdx AliasLoc = MTracker->lookupOrTrackRegister(*RAI);
+ InstallPHIsAtLoc(AliasLoc);
+ }
+ }
+}
+
+void InstrRefBasedLDV::buildMLocValueMap(
+ MachineFunction &MF, ValueIDNum **MInLocs, ValueIDNum **MOutLocs,
SmallVectorImpl<MLocTransferMap> &MLocTransfer) {
std::priority_queue<unsigned int, std::vector<unsigned int>,
std::greater<unsigned int>>
@@ -2666,20 +2060,34 @@ void InstrRefBasedLDV::mlocDataflow(
// but this is probably not worth it.
SmallPtrSet<MachineBasicBlock *, 16> OnPending, OnWorklist;
- // Initialize worklist with every block to be visited.
+ // Initialize worklist with every block to be visited. Also produce a list
+ // of all blocks.
+ SmallPtrSet<MachineBasicBlock *, 32> AllBlocks;
for (unsigned int I = 0; I < BBToOrder.size(); ++I) {
Worklist.push(I);
OnWorklist.insert(OrderToBB[I]);
+ AllBlocks.insert(OrderToBB[I]);
}
- MTracker->reset();
-
- // Set inlocs for entry block -- each as a PHI at the entry block. Represents
- // the incoming value to the function.
- MTracker->setMPhis(0);
+ // Initialize entry block to PHIs. These represent arguments.
for (auto Location : MTracker->locations())
- MInLocs[0][Location.Idx.asU64()] = Location.Value;
+ MInLocs[0][Location.Idx.asU64()] = ValueIDNum(0, 0, Location.Idx);
+ MTracker->reset();
+
+ // Start by placing PHIs, using the usual SSA constructor algorithm. Consider
+ // any machine-location that isn't live-through a block to be def'd in that
+ // block.
+ placeMLocPHIs(MF, AllBlocks, MInLocs, MLocTransfer);
+
+ // Propagate values to eliminate redundant PHIs. At the same time, this
+ // produces the table of Block x Location => Value for the entry to each
+ // block.
+ // The kind of PHI we can eliminate is, for example, one where a path in a
+ // conditional spills and restores a register, and the register still has
+ // the same value once control flow joins, unbeknownst to the PHI placement
+ // code. Propagating values allows us to identify such unnecessary PHIs and
+ // remove them.
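+ // As an illustration: if one arm of a diamond spills a register and reloads
+ // it before the join block, PHI placement conservatively puts a PHI for
+ // that location at the join; value propagation then shows both incoming
+ // values are the same def, so the PHI collapses to that value.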
SmallPtrSet<const MachineBasicBlock *, 16> Visited;
while (!Worklist.empty() || !Pending.empty()) {
// Vector for storing the evaluated block transfer function.
@@ -2691,16 +2099,10 @@ void InstrRefBasedLDV::mlocDataflow(
Worklist.pop();
// Join the values in all predecessor blocks.
- bool InLocsChanged, DowngradeOccurred;
- std::tie(InLocsChanged, DowngradeOccurred) =
- mlocJoin(*MBB, Visited, MOutLocs, MInLocs[CurBB]);
+ bool InLocsChanged;
+ InLocsChanged = mlocJoin(*MBB, Visited, MOutLocs, MInLocs[CurBB]);
InLocsChanged |= Visited.insert(MBB).second;
- // If a downgrade occurred, book us in for re-examination on the next
- // iteration.
- if (DowngradeOccurred && OnPending.insert(MBB).second)
- Pending.push(BBToOrder[MBB]);
-
// Don't examine transfer function if we've visited this loc at least
// once, and inlocs haven't changed.
if (!InLocsChanged)
@@ -2715,7 +2117,7 @@ void InstrRefBasedLDV::mlocDataflow(
for (auto &P : MLocTransfer[CurBB]) {
if (P.second.getBlock() == CurBB && P.second.isPHI()) {
// This is a movement of whatever was live in. Read it.
- ValueIDNum NewID = MTracker->getNumAtPos(P.second.getLoc());
+ ValueIDNum NewID = MTracker->readMLoc(P.second.getLoc());
ToRemap.push_back(std::make_pair(P.first, NewID));
} else {
// It's a def. Just set it.
@@ -2745,8 +2147,8 @@ void InstrRefBasedLDV::mlocDataflow(
continue;
// All successors should be visited: put any back-edges on the pending
- // list for the next dataflow iteration, and any other successors to be
- // visited this iteration, if they're not going to be already.
+ // list for the next pass-through, and any other successors to be
+ // visited this pass, if they're not going to be already.
for (auto s : MBB->successors()) {
// Does branching to this successor represent a back-edge?
if (BBToOrder[s] > BBToOrder[MBB]) {
@@ -2769,170 +2171,169 @@ void InstrRefBasedLDV::mlocDataflow(
assert(Pending.empty() && "Pending should be empty");
}
- // Once all the live-ins don't change on mlocJoin(), we've reached a
- // fixedpoint.
+ // Once all the live-ins don't change on mlocJoin(), we've eliminated all
+ // redundant PHIs.
}
-bool InstrRefBasedLDV::vlocDowngradeLattice(
- const MachineBasicBlock &MBB, const DbgValue &OldLiveInLocation,
- const SmallVectorImpl<InValueT> &Values, unsigned CurBlockRPONum) {
- // Ranking value preference: see file level comment, the highest rank is
- // a plain def, followed by PHI values in reverse post-order. Numerically,
- // we assign all defs the rank '0', all PHIs their blocks RPO number plus
- // one, and consider the lowest value the highest ranked.
- int OldLiveInRank = BBNumToRPO[OldLiveInLocation.ID.getBlock()] + 1;
- if (!OldLiveInLocation.ID.isPHI())
- OldLiveInRank = 0;
-
- // Allow any unresolvable conflict to be over-ridden.
- if (OldLiveInLocation.Kind == DbgValue::NoVal) {
- // Although if it was an unresolvable conflict from _this_ block, then
- // all other seeking of downgrades and PHIs must have failed before hand.
- if (OldLiveInLocation.BlockNo == (unsigned)MBB.getNumber())
- return false;
- OldLiveInRank = INT_MIN;
- }
-
- auto &InValue = *Values[0].second;
+// Boilerplate for feeding MachineBasicBlocks into IDF calculator. Provide
+// template specialisations for graph traits and a successor enumerator.
+namespace llvm {
+template <> struct GraphTraits<MachineBasicBlock> {
+ using NodeRef = MachineBasicBlock *;
+ using ChildIteratorType = MachineBasicBlock::succ_iterator;
- if (InValue.Kind == DbgValue::Const || InValue.Kind == DbgValue::NoVal)
- return false;
+ static NodeRef getEntryNode(MachineBasicBlock *BB) { return BB; }
+ static ChildIteratorType child_begin(NodeRef N) { return N->succ_begin(); }
+ static ChildIteratorType child_end(NodeRef N) { return N->succ_end(); }
+};
- unsigned ThisRPO = BBNumToRPO[InValue.ID.getBlock()];
- int ThisRank = ThisRPO + 1;
- if (!InValue.ID.isPHI())
- ThisRank = 0;
+template <> struct GraphTraits<const MachineBasicBlock> {
+ using NodeRef = const MachineBasicBlock *;
+ using ChildIteratorType = MachineBasicBlock::const_succ_iterator;
- // Too far down the lattice?
- if (ThisRPO >= CurBlockRPONum)
- return false;
+ static NodeRef getEntryNode(const MachineBasicBlock *BB) { return BB; }
+ static ChildIteratorType child_begin(NodeRef N) { return N->succ_begin(); }
+ static ChildIteratorType child_end(NodeRef N) { return N->succ_end(); }
+};
- // Higher in the lattice than what we've already explored?
- if (ThisRank <= OldLiveInRank)
- return false;
+using MachineDomTreeBase = DomTreeBase<MachineBasicBlock>::NodeType;
+using MachineDomTreeChildGetter =
+ typename IDFCalculatorDetail::ChildrenGetterTy<MachineDomTreeBase, false>;
- return true;
+namespace IDFCalculatorDetail {
+template <>
+typename MachineDomTreeChildGetter::ChildrenTy
+MachineDomTreeChildGetter::get(const NodeRef &N) {
+ return {N->succ_begin(), N->succ_end()};
+}
+} // namespace IDFCalculatorDetail
+} // namespace llvm
+
+void InstrRefBasedLDV::BlockPHIPlacement(
+ const SmallPtrSetImpl<MachineBasicBlock *> &AllBlocks,
+ const SmallPtrSetImpl<MachineBasicBlock *> &DefBlocks,
+ SmallVectorImpl<MachineBasicBlock *> &PHIBlocks) {
+ // Apply IDF calculator to the designated set of location defs, storing
+ // required PHIs into PHIBlocks. Uses the dominator tree stored in the
+ // InstrRefBasedLDV object.
+ IDFCalculatorDetail::ChildrenGetterTy<MachineDomTreeBase, false> foo;
+ IDFCalculatorBase<MachineDomTreeBase, false> IDF(DomTree->getBase(), foo);
+
+ IDF.setLiveInBlocks(AllBlocks);
+ IDF.setDefiningBlocks(DefBlocks);
+ IDF.calculate(PHIBlocks);
}
-std::tuple<Optional<ValueIDNum>, bool> InstrRefBasedLDV::pickVPHILoc(
- MachineBasicBlock &MBB, const DebugVariable &Var, const LiveIdxT &LiveOuts,
- ValueIDNum **MOutLocs, ValueIDNum **MInLocs,
- const SmallVectorImpl<MachineBasicBlock *> &BlockOrders) {
+Optional<ValueIDNum> InstrRefBasedLDV::pickVPHILoc(
+ const MachineBasicBlock &MBB, const DebugVariable &Var,
+ const LiveIdxT &LiveOuts, ValueIDNum **MOutLocs,
+ const SmallVectorImpl<const MachineBasicBlock *> &BlockOrders) {
// Collect a set of locations from predecessor where its live-out value can
// be found.
SmallVector<SmallVector<LocIdx, 4>, 8> Locs;
+ SmallVector<const DbgValueProperties *, 4> Properties;
unsigned NumLocs = MTracker->getNumLocs();
- unsigned BackEdgesStart = 0;
- for (auto p : BlockOrders) {
- // Pick out where backedges start in the list of predecessors. Relies on
- // BlockOrders being sorted by RPO.
- if (BBToOrder[p] < BBToOrder[&MBB])
- ++BackEdgesStart;
+ // No predecessors means no PHIs.
+ if (BlockOrders.empty())
+ return None;
- // For each predecessor, create a new set of locations.
- Locs.resize(Locs.size() + 1);
+ for (auto p : BlockOrders) {
unsigned ThisBBNum = p->getNumber();
- auto LiveOutMap = LiveOuts.find(p);
- if (LiveOutMap == LiveOuts.end())
- // This predecessor isn't in scope, it must have no live-in/live-out
- // locations.
- continue;
-
- auto It = LiveOutMap->second->find(Var);
- if (It == LiveOutMap->second->end())
- // There's no value recorded for this variable in this predecessor,
- // leave an empty set of locations.
- continue;
-
- const DbgValue &OutVal = It->second;
+ auto OutValIt = LiveOuts.find(p);
+ if (OutValIt == LiveOuts.end())
+ // If we have a predecessor not in scope, we'll never find a PHI position.
+ return None;
+ const DbgValue &OutVal = *OutValIt->second;
if (OutVal.Kind == DbgValue::Const || OutVal.Kind == DbgValue::NoVal)
// Consts and no-values cannot have locations we can join on.
- continue;
+ return None;
- assert(OutVal.Kind == DbgValue::Proposed || OutVal.Kind == DbgValue::Def);
- ValueIDNum ValToLookFor = OutVal.ID;
+ Properties.push_back(&OutVal.Properties);
+
+ // Create new empty vector of locations.
+ Locs.resize(Locs.size() + 1);
- // Search the live-outs of the predecessor for the specified value.
- for (unsigned int I = 0; I < NumLocs; ++I) {
- if (MOutLocs[ThisBBNum][I] == ValToLookFor)
- Locs.back().push_back(LocIdx(I));
+ // If the live-in value is a def, find the locations where that value is
+ // present. Do the same for VPHIs where we know the VPHI value.
+ if (OutVal.Kind == DbgValue::Def ||
+ (OutVal.Kind == DbgValue::VPHI && OutVal.BlockNo != MBB.getNumber() &&
+ OutVal.ID != ValueIDNum::EmptyValue)) {
+ ValueIDNum ValToLookFor = OutVal.ID;
+ // Search the live-outs of the predecessor for the specified value.
+ for (unsigned int I = 0; I < NumLocs; ++I) {
+ if (MOutLocs[ThisBBNum][I] == ValToLookFor)
+ Locs.back().push_back(LocIdx(I));
+ }
+ } else {
+ assert(OutVal.Kind == DbgValue::VPHI);
+ // For VPHIs where we don't know the location, we definitely can't find
+ // a join loc.
+ if (OutVal.BlockNo != MBB.getNumber())
+ return None;
+
+ // Otherwise: this is a VPHI on a backedge feeding back into itself, i.e.
+ // a value that's live-through the whole loop. (It has to be a backedge,
+ // because a block can't dominate itself). We can accept as a PHI location
+ // any location where the other predecessors agree, _and_ the machine
+ // locations feed back into themselves. Therefore, add all self-looping
+ // machine-value PHI locations.
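+ // Illustrative case: if $rbx is never written inside the loop, the backedge
+ // predecessor's live-out for $rbx's location is this block's own live-in
+ // PHI, i.e. the machine value loops round unchanged, making that location
+ // acceptable here.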
+ for (unsigned int I = 0; I < NumLocs; ++I) {
+ ValueIDNum MPHI(MBB.getNumber(), 0, LocIdx(I));
+ if (MOutLocs[ThisBBNum][I] == MPHI)
+ Locs.back().push_back(LocIdx(I));
+ }
}
}
- // If there were no locations at all, return an empty result.
- if (Locs.empty())
- return std::tuple<Optional<ValueIDNum>, bool>(None, false);
-
- // Lambda for seeking a common location within a range of location-sets.
- using LocsIt = SmallVector<SmallVector<LocIdx, 4>, 8>::iterator;
- auto SeekLocation =
- [&Locs](llvm::iterator_range<LocsIt> SearchRange) -> Optional<LocIdx> {
- // Starting with the first set of locations, take the intersection with
- // subsequent sets.
- SmallVector<LocIdx, 4> base = Locs[0];
- for (auto &S : SearchRange) {
- SmallVector<LocIdx, 4> new_base;
- std::set_intersection(base.begin(), base.end(), S.begin(), S.end(),
- std::inserter(new_base, new_base.begin()));
- base = new_base;
- }
- if (base.empty())
- return None;
+ // We should have found locations for all predecessors, or returned.
+ assert(Locs.size() == BlockOrders.size());
- // We now have a set of LocIdxes that contain the right output value in
- // each of the predecessors. Pick the lowest; if there's a register loc,
- // that'll be it.
- return *base.begin();
- };
+ // Check that all properties are the same. We can't pick a location if they're
+ // not.
+ const DbgValueProperties *Properties0 = Properties[0];
+ for (auto *Prop : Properties)
+ if (*Prop != *Properties0)
+ return None;
- // Search for a common location for all predecessors. If we can't, then fall
- // back to only finding a common location between non-backedge predecessors.
- bool ValidForAllLocs = true;
- auto TheLoc = SeekLocation(Locs);
- if (!TheLoc) {
- ValidForAllLocs = false;
- TheLoc =
- SeekLocation(make_range(Locs.begin(), Locs.begin() + BackEdgesStart));
- }
+ // Starting with the first set of locations, take the intersection with
+ // subsequent sets.
+ SmallVector<LocIdx, 4> CandidateLocs = Locs[0];
+ for (unsigned int I = 1; I < Locs.size(); ++I) {
+ auto &LocVec = Locs[I];
+ SmallVector<LocIdx, 4> NewCandidates;
+ std::set_intersection(CandidateLocs.begin(), CandidateLocs.end(),
+ LocVec.begin(), LocVec.end(), std::inserter(NewCandidates, NewCandidates.begin()));
+ CandidateLocs = NewCandidates;
+ }
+ if (CandidateLocs.empty())
+ return None;
- if (!TheLoc)
- return std::tuple<Optional<ValueIDNum>, bool>(None, false);
+ // We now have a set of LocIdxes that contain the right output value in
+ // each of the predecessors. Pick the lowest; if there's a register loc,
+ // that'll be it.
+ LocIdx L = *CandidateLocs.begin();
// Return a PHI-value-number for the found location.
- LocIdx L = *TheLoc;
ValueIDNum PHIVal = {(unsigned)MBB.getNumber(), 0, L};
- return std::tuple<Optional<ValueIDNum>, bool>(PHIVal, ValidForAllLocs);
+ return PHIVal;
}
-std::tuple<bool, bool> InstrRefBasedLDV::vlocJoin(
- MachineBasicBlock &MBB, LiveIdxT &VLOCOutLocs, LiveIdxT &VLOCInLocs,
- SmallPtrSet<const MachineBasicBlock *, 16> *VLOCVisited, unsigned BBNum,
- const SmallSet<DebugVariable, 4> &AllVars, ValueIDNum **MOutLocs,
- ValueIDNum **MInLocs,
+bool InstrRefBasedLDV::vlocJoin(
+ MachineBasicBlock &MBB, LiveIdxT &VLOCOutLocs,
SmallPtrSet<const MachineBasicBlock *, 8> &InScopeBlocks,
SmallPtrSet<const MachineBasicBlock *, 8> &BlocksToExplore,
- DenseMap<DebugVariable, DbgValue> &InLocsT) {
- bool DowngradeOccurred = false;
-
+ DbgValue &LiveIn) {
// To emulate VarLocBasedImpl, process this block if it's not in scope but
// _does_ assign a variable value. No live-ins for this scope are transferred
// in though, so we can return immediately.
- if (InScopeBlocks.count(&MBB) == 0 && !ArtificialBlocks.count(&MBB)) {
- if (VLOCVisited)
- return std::tuple<bool, bool>(true, false);
- return std::tuple<bool, bool>(false, false);
- }
+ if (InScopeBlocks.count(&MBB) == 0 && !ArtificialBlocks.count(&MBB))
+ return false;
LLVM_DEBUG(dbgs() << "join MBB: " << MBB.getNumber() << "\n");
bool Changed = false;
- // Find any live-ins computed in a prior iteration.
- auto ILSIt = VLOCInLocs.find(&MBB);
- assert(ILSIt != VLOCInLocs.end());
- auto &ILS = *ILSIt->second;
-
// Order predecessors by RPOT order, for exploring them in that order.
SmallVector<MachineBasicBlock *, 8> BlockOrders(MBB.predecessors());
@@ -2944,244 +2345,102 @@ std::tuple<bool, bool> InstrRefBasedLDV::vlocJoin(
unsigned CurBlockRPONum = BBToOrder[&MBB];
- // Force a re-visit to loop heads in the first dataflow iteration.
- // FIXME: if we could "propose" Const values this wouldn't be needed,
- // because they'd need to be confirmed before being emitted.
- if (!BlockOrders.empty() &&
- BBToOrder[BlockOrders[BlockOrders.size() - 1]] >= CurBlockRPONum &&
- VLOCVisited)
- DowngradeOccurred = true;
-
- auto ConfirmValue = [&InLocsT](const DebugVariable &DV, DbgValue VR) {
- auto Result = InLocsT.insert(std::make_pair(DV, VR));
- (void)Result;
- assert(Result.second);
- };
-
- auto ConfirmNoVal = [&ConfirmValue, &MBB](const DebugVariable &Var, const DbgValueProperties &Properties) {
- DbgValue NoLocPHIVal(MBB.getNumber(), Properties, DbgValue::NoVal);
-
- ConfirmValue(Var, NoLocPHIVal);
- };
+ // Collect all the incoming DbgValues for this variable, from predecessor
+ // live-out values.
+ SmallVector<InValueT, 8> Values;
+ bool Bail = false;
+ int BackEdgesStart = 0;
+ for (auto p : BlockOrders) {
+ // If the predecessor isn't in scope / to be explored, we'll never be
+ // able to join any locations.
+ if (!BlocksToExplore.contains(p)) {
+ Bail = true;
+ break;
+ }
- // Attempt to join the values for each variable.
- for (auto &Var : AllVars) {
- // Collect all the DbgValues for this variable.
- SmallVector<InValueT, 8> Values;
- bool Bail = false;
- unsigned BackEdgesStart = 0;
- for (auto p : BlockOrders) {
- // If the predecessor isn't in scope / to be explored, we'll never be
- // able to join any locations.
- if (!BlocksToExplore.contains(p)) {
- Bail = true;
- break;
- }
+ // All Live-outs will have been initialized.
+ DbgValue &OutLoc = *VLOCOutLocs.find(p)->second;
- // Don't attempt to handle unvisited predecessors: they're implicitly
- // "unknown"s in the lattice.
- if (VLOCVisited && !VLOCVisited->count(p))
- continue;
+ // Keep track of where back-edges begin in the Values vector. Relies on
+ // BlockOrders being sorted by RPO.
+ unsigned ThisBBRPONum = BBToOrder[p];
+ if (ThisBBRPONum < CurBlockRPONum)
+ ++BackEdgesStart;
- // If the predecessors OutLocs is absent, there's not much we can do.
- auto OL = VLOCOutLocs.find(p);
- if (OL == VLOCOutLocs.end()) {
- Bail = true;
- break;
- }
+ Values.push_back(std::make_pair(p, &OutLoc));
+ }
- // No live-out value for this predecessor also means we can't produce
- // a joined value.
- auto VIt = OL->second->find(Var);
- if (VIt == OL->second->end()) {
- Bail = true;
- break;
- }
+ // If there were no values, or one of the predecessors couldn't have a
+ // value, then give up immediately. It's not safe to produce a live-in
+ // value. Leave as whatever it was before.
+ if (Bail || Values.size() == 0)
+ return false;
- // Keep track of where back-edges begin in the Values vector. Relies on
- // BlockOrders being sorted by RPO.
- unsigned ThisBBRPONum = BBToOrder[p];
- if (ThisBBRPONum < CurBlockRPONum)
- ++BackEdgesStart;
+ // All (non-entry) blocks have at least one non-backedge predecessor.
+ // Pick the variable value from the first of these, to compare against
+ // all others.
+ const DbgValue &FirstVal = *Values[0].second;
+
+ // If the old live-in value is not a PHI then either a) no PHI is needed
+ // here, or b) we eliminated the PHI that was here. In either case, we can
+ // just propagate in the first parent's incoming value.
+ if (LiveIn.Kind != DbgValue::VPHI || LiveIn.BlockNo != MBB.getNumber()) {
+ Changed = LiveIn != FirstVal;
+ if (Changed)
+ LiveIn = FirstVal;
+ return Changed;
+ }
+
+ // Scan for variable values that can never be resolved: if they have
+ // different DIExpressions, different indirectness, or are mixed constants /
+ // non-constants.
+ for (auto &V : Values) {
+ if (V.second->Properties != FirstVal.Properties)
+ return false;
+ if (V.second->Kind == DbgValue::NoVal)
+ return false;
+ if (V.second->Kind == DbgValue::Const && FirstVal.Kind != DbgValue::Const)
+ return false;
+ }
- Values.push_back(std::make_pair(p, &VIt->second));
- }
+ // Try to eliminate this PHI. Do the incoming values all agree?
+ bool Disagree = false;
+ for (auto &V : Values) {
+ if (*V.second == FirstVal)
+ continue; // No disagreement.
- // If there were no values, or one of the predecessors couldn't have a
- // value, then give up immediately. It's not safe to produce a live-in
- // value.
- if (Bail || Values.size() == 0)
+ // Eliminate if a backedge feeds a VPHI back into itself.
+ if (V.second->Kind == DbgValue::VPHI &&
+ V.second->BlockNo == MBB.getNumber() &&
+ // Is this a backedge?
+ std::distance(Values.begin(), &V) >= BackEdgesStart)
continue;
- // Enumeration identifying the current state of the predecessors values.
- enum {
- Unset = 0,
- Agreed, // All preds agree on the variable value.
- PropDisagree, // All preds agree, but the value kind is Proposed in some.
- BEDisagree, // Only back-edges disagree on variable value.
- PHINeeded, // Non-back-edge predecessors have conflicing values.
- NoSolution // Conflicting Value metadata makes solution impossible.
- } OurState = Unset;
-
- // All (non-entry) blocks have at least one non-backedge predecessor.
- // Pick the variable value from the first of these, to compare against
- // all others.
- const DbgValue &FirstVal = *Values[0].second;
- const ValueIDNum &FirstID = FirstVal.ID;
-
- // Scan for variable values that can't be resolved: if they have different
- // DIExpressions, different indirectness, or are mixed constants /
- // non-constants.
- for (auto &V : Values) {
- if (V.second->Properties != FirstVal.Properties)
- OurState = NoSolution;
- if (V.second->Kind == DbgValue::Const && FirstVal.Kind != DbgValue::Const)
- OurState = NoSolution;
- }
-
- // Flags diagnosing _how_ the values disagree.
- bool NonBackEdgeDisagree = false;
- bool DisagreeOnPHINess = false;
- bool IDDisagree = false;
- bool Disagree = false;
- if (OurState == Unset) {
- for (auto &V : Values) {
- if (*V.second == FirstVal)
- continue; // No disagreement.
-
- Disagree = true;
-
- // Flag whether the value number actually diagrees.
- if (V.second->ID != FirstID)
- IDDisagree = true;
-
- // Distinguish whether disagreement happens in backedges or not.
- // Relies on Values (and BlockOrders) being sorted by RPO.
- unsigned ThisBBRPONum = BBToOrder[V.first];
- if (ThisBBRPONum < CurBlockRPONum)
- NonBackEdgeDisagree = true;
-
- // Is there a difference in whether the value is definite or only
- // proposed?
- if (V.second->Kind != FirstVal.Kind &&
- (V.second->Kind == DbgValue::Proposed ||
- V.second->Kind == DbgValue::Def) &&
- (FirstVal.Kind == DbgValue::Proposed ||
- FirstVal.Kind == DbgValue::Def))
- DisagreeOnPHINess = true;
- }
-
- // Collect those flags together and determine an overall state for
- // what extend the predecessors agree on a live-in value.
- if (!Disagree)
- OurState = Agreed;
- else if (!IDDisagree && DisagreeOnPHINess)
- OurState = PropDisagree;
- else if (!NonBackEdgeDisagree)
- OurState = BEDisagree;
- else
- OurState = PHINeeded;
- }
-
- // An extra indicator: if we only disagree on whether the value is a
- // Def, or proposed, then also flag whether that disagreement happens
- // in backedges only.
- bool PropOnlyInBEs = Disagree && !IDDisagree && DisagreeOnPHINess &&
- !NonBackEdgeDisagree && FirstVal.Kind == DbgValue::Def;
-
- const auto &Properties = FirstVal.Properties;
-
- auto OldLiveInIt = ILS.find(Var);
- const DbgValue *OldLiveInLocation =
- (OldLiveInIt != ILS.end()) ? &OldLiveInIt->second : nullptr;
-
- bool OverRide = false;
- if (OurState == BEDisagree && OldLiveInLocation) {
- // Only backedges disagree: we can consider downgrading. If there was a
- // previous live-in value, use it to work out whether the current
- // incoming value represents a lattice downgrade or not.
- OverRide =
- vlocDowngradeLattice(MBB, *OldLiveInLocation, Values, CurBlockRPONum);
- }
-
- // Use the current state of predecessor agreement and other flags to work
- // out what to do next. Possibilities include:
- // * Accept a value all predecessors agree on, or accept one that
- // represents a step down the exploration lattice,
- // * Use a PHI value number, if one can be found,
- // * Propose a PHI value number, and see if it gets confirmed later,
- // * Emit a 'NoVal' value, indicating we couldn't resolve anything.
- if (OurState == Agreed) {
- // Easiest solution: all predecessors agree on the variable value.
- ConfirmValue(Var, FirstVal);
- } else if (OurState == BEDisagree && OverRide) {
- // Only backedges disagree, and the other predecessors have produced
- // a new live-in value further down the exploration lattice.
- DowngradeOccurred = true;
- ConfirmValue(Var, FirstVal);
- } else if (OurState == PropDisagree) {
- // Predecessors agree on value, but some say it's only a proposed value.
- // Propagate it as proposed: unless it was proposed in this block, in
- // which case we're able to confirm the value.
- if (FirstID.getBlock() == (uint64_t)MBB.getNumber() && FirstID.isPHI()) {
- ConfirmValue(Var, DbgValue(FirstID, Properties, DbgValue::Def));
- } else if (PropOnlyInBEs) {
- // If only backedges disagree, a higher (in RPO) block confirmed this
- // location, and we need to propagate it into this loop.
- ConfirmValue(Var, DbgValue(FirstID, Properties, DbgValue::Def));
- } else {
- // Otherwise; a Def meeting a Proposed is still a Proposed.
- ConfirmValue(Var, DbgValue(FirstID, Properties, DbgValue::Proposed));
- }
- } else if ((OurState == PHINeeded || OurState == BEDisagree)) {
- // Predecessors disagree and can't be downgraded: this can only be
- // solved with a PHI. Use pickVPHILoc to go look for one.
- Optional<ValueIDNum> VPHI;
- bool AllEdgesVPHI = false;
- std::tie(VPHI, AllEdgesVPHI) =
- pickVPHILoc(MBB, Var, VLOCOutLocs, MOutLocs, MInLocs, BlockOrders);
-
- if (VPHI && AllEdgesVPHI) {
- // There's a PHI value that's valid for all predecessors -- we can use
- // it. If any of the non-backedge predecessors have proposed values
- // though, this PHI is also only proposed, until the predecessors are
- // confirmed.
- DbgValue::KindT K = DbgValue::Def;
- for (unsigned int I = 0; I < BackEdgesStart; ++I)
- if (Values[I].second->Kind == DbgValue::Proposed)
- K = DbgValue::Proposed;
-
- ConfirmValue(Var, DbgValue(*VPHI, Properties, K));
- } else if (VPHI) {
- // There's a PHI value, but it's only legal for backedges. Leave this
- // as a proposed PHI value: it might come back on the backedges,
- // and allow us to confirm it in the future.
- DbgValue NoBEValue = DbgValue(*VPHI, Properties, DbgValue::Proposed);
- ConfirmValue(Var, NoBEValue);
- } else {
- ConfirmNoVal(Var, Properties);
- }
- } else {
- // Otherwise: we don't know. Emit a "phi but no real loc" phi.
- ConfirmNoVal(Var, Properties);
- }
+ Disagree = true;
}
- // Store newly calculated in-locs into VLOCInLocs, if they've changed.
- Changed = ILS != InLocsT;
- if (Changed)
- ILS = InLocsT;
-
- return std::tuple<bool, bool>(Changed, DowngradeOccurred);
+ // No disagreement -> live-through value.
+ if (!Disagree) {
+ Changed = LiveIn != FirstVal;
+ if (Changed)
+ LiveIn = FirstVal;
+ return Changed;
+ } else {
+ // Otherwise use a VPHI.
+ DbgValue VPHI(MBB.getNumber(), FirstVal.Properties, DbgValue::VPHI);
+ Changed = LiveIn != VPHI;
+ if (Changed)
+ LiveIn = VPHI;
+ return Changed;
+ }
}
-void InstrRefBasedLDV::vlocDataflow(
- const LexicalScope *Scope, const DILocation *DILoc,
+void InstrRefBasedLDV::buildVLocValueMap(const DILocation *DILoc,
const SmallSet<DebugVariable, 4> &VarsWeCareAbout,
SmallPtrSetImpl<MachineBasicBlock *> &AssignBlocks, LiveInsT &Output,
ValueIDNum **MOutLocs, ValueIDNum **MInLocs,
SmallVectorImpl<VLocTracker> &AllTheVLocs) {
- // This method is much like mlocDataflow: but focuses on a single
+ // This method is much like buildMLocValueMap: but focuses on a single
// LexicalScope at a time. Pick out a set of blocks and variables that are
// to have their value assignments solved, then run our dataflow algorithm
// until a fixedpoint is reached.
@@ -3235,8 +2494,8 @@ void InstrRefBasedLDV::vlocDataflow(
continue;
if (!ArtificialBlocks.count(succ))
continue;
- DFS.push_back(std::make_pair(succ, succ->succ_begin()));
ToAdd.insert(succ);
+ DFS.push_back(std::make_pair(succ, succ->succ_begin()));
}
// Search all those blocks, depth first.
@@ -3252,8 +2511,8 @@ void InstrRefBasedLDV::vlocDataflow(
// If the current successor is artificial and unexplored, descend into
// it.
if (!ToAdd.count(*CurSucc) && ArtificialBlocks.count(*CurSucc)) {
- DFS.push_back(std::make_pair(*CurSucc, (*CurSucc)->succ_begin()));
ToAdd.insert(*CurSucc);
+ DFS.push_back(std::make_pair(*CurSucc, (*CurSucc)->succ_begin()));
continue;
}
@@ -3278,6 +2537,13 @@ void InstrRefBasedLDV::vlocDataflow(
if (BlocksToExplore.size() == 1)
return;
+ // Convert a const set to a non-const set. LexicalScopes
+ // getMachineBasicBlocks returns const MBB pointers, but IDF wants mutable
+ // ones. (Neither of them mutates anything.)
+ SmallPtrSet<MachineBasicBlock *, 8> MutBlocksToExplore;
+ for (const auto *MBB : BlocksToExplore)
+ MutBlocksToExplore.insert(const_cast<MachineBasicBlock *>(MBB));
+
   // Pick out the relevant blocks in RPO order and sort them.
for (auto *MBB : BlocksToExplore)
BlockOrders.push_back(const_cast<MachineBasicBlock *>(MBB));
@@ -3286,9 +2552,18 @@ void InstrRefBasedLDV::vlocDataflow(
unsigned NumBlocks = BlockOrders.size();
// Allocate some vectors for storing the live ins and live outs. Large.
- SmallVector<DenseMap<DebugVariable, DbgValue>, 32> LiveIns, LiveOuts;
- LiveIns.resize(NumBlocks);
- LiveOuts.resize(NumBlocks);
+ SmallVector<DbgValue, 32> LiveIns, LiveOuts;
+ LiveIns.reserve(NumBlocks);
+ LiveOuts.reserve(NumBlocks);
+
+ // Initialize all values to start as NoVals. This signifies "it's live
+ // through, but we don't know what it is".
+ DbgValueProperties EmptyProperties(EmptyExpr, false);
+ for (unsigned int I = 0; I < NumBlocks; ++I) {
+ DbgValue EmptyDbgValue(I, EmptyProperties, DbgValue::NoVal);
+ LiveIns.push_back(EmptyDbgValue);
+ LiveOuts.push_back(EmptyDbgValue);
+ }
// Produce by-MBB indexes of live-in/live-outs, to ease lookup within
// vlocJoin.
@@ -3300,108 +2575,164 @@ void InstrRefBasedLDV::vlocDataflow(
LiveInIdx[BlockOrders[I]] = &LiveIns[I];
}
- for (auto *MBB : BlockOrders) {
- Worklist.push(BBToOrder[MBB]);
- OnWorklist.insert(MBB);
- }
+ // Loop over each variable and place PHIs for it, then propagate values
+ // between blocks. This keeps the locality of working on one lexical scope
+ // at a time, but avoids re-processing variable values because some other
+ // variable has been assigned.
+ for (auto &Var : VarsWeCareAbout) {
+ // Re-initialize live-ins and live-outs, to clear the remains of previous
+ // variables live-ins / live-outs.
+ for (unsigned int I = 0; I < NumBlocks; ++I) {
+ DbgValue EmptyDbgValue(I, EmptyProperties, DbgValue::NoVal);
+ LiveIns[I] = EmptyDbgValue;
+ LiveOuts[I] = EmptyDbgValue;
+ }
- // Iterate over all the blocks we selected, propagating variable values.
- bool FirstTrip = true;
- SmallPtrSet<const MachineBasicBlock *, 16> VLOCVisited;
- while (!Worklist.empty() || !Pending.empty()) {
- while (!Worklist.empty()) {
- auto *MBB = OrderToBB[Worklist.top()];
- CurBB = MBB->getNumber();
- Worklist.pop();
+ // Place PHIs for variable values, using the LLVM IDF calculator.
+ // Collect the set of blocks where variables are def'd.
+ SmallPtrSet<MachineBasicBlock *, 32> DefBlocks;
+ for (const MachineBasicBlock *ExpMBB : BlocksToExplore) {
+ auto &TransferFunc = AllTheVLocs[ExpMBB->getNumber()].Vars;
+ if (TransferFunc.find(Var) != TransferFunc.end())
+ DefBlocks.insert(const_cast<MachineBasicBlock *>(ExpMBB));
+ }
- DenseMap<DebugVariable, DbgValue> JoinedInLocs;
+ SmallVector<MachineBasicBlock *, 32> PHIBlocks;
- // Join values from predecessors. Updates LiveInIdx, and writes output
- // into JoinedInLocs.
- bool InLocsChanged, DowngradeOccurred;
- std::tie(InLocsChanged, DowngradeOccurred) = vlocJoin(
- *MBB, LiveOutIdx, LiveInIdx, (FirstTrip) ? &VLOCVisited : nullptr,
- CurBB, VarsWeCareAbout, MOutLocs, MInLocs, InScopeBlocks,
- BlocksToExplore, JoinedInLocs);
+ // Request the set of PHIs we should insert for this variable.
+ BlockPHIPlacement(MutBlocksToExplore, DefBlocks, PHIBlocks);
- bool FirstVisit = VLOCVisited.insert(MBB).second;
+ // Insert PHIs into the per-block live-in tables for this variable.
+ for (MachineBasicBlock *PHIMBB : PHIBlocks) {
+ unsigned BlockNo = PHIMBB->getNumber();
+ DbgValue *LiveIn = LiveInIdx[PHIMBB];
+ *LiveIn = DbgValue(BlockNo, EmptyProperties, DbgValue::VPHI);
+ }
- // Always explore transfer function if inlocs changed, or if we've not
- // visited this block before.
- InLocsChanged |= FirstVisit;
+ for (auto *MBB : BlockOrders) {
+ Worklist.push(BBToOrder[MBB]);
+ OnWorklist.insert(MBB);
+ }
- // If a downgrade occurred, book us in for re-examination on the next
- // iteration.
- if (DowngradeOccurred && OnPending.insert(MBB).second)
- Pending.push(BBToOrder[MBB]);
+ // Iterate over all the blocks we selected, propagating the variable's
+ // value. This loop does two things:
+ // * Eliminates unnecessary VPHIs in vlocJoin,
+ // * Evaluates the block's transfer function (i.e. variable assignments)
+ //   and stores the result to the block's live-outs.
+ // Always evaluate the transfer function on the first iteration, and when
+ // the live-ins change thereafter.
+ bool FirstTrip = true;
+ while (!Worklist.empty() || !Pending.empty()) {
+ while (!Worklist.empty()) {
+ auto *MBB = OrderToBB[Worklist.top()];
+ CurBB = MBB->getNumber();
+ Worklist.pop();
+
+ auto LiveInsIt = LiveInIdx.find(MBB);
+ assert(LiveInsIt != LiveInIdx.end());
+ DbgValue *LiveIn = LiveInsIt->second;
+
+ // Join values from predecessors, updating this block's live-in value
+ // in place.
+ bool InLocsChanged =
+ vlocJoin(*MBB, LiveOutIdx, InScopeBlocks, BlocksToExplore, *LiveIn);
+
+ SmallVector<const MachineBasicBlock *, 8> Preds;
+ for (const auto *Pred : MBB->predecessors())
+ Preds.push_back(Pred);
+
+ // If this block's live-in value is a VPHI, try to pick a machine-value
+ // for it. This makes the machine-value available and propagated
+ // through all blocks by the time value propagation finishes. We can't
+ // do this any earlier as it needs to read the block live-outs.
+ if (LiveIn->Kind == DbgValue::VPHI && LiveIn->BlockNo == (int)CurBB) {
+ // There's a small possibility that on a preceding path, a VPHI is
+ // eliminated and transitions from VPHI-with-location to
+ // live-through-value. As a result, the selected location of any VPHI
+ // might change, so we need to re-compute it on each iteration.
+ Optional<ValueIDNum> ValueNum =
+ pickVPHILoc(*MBB, Var, LiveOutIdx, MOutLocs, Preds);
+
+ if (ValueNum) {
+ InLocsChanged |= LiveIn->ID != *ValueNum;
+ LiveIn->ID = *ValueNum;
+ }
+ }
- if (!InLocsChanged)
- continue;
+ if (!InLocsChanged && !FirstTrip)
+ continue;
+
+ DbgValue *LiveOut = LiveOutIdx[MBB];
+ bool OLChanged = false;
- // Do transfer function.
- auto &VTracker = AllTheVLocs[MBB->getNumber()];
- for (auto &Transfer : VTracker.Vars) {
- // Is this var we're mangling in this scope?
- if (VarsWeCareAbout.count(Transfer.first)) {
+ // Do transfer function.
+ auto &VTracker = AllTheVLocs[MBB->getNumber()];
+ auto TransferIt = VTracker.Vars.find(Var);
+ if (TransferIt != VTracker.Vars.end()) {
// Erase on empty transfer (DBG_VALUE $noreg).
- if (Transfer.second.Kind == DbgValue::Undef) {
- JoinedInLocs.erase(Transfer.first);
+ if (TransferIt->second.Kind == DbgValue::Undef) {
+ DbgValue NewVal(MBB->getNumber(), EmptyProperties, DbgValue::NoVal);
+ if (*LiveOut != NewVal) {
+ *LiveOut = NewVal;
+ OLChanged = true;
+ }
} else {
// Insert new variable value; or overwrite.
- auto NewValuePair = std::make_pair(Transfer.first, Transfer.second);
- auto Result = JoinedInLocs.insert(NewValuePair);
- if (!Result.second)
- Result.first->second = Transfer.second;
+ if (*LiveOut != TransferIt->second) {
+ *LiveOut = TransferIt->second;
+ OLChanged = true;
+ }
+ }
+ } else {
+ // Just copy live-ins to live-outs, for anything not transferred.
+ if (*LiveOut != *LiveIn) {
+ *LiveOut = *LiveIn;
+ OLChanged = true;
}
}
- }
-
- // Did the live-out locations change?
- bool OLChanged = JoinedInLocs != *LiveOutIdx[MBB];
-
- // If they haven't changed, there's no need to explore further.
- if (!OLChanged)
- continue;
- // Commit to the live-out record.
- *LiveOutIdx[MBB] = JoinedInLocs;
-
- // We should visit all successors. Ensure we'll visit any non-backedge
- // successors during this dataflow iteration; book backedge successors
- // to be visited next time around.
- for (auto s : MBB->successors()) {
- // Ignore out of scope / not-to-be-explored successors.
- if (LiveInIdx.find(s) == LiveInIdx.end())
+ // If no live-out value changed, there's no need to explore further.
+ if (!OLChanged)
continue;
- if (BBToOrder[s] > BBToOrder[MBB]) {
- if (OnWorklist.insert(s).second)
- Worklist.push(BBToOrder[s]);
- } else if (OnPending.insert(s).second && (FirstTrip || OLChanged)) {
- Pending.push(BBToOrder[s]);
+ // We should visit all successors. Ensure we'll visit any non-backedge
+ // successors during this dataflow iteration; book backedge successors
+ // to be visited next time around.
+ for (auto s : MBB->successors()) {
+ // Ignore out of scope / not-to-be-explored successors.
+ if (LiveInIdx.find(s) == LiveInIdx.end())
+ continue;
+
+ if (BBToOrder[s] > BBToOrder[MBB]) {
+ if (OnWorklist.insert(s).second)
+ Worklist.push(BBToOrder[s]);
+ } else if (OnPending.insert(s).second && (FirstTrip || OLChanged)) {
+ Pending.push(BBToOrder[s]);
+ }
}
}
+ Worklist.swap(Pending);
+ std::swap(OnWorklist, OnPending);
+ OnPending.clear();
+ assert(Pending.empty());
+ FirstTrip = false;
}
- Worklist.swap(Pending);
- std::swap(OnWorklist, OnPending);
- OnPending.clear();
- assert(Pending.empty());
- FirstTrip = false;
- }
-
- // Dataflow done. Now what? Save live-ins. Ignore any that are still marked
- // as being variable-PHIs, because those did not have their machine-PHI
- // value confirmed. Such variable values are places that could have been
- // PHIs, but are not.
- for (auto *MBB : BlockOrders) {
- auto &VarMap = *LiveInIdx[MBB];
- for (auto &P : VarMap) {
- if (P.second.Kind == DbgValue::Proposed ||
- P.second.Kind == DbgValue::NoVal)
+
+ // Save live-ins to output vector. Ignore any that are still marked as being
+ // VPHIs with no location -- those are variables that we know the value of,
+ // but are not actually available in the register file.
+ for (auto *MBB : BlockOrders) {
+ DbgValue *BlockLiveIn = LiveInIdx[MBB];
+ if (BlockLiveIn->Kind == DbgValue::NoVal)
continue;
- Output[MBB->getNumber()].push_back(P);
+ if (BlockLiveIn->Kind == DbgValue::VPHI &&
+ BlockLiveIn->ID == ValueIDNum::EmptyValue)
+ continue;
+ if (BlockLiveIn->Kind == DbgValue::VPHI)
+ BlockLiveIn->Kind = DbgValue::Def;
+ Output[MBB->getNumber()].push_back(std::make_pair(Var, *BlockLiveIn));
}
- }
+ } // Per-variable loop.
BlockOrders.clear();
BlocksToExplore.clear();
@@ -3485,6 +2816,10 @@ void InstrRefBasedLDV::emitLocations(
void InstrRefBasedLDV::initialSetup(MachineFunction &MF) {
// Build some useful data structures.
+
+ LLVMContext &Context = MF.getFunction().getContext();
+ EmptyExpr = DIExpression::get(Context, {});
+
auto hasNonArtificialLocation = [](const MachineInstr &MI) -> bool {
if (const DebugLoc &DL = MI.getDebugLoc())
return DL.getLine() != 0;
@@ -3524,7 +2859,10 @@ void InstrRefBasedLDV::initialSetup(MachineFunction &MF) {
/// Calculate the liveness information for the given machine function and
/// extend ranges across basic blocks.
bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
- TargetPassConfig *TPC) {
+ MachineDominatorTree *DomTree,
+ TargetPassConfig *TPC,
+ unsigned InputBBLimit,
+ unsigned InputDbgValLimit) {
// No subprogram means this function contains no debuginfo.
if (!MF.getFunction().getSubprogram())
return false;
@@ -3532,7 +2870,9 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
LLVM_DEBUG(dbgs() << "\nDebug Range Extension\n");
this->TPC = TPC;
+ this->DomTree = DomTree;
TRI = MF.getSubtarget().getRegisterInfo();
+ MRI = &MF.getRegInfo();
TII = MF.getSubtarget().getInstrInfo();
TFI = MF.getSubtarget().getFrameLowering();
TFI->getCalleeSaves(MF, CalleeSavedRegs);
@@ -3569,6 +2909,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
ValueIDNum **MInLocs = new ValueIDNum *[MaxNumBlocks];
unsigned NumLocs = MTracker->getNumLocs();
for (int i = 0; i < MaxNumBlocks; ++i) {
+ // These all auto-initialize to ValueIDNum::EmptyValue
MOutLocs[i] = new ValueIDNum[NumLocs];
MInLocs[i] = new ValueIDNum[NumLocs];
}
@@ -3577,7 +2918,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
// storing the computed live-ins / live-outs into the array-of-arrays. We use
// both live-ins and live-outs for decision making in the variable value
// dataflow problem.
- mlocDataflow(MInLocs, MOutLocs, MLocTransfer);
+ buildMLocValueMap(MF, MInLocs, MOutLocs, MLocTransfer);
// Patch up debug phi numbers, turning unknown block-live-in values into
// either live-through machine values, or PHIs.
@@ -3626,6 +2967,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
// To mirror old LiveDebugValues, enumerate variables in RPOT order. Otherwise
// the order is unimportant, it just has to be stable.
+ unsigned VarAssignCount = 0;
for (unsigned int I = 0; I < OrderToBB.size(); ++I) {
auto *MBB = OrderToBB[I];
auto *VTracker = &vlocs[MBB->getNumber()];
@@ -3643,24 +2985,42 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
ScopeToVars[Scope].insert(Var);
ScopeToBlocks[Scope].insert(VTracker->MBB);
ScopeToDILocation[Scope] = ScopeLoc;
+ ++VarAssignCount;
}
}
- // OK. Iterate over scopes: there might be something to be said for
- // ordering them by size/locality, but that's for the future. For each scope,
- // solve the variable value problem, producing a map of variables to values
- // in SavedLiveIns.
- for (auto &P : ScopeToVars) {
- vlocDataflow(P.first, ScopeToDILocation[P.first], P.second,
- ScopeToBlocks[P.first], SavedLiveIns, MOutLocs, MInLocs,
- vlocs);
- }
+ bool Changed = false;
+
+ // If we have an extremely large number of variable assignments and blocks,
+ // bail out at this point. We've burnt some time doing analysis already,
+ // but we should cut our losses.
+ if ((unsigned)MaxNumBlocks > InputBBLimit &&
+ VarAssignCount > InputDbgValLimit) {
+ LLVM_DEBUG(dbgs() << "Disabling InstrRefBasedLDV: " << MF.getName()
+ << " has " << MaxNumBlocks << " basic blocks and "
+ << VarAssignCount
+ << " variable assignments, exceeding limits.\n");
+ } else {
+ // Compute the extended ranges, iterating over scopes. There might be
+ // something to be said for ordering them by size/locality, but that's for
+ // the future. For each scope, solve the variable value problem, producing
+ // a map of variables to values in SavedLiveIns.
+ for (auto &P : ScopeToVars) {
+ buildVLocValueMap(ScopeToDILocation[P.first], P.second,
+ ScopeToBlocks[P.first], SavedLiveIns, MOutLocs, MInLocs,
+ vlocs);
+ }
+
+ // Using the computed value locations and variable values for each block,
+ // create the DBG_VALUE instructions representing the extended variable
+ // locations.
+ emitLocations(MF, SavedLiveIns, MOutLocs, MInLocs, AllVarsNumbering, *TPC);
- // Using the computed value locations and variable values for each block,
- // create the DBG_VALUE instructions representing the extended variable
- // locations.
- emitLocations(MF, SavedLiveIns, MOutLocs, MInLocs, AllVarsNumbering, *TPC);
+ // Did we actually make any changes? If we created any DBG_VALUEs, then yes.
+ Changed = TTracker->Transfers.size() != 0;
+ }
+ // Common clean-up of memory.
for (int Idx = 0; Idx < MaxNumBlocks; ++Idx) {
delete[] MOutLocs[Idx];
delete[] MInLocs[Idx];
@@ -3668,9 +3028,6 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
delete[] MOutLocs;
delete[] MInLocs;
- // Did we actually make any changes? If we created any DBG_VALUEs, then yes.
- bool Changed = TTracker->Transfers.size() != 0;
-
delete MTracker;
delete TTracker;
MTracker = nullptr;
@@ -3883,10 +3240,8 @@ public:
/// vector.
static void FindPredecessorBlocks(LDVSSABlock *BB,
SmallVectorImpl<LDVSSABlock *> *Preds) {
- for (MachineBasicBlock::pred_iterator PI = BB->BB.pred_begin(),
- E = BB->BB.pred_end();
- PI != E; ++PI)
- Preds->push_back(BB->Updater.getSSALDVBlock(*PI));
+ for (MachineBasicBlock *Pred : BB->BB.predecessors())
+ Preds->push_back(BB->Updater.getSSALDVBlock(Pred));
}
/// GetUndefVal - Normally creates an IMPLICIT_DEF instruction with a new
diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
new file mode 100644
index 000000000000..d96ef6d4f6e5
--- /dev/null
+++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
@@ -0,0 +1,1051 @@
+//===- InstrRefBasedImpl.h - Tracking Debug Value MIs ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_INSTRREFBASEDLDV_H
+#define LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_INSTRREFBASEDLDV_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/UniqueVector.h"
+#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+
+#include "LiveDebugValues.h"
+
+class TransferTracker;
+
+// Forward declaration of the unit test class, so that we can peer into the
+// LDV object.
+class InstrRefLDVTest;
+
+namespace LiveDebugValues {
+
+class MLocTracker;
+
+using namespace llvm;
+
+/// Handle-class for a particular "location". This value-type uniquely
+/// symbolises a register or stack location, allowing manipulation of locations
+/// without concern for where that location is. Practically, this allows us to
+/// treat the state of the machine at a particular point as an array of values,
+/// rather than a map of values.
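+/// For example (hypothetical numbering), the tracker might assign LocIdx 1
+/// to $rax and LocIdx 5 to a position within a spill slot; both are then
+/// used interchangeably as indexes into the location-to-value array.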
+class LocIdx {
+ unsigned Location;
+
+ // Default constructor is private, initializing to an illegal location number.
+ // Use only for "not an entry" elements in IndexedMaps.
+ LocIdx() : Location(UINT_MAX) {}
+
+public:
+#define NUM_LOC_BITS 24
+ LocIdx(unsigned L) : Location(L) {
+ assert(L < (1 << NUM_LOC_BITS) && "Machine locations must fit in 24 bits");
+ }
+
+ static LocIdx MakeIllegalLoc() { return LocIdx(); }
+ static LocIdx MakeTombstoneLoc() {
+ LocIdx L = LocIdx();
+ --L.Location;
+ return L;
+ }
+
+ bool isIllegal() const { return Location == UINT_MAX; }
+
+ uint64_t asU64() const { return Location; }
+
+ bool operator==(unsigned L) const { return Location == L; }
+
+ bool operator==(const LocIdx &L) const { return Location == L.Location; }
+
+ bool operator!=(unsigned L) const { return !(*this == L); }
+
+ bool operator!=(const LocIdx &L) const { return !(*this == L); }
+
+ bool operator<(const LocIdx &Other) const {
+ return Location < Other.Location;
+ }
+};
+
+// The location at which a spilled value resides. It consists of a register and
+// an offset.
+struct SpillLoc {
+ unsigned SpillBase;
+ StackOffset SpillOffset;
+ bool operator==(const SpillLoc &Other) const {
+ return std::make_pair(SpillBase, SpillOffset) ==
+ std::make_pair(Other.SpillBase, Other.SpillOffset);
+ }
+ bool operator<(const SpillLoc &Other) const {
+ return std::make_tuple(SpillBase, SpillOffset.getFixed(),
+ SpillOffset.getScalable()) <
+ std::make_tuple(Other.SpillBase, Other.SpillOffset.getFixed(),
+ Other.SpillOffset.getScalable());
+ }
+};
+
+/// Unique identifier for a value defined by an instruction, as a value type.
+/// Casts back and forth to a uint64_t. Probably replaceable with something
+/// less bit-constrained. Each value identifies the instruction and machine
+/// location where the value is defined, although there may be no corresponding
+/// machine operand for it (e.g. regmasks clobbering values). The instructions are
+/// one-based, and definitions that are PHIs have instruction number zero.
+///
+/// The obvious limits of a 1M block function or 1M instruction blocks are
+/// problematic; but by that point we should probably have bailed out of
+/// trying to analyse the function.
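+///
+/// For example, ValueIDNum(4, 0, 12) names the PHI / live-in value of machine
+/// location 12 at the start of block 4, while ValueIDNum(4, 3, 12) names the
+/// value that the third instruction of block 4 defines into that location.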
+class ValueIDNum {
+ union {
+ struct {
+ uint64_t BlockNo : 20; /// The block where the def happens.
+ uint64_t InstNo : 20; /// The Instruction where the def happens.
+ /// One based, is distance from start of block.
+ uint64_t LocNo
+ : NUM_LOC_BITS; /// The machine location where the def happens.
+ } s;
+ uint64_t Value;
+ } u;
+
+ static_assert(sizeof(u) == 8, "Badly packed ValueIDNum?");
+
+public:
+ // Default-initialize to EmptyValue. This is necessary to make IndexedMaps
+ // of values to work.
+ ValueIDNum() { u.Value = EmptyValue.asU64(); }
+
+ ValueIDNum(uint64_t Block, uint64_t Inst, uint64_t Loc) {
+ u.s = {Block, Inst, Loc};
+ }
+
+ ValueIDNum(uint64_t Block, uint64_t Inst, LocIdx Loc) {
+ u.s = {Block, Inst, Loc.asU64()};
+ }
+
+ uint64_t getBlock() const { return u.s.BlockNo; }
+ uint64_t getInst() const { return u.s.InstNo; }
+ uint64_t getLoc() const { return u.s.LocNo; }
+ bool isPHI() const { return u.s.InstNo == 0; }
+
+ uint64_t asU64() const { return u.Value; }
+
+ static ValueIDNum fromU64(uint64_t v) {
+ ValueIDNum Val;
+ Val.u.Value = v;
+ return Val;
+ }
+
+ bool operator<(const ValueIDNum &Other) const {
+ return asU64() < Other.asU64();
+ }
+
+ bool operator==(const ValueIDNum &Other) const {
+ return u.Value == Other.u.Value;
+ }
+
+ bool operator!=(const ValueIDNum &Other) const { return !(*this == Other); }
+
+ std::string asString(const std::string &mlocname) const {
+ return Twine("Value{bb: ")
+ .concat(Twine(u.s.BlockNo)
+ .concat(Twine(", inst: ")
+ .concat((u.s.InstNo ? Twine(u.s.InstNo)
+ : Twine("live-in"))
+ .concat(Twine(", loc: ").concat(
+ Twine(mlocname)))
+ .concat(Twine("}")))))
+ .str();
+ }
+
+ static ValueIDNum EmptyValue;
+ static ValueIDNum TombstoneValue;
+};
+
+/// Thin wrapper around an integer -- designed to give more type safety to
+/// spill location numbers.
+class SpillLocationNo {
+public:
+ explicit SpillLocationNo(unsigned SpillNo) : SpillNo(SpillNo) {}
+ unsigned SpillNo;
+ unsigned id() const { return SpillNo; }
+
+ bool operator<(const SpillLocationNo &Other) const {
+ return SpillNo < Other.SpillNo;
+ }
+
+ bool operator==(const SpillLocationNo &Other) const {
+ return SpillNo == Other.SpillNo;
+ }
+ bool operator!=(const SpillLocationNo &Other) const {
+ return !(*this == Other);
+ }
+};
+
+/// Meta qualifiers for a value. Pair of whatever expression is used to qualify
+/// the value, and a boolean of whether or not it's indirect.
+class DbgValueProperties {
+public:
+ DbgValueProperties(const DIExpression *DIExpr, bool Indirect)
+ : DIExpr(DIExpr), Indirect(Indirect) {}
+
+ /// Extract properties from an existing DBG_VALUE instruction.
+ DbgValueProperties(const MachineInstr &MI) {
+ assert(MI.isDebugValue());
+ DIExpr = MI.getDebugExpression();
+ Indirect = MI.getOperand(1).isImm();
+ }
+
+ bool operator==(const DbgValueProperties &Other) const {
+ return std::tie(DIExpr, Indirect) == std::tie(Other.DIExpr, Other.Indirect);
+ }
+
+ bool operator!=(const DbgValueProperties &Other) const {
+ return !(*this == Other);
+ }
+
+ const DIExpression *DIExpr;
+ bool Indirect;
+};
+
+/// Class recording the (high level) _value_ of a variable. Identifies either
+/// the value of the variable as a ValueIDNum, or a constant MachineOperand.
+/// This class also stores meta-information about how the value is qualified.
+/// Used to reason about variable values when performing the second
+/// (DebugVariable specific) dataflow analysis.
+class DbgValue {
+public:
+ /// If Kind is Def, the value number that this value is based on. VPHIs set
+ /// this field to EmptyValue if there is no machine-value for this VPHI, or
+ /// the corresponding machine-value if there is one.
+ ValueIDNum ID;
+ /// If Kind is Const, the MachineOperand defining this value.
+ Optional<MachineOperand> MO;
+ /// For a NoVal or VPHI DbgValue, which block it was generated in.
+ int BlockNo;
+
+ /// Qualifiers for the ValueIDNum above.
+ DbgValueProperties Properties;
+
+ typedef enum {
+ Undef, // Represents a DBG_VALUE $noreg in the transfer function only.
+ Def, // This value is defined by an inst, or is a PHI value.
+ Const, // A constant value contained in the MachineOperand field.
+ VPHI, // Incoming values to BlockNo differ, those values must be joined by
+ // a PHI in this block.
+ NoVal, // Empty DbgValue indicating an unknown value. Used as initializer,
+ // before dominating blocks values are propagated in.
+ } KindT;
+ /// Discriminator for whether this is a constant or an in-program value.
+ KindT Kind;
+
+ DbgValue(const ValueIDNum &Val, const DbgValueProperties &Prop, KindT Kind)
+ : ID(Val), MO(None), BlockNo(0), Properties(Prop), Kind(Kind) {
+ assert(Kind == Def);
+ }
+
+ DbgValue(unsigned BlockNo, const DbgValueProperties &Prop, KindT Kind)
+ : ID(ValueIDNum::EmptyValue), MO(None), BlockNo(BlockNo),
+ Properties(Prop), Kind(Kind) {
+ assert(Kind == NoVal || Kind == VPHI);
+ }
+
+ DbgValue(const MachineOperand &MO, const DbgValueProperties &Prop, KindT Kind)
+ : ID(ValueIDNum::EmptyValue), MO(MO), BlockNo(0), Properties(Prop),
+ Kind(Kind) {
+ assert(Kind == Const);
+ }
+
+ DbgValue(const DbgValueProperties &Prop, KindT Kind)
+ : ID(ValueIDNum::EmptyValue), MO(None), BlockNo(0), Properties(Prop),
+ Kind(Kind) {
+ assert(Kind == Undef &&
+ "Empty DbgValue constructor must pass in Undef kind");
+ }
+
+#ifndef NDEBUG
+ void dump(const MLocTracker *MTrack) const;
+#endif
+
+ bool operator==(const DbgValue &Other) const {
+ if (std::tie(Kind, Properties) != std::tie(Other.Kind, Other.Properties))
+ return false;
+ else if (Kind == Def && ID != Other.ID)
+ return false;
+ else if (Kind == NoVal && BlockNo != Other.BlockNo)
+ return false;
+ else if (Kind == Const)
+ return MO->isIdenticalTo(*Other.MO);
+ else if (Kind == VPHI && BlockNo != Other.BlockNo)
+ return false;
+ else if (Kind == VPHI && ID != Other.ID)
+ return false;
+
+ return true;
+ }
+
+ bool operator!=(const DbgValue &Other) const { return !(*this == Other); }
+};
+
+class LocIdxToIndexFunctor {
+public:
+ using argument_type = LocIdx;
+ unsigned operator()(const LocIdx &L) const { return L.asU64(); }
+};
+
+/// Tracker for what values are in machine locations. Listens to the Things
+/// being Done by various instructions, and maintains a table of what machine
+/// locations have what values (as defined by a ValueIDNum).
+///
+/// There are potentially a much larger number of machine locations on the
+/// target machine than the actual working-set size of the function. On x86 for
+/// example, we're extremely unlikely to want to track values through control
+/// or debug registers. To avoid doing so, MLocTracker has several layers of
+/// indirection going on, described below, to avoid unnecessarily tracking
+/// any location.
+///
+/// Here's a sort of diagram of the indexes, read from the bottom up:
+///
+/// Size on stack Offset on stack
+/// \ /
+/// Stack Idx (Where in slot is this?)
+/// /
+/// /
+/// Slot Num (%stack.0) /
+/// FrameIdx => SpillNum /
+/// \ /
+/// SpillID (int) Register number (int)
+/// \ /
+/// LocationID => LocIdx
+/// |
+/// LocIdx => ValueIDNum
+///
+/// The aim here is that the LocIdx => ValueIDNum vector is just an array of
+/// values in numbered locations, so that later analyses can ignore whether the
+/// location is a register or otherwise. To map a register / spill location to
+/// a LocIdx, you have to use the (sparse) LocationID => LocIdx map. And to
+/// build a LocationID for a stack slot, you need to combine identifiers for
+/// which stack slot it is and where within that slot is being described.
+///
+/// Register mask operands cause trouble by technically defining every register;
+/// various hacks are used to avoid tracking registers that are never read and
+/// only written by regmasks.
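+///
+/// As a purely illustrative sketch (the register and spill numbers below are
+/// made up, not taken from any real target), the chain of lookups for a
+/// register and for a spill position is:
+///
+///   getLocID(Register(3))                  --> LocationID 3
+///   getLocID(SpillLocationNo(1), {64, 0})  --> LocationID NumRegs + index
+///                                              assigned to the {64, 0} slot
+///   LocIDToLocIdx[LocationID]              --> LocIdx (dense key)
+///   LocIdxToIDNum[LocIdx]                  --> ValueIDNum held in the location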
+class MLocTracker {
+public:
+ MachineFunction &MF;
+ const TargetInstrInfo &TII;
+ const TargetRegisterInfo &TRI;
+ const TargetLowering &TLI;
+
+ /// IndexedMap type, mapping from LocIdx to ValueIDNum.
+ using LocToValueType = IndexedMap<ValueIDNum, LocIdxToIndexFunctor>;
+
+ /// Map of LocIdxes to the ValueIDNums that they store. This is tightly
+ /// packed, entries only exist for locations that are being tracked.
+ LocToValueType LocIdxToIDNum;
+
+ /// "Map" of machine location IDs (i.e., raw register or spill number) to the
+ /// LocIdx key / number for that location. There are always at least as many
+ /// as the number of registers on the target -- if the value in the register
+ /// is not being tracked, then the LocIdx value will be zero. New entries are
+ /// appended if a new spill slot begins being tracked.
+  /// This, and the corresponding reverse map, persist for the analysis of the
+  /// whole function, and are necessary for decoding various vectors of
+  /// values.
+ std::vector<LocIdx> LocIDToLocIdx;
+
+ /// Inverse map of LocIDToLocIdx.
+ IndexedMap<unsigned, LocIdxToIndexFunctor> LocIdxToLocID;
+
+  /// When a register mask clobbers registers, we choose not to believe the
+  /// machine model and don't clobber SP. Do the same for SP aliases, and for
+  /// efficiency, keep a set of them here.
+ SmallSet<Register, 8> SPAliases;
+
+  /// Unique-ification of spill locations. Used to number them -- their LocID
+  /// number is the index in SpillLocs minus one plus NumRegs.
+ UniqueVector<SpillLoc> SpillLocs;
+
+ // If we discover a new machine location, assign it an mphi with this
+ // block number.
+ unsigned CurBB;
+
+ /// Cached local copy of the number of registers the target has.
+ unsigned NumRegs;
+
+ /// Number of slot indexes the target has -- distinct segments of a stack
+ /// slot that can take on the value of a subregister, when a super-register
+ /// is written to the stack.
+ unsigned NumSlotIdxes;
+
+ /// Collection of register mask operands that have been observed. Second part
+ /// of pair indicates the instruction that they happened in. Used to
+ /// reconstruct where defs happened if we start tracking a location later
+ /// on.
+ SmallVector<std::pair<const MachineOperand *, unsigned>, 32> Masks;
+
+ /// Pair for describing a position within a stack slot -- first the size in
+ /// bits, then the offset.
+ typedef std::pair<unsigned short, unsigned short> StackSlotPos;
+
+ /// Map from a size/offset pair describing a position in a stack slot, to a
+ /// numeric identifier for that position. Allows easier identification of
+ /// individual positions.
+ DenseMap<StackSlotPos, unsigned> StackSlotIdxes;
+
+ /// Inverse of StackSlotIdxes.
+ DenseMap<unsigned, StackSlotPos> StackIdxesToPos;
+
+ /// Iterator for locations and the values they contain. Dereferencing
+ /// produces a struct/pair containing the LocIdx key for this location,
+ /// and a reference to the value currently stored. Simplifies the process
+ /// of seeking a particular location.
+ class MLocIterator {
+ LocToValueType &ValueMap;
+ LocIdx Idx;
+
+ public:
+ class value_type {
+ public:
+ value_type(LocIdx Idx, ValueIDNum &Value) : Idx(Idx), Value(Value) {}
+ const LocIdx Idx; /// Read-only index of this location.
+ ValueIDNum &Value; /// Reference to the stored value at this location.
+ };
+
+ MLocIterator(LocToValueType &ValueMap, LocIdx Idx)
+ : ValueMap(ValueMap), Idx(Idx) {}
+
+ bool operator==(const MLocIterator &Other) const {
+ assert(&ValueMap == &Other.ValueMap);
+ return Idx == Other.Idx;
+ }
+
+ bool operator!=(const MLocIterator &Other) const {
+ return !(*this == Other);
+ }
+
+ void operator++() { Idx = LocIdx(Idx.asU64() + 1); }
+
+ value_type operator*() { return value_type(Idx, ValueMap[LocIdx(Idx)]); }
+ };
+
+ MLocTracker(MachineFunction &MF, const TargetInstrInfo &TII,
+ const TargetRegisterInfo &TRI, const TargetLowering &TLI);
+
+ /// Produce location ID number for a Register. Provides some small amount of
+ /// type safety.
+ /// \param Reg The register we're looking up.
+ unsigned getLocID(Register Reg) { return Reg.id(); }
+
+ /// Produce location ID number for a spill position.
+ /// \param Spill The number of the spill we're fetching the location for.
+ /// \param SpillSubReg Subregister within the spill we're addressing.
+ unsigned getLocID(SpillLocationNo Spill, unsigned SpillSubReg) {
+ unsigned short Size = TRI.getSubRegIdxSize(SpillSubReg);
+ unsigned short Offs = TRI.getSubRegIdxOffset(SpillSubReg);
+ return getLocID(Spill, {Size, Offs});
+ }
+
+ /// Produce location ID number for a spill position.
+ /// \param Spill The number of the spill we're fetching the location for.
+  /// \param Idx Size/offset within the spill slot to be addressed.
+ unsigned getLocID(SpillLocationNo Spill, StackSlotPos Idx) {
+ unsigned SlotNo = Spill.id() - 1;
+ SlotNo *= NumSlotIdxes;
+ assert(StackSlotIdxes.find(Idx) != StackSlotIdxes.end());
+ SlotNo += StackSlotIdxes[Idx];
+ SlotNo += NumRegs;
+ return SlotNo;
+ }
+
+ /// Given a spill number, and a slot within the spill, calculate the ID number
+ /// for that location.
+ unsigned getSpillIDWithIdx(SpillLocationNo Spill, unsigned Idx) {
+ unsigned SlotNo = Spill.id() - 1;
+ SlotNo *= NumSlotIdxes;
+ SlotNo += Idx;
+ SlotNo += NumRegs;
+ return SlotNo;
+ }
+
+ /// Return the spill number that a location ID corresponds to.
+ SpillLocationNo locIDToSpill(unsigned ID) const {
+ assert(ID >= NumRegs);
+ ID -= NumRegs;
+ // Truncate away the index part, leaving only the spill number.
+ ID /= NumSlotIdxes;
+ return SpillLocationNo(ID + 1); // The UniqueVector is one-based.
+ }
+
+ /// Returns the spill-slot size/offs that a location ID corresponds to.
+ StackSlotPos locIDToSpillIdx(unsigned ID) const {
+ assert(ID >= NumRegs);
+ ID -= NumRegs;
+ unsigned Idx = ID % NumSlotIdxes;
+ return StackIdxesToPos.find(Idx)->second;
+ }
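+
+  // Worked example of the spill ID arithmetic above, with made-up numbers
+  // (NumRegs = 300, NumSlotIdxes = 4): getSpillIDWithIdx(SpillLocationNo(2), 1)
+  // yields (2 - 1) * 4 + 1 + 300 = 305; locIDToSpill(305) recovers
+  // SpillLocationNo((305 - 300) / 4 + 1) == 2, and locIDToSpillIdx(305) maps
+  // the remainder (305 - 300) % 4 == 1 back through StackIdxesToPos.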
+
+ unsigned getNumLocs(void) const { return LocIdxToIDNum.size(); }
+
+ /// Reset all locations to contain a PHI value at the designated block. Used
+  /// sometimes for actual PHI values, other times to indicate the block entry
+ /// value (before any more information is known).
+ void setMPhis(unsigned NewCurBB) {
+ CurBB = NewCurBB;
+ for (auto Location : locations())
+ Location.Value = {CurBB, 0, Location.Idx};
+ }
+
+  /// Load values for each location from an array of ValueIDNums. Takes the
+  /// current block number in case we read a value from a hitherto untouched
+  /// register.
+ void loadFromArray(ValueIDNum *Locs, unsigned NewCurBB) {
+ CurBB = NewCurBB;
+ // Iterate over all tracked locations, and load each locations live-in
+ // value into our local index.
+ for (auto Location : locations())
+ Location.Value = Locs[Location.Idx.asU64()];
+ }
+
+  /// Wipe any unnecessary location records after traversing a block.
+ void reset(void) {
+ // We could reset all the location values too; however either loadFromArray
+ // or setMPhis should be called before this object is re-used. Just
+ // clear Masks, they're definitely not needed.
+ Masks.clear();
+ }
+
+ /// Clear all data. Destroys the LocID <=> LocIdx map, which makes most of
+ /// the information in this pass uninterpretable.
+ void clear(void) {
+ reset();
+ LocIDToLocIdx.clear();
+ LocIdxToLocID.clear();
+ LocIdxToIDNum.clear();
+ // SpillLocs.reset(); XXX UniqueVector::reset assumes a SpillLoc casts from
+ // 0
+ SpillLocs = decltype(SpillLocs)();
+ StackSlotIdxes.clear();
+ StackIdxesToPos.clear();
+
+ LocIDToLocIdx.resize(NumRegs, LocIdx::MakeIllegalLoc());
+ }
+
+  /// Set a location to a certain value.
+ void setMLoc(LocIdx L, ValueIDNum Num) {
+ assert(L.asU64() < LocIdxToIDNum.size());
+ LocIdxToIDNum[L] = Num;
+ }
+
+  /// Read the value of a particular location.
+ ValueIDNum readMLoc(LocIdx L) {
+ assert(L.asU64() < LocIdxToIDNum.size());
+ return LocIdxToIDNum[L];
+ }
+
+ /// Create a LocIdx for an untracked register ID. Initialize it to either an
+ /// mphi value representing a live-in, or a recent register mask clobber.
+ LocIdx trackRegister(unsigned ID);
+
+ LocIdx lookupOrTrackRegister(unsigned ID) {
+ LocIdx &Index = LocIDToLocIdx[ID];
+ if (Index.isIllegal())
+ Index = trackRegister(ID);
+ return Index;
+ }
+
+ /// Is register R currently tracked by MLocTracker?
+ bool isRegisterTracked(Register R) {
+ LocIdx &Index = LocIDToLocIdx[R];
+ return !Index.isIllegal();
+ }
+
+ /// Record a definition of the specified register at the given block / inst.
+ /// This doesn't take a ValueIDNum, because the definition and its location
+ /// are synonymous.
+ void defReg(Register R, unsigned BB, unsigned Inst) {
+ unsigned ID = getLocID(R);
+ LocIdx Idx = lookupOrTrackRegister(ID);
+ ValueIDNum ValueID = {BB, Inst, Idx};
+ LocIdxToIDNum[Idx] = ValueID;
+ }
+
+ /// Set a register to a value number. To be used if the value number is
+ /// known in advance.
+ void setReg(Register R, ValueIDNum ValueID) {
+ unsigned ID = getLocID(R);
+ LocIdx Idx = lookupOrTrackRegister(ID);
+ LocIdxToIDNum[Idx] = ValueID;
+ }
+
+ ValueIDNum readReg(Register R) {
+ unsigned ID = getLocID(R);
+ LocIdx Idx = lookupOrTrackRegister(ID);
+ return LocIdxToIDNum[Idx];
+ }
+
+ /// Reset a register value to zero / empty. Needed to replicate the
+ /// VarLoc implementation where a copy to/from a register effectively
+ /// clears the contents of the source register. (Values can only have one
+ /// machine location in VarLocBasedImpl).
+ void wipeRegister(Register R) {
+ unsigned ID = getLocID(R);
+ LocIdx Idx = LocIDToLocIdx[ID];
+ LocIdxToIDNum[Idx] = ValueIDNum::EmptyValue;
+ }
+
+ /// Determine the LocIdx of an existing register.
+ LocIdx getRegMLoc(Register R) {
+ unsigned ID = getLocID(R);
+ assert(ID < LocIDToLocIdx.size());
+    assert(LocIDToLocIdx[ID] != UINT_MAX); // Sentinel for IndexedMap.
+ return LocIDToLocIdx[ID];
+ }
+
+ /// Record a RegMask operand being executed. Defs any register we currently
+ /// track, stores a pointer to the mask in case we have to account for it
+ /// later.
+ void writeRegMask(const MachineOperand *MO, unsigned CurBB, unsigned InstID);
+
+ /// Find LocIdx for SpillLoc \p L, creating a new one if it's not tracked.
+ SpillLocationNo getOrTrackSpillLoc(SpillLoc L);
+
+ // Get LocIdx of a spill ID.
+ LocIdx getSpillMLoc(unsigned SpillID) {
+    assert(LocIDToLocIdx[SpillID] != UINT_MAX); // Sentinel for IndexedMap.
+ return LocIDToLocIdx[SpillID];
+ }
+
+ /// Return true if Idx is a spill machine location.
+ bool isSpill(LocIdx Idx) const { return LocIdxToLocID[Idx] >= NumRegs; }
+
+ MLocIterator begin() { return MLocIterator(LocIdxToIDNum, 0); }
+
+ MLocIterator end() {
+ return MLocIterator(LocIdxToIDNum, LocIdxToIDNum.size());
+ }
+
+ /// Return a range over all locations currently tracked.
+ iterator_range<MLocIterator> locations() {
+ return llvm::make_range(begin(), end());
+ }
+
+ std::string LocIdxToName(LocIdx Idx) const;
+
+ std::string IDAsString(const ValueIDNum &Num) const;
+
+#ifndef NDEBUG
+ LLVM_DUMP_METHOD void dump();
+
+ LLVM_DUMP_METHOD void dump_mloc_map();
+#endif
+
+ /// Create a DBG_VALUE based on machine location \p MLoc. Qualify it with the
+  /// information in \p Properties, for variable \p Var. Don't insert it
+  /// anywhere, just return the builder for it.
+ MachineInstrBuilder emitLoc(Optional<LocIdx> MLoc, const DebugVariable &Var,
+ const DbgValueProperties &Properties);
+};
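+
+// Minimal usage sketch (illustrative only; "Tracker", "Reg", "BBNum" and
+// "InstNum" are placeholder names, not defined here):
+//
+//   Tracker.setMPhis(BBNum);              // every live-in becomes a block PHI
+//   Tracker.defReg(Reg, BBNum, InstNum);  // a def overwrites Reg's location
+//   ValueIDNum V = Tracker.readReg(Reg);  // == {BBNum, InstNum, LocIdx of Reg}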
+
+/// Collection of DBG_VALUEs observed when traversing a block. Records each
+/// variable and the value the DBG_VALUE refers to. Requires the machine value
+/// location dataflow algorithm to have run already, so that values can be
+/// identified.
+class VLocTracker {
+public:
+ /// Map DebugVariable to the latest Value it's defined to have.
+ /// Needs to be a MapVector because we determine order-in-the-input-MIR from
+ /// the order in this container.
+ /// We only retain the last DbgValue in each block for each variable, to
+  /// determine the block's live-out variable value. The Vars container forms the
+ /// transfer function for this block, as part of the dataflow analysis. The
+ /// movement of values between locations inside of a block is handled at a
+ /// much later stage, in the TransferTracker class.
+ MapVector<DebugVariable, DbgValue> Vars;
+ DenseMap<DebugVariable, const DILocation *> Scopes;
+ MachineBasicBlock *MBB = nullptr;
+
+public:
+ VLocTracker() {}
+
+ void defVar(const MachineInstr &MI, const DbgValueProperties &Properties,
+ Optional<ValueIDNum> ID) {
+ assert(MI.isDebugValue() || MI.isDebugRef());
+ DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
+ MI.getDebugLoc()->getInlinedAt());
+ DbgValue Rec = (ID) ? DbgValue(*ID, Properties, DbgValue::Def)
+ : DbgValue(Properties, DbgValue::Undef);
+
+ // Attempt insertion; overwrite if it's already mapped.
+ auto Result = Vars.insert(std::make_pair(Var, Rec));
+ if (!Result.second)
+ Result.first->second = Rec;
+ Scopes[Var] = MI.getDebugLoc().get();
+ }
+
+ void defVar(const MachineInstr &MI, const MachineOperand &MO) {
+ // Only DBG_VALUEs can define constant-valued variables.
+ assert(MI.isDebugValue());
+ DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
+ MI.getDebugLoc()->getInlinedAt());
+ DbgValueProperties Properties(MI);
+ DbgValue Rec = DbgValue(MO, Properties, DbgValue::Const);
+
+ // Attempt insertion; overwrite if it's already mapped.
+ auto Result = Vars.insert(std::make_pair(Var, Rec));
+ if (!Result.second)
+ Result.first->second = Rec;
+ Scopes[Var] = MI.getDebugLoc().get();
+ }
+};
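+
+// Sketch of how VLocTracker forms a block's transfer function (names such as
+// "VTracker", "DV1", "DV2", "Props", "ValueA" and "ValueB" are placeholders
+// for illustration):
+//
+//   VTracker.defVar(DV1, Props, ValueA); // DBG_VALUE early in the block...
+//   VTracker.defVar(DV2, Props, ValueB); // ...later one for the same variable
+//   // Vars now maps the variable to ValueB only: the block's live-out value.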
+
+/// Types for recording sets of variable fragments that overlap. For a given
+/// local variable, we record all other fragments of that variable that could
+/// overlap it, to reduce search time.
+using FragmentOfVar =
+ std::pair<const DILocalVariable *, DIExpression::FragmentInfo>;
+using OverlapMap =
+ DenseMap<FragmentOfVar, SmallVector<DIExpression::FragmentInfo, 1>>;
+
+/// The instruction-referencing implementation of LiveDebugValues: solves the
+/// machine-value-location and variable-value dataflow problems described in
+/// the file comment, then emits DBG_VALUEs for the computed variable values.
+class InstrRefBasedLDV : public LDVImpl {
+public:
+ friend class ::InstrRefLDVTest;
+
+ using FragmentInfo = DIExpression::FragmentInfo;
+ using OptFragmentInfo = Optional<DIExpression::FragmentInfo>;
+
+ // Helper while building OverlapMap, a map of all fragments seen for a given
+ // DILocalVariable.
+ using VarToFragments =
+ DenseMap<const DILocalVariable *, SmallSet<FragmentInfo, 4>>;
+
+ /// Machine location/value transfer function, a mapping of which locations
+ /// are assigned which new values.
+ using MLocTransferMap = SmallDenseMap<LocIdx, ValueIDNum>;
+
+ /// Live in/out structure for the variable values: a per-block map of
+ /// variables to their values.
+ using LiveIdxT = DenseMap<const MachineBasicBlock *, DbgValue *>;
+
+ using VarAndLoc = std::pair<DebugVariable, DbgValue>;
+
+ /// Type for a live-in value: the predecessor block, and its value.
+ using InValueT = std::pair<MachineBasicBlock *, DbgValue *>;
+
+ /// Vector (per block) of a collection (inner smallvector) of live-ins.
+ /// Used as the result type for the variable value dataflow problem.
+ using LiveInsT = SmallVector<SmallVector<VarAndLoc, 8>, 8>;
+
+private:
+ MachineDominatorTree *DomTree;
+ const TargetRegisterInfo *TRI;
+ const MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+ const TargetFrameLowering *TFI;
+ const MachineFrameInfo *MFI;
+ BitVector CalleeSavedRegs;
+ LexicalScopes LS;
+ TargetPassConfig *TPC;
+
+  // An empty DIExpression. Used for default / placeholder DbgValueProperties
+  // objects, as we can't have null expressions.
+ const DIExpression *EmptyExpr;
+
+ /// Object to track machine locations as we step through a block. Could
+ /// probably be a field rather than a pointer, as it's always used.
+ MLocTracker *MTracker = nullptr;
+
+ /// Number of the current block LiveDebugValues is stepping through.
+ unsigned CurBB;
+
+ /// Number of the current instruction LiveDebugValues is evaluating.
+ unsigned CurInst;
+
+ /// Variable tracker -- listens to DBG_VALUEs occurring as InstrRefBasedImpl
+ /// steps through a block. Reads the values at each location from the
+ /// MLocTracker object.
+ VLocTracker *VTracker = nullptr;
+
+ /// Tracker for transfers, listens to DBG_VALUEs and transfers of values
+ /// between locations during stepping, creates new DBG_VALUEs when values move
+ /// location.
+ TransferTracker *TTracker = nullptr;
+
+ /// Blocks which are artificial, i.e. blocks which exclusively contain
+ /// instructions without DebugLocs, or with line 0 locations.
+ SmallPtrSet<const MachineBasicBlock *, 16> ArtificialBlocks;
+
+ // Mapping of blocks to and from their RPOT order.
+ DenseMap<unsigned int, MachineBasicBlock *> OrderToBB;
+ DenseMap<const MachineBasicBlock *, unsigned int> BBToOrder;
+ DenseMap<unsigned, unsigned> BBNumToRPO;
+
+ /// Pair of MachineInstr, and its 1-based offset into the containing block.
+ using InstAndNum = std::pair<const MachineInstr *, unsigned>;
+ /// Map from debug instruction number to the MachineInstr labelled with that
+ /// number, and its location within the function. Used to transform
+ /// instruction numbers in DBG_INSTR_REFs into machine value numbers.
+ std::map<uint64_t, InstAndNum> DebugInstrNumToInstr;
+
+ /// Record of where we observed a DBG_PHI instruction.
+ class DebugPHIRecord {
+ public:
+ uint64_t InstrNum; ///< Instruction number of this DBG_PHI.
+ MachineBasicBlock *MBB; ///< Block where DBG_PHI occurred.
+ ValueIDNum ValueRead; ///< The value number read by the DBG_PHI.
+ LocIdx ReadLoc; ///< Register/Stack location the DBG_PHI reads.
+
+ operator unsigned() const { return InstrNum; }
+ };
+
+ /// Map from instruction numbers defined by DBG_PHIs to a record of what that
+ /// DBG_PHI read and where. Populated and edited during the machine value
+  /// location problem -- we use LLVM's SSA Updater to repair changes made by
+ /// optimizations that destroy PHI instructions.
+ SmallVector<DebugPHIRecord, 32> DebugPHINumToValue;
+
+ // Map of overlapping variable fragments.
+ OverlapMap OverlapFragments;
+ VarToFragments SeenFragments;
+
+ /// Tests whether this instruction is a spill to a stack slot.
+ bool isSpillInstruction(const MachineInstr &MI, MachineFunction *MF);
+
+  /// Decide if \p MI is a spill instruction and return true if it is. We use 2
+ /// criteria to make this decision:
+ /// - Is this instruction a store to a spill slot?
+ /// - Is there a register operand that is both used and killed?
+ /// TODO: Store optimization can fold spills into other stores (including
+ /// other spills). We do not handle this yet (more than one memory operand).
+ bool isLocationSpill(const MachineInstr &MI, MachineFunction *MF,
+ unsigned &Reg);
+
+  /// If a given instruction is identified as a restore from a spill slot,
+  /// return the spill location and set \p Reg to the restored register.
+ Optional<SpillLocationNo> isRestoreInstruction(const MachineInstr &MI,
+ MachineFunction *MF, unsigned &Reg);
+
+ /// Given a spill instruction, extract the spill slot information, ensure it's
+ /// tracked, and return the spill number.
+ SpillLocationNo extractSpillBaseRegAndOffset(const MachineInstr &MI);
+
+ /// Observe a single instruction while stepping through a block.
+ void process(MachineInstr &MI, ValueIDNum **MLiveOuts = nullptr,
+ ValueIDNum **MLiveIns = nullptr);
+
+ /// Examines whether \p MI is a DBG_VALUE and notifies trackers.
+ /// \returns true if MI was recognized and processed.
+ bool transferDebugValue(const MachineInstr &MI);
+
+ /// Examines whether \p MI is a DBG_INSTR_REF and notifies trackers.
+ /// \returns true if MI was recognized and processed.
+ bool transferDebugInstrRef(MachineInstr &MI, ValueIDNum **MLiveOuts,
+ ValueIDNum **MLiveIns);
+
+ /// Stores value-information about where this PHI occurred, and what
+ /// instruction number is associated with it.
+ /// \returns true if MI was recognized and processed.
+ bool transferDebugPHI(MachineInstr &MI);
+
+ /// Examines whether \p MI is copy instruction, and notifies trackers.
+ /// \returns true if MI was recognized and processed.
+ bool transferRegisterCopy(MachineInstr &MI);
+
+ /// Examines whether \p MI is stack spill or restore instruction, and
+ /// notifies trackers. \returns true if MI was recognized and processed.
+ bool transferSpillOrRestoreInst(MachineInstr &MI);
+
+ /// Examines \p MI for any registers that it defines, and notifies trackers.
+ void transferRegisterDef(MachineInstr &MI);
+
+ /// Copy one location to the other, accounting for movement of subregisters
+ /// too.
+ void performCopy(Register Src, Register Dst);
+
+ void accumulateFragmentMap(MachineInstr &MI);
+
+ /// Determine the machine value number referred to by (potentially several)
+ /// DBG_PHI instructions. Block duplication and tail folding can duplicate
+ /// DBG_PHIs, shifting the position where values in registers merge, and
+ /// forming another mini-ssa problem to solve.
+ /// \p Here the position of a DBG_INSTR_REF seeking a machine value number
+ /// \p InstrNum Debug instruction number defined by DBG_PHI instructions.
+ /// \returns The machine value number at position Here, or None.
+ Optional<ValueIDNum> resolveDbgPHIs(MachineFunction &MF,
+ ValueIDNum **MLiveOuts,
+ ValueIDNum **MLiveIns, MachineInstr &Here,
+ uint64_t InstrNum);
+
+ /// Step through the function, recording register definitions and movements
+ /// in an MLocTracker. Convert the observations into a per-block transfer
+ /// function in \p MLocTransfer, suitable for using with the machine value
+ /// location dataflow problem.
+ void
+ produceMLocTransferFunction(MachineFunction &MF,
+ SmallVectorImpl<MLocTransferMap> &MLocTransfer,
+ unsigned MaxNumBlocks);
+
+ /// Solve the machine value location dataflow problem. Takes as input the
+ /// transfer functions in \p MLocTransfer. Writes the output live-in and
+ /// live-out arrays to the (initialized to zero) multidimensional arrays in
+ /// \p MInLocs and \p MOutLocs. The outer dimension is indexed by block
+ /// number, the inner by LocIdx.
+ void buildMLocValueMap(MachineFunction &MF, ValueIDNum **MInLocs,
+ ValueIDNum **MOutLocs,
+ SmallVectorImpl<MLocTransferMap> &MLocTransfer);
+
+ /// Examine the stack indexes (i.e. offsets within the stack) to find the
+ /// basic units of interference -- like reg units, but for the stack.
+ void findStackIndexInterference(SmallVectorImpl<unsigned> &Slots);
+
+ /// Install PHI values into the live-in array for each block, according to
+ /// the IDF of each register.
+ void placeMLocPHIs(MachineFunction &MF,
+ SmallPtrSetImpl<MachineBasicBlock *> &AllBlocks,
+ ValueIDNum **MInLocs,
+ SmallVectorImpl<MLocTransferMap> &MLocTransfer);
+
+ /// Calculate the iterated-dominance-frontier for a set of defs, using the
+ /// existing LLVM facilities for this. Works for a single "value" or
+ /// machine/variable location.
+ /// \p AllBlocks Set of blocks where we might consume the value.
+ /// \p DefBlocks Set of blocks where the value/location is defined.
+ /// \p PHIBlocks Output set of blocks where PHIs must be placed.
+ void BlockPHIPlacement(const SmallPtrSetImpl<MachineBasicBlock *> &AllBlocks,
+ const SmallPtrSetImpl<MachineBasicBlock *> &DefBlocks,
+ SmallVectorImpl<MachineBasicBlock *> &PHIBlocks);
+
+ /// Perform a control flow join (lattice value meet) of the values in machine
+ /// locations at \p MBB. Follows the algorithm described in the file-comment,
+ /// reading live-outs of predecessors from \p OutLocs, the current live ins
+ /// from \p InLocs, and assigning the newly computed live ins back into
+  /// \p InLocs. \returns true if any of the live-in values changed, and thus
+  /// further propagation is needed.
+ bool mlocJoin(MachineBasicBlock &MBB,
+ SmallPtrSet<const MachineBasicBlock *, 16> &Visited,
+ ValueIDNum **OutLocs, ValueIDNum *InLocs);
+
+ /// Solve the variable value dataflow problem, for a single lexical scope.
+ /// Uses the algorithm from the file comment to resolve control flow joins
+ /// using PHI placement and value propagation. Reads the locations of machine
+ /// values from the \p MInLocs and \p MOutLocs arrays (see buildMLocValueMap)
+ /// and reads the variable values transfer function from \p AllTheVlocs.
+ /// Live-in and Live-out variable values are stored locally, with the live-ins
+  /// permanently stored to \p Output once a fixed point is reached.
+ /// \p VarsWeCareAbout contains a collection of the variables in \p Scope
+ /// that we should be tracking.
+ /// \p AssignBlocks contains the set of blocks that aren't in \p DILoc's
+ /// scope, but which do contain DBG_VALUEs, which VarLocBasedImpl tracks
+ /// locations through.
+ void buildVLocValueMap(const DILocation *DILoc,
+ const SmallSet<DebugVariable, 4> &VarsWeCareAbout,
+ SmallPtrSetImpl<MachineBasicBlock *> &AssignBlocks,
+ LiveInsT &Output, ValueIDNum **MOutLocs,
+ ValueIDNum **MInLocs,
+ SmallVectorImpl<VLocTracker> &AllTheVLocs);
+
+  /// Attempt to eliminate unnecessary PHIs on entry to a block. Examines the
+  /// live-in values coming from the predecessors' live-outs, and replaces any
+  /// PHIs already present in this block's live-ins with a live-through value
+  /// if the PHI isn't needed.
+ /// \p LiveIn Old live-in value, overwritten with new one if live-in changes.
+ /// \returns true if any live-ins change value, either from value propagation
+ /// or PHI elimination.
+ bool vlocJoin(MachineBasicBlock &MBB, LiveIdxT &VLOCOutLocs,
+ SmallPtrSet<const MachineBasicBlock *, 8> &InScopeBlocks,
+ SmallPtrSet<const MachineBasicBlock *, 8> &BlocksToExplore,
+ DbgValue &LiveIn);
+
+ /// For the given block and live-outs feeding into it, try to find a
+ /// machine location where all the variable values join together.
+ /// \returns Value ID of a machine PHI if an appropriate one is available.
+ Optional<ValueIDNum>
+ pickVPHILoc(const MachineBasicBlock &MBB, const DebugVariable &Var,
+ const LiveIdxT &LiveOuts, ValueIDNum **MOutLocs,
+ const SmallVectorImpl<const MachineBasicBlock *> &BlockOrders);
+
+ /// Given the solutions to the two dataflow problems, machine value locations
+ /// in \p MInLocs and live-in variable values in \p SavedLiveIns, runs the
+ /// TransferTracker class over the function to produce live-in and transfer
+ /// DBG_VALUEs, then inserts them. Groups of DBG_VALUEs are inserted in the
+  /// order given by \p AllVarsNumbering -- this could be any stable order, but
+  /// right now it is "order of appearance in the function, when explored in
+  /// RPO", so that we can compare explicitly against VarLocBasedImpl.
+ void emitLocations(MachineFunction &MF, LiveInsT SavedLiveIns,
+ ValueIDNum **MOutLocs, ValueIDNum **MInLocs,
+ DenseMap<DebugVariable, unsigned> &AllVarsNumbering,
+ const TargetPassConfig &TPC);
+
+  /// Boilerplate computation of some initial sets, artificial blocks and
+ /// RPOT block ordering.
+ void initialSetup(MachineFunction &MF);
+
+ bool ExtendRanges(MachineFunction &MF, MachineDominatorTree *DomTree,
+ TargetPassConfig *TPC, unsigned InputBBLimit,
+ unsigned InputDbgValLimit) override;
+
+public:
+ /// Default construct and initialize the pass.
+ InstrRefBasedLDV();
+
+ LLVM_DUMP_METHOD
+ void dump_mloc_transfer(const MLocTransferMap &mloc_transfer) const;
+
+ bool isCalleeSaved(LocIdx L) const;
+
+ bool hasFoldedStackStore(const MachineInstr &MI) {
+ // Instruction must have a memory operand that's a stack slot, and isn't
+ // aliased, meaning it's a spill from regalloc instead of a variable.
+ // If it's aliased, we can't guarantee its value.
+ if (!MI.hasOneMemOperand())
+ return false;
+ auto *MemOperand = *MI.memoperands_begin();
+ return MemOperand->isStore() &&
+ MemOperand->getPseudoValue() &&
+ MemOperand->getPseudoValue()->kind() == PseudoSourceValue::FixedStack
+ && !MemOperand->getPseudoValue()->isAliased(MFI);
+ }
+
+ Optional<LocIdx> findLocationForMemOperand(const MachineInstr &MI);
+};
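+
+// Rough sketch of the order in which the private steps above are expected to
+// run from ExtendRanges (illustrative only, not a guarantee of the
+// implementation):
+//
+//   initialSetup(MF);                       // RPOT order, artificial blocks
+//   produceMLocTransferFunction(MF, MLocTransfer, MaxNumBlocks);
+//   buildMLocValueMap(MF, MInLocs, MOutLocs, MLocTransfer);
+//   buildVLocValueMap(...);                 // once per lexical scope
+//   emitLocations(MF, SavedLiveIns, MOutLocs, MInLocs, AllVarsNumbering, *TPC);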
+
+} // namespace LiveDebugValues
+
+namespace llvm {
+using namespace LiveDebugValues;
+
+template <> struct DenseMapInfo<LocIdx> {
+ static inline LocIdx getEmptyKey() { return LocIdx::MakeIllegalLoc(); }
+ static inline LocIdx getTombstoneKey() { return LocIdx::MakeTombstoneLoc(); }
+
+ static unsigned getHashValue(const LocIdx &Loc) { return Loc.asU64(); }
+
+ static bool isEqual(const LocIdx &A, const LocIdx &B) { return A == B; }
+};
+
+template <> struct DenseMapInfo<ValueIDNum> {
+ static inline ValueIDNum getEmptyKey() { return ValueIDNum::EmptyValue; }
+ static inline ValueIDNum getTombstoneKey() {
+ return ValueIDNum::TombstoneValue;
+ }
+
+ static unsigned getHashValue(const ValueIDNum &Val) { return Val.asU64(); }
+
+ static bool isEqual(const ValueIDNum &A, const ValueIDNum &B) {
+ return A == B;
+ }
+};
+
+} // end namespace llvm
+
+#endif /* LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_INSTRREFBASEDLDV_H */
diff --git a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
index 38e803d1abb5..691977dc34e6 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
+++ b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
@@ -40,6 +40,19 @@ static cl::opt<bool>
"normal DBG_VALUE inputs"),
cl::init(false));
+// Options to prevent pathological compile-time behavior. If InputBBLimit and
+// InputDbgValueLimit are both exceeded, range extension is disabled.
+static cl::opt<unsigned> InputBBLimit(
+ "livedebugvalues-input-bb-limit",
+ cl::desc("Maximum input basic blocks before DBG_VALUE limit applies"),
+ cl::init(10000), cl::Hidden);
+static cl::opt<unsigned> InputDbgValueLimit(
+ "livedebugvalues-input-dbg-value-limit",
+ cl::desc(
+ "Maximum input DBG_VALUE insts supported by debug range extension"),
+ cl::init(50000), cl::Hidden);
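+
+// Both limits are ordinary cl::opt flags, so (as a usage sketch) they can be
+// raised when investigating pathological inputs, e.g.:
+//   llc -O2 -livedebugvalues-input-bb-limit=20000 \
+//       -livedebugvalues-input-dbg-value-limit=100000 foo.ll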
+
+namespace {
/// Generic LiveDebugValues pass. Calls through to VarLocBasedLDV or
/// InstrRefBasedLDV to perform location propagation, via the LDVImpl
/// base class.
@@ -48,10 +61,7 @@ public:
static char ID;
LiveDebugValues();
- ~LiveDebugValues() {
- if (TheImpl)
- delete TheImpl;
- }
+ ~LiveDebugValues() {}
/// Calculate the liveness information for the given machine function.
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -67,9 +77,12 @@ public:
}
private:
- LDVImpl *TheImpl;
+ std::unique_ptr<LDVImpl> InstrRefImpl;
+ std::unique_ptr<LDVImpl> VarLocImpl;
TargetPassConfig *TPC;
+ MachineDominatorTree MDT;
};
+} // namespace
char LiveDebugValues::ID = 0;
@@ -81,27 +94,26 @@ INITIALIZE_PASS(LiveDebugValues, DEBUG_TYPE, "Live DEBUG_VALUE analysis", false,
/// Default construct and initialize the pass.
LiveDebugValues::LiveDebugValues() : MachineFunctionPass(ID) {
initializeLiveDebugValuesPass(*PassRegistry::getPassRegistry());
- TheImpl = nullptr;
+ InstrRefImpl =
+ std::unique_ptr<LDVImpl>(llvm::makeInstrRefBasedLiveDebugValues());
+ VarLocImpl = std::unique_ptr<LDVImpl>(llvm::makeVarLocBasedLiveDebugValues());
}
bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) {
- if (!TheImpl) {
- TPC = getAnalysisIfAvailable<TargetPassConfig>();
-
- bool InstrRefBased = false;
- if (TPC) {
- auto &TM = TPC->getTM<TargetMachine>();
- InstrRefBased = TM.Options.ValueTrackingVariableLocations;
- }
-
- // Allow the user to force selection of InstrRef LDV.
- InstrRefBased |= ForceInstrRefLDV;
-
- if (InstrRefBased)
- TheImpl = llvm::makeInstrRefBasedLiveDebugValues();
- else
- TheImpl = llvm::makeVarLocBasedLiveDebugValues();
+ bool InstrRefBased = MF.useDebugInstrRef();
+ // Allow the user to force selection of InstrRef LDV.
+ InstrRefBased |= ForceInstrRefLDV;
+
+ TPC = getAnalysisIfAvailable<TargetPassConfig>();
+ LDVImpl *TheImpl = &*VarLocImpl;
+
+ MachineDominatorTree *DomTree = nullptr;
+ if (InstrRefBased) {
+ DomTree = &MDT;
+ MDT.calculate(MF);
+ TheImpl = &*InstrRefImpl;
}
- return TheImpl->ExtendRanges(MF, TPC);
+ return TheImpl->ExtendRanges(MF, DomTree, TPC, InputBBLimit,
+ InputDbgValueLimit);
}
diff --git a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h
index 9c910f180b9f..a5936c8a96f0 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h
+++ b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h
@@ -9,6 +9,7 @@
#ifndef LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_LIVEDEBUGVALUES_H
#define LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_LIVEDEBUGVALUES_H
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -23,7 +24,9 @@ inline namespace SharedLiveDebugValues {
// implementation.
class LDVImpl {
public:
- virtual bool ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) = 0;
+ virtual bool ExtendRanges(MachineFunction &MF, MachineDominatorTree *DomTree,
+ TargetPassConfig *TPC, unsigned InputBBLimit,
+ unsigned InputDbgValLimit) = 0;
virtual ~LDVImpl() {}
};
diff --git a/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
index 1e6d65c18953..a632d3d9ce76 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
+++ b/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
@@ -155,6 +155,7 @@
#include <cassert>
#include <cstdint>
#include <functional>
+#include <map>
#include <queue>
#include <tuple>
#include <utility>
@@ -166,18 +167,6 @@ using namespace llvm;
STATISTIC(NumInserted, "Number of DBG_VALUE instructions inserted");
-// Options to prevent pathological compile-time behavior. If InputBBLimit and
-// InputDbgValueLimit are both exceeded, range extension is disabled.
-static cl::opt<unsigned> InputBBLimit(
- "livedebugvalues-input-bb-limit",
- cl::desc("Maximum input basic blocks before DBG_VALUE limit applies"),
- cl::init(10000), cl::Hidden);
-static cl::opt<unsigned> InputDbgValueLimit(
- "livedebugvalues-input-dbg-value-limit",
- cl::desc(
- "Maximum input DBG_VALUE insts supported by debug range extension"),
- cl::init(50000), cl::Hidden);
-
/// If \p Op is a stack or frame register return true, otherwise return false.
/// This is used to avoid basing the debug entry values on the registers, since
/// we do not support it at the moment.
@@ -296,6 +285,8 @@ private:
LexicalScopes LS;
VarLocSet::Allocator Alloc;
+ const MachineInstr *LastNonDbgMI;
+
enum struct TransferKind { TransferCopy, TransferSpill, TransferRestore };
using FragmentInfo = DIExpression::FragmentInfo;
@@ -555,7 +546,6 @@ private:
EVKind == EntryValueLocKind::EntryValueKind ? Orig.getReg()
: Register(Loc.RegNo),
false));
- MOs.back().setIsDebug();
break;
case MachineLocKind::SpillLocKind: {
// Spills are indirect DBG_VALUEs, with a base register and offset.
@@ -565,9 +555,10 @@ private:
unsigned Base = Loc.SpillLocation.SpillBase;
auto *TRI = MF.getSubtarget().getRegisterInfo();
if (MI.isNonListDebugValue()) {
- DIExpr =
- TRI->prependOffsetExpression(DIExpr, DIExpression::ApplyOffset,
- Loc.SpillLocation.SpillOffset);
+ auto Deref = Indirect ? DIExpression::DerefAfter : 0;
+ DIExpr = TRI->prependOffsetExpression(
+ DIExpr, DIExpression::ApplyOffset | Deref,
+ Loc.SpillLocation.SpillOffset);
Indirect = true;
} else {
SmallVector<uint64_t, 4> Ops;
@@ -576,7 +567,6 @@ private:
DIExpr = DIExpression::appendOpsToArg(DIExpr, Ops, I);
}
MOs.push_back(MachineOperand::CreateReg(Base, false));
- MOs.back().setIsDebug();
break;
}
case MachineLocKind::ImmediateKind: {
@@ -626,7 +616,7 @@ private:
unsigned getRegIdx(Register Reg) const {
for (unsigned Idx = 0; Idx < Locs.size(); ++Idx)
if (Locs[Idx].Kind == MachineLocKind::RegisterKind &&
- Locs[Idx].Value.RegNo == Reg)
+ Register{static_cast<unsigned>(Locs[Idx].Value.RegNo)} == Reg)
return Idx;
llvm_unreachable("Could not find given Reg in Locs");
}
@@ -635,7 +625,7 @@ private:
/// add each of them to \p Regs and return true.
bool getDescribingRegs(SmallVectorImpl<uint32_t> &Regs) const {
bool AnyRegs = false;
- for (auto Loc : Locs)
+ for (const auto &Loc : Locs)
if (Loc.Kind == MachineLocKind::RegisterKind) {
Regs.push_back(Loc.Value.RegNo);
AnyRegs = true;
@@ -801,6 +791,10 @@ private:
LocIndex LocationID; ///< Location number for the transfer dest.
};
using TransferMap = SmallVector<TransferDebugPair, 4>;
+ // Types for recording Entry Var Locations emitted by a single MachineInstr,
+ // as well as recording MachineInstr which last defined a register.
+ using InstToEntryLocMap = std::multimap<const MachineInstr *, LocIndex>;
+ using RegDefToInstMap = DenseMap<Register, MachineInstr *>;
// Types for recording sets of variable fragments that overlap. For a given
// local variable, we record all other fragments of that variable that could
@@ -974,13 +968,22 @@ private:
Register NewReg = Register());
void transferDebugValue(const MachineInstr &MI, OpenRangesSet &OpenRanges,
- VarLocMap &VarLocIDs);
+ VarLocMap &VarLocIDs,
+ InstToEntryLocMap &EntryValTransfers,
+ RegDefToInstMap &RegSetInstrs);
void transferSpillOrRestoreInst(MachineInstr &MI, OpenRangesSet &OpenRanges,
VarLocMap &VarLocIDs, TransferMap &Transfers);
- bool removeEntryValue(const MachineInstr &MI, OpenRangesSet &OpenRanges,
- VarLocMap &VarLocIDs, const VarLoc &EntryVL);
+ void cleanupEntryValueTransfers(const MachineInstr *MI,
+ OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs, const VarLoc &EntryVL,
+ InstToEntryLocMap &EntryValTransfers);
+ void removeEntryValue(const MachineInstr &MI, OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs, const VarLoc &EntryVL,
+ InstToEntryLocMap &EntryValTransfers,
+ RegDefToInstMap &RegSetInstrs);
void emitEntryValues(MachineInstr &MI, OpenRangesSet &OpenRanges,
- VarLocMap &VarLocIDs, TransferMap &Transfers,
+ VarLocMap &VarLocIDs,
+ InstToEntryLocMap &EntryValTransfers,
VarLocsInRange &KillSet);
void recordEntryValue(const MachineInstr &MI,
const DefinedRegsSet &DefinedRegs,
@@ -988,12 +991,16 @@ private:
void transferRegisterCopy(MachineInstr &MI, OpenRangesSet &OpenRanges,
VarLocMap &VarLocIDs, TransferMap &Transfers);
void transferRegisterDef(MachineInstr &MI, OpenRangesSet &OpenRanges,
- VarLocMap &VarLocIDs, TransferMap &Transfers);
+ VarLocMap &VarLocIDs,
+ InstToEntryLocMap &EntryValTransfers,
+ RegDefToInstMap &RegSetInstrs);
bool transferTerminator(MachineBasicBlock *MBB, OpenRangesSet &OpenRanges,
VarLocInMBB &OutLocs, const VarLocMap &VarLocIDs);
void process(MachineInstr &MI, OpenRangesSet &OpenRanges,
- VarLocMap &VarLocIDs, TransferMap &Transfers);
+ VarLocMap &VarLocIDs, TransferMap &Transfers,
+ InstToEntryLocMap &EntryValTransfers,
+ RegDefToInstMap &RegSetInstrs);
void accumulateFragmentMap(MachineInstr &MI, VarToFragments &SeenFragments,
OverlapMap &OLapMap);
@@ -1007,7 +1014,9 @@ private:
/// had their instruction creation deferred.
void flushPendingLocs(VarLocInMBB &PendingInLocs, VarLocMap &VarLocIDs);
- bool ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) override;
+ bool ExtendRanges(MachineFunction &MF, MachineDominatorTree *DomTree,
+ TargetPassConfig *TPC, unsigned InputBBLimit,
+ unsigned InputDbgValLimit) override;
public:
/// Default construct and initialize the pass.
@@ -1225,62 +1234,100 @@ VarLocBasedLDV::extractSpillBaseRegAndOffset(const MachineInstr &MI) {
return {Reg, Offset};
}
+/// Clean up \p EntryValTransfers created by \p TRInst, by removing the
+/// transfer that uses the to-be-deleted \p EntryVL.
+void VarLocBasedLDV::cleanupEntryValueTransfers(
+ const MachineInstr *TRInst, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs,
+ const VarLoc &EntryVL, InstToEntryLocMap &EntryValTransfers) {
+ if (EntryValTransfers.empty() || TRInst == nullptr)
+ return;
+
+ auto TransRange = EntryValTransfers.equal_range(TRInst);
+ for (auto TDPair : llvm::make_range(TransRange.first, TransRange.second)) {
+ const VarLoc &EmittedEV = VarLocIDs[TDPair.second];
+ if (std::tie(EntryVL.Var, EntryVL.Locs[0].Value.RegNo, EntryVL.Expr) ==
+ std::tie(EmittedEV.Var, EmittedEV.Locs[0].Value.RegNo,
+ EmittedEV.Expr)) {
+ OpenRanges.erase(EmittedEV);
+ EntryValTransfers.erase(TRInst);
+ break;
+ }
+ }
+}
+
/// Try to salvage the debug entry value if we encounter a new debug value
/// describing the same parameter, otherwise stop tracking the value. Return
-/// true if we should stop tracking the entry value, otherwise return false.
-bool VarLocBasedLDV::removeEntryValue(const MachineInstr &MI,
- OpenRangesSet &OpenRanges,
- VarLocMap &VarLocIDs,
- const VarLoc &EntryVL) {
+/// true if we should stop tracking the entry value and do the cleanup of
+/// emitted Entry Value Transfers, otherwise return false.
+void VarLocBasedLDV::removeEntryValue(const MachineInstr &MI,
+ OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs,
+ const VarLoc &EntryVL,
+ InstToEntryLocMap &EntryValTransfers,
+ RegDefToInstMap &RegSetInstrs) {
// Skip the DBG_VALUE which is the debug entry value itself.
- if (MI.isIdenticalTo(EntryVL.MI))
- return false;
+ if (&MI == &EntryVL.MI)
+ return;
// If the parameter's location is not register location, we can not track
- // the entry value any more. In addition, if the debug expression from the
- // DBG_VALUE is not empty, we can assume the parameter's value has changed
- // indicating that we should stop tracking its entry value as well.
- if (!MI.getDebugOperand(0).isReg() ||
- MI.getDebugExpression()->getNumElements() != 0)
- return true;
-
- // If the DBG_VALUE comes from a copy instruction that copies the entry value,
- // it means the parameter's value has not changed and we should be able to use
- // its entry value.
+ // the entry value any more. It doesn't have the TransferInst which defines
+ // register, so no Entry Value Transfers have been emitted already.
+ if (!MI.getDebugOperand(0).isReg())
+ return;
+
+ // Try to get non-debug instruction responsible for the DBG_VALUE.
+ const MachineInstr *TransferInst = nullptr;
Register Reg = MI.getDebugOperand(0).getReg();
- auto I = std::next(MI.getReverseIterator());
- const MachineOperand *SrcRegOp, *DestRegOp;
- if (I != MI.getParent()->rend()) {
+ if (Reg.isValid() && RegSetInstrs.find(Reg) != RegSetInstrs.end())
+ TransferInst = RegSetInstrs.find(Reg)->second;
+
+ // Case of the parameter's DBG_VALUE at the start of entry MBB.
+ if (!TransferInst && !LastNonDbgMI && MI.getParent()->isEntryBlock())
+ return;
+ // If the debug expression from the DBG_VALUE is not empty, we can assume the
+ // parameter's value has changed indicating that we should stop tracking its
+ // entry value as well.
+ if (MI.getDebugExpression()->getNumElements() == 0 && TransferInst) {
+ // If the DBG_VALUE comes from a copy instruction that copies the entry
+ // value, it means the parameter's value has not changed and we should be
+ // able to use its entry value.
// TODO: Try to keep tracking of an entry value if we encounter a propagated
// DBG_VALUE describing the copy of the entry value. (Propagated entry value
// does not indicate the parameter modification.)
- auto DestSrc = TII->isCopyInstr(*I);
- if (!DestSrc)
- return true;
-
- SrcRegOp = DestSrc->Source;
- DestRegOp = DestSrc->Destination;
- if (Reg != DestRegOp->getReg())
- return true;
-
- for (uint64_t ID : OpenRanges.getEntryValueBackupVarLocs()) {
- const VarLoc &VL = VarLocIDs[LocIndex::fromRawInteger(ID)];
- if (VL.isEntryValueCopyBackupReg(Reg) &&
- // Entry Values should not be variadic.
- VL.MI.getDebugOperand(0).getReg() == SrcRegOp->getReg())
- return false;
+ auto DestSrc = TII->isCopyInstr(*TransferInst);
+ if (DestSrc) {
+ const MachineOperand *SrcRegOp, *DestRegOp;
+ SrcRegOp = DestSrc->Source;
+ DestRegOp = DestSrc->Destination;
+ if (Reg == DestRegOp->getReg()) {
+ for (uint64_t ID : OpenRanges.getEntryValueBackupVarLocs()) {
+ const VarLoc &VL = VarLocIDs[LocIndex::fromRawInteger(ID)];
+ if (VL.isEntryValueCopyBackupReg(Reg) &&
+ // Entry Values should not be variadic.
+ VL.MI.getDebugOperand(0).getReg() == SrcRegOp->getReg())
+ return;
+ }
+ }
}
}
- return true;
+ LLVM_DEBUG(dbgs() << "Deleting a DBG entry value because of: ";
+ MI.print(dbgs(), /*IsStandalone*/ false,
+ /*SkipOpers*/ false, /*SkipDebugLoc*/ false,
+ /*AddNewLine*/ true, TII));
+ cleanupEntryValueTransfers(TransferInst, OpenRanges, VarLocIDs, EntryVL,
+ EntryValTransfers);
+ OpenRanges.erase(EntryVL);
}
/// End all previous ranges related to @MI and start a new range from @MI
/// if it is a DBG_VALUE instr.
void VarLocBasedLDV::transferDebugValue(const MachineInstr &MI,
- OpenRangesSet &OpenRanges,
- VarLocMap &VarLocIDs) {
+ OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs,
+ InstToEntryLocMap &EntryValTransfers,
+ RegDefToInstMap &RegSetInstrs) {
if (!MI.isDebugValue())
return;
const DILocalVariable *Var = MI.getDebugVariable();
@@ -1297,13 +1344,8 @@ void VarLocBasedLDV::transferDebugValue(const MachineInstr &MI,
auto EntryValBackupID = OpenRanges.getEntryValueBackup(V);
if (Var->isParameter() && EntryValBackupID) {
const VarLoc &EntryVL = VarLocIDs[EntryValBackupID->back()];
- if (removeEntryValue(MI, OpenRanges, VarLocIDs, EntryVL)) {
- LLVM_DEBUG(dbgs() << "Deleting a DBG entry value because of: ";
- MI.print(dbgs(), /*IsStandalone*/ false,
- /*SkipOpers*/ false, /*SkipDebugLoc*/ false,
- /*AddNewLine*/ true, TII));
- OpenRanges.erase(EntryVL);
- }
+ removeEntryValue(MI, OpenRanges, VarLocIDs, EntryVL, EntryValTransfers,
+ RegSetInstrs);
}
if (all_of(MI.debug_operands(), [](const MachineOperand &MO) {
@@ -1351,7 +1393,7 @@ void VarLocBasedLDV::collectAllVarLocs(SmallVectorImpl<VarLoc> &Collected,
void VarLocBasedLDV::emitEntryValues(MachineInstr &MI,
OpenRangesSet &OpenRanges,
VarLocMap &VarLocIDs,
- TransferMap &Transfers,
+ InstToEntryLocMap &EntryValTransfers,
VarLocsInRange &KillSet) {
// Do not insert entry value locations after a terminator.
if (MI.isTerminator())
@@ -1377,7 +1419,9 @@ void VarLocBasedLDV::emitEntryValues(MachineInstr &MI,
VarLoc EntryLoc = VarLoc::CreateEntryLoc(EntryVL.MI, LS, EntryVL.Expr,
EntryVL.Locs[0].Value.RegNo);
LocIndices EntryValueIDs = VarLocIDs.insert(EntryLoc);
- Transfers.push_back({&MI, EntryValueIDs.back()});
+ assert(EntryValueIDs.size() == 1 &&
+ "EntryValue loc should not be variadic");
+ EntryValTransfers.insert({&MI, EntryValueIDs.back()});
OpenRanges.insert(EntryValueIDs, EntryLoc);
}
}
@@ -1454,9 +1498,11 @@ void VarLocBasedLDV::insertTransferDebugPair(
}
/// A definition of a register may mark the end of a range.
-void VarLocBasedLDV::transferRegisterDef(
- MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs,
- TransferMap &Transfers) {
+void VarLocBasedLDV::transferRegisterDef(MachineInstr &MI,
+ OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs,
+ InstToEntryLocMap &EntryValTransfers,
+ RegDefToInstMap &RegSetInstrs) {
// Meta Instructions do not affect the debug liveness of any register they
// define.
@@ -1479,6 +1525,8 @@ void VarLocBasedLDV::transferRegisterDef(
for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI)
// FIXME: Can we break out of this loop early if no insertion occurs?
DeadRegs.insert(*RAI);
+ RegSetInstrs.erase(MO.getReg());
+ RegSetInstrs.insert({MO.getReg(), &MI});
} else if (MO.isRegMask()) {
RegMasks.push_back(MO.getRegMask());
}
@@ -1505,6 +1553,10 @@ void VarLocBasedLDV::transferRegisterDef(
});
if (AnyRegMaskKillsReg)
DeadRegs.insert(Reg);
+ if (AnyRegMaskKillsReg) {
+ RegSetInstrs.erase(Reg);
+ RegSetInstrs.insert({Reg, &MI});
+ }
}
}
@@ -1518,7 +1570,7 @@ void VarLocBasedLDV::transferRegisterDef(
if (TPC) {
auto &TM = TPC->getTM<TargetMachine>();
if (TM.Options.ShouldEmitDebugEntryValues())
- emitEntryValues(MI, OpenRanges, VarLocIDs, Transfers, KillSet);
+ emitEntryValues(MI, OpenRanges, VarLocIDs, EntryValTransfers, KillSet);
}
}
@@ -1851,9 +1903,15 @@ void VarLocBasedLDV::accumulateFragmentMap(MachineInstr &MI,
/// This routine creates OpenRanges.
void VarLocBasedLDV::process(MachineInstr &MI, OpenRangesSet &OpenRanges,
- VarLocMap &VarLocIDs, TransferMap &Transfers) {
- transferDebugValue(MI, OpenRanges, VarLocIDs);
- transferRegisterDef(MI, OpenRanges, VarLocIDs, Transfers);
+ VarLocMap &VarLocIDs, TransferMap &Transfers,
+ InstToEntryLocMap &EntryValTransfers,
+ RegDefToInstMap &RegSetInstrs) {
+ if (!MI.isDebugInstr())
+ LastNonDbgMI = &MI;
+ transferDebugValue(MI, OpenRanges, VarLocIDs, EntryValTransfers,
+ RegSetInstrs);
+ transferRegisterDef(MI, OpenRanges, VarLocIDs, EntryValTransfers,
+ RegSetInstrs);
transferRegisterCopy(MI, OpenRanges, VarLocIDs, Transfers);
transferSpillOrRestoreInst(MI, OpenRanges, VarLocIDs, Transfers);
}
@@ -2048,7 +2106,11 @@ void VarLocBasedLDV::recordEntryValue(const MachineInstr &MI,
/// Calculate the liveness information for the given machine function and
/// extend ranges across basic blocks.
-bool VarLocBasedLDV::ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) {
+bool VarLocBasedLDV::ExtendRanges(MachineFunction &MF,
+ MachineDominatorTree *DomTree,
+ TargetPassConfig *TPC, unsigned InputBBLimit,
+ unsigned InputDbgValLimit) {
+ (void)DomTree;
LLVM_DEBUG(dbgs() << "\nDebug Range Extension\n");
if (!MF.getFunction().getSubprogram())
@@ -2079,6 +2141,10 @@ bool VarLocBasedLDV::ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) {
VarLocInMBB InLocs; // Ranges that are incoming after joining.
TransferMap Transfers; // DBG_VALUEs associated with transfers (such as
// spills, copies and restores).
+ // Map responsible MI to attached Transfer emitted from Backup Entry Value.
+ InstToEntryLocMap EntryValTransfers;
+ // Map a Register to the last MI which clobbered it.
+ RegDefToInstMap RegSetInstrs;
VarToFragments SeenFragments;
@@ -2141,7 +2207,7 @@ bool VarLocBasedLDV::ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) {
for (auto &MI : MBB)
if (MI.isDebugValue())
++NumInputDbgValues;
- if (NumInputDbgValues > InputDbgValueLimit) {
+ if (NumInputDbgValues > InputDbgValLimit) {
LLVM_DEBUG(dbgs() << "Disabling VarLocBasedLDV: " << MF.getName()
<< " has " << RPONumber << " basic blocks and "
<< NumInputDbgValues
@@ -2175,8 +2241,11 @@ bool VarLocBasedLDV::ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) {
// operate with registers that correspond to user variables.
// First load any pending inlocs.
OpenRanges.insertFromLocSet(getVarLocsInMBB(MBB, InLocs), VarLocIDs);
+ LastNonDbgMI = nullptr;
+ RegSetInstrs.clear();
for (auto &MI : *MBB)
- process(MI, OpenRanges, VarLocIDs, Transfers);
+ process(MI, OpenRanges, VarLocIDs, Transfers, EntryValTransfers,
+ RegSetInstrs);
OLChanged |= transferTerminator(MBB, OpenRanges, OutLocs, VarLocIDs);
LLVM_DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs,
@@ -2210,6 +2279,18 @@ bool VarLocBasedLDV::ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) {
}
Transfers.clear();
+ // Add DBG_VALUEs created using Backup Entry Value location.
+ for (auto &TR : EntryValTransfers) {
+ MachineInstr *TRInst = const_cast<MachineInstr *>(TR.first);
+ assert(!TRInst->isTerminator() &&
+ "Cannot insert DBG_VALUE after terminator");
+ MachineBasicBlock *MBB = TRInst->getParent();
+ const VarLoc &VL = VarLocIDs[TR.second];
+ MachineInstr *MI = VL.BuildDbgValue(MF);
+ MBB->insertAfterBundle(TRInst->getIterator(), MI);
+ }
+ EntryValTransfers.clear();
+
// Deferred inlocs will not have had any DBG_VALUE insts created; do
// that now.
flushPendingLocs(InLocs, VarLocIDs);
diff --git a/llvm/lib/CodeGen/LiveDebugVariables.cpp b/llvm/lib/CodeGen/LiveDebugVariables.cpp
index 54058a547928..dcd546f9c6db 100644
--- a/llvm/lib/CodeGen/LiveDebugVariables.cpp
+++ b/llvm/lib/CodeGen/LiveDebugVariables.cpp
@@ -417,7 +417,7 @@ public:
void addDef(SlotIndex Idx, ArrayRef<MachineOperand> LocMOs, bool IsIndirect,
bool IsList, const DIExpression &Expr) {
SmallVector<unsigned> Locs;
- for (MachineOperand Op : LocMOs)
+ for (const MachineOperand &Op : LocMOs)
Locs.push_back(getLocationNo(Op));
DbgVariableValue DbgValue(Locs, IsIndirect, IsList, Expr);
// Add a singular (Idx,Idx) -> value mapping.
@@ -1294,13 +1294,9 @@ bool LDVImpl::runOnMachineFunction(MachineFunction &mf, bool InstrRef) {
static void removeDebugInstrs(MachineFunction &mf) {
for (MachineBasicBlock &MBB : mf) {
- for (auto MBBI = MBB.begin(), MBBE = MBB.end(); MBBI != MBBE; ) {
- if (!MBBI->isDebugInstr()) {
- ++MBBI;
- continue;
- }
- MBBI = MBB.erase(MBBI);
- }
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB))
+ if (MI.isDebugInstr())
+ MBB.erase(&MI);
}
}
@@ -1314,12 +1310,7 @@ bool LiveDebugVariables::runOnMachineFunction(MachineFunction &mf) {
// Have we been asked to track variable locations using instruction
// referencing?
- bool InstrRef = false;
- auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
- if (TPC) {
- auto &TM = TPC->getTM<TargetMachine>();
- InstrRef = TM.Options.ValueTrackingVariableLocations;
- }
+ bool InstrRef = mf.useDebugInstrRef();
if (!pImpl)
pImpl = new LDVImpl(this);
diff --git a/llvm/lib/CodeGen/LiveInterval.cpp b/llvm/lib/CodeGen/LiveInterval.cpp
index 1eed0ec5bbbe..9ded0fb6ae0a 100644
--- a/llvm/lib/CodeGen/LiveInterval.cpp
+++ b/llvm/lib/CodeGen/LiveInterval.cpp
@@ -592,21 +592,10 @@ void LiveRange::removeSegment(SlotIndex Start, SlotIndex End,
VNInfo *ValNo = I->valno;
if (I->start == Start) {
if (I->end == End) {
- if (RemoveDeadValNo) {
- // Check if val# is dead.
- bool isDead = true;
- for (const_iterator II = begin(), EE = end(); II != EE; ++II)
- if (II != I && II->valno == ValNo) {
- isDead = false;
- break;
- }
- if (isDead) {
- // Now that ValNo is dead, remove it.
- markValNoForDeletion(ValNo);
- }
- }
-
segments.erase(I); // Removed the whole Segment.
+
+ if (RemoveDeadValNo)
+ removeValNoIfDead(ValNo);
} else
I->start = End;
return;
@@ -627,13 +616,25 @@ void LiveRange::removeSegment(SlotIndex Start, SlotIndex End,
segments.insert(std::next(I), Segment(End, OldEnd, ValNo));
}
+LiveRange::iterator LiveRange::removeSegment(iterator I, bool RemoveDeadValNo) {
+ VNInfo *ValNo = I->valno;
+ I = segments.erase(I);
+ if (RemoveDeadValNo)
+ removeValNoIfDead(ValNo);
+ return I;
+}
+
+void LiveRange::removeValNoIfDead(VNInfo *ValNo) {
+ if (none_of(*this, [=](const Segment &S) { return S.valno == ValNo; }))
+ markValNoForDeletion(ValNo);
+}
+
/// removeValNo - Remove all the segments defined by the specified value#.
/// Also remove the value# from value# list.
void LiveRange::removeValNo(VNInfo *ValNo) {
if (empty()) return;
- segments.erase(remove_if(*this, [ValNo](const Segment &S) {
- return S.valno == ValNo;
- }), end());
+ llvm::erase_if(segments,
+ [ValNo](const Segment &S) { return S.valno == ValNo; });
// Now that ValNo is dead, remove it.
markValNoForDeletion(ValNo);
}
@@ -1019,7 +1020,7 @@ void LiveRange::print(raw_ostream &OS) const {
// Print value number info.
if (getNumValNums()) {
- OS << " ";
+ OS << ' ';
unsigned vnum = 0;
for (const_vni_iterator i = vni_begin(), e = vni_end(); i != e;
++i, ++vnum) {
@@ -1038,8 +1039,8 @@ void LiveRange::print(raw_ostream &OS) const {
}
void LiveInterval::SubRange::print(raw_ostream &OS) const {
- OS << " L" << PrintLaneMask(LaneMask) << ' '
- << static_cast<const LiveRange&>(*this);
+ OS << " L" << PrintLaneMask(LaneMask) << ' '
+ << static_cast<const LiveRange &>(*this);
}
void LiveInterval::print(raw_ostream &OS) const {
@@ -1048,7 +1049,7 @@ void LiveInterval::print(raw_ostream &OS) const {
// Print subranges
for (const SubRange &SR : subranges())
OS << SR;
- OS << " weight:" << Weight;
+ OS << " weight:" << Weight;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/llvm/lib/CodeGen/LiveIntervalUnion.cpp b/llvm/lib/CodeGen/LiveIntervalUnion.cpp
index dfa523d4bf41..50b31e1eb247 100644
--- a/llvm/lib/CodeGen/LiveIntervalUnion.cpp
+++ b/llvm/lib/CodeGen/LiveIntervalUnion.cpp
@@ -112,7 +112,7 @@ LiveInterval *LiveIntervalUnion::getOneVReg() const {
// Scan the vector of interfering virtual registers in this union. Assume it's
// quite small.
bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const {
- return is_contained(*InterferingVRegs, VirtReg);
+ return is_contained(InterferingVRegs, VirtReg);
}
// Collect virtual registers in this union that interfere with this
@@ -124,14 +124,11 @@ bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const {
// 2. SeenAllInterferences == true: InterferingVRegs complete, iterators unused.
// 3. Iterators left at the last seen intersection.
//
-unsigned LiveIntervalUnion::Query::
-collectInterferingVRegs(unsigned MaxInterferingRegs) {
- if (!InterferingVRegs)
- InterferingVRegs.emplace();
-
+unsigned
+LiveIntervalUnion::Query::collectInterferingVRegs(unsigned MaxInterferingRegs) {
// Fast path return if we already have the desired information.
- if (SeenAllInterferences || InterferingVRegs->size() >= MaxInterferingRegs)
- return InterferingVRegs->size();
+ if (SeenAllInterferences || InterferingVRegs.size() >= MaxInterferingRegs)
+ return InterferingVRegs.size();
// Set up iterators on the first call.
if (!CheckedFirstInterference) {
@@ -160,14 +157,14 @@ collectInterferingVRegs(unsigned MaxInterferingRegs) {
LiveInterval *VReg = LiveUnionI.value();
if (VReg != RecentReg && !isSeenInterference(VReg)) {
RecentReg = VReg;
- InterferingVRegs->push_back(VReg);
- if (InterferingVRegs->size() >= MaxInterferingRegs)
- return InterferingVRegs->size();
+ InterferingVRegs.push_back(VReg);
+ if (InterferingVRegs.size() >= MaxInterferingRegs)
+ return InterferingVRegs.size();
}
// This LiveUnion segment is no longer interesting.
if (!(++LiveUnionI).valid()) {
SeenAllInterferences = true;
- return InterferingVRegs->size();
+ return InterferingVRegs.size();
}
}
@@ -188,7 +185,7 @@ collectInterferingVRegs(unsigned MaxInterferingRegs) {
LiveUnionI.advanceTo(LRI->start);
}
SeenAllInterferences = true;
- return InterferingVRegs->size();
+ return InterferingVRegs.size();
}
void LiveIntervalUnion::Array::init(LiveIntervalUnion::Allocator &Alloc,
diff --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp
index 23036c2b115f..2f97386b6d18 100644
--- a/llvm/lib/CodeGen/LiveIntervals.cpp
+++ b/llvm/lib/CodeGen/LiveIntervals.cpp
@@ -1571,15 +1571,14 @@ void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin,
LaneBitmask LaneMask) {
LiveInterval::iterator LII = LR.find(EndIdx);
SlotIndex lastUseIdx;
- if (LII == LR.begin()) {
- // This happens when the function is called for a subregister that only
- // occurs _after_ the range that is to be repaired.
- return;
- }
- if (LII != LR.end() && LII->start < EndIdx)
+ if (LII != LR.end() && LII->start < EndIdx) {
lastUseIdx = LII->end;
- else
+ } else if (LII == LR.begin()) {
+    // We may not have a live range at all if this is a subregister untouched
+ // between \p Begin and \p End.
+ } else {
--LII;
+ }
for (MachineBasicBlock::iterator I = End; I != Begin;) {
--I;
@@ -1593,10 +1592,7 @@ void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin,
// FIXME: This doesn't currently handle early-clobber or multiple removed
// defs inside of the region to repair.
- for (MachineInstr::mop_iterator OI = MI.operands_begin(),
- OE = MI.operands_end();
- OI != OE; ++OI) {
- const MachineOperand &MO = *OI;
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || MO.getReg() != Reg)
continue;
@@ -1608,17 +1604,9 @@ void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin,
if (MO.isDef()) {
if (!isStartValid) {
if (LII->end.isDead()) {
- SlotIndex prevStart;
+ LII = LR.removeSegment(LII, true);
if (LII != LR.begin())
- prevStart = std::prev(LII)->start;
-
- // FIXME: This could be more efficient if there was a
- // removeSegment method that returned an iterator.
- LR.removeSegment(*LII, true);
- if (prevStart.isValid())
- LII = LR.find(prevStart);
- else
- LII = LR.begin();
+ --LII;
} else {
LII->start = instrIdx.getRegSlot();
LII->valno->def = instrIdx.getRegSlot();
@@ -1656,6 +1644,10 @@ void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin,
}
}
}
+
+ bool isStartValid = getInstructionFromIndex(LII->start);
+ if (!isStartValid && LII->end.isDead())
+ LR.removeSegment(*LII, true);
}
void
@@ -1678,22 +1670,33 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB,
Indexes->repairIndexesInRange(MBB, Begin, End);
+ // Make sure a live interval exists for all register operands in the range.
+ SmallVector<Register> RegsToRepair(OrigRegs.begin(), OrigRegs.end());
for (MachineBasicBlock::iterator I = End; I != Begin;) {
--I;
MachineInstr &MI = *I;
if (MI.isDebugOrPseudoInstr())
continue;
- for (MachineInstr::const_mop_iterator MOI = MI.operands_begin(),
- MOE = MI.operands_end();
- MOI != MOE; ++MOI) {
- if (MOI->isReg() && Register::isVirtualRegister(MOI->getReg()) &&
- !hasInterval(MOI->getReg())) {
- createAndComputeVirtRegInterval(MOI->getReg());
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isReg() && MO.getReg().isVirtual()) {
+ Register Reg = MO.getReg();
+ // If the new instructions refer to subregs but the old instructions did
+ // not, throw away any old live interval so it will be recomputed with
+ // subranges.
+ if (MO.getSubReg() && hasInterval(Reg) &&
+ !getInterval(Reg).hasSubRanges() &&
+ MRI->shouldTrackSubRegLiveness(Reg))
+ removeInterval(Reg);
+ if (!hasInterval(Reg)) {
+ createAndComputeVirtRegInterval(Reg);
+ // Don't bother to repair a freshly calculated live interval.
+ erase_value(RegsToRepair, Reg);
+ }
}
}
}
- for (Register Reg : OrigRegs) {
+ for (Register Reg : RegsToRepair) {
if (!Reg.isVirtual())
continue;
@@ -1704,6 +1707,7 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB,
for (LiveInterval::SubRange &S : LI.subranges())
repairOldRegInRange(Begin, End, EndIdx, S, Reg, S.LaneMask);
+ LI.removeEmptySubRanges();
repairOldRegInRange(Begin, End, EndIdx, LI, Reg);
}
diff --git a/llvm/lib/CodeGen/LivePhysRegs.cpp b/llvm/lib/CodeGen/LivePhysRegs.cpp
index c0c7848139e4..d4848f16dcf2 100644
--- a/llvm/lib/CodeGen/LivePhysRegs.cpp
+++ b/llvm/lib/CodeGen/LivePhysRegs.cpp
@@ -81,22 +81,24 @@ void LivePhysRegs::stepForward(const MachineInstr &MI,
SmallVectorImpl<std::pair<MCPhysReg, const MachineOperand*>> &Clobbers) {
// Remove killed registers from the set.
for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
- if (O->isReg() && !O->isDebug()) {
+ if (O->isReg()) {
+ if (O->isDebug())
+ continue;
Register Reg = O->getReg();
- if (!Register::isPhysicalRegister(Reg))
+ if (!Reg.isPhysical())
continue;
if (O->isDef()) {
// Note, dead defs are still recorded. The caller should decide how to
// handle them.
Clobbers.push_back(std::make_pair(Reg, &*O));
} else {
- if (!O->isKill())
- continue;
assert(O->isUse());
- removeReg(Reg);
+ if (O->isKill())
+ removeReg(Reg);
}
- } else if (O->isRegMask())
+ } else if (O->isRegMask()) {
removeRegsInMask(*O, &Clobbers);
+ }
}
// Add defs to the set.
@@ -250,7 +252,7 @@ void llvm::computeLiveIns(LivePhysRegs &LiveRegs,
const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
LiveRegs.init(TRI);
LiveRegs.addLiveOutsNoPristines(MBB);
- for (const MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend()))
+ for (const MachineInstr &MI : llvm::reverse(MBB))
LiveRegs.stepBackward(MI);
}
@@ -287,7 +289,7 @@ void llvm::recomputeLivenessFlags(MachineBasicBlock &MBB) {
LiveRegs.init(TRI);
LiveRegs.addLiveOutsNoPristines(MBB);
- for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
+ for (MachineInstr &MI : llvm::reverse(MBB)) {
// Recompute dead flags.
for (MIBundleOperands MO(MI); MO.isValid(); ++MO) {
if (!MO->isReg() || !MO->isDef() || MO->isDebug())
@@ -296,7 +298,7 @@ void llvm::recomputeLivenessFlags(MachineBasicBlock &MBB) {
Register Reg = MO->getReg();
if (Reg == 0)
continue;
- assert(Register::isPhysicalRegister(Reg));
+ assert(Reg.isPhysical());
bool IsNotLive = LiveRegs.available(MRI, Reg);
@@ -325,7 +327,7 @@ void llvm::recomputeLivenessFlags(MachineBasicBlock &MBB) {
Register Reg = MO->getReg();
if (Reg == 0)
continue;
- assert(Register::isPhysicalRegister(Reg));
+ assert(Reg.isPhysical());
bool IsNotLive = LiveRegs.available(MRI, Reg);
MO->setIsKill(IsNotLive);
diff --git a/llvm/lib/CodeGen/LiveRangeEdit.cpp b/llvm/lib/CodeGen/LiveRangeEdit.cpp
index 64a2dd275643..d91ff734ad8f 100644
--- a/llvm/lib/CodeGen/LiveRangeEdit.cpp
+++ b/llvm/lib/CodeGen/LiveRangeEdit.cpp
@@ -107,7 +107,7 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
SlotIndex OrigIdx,
SlotIndex UseIdx) const {
OrigIdx = OrigIdx.getRegSlot(true);
- UseIdx = UseIdx.getRegSlot(true);
+ UseIdx = std::max(UseIdx, UseIdx.getRegSlot(true));
for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = OrigMI->getOperand(i);
if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
@@ -305,17 +305,18 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink,
isOrigDef = SlotIndex::isSameInstr(OrigVNI->def, Idx);
}
+ bool HasLiveVRegUses = false;
+
// Check for live intervals that may shrink
- for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
- MOE = MI->operands_end(); MOI != MOE; ++MOI) {
- if (!MOI->isReg())
+ for (const MachineOperand &MO : MI->operands()) {
+ if (!MO.isReg())
continue;
- Register Reg = MOI->getReg();
+ Register Reg = MO.getReg();
if (!Register::isVirtualRegister(Reg)) {
// Check if MI reads any unreserved physregs.
- if (Reg && MOI->readsReg() && !MRI.isReserved(Reg))
+ if (Reg && MO.readsReg() && !MRI.isReserved(Reg))
ReadsPhysRegs = true;
- else if (MOI->isDef())
+ else if (MO.isDef())
LIS.removePhysRegDefAt(Reg.asMCReg(), Idx);
continue;
}
@@ -325,12 +326,14 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink,
// unlikely to change anything. We typically don't want to shrink the
// PIC base register that has lots of uses everywhere.
// Always shrink COPY uses that probably come from live range splitting.
- if ((MI->readsVirtualRegister(Reg) && (MI->isCopy() || MOI->isDef())) ||
- (MOI->readsReg() && (MRI.hasOneNonDBGUse(Reg) || useIsKill(LI, *MOI))))
+ if ((MI->readsVirtualRegister(Reg) && (MI->isCopy() || MO.isDef())) ||
+ (MO.readsReg() && (MRI.hasOneNonDBGUse(Reg) || useIsKill(LI, MO))))
ToShrink.insert(&LI);
+ else if (MO.readsReg())
+ HasLiveVRegUses = true;
// Remove defined value.
- if (MOI->isDef()) {
+ if (MO.isDef()) {
if (TheDelegate && LI.getVNInfoAt(Idx) != nullptr)
TheDelegate->LRE_WillShrinkVirtReg(LI.reg());
LIS.removeVRegDefAt(LI, Idx);
@@ -362,7 +365,11 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink,
// the inst for remat of other siblings. The inst is saved in
// LiveRangeEdit::DeadRemats and will be deleted after all the
// allocations of the func are done.
- if (isOrigDef && DeadRemats && TII.isTriviallyReMaterializable(*MI, AA)) {
+  // However, immediately delete instructions which have unshrunk virtual
+  // register uses. Keeping such an instruction may provoke RA to split an
+  // interval at the KILL and later result in an invalid live segment end.
+ if (isOrigDef && DeadRemats && !HasLiveVRegUses &&
+ TII.isTriviallyReMaterializable(*MI, AA)) {
LiveInterval &NewLI = createEmptyIntervalFrom(Dest, false);
VNInfo *VNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator());
NewLI.addSegment(LiveInterval::Segment(Idx, Idx.getDeadSlot(), VNI));
@@ -405,8 +412,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr *> &Dead,
break;
// Shrink just one live interval. Then delete new dead defs.
- LiveInterval *LI = ToShrink.back();
- ToShrink.pop_back();
+ LiveInterval *LI = ToShrink.pop_back_val();
if (foldAsLoad(LI, Dead))
continue;
unsigned VReg = LI->reg();
diff --git a/llvm/lib/CodeGen/LiveVariables.cpp b/llvm/lib/CodeGen/LiveVariables.cpp
index 7181dbc9c870..51ba4b7e53eb 100644
--- a/llvm/lib/CodeGen/LiveVariables.cpp
+++ b/llvm/lib/CodeGen/LiveVariables.cpp
@@ -119,8 +119,7 @@ void LiveVariables::MarkVirtRegAliveInBlock(VarInfo &VRInfo,
MarkVirtRegAliveInBlock(VRInfo, DefBlock, MBB, WorkList);
while (!WorkList.empty()) {
- MachineBasicBlock *Pred = WorkList.back();
- WorkList.pop_back();
+ MachineBasicBlock *Pred = WorkList.pop_back_val();
MarkVirtRegAliveInBlock(VRInfo, DefBlock, Pred, WorkList);
}
}
@@ -484,8 +483,7 @@ void LiveVariables::HandlePhysRegDef(Register Reg, MachineInstr *MI,
void LiveVariables::UpdatePhysRegDefs(MachineInstr &MI,
SmallVectorImpl<unsigned> &Defs) {
while (!Defs.empty()) {
- Register Reg = Defs.back();
- Defs.pop_back();
+ Register Reg = Defs.pop_back_val();
for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
SubRegs.isValid(); ++SubRegs) {
unsigned SubReg = *SubRegs;
@@ -671,6 +669,86 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
return false;
}
+void LiveVariables::recomputeForSingleDefVirtReg(Register Reg) {
+ assert(Reg.isVirtual());
+
+ VarInfo &VI = getVarInfo(Reg);
+ VI.AliveBlocks.clear();
+ VI.Kills.clear();
+
+ MachineInstr &DefMI = *MRI->getUniqueVRegDef(Reg);
+ MachineBasicBlock &DefBB = *DefMI.getParent();
+
+ // Handle the case where all uses have been removed.
+ if (MRI->use_nodbg_empty(Reg)) {
+ VI.Kills.push_back(&DefMI);
+ DefMI.addRegisterDead(Reg, nullptr);
+ return;
+ }
+ DefMI.clearRegisterDeads(Reg);
+
+ // Initialize a worklist of BBs that Reg is live-to-end of. (Here
+ // "live-to-end" means Reg is live at the end of a block even if it is only
+ // live because of phi uses in a successor. This is different from isLiveOut()
+ // which does not consider phi uses.)
+ SmallVector<MachineBasicBlock *> LiveToEndBlocks;
+ SparseBitVector<> UseBlocks;
+ for (auto &UseMO : MRI->use_nodbg_operands(Reg)) {
+ UseMO.setIsKill(false);
+ MachineInstr &UseMI = *UseMO.getParent();
+ MachineBasicBlock &UseBB = *UseMI.getParent();
+ UseBlocks.set(UseBB.getNumber());
+ if (UseMI.isPHI()) {
+ // If Reg is used in a phi then it is live-to-end of the corresponding
+ // predecessor.
+ unsigned Idx = UseMI.getOperandNo(&UseMO);
+ LiveToEndBlocks.push_back(UseMI.getOperand(Idx + 1).getMBB());
+ } else if (&UseBB == &DefBB) {
+ // A non-phi use in the same BB as the single def must come after the def.
+ } else {
+ // Otherwise Reg must be live-to-end of all predecessors.
+ LiveToEndBlocks.append(UseBB.pred_begin(), UseBB.pred_end());
+ }
+ }
+
+ // Iterate over the worklist adding blocks to AliveBlocks.
+ bool LiveToEndOfDefBB = false;
+ while (!LiveToEndBlocks.empty()) {
+ MachineBasicBlock &BB = *LiveToEndBlocks.pop_back_val();
+ if (&BB == &DefBB) {
+ LiveToEndOfDefBB = true;
+ continue;
+ }
+ if (VI.AliveBlocks.test(BB.getNumber()))
+ continue;
+ VI.AliveBlocks.set(BB.getNumber());
+ LiveToEndBlocks.append(BB.pred_begin(), BB.pred_end());
+ }
+
+ // Recompute kill flags. For each block in which Reg is used but is not
+ // live-through, find the last instruction that uses Reg. Ignore phi nodes
+ // because they should not be included in Kills.
+ for (unsigned UseBBNum : UseBlocks) {
+ if (VI.AliveBlocks.test(UseBBNum))
+ continue;
+ MachineBasicBlock &UseBB = *MF->getBlockNumbered(UseBBNum);
+ if (&UseBB == &DefBB && LiveToEndOfDefBB)
+ continue;
+ for (auto &MI : reverse(UseBB)) {
+ if (MI.isDebugOrPseudoInstr())
+ continue;
+ if (MI.isPHI())
+ break;
+ if (MI.readsRegister(Reg)) {
+ assert(!MI.killsRegister(Reg));
+ MI.addRegisterKilled(Reg, nullptr);
+ VI.Kills.push_back(&MI);
+ break;
+ }
+ }
+ }
+}
+
/// replaceKillInstruction - Update register kill info by replacing a kill
/// instruction with a new one.
void LiveVariables::replaceKillInstruction(Register Reg, MachineInstr &OldMI,
diff --git a/llvm/lib/CodeGen/LoopTraversal.cpp b/llvm/lib/CodeGen/LoopTraversal.cpp
index 9490dfc40a82..0d400253c652 100644
--- a/llvm/lib/CodeGen/LoopTraversal.cpp
+++ b/llvm/lib/CodeGen/LoopTraversal.cpp
@@ -39,8 +39,7 @@ LoopTraversal::TraversalOrder LoopTraversal::traverse(MachineFunction &MF) {
bool Primary = true;
Workqueue.push_back(MBB);
while (!Workqueue.empty()) {
- MachineBasicBlock *ActiveMBB = &*Workqueue.back();
- Workqueue.pop_back();
+ MachineBasicBlock *ActiveMBB = Workqueue.pop_back_val();
bool Done = isBlockDone(ActiveMBB);
MBBTraversalOrder.push_back(TraversedMBBInfo(ActiveMBB, Primary, Done));
for (MachineBasicBlock *Succ : ActiveMBB->successors()) {
diff --git a/llvm/lib/CodeGen/LowLevelType.cpp b/llvm/lib/CodeGen/LowLevelType.cpp
index 62e9c6b629d3..dce64ab9f5ca 100644
--- a/llvm/lib/CodeGen/LowLevelType.cpp
+++ b/llvm/lib/CodeGen/LowLevelType.cpp
@@ -52,6 +52,16 @@ MVT llvm::getMVTForLLT(LLT Ty) {
Ty.getNumElements());
}
+EVT llvm::getApproximateEVTForLLT(LLT Ty, const DataLayout &DL,
+ LLVMContext &Ctx) {
+ if (Ty.isVector()) {
+ EVT EltVT = getApproximateEVTForLLT(Ty.getElementType(), DL, Ctx);
+ return EVT::getVectorVT(Ctx, EltVT, Ty.getElementCount());
+ }
+
+ return EVT::getIntegerVT(Ctx, Ty.getSizeInBits());
+}
+
LLT llvm::getLLTForMVT(MVT Ty) {
if (!Ty.isVector())
return LLT::scalar(Ty.getSizeInBits());
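As a rough illustration of the helper added above, getApproximateEVTForLLT maps a scalar LLT to an integer EVT of the same bit width and recurses element-wise for vectors. A minimal sketch, assuming a DataLayout DL and an LLVMContext Ctx are in scope and `using namespace llvm` applies (not part of the patch):

    LLT S32 = LLT::scalar(32);
    LLT V4S32 = LLT::fixed_vector(4, 32);
    // A 32-bit scalar becomes a 32-bit integer EVT.
    EVT IntVT = getApproximateEVTForLLT(S32, DL, Ctx);
    // A <4 x s32> vector becomes a vector EVT with four 32-bit integer elements.
    EVT VecVT = getApproximateEVTForLLT(V4S32, DL, Ctx);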
diff --git a/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp b/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
index 8ef6aca602a1..3ec8c627f131 100644
--- a/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
+++ b/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
@@ -38,10 +38,6 @@
using namespace llvm;
-namespace llvm {
-extern char &MIRCanonicalizerID;
-} // namespace llvm
-
#define DEBUG_TYPE "mir-canonicalizer"
static cl::opt<unsigned>
@@ -332,8 +328,8 @@ static bool propagateLocalCopies(MachineBasicBlock *MBB) {
continue;
std::vector<MachineOperand *> Uses;
- for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI)
- Uses.push_back(&*UI);
+ for (MachineOperand &MO : MRI.use_operands(Dst))
+ Uses.push_back(&MO);
for (auto *MO : Uses)
MO->setReg(Src);
diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/llvm/lib/CodeGen/MIRParser/MILexer.cpp
index 87fde7d39a60..0ca820f160aa 100644
--- a/llvm/lib/CodeGen/MIRParser/MILexer.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MILexer.cpp
@@ -261,6 +261,8 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("liveout", MIToken::kw_liveout)
.Case("address-taken", MIToken::kw_address_taken)
.Case("landing-pad", MIToken::kw_landing_pad)
+ .Case("inlineasm-br-indirect-target",
+ MIToken::kw_inlineasm_br_indirect_target)
.Case("ehfunclet-entry", MIToken::kw_ehfunclet_entry)
.Case("liveins", MIToken::kw_liveins)
.Case("successors", MIToken::kw_successors)
diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.h b/llvm/lib/CodeGen/MIRParser/MILexer.h
index 68425b41c3fb..70d17f819ce3 100644
--- a/llvm/lib/CodeGen/MIRParser/MILexer.h
+++ b/llvm/lib/CodeGen/MIRParser/MILexer.h
@@ -116,6 +116,7 @@ struct MIToken {
kw_liveout,
kw_address_taken,
kw_landing_pad,
+ kw_inlineasm_br_indirect_target,
kw_ehfunclet_entry,
kw_liveins,
kw_successors,
diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
index 34e1f9225d42..1a04e1ca56a9 100644
--- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -498,7 +498,7 @@ public:
MachineOperand &Dest,
Optional<unsigned> &TiedDefIdx);
bool parseOffset(int64_t &Offset);
- bool parseAlignment(unsigned &Alignment);
+ bool parseAlignment(uint64_t &Alignment);
bool parseAddrspace(unsigned &Addrspace);
bool parseSectionID(Optional<MBBSectionID> &SID);
bool parseOperandsOffset(MachineOperand &Op);
@@ -674,9 +674,10 @@ bool MIParser::parseBasicBlockDefinition(
lex();
bool HasAddressTaken = false;
bool IsLandingPad = false;
+ bool IsInlineAsmBrIndirectTarget = false;
bool IsEHFuncletEntry = false;
Optional<MBBSectionID> SectionID;
- unsigned Alignment = 0;
+ uint64_t Alignment = 0;
BasicBlock *BB = nullptr;
if (consumeIfPresent(MIToken::lparen)) {
do {
@@ -690,6 +691,10 @@ bool MIParser::parseBasicBlockDefinition(
IsLandingPad = true;
lex();
break;
+ case MIToken::kw_inlineasm_br_indirect_target:
+ IsInlineAsmBrIndirectTarget = true;
+ lex();
+ break;
case MIToken::kw_ehfunclet_entry:
IsEHFuncletEntry = true;
lex();
@@ -737,6 +742,7 @@ bool MIParser::parseBasicBlockDefinition(
if (HasAddressTaken)
MBB->setHasAddressTaken();
MBB->setIsEHPad(IsLandingPad);
+ MBB->setIsInlineAsmBrIndirectTarget(IsInlineAsmBrIndirectTarget);
MBB->setIsEHFuncletEntry(IsEHFuncletEntry);
if (SectionID.hasValue()) {
MBB->setSectionID(SectionID.getValue());
@@ -1011,10 +1017,6 @@ bool MIParser::parse(MachineInstr *&MI) {
Optional<unsigned> TiedDefIdx;
if (parseMachineOperandAndTargetFlags(OpCode, Operands.size(), MO, TiedDefIdx))
return true;
- if ((OpCode == TargetOpcode::DBG_VALUE ||
- OpCode == TargetOpcode::DBG_VALUE_LIST) &&
- MO.isReg())
- MO.setIsDebug();
Operands.push_back(
ParsedMachineOperand(MO, Loc, Token.location(), TiedDefIdx));
if (Token.isNewlineOrEOF() || Token.is(MIToken::coloncolon) ||
@@ -2898,16 +2900,16 @@ bool MIParser::parseOffset(int64_t &Offset) {
return false;
}
-bool MIParser::parseAlignment(unsigned &Alignment) {
+bool MIParser::parseAlignment(uint64_t &Alignment) {
assert(Token.is(MIToken::kw_align) || Token.is(MIToken::kw_basealign));
lex();
if (Token.isNot(MIToken::IntegerLiteral) || Token.integerValue().isSigned())
return error("expected an integer literal after 'align'");
- if (getUnsigned(Alignment))
+ if (getUint64(Alignment))
return true;
lex();
- if (!isPowerOf2_32(Alignment))
+ if (!isPowerOf2_64(Alignment))
return error("expected a power-of-2 literal after 'align'");
return false;
@@ -3261,7 +3263,7 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {
if (parseMachinePointerInfo(Ptr))
return true;
}
- unsigned BaseAlignment =
+ uint64_t BaseAlignment =
(Size != MemoryLocation::UnknownSize ? PowerOf2Ceil(Size) : 1);
AAMDNodes AAInfo;
MDNode *Range = nullptr;
diff --git a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
index d77104752880..6221b5929301 100644
--- a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -454,6 +454,9 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF,
MF.getProperties().set(MachineFunctionProperties::Property::Selected);
if (YamlMF.FailedISel)
MF.getProperties().set(MachineFunctionProperties::Property::FailedISel);
+ if (YamlMF.FailsVerification)
+ MF.getProperties().set(
+ MachineFunctionProperties::Property::FailsVerification);
PerFunctionMIParsingState PFS(MF, SM, IRSlots, *Target);
if (parseRegisterInfo(PFS, YamlMF))
diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp
index 2a78bb62762a..f1369396e37f 100644
--- a/llvm/lib/CodeGen/MIRPrinter.cpp
+++ b/llvm/lib/CodeGen/MIRPrinter.cpp
@@ -217,6 +217,8 @@ void MIRPrinter::print(const MachineFunction &MF) {
MachineFunctionProperties::Property::Selected);
YamlMF.FailedISel = MF.getProperties().hasProperty(
MachineFunctionProperties::Property::FailedISel);
+ YamlMF.FailsVerification = MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailsVerification);
convert(YamlMF, MF.getRegInfo(), MF.getSubtarget().getRegisterInfo());
MachineModuleSlotTracker MST(&MF);
diff --git a/llvm/lib/CodeGen/MIRSampleProfile.cpp b/llvm/lib/CodeGen/MIRSampleProfile.cpp
new file mode 100644
index 000000000000..90ecc6fc68fc
--- /dev/null
+++ b/llvm/lib/CodeGen/MIRSampleProfile.cpp
@@ -0,0 +1,343 @@
+//===-------- MIRSampleProfile.cpp: MIRSampleFDO (For FSAFDO) -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the implementation of the MIRSampleProfile loader, mainly
+// for flow sensitive SampleFDO.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MIRSampleProfile.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h"
+#include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h"
+
+using namespace llvm;
+using namespace sampleprof;
+using namespace llvm::sampleprofutil;
+using ProfileCount = Function::ProfileCount;
+
+#define DEBUG_TYPE "fs-profile-loader"
+
+static cl::opt<bool> ShowFSBranchProb(
+ "show-fs-branchprob", cl::Hidden, cl::init(false),
+ cl::desc("Print setting flow sensitive branch probabilities"));
+static cl::opt<unsigned> FSProfileDebugProbDiffThreshold(
+ "fs-profile-debug-prob-diff-threshold", cl::init(10),
+ cl::desc("Only show debug message if the branch probility is greater than "
+ "this value (in percentage)."));
+
+static cl::opt<unsigned> FSProfileDebugBWThreshold(
+ "fs-profile-debug-bw-threshold", cl::init(10000),
+ cl::desc("Only show debug message if the source branch weight is greater "
+ " than this value."));
+
+static cl::opt<bool> ViewBFIBefore("fs-viewbfi-before", cl::Hidden,
+ cl::init(false),
+ cl::desc("View BFI before MIR loader"));
+static cl::opt<bool> ViewBFIAfter("fs-viewbfi-after", cl::Hidden,
+ cl::init(false),
+ cl::desc("View BFI after MIR loader"));
+
+char MIRProfileLoaderPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(MIRProfileLoaderPass, DEBUG_TYPE,
+ "Load MIR Sample Profile",
+ /* cfg = */ false, /* is_analysis = */ false)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass)
+INITIALIZE_PASS_END(MIRProfileLoaderPass, DEBUG_TYPE, "Load MIR Sample Profile",
+ /* cfg = */ false, /* is_analysis = */ false)
+
+char &llvm::MIRProfileLoaderPassID = MIRProfileLoaderPass::ID;
+
+FunctionPass *llvm::createMIRProfileLoaderPass(std::string File,
+ std::string RemappingFile,
+ FSDiscriminatorPass P) {
+ return new MIRProfileLoaderPass(File, RemappingFile, P);
+}
+
+namespace llvm {
+
+// Internal option used to control BFI display only after MBP pass.
+// Defined in CodeGen/MachineBlockFrequencyInfo.cpp:
+// -view-block-layout-with-bfi={none | fraction | integer | count}
+extern cl::opt<GVDAGType> ViewBlockLayoutWithBFI;
+
+// Command line option to specify the name of the function for CFG dump
+// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
+extern cl::opt<std::string> ViewBlockFreqFuncName;
+
+namespace afdo_detail {
+template <> struct IRTraits<MachineBasicBlock> {
+ using InstructionT = MachineInstr;
+ using BasicBlockT = MachineBasicBlock;
+ using FunctionT = MachineFunction;
+ using BlockFrequencyInfoT = MachineBlockFrequencyInfo;
+ using LoopT = MachineLoop;
+ using LoopInfoPtrT = MachineLoopInfo *;
+ using DominatorTreePtrT = MachineDominatorTree *;
+ using PostDominatorTreePtrT = MachinePostDominatorTree *;
+ using PostDominatorTreeT = MachinePostDominatorTree;
+ using OptRemarkEmitterT = MachineOptimizationRemarkEmitter;
+ using OptRemarkAnalysisT = MachineOptimizationRemarkAnalysis;
+ using PredRangeT = iterator_range<std::vector<MachineBasicBlock *>::iterator>;
+ using SuccRangeT = iterator_range<std::vector<MachineBasicBlock *>::iterator>;
+ static Function &getFunction(MachineFunction &F) { return F.getFunction(); }
+ static const MachineBasicBlock *getEntryBB(const MachineFunction *F) {
+ return GraphTraits<const MachineFunction *>::getEntryNode(F);
+ }
+ static PredRangeT getPredecessors(MachineBasicBlock *BB) {
+ return BB->predecessors();
+ }
+ static SuccRangeT getSuccessors(MachineBasicBlock *BB) {
+ return BB->successors();
+ }
+};
+} // namespace afdo_detail
+
+class MIRProfileLoader final
+ : public SampleProfileLoaderBaseImpl<MachineBasicBlock> {
+public:
+ void setInitVals(MachineDominatorTree *MDT, MachinePostDominatorTree *MPDT,
+ MachineLoopInfo *MLI, MachineBlockFrequencyInfo *MBFI,
+ MachineOptimizationRemarkEmitter *MORE) {
+ DT = MDT;
+ PDT = MPDT;
+ LI = MLI;
+ BFI = MBFI;
+ ORE = MORE;
+ }
+ void setFSPass(FSDiscriminatorPass Pass) {
+ P = Pass;
+ LowBit = getFSPassBitBegin(P);
+ HighBit = getFSPassBitEnd(P);
+    assert(LowBit < HighBit && "HighBit needs to be greater than LowBit");
+ }
+
+ MIRProfileLoader(StringRef Name, StringRef RemapName)
+ : SampleProfileLoaderBaseImpl(std::string(Name), std::string(RemapName)) {
+ }
+
+ void setBranchProbs(MachineFunction &F);
+ bool runOnFunction(MachineFunction &F);
+ bool doInitialization(Module &M);
+ bool isValid() const { return ProfileIsValid; }
+
+protected:
+ friend class SampleCoverageTracker;
+
+  /// Holds the basic block frequency information.
+ MachineBlockFrequencyInfo *BFI;
+
+  /// PassNum is the sequence number with which this pass is called, starting from 1.
+ FSDiscriminatorPass P;
+
+ // LowBit in the FS discriminator used by this instance. Note the number is
+  // 0-based. The base discriminator uses bit 0 to bit 11.
+ unsigned LowBit;
+  // HighBit in the FS discriminator used by this instance. Note the number
+ // is 0-based.
+ unsigned HighBit;
+
+ bool ProfileIsValid = true;
+};
+
+template <>
+void SampleProfileLoaderBaseImpl<
+ MachineBasicBlock>::computeDominanceAndLoopInfo(MachineFunction &F) {}
+
+void MIRProfileLoader::setBranchProbs(MachineFunction &F) {
+ LLVM_DEBUG(dbgs() << "\nPropagation complete. Setting branch probs\n");
+ for (auto &BI : F) {
+ MachineBasicBlock *BB = &BI;
+ if (BB->succ_size() < 2)
+ continue;
+ const MachineBasicBlock *EC = EquivalenceClass[BB];
+ uint64_t BBWeight = BlockWeights[EC];
+ uint64_t SumEdgeWeight = 0;
+ for (MachineBasicBlock *Succ : BB->successors()) {
+ Edge E = std::make_pair(BB, Succ);
+ SumEdgeWeight += EdgeWeights[E];
+ }
+
+ if (BBWeight != SumEdgeWeight) {
+ LLVM_DEBUG(dbgs() << "BBweight is not equal to SumEdgeWeight: BBWWeight="
+ << BBWeight << " SumEdgeWeight= " << SumEdgeWeight
+ << "\n");
+ BBWeight = SumEdgeWeight;
+ }
+ if (BBWeight == 0) {
+ LLVM_DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n");
+ continue;
+ }
+
+#ifndef NDEBUG
+ uint64_t BBWeightOrig = BBWeight;
+#endif
+ uint32_t MaxWeight = std::numeric_limits<uint32_t>::max();
+ uint32_t Factor = 1;
+ if (BBWeight > MaxWeight) {
+ Factor = BBWeight / MaxWeight + 1;
+ BBWeight /= Factor;
+ LLVM_DEBUG(dbgs() << "Scaling weights by " << Factor << "\n");
+ }
+
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end();
+ SI != SE; ++SI) {
+ MachineBasicBlock *Succ = *SI;
+ Edge E = std::make_pair(BB, Succ);
+ uint64_t EdgeWeight = EdgeWeights[E];
+ EdgeWeight /= Factor;
+
+      assert(BBWeight >= EdgeWeight &&
+             "EdgeWeight is larger than BBWeight -- should not happen.\n");
+
+ BranchProbability OldProb = BFI->getMBPI()->getEdgeProbability(BB, SI);
+ BranchProbability NewProb(EdgeWeight, BBWeight);
+ if (OldProb == NewProb)
+ continue;
+ BB->setSuccProbability(SI, NewProb);
+#ifndef NDEBUG
+ if (!ShowFSBranchProb)
+ continue;
+ bool Show = false;
+ BranchProbability Diff;
+ if (OldProb > NewProb)
+ Diff = OldProb - NewProb;
+ else
+ Diff = NewProb - OldProb;
+ Show = (Diff >= BranchProbability(FSProfileDebugProbDiffThreshold, 100));
+ Show &= (BBWeightOrig >= FSProfileDebugBWThreshold);
+
+ auto DIL = BB->findBranchDebugLoc();
+ auto SuccDIL = Succ->findBranchDebugLoc();
+ if (Show) {
+ dbgs() << "Set branch fs prob: MBB (" << BB->getNumber() << " -> "
+ << Succ->getNumber() << "): ";
+ if (DIL)
+ dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":"
+ << DIL->getColumn();
+ if (SuccDIL)
+ dbgs() << "-->" << SuccDIL->getFilename() << ":" << SuccDIL->getLine()
+ << ":" << SuccDIL->getColumn();
+ dbgs() << " W=" << BBWeightOrig << " " << OldProb << " --> " << NewProb
+ << "\n";
+ }
+#endif
+ }
+ }
+}
+
+bool MIRProfileLoader::doInitialization(Module &M) {
+ auto &Ctx = M.getContext();
+
+ auto ReaderOrErr = sampleprof::SampleProfileReader::create(Filename, Ctx, P,
+ RemappingFilename);
+ if (std::error_code EC = ReaderOrErr.getError()) {
+ std::string Msg = "Could not open profile: " + EC.message();
+ Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
+ return false;
+ }
+
+ Reader = std::move(ReaderOrErr.get());
+ Reader->setModule(&M);
+ ProfileIsValid = (Reader->read() == sampleprof_error::success);
+ Reader->getSummary();
+
+ return true;
+}
+
+bool MIRProfileLoader::runOnFunction(MachineFunction &MF) {
+ Function &Func = MF.getFunction();
+ clearFunctionData(false);
+ Samples = Reader->getSamplesFor(Func);
+ if (!Samples || Samples->empty())
+ return false;
+
+ if (getFunctionLoc(MF) == 0)
+ return false;
+
+ DenseSet<GlobalValue::GUID> InlinedGUIDs;
+ bool Changed = computeAndPropagateWeights(MF, InlinedGUIDs);
+
+ // Set the new BPI, BFI.
+ setBranchProbs(MF);
+
+ return Changed;
+}
+
+} // namespace llvm
+
+MIRProfileLoaderPass::MIRProfileLoaderPass(std::string FileName,
+ std::string RemappingFileName,
+ FSDiscriminatorPass P)
+ : MachineFunctionPass(ID), ProfileFileName(FileName), P(P),
+ MIRSampleLoader(
+ std::make_unique<MIRProfileLoader>(FileName, RemappingFileName)) {
+ LowBit = getFSPassBitBegin(P);
+ HighBit = getFSPassBitEnd(P);
+  assert(LowBit < HighBit && "HighBit needs to be greater than LowBit");
+}
+
+bool MIRProfileLoaderPass::runOnMachineFunction(MachineFunction &MF) {
+ if (!MIRSampleLoader->isValid())
+ return false;
+
+ LLVM_DEBUG(dbgs() << "MIRProfileLoader pass working on Func: "
+ << MF.getFunction().getName() << "\n");
+ MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+ MIRSampleLoader->setInitVals(
+ &getAnalysis<MachineDominatorTree>(),
+ &getAnalysis<MachinePostDominatorTree>(), &getAnalysis<MachineLoopInfo>(),
+ MBFI, &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE());
+
+ MF.RenumberBlocks();
+ if (ViewBFIBefore && ViewBlockLayoutWithBFI != GVDT_None &&
+ (ViewBlockFreqFuncName.empty() ||
+ MF.getFunction().getName().equals(ViewBlockFreqFuncName))) {
+ MBFI->view("MIR_Prof_loader_b." + MF.getName(), false);
+ }
+
+ bool Changed = MIRSampleLoader->runOnFunction(MF);
+
+ if (ViewBFIAfter && ViewBlockLayoutWithBFI != GVDT_None &&
+ (ViewBlockFreqFuncName.empty() ||
+ MF.getFunction().getName().equals(ViewBlockFreqFuncName))) {
+ MBFI->view("MIR_prof_loader_a." + MF.getName(), false);
+ }
+
+ return Changed;
+}
+
+bool MIRProfileLoaderPass::doInitialization(Module &M) {
+ LLVM_DEBUG(dbgs() << "MIRProfileLoader pass working on Module " << M.getName()
+ << "\n");
+
+ MIRSampleLoader->setFSPass(P);
+ return MIRSampleLoader->doInitialization(M);
+}
+
+void MIRProfileLoaderPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachinePostDominatorTree>();
+ AU.addRequiredTransitive<MachineLoopInfo>();
+ AU.addRequired<MachineOptimizationRemarkEmitterPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
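A minimal sketch of how a target pipeline might construct the loader added in this new file; the profile file name and the surrounding pass-manager wiring are assumptions for illustration, not part of this change:

    // Create one FS-AFDO MIR profile loader instance for the first FS
    // discriminator pass; the returned pass would then be added to the
    // MachineFunctionPass pipeline in the usual way.
    FunctionPass *Loader = llvm::createMIRProfileLoaderPass(
        "fsafdo.prof", /*RemappingFile=*/"", FSDiscriminatorPass::Pass1);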
diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index c6914dcd0e54..23c511aaa056 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -134,9 +134,8 @@ void ilist_callback_traits<MachineBasicBlock>::addNodeToList(
// Make sure the instructions have their operands in the reginfo lists.
MachineRegisterInfo &RegInfo = MF.getRegInfo();
- for (MachineBasicBlock::instr_iterator
- I = N->instr_begin(), E = N->instr_end(); I != E; ++I)
- I->AddRegOperandsToUseLists(RegInfo);
+ for (MachineInstr &MI : N->instrs())
+ MI.AddRegOperandsToUseLists(RegInfo);
}
void ilist_callback_traits<MachineBasicBlock>::removeNodeFromList(
@@ -281,8 +280,8 @@ MachineBasicBlock::getLastNonDebugInstr(bool SkipPseudoOp) {
}
bool MachineBasicBlock::hasEHPadSuccessor() const {
- for (const_succ_iterator I = succ_begin(), E = succ_end(); I != E; ++I)
- if ((*I)->isEHPad())
+ for (const MachineBasicBlock *Succ : successors())
+ if (Succ->isEHPad())
return true;
return false;
}
@@ -517,6 +516,11 @@ void MachineBasicBlock::printName(raw_ostream &os, unsigned printNameFlags,
os << "landing-pad";
hasAttributes = true;
}
+ if (isInlineAsmBrIndirectTarget()) {
+ os << (hasAttributes ? ", " : " (");
+ os << "inlineasm-br-indirect-target";
+ hasAttributes = true;
+ }
if (isEHFuncletEntry()) {
os << (hasAttributes ? ", " : " (");
os << "ehfunclet-entry";
@@ -1037,17 +1041,16 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
I != E; ++I) {
MachineInstr *MI = &*I;
- for (MachineInstr::mop_iterator OI = MI->operands_begin(),
- OE = MI->operands_end(); OI != OE; ++OI) {
- if (!OI->isReg() || OI->getReg() == 0 ||
- !OI->isUse() || !OI->isKill() || OI->isUndef())
+ for (MachineOperand &MO : MI->operands()) {
+ if (!MO.isReg() || MO.getReg() == 0 || !MO.isUse() || !MO.isKill() ||
+ MO.isUndef())
continue;
- Register Reg = OI->getReg();
+ Register Reg = MO.getReg();
if (Register::isPhysicalRegister(Reg) ||
LV->getVarInfo(Reg).removeKill(*MI)) {
KilledRegs.push_back(Reg);
- LLVM_DEBUG(dbgs() << "Removing terminator kill: " << *MI);
- OI->setIsKill(false);
+ LLVM_DEBUG(dbgs() << "Removing terminator kill: " << MI);
+ MO.setIsKill(false);
}
}
}
@@ -1058,12 +1061,11 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
I != E; ++I) {
MachineInstr *MI = &*I;
- for (MachineInstr::mop_iterator OI = MI->operands_begin(),
- OE = MI->operands_end(); OI != OE; ++OI) {
- if (!OI->isReg() || OI->getReg() == 0)
+ for (const MachineOperand &MO : MI->operands()) {
+ if (!MO.isReg() || MO.getReg() == 0)
continue;
- Register Reg = OI->getReg();
+ Register Reg = MO.getReg();
if (!is_contained(UsedRegs, Reg))
UsedRegs.push_back(Reg);
}
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index f61142d202eb..8a1b4031642d 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -1185,7 +1185,7 @@ bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
// The integrated tail duplication is really designed for increasing
// fallthrough from predecessors from Succ to its successors. We may need
  // other mechanisms to handle different cases.
- if (Succ->succ_size() == 0)
+ if (Succ->succ_empty())
return true;
// Plus the already placed predecessor.
@@ -2050,6 +2050,8 @@ MachineBlockPlacement::findBestLoopTopHelper(
BlockChain &HeaderChain = *BlockToChain[OldTop];
if (!LoopBlockSet.count(*HeaderChain.begin()))
return OldTop;
+ if (OldTop != *HeaderChain.begin())
+ return OldTop;
LLVM_DEBUG(dbgs() << "Finding best loop top for: " << getBlockName(OldTop)
<< "\n");
diff --git a/llvm/lib/CodeGen/MachineCSE.cpp b/llvm/lib/CodeGen/MachineCSE.cpp
index cb2e18e8c813..0fcb07252d0e 100644
--- a/llvm/lib/CodeGen/MachineCSE.cpp
+++ b/llvm/lib/CodeGen/MachineCSE.cpp
@@ -514,41 +514,38 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
SmallVector<unsigned, 2> ImplicitDefsToUpdate;
SmallVector<unsigned, 2> ImplicitDefs;
- for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) {
- MachineInstr *MI = &*I;
- ++I;
-
- if (!isCSECandidate(MI))
+ for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) {
+ if (!isCSECandidate(&MI))
continue;
- bool FoundCSE = VNT.count(MI);
+ bool FoundCSE = VNT.count(&MI);
if (!FoundCSE) {
// Using trivial copy propagation to find more CSE opportunities.
- if (PerformTrivialCopyPropagation(MI, MBB)) {
+ if (PerformTrivialCopyPropagation(&MI, MBB)) {
Changed = true;
// After coalescing MI itself may become a copy.
- if (MI->isCopyLike())
+ if (MI.isCopyLike())
continue;
// Try again to see if CSE is possible.
- FoundCSE = VNT.count(MI);
+ FoundCSE = VNT.count(&MI);
}
}
// Commute commutable instructions.
bool Commuted = false;
- if (!FoundCSE && MI->isCommutable()) {
- if (MachineInstr *NewMI = TII->commuteInstruction(*MI)) {
+ if (!FoundCSE && MI.isCommutable()) {
+ if (MachineInstr *NewMI = TII->commuteInstruction(MI)) {
Commuted = true;
FoundCSE = VNT.count(NewMI);
- if (NewMI != MI) {
+ if (NewMI != &MI) {
// New instruction. It doesn't need to be kept.
NewMI->eraseFromParent();
Changed = true;
} else if (!FoundCSE)
// MI was changed but it didn't help, commute it back!
- (void)TII->commuteInstruction(*MI);
+ (void)TII->commuteInstruction(MI);
}
}
@@ -559,8 +556,8 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
SmallSet<MCRegister, 8> PhysRefs;
PhysDefVector PhysDefs;
bool PhysUseDef = false;
- if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs,
- PhysDefs, PhysUseDef)) {
+ if (FoundCSE &&
+ hasLivePhysRegDefUses(&MI, MBB, PhysRefs, PhysDefs, PhysUseDef)) {
FoundCSE = false;
// ... Unless the CS is local or is in the sole predecessor block
@@ -569,23 +566,23 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
// This can never be the case if the instruction both uses and
// defines the same physical register, which was detected above.
if (!PhysUseDef) {
- unsigned CSVN = VNT.lookup(MI);
+ unsigned CSVN = VNT.lookup(&MI);
MachineInstr *CSMI = Exps[CSVN];
- if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
+ if (PhysRegDefsReach(CSMI, &MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
FoundCSE = true;
}
}
if (!FoundCSE) {
- VNT.insert(MI, CurrVN++);
- Exps.push_back(MI);
+ VNT.insert(&MI, CurrVN++);
+ Exps.push_back(&MI);
continue;
}
// Found a common subexpression, eliminate it.
- unsigned CSVN = VNT.lookup(MI);
+ unsigned CSVN = VNT.lookup(&MI);
MachineInstr *CSMI = Exps[CSVN];
- LLVM_DEBUG(dbgs() << "Examining: " << *MI);
+ LLVM_DEBUG(dbgs() << "Examining: " << MI);
LLVM_DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);
// Prevent CSE-ing non-local convergent instructions.
@@ -597,20 +594,20 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
// definition, so it's necessary to use `isConvergent` to prevent illegally
// CSE-ing the subset of `isConvergent` instructions which do fall into this
// extended definition.
- if (MI->isConvergent() && MI->getParent() != CSMI->getParent()) {
+ if (MI.isConvergent() && MI.getParent() != CSMI->getParent()) {
LLVM_DEBUG(dbgs() << "*** Convergent MI and subexpression exist in "
"different BBs, avoid CSE!\n");
- VNT.insert(MI, CurrVN++);
- Exps.push_back(MI);
+ VNT.insert(&MI, CurrVN++);
+ Exps.push_back(&MI);
continue;
}
// Check if it's profitable to perform this CSE.
bool DoCSE = true;
- unsigned NumDefs = MI->getNumDefs();
+ unsigned NumDefs = MI.getNumDefs();
- for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+ for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || !MO.isDef())
continue;
Register OldReg = MO.getReg();
@@ -635,7 +632,7 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
Register::isVirtualRegister(NewReg) &&
"Do not CSE physical register defs!");
- if (!isProfitableToCSE(NewReg, OldReg, CSMI->getParent(), MI)) {
+ if (!isProfitableToCSE(NewReg, OldReg, CSMI->getParent(), &MI)) {
LLVM_DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
DoCSE = false;
break;
@@ -674,7 +671,7 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate)
CSMI->getOperand(ImplicitDefToUpdate).setIsDead(false);
for (const auto &PhysDef : PhysDefs)
- if (!MI->getOperand(PhysDef.first).isDead())
+ if (!MI.getOperand(PhysDef.first).isDead())
CSMI->getOperand(PhysDef.first).setIsDead(false);
// Go through implicit defs of CSMI and MI, and clear the kill flags on
@@ -687,8 +684,8 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
// Since we eliminated MI, and reused a register imp-def'd by CSMI
// (here %nzcv), that register, if it was killed before MI, should have
      // that kill flag removed, because its lifetime was extended.
- if (CSMI->getParent() == MI->getParent()) {
- for (MachineBasicBlock::iterator II = CSMI, IE = MI; II != IE; ++II)
+ if (CSMI->getParent() == MI.getParent()) {
+ for (MachineBasicBlock::iterator II = CSMI, IE = &MI; II != IE; ++II)
for (auto ImplicitDef : ImplicitDefs)
if (MachineOperand *MO = II->findRegisterUseOperand(
ImplicitDef, /*isKill=*/true, TRI))
@@ -711,7 +708,7 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
++NumCrossBBCSEs;
}
- MI->eraseFromParent();
+ MI.eraseFromParent();
++NumCSEs;
if (!PhysRefs.empty())
++NumPhysCSEs;
@@ -719,8 +716,8 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
++NumCommutes;
Changed = true;
} else {
- VNT.insert(MI, CurrVN++);
- Exps.push_back(MI);
+ VNT.insert(&MI, CurrVN++);
+ Exps.push_back(&MI);
}
CSEPairs.clear();
ImplicitDefsToUpdate.clear();
@@ -807,19 +804,16 @@ bool MachineCSE::isPRECandidate(MachineInstr *MI) {
bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT,
MachineBasicBlock *MBB) {
bool Changed = false;
- for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;) {
- MachineInstr *MI = &*I;
- ++I;
-
- if (!isPRECandidate(MI))
+ for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) {
+ if (!isPRECandidate(&MI))
continue;
- if (!PREMap.count(MI)) {
- PREMap[MI] = MBB;
+ if (!PREMap.count(&MI)) {
+ PREMap[&MI] = MBB;
continue;
}
- auto MBB1 = PREMap[MI];
+ auto MBB1 = PREMap[&MI];
assert(
!DT->properlyDominates(MBB, MBB1) &&
"MBB cannot properly dominate MBB1 while DFS through dominators tree!");
@@ -844,17 +838,17 @@ bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT,
// it's necessary to use `isConvergent` to prevent illegally PRE-ing the
// subset of `isConvergent` instructions which do fall into this
// extended definition.
- if (MI->isConvergent() && CMBB != MBB)
+ if (MI.isConvergent() && CMBB != MBB)
continue;
- assert(MI->getOperand(0).isDef() &&
+ assert(MI.getOperand(0).isDef() &&
"First operand of instr with one explicit def must be this def");
- Register VReg = MI->getOperand(0).getReg();
+ Register VReg = MI.getOperand(0).getReg();
Register NewReg = MRI->cloneVirtualRegister(VReg);
- if (!isProfitableToCSE(NewReg, VReg, CMBB, MI))
+ if (!isProfitableToCSE(NewReg, VReg, CMBB, &MI))
continue;
MachineInstr &NewMI =
- TII->duplicate(*CMBB, CMBB->getFirstTerminator(), *MI);
+ TII->duplicate(*CMBB, CMBB->getFirstTerminator(), MI);
// When hoisting, make sure we don't carry the debug location of
// the original instruction, as that's not correct and can cause
@@ -864,7 +858,7 @@ bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT,
NewMI.getOperand(0).setReg(NewReg);
- PREMap[MI] = CMBB;
+ PREMap[&MI] = CMBB;
++NumPREs;
Changed = true;
}
diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
index 10b74f5f47f5..7c83bacd80d9 100644
--- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -414,6 +414,31 @@ bool MachineCopyPropagation::isForwardableRegClassCopy(const MachineInstr &Copy,
if (!UseI.isCopy())
return false;
+ const TargetRegisterClass *CopySrcRC =
+ TRI->getMinimalPhysRegClass(CopySrcReg);
+ const TargetRegisterClass *UseDstRC =
+ TRI->getMinimalPhysRegClass(UseI.getOperand(0).getReg());
+ const TargetRegisterClass *CrossCopyRC = TRI->getCrossCopyRegClass(CopySrcRC);
+
+  // If the cross-copy register class is not the same as the copy source's
+  // register class, then the register cannot be copied directly and a cross
+  // register class copy is required. Forwarding this copy without checking the
+  // register class of UseDst may create additional cross register copies when
+  // the copy instruction is expanded in later passes.
+ if (CopySrcRC != CrossCopyRC) {
+ const TargetRegisterClass *CopyDstRC =
+ TRI->getMinimalPhysRegClass(Copy.getOperand(0).getReg());
+
+ // Check if UseDstRC matches the necessary register class to copy from
+ // CopySrc's register class. If so then forwarding the copy will not
+    // introduce any cross-class copies. Otherwise, if CopyDstRC matches, keep
+    // the copy and do not forward. If neither UseDstRC nor CopyDstRC matches,
+ // we may need a cross register copy later but we do not worry about it
+ // here.
+ if (UseDstRC != CrossCopyRC && CopyDstRC == CrossCopyRC)
+ return false;
+ }
+
/// COPYs don't have register class constraints, so if the user instruction
/// is a COPY, we just try to avoid introducing additional cross-class
/// COPYs. For example:
@@ -430,9 +455,6 @@ bool MachineCopyPropagation::isForwardableRegClassCopy(const MachineInstr &Copy,
///
/// so we have reduced the number of cross-class COPYs and potentially
/// introduced a nop COPY that can be removed.
- const TargetRegisterClass *UseDstRC =
- TRI->getMinimalPhysRegClass(UseI.getOperand(0).getReg());
-
const TargetRegisterClass *SuperRC = UseDstRC;
for (TargetRegisterClass::sc_iterator SuperRCI = UseDstRC->getSuperClasses();
SuperRC; SuperRC = *SuperRCI++)
@@ -554,6 +576,7 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) {
MOUse.setReg(CopySrcReg);
if (!CopySrc.isRenamable())
MOUse.setIsRenamable(false);
+ MOUse.setIsUndef(CopySrc.isUndef());
LLVM_DEBUG(dbgs() << "MCP: After replacement: " << MI << "\n");
@@ -571,19 +594,16 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
LLVM_DEBUG(dbgs() << "MCP: ForwardCopyPropagateBlock " << MBB.getName()
<< "\n");
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ) {
- MachineInstr *MI = &*I;
- ++I;
-
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
// Analyze copies (which don't overlap themselves).
- if (MI->isCopy() && !TRI->regsOverlap(MI->getOperand(0).getReg(),
- MI->getOperand(1).getReg())) {
- assert(MI->getOperand(0).getReg().isPhysical() &&
- MI->getOperand(1).getReg().isPhysical() &&
+ if (MI.isCopy() && !TRI->regsOverlap(MI.getOperand(0).getReg(),
+ MI.getOperand(1).getReg())) {
+ assert(MI.getOperand(0).getReg().isPhysical() &&
+ MI.getOperand(1).getReg().isPhysical() &&
"MachineCopyPropagation should be run after register allocation!");
- MCRegister Def = MI->getOperand(0).getReg().asMCReg();
- MCRegister Src = MI->getOperand(1).getReg().asMCReg();
+ MCRegister Def = MI.getOperand(0).getReg().asMCReg();
+ MCRegister Src = MI.getOperand(1).getReg().asMCReg();
// The two copies cancel out and the source of the first copy
// hasn't been overridden, eliminate the second one. e.g.
@@ -600,31 +620,31 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
// %ecx = COPY %eax
// =>
// %ecx = COPY %eax
- if (eraseIfRedundant(*MI, Def, Src) || eraseIfRedundant(*MI, Src, Def))
+ if (eraseIfRedundant(MI, Def, Src) || eraseIfRedundant(MI, Src, Def))
continue;
- forwardUses(*MI);
+ forwardUses(MI);
// Src may have been changed by forwardUses()
- Src = MI->getOperand(1).getReg().asMCReg();
+ Src = MI.getOperand(1).getReg().asMCReg();
// If Src is defined by a previous copy, the previous copy cannot be
// eliminated.
- ReadRegister(Src, *MI, RegularUse);
- for (const MachineOperand &MO : MI->implicit_operands()) {
+ ReadRegister(Src, MI, RegularUse);
+ for (const MachineOperand &MO : MI.implicit_operands()) {
if (!MO.isReg() || !MO.readsReg())
continue;
MCRegister Reg = MO.getReg().asMCReg();
if (!Reg)
continue;
- ReadRegister(Reg, *MI, RegularUse);
+ ReadRegister(Reg, MI, RegularUse);
}
- LLVM_DEBUG(dbgs() << "MCP: Copy is a deletion candidate: "; MI->dump());
+ LLVM_DEBUG(dbgs() << "MCP: Copy is a deletion candidate: "; MI.dump());
// Copy is now a candidate for deletion.
if (!MRI->isReserved(Def))
- MaybeDeadCopies.insert(MI);
+ MaybeDeadCopies.insert(&MI);
// If 'Def' is previously source of another copy, then this earlier copy's
// source is no longer available. e.g.
@@ -634,7 +654,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
// ...
// %xmm2 = copy %xmm9
Tracker.clobberRegister(Def, *TRI);
- for (const MachineOperand &MO : MI->implicit_operands()) {
+ for (const MachineOperand &MO : MI.implicit_operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
MCRegister Reg = MO.getReg().asMCReg();
@@ -643,29 +663,29 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
Tracker.clobberRegister(Reg, *TRI);
}
- Tracker.trackCopy(MI, *TRI);
+ Tracker.trackCopy(&MI, *TRI);
continue;
}
// Clobber any earlyclobber regs first.
- for (const MachineOperand &MO : MI->operands())
+ for (const MachineOperand &MO : MI.operands())
if (MO.isReg() && MO.isEarlyClobber()) {
MCRegister Reg = MO.getReg().asMCReg();
// If we have a tied earlyclobber, that means it is also read by this
// instruction, so we need to make sure we don't remove it as dead
// later.
if (MO.isTied())
- ReadRegister(Reg, *MI, RegularUse);
+ ReadRegister(Reg, MI, RegularUse);
Tracker.clobberRegister(Reg, *TRI);
}
- forwardUses(*MI);
+ forwardUses(MI);
// Not a copy.
SmallVector<Register, 2> Defs;
const MachineOperand *RegMask = nullptr;
- for (const MachineOperand &MO : MI->operands()) {
+ for (const MachineOperand &MO : MI.operands()) {
if (MO.isRegMask())
RegMask = &MO;
if (!MO.isReg())
@@ -681,7 +701,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
Defs.push_back(Reg.asMCReg());
continue;
} else if (MO.readsReg())
- ReadRegister(Reg.asMCReg(), *MI, MO.isDebug() ? DebugUse : RegularUse);
+ ReadRegister(Reg.asMCReg(), MI, MO.isDebug() ? DebugUse : RegularUse);
}
// The instruction has a register mask operand which means that it clobbers
diff --git a/llvm/lib/CodeGen/MachineDominators.cpp b/llvm/lib/CodeGen/MachineDominators.cpp
index c8845d838282..28cff2a4f3f3 100644
--- a/llvm/lib/CodeGen/MachineDominators.cpp
+++ b/llvm/lib/CodeGen/MachineDominators.cpp
@@ -73,7 +73,7 @@ void MachineDominatorTree::releaseMemory() {
void MachineDominatorTree::verifyAnalysis() const {
if (DT && VerifyMachineDomInfo)
- if (!DT->verify(DomTreeT::VerificationLevel::Basic)) {
+ if (!DT->verify(MachineDomTree::VerificationLevel::Basic)) {
errs() << "MachineDominatorTree verification failed\n";
abort();
}
diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp
index 0a454b68aca3..366d06871245 100644
--- a/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/llvm/lib/CodeGen/MachineFunction.cpp
@@ -99,6 +99,7 @@ static const char *getPropertyName(MachineFunctionProperties::Property Prop) {
case P::Selected: return "Selected";
case P::TracksLiveness: return "TracksLiveness";
case P::TiedOpsRewritten: return "TiedOpsRewritten";
+ case P::FailsVerification: return "FailsVerification";
}
llvm_unreachable("Invalid machine function property");
}
@@ -129,8 +130,8 @@ void ilist_alloc_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) {
static inline unsigned getFnStackAlignment(const TargetSubtargetInfo *STI,
const Function &F) {
- if (F.hasFnAttribute(Attribute::StackAlignment))
- return F.getFnStackAlignment();
+ if (auto MA = F.getFnStackAlign())
+ return MA->value();
return STI->getFrameLowering()->getStackAlign().value();
}
@@ -745,9 +746,8 @@ MCSymbol *MachineFunction::addLandingPad(MachineBasicBlock *LandingPad) {
// Add filters in a list.
auto *CVal = cast<Constant>(Val);
SmallVector<const GlobalValue *, 4> FilterList;
- for (User::op_iterator II = CVal->op_begin(), IE = CVal->op_end();
- II != IE; ++II)
- FilterList.push_back(cast<GlobalValue>((*II)->stripPointerCasts()));
+ for (const Use &U : CVal->operands())
+ FilterList.push_back(cast<GlobalValue>(U->stripPointerCasts()));
addFilterTypeInfo(LandingPad, FilterList);
}
@@ -973,6 +973,9 @@ void MachineFunction::makeDebugValueSubstitution(DebugInstrOperandPair A,
unsigned Subreg) {
// Catch any accidental self-loops.
assert(A.first != B.first);
+ // Don't allow any substitutions _from_ the memory operand number.
+ assert(A.second != DebugOperandMemNumber);
+
DebugValueSubstitutions.push_back({A, B, Subreg});
}
@@ -1148,17 +1151,17 @@ auto MachineFunction::salvageCopySSA(MachineInstr &MI)
// locations.
;
} else {
- // Assert that this is the entry block. If it isn't, then there is some
- // code construct we don't recognise that deals with physregs across
- // blocks.
+ // Assert that this is the entry block, or an EH pad. If it isn't, then
+ // there is some code construct we don't recognise that deals with physregs
+ // across blocks.
assert(!State.first.isVirtual());
- assert(&*InsertBB.getParent()->begin() == &InsertBB);
+ assert(&*InsertBB.getParent()->begin() == &InsertBB || InsertBB.isEHPad());
}
// Create DBG_PHI for specified physreg.
auto Builder = BuildMI(InsertBB, InsertBB.getFirstNonPHI(), DebugLoc(),
TII.get(TargetOpcode::DBG_PHI));
- Builder.addReg(State.first, RegState::Debug);
+ Builder.addReg(State.first);
unsigned NewNum = getNewDebugInstrNum();
Builder.addImm(NewNum);
return ApplySubregisters({NewNum, 0u});
@@ -1171,10 +1174,9 @@ void MachineFunction::finalizeDebugInstrRefs() {
const MCInstrDesc &RefII = TII->get(TargetOpcode::DBG_VALUE);
MI.setDesc(RefII);
MI.getOperand(1).ChangeToRegister(0, false);
- MI.getOperand(0).setIsDebug();
};
- if (!getTarget().Options.ValueTrackingVariableLocations)
+ if (!useDebugInstrRef())
return;
for (auto &MBB : *this) {
@@ -1221,6 +1223,27 @@ void MachineFunction::finalizeDebugInstrRefs() {
}
}
+bool MachineFunction::useDebugInstrRef() const {
+ // Disable instr-ref at -O0: it's very slow (in compile time). We can still
+ // have optimized code inlined into this unoptimized code, however with
+ // fewer and less aggressive optimizations happening, coverage and accuracy
+ // should not suffer.
+ if (getTarget().getOptLevel() == CodeGenOpt::None)
+ return false;
+
+ // Don't use instr-ref if this function is marked optnone.
+ if (F.hasFnAttribute(Attribute::OptimizeNone))
+ return false;
+
+ if (getTarget().Options.ValueTrackingVariableLocations)
+ return true;
+
+ return false;
+}
+
+// Use one million as a high / reserved number.
+const unsigned MachineFunction::DebugOperandMemNumber = 1000000;
+
/// \}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp
index 0707945e7fb7..5c4f75e9ceb9 100644
--- a/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/llvm/lib/CodeGen/MachineInstr.cpp
@@ -294,6 +294,9 @@ void MachineInstr::addOperand(MachineFunction &MF, const MachineOperand &Op) {
if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1)
NewMO->setIsEarlyClobber(true);
}
+ // Ensure debug instructions set debug flag on register uses.
+ if (NewMO->isUse() && isDebugInstr())
+ NewMO->setIsDebug();
}
}
@@ -2111,11 +2114,11 @@ MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL,
assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
- auto MIB = BuildMI(MF, DL, MCID).addReg(Reg, RegState::Debug);
+ auto MIB = BuildMI(MF, DL, MCID).addReg(Reg);
if (IsIndirect)
MIB.addImm(0U);
else
- MIB.addReg(0U, RegState::Debug);
+ MIB.addReg(0U);
return MIB.addMetadata(Variable).addMetadata(Expr);
}
@@ -2134,7 +2137,7 @@ MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL,
if (IsIndirect)
MIB.addImm(0U);
else
- MIB.addReg(0U, RegState::Debug);
+ MIB.addReg(0U);
return MIB.addMetadata(Variable).addMetadata(Expr);
}
@@ -2153,7 +2156,7 @@ MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL,
MIB.addMetadata(Variable).addMetadata(Expr);
for (const MachineOperand &MO : MOs)
if (MO.isReg())
- MIB.addReg(MO.getReg(), RegState::Debug);
+ MIB.addReg(MO.getReg());
else
MIB.add(MO);
return MIB;
diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp
index 883299c452b7..500cf8e0b79b 100644
--- a/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/llvm/lib/CodeGen/MachineLICM.cpp
@@ -230,6 +230,9 @@ namespace {
bool IsGuaranteedToExecute(MachineBasicBlock *BB);
+ bool isTriviallyReMaterializable(const MachineInstr &MI,
+ AAResults *AA) const;
+
void EnterScope(MachineBasicBlock *MBB);
void ExitScope(MachineBasicBlock *MBB);
@@ -659,6 +662,23 @@ bool MachineLICMBase::IsGuaranteedToExecute(MachineBasicBlock *BB) {
return true;
}
+/// Check if \p MI is trivially rematerializable and if it does not have any
+/// virtual register uses. Even though it is rematerializable, RA might not
+/// actually rematerialize it in this scenario. In that case we do not want to
+/// hoist such an instruction out of the loop in the belief that RA will sink
+/// it back if needed.
+bool MachineLICMBase::isTriviallyReMaterializable(const MachineInstr &MI,
+ AAResults *AA) const {
+ if (!TII->isTriviallyReMaterializable(MI, AA))
+ return false;
+
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isReg() && MO.isUse() && MO.getReg().isVirtual())
+ return false;
+ }
+
+ return true;
+}
+
void MachineLICMBase::EnterScope(MachineBasicBlock *MBB) {
LLVM_DEBUG(dbgs() << "Entering " << printMBBReference(*MBB) << '\n');
@@ -761,15 +781,11 @@ void MachineLICMBase::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
// Process the block
SpeculationState = SpeculateUnknown;
- for (MachineBasicBlock::iterator
- MII = MBB->begin(), E = MBB->end(); MII != E; ) {
- MachineBasicBlock::iterator NextMII = MII; ++NextMII;
- MachineInstr *MI = &*MII;
- if (!Hoist(MI, Preheader))
- UpdateRegPressure(MI);
+ for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) {
+ if (!Hoist(&MI, Preheader))
+ UpdateRegPressure(&MI);
// If we have hoisted an instruction that may store, it can only be a
// constant store.
- MII = NextMII;
}
// If it's a leaf node, it's done. Traverse upwards to pop ancestors.
@@ -1156,9 +1172,9 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) {
return false;
}
- // Rematerializable instructions should always be hoisted since the register
- // allocator can just pull them down again when needed.
- if (TII->isTriviallyReMaterializable(MI, AA))
+ // Rematerializable instructions should always be hoisted provided the
+ // register allocator can just pull them down again when needed.
+ if (isTriviallyReMaterializable(MI, AA))
return true;
// FIXME: If there are long latency loop-invariant instructions inside the
@@ -1211,7 +1227,7 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) {
// High register pressure situation, only hoist if the instruction is going
// to be remat'ed.
- if (!TII->isTriviallyReMaterializable(MI, AA) &&
+ if (!isTriviallyReMaterializable(MI, AA) &&
!MI.isDereferenceableInvariantLoad(AA)) {
LLVM_DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI);
return false;
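The wrapper added to MachineLICM above tightens the hoisting heuristic: an instruction only counts as trivially rematerializable for hoisting if, in addition to TII->isTriviallyReMaterializable(), it reads no virtual registers, since RA may otherwise be unable to rematerialize it after hoisting. A minimal restatement over a placeholder operand model:

#include <vector>

struct Operand {
  bool IsReg;
  bool IsUse;
  bool IsVirtualReg;
};

struct Instr {
  bool TriviallyRemat;            // stands in for TII->isTriviallyReMaterializable()
  std::vector<Operand> Operands;
};

// Reject instructions that read virtual registers: hoisting them banks on RA
// sinking or rematerializing them later, which is not guaranteed.
static bool isTriviallyReMaterializableForHoisting(const Instr &MI) {
  if (!MI.TriviallyRemat)
    return false;
  for (const Operand &MO : MI.Operands)
    if (MO.IsReg && MO.IsUse && MO.IsVirtualReg)
      return false;
  return true;
}

int main() {
  Instr I{true, {{true, true, true}}};
  return isTriviallyReMaterializableForHoisting(I) ? 1 : 0; // returns 0: virtual-reg use blocks it
}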
diff --git a/llvm/lib/CodeGen/MachineLoopInfo.cpp b/llvm/lib/CodeGen/MachineLoopInfo.cpp
index 8f91a5b698d0..9b96bc5e5e7f 100644
--- a/llvm/lib/CodeGen/MachineLoopInfo.cpp
+++ b/llvm/lib/CodeGen/MachineLoopInfo.cpp
@@ -18,6 +18,7 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/InitializePasses.h"
@@ -154,7 +155,9 @@ MachineLoopInfo::findLoopPreheader(MachineLoop *L, bool SpeculativePreheader,
bool MachineLoop::isLoopInvariant(MachineInstr &I) const {
MachineFunction *MF = I.getParent()->getParent();
MachineRegisterInfo *MRI = &MF->getRegInfo();
- const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ const TargetSubtargetInfo &ST = MF->getSubtarget();
+ const TargetRegisterInfo *TRI = ST.getRegisterInfo();
+ const TargetInstrInfo *TII = ST.getInstrInfo();
// The instruction is loop invariant if all of its operands are.
for (const MachineOperand &MO : I.operands()) {
@@ -174,7 +177,8 @@ bool MachineLoop::isLoopInvariant(MachineInstr &I) const {
// However, if the physreg is known to always be caller saved/restored
// then this use is safe to hoist.
if (!MRI->isConstantPhysReg(Reg) &&
- !(TRI->isCallerPreservedPhysReg(Reg.asMCReg(), *I.getMF())))
+ !(TRI->isCallerPreservedPhysReg(Reg.asMCReg(), *I.getMF())) &&
+ !TII->isIgnorableUse(MO))
return false;
// Otherwise it's safe to move.
continue;
diff --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp
index b8ba0453d24c..4d080e1a4f82 100644
--- a/llvm/lib/CodeGen/MachineOperand.cpp
+++ b/llvm/lib/CodeGen/MachineOperand.cpp
@@ -250,6 +250,11 @@ void MachineOperand::ChangeToRegister(Register Reg, bool isDef, bool isImp,
if (RegInfo && WasReg)
RegInfo->removeRegOperandFromUseList(this);
+ // Ensure debug instructions set debug flag on register uses.
+ const MachineInstr *MI = getParent();
+ if (!isDef && MI && MI->isDebugInstr())
+ isDebug = true;
+
// Change this to a register and set the reg#.
assert(!(isDead && !isDef) && "Dead flag on non-def");
assert(!(isKill && isDef) && "Kill flag on def");
diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp
index 1d55bd00e033..cfbccebaff3e 100644
--- a/llvm/lib/CodeGen/MachineOutliner.cpp
+++ b/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -798,6 +798,7 @@ bool MachineOutliner::outline(Module &M,
Last = std::next(CallInst.getReverse());
Iter != Last; Iter++) {
MachineInstr *MI = &*Iter;
+ SmallSet<Register, 2> InstrUseRegs;
for (MachineOperand &MOP : MI->operands()) {
// Skip over anything that isn't a register.
if (!MOP.isReg())
@@ -806,7 +807,8 @@ bool MachineOutliner::outline(Module &M,
if (MOP.isDef()) {
// Introduce DefRegs set to skip the redundant register.
DefRegs.insert(MOP.getReg());
- if (!MOP.isDead() && UseRegs.count(MOP.getReg()))
+ if (UseRegs.count(MOP.getReg()) &&
+ !InstrUseRegs.count(MOP.getReg()))
// Since the register is modeled as defined,
// it is not necessary to be put in the use register set.
UseRegs.erase(MOP.getReg());
@@ -814,6 +816,7 @@ bool MachineOutliner::outline(Module &M,
// Any register which is not undefined should
// be put in the use register set.
UseRegs.insert(MOP.getReg());
+ InstrUseRegs.insert(MOP.getReg());
}
}
if (MI->isCandidateForCallSiteEntry())
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index caa3f8049aeb..e18318386def 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -200,8 +200,7 @@ bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) {
if (!EnableSWP)
return false;
- if (mf.getFunction().getAttributes().hasAttribute(
- AttributeList::FunctionIndex, Attribute::OptimizeForSize) &&
+ if (mf.getFunction().getAttributes().hasFnAttr(Attribute::OptimizeForSize) &&
!EnableSWPOptSize.getPosition())
return false;
@@ -386,7 +385,7 @@ void MachinePipeliner::preprocessPhiNodes(MachineBasicBlock &B) {
MachineRegisterInfo &MRI = MF->getRegInfo();
SlotIndexes &Slots = *getAnalysis<LiveIntervals>().getSlotIndexes();
- for (MachineInstr &PI : make_range(B.begin(), B.getFirstNonPHI())) {
+ for (MachineInstr &PI : B.phis()) {
MachineOperand &DefOp = PI.getOperand(0);
assert(DefOp.getSubReg() == 0);
auto *RC = MRI.getRegClass(DefOp.getReg());
diff --git a/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/llvm/lib/CodeGen/MachineRegisterInfo.cpp
index 3f6b11e072b4..19bf87d3e290 100644
--- a/llvm/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -383,9 +383,7 @@ void MachineRegisterInfo::replaceRegWith(Register FromReg, Register ToReg) {
const TargetRegisterInfo *TRI = getTargetRegisterInfo();
// TODO: This could be more efficient by bulk changing the operands.
- for (reg_iterator I = reg_begin(FromReg), E = reg_end(); I != E; ) {
- MachineOperand &O = *I;
- ++I;
+ for (MachineOperand &O : llvm::make_early_inc_range(reg_operands(FromReg))) {
if (Register::isPhysicalRegister(ToReg)) {
O.substPhysReg(ToReg, *TRI);
} else {
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index 4f42a2c8aeff..47d40f0823c8 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -583,7 +583,7 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
<< " " << MBB->getName() << "\n From: " << *I
<< " To: ";
if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
- else dbgs() << "End";
+ else dbgs() << "End\n";
dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
if (DumpCriticalPathLength) {
errs() << MF->getName();
diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp
index ec98394dca79..30745c7a5583 100644
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@@ -131,7 +131,7 @@ namespace {
// will be split.
SetVector<std::pair<MachineBasicBlock *, MachineBasicBlock *>> ToSplit;
- SparseBitVector<> RegsToClearKillFlags;
+ DenseSet<Register> RegsToClearKillFlags;
using AllSuccsCache =
std::map<MachineBasicBlock *, SmallVector<MachineBasicBlock *, 4>>;
@@ -476,14 +476,13 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
// of a def-use chain, if there is any.
// TODO: Sort the candidates using a cost-model.
unsigned i = 0;
- for (auto It = Candidates.rbegin(); It != Candidates.rend(); ++It) {
+ for (MachineInstr *I : llvm::reverse(Candidates)) {
if (i++ == SinkIntoLoopLimit) {
LLVM_DEBUG(dbgs() << "LoopSink: Limit reached of instructions to "
"be analysed.");
break;
}
- MachineInstr *I = *It;
if (!SinkIntoLoop(L, *I))
break;
EverMadeChange = true;
@@ -683,13 +682,9 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
// There is no need to do this check if all the uses are PHI nodes. PHI
// sources are only defined on the specific predecessor edges.
if (!BreakPHIEdge) {
- for (MachineBasicBlock::pred_iterator PI = ToBB->pred_begin(),
- E = ToBB->pred_end(); PI != E; ++PI) {
- if (*PI == FromBB)
- continue;
- if (!DT->dominates(ToBB, *PI))
+ for (MachineBasicBlock *Pred : ToBB->predecessors())
+ if (Pred != FromBB && !DT->dominates(ToBB, Pred))
return false;
- }
}
ToSplit.insert(std::make_pair(FromBB, ToBB));
@@ -1329,7 +1324,8 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
// "zombie" define of that preg. E.g., EFLAGS. (<rdar://problem/8030636>)
for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
const MachineOperand &MO = MI.getOperand(I);
- if (!MO.isReg()) continue;
+ if (!MO.isReg() || MO.isUse())
+ continue;
Register Reg = MO.getReg();
if (Reg == 0 || !Register::isPhysicalRegister(Reg))
continue;
@@ -1439,7 +1435,7 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
// used registers.
for (MachineOperand &MO : MI.operands()) {
if (MO.isReg() && MO.isUse())
- RegsToClearKillFlags.set(MO.getReg()); // Remember to clear kill flags.
+ RegsToClearKillFlags.insert(MO.getReg()); // Remember to clear kill flags.
}
return true;
@@ -1718,10 +1714,7 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
UsedRegUnits.clear();
SeenDbgInstrs.clear();
- for (auto I = CurBB.rbegin(), E = CurBB.rend(); I != E;) {
- MachineInstr *MI = &*I;
- ++I;
-
+ for (MachineInstr &MI : llvm::make_early_inc_range(llvm::reverse(CurBB))) {
// Track the operand index for use in Copy.
SmallVector<unsigned, 2> UsedOpsInCopy;
// Track the register number defed in Copy.
@@ -1729,14 +1722,14 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
// We must sink this DBG_VALUE if its operand is sunk. To avoid searching
// for DBG_VALUEs later, record them when they're encountered.
- if (MI->isDebugValue()) {
+ if (MI.isDebugValue()) {
SmallDenseMap<MCRegister, SmallVector<unsigned, 2>, 4> MIUnits;
bool IsValid = true;
- for (MachineOperand &MO : MI->debug_operands()) {
+ for (MachineOperand &MO : MI.debug_operands()) {
if (MO.isReg() && Register::isPhysicalRegister(MO.getReg())) {
// Bail if we can already tell the sink would be rejected, rather
// than needlessly accumulating lots of DBG_VALUEs.
- if (hasRegisterDependency(MI, UsedOpsInCopy, DefedRegsInCopy,
+ if (hasRegisterDependency(&MI, UsedOpsInCopy, DefedRegsInCopy,
ModifiedRegUnits, UsedRegUnits)) {
IsValid = false;
break;
@@ -1750,28 +1743,28 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
}
if (IsValid) {
for (auto RegOps : MIUnits)
- SeenDbgInstrs[RegOps.first].push_back({MI, RegOps.second});
+ SeenDbgInstrs[RegOps.first].push_back({&MI, RegOps.second});
}
continue;
}
- if (MI->isDebugOrPseudoInstr())
+ if (MI.isDebugOrPseudoInstr())
continue;
// Do not move any instruction across function call.
- if (MI->isCall())
+ if (MI.isCall())
return false;
- if (!MI->isCopy() || !MI->getOperand(0).isRenamable()) {
- LiveRegUnits::accumulateUsedDefed(*MI, ModifiedRegUnits, UsedRegUnits,
+ if (!MI.isCopy() || !MI.getOperand(0).isRenamable()) {
+ LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
TRI);
continue;
}
// Don't sink the COPY if it would violate a register dependency.
- if (hasRegisterDependency(MI, UsedOpsInCopy, DefedRegsInCopy,
+ if (hasRegisterDependency(&MI, UsedOpsInCopy, DefedRegsInCopy,
ModifiedRegUnits, UsedRegUnits)) {
- LiveRegUnits::accumulateUsedDefed(*MI, ModifiedRegUnits, UsedRegUnits,
+ LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
TRI);
continue;
}
@@ -1782,7 +1775,7 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
// Don't sink if we cannot find a single sinkable successor in which Reg
// is live-in.
if (!SuccBB) {
- LiveRegUnits::accumulateUsedDefed(*MI, ModifiedRegUnits, UsedRegUnits,
+ LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
TRI);
continue;
}
@@ -1793,7 +1786,7 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
// recorded which reg units that DBG_VALUEs read, if this instruction
// writes any of those units then the corresponding DBG_VALUEs must sink.
MapVector<MachineInstr *, MIRegs::second_type> DbgValsToSinkMap;
- for (auto &MO : MI->operands()) {
+ for (auto &MO : MI.operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
@@ -1811,10 +1804,10 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
// Clear the kill flag if SrcReg is killed between MI and the end of the
// block.
- clearKillFlags(MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI);
+ clearKillFlags(&MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI);
MachineBasicBlock::iterator InsertPos = SuccBB->getFirstNonPHI();
- performSink(*MI, *SuccBB, InsertPos, DbgValsToSink);
- updateLiveIn(MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy);
+ performSink(MI, *SuccBB, InsertPos, DbgValsToSink);
+ updateLiveIn(&MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy);
Changed = true;
++NumPostRACopySink;
diff --git a/llvm/lib/CodeGen/MachineSizeOpts.cpp b/llvm/lib/CodeGen/MachineSizeOpts.cpp
index 584d43b42004..28712d1a816b 100644
--- a/llvm/lib/CodeGen/MachineSizeOpts.cpp
+++ b/llvm/lib/CodeGen/MachineSizeOpts.cpp
@@ -82,7 +82,7 @@ bool isFunctionColdInCallGraph(
ProfileSummaryInfo *PSI,
const MachineBlockFrequencyInfo &MBFI) {
if (auto FunctionCount = MF->getFunction().getEntryCount())
- if (!PSI->isColdCount(FunctionCount.getCount()))
+ if (!PSI->isColdCount(FunctionCount->getCount()))
return false;
for (const auto &MBB : *MF)
if (!isColdBlock(&MBB, PSI, &MBFI))
@@ -99,7 +99,7 @@ bool isFunctionHotInCallGraphNthPercentile(
const MachineBlockFrequencyInfo &MBFI) {
if (auto FunctionCount = MF->getFunction().getEntryCount())
if (PSI->isHotCountNthPercentile(PercentileCutoff,
- FunctionCount.getCount()))
+ FunctionCount->getCount()))
return true;
for (const auto &MBB : *MF)
if (isHotBlockNthPercentile(PercentileCutoff, &MBB, PSI, &MBFI))
@@ -112,7 +112,7 @@ bool isFunctionColdInCallGraphNthPercentile(
const MachineBlockFrequencyInfo &MBFI) {
if (auto FunctionCount = MF->getFunction().getEntryCount())
if (!PSI->isColdCountNthPercentile(PercentileCutoff,
- FunctionCount.getCount()))
+ FunctionCount->getCount()))
return false;
for (const auto &MBB : *MF)
if (!isColdBlockNthPercentile(PercentileCutoff, &MBB, PSI, &MBFI))
diff --git a/llvm/lib/CodeGen/MachineStripDebug.cpp b/llvm/lib/CodeGen/MachineStripDebug.cpp
index a1cb12f91275..86cf4999d4b0 100644
--- a/llvm/lib/CodeGen/MachineStripDebug.cpp
+++ b/llvm/lib/CodeGen/MachineStripDebug.cpp
@@ -50,29 +50,26 @@ struct StripDebugMachineModule : public ModulePass {
continue;
MachineFunction &MF = *MaybeMF;
for (MachineBasicBlock &MBB : MF) {
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
- I != E;) {
- if (I->isDebugInstr()) {
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
+ if (MI.isDebugInstr()) {
// FIXME: We should remove all of them. However, AArch64 emits an
// invalid `DBG_VALUE $lr` with only one operand instead of
// the usual three and has a test that depends on its
// preservation. Preserve it for now.
- if (I->getNumOperands() > 1) {
- LLVM_DEBUG(dbgs() << "Removing debug instruction " << *I);
- I = MBB.erase(I);
+ if (MI.getNumOperands() > 1) {
+ LLVM_DEBUG(dbgs() << "Removing debug instruction " << MI);
+ MBB.erase(&MI);
Changed |= true;
continue;
}
}
- if (I->getDebugLoc()) {
- LLVM_DEBUG(dbgs() << "Removing location " << *I);
- I->setDebugLoc(DebugLoc());
+ if (MI.getDebugLoc()) {
+ LLVM_DEBUG(dbgs() << "Removing location " << MI);
+ MI.setDebugLoc(DebugLoc());
Changed |= true;
- ++I;
continue;
}
- LLVM_DEBUG(dbgs() << "Keeping " << *I);
- ++I;
+ LLVM_DEBUG(dbgs() << "Keeping " << MI);
}
}
}
diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp
index 7e3198af02cd..d6bb3e7c9e58 100644
--- a/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -210,6 +210,11 @@ namespace {
void visitMachineBasicBlockBefore(const MachineBasicBlock *MBB);
void visitMachineBundleBefore(const MachineInstr *MI);
+ /// Verify that all of \p MI's virtual register operands are scalars.
+ /// \returns True if all virtual register operands are scalar. False
+ /// otherwise.
+ bool verifyAllRegOpsScalar(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI);
bool verifyVectorElementMatch(LLT Ty0, LLT Ty1, const MachineInstr *MI);
void verifyPreISelGenericInstruction(const MachineInstr *MI);
void visitMachineInstrBefore(const MachineInstr *MI);
@@ -287,6 +292,13 @@ namespace {
}
bool runOnMachineFunction(MachineFunction &MF) override {
+ // Skip functions that have known verification problems.
+ // FIXME: Remove this mechanism when all problematic passes have been
+ // fixed.
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailsVerification))
+ return false;
+
unsigned FoundErrors = MachineVerifier(this, Banner.c_str()).verify(MF);
if (FoundErrors)
report_fatal_error("Found "+Twine(FoundErrors)+" machine code errors.");
@@ -849,6 +861,21 @@ void MachineVerifier::verifyInlineAsm(const MachineInstr *MI) {
}
}
+bool MachineVerifier::verifyAllRegOpsScalar(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI) {
+ if (none_of(MI.explicit_operands(), [&MRI](const MachineOperand &Op) {
+ if (!Op.isReg())
+ return false;
+ const auto Reg = Op.getReg();
+ if (Reg.isPhysical())
+ return false;
+ return !MRI.getType(Reg).isScalar();
+ }))
+ return true;
+ report("All register operands must have scalar types", &MI);
+ return false;
+}
+
/// Check that types are consistent when two operands need to have the same
/// number of vector elements.
/// \return true if the types are valid.
@@ -1392,7 +1419,7 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
AttributeList Attrs
= Intrinsic::getAttributes(MF->getFunction().getContext(),
static_cast<Intrinsic::ID>(IntrID));
- bool DeclHasSideEffects = !Attrs.hasFnAttribute(Attribute::ReadNone);
+ bool DeclHasSideEffects = !Attrs.hasFnAttr(Attribute::ReadNone);
if (NoSideEffects && DeclHasSideEffects) {
report("G_INTRINSIC used with intrinsic that accesses memory", MI);
break;
@@ -1570,11 +1597,8 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
case TargetOpcode::G_VECREDUCE_UMAX:
case TargetOpcode::G_VECREDUCE_UMIN: {
LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
- LLT SrcTy = MRI->getType(MI->getOperand(1).getReg());
if (!DstTy.isScalar())
report("Vector reduction requires a scalar destination type", MI);
- if (!SrcTy.isVector())
- report("Vector reduction requires vector source=", MI);
break;
}
@@ -1598,7 +1622,11 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
}
break;
}
-
+ case TargetOpcode::G_LLROUND:
+ case TargetOpcode::G_LROUND: {
+ verifyAllRegOpsScalar(*MI, *MRI);
+ break;
+ }
default:
break;
}
@@ -1632,6 +1660,8 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
report("Unspillable Terminator does not define a reg", MI);
Register Def = MI->getOperand(0).getReg();
if (Def.isVirtual() &&
+ !MF->getProperties().hasProperty(
+ MachineFunctionProperties::Property::NoPHIs) &&
std::distance(MRI->use_nodbg_begin(Def), MRI->use_nodbg_end()) > 1)
report("Unspillable Terminator expected to have at most one use!", MI);
}
@@ -1866,6 +1896,15 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
switch (MO->getType()) {
case MachineOperand::MO_Register: {
+ // Verify debug flag on debug instructions. Check this first because reg0
+ // indicates an undefined debug value.
+ if (MI->isDebugInstr() && MO->isUse()) {
+ if (!MO->isDebug())
+ report("Register operand must be marked debug", MO, MONum);
+ } else if (MO->isDebug()) {
+ report("Register operand must not be marked debug", MO, MONum);
+ }
+
const Register Reg = MO->getReg();
if (!Reg)
return;
@@ -1932,10 +1971,6 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
return;
}
}
- if (MI->isDebugValue() && MO->isUse() && !MO->isDebug()) {
- report("Use-reg is not IsDebug in a DBG_VALUE", MO, MONum);
- return;
- }
} else {
// Virtual register.
const TargetRegisterClass *RC = MRI->getRegClassOrNull(Reg);
@@ -2182,14 +2217,30 @@ void MachineVerifier::checkLivenessAtDef(const MachineOperand *MO,
void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
const MachineInstr *MI = MO->getParent();
const Register Reg = MO->getReg();
+ const unsigned SubRegIdx = MO->getSubReg();
+
+ const LiveInterval *LI = nullptr;
+ if (LiveInts && Reg.isVirtual()) {
+ if (LiveInts->hasInterval(Reg)) {
+ LI = &LiveInts->getInterval(Reg);
+ if (SubRegIdx != 0 && !LI->empty() && !LI->hasSubRanges() &&
+ MRI->shouldTrackSubRegLiveness(Reg))
+ report("Live interval for subreg operand has no subranges", MO, MONum);
+ } else {
+ report("Virtual register has no live interval", MO, MONum);
+ }
+ }
// Both use and def operands can read a register.
if (MO->readsReg()) {
if (MO->isKill())
addRegWithSubRegs(regsKilled, Reg);
- // Check that LiveVars knows this kill.
- if (LiveVars && Register::isVirtualRegister(Reg) && MO->isKill()) {
+ // Check that LiveVars knows this kill (unless we are inside a bundle, in
+ // which case we have already checked that LiveVars knows any kills on the
+ // bundle header instead).
+ if (LiveVars && Reg.isVirtual() && MO->isKill() &&
+ !MI->isBundledWithPred()) {
LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg);
if (!is_contained(VI.Kills, MI))
report("Kill missing from LiveVariables", MO, MONum);
@@ -2209,42 +2260,36 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
}
}
- if (Register::isVirtualRegister(Reg)) {
- if (LiveInts->hasInterval(Reg)) {
- // This is a virtual register interval.
- const LiveInterval &LI = LiveInts->getInterval(Reg);
- checkLivenessAtUse(MO, MONum, UseIdx, LI, Reg);
-
- if (LI.hasSubRanges() && !MO->isDef()) {
- unsigned SubRegIdx = MO->getSubReg();
- LaneBitmask MOMask = SubRegIdx != 0
- ? TRI->getSubRegIndexLaneMask(SubRegIdx)
- : MRI->getMaxLaneMaskForVReg(Reg);
- LaneBitmask LiveInMask;
- for (const LiveInterval::SubRange &SR : LI.subranges()) {
- if ((MOMask & SR.LaneMask).none())
- continue;
- checkLivenessAtUse(MO, MONum, UseIdx, SR, Reg, SR.LaneMask);
- LiveQueryResult LRQ = SR.Query(UseIdx);
- if (LRQ.valueIn())
- LiveInMask |= SR.LaneMask;
- }
- // At least parts of the register has to be live at the use.
- if ((LiveInMask & MOMask).none()) {
- report("No live subrange at use", MO, MONum);
- report_context(LI);
- report_context(UseIdx);
- }
+ if (Reg.isVirtual()) {
+ // This is a virtual register interval.
+ checkLivenessAtUse(MO, MONum, UseIdx, *LI, Reg);
+
+ if (LI->hasSubRanges() && !MO->isDef()) {
+ LaneBitmask MOMask = SubRegIdx != 0
+ ? TRI->getSubRegIndexLaneMask(SubRegIdx)
+ : MRI->getMaxLaneMaskForVReg(Reg);
+ LaneBitmask LiveInMask;
+ for (const LiveInterval::SubRange &SR : LI->subranges()) {
+ if ((MOMask & SR.LaneMask).none())
+ continue;
+ checkLivenessAtUse(MO, MONum, UseIdx, SR, Reg, SR.LaneMask);
+ LiveQueryResult LRQ = SR.Query(UseIdx);
+ if (LRQ.valueIn())
+ LiveInMask |= SR.LaneMask;
+ }
+ // At least parts of the register have to be live at the use.
+ if ((LiveInMask & MOMask).none()) {
+ report("No live subrange at use", MO, MONum);
+ report_context(*LI);
+ report_context(UseIdx);
}
- } else {
- report("Virtual register has no live interval", MO, MONum);
}
}
}
// Use of a dead register.
if (!regsLive.count(Reg)) {
- if (Register::isPhysicalRegister(Reg)) {
+ if (Reg.isPhysical()) {
// Reserved registers may be used even when 'dead'.
bool Bad = !isReserved(Reg);
// We are fine if just any subregister has a defined value.
@@ -2266,7 +2311,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
if (!MOP.isReg() || !MOP.isImplicit())
continue;
- if (!Register::isPhysicalRegister(MOP.getReg()))
+ if (!MOP.getReg().isPhysical())
continue;
if (llvm::is_contained(TRI->subregs(MOP.getReg()), Reg))
@@ -2299,7 +2344,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
addRegWithSubRegs(regsDefined, Reg);
// Verify SSA form.
- if (MRI->isSSA() && Register::isVirtualRegister(Reg) &&
+ if (MRI->isSSA() && Reg.isVirtual() &&
std::next(MRI->def_begin(Reg)) != MRI->def_end())
report("Multiple virtual register defs in SSA form", MO, MONum);
@@ -2308,24 +2353,18 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
SlotIndex DefIdx = LiveInts->getInstructionIndex(*MI);
DefIdx = DefIdx.getRegSlot(MO->isEarlyClobber());
- if (Register::isVirtualRegister(Reg)) {
- if (LiveInts->hasInterval(Reg)) {
- const LiveInterval &LI = LiveInts->getInterval(Reg);
- checkLivenessAtDef(MO, MONum, DefIdx, LI, Reg);
-
- if (LI.hasSubRanges()) {
- unsigned SubRegIdx = MO->getSubReg();
- LaneBitmask MOMask = SubRegIdx != 0
- ? TRI->getSubRegIndexLaneMask(SubRegIdx)
- : MRI->getMaxLaneMaskForVReg(Reg);
- for (const LiveInterval::SubRange &SR : LI.subranges()) {
- if ((SR.LaneMask & MOMask).none())
- continue;
- checkLivenessAtDef(MO, MONum, DefIdx, SR, Reg, true, SR.LaneMask);
- }
+ if (Reg.isVirtual()) {
+ checkLivenessAtDef(MO, MONum, DefIdx, *LI, Reg);
+
+ if (LI->hasSubRanges()) {
+ LaneBitmask MOMask = SubRegIdx != 0
+ ? TRI->getSubRegIndexLaneMask(SubRegIdx)
+ : MRI->getMaxLaneMaskForVReg(Reg);
+ for (const LiveInterval::SubRange &SR : LI->subranges()) {
+ if ((SR.LaneMask & MOMask).none())
+ continue;
+ checkLivenessAtDef(MO, MONum, DefIdx, SR, Reg, true, SR.LaneMask);
}
- } else {
- report("Virtual register has no Live interval", MO, MONum);
}
}
}
@@ -2918,9 +2957,13 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
}
}
- // A live segment can only end at an early-clobber slot if it is being
- // redefined by an early-clobber def.
- if (S.end.isEarlyClobber()) {
+ // After tied operands are rewritten, a live segment can only end at an
+ // early-clobber slot if it is being redefined by an early-clobber def.
+ // TODO: Before tied operands are rewritten, a live segment can only end at an
+ // early-clobber slot if the last use is tied to an early-clobber def.
+ if (MF->getProperties().hasProperty(
+ MachineFunctionProperties::Property::TiedOpsRewritten) &&
+ S.end.isEarlyClobber()) {
if (I+1 == LR.end() || (I+1)->start != S.end) {
report("Live segment ending at early clobber slot must be "
"redefined by an EC def in the same instruction", EndMBB);
diff --git a/llvm/lib/CodeGen/MacroFusion.cpp b/llvm/lib/CodeGen/MacroFusion.cpp
index d2ee21c8720f..b0760322064c 100644
--- a/llvm/lib/CodeGen/MacroFusion.cpp
+++ b/llvm/lib/CodeGen/MacroFusion.cpp
@@ -44,15 +44,15 @@ static SUnit *getPredClusterSU(const SUnit &SU) {
return nullptr;
}
-static bool hasLessThanNumFused(const SUnit &SU, unsigned FuseLimit) {
+bool llvm::hasLessThanNumFused(const SUnit &SU, unsigned FuseLimit) {
unsigned Num = 1;
const SUnit *CurrentSU = &SU;
while ((CurrentSU = getPredClusterSU(*CurrentSU)) && Num < FuseLimit) Num ++;
return Num < FuseLimit;
}
-static bool fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU,
- SUnit &SecondSU) {
+bool llvm::fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU,
+ SUnit &SecondSU) {
// Check that neither instr is already paired with another along the edge
// between them.
for (SDep &SI : FirstSU.Succs)
diff --git a/llvm/lib/CodeGen/ModuloSchedule.cpp b/llvm/lib/CodeGen/ModuloSchedule.cpp
index b5517c40a28a..8b3cdfab4d42 100644
--- a/llvm/lib/CodeGen/ModuloSchedule.cpp
+++ b/llvm/lib/CodeGen/ModuloSchedule.cpp
@@ -81,10 +81,7 @@ void ModuloScheduleExpander::expand() {
Register Reg = Op.getReg();
unsigned MaxDiff = 0;
bool PhiIsSwapped = false;
- for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(Reg),
- EI = MRI.use_end();
- UI != EI; ++UI) {
- MachineOperand &UseOp = *UI;
+ for (MachineOperand &UseOp : MRI.use_operands(Reg)) {
MachineInstr *UseMI = UseOp.getParent();
int UseStage = Schedule.getStage(UseMI);
unsigned Diff = 0;
@@ -141,13 +138,11 @@ void ModuloScheduleExpander::generatePipelinedLoop() {
// Copy any terminator instructions to the new kernel, and update
// names as needed.
- for (MachineBasicBlock::iterator I = BB->getFirstTerminator(),
- E = BB->instr_end();
- I != E; ++I) {
- MachineInstr *NewMI = MF.CloneMachineInstr(&*I);
+ for (MachineInstr &MI : BB->terminators()) {
+ MachineInstr *NewMI = MF.CloneMachineInstr(&MI);
updateInstruction(NewMI, false, MaxStageCount, 0, VRMap);
KernelBB->push_back(NewMI);
- InstrMap[NewMI] = &*I;
+ InstrMap[NewMI] = &MI;
}
NewKernel = KernelBB;
@@ -334,14 +329,10 @@ static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg,
MachineBasicBlock *MBB,
MachineRegisterInfo &MRI,
LiveIntervals &LIS) {
- for (MachineRegisterInfo::use_iterator I = MRI.use_begin(FromReg),
- E = MRI.use_end();
- I != E;) {
- MachineOperand &O = *I;
- ++I;
+ for (MachineOperand &O :
+ llvm::make_early_inc_range(MRI.use_operands(FromReg)))
if (O.getParent()->getParent() != MBB)
O.setReg(ToReg);
- }
if (!LIS.hasInterval(ToReg))
LIS.createEmptyInterval(ToReg);
}
@@ -350,10 +341,8 @@ static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg,
/// specified loop.
static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB,
MachineRegisterInfo &MRI) {
- for (MachineRegisterInfo::use_iterator I = MRI.use_begin(Reg),
- E = MRI.use_end();
- I != E; ++I)
- if (I->getParent()->getParent() != BB)
+ for (const MachineOperand &MO : MRI.use_operands(Reg))
+ if (MO.getParent()->getParent() != BB)
return true;
return false;
}
@@ -702,11 +691,9 @@ void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB,
MBBVectorTy &EpilogBBs) {
// For each epilog block, check that the value defined by each instruction
// is used. If not, delete it.
- for (MBBVectorTy::reverse_iterator MBB = EpilogBBs.rbegin(),
- MBE = EpilogBBs.rend();
- MBB != MBE; ++MBB)
- for (MachineBasicBlock::reverse_instr_iterator MI = (*MBB)->instr_rbegin(),
- ME = (*MBB)->instr_rend();
+ for (MachineBasicBlock *MBB : llvm::reverse(EpilogBBs))
+ for (MachineBasicBlock::reverse_instr_iterator MI = MBB->instr_rbegin(),
+ ME = MBB->instr_rend();
MI != ME;) {
// From DeadMachineInstructionElim. Don't delete inline assembly.
if (MI->isInlineAsm()) {
@@ -721,26 +708,22 @@ void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB,
continue;
}
bool used = true;
- for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
- MOE = MI->operands_end();
- MOI != MOE; ++MOI) {
- if (!MOI->isReg() || !MOI->isDef())
+ for (const MachineOperand &MO : MI->operands()) {
+ if (!MO.isReg() || !MO.isDef())
continue;
- Register reg = MOI->getReg();
+ Register reg = MO.getReg();
// Assume physical registers are used, unless they are marked dead.
if (Register::isPhysicalRegister(reg)) {
- used = !MOI->isDead();
+ used = !MO.isDead();
if (used)
break;
continue;
}
unsigned realUses = 0;
- for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(reg),
- EI = MRI.use_end();
- UI != EI; ++UI) {
+ for (const MachineOperand &U : MRI.use_operands(reg)) {
// Check if there are any uses that occur only in the original
// loop. If so, that's not a real use.
- if (UI->getParent()->getParent() != BB) {
+ if (U.getParent()->getParent() != BB) {
realUses++;
used = true;
break;
@@ -759,15 +742,11 @@ void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB,
}
// In the kernel block, check if we can remove a Phi that generates a value
// used in an instruction removed in the epilog block.
- for (MachineBasicBlock::iterator BBI = KernelBB->instr_begin(),
- BBE = KernelBB->getFirstNonPHI();
- BBI != BBE;) {
- MachineInstr *MI = &*BBI;
- ++BBI;
- Register reg = MI->getOperand(0).getReg();
+ for (MachineInstr &MI : llvm::make_early_inc_range(KernelBB->phis())) {
+ Register reg = MI.getOperand(0).getReg();
if (MRI.use_begin(reg) == MRI.use_end()) {
- LIS.RemoveMachineInstrFromMaps(*MI);
- MI->eraseFromParent();
+ LIS.RemoveMachineInstrFromMaps(MI);
+ MI.eraseFromParent();
}
}
}
@@ -1145,12 +1124,9 @@ void ModuloScheduleExpander::rewriteScheduledInstr(
int StagePhi = Schedule.getStage(Phi) + PhiNum;
// Rewrite uses that have been scheduled already to use the new
// Phi register.
- for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(OldReg),
- EI = MRI.use_end();
- UI != EI;) {
- MachineOperand &UseOp = *UI;
+ for (MachineOperand &UseOp :
+ llvm::make_early_inc_range(MRI.use_operands(OldReg))) {
MachineInstr *UseMI = UseOp.getParent();
- ++UI;
if (UseMI->getParent() != BB)
continue;
if (UseMI->isPHI()) {
@@ -1223,8 +1199,7 @@ void EliminateDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI,
bool Changed = true;
while (Changed) {
Changed = false;
- for (auto I = MBB->begin(); I != MBB->getFirstNonPHI();) {
- MachineInstr &MI = *I++;
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB->phis())) {
assert(MI.isPHI());
if (MRI.use_empty(MI.getOperand(0).getReg())) {
if (LIS)
@@ -1624,32 +1599,32 @@ void PeelingModuloScheduleExpander::moveStageBetweenBlocks(
MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage) {
auto InsertPt = DestBB->getFirstNonPHI();
DenseMap<Register, Register> Remaps;
- for (auto I = SourceBB->getFirstNonPHI(); I != SourceBB->end();) {
- MachineInstr *MI = &*I++;
- if (MI->isPHI()) {
+ for (MachineInstr &MI : llvm::make_early_inc_range(
+ llvm::make_range(SourceBB->getFirstNonPHI(), SourceBB->end()))) {
+ if (MI.isPHI()) {
// This is an illegal PHI. If we move any instructions using an illegal
// PHI, we need to create a legal Phi.
- if (getStage(MI) != Stage) {
+ if (getStage(&MI) != Stage) {
// The legal Phi is not necessary if the illegal phi's stage
// is being moved.
- Register PhiR = MI->getOperand(0).getReg();
+ Register PhiR = MI.getOperand(0).getReg();
auto RC = MRI.getRegClass(PhiR);
Register NR = MRI.createVirtualRegister(RC);
MachineInstr *NI = BuildMI(*DestBB, DestBB->getFirstNonPHI(),
DebugLoc(), TII->get(TargetOpcode::PHI), NR)
.addReg(PhiR)
.addMBB(SourceBB);
- BlockMIs[{DestBB, CanonicalMIs[MI]}] = NI;
- CanonicalMIs[NI] = CanonicalMIs[MI];
+ BlockMIs[{DestBB, CanonicalMIs[&MI]}] = NI;
+ CanonicalMIs[NI] = CanonicalMIs[&MI];
Remaps[PhiR] = NR;
}
}
- if (getStage(MI) != Stage)
+ if (getStage(&MI) != Stage)
continue;
- MI->removeFromParent();
- DestBB->insert(InsertPt, MI);
- auto *KernelMI = CanonicalMIs[MI];
- BlockMIs[{DestBB, KernelMI}] = MI;
+ MI.removeFromParent();
+ DestBB->insert(InsertPt, &MI);
+ auto *KernelMI = CanonicalMIs[&MI];
+ BlockMIs[{DestBB, KernelMI}] = &MI;
BlockMIs.erase({SourceBB, KernelMI});
}
SmallVector<MachineInstr *, 4> PhiToDelete;
@@ -1768,8 +1743,8 @@ void PeelingModuloScheduleExpander::peelPrologAndEpilogs() {
// Keep track at which iteration each phi belongs to. We need it to know
// what version of the variable to use during prologue/epilogue stitching.
EliminateDeadPhis(B, MRI, LIS, /*KeepSingleSrcPhi=*/true);
- for (auto Phi = B->begin(), IE = B->getFirstNonPHI(); Phi != IE; ++Phi)
- PhiNodeLoopIteration[&*Phi] = Schedule.getNumStages() - I;
+ for (MachineInstr &Phi : B->phis())
+ PhiNodeLoopIteration[&Phi] = Schedule.getNumStages() - I;
}
for (size_t I = 0; I < Epilogs.size(); I++) {
LS.reset();
diff --git a/llvm/lib/CodeGen/PHIElimination.cpp b/llvm/lib/CodeGen/PHIElimination.cpp
index 54805584dbc1..77a6c37e1362 100644
--- a/llvm/lib/CodeGen/PHIElimination.cpp
+++ b/llvm/lib/CodeGen/PHIElimination.cpp
@@ -107,6 +107,7 @@ namespace {
using BBVRegPair = std::pair<unsigned, Register>;
using VRegPHIUse = DenseMap<BBVRegPair, unsigned>;
+ // Count the number of non-undef PHI uses of each register in each BB.
VRegPHIUse VRegPHIUseCount;
// Defs of PHI sources which are implicit_def.
@@ -426,9 +427,13 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
}
// Adjust the VRegPHIUseCount map to account for the removal of this PHI node.
- for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2)
- --VRegPHIUseCount[BBVRegPair(MPhi->getOperand(i+1).getMBB()->getNumber(),
- MPhi->getOperand(i).getReg())];
+ for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) {
+ if (!MPhi->getOperand(i).isUndef()) {
+ --VRegPHIUseCount[BBVRegPair(
+ MPhi->getOperand(i + 1).getMBB()->getNumber(),
+ MPhi->getOperand(i).getReg())];
+ }
+ }
// Now loop over all of the incoming arguments, changing them to copy into the
// IncomingReg register in the corresponding predecessor basic block.
@@ -461,6 +466,15 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
assert(MRI->use_empty(SrcReg) &&
"Expected a single use from UnspillableTerminator");
SrcRegDef->getOperand(0).setReg(IncomingReg);
+
+ // Update LiveVariables.
+ if (LV) {
+ LiveVariables::VarInfo &SrcVI = LV->getVarInfo(SrcReg);
+ LiveVariables::VarInfo &IncomingVI = LV->getVarInfo(IncomingReg);
+ IncomingVI.AliveBlocks = std::move(SrcVI.AliveBlocks);
+ SrcVI.AliveBlocks.clear();
+ }
+
continue;
}
@@ -515,9 +529,8 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
// case, we should mark the last such terminator as being the killing
// block, not the copy.
MachineBasicBlock::iterator KillInst = opBlock.end();
- MachineBasicBlock::iterator FirstTerm = opBlock.getFirstTerminator();
- for (MachineBasicBlock::iterator Term = FirstTerm;
- Term != opBlock.end(); ++Term) {
+ for (MachineBasicBlock::iterator Term = InsertPos; Term != opBlock.end();
+ ++Term) {
if (Term->readsRegister(SrcReg))
KillInst = Term;
}
@@ -527,7 +540,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
if (reusedIncoming || !IncomingReg) {
// We may have to rewind a bit if we didn't insert a copy this time.
- KillInst = FirstTerm;
+ KillInst = InsertPos;
while (KillInst != opBlock.begin()) {
--KillInst;
if (KillInst->isDebugInstr())
@@ -574,9 +587,8 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
if (!isLiveOut) {
MachineBasicBlock::iterator KillInst = opBlock.end();
- MachineBasicBlock::iterator FirstTerm = opBlock.getFirstTerminator();
- for (MachineBasicBlock::iterator Term = FirstTerm;
- Term != opBlock.end(); ++Term) {
+ for (MachineBasicBlock::iterator Term = InsertPos;
+ Term != opBlock.end(); ++Term) {
if (Term->readsRegister(SrcReg))
KillInst = Term;
}
@@ -586,7 +598,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
if (reusedIncoming || !IncomingReg) {
// We may have to rewind a bit if we didn't just insert a copy.
- KillInst = FirstTerm;
+ KillInst = InsertPos;
while (KillInst != opBlock.begin()) {
--KillInst;
if (KillInst->isDebugInstr())
@@ -623,14 +635,19 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
/// used in a PHI node. We map that to the BB the vreg is coming from. This is
/// used later to determine when the vreg is killed in the BB.
void PHIElimination::analyzePHINodes(const MachineFunction& MF) {
- for (const auto &MBB : MF)
+ for (const auto &MBB : MF) {
for (const auto &BBI : MBB) {
if (!BBI.isPHI())
break;
- for (unsigned i = 1, e = BBI.getNumOperands(); i != e; i += 2)
- ++VRegPHIUseCount[BBVRegPair(BBI.getOperand(i+1).getMBB()->getNumber(),
- BBI.getOperand(i).getReg())];
+ for (unsigned i = 1, e = BBI.getNumOperands(); i != e; i += 2) {
+ if (!BBI.getOperand(i).isUndef()) {
+ ++VRegPHIUseCount[BBVRegPair(
+ BBI.getOperand(i + 1).getMBB()->getNumber(),
+ BBI.getOperand(i).getReg())];
+ }
+ }
}
+ }
}
bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
diff --git a/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
index 49bdba518322..f9b16d2630d6 100644
--- a/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -626,7 +626,7 @@ bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr &MI) {
// If this instruction is a comparison against zero and isn't comparing a
// physical register, we can try to optimize it.
Register SrcReg, SrcReg2;
- int CmpMask, CmpValue;
+ int64_t CmpMask, CmpValue;
if (!TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue) ||
SrcReg.isPhysical() || SrcReg2.isPhysical())
return false;
diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
index 80c38f3ec341..e3eb3f825851 100644
--- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -13,6 +13,7 @@
#include "llvm/CodeGen/PreISelIntrinsicLowering.h"
#include "llvm/Analysis/ObjCARCInstKind.h"
+#include "llvm/Analysis/ObjCARCUtil.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
@@ -36,9 +37,8 @@ static bool lowerLoadRelative(Function &F) {
Type *Int32PtrTy = Int32Ty->getPointerTo();
Type *Int8Ty = Type::getInt8Ty(F.getContext());
- for (auto I = F.use_begin(), E = F.use_end(); I != E;) {
- auto CI = dyn_cast<CallInst>(I->getUser());
- ++I;
+ for (Use &U : llvm::make_early_inc_range(F.uses())) {
+ auto CI = dyn_cast<CallInst>(U.getUser());
if (!CI || CI->getCalledOperand() != &F)
continue;
@@ -90,10 +90,22 @@ static bool lowerObjCCall(Function &F, const char *NewFn,
CallInst::TailCallKind OverridingTCK = getOverridingTailCallKind(F);
- for (auto I = F.use_begin(), E = F.use_end(); I != E;) {
- auto *CI = cast<CallInst>(I->getUser());
+ for (Use &U : llvm::make_early_inc_range(F.uses())) {
+ auto *CB = cast<CallBase>(U.getUser());
+
+ if (CB->getCalledFunction() != &F) {
+ objcarc::ARCInstKind Kind = objcarc::getAttachedARCFunctionKind(CB);
+ (void)Kind;
+ assert((Kind == objcarc::ARCInstKind::RetainRV ||
+ Kind == objcarc::ARCInstKind::ClaimRV) &&
+ "use expected to be the argument of operand bundle "
+ "\"clang.arc.attachedcall\"");
+ U.set(FCache.getCallee());
+ continue;
+ }
+
+ auto *CI = cast<CallInst>(CB);
assert(CI->getCalledFunction() && "Cannot lower an indirect call!");
- ++I;
IRBuilder<> Builder(CI->getParent(), CI->getIterator());
SmallVector<Value *, 8> Args(CI->args());
diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index 2f65a450fb02..9a4f70a6070f 100644
--- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -285,7 +285,7 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) {
(void)Failed;
}
if (StackSize > Threshold) {
- DiagnosticInfoStackSize DiagStackSize(F, StackSize, DS_Warning, Threshold);
+ DiagnosticInfoStackSize DiagStackSize(F, StackSize, Threshold, DS_Warning);
F.getContext().diagnose(DiagStackSize);
}
ORE->emit([&]() {
@@ -395,12 +395,28 @@ static void assignCalleeSavedSpillSlots(MachineFunction &F,
const TargetRegisterInfo *RegInfo = F.getSubtarget().getRegisterInfo();
const MCPhysReg *CSRegs = F.getRegInfo().getCalleeSavedRegs();
+ BitVector CSMask(SavedRegs.size());
+
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ CSMask.set(CSRegs[i]);
std::vector<CalleeSavedInfo> CSI;
for (unsigned i = 0; CSRegs[i]; ++i) {
unsigned Reg = CSRegs[i];
- if (SavedRegs.test(Reg))
- CSI.push_back(CalleeSavedInfo(Reg));
+ if (SavedRegs.test(Reg)) {
+ bool SavedSuper = false;
+ for (const MCPhysReg &SuperReg : RegInfo->superregs(Reg)) {
+ // Some backends set all aliases for some registers as saved, such as
+ // Mips's $fp, so they appear in SavedRegs but not CSRegs.
+ if (SavedRegs.test(SuperReg) && CSMask.test(SuperReg)) {
+ SavedSuper = true;
+ break;
+ }
+ }
+
+ if (!SavedSuper)
+ CSI.push_back(CalleeSavedInfo(Reg));
+ }
}
const TargetFrameLowering *TFI = F.getSubtarget().getFrameLowering();
@@ -1237,7 +1253,6 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
StackOffset Offset =
TFI->getFrameIndexReference(MF, FrameIdx, Reg);
Op.ChangeToRegister(Reg, false /*isDef*/);
- Op.setIsDebug();
const DIExpression *DIExpr = MI.getDebugExpression();
diff --git a/llvm/lib/CodeGen/PseudoProbeInserter.cpp b/llvm/lib/CodeGen/PseudoProbeInserter.cpp
index a9fb577d5735..5f69f9194125 100644
--- a/llvm/lib/CodeGen/PseudoProbeInserter.cpp
+++ b/llvm/lib/CodeGen/PseudoProbeInserter.cpp
@@ -44,7 +44,14 @@ public:
MachineFunctionPass::getAnalysisUsage(AU);
}
+ bool doInitialization(Module &M) override {
+ ShouldRun = M.getNamedMetadata(PseudoProbeDescMetadataName);
+ return false;
+ }
+
bool runOnMachineFunction(MachineFunction &MF) override {
+ if (!ShouldRun)
+ return false;
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
bool Changed = false;
for (MachineBasicBlock &MBB : MF) {
@@ -129,6 +136,8 @@ private:
Name = SP->getName();
return Function::getGUID(Name);
}
+
+ bool ShouldRun = false;
};
} // namespace
diff --git a/llvm/lib/CodeGen/RDFLiveness.cpp b/llvm/lib/CodeGen/RDFLiveness.cpp
index d92c6a997f31..d704cf7b3213 100644
--- a/llvm/lib/CodeGen/RDFLiveness.cpp
+++ b/llvm/lib/CodeGen/RDFLiveness.cpp
@@ -171,7 +171,7 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
SmallSet<NodeId,32> Defs;
- // Remove all non-phi defs that are not aliased to RefRR, and segregate
+ // Remove all non-phi defs that are not aliased to RefRR, and separate
// the remaining defs into buckets for containing blocks.
std::map<NodeId, NodeAddr<InstrNode*>> Owners;
std::map<MachineBasicBlock*, SmallVector<NodeId,32>> Blocks;
diff --git a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
index c850571da2ed..1264e6021b6e 100644
--- a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
+++ b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
@@ -30,16 +30,32 @@ static bool isValidRegUse(const MachineOperand &MO) {
return isValidReg(MO) && MO.isUse();
}
-static bool isValidRegUseOf(const MachineOperand &MO, MCRegister PhysReg) {
- return isValidRegUse(MO) && MO.getReg() == PhysReg;
+static bool isValidRegUseOf(const MachineOperand &MO, MCRegister PhysReg,
+ const TargetRegisterInfo *TRI) {
+ if (!isValidRegUse(MO))
+ return false;
+ if (MO.getReg() == PhysReg)
+ return true;
+ for (MCRegAliasIterator R(PhysReg, TRI, false); R.isValid(); ++R)
+ if (MO.getReg() == *R)
+ return true;
+ return false;
}
static bool isValidRegDef(const MachineOperand &MO) {
return isValidReg(MO) && MO.isDef();
}
-static bool isValidRegDefOf(const MachineOperand &MO, MCRegister PhysReg) {
- return isValidRegDef(MO) && MO.getReg() == PhysReg;
+static bool isValidRegDefOf(const MachineOperand &MO, MCRegister PhysReg,
+ const TargetRegisterInfo *TRI) {
+ if (!isValidRegDef(MO))
+ return false;
+ if (MO.getReg() == PhysReg)
+ return true;
+ for (MCRegAliasIterator R(PhysReg, TRI, false); R.isValid(); ++R)
+ if (MO.getReg() == *R)
+ return true;
+ return false;
}
void ReachingDefAnalysis::enterBasicBlock(MachineBasicBlock *MBB) {
@@ -337,7 +353,7 @@ void ReachingDefAnalysis::getReachingLocalUses(MachineInstr *Def,
return;
for (auto &MO : MI->operands()) {
- if (!isValidRegUseOf(MO, PhysReg))
+ if (!isValidRegUseOf(MO, PhysReg, TRI))
continue;
Uses.insert(&*MI);
@@ -353,7 +369,7 @@ bool ReachingDefAnalysis::getLiveInUses(MachineBasicBlock *MBB,
for (MachineInstr &MI :
instructionsWithoutDebug(MBB->instr_begin(), MBB->instr_end())) {
for (auto &MO : MI.operands()) {
- if (!isValidRegUseOf(MO, PhysReg))
+ if (!isValidRegUseOf(MO, PhysReg, TRI))
continue;
if (getReachingDef(&MI, PhysReg) >= 0)
return false;
@@ -381,8 +397,7 @@ void ReachingDefAnalysis::getGlobalUses(MachineInstr *MI, MCRegister PhysReg,
SmallVector<MachineBasicBlock *, 4> ToVisit(MBB->successors());
SmallPtrSet<MachineBasicBlock*, 4>Visited;
while (!ToVisit.empty()) {
- MachineBasicBlock *MBB = ToVisit.back();
- ToVisit.pop_back();
+ MachineBasicBlock *MBB = ToVisit.pop_back_val();
if (Visited.count(MBB) || !MBB->isLiveIn(PhysReg))
continue;
if (getLiveInUses(MBB, PhysReg, Uses))
@@ -419,7 +434,7 @@ void ReachingDefAnalysis::getLiveOuts(MachineBasicBlock *MBB,
VisitedBBs.insert(MBB);
LivePhysRegs LiveRegs(*TRI);
LiveRegs.addLiveOuts(*MBB);
- if (!LiveRegs.contains(PhysReg))
+ if (LiveRegs.available(MBB->getParent()->getRegInfo(), PhysReg))
return;
if (auto *Def = getLocalLiveOutMIDef(MBB, PhysReg))
@@ -469,7 +484,7 @@ bool ReachingDefAnalysis::isRegUsedAfter(MachineInstr *MI,
LiveRegs.addLiveOuts(*MBB);
// Yes if the register is live out of the basic block.
- if (LiveRegs.contains(PhysReg))
+ if (!LiveRegs.available(MBB->getParent()->getRegInfo(), PhysReg))
return true;
// Walk backwards through the block to see if the register is live at some
@@ -477,7 +492,7 @@ bool ReachingDefAnalysis::isRegUsedAfter(MachineInstr *MI,
for (MachineInstr &Last :
instructionsWithoutDebug(MBB->instr_rbegin(), MBB->instr_rend())) {
LiveRegs.stepBackward(Last);
- if (LiveRegs.contains(PhysReg))
+ if (!LiveRegs.available(MBB->getParent()->getRegInfo(), PhysReg))
return InstIds.lookup(&Last) > InstIds.lookup(MI);
}
return false;
@@ -502,7 +517,7 @@ bool ReachingDefAnalysis::isReachingDefLiveOut(MachineInstr *MI,
MachineBasicBlock *MBB = MI->getParent();
LivePhysRegs LiveRegs(*TRI);
LiveRegs.addLiveOuts(*MBB);
- if (!LiveRegs.contains(PhysReg))
+ if (LiveRegs.available(MBB->getParent()->getRegInfo(), PhysReg))
return false;
auto Last = MBB->getLastNonDebugInstr();
@@ -512,7 +527,7 @@ bool ReachingDefAnalysis::isReachingDefLiveOut(MachineInstr *MI,
// Finally check that the last instruction doesn't redefine the register.
for (auto &MO : Last->operands())
- if (isValidRegDefOf(MO, PhysReg))
+ if (isValidRegDefOf(MO, PhysReg, TRI))
return false;
return true;
@@ -523,7 +538,7 @@ ReachingDefAnalysis::getLocalLiveOutMIDef(MachineBasicBlock *MBB,
MCRegister PhysReg) const {
LivePhysRegs LiveRegs(*TRI);
LiveRegs.addLiveOuts(*MBB);
- if (!LiveRegs.contains(PhysReg))
+ if (LiveRegs.available(MBB->getParent()->getRegInfo(), PhysReg))
return nullptr;
auto Last = MBB->getLastNonDebugInstr();
@@ -532,7 +547,7 @@ ReachingDefAnalysis::getLocalLiveOutMIDef(MachineBasicBlock *MBB,
int Def = getReachingDef(&*Last, PhysReg);
for (auto &MO : Last->operands())
- if (isValidRegDefOf(MO, PhysReg))
+ if (isValidRegDefOf(MO, PhysReg, TRI))
return &*Last;
return Def < 0 ? nullptr : getInstFromId(MBB, Def);
@@ -700,7 +715,7 @@ bool ReachingDefAnalysis::isSafeToDefRegAt(MachineInstr *MI, MCRegister PhysReg,
if (Ignore.count(&*I))
continue;
for (auto &MO : I->operands())
- if (isValidRegDefOf(MO, PhysReg))
+ if (isValidRegDefOf(MO, PhysReg, TRI))
return false;
}
}
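The ReachingDefAnalysis change above widens isValidRegUseOf() and isValidRegDefOf(): an operand now matches PhysReg if it is PhysReg itself or any register aliasing it, where the real code walks MCRegAliasIterator. Sketched below against a hypothetical alias table; the register numbers are made up for illustration:

#include <vector>

using Reg = unsigned;

// Hypothetical alias query: register 1 overlaps registers 2 and 3, e.g. a
// super-register and its sub-registers.
static std::vector<Reg> aliasesOf(Reg PhysReg) {
  if (PhysReg == 1)
    return {1, 2, 3};
  return {PhysReg};
}

// Exact match first, then any alias of PhysReg, mirroring the widened check.
static bool matchesRegOrAlias(Reg OperandReg, Reg PhysReg) {
  if (OperandReg == PhysReg)
    return true;
  for (Reg R : aliasesOf(PhysReg))
    if (OperandReg == R)
      return true;
  return false;
}

int main() {
  return matchesRegOrAlias(/*OperandReg=*/2, /*PhysReg=*/1) ? 0 : 1; // returns 0: alias match
}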
diff --git a/llvm/lib/CodeGen/RegAllocBasic.cpp b/llvm/lib/CodeGen/RegAllocBasic.cpp
index b65d58077958..a9816b13e798 100644
--- a/llvm/lib/CodeGen/RegAllocBasic.cpp
+++ b/llvm/lib/CodeGen/RegAllocBasic.cpp
@@ -217,9 +217,7 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, MCRegister PhysReg,
// Collect interferences assigned to any alias of the physical register.
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
- Q.collectInterferingVRegs();
- for (unsigned i = Q.interferingVRegs().size(); i; --i) {
- LiveInterval *Intf = Q.interferingVRegs()[i - 1];
+ for (auto *Intf : reverse(Q.interferingVRegs())) {
if (!Intf->isSpillable() || Intf->weight() > VirtReg.weight())
return false;
Intfs.push_back(Intf);
diff --git a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h
new file mode 100644
index 000000000000..85fd3207888b
--- /dev/null
+++ b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h
@@ -0,0 +1,90 @@
+//===- RegAllocEvictionAdvisor.h - Interference resolution ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_REGALLOCEVICTIONADVISOR_H
+#define LLVM_CODEGEN_REGALLOCEVICTIONADVISOR_H
+
+#include "AllocationOrder.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Register.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/Pass.h"
+
+namespace llvm {
+
+using SmallVirtRegSet = SmallSet<Register, 16>;
+
+// Live ranges pass through a number of stages as we try to allocate them.
+// Some of the stages may also create new live ranges:
+//
+// - Region splitting.
+// - Per-block splitting.
+// - Local splitting.
+// - Spilling.
+//
+// Ranges produced by one of the stages skip the previous stages when they are
+// dequeued. This improves performance because we can skip interference checks
+// that are unlikely to give any results. It also guarantees that the live
+// range splitting algorithm terminates, something that is otherwise hard to
+// ensure.
+enum LiveRangeStage {
+ /// Newly created live range that has never been queued.
+ RS_New,
+
+ /// Only attempt assignment and eviction. Then requeue as RS_Split.
+ RS_Assign,
+
+ /// Attempt live range splitting if assignment is impossible.
+ RS_Split,
+
+ /// Attempt more aggressive live range splitting that is guaranteed to make
+ /// progress. This is used for split products that may not be making
+ /// progress.
+ RS_Split2,
+
+ /// Live range will be spilled. No more splitting will be attempted.
+ RS_Spill,
+
+ /// Live range is in memory. Because of other evictions, it might get moved
+ /// into a register in the end.
+ RS_Memory,
+
+ /// There is nothing more we can do to this live range. Abort compilation
+ /// if it can't be assigned.
+ RS_Done
+};
+
+/// Cost of evicting interference - used by default advisor, and the eviction
+/// chain heuristic in RegAllocGreedy.
+// FIXME: this can probably be made an implementation detail of the default
+// advisor, if the eviction chain logic can be refactored.
+struct EvictionCost {
+ unsigned BrokenHints = 0; ///< Total number of broken hints.
+ float MaxWeight = 0; ///< Maximum spill weight evicted.
+
+ EvictionCost() = default;
+
+ bool isMax() const { return BrokenHints == ~0u; }
+
+ void setMax() { BrokenHints = ~0u; }
+
+ void setBrokenHints(unsigned NHints) { BrokenHints = NHints; }
+
+ bool operator<(const EvictionCost &O) const {
+ return std::tie(BrokenHints, MaxWeight) <
+ std::tie(O.BrokenHints, O.MaxWeight);
+ }
+};
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_REGALLOCEVICTIONADVISOR_H
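Note on the EvictionCost comparison added above: the std::tie comparison is lexicographic, so broken hints always dominate and MaxWeight only breaks ties. A minimal standalone sketch (plain C++, not part of the patch; the Cost struct here mirrors EvictionCost only for illustration):

#include <cassert>
#include <tuple>

struct Cost {
  unsigned BrokenHints = 0;
  float MaxWeight = 0;
  bool operator<(const Cost &O) const {
    // Lexicographic: compare BrokenHints first, MaxWeight only on ties.
    return std::tie(BrokenHints, MaxWeight) <
           std::tie(O.BrokenHints, O.MaxWeight);
  }
};

int main() {
  Cost A{0, 5.0f}; // no broken hints, heavy interference
  Cost B{1, 0.1f}; // one broken hint, light interference
  assert(A < B);   // fewer broken hints is always cheaper
  Cost C{1, 0.2f};
  assert(B < C);   // equal hints, lighter max weight is cheaper
}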
diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp
index 707161d5a8b0..68920e2e50df 100644
--- a/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseSet.h"
@@ -432,7 +433,7 @@ void RegAllocFast::spill(MachineBasicBlock::iterator Before, Register VirtReg,
// every definition of it, meaning we can switch all the DBG_VALUEs over
// to just reference the stack slot.
SmallVectorImpl<MachineOperand *> &LRIDbgOperands = LiveDbgValueMap[VirtReg];
- SmallDenseMap<MachineInstr *, SmallVector<const MachineOperand *>>
+ SmallMapVector<MachineInstr *, SmallVector<const MachineOperand *>, 2>
SpilledOperandsMap;
for (MachineOperand *MO : LRIDbgOperands)
SpilledOperandsMap[MO->getParent()].push_back(MO);
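The switch from SmallDenseMap to SmallMapVector above presumably matters because DenseMap iteration order is unspecified, while MapVector iterates in insertion order, so the per-instruction DBG_VALUE rewrites are visited deterministically. A standalone sketch (assumed illustration using llvm::MapVector directly, not the pass's types):

#include "llvm/ADT/MapVector.h"
#include <cstdio>

int main() {
  // MapVector preserves insertion order, so iteration is deterministic
  // regardless of key hashing - unlike (Small)DenseMap.
  llvm::MapVector<int, const char *> M;
  M.insert({42, "first"});
  M.insert({7, "second"});
  M.insert({19, "third"});
  for (auto &KV : M)
    std::printf("%d -> %s\n", KV.first, KV.second);
  // Always prints 42, 7, 19 in that order.
}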
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 4eb12aa30ee9..5a93b58e0baf 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -15,6 +15,7 @@
#include "InterferenceCache.h"
#include "LiveDebugVariables.h"
#include "RegAllocBase.h"
+#include "RegAllocEvictionAdvisor.h"
#include "SpillPlacement.h"
#include "SplitKit.h"
#include "llvm/ADT/ArrayRef.h"
@@ -57,6 +58,7 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCRegisterInfo.h"
@@ -69,7 +71,6 @@
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/IR/DebugInfoMetadata.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -148,7 +149,6 @@ class RAGreedy : public MachineFunctionPass,
// Convenient shortcuts.
using PQueue = std::priority_queue<std::pair<unsigned, unsigned>>;
using SmallLISet = SmallPtrSet<LiveInterval *, 4>;
- using SmallVirtRegSet = SmallSet<Register, 16>;
// context
MachineFunction *MF;
@@ -175,47 +175,6 @@ class RAGreedy : public MachineFunctionPass,
unsigned NextCascade;
std::unique_ptr<VirtRegAuxInfo> VRAI;
- // Live ranges pass through a number of stages as we try to allocate them.
- // Some of the stages may also create new live ranges:
- //
- // - Region splitting.
- // - Per-block splitting.
- // - Local splitting.
- // - Spilling.
- //
- // Ranges produced by one of the stages skip the previous stages when they are
- // dequeued. This improves performance because we can skip interference checks
- // that are unlikely to give any results. It also guarantees that the live
- // range splitting algorithm terminates, something that is otherwise hard to
- // ensure.
- enum LiveRangeStage {
- /// Newly created live range that has never been queued.
- RS_New,
-
- /// Only attempt assignment and eviction. Then requeue as RS_Split.
- RS_Assign,
-
- /// Attempt live range splitting if assignment is impossible.
- RS_Split,
-
- /// Attempt more aggressive live range splitting that is guaranteed to make
- /// progress. This is used for split products that may not be making
- /// progress.
- RS_Split2,
-
- /// Live range will be spilled. No more splitting will be attempted.
- RS_Spill,
-
-
- /// Live range is in memory. Because of other evictions, it might get moved
- /// in a register in the end.
- RS_Memory,
-
- /// There is nothing more we can do to this live range. Abort compilation
- /// if it can't be assigned.
- RS_Done
- };
-
// Enum CutOffStage to keep a track whether the register allocation failed
// because of the cutoffs encountered in last chance recoloring.
// Note: This is used as bitmask. New value should be next power of 2.
@@ -267,25 +226,6 @@ class RAGreedy : public MachineFunctionPass,
}
}
- /// Cost of evicting interference.
- struct EvictionCost {
- unsigned BrokenHints = 0; ///< Total number of broken hints.
- float MaxWeight = 0; ///< Maximum spill weight evicted.
-
- EvictionCost() = default;
-
- bool isMax() const { return BrokenHints == ~0u; }
-
- void setMax() { BrokenHints = ~0u; }
-
- void setBrokenHints(unsigned NHints) { BrokenHints = NHints; }
-
- bool operator<(const EvictionCost &O) const {
- return std::tie(BrokenHints, MaxWeight) <
- std::tie(O.BrokenHints, O.MaxWeight);
- }
- };
-
/// EvictionTrack - Keeps track of past evictions in order to optimize region
/// split decision.
class EvictionTrack {
@@ -488,6 +428,8 @@ private:
MCRegister tryAssign(LiveInterval&, AllocationOrder&,
SmallVectorImpl<Register>&,
const SmallVirtRegSet&);
+ MCRegister tryFindEvictionCandidate(LiveInterval &, const AllocationOrder &,
+ uint8_t, const SmallVirtRegSet &) const;
MCRegister tryEvict(LiveInterval &, AllocationOrder &,
SmallVectorImpl<Register> &, uint8_t,
const SmallVirtRegSet &);
@@ -760,10 +702,9 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {
// Giant live ranges fall back to the global assignment heuristic, which
// prevents excessive spilling in pathological cases.
bool ReverseLocal = TRI->reverseLocalAssignment();
- bool AddPriorityToGlobal = TRI->addAllocPriorityToGlobalRanges();
const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
bool ForceGlobal = !ReverseLocal &&
- (Size / SlotIndex::InstrDist) > (2 * RC.getNumRegs());
+ (Size / SlotIndex::InstrDist) > (2 * RCI.getNumAllocatableRegs(&RC));
if (ExtraRegInfo[Reg].Stage == RS_Assign && !ForceGlobal && !LI->empty() &&
LIS->intervalIsInOneMBB(*LI)) {
@@ -785,8 +726,7 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {
// interference. Mark a bit to prioritize global above local ranges.
Prio = (1u << 29) + Size;
- if (AddPriorityToGlobal)
- Prio |= RC.AllocationPriority << 24;
+ Prio |= RC.AllocationPriority << 24;
}
// Mark a higher bit to prioritize global and local above RS_Split.
Prio |= (1u << 31);
@@ -860,7 +800,7 @@ MCRegister RAGreedy::tryAssign(LiveInterval &VirtReg,
return PhysReg;
LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is available at cost "
- << Cost << '\n');
+ << (unsigned)Cost << '\n');
MCRegister CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost, FixedRegisters);
return CheapReg ? CheapReg : PhysReg;
}
@@ -957,11 +897,12 @@ bool RAGreedy::canEvictInterference(
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
// If there is 10 or more interferences, chances are one is heavier.
- if (Q.collectInterferingVRegs(10) >= 10)
+ const auto &Interferences = Q.interferingVRegs(10);
+ if (Interferences.size() >= 10)
return false;
// Check if any interfering live range is heavier than MaxWeight.
- for (LiveInterval *Intf : reverse(Q.interferingVRegs())) {
+ for (LiveInterval *Intf : reverse(Interferences)) {
assert(Register::isVirtualRegister(Intf->reg()) &&
"Only expecting virtual register interference from query");
@@ -1039,7 +980,6 @@ bool RAGreedy::canEvictInterferenceInRange(const LiveInterval &VirtReg,
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
- Q.collectInterferingVRegs();
// Check if any interfering live range is heavier than MaxWeight.
for (const LiveInterval *Intf : reverse(Q.interferingVRegs())) {
@@ -1129,7 +1069,6 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, MCRegister PhysReg,
// should be fast, we may need to recalculate if when different physregs
// overlap the same register unit so we had different SubRanges queried
// against it.
- Q.collectInterferingVRegs();
ArrayRef<LiveInterval*> IVR = Q.interferingVRegs();
Intfs.append(IVR.begin(), IVR.end());
}
@@ -1162,17 +1101,9 @@ bool RAGreedy::isUnusedCalleeSavedReg(MCRegister PhysReg) const {
return !Matrix->isPhysRegUsed(PhysReg);
}
-/// tryEvict - Try to evict all interferences for a physreg.
-/// @param VirtReg Currently unassigned virtual register.
-/// @param Order Physregs to try.
-/// @return Physreg to assign VirtReg, or 0.
-MCRegister RAGreedy::tryEvict(LiveInterval &VirtReg, AllocationOrder &Order,
- SmallVectorImpl<Register> &NewVRegs,
- uint8_t CostPerUseLimit,
- const SmallVirtRegSet &FixedRegisters) {
- NamedRegionTimer T("evict", "Evict", TimerGroupName, TimerGroupDescription,
- TimePassesIsEnabled);
-
+MCRegister RAGreedy::tryFindEvictionCandidate(
+ LiveInterval &VirtReg, const AllocationOrder &Order,
+ uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const {
// Keep track of the cheapest interference seen so far.
EvictionCost BestCost;
BestCost.setMax();
@@ -1230,7 +1161,22 @@ MCRegister RAGreedy::tryEvict(LiveInterval &VirtReg, AllocationOrder &Order,
if (I.isHint())
break;
}
+ return BestPhys;
+}
+/// tryEvict - Try to evict all interferences for a physreg.
+/// @param VirtReg Currently unassigned virtual register.
+/// @param Order Physregs to try.
+/// @return Physreg to assign VirtReg, or 0.
+MCRegister RAGreedy::tryEvict(LiveInterval &VirtReg, AllocationOrder &Order,
+ SmallVectorImpl<Register> &NewVRegs,
+ uint8_t CostPerUseLimit,
+ const SmallVirtRegSet &FixedRegisters) {
+ NamedRegionTimer T("evict", "Evict", TimerGroupName, TimerGroupDescription,
+ TimePassesIsEnabled);
+
+ MCRegister BestPhys =
+ tryFindEvictionCandidate(VirtReg, Order, CostPerUseLimit, FixedRegisters);
if (BestPhys.isValid())
evictInterference(VirtReg, BestPhys, NewVRegs);
return BestPhys;
@@ -2135,7 +2081,7 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
// the constraints on the virtual register.
// Otherwise, splitting just inserts uncoalescable copies that do not help
// the allocation.
- for (const auto &Use : Uses) {
+ for (const SlotIndex Use : Uses) {
if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Use))
if (MI->isFullCopy() ||
SuperRCNumAllocatableRegs ==
@@ -2462,12 +2408,12 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
bool LiveAfter = BestAfter != NumGaps || BI.LiveOut;
unsigned NewGaps = LiveBefore + BestAfter - BestBefore + LiveAfter;
if (NewGaps >= NumGaps) {
- LLVM_DEBUG(dbgs() << "Tagging non-progress ranges: ");
+ LLVM_DEBUG(dbgs() << "Tagging non-progress ranges:");
assert(!ProgressRequired && "Didn't make progress when it was required.");
for (unsigned I = 0, E = IntvMap.size(); I != E; ++I)
if (IntvMap[I] == 1) {
setStage(LIS->getInterval(LREdit.get(I)), RS_Split2);
- LLVM_DEBUG(dbgs() << printReg(LREdit.get(I)));
+ LLVM_DEBUG(dbgs() << ' ' << printReg(LREdit.get(I)));
}
LLVM_DEBUG(dbgs() << '\n');
}
@@ -2506,17 +2452,6 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
SA->analyze(&VirtReg);
- // FIXME: SplitAnalysis may repair broken live ranges coming from the
- // coalescer. That may cause the range to become allocatable which means that
- // tryRegionSplit won't be making progress. This check should be replaced with
- // an assertion when the coalescer is fixed.
- if (SA->didRepairRange()) {
- // VirtReg has changed, so all cached queries are invalid.
- Matrix->invalidateVirtRegs();
- if (Register PhysReg = tryAssign(VirtReg, Order, NewVRegs, FixedRegisters))
- return PhysReg;
- }
-
// First try to split around a region spanning multiple blocks. RS_Split2
// ranges already made dubious progress with region splitting, so they go
// straight to single block splitting.
@@ -2560,8 +2495,9 @@ bool RAGreedy::mayRecolorAllInterferences(
LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
// If there is LastChanceRecoloringMaxInterference or more interferences,
// chances are one would not be recolorable.
- if (Q.collectInterferingVRegs(LastChanceRecoloringMaxInterference) >=
- LastChanceRecoloringMaxInterference && !ExhaustiveSearch) {
+ if (Q.interferingVRegs(LastChanceRecoloringMaxInterference).size() >=
+ LastChanceRecoloringMaxInterference &&
+ !ExhaustiveSearch) {
LLVM_DEBUG(dbgs() << "Early abort: too many interferences.\n");
CutOffInfo |= CO_Interf;
return false;
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 751f79e66b73..c847068bca90 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -932,12 +932,8 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
// = B
// Update uses of IntA of the specific Val# with IntB.
- for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(IntA.reg()),
- UE = MRI->use_end();
- UI != UE;
- /* ++UI is below because of possible MI removal */) {
- MachineOperand &UseMO = *UI;
- ++UI;
+ for (MachineOperand &UseMO :
+ llvm::make_early_inc_range(MRI->use_operands(IntA.reg()))) {
if (UseMO.isUndef())
continue;
MachineInstr *UseMI = UseMO.getParent();
@@ -1573,9 +1569,8 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
// If the virtual SrcReg is completely eliminated, update all DBG_VALUEs
// to describe DstReg instead.
if (MRI->use_nodbg_empty(SrcReg)) {
- for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg);
- UI != MRI->use_end();) {
- MachineOperand &UseMO = *UI++;
+ for (MachineOperand &UseMO :
+ llvm::make_early_inc_range(MRI->use_operands(SrcReg))) {
MachineInstr *UseMI = UseMO.getParent();
if (UseMI->isDebugInstr()) {
if (Register::isPhysicalRegister(DstReg))
@@ -3708,7 +3703,7 @@ void RegisterCoalescer::buildVRegToDbgValueMap(MachineFunction &MF)
// vreg => DbgValueLoc map.
auto CloseNewDVRange = [this, &ToInsert](SlotIndex Slot) {
for (auto *X : ToInsert) {
- for (auto Op : X->debug_operands()) {
+ for (const auto &Op : X->debug_operands()) {
if (Op.isReg() && Op.getReg().isVirtual())
DbgVRegToValues[Op.getReg()].push_back({Slot, X});
}
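The RegisterCoalescer hunks above replace manual "save the iterator, advance, then touch the element" loops with llvm::make_early_inc_range, which post-increments the wrapped iterator when it hands back the element, so the current element (or its parent instruction) can be erased without derailing the loop. A standalone sketch (assumed; uses a std::map instead of a machine use list):

#include "llvm/ADT/STLExtras.h"
#include <cstdio>
#include <map>

int main() {
  std::map<int, const char *> Uses = {{1, "keep"}, {2, "drop"}, {3, "keep"}};
  // The wrapped iterator has already moved past KV when the body runs,
  // so erasing the current node is safe in a node-based container.
  for (auto &KV : llvm::make_early_inc_range(Uses))
    if (KV.second[0] == 'd')
      Uses.erase(KV.first);
  for (auto &KV : Uses)
    std::printf("%d ", KV.first); // prints: 1 3
}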
diff --git a/llvm/lib/CodeGen/RegisterScavenging.cpp b/llvm/lib/CodeGen/RegisterScavenging.cpp
index e35cf7aa6958..c0a07ec4c91d 100644
--- a/llvm/lib/CodeGen/RegisterScavenging.cpp
+++ b/llvm/lib/CodeGen/RegisterScavenging.cpp
@@ -495,21 +495,20 @@ RegScavenger::spill(Register Reg, const TargetRegisterClass &RC, int SPAdj,
// Spill the scavenged register before \p Before.
int FI = Scavenged[SI].FrameIndex;
if (FI < FIB || FI >= FIE) {
- std::string Msg = std::string("Error while trying to spill ") +
- TRI->getName(Reg) + " from class " + TRI->getRegClassName(&RC) +
- ": Cannot scavenge register without an emergency spill slot!";
- report_fatal_error(Msg.c_str());
+ report_fatal_error(Twine("Error while trying to spill ") +
+ TRI->getName(Reg) + " from class " +
+ TRI->getRegClassName(&RC) +
+ ": Cannot scavenge register without an emergency "
+ "spill slot!");
}
- TII->storeRegToStackSlot(*MBB, Before, Reg, true, Scavenged[SI].FrameIndex,
- &RC, TRI);
+ TII->storeRegToStackSlot(*MBB, Before, Reg, true, FI, &RC, TRI);
MachineBasicBlock::iterator II = std::prev(Before);
unsigned FIOperandNum = getFrameIndexOperandNum(*II);
TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this);
// Restore the scavenged register before its use (or first terminator).
- TII->loadRegFromStackSlot(*MBB, UseMI, Reg, Scavenged[SI].FrameIndex,
- &RC, TRI);
+ TII->loadRegFromStackSlot(*MBB, UseMI, Reg, FI, &RC, TRI);
II = std::prev(UseMI);
FIOperandNum = getFrameIndexOperandNum(*II);
diff --git a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
index 1619381967c4..0ff045fa787e 100644
--- a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
+++ b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
@@ -70,7 +70,7 @@ static bool replaceWithTLIFunction(CallInst &CI, const StringRef TLIName) {
// Replace the call to the vector intrinsic with a call
// to the corresponding function from the vector library.
IRBuilder<> IRBuilder(&CI);
- SmallVector<Value *> Args(CI.arg_operands());
+ SmallVector<Value *> Args(CI.args());
// Preserve the operand bundles.
SmallVector<OperandBundleDef, 1> OpBundles;
CI.getOperandBundlesAsDefs(OpBundles);
@@ -106,7 +106,7 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
// all vector operands have identical vector width.
ElementCount VF = ElementCount::getFixed(0);
SmallVector<Type *> ScalarTypes;
- for (auto Arg : enumerate(CI.arg_operands())) {
+ for (auto Arg : enumerate(CI.args())) {
auto *ArgType = Arg.value()->getType();
// Vector calls to intrinsics can still have
// scalar operands for specific arguments.
diff --git a/llvm/lib/CodeGen/SafeStack.cpp b/llvm/lib/CodeGen/SafeStack.cpp
index 94add920f284..50d9d64bfcfd 100644
--- a/llvm/lib/CodeGen/SafeStack.cpp
+++ b/llvm/lib/CodeGen/SafeStack.cpp
@@ -147,7 +147,7 @@ class SafeStack {
///
/// 16 seems like a reasonable upper bound on the alignment of objects that we
/// might expect to appear on the stack on most common targets.
- enum { StackAlignment = 16 };
+ static constexpr uint64_t StackAlignment = 16;
/// Return the value of the stack canary.
Value *getStackGuard(IRBuilder<> &IRB, Function &F);
@@ -221,6 +221,8 @@ public:
bool run();
};
+constexpr uint64_t SafeStack::StackAlignment;
+
uint64_t SafeStack::getStaticAllocaAllocationSize(const AllocaInst* AI) {
uint64_t Size = DL.getTypeAllocSize(AI->getAllocatedType());
if (AI->isArrayAllocation()) {
@@ -519,7 +521,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
StackLayout SSL(StackAlignment);
if (StackGuardSlot) {
Type *Ty = StackGuardSlot->getAllocatedType();
- unsigned Align =
+ uint64_t Align =
std::max(DL.getPrefTypeAlignment(Ty), StackGuardSlot->getAlignment());
SSL.addObject(StackGuardSlot, getStaticAllocaAllocationSize(StackGuardSlot),
Align, SSC.getFullLiveRange());
@@ -532,8 +534,8 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
Size = 1; // Don't create zero-sized stack objects.
// Ensure the object is properly aligned.
- unsigned Align = std::max((unsigned)DL.getPrefTypeAlignment(Ty),
- Arg->getParamAlignment());
+ uint64_t Align =
+ std::max(DL.getPrefTypeAlignment(Ty), Arg->getParamAlignment());
SSL.addObject(Arg, Size, Align, SSC.getFullLiveRange());
}
@@ -544,21 +546,20 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
Size = 1; // Don't create zero-sized stack objects.
// Ensure the object is properly aligned.
- unsigned Align =
- std::max((unsigned)DL.getPrefTypeAlignment(Ty), AI->getAlignment());
+ uint64_t Align = std::max(DL.getPrefTypeAlignment(Ty), AI->getAlignment());
SSL.addObject(AI, Size, Align,
ClColoring ? SSC.getLiveRange(AI) : NoColoringRange);
}
SSL.computeLayout();
- unsigned FrameAlignment = SSL.getFrameAlignment();
+ uint64_t FrameAlignment = SSL.getFrameAlignment();
// FIXME: tell SSL that we start at a less-then-MaxAlignment aligned location
// (AlignmentSkew).
if (FrameAlignment > StackAlignment) {
// Re-align the base pointer according to the max requested alignment.
- assert(isPowerOf2_32(FrameAlignment));
+ assert(isPowerOf2_64(FrameAlignment));
IRB.SetInsertPoint(BasePointer->getNextNode());
BasePointer = cast<Instruction>(IRB.CreateIntToPtr(
IRB.CreateAnd(IRB.CreatePtrToInt(BasePointer, IntPtrTy),
@@ -676,9 +677,9 @@ void SafeStack::moveDynamicAllocasToUnsafeStack(
SP = IRB.CreateSub(SP, Size);
// Align the SP value to satisfy the AllocaInst, type and stack alignments.
- unsigned Align = std::max(
- std::max((unsigned)DL.getPrefTypeAlignment(Ty), AI->getAlignment()),
- (unsigned)StackAlignment);
+ uint64_t Align =
+ std::max(std::max(DL.getPrefTypeAlignment(Ty), AI->getAlignment()),
+ StackAlignment);
assert(isPowerOf2_32(Align));
Value *NewTop = IRB.CreateIntToPtr(
@@ -701,9 +702,8 @@ void SafeStack::moveDynamicAllocasToUnsafeStack(
if (!DynamicAllocas.empty()) {
// Now go through the instructions again, replacing stacksave/stackrestore.
- for (inst_iterator It = inst_begin(&F), Ie = inst_end(&F); It != Ie;) {
- Instruction *I = &*(It++);
- auto II = dyn_cast<IntrinsicInst>(I);
+ for (Instruction &I : llvm::make_early_inc_range(instructions(&F))) {
+ auto *II = dyn_cast<IntrinsicInst>(&I);
if (!II)
continue;
diff --git a/llvm/lib/CodeGen/SafeStackLayout.cpp b/llvm/lib/CodeGen/SafeStackLayout.cpp
index 5d61b3a146b4..7cdda7743c16 100644
--- a/llvm/lib/CodeGen/SafeStackLayout.cpp
+++ b/llvm/lib/CodeGen/SafeStackLayout.cpp
@@ -37,7 +37,7 @@ LLVM_DUMP_METHOD void StackLayout::print(raw_ostream &OS) {
}
}
-void StackLayout::addObject(const Value *V, unsigned Size, unsigned Alignment,
+void StackLayout::addObject(const Value *V, unsigned Size, uint64_t Alignment,
const StackLifetime::LiveRange &Range) {
StackObjects.push_back({V, Size, Alignment, Range});
ObjectAlignments[V] = Alignment;
@@ -45,7 +45,7 @@ void StackLayout::addObject(const Value *V, unsigned Size, unsigned Alignment,
}
static unsigned AdjustStackOffset(unsigned Offset, unsigned Size,
- unsigned Alignment) {
+ uint64_t Alignment) {
return alignTo(Offset + Size, Alignment) - Size;
}
diff --git a/llvm/lib/CodeGen/SafeStackLayout.h b/llvm/lib/CodeGen/SafeStackLayout.h
index f0db1b42aa00..b72450e57080 100644
--- a/llvm/lib/CodeGen/SafeStackLayout.h
+++ b/llvm/lib/CodeGen/SafeStackLayout.h
@@ -22,7 +22,7 @@ namespace safestack {
/// Compute the layout of an unsafe stack frame.
class StackLayout {
- unsigned MaxAlignment;
+ uint64_t MaxAlignment;
struct StackRegion {
unsigned Start;
@@ -39,23 +39,24 @@ class StackLayout {
struct StackObject {
const Value *Handle;
- unsigned Size, Alignment;
+ unsigned Size;
+ uint64_t Alignment;
StackLifetime::LiveRange Range;
};
SmallVector<StackObject, 8> StackObjects;
DenseMap<const Value *, unsigned> ObjectOffsets;
- DenseMap<const Value *, unsigned> ObjectAlignments;
+ DenseMap<const Value *, uint64_t> ObjectAlignments;
void layoutObject(StackObject &Obj);
public:
- StackLayout(unsigned StackAlignment) : MaxAlignment(StackAlignment) {}
+ StackLayout(uint64_t StackAlignment) : MaxAlignment(StackAlignment) {}
/// Add an object to the stack frame. Value pointer is opaque and used as a
/// handle to retrieve the object's offset in the frame later.
- void addObject(const Value *V, unsigned Size, unsigned Alignment,
+ void addObject(const Value *V, unsigned Size, uint64_t Alignment,
const StackLifetime::LiveRange &Range);
/// Run the layout computation for all previously added objects.
@@ -65,13 +66,13 @@ public:
unsigned getObjectOffset(const Value *V) { return ObjectOffsets[V]; }
/// Returns the alignment of the object
- unsigned getObjectAlignment(const Value *V) { return ObjectAlignments[V]; }
+ uint64_t getObjectAlignment(const Value *V) { return ObjectAlignments[V]; }
/// Returns the size of the entire frame.
unsigned getFrameSize() { return Regions.empty() ? 0 : Regions.back().End; }
/// Returns the alignment of the frame.
- unsigned getFrameAlignment() { return MaxAlignment; }
+ uint64_t getFrameAlignment() { return MaxAlignment; }
void print(raw_ostream &OS);
};
diff --git a/llvm/lib/CodeGen/ScheduleDAG.cpp b/llvm/lib/CodeGen/ScheduleDAG.cpp
index 60f8eec1b9bc..ef3afab2b730 100644
--- a/llvm/lib/CodeGen/ScheduleDAG.cpp
+++ b/llvm/lib/CodeGen/ScheduleDAG.cpp
@@ -577,8 +577,7 @@ void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound,
SU = WorkList.back();
WorkList.pop_back();
Visited.set(SU->NodeNum);
- for (const SDep &SuccDep
- : make_range(SU->Succs.rbegin(), SU->Succs.rend())) {
+ for (const SDep &SuccDep : llvm::reverse(SU->Succs)) {
unsigned s = SuccDep.getSUnit()->NodeNum;
// Edges to non-SUnits are allowed but ignored (e.g. ExitSU).
if (s >= Node2Index.size())
diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index daff3af3bc3c..3f013eb6024e 100644
--- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -271,15 +271,10 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
if (!ImplicitPseudoDef && !ImplicitPseudoUse) {
Dep.setLatency(SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
RegUse, UseOp));
- ST.adjustSchedDependency(SU, OperIdx, UseSU, UseOp, Dep);
} else {
Dep.setLatency(0);
- // FIXME: We could always let target to adjustSchedDependency(), and
- // remove this condition, but that currently asserts in Hexagon BE.
- if (SU->getInstr()->isBundle() || (RegUse && RegUse->isBundle()))
- ST.adjustSchedDependency(SU, OperIdx, UseSU, UseOp, Dep);
}
-
+ ST.adjustSchedDependency(SU, OperIdx, UseSU, UseOp, Dep);
UseSU->addPred(Dep);
}
}
@@ -1117,7 +1112,7 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) {
LiveRegs.addLiveOuts(MBB);
// Examine block from end to start...
- for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
+ for (MachineInstr &MI : llvm::reverse(MBB)) {
if (MI.isDebugOrPseudoInstr())
continue;
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index b104e995019f..ce400ea43f29 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -129,12 +129,12 @@ static cl::opt<unsigned> StoreMergeDependenceLimit(
static cl::opt<bool> EnableReduceLoadOpStoreWidth(
"combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
- cl::desc("DAG cominber enable reducing the width of load/op/store "
+ cl::desc("DAG combiner enable reducing the width of load/op/store "
"sequence"));
static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
"combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
- cl::desc("DAG cominber enable load/<replace bytes>/store with "
+ cl::desc("DAG combiner enable load/<replace bytes>/store with "
"a narrower store"));
namespace {
@@ -319,7 +319,7 @@ namespace {
/// If so, return true.
bool SimplifyDemandedBits(SDValue Op) {
unsigned BitWidth = Op.getScalarValueSizeInBits();
- APInt DemandedBits = APInt::getAllOnesValue(BitWidth);
+ APInt DemandedBits = APInt::getAllOnes(BitWidth);
return SimplifyDemandedBits(Op, DemandedBits);
}
@@ -345,7 +345,7 @@ namespace {
return false;
unsigned NumElts = Op.getValueType().getVectorNumElements();
- APInt DemandedElts = APInt::getAllOnesValue(NumElts);
+ APInt DemandedElts = APInt::getAllOnes(NumElts);
return SimplifyDemandedVectorElts(Op, DemandedElts);
}
@@ -436,7 +436,7 @@ namespace {
SDValue visitOR(SDNode *N);
SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
SDValue visitXOR(SDNode *N);
- SDValue SimplifyVBinOp(SDNode *N);
+ SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
SDValue visitSHL(SDNode *N);
SDValue visitSRA(SDNode *N);
SDValue visitSRL(SDNode *N);
@@ -515,6 +515,7 @@ namespace {
SDValue visitFP_TO_FP16(SDNode *N);
SDValue visitFP16_TO_FP(SDNode *N);
SDValue visitVECREDUCE(SDNode *N);
+ SDValue visitVPOp(SDNode *N);
SDValue visitFADDForFMACombine(SDNode *N);
SDValue visitFSUBForFMACombine(SDNode *N);
@@ -615,7 +616,7 @@ namespace {
SmallVectorImpl<SDValue> &Aliases);
/// Return true if there is any possibility that the two addresses overlap.
- bool isAlias(SDNode *Op0, SDNode *Op1) const;
+ bool mayAlias(SDNode *Op0, SDNode *Op1) const;
/// Walk up chain skipping non-aliasing memory nodes, looking for a better
/// chain (aliasing node.)
@@ -1062,21 +1063,22 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
if (N0.getOpcode() != Opc)
return SDValue();
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
- if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+
+ if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N01))) {
+ if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1))) {
// Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
- if (SDValue OpNode =
- DAG.FoldConstantArithmetic(Opc, DL, VT, {N0.getOperand(1), N1}))
- return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
+ if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1}))
+ return DAG.getNode(Opc, DL, VT, N00, OpNode);
return SDValue();
}
if (N0.hasOneUse()) {
// Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
// iff (op x, c1) has one use
- SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
- if (!OpNode.getNode())
- return SDValue();
- return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
+ if (SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1))
+ return DAG.getNode(Opc, DL, VT, OpNode, N01);
+ return SDValue();
}
}
return SDValue();
@@ -1738,6 +1740,9 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::VECREDUCE_UMIN:
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N);
+#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
+#include "llvm/IR/VPIntrinsics.def"
+ return visitVPOp(N);
}
return SDValue();
}
@@ -2257,7 +2262,7 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
// fold vector ops
if (VT.isVector()) {
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
// fold (add x, 0) -> x, vector edition
@@ -2439,9 +2444,7 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
N0.getOperand(0));
// fold (add (add (xor a, -1), b), 1) -> (sub b, a)
- if (N0.getOpcode() == ISD::ADD ||
- N0.getOpcode() == ISD::UADDO ||
- N0.getOpcode() == ISD::SADDO) {
+ if (N0.getOpcode() == ISD::ADD) {
SDValue A, Xor;
if (isBitwiseNot(N0.getOperand(0))) {
@@ -2783,7 +2786,7 @@ static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
IsFlip = Const->isOne();
break;
case TargetLowering::ZeroOrNegativeOneBooleanContent:
- IsFlip = Const->isAllOnesValue();
+ IsFlip = Const->isAllOnes();
break;
case TargetLowering::UndefinedBooleanContent:
IsFlip = (Const->getAPIntValue() & 0x01) == 1;
@@ -3259,7 +3262,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
// fold vector ops
if (VT.isVector()) {
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
// fold (sub x, 0) -> x, vector edition
@@ -3317,11 +3320,10 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
}
// Convert 0 - abs(x).
- SDValue Result;
if (N1->getOpcode() == ISD::ABS &&
- !TLI.isOperationLegalOrCustom(ISD::ABS, VT) &&
- TLI.expandABS(N1.getNode(), Result, DAG, true))
- return Result;
+ !TLI.isOperationLegalOrCustom(ISD::ABS, VT))
+ if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
+ return Result;
// Fold neg(splat(neg(x)) -> splat(x)
if (VT.isVector()) {
@@ -3785,7 +3787,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// fold vector ops
if (VT.isVector()) {
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
return FoldedVOp;
N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
@@ -3810,18 +3812,18 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
// fold (mul x, 0) -> 0
- if (N1IsConst && ConstValue1.isNullValue())
+ if (N1IsConst && ConstValue1.isZero())
return N1;
// fold (mul x, 1) -> x
- if (N1IsConst && ConstValue1.isOneValue())
+ if (N1IsConst && ConstValue1.isOne())
return N0;
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
// fold (mul x, -1) -> 0-x
- if (N1IsConst && ConstValue1.isAllOnesValue()) {
+ if (N1IsConst && ConstValue1.isAllOnes()) {
SDLoc DL(N);
return DAG.getNode(ISD::SUB, DL, VT,
DAG.getConstant(0, DL, VT), N0);
@@ -3839,7 +3841,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
}
// fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
- if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
+ if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
unsigned Log2Val = (-ConstValue1).logBase2();
SDLoc DL(N);
// FIXME: If the input is something that is easily negated (e.g. a
@@ -3968,7 +3970,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
SmallBitVector ClearMask;
ClearMask.reserve(NumElts);
auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
- if (!V || V->isNullValue()) {
+ if (!V || V->isZero()) {
ClearMask.push_back(true);
return true;
}
@@ -4054,9 +4056,7 @@ SDValue DAGCombiner::useDivRem(SDNode *Node) {
SDValue Op0 = Node->getOperand(0);
SDValue Op1 = Node->getOperand(1);
SDValue combined;
- for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
- UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
- SDNode *User = *UI;
+ for (SDNode *User : Op0.getNode()->uses()) {
if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
User->use_empty())
continue;
@@ -4113,7 +4113,7 @@ static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
// 0 / X -> 0
// 0 % X -> 0
ConstantSDNode *N0C = isConstOrConstSplat(N0);
- if (N0C && N0C->isNullValue())
+ if (N0C && N0C->isZero())
return N0;
// X / X -> 1
@@ -4138,21 +4138,20 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
EVT CCVT = getSetCCResultType(VT);
+ SDLoc DL(N);
// fold vector ops
if (VT.isVector())
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
- SDLoc DL(N);
-
// fold (sdiv c1, c2) -> c1/c2
ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
return C;
// fold (sdiv X, -1) -> 0-X
- if (N1C && N1C->isAllOnesValue())
+ if (N1C && N1C->isAllOnes())
return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
// fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
@@ -4206,11 +4205,11 @@ SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
// Helper for determining whether a value is a power-2 constant scalar or a
// vector of such elements.
auto IsPowerOfTwo = [](ConstantSDNode *C) {
- if (C->isNullValue() || C->isOpaque())
+ if (C->isZero() || C->isOpaque())
return false;
if (C->getAPIntValue().isPowerOf2())
return true;
- if ((-C->getAPIntValue()).isPowerOf2())
+ if (C->getAPIntValue().isNegatedPowerOf2())
return true;
return false;
};
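The repeated renames in this file (getAllOnesValue to getAllOnes, getNullValue to getZero, isNullValue to isZero, isAllOnesValue to isAllOnes) and the new isNegatedPowerOf2 helper track APInt/ConstantSDNode API updates. A standalone sketch (assumed, plain use of llvm::APInt) of the renamed entry points:

#include "llvm/ADT/APInt.h"
#include <cassert>

int main() {
  using llvm::APInt;
  APInt Ones = APInt::getAllOnes(8); // was getAllOnesValue
  APInt Zero = APInt::getZero(8);    // was getNullValue
  assert(Ones.isAllOnes() && Zero.isZero() && !Zero.isOne());
  // isNegatedPowerOf2 replaces the (-C).isPowerOf2() idiom seen in the
  // removed lines above.
  APInt NegPow2(8, static_cast<uint64_t>(-32), /*isSigned=*/true);
  assert(NegPow2.isNegatedPowerOf2()); // -32 == -(1 << 5)
}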
@@ -4283,21 +4282,20 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
EVT CCVT = getSetCCResultType(VT);
+ SDLoc DL(N);
// fold vector ops
if (VT.isVector())
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
- SDLoc DL(N);
-
// fold (udiv c1, c2) -> c1/c2
ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
return C;
// fold (udiv X, -1) -> select(X == -1, 1, 0)
- if (N1C && N1C->getAPIntValue().isAllOnesValue())
+ if (N1C && N1C->isAllOnes())
return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
DAG.getConstant(1, DL, VT),
DAG.getConstant(0, DL, VT));
@@ -4393,7 +4391,7 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
return C;
// fold (urem X, -1) -> select(X == -1, 0, x)
- if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
+ if (!isSigned && N1C && N1C->isAllOnes())
return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
DAG.getConstant(0, DL, VT), N0);
@@ -4477,6 +4475,11 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
return C;
+ // canonicalize constant to RHS.
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
+
// fold (mulhs x, 0) -> 0
if (isNullConstant(N1))
return N1;
@@ -4529,6 +4532,11 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
return C;
+ // canonicalize constant to RHS.
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
+
// fold (mulhu x, 0) -> 0
if (isNullConstant(N1))
return N1;
@@ -4569,6 +4577,12 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
}
}
+ // Simplify the operands using demanded-bits information.
+ // We don't have demanded bits support for MULHU so this just enables constant
+ // folding based on known bits.
+ if (SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
return SDValue();
}
@@ -4770,20 +4784,21 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
unsigned Opcode = N->getOpcode();
+ SDLoc DL(N);
// fold vector ops
if (VT.isVector())
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
// fold operation with constant operands.
- if (SDValue C = DAG.FoldConstantArithmetic(Opcode, SDLoc(N), VT, {N0, N1}))
+ if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
return C;
// canonicalize constant to RHS
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
- return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
+ return DAG.getNode(N->getOpcode(), DL, VT, N1, N0);
// Is sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
// Only do this if the current op isn't legal and the flipped is.
@@ -4799,7 +4814,7 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
default: llvm_unreachable("Unknown MINMAX opcode");
}
if (TLI.isOperationLegal(AltOpcode, VT))
- return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
+ return DAG.getNode(AltOpcode, DL, VT, N0, N1);
}
// Simplify the operands using demanded-bits information.
@@ -5135,8 +5150,9 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
return V;
+ // TODO: Rewrite this to return a new 'AND' instead of using CombineTo.
if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
- VT.getSizeInBits() <= 64) {
+ VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
// Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
@@ -5608,6 +5624,39 @@ static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
return DAG.getZExtOrTrunc(Setcc, DL, VT);
}
+/// For targets that support usubsat, match a bit-hack form of that operation
+/// that ends in 'and' and convert it.
+static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N1.getValueType();
+
+ // Canonicalize SRA as operand 1.
+ if (N0.getOpcode() == ISD::SRA)
+ std::swap(N0, N1);
+
+ // xor/add with SMIN (signmask) are logically equivalent.
+ if (N0.getOpcode() != ISD::XOR && N0.getOpcode() != ISD::ADD)
+ return SDValue();
+
+ if (N1.getOpcode() != ISD::SRA || !N0.hasOneUse() || !N1.hasOneUse() ||
+ N0.getOperand(0) != N1.getOperand(0))
+ return SDValue();
+
+ unsigned BitWidth = VT.getScalarSizeInBits();
+ ConstantSDNode *XorC = isConstOrConstSplat(N0.getOperand(1), true);
+ ConstantSDNode *SraC = isConstOrConstSplat(N1.getOperand(1), true);
+ if (!XorC || !XorC->getAPIntValue().isSignMask() ||
+ !SraC || SraC->getAPIntValue() != BitWidth - 1)
+ return SDValue();
+
+ // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128
+ // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128
+ SDLoc DL(N);
+ SDValue SignMask = DAG.getConstant(XorC->getAPIntValue(), DL, VT);
+ return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0), SignMask);
+}
+
SDValue DAGCombiner::visitAND(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
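The new foldAndToUsubsat above relies on the identity stated in its comments. A standalone sketch (assumed, plain C++) that exhaustively confirms it for i8 values:

#include <cassert>
#include <cstdint>

int main() {
  // Exhaustively confirm the i8 identity rewritten by foldAndToUsubsat:
  //   (X ^ 128) & (X s>> 7)  ==  usubsat(X, 128)
  //   (X + 128) & (X s>> 7)  ==  usubsat(X, 128)
  for (unsigned V = 0; V < 256; ++V) {
    uint8_t X = static_cast<uint8_t>(V);
    uint8_t SraMask = (X & 0x80) ? 0xFF : 0x00; // X s>> 7 for i8
    uint8_t XorForm = static_cast<uint8_t>((X ^ 0x80) & SraMask);
    uint8_t AddForm = static_cast<uint8_t>((X + 0x80) & SraMask);
    uint8_t USubSat = X >= 0x80 ? static_cast<uint8_t>(X - 0x80) : 0;
    assert(XorForm == USubSat && AddForm == USubSat);
  }
}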
@@ -5619,17 +5668,17 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// fold vector ops
if (VT.isVector()) {
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
return FoldedVOp;
// fold (and x, 0) -> 0, vector edition
if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
// do not return N0, because undef node may exist in N0
- return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
+ return DAG.getConstant(APInt::getZero(N0.getScalarValueSizeInBits()),
SDLoc(N), N0.getValueType());
if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
// do not return N1, because undef node may exist in N1
- return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
+ return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()),
SDLoc(N), N1.getValueType());
// fold (and x, -1) -> x, vector edition
@@ -5680,8 +5729,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// if (and x, c) is known to be zero, return 0
unsigned BitWidth = VT.getScalarSizeInBits();
- if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
- APInt::getAllOnesValue(BitWidth)))
+ if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))
return DAG.getConstant(0, SDLoc(N), VT);
if (SDValue NewSel = foldBinOpIntoSelect(N))
@@ -5743,7 +5791,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// Get the constant (if applicable) the zero'th operand is being ANDed with.
// This can be a pure constant or a vector splat, in which case we treat the
// vector as a scalar and use the splat value.
- APInt Constant = APInt::getNullValue(1);
+ APInt Constant = APInt::getZero(1);
if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
Constant = C->getAPIntValue();
} else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
@@ -5774,7 +5822,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
// multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
if ((SplatBitSize % EltBitWidth) == 0) {
- Constant = APInt::getAllOnesValue(EltBitWidth);
+ Constant = APInt::getAllOnes(EltBitWidth);
for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
}
@@ -5801,7 +5849,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
case ISD::NON_EXTLOAD: B = true; break;
}
- if (B && Constant.isAllOnesValue()) {
+ if (B && Constant.isAllOnes()) {
// If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
// preserve semantics once we get rid of the AND.
SDValue NewLoad(Load, 0);
@@ -5971,6 +6019,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (IsAndZeroExtMask(N0, N1))
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0));
+ if (hasOperation(ISD::USUBSAT, VT))
+ if (SDValue V = foldAndToUsubsat(N, DAG))
+ return V;
+
return SDValue();
}
@@ -6385,7 +6437,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
// fold vector ops
if (VT.isVector()) {
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
return FoldedVOp;
// fold (or x, 0) -> x, vector edition
@@ -6926,17 +6978,16 @@ SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
// a rot[lr]. This also matches funnel shift patterns, similar to rotation but
// with different shifted sources.
SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
- // Must be a legal type. Expanded 'n promoted things won't work with rotates.
EVT VT = LHS.getValueType();
- if (!TLI.isTypeLegal(VT))
- return SDValue();
// The target must have at least one rotate/funnel flavor.
+ // We still try to match rotate by constant pre-legalization.
+ // TODO: Support pre-legalization funnel-shift by constant.
bool HasROTL = hasOperation(ISD::ROTL, VT);
bool HasROTR = hasOperation(ISD::ROTR, VT);
bool HasFSHL = hasOperation(ISD::FSHL, VT);
bool HasFSHR = hasOperation(ISD::FSHR, VT);
- if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
+ if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
return SDValue();
// Check for truncated rotate.
@@ -6989,6 +7040,7 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
if (LHSShift.getOpcode() == RHSShift.getOpcode())
return SDValue(); // Shifts must disagree.
+ // TODO: Support pre-legalization funnel-shift by constant.
bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0);
if (!IsRotate && !(HasFSHL || HasFSHR))
return SDValue(); // Requires funnel shift support.
@@ -7017,12 +7069,15 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
};
if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
SDValue Res;
- if (IsRotate && (HasROTL || HasROTR))
- Res = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
- HasROTL ? LHSShiftAmt : RHSShiftAmt);
- else
- Res = DAG.getNode(HasFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
- RHSShiftArg, HasFSHL ? LHSShiftAmt : RHSShiftAmt);
+ if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) {
+ bool UseROTL = !LegalOperations || HasROTL;
+ Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
+ UseROTL ? LHSShiftAmt : RHSShiftAmt);
+ } else {
+ bool UseFSHL = !LegalOperations || HasFSHL;
+ Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
+ RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt);
+ }
// If there is an AND of either shifted operand, apply it to the result.
if (LHSMask.getNode() || RHSMask.getNode()) {
@@ -7046,6 +7101,11 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
return Res;
}
+ // Even pre-legalization, we can't easily rotate/funnel-shift by a variable
+ // shift.
+ if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
+ return SDValue();
+
// If there is a mask here, and we have a variable shift, we can't be sure
// that we're masking out the right stuff.
if (LHSMask.getNode() || RHSMask.getNode())
@@ -7297,7 +7357,7 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
// TODO: If there is evidence that running this later would help, this
// limitation could be removed. Legality checks may need to be added
// for the created store and optional bswap/rotate.
- if (LegalOperations)
+ if (LegalOperations || OptLevel == CodeGenOpt::None)
return SDValue();
// We only handle merging simple stores of 1-4 bytes.
@@ -7672,9 +7732,12 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
// | D |
// Into:
// (x & m) | (y & ~m)
-// If y is a constant, and the 'andn' does not work with immediates,
-// we unfold into a different pattern:
+// If y is a constant, m is not a 'not', and the 'andn' does not work with
+// immediates, we unfold into a different pattern:
// ~(~x & m) & (m | y)
+// If x is a constant, m is a 'not', and the 'andn' does not work with
+// immediates, we unfold into a different pattern:
+// (x | ~m) & ~(~m & ~y)
// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
// the very least that breaks andnpd / andnps patterns, and because those
// patterns are simplified in IR and shouldn't be created in the DAG
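The updated comment above documents two unfoldings of the masked merge (x & m) | (y & ~m). A standalone sketch (assumed, plain C++) that brute-force checks both rewrites over all 8-bit triples:

#include <cassert>
#include <cstdint>

int main() {
  // Check the two unfoldings documented above:
  //   (x & m) | (y & ~m) == ~(~x & m) & (m | y)      (y constant, m not a 'not')
  //   (x & m) | (y & ~m) == (x | ~m) & ~(~m & ~y)    (x constant, m a 'not')
  for (unsigned x = 0; x < 256; ++x)
    for (unsigned y = 0; y < 256; ++y)
      for (unsigned m = 0; m < 256; ++m) {
        uint8_t X = x, Y = y, M = m;
        uint8_t Merge = (X & M) | (Y & ~M);
        uint8_t Unfold1 = ~(~X & M) & (M | Y);
        uint8_t Unfold2 = (X | ~M) & ~(~M & ~Y);
        assert(Merge == Unfold1 && Merge == Unfold2);
      }
}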
@@ -7729,8 +7792,9 @@ SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
SDLoc DL(N);
- // If Y is a constant, check that 'andn' works with immediates.
- if (!TLI.hasAndNot(Y)) {
+ // If Y is a constant, check that 'andn' works with immediates. Unless M is
+ // a bitwise not that would already allow ANDN to be used.
+ if (!TLI.hasAndNot(Y) && !isBitwiseNot(M)) {
assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
// If not, we need to do a bit more work to make sure andn is still used.
SDValue NotX = DAG.getNOT(DL, X, VT);
@@ -7740,6 +7804,19 @@ SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
}
+ // If X is a constant and M is a bitwise not, check that 'andn' works with
+ // immediates.
+ if (!TLI.hasAndNot(X) && isBitwiseNot(M)) {
+ assert(TLI.hasAndNot(Y) && "Only mask is a variable? Unreachable.");
+ // If not, we need to do a bit more work to make sure andn is still used.
+ SDValue NotM = M.getOperand(0);
+ SDValue LHS = DAG.getNode(ISD::OR, DL, VT, X, NotM);
+ SDValue NotY = DAG.getNOT(DL, Y, VT);
+ SDValue RHS = DAG.getNode(ISD::AND, DL, VT, NotM, NotY);
+ SDValue NotRHS = DAG.getNOT(DL, RHS, VT);
+ return DAG.getNode(ISD::AND, DL, VT, LHS, NotRHS);
+ }
+
SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
SDValue NotM = DAG.getNOT(DL, M, VT);
SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
@@ -7751,10 +7828,11 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
+ SDLoc DL(N);
// fold vector ops
if (VT.isVector()) {
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
// fold (xor x, 0) -> x, vector edition
@@ -7765,7 +7843,6 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
}
// fold (xor undef, undef) -> 0. This is a common idiom (misuse).
- SDLoc DL(N);
if (N0.isUndef() && N1.isUndef())
return DAG.getConstant(0, DL, VT);
@@ -7900,7 +7977,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
// shift has been simplified to undef.
uint64_t ShiftAmt = ShiftC->getLimitedValue();
if (ShiftAmt < BitWidth) {
- APInt Ones = APInt::getAllOnesValue(BitWidth);
+ APInt Ones = APInt::getAllOnes(BitWidth);
Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
if (XorC->getAPIntValue() == Ones) {
// If the xor constant is a shifted -1, do a 'not' before the shift:
@@ -8223,7 +8300,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// fold vector ops
if (VT.isVector()) {
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
return FoldedVOp;
BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
@@ -8256,8 +8333,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
return NewSel;
// if (shl x, c) is known to be zero, return 0
- if (DAG.MaskedValueIsZero(SDValue(N, 0),
- APInt::getAllOnesValue(OpSizeInBits)))
+ if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
return DAG.getConstant(0, SDLoc(N), VT);
// fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
@@ -8502,28 +8578,43 @@ static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
// Both operands must be equivalent extend nodes.
SDValue LeftOp = ShiftOperand.getOperand(0);
SDValue RightOp = ShiftOperand.getOperand(1);
+
bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
- if ((!(IsSignExt || IsZeroExt)) || LeftOp.getOpcode() != RightOp.getOpcode())
+ if (!IsSignExt && !IsZeroExt)
return SDValue();
- EVT WideVT1 = LeftOp.getValueType();
- EVT WideVT2 = RightOp.getValueType();
- (void)WideVT2;
+ EVT NarrowVT = LeftOp.getOperand(0).getValueType();
+ unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
+
+ SDValue MulhRightOp;
+ if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
+ unsigned ActiveBits = IsSignExt
+ ? Constant->getAPIntValue().getMinSignedBits()
+ : Constant->getAPIntValue().getActiveBits();
+ if (ActiveBits > NarrowVTSize)
+ return SDValue();
+ MulhRightOp = DAG.getConstant(
+ Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
+ NarrowVT);
+ } else {
+ if (LeftOp.getOpcode() != RightOp.getOpcode())
+ return SDValue();
+ // Check that the two extend nodes are the same type.
+ if (NarrowVT != RightOp.getOperand(0).getValueType())
+ return SDValue();
+ MulhRightOp = RightOp.getOperand(0);
+ }
+
+ EVT WideVT = LeftOp.getValueType();
// Proceed with the transformation if the wide types match.
- assert((WideVT1 == WideVT2) &&
+ assert((WideVT == RightOp.getValueType()) &&
"Cannot have a multiply node with two different operand types.");
- EVT NarrowVT = LeftOp.getOperand(0).getValueType();
- // Check that the two extend nodes are the same type.
- if (NarrowVT != RightOp.getOperand(0).getValueType())
- return SDValue();
-
// Proceed with the transformation if the wide type is twice as large
// as the narrow type.
- unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
- if (WideVT1.getScalarSizeInBits() != 2 * NarrowVTSize)
+ if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
return SDValue();
// Check the shift amount with the narrow type size.
@@ -8541,10 +8632,10 @@ static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
return SDValue();
- SDValue Result = DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0),
- RightOp.getOperand(0));
- return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT1)
- : DAG.getZExtOrTrunc(Result, DL, WideVT1));
+ SDValue Result =
+ DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp);
+ return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT)
+ : DAG.getZExtOrTrunc(Result, DL, WideVT));
}
SDValue DAGCombiner::visitSRA(SDNode *N) {
@@ -8564,7 +8655,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
// fold vector ops
if (VT.isVector())
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
return FoldedVOp;
ConstantSDNode *N1C = isConstOrConstSplat(N1);
@@ -8762,7 +8853,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// fold vector ops
if (VT.isVector())
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
return FoldedVOp;
ConstantSDNode *N1C = isConstOrConstSplat(N1);
@@ -8775,8 +8866,8 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return NewSel;
// if (srl x, c) is known to be zero, return 0
- if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
- APInt::getAllOnesValue(OpSizeInBits)))
+ if (N1C &&
+ DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
return DAG.getConstant(0, SDLoc(N), VT);
// fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
@@ -9358,27 +9449,27 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
// is also a target-independent combine here in DAGCombiner in the other
// direction for (select Cond, -1, 0) when the condition is not i1.
if (CondVT == MVT::i1 && !LegalOperations) {
- if (C1->isNullValue() && C2->isOne()) {
+ if (C1->isZero() && C2->isOne()) {
// select Cond, 0, 1 --> zext (!Cond)
SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
if (VT != MVT::i1)
NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
return NotCond;
}
- if (C1->isNullValue() && C2->isAllOnesValue()) {
+ if (C1->isZero() && C2->isAllOnes()) {
// select Cond, 0, -1 --> sext (!Cond)
SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
if (VT != MVT::i1)
NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
return NotCond;
}
- if (C1->isOne() && C2->isNullValue()) {
+ if (C1->isOne() && C2->isZero()) {
// select Cond, 1, 0 --> zext (Cond)
if (VT != MVT::i1)
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
return Cond;
}
- if (C1->isAllOnesValue() && C2->isNullValue()) {
+ if (C1->isAllOnes() && C2->isZero()) {
// select Cond, -1, 0 --> sext (Cond)
if (VT != MVT::i1)
Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
@@ -9406,7 +9497,7 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
}
// select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
- if (C1Val.isPowerOf2() && C2Val.isNullValue()) {
+ if (C1Val.isPowerOf2() && C2Val.isZero()) {
if (VT != MVT::i1)
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT);
@@ -9434,7 +9525,7 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
TargetLowering::ZeroOrOneBooleanContent &&
TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
TargetLowering::ZeroOrOneBooleanContent &&
- C1->isNullValue() && C2->isOne()) {
+ C1->isZero() && C2->isOne()) {
SDValue NotCond =
DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
if (VT.bitsEq(CondVT))
@@ -9479,6 +9570,64 @@ static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
+static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ EVT VT = N->getValueType(0);
+ if (N0.getOpcode() != ISD::SETCC || !N0.hasOneUse())
+ return SDValue();
+
+ SDValue Cond0 = N0.getOperand(0);
+ SDValue Cond1 = N0.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+ if (VT != Cond0.getValueType())
+ return SDValue();
+
+ // Match a signbit check of Cond0 as "Cond0 s<0". Swap select operands if the
+ // compare is inverted from that pattern ("Cond0 s> -1").
+ if (CC == ISD::SETLT && isNullOrNullSplat(Cond1))
+ ; // This is the pattern we are looking for.
+ else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond1))
+ std::swap(N1, N2);
+ else
+ return SDValue();
+
+ // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & N1
+ if (isNullOrNullSplat(N2)) {
+ SDLoc DL(N);
+ SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
+ SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
+ return DAG.getNode(ISD::AND, DL, VT, Sra, N1);
+ }
+
+ // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | N2
+ if (isAllOnesOrAllOnesSplat(N1)) {
+ SDLoc DL(N);
+ SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
+ SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
+ return DAG.getNode(ISD::OR, DL, VT, Sra, N2);
+ }
+
+ // If we have to invert the sign bit mask, only do that transform if the
+ // target has a bitwise 'and not' instruction (the invert is free).
+ // (Cond0 s< -0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & N2
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) {
+ SDLoc DL(N);
+ SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
+ SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
+ SDValue Not = DAG.getNOT(DL, Sra, VT);
+ return DAG.getNode(ISD::AND, DL, VT, Not, N2);
+ }
+
+ // TODO: There's another pattern in this family, but it may require
+ // implementing hasOrNot() to check for profitability:
+ // (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | N2
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitSELECT(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
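As a minimal illustration (not part of the patch itself), the scalar identities behind the new foldVSelectToSignBitSplatMask fold can be checked in isolation; the sketch below assumes arithmetic right shift of signed integers (guaranteed since C++20, and the usual behavior of mainstream compilers before that):

// Illustrative only -- standalone check of the identities used by the combine.
#include <cassert>
#include <cstdint>

int main() {
  const int32_t vals[] = {INT32_MIN, -7, -1, 0, 1, 42, INT32_MAX};
  for (int32_t x : vals) {
    int32_t y = 0x1234, z = -0x77;
    int32_t splat = x >> 31;                 // 0 if x >= 0, -1 (all ones) if x < 0
    assert((x < 0 ? y : 0) == (splat & y));  // (Cond0 s< 0) ? N1 : 0  --> (sra) & N1
    assert((x < 0 ? -1 : z) == (splat | z)); // (Cond0 s< 0) ? -1 : N2 --> (sra) | N2
    assert((x < 0 ? 0 : z) == (~splat & z)); // the and-not form, only a win with hasAndNot
  }
  return 0;
}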
@@ -9703,8 +9852,8 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
"same value. This should have been addressed before this function.");
return DAG.getNode(
ISD::CONCAT_VECTORS, DL, VT,
- BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
- TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
+ BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0),
+ TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1));
}
bool refineUniformBase(SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG) {
@@ -10169,6 +10318,10 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
if (SDValue V = foldVSelectOfConstants(N))
return V;
+ if (hasOperation(ISD::SRA, VT))
+ if (SDValue V = foldVSelectToSignBitSplatMask(N, DAG))
+ return V;
+
return SDValue();
}
@@ -10190,7 +10343,7 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
AddToWorklist(SCC.getNode());
if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
- if (!SCCC->isNullValue())
+ if (!SCCC->isZero())
return N2; // cond always true -> true val
else
return N3; // cond always false -> false val
@@ -10248,13 +10401,13 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) {
// Is 'X Cond C' always true or false?
auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
- bool False = (Cond == ISD::SETULT && C->isNullValue()) ||
+ bool False = (Cond == ISD::SETULT && C->isZero()) ||
(Cond == ISD::SETLT && C->isMinSignedValue()) ||
- (Cond == ISD::SETUGT && C->isAllOnesValue()) ||
+ (Cond == ISD::SETUGT && C->isAllOnes()) ||
(Cond == ISD::SETGT && C->isMaxSignedValue());
- bool True = (Cond == ISD::SETULE && C->isAllOnesValue()) ||
+ bool True = (Cond == ISD::SETULE && C->isAllOnes()) ||
(Cond == ISD::SETLE && C->isMaxSignedValue()) ||
- (Cond == ISD::SETUGE && C->isNullValue()) ||
+ (Cond == ISD::SETUGE && C->isZero()) ||
(Cond == ISD::SETGE && C->isMinSignedValue());
return True || False;
};
@@ -10863,7 +11016,7 @@ static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG,
if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
return SDValue();
- if (!TLI.isLoadExtLegal(ExtLoadType, VT, Ld->getValueType(0)))
+ if (!TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))
return SDValue();
if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
@@ -11257,7 +11410,7 @@ static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
Known = DAG.computeKnownBits(Op);
- return (Known.Zero | 1).isAllOnesValue();
+ return (Known.Zero | 1).isAllOnes();
}
/// Given an extending node with a pop-count operand, if the target does not
@@ -12016,7 +12169,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
// If the input is already sign extended, just drop the extension.
- if (DAG.ComputeNumSignBits(N0) >= (VTBits - ExtVTBits + 1))
+ if (ExtVTBits >= DAG.ComputeMinSignedBits(N0))
return N0;
// fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
@@ -12032,8 +12185,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
SDValue N00 = N0.getOperand(0);
unsigned N00Bits = N00.getScalarValueSizeInBits();
- if ((N00Bits <= ExtVTBits ||
- (N00Bits - DAG.ComputeNumSignBits(N00)) < ExtVTBits) &&
+ if ((N00Bits <= ExtVTBits || DAG.ComputeMinSignedBits(N00) <= ExtVTBits) &&
(!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
}
@@ -12052,8 +12204,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts);
if ((N00Bits == ExtVTBits ||
(!IsZext && (N00Bits < ExtVTBits ||
- (N00Bits - DAG.ComputeNumSignBits(N00, DemandedSrcElts)) <
- ExtVTBits))) &&
+ DAG.ComputeMinSignedBits(N00) <= ExtVTBits))) &&
(!LegalOperations ||
TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)))
return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, N00);
@@ -12290,7 +12441,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
SDValue Amt = N0.getOperand(1);
KnownBits Known = DAG.computeKnownBits(Amt);
unsigned Size = VT.getScalarSizeInBits();
- if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
+ if (Known.countMaxActiveBits() <= Log2_32(Size)) {
SDLoc SL(N);
EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
@@ -12538,8 +12689,8 @@ static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
assert(N->getOpcode() == ISD::BUILD_PAIR);
- LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
- LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
+ auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
+ auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
// A BUILD_PAIR is always having the least significant part in elt 0 and the
// most significant part in elt 1. So when combining into one large load, we
@@ -12547,22 +12698,20 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
if (DAG.getDataLayout().isBigEndian())
std::swap(LD1, LD2);
- if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
+ if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) ||
+ !LD1->hasOneUse() || !LD2->hasOneUse() ||
LD1->getAddressSpace() != LD2->getAddressSpace())
return SDValue();
+
+ bool LD1Fast = false;
EVT LD1VT = LD1->getValueType(0);
unsigned LD1Bytes = LD1VT.getStoreSize();
- if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
- DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
- Align Alignment = LD1->getAlign();
- Align NewAlign = DAG.getDataLayout().getABITypeAlign(
- VT.getTypeForEVT(*DAG.getContext()));
-
- if (NewAlign <= Alignment &&
- (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
- return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
- LD1->getPointerInfo(), Alignment);
- }
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
+ DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) &&
+ TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
+ *LD1->getMemOperand(), &LD1Fast) && LD1Fast)
+ return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
+ LD1->getPointerInfo(), LD1->getAlign());
return SDValue();
}
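The rewritten CombineConsecutiveLoads path above now asks TLI.allowsMemoryAccess whether the single wider load is fast, rather than comparing against the ABI type alignment. As a minimal illustration of what the fold produces (not part of the patch), on a little-endian host one wide load of two adjacent 32-bit halves yields the same bits as BUILD_PAIR(lo, hi):

// Illustrative only -- the assert relies on a little-endian host.
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint32_t parts[2] = {0x11223344u, 0x55667788u}; // parts[0] = least significant half
  uint64_t wide = 0;
  std::memcpy(&wide, parts, sizeof(wide));        // the single wider load
  assert(wide == ((static_cast<uint64_t>(parts[1]) << 32) | parts[0]));
  return 0;
}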
@@ -12938,69 +13087,45 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
}
- SDLoc DL(BV);
-
// Okay, we know the src/dst types are both integers of differing types.
- // Handling growing first.
assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
- if (SrcBitSize < DstBitSize) {
- unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
- SmallVector<SDValue, 8> Ops;
- for (unsigned i = 0, e = BV->getNumOperands(); i != e;
- i += NumInputsPerOutput) {
- bool isLE = DAG.getDataLayout().isLittleEndian();
- APInt NewBits = APInt(DstBitSize, 0);
- bool EltIsUndef = true;
- for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
- // Shift the previously computed bits over.
- NewBits <<= SrcBitSize;
- SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
- if (Op.isUndef()) continue;
- EltIsUndef = false;
-
- NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
- zextOrTrunc(SrcBitSize).zext(DstBitSize);
- }
-
- if (EltIsUndef)
- Ops.push_back(DAG.getUNDEF(DstEltVT));
- else
- Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
- }
+ // TODO: Should ConstantFoldBITCASTofBUILD_VECTOR always take a
+ // BuildVectorSDNode?
+ auto *BVN = cast<BuildVectorSDNode>(BV);
- EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
- return DAG.getBuildVector(VT, DL, Ops);
- }
+ // Extract the constant raw bit data.
+ BitVector UndefElements;
+ SmallVector<APInt> RawBits;
+ bool IsLE = DAG.getDataLayout().isLittleEndian();
+ if (!BVN->getConstantRawBits(IsLE, DstBitSize, RawBits, UndefElements))
+ return SDValue();
- // Finally, this must be the case where we are shrinking elements: each input
- // turns into multiple outputs.
- unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
- EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
- NumOutputsPerInput*BV->getNumOperands());
+ SDLoc DL(BV);
SmallVector<SDValue, 8> Ops;
+ for (unsigned I = 0, E = RawBits.size(); I != E; ++I) {
+ if (UndefElements[I])
+ Ops.push_back(DAG.getUNDEF(DstEltVT));
+ else
+ Ops.push_back(DAG.getConstant(RawBits[I], DL, DstEltVT));
+ }
- for (const SDValue &Op : BV->op_values()) {
- if (Op.isUndef()) {
- Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
- continue;
- }
-
- APInt OpVal = cast<ConstantSDNode>(Op)->
- getAPIntValue().zextOrTrunc(SrcBitSize);
+ EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
+ return DAG.getBuildVector(VT, DL, Ops);
+}
- for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
- APInt ThisVal = OpVal.trunc(DstBitSize);
- Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
- OpVal.lshrInPlace(DstBitSize);
- }
+// Returns true if floating point contraction is allowed on the FMUL-SDValue
+// `N`
+static bool isContractableFMUL(const TargetOptions &Options, SDValue N) {
+ assert(N.getOpcode() == ISD::FMUL);
- // For big endian targets, swap the order of the pieces of each element.
- if (DAG.getDataLayout().isBigEndian())
- std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
- }
+ return Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||
+ N->getFlags().hasAllowContract();
+}
- return DAG.getBuildVector(VT, DL, Ops);
+// Returns true if `N` can assume no infinities involved in its computation.
+static bool hasNoInfs(const TargetOptions &Options, SDValue N) {
+ return Options.NoInfsFPMath || N.getNode()->getFlags().hasNoInfs();
}
/// Try to perform FMA combining on a given FADD node.
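The new isContractableFMUL/hasNoInfs helpers gate whether an fmul may be fused into an FMA node. A minimal sketch (not part of the patch) of why contraction is opt-in: fusing a multiply-add into a single-rounding std::fma can change the numeric result. The exact values assume IEEE-754 binary64 and a correctly rounded std::fma:

// Illustrative only -- shows mul+add vs. fused multiply-add differing by one rounding.
#include <cmath>
#include <cstdio>

int main() {
  double a = 1.0 + 0x1p-27;            // (1+e)^2 = 1 + 2e + e^2; e^2 is below the ulp at 1
  double separate = a * a - 1.0;       // the multiply rounds first, so e^2 is lost
  double fused = std::fma(a, a, -1.0); // single rounding keeps the e^2 term
  std::printf("separate = %.17g\nfused    = %.17g\n", separate, fused);
  return 0;                            // typically prints two different values
}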
@@ -13039,6 +13164,11 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
+ auto isFusedOp = [&](SDValue N) {
+ unsigned Opcode = N.getOpcode();
+ return Opcode == ISD::FMA || Opcode == ISD::FMAD;
+ };
+
// Is the node an FMUL and contractable either due to global flags or
// SDNodeFlags.
auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
@@ -13070,12 +13200,12 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
// This requires reassociation because it changes the order of operations.
SDValue FMA, E;
- if (CanReassociate && N0.getOpcode() == PreferredFusedOpcode &&
+ if (CanReassociate && isFusedOp(N0) &&
N0.getOperand(2).getOpcode() == ISD::FMUL && N0.hasOneUse() &&
N0.getOperand(2).hasOneUse()) {
FMA = N0;
E = N1;
- } else if (CanReassociate && N1.getOpcode() == PreferredFusedOpcode &&
+ } else if (CanReassociate && isFusedOp(N1) &&
N1.getOperand(2).getOpcode() == ISD::FMUL && N1.hasOneUse() &&
N1.getOperand(2).hasOneUse()) {
FMA = N1;
@@ -13131,7 +13261,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
Z));
};
- if (N0.getOpcode() == PreferredFusedOpcode) {
+ if (isFusedOp(N0)) {
SDValue N02 = N0.getOperand(2);
if (N02.getOpcode() == ISD::FP_EXTEND) {
SDValue N020 = N02.getOperand(0);
@@ -13161,7 +13291,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
};
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == PreferredFusedOpcode) {
+ if (isFusedOp(N00)) {
SDValue N002 = N00.getOperand(2);
if (isContractableFMUL(N002) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
@@ -13175,7 +13305,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// fold (fadd x, (fma y, z, (fpext (fmul u, v)))
// -> (fma y, z, (fma (fpext u), (fpext v), x))
- if (N1.getOpcode() == PreferredFusedOpcode) {
+ if (isFusedOp(N1)) {
SDValue N12 = N1.getOperand(2);
if (N12.getOpcode() == ISD::FP_EXTEND) {
SDValue N120 = N12.getOperand(0);
@@ -13196,7 +13326,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// interesting for all targets, especially GPUs.
if (N1.getOpcode() == ISD::FP_EXTEND) {
SDValue N10 = N1.getOperand(0);
- if (N10.getOpcode() == PreferredFusedOpcode) {
+ if (isFusedOp(N10)) {
SDValue N102 = N10.getOperand(2);
if (isContractableFMUL(N102) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
@@ -13392,12 +13522,17 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
return isContractableFMUL(N) && isReassociable(N.getNode());
};
+ auto isFusedOp = [&](SDValue N) {
+ unsigned Opcode = N.getOpcode();
+ return Opcode == ISD::FMA || Opcode == ISD::FMAD;
+ };
+
// More folding opportunities when target permits.
if (Aggressive && isReassociable(N)) {
bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract();
// fold (fsub (fma x, y, (fmul u, v)), z)
// -> (fma x, y (fma u, v, (fneg z)))
- if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
+ if (CanFuse && isFusedOp(N0) &&
isContractableAndReassociableFMUL(N0.getOperand(2)) &&
N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
@@ -13410,7 +13545,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// fold (fsub x, (fma y, z, (fmul u, v)))
// -> (fma (fneg y), z, (fma (fneg u), v, x))
- if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
+ if (CanFuse && isFusedOp(N1) &&
isContractableAndReassociableFMUL(N1.getOperand(2)) &&
N1->hasOneUse() && NoSignedZero) {
SDValue N20 = N1.getOperand(2).getOperand(0);
@@ -13424,8 +13559,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// fold (fsub (fma x, y, (fpext (fmul u, v))), z)
// -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
- if (N0.getOpcode() == PreferredFusedOpcode &&
- N0->hasOneUse()) {
+ if (isFusedOp(N0) && N0->hasOneUse()) {
SDValue N02 = N0.getOperand(2);
if (N02.getOpcode() == ISD::FP_EXTEND) {
SDValue N020 = N02.getOperand(0);
@@ -13451,7 +13585,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// interesting for all targets, especially GPUs.
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == PreferredFusedOpcode) {
+ if (isFusedOp(N00)) {
SDValue N002 = N00.getOperand(2);
if (isContractableAndReassociableFMUL(N002) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
@@ -13471,8 +13605,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// fold (fsub x, (fma y, z, (fpext (fmul u, v))))
// -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
- if (N1.getOpcode() == PreferredFusedOpcode &&
- N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&
+ if (isFusedOp(N1) && N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&
N1->hasOneUse()) {
SDValue N120 = N1.getOperand(2).getOperand(0);
if (isContractableAndReassociableFMUL(N120) &&
@@ -13496,8 +13629,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// FIXME: This turns two single-precision and one double-precision
// operation into two double-precision operations, which might not be
// interesting for all targets, especially GPUs.
- if (N1.getOpcode() == ISD::FP_EXTEND &&
- N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
+ if (N1.getOpcode() == ISD::FP_EXTEND && isFusedOp(N1.getOperand(0))) {
SDValue CvtSrc = N1.getOperand(0);
SDValue N100 = CvtSrc.getOperand(0);
SDValue N101 = CvtSrc.getOperand(1);
@@ -13538,12 +13670,13 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
// The transforms below are incorrect when x == 0 and y == inf, because the
// intermediate multiplication produces a nan.
- if (!Options.NoInfsFPMath)
+ SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1;
+ if (!hasNoInfs(Options, FAdd))
return SDValue();
// Floating-point multiply-add without intermediate rounding.
bool HasFMA =
- (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
+ isContractableFMUL(Options, SDValue(N, 0)) &&
TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
@@ -13633,7 +13766,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// fold vector ops
if (VT.isVector())
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
// fold (fadd c1, c2) -> c1 + c2
@@ -13841,7 +13974,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
// fold vector ops
if (VT.isVector())
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
// fold (fsub c1, c2) -> c1-c2
@@ -13926,7 +14059,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
// fold vector ops
if (VT.isVector()) {
// This just handles C1 * C2 for vectors. Other vector folds are below.
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
}
@@ -13971,10 +14104,13 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
if (N1CFP && N1CFP->isExactlyValue(+2.0))
return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
- // fold (fmul X, -1.0) -> (fneg X)
- if (N1CFP && N1CFP->isExactlyValue(-1.0))
- if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
- return DAG.getNode(ISD::FNEG, DL, VT, N0);
+ // fold (fmul X, -1.0) -> (fsub -0.0, X)
+ if (N1CFP && N1CFP->isExactlyValue(-1.0)) {
+ if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) {
+ return DAG.getNode(ISD::FSUB, DL, VT,
+ DAG.getConstantFP(-0.0, DL, VT), N0, Flags);
+ }
+ }
// -N0 * -N1 --> N0 * N1
TargetLowering::NegatibleCost CostN0 =
@@ -14260,7 +14396,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
// fold vector ops
if (VT.isVector())
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
// fold (fdiv c1, c2) -> c1/c2
@@ -16245,11 +16381,12 @@ struct LoadedSlice {
return false;
// Check if it will be merged with the load.
- // 1. Check the alignment constraint.
- Align RequiredAlignment = DAG->getDataLayout().getABITypeAlign(
- ResVT.getTypeForEVT(*DAG->getContext()));
-
- if (RequiredAlignment > getAlign())
+ // 1. Check the alignment / fast memory access constraint.
+ bool IsFast = false;
+ if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,
+ Origin->getAddressSpace(), getAlign(),
+ Origin->getMemOperand()->getFlags(), &IsFast) ||
+ !IsFast)
return false;
// 2. Check that the load is a legal operation for that type.
@@ -16270,7 +16407,7 @@ struct LoadedSlice {
/// \p UsedBits looks like 0..0 1..1 0..0.
static bool areUsedBitsDense(const APInt &UsedBits) {
// If all the bits are one, this is dense!
- if (UsedBits.isAllOnesValue())
+ if (UsedBits.isAllOnes())
return true;
// Get rid of the unused bits on the right.
@@ -16279,7 +16416,7 @@ static bool areUsedBitsDense(const APInt &UsedBits) {
if (NarrowedUsedBits.countLeadingZeros())
NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
// Check that the chunk of bits is completely used.
- return NarrowedUsedBits.isAllOnesValue();
+ return NarrowedUsedBits.isAllOnes();
}
/// Check whether or not \p First and \p Second are next to each other
@@ -16697,8 +16834,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
unsigned BitWidth = N1.getValueSizeInBits();
APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
if (Opc == ISD::AND)
- Imm ^= APInt::getAllOnesValue(BitWidth);
- if (Imm == 0 || Imm.isAllOnesValue())
+ Imm ^= APInt::getAllOnes(BitWidth);
+ if (Imm == 0 || Imm.isAllOnes())
return SDValue();
unsigned ShAmt = Imm.countTrailingZeros();
unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
@@ -16725,16 +16862,19 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
if ((Imm & Mask) == Imm) {
APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
if (Opc == ISD::AND)
- NewImm ^= APInt::getAllOnesValue(NewBW);
+ NewImm ^= APInt::getAllOnes(NewBW);
uint64_t PtrOff = ShAmt / 8;
// For big endian targets, we need to adjust the offset to the pointer to
// load the correct bytes.
if (DAG.getDataLayout().isBigEndian())
PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
+ bool IsFast = false;
Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
- Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
- if (NewAlign < DAG.getDataLayout().getABITypeAlign(NewVTTy))
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,
+ LD->getAddressSpace(), NewAlign,
+ LD->getMemOperand()->getFlags(), &IsFast) ||
+ !IsFast)
return SDValue();
SDValue NewPtr =
@@ -16788,27 +16928,26 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
if (VTSize.isScalable())
return SDValue();
+ bool FastLD = false, FastST = false;
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize());
if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
!TLI.isOperationLegal(ISD::STORE, IntVT) ||
!TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
- !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
- return SDValue();
-
- Align LDAlign = LD->getAlign();
- Align STAlign = ST->getAlign();
- Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
- Align ABIAlign = DAG.getDataLayout().getABITypeAlign(IntVTTy);
- if (LDAlign < ABIAlign || STAlign < ABIAlign)
+ !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT) ||
+ !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
+ *LD->getMemOperand(), &FastLD) ||
+ !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
+ *ST->getMemOperand(), &FastST) ||
+ !FastLD || !FastST)
return SDValue();
SDValue NewLD =
DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
- LD->getPointerInfo(), LDAlign);
+ LD->getPointerInfo(), LD->getAlign());
SDValue NewST =
DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
- ST->getPointerInfo(), STAlign);
+ ST->getPointerInfo(), ST->getAlign());
AddToWorklist(NewLD.getNode());
AddToWorklist(NewST.getNode());
@@ -16839,8 +16978,10 @@ bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
SDValue &ConstNode) {
APInt Val;
- // If the add only has one use, this would be OK to do.
- if (AddNode.getNode()->hasOneUse())
+ // If the add only has one use, and the target thinks the folding is
+ // profitable or does not lead to worse code, this would be OK to do.
+ if (AddNode.getNode()->hasOneUse() &&
+ TLI.isMulAddWithConstProfitable(AddNode, ConstNode))
return true;
// Walk all the users of the constant with which we're multiplying.
@@ -16932,6 +17073,22 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
unsigned SizeInBits = NumStores * ElementSizeBits;
unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
+ Optional<MachineMemOperand::Flags> Flags;
+ AAMDNodes AAInfo;
+ for (unsigned I = 0; I != NumStores; ++I) {
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
+ if (!Flags) {
+ Flags = St->getMemOperand()->getFlags();
+ AAInfo = St->getAAInfo();
+ continue;
+ }
+ // Skip merging if there's an inconsistent flag.
+ if (Flags != St->getMemOperand()->getFlags())
+ return false;
+ // Concatenate AA metadata.
+ AAInfo = AAInfo.concat(St->getAAInfo());
+ }
+
EVT StoreTy;
if (UseVector) {
unsigned Elts = NumStores * NumMemElts;
@@ -17049,9 +17206,9 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
// make sure we use trunc store if it's necessary to be legal.
SDValue NewStore;
if (!UseTrunc) {
- NewStore =
- DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
- FirstInChain->getPointerInfo(), FirstInChain->getAlign());
+ NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(),
+ FirstInChain->getAlign(), Flags.getValue(), AAInfo);
} else { // Must be realized as a trunc store
EVT LegalizedStoredValTy =
TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
@@ -17063,7 +17220,7 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
NewStore = DAG.getTruncStore(
NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
- FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags());
+ FirstInChain->getAlign(), Flags.getValue(), AAInfo);
}
// Replace all merged stores with the new store.
@@ -17360,7 +17517,7 @@ bool DAGCombiner::tryStoreMergeOfConstants(
SDValue StoredVal = ST->getValue();
bool IsElementZero = false;
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
- IsElementZero = C->isNullValue();
+ IsElementZero = C->isZero();
else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
IsElementZero = C->getConstantFPValue()->isNullValue();
if (IsElementZero) {
@@ -17379,7 +17536,8 @@ bool DAGCombiner::tryStoreMergeOfConstants(
break;
if (TLI.isTypeLegal(StoreTy) &&
- TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
+ TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
+ DAG.getMachineFunction()) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy,
*FirstInChain->getMemOperand(), &IsFast) &&
IsFast) {
@@ -17391,7 +17549,8 @@ bool DAGCombiner::tryStoreMergeOfConstants(
EVT LegalizedStoredValTy =
TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
- TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
+ TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
+ DAG.getMachineFunction()) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy,
*FirstInChain->getMemOperand(), &IsFast) &&
IsFast) {
@@ -17410,7 +17569,7 @@ bool DAGCombiner::tryStoreMergeOfConstants(
unsigned Elts = (i + 1) * NumMemElts;
EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
- TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
+ TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
TLI.allowsMemoryAccess(Context, DL, Ty,
*FirstInChain->getMemOperand(), &IsFast) &&
IsFast)
@@ -17486,7 +17645,8 @@ bool DAGCombiner::tryStoreMergeOfExtracts(
if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
break;
- if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
+ if (TLI.isTypeLegal(Ty) &&
+ TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
TLI.allowsMemoryAccess(Context, DL, Ty,
*FirstInChain->getMemOperand(), &IsFast) &&
IsFast)
@@ -17634,8 +17794,13 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
bool IsFastSt = false;
bool IsFastLd = false;
- if (TLI.isTypeLegal(StoreTy) &&
- TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
+ // Don't try vector types if we need a rotate. We may still fail the
+ // legality checks for the integer type, but we can't handle the rotate
+ // case with vectors.
+ // FIXME: We could use a shuffle in place of the rotate.
+ if (!NeedRotate && TLI.isTypeLegal(StoreTy) &&
+ TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
+ DAG.getMachineFunction()) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy,
*FirstInChain->getMemOperand(), &IsFastSt) &&
IsFastSt &&
@@ -17649,7 +17814,8 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
StoreTy = EVT::getIntegerVT(Context, SizeInBits);
if (TLI.isTypeLegal(StoreTy) &&
- TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
+ TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
+ DAG.getMachineFunction()) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy,
*FirstInChain->getMemOperand(), &IsFastSt) &&
IsFastSt &&
@@ -17663,7 +17829,8 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
TargetLowering::TypePromoteInteger) {
EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
- TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
+ TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
+ DAG.getMachineFunction()) &&
TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
@@ -18215,7 +18382,7 @@ SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
case ISD::LIFETIME_END:
// We can forward past any lifetime start/end that can be proven not to
// alias the node.
- if (!isAlias(Chain.getNode(), N))
+ if (!mayAlias(Chain.getNode(), N))
Chains.push_back(Chain.getOperand(0));
break;
case ISD::STORE: {
@@ -18593,32 +18760,35 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
if (!VecEltVT.isByteSized())
return SDValue();
- Align Alignment = OriginalLoad->getAlign();
- Align NewAlign = DAG.getDataLayout().getABITypeAlign(
- VecEltVT.getTypeForEVT(*DAG.getContext()));
-
- if (NewAlign > Alignment ||
- !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
- return SDValue();
-
- ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
- ISD::NON_EXTLOAD : ISD::EXTLOAD;
- if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
+ ISD::LoadExtType ExtTy =
+ ResultVT.bitsGT(VecEltVT) ? ISD::NON_EXTLOAD : ISD::EXTLOAD;
+ if (!TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT) ||
+ !TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
return SDValue();
- Alignment = NewAlign;
-
+ Align Alignment = OriginalLoad->getAlign();
MachinePointerInfo MPI;
SDLoc DL(EVE);
if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
int Elt = ConstEltNo->getZExtValue();
unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
+ Alignment = commonAlignment(Alignment, PtrOff);
} else {
// Discard the pointer info except the address space because the memory
// operand can't represent this new access since the offset is variable.
MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
+ Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
}
+
+ bool IsFast = false;
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
+ OriginalLoad->getAddressSpace(), Alignment,
+ OriginalLoad->getMemOperand()->getFlags(),
+ &IsFast) ||
+ !IsFast)
+ return SDValue();
+
SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),
InVecVT, EltNo);
@@ -18864,7 +19034,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
Use->getOperand(0) == VecOp &&
isa<ConstantSDNode>(Use->getOperand(1));
})) {
- APInt DemandedElts = APInt::getNullValue(NumElts);
+ APInt DemandedElts = APInt::getZero(NumElts);
for (SDNode *Use : VecOp->uses()) {
auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
if (CstElt->getAPIntValue().ult(NumElts))
@@ -18877,7 +19047,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
AddToWorklist(N);
return SDValue(N, 0);
}
- APInt DemandedBits = APInt::getAllOnesValue(VecEltBitWidth);
+ APInt DemandedBits = APInt::getAllOnes(VecEltBitWidth);
if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
// We simplified the vector operand of this extract element. If this
// extract is not dead, visit it again so it is folded properly.
@@ -19672,8 +19842,10 @@ SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
// Make sure the first element matches
// (zext (extract_vector_elt X, C))
+ // Offset must be a constant multiple of the
+ // known-minimum vector length of the result type.
int64_t Offset = checkElem(Op0);
- if (Offset < 0)
+ if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0)
return SDValue();
unsigned NumElems = N->getNumOperands();
@@ -19844,6 +20016,44 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
}
+// Attempt to merge nested concat_vectors/undefs.
+// Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d))
+// --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d)
+static SDValue combineConcatVectorOfConcatVectors(SDNode *N,
+ SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+
+ // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types.
+ EVT SubVT;
+ SDValue FirstConcat;
+ for (const SDValue &Op : N->ops()) {
+ if (Op.isUndef())
+ continue;
+ if (Op.getOpcode() != ISD::CONCAT_VECTORS)
+ return SDValue();
+ if (!FirstConcat) {
+ SubVT = Op.getOperand(0).getValueType();
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
+ return SDValue();
+ FirstConcat = Op;
+ continue;
+ }
+ if (SubVT != Op.getOperand(0).getValueType())
+ return SDValue();
+ }
+ assert(FirstConcat && "Concat of all-undefs found");
+
+ SmallVector<SDValue> ConcatOps;
+ for (const SDValue &Op : N->ops()) {
+ if (Op.isUndef()) {
+ ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT));
+ continue;
+ }
+ ConcatOps.append(Op->op_begin(), Op->op_end());
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, ConcatOps);
+}
+
// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
// most two distinct vectors the same size as the result, attempt to turn this
@@ -20103,13 +20313,19 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
}
// Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
+ // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).
if (SDValue V = combineConcatVectorOfScalars(N, DAG))
return V;
- // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
- if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
+ if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
+ // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) to VECTOR_SHUFFLE.
+ if (SDValue V = combineConcatVectorOfConcatVectors(N, DAG))
+ return V;
+
+ // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
return V;
+ }
if (SDValue V = combineConcatVectorOfCasts(N, DAG))
return V;
@@ -20351,9 +20567,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
return SDValue();
auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
- auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
- if (!Ld || Ld->getExtensionType() || !Ld->isSimple() ||
- !ExtIdx)
+ if (!Ld || Ld->getExtensionType() || !Ld->isSimple())
return SDValue();
// Allow targets to opt-out.
@@ -20363,7 +20577,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
if (!VT.isByteSized())
return SDValue();
- unsigned Index = ExtIdx->getZExtValue();
+ unsigned Index = Extract->getConstantOperandVal(1);
unsigned NumElts = VT.getVectorMinNumElements();
// The definition of EXTRACT_SUBVECTOR states that the index must be a
@@ -20492,7 +20706,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
// If the concatenated source types match this extract, it's a direct
// simplification:
// extract_subvec (concat V1, V2, ...), i --> Vi
- if (ConcatSrcNumElts == ExtNumElts)
+ if (NVT.getVectorElementCount() == ConcatSrcVT.getVectorElementCount())
return V.getOperand(ConcatOpIdx);
// If the concatenated source vectors are a multiple length of this extract,
@@ -20500,7 +20714,8 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
// concat operand. Example:
// v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y), 14 -->
// v2i8 extract_subvec v8i8 Y, 6
- if (NVT.isFixedLengthVector() && ConcatSrcNumElts % ExtNumElts == 0) {
+ if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() &&
+ ConcatSrcNumElts % ExtNumElts == 0) {
SDLoc DL(N);
unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
@@ -20562,8 +20777,12 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
// otherwise => (extract_subvec V1, ExtIdx)
uint64_t InsIdx = V.getConstantOperandVal(2);
if (InsIdx * SmallVT.getScalarSizeInBits() ==
- ExtIdx * NVT.getScalarSizeInBits())
+ ExtIdx * NVT.getScalarSizeInBits()) {
+ if (LegalOperations && !TLI.isOperationLegal(ISD::BITCAST, NVT))
+ return SDValue();
+
return DAG.getBitcast(NVT, V.getOperand(1));
+ }
return DAG.getNode(
ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
@@ -21131,15 +21350,9 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
// Canonicalize shuffle v, v -> v, undef
- if (N0 == N1) {
- SmallVector<int, 8> NewMask;
- for (unsigned i = 0; i != NumElts; ++i) {
- int Idx = SVN->getMaskElt(i);
- if (Idx >= (int)NumElts) Idx -= NumElts;
- NewMask.push_back(Idx);
- }
- return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
- }
+ if (N0 == N1)
+ return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),
+ createUnaryMask(SVN->getMask(), NumElts));
// Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
if (N0.isUndef())
@@ -21290,6 +21503,70 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
}
}
+ // See if we can replace a shuffle with an insert_subvector.
+ // e.g. v2i32 into v8i32:
+ // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7).
+ // --> insert_subvector(lhs,rhs1,4).
+ if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) &&
+ TLI.isOperationLegalOrCustom(ISD::INSERT_SUBVECTOR, VT)) {
+ auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) {
+ // Ensure RHS subvectors are legal.
+ assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors");
+ EVT SubVT = RHS.getOperand(0).getValueType();
+ int NumSubVecs = RHS.getNumOperands();
+ int NumSubElts = SubVT.getVectorNumElements();
+ assert((NumElts % NumSubElts) == 0 && "Subvector mismatch");
+ if (!TLI.isTypeLegal(SubVT))
+ return SDValue();
+
+ // Don't bother if we have an unary shuffle (matches undef + LHS elts).
+ if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; }))
+ return SDValue();
+
+ // Search [NumSubElts] spans for RHS sequence.
+ // TODO: Can we avoid nested loops to increase performance?
+ SmallVector<int> InsertionMask(NumElts);
+ for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) {
+ for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) {
+ // Reset mask to identity.
+ std::iota(InsertionMask.begin(), InsertionMask.end(), 0);
+
+ // Add subvector insertion.
+ std::iota(InsertionMask.begin() + SubIdx,
+ InsertionMask.begin() + SubIdx + NumSubElts,
+ NumElts + (SubVec * NumSubElts));
+
+ // See if the shuffle mask matches the reference insertion mask.
+ bool MatchingShuffle = true;
+ for (int i = 0; i != (int)NumElts; ++i) {
+ int ExpectIdx = InsertionMask[i];
+ int ActualIdx = Mask[i];
+ if (0 <= ActualIdx && ExpectIdx != ActualIdx) {
+ MatchingShuffle = false;
+ break;
+ }
+ }
+
+ if (MatchingShuffle)
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, LHS,
+ RHS.getOperand(SubVec),
+ DAG.getVectorIdxConstant(SubIdx, SDLoc(N)));
+ }
+ }
+ return SDValue();
+ };
+ ArrayRef<int> Mask = SVN->getMask();
+ if (N1.getOpcode() == ISD::CONCAT_VECTORS)
+ if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask))
+ return InsertN1;
+ if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
+ SmallVector<int> CommuteMask(Mask.begin(), Mask.end());
+ ShuffleVectorSDNode::commuteMask(CommuteMask);
+ if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask))
+ return InsertN0;
+ }
+ }
+
// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
@@ -21859,6 +22136,40 @@ SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitVPOp(SDNode *N) {
+ // VP operations in which all vector elements are disabled - either by
+ // determining that the mask is all false or that the EVL is 0 - can be
+ // eliminated.
+ bool AreAllEltsDisabled = false;
+ if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode()))
+ AreAllEltsDisabled |= isNullConstant(N->getOperand(*EVLIdx));
+ if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode()))
+ AreAllEltsDisabled |=
+ ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode());
+
+ // This is the only generic VP combine we support for now.
+ if (!AreAllEltsDisabled)
+ return SDValue();
+
+ // Binary operations can be replaced by UNDEF.
+ if (ISD::isVPBinaryOp(N->getOpcode()))
+ return DAG.getUNDEF(N->getValueType(0));
+
+ // VP Memory operations can be replaced by either the chain (stores) or the
+ // chain + undef (loads).
+ if (const auto *MemSD = dyn_cast<MemSDNode>(N)) {
+ if (MemSD->writeMem())
+ return MemSD->getChain();
+ return CombineTo(N, DAG.getUNDEF(N->getValueType(0)), MemSD->getChain());
+ }
+
+ // Reduction operations return the start operand when no elements are active.
+ if (ISD::isVPReduction(N->getOpcode()))
+ return N->getOperand(0);
+
+ return SDValue();
+}
+
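visitVPOp above folds any VP (vector-predicated) operation whose lanes are all disabled, either because the EVL is zero or the mask is an all-false splat. A toy model of the reduction case (not LLVM code; the helper name below is made up for illustration):

// Illustrative only -- a VP reduction with no active lanes collapses to its start operand.
#include <cassert>
#include <cstdint>
#include <vector>

// vp.reduce.add(start, vec, mask, evl): add the first `evl` lanes whose mask bit is set.
int64_t vpReduceAdd(int64_t start, const std::vector<int32_t> &vec,
                    const std::vector<bool> &mask, size_t evl) {
  int64_t acc = start;
  for (size_t i = 0; i < evl && i < vec.size(); ++i)
    if (mask[i])
      acc += vec[i];
  return acc;
}

int main() {
  std::vector<int32_t> v = {1, 2, 3, 4};
  std::vector<bool> allFalse(4, false), allTrue(4, true);
  // EVL == 0 or an all-false mask disables every lane, so the result is the start operand,
  // which is exactly what the combine returns for VP reductions.
  assert(vpReduceAdd(42, v, allTrue, 0) == 42);
  assert(vpReduceAdd(42, v, allFalse, 4) == 42);
  return 0;
}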
/// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
/// with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
@@ -21915,7 +22226,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
else
Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
- if (Bits.isAllOnesValue())
+ if (Bits.isAllOnes())
Indices.push_back(i);
else if (Bits == 0)
Indices.push_back(i + NumSubElts);
@@ -21950,7 +22261,8 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
/// If a vector binop is performed on splat values, it may be profitable to
/// extract, scalarize, and insert/splat.
-static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {
+static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
+ const SDLoc &DL) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
unsigned Opcode = N->getOpcode();
@@ -21971,7 +22283,6 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {
!TLI.isOperationLegalOrCustom(Opcode, EltVT))
return SDValue();
- SDLoc DL(N);
SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
@@ -21995,20 +22306,19 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {
}
/// Visit a binary vector operation, like ADD.
-SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
- assert(N->getValueType(0).isVector() &&
- "SimplifyVBinOp only works on vectors!");
+SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
+ EVT VT = N->getValueType(0);
+ assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
SDValue Ops[] = {LHS, RHS};
- EVT VT = N->getValueType(0);
unsigned Opcode = N->getOpcode();
SDNodeFlags Flags = N->getFlags();
// See if we can constant fold the vector operation.
- if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
- Opcode, SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
+ if (SDValue Fold = DAG.FoldConstantArithmetic(Opcode, SDLoc(LHS),
+ LHS.getValueType(), Ops))
return Fold;
// Move unary shuffles with identical masks after a vector binop:
@@ -22026,7 +22336,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
(LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
- SDLoc DL(N);
SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
RHS.getOperand(0), Flags);
SDValue UndefV = LHS.getOperand(1);
@@ -22043,7 +22352,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
// binop (splat X), (splat C) --> splat (binop X, C)
- SDLoc DL(N);
SDValue X = Shuf0->getOperand(0);
SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
@@ -22053,7 +22361,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
// binop (splat C), (splat X) --> splat (binop C, X)
- SDLoc DL(N);
SDValue X = Shuf1->getOperand(0);
SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
@@ -22077,7 +22384,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
LegalOperations)) {
// (binop undef, undef) may not return undef, so compute that result.
- SDLoc DL(N);
SDValue VecC =
DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
@@ -22104,7 +22410,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
EVT NarrowVT = LHS.getOperand(0).getValueType();
if (NarrowVT == RHS.getOperand(0).getValueType() &&
TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
- SDLoc DL(N);
unsigned NumOperands = LHS.getNumOperands();
SmallVector<SDValue, 4> ConcatOps;
for (unsigned i = 0; i != NumOperands; ++i) {
@@ -22117,7 +22422,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
}
}
- if (SDValue V = scalarizeBinOpOfSplats(N, DAG))
+ if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL))
return V;
return SDValue();
@@ -22431,15 +22736,23 @@ SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc))
return SDValue();
- if (!N->isOnlyUserOf(N0.getNode()) || !N->isOnlyUserOf(N1.getNode()))
+ // The use checks are intentionally on SDNode because we may be dealing
+ // with opcodes that produce more than one SDValue.
+ // TODO: Do we really need to check N0 (the condition operand of the select)?
+ // But removing that clause could cause an infinite loop...
+ if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse())
return SDValue();
+ // Binops may include opcodes that return multiple values, so all values
+ // must be created/propagated from the newly created binops below.
+ SDVTList OpVTs = N1->getVTList();
+
// Fold select(cond, binop(x, y), binop(z, y))
// --> binop(select(cond, x, z), y)
if (N1.getOperand(1) == N2.getOperand(1)) {
SDValue NewSel =
DAG.getSelect(DL, VT, N0, N1.getOperand(0), N2.getOperand(0));
- SDValue NewBinOp = DAG.getNode(BinOpc, DL, VT, NewSel, N1.getOperand(1));
+ SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, NewSel, N1.getOperand(1));
NewBinOp->setFlags(N1->getFlags());
NewBinOp->intersectFlagsWith(N2->getFlags());
return NewBinOp;
@@ -22453,7 +22766,7 @@ SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
VT == N2.getOperand(1).getValueType()) {
SDValue NewSel =
DAG.getSelect(DL, VT, N0, N1.getOperand(1), N2.getOperand(1));
- SDValue NewBinOp = DAG.getNode(BinOpc, DL, VT, N1.getOperand(0), NewSel);
+ SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, N1.getOperand(0), NewSel);
NewBinOp->setFlags(N1->getFlags());
NewBinOp->intersectFlagsWith(N2->getFlags());
return NewBinOp;
@@ -22581,7 +22894,7 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
// fold select_cc true, x, y -> x
// fold select_cc false, x, y -> y
- return !(SCCC->isNullValue()) ? N2 : N3;
+ return !(SCCC->isZero()) ? N2 : N3;
}
}
@@ -22680,7 +22993,7 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
// select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
// select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
// select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
- if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+ if (N1C && N1C->isZero() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
SDValue ValueOnZero = N2;
SDValue Count = N3;
// If the condition is NE instead of E, swap the operands.
@@ -22707,6 +23020,20 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
}
}
+ // Fold select_cc setgt X, -1, C, ~C -> xor (ashr X, BW-1), C
+ // Fold select_cc setlt X, 0, C, ~C -> xor (ashr X, BW-1), ~C
+ if (!NotExtCompare && N1C && N2C && N3C &&
+ N2C->getAPIntValue() == ~N3C->getAPIntValue() &&
+ ((N1C->isAllOnes() && CC == ISD::SETGT) ||
+ (N1C->isZero() && CC == ISD::SETLT)) &&
+ !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) {
+ SDValue ASR = DAG.getNode(
+ ISD::SRA, DL, CmpOpVT, N0,
+ DAG.getConstant(CmpOpVT.getScalarSizeInBits() - 1, DL, CmpOpVT));
+ return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASR, DL, VT),
+ DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT));
+ }
+
return SDValue();
}
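The new select_cc fold above replaces a C/~C select on a sign test with an xor of an arithmetic shift. A minimal scalar check of the two identities (not part of the patch), again assuming arithmetic right shift of signed values:

// Illustrative only.
#include <cassert>
#include <cstdint>

int main() {
  const int32_t vals[] = {INT32_MIN, -5, -1, 0, 3, INT32_MAX};
  const int32_t c = 0x00F0F0F0;
  for (int32_t x : vals) {
    int32_t asr = x >> 31;                  // 0 or -1 (all ones)
    assert((x > -1 ? c : ~c) == (asr ^ c)); // select_cc setgt X, -1, C, ~C
    assert((x < 0 ? c : ~c) == (asr ^ ~c)); // select_cc setlt X,  0, C, ~C
  }
  return 0;
}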
@@ -22747,7 +23074,7 @@ SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
return SDValue();
// Avoid division by zero.
- if (C->isNullValue())
+ if (C->isZero())
return SDValue();
SmallVector<SDNode *, 8> Built;
@@ -22792,7 +23119,7 @@ SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
/// For the reciprocal, we need to find the zero of the function:
-/// F(X) = A X - 1 [which has a zero at X = 1/A]
+/// F(X) = 1/X - A [which has a zero at X = 1/A]
/// =>
/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
/// does not require additional intermediate precision]
@@ -22803,9 +23130,10 @@ SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
if (LegalDAG)
return SDValue();
- // TODO: Handle half and/or extended types?
+ // TODO: Handle extended types?
EVT VT = Op.getValueType();
- if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
+ if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
+ VT.getScalarType() != MVT::f64)
return SDValue();
// If estimates are explicitly disabled for this function, we're done.
@@ -22942,9 +23270,10 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
if (LegalDAG)
return SDValue();
- // TODO: Handle half and/or extended types?
+ // TODO: Handle extended types?
EVT VT = Op.getValueType();
- if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
+ if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
+ VT.getScalarType() != MVT::f64)
return SDValue();
// If estimates are explicitly disabled for this function, we're done.
@@ -22994,7 +23323,7 @@ SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
}
/// Return true if there is any possibility that the two addresses overlap.
-bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
+bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
struct MemUseCharacteristics {
bool IsVolatile;
@@ -23154,7 +23483,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
// TODO: Relax aliasing for unordered atomics (see D66309)
bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
cast<LSBaseSDNode>(C.getNode())->isSimple();
- if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) {
+ if ((IsLoad && IsOpLoad) || !mayAlias(N, C.getNode())) {
// Look further up the chain.
C = C.getOperand(0);
return true;
@@ -23172,7 +23501,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
case ISD::LIFETIME_END: {
// We can forward past any lifetime start/end that can be proven not to
// alias the memory access.
- if (!isAlias(N, C.getNode())) {
+ if (!mayAlias(N, C.getNode())) {
// Look further up the chain.
C = C.getOperand(0);
return true;
diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index 4ca731cfdf62..4d1449bc2751 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -75,6 +75,7 @@
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalValue.h"
@@ -195,10 +196,8 @@ void FastISel::flushLocalValueMap() {
EmitStartPt ? MachineBasicBlock::reverse_iterator(EmitStartPt)
: FuncInfo.MBB->rend();
MachineBasicBlock::reverse_iterator RI(LastLocalValue);
- for (; RI != RE;) {
- MachineInstr &LocalMI = *RI;
- // Increment before erasing what it points to.
- ++RI;
+ for (MachineInstr &LocalMI :
+ llvm::make_early_inc_range(llvm::make_range(RI, RE))) {
Register DefReg = findLocalRegDef(LocalMI);
if (!DefReg)
continue;
@@ -622,7 +621,7 @@ bool FastISel::selectGetElementPtr(const User *I) {
bool FastISel::addStackMapLiveVars(SmallVectorImpl<MachineOperand> &Ops,
const CallInst *CI, unsigned StartIdx) {
- for (unsigned i = StartIdx, e = CI->getNumArgOperands(); i != e; ++i) {
+ for (unsigned i = StartIdx, e = CI->arg_size(); i != e; ++i) {
Value *Val = CI->getArgOperand(i);
// Check for constants and encode them with a StackMaps::ConstantOp prefix.
if (const auto *C = dyn_cast<ConstantInt>(Val)) {
@@ -784,7 +783,7 @@ bool FastISel::selectPatchpoint(const CallInst *I) {
// Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs>
// This includes all meta-operands up to but not including CC.
unsigned NumMetaOpers = PatchPointOpers::CCPos;
- assert(I->getNumArgOperands() >= NumMetaOpers + NumArgs &&
+ assert(I->arg_size() >= NumMetaOpers + NumArgs &&
"Not enough arguments provided to the patchpoint intrinsic");
// For AnyRegCC the arguments are lowered later on manually.
@@ -1151,6 +1150,8 @@ bool FastISel::lowerCall(const CallInst *CI) {
CLI.setCallee(RetTy, FuncTy, CI->getCalledOperand(), std::move(Args), *CI)
.setTailCall(IsTailCall);
+ diagnoseDontCall(*CI);
+
return lowerCallTo(CLI);
}
@@ -1264,7 +1265,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
// If using instruction referencing, mutate this into a DBG_INSTR_REF,
// to be later patched up by finalizeDebugInstrRefs. Tack a deref onto
// the expression, we don't have an "indirect" flag in DBG_INSTR_REF.
- if (TM.Options.ValueTrackingVariableLocations && Op->isReg()) {
+ if (FuncInfo.MF->useDebugInstrRef() && Op->isReg()) {
Builder->setDesc(TII.get(TargetOpcode::DBG_INSTR_REF));
Builder->getOperand(1).ChangeToImmediate(0);
auto *NewExpr =
@@ -1292,18 +1293,22 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, false, 0U,
DI->getVariable(), DI->getExpression());
} else if (const auto *CI = dyn_cast<ConstantInt>(V)) {
+ // See if there's an expression to constant-fold.
+ DIExpression *Expr = DI->getExpression();
+ if (Expr)
+ std::tie(Expr, CI) = Expr->constantFold(CI);
if (CI->getBitWidth() > 64)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
.addCImm(CI)
.addImm(0U)
.addMetadata(DI->getVariable())
- .addMetadata(DI->getExpression());
+ .addMetadata(Expr);
else
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
.addImm(CI->getZExtValue())
.addImm(0U)
.addMetadata(DI->getVariable())
- .addMetadata(DI->getExpression());
+ .addMetadata(Expr);
} else if (const auto *CF = dyn_cast<ConstantFP>(V)) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
.addFPImm(CF)
@@ -1319,7 +1324,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
// If using instruction referencing, mutate this into a DBG_INSTR_REF,
// to be later patched up by finalizeDebugInstrRefs.
- if (TM.Options.ValueTrackingVariableLocations) {
+ if (FuncInfo.MF->useDebugInstrRef()) {
Builder->setDesc(TII.get(TargetOpcode::DBG_INSTR_REF));
Builder->getOperand(1).ChangeToImmediate(0);
}
@@ -2303,8 +2308,7 @@ FastISel::createMachineMemOperandFor(const Instruction *I) const {
bool IsDereferenceable = I->hasMetadata(LLVMContext::MD_dereferenceable);
const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range);
- AAMDNodes AAInfo;
- I->getAAMetadata(AAInfo);
+ AAMDNodes AAInfo = I->getAAMetadata();
if (!Alignment) // Ensure that codegen never sees alignment 0.
Alignment = DL.getABITypeAlign(ValTy);
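The dbg.value path above (and the InstrEmitter path in the next file) now runs the DIExpression through constantFold() whenever the location operand is a ConstantInt, so any part of the expression that can be evaluated at build time is absorbed into the emitted constant. The snippet below is only a toy host-side model of that idea: the Op enum, Expr alias and constantFoldToy() helper are invented for illustration and are not the DIExpression API; it just shows the shape of the transformation, where a foldable prefix of the expression collapses into the constant and the remaining expression shrinks.

#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>

// Toy "debug expression": a list of ops the debugger would otherwise apply at
// run time. With a known constant input, a leading prefix can be pre-folded.
enum class Op { PlusConst, Shl, Deref };          // Deref cannot be folded.
using Expr = std::vector<std::pair<Op, uint64_t>>;

// Fold as many leading ops as possible into C; return the reduced pair.
static std::pair<Expr, int64_t> constantFoldToy(Expr E, int64_t C) {
  size_t I = 0;
  for (; I != E.size(); ++I) {
    if (E[I].first == Op::PlusConst)
      C += (int64_t)E[I].second;
    else if (E[I].first == Op::Shl)
      C <<= E[I].second;
    else
      break;                                      // Deref: stop folding.
  }
  E.erase(E.begin(), E.begin() + I);
  return {std::move(E), C};
}

int main() {
  Expr E = {{Op::PlusConst, 4}, {Op::Shl, 1}};    // (x + 4) << 1
  auto [Rest, Folded] = constantFoldToy(E, 8);    // x = 8
  assert(Rest.empty() && Folded == 24);           // expression fully absorbed
}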
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 348fad6daf8f..c1bb65409282 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -722,7 +722,7 @@ void InstrEmitter::AddDbgValueLocationOps(
MIB.addFrameIndex(Op.getFrameIx());
break;
case SDDbgOperand::VREG:
- MIB.addReg(Op.getVReg(), RegState::Debug);
+ MIB.addReg(Op.getVReg());
break;
case SDDbgOperand::SDNODE: {
SDValue V = SDValue(Op.getSDNode(), Op.getResNo());
@@ -862,7 +862,7 @@ MachineInstr *InstrEmitter::EmitDbgNoLocation(SDDbgValue *SD) {
DebugLoc DL = SD->getDebugLoc();
auto MIB = BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE));
MIB.addReg(0U);
- MIB.addReg(0U, RegState::Debug);
+ MIB.addReg(0U);
MIB.addMetadata(Var);
MIB.addMetadata(Expr);
return &*MIB;
@@ -872,22 +872,33 @@ MachineInstr *
InstrEmitter::EmitDbgValueFromSingleOp(SDDbgValue *SD,
DenseMap<SDValue, Register> &VRBaseMap) {
MDNode *Var = SD->getVariable();
- MDNode *Expr = SD->getExpression();
+ DIExpression *Expr = SD->getExpression();
DebugLoc DL = SD->getDebugLoc();
const MCInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE);
assert(SD->getLocationOps().size() == 1 &&
"Non variadic dbg_value should have only one location op");
+ // See about constant-folding the expression.
+ // Copy the location operand in case we replace it.
+ SmallVector<SDDbgOperand, 1> LocationOps(1, SD->getLocationOps()[0]);
+ if (Expr && LocationOps[0].getKind() == SDDbgOperand::CONST) {
+ const Value *V = LocationOps[0].getConst();
+ if (auto *C = dyn_cast<ConstantInt>(V)) {
+ std::tie(Expr, C) = Expr->constantFold(C);
+ LocationOps[0] = SDDbgOperand::fromConst(C);
+ }
+ }
+
// Emit non-variadic dbg_value nodes as DBG_VALUE.
// DBG_VALUE := "DBG_VALUE" loc, isIndirect, var, expr
auto MIB = BuildMI(*MF, DL, II);
- AddDbgValueLocationOps(MIB, II, SD->getLocationOps(), VRBaseMap);
+ AddDbgValueLocationOps(MIB, II, LocationOps, VRBaseMap);
if (SD->isIndirect())
MIB.addImm(0U);
else
- MIB.addReg(0U, RegState::Debug);
+ MIB.addReg(0U);
return MIB.addMetadata(Var).addMetadata(Expr);
}
@@ -1329,5 +1340,5 @@ InstrEmitter::InstrEmitter(const TargetMachine &TM, MachineBasicBlock *mbb,
TRI(MF->getSubtarget().getRegisterInfo()),
TLI(MF->getSubtarget().getTargetLowering()), MBB(mbb),
InsertPos(insertpos) {
- EmitDebugInstrRefs = TM.Options.ValueTrackingVariableLocations;
+ EmitDebugInstrRefs = MF->useDebugInstrRef();
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index d92b23f56e4d..eb9d2286aeb4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1164,6 +1164,16 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Action = TLI.getOperationAction(Node->getOpcode(),
cast<MaskedStoreSDNode>(Node)->getValue().getValueType());
break;
+ case ISD::VP_SCATTER:
+ Action = TLI.getOperationAction(
+ Node->getOpcode(),
+ cast<VPScatterSDNode>(Node)->getValue().getValueType());
+ break;
+ case ISD::VP_STORE:
+ Action = TLI.getOperationAction(
+ Node->getOpcode(),
+ cast<VPStoreSDNode>(Node)->getValue().getValueType());
+ break;
case ISD::VECREDUCE_FADD:
case ISD::VECREDUCE_FMUL:
case ISD::VECREDUCE_ADD:
@@ -1181,6 +1191,22 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Node->getOpcode(), Node->getOperand(0).getValueType());
break;
case ISD::VECREDUCE_SEQ_FADD:
+ case ISD::VECREDUCE_SEQ_FMUL:
+ case ISD::VP_REDUCE_FADD:
+ case ISD::VP_REDUCE_FMUL:
+ case ISD::VP_REDUCE_ADD:
+ case ISD::VP_REDUCE_MUL:
+ case ISD::VP_REDUCE_AND:
+ case ISD::VP_REDUCE_OR:
+ case ISD::VP_REDUCE_XOR:
+ case ISD::VP_REDUCE_SMAX:
+ case ISD::VP_REDUCE_SMIN:
+ case ISD::VP_REDUCE_UMAX:
+ case ISD::VP_REDUCE_UMIN:
+ case ISD::VP_REDUCE_FMAX:
+ case ISD::VP_REDUCE_FMIN:
+ case ISD::VP_REDUCE_SEQ_FADD:
+ case ISD::VP_REDUCE_SEQ_FMUL:
Action = TLI.getOperationAction(
Node->getOpcode(), Node->getOperand(1).getValueType());
break;
@@ -1333,9 +1359,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
Visited.insert(Op.getNode());
Worklist.push_back(Idx.getNode());
SDValue StackPtr, Ch;
- for (SDNode::use_iterator UI = Vec.getNode()->use_begin(),
- UE = Vec.getNode()->use_end(); UI != UE; ++UI) {
- SDNode *User = *UI;
+ for (SDNode *User : Vec.getNode()->uses()) {
if (StoreSDNode *ST = dyn_cast<StoreSDNode>(User)) {
if (ST->isIndexed() || ST->isTruncatingStore() ||
ST->getValue() != Vec)
@@ -2197,9 +2221,7 @@ static bool useSinCos(SDNode *Node) {
? ISD::FCOS : ISD::FSIN;
SDValue Op0 = Node->getOperand(0);
- for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
- UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
- SDNode *User = *UI;
+ for (const SDNode *User : Op0.getNode()->uses()) {
if (User == Node)
continue;
// The other user might have been turned into sincos already.
@@ -2636,7 +2658,7 @@ SDValue SelectionDAGLegalize::ExpandPARITY(SDValue Op, const SDLoc &dl) {
// If CTPOP is legal, use it. Otherwise use shifts and xor.
SDValue Result;
- if (TLI.isOperationLegal(ISD::CTPOP, VT)) {
+ if (TLI.isOperationLegalOrPromote(ISD::CTPOP, VT)) {
Result = DAG.getNode(ISD::CTPOP, dl, VT, Op);
} else {
Result = Op;
@@ -2658,21 +2680,21 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
bool NeedInvert;
switch (Node->getOpcode()) {
case ISD::ABS:
- if (TLI.expandABS(Node, Tmp1, DAG))
+ if ((Tmp1 = TLI.expandABS(Node, DAG)))
Results.push_back(Tmp1);
break;
case ISD::CTPOP:
- if (TLI.expandCTPOP(Node, Tmp1, DAG))
+ if ((Tmp1 = TLI.expandCTPOP(Node, DAG)))
Results.push_back(Tmp1);
break;
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
- if (TLI.expandCTLZ(Node, Tmp1, DAG))
+ if ((Tmp1 = TLI.expandCTLZ(Node, DAG)))
Results.push_back(Tmp1);
break;
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
- if (TLI.expandCTTZ(Node, Tmp1, DAG))
+ if ((Tmp1 = TLI.expandCTTZ(Node, DAG)))
Results.push_back(Tmp1);
break;
case ISD::BITREVERSE:
@@ -3229,9 +3251,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
assert(TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
TLI.isOperationLegalOrCustom(ISD::XOR, VT) &&
"Don't know how to expand this subtraction!");
- Tmp1 = DAG.getNode(ISD::XOR, dl, VT, Node->getOperand(1),
- DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), dl,
- VT));
+ Tmp1 = DAG.getNOT(dl, Node->getOperand(1), VT);
Tmp1 = DAG.getNode(ISD::ADD, dl, VT, Tmp1, DAG.getConstant(1, dl, VT));
Results.push_back(DAG.getNode(ISD::ADD, dl, VT, Node->getOperand(0), Tmp1));
break;
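The SUB expansion just above drops the hand-built all-ones XOR in favour of DAG.getNOT, but the underlying identity is unchanged: a - b == a + (~b + 1) in two's-complement (i.e. modular) arithmetic. A quick host-side check of that identity on plain unsigned integers, which wrap the same way the DAG nodes do:

#include <cassert>
#include <cstdint>

// a - b == a + (~b + 1) in any power-of-two modular (two's-complement) width.
static uint32_t subViaNotAdd(uint32_t A, uint32_t B) { return A + (~B + 1u); }

int main() {
  const uint32_t Tests[][2] = {
      {0, 0}, {1, 2}, {0x80000000u, 1}, {123456789u, 987654321u}};
  for (auto &T : Tests)
    assert(subViaNotAdd(T[0], T[1]) == T[0] - T[1]);
}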
@@ -4242,8 +4262,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
SDValue Op = Node->getOperand(IsStrict ? 1 : 0);
SDValue Chain = IsStrict ? Node->getOperand(0) : SDValue();
EVT VT = Node->getValueType(0);
- assert(cast<ConstantSDNode>(Node->getOperand(IsStrict ? 2 : 1))
- ->isNullValue() &&
+ assert(cast<ConstantSDNode>(Node->getOperand(IsStrict ? 2 : 1))->isZero() &&
"Unable to expand as libcall if it is not normal rounding");
RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), VT);
@@ -4737,6 +4756,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
break;
case ISD::STRICT_FFLOOR:
case ISD::STRICT_FCEIL:
+ case ISD::STRICT_FROUND:
case ISD::STRICT_FSIN:
case ISD::STRICT_FCOS:
case ISD::STRICT_FLOG:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 3553f9ec16c2..27f9cede1922 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -61,6 +61,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
#endif
llvm_unreachable("Do not know how to soften the result of this operator!");
+ case ISD::ARITH_FENCE: R = SoftenFloatRes_ARITH_FENCE(N); break;
case ISD::MERGE_VALUES:R = SoftenFloatRes_MERGE_VALUES(N, ResNo); break;
case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N); break;
case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break;
@@ -206,6 +207,13 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FREEZE(SDNode *N) {
GetSoftenedFloat(N->getOperand(0)));
}
+SDValue DAGTypeLegalizer::SoftenFloatRes_ARITH_FENCE(SDNode *N) {
+ EVT Ty = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue NewFence = DAG.getNode(ISD::ARITH_FENCE, SDLoc(N), Ty,
+ GetSoftenedFloat(N->getOperand(0)));
+ return NewFence;
+}
+
SDValue DAGTypeLegalizer::SoftenFloatRes_MERGE_VALUES(SDNode *N,
unsigned ResNo) {
SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
@@ -257,7 +265,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) {
unsigned Size = NVT.getSizeInBits();
// Mask = ~(1 << (Size-1))
- APInt API = APInt::getAllOnesValue(Size);
+ APInt API = APInt::getAllOnes(Size);
API.clearBit(Size - 1);
SDValue Mask = DAG.getConstant(API, SDLoc(N), NVT);
SDValue Op = GetSoftenedFloat(N->getOperand(0));
@@ -820,6 +828,7 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break;
case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break;
+ case ISD::STRICT_FP_TO_FP16:
case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes
case ISD::STRICT_FP_ROUND:
case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break;
@@ -871,13 +880,17 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) {
// We actually deal with the partially-softened FP_TO_FP16 node too, which
// returns an i16 so doesn't meet the constraints necessary for FP_ROUND.
assert(N->getOpcode() == ISD::FP_ROUND || N->getOpcode() == ISD::FP_TO_FP16 ||
+ N->getOpcode() == ISD::STRICT_FP_TO_FP16 ||
N->getOpcode() == ISD::STRICT_FP_ROUND);
bool IsStrict = N->isStrictFPOpcode();
SDValue Op = N->getOperand(IsStrict ? 1 : 0);
EVT SVT = Op.getValueType();
EVT RVT = N->getValueType(0);
- EVT FloatRVT = N->getOpcode() == ISD::FP_TO_FP16 ? MVT::f16 : RVT;
+ EVT FloatRVT = (N->getOpcode() == ISD::FP_TO_FP16 ||
+ N->getOpcode() == ISD::STRICT_FP_TO_FP16)
+ ? MVT::f16
+ : RVT;
RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, FloatRVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall");
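Among the hunks above, SoftenFloatRes_FABS only switches to the new APInt::getAllOnes spelling; the trick itself is unchanged: once the float is carried as an integer, fabs is an AND with every bit set except the sign bit. A minimal host-side illustration of the same mask (memcpy stands in for the bitcast; this is a sketch of the idea, not the legalizer code):

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

// fabs on a "softened" float: clear the sign bit, i.e. AND with ~(1u << 31).
static float fabsViaMask(float F) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits));   // bitcast float -> i32
  Bits &= ~(1u << 31);                    // all ones except the sign bit
  std::memcpy(&F, &Bits, sizeof(F));
  return F;
}

int main() {
  for (float F : {-0.0f, 1.5f, -3.25f, -1e30f})
    assert(fabsViaMask(F) == std::fabs(F));
}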
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index b8a3dd014901..1fa4d88fcb4a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -23,6 +23,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
using namespace llvm;
#define DEBUG_TYPE "legalize-types"
@@ -81,15 +82,23 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::STRICT_FSETCCS:
case ISD::SETCC: Res = PromoteIntRes_SETCC(N); break;
case ISD::SMIN:
- case ISD::SMAX: Res = PromoteIntRes_SExtIntBinOp(N); break;
+ case ISD::SMAX:
+ Res = PromoteIntRes_SExtIntBinOp(N, /*IsVP*/ false);
+ break;
case ISD::UMIN:
case ISD::UMAX: Res = PromoteIntRes_UMINUMAX(N); break;
- case ISD::SHL: Res = PromoteIntRes_SHL(N); break;
+ case ISD::SHL:
+ Res = PromoteIntRes_SHL(N, /*IsVP*/ false);
+ break;
case ISD::SIGN_EXTEND_INREG:
Res = PromoteIntRes_SIGN_EXTEND_INREG(N); break;
- case ISD::SRA: Res = PromoteIntRes_SRA(N); break;
- case ISD::SRL: Res = PromoteIntRes_SRL(N); break;
+ case ISD::SRA:
+ Res = PromoteIntRes_SRA(N, /*IsVP*/ false);
+ break;
+ case ISD::SRL:
+ Res = PromoteIntRes_SRL(N, /*IsVP*/ false);
+ break;
case ISD::TRUNCATE: Res = PromoteIntRes_TRUNCATE(N); break;
case ISD::UNDEF: Res = PromoteIntRes_UNDEF(N); break;
case ISD::VAARG: Res = PromoteIntRes_VAARG(N); break;
@@ -144,13 +153,19 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::XOR:
case ISD::ADD:
case ISD::SUB:
- case ISD::MUL: Res = PromoteIntRes_SimpleIntBinOp(N); break;
+ case ISD::MUL:
+ Res = PromoteIntRes_SimpleIntBinOp(N, /*IsVP*/ false);
+ break;
case ISD::SDIV:
- case ISD::SREM: Res = PromoteIntRes_SExtIntBinOp(N); break;
+ case ISD::SREM:
+ Res = PromoteIntRes_SExtIntBinOp(N, /*IsVP*/ false);
+ break;
case ISD::UDIV:
- case ISD::UREM: Res = PromoteIntRes_ZExtIntBinOp(N); break;
+ case ISD::UREM:
+ Res = PromoteIntRes_ZExtIntBinOp(N, /*IsVP*/ false);
+ break;
case ISD::SADDO:
case ISD::SSUBO: Res = PromoteIntRes_SADDSUBO(N, ResNo); break;
@@ -220,6 +235,18 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
Res = PromoteIntRes_VECREDUCE(N);
break;
+ case ISD::VP_REDUCE_ADD:
+ case ISD::VP_REDUCE_MUL:
+ case ISD::VP_REDUCE_AND:
+ case ISD::VP_REDUCE_OR:
+ case ISD::VP_REDUCE_XOR:
+ case ISD::VP_REDUCE_SMAX:
+ case ISD::VP_REDUCE_SMIN:
+ case ISD::VP_REDUCE_UMAX:
+ case ISD::VP_REDUCE_UMIN:
+ Res = PromoteIntRes_VP_REDUCE(N);
+ break;
+
case ISD::FREEZE:
Res = PromoteIntRes_FREEZE(N);
break;
@@ -233,6 +260,32 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::FSHR:
Res = PromoteIntRes_FunnelShift(N);
break;
+
+ case ISD::VP_AND:
+ case ISD::VP_OR:
+ case ISD::VP_XOR:
+ case ISD::VP_ADD:
+ case ISD::VP_SUB:
+ case ISD::VP_MUL:
+ Res = PromoteIntRes_SimpleIntBinOp(N, /*IsVP*/ true);
+ break;
+ case ISD::VP_SDIV:
+ case ISD::VP_SREM:
+ Res = PromoteIntRes_SExtIntBinOp(N, /*IsVP*/ true);
+ break;
+ case ISD::VP_UDIV:
+ case ISD::VP_UREM:
+ Res = PromoteIntRes_ZExtIntBinOp(N, /*IsVP*/ true);
+ break;
+ case ISD::VP_SHL:
+ Res = PromoteIntRes_SHL(N, /*IsVP*/ true);
+ break;
+ case ISD::VP_ASHR:
+ Res = PromoteIntRes_SRA(N, /*IsVP*/ true);
+ break;
+ case ISD::VP_LSHR:
+ Res = PromoteIntRes_SRL(N, /*IsVP*/ true);
+ break;
}
// If the result is null then the sub-method took care of registering it.
@@ -438,19 +491,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
CreateStackStoreLoad(InOp, OutVT));
}
-// Helper for BSWAP/BITREVERSE promotion to ensure we can fit any shift amount
-// in the VT returned by getShiftAmountTy and to return a safe VT if we can't.
-static EVT getShiftAmountTyForConstant(EVT VT, const TargetLowering &TLI,
- SelectionDAG &DAG) {
- EVT ShiftVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
- // If any possible shift value won't fit in the prefered type, just use
- // something safe. It will be legalized when the shift is expanded.
- if (!ShiftVT.isVector() &&
- ShiftVT.getSizeInBits() < Log2_32_Ceil(VT.getSizeInBits()))
- ShiftVT = MVT::i32;
- return ShiftVT;
-}
-
SDValue DAGTypeLegalizer::PromoteIntRes_FREEZE(SDNode *N) {
SDValue V = GetPromotedInteger(N->getOperand(0));
return DAG.getNode(ISD::FREEZE, SDLoc(N),
@@ -474,7 +514,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
}
unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
- EVT ShiftVT = getShiftAmountTyForConstant(NVT, TLI, DAG);
+ EVT ShiftVT = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
DAG.getConstant(DiffBits, dl, ShiftVT));
}
@@ -496,7 +536,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) {
}
unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
- EVT ShiftVT = getShiftAmountTyForConstant(NVT, TLI, DAG);
+ EVT ShiftVT = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
return DAG.getNode(ISD::SRL, dl, NVT,
DAG.getNode(ISD::BITREVERSE, dl, NVT, Op),
DAG.getConstant(DiffBits, dl, ShiftVT));
@@ -526,11 +566,24 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Constant(SDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) {
+ EVT OVT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
+ SDLoc dl(N);
+
+ // If the larger CTLZ isn't supported by the target, try to expand now.
+ // If we expand later we'll end up with more operations since we lost the
+ // original type.
+ if (!OVT.isVector() && TLI.isTypeLegal(NVT) &&
+ !TLI.isOperationLegalOrCustomOrPromote(ISD::CTLZ, NVT) &&
+ !TLI.isOperationLegalOrCustomOrPromote(ISD::CTLZ_ZERO_UNDEF, NVT)) {
+ if (SDValue Result = TLI.expandCTLZ(N, DAG)) {
+ Result = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Result);
+ return Result;
+ }
+ }
+
// Zero extend to the promoted type and do the count there.
SDValue Op = ZExtPromotedInteger(N->getOperand(0));
- SDLoc dl(N);
- EVT OVT = N->getValueType(0);
- EVT NVT = Op.getValueType();
Op = DAG.getNode(N->getOpcode(), dl, NVT, Op);
// Subtract off the extra leading bits in the bigger type.
return DAG.getNode(
@@ -540,6 +593,22 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP_PARITY(SDNode *N) {
+ EVT OVT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
+
+ // If the larger CTPOP isn't supported by the target, try to expand now.
+ // If we expand later we'll end up with more operations since we lost the
+ // original type.
+ // TODO: Expand ISD::PARITY. Need to move ExpandPARITY from LegalizeDAG to
+ // TargetLowering.
+ if (N->getOpcode() == ISD::CTPOP && !OVT.isVector() && TLI.isTypeLegal(NVT) &&
+ !TLI.isOperationLegalOrCustomOrPromote(ISD::CTPOP, NVT)) {
+ if (SDValue Result = TLI.expandCTPOP(N, DAG)) {
+ Result = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), NVT, Result);
+ return Result;
+ }
+ }
+
// Zero extend to the promoted type and do the count or parity there.
SDValue Op = ZExtPromotedInteger(N->getOperand(0));
return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op);
@@ -550,6 +619,22 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
EVT OVT = N->getValueType(0);
EVT NVT = Op.getValueType();
SDLoc dl(N);
+
+ // If the larger CTTZ isn't supported by the target, try to expand now.
+ // If we expand later we'll end up with more operations since we lost the
+ // original type. Don't expand if we can use CTPOP or CTLZ expansion on the
+ // larger type.
+ if (!OVT.isVector() && TLI.isTypeLegal(NVT) &&
+ !TLI.isOperationLegalOrCustomOrPromote(ISD::CTTZ, NVT) &&
+ !TLI.isOperationLegalOrCustomOrPromote(ISD::CTTZ_ZERO_UNDEF, NVT) &&
+ !TLI.isOperationLegal(ISD::CTPOP, NVT) &&
+ !TLI.isOperationLegal(ISD::CTLZ, NVT)) {
+ if (SDValue Result = TLI.expandCTTZ(N, DAG)) {
+ Result = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Result);
+ return Result;
+ }
+ }
+
if (N->getOpcode() == ISD::CTTZ) {
// The count is the same in the promoted type except if the original
// value was zero. This can be handled by setting the bit just off
@@ -702,11 +787,16 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue ExtPassThru = GetPromotedInteger(N->getPassThru());
+ ISD::LoadExtType ExtType = N->getExtensionType();
+ if (ExtType == ISD::NON_EXTLOAD)
+ ExtType = ISD::EXTLOAD;
+
SDLoc dl(N);
SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(),
N->getOffset(), N->getMask(), ExtPassThru,
N->getMemoryVT(), N->getMemOperand(),
- N->getAddressingMode(), ISD::EXTLOAD);
+ N->getAddressingMode(), ExtType,
+ N->isExpandingLoad());
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
@@ -792,7 +882,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) {
unsigned NewBits = PromotedType.getScalarSizeInBits();
if (Opcode == ISD::UADDSAT) {
- APInt MaxVal = APInt::getAllOnesValue(OldBits).zext(NewBits);
+ APInt MaxVal = APInt::getAllOnes(OldBits).zext(NewBits);
SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType);
SDValue Add =
DAG.getNode(ISD::ADD, dl, PromotedType, Op1Promoted, Op2Promoted);
@@ -806,7 +896,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) {
// Shift cannot use a min/max expansion, we can't detect overflow if all of
// the bits have been shifted out.
- if (IsShift || TLI.isOperationLegalOrCustom(Opcode, PromotedType)) {
+ if (IsShift || TLI.isOperationLegal(Opcode, PromotedType)) {
unsigned ShiftOp;
switch (Opcode) {
case ISD::SADDSAT:
@@ -1103,12 +1193,15 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) {
return DAG.getSExtOrTrunc(SetCC, dl, NVT);
}
-SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N, bool IsVP) {
SDValue LHS = GetPromotedInteger(N->getOperand(0));
SDValue RHS = N->getOperand(1);
if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
RHS = ZExtPromotedInteger(RHS);
- return DAG.getNode(ISD::SHL, SDLoc(N), LHS.getValueType(), LHS, RHS);
+ if (!IsVP)
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,
+ N->getOperand(2), N->getOperand(3));
}
SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) {
@@ -1117,30 +1210,36 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) {
Op.getValueType(), Op, N->getOperand(1));
}
-SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N, bool IsVP) {
// The input may have strange things in the top bits of the registers, but
// these operations don't care. They may have weird bits going out, but
// that too is okay if they are integer operations.
SDValue LHS = GetPromotedInteger(N->getOperand(0));
SDValue RHS = GetPromotedInteger(N->getOperand(1));
- return DAG.getNode(N->getOpcode(), SDLoc(N),
- LHS.getValueType(), LHS, RHS);
+ if (!IsVP)
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,
+ N->getOperand(2), N->getOperand(3));
}
-SDValue DAGTypeLegalizer::PromoteIntRes_SExtIntBinOp(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntRes_SExtIntBinOp(SDNode *N, bool IsVP) {
// Sign extend the input.
SDValue LHS = SExtPromotedInteger(N->getOperand(0));
SDValue RHS = SExtPromotedInteger(N->getOperand(1));
- return DAG.getNode(N->getOpcode(), SDLoc(N),
- LHS.getValueType(), LHS, RHS);
+ if (!IsVP)
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,
+ N->getOperand(2), N->getOperand(3));
}
-SDValue DAGTypeLegalizer::PromoteIntRes_ZExtIntBinOp(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntRes_ZExtIntBinOp(SDNode *N, bool IsVP) {
// Zero extend the input.
SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
SDValue RHS = ZExtPromotedInteger(N->getOperand(1));
- return DAG.getNode(N->getOpcode(), SDLoc(N),
- LHS.getValueType(), LHS, RHS);
+ if (!IsVP)
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,
+ N->getOperand(2), N->getOperand(3));
}
SDValue DAGTypeLegalizer::PromoteIntRes_UMINUMAX(SDNode *N) {
@@ -1152,22 +1251,28 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UMINUMAX(SDNode *N) {
LHS.getValueType(), LHS, RHS);
}
-SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N, bool IsVP) {
// The input value must be properly sign extended.
SDValue LHS = SExtPromotedInteger(N->getOperand(0));
SDValue RHS = N->getOperand(1);
if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
RHS = ZExtPromotedInteger(RHS);
- return DAG.getNode(ISD::SRA, SDLoc(N), LHS.getValueType(), LHS, RHS);
+ if (!IsVP)
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,
+ N->getOperand(2), N->getOperand(3));
}
-SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N, bool IsVP) {
// The input value must be properly zero extended.
SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
SDValue RHS = N->getOperand(1);
if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
RHS = ZExtPromotedInteger(RHS);
- return DAG.getNode(ISD::SRL, SDLoc(N), LHS.getValueType(), LHS, RHS);
+ if (!IsVP)
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,
+ N->getOperand(2), N->getOperand(3));
}
SDValue DAGTypeLegalizer::PromoteIntRes_Rotate(SDNode *N) {
@@ -1383,7 +1488,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
if (N->getOpcode() == ISD::UMULO) {
// Unsigned overflow occurred if the high part is non-zero.
unsigned Shift = SmallVT.getScalarSizeInBits();
- EVT ShiftTy = getShiftAmountTyForConstant(Mul.getValueType(), TLI, DAG);
+ EVT ShiftTy = TLI.getShiftAmountTy(Mul.getValueType(), DAG.getDataLayout());
SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul,
DAG.getConstant(Shift, DL, ShiftTy));
Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi,
@@ -1523,6 +1628,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::STRICT_UINT_TO_FP: Res = PromoteIntOp_STRICT_UINT_TO_FP(N); break;
case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break;
case ISD::EXTRACT_SUBVECTOR: Res = PromoteIntOp_EXTRACT_SUBVECTOR(N); break;
+ case ISD::INSERT_SUBVECTOR: Res = PromoteIntOp_INSERT_SUBVECTOR(N); break;
case ISD::SHL:
case ISD::SRA:
@@ -1560,6 +1666,17 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::VECREDUCE_SMIN:
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_UMIN: Res = PromoteIntOp_VECREDUCE(N); break;
+ case ISD::VP_REDUCE_ADD:
+ case ISD::VP_REDUCE_MUL:
+ case ISD::VP_REDUCE_AND:
+ case ISD::VP_REDUCE_OR:
+ case ISD::VP_REDUCE_XOR:
+ case ISD::VP_REDUCE_SMAX:
+ case ISD::VP_REDUCE_SMIN:
+ case ISD::VP_REDUCE_UMAX:
+ case ISD::VP_REDUCE_UMIN:
+ Res = PromoteIntOp_VP_REDUCE(N, OpNo);
+ break;
case ISD::SET_ROUNDING: Res = PromoteIntOp_SET_ROUNDING(N); break;
}
@@ -1605,10 +1722,8 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS,
// If the width of OpL/OpR excluding the duplicated sign bits is no greater
// than the width of NewLHS/NewRH, we can avoid inserting real truncate
// instruction, which is redundant eventually.
- unsigned OpLEffectiveBits =
- OpL.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(OpL) + 1;
- unsigned OpREffectiveBits =
- OpR.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(OpR) + 1;
+ unsigned OpLEffectiveBits = DAG.ComputeMinSignedBits(OpL);
+ unsigned OpREffectiveBits = DAG.ComputeMinSignedBits(OpR);
if (OpLEffectiveBits <= NewLHS.getScalarValueSizeInBits() &&
OpREffectiveBits <= NewRHS.getScalarValueSizeInBits()) {
NewLHS = OpL;
@@ -1832,29 +1947,25 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N,
unsigned OpNo) {
-
SDValue DataOp = N->getValue();
- EVT DataVT = DataOp.getValueType();
SDValue Mask = N->getMask();
- SDLoc dl(N);
- bool TruncateStore = false;
if (OpNo == 4) {
+ // The Mask. Update in place.
+ EVT DataVT = DataOp.getValueType();
Mask = PromoteTargetBoolean(Mask, DataVT);
- // Update in place.
SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
NewOps[4] = Mask;
return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
- } else { // Data operand
- assert(OpNo == 1 && "Unexpected operand for promotion");
- DataOp = GetPromotedInteger(DataOp);
- TruncateStore = true;
}
- return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(),
+ assert(OpNo == 1 && "Unexpected operand for promotion");
+ DataOp = GetPromotedInteger(DataOp);
+
+ return DAG.getMaskedStore(N->getChain(), SDLoc(N), DataOp, N->getBasePtr(),
N->getOffset(), Mask, N->getMemoryVT(),
N->getMemOperand(), N->getAddressingMode(),
- TruncateStore, N->isCompressingStore());
+ /*IsTruncating*/ true, N->isCompressingStore());
}
SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N,
@@ -2023,30 +2134,54 @@ SDValue DAGTypeLegalizer::PromoteIntOp_FPOWI(SDNode *N) {
return SDValue();
}
-SDValue DAGTypeLegalizer::PromoteIntOp_VECREDUCE(SDNode *N) {
- SDLoc dl(N);
- SDValue Op;
+static unsigned getExtendForIntVecReduction(SDNode *N) {
switch (N->getOpcode()) {
- default: llvm_unreachable("Expected integer vector reduction");
+ default:
+ llvm_unreachable("Expected integer vector reduction");
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_MUL:
case ISD::VECREDUCE_AND:
case ISD::VECREDUCE_OR:
case ISD::VECREDUCE_XOR:
- Op = GetPromotedInteger(N->getOperand(0));
- break;
+ case ISD::VP_REDUCE_ADD:
+ case ISD::VP_REDUCE_MUL:
+ case ISD::VP_REDUCE_AND:
+ case ISD::VP_REDUCE_OR:
+ case ISD::VP_REDUCE_XOR:
+ return ISD::ANY_EXTEND;
case ISD::VECREDUCE_SMAX:
case ISD::VECREDUCE_SMIN:
- Op = SExtPromotedInteger(N->getOperand(0));
- break;
+ case ISD::VP_REDUCE_SMAX:
+ case ISD::VP_REDUCE_SMIN:
+ return ISD::SIGN_EXTEND;
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_UMIN:
- Op = ZExtPromotedInteger(N->getOperand(0));
- break;
+ case ISD::VP_REDUCE_UMAX:
+ case ISD::VP_REDUCE_UMIN:
+ return ISD::ZERO_EXTEND;
}
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOpVectorReduction(SDNode *N, SDValue V) {
+ switch (getExtendForIntVecReduction(N)) {
+ default:
+ llvm_unreachable("Impossible extension kind for integer reduction");
+ case ISD::ANY_EXTEND:
+ return GetPromotedInteger(V);
+ case ISD::SIGN_EXTEND:
+ return SExtPromotedInteger(V);
+ case ISD::ZERO_EXTEND:
+ return ZExtPromotedInteger(V);
+ }
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_VECREDUCE(SDNode *N) {
+ SDLoc dl(N);
+ SDValue Op = PromoteIntOpVectorReduction(N, N->getOperand(0));
EVT EltVT = Op.getValueType().getVectorElementType();
EVT VT = N->getValueType(0);
+
if (VT.bitsGE(EltVT))
return DAG.getNode(N->getOpcode(), SDLoc(N), VT, Op);
@@ -2056,6 +2191,38 @@ SDValue DAGTypeLegalizer::PromoteIntOp_VECREDUCE(SDNode *N) {
return DAG.getNode(ISD::TRUNCATE, dl, VT, Reduce);
}
+SDValue DAGTypeLegalizer::PromoteIntOp_VP_REDUCE(SDNode *N, unsigned OpNo) {
+ SDLoc DL(N);
+ SDValue Op = N->getOperand(OpNo);
+ SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
+
+ if (OpNo == 2) { // Mask
+ // Update in place.
+ NewOps[2] = PromoteTargetBoolean(Op, N->getOperand(1).getValueType());
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+ }
+
+ assert(OpNo == 1 && "Unexpected operand for promotion");
+
+ Op = PromoteIntOpVectorReduction(N, Op);
+
+ NewOps[OpNo] = Op;
+
+ EVT VT = N->getValueType(0);
+ EVT EltVT = Op.getValueType().getScalarType();
+
+ if (VT.bitsGE(EltVT))
+ return DAG.getNode(N->getOpcode(), SDLoc(N), VT, NewOps);
+
+ // Result size must be >= element/start-value size. If this is not the case
+ // after promotion, also promote both the start value and result type and
+ // then truncate.
+ NewOps[0] =
+ DAG.getNode(getExtendForIntVecReduction(N), DL, EltVT, N->getOperand(0));
+ SDValue Reduce = DAG.getNode(N->getOpcode(), DL, EltVT, NewOps);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, Reduce);
+}
+
SDValue DAGTypeLegalizer::PromoteIntOp_SET_ROUNDING(SDNode *N) {
SDValue Op = ZExtPromotedInteger(N->getOperand(1));
return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op), 0);
@@ -2088,6 +2255,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
report_fatal_error("Do not know how to expand the result of this "
"operator!");
+ case ISD::ARITH_FENCE: SplitRes_ARITH_FENCE(N, Lo, Hi); break;
case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
@@ -2978,7 +3146,7 @@ void DAGTypeLegalizer::ExpandIntRes_ABS(SDNode *N, SDValue &Lo, SDValue &Hi) {
bool HasAddCarry = TLI.isOperationLegalOrCustom(
ISD::ADDCARRY, TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
if (HasAddCarry) {
- EVT ShiftAmtTy = getShiftAmountTyForConstant(NVT, TLI, DAG);
+ EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
SDValue Sign =
DAG.getNode(ISD::SRA, dl, NVT, Hi,
DAG.getConstant(NVT.getSizeInBits() - 1, dl, ShiftAmtTy));
@@ -3087,6 +3255,9 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,
EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType());
Op = GetSoftPromotedHalf(Op);
Op = DAG.getNode(ISD::FP16_TO_FP, dl, NFPVT, Op);
+ Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op);
+ SplitInteger(Op, Lo, Hi);
+ return;
}
RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT);
@@ -3116,6 +3287,9 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType());
Op = GetSoftPromotedHalf(Op);
Op = DAG.getNode(ISD::FP16_TO_FP, dl, NFPVT, Op);
+ Op = DAG.getNode(ISD::FP_TO_UINT, dl, VT, Op);
+ SplitInteger(Op, Lo, Hi);
+ return;
}
RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT);
@@ -3367,11 +3541,6 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
SDValue TL = DAG.getNode(ISD::AND, dl, NVT, T, Mask);
EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
- if (APInt::getMaxValue(ShiftAmtTy.getSizeInBits()).ult(HalfBits)) {
- // The type from TLI is too small to fit the shift amount we want.
- // Override it with i32. The shift will have to be legalized.
- ShiftAmtTy = MVT::i32;
- }
SDValue Shift = DAG.getConstant(HalfBits, dl, ShiftAmtTy);
SDValue TH = DAG.getNode(ISD::SRL, dl, NVT, T, Shift);
SDValue LLH = DAG.getNode(ISD::SRL, dl, NVT, LL, Shift);
@@ -3464,8 +3633,11 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
SDValue Zero = DAG.getConstant(0, dl, VT);
- SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT);
- Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin);
+ // Xor the inputs, if resulting sign bit is 0 the product will be
+ // positive, else negative.
+ SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
+ SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
+ Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
Result = DAG.getSelect(dl, VT, Overflow, Result, Product);
} else {
// For unsigned multiplication, we only need to check the max since we
@@ -3638,7 +3810,7 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
// Saturate to signed maximum.
APInt MaxHi = APInt::getSignedMaxValue(NVTSize);
- APInt MaxLo = APInt::getAllOnesValue(NVTSize);
+ APInt MaxLo = APInt::getAllOnes(NVTSize);
Hi = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(MaxHi, dl, NVT), Hi);
Lo = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(MaxLo, dl, NVT), Lo);
// Saturate to signed minimum.
@@ -3808,9 +3980,6 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
// the new SHL_PARTS operation would need further legalization.
SDValue ShiftOp = N->getOperand(1);
EVT ShiftTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
- assert(ShiftTy.getScalarSizeInBits() >=
- Log2_32_Ceil(VT.getScalarSizeInBits()) &&
- "ShiftAmountTy is too small to cover the range of this type!");
if (ShiftOp.getValueType() != ShiftTy)
ShiftOp = DAG.getZExtOrTrunc(ShiftOp, dl, ShiftTy);
@@ -3857,7 +4026,10 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
}
if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) {
- SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ EVT ShAmtTy =
+ EVT::getIntegerVT(*DAG.getContext(), DAG.getLibInfo().getIntSize());
+ SDValue ShAmt = DAG.getZExtOrTrunc(N->getOperand(1), dl, ShAmtTy);
+ SDValue Ops[2] = {N->getOperand(0), ShAmt};
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setSExt(isSigned);
SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi);
@@ -4035,7 +4207,25 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
LC = RTLIB::MULO_I64;
else if (VT == MVT::i128)
LC = RTLIB::MULO_I128;
- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XMULO!");
+
+ if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC)) {
+ // FIXME: This is not an optimal expansion, but better than crashing.
+ EVT WideVT =
+ EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
+ SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, N->getOperand(0));
+ SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, N->getOperand(1));
+ SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
+ SDValue MulLo, MulHi;
+ SplitInteger(Mul, MulLo, MulHi);
+ SDValue SRA =
+ DAG.getNode(ISD::SRA, dl, VT, MulLo,
+ DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, VT));
+ SDValue Overflow =
+ DAG.getSetCC(dl, N->getValueType(1), MulHi, SRA, ISD::SETNE);
+ SplitInteger(MulLo, Lo, Hi);
+ ReplaceValueWith(SDValue(N, 1), Overflow);
+ return;
+ }
SDValue Temp = DAG.CreateStackTemporary(PtrVT);
// Temporary for the overflow value, default it to zero.
@@ -4188,18 +4378,45 @@ void DAGTypeLegalizer::ExpandIntRes_VECREDUCE(SDNode *N,
void DAGTypeLegalizer::ExpandIntRes_Rotate(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- // Lower the rotate to shifts and ORs which can be expanded.
- SDValue Res;
- TLI.expandROT(N, true /*AllowVectorOps*/, Res, DAG);
+ // Delegate to funnel-shift expansion.
+ SDLoc DL(N);
+ unsigned Opcode = N->getOpcode() == ISD::ROTL ? ISD::FSHL : ISD::FSHR;
+ SDValue Res = DAG.getNode(Opcode, DL, N->getValueType(0), N->getOperand(0),
+ N->getOperand(0), N->getOperand(1));
SplitInteger(Res, Lo, Hi);
}
-void DAGTypeLegalizer::ExpandIntRes_FunnelShift(SDNode *N,
- SDValue &Lo, SDValue &Hi) {
- // Lower the funnel shift to shifts and ORs which can be expanded.
- SDValue Res;
- TLI.expandFunnelShift(N, Res, DAG);
- SplitInteger(Res, Lo, Hi);
+void DAGTypeLegalizer::ExpandIntRes_FunnelShift(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ // Values numbered from least significant to most significant.
+ SDValue In1, In2, In3, In4;
+ GetExpandedInteger(N->getOperand(0), In3, In4);
+ GetExpandedInteger(N->getOperand(1), In1, In2);
+ EVT HalfVT = In1.getValueType();
+
+ SDLoc DL(N);
+ unsigned Opc = N->getOpcode();
+ SDValue ShAmt = N->getOperand(2);
+ EVT ShAmtVT = ShAmt.getValueType();
+ EVT ShAmtCCVT = getSetCCResultType(ShAmtVT);
+
+ // If the shift amount is at least half the bitwidth, swap the inputs.
+ unsigned HalfVTBits = HalfVT.getScalarSizeInBits();
+ SDValue AndNode = DAG.getNode(ISD::AND, DL, ShAmtVT, ShAmt,
+ DAG.getConstant(HalfVTBits, DL, ShAmtVT));
+ SDValue Cond =
+ DAG.getSetCC(DL, ShAmtCCVT, AndNode, DAG.getConstant(0, DL, ShAmtVT),
+ Opc == ISD::FSHL ? ISD::SETNE : ISD::SETEQ);
+
+ // Expand to a pair of funnel shifts.
+ EVT NewShAmtVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
+ SDValue NewShAmt = DAG.getAnyExtOrTrunc(ShAmt, DL, NewShAmtVT);
+
+ SDValue Select1 = DAG.getNode(ISD::SELECT, DL, HalfVT, Cond, In1, In2);
+ SDValue Select2 = DAG.getNode(ISD::SELECT, DL, HalfVT, Cond, In2, In3);
+ SDValue Select3 = DAG.getNode(ISD::SELECT, DL, HalfVT, Cond, In3, In4);
+ Lo = DAG.getNode(Opc, DL, HalfVT, Select2, Select1, NewShAmt);
+ Hi = DAG.getNode(Opc, DL, HalfVT, Select3, Select2, NewShAmt);
}
void DAGTypeLegalizer::ExpandIntRes_VSCALE(SDNode *N, SDValue &Lo,
@@ -4297,7 +4514,7 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
if (CCCode == ISD::SETEQ || CCCode == ISD::SETNE) {
if (RHSLo == RHSHi) {
if (ConstantSDNode *RHSCST = dyn_cast<ConstantSDNode>(RHSLo)) {
- if (RHSCST->isAllOnesValue()) {
+ if (RHSCST->isAllOnes()) {
// Equality comparison to -1.
NewLHS = DAG.getNode(ISD::AND, dl,
LHSLo.getValueType(), LHSLo, LHSHi);
@@ -4317,8 +4534,8 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
// If this is a comparison of the sign bit, just look at the top part.
// X > -1, x < 0
if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(NewRHS))
- if ((CCCode == ISD::SETLT && CST->isNullValue()) || // X < 0
- (CCCode == ISD::SETGT && CST->isAllOnesValue())) { // X > -1
+ if ((CCCode == ISD::SETLT && CST->isZero()) || // X < 0
+ (CCCode == ISD::SETGT && CST->isAllOnes())) { // X > -1
NewLHS = LHSHi;
NewRHS = RHSHi;
return;
@@ -4369,9 +4586,11 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
bool EqAllowed = (CCCode == ISD::SETLE || CCCode == ISD::SETGE ||
CCCode == ISD::SETUGE || CCCode == ISD::SETULE);
- if ((EqAllowed && (HiCmpC && HiCmpC->isNullValue())) ||
- (!EqAllowed && ((HiCmpC && (HiCmpC->getAPIntValue() == 1)) ||
- (LoCmpC && LoCmpC->isNullValue())))) {
+ // FIXME: Is the HiCmpC->isOne() here correct for
+ // ZeroOrNegativeOneBooleanContent.
+ if ((EqAllowed && (HiCmpC && HiCmpC->isZero())) ||
+ (!EqAllowed &&
+ ((HiCmpC && HiCmpC->isOne()) || (LoCmpC && LoCmpC->isZero())))) {
// For LE / GE, if high part is known false, ignore the low part.
// For LT / GT: if low part is known false, return the high part.
// if high part is known true, ignore the low part.
@@ -4706,6 +4925,30 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) {
SDValue InOp0 = N->getOperand(0);
EVT InVT = InOp0.getValueType();
+ // Try and extract from a smaller type so that it eventually falls
+ // into the promotion code below.
+ if (getTypeAction(InVT) == TargetLowering::TypeSplitVector ||
+ getTypeAction(InVT) == TargetLowering::TypeLegal) {
+ EVT NInVT = InVT.getHalfNumVectorElementsVT(*DAG.getContext());
+ unsigned NElts = NInVT.getVectorMinNumElements();
+ uint64_t IdxVal = cast<ConstantSDNode>(BaseIdx)->getZExtValue();
+
+ SDValue Step1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NInVT, InOp0,
+ DAG.getConstant(alignDown(IdxVal, NElts), dl,
+ BaseIdx.getValueType()));
+ SDValue Step2 = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, dl, OutVT, Step1,
+ DAG.getConstant(IdxVal % NElts, dl, BaseIdx.getValueType()));
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, Step2);
+ }
+
+ // Try and extract from a widened type.
+ if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
+ SDValue Ops[] = {GetWidenedVector(InOp0), BaseIdx};
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), OutVT, Ops);
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, Ext);
+ }
+
// Promote operands and see if this is handled by target lowering,
// Otherwise, use the BUILD_VECTOR approach below
if (getTypeAction(InVT) == TargetLowering::TypePromoteInteger) {
@@ -4873,11 +5116,46 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) {
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+ unsigned NumOperands = N->getNumOperands();
+ unsigned NumOutElem = NOutVT.getVectorMinNumElements();
EVT OutElemTy = NOutVT.getVectorElementType();
+ if (OutVT.isScalableVector()) {
+ // Find the largest promoted element type for each of the operands.
+ SDUse *MaxSizedValue = std::max_element(
+ N->op_begin(), N->op_end(), [](const SDValue &A, const SDValue &B) {
+ EVT AVT = A.getValueType().getVectorElementType();
+ EVT BVT = B.getValueType().getVectorElementType();
+ return AVT.getScalarSizeInBits() < BVT.getScalarSizeInBits();
+ });
+ EVT MaxElementVT = MaxSizedValue->getValueType().getVectorElementType();
+
+ // Then promote all vectors to the largest element type.
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned I = 0; I < NumOperands; ++I) {
+ SDValue Op = N->getOperand(I);
+ EVT OpVT = Op.getValueType();
+ if (getTypeAction(OpVT) == TargetLowering::TypePromoteInteger)
+ Op = GetPromotedInteger(Op);
+ else
+ assert(getTypeAction(OpVT) == TargetLowering::TypeLegal &&
+ "Unhandled legalization type");
+
+ if (OpVT.getVectorElementType().getScalarSizeInBits() <
+ MaxElementVT.getScalarSizeInBits())
+ Op = DAG.getAnyExtOrTrunc(Op, dl,
+ OpVT.changeVectorElementType(MaxElementVT));
+ Ops.push_back(Op);
+ }
+
+ // Do the CONCAT on the promoted type and finally truncate to (the promoted)
+ // NOutVT.
+ return DAG.getAnyExtOrTrunc(
+ DAG.getNode(ISD::CONCAT_VECTORS, dl,
+ OutVT.changeVectorElementType(MaxElementVT), Ops),
+ dl, NOutVT);
+ }
unsigned NumElem = N->getOperand(0).getValueType().getVectorNumElements();
- unsigned NumOutElem = NOutVT.getVectorNumElements();
- unsigned NumOperands = N->getNumOperands();
assert(NumElem * NumOperands == NumOutElem &&
"Unexpected number of elements");
@@ -4957,7 +5235,17 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VECREDUCE(SDNode *N) {
// we can simply change the result type.
SDLoc dl(N);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), dl, NVT, N->ops());
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_VP_REDUCE(SDNode *N) {
+ // The VP_REDUCE result size may be larger than the element size, so we can
+ // simply change the result type. However the start value and result must be
+ // the same.
+ SDLoc DL(N);
+ SDValue Start = PromoteIntOpVectorReduction(N, N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), DL, Start.getValueType(), Start,
+ N->getOperand(1), N->getOperand(2), N->getOperand(3));
}
SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) {
@@ -4974,6 +5262,21 @@ SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) {
return DAG.getAnyExtOrTrunc(Ext, dl, N->getValueType(0));
}
+SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_SUBVECTOR(SDNode *N) {
+ SDLoc dl(N);
+ // The result type is equal to the first input operand's type, so the
+ // type that needs promoting must be the second source vector.
+ SDValue V0 = N->getOperand(0);
+ SDValue V1 = GetPromotedInteger(N->getOperand(1));
+ SDValue Idx = N->getOperand(2);
+ EVT PromVT = EVT::getVectorVT(*DAG.getContext(),
+ V1.getValueType().getVectorElementType(),
+ V0.getValueType().getVectorElementCount());
+ V0 = DAG.getAnyExtOrTrunc(V0, dl, PromVT);
+ SDValue Ext = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, PromVT, V0, V1, Idx);
+ return DAG.getAnyExtOrTrunc(Ext, dl, N->getValueType(0));
+}
+
SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N) {
SDLoc dl(N);
SDValue V0 = GetPromotedInteger(N->getOperand(0));
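One of the hunks above adds a widening fallback for signed multiply-with-overflow when no MULO libcall is available: sign-extend both operands to twice the width, multiply, split the product, and report overflow when the high half differs from the arithmetic sign-splat of the low half. The check below is a host-side sketch of that same condition for i32, not the DAG code itself:

#include <cassert>
#include <cstdint>

// Signed multiply-with-overflow via widening, mirroring the fallback above:
// overflow <=> high half of the wide product != (low half >>arith 31).
static bool smulOverflow32(int32_t A, int32_t B, int32_t &Res) {
  int64_t Wide = (int64_t)A * (int64_t)B;
  int32_t Lo = (int32_t)Wide;
  int32_t Hi = (int32_t)(Wide >> 32);
  Res = Lo;
  return Hi != (Lo >> 31);     // sign-splat of Lo must match Hi
}

int main() {
  int32_t R;
  assert(!smulOverflow32(46341, 46340, R));        // 2147441940 fits
  assert(smulOverflow32(46341, 46341, R));         // 2147488281 overflows
  assert(!smulOverflow32(-2147483647 - 1, 1, R));  // INT_MIN * 1 is fine
  assert(smulOverflow32(-2147483647 - 1, -1, R));  // INT_MIN * -1 overflows
}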
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 05a974af3b55..1f73c9eea104 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -223,8 +223,7 @@ bool DAGTypeLegalizer::run() {
#endif
PerformExpensiveChecks();
- SDNode *N = Worklist.back();
- Worklist.pop_back();
+ SDNode *N = Worklist.pop_back_val();
assert(N->getNodeId() == ReadyToProcess &&
"Node should be ready if on worklist!");
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 8d17d8fc68b1..da282ecad282 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -289,6 +289,12 @@ private:
return DAG.getZeroExtendInReg(Op, DL, OldVT);
}
+  // Promote the given operand V (vector or scalar) according to N's specific
+  // reduction kind. N must be an integer VECREDUCE_* or VP_REDUCE_*. The
+  // operand is extended with the reduction's nominal extension kind
+  // (ISD::(ANY|ZERO|SIGN)_EXTEND) and the promoted value is returned.

+ SDValue PromoteIntOpVectorReduction(SDNode *N, SDValue V);
+
// Integer Result Promotion.
void PromoteIntegerResult(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
@@ -332,14 +338,14 @@ private:
SDValue PromoteIntRes_VSELECT(SDNode *N);
SDValue PromoteIntRes_SELECT_CC(SDNode *N);
SDValue PromoteIntRes_SETCC(SDNode *N);
- SDValue PromoteIntRes_SHL(SDNode *N);
- SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N);
- SDValue PromoteIntRes_ZExtIntBinOp(SDNode *N);
- SDValue PromoteIntRes_SExtIntBinOp(SDNode *N);
+ SDValue PromoteIntRes_SHL(SDNode *N, bool IsVP);
+ SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N, bool IsVP);
+ SDValue PromoteIntRes_ZExtIntBinOp(SDNode *N, bool IsVP);
+ SDValue PromoteIntRes_SExtIntBinOp(SDNode *N, bool IsVP);
SDValue PromoteIntRes_UMINUMAX(SDNode *N);
SDValue PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N);
- SDValue PromoteIntRes_SRA(SDNode *N);
- SDValue PromoteIntRes_SRL(SDNode *N);
+ SDValue PromoteIntRes_SRA(SDNode *N, bool IsVP);
+ SDValue PromoteIntRes_SRL(SDNode *N, bool IsVP);
SDValue PromoteIntRes_TRUNCATE(SDNode *N);
SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo);
@@ -353,6 +359,7 @@ private:
SDValue PromoteIntRes_DIVFIX(SDNode *N);
SDValue PromoteIntRes_FLT_ROUNDS(SDNode *N);
SDValue PromoteIntRes_VECREDUCE(SDNode *N);
+ SDValue PromoteIntRes_VP_REDUCE(SDNode *N);
SDValue PromoteIntRes_ABS(SDNode *N);
SDValue PromoteIntRes_Rotate(SDNode *N);
SDValue PromoteIntRes_FunnelShift(SDNode *N);
@@ -369,6 +376,7 @@ private:
SDValue PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N);
+ SDValue PromoteIntOp_INSERT_SUBVECTOR(SDNode *N);
SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N);
SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N);
SDValue PromoteIntOp_SPLAT_VECTOR(SDNode *N);
@@ -394,6 +402,7 @@ private:
SDValue PromoteIntOp_FIX(SDNode *N);
SDValue PromoteIntOp_FPOWI(SDNode *N);
SDValue PromoteIntOp_VECREDUCE(SDNode *N);
+ SDValue PromoteIntOp_VP_REDUCE(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_SET_ROUNDING(SDNode *N);
void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code);
@@ -518,6 +527,7 @@ private:
SDValue SoftenFloatRes_Unary(SDNode *N, RTLIB::Libcall LC);
SDValue SoftenFloatRes_Binary(SDNode *N, RTLIB::Libcall LC);
SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
+ SDValue SoftenFloatRes_ARITH_FENCE(SDNode *N);
SDValue SoftenFloatRes_BITCAST(SDNode *N);
SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N);
SDValue SoftenFloatRes_ConstantFP(SDNode *N);
@@ -816,7 +826,7 @@ private:
// Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>.
void SplitVectorResult(SDNode *N, unsigned ResNo);
- void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi, bool IsVP);
void SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -898,6 +908,7 @@ private:
SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N);
SDValue WidenVecRes_EXTEND_VECTOR_INREG(SDNode* N);
SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
+ SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N);
SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
SDValue WidenVecRes_LOAD(SDNode* N);
SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N);
@@ -912,7 +923,7 @@ private:
SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N);
SDValue WidenVecRes_Ternary(SDNode *N);
- SDValue WidenVecRes_Binary(SDNode *N);
+ SDValue WidenVecRes_Binary(SDNode *N, bool IsVP);
SDValue WidenVecRes_BinaryCanTrap(SDNode *N);
SDValue WidenVecRes_BinaryWithExtraScalarOp(SDNode *N);
SDValue WidenVecRes_StrictFP(SDNode *N);
@@ -972,10 +983,10 @@ private:
LoadSDNode *LD, ISD::LoadExtType ExtType);
/// Helper function to generate a set of stores to store a widen vector into
- /// non-widen memory.
+ /// non-widen memory. Returns true if successful, false otherwise.
/// StChain: list of chains for the stores we have generated
/// ST: store of a widen value
- void GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST);
+ bool GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST);
/// Modifies a vector input (widen or narrows) to a vector of NVT. The
/// input vector must have the same element type as NVT.
@@ -1011,6 +1022,7 @@ private:
// Generic Result Splitting.
void SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo,
SDValue &Lo, SDValue &Hi);
+ void SplitRes_ARITH_FENCE (SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitRes_SELECT (SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitRes_SELECT_CC (SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitRes_UNDEF (SDNode *N, SDValue &Lo, SDValue &Hi);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 81cc2bf10d25..3d3c9a2ad837 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -571,3 +571,13 @@ void DAGTypeLegalizer::SplitRes_FREEZE(SDNode *N, SDValue &Lo, SDValue &Hi) {
Lo = DAG.getNode(ISD::FREEZE, dl, L.getValueType(), L);
Hi = DAG.getNode(ISD::FREEZE, dl, H.getValueType(), H);
}
+
+void DAGTypeLegalizer::SplitRes_ARITH_FENCE(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue L, H;
+ SDLoc DL(N);
+ GetSplitOp(N->getOperand(0), L, H);
+
+ Lo = DAG.getNode(ISD::ARITH_FENCE, DL, L.getValueType(), L);
+ Hi = DAG.getNode(ISD::ARITH_FENCE, DL, H.getValueType(), H);
+}
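SplitRes_ARITH_FENCE above follows the same pattern as SplitRes_FREEZE: when a value is expanded into Lo/Hi halves, the operation is simply applied to each half independently. That only works for operations with no cross-half data flow; something like ADD carries between the halves, which is why integer expansion reaches for an add-with-carry pair instead. A small host-side demonstration of the difference on a 64-bit value split into 32-bit halves (the lo/hi/join helpers are invented for the example):

#include <cassert>
#include <cstdint>

static uint32_t lo(uint64_t X) { return (uint32_t)X; }
static uint32_t hi(uint64_t X) { return (uint32_t)(X >> 32); }
static uint64_t join(uint32_t Hi, uint32_t Lo) {
  return ((uint64_t)Hi << 32) | Lo;
}

int main() {
  uint64_t X = 0x00000001FFFFFFFFULL, Y = 1;

  // Bit-local ops (NOT here; FREEZE/ARITH_FENCE in the legalizer) split
  // cleanly: applying them per half reproduces the full-width result.
  assert(join(~hi(X), ~lo(X)) == ~X);

  // ADD does not: the low halves produce a carry the high half needs, so
  // naive per-half addition is wrong and a carry must be propagated.
  assert(join(hi(X) + hi(Y), lo(X) + lo(Y)) != X + Y);
  uint64_t LoSum = (uint64_t)lo(X) + lo(Y);
  uint32_t Carry = (uint32_t)(LoSum >> 32);
  assert(join(hi(X) + hi(Y) + Carry, (uint32_t)LoSum) == X + Y);
}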
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index ebe3bfc4b75a..88a28a3be53e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -538,8 +538,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
return RecursivelyLegalizeResults(Op, ResultVals);
}
-// FIXME: This is very similar to the X86 override of
-// TargetLowering::LowerOperationWrapper. Can we merge them somehow?
+// FIXME: This is very similar to TargetLowering::LowerOperationWrapper. Can we
+// merge them somehow?
bool VectorLegalizer::LowerOperationWrapper(SDNode *Node,
SmallVectorImpl<SDValue> &Results) {
SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
@@ -774,8 +774,8 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
ExpandSETCC(Node, Results);
return;
case ISD::ABS:
- if (TLI.expandABS(Node, Tmp, DAG)) {
- Results.push_back(Tmp);
+ if (SDValue Expanded = TLI.expandABS(Node, DAG)) {
+ Results.push_back(Expanded);
return;
}
break;
@@ -783,22 +783,22 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
ExpandBITREVERSE(Node, Results);
return;
case ISD::CTPOP:
- if (TLI.expandCTPOP(Node, Tmp, DAG)) {
- Results.push_back(Tmp);
+ if (SDValue Expanded = TLI.expandCTPOP(Node, DAG)) {
+ Results.push_back(Expanded);
return;
}
break;
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
- if (TLI.expandCTLZ(Node, Tmp, DAG)) {
- Results.push_back(Tmp);
+ if (SDValue Expanded = TLI.expandCTLZ(Node, DAG)) {
+ Results.push_back(Expanded);
return;
}
break;
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
- if (TLI.expandCTTZ(Node, Tmp, DAG)) {
- Results.push_back(Tmp);
+ if (SDValue Expanded = TLI.expandCTTZ(Node, DAG)) {
+ Results.push_back(Expanded);
return;
}
break;
@@ -943,10 +943,8 @@ SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) {
// What is the size of each element in the vector mask.
EVT BitTy = MaskTy.getScalarType();
- Mask = DAG.getSelect(DL, BitTy, Mask,
- DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), DL,
- BitTy),
- DAG.getConstant(0, DL, BitTy));
+ Mask = DAG.getSelect(DL, BitTy, Mask, DAG.getAllOnesConstant(DL, BitTy),
+ DAG.getConstant(0, DL, BitTy));
// Broadcast the mask so that the entire vector is all one or all zero.
if (VT.isFixedLengthVector())
@@ -960,9 +958,7 @@ SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) {
Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1);
Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2);
- SDValue AllOnes = DAG.getConstant(
- APInt::getAllOnesValue(BitTy.getSizeInBits()), DL, MaskTy);
- SDValue NotMask = DAG.getNode(ISD::XOR, DL, MaskTy, Mask, AllOnes);
+ SDValue NotMask = DAG.getNOT(DL, Mask, MaskTy);
Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask);
Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask);
@@ -1099,25 +1095,45 @@ static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
SDValue VectorLegalizer::ExpandBSWAP(SDNode *Node) {
EVT VT = Node->getValueType(0);
+ // Scalable vectors can't use shuffle expansion.
+ if (VT.isScalableVector())
+ return TLI.expandBSWAP(Node, DAG);
+
// Generate a byte wise shuffle mask for the BSWAP.
SmallVector<int, 16> ShuffleMask;
createBSWAPShuffleMask(VT, ShuffleMask);
EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size());
// Only emit a shuffle if the mask is legal.
- if (!TLI.isShuffleMaskLegal(ShuffleMask, ByteVT))
- return DAG.UnrollVectorOp(Node);
+ if (TLI.isShuffleMaskLegal(ShuffleMask, ByteVT)) {
+ SDLoc DL(Node);
+ SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0));
+ Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask);
+ return DAG.getNode(ISD::BITCAST, DL, VT, Op);
+ }
- SDLoc DL(Node);
- SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0));
- Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask);
- return DAG.getNode(ISD::BITCAST, DL, VT, Op);
+ // If we have the appropriate vector bit operations, it is better to use them
+ // than unrolling and expanding each component.
+ if (TLI.isOperationLegalOrCustom(ISD::SHL, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
+ TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) &&
+ TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))
+ return TLI.expandBSWAP(Node, DAG);
+
+ // Otherwise unroll.
+ return DAG.UnrollVectorOp(Node);
}
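Before unrolling, the code above prefers TLI.expandBSWAP whenever SHL/SRL/AND/OR are available, because a byte swap decomposes into a handful of those operations. A standalone sketch of that decomposition for one 32-bit element (plain C++, not the DAG nodes the legalizer actually builds):

    #include <cassert>
    #include <cstdint>

    // Byte swap of a 32-bit value using only shifts, ANDs and ORs -- the same
    // operations the legalizer checks for before preferring the bit-op
    // expansion over unrolling.
    static uint32_t bswap32(uint32_t x) {
      return ((x & 0x000000FFu) << 24) |
             ((x & 0x0000FF00u) << 8)  |
             ((x & 0x00FF0000u) >> 8)  |
             ((x & 0xFF000000u) >> 24);
    }

    int main() {
      assert(bswap32(0x12345678u) == 0x78563412u);
      return 0;
    }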
void VectorLegalizer::ExpandBITREVERSE(SDNode *Node,
SmallVectorImpl<SDValue> &Results) {
EVT VT = Node->getValueType(0);
+ // We can't unroll or use shuffles for scalable vectors.
+ if (VT.isScalableVector()) {
+ Results.push_back(TLI.expandBITREVERSE(Node, DAG));
+ return;
+ }
+
// If we have the scalar operation, it's probably cheaper to unroll it.
if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType())) {
SDValue Tmp = DAG.UnrollVectorOp(Node);
@@ -1156,9 +1172,10 @@ void VectorLegalizer::ExpandBITREVERSE(SDNode *Node,
if (TLI.isOperationLegalOrCustom(ISD::SHL, VT) &&
TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) &&
- TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))
- // Let LegalizeDAG handle this later.
+ TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT)) {
+ Results.push_back(TLI.expandBITREVERSE(Node, DAG));
return;
+ }
// Otherwise unroll.
SDValue Tmp = DAG.UnrollVectorOp(Node);
@@ -1207,9 +1224,7 @@ SDValue VectorLegalizer::ExpandVSELECT(SDNode *Node) {
Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1);
Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2);
- SDValue AllOnes = DAG.getConstant(
- APInt::getAllOnesValue(VT.getScalarSizeInBits()), DL, VT);
- SDValue NotMask = DAG.getNode(ISD::XOR, DL, VT, Mask, AllOnes);
+ SDValue NotMask = DAG.getNOT(DL, Mask, VT);
Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask);
Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask);
@@ -1502,9 +1517,8 @@ void VectorLegalizer::UnrollStrictFPOp(SDNode *Node,
if (Node->getOpcode() == ISD::STRICT_FSETCC ||
Node->getOpcode() == ISD::STRICT_FSETCCS)
ScalarResult = DAG.getSelect(dl, EltVT, ScalarResult,
- DAG.getConstant(APInt::getAllOnesValue
- (EltVT.getSizeInBits()), dl, EltVT),
- DAG.getConstant(0, dl, EltVT));
+ DAG.getAllOnesConstant(dl, EltVT),
+ DAG.getConstant(0, dl, EltVT));
OpValues.push_back(ScalarResult);
OpChains.push_back(ScalarChain);
@@ -1536,9 +1550,7 @@ SDValue VectorLegalizer::UnrollVSETCC(SDNode *Node) {
TLI.getSetCCResultType(DAG.getDataLayout(),
*DAG.getContext(), TmpEltVT),
LHSElem, RHSElem, CC);
- Ops[i] = DAG.getSelect(dl, EltVT, Ops[i],
- DAG.getConstant(APInt::getAllOnesValue
- (EltVT.getSizeInBits()), dl, EltVT),
+ Ops[i] = DAG.getSelect(dl, EltVT, Ops[i], DAG.getAllOnesConstant(dl, EltVT),
DAG.getConstant(0, dl, EltVT));
}
return DAG.getBuildVector(VT, dl, Ops);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 91242bbf866f..539c9cb9c256 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -529,7 +529,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) {
SDValue Arg = N->getOperand(2).getOperand(0);
if (Arg.isUndef())
return DAG.getUNDEF(N->getValueType(0).getVectorElementType());
- unsigned Op = !cast<ConstantSDNode>(Arg)->isNullValue();
+ unsigned Op = !cast<ConstantSDNode>(Arg)->isZero();
return GetScalarizedVector(N->getOperand(Op));
}
@@ -1045,7 +1045,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::USHLSAT:
case ISD::ROTL:
case ISD::ROTR:
- SplitVecRes_BinOp(N, Lo, Hi);
+ SplitVecRes_BinOp(N, Lo, Hi, /*IsVP*/ false);
break;
case ISD::FMA:
case ISD::FSHL:
@@ -1082,6 +1082,26 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::UDIVFIXSAT:
SplitVecRes_FIX(N, Lo, Hi);
break;
+ case ISD::VP_ADD:
+ case ISD::VP_AND:
+ case ISD::VP_MUL:
+ case ISD::VP_OR:
+ case ISD::VP_SUB:
+ case ISD::VP_XOR:
+ case ISD::VP_SHL:
+ case ISD::VP_LSHR:
+ case ISD::VP_ASHR:
+ case ISD::VP_SDIV:
+ case ISD::VP_UDIV:
+ case ISD::VP_SREM:
+ case ISD::VP_UREM:
+ case ISD::VP_FADD:
+ case ISD::VP_FSUB:
+ case ISD::VP_FMUL:
+ case ISD::VP_FDIV:
+ case ISD::VP_FREM:
+ SplitVecRes_BinOp(N, Lo, Hi, /*IsVP*/ true);
+ break;
}
// If Lo/Hi is null, the sub-method took care of registering results etc.
@@ -1113,8 +1133,8 @@ void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT,
}
}
-void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo,
- SDValue &Hi) {
+void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi,
+ bool IsVP) {
SDValue LHSLo, LHSHi;
GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
SDValue RHSLo, RHSHi;
@@ -1123,8 +1143,41 @@ void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo,
const SDNodeFlags Flags = N->getFlags();
unsigned Opcode = N->getOpcode();
- Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Flags);
- Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Flags);
+ if (!IsVP) {
+ Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Flags);
+ Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Flags);
+ return;
+ }
+
+ // Split the mask.
+ SDValue MaskLo, MaskHi;
+ SDValue Mask = N->getOperand(2);
+ EVT MaskVT = Mask.getValueType();
+ if (getTypeAction(MaskVT) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Mask, MaskLo, MaskHi);
+ else
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, SDLoc(Mask));
+
+ // Split the vector length parameter.
+ // %evl -> umin(%evl, %halfnumelts) and usubsat(%evl - %halfnumelts).
+ SDValue EVL = N->getOperand(3);
+ EVT VecVT = N->getValueType(0);
+ EVT EVLVT = EVL.getValueType();
+ assert(VecVT.getVectorElementCount().isKnownEven() &&
+ "Expecting the mask to be an evenly-sized vector");
+ unsigned HalfMinNumElts = VecVT.getVectorMinNumElements() / 2;
+ SDValue HalfNumElts =
+ VecVT.isFixedLengthVector()
+ ? DAG.getConstant(HalfMinNumElts, dl, EVLVT)
+ : DAG.getVScale(dl, EVLVT,
+ APInt(EVLVT.getScalarSizeInBits(), HalfMinNumElts));
+ SDValue EVLLo = DAG.getNode(ISD::UMIN, dl, EVLVT, EVL, HalfNumElts);
+ SDValue EVLHi = DAG.getNode(ISD::USUBSAT, dl, EVLVT, EVL, HalfNumElts);
+
+ Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(),
+ {LHSLo, RHSLo, MaskLo, EVLLo}, Flags);
+ Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(),
+ {LHSHi, RHSHi, MaskHi, EVLHi}, Flags);
}
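A minimal scalar sketch of the EVL arithmetic used when splitting a VP operation: the low half keeps umin(EVL, HalfNumElts) active lanes and the high half keeps the saturating remainder. Plain unsigned integers stand in for the UMIN/USUBSAT nodes; the function and variable names are illustrative.

    #include <algorithm>
    #include <cassert>

    // Split an explicit vector length between the low and high halves of a
    // split VP operation.
    static void splitEVL(unsigned EVL, unsigned HalfNumElts,
                         unsigned &EVLLo, unsigned &EVLHi) {
      EVLLo = std::min(EVL, HalfNumElts);                // ISD::UMIN
      EVLHi = EVL > HalfNumElts ? EVL - HalfNumElts : 0; // ISD::USUBSAT
    }

    int main() {
      unsigned Lo, Hi;
      splitEVL(/*EVL=*/6, /*HalfNumElts=*/4, Lo, Hi); // e.g. splitting a v8 op
      assert(Lo == 4 && Hi == 2);
      splitEVL(/*EVL=*/3, /*HalfNumElts=*/4, Lo, Hi); // all active lanes in Lo
      assert(Lo == 3 && Hi == 0);
      return 0;
    }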
void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo,
@@ -2985,6 +3038,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::BITCAST: Res = WidenVecRes_BITCAST(N); break;
case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break;
case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break;
+ case ISD::INSERT_SUBVECTOR:
+ Res = WidenVecRes_INSERT_SUBVECTOR(N);
+ break;
case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
case ISD::LOAD: Res = WidenVecRes_LOAD(N); break;
@@ -3035,7 +3091,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::USHLSAT:
case ISD::ROTL:
case ISD::ROTR:
- Res = WidenVecRes_Binary(N);
+ Res = WidenVecRes_Binary(N, /*IsVP*/ false);
break;
case ISD::FADD:
@@ -3159,6 +3215,31 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FSHR:
Res = WidenVecRes_Ternary(N);
break;
+ case ISD::VP_ADD:
+ case ISD::VP_AND:
+ case ISD::VP_MUL:
+ case ISD::VP_OR:
+ case ISD::VP_SUB:
+ case ISD::VP_XOR:
+ case ISD::VP_SHL:
+ case ISD::VP_LSHR:
+ case ISD::VP_ASHR:
+ case ISD::VP_SDIV:
+ case ISD::VP_UDIV:
+ case ISD::VP_SREM:
+ case ISD::VP_UREM:
+ case ISD::VP_FADD:
+ case ISD::VP_FSUB:
+ case ISD::VP_FMUL:
+ case ISD::VP_FDIV:
+ case ISD::VP_FREM:
+ // Vector-predicated binary op widening. Note that -- unlike the
+ // unpredicated versions -- we don't have to worry about trapping on
+ // operations like UDIV, FADD, etc., as we pass on the original vector
+ // length parameter. This means the widened elements containing garbage
+ // aren't active.
+ Res = WidenVecRes_Binary(N, /*IsVP*/ true);
+ break;
}
// If Res is null, the sub-method took care of registering the result.
@@ -3176,13 +3257,31 @@ SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) {
return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3);
}
-SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
+SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N, bool IsVP) {
// Binary op widening.
SDLoc dl(N);
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue InOp1 = GetWidenedVector(N->getOperand(0));
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
- return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, N->getFlags());
+ if (!IsVP)
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2,
+ N->getFlags());
+ // For VP operations, we must also widen the mask. Note that the mask type
+ // may not actually need widening, leading it to be split along with the VP
+ // operation.
+ // FIXME: This could lead to an infinite split/widen loop. We only handle the
+ // case where the mask needs widening to an identically-sized type as the
+ // vector inputs.
+ SDValue Mask = N->getOperand(2);
+ assert(getTypeAction(Mask.getValueType()) ==
+ TargetLowering::TypeWidenVector &&
+ "Unable to widen binary VP op");
+ Mask = GetWidenedVector(Mask);
+ assert(Mask.getValueType().getVectorElementCount() ==
+ WidenVT.getVectorElementCount() &&
+ "Unable to widen binary VP op");
+ return DAG.getNode(N->getOpcode(), dl, WidenVT,
+ {InOp1, InOp2, Mask, N->getOperand(3)}, N->getFlags());
}
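A scalar model of why widening a VP binary op is safe even for trapping operations: only lanes below the original EVL with an active mask bit are ever computed, so the garbage lanes introduced by widening stay inactive. This is an illustrative sketch, not the DAG-level code.

    #include <cassert>
    #include <vector>

    // Scalar model of a vector-predicated add after widening: only lanes with
    // index < EVL and an active mask bit are computed.
    static std::vector<int> vpAdd(const std::vector<int> &A,
                                  const std::vector<int> &B,
                                  const std::vector<bool> &Mask, unsigned EVL) {
      std::vector<int> R(A.size(), 0); // inactive lanes left untouched (zero here)
      for (unsigned i = 0; i < A.size(); ++i)
        if (i < EVL && Mask[i])
          R[i] = A[i] + B[i];
      return R;
    }

    int main() {
      // The original op had 3 lanes; imagine it was widened to 4. Lane 3 holds
      // garbage, but EVL == 3 keeps it inactive.
      std::vector<int> A{1, 2, 3, 999}, B{10, 20, 30, 999};
      std::vector<bool> M{true, true, true, true};
      std::vector<int> R = vpAdd(A, B, M, /*EVL=*/3);
      assert(R[0] == 11 && R[1] == 22 && R[2] == 33 && R[3] == 0);
      return 0;
    }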
SDValue DAGTypeLegalizer::WidenVecRes_BinaryWithExtraScalarOp(SDNode *N) {
@@ -3527,7 +3626,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
SDLoc DL(N);
EVT WidenVT = TLI.getTypeToTransformTo(Ctx, N->getValueType(0));
- unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ ElementCount WidenEC = WidenVT.getVectorElementCount();
EVT InVT = InOp.getValueType();
@@ -3547,14 +3646,14 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
}
EVT InEltVT = InVT.getVectorElementType();
- EVT InWidenVT = EVT::getVectorVT(Ctx, InEltVT, WidenNumElts);
- unsigned InVTNumElts = InVT.getVectorNumElements();
+ EVT InWidenVT = EVT::getVectorVT(Ctx, InEltVT, WidenEC);
+ ElementCount InVTEC = InVT.getVectorElementCount();
if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
InOp = GetWidenedVector(N->getOperand(0));
InVT = InOp.getValueType();
- InVTNumElts = InVT.getVectorNumElements();
- if (InVTNumElts == WidenNumElts) {
+ InVTEC = InVT.getVectorElementCount();
+ if (InVTEC == WidenEC) {
if (N->getNumOperands() == 1)
return DAG.getNode(Opcode, DL, WidenVT, InOp);
return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1), Flags);
@@ -3578,9 +3677,10 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
// it an illegal type that might lead to repeatedly splitting the input
// and then widening it. To avoid this, we widen the input only if
// it results in a legal type.
- if (WidenNumElts % InVTNumElts == 0) {
+ if (WidenEC.isKnownMultipleOf(InVTEC.getKnownMinValue())) {
// Widen the input and call convert on the widened input vector.
- unsigned NumConcat = WidenNumElts/InVTNumElts;
+ unsigned NumConcat =
+ WidenEC.getKnownMinValue() / InVTEC.getKnownMinValue();
SmallVector<SDValue, 16> Ops(NumConcat, DAG.getUNDEF(InVT));
Ops[0] = InOp;
SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops);
@@ -3589,7 +3689,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1), Flags);
}
- if (InVTNumElts % WidenNumElts == 0) {
+ if (InVTEC.isKnownMultipleOf(WidenEC.getKnownMinValue())) {
SDValue InVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InWidenVT, InOp,
DAG.getVectorIdxConstant(0, DL));
// Extract the input and convert the shortened input vector.
@@ -3601,7 +3701,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
// Otherwise unroll into some nasty scalar code and rebuild the vector.
EVT EltVT = WidenVT.getVectorElementType();
- SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getUNDEF(EltVT));
+ SmallVector<SDValue, 16> Ops(WidenEC.getFixedValue(), DAG.getUNDEF(EltVT));
// Use the original element count so we don't do more scalar ops than
// necessary.
unsigned MinElts = N->getValueType(0).getVectorNumElements();
@@ -3962,14 +4062,26 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
return DAG.getBuildVector(WidenVT, dl, Ops);
}
+SDValue DAGTypeLegalizer::WidenVecRes_INSERT_SUBVECTOR(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = N->getOperand(1);
+ SDValue Idx = N->getOperand(2);
+ SDLoc dl(N);
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WidenVT, InOp1, InOp2, Idx);
+}
+
SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
SDValue InOp = N->getOperand(0);
SDValue Idx = N->getOperand(1);
SDLoc dl(N);
- if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector)
+ auto InOpTypeAction = getTypeAction(InOp.getValueType());
+ if (InOpTypeAction == TargetLowering::TypeWidenVector)
InOp = GetWidenedVector(InOp);
EVT InVT = InOp.getValueType();
@@ -3979,20 +4091,49 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
if (IdxVal == 0 && InVT == WidenVT)
return InOp;
- if (VT.isScalableVector())
- report_fatal_error("Don't know how to widen the result of "
- "EXTRACT_SUBVECTOR for scalable vectors");
-
// Check if we can extract from the vector.
- unsigned WidenNumElts = WidenVT.getVectorNumElements();
- unsigned InNumElts = InVT.getVectorNumElements();
+ unsigned WidenNumElts = WidenVT.getVectorMinNumElements();
+ unsigned InNumElts = InVT.getVectorMinNumElements();
if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts)
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx);
+ if (VT.isScalableVector()) {
+ // Try to split the operation up into smaller extracts and concat the
+ // results together, e.g.
+ // nxv6i64 extract_subvector(nxv12i64, 6)
+ // <->
+ // nxv8i64 concat(
+ // nxv2i64 extract_subvector(nxv16i64, 6)
+ // nxv2i64 extract_subvector(nxv16i64, 8)
+ // nxv2i64 extract_subvector(nxv16i64, 10)
+ // undef)
+ unsigned VTNElts = VT.getVectorMinNumElements();
+ unsigned GCD = greatestCommonDivisor(VTNElts, WidenNumElts);
+ assert((IdxVal % GCD) == 0 && "Expected Idx to be a multiple of the broken "
+ "down type's element count");
+ EVT PartVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
+ ElementCount::getScalable(GCD));
+ // Avoid recursion around e.g. nxv1i8.
+ if (getTypeAction(PartVT) != TargetLowering::TypeWidenVector) {
+ SmallVector<SDValue> Parts;
+ unsigned I = 0;
+ for (; I < VTNElts / GCD; ++I)
+ Parts.push_back(
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, PartVT, InOp,
+ DAG.getVectorIdxConstant(IdxVal + I * GCD, dl)));
+ for (; I < WidenNumElts / GCD; ++I)
+ Parts.push_back(DAG.getUNDEF(PartVT));
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Parts);
+ }
+
+ report_fatal_error("Don't know how to widen the result of "
+ "EXTRACT_SUBVECTOR for scalable vectors");
+ }
+
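The index arithmetic behind the scalable EXTRACT_SUBVECTOR widening above can be sketched in isolation: the extract is broken into pieces of GCD(VTNElts, WidenNumElts) elements and the widened result is padded with undef pieces. A hypothetical standalone helper (not LLVM code):

    #include <cassert>
    #include <numeric>
    #include <vector>

    // One piece of the widened extract: either a real extract at Offset or an
    // undef filler.
    struct Part { unsigned Offset; bool IsUndef; };

    static std::vector<Part> planExtract(unsigned VTNElts, unsigned WidenNElts,
                                         unsigned IdxVal) {
      unsigned GCD = std::gcd(VTNElts, WidenNElts);
      assert(IdxVal % GCD == 0 && "index must be a multiple of the piece size");
      std::vector<Part> Parts;
      unsigned I = 0;
      for (; I < VTNElts / GCD; ++I)     // real GCD-sized extracts
        Parts.push_back({IdxVal + I * GCD, false});
      for (; I < WidenNElts / GCD; ++I)  // undef padding up to the widened size
        Parts.push_back({0, true});
      return Parts;
    }

    int main() {
      // e.g. a 6-element extract at index 6 widened to 8 elements: GCD(6,8) = 2,
      // so three 2-element extracts at offsets 6, 8, 10 plus one undef piece.
      std::vector<Part> P = planExtract(6, 8, 6);
      assert(P.size() == 4);
      assert(P[0].Offset == 6 && P[1].Offset == 8 && P[2].Offset == 10);
      assert(P[3].IsUndef);
      return 0;
    }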
// We could try widening the input to the right length but for now, extract
// the original elements, fill the rest with undefs and build a vector.
SmallVector<SDValue, 16> Ops(WidenNumElts);
- EVT EltVT = VT.getVectorElementType();
unsigned NumElts = VT.getVectorNumElements();
unsigned i;
for (i = 0; i < NumElts; ++i)
@@ -4037,20 +4178,55 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
else
Result = GenWidenVectorLoads(LdChain, LD);
- // If we generate a single load, we can use that for the chain. Otherwise,
- // build a factor node to remember the multiple loads are independent and
- // chain to that.
- SDValue NewChain;
- if (LdChain.size() == 1)
- NewChain = LdChain[0];
- else
- NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, LdChain);
+ if (Result) {
+ // If we generate a single load, we can use that for the chain. Otherwise,
+ // build a factor node to remember the multiple loads are independent and
+ // chain to that.
+ SDValue NewChain;
+ if (LdChain.size() == 1)
+ NewChain = LdChain[0];
+ else
+ NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, LdChain);
- // Modified the chain - switch anything that used the old chain to use
- // the new one.
- ReplaceValueWith(SDValue(N, 1), NewChain);
+ // Modified the chain - switch anything that used the old chain to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 1), NewChain);
- return Result;
+ return Result;
+ }
+
+ // Generate a vector-predicated load if it is custom/legal on the target. To
+ // avoid possible recursion, only do this if the widened mask type is legal.
+ // FIXME: Not all targets may support EVL in VP_LOAD. These will have been
+ // removed from the IR by the ExpandVectorPredication pass but we're
+ // reintroducing them here.
+ EVT LdVT = LD->getMemoryVT();
+ EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), LdVT);
+ EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+ WideVT.getVectorElementCount());
+ if (ExtType == ISD::NON_EXTLOAD && WideVT.isScalableVector() &&
+ TLI.isOperationLegalOrCustom(ISD::VP_LOAD, WideVT) &&
+ TLI.isTypeLegal(WideMaskVT)) {
+ SDLoc DL(N);
+ SDValue Mask = DAG.getAllOnesConstant(DL, WideMaskVT);
+ MVT EVLVT = TLI.getVPExplicitVectorLengthTy();
+ unsigned NumVTElts = LdVT.getVectorMinNumElements();
+ SDValue EVL =
+ DAG.getVScale(DL, EVLVT, APInt(EVLVT.getScalarSizeInBits(), NumVTElts));
+ const auto *MMO = LD->getMemOperand();
+ SDValue NewLoad =
+ DAG.getLoadVP(WideVT, DL, LD->getChain(), LD->getBasePtr(), Mask, EVL,
+ MMO->getPointerInfo(), MMO->getAlign(), MMO->getFlags(),
+ MMO->getAAInfo());
+
+ // Modified the chain - switch anything that used the old chain to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 1), NewLoad.getValue(1));
+
+ return NewLoad;
+ }
+
+ report_fatal_error("Unable to widen vector load");
}
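The VP_LOAD fallback keeps the widened load from reading the extra lanes by using an all-ones mask together with an explicit vector length of vscale times the original minimum element count. A tiny standalone sketch of that EVL computation, with an assumed vscale value purely for illustration:

    #include <cassert>

    // Number of lanes the widened VP load actually touches: the original
    // scalable element count, scaled by the runtime vscale.
    static unsigned widenedEVL(unsigned VScale, unsigned OrigMinNumElts) {
      return VScale * OrigMinNumElts; // mirrors DAG.getVScale(..., NumVTElts)
    }

    int main() {
      // e.g. widening nxv6i32 to nxv8i32 on a machine where vscale == 2: the
      // VP load still reads 2 * 6 = 12 elements, not 16.
      assert(widenedEVL(/*VScale=*/2, /*OrigMinNumElts=*/6) == 12);
      return 0;
    }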
SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
@@ -4351,7 +4527,7 @@ SDValue DAGTypeLegalizer::WidenVSELECTMask(SDNode *N) {
SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ ElementCount WidenEC = WidenVT.getVectorElementCount();
SDValue Cond1 = N->getOperand(0);
EVT CondVT = Cond1.getValueType();
@@ -4365,8 +4541,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
}
EVT CondEltVT = CondVT.getVectorElementType();
- EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(),
- CondEltVT, WidenNumElts);
+ EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(), CondEltVT, WidenEC);
if (getTypeAction(CondVT) == TargetLowering::TypeWidenVector)
Cond1 = GetWidenedVector(Cond1);
@@ -4891,12 +5066,42 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
return TLI.scalarizeVectorStore(ST, DAG);
SmallVector<SDValue, 16> StChain;
- GenWidenVectorStores(StChain, ST);
+ if (GenWidenVectorStores(StChain, ST)) {
+ if (StChain.size() == 1)
+ return StChain[0];
- if (StChain.size() == 1)
- return StChain[0];
- else
return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain);
+ }
+
+ // Generate a vector-predicated store if it is custom/legal on the target.
+ // To avoid possible recursion, only do this if the widened mask type is
+ // legal.
+ // FIXME: Not all targets may support EVL in VP_STORE. These will have been
+ // removed from the IR by the ExpandVectorPredication pass but we're
+ // reintroducing them here.
+ SDValue StVal = ST->getValue();
+ EVT StVT = StVal.getValueType();
+ EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), StVT);
+ EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+ WideVT.getVectorElementCount());
+ if (WideVT.isScalableVector() &&
+ TLI.isOperationLegalOrCustom(ISD::VP_STORE, WideVT) &&
+ TLI.isTypeLegal(WideMaskVT)) {
+ // Widen the value.
+ SDLoc DL(N);
+ StVal = GetWidenedVector(StVal);
+ SDValue Mask = DAG.getAllOnesConstant(DL, WideMaskVT);
+ MVT EVLVT = TLI.getVPExplicitVectorLengthTy();
+ unsigned NumVTElts = StVT.getVectorMinNumElements();
+ SDValue EVL =
+ DAG.getVScale(DL, EVLVT, APInt(EVLVT.getScalarSizeInBits(), NumVTElts));
+ const auto *MMO = ST->getMemOperand();
+ return DAG.getStoreVP(ST->getChain(), DL, StVal, ST->getBasePtr(), Mask,
+ EVL, MMO->getPointerInfo(), MMO->getAlign(),
+ MMO->getFlags(), MMO->getAAInfo());
+ }
+
+ report_fatal_error("Unable to widen vector store");
}
SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
@@ -5147,9 +5352,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_VSELECT(SDNode *N) {
// Align: If 0, don't allow use of a wider type
// WidenEx: If Align is not 0, the additional amount we can load/store from.
-static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
- unsigned Width, EVT WidenVT,
- unsigned Align = 0, unsigned WidenEx = 0) {
+static Optional<EVT> findMemType(SelectionDAG &DAG, const TargetLowering &TLI,
+ unsigned Width, EVT WidenVT,
+ unsigned Align = 0, unsigned WidenEx = 0) {
EVT WidenEltVT = WidenVT.getVectorElementType();
const bool Scalable = WidenVT.isScalableVector();
unsigned WidenWidth = WidenVT.getSizeInBits().getKnownMinSize();
@@ -5204,9 +5409,11 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
}
}
+ // Using element-wise loads and stores for widening operations is not
+ // supported for scalable vectors.
if (Scalable)
- report_fatal_error("Using element-wise loads and stores for widening "
- "operations is not supported for scalable vectors");
+ return None;
+
return RetVT;
}
@@ -5266,32 +5473,63 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
TypeSize WidthDiff = WidenWidth - LdWidth;
// Allow wider loads if they are sufficiently aligned to avoid memory faults
// and if the original load is simple.
- unsigned LdAlign = (!LD->isSimple()) ? 0 : LD->getAlignment();
+ unsigned LdAlign =
+ (!LD->isSimple() || LdVT.isScalableVector()) ? 0 : LD->getAlignment();
// Find the vector type that can load from.
- EVT NewVT = FindMemType(DAG, TLI, LdWidth.getKnownMinSize(), WidenVT, LdAlign,
- WidthDiff.getKnownMinSize());
- TypeSize NewVTWidth = NewVT.getSizeInBits();
- SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(),
+ Optional<EVT> FirstVT =
+ findMemType(DAG, TLI, LdWidth.getKnownMinSize(), WidenVT, LdAlign,
+ WidthDiff.getKnownMinSize());
+
+ if (!FirstVT)
+ return SDValue();
+
+ SmallVector<EVT, 8> MemVTs;
+ TypeSize FirstVTWidth = FirstVT->getSizeInBits();
+
+ // Unless we're able to load in one instruction we must work out how to load
+ // the remainder.
+ if (!TypeSize::isKnownLE(LdWidth, FirstVTWidth)) {
+ Optional<EVT> NewVT = FirstVT;
+ TypeSize RemainingWidth = LdWidth;
+ TypeSize NewVTWidth = FirstVTWidth;
+ do {
+ RemainingWidth -= NewVTWidth;
+ if (TypeSize::isKnownLT(RemainingWidth, NewVTWidth)) {
+ // The current type we are using is too large. Find a better size.
+ NewVT = findMemType(DAG, TLI, RemainingWidth.getKnownMinSize(), WidenVT,
+ LdAlign, WidthDiff.getKnownMinSize());
+ if (!NewVT)
+ return SDValue();
+ NewVTWidth = NewVT->getSizeInBits();
+ }
+ MemVTs.push_back(*NewVT);
+ } while (TypeSize::isKnownGT(RemainingWidth, NewVTWidth));
+ }
+
+ SDValue LdOp = DAG.getLoad(*FirstVT, dl, Chain, BasePtr, LD->getPointerInfo(),
LD->getOriginalAlign(), MMOFlags, AAInfo);
LdChain.push_back(LdOp.getValue(1));
// Check if we can load the element with one instruction.
- if (TypeSize::isKnownLE(LdWidth, NewVTWidth)) {
- if (!NewVT.isVector()) {
- unsigned NumElts = WidenWidth.getFixedSize() / NewVTWidth.getFixedSize();
- EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
+ if (MemVTs.empty()) {
+ assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth));
+ if (!FirstVT->isVector()) {
+ unsigned NumElts =
+ WidenWidth.getFixedSize() / FirstVTWidth.getFixedSize();
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), *FirstVT, NumElts);
SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
}
- if (NewVT == WidenVT)
+ if (FirstVT == WidenVT)
return LdOp;
// TODO: We don't currently have any tests that exercise this code path.
- assert(WidenWidth.getFixedSize() % NewVTWidth.getFixedSize() == 0);
- unsigned NumConcat = WidenWidth.getFixedSize() / NewVTWidth.getFixedSize();
+ assert(WidenWidth.getFixedSize() % FirstVTWidth.getFixedSize() == 0);
+ unsigned NumConcat =
+ WidenWidth.getFixedSize() / FirstVTWidth.getFixedSize();
SmallVector<SDValue, 16> ConcatOps(NumConcat);
- SDValue UndefVal = DAG.getUNDEF(NewVT);
+ SDValue UndefVal = DAG.getUNDEF(*FirstVT);
ConcatOps[0] = LdOp;
for (unsigned i = 1; i != NumConcat; ++i)
ConcatOps[i] = UndefVal;
@@ -5304,28 +5542,22 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
uint64_t ScaledOffset = 0;
MachinePointerInfo MPI = LD->getPointerInfo();
- do {
- LdWidth -= NewVTWidth;
- IncrementPointer(cast<LoadSDNode>(LdOp), NewVT, MPI, BasePtr,
- &ScaledOffset);
-
- if (TypeSize::isKnownLT(LdWidth, NewVTWidth)) {
- // The current type we are using is too large. Find a better size.
- NewVT = FindMemType(DAG, TLI, LdWidth.getKnownMinSize(), WidenVT, LdAlign,
- WidthDiff.getKnownMinSize());
- NewVTWidth = NewVT.getSizeInBits();
- }
+ // First increment past the first load.
+ IncrementPointer(cast<LoadSDNode>(LdOp), *FirstVT, MPI, BasePtr,
+ &ScaledOffset);
+
+ for (EVT MemVT : MemVTs) {
Align NewAlign = ScaledOffset == 0
? LD->getOriginalAlign()
: commonAlignment(LD->getAlign(), ScaledOffset);
SDValue L =
- DAG.getLoad(NewVT, dl, Chain, BasePtr, MPI, NewAlign, MMOFlags, AAInfo);
- LdChain.push_back(L.getValue(1));
+ DAG.getLoad(MemVT, dl, Chain, BasePtr, MPI, NewAlign, MMOFlags, AAInfo);
LdOps.push_back(L);
- LdOp = L;
- } while (TypeSize::isKnownGT(LdWidth, NewVTWidth));
+ LdChain.push_back(L.getValue(1));
+ IncrementPointer(cast<LoadSDNode>(L), MemVT, MPI, BasePtr, &ScaledOffset);
+ }
// Build the vector from the load operations.
unsigned End = LdOps.size();
@@ -5447,7 +5679,7 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
return DAG.getBuildVector(WidenVT, dl, Ops);
}
-void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
+bool DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
StoreSDNode *ST) {
// The strategy assumes that we can efficiently store power-of-two widths.
// The routine chops the vector into the largest vector stores with the same
@@ -5473,9 +5705,30 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
MachinePointerInfo MPI = ST->getPointerInfo();
uint64_t ScaledOffset = 0;
+
+ // A breakdown of how to widen this vector store. Each element of the vector
+ // is a memory VT combined with the number of times it is to be stored,
+ // e.g., v5i32 -> {{v2i32,2},{i32,1}}
+ SmallVector<std::pair<EVT, unsigned>, 4> MemVTs;
+
while (StWidth.isNonZero()) {
// Find the largest vector type we can store with.
- EVT NewVT = FindMemType(DAG, TLI, StWidth.getKnownMinSize(), ValVT);
+ Optional<EVT> NewVT =
+ findMemType(DAG, TLI, StWidth.getKnownMinSize(), ValVT);
+ if (!NewVT)
+ return false;
+ MemVTs.push_back({*NewVT, 0});
+ TypeSize NewVTWidth = NewVT->getSizeInBits();
+
+ do {
+ StWidth -= NewVTWidth;
+ MemVTs.back().second++;
+ } while (StWidth.isNonZero() && TypeSize::isKnownGE(StWidth, NewVTWidth));
+ }
+
+ for (const auto &Pair : MemVTs) {
+ EVT NewVT = Pair.first;
+ unsigned Count = Pair.second;
TypeSize NewVTWidth = NewVT.getSizeInBits();
if (NewVT.isVector()) {
@@ -5490,12 +5743,10 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
MMOFlags, AAInfo);
StChain.push_back(PartStore);
- StWidth -= NewVTWidth;
Idx += NumVTElts;
-
IncrementPointer(cast<StoreSDNode>(PartStore), NewVT, MPI, BasePtr,
&ScaledOffset);
- } while (StWidth.isNonZero() && TypeSize::isKnownGE(StWidth, NewVTWidth));
+ } while (--Count);
} else {
// Cast the vector to the scalar type we can store.
unsigned NumElts = ValWidth.getFixedSize() / NewVTWidth.getFixedSize();
@@ -5511,13 +5762,14 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
MMOFlags, AAInfo);
StChain.push_back(PartStore);
- StWidth -= NewVTWidth;
IncrementPointer(cast<StoreSDNode>(PartStore), NewVT, MPI, BasePtr);
- } while (StWidth.isNonZero() && TypeSize::isKnownGE(StWidth, NewVTWidth));
+ } while (--Count);
// Restore index back to be relative to the original widen element type.
Idx = Idx * NewVTWidth.getFixedSize() / ValEltWidth;
}
}
+
+ return true;
}
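The MemVTs breakdown built above can be modelled as a greedy split of the total store width into repeated chunk widths, matching the v5i32 -> {{v2i32,2},{i32,1}} example in the comment. A standalone sketch in plain C++; the real code asks findMemType for legal types rather than halving a width, so the helper below is illustrative only.

    #include <cassert>
    #include <utility>
    #include <vector>

    // Greedily break TotalWidth bits into (chunk width, count) pairs, shrinking
    // the chunk whenever it no longer fits the remaining width.
    static std::vector<std::pair<unsigned, unsigned>>
    breakDown(unsigned TotalWidth, unsigned WidestChunk) {
      std::vector<std::pair<unsigned, unsigned>> Chunks;
      unsigned Remaining = TotalWidth;
      unsigned Chunk = WidestChunk;
      while (Remaining != 0) {
        while (Chunk > Remaining) // current chunk too large: find a better size
          Chunk /= 2;
        unsigned Count = 0;
        while (Remaining >= Chunk) {
          Remaining -= Chunk;
          ++Count;
        }
        Chunks.push_back({Chunk, Count});
      }
      return Chunks;
    }

    int main() {
      // 160 bits (v5i32) with a widest legal chunk of 64 bits (v2i32):
      // two 64-bit pieces and one 32-bit piece.
      auto C = breakDown(/*TotalWidth=*/160, /*WidestChunk=*/64);
      assert(C.size() == 2);
      assert(C[0].first == 64 && C[0].second == 2);
      assert(C[1].first == 32 && C[1].second == 1);
      return 0;
    }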
/// Modifies a vector input (widen or narrows) to a vector of NVT. The
diff --git a/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
index 75b4242a415c..f64b332a7fef 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
@@ -192,7 +192,7 @@ public:
// Returns the SDNodes which this SDDbgValue depends on.
SmallVector<SDNode *> getSDNodes() const {
SmallVector<SDNode *> Dependencies;
- for (SDDbgOperand DbgOp : getLocationOps())
+ for (const SDDbgOperand &DbgOp : getLocationOps())
if (DbgOp.getKind() == SDDbgOperand::SDNODE)
Dependencies.push_back(DbgOp.getSDNode());
for (SDNode *Node : getAdditionalDependencies())
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 0022e5ec31f0..1b89864116cb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -56,9 +56,7 @@ namespace {
SUnit *pop() {
if (empty()) return nullptr;
- SUnit *V = Queue.back();
- Queue.pop_back();
- return V;
+ return Queue.pop_back_val();
}
};
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index b2a8c8bdd78c..95f7e43b151d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -384,13 +384,12 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
// There are either zero or one users of the Glue result.
bool HasGlueUse = false;
- for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
- UI != E; ++UI)
- if (GlueVal.isOperandOf(*UI)) {
+ for (SDNode *U : N->uses())
+ if (GlueVal.isOperandOf(U)) {
HasGlueUse = true;
assert(N->getNodeId() == -1 && "Node already inserted!");
N->setNodeId(NodeSUnit->NodeNum);
- N = *UI;
+ N = U;
if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
NodeSUnit->isCall = true;
break;
@@ -742,7 +741,7 @@ ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
/// Returns true if \p DV has any VReg operand locations which don't exist in
/// VRBaseMap.
auto HasUnknownVReg = [&VRBaseMap](SDDbgValue *DV) {
- for (SDDbgOperand L : DV->getLocationOps()) {
+ for (const SDDbgOperand &L : DV->getLocationOps()) {
if (L.getKind() == SDDbgOperand::SDNODE &&
VRBaseMap.count({L.getSDNode(), L.getResNo()}) == 0)
return true;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 2a98464425c4..008665d50233 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -28,6 +28,7 @@
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -175,7 +176,7 @@ bool ISD::isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly) {
if (!BuildVectorOnly && N->getOpcode() == ISD::SPLAT_VECTOR) {
APInt SplatVal;
- return isConstantSplatVector(N, SplatVal) && SplatVal.isAllOnesValue();
+ return isConstantSplatVector(N, SplatVal) && SplatVal.isAllOnes();
}
if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
@@ -224,7 +225,7 @@ bool ISD::isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly) {
if (!BuildVectorOnly && N->getOpcode() == ISD::SPLAT_VECTOR) {
APInt SplatVal;
- return isConstantSplatVector(N, SplatVal) && SplatVal.isNullValue();
+ return isConstantSplatVector(N, SplatVal) && SplatVal.isZero();
}
if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
@@ -412,6 +413,28 @@ bool ISD::isVPOpcode(unsigned Opcode) {
}
}
+bool ISD::isVPBinaryOp(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ return false;
+#define PROPERTY_VP_BINARYOP_SDNODE(SDOPC) \
+ case ISD::SDOPC: \
+ return true;
+#include "llvm/IR/VPIntrinsics.def"
+ }
+}
+
+bool ISD::isVPReduction(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ return false;
+#define PROPERTY_VP_REDUCTION_SDNODE(SDOPC) \
+ case ISD::SDOPC: \
+ return true;
+#include "llvm/IR/VPIntrinsics.def"
+ }
+}
+
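isVPBinaryOp and isVPReduction use the X-macro idiom: the opcode list lives once in VPIntrinsics.def and each query re-expands it with its own per-entry macro. A self-contained analogue of the pattern (the opcode list here is illustrative, not the real .def contents):

    #include <cassert>

    // The single source of truth for the opcode list; in LLVM this role is
    // played by VPIntrinsics.def.
    #define OPCODE_LIST(X) X(VP_ADD) X(VP_SUB) X(VP_MUL)

    enum Opcode {
    #define AS_ENUM(OPC) OPC,
      OPCODE_LIST(AS_ENUM)
    #undef AS_ENUM
      OTHER_OP
    };

    // Re-expand the same list as switch cases, exactly as isVPBinaryOp does
    // with PROPERTY_VP_BINARYOP_SDNODE.
    static bool isVPBinaryOpLike(Opcode Opc) {
      switch (Opc) {
      default:
        return false;
    #define AS_CASE(OPC) case OPC: return true;
        OPCODE_LIST(AS_CASE)
    #undef AS_CASE
      }
    }

    int main() {
      assert(isVPBinaryOpLike(VP_ADD));
      assert(!isVPBinaryOpLike(OTHER_OP));
      return 0;
    }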
/// The operand position of the vector mask.
Optional<unsigned> ISD::getVPMaskIdx(unsigned Opcode) {
switch (Opcode) {
@@ -683,6 +706,34 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(ST->getPointerInfo().getAddrSpace());
break;
}
+ case ISD::VP_LOAD: {
+ const VPLoadSDNode *ELD = cast<VPLoadSDNode>(N);
+ ID.AddInteger(ELD->getMemoryVT().getRawBits());
+ ID.AddInteger(ELD->getRawSubclassData());
+ ID.AddInteger(ELD->getPointerInfo().getAddrSpace());
+ break;
+ }
+ case ISD::VP_STORE: {
+ const VPStoreSDNode *EST = cast<VPStoreSDNode>(N);
+ ID.AddInteger(EST->getMemoryVT().getRawBits());
+ ID.AddInteger(EST->getRawSubclassData());
+ ID.AddInteger(EST->getPointerInfo().getAddrSpace());
+ break;
+ }
+ case ISD::VP_GATHER: {
+ const VPGatherSDNode *EG = cast<VPGatherSDNode>(N);
+ ID.AddInteger(EG->getMemoryVT().getRawBits());
+ ID.AddInteger(EG->getRawSubclassData());
+ ID.AddInteger(EG->getPointerInfo().getAddrSpace());
+ break;
+ }
+ case ISD::VP_SCATTER: {
+ const VPScatterSDNode *ES = cast<VPScatterSDNode>(N);
+ ID.AddInteger(ES->getMemoryVT().getRawBits());
+ ID.AddInteger(ES->getRawSubclassData());
+ ID.AddInteger(ES->getPointerInfo().getAddrSpace());
+ break;
+ }
case ISD::MLOAD: {
const MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
ID.AddInteger(MLD->getMemoryVT().getRawBits());
@@ -1319,10 +1370,7 @@ SDValue SelectionDAG::getPtrExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) {
/// getNOT - Create a bitwise NOT operation as (XOR Val, -1).
SDValue SelectionDAG::getNOT(const SDLoc &DL, SDValue Val, EVT VT) {
- EVT EltVT = VT.getScalarType();
- SDValue NegOne =
- getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), DL, VT);
- return getNode(ISD::XOR, DL, VT, Val, NegOne);
+ return getNode(ISD::XOR, DL, VT, Val, getAllOnesConstant(DL, VT));
}
SDValue SelectionDAG::getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT) {
@@ -1901,7 +1949,7 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1,
if (SameNumElts)
return N1;
if (auto *C = dyn_cast<ConstantSDNode>(Splat))
- if (C->isNullValue())
+ if (C->isZero())
return N1;
}
@@ -2265,19 +2313,8 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1)) {
const APInt &C1 = N1C->getAPIntValue();
- switch (Cond) {
- default: llvm_unreachable("Unknown integer setcc!");
- case ISD::SETEQ: return getBoolConstant(C1 == C2, dl, VT, OpVT);
- case ISD::SETNE: return getBoolConstant(C1 != C2, dl, VT, OpVT);
- case ISD::SETULT: return getBoolConstant(C1.ult(C2), dl, VT, OpVT);
- case ISD::SETUGT: return getBoolConstant(C1.ugt(C2), dl, VT, OpVT);
- case ISD::SETULE: return getBoolConstant(C1.ule(C2), dl, VT, OpVT);
- case ISD::SETUGE: return getBoolConstant(C1.uge(C2), dl, VT, OpVT);
- case ISD::SETLT: return getBoolConstant(C1.slt(C2), dl, VT, OpVT);
- case ISD::SETGT: return getBoolConstant(C1.sgt(C2), dl, VT, OpVT);
- case ISD::SETLE: return getBoolConstant(C1.sle(C2), dl, VT, OpVT);
- case ISD::SETGE: return getBoolConstant(C1.sge(C2), dl, VT, OpVT);
- }
+ return getBoolConstant(ICmpInst::compare(C1, C2, getICmpCondCode(Cond)),
+ dl, VT, OpVT);
}
}
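The FoldSetCC change replaces a per-condition switch with a single predicate evaluation over the two constants. A standalone sketch of that idea with an illustrative predicate enum (not LLVM's ICmpInst API):

    #include <cassert>
    #include <cstdint>

    enum class Pred { EQ, NE, ULT, UGT, SLT, SGT };

    // Evaluate an integer predicate on two known constants, which is all the
    // setcc constant fold needs to do.
    static bool compareConstants(int64_t L, int64_t R, Pred P) {
      switch (P) {
      case Pred::EQ:  return L == R;
      case Pred::NE:  return L != R;
      case Pred::ULT: return static_cast<uint64_t>(L) < static_cast<uint64_t>(R);
      case Pred::UGT: return static_cast<uint64_t>(L) > static_cast<uint64_t>(R);
      case Pred::SLT: return L < R;
      case Pred::SGT: return L > R;
      }
      return false;
    }

    int main() {
      assert(compareConstants(-1, 1, Pred::SLT)); // signed: -1 < 1
      assert(compareConstants(-1, 1, Pred::UGT)); // unsigned: 0xFFFF... > 1
      return 0;
    }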
@@ -2380,7 +2417,7 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits) {
return SDValue();
APInt DemandedElts = VT.isVector()
- ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ ? APInt::getAllOnes(VT.getVectorNumElements())
: APInt(1, 1);
return GetDemandedBits(V, DemandedBits, DemandedElts);
}
@@ -2475,7 +2512,7 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
switch (V.getOpcode()) {
case ISD::SPLAT_VECTOR:
UndefElts = V.getOperand(0).isUndef()
- ? APInt::getAllOnesValue(DemandedElts.getBitWidth())
+ ? APInt::getAllOnes(DemandedElts.getBitWidth())
: APInt(DemandedElts.getBitWidth(), 0);
return true;
case ISD::ADD:
@@ -2507,7 +2544,7 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
unsigned NumElts = VT.getVectorNumElements();
assert(NumElts == DemandedElts.getBitWidth() && "Vector size mismatch");
- UndefElts = APInt::getNullValue(NumElts);
+ UndefElts = APInt::getZero(NumElts);
switch (V.getOpcode()) {
case ISD::BUILD_VECTOR: {
@@ -2576,7 +2613,7 @@ bool SelectionDAG::isSplatValue(SDValue V, bool AllowUndefs) {
// For now we don't support this with scalable vectors.
if (!VT.isScalableVector())
- DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
+ DemandedElts = APInt::getAllOnes(VT.getVectorNumElements());
return isSplatValue(V, DemandedElts, UndefElts) &&
(AllowUndefs || !UndefElts);
}
@@ -2592,7 +2629,7 @@ SDValue SelectionDAG::getSplatSourceVector(SDValue V, int &SplatIdx) {
APInt DemandedElts;
if (!VT.isScalableVector())
- DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
+ DemandedElts = APInt::getAllOnes(VT.getVectorNumElements());
if (isSplatValue(V, DemandedElts, UndefElts)) {
if (VT.isScalableVector()) {
@@ -2740,7 +2777,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, unsigned Depth) const {
}
APInt DemandedElts = VT.isVector()
- ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ ? APInt::getAllOnes(VT.getVectorNumElements())
: APInt(1, 1);
return computeKnownBits(Op, DemandedElts, Depth);
}
@@ -2878,7 +2915,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
APInt DemandedSrcElts = DemandedElts;
- DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx);
+ DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
Known.One.setAllBits();
Known.Zero.setAllBits();
@@ -2965,11 +3002,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
// bits from the overlapping larger input elements and extracting the
// sub sections we actually care about.
unsigned SubScale = SubBitWidth / BitWidth;
- APInt SubDemandedElts(NumElts / SubScale, 0);
- for (unsigned i = 0; i != NumElts; ++i)
- if (DemandedElts[i])
- SubDemandedElts.setBit(i / SubScale);
-
+ APInt SubDemandedElts =
+ APIntOps::ScaleBitMask(DemandedElts, NumElts / SubScale);
Known2 = computeKnownBits(N0, SubDemandedElts, Depth + 1);
Known.Zero.setAllBits(); Known.One.setAllBits();
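APIntOps::ScaleBitMask, as used here, shrinks a demanded-elements mask when several narrow lanes map to one wider lane: the wide lane is demanded if any of its narrow lanes is. A standalone sketch of that shrinking direction, with std::vector<bool> standing in for APInt:

    #include <cassert>
    #include <vector>

    // Map a demanded mask over Demanded.size() narrow lanes to NewSize wide
    // lanes, demanding a wide lane when any narrow lane in its group is set.
    static std::vector<bool> scaleDownMask(const std::vector<bool> &Demanded,
                                           unsigned NewSize) {
      unsigned Scale = Demanded.size() / NewSize;
      std::vector<bool> Out(NewSize, false);
      for (unsigned i = 0; i < Demanded.size(); ++i)
        if (Demanded[i])
          Out[i / Scale] = true;
      return Out;
    }

    int main() {
      // 8 narrow lanes viewed as 4 wide lanes: demanding lanes {1, 6} demands
      // wide lanes {0, 3}.
      std::vector<bool> D{false, true, false, false, false, false, true, false};
      std::vector<bool> W = scaleDownMask(D, 4);
      assert(W[0] && !W[1] && !W[2] && W[3]);
      return 0;
    }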
@@ -3415,7 +3449,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
// If we know the element index, just demand that vector element, else for
// an unknown element index, ignore DemandedElts and demand them all.
- APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
+ APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts))
DemandedSrcElts =
@@ -3647,6 +3681,12 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const {
}))
return true;
+ // Is the operand of a splat vector a constant power of two?
+ if (Val.getOpcode() == ISD::SPLAT_VECTOR)
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val->getOperand(0)))
+ if (C->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2())
+ return true;
+
// More could be done here, though the above checks are enough
// to handle some common cases.
@@ -3663,7 +3703,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
return 1;
APInt DemandedElts = VT.isVector()
- ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ ? APInt::getAllOnes(VT.getVectorNumElements())
: APInt(1, 1);
return ComputeNumSignBits(Op, DemandedElts, Depth);
}
@@ -3771,10 +3811,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
assert(VT.isVector() && "Expected bitcast to vector");
unsigned Scale = SrcBits / VTBits;
- APInt SrcDemandedElts(NumElts / Scale, 0);
- for (unsigned i = 0; i != NumElts; ++i)
- if (DemandedElts[i])
- SrcDemandedElts.setBit(i / Scale);
+ APInt SrcDemandedElts =
+ APIntOps::ScaleBitMask(DemandedElts, NumElts / Scale);
// Fast case - sign splat can be simply split across the small elements.
Tmp = ComputeNumSignBits(N0, SrcDemandedElts, Depth + 1);
@@ -3946,13 +3984,13 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
// Special case decrementing a value (ADD X, -1):
if (ConstantSDNode *CRHS =
isConstOrConstSplat(Op.getOperand(1), DemandedElts))
- if (CRHS->isAllOnesValue()) {
+ if (CRHS->isAllOnes()) {
KnownBits Known =
computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
- if ((Known.Zero | 1).isAllOnesValue())
+ if ((Known.Zero | 1).isAllOnes())
return VTBits;
// If we are subtracting one from a positive number, there is no carry
@@ -3971,12 +4009,12 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
// Handle NEG.
if (ConstantSDNode *CLHS =
isConstOrConstSplat(Op.getOperand(0), DemandedElts))
- if (CLHS->isNullValue()) {
+ if (CLHS->isZero()) {
KnownBits Known =
computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
- if ((Known.Zero | 1).isAllOnesValue())
+ if ((Known.Zero | 1).isAllOnes())
return VTBits;
// If the input is known to be positive (the sign bit is known clear),
@@ -4080,7 +4118,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
// If we know the element index, just demand that vector element, else for
// an unknown element index, ignore DemandedElts and demand them all.
- APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
+ APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts))
DemandedSrcElts =
@@ -4126,7 +4164,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
APInt DemandedSrcElts = DemandedElts;
- DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx);
+ DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
Tmp = std::numeric_limits<unsigned>::max();
if (!!DemandedSubElts) {
@@ -4248,6 +4286,18 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
return std::max(FirstAnswer, Mask.countLeadingOnes());
}
+unsigned SelectionDAG::ComputeMinSignedBits(SDValue Op, unsigned Depth) const {
+ unsigned SignBits = ComputeNumSignBits(Op, Depth);
+ return Op.getScalarValueSizeInBits() - SignBits + 1;
+}
+
+unsigned SelectionDAG::ComputeMinSignedBits(SDValue Op,
+ const APInt &DemandedElts,
+ unsigned Depth) const {
+ unsigned SignBits = ComputeNumSignBits(Op, DemandedElts, Depth);
+ return Op.getScalarValueSizeInBits() - SignBits + 1;
+}
+
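ComputeMinSignedBits is a thin wrapper over ComputeNumSignBits: if the top SignBits bits of an N-bit value are all copies of the sign bit, the value fits in N - SignBits + 1 signed bits. A worked sketch of that arithmetic:

    #include <cassert>

    // Minimum number of signed bits needed, given the scalar width and the
    // number of known sign bits.
    static unsigned minSignedBits(unsigned ScalarSizeInBits, unsigned SignBits) {
      return ScalarSizeInBits - SignBits + 1;
    }

    int main() {
      // A 32-bit value with 25 known sign bits (e.g. anything in [-128, 127])
      // needs only 8 signed bits.
      assert(minSignedBits(32, 25) == 8);
      // Only the sign bit itself known: the full width may be needed.
      assert(minSignedBits(32, 1) == 32);
      return 0;
    }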
bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly,
unsigned Depth) const {
// Early out for FREEZE.
@@ -4260,7 +4310,7 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly,
return false;
APInt DemandedElts = VT.isVector()
- ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ ? APInt::getAllOnes(VT.getVectorNumElements())
: APInt(1, 1);
return isGuaranteedNotToBeUndefOrPoison(Op, DemandedElts, PoisonOnly, Depth);
}
@@ -4285,7 +4335,17 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
case ISD::UNDEF:
return PoisonOnly;
- // TODO: ISD::BUILD_VECTOR handling
+ case ISD::BUILD_VECTOR:
+ // NOTE: BUILD_VECTOR has implicit truncation of wider scalar elements -
+ // this shouldn't affect the result.
+ for (unsigned i = 0, e = Op.getNumOperands(); i < e; ++i) {
+ if (!DemandedElts[i])
+ continue;
+ if (!isGuaranteedNotToBeUndefOrPoison(Op.getOperand(i), PoisonOnly,
+ Depth + 1))
+ return false;
+ }
+ return true;
// TODO: Search for noundef attributes from library functions.
@@ -4449,8 +4509,8 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op) const {
"Floating point types unsupported - use isKnownNeverZeroFloat");
// If the value is a constant, we can obviously see if it is a zero or not.
- if (ISD::matchUnaryPredicate(
- Op, [](ConstantSDNode *C) { return !C->isNullValue(); }))
+ if (ISD::matchUnaryPredicate(Op,
+ [](ConstantSDNode *C) { return !C->isZero(); }))
return true;
// TODO: Recognize more cases here.
@@ -4490,7 +4550,7 @@ bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const {
static SDValue FoldSTEP_VECTOR(const SDLoc &DL, EVT VT, SDValue Step,
SelectionDAG &DAG) {
- if (cast<ConstantSDNode>(Step)->isNullValue())
+ if (cast<ConstantSDNode>(Step)->isZero())
return DAG.getConstant(0, DL, VT);
return SDValue();
@@ -4676,7 +4736,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::UINT_TO_FP:
case ISD::SINT_TO_FP: {
APFloat apf(EVTToAPFloatSemantics(VT),
- APInt::getNullValue(VT.getSizeInBits()));
+ APInt::getZero(VT.getSizeInBits()));
(void)apf.convertFromAPInt(Val,
Opcode==ISD::SINT_TO_FP,
APFloat::rmNearestTiesToEven);
@@ -4828,7 +4888,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTPOP: {
SDValue Ops = {Operand};
- if (SDValue Fold = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops))
+ if (SDValue Fold = FoldConstantArithmetic(Opcode, DL, VT, Ops))
return Fold;
}
}
@@ -4976,6 +5036,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
+ if (OpOpcode == ISD::VSCALE && !NewNodesMustHaveLegalTypes)
+ return getVScale(DL, VT, Operand.getConstantOperandAPInt(0));
break;
case ISD::ANY_EXTEND_VECTOR_INREG:
case ISD::ZERO_EXTEND_VECTOR_INREG:
@@ -5206,173 +5268,111 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::CONCAT_VECTORS)
return SDValue();
- // For now, the array Ops should only contain two values.
- // This enforcement will be removed once this function is merged with
- // FoldConstantVectorArithmetic
- if (Ops.size() != 2)
+ unsigned NumOps = Ops.size();
+ if (NumOps == 0)
return SDValue();
if (isUndef(Opcode, Ops))
return getUNDEF(VT);
- SDNode *N1 = Ops[0].getNode();
- SDNode *N2 = Ops[1].getNode();
-
// Handle the case of two scalars.
- if (auto *C1 = dyn_cast<ConstantSDNode>(N1)) {
- if (auto *C2 = dyn_cast<ConstantSDNode>(N2)) {
- if (C1->isOpaque() || C2->isOpaque())
- return SDValue();
-
- Optional<APInt> FoldAttempt =
- FoldValue(Opcode, C1->getAPIntValue(), C2->getAPIntValue());
- if (!FoldAttempt)
- return SDValue();
-
- SDValue Folded = getConstant(FoldAttempt.getValue(), DL, VT);
- assert((!Folded || !VT.isVector()) &&
- "Can't fold vectors ops with scalar operands");
- return Folded;
- }
- }
-
- // fold (add Sym, c) -> Sym+c
- if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N1))
- return FoldSymbolOffset(Opcode, VT, GA, N2);
- if (TLI->isCommutativeBinOp(Opcode))
- if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N2))
- return FoldSymbolOffset(Opcode, VT, GA, N1);
-
- // For fixed width vectors, extract each constant element and fold them
- // individually. Either input may be an undef value.
- bool IsBVOrSV1 = N1->getOpcode() == ISD::BUILD_VECTOR ||
- N1->getOpcode() == ISD::SPLAT_VECTOR;
- if (!IsBVOrSV1 && !N1->isUndef())
- return SDValue();
- bool IsBVOrSV2 = N2->getOpcode() == ISD::BUILD_VECTOR ||
- N2->getOpcode() == ISD::SPLAT_VECTOR;
- if (!IsBVOrSV2 && !N2->isUndef())
- return SDValue();
- // If both operands are undef, that's handled the same way as scalars.
- if (!IsBVOrSV1 && !IsBVOrSV2)
- return SDValue();
-
- EVT SVT = VT.getScalarType();
- EVT LegalSVT = SVT;
- if (NewNodesMustHaveLegalTypes && LegalSVT.isInteger()) {
- LegalSVT = TLI->getTypeToTransformTo(*getContext(), LegalSVT);
- if (LegalSVT.bitsLT(SVT))
- return SDValue();
- }
-
- SmallVector<SDValue, 4> Outputs;
- unsigned NumOps = 0;
- if (IsBVOrSV1)
- NumOps = std::max(NumOps, N1->getNumOperands());
- if (IsBVOrSV2)
- NumOps = std::max(NumOps, N2->getNumOperands());
- assert(NumOps != 0 && "Expected non-zero operands");
- // Scalable vectors should only be SPLAT_VECTOR or UNDEF here. We only need
- // one iteration for that.
- assert((!VT.isScalableVector() || NumOps == 1) &&
- "Scalable vector should only have one scalar");
-
- for (unsigned I = 0; I != NumOps; ++I) {
- // We can have a fixed length SPLAT_VECTOR and a BUILD_VECTOR so we need
- // to use operand 0 of the SPLAT_VECTOR for each fixed element.
- SDValue V1;
- if (N1->getOpcode() == ISD::BUILD_VECTOR)
- V1 = N1->getOperand(I);
- else if (N1->getOpcode() == ISD::SPLAT_VECTOR)
- V1 = N1->getOperand(0);
- else
- V1 = getUNDEF(SVT);
-
- SDValue V2;
- if (N2->getOpcode() == ISD::BUILD_VECTOR)
- V2 = N2->getOperand(I);
- else if (N2->getOpcode() == ISD::SPLAT_VECTOR)
- V2 = N2->getOperand(0);
- else
- V2 = getUNDEF(SVT);
-
- if (SVT.isInteger()) {
- if (V1.getValueType().bitsGT(SVT))
- V1 = getNode(ISD::TRUNCATE, DL, SVT, V1);
- if (V2.getValueType().bitsGT(SVT))
- V2 = getNode(ISD::TRUNCATE, DL, SVT, V2);
+ if (NumOps == 2) {
+ // TODO: Move foldConstantFPMath here?
+
+ if (auto *C1 = dyn_cast<ConstantSDNode>(Ops[0])) {
+ if (auto *C2 = dyn_cast<ConstantSDNode>(Ops[1])) {
+ if (C1->isOpaque() || C2->isOpaque())
+ return SDValue();
+
+ Optional<APInt> FoldAttempt =
+ FoldValue(Opcode, C1->getAPIntValue(), C2->getAPIntValue());
+ if (!FoldAttempt)
+ return SDValue();
+
+ SDValue Folded = getConstant(FoldAttempt.getValue(), DL, VT);
+ assert((!Folded || !VT.isVector()) &&
+ "Can't fold vectors ops with scalar operands");
+ return Folded;
+ }
}
- if (V1.getValueType() != SVT || V2.getValueType() != SVT)
- return SDValue();
-
- // Fold one vector element.
- SDValue ScalarResult = getNode(Opcode, DL, SVT, V1, V2);
- if (LegalSVT != SVT)
- ScalarResult = getNode(ISD::SIGN_EXTEND, DL, LegalSVT, ScalarResult);
-
- // Scalar folding only succeeded if the result is a constant or UNDEF.
- if (!ScalarResult.isUndef() && ScalarResult.getOpcode() != ISD::Constant &&
- ScalarResult.getOpcode() != ISD::ConstantFP)
- return SDValue();
- Outputs.push_back(ScalarResult);
- }
-
- if (N1->getOpcode() == ISD::BUILD_VECTOR ||
- N2->getOpcode() == ISD::BUILD_VECTOR) {
- assert(VT.getVectorNumElements() == Outputs.size() &&
- "Vector size mismatch!");
-
- // Build a big vector out of the scalar elements we generated.
- return getBuildVector(VT, SDLoc(), Outputs);
+ // fold (add Sym, c) -> Sym+c
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ops[0]))
+ return FoldSymbolOffset(Opcode, VT, GA, Ops[1].getNode());
+ if (TLI->isCommutativeBinOp(Opcode))
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ops[1]))
+ return FoldSymbolOffset(Opcode, VT, GA, Ops[0].getNode());
}
- assert((N1->getOpcode() == ISD::SPLAT_VECTOR ||
- N2->getOpcode() == ISD::SPLAT_VECTOR) &&
- "One operand should be a splat vector");
-
- assert(Outputs.size() == 1 && "Vector size mismatch!");
- return getSplatVector(VT, SDLoc(), Outputs[0]);
-}
-
-// TODO: Merge with FoldConstantArithmetic
-SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
- const SDLoc &DL, EVT VT,
- ArrayRef<SDValue> Ops,
- const SDNodeFlags Flags) {
- // If the opcode is a target-specific ISD node, there's nothing we can
- // do here and the operand rules may not line up with the below, so
- // bail early.
- if (Opcode >= ISD::BUILTIN_OP_END)
- return SDValue();
-
- if (isUndef(Opcode, Ops))
- return getUNDEF(VT);
-
- // We can only fold vectors - maybe merge with FoldConstantArithmetic someday?
+ // This is for vector folding only from here on.
if (!VT.isVector())
return SDValue();
ElementCount NumElts = VT.getVectorElementCount();
+ // See if we can fold through bitcasted integer ops.
+ // TODO: Can we handle undef elements?
+ if (NumOps == 2 && VT.isFixedLengthVector() && VT.isInteger() &&
+ Ops[0].getValueType() == VT && Ops[1].getValueType() == VT &&
+ Ops[0].getOpcode() == ISD::BITCAST &&
+ Ops[1].getOpcode() == ISD::BITCAST) {
+ SDValue N1 = peekThroughBitcasts(Ops[0]);
+ SDValue N2 = peekThroughBitcasts(Ops[1]);
+ auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
+ auto *BV2 = dyn_cast<BuildVectorSDNode>(N2);
+ EVT BVVT = N1.getValueType();
+ if (BV1 && BV2 && BVVT.isInteger() && BVVT == N2.getValueType()) {
+ bool IsLE = getDataLayout().isLittleEndian();
+ unsigned EltBits = VT.getScalarSizeInBits();
+ SmallVector<APInt> RawBits1, RawBits2;
+ BitVector UndefElts1, UndefElts2;
+ if (BV1->getConstantRawBits(IsLE, EltBits, RawBits1, UndefElts1) &&
+ BV2->getConstantRawBits(IsLE, EltBits, RawBits2, UndefElts2) &&
+ UndefElts1.none() && UndefElts2.none()) {
+ SmallVector<APInt> RawBits;
+ for (unsigned I = 0, E = NumElts.getFixedValue(); I != E; ++I) {
+ Optional<APInt> Fold = FoldValue(Opcode, RawBits1[I], RawBits2[I]);
+ if (!Fold)
+ break;
+ RawBits.push_back(Fold.getValue());
+ }
+ if (RawBits.size() == NumElts.getFixedValue()) {
+ // We have constant folded, but we need to cast this again back to
+ // the original (possibly legalized) type.
+ SmallVector<APInt> DstBits;
+ BitVector DstUndefs;
+ BuildVectorSDNode::recastRawBits(IsLE, BVVT.getScalarSizeInBits(),
+ DstBits, RawBits, DstUndefs,
+ BitVector(RawBits.size(), false));
+ EVT BVEltVT = BV1->getOperand(0).getValueType();
+ unsigned BVEltBits = BVEltVT.getSizeInBits();
+ SmallVector<SDValue> Ops(DstBits.size(), getUNDEF(BVEltVT));
+ for (unsigned I = 0, E = DstBits.size(); I != E; ++I) {
+ if (DstUndefs[I])
+ continue;
+ Ops[I] = getConstant(DstBits[I].sextOrSelf(BVEltBits), DL, BVEltVT);
+ }
+ return getBitcast(VT, getBuildVector(BVVT, DL, Ops));
+ }
+ }
+ }
+ }
+
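The new bitcast folding gathers each operand's constant raw bits at the width the operation works at, folds lane by lane, and recasts the result back to the build vector's original element type. A standalone little-endian sketch of that regroup/fold/recast cycle (illustrative only; the real code goes through BuildVectorSDNode::getConstantRawBits and recastRawBits):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Fold an i16-wide add over constants stored as little-endian i8 lanes:
    // regroup bytes into 16-bit lanes, add, and split the result back into
    // bytes so it can be "bitcast" back to the original element type.
    static std::vector<uint8_t>
    foldAddThroughBitcast(const std::vector<uint8_t> &A,
                          const std::vector<uint8_t> &B) {
      std::vector<uint8_t> R(A.size());
      for (size_t i = 0; i + 1 < A.size(); i += 2) {
        uint16_t a = uint16_t(A[i]) | uint16_t(A[i + 1]) << 8; // gather raw bits
        uint16_t b = uint16_t(B[i]) | uint16_t(B[i + 1]) << 8;
        uint16_t r = uint16_t(a + b);                          // fold one lane
        R[i] = uint8_t(r);                                     // recast to bytes
        R[i + 1] = uint8_t(r >> 8);
      }
      return R;
    }

    int main() {
      // Two v4i8 constants seen through bitcasts as v2i16:
      // {0x01FF, 0x0002} + {0x0001, 0x0003} == {0x0200, 0x0005}.
      std::vector<uint8_t> A{0xFF, 0x01, 0x02, 0x00}, B{0x01, 0x00, 0x03, 0x00};
      std::vector<uint8_t> R = foldAddThroughBitcast(A, B);
      assert(R[0] == 0x00 && R[1] == 0x02 && R[2] == 0x05 && R[3] == 0x00);
      return 0;
    }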
auto IsScalarOrSameVectorSize = [NumElts](const SDValue &Op) {
return !Op.getValueType().isVector() ||
Op.getValueType().getVectorElementCount() == NumElts;
};
- auto IsConstantBuildVectorSplatVectorOrUndef = [](const SDValue &Op) {
- APInt SplatVal;
- BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Op);
+ auto IsBuildVectorSplatVectorOrUndef = [](const SDValue &Op) {
return Op.isUndef() || Op.getOpcode() == ISD::CONDCODE ||
- (BV && BV->isConstant()) ||
- (Op.getOpcode() == ISD::SPLAT_VECTOR &&
- ISD::isConstantSplatVector(Op.getNode(), SplatVal));
+ Op.getOpcode() == ISD::BUILD_VECTOR ||
+ Op.getOpcode() == ISD::SPLAT_VECTOR;
};
// All operands must be vector types with the same number of elements as
- // the result type and must be either UNDEF or a build vector of constant
+ // the result type and must be either UNDEF or a build/splat vector
// or UNDEF scalars.
- if (!llvm::all_of(Ops, IsConstantBuildVectorSplatVectorOrUndef) ||
+ if (!llvm::all_of(Ops, IsBuildVectorSplatVectorOrUndef) ||
!llvm::all_of(Ops, IsScalarOrSameVectorSize))
return SDValue();
@@ -5392,17 +5392,16 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
// For scalable vector types we know we're dealing with SPLAT_VECTORs. We
// only have one operand to check. For fixed-length vector types we may have
// a combination of BUILD_VECTOR and SPLAT_VECTOR.
- unsigned NumOperands = NumElts.isScalable() ? 1 : NumElts.getFixedValue();
+ unsigned NumVectorElts = NumElts.isScalable() ? 1 : NumElts.getFixedValue();
// Constant fold each scalar lane separately.
SmallVector<SDValue, 4> ScalarResults;
- for (unsigned I = 0; I != NumOperands; I++) {
+ for (unsigned I = 0; I != NumVectorElts; I++) {
SmallVector<SDValue, 4> ScalarOps;
for (SDValue Op : Ops) {
EVT InSVT = Op.getValueType().getScalarType();
if (Op.getOpcode() != ISD::BUILD_VECTOR &&
Op.getOpcode() != ISD::SPLAT_VECTOR) {
- // We've checked that this is UNDEF or a constant of some kind.
if (Op.isUndef())
ScalarOps.push_back(getUNDEF(InSVT));
else
@@ -5423,7 +5422,7 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
}
// Constant fold the scalar operands.
- SDValue ScalarResult = getNode(Opcode, DL, SVT, ScalarOps, Flags);
+ SDValue ScalarResult = getNode(Opcode, DL, SVT, ScalarOps);
// Legalize the (integer) scalar constant if necessary.
if (LegalSVT != SVT)
@@ -5591,9 +5590,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
N1.getValueType() == VT && "Binary operator types must match!");
// (X & 0) -> 0. This commonly occurs when legalizing i64 values, so it's
// worth handling here.
- if (N2C && N2C->isNullValue())
+ if (N2C && N2C->isZero())
return N2;
- if (N2C && N2C->isAllOnesValue()) // X & -1 -> X
+ if (N2C && N2C->isAllOnes()) // X & -1 -> X
return N1;
break;
case ISD::OR:
@@ -5605,7 +5604,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
N1.getValueType() == VT && "Binary operator types must match!");
// (X ^|+- 0) -> X. This commonly occurs when legalizing i64 values, so
// it's worth handling here.
- if (N2C && N2C->isNullValue())
+ if (N2C && N2C->isZero())
return N1;
if ((Opcode == ISD::ADD || Opcode == ISD::SUB) && VT.isVector() &&
VT.getVectorElementType() == MVT::i1)
@@ -5711,7 +5710,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
// size of the value, the shift/rotate count is guaranteed to be zero.
if (VT == MVT::i1)
return N1;
- if (N2C && N2C->isNullValue())
+ if (N2C && N2C->isZero())
return N1;
break;
case ISD::FP_ROUND:
@@ -6086,7 +6085,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
return V;
// Vector constant folding.
SDValue Ops[] = {N1, N2, N3};
- if (SDValue V = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops)) {
+ if (SDValue V = FoldConstantArithmetic(Opcode, DL, VT, Ops)) {
NewSDValueDbgMsg(V, "New node vector constant folding: ", this);
return V;
}
@@ -6099,6 +6098,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
break;
case ISD::VECTOR_SHUFFLE:
llvm_unreachable("should use getVectorShuffle constructor!");
+ case ISD::VECTOR_SPLICE: {
+ if (cast<ConstantSDNode>(N3)->isNullValue())
+ return N1;
+ break;
+ }
case ISD::INSERT_VECTOR_ELT: {
ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3);
// INSERT_VECTOR_ELT into out-of-bounds element is an UNDEF, except
@@ -6214,9 +6218,8 @@ SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) {
ArgChains.push_back(Chain);
// Add a chain value for each stack argument.
- for (SDNode::use_iterator U = getEntryNode().getNode()->use_begin(),
- UE = getEntryNode().getNode()->use_end(); U != UE; ++U)
- if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
+ for (SDNode *U : getEntryNode().getNode()->uses())
+ if (LoadSDNode *L = dyn_cast<LoadSDNode>(U))
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
if (FI->getIndex() < 0)
ArgChains.push_back(SDValue(L, 1));
@@ -6720,7 +6723,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
if (FI && !MFI.isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
bool IsZeroVal =
- isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
+ isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isZero();
if (!TLI.findOptimalMemOpLowering(
MemOps, TLI.getMaxStoresPerMemset(OptSize),
MemOp::Set(Size, DstAlignCanChange, Alignment, IsZeroVal, isVol),
@@ -6809,7 +6812,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
if (ConstantSize) {
// Memcpy with size zero? Just return the original chain.
- if (ConstantSize->isNullValue())
+ if (ConstantSize->isZero())
return Chain;
SDValue Result = getMemcpyLoadsAndStores(
@@ -6924,7 +6927,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
if (ConstantSize) {
// Memmove with size zero? Just return the original chain.
- if (ConstantSize->isNullValue())
+ if (ConstantSize->isZero())
return Chain;
SDValue Result = getMemmoveLoadsAndStores(
@@ -7026,7 +7029,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
if (ConstantSize) {
// Memset with size zero? Just return the original chain.
- if (ConstantSize->isNullValue())
+ if (ConstantSize->isZero())
return Chain;
SDValue Result = getMemsetStores(*this, dl, Chain, Dst, Src,
@@ -7618,6 +7621,374 @@ SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl,
return V;
}
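+// Helpers for building vector-predicated (VP) load nodes. A VP load behaves
+// like a masked load, except that the lanes operated on are additionally
+// limited by the explicit vector length (EVL) operand.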
+SDValue SelectionDAG::getLoadVP(
+ ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &dl,
+ SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL,
+ MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment,
+ MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo,
+ const MDNode *Ranges, bool IsExpanding) {
+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+
+ MMOFlags |= MachineMemOperand::MOLoad;
+ assert((MMOFlags & MachineMemOperand::MOStore) == 0);
+ // If we don't have a PtrInfo, infer the trivial frame index case to simplify
+ // clients.
+ if (PtrInfo.V.isNull())
+ PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset);
+
+ uint64_t Size = MemoryLocation::getSizeOrUnknown(MemVT.getStoreSize());
+ MachineFunction &MF = getMachineFunction();
+ MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, MMOFlags, Size,
+ Alignment, AAInfo, Ranges);
+ return getLoadVP(AM, ExtType, VT, dl, Chain, Ptr, Offset, Mask, EVL, MemVT,
+ MMO, IsExpanding);
+}
+
+SDValue SelectionDAG::getLoadVP(ISD::MemIndexedMode AM,
+ ISD::LoadExtType ExtType, EVT VT,
+ const SDLoc &dl, SDValue Chain, SDValue Ptr,
+ SDValue Offset, SDValue Mask, SDValue EVL,
+ EVT MemVT, MachineMemOperand *MMO,
+ bool IsExpanding) {
+ if (VT == MemVT) {
+ ExtType = ISD::NON_EXTLOAD;
+ } else if (ExtType == ISD::NON_EXTLOAD) {
+ assert(VT == MemVT && "Non-extending load from different memory type!");
+ } else {
+ // Extending load.
+ assert(MemVT.getScalarType().bitsLT(VT.getScalarType()) &&
+ "Should only be an extending load, not truncating!");
+ assert(VT.isInteger() == MemVT.isInteger() &&
+ "Cannot convert from FP to Int or Int -> FP!");
+ assert(VT.isVector() == MemVT.isVector() &&
+ "Cannot use an ext load to convert to or from a vector!");
+ assert((!VT.isVector() ||
+ VT.getVectorElementCount() == MemVT.getVectorElementCount()) &&
+ "Cannot use an ext load to change the number of vector elements!");
+ }
+
+ bool Indexed = AM != ISD::UNINDEXED;
+ assert((Indexed || Offset.isUndef()) && "Unindexed load with an offset!");
+
+ SDVTList VTs = Indexed ? getVTList(VT, Ptr.getValueType(), MVT::Other)
+ : getVTList(VT, MVT::Other);
+ SDValue Ops[] = {Chain, Ptr, Offset, Mask, EVL};
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::VP_LOAD, VTs, Ops);
+ ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<VPLoadSDNode>(
+ dl.getIROrder(), VTs, AM, ExtType, IsExpanding, MemVT, MMO));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<VPLoadSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ auto *N = newSDNode<VPLoadSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM,
+ ExtType, IsExpanding, MemVT, MMO);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getLoadVP(EVT VT, const SDLoc &dl, SDValue Chain,
+ SDValue Ptr, SDValue Mask, SDValue EVL,
+ MachinePointerInfo PtrInfo,
+ MaybeAlign Alignment,
+ MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo, const MDNode *Ranges,
+ bool IsExpanding) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getLoadVP(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef,
+ Mask, EVL, PtrInfo, VT, Alignment, MMOFlags, AAInfo, Ranges,
+ IsExpanding);
+}
+
+SDValue SelectionDAG::getLoadVP(EVT VT, const SDLoc &dl, SDValue Chain,
+ SDValue Ptr, SDValue Mask, SDValue EVL,
+ MachineMemOperand *MMO, bool IsExpanding) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getLoadVP(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef,
+ Mask, EVL, VT, MMO, IsExpanding);
+}
+
+SDValue SelectionDAG::getExtLoadVP(ISD::LoadExtType ExtType, const SDLoc &dl,
+ EVT VT, SDValue Chain, SDValue Ptr,
+ SDValue Mask, SDValue EVL,
+ MachinePointerInfo PtrInfo, EVT MemVT,
+ MaybeAlign Alignment,
+ MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo, bool IsExpanding) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getLoadVP(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, Mask,
+ EVL, PtrInfo, MemVT, Alignment, MMOFlags, AAInfo, nullptr,
+ IsExpanding);
+}
+
+SDValue SelectionDAG::getExtLoadVP(ISD::LoadExtType ExtType, const SDLoc &dl,
+ EVT VT, SDValue Chain, SDValue Ptr,
+ SDValue Mask, SDValue EVL, EVT MemVT,
+ MachineMemOperand *MMO, bool IsExpanding) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getLoadVP(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, Mask,
+ EVL, MemVT, MMO, IsExpanding);
+}
+
+SDValue SelectionDAG::getIndexedLoadVP(SDValue OrigLoad, const SDLoc &dl,
+ SDValue Base, SDValue Offset,
+ ISD::MemIndexedMode AM) {
+ auto *LD = cast<VPLoadSDNode>(OrigLoad);
+  assert(LD->getOffset().isUndef() && "Load is already an indexed load!");
+ // Don't propagate the invariant or dereferenceable flags.
+ auto MMOFlags =
+ LD->getMemOperand()->getFlags() &
+ ~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable);
+ return getLoadVP(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl,
+ LD->getChain(), Base, Offset, LD->getMask(),
+ LD->getVectorLength(), LD->getPointerInfo(),
+ LD->getMemoryVT(), LD->getAlign(), MMOFlags, LD->getAAInfo(),
+ nullptr, LD->isExpandingLoad());
+}
+
+SDValue SelectionDAG::getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val,
+ SDValue Ptr, SDValue Mask, SDValue EVL,
+ MachinePointerInfo PtrInfo, Align Alignment,
+ MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo, bool IsCompressing) {
+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+
+ MMOFlags |= MachineMemOperand::MOStore;
+ assert((MMOFlags & MachineMemOperand::MOLoad) == 0);
+
+ if (PtrInfo.V.isNull())
+ PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr);
+
+ MachineFunction &MF = getMachineFunction();
+ uint64_t Size =
+ MemoryLocation::getSizeOrUnknown(Val.getValueType().getStoreSize());
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo);
+ return getStoreVP(Chain, dl, Val, Ptr, Mask, EVL, MMO, IsCompressing);
+}
+
+SDValue SelectionDAG::getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val,
+ SDValue Ptr, SDValue Mask, SDValue EVL,
+ MachineMemOperand *MMO, bool IsCompressing) {
+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+ EVT VT = Val.getValueType();
+ SDVTList VTs = getVTList(MVT::Other);
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ SDValue Ops[] = {Chain, Val, Ptr, Undef, Mask, EVL};
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::VP_STORE, VTs, Ops);
+ ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<VPStoreSDNode>(
+ dl.getIROrder(), VTs, ISD::UNINDEXED, false, IsCompressing, VT, MMO));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<VPStoreSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ auto *N =
+ newSDNode<VPStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
+ ISD::UNINDEXED, false, IsCompressing, VT, MMO);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getTruncStoreVP(SDValue Chain, const SDLoc &dl,
+ SDValue Val, SDValue Ptr, SDValue Mask,
+ SDValue EVL, MachinePointerInfo PtrInfo,
+ EVT SVT, Align Alignment,
+ MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo,
+ bool IsCompressing) {
+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+
+ MMOFlags |= MachineMemOperand::MOStore;
+ assert((MMOFlags & MachineMemOperand::MOLoad) == 0);
+
+ if (PtrInfo.V.isNull())
+ PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr);
+
+ MachineFunction &MF = getMachineFunction();
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ PtrInfo, MMOFlags, MemoryLocation::getSizeOrUnknown(SVT.getStoreSize()),
+ Alignment, AAInfo);
+ return getTruncStoreVP(Chain, dl, Val, Ptr, Mask, EVL, SVT, MMO,
+ IsCompressing);
+}
+
+SDValue SelectionDAG::getTruncStoreVP(SDValue Chain, const SDLoc &dl,
+ SDValue Val, SDValue Ptr, SDValue Mask,
+ SDValue EVL, EVT SVT,
+ MachineMemOperand *MMO,
+ bool IsCompressing) {
+ EVT VT = Val.getValueType();
+
+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+ if (VT == SVT)
+ return getStoreVP(Chain, dl, Val, Ptr, Mask, EVL, MMO, IsCompressing);
+
+ assert(SVT.getScalarType().bitsLT(VT.getScalarType()) &&
+ "Should only be a truncating store, not extending!");
+ assert(VT.isInteger() == SVT.isInteger() && "Can't do FP-INT conversion!");
+ assert(VT.isVector() == SVT.isVector() &&
+ "Cannot use trunc store to convert to or from a vector!");
+ assert((!VT.isVector() ||
+ VT.getVectorElementCount() == SVT.getVectorElementCount()) &&
+ "Cannot use trunc store to change the number of vector elements!");
+
+ SDVTList VTs = getVTList(MVT::Other);
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ SDValue Ops[] = {Chain, Val, Ptr, Undef, Mask, EVL};
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::VP_STORE, VTs, Ops);
+ ID.AddInteger(SVT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<VPStoreSDNode>(
+ dl.getIROrder(), VTs, ISD::UNINDEXED, true, IsCompressing, SVT, MMO));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<VPStoreSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ auto *N =
+ newSDNode<VPStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
+ ISD::UNINDEXED, true, IsCompressing, SVT, MMO);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getIndexedStoreVP(SDValue OrigStore, const SDLoc &dl,
+ SDValue Base, SDValue Offset,
+ ISD::MemIndexedMode AM) {
+ auto *ST = cast<VPStoreSDNode>(OrigStore);
+ assert(ST->getOffset().isUndef() && "Store is already an indexed store!");
+ SDVTList VTs = getVTList(Base.getValueType(), MVT::Other);
+ SDValue Ops[] = {ST->getChain(), ST->getValue(), Base,
+ Offset, ST->getMask(), ST->getVectorLength()};
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::VP_STORE, VTs, Ops);
+ ID.AddInteger(ST->getMemoryVT().getRawBits());
+ ID.AddInteger(ST->getRawSubclassData());
+ ID.AddInteger(ST->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<VPStoreSDNode>(
+ dl.getIROrder(), dl.getDebugLoc(), VTs, AM, ST->isTruncatingStore(),
+ ST->isCompressingStore(), ST->getMemoryVT(), ST->getMemOperand());
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl,
+ ArrayRef<SDValue> Ops, MachineMemOperand *MMO,
+ ISD::MemIndexType IndexType) {
+ assert(Ops.size() == 6 && "Incompatible number of operands");
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::VP_GATHER, VTs, Ops);
+ ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<VPGatherSDNode>(
+ dl.getIROrder(), VTs, VT, MMO, IndexType));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<VPGatherSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+
+ auto *N = newSDNode<VPGatherSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
+ VT, MMO, IndexType);
+ createOperands(N, Ops);
+
+ assert(N->getMask().getValueType().getVectorElementCount() ==
+ N->getValueType(0).getVectorElementCount() &&
+ "Vector width mismatch between mask and data");
+ assert(N->getIndex().getValueType().getVectorElementCount().isScalable() ==
+ N->getValueType(0).getVectorElementCount().isScalable() &&
+ "Scalable flags of index and data do not match");
+ assert(ElementCount::isKnownGE(
+ N->getIndex().getValueType().getVectorElementCount(),
+ N->getValueType(0).getVectorElementCount()) &&
+ "Vector width mismatch between index and data");
+ assert(isa<ConstantSDNode>(N->getScale()) &&
+ cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() &&
+ "Scale should be a constant power of 2");
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl,
+ ArrayRef<SDValue> Ops,
+ MachineMemOperand *MMO,
+ ISD::MemIndexType IndexType) {
+ assert(Ops.size() == 7 && "Incompatible number of operands");
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::VP_SCATTER, VTs, Ops);
+ ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<VPScatterSDNode>(
+ dl.getIROrder(), VTs, VT, MMO, IndexType));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<VPScatterSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ auto *N = newSDNode<VPScatterSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
+ VT, MMO, IndexType);
+ createOperands(N, Ops);
+
+ assert(N->getMask().getValueType().getVectorElementCount() ==
+ N->getValue().getValueType().getVectorElementCount() &&
+ "Vector width mismatch between mask and data");
+ assert(
+ N->getIndex().getValueType().getVectorElementCount().isScalable() ==
+ N->getValue().getValueType().getVectorElementCount().isScalable() &&
+ "Scalable flags of index and data do not match");
+ assert(ElementCount::isKnownGE(
+ N->getIndex().getValueType().getVectorElementCount(),
+ N->getValue().getValueType().getVectorElementCount()) &&
+ "Vector width mismatch between index and data");
+ assert(isa<ConstantSDNode>(N->getScale()) &&
+ cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() &&
+ "Scale should be a constant power of 2");
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain,
SDValue Base, SDValue Offset, SDValue Mask,
SDValue PassThru, EVT MemVT,
@@ -7818,7 +8189,7 @@ SDValue SelectionDAG::simplifySelect(SDValue Cond, SDValue T, SDValue F) {
// select true, T, F --> T
// select false, T, F --> F
if (auto *CondC = dyn_cast<ConstantSDNode>(Cond))
- return CondC->isNullValue() ? F : T;
+ return CondC->isZero() ? F : T;
// TODO: This should simplify VSELECT with constant condition using something
// like this (but check boolean contents to be complete?):
@@ -9296,7 +9667,7 @@ void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode *> &Order) {
}
#ifndef NDEBUG
-void SelectionDAG::VerifyDAGDiverence() {
+void SelectionDAG::VerifyDAGDivergence() {
std::vector<SDNode *> TopoOrder;
CreateTopologicalOrder(TopoOrder);
for (auto *N : TopoOrder) {
@@ -9384,21 +9755,20 @@ unsigned SelectionDAG::AssignTopologicalOrder() {
// before SortedPos will contain the topological sort index, and the
// Node Id fields for nodes At SortedPos and after will contain the
// count of outstanding operands.
- for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ) {
- SDNode *N = &*I++;
- checkForCycles(N, this);
- unsigned Degree = N->getNumOperands();
+ for (SDNode &N : llvm::make_early_inc_range(allnodes())) {
+ checkForCycles(&N, this);
+ unsigned Degree = N.getNumOperands();
if (Degree == 0) {
// A node with no uses, add it to the result array immediately.
- N->setNodeId(DAGSize++);
- allnodes_iterator Q(N);
+ N.setNodeId(DAGSize++);
+ allnodes_iterator Q(&N);
if (Q != SortedPos)
SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(Q));
assert(SortedPos != AllNodes.end() && "Overran node list");
++SortedPos;
} else {
// Temporarily use the Node Id as scratch space for the degree count.
- N->setNodeId(Degree);
+ N.setNodeId(Degree);
}
}
@@ -9512,12 +9882,9 @@ SDValue SelectionDAG::getSymbolFunctionGlobalAddress(SDValue Op,
std::string ErrorStr;
raw_string_ostream ErrorFormatter(ErrorStr);
-
ErrorFormatter << "Undefined external symbol ";
ErrorFormatter << '"' << Symbol << '"';
- ErrorFormatter.flush();
-
- report_fatal_error(ErrorStr);
+ report_fatal_error(Twine(ErrorFormatter.str()));
}
//===----------------------------------------------------------------------===//
@@ -9526,7 +9893,7 @@ SDValue SelectionDAG::getSymbolFunctionGlobalAddress(SDValue Op,
bool llvm::isNullConstant(SDValue V) {
ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
- return Const != nullptr && Const->isNullValue();
+ return Const != nullptr && Const->isZero();
}
bool llvm::isNullFPConstant(SDValue V) {
@@ -9536,7 +9903,7 @@ bool llvm::isNullFPConstant(SDValue V) {
bool llvm::isAllOnesConstant(SDValue V) {
ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
- return Const != nullptr && Const->isAllOnesValue();
+ return Const != nullptr && Const->isAllOnes();
}
bool llvm::isOneConstant(SDValue V) {
@@ -9670,7 +10037,7 @@ bool llvm::isNullOrNullSplat(SDValue N, bool AllowUndefs) {
// TODO: may want to use peekThroughBitcast() here.
ConstantSDNode *C =
isConstOrConstSplat(N, AllowUndefs, /*AllowTruncation=*/true);
- return C && C->isNullValue();
+ return C && C->isZero();
}
bool llvm::isOneOrOneSplat(SDValue N, bool AllowUndefs) {
@@ -9684,7 +10051,7 @@ bool llvm::isAllOnesOrAllOnesSplat(SDValue N, bool AllowUndefs) {
N = peekThroughBitcasts(N);
unsigned BitWidth = N.getScalarValueSizeInBits();
ConstantSDNode *C = isConstOrConstSplat(N, AllowUndefs);
- return C && C->isAllOnesValue() && C->getValueSizeInBits(0) == BitWidth;
+ return C && C->isAllOnes() && C->getValueSizeInBits(0) == BitWidth;
}
HandleSDNode::~HandleSDNode() {
@@ -9790,8 +10157,7 @@ bool SDNode::hasAnyUseOfValue(unsigned Value) const {
/// isOnlyUserOf - Return true if this node is the only use of N.
bool SDNode::isOnlyUserOf(const SDNode *N) const {
bool Seen = false;
- for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
- SDNode *User = *I;
+ for (const SDNode *User : N->uses()) {
if (User == this)
Seen = true;
else
@@ -9804,8 +10170,7 @@ bool SDNode::isOnlyUserOf(const SDNode *N) const {
/// Return true if the only users of N are contained in Nodes.
bool SDNode::areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N) {
bool Seen = false;
- for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
- SDNode *User = *I;
+ for (const SDNode *User : N->uses()) {
if (llvm::is_contained(Nodes, User))
Seen = true;
else
@@ -10212,14 +10577,14 @@ SelectionDAG::GetDependentSplitDestVTs(const EVT &VT, const EVT &EnvVT,
"Mixing fixed width and scalable vectors when enveloping a type");
EVT LoVT, HiVT;
if (VTNumElts.getKnownMinValue() > EnvNumElts.getKnownMinValue()) {
- LoVT = EnvVT;
+ LoVT = EVT::getVectorVT(*getContext(), EltTp, EnvNumElts);
HiVT = EVT::getVectorVT(*getContext(), EltTp, VTNumElts - EnvNumElts);
*HiIsEmpty = false;
} else {
// Flag that hi type has zero storage size, but return split envelop type
// (this would be easier if vector types with zero elements were allowed).
LoVT = EVT::getVectorVT(*getContext(), EltTp, VTNumElts);
- HiVT = EnvVT;
+ HiVT = EVT::getVectorVT(*getContext(), EltTp, EnvNumElts);
*HiIsEmpty = true;
}
return std::make_pair(LoVT, HiVT);
@@ -10387,7 +10752,7 @@ SDValue BuildVectorSDNode::getSplatValue(const APInt &DemandedElts,
}
SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const {
- APInt DemandedElts = APInt::getAllOnesValue(getNumOperands());
+ APInt DemandedElts = APInt::getAllOnes(getNumOperands());
return getSplatValue(DemandedElts, UndefElements);
}
@@ -10439,7 +10804,7 @@ bool BuildVectorSDNode::getRepeatedSequence(const APInt &DemandedElts,
bool BuildVectorSDNode::getRepeatedSequence(SmallVectorImpl<SDValue> &Sequence,
BitVector *UndefElements) const {
- APInt DemandedElts = APInt::getAllOnesValue(getNumOperands());
+ APInt DemandedElts = APInt::getAllOnes(getNumOperands());
return getRepeatedSequence(DemandedElts, Sequence, UndefElements);
}
@@ -10485,6 +10850,97 @@ BuildVectorSDNode::getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements,
return -1;
}
+bool BuildVectorSDNode::getConstantRawBits(
+ bool IsLittleEndian, unsigned DstEltSizeInBits,
+ SmallVectorImpl<APInt> &RawBitElements, BitVector &UndefElements) const {
+ // Early-out if this contains anything but Undef/Constant/ConstantFP.
+ if (!isConstant())
+ return false;
+
+ unsigned NumSrcOps = getNumOperands();
+ unsigned SrcEltSizeInBits = getValueType(0).getScalarSizeInBits();
+ assert(((NumSrcOps * SrcEltSizeInBits) % DstEltSizeInBits) == 0 &&
+ "Invalid bitcast scale");
+
+ // Extract raw src bits.
+ SmallVector<APInt> SrcBitElements(NumSrcOps,
+ APInt::getNullValue(SrcEltSizeInBits));
+ BitVector SrcUndeElements(NumSrcOps, false);
+
+ for (unsigned I = 0; I != NumSrcOps; ++I) {
+ SDValue Op = getOperand(I);
+ if (Op.isUndef()) {
+ SrcUndeElements.set(I);
+ continue;
+ }
+ auto *CInt = dyn_cast<ConstantSDNode>(Op);
+ auto *CFP = dyn_cast<ConstantFPSDNode>(Op);
+ assert((CInt || CFP) && "Unknown constant");
+ SrcBitElements[I] =
+ CInt ? CInt->getAPIntValue().truncOrSelf(SrcEltSizeInBits)
+ : CFP->getValueAPF().bitcastToAPInt();
+ }
+
+ // Recast to dst width.
+ recastRawBits(IsLittleEndian, DstEltSizeInBits, RawBitElements,
+ SrcBitElements, UndefElements, SrcUndeElements);
+ return true;
+}
+
+void BuildVectorSDNode::recastRawBits(bool IsLittleEndian,
+ unsigned DstEltSizeInBits,
+ SmallVectorImpl<APInt> &DstBitElements,
+ ArrayRef<APInt> SrcBitElements,
+ BitVector &DstUndefElements,
+ const BitVector &SrcUndefElements) {
+ unsigned NumSrcOps = SrcBitElements.size();
+ unsigned SrcEltSizeInBits = SrcBitElements[0].getBitWidth();
+ assert(((NumSrcOps * SrcEltSizeInBits) % DstEltSizeInBits) == 0 &&
+ "Invalid bitcast scale");
+ assert(NumSrcOps == SrcUndefElements.size() &&
+ "Vector size mismatch");
+
+ unsigned NumDstOps = (NumSrcOps * SrcEltSizeInBits) / DstEltSizeInBits;
+ DstUndefElements.clear();
+ DstUndefElements.resize(NumDstOps, false);
+ DstBitElements.assign(NumDstOps, APInt::getNullValue(DstEltSizeInBits));
+
+ // Concatenate src elements constant bits together into dst element.
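+  // For example, recasting a little-endian 4 x i8 {0x11, 0x22, 0x33, 0x44}
+  // to i32 places the lowest-indexed source element in the least significant
+  // bits, producing 0x44332211.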
+ if (SrcEltSizeInBits <= DstEltSizeInBits) {
+ unsigned Scale = DstEltSizeInBits / SrcEltSizeInBits;
+ for (unsigned I = 0; I != NumDstOps; ++I) {
+ DstUndefElements.set(I);
+ APInt &DstBits = DstBitElements[I];
+ for (unsigned J = 0; J != Scale; ++J) {
+ unsigned Idx = (I * Scale) + (IsLittleEndian ? J : (Scale - J - 1));
+ if (SrcUndefElements[Idx])
+ continue;
+ DstUndefElements.reset(I);
+ const APInt &SrcBits = SrcBitElements[Idx];
+ assert(SrcBits.getBitWidth() == SrcEltSizeInBits &&
+ "Illegal constant bitwidths");
+ DstBits.insertBits(SrcBits, J * SrcEltSizeInBits);
+ }
+ }
+ return;
+ }
+
+ // Split src element constant bits into dst elements.
+ unsigned Scale = SrcEltSizeInBits / DstEltSizeInBits;
+ for (unsigned I = 0; I != NumSrcOps; ++I) {
+ if (SrcUndefElements[I]) {
+ DstUndefElements.set(I * Scale, (I + 1) * Scale);
+ continue;
+ }
+ const APInt &SrcBits = SrcBitElements[I];
+ for (unsigned J = 0; J != Scale; ++J) {
+ unsigned Idx = (I * Scale) + (IsLittleEndian ? J : (Scale - J - 1));
+ APInt &DstBits = DstBitElements[Idx];
+ DstBits = SrcBits.extractBits(DstEltSizeInBits, J * DstEltSizeInBits);
+ }
+ }
+}
+
bool BuildVectorSDNode::isConstant() const {
for (const SDValue &Op : op_values()) {
unsigned Opc = Op.getOpcode();
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index 20c7d771bfb6..6d8252046501 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -14,6 +14,7 @@
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/IR/GlobalAlias.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include <cstdint>
@@ -143,13 +144,27 @@ bool BaseIndexOffset::computeAliasing(const SDNode *Op0,
bool IsCV0 = isa<ConstantPoolSDNode>(BasePtr0.getBase());
bool IsCV1 = isa<ConstantPoolSDNode>(BasePtr1.getBase());
- // If of mismatched base types or checkable indices we can check
- // they do not alias.
- if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) ||
- (IsGV0 != IsGV1) || (IsCV0 != IsCV1)) &&
- (IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1)) {
- IsAlias = false;
- return true;
+ if ((IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1)) {
+    // We can derive NoAlias in the case of mismatched base types.
+ if (IsFI0 != IsFI1 || IsGV0 != IsGV1 || IsCV0 != IsCV1) {
+ IsAlias = false;
+ return true;
+ }
+ if (IsGV0 && IsGV1) {
+ auto *GV0 = cast<GlobalAddressSDNode>(BasePtr0.getBase())->getGlobal();
+ auto *GV1 = cast<GlobalAddressSDNode>(BasePtr1.getBase())->getGlobal();
+      // It doesn't make sense to access one global value using another global
+      // value's address, so we can assume that there is no aliasing in the
+      // case of two different globals (unless we have symbols that may
+      // indirectly point to each other).
+      // FIXME: This is perhaps a bit too defensive. We could try to follow
+      // the aliasee chain of GlobalAlias variables to find out whether two
+      // indirect symbols may alias.
+ if (GV0 != GV1 && !isa<GlobalAlias>(GV0) && !isa<GlobalAlias>(GV1)) {
+ IsAlias = false;
+ return true;
+ }
+ }
}
return false; // Cannot determine whether the pointers alias.
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index d56d4bcc9169..5d911c165293 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -69,6 +69,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/InlineAsm.h"
@@ -399,29 +400,31 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
return Val;
if (PartEVT.isVector()) {
+ // Vector/Vector bitcast.
+ if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits())
+ return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+
// If the element type of the source/dest vectors are the same, but the
// parts vector has more elements than the value vector, then we have a
// vector widening case (e.g. <2 x float> -> <4 x float>). Extract the
// elements we want.
- if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) {
+ if (PartEVT.getVectorElementCount() != ValueVT.getVectorElementCount()) {
assert((PartEVT.getVectorElementCount().getKnownMinValue() >
ValueVT.getVectorElementCount().getKnownMinValue()) &&
(PartEVT.getVectorElementCount().isScalable() ==
ValueVT.getVectorElementCount().isScalable()) &&
"Cannot narrow, it would be a lossy transformation");
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
- DAG.getVectorIdxConstant(0, DL));
+ PartEVT =
+ EVT::getVectorVT(*DAG.getContext(), PartEVT.getVectorElementType(),
+ ValueVT.getVectorElementCount());
+ Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, PartEVT, Val,
+ DAG.getVectorIdxConstant(0, DL));
+ if (PartEVT == ValueVT)
+ return Val;
}
- // Vector/Vector bitcast.
- if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits())
- return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
-
- assert(PartEVT.getVectorElementCount() == ValueVT.getVectorElementCount() &&
- "Cannot handle this kind of promotion");
// Promoted vector extract
return DAG.getAnyExtOrTrunc(Val, DL, ValueVT);
-
}
// Trivial bitcast if the types are the same size and the destination
@@ -670,6 +673,17 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
// Promoted vector extract
Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
+ } else if (PartEVT.isVector() &&
+ PartEVT.getVectorElementType() !=
+ ValueVT.getVectorElementType() &&
+ TLI.getTypeAction(*DAG.getContext(), ValueVT) ==
+ TargetLowering::TypeWidenVector) {
+ // Combination of widening and promotion.
+ EVT WidenVT =
+ EVT::getVectorVT(*DAG.getContext(), ValueVT.getVectorElementType(),
+ PartVT.getVectorElementCount());
+ SDValue Widened = widenVectorToPartType(DAG, Val, DL, WidenVT);
+ Val = DAG.getAnyExtOrTrunc(Widened, DL, PartVT);
} else {
if (ValueVT.getVectorElementCount().isScalar()) {
Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
@@ -726,15 +740,19 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
} else if (ValueVT.getSizeInBits() == BuiltVectorTy.getSizeInBits()) {
// Bitconvert vector->vector case.
Val = DAG.getNode(ISD::BITCAST, DL, BuiltVectorTy, Val);
- } else if (SDValue Widened =
- widenVectorToPartType(DAG, Val, DL, BuiltVectorTy)) {
- Val = Widened;
- } else if (BuiltVectorTy.getVectorElementType().bitsGE(
- ValueVT.getVectorElementType()) &&
- BuiltVectorTy.getVectorElementCount() ==
- ValueVT.getVectorElementCount()) {
- // Promoted vector extract
- Val = DAG.getAnyExtOrTrunc(Val, DL, BuiltVectorTy);
+ } else {
+ if (BuiltVectorTy.getVectorElementType().bitsGT(
+ ValueVT.getVectorElementType())) {
+ // Integer promotion.
+ ValueVT = EVT::getVectorVT(*DAG.getContext(),
+ BuiltVectorTy.getVectorElementType(),
+ ValueVT.getVectorElementCount());
+ Val = DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val);
+ }
+
+ if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, BuiltVectorTy)) {
+ Val = Widened;
+ }
}
assert(Val.getValueType() == BuiltVectorTy && "Unexpected vector value type");
@@ -1275,21 +1293,23 @@ void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) {
while (isa<Instruction>(V)) {
Instruction &VAsInst = *cast<Instruction>(V);
// Temporary "0", awaiting real implementation.
+ SmallVector<uint64_t, 16> Ops;
SmallVector<Value *, 4> AdditionalValues;
- DIExpression *SalvagedExpr =
- salvageDebugInfoImpl(VAsInst, Expr, StackValue, 0, AdditionalValues);
-
+ V = salvageDebugInfoImpl(VAsInst, Expr->getNumLocationOperands(), Ops,
+ AdditionalValues);
// If we cannot salvage any further, and haven't yet found a suitable debug
// expression, bail out.
+ if (!V)
+ break;
+
// TODO: If AdditionalValues isn't empty, then the salvage can only be
// represented with a DBG_VALUE_LIST, so we give up. When we have support
// here for variadic dbg_values, remove that condition.
- if (!SalvagedExpr || !AdditionalValues.empty())
+ if (!AdditionalValues.empty())
break;
// New value and expr now represent this debuginfo.
- V = VAsInst.getOperand(0);
- Expr = SalvagedExpr;
+ Expr = DIExpression::appendOpsToArg(Expr, Ops, 0, StackValue);
// Some kind of simplification occurred: check whether the operand of the
// salvaged debug expression can be encoded in this DAG.
@@ -1400,7 +1420,7 @@ bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values,
BitsToDescribe = *VarSize;
if (auto Fragment = Expr->getFragmentInfo())
BitsToDescribe = Fragment->SizeInBits;
- for (auto RegAndSize : RFV.getRegsAndSizes()) {
+ for (const auto &RegAndSize : RFV.getRegsAndSizes()) {
// Bail out if all bits are described already.
if (Offset >= BitsToDescribe)
break;
@@ -1945,16 +1965,13 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
/*IsVarArg*/ false, DL);
ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
- if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
- Attribute::SExt))
+ if (F->getAttributes().hasRetAttr(Attribute::SExt))
ExtendKind = ISD::SIGN_EXTEND;
- else if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
- Attribute::ZExt))
+ else if (F->getAttributes().hasRetAttr(Attribute::ZExt))
ExtendKind = ISD::ZERO_EXTEND;
LLVMContext &Context = F->getContext();
- bool RetInReg = F->getAttributes().hasAttribute(
- AttributeList::ReturnIndex, Attribute::InReg);
+ bool RetInReg = F->getAttributes().hasRetAttr(Attribute::InReg);
for (unsigned j = 0; j != NumValues; ++j) {
EVT VT = ValueVTs[j];
@@ -1995,7 +2012,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
Flags.setZExt();
for (unsigned i = 0; i < NumParts; ++i) {
- Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(),
+ Outs.push_back(ISD::OutputArg(Flags,
+ Parts[i].getValueType().getSimpleVT(),
VT, /*isfixed=*/true, 0, 0));
OutVals.push_back(Parts[i]);
}
@@ -2012,10 +2030,9 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
assert(SwiftError.getFunctionArg() && "Need a swift error argument");
ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
Flags.setSwiftError();
- Outs.push_back(ISD::OutputArg(Flags, EVT(TLI.getPointerTy(DL)) /*vt*/,
- EVT(TLI.getPointerTy(DL)) /*argvt*/,
- true /*isfixed*/, 1 /*origidx*/,
- 0 /*partOffs*/));
+ Outs.push_back(ISD::OutputArg(
+ Flags, /*vt=*/TLI.getPointerTy(DL), /*argvt=*/EVT(TLI.getPointerTy(DL)),
+ /*isfixed=*/true, /*origidx=*/1, /*partOffs=*/0));
// Create SDNode for the swifterror virtual register.
OutVals.push_back(
DAG.getRegister(SwiftError.getOrCreateVRegUseAt(
@@ -2566,7 +2583,7 @@ void SelectionDAGBuilder::visitJumpTableHeader(SwitchCG::JumpTable &JT,
JumpTableReg, SwitchOp);
JT.Reg = JumpTableReg;
- if (!JTH.OmitRangeCheck) {
+ if (!JTH.FallthroughUnreachable) {
// Emit the range check for the jump table, and branch to the default block
// for the switch statement if the value being switched on exceeds the
// largest case in the switch.
@@ -2663,7 +2680,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
TargetLowering::ArgListEntry Entry;
Entry.Node = GuardVal;
Entry.Ty = FnTy->getParamType(0);
- if (GuardCheckFn->hasAttribute(1, Attribute::AttrKind::InReg))
+ if (GuardCheckFn->hasParamAttribute(0, Attribute::AttrKind::InReg))
Entry.IsInReg = true;
Args.push_back(Entry);
@@ -2778,13 +2795,13 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
MachineBasicBlock* MBB = B.Cases[0].ThisBB;
- if (!B.OmitRangeCheck)
+ if (!B.FallthroughUnreachable)
addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);
addSuccessorWithProb(SwitchBB, MBB, B.Prob);
SwitchBB->normalizeSuccProbs();
SDValue Root = CopyTo;
- if (!B.OmitRangeCheck) {
+ if (!B.FallthroughUnreachable) {
// Conditional branch to the default block.
SDValue RangeCmp = DAG.getSetCC(dl,
TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
@@ -3140,7 +3157,7 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
// count type has enough bits to represent any shift value, truncate
// it now. This is a common case and it exposes the truncate to
// optimization early.
- else if (ShiftSize >= Log2_32_Ceil(Op2.getValueSizeInBits()))
+ else if (ShiftSize >= Log2_32_Ceil(Op1.getValueSizeInBits()))
Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2);
// Otherwise we'll need to temporarily settle for some other convenient
// type. Type legalization will make adjustments once the shiftee is split.
@@ -4057,8 +4074,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
Type *Ty = I.getType();
Align Alignment = I.getAlign();
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
+ AAMDNodes AAInfo = I.getAAMetadata();
const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
SmallVector<EVT, 4> ValueVTs, MemVTs;
@@ -4185,13 +4201,11 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
const Value *SV = I.getOperand(0);
Type *Ty = I.getType();
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
assert(
(!AA ||
!AA->pointsToConstantMemory(MemoryLocation(
SV, LocationSize::precise(DAG.getDataLayout().getTypeStoreSize(Ty)),
- AAInfo))) &&
+ I.getAAMetadata()))) &&
"load_from_swift_error should not be constant memory");
SmallVector<EVT, 4> ValueVTs;
@@ -4249,8 +4263,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
SDLoc dl = getCurSDLoc();
Align Alignment = I.getAlign();
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
+ AAMDNodes AAInfo = I.getAAMetadata();
auto MMOFlags = TLI.getStoreMemOperandFlags(I, DAG.getDataLayout());
@@ -4321,14 +4334,11 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
if (!Alignment)
Alignment = DAG.getEVTAlign(VT);
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
-
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,
// TODO: Make MachineMemOperands aware of scalable
// vectors.
- VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo);
+ VT.getStoreSize().getKnownMinSize(), *Alignment, I.getAAMetadata());
SDValue StoreNode =
DAG.getMaskedStore(getMemoryRoot(), sdl, Src0, Ptr, Offset, Mask, VT, MMO,
ISD::UNINDEXED, false /* Truncating */, IsCompressing);
@@ -4358,7 +4368,7 @@ static bool getUniformBase(const Value *Ptr, SDValue &Base, SDValue &Index,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const DataLayout &DL = DAG.getDataLayout();
- assert(Ptr->getType()->isVectorTy() && "Uexpected pointer type");
+ assert(Ptr->getType()->isVectorTy() && "Unexpected pointer type");
// Handle splat constant pointer.
if (auto *C = dyn_cast<Constant>(Ptr)) {
@@ -4412,9 +4422,6 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
.getValueOr(DAG.getEVTAlign(VT.getScalarType()));
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
-
SDValue Base;
SDValue Index;
ISD::MemIndexType IndexType;
@@ -4427,7 +4434,7 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
MachinePointerInfo(AS), MachineMemOperand::MOStore,
// TODO: Make MachineMemOperands aware of scalable
// vectors.
- MemoryLocation::UnknownSize, Alignment, AAInfo);
+ MemoryLocation::UnknownSize, Alignment, I.getAAMetadata());
if (!UniformBase) {
Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
Index = getValue(Ptr);
@@ -4485,8 +4492,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
if (!Alignment)
Alignment = DAG.getEVTAlign(VT);
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
+ AAMDNodes AAInfo = I.getAAMetadata();
const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
// Do not serialize masked loads of constant memory with anything.
@@ -4529,8 +4535,6 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
->getMaybeAlignValue()
.getValueOr(DAG.getEVTAlign(VT.getScalarType()));
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
SDValue Root = DAG.getRoot();
@@ -4545,7 +4549,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
MachinePointerInfo(AS), MachineMemOperand::MOLoad,
// TODO: Make MachineMemOperands aware of scalable
// vectors.
- MemoryLocation::UnknownSize, Alignment, AAInfo, Ranges);
+ MemoryLocation::UnknownSize, Alignment, I.getAAMetadata(), Ranges);
if (!UniformBase) {
Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
@@ -4786,7 +4790,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
TLI.getPointerTy(DAG.getDataLayout())));
// Add all operands of the call to the operand list.
- for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
+ for (unsigned i = 0, e = I.arg_size(); i != e; ++i) {
const Value *Arg = I.getArgOperand(i);
if (!I.paramHasAttr(i, Attribute::ImmArg)) {
Ops.push_back(getValue(Arg));
@@ -4823,12 +4827,11 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
SDValue Result;
if (IsTgtIntrinsic) {
// This is target intrinsic that touches memory
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
Result =
DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT,
MachinePointerInfo(Info.ptrVal, Info.offset),
- Info.align, Info.flags, Info.size, AAInfo);
+ Info.align, Info.flags, Info.size,
+ I.getAAMetadata());
} else if (!HasChain) {
Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
} else if (!I.getType()->isVoidTy()) {
@@ -5510,12 +5513,12 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
// we've been asked to pursue.
auto MakeVRegDbgValue = [&](Register Reg, DIExpression *FragExpr,
bool Indirect) {
- if (Reg.isVirtual() && TM.Options.ValueTrackingVariableLocations) {
+ if (Reg.isVirtual() && MF.useDebugInstrRef()) {
// For VRegs, in instruction referencing mode, create a DBG_INSTR_REF
// pointing at the VReg, which will be patched up later.
auto &Inst = TII->get(TargetOpcode::DBG_INSTR_REF);
auto MIB = BuildMI(MF, DL, Inst);
- MIB.addReg(Reg, RegState::Debug);
+ MIB.addReg(Reg);
MIB.addImm(0);
MIB.addMetadata(Variable);
auto *NewDIExpr = FragExpr;
@@ -5637,7 +5640,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
auto splitMultiRegDbgValue = [&](ArrayRef<std::pair<unsigned, TypeSize>>
SplitRegs) {
unsigned Offset = 0;
- for (auto RegAndSize : SplitRegs) {
+ for (const auto &RegAndSize : SplitRegs) {
// If the expression is already a fragment, the current register
// offset+size might extend beyond the fragment. In this case, only
// the register bits that are inside the fragment are relevant.
@@ -5866,12 +5869,11 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
// FIXME: Support passing different dest/src alignments to the memcpy DAG
// node.
SDValue Root = isVol ? getRoot() : getMemoryRoot();
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
SDValue MC = DAG.getMemcpy(Root, sdl, Op1, Op2, Op3, Alignment, isVol,
/* AlwaysInline */ false, isTC,
MachinePointerInfo(I.getArgOperand(0)),
- MachinePointerInfo(I.getArgOperand(1)), AAInfo);
+ MachinePointerInfo(I.getArgOperand(1)),
+ I.getAAMetadata());
updateDAGForMaybeTailCall(MC);
return;
}
@@ -5889,12 +5891,11 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
// FIXME: Support passing different dest/src alignments to the memcpy DAG
// node.
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
SDValue MC = DAG.getMemcpy(getRoot(), sdl, Dst, Src, Size, Alignment, isVol,
/* AlwaysInline */ true, isTC,
MachinePointerInfo(I.getArgOperand(0)),
- MachinePointerInfo(I.getArgOperand(1)), AAInfo);
+ MachinePointerInfo(I.getArgOperand(1)),
+ I.getAAMetadata());
updateDAGForMaybeTailCall(MC);
return;
}
@@ -5908,10 +5909,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
bool isVol = MSI.isVolatile();
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
SDValue Root = isVol ? getRoot() : getMemoryRoot();
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
SDValue MS = DAG.getMemset(Root, sdl, Op1, Op2, Op3, Alignment, isVol, isTC,
- MachinePointerInfo(I.getArgOperand(0)), AAInfo);
+ MachinePointerInfo(I.getArgOperand(0)),
+ I.getAAMetadata());
updateDAGForMaybeTailCall(MS);
return;
}
@@ -5929,11 +5929,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
// FIXME: Support passing different dest/src alignments to the memmove DAG
// node.
SDValue Root = isVol ? getRoot() : getMemoryRoot();
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
SDValue MM = DAG.getMemmove(Root, sdl, Op1, Op2, Op3, Alignment, isVol,
isTC, MachinePointerInfo(I.getArgOperand(0)),
- MachinePointerInfo(I.getArgOperand(1)), AAInfo);
+ MachinePointerInfo(I.getArgOperand(1)),
+ I.getAAMetadata());
updateDAGForMaybeTailCall(MM);
return;
}
@@ -6124,7 +6123,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
if (Values.empty())
return;
- if (std::count(Values.begin(), Values.end(), nullptr))
+ if (llvm::is_contained(Values, nullptr))
return;
bool IsVariadic = DI.hasArgList();
@@ -6706,9 +6705,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::debugtrap:
case Intrinsic::trap: {
StringRef TrapFuncName =
- I.getAttributes()
- .getAttribute(AttributeList::FunctionIndex, "trap-func-name")
- .getValueAsString();
+ I.getAttributes().getFnAttr("trap-func-name").getValueAsString();
if (TrapFuncName.empty()) {
switch (Intrinsic) {
case Intrinsic::trap:
@@ -6888,7 +6885,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
// Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission
// is the same on all targets.
- for (unsigned Idx = 0, E = I.getNumArgOperands(); Idx < E; ++Idx) {
+ for (unsigned Idx = 0, E = I.arg_size(); Idx < E; ++Idx) {
Value *Arg = I.getArgOperand(Idx)->stripPointerCasts();
if (isa<ConstantPointerNull>(Arg))
continue; // Skip null pointers. They represent a hole in index space.
@@ -7058,7 +7055,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
};
SmallVector<BranchFunnelTarget, 8> Targets;
- for (unsigned Op = 1, N = I.getNumArgOperands(); Op != N; Op += 2) {
+ for (unsigned Op = 1, N = I.arg_size(); Op != N; Op += 2) {
auto *ElemBase = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset(
I.getArgOperand(Op), Offset, DAG.getDataLayout()));
if (ElemBase != Base)
@@ -7327,9 +7324,128 @@ static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) {
llvm_unreachable(
"Inconsistency: no SDNode available for this VPIntrinsic!");
+ if (*ResOPC == ISD::VP_REDUCE_SEQ_FADD ||
+ *ResOPC == ISD::VP_REDUCE_SEQ_FMUL) {
+ if (VPIntrin.getFastMathFlags().allowReassoc())
+ return *ResOPC == ISD::VP_REDUCE_SEQ_FADD ? ISD::VP_REDUCE_FADD
+ : ISD::VP_REDUCE_FMUL;
+ }
+
return ResOPC.getValue();
}
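+// Lower llvm.vp.load and llvm.vp.gather. Both build a chained load-like node;
+// the gather path additionally decomposes the pointer operand into a uniform
+// (Base, Index, Scale) form, falling back to a zero base plus the full pointer
+// vector as index when no uniform base can be found.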
+void SelectionDAGBuilder::visitVPLoadGather(const VPIntrinsic &VPIntrin, EVT VT,
+ SmallVector<SDValue, 7> &OpValues,
+ bool isGather) {
+ SDLoc DL = getCurSDLoc();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ Value *PtrOperand = VPIntrin.getArgOperand(0);
+ MaybeAlign Alignment = DAG.getEVTAlign(VT);
+ AAMDNodes AAInfo = VPIntrin.getAAMetadata();
+ const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range);
+ SDValue LD;
+ bool AddToChain = true;
+ if (!isGather) {
+ // Do not serialize variable-length loads of constant memory with
+ // anything.
+ MemoryLocation ML;
+ if (VT.isScalableVector())
+ ML = MemoryLocation::getAfter(PtrOperand);
+ else
+ ML = MemoryLocation(
+ PtrOperand,
+ LocationSize::precise(
+ DAG.getDataLayout().getTypeStoreSize(VPIntrin.getType())),
+ AAInfo);
+ AddToChain = !AA || !AA->pointsToConstantMemory(ML);
+ SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
+ VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo, Ranges);
+ LD = DAG.getLoadVP(VT, DL, InChain, OpValues[0], OpValues[1], OpValues[2],
+ MMO, false /*IsExpanding */);
+ } else {
+ unsigned AS =
+ PtrOperand->getType()->getScalarType()->getPointerAddressSpace();
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(AS), MachineMemOperand::MOLoad,
+ MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);
+ SDValue Base, Index, Scale;
+ ISD::MemIndexType IndexType;
+ bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale,
+ this, VPIntrin.getParent());
+ if (!UniformBase) {
+ Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout()));
+ Index = getValue(PtrOperand);
+ IndexType = ISD::SIGNED_UNSCALED;
+ Scale =
+ DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout()));
+ }
+ EVT IdxVT = Index.getValueType();
+ EVT EltTy = IdxVT.getVectorElementType();
+ if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) {
+ EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy);
+ Index = DAG.getNode(ISD::SIGN_EXTEND, DL, NewIdxVT, Index);
+ }
+ LD = DAG.getGatherVP(
+ DAG.getVTList(VT, MVT::Other), VT, DL,
+ {DAG.getRoot(), Base, Index, Scale, OpValues[1], OpValues[2]}, MMO,
+ IndexType);
+ }
+ if (AddToChain)
+ PendingLoads.push_back(LD.getValue(1));
+ setValue(&VPIntrin, LD);
+}
+
+void SelectionDAGBuilder::visitVPStoreScatter(const VPIntrinsic &VPIntrin,
+ SmallVector<SDValue, 7> &OpValues,
+ bool isScatter) {
+ SDLoc DL = getCurSDLoc();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ Value *PtrOperand = VPIntrin.getArgOperand(1);
+ EVT VT = OpValues[0].getValueType();
+ MaybeAlign Alignment = DAG.getEVTAlign(VT);
+ AAMDNodes AAInfo = VPIntrin.getAAMetadata();
+ SDValue ST;
+ if (!isScatter) {
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,
+ VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo);
+ ST =
+ DAG.getStoreVP(getMemoryRoot(), DL, OpValues[0], OpValues[1],
+ OpValues[2], OpValues[3], MMO, false /* IsTruncating */);
+ } else {
+ unsigned AS =
+ PtrOperand->getType()->getScalarType()->getPointerAddressSpace();
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(AS), MachineMemOperand::MOStore,
+ MemoryLocation::UnknownSize, *Alignment, AAInfo);
+ SDValue Base, Index, Scale;
+ ISD::MemIndexType IndexType;
+ bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale,
+ this, VPIntrin.getParent());
+ if (!UniformBase) {
+ Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout()));
+ Index = getValue(PtrOperand);
+ IndexType = ISD::SIGNED_UNSCALED;
+ Scale =
+ DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout()));
+ }
+ EVT IdxVT = Index.getValueType();
+ EVT EltTy = IdxVT.getVectorElementType();
+ if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) {
+ EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy);
+ Index = DAG.getNode(ISD::SIGN_EXTEND, DL, NewIdxVT, Index);
+ }
+ ST = DAG.getScatterVP(DAG.getVTList(MVT::Other), VT, DL,
+ {getMemoryRoot(), OpValues[0], Base, Index, Scale,
+ OpValues[2], OpValues[3]},
+ MMO, IndexType);
+ }
+ DAG.setRoot(ST);
+ setValue(&VPIntrin, ST);
+}
+
void SelectionDAGBuilder::visitVectorPredicationIntrinsic(
const VPIntrinsic &VPIntrin) {
SDLoc DL = getCurSDLoc();
@@ -7349,15 +7465,29 @@ void SelectionDAGBuilder::visitVectorPredicationIntrinsic(
// Request operands.
SmallVector<SDValue, 7> OpValues;
- for (unsigned I = 0; I < VPIntrin.getNumArgOperands(); ++I) {
+ for (unsigned I = 0; I < VPIntrin.arg_size(); ++I) {
auto Op = getValue(VPIntrin.getArgOperand(I));
if (I == EVLParamPos)
Op = DAG.getNode(ISD::ZERO_EXTEND, DL, EVLParamVT, Op);
OpValues.push_back(Op);
}
- SDValue Result = DAG.getNode(Opcode, DL, VTs, OpValues);
- setValue(&VPIntrin, Result);
+ switch (Opcode) {
+ default: {
+ SDValue Result = DAG.getNode(Opcode, DL, VTs, OpValues);
+ setValue(&VPIntrin, Result);
+ break;
+ }
+ case ISD::VP_LOAD:
+ case ISD::VP_GATHER:
+ visitVPLoadGather(VPIntrin, ValueVTs[0], OpValues,
+ Opcode == ISD::VP_GATHER);
+ break;
+ case ISD::VP_STORE:
+ case ISD::VP_SCATTER:
+ visitVPStoreScatter(VPIntrin, OpValues, Opcode == ISD::VP_SCATTER);
+ break;
+ }
}
SDValue SelectionDAGBuilder::lowerStartEH(SDValue Chain,
@@ -7760,12 +7890,11 @@ bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) {
// because the return pointer needs to be adjusted by the size of
// the copied memory.
SDValue Root = isVol ? getRoot() : getMemoryRoot();
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
SDValue MC = DAG.getMemcpy(Root, sdl, Dst, Src, Size, Alignment, isVol, false,
/*isTailCall=*/false,
MachinePointerInfo(I.getArgOperand(0)),
- MachinePointerInfo(I.getArgOperand(1)), AAInfo);
+ MachinePointerInfo(I.getArgOperand(1)),
+ I.getAAMetadata());
assert(MC.getNode() != nullptr &&
"** memcpy should not be lowered as TailCall in mempcpy context **");
DAG.setRoot(MC);
@@ -7918,6 +8047,8 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
}
if (Function *F = I.getCalledFunction()) {
+ diagnoseDontCall(I);
+
if (F->isDeclaration()) {
// Is this an LLVM intrinsic or a target-specific intrinsic?
unsigned IID = F->getIntrinsicID();
@@ -8176,7 +8307,7 @@ public:
}
}
- return TLI.getValueType(DL, OpTy, true);
+ return TLI.getAsmOperandValueType(DL, OpTy, true);
}
};
@@ -8261,9 +8392,10 @@ static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location,
///
/// OpInfo describes the operand
/// RefOpInfo describes the matching operand if any, the operand otherwise
-static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
- SDISelAsmOperandInfo &OpInfo,
- SDISelAsmOperandInfo &RefOpInfo) {
+static llvm::Optional<unsigned>
+getRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
+ SDISelAsmOperandInfo &OpInfo,
+ SDISelAsmOperandInfo &RefOpInfo) {
LLVMContext &Context = *DAG.getContext();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -8273,7 +8405,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
// No work to do for memory operations.
if (OpInfo.ConstraintType == TargetLowering::C_Memory)
- return;
+ return None;
// If this is a constraint for a single physreg, or a constraint for a
// register class, find it.
@@ -8283,7 +8415,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
&TRI, RefOpInfo.ConstraintCode, RefOpInfo.ConstraintVT);
// RC is unset only on failure. Return immediately.
if (!RC)
- return;
+ return None;
// Get the actual register value type. This is important, because the user
// may have asked for (e.g.) the AX register in i32 type. We need to
@@ -8328,7 +8460,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
// No need to allocate a matching input constraint since the constraint it's
// matching to has already been allocated.
if (OpInfo.isMatchingInputConstraint())
- return;
+ return None;
EVT ValueVT = OpInfo.ConstraintVT;
if (OpInfo.ConstraintVT == MVT::Other)
@@ -8351,8 +8483,12 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
// Do not check for single registers.
if (AssignedReg) {
- for (; *I != AssignedReg; ++I)
- assert(I != RC->end() && "AssignedReg should be member of RC");
+ I = std::find(I, RC->end(), AssignedReg);
+ if (I == RC->end()) {
+ // RC does not contain the selected register, which indicates a
+ // mismatch between the register and the required type/bitwidth.
+ return {AssignedReg};
+ }
}
for (; NumRegs; --NumRegs, ++I) {
@@ -8362,6 +8498,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
}
OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
+ return None;
}
static unsigned
@@ -8452,12 +8589,12 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
// Process the call argument. BasicBlocks are labels, currently appearing
// only in asm's.
if (isa<CallBrInst>(Call) &&
- ArgNo - 1 >= (cast<CallBrInst>(&Call)->getNumArgOperands() -
+ ArgNo - 1 >= (cast<CallBrInst>(&Call)->arg_size() -
cast<CallBrInst>(&Call)->getNumIndirectDests() -
NumMatchingOps) &&
(NumMatchingOps == 0 ||
- ArgNo - 1 < (cast<CallBrInst>(&Call)->getNumArgOperands() -
- NumMatchingOps))) {
+ ArgNo - 1 <
+ (cast<CallBrInst>(&Call)->arg_size() - NumMatchingOps))) {
const auto *BA = cast<BlockAddress>(OpInfo.CallOperandVal);
EVT VT = TLI.getValueType(DAG.getDataLayout(), BA->getType(), true);
OpInfo.CallOperand = DAG.getTargetBlockAddress(BA, VT);
@@ -8479,8 +8616,8 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
DAG.getDataLayout(), STy->getElementType(ResNo));
} else {
assert(ResNo == 0 && "Asm only has one result!");
- OpInfo.ConstraintVT =
- TLI.getSimpleValueType(DAG.getDataLayout(), Call.getType());
+ OpInfo.ConstraintVT = TLI.getAsmOperandValueType(
+ DAG.getDataLayout(), Call.getType()).getSimpleVT();
}
++ResNo;
} else {
@@ -8595,7 +8732,18 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
OpInfo.isMatchingInputConstraint()
? ConstraintOperands[OpInfo.getMatchedOperand()]
: OpInfo;
- GetRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo);
+ const auto RegError =
+ getRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo);
+ if (RegError.hasValue()) {
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ const char *RegName = TRI.getName(RegError.getValue());
+ emitInlineAsmError(Call, "register '" + Twine(RegName) +
+ "' allocated for constraint '" +
+ Twine(OpInfo.ConstraintCode) +
+ "' does not match required type");
+ return;
+ }
auto DetectWriteToReservedRegister = [&]() {
const MachineFunction &MF = DAG.getMachineFunction();
@@ -8674,11 +8822,13 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
MachineFunction &MF = DAG.getMachineFunction();
MachineRegisterInfo &MRI = MF.getRegInfo();
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
- RegisterSDNode *R = dyn_cast<RegisterSDNode>(AsmNodeOperands[CurOp+1]);
+ auto *R = cast<RegisterSDNode>(AsmNodeOperands[CurOp+1]);
Register TiedReg = R->getReg();
MVT RegVT = R->getSimpleValueType(0);
- const TargetRegisterClass *RC = TiedReg.isVirtual() ?
- MRI.getRegClass(TiedReg) : TRI.getMinimalPhysRegClass(TiedReg);
+ const TargetRegisterClass *RC =
+ TiedReg.isVirtual() ? MRI.getRegClass(TiedReg)
+ : RegVT != MVT::Untyped ? TLI.getRegClassFor(RegVT)
+ : TRI.getMinimalPhysRegClass(TiedReg);
unsigned NumRegs = InlineAsm::getNumOperandRegisters(OpFlag);
for (unsigned i = 0; i != NumRegs; ++i)
Regs.push_back(MRI.createVirtualRegister(RC));
@@ -9317,7 +9467,7 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2;
- if (I.getNumArgOperands() > 1)
+ if (I.arg_size() > 1)
Op2 = getValue(I.getArgOperand(1));
SDLoc dl = getCurSDLoc();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
@@ -9671,9 +9821,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
// if it isn't first piece, alignment must be 1
// For scalable vectors the scalable part is currently handled
// by individual targets, so we just use the known minimum size here.
- ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), VT,
- i < CLI.NumFixedArgs, i,
- j*Parts[j].getValueType().getStoreSize().getKnownMinSize());
+ ISD::OutputArg MyFlags(
+ Flags, Parts[j].getValueType().getSimpleVT(), VT,
+ i < CLI.NumFixedArgs, i,
+ j * Parts[j].getValueType().getStoreSize().getKnownMinSize());
if (NumParts > 1 && j == 0)
MyFlags.Flags.setSplit();
else if (j != 0) {
@@ -9841,10 +9992,10 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
None); // This is not an ABI copy.
SDValue Chain = DAG.getEntryNode();
- ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) ==
- FuncInfo.PreferredExtendType.end())
- ? ISD::ANY_EXTEND
- : FuncInfo.PreferredExtendType[V];
+ ISD::NodeType ExtendType = ISD::ANY_EXTEND;
+ auto PreferredExtendIt = FuncInfo.PreferredExtendType.find(V);
+ if (PreferredExtendIt != FuncInfo.PreferredExtendType.end())
+ ExtendType = PreferredExtendIt->second;
RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V, ExtendType);
PendingExports.push_back(Chain);
}
@@ -10490,27 +10641,6 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
ConstantsOut.clear();
}
-/// Add a successor MBB to ParentMBB< creating a new MachineBB for BB if SuccMBB
-/// is 0.
-MachineBasicBlock *
-SelectionDAGBuilder::StackProtectorDescriptor::
-AddSuccessorMBB(const BasicBlock *BB,
- MachineBasicBlock *ParentMBB,
- bool IsLikely,
- MachineBasicBlock *SuccMBB) {
- // If SuccBB has not been created yet, create it.
- if (!SuccMBB) {
- MachineFunction *MF = ParentMBB->getParent();
- MachineFunction::iterator BBI(ParentMBB);
- SuccMBB = MF->CreateMachineBasicBlock(BB);
- MF->insert(++BBI, SuccMBB);
- }
- // Add it as a successor of ParentMBB.
- ParentMBB->addSuccessor(
- SuccMBB, BranchProbabilityInfo::getBranchProbStackProtector(IsLikely));
- return SuccMBB;
-}
-
MachineBasicBlock *SelectionDAGBuilder::NextBlock(MachineBasicBlock *MBB) {
MachineFunction::iterator I(MBB);
if (++I == FuncInfo.MF->end())
@@ -10675,12 +10805,10 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
}
}
- if (FallthroughUnreachable) {
- // Skip the range check if the fallthrough block is unreachable.
- JTH->OmitRangeCheck = true;
- }
+ if (FallthroughUnreachable)
+ JTH->FallthroughUnreachable = true;
- if (!JTH->OmitRangeCheck)
+ if (!JTH->FallthroughUnreachable)
addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb);
addSuccessorWithProb(CurMBB, JumpMBB, JumpProb);
CurMBB->normalizeSuccProbs();
@@ -10718,10 +10846,8 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
BTB->DefaultProb -= DefaultProb / 2;
}
- if (FallthroughUnreachable) {
- // Skip the range check if the fallthrough block is unreachable.
- BTB->OmitRangeCheck = true;
- }
+ if (FallthroughUnreachable)
+ BTB->FallthroughUnreachable = true;
// If we're in the right place, emit the bit test header right now.
if (CurMBB == SwitchMBB) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index df5be156821f..d6122aa0a739 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SwitchLoweringUtils.h"
@@ -180,204 +181,6 @@ private:
SwitchCG::CaseClusterVector &Clusters,
BranchProbability &PeeledCaseProb);
- /// A class which encapsulates all of the information needed to generate a
- /// stack protector check and signals to isel via its state being initialized
- /// that a stack protector needs to be generated.
- ///
- /// *NOTE* The following is a high level documentation of SelectionDAG Stack
- /// Protector Generation. The reason that it is placed here is for a lack of
- /// other good places to stick it.
- ///
- /// High Level Overview of SelectionDAG Stack Protector Generation:
- ///
- /// Previously, generation of stack protectors was done exclusively in the
- /// pre-SelectionDAG Codegen LLVM IR Pass "Stack Protector". This necessitated
- /// splitting basic blocks at the IR level to create the success/failure basic
- /// blocks in the tail of the basic block in question. As a result of this,
- /// calls that would have qualified for the sibling call optimization were no
- /// longer eligible for optimization since said calls were no longer right in
- /// the "tail position" (i.e. the immediate predecessor of a ReturnInst
- /// instruction).
- ///
- /// Then it was noticed that since the sibling call optimization causes the
- /// callee to reuse the caller's stack, if we could delay the generation of
- /// the stack protector check until later in CodeGen after the sibling call
- /// decision was made, we get both the tail call optimization and the stack
- /// protector check!
- ///
- /// A few goals in solving this problem were:
- ///
- /// 1. Preserve the architecture independence of stack protector generation.
- ///
- /// 2. Preserve the normal IR level stack protector check for platforms like
- /// OpenBSD for which we support platform-specific stack protector
- /// generation.
- ///
- /// The main problem that guided the present solution is that one can not
- /// solve this problem in an architecture independent manner at the IR level
- /// only. This is because:
- ///
- /// 1. The decision on whether or not to perform a sibling call on certain
- /// platforms (for instance i386) requires lower level information
- /// related to available registers that can not be known at the IR level.
- ///
- /// 2. Even if the previous point were not true, the decision on whether to
- /// perform a tail call is done in LowerCallTo in SelectionDAG which
- /// occurs after the Stack Protector Pass. As a result, one would need to
- /// put the relevant callinst into the stack protector check success
- /// basic block (where the return inst is placed) and then move it back
- /// later at SelectionDAG/MI time before the stack protector check if the
- /// tail call optimization failed. The MI level option was nixed
- /// immediately since it would require platform-specific pattern
- /// matching. The SelectionDAG level option was nixed because
- /// SelectionDAG only processes one IR level basic block at a time
- /// implying one could not create a DAG Combine to move the callinst.
- ///
- /// To get around this problem a few things were realized:
- ///
- /// 1. While one can not handle multiple IR level basic blocks at the
- /// SelectionDAG Level, one can generate multiple machine basic blocks
- /// for one IR level basic block. This is how we handle bit tests and
- /// switches.
- ///
- /// 2. At the MI level, tail calls are represented via a special return
- /// MIInst called "tcreturn". Thus if we know the basic block in which we
- /// wish to insert the stack protector check, we get the correct behavior
- /// by always inserting the stack protector check right before the return
- /// statement. This is a "magical transformation" since no matter where
- /// the stack protector check intrinsic is, we always insert the stack
- /// protector check code at the end of the BB.
- ///
- /// Given the aforementioned constraints, the following solution was devised:
- ///
- /// 1. On platforms that do not support SelectionDAG stack protector check
- /// generation, allow for the normal IR level stack protector check
- /// generation to continue.
- ///
- /// 2. On platforms that do support SelectionDAG stack protector check
- /// generation:
- ///
- /// a. Use the IR level stack protector pass to decide if a stack
- /// protector is required/which BB we insert the stack protector check
- /// in by reusing the logic already therein. If we wish to generate a
- /// stack protector check in a basic block, we place a special IR
- /// intrinsic called llvm.stackprotectorcheck right before the BB's
- /// returninst or if there is a callinst that could potentially be
- /// sibling call optimized, before the call inst.
- ///
- /// b. Then when a BB with said intrinsic is processed, we codegen the BB
- /// normally via SelectBasicBlock. In said process, when we visit the
- /// stack protector check, we do not actually emit anything into the
- /// BB. Instead, we just initialize the stack protector descriptor
- /// class (which involves stashing information/creating the success
- /// mbbb and the failure mbb if we have not created one for this
- /// function yet) and export the guard variable that we are going to
- /// compare.
- ///
- /// c. After we finish selecting the basic block, in FinishBasicBlock if
- /// the StackProtectorDescriptor attached to the SelectionDAGBuilder is
- /// initialized, we produce the validation code with one of these
- /// techniques:
- /// 1) with a call to a guard check function
- /// 2) with inlined instrumentation
- ///
- /// 1) We insert a call to the check function before the terminator.
- ///
- /// 2) We first find a splice point in the parent basic block
- /// before the terminator and then splice the terminator of said basic
- /// block into the success basic block. Then we code-gen a new tail for
- /// the parent basic block consisting of the two loads, the comparison,
- /// and finally two branches to the success/failure basic blocks. We
- /// conclude by code-gening the failure basic block if we have not
- /// code-gened it already (all stack protector checks we generate in
- /// the same function, use the same failure basic block).
- class StackProtectorDescriptor {
- public:
- StackProtectorDescriptor() = default;
-
- /// Returns true if all fields of the stack protector descriptor are
- /// initialized implying that we should/are ready to emit a stack protector.
- bool shouldEmitStackProtector() const {
- return ParentMBB && SuccessMBB && FailureMBB;
- }
-
- bool shouldEmitFunctionBasedCheckStackProtector() const {
- return ParentMBB && !SuccessMBB && !FailureMBB;
- }
-
- /// Initialize the stack protector descriptor structure for a new basic
- /// block.
- void initialize(const BasicBlock *BB, MachineBasicBlock *MBB,
- bool FunctionBasedInstrumentation) {
- // Make sure we are not initialized yet.
- assert(!shouldEmitStackProtector() && "Stack Protector Descriptor is "
- "already initialized!");
- ParentMBB = MBB;
- if (!FunctionBasedInstrumentation) {
- SuccessMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ true);
- FailureMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ false, FailureMBB);
- }
- }
-
- /// Reset state that changes when we handle different basic blocks.
- ///
- /// This currently includes:
- ///
- /// 1. The specific basic block we are generating a
- /// stack protector for (ParentMBB).
- ///
- /// 2. The successor machine basic block that will contain the tail of
- /// parent mbb after we create the stack protector check (SuccessMBB). This
- /// BB is visited only on stack protector check success.
- void resetPerBBState() {
- ParentMBB = nullptr;
- SuccessMBB = nullptr;
- }
-
- /// Reset state that only changes when we switch functions.
- ///
- /// This currently includes:
- ///
- /// 1. FailureMBB since we reuse the failure code path for all stack
- /// protector checks created in an individual function.
- ///
- /// 2.The guard variable since the guard variable we are checking against is
- /// always the same.
- void resetPerFunctionState() {
- FailureMBB = nullptr;
- }
-
- MachineBasicBlock *getParentMBB() { return ParentMBB; }
- MachineBasicBlock *getSuccessMBB() { return SuccessMBB; }
- MachineBasicBlock *getFailureMBB() { return FailureMBB; }
-
- private:
- /// The basic block for which we are generating the stack protector.
- ///
- /// As a result of stack protector generation, we will splice the
- /// terminators of this basic block into the successor mbb SuccessMBB and
- /// replace it with a compare/branch to the successor mbbs
- /// SuccessMBB/FailureMBB depending on whether or not the stack protector
- /// was violated.
- MachineBasicBlock *ParentMBB = nullptr;
-
- /// A basic block visited on stack protector check success that contains the
- /// terminators of ParentMBB.
- MachineBasicBlock *SuccessMBB = nullptr;
-
- /// This basic block visited on stack protector check failure that will
- /// contain a call to __stack_chk_fail().
- MachineBasicBlock *FailureMBB = nullptr;
-
- /// Add a successor machine basic block to ParentMBB. If the successor mbb
- /// has not been created yet (i.e. if SuccMBB = 0), then the machine basic
- /// block will be created. Assign a large weight if IsLikely is true.
- MachineBasicBlock *AddSuccessorMBB(const BasicBlock *BB,
- MachineBasicBlock *ParentMBB,
- bool IsLikely,
- MachineBasicBlock *SuccMBB = nullptr);
- };
-
private:
const TargetMachine &TM;
@@ -764,6 +567,10 @@ private:
void visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);
void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic);
void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI);
+ void visitVPLoadGather(const VPIntrinsic &VPIntrin, EVT VT,
+ SmallVector<SDValue, 7> &OpValues, bool isGather);
+ void visitVPStoreScatter(const VPIntrinsic &VPIntrin,
+ SmallVector<SDValue, 7> &OpValues, bool isScatter);
void visitVectorPredicationIntrinsic(const VPIntrinsic &VPIntrin);
void visitVAStart(const CallInst &I);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 40083c614a6c..77e9e53668f9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -146,9 +146,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue();
if (IID < Intrinsic::num_intrinsics)
return Intrinsic::getBaseName((Intrinsic::ID)IID).str();
- else if (!G)
+ if (!G)
return "Unknown intrinsic";
- else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo())
+ if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo())
return TII->getName(IID);
llvm_unreachable("Invalid intrinsic ID");
}
@@ -526,13 +526,13 @@ static void printMemOperand(raw_ostream &OS, const MachineMemOperand &MMO,
if (G) {
const MachineFunction *MF = &G->getMachineFunction();
return printMemOperand(OS, MMO, MF, MF->getFunction().getParent(),
- &MF->getFrameInfo(), G->getSubtarget().getInstrInfo(),
- *G->getContext());
- } else {
- LLVMContext Ctx;
- return printMemOperand(OS, MMO, /*MF=*/nullptr, /*M=*/nullptr,
- /*MFI=*/nullptr, /*TII=*/nullptr, Ctx);
+ &MF->getFrameInfo(),
+ G->getSubtarget().getInstrInfo(), *G->getContext());
}
+
+ LLVMContext Ctx;
+ return printMemOperand(OS, MMO, /*MF=*/nullptr, /*M=*/nullptr,
+ /*MFI=*/nullptr, /*TII=*/nullptr, Ctx);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -948,17 +948,19 @@ static bool printOperand(raw_ostream &OS, const SelectionDAG *G,
if (!Value.getNode()) {
OS << "<null>";
return false;
- } else if (shouldPrintInline(*Value.getNode(), G)) {
+ }
+
+ if (shouldPrintInline(*Value.getNode(), G)) {
OS << Value->getOperationName(G) << ':';
Value->print_types(OS, G);
Value->print_details(OS, G);
return true;
- } else {
- OS << PrintNodeId(*Value.getNode());
- if (unsigned RN = Value.getResNo())
- OS << ':' << RN;
- return false;
}
+
+ OS << PrintNodeId(*Value.getNode());
+ if (unsigned RN = Value.getResNo())
+ OS << ':' << RN;
+ return false;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -1012,15 +1014,12 @@ static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N,
N->print(OS, G);
- if (depth < 1)
- return;
-
for (const SDValue &Op : N->op_values()) {
// Don't follow chain operands.
if (Op.getValueType() == MVT::Other)
continue;
OS << '\n';
- printrWithDepthHelper(OS, Op.getNode(), G, depth-1, indent+2);
+ printrWithDepthHelper(OS, Op.getNode(), G, depth - 1, indent + 2);
}
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 1415cce3b1df..c7e37cf8ca14 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -33,6 +33,7 @@
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
@@ -575,7 +576,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
LiveInMap.insert(LI);
// Insert DBG_VALUE instructions for function arguments to the entry block.
- bool InstrRef = TM.Options.ValueTrackingVariableLocations;
+ bool InstrRef = MF->useDebugInstrRef();
for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) {
MachineInstr *MI = FuncInfo->ArgDbgValues[e - i - 1];
assert(MI->getOpcode() != TargetOpcode::DBG_VALUE_LIST &&
@@ -699,7 +700,7 @@ static void reportFastISelFailure(MachineFunction &MF,
R << (" (in function: " + MF.getName() + ")").str();
if (ShouldAbort)
- report_fatal_error(R.getMsg());
+ report_fatal_error(Twine(R.getMsg()));
ORE.emit(R);
}
@@ -798,7 +799,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
- CurDAG->VerifyDAGDiverence();
+ CurDAG->VerifyDAGDivergence();
#endif
if (ViewDAGCombine1 && MatchFilterBB)
@@ -818,7 +819,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
- CurDAG->VerifyDAGDiverence();
+ CurDAG->VerifyDAGDivergence();
#endif
// Second step, hack on the DAG until it only uses operations and types that
@@ -840,7 +841,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
- CurDAG->VerifyDAGDiverence();
+ CurDAG->VerifyDAGDivergence();
#endif
// Only allow creation of legal node types.
@@ -864,7 +865,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
- CurDAG->VerifyDAGDiverence();
+ CurDAG->VerifyDAGDivergence();
#endif
}
@@ -882,7 +883,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
- CurDAG->VerifyDAGDiverence();
+ CurDAG->VerifyDAGDivergence();
#endif
{
@@ -898,7 +899,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
- CurDAG->VerifyDAGDiverence();
+ CurDAG->VerifyDAGDivergence();
#endif
if (ViewDAGCombineLT && MatchFilterBB)
@@ -918,7 +919,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
- CurDAG->VerifyDAGDiverence();
+ CurDAG->VerifyDAGDivergence();
#endif
}
@@ -938,7 +939,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
- CurDAG->VerifyDAGDiverence();
+ CurDAG->VerifyDAGDivergence();
#endif
if (ViewDAGCombine2 && MatchFilterBB)
@@ -958,7 +959,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
- CurDAG->VerifyDAGDiverence();
+ CurDAG->VerifyDAGDivergence();
#endif
if (OptLevel != CodeGenOpt::None)
@@ -1045,25 +1046,25 @@ public:
} // end anonymous namespace
// This function is used to enforce the topological node id property
-// property leveraged during Instruction selection. Before selection all
-// nodes are given a non-negative id such that all nodes have a larger id than
+// leveraged during instruction selection. Before the selection process all
+// nodes are given a non-negative id such that all nodes have a greater id than
// their operands. As this holds transitively we can prune checks that a node N
// is a predecessor of another node M by not recursively checking through M's
-// operands if N's ID is larger than M's ID. This is significantly improves
-// performance of for various legality checks (e.g. IsLegalToFold /
-// UpdateChains).
-
-// However, when we fuse multiple nodes into a single node
-// during selection we may induce a predecessor relationship between inputs and
-// outputs of distinct nodes being merged violating the topological property.
-// Should a fused node have a successor which has yet to be selected, our
-// legality checks would be incorrect. To avoid this we mark all unselected
-// sucessor nodes, i.e. id != -1 as invalid for pruning by bit-negating (x =>
+// operands if N's ID is larger than M's ID. This significantly improves
+// performance of various legality checks (e.g. IsLegalToFold / UpdateChains).
+
+// However, when we fuse multiple nodes into a single node during the
+// selection we may induce a predecessor relationship between inputs and
+// outputs of distinct nodes being merged, violating the topological property.
+// Should a fused node have a successor which has yet to be selected,
+// our legality checks would be incorrect. To avoid this we mark all unselected
+// successor nodes, i.e. id != -1, as invalid for pruning by bit-negating (x =>
// (-(x+1))) the ids and modify our pruning check to ignore negative Ids of M.
// We use bit-negation to more clearly enforce that node id -1 can only be
-// achieved by selected nodes). As the conversion is reversable the original Id,
-// topological pruning can still be leveraged when looking for unselected nodes.
-// This method is call internally in all ISel replacement calls.
+// achieved by selected nodes. As the conversion is reversible to the original
+// Id, topological pruning can still be leveraged when looking for unselected
+// nodes. This method is called internally in all ISel replacement related
+// functions.
void SelectionDAGISel::EnforceNodeIdInvariant(SDNode *Node) {
SmallVector<SDNode *, 4> Nodes;
Nodes.push_back(Node);
@@ -1080,7 +1081,7 @@ void SelectionDAGISel::EnforceNodeIdInvariant(SDNode *Node) {
}
}
-// InvalidateNodeId - As discusses in EnforceNodeIdInvariant, mark a
+// InvalidateNodeId - As explained in EnforceNodeIdInvariant, mark a
// NodeId with the equivalent node id which is invalid for topological
// pruning.
void SelectionDAGISel::InvalidateNodeId(SDNode *N) {
@@ -1226,7 +1227,10 @@ static void mapWasmLandingPadIndex(MachineBasicBlock *MBB,
bool IsSingleCatchAllClause =
CPI->getNumArgOperands() == 1 &&
cast<Constant>(CPI->getArgOperand(0))->isNullValue();
- if (!IsSingleCatchAllClause) {
+ // catchpads for longjmp use an empty type list, e.g. catchpad within %0 []
+ // and they don't need LSDA info
+ bool IsCatchLongjmp = CPI->getNumArgOperands() == 0;
+ if (!IsSingleCatchAllClause && !IsCatchLongjmp) {
// Create a mapping from landing pad label to landing pad index.
bool IntrFound = false;
for (const User *U : CPI->users()) {
@@ -1644,114 +1648,6 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
SDB->SPDescriptor.resetPerFunctionState();
}
-/// Given that the input MI is before a partial terminator sequence TSeq, return
-/// true if M + TSeq also a partial terminator sequence.
-///
-/// A Terminator sequence is a sequence of MachineInstrs which at this point in
-/// lowering copy vregs into physical registers, which are then passed into
-/// terminator instructors so we can satisfy ABI constraints. A partial
-/// terminator sequence is an improper subset of a terminator sequence (i.e. it
-/// may be the whole terminator sequence).
-static bool MIIsInTerminatorSequence(const MachineInstr &MI) {
- // If we do not have a copy or an implicit def, we return true if and only if
- // MI is a debug value.
- if (!MI.isCopy() && !MI.isImplicitDef())
- // Sometimes DBG_VALUE MI sneak in between the copies from the vregs to the
- // physical registers if there is debug info associated with the terminator
- // of our mbb. We want to include said debug info in our terminator
- // sequence, so we return true in that case.
- return MI.isDebugValue();
-
- // We have left the terminator sequence if we are not doing one of the
- // following:
- //
- // 1. Copying a vreg into a physical register.
- // 2. Copying a vreg into a vreg.
- // 3. Defining a register via an implicit def.
-
- // OPI should always be a register definition...
- MachineInstr::const_mop_iterator OPI = MI.operands_begin();
- if (!OPI->isReg() || !OPI->isDef())
- return false;
-
- // Defining any register via an implicit def is always ok.
- if (MI.isImplicitDef())
- return true;
-
- // Grab the copy source...
- MachineInstr::const_mop_iterator OPI2 = OPI;
- ++OPI2;
- assert(OPI2 != MI.operands_end()
- && "Should have a copy implying we should have 2 arguments.");
-
- // Make sure that the copy dest is not a vreg when the copy source is a
- // physical register.
- if (!OPI2->isReg() || (!Register::isPhysicalRegister(OPI->getReg()) &&
- Register::isPhysicalRegister(OPI2->getReg())))
- return false;
-
- return true;
-}
-
-/// Find the split point at which to splice the end of BB into its success stack
-/// protector check machine basic block.
-///
-/// On many platforms, due to ABI constraints, terminators, even before register
-/// allocation, use physical registers. This creates an issue for us since
-/// physical registers at this point can not travel across basic
-/// blocks. Luckily, selectiondag always moves physical registers into vregs
-/// when they enter functions and moves them through a sequence of copies back
-/// into the physical registers right before the terminator creating a
-/// ``Terminator Sequence''. This function is searching for the beginning of the
-/// terminator sequence so that we can ensure that we splice off not just the
-/// terminator, but additionally the copies that move the vregs into the
-/// physical registers.
-static MachineBasicBlock::iterator
-FindSplitPointForStackProtector(MachineBasicBlock *BB,
- const TargetInstrInfo &TII) {
- MachineBasicBlock::iterator SplitPoint = BB->getFirstTerminator();
- if (SplitPoint == BB->begin())
- return SplitPoint;
-
- MachineBasicBlock::iterator Start = BB->begin();
- MachineBasicBlock::iterator Previous = SplitPoint;
- --Previous;
-
- if (TII.isTailCall(*SplitPoint) &&
- Previous->getOpcode() == TII.getCallFrameDestroyOpcode()) {
- // call itself, then we must insert before the sequence even starts. For
- // example:
- // <split point>
- // ADJCALLSTACKDOWN ...
- // <Moves>
- // ADJCALLSTACKUP ...
- // TAILJMP somewhere
- // On the other hand, it could be an unrelated call in which case this tail call
- // has to register moves of its own and should be the split point. For example:
- // ADJCALLSTACKDOWN
- // CALL something_else
- // ADJCALLSTACKUP
- // <split point>
- // TAILJMP somewhere
- do {
- --Previous;
- if (Previous->isCall())
- return SplitPoint;
- } while(Previous->getOpcode() != TII.getCallFrameSetupOpcode());
-
- return Previous;
- }
-
- while (MIIsInTerminatorSequence(*Previous)) {
- SplitPoint = Previous;
- if (Previous == Start)
- break;
- --Previous;
- }
-
- return SplitPoint;
-}
-
void
SelectionDAGISel::FinishBasicBlock() {
LLVM_DEBUG(dbgs() << "Total amount of phi nodes to update: "
@@ -1781,7 +1677,7 @@ SelectionDAGISel::FinishBasicBlock() {
// Add load and check to the basicblock.
FuncInfo->MBB = ParentMBB;
FuncInfo->InsertPt =
- FindSplitPointForStackProtector(ParentMBB, *TII);
+ findSplitPointForStackProtector(ParentMBB, *TII);
SDB->visitSPDescriptorParent(SDB->SPDescriptor, ParentMBB);
CurDAG->setRoot(SDB->getRoot());
SDB->clear();
@@ -1800,7 +1696,7 @@ SelectionDAGISel::FinishBasicBlock() {
// register allocation issues caused by us splitting the parent mbb. The
// register allocator will clean up said virtual copies later on.
MachineBasicBlock::iterator SplitPoint =
- FindSplitPointForStackProtector(ParentMBB, *TII);
+ findSplitPointForStackProtector(ParentMBB, *TII);
// Splice the terminator of ParentMBB into SuccessMBB.
SuccessMBB->splice(SuccessMBB->end(), ParentMBB,
@@ -1861,9 +1757,9 @@ SelectionDAGISel::FinishBasicBlock() {
// test, and delete the last bit test.
MachineBasicBlock *NextMBB;
- if (BTB.ContiguousRange && j + 2 == ej) {
- // Second-to-last bit-test with contiguous range: fall through to the
- // target of the final bit test.
+ if ((BTB.ContiguousRange || BTB.FallthroughUnreachable) && j + 2 == ej) {
+ // Second-to-last bit-test with contiguous range or omitted range
+ // check: fall through to the target of the final bit test.
NextMBB = BTB.Cases[j + 1].TargetBB;
} else if (j + 1 == ej) {
// For the last bit test, fall through to Default.
@@ -1880,7 +1776,7 @@ SelectionDAGISel::FinishBasicBlock() {
SDB->clear();
CodeGenAndEmitDAG();
- if (BTB.ContiguousRange && j + 2 == ej) {
+ if ((BTB.ContiguousRange || BTB.FallthroughUnreachable) && j + 2 == ej) {
// Since we're not going to use the final bit test, remove it.
BTB.Cases.pop_back();
break;
@@ -3800,7 +3696,7 @@ void SelectionDAGISel::CannotYetSelect(SDNode *N) {
else
Msg << "unknown intrinsic #" << iid;
}
- report_fatal_error(Msg.str());
+ report_fatal_error(Twine(Msg.str()));
}
char SelectionDAGISel::ID = 0;
diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index a903c2401264..e2db9633bfb9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -1119,7 +1119,7 @@ void SelectionDAGBuilder::LowerCallSiteWithDeoptBundleImpl(
StatepointLoweringInfo SI(DAG);
unsigned ArgBeginIndex = Call->arg_begin() - Call->op_begin();
populateCallLoweringInfo(
- SI.CLI, Call, ArgBeginIndex, Call->getNumArgOperands(), Callee,
+ SI.CLI, Call, ArgBeginIndex, Call->arg_size(), Callee,
ForceVoidReturnTy ? Type::getVoidTy(*DAG.getContext()) : Call->getType(),
false);
if (!VarArgDisallowed)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 1c1dae8f953f..e4a69adff05b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -26,6 +26,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/DivisionByConstantInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
@@ -537,7 +538,7 @@ bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
TargetLoweringOpt &TLO) const {
EVT VT = Op.getValueType();
APInt DemandedElts = VT.isVector()
- ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ ? APInt::getAllOnes(VT.getVectorNumElements())
: APInt(1, 1);
return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
}
@@ -621,7 +622,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
}
APInt DemandedElts = VT.isVector()
- ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ ? APInt::getAllOnes(VT.getVectorNumElements())
: APInt(1, 1);
return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
AssumeSingleUse);
@@ -667,12 +668,12 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
DAG.getDataLayout().isLittleEndian()) {
unsigned Scale = NumDstEltBits / NumSrcEltBits;
unsigned NumSrcElts = SrcVT.getVectorNumElements();
- APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
- APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
+ APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
+ APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
for (unsigned i = 0; i != Scale; ++i) {
unsigned Offset = i * NumSrcEltBits;
APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
- if (!Sub.isNullValue()) {
+ if (!Sub.isZero()) {
DemandedSrcBits |= Sub;
for (unsigned j = 0; j != NumElts; ++j)
if (DemandedElts[j])
@@ -690,8 +691,8 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
DAG.getDataLayout().isLittleEndian()) {
unsigned Scale = NumSrcEltBits / NumDstEltBits;
unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
- APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
- APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
+ APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
+ APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
for (unsigned i = 0; i != NumElts; ++i)
if (DemandedElts[i]) {
unsigned Offset = (i % Scale) * NumDstEltBits;
@@ -819,13 +820,21 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
break;
}
case ISD::INSERT_SUBVECTOR: {
- // If we don't demand the inserted subvector, return the base vector.
SDValue Vec = Op.getOperand(0);
SDValue Sub = Op.getOperand(1);
uint64_t Idx = Op.getConstantOperandVal(2);
unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
- if (DemandedElts.extractBits(NumSubElts, Idx) == 0)
+ APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
+ // If we don't demand the inserted subvector, return the base vector.
+ if (DemandedSubElts == 0)
return Vec;
+ // If this simply widens the lowest subvector, see if we can do it earlier.
+ if (Idx == 0 && Vec.isUndef()) {
+ if (SDValue NewSub = SimplifyMultipleUseDemandedBits(
+ Sub, DemandedBits, DemandedSubElts, DAG, Depth + 1))
+ return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
+ Op.getOperand(0), NewSub, Op.getOperand(2));
+ }
break;
}
case ISD::VECTOR_SHUFFLE: {
@@ -866,7 +875,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
unsigned Depth) const {
EVT VT = Op.getValueType();
APInt DemandedElts = VT.isVector()
- ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ ? APInt::getAllOnes(VT.getVectorNumElements())
: APInt(1, 1);
return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
Depth);
@@ -875,7 +884,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
unsigned Depth) const {
- APInt DemandedBits = APInt::getAllOnesValue(Op.getScalarValueSizeInBits());
+ APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
Depth);
}
@@ -942,8 +951,8 @@ bool TargetLowering::SimplifyDemandedBits(
}
// If this is the root being simplified, allow it to have multiple uses,
// just set the DemandedBits/Elts to all bits.
- DemandedBits = APInt::getAllOnesValue(BitWidth);
- DemandedElts = APInt::getAllOnesValue(NumElts);
+ DemandedBits = APInt::getAllOnes(BitWidth);
+ DemandedElts = APInt::getAllOnes(NumElts);
} else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
// Not demanding any bits/elts from Op.
return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
@@ -1038,7 +1047,7 @@ bool TargetLowering::SimplifyDemandedBits(
unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
APInt DemandedSrcElts = DemandedElts;
- DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx);
+ DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
KnownBits KnownSub, KnownSrc;
if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
@@ -1056,8 +1065,8 @@ bool TargetLowering::SimplifyDemandedBits(
Known = KnownBits::commonBits(Known, KnownSrc);
// Attempt to avoid multi-use src if we don't need anything from it.
- if (!DemandedBits.isAllOnesValue() || !DemandedSubElts.isAllOnesValue() ||
- !DemandedSrcElts.isAllOnesValue()) {
+ if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
+ !DemandedSrcElts.isAllOnes()) {
SDValue NewSub = SimplifyMultipleUseDemandedBits(
Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
SDValue NewSrc = SimplifyMultipleUseDemandedBits(
@@ -1086,7 +1095,7 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
// Attempt to avoid multi-use src if we don't need anything from it.
- if (!DemandedBits.isAllOnesValue() || !DemandedSrcElts.isAllOnesValue()) {
+ if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
if (DemandedSrc) {
@@ -1216,7 +1225,7 @@ bool TargetLowering::SimplifyDemandedBits(
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// Attempt to avoid multi-use ops if we don't need anything from them.
- if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
@@ -1263,7 +1272,7 @@ bool TargetLowering::SimplifyDemandedBits(
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// Attempt to avoid multi-use ops if we don't need anything from them.
- if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
@@ -1306,7 +1315,7 @@ bool TargetLowering::SimplifyDemandedBits(
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// Attempt to avoid multi-use ops if we don't need anything from them.
- if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
@@ -1351,8 +1360,7 @@ bool TargetLowering::SimplifyDemandedBits(
// If the RHS is a constant, see if we can change it. Don't alter a -1
// constant because that's a 'not' op, and that is better for combining
// and codegen.
- if (!C->isAllOnesValue() &&
- DemandedBits.isSubsetOf(C->getAPIntValue())) {
+ if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
// We're flipping all demanded bits. Flip the undemanded bits too.
SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
return TLO.CombineTo(Op, New);
@@ -1360,7 +1368,7 @@ bool TargetLowering::SimplifyDemandedBits(
}
// If we can't turn this into a 'not', try to shrink the constant.
- if (!C || !C->isAllOnesValue())
+ if (!C || !C->isAllOnes())
if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
return true;
@@ -1605,7 +1613,7 @@ bool TargetLowering::SimplifyDemandedBits(
// always convert this into a logical shr, even if the shift amount is
// variable. The low bit of the shift cannot be an input sign bit unless
// the shift amount is >= the size of the datatype, which is undefined.
- if (DemandedBits.isOneValue())
+ if (DemandedBits.isOne())
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
if (const APInt *SA =
@@ -1655,7 +1663,7 @@ bool TargetLowering::SimplifyDemandedBits(
Known.One.setHighBits(ShAmt);
// Attempt to avoid multi-use ops if we don't need anything from them.
- if (!InDemandedMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
if (DemandedOp0) {
@@ -1781,7 +1789,7 @@ bool TargetLowering::SimplifyDemandedBits(
// If only 1 bit is demanded, replace with PARITY as long as we're before
// op legalization.
// FIXME: Limit to scalars for now.
- if (DemandedBits.isOneValue() && !TLO.LegalOps && !VT.isVector())
+ if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
Op.getOperand(0)));
@@ -1795,9 +1803,9 @@ bool TargetLowering::SimplifyDemandedBits(
// If we only care about the highest bit, don't bother shifting right.
if (DemandedBits.isSignMask()) {
- unsigned NumSignBits =
- TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
- bool AlreadySignExtended = NumSignBits >= BitWidth - ExVTBits + 1;
+ unsigned MinSignedBits =
+ TLO.DAG.ComputeMinSignedBits(Op0, DemandedElts, Depth + 1);
+ bool AlreadySignExtended = ExVTBits >= MinSignedBits;
// However if the input is already sign extended we expect the sign
// extension to be dropped altogether later and do not simplify.
if (!AlreadySignExtended) {
@@ -2071,7 +2079,7 @@ bool TargetLowering::SimplifyDemandedBits(
// Demand the bits from every vector element without a constant index.
unsigned NumSrcElts = SrcEltCnt.getFixedValue();
- APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
+ APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
if (CIdx->getAPIntValue().ult(NumSrcElts))
DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
@@ -2087,8 +2095,7 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
// Attempt to avoid multi-use ops if we don't need anything from them.
- if (!DemandedSrcBits.isAllOnesValue() ||
- !DemandedSrcElts.isAllOnesValue()) {
+ if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
SDValue NewOp =
@@ -2138,12 +2145,12 @@ bool TargetLowering::SimplifyDemandedBits(
TLO.DAG.getDataLayout().isLittleEndian()) {
unsigned Scale = BitWidth / NumSrcEltBits;
unsigned NumSrcElts = SrcVT.getVectorNumElements();
- APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
- APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
+ APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
+ APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
for (unsigned i = 0; i != Scale; ++i) {
unsigned Offset = i * NumSrcEltBits;
APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
- if (!Sub.isNullValue()) {
+ if (!Sub.isZero()) {
DemandedSrcBits |= Sub;
for (unsigned j = 0; j != NumElts; ++j)
if (DemandedElts[j])
@@ -2164,8 +2171,8 @@ bool TargetLowering::SimplifyDemandedBits(
TLO.DAG.getDataLayout().isLittleEndian()) {
unsigned Scale = NumSrcEltBits / BitWidth;
unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
- APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
- APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
+ APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
+ APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
for (unsigned i = 0; i != NumElts; ++i)
if (DemandedElts[i]) {
unsigned Offset = (i % Scale) * BitWidth;
@@ -2222,7 +2229,7 @@ bool TargetLowering::SimplifyDemandedBits(
}
// Attempt to avoid multi-use ops if we don't need anything from them.
- if (!LoMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
@@ -2245,8 +2252,8 @@ bool TargetLowering::SimplifyDemandedBits(
// is probably not useful (and could be detrimental).
ConstantSDNode *C = isConstOrConstSplat(Op1);
APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
- if (C && !C->isAllOnesValue() && !C->isOne() &&
- (C->getAPIntValue() | HighMask).isAllOnesValue()) {
+ if (C && !C->isAllOnes() && !C->isOne() &&
+ (C->getAPIntValue() | HighMask).isAllOnes()) {
SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
// Disable the nsw and nuw flags. We can no longer guarantee that we
// won't wrap after simplification.
@@ -2344,7 +2351,7 @@ static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
return SDValue();
};
- APInt KnownUndef = APInt::getNullValue(NumElts);
+ APInt KnownUndef = APInt::getZero(NumElts);
for (unsigned i = 0; i != NumElts; ++i) {
// If both inputs for this element are either constant or undef and match
// the element type, compute the constant/undef result for this element of
@@ -2371,7 +2378,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
unsigned NumElts = DemandedElts.getBitWidth();
assert(VT.isVector() && "Expected vector op");
- KnownUndef = KnownZero = APInt::getNullValue(NumElts);
+ KnownUndef = KnownZero = APInt::getZero(NumElts);
// TODO: For now we assume we know nothing about scalable vectors.
if (VT.isScalableVector())
@@ -2463,17 +2470,13 @@ bool TargetLowering::SimplifyDemandedVectorElts(
return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
KnownZero, TLO, Depth + 1);
- APInt SrcZero, SrcUndef;
- APInt SrcDemandedElts = APInt::getNullValue(NumSrcElts);
+ APInt SrcDemandedElts, SrcZero, SrcUndef;
// Bitcast from 'large element' src vector to 'small element' vector, we
// must demand a source element if any DemandedElt maps to it.
if ((NumElts % NumSrcElts) == 0) {
unsigned Scale = NumElts / NumSrcElts;
- for (unsigned i = 0; i != NumElts; ++i)
- if (DemandedElts[i])
- SrcDemandedElts.setBit(i / Scale);
-
+ SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
TLO, Depth + 1))
return true;
@@ -2483,7 +2486,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
// TODO - bigendian once we have test coverage.
if (TLO.DAG.getDataLayout().isLittleEndian()) {
unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
- APInt SrcDemandedBits = APInt::getNullValue(SrcEltSizeInBits);
+ APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
for (unsigned i = 0; i != NumElts; ++i)
if (DemandedElts[i]) {
unsigned Ofs = (i % Scale) * EltSizeInBits;
@@ -2513,10 +2516,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
// of this vector.
if ((NumSrcElts % NumElts) == 0) {
unsigned Scale = NumSrcElts / NumElts;
- for (unsigned i = 0; i != NumElts; ++i)
- if (DemandedElts[i])
- SrcDemandedElts.setBits(i * Scale, (i + 1) * Scale);
-
+ SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
TLO, Depth + 1))
return true;
@@ -2525,9 +2525,9 @@ bool TargetLowering::SimplifyDemandedVectorElts(
// the output element will be as well, assuming it was demanded.
for (unsigned i = 0; i != NumElts; ++i) {
if (DemandedElts[i]) {
- if (SrcZero.extractBits(Scale, i * Scale).isAllOnesValue())
+ if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
KnownZero.setBit(i);
- if (SrcUndef.extractBits(Scale, i * Scale).isAllOnesValue())
+ if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
KnownUndef.setBit(i);
}
}
@@ -2536,7 +2536,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
}
case ISD::BUILD_VECTOR: {
// Check all elements and simplify any unused elements with UNDEF.
- if (!DemandedElts.isAllOnesValue()) {
+ if (!DemandedElts.isAllOnes()) {
// Don't simplify BROADCASTS.
if (llvm::any_of(Op->op_values(),
[&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
@@ -2589,7 +2589,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
APInt DemandedSrcElts = DemandedElts;
- DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx);
+ DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
APInt SubUndef, SubZero;
if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
@@ -2609,8 +2609,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
KnownZero.insertBits(SubZero, Idx);
// Attempt to avoid multi-use ops if we don't need anything from them.
- if (!DemandedSrcElts.isAllOnesValue() ||
- !DemandedSubElts.isAllOnesValue()) {
+ if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
Src, DemandedSrcElts, TLO.DAG, Depth + 1);
SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
@@ -2642,7 +2641,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
KnownZero = SrcZero.extractBits(NumElts, Idx);
// Attempt to avoid multi-use ops if we don't need anything from them.
- if (!DemandedElts.isAllOnesValue()) {
+ if (!DemandedElts.isAllOnes()) {
SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
Src, DemandedSrcElts, TLO.DAG, Depth + 1);
if (NewSrc) {
@@ -2810,6 +2809,25 @@ bool TargetLowering::SimplifyDemandedVectorElts(
if (DemandedElts.isSubsetOf(KnownUndef))
return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
KnownUndef.clearAllBits();
+
+ // zext - if we just need the bottom element then we can mask:
+ // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
+ if (DemandedSrcElts == 1 && TLO.DAG.getDataLayout().isLittleEndian() &&
+ Src.getOpcode() == ISD::AND && Op->isOnlyUserOf(Src.getNode()) &&
+ Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
+ SDLoc DL(Op);
+ EVT SrcVT = Src.getValueType();
+ EVT SrcSVT = SrcVT.getScalarType();
+ SmallVector<SDValue> MaskElts;
+ MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
+ MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
+ SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
+ if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
+ ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
+ Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
+ return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
+ }
+ }
}
break;
}
@@ -2842,7 +2860,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
// Attempt to avoid multi-use ops if we don't need anything from them.
// TODO - use KnownUndef to relax the demandedelts?
- if (!DemandedElts.isAllOnesValue())
+ if (!DemandedElts.isAllOnes())
if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
return true;
break;
@@ -2869,7 +2887,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
// Attempt to avoid multi-use ops if we don't need anything from them.
// TODO - use KnownUndef to relax the demandedelts?
- if (!DemandedElts.isAllOnesValue())
+ if (!DemandedElts.isAllOnes())
if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
return true;
break;
@@ -2897,7 +2915,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
// Attempt to avoid multi-use ops if we don't need anything from them.
// TODO - use KnownUndef to relax the demandedelts?
- if (!DemandedElts.isAllOnesValue())
+ if (!DemandedElts.isAllOnes())
if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
return true;
break;
@@ -2923,7 +2941,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
return true;
} else {
KnownBits Known;
- APInt DemandedBits = APInt::getAllOnesValue(EltSizeInBits);
+ APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
TLO, Depth, AssumeSingleUse))
return true;
@@ -3111,9 +3129,9 @@ bool TargetLowering::isConstTrueVal(const SDNode *N) const {
case UndefinedBooleanContent:
return CVal[0];
case ZeroOrOneBooleanContent:
- return CVal.isOneValue();
+ return CVal.isOne();
case ZeroOrNegativeOneBooleanContent:
- return CVal.isAllOnesValue();
+ return CVal.isAllOnes();
}
llvm_unreachable("Invalid boolean contents");
@@ -3140,7 +3158,7 @@ bool TargetLowering::isConstFalseVal(const SDNode *N) const {
if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
return !CN->getAPIntValue()[0];
- return CN->isNullValue();
+ return CN->isZero();
}
bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
@@ -3156,7 +3174,7 @@ bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
case TargetLowering::UndefinedBooleanContent:
case TargetLowering::ZeroOrNegativeOneBooleanContent:
- return N->isAllOnesValue() && SExt;
+ return N->isAllOnes() && SExt;
}
llvm_unreachable("Unexpected enumeration.");
}
@@ -3210,7 +3228,7 @@ SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
// Bail out if the compare operand that we want to turn into a zero is
// already a zero (otherwise, infinite loop).
auto *YConst = dyn_cast<ConstantSDNode>(Y);
- if (YConst && YConst->isNullValue())
+ if (YConst && YConst->isZero())
return SDValue();
// Transform this into: ~X & Y == 0.
@@ -3325,7 +3343,7 @@ SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
DAGCombinerInfo &DCI, const SDLoc &DL) const {
assert(isConstOrConstSplat(N1C) &&
- isConstOrConstSplat(N1C)->getAPIntValue().isNullValue() &&
+ isConstOrConstSplat(N1C)->getAPIntValue().isZero() &&
"Should be a comparison with 0.");
assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
"Valid only for [in]equality comparisons.");
@@ -3548,7 +3566,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
// equality comparison, then we're just comparing whether X itself is
// zero.
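    // e.g. for i32, (ctlz x) >> 5 is 1 only when ctlz(x) == 32, which happens
    // only when x == 0.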
- if (N0.getOpcode() == ISD::SRL && (C1.isNullValue() || C1.isOneValue()) &&
+ if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
N0.getOperand(0).getOpcode() == ISD::CTLZ &&
isPowerOf2_32(N0.getScalarValueSizeInBits())) {
if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
@@ -3648,8 +3666,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
(isConstFalseVal(N1C) ||
isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
- bool Inverse = (N1C->isNullValue() && Cond == ISD::SETEQ) ||
- (!N1C->isNullValue() && Cond == ISD::SETNE);
+ bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
+ (!N1C->isZero() && Cond == ISD::SETNE);
if (!Inverse)
return TopSetCC;
@@ -3800,8 +3818,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// Otherwise, make this a use of a zext.
return DAG.getSetCC(dl, VT, ZextOp,
DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
- } else if ((N1C->isNullValue() || N1C->isOne()) &&
- (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ } else if ((N1C->isZero() || N1C->isOne()) &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
// SETCC (SETCC), [0|1], [EQ|NE] -> SETCC
if (N0.getOpcode() == ISD::SETCC &&
isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
@@ -3894,7 +3912,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// icmp eq/ne (urem %x, %y), 0
// Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
// icmp eq/ne %x, 0
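    // A power of two is only divisible by powers of two, so a %y with two or
    // more bits set can never divide a non-zero %x; the remainder is then zero
    // only when %x itself is zero.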
- if (N0.getOpcode() == ISD::UREM && N1C->isNullValue() &&
+ if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
(Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
@@ -3902,6 +3920,17 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
}
+ // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
+ // and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
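+    // (ashr X, BW-1) is all-ones exactly when the sign bit of X is set, i.e.
+    // when X is negative, so the equality check reduces to a sign test on X.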
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
+ N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 &&
+ N1C && N1C->isAllOnes()) {
+ return DAG.getSetCC(dl, VT, N0.getOperand(0),
+ DAG.getConstant(0, dl, OpVT),
+ Cond == ISD::SETEQ ? ISD::SETLT : ISD::SETGE);
+ }
+
if (SDValue V =
optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
return V;
@@ -4001,7 +4030,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
- if (C1.isNullValue())
+ if (C1.isZero())
if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
VT, N0, N1, Cond, DCI, dl))
return CC;
@@ -4010,8 +4039,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// For example, when high 32-bits of i64 X are known clear:
// all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
// all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1
- bool CmpZero = N1C->getAPIntValue().isNullValue();
- bool CmpNegOne = N1C->getAPIntValue().isAllOnesValue();
+ bool CmpZero = N1C->getAPIntValue().isZero();
+ bool CmpNegOne = N1C->getAPIntValue().isAllOnes();
if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
// Match or(lo,shl(hi,bw/2)) pattern.
auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
@@ -4140,7 +4169,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
const APInt &AndRHSC = AndRHS->getAPIntValue();
- if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) {
+ if (AndRHSC.isNegatedPowerOf2() && (AndRHSC & C1) == C1) {
unsigned ShiftBits = AndRHSC.countTrailingZeros();
if (!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
SDValue Shift =
@@ -4336,7 +4365,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// When division is cheap or optimizing for minimum size,
// fall through to DIVREM creation by skipping this fold.
- if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttribute(Attribute::MinSize)) {
+ if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
if (N0.getOpcode() == ISD::UREM) {
if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
return Folded;
@@ -4687,7 +4716,8 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
getSimpleValueType(DL, STy->getElementType(ResNo));
} else {
assert(ResNo == 0 && "Asm only has one result!");
- OpInfo.ConstraintVT = getSimpleValueType(DL, Call.getType());
+ OpInfo.ConstraintVT =
+ getAsmOperandValueType(DL, Call.getType()).getSimpleVT();
}
++ResNo;
break;
@@ -5049,7 +5079,7 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
SmallVector<SDValue, 16> Shifts, Factors;
auto BuildSDIVPattern = [&](ConstantSDNode *C) {
- if (C->isNullValue())
+ if (C->isZero())
return false;
APInt Divisor = C->getAPIntValue();
unsigned Shift = Divisor.countTrailingZeros();
@@ -5151,31 +5181,31 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
auto BuildSDIVPattern = [&](ConstantSDNode *C) {
- if (C->isNullValue())
+ if (C->isZero())
return false;
const APInt &Divisor = C->getAPIntValue();
- APInt::ms magics = Divisor.magic();
+ SignedDivisionByConstantInfo magics = SignedDivisionByConstantInfo::get(Divisor);
int NumeratorFactor = 0;
int ShiftMask = -1;
- if (Divisor.isOneValue() || Divisor.isAllOnesValue()) {
+ if (Divisor.isOne() || Divisor.isAllOnes()) {
// If d is +1/-1, we just multiply the numerator by +1/-1.
NumeratorFactor = Divisor.getSExtValue();
- magics.m = 0;
- magics.s = 0;
+ magics.Magic = 0;
+ magics.ShiftAmount = 0;
ShiftMask = 0;
- } else if (Divisor.isStrictlyPositive() && magics.m.isNegative()) {
+ } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
// If d > 0 and m < 0, add the numerator.
NumeratorFactor = 1;
- } else if (Divisor.isNegative() && magics.m.isStrictlyPositive()) {
+ } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
// If d < 0 and m > 0, subtract the numerator.
NumeratorFactor = -1;
}
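    // For example, signed i32 division by 3 uses Magic = 0x55555556 with
    // ShiftAmount = 0 (cf. Hacker's Delight).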
- MagicFactors.push_back(DAG.getConstant(magics.m, dl, SVT));
+ MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT));
Factors.push_back(DAG.getConstant(NumeratorFactor, dl, SVT));
- Shifts.push_back(DAG.getConstant(magics.s, dl, ShSVT));
+ Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT));
ShiftMasks.push_back(DAG.getConstant(ShiftMask, dl, SVT));
return true;
};
@@ -5296,33 +5326,33 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
auto BuildUDIVPattern = [&](ConstantSDNode *C) {
- if (C->isNullValue())
+ if (C->isZero())
return false;
// FIXME: We should use a narrower constant when the upper
// bits are known to be zero.
const APInt& Divisor = C->getAPIntValue();
- APInt::mu magics = Divisor.magicu();
+ UnsignedDivisonByConstantInfo magics = UnsignedDivisonByConstantInfo::get(Divisor);
unsigned PreShift = 0, PostShift = 0;
// If the divisor is even, we can avoid using the expensive fixup by
// shifting the divided value upfront.
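    // e.g. x u/ 12 == (x u>> 2) u/ 3, so the magic for the odd divisor can be
    // used on the pre-shifted value instead.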
- if (magics.a != 0 && !Divisor[0]) {
+ if (magics.IsAdd != 0 && !Divisor[0]) {
PreShift = Divisor.countTrailingZeros();
// Get magic number for the shifted divisor.
- magics = Divisor.lshr(PreShift).magicu(PreShift);
- assert(magics.a == 0 && "Should use cheap fixup now");
+ magics = UnsignedDivisonByConstantInfo::get(Divisor.lshr(PreShift), PreShift);
+ assert(magics.IsAdd == 0 && "Should use cheap fixup now");
}
- APInt Magic = magics.m;
+ APInt Magic = magics.Magic;
unsigned SelNPQ;
- if (magics.a == 0 || Divisor.isOneValue()) {
- assert(magics.s < Divisor.getBitWidth() &&
+ if (magics.IsAdd == 0 || Divisor.isOne()) {
+ assert(magics.ShiftAmount < Divisor.getBitWidth() &&
"We shouldn't generate an undefined shift!");
- PostShift = magics.s;
+ PostShift = magics.ShiftAmount;
SelNPQ = false;
} else {
- PostShift = magics.s - 1;
+ PostShift = magics.ShiftAmount - 1;
SelNPQ = true;
}
@@ -5330,7 +5360,7 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
MagicFactors.push_back(DAG.getConstant(Magic, dl, SVT));
NPQFactors.push_back(
DAG.getConstant(SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
- : APInt::getNullValue(EltBits),
+ : APInt::getZero(EltBits),
dl, SVT));
PostShifts.push_back(DAG.getConstant(PostShift, dl, ShSVT));
UseNPQ |= SelNPQ;
@@ -5510,13 +5540,13 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
// Division by 0 is UB. Leave it to be constant-folded elsewhere.
- if (CDiv->isNullValue())
+ if (CDiv->isZero())
return false;
const APInt &D = CDiv->getAPIntValue();
const APInt &Cmp = CCmp->getAPIntValue();
- ComparingWithAllZeros &= Cmp.isNullValue();
+ ComparingWithAllZeros &= Cmp.isZero();
    // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
// if C2 is not less than C1, the comparison is always false.
@@ -5528,26 +5558,26 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
// If all lanes are tautological (either all divisors are ones, or divisor
// is not greater than the constant we are comparing with),
// we will prefer to avoid the fold.
- bool TautologicalLane = D.isOneValue() || TautologicalInvertedLane;
+ bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
HadTautologicalLanes |= TautologicalLane;
AllLanesAreTautological &= TautologicalLane;
    // If we are comparing with non-zero, we'll need to subtract said
    // comparison value from the LHS. But there is no point in doing that if
    // every lane where we are comparing with non-zero is tautological.
- if (!Cmp.isNullValue())
+ if (!Cmp.isZero())
AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
// Decompose D into D0 * 2^K
unsigned K = D.countTrailingZeros();
- assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate.");
+ assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
APInt D0 = D.lshr(K);
// D is even if it has trailing zeros.
HadEvenDivisor |= (K != 0);
// D is a power-of-two if D0 is one.
// If all divisors are power-of-two, we will prefer to avoid the fold.
- AllDivisorsArePowerOfTwo &= D0.isOneValue();
+ AllDivisorsArePowerOfTwo &= D0.isOne();
// P = inv(D0, 2^W)
// 2^W requires W + 1 bits, so we have to extend and then truncate.
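    // e.g. for W = 8 and D0 = 3, P = 0xAB since 3 * 0xAB = 0x201 == 1 (mod 2^8).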
@@ -5555,20 +5585,20 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
APInt P = D0.zext(W + 1)
.multiplicativeInverse(APInt::getSignedMinValue(W + 1))
.trunc(W);
- assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
- assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");
+ assert(!P.isZero() && "No multiplicative inverse!"); // unreachable
+ assert((D0 * P).isOne() && "Multiplicative inverse sanity check.");
// Q = floor((2^W - 1) u/ D)
// R = ((2^W - 1) u% D)
APInt Q, R;
- APInt::udivrem(APInt::getAllOnesValue(W), D, Q, R);
+ APInt::udivrem(APInt::getAllOnes(W), D, Q, R);
// If we are comparing with zero, then that comparison constant is okay,
// else it may need to be one less than that.
if (Cmp.ugt(R))
Q -= 1;
- assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
+ assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
"We are expecting that K is always less than all-ones for ShSVT");
// If the lane is tautological the result can be constant-folded.
@@ -5751,7 +5781,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
// TODO: Could support comparing with non-zero too.
ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
- if (!CompTarget || !CompTarget->isNullValue())
+ if (!CompTarget || !CompTarget->isZero())
return SDValue();
bool HadIntMinDivisor = false;
@@ -5764,7 +5794,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
auto BuildSREMPattern = [&](ConstantSDNode *C) {
// Division by 0 is UB. Leave it to be constant-folded elsewhere.
- if (C->isNullValue())
+ if (C->isZero())
return false;
// FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
@@ -5777,12 +5807,12 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
HadIntMinDivisor |= D.isMinSignedValue();
// If all divisors are ones, we will prefer to avoid the fold.
- HadOneDivisor |= D.isOneValue();
- AllDivisorsAreOnes &= D.isOneValue();
+ HadOneDivisor |= D.isOne();
+ AllDivisorsAreOnes &= D.isOne();
// Decompose D into D0 * 2^K
unsigned K = D.countTrailingZeros();
- assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate.");
+ assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
APInt D0 = D.lshr(K);
if (!D.isMinSignedValue()) {
@@ -5793,7 +5823,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
// D is a power-of-two if D0 is one. This includes INT_MIN.
// If all divisors are power-of-two, we will prefer to avoid the fold.
- AllDivisorsArePowerOfTwo &= D0.isOneValue();
+ AllDivisorsArePowerOfTwo &= D0.isOne();
// P = inv(D0, 2^W)
// 2^W requires W + 1 bits, so we have to extend and then truncate.
@@ -5801,8 +5831,8 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
APInt P = D0.zext(W + 1)
.multiplicativeInverse(APInt::getSignedMinValue(W + 1))
.trunc(W);
- assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
- assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");
+ assert(!P.isZero() && "No multiplicative inverse!"); // unreachable
+ assert((D0 * P).isOne() && "Multiplicative inverse sanity check.");
// A = floor((2^(W - 1) - 1) / D0) & -2^K
APInt A = APInt::getSignedMaxValue(W).udiv(D0);
@@ -5817,14 +5847,14 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
// Q = floor((2 * A) / (2^K))
APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
- assert(APInt::getAllOnesValue(SVT.getSizeInBits()).ugt(A) &&
+ assert(APInt::getAllOnes(SVT.getSizeInBits()).ugt(A) &&
"We are expecting that A is always less than all-ones for SVT");
- assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
+ assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
"We are expecting that K is always less than all-ones for ShSVT");
// If the divisor is 1 the result can be constant-folded. Likewise, we
// don't care about INT_MIN lanes, those can be set to undef if appropriate.
- if (D.isOneValue()) {
+ if (D.isOne()) {
      // Set P, A and K to bogus values so we can try to splat them.
P = 0;
A = -1;
@@ -5950,7 +5980,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
SDValue IntMax = DAG.getConstant(
APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT);
SDValue Zero =
- DAG.getConstant(APInt::getNullValue(SVT.getScalarSizeInBits()), DL, VT);
+ DAG.getConstant(APInt::getZero(SVT.getScalarSizeInBits()), DL, VT);
// Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
@@ -6776,7 +6806,7 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
// the destination signmask can't be represented by the float, so we can
// just use FP_TO_SINT directly.
const fltSemantics &APFSem = DAG.EVTToAPFloatSemantics(SrcVT);
- APFloat APF(APFSem, APInt::getNullValue(SrcVT.getScalarSizeInBits()));
+ APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits()));
APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
if (APFloat::opOverflow &
APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
@@ -6969,8 +6999,18 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
return SDValue();
}
-bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result,
- SelectionDAG &DAG) const {
+// Only expand vector types if we have the appropriate vector bit operations.
+static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
+ assert(VT.isVector() && "Expected vector type");
+ unsigned Len = VT.getScalarSizeInBits();
+ return TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::SUB, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
+ (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) &&
+ TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT);
+}
+
+SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
SDLoc dl(Node);
EVT VT = Node->getValueType(0);
EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
@@ -6980,15 +7020,11 @@ bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result,
// TODO: Add support for irregular type lengths.
if (!(Len <= 128 && Len % 8 == 0))
- return false;
+ return SDValue();
// Only expand vector types if we have the appropriate vector bit operations.
- if (VT.isVector() && (!isOperationLegalOrCustom(ISD::ADD, VT) ||
- !isOperationLegalOrCustom(ISD::SUB, VT) ||
- !isOperationLegalOrCustom(ISD::SRL, VT) ||
- (Len != 8 && !isOperationLegalOrCustom(ISD::MUL, VT)) ||
- !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
- return false;
+ if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
+ return SDValue();
// This is the "best" algorithm from
// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
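  // For Len == 32 the emitted sequence computes:
  //   v = v - ((v >> 1) & 0x55555555);                 // 2-bit sums
  //   v = (v & 0x33333333) + ((v >> 2) & 0x33333333);  // 4-bit sums
  //   v = (v + (v >> 4)) & 0x0F0F0F0F;                 // 8-bit sums
  //   v = (v * 0x01010101) >> 24;                      // add the bytes together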
@@ -7025,12 +7061,10 @@ bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result,
DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
DAG.getConstant(Len - 8, dl, ShVT));
- Result = Op;
- return true;
+ return Op;
}
-bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result,
- SelectionDAG &DAG) const {
+SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {
SDLoc dl(Node);
EVT VT = Node->getValueType(0);
EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
@@ -7039,10 +7073,8 @@ bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result,
// If the non-ZERO_UNDEF version is supported we can use that instead.
if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
- isOperationLegalOrCustom(ISD::CTLZ, VT)) {
- Result = DAG.getNode(ISD::CTLZ, dl, VT, Op);
- return true;
- }
+ isOperationLegalOrCustom(ISD::CTLZ, VT))
+ return DAG.getNode(ISD::CTLZ, dl, VT, Op);
// If the ZERO_UNDEF version is supported use that and handle the zero case.
if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
@@ -7051,17 +7083,18 @@ bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result,
SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
SDValue Zero = DAG.getConstant(0, dl, VT);
SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
- Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
+ return DAG.getSelect(dl, VT, SrcIsZero,
DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
- return true;
}
// Only expand vector types if we have the appropriate vector bit operations.
+ // This includes the operations needed to expand CTPOP if it isn't supported.
if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
- !isOperationLegalOrCustom(ISD::CTPOP, VT) ||
+ (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
+ !canExpandVectorCTPOP(*this, VT)) ||
!isOperationLegalOrCustom(ISD::SRL, VT) ||
!isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
- return false;
+ return SDValue();
// for now, we do this:
// x = x | (x >> 1);
@@ -7078,12 +7111,10 @@ bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result,
DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
}
Op = DAG.getNOT(dl, Op, VT);
- Result = DAG.getNode(ISD::CTPOP, dl, VT, Op);
- return true;
+ return DAG.getNode(ISD::CTPOP, dl, VT, Op);
}
-bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,
- SelectionDAG &DAG) const {
+SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {
SDLoc dl(Node);
EVT VT = Node->getValueType(0);
SDValue Op = Node->getOperand(0);
@@ -7091,10 +7122,8 @@ bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,
// If the non-ZERO_UNDEF version is supported we can use that instead.
if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
- isOperationLegalOrCustom(ISD::CTTZ, VT)) {
- Result = DAG.getNode(ISD::CTTZ, dl, VT, Op);
- return true;
- }
+ isOperationLegalOrCustom(ISD::CTTZ, VT))
+ return DAG.getNode(ISD::CTTZ, dl, VT, Op);
// If the ZERO_UNDEF version is supported use that and handle the zero case.
if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
@@ -7103,19 +7132,20 @@ bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,
SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
SDValue Zero = DAG.getConstant(0, dl, VT);
SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
- Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
+ return DAG.getSelect(dl, VT, SrcIsZero,
DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
- return true;
}
// Only expand vector types if we have the appropriate vector bit operations.
+ // This includes the operations needed to expand CTPOP if it isn't supported.
if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
(!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
- !isOperationLegalOrCustom(ISD::CTLZ, VT)) ||
+ !isOperationLegalOrCustom(ISD::CTLZ, VT) &&
+ !canExpandVectorCTPOP(*this, VT)) ||
!isOperationLegalOrCustom(ISD::SUB, VT) ||
!isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
!isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
- return false;
+ return SDValue();
// for now, we use: { return popcount(~x & (x - 1)); }
// unless the target has ctlz but not ctpop, in which case we use:
@@ -7127,18 +7157,15 @@ bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,
// If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) {
- Result =
- DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
- DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
- return true;
+ return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
+ DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
}
- Result = DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
- return true;
+ return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
}
-bool TargetLowering::expandABS(SDNode *N, SDValue &Result,
- SelectionDAG &DAG, bool IsNegative) const {
+SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
+ bool IsNegative) const {
SDLoc dl(N);
EVT VT = N->getValueType(0);
EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
@@ -7148,27 +7175,24 @@ bool TargetLowering::expandABS(SDNode *N, SDValue &Result,
if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
isOperationLegal(ISD::SMAX, VT)) {
SDValue Zero = DAG.getConstant(0, dl, VT);
- Result = DAG.getNode(ISD::SMAX, dl, VT, Op,
- DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
- return true;
+ return DAG.getNode(ISD::SMAX, dl, VT, Op,
+ DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
}
// abs(x) -> umin(x,sub(0,x))
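  // For non-negative x, 0-x wraps to a large unsigned value, so umin keeps x;
  // for negative x (other than INT_MIN, which maps to itself either way), 0-x
  // is |x| and compares smaller as unsigned, so umin keeps |x|.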
if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
isOperationLegal(ISD::UMIN, VT)) {
SDValue Zero = DAG.getConstant(0, dl, VT);
- Result = DAG.getNode(ISD::UMIN, dl, VT, Op,
- DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
- return true;
+ return DAG.getNode(ISD::UMIN, dl, VT, Op,
+ DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
}
// 0 - abs(x) -> smin(x, sub(0,x))
if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
isOperationLegal(ISD::SMIN, VT)) {
SDValue Zero = DAG.getConstant(0, dl, VT);
- Result = DAG.getNode(ISD::SMIN, dl, VT, Op,
- DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
- return true;
+ return DAG.getNode(ISD::SMIN, dl, VT, Op,
+ DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
}
// Only expand vector types if we have the appropriate vector operations.
@@ -7177,20 +7201,19 @@ bool TargetLowering::expandABS(SDNode *N, SDValue &Result,
(!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
(IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
!isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
- return false;
+ return SDValue();
SDValue Shift =
DAG.getNode(ISD::SRA, dl, VT, Op,
DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT));
if (!IsNegative) {
SDValue Add = DAG.getNode(ISD::ADD, dl, VT, Op, Shift);
- Result = DAG.getNode(ISD::XOR, dl, VT, Add, Shift);
- } else {
- // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
- SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
- Result = DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
+ return DAG.getNode(ISD::XOR, dl, VT, Add, Shift);
}
- return true;
+
+ // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
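+  // e.g. for X = -5: Y = -1, xor(X, Y) = 4, and sub(Y, 4) = -5 == -abs(-5);
+  // for X = 5: Y = 0, xor(X, Y) = 5, and sub(0, 5) = -5.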
+ SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
+ return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
}
SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
@@ -7265,34 +7288,31 @@ SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
// TODO: We can easily support i4/i2 legal types if any target ever does.
if (Sz >= 8 && isPowerOf2_32(Sz)) {
// Create the masks - repeating the pattern every byte.
- APInt MaskHi4 = APInt::getSplat(Sz, APInt(8, 0xF0));
- APInt MaskHi2 = APInt::getSplat(Sz, APInt(8, 0xCC));
- APInt MaskHi1 = APInt::getSplat(Sz, APInt(8, 0xAA));
- APInt MaskLo4 = APInt::getSplat(Sz, APInt(8, 0x0F));
- APInt MaskLo2 = APInt::getSplat(Sz, APInt(8, 0x33));
- APInt MaskLo1 = APInt::getSplat(Sz, APInt(8, 0x55));
+ APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
+ APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
+ APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
// BSWAP if the type is wider than a single byte.
Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);
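    // With the bytes reversed (for types wider than a byte), the three
    // mask-and-shift stages below swap nibbles, then bit pairs, then adjacent
    // bits within each byte to complete the bit reversal.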
- // swap i4: ((V & 0xF0) >> 4) | ((V & 0x0F) << 4)
- Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi4, dl, VT));
- Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo4, dl, VT));
- Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(4, dl, SHVT));
+ // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT));
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT));
Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
- // swap i2: ((V & 0xCC) >> 2) | ((V & 0x33) << 2)
- Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi2, dl, VT));
- Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo2, dl, VT));
- Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(2, dl, SHVT));
+ // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT));
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT));
Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
- // swap i1: ((V & 0xAA) >> 1) | ((V & 0x55) << 1)
- Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi1, dl, VT));
- Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo1, dl, VT));
- Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(1, dl, SHVT));
+ // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT));
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT));
Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
return Tmp;
@@ -7802,13 +7822,15 @@ TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx,
EVT VecVT, const SDLoc &dl,
- unsigned NumSubElts) {
- if (!VecVT.isScalableVector() && isa<ConstantSDNode>(Idx))
- return Idx;
+ ElementCount SubEC) {
+ assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
+ "Cannot index a scalable vector within a fixed-width vector");
- EVT IdxVT = Idx.getValueType();
unsigned NElts = VecVT.getVectorMinNumElements();
- if (VecVT.isScalableVector()) {
+ unsigned NumSubElts = SubEC.getKnownMinValue();
+ EVT IdxVT = Idx.getValueType();
+
+ if (VecVT.isScalableVector() && !SubEC.isScalable()) {
// If this is a constant index and we know the value plus the number of the
// elements in the subvector minus one is less than the minimum number of
// elements then it's safe to return Idx.
@@ -7855,16 +7877,16 @@ SDValue TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG,
unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
"Converting bits to bytes lost precision");
-
- // Scalable vectors don't need clamping as these are checked at compile time
- if (SubVecVT.isFixedLengthVector()) {
- assert(SubVecVT.getVectorElementType() == EltVT &&
- "Sub-vector must be a fixed vector with matching element type");
- Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
- SubVecVT.getVectorNumElements());
- }
+ assert(SubVecVT.getVectorElementType() == EltVT &&
+ "Sub-vector must be a vector with matching element type");
+ Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
+ SubVecVT.getVectorElementCount());
EVT IdxVT = Index.getValueType();
+ if (SubVecVT.isScalableVector())
+ Index =
+ DAG.getNode(ISD::MUL, dl, IdxVT, Index,
+ DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1)));
Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
DAG.getConstant(EltSize, dl, IdxVT));
@@ -7920,7 +7942,7 @@ SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
SDLoc dl(Op);
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
- if (C->isNullValue() && CC == ISD::SETEQ) {
+ if (C->isZero() && CC == ISD::SETEQ) {
EVT VT = Op.getOperand(0).getValueType();
SDValue Zext = Op.getOperand(0);
if (VT.bitsLT(MVT::i32)) {
@@ -7948,10 +7970,8 @@ TargetLowering::getCanonicalIndexType(ISD::MemIndexType IndexType, EVT MemVT,
(IndexType == ISD::SIGNED_SCALED) || (IndexType == ISD::SIGNED_UNSCALED);
// Scaling is unimportant for bytes, canonicalize to unscaled.
- if (IsScaledIndex && MemVT.getScalarType() == MVT::i8) {
- IsScaledIndex = false;
- IndexType = IsSignedIndex ? ISD::SIGNED_UNSCALED : ISD::UNSIGNED_UNSCALED;
- }
+ if (IsScaledIndex && MemVT.getScalarType() == MVT::i8)
+ return IsSignedIndex ? ISD::SIGNED_UNSCALED : ISD::UNSIGNED_UNSCALED;
return IndexType;
}
@@ -8072,14 +8092,12 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
}
- // SatMax -> Overflow && SumDiff < 0
- // SatMin -> Overflow && SumDiff >= 0
+ // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
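+  // On overflow SumDiff has the opposite sign of the true result, so
+  // (SumDiff >> (BW - 1)) ^ MinVal yields MaxVal when SumDiff is negative and
+  // MinVal otherwise, i.e. the correct saturation value.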
APInt MinVal = APInt::getSignedMinValue(BitWidth);
- APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
- SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
- SDValue SumNeg = DAG.getSetCC(dl, BoolVT, SumDiff, Zero, ISD::SETLT);
- Result = DAG.getSelect(dl, VT, SumNeg, SatMax, SatMin);
+ SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff,
+ DAG.getConstant(BitWidth - 1, dl, VT));
+ Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin);
return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
}
@@ -8154,8 +8172,11 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
APInt MaxVal = APInt::getSignedMaxValue(VTSize);
SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
- SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT);
- Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin);
+      // Xor the inputs; if the resulting sign bit is 0, the product will be
+      // positive, otherwise negative.
+ SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
+ SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
+ Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
return DAG.getSelect(dl, VT, Overflow, Result, Product);
} else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
SDValue Result =
@@ -8390,7 +8411,7 @@ void TargetLowering::expandSADDSUBO(
// If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
- if (isOperationLegalOrCustom(OpcSat, LHS.getValueType())) {
+ if (isOperationLegal(OpcSat, LHS.getValueType())) {
SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
@@ -8443,8 +8464,8 @@ bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
if (VT.isVector())
- WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
- VT.getVectorNumElements());
+ WideVT =
+ EVT::getVectorVT(*DAG.getContext(), WideVT, VT.getVectorElementCount());
SDValue BottomHalf;
SDValue TopHalf;
diff --git a/llvm/lib/CodeGen/SplitKit.cpp b/llvm/lib/CodeGen/SplitKit.cpp
index c70620fd7532..7f9518e4c075 100644
--- a/llvm/lib/CodeGen/SplitKit.cpp
+++ b/llvm/lib/CodeGen/SplitKit.cpp
@@ -50,7 +50,6 @@ STATISTIC(NumFinished, "Number of splits finished");
STATISTIC(NumSimple, "Number of splits that were simple");
STATISTIC(NumCopies, "Number of copies inserted for splitting");
STATISTIC(NumRemats, "Number of rematerialized defs for splitting");
-STATISTIC(NumRepairs, "Number of invalid live ranges repaired");
//===----------------------------------------------------------------------===//
// Last Insert Point Analysis
@@ -160,7 +159,6 @@ void SplitAnalysis::clear() {
UseBlocks.clear();
ThroughBlocks.clear();
CurLI = nullptr;
- DidRepairRange = false;
}
/// analyzeUses - Count instructions, basic blocks, and loops using CurLI.
@@ -188,20 +186,7 @@ void SplitAnalysis::analyzeUses() {
UseSlots.end());
// Compute per-live block info.
- if (!calcLiveBlockInfo()) {
- // FIXME: calcLiveBlockInfo found inconsistencies in the live range.
- // I am looking at you, RegisterCoalescer!
- DidRepairRange = true;
- ++NumRepairs;
- LLVM_DEBUG(dbgs() << "*** Fixing inconsistent live interval! ***\n");
- const_cast<LiveIntervals&>(LIS)
- .shrinkToUses(const_cast<LiveInterval*>(CurLI));
- UseBlocks.clear();
- ThroughBlocks.clear();
- bool fixed = calcLiveBlockInfo();
- (void)fixed;
- assert(fixed && "Couldn't fix broken live interval");
- }
+ calcLiveBlockInfo();
LLVM_DEBUG(dbgs() << "Analyze counted " << UseSlots.size() << " instrs in "
<< UseBlocks.size() << " blocks, through "
@@ -210,11 +195,11 @@ void SplitAnalysis::analyzeUses() {
/// calcLiveBlockInfo - Fill the LiveBlocks array with information about blocks
/// where CurLI is live.
-bool SplitAnalysis::calcLiveBlockInfo() {
+void SplitAnalysis::calcLiveBlockInfo() {
ThroughBlocks.resize(MF.getNumBlockIDs());
NumThroughBlocks = NumGapBlocks = 0;
if (CurLI->empty())
- return true;
+ return;
LiveInterval::const_iterator LVI = CurLI->begin();
LiveInterval::const_iterator LVE = CurLI->end();
@@ -240,8 +225,7 @@ bool SplitAnalysis::calcLiveBlockInfo() {
ThroughBlocks.set(BI.MBB->getNumber());
// The range shouldn't end mid-block if there are no uses. This shouldn't
// happen.
- if (LVI->end < Stop)
- return false;
+ assert(LVI->end >= Stop && "range ends mid block with no uses");
} else {
// This block has uses. Find the first and last uses in the block.
BI.FirstInstr = *UseI;
@@ -312,7 +296,6 @@ bool SplitAnalysis::calcLiveBlockInfo() {
}
assert(getNumLiveBlocks() == countLiveBlocks(CurLI) && "Bad block count");
- return true;
}
unsigned SplitAnalysis::countLiveBlocks(const LiveInterval *cli) const {
@@ -529,19 +512,12 @@ SlotIndex SplitEditor::buildSingleSubRegCopy(Register FromReg, Register ToReg,
| getInternalReadRegState(!FirstCopy), SubIdx)
.addReg(FromReg, 0, SubIdx);
- BumpPtrAllocator &Allocator = LIS.getVNInfoAllocator();
SlotIndexes &Indexes = *LIS.getSlotIndexes();
if (FirstCopy) {
Def = Indexes.insertMachineInstrInMaps(*CopyMI, Late).getRegSlot();
} else {
CopyMI->bundleWithPred();
}
- LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubIdx);
- DestLI.refineSubRanges(Allocator, LaneMask,
- [Def, &Allocator](LiveInterval::SubRange &SR) {
- SR.createDeadDef(Def, Allocator);
- },
- Indexes, TRI);
return Def;
}
@@ -549,11 +525,11 @@ SlotIndex SplitEditor::buildCopy(Register FromReg, Register ToReg,
LaneBitmask LaneMask, MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertBefore, bool Late, unsigned RegIdx) {
const MCInstrDesc &Desc = TII.get(TargetOpcode::COPY);
+ SlotIndexes &Indexes = *LIS.getSlotIndexes();
if (LaneMask.all() || LaneMask == MRI.getMaxLaneMaskForVReg(FromReg)) {
// The full vreg is copied.
MachineInstr *CopyMI =
BuildMI(MBB, InsertBefore, DebugLoc(), Desc, ToReg).addReg(FromReg);
- SlotIndexes &Indexes = *LIS.getSlotIndexes();
return Indexes.insertMachineInstrInMaps(*CopyMI, Late).getRegSlot();
}
@@ -567,18 +543,26 @@ SlotIndex SplitEditor::buildCopy(Register FromReg, Register ToReg,
const TargetRegisterClass *RC = MRI.getRegClass(FromReg);
assert(RC == MRI.getRegClass(ToReg) && "Should have same reg class");
- SmallVector<unsigned, 8> Indexes;
+ SmallVector<unsigned, 8> SubIndexes;
// Abort if we cannot possibly implement the COPY with the given indexes.
- if (!TRI.getCoveringSubRegIndexes(MRI, RC, LaneMask, Indexes))
+ if (!TRI.getCoveringSubRegIndexes(MRI, RC, LaneMask, SubIndexes))
report_fatal_error("Impossible to implement partial COPY");
SlotIndex Def;
- for (unsigned BestIdx : Indexes) {
+ for (unsigned BestIdx : SubIndexes) {
Def = buildSingleSubRegCopy(FromReg, ToReg, MBB, InsertBefore, BestIdx,
DestLI, Late, Def);
}
+ BumpPtrAllocator &Allocator = LIS.getVNInfoAllocator();
+ DestLI.refineSubRanges(
+ Allocator, LaneMask,
+ [Def, &Allocator](LiveInterval::SubRange &SR) {
+ SR.createDeadDef(Def, Allocator);
+ },
+ Indexes, TRI);
+
return Def;
}
diff --git a/llvm/lib/CodeGen/SplitKit.h b/llvm/lib/CodeGen/SplitKit.h
index fbcffacb49ab..902546fe16d8 100644
--- a/llvm/lib/CodeGen/SplitKit.h
+++ b/llvm/lib/CodeGen/SplitKit.h
@@ -160,14 +160,11 @@ private:
/// NumThroughBlocks - Number of live-through blocks.
unsigned NumThroughBlocks;
- /// DidRepairRange - analyze was forced to shrinkToUses().
- bool DidRepairRange;
-
  // Summarize statistics by counting instructions using CurLI.
void analyzeUses();
/// calcLiveBlockInfo - Compute per-block information about CurLI.
- bool calcLiveBlockInfo();
+ void calcLiveBlockInfo();
public:
SplitAnalysis(const VirtRegMap &vrm, const LiveIntervals &lis,
@@ -177,11 +174,6 @@ public:
/// split.
void analyze(const LiveInterval *li);
- /// didRepairRange() - Returns true if CurLI was invalid and has been repaired
- /// by analyze(). This really shouldn't happen, but sometimes the coalescer
- /// can create live ranges that end in mid-air.
- bool didRepairRange() const { return DidRepairRange; }
-
/// clear - clear all data structures so SplitAnalysis is ready to analyze a
/// new interval.
void clear();
diff --git a/llvm/lib/CodeGen/StackColoring.cpp b/llvm/lib/CodeGen/StackColoring.cpp
index 162f3aab024d..623d5da9831e 100644
--- a/llvm/lib/CodeGen/StackColoring.cpp
+++ b/llvm/lib/CodeGen/StackColoring.cpp
@@ -687,6 +687,8 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) {
// Walk the instructions in the block to look for start/end ops.
for (MachineInstr &MI : *MBB) {
+ if (MI.isDebugInstr())
+ continue;
if (MI.getOpcode() == TargetOpcode::LIFETIME_START ||
MI.getOpcode() == TargetOpcode::LIFETIME_END) {
int Slot = getStartOrEndSlot(MI);
diff --git a/llvm/lib/CodeGen/StackProtector.cpp b/llvm/lib/CodeGen/StackProtector.cpp
index 9f229d51b985..7445f77c955d 100644
--- a/llvm/lib/CodeGen/StackProtector.cpp
+++ b/llvm/lib/CodeGen/StackProtector.cpp
@@ -148,10 +148,8 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool &IsLarge,
return false;
bool NeedsProtector = false;
- for (StructType::element_iterator I = ST->element_begin(),
- E = ST->element_end();
- I != E; ++I)
- if (ContainsProtectableArray(*I, IsLarge, Strong, true)) {
+ for (Type *ET : ST->elements())
+ if (ContainsProtectableArray(ET, IsLarge, Strong, true)) {
// If the element is a protectable array and is large (>= SSPBufferSize)
// then we are done. If the protectable array is not large, then
// keep looking in case a subsequent element is a large array.
@@ -436,13 +434,11 @@ bool StackProtector::InsertStackProtectors() {
// protection in SDAG.
bool SupportsSelectionDAGSP =
TLI->useStackGuardXorFP() ||
- (EnableSelectionDAGSP && !TM->Options.EnableFastISel &&
- !TM->Options.EnableGlobalISel);
- AllocaInst *AI = nullptr; // Place on stack that stores the stack guard.
+ (EnableSelectionDAGSP && !TM->Options.EnableFastISel);
+ AllocaInst *AI = nullptr; // Place on stack that stores the stack guard.
- for (Function::iterator I = F->begin(), E = F->end(); I != E;) {
- BasicBlock *BB = &*I++;
- ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator());
+ for (BasicBlock &BB : llvm::make_early_inc_range(*F)) {
+ ReturnInst *RI = dyn_cast<ReturnInst>(BB.getTerminator());
if (!RI)
continue;
@@ -530,23 +526,23 @@ bool StackProtector::InsertStackProtectors() {
// Split the basic block before the return instruction.
BasicBlock *NewBB =
- BB->splitBasicBlock(CheckLoc->getIterator(), "SP_return");
+ BB.splitBasicBlock(CheckLoc->getIterator(), "SP_return");
// Update the dominator tree if we need to.
- if (DT && DT->isReachableFromEntry(BB)) {
- DT->addNewBlock(NewBB, BB);
- DT->addNewBlock(FailBB, BB);
+ if (DT && DT->isReachableFromEntry(&BB)) {
+ DT->addNewBlock(NewBB, &BB);
+ DT->addNewBlock(FailBB, &BB);
}
// Remove default branch instruction to the new BB.
- BB->getTerminator()->eraseFromParent();
+ BB.getTerminator()->eraseFromParent();
// Move the newly created basic block to the point right after the old
// basic block so that it's in the "fall through" position.
- NewBB->moveAfter(BB);
+ NewBB->moveAfter(&BB);
// Generate the stack protector instructions in the old basic block.
- IRBuilder<> B(BB);
+ IRBuilder<> B(&BB);
Value *Guard = getStackGuard(TLI, M, B);
LoadInst *LI2 = B.CreateLoad(B.getInt8PtrTy(), AI, true);
Value *Cmp = B.CreateICmpEQ(Guard, LI2);
diff --git a/llvm/lib/CodeGen/StackSlotColoring.cpp b/llvm/lib/CodeGen/StackSlotColoring.cpp
index ebe00bd7402f..9aea5a7a8853 100644
--- a/llvm/lib/CodeGen/StackSlotColoring.cpp
+++ b/llvm/lib/CodeGen/StackSlotColoring.cpp
@@ -169,7 +169,7 @@ void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) {
if (!LS->hasInterval(FI))
continue;
LiveInterval &li = LS->getInterval(FI);
- if (!MI.isDebugValue())
+ if (!MI.isDebugInstr())
li.incrementWeight(
LiveIntervals::getSpillWeight(false, true, MBFI, MI));
}
diff --git a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
index dfcec32d9537..36a02d5beb4b 100644
--- a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
+++ b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
@@ -405,7 +405,7 @@ bool SwitchCG::SwitchLowering::buildBitTests(CaseClusterVector &Clusters,
if (Low.isStrictlyPositive() && High.slt(BitWidth)) {
// Optimize the case where all the case values fit in a word without having
// to subtract minValue. In this case, we can optimize away the subtraction.
- LowBound = APInt::getNullValue(Low.getBitWidth());
+ LowBound = APInt::getZero(Low.getBitWidth());
CmpRange = High;
ContiguousRange = false;
} else {
diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp
index af735f2a0216..943bd18c6c8b 100644
--- a/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -70,6 +70,12 @@ static cl::opt<unsigned> TailDupIndirectBranchSize(
"end with indirect branches."), cl::init(20),
cl::Hidden);
+static cl::opt<unsigned> TailDupJmpTableLoopSize(
+ "tail-dup-jmptable-loop-size",
+    cl::desc("Maximum number of loop latches (successors of the loop header) "
+             "to consider for tail duplication."),
+ cl::init(128), cl::Hidden);
+
static cl::opt<bool>
TailDupVerify("tail-dup-verify",
cl::desc("Verify sanity of PHI instructions during taildup"),
@@ -100,12 +106,11 @@ void TailDuplicator::initMF(MachineFunction &MFin, bool PreRegAlloc,
}
static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {
- for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ++I) {
- MachineBasicBlock *MBB = &*I;
- SmallSetVector<MachineBasicBlock *, 8> Preds(MBB->pred_begin(),
- MBB->pred_end());
- MachineBasicBlock::iterator MI = MBB->begin();
- while (MI != MBB->end()) {
+ for (MachineBasicBlock &MBB : llvm::drop_begin(MF)) {
+ SmallSetVector<MachineBasicBlock *, 8> Preds(MBB.pred_begin(),
+ MBB.pred_end());
+ MachineBasicBlock::iterator MI = MBB.begin();
+ while (MI != MBB.end()) {
if (!MI->isPHI())
break;
for (MachineBasicBlock *PredBB : Preds) {
@@ -118,7 +123,7 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {
}
}
if (!Found) {
- dbgs() << "Malformed PHI in " << printMBBReference(*MBB) << ": "
+ dbgs() << "Malformed PHI in " << printMBBReference(MBB) << ": "
<< *MI;
dbgs() << " missing input from predecessor "
<< printMBBReference(*PredBB) << '\n';
@@ -129,14 +134,14 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {
for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
MachineBasicBlock *PHIBB = MI->getOperand(i + 1).getMBB();
if (CheckExtra && !Preds.count(PHIBB)) {
- dbgs() << "Warning: malformed PHI in " << printMBBReference(*MBB)
+ dbgs() << "Warning: malformed PHI in " << printMBBReference(MBB)
<< ": " << *MI;
dbgs() << " extra input from predecessor "
<< printMBBReference(*PHIBB) << '\n';
llvm_unreachable(nullptr);
}
if (PHIBB->getNumber() < 0) {
- dbgs() << "Malformed PHI in " << printMBBReference(*MBB) << ": "
+ dbgs() << "Malformed PHI in " << printMBBReference(MBB) << ": "
<< *MI;
dbgs() << " non-existing " << printMBBReference(*PHIBB) << '\n';
llvm_unreachable(nullptr);
@@ -279,18 +284,17 @@ bool TailDuplicator::tailDuplicateBlocks() {
VerifyPHIs(*MF, true);
}
- for (MachineFunction::iterator I = ++MF->begin(), E = MF->end(); I != E;) {
- MachineBasicBlock *MBB = &*I++;
-
+ for (MachineBasicBlock &MBB :
+ llvm::make_early_inc_range(llvm::drop_begin(*MF))) {
if (NumTails == TailDupLimit)
break;
- bool IsSimple = isSimpleBB(MBB);
+ bool IsSimple = isSimpleBB(&MBB);
- if (!shouldTailDuplicate(IsSimple, *MBB))
+ if (!shouldTailDuplicate(IsSimple, MBB))
continue;
- MadeChange |= tailDuplicateAndUpdate(IsSimple, MBB, nullptr);
+ MadeChange |= tailDuplicateAndUpdate(IsSimple, &MBB, nullptr);
}
if (PreRegAlloc && TailDupVerify)
@@ -565,6 +569,29 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
if (TailBB.isSuccessor(&TailBB))
return false;
+ // When doing tail-duplication with jumptable loops like:
+ // 1 -> 2 <-> 3 |
+ // \ <-> 4 |
+ // \ <-> 5 |
+ // \ <-> ... |
+ // \---> rest |
+  // a quadratic number of edges and many more loops are added to the CFG.
+  // This may cause a compile-time regression when the jumptable is quite
+  // large, so set a limit on the number of jumptable cases.
+ auto isLargeJumpTableLoop = [](const MachineBasicBlock &TailBB) {
+ const SmallPtrSet<const MachineBasicBlock *, 8> Preds(TailBB.pred_begin(),
+ TailBB.pred_end());
+  // Check whether the basic block has a large number of successors, each of
+  // which has the basic block as its only successor.
+ return llvm::count_if(
+ TailBB.successors(), [&](const MachineBasicBlock *SuccBB) {
+ return Preds.count(SuccBB) && SuccBB->succ_size() == 1;
+ }) > TailDupJmpTableLoopSize;
+ };
+
+ if (isLargeJumpTableLoop(TailBB))
+ return false;
+
// Set the limit on the cost to duplicate. When optimizing for size,
// duplicate only one, because one branch instruction can be eliminated to
// compensate for the duplication.
@@ -874,18 +901,15 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
// Clone the contents of TailBB into PredBB.
DenseMap<Register, RegSubRegPair> LocalVRMap;
SmallVector<std::pair<Register, RegSubRegPair>, 4> CopyInfos;
- for (MachineBasicBlock::iterator I = TailBB->begin(), E = TailBB->end();
- I != E; /* empty */) {
- MachineInstr *MI = &*I;
- ++I;
- if (MI->isPHI()) {
+ for (MachineInstr &MI : llvm::make_early_inc_range(*TailBB)) {
+ if (MI.isPHI()) {
// Replace the uses of the def of the PHI with the register coming
// from PredBB.
- processPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true);
+ processPHI(&MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true);
} else {
// Replace def of virtual registers with new registers, and update
// uses with PHI source register or the new registers.
- duplicateInstruction(MI, TailBB, PredBB, LocalVRMap, UsedByPhi);
+ duplicateInstruction(&MI, TailBB, PredBB, LocalVRMap, UsedByPhi);
}
}
appendCopies(PredBB, CopyInfos, Copies);
@@ -930,44 +954,56 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
// There may be a branch to the layout successor. This is unlikely but it
// happens. The correct thing to do is to remove the branch before
// duplicating the instructions in all cases.
- TII->removeBranch(*PrevBB);
- if (PreRegAlloc) {
- DenseMap<Register, RegSubRegPair> LocalVRMap;
- SmallVector<std::pair<Register, RegSubRegPair>, 4> CopyInfos;
- MachineBasicBlock::iterator I = TailBB->begin();
- // Process PHI instructions first.
- while (I != TailBB->end() && I->isPHI()) {
- // Replace the uses of the def of the PHI with the register coming
- // from PredBB.
- MachineInstr *MI = &*I++;
- processPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi, true);
- }
+ bool RemovedBranches = TII->removeBranch(*PrevBB) != 0;
+
+  // If the predecessor still contains terminator instructions after removing
+  // the branch, abort the merge.
+ if (PrevBB->getFirstTerminator() == PrevBB->end()) {
+ if (PreRegAlloc) {
+ DenseMap<Register, RegSubRegPair> LocalVRMap;
+ SmallVector<std::pair<Register, RegSubRegPair>, 4> CopyInfos;
+ MachineBasicBlock::iterator I = TailBB->begin();
+ // Process PHI instructions first.
+ while (I != TailBB->end() && I->isPHI()) {
+ // Replace the uses of the def of the PHI with the register coming
+ // from PredBB.
+ MachineInstr *MI = &*I++;
+ processPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi,
+ true);
+ }
- // Now copy the non-PHI instructions.
- while (I != TailBB->end()) {
- // Replace def of virtual registers with new registers, and update
- // uses with PHI source register or the new registers.
- MachineInstr *MI = &*I++;
- assert(!MI->isBundle() && "Not expecting bundles before regalloc!");
- duplicateInstruction(MI, TailBB, PrevBB, LocalVRMap, UsedByPhi);
- MI->eraseFromParent();
+ // Now copy the non-PHI instructions.
+ while (I != TailBB->end()) {
+ // Replace def of virtual registers with new registers, and update
+ // uses with PHI source register or the new registers.
+ MachineInstr *MI = &*I++;
+ assert(!MI->isBundle() && "Not expecting bundles before regalloc!");
+ duplicateInstruction(MI, TailBB, PrevBB, LocalVRMap, UsedByPhi);
+ MI->eraseFromParent();
+ }
+ appendCopies(PrevBB, CopyInfos, Copies);
+ } else {
+ TII->removeBranch(*PrevBB);
+ // No PHIs to worry about, just splice the instructions over.
+ PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end());
}
- appendCopies(PrevBB, CopyInfos, Copies);
- } else {
- TII->removeBranch(*PrevBB);
- // No PHIs to worry about, just splice the instructions over.
- PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end());
- }
- PrevBB->removeSuccessor(PrevBB->succ_begin());
- assert(PrevBB->succ_empty());
- PrevBB->transferSuccessors(TailBB);
+ PrevBB->removeSuccessor(PrevBB->succ_begin());
+ assert(PrevBB->succ_empty());
+ PrevBB->transferSuccessors(TailBB);
- // Update branches in PrevBB based on Tail's layout successor.
- if (ShouldUpdateTerminators)
- PrevBB->updateTerminator(TailBB->getNextNode());
+ // Update branches in PrevBB based on Tail's layout successor.
+ if (ShouldUpdateTerminators)
+ PrevBB->updateTerminator(TailBB->getNextNode());
- TDBBs.push_back(PrevBB);
- Changed = true;
+ TDBBs.push_back(PrevBB);
+ Changed = true;
+ } else {
+ LLVM_DEBUG(dbgs() << "Abort merging blocks, the predecessor still "
+ "contains terminator instructions");
+ // Return early if no changes were made
+ if (!Changed)
+ return RemovedBranches;
+ }
+ Changed |= RemovedBranches;
}
// If this is after register allocation, there are no phis to fix.
diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp
index 2e4a656ea0c8..e74b3195a130 100644
--- a/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -366,7 +366,7 @@ bool TargetInstrInfo::hasLoadFromStackSlot(
oe = MI.memoperands_end();
o != oe; ++o) {
if ((*o)->isLoad() &&
- dyn_cast_or_null<FixedStackPseudoSourceValue>((*o)->getPseudoValue()))
+ isa_and_nonnull<FixedStackPseudoSourceValue>((*o)->getPseudoValue()))
Accesses.push_back(*o);
}
return Accesses.size() != StartSize;
@@ -380,7 +380,7 @@ bool TargetInstrInfo::hasStoreToStackSlot(
oe = MI.memoperands_end();
o != oe; ++o) {
if ((*o)->isStore() &&
- dyn_cast_or_null<FixedStackPseudoSourceValue>((*o)->getPseudoValue()))
+ isa_and_nonnull<FixedStackPseudoSourceValue>((*o)->getPseudoValue()))
Accesses.push_back(*o);
}
return Accesses.size() != StartSize;
@@ -1264,22 +1264,6 @@ int TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
}
-/// If we can determine the operand latency from the def only, without itinerary
-/// lookup, do so. Otherwise return -1.
-int TargetInstrInfo::computeDefOperandLatency(
- const InstrItineraryData *ItinData, const MachineInstr &DefMI) const {
-
- // Let the target hook getInstrLatency handle missing itineraries.
- if (!ItinData)
- return getInstrLatency(ItinData, DefMI);
-
- if(ItinData->isEmpty())
- return defaultDefLatency(ItinData->SchedModel, DefMI);
-
- // ...operand lookup required
- return -1;
-}
-
bool TargetInstrInfo::getRegSequenceInputs(
const MachineInstr &MI, unsigned DefIdx,
SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const {
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 3c5dd29036db..c0a7efff9e98 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -52,6 +52,7 @@
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
#include <algorithm>
#include <cassert>
@@ -236,6 +237,8 @@ RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) {
return FPEXT_F16_F32;
if (RetVT == MVT::f64)
return FPEXT_F16_F64;
+ if (RetVT == MVT::f80)
+ return FPEXT_F16_F80;
if (RetVT == MVT::f128)
return FPEXT_F16_F128;
} else if (OpVT == MVT::f32) {
@@ -659,7 +662,7 @@ RTLIB::Libcall RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) {
/// InitCmpLibcallCCs - Set default comparison libcall CC.
static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
- memset(CCs, ISD::SETCC_INVALID, sizeof(ISD::CondCode)*RTLIB::UNKNOWN_LIBCALL);
+ std::fill(CCs, CCs + RTLIB::UNKNOWN_LIBCALL, ISD::SETCC_INVALID);
CCs[RTLIB::OEQ_F32] = ISD::SETEQ;
CCs[RTLIB::OEQ_F64] = ISD::SETEQ;
CCs[RTLIB::OEQ_F128] = ISD::SETEQ;
@@ -896,8 +899,6 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::FCEIL, VT, Expand);
setOperationAction(ISD::FRINT, VT, Expand);
setOperationAction(ISD::FTRUNC, VT, Expand);
- setOperationAction(ISD::FROUND, VT, Expand);
- setOperationAction(ISD::FROUNDEVEN, VT, Expand);
setOperationAction(ISD::LROUND, VT, Expand);
setOperationAction(ISD::LLROUND, VT, Expand);
setOperationAction(ISD::LRINT, VT, Expand);
@@ -924,8 +925,15 @@ EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy, const DataLayout &DL,
assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
if (LHSTy.isVector())
return LHSTy;
- return LegalTypes ? getScalarShiftAmountTy(DL, LHSTy)
- : getPointerTy(DL);
+ MVT ShiftVT =
+ LegalTypes ? getScalarShiftAmountTy(DL, LHSTy) : getPointerTy(DL);
+ // If any possible shift value won't fit in the preferred type, just use
+ // something safe. Assume it will be legalized when the shift is expanded.
+ if (ShiftVT.getSizeInBits() < Log2_32_Ceil(LHSTy.getSizeInBits()))
+ ShiftVT = MVT::i32;
+ assert(ShiftVT.getSizeInBits() >= Log2_32_Ceil(LHSTy.getSizeInBits()) &&
+ "ShiftVT is still too small!");
+ return ShiftVT;
}
bool TargetLoweringBase::canOpTrap(unsigned Op, EVT VT) const {
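A standalone sketch of the constraint behind the getShiftAmountTy hunk above, with log2Ceil as a hand-rolled stand-in for LLVM's Log2_32_Ceil (illustrative only, not part of the patch): the shift-amount type must be able to represent every bit index of the shifted value.

#include <cstdint>
#include <cstdio>

// Hand-rolled stand-in for Log2_32_Ceil: smallest N with (1 << N) >= Value.
static unsigned log2Ceil(uint32_t Value) {
  unsigned Bits = 0;
  while ((1u << Bits) < Value)
    ++Bits;
  return Bits;
}

int main() {
  // A 256-bit shift needs shift amounts up to 255, so the shift-amount type
  // must have at least log2Ceil(256) == 8 bits; anything narrower gets
  // widened to i32 by the hunk above.
  std::printf("bits needed for a 256-bit shiftee: %u\n", log2Ceil(256));
  return 0;
}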
@@ -1556,7 +1564,7 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context,
// Scalable vectors cannot be scalarized, so handle the legalisation of the
// types like done elsewhere in SelectionDAG.
- if (VT.isScalableVector() && !isPowerOf2_32(EltCnt.getKnownMinValue())) {
+ if (EltCnt.isScalable()) {
LegalizeKind LK;
EVT PartVT = VT;
do {
@@ -1565,16 +1573,14 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context,
PartVT = LK.second;
} while (LK.first != TypeLegal);
- NumIntermediates = VT.getVectorElementCount().getKnownMinValue() /
- PartVT.getVectorElementCount().getKnownMinValue();
+ if (!PartVT.isVector()) {
+ report_fatal_error(
+ "Don't know how to legalize this scalable vector type");
+ }
- // FIXME: This code needs to be extended to handle more complex vector
- // breakdowns, like nxv7i64 -> nxv8i64 -> 4 x nxv2i64. Currently the only
- // supported cases are vectors that are broken down into equal parts
- // such as nxv6i64 -> 3 x nxv2i64.
- assert((PartVT.getVectorElementCount() * NumIntermediates) ==
- VT.getVectorElementCount() &&
- "Expected an integer multiple of PartVT");
+ NumIntermediates =
+ divideCeil(VT.getVectorElementCount().getKnownMinValue(),
+ PartVT.getVectorElementCount().getKnownMinValue());
IntermediateVT = PartVT;
RegisterVT = getRegisterType(Context, IntermediateVT);
return NumIntermediates;
@@ -1657,9 +1663,9 @@ void llvm::GetReturnInfo(CallingConv::ID CC, Type *ReturnType,
EVT VT = ValueVTs[j];
ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
- if (attr.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
+ if (attr.hasRetAttr(Attribute::SExt))
ExtendKind = ISD::SIGN_EXTEND;
- else if (attr.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt))
+ else if (attr.hasRetAttr(Attribute::ZExt))
ExtendKind = ISD::ZERO_EXTEND;
// FIXME: C calling convention requires the return type to be promoted to
@@ -1679,13 +1685,13 @@ void llvm::GetReturnInfo(CallingConv::ID CC, Type *ReturnType,
// 'inreg' on function refers to return value
ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
- if (attr.hasAttribute(AttributeList::ReturnIndex, Attribute::InReg))
+ if (attr.hasRetAttr(Attribute::InReg))
Flags.setInReg();
// Propagate extension type if any
- if (attr.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
+ if (attr.hasRetAttr(Attribute::SExt))
Flags.setSExt();
- else if (attr.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt))
+ else if (attr.hasRetAttr(Attribute::ZExt))
Flags.setZExt();
for (unsigned i = 0; i < NumParts; ++i)
@@ -1696,7 +1702,7 @@ void llvm::GetReturnInfo(CallingConv::ID CC, Type *ReturnType,
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. This is the actual
/// alignment, not its logarithm.
-unsigned TargetLoweringBase::getByValTypeAlignment(Type *Ty,
+uint64_t TargetLoweringBase::getByValTypeAlignment(Type *Ty,
const DataLayout &DL) const {
return DL.getABITypeAlign(Ty).value();
}
@@ -1749,8 +1755,9 @@ bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
const DataLayout &DL, LLT Ty,
const MachineMemOperand &MMO,
bool *Fast) const {
- return allowsMemoryAccess(Context, DL, getMVTForLLT(Ty), MMO.getAddrSpace(),
- MMO.getAlign(), MMO.getFlags(), Fast);
+ EVT VT = getApproximateEVTForLLT(Ty, DL, Context);
+ return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(), MMO.getAlign(),
+ MMO.getFlags(), Fast);
}
//===----------------------------------------------------------------------===//
@@ -1849,8 +1856,12 @@ TargetLoweringBase::getTypeLegalizationCost(const DataLayout &DL,
while (true) {
LegalizeKind LK = getTypeConversion(C, MTy);
- if (LK.first == TypeScalarizeScalableVector)
- return std::make_pair(InstructionCost::getInvalid(), MVT::getVT(Ty));
+ if (LK.first == TypeScalarizeScalableVector) {
+ // Ensure we return a sensible simple VT here, since many callers of this
+ // function require it.
+ MVT VT = MTy.isSimple() ? MTy.getSimpleVT() : MVT::i64;
+ return std::make_pair(InstructionCost::getInvalid(), VT);
+ }
if (LK.first == TypeLegal)
return std::make_pair(Cost, MTy.getSimpleVT());
@@ -1980,8 +1991,11 @@ void TargetLoweringBase::insertSSPDeclarations(Module &M) const {
auto *GV = new GlobalVariable(M, Type::getInt8PtrTy(M.getContext()), false,
GlobalVariable::ExternalLinkage, nullptr,
"__stack_chk_guard");
+
+ // FreeBSD has "__stack_chk_guard" defined externally in libc.so
if (TM.getRelocationModel() == Reloc::Static &&
- !TM.getTargetTriple().isWindowsGNUEnvironment())
+ !TM.getTargetTriple().isWindowsGNUEnvironment() &&
+ !TM.getTargetTriple().isOSFreeBSD())
GV->setDSOLocal(true);
}
}
@@ -2020,6 +2034,12 @@ bool TargetLoweringBase::isJumpTableRelative() const {
return getTargetMachine().isPositionIndependent();
}
+Align TargetLoweringBase::getPrefLoopAlignment(MachineLoop *ML) const {
+ if (TM.Options.LoopAlignment)
+ return Align(TM.Options.LoopAlignment);
+ return PrefLoopAlignment;
+}
+
//===----------------------------------------------------------------------===//
// Reciprocal Estimates
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index add34eccc1f3..1d3bb286c882 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -677,8 +677,9 @@ calcUniqueIDUpdateFlagsAndSize(const GlobalObject *GO, StringRef SectionName,
}
if (Retain) {
- if (Ctx.getAsmInfo()->useIntegratedAssembler() ||
- Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36))
+ if ((Ctx.getAsmInfo()->useIntegratedAssembler() ||
+ Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36)) &&
+ !TM.getTargetTriple().isOSSolaris())
Flags |= ELF::SHF_GNU_RETAIN;
return NextUniqueID++;
}
@@ -855,8 +856,10 @@ static MCSection *selectELFSectionForGlobal(
EmitUniqueSection = true;
Flags |= ELF::SHF_LINK_ORDER;
}
- if (Retain && (Ctx.getAsmInfo()->useIntegratedAssembler() ||
- Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36))) {
+ if (Retain &&
+ (Ctx.getAsmInfo()->useIntegratedAssembler() ||
+ Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36)) &&
+ !TM.getTargetTriple().isOSSolaris()) {
EmitUniqueSection = true;
Flags |= ELF::SHF_GNU_RETAIN;
}
@@ -1492,7 +1495,7 @@ void TargetLoweringObjectFileMachO::getNameWithPrefix(
SmallVectorImpl<char> &OutName, const GlobalValue *GV,
const TargetMachine &TM) const {
bool CannotUsePrivateLabel = true;
- if (auto *GO = GV->getBaseObject()) {
+ if (auto *GO = GV->getAliaseeObject()) {
SectionKind GOKind = TargetLoweringObjectFile::getKindForGlobal(GO, TM);
const MCSection *TheSection = SectionForGlobal(GO, GOKind, TM);
CannotUsePrivateLabel =
@@ -1563,7 +1566,7 @@ static int getSelectionForCOFF(const GlobalValue *GV) {
if (const Comdat *C = GV->getComdat()) {
const GlobalValue *ComdatKey = getComdatGVForCOFF(GV);
if (const auto *GA = dyn_cast<GlobalAlias>(ComdatKey))
- ComdatKey = GA->getBaseObject();
+ ComdatKey = GA->getAliaseeObject();
if (ComdatKey == GV) {
switch (C->getSelectionKind()) {
case Comdat::Any:
@@ -1942,7 +1945,7 @@ static std::string APIntToHexString(const APInt &AI) {
static std::string scalarConstantToHexString(const Constant *C) {
Type *Ty = C->getType();
if (isa<UndefValue>(C)) {
- return APIntToHexString(APInt::getNullValue(Ty->getPrimitiveSizeInBits()));
+ return APIntToHexString(APInt::getZero(Ty->getPrimitiveSizeInBits()));
} else if (const auto *CFP = dyn_cast<ConstantFP>(C)) {
return APIntToHexString(CFP->getValueAPF().bitcastToAPInt());
} else if (const auto *CI = dyn_cast<ConstantInt>(C)) {
@@ -2414,7 +2417,20 @@ bool TargetLoweringObjectFileXCOFF::shouldPutJumpTableInFunctionSection(
MCSection *TargetLoweringObjectFileXCOFF::getSectionForConstant(
const DataLayout &DL, SectionKind Kind, const Constant *C,
Align &Alignment) const {
- //TODO: Enable emiting constant pool to unique sections when we support it.
+ // TODO: Enable emitting the constant pool to unique sections when we support it.
+ if (Alignment > Align(16))
+ report_fatal_error("Alignments greater than 16 not yet supported.");
+
+ if (Alignment == Align(8)) {
+ assert(ReadOnly8Section && "Section should always be initialized.");
+ return ReadOnly8Section;
+ }
+
+ if (Alignment == Align(16)) {
+ assert(ReadOnly16Section && "Section should always be initialized.");
+ return ReadOnly16Section;
+ }
+
return ReadOnlySection;
}
@@ -2443,7 +2459,8 @@ MCSection *TargetLoweringObjectFileXCOFF::getStaticDtorSection(
const MCExpr *TargetLoweringObjectFileXCOFF::lowerRelativeReference(
const GlobalValue *LHS, const GlobalValue *RHS,
const TargetMachine &TM) const {
- report_fatal_error("XCOFF not yet implemented.");
+ /* Not implemented yet, but don't crash; just return nullptr. */
+ return nullptr;
}
XCOFF::StorageClass
@@ -2473,12 +2490,12 @@ TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(const GlobalValue *GV) {
MCSymbol *TargetLoweringObjectFileXCOFF::getFunctionEntryPointSymbol(
const GlobalValue *Func, const TargetMachine &TM) const {
- assert(
- (isa<Function>(Func) ||
- (isa<GlobalAlias>(Func) &&
- isa_and_nonnull<Function>(cast<GlobalAlias>(Func)->getBaseObject()))) &&
- "Func must be a function or an alias which has a function as base "
- "object.");
+ assert((isa<Function>(Func) ||
+ (isa<GlobalAlias>(Func) &&
+ isa_and_nonnull<Function>(
+ cast<GlobalAlias>(Func)->getAliaseeObject()))) &&
+ "Func must be a function or an alias which has a function as base "
+ "object.");
SmallString<128> NameStr;
NameStr.push_back('.');
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index 4024fd452fc4..402e21d3708b 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -172,6 +172,24 @@ static cl::opt<bool>
FSNoFinalDiscrim("fs-no-final-discrim", cl::init(false), cl::Hidden,
cl::desc("Do not insert FS-AFDO discriminators before "
"emit."));
+// Disable MIRProfileLoader before RegAlloc. This is for debugging and
+// tuning purposes.
+static cl::opt<bool> DisableRAFSProfileLoader(
+ "disable-ra-fsprofile-loader", cl::init(true), cl::Hidden,
+ cl::desc("Disable MIRProfileLoader before RegAlloc"));
+// Disable MIRProfileLoader before BlockPlacement. This is for debugging
+// and tuning purposes.
+static cl::opt<bool> DisableLayoutFSProfileLoader(
+ "disable-layout-fsprofile-loader", cl::init(true), cl::Hidden,
+ cl::desc("Disable MIRProfileLoader before BlockPlacement"));
+// Specify FSProfile file name.
+static cl::opt<std::string>
+ FSProfileFile("fs-profile-file", cl::init(""), cl::value_desc("filename"),
+ cl::desc("Flow Sensitive profile file name."), cl::Hidden);
+// Specify Remapping file for FSProfile.
+static cl::opt<std::string> FSRemappingFile(
+ "fs-remapping-file", cl::init(""), cl::value_desc("filename"),
+ cl::desc("Flow Sensitive profile remapping file name."), cl::Hidden);
// Temporary option to allow experimenting with MachineScheduler as a post-RA
// scheduler. Targets can "properly" enable this with
@@ -308,6 +326,28 @@ static IdentifyingPassPtr overridePass(AnalysisID StandardID,
return TargetID;
}
+// Find the FSProfile file name. The internal option takes precedence
+// over the value from TargetMachine.
+static const std::string getFSProfileFile(const TargetMachine *TM) {
+ if (!FSProfileFile.empty())
+ return FSProfileFile.getValue();
+ const Optional<PGOOptions> &PGOOpt = TM->getPGOOption();
+ if (PGOOpt == None || PGOOpt->Action != PGOOptions::SampleUse)
+ return std::string();
+ return PGOOpt->ProfileFile;
+}
+
+// Find the Profile remapping file name. The internal option takes
+// precedence over the value from TargetMachine.
+static const std::string getFSRemappingFile(const TargetMachine *TM) {
+ if (!FSRemappingFile.empty())
+ return FSRemappingFile.getValue();
+ const Optional<PGOOptions> &PGOOpt = TM->getPGOOption();
+ if (PGOOpt == None || PGOOpt->Action != PGOOptions::SampleUse)
+ return std::string();
+ return PGOOpt->ProfileRemappingFile;
+}
+
//===---------------------------------------------------------------------===//
/// TargetPassConfig
//===---------------------------------------------------------------------===//
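A minimal sketch of the precedence rule implemented by getFSProfileFile and getFSRemappingFile above: a non-empty command-line value wins, otherwise the TargetMachine's PGO options are consulted. The types and names below are illustrative stand-ins, not the real LLVM API.

#include <cstdio>
#include <optional>
#include <string>

// Stand-in for llvm::PGOOptions; illustrative only.
struct PGOOptionsStub { std::string ProfileFile; };

// Command-line value (if present) wins; otherwise fall back to the
// TargetMachine-provided PGO options, or to an empty string.
static std::string pickProfileFile(const std::string &CLValue,
                                   const std::optional<PGOOptionsStub> &PGOOpt) {
  if (!CLValue.empty())
    return CLValue;
  if (!PGOOpt)
    return std::string();
  return PGOOpt->ProfileFile;
}

int main() {
  std::optional<PGOOptionsStub> FromTM{PGOOptionsStub{"tm.prof"}};
  std::printf("%s\n", pickProfileFile("cli.prof", FromTM).c_str()); // cli.prof
  std::printf("%s\n", pickProfileFile("", FromTM).c_str());         // tm.prof
  return 0;
}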
@@ -321,12 +361,9 @@ namespace {
struct InsertedPass {
AnalysisID TargetPassID;
IdentifyingPassPtr InsertedPassID;
- bool VerifyAfter;
- InsertedPass(AnalysisID TargetPassID, IdentifyingPassPtr InsertedPassID,
- bool VerifyAfter)
- : TargetPassID(TargetPassID), InsertedPassID(InsertedPassID),
- VerifyAfter(VerifyAfter) {}
+ InsertedPass(AnalysisID TargetPassID, IdentifyingPassPtr InsertedPassID)
+ : TargetPassID(TargetPassID), InsertedPassID(InsertedPassID) {}
Pass *getInsertedPass() const {
assert(InsertedPassID.isValid() && "Illegal Pass ID!");
@@ -601,14 +638,13 @@ CodeGenOpt::Level TargetPassConfig::getOptLevel() const {
/// Insert InsertedPassID pass after TargetPassID.
void TargetPassConfig::insertPass(AnalysisID TargetPassID,
- IdentifyingPassPtr InsertedPassID,
- bool VerifyAfter) {
+ IdentifyingPassPtr InsertedPassID) {
assert(((!InsertedPassID.isInstance() &&
TargetPassID != InsertedPassID.getID()) ||
(InsertedPassID.isInstance() &&
TargetPassID != InsertedPassID.getInstance()->getPassID())) &&
"Insert a pass after itself!");
- Impl->InsertedPasses.emplace_back(TargetPassID, InsertedPassID, VerifyAfter);
+ Impl->InsertedPasses.emplace_back(TargetPassID, InsertedPassID);
}
/// createPassConfig - Create a pass configuration object to be used by
@@ -686,7 +722,7 @@ bool TargetPassConfig::isPassSubstitutedOrOverridden(AnalysisID ID) const {
/// a later pass or that it should stop after an earlier pass, then do not add
/// the pass. Finally, compare the current pass against the StartAfter
/// and StopAfter options and change the Started/Stopped flags accordingly.
-void TargetPassConfig::addPass(Pass *P, bool verifyAfter) {
+void TargetPassConfig::addPass(Pass *P) {
assert(!Initialized && "PassConfig is immutable");
// Cache the Pass ID here in case the pass manager finds this pass is
@@ -704,16 +740,16 @@ void TargetPassConfig::addPass(Pass *P, bool verifyAfter) {
addMachinePrePasses();
std::string Banner;
// Construct banner message before PM->add() as that may delete the pass.
- if (AddingMachinePasses && verifyAfter)
+ if (AddingMachinePasses)
Banner = std::string("After ") + std::string(P->getPassName());
PM->add(P);
if (AddingMachinePasses)
- addMachinePostPasses(Banner, /*AllowVerify*/ verifyAfter);
+ addMachinePostPasses(Banner);
// Add the passes after the pass P if there is any.
for (const auto &IP : Impl->InsertedPasses) {
if (IP.TargetPassID == PassID)
- addPass(IP.getInsertedPass(), IP.VerifyAfter);
+ addPass(IP.getInsertedPass());
}
} else {
delete P;
@@ -733,7 +769,7 @@ void TargetPassConfig::addPass(Pass *P, bool verifyAfter) {
///
/// addPass cannot return a pointer to the pass instance because it is internal to the
/// PassManager and the instance we create here may already be freed.
-AnalysisID TargetPassConfig::addPass(AnalysisID PassID, bool verifyAfter) {
+AnalysisID TargetPassConfig::addPass(AnalysisID PassID) {
IdentifyingPassPtr TargetID = getPassSubstitution(PassID);
IdentifyingPassPtr FinalPtr = overridePass(PassID, TargetID);
if (!FinalPtr.isValid())
@@ -748,7 +784,7 @@ AnalysisID TargetPassConfig::addPass(AnalysisID PassID, bool verifyAfter) {
llvm_unreachable("Pass ID not registered");
}
AnalysisID FinalID = P->getPassID();
- addPass(P, verifyAfter); // Ends the lifetime of P.
+ addPass(P); // Ends the lifetime of P.
return FinalID;
}
@@ -792,8 +828,7 @@ void TargetPassConfig::addMachinePrePasses(bool AllowDebugify) {
addDebugifyPass();
}
-void TargetPassConfig::addMachinePostPasses(const std::string &Banner,
- bool AllowVerify, bool AllowStrip) {
+void TargetPassConfig::addMachinePostPasses(const std::string &Banner) {
if (DebugifyIsSafe) {
if (DebugifyCheckAndStripAll == cl::BOU_TRUE) {
addCheckDebugPass();
@@ -801,8 +836,7 @@ void TargetPassConfig::addMachinePostPasses(const std::string &Banner,
} else if (DebugifyAndStripAll == cl::BOU_TRUE)
addStripDebugPass();
}
- if (AllowVerify)
- addVerifyPass(Banner);
+ addVerifyPass(Banner);
}
/// Add common target configurable passes that perform LLVM IR to IR transforms
@@ -1113,6 +1147,18 @@ void TargetPassConfig::addMachinePasses() {
// where it becomes safe again so stop debugifying here.
DebugifyIsSafe = false;
+ // Add an FSDiscriminator pass right before RA so that we can get a
+ // more precise SampleFDO profile for RA.
+ if (EnableFSDiscriminator) {
+ addPass(createMIRAddFSDiscriminatorsPass(
+ sampleprof::FSDiscriminatorPass::Pass1));
+ const std::string ProfileFile = getFSProfileFile(TM);
+ if (!ProfileFile.empty() && !DisableRAFSProfileLoader)
+ addPass(
+ createMIRProfileLoaderPass(ProfileFile, getFSRemappingFile(TM),
+ sampleprof::FSDiscriminatorPass::Pass1));
+ }
+
// Run register allocation and passes that are tightly coupled with it,
// including phi elimination and scheduling.
if (getOptimizeRegAlloc())
@@ -1123,7 +1169,7 @@ void TargetPassConfig::addMachinePasses() {
// Run post-ra passes.
addPostRegAlloc();
- addPass(&RemoveRedundantDebugValuesID, false);
+ addPass(&RemoveRedundantDebugValuesID);
addPass(&FixupStatepointCallerSavedID);
@@ -1165,7 +1211,7 @@ void TargetPassConfig::addMachinePasses() {
// GC
if (addGCPasses()) {
if (PrintGCInfo)
- addPass(createGCInfoPrinter(dbgs()), false);
+ addPass(createGCInfoPrinter(dbgs()));
}
// Basic block placement.
@@ -1195,10 +1241,10 @@ void TargetPassConfig::addMachinePasses() {
// FIXME: Some backends are incompatible with running the verifier after
// addPreEmitPass. Maybe only pass "false" here for those targets?
- addPass(&FuncletLayoutID, false);
+ addPass(&FuncletLayoutID);
- addPass(&StackMapLivenessID, false);
- addPass(&LiveDebugValuesID, false);
+ addPass(&StackMapLivenessID);
+ addPass(&LiveDebugValuesID);
if (TM->Options.EnableMachineOutliner && getOptLevel() != CodeGenOpt::None &&
EnableMachineOutliner != RunOutliner::NeverOutline) {
@@ -1224,10 +1270,6 @@ void TargetPassConfig::addMachinePasses() {
// Add passes that directly emit MI after all other MI passes.
addPreEmitPass2();
- // Insert pseudo probe annotation for callsite profiling
- if (TM->Options.PseudoProbeForProfiling)
- addPass(createPseudoProbeInserter());
-
AddingMachinePasses = false;
}
@@ -1369,8 +1411,8 @@ bool TargetPassConfig::usingDefaultRegAlloc() const {
/// Add the minimum set of target-independent passes that are required for
/// register allocation. No coalescing or scheduling.
void TargetPassConfig::addFastRegAlloc() {
- addPass(&PHIEliminationID, false);
- addPass(&TwoAddressInstructionPassID, false);
+ addPass(&PHIEliminationID);
+ addPass(&TwoAddressInstructionPassID);
addRegAssignAndRewriteFast();
}
@@ -1379,9 +1421,9 @@ void TargetPassConfig::addFastRegAlloc() {
/// optimized register allocation, including coalescing, machine instruction
/// scheduling, and register allocation itself.
void TargetPassConfig::addOptimizedRegAlloc() {
- addPass(&DetectDeadLanesID, false);
+ addPass(&DetectDeadLanesID);
- addPass(&ProcessImplicitDefsID, false);
+ addPass(&ProcessImplicitDefsID);
// LiveVariables currently requires pure SSA form.
//
@@ -1393,18 +1435,18 @@ void TargetPassConfig::addOptimizedRegAlloc() {
// When LiveVariables is removed this has to be removed/moved either.
// Explicit addition of UnreachableMachineBlockElim allows stopping before or
// after it with -stop-before/-stop-after.
- addPass(&UnreachableMachineBlockElimID, false);
- addPass(&LiveVariablesID, false);
+ addPass(&UnreachableMachineBlockElimID);
+ addPass(&LiveVariablesID);
// Edge splitting is smarter with machine loop info.
- addPass(&MachineLoopInfoID, false);
- addPass(&PHIEliminationID, false);
+ addPass(&MachineLoopInfoID);
+ addPass(&PHIEliminationID);
// Eventually, we want to run LiveIntervals before PHI elimination.
if (EarlyLiveIntervals)
- addPass(&LiveIntervalsID, false);
+ addPass(&LiveIntervalsID);
- addPass(&TwoAddressInstructionPassID, false);
+ addPass(&TwoAddressInstructionPassID);
addPass(&RegisterCoalescerID);
// The machine scheduler may accidentally create disconnected components
@@ -1417,9 +1459,6 @@ void TargetPassConfig::addOptimizedRegAlloc() {
if (addRegAssignAndRewriteOptimized()) {
// Perform stack slot coloring and post-ra machine LICM.
- //
- // FIXME: Re-enable coloring with register when it's capable of adding
- // kill markers.
addPass(&StackSlotColoringID);
// Allow targets to expand pseudo instructions depending on the choice of
@@ -1459,12 +1498,21 @@ void TargetPassConfig::addMachineLateOptimization() {
/// Add standard GC passes.
bool TargetPassConfig::addGCPasses() {
- addPass(&GCMachineCodeAnalysisID, false);
+ addPass(&GCMachineCodeAnalysisID);
return true;
}
/// Add standard basic block placement passes.
void TargetPassConfig::addBlockPlacement() {
+ if (EnableFSDiscriminator) {
+ addPass(createMIRAddFSDiscriminatorsPass(
+ sampleprof::FSDiscriminatorPass::Pass2));
+ const std::string ProfileFile = getFSProfileFile(TM);
+ if (!ProfileFile.empty() && !DisableLayoutFSProfileLoader)
+ addPass(
+ createMIRProfileLoaderPass(ProfileFile, getFSRemappingFile(TM),
+ sampleprof::FSDiscriminatorPass::Pass2));
+ }
if (addPass(&MachineBlockPlacementID)) {
// Run a separate pass to collect block placement statistics.
if (EnableBlockPlacementStats)
diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index 1664b4dadfec..46cec5407565 100644
--- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -118,6 +118,8 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
// registers. e.g. r1 = move v1024.
DenseMap<Register, Register> DstRegMap;
+ void removeClobberedSrcRegMap(MachineInstr *MI);
+
bool isRevCopyChain(Register FromReg, Register ToReg, int Maxlen);
bool noUseAfterLastDef(Register Reg, unsigned Dist, unsigned &LastDef);
@@ -132,7 +134,7 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
bool convertInstTo3Addr(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi, Register RegA,
- Register RegB, unsigned Dist);
+ Register RegB, unsigned &Dist);
bool isDefTooClose(Register Reg, unsigned Dist, MachineInstr *MI);
@@ -144,7 +146,7 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
bool tryInstructionTransform(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi,
unsigned SrcIdx, unsigned DstIdx,
- unsigned Dist, bool shouldOnlyCommute);
+ unsigned &Dist, bool shouldOnlyCommute);
bool tryInstructionCommute(MachineInstr *MI,
unsigned DstOpIdx,
@@ -380,7 +382,8 @@ findOnlyInterestingUse(Register Reg, MachineBasicBlock *MBB,
if (!MRI->hasOneNonDBGUse(Reg))
// None or more than one use.
return nullptr;
- MachineInstr &UseMI = *MRI->use_instr_nodbg_begin(Reg);
+ MachineOperand &UseOp = *MRI->use_nodbg_begin(Reg);
+ MachineInstr &UseMI = *UseOp.getParent();
if (UseMI.getParent() != MBB)
return nullptr;
Register SrcReg;
@@ -394,6 +397,18 @@ findOnlyInterestingUse(Register Reg, MachineBasicBlock *MBB,
IsDstPhys = DstReg.isPhysical();
return &UseMI;
}
+ if (UseMI.isCommutable()) {
+ unsigned Src1 = TargetInstrInfo::CommuteAnyOperandIndex;
+ unsigned Src2 = UseMI.getOperandNo(&UseOp);
+ if (TII->findCommutedOpIndices(UseMI, Src1, Src2)) {
+ MachineOperand &MO = UseMI.getOperand(Src1);
+ if (MO.isReg() && MO.isUse() &&
+ isTwoAddrUse(UseMI, MO.getReg(), DstReg)) {
+ IsDstPhys = DstReg.isPhysical();
+ return &UseMI;
+ }
+ }
+ }
return nullptr;
}
@@ -422,6 +437,76 @@ static bool regsAreCompatible(Register RegA, Register RegB,
return TRI->regsOverlap(RegA, RegB);
}
+/// Remove from RegMap any entries mapped to a physical register that overlaps MO.
+static void removeMapRegEntry(const MachineOperand &MO,
+ DenseMap<Register, Register> &RegMap,
+ const TargetRegisterInfo *TRI) {
+ assert(
+ (MO.isReg() || MO.isRegMask()) &&
+ "removeMapRegEntry must be called with a register or regmask operand.");
+
+ SmallVector<Register, 2> Srcs;
+ for (auto SI : RegMap) {
+ Register ToReg = SI.second;
+ if (ToReg.isVirtual())
+ continue;
+
+ if (MO.isReg()) {
+ Register Reg = MO.getReg();
+ if (TRI->regsOverlap(ToReg, Reg))
+ Srcs.push_back(SI.first);
+ } else if (MO.clobbersPhysReg(ToReg))
+ Srcs.push_back(SI.first);
+ }
+
+ for (auto SrcReg : Srcs)
+ RegMap.erase(SrcReg);
+}
+
+/// If a physical register is clobbered, old entries mapped to it should be
+/// deleted. For example
+///
+/// %2:gr64 = COPY killed $rdx
+/// MUL64r %3:gr64, implicit-def $rax, implicit-def $rdx
+///
+/// After the MUL instruction, $rdx contains a different value than in the COPY
+/// instruction. So %2 should not map to $rdx after MUL.
+void TwoAddressInstructionPass::removeClobberedSrcRegMap(MachineInstr *MI) {
+ if (MI->isCopy()) {
+ // If a virtual register is copied to its mapped physical register, it
+ // doesn't change the potential coalescing between them, so we don't remove
+ // entries mapped to the physical register. For example
+ //
+ // %100 = COPY $r8
+ // ...
+ // $r8 = COPY %100
+ //
+ // The first copy constructs SrcRegMap[%100] = $r8, the second copy doesn't
+ // destroy the content of $r8, and should not impact SrcRegMap.
+ Register Dst = MI->getOperand(0).getReg();
+ if (!Dst || Dst.isVirtual())
+ return;
+
+ Register Src = MI->getOperand(1).getReg();
+ if (regsAreCompatible(Dst, getMappedReg(Src, SrcRegMap), TRI))
+ return;
+ }
+
+ for (unsigned i = 0, NumOps = MI->getNumOperands(); i != NumOps; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isRegMask()) {
+ removeMapRegEntry(MO, SrcRegMap, TRI);
+ continue;
+ }
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg || Reg.isVirtual())
+ continue;
+ removeMapRegEntry(MO, SrcRegMap, TRI);
+ }
+}
+
// Returns true if Reg is equal or aliased to at least one register in Set.
static bool regOverlapsSet(const SmallVectorImpl<Register> &Set, Register Reg,
const TargetRegisterInfo *TRI) {
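A self-contained sketch of the pruning idea behind removeMapRegEntry and removeClobberedSrcRegMap above: once a physical register is clobbered, every SrcRegMap entry mapping to it is dropped. Reg and the equality check are simplified stand-ins for llvm::Register and TRI->regsOverlap (illustrative only).

#include <cstdio>
#include <map>
#include <vector>

using Reg = int; // illustrative stand-in for llvm::Register

// Drop every SrcRegMap entry whose mapped-to register is clobbered; entries
// mapping to untouched registers survive.
static void removeClobberedEntries(std::map<Reg, Reg> &SrcRegMap,
                                   const std::vector<Reg> &Clobbered) {
  std::vector<Reg> ToErase;
  for (const auto &Entry : SrcRegMap)
    for (Reg C : Clobbered)
      if (Entry.second == C) // stand-in for TRI->regsOverlap(ToReg, Reg)
        ToErase.push_back(Entry.first);
  for (Reg Key : ToErase)
    SrcRegMap.erase(Key);
}

int main() {
  std::map<Reg, Reg> SrcRegMap{{100, 1}, {101, 2}}; // %100 -> $r1, %101 -> $r2
  removeClobberedEntries(SrcRegMap, {2});           // an instruction clobbers $r2
  std::printf("entries left: %zu\n", SrcRegMap.size()); // 1
  return 0;
}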
@@ -589,21 +674,15 @@ bool TwoAddressInstructionPass::isProfitableToConv3Addr(Register RegA,
/// Return true if this transformation was successful.
bool TwoAddressInstructionPass::convertInstTo3Addr(
MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi,
- Register RegA, Register RegB, unsigned Dist) {
- // FIXME: Why does convertToThreeAddress() need an iterator reference?
- MachineFunction::iterator MFI = MBB->getIterator();
- MachineInstr *NewMI = TII->convertToThreeAddress(MFI, *mi, LV);
- assert(MBB->getIterator() == MFI &&
- "convertToThreeAddress changed iterator reference");
+ Register RegA, Register RegB, unsigned &Dist) {
+ MachineInstrSpan MIS(mi, MBB);
+ MachineInstr *NewMI = TII->convertToThreeAddress(*mi, LV, LIS);
if (!NewMI)
return false;
LLVM_DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi);
LLVM_DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI);
- if (LIS)
- LIS->ReplaceMachineInstrInMaps(*mi, *NewMI);
-
// If the old instruction is debug value tracked, an update is required.
if (auto OldInstrNum = mi->peekDebugInstrNum()) {
// Sanity check.
@@ -624,7 +703,9 @@ bool TwoAddressInstructionPass::convertInstTo3Addr(
MBB->erase(mi); // Nuke the old inst.
- DistanceMap.insert(std::make_pair(NewMI, Dist));
+ for (MachineInstr &MI : MIS)
+ DistanceMap.insert(std::make_pair(&MI, Dist++));
+ Dist--;
mi = NewMI;
nmi = std::next(mi);
@@ -656,9 +737,7 @@ void TwoAddressInstructionPass::scanUses(Register DstReg) {
VirtRegPairs.push_back(NewReg);
break;
}
- bool isNew = SrcRegMap.insert(std::make_pair(NewReg, Reg)).second;
- if (!isNew)
- assert(SrcRegMap[NewReg] == Reg && "Can't map to two src registers!");
+ SrcRegMap[NewReg] = Reg;
VirtRegPairs.push_back(NewReg);
Reg = NewReg;
}
@@ -667,8 +746,7 @@ void TwoAddressInstructionPass::scanUses(Register DstReg) {
unsigned ToReg = VirtRegPairs.back();
VirtRegPairs.pop_back();
while (!VirtRegPairs.empty()) {
- unsigned FromReg = VirtRegPairs.back();
- VirtRegPairs.pop_back();
+ unsigned FromReg = VirtRegPairs.pop_back_val();
bool isNew = DstRegMap.insert(std::make_pair(FromReg, ToReg)).second;
if (!isNew)
assert(DstRegMap[FromReg] == ToReg &&"Can't map to two dst registers!");
@@ -857,12 +935,13 @@ bool TwoAddressInstructionPass::rescheduleMIBelowKill(
nmi = End;
MachineBasicBlock::iterator InsertPos = KillPos;
if (LIS) {
- // We have to move the copies first so that the MBB is still well-formed
- // when calling handleMove().
+ // We have to move the copies (and any interleaved debug instructions)
+ // first so that the MBB is still well-formed when calling handleMove().
for (MachineBasicBlock::iterator MBBI = AfterMI; MBBI != End;) {
auto CopyMI = MBBI++;
MBB->splice(InsertPos, MBB, CopyMI);
- LIS->handleMove(*CopyMI);
+ if (!CopyMI->isDebugOrPseudoInstr())
+ LIS->handleMove(*CopyMI);
InsertPos = CopyMI;
}
End = std::next(MachineBasicBlock::iterator(MI));
@@ -1130,7 +1209,7 @@ bool TwoAddressInstructionPass::
tryInstructionTransform(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi,
unsigned SrcIdx, unsigned DstIdx,
- unsigned Dist, bool shouldOnlyCommute) {
+ unsigned &Dist, bool shouldOnlyCommute) {
if (OptLevel == CodeGenOpt::None)
return false;
@@ -1238,6 +1317,8 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
// look "normal" to the transformation logic.
MBB->insert(mi, NewMIs[0]);
MBB->insert(mi, NewMIs[1]);
+ DistanceMap.insert(std::make_pair(NewMIs[0], Dist++));
+ DistanceMap.insert(std::make_pair(NewMIs[1], Dist));
LLVM_DEBUG(dbgs() << "2addr: NEW LOAD: " << *NewMIs[0]
<< "2addr: NEW INST: " << *NewMIs[1]);
@@ -1288,9 +1369,12 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
if (MO.isReg())
OrigRegs.push_back(MO.getReg());
}
+
+ LIS->RemoveMachineInstrFromMaps(MI);
}
MI.eraseFromParent();
+ DistanceMap.erase(&MI);
// Update LiveIntervals.
if (LIS) {
@@ -1307,6 +1391,9 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
LLVM_DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n");
NewMIs[0]->eraseFromParent();
NewMIs[1]->eraseFromParent();
+ DistanceMap.erase(NewMIs[0]);
+ DistanceMap.erase(NewMIs[1]);
+ Dist--;
}
}
}
@@ -1320,7 +1407,6 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
// Return true if any tied operands were found, including the trivial ones.
bool TwoAddressInstructionPass::
collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) {
- const MCInstrDesc &MCID = MI->getDesc();
bool AnyOps = false;
unsigned NumOps = MI->getNumOperands();
@@ -1342,10 +1428,10 @@ collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) {
// Deal with undef uses immediately - simply rewrite the src operand.
if (SrcMO.isUndef() && !DstMO.getSubReg()) {
// Constrain the DstReg register class if required.
- if (DstReg.isVirtual())
- if (const TargetRegisterClass *RC = TII->getRegClass(MCID, SrcIdx,
- TRI, *MF))
- MRI->constrainRegClass(DstReg, RC);
+ if (DstReg.isVirtual()) {
+ const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
+ MRI->constrainRegClass(DstReg, RC);
+ }
SrcMO.setReg(DstReg);
SrcMO.setSubReg(0);
LLVM_DEBUG(dbgs() << "\t\trewrite undef:\t" << *MI);
@@ -1434,12 +1520,24 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
if (LIS) {
LastCopyIdx = LIS->InsertMachineInstrInMaps(*PrevMI).getRegSlot();
+ SlotIndex endIdx =
+ LIS->getInstructionIndex(*MI).getRegSlot(IsEarlyClobber);
if (RegA.isVirtual()) {
LiveInterval &LI = LIS->getInterval(RegA);
VNInfo *VNI = LI.getNextValue(LastCopyIdx, LIS->getVNInfoAllocator());
- SlotIndex endIdx =
- LIS->getInstructionIndex(*MI).getRegSlot(IsEarlyClobber);
- LI.addSegment(LiveInterval::Segment(LastCopyIdx, endIdx, VNI));
+ LI.addSegment(LiveRange::Segment(LastCopyIdx, endIdx, VNI));
+ for (auto &S : LI.subranges()) {
+ VNI = S.getNextValue(LastCopyIdx, LIS->getVNInfoAllocator());
+ S.addSegment(LiveRange::Segment(LastCopyIdx, endIdx, VNI));
+ }
+ } else {
+ for (MCRegUnitIterator Unit(RegA, TRI); Unit.isValid(); ++Unit) {
+ if (LiveRange *LR = LIS->getCachedRegUnit(*Unit)) {
+ VNInfo *VNI =
+ LR->getNextValue(LastCopyIdx, LIS->getVNInfoAllocator());
+ LR->addSegment(LiveRange::Segment(LastCopyIdx, endIdx, VNI));
+ }
+ }
}
}
@@ -1461,49 +1559,58 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
// by SubRegB is compatible with RegA with no subregister. So regardless of
// whether the dest oper writes a subreg, the source oper should not.
MO.setSubReg(0);
-
- // Propagate SrcRegMap.
- SrcRegMap[RegA] = RegB;
}
if (AllUsesCopied) {
- bool ReplacedAllUntiedUses = true;
- if (!IsEarlyClobber) {
- // Replace other (un-tied) uses of regB with LastCopiedReg.
- for (MachineOperand &MO : MI->operands()) {
- if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) {
- if (MO.getSubReg() == SubRegB) {
- if (MO.isKill()) {
- MO.setIsKill(false);
- RemovedKillFlag = true;
- }
- MO.setReg(LastCopiedReg);
- MO.setSubReg(0);
- } else {
- ReplacedAllUntiedUses = false;
+ LaneBitmask RemainingUses = LaneBitmask::getNone();
+ // Replace other (un-tied) uses of regB with LastCopiedReg.
+ for (MachineOperand &MO : MI->operands()) {
+ if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) {
+ if (MO.getSubReg() == SubRegB && !IsEarlyClobber) {
+ if (MO.isKill()) {
+ MO.setIsKill(false);
+ RemovedKillFlag = true;
}
+ MO.setReg(LastCopiedReg);
+ MO.setSubReg(0);
+ } else {
+ RemainingUses |= TRI->getSubRegIndexLaneMask(MO.getSubReg());
}
}
}
// Update live variables for regB.
- if (RemovedKillFlag && ReplacedAllUntiedUses &&
- LV && LV->getVarInfo(RegB).removeKill(*MI)) {
+ if (RemovedKillFlag && RemainingUses.none() && LV &&
+ LV->getVarInfo(RegB).removeKill(*MI)) {
MachineBasicBlock::iterator PrevMI = MI;
--PrevMI;
LV->addVirtualRegisterKilled(RegB, *PrevMI);
}
+ if (RemovedKillFlag && RemainingUses.none())
+ SrcRegMap[LastCopiedReg] = RegB;
+
// Update LiveIntervals.
if (LIS) {
- LiveInterval &LI = LIS->getInterval(RegB);
- SlotIndex MIIdx = LIS->getInstructionIndex(*MI);
- LiveInterval::const_iterator I = LI.find(MIIdx);
- assert(I != LI.end() && "RegB must be live-in to use.");
+ SlotIndex UseIdx = LIS->getInstructionIndex(*MI);
+ auto Shrink = [=](LiveRange &LR, LaneBitmask LaneMask) {
+ LiveRange::Segment *S = LR.getSegmentContaining(LastCopyIdx);
+ if (!S)
+ return true;
+ if ((LaneMask & RemainingUses).any())
+ return false;
+ if (S->end.getBaseIndex() != UseIdx)
+ return false;
+ S->end = LastCopyIdx;
+ return true;
+ };
- SlotIndex UseIdx = MIIdx.getRegSlot(IsEarlyClobber);
- if (I->end == UseIdx)
- LI.removeSegment(LastCopyIdx, UseIdx);
+ LiveInterval &LI = LIS->getInterval(RegB);
+ bool ShrinkLI = true;
+ for (auto &S : LI.subranges())
+ ShrinkLI &= Shrink(S, S.LaneMask);
+ if (ShrinkLI)
+ Shrink(LI, LaneBitmask::getAll());
}
} else if (RemovedKillFlag) {
// Some tied uses of regB matched their destination registers, so
@@ -1580,6 +1687,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
// First scan through all the tied register uses in this instruction
// and record a list of pairs of tied operands for each register.
if (!collectTiedOperands(&*mi, TiedOperands)) {
+ removeClobberedSrcRegMap(&*mi);
mi = nmi;
continue;
}
@@ -1604,6 +1712,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
// The tied operands have been eliminated or shifted further down
// the block to ease elimination. Continue processing with 'nmi'.
TiedOperands.clear();
+ removeClobberedSrcRegMap(&*mi);
mi = nmi;
continue;
}
@@ -1628,18 +1737,44 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
mi->RemoveOperand(1);
mi->setDesc(TII->get(TargetOpcode::COPY));
LLVM_DEBUG(dbgs() << "\t\tconvert to:\t" << *mi);
+
+ // Update LiveIntervals.
+ if (LIS) {
+ Register Reg = mi->getOperand(0).getReg();
+ LiveInterval &LI = LIS->getInterval(Reg);
+ if (LI.hasSubRanges()) {
+ // The COPY no longer defines subregs of %reg except for
+ // %reg.subidx.
+ LaneBitmask LaneMask =
+ TRI->getSubRegIndexLaneMask(mi->getOperand(0).getSubReg());
+ SlotIndex Idx = LIS->getInstructionIndex(*mi);
+ for (auto &S : LI.subranges()) {
+ if ((S.LaneMask & LaneMask).none()) {
+ LiveRange::iterator UseSeg = S.FindSegmentContaining(Idx);
+ LiveRange::iterator DefSeg = std::next(UseSeg);
+ S.MergeValueNumberInto(DefSeg->valno, UseSeg->valno);
+ }
+ }
+
+ // The COPY no longer has a use of %reg.
+ LIS->shrinkToUses(&LI);
+ } else {
+ // The live interval for Reg did not have subranges but now it needs
+ // them because we have introduced a subreg def. Recompute it.
+ LIS->removeInterval(Reg);
+ LIS->createAndComputeVirtRegInterval(Reg);
+ }
+ }
}
// Clear TiedOperands here instead of at the top of the loop
// since most instructions do not have tied operands.
TiedOperands.clear();
+ removeClobberedSrcRegMap(&*mi);
mi = nmi;
}
}
- if (LIS)
- MF->verify(this, "After two-address instruction pass");
-
return MadeChange;
}
@@ -1722,6 +1857,9 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
for (int j = MI.getNumOperands() - 1, ee = 0; j > ee; --j)
MI.RemoveOperand(j);
} else {
+ if (LIS)
+ LIS->RemoveMachineInstrFromMaps(MI);
+
LLVM_DEBUG(dbgs() << "Eliminated: " << MI);
MI.eraseFromParent();
}
diff --git a/llvm/lib/CodeGen/TypePromotion.cpp b/llvm/lib/CodeGen/TypePromotion.cpp
index 2ce6ea1d4212..d042deefd746 100644
--- a/llvm/lib/CodeGen/TypePromotion.cpp
+++ b/llvm/lib/CodeGen/TypePromotion.cpp
@@ -108,7 +108,7 @@ class IRPromoter {
SetVector<Value*> &Visited;
SetVector<Value*> &Sources;
SetVector<Instruction*> &Sinks;
- SmallVectorImpl<Instruction*> &SafeWrap;
+ SmallPtrSetImpl<Instruction *> &SafeWrap;
IntegerType *ExtTy = nullptr;
SmallPtrSet<Value*, 8> NewInsts;
SmallPtrSet<Instruction*, 4> InstsToRemove;
@@ -116,7 +116,6 @@ class IRPromoter {
SmallPtrSet<Value*, 8> Promoted;
void ReplaceAllUsersOfWith(Value *From, Value *To);
- void PrepareWrappingAdds(void);
void ExtendSources(void);
void ConvertTruncs(void);
void PromoteTree(void);
@@ -125,11 +124,11 @@ class IRPromoter {
public:
IRPromoter(LLVMContext &C, IntegerType *Ty, unsigned Width,
- SetVector<Value*> &visited, SetVector<Value*> &sources,
- SetVector<Instruction*> &sinks,
- SmallVectorImpl<Instruction*> &wrap) :
- Ctx(C), OrigTy(Ty), PromotedWidth(Width), Visited(visited),
- Sources(sources), Sinks(sinks), SafeWrap(wrap) {
+ SetVector<Value *> &visited, SetVector<Value *> &sources,
+ SetVector<Instruction *> &sinks,
+ SmallPtrSetImpl<Instruction *> &wrap)
+ : Ctx(C), OrigTy(Ty), PromotedWidth(Width), Visited(visited),
+ Sources(sources), Sinks(sinks), SafeWrap(wrap) {
ExtTy = IntegerType::get(Ctx, PromotedWidth);
assert(OrigTy->getPrimitiveSizeInBits().getFixedSize() <
ExtTy->getPrimitiveSizeInBits().getFixedSize() &&
@@ -145,7 +144,7 @@ class TypePromotion : public FunctionPass {
unsigned RegisterBitWidth = 0;
SmallPtrSet<Value*, 16> AllVisited;
SmallPtrSet<Instruction*, 8> SafeToPromote;
- SmallVector<Instruction*, 4> SafeWrap;
+ SmallPtrSet<Instruction *, 4> SafeWrap;
// Does V have the same size result type as TypeSize.
bool EqualTypeSize(Value *V);
@@ -183,6 +182,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<TargetPassConfig>();
+ AU.setPreservesCFG();
}
StringRef getPassName() const override { return PASS_NAME; }
@@ -192,11 +192,8 @@ public:
}
-static bool GenerateSignBits(Value *V) {
- if (!isa<Instruction>(V))
- return false;
-
- unsigned Opc = cast<Instruction>(V)->getOpcode();
+static bool GenerateSignBits(Instruction *I) {
+ unsigned Opc = I->getOpcode();
return Opc == Instruction::AShr || Opc == Instruction::SDiv ||
Opc == Instruction::SRem || Opc == Instruction::SExt;
}
@@ -283,7 +280,7 @@ bool TypePromotion::isSafeWrap(Instruction *I) {
// wrap in respect to itself in the original bitwidth. If it doesn't wrap,
// just underflows the range, the icmp would give the same result whether the
// result has been truncated or not. We calculate this by:
- // - Zero extending both constants, if needed, to 32-bits.
+ // - Zero extending both constants, if needed, to RegisterBitWidth.
// - Take the absolute value of I's constant, adding this to the icmp const.
// - Check that this value is not out of range for small type. If it is, it
// means that it has underflowed enough to wrap around the icmp constant.
@@ -335,53 +332,46 @@ bool TypePromotion::isSafeWrap(Instruction *I) {
if (Opc != Instruction::Add && Opc != Instruction::Sub)
return false;
- if (!I->hasOneUse() ||
- !isa<ICmpInst>(*I->user_begin()) ||
+ if (!I->hasOneUse() || !isa<ICmpInst>(*I->user_begin()) ||
!isa<ConstantInt>(I->getOperand(1)))
return false;
- ConstantInt *OverflowConst = cast<ConstantInt>(I->getOperand(1));
- bool NegImm = OverflowConst->isNegative();
- bool IsDecreasing = ((Opc == Instruction::Sub) && !NegImm) ||
- ((Opc == Instruction::Add) && NegImm);
- if (!IsDecreasing)
- return false;
-
// Don't support an icmp that deals with sign bits.
auto *CI = cast<ICmpInst>(*I->user_begin());
if (CI->isSigned() || CI->isEquality())
return false;
- ConstantInt *ICmpConst = nullptr;
+ ConstantInt *ICmpConstant = nullptr;
if (auto *Const = dyn_cast<ConstantInt>(CI->getOperand(0)))
- ICmpConst = Const;
+ ICmpConstant = Const;
else if (auto *Const = dyn_cast<ConstantInt>(CI->getOperand(1)))
- ICmpConst = Const;
+ ICmpConstant = Const;
else
return false;
- // Now check that the result can't wrap on itself.
- APInt Total = ICmpConst->getValue().getBitWidth() < 32 ?
- ICmpConst->getValue().zext(32) : ICmpConst->getValue();
-
- Total += OverflowConst->getValue().getBitWidth() < 32 ?
- OverflowConst->getValue().abs().zext(32) : OverflowConst->getValue().abs();
-
- APInt Max = APInt::getAllOnesValue(TypePromotion::TypeSize);
-
- if (Total.getBitWidth() > Max.getBitWidth()) {
- if (Total.ugt(Max.zext(Total.getBitWidth())))
- return false;
- } else if (Max.getBitWidth() > Total.getBitWidth()) {
- if (Total.zext(Max.getBitWidth()).ugt(Max))
- return false;
- } else if (Total.ugt(Max))
+ const APInt &ICmpConst = ICmpConstant->getValue();
+ APInt OverflowConst = cast<ConstantInt>(I->getOperand(1))->getValue();
+ if (Opc == Instruction::Sub)
+ OverflowConst = -OverflowConst;
+ if (!OverflowConst.isNonPositive())
return false;
- LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for "
- << *I << "\n");
- SafeWrap.push_back(I);
- return true;
+ // Using C1 = OverflowConst and C2 = ICmpConst, we can either prove that:
+ // zext(x) + sext(C1) <u zext(C2) if C1 < 0 and C1 >s C2
+ // zext(x) + sext(C1) <u sext(C2) if C1 < 0 and C1 <=s C2
+ if (OverflowConst.sgt(ICmpConst)) {
+ LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for sext "
+ << "const of " << *I << "\n");
+ SafeWrap.insert(I);
+ return true;
+ } else {
+ LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for sext "
+ << "const of " << *I << " and " << *CI << "\n");
+ SafeWrap.insert(I);
+ SafeWrap.insert(CI);
+ return true;
+ }
+ return false;
}
bool TypePromotion::shouldPromote(Value *V) {
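A brute-force check of the wrap reasoning above for one concrete case, assuming an i8 value promoted to i32 with C1 = -10 and C2 = 0xF0 (so C1 >s C2 and the zext(C2) form applies). This is only an illustration, not part of the pass.

#include <cstdint>
#include <cstdio>

int main() {
  // Original i8 compare:  (x - 10) <u 0xF0            (sub wraps in 8 bits)
  // Promoted i32 compare: zext(x) + sext(i8 -10) <u zext(i8 0xF0)
  // The two must agree for every possible i8 input.
  for (uint32_t X = 0; X < 256; ++X) {
    uint8_t Narrow = static_cast<uint8_t>(X) - 10u;           // wraps in i8
    bool Before = Narrow < 0xF0u;
    uint32_t Wide = X + static_cast<uint32_t>(int32_t(-10));  // sext'd C1
    bool After = Wide < 0xF0u;
    if (Before != After) {
      std::printf("mismatch at x=%u\n", X);
      return 1;
    }
  }
  std::puts("promoted compare agrees with the i8 compare for all 256 inputs");
  return 0;
}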
@@ -403,17 +393,14 @@ bool TypePromotion::shouldPromote(Value *V) {
/// Return whether we can safely mutate V's type to ExtTy without having to be
/// concerned with zero extending or truncation.
-static bool isPromotedResultSafe(Value *V) {
- if (GenerateSignBits(V))
+static bool isPromotedResultSafe(Instruction *I) {
+ if (GenerateSignBits(I))
return false;
- if (!isa<Instruction>(V))
+ if (!isa<OverflowingBinaryOperator>(I))
return true;
- if (!isa<OverflowingBinaryOperator>(V))
- return true;
-
- return cast<Instruction>(V)->hasNoUnsignedWrap();
+ return I->hasNoUnsignedWrap();
}
void IRPromoter::ReplaceAllUsersOfWith(Value *From, Value *To) {
@@ -422,7 +409,7 @@ void IRPromoter::ReplaceAllUsersOfWith(Value *From, Value *To) {
bool ReplacedAll = true;
LLVM_DEBUG(dbgs() << "IR Promotion: Replacing " << *From << " with " << *To
- << "\n");
+ << "\n");
for (Use &U : From->uses()) {
auto *User = cast<Instruction>(U.getUser());
@@ -441,39 +428,6 @@ void IRPromoter::ReplaceAllUsersOfWith(Value *From, Value *To) {
InstsToRemove.insert(I);
}
-void IRPromoter::PrepareWrappingAdds() {
- LLVM_DEBUG(dbgs() << "IR Promotion: Prepare wrapping adds.\n");
- IRBuilder<> Builder{Ctx};
-
- // For adds that safely wrap and use a negative immediate as operand 1, we
- // create an equivalent instruction using a positive immediate.
- // That positive immediate can then be zext along with all the other
- // immediates later.
- for (auto *I : SafeWrap) {
- if (I->getOpcode() != Instruction::Add)
- continue;
-
- LLVM_DEBUG(dbgs() << "IR Promotion: Adjusting " << *I << "\n");
- assert((isa<ConstantInt>(I->getOperand(1)) &&
- cast<ConstantInt>(I->getOperand(1))->isNegative()) &&
- "Wrapping should have a negative immediate as the second operand");
-
- auto Const = cast<ConstantInt>(I->getOperand(1));
- auto *NewConst = ConstantInt::get(Ctx, Const->getValue().abs());
- Builder.SetInsertPoint(I);
- Value *NewVal = Builder.CreateSub(I->getOperand(0), NewConst);
- if (auto *NewInst = dyn_cast<Instruction>(NewVal)) {
- NewInst->copyIRFlags(I);
- NewInsts.insert(NewInst);
- }
- InstsToRemove.insert(I);
- I->replaceAllUsesWith(NewVal);
- LLVM_DEBUG(dbgs() << "IR Promotion: New equivalent: " << *NewVal << "\n");
- }
- for (auto *I : NewInsts)
- Visited.insert(I);
-}
-
void IRPromoter::ExtendSources() {
IRBuilder<> Builder{Ctx};
@@ -515,8 +469,6 @@ void IRPromoter::ExtendSources() {
void IRPromoter::PromoteTree() {
LLVM_DEBUG(dbgs() << "IR Promotion: Mutating the tree..\n");
- IRBuilder<> Builder{Ctx};
-
// Mutate the types of the instructions within the tree. Here we handle
// constant operands.
for (auto *V : Visited) {
@@ -533,14 +485,16 @@ void IRPromoter::PromoteTree() {
continue;
if (auto *Const = dyn_cast<ConstantInt>(Op)) {
- Constant *NewConst = ConstantExpr::getZExt(Const, ExtTy);
+ Constant *NewConst = SafeWrap.contains(I)
+ ? ConstantExpr::getSExt(Const, ExtTy)
+ : ConstantExpr::getZExt(Const, ExtTy);
I->setOperand(i, NewConst);
} else if (isa<UndefValue>(Op))
I->setOperand(i, UndefValue::get(ExtTy));
}
- // Mutate the result type, unless this is an icmp.
- if (!isa<ICmpInst>(I)) {
+ // Mutate the result type, unless this is an icmp or switch.
+ if (!isa<ICmpInst>(I) && !isa<SwitchInst>(I)) {
I->mutateType(ExtTy);
Promoted.insert(I);
}
@@ -575,7 +529,7 @@ void IRPromoter::TruncateSinks() {
// Handle calls separately as we need to iterate over arg operands.
if (auto *Call = dyn_cast<CallInst>(I)) {
- for (unsigned i = 0; i < Call->getNumArgOperands(); ++i) {
+ for (unsigned i = 0; i < Call->arg_size(); ++i) {
Value *Arg = Call->getArgOperand(i);
Type *Ty = TruncTysMap[Call][i];
if (Instruction *Trunc = InsertTrunc(Arg, Ty)) {
@@ -678,10 +632,8 @@ void IRPromoter::Mutate() {
// Cache original types of the values that will likely need truncating
for (auto *I : Sinks) {
if (auto *Call = dyn_cast<CallInst>(I)) {
- for (unsigned i = 0; i < Call->getNumArgOperands(); ++i) {
- Value *Arg = Call->getArgOperand(i);
+ for (Value *Arg : Call->args())
TruncTysMap[Call].push_back(Arg->getType());
- }
} else if (auto *Switch = dyn_cast<SwitchInst>(I))
TruncTysMap[I].push_back(Switch->getCondition()->getType());
else {
@@ -696,10 +648,6 @@ void IRPromoter::Mutate() {
TruncTysMap[Trunc].push_back(Trunc->getDestTy());
}
- // Convert adds using negative immediates to equivalent instructions that use
- // positive constants.
- PrepareWrappingAdds();
-
// Insert zext instructions between sources and their users.
ExtendSources();
@@ -798,7 +746,7 @@ bool TypePromotion::isLegalToPromote(Value *V) {
if (SafeToPromote.count(I))
return true;
- if (isPromotedResultSafe(V) || isSafeWrap(I)) {
+ if (isPromotedResultSafe(I) || isSafeWrap(I)) {
SafeToPromote.insert(I);
return true;
}
@@ -815,7 +763,7 @@ bool TypePromotion::TryToPromote(Value *V, unsigned PromotedWidth) {
return false;
LLVM_DEBUG(dbgs() << "IR Promotion: TryToPromote: " << *V << ", from "
- << TypeSize << " bits to " << PromotedWidth << "\n");
+ << TypeSize << " bits to " << PromotedWidth << "\n");
SetVector<Value*> WorkList;
SetVector<Value*> Sources;
diff --git a/llvm/lib/CodeGen/ValueTypes.cpp b/llvm/lib/CodeGen/ValueTypes.cpp
index 9daebfd9e63d..4876b9e23717 100644
--- a/llvm/lib/CodeGen/ValueTypes.cpp
+++ b/llvm/lib/CodeGen/ValueTypes.cpp
@@ -167,6 +167,7 @@ std::string EVT::getEVTString() const {
case MVT::Glue: return "glue";
case MVT::x86mmx: return "x86mmx";
case MVT::x86amx: return "x86amx";
+ case MVT::i64x8: return "i64x8";
case MVT::Metadata: return "Metadata";
case MVT::Untyped: return "Untyped";
case MVT::funcref: return "funcref";
@@ -198,6 +199,7 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
case MVT::ppcf128: return Type::getPPC_FP128Ty(Context);
case MVT::x86mmx: return Type::getX86_MMXTy(Context);
case MVT::x86amx: return Type::getX86_AMXTy(Context);
+ case MVT::i64x8: return IntegerType::get(Context, 512);
case MVT::externref:
return PointerType::get(StructType::create(Context), 10);
case MVT::funcref:
diff --git a/llvm/lib/CodeGen/VirtRegMap.cpp b/llvm/lib/CodeGen/VirtRegMap.cpp
index 0f164e2637a2..069aca742da0 100644
--- a/llvm/lib/CodeGen/VirtRegMap.cpp
+++ b/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -541,15 +541,8 @@ void VirtRegRewriter::rewrite() {
for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
MBBI != MBBE; ++MBBI) {
LLVM_DEBUG(MBBI->print(dbgs(), Indexes));
- for (MachineBasicBlock::instr_iterator
- MII = MBBI->instr_begin(), MIE = MBBI->instr_end(); MII != MIE;) {
- MachineInstr *MI = &*MII;
- ++MII;
-
- for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
- MOE = MI->operands_end(); MOI != MOE; ++MOI) {
- MachineOperand &MO = *MOI;
-
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBBI->instrs())) {
+ for (MachineOperand &MO : MI.operands()) {
// Make sure MRI knows about registers clobbered by regmasks.
if (MO.isRegMask())
MRI->addPhysRegsUsedFromRegMask(MO.getRegMask());
@@ -574,7 +567,7 @@ void VirtRegRewriter::rewrite() {
// have to add implicit killed operands for the super-register. A
// partial redef always kills and redefines the super-register.
if ((MO.readsReg() && (MO.isDef() || MO.isKill())) ||
- (MO.isDef() && subRegLiveThrough(*MI, PhysReg)))
+ (MO.isDef() && subRegLiveThrough(MI, PhysReg)))
SuperKills.push_back(PhysReg);
if (MO.isDef()) {
@@ -619,20 +612,20 @@ void VirtRegRewriter::rewrite() {
// Add any missing super-register kills after rewriting the whole
// instruction.
while (!SuperKills.empty())
- MI->addRegisterKilled(SuperKills.pop_back_val(), TRI, true);
+ MI.addRegisterKilled(SuperKills.pop_back_val(), TRI, true);
while (!SuperDeads.empty())
- MI->addRegisterDead(SuperDeads.pop_back_val(), TRI, true);
+ MI.addRegisterDead(SuperDeads.pop_back_val(), TRI, true);
while (!SuperDefs.empty())
- MI->addRegisterDefined(SuperDefs.pop_back_val(), TRI);
+ MI.addRegisterDefined(SuperDefs.pop_back_val(), TRI);
- LLVM_DEBUG(dbgs() << "> " << *MI);
+ LLVM_DEBUG(dbgs() << "> " << MI);
- expandCopyBundle(*MI);
+ expandCopyBundle(MI);
// We can remove identity copies right now.
- handleIdentityCopy(*MI);
+ handleIdentityCopy(MI);
}
}
diff --git a/llvm/lib/CodeGen/WasmEHPrepare.cpp b/llvm/lib/CodeGen/WasmEHPrepare.cpp
index c4c84cd921fa..c04a7b28eff9 100644
--- a/llvm/lib/CodeGen/WasmEHPrepare.cpp
+++ b/llvm/lib/CodeGen/WasmEHPrepare.cpp
@@ -29,7 +29,7 @@
// __wasm_lpad_context.lpad_index = index;
// __wasm_lpad_context.lsda = wasm.lsda();
// _Unwind_CallPersonality(exn);
-// selector = __wasm.landingpad_context.selector;
+// selector = __wasm_lpad_context.selector;
// ...
//
//
@@ -329,7 +329,7 @@ void WasmEHPrepare::prepareEHPad(BasicBlock *BB, bool NeedPersonality,
OperandBundleDef("funclet", CPI));
PersCI->setDoesNotThrow();
- // Pseudocode: int selector = __wasm.landingpad_context.selector;
+ // Pseudocode: int selector = __wasm_lpad_context.selector;
Instruction *Selector =
IRB.CreateLoad(IRB.getInt32Ty(), SelectorField, "selector");
diff --git a/llvm/lib/DWARFLinker/DWARFLinker.cpp b/llvm/lib/DWARFLinker/DWARFLinker.cpp
index 4449cd8ef555..a3dec6c25e44 100644
--- a/llvm/lib/DWARFLinker/DWARFLinker.cpp
+++ b/llvm/lib/DWARFLinker/DWARFLinker.cpp
@@ -549,6 +549,7 @@ static void updateChildIncompleteness(const DWARFDie &Die, CompileUnit &CU,
switch (Die.getTag()) {
case dwarf::DW_TAG_structure_type:
case dwarf::DW_TAG_class_type:
+ case dwarf::DW_TAG_union_type:
break;
default:
return;
diff --git a/llvm/lib/DWARFLinker/DWARFStreamer.cpp b/llvm/lib/DWARFLinker/DWARFStreamer.cpp
index 3a9f79e47012..46e7457f2368 100644
--- a/llvm/lib/DWARFLinker/DWARFStreamer.cpp
+++ b/llvm/lib/DWARFLinker/DWARFStreamer.cpp
@@ -21,8 +21,8 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/MCTargetOptionsCommandFlags.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/LEB128.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetOptions.h"
namespace llvm {
diff --git a/llvm/lib/DebugInfo/CodeView/ContinuationRecordBuilder.cpp b/llvm/lib/DebugInfo/CodeView/ContinuationRecordBuilder.cpp
index 799cffb7116e..c7b1c65f2f9a 100644
--- a/llvm/lib/DebugInfo/CodeView/ContinuationRecordBuilder.cpp
+++ b/llvm/lib/DebugInfo/CodeView/ContinuationRecordBuilder.cpp
@@ -103,7 +103,7 @@ void ContinuationRecordBuilder::writeMemberType(RecordType &Record) {
if (getCurrentSegmentLength() > MaxSegmentLength) {
// We need to inject some bytes before the member we just wrote but after
// the previous member. Save off the length of the member we just wrote so
- // that we can do some sanity checking on it.
+ // that we can validate it.
uint32_t MemberLength = SegmentWriter.getOffset() - OriginalOffset;
(void) MemberLength;
insertSegmentEnd(OriginalOffset);
diff --git a/llvm/lib/DebugInfo/CodeView/SimpleTypeSerializer.cpp b/llvm/lib/DebugInfo/CodeView/SimpleTypeSerializer.cpp
index ac3b30175956..d963e34628db 100644
--- a/llvm/lib/DebugInfo/CodeView/SimpleTypeSerializer.cpp
+++ b/llvm/lib/DebugInfo/CodeView/SimpleTypeSerializer.cpp
@@ -53,7 +53,7 @@ ArrayRef<uint8_t> SimpleTypeSerializer::serialize(T &Record) {
Prefix->RecordKind = CVT.kind();
Prefix->RecordLen = Writer.getOffset() - sizeof(uint16_t);
- return {ScratchBuffer.data(), Writer.getOffset()};
+ return {ScratchBuffer.data(), static_cast<size_t>(Writer.getOffset())};
}
// Explicitly instantiate the member function for each known type so that we can
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp b/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
index ee1ff5460b9b..1be5a752453a 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
@@ -147,41 +147,57 @@ DWARFAbbreviationDeclaration::findAttributeIndex(dwarf::Attribute Attr) const {
return None;
}
-Optional<DWARFFormValue> DWARFAbbreviationDeclaration::getAttributeValue(
- const uint64_t DIEOffset, const dwarf::Attribute Attr,
- const DWARFUnit &U) const {
- // Check if this abbreviation has this attribute without needing to skip
- // any data so we can return quickly if it doesn't.
- Optional<uint32_t> MatchAttrIndex = findAttributeIndex(Attr);
- if (!MatchAttrIndex)
- return None;
-
- auto DebugInfoData = U.getDebugInfoExtractor();
+uint64_t DWARFAbbreviationDeclaration::getAttributeOffsetFromIndex(
+ uint32_t AttrIndex, uint64_t DIEOffset, const DWARFUnit &U) const {
+ DWARFDataExtractor DebugInfoData = U.getDebugInfoExtractor();
// Add the byte size of the ULEB for the abbrev Code so we can start
// skipping the attribute data.
uint64_t Offset = DIEOffset + CodeByteSize;
- for (uint32_t CurAttrIdx = 0; CurAttrIdx != *MatchAttrIndex; ++CurAttrIdx)
+ for (uint32_t CurAttrIdx = 0; CurAttrIdx != AttrIndex; ++CurAttrIdx)
// Match Offset along until we get to the attribute we want.
if (auto FixedSize = AttributeSpecs[CurAttrIdx].getByteSize(U))
Offset += *FixedSize;
else
DWARFFormValue::skipValue(AttributeSpecs[CurAttrIdx].Form, DebugInfoData,
&Offset, U.getFormParams());
+ return Offset;
+}
+
+Optional<DWARFFormValue>
+DWARFAbbreviationDeclaration::getAttributeValueFromOffset(
+ uint32_t AttrIndex, uint64_t Offset, const DWARFUnit &U) const {
+ assert(AttributeSpecs.size() > AttrIndex &&
+ "Attribute Index is out of bounds.");
// We have arrived at the attribute to extract; extract it from Offset.
- const AttributeSpec &Spec = AttributeSpecs[*MatchAttrIndex];
+ const AttributeSpec &Spec = AttributeSpecs[AttrIndex];
if (Spec.isImplicitConst())
return DWARFFormValue::createFromSValue(Spec.Form,
Spec.getImplicitConstValue());
DWARFFormValue FormValue(Spec.Form);
+ DWARFDataExtractor DebugInfoData = U.getDebugInfoExtractor();
if (FormValue.extractValue(DebugInfoData, &Offset, U.getFormParams(), &U))
return FormValue;
-
return None;
}
+Optional<DWARFFormValue>
+DWARFAbbreviationDeclaration::getAttributeValue(const uint64_t DIEOffset,
+ const dwarf::Attribute Attr,
+ const DWARFUnit &U) const {
+ // Check if this abbreviation has this attribute without needing to skip
+ // any data so we can return quickly if it doesn't.
+ Optional<uint32_t> MatchAttrIndex = findAttributeIndex(Attr);
+ if (!MatchAttrIndex)
+ return None;
+
+ uint64_t Offset = getAttributeOffsetFromIndex(*MatchAttrIndex, DIEOffset, U);
+
+ return getAttributeValueFromOffset(*MatchAttrIndex, Offset, U);
+}
+
size_t DWARFAbbreviationDeclaration::FixedSizeInfo::getByteSize(
const DWARFUnit &U) const {
size_t ByteSize = NumBytes;
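The split above separates computing an attribute's offset from extracting its value, so a caller that already knows the attribute index can reuse the offset. A minimal sketch of the two-step API, where Decl, DIEOffset and U are placeholders for an abbreviation declaration, a DIE offset and its unit:

  // Sketch only: read one attribute via the two new entry points.
  if (Optional<uint32_t> Idx = Decl.findAttributeIndex(dwarf::DW_AT_name)) {
    uint64_t Off = Decl.getAttributeOffsetFromIndex(*Idx, DIEOffset, U);
    Optional<DWARFFormValue> Val =
        Decl.getAttributeValueFromOffset(*Idx, Off, U);
    // Val, if present, holds the form value for DW_AT_name.
  }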
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp b/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
index 28d35b609c24..c77d4d4d989c 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
@@ -767,7 +767,7 @@ LLVM_DUMP_METHOD void DWARFDebugNames::NameIndex::dump(ScopedPrinter &W) const {
}
W.startLine() << "Hash table not present\n";
- for (NameTableEntry NTE : *this)
+ for (const NameTableEntry &NTE : *this)
dumpName(W, NTE, None);
}
@@ -799,7 +799,7 @@ DWARFDebugNames::ValueIterator::findEntryOffsetInCurrentIndex() {
const Header &Hdr = CurrentIndex->Hdr;
if (Hdr.BucketCount == 0) {
// No Hash Table, We need to search through all names in the Name Index.
- for (NameTableEntry NTE : *CurrentIndex) {
+ for (const NameTableEntry &NTE : *CurrentIndex) {
if (NTE.getString() == Key)
return NTE.getEntryOffset();
}
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
index 4e1cafeb2126..c8331487f282 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -33,6 +33,7 @@
#include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h"
#include "llvm/DebugInfo/DWARF/DWARFVerifier.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/Decompressor.h"
#include "llvm/Object/MachO.h"
#include "llvm/Object/ObjectFile.h"
@@ -44,7 +45,6 @@
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cstdint>
@@ -693,6 +693,18 @@ void DWARFContext::dump(
getDebugNames().dump(OS);
}
+DWARFTypeUnit *DWARFContext::getTypeUnitForHash(uint16_t Version, uint64_t Hash,
+ bool IsDWO) {
+ // FIXME: Check for/use the tu_index here, if there is one.
+ for (const auto &U : IsDWO ? dwo_units() : normal_units()) {
+ if (DWARFTypeUnit *TU = dyn_cast<DWARFTypeUnit>(U.get())) {
+ if (TU->getTypeHash() == Hash)
+ return TU;
+ }
+ }
+ return nullptr;
+}
+
DWARFCompileUnit *DWARFContext::getDWOCompileUnitForHash(uint64_t Hash) {
parseDWOUnits(LazyParse);
@@ -1411,7 +1423,8 @@ DWARFContext::getDWOContext(StringRef AbsolutePath) {
auto S = std::make_shared<DWOFile>();
S->File = std::move(Obj.get());
- S->Context = DWARFContext::create(*S->File.getBinary());
+ S->Context = DWARFContext::create(*S->File.getBinary(),
+ ProcessDebugRelocations::Ignore);
*Entry = S;
auto *Ctxt = S->Context.get();
return std::shared_ptr<DWARFContext>(std::move(S), Ctxt);
@@ -1652,7 +1665,9 @@ public:
}
}
DWARFObjInMemory(const object::ObjectFile &Obj, const LoadedObjectInfo *L,
- function_ref<void(Error)> HandleError, function_ref<void(Error)> HandleWarning )
+ function_ref<void(Error)> HandleError,
+ function_ref<void(Error)> HandleWarning,
+ DWARFContext::ProcessDebugRelocations RelocAction)
: IsLittleEndian(Obj.isLittleEndian()),
AddressSize(Obj.getBytesInAddress()), FileName(Obj.getFileName()),
Obj(&Obj) {
@@ -1735,7 +1750,12 @@ public:
S.Data = Data;
}
- if (RelocatedSection == Obj.section_end())
+ if (RelocatedSection != Obj.section_end() && Name.contains(".dwo"))
+ HandleWarning(
+ createError("Unexpected relocations for dwo section " + Name));
+
+ if (RelocatedSection == Obj.section_end() ||
+ (RelocAction == DWARFContext::ProcessDebugRelocations::Ignore))
continue;
StringRef RelSecName;
@@ -1772,18 +1792,10 @@ public:
if (RelSecName == "debug_info")
Map = &static_cast<DWARFSectionMap &>(InfoSections[*RelocatedSection])
.Relocs;
- else if (RelSecName == "debug_info.dwo")
- Map = &static_cast<DWARFSectionMap &>(
- InfoDWOSections[*RelocatedSection])
- .Relocs;
else if (RelSecName == "debug_types")
Map =
&static_cast<DWARFSectionMap &>(TypesSections[*RelocatedSection])
.Relocs;
- else if (RelSecName == "debug_types.dwo")
- Map = &static_cast<DWARFSectionMap &>(
- TypesDWOSections[*RelocatedSection])
- .Relocs;
else
continue;
}
@@ -1966,12 +1978,13 @@ public:
} // namespace
std::unique_ptr<DWARFContext>
-DWARFContext::create(const object::ObjectFile &Obj, const LoadedObjectInfo *L,
- std::string DWPName,
+DWARFContext::create(const object::ObjectFile &Obj,
+ ProcessDebugRelocations RelocAction,
+ const LoadedObjectInfo *L, std::string DWPName,
std::function<void(Error)> RecoverableErrorHandler,
std::function<void(Error)> WarningHandler) {
- auto DObj =
- std::make_unique<DWARFObjInMemory>(Obj, L, RecoverableErrorHandler, WarningHandler);
+ auto DObj = std::make_unique<DWARFObjInMemory>(
+ Obj, L, RecoverableErrorHandler, WarningHandler, RelocAction);
return std::make_unique<DWARFContext>(std::move(DObj), std::move(DWPName),
RecoverableErrorHandler,
WarningHandler);
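With the new ProcessDebugRelocations parameter, getDWOContext above can skip relocation processing for split debug info. A minimal sketch of the new call shape, assuming the trailing parameters keep default values in the header and Obj is a placeholder object file:

  // Sketch only: build a context that ignores debug relocations.
  std::unique_ptr<DWARFContext> Ctx = DWARFContext::create(
      Obj, DWARFContext::ProcessDebugRelocations::Ignore);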
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp
index dcf2aefeb39f..5b1c62e6a259 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp
@@ -8,7 +8,7 @@
#include "llvm/DebugInfo/DWARF/DWARFDebugAddr.h"
#include "llvm/BinaryFormat/Dwarf.h"
-#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
using namespace llvm;
@@ -18,12 +18,10 @@ Error DWARFDebugAddrTable::extractAddresses(const DWARFDataExtractor &Data,
assert(EndOffset >= *OffsetPtr);
uint64_t DataSize = EndOffset - *OffsetPtr;
assert(Data.isValidOffsetForDataOfSize(*OffsetPtr, DataSize));
- if (AddrSize != 4 && AddrSize != 8)
- return createStringError(errc::not_supported,
- "address table at offset 0x%" PRIx64
- " has unsupported address size %" PRIu8
- " (4 and 8 are supported)",
- Offset, AddrSize);
+ if (Error SizeErr = DWARFContext::checkAddressSizeSupported(
+ AddrSize, errc::not_supported, "address table at offset 0x%" PRIx64,
+ Offset))
+ return SizeErr;
if (DataSize % AddrSize != 0) {
invalidateLength();
return createStringError(errc::invalid_argument,
@@ -148,8 +146,20 @@ void DWARFDebugAddrTable::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const {
}
if (Addrs.size() > 0) {
- const char *AddrFmt =
- (AddrSize == 4) ? "0x%8.8" PRIx64 "\n" : "0x%16.16" PRIx64 "\n";
+ const char *AddrFmt;
+ switch (AddrSize) {
+ case 2:
+ AddrFmt = "0x%4.4" PRIx64 "\n";
+ break;
+ case 4:
+ AddrFmt = "0x%8.8" PRIx64 "\n";
+ break;
+ case 8:
+ AddrFmt = "0x%16.16" PRIx64 "\n";
+ break;
+ default:
+ llvm_unreachable("unsupported address size");
+ }
OS << "Addrs: [\n";
for (uint64_t Addr : Addrs)
OS << format(AddrFmt, Addr);
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp
index 598e3ecee30e..c60c9d9d7227 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp
@@ -8,6 +8,7 @@
#include "llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h"
#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Format.h"
@@ -87,12 +88,10 @@ Error DWARFDebugArangeSet::extract(DWARFDataExtractor data,
"the length of address range table at offset "
"0x%" PRIx64 " exceeds section size",
Offset);
- if (HeaderData.AddrSize != 4 && HeaderData.AddrSize != 8)
- return createStringError(errc::invalid_argument,
- "address range table at offset 0x%" PRIx64
- " has unsupported address size: %d "
- "(4 and 8 supported)",
- Offset, HeaderData.AddrSize);
+ if (Error SizeErr = DWARFContext::checkAddressSizeSupported(
+ HeaderData.AddrSize, errc::invalid_argument,
+ "address range table at offset 0x%" PRIx64, Offset))
+ return SizeErr;
if (HeaderData.SegSize != 0)
return createStringError(errc::not_supported,
"non-zero segment selector size in address range "
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
index 7ebb0092c34a..385bde51e2e7 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
@@ -19,18 +19,11 @@
using namespace llvm;
using namespace dwarf;
-bool DWARFDebugInfoEntry::extractFast(const DWARFUnit &U,
- uint64_t *OffsetPtr) {
- DWARFDataExtractor DebugInfoData = U.getDebugInfoExtractor();
- const uint64_t UEndOffset = U.getNextUnitOffset();
- return extractFast(U, OffsetPtr, DebugInfoData, UEndOffset, 0);
-}
-
bool DWARFDebugInfoEntry::extractFast(const DWARFUnit &U, uint64_t *OffsetPtr,
const DWARFDataExtractor &DebugInfoData,
- uint64_t UEndOffset, uint32_t D) {
+ uint64_t UEndOffset, uint32_t ParentIdx) {
Offset = *OffsetPtr;
- Depth = D;
+ this->ParentIdx = ParentIdx;
if (Offset >= UEndOffset) {
U.getContext().getWarningHandler()(
createStringError(errc::invalid_argument,
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp
index dc7da5d9348f..cad3dcab8a7e 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp
@@ -16,6 +16,12 @@
using namespace llvm;
+bool DWARFDebugRangeList::RangeListEntry::isBaseAddressSelectionEntry(
+ uint8_t AddressSize) const {
+ assert(DWARFContext::isAddressSizeSupported(AddressSize));
+ return StartAddress == dwarf::computeTombstoneAddress(AddressSize);
+}
+
void DWARFDebugRangeList::clear() {
Offset = -1ULL;
AddressSize = 0;
@@ -30,9 +36,10 @@ Error DWARFDebugRangeList::extract(const DWARFDataExtractor &data,
"invalid range list offset 0x%" PRIx64, *offset_ptr);
AddressSize = data.getAddressSize();
- if (AddressSize != 4 && AddressSize != 8)
- return createStringError(errc::invalid_argument,
- "invalid address size: %" PRIu8, AddressSize);
+ if (Error SizeErr = DWARFContext::checkAddressSizeSupported(
+ AddressSize, errc::invalid_argument,
+ "range list at offset 0x%" PRIx64, *offset_ptr))
+ return SizeErr;
Offset = *offset_ptr;
while (true) {
RangeListEntry Entry;
@@ -58,12 +65,22 @@ Error DWARFDebugRangeList::extract(const DWARFDataExtractor &data,
}
void DWARFDebugRangeList::dump(raw_ostream &OS) const {
- for (const RangeListEntry &RLE : Entries) {
- const char *format_str =
- (AddressSize == 4 ? "%08" PRIx64 " %08" PRIx64 " %08" PRIx64 "\n"
- : "%08" PRIx64 " %016" PRIx64 " %016" PRIx64 "\n");
- OS << format(format_str, Offset, RLE.StartAddress, RLE.EndAddress);
+ const char *AddrFmt;
+ switch (AddressSize) {
+ case 2:
+ AddrFmt = "%08" PRIx64 " %04" PRIx64 " %04" PRIx64 "\n";
+ break;
+ case 4:
+ AddrFmt = "%08" PRIx64 " %08" PRIx64 " %08" PRIx64 "\n";
+ break;
+ case 8:
+ AddrFmt = "%08" PRIx64 " %016" PRIx64 " %016" PRIx64 "\n";
+ break;
+ default:
+ llvm_unreachable("unsupported address size");
}
+ for (const RangeListEntry &RLE : Entries)
+ OS << format(AddrFmt, Offset, RLE.StartAddress, RLE.EndAddress);
OS << format("%08" PRIx64 " <End of list>\n", Offset);
}
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
index 0501e3ee3f9b..ed50f2635738 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
@@ -108,17 +108,41 @@ static void dumpLocationExpr(raw_ostream &OS, const DWARFFormValue &FormValue,
return;
}
-/// Dump the name encoded in the type tag.
-static void dumpTypeTagName(raw_ostream &OS, dwarf::Tag T) {
- StringRef TagStr = TagString(T);
- if (!TagStr.startswith("DW_TAG_") || !TagStr.endswith("_type"))
- return;
- OS << TagStr.substr(7, TagStr.size() - 12) << " ";
+static DWARFDie resolveReferencedType(DWARFDie D,
+ dwarf::Attribute Attr = DW_AT_type) {
+ return D.getAttributeValueAsReferencedDie(Attr).resolveTypeUnitReference();
+}
+static DWARFDie resolveReferencedType(DWARFDie D, DWARFFormValue F) {
+ return D.getAttributeValueAsReferencedDie(F).resolveTypeUnitReference();
}
-static void dumpArrayType(raw_ostream &OS, const DWARFDie &D) {
- for (const DWARFDie &C : D.children())
- if (C.getTag() == DW_TAG_subrange_type) {
+namespace {
+
+// FIXME: We should have pretty printers per language. Currently we print
+// everything as if it was C++ and fall back to the TAG type name.
+struct DWARFTypePrinter {
+ raw_ostream &OS;
+ bool Word = true;
+ bool EndedWithTemplate = false;
+
+ DWARFTypePrinter(raw_ostream &OS) : OS(OS) {}
+
+ /// Dump the name encoded in the type tag.
+ void appendTypeTagName(dwarf::Tag T) {
+ StringRef TagStr = TagString(T);
+ static constexpr StringRef Prefix = "DW_TAG_";
+ static constexpr StringRef Suffix = "_type";
+ if (!TagStr.startswith(Prefix) || !TagStr.endswith(Suffix))
+ return;
+ OS << TagStr.substr(Prefix.size(),
+ TagStr.size() - (Prefix.size() + Suffix.size()))
+ << " ";
+ }
+
+ void appendArrayType(const DWARFDie &D) {
+ for (const DWARFDie &C : D.children()) {
+ if (C.getTag() != DW_TAG_subrange_type)
+ continue;
Optional<uint64_t> LB;
Optional<uint64_t> Count;
Optional<uint64_t> UB;
@@ -159,79 +183,503 @@ static void dumpArrayType(raw_ostream &OS, const DWARFDie &D) {
OS << ")]";
}
}
-}
-
-/// Recursively dump the DIE type name when applicable.
-static void dumpTypeName(raw_ostream &OS, const DWARFDie &D) {
- if (!D.isValid())
- return;
+ EndedWithTemplate = false;
+ }
- if (const char *Name = D.getName(DINameKind::LinkageName)) {
- OS << Name;
- return;
+ DWARFDie skipQualifiers(DWARFDie D) {
+ while (D && (D.getTag() == DW_TAG_const_type ||
+ D.getTag() == DW_TAG_volatile_type))
+ D = resolveReferencedType(D);
+ return D;
}
- // FIXME: We should have pretty printers per language. Currently we print
- // everything as if it was C++ and fall back to the TAG type name.
- const dwarf::Tag T = D.getTag();
- switch (T) {
- case DW_TAG_array_type:
- case DW_TAG_pointer_type:
- case DW_TAG_ptr_to_member_type:
- case DW_TAG_reference_type:
- case DW_TAG_rvalue_reference_type:
- case DW_TAG_subroutine_type:
- break;
- default:
- dumpTypeTagName(OS, T);
+ bool needsParens(DWARFDie D) {
+ D = skipQualifiers(D);
+ return D && (D.getTag() == DW_TAG_subroutine_type ||
+              D.getTag() == DW_TAG_array_type);
}
- // Follow the DW_AT_type if possible.
- DWARFDie TypeDie = D.getAttributeValueAsReferencedDie(DW_AT_type);
- dumpTypeName(OS, TypeDie);
+ void appendPointerLikeTypeBefore(DWARFDie D, DWARFDie Inner, StringRef Ptr) {
+ appendQualifiedNameBefore(Inner);
+ if (Word)
+ OS << ' ';
+ if (needsParens(Inner))
+ OS << '(';
+ OS << Ptr;
+ Word = false;
+ EndedWithTemplate = false;
+ }
- switch (T) {
- case DW_TAG_subroutine_type: {
- if (!TypeDie)
+ DWARFDie
+ appendUnqualifiedNameBefore(DWARFDie D,
+ std::string *OriginalFullName = nullptr) {
+ Word = true;
+ if (!D) {
OS << "void";
+ return DWARFDie();
+ }
+ DWARFDie Inner = resolveReferencedType(D);
+ const dwarf::Tag T = D.getTag();
+ switch (T) {
+ case DW_TAG_pointer_type: {
+ appendPointerLikeTypeBefore(D, Inner, "*");
+ break;
+ }
+ case DW_TAG_subroutine_type: {
+ appendQualifiedNameBefore(Inner);
+ if (Word) {
+ OS << ' ';
+ }
+ Word = false;
+ break;
+ }
+ case DW_TAG_array_type: {
+ appendQualifiedNameBefore(Inner);
+ break;
+ }
+ case DW_TAG_reference_type:
+ appendPointerLikeTypeBefore(D, Inner, "&");
+ break;
+ case DW_TAG_rvalue_reference_type:
+ appendPointerLikeTypeBefore(D, Inner, "&&");
+ break;
+ case DW_TAG_ptr_to_member_type: {
+ appendQualifiedNameBefore(Inner);
+ if (needsParens(Inner))
+ OS << '(';
+ else if (Word)
+ OS << ' ';
+ if (DWARFDie Cont = resolveReferencedType(D, DW_AT_containing_type)) {
+ appendQualifiedName(Cont);
+ OS << "::";
+ }
+ OS << "*";
+ Word = false;
+ break;
+ }
+ case DW_TAG_const_type:
+ case DW_TAG_volatile_type:
+ appendConstVolatileQualifierBefore(D);
+ break;
+ case DW_TAG_namespace: {
+ if (const char *Name = dwarf::toString(D.find(DW_AT_name), nullptr))
+ OS << Name;
+ else
+ OS << "(anonymous namespace)";
+ break;
+ }
+ case DW_TAG_unspecified_type: {
+ StringRef TypeName = D.getShortName();
+ if (TypeName == "decltype(nullptr)")
+ TypeName = "std::nullptr_t";
+ Word = true;
+ OS << TypeName;
+ EndedWithTemplate = false;
+ break;
+ }
+ /*
+ case DW_TAG_structure_type:
+ case DW_TAG_class_type:
+ case DW_TAG_enumeration_type:
+ case DW_TAG_base_type:
+ */
+ default: {
+ const char *NamePtr = dwarf::toString(D.find(DW_AT_name), nullptr);
+ if (!NamePtr) {
+ appendTypeTagName(D.getTag());
+ return Inner;
+ }
+ Word = true;
+ StringRef Name = NamePtr;
+ static constexpr StringRef MangledPrefix = "_STN";
+ if (Name.startswith(MangledPrefix)) {
+ Name = Name.drop_front(MangledPrefix.size());
+ auto Separator = Name.find('|');
+ assert(Separator != StringRef::npos);
+ StringRef BaseName = Name.substr(0, Separator);
+ StringRef TemplateArgs = Name.substr(Separator + 1);
+ if (OriginalFullName)
+ *OriginalFullName = (BaseName + TemplateArgs).str();
+ Name = BaseName;
+ } else
+ EndedWithTemplate = Name.endswith(">");
+ OS << Name;
+ // This check would be insufficient for operator overloads like
+ // "operator>>" - but for now Clang doesn't try to simplify them, so this
+ // is OK. Add more nuanced operator overload handling here if/when needed.
+ if (Name.endswith(">"))
+ break;
+ if (!appendTemplateParameters(D))
+ break;
+
+ if (EndedWithTemplate)
+ OS << ' ';
+ OS << '>';
+ EndedWithTemplate = true;
+ Word = true;
+ break;
+ }
+ }
+ return Inner;
+ }
+
+ void appendUnqualifiedNameAfter(DWARFDie D, DWARFDie Inner,
+ bool SkipFirstParamIfArtificial = false) {
+ if (!D)
+ return;
+ switch (D.getTag()) {
+ case DW_TAG_subroutine_type: {
+ appendSubroutineNameAfter(D, Inner, SkipFirstParamIfArtificial, false,
+ false);
+ break;
+ }
+ case DW_TAG_array_type: {
+ appendArrayType(D);
+ break;
+ }
+ case DW_TAG_const_type:
+ case DW_TAG_volatile_type:
+ appendConstVolatileQualifierAfter(D);
+ break;
+ case DW_TAG_ptr_to_member_type:
+ case DW_TAG_reference_type:
+ case DW_TAG_rvalue_reference_type:
+ case DW_TAG_pointer_type: {
+ if (needsParens(Inner))
+ OS << ')';
+ appendUnqualifiedNameAfter(Inner, resolveReferencedType(Inner),
+ /*SkipFirstParamIfArtificial=*/D.getTag() ==
+ DW_TAG_ptr_to_member_type);
+ break;
+ }
+ /*
+ case DW_TAG_structure_type:
+ case DW_TAG_class_type:
+ case DW_TAG_enumeration_type:
+ case DW_TAG_base_type:
+ case DW_TAG_namespace:
+ */
+ default:
+ break;
+ }
+ }
+
+ void appendQualifiedName(DWARFDie D) {
+ if (D)
+ appendScopes(D.getParent());
+ appendUnqualifiedName(D);
+ }
+ DWARFDie appendQualifiedNameBefore(DWARFDie D) {
+ if (D)
+ appendScopes(D.getParent());
+ return appendUnqualifiedNameBefore(D);
+ }
+ bool appendTemplateParameters(DWARFDie D, bool *FirstParameter = nullptr) {
+ bool FirstParameterValue = true;
+ bool IsTemplate = false;
+ if (!FirstParameter)
+ FirstParameter = &FirstParameterValue;
+ for (const DWARFDie &C : D) {
+ auto Sep = [&] {
+ if (*FirstParameter)
+ OS << '<';
+ else
+ OS << ", ";
+ IsTemplate = true;
+ EndedWithTemplate = false;
+ *FirstParameter = false;
+ };
+ if (C.getTag() == dwarf::DW_TAG_GNU_template_parameter_pack) {
+ IsTemplate = true;
+ appendTemplateParameters(C, FirstParameter);
+ }
+ if (C.getTag() == dwarf::DW_TAG_template_value_parameter) {
+ DWARFDie T = resolveReferencedType(C);
+ Sep();
+ if (T.getTag() == DW_TAG_enumeration_type) {
+ auto V = C.find(DW_AT_const_value);
+ bool FoundEnumerator = false;
+ for (const DWARFDie &Enumerator : T) {
+ auto EV = Enumerator.find(DW_AT_const_value);
+ if (V && EV &&
+ V->getAsSignedConstant() == EV->getAsSignedConstant()) {
+ if (T.find(DW_AT_enum_class)) {
+ appendQualifiedName(T);
+ OS << "::";
+ } else
+ appendScopes(T.getParent());
+ OS << Enumerator.getShortName();
+ FoundEnumerator = true;
+ break;
+ }
+ }
+ if (FoundEnumerator)
+ continue;
+ OS << '(';
+ appendQualifiedName(T);
+ OS << ')';
+ OS << to_string(*V->getAsSignedConstant());
+ continue;
+ }
+ // /Maybe/ we could do pointer type parameters, looking for the
+ // symbol in the ELF symbol table to get back to the variable...
+ // but probably not worth it.
+ if (T.getTag() == DW_TAG_pointer_type)
+ continue;
+ const char *RawName = dwarf::toString(T.find(DW_AT_name), nullptr);
+ assert(RawName);
+ StringRef Name = RawName;
+ auto V = C.find(DW_AT_const_value);
+ bool IsQualifiedChar = false;
+ if (Name == "bool") {
+ OS << (*V->getAsUnsignedConstant() ? "true" : "false");
+ } else if (Name == "short") {
+ OS << "(short)";
+ OS << to_string(*V->getAsSignedConstant());
+ } else if (Name == "unsigned short") {
+ OS << "(unsigned short)";
+ OS << to_string(*V->getAsSignedConstant());
+ } else if (Name == "int")
+ OS << to_string(*V->getAsSignedConstant());
+ else if (Name == "long") {
+ OS << to_string(*V->getAsSignedConstant());
+ OS << "L";
+ } else if (Name == "long long") {
+ OS << to_string(*V->getAsSignedConstant());
+ OS << "LL";
+ } else if (Name == "unsigned int") {
+ OS << to_string(*V->getAsUnsignedConstant());
+ OS << "U";
+ } else if (Name == "unsigned long") {
+ OS << to_string(*V->getAsUnsignedConstant());
+ OS << "UL";
+ } else if (Name == "unsigned long long") {
+ OS << to_string(*V->getAsUnsignedConstant());
+ OS << "ULL";
+ } else if (Name == "char" ||
+ (IsQualifiedChar =
+ (Name == "unsigned char" || Name == "signed char"))) {
+ // FIXME: check T's DW_AT_type to see if it's signed or not (since
+ // char signedness is implementation defined).
+ auto Val = *V->getAsSignedConstant();
+ // Copied/hacked up from Clang's CharacterLiteral::print - incomplete
+ // (doesn't actually support different character types/widths, sign
+ // handling's not done, and doesn't correctly test if a character is
+ // printable or needs to use a numeric escape sequence instead)
+ if (IsQualifiedChar) {
+ OS << '(';
+ OS << Name;
+ OS << ')';
+ }
+ switch (Val) {
+ case '\\':
+ OS << "'\\\\'";
+ break;
+ case '\'':
+ OS << "'\\''";
+ break;
+ case '\a':
+ // TODO: K&R: the meaning of '\\a' is different in traditional C
+ OS << "'\\a'";
+ break;
+ case '\b':
+ OS << "'\\b'";
+ break;
+ case '\f':
+ OS << "'\\f'";
+ break;
+ case '\n':
+ OS << "'\\n'";
+ break;
+ case '\r':
+ OS << "'\\r'";
+ break;
+ case '\t':
+ OS << "'\\t'";
+ break;
+ case '\v':
+ OS << "'\\v'";
+ break;
+ default:
+ if ((Val & ~0xFFu) == ~0xFFu)
+ Val &= 0xFFu;
+ if (Val < 127 && Val >= 32) {
+ OS << "'";
+ OS << (char)Val;
+ OS << "'";
+ } else if (Val < 256)
+ OS << to_string(llvm::format("'\\x%02x'", Val));
+ else if (Val <= 0xFFFF)
+ OS << to_string(llvm::format("'\\u%04x'", Val));
+ else
+ OS << to_string(llvm::format("'\\U%08x'", Val));
+ }
+ }
+ continue;
+ }
+ if (C.getTag() == dwarf::DW_TAG_GNU_template_template_param) {
+ const char *RawName =
+ dwarf::toString(C.find(DW_AT_GNU_template_name), nullptr);
+ assert(RawName);
+ StringRef Name = RawName;
+ Sep();
+ OS << Name;
+ continue;
+ }
+ if (C.getTag() != dwarf::DW_TAG_template_type_parameter)
+ continue;
+ auto TypeAttr = C.find(DW_AT_type);
+ Sep();
+ appendQualifiedName(TypeAttr ? resolveReferencedType(C, *TypeAttr)
+ : DWARFDie());
+ }
+ if (IsTemplate && *FirstParameter && FirstParameter == &FirstParameterValue)
+ OS << '<';
+ return IsTemplate;
+ }
+ void decomposeConstVolatile(DWARFDie &N, DWARFDie &T, DWARFDie &C,
+ DWARFDie &V) {
+ (N.getTag() == DW_TAG_const_type ? C : V) = N;
+ T = resolveReferencedType(N);
+ if (T) {
+ auto Tag = T.getTag();
+ if (Tag == DW_TAG_const_type) {
+ C = T;
+ T = resolveReferencedType(T);
+ } else if (Tag == DW_TAG_volatile_type) {
+ V = T;
+ T = resolveReferencedType(T);
+ }
+ }
+ }
+ void appendConstVolatileQualifierAfter(DWARFDie N) {
+ DWARFDie C;
+ DWARFDie V;
+ DWARFDie T;
+ decomposeConstVolatile(N, T, C, V);
+ if (T && T.getTag() == DW_TAG_subroutine_type)
+ appendSubroutineNameAfter(T, resolveReferencedType(T), false, C.isValid(),
+ V.isValid());
+ else
+ appendUnqualifiedNameAfter(T, resolveReferencedType(T));
+ }
+ void appendConstVolatileQualifierBefore(DWARFDie N) {
+ DWARFDie C;
+ DWARFDie V;
+ DWARFDie T;
+ decomposeConstVolatile(N, T, C, V);
+ bool Subroutine = T && T.getTag() == DW_TAG_subroutine_type;
+ DWARFDie A = T;
+ while (A && A.getTag() == DW_TAG_array_type)
+ A = resolveReferencedType(A);
+ bool Leading =
+ (!A || (A.getTag() != DW_TAG_pointer_type &&
+ A.getTag() != llvm::dwarf::DW_TAG_ptr_to_member_type)) &&
+ !Subroutine;
+ if (Leading) {
+ if (C)
+ OS << "const ";
+ if (V)
+ OS << "volatile ";
+ }
+ appendQualifiedNameBefore(T);
+ if (!Leading && !Subroutine) {
+ Word = true;
+ if (C)
+ OS << "const";
+ if (V) {
+ if (C)
+ OS << ' ';
+ OS << "volatile";
+ }
+ }
+ }
+
+ /// Recursively append the DIE type name when applicable.
+ void appendUnqualifiedName(DWARFDie D,
+ std::string *OriginalFullName = nullptr) {
+ // FIXME: We should have pretty printers per language. Currently we print
+ // everything as if it was C++ and fall back to the TAG type name.
+ DWARFDie Inner = appendUnqualifiedNameBefore(D, OriginalFullName);
+ appendUnqualifiedNameAfter(D, Inner);
+ }
+
+ void appendSubroutineNameAfter(DWARFDie D, DWARFDie Inner,
+ bool SkipFirstParamIfArtificial, bool Const,
+ bool Volatile) {
+ DWARFDie FirstParamIfArtificial;
OS << '(';
+ EndedWithTemplate = false;
bool First = true;
- for (const DWARFDie &C : D.children()) {
- if (C.getTag() == DW_TAG_formal_parameter) {
- if (!First)
- OS << ", ";
- First = false;
- dumpTypeName(OS, C.getAttributeValueAsReferencedDie(DW_AT_type));
+ bool RealFirst = true;
+ for (DWARFDie P : D) {
+ if (P.getTag() != DW_TAG_formal_parameter)
+ return;
+ DWARFDie T = resolveReferencedType(P);
+ if (SkipFirstParamIfArtificial && RealFirst && P.find(DW_AT_artificial)) {
+ FirstParamIfArtificial = T;
+ RealFirst = false;
+ continue;
}
+ if (!First) {
+ OS << ", ";
+ }
+ First = false;
+ appendQualifiedName(T);
}
+ EndedWithTemplate = false;
OS << ')';
- break;
- }
- case DW_TAG_array_type: {
- dumpArrayType(OS, D);
- break;
- }
- case DW_TAG_pointer_type:
- OS << '*';
- break;
- case DW_TAG_ptr_to_member_type:
- if (DWARFDie Cont =
- D.getAttributeValueAsReferencedDie(DW_AT_containing_type)) {
- dumpTypeName(OS << ' ', Cont);
- OS << "::";
+ if (FirstParamIfArtificial) {
+ if (DWARFDie P = FirstParamIfArtificial) {
+ if (P.getTag() == DW_TAG_pointer_type) {
+ DWARFDie C;
+ DWARFDie V;
+ auto CVStep = [&](DWARFDie CV) {
+ if (DWARFDie U = resolveReferencedType(CV)) {
+ if (U.getTag() == DW_TAG_const_type)
+ return C = U;
+ if (U.getTag() == DW_TAG_volatile_type)
+ return V = U;
+ }
+ return DWARFDie();
+ };
+ if (DWARFDie CV = CVStep(P)) {
+ CVStep(CV);
+ }
+ if (C)
+ OS << " const";
+ if (V)
+ OS << " volatile";
+ }
+ }
+ } else {
+ if (Const)
+ OS << " const";
+ if (Volatile)
+ OS << " volatile";
}
- OS << '*';
- break;
- case DW_TAG_reference_type:
- OS << '&';
- break;
- case DW_TAG_rvalue_reference_type:
- OS << "&&";
- break;
- default:
- break;
+ if (D.find(DW_AT_reference))
+ OS << " &";
+ if (D.find(DW_AT_rvalue_reference))
+ OS << " &&";
+ appendUnqualifiedNameAfter(Inner, resolveReferencedType(Inner));
}
-}
+ void appendScopes(DWARFDie D) {
+ if (D.getTag() == DW_TAG_compile_unit)
+ return;
+ if (D.getTag() == DW_TAG_type_unit)
+ return;
+ if (D.getTag() == DW_TAG_skeleton_unit)
+ return;
+ if (D.getTag() == DW_TAG_subprogram)
+ return;
+ D = D.resolveTypeUnitReference();
+ if (DWARFDie P = D.getParent())
+ appendScopes(P);
+ appendUnqualifiedName(D);
+ OS << "::";
+ }
+};
+} // anonymous namespace
static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die,
const DWARFAttribute &AttrValue, unsigned Indent,
@@ -316,9 +764,12 @@ static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die,
DINameKind::LinkageName))
OS << Space << "\"" << Name << '\"';
} else if (Attr == DW_AT_type) {
- OS << Space << "\"";
- dumpTypeName(OS, Die.getAttributeValueAsReferencedDie(FormValue));
- OS << '"';
+ DWARFDie D = resolveReferencedType(Die, FormValue);
+ if (D && !D.isNULL()) {
+ OS << Space << "\"";
+ DWARFTypePrinter(OS).appendQualifiedName(D);
+ OS << '"';
+ }
} else if (Attr == DW_AT_APPLE_property_attribute) {
if (Optional<uint64_t> OptVal = FormValue.getAsUnsignedConstant())
dumpApplePropertyAttribute(OS, *OptVal);
@@ -345,6 +796,14 @@ static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die,
OS << ")\n";
}
+void DWARFDie::getFullName(raw_string_ostream &OS,
+ std::string *OriginalFullName) const {
+ const char *NamePtr = getShortName();
+ if (!NamePtr)
+ return;
+ DWARFTypePrinter(OS).appendUnqualifiedName(*this, OriginalFullName);
+}
+
bool DWARFDie::isSubprogramDIE() const { return getTag() == DW_TAG_subprogram; }
bool DWARFDie::isSubroutineDIE() const {
@@ -417,13 +876,27 @@ DWARFDie::getAttributeValueAsReferencedDie(dwarf::Attribute Attr) const {
DWARFDie
DWARFDie::getAttributeValueAsReferencedDie(const DWARFFormValue &V) const {
+ DWARFDie Result;
if (auto SpecRef = V.getAsRelativeReference()) {
if (SpecRef->Unit)
- return SpecRef->Unit->getDIEForOffset(SpecRef->Unit->getOffset() + SpecRef->Offset);
- if (auto SpecUnit = U->getUnitVector().getUnitForOffset(SpecRef->Offset))
- return SpecUnit->getDIEForOffset(SpecRef->Offset);
+ Result = SpecRef->Unit->getDIEForOffset(SpecRef->Unit->getOffset() +
+ SpecRef->Offset);
+ else if (auto SpecUnit =
+ U->getUnitVector().getUnitForOffset(SpecRef->Offset))
+ Result = SpecUnit->getDIEForOffset(SpecRef->Offset);
}
- return DWARFDie();
+ return Result;
+}
+
+DWARFDie DWARFDie::resolveTypeUnitReference() const {
+ if (auto Attr = find(DW_AT_signature)) {
+ if (Optional<uint64_t> Sig = Attr->getAsReferenceUVal()) {
+ if (DWARFTypeUnit *TU = U->getContext().getTypeUnitForHash(
+ U->getVersion(), *Sig, U->isDWOUnit()))
+ return TU->getDIEForOffset(TU->getTypeOffset() + TU->getOffset());
+ }
+ }
+ return *this;
}
Optional<uint64_t> DWARFDie::getRangesBaseAttribute() const {
@@ -483,21 +956,6 @@ Expected<DWARFAddressRangesVector> DWARFDie::getAddressRanges() const {
return DWARFAddressRangesVector();
}
-void DWARFDie::collectChildrenAddressRanges(
- DWARFAddressRangesVector &Ranges) const {
- if (isNULL())
- return;
- if (isSubprogramDIE()) {
- if (auto DIERangesOrError = getAddressRanges())
- llvm::append_range(Ranges, DIERangesOrError.get());
- else
- llvm::consumeError(DIERangesOrError.takeError());
- }
-
- for (auto Child : children())
- Child.collectChildrenAddressRanges(Ranges);
-}
-
bool DWARFDie::addressRangeContainsAddress(const uint64_t Address) const {
auto RangesOrError = getAddressRanges();
if (!RangesOrError) {
@@ -581,18 +1039,10 @@ uint64_t DWARFDie::getDeclLine() const {
std::string
DWARFDie::getDeclFile(DILineInfoSpecifier::FileLineInfoKind Kind) const {
- auto D = getAttributeValueAsReferencedDie(DW_AT_abstract_origin);
- if (!D)
- D = *this;
- std::string FileName;
- if (auto DeclFile = toUnsigned(D.find(DW_AT_decl_file))) {
- if (const auto *LineTable =
- getDwarfUnit()->getContext().getLineTableForUnit(
- D.getDwarfUnit()->getLinkedUnit()))
- LineTable->getFileNameByIndex(
- *DeclFile, D.getDwarfUnit()->getCompilationDir(), Kind, FileName);
- }
- return FileName;
+ if (auto FormValue = findRecursively(DW_AT_decl_file))
+ if (auto OptString = FormValue->getAsFile(Kind))
+ return *OptString;
+ return {};
}
void DWARFDie::getCallerFrame(uint32_t &CallFile, uint32_t &CallLine,
@@ -641,9 +1091,13 @@ void DWARFDie::dump(raw_ostream &OS, unsigned Indent,
if (AbbrevDecl) {
WithColor(OS, HighlightColor::Tag).get().indent(Indent)
<< formatv("{0}", getTag());
- if (DumpOpts.Verbose)
+ if (DumpOpts.Verbose) {
OS << format(" [%u] %c", abbrCode,
AbbrevDecl->hasChildren() ? '*' : ' ');
+ if (Optional<uint32_t> ParentIdx = Die->getParentIdx())
+ OS << format(" (0x%8.8" PRIx64 ")",
+ U->getDIEAtIndex(*ParentIdx).getOffset());
+ }
OS << '\n';
// Dump all data in the DIE for the attributes.
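The DWARFTypePrinter introduced above is what dumpAttribute and DWARFDie::getFullName now use to render C++-style type names instead of bare tag names. A minimal sketch of how it is driven within this file, where TypeDie is a placeholder for a resolved DW_AT_type DIE:

  // Sketch only: render a qualified type name into a string.
  std::string Name;
  raw_string_ostream NameOS(Name);
  DWARFTypePrinter(NameOS).appendQualifiedName(TypeDie);
  // Name now reads e.g. "const char *" rather than "pointer ".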
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp b/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp
index 4b9be85f6885..d0fbd702e831 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp
@@ -207,7 +207,8 @@ bool DWARFExpression::Operation::extract(DataExtractor Data,
}
static void prettyPrintBaseTypeRef(DWARFUnit *U, raw_ostream &OS,
- DIDumpOptions DumpOpts, uint64_t Operands[2],
+ DIDumpOptions DumpOpts,
+ const uint64_t Operands[2],
unsigned Operand) {
assert(Operand < 2 && "operand out of bounds");
auto Die = U->getDIEForOffset(U->getOffset() + Operands[Operand]);
@@ -226,7 +227,7 @@ static void prettyPrintBaseTypeRef(DWARFUnit *U, raw_ostream &OS,
static bool prettyPrintRegisterOp(DWARFUnit *U, raw_ostream &OS,
DIDumpOptions DumpOpts, uint8_t Opcode,
- uint64_t Operands[2],
+ const uint64_t Operands[2],
const MCRegisterInfo *MRI, bool isEH) {
if (!MRI)
return false;
@@ -262,7 +263,7 @@ static bool prettyPrintRegisterOp(DWARFUnit *U, raw_ostream &OS,
bool DWARFExpression::Operation::print(raw_ostream &OS, DIDumpOptions DumpOpts,
const DWARFExpression *Expr,
const MCRegisterInfo *RegInfo,
- DWARFUnit *U, bool isEH) {
+ DWARFUnit *U, bool isEH) const {
if (Error) {
OS << "<decoding error>";
return false;
@@ -356,10 +357,9 @@ void DWARFExpression::print(raw_ostream &OS, DIDumpOptions DumpOpts,
}
}
-bool DWARFExpression::Operation::verify(DWARFUnit *U) {
-
+bool DWARFExpression::Operation::verify(const Operation &Op, DWARFUnit *U) {
for (unsigned Operand = 0; Operand < 2; ++Operand) {
- unsigned Size = Desc.Op[Operand];
+ unsigned Size = Op.Desc.Op[Operand];
if (Size == Operation::SizeNA)
break;
@@ -369,13 +369,11 @@ bool DWARFExpression::Operation::verify(DWARFUnit *U) {
// the generic type should be done, so don't look up a base type in that
// case. The same holds for DW_OP_reinterpret, which is currently not
// supported.
- if (Opcode == DW_OP_convert && Operands[Operand] == 0)
+ if (Op.Opcode == DW_OP_convert && Op.Operands[Operand] == 0)
continue;
- auto Die = U->getDIEForOffset(U->getOffset() + Operands[Operand]);
- if (!Die || Die.getTag() != dwarf::DW_TAG_base_type) {
- Error = true;
+ auto Die = U->getDIEForOffset(U->getOffset() + Op.Operands[Operand]);
+ if (!Die || Die.getTag() != dwarf::DW_TAG_base_type)
return false;
- }
}
}
@@ -384,7 +382,7 @@ bool DWARFExpression::Operation::verify(DWARFUnit *U) {
bool DWARFExpression::verify(DWARFUnit *U) {
for (auto &Op : *this)
- if (!Op.verify(U))
+ if (!Operation::verify(Op, U))
return false;
return true;
@@ -410,7 +408,7 @@ static bool printCompactDWARFExpr(raw_ostream &OS, DWARFExpression::iterator I,
SmallVector<PrintedExpr, 4> Stack;
while (I != E) {
- DWARFExpression::Operation &Op = *I;
+ const DWARFExpression::Operation &Op = *I;
uint8_t Opcode = Op.getCode();
switch (Opcode) {
case dwarf::DW_OP_regx: {
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp
index 2244a69bc121..cea0f63bbf81 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp
@@ -332,7 +332,7 @@ bool DWARFFormValue::extractValue(const DWARFDataExtractor &Data,
break;
case DW_FORM_LLVM_addrx_offset:
Value.uval = Data.getULEB128(OffsetPtr, &Err) << 32;
- Value.uval = Data.getU32(OffsetPtr, &Err);
+ Value.uval |= Data.getU32(OffsetPtr, &Err);
break;
case DW_FORM_string:
Value.cstr = Data.getCStr(OffsetPtr, &Err);
@@ -690,7 +690,7 @@ Optional<uint64_t> DWARFFormValue::getAsReference() const {
return R->Unit ? R->Unit->getOffset() + R->Offset : R->Offset;
return None;
}
-
+
Optional<DWARFFormValue::UnitOffset> DWARFFormValue::getAsRelativeReference() const {
if (!isFormClass(FC_Reference))
return None;
@@ -762,3 +762,17 @@ Optional<uint64_t> DWARFFormValue::getAsReferenceUVal() const {
return None;
return Value.uval;
}
+
+Optional<std::string>
+DWARFFormValue::getAsFile(DILineInfoSpecifier::FileLineInfoKind Kind) const {
+ if (U == nullptr || !isFormClass(FC_Constant))
+ return None;
+ DWARFUnit *DLU = const_cast<DWARFUnit *>(U)->getLinkedUnit();
+ if (auto *LT = DLU->getContext().getLineTableForUnit(DLU)) {
+ std::string FileName;
+ if (LT->getFileNameByIndex(Value.uval, DLU->getCompilationDir(), Kind,
+ FileName))
+ return FileName;
+ }
+ return None;
+}
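getAsFile lets callers turn a DW_AT_decl_file constant into a path without touching the line table directly, which is what the simplified DWARFDie::getDeclFile above relies on. A minimal sketch, where Die is a placeholder for a DIE whose form values carry their owning unit:

  // Sketch only: resolve the declaring file of a DIE.
  if (Optional<DWARFFormValue> FV = Die.findRecursively(DW_AT_decl_file))
    if (Optional<std::string> Path = FV->getAsFile(
            DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath)) {
      // *Path is the declaring source file.
    }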
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFListTable.cpp b/llvm/lib/DebugInfo/DWARF/DWARFListTable.cpp
index c876af1e9b51..b73dda3ff9ce 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFListTable.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFListTable.cpp
@@ -8,6 +8,7 @@
#include "llvm/DebugInfo/DWARF/DWARFListTable.h"
#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Format.h"
@@ -54,11 +55,10 @@ Error DWARFListTableHeader::extract(DWARFDataExtractor Data,
"unrecognised %s table version %" PRIu16
" in table at offset 0x%" PRIx64,
SectionName.data(), HeaderData.Version, HeaderOffset);
- if (HeaderData.AddrSize != 4 && HeaderData.AddrSize != 8)
- return createStringError(errc::not_supported,
- "%s table at offset 0x%" PRIx64
- " has unsupported address size %" PRIu8,
- SectionName.data(), HeaderOffset, HeaderData.AddrSize);
+ if (Error SizeErr = DWARFContext::checkAddressSizeSupported(
+ HeaderData.AddrSize, errc::not_supported,
+ "%s table at offset 0x%" PRIx64, SectionName.data(), HeaderOffset))
+ return SizeErr;
if (HeaderData.SegSize != 0)
return createStringError(errc::not_supported,
"%s table at offset 0x%" PRIx64
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
index f17dacfce665..82c34f537036 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -315,15 +315,10 @@ bool DWARFUnitHeader::extract(DWARFContext &Context,
return false;
}
- if (!DWARFContext::isAddressSizeSupported(getAddressByteSize())) {
- SmallVector<std::string, 3> Sizes;
- for (auto Size : DWARFContext::getSupportedAddressSizes())
- Sizes.push_back(std::to_string(Size));
- Context.getWarningHandler()(createStringError(
- errc::invalid_argument,
- "DWARF unit at offset 0x%8.8" PRIx64 " "
- "has unsupported address size %" PRIu8 ", supported are %s",
- Offset, getAddressByteSize(), llvm::join(Sizes, ", ").c_str()));
+ if (Error SizeErr = DWARFContext::checkAddressSizeSupported(
+ getAddressByteSize(), errc::invalid_argument,
+ "DWARF unit at offset 0x%8.8" PRIx64, Offset)) {
+ Context.getWarningHandler()(std::move(SizeErr));
return false;
}
@@ -349,29 +344,6 @@ bool DWARFUnitHeader::applyIndexEntry(const DWARFUnitIndex::Entry *Entry) {
return true;
}
-// Parse the rangelist table header, including the optional array of offsets
-// following it (DWARF v5 and later).
-template<typename ListTableType>
-static Expected<ListTableType>
-parseListTableHeader(DWARFDataExtractor &DA, uint64_t Offset,
- DwarfFormat Format) {
- // We are expected to be called with Offset 0 or pointing just past the table
- // header. Correct Offset in the latter case so that it points to the start
- // of the header.
- if (Offset > 0) {
- uint64_t HeaderSize = DWARFListTableHeader::getHeaderSize(Format);
- if (Offset < HeaderSize)
- return createStringError(errc::invalid_argument, "did not detect a valid"
- " list table with base = 0x%" PRIx64 "\n",
- Offset);
- Offset -= HeaderSize;
- }
- ListTableType Table;
- if (Error E = Table.extractHeaderAndOffsets(DA, &Offset))
- return std::move(E);
- return Table;
-}
-
Error DWARFUnit::extractRangeList(uint64_t RangeListOffset,
DWARFDebugRangeList &RangeList) const {
// Require that compile unit is extracted.
@@ -411,11 +383,39 @@ void DWARFUnit::extractDIEsToVector(
DWARFDataExtractor DebugInfoData = getDebugInfoExtractor();
// The end offset has been already checked by DWARFUnitHeader::extract.
assert(DebugInfoData.isValidOffset(NextCUOffset - 1));
- uint32_t Depth = 0;
+ std::vector<uint32_t> Parents;
+ std::vector<uint32_t> PrevSiblings;
bool IsCUDie = true;
- while (DIE.extractFast(*this, &DIEOffset, DebugInfoData, NextCUOffset,
- Depth)) {
+ assert(
+ ((AppendCUDie && Dies.empty()) || (!AppendCUDie && Dies.size() == 1)) &&
+ "Dies array is not empty");
+
+ // Fill Parents and Siblings stacks with initial value.
+ Parents.push_back(UINT32_MAX);
+ if (!AppendCUDie)
+ Parents.push_back(0);
+ PrevSiblings.push_back(0);
+
+ // Start to extract dies.
+ do {
+ assert(Parents.size() > 0 && "Empty parents stack");
+ assert((Parents.back() == UINT32_MAX || Parents.back() <= Dies.size()) &&
+ "Wrong parent index");
+
+ // Extract the DIE. Stop if any error occurred.
+ if (!DIE.extractFast(*this, &DIEOffset, DebugInfoData, NextCUOffset,
+ Parents.back()))
+ break;
+
+ // If a previous sibling is remembered, update its SiblingIdx field.
+ if (PrevSiblings.back() > 0) {
+ assert(PrevSiblings.back() < Dies.size() &&
+ "Previous sibling index is out of Dies boundaries");
+ Dies[PrevSiblings.back()].setSiblingIdx(Dies.size());
+ }
+
+ // Store die into the Dies vector.
if (IsCUDie) {
if (AppendCUDie)
Dies.push_back(DIE);
@@ -425,26 +425,36 @@ void DWARFUnit::extractDIEsToVector(
// around 14-20 so let's pre-reserve the needed memory for
// our DIE entries accordingly.
Dies.reserve(Dies.size() + getDebugInfoSize() / 14);
- IsCUDie = false;
} else {
+ // Remember this DIE as the most recent sibling at the current depth.
+ PrevSiblings.back() = Dies.size();
+
Dies.push_back(DIE);
}
+ // Check for new children scope.
if (const DWARFAbbreviationDeclaration *AbbrDecl =
DIE.getAbbreviationDeclarationPtr()) {
- // Normal DIE
- if (AbbrDecl->hasChildren())
- ++Depth;
- else if (Depth == 0)
- break; // This unit has a single DIE with no children.
+ if (AbbrDecl->hasChildren()) {
+ if (AppendCUDie || !IsCUDie) {
+ assert(Dies.size() > 0 && "Dies does not contain any die");
+ Parents.push_back(Dies.size() - 1);
+ PrevSiblings.push_back(0);
+ }
+ } else if (IsCUDie)
+ // Stop if we have a single compile unit DIE with no children.
+ break;
} else {
- // NULL DIE.
- if (Depth > 0)
- --Depth;
- if (Depth == 0)
- break; // We are done with this compile unit!
+ // NULL DIE: finishes current children scope.
+ Parents.pop_back();
+ PrevSiblings.pop_back();
}
- }
+
+ if (IsCUDie)
+ IsCUDie = false;
+
+ // Stop when compile unit die is removed from the parents stack.
+ } while (Parents.size() > 1);
}
void DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) {
@@ -600,10 +610,14 @@ bool DWARFUnit::parseDWO() {
}
void DWARFUnit::clearDIEs(bool KeepCUDie) {
- if (DieArray.size() > (unsigned)KeepCUDie) {
- DieArray.resize((unsigned)KeepCUDie);
- DieArray.shrink_to_fit();
- }
+ // Do not use resize() + shrink_to_fit() to free memory occupied by dies.
+ // shrink_to_fit() is a *non-binding* request to reduce capacity() to size().
+ // It depends on the implementation whether the request is fulfilled.
+ // Create a new vector with a small capacity and assign it to the DieArray to
+ // have previous contents freed.
+ DieArray = (KeepCUDie && !DieArray.empty())
+ ? std::vector<DWARFDebugInfoEntry>({DieArray[0]})
+ : std::vector<DWARFDebugInfoEntry>();
}
Expected<DWARFAddressRangesVector>
@@ -750,65 +764,65 @@ const DWARFUnitIndex &llvm::getDWARFUnitIndex(DWARFContext &Context,
DWARFDie DWARFUnit::getParent(const DWARFDebugInfoEntry *Die) {
if (!Die)
return DWARFDie();
- const uint32_t Depth = Die->getDepth();
- // Unit DIEs always have a depth of zero and never have parents.
- if (Depth == 0)
- return DWARFDie();
- // Depth of 1 always means parent is the compile/type unit.
- if (Depth == 1)
- return getUnitDIE();
- // Look for previous DIE with a depth that is one less than the Die's depth.
- const uint32_t ParentDepth = Depth - 1;
- for (uint32_t I = getDIEIndex(Die) - 1; I > 0; --I) {
- if (DieArray[I].getDepth() == ParentDepth)
- return DWARFDie(this, &DieArray[I]);
+
+ if (Optional<uint32_t> ParentIdx = Die->getParentIdx()) {
+ assert(*ParentIdx < DieArray.size() &&
+ "ParentIdx is out of DieArray boundaries");
+ return DWARFDie(this, &DieArray[*ParentIdx]);
}
+
return DWARFDie();
}
DWARFDie DWARFUnit::getSibling(const DWARFDebugInfoEntry *Die) {
if (!Die)
return DWARFDie();
- uint32_t Depth = Die->getDepth();
- // Unit DIEs always have a depth of zero and never have siblings.
- if (Depth == 0)
- return DWARFDie();
- // NULL DIEs don't have siblings.
- if (Die->getAbbreviationDeclarationPtr() == nullptr)
- return DWARFDie();
- // Find the next DIE whose depth is the same as the Die's depth.
- for (size_t I = getDIEIndex(Die) + 1, EndIdx = DieArray.size(); I < EndIdx;
- ++I) {
- if (DieArray[I].getDepth() == Depth)
- return DWARFDie(this, &DieArray[I]);
+ if (Optional<uint32_t> SiblingIdx = Die->getSiblingIdx()) {
+ assert(*SiblingIdx < DieArray.size() &&
+ "SiblingIdx is out of DieArray boundaries");
+ return DWARFDie(this, &DieArray[*SiblingIdx]);
}
+
return DWARFDie();
}
DWARFDie DWARFUnit::getPreviousSibling(const DWARFDebugInfoEntry *Die) {
if (!Die)
return DWARFDie();
- uint32_t Depth = Die->getDepth();
- // Unit DIEs always have a depth of zero and never have siblings.
- if (Depth == 0)
+
+ Optional<uint32_t> ParentIdx = Die->getParentIdx();
+ if (!ParentIdx)
+ // Die is a root die, there is no previous sibling.
return DWARFDie();
- // Find the previous DIE whose depth is the same as the Die's depth.
- for (size_t I = getDIEIndex(Die); I > 0;) {
- --I;
- if (DieArray[I].getDepth() == Depth - 1)
- return DWARFDie();
- if (DieArray[I].getDepth() == Depth)
- return DWARFDie(this, &DieArray[I]);
+ assert(*ParentIdx < DieArray.size() &&
+ "ParentIdx is out of DieArray boundaries");
+ assert(getDIEIndex(Die) > 0 && "Die is a root die");
+
+ uint32_t PrevDieIdx = getDIEIndex(Die) - 1;
+ if (PrevDieIdx == *ParentIdx)
+ // The immediately preceding node is the parent, so there is no previous sibling.
+ return DWARFDie();
+
+ while (DieArray[PrevDieIdx].getParentIdx() != *ParentIdx) {
+ PrevDieIdx = *DieArray[PrevDieIdx].getParentIdx();
+
+ assert(PrevDieIdx < DieArray.size() &&
+ "PrevDieIdx is out of DieArray boundaries");
+ assert(PrevDieIdx >= *ParentIdx &&
+ "PrevDieIdx is not a child of parent of Die");
}
- return DWARFDie();
+
+ return DWARFDie(this, &DieArray[PrevDieIdx]);
}
DWARFDie DWARFUnit::getFirstChild(const DWARFDebugInfoEntry *Die) {
if (!Die->hasChildren())
return DWARFDie();
+ // TODO: Instead of checking for an invalid DIE here, we might reject
+ // invalid DIEs at the parsing stage (DWARFUnit::extractDIEsToVector).
// We do not want access out of bounds when parsing corrupted debug data.
size_t I = getDIEIndex(Die) + 1;
if (I >= DieArray.size())
@@ -820,14 +834,30 @@ DWARFDie DWARFUnit::getLastChild(const DWARFDebugInfoEntry *Die) {
if (!Die->hasChildren())
return DWARFDie();
- uint32_t Depth = Die->getDepth();
- for (size_t I = getDIEIndex(Die) + 1, EndIdx = DieArray.size(); I < EndIdx;
- ++I) {
- if (DieArray[I].getDepth() == Depth + 1 &&
- DieArray[I].getTag() == dwarf::DW_TAG_null)
- return DWARFDie(this, &DieArray[I]);
- assert(DieArray[I].getDepth() > Depth && "Not processing children?");
+ if (Optional<uint32_t> SiblingIdx = Die->getSiblingIdx()) {
+ assert(*SiblingIdx < DieArray.size() &&
+ "SiblingIdx is out of DieArray boundaries");
+ assert(DieArray[*SiblingIdx - 1].getTag() == dwarf::DW_TAG_null &&
+ "Bad end of children marker");
+ return DWARFDie(this, &DieArray[*SiblingIdx - 1]);
+ }
+
+ // If SiblingIdx is set for a non-root DIE, the DWARF is known to be correct
+ // and the "end of children" marker must be present. For the root DIE there is
+ // no such guarantee (parsing of the root DIE may stop if the "end of children"
+ // marker is missing, and SiblingIdx is always zero for the root DIE). That is
+ // why we do not assert on the "end of children" marker for the root DIE.
+
+ // TODO: Instead of checking for an invalid DIE here, we might reject
+ // invalid DIEs at the parsing stage (DWARFUnit::extractDIEsToVector).
+ if (getDIEIndex(Die) == 0 && DieArray.size() > 1 &&
+ DieArray.back().getTag() == dwarf::DW_TAG_null) {
+ // For the unit DIE we may take the last item from DieArray.
+ assert(getDIEIndex(Die) == getDIEIndex(getUnitDIE()) && "Bad unit die");
+ return DWARFDie(this, &DieArray.back());
}
+
return DWARFDie();
}
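With explicit parent and sibling indices stored on each DWARFDebugInfoEntry, the navigation above no longer scans by depth; every step is a direct index lookup. A minimal sketch of a child walk using the public DWARFDie wrappers, where CUDie is a placeholder unit DIE:

  // Sketch only: each getSibling() call resolves via the stored SiblingIdx.
  for (DWARFDie Child = CUDie.getFirstChild(); Child && !Child.isNULL();
       Child = Child.getSibling()) {
    // process Child
  }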
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
index ac624ec8b80f..dcabefb9896e 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/DWARF/DWARFVerifier.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
@@ -50,6 +51,9 @@ DWARFVerifier::DieRangeInfo::insert(const DWARFAddressRange &R) {
DWARFVerifier::DieRangeInfo::die_range_info_iterator
DWARFVerifier::DieRangeInfo::insert(const DieRangeInfo &RI) {
+ if (RI.Ranges.empty())
+ return Children.end();
+
auto End = Children.end();
auto Iter = Children.begin();
while (Iter != End) {
@@ -158,7 +162,30 @@ bool DWARFVerifier::verifyUnitHeader(const DWARFDataExtractor DebugInfoData,
return Success;
}
-unsigned DWARFVerifier::verifyUnitContents(DWARFUnit &Unit) {
+bool DWARFVerifier::verifyName(const DWARFDie &Die) {
+ // FIXME Add some kind of record of which DIE names have already failed and
+ // don't bother checking a DIE that uses an already failed DIE.
+
+ std::string ReconstructedName;
+ raw_string_ostream OS(ReconstructedName);
+ std::string OriginalFullName;
+ Die.getFullName(OS, &OriginalFullName);
+ OS.flush();
+ if (OriginalFullName.empty() || OriginalFullName == ReconstructedName)
+ return 0;
+
+ error() << "Simplified template DW_AT_name could not be reconstituted:\n"
+ << formatv(" original: {0}\n"
+ " reconstituted: {1}\n",
+ OriginalFullName, ReconstructedName);
+ dump(Die) << '\n';
+ dump(Die.getDwarfUnit()->getUnitDIE()) << '\n';
+ return 1;
+}
+
+unsigned DWARFVerifier::verifyUnitContents(DWARFUnit &Unit,
+ ReferenceMap &UnitLocalReferences,
+ ReferenceMap &CrossUnitReferences) {
unsigned NumUnitErrors = 0;
unsigned NumDies = Unit.getNumDIEs();
for (unsigned I = 0; I < NumDies; ++I) {
@@ -169,9 +196,12 @@ unsigned DWARFVerifier::verifyUnitContents(DWARFUnit &Unit) {
for (auto AttrValue : Die.attributes()) {
NumUnitErrors += verifyDebugInfoAttribute(Die, AttrValue);
- NumUnitErrors += verifyDebugInfoForm(Die, AttrValue);
+ NumUnitErrors += verifyDebugInfoForm(Die, AttrValue, UnitLocalReferences,
+ CrossUnitReferences);
}
+ NumUnitErrors += verifyName(Die);
+
if (Die.hasChildren()) {
if (Die.getFirstChild().isValid() &&
Die.getFirstChild().getTag() == DW_TAG_null) {
@@ -299,6 +329,10 @@ unsigned DWARFVerifier::verifyUnitSection(const DWARFSection &S,
bool hasDIE = DebugInfoData.isValidOffset(Offset);
DWARFUnitVector TypeUnitVector;
DWARFUnitVector CompileUnitVector;
+ /// A map that tracks all references (converted absolute references) so we
+ /// can verify that each reference points to a valid DIE and not to an offset
+ /// that lies between two valid DIEs.
+ ReferenceMap CrossUnitReferences;
while (hasDIE) {
OffsetStart = Offset;
if (!verifyUnitHeader(DebugInfoData, &Offset, UnitIdx, UnitType,
@@ -309,6 +343,7 @@ unsigned DWARFVerifier::verifyUnitSection(const DWARFSection &S,
} else {
DWARFUnitHeader Header;
Header.extract(DCtx, DebugInfoData, &OffsetStart, SectionKind);
+ ReferenceMap UnitLocalReferences;
DWARFUnit *Unit;
switch (UnitType) {
case dwarf::DW_UT_type:
@@ -337,7 +372,10 @@ unsigned DWARFVerifier::verifyUnitSection(const DWARFSection &S,
}
default: { llvm_unreachable("Invalid UnitType."); }
}
- NumDebugInfoErrors += verifyUnitContents(*Unit);
+ NumDebugInfoErrors +=
+ verifyUnitContents(*Unit, UnitLocalReferences, CrossUnitReferences);
+ NumDebugInfoErrors += verifyDebugInfoReferences(
+ UnitLocalReferences, [&](uint64_t Offset) { return Unit; });
}
hasDIE = DebugInfoData.isValidOffset(Offset);
++UnitIdx;
@@ -348,7 +386,14 @@ unsigned DWARFVerifier::verifyUnitSection(const DWARFSection &S,
}
if (!isHeaderChainValid)
++NumDebugInfoErrors;
- NumDebugInfoErrors += verifyDebugInfoReferences();
+ NumDebugInfoErrors += verifyDebugInfoReferences(
+ CrossUnitReferences, [&](uint64_t Offset) -> DWARFUnit * {
+ if (DWARFUnit *U = TypeUnitVector.getUnitForOffset(Offset))
+ return U;
+ if (DWARFUnit *U = CompileUnitVector.getUnitForOffset(Offset))
+ return U;
+ return nullptr;
+ });
return NumDebugInfoErrors;
}
@@ -383,7 +428,7 @@ unsigned DWARFVerifier::verifyDieRanges(const DWARFDie &Die,
return NumErrors;
}
- DWARFAddressRangesVector Ranges = RangesOrError.get();
+ const DWARFAddressRangesVector &Ranges = RangesOrError.get();
// Build RI for this DIE and check that ranges within this DIE do not
// overlap.
DieRangeInfo RI(Die);
@@ -409,7 +454,7 @@ unsigned DWARFVerifier::verifyDieRanges(const DWARFDie &Die,
if (!IsObjectFile || IsMachOObject || Die.getTag() != DW_TAG_compile_unit) {
bool DumpDieAfterError = false;
- for (auto Range : Ranges) {
+ for (const auto &Range : Ranges) {
if (!Range.valid()) {
++NumErrors;
error() << "Invalid address range " << Range << "\n";
@@ -444,7 +489,7 @@ unsigned DWARFVerifier::verifyDieRanges(const DWARFDie &Die,
}
// Verify that ranges are contained within their parent.
- bool ShouldBeContained = !Ranges.empty() && !ParentRI.Ranges.empty() &&
+ bool ShouldBeContained = !RI.Ranges.empty() && !ParentRI.Ranges.empty() &&
!(Die.getTag() == DW_TAG_subprogram &&
ParentRI.Die.getTag() == DW_TAG_subprogram);
if (ShouldBeContained && !ParentRI.contains(RI)) {
@@ -507,9 +552,10 @@ unsigned DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die,
DataExtractor Data(toStringRef(Entry.Expr), DCtx.isLittleEndian(), 0);
DWARFExpression Expression(Data, U->getAddressByteSize(),
U->getFormParams().Format);
- bool Error = any_of(Expression, [](DWARFExpression::Operation &Op) {
- return Op.isError();
- });
+ bool Error =
+ any_of(Expression, [](const DWARFExpression::Operation &Op) {
+ return Op.isError();
+ });
if (Error || !Expression.verify(U))
ReportError("DIE contains invalid DWARF expression:");
}
@@ -587,7 +633,9 @@ unsigned DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die,
}
unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die,
- DWARFAttribute &AttrValue) {
+ DWARFAttribute &AttrValue,
+ ReferenceMap &LocalReferences,
+ ReferenceMap &CrossUnitReferences) {
const DWARFObject &DObj = DCtx.getDWARFObj();
auto DieCU = Die.getDwarfUnit();
unsigned NumErrors = 0;
@@ -615,7 +663,7 @@ unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die,
} else {
// Valid reference, but we will verify it points to an actual
// DIE later.
- ReferenceToDIEOffsets[*RefVal].insert(Die.getOffset());
+ LocalReferences[*RefVal].insert(Die.getOffset());
}
}
break;
@@ -634,7 +682,7 @@ unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die,
} else {
// Valid reference, but we will verify it points to an actual
// DIE later.
- ReferenceToDIEOffsets[*RefVal].insert(Die.getOffset());
+ CrossUnitReferences[*RefVal].insert(Die.getOffset());
}
}
break;
@@ -694,20 +742,24 @@ unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die,
return NumErrors;
}
-unsigned DWARFVerifier::verifyDebugInfoReferences() {
- // Take all references and make sure they point to an actual DIE by
- // getting the DIE by offset and emitting an error
- OS << "Verifying .debug_info references...\n";
+unsigned DWARFVerifier::verifyDebugInfoReferences(
+ const ReferenceMap &References,
+ llvm::function_ref<DWARFUnit *(uint64_t)> GetUnitForOffset) {
+ auto GetDIEForOffset = [&](uint64_t Offset) {
+ if (DWARFUnit *U = GetUnitForOffset(Offset))
+ return U->getDIEForOffset(Offset);
+ return DWARFDie();
+ };
unsigned NumErrors = 0;
for (const std::pair<const uint64_t, std::set<uint64_t>> &Pair :
- ReferenceToDIEOffsets) {
- if (DCtx.getDIEForOffset(Pair.first))
+ References) {
+ if (GetDIEForOffset(Pair.first))
continue;
++NumErrors;
error() << "invalid DIE reference " << format("0x%08" PRIx64, Pair.first)
<< ". Offset is in between DIEs:\n";
for (auto Offset : Pair.second)
- dump(DCtx.getDIEForOffset(Offset)) << '\n';
+ dump(GetDIEForOffset(Offset)) << '\n';
OS << "\n";
}
return NumErrors;
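The refactored verifyDebugInfoReferences no longer resolves offsets through DCtx directly: it takes a ReferenceMap (referenced offset -> offsets of the DIEs holding the reference) plus a callback that maps an offset to its owning unit, so the same routine can be run over unit-local and cross-unit reference sets against different unit collections. A minimal standalone sketch of that shape, using only the standard library (the names here are illustrative, not LLVM APIs):

#include <cstdint>
#include <functional>
#include <iostream>
#include <map>
#include <set>

struct Unit {
  std::set<uint64_t> DieOffsets;                 // offsets of real DIEs in this unit
  bool hasDieAt(uint64_t Off) const { return DieOffsets.count(Off) != 0; }
};

using ReferenceMap = std::map<uint64_t, std::set<uint64_t>>;

unsigned verifyReferences(const ReferenceMap &Refs,
                          std::function<Unit *(uint64_t)> GetUnitForOffset) {
  unsigned NumErrors = 0;
  for (const auto &Pair : Refs) {
    Unit *U = GetUnitForOffset(Pair.first);
    if (U && U->hasDieAt(Pair.first))
      continue;          // the reference lands on a real DIE
    ++NumErrors;         // otherwise every offset in Pair.second made a bad reference
  }
  return NumErrors;
}

int main() {
  Unit U{{0x10, 0x20}};
  ReferenceMap Local = {{0x20, {0x10}},          // fine: DIE 0x10 refers to DIE 0x20
                        {0x33, {0x10}}};         // bad: 0x33 falls between DIEs
  std::cout << verifyReferences(Local, [&](uint64_t) { return &U; }) << "\n"; // 1
}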
@@ -1349,11 +1401,12 @@ static bool isVariableIndexable(const DWARFDie &Die, DWARFContext &DCtx) {
U->getAddressByteSize());
DWARFExpression Expression(Data, U->getAddressByteSize(),
U->getFormParams().Format);
- bool IsInteresting = any_of(Expression, [](DWARFExpression::Operation &Op) {
- return !Op.isError() && (Op.getCode() == DW_OP_addr ||
- Op.getCode() == DW_OP_form_tls_address ||
- Op.getCode() == DW_OP_GNU_push_tls_address);
- });
+ bool IsInteresting =
+ any_of(Expression, [](const DWARFExpression::Operation &Op) {
+ return !Op.isError() && (Op.getCode() == DW_OP_addr ||
+ Op.getCode() == DW_OP_form_tls_address ||
+ Op.getCode() == DW_OP_GNU_push_tls_address);
+ });
if (IsInteresting)
return true;
}
@@ -1488,7 +1541,7 @@ unsigned DWARFVerifier::verifyDebugNames(const DWARFSection &AccelSection,
if (NumErrors > 0)
return NumErrors;
for (const auto &NI : AccelTable)
- for (DWARFDebugNames::NameTableEntry NTE : NI)
+ for (const DWARFDebugNames::NameTableEntry &NTE : NI)
NumErrors += verifyNameIndexEntries(NI, NTE);
if (NumErrors > 0)
diff --git a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
index cdea0e39486d..b2c43b893cd3 100644
--- a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
+++ b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
@@ -260,17 +260,15 @@ static void convertFunctionLineTable(raw_ostream &Log, CUInfo &CUI,
if (!CUI.LineTable->lookupAddressRange(SecAddress, RangeSize, RowVector)) {
// If we have a DW_TAG_subprogram but no line entries, fall back to using
 // the DW_AT_decl_file and DW_AT_decl_line if we have both attributes.
- if (auto FileIdx =
- dwarf::toUnsigned(Die.findRecursively({dwarf::DW_AT_decl_file}))) {
- if (auto Line =
- dwarf::toUnsigned(Die.findRecursively({dwarf::DW_AT_decl_line}))) {
- LineEntry LE(StartAddress, CUI.DWARFToGSYMFileIndex(Gsym, *FileIdx),
- *Line);
- FI.OptLineTable = LineTable();
- FI.OptLineTable->push(LE);
- // LE.Addr = EndAddress;
- // FI.OptLineTable->push(LE);
- }
+ std::string FilePath = Die.getDeclFile(
+ DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath);
+ if (FilePath.empty())
+ return;
+ if (auto Line =
+ dwarf::toUnsigned(Die.findRecursively({dwarf::DW_AT_decl_line}))) {
+ LineEntry LE(StartAddress, Gsym.insertFile(FilePath), *Line);
+ FI.OptLineTable = LineTable();
+ FI.OptLineTable->push(LE);
}
return;
}
@@ -394,11 +392,11 @@ void DwarfTransformer::handleDie(raw_ostream &OS, CUInfo &CUI, DWARFDie Die) {
if (Range.LowPC != 0) {
if (!Gsym.isQuiet()) {
// Unexpected invalid address, emit a warning
- Log << "warning: DIE has an address range whose start address is "
- "not in any executable sections ("
- << *Gsym.GetValidTextRanges()
- << ") and will not be processed:\n";
- Die.dump(Log, 0, DIDumpOptions::getForSingleDIE());
+ OS << "warning: DIE has an address range whose start address is "
+ "not in any executable sections ("
+ << *Gsym.GetValidTextRanges()
+ << ") and will not be processed:\n";
+ Die.dump(OS, 0, DIDumpOptions::getForSingleDIE());
}
}
break;
diff --git a/llvm/lib/DebugInfo/GSYM/FileWriter.cpp b/llvm/lib/DebugInfo/GSYM/FileWriter.cpp
index 4b30dcb60a7b..b725f3ac74f5 100644
--- a/llvm/lib/DebugInfo/GSYM/FileWriter.cpp
+++ b/llvm/lib/DebugInfo/GSYM/FileWriter.cpp
@@ -1,9 +1,8 @@
//===- FileWriter.cpp -------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/DebugInfo/GSYM/Range.cpp b/llvm/lib/DebugInfo/GSYM/Range.cpp
index 044ddb8ba1ba..c1e8eccd0daa 100644
--- a/llvm/lib/DebugInfo/GSYM/Range.cpp
+++ b/llvm/lib/DebugInfo/GSYM/Range.cpp
@@ -1,9 +1,8 @@
//===- Range.cpp ------------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/DebugInfo/MSF/MappedBlockStream.cpp b/llvm/lib/DebugInfo/MSF/MappedBlockStream.cpp
index 5dc9c86b34fd..00fc70ca5a54 100644
--- a/llvm/lib/DebugInfo/MSF/MappedBlockStream.cpp
+++ b/llvm/lib/DebugInfo/MSF/MappedBlockStream.cpp
@@ -35,7 +35,7 @@ public:
} // end anonymous namespace
-using Interval = std::pair<uint32_t, uint32_t>;
+using Interval = std::pair<uint64_t, uint64_t>;
static Interval intersect(const Interval &I1, const Interval &I2) {
return std::make_pair(std::max(I1.first, I2.first),
@@ -85,7 +85,7 @@ MappedBlockStream::createFpmStream(const MSFLayout &Layout,
return createStream(Layout.SB->BlockSize, SL, MsfData, Allocator);
}
-Error MappedBlockStream::readBytes(uint32_t Offset, uint32_t Size,
+Error MappedBlockStream::readBytes(uint64_t Offset, uint64_t Size,
ArrayRef<uint8_t> &Buffer) {
// Make sure we aren't trying to read beyond the end of the stream.
if (auto EC = checkOffsetForRead(Offset, Size))
@@ -138,7 +138,7 @@ Error MappedBlockStream::readBytes(uint32_t Offset, uint32_t Size,
if (Intersection != RequestExtent)
continue;
- uint32_t CacheRangeOffset =
+ uint64_t CacheRangeOffset =
AbsoluteDifference(CachedExtent.first, Intersection.first);
Buffer = CachedAlloc.slice(CacheRangeOffset, Size);
return Error::success();
@@ -163,14 +163,14 @@ Error MappedBlockStream::readBytes(uint32_t Offset, uint32_t Size,
return Error::success();
}
-Error MappedBlockStream::readLongestContiguousChunk(uint32_t Offset,
+Error MappedBlockStream::readLongestContiguousChunk(uint64_t Offset,
ArrayRef<uint8_t> &Buffer) {
// Make sure we aren't trying to read beyond the end of the stream.
if (auto EC = checkOffsetForRead(Offset, 1))
return EC;
- uint32_t First = Offset / BlockSize;
- uint32_t Last = First;
+ uint64_t First = Offset / BlockSize;
+ uint64_t Last = First;
while (Last < getNumBlocks() - 1) {
if (StreamLayout.Blocks[Last] != StreamLayout.Blocks[Last + 1] - 1)
@@ -178,13 +178,13 @@ Error MappedBlockStream::readLongestContiguousChunk(uint32_t Offset,
++Last;
}
- uint32_t OffsetInFirstBlock = Offset % BlockSize;
- uint32_t BytesFromFirstBlock = BlockSize - OffsetInFirstBlock;
- uint32_t BlockSpan = Last - First + 1;
- uint32_t ByteSpan = BytesFromFirstBlock + (BlockSpan - 1) * BlockSize;
+ uint64_t OffsetInFirstBlock = Offset % BlockSize;
+ uint64_t BytesFromFirstBlock = BlockSize - OffsetInFirstBlock;
+ uint64_t BlockSpan = Last - First + 1;
+ uint64_t ByteSpan = BytesFromFirstBlock + (BlockSpan - 1) * BlockSize;
ArrayRef<uint8_t> BlockData;
- uint32_t MsfOffset = blockToOffset(StreamLayout.Blocks[First], BlockSize);
+ uint64_t MsfOffset = blockToOffset(StreamLayout.Blocks[First], BlockSize);
if (auto EC = MsfData.readBytes(MsfOffset, BlockSize, BlockData))
return EC;
@@ -193,9 +193,9 @@ Error MappedBlockStream::readLongestContiguousChunk(uint32_t Offset,
return Error::success();
}
-uint32_t MappedBlockStream::getLength() { return StreamLayout.Length; }
+uint64_t MappedBlockStream::getLength() { return StreamLayout.Length; }
-bool MappedBlockStream::tryReadContiguously(uint32_t Offset, uint32_t Size,
+bool MappedBlockStream::tryReadContiguously(uint64_t Offset, uint64_t Size,
ArrayRef<uint8_t> &Buffer) {
if (Size == 0) {
Buffer = ArrayRef<uint8_t>();
@@ -206,15 +206,15 @@ bool MappedBlockStream::tryReadContiguously(uint32_t Offset, uint32_t Size,
// all subsequent blocks are contiguous. For example, a 10k read with a 4k
// block size can be filled with a reference if, from the starting offset,
// 3 blocks in a row are contiguous.
- uint32_t BlockNum = Offset / BlockSize;
- uint32_t OffsetInBlock = Offset % BlockSize;
- uint32_t BytesFromFirstBlock = std::min(Size, BlockSize - OffsetInBlock);
- uint32_t NumAdditionalBlocks =
+ uint64_t BlockNum = Offset / BlockSize;
+ uint64_t OffsetInBlock = Offset % BlockSize;
+ uint64_t BytesFromFirstBlock = std::min(Size, BlockSize - OffsetInBlock);
+ uint64_t NumAdditionalBlocks =
alignTo(Size - BytesFromFirstBlock, BlockSize) / BlockSize;
- uint32_t RequiredContiguousBlocks = NumAdditionalBlocks + 1;
- uint32_t E = StreamLayout.Blocks[BlockNum];
- for (uint32_t I = 0; I < RequiredContiguousBlocks; ++I, ++E) {
+ uint64_t RequiredContiguousBlocks = NumAdditionalBlocks + 1;
+ uint64_t E = StreamLayout.Blocks[BlockNum];
+ for (uint64_t I = 0; I < RequiredContiguousBlocks; ++I, ++E) {
if (StreamLayout.Blocks[I + BlockNum] != E)
return false;
}
@@ -225,8 +225,8 @@ bool MappedBlockStream::tryReadContiguously(uint32_t Offset, uint32_t Size,
// cross-block span, explicitly resize the ArrayRef to cover the entire
// request length.
ArrayRef<uint8_t> BlockData;
- uint32_t FirstBlockAddr = StreamLayout.Blocks[BlockNum];
- uint32_t MsfOffset = blockToOffset(FirstBlockAddr, BlockSize);
+ uint64_t FirstBlockAddr = StreamLayout.Blocks[BlockNum];
+ uint64_t MsfOffset = blockToOffset(FirstBlockAddr, BlockSize);
if (auto EC = MsfData.readBytes(MsfOffset, BlockSize, BlockData)) {
consumeError(std::move(EC));
return false;
@@ -236,28 +236,28 @@ bool MappedBlockStream::tryReadContiguously(uint32_t Offset, uint32_t Size,
return true;
}
-Error MappedBlockStream::readBytes(uint32_t Offset,
+Error MappedBlockStream::readBytes(uint64_t Offset,
MutableArrayRef<uint8_t> Buffer) {
- uint32_t BlockNum = Offset / BlockSize;
- uint32_t OffsetInBlock = Offset % BlockSize;
+ uint64_t BlockNum = Offset / BlockSize;
+ uint64_t OffsetInBlock = Offset % BlockSize;
// Make sure we aren't trying to read beyond the end of the stream.
if (auto EC = checkOffsetForRead(Offset, Buffer.size()))
return EC;
- uint32_t BytesLeft = Buffer.size();
- uint32_t BytesWritten = 0;
+ uint64_t BytesLeft = Buffer.size();
+ uint64_t BytesWritten = 0;
uint8_t *WriteBuffer = Buffer.data();
while (BytesLeft > 0) {
- uint32_t StreamBlockAddr = StreamLayout.Blocks[BlockNum];
+ uint64_t StreamBlockAddr = StreamLayout.Blocks[BlockNum];
ArrayRef<uint8_t> BlockData;
- uint32_t Offset = blockToOffset(StreamBlockAddr, BlockSize);
+ uint64_t Offset = blockToOffset(StreamBlockAddr, BlockSize);
if (auto EC = MsfData.readBytes(Offset, BlockSize, BlockData))
return EC;
const uint8_t *ChunkStart = BlockData.data() + OffsetInBlock;
- uint32_t BytesInChunk = std::min(BytesLeft, BlockSize - OffsetInBlock);
+ uint64_t BytesInChunk = std::min(BytesLeft, BlockSize - OffsetInBlock);
::memcpy(WriteBuffer + BytesWritten, ChunkStart, BytesInChunk);
BytesWritten += BytesInChunk;
@@ -271,7 +271,7 @@ Error MappedBlockStream::readBytes(uint32_t Offset,
void MappedBlockStream::invalidateCache() { CacheMap.shrink_and_clear(); }
-void MappedBlockStream::fixCacheAfterWrite(uint32_t Offset,
+void MappedBlockStream::fixCacheAfterWrite(uint64_t Offset,
ArrayRef<uint8_t> Data) const {
// If this write overlapped a read which previously came from the pool,
// someone may still be holding a pointer to that alloc which is now invalid.
@@ -297,10 +297,10 @@ void MappedBlockStream::fixCacheAfterWrite(uint32_t Offset,
auto Intersection = intersect(WriteInterval, CachedInterval);
assert(Intersection.first <= Intersection.second);
- uint32_t Length = Intersection.second - Intersection.first;
- uint32_t SrcOffset =
+ uint64_t Length = Intersection.second - Intersection.first;
+ uint64_t SrcOffset =
AbsoluteDifference(WriteInterval.first, Intersection.first);
- uint32_t DestOffset =
+ uint64_t DestOffset =
AbsoluteDifference(CachedInterval.first, Intersection.first);
::memcpy(Alloc.data() + DestOffset, Data.data() + SrcOffset, Length);
}
@@ -370,39 +370,39 @@ WritableMappedBlockStream::createFpmStream(const MSFLayout &Layout,
return createStream(Layout.SB->BlockSize, MinLayout, MsfData, Allocator);
}
-Error WritableMappedBlockStream::readBytes(uint32_t Offset, uint32_t Size,
+Error WritableMappedBlockStream::readBytes(uint64_t Offset, uint64_t Size,
ArrayRef<uint8_t> &Buffer) {
return ReadInterface.readBytes(Offset, Size, Buffer);
}
Error WritableMappedBlockStream::readLongestContiguousChunk(
- uint32_t Offset, ArrayRef<uint8_t> &Buffer) {
+ uint64_t Offset, ArrayRef<uint8_t> &Buffer) {
return ReadInterface.readLongestContiguousChunk(Offset, Buffer);
}
-uint32_t WritableMappedBlockStream::getLength() {
+uint64_t WritableMappedBlockStream::getLength() {
return ReadInterface.getLength();
}
-Error WritableMappedBlockStream::writeBytes(uint32_t Offset,
+Error WritableMappedBlockStream::writeBytes(uint64_t Offset,
ArrayRef<uint8_t> Buffer) {
// Make sure we aren't trying to write beyond the end of the stream.
if (auto EC = checkOffsetForWrite(Offset, Buffer.size()))
return EC;
- uint32_t BlockNum = Offset / getBlockSize();
- uint32_t OffsetInBlock = Offset % getBlockSize();
+ uint64_t BlockNum = Offset / getBlockSize();
+ uint64_t OffsetInBlock = Offset % getBlockSize();
- uint32_t BytesLeft = Buffer.size();
- uint32_t BytesWritten = 0;
+ uint64_t BytesLeft = Buffer.size();
+ uint64_t BytesWritten = 0;
while (BytesLeft > 0) {
- uint32_t StreamBlockAddr = getStreamLayout().Blocks[BlockNum];
- uint32_t BytesToWriteInChunk =
+ uint64_t StreamBlockAddr = getStreamLayout().Blocks[BlockNum];
+ uint64_t BytesToWriteInChunk =
std::min(BytesLeft, getBlockSize() - OffsetInBlock);
const uint8_t *Chunk = Buffer.data() + BytesWritten;
ArrayRef<uint8_t> ChunkData(Chunk, BytesToWriteInChunk);
- uint32_t MsfOffset = blockToOffset(StreamBlockAddr, getBlockSize());
+ uint64_t MsfOffset = blockToOffset(StreamBlockAddr, getBlockSize());
MsfOffset += OffsetInBlock;
if (auto EC = WriteInterface.writeBytes(MsfOffset, ChunkData))
return EC;
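Every offset, size, and block computation in MappedBlockStream is widened from uint32_t to uint64_t here. The hazard being removed is that a block index times the block size can exceed 4 GiB even though both operands fit comfortably in 32 bits, so offsets into large MSF/PDB files silently wrapped. A small sketch of that overflow with illustrative values (not tied to any particular file):

#include <cstdint>
#include <iostream>

// blockToOffset-style arithmetic: byte offset of a block inside the file.
uint32_t blockToOffset32(uint32_t BlockIndex, uint32_t BlockSize) {
  return BlockIndex * BlockSize;                 // wraps once the product passes 4 GiB
}

uint64_t blockToOffset64(uint64_t BlockIndex, uint64_t BlockSize) {
  return BlockIndex * BlockSize;                 // holds the full value
}

int main() {
  uint32_t BlockIndex = 0x140000;                // ~1.3 million blocks
  uint32_t BlockSize = 0x1000;                   // 4 KiB blocks => ~5 GiB file
  std::cout << std::hex << blockToOffset32(BlockIndex, BlockSize) << "\n"; // 40000000 (wrapped)
  std::cout << std::hex << blockToOffset64(BlockIndex, BlockSize) << "\n"; // 140000000
}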
diff --git a/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
index 98a8acaffd60..0584966a98c5 100644
--- a/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
@@ -334,8 +334,6 @@ static uint16_t toSecMapFlags(uint32_t Flags) {
Ret |= static_cast<uint16_t>(OMFSegDescFlags::Write);
if (Flags & COFF::IMAGE_SCN_MEM_EXECUTE)
Ret |= static_cast<uint16_t>(OMFSegDescFlags::Execute);
- if (Flags & COFF::IMAGE_SCN_MEM_EXECUTE)
- Ret |= static_cast<uint16_t>(OMFSegDescFlags::Execute);
if (!(Flags & COFF::IMAGE_SCN_MEM_16BIT))
Ret |= static_cast<uint16_t>(OMFSegDescFlags::AddressIs32Bit);
diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp
index 7a258acbd7c0..5e6412275063 100644
--- a/llvm/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp
@@ -17,8 +17,8 @@ namespace pdb {
namespace {
-Expected<std::string> readStreamData(BinaryStream &Stream, uint32_t Limit) {
- uint32_t Offset = 0, DataLength = std::min(Limit, Stream.getLength());
+Expected<std::string> readStreamData(BinaryStream &Stream, uint64_t Limit) {
+ uint64_t Offset = 0, DataLength = std::min(Limit, Stream.getLength());
std::string Result;
Result.reserve(DataLength);
while (Offset < DataLength) {
diff --git a/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
index a508f163a2d8..f33125474e3a 100644
--- a/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
@@ -103,7 +103,7 @@ void PDBFileBuilder::addInjectedSource(StringRef Name,
// table and the hash value is dependent on the exact contents of the string.
// link.exe lowercases a path and converts / to \, so we must do the same.
SmallString<64> VName;
- sys::path::native(Name.lower(), VName);
+ sys::path::native(Name.lower(), VName, sys::path::Style::windows_backslash);
uint32_t NI = getStringTableBuilder().insert(Name);
uint32_t VNI = getStringTableBuilder().insert(VName);
diff --git a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
index 72ca72230507..f3f09584fdc9 100644
--- a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
+++ b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
@@ -280,10 +280,7 @@ bool getGNUDebuglinkContents(const ObjectFile *Obj, std::string &DebugName,
return false;
for (const SectionRef &Section : Obj->sections()) {
StringRef Name;
- if (Expected<StringRef> NameOrErr = Section.getName())
- Name = *NameOrErr;
- else
- consumeError(NameOrErr.takeError());
+ consumeError(Section.getName().moveInto(Name));
Name = Name.substr(Name.find_first_not_of("._"));
if (Name == "gnu_debuglink") {
@@ -600,7 +597,9 @@ LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) {
}
}
if (!Context)
- Context = DWARFContext::create(*Objects.second, nullptr, Opts.DWPName);
+ Context = DWARFContext::create(
+ *Objects.second, DWARFContext::ProcessDebugRelocations::Process,
+ nullptr, Opts.DWPName);
return createModuleInfo(Objects.first, std::move(Context), ModuleName);
}
@@ -650,18 +649,9 @@ StringRef demanglePE32ExternCFunc(StringRef SymbolName) {
std::string
LLVMSymbolizer::DemangleName(const std::string &Name,
const SymbolizableModule *DbiModuleDescriptor) {
- // We can spoil names of symbols with C linkage, so use an heuristic
- // approach to check if the name should be demangled.
- if (Name.substr(0, 2) == "_Z") {
- int status = 0;
- char *DemangledName =
- itaniumDemangle(Name.c_str(), nullptr, nullptr, &status);
- if (status != 0)
- return Name;
- std::string Result = DemangledName;
- free(DemangledName);
+ std::string Result;
+ if (nonMicrosoftDemangle(Name.c_str(), Result))
return Result;
- }
if (!Name.empty() && Name.front() == '?') {
// Only do MSVC C++ demangling on symbols starting with '?'.
@@ -672,7 +662,7 @@ LLVMSymbolizer::DemangleName(const std::string &Name,
MSDF_NoMemberType | MSDF_NoReturnType));
if (status != 0)
return Name;
- std::string Result = DemangledName;
+ Result = DemangledName;
free(DemangledName);
return Result;
}
diff --git a/llvm/lib/Demangle/DLangDemangle.cpp b/llvm/lib/Demangle/DLangDemangle.cpp
new file mode 100644
index 000000000000..d2f1bf4323ee
--- /dev/null
+++ b/llvm/lib/Demangle/DLangDemangle.cpp
@@ -0,0 +1,45 @@
+//===--- DLangDemangle.cpp ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines a demangler for the D programming language as specified
+/// in the ABI specification, available at:
+/// https://dlang.org/spec/abi.html#name_mangling
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Demangle/Demangle.h"
+#include "llvm/Demangle/Utility.h"
+
+#include <cstring>
+
+using namespace llvm;
+using llvm::itanium_demangle::OutputBuffer;
+
+char *llvm::dlangDemangle(const char *MangledName) {
+ if (MangledName == nullptr || strncmp(MangledName, "_D", 2) != 0)
+ return nullptr;
+
+ OutputBuffer Demangled;
+ if (!initializeOutputBuffer(nullptr, nullptr, Demangled, 1024))
+ return nullptr;
+
+ if (strcmp(MangledName, "_Dmain") == 0)
+ Demangled << "D main";
+
+ // OutputBuffer's internal buffer is not null terminated and therefore we need
+ // to add it to comply with C null terminated strings.
+ if (Demangled.getCurrentPosition() > 0) {
+ Demangled << '\0';
+ Demangled.setCurrentPosition(Demangled.getCurrentPosition() - 1);
+ return Demangled.getBuffer();
+ }
+
+ free(Demangled.getBuffer());
+ return nullptr;
+}
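At this revision dlangDemangle only recognizes the _D prefix and fully handles the special _Dmain symbol; any other _D name produces no output and the function returns nullptr. A hedged usage sketch against the entry point added above (the returned buffer is heap-allocated and owned by the caller):

#include <cstdio>
#include <cstdlib>
#include "llvm/Demangle/Demangle.h"

int main() {
  if (char *Demangled = llvm::dlangDemangle("_Dmain")) {
    std::printf("%s\n", Demangled);              // prints "D main"
    std::free(Demangled);
  }
  // Any other _D-prefixed name is not decoded yet and yields nullptr.
  if (!llvm::dlangDemangle("_D3foo3barFZv"))     // illustrative D symbol, not from this patch
    std::printf("not handled yet\n");
}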
diff --git a/llvm/lib/Demangle/Demangle.cpp b/llvm/lib/Demangle/Demangle.cpp
index 1851fb77b09e..13aa2864c183 100644
--- a/llvm/lib/Demangle/Demangle.cpp
+++ b/llvm/lib/Demangle/Demangle.cpp
@@ -12,32 +12,53 @@
#include "llvm/Demangle/Demangle.h"
#include <cstdlib>
+#include <cstring>
-static bool isItaniumEncoding(const std::string &MangledName) {
- size_t Pos = MangledName.find_first_not_of('_');
- // A valid Itanium encoding requires 1-4 leading underscores, followed by 'Z'.
- return Pos > 0 && Pos <= 4 && MangledName[Pos] == 'Z';
+static bool isItaniumEncoding(const char *S) {
+ // Itanium encoding requires 1 or 3 leading underscores, followed by 'Z'.
+ return std::strncmp(S, "_Z", 2) == 0 || std::strncmp(S, "___Z", 4) == 0;
}
-static bool isRustEncoding(const std::string &MangledName) {
+static bool isRustEncoding(const char *S) { return S[0] == '_' && S[1] == 'R'; }
+
+static bool isDLangEncoding(const std::string &MangledName) {
return MangledName.size() >= 2 && MangledName[0] == '_' &&
- MangledName[1] == 'R';
+ MangledName[1] == 'D';
}
std::string llvm::demangle(const std::string &MangledName) {
- char *Demangled;
+ std::string Result;
+ const char *S = MangledName.c_str();
+
+ if (nonMicrosoftDemangle(S, Result))
+ return Result;
+
+ if (S[0] == '_' && nonMicrosoftDemangle(S + 1, Result))
+ return Result;
+
+ if (char *Demangled =
+ microsoftDemangle(S, nullptr, nullptr, nullptr, nullptr)) {
+ Result = Demangled;
+ std::free(Demangled);
+ return Result;
+ }
+
+ return MangledName;
+}
+
+bool llvm::nonMicrosoftDemangle(const char *MangledName, std::string &Result) {
+ char *Demangled = nullptr;
if (isItaniumEncoding(MangledName))
- Demangled = itaniumDemangle(MangledName.c_str(), nullptr, nullptr, nullptr);
+ Demangled = itaniumDemangle(MangledName, nullptr, nullptr, nullptr);
else if (isRustEncoding(MangledName))
- Demangled = rustDemangle(MangledName.c_str(), nullptr, nullptr, nullptr);
- else
- Demangled = microsoftDemangle(MangledName.c_str(), nullptr, nullptr,
- nullptr, nullptr);
+ Demangled = rustDemangle(MangledName, nullptr, nullptr, nullptr);
+ else if (isDLangEncoding(MangledName))
+ Demangled = dlangDemangle(MangledName);
if (!Demangled)
- return MangledName;
+ return false;
- std::string Ret = Demangled;
+ Result = Demangled;
std::free(Demangled);
- return Ret;
+ return true;
}
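llvm::demangle now funnels Itanium, Rust, and D names through nonMicrosoftDemangle, retries with one leading underscore stripped (so Mach-O style __Z symbols work), and only then falls back to the Microsoft demangler; unrecognized input comes back unchanged. A short usage sketch; the mangled strings are ordinary examples, not taken from this patch:

#include <iostream>
#include <string>
#include "llvm/Demangle/Demangle.h"

int main() {
  std::cout << llvm::demangle("_Z3foov") << "\n";     // Itanium: foo()
  std::cout << llvm::demangle("__Z3foov") << "\n";    // leading underscore stripped: foo()
  std::cout << llvm::demangle("not_mangled") << "\n"; // returned as-is

  std::string Result;
  if (llvm::nonMicrosoftDemangle("_Z3foov", Result))  // lower-level entry point
    std::cout << Result << "\n";                      // foo()
}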
diff --git a/llvm/lib/Demangle/ItaniumDemangle.cpp b/llvm/lib/Demangle/ItaniumDemangle.cpp
index fad9b6b7b63b..3f68f76761ce 100644
--- a/llvm/lib/Demangle/ItaniumDemangle.cpp
+++ b/llvm/lib/Demangle/ItaniumDemangle.cpp
@@ -333,21 +333,21 @@ char *llvm::itaniumDemangle(const char *MangledName, char *Buf,
int InternalStatus = demangle_success;
Demangler Parser(MangledName, MangledName + std::strlen(MangledName));
- OutputStream S;
+ OutputBuffer OB;
Node *AST = Parser.parse();
if (AST == nullptr)
InternalStatus = demangle_invalid_mangled_name;
- else if (!initializeOutputStream(Buf, N, S, 1024))
+ else if (!initializeOutputBuffer(Buf, N, OB, 1024))
InternalStatus = demangle_memory_alloc_failure;
else {
assert(Parser.ForwardTemplateRefs.empty());
- AST->print(S);
- S += '\0';
+ AST->print(OB);
+ OB += '\0';
if (N != nullptr)
- *N = S.getCurrentPosition();
- Buf = S.getBuffer();
+ *N = OB.getCurrentPosition();
+ Buf = OB.getBuffer();
}
if (Status)
@@ -385,14 +385,14 @@ bool ItaniumPartialDemangler::partialDemangle(const char *MangledName) {
}
static char *printNode(const Node *RootNode, char *Buf, size_t *N) {
- OutputStream S;
- if (!initializeOutputStream(Buf, N, S, 128))
+ OutputBuffer OB;
+ if (!initializeOutputBuffer(Buf, N, OB, 128))
return nullptr;
- RootNode->print(S);
- S += '\0';
+ RootNode->print(OB);
+ OB += '\0';
if (N != nullptr)
- *N = S.getCurrentPosition();
- return S.getBuffer();
+ *N = OB.getCurrentPosition();
+ return OB.getBuffer();
}
char *ItaniumPartialDemangler::getFunctionBaseName(char *Buf, size_t *N) const {
@@ -430,8 +430,8 @@ char *ItaniumPartialDemangler::getFunctionDeclContextName(char *Buf,
return nullptr;
const Node *Name = static_cast<const FunctionEncoding *>(RootNode)->getName();
- OutputStream S;
- if (!initializeOutputStream(Buf, N, S, 128))
+ OutputBuffer OB;
+ if (!initializeOutputBuffer(Buf, N, OB, 128))
return nullptr;
KeepGoingLocalFunction:
@@ -449,25 +449,25 @@ char *ItaniumPartialDemangler::getFunctionDeclContextName(char *Buf,
switch (Name->getKind()) {
case Node::KStdQualifiedName:
- S += "std";
+ OB += "std";
break;
case Node::KNestedName:
- static_cast<const NestedName *>(Name)->Qual->print(S);
+ static_cast<const NestedName *>(Name)->Qual->print(OB);
break;
case Node::KLocalName: {
auto *LN = static_cast<const LocalName *>(Name);
- LN->Encoding->print(S);
- S += "::";
+ LN->Encoding->print(OB);
+ OB += "::";
Name = LN->Entity;
goto KeepGoingLocalFunction;
}
default:
break;
}
- S += '\0';
+ OB += '\0';
if (N != nullptr)
- *N = S.getCurrentPosition();
- return S.getBuffer();
+ *N = OB.getCurrentPosition();
+ return OB.getBuffer();
}
char *ItaniumPartialDemangler::getFunctionName(char *Buf, size_t *N) const {
@@ -483,17 +483,17 @@ char *ItaniumPartialDemangler::getFunctionParameters(char *Buf,
return nullptr;
NodeArray Params = static_cast<FunctionEncoding *>(RootNode)->getParams();
- OutputStream S;
- if (!initializeOutputStream(Buf, N, S, 128))
+ OutputBuffer OB;
+ if (!initializeOutputBuffer(Buf, N, OB, 128))
return nullptr;
- S += '(';
- Params.printWithComma(S);
- S += ')';
- S += '\0';
+ OB += '(';
+ Params.printWithComma(OB);
+ OB += ')';
+ OB += '\0';
if (N != nullptr)
- *N = S.getCurrentPosition();
- return S.getBuffer();
+ *N = OB.getCurrentPosition();
+ return OB.getBuffer();
}
char *ItaniumPartialDemangler::getFunctionReturnType(
@@ -501,18 +501,18 @@ char *ItaniumPartialDemangler::getFunctionReturnType(
if (!isFunction())
return nullptr;
- OutputStream S;
- if (!initializeOutputStream(Buf, N, S, 128))
+ OutputBuffer OB;
+ if (!initializeOutputBuffer(Buf, N, OB, 128))
return nullptr;
if (const Node *Ret =
static_cast<const FunctionEncoding *>(RootNode)->getReturnType())
- Ret->print(S);
+ Ret->print(OB);
- S += '\0';
+ OB += '\0';
if (N != nullptr)
- *N = S.getCurrentPosition();
- return S.getBuffer();
+ *N = OB.getCurrentPosition();
+ return OB.getBuffer();
}
char *ItaniumPartialDemangler::finishDemangle(char *Buf, size_t *N) const {
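The ItaniumPartialDemangler changes are a mechanical OutputStream-to-OutputBuffer rename; the public API stays the same. A hedged usage sketch of the accessors that appear above (each call may reallocate the buffer, so the returned pointer must be reused; partialDemangle returns true on failure):

#include <cstdio>
#include <cstdlib>
#include "llvm/Demangle/Demangle.h"

int main() {
  llvm::ItaniumPartialDemangler D;
  if (D.partialDemangle("_ZN1S1fEi"))              // "S::f(int)"; true means error
    return 1;

  char *Buf = nullptr;
  size_t Size = 0;
  Buf = D.getFunctionDeclContextName(Buf, &Size);  // "S"
  std::printf("%s\n", Buf);
  Buf = D.getFunctionBaseName(Buf, &Size);         // "f"
  std::printf("%s\n", Buf);
  Buf = D.getFunctionParameters(Buf, &Size);       // "(int)"
  std::printf("%s\n", Buf);
  std::free(Buf);
}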
diff --git a/llvm/lib/Demangle/MicrosoftDemangle.cpp b/llvm/lib/Demangle/MicrosoftDemangle.cpp
index 303207176be7..d8da3b48e25b 100644
--- a/llvm/lib/Demangle/MicrosoftDemangle.cpp
+++ b/llvm/lib/Demangle/MicrosoftDemangle.cpp
@@ -965,13 +965,13 @@ NamedIdentifierNode *Demangler::demangleBackRefName(StringView &MangledName) {
void Demangler::memorizeIdentifier(IdentifierNode *Identifier) {
// Render this class template name into a string buffer so that we can
// memorize it for the purpose of back-referencing.
- OutputStream OS;
- if (!initializeOutputStream(nullptr, nullptr, OS, 1024))
+ OutputBuffer OB;
+ if (!initializeOutputBuffer(nullptr, nullptr, OB, 1024))
// FIXME: Propagate out-of-memory as an error?
std::terminate();
- Identifier->output(OS, OF_Default);
- OS << '\0';
- char *Name = OS.getBuffer();
+ Identifier->output(OB, OF_Default);
+ OB << '\0';
+ char *Name = OB.getBuffer();
StringView Owned = copyString(Name);
memorizeString(Owned);
@@ -1107,7 +1107,7 @@ static void writeHexDigit(char *Buffer, uint8_t Digit) {
*Buffer = (Digit < 10) ? ('0' + Digit) : ('A' + Digit - 10);
}
-static void outputHex(OutputStream &OS, unsigned C) {
+static void outputHex(OutputBuffer &OB, unsigned C) {
assert (C != 0);
// It's easier to do the math if we can work from right to left, but we need
@@ -1130,43 +1130,43 @@ static void outputHex(OutputStream &OS, unsigned C) {
TempBuffer[Pos--] = 'x';
assert(Pos >= 0);
TempBuffer[Pos--] = '\\';
- OS << StringView(&TempBuffer[Pos + 1]);
+ OB << StringView(&TempBuffer[Pos + 1]);
}
-static void outputEscapedChar(OutputStream &OS, unsigned C) {
+static void outputEscapedChar(OutputBuffer &OB, unsigned C) {
switch (C) {
case '\0': // nul
- OS << "\\0";
+ OB << "\\0";
return;
case '\'': // single quote
- OS << "\\\'";
+ OB << "\\\'";
return;
case '\"': // double quote
- OS << "\\\"";
+ OB << "\\\"";
return;
case '\\': // backslash
- OS << "\\\\";
+ OB << "\\\\";
return;
case '\a': // bell
- OS << "\\a";
+ OB << "\\a";
return;
case '\b': // backspace
- OS << "\\b";
+ OB << "\\b";
return;
case '\f': // form feed
- OS << "\\f";
+ OB << "\\f";
return;
case '\n': // new line
- OS << "\\n";
+ OB << "\\n";
return;
case '\r': // carriage return
- OS << "\\r";
+ OB << "\\r";
return;
case '\t': // tab
- OS << "\\t";
+ OB << "\\t";
return;
case '\v': // vertical tab
- OS << "\\v";
+ OB << "\\v";
return;
default:
break;
@@ -1174,11 +1174,11 @@ static void outputEscapedChar(OutputStream &OS, unsigned C) {
if (C > 0x1F && C < 0x7F) {
// Standard ascii char.
- OS << (char)C;
+ OB << (char)C;
return;
}
- outputHex(OS, C);
+ outputHex(OB, C);
}
static unsigned countTrailingNullBytes(const uint8_t *StringBytes, int Length) {
@@ -1273,7 +1273,7 @@ FunctionSymbolNode *Demangler::demangleVcallThunkNode(StringView &MangledName) {
EncodedStringLiteralNode *
Demangler::demangleStringLiteral(StringView &MangledName) {
// This function uses goto, so declare all variables up front.
- OutputStream OS;
+ OutputBuffer OB;
StringView CRC;
uint64_t StringByteSize;
bool IsWcharT = false;
@@ -1284,7 +1284,7 @@ Demangler::demangleStringLiteral(StringView &MangledName) {
EncodedStringLiteralNode *Result = Arena.alloc<EncodedStringLiteralNode>();
// Must happen before the first `goto StringLiteralError`.
- if (!initializeOutputStream(nullptr, nullptr, OS, 1024))
+ if (!initializeOutputBuffer(nullptr, nullptr, OB, 1024))
// FIXME: Propagate out-of-memory as an error?
std::terminate();
@@ -1329,7 +1329,7 @@ Demangler::demangleStringLiteral(StringView &MangledName) {
goto StringLiteralError;
wchar_t W = demangleWcharLiteral(MangledName);
if (StringByteSize != 2 || Result->IsTruncated)
- outputEscapedChar(OS, W);
+ outputEscapedChar(OB, W);
StringByteSize -= 2;
if (Error)
goto StringLiteralError;
@@ -1371,19 +1371,19 @@ Demangler::demangleStringLiteral(StringView &MangledName) {
unsigned NextChar =
decodeMultiByteChar(StringBytes, CharIndex, CharBytes);
if (CharIndex + 1 < NumChars || Result->IsTruncated)
- outputEscapedChar(OS, NextChar);
+ outputEscapedChar(OB, NextChar);
}
}
- OS << '\0';
- ResultBuffer = OS.getBuffer();
+ OB << '\0';
+ ResultBuffer = OB.getBuffer();
Result->DecodedString = copyString(ResultBuffer);
std::free(ResultBuffer);
return Result;
StringLiteralError:
Error = true;
- std::free(OS.getBuffer());
+ std::free(OB.getBuffer());
return nullptr;
}
@@ -1447,16 +1447,16 @@ Demangler::demangleLocallyScopedNamePiece(StringView &MangledName) {
return nullptr;
// Render the parent symbol's name into a buffer.
- OutputStream OS;
- if (!initializeOutputStream(nullptr, nullptr, OS, 1024))
+ OutputBuffer OB;
+ if (!initializeOutputBuffer(nullptr, nullptr, OB, 1024))
// FIXME: Propagate out-of-memory as an error?
std::terminate();
- OS << '`';
- Scope->output(OS, OF_Default);
- OS << '\'';
- OS << "::`" << Number << "'";
- OS << '\0';
- char *Result = OS.getBuffer();
+ OB << '`';
+ Scope->output(OB, OF_Default);
+ OB << '\'';
+ OB << "::`" << Number << "'";
+ OB << '\0';
+ char *Result = OB.getBuffer();
Identifier->Name = copyString(Result);
std::free(Result);
return Identifier;
@@ -2313,19 +2313,19 @@ void Demangler::dumpBackReferences() {
(int)Backrefs.FunctionParamCount);
// Create an output stream so we can render each type.
- OutputStream OS;
- if (!initializeOutputStream(nullptr, nullptr, OS, 1024))
+ OutputBuffer OB;
+ if (!initializeOutputBuffer(nullptr, nullptr, OB, 1024))
std::terminate();
for (size_t I = 0; I < Backrefs.FunctionParamCount; ++I) {
- OS.setCurrentPosition(0);
+ OB.setCurrentPosition(0);
TypeNode *T = Backrefs.FunctionParams[I];
- T->output(OS, OF_Default);
+ T->output(OB, OF_Default);
- std::printf(" [%d] - %.*s\n", (int)I, (int)OS.getCurrentPosition(),
- OS.getBuffer());
+ std::printf(" [%d] - %.*s\n", (int)I, (int)OB.getCurrentPosition(),
+ OB.getBuffer());
}
- std::free(OS.getBuffer());
+ std::free(OB.getBuffer());
if (Backrefs.FunctionParamCount > 0)
std::printf("\n");
@@ -2342,7 +2342,7 @@ char *llvm::microsoftDemangle(const char *MangledName, size_t *NMangled,
char *Buf, size_t *N,
int *Status, MSDemangleFlags Flags) {
Demangler D;
- OutputStream S;
+ OutputBuffer OB;
StringView Name{MangledName};
SymbolNode *AST = D.parse(Name);
@@ -2361,18 +2361,20 @@ char *llvm::microsoftDemangle(const char *MangledName, size_t *NMangled,
OF = OutputFlags(OF | OF_NoReturnType);
if (Flags & MSDF_NoMemberType)
OF = OutputFlags(OF | OF_NoMemberType);
+ if (Flags & MSDF_NoVariableType)
+ OF = OutputFlags(OF | OF_NoVariableType);
int InternalStatus = demangle_success;
if (D.Error)
InternalStatus = demangle_invalid_mangled_name;
- else if (!initializeOutputStream(Buf, N, S, 1024))
+ else if (!initializeOutputBuffer(Buf, N, OB, 1024))
InternalStatus = demangle_memory_alloc_failure;
else {
- AST->output(S, OF);
- S += '\0';
+ AST->output(OB, OF);
+ OB += '\0';
if (N != nullptr)
- *N = S.getCurrentPosition();
- Buf = S.getBuffer();
+ *N = OB.getCurrentPosition();
+ Buf = OB.getBuffer();
}
if (Status)
diff --git a/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp b/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp
index 9fe157bf0d2a..32d8dff66c3f 100644
--- a/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp
+++ b/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp
@@ -21,97 +21,97 @@ using namespace ms_demangle;
#define OUTPUT_ENUM_CLASS_VALUE(Enum, Value, Desc) \
case Enum::Value: \
- OS << Desc; \
+ OB << Desc; \
break;
// Writes a space if the last token does not end with a punctuation.
-static void outputSpaceIfNecessary(OutputStream &OS) {
- if (OS.empty())
+static void outputSpaceIfNecessary(OutputBuffer &OB) {
+ if (OB.empty())
return;
- char C = OS.back();
+ char C = OB.back();
if (std::isalnum(C) || C == '>')
- OS << " ";
+ OB << " ";
}
-static void outputSingleQualifier(OutputStream &OS, Qualifiers Q) {
+static void outputSingleQualifier(OutputBuffer &OB, Qualifiers Q) {
switch (Q) {
case Q_Const:
- OS << "const";
+ OB << "const";
break;
case Q_Volatile:
- OS << "volatile";
+ OB << "volatile";
break;
case Q_Restrict:
- OS << "__restrict";
+ OB << "__restrict";
break;
default:
break;
}
}
-static bool outputQualifierIfPresent(OutputStream &OS, Qualifiers Q,
+static bool outputQualifierIfPresent(OutputBuffer &OB, Qualifiers Q,
Qualifiers Mask, bool NeedSpace) {
if (!(Q & Mask))
return NeedSpace;
if (NeedSpace)
- OS << " ";
+ OB << " ";
- outputSingleQualifier(OS, Mask);
+ outputSingleQualifier(OB, Mask);
return true;
}
-static void outputQualifiers(OutputStream &OS, Qualifiers Q, bool SpaceBefore,
+static void outputQualifiers(OutputBuffer &OB, Qualifiers Q, bool SpaceBefore,
bool SpaceAfter) {
if (Q == Q_None)
return;
- size_t Pos1 = OS.getCurrentPosition();
- SpaceBefore = outputQualifierIfPresent(OS, Q, Q_Const, SpaceBefore);
- SpaceBefore = outputQualifierIfPresent(OS, Q, Q_Volatile, SpaceBefore);
- SpaceBefore = outputQualifierIfPresent(OS, Q, Q_Restrict, SpaceBefore);
- size_t Pos2 = OS.getCurrentPosition();
+ size_t Pos1 = OB.getCurrentPosition();
+ SpaceBefore = outputQualifierIfPresent(OB, Q, Q_Const, SpaceBefore);
+ SpaceBefore = outputQualifierIfPresent(OB, Q, Q_Volatile, SpaceBefore);
+ SpaceBefore = outputQualifierIfPresent(OB, Q, Q_Restrict, SpaceBefore);
+ size_t Pos2 = OB.getCurrentPosition();
if (SpaceAfter && Pos2 > Pos1)
- OS << " ";
+ OB << " ";
}
-static void outputCallingConvention(OutputStream &OS, CallingConv CC) {
- outputSpaceIfNecessary(OS);
+static void outputCallingConvention(OutputBuffer &OB, CallingConv CC) {
+ outputSpaceIfNecessary(OB);
switch (CC) {
case CallingConv::Cdecl:
- OS << "__cdecl";
+ OB << "__cdecl";
break;
case CallingConv::Fastcall:
- OS << "__fastcall";
+ OB << "__fastcall";
break;
case CallingConv::Pascal:
- OS << "__pascal";
+ OB << "__pascal";
break;
case CallingConv::Regcall:
- OS << "__regcall";
+ OB << "__regcall";
break;
case CallingConv::Stdcall:
- OS << "__stdcall";
+ OB << "__stdcall";
break;
case CallingConv::Thiscall:
- OS << "__thiscall";
+ OB << "__thiscall";
break;
case CallingConv::Eabi:
- OS << "__eabi";
+ OB << "__eabi";
break;
case CallingConv::Vectorcall:
- OS << "__vectorcall";
+ OB << "__vectorcall";
break;
case CallingConv::Clrcall:
- OS << "__clrcall";
+ OB << "__clrcall";
break;
case CallingConv::Swift:
- OS << "__attribute__((__swiftcall__)) ";
+ OB << "__attribute__((__swiftcall__)) ";
break;
case CallingConv::SwiftAsync:
- OS << "__attribute__((__swiftasynccall__)) ";
+ OB << "__attribute__((__swiftasynccall__)) ";
break;
default:
break;
@@ -119,16 +119,16 @@ static void outputCallingConvention(OutputStream &OS, CallingConv CC) {
}
std::string Node::toString(OutputFlags Flags) const {
- OutputStream OS;
- initializeOutputStream(nullptr, nullptr, OS, 1024);
- this->output(OS, Flags);
- OS << '\0';
- std::string Owned(OS.getBuffer());
- std::free(OS.getBuffer());
+ OutputBuffer OB;
+ initializeOutputBuffer(nullptr, nullptr, OB, 1024);
+ this->output(OB, Flags);
+ OB << '\0';
+ std::string Owned(OB.getBuffer());
+ std::free(OB.getBuffer());
return Owned;
}
-void PrimitiveTypeNode::outputPre(OutputStream &OS, OutputFlags Flags) const {
+void PrimitiveTypeNode::outputPre(OutputBuffer &OB, OutputFlags Flags) const {
switch (PrimKind) {
OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Void, "void");
OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Bool, "bool");
@@ -152,107 +152,107 @@ void PrimitiveTypeNode::outputPre(OutputStream &OS, OutputFlags Flags) const {
OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Ldouble, "long double");
OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Nullptr, "std::nullptr_t");
}
- outputQualifiers(OS, Quals, true, false);
+ outputQualifiers(OB, Quals, true, false);
}
-void NodeArrayNode::output(OutputStream &OS, OutputFlags Flags) const {
- output(OS, Flags, ", ");
+void NodeArrayNode::output(OutputBuffer &OB, OutputFlags Flags) const {
+ output(OB, Flags, ", ");
}
-void NodeArrayNode::output(OutputStream &OS, OutputFlags Flags,
+void NodeArrayNode::output(OutputBuffer &OB, OutputFlags Flags,
StringView Separator) const {
if (Count == 0)
return;
if (Nodes[0])
- Nodes[0]->output(OS, Flags);
+ Nodes[0]->output(OB, Flags);
for (size_t I = 1; I < Count; ++I) {
- OS << Separator;
- Nodes[I]->output(OS, Flags);
+ OB << Separator;
+ Nodes[I]->output(OB, Flags);
}
}
-void EncodedStringLiteralNode::output(OutputStream &OS,
+void EncodedStringLiteralNode::output(OutputBuffer &OB,
OutputFlags Flags) const {
switch (Char) {
case CharKind::Wchar:
- OS << "L\"";
+ OB << "L\"";
break;
case CharKind::Char:
- OS << "\"";
+ OB << "\"";
break;
case CharKind::Char16:
- OS << "u\"";
+ OB << "u\"";
break;
case CharKind::Char32:
- OS << "U\"";
+ OB << "U\"";
break;
}
- OS << DecodedString << "\"";
+ OB << DecodedString << "\"";
if (IsTruncated)
- OS << "...";
+ OB << "...";
}
-void IntegerLiteralNode::output(OutputStream &OS, OutputFlags Flags) const {
+void IntegerLiteralNode::output(OutputBuffer &OB, OutputFlags Flags) const {
if (IsNegative)
- OS << '-';
- OS << Value;
+ OB << '-';
+ OB << Value;
}
-void TemplateParameterReferenceNode::output(OutputStream &OS,
+void TemplateParameterReferenceNode::output(OutputBuffer &OB,
OutputFlags Flags) const {
if (ThunkOffsetCount > 0)
- OS << "{";
+ OB << "{";
else if (Affinity == PointerAffinity::Pointer)
- OS << "&";
+ OB << "&";
if (Symbol) {
- Symbol->output(OS, Flags);
+ Symbol->output(OB, Flags);
if (ThunkOffsetCount > 0)
- OS << ", ";
+ OB << ", ";
}
if (ThunkOffsetCount > 0)
- OS << ThunkOffsets[0];
+ OB << ThunkOffsets[0];
for (int I = 1; I < ThunkOffsetCount; ++I) {
- OS << ", " << ThunkOffsets[I];
+ OB << ", " << ThunkOffsets[I];
}
if (ThunkOffsetCount > 0)
- OS << "}";
+ OB << "}";
}
-void IdentifierNode::outputTemplateParameters(OutputStream &OS,
+void IdentifierNode::outputTemplateParameters(OutputBuffer &OB,
OutputFlags Flags) const {
if (!TemplateParams)
return;
- OS << "<";
- TemplateParams->output(OS, Flags);
- OS << ">";
+ OB << "<";
+ TemplateParams->output(OB, Flags);
+ OB << ">";
}
-void DynamicStructorIdentifierNode::output(OutputStream &OS,
+void DynamicStructorIdentifierNode::output(OutputBuffer &OB,
OutputFlags Flags) const {
if (IsDestructor)
- OS << "`dynamic atexit destructor for ";
+ OB << "`dynamic atexit destructor for ";
else
- OS << "`dynamic initializer for ";
+ OB << "`dynamic initializer for ";
if (Variable) {
- OS << "`";
- Variable->output(OS, Flags);
- OS << "''";
+ OB << "`";
+ Variable->output(OB, Flags);
+ OB << "''";
} else {
- OS << "'";
- Name->output(OS, Flags);
- OS << "''";
+ OB << "'";
+ Name->output(OB, Flags);
+ OB << "''";
}
}
-void NamedIdentifierNode::output(OutputStream &OS, OutputFlags Flags) const {
- OS << Name;
- outputTemplateParameters(OS, Flags);
+void NamedIdentifierNode::output(OutputBuffer &OB, OutputFlags Flags) const {
+ OB << Name;
+ outputTemplateParameters(OB, Flags);
}
-void IntrinsicFunctionIdentifierNode::output(OutputStream &OS,
+void IntrinsicFunctionIdentifierNode::output(OutputBuffer &OB,
OutputFlags Flags) const {
switch (Operator) {
OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, New, "operator new");
@@ -350,188 +350,188 @@ void IntrinsicFunctionIdentifierNode::output(OutputStream &OS,
case IntrinsicFunctionKind::None:
break;
}
- outputTemplateParameters(OS, Flags);
+ outputTemplateParameters(OB, Flags);
}
-void LocalStaticGuardIdentifierNode::output(OutputStream &OS,
+void LocalStaticGuardIdentifierNode::output(OutputBuffer &OB,
OutputFlags Flags) const {
if (IsThread)
- OS << "`local static thread guard'";
+ OB << "`local static thread guard'";
else
- OS << "`local static guard'";
+ OB << "`local static guard'";
if (ScopeIndex > 0)
- OS << "{" << ScopeIndex << "}";
+ OB << "{" << ScopeIndex << "}";
}
-void ConversionOperatorIdentifierNode::output(OutputStream &OS,
+void ConversionOperatorIdentifierNode::output(OutputBuffer &OB,
OutputFlags Flags) const {
- OS << "operator";
- outputTemplateParameters(OS, Flags);
- OS << " ";
- TargetType->output(OS, Flags);
+ OB << "operator";
+ outputTemplateParameters(OB, Flags);
+ OB << " ";
+ TargetType->output(OB, Flags);
}
-void StructorIdentifierNode::output(OutputStream &OS, OutputFlags Flags) const {
+void StructorIdentifierNode::output(OutputBuffer &OB, OutputFlags Flags) const {
if (IsDestructor)
- OS << "~";
- Class->output(OS, Flags);
- outputTemplateParameters(OS, Flags);
+ OB << "~";
+ Class->output(OB, Flags);
+ outputTemplateParameters(OB, Flags);
}
-void LiteralOperatorIdentifierNode::output(OutputStream &OS,
+void LiteralOperatorIdentifierNode::output(OutputBuffer &OB,
OutputFlags Flags) const {
- OS << "operator \"\"" << Name;
- outputTemplateParameters(OS, Flags);
+ OB << "operator \"\"" << Name;
+ outputTemplateParameters(OB, Flags);
}
-void FunctionSignatureNode::outputPre(OutputStream &OS,
+void FunctionSignatureNode::outputPre(OutputBuffer &OB,
OutputFlags Flags) const {
if (!(Flags & OF_NoAccessSpecifier)) {
if (FunctionClass & FC_Public)
- OS << "public: ";
+ OB << "public: ";
if (FunctionClass & FC_Protected)
- OS << "protected: ";
+ OB << "protected: ";
if (FunctionClass & FC_Private)
- OS << "private: ";
+ OB << "private: ";
}
if (!(Flags & OF_NoMemberType)) {
if (!(FunctionClass & FC_Global)) {
if (FunctionClass & FC_Static)
- OS << "static ";
+ OB << "static ";
}
if (FunctionClass & FC_Virtual)
- OS << "virtual ";
+ OB << "virtual ";
if (FunctionClass & FC_ExternC)
- OS << "extern \"C\" ";
+ OB << "extern \"C\" ";
}
if (!(Flags & OF_NoReturnType) && ReturnType) {
- ReturnType->outputPre(OS, Flags);
- OS << " ";
+ ReturnType->outputPre(OB, Flags);
+ OB << " ";
}
if (!(Flags & OF_NoCallingConvention))
- outputCallingConvention(OS, CallConvention);
+ outputCallingConvention(OB, CallConvention);
}
-void FunctionSignatureNode::outputPost(OutputStream &OS,
+void FunctionSignatureNode::outputPost(OutputBuffer &OB,
OutputFlags Flags) const {
if (!(FunctionClass & FC_NoParameterList)) {
- OS << "(";
+ OB << "(";
if (Params)
- Params->output(OS, Flags);
+ Params->output(OB, Flags);
else
- OS << "void";
+ OB << "void";
if (IsVariadic) {
- if (OS.back() != '(')
- OS << ", ";
- OS << "...";
+ if (OB.back() != '(')
+ OB << ", ";
+ OB << "...";
}
- OS << ")";
+ OB << ")";
}
if (Quals & Q_Const)
- OS << " const";
+ OB << " const";
if (Quals & Q_Volatile)
- OS << " volatile";
+ OB << " volatile";
if (Quals & Q_Restrict)
- OS << " __restrict";
+ OB << " __restrict";
if (Quals & Q_Unaligned)
- OS << " __unaligned";
+ OB << " __unaligned";
if (IsNoexcept)
- OS << " noexcept";
+ OB << " noexcept";
if (RefQualifier == FunctionRefQualifier::Reference)
- OS << " &";
+ OB << " &";
else if (RefQualifier == FunctionRefQualifier::RValueReference)
- OS << " &&";
+ OB << " &&";
if (!(Flags & OF_NoReturnType) && ReturnType)
- ReturnType->outputPost(OS, Flags);
+ ReturnType->outputPost(OB, Flags);
}
-void ThunkSignatureNode::outputPre(OutputStream &OS, OutputFlags Flags) const {
- OS << "[thunk]: ";
+void ThunkSignatureNode::outputPre(OutputBuffer &OB, OutputFlags Flags) const {
+ OB << "[thunk]: ";
- FunctionSignatureNode::outputPre(OS, Flags);
+ FunctionSignatureNode::outputPre(OB, Flags);
}
-void ThunkSignatureNode::outputPost(OutputStream &OS, OutputFlags Flags) const {
+void ThunkSignatureNode::outputPost(OutputBuffer &OB, OutputFlags Flags) const {
if (FunctionClass & FC_StaticThisAdjust) {
- OS << "`adjustor{" << ThisAdjust.StaticOffset << "}'";
+ OB << "`adjustor{" << ThisAdjust.StaticOffset << "}'";
} else if (FunctionClass & FC_VirtualThisAdjust) {
if (FunctionClass & FC_VirtualThisAdjustEx) {
- OS << "`vtordispex{" << ThisAdjust.VBPtrOffset << ", "
+ OB << "`vtordispex{" << ThisAdjust.VBPtrOffset << ", "
<< ThisAdjust.VBOffsetOffset << ", " << ThisAdjust.VtordispOffset
<< ", " << ThisAdjust.StaticOffset << "}'";
} else {
- OS << "`vtordisp{" << ThisAdjust.VtordispOffset << ", "
+ OB << "`vtordisp{" << ThisAdjust.VtordispOffset << ", "
<< ThisAdjust.StaticOffset << "}'";
}
}
- FunctionSignatureNode::outputPost(OS, Flags);
+ FunctionSignatureNode::outputPost(OB, Flags);
}
-void PointerTypeNode::outputPre(OutputStream &OS, OutputFlags Flags) const {
+void PointerTypeNode::outputPre(OutputBuffer &OB, OutputFlags Flags) const {
if (Pointee->kind() == NodeKind::FunctionSignature) {
// If this is a pointer to a function, don't output the calling convention.
// It needs to go inside the parentheses.
const FunctionSignatureNode *Sig =
static_cast<const FunctionSignatureNode *>(Pointee);
- Sig->outputPre(OS, OF_NoCallingConvention);
+ Sig->outputPre(OB, OF_NoCallingConvention);
} else
- Pointee->outputPre(OS, Flags);
+ Pointee->outputPre(OB, Flags);
- outputSpaceIfNecessary(OS);
+ outputSpaceIfNecessary(OB);
if (Quals & Q_Unaligned)
- OS << "__unaligned ";
+ OB << "__unaligned ";
if (Pointee->kind() == NodeKind::ArrayType) {
- OS << "(";
+ OB << "(";
} else if (Pointee->kind() == NodeKind::FunctionSignature) {
- OS << "(";
+ OB << "(";
const FunctionSignatureNode *Sig =
static_cast<const FunctionSignatureNode *>(Pointee);
- outputCallingConvention(OS, Sig->CallConvention);
- OS << " ";
+ outputCallingConvention(OB, Sig->CallConvention);
+ OB << " ";
}
if (ClassParent) {
- ClassParent->output(OS, Flags);
- OS << "::";
+ ClassParent->output(OB, Flags);
+ OB << "::";
}
switch (Affinity) {
case PointerAffinity::Pointer:
- OS << "*";
+ OB << "*";
break;
case PointerAffinity::Reference:
- OS << "&";
+ OB << "&";
break;
case PointerAffinity::RValueReference:
- OS << "&&";
+ OB << "&&";
break;
default:
assert(false);
}
- outputQualifiers(OS, Quals, false, false);
+ outputQualifiers(OB, Quals, false, false);
}
-void PointerTypeNode::outputPost(OutputStream &OS, OutputFlags Flags) const {
+void PointerTypeNode::outputPost(OutputBuffer &OB, OutputFlags Flags) const {
if (Pointee->kind() == NodeKind::ArrayType ||
Pointee->kind() == NodeKind::FunctionSignature)
- OS << ")";
+ OB << ")";
- Pointee->outputPost(OS, Flags);
+ Pointee->outputPost(OB, Flags);
}
-void TagTypeNode::outputPre(OutputStream &OS, OutputFlags Flags) const {
+void TagTypeNode::outputPre(OutputBuffer &OB, OutputFlags Flags) const {
if (!(Flags & OF_NoTagSpecifier)) {
switch (Tag) {
OUTPUT_ENUM_CLASS_VALUE(TagKind, Class, "class");
@@ -539,59 +539,59 @@ void TagTypeNode::outputPre(OutputStream &OS, OutputFlags Flags) const {
OUTPUT_ENUM_CLASS_VALUE(TagKind, Union, "union");
OUTPUT_ENUM_CLASS_VALUE(TagKind, Enum, "enum");
}
- OS << " ";
+ OB << " ";
}
- QualifiedName->output(OS, Flags);
- outputQualifiers(OS, Quals, true, false);
+ QualifiedName->output(OB, Flags);
+ outputQualifiers(OB, Quals, true, false);
}
-void TagTypeNode::outputPost(OutputStream &OS, OutputFlags Flags) const {}
+void TagTypeNode::outputPost(OutputBuffer &OB, OutputFlags Flags) const {}
-void ArrayTypeNode::outputPre(OutputStream &OS, OutputFlags Flags) const {
- ElementType->outputPre(OS, Flags);
- outputQualifiers(OS, Quals, true, false);
+void ArrayTypeNode::outputPre(OutputBuffer &OB, OutputFlags Flags) const {
+ ElementType->outputPre(OB, Flags);
+ outputQualifiers(OB, Quals, true, false);
}
-void ArrayTypeNode::outputOneDimension(OutputStream &OS, OutputFlags Flags,
+void ArrayTypeNode::outputOneDimension(OutputBuffer &OB, OutputFlags Flags,
Node *N) const {
assert(N->kind() == NodeKind::IntegerLiteral);
IntegerLiteralNode *ILN = static_cast<IntegerLiteralNode *>(N);
if (ILN->Value != 0)
- ILN->output(OS, Flags);
+ ILN->output(OB, Flags);
}
-void ArrayTypeNode::outputDimensionsImpl(OutputStream &OS,
+void ArrayTypeNode::outputDimensionsImpl(OutputBuffer &OB,
OutputFlags Flags) const {
if (Dimensions->Count == 0)
return;
- outputOneDimension(OS, Flags, Dimensions->Nodes[0]);
+ outputOneDimension(OB, Flags, Dimensions->Nodes[0]);
for (size_t I = 1; I < Dimensions->Count; ++I) {
- OS << "][";
- outputOneDimension(OS, Flags, Dimensions->Nodes[I]);
+ OB << "][";
+ outputOneDimension(OB, Flags, Dimensions->Nodes[I]);
}
}
-void ArrayTypeNode::outputPost(OutputStream &OS, OutputFlags Flags) const {
- OS << "[";
- outputDimensionsImpl(OS, Flags);
- OS << "]";
+void ArrayTypeNode::outputPost(OutputBuffer &OB, OutputFlags Flags) const {
+ OB << "[";
+ outputDimensionsImpl(OB, Flags);
+ OB << "]";
- ElementType->outputPost(OS, Flags);
+ ElementType->outputPost(OB, Flags);
}
-void SymbolNode::output(OutputStream &OS, OutputFlags Flags) const {
- Name->output(OS, Flags);
+void SymbolNode::output(OutputBuffer &OB, OutputFlags Flags) const {
+ Name->output(OB, Flags);
}
-void FunctionSymbolNode::output(OutputStream &OS, OutputFlags Flags) const {
- Signature->outputPre(OS, Flags);
- outputSpaceIfNecessary(OS);
- Name->output(OS, Flags);
- Signature->outputPost(OS, Flags);
+void FunctionSymbolNode::output(OutputBuffer &OB, OutputFlags Flags) const {
+ Signature->outputPre(OB, Flags);
+ outputSpaceIfNecessary(OB);
+ Name->output(OB, Flags);
+ Signature->outputPost(OB, Flags);
}
-void VariableSymbolNode::output(OutputStream &OS, OutputFlags Flags) const {
+void VariableSymbolNode::output(OutputBuffer &OB, OutputFlags Flags) const {
const char *AccessSpec = nullptr;
bool IsStatic = true;
switch (SC) {
@@ -609,52 +609,52 @@ void VariableSymbolNode::output(OutputStream &OS, OutputFlags Flags) const {
break;
}
if (!(Flags & OF_NoAccessSpecifier) && AccessSpec)
- OS << AccessSpec << ": ";
+ OB << AccessSpec << ": ";
if (!(Flags & OF_NoMemberType) && IsStatic)
- OS << "static ";
+ OB << "static ";
- if (Type) {
- Type->outputPre(OS, Flags);
- outputSpaceIfNecessary(OS);
+ if (!(Flags & OF_NoVariableType) && Type) {
+ Type->outputPre(OB, Flags);
+ outputSpaceIfNecessary(OB);
}
- Name->output(OS, Flags);
- if (Type)
- Type->outputPost(OS, Flags);
+ Name->output(OB, Flags);
+ if (!(Flags & OF_NoVariableType) && Type)
+ Type->outputPost(OB, Flags);
}
-void CustomTypeNode::outputPre(OutputStream &OS, OutputFlags Flags) const {
- Identifier->output(OS, Flags);
+void CustomTypeNode::outputPre(OutputBuffer &OB, OutputFlags Flags) const {
+ Identifier->output(OB, Flags);
}
-void CustomTypeNode::outputPost(OutputStream &OS, OutputFlags Flags) const {}
+void CustomTypeNode::outputPost(OutputBuffer &OB, OutputFlags Flags) const {}
-void QualifiedNameNode::output(OutputStream &OS, OutputFlags Flags) const {
- Components->output(OS, Flags, "::");
+void QualifiedNameNode::output(OutputBuffer &OB, OutputFlags Flags) const {
+ Components->output(OB, Flags, "::");
}
-void RttiBaseClassDescriptorNode::output(OutputStream &OS,
+void RttiBaseClassDescriptorNode::output(OutputBuffer &OB,
OutputFlags Flags) const {
- OS << "`RTTI Base Class Descriptor at (";
- OS << NVOffset << ", " << VBPtrOffset << ", " << VBTableOffset << ", "
+ OB << "`RTTI Base Class Descriptor at (";
+ OB << NVOffset << ", " << VBPtrOffset << ", " << VBTableOffset << ", "
<< this->Flags;
- OS << ")'";
+ OB << ")'";
}
-void LocalStaticGuardVariableNode::output(OutputStream &OS,
+void LocalStaticGuardVariableNode::output(OutputBuffer &OB,
OutputFlags Flags) const {
- Name->output(OS, Flags);
+ Name->output(OB, Flags);
}
-void VcallThunkIdentifierNode::output(OutputStream &OS,
+void VcallThunkIdentifierNode::output(OutputBuffer &OB,
OutputFlags Flags) const {
- OS << "`vcall'{" << OffsetInVTable << ", {flat}}";
+ OB << "`vcall'{" << OffsetInVTable << ", {flat}}";
}
-void SpecialTableSymbolNode::output(OutputStream &OS, OutputFlags Flags) const {
- outputQualifiers(OS, Quals, false, true);
- Name->output(OS, Flags);
+void SpecialTableSymbolNode::output(OutputBuffer &OB, OutputFlags Flags) const {
+ outputQualifiers(OB, Quals, false, true);
+ Name->output(OB, Flags);
if (TargetName) {
- OS << "{for `";
- TargetName->output(OS, Flags);
- OS << "'}";
+ OB << "{for `";
+ TargetName->output(OB, Flags);
+ OB << "'}";
}
}
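The companion change in VariableSymbolNode::output above is driven by the new MSDF_NoVariableType flag plumbed through microsoftDemangle: when set, the variable's type is omitted and only its name is printed. A hedged usage sketch; "?x@@3HA" is the conventional mangling of a global "int x", not something introduced by this patch:

#include <cstdio>
#include <cstdlib>
#include "llvm/Demangle/Demangle.h"

int main() {
  int Status = 0;
  char *Full = llvm::microsoftDemangle("?x@@3HA", nullptr, nullptr, nullptr,
                                       &Status, llvm::MSDF_None);
  char *Bare = llvm::microsoftDemangle("?x@@3HA", nullptr, nullptr, nullptr,
                                       &Status, llvm::MSDF_NoVariableType);
  std::printf("%s\n", Full);  // "int x"
  std::printf("%s\n", Bare);  // "x" - the type is suppressed
  std::free(Full);
  std::free(Bare);
}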
diff --git a/llvm/lib/Demangle/RustDemangle.cpp b/llvm/lib/Demangle/RustDemangle.cpp
index f916300835ce..dcac0bd63859 100644
--- a/llvm/lib/Demangle/RustDemangle.cpp
+++ b/llvm/lib/Demangle/RustDemangle.cpp
@@ -23,7 +23,7 @@
using namespace llvm;
-using llvm::itanium_demangle::OutputStream;
+using llvm::itanium_demangle::OutputBuffer;
using llvm::itanium_demangle::StringView;
using llvm::itanium_demangle::SwapAndRestore;
@@ -88,7 +88,7 @@ class Demangler {
public:
// Demangled output.
- OutputStream Output;
+ OutputBuffer Output;
Demangler(size_t MaxRecursionLevel = 500);
@@ -135,6 +135,7 @@ private:
void printDecimalNumber(uint64_t N);
void printBasicType(BasicType);
void printLifetime(uint64_t Index);
+ void printIdentifier(Identifier Ident);
char look() const;
char consume();
@@ -163,7 +164,7 @@ char *llvm::rustDemangle(const char *MangledName, char *Buf, size_t *N,
}
Demangler D;
- if (!initializeOutputStream(nullptr, nullptr, D.Output, 1024)) {
+ if (!initializeOutputBuffer(nullptr, nullptr, D.Output, 1024)) {
if (Status != nullptr)
*Status = demangle_memory_alloc_failure;
return nullptr;
@@ -283,8 +284,7 @@ bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) {
switch (consume()) {
case 'C': {
parseOptionalBase62Number('s');
- Identifier Ident = parseIdentifier();
- print(Ident.Name);
+ printIdentifier(parseIdentifier());
break;
}
case 'M': {
@@ -333,7 +333,7 @@ bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) {
print(NS);
if (!Ident.empty()) {
print(":");
- print(Ident.Name);
+ printIdentifier(Ident);
}
print('#');
printDecimalNumber(Disambiguator);
@@ -342,7 +342,7 @@ bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) {
// Implementation internal namespaces.
if (!Ident.empty()) {
print("::");
- print(Ident.Name);
+ printIdentifier(Ident);
}
}
break;
@@ -669,6 +669,8 @@ void Demangler::demangleFnSig() {
print("C");
} else {
Identifier Ident = parseIdentifier();
+ if (Ident.Punycode)
+ Error = true;
for (char C : Ident.Name) {
// When mangling ABI string, the "-" is replaced with "_".
if (C == '_')
@@ -1078,6 +1080,172 @@ void Demangler::printLifetime(uint64_t Index) {
}
}
+static inline bool decodePunycodeDigit(char C, size_t &Value) {
+ if (isLower(C)) {
+ Value = C - 'a';
+ return true;
+ }
+
+ if (isDigit(C)) {
+ Value = 26 + (C - '0');
+ return true;
+ }
+
+ return false;
+}
+
+static void removeNullBytes(OutputBuffer &Output, size_t StartIdx) {
+ char *Buffer = Output.getBuffer();
+ char *Start = Buffer + StartIdx;
+ char *End = Buffer + Output.getCurrentPosition();
+ Output.setCurrentPosition(std::remove(Start, End, '\0') - Buffer);
+}
+
+// Encodes code point as UTF-8 and stores results in Output. Returns false if
+// CodePoint is not a valid unicode scalar value.
+static inline bool encodeUTF8(size_t CodePoint, char *Output) {
+ if (0xD800 <= CodePoint && CodePoint <= 0xDFFF)
+ return false;
+
+ if (CodePoint <= 0x7F) {
+ Output[0] = CodePoint;
+ return true;
+ }
+
+ if (CodePoint <= 0x7FF) {
+ Output[0] = 0xC0 | ((CodePoint >> 6) & 0x3F);
+ Output[1] = 0x80 | (CodePoint & 0x3F);
+ return true;
+ }
+
+ if (CodePoint <= 0xFFFF) {
+ Output[0] = 0xE0 | (CodePoint >> 12);
+ Output[1] = 0x80 | ((CodePoint >> 6) & 0x3F);
+ Output[2] = 0x80 | (CodePoint & 0x3F);
+ return true;
+ }
+
+ if (CodePoint <= 0x10FFFF) {
+ Output[0] = 0xF0 | (CodePoint >> 18);
+ Output[1] = 0x80 | ((CodePoint >> 12) & 0x3F);
+ Output[2] = 0x80 | ((CodePoint >> 6) & 0x3F);
+ Output[3] = 0x80 | (CodePoint & 0x3F);
+ return true;
+ }
+
+ return false;
+}
+
+// Decodes a Punycode-encoded string and appends the result to Output.
+// Returns true if decoding was successful.
+static bool decodePunycode(StringView Input, OutputBuffer &Output) {
+ size_t OutputSize = Output.getCurrentPosition();
+ size_t InputIdx = 0;
+
+ // Rust uses an underscore as a delimiter.
+ size_t DelimiterPos = StringView::npos;
+ for (size_t I = 0; I != Input.size(); ++I)
+ if (Input[I] == '_')
+ DelimiterPos = I;
+
+ if (DelimiterPos != StringView::npos) {
+ // Copy basic code points before the last delimiter to the output.
+ for (; InputIdx != DelimiterPos; ++InputIdx) {
+ char C = Input[InputIdx];
+ if (!isValid(C))
+ return false;
+ // Code points are padded with zeros while decoding is in progress.
+ char UTF8[4] = {C};
+ Output += StringView(UTF8, UTF8 + 4);
+ }
+ // Skip over the delimiter.
+ ++InputIdx;
+ }
+
+ size_t Base = 36;
+ size_t Skew = 38;
+ size_t Bias = 72;
+ size_t N = 0x80;
+ size_t TMin = 1;
+ size_t TMax = 26;
+ size_t Damp = 700;
+
+ auto Adapt = [&](size_t Delta, size_t NumPoints) {
+ Delta /= Damp;
+ Delta += Delta / NumPoints;
+ Damp = 2;
+
+ size_t K = 0;
+ while (Delta > (Base - TMin) * TMax / 2) {
+ Delta /= Base - TMin;
+ K += Base;
+ }
+ return K + (((Base - TMin + 1) * Delta) / (Delta + Skew));
+ };
+
+ // Main decoding loop.
+ for (size_t I = 0; InputIdx != Input.size(); I += 1) {
+ size_t OldI = I;
+ size_t W = 1;
+ size_t Max = std::numeric_limits<size_t>::max();
+ for (size_t K = Base; true; K += Base) {
+ if (InputIdx == Input.size())
+ return false;
+ char C = Input[InputIdx++];
+ size_t Digit = 0;
+ if (!decodePunycodeDigit(C, Digit))
+ return false;
+
+ if (Digit > (Max - I) / W)
+ return false;
+ I += Digit * W;
+
+ size_t T;
+ if (K <= Bias)
+ T = TMin;
+ else if (K >= Bias + TMax)
+ T = TMax;
+ else
+ T = K - Bias;
+
+ if (Digit < T)
+ break;
+
+ if (W > Max / (Base - T))
+ return false;
+ W *= (Base - T);
+ }
+ size_t NumPoints = (Output.getCurrentPosition() - OutputSize) / 4 + 1;
+ Bias = Adapt(I - OldI, NumPoints);
+
+ if (I / NumPoints > Max - N)
+ return false;
+ N += I / NumPoints;
+ I = I % NumPoints;
+
+ // Insert N at position I in the output.
+ char UTF8[4] = {};
+ if (!encodeUTF8(N, UTF8))
+ return false;
+ Output.insert(OutputSize + I * 4, UTF8, 4);
+ }
+
+ removeNullBytes(Output, OutputSize);
+ return true;
+}
+
+void Demangler::printIdentifier(Identifier Ident) {
+ if (Error || !Print)
+ return;
+
+ if (Ident.Punycode) {
+ if (!decodePunycode(Ident.Name, Output))
+ Error = true;
+ } else {
+ print(Ident.Name);
+ }
+}
+
char Demangler::look() const {
if (Error || Position >= Input.size())
return 0;
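A minimal standalone sketch of the digit mapping and UTF-8 encoding used by the new Punycode path above; all names here are illustrative and independent of the demangler sources:

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdio>

// Maps 'a'-'z' to 0-25 and '0'-'9' to 26-35, the digit alphabet the
// Rust mangling scheme's Punycode variant uses.
static bool decodeDigit(char C, size_t &Value) {
  if (C >= 'a' && C <= 'z') { Value = C - 'a'; return true; }
  if (C >= '0' && C <= '9') { Value = 26 + (C - '0'); return true; }
  return false;
}

// Encodes a Unicode scalar value as UTF-8. Returns the number of bytes
// written, or 0 for surrogates and out-of-range values.
static unsigned encodeCodePoint(uint32_t CP, unsigned char Out[4]) {
  if (CP >= 0xD800 && CP <= 0xDFFF)
    return 0;
  if (CP <= 0x7F) {
    Out[0] = CP;
    return 1;
  }
  if (CP <= 0x7FF) {
    Out[0] = 0xC0 | (CP >> 6);
    Out[1] = 0x80 | (CP & 0x3F);
    return 2;
  }
  if (CP <= 0xFFFF) {
    Out[0] = 0xE0 | (CP >> 12);
    Out[1] = 0x80 | ((CP >> 6) & 0x3F);
    Out[2] = 0x80 | (CP & 0x3F);
    return 3;
  }
  if (CP <= 0x10FFFF) {
    Out[0] = 0xF0 | (CP >> 18);
    Out[1] = 0x80 | ((CP >> 12) & 0x3F);
    Out[2] = 0x80 | ((CP >> 6) & 0x3F);
    Out[3] = 0x80 | (CP & 0x3F);
    return 4;
  }
  return 0;
}

int main() {
  size_t D;
  assert(decodeDigit('c', D) && D == 2);
  assert(decodeDigit('3', D) && D == 29);
  unsigned char Buf[4];
  assert(encodeCodePoint(0xE9, Buf) == 2); // U+00E9 -> 0xC3 0xA9
  assert(Buf[0] == 0xC3 && Buf[1] == 0xA9);
  std::puts("ok");
}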
diff --git a/llvm/lib/ExecutionEngine/ExecutionEngine.cpp b/llvm/lib/ExecutionEngine/ExecutionEngine.cpp
index c8bbf0bcdfda..fe3c433bd2c5 100644
--- a/llvm/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/llvm/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -28,13 +28,13 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DynamicLibrary.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Host.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include <cmath>
diff --git a/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp
index addec6871fa1..672fd7b991c2 100644
--- a/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp
+++ b/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp
@@ -188,8 +188,7 @@ LLVMBool LLVMCreateMCJITCompilerForModule(
for (auto &F : *Mod) {
auto Attrs = F.getAttributes();
StringRef Value = options.NoFramePointerElim ? "all" : "none";
- Attrs = Attrs.addAttribute(F.getContext(), AttributeList::FunctionIndex,
- "frame-pointer", Value);
+ Attrs = Attrs.addFnAttribute(F.getContext(), "frame-pointer", Value);
F.setAttributes(Attrs);
}
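The binding now uses the dedicated function-attribute helper instead of the index-based addAttribute. A rough equivalent through the Function API, as a sketch only (the helper name setFramePointerAttr is hypothetical, and an existing Module M is assumed):

#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Applies the "frame-pointer" string attribute to every function in M,
// mirroring what the updated binding does via AttributeList::addFnAttribute.
static void setFramePointerAttr(Module &M, bool KeepAll) {
  StringRef Value = KeepAll ? "all" : "none";
  for (Function &F : M)
    F.addFnAttr("frame-pointer", Value);
}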
diff --git a/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp b/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp
index c85e80b52e5a..4d7d5ce26668 100644
--- a/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp
@@ -1,9 +1,8 @@
//===-------- JITLink_EHFrameSupport.cpp - JITLink eh-frame utils ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -300,7 +299,7 @@ Error EHFrameEdgeFixer::processCIE(ParseContext &PC, Block &B,
if (auto Err = RecordReader.skip(PC.G.getPointerSize()))
return Err;
- // Read and sanity check the code alignment factor.
+ // Read and validate the code alignment factor.
{
uint64_t CodeAlignmentFactor = 0;
if (auto Err = RecordReader.readULEB128(CodeAlignmentFactor))
@@ -311,7 +310,7 @@ Error EHFrameEdgeFixer::processCIE(ParseContext &PC, Block &B,
" (expected 1)");
}
- // Read and sanity check the data alignment factor.
+ // Read and validate the data alignment factor.
{
int64_t DataAlignmentFactor = 0;
if (auto Err = RecordReader.readSLEB128(DataAlignmentFactor))
@@ -665,7 +664,7 @@ EHFrameEdgeFixer::readEncodedPointer(uint8_t PointerEncoding,
EffectiveType = (PointerSize == 8) ? DW_EH_PE_udata8 : DW_EH_PE_udata4;
JITTargetAddress Addr;
- Edge::Kind PointerEdgeKind;
+ Edge::Kind PointerEdgeKind = Edge::Invalid;
switch (EffectiveType) {
case DW_EH_PE_udata4: {
uint32_t Val;
diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF.cpp
index 252e44fe4a74..eb98e4ba4041 100644
--- a/llvm/lib/ExecutionEngine/JITLink/ELF.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/ELF.cpp
@@ -1,9 +1,8 @@
//===-------------- ELF.cpp - JIT linker function for ELF -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -14,6 +13,7 @@
#include "llvm/ExecutionEngine/JITLink/ELF.h"
#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/ExecutionEngine/JITLink/ELF_aarch64.h"
#include "llvm/ExecutionEngine/JITLink/ELF_riscv.h"
#include "llvm/ExecutionEngine/JITLink/ELF_x86_64.h"
#include "llvm/Object/ELF.h"
@@ -65,6 +65,8 @@ createLinkGraphFromELFObject(MemoryBufferRef ObjectBuffer) {
return TargetMachineArch.takeError();
switch (*TargetMachineArch) {
+ case ELF::EM_AARCH64:
+ return createLinkGraphFromELFObject_aarch64(ObjectBuffer);
case ELF::EM_RISCV:
return createLinkGraphFromELFObject_riscv(ObjectBuffer);
case ELF::EM_X86_64:
@@ -79,6 +81,9 @@ createLinkGraphFromELFObject(MemoryBufferRef ObjectBuffer) {
void link_ELF(std::unique_ptr<LinkGraph> G,
std::unique_ptr<JITLinkContext> Ctx) {
switch (G->getTargetTriple().getArch()) {
+ case Triple::aarch64:
+ link_ELF_aarch64(std::move(G), std::move(Ctx));
+ return;
case Triple::riscv32:
case Triple::riscv64:
link_ELF_riscv(std::move(G), std::move(Ctx));
diff --git a/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h b/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h
index 2b2a1a8db4c1..fdc987751286 100644
--- a/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h
+++ b/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h
@@ -36,11 +36,9 @@ protected:
}
Section &getCommonSection() {
- if (!CommonSection) {
- auto Prot = static_cast<sys::Memory::ProtectionFlags>(
- sys::Memory::MF_READ | sys::Memory::MF_WRITE);
- CommonSection = &G->createSection(CommonSectionName, Prot);
- }
+ if (!CommonSection)
+ CommonSection =
+ &G->createSection(CommonSectionName, MemProt::Read | MemProt::Write);
return *CommonSection;
}
@@ -110,6 +108,31 @@ protected:
Error graphifySections();
Error graphifySymbols();
+ /// Traverse all matching relocation records in the given section. The handler
+ /// function Func should be callable with this signature:
+ /// Error(const typename ELFT::Rela &,
+ /// const typename ELFT::Shdr &, Section &)
+ ///
+ template <typename RelocHandlerFunction>
+ Error forEachRelocation(const typename ELFT::Shdr &RelSect,
+ RelocHandlerFunction &&Func,
+ bool ProcessDebugSections = false);
+
+ /// Traverse all matching relocation records in the given section. Convenience
+ /// wrapper to allow passing a member function for the handler.
+ ///
+ template <typename ClassT, typename RelocHandlerMethod>
+ Error forEachRelocation(const typename ELFT::Shdr &RelSect, ClassT *Instance,
+ RelocHandlerMethod &&Method,
+ bool ProcessDebugSections = false) {
+ return forEachRelocation(
+ RelSect,
+ [Instance, Method](const auto &Rel, const auto &Target, auto &GS) {
+ return (Instance->*Method)(Rel, Target, GS);
+ },
+ ProcessDebugSections);
+ }
+
const ELFFile &Obj;
typename ELFFile::Elf_Shdr_Range Sections;
@@ -170,11 +193,14 @@ ELFLinkGraphBuilder<ELFT>::getSymbolLinkageAndScope(
// Nothing to do here.
break;
case ELF::STB_WEAK:
+ case ELF::STB_GNU_UNIQUE:
L = Linkage::Weak;
break;
default:
- return make_error<StringError>("Unrecognized symbol binding for " + Name,
- inconvertibleErrorCode());
+ return make_error<StringError>(
+ "Unrecognized symbol binding " +
+ Twine(static_cast<int>(Sym.getBinding())) + " for " + Name,
+ inconvertibleErrorCode());
}
switch (Sym.getVisibility()) {
@@ -190,8 +216,10 @@ ELFLinkGraphBuilder<ELFT>::getSymbolLinkageAndScope(
S = Scope::Hidden;
break;
case ELF::STV_INTERNAL:
- return make_error<StringError>("Unrecognized symbol visibility for " + Name,
- inconvertibleErrorCode());
+ return make_error<StringError>(
+ "Unrecognized symbol visibility " +
+ Twine(static_cast<int>(Sym.getVisibility())) + " for " + Name,
+ inconvertibleErrorCode());
}
return std::make_pair(L, S);
@@ -265,13 +293,11 @@ template <typename ELFT> Error ELFLinkGraphBuilder<ELFT>::graphifySections() {
});
// Get the section's memory protection flags.
- sys::Memory::ProtectionFlags Prot;
+ MemProt Prot;
if (Sec.sh_flags & ELF::SHF_EXECINSTR)
- Prot = static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
- sys::Memory::MF_EXEC);
+ Prot = MemProt::Read | MemProt::Exec;
else
- Prot = static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
- sys::Memory::MF_WRITE);
+ Prot = MemProt::Read | MemProt::Write;
// For now we just use this to skip the "undefined" section, probably need
// to revisit.
@@ -374,7 +400,7 @@ template <typename ELFT> Error ELFLinkGraphBuilder<ELFT>::graphifySymbols() {
if (Sym.isDefined() &&
(Sym.getType() == ELF::STT_NOTYPE || Sym.getType() == ELF::STT_FUNC ||
Sym.getType() == ELF::STT_OBJECT ||
- Sym.getType() == ELF::STT_SECTION)) {
+ Sym.getType() == ELF::STT_SECTION || Sym.getType() == ELF::STT_TLS)) {
// FIXME: Handle extended tables.
if (auto *GraphSec = getGraphSection(Sym.st_shndx)) {
@@ -421,6 +447,54 @@ template <typename ELFT> Error ELFLinkGraphBuilder<ELFT>::graphifySymbols() {
return Error::success();
}
+template <typename ELFT>
+template <typename RelocHandlerFunction>
+Error ELFLinkGraphBuilder<ELFT>::forEachRelocation(
+ const typename ELFT::Shdr &RelSect, RelocHandlerFunction &&Func,
+ bool ProcessDebugSections) {
+
+ // Only look into sections that store relocation entries.
+ if (RelSect.sh_type != ELF::SHT_RELA && RelSect.sh_type != ELF::SHT_REL)
+ return Error::success();
+
+ // sh_info contains the section header index of the target (FixupSection),
+ // which is the section to which all relocations in RelSect apply.
+ auto FixupSection = Obj.getSection(RelSect.sh_info);
+ if (!FixupSection)
+ return FixupSection.takeError();
+
+ // Target sections have names in valid ELF object files.
+ Expected<StringRef> Name = Obj.getSectionName(**FixupSection);
+ if (!Name)
+ return Name.takeError();
+ LLVM_DEBUG(dbgs() << " " << *Name << ":\n");
+
+ // Consider skipping these relocations.
+ if (!ProcessDebugSections && isDwarfSection(*Name)) {
+ LLVM_DEBUG(dbgs() << " skipped (dwarf section)\n\n");
+ return Error::success();
+ }
+
+ // Look up the link-graph node corresponding to the target section name.
+ Section *GraphSect = G->findSectionByName(*Name);
+ if (!GraphSect)
+ return make_error<StringError>(
+ "Refencing a section that wasn't added to the graph: " + *Name,
+ inconvertibleErrorCode());
+
+ auto RelEntries = Obj.relas(RelSect);
+ if (!RelEntries)
+ return RelEntries.takeError();
+
+ // Let the callee process relocation entries one by one.
+ for (const typename ELFT::Rela &R : *RelEntries)
+ if (Error Err = Func(R, **FixupSection, *GraphSect))
+ return Err;
+
+ LLVM_DEBUG(dbgs() << "\n");
+ return Error::success();
+}
+
} // end namespace jitlink
} // end namespace llvm
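The second forEachRelocation overload added above is just a pointer-to-member adapter. The same wrapping pattern in isolation, with illustrative types and names rather than the JITLink API, looks like this:

#include <cstdio>
#include <vector>

struct Rel { int Offset; };

// Generic driver: invokes Func on every record.
template <typename HandlerFn>
void forEach(const std::vector<Rel> &Rels, HandlerFn &&Func) {
  for (const Rel &R : Rels)
    Func(R);
}

// Convenience overload: wraps a member function of Instance in a lambda
// and forwards to the generic driver.
template <typename ClassT, typename Method>
void forEach(const std::vector<Rel> &Rels, ClassT *Instance, Method &&M) {
  forEach(Rels, [Instance, M](const Rel &R) { return (Instance->*M)(R); });
}

struct Builder {
  void handle(const Rel &R) { std::printf("offset %d\n", R.Offset); }
};

int main() {
  std::vector<Rel> Rels = {{4}, {12}};
  Builder B;
  forEach(Rels, &B, &Builder::handle);
}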
diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp
new file mode 100644
index 000000000000..dc183dfddfae
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp
@@ -0,0 +1,185 @@
+//===----- ELF_aarch64.cpp - JIT linker implementation for ELF/aarch64 ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// ELF/aarch64 jit-link implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/JITLink/ELF_aarch64.h"
+#include "ELFLinkGraphBuilder.h"
+#include "JITLinkGeneric.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/ExecutionEngine/JITLink/aarch64.h"
+#include "llvm/Object/ELFObjectFile.h"
+
+#define DEBUG_TYPE "jitlink"
+
+using namespace llvm;
+using namespace llvm::jitlink;
+
+namespace llvm {
+namespace jitlink {
+
+class ELFJITLinker_aarch64 : public JITLinker<ELFJITLinker_aarch64> {
+ friend class JITLinker<ELFJITLinker_aarch64>;
+
+public:
+ ELFJITLinker_aarch64(std::unique_ptr<JITLinkContext> Ctx,
+ std::unique_ptr<LinkGraph> G,
+ PassConfiguration PassConfig)
+ : JITLinker(std::move(Ctx), std::move(G), std::move(PassConfig)) {}
+
+private:
+ Error applyFixup(LinkGraph &G, Block &B, const Edge &E) const {
+ using namespace aarch64;
+ using namespace llvm::support;
+
+ char *BlockWorkingMem = B.getAlreadyMutableContent().data();
+ char *FixupPtr = BlockWorkingMem + E.getOffset();
+ JITTargetAddress FixupAddress = B.getAddress() + E.getOffset();
+ switch (E.getKind()) {
+ case aarch64::R_AARCH64_CALL26: {
+ assert((FixupAddress & 0x3) == 0 && "Call-inst is not 32-bit aligned");
+ int64_t Value = E.getTarget().getAddress() - FixupAddress + E.getAddend();
+
+ if (static_cast<uint64_t>(Value) & 0x3)
+ return make_error<JITLinkError>("Call target is not 32-bit aligned");
+
+ if (!fitsRangeSignedInt<27>(Value))
+ return makeTargetOutOfRangeError(G, B, E);
+
+ uint32_t RawInstr = *(little32_t *)FixupPtr;
+ assert((RawInstr & 0x7fffffff) == 0x14000000 &&
+ "RawInstr isn't a B or BR immediate instruction");
+ uint32_t Imm = (static_cast<uint32_t>(Value) & ((1 << 28) - 1)) >> 2;
+ uint32_t FixedInstr = RawInstr | Imm;
+ *(little32_t *)FixupPtr = FixedInstr;
+ break;
+ }
+ }
+ return Error::success();
+ }
+
+ template <uint8_t Bits> static bool fitsRangeSignedInt(int64_t Value) {
+ return Value >= -(1ll << Bits) && Value < (1ll << Bits);
+ }
+};
+
+template <typename ELFT>
+class ELFLinkGraphBuilder_aarch64 : public ELFLinkGraphBuilder<ELFT> {
+private:
+ static Expected<aarch64::EdgeKind_aarch64>
+ getRelocationKind(const uint32_t Type) {
+ using namespace aarch64;
+ switch (Type) {
+ case ELF::R_AARCH64_CALL26:
+ return EdgeKind_aarch64::R_AARCH64_CALL26;
+ }
+
+ return make_error<JITLinkError>("Unsupported aarch64 relocation:" +
+ formatv("{0:d}", Type));
+ }
+
+ Error addRelocations() override {
+ LLVM_DEBUG(dbgs() << "Processing relocations:\n");
+
+ using Base = ELFLinkGraphBuilder<ELFT>;
+ using Self = ELFLinkGraphBuilder_aarch64<ELFT>;
+ for (const auto &RelSect : Base::Sections)
+ if (Error Err = Base::forEachRelocation(RelSect, this,
+ &Self::addSingleRelocation))
+ return Err;
+
+ return Error::success();
+ }
+
+ Error addSingleRelocation(const typename ELFT::Rela &Rel,
+ const typename ELFT::Shdr &FixupSect,
+ Section &GraphSection) {
+ using Base = ELFLinkGraphBuilder<ELFT>;
+
+ uint32_t SymbolIndex = Rel.getSymbol(false);
+ auto ObjSymbol = Base::Obj.getRelocationSymbol(Rel, Base::SymTabSec);
+ if (!ObjSymbol)
+ return ObjSymbol.takeError();
+
+ Symbol *GraphSymbol = Base::getGraphSymbol(SymbolIndex);
+ if (!GraphSymbol)
+ return make_error<StringError>(
+ formatv("Could not find symbol at given index, did you add it to "
+ "JITSymbolTable? index: {0}, shndx: {1} Size of table: {2}",
+ SymbolIndex, (*ObjSymbol)->st_shndx,
+ Base::GraphSymbols.size()),
+ inconvertibleErrorCode());
+
+ uint32_t Type = Rel.getType(false);
+ Expected<aarch64::EdgeKind_aarch64> Kind = getRelocationKind(Type);
+ if (!Kind)
+ return Kind.takeError();
+
+ int64_t Addend = Rel.r_addend;
+ Block *BlockToFix = *(GraphSection.blocks().begin());
+ JITTargetAddress FixupAddress = FixupSect.sh_addr + Rel.r_offset;
+ Edge::OffsetT Offset = FixupAddress - BlockToFix->getAddress();
+ Edge GE(*Kind, Offset, *GraphSymbol, Addend);
+ LLVM_DEBUG({
+ dbgs() << " ";
+ printEdge(dbgs(), *BlockToFix, GE, aarch64::getEdgeKindName(*Kind));
+ dbgs() << "\n";
+ });
+
+ BlockToFix->addEdge(std::move(GE));
+ return Error::success();
+ }
+
+public:
+ ELFLinkGraphBuilder_aarch64(StringRef FileName,
+ const object::ELFFile<ELFT> &Obj, const Triple T)
+ : ELFLinkGraphBuilder<ELFT>(Obj, std::move(T), FileName,
+ aarch64::getEdgeKindName) {}
+};
+
+Expected<std::unique_ptr<LinkGraph>>
+createLinkGraphFromELFObject_aarch64(MemoryBufferRef ObjectBuffer) {
+ LLVM_DEBUG({
+ dbgs() << "Building jitlink graph for new input "
+ << ObjectBuffer.getBufferIdentifier() << "...\n";
+ });
+
+ auto ELFObj = object::ObjectFile::createELFObjectFile(ObjectBuffer);
+ if (!ELFObj)
+ return ELFObj.takeError();
+
+ assert((*ELFObj)->getArch() == Triple::aarch64 &&
+ "Only AArch64 (little endian) is supported for now");
+
+ auto &ELFObjFile = cast<object::ELFObjectFile<object::ELF64LE>>(**ELFObj);
+ return ELFLinkGraphBuilder_aarch64<object::ELF64LE>((*ELFObj)->getFileName(),
+ ELFObjFile.getELFFile(),
+ (*ELFObj)->makeTriple())
+ .buildGraph();
+}
+
+void link_ELF_aarch64(std::unique_ptr<LinkGraph> G,
+ std::unique_ptr<JITLinkContext> Ctx) {
+ PassConfiguration Config;
+ const Triple &TT = G->getTargetTriple();
+ if (Ctx->shouldAddDefaultTargetPasses(TT)) {
+ if (auto MarkLive = Ctx->getMarkLivePass(TT))
+ Config.PrePrunePasses.push_back(std::move(MarkLive));
+ else
+ Config.PrePrunePasses.push_back(markAllSymbolsLive);
+ }
+ if (auto Err = Ctx->modifyPassConfig(*G, Config))
+ return Ctx->notifyFailed(std::move(Err));
+
+ ELFJITLinker_aarch64::link(std::move(Ctx), std::move(G), std::move(Config));
+}
+
+} // namespace jitlink
+} // namespace llvm
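The CALL26 fixup above packs a 4-byte-aligned, PC-relative offset into the low 26 bits of a B/BL instruction after a +/-128 MiB range check. A small, self-contained sketch of that encoding step, using hypothetical helper names rather than the JITLink code:

#include <cassert>
#include <cstdint>

// True if Value fits the signed range [-2^Bits, 2^Bits), matching the
// fitsRangeSignedInt<27> check used for the byte offset of a 26-bit
// word-granular branch immediate.
template <unsigned Bits> static bool fitsSigned(int64_t Value) {
  return Value >= -(1ll << Bits) && Value < (1ll << Bits);
}

// Patches the 26-bit immediate field of a B/BL instruction with the
// word offset (Delta >> 2).
static uint32_t encodeCall26(uint32_t RawInstr, int64_t Delta) {
  assert((Delta & 0x3) == 0 && "branch target must be 32-bit aligned");
  assert(fitsSigned<27>(Delta) && "branch target out of range");
  uint32_t Imm = (static_cast<uint32_t>(Delta) & ((1u << 28) - 1)) >> 2;
  return (RawInstr & ~((1u << 26) - 1)) | Imm;
}

int main() {
  // 0x14000000 is an unconditional B with a zero immediate.
  uint32_t Patched = encodeCall26(0x14000000u, 0x100); // branch +256 bytes
  assert(Patched == (0x14000000u | 0x40u));
}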
diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp
index d0e65ef1c3ac..b057788ce3ef 100644
--- a/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp
@@ -11,17 +11,117 @@
//===----------------------------------------------------------------------===//
#include "llvm/ExecutionEngine/JITLink/ELF_riscv.h"
+#include "ELFLinkGraphBuilder.h"
+#include "JITLinkGeneric.h"
+#include "PerGraphGOTAndPLTStubsBuilder.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/ExecutionEngine/JITLink/JITLink.h"
#include "llvm/ExecutionEngine/JITLink/riscv.h"
#include "llvm/Object/ELF.h"
#include "llvm/Object/ELFObjectFile.h"
-#include "ELFLinkGraphBuilder.h"
-#include "JITLinkGeneric.h"
-
#define DEBUG_TYPE "jitlink"
using namespace llvm;
+using namespace llvm::jitlink;
+using namespace llvm::jitlink::riscv;
+
+namespace {
+
+class PerGraphGOTAndPLTStubsBuilder_ELF_riscv
+ : public PerGraphGOTAndPLTStubsBuilder<
+ PerGraphGOTAndPLTStubsBuilder_ELF_riscv> {
+public:
+ static constexpr size_t StubEntrySize = 16;
+ static const uint8_t NullGOTEntryContent[8];
+ static const uint8_t RV64StubContent[StubEntrySize];
+ static const uint8_t RV32StubContent[StubEntrySize];
+
+ using PerGraphGOTAndPLTStubsBuilder<
+ PerGraphGOTAndPLTStubsBuilder_ELF_riscv>::PerGraphGOTAndPLTStubsBuilder;
+
+ bool isRV64() const { return G.getPointerSize() == 8; }
+
+ bool isGOTEdgeToFix(Edge &E) const { return E.getKind() == R_RISCV_GOT_HI20; }
+
+ Symbol &createGOTEntry(Symbol &Target) {
+ Block &GOTBlock = G.createContentBlock(
+ getGOTSection(), getGOTEntryBlockContent(), 0, G.getPointerSize(), 0);
+ GOTBlock.addEdge(isRV64() ? R_RISCV_64 : R_RISCV_32, 0, Target, 0);
+ return G.addAnonymousSymbol(GOTBlock, 0, G.getPointerSize(), false, false);
+ }
+
+ Symbol &createPLTStub(Symbol &Target) {
+ Block &StubContentBlock =
+ G.createContentBlock(getStubsSection(), getStubBlockContent(), 0, 4, 0);
+ auto &GOTEntrySymbol = getGOTEntry(Target);
+ StubContentBlock.addEdge(R_RISCV_CALL, 0, GOTEntrySymbol, 0);
+ return G.addAnonymousSymbol(StubContentBlock, 0, StubEntrySize, true,
+ false);
+ }
+
+ void fixGOTEdge(Edge &E, Symbol &GOTEntry) {
+ // Replace the relocation pair (R_RISCV_GOT_HI20, R_RISCV_PCREL_LO12)
+ // with (R_RISCV_PCREL_HI20, R_RISCV_PCREL_LO12); all that is needed here
+ // is to change R_RISCV_GOT_HI20 to R_RISCV_PCREL_HI20.
+ E.setKind(R_RISCV_PCREL_HI20);
+ E.setTarget(GOTEntry);
+ }
+
+ void fixPLTEdge(Edge &E, Symbol &PLTStubs) {
+ assert(E.getKind() == R_RISCV_CALL_PLT && "Not a R_RISCV_CALL_PLT edge?");
+ E.setKind(R_RISCV_CALL);
+ E.setTarget(PLTStubs);
+ }
+
+ bool isExternalBranchEdge(Edge &E) const {
+ return E.getKind() == R_RISCV_CALL_PLT;
+ }
+
+private:
+ Section &getGOTSection() const {
+ if (!GOTSection)
+ GOTSection = &G.createSection("$__GOT", MemProt::Read);
+ return *GOTSection;
+ }
+
+ Section &getStubsSection() const {
+ if (!StubsSection)
+ StubsSection =
+ &G.createSection("$__STUBS", MemProt::Read | MemProt::Exec);
+ return *StubsSection;
+ }
+
+ ArrayRef<char> getGOTEntryBlockContent() {
+ return {reinterpret_cast<const char *>(NullGOTEntryContent),
+ G.getPointerSize()};
+ }
+
+ ArrayRef<char> getStubBlockContent() {
+ auto StubContent = isRV64() ? RV64StubContent : RV32StubContent;
+ return {reinterpret_cast<const char *>(StubContent), StubEntrySize};
+ }
+
+ mutable Section *GOTSection = nullptr;
+ mutable Section *StubsSection = nullptr;
+};
+const uint8_t PerGraphGOTAndPLTStubsBuilder_ELF_riscv::NullGOTEntryContent[8] =
+ {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
+
+const uint8_t
+ PerGraphGOTAndPLTStubsBuilder_ELF_riscv::RV64StubContent[StubEntrySize] = {
+ 0x17, 0x0e, 0x00, 0x00, // auipc t3, literal
+ 0x03, 0x3e, 0x0e, 0x00, // ld t3, literal(t3)
+ 0x67, 0x00, 0x0e, 0x00, // jr t3
+ 0x13, 0x00, 0x00, 0x00}; // nop
+
+const uint8_t
+ PerGraphGOTAndPLTStubsBuilder_ELF_riscv::RV32StubContent[StubEntrySize] = {
+ 0x17, 0x0e, 0x00, 0x00, // auipc t3, literal
+ 0x03, 0x2e, 0x0e, 0x00, // lw t3, literal(t3)
+ 0x67, 0x00, 0x0e, 0x00, // jr t3
+ 0x13, 0x00, 0x00, 0x00}; // nop
+} // namespace
namespace llvm {
namespace jitlink {
@@ -78,6 +178,16 @@ private:
char *FixupPtr = BlockWorkingMem + E.getOffset();
JITTargetAddress FixupAddress = B.getAddress() + E.getOffset();
switch (E.getKind()) {
+ case R_RISCV_32: {
+ int64_t Value = E.getTarget().getAddress() + E.getAddend();
+ *(little32_t *)FixupPtr = static_cast<uint32_t>(Value);
+ break;
+ }
+ case R_RISCV_64: {
+ int64_t Value = E.getTarget().getAddress() + E.getAddend();
+ *(little64_t *)FixupPtr = static_cast<uint64_t>(Value);
+ break;
+ }
case R_RISCV_HI20: {
int64_t Value = E.getTarget().getAddress() + E.getAddend();
int32_t Hi = (Value + 0x800) & 0xFFFFF000;
@@ -163,6 +273,10 @@ private:
return EdgeKind_riscv::R_RISCV_PCREL_LO12_I;
case ELF::R_RISCV_PCREL_LO12_S:
return EdgeKind_riscv::R_RISCV_PCREL_LO12_S;
+ case ELF::R_RISCV_GOT_HI20:
+ return EdgeKind_riscv::R_RISCV_GOT_HI20;
+ case ELF::R_RISCV_CALL_PLT:
+ return EdgeKind_riscv::R_RISCV_CALL_PLT;
}
return make_error<JITLinkError>("Unsupported riscv relocation:" +
@@ -170,93 +284,54 @@ private:
}
Error addRelocations() override {
+ LLVM_DEBUG(dbgs() << "Processing relocations:\n");
+
using Base = ELFLinkGraphBuilder<ELFT>;
- LLVM_DEBUG(dbgs() << "Adding relocations\n");
-
- // TODO a partern is forming of iterate some sections but only give me
- // ones I am interested, I should abstract that concept some where
- for (auto &SecRef : Base::Sections) {
- if (SecRef.sh_type != ELF::SHT_RELA && SecRef.sh_type != ELF::SHT_REL)
- continue;
- auto RelSectName = Base::Obj.getSectionName(SecRef);
- if (!RelSectName)
- return RelSectName.takeError();
-
- LLVM_DEBUG({
- dbgs() << "Adding relocations from section " << *RelSectName << "\n";
- });
-
- auto UpdateSection = Base::Obj.getSection(SecRef.sh_info);
- if (!UpdateSection)
- return UpdateSection.takeError();
-
- auto UpdateSectionName = Base::Obj.getSectionName(**UpdateSection);
- if (!UpdateSectionName)
- return UpdateSectionName.takeError();
- // Don't process relocations for debug sections.
- if (Base::isDwarfSection(*UpdateSectionName)) {
- LLVM_DEBUG({
- dbgs() << " Target is dwarf section " << *UpdateSectionName
- << ". Skipping.\n";
- });
- continue;
- } else
- LLVM_DEBUG({
- dbgs() << " For target section " << *UpdateSectionName << "\n";
- });
-
- auto *JITSection = Base::G->findSectionByName(*UpdateSectionName);
- if (!JITSection)
- return make_error<llvm::StringError>(
- "Refencing a section that wasn't added to graph" +
- *UpdateSectionName,
- llvm::inconvertibleErrorCode());
-
- auto Relocations = Base::Obj.relas(SecRef);
- if (!Relocations)
- return Relocations.takeError();
-
- for (const auto &Rela : *Relocations) {
- auto Type = Rela.getType(false);
-
- LLVM_DEBUG({
- dbgs() << "Relocation Type: " << Type << "\n"
- << "Name: " << Base::Obj.getRelocationTypeName(Type) << "\n";
- });
-
- auto SymbolIndex = Rela.getSymbol(false);
- auto Symbol = Base::Obj.getRelocationSymbol(Rela, Base::SymTabSec);
- if (!Symbol)
- return Symbol.takeError();
-
- auto BlockToFix = *(JITSection->blocks().begin());
- auto *TargetSymbol = Base::getGraphSymbol(SymbolIndex);
-
- if (!TargetSymbol) {
- return make_error<llvm::StringError>(
- "Could not find symbol at given index, did you add it to "
- "JITSymbolTable? index: " +
- std::to_string(SymbolIndex) + ", shndx: " +
- std::to_string((*Symbol)->st_shndx) + " Size of table: " +
- std::to_string(Base::GraphSymbols.size()),
- llvm::inconvertibleErrorCode());
- }
- int64_t Addend = Rela.r_addend;
- JITTargetAddress FixupAddress =
- (*UpdateSection)->sh_addr + Rela.r_offset;
-
- LLVM_DEBUG({
- dbgs() << "Processing relocation at "
- << format("0x%016" PRIx64, FixupAddress) << "\n";
- });
- auto Kind = getRelocationKind(Type);
- if (!Kind)
- return Kind.takeError();
-
- BlockToFix->addEdge(*Kind, FixupAddress - BlockToFix->getAddress(),
- *TargetSymbol, Addend);
- }
- }
+ using Self = ELFLinkGraphBuilder_riscv<ELFT>;
+ for (const auto &RelSect : Base::Sections)
+ if (Error Err = Base::forEachRelocation(RelSect, this,
+ &Self::addSingleRelocation))
+ return Err;
+
+ return Error::success();
+ }
+
+ Error addSingleRelocation(const typename ELFT::Rela &Rel,
+ const typename ELFT::Shdr &FixupSect,
+ Section &GraphSection) {
+ using Base = ELFLinkGraphBuilder<ELFT>;
+
+ uint32_t SymbolIndex = Rel.getSymbol(false);
+ auto ObjSymbol = Base::Obj.getRelocationSymbol(Rel, Base::SymTabSec);
+ if (!ObjSymbol)
+ return ObjSymbol.takeError();
+
+ Symbol *GraphSymbol = Base::getGraphSymbol(SymbolIndex);
+ if (!GraphSymbol)
+ return make_error<StringError>(
+ formatv("Could not find symbol at given index, did you add it to "
+ "JITSymbolTable? index: {0}, shndx: {1} Size of table: {2}",
+ SymbolIndex, (*ObjSymbol)->st_shndx,
+ Base::GraphSymbols.size()),
+ inconvertibleErrorCode());
+
+ uint32_t Type = Rel.getType(false);
+ Expected<riscv::EdgeKind_riscv> Kind = getRelocationKind(Type);
+ if (!Kind)
+ return Kind.takeError();
+
+ int64_t Addend = Rel.r_addend;
+ Block *BlockToFix = *(GraphSection.blocks().begin());
+ JITTargetAddress FixupAddress = FixupSect.sh_addr + Rel.r_offset;
+ Edge::OffsetT Offset = FixupAddress - BlockToFix->getAddress();
+ Edge GE(*Kind, Offset, *GraphSymbol, Addend);
+ LLVM_DEBUG({
+ dbgs() << " ";
+ printEdge(dbgs(), *BlockToFix, GE, riscv::getEdgeKindName(*Kind));
+ dbgs() << "\n";
+ });
+
+ BlockToFix->addEdge(std::move(GE));
return Error::success();
}
@@ -304,6 +379,8 @@ void link_ELF_riscv(std::unique_ptr<LinkGraph> G,
Config.PrePrunePasses.push_back(std::move(MarkLive));
else
Config.PrePrunePasses.push_back(markAllSymbolsLive);
+ Config.PostPrunePasses.push_back(
+ PerGraphGOTAndPLTStubsBuilder_ELF_riscv::asPass);
}
if (auto Err = Ctx->modifyPassConfig(*G, Config))
return Ctx->notifyFailed(std::move(Err));
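The new R_RISCV_HI20 handling relies on the standard RISC-V trick of adding 0x800 before masking so that the sign-extended low 12 bits recombine exactly with the upper 20. A tiny self-check of that identity, illustrative only and separate from JITLink:

#include <cassert>
#include <cstdint>

// Splits Value into the HI20/LO12 pair consumed by a LUI/ADDI sequence.
// Adding 0x800 before masking compensates for the ADDI sign-extending
// its 12-bit immediate.
static void splitHiLo(int32_t Value, int32_t &Hi, int32_t &Lo) {
  Hi = (Value + 0x800) & 0xFFFFF000;
  Lo = Value & 0xFFF;
  if (Lo >= 0x800)
    Lo -= 0x1000; // what the hardware's sign extension yields
}

int main() {
  for (int32_t V : {0, 0x7FF, 0x800, 0x12345678, -0x800, -1}) {
    int32_t Hi, Lo;
    splitHiLo(V, Hi, Lo);
    assert(Hi + Lo == V); // the pair must reassemble to the original value
  }
}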
diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp
index a5aed6d25200..3ea9ffee6554 100644
--- a/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp
@@ -12,6 +12,7 @@
#include "llvm/ExecutionEngine/JITLink/ELF_x86_64.h"
#include "llvm/ExecutionEngine/JITLink/JITLink.h"
+#include "llvm/ExecutionEngine/JITLink/TableManager.h"
#include "llvm/ExecutionEngine/JITLink/x86_64.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Support/Endian.h"
@@ -20,7 +21,6 @@
#include "EHFrameSupportImpl.h"
#include "ELFLinkGraphBuilder.h"
#include "JITLinkGeneric.h"
-#include "PerGraphGOTAndPLTStubsBuilder.h"
#define DEBUG_TYPE "jitlink"
@@ -30,196 +30,82 @@ using namespace llvm::jitlink::ELF_x86_64_Edges;
namespace {
-constexpr StringRef ELFGOTSectionName = "$__GOT";
constexpr StringRef ELFGOTSymbolName = "_GLOBAL_OFFSET_TABLE_";
+constexpr StringRef ELFTLSInfoSectionName = "$__TLSINFO";
-class PerGraphGOTAndPLTStubsBuilder_ELF_x86_64
- : public PerGraphGOTAndPLTStubsBuilder<
- PerGraphGOTAndPLTStubsBuilder_ELF_x86_64> {
+class TLSInfoTableManager_ELF_x86_64
+ : public TableManager<TLSInfoTableManager_ELF_x86_64> {
public:
- static const uint8_t NullGOTEntryContent[8];
- static const uint8_t StubContent[6];
-
- using PerGraphGOTAndPLTStubsBuilder<
- PerGraphGOTAndPLTStubsBuilder_ELF_x86_64>::PerGraphGOTAndPLTStubsBuilder;
-
- bool isGOTEdgeToFix(Edge &E) const {
- if (E.getKind() == GOTOFF64) {
- // We need to make sure that the GOT section exists, but don't otherwise
- // need to fix up this edge.
- getGOTSection();
- return false;
- }
-
- return E.getKind() == PCRel32GOT || E.getKind() == PCRel32GOTLoad ||
- E.getKind() == PCRel64GOT || E.getKind() == GOT64;
- }
+ static const uint8_t TLSInfoEntryContent[16];
- Symbol &createGOTEntry(Symbol &Target) {
- auto &GOTEntryBlock = G.createContentBlock(
- getGOTSection(), getGOTEntryBlockContent(), 0, 8, 0);
- GOTEntryBlock.addEdge(Pointer64, 0, Target, 0);
- return G.addAnonymousSymbol(GOTEntryBlock, 0, 8, false, false);
- }
+ static StringRef getSectionName() { return ELFTLSInfoSectionName; }
- void fixGOTEdge(Edge &E, Symbol &GOTEntry) {
- // If this is a PCRel32GOT/PCRel64GOT then change it to an ordinary
- // PCRel32/PCRel64. If it is a PCRel32GOTLoad then leave it as-is for now:
- // We will use the kind to check for GOT optimization opportunities in the
- // optimizeMachO_x86_64_GOTAndStubs pass below.
- // If it's a GOT64 leave it as is.
- switch (E.getKind()) {
- case PCRel32GOT:
- E.setKind(PCRel32);
- break;
- case PCRel64GOT:
- E.setKind(PCRel64);
- break;
- case GOT64:
- break;
- case PCRel32GOTLoad:
- break;
- default:
- llvm_unreachable("Unexpected GOT edge kind");
+ bool visitEdge(LinkGraph &G, Block *B, Edge &E) {
+ if (E.getKind() == x86_64::RequestTLSDescInGOTAndTransformToDelta32) {
+ LLVM_DEBUG({
+ dbgs() << " Fixing " << G.getEdgeKindName(E.getKind()) << " edge at "
+ << formatv("{0:x}", B->getFixupAddress(E)) << " ("
+ << formatv("{0:x}", B->getAddress()) << " + "
+ << formatv("{0:x}", E.getOffset()) << ")\n";
+ });
+ E.setKind(x86_64::Delta32);
+ E.setTarget(getEntryForTarget(G, E.getTarget()));
+ return true;
}
-
- E.setTarget(GOTEntry);
- // Leave the edge addend as-is.
+ return false;
}
- bool isExternalBranchEdge(Edge &E) {
- return E.getKind() == Branch32 && !E.getTarget().isDefined();
- }
-
- Symbol &createPLTStub(Symbol &Target) {
- auto &StubContentBlock =
- G.createContentBlock(getStubsSection(), getStubBlockContent(), 0, 1, 0);
- // Re-use GOT entries for stub targets.
- auto &GOTEntrySymbol = getGOTEntry(Target);
- StubContentBlock.addEdge(PCRel32, 2, GOTEntrySymbol, -4);
- return G.addAnonymousSymbol(StubContentBlock, 0, 6, true, false);
- }
-
- void fixPLTEdge(Edge &E, Symbol &Stub) {
- assert(E.getKind() == Branch32 && "Not a Branch32 edge?");
-
- // Set the edge kind to Branch32ToStub. We will use this to check for stub
- // optimization opportunities in the optimize ELF_x86_64_GOTAndStubs pass
- // below.
- E.setKind(Branch32ToStub);
- E.setTarget(Stub);
+ Symbol &createEntry(LinkGraph &G, Symbol &Target) {
+ // The TLS Info entry's key value will be written by the fixTLVSectionByName
+ // pass, so create mutable content.
+ auto &TLSInfoEntry = G.createMutableContentBlock(
+ getTLSInfoSection(G), G.allocateContent(getTLSInfoEntryContent()), 0, 8,
+ 0);
+ TLSInfoEntry.addEdge(x86_64::Pointer64, 8, Target, 0);
+ return G.addAnonymousSymbol(TLSInfoEntry, 0, 16, false, false);
}
private:
- Section &getGOTSection() const {
- if (!GOTSection)
- GOTSection = &G.createSection(ELFGOTSectionName, sys::Memory::MF_READ);
- return *GOTSection;
- }
-
- Section &getStubsSection() const {
- if (!StubsSection) {
- auto StubsProt = static_cast<sys::Memory::ProtectionFlags>(
- sys::Memory::MF_READ | sys::Memory::MF_EXEC);
- StubsSection = &G.createSection("$__STUBS", StubsProt);
- }
- return *StubsSection;
- }
-
- ArrayRef<char> getGOTEntryBlockContent() {
- return {reinterpret_cast<const char *>(NullGOTEntryContent),
- sizeof(NullGOTEntryContent)};
+ Section &getTLSInfoSection(LinkGraph &G) {
+ if (!TLSInfoTable)
+ TLSInfoTable = &G.createSection(ELFTLSInfoSectionName, MemProt::Read);
+ return *TLSInfoTable;
}
- ArrayRef<char> getStubBlockContent() {
- return {reinterpret_cast<const char *>(StubContent), sizeof(StubContent)};
+ ArrayRef<char> getTLSInfoEntryContent() const {
+ return {reinterpret_cast<const char *>(TLSInfoEntryContent),
+ sizeof(TLSInfoEntryContent)};
}
- mutable Section *GOTSection = nullptr;
- mutable Section *StubsSection = nullptr;
+ Section *TLSInfoTable = nullptr;
};
-} // namespace
+const uint8_t TLSInfoTableManager_ELF_x86_64::TLSInfoEntryContent[16] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /*pthread key */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 /*data address*/
+};
-const uint8_t PerGraphGOTAndPLTStubsBuilder_ELF_x86_64::NullGOTEntryContent[8] =
- {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
-const uint8_t PerGraphGOTAndPLTStubsBuilder_ELF_x86_64::StubContent[6] = {
- 0xFF, 0x25, 0x00, 0x00, 0x00, 0x00};
-
-static Error optimizeELF_x86_64_GOTAndStubs(LinkGraph &G) {
- LLVM_DEBUG(dbgs() << "Optimizing GOT entries and stubs:\n");
-
- for (auto *B : G.blocks())
- for (auto &E : B->edges())
- if (E.getKind() == PCRel32GOTLoad) {
- // Replace GOT load with LEA only for MOVQ instructions.
- constexpr uint8_t MOVQRIPRel[] = {0x48, 0x8b};
- if (E.getOffset() < 3 ||
- strncmp(B->getContent().data() + E.getOffset() - 3,
- reinterpret_cast<const char *>(MOVQRIPRel), 2) != 0)
- continue;
-
- auto &GOTBlock = E.getTarget().getBlock();
- assert(GOTBlock.getSize() == G.getPointerSize() &&
- "GOT entry block should be pointer sized");
- assert(GOTBlock.edges_size() == 1 &&
- "GOT entry should only have one outgoing edge");
-
- auto &GOTTarget = GOTBlock.edges().begin()->getTarget();
- JITTargetAddress EdgeAddr = B->getAddress() + E.getOffset();
- JITTargetAddress TargetAddr = GOTTarget.getAddress();
-
- int64_t Displacement = TargetAddr - EdgeAddr + 4;
- if (Displacement >= std::numeric_limits<int32_t>::min() &&
- Displacement <= std::numeric_limits<int32_t>::max()) {
- // Change the edge kind as we don't go through GOT anymore. This is
- // for formal correctness only. Technically, the two relocation kinds
- // are resolved the same way.
- E.setKind(PCRel32);
- E.setTarget(GOTTarget);
- auto *BlockData = reinterpret_cast<uint8_t *>(
- const_cast<char *>(B->getContent().data()));
- BlockData[E.getOffset() - 2] = 0x8d;
- LLVM_DEBUG({
- dbgs() << " Replaced GOT load wih LEA:\n ";
- printEdge(dbgs(), *B, E, getELFX86RelocationKindName(E.getKind()));
- dbgs() << "\n";
- });
- }
- } else if (E.getKind() == Branch32ToStub) {
- auto &StubBlock = E.getTarget().getBlock();
- assert(
- StubBlock.getSize() ==
- sizeof(PerGraphGOTAndPLTStubsBuilder_ELF_x86_64::StubContent) &&
- "Stub block should be stub sized");
- assert(StubBlock.edges_size() == 1 &&
- "Stub block should only have one outgoing edge");
-
- auto &GOTBlock = StubBlock.edges().begin()->getTarget().getBlock();
- assert(GOTBlock.getSize() == G.getPointerSize() &&
- "GOT block should be pointer sized");
- assert(GOTBlock.edges_size() == 1 &&
- "GOT block should only have one outgoing edge");
-
- auto &GOTTarget = GOTBlock.edges().begin()->getTarget();
- JITTargetAddress EdgeAddr = B->getAddress() + E.getOffset();
- JITTargetAddress TargetAddr = GOTTarget.getAddress();
-
- int64_t Displacement = TargetAddr - EdgeAddr + 4;
- if (Displacement >= std::numeric_limits<int32_t>::min() &&
- Displacement <= std::numeric_limits<int32_t>::max()) {
- E.setKind(Branch32);
- E.setTarget(GOTTarget);
- LLVM_DEBUG({
- dbgs() << " Replaced stub branch with direct branch:\n ";
- printEdge(dbgs(), *B, E, getELFX86RelocationKindName(E.getKind()));
- dbgs() << "\n";
- });
- }
- }
+Error buildTables_ELF_x86_64(LinkGraph &G) {
+ LLVM_DEBUG(dbgs() << "Visiting edges in graph:\n");
+ x86_64::GOTTableManager GOT;
+ x86_64::PLTTableManager PLT(GOT);
+ TLSInfoTableManager_ELF_x86_64 TLSInfo;
+ visitExistingEdges(G, GOT, PLT, TLSInfo);
return Error::success();
}
+} // namespace
+
+static const char *getELFX86_64RelocName(uint32_t Type) {
+ switch (Type) {
+#define ELF_RELOC(Name, Number) \
+ case Number: \
+ return #Name;
+#include "llvm/BinaryFormat/ELFRelocs/x86_64.def"
+#undef ELF_RELOC
+ }
+ return "Unrecognized ELF/x86-64 relocation type";
+}
namespace llvm {
namespace jitlink {
@@ -228,10 +114,13 @@ namespace jitlink {
// generic
class ELFLinkGraphBuilder_x86_64 : public ELFLinkGraphBuilder<object::ELF64LE> {
private:
+ using ELFT = object::ELF64LE;
static Expected<ELF_x86_64_Edges::ELFX86RelocationKind>
getRelocationKind(const uint32_t Type) {
switch (Type) {
+ case ELF::R_X86_64_32S:
+ return ELF_x86_64_Edges::ELFX86RelocationKind::Pointer32Signed;
case ELF::R_X86_64_PC32:
return ELF_x86_64_Edges::ELFX86RelocationKind::PCRel32;
case ELF::R_X86_64_PC64:
@@ -240,9 +129,11 @@ private:
case ELF::R_X86_64_64:
return ELF_x86_64_Edges::ELFX86RelocationKind::Pointer64;
case ELF::R_X86_64_GOTPCREL:
+ return ELF_x86_64_Edges::ELFX86RelocationKind::PCRel32GOTLoad;
case ELF::R_X86_64_GOTPCRELX:
+ return ELF_x86_64_Edges::ELFX86RelocationKind::PCRel32GOTLoadRelaxable;
case ELF::R_X86_64_REX_GOTPCRELX:
- return ELF_x86_64_Edges::ELFX86RelocationKind::PCRel32GOTLoad;
+ return ELF_x86_64_Edges::ELFX86RelocationKind::PCRel32REXGOTLoadRelaxable;
case ELF::R_X86_64_GOTPCREL64:
return ELF_x86_64_Edges::ELFX86RelocationKind::PCRel64GOT;
case ELF::R_X86_64_GOT64:
@@ -251,109 +142,121 @@ private:
return ELF_x86_64_Edges::ELFX86RelocationKind::GOTOFF64;
case ELF::R_X86_64_PLT32:
return ELF_x86_64_Edges::ELFX86RelocationKind::Branch32;
+ case ELF::R_X86_64_TLSGD:
+ return ELF_x86_64_Edges::ELFX86RelocationKind::PCRel32TLV;
}
- return make_error<JITLinkError>("Unsupported x86-64 relocation:" +
- formatv("{0:d}", Type));
+ return make_error<JITLinkError>("Unsupported x86-64 relocation type " +
+ formatv("{0:d}: ", Type) +
+ getELFX86_64RelocName(Type));
}
Error addRelocations() override {
- LLVM_DEBUG(dbgs() << "Adding relocations\n");
- // TODO a partern is forming of iterate some sections but only give me
- // ones I am interested, i should abstract that concept some where
- for (auto &SecRef : Sections) {
- if (SecRef.sh_type != ELF::SHT_RELA && SecRef.sh_type != ELF::SHT_REL)
- continue;
- // TODO can the elf obj file do this for me?
- if (SecRef.sh_type == ELF::SHT_REL)
- return make_error<llvm::StringError>("Shouldn't have REL in x64",
- llvm::inconvertibleErrorCode());
-
- auto RelSectName = Obj.getSectionName(SecRef);
- if (!RelSectName)
- return RelSectName.takeError();
+ LLVM_DEBUG(dbgs() << "Processing relocations:\n");
+
+ using Base = ELFLinkGraphBuilder<ELFT>;
+ using Self = ELFLinkGraphBuilder_x86_64;
+ for (const auto &RelSect : Base::Sections) {
+ // Validate the section to read relocation entries from.
+ if (RelSect.sh_type == ELF::SHT_REL)
+ return make_error<StringError>(
+ "No SHT_REL in valid x64 ELF object files",
+ inconvertibleErrorCode());
+
+ if (Error Err = Base::forEachRelocation(RelSect, this,
+ &Self::addSingleRelocation))
+ return Err;
+ }
- LLVM_DEBUG({
- dbgs() << "Adding relocations from section " << *RelSectName << "\n";
- });
+ return Error::success();
+ }
- auto UpdateSection = Obj.getSection(SecRef.sh_info);
- if (!UpdateSection)
- return UpdateSection.takeError();
-
- auto UpdateSectionName = Obj.getSectionName(**UpdateSection);
- if (!UpdateSectionName)
- return UpdateSectionName.takeError();
-
- // Don't process relocations for debug sections.
- if (isDwarfSection(*UpdateSectionName)) {
- LLVM_DEBUG({
- dbgs() << " Target is dwarf section " << *UpdateSectionName
- << ". Skipping.\n";
- });
- continue;
- } else
- LLVM_DEBUG({
- dbgs() << " For target section " << *UpdateSectionName << "\n";
- });
-
- auto JITSection = G->findSectionByName(*UpdateSectionName);
- if (!JITSection)
- return make_error<llvm::StringError>(
- "Refencing a a section that wasn't added to graph" +
- *UpdateSectionName,
- llvm::inconvertibleErrorCode());
-
- auto Relocations = Obj.relas(SecRef);
- if (!Relocations)
- return Relocations.takeError();
-
- for (const auto &Rela : *Relocations) {
- auto Type = Rela.getType(false);
-
- LLVM_DEBUG({
- dbgs() << "Relocation Type: " << Type << "\n"
- << "Name: " << Obj.getRelocationTypeName(Type) << "\n";
- });
- auto SymbolIndex = Rela.getSymbol(false);
- auto Symbol = Obj.getRelocationSymbol(Rela, SymTabSec);
- if (!Symbol)
- return Symbol.takeError();
-
- auto BlockToFix = *(JITSection->blocks().begin());
- auto *TargetSymbol = getGraphSymbol(SymbolIndex);
-
- if (!TargetSymbol) {
- return make_error<llvm::StringError>(
- "Could not find symbol at given index, did you add it to "
- "JITSymbolTable? index: " +
- std::to_string(SymbolIndex) +
- ", shndx: " + std::to_string((*Symbol)->st_shndx) +
- " Size of table: " + std::to_string(GraphSymbols.size()),
- llvm::inconvertibleErrorCode());
- }
- uint64_t Addend = Rela.r_addend;
- JITTargetAddress FixupAddress =
- (*UpdateSection)->sh_addr + Rela.r_offset;
-
- LLVM_DEBUG({
- dbgs() << "Processing relocation at "
- << format("0x%016" PRIx64, FixupAddress) << "\n";
- });
- auto Kind = getRelocationKind(Type);
- if (!Kind)
- return Kind.takeError();
-
- LLVM_DEBUG({
- Edge GE(*Kind, FixupAddress - BlockToFix->getAddress(), *TargetSymbol,
- Addend);
- printEdge(dbgs(), *BlockToFix, GE,
- getELFX86RelocationKindName(*Kind));
- dbgs() << "\n";
- });
- BlockToFix->addEdge(*Kind, FixupAddress - BlockToFix->getAddress(),
- *TargetSymbol, Addend);
- }
+ Error addSingleRelocation(const typename ELFT::Rela &Rel,
+ const typename ELFT::Shdr &FixupSection,
+ Section &GraphSection) {
+ using Base = ELFLinkGraphBuilder<ELFT>;
+
+ uint32_t SymbolIndex = Rel.getSymbol(false);
+ auto ObjSymbol = Base::Obj.getRelocationSymbol(Rel, Base::SymTabSec);
+ if (!ObjSymbol)
+ return ObjSymbol.takeError();
+
+ Symbol *GraphSymbol = Base::getGraphSymbol(SymbolIndex);
+ if (!GraphSymbol)
+ return make_error<StringError>(
+ formatv("Could not find symbol at given index, did you add it to "
+ "JITSymbolTable? index: {0}, shndx: {1} Size of table: {2}",
+ SymbolIndex, (*ObjSymbol)->st_shndx,
+ Base::GraphSymbols.size()),
+ inconvertibleErrorCode());
+
+ // Validate the relocation kind.
+ auto ELFRelocKind = getRelocationKind(Rel.getType(false));
+ if (!ELFRelocKind)
+ return ELFRelocKind.takeError();
+
+ int64_t Addend = Rel.r_addend;
+ Edge::Kind Kind = Edge::Invalid;
+ switch (*ELFRelocKind) {
+ case PCRel32:
+ Kind = x86_64::Delta32;
+ break;
+ case Delta64:
+ Kind = x86_64::Delta64;
+ break;
+ case Pointer32Signed:
+ Kind = x86_64::Pointer32Signed;
+ break;
+ case Pointer64:
+ Kind = x86_64::Pointer64;
+ break;
+ case PCRel32GOTLoad: {
+ Kind = x86_64::RequestGOTAndTransformToDelta32;
+ break;
+ }
+ case PCRel32REXGOTLoadRelaxable: {
+ Kind = x86_64::RequestGOTAndTransformToPCRel32GOTLoadREXRelaxable;
+ Addend = 0;
+ break;
+ }
+ case PCRel32TLV: {
+ Kind = x86_64::RequestTLSDescInGOTAndTransformToDelta32;
+ break;
+ }
+ case PCRel32GOTLoadRelaxable: {
+ Kind = x86_64::RequestGOTAndTransformToPCRel32GOTLoadRelaxable;
+ Addend = 0;
+ break;
+ }
+ case PCRel64GOT: {
+ Kind = x86_64::RequestGOTAndTransformToDelta64;
+ break;
+ }
+ case GOT64: {
+ Kind = x86_64::RequestGOTAndTransformToDelta64FromGOT;
+ break;
+ }
+ case GOTOFF64: {
+ Kind = x86_64::Delta64FromGOT;
+ break;
+ }
+ case Branch32: {
+ Kind = x86_64::BranchPCRel32;
+ Addend = 0;
+ break;
}
+ }
+
+ Block *BlockToFix = *(GraphSection.blocks().begin());
+ JITTargetAddress FixupAddress = FixupSection.sh_addr + Rel.r_offset;
+ Edge::OffsetT Offset = FixupAddress - BlockToFix->getAddress();
+ Edge GE(Kind, Offset, *GraphSymbol, Addend);
+ LLVM_DEBUG({
+ dbgs() << " ";
+ printEdge(dbgs(), *BlockToFix, GE, getELFX86RelocationKindName(Kind));
+ dbgs() << "\n";
+ });
+
+ BlockToFix->addEdge(std::move(GE));
return Error::success();
}
@@ -361,7 +264,7 @@ public:
ELFLinkGraphBuilder_x86_64(StringRef FileName,
const object::ELFFile<object::ELF64LE> &Obj)
: ELFLinkGraphBuilder(Obj, Triple("x86_64-unknown-linux"), FileName,
- getELFX86RelocationKindName) {}
+ x86_64::getEdgeKindName) {}
};
class ELFJITLinker_x86_64 : public JITLinker<ELFJITLinker_x86_64> {
@@ -384,7 +287,8 @@ private:
createDefineExternalSectionStartAndEndSymbolsPass(
[&](LinkGraph &LG, Symbol &Sym) -> SectionRangeSymbolDesc {
if (Sym.getName() == ELFGOTSymbolName)
- if (auto *GOTSection = G.findSectionByName(ELFGOTSectionName)) {
+ if (auto *GOTSection = G.findSectionByName(
+ x86_64::GOTTableManager::getSectionName())) {
GOTSymbol = &Sym;
return {*GOTSection, true};
}
@@ -403,7 +307,8 @@ private:
// Otherwise look for a GOT section: If it already has a start symbol we'll
// record it, otherwise we'll create our own.
// If there's a GOT section but we didn't find an external GOT symbol...
- if (auto *GOTSection = G.findSectionByName(ELFGOTSectionName)) {
+ if (auto *GOTSection =
+ G.findSectionByName(x86_64::GOTTableManager::getSectionName())) {
// Check for an existing defined symbol.
for (auto *Sym : GOTSection->symbols())
@@ -427,81 +332,7 @@ private:
}
Error applyFixup(LinkGraph &G, Block &B, const Edge &E) const {
- using namespace ELF_x86_64_Edges;
- using namespace llvm::support;
-
- char *BlockWorkingMem = B.getAlreadyMutableContent().data();
- char *FixupPtr = BlockWorkingMem + E.getOffset();
- JITTargetAddress FixupAddress = B.getAddress() + E.getOffset();
- switch (E.getKind()) {
- case ELFX86RelocationKind::Branch32:
- case ELFX86RelocationKind::Branch32ToStub:
- case ELFX86RelocationKind::PCRel32:
- case ELFX86RelocationKind::PCRel32GOTLoad: {
- int64_t Value = E.getTarget().getAddress() + E.getAddend() - FixupAddress;
- if (LLVM_LIKELY(x86_64::isInRangeForImmS32(Value)))
- *(little32_t *)FixupPtr = Value;
- else
- return makeTargetOutOfRangeError(G, B, E);
- break;
- }
- case ELFX86RelocationKind::PCRel64: {
- int64_t Value = E.getTarget().getAddress() + E.getAddend() - FixupAddress;
- *(little64_t *)FixupPtr = Value;
- break;
- }
- case ELFX86RelocationKind::Pointer64: {
- int64_t Value = E.getTarget().getAddress() + E.getAddend();
- *(ulittle64_t *)FixupPtr = Value;
- break;
- }
- case ELFX86RelocationKind::Delta32: {
- int64_t Value = E.getTarget().getAddress() + E.getAddend() - FixupAddress;
- if (LLVM_LIKELY(x86_64::isInRangeForImmS32(Value)))
- *(little32_t *)FixupPtr = Value;
- else
- return makeTargetOutOfRangeError(G, B, E);
- break;
- }
- case ELFX86RelocationKind::Delta64: {
- int64_t Value = E.getTarget().getAddress() + E.getAddend() - FixupAddress;
- *(little64_t *)FixupPtr = Value;
- break;
- }
- case ELFX86RelocationKind::NegDelta32: {
- int64_t Value = FixupAddress - E.getTarget().getAddress() + E.getAddend();
- if (LLVM_LIKELY(x86_64::isInRangeForImmS32(Value)))
- *(little32_t *)FixupPtr = Value;
- else
- return makeTargetOutOfRangeError(G, B, E);
- break;
- }
- case ELFX86RelocationKind::NegDelta64: {
- int64_t Value = FixupAddress - E.getTarget().getAddress() + E.getAddend();
- *(little64_t *)FixupPtr = Value;
- break;
- }
- case ELFX86RelocationKind::GOT64:
- case ELFX86RelocationKind::GOTOFF64: {
- // GOT64: Offset of GOT entry within GOT.
- // GOTOFF64: Offset from GOT base to target.
- // The expressions are the same in both cases, but in the GOT64 case the
- // edge will have been fixed to point at the GOT entry, and in the
- // GOTOFF64 case it will still point at the original target.
- assert(GOTSymbol && "No GOT section symbol");
- int64_t Value =
- E.getTarget().getAddress() - GOTSymbol->getAddress() + E.getAddend();
- *(little64_t *)FixupPtr = Value;
- break;
- }
- default:
- LLVM_DEBUG({
- dbgs() << "Bad edge: " << getELFX86RelocationKindName(E.getKind())
- << "\n";
- });
- llvm_unreachable("Unsupported relocation");
- }
- return Error::success();
+ return x86_64::applyFixup(G, B, E, GOTSymbol);
}
};
@@ -547,8 +378,9 @@ void link_ELF_x86_64(std::unique_ptr<LinkGraph> G,
if (Ctx->shouldAddDefaultTargetPasses(G->getTargetTriple())) {
Config.PrePrunePasses.push_back(EHFrameSplitter(".eh_frame"));
- Config.PrePrunePasses.push_back(EHFrameEdgeFixer(
- ".eh_frame", G->getPointerSize(), Delta64, Delta32, NegDelta32));
+ Config.PrePrunePasses.push_back(
+ EHFrameEdgeFixer(".eh_frame", x86_64::PointerSize, x86_64::Delta64,
+ x86_64::Delta32, x86_64::NegDelta32));
Config.PrePrunePasses.push_back(EHFrameNullTerminator(".eh_frame"));
// Construct a JITLinker and run the link function.
@@ -558,9 +390,8 @@ void link_ELF_x86_64(std::unique_ptr<LinkGraph> G,
else
Config.PrePrunePasses.push_back(markAllSymbolsLive);
- // Add an in-place GOT/Stubs pass.
- Config.PostPrunePasses.push_back(
- PerGraphGOTAndPLTStubsBuilder_ELF_x86_64::asPass);
+ // Add an in-place GOT/Stubs/TLSInfoEntry build pass.
+ Config.PostPrunePasses.push_back(buildTables_ELF_x86_64);
// Resolve any external section start / end symbols.
Config.PostAllocationPasses.push_back(
@@ -568,7 +399,7 @@ void link_ELF_x86_64(std::unique_ptr<LinkGraph> G,
identifyELFSectionStartAndEndSymbols));
// Add GOT/Stubs optimizer pass.
- Config.PreFixupPasses.push_back(optimizeELF_x86_64_GOTAndStubs);
+ Config.PreFixupPasses.push_back(x86_64::optimizeGOTAndStubAccesses);
}
if (auto Err = Ctx->modifyPassConfig(*G, Config))
@@ -580,44 +411,26 @@ const char *getELFX86RelocationKindName(Edge::Kind R) {
switch (R) {
case Branch32:
return "Branch32";
- case Branch32ToStub:
- return "Branch32ToStub";
- case Pointer32:
- return "Pointer32";
+ case Pointer32Signed:
+ return "Pointer32Signed";
case Pointer64:
return "Pointer64";
- case Pointer64Anon:
- return "Pointer64Anon";
case PCRel32:
return "PCRel32";
- case PCRel32Minus1:
- return "PCRel32Minus1";
- case PCRel32Minus2:
- return "PCRel32Minus2";
- case PCRel32Minus4:
- return "PCRel32Minus4";
- case PCRel32Anon:
- return "PCRel32Anon";
- case PCRel32Minus1Anon:
- return "PCRel32Minus1Anon";
- case PCRel32Minus2Anon:
- return "PCRel32Minus2Anon";
- case PCRel32Minus4Anon:
- return "PCRel32Minus4Anon";
case PCRel32GOTLoad:
return "PCRel32GOTLoad";
- case PCRel32GOT:
- return "PCRel32GOT";
- case PCRel32TLV:
- return "PCRel32TLV";
- case Delta32:
- return "Delta32";
+ case PCRel32GOTLoadRelaxable:
+ return "PCRel32GOTLoadRelaxable";
+ case PCRel32REXGOTLoadRelaxable:
+ return "PCRel32REXGOTLoad";
+ case PCRel64GOT:
+ return "PCRel64GOT";
case Delta64:
return "Delta64";
- case NegDelta32:
- return "NegDelta32";
- case NegDelta64:
- return "NegDelta64";
+ case GOT64:
+ return "GOT64";
+ case GOTOFF64:
+ return "GOTOFF64";
}
return getGenericEdgeKindName(static_cast<Edge::Kind>(R));
}
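Most of the x86-64 fixups folded into x86_64::applyFixup above (Delta32, BranchPCRel32, the relaxable GOT loads) share one constraint: the computed value must fit a signed 32-bit immediate, or the link fails with an out-of-range error. A trivial sketch of that check, with an illustrative helper name mirroring what a routine like isInRangeForImmS32 does:

#include <cassert>
#include <cstdint>
#include <limits>

// A PC-relative x86-64 fixup can only reach targets within +/-2 GiB of
// the fixup location, i.e. the value must fit a signed 32-bit immediate.
static bool fitsImmS32(int64_t Value) {
  return Value >= std::numeric_limits<int32_t>::min() &&
         Value <= std::numeric_limits<int32_t>::max();
}

int main() {
  assert(fitsImmS32(0x7FFFFFFFLL));
  assert(!fitsImmS32(0x80000000LL));
  assert(fitsImmS32(-0x80000000LL));
  assert(!fitsImmS32(-0x80000001LL));
}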
diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp
index a4976f2f3d27..51dcc1c35fad 100644
--- a/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp
@@ -1,9 +1,8 @@
//===------------- JITLink.cpp - Core Run-time JIT linker APIs ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -214,7 +213,12 @@ Block &LinkGraph::splitBlock(Block &B, size_t SplitIndex,
// Transfer all symbols with offset less than SplitIndex to NewBlock.
while (!BlockSymbols.empty() &&
BlockSymbols.back()->getOffset() < SplitIndex) {
- BlockSymbols.back()->setBlock(NewBlock);
+ auto *Sym = BlockSymbols.back();
+ // If the symbol extends beyond the split, update the size to be within
+ // the new block.
+ if (Sym->getOffset() + Sym->getSize() > SplitIndex)
+ Sym->setSize(SplitIndex - Sym->getOffset());
+ Sym->setBlock(NewBlock);
BlockSymbols.pop_back();
}
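The splitBlock change above shrinks any symbol that straddles the split point so it stays inside the block it is moved to; the arithmetic is simply the following (illustrative struct, not the JITLink Symbol class):

#include <cassert>
#include <cstdint>

struct Sym { uint64_t Offset, Size; };

// Truncates a symbol that extends past SplitIndex so that it ends
// exactly at the split point.
static void clampToSplit(Sym &S, uint64_t SplitIndex) {
  if (S.Offset + S.Size > SplitIndex)
    S.Size = SplitIndex - S.Offset;
}

int main() {
  Sym S{8, 32};        // symbol covering [8, 40)
  clampToSplit(S, 16); // block split at offset 16
  assert(S.Size == 8); // symbol now covers [8, 16)
}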
diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp
index 5b163ab6316d..706688aba4ec 100644
--- a/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp
@@ -48,12 +48,21 @@ void JITLinkerBase::linkPhase1(std::unique_ptr<JITLinkerBase> Self) {
if (auto Err = runPasses(Passes.PostPrunePasses))
return Ctx->notifyFailed(std::move(Err));
- // Sort blocks into segments.
- auto Layout = layOutBlocks();
+ Ctx->getMemoryManager().allocate(
+ Ctx->getJITLinkDylib(), *G,
+ [S = std::move(Self)](AllocResult AR) mutable {
+ auto *TmpSelf = S.get();
+ TmpSelf->linkPhase2(std::move(S), std::move(AR));
+ });
+}
- // Allocate memory for segments.
- if (auto Err = allocateSegments(Layout))
- return Ctx->notifyFailed(std::move(Err));
+void JITLinkerBase::linkPhase2(std::unique_ptr<JITLinkerBase> Self,
+ AllocResult AR) {
+
+ if (AR)
+ Alloc = std::move(*AR);
+ else
+ return Ctx->notifyFailed(AR.takeError());
LLVM_DEBUG({
dbgs() << "Link graph \"" << G->getName()
@@ -73,16 +82,16 @@ void JITLinkerBase::linkPhase1(std::unique_ptr<JITLinkerBase> Self) {
auto ExternalSymbols = getExternalSymbolNames();
- // If there are no external symbols then proceed immediately with phase 2.
+ // If there are no external symbols then proceed immediately with phase 3.
if (ExternalSymbols.empty()) {
LLVM_DEBUG({
dbgs() << "No external symbols for " << G->getName()
- << ". Proceeding immediately with link phase 2.\n";
+ << ". Proceeding immediately with link phase 3.\n";
});
// FIXME: Once callee expressions are defined to be sequenced before
// argument expressions (c++17) we can simplify this. See below.
auto &TmpSelf = *Self;
- TmpSelf.linkPhase2(std::move(Self), AsyncLookupResult(), std::move(Layout));
+ TmpSelf.linkPhase3(std::move(Self), AsyncLookupResult());
return;
}
@@ -100,37 +109,31 @@ void JITLinkerBase::linkPhase1(std::unique_ptr<JITLinkerBase> Self) {
//
// Ctx->lookup(std::move(UnresolvedExternals),
// [Self=std::move(Self)](Expected<AsyncLookupResult> Result) {
- // Self->linkPhase2(std::move(Self), std::move(Result));
+ // Self->linkPhase3(std::move(Self), std::move(Result));
// });
- auto *TmpCtx = Ctx.get();
- TmpCtx->lookup(std::move(ExternalSymbols),
- createLookupContinuation(
- [S = std::move(Self), L = std::move(Layout)](
- Expected<AsyncLookupResult> LookupResult) mutable {
- auto &TmpSelf = *S;
- TmpSelf.linkPhase2(std::move(S), std::move(LookupResult),
- std::move(L));
- }));
+ Ctx->lookup(std::move(ExternalSymbols),
+ createLookupContinuation(
+ [S = std::move(Self)](
+ Expected<AsyncLookupResult> LookupResult) mutable {
+ auto &TmpSelf = *S;
+ TmpSelf.linkPhase3(std::move(S), std::move(LookupResult));
+ }));
}
-void JITLinkerBase::linkPhase2(std::unique_ptr<JITLinkerBase> Self,
- Expected<AsyncLookupResult> LR,
- SegmentLayoutMap Layout) {
+void JITLinkerBase::linkPhase3(std::unique_ptr<JITLinkerBase> Self,
+ Expected<AsyncLookupResult> LR) {
LLVM_DEBUG({
- dbgs() << "Starting link phase 2 for graph " << G->getName() << "\n";
+ dbgs() << "Starting link phase 3 for graph " << G->getName() << "\n";
});
// If the lookup failed, bail out.
if (!LR)
- return deallocateAndBailOut(LR.takeError());
+ return abandonAllocAndBailOut(std::move(Self), LR.takeError());
// Assign addresses to external addressables.
applyLookupResult(*LR);
- // Copy block content to working memory.
- copyBlockContentToWorkingMemory(Layout, *Alloc);
-
LLVM_DEBUG({
dbgs() << "Link graph \"" << G->getName()
<< "\" before pre-fixup passes:\n";
@@ -138,7 +141,7 @@ void JITLinkerBase::linkPhase2(std::unique_ptr<JITLinkerBase> Self,
});
if (auto Err = runPasses(Passes.PreFixupPasses))
- return deallocateAndBailOut(std::move(Err));
+ return abandonAllocAndBailOut(std::move(Self), std::move(Err));
LLVM_DEBUG({
dbgs() << "Link graph \"" << G->getName() << "\" before copy-and-fixup:\n";
@@ -147,7 +150,7 @@ void JITLinkerBase::linkPhase2(std::unique_ptr<JITLinkerBase> Self,
// Fix up block content.
if (auto Err = fixUpBlocks(*G))
- return deallocateAndBailOut(std::move(Err));
+ return abandonAllocAndBailOut(std::move(Self), std::move(Err));
LLVM_DEBUG({
dbgs() << "Link graph \"" << G->getName() << "\" after copy-and-fixup:\n";
@@ -155,27 +158,25 @@ void JITLinkerBase::linkPhase2(std::unique_ptr<JITLinkerBase> Self,
});
if (auto Err = runPasses(Passes.PostFixupPasses))
- return deallocateAndBailOut(std::move(Err));
-
- // FIXME: Use move capture once we have c++14.
- auto *UnownedSelf = Self.release();
- auto Phase3Continuation = [UnownedSelf](Error Err) {
- std::unique_ptr<JITLinkerBase> Self(UnownedSelf);
- UnownedSelf->linkPhase3(std::move(Self), std::move(Err));
- };
+ return abandonAllocAndBailOut(std::move(Self), std::move(Err));
- Alloc->finalizeAsync(std::move(Phase3Continuation));
+ Alloc->finalize([S = std::move(Self)](FinalizeResult FR) mutable {
+ auto *TmpSelf = S.get();
+ TmpSelf->linkPhase4(std::move(S), std::move(FR));
+ });
}
-void JITLinkerBase::linkPhase3(std::unique_ptr<JITLinkerBase> Self, Error Err) {
+void JITLinkerBase::linkPhase4(std::unique_ptr<JITLinkerBase> Self,
+ FinalizeResult FR) {
LLVM_DEBUG({
- dbgs() << "Starting link phase 3 for graph " << G->getName() << "\n";
+ dbgs() << "Starting link phase 4 for graph " << G->getName() << "\n";
});
- if (Err)
- return deallocateAndBailOut(std::move(Err));
- Ctx->notifyFinalized(std::move(Alloc));
+ if (!FR)
+ return Ctx->notifyFailed(FR.takeError());
+
+ Ctx->notifyFinalized(std::move(*FR));
LLVM_DEBUG({ dbgs() << "Link of graph " << G->getName() << " complete\n"; });
}
@@ -187,131 +188,6 @@ Error JITLinkerBase::runPasses(LinkGraphPassList &Passes) {
return Error::success();
}
-JITLinkerBase::SegmentLayoutMap JITLinkerBase::layOutBlocks() {
-
- SegmentLayoutMap Layout;
-
- /// Partition blocks based on permissions and content vs. zero-fill.
- for (auto *B : G->blocks()) {
- auto &SegLists = Layout[B->getSection().getProtectionFlags()];
- if (!B->isZeroFill())
- SegLists.ContentBlocks.push_back(B);
- else
- SegLists.ZeroFillBlocks.push_back(B);
- }
-
- /// Sort blocks within each list.
- for (auto &KV : Layout) {
-
- auto CompareBlocks = [](const Block *LHS, const Block *RHS) {
- // Sort by section, address and size
- if (LHS->getSection().getOrdinal() != RHS->getSection().getOrdinal())
- return LHS->getSection().getOrdinal() < RHS->getSection().getOrdinal();
- if (LHS->getAddress() != RHS->getAddress())
- return LHS->getAddress() < RHS->getAddress();
- return LHS->getSize() < RHS->getSize();
- };
-
- auto &SegLists = KV.second;
- llvm::sort(SegLists.ContentBlocks, CompareBlocks);
- llvm::sort(SegLists.ZeroFillBlocks, CompareBlocks);
- }
-
- LLVM_DEBUG({
- dbgs() << "Computed segment ordering:\n";
- for (auto &KV : Layout) {
- dbgs() << " Segment "
- << static_cast<sys::Memory::ProtectionFlags>(KV.first) << ":\n";
- auto &SL = KV.second;
- for (auto &SIEntry :
- {std::make_pair(&SL.ContentBlocks, "content block"),
- std::make_pair(&SL.ZeroFillBlocks, "zero-fill block")}) {
- dbgs() << " " << SIEntry.second << ":\n";
- for (auto *B : *SIEntry.first)
- dbgs() << " " << *B << "\n";
- }
- }
- });
-
- return Layout;
-}
-
-Error JITLinkerBase::allocateSegments(const SegmentLayoutMap &Layout) {
-
- // Compute segment sizes and allocate memory.
- LLVM_DEBUG(dbgs() << "JIT linker requesting: { ");
- JITLinkMemoryManager::SegmentsRequestMap Segments;
- for (auto &KV : Layout) {
- auto &Prot = KV.first;
- auto &SegLists = KV.second;
-
- uint64_t SegAlign = 1;
-
- // Calculate segment content size.
- size_t SegContentSize = 0;
- for (auto *B : SegLists.ContentBlocks) {
- SegAlign = std::max(SegAlign, B->getAlignment());
- SegContentSize = alignToBlock(SegContentSize, *B);
- SegContentSize += B->getSize();
- }
-
- uint64_t SegZeroFillStart = SegContentSize;
- uint64_t SegZeroFillEnd = SegZeroFillStart;
-
- for (auto *B : SegLists.ZeroFillBlocks) {
- SegAlign = std::max(SegAlign, B->getAlignment());
- SegZeroFillEnd = alignToBlock(SegZeroFillEnd, *B);
- SegZeroFillEnd += B->getSize();
- }
-
- Segments[Prot] = {SegAlign, SegContentSize,
- SegZeroFillEnd - SegZeroFillStart};
-
- LLVM_DEBUG({
- dbgs() << (&KV == &*Layout.begin() ? "" : "; ")
- << static_cast<sys::Memory::ProtectionFlags>(Prot)
- << ": alignment = " << SegAlign
- << ", content size = " << SegContentSize
- << ", zero-fill size = " << (SegZeroFillEnd - SegZeroFillStart);
- });
- }
- LLVM_DEBUG(dbgs() << " }\n");
-
- if (auto AllocOrErr =
- Ctx->getMemoryManager().allocate(Ctx->getJITLinkDylib(), Segments))
- Alloc = std::move(*AllocOrErr);
- else
- return AllocOrErr.takeError();
-
- LLVM_DEBUG({
- dbgs() << "JIT linker got memory (working -> target):\n";
- for (auto &KV : Layout) {
- auto Prot = static_cast<sys::Memory::ProtectionFlags>(KV.first);
- dbgs() << " " << Prot << ": "
- << (const void *)Alloc->getWorkingMemory(Prot).data() << " -> "
- << formatv("{0:x16}", Alloc->getTargetMemory(Prot)) << "\n";
- }
- });
-
- // Update block target addresses.
- for (auto &KV : Layout) {
- auto &Prot = KV.first;
- auto &SL = KV.second;
-
- JITTargetAddress NextBlockAddr =
- Alloc->getTargetMemory(static_cast<sys::Memory::ProtectionFlags>(Prot));
-
- for (auto *SIList : {&SL.ContentBlocks, &SL.ZeroFillBlocks})
- for (auto *B : *SIList) {
- NextBlockAddr = alignToBlock(NextBlockAddr, *B);
- B->setAddress(NextBlockAddr);
- NextBlockAddr += B->getSize();
- }
- }
-
- return Error::success();
-}
-
JITLinkContext::LookupMap JITLinkerBase::getExternalSymbolNames() const {
// Identify unresolved external symbols.
JITLinkContext::LookupMap UnresolvedExternals;
@@ -351,81 +227,13 @@ void JITLinkerBase::applyLookupResult(AsyncLookupResult Result) {
});
}
-void JITLinkerBase::copyBlockContentToWorkingMemory(
- const SegmentLayoutMap &Layout, JITLinkMemoryManager::Allocation &Alloc) {
-
- LLVM_DEBUG(dbgs() << "Copying block content:\n");
- for (auto &KV : Layout) {
- auto &Prot = KV.first;
- auto &SegLayout = KV.second;
-
- auto SegMem =
- Alloc.getWorkingMemory(static_cast<sys::Memory::ProtectionFlags>(Prot));
- char *LastBlockEnd = SegMem.data();
- char *BlockDataPtr = LastBlockEnd;
-
- LLVM_DEBUG({
- dbgs() << " Processing segment "
- << static_cast<sys::Memory::ProtectionFlags>(Prot) << " [ "
- << (const void *)SegMem.data() << " .. "
- << (const void *)((char *)SegMem.data() + SegMem.size())
- << " ]\n Processing content sections:\n";
- });
-
- for (auto *B : SegLayout.ContentBlocks) {
- LLVM_DEBUG(dbgs() << " " << *B << ":\n");
-
- // Pad to alignment/alignment-offset.
- BlockDataPtr = alignToBlock(BlockDataPtr, *B);
-
- LLVM_DEBUG({
- dbgs() << " Bumped block pointer to " << (const void *)BlockDataPtr
- << " to meet block alignment " << B->getAlignment()
- << " and alignment offset " << B->getAlignmentOffset() << "\n";
- });
-
- // Zero pad up to alignment.
- LLVM_DEBUG({
- if (LastBlockEnd != BlockDataPtr)
- dbgs() << " Zero padding from " << (const void *)LastBlockEnd
- << " to " << (const void *)BlockDataPtr << "\n";
- });
-
- while (LastBlockEnd != BlockDataPtr)
- *LastBlockEnd++ = 0;
-
- // Copy initial block content.
- LLVM_DEBUG({
- dbgs() << " Copying block " << *B << " content, "
- << B->getContent().size() << " bytes, from "
- << (const void *)B->getContent().data() << " to "
- << (const void *)BlockDataPtr << "\n";
- });
- memcpy(BlockDataPtr, B->getContent().data(), B->getContent().size());
-
- // Point the block's content to the fixed up buffer.
- B->setMutableContent({BlockDataPtr, B->getContent().size()});
-
- // Update block end pointer.
- LastBlockEnd = BlockDataPtr + B->getContent().size();
- BlockDataPtr = LastBlockEnd;
- }
-
- // Zero pad the rest of the segment.
- LLVM_DEBUG({
- dbgs() << " Zero padding end of segment from "
- << (const void *)LastBlockEnd << " to "
- << (const void *)((char *)SegMem.data() + SegMem.size()) << "\n";
- });
- while (LastBlockEnd != SegMem.data() + SegMem.size())
- *LastBlockEnd++ = 0;
- }
-}
-
-void JITLinkerBase::deallocateAndBailOut(Error Err) {
+void JITLinkerBase::abandonAllocAndBailOut(std::unique_ptr<JITLinkerBase> Self,
+ Error Err) {
assert(Err && "Should not be bailing out on success value");
- assert(Alloc && "can not call deallocateAndBailOut before allocation");
- Ctx->notifyFailed(joinErrors(std::move(Err), Alloc->deallocate()));
+ assert(Alloc && "can not call abandonAllocAndBailOut before allocation");
+ Alloc->abandon([S = std::move(Self), E1 = std::move(Err)](Error E2) mutable {
+ S->Ctx->notifyFailed(joinErrors(std::move(E1), std::move(E2)));
+ });
}
void prune(LinkGraph &G) {
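The control flow above now hands the linker object from one phase to the next by moving the owning unique_ptr into each continuation and calling the next phase through a raw pointer taken from it first. A minimal, self-contained sketch of that hand-off pattern, with illustrative names (Step and RunNow are not part of the JITLink API):

    #include <memory>
    #include <utility>

    // RunAsync stands in for any "run this continuation later" facility.
    template <typename AsyncRunner> struct Step {
      void start(std::unique_ptr<Step> Self, AsyncRunner RunAsync) {
        // Move ownership into the continuation, but make the member call
        // through a pointer obtained inside it, so the object is still owned
        // (and alive) when the next phase starts.
        RunAsync([S = std::move(Self)]() mutable {
          auto *TmpSelf = S.get();
          TmpSelf->next(std::move(S));
        });
      }
      void next(std::unique_ptr<Step>) { /* next phase runs here */ }
    };

    int main() {
      auto RunNow = [](auto &&Continuation) { Continuation(); };
      auto S = std::make_unique<Step<decltype(RunNow)>>();
      auto *Tmp = S.get();
      Tmp->start(std::move(S), RunNow);
    }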
diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.h b/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.h
index 6b815fe4fb31..e4fdda0783a4 100644
--- a/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.h
+++ b/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.h
@@ -42,14 +42,9 @@ public:
virtual ~JITLinkerBase();
protected:
- struct SegmentLayout {
- using BlocksList = std::vector<Block *>;
-
- BlocksList ContentBlocks;
- BlocksList ZeroFillBlocks;
- };
-
- using SegmentLayoutMap = DenseMap<unsigned, SegmentLayout>;
+ using InFlightAlloc = JITLinkMemoryManager::InFlightAlloc;
+ using AllocResult = Expected<std::unique_ptr<InFlightAlloc>>;
+ using FinalizeResult = Expected<JITLinkMemoryManager::FinalizedAlloc>;
// Returns the PassConfiguration for this instance. This can be used by
// JITLinkerBase implementations to add late passes that reference their
@@ -61,39 +56,27 @@ protected:
// 1.1: Run pre-prune passes
// 1.2: Prune graph
// 1.3: Run post-prune passes
- // 1.4: Sort blocks into segments
- // 1.5: Allocate segment memory, update node vmaddrs to target vmaddrs
- // 1.6: Run post-allocation passes
- // 1.7: Notify context of final assigned symbol addresses
- // 1.8: Identify external symbols and make an async call to resolve
+ // 1.4: Allocate memory.
void linkPhase1(std::unique_ptr<JITLinkerBase> Self);
// Phase 2:
- // 2.1: Apply resolution results
- // 2.2: Run pre-fixup passes
- // 2.3: Fix up block contents
- // 2.4: Run post-fixup passes
- // 2.5: Make an async call to transfer and finalize memory.
- void linkPhase2(std::unique_ptr<JITLinkerBase> Self,
- Expected<AsyncLookupResult> LookupResult,
- SegmentLayoutMap Layout);
+  // 2.1: Check the allocation result
+  // 2.2: Run post-allocation passes
+ // 2.3: Notify context of final assigned symbol addresses
+ // 2.4: Identify external symbols and make an async call to resolve
+ void linkPhase2(std::unique_ptr<JITLinkerBase> Self, AllocResult AR);
// Phase 3:
- // 3.1: Call OnFinalized callback, handing off allocation.
- void linkPhase3(std::unique_ptr<JITLinkerBase> Self, Error Err);
-
- // Align a JITTargetAddress to conform with block alignment requirements.
- static JITTargetAddress alignToBlock(JITTargetAddress Addr, Block &B) {
- uint64_t Delta = (B.getAlignmentOffset() - Addr) % B.getAlignment();
- return Addr + Delta;
- }
-
- // Align a pointer to conform with block alignment requirements.
- static char *alignToBlock(char *P, Block &B) {
- uint64_t PAddr = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(P));
- uint64_t Delta = (B.getAlignmentOffset() - PAddr) % B.getAlignment();
- return P + Delta;
- }
+ // 3.1: Apply resolution results
+ // 3.2: Run pre-fixup passes
+ // 3.3: Fix up block contents
+ // 3.4: Run post-fixup passes
+ // 3.5: Make an async call to transfer and finalize memory.
+ void linkPhase3(std::unique_ptr<JITLinkerBase> Self,
+ Expected<AsyncLookupResult> LookupResult);
+
+ // Phase 4:
+ // 4.1: Call OnFinalized callback, handing off allocation.
+ void linkPhase4(std::unique_ptr<JITLinkerBase> Self, FinalizeResult FR);
private:
// Run all passes in the given pass list, bailing out immediately if any pass
@@ -104,18 +87,14 @@ private:
// Implemented in JITLinker.
virtual Error fixUpBlocks(LinkGraph &G) const = 0;
- SegmentLayoutMap layOutBlocks();
- Error allocateSegments(const SegmentLayoutMap &Layout);
JITLinkContext::LookupMap getExternalSymbolNames() const;
void applyLookupResult(AsyncLookupResult LR);
- void copyBlockContentToWorkingMemory(const SegmentLayoutMap &Layout,
- JITLinkMemoryManager::Allocation &Alloc);
- void deallocateAndBailOut(Error Err);
+ void abandonAllocAndBailOut(std::unique_ptr<JITLinkerBase> Self, Error Err);
std::unique_ptr<JITLinkContext> Ctx;
std::unique_ptr<LinkGraph> G;
PassConfiguration Passes;
- std::unique_ptr<JITLinkMemoryManager::Allocation> Alloc;
+ std::unique_ptr<InFlightAlloc> Alloc;
};
template <typename LinkerImpl> class JITLinker : public JITLinkerBase {
@@ -152,6 +131,8 @@ private:
// Copy Block data and apply fixups.
LLVM_DEBUG(dbgs() << " Applying fixups.\n");
+ assert((!B->isZeroFill() || B->edges_size() == 0) &&
+ "Edges in zero-fill block?");
for (auto &E : B->edges()) {
// Skip non-relocation edges.
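The alignToBlock helpers removed from this header implemented the alignment-offset arithmetic that the new layout code performs instead. A small sketch of that calculation, with hypothetical addresses; it relies on unsigned wraparound and assumes a power-of-two alignment:

    #include <cassert>
    #include <cstdint>

    // Advance Addr to the next address A >= Addr with A % Alignment == Offset.
    uint64_t alignToBlock(uint64_t Addr, uint64_t Alignment, uint64_t Offset) {
      uint64_t Delta = (Offset - Addr) % Alignment;
      return Addr + Delta;
    }

    int main() {
      // A block requiring 16-byte alignment with alignment-offset 8.
      assert(alignToBlock(0x1003, 16, 8) == 0x1008);
      assert(alignToBlock(0x1008, 16, 8) == 0x1008); // already satisfied
    }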
diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp
index 36067ccf2753..831b9b26d2fd 100644
--- a/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp
@@ -1,135 +1,528 @@
//===--- JITLinkMemoryManager.cpp - JITLinkMemoryManager implementation ---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h"
+#include "llvm/ExecutionEngine/JITLink/JITLink.h"
+#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/Process.h"
+#define DEBUG_TYPE "jitlink"
+
+using namespace llvm;
+
+namespace {
+
+// FIXME: Remove this copy of CWrapperFunctionResult as soon as JITLink can
+// depend on shared utils from Orc.
+
+// Must be kept in-sync with compiler-rt/lib/orc/c-api.h.
+union CWrapperFunctionResultDataUnion {
+ char *ValuePtr;
+ char Value[sizeof(ValuePtr)];
+};
+
+// Must be kept in-sync with compiler-rt/lib/orc/c-api.h.
+typedef struct {
+ CWrapperFunctionResultDataUnion Data;
+ size_t Size;
+} CWrapperFunctionResult;
+
+Error toError(CWrapperFunctionResult R) {
+ bool HasError = false;
+ std::string ErrMsg;
+ if (R.Size) {
+ bool Large = R.Size > sizeof(CWrapperFunctionResultDataUnion);
+ char *Content = Large ? R.Data.ValuePtr : R.Data.Value;
+ if (Content[0]) {
+ HasError = true;
+ constexpr unsigned StrStart = 1 + sizeof(uint64_t);
+ ErrMsg.resize(R.Size - StrStart);
+ memcpy(&ErrMsg[0], Content + StrStart, R.Size - StrStart);
+ }
+ if (Large)
+ free(R.Data.ValuePtr);
+ } else if (R.Data.ValuePtr) {
+ HasError = true;
+ ErrMsg = R.Data.ValuePtr;
+ free(R.Data.ValuePtr);
+ }
+
+ if (HasError)
+ return make_error<StringError>(std::move(ErrMsg), inconvertibleErrorCode());
+ return Error::success();
+}
+} // namespace
+
namespace llvm {
namespace jitlink {
JITLinkMemoryManager::~JITLinkMemoryManager() = default;
-JITLinkMemoryManager::Allocation::~Allocation() = default;
-
-Expected<std::unique_ptr<JITLinkMemoryManager::Allocation>>
-InProcessMemoryManager::allocate(const JITLinkDylib *JD,
- const SegmentsRequestMap &Request) {
-
- using AllocationMap = DenseMap<unsigned, sys::MemoryBlock>;
-
- // Local class for allocation.
- class IPMMAlloc : public Allocation {
- public:
- IPMMAlloc(AllocationMap SegBlocks) : SegBlocks(std::move(SegBlocks)) {}
- MutableArrayRef<char> getWorkingMemory(ProtectionFlags Seg) override {
- assert(SegBlocks.count(Seg) && "No allocation for segment");
- return {static_cast<char *>(SegBlocks[Seg].base()),
- SegBlocks[Seg].allocatedSize()};
+JITLinkMemoryManager::InFlightAlloc::~InFlightAlloc() = default;
+
+static Error runAllocAction(JITLinkMemoryManager::AllocActionCall &C) {
+ using WrapperFnTy = CWrapperFunctionResult (*)(const void *, size_t);
+ auto *Fn = jitTargetAddressToPointer<WrapperFnTy>(C.FnAddr);
+
+ return toError(Fn(jitTargetAddressToPointer<const void *>(C.CtxAddr),
+ static_cast<size_t>(C.CtxSize)));
+}
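An allocation action is just a function address plus a context buffer, and the callee reports success or failure through the CWrapperFunctionResult convention decoded by toError above. A hedged sketch of what such an action function could look like in-process; the names and the mirrored result struct are illustrative only:

    #include <cstddef>

    // Mirrors the anonymous-namespace result type above, for illustration.
    union ResultData { char *ValuePtr; char Value[sizeof(char *)]; };
    struct WrapperResult { ResultData Data; size_t Size; };

    // Hypothetical finalize-action function: an empty result (Size == 0 and
    // ValuePtr == nullptr) is treated as success by toError above.
    extern "C" WrapperResult registerStuffAction(const void *Ctx, size_t CtxSize) {
      (void)Ctx; (void)CtxSize; // interpret the CtxSize bytes at Ctx here
      WrapperResult R;
      R.Data.ValuePtr = nullptr;
      R.Size = 0;
      return R;
    }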
+
+BasicLayout::BasicLayout(LinkGraph &G) : G(G) {
+
+ for (auto &Sec : G.sections()) {
+ // Skip empty sections.
+ if (empty(Sec.blocks()))
+ continue;
+
+ auto &Seg = Segments[{Sec.getMemProt(), Sec.getMemDeallocPolicy()}];
+ for (auto *B : Sec.blocks())
+ if (LLVM_LIKELY(!B->isZeroFill()))
+ Seg.ContentBlocks.push_back(B);
+ else
+ Seg.ZeroFillBlocks.push_back(B);
+ }
+
+ // Build Segments map.
+ auto CompareBlocks = [](const Block *LHS, const Block *RHS) {
+ // Sort by section, address and size
+ if (LHS->getSection().getOrdinal() != RHS->getSection().getOrdinal())
+ return LHS->getSection().getOrdinal() < RHS->getSection().getOrdinal();
+ if (LHS->getAddress() != RHS->getAddress())
+ return LHS->getAddress() < RHS->getAddress();
+ return LHS->getSize() < RHS->getSize();
+ };
+
+ LLVM_DEBUG(dbgs() << "Generated BasicLayout for " << G.getName() << ":\n");
+ for (auto &KV : Segments) {
+ auto &Seg = KV.second;
+
+ llvm::sort(Seg.ContentBlocks, CompareBlocks);
+ llvm::sort(Seg.ZeroFillBlocks, CompareBlocks);
+
+ for (auto *B : Seg.ContentBlocks) {
+ Seg.ContentSize = alignToBlock(Seg.ContentSize, *B);
+ Seg.ContentSize += B->getSize();
+ Seg.Alignment = std::max(Seg.Alignment, Align(B->getAlignment()));
}
- JITTargetAddress getTargetMemory(ProtectionFlags Seg) override {
- assert(SegBlocks.count(Seg) && "No allocation for segment");
- return pointerToJITTargetAddress(SegBlocks[Seg].base());
+
+ uint64_t SegEndOffset = Seg.ContentSize;
+ for (auto *B : Seg.ZeroFillBlocks) {
+ SegEndOffset = alignToBlock(SegEndOffset, *B);
+ SegEndOffset += B->getSize();
+ Seg.Alignment = std::max(Seg.Alignment, Align(B->getAlignment()));
}
- void finalizeAsync(FinalizeContinuation OnFinalize) override {
- OnFinalize(applyProtections());
+ Seg.ZeroFillSize = SegEndOffset - Seg.ContentSize;
+
+ LLVM_DEBUG({
+ dbgs() << " Seg " << KV.first
+ << ": content-size=" << formatv("{0:x}", Seg.ContentSize)
+ << ", zero-fill-size=" << formatv("{0:x}", Seg.ZeroFillSize)
+ << ", align=" << formatv("{0:x}", Seg.Alignment.value()) << "\n";
+ });
+ }
+}
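To make the per-segment bookkeeping above concrete, here is a worked example with assumed block sizes and alignments (alignment offsets are ignored for brevity; the real alignToBlock also honours them):

    #include <algorithm>
    #include <cassert>
    #include <cstdint>
    #include <utility>
    #include <vector>

    int main() {
      // Hypothetical content blocks in one segment: (size, alignment) pairs.
      std::vector<std::pair<uint64_t, uint64_t>> Blocks = {
          {0x30, 8}, {0x11, 16}, {0x08, 4}};
      uint64_t ContentSize = 0, SegAlign = 1;
      for (auto &B : Blocks) {
        ContentSize = (ContentSize + B.second - 1) & ~(B.second - 1); // pad
        ContentSize += B.first;
        SegAlign = std::max(SegAlign, B.second);
      }
      assert(ContentSize == 0x4c && SegAlign == 16);
    }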
+
+Expected<BasicLayout::ContiguousPageBasedLayoutSizes>
+BasicLayout::getContiguousPageBasedLayoutSizes(uint64_t PageSize) {
+ ContiguousPageBasedLayoutSizes SegsSizes;
+
+ for (auto &KV : segments()) {
+ auto &AG = KV.first;
+ auto &Seg = KV.second;
+
+ if (Seg.Alignment > PageSize)
+ return make_error<StringError>("Segment alignment greater than page size",
+ inconvertibleErrorCode());
+
+ uint64_t SegSize = alignTo(Seg.ContentSize + Seg.ZeroFillSize, PageSize);
+ if (AG.getMemDeallocPolicy() == MemDeallocPolicy::Standard)
+ SegsSizes.StandardSegs += SegSize;
+ else
+ SegsSizes.FinalizeSegs += SegSize;
+ }
+
+ return SegsSizes;
+}
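The slab request is simply each segment's content plus zero-fill size rounded up to whole pages, accumulated into standard and finalize totals. A small numeric sketch assuming 4 KiB pages and made-up segment sizes:

    #include <cassert>
    #include <cstdint>

    uint64_t alignToPage(uint64_t Size, uint64_t PageSize) {
      return (Size + PageSize - 1) & ~(PageSize - 1);
    }

    int main() {
      const uint64_t PageSize = 0x1000; // assumed 4 KiB pages
      // Hypothetical segments: two standard, one finalize.
      uint64_t StandardSegs = alignToPage(0x2300 + 0x500, PageSize)  // 0x3000
                            + alignToPage(0x0f80, PageSize);         // 0x1000
      uint64_t FinalizeSegs = alignToPage(0x0040, PageSize);         // 0x1000
      assert(StandardSegs == 0x4000 && FinalizeSegs == 0x1000);
      assert(StandardSegs + FinalizeSegs == 0x5000); // total slab request
    }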
+
+Error BasicLayout::apply() {
+ for (auto &KV : Segments) {
+ auto &Seg = KV.second;
+
+ assert(!(Seg.ContentBlocks.empty() && Seg.ZeroFillBlocks.empty()) &&
+ "Empty section recorded?");
+
+ for (auto *B : Seg.ContentBlocks) {
+ // Align addr and working-mem-offset.
+ Seg.Addr = alignToBlock(Seg.Addr, *B);
+ Seg.NextWorkingMemOffset = alignToBlock(Seg.NextWorkingMemOffset, *B);
+
+ // Update block addr.
+ B->setAddress(Seg.Addr);
+ Seg.Addr += B->getSize();
+
+ // Copy content to working memory, then update content to point at working
+ // memory.
+ memcpy(Seg.WorkingMem + Seg.NextWorkingMemOffset, B->getContent().data(),
+ B->getSize());
+ B->setMutableContent(
+ {Seg.WorkingMem + Seg.NextWorkingMemOffset, B->getSize()});
+ Seg.NextWorkingMemOffset += B->getSize();
}
- Error deallocate() override {
- if (SegBlocks.empty())
- return Error::success();
- void *SlabStart = SegBlocks.begin()->second.base();
- char *SlabEnd = (char *)SlabStart;
- for (auto &KV : SegBlocks) {
- SlabStart = std::min(SlabStart, KV.second.base());
- SlabEnd = std::max(SlabEnd, (char *)(KV.second.base()) +
- KV.second.allocatedSize());
- }
- size_t SlabSize = SlabEnd - (char *)SlabStart;
- assert((SlabSize % sys::Process::getPageSizeEstimate()) == 0 &&
- "Slab size is not a multiple of page size");
- sys::MemoryBlock Slab(SlabStart, SlabSize);
- if (auto EC = sys::Memory::releaseMappedMemory(Slab))
+
+ for (auto *B : Seg.ZeroFillBlocks) {
+ // Align addr.
+ Seg.Addr = alignToBlock(Seg.Addr, *B);
+ // Update block addr.
+ B->setAddress(Seg.Addr);
+ Seg.Addr += B->getSize();
+ }
+
+ Seg.ContentBlocks.clear();
+ Seg.ZeroFillBlocks.clear();
+ }
+
+ return Error::success();
+}
+
+JITLinkMemoryManager::AllocActions &BasicLayout::graphAllocActions() {
+ return G.allocActions();
+}
+
+void SimpleSegmentAlloc::Create(JITLinkMemoryManager &MemMgr,
+ const JITLinkDylib *JD, SegmentMap Segments,
+ OnCreatedFunction OnCreated) {
+
+ static_assert(AllocGroup::NumGroups == 16,
+ "AllocGroup has changed. Section names below must be updated");
+ StringRef AGSectionNames[] = {
+ "__---.standard", "__R--.standard", "__-W-.standard", "__RW-.standard",
+ "__--X.standard", "__R-X.standard", "__-WX.standard", "__RWX.standard",
+ "__---.finalize", "__R--.finalize", "__-W-.finalize", "__RW-.finalize",
+ "__--X.finalize", "__R-X.finalize", "__-WX.finalize", "__RWX.finalize"};
+
+ auto G =
+ std::make_unique<LinkGraph>("", Triple(), 0, support::native, nullptr);
+ AllocGroupSmallMap<Block *> ContentBlocks;
+
+ JITTargetAddress NextAddr = 0x100000;
+ for (auto &KV : Segments) {
+ auto &AG = KV.first;
+ auto &Seg = KV.second;
+
+ auto AGSectionName =
+ AGSectionNames[static_cast<unsigned>(AG.getMemProt()) |
+ static_cast<bool>(AG.getMemDeallocPolicy()) << 3];
+
+ auto &Sec = G->createSection(AGSectionName, AG.getMemProt());
+ Sec.setMemDeallocPolicy(AG.getMemDeallocPolicy());
+
+ if (Seg.ContentSize != 0) {
+ NextAddr = alignTo(NextAddr, Seg.ContentAlign);
+ auto &B =
+ G->createMutableContentBlock(Sec, G->allocateBuffer(Seg.ContentSize),
+ NextAddr, Seg.ContentAlign.value(), 0);
+ ContentBlocks[AG] = &B;
+ NextAddr += Seg.ContentSize;
+ }
+ }
+
+ // GRef declared separately since order-of-argument-eval isn't specified.
+ auto &GRef = *G;
+ MemMgr.allocate(JD, GRef,
+ [G = std::move(G), ContentBlocks = std::move(ContentBlocks),
+ OnCreated = std::move(OnCreated)](
+ JITLinkMemoryManager::AllocResult Alloc) mutable {
+ if (!Alloc)
+ OnCreated(Alloc.takeError());
+ else
+ OnCreated(SimpleSegmentAlloc(std::move(G),
+ std::move(ContentBlocks),
+ std::move(*Alloc)));
+ });
+}
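The AGSectionNames table above is indexed by packing the three protection bits into bits 0-2 and the dealloc policy into bit 3, giving one synthetic section name per AllocGroup. A sketch of the index computation; the concrete bit values for Read/Write/Exec are an assumption here, chosen to match the table ordering:

    #include <cassert>

    // Assumed: Read = 1, Write = 2, Exec = 4; Standard = 0, Finalize = 1.
    unsigned sectionNameIndex(unsigned ProtBits, bool Finalize) {
      return ProtBits | (static_cast<unsigned>(Finalize) << 3); // 0..15
    }

    int main() {
      // Read|Write, Standard -> index 3 -> "__RW-.standard" in the table above.
      assert(sectionNameIndex(1 | 2, false) == 3);
      // Read|Exec, Finalize -> index 13 -> "__R-X.finalize".
      assert(sectionNameIndex(1 | 4, true) == 13);
    }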
+
+Expected<SimpleSegmentAlloc>
+SimpleSegmentAlloc::Create(JITLinkMemoryManager &MemMgr, const JITLinkDylib *JD,
+ SegmentMap Segments) {
+ std::promise<MSVCPExpected<SimpleSegmentAlloc>> AllocP;
+ auto AllocF = AllocP.get_future();
+ Create(MemMgr, JD, std::move(Segments),
+ [&](Expected<SimpleSegmentAlloc> Result) {
+ AllocP.set_value(std::move(Result));
+ });
+ return AllocF.get();
+}
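The blocking Create overload above uses the standard recipe for turning a callback API into a synchronous one: park the result in a promise, let the continuation fulfil it, and block on the matching future (the MSVCPExpected wrapper in the patch only works around an MSVC std::promise limitation). A generic sketch of the recipe with illustrative names:

    #include <future>
    #include <string>

    // Stand-in for an asynchronous, callback-based Create.
    template <typename OnDone>
    void createAsync(int Arg, OnDone Done) { Done(std::to_string(Arg)); }

    std::string createBlocking(int Arg) {
      std::promise<std::string> P;
      auto F = P.get_future();
      createAsync(Arg, [&](std::string R) { P.set_value(std::move(R)); });
      return F.get(); // blocks until the continuation has run
    }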
+
+SimpleSegmentAlloc::SimpleSegmentAlloc(SimpleSegmentAlloc &&) = default;
+SimpleSegmentAlloc &
+SimpleSegmentAlloc::operator=(SimpleSegmentAlloc &&) = default;
+SimpleSegmentAlloc::~SimpleSegmentAlloc() {}
+
+SimpleSegmentAlloc::SegmentInfo SimpleSegmentAlloc::getSegInfo(AllocGroup AG) {
+ auto I = ContentBlocks.find(AG);
+ if (I != ContentBlocks.end()) {
+ auto &B = *I->second;
+ return {B.getAddress(), B.getAlreadyMutableContent()};
+ }
+ return {};
+}
+
+SimpleSegmentAlloc::SimpleSegmentAlloc(
+ std::unique_ptr<LinkGraph> G, AllocGroupSmallMap<Block *> ContentBlocks,
+ std::unique_ptr<JITLinkMemoryManager::InFlightAlloc> Alloc)
+ : G(std::move(G)), ContentBlocks(std::move(ContentBlocks)),
+ Alloc(std::move(Alloc)) {}
+
+class InProcessMemoryManager::IPInFlightAlloc
+ : public JITLinkMemoryManager::InFlightAlloc {
+public:
+ IPInFlightAlloc(InProcessMemoryManager &MemMgr, LinkGraph &G, BasicLayout BL,
+ sys::MemoryBlock StandardSegments,
+ sys::MemoryBlock FinalizationSegments)
+ : MemMgr(MemMgr), G(G), BL(std::move(BL)),
+ StandardSegments(std::move(StandardSegments)),
+ FinalizationSegments(std::move(FinalizationSegments)) {}
+
+ void finalize(OnFinalizedFunction OnFinalized) override {
+
+ // Apply memory protections to all segments.
+ if (auto Err = applyProtections()) {
+ OnFinalized(std::move(Err));
+ return;
+ }
+
+ // Run finalization actions.
+ // FIXME: Roll back previous successful actions on failure.
+ std::vector<AllocActionCall> DeallocActions;
+ DeallocActions.reserve(G.allocActions().size());
+ for (auto &ActPair : G.allocActions()) {
+ if (ActPair.Finalize.FnAddr)
+ if (auto Err = runAllocAction(ActPair.Finalize)) {
+ OnFinalized(std::move(Err));
+ return;
+ }
+ if (ActPair.Dealloc.FnAddr)
+ DeallocActions.push_back(ActPair.Dealloc);
+ }
+ G.allocActions().clear();
+
+ // Release the finalize segments slab.
+ if (auto EC = sys::Memory::releaseMappedMemory(FinalizationSegments)) {
+ OnFinalized(errorCodeToError(EC));
+ return;
+ }
+
+ // Continue with finalized allocation.
+ OnFinalized(MemMgr.createFinalizedAlloc(std::move(StandardSegments),
+ std::move(DeallocActions)));
+ }
+
+ void abandon(OnAbandonedFunction OnAbandoned) override {
+ Error Err = Error::success();
+ if (auto EC = sys::Memory::releaseMappedMemory(FinalizationSegments))
+ Err = joinErrors(std::move(Err), errorCodeToError(EC));
+ if (auto EC = sys::Memory::releaseMappedMemory(StandardSegments))
+ Err = joinErrors(std::move(Err), errorCodeToError(EC));
+ OnAbandoned(std::move(Err));
+ }
+
+private:
+ Error applyProtections() {
+ for (auto &KV : BL.segments()) {
+ const auto &AG = KV.first;
+ auto &Seg = KV.second;
+
+ auto Prot = toSysMemoryProtectionFlags(AG.getMemProt());
+
+ uint64_t SegSize =
+ alignTo(Seg.ContentSize + Seg.ZeroFillSize, MemMgr.PageSize);
+ sys::MemoryBlock MB(Seg.WorkingMem, SegSize);
+ if (auto EC = sys::Memory::protectMappedMemory(MB, Prot))
return errorCodeToError(EC);
- return Error::success();
+ if (Prot & sys::Memory::MF_EXEC)
+ sys::Memory::InvalidateInstructionCache(MB.base(), MB.allocatedSize());
}
+ return Error::success();
+ }
+
+ InProcessMemoryManager &MemMgr;
+ LinkGraph &G;
+ BasicLayout BL;
+ sys::MemoryBlock StandardSegments;
+ sys::MemoryBlock FinalizationSegments;
+};
+
+Expected<std::unique_ptr<InProcessMemoryManager>>
+InProcessMemoryManager::Create() {
+ if (auto PageSize = sys::Process::getPageSize())
+ return std::make_unique<InProcessMemoryManager>(*PageSize);
+ else
+ return PageSize.takeError();
+}
+
+void InProcessMemoryManager::allocate(const JITLinkDylib *JD, LinkGraph &G,
+ OnAllocatedFunction OnAllocated) {
+
+ // FIXME: Just check this once on startup.
+ if (!isPowerOf2_64((uint64_t)PageSize)) {
+ OnAllocated(make_error<StringError>("Page size is not a power of 2",
+ inconvertibleErrorCode()));
+ return;
+ }
+
+ BasicLayout BL(G);
+
+  // Scan the request and calculate the group and total sizes.
+  // Check that no segment's alignment is greater than a page.
+ auto SegsSizes = BL.getContiguousPageBasedLayoutSizes(PageSize);
+ if (!SegsSizes) {
+ OnAllocated(SegsSizes.takeError());
+ return;
+ }
+
+  // Check that the total size requested (including zero fill) fits in a
+  // size_t.
+ if (SegsSizes->total() > std::numeric_limits<size_t>::max()) {
+ OnAllocated(make_error<JITLinkError>(
+ "Total requested size " + formatv("{0:x}", SegsSizes->total()) +
+ " for graph " + G.getName() + " exceeds address space"));
+ return;
+ }
+
+ // Allocate one slab for the whole thing (to make sure everything is
+ // in-range), then partition into standard and finalization blocks.
+ //
+ // FIXME: Make two separate allocations in the future to reduce
+ // fragmentation: finalization segments will usually be a single page, and
+ // standard segments are likely to be more than one page. Where multiple
+ // allocations are in-flight at once (likely) the current approach will leave
+ // a lot of single-page holes.
+ sys::MemoryBlock Slab;
+ sys::MemoryBlock StandardSegsMem;
+ sys::MemoryBlock FinalizeSegsMem;
+ {
+ const sys::Memory::ProtectionFlags ReadWrite =
+ static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
+ sys::Memory::MF_WRITE);
+
+ std::error_code EC;
+ Slab = sys::Memory::allocateMappedMemory(SegsSizes->total(), nullptr,
+ ReadWrite, EC);
- private:
- Error applyProtections() {
- for (auto &KV : SegBlocks) {
- auto &Prot = KV.first;
- auto &Block = KV.second;
- if (auto EC = sys::Memory::protectMappedMemory(Block, Prot))
- return errorCodeToError(EC);
- if (Prot & sys::Memory::MF_EXEC)
- sys::Memory::InvalidateInstructionCache(Block.base(),
- Block.allocatedSize());
- }
- return Error::success();
+ if (EC) {
+ OnAllocated(errorCodeToError(EC));
+ return;
}
- AllocationMap SegBlocks;
- };
+ // Zero-fill the whole slab up-front.
+ memset(Slab.base(), 0, Slab.allocatedSize());
+
+ StandardSegsMem = {Slab.base(),
+ static_cast<size_t>(SegsSizes->StandardSegs)};
+ FinalizeSegsMem = {(void *)((char *)Slab.base() + SegsSizes->StandardSegs),
+ static_cast<size_t>(SegsSizes->FinalizeSegs)};
+ }
- if (!isPowerOf2_64((uint64_t)sys::Process::getPageSizeEstimate()))
- return make_error<StringError>("Page size is not a power of 2",
- inconvertibleErrorCode());
+ auto NextStandardSegAddr = pointerToJITTargetAddress(StandardSegsMem.base());
+ auto NextFinalizeSegAddr = pointerToJITTargetAddress(FinalizeSegsMem.base());
- AllocationMap Blocks;
- const sys::Memory::ProtectionFlags ReadWrite =
- static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
- sys::Memory::MF_WRITE);
+ LLVM_DEBUG({
+ dbgs() << "InProcessMemoryManager allocated:\n";
+ if (SegsSizes->StandardSegs)
+ dbgs() << formatv(" [ {0:x16} -- {1:x16} ]", NextStandardSegAddr,
+ NextStandardSegAddr + StandardSegsMem.allocatedSize())
+             << " to standard segs\n";
+ else
+ dbgs() << " no standard segs\n";
+ if (SegsSizes->FinalizeSegs)
+ dbgs() << formatv(" [ {0:x16} -- {1:x16} ]", NextFinalizeSegAddr,
+ NextFinalizeSegAddr + FinalizeSegsMem.allocatedSize())
+ << " to finalize segs\n";
+ else
+ dbgs() << " no finalize segs\n";
+ });
- // Compute the total number of pages to allocate.
- size_t TotalSize = 0;
- for (auto &KV : Request) {
- const auto &Seg = KV.second;
+ // Build ProtMap, assign addresses.
+ for (auto &KV : BL.segments()) {
+ auto &AG = KV.first;
+ auto &Seg = KV.second;
- if (Seg.getAlignment() > sys::Process::getPageSizeEstimate())
- return make_error<StringError>("Cannot request higher than page "
- "alignment",
- inconvertibleErrorCode());
+ auto &SegAddr = (AG.getMemDeallocPolicy() == MemDeallocPolicy::Standard)
+ ? NextStandardSegAddr
+ : NextFinalizeSegAddr;
- TotalSize = alignTo(TotalSize, sys::Process::getPageSizeEstimate());
- TotalSize += Seg.getContentSize();
- TotalSize += Seg.getZeroFillSize();
+ Seg.WorkingMem = jitTargetAddressToPointer<char *>(SegAddr);
+ Seg.Addr = SegAddr;
+
+ SegAddr += alignTo(Seg.ContentSize + Seg.ZeroFillSize, PageSize);
}
- // Allocate one slab to cover all the segments.
- std::error_code EC;
- auto SlabRemaining =
- sys::Memory::allocateMappedMemory(TotalSize, nullptr, ReadWrite, EC);
+ if (auto Err = BL.apply()) {
+ OnAllocated(std::move(Err));
+ return;
+ }
- if (EC)
- return errorCodeToError(EC);
+ OnAllocated(std::make_unique<IPInFlightAlloc>(*this, G, std::move(BL),
+ std::move(StandardSegsMem),
+ std::move(FinalizeSegsMem)));
+}
- // Allocate segment memory from the slab.
- for (auto &KV : Request) {
+void InProcessMemoryManager::deallocate(std::vector<FinalizedAlloc> Allocs,
+ OnDeallocatedFunction OnDeallocated) {
+ std::vector<sys::MemoryBlock> StandardSegmentsList;
+ std::vector<std::vector<AllocActionCall>> DeallocActionsList;
- const auto &Seg = KV.second;
+ {
+ std::lock_guard<std::mutex> Lock(FinalizedAllocsMutex);
+ for (auto &Alloc : Allocs) {
+ auto *FA =
+ jitTargetAddressToPointer<FinalizedAllocInfo *>(Alloc.release());
+ StandardSegmentsList.push_back(std::move(FA->StandardSegments));
+ if (!FA->DeallocActions.empty())
+ DeallocActionsList.push_back(std::move(FA->DeallocActions));
+ FA->~FinalizedAllocInfo();
+ FinalizedAllocInfos.Deallocate(FA);
+ }
+ }
+
+ Error DeallocErr = Error::success();
- uint64_t SegmentSize = alignTo(Seg.getContentSize() + Seg.getZeroFillSize(),
- sys::Process::getPageSizeEstimate());
- assert(SlabRemaining.allocatedSize() >= SegmentSize &&
- "Mapping exceeds allocation");
+ while (!DeallocActionsList.empty()) {
+ auto &DeallocActions = DeallocActionsList.back();
+ auto &StandardSegments = StandardSegmentsList.back();
- sys::MemoryBlock SegMem(SlabRemaining.base(), SegmentSize);
- SlabRemaining = sys::MemoryBlock((char *)SlabRemaining.base() + SegmentSize,
- SlabRemaining.allocatedSize() - SegmentSize);
+    // Run any deallocate calls.
+ while (!DeallocActions.empty()) {
+ if (auto Err = runAllocAction(DeallocActions.back()))
+ DeallocErr = joinErrors(std::move(DeallocErr), std::move(Err));
+ DeallocActions.pop_back();
+ }
- // Zero out the zero-fill memory.
- memset(static_cast<char *>(SegMem.base()) + Seg.getContentSize(), 0,
- Seg.getZeroFillSize());
+    // Release the standard segments slab.
+ if (auto EC = sys::Memory::releaseMappedMemory(StandardSegments))
+ DeallocErr = joinErrors(std::move(DeallocErr), errorCodeToError(EC));
- // Record the block for this segment.
- Blocks[KV.first] = std::move(SegMem);
+ DeallocActionsList.pop_back();
+ StandardSegmentsList.pop_back();
}
- return std::unique_ptr<InProcessMemoryManager::Allocation>(
- new IPMMAlloc(std::move(Blocks)));
+ OnDeallocated(std::move(DeallocErr));
+}
+
+JITLinkMemoryManager::FinalizedAlloc
+InProcessMemoryManager::createFinalizedAlloc(
+ sys::MemoryBlock StandardSegments,
+ std::vector<AllocActionCall> DeallocActions) {
+ std::lock_guard<std::mutex> Lock(FinalizedAllocsMutex);
+ auto *FA = FinalizedAllocInfos.Allocate<FinalizedAllocInfo>();
+ new (FA) FinalizedAllocInfo(
+ {std::move(StandardSegments), std::move(DeallocActions)});
+ return FinalizedAlloc(pointerToJITTargetAddress(FA));
}
} // end namespace jitlink
diff --git a/llvm/lib/ExecutionEngine/JITLink/MachO.cpp b/llvm/lib/ExecutionEngine/JITLink/MachO.cpp
index eda2b8811deb..e49480c78662 100644
--- a/llvm/lib/ExecutionEngine/JITLink/MachO.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/MachO.cpp
@@ -1,9 +1,8 @@
//===-------------- MachO.cpp - JIT linker function for MachO -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp b/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp
index 03a8b98dff18..d588b63d9e88 100644
--- a/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp
@@ -23,7 +23,7 @@ MachOLinkGraphBuilder::~MachOLinkGraphBuilder() {}
Expected<std::unique_ptr<LinkGraph>> MachOLinkGraphBuilder::buildGraph() {
- // Sanity check: we only operate on relocatable objects.
+ // We only operate on relocatable objects.
if (!Obj.isRelocatableObject())
return make_error<JITLinkError>("Object is not a relocatable MachO");
@@ -107,11 +107,9 @@ MachOLinkGraphBuilder::getEndianness(const object::MachOObjectFile &Obj) {
}
Section &MachOLinkGraphBuilder::getCommonSection() {
- if (!CommonSection) {
- auto Prot = static_cast<sys::Memory::ProtectionFlags>(
- sys::Memory::MF_READ | sys::Memory::MF_WRITE);
- CommonSection = &G->createSection(CommonSectionName, Prot);
- }
+ if (!CommonSection)
+ CommonSection =
+ &G->createSection(CommonSectionName, MemProt::Read | MemProt::Write);
return *CommonSection;
}
@@ -176,25 +174,16 @@ Error MachOLinkGraphBuilder::createNormalizedSections() {
// Get prot flags.
// FIXME: Make sure this test is correct (it's probably missing cases
// as-is).
- sys::Memory::ProtectionFlags Prot;
+ MemProt Prot;
if (NSec.Flags & MachO::S_ATTR_PURE_INSTRUCTIONS)
- Prot = static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
- sys::Memory::MF_EXEC);
+ Prot = MemProt::Read | MemProt::Exec;
else
- Prot = static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
- sys::Memory::MF_WRITE);
-
- if (!isDebugSection(NSec)) {
- auto FullyQualifiedName =
- G->allocateString(StringRef(NSec.SegName) + "," + NSec.SectName);
- NSec.GraphSection = &G->createSection(
- StringRef(FullyQualifiedName.data(), FullyQualifiedName.size()),
- Prot);
- } else
- LLVM_DEBUG({
- dbgs() << " " << NSec.SegName << "," << NSec.SectName
- << " is a debug section: No graph section will be created.\n";
- });
+ Prot = MemProt::Read | MemProt::Write;
+
+ auto FullyQualifiedName =
+ G->allocateString(StringRef(NSec.SegName) + "," + NSec.SectName);
+ NSec.GraphSection = &G->createSection(
+ StringRef(FullyQualifiedName.data(), FullyQualifiedName.size()), Prot);
IndexToSection.insert(std::make_pair(SecIndex, std::move(NSec)));
}
@@ -292,15 +281,16 @@ Error MachOLinkGraphBuilder::createNormalizedSymbols() {
dbgs() << "\n";
});
- // If this symbol has a section, sanity check that the addresses line up.
+ // If this symbol has a section, verify that the addresses line up.
if (Sect != 0) {
auto NSec = findSectionByIndex(Sect - 1);
if (!NSec)
return NSec.takeError();
if (Value < NSec->Address || Value > NSec->Address + NSec->Size)
- return make_error<JITLinkError>("Symbol address does not fall within "
- "section");
+ return make_error<JITLinkError>("Address " + formatv("{0:x}", Value) +
+ " for symbol " + *Name +
+ " does not fall within section");
if (!NSec->GraphSection) {
LLVM_DEBUG({
@@ -321,16 +311,19 @@ Error MachOLinkGraphBuilder::createNormalizedSymbols() {
}
void MachOLinkGraphBuilder::addSectionStartSymAndBlock(
- Section &GraphSec, uint64_t Address, const char *Data, uint64_t Size,
- uint32_t Alignment, bool IsLive) {
+ unsigned SecIndex, Section &GraphSec, uint64_t Address, const char *Data,
+ uint64_t Size, uint32_t Alignment, bool IsLive) {
Block &B =
Data ? G->createContentBlock(GraphSec, ArrayRef<char>(Data, Size),
Address, Alignment, 0)
: G->createZeroFillBlock(GraphSec, Size, Address, Alignment, 0);
auto &Sym = G->addAnonymousSymbol(B, 0, Size, false, IsLive);
- assert(!AddrToCanonicalSymbol.count(Sym.getAddress()) &&
+ auto SecI = IndexToSection.find(SecIndex);
+ assert(SecI != IndexToSection.end() && "SecIndex invalid");
+ auto &NSec = SecI->second;
+ assert(!NSec.CanonicalSymbols.count(Sym.getAddress()) &&
"Anonymous block start symbol clashes with existing symbol address");
- AddrToCanonicalSymbol[Sym.getAddress()] = &Sym;
+ NSec.CanonicalSymbols[Sym.getAddress()] = &Sym;
}
Error MachOLinkGraphBuilder::graphifyRegularSymbols() {
@@ -444,8 +437,8 @@ Error MachOLinkGraphBuilder::graphifyRegularSymbols() {
<< formatv("{0:x16}", NSec.Address) << " -- "
<< formatv("{0:x16}", NSec.Address + NSec.Size) << "\n";
});
- addSectionStartSymAndBlock(*NSec.GraphSection, NSec.Address, NSec.Data,
- NSec.Size, NSec.Alignment,
+ addSectionStartSymAndBlock(SecIndex, *NSec.GraphSection, NSec.Address,
+ NSec.Data, NSec.Size, NSec.Alignment,
SectionIsNoDeadStrip);
} else
LLVM_DEBUG({
@@ -483,8 +476,8 @@ Error MachOLinkGraphBuilder::graphifyRegularSymbols() {
<< formatv("{0:x16}", NSec.Address) << " -- "
<< formatv("{0:x16}", NSec.Address + AnonBlockSize) << " ]\n";
});
- addSectionStartSymAndBlock(*NSec.GraphSection, NSec.Address, NSec.Data,
- AnonBlockSize, NSec.Alignment,
+ addSectionStartSymAndBlock(SecIndex, *NSec.GraphSection, NSec.Address,
+ NSec.Data, AnonBlockSize, NSec.Alignment,
SectionIsNoDeadStrip);
}
@@ -583,7 +576,7 @@ Symbol &MachOLinkGraphBuilder::createStandardGraphSymbol(NormalizedSymbol &NSym,
NSym.GraphSymbol = &Sym;
if (IsCanonical)
- setCanonicalSymbol(Sym);
+ setCanonicalSymbol(getSectionByIndex(NSym.Sect - 1), Sym);
return Sym;
}
@@ -610,7 +603,6 @@ Error MachOLinkGraphBuilder::graphifySectionsWithCustomParsers() {
Error MachOLinkGraphBuilder::graphifyCStringSection(
NormalizedSection &NSec, std::vector<NormalizedSymbol *> NSyms) {
-
assert(NSec.GraphSection && "C string literal section missing graph section");
assert(NSec.Data && "C string literal section has no data");
@@ -664,7 +656,7 @@ Error MachOLinkGraphBuilder::graphifyCStringSection(
// If there's no symbol at the start of this block then create one.
if (NSyms.empty() || NSyms.back()->Value != B.getAddress()) {
auto &S = G->addAnonymousSymbol(B, 0, BlockSize, false, false);
- setCanonicalSymbol(S);
+ setCanonicalSymbol(NSec, S);
LLVM_DEBUG({
dbgs() << " Adding anonymous symbol for c-string block "
<< formatv("{0:x16} -- {1:x16}", S.getAddress(),
@@ -700,5 +692,119 @@ Error MachOLinkGraphBuilder::graphifyCStringSection(
return Error::success();
}
+Error CompactUnwindSplitter::operator()(LinkGraph &G) {
+ auto *CUSec = G.findSectionByName(CompactUnwindSectionName);
+ if (!CUSec)
+ return Error::success();
+
+ if (!G.getTargetTriple().isOSBinFormatMachO())
+ return make_error<JITLinkError>(
+ "Error linking " + G.getName() +
+ ": compact unwind splitting not supported on non-macho target " +
+ G.getTargetTriple().str());
+
+ unsigned CURecordSize = 0;
+ unsigned PersonalityEdgeOffset = 0;
+ unsigned LSDAEdgeOffset = 0;
+ switch (G.getTargetTriple().getArch()) {
+ case Triple::aarch64:
+ case Triple::x86_64:
+ // 64-bit compact-unwind record format:
+ // Range start: 8 bytes.
+ // Range size: 4 bytes.
+ // CU encoding: 4 bytes.
+ // Personality: 8 bytes.
+ // LSDA: 8 bytes.
+ CURecordSize = 32;
+ PersonalityEdgeOffset = 16;
+ LSDAEdgeOffset = 24;
+ break;
+ default:
+ return make_error<JITLinkError>(
+ "Error linking " + G.getName() +
+ ": compact unwind splitting not supported on " +
+ G.getTargetTriple().getArchName());
+ }
+
+ std::vector<Block *> OriginalBlocks(CUSec->blocks().begin(),
+ CUSec->blocks().end());
+ LLVM_DEBUG({
+ dbgs() << "In " << G.getName() << " splitting compact unwind section "
+ << CompactUnwindSectionName << " containing "
+ << OriginalBlocks.size() << " initial blocks...\n";
+ });
+
+ while (!OriginalBlocks.empty()) {
+ auto *B = OriginalBlocks.back();
+ OriginalBlocks.pop_back();
+
+ if (B->getSize() == 0) {
+ LLVM_DEBUG({
+ dbgs() << " Skipping empty block at "
+ << formatv("{0:x16}", B->getAddress()) << "\n";
+ });
+ continue;
+ }
+
+ LLVM_DEBUG({
+ dbgs() << " Splitting block at " << formatv("{0:x16}", B->getAddress())
+ << " into " << (B->getSize() / CURecordSize)
+ << " compact unwind record(s)\n";
+ });
+
+ if (B->getSize() % CURecordSize)
+ return make_error<JITLinkError>(
+ "Error splitting compact unwind record in " + G.getName() +
+ ": block at " + formatv("{0:x}", B->getAddress()) + " has size " +
+ formatv("{0:x}", B->getSize()) +
+ " (not a multiple of CU record size of " +
+ formatv("{0:x}", CURecordSize) + ")");
+
+ unsigned NumBlocks = B->getSize() / CURecordSize;
+ LinkGraph::SplitBlockCache C;
+
+ for (unsigned I = 0; I != NumBlocks; ++I) {
+ auto &CURec = G.splitBlock(*B, CURecordSize, &C);
+ bool AddedKeepAlive = false;
+
+ for (auto &E : CURec.edges()) {
+ if (E.getOffset() == 0) {
+ LLVM_DEBUG({
+ dbgs() << " Updating compact unwind record at "
+ << formatv("{0:x16}", CURec.getAddress()) << " to point to "
+ << (E.getTarget().hasName() ? E.getTarget().getName()
+ : StringRef())
+ << " (at " << formatv("{0:x16}", E.getTarget().getAddress())
+ << ")\n";
+ });
+
+ if (E.getTarget().isExternal())
+ return make_error<JITLinkError>(
+ "Error adding keep-alive edge for compact unwind record at " +
+ formatv("{0:x}", CURec.getAddress()) + ": target " +
+ E.getTarget().getName() + " is an external symbol");
+ auto &TgtBlock = E.getTarget().getBlock();
+ auto &CURecSym =
+ G.addAnonymousSymbol(CURec, 0, CURecordSize, 0, false);
+ TgtBlock.addEdge(Edge::KeepAlive, 0, CURecSym, 0);
+ AddedKeepAlive = true;
+ } else if (E.getOffset() != PersonalityEdgeOffset &&
+ E.getOffset() != LSDAEdgeOffset)
+ return make_error<JITLinkError>("Unexpected edge at offset " +
+ formatv("{0:x}", E.getOffset()) +
+ " in compact unwind record at " +
+ formatv("{0:x}", CURec.getAddress()));
+ }
+
+ if (!AddedKeepAlive)
+ return make_error<JITLinkError>(
+ "Error adding keep-alive edge for compact unwind record at " +
+ formatv("{0:x}", CURec.getAddress()) +
+ ": no outgoing target edge at offset 0");
+ }
+ }
+ return Error::success();
+}
+
} // end namespace jitlink
} // end namespace llvm
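To make the splitter's arithmetic concrete: with the 64-bit record layout described in the switch above, a hypothetical 96-byte __LD,__compact_unwind block is carved into three 32-byte records, and the edge at offset 0 of each record is inverted into a keep-alive edge so that pruning a function also drops its unwind record:

    // 96-byte block, CURecordSize == 32  ->  NumBlocks = 96 / 32 = 3 records.
    // Field offsets within each record (x86-64 / arm64):
    //   0  : range start (8 bytes) -> replaced by a KeepAlive edge from the
    //                                 covered function's block back to an
    //                                 anonymous symbol on the record
    //   16 : personality (8 bytes) -> PersonalityEdgeOffset
    //   24 : LSDA        (8 bytes) -> LSDAEdgeOffset
    // Any edge at another offset is rejected, as is a record whose offset-0
    // edge is missing or targets an external symbol.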
diff --git a/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h b/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h
index 90b14c44ff8a..d29732ebdba8 100644
--- a/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h
+++ b/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h
@@ -77,6 +77,7 @@ protected:
uint32_t Flags = 0;
const char *Data = nullptr;
Section *GraphSection = nullptr;
+ std::map<JITTargetAddress, Symbol *> CanonicalSymbols;
};
using SectionParserFunction = std::function<Error(NormalizedSection &S)>;
@@ -125,30 +126,31 @@ protected:
/// given index is out of range, or if no symbol has been added for the given
/// index.
Expected<NormalizedSymbol &> findSymbolByIndex(uint64_t Index) {
- if (Index >= IndexToSymbol.size())
- return make_error<JITLinkError>("Symbol index out of range");
- auto *Sym = IndexToSymbol[Index];
- if (!Sym)
+ auto I = IndexToSymbol.find(Index);
+ if (I == IndexToSymbol.end())
return make_error<JITLinkError>("No symbol at index " +
formatv("{0:d}", Index));
- return *Sym;
+ assert(I->second && "Null symbol at index");
+ return *I->second;
}
/// Returns the symbol with the highest address not greater than the search
/// address, or null if no such symbol exists.
- Symbol *getSymbolByAddress(JITTargetAddress Address) {
- auto I = AddrToCanonicalSymbol.upper_bound(Address);
- if (I == AddrToCanonicalSymbol.begin())
+ Symbol *getSymbolByAddress(NormalizedSection &NSec,
+ JITTargetAddress Address) {
+ auto I = NSec.CanonicalSymbols.upper_bound(Address);
+ if (I == NSec.CanonicalSymbols.begin())
return nullptr;
return std::prev(I)->second;
}
/// Returns the symbol with the highest address not greater than the search
/// address, or an error if no such symbol exists.
- Expected<Symbol &> findSymbolByAddress(JITTargetAddress Address) {
- auto *Sym = getSymbolByAddress(Address);
+ Expected<Symbol &> findSymbolByAddress(NormalizedSection &NSec,
+ JITTargetAddress Address) {
+ auto *Sym = getSymbolByAddress(NSec, Address);
if (Sym)
- if (Address < Sym->getAddress() + Sym->getSize())
+ if (Address <= Sym->getAddress() + Sym->getSize())
return *Sym;
return make_error<JITLinkError>("No symbol covering address " +
formatv("{0:x16}", Address));
@@ -179,8 +181,8 @@ private:
static unsigned getPointerSize(const object::MachOObjectFile &Obj);
static support::endianness getEndianness(const object::MachOObjectFile &Obj);
- void setCanonicalSymbol(Symbol &Sym) {
- auto *&CanonicalSymEntry = AddrToCanonicalSymbol[Sym.getAddress()];
+ void setCanonicalSymbol(NormalizedSection &NSec, Symbol &Sym) {
+ auto *&CanonicalSymEntry = NSec.CanonicalSymbols[Sym.getAddress()];
// There should be no symbol at this address, or, if there is,
// it should be a zero-sized symbol from an empty section (which
// we can safely override).
@@ -190,9 +192,10 @@ private:
}
Section &getCommonSection();
- void addSectionStartSymAndBlock(Section &GraphSec, uint64_t Address,
- const char *Data, uint64_t Size,
- uint32_t Alignment, bool IsLive);
+ void addSectionStartSymAndBlock(unsigned SecIndex, Section &GraphSec,
+ uint64_t Address, const char *Data,
+ uint64_t Size, uint32_t Alignment,
+ bool IsLive);
Error createNormalizedSections();
Error createNormalizedSymbols();
@@ -227,10 +230,20 @@ private:
Section *CommonSection = nullptr;
DenseMap<uint32_t, NormalizedSymbol *> IndexToSymbol;
- std::map<JITTargetAddress, Symbol *> AddrToCanonicalSymbol;
StringMap<SectionParserFunction> CustomSectionParserFunctions;
};
+/// A pass to split up __LD,__compact_unwind sections.
+class CompactUnwindSplitter {
+public:
+ CompactUnwindSplitter(StringRef CompactUnwindSectionName)
+ : CompactUnwindSectionName(CompactUnwindSectionName) {}
+ Error operator()(LinkGraph &G);
+
+private:
+ StringRef CompactUnwindSectionName;
+};
+
} // end namespace jitlink
} // end namespace llvm
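getSymbolByAddress above uses the usual ordered-map idiom for "greatest key not greater than the query": take upper_bound, then step back one entry. A self-contained sketch of the idiom with made-up symbol addresses:

    #include <cassert>
    #include <cstdint>
    #include <map>

    const char *lookup(const std::map<uint64_t, const char *> &Syms,
                       uint64_t Addr) {
      auto I = Syms.upper_bound(Addr); // first key strictly greater than Addr
      if (I == Syms.begin())
        return nullptr;                // everything is above Addr
      return std::prev(I)->second;     // greatest key <= Addr
    }

    int main() {
      std::map<uint64_t, const char *> Syms = {
          {0x1000, "_a"}, {0x1040, "_b"}, {0x1100, "_c"}};
      assert(lookup(Syms, 0x1041) == Syms.at(0x1040)); // falls inside _b
      assert(lookup(Syms, 0x0fff) == nullptr);         // below the first symbol
    }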
diff --git a/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp b/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp
index 169e20a1d1d3..f2a029d35cd5 100644
--- a/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp
@@ -81,6 +81,14 @@ private:
if (!RI.r_pcrel && !RI.r_extern && RI.r_length == 2)
return PairedAddend;
break;
+ case MachO::ARM64_RELOC_TLVP_LOAD_PAGE21:
+ if (RI.r_pcrel && RI.r_extern && RI.r_length == 2)
+ return TLVPage21;
+ break;
+ case MachO::ARM64_RELOC_TLVP_LOAD_PAGEOFF12:
+ if (!RI.r_pcrel && RI.r_extern && RI.r_length == 2)
+ return TLVPageOffset12;
+ break;
}
return make_error<JITLinkError>(
@@ -152,7 +160,7 @@ private:
auto ToSymbolSec = findSectionByIndex(UnsignedRI.r_symbolnum - 1);
if (!ToSymbolSec)
return ToSymbolSec.takeError();
- ToSymbol = getSymbolByAddress(ToSymbolSec->Address);
+ ToSymbol = getSymbolByAddress(*ToSymbolSec, ToSymbolSec->Address);
assert(ToSymbol && "No symbol for section");
FixupValue -= ToSymbol->getAddress();
}
@@ -197,14 +205,18 @@ private:
continue;
}
- // Skip relocations for debug symbols.
+ auto NSec =
+ findSectionByIndex(Obj.getSectionIndex(S.getRawDataRefImpl()));
+ if (!NSec)
+ return NSec.takeError();
+
+ // Skip relocations for MachO sections without corresponding graph
+ // sections.
{
- auto &NSec =
- getSectionByIndex(Obj.getSectionIndex(S.getRawDataRefImpl()));
- if (!NSec.GraphSection) {
+ if (!NSec->GraphSection) {
LLVM_DEBUG({
dbgs() << " Skipping relocations for MachO section "
- << NSec.SegName << "/" << NSec.SectName
+ << NSec->SegName << "/" << NSec->SectName
<< " which has no associated graph section\n";
});
continue;
@@ -216,25 +228,22 @@ private:
MachO::relocation_info RI = getRelocationInfo(RelItr);
- // Sanity check the relocation kind.
+ // Validate the relocation kind.
auto Kind = getRelocationKind(RI);
if (!Kind)
return Kind.takeError();
// Find the address of the value to fix up.
JITTargetAddress FixupAddress = SectionAddress + (uint32_t)RI.r_address;
-
LLVM_DEBUG({
- auto &NSec =
- getSectionByIndex(Obj.getSectionIndex(S.getRawDataRefImpl()));
- dbgs() << " " << NSec.SectName << " + "
+ dbgs() << " " << NSec->SectName << " + "
<< formatv("{0:x8}", RI.r_address) << ":\n";
});
// Find the block that the fixup points to.
Block *BlockToFix = nullptr;
{
- auto SymbolToFixOrErr = findSymbolByAddress(FixupAddress);
+ auto SymbolToFixOrErr = findSymbolByAddress(*NSec, FixupAddress);
if (!SymbolToFixOrErr)
return SymbolToFixOrErr.takeError();
BlockToFix = &SymbolToFixOrErr->getBlock();
@@ -316,7 +325,11 @@ private:
break;
case Pointer64Anon: {
JITTargetAddress TargetAddress = *(const ulittle64_t *)FixupContent;
- if (auto TargetSymbolOrErr = findSymbolByAddress(TargetAddress))
+ auto TargetNSec = findSectionByIndex(RI.r_symbolnum - 1);
+ if (!TargetNSec)
+ return TargetNSec.takeError();
+ if (auto TargetSymbolOrErr =
+ findSymbolByAddress(*TargetNSec, TargetAddress))
TargetSymbol = &*TargetSymbolOrErr;
else
return TargetSymbolOrErr.takeError();
@@ -324,6 +337,7 @@ private:
break;
}
case Page21:
+ case TLVPage21:
case GOTPage21: {
if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum))
TargetSymbol = TargetSymbolOrErr->GraphSymbol;
@@ -348,6 +362,7 @@ private:
"encoded addend");
break;
}
+ case TLVPageOffset12:
case GOTPageOffset12: {
if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum))
TargetSymbol = TargetSymbolOrErr->GraphSymbol;
@@ -414,6 +429,7 @@ public:
bool isGOTEdgeToFix(Edge &E) const {
return E.getKind() == GOTPage21 || E.getKind() == GOTPageOffset12 ||
+ E.getKind() == TLVPage21 || E.getKind() == TLVPageOffset12 ||
E.getKind() == PointerToGOT;
}
@@ -425,7 +441,8 @@ public:
}
void fixGOTEdge(Edge &E, Symbol &GOTEntry) {
- if (E.getKind() == GOTPage21 || E.getKind() == GOTPageOffset12) {
+ if (E.getKind() == GOTPage21 || E.getKind() == GOTPageOffset12 ||
+ E.getKind() == TLVPage21 || E.getKind() == TLVPageOffset12) {
// Update the target, but leave the edge addend as-is.
E.setTarget(GOTEntry);
} else if (E.getKind() == PointerToGOT) {
@@ -457,16 +474,14 @@ public:
private:
Section &getGOTSection() {
if (!GOTSection)
- GOTSection = &G.createSection("$__GOT", sys::Memory::MF_READ);
+ GOTSection = &G.createSection("$__GOT", MemProt::Read);
return *GOTSection;
}
Section &getStubsSection() {
- if (!StubsSection) {
- auto StubsProt = static_cast<sys::Memory::ProtectionFlags>(
- sys::Memory::MF_READ | sys::Memory::MF_EXEC);
- StubsSection = &G.createSection("$__STUBS", StubsProt);
- }
+ if (!StubsSection)
+ StubsSection =
+ &G.createSection("$__STUBS", MemProt::Read | MemProt::Exec);
return *StubsSection;
}
@@ -567,6 +582,7 @@ private:
break;
}
case Page21:
+ case TLVPage21:
case GOTPage21: {
assert((E.getKind() != GOTPage21 || E.getAddend() == 0) &&
"GOTPAGE21 with non-zero addend");
@@ -603,6 +619,7 @@ private:
*(ulittle32_t *)FixupPtr = FixedInstr;
break;
}
+ case TLVPageOffset12:
case GOTPageOffset12: {
assert(E.getAddend() == 0 && "GOTPAGEOF12 with non-zero addend");
@@ -629,7 +646,8 @@ private:
if (Delta < -(1 << 20) || Delta > ((1 << 20) - 1))
return makeTargetOutOfRangeError(G, B, E);
- uint32_t EncodedImm = (static_cast<uint32_t>(Delta) >> 2) << 5;
+ uint32_t EncodedImm =
+ ((static_cast<uint32_t>(Delta) >> 2) & 0x7ffff) << 5;
uint32_t FixedInstr = RawInstr | EncodedImm;
*(ulittle32_t *)FixupPtr = FixedInstr;
break;
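The & 0x7ffff mask added above matters because Delta is sign-extended: for a negative displacement the high bits of the 32-bit value are all ones, and without the mask the left shift by 5 would spill them over the opcode bits above the 19-bit immediate field. A small sketch of the encoding arithmetic (the surrounding instruction word is hypothetical):

    #include <cassert>
    #include <cstdint>

    // Encode a byte displacement as a 19-bit, word-scaled immediate in bits
    // 5-23, as used by AArch64 LDR-literal and conditional-branch encodings.
    uint32_t encodeImm19(uint32_t RawInstr, int64_t Delta) {
      assert(Delta >= -(1 << 20) && Delta <= (1 << 20) - 1 && "out of range");
      uint32_t EncodedImm = ((static_cast<uint32_t>(Delta) >> 2) & 0x7ffff) << 5;
      return RawInstr | EncodedImm;
    }

    int main() {
      // For Delta = -8: uint32_t(-8) >> 2 == 0x3ffffffe. Without the mask the
      // high bits would corrupt the opcode; with it the immediate field is
      // 0x7fffe, i.e. -2 words, as intended.
      assert((encodeImm19(0, -8) >> 5) == 0x7fffe);
    }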
@@ -683,6 +701,10 @@ void link_MachO_arm64(std::unique_ptr<LinkGraph> G,
else
Config.PrePrunePasses.push_back(markAllSymbolsLive);
+ // Add compact unwind splitter pass.
+ Config.PrePrunePasses.push_back(
+ CompactUnwindSplitter("__LD,__compact_unwind"));
+
// Add an in-place GOT/Stubs pass.
Config.PostPrunePasses.push_back(
PerGraphGOTAndPLTStubsBuilder_MachO_arm64::asPass);
@@ -711,6 +733,10 @@ const char *getMachOARM64RelocationKindName(Edge::Kind R) {
return "GOTPage21";
case GOTPageOffset12:
return "GOTPageOffset12";
+ case TLVPage21:
+ return "TLVPage21";
+ case TLVPageOffset12:
+ return "TLVPageOffset12";
case PointerToGOT:
return "PointerToGOT";
case PairedAddend:
diff --git a/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp b/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp
index 61d5c5e21ff1..a4fcd3b9a5f5 100644
--- a/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp
@@ -170,7 +170,7 @@ private:
auto ToSymbolSec = findSectionByIndex(UnsignedRI.r_symbolnum - 1);
if (!ToSymbolSec)
return ToSymbolSec.takeError();
- ToSymbol = getSymbolByAddress(ToSymbolSec->Address);
+ ToSymbol = getSymbolByAddress(*ToSymbolSec, ToSymbolSec->Address);
assert(ToSymbol && "No symbol for section");
FixupValue -= ToSymbol->getAddress();
}
@@ -216,14 +216,18 @@ private:
continue;
}
- // Skip relocations for debug symbols.
+ auto NSec =
+ findSectionByIndex(Obj.getSectionIndex(S.getRawDataRefImpl()));
+ if (!NSec)
+ return NSec.takeError();
+
+ // Skip relocations for MachO sections without corresponding graph
+ // sections.
{
- auto &NSec =
- getSectionByIndex(Obj.getSectionIndex(S.getRawDataRefImpl()));
- if (!NSec.GraphSection) {
+ if (!NSec->GraphSection) {
LLVM_DEBUG({
dbgs() << " Skipping relocations for MachO section "
- << NSec.SegName << "/" << NSec.SectName
+ << NSec->SegName << "/" << NSec->SectName
<< " which has no associated graph section\n";
});
continue;
@@ -240,16 +244,14 @@ private:
JITTargetAddress FixupAddress = SectionAddress + (uint32_t)RI.r_address;
LLVM_DEBUG({
- auto &NSec =
- getSectionByIndex(Obj.getSectionIndex(S.getRawDataRefImpl()));
- dbgs() << " " << NSec.SectName << " + "
+ dbgs() << " " << NSec->SectName << " + "
<< formatv("{0:x8}", RI.r_address) << ":\n";
});
// Find the block that the fixup points to.
Block *BlockToFix = nullptr;
{
- auto SymbolToFixOrErr = findSymbolByAddress(FixupAddress);
+ auto SymbolToFixOrErr = findSymbolByAddress(*NSec, FixupAddress);
if (!SymbolToFixOrErr)
return SymbolToFixOrErr.takeError();
BlockToFix = &SymbolToFixOrErr->getBlock();
@@ -270,7 +272,7 @@ private:
Symbol *TargetSymbol = nullptr;
uint64_t Addend = 0;
- // Sanity check the relocation kind.
+ // Validate the relocation kind.
auto MachORelocKind = getRelocKind(RI);
if (!MachORelocKind)
return MachORelocKind.takeError();
@@ -300,7 +302,7 @@ private:
else
return TargetSymbolOrErr.takeError();
Addend = *(const little32_t *)FixupContent;
- Kind = x86_64::RequestGOTAndTransformToPCRel32GOTLoadRelaxable;
+ Kind = x86_64::RequestGOTAndTransformToPCRel32GOTLoadREXRelaxable;
if (FixupOffset < 3)
return make_error<JITLinkError>("GOTLD at invalid offset " +
formatv("{0}", FixupOffset));
@@ -319,7 +321,10 @@ private:
else
return TargetSymbolOrErr.takeError();
Addend = *(const little32_t *)FixupContent;
- Kind = x86_64::RequestTLVPAndTransformToPCRel32TLVPLoadRelaxable;
+ Kind = x86_64::RequestTLVPAndTransformToPCRel32TLVPLoadREXRelaxable;
+ if (FixupOffset < 3)
+ return make_error<JITLinkError>("TLV at invalid offset " +
+ formatv("{0}", FixupOffset));
break;
case MachOPointer32:
if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum))
@@ -339,7 +344,11 @@ private:
break;
case MachOPointer64Anon: {
JITTargetAddress TargetAddress = *(const ulittle64_t *)FixupContent;
- if (auto TargetSymbolOrErr = findSymbolByAddress(TargetAddress))
+ auto TargetNSec = findSectionByIndex(RI.r_symbolnum - 1);
+ if (!TargetNSec)
+ return TargetNSec.takeError();
+ if (auto TargetSymbolOrErr =
+ findSymbolByAddress(*TargetNSec, TargetAddress))
TargetSymbol = &*TargetSymbolOrErr;
else
return TargetSymbolOrErr.takeError();
@@ -360,7 +369,11 @@ private:
case MachOPCRel32Anon: {
JITTargetAddress TargetAddress =
FixupAddress + 4 + *(const little32_t *)FixupContent;
- if (auto TargetSymbolOrErr = findSymbolByAddress(TargetAddress))
+ auto TargetNSec = findSectionByIndex(RI.r_symbolnum - 1);
+ if (!TargetNSec)
+ return TargetNSec.takeError();
+ if (auto TargetSymbolOrErr =
+ findSymbolByAddress(*TargetNSec, TargetAddress))
TargetSymbol = &*TargetSymbolOrErr;
else
return TargetSymbolOrErr.takeError();
@@ -376,7 +389,11 @@ private:
1ULL << (*MachORelocKind - MachOPCRel32Minus1Anon));
JITTargetAddress TargetAddress =
FixupAddress + Delta + *(const little32_t *)FixupContent;
- if (auto TargetSymbolOrErr = findSymbolByAddress(TargetAddress))
+ auto TargetNSec = findSectionByIndex(RI.r_symbolnum - 1);
+ if (!TargetNSec)
+ return TargetNSec.takeError();
+ if (auto TargetSymbolOrErr =
+ findSymbolByAddress(*TargetNSec, TargetAddress))
TargetSymbol = &*TargetSymbolOrErr;
else
return TargetSymbolOrErr.takeError();
@@ -417,157 +434,15 @@ private:
}
};
-class PerGraphGOTAndPLTStubsBuilder_MachO_x86_64
- : public PerGraphGOTAndPLTStubsBuilder<
- PerGraphGOTAndPLTStubsBuilder_MachO_x86_64> {
-public:
-
- using PerGraphGOTAndPLTStubsBuilder<
- PerGraphGOTAndPLTStubsBuilder_MachO_x86_64>::
- PerGraphGOTAndPLTStubsBuilder;
-
- bool isGOTEdgeToFix(Edge &E) const {
- return E.getKind() == x86_64::RequestGOTAndTransformToDelta32 ||
- E.getKind() ==
- x86_64::RequestGOTAndTransformToPCRel32GOTLoadRelaxable;
- }
-
- Symbol &createGOTEntry(Symbol &Target) {
- return x86_64::createAnonymousPointer(G, getGOTSection(), &Target);
- }
-
- void fixGOTEdge(Edge &E, Symbol &GOTEntry) {
- // Fix the edge kind.
- switch (E.getKind()) {
- case x86_64::RequestGOTAndTransformToDelta32:
- E.setKind(x86_64::Delta32);
- break;
- case x86_64::RequestGOTAndTransformToPCRel32GOTLoadRelaxable:
- E.setKind(x86_64::PCRel32GOTLoadRelaxable);
- break;
- default:
- llvm_unreachable("Not a GOT transform edge");
- }
- // Fix the target, leave the addend as-is.
- E.setTarget(GOTEntry);
- }
-
- bool isExternalBranchEdge(Edge &E) {
- return E.getKind() == x86_64::BranchPCRel32 && E.getTarget().isExternal();
- }
-
- Symbol &createPLTStub(Symbol &Target) {
- return x86_64::createAnonymousPointerJumpStub(G, getStubsSection(),
- getGOTEntry(Target));
- }
-
- void fixPLTEdge(Edge &E, Symbol &Stub) {
- assert(E.getKind() == x86_64::BranchPCRel32 && "Not a Branch32 edge?");
- assert(E.getAddend() == 0 &&
- "BranchPCRel32 edge has unexpected addend value");
-
- // Set the edge kind to BranchPCRel32ToPtrJumpStubRelaxable. We will use
- // this to check for stub optimization opportunities in the
- // optimizeMachO_x86_64_GOTAndStubs pass below.
- E.setKind(x86_64::BranchPCRel32ToPtrJumpStubRelaxable);
- E.setTarget(Stub);
- }
-
-private:
- Section &getGOTSection() {
- if (!GOTSection)
- GOTSection = &G.createSection("$__GOT", sys::Memory::MF_READ);
- return *GOTSection;
- }
-
- Section &getStubsSection() {
- if (!StubsSection) {
- auto StubsProt = static_cast<sys::Memory::ProtectionFlags>(
- sys::Memory::MF_READ | sys::Memory::MF_EXEC);
- StubsSection = &G.createSection("$__STUBS", StubsProt);
- }
- return *StubsSection;
- }
-
- Section *GOTSection = nullptr;
- Section *StubsSection = nullptr;
-};
-
-} // namespace
-
-static Error optimizeMachO_x86_64_GOTAndStubs(LinkGraph &G) {
- LLVM_DEBUG(dbgs() << "Optimizing GOT entries and stubs:\n");
-
- for (auto *B : G.blocks())
- for (auto &E : B->edges())
- if (E.getKind() == x86_64::PCRel32GOTLoadRelaxable) {
- assert(E.getOffset() >= 3 && "GOT edge occurs too early in block");
-
- // Optimize GOT references.
- auto &GOTBlock = E.getTarget().getBlock();
- assert(GOTBlock.getSize() == G.getPointerSize() &&
- "GOT entry block should be pointer sized");
- assert(GOTBlock.edges_size() == 1 &&
- "GOT entry should only have one outgoing edge");
-
- auto &GOTTarget = GOTBlock.edges().begin()->getTarget();
- JITTargetAddress EdgeAddr = B->getAddress() + E.getOffset();
- JITTargetAddress TargetAddr = GOTTarget.getAddress();
-
- // Check that this is a recognized MOV instruction.
- // FIXME: Can we assume this?
- constexpr uint8_t MOVQRIPRel[] = {0x48, 0x8b};
- if (strncmp(B->getContent().data() + E.getOffset() - 3,
- reinterpret_cast<const char *>(MOVQRIPRel), 2) != 0)
- continue;
-
- int64_t Displacement = TargetAddr - EdgeAddr + 4;
- if (Displacement >= std::numeric_limits<int32_t>::min() &&
- Displacement <= std::numeric_limits<int32_t>::max()) {
- E.setTarget(GOTTarget);
- E.setKind(x86_64::Delta32);
- E.setAddend(E.getAddend() - 4);
- char *BlockData = B->getMutableContent(G).data();
- BlockData[E.getOffset() - 2] = (char)0x8d;
- LLVM_DEBUG({
- dbgs() << " Replaced GOT load wih LEA:\n ";
- printEdge(dbgs(), *B, E, x86_64::getEdgeKindName(E.getKind()));
- dbgs() << "\n";
- });
- }
- } else if (E.getKind() == x86_64::BranchPCRel32ToPtrJumpStubRelaxable) {
- auto &StubBlock = E.getTarget().getBlock();
- assert(StubBlock.getSize() == sizeof(x86_64::PointerJumpStubContent) &&
- "Stub block should be stub sized");
- assert(StubBlock.edges_size() == 1 &&
- "Stub block should only have one outgoing edge");
-
- auto &GOTBlock = StubBlock.edges().begin()->getTarget().getBlock();
- assert(GOTBlock.getSize() == G.getPointerSize() &&
- "GOT block should be pointer sized");
- assert(GOTBlock.edges_size() == 1 &&
- "GOT block should only have one outgoing edge");
-
- auto &GOTTarget = GOTBlock.edges().begin()->getTarget();
- JITTargetAddress EdgeAddr = B->getAddress() + E.getOffset();
- JITTargetAddress TargetAddr = GOTTarget.getAddress();
-
- int64_t Displacement = TargetAddr - EdgeAddr + 4;
- if (Displacement >= std::numeric_limits<int32_t>::min() &&
- Displacement <= std::numeric_limits<int32_t>::max()) {
- E.setKind(x86_64::BranchPCRel32);
- E.setTarget(GOTTarget);
- LLVM_DEBUG({
- dbgs() << " Replaced stub branch with direct branch:\n ";
- printEdge(dbgs(), *B, E, x86_64::getEdgeKindName(E.getKind()));
- dbgs() << "\n";
- });
- }
- }
-
+Error buildGOTAndStubs_MachO_x86_64(LinkGraph &G) {
+ x86_64::GOTTableManager GOT;
+ x86_64::PLTTableManager PLT(GOT);
+ visitExistingEdges(G, GOT, PLT);
return Error::success();
}
+} // namespace
+
namespace llvm {
namespace jitlink {
@@ -582,7 +457,7 @@ public:
private:
Error applyFixup(LinkGraph &G, Block &B, const Edge &E) const {
- return x86_64::applyFixup(G, B, E);
+ return x86_64::applyFixup(G, B, E, nullptr);
}
};
@@ -604,6 +479,10 @@ void link_MachO_x86_64(std::unique_ptr<LinkGraph> G,
Config.PrePrunePasses.push_back(createEHFrameSplitterPass_MachO_x86_64());
Config.PrePrunePasses.push_back(createEHFrameEdgeFixerPass_MachO_x86_64());
+ // Add compact unwind splitter pass.
+ Config.PrePrunePasses.push_back(
+ CompactUnwindSplitter("__LD,__compact_unwind"));
+
// Add a mark-live pass.
if (auto MarkLive = Ctx->getMarkLivePass(G->getTargetTriple()))
Config.PrePrunePasses.push_back(std::move(MarkLive));
@@ -611,11 +490,10 @@ void link_MachO_x86_64(std::unique_ptr<LinkGraph> G,
Config.PrePrunePasses.push_back(markAllSymbolsLive);
// Add an in-place GOT/Stubs pass.
- Config.PostPrunePasses.push_back(
- PerGraphGOTAndPLTStubsBuilder_MachO_x86_64::asPass);
+ Config.PostPrunePasses.push_back(buildGOTAndStubs_MachO_x86_64);
// Add GOT/Stubs optimizer pass.
- Config.PreFixupPasses.push_back(optimizeMachO_x86_64_GOTAndStubs);
+ Config.PreFixupPasses.push_back(x86_64::optimizeGOTAndStubAccesses);
}
if (auto Err = Ctx->modifyPassConfig(*G, Config))
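
The x86-64 MachO backend above now delegates GOT and stub construction to the generic x86_64 table managers rather than a hand-written PerGraph builder. A minimal sketch of the same pass shape, assuming only the GOTTableManager, PLTTableManager, and visitExistingEdges helpers that appear in the hunk:

#include "llvm/ExecutionEngine/JITLink/JITLink.h"
#include "llvm/ExecutionEngine/JITLink/x86_64.h"

using namespace llvm;
using namespace llvm::jitlink;

// Post-prune pass: each table manager lazily creates its $__GOT / $__STUBS
// entries as it visits the edges that request them.
Error buildGOTAndStubsPass(LinkGraph &G) {
  x86_64::GOTTableManager GOT;
  x86_64::PLTTableManager PLT(GOT); // stubs jump through GOT entries
  visitExistingEdges(G, GOT, PLT);  // single walk over all existing edges
  return Error::success();
}

// Registered the same way as above, e.g.:
//   Config.PostPrunePasses.push_back(buildGOTAndStubsPass);
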
diff --git a/llvm/lib/ExecutionEngine/JITLink/MemoryFlags.cpp b/llvm/lib/ExecutionEngine/JITLink/MemoryFlags.cpp
new file mode 100644
index 000000000000..b73a310b2910
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/JITLink/MemoryFlags.cpp
@@ -0,0 +1,33 @@
+//===------------- MemoryFlags.cpp - Memory allocation flags --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/JITLink/MemoryFlags.h"
+
+#define DEBUG_TYPE "jitlink"
+
+namespace llvm {
+namespace jitlink {
+
+raw_ostream &operator<<(raw_ostream &OS, MemProt MP) {
+ return OS << (((MP & MemProt::Read) != MemProt::None) ? 'R' : '-')
+ << (((MP & MemProt::Write) != MemProt::None) ? 'W' : '-')
+ << (((MP & MemProt::Exec) != MemProt::None) ? 'X' : '-');
+}
+
+raw_ostream &operator<<(raw_ostream &OS, MemDeallocPolicy MDP) {
+ return OS << (MDP == MemDeallocPolicy::Standard ? "standard" : "finalize");
+}
+
+raw_ostream &operator<<(raw_ostream &OS, AllocGroup AG) {
+ return OS << '(' << AG.getMemProt() << ", " << AG.getMemDeallocPolicy()
+ << ')';
+}
+
+} // end namespace jitlink
+} // end namespace llvm
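
MemoryFlags.cpp itself only adds the stream operators; the MemProt bitmask it prints replaces sys::Memory::ProtectionFlags throughout this import (see the $__GOT/$__STUBS hunks above). A minimal usage sketch, assuming the MemoryFlags.h header added alongside this file:

#include "llvm/ExecutionEngine/JITLink/MemoryFlags.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::jitlink;

void demoMemProt() {
  // Combine protections with operator| and test them against MemProt::None.
  MemProt Code = MemProt::Read | MemProt::Exec;
  if ((Code & MemProt::Write) == MemProt::None)
    outs() << "code segment is not writable: " << Code << "\n"; // prints "R-X"
}
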
diff --git a/llvm/lib/ExecutionEngine/JITLink/aarch64.cpp b/llvm/lib/ExecutionEngine/JITLink/aarch64.cpp
new file mode 100644
index 000000000000..6dccc4811885
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/JITLink/aarch64.cpp
@@ -0,0 +1,30 @@
+//===---- aarch64.cpp - Generic JITLink aarch64 edge kinds, utilities -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Generic utilities for graphs representing aarch64 objects.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/JITLink/aarch64.h"
+
+#define DEBUG_TYPE "jitlink"
+
+namespace llvm {
+namespace jitlink {
+namespace aarch64 {
+
+const char *getEdgeKindName(Edge::Kind K) {
+ switch (K) {
+ case R_AARCH64_CALL26:
+ return "R_AARCH64_CALL26";
+ }
+ return getGenericEdgeKindName(K);
+}
+} // namespace aarch64
+} // namespace jitlink
+} // namespace llvm
diff --git a/llvm/lib/ExecutionEngine/JITLink/x86_64.cpp b/llvm/lib/ExecutionEngine/JITLink/x86_64.cpp
index c951ed6d95be..48521280059d 100644
--- a/llvm/lib/ExecutionEngine/JITLink/x86_64.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/x86_64.cpp
@@ -24,6 +24,8 @@ const char *getEdgeKindName(Edge::Kind K) {
return "Pointer64";
case Pointer32:
return "Pointer32";
+ case Pointer32Signed:
+ return "Pointer32Signed";
case Delta64:
return "Delta64";
case Delta32:
@@ -32,22 +34,32 @@ const char *getEdgeKindName(Edge::Kind K) {
return "NegDelta64";
case NegDelta32:
return "NegDelta32";
+ case Delta64FromGOT:
+ return "Delta64FromGOT";
case BranchPCRel32:
return "BranchPCRel32";
case BranchPCRel32ToPtrJumpStub:
return "BranchPCRel32ToPtrJumpStub";
- case BranchPCRel32ToPtrJumpStubRelaxable:
- return "BranchPCRel32ToPtrJumpStubRelaxable";
+ case BranchPCRel32ToPtrJumpStubBypassable:
+ return "BranchPCRel32ToPtrJumpStubBypassable";
case RequestGOTAndTransformToDelta32:
return "RequestGOTAndTransformToDelta32";
+ case RequestGOTAndTransformToDelta64:
+ return "RequestGOTAndTransformToDelta64";
+ case RequestGOTAndTransformToDelta64FromGOT:
+ return "RequestGOTAndTransformToDelta64FromGOT";
+ case PCRel32GOTLoadREXRelaxable:
+ return "PCRel32GOTLoadREXRelaxable";
+ case RequestGOTAndTransformToPCRel32GOTLoadREXRelaxable:
+ return "RequestGOTAndTransformToPCRel32GOTLoadREXRelaxable";
case PCRel32GOTLoadRelaxable:
return "PCRel32GOTLoadRelaxable";
case RequestGOTAndTransformToPCRel32GOTLoadRelaxable:
return "RequestGOTAndTransformToPCRel32GOTLoadRelaxable";
- case PCRel32TLVPLoadRelaxable:
- return "PCRel32TLVPLoadRelaxable";
- case RequestTLVPAndTransformToPCRel32TLVPLoadRelaxable:
- return "RequestTLVPAndTransformToPCRel32TLVPLoadRelaxable";
+ case PCRel32TLVPLoadREXRelaxable:
+ return "PCRel32TLVPLoadREXRelaxable";
+ case RequestTLVPAndTransformToPCRel32TLVPLoadREXRelaxable:
+ return "RequestTLVPAndTransformToPCRel32TLVPLoadREXRelaxable";
default:
return getGenericEdgeKindName(static_cast<Edge::Kind>(K));
}
@@ -59,6 +71,119 @@ const char NullPointerContent[PointerSize] = {0x00, 0x00, 0x00, 0x00,
const char PointerJumpStubContent[6] = {
static_cast<char>(0xFFu), 0x25, 0x00, 0x00, 0x00, 0x00};
+Error optimizeGOTAndStubAccesses(LinkGraph &G) {
+ LLVM_DEBUG(dbgs() << "Optimizing GOT entries and stubs:\n");
+
+ for (auto *B : G.blocks())
+ for (auto &E : B->edges()) {
+ if (E.getKind() == x86_64::PCRel32GOTLoadRelaxable ||
+ E.getKind() == x86_64::PCRel32GOTLoadREXRelaxable) {
+#ifndef NDEBUG
+ bool REXPrefix = E.getKind() == x86_64::PCRel32GOTLoadREXRelaxable;
+ assert(E.getOffset() >= (REXPrefix ? 3u : 2u) &&
+ "GOT edge occurs too early in block");
+#endif
+ auto *FixupData = reinterpret_cast<uint8_t *>(
+ const_cast<char *>(B->getContent().data())) +
+ E.getOffset();
+ const uint8_t Op = FixupData[-2];
+ const uint8_t ModRM = FixupData[-1];
+
+ auto &GOTEntryBlock = E.getTarget().getBlock();
+ assert(GOTEntryBlock.getSize() == G.getPointerSize() &&
+ "GOT entry block should be pointer sized");
+ assert(GOTEntryBlock.edges_size() == 1 &&
+ "GOT entry should only have one outgoing edge");
+ auto &GOTTarget = GOTEntryBlock.edges().begin()->getTarget();
+ JITTargetAddress TargetAddr = GOTTarget.getAddress();
+ JITTargetAddress EdgeAddr = B->getFixupAddress(E);
+ int64_t Displacement = TargetAddr - EdgeAddr + 4;
+ bool TargetInRangeForImmU32 = isInRangeForImmU32(TargetAddr);
+ bool DisplacementInRangeForImmS32 = isInRangeForImmS32(Displacement);
+
+ // If both the target address and the displacement are out of range, there
+ // is no optimization opportunity.
+ if (!(TargetInRangeForImmU32 || DisplacementInRangeForImmS32))
+ continue;
+
+ // Transform "mov foo@GOTPCREL(%rip),%reg" to "lea foo(%rip),%reg".
+ if (Op == 0x8b && DisplacementInRangeForImmS32) {
+ FixupData[-2] = 0x8d;
+ E.setKind(x86_64::Delta32);
+ E.setTarget(GOTTarget);
+ E.setAddend(E.getAddend() - 4);
+ LLVM_DEBUG({
+ dbgs() << " Replaced GOT load wih LEA:\n ";
+ printEdge(dbgs(), *B, E, getEdgeKindName(E.getKind()));
+ dbgs() << "\n";
+ });
+ continue;
+ }
+
+ // Transform call/jmp instructions
+ if (Op == 0xff && TargetInRangeForImmU32) {
+ if (ModRM == 0x15) {
+ // The ABI says we can convert "call *foo@GOTPCREL(%rip)" to "nop; call
+ // foo", but lld converts it to "addr32 call foo" because that keeps the
+ // result a single instruction.
+ FixupData[-2] = 0x67;
+ FixupData[-1] = 0xe8;
+ LLVM_DEBUG({
+ dbgs() << " replaced call instruction's memory operand wih imm "
+ "operand:\n ";
+ printEdge(dbgs(), *B, E, getEdgeKindName(E.getKind()));
+ dbgs() << "\n";
+ });
+ } else {
+ // Transform "jmp *foo@GOTPCREL(%rip)" to "jmp foo; nop"
+ assert(ModRM == 0x25 && "Invalid ModRm for call/jmp instructions");
+ FixupData[-2] = 0xe9;
+ FixupData[3] = 0x90;
+ E.setOffset(E.getOffset() - 1);
+ LLVM_DEBUG({
+ dbgs() << " replaced jmp instruction's memory operand wih imm "
+ "operand:\n ";
+ printEdge(dbgs(), *B, E, getEdgeKindName(E.getKind()));
+ dbgs() << "\n";
+ });
+ }
+ E.setKind(x86_64::Pointer32);
+ E.setTarget(GOTTarget);
+ continue;
+ }
+ } else if (E.getKind() == x86_64::BranchPCRel32ToPtrJumpStubBypassable) {
+ auto &StubBlock = E.getTarget().getBlock();
+ assert(StubBlock.getSize() == sizeof(PointerJumpStubContent) &&
+ "Stub block should be stub sized");
+ assert(StubBlock.edges_size() == 1 &&
+ "Stub block should only have one outgoing edge");
+
+ auto &GOTBlock = StubBlock.edges().begin()->getTarget().getBlock();
+ assert(GOTBlock.getSize() == G.getPointerSize() &&
+ "GOT block should be pointer sized");
+ assert(GOTBlock.edges_size() == 1 &&
+ "GOT block should only have one outgoing edge");
+
+ auto &GOTTarget = GOTBlock.edges().begin()->getTarget();
+ JITTargetAddress EdgeAddr = B->getAddress() + E.getOffset();
+ JITTargetAddress TargetAddr = GOTTarget.getAddress();
+
+ int64_t Displacement = TargetAddr - EdgeAddr + 4;
+ if (isInRangeForImmS32(Displacement)) {
+ E.setKind(x86_64::BranchPCRel32);
+ E.setTarget(GOTTarget);
+ LLVM_DEBUG({
+ dbgs() << " Replaced stub branch with direct branch:\n ";
+ printEdge(dbgs(), *B, E, getEdgeKindName(E.getKind()));
+ dbgs() << "\n";
+ });
+ }
+ }
+ }
+
+ return Error::success();
+}
+
} // end namespace x86_64
} // end namespace jitlink
} // end namespace llvm
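
optimizeGOTAndStubAccesses above relaxes a GOT load ("movq foo@GOTPCREL(%rip), %reg") into "leaq foo(%rip), %reg" by rewriting one opcode byte when the displacement fits a signed 32-bit immediate; the edge then becomes a plain Delta32 with its addend reduced by 4. A self-contained sketch of just the byte-level rewrite, on an illustrative buffer:

#include <cassert>
#include <cstddef>
#include <cstdint>

// The fixup offset points at the 4-byte displacement, so the opcode byte of
// "48 8b /r" (movq mem, reg) sits at FixupOffset - 2; patching it to 0x8d
// turns the instruction into "leaq mem, reg" without touching ModRM or disp.
bool relaxGOTLoadToLEA(uint8_t *Block, size_t FixupOffset) {
  assert(FixupOffset >= 3 && "need REX + opcode + ModRM before the fixup");
  if (Block[FixupOffset - 2] != 0x8b)
    return false; // not a recognized MOV load; leave it alone
  Block[FixupOffset - 2] = 0x8d;
  return true;
}

int main() {
  // movq 0(%rip), %rax : 48 8b 05 <disp32>
  uint8_t Insn[] = {0x48, 0x8b, 0x05, 0x00, 0x00, 0x00, 0x00};
  bool Changed = relaxGOTLoadToLEA(Insn, 3);
  assert(Changed && Insn[1] == 0x8d);
  return 0;
}
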
diff --git a/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp
index 144329aa8bea..200f42aec067 100644
--- a/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp
+++ b/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp
@@ -218,8 +218,7 @@ void MCJIT::generateCodeForModule(Module *M) {
std::string Buf;
raw_string_ostream OS(Buf);
logAllUnhandledErrors(LoadedObject.takeError(), OS);
- OS.flush();
- report_fatal_error(Buf);
+ report_fatal_error(Twine(OS.str()));
}
std::unique_ptr<RuntimeDyld::LoadedObjectInfo> L =
Dyld.loadObject(*LoadedObject.get());
diff --git a/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp b/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp
index 5b73c0e2fbc8..9ff6cec8c6c5 100644
--- a/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp
@@ -184,6 +184,8 @@ void CompileOnDemandLayer::emit(
CompileOnDemandLayer::PerDylibResources &
CompileOnDemandLayer::getPerDylibResources(JITDylib &TargetD) {
+ std::lock_guard<std::mutex> Lock(CODLayerMutex);
+
auto I = DylibResources.find(&TargetD);
if (I == DylibResources.end()) {
auto &ImplD =
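
The CompileOnDemandLayer change serializes getPerDylibResources with a layer-wide mutex so that two threads materializing into the same JITDylib cannot both create its per-dylib resources. A minimal sketch of the find-or-create-under-lock pattern, with illustrative member and type names:

#include <map>
#include <memory>
#include <mutex>

struct Resources { /* per-dylib state */ };

class Layer {
public:
  Resources &getResources(void *Dylib) {
    // The lock covers both the lookup and the insertion, so only one thread
    // can create the entry for a given dylib.
    std::lock_guard<std::mutex> Lock(LayerMutex);
    auto I = DylibResources.find(Dylib);
    if (I == DylibResources.end())
      I = DylibResources.emplace(Dylib, std::make_unique<Resources>()).first;
    return *I->second;
  }

private:
  std::mutex LayerMutex;
  std::map<void *, std::unique_ptr<Resources>> DylibResources;
};
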
diff --git a/llvm/lib/ExecutionEngine/Orc/Core.cpp b/llvm/lib/ExecutionEngine/Orc/Core.cpp
index 12a501f7f98c..64e5090e4c53 100644
--- a/llvm/lib/ExecutionEngine/Orc/Core.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/Core.cpp
@@ -29,7 +29,6 @@ char SymbolsNotFound::ID = 0;
char SymbolsCouldNotBeRemoved::ID = 0;
char MissingSymbolDefinitions::ID = 0;
char UnexpectedSymbolDefinitions::ID = 0;
-char Task::ID = 0;
char MaterializationTask::ID = 0;
RegisterDependenciesFunction NoDependenciesToRegister =
@@ -90,14 +89,17 @@ void FailedToMaterialize::log(raw_ostream &OS) const {
OS << "Failed to materialize symbols: " << *Symbols;
}
-SymbolsNotFound::SymbolsNotFound(SymbolNameSet Symbols) {
+SymbolsNotFound::SymbolsNotFound(std::shared_ptr<SymbolStringPool> SSP,
+ SymbolNameSet Symbols)
+ : SSP(std::move(SSP)) {
for (auto &Sym : Symbols)
this->Symbols.push_back(Sym);
assert(!this->Symbols.empty() && "Can not fail to resolve an empty set");
}
-SymbolsNotFound::SymbolsNotFound(SymbolNameVector Symbols)
- : Symbols(std::move(Symbols)) {
+SymbolsNotFound::SymbolsNotFound(std::shared_ptr<SymbolStringPool> SSP,
+ SymbolNameVector Symbols)
+ : SSP(std::move(SSP)), Symbols(std::move(Symbols)) {
assert(!this->Symbols.empty() && "Can not fail to resolve an empty set");
}
@@ -109,8 +111,9 @@ void SymbolsNotFound::log(raw_ostream &OS) const {
OS << "Symbols not found: " << Symbols;
}
-SymbolsCouldNotBeRemoved::SymbolsCouldNotBeRemoved(SymbolNameSet Symbols)
- : Symbols(std::move(Symbols)) {
+SymbolsCouldNotBeRemoved::SymbolsCouldNotBeRemoved(
+ std::shared_ptr<SymbolStringPool> SSP, SymbolNameSet Symbols)
+ : SSP(std::move(SSP)), Symbols(std::move(Symbols)) {
assert(!this->Symbols.empty() && "Can not fail to resolve an empty set");
}
@@ -1333,11 +1336,13 @@ Error JITDylib::remove(const SymbolNameSet &Names) {
// If any of the symbols are not defined, return an error.
if (!Missing.empty())
- return make_error<SymbolsNotFound>(std::move(Missing));
+ return make_error<SymbolsNotFound>(ES.getSymbolStringPool(),
+ std::move(Missing));
// If any of the symbols are currently materializing, return an error.
if (!Materializing.empty())
- return make_error<SymbolsCouldNotBeRemoved>(std::move(Materializing));
+ return make_error<SymbolsCouldNotBeRemoved>(ES.getSymbolStringPool(),
+ std::move(Materializing));
// Remove the symbols.
for (auto &SymbolMaterializerItrPair : SymbolsToRemove) {
@@ -1793,8 +1798,6 @@ void Platform::lookupInitSymbolsAsync(
}
}
-void Task::anchor() {}
-
void MaterializationTask::printDescription(raw_ostream &OS) {
OS << "Materialization task: " << MU->getName() << " in "
<< MR->getTargetJITDylib().getName();
@@ -2086,8 +2089,8 @@ Error ExecutionSession::registerJITDispatchHandlers(
}
void ExecutionSession::runJITDispatchHandler(
- ExecutorProcessControl::SendResultFunction SendResult,
- JITTargetAddress HandlerFnTagAddr, ArrayRef<char> ArgBuffer) {
+ SendResultFunction SendResult, JITTargetAddress HandlerFnTagAddr,
+ ArrayRef<char> ArgBuffer) {
std::shared_ptr<JITDispatchHandlerFunction> F;
{
@@ -2234,7 +2237,8 @@ Error ExecutionSession::IL_updateCandidatesFor(
// weakly referenced" specific error here to reduce confusion.
if (SymI->second.getFlags().hasMaterializationSideEffectsOnly() &&
SymLookupFlags != SymbolLookupFlags::WeaklyReferencedSymbol)
- return make_error<SymbolsNotFound>(SymbolNameVector({Name}));
+ return make_error<SymbolsNotFound>(getSymbolStringPool(),
+ SymbolNameVector({Name}));
// If we matched against this symbol but it is in the error state
// then bail out and treat it as a failure to materialize.
@@ -2422,7 +2426,7 @@ void ExecutionSession::OL_applyQueryPhase1(
} else {
LLVM_DEBUG(dbgs() << "Phase 1 failed with unresolved symbols.\n");
IPLS->fail(make_error<SymbolsNotFound>(
- IPLS->DefGeneratorCandidates.getSymbolNames()));
+ getSymbolStringPool(), IPLS->DefGeneratorCandidates.getSymbolNames()));
}
}
@@ -2492,7 +2496,8 @@ void ExecutionSession::OL_completeLookup(
dbgs() << "error: "
"required, but symbol is has-side-effects-only\n";
});
- return make_error<SymbolsNotFound>(SymbolNameVector({Name}));
+ return make_error<SymbolsNotFound>(getSymbolStringPool(),
+ SymbolNameVector({Name}));
}
// If we matched against this symbol but it is in the error state
@@ -2594,7 +2599,7 @@ void ExecutionSession::OL_completeLookup(
}
}
- LLVM_DEBUG(dbgs() << "Stripping unmatched weakly-refererced symbols\n");
+ LLVM_DEBUG(dbgs() << "Stripping unmatched weakly-referenced symbols\n");
IPLS->LookupSet.forEachWithRemoval(
[&](const SymbolStringPtr &Name, SymbolLookupFlags SymLookupFlags) {
if (SymLookupFlags == SymbolLookupFlags::WeaklyReferencedSymbol) {
@@ -2606,7 +2611,8 @@ void ExecutionSession::OL_completeLookup(
if (!IPLS->LookupSet.empty()) {
LLVM_DEBUG(dbgs() << "Failing due to unresolved symbols\n");
- return make_error<SymbolsNotFound>(IPLS->LookupSet.getSymbolNames());
+ return make_error<SymbolsNotFound>(getSymbolStringPool(),
+ IPLS->LookupSet.getSymbolNames());
}
// Record whether the query completed.
@@ -2733,7 +2739,8 @@ void ExecutionSession::OL_completeLookupFlags(
if (!IPLS->LookupSet.empty()) {
LLVM_DEBUG(dbgs() << "Failing due to unresolved symbols\n");
- return make_error<SymbolsNotFound>(IPLS->LookupSet.getSymbolNames());
+ return make_error<SymbolsNotFound>(getSymbolStringPool(),
+ IPLS->LookupSet.getSymbolNames());
}
LLVM_DEBUG(dbgs() << "Succeded, result = " << Result << "\n");
@@ -2911,6 +2918,7 @@ void ExecutionSession::dumpDispatchInfo(Task &T) {
runSessionLocked([&]() {
dbgs() << "Dispatching: ";
T.printDescription(dbgs());
+ dbgs() << "\n";
});
}
#endif // NDEBUG
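
The Core.cpp hunks thread a shared_ptr<SymbolStringPool> into SymbolsNotFound and SymbolsCouldNotBeRemoved so that the pooled names an error carries remain valid for as long as the error itself lives. A minimal sketch of the new construction pattern, assuming an ExecutionSession ES and a set of unresolved names:

#include "llvm/ExecutionEngine/Orc/Core.h"

using namespace llvm;
using namespace llvm::orc;

// The error now pins the pool that owns its symbol names, matching the
// make_error<SymbolsNotFound>(ES.getSymbolStringPool(), ...) calls above.
Error makeNotFoundError(ExecutionSession &ES, SymbolNameSet Missing) {
  return make_error<SymbolsNotFound>(ES.getSymbolStringPool(),
                                     std::move(Missing));
}
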
diff --git a/llvm/lib/ExecutionEngine/Orc/DebugObjectManagerPlugin.cpp b/llvm/lib/ExecutionEngine/Orc/DebugObjectManagerPlugin.cpp
index 36efc744bf30..fcfe389f82a8 100644
--- a/llvm/lib/ExecutionEngine/Orc/DebugObjectManagerPlugin.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/DebugObjectManagerPlugin.cpp
@@ -1,10 +1,15 @@
-//===---- DebugObjectManagerPlugin.h - JITLink debug objects ---*- C++ -*-===//
+//===------- DebugObjectManagerPlugin.cpp - JITLink debug objects ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
+//
+// FIXME: Update Plugin to poke the debug object into a new JITLink section,
+// rather than creating a new allocation.
+//
+//===----------------------------------------------------------------------===//
#include "llvm/ExecutionEngine/Orc/DebugObjectManagerPlugin.h"
@@ -108,70 +113,77 @@ void ELFDebugObjectSection<ELFT>::dump(raw_ostream &OS, StringRef Name) {
}
}
-static constexpr sys::Memory::ProtectionFlags ReadOnly =
- static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ);
-
enum class Requirement {
// Request final target memory load-addresses for all sections.
ReportFinalSectionLoadAddresses,
};
-/// The plugin creates a debug object from JITLinkContext when JITLink starts
-/// processing the corresponding LinkGraph. It provides access to the pass
-/// configuration of the LinkGraph and calls the finalization function, once
-/// the resulting link artifact was emitted.
+/// The plugin creates a debug object when JITLink starts processing the
+/// corresponding LinkGraph. It provides access to the pass configuration of
+/// the LinkGraph and calls the finalization function once the resulting link
+/// artifact has been emitted.
///
class DebugObject {
public:
- DebugObject(JITLinkContext &Ctx, ExecutionSession &ES) : Ctx(Ctx), ES(ES) {}
+ DebugObject(JITLinkMemoryManager &MemMgr, const JITLinkDylib *JD,
+ ExecutionSession &ES)
+ : MemMgr(MemMgr), JD(JD), ES(ES) {}
void set(Requirement Req) { Reqs.insert(Req); }
bool has(Requirement Req) const { return Reqs.count(Req) > 0; }
- using FinalizeContinuation = std::function<void(Expected<sys::MemoryBlock>)>;
+ using FinalizeContinuation = std::function<void(Expected<ExecutorAddrRange>)>;
+
void finalizeAsync(FinalizeContinuation OnFinalize);
virtual ~DebugObject() {
- if (Alloc)
- if (Error Err = Alloc->deallocate())
+ if (Alloc) {
+ std::vector<FinalizedAlloc> Allocs;
+ Allocs.push_back(std::move(Alloc));
+ if (Error Err = MemMgr.deallocate(std::move(Allocs)))
ES.reportError(std::move(Err));
+ }
}
virtual void reportSectionTargetMemoryRange(StringRef Name,
SectionRange TargetMem) {}
protected:
- using Allocation = JITLinkMemoryManager::Allocation;
+ using InFlightAlloc = JITLinkMemoryManager::InFlightAlloc;
+ using FinalizedAlloc = JITLinkMemoryManager::FinalizedAlloc;
- virtual Expected<std::unique_ptr<Allocation>>
- finalizeWorkingMemory(JITLinkContext &Ctx) = 0;
+ virtual Expected<SimpleSegmentAlloc> finalizeWorkingMemory() = 0;
+
+ JITLinkMemoryManager &MemMgr;
+ const JITLinkDylib *JD = nullptr;
private:
- JITLinkContext &Ctx;
ExecutionSession &ES;
std::set<Requirement> Reqs;
- std::unique_ptr<Allocation> Alloc{nullptr};
+ FinalizedAlloc Alloc;
};
// Finalize working memory and take ownership of the resulting allocation. Start
// copying memory over to the target and pass on the result once we're done.
// Ownership of the allocation remains with us for the rest of our lifetime.
void DebugObject::finalizeAsync(FinalizeContinuation OnFinalize) {
- assert(Alloc == nullptr && "Cannot finalize more than once");
-
- auto AllocOrErr = finalizeWorkingMemory(Ctx);
- if (!AllocOrErr)
- OnFinalize(AllocOrErr.takeError());
- Alloc = std::move(*AllocOrErr);
-
- Alloc->finalizeAsync([this, OnFinalize](Error Err) {
- if (Err)
- OnFinalize(std::move(Err));
- else
- OnFinalize(sys::MemoryBlock(
- jitTargetAddressToPointer<void *>(Alloc->getTargetMemory(ReadOnly)),
- Alloc->getWorkingMemory(ReadOnly).size()));
- });
+ assert(!Alloc && "Cannot finalize more than once");
+
+ if (auto SimpleSegAlloc = finalizeWorkingMemory()) {
+ auto ROSeg = SimpleSegAlloc->getSegInfo(MemProt::Read);
+ ExecutorAddrRange DebugObjRange(ExecutorAddr(ROSeg.Addr),
+ ExecutorAddrDiff(ROSeg.WorkingMem.size()));
+ SimpleSegAlloc->finalize(
+ [this, DebugObjRange,
+ OnFinalize = std::move(OnFinalize)](Expected<FinalizedAlloc> FA) {
+ if (FA) {
+ Alloc = std::move(*FA);
+ OnFinalize(DebugObjRange);
+ } else
+ OnFinalize(FA.takeError());
+ });
+ } else
+ OnFinalize(SimpleSegAlloc.takeError());
}
/// The current implementation of ELFDebugObject replicates the approach used in
@@ -190,8 +202,7 @@ public:
StringRef getBuffer() const { return Buffer->getMemBufferRef().getBuffer(); }
protected:
- Expected<std::unique_ptr<Allocation>>
- finalizeWorkingMemory(JITLinkContext &Ctx) override;
+ Expected<SimpleSegmentAlloc> finalizeWorkingMemory() override;
template <typename ELFT>
Error recordSection(StringRef Name,
@@ -201,15 +212,16 @@ protected:
private:
template <typename ELFT>
static Expected<std::unique_ptr<ELFDebugObject>>
- CreateArchType(MemoryBufferRef Buffer, JITLinkContext &Ctx,
- ExecutionSession &ES);
+ CreateArchType(MemoryBufferRef Buffer, JITLinkMemoryManager &MemMgr,
+ const JITLinkDylib *JD, ExecutionSession &ES);
static std::unique_ptr<WritableMemoryBuffer>
CopyBuffer(MemoryBufferRef Buffer, Error &Err);
ELFDebugObject(std::unique_ptr<WritableMemoryBuffer> Buffer,
- JITLinkContext &Ctx, ExecutionSession &ES)
- : DebugObject(Ctx, ES), Buffer(std::move(Buffer)) {
+ JITLinkMemoryManager &MemMgr, const JITLinkDylib *JD,
+ ExecutionSession &ES)
+ : DebugObject(MemMgr, JD, ES), Buffer(std::move(Buffer)) {
set(Requirement::ReportFinalSectionLoadAddresses);
}
@@ -244,13 +256,14 @@ ELFDebugObject::CopyBuffer(MemoryBufferRef Buffer, Error &Err) {
template <typename ELFT>
Expected<std::unique_ptr<ELFDebugObject>>
-ELFDebugObject::CreateArchType(MemoryBufferRef Buffer, JITLinkContext &Ctx,
- ExecutionSession &ES) {
+ELFDebugObject::CreateArchType(MemoryBufferRef Buffer,
+ JITLinkMemoryManager &MemMgr,
+ const JITLinkDylib *JD, ExecutionSession &ES) {
using SectionHeader = typename ELFT::Shdr;
Error Err = Error::success();
std::unique_ptr<ELFDebugObject> DebugObj(
- new ELFDebugObject(CopyBuffer(Buffer, Err), Ctx, ES));
+ new ELFDebugObject(CopyBuffer(Buffer, Err), MemMgr, JD, ES));
if (Err)
return std::move(Err);
@@ -299,23 +312,26 @@ ELFDebugObject::Create(MemoryBufferRef Buffer, JITLinkContext &Ctx,
if (Class == ELF::ELFCLASS32) {
if (Endian == ELF::ELFDATA2LSB)
- return CreateArchType<ELF32LE>(Buffer, Ctx, ES);
+ return CreateArchType<ELF32LE>(Buffer, Ctx.getMemoryManager(),
+ Ctx.getJITLinkDylib(), ES);
if (Endian == ELF::ELFDATA2MSB)
- return CreateArchType<ELF32BE>(Buffer, Ctx, ES);
+ return CreateArchType<ELF32BE>(Buffer, Ctx.getMemoryManager(),
+ Ctx.getJITLinkDylib(), ES);
return nullptr;
}
if (Class == ELF::ELFCLASS64) {
if (Endian == ELF::ELFDATA2LSB)
- return CreateArchType<ELF64LE>(Buffer, Ctx, ES);
+ return CreateArchType<ELF64LE>(Buffer, Ctx.getMemoryManager(),
+ Ctx.getJITLinkDylib(), ES);
if (Endian == ELF::ELFDATA2MSB)
- return CreateArchType<ELF64BE>(Buffer, Ctx, ES);
+ return CreateArchType<ELF64BE>(Buffer, Ctx.getMemoryManager(),
+ Ctx.getJITLinkDylib(), ES);
return nullptr;
}
return nullptr;
}
-Expected<std::unique_ptr<DebugObject::Allocation>>
-ELFDebugObject::finalizeWorkingMemory(JITLinkContext &Ctx) {
+Expected<SimpleSegmentAlloc> ELFDebugObject::finalizeWorkingMemory() {
LLVM_DEBUG({
dbgs() << "Section load-addresses in debug object for \""
<< Buffer->getBufferIdentifier() << "\":\n";
@@ -324,28 +340,21 @@ ELFDebugObject::finalizeWorkingMemory(JITLinkContext &Ctx) {
});
// TODO: This works, but what actual alignment requirements do we have?
- unsigned Alignment = sys::Process::getPageSizeEstimate();
- JITLinkMemoryManager &MemMgr = Ctx.getMemoryManager();
- const JITLinkDylib *JD = Ctx.getJITLinkDylib();
+ unsigned PageSize = sys::Process::getPageSizeEstimate();
size_t Size = Buffer->getBufferSize();
// Allocate working memory for debug object in read-only segment.
- JITLinkMemoryManager::SegmentsRequestMap SingleReadOnlySegment;
- SingleReadOnlySegment[ReadOnly] =
- JITLinkMemoryManager::SegmentRequest(Alignment, Size, 0);
-
- auto AllocOrErr = MemMgr.allocate(JD, SingleReadOnlySegment);
- if (!AllocOrErr)
- return AllocOrErr.takeError();
+ auto Alloc = SimpleSegmentAlloc::Create(
+ MemMgr, JD, {{MemProt::Read, {Size, Align(PageSize)}}});
+ if (!Alloc)
+ return Alloc;
// Initialize working memory with a copy of our object buffer.
- // TODO: Use our buffer as working memory directly.
- std::unique_ptr<Allocation> Alloc = std::move(*AllocOrErr);
- MutableArrayRef<char> WorkingMem = Alloc->getWorkingMemory(ReadOnly);
- memcpy(WorkingMem.data(), Buffer->getBufferStart(), Size);
+ auto SegInfo = Alloc->getSegInfo(MemProt::Read);
+ memcpy(SegInfo.WorkingMem.data(), Buffer->getBufferStart(), Size);
Buffer.reset();
- return std::move(Alloc);
+ return Alloc;
}
void ELFDebugObject::reportSectionTargetMemoryRange(StringRef Name,
@@ -447,7 +456,7 @@ Error DebugObjectManagerPlugin::notifyEmitted(
std::future<MSVCPError> FinalizeErr = FinalizePromise.get_future();
It->second->finalizeAsync(
- [this, &FinalizePromise, &MR](Expected<sys::MemoryBlock> TargetMem) {
+ [this, &FinalizePromise, &MR](Expected<ExecutorAddrRange> TargetMem) {
// Any failure here will fail materialization.
if (!TargetMem) {
FinalizePromise.set_value(TargetMem.takeError());
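
finalizeWorkingMemory above moves from the old SegmentsRequestMap allocation API to SimpleSegmentAlloc. A minimal sketch of the same single read-only-segment flow (allocate, copy into working memory, finalize asynchronously); the memory manager, dylib, data buffer, and completion callback are assumed to come from the caller:

#include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h"
#include <cstring>
#include <functional>

using namespace llvm;
using namespace llvm::jitlink;

void emitReadOnlyBlob(
    JITLinkMemoryManager &MemMgr, const JITLinkDylib *JD, const char *Data,
    size_t Size,
    std::function<void(Expected<JITLinkMemoryManager::FinalizedAlloc>)> Done) {
  // One read-only segment; the alignment is chosen arbitrarily for the sketch.
  auto Alloc = SimpleSegmentAlloc::Create(
      MemMgr, JD, {{MemProt::Read, {Size, Align(64)}}});
  if (!Alloc)
    return Done(Alloc.takeError());
  auto SegInfo = Alloc->getSegInfo(MemProt::Read);
  memcpy(SegInfo.WorkingMem.data(), Data, Size);
  // Hand the finalized allocation (or the error) back to the caller; keeping
  // the FinalizedAlloc alive is what keeps the target memory alive.
  Alloc->finalize([Done = std::move(Done)](
                      Expected<JITLinkMemoryManager::FinalizedAlloc> FA) {
    Done(std::move(FA));
  });
}
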
diff --git a/llvm/lib/ExecutionEngine/Orc/DebuggerSupportPlugin.cpp b/llvm/lib/ExecutionEngine/Orc/DebuggerSupportPlugin.cpp
new file mode 100644
index 000000000000..8479495623b8
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/Orc/DebuggerSupportPlugin.cpp
@@ -0,0 +1,450 @@
+//===------- DebuggerSupportPlugin.cpp - Utils for debugger support -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/DebuggerSupportPlugin.h"
+
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/BinaryFormat/MachO.h"
+
+#define DEBUG_TYPE "orc"
+
+using namespace llvm;
+using namespace llvm::jitlink;
+using namespace llvm::orc;
+
+static const char *SynthDebugSectionName = "__jitlink_synth_debug_object";
+
+namespace {
+
+struct MachO64LE {
+ using UIntPtr = uint64_t;
+
+ using Header = MachO::mach_header_64;
+ using SegmentLC = MachO::segment_command_64;
+ using Section = MachO::section_64;
+ using NList = MachO::nlist_64;
+
+ static constexpr support::endianness Endianness = support::little;
+ static constexpr const uint32_t Magic = MachO::MH_MAGIC_64;
+ static constexpr const uint32_t SegmentCmd = MachO::LC_SEGMENT_64;
+};
+
+class MachODebugObjectSynthesizerBase
+ : public GDBJITDebugInfoRegistrationPlugin::DebugSectionSynthesizer {
+public:
+ static bool isDebugSection(Section &Sec) {
+ return Sec.getName().startswith("__DWARF,");
+ }
+
+ MachODebugObjectSynthesizerBase(LinkGraph &G, ExecutorAddr RegisterActionAddr)
+ : G(G), RegisterActionAddr(RegisterActionAddr) {}
+ virtual ~MachODebugObjectSynthesizerBase() {}
+
+ Error preserveDebugSections() {
+ if (G.findSectionByName(SynthDebugSectionName)) {
+ LLVM_DEBUG({
+ dbgs() << "MachODebugObjectSynthesizer skipping graph " << G.getName()
+ << " which contains an unexpected existing "
+ << SynthDebugSectionName << " section.\n";
+ });
+ return Error::success();
+ }
+
+ LLVM_DEBUG({
+ dbgs() << "MachODebugObjectSynthesizer visiting graph " << G.getName()
+ << "\n";
+ });
+ for (auto &Sec : G.sections()) {
+ if (!isDebugSection(Sec))
+ continue;
+ // Preserve blocks in this debug section by marking one existing symbol
+ // live for each block, and introducing a new live, anonymous symbol for
+ // each currently unreferenced block.
+ LLVM_DEBUG({
+ dbgs() << " Preserving debug section " << Sec.getName() << "\n";
+ });
+ SmallSet<Block *, 8> PreservedBlocks;
+ for (auto *Sym : Sec.symbols()) {
+ bool NewPreservedBlock =
+ PreservedBlocks.insert(&Sym->getBlock()).second;
+ if (NewPreservedBlock)
+ Sym->setLive(true);
+ }
+ for (auto *B : Sec.blocks())
+ if (!PreservedBlocks.count(B))
+ G.addAnonymousSymbol(*B, 0, 0, false, true);
+ }
+ return Error::success();
+ }
+
+protected:
+ LinkGraph &G;
+ ExecutorAddr RegisterActionAddr;
+};
+
+template <typename MachOTraits>
+class MachODebugObjectSynthesizer : public MachODebugObjectSynthesizerBase {
+private:
+ class MachOStructWriter {
+ public:
+ MachOStructWriter(MutableArrayRef<char> Buffer) : Buffer(Buffer) {}
+
+ size_t getOffset() const { return Offset; }
+
+ template <typename MachOStruct> void write(MachOStruct S) {
+ assert(Offset + sizeof(S) <= Buffer.size() &&
+ "Container block overflow while constructing debug MachO");
+ if (MachOTraits::Endianness != support::endian::system_endianness())
+ MachO::swapStruct(S);
+ memcpy(Buffer.data() + Offset, &S, sizeof(S));
+ Offset += sizeof(S);
+ }
+
+ private:
+ MutableArrayRef<char> Buffer;
+ size_t Offset = 0;
+ };
+
+public:
+ using MachODebugObjectSynthesizerBase::MachODebugObjectSynthesizerBase;
+
+ Error startSynthesis() override {
+ LLVM_DEBUG({
+ dbgs() << "Creating " << SynthDebugSectionName << " for " << G.getName()
+ << "\n";
+ });
+ auto &SDOSec = G.createSection(SynthDebugSectionName, MemProt::Read);
+
+ struct DebugSectionInfo {
+ Section *Sec = nullptr;
+ StringRef SegName;
+ StringRef SecName;
+ JITTargetAddress Alignment = 0;
+ JITTargetAddress StartAddr = 0;
+ uint64_t Size = 0;
+ };
+
+ SmallVector<DebugSectionInfo, 12> DebugSecInfos;
+ size_t NumSections = 0;
+ for (auto &Sec : G.sections()) {
+ if (llvm::empty(Sec.blocks()))
+ continue;
+
+ ++NumSections;
+ if (isDebugSection(Sec)) {
+ size_t SepPos = Sec.getName().find(',');
+ if (SepPos > 16 || (Sec.getName().size() - (SepPos + 1) > 16)) {
+ LLVM_DEBUG({
+ dbgs() << "Skipping debug object synthesis for graph "
+ << G.getName()
+ << ": encountered non-standard DWARF section name \""
+ << Sec.getName() << "\"\n";
+ });
+ return Error::success();
+ }
+ DebugSecInfos.push_back({&Sec, Sec.getName().substr(0, SepPos),
+ Sec.getName().substr(SepPos + 1), 0, 0});
+ } else
+ NonDebugSections.push_back(&Sec);
+ }
+
+ // Create container block.
+ size_t SectionsCmdSize =
+ sizeof(typename MachOTraits::Section) * NumSections;
+ size_t SegmentLCSize =
+ sizeof(typename MachOTraits::SegmentLC) + SectionsCmdSize;
+ size_t ContainerBlockSize =
+ sizeof(typename MachOTraits::Header) + SegmentLCSize;
+ auto ContainerBlockContent = G.allocateBuffer(ContainerBlockSize);
+ MachOContainerBlock =
+ &G.createMutableContentBlock(SDOSec, ContainerBlockContent, 0, 8, 0);
+
+ // Copy debug section blocks and symbols.
+ JITTargetAddress NextBlockAddr = MachOContainerBlock->getSize();
+ for (auto &SI : DebugSecInfos) {
+ assert(!llvm::empty(SI.Sec->blocks()) && "Empty debug info section?");
+
+ // Update addresses in debug section.
+ LLVM_DEBUG({
+ dbgs() << " Appending " << SI.Sec->getName() << " ("
+ << SI.Sec->blocks_size() << " block(s)) at "
+ << formatv("{0:x8}", NextBlockAddr) << "\n";
+ });
+ for (auto *B : SI.Sec->blocks()) {
+ NextBlockAddr = alignToBlock(NextBlockAddr, *B);
+ B->setAddress(NextBlockAddr);
+ NextBlockAddr += B->getSize();
+ }
+
+ auto &FirstBlock = **SI.Sec->blocks().begin();
+ if (FirstBlock.getAlignmentOffset() != 0)
+ return make_error<StringError>(
+ "First block in " + SI.Sec->getName() +
+ " section has non-zero alignment offset",
+ inconvertibleErrorCode());
+ if (FirstBlock.getAlignment() > std::numeric_limits<uint32_t>::max())
+ return make_error<StringError>("First block in " + SI.Sec->getName() +
+ " has alignment >4Gb",
+ inconvertibleErrorCode());
+
+ SI.Alignment = FirstBlock.getAlignment();
+ SI.StartAddr = FirstBlock.getAddress();
+ SI.Size = NextBlockAddr - SI.StartAddr;
+ G.mergeSections(SDOSec, *SI.Sec);
+ SI.Sec = nullptr;
+ }
+ size_t DebugSectionsSize = NextBlockAddr - MachOContainerBlock->getSize();
+
+ // Write MachO header and debug section load commands.
+ MachOStructWriter Writer(MachOContainerBlock->getAlreadyMutableContent());
+ typename MachOTraits::Header Hdr;
+ memset(&Hdr, 0, sizeof(Hdr));
+ Hdr.magic = MachOTraits::Magic;
+ switch (G.getTargetTriple().getArch()) {
+ case Triple::x86_64:
+ Hdr.cputype = MachO::CPU_TYPE_X86_64;
+ Hdr.cpusubtype = MachO::CPU_SUBTYPE_X86_64_ALL;
+ break;
+ case Triple::aarch64:
+ Hdr.cputype = MachO::CPU_TYPE_ARM64;
+ Hdr.cpusubtype = MachO::CPU_SUBTYPE_ARM64_ALL;
+ break;
+ default:
+ llvm_unreachable("Unsupported architecture");
+ }
+ Hdr.filetype = MachO::MH_OBJECT;
+ Hdr.ncmds = 1;
+ Hdr.sizeofcmds = SegmentLCSize;
+ Hdr.flags = 0;
+ Writer.write(Hdr);
+
+ typename MachOTraits::SegmentLC SegLC;
+ memset(&SegLC, 0, sizeof(SegLC));
+ SegLC.cmd = MachOTraits::SegmentCmd;
+ SegLC.cmdsize = SegmentLCSize;
+ SegLC.vmaddr = ContainerBlockSize;
+ SegLC.vmsize = DebugSectionsSize;
+ SegLC.fileoff = ContainerBlockSize;
+ SegLC.filesize = DebugSectionsSize;
+ SegLC.maxprot =
+ MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE;
+ SegLC.initprot =
+ MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE;
+ SegLC.nsects = NumSections;
+ SegLC.flags = 0;
+ Writer.write(SegLC);
+
+ StringSet<> ExistingLongNames;
+ for (auto &SI : DebugSecInfos) {
+ typename MachOTraits::Section Sec;
+ memset(&Sec, 0, sizeof(Sec));
+ memcpy(Sec.sectname, SI.SecName.data(), SI.SecName.size());
+ memcpy(Sec.segname, SI.SegName.data(), SI.SegName.size());
+ Sec.addr = SI.StartAddr;
+ Sec.size = SI.Size;
+ Sec.offset = SI.StartAddr;
+ Sec.align = SI.Alignment;
+ Sec.reloff = 0;
+ Sec.nreloc = 0;
+ Sec.flags = MachO::S_ATTR_DEBUG;
+ Writer.write(Sec);
+ }
+
+ // Set MachOContainerBlock to indicate success to
+ // completeSynthesisAndRegister.
+ NonDebugSectionsStart = Writer.getOffset();
+ return Error::success();
+ }
+
+ Error completeSynthesisAndRegister() override {
+ if (!MachOContainerBlock) {
+ LLVM_DEBUG({
+ dbgs() << "Not writing MachO debug object header for " << G.getName()
+ << " since createDebugSection failed\n";
+ });
+ return Error::success();
+ }
+
+ LLVM_DEBUG({
+ dbgs() << "Writing MachO debug object header for " << G.getName() << "\n";
+ });
+
+ MachOStructWriter Writer(
+ MachOContainerBlock->getAlreadyMutableContent().drop_front(
+ NonDebugSectionsStart));
+
+ unsigned LongSectionNameIdx = 0;
+ for (auto *Sec : NonDebugSections) {
+ size_t SepPos = Sec->getName().find(',');
+ StringRef SegName, SecName;
+ std::string CustomSecName;
+
+ if ((SepPos == StringRef::npos && Sec->getName().size() <= 16)) {
+ // No embedded segment name, short section name.
+ SegName = "__JITLINK_CUSTOM";
+ SecName = Sec->getName();
+ } else if (SepPos < 16 && (Sec->getName().size() - (SepPos + 1) <= 16)) {
+ // Canonical embedded segment and section name.
+ SegName = Sec->getName().substr(0, SepPos);
+ SecName = Sec->getName().substr(SepPos + 1);
+ } else {
+ // Long section name that needs to be truncated.
+ assert(Sec->getName().size() > 16 &&
+ "Short section name should have been handled above");
+ SegName = "__JITLINK_CUSTOM";
+ auto IdxStr = std::to_string(++LongSectionNameIdx);
+ CustomSecName = Sec->getName().substr(0, 15 - IdxStr.size()).str();
+ CustomSecName += ".";
+ CustomSecName += IdxStr;
+ SecName = StringRef(CustomSecName.data(), 16);
+ }
+
+ SectionRange R(*Sec);
+ if (R.getFirstBlock()->getAlignmentOffset() != 0)
+ return make_error<StringError>(
+ "While building MachO debug object for " + G.getName() +
+ " first block has non-zero alignment offset",
+ inconvertibleErrorCode());
+
+ typename MachOTraits::Section SecCmd;
+ memset(&SecCmd, 0, sizeof(SecCmd));
+ memcpy(SecCmd.sectname, SecName.data(), SecName.size());
+ memcpy(SecCmd.segname, SegName.data(), SegName.size());
+ SecCmd.addr = R.getStart();
+ SecCmd.size = R.getSize();
+ SecCmd.offset = 0;
+ SecCmd.align = R.getFirstBlock()->getAlignment();
+ SecCmd.reloff = 0;
+ SecCmd.nreloc = 0;
+ SecCmd.flags = 0;
+ Writer.write(SecCmd);
+ }
+
+ SectionRange R(MachOContainerBlock->getSection());
+ G.allocActions().push_back(
+ {{RegisterActionAddr.getValue(), R.getStart(), R.getSize()}, {}});
+ return Error::success();
+ }
+
+private:
+ Block *MachOContainerBlock = nullptr;
+ SmallVector<Section *, 16> NonDebugSections;
+ size_t NonDebugSectionsStart = 0;
+};
+
+} // end anonymous namespace
+
+namespace llvm {
+namespace orc {
+
+Expected<std::unique_ptr<GDBJITDebugInfoRegistrationPlugin>>
+GDBJITDebugInfoRegistrationPlugin::Create(ExecutionSession &ES,
+ JITDylib &ProcessJD,
+ const Triple &TT) {
+ auto RegisterActionAddr =
+ TT.isOSBinFormatMachO()
+ ? ES.intern("_llvm_orc_registerJITLoaderGDBAllocAction")
+ : ES.intern("llvm_orc_registerJITLoaderGDBAllocAction");
+
+ if (auto Addr = ES.lookup({&ProcessJD}, RegisterActionAddr))
+ return std::make_unique<GDBJITDebugInfoRegistrationPlugin>(
+ ExecutorAddr(Addr->getAddress()));
+ else
+ return Addr.takeError();
+}
+
+Error GDBJITDebugInfoRegistrationPlugin::notifyFailed(
+ MaterializationResponsibility &MR) {
+ return Error::success();
+}
+
+Error GDBJITDebugInfoRegistrationPlugin::notifyRemovingResources(
+ ResourceKey K) {
+ return Error::success();
+}
+
+void GDBJITDebugInfoRegistrationPlugin::notifyTransferringResources(
+ ResourceKey DstKey, ResourceKey SrcKey) {}
+
+void GDBJITDebugInfoRegistrationPlugin::modifyPassConfig(
+ MaterializationResponsibility &MR, LinkGraph &LG,
+ PassConfiguration &PassConfig) {
+
+ if (LG.getTargetTriple().getObjectFormat() == Triple::MachO)
+ modifyPassConfigForMachO(MR, LG, PassConfig);
+ else {
+ LLVM_DEBUG({
+ dbgs() << "GDBJITDebugInfoRegistrationPlugin skipping unspported graph "
+ << LG.getName() << "(triple = " << LG.getTargetTriple().str()
+ << "\n";
+ });
+ }
+}
+
+void GDBJITDebugInfoRegistrationPlugin::modifyPassConfigForMachO(
+ MaterializationResponsibility &MR, jitlink::LinkGraph &LG,
+ jitlink::PassConfiguration &PassConfig) {
+
+ switch (LG.getTargetTriple().getArch()) {
+ case Triple::x86_64:
+ case Triple::aarch64:
+ // Supported, continue.
+ assert(LG.getPointerSize() == 8 && "Graph has incorrect pointer size");
+ assert(LG.getEndianness() == support::little &&
+ "Graph has incorrect endianness");
+ break;
+ default:
+ // Unsupported.
+ LLVM_DEBUG({
+ dbgs() << "GDBJITDebugInfoRegistrationPlugin skipping unsupported "
+ << "MachO graph " << LG.getName()
+ << "(triple = " << LG.getTargetTriple().str()
+ << ", pointer size = " << LG.getPointerSize() << ", endianness = "
+ << (LG.getEndianness() == support::big ? "big" : "little")
+ << ")\n";
+ });
+ return;
+ }
+
+ // Scan for debug sections. If we find one then install passes.
+ bool HasDebugSections = false;
+ for (auto &Sec : LG.sections())
+ if (MachODebugObjectSynthesizerBase::isDebugSection(Sec)) {
+ HasDebugSections = true;
+ break;
+ }
+
+ if (HasDebugSections) {
+ LLVM_DEBUG({
+ dbgs() << "GDBJITDebugInfoRegistrationPlugin: Graph " << LG.getName()
+ << " contains debug info. Installing debugger support passes.\n";
+ });
+
+ auto MDOS = std::make_shared<MachODebugObjectSynthesizer<MachO64LE>>(
+ LG, RegisterActionAddr);
+ PassConfig.PrePrunePasses.push_back(
+ [=](LinkGraph &G) { return MDOS->preserveDebugSections(); });
+ PassConfig.PostPrunePasses.push_back(
+ [=](LinkGraph &G) { return MDOS->startSynthesis(); });
+ PassConfig.PreFixupPasses.push_back(
+ [=](LinkGraph &G) { return MDOS->completeSynthesisAndRegister(); });
+ } else {
+ LLVM_DEBUG({
+ dbgs() << "GDBJITDebugInfoRegistrationPlugin: Graph " << LG.getName()
+ << " contains no debug info. Skipping.\n";
+ });
+ }
+}
+
+} // namespace orc
+} // namespace llvm
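
completeSynthesisAndRegister above has to fit arbitrary JITLink section names into MachO's fixed 16-byte sectname field: names in canonical "seg,sect" form are split, and anything longer is truncated with a numeric suffix so the truncated names stay unique (the __JITLINK_CUSTOM placeholder segment is handled separately in the code above). A self-contained sketch of that naming scheme:

#include <cassert>
#include <string>

// Mirror of the three cases above: short name, canonical "seg,sect" name,
// and long name truncated to 16 bytes including a ".<index>" suffix.
std::string makeSectName(const std::string &Name, unsigned &LongNameIdx) {
  size_t SepPos = Name.find(',');
  if (SepPos == std::string::npos && Name.size() <= 16)
    return Name;
  if (SepPos != std::string::npos && SepPos < 16 &&
      Name.size() - (SepPos + 1) <= 16)
    return Name.substr(SepPos + 1);
  std::string Idx = std::to_string(++LongNameIdx);
  return Name.substr(0, 15 - Idx.size()) + "." + Idx;
}

int main() {
  unsigned Idx = 0;
  assert(makeSectName("__text", Idx) == "__text");
  assert(makeSectName("__DWARF,__debug_info", Idx) == "__debug_info");
  assert(makeSectName("a_very_long_custom_section_name", Idx) ==
         "a_very_long_cu.1");
  return 0;
}
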
diff --git a/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp b/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp
new file mode 100644
index 000000000000..b17d196f01b6
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp
@@ -0,0 +1,818 @@
+//===------ ELFNixPlatform.cpp - Utilities for executing MachO in Orc -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/ELFNixPlatform.h"
+
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/ExecutionEngine/JITLink/ELF_x86_64.h"
+#include "llvm/ExecutionEngine/JITLink/x86_64.h"
+#include "llvm/ExecutionEngine/Orc/DebugUtils.h"
+#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
+#include "llvm/Support/BinaryByteStream.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "orc"
+
+using namespace llvm;
+using namespace llvm::orc;
+using namespace llvm::orc::shared;
+
+namespace {
+
+class DSOHandleMaterializationUnit : public MaterializationUnit {
+public:
+ DSOHandleMaterializationUnit(ELFNixPlatform &ENP,
+ const SymbolStringPtr &DSOHandleSymbol)
+ : MaterializationUnit(createDSOHandleSectionSymbols(ENP, DSOHandleSymbol),
+ DSOHandleSymbol),
+ ENP(ENP) {}
+
+ StringRef getName() const override { return "DSOHandleMU"; }
+
+ void materialize(std::unique_ptr<MaterializationResponsibility> R) override {
+ unsigned PointerSize;
+ support::endianness Endianness;
+ jitlink::Edge::Kind EdgeKind;
+ const auto &TT =
+ ENP.getExecutionSession().getExecutorProcessControl().getTargetTriple();
+
+ switch (TT.getArch()) {
+ case Triple::x86_64:
+ PointerSize = 8;
+ Endianness = support::endianness::little;
+ EdgeKind = jitlink::x86_64::Pointer64;
+ break;
+ default:
+ llvm_unreachable("Unrecognized architecture");
+ }
+
+ // void *__dso_handle = &__dso_handle;
+ auto G = std::make_unique<jitlink::LinkGraph>(
+ "<DSOHandleMU>", TT, PointerSize, Endianness,
+ jitlink::getGenericEdgeKindName);
+ auto &DSOHandleSection =
+ G->createSection(".data.__dso_handle", jitlink::MemProt::Read);
+ auto &DSOHandleBlock = G->createContentBlock(
+ DSOHandleSection, getDSOHandleContent(PointerSize), 0, 8, 0);
+ auto &DSOHandleSymbol = G->addDefinedSymbol(
+ DSOHandleBlock, 0, *R->getInitializerSymbol(), DSOHandleBlock.getSize(),
+ jitlink::Linkage::Strong, jitlink::Scope::Default, false, true);
+ DSOHandleBlock.addEdge(EdgeKind, 0, DSOHandleSymbol, 0);
+
+ ENP.getObjectLinkingLayer().emit(std::move(R), std::move(G));
+ }
+
+ void discard(const JITDylib &JD, const SymbolStringPtr &Sym) override {}
+
+private:
+ static SymbolFlagsMap
+ createDSOHandleSectionSymbols(ELFNixPlatform &ENP,
+ const SymbolStringPtr &DSOHandleSymbol) {
+ SymbolFlagsMap SymbolFlags;
+ SymbolFlags[DSOHandleSymbol] = JITSymbolFlags::Exported;
+ return SymbolFlags;
+ }
+
+ ArrayRef<char> getDSOHandleContent(size_t PointerSize) {
+ static const char Content[8] = {0};
+ assert(PointerSize <= sizeof Content);
+ return {Content, PointerSize};
+ }
+
+ ELFNixPlatform &ENP;
+};
+
+StringRef EHFrameSectionName = ".eh_frame";
+StringRef InitArrayFuncSectionName = ".init_array";
+
+StringRef ThreadBSSSectionName = ".tbss";
+StringRef ThreadDataSectionName = ".tdata";
+
+StringRef InitSectionNames[] = {InitArrayFuncSectionName};
+
+} // end anonymous namespace
+
+namespace llvm {
+namespace orc {
+
+Expected<std::unique_ptr<ELFNixPlatform>>
+ELFNixPlatform::Create(ExecutionSession &ES,
+ ObjectLinkingLayer &ObjLinkingLayer,
+ JITDylib &PlatformJD, const char *OrcRuntimePath,
+ Optional<SymbolAliasMap> RuntimeAliases) {
+
+ auto &EPC = ES.getExecutorProcessControl();
+
+ // If the target is not supported then bail out immediately.
+ if (!supportedTarget(EPC.getTargetTriple()))
+ return make_error<StringError>("Unsupported ELFNixPlatform triple: " +
+ EPC.getTargetTriple().str(),
+ inconvertibleErrorCode());
+
+ // Create default aliases if the caller didn't supply any.
+ if (!RuntimeAliases)
+ RuntimeAliases = standardPlatformAliases(ES);
+
+ // Define the aliases.
+ if (auto Err = PlatformJD.define(symbolAliases(std::move(*RuntimeAliases))))
+ return std::move(Err);
+
+ // Add JIT-dispatch function support symbols.
+ if (auto Err = PlatformJD.define(absoluteSymbols(
+ {{ES.intern("__orc_rt_jit_dispatch"),
+ {EPC.getJITDispatchInfo().JITDispatchFunction.getValue(),
+ JITSymbolFlags::Exported}},
+ {ES.intern("__orc_rt_jit_dispatch_ctx"),
+ {EPC.getJITDispatchInfo().JITDispatchContext.getValue(),
+ JITSymbolFlags::Exported}}})))
+ return std::move(Err);
+
+ // Create a generator for the ORC runtime archive.
+ auto OrcRuntimeArchiveGenerator = StaticLibraryDefinitionGenerator::Load(
+ ObjLinkingLayer, OrcRuntimePath, EPC.getTargetTriple());
+ if (!OrcRuntimeArchiveGenerator)
+ return OrcRuntimeArchiveGenerator.takeError();
+
+ // Create the instance.
+ Error Err = Error::success();
+ auto P = std::unique_ptr<ELFNixPlatform>(
+ new ELFNixPlatform(ES, ObjLinkingLayer, PlatformJD,
+ std::move(*OrcRuntimeArchiveGenerator), Err));
+ if (Err)
+ return std::move(Err);
+ return std::move(P);
+}
+
+Error ELFNixPlatform::setupJITDylib(JITDylib &JD) {
+ return JD.define(
+ std::make_unique<DSOHandleMaterializationUnit>(*this, DSOHandleSymbol));
+}
+
+Error ELFNixPlatform::notifyAdding(ResourceTracker &RT,
+ const MaterializationUnit &MU) {
+ auto &JD = RT.getJITDylib();
+ const auto &InitSym = MU.getInitializerSymbol();
+ if (!InitSym)
+ return Error::success();
+
+ RegisteredInitSymbols[&JD].add(InitSym,
+ SymbolLookupFlags::WeaklyReferencedSymbol);
+ LLVM_DEBUG({
+ dbgs() << "ELFNixPlatform: Registered init symbol " << *InitSym
+ << " for MU " << MU.getName() << "\n";
+ });
+ return Error::success();
+}
+
+Error ELFNixPlatform::notifyRemoving(ResourceTracker &RT) {
+ llvm_unreachable("Not supported yet");
+}
+
+static void addAliases(ExecutionSession &ES, SymbolAliasMap &Aliases,
+ ArrayRef<std::pair<const char *, const char *>> AL) {
+ for (auto &KV : AL) {
+ auto AliasName = ES.intern(KV.first);
+ assert(!Aliases.count(AliasName) && "Duplicate symbol name in alias map");
+ Aliases[std::move(AliasName)] = {ES.intern(KV.second),
+ JITSymbolFlags::Exported};
+ }
+}
+
+SymbolAliasMap ELFNixPlatform::standardPlatformAliases(ExecutionSession &ES) {
+ SymbolAliasMap Aliases;
+ addAliases(ES, Aliases, requiredCXXAliases());
+ addAliases(ES, Aliases, standardRuntimeUtilityAliases());
+ return Aliases;
+}
+
+ArrayRef<std::pair<const char *, const char *>>
+ELFNixPlatform::requiredCXXAliases() {
+ static const std::pair<const char *, const char *> RequiredCXXAliases[] = {
+ {"__cxa_atexit", "__orc_rt_elfnix_cxa_atexit"},
+ {"atexit", "__orc_rt_elfnix_atexit"}};
+
+ return ArrayRef<std::pair<const char *, const char *>>(RequiredCXXAliases);
+}
+
+ArrayRef<std::pair<const char *, const char *>>
+ELFNixPlatform::standardRuntimeUtilityAliases() {
+ static const std::pair<const char *, const char *>
+ StandardRuntimeUtilityAliases[] = {
+ {"__orc_rt_run_program", "__orc_rt_elfnix_run_program"},
+ {"__orc_rt_log_error", "__orc_rt_log_error_to_stderr"}};
+
+ return ArrayRef<std::pair<const char *, const char *>>(
+ StandardRuntimeUtilityAliases);
+}
+
+bool ELFNixPlatform::isInitializerSection(StringRef SecName) {
+ for (auto &Name : InitSectionNames) {
+ if (Name.equals(SecName))
+ return true;
+ }
+ return false;
+}
+
+bool ELFNixPlatform::supportedTarget(const Triple &TT) {
+ switch (TT.getArch()) {
+ case Triple::x86_64:
+ return true;
+ default:
+ return false;
+ }
+}
+
+ELFNixPlatform::ELFNixPlatform(
+ ExecutionSession &ES, ObjectLinkingLayer &ObjLinkingLayer,
+ JITDylib &PlatformJD,
+ std::unique_ptr<DefinitionGenerator> OrcRuntimeGenerator, Error &Err)
+ : ES(ES), ObjLinkingLayer(ObjLinkingLayer),
+ DSOHandleSymbol(ES.intern("__dso_handle")) {
+ ErrorAsOutParameter _(&Err);
+
+ ObjLinkingLayer.addPlugin(std::make_unique<ELFNixPlatformPlugin>(*this));
+
+ PlatformJD.addGenerator(std::move(OrcRuntimeGenerator));
+
+ // PlatformJD hasn't been set up by the platform yet (since we're creating
+ // the platform now), so set it up.
+ if (auto E2 = setupJITDylib(PlatformJD)) {
+ Err = std::move(E2);
+ return;
+ }
+
+ RegisteredInitSymbols[&PlatformJD].add(
+ DSOHandleSymbol, SymbolLookupFlags::WeaklyReferencedSymbol);
+
+ // Associate wrapper function tags with JIT-side function implementations.
+ if (auto E2 = associateRuntimeSupportFunctions(PlatformJD)) {
+ Err = std::move(E2);
+ return;
+ }
+
+ // Look up the addresses of runtime functions callable by the platform, then
+ // call the platform bootstrap function to initialize the platform-state
+ // object in the executor.
+ if (auto E2 = bootstrapELFNixRuntime(PlatformJD)) {
+ Err = std::move(E2);
+ return;
+ }
+}
+
+Error ELFNixPlatform::associateRuntimeSupportFunctions(JITDylib &PlatformJD) {
+ ExecutionSession::JITDispatchHandlerAssociationMap WFs;
+
+ using GetInitializersSPSSig =
+ SPSExpected<SPSELFNixJITDylibInitializerSequence>(SPSString);
+ WFs[ES.intern("__orc_rt_elfnix_get_initializers_tag")] =
+ ES.wrapAsyncWithSPS<GetInitializersSPSSig>(
+ this, &ELFNixPlatform::rt_getInitializers);
+
+ using GetDeinitializersSPSSig =
+ SPSExpected<SPSELFJITDylibDeinitializerSequence>(SPSExecutorAddr);
+ WFs[ES.intern("__orc_rt_elfnix_get_deinitializers_tag")] =
+ ES.wrapAsyncWithSPS<GetDeinitializersSPSSig>(
+ this, &ELFNixPlatform::rt_getDeinitializers);
+
+ using LookupSymbolSPSSig =
+ SPSExpected<SPSExecutorAddr>(SPSExecutorAddr, SPSString);
+ WFs[ES.intern("__orc_rt_elfnix_symbol_lookup_tag")] =
+ ES.wrapAsyncWithSPS<LookupSymbolSPSSig>(this,
+ &ELFNixPlatform::rt_lookupSymbol);
+
+ return ES.registerJITDispatchHandlers(PlatformJD, std::move(WFs));
+}
+
+void ELFNixPlatform::getInitializersBuildSequencePhase(
+ SendInitializerSequenceFn SendResult, JITDylib &JD,
+ std::vector<JITDylibSP> DFSLinkOrder) {
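+ // Append the recorded initializers for each JITDylib in reverse DFS link
+ // order, erasing each InitSeqs entry so the same initializers are not
+ // returned twice.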
+ ELFNixJITDylibInitializerSequence FullInitSeq;
+ {
+ std::lock_guard<std::mutex> Lock(PlatformMutex);
+ for (auto &InitJD : reverse(DFSLinkOrder)) {
+ LLVM_DEBUG({
+ dbgs() << "ELFNixPlatform: Appending inits for \"" << InitJD->getName()
+ << "\" to sequence\n";
+ });
+ auto ISItr = InitSeqs.find(InitJD.get());
+ if (ISItr != InitSeqs.end()) {
+ FullInitSeq.emplace_back(std::move(ISItr->second));
+ InitSeqs.erase(ISItr);
+ }
+ }
+ }
+
+ SendResult(std::move(FullInitSeq));
+}
+
+void ELFNixPlatform::getInitializersLookupPhase(
+ SendInitializerSequenceFn SendResult, JITDylib &JD) {
+
+ auto DFSLinkOrder = JD.getDFSLinkOrder();
+ DenseMap<JITDylib *, SymbolLookupSet> NewInitSymbols;
+ ES.runSessionLocked([&]() {
+ for (auto &InitJD : DFSLinkOrder) {
+ auto RISItr = RegisteredInitSymbols.find(InitJD.get());
+ if (RISItr != RegisteredInitSymbols.end()) {
+ NewInitSymbols[InitJD.get()] = std::move(RISItr->second);
+ RegisteredInitSymbols.erase(RISItr);
+ }
+ }
+ });
+
+ // If there are no further init symbols to look up then move on to the next
+ // phase.
+ if (NewInitSymbols.empty()) {
+ getInitializersBuildSequencePhase(std::move(SendResult), JD,
+ std::move(DFSLinkOrder));
+ return;
+ }
+
+ // Otherwise issue a lookup and re-run this phase when it completes.
+ lookupInitSymbolsAsync(
+ [this, SendResult = std::move(SendResult), &JD](Error Err) mutable {
+ if (Err)
+ SendResult(std::move(Err));
+ else
+ getInitializersLookupPhase(std::move(SendResult), JD);
+ },
+ ES, std::move(NewInitSymbols));
+}
+
+void ELFNixPlatform::rt_getInitializers(SendInitializerSequenceFn SendResult,
+ StringRef JDName) {
+ LLVM_DEBUG({
+ dbgs() << "ELFNixPlatform::rt_getInitializers(\"" << JDName << "\")\n";
+ });
+
+ JITDylib *JD = ES.getJITDylibByName(JDName);
+ if (!JD) {
+ LLVM_DEBUG({
+ dbgs() << " No such JITDylib \"" << JDName << "\". Sending error.\n";
+ });
+ SendResult(make_error<StringError>("No JITDylib named " + JDName,
+ inconvertibleErrorCode()));
+ return;
+ }
+
+ getInitializersLookupPhase(std::move(SendResult), *JD);
+}
+
+void ELFNixPlatform::rt_getDeinitializers(
+ SendDeinitializerSequenceFn SendResult, ExecutorAddr Handle) {
+ LLVM_DEBUG({
+ dbgs() << "ELFNixPlatform::rt_getDeinitializers(\""
+ << formatv("{0:x}", Handle.getValue()) << "\")\n";
+ });
+
+ JITDylib *JD = nullptr;
+
+ {
+ std::lock_guard<std::mutex> Lock(PlatformMutex);
+ auto I = HandleAddrToJITDylib.find(Handle.getValue());
+ if (I != HandleAddrToJITDylib.end())
+ JD = I->second;
+ }
+
+ if (!JD) {
+ LLVM_DEBUG({
+ dbgs() << " No JITDylib for handle "
+ << formatv("{0:x}", Handle.getValue()) << "\n";
+ });
+ SendResult(make_error<StringError>("No JITDylib associated with handle " +
+ formatv("{0:x}", Handle.getValue()),
+ inconvertibleErrorCode()));
+ return;
+ }
+
+ SendResult(ELFNixJITDylibDeinitializerSequence());
+}
+
+void ELFNixPlatform::rt_lookupSymbol(SendSymbolAddressFn SendResult,
+ ExecutorAddr Handle,
+ StringRef SymbolName) {
+ LLVM_DEBUG({
+ dbgs() << "ELFNixPlatform::rt_lookupSymbol(\""
+ << formatv("{0:x}", Handle.getValue()) << "\")\n";
+ });
+
+ JITDylib *JD = nullptr;
+
+ {
+ std::lock_guard<std::mutex> Lock(PlatformMutex);
+ auto I = HandleAddrToJITDylib.find(Handle.getValue());
+ if (I != HandleAddrToJITDylib.end())
+ JD = I->second;
+ }
+
+ if (!JD) {
+ LLVM_DEBUG({
+ dbgs() << " No JITDylib for handle "
+ << formatv("{0:x}", Handle.getValue()) << "\n";
+ });
+ SendResult(make_error<StringError>("No JITDylib associated with handle " +
+ formatv("{0:x}", Handle.getValue()),
+ inconvertibleErrorCode()));
+ return;
+ }
+
+ // Use functor class to work around XL build compiler issue on AIX.
+ class RtLookupNotifyComplete {
+ public:
+ RtLookupNotifyComplete(SendSymbolAddressFn &&SendResult)
+ : SendResult(std::move(SendResult)) {}
+ void operator()(Expected<SymbolMap> Result) {
+ if (Result) {
+ assert(Result->size() == 1 && "Unexpected result map count");
+ SendResult(ExecutorAddr(Result->begin()->second.getAddress()));
+ } else {
+ SendResult(Result.takeError());
+ }
+ }
+
+ private:
+ SendSymbolAddressFn SendResult;
+ };
+
+ ES.lookup(
+ LookupKind::DLSym, {{JD, JITDylibLookupFlags::MatchExportedSymbolsOnly}},
+ SymbolLookupSet(ES.intern(SymbolName)), SymbolState::Ready,
+ RtLookupNotifyComplete(std::move(SendResult)), NoDependenciesToRegister);
+}
+
+Error ELFNixPlatform::bootstrapELFNixRuntime(JITDylib &PlatformJD) {
+
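+ // Table of ORC runtime entry points to look up in the platform JITDylib;
+ // their executor addresses are recorded for later SPS wrapper calls.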
+ std::pair<const char *, ExecutorAddr *> Symbols[] = {
+ {"__orc_rt_elfnix_platform_bootstrap", &orc_rt_elfnix_platform_bootstrap},
+ {"__orc_rt_elfnix_platform_shutdown", &orc_rt_elfnix_platform_shutdown},
+ {"__orc_rt_elfnix_register_object_sections",
+ &orc_rt_elfnix_register_object_sections},
+ {"__orc_rt_elfnix_create_pthread_key",
+ &orc_rt_elfnix_create_pthread_key}};
+
+ SymbolLookupSet RuntimeSymbols;
+ std::vector<std::pair<SymbolStringPtr, ExecutorAddr *>> AddrsToRecord;
+ for (const auto &KV : Symbols) {
+ auto Name = ES.intern(KV.first);
+ RuntimeSymbols.add(Name);
+ AddrsToRecord.push_back({std::move(Name), KV.second});
+ }
+
+ auto RuntimeSymbolAddrs = ES.lookup(
+ {{&PlatformJD, JITDylibLookupFlags::MatchAllSymbols}}, RuntimeSymbols);
+ if (!RuntimeSymbolAddrs)
+ return RuntimeSymbolAddrs.takeError();
+
+ for (const auto &KV : AddrsToRecord) {
+ auto &Name = KV.first;
+ assert(RuntimeSymbolAddrs->count(Name) && "Missing runtime symbol?");
+ KV.second->setValue((*RuntimeSymbolAddrs)[Name].getAddress());
+ }
+
+ auto PJDDSOHandle = ES.lookup(
+ {{&PlatformJD, JITDylibLookupFlags::MatchAllSymbols}}, DSOHandleSymbol);
+ if (!PJDDSOHandle)
+ return PJDDSOHandle.takeError();
+
+ if (auto Err = ES.callSPSWrapper<void(uint64_t)>(
+ orc_rt_elfnix_platform_bootstrap, PJDDSOHandle->getAddress()))
+ return Err;
+
+ // FIXME: Ordering is fuzzy here. We're probably best off saying
+ // "behavior is undefined if code that uses the runtime is added before
+ // the platform constructor returns", then move all this to the constructor.
+ RuntimeBootstrapped = true;
+ std::vector<ELFPerObjectSectionsToRegister> DeferredPOSRs;
+ {
+ std::lock_guard<std::mutex> Lock(PlatformMutex);
+ DeferredPOSRs = std::move(BootstrapPOSRs);
+ }
+
+ for (auto &D : DeferredPOSRs)
+ if (auto Err = registerPerObjectSections(D))
+ return Err;
+
+ return Error::success();
+}
+
+Error ELFNixPlatform::registerInitInfo(
+ JITDylib &JD, ArrayRef<jitlink::Section *> InitSections) {
+
+ std::unique_lock<std::mutex> Lock(PlatformMutex);
+
+ ELFNixJITDylibInitializers *InitSeq = nullptr;
+ {
+ auto I = InitSeqs.find(&JD);
+ if (I == InitSeqs.end()) {
+ // If there's no init sequence entry yet then we need to look up the
+ // header symbol to force creation of one.
+ Lock.unlock();
+
+ auto SearchOrder =
+ JD.withLinkOrderDo([](const JITDylibSearchOrder &SO) { return SO; });
+ if (auto Err = ES.lookup(SearchOrder, DSOHandleSymbol).takeError())
+ return Err;
+
+ Lock.lock();
+ I = InitSeqs.find(&JD);
+ assert(I != InitSeqs.end() &&
+ "Entry missing after header symbol lookup?");
+ }
+ InitSeq = &I->second;
+ }
+
+ for (auto *Sec : InitSections) {
+ // FIXME: Avoid copy here.
+ jitlink::SectionRange R(*Sec);
+ InitSeq->InitSections[Sec->getName()].push_back(
+ {ExecutorAddr(R.getStart()), ExecutorAddr(R.getEnd())});
+ }
+
+ return Error::success();
+}
+
+Error ELFNixPlatform::registerPerObjectSections(
+ const ELFPerObjectSectionsToRegister &POSR) {
+
+ if (!orc_rt_elfnix_register_object_sections)
+ return make_error<StringError>("Attempting to register per-object "
+ "sections, but runtime support has not "
+ "been loaded yet",
+ inconvertibleErrorCode());
+
+ Error ErrResult = Error::success();
+ if (auto Err = ES.callSPSWrapper<shared::SPSError(
+ SPSELFPerObjectSectionsToRegister)>(
+ orc_rt_elfnix_register_object_sections, ErrResult, POSR))
+ return Err;
+ return ErrResult;
+}
+
+Expected<uint64_t> ELFNixPlatform::createPThreadKey() {
+ if (!orc_rt_elfnix_create_pthread_key)
+ return make_error<StringError>(
+ "Attempting to create pthread key in target, but runtime support has "
+ "not been loaded yet",
+ inconvertibleErrorCode());
+
+ Expected<uint64_t> Result(0);
+ if (auto Err = ES.callSPSWrapper<SPSExpected<uint64_t>(void)>(
+ orc_rt_elfnix_create_pthread_key, Result))
+ return std::move(Err);
+ return Result;
+}
+
+void ELFNixPlatform::ELFNixPlatformPlugin::modifyPassConfig(
+ MaterializationResponsibility &MR, jitlink::LinkGraph &LG,
+ jitlink::PassConfiguration &Config) {
+
+ // If the initializer symbol is the __dso_handle symbol then just add
+ // the DSO handle support passes.
+ if (MR.getInitializerSymbol() == MP.DSOHandleSymbol) {
+ addDSOHandleSupportPasses(MR, Config);
+ // The DSOHandle materialization unit doesn't require any other
+ // support, so we can bail out early.
+ return;
+ }
+
+ // If the object contains initializers then add passes to record them.
+ if (MR.getInitializerSymbol())
+ addInitializerSupportPasses(MR, Config);
+
+ // Add passes for eh-frame and TLV support.
+ addEHAndTLVSupportPasses(MR, Config);
+}
+
+ObjectLinkingLayer::Plugin::SyntheticSymbolDependenciesMap
+ELFNixPlatform::ELFNixPlatformPlugin::getSyntheticSymbolDependencies(
+ MaterializationResponsibility &MR) {
+ std::lock_guard<std::mutex> Lock(PluginMutex);
+ auto I = InitSymbolDeps.find(&MR);
+ if (I != InitSymbolDeps.end()) {
+ SyntheticSymbolDependenciesMap Result;
+ Result[MR.getInitializerSymbol()] = std::move(I->second);
+ InitSymbolDeps.erase(&MR);
+ return Result;
+ }
+ return SyntheticSymbolDependenciesMap();
+}
+
+void ELFNixPlatform::ELFNixPlatformPlugin::addInitializerSupportPasses(
+ MaterializationResponsibility &MR, jitlink::PassConfiguration &Config) {
+
+ /// Preserve init sections.
+ Config.PrePrunePasses.push_back([this, &MR](jitlink::LinkGraph &G) -> Error {
+ if (auto Err = preserveInitSections(G, MR))
+ return Err;
+ return Error::success();
+ });
+
+ Config.PostFixupPasses.push_back(
+ [this, &JD = MR.getTargetJITDylib()](jitlink::LinkGraph &G) {
+ return registerInitSections(G, JD);
+ });
+}
+
+void ELFNixPlatform::ELFNixPlatformPlugin::addDSOHandleSupportPasses(
+ MaterializationResponsibility &MR, jitlink::PassConfiguration &Config) {
+
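+ // Once the graph is allocated, record the __dso_handle symbol's address for
+ // this JITDylib and create its InitSeqs entry.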
+ Config.PostAllocationPasses.push_back([this, &JD = MR.getTargetJITDylib()](
+ jitlink::LinkGraph &G) -> Error {
+ auto I = llvm::find_if(G.defined_symbols(), [this](jitlink::Symbol *Sym) {
+ return Sym->getName() == *MP.DSOHandleSymbol;
+ });
+ assert(I != G.defined_symbols().end() && "Missing DSO handle symbol");
+ {
+ std::lock_guard<std::mutex> Lock(MP.PlatformMutex);
+ JITTargetAddress HandleAddr = (*I)->getAddress();
+ MP.HandleAddrToJITDylib[HandleAddr] = &JD;
+ assert(!MP.InitSeqs.count(&JD) && "InitSeq entry for JD already exists");
+ MP.InitSeqs.insert(std::make_pair(
+ &JD,
+ ELFNixJITDylibInitializers(JD.getName(), ExecutorAddr(HandleAddr))));
+ }
+ return Error::success();
+ });
+}
+
+void ELFNixPlatform::ELFNixPlatformPlugin::addEHAndTLVSupportPasses(
+ MaterializationResponsibility &MR, jitlink::PassConfiguration &Config) {
+
+ // Insert TLV lowering at the start of the PostPrunePasses, since we want
+ // it to run before GOT/PLT lowering.
+
+ // TODO: Check that the GOT/PLT build pass has run before the
+ // fixTLVSectionsAndEdges pass, since the TLS descriptor needs to be
+ // allocated in the GOT.
+ Config.PostPrunePasses.push_back(
+ [this, &JD = MR.getTargetJITDylib()](jitlink::LinkGraph &G) {
+ return fixTLVSectionsAndEdges(G, JD);
+ });
+
+ // Add a pass to register the final addresses of the eh-frame and TLV sections
+ // with the runtime.
+ Config.PostFixupPasses.push_back([this](jitlink::LinkGraph &G) -> Error {
+ ELFPerObjectSectionsToRegister POSR;
+
+ if (auto *EHFrameSection = G.findSectionByName(EHFrameSectionName)) {
+ jitlink::SectionRange R(*EHFrameSection);
+ if (!R.empty())
+ POSR.EHFrameSection = {ExecutorAddr(R.getStart()),
+ ExecutorAddr(R.getEnd())};
+ }
+
+ // Get a pointer to the thread data section if there is one. It will be used
+ // below.
+ jitlink::Section *ThreadDataSection =
+ G.findSectionByName(ThreadDataSectionName);
+
+ // Handle thread BSS section if there is one.
+ if (auto *ThreadBSSSection = G.findSectionByName(ThreadBSSSectionName)) {
+ // If there's already a thread data section in this graph then merge the
+ // thread BSS section content into it, otherwise just treat the thread
+ // BSS section as the thread data section.
+ if (ThreadDataSection)
+ G.mergeSections(*ThreadDataSection, *ThreadBSSSection);
+ else
+ ThreadDataSection = ThreadBSSSection;
+ }
+
+ // Having merged thread BSS (if present) and thread data (if present),
+ // record the resulting section range.
+ if (ThreadDataSection) {
+ jitlink::SectionRange R(*ThreadDataSection);
+ if (!R.empty())
+ POSR.ThreadDataSection = {ExecutorAddr(R.getStart()),
+ ExecutorAddr(R.getEnd())};
+ }
+
+ if (POSR.EHFrameSection.Start || POSR.ThreadDataSection.Start) {
+
+ // If we're still bootstrapping the runtime then just record this
+ // frame for now.
+ if (!MP.RuntimeBootstrapped) {
+ std::lock_guard<std::mutex> Lock(MP.PlatformMutex);
+ MP.BootstrapPOSRs.push_back(POSR);
+ return Error::success();
+ }
+
+ // Otherwise register it immediately.
+ if (auto Err = MP.registerPerObjectSections(POSR))
+ return Err;
+ }
+
+ return Error::success();
+ });
+}
+
+Error ELFNixPlatform::ELFNixPlatformPlugin::preserveInitSections(
+ jitlink::LinkGraph &G, MaterializationResponsibility &MR) {
+
+ JITLinkSymbolSet InitSectionSymbols;
+ for (auto &InitSectionName : InitSectionNames) {
+ // Skip non-init sections.
+ auto *InitSection = G.findSectionByName(InitSectionName);
+ if (!InitSection)
+ continue;
+
+ // Make a pass over live symbols in the section: those blocks are already
+ // preserved.
+ DenseSet<jitlink::Block *> AlreadyLiveBlocks;
+ for (auto &Sym : InitSection->symbols()) {
+ auto &B = Sym->getBlock();
+ if (Sym->isLive() && Sym->getOffset() == 0 &&
+ Sym->getSize() == B.getSize() && !AlreadyLiveBlocks.count(&B)) {
+ InitSectionSymbols.insert(Sym);
+ AlreadyLiveBlocks.insert(&B);
+ }
+ }
+
+ // Add anonymous symbols to preserve any not-already-preserved blocks.
+ for (auto *B : InitSection->blocks())
+ if (!AlreadyLiveBlocks.count(B))
+ InitSectionSymbols.insert(
+ &G.addAnonymousSymbol(*B, 0, B->getSize(), false, true));
+ }
+
+ if (!InitSectionSymbols.empty()) {
+ std::lock_guard<std::mutex> Lock(PluginMutex);
+ InitSymbolDeps[&MR] = std::move(InitSectionSymbols);
+ }
+
+ return Error::success();
+}
+
+Error ELFNixPlatform::ELFNixPlatformPlugin::registerInitSections(
+ jitlink::LinkGraph &G, JITDylib &JD) {
+
+ SmallVector<jitlink::Section *> InitSections;
+
+ LLVM_DEBUG({ dbgs() << "ELFNixPlatform::registerInitSections\n"; });
+
+ for (auto InitSectionName : InitSectionNames) {
+ if (auto *Sec = G.findSectionByName(InitSectionName)) {
+ InitSections.push_back(Sec);
+ }
+ }
+
+ // Dump the scraped inits.
+ LLVM_DEBUG({
+ dbgs() << "ELFNixPlatform: Scraped " << G.getName() << " init sections:\n";
+ for (auto *Sec : InitSections) {
+ jitlink::SectionRange R(*Sec);
+ dbgs() << " " << Sec->getName() << ": "
+ << formatv("[ {0:x} -- {1:x} ]", R.getStart(), R.getEnd()) << "\n";
+ }
+ });
+
+ return MP.registerInitInfo(JD, InitSections);
+}
+
+Error ELFNixPlatform::ELFNixPlatformPlugin::fixTLVSectionsAndEdges(
+ jitlink::LinkGraph &G, JITDylib &JD) {
+
+ // TODO: Implement TLV support.
+ for (auto *Sym : G.external_symbols())
+ if (Sym->getName() == "__tls_get_addr") {
+ Sym->setName("___orc_rt_elfnix_tls_get_addr");
+ }
+
+ auto *TLSInfoEntrySection = G.findSectionByName("$__TLSINFO");
+
+ if (TLSInfoEntrySection) {
+ Optional<uint64_t> Key;
+ {
+ std::lock_guard<std::mutex> Lock(MP.PlatformMutex);
+ auto I = MP.JITDylibToPThreadKey.find(&JD);
+ if (I != MP.JITDylibToPThreadKey.end())
+ Key = I->second;
+ }
+ if (!Key) {
+ if (auto KeyOrErr = MP.createPThreadKey())
+ Key = *KeyOrErr;
+ else
+ return KeyOrErr.takeError();
+ }
+
+ uint64_t PlatformKeyBits =
+ support::endian::byte_swap(*Key, G.getEndianness());
+
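+ // Write the allocated pthread key (byte-swapped to the graph's endianness)
+ // into the first word of each TLS descriptor entry.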
+ for (auto *B : TLSInfoEntrySection->blocks()) {
+ // FIXME: The TLS descriptor byte length may differ between ISAs.
+ assert(B->getSize() == (G.getPointerSize() * 2) &&
+ "TLS descriptor must be 2 words length");
+ auto TLSInfoEntryContent = B->getMutableContent(G);
+ memcpy(TLSInfoEntryContent.data(), &PlatformKeyBits, G.getPointerSize());
+ }
+ }
+
+ return Error::success();
+}
+
+} // End namespace orc.
+} // End namespace llvm.
diff --git a/llvm/lib/ExecutionEngine/Orc/EPCDebugObjectRegistrar.cpp b/llvm/lib/ExecutionEngine/Orc/EPCDebugObjectRegistrar.cpp
index 5715eda71eee..f3fe0555fa75 100644
--- a/llvm/lib/ExecutionEngine/Orc/EPCDebugObjectRegistrar.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/EPCDebugObjectRegistrar.cpp
@@ -39,13 +39,13 @@ createJITLoaderGDBRegistrar(ExecutionSession &ES) {
assert((*Result)[0].size() == 1 &&
"Unexpected number of addresses in result");
- return std::make_unique<EPCDebugObjectRegistrar>(ES, (*Result)[0][0]);
+ return std::make_unique<EPCDebugObjectRegistrar>(
+ ES, ExecutorAddr((*Result)[0][0]));
}
-Error EPCDebugObjectRegistrar::registerDebugObject(sys::MemoryBlock TargetMem) {
- return ES.callSPSWrapper<void(SPSExecutorAddress, uint64_t)>(
- RegisterFn, ExecutorAddress::fromPtr(TargetMem.base()),
- static_cast<uint64_t>(TargetMem.allocatedSize()));
+Error EPCDebugObjectRegistrar::registerDebugObject(
+ ExecutorAddrRange TargetMem) {
+ return ES.callSPSWrapper<void(SPSExecutorAddrRange)>(RegisterFn, TargetMem);
}
} // namespace orc
diff --git a/llvm/lib/ExecutionEngine/Orc/EPCEHFrameRegistrar.cpp b/llvm/lib/ExecutionEngine/Orc/EPCEHFrameRegistrar.cpp
index 8cdda9ab5a15..4c0fab8aa9fa 100644
--- a/llvm/lib/ExecutionEngine/Orc/EPCEHFrameRegistrar.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/EPCEHFrameRegistrar.cpp
@@ -51,21 +51,22 @@ EPCEHFrameRegistrar::Create(ExecutionSession &ES) {
auto RegisterEHFrameWrapperFnAddr = (*Result)[0][0];
auto DeregisterEHFrameWrapperFnAddr = (*Result)[0][1];
- return std::make_unique<EPCEHFrameRegistrar>(ES, RegisterEHFrameWrapperFnAddr,
- DeregisterEHFrameWrapperFnAddr);
+ return std::make_unique<EPCEHFrameRegistrar>(
+ ES, ExecutorAddr(RegisterEHFrameWrapperFnAddr),
+ ExecutorAddr(DeregisterEHFrameWrapperFnAddr));
}
Error EPCEHFrameRegistrar::registerEHFrames(JITTargetAddress EHFrameSectionAddr,
size_t EHFrameSectionSize) {
- return ES.callSPSWrapper<void(SPSExecutorAddress, uint64_t)>(
- RegisterEHFrameWrapperFnAddr, EHFrameSectionAddr,
+ return ES.callSPSWrapper<void(SPSExecutorAddr, uint64_t)>(
+ RegisterEHFrameWrapperFnAddr, ExecutorAddr(EHFrameSectionAddr),
static_cast<uint64_t>(EHFrameSectionSize));
}
Error EPCEHFrameRegistrar::deregisterEHFrames(
JITTargetAddress EHFrameSectionAddr, size_t EHFrameSectionSize) {
- return ES.callSPSWrapper<void(SPSExecutorAddress, uint64_t)>(
- DeregisterEHFrameWrapperFnAddr, EHFrameSectionAddr,
+ return ES.callSPSWrapper<void(SPSExecutorAddr, uint64_t)>(
+ DeregisterEHFrameWrapperFnAddr, ExecutorAddr(EHFrameSectionAddr),
static_cast<uint64_t>(EHFrameSectionSize));
}
diff --git a/llvm/lib/ExecutionEngine/Orc/EPCGenericDylibManager.cpp b/llvm/lib/ExecutionEngine/Orc/EPCGenericDylibManager.cpp
new file mode 100644
index 000000000000..6c47c5c5f7bb
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/Orc/EPCGenericDylibManager.cpp
@@ -0,0 +1,107 @@
+//===------- EPCGenericDylibManager.cpp -- Dylib management via EPC -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/EPCGenericDylibManager.h"
+
+#include "llvm/ExecutionEngine/Orc/Core.h"
+#include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h"
+#include "llvm/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.h"
+
+namespace llvm {
+namespace orc {
+namespace shared {
+
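+// SPS serialization traits allowing SymbolLookupSet elements and
+// ExecutorProcessControl::LookupRequest values to be serialized for
+// transmission to the executor.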
+template <>
+class SPSSerializationTraits<SPSRemoteSymbolLookupSetElement,
+ SymbolLookupSet::value_type> {
+public:
+ static size_t size(const SymbolLookupSet::value_type &V) {
+ return SPSArgList<SPSString, bool>::size(
+ *V.first, V.second == SymbolLookupFlags::RequiredSymbol);
+ }
+
+ static bool serialize(SPSOutputBuffer &OB,
+ const SymbolLookupSet::value_type &V) {
+ return SPSArgList<SPSString, bool>::serialize(
+ OB, *V.first, V.second == SymbolLookupFlags::RequiredSymbol);
+ }
+};
+
+template <>
+class TrivialSPSSequenceSerialization<SPSRemoteSymbolLookupSetElement,
+ SymbolLookupSet> {
+public:
+ static constexpr bool available = true;
+};
+
+template <>
+class SPSSerializationTraits<SPSRemoteSymbolLookup,
+ ExecutorProcessControl::LookupRequest> {
+ using MemberSerialization =
+ SPSArgList<SPSExecutorAddr, SPSRemoteSymbolLookupSet>;
+
+public:
+ static size_t size(const ExecutorProcessControl::LookupRequest &LR) {
+ return MemberSerialization::size(ExecutorAddr(LR.Handle), LR.Symbols);
+ }
+
+ static bool serialize(SPSOutputBuffer &OB,
+ const ExecutorProcessControl::LookupRequest &LR) {
+ return MemberSerialization::serialize(OB, ExecutorAddr(LR.Handle),
+ LR.Symbols);
+ }
+};
+
+} // end namespace shared
+
+Expected<EPCGenericDylibManager>
+EPCGenericDylibManager::CreateWithDefaultBootstrapSymbols(
+ ExecutorProcessControl &EPC) {
+ SymbolAddrs SAs;
+ if (auto Err = EPC.getBootstrapSymbols(
+ {{SAs.Instance, rt::SimpleExecutorDylibManagerInstanceName},
+ {SAs.Open, rt::SimpleExecutorDylibManagerOpenWrapperName},
+ {SAs.Lookup, rt::SimpleExecutorDylibManagerLookupWrapperName}}))
+ return std::move(Err);
+ return EPCGenericDylibManager(EPC, std::move(SAs));
+}
+
+Expected<tpctypes::DylibHandle> EPCGenericDylibManager::open(StringRef Path,
+ uint64_t Mode) {
+ Expected<tpctypes::DylibHandle> H(0);
+ if (auto Err =
+ EPC.callSPSWrapper<rt::SPSSimpleExecutorDylibManagerOpenSignature>(
+ SAs.Open, H, SAs.Instance, Path, Mode))
+ return std::move(Err);
+ return H;
+}
+
+Expected<std::vector<ExecutorAddr>>
+EPCGenericDylibManager::lookup(tpctypes::DylibHandle H,
+ const SymbolLookupSet &Lookup) {
+ Expected<std::vector<ExecutorAddr>> Result((std::vector<ExecutorAddr>()));
+ if (auto Err =
+ EPC.callSPSWrapper<rt::SPSSimpleExecutorDylibManagerLookupSignature>(
+ SAs.Lookup, Result, SAs.Instance, H, Lookup))
+ return std::move(Err);
+ return Result;
+}
+
+Expected<std::vector<ExecutorAddr>>
+EPCGenericDylibManager::lookup(tpctypes::DylibHandle H,
+ const RemoteSymbolLookupSet &Lookup) {
+ Expected<std::vector<ExecutorAddr>> Result((std::vector<ExecutorAddr>()));
+ if (auto Err =
+ EPC.callSPSWrapper<rt::SPSSimpleExecutorDylibManagerLookupSignature>(
+ SAs.Lookup, Result, SAs.Instance, H, Lookup))
+ return std::move(Err);
+ return Result;
+}
+
+} // end namespace orc
+} // end namespace llvm
diff --git a/llvm/lib/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.cpp b/llvm/lib/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.cpp
new file mode 100644
index 000000000000..9b712cb8f7ca
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.cpp
@@ -0,0 +1,184 @@
+//===---- EPCGenericJITLinkMemoryManager.cpp -- Mem management via EPC ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.h"
+
+#include "llvm/ExecutionEngine/JITLink/JITLink.h"
+#include "llvm/ExecutionEngine/Orc/LookupAndRecordAddrs.h"
+#include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h"
+
+#include <limits>
+
+using namespace llvm::jitlink;
+
+namespace llvm {
+namespace orc {
+
+class EPCGenericJITLinkMemoryManager::InFlightAlloc
+ : public jitlink::JITLinkMemoryManager::InFlightAlloc {
+public:
+
+ // FIXME: The C++98 initializer is an attempt to work around compile failures
+ // due to http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1397.
+ // We should be able to switch this back to member initialization once that
+ // issue is fixed.
+ struct SegInfo {
+ SegInfo() : WorkingMem(nullptr), ContentSize(0), ZeroFillSize(0) {}
+
+ char *WorkingMem;
+ ExecutorAddr Addr;
+ uint64_t ContentSize;
+ uint64_t ZeroFillSize;
+ };
+
+ using SegInfoMap = AllocGroupSmallMap<SegInfo>;
+
+ InFlightAlloc(EPCGenericJITLinkMemoryManager &Parent, LinkGraph &G,
+ ExecutorAddr AllocAddr, SegInfoMap Segs)
+ : Parent(Parent), G(G), AllocAddr(AllocAddr), Segs(std::move(Segs)) {}
+
+ void finalize(OnFinalizedFunction OnFinalize) override {
+ tpctypes::FinalizeRequest FR;
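+ // Build the finalize request: one segment entry per alloc group, carrying
+ // its protections, page-aligned target size, and working-memory content.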
+ for (auto &KV : Segs) {
+ assert(KV.second.ContentSize <= std::numeric_limits<size_t>::max());
+ FR.Segments.push_back(tpctypes::SegFinalizeRequest{
+ tpctypes::toWireProtectionFlags(
+ toSysMemoryProtectionFlags(KV.first.getMemProt())),
+ KV.second.Addr,
+ alignTo(KV.second.ContentSize + KV.second.ZeroFillSize,
+ Parent.EPC.getPageSize()),
+ {KV.second.WorkingMem, static_cast<size_t>(KV.second.ContentSize)}});
+ }
+
+ // Transfer allocation actions.
+ // FIXME: Merge JITLink and ORC SupportFunctionCall and Action list types,
+ // turn this into a std::swap.
+ FR.Actions.reserve(G.allocActions().size());
+ for (auto &ActPair : G.allocActions())
+ FR.Actions.push_back({{ExecutorAddr(ActPair.Finalize.FnAddr),
+ {ExecutorAddr(ActPair.Finalize.CtxAddr),
+ ExecutorAddrDiff(ActPair.Finalize.CtxSize)}},
+ {ExecutorAddr(ActPair.Dealloc.FnAddr),
+ {ExecutorAddr(ActPair.Dealloc.CtxAddr),
+ ExecutorAddrDiff(ActPair.Dealloc.CtxSize)}}});
+ G.allocActions().clear();
+
+ Parent.EPC.callSPSWrapperAsync<
+ rt::SPSSimpleExecutorMemoryManagerFinalizeSignature>(
+ Parent.SAs.Finalize,
+ [OnFinalize = std::move(OnFinalize), AllocAddr = this->AllocAddr](
+ Error SerializationErr, Error FinalizeErr) mutable {
+ // FIXME: Release abandoned alloc.
+ if (SerializationErr) {
+ cantFail(std::move(FinalizeErr));
+ OnFinalize(std::move(SerializationErr));
+ } else if (FinalizeErr)
+ OnFinalize(std::move(FinalizeErr));
+ else
+ OnFinalize(FinalizedAlloc(AllocAddr.getValue()));
+ },
+ Parent.SAs.Allocator, std::move(FR));
+ }
+
+ void abandon(OnAbandonedFunction OnAbandoned) override {
+ // FIXME: Return memory to pool instead.
+ Parent.EPC.callSPSWrapperAsync<
+ rt::SPSSimpleExecutorMemoryManagerDeallocateSignature>(
+ Parent.SAs.Deallocate,
+ [OnAbandoned = std::move(OnAbandoned)](Error SerializationErr,
+ Error DeallocateErr) mutable {
+ if (SerializationErr) {
+ cantFail(std::move(DeallocateErr));
+ OnAbandoned(std::move(SerializationErr));
+ } else
+ OnAbandoned(std::move(DeallocateErr));
+ },
+ Parent.SAs.Allocator, ArrayRef<ExecutorAddr>(AllocAddr));
+ }
+
+private:
+ EPCGenericJITLinkMemoryManager &Parent;
+ LinkGraph &G;
+ ExecutorAddr AllocAddr;
+ SegInfoMap Segs;
+};
+
+void EPCGenericJITLinkMemoryManager::allocate(const JITLinkDylib *JD,
+ LinkGraph &G,
+ OnAllocatedFunction OnAllocated) {
+ BasicLayout BL(G);
+
+ auto Pages = BL.getContiguousPageBasedLayoutSizes(EPC.getPageSize());
+ if (!Pages)
+ return OnAllocated(Pages.takeError());
+
+ EPC.callSPSWrapperAsync<rt::SPSSimpleExecutorMemoryManagerReserveSignature>(
+ SAs.Reserve,
+ [this, BL = std::move(BL), OnAllocated = std::move(OnAllocated)](
+ Error SerializationErr, Expected<ExecutorAddr> AllocAddr) mutable {
+ if (SerializationErr) {
+ cantFail(AllocAddr.takeError());
+ return OnAllocated(std::move(SerializationErr));
+ }
+ if (!AllocAddr)
+ return OnAllocated(AllocAddr.takeError());
+
+ completeAllocation(*AllocAddr, std::move(BL), std::move(OnAllocated));
+ },
+ SAs.Allocator, Pages->total());
+}
+
+void EPCGenericJITLinkMemoryManager::deallocate(
+ std::vector<FinalizedAlloc> Allocs, OnDeallocatedFunction OnDeallocated) {
+ EPC.callSPSWrapperAsync<
+ rt::SPSSimpleExecutorMemoryManagerDeallocateSignature>(
+ SAs.Deallocate,
+ [OnDeallocated = std::move(OnDeallocated)](Error SerErr,
+ Error DeallocErr) mutable {
+ if (SerErr) {
+ cantFail(std::move(DeallocErr));
+ OnDeallocated(std::move(SerErr));
+ } else
+ OnDeallocated(std::move(DeallocErr));
+ },
+ SAs.Allocator, Allocs);
+ for (auto &A : Allocs)
+ A.release();
+}
+
+void EPCGenericJITLinkMemoryManager::completeAllocation(
+ ExecutorAddr AllocAddr, BasicLayout BL, OnAllocatedFunction OnAllocated) {
+
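+ // Lay the segments out contiguously (page-aligned) from AllocAddr, allocate
+ // working memory for each in the LinkGraph, then apply the layout and hand
+ // the result to an InFlightAlloc for finalization.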
+ InFlightAlloc::SegInfoMap SegInfos;
+
+ ExecutorAddr NextSegAddr = AllocAddr;
+ for (auto &KV : BL.segments()) {
+ const auto &AG = KV.first;
+ auto &Seg = KV.second;
+
+ Seg.Addr = NextSegAddr.getValue();
+ KV.second.WorkingMem = BL.getGraph().allocateBuffer(Seg.ContentSize).data();
+ NextSegAddr += ExecutorAddrDiff(
+ alignTo(Seg.ContentSize + Seg.ZeroFillSize, EPC.getPageSize()));
+
+ auto &SegInfo = SegInfos[AG];
+ SegInfo.ContentSize = Seg.ContentSize;
+ SegInfo.ZeroFillSize = Seg.ZeroFillSize;
+ SegInfo.Addr = ExecutorAddr(Seg.Addr);
+ SegInfo.WorkingMem = Seg.WorkingMem;
+ }
+
+ if (auto Err = BL.apply())
+ return OnAllocated(std::move(Err));
+
+ OnAllocated(std::make_unique<InFlightAlloc>(*this, BL.getGraph(), AllocAddr,
+ std::move(SegInfos)));
+}
+
+} // end namespace orc
+} // end namespace llvm
diff --git a/llvm/lib/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.cpp b/llvm/lib/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.cpp
new file mode 100644
index 000000000000..1d98e104a4d7
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.cpp
@@ -0,0 +1,317 @@
+//===----- EPCGenericRTDyldMemoryManager.cpp - EPC-based MemMgr -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.h"
+#include "llvm/ExecutionEngine/Orc/EPCGenericMemoryAccess.h"
+#include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h"
+#include "llvm/Support/Alignment.h"
+#include "llvm/Support/FormatVariadic.h"
+
+#define DEBUG_TYPE "orc"
+
+namespace llvm {
+namespace orc {
+
+Expected<std::unique_ptr<EPCGenericRTDyldMemoryManager>>
+EPCGenericRTDyldMemoryManager::CreateWithDefaultBootstrapSymbols(
+ ExecutorProcessControl &EPC) {
+ SymbolAddrs SAs;
+ if (auto Err = EPC.getBootstrapSymbols(
+ {{SAs.Instance, rt::SimpleExecutorMemoryManagerInstanceName},
+ {SAs.Reserve, rt::SimpleExecutorMemoryManagerReserveWrapperName},
+ {SAs.Finalize, rt::SimpleExecutorMemoryManagerFinalizeWrapperName},
+ {SAs.Deallocate,
+ rt::SimpleExecutorMemoryManagerDeallocateWrapperName},
+ {SAs.RegisterEHFrame,
+ rt::RegisterEHFrameSectionCustomDirectWrapperName},
+ {SAs.DeregisterEHFrame,
+ rt::DeregisterEHFrameSectionCustomDirectWrapperName}}))
+ return std::move(Err);
+ return std::make_unique<EPCGenericRTDyldMemoryManager>(EPC, std::move(SAs));
+}
+
+EPCGenericRTDyldMemoryManager::EPCGenericRTDyldMemoryManager(
+ ExecutorProcessControl &EPC, SymbolAddrs SAs)
+ : EPC(EPC), SAs(std::move(SAs)) {
+ LLVM_DEBUG(dbgs() << "Created remote allocator " << (void *)this << "\n");
+}
+
+EPCGenericRTDyldMemoryManager::~EPCGenericRTDyldMemoryManager() {
+ LLVM_DEBUG(dbgs() << "Destroyed remote allocator " << (void *)this << "\n");
+ if (!ErrMsg.empty())
+ errs() << "Destroying with existing errors:\n" << ErrMsg << "\n";
+
+ Error Err = Error::success();
+ if (auto Err2 = EPC.callSPSWrapper<
+ rt::SPSSimpleExecutorMemoryManagerDeallocateSignature>(
+ SAs.Reserve, Err, SAs.Instance, FinalizedAllocs)) {
+ // FIXME: Report errors through EPC once that functionality is available.
+ logAllUnhandledErrors(std::move(Err2), errs(), "");
+ return;
+ }
+
+ if (Err)
+ logAllUnhandledErrors(std::move(Err), errs(), "");
+}
+
+uint8_t *EPCGenericRTDyldMemoryManager::allocateCodeSection(
+ uintptr_t Size, unsigned Alignment, unsigned SectionID,
+ StringRef SectionName) {
+ std::lock_guard<std::mutex> Lock(M);
+ LLVM_DEBUG({
+ dbgs() << "Allocator " << (void *)this << " allocating code section "
+ << SectionName << ": size = " << formatv("{0:x}", Size)
+ << " bytes, alignment = " << Alignment << "\n";
+ });
+ auto &Seg = Unmapped.back().CodeAllocs;
+ Seg.emplace_back(Size, Alignment);
+ return reinterpret_cast<uint8_t *>(
+ alignAddr(Seg.back().Contents.get(), Align(Alignment)));
+}
+
+uint8_t *EPCGenericRTDyldMemoryManager::allocateDataSection(
+ uintptr_t Size, unsigned Alignment, unsigned SectionID,
+ StringRef SectionName, bool IsReadOnly) {
+ std::lock_guard<std::mutex> Lock(M);
+ LLVM_DEBUG({
+ dbgs() << "Allocator " << (void *)this << " allocating "
+ << (IsReadOnly ? "ro" : "rw") << "-data section " << SectionName
+ << ": size = " << formatv("{0:x}", Size) << " bytes, alignment "
+ << Alignment << ")\n";
+ });
+
+ auto &Seg =
+ IsReadOnly ? Unmapped.back().RODataAllocs : Unmapped.back().RWDataAllocs;
+
+ Seg.emplace_back(Size, Alignment);
+ return reinterpret_cast<uint8_t *>(
+ alignAddr(Seg.back().Contents.get(), Align(Alignment)));
+}
+
+void EPCGenericRTDyldMemoryManager::reserveAllocationSpace(
+ uintptr_t CodeSize, uint32_t CodeAlign, uintptr_t RODataSize,
+ uint32_t RODataAlign, uintptr_t RWDataSize, uint32_t RWDataAlign) {
+
+ {
+ std::lock_guard<std::mutex> Lock(M);
+ // If there's already an error then bail out.
+ if (!ErrMsg.empty())
+ return;
+
+ if (!isPowerOf2_32(CodeAlign) || CodeAlign > EPC.getPageSize()) {
+ ErrMsg = "Invalid code alignment in reserveAllocationSpace";
+ return;
+ }
+ if (!isPowerOf2_32(RODataAlign) || RODataAlign > EPC.getPageSize()) {
+ ErrMsg = "Invalid ro-data alignment in reserveAllocationSpace";
+ return;
+ }
+ if (!isPowerOf2_32(RWDataAlign) || RWDataAlign > EPC.getPageSize()) {
+ ErrMsg = "Invalid rw-data alignment in reserveAllocationSpace";
+ return;
+ }
+ }
+
+ uint64_t TotalSize = 0;
+ TotalSize += alignTo(CodeSize, EPC.getPageSize());
+ TotalSize += alignTo(RODataSize, EPC.getPageSize());
+ TotalSize += alignTo(RWDataSize, EPC.getPageSize());
+
+ LLVM_DEBUG({
+ dbgs() << "Allocator " << (void *)this << " reserving "
+ << formatv("{0:x}", TotalSize) << " bytes.\n";
+ });
+
+ Expected<ExecutorAddr> TargetAllocAddr((ExecutorAddr()));
+ if (auto Err = EPC.callSPSWrapper<
+ rt::SPSSimpleExecutorMemoryManagerReserveSignature>(
+ SAs.Reserve, TargetAllocAddr, SAs.Instance, TotalSize)) {
+ std::lock_guard<std::mutex> Lock(M);
+ ErrMsg = toString(std::move(Err));
+ return;
+ }
+ if (!TargetAllocAddr) {
+ std::lock_guard<std::mutex> Lock(M);
+ ErrMsg = toString(TargetAllocAddr.takeError());
+ return;
+ }
+
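+ // Record the reserved block, split into page-aligned code, ro-data and
+ // rw-data ranges laid out back to back.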
+ std::lock_guard<std::mutex> Lock(M);
+ Unmapped.push_back(AllocGroup());
+ Unmapped.back().RemoteCode = {
+ *TargetAllocAddr, ExecutorAddrDiff(alignTo(CodeSize, EPC.getPageSize()))};
+ Unmapped.back().RemoteROData = {
+ Unmapped.back().RemoteCode.End,
+ ExecutorAddrDiff(alignTo(RODataSize, EPC.getPageSize()))};
+ Unmapped.back().RemoteRWData = {
+ Unmapped.back().RemoteROData.End,
+ ExecutorAddrDiff(alignTo(RWDataSize, EPC.getPageSize()))};
+}
+
+bool EPCGenericRTDyldMemoryManager::needsToReserveAllocationSpace() {
+ return true;
+}
+
+void EPCGenericRTDyldMemoryManager::registerEHFrames(uint8_t *Addr,
+ uint64_t LoadAddr,
+ size_t Size) {
+ LLVM_DEBUG({
+ dbgs() << "Allocator " << (void *)this << " added unfinalized eh-frame "
+ << formatv("[ {0:x} {1:x} ]", LoadAddr, LoadAddr + Size) << "\n";
+ });
+ std::lock_guard<std::mutex> Lock(M);
+ // Bail out early if there's already an error.
+ if (!ErrMsg.empty())
+ return;
+
+ ExecutorAddr LA(LoadAddr);
+ for (auto &Alloc : llvm::reverse(Unfinalized)) {
+ if (Alloc.RemoteCode.contains(LA) || Alloc.RemoteROData.contains(LA) ||
+ Alloc.RemoteRWData.contains(LA)) {
+ Alloc.UnfinalizedEHFrames.push_back({LA, Size});
+ return;
+ }
+ }
+ ErrMsg = "eh-frame does not lie inside unfinalized alloc";
+}
+
+void EPCGenericRTDyldMemoryManager::deregisterEHFrames() {
+ // This is a no-op for us: We've registered a deallocation action for it.
+}
+
+void EPCGenericRTDyldMemoryManager::notifyObjectLoaded(
+ RuntimeDyld &Dyld, const object::ObjectFile &Obj) {
+ std::lock_guard<std::mutex> Lock(M);
+ LLVM_DEBUG(dbgs() << "Allocator " << (void *)this << " applied mappings:\n");
+ for (auto &ObjAllocs : Unmapped) {
+ mapAllocsToRemoteAddrs(Dyld, ObjAllocs.CodeAllocs,
+ ObjAllocs.RemoteCode.Start);
+ mapAllocsToRemoteAddrs(Dyld, ObjAllocs.RODataAllocs,
+ ObjAllocs.RemoteROData.Start);
+ mapAllocsToRemoteAddrs(Dyld, ObjAllocs.RWDataAllocs,
+ ObjAllocs.RemoteRWData.Start);
+ Unfinalized.push_back(std::move(ObjAllocs));
+ }
+ Unmapped.clear();
+}
+
+bool EPCGenericRTDyldMemoryManager::finalizeMemory(std::string *ErrMsg) {
+ LLVM_DEBUG(dbgs() << "Allocator " << (void *)this << " finalizing:\n");
+
+ // If there's an error then bail out here.
+ std::vector<AllocGroup> Allocs;
+ {
+ std::lock_guard<std::mutex> Lock(M);
+ if (ErrMsg && !this->ErrMsg.empty()) {
+ *ErrMsg = std::move(this->ErrMsg);
+ return true;
+ }
+ std::swap(Allocs, Unfinalized);
+ }
+
+ // Loop over unfinalized objects to make finalization requests.
+ for (auto &ObjAllocs : Allocs) {
+
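+ // For each object: aggregate its code, ro-data and rw-data sections into one
+ // content buffer per segment, attach eh-frame register/deregister actions,
+ // then send a single finalize request to the executor.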
+ tpctypes::WireProtectionFlags SegProts[3] = {
+ tpctypes::toWireProtectionFlags(
+ static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
+ sys::Memory::MF_EXEC)),
+ tpctypes::toWireProtectionFlags(sys::Memory::MF_READ),
+ tpctypes::toWireProtectionFlags(
+ static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
+ sys::Memory::MF_WRITE))};
+
+ ExecutorAddrRange *RemoteAddrs[3] = {&ObjAllocs.RemoteCode,
+ &ObjAllocs.RemoteROData,
+ &ObjAllocs.RemoteRWData};
+
+ std::vector<Alloc> *SegSections[3] = {&ObjAllocs.CodeAllocs,
+ &ObjAllocs.RODataAllocs,
+ &ObjAllocs.RWDataAllocs};
+
+ tpctypes::FinalizeRequest FR;
+ std::unique_ptr<char[]> AggregateContents[3];
+
+ for (unsigned I = 0; I != 3; ++I) {
+ FR.Segments.push_back({});
+ auto &Seg = FR.Segments.back();
+ Seg.Prot = SegProts[I];
+ Seg.Addr = RemoteAddrs[I]->Start;
+ for (auto &SecAlloc : *SegSections[I]) {
+ Seg.Size = alignTo(Seg.Size, SecAlloc.Align);
+ Seg.Size += SecAlloc.Size;
+ }
+ AggregateContents[I] = std::make_unique<char[]>(Seg.Size);
+ size_t SecOffset = 0;
+ for (auto &SecAlloc : *SegSections[I]) {
+ SecOffset = alignTo(SecOffset, SecAlloc.Align);
+ memcpy(&AggregateContents[I][SecOffset],
+ reinterpret_cast<const char *>(
+ alignAddr(SecAlloc.Contents.get(), Align(SecAlloc.Align))),
+ SecAlloc.Size);
+ SecOffset += SecAlloc.Size;
+ // FIXME: Can we reset SecAlloc.Content here, now that it's copied into
+ // the aggregated content?
+ }
+ Seg.Content = {AggregateContents[I].get(), SecOffset};
+ }
+
+ for (auto &Frame : ObjAllocs.UnfinalizedEHFrames)
+ FR.Actions.push_back(
+ {{SAs.RegisterEHFrame,
+ {ExecutorAddr(Frame.Addr), ExecutorAddrDiff(Frame.Size)}},
+ {SAs.DeregisterEHFrame,
+ {ExecutorAddr(Frame.Addr), ExecutorAddrDiff(Frame.Size)}}});
+
+ // We'll also need to make an extra allocation for the eh-frame wrapper call
+ // arguments.
+ Error FinalizeErr = Error::success();
+ if (auto Err = EPC.callSPSWrapper<
+ rt::SPSSimpleExecutorMemoryManagerFinalizeSignature>(
+ SAs.Finalize, FinalizeErr, SAs.Instance, std::move(FR))) {
+ std::lock_guard<std::mutex> Lock(M);
+ this->ErrMsg = toString(std::move(Err));
+ dbgs() << "Serialization error: " << this->ErrMsg << "\n";
+ if (ErrMsg)
+ *ErrMsg = this->ErrMsg;
+ return true;
+ }
+ if (FinalizeErr) {
+ std::lock_guard<std::mutex> Lock(M);
+ this->ErrMsg = toString(std::move(FinalizeErr));
+ dbgs() << "Finalization error: " << this->ErrMsg << "\n";
+ if (ErrMsg)
+ *ErrMsg = this->ErrMsg;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+void EPCGenericRTDyldMemoryManager::mapAllocsToRemoteAddrs(
+ RuntimeDyld &Dyld, std::vector<Alloc> &Allocs, ExecutorAddr NextAddr) {
+ for (auto &Alloc : Allocs) {
+ NextAddr.setValue(alignTo(NextAddr.getValue(), Alloc.Align));
+ LLVM_DEBUG({
+ dbgs() << " " << static_cast<void *>(Alloc.Contents.get()) << " -> "
+ << format("0x%016" PRIx64, NextAddr.getValue()) << "\n";
+ });
+ Dyld.mapSectionAddress(reinterpret_cast<const void *>(alignAddr(
+ Alloc.Contents.get(), Align(Alloc.Align))),
+ NextAddr.getValue());
+ Alloc.RemoteAddr = NextAddr;
+ // Only advance NextAddr if it was non-null to begin with,
+ // otherwise leave it as null.
+ if (NextAddr)
+ NextAddr += ExecutorAddrDiff(Alloc.Size);
+ }
+}
+
+} // end namespace orc
+} // end namespace llvm
diff --git a/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp
index b9c70b0aeb3c..818b6b52ff83 100644
--- a/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp
@@ -43,12 +43,12 @@ public:
protected:
Error grow() override;
- using Allocation = jitlink::JITLinkMemoryManager::Allocation;
+ using FinalizedAlloc = jitlink::JITLinkMemoryManager::FinalizedAlloc;
EPCIndirectionUtils &EPCIU;
unsigned TrampolineSize = 0;
unsigned TrampolinesPerPage = 0;
- std::vector<std::unique_ptr<Allocation>> TrampolineBlocks;
+ std::vector<FinalizedAlloc> TrampolineBlocks;
};
class EPCIndirectStubsManager : public IndirectStubsManager,
@@ -89,12 +89,19 @@ EPCTrampolinePool::EPCTrampolinePool(EPCIndirectionUtils &EPCIU)
Error EPCTrampolinePool::deallocatePool() {
Error Err = Error::success();
- for (auto &Alloc : TrampolineBlocks)
- Err = joinErrors(std::move(Err), Alloc->deallocate());
- return Err;
+ std::promise<MSVCPError> DeallocResultP;
+ auto DeallocResultF = DeallocResultP.get_future();
+
+ EPCIU.getExecutorProcessControl().getMemMgr().deallocate(
+ std::move(TrampolineBlocks),
+ [&](Error Err) { DeallocResultP.set_value(std::move(Err)); });
+
+ return DeallocResultF.get();
}
Error EPCTrampolinePool::grow() {
+ using namespace jitlink;
+
assert(AvailableTrampolines.empty() &&
"Grow called with trampolines still available");
@@ -102,34 +109,26 @@ Error EPCTrampolinePool::grow() {
assert(ResolverAddress && "Resolver address can not be null");
auto &EPC = EPCIU.getExecutorProcessControl();
- constexpr auto TrampolinePagePermissions =
- static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
- sys::Memory::MF_EXEC);
auto PageSize = EPC.getPageSize();
- jitlink::JITLinkMemoryManager::SegmentsRequestMap Request;
- Request[TrampolinePagePermissions] = {PageSize, static_cast<size_t>(PageSize),
- 0};
- auto Alloc = EPC.getMemMgr().allocate(nullptr, Request);
-
+ auto Alloc = SimpleSegmentAlloc::Create(
+ EPC.getMemMgr(), nullptr,
+ {{MemProt::Read | MemProt::Exec, {PageSize, Align(PageSize)}}});
if (!Alloc)
return Alloc.takeError();
unsigned NumTrampolines = TrampolinesPerPage;
- auto WorkingMemory = (*Alloc)->getWorkingMemory(TrampolinePagePermissions);
- auto TargetAddress = (*Alloc)->getTargetMemory(TrampolinePagePermissions);
-
- EPCIU.getABISupport().writeTrampolines(WorkingMemory.data(), TargetAddress,
- ResolverAddress, NumTrampolines);
-
- auto TargetAddr = (*Alloc)->getTargetMemory(TrampolinePagePermissions);
+ auto SegInfo = Alloc->getSegInfo(MemProt::Read | MemProt::Exec);
+ EPCIU.getABISupport().writeTrampolines(
+ SegInfo.WorkingMem.data(), SegInfo.Addr, ResolverAddress, NumTrampolines);
for (unsigned I = 0; I < NumTrampolines; ++I)
- AvailableTrampolines.push_back(TargetAddr + (I * TrampolineSize));
+ AvailableTrampolines.push_back(SegInfo.Addr + (I * TrampolineSize));
- if (auto Err = (*Alloc)->finalize())
- return Err;
+ auto FA = Alloc->finalize();
+ if (!FA)
+ return FA.takeError();
- TrampolineBlocks.push_back(std::move(*Alloc));
+ TrampolineBlocks.push_back(std::move(*FA));
return Error::success();
}
@@ -162,16 +161,18 @@ Error EPCIndirectStubsManager::createStubs(const StubInitsMap &StubInits) {
unsigned ASIdx = 0;
std::vector<tpctypes::UInt32Write> PtrUpdates;
for (auto &SI : StubInits)
- PtrUpdates.push_back({(*AvailableStubInfos)[ASIdx++].PointerAddress,
- static_cast<uint32_t>(SI.second.first)});
+ PtrUpdates.push_back(
+ {ExecutorAddr((*AvailableStubInfos)[ASIdx++].PointerAddress),
+ static_cast<uint32_t>(SI.second.first)});
return MemAccess.writeUInt32s(PtrUpdates);
}
case 8: {
unsigned ASIdx = 0;
std::vector<tpctypes::UInt64Write> PtrUpdates;
for (auto &SI : StubInits)
- PtrUpdates.push_back({(*AvailableStubInfos)[ASIdx++].PointerAddress,
- static_cast<uint64_t>(SI.second.first)});
+ PtrUpdates.push_back(
+ {ExecutorAddr((*AvailableStubInfos)[ASIdx++].PointerAddress),
+ static_cast<uint64_t>(SI.second.first)});
return MemAccess.writeUInt64s(PtrUpdates);
}
default:
@@ -213,11 +214,11 @@ Error EPCIndirectStubsManager::updatePointer(StringRef Name,
auto &MemAccess = EPCIU.getExecutorProcessControl().getMemoryAccess();
switch (EPCIU.getABISupport().getPointerSize()) {
case 4: {
- tpctypes::UInt32Write PUpdate(PtrAddr, NewAddr);
+ tpctypes::UInt32Write PUpdate(ExecutorAddr(PtrAddr), NewAddr);
return MemAccess.writeUInt32s(PUpdate);
}
case 8: {
- tpctypes::UInt64Write PUpdate(PtrAddr, NewAddr);
+ tpctypes::UInt64Write PUpdate(ExecutorAddr(PtrAddr), NewAddr);
return MemAccess.writeUInt64s(PUpdate);
}
default:
@@ -267,17 +268,17 @@ EPCIndirectionUtils::Create(ExecutorProcessControl &EPC) {
}
Error EPCIndirectionUtils::cleanup() {
- Error Err = Error::success();
- for (auto &A : IndirectStubAllocs)
- Err = joinErrors(std::move(Err), A->deallocate());
+ auto &MemMgr = EPC.getMemMgr();
+ auto Err = MemMgr.deallocate(std::move(IndirectStubAllocs));
if (TP)
Err = joinErrors(std::move(Err),
static_cast<EPCTrampolinePool &>(*TP).deallocatePool());
if (ResolverBlock)
- Err = joinErrors(std::move(Err), ResolverBlock->deallocate());
+ Err =
+ joinErrors(std::move(Err), MemMgr.deallocate(std::move(ResolverBlock)));
return Err;
}
@@ -285,29 +286,29 @@ Error EPCIndirectionUtils::cleanup() {
Expected<JITTargetAddress>
EPCIndirectionUtils::writeResolverBlock(JITTargetAddress ReentryFnAddr,
JITTargetAddress ReentryCtxAddr) {
+ using namespace jitlink;
+
assert(ABI && "ABI can not be null");
- constexpr auto ResolverBlockPermissions =
- static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
- sys::Memory::MF_EXEC);
auto ResolverSize = ABI->getResolverCodeSize();
- jitlink::JITLinkMemoryManager::SegmentsRequestMap Request;
- Request[ResolverBlockPermissions] = {EPC.getPageSize(),
- static_cast<size_t>(ResolverSize), 0};
- auto Alloc = EPC.getMemMgr().allocate(nullptr, Request);
+ auto Alloc =
+ SimpleSegmentAlloc::Create(EPC.getMemMgr(), nullptr,
+ {{MemProt::Read | MemProt::Exec,
+ {ResolverSize, Align(EPC.getPageSize())}}});
+
if (!Alloc)
return Alloc.takeError();
- auto WorkingMemory = (*Alloc)->getWorkingMemory(ResolverBlockPermissions);
- ResolverBlockAddr = (*Alloc)->getTargetMemory(ResolverBlockPermissions);
- ABI->writeResolverCode(WorkingMemory.data(), ResolverBlockAddr, ReentryFnAddr,
+ auto SegInfo = Alloc->getSegInfo(MemProt::Read | MemProt::Exec);
+ ABI->writeResolverCode(SegInfo.WorkingMem.data(), SegInfo.Addr, ReentryFnAddr,
ReentryCtxAddr);
- if (auto Err = (*Alloc)->finalize())
- return std::move(Err);
+ auto FA = Alloc->finalize();
+ if (!FA)
+ return FA.takeError();
- ResolverBlock = std::move(*Alloc);
- return ResolverBlockAddr;
+ ResolverBlock = std::move(*FA);
+ return SegInfo.Addr;
}
std::unique_ptr<IndirectStubsManager>
@@ -341,6 +342,7 @@ EPCIndirectionUtils::EPCIndirectionUtils(ExecutorProcessControl &EPC,
Expected<EPCIndirectionUtils::IndirectStubInfoVector>
EPCIndirectionUtils::getIndirectStubs(unsigned NumStubs) {
+ using namespace jitlink;
std::lock_guard<std::mutex> Lock(EPCUIMutex);
@@ -350,42 +352,40 @@ EPCIndirectionUtils::getIndirectStubs(unsigned NumStubs) {
auto PageSize = EPC.getPageSize();
auto StubBytes = alignTo(NumStubsToAllocate * ABI->getStubSize(), PageSize);
NumStubsToAllocate = StubBytes / ABI->getStubSize();
- auto PointerBytes =
+ auto PtrBytes =
alignTo(NumStubsToAllocate * ABI->getPointerSize(), PageSize);
- constexpr auto StubPagePermissions =
- static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
- sys::Memory::MF_EXEC);
- constexpr auto PointerPagePermissions =
- static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
- sys::Memory::MF_WRITE);
-
- jitlink::JITLinkMemoryManager::SegmentsRequestMap Request;
- Request[StubPagePermissions] = {PageSize, static_cast<size_t>(StubBytes),
- 0};
- Request[PointerPagePermissions] = {PageSize, 0, PointerBytes};
- auto Alloc = EPC.getMemMgr().allocate(nullptr, Request);
+ auto StubProt = MemProt::Read | MemProt::Exec;
+ auto PtrProt = MemProt::Read | MemProt::Write;
+
+ auto Alloc = SimpleSegmentAlloc::Create(
+ EPC.getMemMgr(), nullptr,
+ {{StubProt, {static_cast<size_t>(StubBytes), Align(PageSize)}},
+ {PtrProt, {static_cast<size_t>(PtrBytes), Align(PageSize)}}});
+
if (!Alloc)
return Alloc.takeError();
- auto StubTargetAddr = (*Alloc)->getTargetMemory(StubPagePermissions);
- auto PointerTargetAddr = (*Alloc)->getTargetMemory(PointerPagePermissions);
+ auto StubSeg = Alloc->getSegInfo(StubProt);
+ auto PtrSeg = Alloc->getSegInfo(PtrProt);
+
+ ABI->writeIndirectStubsBlock(StubSeg.WorkingMem.data(), StubSeg.Addr,
+ PtrSeg.Addr, NumStubsToAllocate);
- ABI->writeIndirectStubsBlock(
- (*Alloc)->getWorkingMemory(StubPagePermissions).data(), StubTargetAddr,
- PointerTargetAddr, NumStubsToAllocate);
+ auto FA = Alloc->finalize();
+ if (!FA)
+ return FA.takeError();
- if (auto Err = (*Alloc)->finalize())
- return std::move(Err);
+ IndirectStubAllocs.push_back(std::move(*FA));
+ auto StubExecutorAddr = StubSeg.Addr;
+ auto PtrExecutorAddr = PtrSeg.Addr;
for (unsigned I = 0; I != NumStubsToAllocate; ++I) {
AvailableIndirectStubs.push_back(
- IndirectStubInfo(StubTargetAddr, PointerTargetAddr));
- StubTargetAddr += ABI->getStubSize();
- PointerTargetAddr += ABI->getPointerSize();
+ IndirectStubInfo(StubExecutorAddr, PtrExecutorAddr));
+ StubExecutorAddr += ABI->getStubSize();
+ PtrExecutorAddr += ABI->getPointerSize();
}
-
- IndirectStubAllocs.push_back(std::move(*Alloc));
}
assert(NumStubs <= AvailableIndirectStubs.size() &&
diff --git a/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp
index 7a76a6ccc122..2ab9ed4f856b 100644
--- a/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp
@@ -12,9 +12,9 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/MachOUniversal.h"
#include "llvm/Support/FormatVariadic.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"
#include <string>
diff --git a/llvm/lib/ExecutionEngine/Orc/ExecutorProcessControl.cpp b/llvm/lib/ExecutionEngine/Orc/ExecutorProcessControl.cpp
index 7d86d125d1db..2eb835551adb 100644
--- a/llvm/lib/ExecutionEngine/Orc/ExecutorProcessControl.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/ExecutorProcessControl.cpp
@@ -24,20 +24,22 @@ ExecutorProcessControl::MemoryAccess::~MemoryAccess() {}
ExecutorProcessControl::~ExecutorProcessControl() {}
SelfExecutorProcessControl::SelfExecutorProcessControl(
- std::shared_ptr<SymbolStringPool> SSP, Triple TargetTriple,
- unsigned PageSize, std::unique_ptr<jitlink::JITLinkMemoryManager> MemMgr)
- : ExecutorProcessControl(std::move(SSP)) {
+ std::shared_ptr<SymbolStringPool> SSP, std::unique_ptr<TaskDispatcher> D,
+ Triple TargetTriple, unsigned PageSize,
+ std::unique_ptr<jitlink::JITLinkMemoryManager> MemMgr)
+ : ExecutorProcessControl(std::move(SSP), std::move(D)) {
OwnedMemMgr = std::move(MemMgr);
if (!OwnedMemMgr)
- OwnedMemMgr = std::make_unique<jitlink::InProcessMemoryManager>();
+ OwnedMemMgr = std::make_unique<jitlink::InProcessMemoryManager>(
+ sys::Process::getPageSizeEstimate());
this->TargetTriple = std::move(TargetTriple);
this->PageSize = PageSize;
this->MemMgr = OwnedMemMgr.get();
this->MemAccess = this;
- this->JDI = {ExecutorAddress::fromPtr(jitDispatchViaWrapperFunctionManager),
- ExecutorAddress::fromPtr(this)};
+ this->JDI = {ExecutorAddr::fromPtr(jitDispatchViaWrapperFunctionManager),
+ ExecutorAddr::fromPtr(this)};
if (this->TargetTriple.isOSBinFormatMachO())
GlobalManglingPrefix = '_';
}
@@ -45,11 +47,20 @@ SelfExecutorProcessControl::SelfExecutorProcessControl(
Expected<std::unique_ptr<SelfExecutorProcessControl>>
SelfExecutorProcessControl::Create(
std::shared_ptr<SymbolStringPool> SSP,
+ std::unique_ptr<TaskDispatcher> D,
std::unique_ptr<jitlink::JITLinkMemoryManager> MemMgr) {
if (!SSP)
SSP = std::make_shared<SymbolStringPool>();
+ if (!D) {
+#if LLVM_ENABLE_THREADS
+ D = std::make_unique<DynamicThreadPoolTaskDispatcher>();
+#else
+ D = std::make_unique<InPlaceTaskDispatcher>();
+#endif
+ }
+
auto PageSize = sys::Process::getPageSize();
if (!PageSize)
return PageSize.takeError();
@@ -57,7 +68,8 @@ SelfExecutorProcessControl::Create(
Triple TT(sys::getProcessTriple());
return std::make_unique<SelfExecutorProcessControl>(
- std::move(SSP), std::move(TT), *PageSize, std::move(MemMgr));
+ std::move(SSP), std::move(D), std::move(TT), *PageSize,
+ std::move(MemMgr));
}
Expected<tpctypes::DylibHandle>
@@ -93,7 +105,7 @@ SelfExecutorProcessControl::lookupSymbols(ArrayRef<LookupRequest> Request) {
// FIXME: Collect all failing symbols before erroring out.
SymbolNameVector MissingSymbols;
MissingSymbols.push_back(Sym);
- return make_error<SymbolsNotFound>(std::move(MissingSymbols));
+ return make_error<SymbolsNotFound>(SSP, std::move(MissingSymbols));
}
R.back().push_back(pointerToJITTargetAddress(Addr));
}
@@ -103,60 +115,62 @@ SelfExecutorProcessControl::lookupSymbols(ArrayRef<LookupRequest> Request) {
}
Expected<int32_t>
-SelfExecutorProcessControl::runAsMain(JITTargetAddress MainFnAddr,
+SelfExecutorProcessControl::runAsMain(ExecutorAddr MainFnAddr,
ArrayRef<std::string> Args) {
using MainTy = int (*)(int, char *[]);
- return orc::runAsMain(jitTargetAddressToFunction<MainTy>(MainFnAddr), Args);
+ return orc::runAsMain(MainFnAddr.toPtr<MainTy>(), Args);
}
-void SelfExecutorProcessControl::callWrapperAsync(
- SendResultFunction SendResult, JITTargetAddress WrapperFnAddr,
- ArrayRef<char> ArgBuffer) {
+void SelfExecutorProcessControl::callWrapperAsync(ExecutorAddr WrapperFnAddr,
+ IncomingWFRHandler SendResult,
+ ArrayRef<char> ArgBuffer) {
using WrapperFnTy =
- shared::detail::CWrapperFunctionResult (*)(const char *Data, size_t Size);
- auto *WrapperFn = jitTargetAddressToFunction<WrapperFnTy>(WrapperFnAddr);
+ shared::CWrapperFunctionResult (*)(const char *Data, size_t Size);
+ auto *WrapperFn = WrapperFnAddr.toPtr<WrapperFnTy>();
SendResult(WrapperFn(ArgBuffer.data(), ArgBuffer.size()));
}
-Error SelfExecutorProcessControl::disconnect() { return Error::success(); }
+Error SelfExecutorProcessControl::disconnect() {
+ D->shutdown();
+ return Error::success();
+}
-void SelfExecutorProcessControl::writeUInt8s(ArrayRef<tpctypes::UInt8Write> Ws,
- WriteResultFn OnWriteComplete) {
+void SelfExecutorProcessControl::writeUInt8sAsync(
+ ArrayRef<tpctypes::UInt8Write> Ws, WriteResultFn OnWriteComplete) {
for (auto &W : Ws)
- *jitTargetAddressToPointer<uint8_t *>(W.Address) = W.Value;
+ *W.Addr.toPtr<uint8_t *>() = W.Value;
OnWriteComplete(Error::success());
}
-void SelfExecutorProcessControl::writeUInt16s(
+void SelfExecutorProcessControl::writeUInt16sAsync(
ArrayRef<tpctypes::UInt16Write> Ws, WriteResultFn OnWriteComplete) {
for (auto &W : Ws)
- *jitTargetAddressToPointer<uint16_t *>(W.Address) = W.Value;
+ *W.Addr.toPtr<uint16_t *>() = W.Value;
OnWriteComplete(Error::success());
}
-void SelfExecutorProcessControl::writeUInt32s(
+void SelfExecutorProcessControl::writeUInt32sAsync(
ArrayRef<tpctypes::UInt32Write> Ws, WriteResultFn OnWriteComplete) {
for (auto &W : Ws)
- *jitTargetAddressToPointer<uint32_t *>(W.Address) = W.Value;
+ *W.Addr.toPtr<uint32_t *>() = W.Value;
OnWriteComplete(Error::success());
}
-void SelfExecutorProcessControl::writeUInt64s(
+void SelfExecutorProcessControl::writeUInt64sAsync(
ArrayRef<tpctypes::UInt64Write> Ws, WriteResultFn OnWriteComplete) {
for (auto &W : Ws)
- *jitTargetAddressToPointer<uint64_t *>(W.Address) = W.Value;
+ *W.Addr.toPtr<uint64_t *>() = W.Value;
OnWriteComplete(Error::success());
}
-void SelfExecutorProcessControl::writeBuffers(
+void SelfExecutorProcessControl::writeBuffersAsync(
ArrayRef<tpctypes::BufferWrite> Ws, WriteResultFn OnWriteComplete) {
for (auto &W : Ws)
- memcpy(jitTargetAddressToPointer<char *>(W.Address), W.Buffer.data(),
- W.Buffer.size());
+ memcpy(W.Addr.toPtr<char *>(), W.Buffer.data(), W.Buffer.size());
OnWriteComplete(Error::success());
}
-shared::detail::CWrapperFunctionResult
+shared::CWrapperFunctionResult
SelfExecutorProcessControl::jitDispatchViaWrapperFunctionManager(
void *Ctx, const void *FnTag, const char *Data, size_t Size) {
diff --git a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
index e8dd1bb90c9a..ee1630a2ffa8 100644
--- a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
@@ -9,12 +9,17 @@
#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/ExecutionEngine/JITLink/x86_64.h"
#include "llvm/ExecutionEngine/Orc/OrcABISupport.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/MC/MCDisassembler/MCDisassembler.h"
+#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/Support/Format.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include <sstream>
+#define DEBUG_TYPE "orc"
+
using namespace llvm;
using namespace llvm::orc;
@@ -372,5 +377,77 @@ void cloneModuleFlagsMetadata(Module &Dst, const Module &Src,
Dst.addModuleFlag(MapMetadata(MF, VMap));
}
+Error addFunctionPointerRelocationsToCurrentSymbol(jitlink::Symbol &Sym,
+ jitlink::LinkGraph &G,
+ MCDisassembler &Disassembler,
+ MCInstrAnalysis &MIA) {
+ // AArch64 appears to already come with the necessary relocations. Among other
+ // architectures, only x86_64 is currently implemented here.
+ if (G.getTargetTriple().getArch() != Triple::x86_64)
+ return Error::success();
+
+ raw_null_ostream CommentStream;
+ auto &STI = Disassembler.getSubtargetInfo();
+
+ // Determine the function bounds
+ auto &B = Sym.getBlock();
+ assert(!B.isZeroFill() && "expected content block");
+ auto SymAddress = Sym.getAddress();
+ auto SymStartInBlock =
+ (const uint8_t *)B.getContent().data() + Sym.getOffset();
+ auto SymSize = Sym.getSize() ? Sym.getSize() : B.getSize() - Sym.getOffset();
+ auto Content = makeArrayRef(SymStartInBlock, SymSize);
+
+ LLVM_DEBUG(dbgs() << "Adding self-relocations to " << Sym.getName() << "\n");
+
+ SmallDenseSet<uintptr_t, 8> ExistingRelocations;
+ for (auto &E : B.edges()) {
+ if (E.isRelocation())
+ ExistingRelocations.insert(E.getOffset());
+ }
+
+ size_t I = 0;
+ while (I < Content.size()) {
+ MCInst Instr;
+ uint64_t InstrSize = 0;
+ uint64_t InstrStart = SymAddress + I;
+ auto DecodeStatus = Disassembler.getInstruction(
+ Instr, InstrSize, Content.drop_front(I), InstrStart, CommentStream);
+ if (DecodeStatus != MCDisassembler::Success) {
+ LLVM_DEBUG(dbgs() << "Aborting due to disassembly failure at address "
+ << InstrStart);
+ return make_error<StringError>(
+ formatv("failed to disassemble at address {0:x16}", InstrStart),
+ inconvertibleErrorCode());
+ }
+ // Advance to the next instruction.
+ I += InstrSize;
+
+ // Check for a PC-relative address equal to the symbol itself.
+ auto PCRelAddr =
+ MIA.evaluateMemoryOperandAddress(Instr, &STI, InstrStart, InstrSize);
+ if (!PCRelAddr.hasValue() || PCRelAddr.getValue() != SymAddress)
+ continue;
+
+ auto RelocOffInInstr =
+ MIA.getMemoryOperandRelocationOffset(Instr, InstrSize);
+ if (!RelocOffInInstr.hasValue() ||
+ InstrSize - RelocOffInInstr.getValue() != 4) {
+ LLVM_DEBUG(dbgs() << "Skipping unknown self-relocation at "
+ << InstrStart);
+ continue;
+ }
+
+ auto RelocOffInBlock =
+ InstrStart + *RelocOffInInstr - SymAddress + Sym.getOffset();
+ if (ExistingRelocations.contains(RelocOffInBlock))
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Adding delta32 self-relocation at " << InstrStart);
+ B.addEdge(jitlink::x86_64::Delta32, RelocOffInBlock, Sym, /*Addend=*/-4);
+ }
+ return Error::success();
+}
+
} // End namespace orc.
} // End namespace llvm.
diff --git a/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp b/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp
index 4257137a2212..0fbf79b8a56d 100644
--- a/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp
@@ -8,8 +8,8 @@
#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Host.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
namespace llvm {
diff --git a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
index 2ac32293e4db..0ab0d7d2e2b6 100644
--- a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
@@ -105,16 +105,18 @@ private:
/// llvm.global_ctors.
class GlobalCtorDtorScraper {
public:
-
GlobalCtorDtorScraper(GenericLLVMIRPlatformSupport &PS,
- StringRef InitFunctionPrefix)
- : PS(PS), InitFunctionPrefix(InitFunctionPrefix) {}
+ StringRef InitFunctionPrefix,
+ StringRef DeInitFunctionPrefix)
+ : PS(PS), InitFunctionPrefix(InitFunctionPrefix),
+ DeInitFunctionPrefix(DeInitFunctionPrefix) {}
Expected<ThreadSafeModule> operator()(ThreadSafeModule TSM,
MaterializationResponsibility &R);
private:
GenericLLVMIRPlatformSupport &PS;
StringRef InitFunctionPrefix;
+ StringRef DeInitFunctionPrefix;
};
/// Generic IR Platform Support
@@ -125,12 +127,14 @@ private:
class GenericLLVMIRPlatformSupport : public LLJIT::PlatformSupport {
public:
GenericLLVMIRPlatformSupport(LLJIT &J)
- : J(J), InitFunctionPrefix(J.mangle("__orc_init_func.")) {
+ : J(J), InitFunctionPrefix(J.mangle("__orc_init_func.")),
+ DeInitFunctionPrefix(J.mangle("__orc_deinit_func.")) {
getExecutionSession().setPlatform(
std::make_unique<GenericLLVMIRPlatform>(*this));
- setInitTransform(J, GlobalCtorDtorScraper(*this, InitFunctionPrefix));
+ setInitTransform(J, GlobalCtorDtorScraper(*this, InitFunctionPrefix,
+ DeInitFunctionPrefix));
SymbolMap StdInterposes;
@@ -203,6 +207,8 @@ public:
InitSymbols[&JD].add(KV.first,
SymbolLookupFlags::WeaklyReferencedSymbol);
InitFunctions[&JD].add(KV.first);
+ } else if ((*KV.first).startswith(DeInitFunctionPrefix)) {
+ DeInitFunctions[&JD].add(KV.first);
}
}
return Error::success();
@@ -256,6 +262,11 @@ public:
});
}
+ void registerDeInitFunc(JITDylib &JD, SymbolStringPtr DeInitName) {
+ getExecutionSession().runSessionLocked(
+ [&]() { DeInitFunctions[&JD].add(DeInitName); });
+ }
+
private:
Expected<std::vector<JITTargetAddress>> getInitializers(JITDylib &JD) {
@@ -438,6 +449,7 @@ private:
LLJIT &J;
std::string InitFunctionPrefix;
+ std::string DeInitFunctionPrefix;
DenseMap<JITDylib *, SymbolLookupSet> InitSymbols;
DenseMap<JITDylib *, SymbolLookupSet> InitFunctions;
DenseMap<JITDylib *, SymbolLookupSet> DeInitFunctions;
@@ -459,40 +471,63 @@ GlobalCtorDtorScraper::operator()(ThreadSafeModule TSM,
auto Err = TSM.withModuleDo([&](Module &M) -> Error {
auto &Ctx = M.getContext();
auto *GlobalCtors = M.getNamedGlobal("llvm.global_ctors");
-
- // If there's no llvm.global_ctors or it's just a decl then skip.
- if (!GlobalCtors || GlobalCtors->isDeclaration())
+ auto *GlobalDtors = M.getNamedGlobal("llvm.global_dtors");
+
+ auto RegisterCOrDtors = [&](GlobalVariable *GlobalCOrDtors,
+ bool isCtor) -> Error {
+ // If there's no llvm.global_ctors/dtors or it's just a declaration then skip.
+ if (!GlobalCOrDtors || GlobalCOrDtors->isDeclaration())
+ return Error::success();
+ std::string InitOrDeInitFunctionName;
+ if (isCtor)
+ raw_string_ostream(InitOrDeInitFunctionName)
+ << InitFunctionPrefix << M.getModuleIdentifier();
+ else
+ raw_string_ostream(InitOrDeInitFunctionName)
+ << DeInitFunctionPrefix << M.getModuleIdentifier();
+
+ MangleAndInterner Mangle(PS.getExecutionSession(), M.getDataLayout());
+ auto InternedInitOrDeInitName = Mangle(InitOrDeInitFunctionName);
+ if (auto Err = R.defineMaterializing(
+ {{InternedInitOrDeInitName, JITSymbolFlags::Callable}}))
+ return Err;
+
+ auto *InitOrDeInitFunc = Function::Create(
+ FunctionType::get(Type::getVoidTy(Ctx), {}, false),
+ GlobalValue::ExternalLinkage, InitOrDeInitFunctionName, &M);
+ InitOrDeInitFunc->setVisibility(GlobalValue::HiddenVisibility);
+ std::vector<std::pair<Function *, unsigned>> InitsOrDeInits;
+ auto COrDtors = isCtor ? getConstructors(M) : getDestructors(M);
+
+ for (auto E : COrDtors)
+ InitsOrDeInits.push_back(std::make_pair(E.Func, E.Priority));
+ llvm::sort(InitsOrDeInits,
+ [](const std::pair<Function *, unsigned> &LHS,
+ const std::pair<Function *, unsigned> &RHS) {
+ return LHS.first < RHS.first;
+ });
+
+ auto *InitOrDeInitFuncEntryBlock =
+ BasicBlock::Create(Ctx, "entry", InitOrDeInitFunc);
+ IRBuilder<> IB(InitOrDeInitFuncEntryBlock);
+ for (auto &KV : InitsOrDeInits)
+ IB.CreateCall(KV.first);
+ IB.CreateRetVoid();
+
+ if (isCtor)
+ PS.registerInitFunc(R.getTargetJITDylib(), InternedInitOrDeInitName);
+ else
+ PS.registerDeInitFunc(R.getTargetJITDylib(), InternedInitOrDeInitName);
+
+ GlobalCOrDtors->eraseFromParent();
return Error::success();
+ };
- std::string InitFunctionName;
- raw_string_ostream(InitFunctionName)
- << InitFunctionPrefix << M.getModuleIdentifier();
-
- MangleAndInterner Mangle(PS.getExecutionSession(), M.getDataLayout());
- auto InternedName = Mangle(InitFunctionName);
- if (auto Err =
- R.defineMaterializing({{InternedName, JITSymbolFlags::Callable}}))
+ if (auto Err = RegisterCOrDtors(GlobalCtors, true))
+ return Err;
+ if (auto Err = RegisterCOrDtors(GlobalDtors, false))
return Err;
- auto *InitFunc =
- Function::Create(FunctionType::get(Type::getVoidTy(Ctx), {}, false),
- GlobalValue::ExternalLinkage, InitFunctionName, &M);
- InitFunc->setVisibility(GlobalValue::HiddenVisibility);
- std::vector<std::pair<Function *, unsigned>> Inits;
- for (auto E : getConstructors(M))
- Inits.push_back(std::make_pair(E.Func, E.Priority));
- llvm::sort(Inits, [](const std::pair<Function *, unsigned> &LHS,
- const std::pair<Function *, unsigned> &RHS) {
- return LHS.first < RHS.first;
- });
- auto *EntryBlock = BasicBlock::Create(Ctx, "entry", InitFunc);
- IRBuilder<> IB(EntryBlock);
- for (auto &KV : Inits)
- IB.CreateCall(KV.first);
- IB.CreateRetVoid();
-
- PS.registerInitFunc(R.getTargetJITDylib(), InternedName);
- GlobalCtors->eraseFromParent();
return Error::success();
});
diff --git a/llvm/lib/ExecutionEngine/Orc/LookupAndRecordAddrs.cpp b/llvm/lib/ExecutionEngine/Orc/LookupAndRecordAddrs.cpp
new file mode 100644
index 000000000000..44cb78c773c9
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/Orc/LookupAndRecordAddrs.cpp
@@ -0,0 +1,82 @@
+//===------ LookupAndRecordAddrs.cpp - Symbol lookup support utility ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/LookupAndRecordAddrs.h"
+
+#include <future>
+
+namespace llvm {
+namespace orc {
+
+void lookupAndRecordAddrs(
+ unique_function<void(Error)> OnRecorded, ExecutionSession &ES, LookupKind K,
+ const JITDylibSearchOrder &SearchOrder,
+ std::vector<std::pair<SymbolStringPtr, ExecutorAddr *>> Pairs,
+ SymbolLookupFlags LookupFlags) {
+
+ SymbolLookupSet Symbols;
+ for (auto &KV : Pairs)
+ Symbols.add(KV.first, LookupFlags);
+
+ ES.lookup(
+ K, SearchOrder, Symbols, SymbolState::Ready,
+ [Pairs = std::move(Pairs),
+ OnRec = std::move(OnRecorded)](Expected<SymbolMap> Result) mutable {
+ if (!Result)
+ return OnRec(Result.takeError());
+ for (auto &KV : Pairs) {
+ auto I = Result->find(KV.first);
+ KV.second->setValue((I != Result->end()) ? I->second.getAddress()
+ : 0);
+ }
+ OnRec(Error::success());
+ },
+ NoDependenciesToRegister);
+}
+
+Error lookupAndRecordAddrs(
+ ExecutionSession &ES, LookupKind K, const JITDylibSearchOrder &SearchOrder,
+ std::vector<std::pair<SymbolStringPtr, ExecutorAddr *>> Pairs,
+ SymbolLookupFlags LookupFlags) {
+
+ std::promise<MSVCPError> ResultP;
+ auto ResultF = ResultP.get_future();
+ lookupAndRecordAddrs([&](Error Err) { ResultP.set_value(std::move(Err)); },
+ ES, K, SearchOrder, Pairs, LookupFlags);
+ return ResultF.get();
+}
+
+Error lookupAndRecordAddrs(
+ ExecutorProcessControl &EPC, tpctypes::DylibHandle H,
+ std::vector<std::pair<SymbolStringPtr, ExecutorAddr *>> Pairs,
+ SymbolLookupFlags LookupFlags) {
+
+ SymbolLookupSet Symbols;
+ for (auto &KV : Pairs)
+ Symbols.add(KV.first, LookupFlags);
+
+ ExecutorProcessControl::LookupRequest LR(H, Symbols);
+ auto Result = EPC.lookupSymbols(LR);
+ if (!Result)
+ return Result.takeError();
+
+ if (Result->size() != 1)
+ return make_error<StringError>("Error in lookup result",
+ inconvertibleErrorCode());
+ if (Result->front().size() != Pairs.size())
+ return make_error<StringError>("Error in lookup result elements",
+ inconvertibleErrorCode());
+
+ for (unsigned I = 0; I != Pairs.size(); ++I)
+ Pairs[I].second->setValue(Result->front()[I]);
+
+ return Error::success();
+}
+
+} // End namespace orc.
+} // End namespace llvm.
diff --git a/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp b/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp
index 66ef835dc34d..46c915dfea9e 100644
--- a/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp
@@ -12,6 +12,7 @@
#include "llvm/ExecutionEngine/JITLink/x86_64.h"
#include "llvm/ExecutionEngine/Orc/DebugUtils.h"
#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
+#include "llvm/ExecutionEngine/Orc/LookupAndRecordAddrs.h"
#include "llvm/Support/BinaryByteStream.h"
#include "llvm/Support/Debug.h"
@@ -52,7 +53,7 @@ public:
auto G = std::make_unique<jitlink::LinkGraph>(
"<MachOHeaderMU>", TT, PointerSize, Endianness,
jitlink::getGenericEdgeKindName);
- auto &HeaderSection = G->createSection("__header", sys::Memory::MF_READ);
+ auto &HeaderSection = G->createSection("__header", jitlink::MemProt::Read);
auto &HeaderBlock = createHeaderBlock(*G, HeaderSection);
// Init symbol is header-start symbol.
@@ -135,13 +136,14 @@ StringRef ObjCImageInfoSectionName = "__DATA,__objc_image_info";
StringRef ObjCSelRefsSectionName = "__DATA,__objc_selrefs";
StringRef Swift5ProtoSectionName = "__TEXT,__swift5_proto";
StringRef Swift5ProtosSectionName = "__TEXT,__swift5_protos";
+StringRef Swift5TypesSectionName = "__TEXT,__swift5_types";
StringRef ThreadBSSSectionName = "__DATA,__thread_bss";
StringRef ThreadDataSectionName = "__DATA,__thread_data";
StringRef ThreadVarsSectionName = "__DATA,__thread_vars";
StringRef InitSectionNames[] = {
ModInitFuncSectionName, ObjCSelRefsSectionName, ObjCClassListSectionName,
- Swift5ProtosSectionName, Swift5ProtoSectionName};
+ Swift5ProtosSectionName, Swift5ProtoSectionName, Swift5TypesSectionName};
} // end anonymous namespace
@@ -172,10 +174,10 @@ MachOPlatform::Create(ExecutionSession &ES, ObjectLinkingLayer &ObjLinkingLayer,
// Add JIT-dispatch function support symbols.
if (auto Err = PlatformJD.define(absoluteSymbols(
{{ES.intern("___orc_rt_jit_dispatch"),
- {EPC.getJITDispatchInfo().JITDispatchFunctionAddress.getValue(),
+ {EPC.getJITDispatchInfo().JITDispatchFunction.getValue(),
JITSymbolFlags::Exported}},
{ES.intern("___orc_rt_jit_dispatch_ctx"),
- {EPC.getJITDispatchInfo().JITDispatchContextAddress.getValue(),
+ {EPC.getJITDispatchInfo().JITDispatchContext.getValue(),
JITSymbolFlags::Exported}}})))
return std::move(Err);
@@ -267,6 +269,7 @@ bool MachOPlatform::isInitializerSection(StringRef SegName,
bool MachOPlatform::supportedTarget(const Triple &TT) {
switch (TT.getArch()) {
+ case Triple::aarch64:
case Triple::x86_64:
return true;
default:
@@ -286,6 +289,19 @@ MachOPlatform::MachOPlatform(
PlatformJD.addGenerator(std::move(OrcRuntimeGenerator));
+ // Force linking of eh-frame registration functions.
+ if (auto Err2 = lookupAndRecordAddrs(
+ ES, LookupKind::Static, makeJITDylibSearchOrder(&PlatformJD),
+ {{ES.intern("___orc_rt_macho_register_ehframe_section"),
+ &orc_rt_macho_register_ehframe_section},
+ {ES.intern("___orc_rt_macho_deregister_ehframe_section"),
+ &orc_rt_macho_deregister_ehframe_section}})) {
+ Err = std::move(Err2);
+ return;
+ }
+
+ State = BootstrapPhase2;
+
// PlatformJD hasn't been 'set-up' by the platform yet (since we're creating
// the platform now), so set it up.
if (auto E2 = setupJITDylib(PlatformJD)) {
@@ -309,6 +325,8 @@ MachOPlatform::MachOPlatform(
Err = std::move(E2);
return;
}
+
+ State = Initialized;
}
Error MachOPlatform::associateRuntimeSupportFunctions(JITDylib &PlatformJD) {
@@ -321,13 +339,13 @@ Error MachOPlatform::associateRuntimeSupportFunctions(JITDylib &PlatformJD) {
this, &MachOPlatform::rt_getInitializers);
using GetDeinitializersSPSSig =
- SPSExpected<SPSMachOJITDylibDeinitializerSequence>(SPSExecutorAddress);
+ SPSExpected<SPSMachOJITDylibDeinitializerSequence>(SPSExecutorAddr);
WFs[ES.intern("___orc_rt_macho_get_deinitializers_tag")] =
ES.wrapAsyncWithSPS<GetDeinitializersSPSSig>(
this, &MachOPlatform::rt_getDeinitializers);
using LookupSymbolSPSSig =
- SPSExpected<SPSExecutorAddress>(SPSExecutorAddress, SPSString);
+ SPSExpected<SPSExecutorAddr>(SPSExecutorAddr, SPSString);
WFs[ES.intern("___orc_rt_macho_symbol_lookup_tag")] =
ES.wrapAsyncWithSPS<LookupSymbolSPSSig>(this,
&MachOPlatform::rt_lookupSymbol);
@@ -411,7 +429,7 @@ void MachOPlatform::rt_getInitializers(SendInitializerSequenceFn SendResult,
}
void MachOPlatform::rt_getDeinitializers(SendDeinitializerSequenceFn SendResult,
- ExecutorAddress Handle) {
+ ExecutorAddr Handle) {
LLVM_DEBUG({
dbgs() << "MachOPlatform::rt_getDeinitializers(\""
<< formatv("{0:x}", Handle.getValue()) << "\")\n";
@@ -441,8 +459,7 @@ void MachOPlatform::rt_getDeinitializers(SendDeinitializerSequenceFn SendResult,
}
void MachOPlatform::rt_lookupSymbol(SendSymbolAddressFn SendResult,
- ExecutorAddress Handle,
- StringRef SymbolName) {
+ ExecutorAddr Handle, StringRef SymbolName) {
LLVM_DEBUG({
dbgs() << "MachOPlatform::rt_lookupSymbol(\""
<< formatv("{0:x}", Handle.getValue()) << "\")\n";
@@ -476,7 +493,7 @@ void MachOPlatform::rt_lookupSymbol(SendSymbolAddressFn SendResult,
void operator()(Expected<SymbolMap> Result) {
if (Result) {
assert(Result->size() == 1 && "Unexpected result map count");
- SendResult(ExecutorAddress(Result->begin()->second.getAddress()));
+ SendResult(ExecutorAddr(Result->begin()->second.getAddress()));
} else {
SendResult(Result.takeError());
}
@@ -495,56 +512,25 @@ void MachOPlatform::rt_lookupSymbol(SendSymbolAddressFn SendResult,
}
Error MachOPlatform::bootstrapMachORuntime(JITDylib &PlatformJD) {
-
- std::pair<const char *, ExecutorAddress *> Symbols[] = {
- {"___orc_rt_macho_platform_bootstrap", &orc_rt_macho_platform_bootstrap},
- {"___orc_rt_macho_platform_shutdown", &orc_rt_macho_platform_shutdown},
- {"___orc_rt_macho_register_object_sections",
- &orc_rt_macho_register_object_sections},
- {"___orc_rt_macho_create_pthread_key", &orc_rt_macho_create_pthread_key}};
-
- SymbolLookupSet RuntimeSymbols;
- std::vector<std::pair<SymbolStringPtr, ExecutorAddress *>> AddrsToRecord;
- for (const auto &KV : Symbols) {
- auto Name = ES.intern(KV.first);
- RuntimeSymbols.add(Name);
- AddrsToRecord.push_back({std::move(Name), KV.second});
- }
-
- auto RuntimeSymbolAddrs = ES.lookup(
- {{&PlatformJD, JITDylibLookupFlags::MatchAllSymbols}}, RuntimeSymbols);
- if (!RuntimeSymbolAddrs)
- return RuntimeSymbolAddrs.takeError();
-
- for (const auto &KV : AddrsToRecord) {
- auto &Name = KV.first;
- assert(RuntimeSymbolAddrs->count(Name) && "Missing runtime symbol?");
- KV.second->setValue((*RuntimeSymbolAddrs)[Name].getAddress());
- }
-
- if (auto Err =
- ES.callSPSWrapper<void()>(orc_rt_macho_platform_bootstrap.getValue()))
+ if (auto Err = lookupAndRecordAddrs(
+ ES, LookupKind::Static, makeJITDylibSearchOrder(&PlatformJD),
+ {{ES.intern("___orc_rt_macho_platform_bootstrap"),
+ &orc_rt_macho_platform_bootstrap},
+ {ES.intern("___orc_rt_macho_platform_shutdown"),
+ &orc_rt_macho_platform_shutdown},
+ {ES.intern("___orc_rt_macho_register_thread_data_section"),
+ &orc_rt_macho_register_thread_data_section},
+ {ES.intern("___orc_rt_macho_deregister_thread_data_section"),
+ &orc_rt_macho_deregister_thread_data_section},
+ {ES.intern("___orc_rt_macho_create_pthread_key"),
+ &orc_rt_macho_create_pthread_key}}))
return Err;
- // FIXME: Ordering is fuzzy here. We're probably best off saying
- // "behavior is undefined if code that uses the runtime is added before
- // the platform constructor returns", then move all this to the constructor.
- RuntimeBootstrapped = true;
- std::vector<MachOPerObjectSectionsToRegister> DeferredPOSRs;
- {
- std::lock_guard<std::mutex> Lock(PlatformMutex);
- DeferredPOSRs = std::move(BootstrapPOSRs);
- }
-
- for (auto &D : DeferredPOSRs)
- if (auto Err = registerPerObjectSections(D))
- return Err;
-
- return Error::success();
+ return ES.callSPSWrapper<void()>(orc_rt_macho_platform_bootstrap);
}
Error MachOPlatform::registerInitInfo(
- JITDylib &JD, ExecutorAddress ObjCImageInfoAddr,
+ JITDylib &JD, ExecutorAddr ObjCImageInfoAddr,
ArrayRef<jitlink::Section *> InitSections) {
std::unique_lock<std::mutex> Lock(PlatformMutex);
@@ -576,29 +562,12 @@ Error MachOPlatform::registerInitInfo(
// FIXME: Avoid copy here.
jitlink::SectionRange R(*Sec);
InitSeq->InitSections[Sec->getName()].push_back(
- {ExecutorAddress(R.getStart()), ExecutorAddress(R.getEnd())});
+ {ExecutorAddr(R.getStart()), ExecutorAddr(R.getEnd())});
}
return Error::success();
}
-Error MachOPlatform::registerPerObjectSections(
- const MachOPerObjectSectionsToRegister &POSR) {
-
- if (!orc_rt_macho_register_object_sections)
- return make_error<StringError>("Attempting to register per-object "
- "sections, but runtime support has not "
- "been loaded yet",
- inconvertibleErrorCode());
-
- Error ErrResult = Error::success();
- if (auto Err = ES.callSPSWrapper<shared::SPSError(
- SPSMachOPerObjectSectionsToRegister)>(
- orc_rt_macho_register_object_sections.getValue(), ErrResult, POSR))
- return Err;
- return ErrResult;
-}
-
Expected<uint64_t> MachOPlatform::createPThreadKey() {
if (!orc_rt_macho_create_pthread_key)
return make_error<StringError>(
@@ -608,7 +577,7 @@ Expected<uint64_t> MachOPlatform::createPThreadKey() {
Expected<uint64_t> Result(0);
if (auto Err = ES.callSPSWrapper<SPSExpected<uint64_t>(void)>(
- orc_rt_macho_create_pthread_key.getValue(), Result))
+ orc_rt_macho_create_pthread_key, Result))
return std::move(Err);
return Result;
}
@@ -617,21 +586,55 @@ void MachOPlatform::MachOPlatformPlugin::modifyPassConfig(
MaterializationResponsibility &MR, jitlink::LinkGraph &LG,
jitlink::PassConfiguration &Config) {
- // If the initializer symbol is the MachOHeader start symbol then just add
- // the macho header support passes.
- if (MR.getInitializerSymbol() == MP.MachOHeaderStartSymbol) {
- addMachOHeaderSupportPasses(MR, Config);
- // The header materialization unit doesn't require any other support, so we
- // can bail out early.
+ auto PS = MP.State.load();
+
+ // --- Handle Initializers ---
+ if (auto InitSymbol = MR.getInitializerSymbol()) {
+
+ // If the initializer symbol is the MachOHeader start symbol then just
+ // register it and then bail out -- the header materialization unit
+ // definitely doesn't need any other passes.
+ if (InitSymbol == MP.MachOHeaderStartSymbol) {
+ Config.PostAllocationPasses.push_back([this, &MR](jitlink::LinkGraph &G) {
+ return associateJITDylibHeaderSymbol(G, MR);
+ });
+ return;
+ }
+
+ // If the object contains an init symbol other than the header start symbol
+ // then add passes to preserve, process and register the init
+ // sections/symbols.
+ Config.PrePrunePasses.push_back([this, &MR](jitlink::LinkGraph &G) {
+ if (auto Err = preserveInitSections(G, MR))
+ return Err;
+ return processObjCImageInfo(G, MR);
+ });
+
+ Config.PostFixupPasses.push_back(
+ [this, &JD = MR.getTargetJITDylib()](jitlink::LinkGraph &G) {
+ return registerInitSections(G, JD);
+ });
+ }
+
+ // --- Add passes for eh-frame and TLV support ---
+ if (PS == MachOPlatform::BootstrapPhase1) {
+ Config.PostFixupPasses.push_back(
+ [this](jitlink::LinkGraph &G) { return registerEHSectionsPhase1(G); });
return;
}
- // If the object contains initializers then add passes to record them.
- if (MR.getInitializerSymbol())
- addInitializerSupportPasses(MR, Config);
+ // Insert TLV lowering at the start of the PostPrunePasses, since we want
+ // it to run before GOT/PLT lowering.
+ Config.PostPrunePasses.insert(
+ Config.PostPrunePasses.begin(),
+ [this, &JD = MR.getTargetJITDylib()](jitlink::LinkGraph &G) {
+ return fixTLVSectionsAndEdges(G, JD);
+ });
- // Add passes for eh-frame and TLV support.
- addEHAndTLVSupportPasses(MR, Config);
+ // Add a pass to register the final addresses of the eh-frame and TLV sections
+ // with the runtime.
+ Config.PostFixupPasses.push_back(
+ [this](jitlink::LinkGraph &G) { return registerEHAndTLVSections(G); });
}
ObjectLinkingLayer::Plugin::SyntheticSymbolDependenciesMap
@@ -648,111 +651,22 @@ MachOPlatform::MachOPlatformPlugin::getSyntheticSymbolDependencies(
return SyntheticSymbolDependenciesMap();
}
-void MachOPlatform::MachOPlatformPlugin::addInitializerSupportPasses(
- MaterializationResponsibility &MR, jitlink::PassConfiguration &Config) {
-
- /// Preserve init sections.
- Config.PrePrunePasses.push_back([this, &MR](jitlink::LinkGraph &G) {
- if (auto Err = preserveInitSections(G, MR))
- return Err;
- return processObjCImageInfo(G, MR);
- });
-
- Config.PostFixupPasses.push_back(
- [this, &JD = MR.getTargetJITDylib()](jitlink::LinkGraph &G) {
- return registerInitSections(G, JD);
- });
-}
-
-void MachOPlatform::MachOPlatformPlugin::addMachOHeaderSupportPasses(
- MaterializationResponsibility &MR, jitlink::PassConfiguration &Config) {
-
- Config.PostAllocationPasses.push_back([this, &JD = MR.getTargetJITDylib()](
- jitlink::LinkGraph &G) -> Error {
- auto I = llvm::find_if(G.defined_symbols(), [this](jitlink::Symbol *Sym) {
- return Sym->getName() == *MP.MachOHeaderStartSymbol;
- });
- assert(I != G.defined_symbols().end() &&
- "Missing MachO header start symbol");
- {
- std::lock_guard<std::mutex> Lock(MP.PlatformMutex);
- JITTargetAddress HeaderAddr = (*I)->getAddress();
- MP.HeaderAddrToJITDylib[HeaderAddr] = &JD;
- assert(!MP.InitSeqs.count(&JD) && "InitSeq entry for JD already exists");
- MP.InitSeqs.insert(
- std::make_pair(&JD, MachOJITDylibInitializers(
- JD.getName(), ExecutorAddress(HeaderAddr))));
- }
- return Error::success();
- });
-}
-
-void MachOPlatform::MachOPlatformPlugin::addEHAndTLVSupportPasses(
- MaterializationResponsibility &MR, jitlink::PassConfiguration &Config) {
-
- // Insert TLV lowering at the start of the PostPrunePasses, since we want
- // it to run before GOT/PLT lowering.
- Config.PostPrunePasses.insert(
- Config.PostPrunePasses.begin(),
- [this, &JD = MR.getTargetJITDylib()](jitlink::LinkGraph &G) {
- return fixTLVSectionsAndEdges(G, JD);
- });
-
- // Add a pass to register the final addresses of the eh-frame and TLV sections
- // with the runtime.
- Config.PostFixupPasses.push_back([this](jitlink::LinkGraph &G) -> Error {
- MachOPerObjectSectionsToRegister POSR;
-
- if (auto *EHFrameSection = G.findSectionByName(EHFrameSectionName)) {
- jitlink::SectionRange R(*EHFrameSection);
- if (!R.empty())
- POSR.EHFrameSection = {ExecutorAddress(R.getStart()),
- ExecutorAddress(R.getEnd())};
- }
-
- // Get a pointer to the thread data section if there is one. It will be used
- // below.
- jitlink::Section *ThreadDataSection =
- G.findSectionByName(ThreadDataSectionName);
-
- // Handle thread BSS section if there is one.
- if (auto *ThreadBSSSection = G.findSectionByName(ThreadBSSSectionName)) {
- // If there's already a thread data section in this graph then merge the
- // thread BSS section content into it, otherwise just treat the thread
- // BSS section as the thread data section.
- if (ThreadDataSection)
- G.mergeSections(*ThreadDataSection, *ThreadBSSSection);
- else
- ThreadDataSection = ThreadBSSSection;
- }
-
- // Having merged thread BSS (if present) and thread data (if present),
- // record the resulting section range.
- if (ThreadDataSection) {
- jitlink::SectionRange R(*ThreadDataSection);
- if (!R.empty())
- POSR.ThreadDataSection = {ExecutorAddress(R.getStart()),
- ExecutorAddress(R.getEnd())};
- }
-
- if (POSR.EHFrameSection.StartAddress ||
- POSR.ThreadDataSection.StartAddress) {
-
- // If we're still bootstrapping the runtime then just record this
- // frame for now.
- if (!MP.RuntimeBootstrapped) {
- std::lock_guard<std::mutex> Lock(MP.PlatformMutex);
- MP.BootstrapPOSRs.push_back(POSR);
- return Error::success();
- }
-
- // Otherwise register it immediately.
- if (auto Err = MP.registerPerObjectSections(POSR))
- return Err;
- }
+Error MachOPlatform::MachOPlatformPlugin::associateJITDylibHeaderSymbol(
+ jitlink::LinkGraph &G, MaterializationResponsibility &MR) {
- return Error::success();
+ auto I = llvm::find_if(G.defined_symbols(), [this](jitlink::Symbol *Sym) {
+ return Sym->getName() == *MP.MachOHeaderStartSymbol;
});
+ assert(I != G.defined_symbols().end() && "Missing MachO header start symbol");
+
+ auto &JD = MR.getTargetJITDylib();
+ std::lock_guard<std::mutex> Lock(MP.PlatformMutex);
+ JITTargetAddress HeaderAddr = (*I)->getAddress();
+ MP.HeaderAddrToJITDylib[HeaderAddr] = &JD;
+ assert(!MP.InitSeqs.count(&JD) && "InitSeq entry for JD already exists");
+ MP.InitSeqs.insert(std::make_pair(
+ &JD, MachOJITDylibInitializers(JD.getName(), ExecutorAddr(HeaderAddr))));
+ return Error::success();
}
Error MachOPlatform::MachOPlatformPlugin::preserveInitSections(
@@ -873,7 +787,7 @@ Error MachOPlatform::MachOPlatformPlugin::processObjCImageInfo(
Error MachOPlatform::MachOPlatformPlugin::registerInitSections(
jitlink::LinkGraph &G, JITDylib &JD) {
- ExecutorAddress ObjCImageInfoAddr;
+ ExecutorAddr ObjCImageInfoAddr;
SmallVector<jitlink::Section *> InitSections;
if (auto *ObjCImageInfoSec = G.findSectionByName(ObjCImageInfoSectionName)) {
@@ -950,9 +864,109 @@ Error MachOPlatform::MachOPlatformPlugin::fixTLVSectionsAndEdges(
for (auto *B : G.blocks())
for (auto &E : B->edges())
if (E.getKind() ==
- jitlink::x86_64::RequestTLVPAndTransformToPCRel32TLVPLoadRelaxable)
- E.setKind(
- jitlink::x86_64::RequestGOTAndTransformToPCRel32GOTLoadRelaxable);
+ jitlink::x86_64::RequestTLVPAndTransformToPCRel32TLVPLoadREXRelaxable)
+ E.setKind(jitlink::x86_64::
+ RequestGOTAndTransformToPCRel32GOTLoadREXRelaxable);
+
+ return Error::success();
+}
+
+Error MachOPlatform::MachOPlatformPlugin::registerEHAndTLVSections(
+ jitlink::LinkGraph &G) {
+
+ // Register the final addresses of the eh-frame and TLV sections with the
+ // runtime.
+ if (auto *EHFrameSection = G.findSectionByName(EHFrameSectionName)) {
+ jitlink::SectionRange R(*EHFrameSection);
+ if (!R.empty())
+ G.allocActions().push_back(
+ {{MP.orc_rt_macho_register_ehframe_section.getValue(), R.getStart(),
+ R.getSize()},
+ {MP.orc_rt_macho_deregister_ehframe_section.getValue(), R.getStart(),
+ R.getSize()}});
+ }
+
+ // Get a pointer to the thread data section if there is one. It will be used
+ // below.
+ jitlink::Section *ThreadDataSection =
+ G.findSectionByName(ThreadDataSectionName);
+
+ // Handle thread BSS section if there is one.
+ if (auto *ThreadBSSSection = G.findSectionByName(ThreadBSSSectionName)) {
+ // If there's already a thread data section in this graph then merge the
+ // thread BSS section content into it, otherwise just treat the thread
+ // BSS section as the thread data section.
+ if (ThreadDataSection)
+ G.mergeSections(*ThreadDataSection, *ThreadBSSSection);
+ else
+ ThreadDataSection = ThreadBSSSection;
+ }
+
+ // Having merged thread BSS (if present) and thread data (if present),
+ // record the resulting section range.
+ if (ThreadDataSection) {
+ jitlink::SectionRange R(*ThreadDataSection);
+ if (!R.empty()) {
+ if (MP.State != MachOPlatform::Initialized)
+ return make_error<StringError>("__thread_data section encountered, but "
+ "MachOPlatform has not finished booting",
+ inconvertibleErrorCode());
+
+ G.allocActions().push_back(
+ {{MP.orc_rt_macho_register_thread_data_section.getValue(),
+ R.getStart(), R.getSize()},
+ {MP.orc_rt_macho_deregister_thread_data_section.getValue(),
+ R.getStart(), R.getSize()}});
+ }
+ }
+ return Error::success();
+}
+
+Error MachOPlatform::MachOPlatformPlugin::registerEHSectionsPhase1(
+ jitlink::LinkGraph &G) {
+
+ // If there's no eh-frame there's nothing to do.
+ auto *EHFrameSection = G.findSectionByName(EHFrameSectionName);
+ if (!EHFrameSection)
+ return Error::success();
+
+ // If the eh-frame section is empty there's nothing to do.
+ jitlink::SectionRange R(*EHFrameSection);
+ if (R.empty())
+ return Error::success();
+
+ // Since we're currently linking the object that contains the registration
+ // code, the addresses won't be available in the platform yet. We'll have to
+ // find them in this graph instead.
+ ExecutorAddr orc_rt_macho_register_ehframe_section;
+ ExecutorAddr orc_rt_macho_deregister_ehframe_section;
+ for (auto *Sym : G.defined_symbols()) {
+ if (!Sym->hasName())
+ continue;
+ if (Sym->getName() == "___orc_rt_macho_register_ehframe_section")
+ orc_rt_macho_register_ehframe_section = ExecutorAddr(Sym->getAddress());
+ else if (Sym->getName() == "___orc_rt_macho_deregister_ehframe_section")
+ orc_rt_macho_deregister_ehframe_section = ExecutorAddr(Sym->getAddress());
+
+ if (orc_rt_macho_register_ehframe_section &&
+ orc_rt_macho_deregister_ehframe_section)
+ break;
+ }
+
+ // If we failed to find the required functions then bail out.
+ if (!orc_rt_macho_register_ehframe_section ||
+ !orc_rt_macho_deregister_ehframe_section)
+ return make_error<StringError>("Could not find eh-frame registration "
+ "functions during platform bootstrap",
+ inconvertibleErrorCode());
+
+ // Otherwise, add allocation actions to the graph to register eh-frames for
+ // this object.
+ G.allocActions().push_back(
+ {{orc_rt_macho_register_ehframe_section.getValue(), R.getStart(),
+ R.getSize()},
+ {orc_rt_macho_deregister_ehframe_section.getValue(), R.getStart(),
+ R.getSize()}});
return Error::success();
}
diff --git a/llvm/lib/ExecutionEngine/Orc/Mangling.cpp b/llvm/lib/ExecutionEngine/Orc/Mangling.cpp
index 14b22880ab7e..7b21e6a684ca 100644
--- a/llvm/lib/ExecutionEngine/Orc/Mangling.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/Mangling.cpp
@@ -7,9 +7,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/ExecutionEngine/Orc/Mangling.h"
+#include "llvm/ExecutionEngine/Orc/ELFNixPlatform.h"
#include "llvm/ExecutionEngine/Orc/MachOPlatform.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Mangler.h"
+#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/MachO.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Debug.h"
@@ -83,17 +85,29 @@ void IRSymbolMapper::add(ExecutionSession &ES, const ManglingOptions &MO,
}
}
-Expected<std::pair<SymbolFlagsMap, SymbolStringPtr>>
-getObjectSymbolInfo(ExecutionSession &ES, MemoryBufferRef ObjBuffer) {
- auto Obj = object::ObjectFile::createObjectFile(ObjBuffer);
+static SymbolStringPtr addInitSymbol(SymbolFlagsMap &SymbolFlags,
+ ExecutionSession &ES,
+ StringRef ObjFileName) {
+ SymbolStringPtr InitSymbol;
+ size_t Counter = 0;
- if (!Obj)
- return Obj.takeError();
+ do {
+ std::string InitSymString;
+ raw_string_ostream(InitSymString)
+ << "$." << ObjFileName << ".__inits." << Counter++;
+ InitSymbol = ES.intern(InitSymString);
+ } while (SymbolFlags.count(InitSymbol));
- bool IsMachO = isa<object::MachOObjectFile>(Obj->get());
+ SymbolFlags[InitSymbol] = JITSymbolFlags::MaterializationSideEffectsOnly;
+ return InitSymbol;
+}
+static Expected<std::pair<SymbolFlagsMap, SymbolStringPtr>>
+getMachOObjectFileSymbolInfo(ExecutionSession &ES,
+ const object::MachOObjectFile &Obj) {
SymbolFlagsMap SymbolFlags;
- for (auto &Sym : (*Obj)->symbols()) {
+
+ for (auto &Sym : Obj.symbols()) {
Expected<uint32_t> SymFlagsOrErr = Sym.getFlags();
if (!SymFlagsOrErr)
// TODO: Test this error.
@@ -123,48 +137,135 @@ getObjectSymbolInfo(ExecutionSession &ES, MemoryBufferRef ObjBuffer) {
return SymFlags.takeError();
// Strip the 'exported' flag from MachO linker-private symbols.
- if (IsMachO && Name->startswith("l"))
+ if (Name->startswith("l"))
*SymFlags &= ~JITSymbolFlags::Exported;
SymbolFlags[InternedName] = std::move(*SymFlags);
}
SymbolStringPtr InitSymbol;
+ for (auto &Sec : Obj.sections()) {
+ auto SecType = Obj.getSectionType(Sec);
+ if ((SecType & MachO::SECTION_TYPE) == MachO::S_MOD_INIT_FUNC_POINTERS) {
+ InitSymbol = addInitSymbol(SymbolFlags, ES, Obj.getFileName());
+ break;
+ }
+ auto SegName = Obj.getSectionFinalSegmentName(Sec.getRawDataRefImpl());
+ auto SecName = cantFail(Obj.getSectionName(Sec.getRawDataRefImpl()));
+ if (MachOPlatform::isInitializerSection(SegName, SecName)) {
+ InitSymbol = addInitSymbol(SymbolFlags, ES, Obj.getFileName());
+ break;
+ }
+ }
- size_t Counter = 0;
- auto AddInitSymbol = [&]() {
- while (true) {
- std::string InitSymString;
- raw_string_ostream(InitSymString)
- << "$." << ObjBuffer.getBufferIdentifier() << ".__inits."
- << Counter++;
- InitSymbol = ES.intern(InitSymString);
- if (SymbolFlags.count(InitSymbol))
+ return std::make_pair(std::move(SymbolFlags), std::move(InitSymbol));
+}
+
+static Expected<std::pair<SymbolFlagsMap, SymbolStringPtr>>
+getELFObjectFileSymbolInfo(ExecutionSession &ES,
+ const object::ELFObjectFileBase &Obj) {
+ SymbolFlagsMap SymbolFlags;
+ for (auto &Sym : Obj.symbols()) {
+ Expected<uint32_t> SymFlagsOrErr = Sym.getFlags();
+ if (!SymFlagsOrErr)
+ // TODO: Test this error.
+ return SymFlagsOrErr.takeError();
+
+ // Skip symbols not defined in this object file.
+ if (*SymFlagsOrErr & object::BasicSymbolRef::SF_Undefined)
+ continue;
+
+ // Skip symbols that are not global.
+ if (!(*SymFlagsOrErr & object::BasicSymbolRef::SF_Global))
+ continue;
+
+ // Skip symbols that have type SF_File.
+ if (auto SymType = Sym.getType()) {
+ if (*SymType == object::SymbolRef::ST_File)
continue;
- SymbolFlags[InitSymbol] = JITSymbolFlags::MaterializationSideEffectsOnly;
- return;
- }
- };
-
- if (IsMachO) {
- auto &MachOObj = cast<object::MachOObjectFile>(*Obj->get());
- for (auto &Sec : MachOObj.sections()) {
- auto SecType = MachOObj.getSectionType(Sec);
- if ((SecType & MachO::SECTION_TYPE) == MachO::S_MOD_INIT_FUNC_POINTERS) {
- AddInitSymbol();
- break;
- }
- auto SegName =
- MachOObj.getSectionFinalSegmentName(Sec.getRawDataRefImpl());
- auto SecName = cantFail(MachOObj.getSectionName(Sec.getRawDataRefImpl()));
- if (MachOPlatform::isInitializerSection(SegName, SecName)) {
- AddInitSymbol();
+ } else
+ return SymType.takeError();
+
+ auto Name = Sym.getName();
+ if (!Name)
+ return Name.takeError();
+ auto InternedName = ES.intern(*Name);
+ auto SymFlags = JITSymbolFlags::fromObjectSymbol(Sym);
+ if (!SymFlags)
+ return SymFlags.takeError();
+
+ // ELF STB_GNU_UNIQUE should map to Weak for ORC.
+ if (Sym.getBinding() == ELF::STB_GNU_UNIQUE)
+ *SymFlags |= JITSymbolFlags::Weak;
+
+ SymbolFlags[InternedName] = std::move(*SymFlags);
+ }
+
+ SymbolStringPtr InitSymbol;
+ for (auto &Sec : Obj.sections()) {
+ if (auto SecName = Sec.getName()) {
+ if (ELFNixPlatform::isInitializerSection(*SecName)) {
+ InitSymbol = addInitSymbol(SymbolFlags, ES, Obj.getFileName());
break;
}
}
}
- return std::make_pair(std::move(SymbolFlags), std::move(InitSymbol));
+ return std::make_pair(std::move(SymbolFlags), InitSymbol);
+}
+
+Expected<std::pair<SymbolFlagsMap, SymbolStringPtr>>
+getGenericObjectFileSymbolInfo(ExecutionSession &ES,
+ const object::ObjectFile &Obj) {
+ SymbolFlagsMap SymbolFlags;
+ for (auto &Sym : Obj.symbols()) {
+ Expected<uint32_t> SymFlagsOrErr = Sym.getFlags();
+ if (!SymFlagsOrErr)
+ // TODO: Test this error.
+ return SymFlagsOrErr.takeError();
+
+ // Skip symbols not defined in this object file.
+ if (*SymFlagsOrErr & object::BasicSymbolRef::SF_Undefined)
+ continue;
+
+ // Skip symbols that are not global.
+ if (!(*SymFlagsOrErr & object::BasicSymbolRef::SF_Global))
+ continue;
+
+ // Skip symbols that have type SF_File.
+ if (auto SymType = Sym.getType()) {
+ if (*SymType == object::SymbolRef::ST_File)
+ continue;
+ } else
+ return SymType.takeError();
+
+ auto Name = Sym.getName();
+ if (!Name)
+ return Name.takeError();
+ auto InternedName = ES.intern(*Name);
+ auto SymFlags = JITSymbolFlags::fromObjectSymbol(Sym);
+ if (!SymFlags)
+ return SymFlags.takeError();
+
+ SymbolFlags[InternedName] = std::move(*SymFlags);
+ }
+
+ return std::make_pair(std::move(SymbolFlags), nullptr);
+}
+
+Expected<std::pair<SymbolFlagsMap, SymbolStringPtr>>
+getObjectSymbolInfo(ExecutionSession &ES, MemoryBufferRef ObjBuffer) {
+ auto Obj = object::ObjectFile::createObjectFile(ObjBuffer);
+
+ if (!Obj)
+ return Obj.takeError();
+
+ if (auto *MachOObj = dyn_cast<object::MachOObjectFile>(Obj->get()))
+ return getMachOObjectFileSymbolInfo(ES, *MachOObj);
+ else if (auto *ELFObj = dyn_cast<object::ELFObjectFileBase>(Obj->get()))
+ return getELFObjectFileSymbolInfo(ES, *ELFObj);
+
+ return getGenericObjectFileSymbolInfo(ES, **Obj);
}
} // End namespace orc.
diff --git a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp
index fd260089c04b..6f840a079dd1 100644
--- a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp
@@ -64,9 +64,9 @@ private:
LGI.SymbolFlags[ES.intern(Sym->getName())] = Flags;
}
- if (G.getTargetTriple().isOSBinFormatMachO())
- if (hasMachOInitSection(G))
- LGI.InitSymbol = makeInitSymbol(ES, G);
+ if ((G.getTargetTriple().isOSBinFormatMachO() && hasMachOInitSection(G)) ||
+ (G.getTargetTriple().isOSBinFormatELF() && hasELFInitSection(G)))
+ LGI.InitSymbol = makeInitSymbol(ES, G);
return LGI;
}
@@ -77,11 +77,19 @@ private:
Sec.getName() == "__DATA,__objc_classlist" ||
Sec.getName() == "__TEXT,__swift5_protos" ||
Sec.getName() == "__TEXT,__swift5_proto" ||
+ Sec.getName() == "__TEXT,__swift5_types" ||
Sec.getName() == "__DATA,__mod_init_func")
return true;
return false;
}
+ static bool hasELFInitSection(LinkGraph &G) {
+ for (auto &Sec : G.sections())
+ if (Sec.getName() == ".init_array")
+ return true;
+ return false;
+ }
+
static SymbolStringPtr makeInitSymbol(ExecutionSession &ES, LinkGraph &G) {
std::string InitSymString;
raw_string_ostream(InitSymString)
@@ -272,8 +280,9 @@ public:
// If there were missing symbols then report the error.
if (!MissingSymbols.empty())
- return make_error<MissingSymbolDefinitions>(G.getName(),
- std::move(MissingSymbols));
+ return make_error<MissingSymbolDefinitions>(
+ Layer.getExecutionSession().getSymbolStringPool(), G.getName(),
+ std::move(MissingSymbols));
// If there are more definitions than expected, add them to the
// ExtraSymbols vector.
@@ -286,8 +295,9 @@ public:
// If there were extra definitions then report the error.
if (!ExtraSymbols.empty())
- return make_error<UnexpectedSymbolDefinitions>(G.getName(),
- std::move(ExtraSymbols));
+ return make_error<UnexpectedSymbolDefinitions>(
+ Layer.getExecutionSession().getSymbolStringPool(), G.getName(),
+ std::move(ExtraSymbols));
}
if (auto Err = MR->notifyResolved(InternedResult))
@@ -297,8 +307,7 @@ public:
return Error::success();
}
- void notifyFinalized(
- std::unique_ptr<JITLinkMemoryManager::Allocation> A) override {
+ void notifyFinalized(JITLinkMemoryManager::FinalizedAlloc A) override {
if (auto Err = Layer.notifyEmitted(*MR, std::move(A))) {
Layer.getExecutionSession().reportError(std::move(Err));
MR->failMaterialization();
@@ -414,7 +423,8 @@ private:
std::vector<std::pair<SymbolStringPtr, Symbol *>> NameToSym;
auto ProcessSymbol = [&](Symbol *Sym) {
- if (Sym->hasName() && Sym->getLinkage() == Linkage::Weak) {
+ if (Sym->hasName() && Sym->getLinkage() == Linkage::Weak &&
+ Sym->getScope() != Scope::Local) {
auto Name = ES.intern(Sym->getName());
if (!MR->getSymbols().count(ES.intern(Sym->getName()))) {
JITSymbolFlags SF = JITSymbolFlags::Weak;
@@ -543,8 +553,7 @@ private:
// Propagate block-level dependencies through the block-dependence graph.
while (!WorkList.empty()) {
- auto *B = WorkList.back();
- WorkList.pop_back();
+ auto *B = WorkList.pop_back_val();
auto &BI = BlockInfos[B];
assert(BI.DependenciesChanged &&
@@ -672,7 +681,7 @@ void ObjectLinkingLayer::notifyLoaded(MaterializationResponsibility &MR) {
}
Error ObjectLinkingLayer::notifyEmitted(MaterializationResponsibility &MR,
- AllocPtr Alloc) {
+ FinalizedAlloc FA) {
Error Err = Error::success();
for (auto &P : Plugins)
Err = joinErrors(std::move(Err), P->notifyEmitted(MR));
@@ -681,17 +690,20 @@ Error ObjectLinkingLayer::notifyEmitted(MaterializationResponsibility &MR,
return Err;
return MR.withResourceKeyDo(
- [&](ResourceKey K) { Allocs[K].push_back(std::move(Alloc)); });
+ [&](ResourceKey K) { Allocs[K].push_back(std::move(FA)); });
}
Error ObjectLinkingLayer::handleRemoveResources(ResourceKey K) {
- Error Err = Error::success();
-
- for (auto &P : Plugins)
- Err = joinErrors(std::move(Err), P->notifyRemovingResources(K));
+ {
+ Error Err = Error::success();
+ for (auto &P : Plugins)
+ Err = joinErrors(std::move(Err), P->notifyRemovingResources(K));
+ if (Err)
+ return Err;
+ }
- std::vector<AllocPtr> AllocsToRemove;
+ std::vector<FinalizedAlloc> AllocsToRemove;
getExecutionSession().runSessionLocked([&] {
auto I = Allocs.find(K);
if (I != Allocs.end()) {
@@ -700,12 +712,10 @@ Error ObjectLinkingLayer::handleRemoveResources(ResourceKey K) {
}
});
- while (!AllocsToRemove.empty()) {
- Err = joinErrors(std::move(Err), AllocsToRemove.back()->deallocate());
- AllocsToRemove.pop_back();
- }
+ if (AllocsToRemove.empty())
+ return Error::success();
- return Err;
+ return MemMgr.deallocate(std::move(AllocsToRemove));
}
void ObjectLinkingLayer::handleTransferResources(ResourceKey DstKey,
diff --git a/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp b/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp
index d6f73a8b0864..673f7394450f 100644
--- a/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp
@@ -619,6 +619,61 @@ LLVMErrorRef LLVMOrcCreateDynamicLibrarySearchGeneratorForProcess(
return LLVMErrorSuccess;
}
+LLVMErrorRef LLVMOrcCreateDynamicLibrarySearchGeneratorForPath(
+ LLVMOrcDefinitionGeneratorRef *Result, const char *FileName,
+ char GlobalPrefix, LLVMOrcSymbolPredicate Filter, void *FilterCtx) {
+ assert(Result && "Result can not be null");
+ assert(FileName && "FileName can not be null");
+ assert((Filter || !FilterCtx) &&
+ "if Filter is null then FilterCtx must also be null");
+
+ DynamicLibrarySearchGenerator::SymbolPredicate Pred;
+ if (Filter)
+ Pred = [=](const SymbolStringPtr &Name) -> bool {
+ return Filter(FilterCtx, wrap(OrcV2CAPIHelper::getRawPoolEntryPtr(Name)));
+ };
+
+ auto LibrarySymsGenerator =
+ DynamicLibrarySearchGenerator::Load(FileName, GlobalPrefix, Pred);
+
+ if (!LibrarySymsGenerator) {
+ *Result = 0;
+ return wrap(LibrarySymsGenerator.takeError());
+ }
+
+ *Result = wrap(LibrarySymsGenerator->release());
+ return LLVMErrorSuccess;
+}
+
+LLVMErrorRef LLVMOrcCreateStaticLibrarySearchGeneratorForPath(
+ LLVMOrcDefinitionGeneratorRef *Result, LLVMOrcObjectLayerRef ObjLayer,
+ const char *FileName, const char *TargetTriple) {
+ assert(Result && "Result can not be null");
+ assert(FileName && "Filename can not be null");
+ assert(ObjLayer && "ObjectLayer can not be null");
+
+ if (TargetTriple) {
+ auto TT = Triple(TargetTriple);
+ auto LibrarySymsGenerator =
+ StaticLibraryDefinitionGenerator::Load(*unwrap(ObjLayer), FileName, TT);
+ if (!LibrarySymsGenerator) {
+ *Result = 0;
+ return wrap(LibrarySymsGenerator.takeError());
+ }
+ *Result = wrap(LibrarySymsGenerator->release());
+ return LLVMErrorSuccess;
+ } else {
+ auto LibrarySymsGenerator =
+ StaticLibraryDefinitionGenerator::Load(*unwrap(ObjLayer), FileName);
+ if (!LibrarySymsGenerator) {
+ *Result = 0;
+ return wrap(LibrarySymsGenerator.takeError());
+ }
+ *Result = wrap(LibrarySymsGenerator->release());
+ return LLVMErrorSuccess;
+ }
+}
+
LLVMOrcThreadSafeContextRef LLVMOrcCreateNewThreadSafeContext(void) {
return wrap(new ThreadSafeContext(std::make_unique<LLVMContext>()));
}
diff --git a/llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp b/llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp
new file mode 100644
index 000000000000..02044e4af29a
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp
@@ -0,0 +1,47 @@
+//===------ OrcRTBridge.cpp - Executor functions for bootstrap -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h"
+
+namespace llvm {
+namespace orc {
+namespace rt {
+
+const char *SimpleExecutorDylibManagerInstanceName =
+ "__llvm_orc_SimpleExecutorDylibManager_Instance";
+const char *SimpleExecutorDylibManagerOpenWrapperName =
+ "__llvm_orc_SimpleExecutorDylibManager_open_wrapper";
+const char *SimpleExecutorDylibManagerLookupWrapperName =
+ "__llvm_orc_SimpleExecutorDylibManager_lookup_wrapper";
+const char *SimpleExecutorMemoryManagerInstanceName =
+ "__llvm_orc_SimpleExecutorMemoryManager_Instance";
+const char *SimpleExecutorMemoryManagerReserveWrapperName =
+ "__llvm_orc_SimpleExecutorMemoryManager_reserve_wrapper";
+const char *SimpleExecutorMemoryManagerFinalizeWrapperName =
+ "__llvm_orc_SimpleExecutorMemoryManager_finalize_wrapper";
+const char *SimpleExecutorMemoryManagerDeallocateWrapperName =
+ "__llvm_orc_SimpleExecutorMemoryManager_deallocate_wrapper";
+const char *MemoryWriteUInt8sWrapperName =
+ "__llvm_orc_bootstrap_mem_write_uint8s_wrapper";
+const char *MemoryWriteUInt16sWrapperName =
+ "__llvm_orc_bootstrap_mem_write_uint16s_wrapper";
+const char *MemoryWriteUInt32sWrapperName =
+ "__llvm_orc_bootstrap_mem_write_uint32s_wrapper";
+const char *MemoryWriteUInt64sWrapperName =
+ "__llvm_orc_bootstrap_mem_write_uint64s_wrapper";
+const char *MemoryWriteBuffersWrapperName =
+ "__llvm_orc_bootstrap_mem_write_buffers_wrapper";
+const char *RegisterEHFrameSectionCustomDirectWrapperName =
+ "__llvm_orc_bootstrap_register_ehframe_section_custom_direct_wrapper";
+const char *DeregisterEHFrameSectionCustomDirectWrapperName =
+ "__llvm_orc_bootstrap_deregister_ehframe_section_custom_direct_wrapper";
+const char *RunAsMainWrapperName = "__llvm_orc_bootstrap_run_as_main_wrapper";
+
+} // end namespace rt
+} // end namespace orc
+} // end namespace llvm
diff --git a/llvm/lib/ExecutionEngine/Orc/Shared/RPCError.cpp b/llvm/lib/ExecutionEngine/Orc/Shared/RPCError.cpp
deleted file mode 100644
index a55cb220f218..000000000000
--- a/llvm/lib/ExecutionEngine/Orc/Shared/RPCError.cpp
+++ /dev/null
@@ -1,58 +0,0 @@
-//===--------------- RPCError.cpp - RPCERror implementation ---------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// RPC Error type implmentations.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ExecutionEngine/Orc/Shared/RPCUtils.h"
-#include "llvm/Support/Error.h"
-#include "llvm/Support/raw_ostream.h"
-
-#include <string>
-#include <system_error>
-
-char llvm::orc::shared::RPCFatalError::ID = 0;
-char llvm::orc::shared::ConnectionClosed::ID = 0;
-char llvm::orc::shared::ResponseAbandoned::ID = 0;
-char llvm::orc::shared::CouldNotNegotiate::ID = 0;
-
-namespace llvm {
-namespace orc {
-namespace shared {
-
-std::error_code ConnectionClosed::convertToErrorCode() const {
- return orcError(OrcErrorCode::RPCConnectionClosed);
-}
-
-void ConnectionClosed::log(raw_ostream &OS) const {
- OS << "RPC connection already closed";
-}
-
-std::error_code ResponseAbandoned::convertToErrorCode() const {
- return orcError(OrcErrorCode::RPCResponseAbandoned);
-}
-
-void ResponseAbandoned::log(raw_ostream &OS) const {
- OS << "RPC response abandoned";
-}
-
-CouldNotNegotiate::CouldNotNegotiate(std::string Signature)
- : Signature(std::move(Signature)) {}
-
-std::error_code CouldNotNegotiate::convertToErrorCode() const {
- return orcError(OrcErrorCode::RPCCouldNotNegotiateFunction);
-}
-
-void CouldNotNegotiate::log(raw_ostream &OS) const {
- OS << "Could not negotiate RPC function " << Signature;
-}
-
-} // end namespace shared
-} // end namespace orc
-} // end namespace llvm
diff --git a/llvm/lib/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.cpp b/llvm/lib/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.cpp
new file mode 100644
index 000000000000..64fc717b7b56
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.cpp
@@ -0,0 +1,250 @@
+//===------ SimpleRemoteEPCUtils.cpp - Utils for Simple Remote EPC --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Message definitions and other utilities for SimpleRemoteEPC and
+// SimpleRemoteEPCServer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/FormatVariadic.h"
+
+#if !defined(_MSC_VER) && !defined(__MINGW32__)
+#include <unistd.h>
+#else
+#include <io.h>
+#endif
+
+namespace {
+
+struct FDMsgHeader {
+ static constexpr unsigned MsgSizeOffset = 0;
+ static constexpr unsigned OpCOffset = MsgSizeOffset + sizeof(uint64_t);
+ static constexpr unsigned SeqNoOffset = OpCOffset + sizeof(uint64_t);
+ static constexpr unsigned TagAddrOffset = SeqNoOffset + sizeof(uint64_t);
+ static constexpr unsigned Size = TagAddrOffset + sizeof(uint64_t);
+};
+
+} // namespace
+
+namespace llvm {
+namespace orc {
+namespace SimpleRemoteEPCDefaultBootstrapSymbolNames {
+
+const char *ExecutorSessionObjectName =
+ "__llvm_orc_SimpleRemoteEPC_dispatch_ctx";
+const char *DispatchFnName = "__llvm_orc_SimpleRemoteEPC_dispatch_fn";
+
+} // end namespace SimpleRemoteEPCDefaultBootstrapSymbolNames
+
+SimpleRemoteEPCTransportClient::~SimpleRemoteEPCTransportClient() {}
+SimpleRemoteEPCTransport::~SimpleRemoteEPCTransport() {}
+
+Expected<std::unique_ptr<FDSimpleRemoteEPCTransport>>
+FDSimpleRemoteEPCTransport::Create(SimpleRemoteEPCTransportClient &C, int InFD,
+ int OutFD) {
+#if LLVM_ENABLE_THREADS
+ if (InFD == -1)
+ return make_error<StringError>("Invalid input file descriptor " +
+ Twine(InFD),
+ inconvertibleErrorCode());
+ if (OutFD == -1)
+ return make_error<StringError>("Invalid output file descriptor " +
+ Twine(OutFD),
+ inconvertibleErrorCode());
+ std::unique_ptr<FDSimpleRemoteEPCTransport> FDT(
+ new FDSimpleRemoteEPCTransport(C, InFD, OutFD));
+ return std::move(FDT);
+#else
+ return make_error<StringError>("FD-based SimpleRemoteEPC transport requires "
+ "thread support, but llvm was built with "
+ "LLVM_ENABLE_THREADS=Off",
+ inconvertibleErrorCode());
+#endif
+}
+
+FDSimpleRemoteEPCTransport::~FDSimpleRemoteEPCTransport() {
+#if LLVM_ENABLE_THREADS
+ ListenerThread.join();
+#endif
+}
+
+Error FDSimpleRemoteEPCTransport::start() {
+#if LLVM_ENABLE_THREADS
+ ListenerThread = std::thread([this]() { listenLoop(); });
+ return Error::success();
+#endif
+ llvm_unreachable("Should not be called with LLVM_ENABLE_THREADS=Off");
+}
+
+Error FDSimpleRemoteEPCTransport::sendMessage(SimpleRemoteEPCOpcode OpC,
+ uint64_t SeqNo,
+ ExecutorAddr TagAddr,
+ ArrayRef<char> ArgBytes) {
+ char HeaderBuffer[FDMsgHeader::Size];
+
+ *((support::ulittle64_t *)(HeaderBuffer + FDMsgHeader::MsgSizeOffset)) =
+ FDMsgHeader::Size + ArgBytes.size();
+ *((support::ulittle64_t *)(HeaderBuffer + FDMsgHeader::OpCOffset)) =
+ static_cast<uint64_t>(OpC);
+ *((support::ulittle64_t *)(HeaderBuffer + FDMsgHeader::SeqNoOffset)) = SeqNo;
+ *((support::ulittle64_t *)(HeaderBuffer + FDMsgHeader::TagAddrOffset)) =
+ TagAddr.getValue();
+
+ std::lock_guard<std::mutex> Lock(M);
+ if (Disconnected)
+ return make_error<StringError>("FD-transport disconnected",
+ inconvertibleErrorCode());
+ if (int ErrNo = writeBytes(HeaderBuffer, FDMsgHeader::Size))
+ return errorCodeToError(std::error_code(ErrNo, std::generic_category()));
+ if (int ErrNo = writeBytes(ArgBytes.data(), ArgBytes.size()))
+ return errorCodeToError(std::error_code(ErrNo, std::generic_category()));
+ return Error::success();
+}
+
+void FDSimpleRemoteEPCTransport::disconnect() {
+ if (Disconnected)
+ return; // Return if already disconnected.
+
+ Disconnected = true;
+ bool CloseOutFD = InFD != OutFD;
+
+ // Close InFD.
+ while (close(InFD) == -1) {
+ if (errno == EBADF)
+ break;
+ }
+
+ // Close OutFD.
+ if (CloseOutFD) {
+ while (close(OutFD) == -1) {
+ if (errno == EBADF)
+ break;
+ }
+ }
+}
+
+static Error makeUnexpectedEOFError() {
+ return make_error<StringError>("Unexpected end-of-file",
+ inconvertibleErrorCode());
+}
+
+Error FDSimpleRemoteEPCTransport::readBytes(char *Dst, size_t Size,
+ bool *IsEOF) {
+ assert(Dst && "Attempt to read into null.");
+ ssize_t Completed = 0;
+ while (Completed < static_cast<ssize_t>(Size)) {
+ ssize_t Read = ::read(InFD, Dst + Completed, Size - Completed);
+ if (Read <= 0) {
+ auto ErrNo = errno;
+ if (Read == 0) {
+ if (Completed == 0 && IsEOF) {
+ *IsEOF = true;
+ return Error::success();
+ } else
+ return makeUnexpectedEOFError();
+ } else if (ErrNo == EAGAIN || ErrNo == EINTR)
+ continue;
+ else {
+ std::lock_guard<std::mutex> Lock(M);
+ if (Disconnected && IsEOF) { // disconnect called, pretend this is EOF.
+ *IsEOF = true;
+ return Error::success();
+ }
+ return errorCodeToError(
+ std::error_code(ErrNo, std::generic_category()));
+ }
+ }
+ Completed += Read;
+ }
+ return Error::success();
+}
+
+int FDSimpleRemoteEPCTransport::writeBytes(const char *Src, size_t Size) {
+  assert(Src && "Attempt to write from null.");
+ ssize_t Completed = 0;
+ while (Completed < static_cast<ssize_t>(Size)) {
+ ssize_t Written = ::write(OutFD, Src + Completed, Size - Completed);
+ if (Written < 0) {
+ auto ErrNo = errno;
+ if (ErrNo == EAGAIN || ErrNo == EINTR)
+ continue;
+ else
+ return ErrNo;
+ }
+ Completed += Written;
+ }
+ return 0;
+}
+
+void FDSimpleRemoteEPCTransport::listenLoop() {
+ Error Err = Error::success();
+ do {
+
+ char HeaderBuffer[FDMsgHeader::Size];
+ // Read the header buffer.
+ {
+ bool IsEOF = false;
+ if (auto Err2 = readBytes(HeaderBuffer, FDMsgHeader::Size, &IsEOF)) {
+ Err = joinErrors(std::move(Err), std::move(Err2));
+ break;
+ }
+ if (IsEOF)
+ break;
+ }
+
+ // Decode header buffer.
+ uint64_t MsgSize;
+ SimpleRemoteEPCOpcode OpC;
+ uint64_t SeqNo;
+ ExecutorAddr TagAddr;
+
+ MsgSize =
+ *((support::ulittle64_t *)(HeaderBuffer + FDMsgHeader::MsgSizeOffset));
+ OpC = static_cast<SimpleRemoteEPCOpcode>(static_cast<uint64_t>(
+ *((support::ulittle64_t *)(HeaderBuffer + FDMsgHeader::OpCOffset))));
+ SeqNo =
+ *((support::ulittle64_t *)(HeaderBuffer + FDMsgHeader::SeqNoOffset));
+ TagAddr.setValue(
+ *((support::ulittle64_t *)(HeaderBuffer + FDMsgHeader::TagAddrOffset)));
+
+ if (MsgSize < FDMsgHeader::Size) {
+ Err = joinErrors(std::move(Err),
+ make_error<StringError>("Message size too small",
+ inconvertibleErrorCode()));
+ break;
+ }
+
+ // Read the argument bytes.
+ SimpleRemoteEPCArgBytesVector ArgBytes;
+ ArgBytes.resize(MsgSize - FDMsgHeader::Size);
+ if (auto Err2 = readBytes(ArgBytes.data(), ArgBytes.size())) {
+ Err = joinErrors(std::move(Err), std::move(Err2));
+ break;
+ }
+
+ if (auto Action = C.handleMessage(OpC, SeqNo, TagAddr, ArgBytes)) {
+ if (*Action == SimpleRemoteEPCTransportClient::EndSession)
+ break;
+ } else {
+ Err = joinErrors(std::move(Err), Action.takeError());
+ break;
+ }
+ } while (true);
+
+ // Attempt to close FDs, set Disconnected to true so that subsequent
+ // sendMessage calls fail.
+ disconnect();
+
+ // Call up to the client to handle the disconnection.
+ C.handleDisconnect(std::move(Err));
+}
+
+} // end namespace orc
+} // end namespace llvm
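
The FD transport above frames every message as a fixed 32-byte header of four
little-endian uint64 fields (total message size, opcode, sequence number, tag
address) followed by the argument bytes; listenLoop rejects any message whose
declared size is smaller than the header. A minimal, self-contained sketch of
that framing, written without LLVM's support classes (frameMessage and putLE64
are illustrative names, not part of the API):

#include <cstdint>
#include <cstring>
#include <vector>

// Pack a 64-bit value in little-endian order, matching support::ulittle64_t.
static void putLE64(char *Dst, uint64_t V) {
  for (int I = 0; I < 8; ++I)
    Dst[I] = static_cast<char>((V >> (8 * I)) & 0xff);
}

// Build [MsgSize | OpC | SeqNo | TagAddr | ArgBytes...]. MsgSize counts the
// header itself, which is why the reader treats MsgSize < 32 as an error.
std::vector<char> frameMessage(uint64_t OpC, uint64_t SeqNo, uint64_t TagAddr,
                               const std::vector<char> &ArgBytes) {
  constexpr size_t HeaderSize = 4 * sizeof(uint64_t);
  std::vector<char> Msg(HeaderSize + ArgBytes.size());
  putLE64(&Msg[0], Msg.size());
  putLE64(&Msg[8], OpC);
  putLE64(&Msg[16], SeqNo);
  putLE64(&Msg[24], TagAddr);
  if (!ArgBytes.empty())
    std::memcpy(Msg.data() + HeaderSize, ArgBytes.data(), ArgBytes.size());
  return Msg;
}
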
diff --git a/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp b/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp
new file mode 100644
index 000000000000..47364a92a451
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp
@@ -0,0 +1,406 @@
+//===------- SimpleRemoteEPC.cpp -- Simple remote executor control --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/SimpleRemoteEPC.h"
+#include "llvm/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.h"
+#include "llvm/ExecutionEngine/Orc/EPCGenericMemoryAccess.h"
+#include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h"
+#include "llvm/Support/FormatVariadic.h"
+
+#define DEBUG_TYPE "orc"
+
+namespace llvm {
+namespace orc {
+
+SimpleRemoteEPC::~SimpleRemoteEPC() {
+#ifndef NDEBUG
+ std::lock_guard<std::mutex> Lock(SimpleRemoteEPCMutex);
+ assert(Disconnected && "Destroyed without disconnection");
+#endif // NDEBUG
+}
+
+Expected<tpctypes::DylibHandle>
+SimpleRemoteEPC::loadDylib(const char *DylibPath) {
+ return DylibMgr->open(DylibPath, 0);
+}
+
+Expected<std::vector<tpctypes::LookupResult>>
+SimpleRemoteEPC::lookupSymbols(ArrayRef<LookupRequest> Request) {
+ std::vector<tpctypes::LookupResult> Result;
+
+ for (auto &Element : Request) {
+ if (auto R = DylibMgr->lookup(Element.Handle, Element.Symbols)) {
+ Result.push_back({});
+ Result.back().reserve(R->size());
+ for (auto Addr : *R)
+ Result.back().push_back(Addr.getValue());
+ } else
+ return R.takeError();
+ }
+ return std::move(Result);
+}
+
+Expected<int32_t> SimpleRemoteEPC::runAsMain(ExecutorAddr MainFnAddr,
+ ArrayRef<std::string> Args) {
+ int64_t Result = 0;
+ if (auto Err = callSPSWrapper<rt::SPSRunAsMainSignature>(
+ RunAsMainAddr, Result, ExecutorAddr(MainFnAddr), Args))
+ return std::move(Err);
+ return Result;
+}
+
+void SimpleRemoteEPC::callWrapperAsync(ExecutorAddr WrapperFnAddr,
+ IncomingWFRHandler OnComplete,
+ ArrayRef<char> ArgBuffer) {
+ uint64_t SeqNo;
+ {
+ std::lock_guard<std::mutex> Lock(SimpleRemoteEPCMutex);
+ SeqNo = getNextSeqNo();
+ assert(!PendingCallWrapperResults.count(SeqNo) && "SeqNo already in use");
+ PendingCallWrapperResults[SeqNo] = std::move(OnComplete);
+ }
+
+ if (auto Err = sendMessage(SimpleRemoteEPCOpcode::CallWrapper, SeqNo,
+ WrapperFnAddr, ArgBuffer)) {
+ IncomingWFRHandler H;
+
+ // We just registered OnComplete, but there may be a race between this
+ // thread returning from sendMessage and handleDisconnect being called from
+ // the transport's listener thread. If handleDisconnect gets there first
+ // then it will have failed 'H' for us. If we get there first (or if
+ // handleDisconnect already ran) then we need to take care of it.
+ {
+ std::lock_guard<std::mutex> Lock(SimpleRemoteEPCMutex);
+ auto I = PendingCallWrapperResults.find(SeqNo);
+ if (I != PendingCallWrapperResults.end()) {
+ H = std::move(I->second);
+ PendingCallWrapperResults.erase(I);
+ }
+ }
+
+ if (H)
+ H(shared::WrapperFunctionResult::createOutOfBandError("disconnecting"));
+
+ getExecutionSession().reportError(std::move(Err));
+ }
+}
+
+Error SimpleRemoteEPC::disconnect() {
+ T->disconnect();
+ D->shutdown();
+ std::unique_lock<std::mutex> Lock(SimpleRemoteEPCMutex);
+ DisconnectCV.wait(Lock, [this] { return Disconnected; });
+ return std::move(DisconnectErr);
+}
+
+Expected<SimpleRemoteEPCTransportClient::HandleMessageAction>
+SimpleRemoteEPC::handleMessage(SimpleRemoteEPCOpcode OpC, uint64_t SeqNo,
+ ExecutorAddr TagAddr,
+ SimpleRemoteEPCArgBytesVector ArgBytes) {
+
+ LLVM_DEBUG({
+ dbgs() << "SimpleRemoteEPC::handleMessage: opc = ";
+ switch (OpC) {
+ case SimpleRemoteEPCOpcode::Setup:
+ dbgs() << "Setup";
+ assert(SeqNo == 0 && "Non-zero SeqNo for Setup?");
+ assert(TagAddr.getValue() == 0 && "Non-zero TagAddr for Setup?");
+ break;
+ case SimpleRemoteEPCOpcode::Hangup:
+ dbgs() << "Hangup";
+ assert(SeqNo == 0 && "Non-zero SeqNo for Hangup?");
+ assert(TagAddr.getValue() == 0 && "Non-zero TagAddr for Hangup?");
+ break;
+ case SimpleRemoteEPCOpcode::Result:
+ dbgs() << "Result";
+ assert(TagAddr.getValue() == 0 && "Non-zero TagAddr for Result?");
+ break;
+ case SimpleRemoteEPCOpcode::CallWrapper:
+ dbgs() << "CallWrapper";
+ break;
+ }
+ dbgs() << ", seqno = " << SeqNo
+ << ", tag-addr = " << formatv("{0:x}", TagAddr.getValue())
+ << ", arg-buffer = " << formatv("{0:x}", ArgBytes.size())
+ << " bytes\n";
+ });
+
+ using UT = std::underlying_type_t<SimpleRemoteEPCOpcode>;
+ if (static_cast<UT>(OpC) > static_cast<UT>(SimpleRemoteEPCOpcode::LastOpC))
+ return make_error<StringError>("Unexpected opcode",
+ inconvertibleErrorCode());
+
+ switch (OpC) {
+ case SimpleRemoteEPCOpcode::Setup:
+ if (auto Err = handleSetup(SeqNo, TagAddr, std::move(ArgBytes)))
+ return std::move(Err);
+ break;
+ case SimpleRemoteEPCOpcode::Hangup:
+ T->disconnect();
+ if (auto Err = handleHangup(std::move(ArgBytes)))
+ return std::move(Err);
+ return EndSession;
+ case SimpleRemoteEPCOpcode::Result:
+ if (auto Err = handleResult(SeqNo, TagAddr, std::move(ArgBytes)))
+ return std::move(Err);
+ break;
+ case SimpleRemoteEPCOpcode::CallWrapper:
+ handleCallWrapper(SeqNo, TagAddr, std::move(ArgBytes));
+ break;
+ }
+ return ContinueSession;
+}
+
+void SimpleRemoteEPC::handleDisconnect(Error Err) {
+ LLVM_DEBUG({
+ dbgs() << "SimpleRemoteEPC::handleDisconnect: "
+ << (Err ? "failure" : "success") << "\n";
+ });
+
+ PendingCallWrapperResultsMap TmpPending;
+
+ {
+ std::lock_guard<std::mutex> Lock(SimpleRemoteEPCMutex);
+ std::swap(TmpPending, PendingCallWrapperResults);
+ }
+
+ for (auto &KV : TmpPending)
+ KV.second(
+ shared::WrapperFunctionResult::createOutOfBandError("disconnecting"));
+
+ std::lock_guard<std::mutex> Lock(SimpleRemoteEPCMutex);
+ DisconnectErr = joinErrors(std::move(DisconnectErr), std::move(Err));
+ Disconnected = true;
+ DisconnectCV.notify_all();
+}
+
+Expected<std::unique_ptr<jitlink::JITLinkMemoryManager>>
+SimpleRemoteEPC::createDefaultMemoryManager(SimpleRemoteEPC &SREPC) {
+ EPCGenericJITLinkMemoryManager::SymbolAddrs SAs;
+ if (auto Err = SREPC.getBootstrapSymbols(
+ {{SAs.Allocator, rt::SimpleExecutorMemoryManagerInstanceName},
+ {SAs.Reserve, rt::SimpleExecutorMemoryManagerReserveWrapperName},
+ {SAs.Finalize, rt::SimpleExecutorMemoryManagerFinalizeWrapperName},
+ {SAs.Deallocate,
+ rt::SimpleExecutorMemoryManagerDeallocateWrapperName}}))
+ return std::move(Err);
+
+ return std::make_unique<EPCGenericJITLinkMemoryManager>(SREPC, SAs);
+}
+
+Expected<std::unique_ptr<ExecutorProcessControl::MemoryAccess>>
+SimpleRemoteEPC::createDefaultMemoryAccess(SimpleRemoteEPC &SREPC) {
+ return nullptr;
+}
+
+Error SimpleRemoteEPC::sendMessage(SimpleRemoteEPCOpcode OpC, uint64_t SeqNo,
+ ExecutorAddr TagAddr,
+ ArrayRef<char> ArgBytes) {
+ assert(OpC != SimpleRemoteEPCOpcode::Setup &&
+ "SimpleRemoteEPC sending Setup message? That's the wrong direction.");
+
+ LLVM_DEBUG({
+ dbgs() << "SimpleRemoteEPC::sendMessage: opc = ";
+ switch (OpC) {
+ case SimpleRemoteEPCOpcode::Hangup:
+ dbgs() << "Hangup";
+ assert(SeqNo == 0 && "Non-zero SeqNo for Hangup?");
+ assert(TagAddr.getValue() == 0 && "Non-zero TagAddr for Hangup?");
+ break;
+ case SimpleRemoteEPCOpcode::Result:
+ dbgs() << "Result";
+ assert(TagAddr.getValue() == 0 && "Non-zero TagAddr for Result?");
+ break;
+ case SimpleRemoteEPCOpcode::CallWrapper:
+ dbgs() << "CallWrapper";
+ break;
+ default:
+ llvm_unreachable("Invalid opcode");
+ }
+ dbgs() << ", seqno = " << SeqNo
+ << ", tag-addr = " << formatv("{0:x}", TagAddr.getValue())
+ << ", arg-buffer = " << formatv("{0:x}", ArgBytes.size())
+ << " bytes\n";
+ });
+ auto Err = T->sendMessage(OpC, SeqNo, TagAddr, ArgBytes);
+ LLVM_DEBUG({
+ if (Err)
+ dbgs() << " \\--> SimpleRemoteEPC::sendMessage failed\n";
+ });
+ return Err;
+}
+
+Error SimpleRemoteEPC::handleSetup(uint64_t SeqNo, ExecutorAddr TagAddr,
+ SimpleRemoteEPCArgBytesVector ArgBytes) {
+ if (SeqNo != 0)
+ return make_error<StringError>("Setup packet SeqNo not zero",
+ inconvertibleErrorCode());
+
+ if (TagAddr)
+ return make_error<StringError>("Setup packet TagAddr not zero",
+ inconvertibleErrorCode());
+
+ std::lock_guard<std::mutex> Lock(SimpleRemoteEPCMutex);
+ auto I = PendingCallWrapperResults.find(0);
+ assert(PendingCallWrapperResults.size() == 1 &&
+ I != PendingCallWrapperResults.end() &&
+ "Setup message handler not connectly set up");
+ auto SetupMsgHandler = std::move(I->second);
+ PendingCallWrapperResults.erase(I);
+
+ auto WFR =
+ shared::WrapperFunctionResult::copyFrom(ArgBytes.data(), ArgBytes.size());
+ SetupMsgHandler(std::move(WFR));
+ return Error::success();
+}
+
+Error SimpleRemoteEPC::setup(Setup S) {
+ using namespace SimpleRemoteEPCDefaultBootstrapSymbolNames;
+
+ std::promise<MSVCPExpected<SimpleRemoteEPCExecutorInfo>> EIP;
+ auto EIF = EIP.get_future();
+
+ // Prepare a handler for the setup packet.
+ PendingCallWrapperResults[0] =
+ RunInPlace()(
+ [&](shared::WrapperFunctionResult SetupMsgBytes) {
+ if (const char *ErrMsg = SetupMsgBytes.getOutOfBandError()) {
+ EIP.set_value(
+ make_error<StringError>(ErrMsg, inconvertibleErrorCode()));
+ return;
+ }
+ using SPSSerialize =
+ shared::SPSArgList<shared::SPSSimpleRemoteEPCExecutorInfo>;
+ shared::SPSInputBuffer IB(SetupMsgBytes.data(), SetupMsgBytes.size());
+ SimpleRemoteEPCExecutorInfo EI;
+ if (SPSSerialize::deserialize(IB, EI))
+ EIP.set_value(EI);
+ else
+ EIP.set_value(make_error<StringError>(
+ "Could not deserialize setup message", inconvertibleErrorCode()));
+ });
+
+ // Start the transport.
+ if (auto Err = T->start())
+ return Err;
+
+ // Wait for setup packet to arrive.
+ auto EI = EIF.get();
+ if (!EI) {
+ T->disconnect();
+ return EI.takeError();
+ }
+
+ LLVM_DEBUG({
+ dbgs() << "SimpleRemoteEPC received setup message:\n"
+ << " Triple: " << EI->TargetTriple << "\n"
+ << " Page size: " << EI->PageSize << "\n"
+ << " Bootstrap symbols:\n";
+ for (const auto &KV : EI->BootstrapSymbols)
+ dbgs() << " " << KV.first() << ": "
+ << formatv("{0:x16}", KV.second.getValue()) << "\n";
+ });
+ TargetTriple = Triple(EI->TargetTriple);
+ PageSize = EI->PageSize;
+ BootstrapSymbols = std::move(EI->BootstrapSymbols);
+
+ if (auto Err = getBootstrapSymbols(
+ {{JDI.JITDispatchContext, ExecutorSessionObjectName},
+ {JDI.JITDispatchFunction, DispatchFnName},
+ {RunAsMainAddr, rt::RunAsMainWrapperName}}))
+ return Err;
+
+ if (auto DM =
+ EPCGenericDylibManager::CreateWithDefaultBootstrapSymbols(*this))
+ DylibMgr = std::make_unique<EPCGenericDylibManager>(std::move(*DM));
+ else
+ return DM.takeError();
+
+ // Set a default CreateMemoryManager if none is specified.
+ if (!S.CreateMemoryManager)
+ S.CreateMemoryManager = createDefaultMemoryManager;
+
+ if (auto MemMgr = S.CreateMemoryManager(*this)) {
+ OwnedMemMgr = std::move(*MemMgr);
+ this->MemMgr = OwnedMemMgr.get();
+ } else
+ return MemMgr.takeError();
+
+ // Set a default CreateMemoryAccess if none is specified.
+ if (!S.CreateMemoryAccess)
+ S.CreateMemoryAccess = createDefaultMemoryAccess;
+
+ if (auto MemAccess = S.CreateMemoryAccess(*this)) {
+ OwnedMemAccess = std::move(*MemAccess);
+ this->MemAccess = OwnedMemAccess.get();
+ } else
+ return MemAccess.takeError();
+
+ return Error::success();
+}
+
+Error SimpleRemoteEPC::handleResult(uint64_t SeqNo, ExecutorAddr TagAddr,
+ SimpleRemoteEPCArgBytesVector ArgBytes) {
+ IncomingWFRHandler SendResult;
+
+ if (TagAddr)
+ return make_error<StringError>("Unexpected TagAddr in result message",
+ inconvertibleErrorCode());
+
+ {
+ std::lock_guard<std::mutex> Lock(SimpleRemoteEPCMutex);
+ auto I = PendingCallWrapperResults.find(SeqNo);
+ if (I == PendingCallWrapperResults.end())
+ return make_error<StringError>("No call for sequence number " +
+ Twine(SeqNo),
+ inconvertibleErrorCode());
+ SendResult = std::move(I->second);
+ PendingCallWrapperResults.erase(I);
+ releaseSeqNo(SeqNo);
+ }
+
+ auto WFR =
+ shared::WrapperFunctionResult::copyFrom(ArgBytes.data(), ArgBytes.size());
+ SendResult(std::move(WFR));
+ return Error::success();
+}
+
+void SimpleRemoteEPC::handleCallWrapper(
+ uint64_t RemoteSeqNo, ExecutorAddr TagAddr,
+ SimpleRemoteEPCArgBytesVector ArgBytes) {
+ assert(ES && "No ExecutionSession attached");
+ D->dispatch(makeGenericNamedTask(
+ [this, RemoteSeqNo, TagAddr, ArgBytes = std::move(ArgBytes)]() {
+ ES->runJITDispatchHandler(
+ [this, RemoteSeqNo](shared::WrapperFunctionResult WFR) {
+ if (auto Err =
+ sendMessage(SimpleRemoteEPCOpcode::Result, RemoteSeqNo,
+ ExecutorAddr(), {WFR.data(), WFR.size()}))
+ getExecutionSession().reportError(std::move(Err));
+ },
+ TagAddr.getValue(), ArgBytes);
+ },
+ "callWrapper task"));
+}
+
+Error SimpleRemoteEPC::handleHangup(SimpleRemoteEPCArgBytesVector ArgBytes) {
+ using namespace llvm::orc::shared;
+ auto WFR = WrapperFunctionResult::copyFrom(ArgBytes.data(), ArgBytes.size());
+ if (const char *ErrMsg = WFR.getOutOfBandError())
+ return make_error<StringError>(ErrMsg, inconvertibleErrorCode());
+
+ detail::SPSSerializableError Info;
+ SPSInputBuffer IB(WFR.data(), WFR.size());
+ if (!SPSArgList<SPSError>::deserialize(IB, Info))
+ return make_error<StringError>("Could not deserialize hangup info",
+ inconvertibleErrorCode());
+ return fromSPSSerializable(std::move(Info));
+}
+
+} // end namespace orc
+} // end namespace llvm
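
callWrapperAsync and handleResult above pair each outgoing CallWrapper with
its later Result through the sequence number: a handler is parked in
PendingCallWrapperResults under SeqNo before the message is sent, and
handleResult (or handleDisconnect, on failure) removes and invokes it. A
stripped-down sketch of that pending-call table using only standard-library
types (PendingCalls is an illustrative name, not an LLVM class):

#include <cstdint>
#include <future>
#include <map>
#include <mutex>
#include <string>

class PendingCalls {
public:
  // Register a promise for SeqNo and return the future to wait on.
  std::future<std::string> expectResult(uint64_t SeqNo) {
    std::lock_guard<std::mutex> Lock(M);
    return Pending[SeqNo].get_future();
  }

  // Deliver a result; returns false if nothing was waiting on SeqNo.
  bool deliver(uint64_t SeqNo, std::string Bytes) {
    std::promise<std::string> P;
    {
      std::lock_guard<std::mutex> Lock(M);
      auto I = Pending.find(SeqNo);
      if (I == Pending.end())
        return false;
      P = std::move(I->second);
      Pending.erase(I);
    }
    P.set_value(std::move(Bytes)); // Complete the caller outside the lock.
    return true;
  }

private:
  std::mutex M;
  std::map<uint64_t, std::promise<std::string>> Pending;
};

handleDisconnect performs the equivalent of delivering an out-of-band
"disconnecting" error to every remaining entry, so no caller is left waiting
once the transport goes away.
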
diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp
index 43c2a44835fd..4c15e25b1d89 100644
--- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp
@@ -10,6 +10,7 @@
#include "llvm/ExecutionEngine/JITSymbol.h"
#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/ManagedStatic.h"
#include <cstdint>
@@ -64,14 +65,23 @@ LLVM_ATTRIBUTE_NOINLINE void __jit_debug_register_code() {
}
using namespace llvm;
+using namespace llvm::orc;
// Serialize rendezvous with the debugger as well as access to shared data.
ManagedStatic<std::mutex> JITDebugLock;
// Register debug object, return error message or null for success.
-static void registerJITLoaderGDBImpl(JITTargetAddress Addr, uint64_t Size) {
+static void registerJITLoaderGDBImpl(const char *ObjAddr, size_t Size) {
+ LLVM_DEBUG({
+ dbgs() << "Registering debug object with GDB JIT interface "
+ << formatv("([{0:x16} -- {1:x16}])",
+ reinterpret_cast<uintptr_t>(ObjAddr),
+ reinterpret_cast<uintptr_t>(ObjAddr + Size))
+ << "\n";
+ });
+
jit_code_entry *E = new jit_code_entry;
- E->symfile_addr = jitTargetAddressToPointer<const char *>(Addr);
+ E->symfile_addr = ObjAddr;
E->symfile_size = Size;
E->prev_entry = nullptr;
@@ -92,10 +102,26 @@ static void registerJITLoaderGDBImpl(JITTargetAddress Addr, uint64_t Size) {
__jit_debug_register_code();
}
-extern "C" orc::shared::detail::CWrapperFunctionResult
+extern "C" orc::shared::CWrapperFunctionResult
+llvm_orc_registerJITLoaderGDBAllocAction(const char *Data, size_t Size) {
+ using namespace orc::shared;
+ return WrapperFunction<SPSError()>::handle(nullptr, 0,
+ [=]() -> Error {
+ registerJITLoaderGDBImpl(Data,
+ Size);
+ return Error::success();
+ })
+ .release();
+}
+
+extern "C" orc::shared::CWrapperFunctionResult
llvm_orc_registerJITLoaderGDBWrapper(const char *Data, uint64_t Size) {
using namespace orc::shared;
- return WrapperFunction<void(SPSExecutorAddress, uint64_t)>::handle(
- Data, Size, registerJITLoaderGDBImpl)
+ return WrapperFunction<void(SPSExecutorAddrRange)>::handle(
+ Data, Size,
+ [](ExecutorAddrRange R) {
+ registerJITLoaderGDBImpl(R.Start.toPtr<char *>(),
+ R.size().getValue());
+ })
.release();
}
diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/OrcRTBootstrap.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/OrcRTBootstrap.cpp
new file mode 100644
index 000000000000..82aa62a0c0d9
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/OrcRTBootstrap.cpp
@@ -0,0 +1,84 @@
+//===------------------------ OrcRTBootstrap.cpp --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "OrcRTBootstrap.h"
+
+#include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h"
+#include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h"
+#include "llvm/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.h"
+#include "llvm/ExecutionEngine/Orc/TargetProcess/TargetExecutionUtils.h"
+
+#define DEBUG_TYPE "orc"
+
+using namespace llvm::orc::shared;
+
+namespace llvm {
+namespace orc {
+namespace rt_bootstrap {
+
+template <typename WriteT, typename SPSWriteT>
+static llvm::orc::shared::CWrapperFunctionResult
+writeUIntsWrapper(const char *ArgData, size_t ArgSize) {
+ return WrapperFunction<void(SPSSequence<SPSWriteT>)>::handle(
+ ArgData, ArgSize,
+ [](std::vector<WriteT> Ws) {
+ for (auto &W : Ws)
+ *W.Addr.template toPtr<decltype(W.Value) *>() = W.Value;
+ })
+ .release();
+}
+
+static llvm::orc::shared::CWrapperFunctionResult
+writeBuffersWrapper(const char *ArgData, size_t ArgSize) {
+ return WrapperFunction<void(SPSSequence<SPSMemoryAccessBufferWrite>)>::handle(
+ ArgData, ArgSize,
+ [](std::vector<tpctypes::BufferWrite> Ws) {
+ for (auto &W : Ws)
+ memcpy(W.Addr.template toPtr<char *>(), W.Buffer.data(),
+ W.Buffer.size());
+ })
+ .release();
+}
+
+static llvm::orc::shared::CWrapperFunctionResult
+runAsMainWrapper(const char *ArgData, size_t ArgSize) {
+ return WrapperFunction<rt::SPSRunAsMainSignature>::handle(
+ ArgData, ArgSize,
+ [](ExecutorAddr MainAddr,
+ std::vector<std::string> Args) -> int64_t {
+ return runAsMain(MainAddr.toPtr<int (*)(int, char *[])>(), Args);
+ })
+ .release();
+}
+
+void addTo(StringMap<ExecutorAddr> &M) {
+ M[rt::MemoryWriteUInt8sWrapperName] = ExecutorAddr::fromPtr(
+ &writeUIntsWrapper<tpctypes::UInt8Write,
+ shared::SPSMemoryAccessUInt8Write>);
+ M[rt::MemoryWriteUInt16sWrapperName] = ExecutorAddr::fromPtr(
+ &writeUIntsWrapper<tpctypes::UInt16Write,
+ shared::SPSMemoryAccessUInt16Write>);
+ M[rt::MemoryWriteUInt32sWrapperName] = ExecutorAddr::fromPtr(
+ &writeUIntsWrapper<tpctypes::UInt32Write,
+ shared::SPSMemoryAccessUInt32Write>);
+ M[rt::MemoryWriteUInt64sWrapperName] = ExecutorAddr::fromPtr(
+ &writeUIntsWrapper<tpctypes::UInt64Write,
+ shared::SPSMemoryAccessUInt64Write>);
+ M[rt::MemoryWriteBuffersWrapperName] =
+ ExecutorAddr::fromPtr(&writeBuffersWrapper);
+ M[rt::RegisterEHFrameSectionCustomDirectWrapperName] = ExecutorAddr::fromPtr(
+ &llvm_orc_registerEHFrameSectionCustomDirectWrapper);
+ M[rt::DeregisterEHFrameSectionCustomDirectWrapperName] =
+ ExecutorAddr::fromPtr(
+ &llvm_orc_deregisterEHFrameSectionCustomDirectWrapper);
+ M[rt::RunAsMainWrapperName] = ExecutorAddr::fromPtr(&runAsMainWrapper);
+}
+
+} // end namespace rt_bootstrap
+} // end namespace orc
+} // end namespace llvm
diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/OrcRTBootstrap.h b/llvm/lib/ExecutionEngine/Orc/TargetProcess/OrcRTBootstrap.h
new file mode 100644
index 000000000000..6b7ff79a3efc
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/OrcRTBootstrap.h
@@ -0,0 +1,36 @@
+//===----------------------- OrcRTBootstrap.h -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// OrcRTBootstrap.h provides functions that should be linked into the executor
+// to bootstrap common JIT functionality (e.g. memory allocation and memory
+// access).
+//
+// Call rt_bootstrap::addTo to add these functions to a bootstrap symbols map.
+//
+// FIXME: The functionality in this file should probably be moved to an ORC
+// runtime bootstrap library in compiler-rt.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LIB_EXECUTIONENGINE_ORC_TARGETPROCESS_ORCRTBOOTSTRAP_H
+#define LIB_EXECUTIONENGINE_ORC_TARGETPROCESS_ORCRTBOOTSTRAP_H
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h"
+
+namespace llvm {
+namespace orc {
+namespace rt_bootstrap {
+
+void addTo(StringMap<ExecutorAddr> &M);
+
+} // namespace rt_bootstrap
+} // end namespace orc
+} // end namespace llvm
+
+#endif // LIB_EXECUTIONENGINE_ORC_TARGETPROCESS_ORCRTBOOTSTRAP_H
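
As the header comment says, executors populate their bootstrap-symbol map by
calling addTo; SimpleRemoteEPCServer::defaultBootstrapSymbols() later in this
patch does exactly that. A short usage sketch, assuming only the declarations
visible in this diff and the same in-directory include as the server source
(makeBootstrapSymbols is an illustrative helper name):

#include "OrcRTBootstrap.h"

#include "llvm/ADT/StringMap.h"
#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h"

// Collect the executor-side bootstrap wrappers (memory writes, EH-frame
// registration, run-as-main) into a name -> address map for the setup message.
llvm::StringMap<llvm::orc::ExecutorAddr> makeBootstrapSymbols() {
  llvm::StringMap<llvm::orc::ExecutorAddr> Symbols;
  llvm::orc::rt_bootstrap::addTo(Symbols);
  return Symbols;
}
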
diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.cpp
index 4a408d61ee38..e331bad84200 100644
--- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.cpp
@@ -1,9 +1,8 @@
//===--------- RegisterEHFrames.cpp - Register EH frame sections ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -86,11 +85,11 @@ static Error deregisterFrameWrapper(const void *P) {
}
#endif
-#ifdef __APPLE__
+#if defined(HAVE_UNW_ADD_DYNAMIC_FDE) || defined(__APPLE__)
template <typename HandleFDEFn>
-Error walkAppleEHFrameSection(const char *const SectionStart,
- size_t SectionSize, HandleFDEFn HandleFDE) {
+Error walkLibunwindEHFrameSection(const char *const SectionStart,
+ size_t SectionSize, HandleFDEFn HandleFDE) {
const char *CurCFIRecord = SectionStart;
const char *End = SectionStart + SectionSize;
uint64_t Size = *reinterpret_cast<const uint32_t *>(CurCFIRecord);
@@ -124,16 +123,19 @@ Error walkAppleEHFrameSection(const char *const SectionStart,
return Error::success();
}
-#endif // __APPLE__
+#endif // HAVE_UNW_ADD_DYNAMIC_FDE || __APPLE__
Error registerEHFrameSection(const void *EHFrameSectionAddr,
size_t EHFrameSectionSize) {
-#ifdef __APPLE__
- // On Darwin __register_frame has to be called for each FDE entry.
- return walkAppleEHFrameSection(static_cast<const char *>(EHFrameSectionAddr),
- EHFrameSectionSize, registerFrameWrapper);
+ /* libgcc and libunwind __register_frame behave differently. We use the
+ * presence of __unw_add_dynamic_fde to detect libunwind. */
+#if defined(HAVE_UNW_ADD_DYNAMIC_FDE) || defined(__APPLE__)
+ // With libunwind, __register_frame has to be called for each FDE entry.
+ return walkLibunwindEHFrameSection(
+ static_cast<const char *>(EHFrameSectionAddr), EHFrameSectionSize,
+ registerFrameWrapper);
#else
- // On Linux __register_frame takes a single argument:
+ // With libgcc, __register_frame takes a single argument:
// a pointer to the start of the .eh_frame section.
// How can it find the end? Because crtendS.o is linked
@@ -144,9 +146,10 @@ Error registerEHFrameSection(const void *EHFrameSectionAddr,
Error deregisterEHFrameSection(const void *EHFrameSectionAddr,
size_t EHFrameSectionSize) {
-#ifdef __APPLE__
- return walkAppleEHFrameSection(static_cast<const char *>(EHFrameSectionAddr),
- EHFrameSectionSize, deregisterFrameWrapper);
+#if defined(HAVE_UNW_ADD_DYNAMIC_FDE) || defined(__APPLE__)
+ return walkLibunwindEHFrameSection(
+ static_cast<const char *>(EHFrameSectionAddr), EHFrameSectionSize,
+ deregisterFrameWrapper);
#else
return deregisterFrameWrapper(EHFrameSectionAddr);
#endif
@@ -155,26 +158,42 @@ Error deregisterEHFrameSection(const void *EHFrameSectionAddr,
} // end namespace orc
} // end namespace llvm
-static Error registerEHFrameWrapper(JITTargetAddress Addr, uint64_t Size) {
- return llvm::orc::registerEHFrameSection(
- jitTargetAddressToPointer<const void *>(Addr), Size);
+extern "C" llvm::orc::shared::CWrapperFunctionResult
+llvm_orc_registerEHFrameSectionCustomDirectWrapper(
+ const char *EHFrameSectionAddr, uint64_t Size) {
+ if (auto Err = registerEHFrameSection(EHFrameSectionAddr, Size))
+ return WrapperFunctionResult::createOutOfBandError(toString(std::move(Err)))
+ .release();
+ return llvm::orc::shared::CWrapperFunctionResult();
+}
+
+extern "C" llvm::orc::shared::CWrapperFunctionResult
+llvm_orc_deregisterEHFrameSectionCustomDirectWrapper(
+ const char *EHFrameSectionAddr, uint64_t Size) {
+ if (auto Err = deregisterEHFrameSection(EHFrameSectionAddr, Size))
+ return WrapperFunctionResult::createOutOfBandError(toString(std::move(Err)))
+ .release();
+ return llvm::orc::shared::CWrapperFunctionResult();
+}
+
+static Error registerEHFrameWrapper(ExecutorAddr Addr, uint64_t Size) {
+ return llvm::orc::registerEHFrameSection(Addr.toPtr<const void *>(), Size);
}
-static Error deregisterEHFrameWrapper(JITTargetAddress Addr, uint64_t Size) {
- return llvm::orc::deregisterEHFrameSection(
- jitTargetAddressToPointer<const void *>(Addr), Size);
+static Error deregisterEHFrameWrapper(ExecutorAddr Addr, uint64_t Size) {
+ return llvm::orc::deregisterEHFrameSection(Addr.toPtr<const void *>(), Size);
}
-extern "C" orc::shared::detail::CWrapperFunctionResult
+extern "C" orc::shared::CWrapperFunctionResult
llvm_orc_registerEHFrameSectionWrapper(const char *Data, uint64_t Size) {
- return WrapperFunction<SPSError(SPSExecutorAddress, uint64_t)>::handle(
+ return WrapperFunction<SPSError(SPSExecutorAddr, uint64_t)>::handle(
Data, Size, registerEHFrameWrapper)
.release();
}
-extern "C" orc::shared::detail::CWrapperFunctionResult
+extern "C" orc::shared::CWrapperFunctionResult
llvm_orc_deregisterEHFrameSectionWrapper(const char *Data, uint64_t Size) {
- return WrapperFunction<SPSError(SPSExecutorAddress, uint64_t)>::handle(
+ return WrapperFunction<SPSError(SPSExecutorAddr, uint64_t)>::handle(
Data, Size, deregisterEHFrameWrapper)
.release();
}
diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorDylibManager.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorDylibManager.cpp
new file mode 100644
index 000000000000..3c9dd21b0832
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorDylibManager.cpp
@@ -0,0 +1,129 @@
+//===--- SimpleExecutorDylibManager.cpp - Executor-side dylib management --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorDylibManager.h"
+
+#include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h"
+#include "llvm/Support/FormatVariadic.h"
+
+#define DEBUG_TYPE "orc"
+
+namespace llvm {
+namespace orc {
+namespace rt_bootstrap {
+
+SimpleExecutorDylibManager::~SimpleExecutorDylibManager() {
+ assert(Dylibs.empty() && "shutdown not called?");
+}
+
+Expected<tpctypes::DylibHandle>
+SimpleExecutorDylibManager::open(const std::string &Path, uint64_t Mode) {
+ if (Mode != 0)
+ return make_error<StringError>("open: non-zero mode bits not yet supported",
+ inconvertibleErrorCode());
+
+ const char *PathCStr = Path.empty() ? nullptr : Path.c_str();
+ std::string ErrMsg;
+
+ auto DL = sys::DynamicLibrary::getPermanentLibrary(PathCStr, &ErrMsg);
+ if (!DL.isValid())
+ return make_error<StringError>(std::move(ErrMsg), inconvertibleErrorCode());
+
+ std::lock_guard<std::mutex> Lock(M);
+ Dylibs[NextId] = std::move(DL);
+ return NextId++;
+}
+
+Expected<std::vector<ExecutorAddr>>
+SimpleExecutorDylibManager::lookup(tpctypes::DylibHandle H,
+ const RemoteSymbolLookupSet &L) {
+ std::vector<ExecutorAddr> Result;
+
+ std::lock_guard<std::mutex> Lock(M);
+ auto I = Dylibs.find(H);
+ if (I == Dylibs.end())
+ return make_error<StringError>("No dylib for handle " + formatv("{0:x}", H),
+ inconvertibleErrorCode());
+ auto &DL = I->second;
+
+ for (const auto &E : L) {
+
+ if (E.Name.empty()) {
+ if (E.Required)
+ return make_error<StringError>("Required address for empty symbol \"\"",
+ inconvertibleErrorCode());
+ else
+ Result.push_back(ExecutorAddr());
+ } else {
+
+ const char *DemangledSymName = E.Name.c_str();
+#ifdef __APPLE__
+ if (E.Name.front() != '_')
+ return make_error<StringError>(Twine("MachO symbol \"") + E.Name +
+ "\" missing leading '_'",
+ inconvertibleErrorCode());
+ ++DemangledSymName;
+#endif
+
+ void *Addr = DL.getAddressOfSymbol(DemangledSymName);
+ if (!Addr && E.Required)
+ return make_error<StringError>(Twine("Missing definition for ") +
+ DemangledSymName,
+ inconvertibleErrorCode());
+
+ Result.push_back(ExecutorAddr::fromPtr(Addr));
+ }
+ }
+
+ return Result;
+}
+
+Error SimpleExecutorDylibManager::shutdown() {
+
+ DylibsMap DM;
+ {
+ std::lock_guard<std::mutex> Lock(M);
+ std::swap(DM, Dylibs);
+ }
+
+ // There is no removal of dylibs at the moment, so nothing to do here.
+ return Error::success();
+}
+
+void SimpleExecutorDylibManager::addBootstrapSymbols(
+ StringMap<ExecutorAddr> &M) {
+ M[rt::SimpleExecutorDylibManagerInstanceName] = ExecutorAddr::fromPtr(this);
+ M[rt::SimpleExecutorDylibManagerOpenWrapperName] =
+ ExecutorAddr::fromPtr(&openWrapper);
+ M[rt::SimpleExecutorDylibManagerLookupWrapperName] =
+ ExecutorAddr::fromPtr(&lookupWrapper);
+}
+
+llvm::orc::shared::CWrapperFunctionResult
+SimpleExecutorDylibManager::openWrapper(const char *ArgData, size_t ArgSize) {
+ return shared::
+ WrapperFunction<rt::SPSSimpleExecutorDylibManagerOpenSignature>::handle(
+ ArgData, ArgSize,
+ shared::makeMethodWrapperHandler(
+ &SimpleExecutorDylibManager::open))
+ .release();
+}
+
+llvm::orc::shared::CWrapperFunctionResult
+SimpleExecutorDylibManager::lookupWrapper(const char *ArgData, size_t ArgSize) {
+ return shared::
+ WrapperFunction<rt::SPSSimpleExecutorDylibManagerLookupSignature>::handle(
+ ArgData, ArgSize,
+ shared::makeMethodWrapperHandler(
+ &SimpleExecutorDylibManager::lookup))
+ .release();
+}
+
+} // namespace rt_bootstrap
+} // end namespace orc
+} // end namespace llvm
diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp
new file mode 100644
index 000000000000..232340c22a32
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp
@@ -0,0 +1,261 @@
+//===- SimpleExecutorMemoryManager.cpp - Simple executor-side memory mgmt -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.h"
+
+#include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h"
+#include "llvm/Support/FormatVariadic.h"
+
+#define DEBUG_TYPE "orc"
+
+namespace llvm {
+namespace orc {
+namespace rt_bootstrap {
+
+SimpleExecutorMemoryManager::~SimpleExecutorMemoryManager() {
+ assert(Allocations.empty() && "shutdown not called?");
+}
+
+Expected<ExecutorAddr> SimpleExecutorMemoryManager::allocate(uint64_t Size) {
+ std::error_code EC;
+ auto MB = sys::Memory::allocateMappedMemory(
+ Size, 0, sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC);
+ if (EC)
+ return errorCodeToError(EC);
+ std::lock_guard<std::mutex> Lock(M);
+ assert(!Allocations.count(MB.base()) && "Duplicate allocation addr");
+ Allocations[MB.base()].Size = Size;
+ return ExecutorAddr::fromPtr(MB.base());
+}
+
+Error SimpleExecutorMemoryManager::finalize(tpctypes::FinalizeRequest &FR) {
+ ExecutorAddr Base(~0ULL);
+ std::vector<tpctypes::WrapperFunctionCall> DeallocationActions;
+ size_t SuccessfulFinalizationActions = 0;
+
+ if (FR.Segments.empty()) {
+ // NOTE: Finalizing nothing is currently a no-op. Should it be an error?
+ if (FR.Actions.empty())
+ return Error::success();
+ else
+ return make_error<StringError>("Finalization actions attached to empty "
+ "finalization request",
+ inconvertibleErrorCode());
+ }
+
+ for (auto &Seg : FR.Segments)
+ Base = std::min(Base, Seg.Addr);
+
+ for (auto &ActPair : FR.Actions)
+ if (ActPair.Deallocate.Func)
+ DeallocationActions.push_back(ActPair.Deallocate);
+
+ // Get the Allocation for this finalization.
+ size_t AllocSize = 0;
+ {
+ std::lock_guard<std::mutex> Lock(M);
+ auto I = Allocations.find(Base.toPtr<void *>());
+ if (I == Allocations.end())
+ return make_error<StringError>("Attempt to finalize unrecognized "
+ "allocation " +
+ formatv("{0:x}", Base.getValue()),
+ inconvertibleErrorCode());
+ AllocSize = I->second.Size;
+ I->second.DeallocationActions = std::move(DeallocationActions);
+ }
+ ExecutorAddr AllocEnd = Base + ExecutorAddrDiff(AllocSize);
+
+ // Bail-out function: this will run deallocation actions corresponding to any
+ // completed finalization actions, then deallocate memory.
+ auto BailOut = [&](Error Err) {
+ std::pair<void *, Allocation> AllocToDestroy;
+
+    // Get the allocation to destroy.
+ {
+ std::lock_guard<std::mutex> Lock(M);
+ auto I = Allocations.find(Base.toPtr<void *>());
+
+      // Check for a missing allocation (effectively a double free).
+ if (I == Allocations.end())
+ return joinErrors(
+ std::move(Err),
+ make_error<StringError>("No allocation entry found "
+ "for " +
+ formatv("{0:x}", Base.getValue()),
+ inconvertibleErrorCode()));
+ AllocToDestroy = std::move(*I);
+ Allocations.erase(I);
+ }
+
+ // Run deallocation actions for all completed finalization actions.
+ while (SuccessfulFinalizationActions)
+ Err =
+ joinErrors(std::move(Err), FR.Actions[--SuccessfulFinalizationActions]
+ .Deallocate.runWithSPSRet());
+
+ // Deallocate memory.
+ sys::MemoryBlock MB(AllocToDestroy.first, AllocToDestroy.second.Size);
+ if (auto EC = sys::Memory::releaseMappedMemory(MB))
+ Err = joinErrors(std::move(Err), errorCodeToError(EC));
+
+ return Err;
+ };
+
+ // Copy content and apply permissions.
+ for (auto &Seg : FR.Segments) {
+
+ // Check segment ranges.
+ if (LLVM_UNLIKELY(Seg.Size < Seg.Content.size()))
+ return BailOut(make_error<StringError>(
+ formatv("Segment {0:x} content size ({1:x} bytes) "
+ "exceeds segment size ({2:x} bytes)",
+ Seg.Addr.getValue(), Seg.Content.size(), Seg.Size),
+ inconvertibleErrorCode()));
+ ExecutorAddr SegEnd = Seg.Addr + ExecutorAddrDiff(Seg.Size);
+ if (LLVM_UNLIKELY(Seg.Addr < Base || SegEnd > AllocEnd))
+ return BailOut(make_error<StringError>(
+ formatv("Segment {0:x} -- {1:x} crosses boundary of "
+ "allocation {2:x} -- {3:x}",
+ Seg.Addr.getValue(), SegEnd.getValue(), Base.getValue(),
+ AllocEnd.getValue()),
+ inconvertibleErrorCode()));
+
+ char *Mem = Seg.Addr.toPtr<char *>();
+ memcpy(Mem, Seg.Content.data(), Seg.Content.size());
+ memset(Mem + Seg.Content.size(), 0, Seg.Size - Seg.Content.size());
+ assert(Seg.Size <= std::numeric_limits<size_t>::max());
+ if (auto EC = sys::Memory::protectMappedMemory(
+ {Mem, static_cast<size_t>(Seg.Size)},
+ tpctypes::fromWireProtectionFlags(Seg.Prot)))
+ return BailOut(errorCodeToError(EC));
+ if (Seg.Prot & tpctypes::WPF_Exec)
+ sys::Memory::InvalidateInstructionCache(Mem, Seg.Size);
+ }
+
+ // Run finalization actions.
+ for (auto &ActPair : FR.Actions) {
+ if (auto Err = ActPair.Finalize.runWithSPSRet())
+ return BailOut(std::move(Err));
+ ++SuccessfulFinalizationActions;
+ }
+
+ return Error::success();
+}
+
+Error SimpleExecutorMemoryManager::deallocate(
+ const std::vector<ExecutorAddr> &Bases) {
+ std::vector<std::pair<void *, Allocation>> AllocPairs;
+ AllocPairs.reserve(Bases.size());
+
+  // Get the allocations to destroy.
+ Error Err = Error::success();
+ {
+ std::lock_guard<std::mutex> Lock(M);
+ for (auto &Base : Bases) {
+ auto I = Allocations.find(Base.toPtr<void *>());
+
+      // Check for a missing allocation (effectively a double free).
+ if (I != Allocations.end()) {
+ AllocPairs.push_back(std::move(*I));
+ Allocations.erase(I);
+ } else
+ Err = joinErrors(
+ std::move(Err),
+ make_error<StringError>("No allocation entry found "
+ "for " +
+ formatv("{0:x}", Base.getValue()),
+ inconvertibleErrorCode()));
+ }
+ }
+
+ while (!AllocPairs.empty()) {
+ auto &P = AllocPairs.back();
+ Err = joinErrors(std::move(Err), deallocateImpl(P.first, P.second));
+ AllocPairs.pop_back();
+ }
+
+ return Err;
+}
+
+Error SimpleExecutorMemoryManager::shutdown() {
+
+ AllocationsMap AM;
+ {
+ std::lock_guard<std::mutex> Lock(M);
+ AM = std::move(Allocations);
+ }
+
+ Error Err = Error::success();
+ for (auto &KV : AM)
+ Err = joinErrors(std::move(Err), deallocateImpl(KV.first, KV.second));
+ return Err;
+}
+
+void SimpleExecutorMemoryManager::addBootstrapSymbols(
+ StringMap<ExecutorAddr> &M) {
+ M[rt::SimpleExecutorMemoryManagerInstanceName] = ExecutorAddr::fromPtr(this);
+ M[rt::SimpleExecutorMemoryManagerReserveWrapperName] =
+ ExecutorAddr::fromPtr(&reserveWrapper);
+ M[rt::SimpleExecutorMemoryManagerFinalizeWrapperName] =
+ ExecutorAddr::fromPtr(&finalizeWrapper);
+ M[rt::SimpleExecutorMemoryManagerDeallocateWrapperName] =
+ ExecutorAddr::fromPtr(&deallocateWrapper);
+}
+
+Error SimpleExecutorMemoryManager::deallocateImpl(void *Base, Allocation &A) {
+ Error Err = Error::success();
+
+ while (!A.DeallocationActions.empty()) {
+ Err = joinErrors(std::move(Err),
+ A.DeallocationActions.back().runWithSPSRet());
+ A.DeallocationActions.pop_back();
+ }
+
+ sys::MemoryBlock MB(Base, A.Size);
+ if (auto EC = sys::Memory::releaseMappedMemory(MB))
+ Err = joinErrors(std::move(Err), errorCodeToError(EC));
+
+ return Err;
+}
+
+llvm::orc::shared::CWrapperFunctionResult
+SimpleExecutorMemoryManager::reserveWrapper(const char *ArgData,
+ size_t ArgSize) {
+ return shared::WrapperFunction<
+ rt::SPSSimpleExecutorMemoryManagerReserveSignature>::
+ handle(ArgData, ArgSize,
+ shared::makeMethodWrapperHandler(
+ &SimpleExecutorMemoryManager::allocate))
+ .release();
+}
+
+llvm::orc::shared::CWrapperFunctionResult
+SimpleExecutorMemoryManager::finalizeWrapper(const char *ArgData,
+ size_t ArgSize) {
+ return shared::WrapperFunction<
+ rt::SPSSimpleExecutorMemoryManagerFinalizeSignature>::
+ handle(ArgData, ArgSize,
+ shared::makeMethodWrapperHandler(
+ &SimpleExecutorMemoryManager::finalize))
+ .release();
+}
+
+llvm::orc::shared::CWrapperFunctionResult
+SimpleExecutorMemoryManager::deallocateWrapper(const char *ArgData,
+ size_t ArgSize) {
+ return shared::WrapperFunction<
+ rt::SPSSimpleExecutorMemoryManagerDeallocateSignature>::
+ handle(ArgData, ArgSize,
+ shared::makeMethodWrapperHandler(
+ &SimpleExecutorMemoryManager::deallocate))
+ .release();
+}
+
+} // namespace rt_bootstrap
+} // end namespace orc
+} // end namespace llvm
diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.cpp
new file mode 100644
index 000000000000..b6b21bde1182
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.cpp
@@ -0,0 +1,293 @@
+//===---- SimpleRemoteEPCServer.cpp - EPC over simple abstract channel ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.h"
+
+#include "llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/Process.h"
+
+#include "OrcRTBootstrap.h"
+
+#define DEBUG_TYPE "orc"
+
+using namespace llvm::orc::shared;
+
+namespace llvm {
+namespace orc {
+
+ExecutorBootstrapService::~ExecutorBootstrapService() {}
+
+SimpleRemoteEPCServer::Dispatcher::~Dispatcher() {}
+
+#if LLVM_ENABLE_THREADS
+void SimpleRemoteEPCServer::ThreadDispatcher::dispatch(
+ unique_function<void()> Work) {
+ {
+ std::lock_guard<std::mutex> Lock(DispatchMutex);
+ if (!Running)
+ return;
+ ++Outstanding;
+ }
+
+ std::thread([this, Work = std::move(Work)]() mutable {
+ Work();
+ std::lock_guard<std::mutex> Lock(DispatchMutex);
+ --Outstanding;
+ OutstandingCV.notify_all();
+ }).detach();
+}
+
+void SimpleRemoteEPCServer::ThreadDispatcher::shutdown() {
+ std::unique_lock<std::mutex> Lock(DispatchMutex);
+ Running = false;
+ OutstandingCV.wait(Lock, [this]() { return Outstanding == 0; });
+}
+#endif
+
+StringMap<ExecutorAddr> SimpleRemoteEPCServer::defaultBootstrapSymbols() {
+ StringMap<ExecutorAddr> DBS;
+ rt_bootstrap::addTo(DBS);
+ return DBS;
+}
+
+Expected<SimpleRemoteEPCTransportClient::HandleMessageAction>
+SimpleRemoteEPCServer::handleMessage(SimpleRemoteEPCOpcode OpC, uint64_t SeqNo,
+ ExecutorAddr TagAddr,
+ SimpleRemoteEPCArgBytesVector ArgBytes) {
+
+ LLVM_DEBUG({
+ dbgs() << "SimpleRemoteEPCServer::handleMessage: opc = ";
+ switch (OpC) {
+ case SimpleRemoteEPCOpcode::Setup:
+ dbgs() << "Setup";
+ assert(SeqNo == 0 && "Non-zero SeqNo for Setup?");
+ assert(TagAddr.getValue() == 0 && "Non-zero TagAddr for Setup?");
+ break;
+ case SimpleRemoteEPCOpcode::Hangup:
+ dbgs() << "Hangup";
+ assert(SeqNo == 0 && "Non-zero SeqNo for Hangup?");
+ assert(TagAddr.getValue() == 0 && "Non-zero TagAddr for Hangup?");
+ break;
+ case SimpleRemoteEPCOpcode::Result:
+ dbgs() << "Result";
+ assert(TagAddr.getValue() == 0 && "Non-zero TagAddr for Result?");
+ break;
+ case SimpleRemoteEPCOpcode::CallWrapper:
+ dbgs() << "CallWrapper";
+ break;
+ }
+ dbgs() << ", seqno = " << SeqNo
+ << ", tag-addr = " << formatv("{0:x}", TagAddr.getValue())
+ << ", arg-buffer = " << formatv("{0:x}", ArgBytes.size())
+ << " bytes\n";
+ });
+
+ using UT = std::underlying_type_t<SimpleRemoteEPCOpcode>;
+ if (static_cast<UT>(OpC) > static_cast<UT>(SimpleRemoteEPCOpcode::LastOpC))
+ return make_error<StringError>("Unexpected opcode",
+ inconvertibleErrorCode());
+
+ // TODO: Clean detach message?
+ switch (OpC) {
+ case SimpleRemoteEPCOpcode::Setup:
+ return make_error<StringError>("Unexpected Setup opcode",
+ inconvertibleErrorCode());
+ case SimpleRemoteEPCOpcode::Hangup:
+ return SimpleRemoteEPCTransportClient::EndSession;
+ case SimpleRemoteEPCOpcode::Result:
+ if (auto Err = handleResult(SeqNo, TagAddr, std::move(ArgBytes)))
+ return std::move(Err);
+ break;
+ case SimpleRemoteEPCOpcode::CallWrapper:
+ handleCallWrapper(SeqNo, TagAddr, std::move(ArgBytes));
+ break;
+ }
+ return ContinueSession;
+}
+
+Error SimpleRemoteEPCServer::waitForDisconnect() {
+ std::unique_lock<std::mutex> Lock(ServerStateMutex);
+ ShutdownCV.wait(Lock, [this]() { return RunState == ServerShutDown; });
+ return std::move(ShutdownErr);
+}
+
+void SimpleRemoteEPCServer::handleDisconnect(Error Err) {
+ PendingJITDispatchResultsMap TmpPending;
+
+ {
+ std::lock_guard<std::mutex> Lock(ServerStateMutex);
+ std::swap(TmpPending, PendingJITDispatchResults);
+ RunState = ServerShuttingDown;
+ }
+
+ // Send out-of-band errors to any waiting threads.
+ for (auto &KV : TmpPending)
+ KV.second->set_value(
+ shared::WrapperFunctionResult::createOutOfBandError("disconnecting"));
+
+ // Wait for dispatcher to clear.
+ D->shutdown();
+
+ // Shut down services.
+ while (!Services.empty()) {
+ ShutdownErr =
+ joinErrors(std::move(ShutdownErr), Services.back()->shutdown());
+ Services.pop_back();
+ }
+
+ std::lock_guard<std::mutex> Lock(ServerStateMutex);
+ ShutdownErr = joinErrors(std::move(ShutdownErr), std::move(Err));
+ RunState = ServerShutDown;
+ ShutdownCV.notify_all();
+}
+
+Error SimpleRemoteEPCServer::sendMessage(SimpleRemoteEPCOpcode OpC,
+ uint64_t SeqNo, ExecutorAddr TagAddr,
+ ArrayRef<char> ArgBytes) {
+
+ LLVM_DEBUG({
+ dbgs() << "SimpleRemoteEPCServer::sendMessage: opc = ";
+ switch (OpC) {
+ case SimpleRemoteEPCOpcode::Setup:
+ dbgs() << "Setup";
+ assert(SeqNo == 0 && "Non-zero SeqNo for Setup?");
+ assert(TagAddr.getValue() == 0 && "Non-zero TagAddr for Setup?");
+ break;
+ case SimpleRemoteEPCOpcode::Hangup:
+ dbgs() << "Hangup";
+ assert(SeqNo == 0 && "Non-zero SeqNo for Hangup?");
+ assert(TagAddr.getValue() == 0 && "Non-zero TagAddr for Hangup?");
+ break;
+ case SimpleRemoteEPCOpcode::Result:
+ dbgs() << "Result";
+ assert(TagAddr.getValue() == 0 && "Non-zero TagAddr for Result?");
+ break;
+ case SimpleRemoteEPCOpcode::CallWrapper:
+ dbgs() << "CallWrapper";
+ break;
+ }
+ dbgs() << ", seqno = " << SeqNo
+ << ", tag-addr = " << formatv("{0:x}", TagAddr.getValue())
+ << ", arg-buffer = " << formatv("{0:x}", ArgBytes.size())
+ << " bytes\n";
+ });
+ auto Err = T->sendMessage(OpC, SeqNo, TagAddr, ArgBytes);
+ LLVM_DEBUG({
+ if (Err)
+ dbgs() << " \\--> SimpleRemoteEPC::sendMessage failed\n";
+ });
+ return Err;
+}
+
+Error SimpleRemoteEPCServer::sendSetupMessage(
+ StringMap<ExecutorAddr> BootstrapSymbols) {
+
+ using namespace SimpleRemoteEPCDefaultBootstrapSymbolNames;
+
+ std::vector<char> SetupPacket;
+ SimpleRemoteEPCExecutorInfo EI;
+ EI.TargetTriple = sys::getProcessTriple();
+ if (auto PageSize = sys::Process::getPageSize())
+ EI.PageSize = *PageSize;
+ else
+ return PageSize.takeError();
+ EI.BootstrapSymbols = std::move(BootstrapSymbols);
+
+ assert(!EI.BootstrapSymbols.count(ExecutorSessionObjectName) &&
+ "Dispatch context name should not be set");
+ assert(!EI.BootstrapSymbols.count(DispatchFnName) &&
+ "Dispatch function name should not be set");
+ EI.BootstrapSymbols[ExecutorSessionObjectName] = ExecutorAddr::fromPtr(this);
+ EI.BootstrapSymbols[DispatchFnName] = ExecutorAddr::fromPtr(jitDispatchEntry);
+
+ using SPSSerialize =
+ shared::SPSArgList<shared::SPSSimpleRemoteEPCExecutorInfo>;
+ auto SetupPacketBytes =
+ shared::WrapperFunctionResult::allocate(SPSSerialize::size(EI));
+ shared::SPSOutputBuffer OB(SetupPacketBytes.data(), SetupPacketBytes.size());
+ if (!SPSSerialize::serialize(OB, EI))
+ return make_error<StringError>("Could not send setup packet",
+ inconvertibleErrorCode());
+
+ return sendMessage(SimpleRemoteEPCOpcode::Setup, 0, ExecutorAddr(),
+ {SetupPacketBytes.data(), SetupPacketBytes.size()});
+}
+
+Error SimpleRemoteEPCServer::handleResult(
+ uint64_t SeqNo, ExecutorAddr TagAddr,
+ SimpleRemoteEPCArgBytesVector ArgBytes) {
+ std::promise<shared::WrapperFunctionResult> *P = nullptr;
+ {
+ std::lock_guard<std::mutex> Lock(ServerStateMutex);
+ auto I = PendingJITDispatchResults.find(SeqNo);
+ if (I == PendingJITDispatchResults.end())
+ return make_error<StringError>("No call for sequence number " +
+ Twine(SeqNo),
+ inconvertibleErrorCode());
+ P = I->second;
+ PendingJITDispatchResults.erase(I);
+ releaseSeqNo(SeqNo);
+ }
+ auto R = shared::WrapperFunctionResult::allocate(ArgBytes.size());
+ memcpy(R.data(), ArgBytes.data(), ArgBytes.size());
+ P->set_value(std::move(R));
+ return Error::success();
+}
+
+void SimpleRemoteEPCServer::handleCallWrapper(
+ uint64_t RemoteSeqNo, ExecutorAddr TagAddr,
+ SimpleRemoteEPCArgBytesVector ArgBytes) {
+ D->dispatch([this, RemoteSeqNo, TagAddr, ArgBytes = std::move(ArgBytes)]() {
+ using WrapperFnTy =
+ shared::CWrapperFunctionResult (*)(const char *, size_t);
+ auto *Fn = TagAddr.toPtr<WrapperFnTy>();
+ shared::WrapperFunctionResult ResultBytes(
+ Fn(ArgBytes.data(), ArgBytes.size()));
+ if (auto Err = sendMessage(SimpleRemoteEPCOpcode::Result, RemoteSeqNo,
+ ExecutorAddr(),
+ {ResultBytes.data(), ResultBytes.size()}))
+ ReportError(std::move(Err));
+ });
+}
+
+shared::WrapperFunctionResult
+SimpleRemoteEPCServer::doJITDispatch(const void *FnTag, const char *ArgData,
+ size_t ArgSize) {
+ uint64_t SeqNo;
+ std::promise<shared::WrapperFunctionResult> ResultP;
+ auto ResultF = ResultP.get_future();
+ {
+ std::lock_guard<std::mutex> Lock(ServerStateMutex);
+ if (RunState != ServerRunning)
+ return shared::WrapperFunctionResult::createOutOfBandError(
+ "jit_dispatch not available (EPC server shut down)");
+
+ SeqNo = getNextSeqNo();
+ assert(!PendingJITDispatchResults.count(SeqNo) && "SeqNo already in use");
+ PendingJITDispatchResults[SeqNo] = &ResultP;
+ }
+
+ if (auto Err = sendMessage(SimpleRemoteEPCOpcode::CallWrapper, SeqNo,
+ ExecutorAddr::fromPtr(FnTag), {ArgData, ArgSize}))
+ ReportError(std::move(Err));
+
+ return ResultF.get();
+}
+
+shared::CWrapperFunctionResult
+SimpleRemoteEPCServer::jitDispatchEntry(void *DispatchCtx, const void *FnTag,
+ const char *ArgData, size_t ArgSize) {
+ return reinterpret_cast<SimpleRemoteEPCServer *>(DispatchCtx)
+ ->doJITDispatch(FnTag, ArgData, ArgSize)
+ .release();
+}
+
+} // end namespace orc
+} // end namespace llvm
diff --git a/llvm/lib/ExecutionEngine/Orc/TaskDispatch.cpp b/llvm/lib/ExecutionEngine/Orc/TaskDispatch.cpp
new file mode 100644
index 000000000000..111c84ec87ed
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/Orc/TaskDispatch.cpp
@@ -0,0 +1,48 @@
+//===------------ TaskDispatch.cpp - ORC task dispatch utils --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/TaskDispatch.h"
+
+namespace llvm {
+namespace orc {
+
+char Task::ID = 0;
+char GenericNamedTask::ID = 0;
+const char *GenericNamedTask::DefaultDescription = "Generic Task";
+
+void Task::anchor() {}
+TaskDispatcher::~TaskDispatcher() {}
+
+void InPlaceTaskDispatcher::dispatch(std::unique_ptr<Task> T) { T->run(); }
+
+void InPlaceTaskDispatcher::shutdown() {}
+
+#if LLVM_ENABLE_THREADS
+void DynamicThreadPoolTaskDispatcher::dispatch(std::unique_ptr<Task> T) {
+ {
+ std::lock_guard<std::mutex> Lock(DispatchMutex);
+ ++Outstanding;
+ }
+
+ std::thread([this, T = std::move(T)]() mutable {
+ T->run();
+ std::lock_guard<std::mutex> Lock(DispatchMutex);
+ --Outstanding;
+ OutstandingCV.notify_all();
+ }).detach();
+}
+
+void DynamicThreadPoolTaskDispatcher::shutdown() {
+ std::unique_lock<std::mutex> Lock(DispatchMutex);
+ Running = false;
+ OutstandingCV.wait(Lock, [this]() { return Outstanding == 0; });
+}
+#endif
+
+} // namespace orc
+} // namespace llvm
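
DynamicThreadPoolTaskDispatcher and the server's ThreadDispatcher earlier in
this patch share the same shutdown idiom: each dispatched task runs on a
detached thread, an Outstanding counter is maintained under a mutex, and
shutdown() waits on a condition variable until the counter drains. A
standalone sketch of that idiom with standard-library types only
(CountingDispatcher is an illustrative name; like the server variant, it also
drops work submitted after shutdown begins):

#include <condition_variable>
#include <functional>
#include <mutex>
#include <thread>

class CountingDispatcher {
public:
  void dispatch(std::function<void()> Work) {
    {
      std::lock_guard<std::mutex> Lock(M);
      if (!Running)
        return; // Shutdown already started; drop the work item.
      ++Outstanding;
    }
    std::thread([this, Work = std::move(Work)]() {
      Work();
      std::lock_guard<std::mutex> Lock(M);
      --Outstanding;
      CV.notify_all();
    }).detach();
  }

  void shutdown() {
    std::unique_lock<std::mutex> Lock(M);
    Running = false;
    CV.wait(Lock, [this] { return Outstanding == 0; });
  }

private:
  std::mutex M;
  std::condition_variable CV;
  bool Running = true;
  unsigned Outstanding = 0;
};
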
diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/JITSymbol.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/JITSymbol.cpp
index 0f6f9efe1102..210fbf6e43e3 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/JITSymbol.cpp
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/JITSymbol.cpp
@@ -84,7 +84,7 @@ llvm::JITSymbolFlags::fromObjectSymbol(const object::SymbolRef &Symbol) {
if (!SymbolType)
return SymbolType.takeError();
- if (*SymbolType & object::SymbolRef::ST_Function)
+ if (*SymbolType == object::SymbolRef::ST_Function)
Flags |= JITSymbolFlags::Callable;
return Flags;
diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp
index b6ccd02405c1..9c8d402364c6 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp
@@ -67,7 +67,9 @@ static void __deregister_frame(void *p) {
}
#endif
-#ifdef __APPLE__
+/* libgcc and libunwind __register_frame behave differently. We use the presence
+ * of __unw_add_dynamic_fde to detect libunwind. */
+#if defined(HAVE_UNW_ADD_DYNAMIC_FDE) || defined(__APPLE__)
static const char *processFDE(const char *Entry, bool isDeregister) {
const char *P = Entry;
@@ -284,7 +286,7 @@ void *RTDyldMemoryManager::getPointerToNamedFunction(const std::string &Name,
uint64_t Addr = getSymbolAddress(Name);
if (!Addr && AbortOnFailure)
- report_fatal_error("Program used external function '" + Name +
+ report_fatal_error(Twine("Program used external function '") + Name +
"' which could not be resolved!");
return (void*)Addr;
diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index 687fd839805f..f16c6bdbfa4f 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -520,6 +520,13 @@ static bool isZeroInit(const SectionRef Section) {
SectionType == MachO::S_GB_ZEROFILL;
}
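+// Thread-local sections are currently only recognized for ELF objects (via the
+// SHF_TLS section flag); other object formats report no TLS sections here.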
+static bool isTLS(const SectionRef Section) {
+ const ObjectFile *Obj = Section.getObject();
+ if (isa<object::ELFObjectFileBase>(Obj))
+ return ELFSectionRef(Section).getFlags() & ELF::SHF_TLS;
+ return false;
+}
+
// Compute an upper bound of the memory size that is required to load all
// sections
Error RuntimeDyldImpl::computeTotalAllocSize(const ObjectFile &Obj,
@@ -549,6 +556,7 @@ Error RuntimeDyldImpl::computeTotalAllocSize(const ObjectFile &Obj,
unsigned Alignment = (unsigned)Alignment64 & 0xffffffffL;
bool IsCode = Section.isText();
bool IsReadOnly = isReadOnlyData(Section);
+ bool IsTLS = isTLS(Section);
Expected<StringRef> NameOrErr = Section.getName();
if (!NameOrErr)
@@ -582,7 +590,7 @@ Error RuntimeDyldImpl::computeTotalAllocSize(const ObjectFile &Obj,
} else if (IsReadOnly) {
RODataAlign = std::max(RODataAlign, Alignment);
ROSectionSizes.push_back(SectionSize);
- } else {
+ } else if (!IsTLS) {
RWDataAlign = std::max(RWDataAlign, Alignment);
RWSectionSizes.push_back(SectionSize);
}
@@ -672,7 +680,7 @@ unsigned RuntimeDyldImpl::computeSectionStubBufSize(const ObjectFile &Obj,
Expected<section_iterator> RelSecOrErr = SI->getRelocatedSection();
if (!RelSecOrErr)
- report_fatal_error(toString(RelSecOrErr.takeError()));
+ report_fatal_error(Twine(toString(RelSecOrErr.takeError())));
section_iterator RelSecI = *RelSecOrErr;
if (!(RelSecI == Section))
@@ -800,6 +808,7 @@ RuntimeDyldImpl::emitSection(const ObjectFile &Obj,
bool IsVirtual = Section.isVirtual();
bool IsZeroInit = isZeroInit(Section);
bool IsReadOnly = isReadOnlyData(Section);
+ bool IsTLS = isTLS(Section);
uint64_t DataSize = Section.getSize();
// An alignment of 0 (at least with ELF) is identical to an alignment of 1,
@@ -823,6 +832,7 @@ RuntimeDyldImpl::emitSection(const ObjectFile &Obj,
uintptr_t Allocate;
unsigned SectionID = Sections.size();
uint8_t *Addr;
+ uint64_t LoadAddress = 0;
const char *pData = nullptr;
// If this section contains any bits (i.e. isn't a virtual or bss section),
@@ -851,10 +861,17 @@ RuntimeDyldImpl::emitSection(const ObjectFile &Obj,
Allocate = DataSize + PaddingSize + StubBufSize;
if (!Allocate)
Allocate = 1;
- Addr = IsCode ? MemMgr.allocateCodeSection(Allocate, Alignment, SectionID,
- Name)
- : MemMgr.allocateDataSection(Allocate, Alignment, SectionID,
- Name, IsReadOnly);
+ if (IsTLS) {
+ auto TLSSection =
+ MemMgr.allocateTLSSection(Allocate, Alignment, SectionID, Name);
+ Addr = TLSSection.InitializationImage;
+ LoadAddress = TLSSection.Offset;
+ } else if (IsCode) {
+ Addr = MemMgr.allocateCodeSection(Allocate, Alignment, SectionID, Name);
+ } else {
+ Addr = MemMgr.allocateDataSection(Allocate, Alignment, SectionID, Name,
+ IsReadOnly);
+ }
if (!Addr)
report_fatal_error("Unable to allocate section memory!");
@@ -897,6 +914,10 @@ RuntimeDyldImpl::emitSection(const ObjectFile &Obj,
Sections.push_back(
SectionEntry(Name, Addr, DataSize, Allocate, (uintptr_t)pData));
+ // The load address of a TLS section is not equal to the address of its
+  // initialization image.
+ if (IsTLS)
+ Sections.back().setLoadAddress(LoadAddress);
// Debug info sections are linked as if their load address was zero
if (!IsRequired)
Sections.back().setLoadAddress(0);
@@ -1118,7 +1139,7 @@ void RuntimeDyldImpl::applyExternalSymbolRelocations(
// FIXME: Implement error handling that doesn't kill the host program!
if (!Addr && !Resolver.allowsZeroSymbols())
- report_fatal_error("Program used external function '" + Name +
+ report_fatal_error(Twine("Program used external function '") + Name +
"' which could not be resolved!");
// If Resolver returned UINT64_MAX, the client wants to handle this symbol
@@ -1261,6 +1282,14 @@ uint64_t RuntimeDyld::LoadedObjectInfo::getSectionLoadAddress(
return 0;
}
+RuntimeDyld::MemoryManager::TLSSection
+RuntimeDyld::MemoryManager::allocateTLSSection(uintptr_t Size,
+ unsigned Alignment,
+ unsigned SectionID,
+ StringRef SectionName) {
+ report_fatal_error("allocation of TLS not implemented");
+}
+
void RuntimeDyld::MemoryManager::anchor() {}
void JITSymbolResolver::anchor() {}
void LegacyJITSymbolResolver::anchor() {}
diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
index a3005f786cf9..2b88c481dab0 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
@@ -232,6 +232,26 @@ private:
EvalResult(("Cannot decode unknown symbol '" + Symbol + "'").str()),
"");
+      // If an offset follows the symbol, it is given as a '+ <number>' expression.
+ int64_t Offset = 0;
+ BinOpToken BinOp;
+ std::tie(BinOp, RemainingExpr) = parseBinOpToken(RemainingExpr);
+ switch (BinOp) {
+ case BinOpToken::Add: {
+ EvalResult Number;
+ std::tie(Number, RemainingExpr) = evalNumberExpr(RemainingExpr);
+ Offset = Number.getValue();
+ break;
+ }
+ case BinOpToken::Invalid:
+ break;
+ default:
+ return std::make_pair(
+ unexpectedToken(RemainingExpr, RemainingExpr,
+ "expected '+' for offset or ',' if no offset"),
+ "");
+ }
+
if (!RemainingExpr.startswith(","))
return std::make_pair(
unexpectedToken(RemainingExpr, RemainingExpr, "expected ','"), "");
@@ -249,7 +269,7 @@ private:
MCInst Inst;
uint64_t Size;
- if (!decodeInst(Symbol, Inst, Size))
+ if (!decodeInst(Symbol, Inst, Size, Offset))
return std::make_pair(
EvalResult(("Couldn't decode instruction at '" + Symbol + "'").str()),
"");
@@ -307,7 +327,7 @@ private:
MCInst Inst;
uint64_t InstSize;
- if (!decodeInst(Symbol, Inst, InstSize))
+ if (!decodeInst(Symbol, Inst, InstSize, 0))
return std::make_pair(
EvalResult(("Couldn't decode instruction at '" + Symbol + "'").str()),
"");
@@ -664,10 +684,12 @@ private:
return evalComplexExpr(std::make_pair(ThisResult, RemainingExpr), PCtx);
}
- bool decodeInst(StringRef Symbol, MCInst &Inst, uint64_t &Size) const {
+ bool decodeInst(StringRef Symbol, MCInst &Inst, uint64_t &Size,
+ int64_t Offset) const {
MCDisassembler *Dis = Checker.Disassembler;
StringRef SymbolMem = Checker.getSymbolContent(Symbol);
- ArrayRef<uint8_t> SymbolBytes(SymbolMem.bytes_begin(), SymbolMem.size());
+ ArrayRef<uint8_t> SymbolBytes(SymbolMem.bytes_begin() + Offset,
+ SymbolMem.size() - Offset);
MCDisassembler::DecodeStatus S =
Dis->getInstruction(Inst, Size, SymbolBytes, 0, nulls());
@@ -675,7 +697,7 @@ private:
return (S == MCDisassembler::Success);
}
};
-}
+} // namespace llvm
RuntimeDyldCheckerImpl::RuntimeDyldCheckerImpl(
IsSymbolValidFunction IsSymbolValid, GetSymbolInfoFunction GetSymbolInfo,
diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index efe0b9cd61cd..1b7fdb588275 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -345,6 +345,32 @@ void RuntimeDyldELF::resolveX86_64Relocation(const SectionEntry &Section,
support::ulittle64_t::ref(Section.getAddressWithOffset(Offset)) = GOTOffset;
break;
}
+ case ELF::R_X86_64_DTPMOD64: {
+ // We only have one DSO, so the module id is always 1.
+ support::ulittle64_t::ref(Section.getAddressWithOffset(Offset)) = 1;
+ break;
+ }
+ case ELF::R_X86_64_DTPOFF64:
+ case ELF::R_X86_64_TPOFF64: {
+ // DTPOFF64 should resolve to the offset in the TLS block, TPOFF64 to the
+ // offset in the *initial* TLS block. Since we are statically linking, all
+ // TLS blocks already exist in the initial block, so resolve both
+ // relocations equally.
+ support::ulittle64_t::ref(Section.getAddressWithOffset(Offset)) =
+ Value + Addend;
+ break;
+ }
+ case ELF::R_X86_64_DTPOFF32:
+ case ELF::R_X86_64_TPOFF32: {
+ // As for the (D)TPOFF64 relocations above, both DTPOFF32 and TPOFF32 can
+ // be resolved equally.
+ int64_t RealValue = Value + Addend;
+ assert(RealValue >= INT32_MIN && RealValue <= INT32_MAX);
+ int32_t TruncValue = RealValue;
+ support::ulittle32_t::ref(Section.getAddressWithOffset(Offset)) =
+ TruncValue;
+ break;
+ }
}
}
@@ -674,7 +700,7 @@ Error RuntimeDyldELF::findOPDEntrySection(const ELFObjectFileBase &Obj,
Expected<section_iterator> RelSecOrErr = si->getRelocatedSection();
if (!RelSecOrErr)
- report_fatal_error(toString(RelSecOrErr.takeError()));
+ report_fatal_error(Twine(toString(RelSecOrErr.takeError())));
section_iterator RelSecI = *RelSecOrErr;
if (RelSecI == Obj.section_end())
@@ -1210,8 +1236,7 @@ RuntimeDyldELF::processRelocationRef(
std::string Buf;
raw_string_ostream OS(Buf);
logAllUnhandledErrors(SymTypeOrErr.takeError(), OS);
- OS.flush();
- report_fatal_error(Buf);
+ report_fatal_error(Twine(OS.str()));
}
SymType = *SymTypeOrErr;
}
@@ -1231,8 +1256,7 @@ RuntimeDyldELF::processRelocationRef(
std::string Buf;
raw_string_ostream OS(Buf);
logAllUnhandledErrors(SectionOrErr.takeError(), OS);
- OS.flush();
- report_fatal_error(Buf);
+ report_fatal_error(Twine(OS.str()));
}
section_iterator si = *SectionOrErr;
if (si == Obj.section_end())
@@ -1813,11 +1837,14 @@ RuntimeDyldELF::processRelocationRef(
addRelocationForSymbol(RE, Value.SymbolName);
else
addRelocationForSection(RE, Value.SectionID);
- } else if (RelType == ELF::R_X86_64_GOTPC64) {
+ } else if (RelType == ELF::R_X86_64_GOTPC32) {
// Materialize the address of the base of the GOT relative to the PC.
// This doesn't create a GOT entry, but it does mean we need a GOT
// section.
(void)allocateGOTEntries(0);
+ resolveGOTOffsetRelocation(SectionID, Offset, Addend, ELF::R_X86_64_PC32);
+ } else if (RelType == ELF::R_X86_64_GOTPC64) {
+ (void)allocateGOTEntries(0);
resolveGOTOffsetRelocation(SectionID, Offset, Addend, ELF::R_X86_64_PC64);
} else if (RelType == ELF::R_X86_64_GOTOFF64) {
// GOTOFF relocations ultimately require a section difference relocation.
@@ -1829,6 +1856,15 @@ RuntimeDyldELF::processRelocationRef(
} else if (RelType == ELF::R_X86_64_PC64) {
Value.Addend += support::ulittle64_t::ref(computePlaceholderAddress(SectionID, Offset));
processSimpleRelocation(SectionID, Offset, RelType, Value);
+ } else if (RelType == ELF::R_X86_64_GOTTPOFF) {
+ processX86_64GOTTPOFFRelocation(SectionID, Offset, Value, Addend);
+ } else if (RelType == ELF::R_X86_64_TLSGD ||
+ RelType == ELF::R_X86_64_TLSLD) {
+ // The next relocation must be the relocation for __tls_get_addr.
+ ++RelI;
+ auto &GetAddrRelocation = *RelI;
+ processX86_64TLSRelocation(SectionID, Offset, RelType, Value, Addend,
+ GetAddrRelocation);
} else {
processSimpleRelocation(SectionID, Offset, RelType, Value);
}
@@ -1841,6 +1877,330 @@ RuntimeDyldELF::processRelocationRef(
return ++RelI;
}
+void RuntimeDyldELF::processX86_64GOTTPOFFRelocation(unsigned SectionID,
+ uint64_t Offset,
+ RelocationValueRef Value,
+ int64_t Addend) {
+ // Use the approach from "x86-64 Linker Optimizations" from the TLS spec
+ // to replace the GOTTPOFF relocation with a TPOFF relocation. The spec
+ // only mentions one optimization even though there are two different
+ // code sequences for the Initial Exec TLS Model. We match the code to
+ // find out which one was used.
+
+ // A possible TLS code sequence and its replacement
+ struct CodeSequence {
+ // The expected code sequence
+ ArrayRef<uint8_t> ExpectedCodeSequence;
+    // The offset of the GOTTPOFF relocation from the beginning of the
+    // sequence; it is subtracted from the relocation offset to find the start
+ uint64_t TLSSequenceOffset;
+ // The new code sequence
+ ArrayRef<uint8_t> NewCodeSequence;
+ // The offset of the new TPOFF relocation
+ uint64_t TpoffRelocationOffset;
+ };
+
+ std::array<CodeSequence, 2> CodeSequences;
+
+ // Initial Exec Code Model Sequence
+ {
+ static const std::initializer_list<uint8_t> ExpectedCodeSequenceList = {
+ 0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00,
+ 0x00, // mov %fs:0, %rax
+ 0x48, 0x03, 0x05, 0x00, 0x00, 0x00, 0x00 // add x@gotpoff(%rip),
+ // %rax
+ };
+ CodeSequences[0].ExpectedCodeSequence =
+ ArrayRef<uint8_t>(ExpectedCodeSequenceList);
+ CodeSequences[0].TLSSequenceOffset = 12;
+
+ static const std::initializer_list<uint8_t> NewCodeSequenceList = {
+ 0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00, // mov %fs:0, %rax
+ 0x48, 0x8d, 0x80, 0x00, 0x00, 0x00, 0x00 // lea x@tpoff(%rax), %rax
+ };
+ CodeSequences[0].NewCodeSequence = ArrayRef<uint8_t>(NewCodeSequenceList);
+ CodeSequences[0].TpoffRelocationOffset = 12;
+ }
+
+ // Initial Exec Code Model Sequence, II
+ {
+ static const std::initializer_list<uint8_t> ExpectedCodeSequenceList = {
+ 0x48, 0x8b, 0x05, 0x00, 0x00, 0x00, 0x00, // mov x@gotpoff(%rip), %rax
+ 0x64, 0x48, 0x8b, 0x00, 0x00, 0x00, 0x00 // mov %fs:(%rax), %rax
+ };
+ CodeSequences[1].ExpectedCodeSequence =
+ ArrayRef<uint8_t>(ExpectedCodeSequenceList);
+ CodeSequences[1].TLSSequenceOffset = 3;
+
+ static const std::initializer_list<uint8_t> NewCodeSequenceList = {
+ 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00, // 6 byte nop
+ 0x64, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00, // mov %fs:x@tpoff, %rax
+ };
+ CodeSequences[1].NewCodeSequence = ArrayRef<uint8_t>(NewCodeSequenceList);
+ CodeSequences[1].TpoffRelocationOffset = 10;
+ }
+
+ bool Resolved = false;
+ auto &Section = Sections[SectionID];
+ for (const auto &C : CodeSequences) {
+ assert(C.ExpectedCodeSequence.size() == C.NewCodeSequence.size() &&
+ "Old and new code sequences must have the same size");
+
+ if (Offset < C.TLSSequenceOffset ||
+ (Offset - C.TLSSequenceOffset + C.NewCodeSequence.size()) >
+ Section.getSize()) {
+ // This can't be a matching sequence as it doesn't fit in the current
+ // section
+ continue;
+ }
+
+ auto TLSSequenceStartOffset = Offset - C.TLSSequenceOffset;
+ auto *TLSSequence = Section.getAddressWithOffset(TLSSequenceStartOffset);
+ if (ArrayRef<uint8_t>(TLSSequence, C.ExpectedCodeSequence.size()) !=
+ C.ExpectedCodeSequence) {
+ continue;
+ }
+
+ memcpy(TLSSequence, C.NewCodeSequence.data(), C.NewCodeSequence.size());
+
+ // The original GOTTPOFF relocation has an addend as it is PC relative,
+ // so it needs to be corrected. The TPOFF32 relocation is used as an
+ // absolute value (which is an offset from %fs:0), so remove the addend
+ // again.
+ RelocationEntry RE(SectionID,
+ TLSSequenceStartOffset + C.TpoffRelocationOffset,
+ ELF::R_X86_64_TPOFF32, Value.Addend - Addend);
+
+ if (Value.SymbolName)
+ addRelocationForSymbol(RE, Value.SymbolName);
+ else
+ addRelocationForSection(RE, Value.SectionID);
+
+ Resolved = true;
+ break;
+ }
+
+ if (!Resolved) {
+ // The GOTTPOFF relocation was not used in one of the sequences
+ // described in the spec, so we can't optimize it to a TPOFF
+ // relocation.
+ uint64_t GOTOffset = allocateGOTEntries(1);
+ resolveGOTOffsetRelocation(SectionID, Offset, GOTOffset + Addend,
+ ELF::R_X86_64_PC32);
+ RelocationEntry RE =
+ computeGOTOffsetRE(GOTOffset, Value.Offset, ELF::R_X86_64_TPOFF64);
+ if (Value.SymbolName)
+ addRelocationForSymbol(RE, Value.SymbolName);
+ else
+ addRelocationForSection(RE, Value.SectionID);
+ }
+}
+
+void RuntimeDyldELF::processX86_64TLSRelocation(
+ unsigned SectionID, uint64_t Offset, uint64_t RelType,
+ RelocationValueRef Value, int64_t Addend,
+ const RelocationRef &GetAddrRelocation) {
+ // Since we are statically linking and have no additional DSOs, we can resolve
+ // the relocation directly without using __tls_get_addr.
+ // Use the approach from "x86-64 Linker Optimizations" from the TLS spec
+ // to replace it with the Local Exec relocation variant.
+
+ // Find out whether the code was compiled with the large or small memory
+  // model. For this we look at the next relocation, which is the relocation
+  // for the __tls_get_addr function. If it is a 32-bit relocation, the small
+  // code model was used; a 64-bit relocation indicates the large code model.
+  bool IsSmallCodeModel;
+  // Is the relocation for __tls_get_addr a PC-relative GOT relocation?
+ bool IsGOTPCRel = false;
+
+ switch (GetAddrRelocation.getType()) {
+ case ELF::R_X86_64_GOTPCREL:
+ case ELF::R_X86_64_REX_GOTPCRELX:
+ case ELF::R_X86_64_GOTPCRELX:
+ IsGOTPCRel = true;
+ LLVM_FALLTHROUGH;
+ case ELF::R_X86_64_PLT32:
+ IsSmallCodeModel = true;
+ break;
+ case ELF::R_X86_64_PLTOFF64:
+ IsSmallCodeModel = false;
+ break;
+ default:
+ report_fatal_error(
+ "invalid TLS relocations for General/Local Dynamic TLS Model: "
+ "expected PLT or GOT relocation for __tls_get_addr function");
+ }
+
+  // The offset of the TLSGD/TLSLD relocation from the start of the TLS code
+  // sequence; it is subtracted from the relocation offset to find the start
+ uint64_t TLSSequenceOffset;
+ // The expected start of the code sequence
+ ArrayRef<uint8_t> ExpectedCodeSequence;
+ // The new TLS code sequence that will replace the existing code
+ ArrayRef<uint8_t> NewCodeSequence;
+
+ if (RelType == ELF::R_X86_64_TLSGD) {
+ // The offset of the new TPOFF32 relocation (offset starting from the
+ // beginning of the whole TLS sequence)
+ uint64_t TpoffRelocOffset;
+
+ if (IsSmallCodeModel) {
+ if (!IsGOTPCRel) {
+ static const std::initializer_list<uint8_t> CodeSequence = {
+ 0x66, // data16 (no-op prefix)
+ 0x48, 0x8d, 0x3d, 0x00, 0x00,
+ 0x00, 0x00, // lea <disp32>(%rip), %rdi
+ 0x66, 0x66, // two data16 prefixes
+ 0x48, // rex64 (no-op prefix)
+ 0xe8, 0x00, 0x00, 0x00, 0x00 // call __tls_get_addr@plt
+ };
+ ExpectedCodeSequence = ArrayRef<uint8_t>(CodeSequence);
+ TLSSequenceOffset = 4;
+ } else {
+ // This code sequence is not described in the TLS spec but gcc
+ // generates it sometimes.
+ static const std::initializer_list<uint8_t> CodeSequence = {
+ 0x66, // data16 (no-op prefix)
+ 0x48, 0x8d, 0x3d, 0x00, 0x00,
+ 0x00, 0x00, // lea <disp32>(%rip), %rdi
+ 0x66, // data16 prefix (no-op prefix)
+ 0x48, // rex64 (no-op prefix)
+ 0xff, 0x15, 0x00, 0x00, 0x00,
+ 0x00 // call *__tls_get_addr@gotpcrel(%rip)
+ };
+ ExpectedCodeSequence = ArrayRef<uint8_t>(CodeSequence);
+ TLSSequenceOffset = 4;
+ }
+
+ // The replacement code for the small code model. It's the same for
+ // both sequences.
+ static const std::initializer_list<uint8_t> SmallSequence = {
+ 0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00,
+ 0x00, // mov %fs:0, %rax
+ 0x48, 0x8d, 0x80, 0x00, 0x00, 0x00, 0x00 // lea x@tpoff(%rax),
+ // %rax
+ };
+ NewCodeSequence = ArrayRef<uint8_t>(SmallSequence);
+ TpoffRelocOffset = 12;
+ } else {
+ static const std::initializer_list<uint8_t> CodeSequence = {
+ 0x48, 0x8d, 0x3d, 0x00, 0x00, 0x00, 0x00, // lea <disp32>(%rip),
+ // %rdi
+ 0x48, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, // movabs $__tls_get_addr@pltoff, %rax
+ 0x48, 0x01, 0xd8, // add %rbx, %rax
+ 0xff, 0xd0 // call *%rax
+ };
+ ExpectedCodeSequence = ArrayRef<uint8_t>(CodeSequence);
+ TLSSequenceOffset = 3;
+
+ // The replacement code for the large code model
+ static const std::initializer_list<uint8_t> LargeSequence = {
+ 0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00,
+ 0x00, // mov %fs:0, %rax
+ 0x48, 0x8d, 0x80, 0x00, 0x00, 0x00, 0x00, // lea x@tpoff(%rax),
+ // %rax
+ 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00 // nopw 0x0(%rax,%rax,1)
+ };
+ NewCodeSequence = ArrayRef<uint8_t>(LargeSequence);
+ TpoffRelocOffset = 12;
+ }
+
+ // The TLSGD/TLSLD relocations are PC-relative, so they have an addend.
+    // The new TPOFF32 relocation is used as an absolute offset from
+ // %fs:0, so remove the TLSGD/TLSLD addend again.
+ RelocationEntry RE(SectionID, Offset - TLSSequenceOffset + TpoffRelocOffset,
+ ELF::R_X86_64_TPOFF32, Value.Addend - Addend);
+ if (Value.SymbolName)
+ addRelocationForSymbol(RE, Value.SymbolName);
+ else
+ addRelocationForSection(RE, Value.SectionID);
+ } else if (RelType == ELF::R_X86_64_TLSLD) {
+ if (IsSmallCodeModel) {
+ if (!IsGOTPCRel) {
+ static const std::initializer_list<uint8_t> CodeSequence = {
+ 0x48, 0x8d, 0x3d, 0x00, 0x00, 0x00, // leaq <disp32>(%rip), %rdi
+ 0x00, 0xe8, 0x00, 0x00, 0x00, 0x00 // call __tls_get_addr@plt
+ };
+ ExpectedCodeSequence = ArrayRef<uint8_t>(CodeSequence);
+ TLSSequenceOffset = 3;
+
+ // The replacement code for the small code model
+ static const std::initializer_list<uint8_t> SmallSequence = {
+ 0x66, 0x66, 0x66, // three data16 prefixes (no-op)
+ 0x64, 0x48, 0x8b, 0x04, 0x25,
+ 0x00, 0x00, 0x00, 0x00 // mov %fs:0, %rax
+ };
+ NewCodeSequence = ArrayRef<uint8_t>(SmallSequence);
+ } else {
+ // This code sequence is not described in the TLS spec but gcc
+ // generates it sometimes.
+ static const std::initializer_list<uint8_t> CodeSequence = {
+ 0x48, 0x8d, 0x3d, 0x00,
+ 0x00, 0x00, 0x00, // leaq <disp32>(%rip), %rdi
+ 0xff, 0x15, 0x00, 0x00,
+ 0x00, 0x00 // call
+ // *__tls_get_addr@gotpcrel(%rip)
+ };
+ ExpectedCodeSequence = ArrayRef<uint8_t>(CodeSequence);
+ TLSSequenceOffset = 3;
+
+        // The replacement code is just like above, but it needs to be
+        // one byte longer.
+ static const std::initializer_list<uint8_t> SmallSequence = {
+ 0x0f, 0x1f, 0x40, 0x00, // 4 byte nop
+ 0x64, 0x48, 0x8b, 0x04, 0x25,
+ 0x00, 0x00, 0x00, 0x00 // mov %fs:0, %rax
+ };
+ NewCodeSequence = ArrayRef<uint8_t>(SmallSequence);
+ }
+ } else {
+      // This is the same sequence as the TLSGD sequence with the large
+      // memory model above.
+ static const std::initializer_list<uint8_t> CodeSequence = {
+ 0x48, 0x8d, 0x3d, 0x00, 0x00, 0x00, 0x00, // lea <disp32>(%rip),
+ // %rdi
+ 0x48, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x48, // movabs $__tls_get_addr@pltoff, %rax
+ 0x01, 0xd8, // add %rbx, %rax
+ 0xff, 0xd0 // call *%rax
+ };
+ ExpectedCodeSequence = ArrayRef<uint8_t>(CodeSequence);
+ TLSSequenceOffset = 3;
+
+ // The replacement code for the large code model
+ static const std::initializer_list<uint8_t> LargeSequence = {
+ 0x66, 0x66, 0x66, // three data16 prefixes (no-op)
+ 0x66, 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00,
+ 0x00, // 10 byte nop
+ 0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00 // mov %fs:0,%rax
+ };
+ NewCodeSequence = ArrayRef<uint8_t>(LargeSequence);
+ }
+ } else {
+ llvm_unreachable("both TLS relocations handled above");
+ }
+
+ assert(ExpectedCodeSequence.size() == NewCodeSequence.size() &&
+ "Old and new code sequences must have the same size");
+
+ auto &Section = Sections[SectionID];
+ if (Offset < TLSSequenceOffset ||
+ (Offset - TLSSequenceOffset + NewCodeSequence.size()) >
+ Section.getSize()) {
+ report_fatal_error("unexpected end of section in TLS sequence");
+ }
+
+ auto *TLSSequence = Section.getAddressWithOffset(Offset - TLSSequenceOffset);
+ if (ArrayRef<uint8_t>(TLSSequence, ExpectedCodeSequence.size()) !=
+ ExpectedCodeSequence) {
+ report_fatal_error(
+ "invalid TLS sequence for Global/Local Dynamic TLS Model");
+ }
+
+ memcpy(TLSSequence, NewCodeSequence.data(), NewCodeSequence.size());
+}
+
size_t RuntimeDyldELF::getGOTEntrySize() {
// We don't use the GOT in all of these cases, but it's essentially free
// to put them all here.
diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
index 31892b7466e6..1251036f4caa 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
@@ -161,6 +161,18 @@ private:
bool relocationNeedsGot(const RelocationRef &R) const override;
bool relocationNeedsStub(const RelocationRef &R) const override;
+ // Process a GOTTPOFF TLS relocation for x86-64
+ // NOLINTNEXTLINE(readability-identifier-naming)
+ void processX86_64GOTTPOFFRelocation(unsigned SectionID, uint64_t Offset,
+ RelocationValueRef Value,
+ int64_t Addend);
+ // Process a TLSLD/TLSGD relocation for x86-64
+ // NOLINTNEXTLINE(readability-identifier-naming)
+ void processX86_64TLSRelocation(unsigned SectionID, uint64_t Offset,
+ uint64_t RelType, RelocationValueRef Value,
+ int64_t Addend,
+ const RelocationRef &GetAddrRelocation);
+
public:
RuntimeDyldELF(RuntimeDyld::MemoryManager &MemMgr,
JITSymbolResolver &Resolver);
diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h
index 721f2b14829a..dd66ff7ecf70 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h
@@ -29,8 +29,7 @@ static bool isThumbFunc(object::symbol_iterator Symbol,
std::string Buf;
raw_string_ostream OS(Buf);
logAllUnhandledErrors(SymTypeOrErr.takeError(), OS);
- OS.flush();
- report_fatal_error(Buf);
+ report_fatal_error(Twine(OS.str()));
}
if (*SymTypeOrErr != object::SymbolRef::ST_Function)
diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.h b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.h
index 14fb36f070f8..f03acb41d670 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.h
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.h
@@ -10,7 +10,6 @@
#define LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDELFMIPS_H
#include "../RuntimeDyldELF.h"
-#include <string>
#define DEBUG_TYPE "dyld"
diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
index a76958a9e2c2..fcf723aaea28 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
@@ -10,7 +10,6 @@
#define LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDMACHOARM_H
#include "../RuntimeDyldMachO.h"
-#include <string>
#define DEBUG_TYPE "dyld"
@@ -141,7 +140,7 @@ public:
return ++RelI;
}
- // Sanity check relocation type.
+ // Validate the relocation type.
switch (RelType) {
UNIMPLEMENTED_RELOC(MachO::ARM_RELOC_PAIR);
UNIMPLEMENTED_RELOC(MachO::ARM_RELOC_SECTDIFF);
diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h
index 523deb29b723..d029d3266f79 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h
@@ -10,7 +10,6 @@
#define LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDMACHOI386_H
#include "../RuntimeDyldMachO.h"
-#include <string>
#define DEBUG_TYPE "dyld"
diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h
index 28febbdb948c..a4d91cf338cb 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h
@@ -10,7 +10,6 @@
#define LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDMACHOX86_64_H
#include "../RuntimeDyldMachO.h"
-#include <string>
#define DEBUG_TYPE "dyld"
diff --git a/llvm/lib/ExecutionEngine/TargetSelect.cpp b/llvm/lib/ExecutionEngine/TargetSelect.cpp
index 28ea04be1a5e..c67a1a7661d6 100644
--- a/llvm/lib/ExecutionEngine/TargetSelect.cpp
+++ b/llvm/lib/ExecutionEngine/TargetSelect.cpp
@@ -17,8 +17,8 @@
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Host.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
diff --git a/llvm/lib/FileCheck/FileCheck.cpp b/llvm/lib/FileCheck/FileCheck.cpp
index 04476d999336..c962231cbdc1 100644
--- a/llvm/lib/FileCheck/FileCheck.cpp
+++ b/llvm/lib/FileCheck/FileCheck.cpp
@@ -954,8 +954,8 @@ bool Pattern::parsePattern(StringRef PatternStr, StringRef Prefix,
// Check to see if this is a fixed string, or if it has regex pieces.
if (!MatchFullLinesHere &&
- (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos &&
- PatternStr.find("[[") == StringRef::npos))) {
+ (PatternStr.size() < 2 ||
+ (!PatternStr.contains("{{") && !PatternStr.contains("[[")))) {
FixedStr = PatternStr;
return false;
}
@@ -1034,7 +1034,8 @@ bool Pattern::parsePattern(StringRef PatternStr, StringRef Prefix,
bool IsLegacyLineExpr = false;
StringRef DefName;
StringRef SubstStr;
- std::string MatchRegexp;
+ StringRef MatchRegexp;
+ std::string WildcardRegexp;
size_t SubstInsertIdx = RegExStr.size();
// Parse string variable or legacy @LINE expression.
@@ -1078,7 +1079,7 @@ bool Pattern::parsePattern(StringRef PatternStr, StringRef Prefix,
return true;
}
DefName = Name;
- MatchRegexp = MatchStr.str();
+ MatchRegexp = MatchStr;
} else {
if (IsPseudo) {
MatchStr = OrigMatchStr;
@@ -1117,7 +1118,8 @@ bool Pattern::parsePattern(StringRef PatternStr, StringRef Prefix,
SubstStr = MatchStr;
else {
ExpressionFormat Format = ExpressionPointer->getFormat();
- MatchRegexp = cantFail(Format.getWildcardRegex());
+ WildcardRegexp = cantFail(Format.getWildcardRegex());
+ MatchRegexp = WildcardRegexp;
}
}
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 76954f9a37e1..ce998df757ec 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -13,18 +13,29 @@
//===----------------------------------------------------------------------===//
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
-
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/IR/Value.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/CodeExtractor.h"
+#include "llvm/Transforms/Utils/LoopPeel.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include "llvm/Transforms/Utils/UnrollLoop.h"
#include <sstream>
@@ -39,16 +50,22 @@ static cl::opt<bool>
"'as-if' properties of runtime calls."),
cl::init(false));
+static cl::opt<double> UnrollThresholdFactor(
+ "openmp-ir-builder-unroll-threshold-factor", cl::Hidden,
+ cl::desc("Factor for the unroll threshold to account for code "
+ "simplifications still taking place"),
+ cl::init(1.5));
+
void OpenMPIRBuilder::addAttributes(omp::RuntimeFunction FnID, Function &Fn) {
LLVMContext &Ctx = Fn.getContext();
// Get the function's current attributes.
auto Attrs = Fn.getAttributes();
- auto FnAttrs = Attrs.getFnAttributes();
- auto RetAttrs = Attrs.getRetAttributes();
+ auto FnAttrs = Attrs.getFnAttrs();
+ auto RetAttrs = Attrs.getRetAttrs();
SmallVector<AttributeSet, 4> ArgAttrs;
for (size_t ArgNo = 0; ArgNo < Fn.arg_size(); ++ArgNo)
- ArgAttrs.emplace_back(Attrs.getParamAttributes(ArgNo));
+ ArgAttrs.emplace_back(Attrs.getParamAttrs(ArgNo));
#define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
#include "llvm/Frontend/OpenMP/OMPKinds.def"
@@ -228,6 +245,16 @@ OpenMPIRBuilder::~OpenMPIRBuilder() {
assert(OutlineInfos.empty() && "There must be no outstanding outlinings");
}
+GlobalValue *OpenMPIRBuilder::createGlobalFlag(unsigned Value, StringRef Name) {
+ IntegerType *I32Ty = Type::getInt32Ty(M.getContext());
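+  // Emit the flag as a constant i32 global with weak_odr linkage so that all
+  // translation units defining the same flag share a single definition.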
+ auto *GV =
+ new GlobalVariable(M, I32Ty,
+ /* isConstant = */ true, GlobalValue::WeakODRLinkage,
+ ConstantInt::get(I32Ty, Value), Name);
+
+ return GV;
+}
+
Value *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr,
IdentFlag LocFlags,
unsigned Reserve2Flags) {
@@ -241,32 +268,29 @@ Value *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr,
Constant *IdentData[] = {
I32Null, ConstantInt::get(Int32, uint32_t(LocFlags)),
ConstantInt::get(Int32, Reserve2Flags), I32Null, SrcLocStr};
- Constant *Initializer = ConstantStruct::get(
- cast<StructType>(IdentPtr->getPointerElementType()), IdentData);
+ Constant *Initializer =
+ ConstantStruct::get(OpenMPIRBuilder::Ident, IdentData);
// Look for existing encoding of the location + flags, not needed but
// minimizes the difference to the existing solution while we transition.
for (GlobalVariable &GV : M.getGlobalList())
- if (GV.getType() == IdentPtr && GV.hasInitializer())
+ if (GV.getValueType() == OpenMPIRBuilder::Ident && GV.hasInitializer())
if (GV.getInitializer() == Initializer)
- return Ident = &GV;
-
- auto *GV = new GlobalVariable(M, IdentPtr->getPointerElementType(),
- /* isConstant = */ true,
- GlobalValue::PrivateLinkage, Initializer);
- GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
- GV->setAlignment(Align(8));
- Ident = GV;
+ Ident = &GV;
+
+ if (!Ident) {
+ auto *GV = new GlobalVariable(
+ M, OpenMPIRBuilder::Ident,
+ /* isConstant = */ true, GlobalValue::PrivateLinkage, Initializer, "",
+ nullptr, GlobalValue::NotThreadLocal,
+ M.getDataLayout().getDefaultGlobalsAddressSpace());
+ GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+ GV->setAlignment(Align(8));
+ Ident = GV;
+ }
}
- return Builder.CreatePointerCast(Ident, IdentPtr);
-}
-Type *OpenMPIRBuilder::getLanemaskType() {
- LLVMContext &Ctx = M.getContext();
- Triple triple(M.getTargetTriple());
-
- // This test is adequate until deviceRTL has finer grained lane widths
- return triple.isAMDGCN() ? Type::getInt64Ty(Ctx) : Type::getInt32Ty(Ctx);
+ return Builder.CreatePointerCast(Ident, IdentPtr);
}
Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef LocStr) {
@@ -310,9 +334,8 @@ Constant *OpenMPIRBuilder::getOrCreateDefaultSrcLocStr() {
return getOrCreateSrcLocStr(";unknown;unknown;0;0;;");
}
-Constant *
-OpenMPIRBuilder::getOrCreateSrcLocStr(const LocationDescription &Loc) {
- DILocation *DIL = Loc.DL.get();
+Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(DebugLoc DL, Function *F) {
+ DILocation *DIL = DL.get();
if (!DIL)
return getOrCreateDefaultSrcLocStr();
StringRef FileName = M.getName();
@@ -320,12 +343,17 @@ OpenMPIRBuilder::getOrCreateSrcLocStr(const LocationDescription &Loc) {
if (Optional<StringRef> Source = DIF->getSource())
FileName = *Source;
StringRef Function = DIL->getScope()->getSubprogram()->getName();
- Function =
- !Function.empty() ? Function : Loc.IP.getBlock()->getParent()->getName();
+ if (Function.empty() && F)
+ Function = F->getName();
return getOrCreateSrcLocStr(Function, FileName, DIL->getLine(),
DIL->getColumn());
}
+Constant *
+OpenMPIRBuilder::getOrCreateSrcLocStr(const LocationDescription &Loc) {
+ return getOrCreateSrcLocStr(Loc.DL, Loc.IP.getBlock()->getParent());
+}
+
Value *OpenMPIRBuilder::getOrCreateThreadID(Value *Ident) {
return Builder.CreateCall(
getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num), Ident,
@@ -581,8 +609,8 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
// Add some fake uses for OpenMP provided arguments.
ToBeDeleted.push_back(Builder.CreateLoad(Int32, TIDAddr, "tid.addr.use"));
- Instruction *ZeroAddrUse = Builder.CreateLoad(Int32, ZeroAddr,
- "zero.addr.use");
+ Instruction *ZeroAddrUse =
+ Builder.CreateLoad(Int32, ZeroAddr, "zero.addr.use");
ToBeDeleted.push_back(ZeroAddrUse);
// ThenBB
@@ -965,8 +993,9 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections(
Value *ST = ConstantInt::get(I32Ty, 1);
llvm::CanonicalLoopInfo *LoopInfo = createCanonicalLoop(
Loc, LoopBodyGenCB, LB, UB, ST, true, false, AllocaIP, "section_loop");
- LoopInfo = createStaticWorkshareLoop(Loc, LoopInfo, AllocaIP, true);
- BasicBlock *LoopAfterBB = LoopInfo->getAfter();
+ InsertPointTy AfterIP =
+ applyStaticWorkshareLoop(Loc.DL, LoopInfo, AllocaIP, true);
+ BasicBlock *LoopAfterBB = AfterIP.getBlock();
Instruction *SplitPos = LoopAfterBB->getTerminator();
if (!isa_and_nonnull<BranchInst>(SplitPos))
SplitPos = new UnreachableInst(Builder.getContext(), LoopAfterBB);
@@ -1022,6 +1051,179 @@ OpenMPIRBuilder::createSection(const LocationDescription &Loc,
/*IsCancellable*/ true);
}
+/// Create a function with a unique name and a "void (i8*, i8*)" signature in
+/// the given module and return it.
+Function *getFreshReductionFunc(Module &M) {
+ Type *VoidTy = Type::getVoidTy(M.getContext());
+ Type *Int8PtrTy = Type::getInt8PtrTy(M.getContext());
+ auto *FuncTy =
+ FunctionType::get(VoidTy, {Int8PtrTy, Int8PtrTy}, /* IsVarArg */ false);
+ return Function::Create(FuncTy, GlobalVariable::InternalLinkage,
+ M.getDataLayout().getDefaultGlobalsAddressSpace(),
+ ".omp.reduction.func", &M);
+}
+
+OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createReductions(
+ const LocationDescription &Loc, InsertPointTy AllocaIP,
+ ArrayRef<ReductionInfo> ReductionInfos, bool IsNoWait) {
+ for (const ReductionInfo &RI : ReductionInfos) {
+ (void)RI;
+ assert(RI.Variable && "expected non-null variable");
+ assert(RI.PrivateVariable && "expected non-null private variable");
+ assert(RI.ReductionGen && "expected non-null reduction generator callback");
+ assert(RI.Variable->getType() == RI.PrivateVariable->getType() &&
+ "expected variables and their private equivalents to have the same "
+ "type");
+ assert(RI.Variable->getType()->isPointerTy() &&
+ "expected variables to be pointers");
+ }
+
+ if (!updateToLocation(Loc))
+ return InsertPointTy();
+
+ BasicBlock *InsertBlock = Loc.IP.getBlock();
+ BasicBlock *ContinuationBlock =
+ InsertBlock->splitBasicBlock(Loc.IP.getPoint(), "reduce.finalize");
+ InsertBlock->getTerminator()->eraseFromParent();
+
+ // Create and populate array of type-erased pointers to private reduction
+ // values.
+ unsigned NumReductions = ReductionInfos.size();
+ Type *RedArrayTy = ArrayType::get(Builder.getInt8PtrTy(), NumReductions);
+ Builder.restoreIP(AllocaIP);
+ Value *RedArray = Builder.CreateAlloca(RedArrayTy, nullptr, "red.array");
+
+ Builder.SetInsertPoint(InsertBlock, InsertBlock->end());
+
+ for (auto En : enumerate(ReductionInfos)) {
+ unsigned Index = En.index();
+ const ReductionInfo &RI = En.value();
+ Value *RedArrayElemPtr = Builder.CreateConstInBoundsGEP2_64(
+ RedArrayTy, RedArray, 0, Index, "red.array.elem." + Twine(Index));
+ Value *Casted =
+ Builder.CreateBitCast(RI.PrivateVariable, Builder.getInt8PtrTy(),
+ "private.red.var." + Twine(Index) + ".casted");
+ Builder.CreateStore(Casted, RedArrayElemPtr);
+ }
+
+ // Emit a call to the runtime function that orchestrates the reduction.
+ // Declare the reduction function in the process.
+ Function *Func = Builder.GetInsertBlock()->getParent();
+ Module *Module = Func->getParent();
+ Value *RedArrayPtr =
+ Builder.CreateBitCast(RedArray, Builder.getInt8PtrTy(), "red.array.ptr");
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+ bool CanGenerateAtomic =
+ llvm::all_of(ReductionInfos, [](const ReductionInfo &RI) {
+ return RI.AtomicReductionGen;
+ });
+ Value *Ident = getOrCreateIdent(
+ SrcLocStr, CanGenerateAtomic ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE
+ : IdentFlag(0));
+ Value *ThreadId = getOrCreateThreadID(Ident);
+ Constant *NumVariables = Builder.getInt32(NumReductions);
+ const DataLayout &DL = Module->getDataLayout();
+ unsigned RedArrayByteSize = DL.getTypeStoreSize(RedArrayTy);
+ Constant *RedArraySize = Builder.getInt64(RedArrayByteSize);
+ Function *ReductionFunc = getFreshReductionFunc(*Module);
+ Value *Lock = getOMPCriticalRegionLock(".reduction");
+ Function *ReduceFunc = getOrCreateRuntimeFunctionPtr(
+ IsNoWait ? RuntimeFunction::OMPRTL___kmpc_reduce_nowait
+ : RuntimeFunction::OMPRTL___kmpc_reduce);
+ CallInst *ReduceCall =
+ Builder.CreateCall(ReduceFunc,
+ {Ident, ThreadId, NumVariables, RedArraySize,
+ RedArrayPtr, ReductionFunc, Lock},
+ "reduce");
+
+ // Create final reduction entry blocks for the atomic and non-atomic case.
+ // Emit IR that dispatches control flow to one of the blocks based on the
+ // reduction supporting the atomic mode.
+ BasicBlock *NonAtomicRedBlock =
+ BasicBlock::Create(Module->getContext(), "reduce.switch.nonatomic", Func);
+ BasicBlock *AtomicRedBlock =
+ BasicBlock::Create(Module->getContext(), "reduce.switch.atomic", Func);
+ SwitchInst *Switch =
+ Builder.CreateSwitch(ReduceCall, ContinuationBlock, /* NumCases */ 2);
+ Switch->addCase(Builder.getInt32(1), NonAtomicRedBlock);
+ Switch->addCase(Builder.getInt32(2), AtomicRedBlock);
+
+ // Populate the non-atomic reduction using the elementwise reduction function.
+ // This loads the elements from the global and private variables and reduces
+ // them before storing back the result to the global variable.
+ Builder.SetInsertPoint(NonAtomicRedBlock);
+ for (auto En : enumerate(ReductionInfos)) {
+ const ReductionInfo &RI = En.value();
+ Type *ValueType = RI.getElementType();
+ Value *RedValue = Builder.CreateLoad(ValueType, RI.Variable,
+ "red.value." + Twine(En.index()));
+ Value *PrivateRedValue =
+ Builder.CreateLoad(ValueType, RI.PrivateVariable,
+ "red.private.value." + Twine(En.index()));
+ Value *Reduced;
+ Builder.restoreIP(
+ RI.ReductionGen(Builder.saveIP(), RedValue, PrivateRedValue, Reduced));
+ if (!Builder.GetInsertBlock())
+ return InsertPointTy();
+ Builder.CreateStore(Reduced, RI.Variable);
+ }
+ Function *EndReduceFunc = getOrCreateRuntimeFunctionPtr(
+ IsNoWait ? RuntimeFunction::OMPRTL___kmpc_end_reduce_nowait
+ : RuntimeFunction::OMPRTL___kmpc_end_reduce);
+ Builder.CreateCall(EndReduceFunc, {Ident, ThreadId, Lock});
+ Builder.CreateBr(ContinuationBlock);
+
+ // Populate the atomic reduction using the atomic elementwise reduction
+ // function. There are no loads/stores here because they will be happening
+ // inside the atomic elementwise reduction.
+ Builder.SetInsertPoint(AtomicRedBlock);
+ if (CanGenerateAtomic) {
+ for (const ReductionInfo &RI : ReductionInfos) {
+ Builder.restoreIP(RI.AtomicReductionGen(Builder.saveIP(), RI.Variable,
+ RI.PrivateVariable));
+ if (!Builder.GetInsertBlock())
+ return InsertPointTy();
+ }
+ Builder.CreateBr(ContinuationBlock);
+ } else {
+ Builder.CreateUnreachable();
+ }
+
+ // Populate the outlined reduction function using the elementwise reduction
+ // function. Partial values are extracted from the type-erased array of
+ // pointers to private variables.
+ BasicBlock *ReductionFuncBlock =
+ BasicBlock::Create(Module->getContext(), "", ReductionFunc);
+ Builder.SetInsertPoint(ReductionFuncBlock);
+ Value *LHSArrayPtr = Builder.CreateBitCast(ReductionFunc->getArg(0),
+ RedArrayTy->getPointerTo());
+ Value *RHSArrayPtr = Builder.CreateBitCast(ReductionFunc->getArg(1),
+ RedArrayTy->getPointerTo());
+ for (auto En : enumerate(ReductionInfos)) {
+ const ReductionInfo &RI = En.value();
+ Value *LHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
+ RedArrayTy, LHSArrayPtr, 0, En.index());
+ Value *LHSI8Ptr = Builder.CreateLoad(Builder.getInt8PtrTy(), LHSI8PtrPtr);
+ Value *LHSPtr = Builder.CreateBitCast(LHSI8Ptr, RI.Variable->getType());
+ Value *LHS = Builder.CreateLoad(RI.getElementType(), LHSPtr);
+ Value *RHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
+ RedArrayTy, RHSArrayPtr, 0, En.index());
+ Value *RHSI8Ptr = Builder.CreateLoad(Builder.getInt8PtrTy(), RHSI8PtrPtr);
+ Value *RHSPtr =
+ Builder.CreateBitCast(RHSI8Ptr, RI.PrivateVariable->getType());
+ Value *RHS = Builder.CreateLoad(RI.getElementType(), RHSPtr);
+ Value *Reduced;
+ Builder.restoreIP(RI.ReductionGen(Builder.saveIP(), LHS, RHS, Reduced));
+ if (!Builder.GetInsertBlock())
+ return InsertPointTy();
+ Builder.CreateStore(Reduced, LHSPtr);
+ }
+ Builder.CreateRetVoid();
+
+ Builder.SetInsertPoint(ContinuationBlock);
+ return Builder.saveIP();
+}
+
OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createMaster(const LocationDescription &Loc,
BodyGenCallbackTy BodyGenCB,
@@ -1133,8 +1335,6 @@ CanonicalLoopInfo *OpenMPIRBuilder::createLoopSkeleton(
CL->Exit = Exit;
CL->After = After;
- CL->IsValid = true;
-
#ifndef NDEBUG
CL->assertOK();
#endif
@@ -1271,14 +1471,17 @@ void setCanonicalLoopTripCount(CanonicalLoopInfo *CLI, Value *TripCount) {
CLI->assertOK();
}
-CanonicalLoopInfo *OpenMPIRBuilder::createStaticWorkshareLoop(
- const LocationDescription &Loc, CanonicalLoopInfo *CLI,
- InsertPointTy AllocaIP, bool NeedsBarrier, Value *Chunk) {
+OpenMPIRBuilder::InsertPointTy
+OpenMPIRBuilder::applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
+ InsertPointTy AllocaIP,
+ bool NeedsBarrier, Value *Chunk) {
+ assert(CLI->isValid() && "Requires a valid canonical loop");
+
// Set up the source location value for OpenMP runtime.
- if (!updateToLocation(Loc))
- return nullptr;
+ Builder.restoreIP(CLI->getPreheaderIP());
+ Builder.SetCurrentDebugLocation(DL);
- Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+ Constant *SrcLocStr = getOrCreateSrcLocStr(DL);
Value *SrcLoc = getOrCreateIdent(SrcLocStr);
// Declare useful OpenMP runtime functions.
@@ -1308,6 +1511,7 @@ CanonicalLoopInfo *OpenMPIRBuilder::createStaticWorkshareLoop(
Builder.CreateStore(UpperBound, PUpperBound);
Builder.CreateStore(One, PStride);
+ // FIXME: schedule(static) is NOT the same as schedule(static,1)
if (!Chunk)
Chunk = One;
@@ -1348,19 +1552,21 @@ CanonicalLoopInfo *OpenMPIRBuilder::createStaticWorkshareLoop(
// Add the barrier if requested.
if (NeedsBarrier)
- createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
+ createBarrier(LocationDescription(Builder.saveIP(), DL),
omp::Directive::OMPD_for, /* ForceSimpleCall */ false,
/* CheckCancelFlag */ false);
- CLI->assertOK();
- return CLI;
+ InsertPointTy AfterIP = CLI->getAfterIP();
+ CLI->invalidate();
+
+ return AfterIP;
}
-CanonicalLoopInfo *OpenMPIRBuilder::createWorkshareLoop(
- const LocationDescription &Loc, CanonicalLoopInfo *CLI,
- InsertPointTy AllocaIP, bool NeedsBarrier) {
+OpenMPIRBuilder::InsertPointTy
+OpenMPIRBuilder::applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
+ InsertPointTy AllocaIP, bool NeedsBarrier) {
// Currently only supports static schedules.
- return createStaticWorkshareLoop(Loc, CLI, AllocaIP, NeedsBarrier);
+ return applyStaticWorkshareLoop(DL, CLI, AllocaIP, NeedsBarrier);
}
/// Returns an LLVM function to call for initializing loop bounds using OpenMP
@@ -1395,14 +1601,15 @@ getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder) {
llvm_unreachable("unknown OpenMP loop iterator bitwidth");
}
-OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createDynamicWorkshareLoop(
- const LocationDescription &Loc, CanonicalLoopInfo *CLI,
- InsertPointTy AllocaIP, OMPScheduleType SchedType, bool NeedsBarrier,
- Value *Chunk) {
+OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyDynamicWorkshareLoop(
+ DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
+ OMPScheduleType SchedType, bool NeedsBarrier, Value *Chunk) {
+ assert(CLI->isValid() && "Requires a valid canonical loop");
+
// Set up the source location value for OpenMP runtime.
- Builder.SetCurrentDebugLocation(Loc.DL);
+ Builder.SetCurrentDebugLocation(DL);
- Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+ Constant *SrcLocStr = getOrCreateSrcLocStr(DL);
Value *SrcLoc = getOrCreateIdent(SrcLocStr);
// Declare useful OpenMP runtime functions.
@@ -1496,11 +1703,12 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createDynamicWorkshareLoop(
// Add the barrier if requested.
if (NeedsBarrier) {
Builder.SetInsertPoint(&Exit->back());
- createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
+ createBarrier(LocationDescription(Builder.saveIP(), DL),
omp::Directive::OMPD_for, /* ForceSimpleCall */ false,
/* CheckCancelFlag */ false);
}
+ CLI->invalidate();
return AfterIP;
}
@@ -1592,6 +1800,8 @@ OpenMPIRBuilder::collapseLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
// TODO: Find common/largest indvar type.
Value *CollapsedTripCount = nullptr;
for (CanonicalLoopInfo *L : Loops) {
+ assert(L->isValid() &&
+ "All loops to collapse must be valid canonical loops");
Value *OrigTripCount = L->getTripCount();
if (!CollapsedTripCount) {
CollapsedTripCount = OrigTripCount;
@@ -1680,6 +1890,9 @@ OpenMPIRBuilder::collapseLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
Loop->collectControlBlocks(OldControlBBs);
removeUnusedBlocksFromParent(OldControlBBs);
+ for (CanonicalLoopInfo *L : Loops)
+ L->invalidate();
+
#ifndef NDEBUG
Result->assertOK();
#endif
@@ -1706,6 +1919,7 @@ OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
// any original CanonicalLoopInfo.
SmallVector<Value *, 4> OrigTripCounts, OrigIndVars;
for (CanonicalLoopInfo *L : Loops) {
+ assert(L->isValid() && "All input loops must be valid canonical loops");
OrigTripCounts.push_back(L->getTripCount());
OrigIndVars.push_back(L->getIndVar());
}
@@ -1864,6 +2078,9 @@ OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
Loop->collectControlBlocks(OldControlBBs);
removeUnusedBlocksFromParent(OldControlBBs);
+ for (CanonicalLoopInfo *L : Loops)
+ L->invalidate();
+
#ifndef NDEBUG
for (CanonicalLoopInfo *GenL : Result)
GenL->assertOK();
@@ -1871,6 +2088,287 @@ OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
return Result;
}
+/// Attach loop metadata \p Properties to the loop described by \p Loop. If the
+/// loop already has metadata, the loop properties are appended.
+static void addLoopMetadata(CanonicalLoopInfo *Loop,
+ ArrayRef<Metadata *> Properties) {
+ assert(Loop->isValid() && "Expecting a valid CanonicalLoopInfo");
+
+ // Nothing to do if no property to attach.
+ if (Properties.empty())
+ return;
+
+ LLVMContext &Ctx = Loop->getFunction()->getContext();
+ SmallVector<Metadata *> NewLoopProperties;
+ NewLoopProperties.push_back(nullptr);
+
+ // If the loop already has metadata, prepend it to the new metadata.
+ BasicBlock *Latch = Loop->getLatch();
+ assert(Latch && "A valid CanonicalLoopInfo must have a unique latch");
+ MDNode *Existing = Latch->getTerminator()->getMetadata(LLVMContext::MD_loop);
+ if (Existing)
+ append_range(NewLoopProperties, drop_begin(Existing->operands(), 1));
+
+ append_range(NewLoopProperties, Properties);
+ MDNode *LoopID = MDNode::getDistinct(Ctx, NewLoopProperties);
+ LoopID->replaceOperandWith(0, LoopID);
+
+ Latch->getTerminator()->setMetadata(LLVMContext::MD_loop, LoopID);
+}
+
+void OpenMPIRBuilder::unrollLoopFull(DebugLoc, CanonicalLoopInfo *Loop) {
+ LLVMContext &Ctx = Builder.getContext();
+ addLoopMetadata(
+ Loop, {MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.enable")),
+ MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.full"))});
+}
+
+void OpenMPIRBuilder::unrollLoopHeuristic(DebugLoc, CanonicalLoopInfo *Loop) {
+ LLVMContext &Ctx = Builder.getContext();
+ addLoopMetadata(
+ Loop, {
+ MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.enable")),
+ });
+}
+
+/// Create the TargetMachine object to query the backend for optimization
+/// preferences.
+///
+/// Ideally, this would be passed from the front-end to the OpenMPBuilder, but
+/// e.g. Clang does not pass it to its CodeGen layer and creates it only when
+/// needed for the LLVM pass pipeline. We use some default options to avoid
+/// having to pass too many settings from the frontend that probably do not
+/// matter.
+///
+/// Currently, TargetMachine is only used sometimes by the unrollLoopPartial
+/// method. If we are going to use TargetMachine for more purposes, especially
+/// those that are sensitive to TargetOptions, RelocModel and CodeModel, it
+/// might be worth requiring front-ends to pass on their TargetMachine, or at
+/// least cache it between methods. Note that while frontends such as Clang
+/// have just a single main TargetMachine per translation unit, "target-cpu" and
+/// "target-features" that determine the TargetMachine are per-function and can
+/// be overridden using __attribute__((target("OPTIONS"))).
+static std::unique_ptr<TargetMachine>
+createTargetMachine(Function *F, CodeGenOpt::Level OptLevel) {
+ Module *M = F->getParent();
+
+ StringRef CPU = F->getFnAttribute("target-cpu").getValueAsString();
+ StringRef Features = F->getFnAttribute("target-features").getValueAsString();
+ const std::string &Triple = M->getTargetTriple();
+
+ std::string Error;
+ const llvm::Target *TheTarget = TargetRegistry::lookupTarget(Triple, Error);
+ if (!TheTarget)
+ return {};
+
+ llvm::TargetOptions Options;
+ return std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine(
+ Triple, CPU, Features, Options, /*RelocModel=*/None, /*CodeModel=*/None,
+ OptLevel));
+}
+
+/// Heuristically determine the best-performing unroll factor for \p CLI. This
+/// depends on the target processor. We are re-using the same heuristics as the
+/// LoopUnrollPass.
+static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI) {
+ Function *F = CLI->getFunction();
+
+ // Assume the user requests the most aggressive unrolling, even if the rest of
+ // the code is optimized using a lower setting.
+ CodeGenOpt::Level OptLevel = CodeGenOpt::Aggressive;
+ std::unique_ptr<TargetMachine> TM = createTargetMachine(F, OptLevel);
+
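+  // Register just the analyses that gatherUnrollingPreferences() and
+  // computeUnrollCount() below require, using the target's TTI when available.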
+ FunctionAnalysisManager FAM;
+ FAM.registerPass([]() { return TargetLibraryAnalysis(); });
+ FAM.registerPass([]() { return AssumptionAnalysis(); });
+ FAM.registerPass([]() { return DominatorTreeAnalysis(); });
+ FAM.registerPass([]() { return LoopAnalysis(); });
+ FAM.registerPass([]() { return ScalarEvolutionAnalysis(); });
+ FAM.registerPass([]() { return PassInstrumentationAnalysis(); });
+ TargetIRAnalysis TIRA;
+ if (TM)
+ TIRA = TargetIRAnalysis(
+ [&](const Function &F) { return TM->getTargetTransformInfo(F); });
+ FAM.registerPass([&]() { return TIRA; });
+
+ TargetIRAnalysis::Result &&TTI = TIRA.run(*F, FAM);
+ ScalarEvolutionAnalysis SEA;
+ ScalarEvolution &&SE = SEA.run(*F, FAM);
+ DominatorTreeAnalysis DTA;
+ DominatorTree &&DT = DTA.run(*F, FAM);
+ LoopAnalysis LIA;
+ LoopInfo &&LI = LIA.run(*F, FAM);
+ AssumptionAnalysis ACT;
+ AssumptionCache &&AC = ACT.run(*F, FAM);
+ OptimizationRemarkEmitter ORE{F};
+
+ Loop *L = LI.getLoopFor(CLI->getHeader());
+ assert(L && "Expecting CanonicalLoopInfo to be recognized as a loop");
+
+ TargetTransformInfo::UnrollingPreferences UP =
+ gatherUnrollingPreferences(L, SE, TTI,
+ /*BlockFrequencyInfo=*/nullptr,
+ /*ProfileSummaryInfo=*/nullptr, ORE, OptLevel,
+ /*UserThreshold=*/None,
+ /*UserCount=*/None,
+ /*UserAllowPartial=*/true,
+ /*UserAllowRuntime=*/true,
+ /*UserUpperBound=*/None,
+ /*UserFullUnrollMaxCount=*/None);
+
+ UP.Force = true;
+
+ // Account for additional optimizations taking place before the LoopUnrollPass
+ // would unroll the loop.
+ UP.Threshold *= UnrollThresholdFactor;
+ UP.PartialThreshold *= UnrollThresholdFactor;
+
+ // Use normal unroll factors even if the rest of the code is optimized for
+ // size.
+ UP.OptSizeThreshold = UP.Threshold;
+ UP.PartialOptSizeThreshold = UP.PartialThreshold;
+
+ LLVM_DEBUG(dbgs() << "Unroll heuristic thresholds:\n"
+ << " Threshold=" << UP.Threshold << "\n"
+ << " PartialThreshold=" << UP.PartialThreshold << "\n"
+ << " OptSizeThreshold=" << UP.OptSizeThreshold << "\n"
+ << " PartialOptSizeThreshold="
+ << UP.PartialOptSizeThreshold << "\n");
+
+ // Disable peeling.
+ TargetTransformInfo::PeelingPreferences PP =
+ gatherPeelingPreferences(L, SE, TTI,
+ /*UserAllowPeeling=*/false,
+ /*UserAllowProfileBasedPeeling=*/false,
+ /*UserUnrollingSpecficValues=*/false);
+
+ SmallPtrSet<const Value *, 32> EphValues;
+ CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
+
+ // Assume that reads and writes to stack variables can be eliminated by
+ // Mem2Reg, SROA or LICM. That is, don't count them towards the loop body's
+ // size.
+ for (BasicBlock *BB : L->blocks()) {
+ for (Instruction &I : *BB) {
+ Value *Ptr;
+ if (auto *Load = dyn_cast<LoadInst>(&I)) {
+ Ptr = Load->getPointerOperand();
+ } else if (auto *Store = dyn_cast<StoreInst>(&I)) {
+ Ptr = Store->getPointerOperand();
+ } else
+ continue;
+
+ Ptr = Ptr->stripPointerCasts();
+
+ if (auto *Alloca = dyn_cast<AllocaInst>(Ptr)) {
+ if (Alloca->getParent() == &F->getEntryBlock())
+ EphValues.insert(&I);
+ }
+ }
+ }
+
+ unsigned NumInlineCandidates;
+ bool NotDuplicatable;
+ bool Convergent;
+ unsigned LoopSize =
+ ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent,
+ TTI, EphValues, UP.BEInsns);
+ LLVM_DEBUG(dbgs() << "Estimated loop size is " << LoopSize << "\n");
+
+ // The loop is not unrollable if it contains instructions that cannot be
+ // duplicated or that are convergent.
+ if (NotDuplicatable || Convergent) {
+ LLVM_DEBUG(dbgs() << "Loop not considered unrollable\n");
+ return 1;
+ }
+
+ // TODO: Determine trip count of \p CLI if constant, computeUnrollCount might
+ // be able to use it.
+ int TripCount = 0;
+ int MaxTripCount = 0;
+ bool MaxOrZero = false;
+ unsigned TripMultiple = 0;
+
+ bool UseUpperBound = false;
+ computeUnrollCount(L, TTI, DT, &LI, SE, EphValues, &ORE, TripCount,
+ MaxTripCount, MaxOrZero, TripMultiple, LoopSize, UP, PP,
+ UseUpperBound);
+ unsigned Factor = UP.Count;
+ LLVM_DEBUG(dbgs() << "Suggesting unroll factor of " << Factor << "\n");
+
+ // A Factor of 0 means computeUnrollCount decided not to unroll; return 1 to
+ // signal that the loop should be left as-is.
+ if (Factor == 0)
+ return 1;
+ return Factor;
+}
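
The analysis setup above is the standard recipe for running new-PM analyses standalone, outside a pass pipeline. Below is a minimal sketch of the same pattern (not part of the vendor import; the helper name and the Function reference are only illustrative), reduced to what is needed to obtain LoopInfo and ScalarEvolution for a function:

// Illustrative sketch, not part of the vendor import.
static void runStandaloneLoopAnalyses(Function &F) {
  FunctionAnalysisManager FAM;
  // ScalarEvolution pulls in LoopInfo, DominatorTree, TargetLibraryInfo and
  // AssumptionCache, and the analysis manager itself uses
  // PassInstrumentationAnalysis, so all of them must be registered up front.
  FAM.registerPass([] { return TargetLibraryAnalysis(); });
  FAM.registerPass([] { return AssumptionAnalysis(); });
  FAM.registerPass([] { return DominatorTreeAnalysis(); });
  FAM.registerPass([] { return LoopAnalysis(); });
  FAM.registerPass([] { return ScalarEvolutionAnalysis(); });
  FAM.registerPass([] { return PassInstrumentationAnalysis(); });

  LoopInfo &LI = FAM.getResult<LoopAnalysis>(F);
  ScalarEvolution &SE = FAM.getResult<ScalarEvolutionAnalysis>(F);
  for (Loop *L : LI)
    LLVM_DEBUG(dbgs() << "trip count: " << SE.getSmallConstantTripCount(L)
                      << "\n");
}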
+
+void OpenMPIRBuilder::unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop,
+ int32_t Factor,
+ CanonicalLoopInfo **UnrolledCLI) {
+ assert(Factor >= 0 && "Unroll factor must not be negative");
+
+ Function *F = Loop->getFunction();
+ LLVMContext &Ctx = F->getContext();
+
+ // If the unrolled loop is not used for another loop-associated directive, it
+ // is sufficient to add metadata for the LoopUnrollPass.
+ if (!UnrolledCLI) {
+ SmallVector<Metadata *, 2> LoopMetadata;
+ LoopMetadata.push_back(
+ MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.enable")));
+
+ if (Factor >= 1) {
+ ConstantAsMetadata *FactorConst = ConstantAsMetadata::get(
+ ConstantInt::get(Type::getInt32Ty(Ctx), APInt(32, Factor)));
+ LoopMetadata.push_back(MDNode::get(
+ Ctx, {MDString::get(Ctx, "llvm.loop.unroll.count"), FactorConst}));
+ }
+
+ addLoopMetadata(Loop, LoopMetadata);
+ return;
+ }
+
+ // Heuristically determine the unroll factor.
+ if (Factor == 0)
+ Factor = computeHeuristicUnrollFactor(Loop);
+
+ // No change required with unroll factor 1.
+ if (Factor == 1) {
+ *UnrolledCLI = Loop;
+ return;
+ }
+
+ assert(Factor >= 2 &&
+ "unrolling only makes sense with a factor of 2 or larger");
+
+ Type *IndVarTy = Loop->getIndVarType();
+
+ // Apply partial unrolling by tiling the loop by the unroll-factor, then fully
+ // unroll the inner loop.
+ Value *FactorVal =
+ ConstantInt::get(IndVarTy, APInt(IndVarTy->getIntegerBitWidth(), Factor,
+ /*isSigned=*/false));
+ std::vector<CanonicalLoopInfo *> LoopNest =
+ tileLoops(DL, {Loop}, {FactorVal});
+ assert(LoopNest.size() == 2 && "Expect 2 loops after tiling");
+ *UnrolledCLI = LoopNest[0];
+ CanonicalLoopInfo *InnerLoop = LoopNest[1];
+
+ // LoopUnrollPass can only fully unroll loops with constant trip count.
+ // Unroll by the unroll factor with a fallback epilog for the remainder
+ // iterations if necessary.
+ ConstantAsMetadata *FactorConst = ConstantAsMetadata::get(
+ ConstantInt::get(Type::getInt32Ty(Ctx), APInt(32, Factor)));
+ addLoopMetadata(
+ InnerLoop,
+ {MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.enable")),
+ MDNode::get(
+ Ctx, {MDString::get(Ctx, "llvm.loop.unroll.count"), FactorConst})});
+
+#ifndef NDEBUG
+ (*UnrolledCLI)->assertOK();
+#endif
+}
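
A sketch of how a frontend might drive this entry point (not part of the vendor import; OMPBuilder, DL, CLI and CLI2 are assumed to already exist in the caller):

// Illustrative sketch, not part of the vendor import.
// Partial unrolling whose result feeds another loop-associated directive:
// a factor of 0 requests the heuristic above, and the tiled outer loop is
// returned through the out-parameter.
CanonicalLoopInfo *Unrolled = nullptr;
OMPBuilder.unrollLoopPartial(DL, CLI, /*Factor=*/0, &Unrolled);

// Unrolling as a standalone directive: passing nullptr only attaches
// llvm.loop.unroll.* metadata and defers the transformation to LoopUnrollPass.
OMPBuilder.unrollLoopPartial(DL, CLI2, /*Factor=*/4, /*UnrolledCLI=*/nullptr);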
+
OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createCopyPrivate(const LocationDescription &Loc,
llvm::Value *BufSize, llvm::Value *CpyBuf,
@@ -1960,6 +2458,74 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCritical(
/*Conditional*/ false, /*hasFinalize*/ true);
}
+OpenMPIRBuilder::InsertPointTy
+OpenMPIRBuilder::createOrderedDepend(const LocationDescription &Loc,
+ InsertPointTy AllocaIP, unsigned NumLoops,
+ ArrayRef<llvm::Value *> StoreValues,
+ const Twine &Name, bool IsDependSource) {
+ if (!updateToLocation(Loc))
+ return Loc.IP;
+
+ // Allocate space for the depend vector and generate the alloca instruction.
+ auto *ArrI64Ty = ArrayType::get(Int64, NumLoops);
+ Builder.restoreIP(AllocaIP);
+ AllocaInst *ArgsBase = Builder.CreateAlloca(ArrI64Ty, nullptr, Name);
+ ArgsBase->setAlignment(Align(8));
+ Builder.restoreIP(Loc.IP);
+
+ // Store each loop's index value at its offset in the depend vector.
+ for (unsigned I = 0; I < NumLoops; ++I) {
+ Value *DependAddrGEPIter = Builder.CreateInBoundsGEP(
+ ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(I)});
+ Builder.CreateStore(StoreValues[I], DependAddrGEPIter);
+ }
+
+ Value *DependBaseAddrGEP = Builder.CreateInBoundsGEP(
+ ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(0)});
+
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+ Value *Ident = getOrCreateIdent(SrcLocStr);
+ Value *ThreadId = getOrCreateThreadID(Ident);
+ Value *Args[] = {Ident, ThreadId, DependBaseAddrGEP};
+
+ Function *RTLFn = nullptr;
+ if (IsDependSource)
+ RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_post);
+ else
+ RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_wait);
+ Builder.CreateCall(RTLFn, Args);
+
+ return Builder.saveIP();
+}
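
A sketch of how the two flavours of this call might be used when lowering an ordered depend construct in a one-dimensional doacross loop (not part of the vendor import; the helper name and its arguments are only illustrative, and both iteration values are expected to be i64):

// Illustrative sketch, not part of the vendor import.
static void emitDoacrossOrderedCalls(
    OpenMPIRBuilder &OMPBuilder,
    const OpenMPIRBuilder::LocationDescription &Loc,
    OpenMPIRBuilder::InsertPointTy AllocaIP, Value *CurIter, Value *PrevIter) {
  // depend(source): post the current iteration via __kmpc_doacross_post.
  OMPBuilder.createOrderedDepend(Loc, AllocaIP, /*NumLoops=*/1, {CurIter},
                                 ".dep", /*IsDependSource=*/true);
  // depend(sink: i - 1): wait on the previous iteration via
  // __kmpc_doacross_wait.
  OMPBuilder.createOrderedDepend(Loc, AllocaIP, /*NumLoops=*/1, {PrevIter},
                                 ".dep", /*IsDependSource=*/false);
}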
+
+OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createOrderedThreadsSimd(
+ const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
+ FinalizeCallbackTy FiniCB, bool IsThreads) {
+ if (!updateToLocation(Loc))
+ return Loc.IP;
+
+ Directive OMPD = Directive::OMPD_ordered;
+ Instruction *EntryCall = nullptr;
+ Instruction *ExitCall = nullptr;
+
+ if (IsThreads) {
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+ Value *Ident = getOrCreateIdent(SrcLocStr);
+ Value *ThreadId = getOrCreateThreadID(Ident);
+ Value *Args[] = {Ident, ThreadId};
+
+ Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_ordered);
+ EntryCall = Builder.CreateCall(EntryRTLFn, Args);
+
+ Function *ExitRTLFn =
+ getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_ordered);
+ ExitCall = Builder.CreateCall(ExitRTLFn, Args);
+ }
+
+ return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
+ /*Conditional*/ false, /*hasFinalize*/ true);
+}
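
A sketch of the intended mapping from the two directive forms to the IsThreads flag (not part of the vendor import; the wrapper name is illustrative and the callbacks simply use the builder's own callback types):

// Illustrative sketch, not part of the vendor import.
static OpenMPIRBuilder::InsertPointTy
emitOrderedRegion(OpenMPIRBuilder &OMPBuilder,
                  const OpenMPIRBuilder::LocationDescription &Loc,
                  OpenMPIRBuilder::BodyGenCallbackTy BodyGenCB,
                  OpenMPIRBuilder::FinalizeCallbackTy FiniCB, bool IsThreads) {
  // 'ordered threads' (IsThreads=true) brackets the inlined region with
  // __kmpc_ordered / __kmpc_end_ordered; 'ordered simd' (IsThreads=false)
  // emits the region without any runtime calls.
  return OMPBuilder.createOrderedThreadsSimd(Loc, BodyGenCB, FiniCB, IsThreads);
}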
+
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion(
Directive OMPD, Instruction *EntryCall, Instruction *ExitCall,
BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool Conditional,
@@ -2193,25 +2759,30 @@ CallInst *OpenMPIRBuilder::createCachedThreadPrivate(
}
OpenMPIRBuilder::InsertPointTy
-OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD, bool RequiresFullRuntime) {
+OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD,
+ bool RequiresFullRuntime) {
if (!updateToLocation(Loc))
return Loc.IP;
Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
Value *Ident = getOrCreateIdent(SrcLocStr);
- ConstantInt *IsSPMDVal = ConstantInt::getBool(Int32->getContext(), IsSPMD);
+ ConstantInt *IsSPMDVal = ConstantInt::getSigned(
+ IntegerType::getInt8Ty(Int8->getContext()),
+ IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC);
ConstantInt *UseGenericStateMachine =
ConstantInt::getBool(Int32->getContext(), !IsSPMD);
- ConstantInt *RequiresFullRuntimeVal = ConstantInt::getBool(Int32->getContext(), RequiresFullRuntime);
+ ConstantInt *RequiresFullRuntimeVal =
+ ConstantInt::getBool(Int32->getContext(), RequiresFullRuntime);
Function *Fn = getOrCreateRuntimeFunctionPtr(
omp::RuntimeFunction::OMPRTL___kmpc_target_init);
- CallInst *ThreadKind =
- Builder.CreateCall(Fn, {Ident, IsSPMDVal, UseGenericStateMachine, RequiresFullRuntimeVal});
+ CallInst *ThreadKind = Builder.CreateCall(
+ Fn, {Ident, IsSPMDVal, UseGenericStateMachine, RequiresFullRuntimeVal});
Value *ExecUserCode = Builder.CreateICmpEQ(
- ThreadKind, ConstantInt::get(ThreadKind->getType(), -1), "exec_user_code");
+ ThreadKind, ConstantInt::get(ThreadKind->getType(), -1),
+ "exec_user_code");
// ThreadKind = __kmpc_target_init(...)
// if (ThreadKind == -1)
@@ -2241,14 +2812,18 @@ OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD, b
}
void OpenMPIRBuilder::createTargetDeinit(const LocationDescription &Loc,
- bool IsSPMD, bool RequiresFullRuntime) {
+ bool IsSPMD,
+ bool RequiresFullRuntime) {
if (!updateToLocation(Loc))
return;
Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
Value *Ident = getOrCreateIdent(SrcLocStr);
- ConstantInt *IsSPMDVal = ConstantInt::getBool(Int32->getContext(), IsSPMD);
- ConstantInt *RequiresFullRuntimeVal = ConstantInt::getBool(Int32->getContext(), RequiresFullRuntime);
+ ConstantInt *IsSPMDVal = ConstantInt::getSigned(
+ IntegerType::getInt8Ty(Int8->getContext()),
+ IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC);
+ ConstantInt *RequiresFullRuntimeVal =
+ ConstantInt::getBool(Int32->getContext(), RequiresFullRuntime);
Function *Fn = getOrCreateRuntimeFunctionPtr(
omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);
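
For context, a sketch of how this pair of entry points brackets a target region (not part of the vendor import; OMPBuilder, Builder and Loc are assumed to exist in the caller):

// Illustrative sketch, not part of the vendor import.
// IsSPMD now selects an i8 execution-mode constant (SPMD vs. generic) for
// __kmpc_target_init / __kmpc_target_deinit instead of a boolean.
Builder.restoreIP(OMPBuilder.createTargetInit(Loc, /*IsSPMD=*/true,
                                              /*RequiresFullRuntime=*/false));
// ... emit the SPMD target region body here ...
OMPBuilder.createTargetDeinit(Loc, /*IsSPMD=*/true,
                              /*RequiresFullRuntime=*/false);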
@@ -2749,7 +3324,8 @@ void CanonicalLoopInfo::collectControlBlocks(
void CanonicalLoopInfo::assertOK() const {
#ifndef NDEBUG
- if (!IsValid)
+ // No constraints if this object currently does not describe a loop.
+ if (!isValid())
return;
// Verify standard control-flow we use for OpenMP loops.
@@ -2835,3 +3411,13 @@ void CanonicalLoopInfo::assertOK() const {
"Exit condition must compare with the trip count");
#endif
}
+
+void CanonicalLoopInfo::invalidate() {
+ Preheader = nullptr;
+ Header = nullptr;
+ Cond = nullptr;
+ Body = nullptr;
+ Latch = nullptr;
+ Exit = nullptr;
+ After = nullptr;
+}
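
A short sketch of the intended contract (not part of the vendor import; CLI is an assumed CanonicalLoopInfo pointer): a transformation that consumes a CanonicalLoopInfo calls invalidate(), and later users check isValid() before relying on the cached blocks.

// Illustrative sketch, not part of the vendor import.
if (CLI->isValid()) {
  // The cached preheader/header/latch pointers are still meaningful.
  CLI->assertOK();
} else {
  // The loop was consumed by a transformation (e.g. tiling or unrolling);
  // use the CanonicalLoopInfo returned by that transformation instead.
}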
diff --git a/llvm/lib/IR/AbstractCallSite.cpp b/llvm/lib/IR/AbstractCallSite.cpp
index 6504e566ba4b..2e41799e13e9 100644
--- a/llvm/lib/IR/AbstractCallSite.cpp
+++ b/llvm/lib/IR/AbstractCallSite.cpp
@@ -121,7 +121,7 @@ AbstractCallSite::AbstractCallSite(const Use *U)
assert(CallbackEncMD->getNumOperands() >= 2 && "Incomplete !callback metadata");
- unsigned NumCallOperands = CB->getNumArgOperands();
+ unsigned NumCallOperands = CB->arg_size();
// Skip the var-arg flag at the end when reading the metadata.
for (unsigned u = 0, e = CallbackEncMD->getNumOperands() - 1; u < e; u++) {
Metadata *OpAsM = CallbackEncMD->getOperand(u).get();
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index 69e2d85e58fe..7734c0a8de58 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -23,6 +23,7 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
@@ -44,7 +45,6 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalIFunc.h"
-#include "llvm/IR/GlobalIndirectSymbol.h"
#include "llvm/IR/GlobalObject.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
@@ -72,6 +72,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/SaveAndRestore.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
@@ -554,16 +555,13 @@ void TypePrinting::print(Type *Ty, raw_ostream &OS) {
FunctionType *FTy = cast<FunctionType>(Ty);
print(FTy->getReturnType(), OS);
OS << " (";
- for (FunctionType::param_iterator I = FTy->param_begin(),
- E = FTy->param_end(); I != E; ++I) {
- if (I != FTy->param_begin())
- OS << ", ";
- print(*I, OS);
- }
- if (FTy->isVarArg()) {
- if (FTy->getNumParams()) OS << ", ";
- OS << "...";
+ ListSeparator LS;
+ for (Type *Ty : FTy->params()) {
+ OS << LS;
+ print(Ty, OS);
}
+ if (FTy->isVarArg())
+ OS << LS << "...";
OS << ')';
return;
}
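
The ListSeparator helper used above (from llvm/ADT/StringExtras.h) prints nothing the first time it is streamed and the separator on every later use, which removes the manual first-element bookkeeping. A minimal sketch (not part of the vendor import):

// Illustrative sketch, not part of the vendor import.
void printCommaSeparated(ArrayRef<StringRef> Items, raw_ostream &OS) {
  ListSeparator LS;            // default separator is ", "
  for (StringRef Item : Items)
    OS << LS << Item;          // the first iteration prints nothing before Item
}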
@@ -633,12 +631,11 @@ void TypePrinting::printStructBody(StructType *STy, raw_ostream &OS) {
if (STy->getNumElements() == 0) {
OS << "{}";
} else {
- StructType::element_iterator I = STy->element_begin();
OS << "{ ";
- print(*I++, OS);
- for (StructType::element_iterator E = STy->element_end(); I != E; ++I) {
- OS << ", ";
- print(*I, OS);
+ ListSeparator LS;
+ for (Type *Ty : STy->elements()) {
+ OS << LS;
+ print(Ty, OS);
}
OS << " }";
@@ -988,7 +985,7 @@ void SlotTracker::processModule() {
// Add all the function attributes to the table.
// FIXME: Add attributes of other objects?
- AttributeSet FnAttrs = F.getAttributes().getFnAttributes();
+ AttributeSet FnAttrs = F.getAttributes().getFnAttrs();
if (FnAttrs.hasAttributes())
CreateAttributeSetSlot(FnAttrs);
}
@@ -1029,7 +1026,7 @@ void SlotTracker::processFunction() {
// target may not be linked into the optimizer.
if (const auto *Call = dyn_cast<CallBase>(&I)) {
// Add all the call attributes to the table.
- AttributeSet Attrs = Call->getAttributes().getFnAttributes();
+ AttributeSet Attrs = Call->getAttributes().getFnAttrs();
if (Attrs.hasAttributes())
CreateAttributeSetSlot(Attrs);
}
@@ -1277,18 +1274,38 @@ void SlotTracker::CreateTypeIdSlot(StringRef Id) {
TypeIdMap[Id] = TypeIdNext++;
}
+namespace {
+/// Common instances used by most of the printer functions.
+struct AsmWriterContext {
+ TypePrinting *TypePrinter = nullptr;
+ SlotTracker *Machine = nullptr;
+ const Module *Context = nullptr;
+
+ AsmWriterContext(TypePrinting *TP, SlotTracker *ST, const Module *M = nullptr)
+ : TypePrinter(TP), Machine(ST), Context(M) {}
+
+ static AsmWriterContext &getEmpty() {
+ static AsmWriterContext EmptyCtx(nullptr, nullptr);
+ return EmptyCtx;
+ }
+
+ /// A callback that is triggered when the underlying printer prints a
+ /// Metadata node as an operand.
+ virtual void onWriteMetadataAsOperand(const Metadata *) {}
+
+ virtual ~AsmWriterContext() {}
+};
+} // end anonymous namespace
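
Because onWriteMetadataAsOperand is virtual, callers can observe every metadata operand the printer visits by subclassing the context; the MDTreeAsmWriterContext introduced at the end of this file uses the same hook. A minimal sketch of such a subclass (not part of the vendor import; the name is illustrative):

// Illustrative sketch, not part of the vendor import.
namespace {
struct CountingAsmWriterContext : AsmWriterContext {
  unsigned NumMetadataOperands = 0;
  using AsmWriterContext::AsmWriterContext; // inherit (TypePrinter, Machine, M)
  void onWriteMetadataAsOperand(const Metadata *) override {
    ++NumMetadataOperands;
  }
};
} // end anonymous namespace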
+
//===----------------------------------------------------------------------===//
// AsmWriter Implementation
//===----------------------------------------------------------------------===//
static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
- TypePrinting *TypePrinter,
- SlotTracker *Machine,
- const Module *Context);
+ AsmWriterContext &WriterCtx);
static void WriteAsOperandInternal(raw_ostream &Out, const Metadata *MD,
- TypePrinting *TypePrinter,
- SlotTracker *Machine, const Module *Context,
+ AsmWriterContext &WriterCtx,
bool FromValue = false);
static void WriteOptimizationInfo(raw_ostream &Out, const User *U) {
@@ -1331,9 +1348,7 @@ static void WriteOptimizationInfo(raw_ostream &Out, const User *U) {
}
static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
- TypePrinting &TypePrinter,
- SlotTracker *Machine,
- const Module *Context) {
+ AsmWriterContext &WriterCtx) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
if (CI->getType()->isIntegerTy(1)) {
Out << (CI->getZExtValue() ? "true" : "false");
@@ -1442,36 +1457,30 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) {
Out << "blockaddress(";
- WriteAsOperandInternal(Out, BA->getFunction(), &TypePrinter, Machine,
- Context);
+ WriteAsOperandInternal(Out, BA->getFunction(), WriterCtx);
Out << ", ";
- WriteAsOperandInternal(Out, BA->getBasicBlock(), &TypePrinter, Machine,
- Context);
+ WriteAsOperandInternal(Out, BA->getBasicBlock(), WriterCtx);
Out << ")";
return;
}
if (const auto *Equiv = dyn_cast<DSOLocalEquivalent>(CV)) {
Out << "dso_local_equivalent ";
- WriteAsOperandInternal(Out, Equiv->getGlobalValue(), &TypePrinter, Machine,
- Context);
+ WriteAsOperandInternal(Out, Equiv->getGlobalValue(), WriterCtx);
return;
}
if (const ConstantArray *CA = dyn_cast<ConstantArray>(CV)) {
Type *ETy = CA->getType()->getElementType();
Out << '[';
- TypePrinter.print(ETy, Out);
+ WriterCtx.TypePrinter->print(ETy, Out);
Out << ' ';
- WriteAsOperandInternal(Out, CA->getOperand(0),
- &TypePrinter, Machine,
- Context);
+ WriteAsOperandInternal(Out, CA->getOperand(0), WriterCtx);
for (unsigned i = 1, e = CA->getNumOperands(); i != e; ++i) {
Out << ", ";
- TypePrinter.print(ETy, Out);
+ WriterCtx.TypePrinter->print(ETy, Out);
Out << ' ';
- WriteAsOperandInternal(Out, CA->getOperand(i), &TypePrinter, Machine,
- Context);
+ WriteAsOperandInternal(Out, CA->getOperand(i), WriterCtx);
}
Out << ']';
return;
@@ -1489,17 +1498,14 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
Type *ETy = CA->getType()->getElementType();
Out << '[';
- TypePrinter.print(ETy, Out);
+ WriterCtx.TypePrinter->print(ETy, Out);
Out << ' ';
- WriteAsOperandInternal(Out, CA->getElementAsConstant(0),
- &TypePrinter, Machine,
- Context);
+ WriteAsOperandInternal(Out, CA->getElementAsConstant(0), WriterCtx);
for (unsigned i = 1, e = CA->getNumElements(); i != e; ++i) {
Out << ", ";
- TypePrinter.print(ETy, Out);
+ WriterCtx.TypePrinter->print(ETy, Out);
Out << ' ';
- WriteAsOperandInternal(Out, CA->getElementAsConstant(i), &TypePrinter,
- Machine, Context);
+ WriteAsOperandInternal(Out, CA->getElementAsConstant(i), WriterCtx);
}
Out << ']';
return;
@@ -1512,19 +1518,17 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
unsigned N = CS->getNumOperands();
if (N) {
Out << ' ';
- TypePrinter.print(CS->getOperand(0)->getType(), Out);
+ WriterCtx.TypePrinter->print(CS->getOperand(0)->getType(), Out);
Out << ' ';
- WriteAsOperandInternal(Out, CS->getOperand(0), &TypePrinter, Machine,
- Context);
+ WriteAsOperandInternal(Out, CS->getOperand(0), WriterCtx);
for (unsigned i = 1; i < N; i++) {
Out << ", ";
- TypePrinter.print(CS->getOperand(i)->getType(), Out);
+ WriterCtx.TypePrinter->print(CS->getOperand(i)->getType(), Out);
Out << ' ';
- WriteAsOperandInternal(Out, CS->getOperand(i), &TypePrinter, Machine,
- Context);
+ WriteAsOperandInternal(Out, CS->getOperand(i), WriterCtx);
}
Out << ' ';
}
@@ -1539,16 +1543,14 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
auto *CVVTy = cast<FixedVectorType>(CV->getType());
Type *ETy = CVVTy->getElementType();
Out << '<';
- TypePrinter.print(ETy, Out);
+ WriterCtx.TypePrinter->print(ETy, Out);
Out << ' ';
- WriteAsOperandInternal(Out, CV->getAggregateElement(0U), &TypePrinter,
- Machine, Context);
+ WriteAsOperandInternal(Out, CV->getAggregateElement(0U), WriterCtx);
for (unsigned i = 1, e = CVVTy->getNumElements(); i != e; ++i) {
Out << ", ";
- TypePrinter.print(ETy, Out);
+ WriterCtx.TypePrinter->print(ETy, Out);
Out << ' ';
- WriteAsOperandInternal(Out, CV->getAggregateElement(i), &TypePrinter,
- Machine, Context);
+ WriteAsOperandInternal(Out, CV->getAggregateElement(i), WriterCtx);
}
Out << '>';
return;
@@ -1584,7 +1586,7 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
Optional<unsigned> InRangeOp;
if (const GEPOperator *GEP = dyn_cast<GEPOperator>(CE)) {
- TypePrinter.print(GEP->getSourceElementType(), Out);
+ WriterCtx.TypePrinter->print(GEP->getSourceElementType(), Out);
Out << ", ";
InRangeOp = GEP->getInRangeIndex();
if (InRangeOp)
@@ -1594,9 +1596,9 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
for (User::const_op_iterator OI=CE->op_begin(); OI != CE->op_end(); ++OI) {
if (InRangeOp && unsigned(OI - CE->op_begin()) == *InRangeOp)
Out << "inrange ";
- TypePrinter.print((*OI)->getType(), Out);
+ WriterCtx.TypePrinter->print((*OI)->getType(), Out);
Out << ' ';
- WriteAsOperandInternal(Out, *OI, &TypePrinter, Machine, Context);
+ WriteAsOperandInternal(Out, *OI, WriterCtx);
if (OI+1 != CE->op_end())
Out << ", ";
}
@@ -1609,7 +1611,7 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
if (CE->isCast()) {
Out << " to ";
- TypePrinter.print(CE->getType(), Out);
+ WriterCtx.TypePrinter->print(CE->getType(), Out);
}
if (CE->getOpcode() == Instruction::ShuffleVector)
@@ -1623,8 +1625,7 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
}
static void writeMDTuple(raw_ostream &Out, const MDTuple *Node,
- TypePrinting *TypePrinter, SlotTracker *Machine,
- const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!{";
for (unsigned mi = 0, me = Node->getNumOperands(); mi != me; ++mi) {
const Metadata *MD = Node->getOperand(mi);
@@ -1632,11 +1633,12 @@ static void writeMDTuple(raw_ostream &Out, const MDTuple *Node,
Out << "null";
else if (auto *MDV = dyn_cast<ValueAsMetadata>(MD)) {
Value *V = MDV->getValue();
- TypePrinter->print(V->getType(), Out);
+ WriterCtx.TypePrinter->print(V->getType(), Out);
Out << ' ';
- WriteAsOperandInternal(Out, V, TypePrinter, Machine, Context);
+ WriteAsOperandInternal(Out, V, WriterCtx);
} else {
- WriteAsOperandInternal(Out, MD, TypePrinter, Machine, Context);
+ WriteAsOperandInternal(Out, MD, WriterCtx);
+ WriterCtx.onWriteMetadataAsOperand(MD);
}
if (mi + 1 != me)
Out << ", ";
@@ -1665,15 +1667,12 @@ raw_ostream &operator<<(raw_ostream &OS, FieldSeparator &FS) {
struct MDFieldPrinter {
raw_ostream &Out;
FieldSeparator FS;
- TypePrinting *TypePrinter = nullptr;
- SlotTracker *Machine = nullptr;
- const Module *Context = nullptr;
+ AsmWriterContext &WriterCtx;
- explicit MDFieldPrinter(raw_ostream &Out) : Out(Out) {}
- MDFieldPrinter(raw_ostream &Out, TypePrinting *TypePrinter,
- SlotTracker *Machine, const Module *Context)
- : Out(Out), TypePrinter(TypePrinter), Machine(Machine), Context(Context) {
- }
+ explicit MDFieldPrinter(raw_ostream &Out)
+ : Out(Out), WriterCtx(AsmWriterContext::getEmpty()) {}
+ MDFieldPrinter(raw_ostream &Out, AsmWriterContext &Ctx)
+ : Out(Out), WriterCtx(Ctx) {}
void printTag(const DINode *N);
void printMacinfoType(const DIMacroNode *N);
@@ -1734,14 +1733,13 @@ void MDFieldPrinter::printString(StringRef Name, StringRef Value,
}
static void writeMetadataAsOperand(raw_ostream &Out, const Metadata *MD,
- TypePrinting *TypePrinter,
- SlotTracker *Machine,
- const Module *Context) {
+ AsmWriterContext &WriterCtx) {
if (!MD) {
Out << "null";
return;
}
- WriteAsOperandInternal(Out, MD, TypePrinter, Machine, Context);
+ WriteAsOperandInternal(Out, MD, WriterCtx);
+ WriterCtx.onWriteMetadataAsOperand(MD);
}
void MDFieldPrinter::printMetadata(StringRef Name, const Metadata *MD,
@@ -1750,7 +1748,7 @@ void MDFieldPrinter::printMetadata(StringRef Name, const Metadata *MD,
return;
Out << FS << Name << ": ";
- writeMetadataAsOperand(Out, MD, TypePrinter, Machine, Context);
+ writeMetadataAsOperand(Out, MD, WriterCtx);
}
template <class IntTy>
@@ -1763,7 +1761,7 @@ void MDFieldPrinter::printInt(StringRef Name, IntTy Int, bool ShouldSkipZero) {
void MDFieldPrinter::printAPInt(StringRef Name, const APInt &Int,
bool IsUnsigned, bool ShouldSkipZero) {
- if (ShouldSkipZero && Int.isNullValue())
+ if (ShouldSkipZero && Int.isZero())
return;
Out << FS << Name << ": ";
@@ -1847,10 +1845,9 @@ void MDFieldPrinter::printDwarfEnum(StringRef Name, IntTy Value,
}
static void writeGenericDINode(raw_ostream &Out, const GenericDINode *N,
- TypePrinting *TypePrinter, SlotTracker *Machine,
- const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!GenericDINode(";
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
Printer.printTag(N);
Printer.printString("header", N->getHeader());
if (N->getNumDwarfOperands()) {
@@ -1858,7 +1855,7 @@ static void writeGenericDINode(raw_ostream &Out, const GenericDINode *N,
FieldSeparator IFS;
for (auto &I : N->dwarf_operands()) {
Out << IFS;
- writeMetadataAsOperand(Out, I, TypePrinter, Machine, Context);
+ writeMetadataAsOperand(Out, I, WriterCtx);
}
Out << "}";
}
@@ -1866,10 +1863,9 @@ static void writeGenericDINode(raw_ostream &Out, const GenericDINode *N,
}
static void writeDILocation(raw_ostream &Out, const DILocation *DL,
- TypePrinting *TypePrinter, SlotTracker *Machine,
- const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!DILocation(";
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
// Always output the line, since 0 is a relevant and important value for it.
Printer.printInt("line", DL->getLine(), /* ShouldSkipZero */ false);
Printer.printInt("column", DL->getColumn());
@@ -1881,10 +1877,9 @@ static void writeDILocation(raw_ostream &Out, const DILocation *DL,
}
static void writeDISubrange(raw_ostream &Out, const DISubrange *N,
- TypePrinting *TypePrinter, SlotTracker *Machine,
- const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!DISubrange(";
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
auto *Count = N->getRawCountNode();
if (auto *CE = dyn_cast_or_null<ConstantAsMetadata>(Count)) {
@@ -1923,18 +1918,15 @@ static void writeDISubrange(raw_ostream &Out, const DISubrange *N,
}
static void writeDIGenericSubrange(raw_ostream &Out, const DIGenericSubrange *N,
- TypePrinting *TypePrinter,
- SlotTracker *Machine,
- const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!DIGenericSubrange(";
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
auto IsConstant = [&](Metadata *Bound) -> bool {
if (auto *BE = dyn_cast_or_null<DIExpression>(Bound)) {
- return BE->isConstant()
- ? DIExpression::SignedOrUnsignedConstant::SignedConstant ==
- *BE->isConstant()
- : false;
+ return BE->isConstant() &&
+ DIExpression::SignedOrUnsignedConstant::SignedConstant ==
+ *BE->isConstant();
}
return false;
};
@@ -1977,7 +1969,7 @@ static void writeDIGenericSubrange(raw_ostream &Out, const DIGenericSubrange *N,
}
static void writeDIEnumerator(raw_ostream &Out, const DIEnumerator *N,
- TypePrinting *, SlotTracker *, const Module *) {
+ AsmWriterContext &) {
Out << "!DIEnumerator(";
MDFieldPrinter Printer(Out);
Printer.printString("name", N->getName(), /* ShouldSkipEmpty */ false);
@@ -1989,7 +1981,7 @@ static void writeDIEnumerator(raw_ostream &Out, const DIEnumerator *N,
}
static void writeDIBasicType(raw_ostream &Out, const DIBasicType *N,
- TypePrinting *, SlotTracker *, const Module *) {
+ AsmWriterContext &) {
Out << "!DIBasicType(";
MDFieldPrinter Printer(Out);
if (N->getTag() != dwarf::DW_TAG_base_type)
@@ -2004,10 +1996,9 @@ static void writeDIBasicType(raw_ostream &Out, const DIBasicType *N,
}
static void writeDIStringType(raw_ostream &Out, const DIStringType *N,
- TypePrinting *TypePrinter, SlotTracker *Machine,
- const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!DIStringType(";
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
if (N->getTag() != dwarf::DW_TAG_string_type)
Printer.printTag(N);
Printer.printString("name", N->getName());
@@ -2021,10 +2012,9 @@ static void writeDIStringType(raw_ostream &Out, const DIStringType *N,
}
static void writeDIDerivedType(raw_ostream &Out, const DIDerivedType *N,
- TypePrinting *TypePrinter, SlotTracker *Machine,
- const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!DIDerivedType(";
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
Printer.printTag(N);
Printer.printString("name", N->getName());
Printer.printMetadata("scope", N->getRawScope());
@@ -2040,14 +2030,14 @@ static void writeDIDerivedType(raw_ostream &Out, const DIDerivedType *N,
if (const auto &DWARFAddressSpace = N->getDWARFAddressSpace())
Printer.printInt("dwarfAddressSpace", *DWARFAddressSpace,
/* ShouldSkipZero */ false);
+ Printer.printMetadata("annotations", N->getRawAnnotations());
Out << ")";
}
static void writeDICompositeType(raw_ostream &Out, const DICompositeType *N,
- TypePrinting *TypePrinter,
- SlotTracker *Machine, const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!DICompositeType(";
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
Printer.printTag(N);
Printer.printString("name", N->getName());
Printer.printMetadata("scope", N->getRawScope());
@@ -2073,14 +2063,14 @@ static void writeDICompositeType(raw_ostream &Out, const DICompositeType *N,
/* ShouldSkipZero */ false);
else
Printer.printMetadata("rank", N->getRawRank(), /*ShouldSkipNull */ true);
+ Printer.printMetadata("annotations", N->getRawAnnotations());
Out << ")";
}
static void writeDISubroutineType(raw_ostream &Out, const DISubroutineType *N,
- TypePrinting *TypePrinter,
- SlotTracker *Machine, const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!DISubroutineType(";
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
Printer.printDIFlags("flags", N->getFlags());
Printer.printDwarfEnum("cc", N->getCC(), dwarf::ConventionString);
Printer.printMetadata("types", N->getRawTypeArray(),
@@ -2088,8 +2078,7 @@ static void writeDISubroutineType(raw_ostream &Out, const DISubroutineType *N,
Out << ")";
}
-static void writeDIFile(raw_ostream &Out, const DIFile *N, TypePrinting *,
- SlotTracker *, const Module *) {
+static void writeDIFile(raw_ostream &Out, const DIFile *N, AsmWriterContext &) {
Out << "!DIFile(";
MDFieldPrinter Printer(Out);
Printer.printString("filename", N->getFilename(),
@@ -2105,10 +2094,9 @@ static void writeDIFile(raw_ostream &Out, const DIFile *N, TypePrinting *,
}
static void writeDICompileUnit(raw_ostream &Out, const DICompileUnit *N,
- TypePrinting *TypePrinter, SlotTracker *Machine,
- const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!DICompileUnit(";
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
Printer.printDwarfEnum("language", N->getSourceLanguage(),
dwarf::LanguageString, /* ShouldSkipZero */ false);
Printer.printMetadata("file", N->getRawFile(), /* ShouldSkipNull */ false);
@@ -2136,10 +2124,9 @@ static void writeDICompileUnit(raw_ostream &Out, const DICompileUnit *N,
}
static void writeDISubprogram(raw_ostream &Out, const DISubprogram *N,
- TypePrinting *TypePrinter, SlotTracker *Machine,
- const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!DISubprogram(";
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
Printer.printString("name", N->getName());
Printer.printString("linkageName", N->getLinkageName());
Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false);
@@ -2159,14 +2146,14 @@ static void writeDISubprogram(raw_ostream &Out, const DISubprogram *N,
Printer.printMetadata("declaration", N->getRawDeclaration());
Printer.printMetadata("retainedNodes", N->getRawRetainedNodes());
Printer.printMetadata("thrownTypes", N->getRawThrownTypes());
+ Printer.printMetadata("annotations", N->getRawAnnotations());
Out << ")";
}
static void writeDILexicalBlock(raw_ostream &Out, const DILexicalBlock *N,
- TypePrinting *TypePrinter, SlotTracker *Machine,
- const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!DILexicalBlock(";
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false);
Printer.printMetadata("file", N->getRawFile());
Printer.printInt("line", N->getLine());
@@ -2176,11 +2163,9 @@ static void writeDILexicalBlock(raw_ostream &Out, const DILexicalBlock *N,
static void writeDILexicalBlockFile(raw_ostream &Out,
const DILexicalBlockFile *N,
- TypePrinting *TypePrinter,
- SlotTracker *Machine,
- const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!DILexicalBlockFile(";
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false);
Printer.printMetadata("file", N->getRawFile());
Printer.printInt("discriminator", N->getDiscriminator(),
@@ -2189,10 +2174,9 @@ static void writeDILexicalBlockFile(raw_ostream &Out,
}
static void writeDINamespace(raw_ostream &Out, const DINamespace *N,
- TypePrinting *TypePrinter, SlotTracker *Machine,
- const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!DINamespace(";
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
Printer.printString("name", N->getName());
Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false);
Printer.printBool("exportSymbols", N->getExportSymbols(), false);
@@ -2200,10 +2184,9 @@ static void writeDINamespace(raw_ostream &Out, const DINamespace *N,
}
static void writeDICommonBlock(raw_ostream &Out, const DICommonBlock *N,
- TypePrinting *TypePrinter, SlotTracker *Machine,
- const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!DICommonBlock(";
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
Printer.printMetadata("scope", N->getRawScope(), false);
Printer.printMetadata("declaration", N->getRawDecl(), false);
Printer.printString("name", N->getName());
@@ -2213,10 +2196,9 @@ static void writeDICommonBlock(raw_ostream &Out, const DICommonBlock *N,
}
static void writeDIMacro(raw_ostream &Out, const DIMacro *N,
- TypePrinting *TypePrinter, SlotTracker *Machine,
- const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!DIMacro(";
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
Printer.printMacinfoType(N);
Printer.printInt("line", N->getLine());
Printer.printString("name", N->getName());
@@ -2225,10 +2207,9 @@ static void writeDIMacro(raw_ostream &Out, const DIMacro *N,
}
static void writeDIMacroFile(raw_ostream &Out, const DIMacroFile *N,
- TypePrinting *TypePrinter, SlotTracker *Machine,
- const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!DIMacroFile(";
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
Printer.printInt("line", N->getLine());
Printer.printMetadata("file", N->getRawFile(), /* ShouldSkipNull */ false);
Printer.printMetadata("nodes", N->getRawElements());
@@ -2236,10 +2217,9 @@ static void writeDIMacroFile(raw_ostream &Out, const DIMacroFile *N,
}
static void writeDIModule(raw_ostream &Out, const DIModule *N,
- TypePrinting *TypePrinter, SlotTracker *Machine,
- const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!DIModule(";
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false);
Printer.printString("name", N->getName());
Printer.printString("configMacros", N->getConfigurationMacros());
@@ -2251,14 +2231,11 @@ static void writeDIModule(raw_ostream &Out, const DIModule *N,
Out << ")";
}
-
static void writeDITemplateTypeParameter(raw_ostream &Out,
const DITemplateTypeParameter *N,
- TypePrinting *TypePrinter,
- SlotTracker *Machine,
- const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!DITemplateTypeParameter(";
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
Printer.printString("name", N->getName());
Printer.printMetadata("type", N->getRawType(), /* ShouldSkipNull */ false);
Printer.printBool("defaulted", N->isDefault(), /* Default= */ false);
@@ -2267,11 +2244,9 @@ static void writeDITemplateTypeParameter(raw_ostream &Out,
static void writeDITemplateValueParameter(raw_ostream &Out,
const DITemplateValueParameter *N,
- TypePrinting *TypePrinter,
- SlotTracker *Machine,
- const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!DITemplateValueParameter(";
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
if (N->getTag() != dwarf::DW_TAG_template_value_parameter)
Printer.printTag(N);
Printer.printString("name", N->getName());
@@ -2282,10 +2257,9 @@ static void writeDITemplateValueParameter(raw_ostream &Out,
}
static void writeDIGlobalVariable(raw_ostream &Out, const DIGlobalVariable *N,
- TypePrinting *TypePrinter,
- SlotTracker *Machine, const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!DIGlobalVariable(";
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
Printer.printString("name", N->getName());
Printer.printString("linkageName", N->getLinkageName());
Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false);
@@ -2297,14 +2271,14 @@ static void writeDIGlobalVariable(raw_ostream &Out, const DIGlobalVariable *N,
Printer.printMetadata("declaration", N->getRawStaticDataMemberDeclaration());
Printer.printMetadata("templateParams", N->getRawTemplateParams());
Printer.printInt("align", N->getAlignInBits());
+ Printer.printMetadata("annotations", N->getRawAnnotations());
Out << ")";
}
static void writeDILocalVariable(raw_ostream &Out, const DILocalVariable *N,
- TypePrinting *TypePrinter,
- SlotTracker *Machine, const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!DILocalVariable(";
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
Printer.printString("name", N->getName());
Printer.printInt("arg", N->getArg());
Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false);
@@ -2313,14 +2287,14 @@ static void writeDILocalVariable(raw_ostream &Out, const DILocalVariable *N,
Printer.printMetadata("type", N->getRawType());
Printer.printDIFlags("flags", N->getFlags());
Printer.printInt("align", N->getAlignInBits());
+ Printer.printMetadata("annotations", N->getRawAnnotations());
Out << ")";
}
static void writeDILabel(raw_ostream &Out, const DILabel *N,
- TypePrinting *TypePrinter,
- SlotTracker *Machine, const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!DILabel(";
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false);
Printer.printString("name", N->getName());
Printer.printMetadata("file", N->getRawFile());
@@ -2329,8 +2303,7 @@ static void writeDILabel(raw_ostream &Out, const DILabel *N,
}
static void writeDIExpression(raw_ostream &Out, const DIExpression *N,
- TypePrinting *TypePrinter, SlotTracker *Machine,
- const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!DIExpression(";
FieldSeparator FS;
if (N->isValid()) {
@@ -2355,37 +2328,34 @@ static void writeDIExpression(raw_ostream &Out, const DIExpression *N,
}
static void writeDIArgList(raw_ostream &Out, const DIArgList *N,
- TypePrinting *TypePrinter, SlotTracker *Machine,
- const Module *Context, bool FromValue = false) {
+ AsmWriterContext &WriterCtx,
+ bool FromValue = false) {
assert(FromValue &&
"Unexpected DIArgList metadata outside of value argument");
Out << "!DIArgList(";
FieldSeparator FS;
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
for (Metadata *Arg : N->getArgs()) {
Out << FS;
- WriteAsOperandInternal(Out, Arg, TypePrinter, Machine, Context, true);
+ WriteAsOperandInternal(Out, Arg, WriterCtx, true);
}
Out << ")";
}
static void writeDIGlobalVariableExpression(raw_ostream &Out,
const DIGlobalVariableExpression *N,
- TypePrinting *TypePrinter,
- SlotTracker *Machine,
- const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!DIGlobalVariableExpression(";
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
Printer.printMetadata("var", N->getVariable());
Printer.printMetadata("expr", N->getExpression());
Out << ")";
}
static void writeDIObjCProperty(raw_ostream &Out, const DIObjCProperty *N,
- TypePrinting *TypePrinter, SlotTracker *Machine,
- const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!DIObjCProperty(";
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
Printer.printString("name", N->getName());
Printer.printMetadata("file", N->getRawFile());
Printer.printInt("line", N->getLine());
@@ -2397,23 +2367,21 @@ static void writeDIObjCProperty(raw_ostream &Out, const DIObjCProperty *N,
}
static void writeDIImportedEntity(raw_ostream &Out, const DIImportedEntity *N,
- TypePrinting *TypePrinter,
- SlotTracker *Machine, const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!DIImportedEntity(";
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
Printer.printTag(N);
Printer.printString("name", N->getName());
Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false);
Printer.printMetadata("entity", N->getRawEntity());
Printer.printMetadata("file", N->getRawFile());
Printer.printInt("line", N->getLine());
+ Printer.printMetadata("elements", N->getRawElements());
Out << ")";
}
static void WriteMDNodeBodyInternal(raw_ostream &Out, const MDNode *Node,
- TypePrinting *TypePrinter,
- SlotTracker *Machine,
- const Module *Context) {
+ AsmWriterContext &Ctx) {
if (Node->isDistinct())
Out << "distinct ";
else if (Node->isTemporary())
@@ -2424,7 +2392,7 @@ static void WriteMDNodeBodyInternal(raw_ostream &Out, const MDNode *Node,
llvm_unreachable("Expected uniquable MDNode");
#define HANDLE_MDNODE_LEAF(CLASS) \
case Metadata::CLASS##Kind: \
- write##CLASS(Out, cast<CLASS>(Node), TypePrinter, Machine, Context); \
+ write##CLASS(Out, cast<CLASS>(Node), Ctx); \
break;
#include "llvm/IR/Metadata.def"
}
@@ -2433,9 +2401,7 @@ static void WriteMDNodeBodyInternal(raw_ostream &Out, const MDNode *Node,
// Full implementation of printing a Value as an operand with support for
// TypePrinting, etc.
static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
- TypePrinting *TypePrinter,
- SlotTracker *Machine,
- const Module *Context) {
+ AsmWriterContext &WriterCtx) {
if (V->hasName()) {
PrintLLVMName(Out, V);
return;
@@ -2443,8 +2409,8 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
const Constant *CV = dyn_cast<Constant>(V);
if (CV && !isa<GlobalValue>(CV)) {
- assert(TypePrinter && "Constants require TypePrinting!");
- WriteConstantInternal(Out, CV, *TypePrinter, Machine, Context);
+ assert(WriterCtx.TypePrinter && "Constants require TypePrinting!");
+ WriteConstantInternal(Out, CV, WriterCtx);
return;
}
@@ -2468,13 +2434,14 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
}
if (auto *MD = dyn_cast<MetadataAsValue>(V)) {
- WriteAsOperandInternal(Out, MD->getMetadata(), TypePrinter, Machine,
- Context, /* FromValue */ true);
+ WriteAsOperandInternal(Out, MD->getMetadata(), WriterCtx,
+ /* FromValue */ true);
return;
}
char Prefix = '%';
int Slot;
+ auto *Machine = WriterCtx.Machine;
// If we have a SlotTracker, use it.
if (Machine) {
if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
@@ -2513,30 +2480,30 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
}
static void WriteAsOperandInternal(raw_ostream &Out, const Metadata *MD,
- TypePrinting *TypePrinter,
- SlotTracker *Machine, const Module *Context,
+ AsmWriterContext &WriterCtx,
bool FromValue) {
// Write DIExpressions and DIArgLists inline when used as a value. Improves
// readability of debug info intrinsics.
if (const DIExpression *Expr = dyn_cast<DIExpression>(MD)) {
- writeDIExpression(Out, Expr, TypePrinter, Machine, Context);
+ writeDIExpression(Out, Expr, WriterCtx);
return;
}
if (const DIArgList *ArgList = dyn_cast<DIArgList>(MD)) {
- writeDIArgList(Out, ArgList, TypePrinter, Machine, Context, FromValue);
+ writeDIArgList(Out, ArgList, WriterCtx, FromValue);
return;
}
if (const MDNode *N = dyn_cast<MDNode>(MD)) {
std::unique_ptr<SlotTracker> MachineStorage;
- if (!Machine) {
- MachineStorage = std::make_unique<SlotTracker>(Context);
- Machine = MachineStorage.get();
+ SaveAndRestore<SlotTracker *> SARMachine(WriterCtx.Machine);
+ if (!WriterCtx.Machine) {
+ MachineStorage = std::make_unique<SlotTracker>(WriterCtx.Context);
+ WriterCtx.Machine = MachineStorage.get();
}
- int Slot = Machine->getMetadataSlot(N);
+ int Slot = WriterCtx.Machine->getMetadataSlot(N);
if (Slot == -1) {
if (const DILocation *Loc = dyn_cast<DILocation>(N)) {
- writeDILocation(Out, Loc, TypePrinter, Machine, Context);
+ writeDILocation(Out, Loc, WriterCtx);
return;
}
// Give the pointer value instead of "badref", since this comes up all
@@ -2555,13 +2522,13 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Metadata *MD,
}
auto *V = cast<ValueAsMetadata>(MD);
- assert(TypePrinter && "TypePrinter required for metadata values");
+ assert(WriterCtx.TypePrinter && "TypePrinter required for metadata values");
assert((FromValue || !isa<LocalAsMetadata>(V)) &&
"Unexpected function-local metadata outside of value argument");
- TypePrinter->print(V->getValue()->getType(), Out);
+ WriterCtx.TypePrinter->print(V->getValue()->getType(), Out);
Out << ' ';
- WriteAsOperandInternal(Out, V->getValue(), TypePrinter, Machine, Context);
+ WriteAsOperandInternal(Out, V->getValue(), WriterCtx);
}
namespace {
@@ -2592,6 +2559,10 @@ public:
AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac,
const ModuleSummaryIndex *Index, bool IsForDebug);
+ AsmWriterContext getContext() {
+ return AsmWriterContext(&TypePrinter, &Machine, TheModule);
+ }
+
void printMDNodeBody(const MDNode *MD);
void printNamedMDNode(const NamedMDNode *NMD);
@@ -2618,7 +2589,8 @@ public:
void printTypeIdentities();
void printGlobal(const GlobalVariable *GV);
- void printIndirectSymbol(const GlobalIndirectSymbol *GIS);
+ void printAlias(const GlobalAlias *GA);
+ void printIFunc(const GlobalIFunc *GI);
void printComdat(const Comdat *C);
void printFunction(const Function *F);
void printArgument(const Argument *FA, AttributeSet Attrs);
@@ -2693,7 +2665,8 @@ void AssemblyWriter::writeOperand(const Value *Operand, bool PrintType) {
TypePrinter.print(Operand->getType(), Out);
Out << ' ';
}
- WriteAsOperandInternal(Out, Operand, &TypePrinter, &Machine, TheModule);
+ auto WriterCtx = getContext();
+ WriteAsOperandInternal(Out, Operand, WriterCtx);
}
void AssemblyWriter::writeSyncScope(const LLVMContext &Context,
@@ -2752,7 +2725,8 @@ void AssemblyWriter::writeParamOperand(const Value *Operand,
}
Out << ' ';
// Print the operand
- WriteAsOperandInternal(Out, Operand, &TypePrinter, &Machine, TheModule);
+ auto WriterCtx = getContext();
+ WriteAsOperandInternal(Out, Operand, WriterCtx);
}
void AssemblyWriter::writeOperandBundles(const CallBase *Call) {
@@ -2776,6 +2750,7 @@ void AssemblyWriter::writeOperandBundles(const CallBase *Call) {
Out << '(';
bool FirstInput = true;
+ auto WriterCtx = getContext();
for (const auto &Input : BU.Inputs) {
if (!FirstInput)
Out << ", ";
@@ -2783,7 +2758,7 @@ void AssemblyWriter::writeOperandBundles(const CallBase *Call) {
TypePrinter.print(Input->getType(), Out);
Out << " ";
- WriteAsOperandInternal(Out, Input, &TypePrinter, &Machine, TheModule);
+ WriteAsOperandInternal(Out, Input, WriterCtx);
}
Out << ')';
@@ -2853,12 +2828,12 @@ void AssemblyWriter::printModule(const Module *M) {
// Output all aliases.
if (!M->alias_empty()) Out << "\n";
for (const GlobalAlias &GA : M->aliases())
- printIndirectSymbol(&GA);
+ printAlias(&GA);
// Output all ifuncs.
if (!M->ifunc_empty()) Out << "\n";
for (const GlobalIFunc &GI : M->ifuncs())
- printIndirectSymbol(&GI);
+ printIFunc(&GI);
// Output all of the functions.
for (const Function &F : *M) {
@@ -3198,19 +3173,9 @@ static const char *getVisibilityName(GlobalValue::VisibilityTypes Vis) {
void AssemblyWriter::printFunctionSummary(const FunctionSummary *FS) {
Out << ", insts: " << FS->instCount();
+ if (FS->fflags().anyFlagSet())
+ Out << ", " << FS->fflags();
- FunctionSummary::FFlags FFlags = FS->fflags();
- if (FFlags.ReadNone | FFlags.ReadOnly | FFlags.NoRecurse |
- FFlags.ReturnDoesNotAlias | FFlags.NoInline | FFlags.AlwaysInline) {
- Out << ", funcFlags: (";
- Out << "readNone: " << FFlags.ReadNone;
- Out << ", readOnly: " << FFlags.ReadOnly;
- Out << ", noRecurse: " << FFlags.NoRecurse;
- Out << ", returnDoesNotAlias: " << FFlags.ReturnDoesNotAlias;
- Out << ", noInline: " << FFlags.NoInline;
- Out << ", alwaysInline: " << FFlags.AlwaysInline;
- Out << ")";
- }
if (!FS->calls().empty()) {
Out << ", calls: (";
FieldSeparator IFS;
@@ -3453,7 +3418,7 @@ void AssemblyWriter::printNamedMDNode(const NamedMDNode *NMD) {
assert(!isa<DIArgList>(Op) &&
"DIArgLists should not appear in NamedMDNodes");
if (auto *Expr = dyn_cast<DIExpression>(Op)) {
- writeDIExpression(Out, Expr, nullptr, nullptr, nullptr);
+ writeDIExpression(Out, Expr, AsmWriterContext::getEmpty());
continue;
}
@@ -3544,7 +3509,8 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) {
if (GV->isMaterializable())
Out << "; Materializable\n";
- WriteAsOperandInternal(Out, GV, &TypePrinter, &Machine, GV->getParent());
+ AsmWriterContext WriterCtx(&TypePrinter, &Machine, GV->getParent());
+ WriteAsOperandInternal(Out, GV, WriterCtx);
Out << " = ";
if (!GV->hasInitializer() && GV->hasExternalLinkage())
@@ -3596,49 +3562,76 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) {
printInfoComment(*GV);
}
-void AssemblyWriter::printIndirectSymbol(const GlobalIndirectSymbol *GIS) {
- if (GIS->isMaterializable())
+void AssemblyWriter::printAlias(const GlobalAlias *GA) {
+ if (GA->isMaterializable())
Out << "; Materializable\n";
- WriteAsOperandInternal(Out, GIS, &TypePrinter, &Machine, GIS->getParent());
+ AsmWriterContext WriterCtx(&TypePrinter, &Machine, GA->getParent());
+ WriteAsOperandInternal(Out, GA, WriterCtx);
Out << " = ";
- Out << getLinkageNameWithSpace(GIS->getLinkage());
- PrintDSOLocation(*GIS, Out);
- PrintVisibility(GIS->getVisibility(), Out);
- PrintDLLStorageClass(GIS->getDLLStorageClass(), Out);
- PrintThreadLocalModel(GIS->getThreadLocalMode(), Out);
- StringRef UA = getUnnamedAddrEncoding(GIS->getUnnamedAddr());
+ Out << getLinkageNameWithSpace(GA->getLinkage());
+ PrintDSOLocation(*GA, Out);
+ PrintVisibility(GA->getVisibility(), Out);
+ PrintDLLStorageClass(GA->getDLLStorageClass(), Out);
+ PrintThreadLocalModel(GA->getThreadLocalMode(), Out);
+ StringRef UA = getUnnamedAddrEncoding(GA->getUnnamedAddr());
if (!UA.empty())
Out << UA << ' ';
- if (isa<GlobalAlias>(GIS))
- Out << "alias ";
- else if (isa<GlobalIFunc>(GIS))
- Out << "ifunc ";
- else
- llvm_unreachable("Not an alias or ifunc!");
-
- TypePrinter.print(GIS->getValueType(), Out);
+ Out << "alias ";
+ TypePrinter.print(GA->getValueType(), Out);
Out << ", ";
- const Constant *IS = GIS->getIndirectSymbol();
-
- if (!IS) {
- TypePrinter.print(GIS->getType(), Out);
+ if (const Constant *Aliasee = GA->getAliasee()) {
+ writeOperand(Aliasee, !isa<ConstantExpr>(Aliasee));
+ } else {
+ TypePrinter.print(GA->getType(), Out);
Out << " <<NULL ALIASEE>>";
+ }
+
+ if (GA->hasPartition()) {
+ Out << ", partition \"";
+ printEscapedString(GA->getPartition(), Out);
+ Out << '"';
+ }
+
+ printInfoComment(*GA);
+ Out << '\n';
+}
+
+void AssemblyWriter::printIFunc(const GlobalIFunc *GI) {
+ if (GI->isMaterializable())
+ Out << "; Materializable\n";
+
+ AsmWriterContext WriterCtx(&TypePrinter, &Machine, GI->getParent());
+ WriteAsOperandInternal(Out, GI, WriterCtx);
+ Out << " = ";
+
+ Out << getLinkageNameWithSpace(GI->getLinkage());
+ PrintDSOLocation(*GI, Out);
+ PrintVisibility(GI->getVisibility(), Out);
+
+ Out << "ifunc ";
+
+ TypePrinter.print(GI->getValueType(), Out);
+ Out << ", ";
+
+ if (const Constant *Resolver = GI->getResolver()) {
+ writeOperand(Resolver, !isa<ConstantExpr>(Resolver));
} else {
- writeOperand(IS, !isa<ConstantExpr>(IS));
+ TypePrinter.print(GI->getType(), Out);
+ Out << " <<NULL RESOLVER>>";
}
- if (GIS->hasPartition()) {
+ if (GI->hasPartition()) {
Out << ", partition \"";
- printEscapedString(GIS->getPartition(), Out);
+ printEscapedString(GI->getPartition(), Out);
Out << '"';
}
- printInfoComment(*GIS);
+ printInfoComment(*GI);
Out << '\n';
}
@@ -3683,8 +3676,8 @@ void AssemblyWriter::printFunction(const Function *F) {
Out << "; Materializable\n";
const AttributeList &Attrs = F->getAttributes();
- if (Attrs.hasAttributes(AttributeList::FunctionIndex)) {
- AttributeSet AS = Attrs.getFnAttributes();
+ if (Attrs.hasFnAttrs()) {
+ AttributeSet AS = Attrs.getFnAttrs();
std::string AttrStr;
for (const Attribute &Attr : AS) {
@@ -3721,11 +3714,12 @@ void AssemblyWriter::printFunction(const Function *F) {
}
FunctionType *FT = F->getFunctionType();
- if (Attrs.hasAttributes(AttributeList::ReturnIndex))
+ if (Attrs.hasRetAttrs())
Out << Attrs.getAsString(AttributeList::ReturnIndex) << ' ';
TypePrinter.print(F->getReturnType(), Out);
+ AsmWriterContext WriterCtx(&TypePrinter, &Machine, F->getParent());
Out << ' ';
- WriteAsOperandInternal(Out, F, &TypePrinter, &Machine, F->getParent());
+ WriteAsOperandInternal(Out, F, WriterCtx);
Out << '(';
// Loop over the arguments, printing them...
@@ -3738,7 +3732,7 @@ void AssemblyWriter::printFunction(const Function *F) {
// Output type...
TypePrinter.print(FT->getParamType(I), Out);
- AttributeSet ArgAttrs = Attrs.getParamAttributes(I);
+ AttributeSet ArgAttrs = Attrs.getParamAttrs(I);
if (ArgAttrs.hasAttributes()) {
Out << ' ';
writeAttributeSet(ArgAttrs);
@@ -3750,7 +3744,7 @@ void AssemblyWriter::printFunction(const Function *F) {
// Insert commas as we go... the first arg doesn't get a comma
if (Arg.getArgNo() != 0)
Out << ", ";
- printArgument(&Arg, Attrs.getParamAttributes(Arg.getArgNo()));
+ printArgument(&Arg, Attrs.getParamAttrs(Arg.getArgNo()));
}
}
@@ -3770,8 +3764,8 @@ void AssemblyWriter::printFunction(const Function *F) {
if (F->getAddressSpace() != 0 || !Mod ||
Mod->getDataLayout().getProgramAddressSpace() != 0)
Out << " addrspace(" << F->getAddressSpace() << ")";
- if (Attrs.hasAttributes(AttributeList::FunctionIndex))
- Out << " #" << Machine.getAttributeGroupSlot(Attrs.getFnAttributes());
+ if (Attrs.hasFnAttrs())
+ Out << " #" << Machine.getAttributeGroupSlot(Attrs.getFnAttrs());
if (F->hasSection()) {
Out << " section \"";
printEscapedString(F->getSection(), Out);
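
The attribute accessors renamed throughout this file follow a single pattern; a small sketch for orientation (not part of the vendor import; F is an assumed llvm::Function reference):

// Illustrative sketch, not part of the vendor import.
const AttributeList &AL = F.getAttributes();
if (AL.hasFnAttrs()) {                      // previously hasAttributes(FunctionIndex)
  AttributeSet FnAttrs = AL.getFnAttrs();   // previously getFnAttributes()
  (void)FnAttrs;
}
if (AL.hasRetAttrs()) {                     // previously hasAttributes(ReturnIndex)
  AttributeSet RetAttrs = AL.getRetAttrs();
  (void)RetAttrs;
}
for (unsigned I = 0, E = F.arg_size(); I != E; ++I) {
  AttributeSet ArgAttrs = AL.getParamAttrs(I); // previously getParamAttributes(I)
  (void)ArgAttrs;
}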
@@ -4127,7 +4121,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
Type *RetTy = FTy->getReturnType();
const AttributeList &PAL = CI->getAttributes();
- if (PAL.hasAttributes(AttributeList::ReturnIndex))
+ if (PAL.hasRetAttrs())
Out << ' ' << PAL.getAsString(AttributeList::ReturnIndex);
// Only print addrspace(N) if necessary:
@@ -4142,10 +4136,10 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
Out << ' ';
writeOperand(Operand, false);
Out << '(';
- for (unsigned op = 0, Eop = CI->getNumArgOperands(); op < Eop; ++op) {
+ for (unsigned op = 0, Eop = CI->arg_size(); op < Eop; ++op) {
if (op > 0)
Out << ", ";
- writeParamOperand(CI->getArgOperand(op), PAL.getParamAttributes(op));
+ writeParamOperand(CI->getArgOperand(op), PAL.getParamAttrs(op));
}
// Emit an ellipsis if this is a musttail call in a vararg function. This
@@ -4156,8 +4150,8 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
Out << ", ...";
Out << ')';
- if (PAL.hasAttributes(AttributeList::FunctionIndex))
- Out << " #" << Machine.getAttributeGroupSlot(PAL.getFnAttributes());
+ if (PAL.hasFnAttrs())
+ Out << " #" << Machine.getAttributeGroupSlot(PAL.getFnAttrs());
writeOperandBundles(CI);
} else if (const InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
@@ -4172,7 +4166,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
PrintCallingConv(II->getCallingConv(), Out);
}
- if (PAL.hasAttributes(AttributeList::ReturnIndex))
+ if (PAL.hasRetAttrs())
Out << ' ' << PAL.getAsString(AttributeList::ReturnIndex);
// Only print addrspace(N) if necessary:
@@ -4187,15 +4181,15 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
Out << ' ';
writeOperand(Operand, false);
Out << '(';
- for (unsigned op = 0, Eop = II->getNumArgOperands(); op < Eop; ++op) {
+ for (unsigned op = 0, Eop = II->arg_size(); op < Eop; ++op) {
if (op)
Out << ", ";
- writeParamOperand(II->getArgOperand(op), PAL.getParamAttributes(op));
+ writeParamOperand(II->getArgOperand(op), PAL.getParamAttrs(op));
}
Out << ')';
- if (PAL.hasAttributes(AttributeList::FunctionIndex))
- Out << " #" << Machine.getAttributeGroupSlot(PAL.getFnAttributes());
+ if (PAL.hasFnAttrs())
+ Out << " #" << Machine.getAttributeGroupSlot(PAL.getFnAttrs());
writeOperandBundles(II);
@@ -4215,7 +4209,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
PrintCallingConv(CBI->getCallingConv(), Out);
}
- if (PAL.hasAttributes(AttributeList::ReturnIndex))
+ if (PAL.hasRetAttrs())
Out << ' ' << PAL.getAsString(AttributeList::ReturnIndex);
// If possible, print out the short form of the callbr instruction. We can
@@ -4227,15 +4221,15 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
Out << ' ';
writeOperand(Operand, false);
Out << '(';
- for (unsigned op = 0, Eop = CBI->getNumArgOperands(); op < Eop; ++op) {
+ for (unsigned op = 0, Eop = CBI->arg_size(); op < Eop; ++op) {
if (op)
Out << ", ";
- writeParamOperand(CBI->getArgOperand(op), PAL.getParamAttributes(op));
+ writeParamOperand(CBI->getArgOperand(op), PAL.getParamAttrs(op));
}
Out << ')';
- if (PAL.hasAttributes(AttributeList::FunctionIndex))
- Out << " #" << Machine.getAttributeGroupSlot(PAL.getFnAttributes());
+ if (PAL.hasFnAttrs())
+ Out << " #" << Machine.getAttributeGroupSlot(PAL.getFnAttrs());
writeOperandBundles(CBI);
@@ -4375,6 +4369,7 @@ void AssemblyWriter::printMetadataAttachments(
if (MDNames.empty())
MDs[0].second->getContext().getMDKindNames(MDNames);
+ auto WriterCtx = getContext();
for (const auto &I : MDs) {
unsigned Kind = I.first;
Out << Separator;
@@ -4384,7 +4379,7 @@ void AssemblyWriter::printMetadataAttachments(
} else
Out << "!<unknown kind #" << Kind << ">";
Out << ' ';
- WriteAsOperandInternal(Out, I.second, &TypePrinter, &Machine, TheModule);
+ WriteAsOperandInternal(Out, I.second, WriterCtx);
}
}
@@ -4406,7 +4401,8 @@ void AssemblyWriter::writeAllMDNodes() {
}
void AssemblyWriter::printMDNodeBody(const MDNode *Node) {
- WriteMDNodeBodyInternal(Out, Node, &TypePrinter, &Machine, TheModule);
+ auto WriterCtx = getContext();
+ WriteMDNodeBodyInternal(Out, Node, WriterCtx);
}
void AssemblyWriter::writeAttribute(const Attribute &Attr, bool InAttrGroup) {
@@ -4626,15 +4622,20 @@ void Value::print(raw_ostream &ROS, ModuleSlotTracker &MST,
W.printGlobal(V);
else if (const Function *F = dyn_cast<Function>(GV))
W.printFunction(F);
+ else if (const GlobalAlias *A = dyn_cast<GlobalAlias>(GV))
+ W.printAlias(A);
+ else if (const GlobalIFunc *I = dyn_cast<GlobalIFunc>(GV))
+ W.printIFunc(I);
else
- W.printIndirectSymbol(cast<GlobalIndirectSymbol>(GV));
+ llvm_unreachable("Unknown GlobalValue to print out!");
} else if (const MetadataAsValue *V = dyn_cast<MetadataAsValue>(this)) {
V->getMetadata()->print(ROS, MST, getModuleFromVal(V));
} else if (const Constant *C = dyn_cast<Constant>(this)) {
TypePrinting TypePrinter;
TypePrinter.print(C->getType(), OS);
OS << ' ';
- WriteConstantInternal(OS, C, TypePrinter, MST.getMachine(), nullptr);
+ AsmWriterContext WriterCtx(&TypePrinter, MST.getMachine());
+ WriteConstantInternal(OS, C, WriterCtx);
} else if (isa<InlineAsm>(this) || isa<Argument>(this)) {
this->printAsOperand(OS, /* PrintType */ true, MST);
} else {
@@ -4649,7 +4650,8 @@ static bool printWithoutType(const Value &V, raw_ostream &O,
SlotTracker *Machine, const Module *M) {
if (V.hasName() || isa<GlobalValue>(V) ||
(!isa<Constant>(V) && !isa<MetadataAsValue>(V))) {
- WriteAsOperandInternal(O, &V, nullptr, Machine, M);
+ AsmWriterContext WriterCtx(nullptr, Machine, M);
+ WriteAsOperandInternal(O, &V, WriterCtx);
return true;
}
return false;
@@ -4663,8 +4665,8 @@ static void printAsOperandImpl(const Value &V, raw_ostream &O, bool PrintType,
O << ' ';
}
- WriteAsOperandInternal(O, &V, &TypePrinter, MST.getMachine(),
- MST.getModule());
+ AsmWriterContext WriterCtx(&TypePrinter, MST.getMachine(), MST.getModule());
+ WriteAsOperandInternal(O, &V, WriterCtx);
}
void Value::printAsOperand(raw_ostream &O, bool PrintType,
@@ -4691,22 +4693,87 @@ void Value::printAsOperand(raw_ostream &O, bool PrintType,
printAsOperandImpl(*this, O, PrintType, MST);
}
+/// Recursive version of printMetadataImpl.
+static void printMetadataImplRec(raw_ostream &ROS, const Metadata &MD,
+ AsmWriterContext &WriterCtx) {
+ formatted_raw_ostream OS(ROS);
+ WriteAsOperandInternal(OS, &MD, WriterCtx, /* FromValue */ true);
+
+ auto *N = dyn_cast<MDNode>(&MD);
+ if (!N || isa<DIExpression>(MD) || isa<DIArgList>(MD))
+ return;
+
+ OS << " = ";
+ WriteMDNodeBodyInternal(OS, N, WriterCtx);
+}
+
+namespace {
+struct MDTreeAsmWriterContext : public AsmWriterContext {
+ unsigned Level;
+ // {Level, Printed string}
+ using EntryTy = std::pair<unsigned, std::string>;
+ SmallVector<EntryTy, 4> Buffer;
+
+  // Used to break cycles, in case there are any.
+ SmallPtrSet<const Metadata *, 4> Visited;
+
+ raw_ostream &MainOS;
+
+ MDTreeAsmWriterContext(TypePrinting *TP, SlotTracker *ST, const Module *M,
+ raw_ostream &OS, const Metadata *InitMD)
+ : AsmWriterContext(TP, ST, M), Level(0U), Visited({InitMD}), MainOS(OS) {}
+
+ void onWriteMetadataAsOperand(const Metadata *MD) override {
+ if (Visited.count(MD))
+ return;
+ Visited.insert(MD);
+
+ std::string Str;
+ raw_string_ostream SS(Str);
+ ++Level;
+    // A placeholder entry to remember the correct position in the buffer.
+ Buffer.emplace_back(std::make_pair(Level, ""));
+ unsigned InsertIdx = Buffer.size() - 1;
+
+ printMetadataImplRec(SS, *MD, *this);
+ Buffer[InsertIdx].second = std::move(SS.str());
+ --Level;
+ }
+
+ ~MDTreeAsmWriterContext() {
+ for (const auto &Entry : Buffer) {
+ MainOS << "\n";
+ unsigned NumIndent = Entry.first * 2U;
+ MainOS.indent(NumIndent) << Entry.second;
+ }
+ }
+};
+} // end anonymous namespace
+
static void printMetadataImpl(raw_ostream &ROS, const Metadata &MD,
ModuleSlotTracker &MST, const Module *M,
- bool OnlyAsOperand) {
+ bool OnlyAsOperand, bool PrintAsTree = false) {
formatted_raw_ostream OS(ROS);
TypePrinting TypePrinter(M);
- WriteAsOperandInternal(OS, &MD, &TypePrinter, MST.getMachine(), M,
- /* FromValue */ true);
+ std::unique_ptr<AsmWriterContext> WriterCtx;
+ if (PrintAsTree && !OnlyAsOperand)
+ WriterCtx = std::make_unique<MDTreeAsmWriterContext>(
+ &TypePrinter, MST.getMachine(), M, OS, &MD);
+ else
+ WriterCtx =
+ std::make_unique<AsmWriterContext>(&TypePrinter, MST.getMachine(), M);
+
+ WriteAsOperandInternal(OS, &MD, *WriterCtx, /* FromValue */ true);
auto *N = dyn_cast<MDNode>(&MD);
if (OnlyAsOperand || !N || isa<DIExpression>(MD) || isa<DIArgList>(MD))
return;
OS << " = ";
- WriteMDNodeBodyInternal(OS, N, &TypePrinter, MST.getMachine(), M);
+ WriteMDNodeBodyInternal(OS, N, *WriterCtx);
}
void Metadata::printAsOperand(raw_ostream &OS, const Module *M) const {
@@ -4730,6 +4797,18 @@ void Metadata::print(raw_ostream &OS, ModuleSlotTracker &MST,
printMetadataImpl(OS, *this, MST, M, /* OnlyAsOperand */ false);
}
+void MDNode::printTree(raw_ostream &OS, const Module *M) const {
+ ModuleSlotTracker MST(M, true);
+ printMetadataImpl(OS, *this, MST, M, /* OnlyAsOperand */ false,
+ /*PrintAsTree=*/true);
+}
+
+void MDNode::printTree(raw_ostream &OS, ModuleSlotTracker &MST,
+ const Module *M) const {
+ printMetadataImpl(OS, *this, MST, M, /* OnlyAsOperand */ false,
+ /*PrintAsTree=*/true);
+}
+
void ModuleSummaryIndex::print(raw_ostream &ROS, bool IsForDebug) const {
SlotTracker SlotTable(this);
formatted_raw_ostream OS(ROS);
@@ -4781,6 +4860,15 @@ void Metadata::dump(const Module *M) const {
dbgs() << '\n';
}
+LLVM_DUMP_METHOD
+void MDNode::dumpTree() const { dumpTree(nullptr); }
+
+LLVM_DUMP_METHOD
+void MDNode::dumpTree(const Module *M) const {
+ printTree(dbgs(), M);
+ dbgs() << '\n';
+}
+
// Allow printing of ModuleSummaryIndex from the debugger.
LLVM_DUMP_METHOD
void ModuleSummaryIndex::dump() const { print(dbgs(), /*IsForDebug=*/true); }
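A minimal usage sketch for the tree-printing entry points added above. The helper name debugDumpMetadata and its arguments are illustrative only and not part of the imported sources; it simply forwards to the new MDNode::printTree, which emits the node plus every node it transitively references, indented two spaces per level.

#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

// Print a metadata node and, recursively, the nodes it references.
// Passing the owning Module lets the slot tracker resolve !N numbering.
static void debugDumpMetadata(const llvm::MDNode *N, const llvm::Module *M) {
  N->printTree(llvm::errs(), M); // same output as N->dumpTree(M)
  llvm::errs() << '\n';
}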
diff --git a/llvm/lib/IR/Assumptions.cpp b/llvm/lib/IR/Assumptions.cpp
index 6498114cd60d..3d24ae062841 100644
--- a/llvm/lib/IR/Assumptions.cpp
+++ b/llvm/lib/IR/Assumptions.cpp
@@ -6,17 +6,23 @@
//
//===----------------------------------------------------------------------===//
//
+// This file implements helper functions for accessing assumption information
+// inside of the "llvm.assume" metadata.
+//
//===----------------------------------------------------------------------===//
#include "llvm/IR/Assumptions.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
using namespace llvm;
-bool llvm::hasAssumption(Function &F,
- const KnownAssumptionString &AssumptionStr) {
- const Attribute &A = F.getFnAttribute(AssumptionAttrKey);
+namespace {
+bool hasAssumption(const Attribute &A,
+ const KnownAssumptionString &AssumptionStr) {
if (!A.isValid())
return false;
assert(A.isStringAttribute() && "Expected a string attribute!");
@@ -24,9 +30,76 @@ bool llvm::hasAssumption(Function &F,
SmallVector<StringRef, 8> Strings;
A.getValueAsString().split(Strings, ",");
- return llvm::any_of(Strings, [=](StringRef Assumption) {
- return Assumption == AssumptionStr;
- });
+ return llvm::is_contained(Strings, AssumptionStr);
+}
+
+DenseSet<StringRef> getAssumptions(const Attribute &A) {
+ if (!A.isValid())
+ return DenseSet<StringRef>();
+ assert(A.isStringAttribute() && "Expected a string attribute!");
+
+ DenseSet<StringRef> Assumptions;
+ SmallVector<StringRef, 8> Strings;
+ A.getValueAsString().split(Strings, ",");
+
+ for (StringRef Str : Strings)
+ Assumptions.insert(Str);
+ return Assumptions;
+}
+
+template <typename AttrSite>
+bool addAssumptionsImpl(AttrSite &Site,
+ const DenseSet<StringRef> &Assumptions) {
+ if (Assumptions.empty())
+ return false;
+
+ DenseSet<StringRef> CurAssumptions = getAssumptions(Site);
+
+ if (!set_union(CurAssumptions, Assumptions))
+ return false;
+
+ LLVMContext &Ctx = Site.getContext();
+ Site.addFnAttr(llvm::Attribute::get(
+ Ctx, llvm::AssumptionAttrKey,
+ llvm::join(CurAssumptions.begin(), CurAssumptions.end(), ",")));
+
+ return true;
+}
+} // namespace
+
+bool llvm::hasAssumption(const Function &F,
+ const KnownAssumptionString &AssumptionStr) {
+ const Attribute &A = F.getFnAttribute(AssumptionAttrKey);
+ return ::hasAssumption(A, AssumptionStr);
+}
+
+bool llvm::hasAssumption(const CallBase &CB,
+ const KnownAssumptionString &AssumptionStr) {
+ if (Function *F = CB.getCalledFunction())
+ if (hasAssumption(*F, AssumptionStr))
+ return true;
+
+ const Attribute &A = CB.getFnAttr(AssumptionAttrKey);
+ return ::hasAssumption(A, AssumptionStr);
+}
+
+DenseSet<StringRef> llvm::getAssumptions(const Function &F) {
+ const Attribute &A = F.getFnAttribute(AssumptionAttrKey);
+ return ::getAssumptions(A);
+}
+
+DenseSet<StringRef> llvm::getAssumptions(const CallBase &CB) {
+ const Attribute &A = CB.getFnAttr(AssumptionAttrKey);
+ return ::getAssumptions(A);
+}
+
+bool llvm::addAssumptions(Function &F, const DenseSet<StringRef> &Assumptions) {
+ return ::addAssumptionsImpl(F, Assumptions);
+}
+
+bool llvm::addAssumptions(CallBase &CB,
+ const DenseSet<StringRef> &Assumptions) {
+ return ::addAssumptionsImpl(CB, Assumptions);
}
StringSet<> llvm::KnownAssumptionStrings({
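A short sketch of how the reworked assumption helpers above fit together, assuming an existing function and call site; propagateAssumptions and the queried assumption string are illustrative placeholders, not code from the import.

#include "llvm/ADT/DenseSet.h"
#include "llvm/IR/Assumptions.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
using namespace llvm;

// Copy a function's "llvm.assume" strings onto a call site, then query one.
static bool propagateAssumptions(Function &F, CallBase &CB) {
  DenseSet<StringRef> Known = getAssumptions(F); // parses the string attribute
  bool Changed = addAssumptions(CB, Known);      // set-union onto the call site
  // The CallBase overload also consults the called function, per this change.
  bool HasIt = hasAssumption(CB, KnownAssumptionString("omp_no_openmp"));
  (void)HasIt;
  return Changed;
}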
diff --git a/llvm/lib/IR/Attributes.cpp b/llvm/lib/IR/Attributes.cpp
index 5cd1bafccc47..f81a446d6e46 100644
--- a/llvm/lib/IR/Attributes.cpp
+++ b/llvm/lib/IR/Attributes.cpp
@@ -812,42 +812,13 @@ AttributeSetNode *AttributeSetNode::get(LLVMContext &C, const AttrBuilder &B) {
if (!B.contains(Kind))
continue;
- if (Attribute::isTypeAttrKind(Kind)) {
- Attrs.push_back(Attribute::get(C, Kind, B.getTypeAttr(Kind)));
- continue;
- }
-
Attribute Attr;
- switch (Kind) {
- case Attribute::Alignment:
- assert(B.getAlignment() && "Alignment must be set");
- Attr = Attribute::getWithAlignment(C, *B.getAlignment());
- break;
- case Attribute::StackAlignment:
- assert(B.getStackAlignment() && "StackAlignment must be set");
- Attr = Attribute::getWithStackAlignment(C, *B.getStackAlignment());
- break;
- case Attribute::Dereferenceable:
- Attr = Attribute::getWithDereferenceableBytes(
- C, B.getDereferenceableBytes());
- break;
- case Attribute::DereferenceableOrNull:
- Attr = Attribute::getWithDereferenceableOrNullBytes(
- C, B.getDereferenceableOrNullBytes());
- break;
- case Attribute::AllocSize: {
- auto A = B.getAllocSizeArgs();
- Attr = Attribute::getWithAllocSizeArgs(C, A.first, A.second);
- break;
- }
- case Attribute::VScaleRange: {
- auto A = B.getVScaleRangeArgs();
- Attr = Attribute::getWithVScaleRangeArgs(C, A.first, A.second);
- break;
- }
- default:
+ if (Attribute::isTypeAttrKind(Kind))
+ Attr = Attribute::get(C, Kind, B.getTypeAttr(Kind));
+ else if (Attribute::isIntAttrKind(Kind))
+ Attr = Attribute::get(C, Kind, B.getRawIntAttr(Kind));
+ else
Attr = Attribute::get(C, Kind);
- }
Attrs.push_back(Attr);
}
@@ -1209,33 +1180,36 @@ AttributeList AttributeList::get(LLVMContext &C,
return getImpl(C, NewAttrSets);
}
-AttributeList AttributeList::addAttribute(LLVMContext &C, unsigned Index,
- Attribute::AttrKind Kind) const {
- if (hasAttribute(Index, Kind)) return *this;
+AttributeList
+AttributeList::addAttributeAtIndex(LLVMContext &C, unsigned Index,
+ Attribute::AttrKind Kind) const {
+ if (hasAttributeAtIndex(Index, Kind))
+ return *this;
AttributeSet Attrs = getAttributes(Index);
// TODO: Insert at correct position and avoid sort.
SmallVector<Attribute, 8> NewAttrs(Attrs.begin(), Attrs.end());
NewAttrs.push_back(Attribute::get(C, Kind));
- return setAttributes(C, Index, AttributeSet::get(C, NewAttrs));
+ return setAttributesAtIndex(C, Index, AttributeSet::get(C, NewAttrs));
}
-AttributeList AttributeList::addAttribute(LLVMContext &C, unsigned Index,
- StringRef Kind,
- StringRef Value) const {
+AttributeList AttributeList::addAttributeAtIndex(LLVMContext &C, unsigned Index,
+ StringRef Kind,
+ StringRef Value) const {
AttrBuilder B;
B.addAttribute(Kind, Value);
- return addAttributes(C, Index, B);
+ return addAttributesAtIndex(C, Index, B);
}
-AttributeList AttributeList::addAttribute(LLVMContext &C, unsigned Index,
- Attribute A) const {
+AttributeList AttributeList::addAttributeAtIndex(LLVMContext &C, unsigned Index,
+ Attribute A) const {
AttrBuilder B;
B.addAttribute(A);
- return addAttributes(C, Index, B);
+ return addAttributesAtIndex(C, Index, B);
}
-AttributeList AttributeList::setAttributes(LLVMContext &C, unsigned Index,
- AttributeSet Attrs) const {
+AttributeList AttributeList::setAttributesAtIndex(LLVMContext &C,
+ unsigned Index,
+ AttributeSet Attrs) const {
Index = attrIdxToArrayIdx(Index);
SmallVector<AttributeSet, 4> AttrSets(this->begin(), this->end());
if (Index >= AttrSets.size())
@@ -1244,8 +1218,9 @@ AttributeList AttributeList::setAttributes(LLVMContext &C, unsigned Index,
return AttributeList::getImpl(C, AttrSets);
}
-AttributeList AttributeList::addAttributes(LLVMContext &C, unsigned Index,
- const AttrBuilder &B) const {
+AttributeList AttributeList::addAttributesAtIndex(LLVMContext &C,
+ unsigned Index,
+ const AttrBuilder &B) const {
if (!B.hasAttributes())
return *this;
@@ -1263,7 +1238,7 @@ AttributeList AttributeList::addAttributes(LLVMContext &C, unsigned Index,
AttrBuilder Merged(getAttributes(Index));
Merged.merge(B);
- return setAttributes(C, Index, AttributeSet::get(C, Merged));
+ return setAttributesAtIndex(C, Index, AttributeSet::get(C, Merged));
}
AttributeList AttributeList::addParamAttribute(LLVMContext &C,
@@ -1286,9 +1261,11 @@ AttributeList AttributeList::addParamAttribute(LLVMContext &C,
return getImpl(C, AttrSets);
}
-AttributeList AttributeList::removeAttribute(LLVMContext &C, unsigned Index,
- Attribute::AttrKind Kind) const {
- if (!hasAttribute(Index, Kind)) return *this;
+AttributeList
+AttributeList::removeAttributeAtIndex(LLVMContext &C, unsigned Index,
+ Attribute::AttrKind Kind) const {
+ if (!hasAttributeAtIndex(Index, Kind))
+ return *this;
Index = attrIdxToArrayIdx(Index);
SmallVector<AttributeSet, 4> AttrSets(this->begin(), this->end());
@@ -1299,9 +1276,11 @@ AttributeList AttributeList::removeAttribute(LLVMContext &C, unsigned Index,
return getImpl(C, AttrSets);
}
-AttributeList AttributeList::removeAttribute(LLVMContext &C, unsigned Index,
- StringRef Kind) const {
- if (!hasAttribute(Index, Kind)) return *this;
+AttributeList AttributeList::removeAttributeAtIndex(LLVMContext &C,
+ unsigned Index,
+ StringRef Kind) const {
+ if (!hasAttributeAtIndex(Index, Kind))
+ return *this;
Index = attrIdxToArrayIdx(Index);
SmallVector<AttributeSet, 4> AttrSets(this->begin(), this->end());
@@ -1313,18 +1292,19 @@ AttributeList AttributeList::removeAttribute(LLVMContext &C, unsigned Index,
}
AttributeList
-AttributeList::removeAttributes(LLVMContext &C, unsigned Index,
- const AttrBuilder &AttrsToRemove) const {
+AttributeList::removeAttributesAtIndex(LLVMContext &C, unsigned Index,
+ const AttrBuilder &AttrsToRemove) const {
AttributeSet Attrs = getAttributes(Index);
AttributeSet NewAttrs = Attrs.removeAttributes(C, AttrsToRemove);
// If nothing was removed, return the original list.
if (Attrs == NewAttrs)
return *this;
- return setAttributes(C, Index, NewAttrs);
+ return setAttributesAtIndex(C, Index, NewAttrs);
}
-AttributeList AttributeList::removeAttributes(LLVMContext &C,
- unsigned WithoutIndex) const {
+AttributeList
+AttributeList::removeAttributesAtIndex(LLVMContext &C,
+ unsigned WithoutIndex) const {
if (!pImpl)
return {};
WithoutIndex = attrIdxToArrayIdx(WithoutIndex);
@@ -1335,79 +1315,73 @@ AttributeList AttributeList::removeAttributes(LLVMContext &C,
return getImpl(C, AttrSets);
}
-AttributeList AttributeList::addDereferenceableAttr(LLVMContext &C,
- unsigned Index,
- uint64_t Bytes) const {
+AttributeList AttributeList::addDereferenceableRetAttr(LLVMContext &C,
+ uint64_t Bytes) const {
AttrBuilder B;
B.addDereferenceableAttr(Bytes);
- return addAttributes(C, Index, B);
+ return addRetAttributes(C, B);
}
-AttributeList
-AttributeList::addDereferenceableOrNullAttr(LLVMContext &C, unsigned Index,
- uint64_t Bytes) const {
+AttributeList AttributeList::addDereferenceableParamAttr(LLVMContext &C,
+ unsigned Index,
+ uint64_t Bytes) const {
AttrBuilder B;
- B.addDereferenceableOrNullAttr(Bytes);
- return addAttributes(C, Index, B);
+ B.addDereferenceableAttr(Bytes);
+ return addParamAttributes(C, Index, B);
}
AttributeList
-AttributeList::addAllocSizeAttr(LLVMContext &C, unsigned Index,
- unsigned ElemSizeArg,
- const Optional<unsigned> &NumElemsArg) {
+AttributeList::addDereferenceableOrNullParamAttr(LLVMContext &C, unsigned Index,
+ uint64_t Bytes) const {
AttrBuilder B;
- B.addAllocSizeAttr(ElemSizeArg, NumElemsArg);
- return addAttributes(C, Index, B);
+ B.addDereferenceableOrNullAttr(Bytes);
+ return addParamAttributes(C, Index, B);
}
-AttributeList AttributeList::addVScaleRangeAttr(LLVMContext &C, unsigned Index,
- unsigned MinValue,
- unsigned MaxValue) {
+AttributeList
+AttributeList::addAllocSizeParamAttr(LLVMContext &C, unsigned Index,
+ unsigned ElemSizeArg,
+ const Optional<unsigned> &NumElemsArg) {
AttrBuilder B;
- B.addVScaleRangeAttr(MinValue, MaxValue);
- return addAttributes(C, Index, B);
+ B.addAllocSizeAttr(ElemSizeArg, NumElemsArg);
+ return addParamAttributes(C, Index, B);
}
//===----------------------------------------------------------------------===//
// AttributeList Accessor Methods
//===----------------------------------------------------------------------===//
-AttributeSet AttributeList::getParamAttributes(unsigned ArgNo) const {
+AttributeSet AttributeList::getParamAttrs(unsigned ArgNo) const {
return getAttributes(ArgNo + FirstArgIndex);
}
-AttributeSet AttributeList::getRetAttributes() const {
+AttributeSet AttributeList::getRetAttrs() const {
return getAttributes(ReturnIndex);
}
-AttributeSet AttributeList::getFnAttributes() const {
+AttributeSet AttributeList::getFnAttrs() const {
return getAttributes(FunctionIndex);
}
-bool AttributeList::hasAttribute(unsigned Index,
- Attribute::AttrKind Kind) const {
+bool AttributeList::hasAttributeAtIndex(unsigned Index,
+ Attribute::AttrKind Kind) const {
return getAttributes(Index).hasAttribute(Kind);
}
-bool AttributeList::hasAttribute(unsigned Index, StringRef Kind) const {
+bool AttributeList::hasAttributeAtIndex(unsigned Index, StringRef Kind) const {
return getAttributes(Index).hasAttribute(Kind);
}
-bool AttributeList::hasAttributes(unsigned Index) const {
+bool AttributeList::hasAttributesAtIndex(unsigned Index) const {
return getAttributes(Index).hasAttributes();
}
-bool AttributeList::hasFnAttribute(Attribute::AttrKind Kind) const {
+bool AttributeList::hasFnAttr(Attribute::AttrKind Kind) const {
return pImpl && pImpl->hasFnAttribute(Kind);
}
-bool AttributeList::hasFnAttribute(StringRef Kind) const {
- return hasAttribute(AttributeList::FunctionIndex, Kind);
-}
-
-bool AttributeList::hasParamAttribute(unsigned ArgNo,
- Attribute::AttrKind Kind) const {
- return hasAttribute(ArgNo + FirstArgIndex, Kind);
+bool AttributeList::hasFnAttr(StringRef Kind) const {
+ return hasAttributeAtIndex(AttributeList::FunctionIndex, Kind);
}
bool AttributeList::hasAttrSomewhere(Attribute::AttrKind Attr,
@@ -1415,12 +1389,13 @@ bool AttributeList::hasAttrSomewhere(Attribute::AttrKind Attr,
return pImpl && pImpl->hasAttrSomewhere(Attr, Index);
}
-Attribute AttributeList::getAttribute(unsigned Index,
- Attribute::AttrKind Kind) const {
+Attribute AttributeList::getAttributeAtIndex(unsigned Index,
+ Attribute::AttrKind Kind) const {
return getAttributes(Index).getAttribute(Kind);
}
-Attribute AttributeList::getAttribute(unsigned Index, StringRef Kind) const {
+Attribute AttributeList::getAttributeAtIndex(unsigned Index,
+ StringRef Kind) const {
return getAttributes(Index).getAttribute(Kind);
}
@@ -1460,26 +1435,29 @@ Type *AttributeList::getParamElementType(unsigned Index) const {
return getAttributes(Index + FirstArgIndex).getElementType();
}
-MaybeAlign AttributeList::getStackAlignment(unsigned Index) const {
- return getAttributes(Index).getStackAlignment();
+MaybeAlign AttributeList::getFnStackAlignment() const {
+ return getFnAttrs().getStackAlignment();
}
-uint64_t AttributeList::getDereferenceableBytes(unsigned Index) const {
- return getAttributes(Index).getDereferenceableBytes();
+MaybeAlign AttributeList::getRetStackAlignment() const {
+ return getRetAttrs().getStackAlignment();
}
-uint64_t AttributeList::getDereferenceableOrNullBytes(unsigned Index) const {
- return getAttributes(Index).getDereferenceableOrNullBytes();
+uint64_t AttributeList::getRetDereferenceableBytes() const {
+ return getRetAttrs().getDereferenceableBytes();
}
-std::pair<unsigned, Optional<unsigned>>
-AttributeList::getAllocSizeArgs(unsigned Index) const {
- return getAttributes(Index).getAllocSizeArgs();
+uint64_t AttributeList::getParamDereferenceableBytes(unsigned Index) const {
+ return getParamAttrs(Index).getDereferenceableBytes();
}
-std::pair<unsigned, unsigned>
-AttributeList::getVScaleRangeArgs(unsigned Index) const {
- return getAttributes(Index).getVScaleRangeArgs();
+uint64_t AttributeList::getRetDereferenceableOrNullBytes() const {
+ return getRetAttrs().getDereferenceableOrNullBytes();
+}
+
+uint64_t
+AttributeList::getParamDereferenceableOrNullBytes(unsigned Index) const {
+ return getParamAttrs(Index).getDereferenceableOrNullBytes();
}
std::string AttributeList::getAsString(unsigned Index, bool InAttrGrp) const {
@@ -1520,7 +1498,7 @@ unsigned AttributeList::getNumAttrSets() const {
void AttributeList::print(raw_ostream &O) const {
O << "AttributeList[\n";
- for (unsigned i = index_begin(), e = index_end(); i != e; ++i) {
+ for (unsigned i : indexes()) {
if (!getAttributes(i).hasAttributes())
continue;
O << " { ";
@@ -1563,15 +1541,18 @@ AttrBuilder::AttrBuilder(AttributeSet AS) {
void AttrBuilder::clear() {
Attrs.reset();
TargetDepAttrs.clear();
- Alignment.reset();
- StackAlignment.reset();
- DerefBytes = DerefOrNullBytes = 0;
- AllocSizeArgs = 0;
- VScaleRangeArgs = 0;
+ IntAttrs = {};
TypeAttrs = {};
}
Optional<unsigned>
+AttrBuilder::kindToIntIndex(Attribute::AttrKind Kind) const {
+ if (Attribute::isIntAttrKind(Kind))
+ return Kind - Attribute::FirstIntAttr;
+ return None;
+}
+
+Optional<unsigned>
AttrBuilder::kindToTypeIndex(Attribute::AttrKind Kind) const {
if (Attribute::isTypeAttrKind(Kind))
return Kind - Attribute::FirstTypeAttr;
@@ -1589,18 +1570,8 @@ AttrBuilder &AttrBuilder::addAttribute(Attribute Attr) {
if (Optional<unsigned> TypeIndex = kindToTypeIndex(Kind))
TypeAttrs[*TypeIndex] = Attr.getValueAsType();
- else if (Kind == Attribute::Alignment)
- Alignment = Attr.getAlignment();
- else if (Kind == Attribute::StackAlignment)
- StackAlignment = Attr.getStackAlignment();
- else if (Kind == Attribute::Dereferenceable)
- DerefBytes = Attr.getDereferenceableBytes();
- else if (Kind == Attribute::DereferenceableOrNull)
- DerefOrNullBytes = Attr.getDereferenceableOrNullBytes();
- else if (Kind == Attribute::AllocSize)
- AllocSizeArgs = Attr.getValueAsInt();
- else if (Kind == Attribute::VScaleRange)
- VScaleRangeArgs = Attr.getValueAsInt();
+ else if (Optional<unsigned> IntIndex = kindToIntIndex(Kind))
+ IntAttrs[*IntIndex] = Attr.getValueAsInt();
return *this;
}
@@ -1616,18 +1587,8 @@ AttrBuilder &AttrBuilder::removeAttribute(Attribute::AttrKind Val) {
if (Optional<unsigned> TypeIndex = kindToTypeIndex(Val))
TypeAttrs[*TypeIndex] = nullptr;
- else if (Val == Attribute::Alignment)
- Alignment.reset();
- else if (Val == Attribute::StackAlignment)
- StackAlignment.reset();
- else if (Val == Attribute::Dereferenceable)
- DerefBytes = 0;
- else if (Val == Attribute::DereferenceableOrNull)
- DerefOrNullBytes = 0;
- else if (Val == Attribute::AllocSize)
- AllocSizeArgs = 0;
- else if (Val == Attribute::VScaleRange)
- VScaleRangeArgs = 0;
+ else if (Optional<unsigned> IntIndex = kindToIntIndex(Val))
+ IntAttrs[*IntIndex] = 0;
return *this;
}
@@ -1638,18 +1599,32 @@ AttrBuilder &AttrBuilder::removeAttributes(AttributeList A, uint64_t Index) {
}
AttrBuilder &AttrBuilder::removeAttribute(StringRef A) {
- auto I = TargetDepAttrs.find(A);
- if (I != TargetDepAttrs.end())
- TargetDepAttrs.erase(I);
+ TargetDepAttrs.erase(A);
+ return *this;
+}
+
+uint64_t AttrBuilder::getRawIntAttr(Attribute::AttrKind Kind) const {
+ Optional<unsigned> IntIndex = kindToIntIndex(Kind);
+ assert(IntIndex && "Not an int attribute");
+ return IntAttrs[*IntIndex];
+}
+
+AttrBuilder &AttrBuilder::addRawIntAttr(Attribute::AttrKind Kind,
+ uint64_t Value) {
+ Optional<unsigned> IntIndex = kindToIntIndex(Kind);
+ assert(IntIndex && "Not an int attribute");
+ assert(Value && "Value cannot be zero");
+ Attrs[Kind] = true;
+ IntAttrs[*IntIndex] = Value;
return *this;
}
std::pair<unsigned, Optional<unsigned>> AttrBuilder::getAllocSizeArgs() const {
- return unpackAllocSizeArgs(AllocSizeArgs);
+ return unpackAllocSizeArgs(getRawIntAttr(Attribute::AllocSize));
}
std::pair<unsigned, unsigned> AttrBuilder::getVScaleRangeArgs() const {
- return unpackVScaleRangeArgs(VScaleRangeArgs);
+ return unpackVScaleRangeArgs(getRawIntAttr(Attribute::VScaleRange));
}
AttrBuilder &AttrBuilder::addAlignmentAttr(MaybeAlign Align) {
@@ -1657,10 +1632,7 @@ AttrBuilder &AttrBuilder::addAlignmentAttr(MaybeAlign Align) {
return *this;
assert(*Align <= llvm::Value::MaximumAlignment && "Alignment too large.");
-
- Attrs[Attribute::Alignment] = true;
- Alignment = Align;
- return *this;
+ return addRawIntAttr(Attribute::Alignment, Align->value());
}
AttrBuilder &AttrBuilder::addStackAlignmentAttr(MaybeAlign Align) {
@@ -1669,27 +1641,20 @@ AttrBuilder &AttrBuilder::addStackAlignmentAttr(MaybeAlign Align) {
return *this;
assert(*Align <= 0x100 && "Alignment too large.");
-
- Attrs[Attribute::StackAlignment] = true;
- StackAlignment = Align;
- return *this;
+ return addRawIntAttr(Attribute::StackAlignment, Align->value());
}
AttrBuilder &AttrBuilder::addDereferenceableAttr(uint64_t Bytes) {
if (Bytes == 0) return *this;
- Attrs[Attribute::Dereferenceable] = true;
- DerefBytes = Bytes;
- return *this;
+ return addRawIntAttr(Attribute::Dereferenceable, Bytes);
}
AttrBuilder &AttrBuilder::addDereferenceableOrNullAttr(uint64_t Bytes) {
if (Bytes == 0)
return *this;
- Attrs[Attribute::DereferenceableOrNull] = true;
- DerefOrNullBytes = Bytes;
- return *this;
+ return addRawIntAttr(Attribute::DereferenceableOrNull, Bytes);
}
AttrBuilder &AttrBuilder::addAllocSizeAttr(unsigned ElemSize,
@@ -1700,12 +1665,7 @@ AttrBuilder &AttrBuilder::addAllocSizeAttr(unsigned ElemSize,
AttrBuilder &AttrBuilder::addAllocSizeAttrFromRawRepr(uint64_t RawArgs) {
// (0, 0) is our "not present" value, so we need to check for it here.
assert(RawArgs && "Invalid allocsize arguments -- given allocsize(0, 0)");
-
- Attrs[Attribute::AllocSize] = true;
- // Reuse existing machinery to store this as a single 64-bit integer so we can
- // save a few bytes over using a pair<unsigned, Optional<unsigned>>.
- AllocSizeArgs = RawArgs;
- return *this;
+ return addRawIntAttr(Attribute::AllocSize, RawArgs);
}
AttrBuilder &AttrBuilder::addVScaleRangeAttr(unsigned MinValue,
@@ -1718,11 +1678,7 @@ AttrBuilder &AttrBuilder::addVScaleRangeAttrFromRawRepr(uint64_t RawArgs) {
if (RawArgs == 0)
return *this;
- Attrs[Attribute::VScaleRange] = true;
- // Reuse existing machinery to store this as a single 64-bit integer so we can
- // save a few bytes over using a pair<unsigned, unsigned>.
- VScaleRangeArgs = RawArgs;
- return *this;
+ return addRawIntAttr(Attribute::VScaleRange, RawArgs);
}
Type *AttrBuilder::getTypeAttr(Attribute::AttrKind Kind) const {
@@ -1760,24 +1716,10 @@ AttrBuilder &AttrBuilder::addInAllocaAttr(Type *Ty) {
}
AttrBuilder &AttrBuilder::merge(const AttrBuilder &B) {
- // FIXME: What if both have alignments, but they don't match?!
- if (!Alignment)
- Alignment = B.Alignment;
-
- if (!StackAlignment)
- StackAlignment = B.StackAlignment;
-
- if (!DerefBytes)
- DerefBytes = B.DerefBytes;
-
- if (!DerefOrNullBytes)
- DerefOrNullBytes = B.DerefOrNullBytes;
-
- if (!AllocSizeArgs)
- AllocSizeArgs = B.AllocSizeArgs;
-
- if (!VScaleRangeArgs)
- VScaleRangeArgs = B.VScaleRangeArgs;
+ // FIXME: What if both have an int/type attribute, but they don't match?!
+ for (unsigned Index = 0; Index < Attribute::NumIntAttrKinds; ++Index)
+ if (!IntAttrs[Index])
+ IntAttrs[Index] = B.IntAttrs[Index];
for (unsigned Index = 0; Index < Attribute::NumTypeAttrKinds; ++Index)
if (!TypeAttrs[Index])
@@ -1792,24 +1734,10 @@ AttrBuilder &AttrBuilder::merge(const AttrBuilder &B) {
}
AttrBuilder &AttrBuilder::remove(const AttrBuilder &B) {
- // FIXME: What if both have alignments, but they don't match?!
- if (B.Alignment)
- Alignment.reset();
-
- if (B.StackAlignment)
- StackAlignment.reset();
-
- if (B.DerefBytes)
- DerefBytes = 0;
-
- if (B.DerefOrNullBytes)
- DerefOrNullBytes = 0;
-
- if (B.AllocSizeArgs)
- AllocSizeArgs = 0;
-
- if (B.VScaleRangeArgs)
- VScaleRangeArgs = 0;
+ // FIXME: What if both have an int/type attribute, but they don't match?!
+ for (unsigned Index = 0; Index < Attribute::NumIntAttrKinds; ++Index)
+ if (B.IntAttrs[Index])
+ IntAttrs[Index] = 0;
for (unsigned Index = 0; Index < Attribute::NumTypeAttrKinds; ++Index)
if (B.TypeAttrs[Index])
@@ -1861,7 +1789,7 @@ bool AttrBuilder::hasAttributes(AttributeList AL, uint64_t Index) const {
}
bool AttrBuilder::hasAlignmentAttr() const {
- return Alignment != 0;
+ return getRawIntAttr(Attribute::Alignment) != 0;
}
bool AttrBuilder::operator==(const AttrBuilder &B) const {
@@ -1872,9 +1800,7 @@ bool AttrBuilder::operator==(const AttrBuilder &B) const {
if (B.TargetDepAttrs.find(TDA.first) == B.TargetDepAttrs.end())
return false;
- return Alignment == B.Alignment && StackAlignment == B.StackAlignment &&
- DerefBytes == B.DerefBytes && TypeAttrs == B.TypeAttrs &&
- VScaleRangeArgs == B.VScaleRangeArgs;
+ return IntAttrs == B.IntAttrs && TypeAttrs == B.TypeAttrs;
}
//===----------------------------------------------------------------------===//
@@ -1966,11 +1892,11 @@ static void adjustCallerSSPLevel(Function &Caller, const Function &Callee) {
.addAttribute(Attribute::StackProtectReq);
if (Callee.hasFnAttribute(Attribute::StackProtectReq)) {
- Caller.removeAttributes(AttributeList::FunctionIndex, OldSSPAttr);
+ Caller.removeFnAttrs(OldSSPAttr);
Caller.addFnAttr(Attribute::StackProtectReq);
} else if (Callee.hasFnAttribute(Attribute::StackProtectStrong) &&
!Caller.hasFnAttribute(Attribute::StackProtectReq)) {
- Caller.removeAttributes(AttributeList::FunctionIndex, OldSSPAttr);
+ Caller.removeFnAttrs(OldSSPAttr);
Caller.addFnAttr(Attribute::StackProtectStrong);
} else if (Callee.hasFnAttribute(Attribute::StackProtect) &&
!Caller.hasFnAttribute(Attribute::StackProtectReq) &&
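A brief sketch of the renamed, index-explicit AttributeList API and the consolidated integer-attribute storage in AttrBuilder introduced above; exampleAttrs is an illustrative helper under those assumptions, not code from the import.

#include "llvm/IR/Attributes.h"
#include "llvm/IR/LLVMContext.h"
#include <cassert>
using namespace llvm;

// Alignment, dereferenceable, allocsize, etc. all flow through addRawIntAttr().
static AttributeList exampleAttrs(LLVMContext &Ctx) {
  AttrBuilder B;
  B.addAlignmentAttr(Align(16)); // stored as a raw int attribute
  B.addDereferenceableAttr(8);

  AttributeList AL;
  AL = AL.addParamAttributes(Ctx, /*ArgNo=*/0, B);
  AL = AL.addAttributeAtIndex(Ctx, AttributeList::FunctionIndex,
                              Attribute::NoUnwind);
  assert(AL.hasFnAttr(Attribute::NoUnwind) && "function attr should be set");
  assert(AL.getParamAttrs(0).getDereferenceableBytes() == 8);
  return AL;
}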
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 6271385183eb..d73d1e9c20b3 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -583,8 +583,10 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
// Can't use Intrinsic::getDeclaration here as the return types might
// then only be structurally equal.
FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
+ StringRef Suffix =
+ F->getContext().supportsTypedPointers() ? "p0i8" : "p0";
NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
- "llvm." + Name + ".p0i8", F->getParent());
+ "llvm." + Name + "." + Suffix, F->getParent());
return true;
}
static const Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
@@ -601,7 +603,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
auto fArgs = F->getFunctionType()->params();
Type *Tys[] = {fArgs[0], fArgs[1]};
- if (Name.find("lane") == StringRef::npos)
+ if (!Name.contains("lane"))
NewFn = Intrinsic::getDeclaration(F->getParent(),
StoreInts[fArgs.size() - 3], Tys);
else
@@ -1273,7 +1275,7 @@ static Value *UpgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallInst &CI,
Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});
- if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
+ if (CI.arg_size() == 4) { // For masked intrinsics.
Value *VecSrc = CI.getOperand(2);
Value *Mask = CI.getOperand(3);
Res = EmitX86Select(Builder, Mask, Res, VecSrc);
@@ -1300,7 +1302,7 @@ static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallInst &CI,
Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt});
- if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
+ if (CI.arg_size() == 4) { // For masked intrinsics.
Value *VecSrc = CI.getOperand(2);
Value *Mask = CI.getOperand(3);
Res = EmitX86Select(Builder, Mask, Res, VecSrc);
@@ -1370,7 +1372,7 @@ static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallInst &CI,
Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt});
- unsigned NumArgs = CI.getNumArgOperands();
+ unsigned NumArgs = CI.arg_size();
if (NumArgs >= 4) { // For masked intrinsics.
Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
@@ -1431,7 +1433,7 @@ static Value *upgradeAbs(IRBuilder<> &Builder, CallInst &CI) {
Value *Op0 = CI.getArgOperand(0);
Function *F = Intrinsic::getDeclaration(CI.getModule(), Intrinsic::abs, Ty);
Value *Res = Builder.CreateCall(F, {Op0, Builder.getInt1(false)});
- if (CI.getNumArgOperands() == 3)
+ if (CI.arg_size() == 3)
Res = EmitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
return Res;
}
@@ -1459,7 +1461,7 @@ static Value *upgradePMULDQ(IRBuilder<> &Builder, CallInst &CI, bool IsSigned) {
Value *Res = Builder.CreateMul(LHS, RHS);
- if (CI.getNumArgOperands() == 4)
+ if (CI.arg_size() == 4)
Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
return Res;
@@ -1514,7 +1516,7 @@ static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
}
- Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1);
+ Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask);
}
@@ -1779,13 +1781,12 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
} else
return false;
- SmallVector<Value *, 4> Args(CI.arg_operands().begin(),
- CI.arg_operands().end());
+ SmallVector<Value *, 4> Args(CI.args());
Args.pop_back();
Args.pop_back();
Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
Args);
- unsigned NumArgs = CI.getNumArgOperands();
+ unsigned NumArgs = CI.arg_size();
Rep = EmitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
CI.getArgOperand(NumArgs - 2));
return true;
@@ -1964,7 +1965,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
CI->getType()),
{CI->getArgOperand(0)});
} else if (IsX86 && (Name.startswith("avx512.mask.sqrt.p"))) {
- if (CI->getNumArgOperands() == 4 &&
+ if (CI->arg_size() == 4 &&
(!isa<ConstantInt>(CI->getArgOperand(3)) ||
cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
@@ -2124,8 +2125,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
{ CI->getOperand(0), CI->getArgOperand(1) });
Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.cmp.p")) {
- SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
- CI->arg_operands().end());
+ SmallVector<Value *, 4> Args(CI->args());
Type *OpTy = Args[0]->getType();
unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
unsigned EltWidth = OpTy->getScalarSizeInBits();
@@ -2257,7 +2257,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
bool IsUnsigned = (StringRef::npos != Name.find("cvtu"));
if (IsPS2PD)
Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
- else if (CI->getNumArgOperands() == 4 &&
+ else if (CI->arg_size() == 4 &&
(!isa<ConstantInt>(CI->getArgOperand(3)) ||
cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
@@ -2270,7 +2270,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
: Builder.CreateSIToFP(Rep, DstTy, "cvt");
}
- if (CI->getNumArgOperands() >= 3)
+ if (CI->arg_size() >= 3)
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
} else if (IsX86 && (Name.startswith("avx512.mask.vcvtph2ps.") ||
@@ -2286,7 +2286,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = Builder.CreateBitCast(
Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
- if (CI->getNumArgOperands() >= 3)
+ if (CI->arg_size() >= 3)
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
} else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
@@ -2353,7 +2353,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
llvm_unreachable("Unknown suffix");
unsigned Imm;
- if (CI->getNumArgOperands() == 3) {
+ if (CI->arg_size() == 3) {
Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
} else {
Name = Name.substr(9); // strip off "xop.vpcom"
@@ -2417,7 +2417,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
EltTy->getPointerTo());
Value *Load = Builder.CreateLoad(EltTy, Cast);
Type *I32Ty = Type::getInt32Ty(C);
- Rep = UndefValue::get(VecTy);
+ Rep = PoisonValue::get(VecTy);
for (unsigned I = 0; I < EltNum; ++I)
Rep = Builder.CreateInsertElement(Rep, Load,
ConstantInt::get(I32Ty, I));
@@ -2442,7 +2442,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
: Builder.CreateZExt(SV, DstTy);
// If there are 3 arguments, it's a masked intrinsic so we need a select.
- if (CI->getNumArgOperands() == 3)
+ if (CI->arg_size() == 3)
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
} else if (Name == "avx512.mask.pmov.qd.256" ||
@@ -2518,7 +2518,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);
Rep = Builder.CreateShuffleVector(Op, M);
- if (CI->getNumArgOperands() == 3)
+ if (CI->arg_size() == 3)
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
} else if (IsX86 && (Name.startswith("sse2.padds.") ||
@@ -2636,7 +2636,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
// If the intrinsic has a mask operand, handle that.
- if (CI->getNumArgOperands() == 5)
+ if (CI->arg_size() == 5)
Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
CI->getArgOperand(3));
} else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
@@ -2661,7 +2661,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
// If the intrinsic has a mask operand, handle that.
- if (CI->getNumArgOperands() == 4)
+ if (CI->arg_size() == 4)
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (!IsX86 && Name == "stackprotectorcheck") {
@@ -2679,7 +2679,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
- if (CI->getNumArgOperands() == 4)
+ if (CI->arg_size() == 4)
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && (Name.startswith("avx.vperm2f128.") ||
@@ -2739,7 +2739,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
- if (CI->getNumArgOperands() == 4)
+ if (CI->arg_size() == 4)
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && (Name == "sse2.pshufl.w" ||
@@ -2758,7 +2758,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
- if (CI->getNumArgOperands() == 4)
+ if (CI->arg_size() == 4)
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && (Name == "sse2.pshufh.w" ||
@@ -2777,7 +2777,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
- if (CI->getNumArgOperands() == 4)
+ if (CI->arg_size() == 4)
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
@@ -3346,7 +3346,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
if (NegAcc)
C = Builder.CreateFNeg(C);
- if (CI->getNumArgOperands() == 5 &&
+ if (CI->arg_size() == 5 &&
(!isa<ConstantInt>(CI->getArgOperand(4)) ||
cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
Intrinsic::ID IID;
@@ -3399,7 +3399,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
// Drop the "avx512.mask." to make it easier.
Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
bool IsSubAdd = Name[3] == 's';
- if (CI->getNumArgOperands() == 5) {
+ if (CI->arg_size() == 5) {
Intrinsic::ID IID;
// Check the character before ".512" in string.
if (Name[Name.size()-5] == 's')
@@ -3686,8 +3686,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
case Intrinsic::arm_neon_vst2lane:
case Intrinsic::arm_neon_vst3lane:
case Intrinsic::arm_neon_vst4lane: {
- SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
- CI->arg_operands().end());
+ SmallVector<Value *, 4> Args(CI->args());
NewCall = Builder.CreateCall(NewFn, Args);
break;
}
@@ -3701,14 +3700,14 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
case Intrinsic::aarch64_neon_bfmlalb:
case Intrinsic::aarch64_neon_bfmlalt: {
SmallVector<Value *, 3> Args;
- assert(CI->getNumArgOperands() == 3 &&
+ assert(CI->arg_size() == 3 &&
"Mismatch between function args and call args");
size_t OperandWidth =
CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
assert((OperandWidth == 64 || OperandWidth == 128) &&
"Unexpected operand width");
Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
- auto Iter = CI->arg_operands().begin();
+ auto Iter = CI->args().begin();
Args.push_back(*Iter++);
Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
@@ -3722,18 +3721,17 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
case Intrinsic::ctlz:
case Intrinsic::cttz:
- assert(CI->getNumArgOperands() == 1 &&
+ assert(CI->arg_size() == 1 &&
"Mismatch between function args and call args");
NewCall =
Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
break;
case Intrinsic::objectsize: {
- Value *NullIsUnknownSize = CI->getNumArgOperands() == 2
- ? Builder.getFalse()
- : CI->getArgOperand(2);
+ Value *NullIsUnknownSize =
+ CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
Value *Dynamic =
- CI->getNumArgOperands() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
+ CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
NewCall = Builder.CreateCall(
NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
break;
@@ -3749,7 +3747,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
case Intrinsic::dbg_value:
// Upgrade from the old version that had an extra offset argument.
- assert(CI->getNumArgOperands() == 4);
+ assert(CI->arg_size() == 4);
// Drop nonzero offsets instead of attempting to upgrade them.
if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
if (Offset->isZeroValue()) {
@@ -3763,7 +3761,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
case Intrinsic::ptr_annotation:
// Upgrade from versions that lacked the annotation attribute argument.
- assert(CI->getNumArgOperands() == 4 &&
+ assert(CI->arg_size() == 4 &&
"Before LLVM 12.0 this intrinsic took four arguments");
// Create a new call with an added null annotation attribute argument.
NewCall = Builder.CreateCall(
@@ -3777,7 +3775,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
case Intrinsic::var_annotation:
// Upgrade from versions that lacked the annotation attribute argument.
- assert(CI->getNumArgOperands() == 4 &&
+ assert(CI->arg_size() == 4 &&
"Before LLVM 12.0 this intrinsic took four arguments");
// Create a new call with an added null annotation attribute argument.
NewCall = Builder.CreateCall(
@@ -3796,8 +3794,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
case Intrinsic::x86_xop_vpermil2ps:
case Intrinsic::x86_xop_vpermil2pd_256:
case Intrinsic::x86_xop_vpermil2ps_256: {
- SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
- CI->arg_operands().end());
+ SmallVector<Value *, 4> Args(CI->args());
VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
@@ -3858,8 +3855,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
case Intrinsic::x86_avx2_mpsadbw: {
// Need to truncate the last argument from i32 to i8 -- this argument models
// an inherently 8-bit immediate operand to these x86 instructions.
- SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
- CI->arg_operands().end());
+ SmallVector<Value *, 4> Args(CI->args());
// Replace the last argument with a trunc.
Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
@@ -3873,8 +3869,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
case Intrinsic::x86_avx512_mask_cmp_ps_128:
case Intrinsic::x86_avx512_mask_cmp_ps_256:
case Intrinsic::x86_avx512_mask_cmp_ps_512: {
- SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
- CI->arg_operands().end());
+ SmallVector<Value *, 4> Args(CI->args());
unsigned NumElts =
cast<FixedVectorType>(Args[0]->getType())->getNumElements();
Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
@@ -3895,8 +3890,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
case Intrinsic::invariant_start:
case Intrinsic::invariant_end: {
- SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
- CI->arg_operands().end());
+ SmallVector<Value *, 4> Args(CI->args());
NewCall = Builder.CreateCall(NewFn, Args);
break;
}
@@ -3904,8 +3898,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
case Intrinsic::masked_store:
case Intrinsic::masked_gather:
case Intrinsic::masked_scatter: {
- SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
- CI->arg_operands().end());
+ SmallVector<Value *, 4> Args(CI->args());
NewCall = Builder.CreateCall(NewFn, Args);
NewCall->copyMetadata(*CI);
break;
@@ -3921,7 +3914,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
// @llvm.memset...(i8*, i8, i[32|64], i32, i1)
// -> @llvm.memset...(i8*, i8, i[32|64], i1)
// Note: i8*'s in the above can be any pointer type
- if (CI->getNumArgOperands() != 5) {
+ if (CI->arg_size() != 5) {
DefaultCase();
return;
}
@@ -4111,7 +4104,7 @@ void llvm::UpgradeARCRuntime(Module &M) {
bool InvalidCast = false;
- for (unsigned I = 0, E = CI->getNumArgOperands(); I != E; ++I) {
+ for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
Value *Arg = CI->getArgOperand(I);
// Bitcast argument to the parameter type of the new function if it's
@@ -4361,8 +4354,8 @@ struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
return;
// If we get here, the caller doesn't have the strictfp attribute
// but this callsite does. Replace the strictfp attribute with nobuiltin.
- Call.removeAttribute(AttributeList::FunctionIndex, Attribute::StrictFP);
- Call.addAttribute(AttributeList::FunctionIndex, Attribute::NoBuiltin);
+ Call.removeFnAttr(Attribute::StrictFP);
+ Call.addFnAttr(Attribute::NoBuiltin);
}
};
} // namespace
@@ -4383,8 +4376,7 @@ void llvm::UpgradeFunctionAttributes(Function &F) {
}
// Remove all incompatible attributes from the function.
- F.removeAttributes(AttributeList::ReturnIndex,
- AttributeFuncs::typeIncompatible(F.getReturnType()));
+ F.removeRetAttrs(AttributeFuncs::typeIncompatible(F.getReturnType()));
for (auto &Arg : F.args())
Arg.removeAttrs(AttributeFuncs::typeIncompatible(Arg.getType()));
}
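The AutoUpgrade changes above replace CallBase::getNumArgOperands() and arg_operands() with arg_size() and args(); a minimal sketch of the new spelling follows (collectArgs is an illustrative helper only).

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/InstrTypes.h"
#include <cassert>
using namespace llvm;

// Gather a call's arguments using the range/size accessors this change adopts.
static SmallVector<Value *, 4> collectArgs(CallBase &CB) {
  SmallVector<Value *, 4> Args(CB.args()); // was CB.arg_operands()
  assert(Args.size() == CB.arg_size() &&   // was CB.getNumArgOperands()
         "range and size accessors must agree");
  return Args;
}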
diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp
index d14abafdef2e..ed1956e0f7e9 100644
--- a/llvm/lib/IR/BasicBlock.cpp
+++ b/llvm/lib/IR/BasicBlock.cpp
@@ -12,6 +12,7 @@
#include "llvm/IR/BasicBlock.h"
#include "SymbolTableListTraitsImpl.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
@@ -23,6 +24,9 @@
using namespace llvm;
+#define DEBUG_TYPE "ir"
+STATISTIC(NumInstrRenumberings, "Number of renumberings across all blocks");
+
ValueSymbolTable *BasicBlock::getValueSymbolTable() {
if (Function *F = getParent())
return F->getValueSymbolTable();
@@ -505,6 +509,8 @@ void BasicBlock::renumberInstructions() {
BasicBlockBits Bits = getBasicBlockBits();
Bits.InstrOrderValid = true;
setBasicBlockBits(Bits);
+
+ NumInstrRenumberings++;
}
#ifndef NDEBUG
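The BasicBlock change above adds a statistic counting instruction renumberings; a minimal sketch of how such counters are surfaced from a standalone tool, assuming statistics support is compiled in (runWithStats is illustrative only).

#include "llvm/ADT/Statistic.h"
#include "llvm/Support/raw_ostream.h"

// Enable statistics collection, run whatever exercises
// BasicBlock::renumberInstructions(), then print all counters (including the
// "ir" NumInstrRenumberings counter added above) to stderr.
static void runWithStats() {
  llvm::EnableStatistics(/*DoPrintOnExit=*/false);
  // ... run passes / transforms here ...
  llvm::PrintStatistics(llvm::errs());
}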
diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp
index 5f05aa2e94e7..437fd0558447 100644
--- a/llvm/lib/IR/ConstantFold.cpp
+++ b/llvm/lib/IR/ConstantFold.cpp
@@ -349,200 +349,6 @@ static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart,
}
}
-/// Wrapper around getFoldedSizeOfImpl() that adds caching.
-static Constant *getFoldedSizeOf(Type *Ty, Type *DestTy, bool Folded,
- DenseMap<Type *, Constant *> &Cache);
-
-/// Return a ConstantExpr with type DestTy for sizeof on Ty, with any known
-/// factors factored out. If Folded is false, return null if no factoring was
-/// possible, to avoid endlessly bouncing an unfoldable expression back into the
-/// top-level folder.
-static Constant *getFoldedSizeOfImpl(Type *Ty, Type *DestTy, bool Folded,
- DenseMap<Type *, Constant *> &Cache) {
- // This is the actual implementation of getFoldedSizeOf(). To get the caching
- // behavior, we need to call getFoldedSizeOf() when we recurse.
-
- if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
- Constant *N = ConstantInt::get(DestTy, ATy->getNumElements());
- Constant *E = getFoldedSizeOf(ATy->getElementType(), DestTy, true, Cache);
- return ConstantExpr::getNUWMul(E, N);
- }
-
- if (StructType *STy = dyn_cast<StructType>(Ty))
- if (!STy->isPacked()) {
- unsigned NumElems = STy->getNumElements();
- // An empty struct has size zero.
- if (NumElems == 0)
- return ConstantExpr::getNullValue(DestTy);
- // Check for a struct with all members having the same size.
- Constant *MemberSize =
- getFoldedSizeOf(STy->getElementType(0), DestTy, true, Cache);
- bool AllSame = true;
- for (unsigned i = 1; i != NumElems; ++i)
- if (MemberSize !=
- getFoldedSizeOf(STy->getElementType(i), DestTy, true, Cache)) {
- AllSame = false;
- break;
- }
- if (AllSame) {
- Constant *N = ConstantInt::get(DestTy, NumElems);
- return ConstantExpr::getNUWMul(MemberSize, N);
- }
- }
-
- // Pointer size doesn't depend on the pointee type, so canonicalize them
- // to an arbitrary pointee.
- if (PointerType *PTy = dyn_cast<PointerType>(Ty))
- if (!PTy->getElementType()->isIntegerTy(1))
- return getFoldedSizeOf(
- PointerType::get(IntegerType::get(PTy->getContext(), 1),
- PTy->getAddressSpace()),
- DestTy, true, Cache);
-
- // If there's no interesting folding happening, bail so that we don't create
- // a constant that looks like it needs folding but really doesn't.
- if (!Folded)
- return nullptr;
-
- // Base case: Get a regular sizeof expression.
- Constant *C = ConstantExpr::getSizeOf(Ty);
- C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
- DestTy, false),
- C, DestTy);
- return C;
-}
-
-static Constant *getFoldedSizeOf(Type *Ty, Type *DestTy, bool Folded,
- DenseMap<Type *, Constant *> &Cache) {
- // Check for previously generated folded size constant.
- auto It = Cache.find(Ty);
- if (It != Cache.end())
- return It->second;
- return Cache[Ty] = getFoldedSizeOfImpl(Ty, DestTy, Folded, Cache);
-}
-
-static Constant *getFoldedSizeOf(Type *Ty, Type *DestTy, bool Folded) {
- DenseMap<Type *, Constant *> Cache;
- return getFoldedSizeOf(Ty, DestTy, Folded, Cache);
-}
-
-/// Return a ConstantExpr with type DestTy for alignof on Ty, with any known
-/// factors factored out. If Folded is false, return null if no factoring was
-/// possible, to avoid endlessly bouncing an unfoldable expression back into the
-/// top-level folder.
-static Constant *getFoldedAlignOf(Type *Ty, Type *DestTy, bool Folded) {
- // The alignment of an array is equal to the alignment of the
- // array element. Note that this is not always true for vectors.
- if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
- Constant *C = ConstantExpr::getAlignOf(ATy->getElementType());
- C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
- DestTy,
- false),
- C, DestTy);
- return C;
- }
-
- if (StructType *STy = dyn_cast<StructType>(Ty)) {
- // Packed structs always have an alignment of 1.
- if (STy->isPacked())
- return ConstantInt::get(DestTy, 1);
-
- // Otherwise, struct alignment is the maximum alignment of any member.
- // Without target data, we can't compare much, but we can check to see
- // if all the members have the same alignment.
- unsigned NumElems = STy->getNumElements();
- // An empty struct has minimal alignment.
- if (NumElems == 0)
- return ConstantInt::get(DestTy, 1);
- // Check for a struct with all members having the same alignment.
- Constant *MemberAlign =
- getFoldedAlignOf(STy->getElementType(0), DestTy, true);
- bool AllSame = true;
- for (unsigned i = 1; i != NumElems; ++i)
- if (MemberAlign != getFoldedAlignOf(STy->getElementType(i), DestTy, true)) {
- AllSame = false;
- break;
- }
- if (AllSame)
- return MemberAlign;
- }
-
- // Pointer alignment doesn't depend on the pointee type, so canonicalize them
- // to an arbitrary pointee.
- if (PointerType *PTy = dyn_cast<PointerType>(Ty))
- if (!PTy->getElementType()->isIntegerTy(1))
- return
- getFoldedAlignOf(PointerType::get(IntegerType::get(PTy->getContext(),
- 1),
- PTy->getAddressSpace()),
- DestTy, true);
-
- // If there's no interesting folding happening, bail so that we don't create
- // a constant that looks like it needs folding but really doesn't.
- if (!Folded)
- return nullptr;
-
- // Base case: Get a regular alignof expression.
- Constant *C = ConstantExpr::getAlignOf(Ty);
- C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
- DestTy, false),
- C, DestTy);
- return C;
-}
-
-/// Return a ConstantExpr with type DestTy for offsetof on Ty and FieldNo, with
-/// any known factors factored out. If Folded is false, return null if no
-/// factoring was possible, to avoid endlessly bouncing an unfoldable expression
-/// back into the top-level folder.
-static Constant *getFoldedOffsetOf(Type *Ty, Constant *FieldNo, Type *DestTy,
- bool Folded) {
- if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
- Constant *N = ConstantExpr::getCast(CastInst::getCastOpcode(FieldNo, false,
- DestTy, false),
- FieldNo, DestTy);
- Constant *E = getFoldedSizeOf(ATy->getElementType(), DestTy, true);
- return ConstantExpr::getNUWMul(E, N);
- }
-
- if (StructType *STy = dyn_cast<StructType>(Ty))
- if (!STy->isPacked()) {
- unsigned NumElems = STy->getNumElements();
- // An empty struct has no members.
- if (NumElems == 0)
- return nullptr;
- // Check for a struct with all members having the same size.
- Constant *MemberSize =
- getFoldedSizeOf(STy->getElementType(0), DestTy, true);
- bool AllSame = true;
- for (unsigned i = 1; i != NumElems; ++i)
- if (MemberSize !=
- getFoldedSizeOf(STy->getElementType(i), DestTy, true)) {
- AllSame = false;
- break;
- }
- if (AllSame) {
- Constant *N = ConstantExpr::getCast(CastInst::getCastOpcode(FieldNo,
- false,
- DestTy,
- false),
- FieldNo, DestTy);
- return ConstantExpr::getNUWMul(MemberSize, N);
- }
- }
-
- // If there's no interesting folding happening, bail so that we don't create
- // a constant that looks like it needs folding but really doesn't.
- if (!Folded)
- return nullptr;
-
- // Base case: Get a regular offsetof expression.
- Constant *C = ConstantExpr::getOffsetOf(Ty, FieldNo);
- C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
- DestTy, false),
- C, DestTy);
- return C;
-}
-
Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
Type *DestTy) {
if (isa<PoisonValue>(V))
@@ -666,53 +472,6 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
// Is it a null pointer value?
if (V->isNullValue())
return ConstantInt::get(DestTy, 0);
- // If this is a sizeof-like expression, pull out multiplications by
- // known factors to expose them to subsequent folding. If it's an
- // alignof-like expression, factor out known factors.
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
- if (CE->getOpcode() == Instruction::GetElementPtr &&
- CE->getOperand(0)->isNullValue()) {
- // FIXME: Looks like getFoldedSizeOf(), getFoldedOffsetOf() and
- // getFoldedAlignOf() don't handle the case when DestTy is a vector of
- // pointers yet. We end up in asserts in CastInst::getCastOpcode (see
- // test/Analysis/ConstantFolding/cast-vector.ll). I've only seen this
- // happen in one "real" C-code test case, so it does not seem to be an
- // important optimization to handle vectors here. For now, simply bail
- // out.
- if (DestTy->isVectorTy())
- return nullptr;
- GEPOperator *GEPO = cast<GEPOperator>(CE);
- Type *Ty = GEPO->getSourceElementType();
- if (CE->getNumOperands() == 2) {
- // Handle a sizeof-like expression.
- Constant *Idx = CE->getOperand(1);
- bool isOne = isa<ConstantInt>(Idx) && cast<ConstantInt>(Idx)->isOne();
- if (Constant *C = getFoldedSizeOf(Ty, DestTy, !isOne)) {
- Idx = ConstantExpr::getCast(CastInst::getCastOpcode(Idx, true,
- DestTy, false),
- Idx, DestTy);
- return ConstantExpr::getMul(C, Idx);
- }
- } else if (CE->getNumOperands() == 3 &&
- CE->getOperand(1)->isNullValue()) {
- // Handle an alignof-like expression.
- if (StructType *STy = dyn_cast<StructType>(Ty))
- if (!STy->isPacked()) {
- ConstantInt *CI = cast<ConstantInt>(CE->getOperand(2));
- if (CI->isOne() &&
- STy->getNumElements() == 2 &&
- STy->getElementType(0)->isIntegerTy(1)) {
- return getFoldedAlignOf(STy->getElementType(1), DestTy, false);
- }
- }
- // Handle an offsetof-like expression.
- if (Ty->isStructTy() || Ty->isArrayTy()) {
- if (Constant *C = getFoldedOffsetOf(Ty, CE->getOperand(2),
- DestTy, false))
- return C;
- }
- }
- }
// Other pointer types cannot be casted
return nullptr;
case Instruction::UIToFP:
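
The removals above drop the hand-rolled folding of sizeof/alignof/offsetof-style constant expressions (getFoldedSizeOf, getFoldedAlignOf, getFoldedOffsetOf and their caller in the ptrtoint case). Those expressions are the classic null-GEP idiom that the helpers tried to re-factor by hand. A minimal sketch of how such expressions are built, assuming an LLVM development tree; the struct type and the driver are purely illustrative:

#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;
  auto *STy = StructType::get(Ctx, {Type::getInt8Ty(Ctx), Type::getInt64Ty(Ctx)});
  // sizeof(T):  ptrtoint (T* getelementptr (T, T* null, i32 1) to i64)
  Constant *SizeOf = ConstantExpr::getSizeOf(STy);
  // alignof(T): offset of the T member inside {i1, T}, again expressed as a null GEP
  Constant *AlignOf = ConstantExpr::getAlignOf(STy);
  SizeOf->print(errs());
  errs() << "\n";
  AlignOf->print(errs());
  errs() << "\n";
}
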
@@ -720,7 +479,7 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
const APInt &api = CI->getValue();
APFloat apf(DestTy->getFltSemantics(),
- APInt::getNullValue(DestTy->getPrimitiveSizeInBits()));
+ APInt::getZero(DestTy->getPrimitiveSizeInBits()));
apf.convertFromAPInt(api, opc==Instruction::SIToFP,
APFloat::rmNearestTiesToEven);
return ConstantFP::get(V->getContext(), apf);
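
This is the first of many spelling updates in this import: APInt::getNullValue becomes getZero here, and the related getAllOnesValue/isNullValue/isOneValue/isAllOnesValue queries become getAllOnes/isZero/isOne/isAllOnes further down. A quick sketch of the new spellings, assuming LLVM 14-era headers:

#include "llvm/ADT/APInt.h"
#include <cassert>
using namespace llvm;

int main() {
  assert(APInt::getZero(32).isZero());        // formerly getNullValue / isNullValue
  assert(APInt::getAllOnes(32).isAllOnes());  // formerly getAllOnesValue / isAllOnesValue
  assert(APInt(32, 1).isOne());               // formerly isOneValue
}
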
@@ -908,13 +667,16 @@ Constant *llvm::ConstantFoldExtractElementInstruction(Constant *Val,
}
}
+ if (Constant *C = Val->getAggregateElement(CIdx))
+ return C;
+
// Lane < Splat minimum vector width => extractelt Splat(x), Lane -> x
if (CIdx->getValue().ult(ValVTy->getElementCount().getKnownMinValue())) {
if (Constant *SplatVal = Val->getSplatValue())
return SplatVal;
}
- return Val->getAggregateElement(CIdx);
+ return nullptr;
}
Constant *llvm::ConstantFoldInsertElementInstruction(Constant *Val,
@@ -969,12 +731,16 @@ Constant *llvm::ConstantFoldShuffleVectorInstruction(Constant *V1, Constant *V2,
// If the mask is all zeros this is a splat, no need to go through all
// elements.
- if (all_of(Mask, [](int Elt) { return Elt == 0; }) &&
- !MaskEltCount.isScalable()) {
+ if (all_of(Mask, [](int Elt) { return Elt == 0; })) {
Type *Ty = IntegerType::get(V1->getContext(), 32);
Constant *Elt =
ConstantExpr::getExtractElement(V1, ConstantInt::get(Ty, 0));
- return ConstantVector::getSplat(MaskEltCount, Elt);
+
+ if (Elt->isNullValue()) {
+ auto *VTy = VectorType::get(EltTy, MaskEltCount);
+ return ConstantAggregateZero::get(VTy);
+ } else if (!MaskEltCount.isScalable())
+ return ConstantVector::getSplat(MaskEltCount, Elt);
}
// Do not iterate on scalable vector. The num of elements is unknown at
// compile-time.
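
The rewritten zero-mask case folds a splat of a zero element to ConstantAggregateZero, which is representable for scalable vectors too; an explicit ConstantVector::getSplat is only formed here when the lane count is fixed. A small sketch of the two representations, assuming LLVM headers; the element type and lane counts are illustrative:

#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;
  Type *I32 = Type::getInt32Ty(Ctx);
  // <vscale x 4 x i32> zeroinitializer: no per-lane enumeration needed.
  auto *ScalableTy = VectorType::get(I32, ElementCount::getScalable(4));
  Constant *ZeroSplat = ConstantAggregateZero::get(ScalableTy);
  // <4 x i32> splat of 7: the fold above only builds this for fixed lane counts.
  Constant *SevenSplat =
      ConstantVector::getSplat(ElementCount::getFixed(4), ConstantInt::get(I32, 7));
  (void)ZeroSplat;
  (void)SevenSplat;
}
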
@@ -1379,7 +1145,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
return ConstantInt::get(CI1->getContext(), C1V.udiv(C2V));
case Instruction::SDiv:
assert(!CI2->isZero() && "Div by zero handled above");
- if (C2V.isAllOnesValue() && C1V.isMinSignedValue())
+ if (C2V.isAllOnes() && C1V.isMinSignedValue())
return PoisonValue::get(CI1->getType()); // MIN_INT / -1 -> poison
return ConstantInt::get(CI1->getContext(), C1V.sdiv(C2V));
case Instruction::URem:
@@ -1387,7 +1153,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
return ConstantInt::get(CI1->getContext(), C1V.urem(C2V));
case Instruction::SRem:
assert(!CI2->isZero() && "Div by zero handled above");
- if (C2V.isAllOnesValue() && C1V.isMinSignedValue())
+ if (C2V.isAllOnes() && C1V.isMinSignedValue())
return PoisonValue::get(CI1->getType()); // MIN_INT % -1 -> poison
return ConstantInt::get(CI1->getContext(), C1V.srem(C2V));
case Instruction::And:
@@ -2030,19 +1796,8 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
if (isa<ConstantInt>(C1) && isa<ConstantInt>(C2)) {
const APInt &V1 = cast<ConstantInt>(C1)->getValue();
const APInt &V2 = cast<ConstantInt>(C2)->getValue();
- switch (pred) {
- default: llvm_unreachable("Invalid ICmp Predicate");
- case ICmpInst::ICMP_EQ: return ConstantInt::get(ResultTy, V1 == V2);
- case ICmpInst::ICMP_NE: return ConstantInt::get(ResultTy, V1 != V2);
- case ICmpInst::ICMP_SLT: return ConstantInt::get(ResultTy, V1.slt(V2));
- case ICmpInst::ICMP_SGT: return ConstantInt::get(ResultTy, V1.sgt(V2));
- case ICmpInst::ICMP_SLE: return ConstantInt::get(ResultTy, V1.sle(V2));
- case ICmpInst::ICMP_SGE: return ConstantInt::get(ResultTy, V1.sge(V2));
- case ICmpInst::ICMP_ULT: return ConstantInt::get(ResultTy, V1.ult(V2));
- case ICmpInst::ICMP_UGT: return ConstantInt::get(ResultTy, V1.ugt(V2));
- case ICmpInst::ICMP_ULE: return ConstantInt::get(ResultTy, V1.ule(V2));
- case ICmpInst::ICMP_UGE: return ConstantInt::get(ResultTy, V1.uge(V2));
- }
+ return ConstantInt::get(
+ ResultTy, ICmpInst::compare(V1, V2, (ICmpInst::Predicate)pred));
} else if (isa<ConstantFP>(C1) && isa<ConstantFP>(C2)) {
const APFloat &C1V = cast<ConstantFP>(C1)->getValueAPF();
const APFloat &C2V = cast<ConstantFP>(C2)->getValueAPF();
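
The ten-way predicate switch collapses into ICmpInst::compare, which evaluates any integer predicate on a pair of APInts. A minimal sketch, assuming a tree where that helper is available (it is what the new code above calls):

#include "llvm/ADT/APInt.h"
#include "llvm/IR/Instructions.h"
#include <cassert>
using namespace llvm;

int main() {
  APInt A(8, 200), B(8, 100);
  assert(ICmpInst::compare(A, B, ICmpInst::ICMP_UGT));  // 200 >u 100
  assert(ICmpInst::compare(A, B, ICmpInst::ICMP_SLT));  // as i8, 200 is -56 <s 100
}
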
@@ -2564,7 +2319,7 @@ Constant *llvm::ConstantFoldGetElementPtr(Type *PointeeTy, Constant *C,
if (isIndexInRangeOfArrayType(STy->getNumElements(), CI))
// It's in range, skip to the next index.
continue;
- if (CI->getSExtValue() < 0) {
+ if (CI->isNegative()) {
// It's out of range and negative, don't try to factor it.
Unknown = true;
continue;
@@ -2575,7 +2330,7 @@ Constant *llvm::ConstantFoldGetElementPtr(Type *PointeeTy, Constant *C,
for (unsigned I = 0, E = CV->getNumElements(); I != E; ++I) {
auto *CI = cast<ConstantInt>(CV->getElementAsConstant(I));
InRange &= isIndexInRangeOfArrayType(STy->getNumElements(), CI);
- if (CI->getSExtValue() < 0) {
+ if (CI->isNegative()) {
Unknown = true;
break;
}
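
isNegative() only inspects the sign bit, while getSExtValue() < 0 would assert for values wider than 64 bits, so the new form is both shorter and safe for arbitrary widths. A tiny sketch at the APInt level, assuming LLVM headers:

#include "llvm/ADT/APInt.h"
#include <cassert>
using namespace llvm;

int main() {
  APInt Wide = APInt::getSignedMinValue(128);  // -2^127, too wide for int64_t
  assert(Wide.isNegative());
  // Wide.getSExtValue() would assert here: the value needs more than 64 bits.
}
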
diff --git a/llvm/lib/IR/ConstantRange.cpp b/llvm/lib/IR/ConstantRange.cpp
index 0649776dbc22..a0f2179bddb4 100644
--- a/llvm/lib/IR/ConstantRange.cpp
+++ b/llvm/lib/IR/ConstantRange.cpp
@@ -110,7 +110,7 @@ ConstantRange ConstantRange::makeAllowedICmpRegion(CmpInst::Predicate Pred,
APInt UMin(CR.getUnsignedMin());
if (UMin.isMaxValue())
return getEmpty(W);
- return ConstantRange(std::move(UMin) + 1, APInt::getNullValue(W));
+ return ConstantRange(std::move(UMin) + 1, APInt::getZero(W));
}
case CmpInst::ICMP_SGT: {
APInt SMin(CR.getSignedMin());
@@ -119,7 +119,7 @@ ConstantRange ConstantRange::makeAllowedICmpRegion(CmpInst::Predicate Pred,
return ConstantRange(std::move(SMin) + 1, APInt::getSignedMinValue(W));
}
case CmpInst::ICMP_UGE:
- return getNonEmpty(CR.getUnsignedMin(), APInt::getNullValue(W));
+ return getNonEmpty(CR.getUnsignedMin(), APInt::getZero(W));
case CmpInst::ICMP_SGE:
return getNonEmpty(CR.getSignedMin(), APInt::getSignedMinValue(W));
}
@@ -147,38 +147,77 @@ ConstantRange ConstantRange::makeExactICmpRegion(CmpInst::Predicate Pred,
return makeAllowedICmpRegion(Pred, C);
}
-bool ConstantRange::getEquivalentICmp(CmpInst::Predicate &Pred,
- APInt &RHS) const {
- bool Success = false;
+bool ConstantRange::areInsensitiveToSignednessOfICmpPredicate(
+ const ConstantRange &CR1, const ConstantRange &CR2) {
+ if (CR1.isEmptySet() || CR2.isEmptySet())
+ return true;
+
+ return (CR1.isAllNonNegative() && CR2.isAllNonNegative()) ||
+ (CR1.isAllNegative() && CR2.isAllNegative());
+}
+
+bool ConstantRange::areInsensitiveToSignednessOfInvertedICmpPredicate(
+ const ConstantRange &CR1, const ConstantRange &CR2) {
+ if (CR1.isEmptySet() || CR2.isEmptySet())
+ return true;
+
+ return (CR1.isAllNonNegative() && CR2.isAllNegative()) ||
+ (CR1.isAllNegative() && CR2.isAllNonNegative());
+}
+
+CmpInst::Predicate ConstantRange::getEquivalentPredWithFlippedSignedness(
+ CmpInst::Predicate Pred, const ConstantRange &CR1,
+ const ConstantRange &CR2) {
+ assert(CmpInst::isIntPredicate(Pred) && CmpInst::isRelational(Pred) &&
+ "Only for relational integer predicates!");
+ CmpInst::Predicate FlippedSignednessPred =
+ CmpInst::getFlippedSignednessPredicate(Pred);
+
+ if (areInsensitiveToSignednessOfICmpPredicate(CR1, CR2))
+ return FlippedSignednessPred;
+
+ if (areInsensitiveToSignednessOfInvertedICmpPredicate(CR1, CR2))
+ return CmpInst::getInversePredicate(FlippedSignednessPred);
+
+ return CmpInst::Predicate::BAD_ICMP_PREDICATE;
+}
+
+void ConstantRange::getEquivalentICmp(CmpInst::Predicate &Pred,
+ APInt &RHS, APInt &Offset) const {
+ Offset = APInt(getBitWidth(), 0);
if (isFullSet() || isEmptySet()) {
Pred = isEmptySet() ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
RHS = APInt(getBitWidth(), 0);
- Success = true;
} else if (auto *OnlyElt = getSingleElement()) {
Pred = CmpInst::ICMP_EQ;
RHS = *OnlyElt;
- Success = true;
} else if (auto *OnlyMissingElt = getSingleMissingElement()) {
Pred = CmpInst::ICMP_NE;
RHS = *OnlyMissingElt;
- Success = true;
} else if (getLower().isMinSignedValue() || getLower().isMinValue()) {
Pred =
getLower().isMinSignedValue() ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT;
RHS = getUpper();
- Success = true;
} else if (getUpper().isMinSignedValue() || getUpper().isMinValue()) {
Pred =
getUpper().isMinSignedValue() ? CmpInst::ICMP_SGE : CmpInst::ICMP_UGE;
RHS = getLower();
- Success = true;
+ } else {
+ Pred = CmpInst::ICMP_ULT;
+ RHS = getUpper() - getLower();
+ Offset = -getLower();
}
- assert((!Success || ConstantRange::makeExactICmpRegion(Pred, RHS) == *this) &&
+ assert(ConstantRange::makeExactICmpRegion(Pred, RHS) == add(Offset) &&
"Bad result!");
+}
- return Success;
+bool ConstantRange::getEquivalentICmp(CmpInst::Predicate &Pred,
+ APInt &RHS) const {
+ APInt Offset;
+ getEquivalentICmp(Pred, RHS, Offset);
+ return Offset.isZero();
}
bool ConstantRange::icmp(CmpInst::Predicate Pred,
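
The new three-argument getEquivalentICmp can describe any range: a half-open range [Lo, Hi) becomes icmp ult (X + Offset), RHS with Offset = -Lo and RHS = Hi - Lo, and the old two-argument form now simply reports whether that offset is zero. A plain C++ sketch of the underlying arithmetic (i8 for concreteness; the empty/full cases that ConstantRange special-cases are left out):

#include <cassert>
#include <cstdint>

// Membership test for the half-open i8 range [Lo, Hi) as one unsigned compare.
static bool inRange(uint8_t X, uint8_t Lo, uint8_t Hi) {
  uint8_t Offset = static_cast<uint8_t>(-Lo);     // Offset = -Lower
  uint8_t RHS = static_cast<uint8_t>(Hi - Lo);    // RHS = Upper - Lower
  return static_cast<uint8_t>(X + Offset) < RHS;  // icmp ult (X + Offset), RHS
}

int main() {
  // [100, 120) is not expressible as a single icmp without the offset.
  for (int X = 0; X < 256; ++X)
    assert(inRange(static_cast<uint8_t>(X), 100, 120) == (X >= 100 && X < 120));
}
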
@@ -204,13 +243,13 @@ static ConstantRange makeExactMulNSWRegion(const APInt &V) {
// Handle special case for 0, -1 and 1. See the last for reason why we
// specialize -1 and 1.
unsigned BitWidth = V.getBitWidth();
- if (V == 0 || V.isOneValue())
+ if (V == 0 || V.isOne())
return ConstantRange::getFull(BitWidth);
APInt MinValue = APInt::getSignedMinValue(BitWidth);
APInt MaxValue = APInt::getSignedMaxValue(BitWidth);
// e.g. Returning [-127, 127], represented as [-127, -128).
- if (V.isAllOnesValue())
+ if (V.isAllOnes())
return ConstantRange(-MaxValue, MinValue);
APInt Lower, Upper;
@@ -248,8 +287,7 @@ ConstantRange::makeGuaranteedNoWrapRegion(Instruction::BinaryOps BinOp,
case Instruction::Add: {
if (Unsigned)
- return getNonEmpty(APInt::getNullValue(BitWidth),
- -Other.getUnsignedMax());
+ return getNonEmpty(APInt::getZero(BitWidth), -Other.getUnsignedMax());
APInt SignedMinVal = APInt::getSignedMinValue(BitWidth);
APInt SMin = Other.getSignedMin(), SMax = Other.getSignedMax();
@@ -291,7 +329,7 @@ ConstantRange::makeGuaranteedNoWrapRegion(Instruction::BinaryOps BinOp,
// to be at most bitwidth-1, which results in most conservative range.
APInt ShAmtUMax = ShAmt.getUnsignedMax();
if (Unsigned)
- return getNonEmpty(APInt::getNullValue(BitWidth),
+ return getNonEmpty(APInt::getZero(BitWidth),
APInt::getMaxValue(BitWidth).lshr(ShAmtUMax) + 1);
return getNonEmpty(APInt::getSignedMinValue(BitWidth).ashr(ShAmtUMax),
APInt::getSignedMaxValue(BitWidth).ashr(ShAmtUMax) + 1);
@@ -316,7 +354,7 @@ bool ConstantRange::isEmptySet() const {
}
bool ConstantRange::isWrappedSet() const {
- return Lower.ugt(Upper) && !Upper.isNullValue();
+ return Lower.ugt(Upper) && !Upper.isZero();
}
bool ConstantRange::isUpperWrapped() const {
@@ -343,11 +381,10 @@ ConstantRange::isSizeStrictlySmallerThan(const ConstantRange &Other) const {
bool
ConstantRange::isSizeLargerThan(uint64_t MaxSize) const {
- assert(MaxSize && "MaxSize can't be 0.");
// If this a full set, we need special handling to avoid needing an extra bit
// to represent the size.
if (isFullSet())
- return APInt::getMaxValue(getBitWidth()).ugt(MaxSize - 1);
+ return MaxSize == 0 || APInt::getMaxValue(getBitWidth()).ugt(MaxSize - 1);
return (Upper - Lower).ugt(MaxSize);
}
@@ -595,7 +632,7 @@ ConstantRange ConstantRange::unionWith(const ConstantRange &CR,
APInt L = CR.Lower.ult(Lower) ? CR.Lower : Lower;
APInt U = (CR.Upper - 1).ugt(Upper - 1) ? CR.Upper : Upper;
- if (L.isNullValue() && U.isNullValue())
+ if (L.isZero() && U.isZero())
return getFull();
return ConstantRange(std::move(L), std::move(U));
@@ -644,6 +681,24 @@ ConstantRange ConstantRange::unionWith(const ConstantRange &CR,
return ConstantRange(std::move(L), std::move(U));
}
+Optional<ConstantRange>
+ConstantRange::exactIntersectWith(const ConstantRange &CR) const {
+ // TODO: This can be implemented more efficiently.
+ ConstantRange Result = intersectWith(CR);
+ if (Result == inverse().unionWith(CR.inverse()).inverse())
+ return Result;
+ return None;
+}
+
+Optional<ConstantRange>
+ConstantRange::exactUnionWith(const ConstantRange &CR) const {
+ // TODO: This can be implemented more efficiently.
+ ConstantRange Result = unionWith(CR);
+ if (Result == inverse().intersectWith(CR.inverse()).inverse())
+ return Result;
+ return None;
+}
+
ConstantRange ConstantRange::castOp(Instruction::CastOps CastOp,
uint32_t ResultBitWidth) const {
switch (CastOp) {
@@ -1055,6 +1110,25 @@ ConstantRange::multiply(const ConstantRange &Other) const {
return UR.isSizeStrictlySmallerThan(SR) ? UR : SR;
}
+ConstantRange ConstantRange::smul_fast(const ConstantRange &Other) const {
+ if (isEmptySet() || Other.isEmptySet())
+ return getEmpty();
+
+ APInt Min = getSignedMin();
+ APInt Max = getSignedMax();
+ APInt OtherMin = Other.getSignedMin();
+ APInt OtherMax = Other.getSignedMax();
+
+ bool O1, O2, O3, O4;
+ auto Muls = {Min.smul_ov(OtherMin, O1), Min.smul_ov(OtherMax, O2),
+ Max.smul_ov(OtherMin, O3), Max.smul_ov(OtherMax, O4)};
+ if (O1 || O2 || O3 || O4)
+ return getFull();
+
+ auto Compare = [](const APInt &A, const APInt &B) { return A.slt(B); };
+ return getNonEmpty(std::min(Muls, Compare), std::max(Muls, Compare) + 1);
+}
+
ConstantRange
ConstantRange::smax(const ConstantRange &Other) const {
// X smax Y is: range(smax(X_smin, Y_smin),
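
smul_fast is a cheaper companion to multiply(): it only evaluates the four endpoint ("corner") products and gives up (returns the full range) if any of them overflows. A plain C++ sketch of the corner-product rule it relies on:

#include <algorithm>
#include <cassert>

int main() {
  // X in [-3, 4], Y in [-5, 2]; with no overflow, the extremes of X*Y are
  // attained at combinations of the endpoints.
  int Corners[4] = {-3 * -5, -3 * 2, 4 * -5, 4 * 2};
  assert(*std::min_element(Corners, Corners + 4) == -20);
  assert(*std::max_element(Corners, Corners + 4) == 15);
}
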
@@ -1113,13 +1187,13 @@ ConstantRange::umin(const ConstantRange &Other) const {
ConstantRange
ConstantRange::udiv(const ConstantRange &RHS) const {
- if (isEmptySet() || RHS.isEmptySet() || RHS.getUnsignedMax().isNullValue())
+ if (isEmptySet() || RHS.isEmptySet() || RHS.getUnsignedMax().isZero())
return getEmpty();
APInt Lower = getUnsignedMin().udiv(RHS.getUnsignedMax());
APInt RHS_umin = RHS.getUnsignedMin();
- if (RHS_umin.isNullValue()) {
+ if (RHS_umin.isZero()) {
// We want the lowest value in RHS excluding zero. Usually that would be 1
// except for a range in the form of [X, 1) in which case it would be X.
if (RHS.getUpper() == 1)
@@ -1136,7 +1210,7 @@ ConstantRange ConstantRange::sdiv(const ConstantRange &RHS) const {
// We split up the LHS and RHS into positive and negative components
// and then also compute the positive and negative components of the result
// separately by combining division results with the appropriate signs.
- APInt Zero = APInt::getNullValue(getBitWidth());
+ APInt Zero = APInt::getZero(getBitWidth());
APInt SignedMin = APInt::getSignedMinValue(getBitWidth());
ConstantRange PosFilter(APInt(getBitWidth(), 1), SignedMin);
ConstantRange NegFilter(SignedMin, Zero);
@@ -1159,12 +1233,12 @@ ConstantRange ConstantRange::sdiv(const ConstantRange &RHS) const {
// (For APInts the operation is well-defined and yields SignedMin.) We
// handle this by dropping either SignedMin from the LHS or -1 from the RHS.
APInt Lo = (NegL.Upper - 1).sdiv(NegR.Lower);
- if (NegL.Lower.isMinSignedValue() && NegR.Upper.isNullValue()) {
+ if (NegL.Lower.isMinSignedValue() && NegR.Upper.isZero()) {
// Remove -1 from the LHS. Skip if it's the only element, as this would
// leave us with an empty set.
- if (!NegR.Lower.isAllOnesValue()) {
+ if (!NegR.Lower.isAllOnes()) {
APInt AdjNegRUpper;
- if (RHS.Lower.isAllOnesValue())
+ if (RHS.Lower.isAllOnes())
// Negative part of [-1, X] without -1 is [SignedMin, X].
AdjNegRUpper = RHS.Upper;
else
@@ -1218,12 +1292,12 @@ ConstantRange ConstantRange::sdiv(const ConstantRange &RHS) const {
}
ConstantRange ConstantRange::urem(const ConstantRange &RHS) const {
- if (isEmptySet() || RHS.isEmptySet() || RHS.getUnsignedMax().isNullValue())
+ if (isEmptySet() || RHS.isEmptySet() || RHS.getUnsignedMax().isZero())
return getEmpty();
if (const APInt *RHSInt = RHS.getSingleElement()) {
// UREM by null is UB.
- if (RHSInt->isNullValue())
+ if (RHSInt->isZero())
return getEmpty();
// Use APInt's implementation of UREM for single element ranges.
if (const APInt *LHSInt = getSingleElement())
@@ -1236,7 +1310,7 @@ ConstantRange ConstantRange::urem(const ConstantRange &RHS) const {
// L % R is <= L and < R.
APInt Upper = APIntOps::umin(getUnsignedMax(), RHS.getUnsignedMax() - 1) + 1;
- return getNonEmpty(APInt::getNullValue(getBitWidth()), std::move(Upper));
+ return getNonEmpty(APInt::getZero(getBitWidth()), std::move(Upper));
}
ConstantRange ConstantRange::srem(const ConstantRange &RHS) const {
@@ -1245,7 +1319,7 @@ ConstantRange ConstantRange::srem(const ConstantRange &RHS) const {
if (const APInt *RHSInt = RHS.getSingleElement()) {
// SREM by null is UB.
- if (RHSInt->isNullValue())
+ if (RHSInt->isZero())
return getEmpty();
// Use APInt's implementation of SREM for single element ranges.
if (const APInt *LHSInt = getSingleElement())
@@ -1257,10 +1331,10 @@ ConstantRange ConstantRange::srem(const ConstantRange &RHS) const {
APInt MaxAbsRHS = AbsRHS.getUnsignedMax();
// Modulus by zero is UB.
- if (MaxAbsRHS.isNullValue())
+ if (MaxAbsRHS.isZero())
return getEmpty();
- if (MinAbsRHS.isNullValue())
+ if (MinAbsRHS.isZero())
++MinAbsRHS;
APInt MinLHS = getSignedMin(), MaxLHS = getSignedMax();
@@ -1272,7 +1346,7 @@ ConstantRange ConstantRange::srem(const ConstantRange &RHS) const {
// L % R is <= L and < R.
APInt Upper = APIntOps::umin(MaxLHS, MaxAbsRHS - 1) + 1;
- return ConstantRange(APInt::getNullValue(getBitWidth()), std::move(Upper));
+ return ConstantRange(APInt::getZero(getBitWidth()), std::move(Upper));
}
// Same basic logic as above, but the result is negative.
@@ -1291,7 +1365,7 @@ ConstantRange ConstantRange::srem(const ConstantRange &RHS) const {
}
ConstantRange ConstantRange::binaryNot() const {
- return ConstantRange(APInt::getAllOnesValue(getBitWidth())).sub(*this);
+ return ConstantRange(APInt::getAllOnes(getBitWidth())).sub(*this);
}
ConstantRange
@@ -1306,7 +1380,7 @@ ConstantRange::binaryAnd(const ConstantRange &Other) const {
// TODO: replace this with something less conservative
APInt umin = APIntOps::umin(Other.getUnsignedMax(), getUnsignedMax());
- return getNonEmpty(APInt::getNullValue(getBitWidth()), std::move(umin) + 1);
+ return getNonEmpty(APInt::getZero(getBitWidth()), std::move(umin) + 1);
}
ConstantRange
@@ -1321,7 +1395,7 @@ ConstantRange::binaryOr(const ConstantRange &Other) const {
// TODO: replace this with something less conservative
APInt umax = APIntOps::umax(getUnsignedMin(), Other.getUnsignedMin());
- return getNonEmpty(std::move(umax), APInt::getNullValue(getBitWidth()));
+ return getNonEmpty(std::move(umax), APInt::getZero(getBitWidth()));
}
ConstantRange ConstantRange::binaryXor(const ConstantRange &Other) const {
@@ -1333,9 +1407,9 @@ ConstantRange ConstantRange::binaryXor(const ConstantRange &Other) const {
return {*getSingleElement() ^ *Other.getSingleElement()};
// Special-case binary complement, since we can give a precise answer.
- if (Other.isSingleElement() && Other.getSingleElement()->isAllOnesValue())
+ if (Other.isSingleElement() && Other.getSingleElement()->isAllOnes())
return binaryNot();
- if (isSingleElement() && getSingleElement()->isAllOnesValue())
+ if (isSingleElement() && getSingleElement()->isAllOnes())
return Other.binaryNot();
// TODO: replace this with something less conservative
@@ -1347,24 +1421,33 @@ ConstantRange::shl(const ConstantRange &Other) const {
if (isEmptySet() || Other.isEmptySet())
return getEmpty();
- APInt max = getUnsignedMax();
- APInt Other_umax = Other.getUnsignedMax();
+ APInt Min = getUnsignedMin();
+ APInt Max = getUnsignedMax();
+ if (const APInt *RHS = Other.getSingleElement()) {
+ unsigned BW = getBitWidth();
+ if (RHS->uge(BW))
+ return getEmpty();
- // If we are shifting by maximum amount of
- // zero return return the original range.
- if (Other_umax.isNullValue())
- return *this;
- // there's overflow!
- if (Other_umax.ugt(max.countLeadingZeros()))
+ unsigned EqualLeadingBits = (Min ^ Max).countLeadingZeros();
+ if (RHS->ule(EqualLeadingBits))
+ return getNonEmpty(Min << *RHS, (Max << *RHS) + 1);
+
+ return getNonEmpty(APInt::getZero(BW),
+ APInt::getBitsSetFrom(BW, RHS->getZExtValue()) + 1);
+ }
+
+ APInt OtherMax = Other.getUnsignedMax();
+
+ // There's overflow!
+ if (OtherMax.ugt(Max.countLeadingZeros()))
return getFull();
// FIXME: implement the other tricky cases
- APInt min = getUnsignedMin();
- min <<= Other.getUnsignedMin();
- max <<= Other_umax;
+ Min <<= Other.getUnsignedMin();
+ Max <<= OtherMax;
- return ConstantRange(std::move(min), std::move(max) + 1);
+ return ConstantRange::getNonEmpty(std::move(Min), std::move(Max) + 1);
}
ConstantRange
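
The new single-shift-amount path in shl keeps both bounds exact whenever the shift amount does not exceed the number of leading bits on which Min and Max agree, i.e. (Min ^ Max).countLeadingZeros(); in that case no distinguishing bits are shifted out and the shifted bounds stay ordered. A plain C++ illustration with i8 values (the specific range is illustrative):

#include <cassert>
#include <cstdint>

int main() {
  // [160, 191]: Min and Max agree in their top 3 bits, so shifting left by up
  // to 3 keeps every result inside [Min << 3, Max << 3] (all modulo 256).
  const uint8_t Min = 160, Max = 191, S = 3;
  const uint8_t Lo = static_cast<uint8_t>(Min << S);  // 0
  const uint8_t Hi = static_cast<uint8_t>(Max << S);  // 248
  for (int X = Min; X <= Max; ++X) {
    uint8_t R = static_cast<uint8_t>(X << S);
    assert(Lo <= R && R <= Hi);
  }
}
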
@@ -1483,20 +1566,15 @@ ConstantRange ConstantRange::smul_sat(const ConstantRange &Other) const {
// [-1,4) * [-2,3) = min(-1*-2, -1*2, 3*-2, 3*2) = -6.
// Similarly for the upper bound, swapping min for max.
- APInt this_min = getSignedMin().sext(getBitWidth() * 2);
- APInt this_max = getSignedMax().sext(getBitWidth() * 2);
- APInt Other_min = Other.getSignedMin().sext(getBitWidth() * 2);
- APInt Other_max = Other.getSignedMax().sext(getBitWidth() * 2);
+ APInt Min = getSignedMin();
+ APInt Max = getSignedMax();
+ APInt OtherMin = Other.getSignedMin();
+ APInt OtherMax = Other.getSignedMax();
- auto L = {this_min * Other_min, this_min * Other_max, this_max * Other_min,
- this_max * Other_max};
+ auto L = {Min.smul_sat(OtherMin), Min.smul_sat(OtherMax),
+ Max.smul_sat(OtherMin), Max.smul_sat(OtherMax)};
auto Compare = [](const APInt &A, const APInt &B) { return A.slt(B); };
-
- // Note that we wanted to perform signed saturating multiplication,
- // so since we performed plain multiplication in twice the bitwidth,
- // we need to perform signed saturating truncation.
- return getNonEmpty(std::min(L, Compare).truncSSat(getBitWidth()),
- std::max(L, Compare).truncSSat(getBitWidth()) + 1);
+ return getNonEmpty(std::min(L, Compare), std::max(L, Compare) + 1);
}
ConstantRange ConstantRange::ushl_sat(const ConstantRange &Other) const {
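
smul_sat now leans on APInt::smul_sat, which clamps to the signed min/max of the original bit width, so the previous double-width multiply followed by truncSSat is unnecessary. A small sketch, assuming LLVM headers:

#include "llvm/ADT/APInt.h"
#include <cassert>
using namespace llvm;

int main() {
  APInt A(8, 100), B(8, 3);
  assert(A.smul_sat(B) == APInt(8, 127));                // 300 clamps to INT8_MAX
  APInt C(8, static_cast<uint64_t>(-100), /*isSigned=*/true);
  assert(C.smul_sat(B) == APInt::getSignedMinValue(8));  // -300 clamps to -128
}
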
@@ -1535,7 +1613,7 @@ ConstantRange ConstantRange::abs(bool IntMinIsPoison) const {
APInt Lo;
// Check whether the range crosses zero.
if (Upper.isStrictlyPositive() || !Lower.isStrictlyPositive())
- Lo = APInt::getNullValue(getBitWidth());
+ Lo = APInt::getZero(getBitWidth());
else
Lo = APIntOps::umin(Lower, -Upper + 1);
@@ -1565,7 +1643,7 @@ ConstantRange ConstantRange::abs(bool IntMinIsPoison) const {
return ConstantRange(-SMax, -SMin + 1);
// Range crosses zero.
- return ConstantRange(APInt::getNullValue(getBitWidth()),
+ return ConstantRange(APInt::getZero(getBitWidth()),
APIntOps::umax(-SMin, SMax) + 1);
}
diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp
index 6c75085a6678..c66cfb6e9ac1 100644
--- a/llvm/lib/IR/Constants.cpp
+++ b/llvm/lib/IR/Constants.cpp
@@ -95,7 +95,7 @@ bool Constant::isAllOnesValue() const {
// Check for FP which are bitcasted from -1 integers
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this))
- return CFP->getValueAPF().bitcastToAPInt().isAllOnesValue();
+ return CFP->getValueAPF().bitcastToAPInt().isAllOnes();
// Check for constant splat vectors of 1 values.
if (getType()->isVectorTy())
@@ -112,7 +112,7 @@ bool Constant::isOneValue() const {
// Check for FP which are bitcasted from 1 integers
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this))
- return CFP->getValueAPF().bitcastToAPInt().isOneValue();
+ return CFP->getValueAPF().bitcastToAPInt().isOne();
// Check for constant splat vectors of 1 values.
if (getType()->isVectorTy())
@@ -129,7 +129,7 @@ bool Constant::isNotOneValue() const {
// Check for FP which are bitcasted from 1 integers
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this))
- return !CFP->getValueAPF().bitcastToAPInt().isOneValue();
+ return !CFP->getValueAPF().bitcastToAPInt().isOne();
// Check that vectors don't contain 1
if (auto *VTy = dyn_cast<FixedVectorType>(getType())) {
@@ -315,9 +315,11 @@ containsUndefinedElement(const Constant *C,
return false;
for (unsigned i = 0, e = cast<FixedVectorType>(VTy)->getNumElements();
- i != e; ++i)
- if (HasFn(C->getAggregateElement(i)))
- return true;
+ i != e; ++i) {
+ if (Constant *Elem = C->getAggregateElement(i))
+ if (HasFn(Elem))
+ return true;
+ }
}
return false;
@@ -366,9 +368,8 @@ Constant *Constant::getNullValue(Type *Ty) {
return ConstantFP::get(Ty->getContext(),
APFloat::getZero(APFloat::IEEEquad()));
case Type::PPC_FP128TyID:
- return ConstantFP::get(Ty->getContext(),
- APFloat(APFloat::PPCDoubleDouble(),
- APInt::getNullValue(128)));
+ return ConstantFP::get(Ty->getContext(), APFloat(APFloat::PPCDoubleDouble(),
+ APInt::getZero(128)));
case Type::PointerTyID:
return ConstantPointerNull::get(cast<PointerType>(Ty));
case Type::StructTyID:
@@ -404,11 +405,10 @@ Constant *Constant::getIntegerValue(Type *Ty, const APInt &V) {
Constant *Constant::getAllOnesValue(Type *Ty) {
if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
return ConstantInt::get(Ty->getContext(),
- APInt::getAllOnesValue(ITy->getBitWidth()));
+ APInt::getAllOnes(ITy->getBitWidth()));
if (Ty->isFloatingPointTy()) {
- APFloat FL = APFloat::getAllOnesValue(Ty->getFltSemantics(),
- Ty->getPrimitiveSizeInBits());
+ APFloat FL = APFloat::getAllOnesValue(Ty->getFltSemantics());
return ConstantFP::get(Ty->getContext(), FL);
}
@@ -714,29 +714,41 @@ Constant::PossibleRelocationsTy Constant::getRelocationInfo() const {
return Result;
}
-/// If the specified constantexpr is dead, remove it. This involves recursively
-/// eliminating any dead users of the constantexpr.
-static bool removeDeadUsersOfConstant(const Constant *C) {
+/// Return true if the specified constantexpr is dead. This involves
+/// recursively traversing users of the constantexpr.
+/// If RemoveDeadUsers is true, also remove dead users at the same time.
+static bool constantIsDead(const Constant *C, bool RemoveDeadUsers) {
if (isa<GlobalValue>(C)) return false; // Cannot remove this
- while (!C->use_empty()) {
- const Constant *User = dyn_cast<Constant>(C->user_back());
+ Value::const_user_iterator I = C->user_begin(), E = C->user_end();
+ while (I != E) {
+ const Constant *User = dyn_cast<Constant>(*I);
if (!User) return false; // Non-constant usage;
- if (!removeDeadUsersOfConstant(User))
+ if (!constantIsDead(User, RemoveDeadUsers))
return false; // Constant wasn't dead
+
+ // Just removed User, so the iterator was invalidated.
+ // Since we return immediately upon finding a live user, we can always
+ // restart from user_begin().
+ if (RemoveDeadUsers)
+ I = C->user_begin();
+ else
+ ++I;
}
- // If C is only used by metadata, it should not be preserved but should have
- // its uses replaced.
- if (C->isUsedByMetadata()) {
- const_cast<Constant *>(C)->replaceAllUsesWith(
- UndefValue::get(C->getType()));
+ if (RemoveDeadUsers) {
+ // If C is only used by metadata, it should not be preserved but should
+ // have its uses replaced.
+ if (C->isUsedByMetadata()) {
+ const_cast<Constant *>(C)->replaceAllUsesWith(
+ UndefValue::get(C->getType()));
+ }
+ const_cast<Constant *>(C)->destroyConstant();
}
- const_cast<Constant*>(C)->destroyConstant();
+
return true;
}
-
void Constant::removeDeadConstantUsers() const {
Value::const_user_iterator I = user_begin(), E = user_end();
Value::const_user_iterator LastNonDeadUser = E;
@@ -748,7 +760,7 @@ void Constant::removeDeadConstantUsers() const {
continue;
}
- if (!removeDeadUsersOfConstant(User)) {
+ if (!constantIsDead(User, /* RemoveDeadUsers= */ true)) {
// If the constant wasn't dead, remember that this was the last live use
// and move on to the next constant.
LastNonDeadUser = I;
@@ -764,6 +776,20 @@ void Constant::removeDeadConstantUsers() const {
}
}
+bool Constant::hasOneLiveUse() const {
+ unsigned NumUses = 0;
+ for (const Use &use : uses()) {
+ const Constant *User = dyn_cast<Constant>(use.getUser());
+ if (!User || !constantIsDead(User, /* RemoveDeadUsers= */ false)) {
+ ++NumUses;
+
+ if (NumUses > 1)
+ return false;
+ }
+ }
+ return NumUses == 1;
+}
+
Constant *Constant::replaceUndefsWith(Constant *C, Constant *Replacement) {
assert(C && Replacement && "Expected non-nullptr constant arguments");
Type *Ty = C->getType();
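
The refactor separates the "is this constant dead?" walk from the actual deletion (constantIsDead with a RemoveDeadUsers flag), which is what lets the new Constant::hasOneLiveUse() count uses while skipping users that are themselves dead constants. A sketch, assuming this tree; the module and global are illustrative:

#include "llvm/IR/Constants.h"
#include "llvm/IR/Module.h"
#include <cassert>
using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("m", Ctx);
  auto *G = new GlobalVariable(M, Type::getInt8Ty(Ctx), /*isConstant=*/false,
                               GlobalValue::ExternalLinkage, nullptr, "g");
  // A constant expression that uses @g but has no users of its own: dead.
  ConstantExpr::getPtrToInt(G, Type::getInt64Ty(Ctx));
  assert(!G->use_empty());      // the expression does count as a use...
  assert(!G->hasOneLiveUse());  // ...but not as a live one
  G->removeDeadConstantUsers(); // walks the users and deletes the dead one
  assert(G->use_empty());
}
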
@@ -1430,12 +1456,12 @@ Constant *ConstantVector::getSplat(ElementCount EC, Constant *V) {
Type *I32Ty = Type::getInt32Ty(VTy->getContext());
// Move scalar into vector.
- Constant *UndefV = UndefValue::get(VTy);
- V = ConstantExpr::getInsertElement(UndefV, V, ConstantInt::get(I32Ty, 0));
+ Constant *PoisonV = PoisonValue::get(VTy);
+ V = ConstantExpr::getInsertElement(PoisonV, V, ConstantInt::get(I32Ty, 0));
// Build shuffle mask to perform the splat.
SmallVector<int, 8> Zeros(EC.getKnownMinValue(), 0);
// Splat.
- return ConstantExpr::getShuffleVector(V, UndefV, Zeros);
+ return ConstantExpr::getShuffleVector(V, PoisonV, Zeros);
}
ConstantTokenNone *ConstantTokenNone::get(LLVMContext &Context) {
@@ -1508,20 +1534,6 @@ Constant *ConstantExpr::getShuffleMaskForBitcode() const {
return cast<ShuffleVectorConstantExpr>(this)->ShuffleMaskForBitcode;
}
-Constant *
-ConstantExpr::getWithOperandReplaced(unsigned OpNo, Constant *Op) const {
- assert(Op->getType() == getOperand(OpNo)->getType() &&
- "Replacing operand with value of different type!");
- if (getOperand(OpNo) == Op)
- return const_cast<ConstantExpr*>(this);
-
- SmallVector<Constant*, 8> NewOps;
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
- NewOps.push_back(i == OpNo ? Op : getOperand(i));
-
- return getWithOperands(NewOps);
-}
-
Constant *ConstantExpr::getWithOperands(ArrayRef<Constant *> Ops, Type *Ty,
bool OnlyIfReduced, Type *SrcTy) const {
assert(Ops.size() == getNumOperands() && "Operand count mismatch!");
@@ -3282,7 +3294,7 @@ bool ConstantDataSequential::isCString() const {
if (Str.back() != 0) return false;
// Other elements must be non-nul.
- return Str.drop_back().find(0) == StringRef::npos;
+ return !Str.drop_back().contains(0);
}
bool ConstantDataVector::isSplatData() const {
@@ -3480,7 +3492,7 @@ Value *ConstantExpr::handleOperandChangeImpl(Value *From, Value *ToV) {
NewOps, this, From, To, NumUpdated, OperandNo);
}
-Instruction *ConstantExpr::getAsInstruction() const {
+Instruction *ConstantExpr::getAsInstruction(Instruction *InsertBefore) const {
SmallVector<Value *, 4> ValueOperands(operands());
ArrayRef<Value*> Ops(ValueOperands);
@@ -3498,40 +3510,43 @@ Instruction *ConstantExpr::getAsInstruction() const {
case Instruction::IntToPtr:
case Instruction::BitCast:
case Instruction::AddrSpaceCast:
- return CastInst::Create((Instruction::CastOps)getOpcode(),
- Ops[0], getType());
+ return CastInst::Create((Instruction::CastOps)getOpcode(), Ops[0],
+ getType(), "", InsertBefore);
case Instruction::Select:
- return SelectInst::Create(Ops[0], Ops[1], Ops[2]);
+ return SelectInst::Create(Ops[0], Ops[1], Ops[2], "", InsertBefore);
case Instruction::InsertElement:
- return InsertElementInst::Create(Ops[0], Ops[1], Ops[2]);
+ return InsertElementInst::Create(Ops[0], Ops[1], Ops[2], "", InsertBefore);
case Instruction::ExtractElement:
- return ExtractElementInst::Create(Ops[0], Ops[1]);
+ return ExtractElementInst::Create(Ops[0], Ops[1], "", InsertBefore);
case Instruction::InsertValue:
- return InsertValueInst::Create(Ops[0], Ops[1], getIndices());
+ return InsertValueInst::Create(Ops[0], Ops[1], getIndices(), "",
+ InsertBefore);
case Instruction::ExtractValue:
- return ExtractValueInst::Create(Ops[0], getIndices());
+ return ExtractValueInst::Create(Ops[0], getIndices(), "", InsertBefore);
case Instruction::ShuffleVector:
- return new ShuffleVectorInst(Ops[0], Ops[1], getShuffleMask());
+ return new ShuffleVectorInst(Ops[0], Ops[1], getShuffleMask(), "",
+ InsertBefore);
case Instruction::GetElementPtr: {
const auto *GO = cast<GEPOperator>(this);
if (GO->isInBounds())
- return GetElementPtrInst::CreateInBounds(GO->getSourceElementType(),
- Ops[0], Ops.slice(1));
+ return GetElementPtrInst::CreateInBounds(
+ GO->getSourceElementType(), Ops[0], Ops.slice(1), "", InsertBefore);
return GetElementPtrInst::Create(GO->getSourceElementType(), Ops[0],
- Ops.slice(1));
+ Ops.slice(1), "", InsertBefore);
}
case Instruction::ICmp:
case Instruction::FCmp:
return CmpInst::Create((Instruction::OtherOps)getOpcode(),
- (CmpInst::Predicate)getPredicate(), Ops[0], Ops[1]);
+ (CmpInst::Predicate)getPredicate(), Ops[0], Ops[1],
+ "", InsertBefore);
case Instruction::FNeg:
- return UnaryOperator::Create((Instruction::UnaryOps)getOpcode(), Ops[0]);
+ return UnaryOperator::Create((Instruction::UnaryOps)getOpcode(), Ops[0], "",
+ InsertBefore);
default:
assert(getNumOperands() == 2 && "Must be binary operator?");
- BinaryOperator *BO =
- BinaryOperator::Create((Instruction::BinaryOps)getOpcode(),
- Ops[0], Ops[1]);
+ BinaryOperator *BO = BinaryOperator::Create(
+ (Instruction::BinaryOps)getOpcode(), Ops[0], Ops[1], "", InsertBefore);
if (isa<OverflowingBinaryOperator>(BO)) {
BO->setHasNoUnsignedWrap(SubclassOptionalData &
OverflowingBinaryOperator::NoUnsignedWrap);
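
getAsInstruction gains an optional insertion point, so a constant expression can be materialized as a real instruction and placed in one step instead of being inserted separately afterwards. A sketch, assuming this tree; the function, global and expression are illustrative:

#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("m", Ctx);
  auto *FTy = FunctionType::get(Type::getVoidTy(Ctx), /*isVarArg=*/false);
  Function *F = Function::Create(FTy, Function::ExternalLinkage, "f", M);
  BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F);
  IRBuilder<> Builder(BB);
  Instruction *Ret = Builder.CreateRetVoid();

  auto *G = new GlobalVariable(M, Type::getInt8Ty(Ctx), /*isConstant=*/false,
                               GlobalValue::ExternalLinkage, nullptr, "g");
  // ptrtoint (i8* @g to i64) as a constant expression...
  auto *CE = cast<ConstantExpr>(
      ConstantExpr::getPtrToInt(G, Type::getInt64Ty(Ctx)));
  // ...materialized as an instruction placed directly before the return.
  CE->getAsInstruction(Ret);
  M.print(outs(), nullptr);
}
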
diff --git a/llvm/lib/IR/Core.cpp b/llvm/lib/IR/Core.cpp
index 8a7060c148c9..905372982dc2 100644
--- a/llvm/lib/IR/Core.cpp
+++ b/llvm/lib/IR/Core.cpp
@@ -2460,7 +2460,7 @@ void LLVMSetGC(LLVMValueRef Fn, const char *GC) {
void LLVMAddAttributeAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx,
LLVMAttributeRef A) {
- unwrap<Function>(F)->addAttribute(Idx, unwrap(A));
+ unwrap<Function>(F)->addAttributeAtIndex(Idx, unwrap(A));
}
unsigned LLVMGetAttributeCountAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx) {
@@ -2478,31 +2478,32 @@ void LLVMGetAttributesAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx,
LLVMAttributeRef LLVMGetEnumAttributeAtIndex(LLVMValueRef F,
LLVMAttributeIndex Idx,
unsigned KindID) {
- return wrap(unwrap<Function>(F)->getAttribute(Idx,
- (Attribute::AttrKind)KindID));
+ return wrap(unwrap<Function>(F)->getAttributeAtIndex(
+ Idx, (Attribute::AttrKind)KindID));
}
LLVMAttributeRef LLVMGetStringAttributeAtIndex(LLVMValueRef F,
LLVMAttributeIndex Idx,
const char *K, unsigned KLen) {
- return wrap(unwrap<Function>(F)->getAttribute(Idx, StringRef(K, KLen)));
+ return wrap(
+ unwrap<Function>(F)->getAttributeAtIndex(Idx, StringRef(K, KLen)));
}
void LLVMRemoveEnumAttributeAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx,
unsigned KindID) {
- unwrap<Function>(F)->removeAttribute(Idx, (Attribute::AttrKind)KindID);
+ unwrap<Function>(F)->removeAttributeAtIndex(Idx, (Attribute::AttrKind)KindID);
}
void LLVMRemoveStringAttributeAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx,
const char *K, unsigned KLen) {
- unwrap<Function>(F)->removeAttribute(Idx, StringRef(K, KLen));
+ unwrap<Function>(F)->removeAttributeAtIndex(Idx, StringRef(K, KLen));
}
void LLVMAddTargetDependentFunctionAttr(LLVMValueRef Fn, const char *A,
const char *V) {
Function *Func = unwrap<Function>(Fn);
Attribute Attr = Attribute::get(Func->getContext(), A, V);
- Func->addAttribute(AttributeList::FunctionIndex, Attr);
+ Func->addFnAttr(Attr);
}
/*--.. Operations on parameters ............................................--*/
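
The C API wrappers above now forward to the position-specific helpers from the attribute API rework: addFnAttr, addParamAttr, addRetAttr and the explicit addAttributeAtIndex. A sketch of the C++ spellings, assuming LLVM 14-era headers; the function and the chosen attributes are illustrative:

#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("m", Ctx);
  auto *FTy = FunctionType::get(Type::getInt32Ty(Ctx),
                                {Type::getInt8PtrTy(Ctx)}, /*isVarArg=*/false);
  Function *F = Function::Create(FTy, Function::ExternalLinkage, "f", M);
  F->addFnAttr(Attribute::NoUnwind);                    // function attribute
  F->addParamAttr(0, Attribute::NonNull);               // first parameter
  F->addRetAttr(Attribute::NoUndef);                    // return value
  F->addAttributeAtIndex(AttributeList::FunctionIndex,  // explicit-index form
                         Attribute::get(Ctx, "frame-pointer", "all"));
}
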
@@ -2843,7 +2844,7 @@ unsigned LLVMGetNumArgOperands(LLVMValueRef Instr) {
if (FuncletPadInst *FPI = dyn_cast<FuncletPadInst>(unwrap(Instr))) {
return FPI->getNumArgOperands();
}
- return unwrap<CallBase>(Instr)->getNumArgOperands();
+ return unwrap<CallBase>(Instr)->arg_size();
}
/*--.. Call and invoke instructions ........................................--*/
@@ -2857,17 +2858,17 @@ void LLVMSetInstructionCallConv(LLVMValueRef Instr, unsigned CC) {
static_cast<CallingConv::ID>(CC));
}
-void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index,
+void LLVMSetInstrParamAlignment(LLVMValueRef Instr, LLVMAttributeIndex Idx,
unsigned align) {
auto *Call = unwrap<CallBase>(Instr);
Attribute AlignAttr =
Attribute::getWithAlignment(Call->getContext(), Align(align));
- Call->addAttribute(index, AlignAttr);
+ Call->addAttributeAtIndex(Idx, AlignAttr);
}
void LLVMAddCallSiteAttribute(LLVMValueRef C, LLVMAttributeIndex Idx,
LLVMAttributeRef A) {
- unwrap<CallBase>(C)->addAttribute(Idx, unwrap(A));
+ unwrap<CallBase>(C)->addAttributeAtIndex(Idx, unwrap(A));
}
unsigned LLVMGetCallSiteAttributeCount(LLVMValueRef C,
@@ -2888,24 +2889,25 @@ void LLVMGetCallSiteAttributes(LLVMValueRef C, LLVMAttributeIndex Idx,
LLVMAttributeRef LLVMGetCallSiteEnumAttribute(LLVMValueRef C,
LLVMAttributeIndex Idx,
unsigned KindID) {
- return wrap(
- unwrap<CallBase>(C)->getAttribute(Idx, (Attribute::AttrKind)KindID));
+ return wrap(unwrap<CallBase>(C)->getAttributeAtIndex(
+ Idx, (Attribute::AttrKind)KindID));
}
LLVMAttributeRef LLVMGetCallSiteStringAttribute(LLVMValueRef C,
LLVMAttributeIndex Idx,
const char *K, unsigned KLen) {
- return wrap(unwrap<CallBase>(C)->getAttribute(Idx, StringRef(K, KLen)));
+ return wrap(
+ unwrap<CallBase>(C)->getAttributeAtIndex(Idx, StringRef(K, KLen)));
}
void LLVMRemoveCallSiteEnumAttribute(LLVMValueRef C, LLVMAttributeIndex Idx,
unsigned KindID) {
- unwrap<CallBase>(C)->removeAttribute(Idx, (Attribute::AttrKind)KindID);
+ unwrap<CallBase>(C)->removeAttributeAtIndex(Idx, (Attribute::AttrKind)KindID);
}
void LLVMRemoveCallSiteStringAttribute(LLVMValueRef C, LLVMAttributeIndex Idx,
const char *K, unsigned KLen) {
- unwrap<CallBase>(C)->removeAttribute(Idx, StringRef(K, KLen));
+ unwrap<CallBase>(C)->removeAttributeAtIndex(Idx, StringRef(K, KLen));
}
LLVMValueRef LLVMGetCalledValue(LLVMValueRef Instr) {
@@ -3131,6 +3133,10 @@ void LLVMSetInstDebugLocation(LLVMBuilderRef Builder, LLVMValueRef Inst) {
unwrap(Builder)->SetInstDebugLocation(unwrap<Instruction>(Inst));
}
+void LLVMAddMetadataToInst(LLVMBuilderRef Builder, LLVMValueRef Inst) {
+ unwrap(Builder)->AddMetadataToInst(unwrap<Instruction>(Inst));
+}
+
void LLVMBuilderSetDefaultFPMathTag(LLVMBuilderRef Builder,
LLVMMetadataRef FPMathTag) {
diff --git a/llvm/lib/IR/DIBuilder.cpp b/llvm/lib/IR/DIBuilder.cpp
index 61d3b5e69e9e..ca7dafc814ce 100644
--- a/llvm/lib/IR/DIBuilder.cpp
+++ b/llvm/lib/IR/DIBuilder.cpp
@@ -32,8 +32,8 @@ static cl::opt<bool>
cl::init(false), cl::Hidden);
DIBuilder::DIBuilder(Module &m, bool AllowUnresolvedNodes, DICompileUnit *CU)
- : M(m), VMContext(M.getContext()), CUNode(CU),
- DeclareFn(nullptr), ValueFn(nullptr), LabelFn(nullptr),
+ : M(m), VMContext(M.getContext()), CUNode(CU), DeclareFn(nullptr),
+ ValueFn(nullptr), LabelFn(nullptr),
AllowUnresolvedNodes(AllowUnresolvedNodes) {}
void DIBuilder::trackIfUnresolved(MDNode *N) {
@@ -73,7 +73,8 @@ void DIBuilder::finalize() {
return;
}
- CUNode->replaceEnumTypes(MDTuple::get(VMContext, AllEnumTypes));
+ if (!AllEnumTypes.empty())
+ CUNode->replaceEnumTypes(MDTuple::get(VMContext, AllEnumTypes));
SmallVector<Metadata *, 16> RetainValues;
// Declarations and definitions of the same type may be retained. Some
@@ -164,12 +165,13 @@ DICompileUnit *DIBuilder::createCompileUnit(
static DIImportedEntity *
createImportedModule(LLVMContext &C, dwarf::Tag Tag, DIScope *Context,
Metadata *NS, DIFile *File, unsigned Line, StringRef Name,
+ DINodeArray Elements,
SmallVectorImpl<TrackingMDNodeRef> &AllImportedModules) {
if (Line)
assert(File && "Source location has line number but no file");
unsigned EntitiesCount = C.pImpl->DIImportedEntitys.size();
auto *M = DIImportedEntity::get(C, Tag, Context, cast_or_null<DINode>(NS),
- File, Line, Name);
+ File, Line, Name, Elements);
if (EntitiesCount < C.pImpl->DIImportedEntitys.size())
// A new Imported Entity was just added to the context.
// Add it to the Imported Modules list.
@@ -179,36 +181,38 @@ createImportedModule(LLVMContext &C, dwarf::Tag Tag, DIScope *Context,
DIImportedEntity *DIBuilder::createImportedModule(DIScope *Context,
DINamespace *NS, DIFile *File,
- unsigned Line) {
+ unsigned Line,
+ DINodeArray Elements) {
return ::createImportedModule(VMContext, dwarf::DW_TAG_imported_module,
- Context, NS, File, Line, StringRef(),
+ Context, NS, File, Line, StringRef(), Elements,
AllImportedModules);
}
DIImportedEntity *DIBuilder::createImportedModule(DIScope *Context,
DIImportedEntity *NS,
- DIFile *File, unsigned Line) {
+ DIFile *File, unsigned Line,
+ DINodeArray Elements) {
return ::createImportedModule(VMContext, dwarf::DW_TAG_imported_module,
- Context, NS, File, Line, StringRef(),
+ Context, NS, File, Line, StringRef(), Elements,
AllImportedModules);
}
DIImportedEntity *DIBuilder::createImportedModule(DIScope *Context, DIModule *M,
- DIFile *File, unsigned Line) {
+ DIFile *File, unsigned Line,
+ DINodeArray Elements) {
return ::createImportedModule(VMContext, dwarf::DW_TAG_imported_module,
- Context, M, File, Line, StringRef(),
+ Context, M, File, Line, StringRef(), Elements,
AllImportedModules);
}
-DIImportedEntity *DIBuilder::createImportedDeclaration(DIScope *Context,
- DINode *Decl,
- DIFile *File,
- unsigned Line,
- StringRef Name) {
+DIImportedEntity *
+DIBuilder::createImportedDeclaration(DIScope *Context, DINode *Decl,
+ DIFile *File, unsigned Line,
+ StringRef Name, DINodeArray Elements) {
// Make sure to use the unique identifier based metadata reference for
// types that have one.
return ::createImportedModule(VMContext, dwarf::DW_TAG_imported_declaration,
- Context, Decl, File, Line, Name,
+ Context, Decl, File, Line, Name, Elements,
AllImportedModules);
}
@@ -250,7 +254,7 @@ DIEnumerator *DIBuilder::createEnumerator(StringRef Name, uint64_t Val,
Name);
}
-DIEnumerator *DIBuilder::createEnumerator(StringRef Name, APSInt Value) {
+DIEnumerator *DIBuilder::createEnumerator(StringRef Name, const APSInt &Value) {
assert(!Name.empty() && "Unable to create enumerator without name");
return DIEnumerator::get(VMContext, APInt(Value), Value.isUnsigned(), Name);
}
@@ -283,17 +287,16 @@ DIDerivedType *DIBuilder::createQualifiedType(unsigned Tag, DIType *FromTy) {
0, 0, None, DINode::FlagZero);
}
-DIDerivedType *DIBuilder::createPointerType(
- DIType *PointeeTy,
- uint64_t SizeInBits,
- uint32_t AlignInBits,
- Optional<unsigned> DWARFAddressSpace,
- StringRef Name) {
+DIDerivedType *
+DIBuilder::createPointerType(DIType *PointeeTy, uint64_t SizeInBits,
+ uint32_t AlignInBits,
+ Optional<unsigned> DWARFAddressSpace,
+ StringRef Name, DINodeArray Annotations) {
// FIXME: Why is there a name here?
return DIDerivedType::get(VMContext, dwarf::DW_TAG_pointer_type, Name,
nullptr, 0, nullptr, PointeeTy, SizeInBits,
- AlignInBits, 0, DWARFAddressSpace,
- DINode::FlagZero);
+ AlignInBits, 0, DWARFAddressSpace, DINode::FlagZero,
+ nullptr, Annotations);
}
DIDerivedType *DIBuilder::createMemberPointerType(DIType *PointeeTy,
@@ -306,11 +309,10 @@ DIDerivedType *DIBuilder::createMemberPointerType(DIType *PointeeTy,
AlignInBits, 0, None, Flags, Base);
}
-DIDerivedType *DIBuilder::createReferenceType(
- unsigned Tag, DIType *RTy,
- uint64_t SizeInBits,
- uint32_t AlignInBits,
- Optional<unsigned> DWARFAddressSpace) {
+DIDerivedType *
+DIBuilder::createReferenceType(unsigned Tag, DIType *RTy, uint64_t SizeInBits,
+ uint32_t AlignInBits,
+ Optional<unsigned> DWARFAddressSpace) {
assert(RTy && "Unable to create reference type");
return DIDerivedType::get(VMContext, Tag, "", nullptr, 0, nullptr, RTy,
SizeInBits, AlignInBits, 0, DWARFAddressSpace,
@@ -319,11 +321,12 @@ DIDerivedType *DIBuilder::createReferenceType(
DIDerivedType *DIBuilder::createTypedef(DIType *Ty, StringRef Name,
DIFile *File, unsigned LineNo,
- DIScope *Context,
- uint32_t AlignInBits) {
+ DIScope *Context, uint32_t AlignInBits,
+ DINodeArray Annotations) {
return DIDerivedType::get(VMContext, dwarf::DW_TAG_typedef, Name, File,
LineNo, getNonCompileUnitScope(Context), Ty, 0,
- AlignInBits, 0, None, DINode::FlagZero);
+ AlignInBits, 0, None, DINode::FlagZero, nullptr,
+ Annotations);
}
DIDerivedType *DIBuilder::createFriend(DIType *Ty, DIType *FriendTy) {
@@ -341,19 +344,18 @@ DIDerivedType *DIBuilder::createInheritance(DIType *Ty, DIType *BaseTy,
Metadata *ExtraData = ConstantAsMetadata::get(
ConstantInt::get(IntegerType::get(VMContext, 32), VBPtrOffset));
return DIDerivedType::get(VMContext, dwarf::DW_TAG_inheritance, "", nullptr,
- 0, Ty, BaseTy, 0, 0, BaseOffset, None,
- Flags, ExtraData);
+ 0, Ty, BaseTy, 0, 0, BaseOffset, None, Flags,
+ ExtraData);
}
-DIDerivedType *DIBuilder::createMemberType(DIScope *Scope, StringRef Name,
- DIFile *File, unsigned LineNumber,
- uint64_t SizeInBits,
- uint32_t AlignInBits,
- uint64_t OffsetInBits,
- DINode::DIFlags Flags, DIType *Ty) {
+DIDerivedType *DIBuilder::createMemberType(
+ DIScope *Scope, StringRef Name, DIFile *File, unsigned LineNumber,
+ uint64_t SizeInBits, uint32_t AlignInBits, uint64_t OffsetInBits,
+ DINode::DIFlags Flags, DIType *Ty, DINodeArray Annotations) {
return DIDerivedType::get(VMContext, dwarf::DW_TAG_member, Name, File,
LineNumber, getNonCompileUnitScope(Scope), Ty,
- SizeInBits, AlignInBits, OffsetInBits, None, Flags);
+ SizeInBits, AlignInBits, OffsetInBits, None, Flags,
+ nullptr, Annotations);
}
static ConstantAsMetadata *getConstantOrNull(Constant *C) {
@@ -375,14 +377,15 @@ DIDerivedType *DIBuilder::createVariantMemberType(
DIDerivedType *DIBuilder::createBitFieldMemberType(
DIScope *Scope, StringRef Name, DIFile *File, unsigned LineNumber,
uint64_t SizeInBits, uint64_t OffsetInBits, uint64_t StorageOffsetInBits,
- DINode::DIFlags Flags, DIType *Ty) {
+ DINode::DIFlags Flags, DIType *Ty, DINodeArray Annotations) {
Flags |= DINode::FlagBitField;
return DIDerivedType::get(
VMContext, dwarf::DW_TAG_member, Name, File, LineNumber,
- getNonCompileUnitScope(Scope), Ty, SizeInBits, /* AlignInBits */ 0,
+ getNonCompileUnitScope(Scope), Ty, SizeInBits, /*AlignInBits=*/0,
OffsetInBits, None, Flags,
ConstantAsMetadata::get(ConstantInt::get(IntegerType::get(VMContext, 64),
- StorageOffsetInBits)));
+ StorageOffsetInBits)),
+ Annotations);
}
DIDerivedType *
@@ -498,10 +501,12 @@ DICompositeType *DIBuilder::createUnionType(
return R;
}
-DICompositeType *DIBuilder::createVariantPart(
- DIScope *Scope, StringRef Name, DIFile *File, unsigned LineNumber,
- uint64_t SizeInBits, uint32_t AlignInBits, DINode::DIFlags Flags,
- DIDerivedType *Discriminator, DINodeArray Elements, StringRef UniqueIdentifier) {
+DICompositeType *
+DIBuilder::createVariantPart(DIScope *Scope, StringRef Name, DIFile *File,
+ unsigned LineNumber, uint64_t SizeInBits,
+ uint32_t AlignInBits, DINode::DIFlags Flags,
+ DIDerivedType *Discriminator, DINodeArray Elements,
+ StringRef UniqueIdentifier) {
auto *R = DICompositeType::get(
VMContext, dwarf::DW_TAG_variant_part, Name, File, LineNumber,
getNonCompileUnitScope(Scope), nullptr, SizeInBits, AlignInBits, 0, Flags,
@@ -542,16 +547,17 @@ DIDerivedType *DIBuilder::createSetType(DIScope *Scope, StringRef Name,
return R;
}
-DICompositeType *DIBuilder::createArrayType(
- uint64_t Size, uint32_t AlignInBits, DIType *Ty, DINodeArray Subscripts,
- PointerUnion<DIExpression *, DIVariable *> DL,
- PointerUnion<DIExpression *, DIVariable *> AS,
- PointerUnion<DIExpression *, DIVariable *> AL,
- PointerUnion<DIExpression *, DIVariable *> RK) {
+DICompositeType *
+DIBuilder::createArrayType(uint64_t Size, uint32_t AlignInBits, DIType *Ty,
+ DINodeArray Subscripts,
+ PointerUnion<DIExpression *, DIVariable *> DL,
+ PointerUnion<DIExpression *, DIVariable *> AS,
+ PointerUnion<DIExpression *, DIVariable *> AL,
+ PointerUnion<DIExpression *, DIVariable *> RK) {
auto *R = DICompositeType::get(
- VMContext, dwarf::DW_TAG_array_type, "", nullptr, 0,
- nullptr, Ty, Size, AlignInBits, 0, DINode::FlagZero,
- Subscripts, 0, nullptr, nullptr, "", nullptr,
+ VMContext, dwarf::DW_TAG_array_type, "", nullptr, 0, nullptr, Ty, Size,
+ AlignInBits, 0, DINode::FlagZero, Subscripts, 0, nullptr, nullptr, "",
+ nullptr,
DL.is<DIExpression *>() ? (Metadata *)DL.get<DIExpression *>()
: (Metadata *)DL.get<DIVariable *>(),
AS.is<DIExpression *>() ? (Metadata *)AS.get<DIExpression *>()
@@ -628,12 +634,14 @@ DIBuilder::createForwardDecl(unsigned Tag, StringRef Name, DIScope *Scope,
DICompositeType *DIBuilder::createReplaceableCompositeType(
unsigned Tag, StringRef Name, DIScope *Scope, DIFile *F, unsigned Line,
unsigned RuntimeLang, uint64_t SizeInBits, uint32_t AlignInBits,
- DINode::DIFlags Flags, StringRef UniqueIdentifier) {
+ DINode::DIFlags Flags, StringRef UniqueIdentifier,
+ DINodeArray Annotations) {
auto *RetTy =
DICompositeType::getTemporary(
VMContext, Tag, Name, F, Line, getNonCompileUnitScope(Scope), nullptr,
SizeInBits, AlignInBits, 0, Flags, nullptr, RuntimeLang, nullptr,
- nullptr, UniqueIdentifier)
+ nullptr, UniqueIdentifier, nullptr, nullptr, nullptr, nullptr,
+ nullptr, Annotations)
.release();
trackIfUnresolved(RetTy);
return RetTy;
@@ -701,15 +709,16 @@ static void checkGlobalVariableScope(DIScope *Context) {
DIGlobalVariableExpression *DIBuilder::createGlobalVariableExpression(
DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *F,
- unsigned LineNumber, DIType *Ty, bool IsLocalToUnit,
- bool isDefined, DIExpression *Expr,
- MDNode *Decl, MDTuple *TemplateParams, uint32_t AlignInBits) {
+ unsigned LineNumber, DIType *Ty, bool IsLocalToUnit, bool isDefined,
+ DIExpression *Expr, MDNode *Decl, MDTuple *TemplateParams,
+ uint32_t AlignInBits, DINodeArray Annotations) {
checkGlobalVariableScope(Context);
auto *GV = DIGlobalVariable::getDistinct(
VMContext, cast_or_null<DIScope>(Context), Name, LinkageName, F,
- LineNumber, Ty, IsLocalToUnit, isDefined, cast_or_null<DIDerivedType>(Decl),
- TemplateParams, AlignInBits);
+ LineNumber, Ty, IsLocalToUnit, isDefined,
+ cast_or_null<DIDerivedType>(Decl), TemplateParams, AlignInBits,
+ Annotations);
if (!Expr)
Expr = createExpression();
auto *N = DIGlobalVariableExpression::get(VMContext, GV, Expr);
@@ -726,7 +735,8 @@ DIGlobalVariable *DIBuilder::createTempGlobalVariableFwdDecl(
return DIGlobalVariable::getTemporary(
VMContext, cast_or_null<DIScope>(Context), Name, LinkageName, F,
LineNumber, Ty, IsLocalToUnit, false,
- cast_or_null<DIDerivedType>(Decl), TemplateParams, AlignInBits)
+ cast_or_null<DIDerivedType>(Decl), TemplateParams, AlignInBits,
+ nullptr)
.release();
}
@@ -735,16 +745,16 @@ static DILocalVariable *createLocalVariable(
DenseMap<MDNode *, SmallVector<TrackingMDNodeRef, 1>> &PreservedVariables,
DIScope *Scope, StringRef Name, unsigned ArgNo, DIFile *File,
unsigned LineNo, DIType *Ty, bool AlwaysPreserve, DINode::DIFlags Flags,
- uint32_t AlignInBits) {
+ uint32_t AlignInBits, DINodeArray Annotations = nullptr) {
// FIXME: Why getNonCompileUnitScope()?
// FIXME: Why is "!Context" okay here?
// FIXME: Why doesn't this check for a subprogram or lexical block (AFAICT
// the only valid scopes)?
DIScope *Context = getNonCompileUnitScope(Scope);
- auto *Node =
- DILocalVariable::get(VMContext, cast_or_null<DILocalScope>(Context), Name,
- File, LineNo, Ty, ArgNo, Flags, AlignInBits);
+ auto *Node = DILocalVariable::get(
+ VMContext, cast_or_null<DILocalScope>(Context), Name, File, LineNo, Ty,
+ ArgNo, Flags, AlignInBits, Annotations);
if (AlwaysPreserve) {
// The optimizer may remove local variables. If there is an interest
// to preserve variable info in such situation then stash it in a
@@ -768,21 +778,20 @@ DILocalVariable *DIBuilder::createAutoVariable(DIScope *Scope, StringRef Name,
DILocalVariable *DIBuilder::createParameterVariable(
DIScope *Scope, StringRef Name, unsigned ArgNo, DIFile *File,
- unsigned LineNo, DIType *Ty, bool AlwaysPreserve, DINode::DIFlags Flags) {
+ unsigned LineNo, DIType *Ty, bool AlwaysPreserve, DINode::DIFlags Flags,
+ DINodeArray Annotations) {
assert(ArgNo && "Expected non-zero argument number for parameter");
return createLocalVariable(VMContext, PreservedVariables, Scope, Name, ArgNo,
File, LineNo, Ty, AlwaysPreserve, Flags,
- /* AlignInBits */0);
+ /*AlignInBits=*/0, Annotations);
}
-DILabel *DIBuilder::createLabel(
- DIScope *Scope, StringRef Name, DIFile *File,
- unsigned LineNo, bool AlwaysPreserve) {
+DILabel *DIBuilder::createLabel(DIScope *Scope, StringRef Name, DIFile *File,
+ unsigned LineNo, bool AlwaysPreserve) {
DIScope *Context = getNonCompileUnitScope(Scope);
- auto *Node =
- DILabel::get(VMContext, cast_or_null<DILocalScope>(Context), Name,
- File, LineNo);
+ auto *Node = DILabel::get(VMContext, cast_or_null<DILocalScope>(Context),
+ Name, File, LineNo);
if (AlwaysPreserve) {
/// The optimizer may remove labels. If there is an interest
@@ -806,7 +815,7 @@ DIExpression *DIBuilder::createExpression(ArrayRef<int64_t> Signed) {
}
template <class... Ts>
-static DISubprogram *getSubprogram(bool IsDistinct, Ts &&... Args) {
+static DISubprogram *getSubprogram(bool IsDistinct, Ts &&...Args) {
if (IsDistinct)
return DISubprogram::getDistinct(std::forward<Ts>(Args)...);
return DISubprogram::get(std::forward<Ts>(Args)...);
@@ -817,13 +826,14 @@ DISubprogram *DIBuilder::createFunction(
unsigned LineNo, DISubroutineType *Ty, unsigned ScopeLine,
DINode::DIFlags Flags, DISubprogram::DISPFlags SPFlags,
DITemplateParameterArray TParams, DISubprogram *Decl,
- DITypeArray ThrownTypes) {
+ DITypeArray ThrownTypes, DINodeArray Annotations) {
bool IsDefinition = SPFlags & DISubprogram::SPFlagDefinition;
auto *Node = getSubprogram(
/*IsDistinct=*/IsDefinition, VMContext, getNonCompileUnitScope(Context),
Name, LinkageName, File, LineNo, Ty, ScopeLine, nullptr, 0, 0, Flags,
SPFlags, IsDefinition ? CUNode : nullptr, TParams, Decl,
- MDTuple::getTemporary(VMContext, None).release(), ThrownTypes);
+ MDTuple::getTemporary(VMContext, None).release(), ThrownTypes,
+ Annotations);
if (IsDefinition)
AllSubprograms.push_back(Node);
@@ -869,11 +879,11 @@ DISubprogram *DIBuilder::createMethod(
return SP;
}
-DICommonBlock *DIBuilder::createCommonBlock(
- DIScope *Scope, DIGlobalVariable *Decl, StringRef Name, DIFile *File,
- unsigned LineNo) {
- return DICommonBlock::get(
- VMContext, Scope, Decl, Name, File, LineNo);
+DICommonBlock *DIBuilder::createCommonBlock(DIScope *Scope,
+ DIGlobalVariable *Decl,
+ StringRef Name, DIFile *File,
+ unsigned LineNo) {
+ return DICommonBlock::get(VMContext, Scope, Decl, Name, File, LineNo);
}
DINamespace *DIBuilder::createNameSpace(DIScope *Scope, StringRef Name,
@@ -929,9 +939,9 @@ Instruction *DIBuilder::insertDeclare(Value *Storage, DILocalVariable *VarInfo,
Instruction *DIBuilder::insertLabel(DILabel *LabelInfo, const DILocation *DL,
Instruction *InsertBefore) {
- return insertLabel(
- LabelInfo, DL, InsertBefore ? InsertBefore->getParent() : nullptr,
- InsertBefore);
+ return insertLabel(LabelInfo, DL,
+ InsertBefore ? InsertBefore->getParent() : nullptr,
+ InsertBefore);
}
Instruction *DIBuilder::insertLabel(DILabel *LabelInfo, const DILocation *DL,
@@ -980,7 +990,8 @@ static Function *getDeclareIntrin(Module &M) {
Instruction *DIBuilder::insertDeclare(Value *Storage, DILocalVariable *VarInfo,
DIExpression *Expr, const DILocation *DL,
- BasicBlock *InsertBB, Instruction *InsertBefore) {
+ BasicBlock *InsertBB,
+ Instruction *InsertBefore) {
assert(VarInfo && "empty or invalid DILocalVariable* passed to dbg.declare");
assert(DL && "Expected debug loc");
assert(DL->getScope()->getSubprogram() ==
@@ -1023,9 +1034,9 @@ Instruction *DIBuilder::insertDbgValueIntrinsic(
return B.CreateCall(ValueFn, Args);
}
-Instruction *DIBuilder::insertLabel(
- DILabel *LabelInfo, const DILocation *DL,
- BasicBlock *InsertBB, Instruction *InsertBefore) {
+Instruction *DIBuilder::insertLabel(DILabel *LabelInfo, const DILocation *DL,
+ BasicBlock *InsertBB,
+ Instruction *InsertBefore) {
assert(LabelInfo && "empty or invalid DILabel* passed to dbg.label");
assert(DL && "Expected debug loc");
assert(DL->getScope()->getSubprogram() ==
@@ -1042,8 +1053,7 @@ Instruction *DIBuilder::insertLabel(
return B.CreateCall(LabelFn, Args);
}
-void DIBuilder::replaceVTableHolder(DICompositeType *&T,
- DIType *VTableHolder) {
+void DIBuilder::replaceVTableHolder(DICompositeType *&T, DIType *VTableHolder) {
{
TypedTrackingMDRef<DICompositeType> N(T);
N->replaceVTableHolder(VTableHolder);
diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp
index ecd74449dc38..2ace18048262 100644
--- a/llvm/lib/IR/DataLayout.cpp
+++ b/llvm/lib/IR/DataLayout.cpp
@@ -151,6 +151,8 @@ PointerAlignElem::operator==(const PointerAlignElem &rhs) const {
//===----------------------------------------------------------------------===//
const char *DataLayout::getManglingComponent(const Triple &T) {
+ if (T.isOSBinFormatGOFF())
+ return "-m:l";
if (T.isOSBinFormatMachO())
return "-m:o";
if (T.isOSWindows() && T.isOSBinFormatCOFF())
@@ -258,12 +260,12 @@ Error DataLayout::parseSpecifier(StringRef Desc) {
while (!Desc.empty()) {
// Split at '-'.
std::pair<StringRef, StringRef> Split;
- if (Error Err = split(Desc, '-', Split))
+ if (Error Err = ::split(Desc, '-', Split))
return Err;
Desc = Split.second;
// Split at ':'.
- if (Error Err = split(Split.first, ':', Split))
+ if (Error Err = ::split(Split.first, ':', Split))
return Err;
// Aliases used below.
@@ -272,7 +274,7 @@ Error DataLayout::parseSpecifier(StringRef Desc) {
if (Tok == "ni") {
do {
- if (Error Err = split(Rest, ':', Split))
+ if (Error Err = ::split(Rest, ':', Split))
return Err;
Rest = Split.second;
unsigned AS;
@@ -313,7 +315,7 @@ Error DataLayout::parseSpecifier(StringRef Desc) {
if (Rest.empty())
return reportError(
"Missing size specification for pointer in datalayout string");
- if (Error Err = split(Rest, ':', Split))
+ if (Error Err = ::split(Rest, ':', Split))
return Err;
unsigned PointerMemSize;
if (Error Err = getIntInBytes(Tok, PointerMemSize))
@@ -325,7 +327,7 @@ Error DataLayout::parseSpecifier(StringRef Desc) {
if (Rest.empty())
return reportError(
"Missing alignment specification for pointer in datalayout string");
- if (Error Err = split(Rest, ':', Split))
+ if (Error Err = ::split(Rest, ':', Split))
return Err;
unsigned PointerABIAlign;
if (Error Err = getIntInBytes(Tok, PointerABIAlign))
@@ -340,7 +342,7 @@ Error DataLayout::parseSpecifier(StringRef Desc) {
// Preferred alignment.
unsigned PointerPrefAlign = PointerABIAlign;
if (!Rest.empty()) {
- if (Error Err = split(Rest, ':', Split))
+ if (Error Err = ::split(Rest, ':', Split))
return Err;
if (Error Err = getIntInBytes(Tok, PointerPrefAlign))
return Err;
@@ -350,7 +352,7 @@ Error DataLayout::parseSpecifier(StringRef Desc) {
// Now read the index. It is the second optional parameter here.
if (!Rest.empty()) {
- if (Error Err = split(Rest, ':', Split))
+ if (Error Err = ::split(Rest, ':', Split))
return Err;
if (Error Err = getIntInBytes(Tok, IndexSize))
return Err;
@@ -391,7 +393,7 @@ Error DataLayout::parseSpecifier(StringRef Desc) {
if (Rest.empty())
return reportError(
"Missing alignment specification in datalayout string");
- if (Error Err = split(Rest, ':', Split))
+ if (Error Err = ::split(Rest, ':', Split))
return Err;
unsigned ABIAlign;
if (Error Err = getIntInBytes(Tok, ABIAlign))
@@ -408,7 +410,7 @@ Error DataLayout::parseSpecifier(StringRef Desc) {
// Preferred alignment.
unsigned PrefAlign = ABIAlign;
if (!Rest.empty()) {
- if (Error Err = split(Rest, ':', Split))
+ if (Error Err = ::split(Rest, ':', Split))
return Err;
if (Error Err = getIntInBytes(Tok, PrefAlign))
return Err;
@@ -437,7 +439,7 @@ Error DataLayout::parseSpecifier(StringRef Desc) {
LegalIntWidths.push_back(Width);
if (Rest.empty())
break;
- if (Error Err = split(Rest, ':', Split))
+ if (Error Err = ::split(Rest, ':', Split))
return Err;
}
break;
@@ -500,6 +502,9 @@ Error DataLayout::parseSpecifier(StringRef Desc) {
case 'e':
ManglingMode = MM_ELF;
break;
+ case 'l':
+ ManglingMode = MM_GOFF;
+ break;
case 'o':
ManglingMode = MM_MachO;
break;
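As a minimal illustration of the new GOFF support (the layout string below is an assumption modeled on the big-endian SystemZ layout, not taken from this change): the "m:l" component added above selects MM_GOFF when a datalayout string is parsed.
#include "llvm/IR/DataLayout.h"
using namespace llvm;
// Hypothetical sketch: construct a DataLayout whose mangling component is
// the new GOFF mode; everything besides "m:l" is an illustrative guess.
DataLayout makeGOFFDataLayout() {
  return DataLayout("E-m:l-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64");
}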
@@ -702,12 +707,12 @@ unsigned DataLayout::getPointerSize(unsigned AS) const {
return getPointerAlignElem(AS).TypeByteWidth;
}
-unsigned DataLayout::getMaxPointerSize() const {
- unsigned MaxPointerSize = 0;
+unsigned DataLayout::getMaxIndexSize() const {
+ unsigned MaxIndexSize = 0;
for (auto &P : Pointers)
- MaxPointerSize = std::max(MaxPointerSize, P.TypeByteWidth);
+ MaxIndexSize = std::max(MaxIndexSize, P.IndexWidth);
- return MaxPointerSize;
+ return MaxIndexSize;
}
unsigned DataLayout::getPointerTypeSizeInBits(Type *Ty) const {
@@ -800,15 +805,11 @@ Align DataLayout::getAlignment(Type *Ty, bool abi_or_pref) const {
// By default, use natural alignment for vector types. This is consistent
// with what clang and llvm-gcc do.
- // TODO: This should probably not be using the alloc size.
- unsigned Alignment =
- getTypeAllocSize(cast<VectorType>(Ty)->getElementType());
+ //
// We're only calculating a natural alignment, so it doesn't have to be
// based on the full size for scalable vectors. Using the minimum element
// count should be enough here.
- Alignment *= cast<VectorType>(Ty)->getElementCount().getKnownMinValue();
- Alignment = PowerOf2Ceil(Alignment);
- return Align(Alignment);
+ return Align(PowerOf2Ceil(getTypeStoreSize(Ty).getKnownMinSize()));
}
case Type::X86_AMXTyID:
return Align(64);
@@ -818,7 +819,7 @@ Align DataLayout::getAlignment(Type *Ty, bool abi_or_pref) const {
}
/// TODO: Remove this function once the transition to Align is over.
-unsigned DataLayout::getABITypeAlignment(Type *Ty) const {
+uint64_t DataLayout::getABITypeAlignment(Type *Ty) const {
return getABITypeAlign(Ty).value();
}
@@ -827,7 +828,7 @@ Align DataLayout::getABITypeAlign(Type *Ty) const {
}
/// TODO: Remove this function once the transition to Align is over.
-unsigned DataLayout::getPrefTypeAlignment(Type *Ty) const {
+uint64_t DataLayout::getPrefTypeAlignment(Type *Ty) const {
return getPrefTypeAlign(Ty).value();
}
@@ -900,6 +901,72 @@ int64_t DataLayout::getIndexedOffsetInType(Type *ElemTy,
return Result;
}
+static void addElementIndex(SmallVectorImpl<APInt> &Indices, TypeSize ElemSize,
+ APInt &Offset) {
+ // Skip over scalable or zero size elements. Also skip element sizes larger
+ // than the positive index space, because the arithmetic below may not be
+ // correct in that case.
+ unsigned BitWidth = Offset.getBitWidth();
+ if (ElemSize.isScalable() || ElemSize == 0 ||
+ !isUIntN(BitWidth - 1, ElemSize)) {
+ Indices.push_back(APInt::getZero(BitWidth));
+ return;
+ }
+
+ APInt Index = Offset.sdiv(ElemSize);
+ Offset -= Index * ElemSize;
+ if (Offset.isNegative()) {
+ // Prefer a positive remaining offset to allow struct indexing.
+ --Index;
+ Offset += ElemSize;
+ assert(Offset.isNonNegative() && "Remaining offset shouldn't be negative");
+ }
+ Indices.push_back(Index);
+}
+
+SmallVector<APInt> DataLayout::getGEPIndicesForOffset(Type *&ElemTy,
+ APInt &Offset) const {
+ assert(ElemTy->isSized() && "Element type must be sized");
+ SmallVector<APInt> Indices;
+ addElementIndex(Indices, getTypeAllocSize(ElemTy), Offset);
+ while (Offset != 0) {
+ if (auto *ArrTy = dyn_cast<ArrayType>(ElemTy)) {
+ ElemTy = ArrTy->getElementType();
+ addElementIndex(Indices, getTypeAllocSize(ElemTy), Offset);
+ continue;
+ }
+
+ if (auto *VecTy = dyn_cast<VectorType>(ElemTy)) {
+ ElemTy = VecTy->getElementType();
+ unsigned ElemSizeInBits = getTypeSizeInBits(ElemTy).getFixedSize();
+ // GEPs over non-multiple of 8 size vector elements are invalid.
+ if (ElemSizeInBits % 8 != 0)
+ break;
+
+ addElementIndex(Indices, TypeSize::Fixed(ElemSizeInBits / 8), Offset);
+ continue;
+ }
+
+ if (auto *STy = dyn_cast<StructType>(ElemTy)) {
+ const StructLayout *SL = getStructLayout(STy);
+ uint64_t IntOffset = Offset.getZExtValue();
+ if (IntOffset >= SL->getSizeInBytes())
+ break;
+
+ unsigned Index = SL->getElementContainingOffset(IntOffset);
+ Offset -= SL->getElementOffset(Index);
+ ElemTy = STy->getElementType(Index);
+ Indices.push_back(APInt(32, Index));
+ continue;
+ }
+
+ // Can't index into non-aggregate type.
+ break;
+ }
+
+ return Indices;
+}
+
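A minimal sketch of driving the new getGEPIndicesForOffset entry point (the helper below and its name are hypothetical): turn a byte offset into explicit GEP indices and emit the corresponding inbounds GEP.
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IRBuilder.h"
using namespace llvm;
// Hypothetical helper: any part of ByteOffset that cannot be expressed as
// indices over ElemTy is reported back through RemainingOffset.
Value *emitGEPForOffset(IRBuilder<> &B, const DataLayout &DL, Type *ElemTy,
                        Value *Base, uint64_t ByteOffset,
                        uint64_t &RemainingOffset) {
  APInt Offset(DL.getIndexTypeSizeInBits(Base->getType()), ByteOffset);
  Type *Ty = ElemTy;
  SmallVector<APInt> Indices = DL.getGEPIndicesForOffset(Ty, Offset);
  RemainingOffset = Offset.getZExtValue();
  SmallVector<Value *> IdxList;
  for (const APInt &Idx : Indices)
    IdxList.push_back(B.getInt(Idx));
  return B.CreateInBoundsGEP(ElemTy, Base, IdxList);
}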
/// getPreferredAlign - Return the preferred alignment of the specified global.
/// This includes an explicitly requested alignment (if the global has one).
Align DataLayout::getPreferredAlign(const GlobalVariable *GV) const {
diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp
index 06c511f8530a..7c69fbf7085d 100644
--- a/llvm/lib/IR/DebugInfo.cpp
+++ b/llvm/lib/IR/DebugInfo.cpp
@@ -447,8 +447,7 @@ bool llvm::stripDebugInfo(Function &F) {
DenseMap<MDNode *, MDNode *> LoopIDsMap;
for (BasicBlock &BB : F) {
- for (auto II = BB.begin(), End = BB.end(); II != End;) {
- Instruction &I = *II++; // We may delete the instruction, increment now.
+ for (Instruction &I : llvm::make_early_inc_range(BB)) {
if (isa<DbgInfoIntrinsic>(&I)) {
I.eraseFromParent();
Changed = true;
@@ -909,6 +908,11 @@ void LLVMDIBuilderFinalize(LLVMDIBuilderRef Builder) {
unwrap(Builder)->finalize();
}
+void LLVMDIBuilderFinalizeSubprogram(LLVMDIBuilderRef Builder,
+ LLVMMetadataRef subprogram) {
+ unwrap(Builder)->finalizeSubprogram(unwrapDI<DISubprogram>(subprogram));
+}
+
LLVMMetadataRef LLVMDIBuilderCreateCompileUnit(
LLVMDIBuilderRef Builder, LLVMDWARFSourceLanguage Lang,
LLVMMetadataRef FileRef, const char *Producer, size_t ProducerLen,
@@ -1003,41 +1007,43 @@ LLVMDIBuilderCreateImportedModuleFromNamespace(LLVMDIBuilderRef Builder,
Line));
}
-LLVMMetadataRef
-LLVMDIBuilderCreateImportedModuleFromAlias(LLVMDIBuilderRef Builder,
- LLVMMetadataRef Scope,
- LLVMMetadataRef ImportedEntity,
- LLVMMetadataRef File,
- unsigned Line) {
+LLVMMetadataRef LLVMDIBuilderCreateImportedModuleFromAlias(
+ LLVMDIBuilderRef Builder, LLVMMetadataRef Scope,
+ LLVMMetadataRef ImportedEntity, LLVMMetadataRef File, unsigned Line,
+ LLVMMetadataRef *Elements, unsigned NumElements) {
+ auto Elts =
+ (NumElements > 0)
+ ? unwrap(Builder)->getOrCreateArray({unwrap(Elements), NumElements})
+ : nullptr;
return wrap(unwrap(Builder)->createImportedModule(
- unwrapDI<DIScope>(Scope),
- unwrapDI<DIImportedEntity>(ImportedEntity),
- unwrapDI<DIFile>(File), Line));
-}
-
-LLVMMetadataRef
-LLVMDIBuilderCreateImportedModuleFromModule(LLVMDIBuilderRef Builder,
- LLVMMetadataRef Scope,
- LLVMMetadataRef M,
- LLVMMetadataRef File,
- unsigned Line) {
- return wrap(unwrap(Builder)->createImportedModule(unwrapDI<DIScope>(Scope),
- unwrapDI<DIModule>(M),
- unwrapDI<DIFile>(File),
- Line));
-}
-
-LLVMMetadataRef
-LLVMDIBuilderCreateImportedDeclaration(LLVMDIBuilderRef Builder,
- LLVMMetadataRef Scope,
- LLVMMetadataRef Decl,
- LLVMMetadataRef File,
- unsigned Line,
- const char *Name, size_t NameLen) {
+ unwrapDI<DIScope>(Scope), unwrapDI<DIImportedEntity>(ImportedEntity),
+ unwrapDI<DIFile>(File), Line, Elts));
+}
+
+LLVMMetadataRef LLVMDIBuilderCreateImportedModuleFromModule(
+ LLVMDIBuilderRef Builder, LLVMMetadataRef Scope, LLVMMetadataRef M,
+ LLVMMetadataRef File, unsigned Line, LLVMMetadataRef *Elements,
+ unsigned NumElements) {
+ auto Elts =
+ (NumElements > 0)
+ ? unwrap(Builder)->getOrCreateArray({unwrap(Elements), NumElements})
+ : nullptr;
+ return wrap(unwrap(Builder)->createImportedModule(
+ unwrapDI<DIScope>(Scope), unwrapDI<DIModule>(M), unwrapDI<DIFile>(File),
+ Line, Elts));
+}
+
+LLVMMetadataRef LLVMDIBuilderCreateImportedDeclaration(
+ LLVMDIBuilderRef Builder, LLVMMetadataRef Scope, LLVMMetadataRef Decl,
+ LLVMMetadataRef File, unsigned Line, const char *Name, size_t NameLen,
+ LLVMMetadataRef *Elements, unsigned NumElements) {
+ auto Elts =
+ (NumElements > 0)
+ ? unwrap(Builder)->getOrCreateArray({unwrap(Elements), NumElements})
+ : nullptr;
return wrap(unwrap(Builder)->createImportedDeclaration(
- unwrapDI<DIScope>(Scope),
- unwrapDI<DINode>(Decl),
- unwrapDI<DIFile>(File), Line, {Name, NameLen}));
+ unwrapDI<DIScope>(Scope), unwrapDI<DINode>(Decl), unwrapDI<DIFile>(File),
+ Line, {Name, NameLen}, Elts));
}
LLVMMetadataRef
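A short caller-side sketch for the extended C API (the wrapper is hypothetical): the imported-entity creators now take an element array, and passing a null pointer with a zero count keeps the previous behaviour.
#include "llvm-c/DebugInfo.h"
#include <cstddef>
// Hypothetical wrapper: import a declaration without any elements.
LLVMMetadataRef importDecl(LLVMDIBuilderRef DIB, LLVMMetadataRef Scope,
                           LLVMMetadataRef Decl, LLVMMetadataRef File,
                           unsigned Line, const char *Name, size_t NameLen) {
  return LLVMDIBuilderCreateImportedDeclaration(DIB, Scope, Decl, File, Line,
                                                Name, NameLen,
                                                /*Elements=*/nullptr,
                                                /*NumElements=*/0);
}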
diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp
index 7b0dab799e1a..b20e581d283a 100644
--- a/llvm/lib/IR/DebugInfoMetadata.cpp
+++ b/llvm/lib/IR/DebugInfoMetadata.cpp
@@ -82,8 +82,8 @@ DILocation *DILocation::getImpl(LLVMContext &Context, unsigned Line,
Storage, Context.pImpl->DILocations);
}
-const
-DILocation *DILocation::getMergedLocations(ArrayRef<const DILocation *> Locs) {
+const DILocation *
+DILocation::getMergedLocations(ArrayRef<const DILocation *> Locs) {
if (Locs.empty())
return nullptr;
if (Locs.size() == 1)
@@ -139,7 +139,8 @@ const DILocation *DILocation::getMergedLocation(const DILocation *LocA,
return DILocation::get(Result->getContext(), 0, 0, S, L);
}
-Optional<unsigned> DILocation::encodeDiscriminator(unsigned BD, unsigned DF, unsigned CI) {
+Optional<unsigned> DILocation::encodeDiscriminator(unsigned BD, unsigned DF,
+ unsigned CI) {
std::array<unsigned, 3> Components = {BD, DF, CI};
uint64_t RemainingWork = 0U;
// We use RemainingWork to figure out if we have no remaining components to
@@ -147,7 +148,8 @@ Optional<unsigned> DILocation::encodeDiscriminator(unsigned BD, unsigned DF, uns
// encode anything for the latter 2.
// Since any of the input components is at most 32 bits, their sum will be
// less than 34 bits, and thus RemainingWork won't overflow.
- RemainingWork = std::accumulate(Components.begin(), Components.end(), RemainingWork);
+ RemainingWork =
+ std::accumulate(Components.begin(), Components.end(), RemainingWork);
int I = 0;
unsigned Ret = 0;
@@ -179,7 +181,6 @@ void DILocation::decodeDiscriminator(unsigned D, unsigned &BD, unsigned &DF,
getNextComponentInDiscriminator(getNextComponentInDiscriminator(D)));
}
-
DINode::DIFlags DINode::getFlag(StringRef Flag) {
return StringSwitch<DIFlags>(Flag)
#define HANDLE_DI_FLAG(ID, NAME) .Case("DIFlag" #NAME, Flag##NAME)
@@ -546,8 +547,8 @@ DIBasicType *DIBasicType::getImpl(LLVMContext &Context, unsigned Tag,
DEFINE_GETIMPL_LOOKUP(DIBasicType,
(Tag, Name, SizeInBits, AlignInBits, Encoding, Flags));
Metadata *Ops[] = {nullptr, nullptr, Name};
- DEFINE_GETIMPL_STORE(DIBasicType, (Tag, SizeInBits, AlignInBits, Encoding,
- Flags), Ops);
+ DEFINE_GETIMPL_STORE(DIBasicType,
+ (Tag, SizeInBits, AlignInBits, Encoding, Flags), Ops);
}
Optional<DIBasicType::Signedness> DIBasicType::getSignedness() const {
@@ -582,16 +583,17 @@ DIDerivedType *DIDerivedType::getImpl(
unsigned Line, Metadata *Scope, Metadata *BaseType, uint64_t SizeInBits,
uint32_t AlignInBits, uint64_t OffsetInBits,
Optional<unsigned> DWARFAddressSpace, DIFlags Flags, Metadata *ExtraData,
- StorageType Storage, bool ShouldCreate) {
+ Metadata *Annotations, StorageType Storage, bool ShouldCreate) {
assert(isCanonical(Name) && "Expected canonical MDString");
DEFINE_GETIMPL_LOOKUP(DIDerivedType,
(Tag, Name, File, Line, Scope, BaseType, SizeInBits,
AlignInBits, OffsetInBits, DWARFAddressSpace, Flags,
- ExtraData));
- Metadata *Ops[] = {File, Scope, Name, BaseType, ExtraData};
- DEFINE_GETIMPL_STORE(
- DIDerivedType, (Tag, Line, SizeInBits, AlignInBits, OffsetInBits,
- DWARFAddressSpace, Flags), Ops);
+ ExtraData, Annotations));
+ Metadata *Ops[] = {File, Scope, Name, BaseType, ExtraData, Annotations};
+ DEFINE_GETIMPL_STORE(DIDerivedType,
+ (Tag, Line, SizeInBits, AlignInBits, OffsetInBits,
+ DWARFAddressSpace, Flags),
+ Ops);
}
DICompositeType *DICompositeType::getImpl(
@@ -601,22 +603,25 @@ DICompositeType *DICompositeType::getImpl(
Metadata *Elements, unsigned RuntimeLang, Metadata *VTableHolder,
Metadata *TemplateParams, MDString *Identifier, Metadata *Discriminator,
Metadata *DataLocation, Metadata *Associated, Metadata *Allocated,
- Metadata *Rank, StorageType Storage, bool ShouldCreate) {
+ Metadata *Rank, Metadata *Annotations, StorageType Storage,
+ bool ShouldCreate) {
assert(isCanonical(Name) && "Expected canonical MDString");
// Keep this in sync with buildODRType.
- DEFINE_GETIMPL_LOOKUP(
- DICompositeType,
- (Tag, Name, File, Line, Scope, BaseType, SizeInBits, AlignInBits,
- OffsetInBits, Flags, Elements, RuntimeLang, VTableHolder, TemplateParams,
- Identifier, Discriminator, DataLocation, Associated, Allocated, Rank));
+ DEFINE_GETIMPL_LOOKUP(DICompositeType,
+ (Tag, Name, File, Line, Scope, BaseType, SizeInBits,
+ AlignInBits, OffsetInBits, Flags, Elements,
+ RuntimeLang, VTableHolder, TemplateParams, Identifier,
+ Discriminator, DataLocation, Associated, Allocated,
+ Rank, Annotations));
Metadata *Ops[] = {File, Scope, Name, BaseType,
Elements, VTableHolder, TemplateParams, Identifier,
Discriminator, DataLocation, Associated, Allocated,
- Rank};
- DEFINE_GETIMPL_STORE(DICompositeType, (Tag, Line, RuntimeLang, SizeInBits,
- AlignInBits, OffsetInBits, Flags),
- Ops);
+ Rank, Annotations};
+ DEFINE_GETIMPL_STORE(
+ DICompositeType,
+ (Tag, Line, RuntimeLang, SizeInBits, AlignInBits, OffsetInBits, Flags),
+ Ops);
}
DICompositeType *DICompositeType::buildODRType(
@@ -626,7 +631,7 @@ DICompositeType *DICompositeType::buildODRType(
DIFlags Flags, Metadata *Elements, unsigned RuntimeLang,
Metadata *VTableHolder, Metadata *TemplateParams, Metadata *Discriminator,
Metadata *DataLocation, Metadata *Associated, Metadata *Allocated,
- Metadata *Rank) {
+ Metadata *Rank, Metadata *Annotations) {
assert(!Identifier.getString().empty() && "Expected valid identifier");
if (!Context.isODRUniquingDebugTypes())
return nullptr;
@@ -636,7 +641,10 @@ DICompositeType *DICompositeType::buildODRType(
Context, Tag, Name, File, Line, Scope, BaseType, SizeInBits,
AlignInBits, OffsetInBits, Flags, Elements, RuntimeLang,
VTableHolder, TemplateParams, &Identifier, Discriminator,
- DataLocation, Associated, Allocated, Rank);
+ DataLocation, Associated, Allocated, Rank, Annotations);
+
+ if (CT->getTag() != Tag)
+ return nullptr;
// Only mutate CT if it's a forward declaration and the new operands aren't.
assert(CT->getRawIdentifier() == &Identifier && "Wrong ODR identifier?");
@@ -649,7 +657,7 @@ DICompositeType *DICompositeType::buildODRType(
Metadata *Ops[] = {File, Scope, Name, BaseType,
Elements, VTableHolder, TemplateParams, &Identifier,
Discriminator, DataLocation, Associated, Allocated,
- Rank};
+ Rank, Annotations};
assert((std::end(Ops) - std::begin(Ops)) == (int)CT->getNumOperands() &&
"Mismatched number of operands");
for (unsigned I = 0, E = CT->getNumOperands(); I != E; ++I)
@@ -665,17 +673,21 @@ DICompositeType *DICompositeType::getODRType(
DIFlags Flags, Metadata *Elements, unsigned RuntimeLang,
Metadata *VTableHolder, Metadata *TemplateParams, Metadata *Discriminator,
Metadata *DataLocation, Metadata *Associated, Metadata *Allocated,
- Metadata *Rank) {
+ Metadata *Rank, Metadata *Annotations) {
assert(!Identifier.getString().empty() && "Expected valid identifier");
if (!Context.isODRUniquingDebugTypes())
return nullptr;
auto *&CT = (*Context.pImpl->DITypeMap)[&Identifier];
- if (!CT)
+ if (!CT) {
CT = DICompositeType::getDistinct(
Context, Tag, Name, File, Line, Scope, BaseType, SizeInBits,
AlignInBits, OffsetInBits, Flags, Elements, RuntimeLang, VTableHolder,
TemplateParams, &Identifier, Discriminator, DataLocation, Associated,
- Allocated, Rank);
+ Allocated, Rank, Annotations);
+ } else {
+ if (CT->getTag() != Tag)
+ return nullptr;
+ }
return CT;
}
@@ -789,10 +801,14 @@ DICompileUnit::getNameTableKind(StringRef Str) {
const char *DICompileUnit::emissionKindString(DebugEmissionKind EK) {
switch (EK) {
- case NoDebug: return "NoDebug";
- case FullDebug: return "FullDebug";
- case LineTablesOnly: return "LineTablesOnly";
- case DebugDirectivesOnly: return "DebugDirectivesOnly";
+ case NoDebug:
+ return "NoDebug";
+ case FullDebug:
+ return "FullDebug";
+ case LineTablesOnly:
+ return "LineTablesOnly";
+ case DebugDirectivesOnly:
+ return "DebugDirectivesOnly";
}
return nullptr;
}
@@ -862,23 +878,28 @@ DISubprogram *DISubprogram::getImpl(
unsigned ScopeLine, Metadata *ContainingType, unsigned VirtualIndex,
int ThisAdjustment, DIFlags Flags, DISPFlags SPFlags, Metadata *Unit,
Metadata *TemplateParams, Metadata *Declaration, Metadata *RetainedNodes,
- Metadata *ThrownTypes, StorageType Storage, bool ShouldCreate) {
+ Metadata *ThrownTypes, Metadata *Annotations, StorageType Storage,
+ bool ShouldCreate) {
assert(isCanonical(Name) && "Expected canonical MDString");
assert(isCanonical(LinkageName) && "Expected canonical MDString");
DEFINE_GETIMPL_LOOKUP(DISubprogram,
(Scope, Name, LinkageName, File, Line, Type, ScopeLine,
ContainingType, VirtualIndex, ThisAdjustment, Flags,
SPFlags, Unit, TemplateParams, Declaration,
- RetainedNodes, ThrownTypes));
- SmallVector<Metadata *, 11> Ops = {
- File, Scope, Name, LinkageName, Type, Unit,
- Declaration, RetainedNodes, ContainingType, TemplateParams, ThrownTypes};
- if (!ThrownTypes) {
+ RetainedNodes, ThrownTypes, Annotations));
+ SmallVector<Metadata *, 12> Ops = {
+ File, Scope, Name, LinkageName,
+ Type, Unit, Declaration, RetainedNodes,
+ ContainingType, TemplateParams, ThrownTypes, Annotations};
+ if (!Annotations) {
Ops.pop_back();
- if (!TemplateParams) {
+ if (!ThrownTypes) {
Ops.pop_back();
- if (!ContainingType)
+ if (!TemplateParams) {
Ops.pop_back();
+ if (!ContainingType)
+ Ops.pop_back();
+ }
}
}
DEFINE_GETIMPL_STORE_N(
@@ -977,13 +998,14 @@ DIGlobalVariable::getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name,
Metadata *Type, bool IsLocalToUnit, bool IsDefinition,
Metadata *StaticDataMemberDeclaration,
Metadata *TemplateParams, uint32_t AlignInBits,
- StorageType Storage, bool ShouldCreate) {
+ Metadata *Annotations, StorageType Storage,
+ bool ShouldCreate) {
assert(isCanonical(Name) && "Expected canonical MDString");
assert(isCanonical(LinkageName) && "Expected canonical MDString");
- DEFINE_GETIMPL_LOOKUP(DIGlobalVariable, (Scope, Name, LinkageName, File, Line,
- Type, IsLocalToUnit, IsDefinition,
- StaticDataMemberDeclaration,
- TemplateParams, AlignInBits));
+ DEFINE_GETIMPL_LOOKUP(
+ DIGlobalVariable,
+ (Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit, IsDefinition,
+ StaticDataMemberDeclaration, TemplateParams, AlignInBits, Annotations));
Metadata *Ops[] = {Scope,
Name,
File,
@@ -991,27 +1013,26 @@ DIGlobalVariable::getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name,
Name,
LinkageName,
StaticDataMemberDeclaration,
- TemplateParams};
+ TemplateParams,
+ Annotations};
DEFINE_GETIMPL_STORE(DIGlobalVariable,
(Line, IsLocalToUnit, IsDefinition, AlignInBits), Ops);
}
-DILocalVariable *DILocalVariable::getImpl(LLVMContext &Context, Metadata *Scope,
- MDString *Name, Metadata *File,
- unsigned Line, Metadata *Type,
- unsigned Arg, DIFlags Flags,
- uint32_t AlignInBits,
- StorageType Storage,
- bool ShouldCreate) {
+DILocalVariable *
+DILocalVariable::getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name,
+ Metadata *File, unsigned Line, Metadata *Type,
+ unsigned Arg, DIFlags Flags, uint32_t AlignInBits,
+ Metadata *Annotations, StorageType Storage,
+ bool ShouldCreate) {
// 64K ought to be enough for any frontend.
assert(Arg <= UINT16_MAX && "Expected argument number to fit in 16-bits");
assert(Scope && "Expected scope");
assert(isCanonical(Name) && "Expected canonical MDString");
- DEFINE_GETIMPL_LOOKUP(DILocalVariable,
- (Scope, Name, File, Line, Type, Arg, Flags,
- AlignInBits));
- Metadata *Ops[] = {Scope, Name, File, Type};
+ DEFINE_GETIMPL_LOOKUP(DILocalVariable, (Scope, Name, File, Line, Type, Arg,
+ Flags, AlignInBits, Annotations));
+ Metadata *Ops[] = {Scope, Name, File, Type, Annotations};
DEFINE_GETIMPL_STORE(DILocalVariable, (Line, Arg, Flags, AlignInBits), Ops);
}
@@ -1038,14 +1059,12 @@ Optional<uint64_t> DIVariable::getSizeInBits() const {
return None;
}
-DILabel *DILabel::getImpl(LLVMContext &Context, Metadata *Scope,
- MDString *Name, Metadata *File, unsigned Line,
- StorageType Storage,
+DILabel *DILabel::getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name,
+ Metadata *File, unsigned Line, StorageType Storage,
bool ShouldCreate) {
assert(Scope && "Expected scope");
assert(isCanonical(Name) && "Expected canonical MDString");
- DEFINE_GETIMPL_LOOKUP(DILabel,
- (Scope, Name, File, Line));
+ DEFINE_GETIMPL_LOOKUP(DILabel, (Scope, Name, File, Line));
Metadata *Ops[] = {Scope, Name, File};
DEFINE_GETIMPL_STORE(DILabel, (Line), Ops);
}
@@ -1194,10 +1213,11 @@ bool DIExpression::isComplex() const {
// kind of complex computation occurs.
for (const auto &It : expr_ops()) {
switch (It.getOp()) {
- case dwarf::DW_OP_LLVM_tag_offset:
- case dwarf::DW_OP_LLVM_fragment:
- continue;
- default: return true;
+ case dwarf::DW_OP_LLVM_tag_offset:
+ case dwarf::DW_OP_LLVM_fragment:
+ continue;
+ default:
+ return true;
}
}
@@ -1346,8 +1366,7 @@ DIExpression *DIExpression::replaceArg(const DIExpression *Expr,
DIExpression *DIExpression::prependOpcodes(const DIExpression *Expr,
SmallVectorImpl<uint64_t> &Ops,
- bool StackValue,
- bool EntryValue) {
+ bool StackValue, bool EntryValue) {
assert(Expr && "Can't prepend ops to this expression");
if (EntryValue) {
@@ -1442,7 +1461,8 @@ Optional<DIExpression *> DIExpression::createFragmentExpression(
if (Expr) {
for (auto Op : Expr->expr_ops()) {
switch (Op.getOp()) {
- default: break;
+ default:
+ break;
case dwarf::DW_OP_shr:
case dwarf::DW_OP_shra:
case dwarf::DW_OP_shl:
@@ -1476,6 +1496,45 @@ Optional<DIExpression *> DIExpression::createFragmentExpression(
return DIExpression::get(Expr->getContext(), Ops);
}
+std::pair<DIExpression *, const ConstantInt *>
+DIExpression::constantFold(const ConstantInt *CI) {
+ // Copy the APInt so we can modify it.
+ APInt NewInt = CI->getValue();
+ SmallVector<uint64_t, 8> Ops;
+
+ // Fold operators only at the beginning of the expression.
+ bool First = true;
+ bool Changed = false;
+ for (auto Op : expr_ops()) {
+ switch (Op.getOp()) {
+ default:
+ // We fold only the leading part of the expression; if we get to a part
+ // that we're going to copy unchanged, and haven't done any folding,
+ // then the entire expression is unchanged and we can return early.
+ if (!Changed)
+ return {this, CI};
+ First = false;
+ break;
+ case dwarf::DW_OP_LLVM_convert:
+ if (!First)
+ break;
+ Changed = true;
+ if (Op.getArg(1) == dwarf::DW_ATE_signed)
+ NewInt = NewInt.sextOrTrunc(Op.getArg(0));
+ else {
+ assert(Op.getArg(1) == dwarf::DW_ATE_unsigned && "Unexpected operand");
+ NewInt = NewInt.zextOrTrunc(Op.getArg(0));
+ }
+ continue;
+ }
+ Op.appendToVector(Ops);
+ }
+ if (!Changed)
+ return {this, CI};
+ return {DIExpression::get(getContext(), Ops),
+ ConstantInt::get(getContext(), NewInt)};
+}
+
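A minimal caller sketch (the wrapper is hypothetical and adds nothing beyond the call itself): constantFold either returns its inputs unchanged or returns a shorter expression together with the constant after the leading DW_OP_LLVM_convert operations have been applied.
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include <utility>
using namespace llvm;
// Hypothetical helper: fold leading convert ops into the constant before
// emitting a constant-valued debug location.
std::pair<DIExpression *, const ConstantInt *>
simplifyConstantLocation(DIExpression *Expr, const ConstantInt *CI) {
  return Expr->constantFold(CI);
}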
uint64_t DIExpression::getNumLocationOperands() const {
uint64_t Result = 0;
for (auto ExprOp : expr_ops())
@@ -1552,21 +1611,22 @@ DIObjCProperty *DIObjCProperty::getImpl(
DIImportedEntity *DIImportedEntity::getImpl(LLVMContext &Context, unsigned Tag,
Metadata *Scope, Metadata *Entity,
Metadata *File, unsigned Line,
- MDString *Name, StorageType Storage,
+ MDString *Name, Metadata *Elements,
+ StorageType Storage,
bool ShouldCreate) {
assert(isCanonical(Name) && "Expected canonical MDString");
DEFINE_GETIMPL_LOOKUP(DIImportedEntity,
- (Tag, Scope, Entity, File, Line, Name));
- Metadata *Ops[] = {Scope, Entity, Name, File};
+ (Tag, Scope, Entity, File, Line, Name, Elements));
+ Metadata *Ops[] = {Scope, Entity, Name, File, Elements};
DEFINE_GETIMPL_STORE(DIImportedEntity, (Tag, Line), Ops);
}
-DIMacro *DIMacro::getImpl(LLVMContext &Context, unsigned MIType,
- unsigned Line, MDString *Name, MDString *Value,
- StorageType Storage, bool ShouldCreate) {
+DIMacro *DIMacro::getImpl(LLVMContext &Context, unsigned MIType, unsigned Line,
+ MDString *Name, MDString *Value, StorageType Storage,
+ bool ShouldCreate) {
assert(isCanonical(Name) && "Expected canonical MDString");
DEFINE_GETIMPL_LOOKUP(DIMacro, (MIType, Line, Name, Value));
- Metadata *Ops[] = { Name, Value };
+ Metadata *Ops[] = {Name, Value};
DEFINE_GETIMPL_STORE(DIMacro, (MIType, Line), Ops);
}
@@ -1574,9 +1634,8 @@ DIMacroFile *DIMacroFile::getImpl(LLVMContext &Context, unsigned MIType,
unsigned Line, Metadata *File,
Metadata *Elements, StorageType Storage,
bool ShouldCreate) {
- DEFINE_GETIMPL_LOOKUP(DIMacroFile,
- (MIType, Line, File, Elements));
- Metadata *Ops[] = { File, Elements };
+ DEFINE_GETIMPL_LOOKUP(DIMacroFile, (MIType, Line, File, Elements));
+ Metadata *Ops[] = {File, Elements};
DEFINE_GETIMPL_STORE(DIMacroFile, (MIType, Line), Ops);
}
@@ -1592,6 +1651,12 @@ void DIArgList::handleChangedOperand(void *Ref, Metadata *New) {
assert((!New || isa<ValueAsMetadata>(New)) &&
"DIArgList must be passed a ValueAsMetadata");
untrack();
+ bool Uniq = isUniqued();
+ if (Uniq) {
+ // We need to update the uniqueness once the Args are updated since they
+ // form the key to the DIArgLists store.
+ eraseFromStore();
+ }
ValueAsMetadata *NewVM = cast_or_null<ValueAsMetadata>(New);
for (ValueAsMetadata *&VM : Args) {
if (&VM == OldVMPtr) {
@@ -1601,6 +1666,10 @@ void DIArgList::handleChangedOperand(void *Ref, Metadata *New) {
VM = ValueAsMetadata::get(UndefValue::get(VM->getValue()->getType()));
}
}
+ if (Uniq) {
+ if (uniquify() != this)
+ storeDistinctInContext();
+ }
track();
}
void DIArgList::track() {
diff --git a/llvm/lib/IR/DiagnosticHandler.cpp b/llvm/lib/IR/DiagnosticHandler.cpp
index 2fe634803894..7b40728a34e8 100644
--- a/llvm/lib/IR/DiagnosticHandler.cpp
+++ b/llvm/lib/IR/DiagnosticHandler.cpp
@@ -30,7 +30,7 @@ struct PassRemarksOpt {
Pattern = std::make_shared<Regex>(Val);
std::string RegexError;
if (!Pattern->isValid(RegexError))
- report_fatal_error("Invalid regular expression '" + Val +
+ report_fatal_error(Twine("Invalid regular expression '") + Val +
"' in -pass-remarks: " + RegexError,
false);
}
diff --git a/llvm/lib/IR/DiagnosticInfo.cpp b/llvm/lib/IR/DiagnosticInfo.cpp
index f92138274801..0a872a81f911 100644
--- a/llvm/lib/IR/DiagnosticInfo.cpp
+++ b/llvm/lib/IR/DiagnosticInfo.cpp
@@ -1,4 +1,4 @@
-//===- llvm/Support/DiagnosticInfo.cpp - Diagnostic Definitions -*- C++ -*-===//
+//===- llvm/IR/DiagnosticInfo.cpp - Diagnostic Definitions ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -70,10 +70,8 @@ void DiagnosticInfoInlineAsm::print(DiagnosticPrinter &DP) const {
}
void DiagnosticInfoResourceLimit::print(DiagnosticPrinter &DP) const {
- DP << getResourceName() << " (" << getResourceSize() << ") exceeds limit";
- if (getResourceLimit() != 0)
- DP << " (" << getResourceLimit() << ')';
- DP << " in function '" << getFunction() << '\'';
+ DP << getResourceName() << " (" << getResourceSize() << ") exceeds limit ("
+ << getResourceLimit() << ") in function '" << getFunction() << '\'';
}
void DiagnosticInfoDebugMetadataVersion::print(DiagnosticPrinter &DP) const {
@@ -401,3 +399,35 @@ std::string DiagnosticInfoOptimizationBase::getMsg() const {
void OptimizationRemarkAnalysisFPCommute::anchor() {}
void OptimizationRemarkAnalysisAliasing::anchor() {}
+
+void llvm::diagnoseDontCall(const CallInst &CI) {
+ auto *F = CI.getCalledFunction();
+ if (!F)
+ return;
+
+ for (int i = 0; i != 2; ++i) {
+ auto AttrName = i == 0 ? "dontcall-error" : "dontcall-warn";
+ auto Sev = i == 0 ? DS_Error : DS_Warning;
+
+ if (F->hasFnAttribute(AttrName)) {
+ unsigned LocCookie = 0;
+ auto A = F->getFnAttribute(AttrName);
+ if (MDNode *MD = CI.getMetadata("srcloc"))
+ LocCookie =
+ mdconst::extract<ConstantInt>(MD->getOperand(0))->getZExtValue();
+ DiagnosticInfoDontCall D(F->getName(), A.getValueAsString(), Sev,
+ LocCookie);
+ F->getContext().diagnose(D);
+ }
+ }
+}
+
+void DiagnosticInfoDontCall::print(DiagnosticPrinter &DP) const {
+ DP << "call to " << getFunctionName() << " marked \"dontcall-";
+ if (getSeverity() == DiagnosticSeverity::DS_Error)
+ DP << "error\"";
+ else
+ DP << "warn\"";
+ if (!getNote().empty())
+ DP << ": " << getNote();
+}
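A producer-side sketch (the helper and message are hypothetical): diagnoseDontCall keys off string attributes on the callee, so marking a function is enough to surface the diagnostic at each direct call.
#include "llvm/IR/Function.h"
using namespace llvm;
// Hypothetical helper: "dontcall-error" yields a DS_Error diagnostic at each
// call site; "dontcall-warn" would downgrade it to a warning.
void forbidDirectCalls(Function &F, StringRef Note) {
  F.addFnAttr("dontcall-error", Note);
}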
diff --git a/llvm/lib/IR/DiagnosticPrinter.cpp b/llvm/lib/IR/DiagnosticPrinter.cpp
index 496bd18e78e2..49b8bbae53be 100644
--- a/llvm/lib/IR/DiagnosticPrinter.cpp
+++ b/llvm/lib/IR/DiagnosticPrinter.cpp
@@ -1,4 +1,4 @@
-//===- llvm/Support/DiagnosticInfo.cpp - Diagnostic Definitions -*- C++ -*-===//
+//===- llvm/IR/DiagnosticPrinter.cpp - Diagnostic Printer -------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/IR/FPEnv.cpp b/llvm/lib/IR/FPEnv.cpp
index 516c702acec7..c6e0938e71a6 100644
--- a/llvm/lib/IR/FPEnv.cpp
+++ b/llvm/lib/IR/FPEnv.cpp
@@ -17,7 +17,7 @@
namespace llvm {
-Optional<RoundingMode> StrToRoundingMode(StringRef RoundingArg) {
+Optional<RoundingMode> convertStrToRoundingMode(StringRef RoundingArg) {
// For dynamic rounding mode, we use round to nearest but we will set the
// 'exact' SDNodeFlag so that the value will not be rounded.
return StringSwitch<Optional<RoundingMode>>(RoundingArg)
@@ -30,7 +30,7 @@ Optional<RoundingMode> StrToRoundingMode(StringRef RoundingArg) {
.Default(None);
}
-Optional<StringRef> RoundingModeToStr(RoundingMode UseRounding) {
+Optional<StringRef> convertRoundingModeToStr(RoundingMode UseRounding) {
Optional<StringRef> RoundingStr = None;
switch (UseRounding) {
case RoundingMode::Dynamic:
@@ -57,7 +57,8 @@ Optional<StringRef> RoundingModeToStr(RoundingMode UseRounding) {
return RoundingStr;
}
-Optional<fp::ExceptionBehavior> StrToExceptionBehavior(StringRef ExceptionArg) {
+Optional<fp::ExceptionBehavior>
+convertStrToExceptionBehavior(StringRef ExceptionArg) {
return StringSwitch<Optional<fp::ExceptionBehavior>>(ExceptionArg)
.Case("fpexcept.ignore", fp::ebIgnore)
.Case("fpexcept.maytrap", fp::ebMayTrap)
@@ -65,7 +66,8 @@ Optional<fp::ExceptionBehavior> StrToExceptionBehavior(StringRef ExceptionArg) {
.Default(None);
}
-Optional<StringRef> ExceptionBehaviorToStr(fp::ExceptionBehavior UseExcept) {
+Optional<StringRef>
+convertExceptionBehaviorToStr(fp::ExceptionBehavior UseExcept) {
Optional<StringRef> ExceptStr = None;
switch (UseExcept) {
case fp::ebStrict:
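An illustrative round trip through the renamed helpers (the checker below is hypothetical): the convert* names map the constrained-FP metadata strings to enum values and back.
#include "llvm/IR/FPEnv.h"
using namespace llvm;
// Hypothetical check: a rounding-mode string such as "round.tonearest"
// survives string -> enum -> string conversion.
bool roundTripsRoundingMode(StringRef Arg) {
  Optional<RoundingMode> RM = convertStrToRoundingMode(Arg);
  if (!RM)
    return false;
  Optional<StringRef> Back = convertRoundingModeToStr(*RM);
  return Back && *Back == Arg;
}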
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
index 4034b1505bd0..82b20a8af91b 100644
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -140,25 +140,25 @@ bool Argument::hasPreallocatedAttr() const {
bool Argument::hasPassPointeeByValueCopyAttr() const {
if (!getType()->isPointerTy()) return false;
AttributeList Attrs = getParent()->getAttributes();
- return Attrs.hasParamAttribute(getArgNo(), Attribute::ByVal) ||
- Attrs.hasParamAttribute(getArgNo(), Attribute::InAlloca) ||
- Attrs.hasParamAttribute(getArgNo(), Attribute::Preallocated);
+ return Attrs.hasParamAttr(getArgNo(), Attribute::ByVal) ||
+ Attrs.hasParamAttr(getArgNo(), Attribute::InAlloca) ||
+ Attrs.hasParamAttr(getArgNo(), Attribute::Preallocated);
}
bool Argument::hasPointeeInMemoryValueAttr() const {
if (!getType()->isPointerTy())
return false;
AttributeList Attrs = getParent()->getAttributes();
- return Attrs.hasParamAttribute(getArgNo(), Attribute::ByVal) ||
- Attrs.hasParamAttribute(getArgNo(), Attribute::StructRet) ||
- Attrs.hasParamAttribute(getArgNo(), Attribute::InAlloca) ||
- Attrs.hasParamAttribute(getArgNo(), Attribute::Preallocated) ||
- Attrs.hasParamAttribute(getArgNo(), Attribute::ByRef);
+ return Attrs.hasParamAttr(getArgNo(), Attribute::ByVal) ||
+ Attrs.hasParamAttr(getArgNo(), Attribute::StructRet) ||
+ Attrs.hasParamAttr(getArgNo(), Attribute::InAlloca) ||
+ Attrs.hasParamAttr(getArgNo(), Attribute::Preallocated) ||
+ Attrs.hasParamAttr(getArgNo(), Attribute::ByRef);
}
/// For a byval, sret, inalloca, or preallocated parameter, get the in-memory
/// parameter type.
-static Type *getMemoryParamAllocType(AttributeSet ParamAttrs, Type *ArgTy) {
+static Type *getMemoryParamAllocType(AttributeSet ParamAttrs) {
// FIXME: All the type carrying attributes are mutually exclusive, so there
// should be a single query to get the stored type that handles any of them.
if (Type *ByValTy = ParamAttrs.getByValType())
@@ -177,19 +177,19 @@ static Type *getMemoryParamAllocType(AttributeSet ParamAttrs, Type *ArgTy) {
uint64_t Argument::getPassPointeeByValueCopySize(const DataLayout &DL) const {
AttributeSet ParamAttrs =
- getParent()->getAttributes().getParamAttributes(getArgNo());
- if (Type *MemTy = getMemoryParamAllocType(ParamAttrs, getType()))
+ getParent()->getAttributes().getParamAttrs(getArgNo());
+ if (Type *MemTy = getMemoryParamAllocType(ParamAttrs))
return DL.getTypeAllocSize(MemTy);
return 0;
}
Type *Argument::getPointeeInMemoryValueType() const {
AttributeSet ParamAttrs =
- getParent()->getAttributes().getParamAttributes(getArgNo());
- return getMemoryParamAllocType(ParamAttrs, getType());
+ getParent()->getAttributes().getParamAttrs(getArgNo());
+ return getMemoryParamAllocType(ParamAttrs);
}
-unsigned Argument::getParamAlignment() const {
+uint64_t Argument::getParamAlignment() const {
assert(getType()->isPointerTy() && "Only pointers have alignments");
return getParent()->getParamAlignment(getArgNo());
}
@@ -278,8 +278,8 @@ bool Argument::hasSExtAttr() const {
bool Argument::onlyReadsMemory() const {
AttributeList Attrs = getParent()->getAttributes();
- return Attrs.hasParamAttribute(getArgNo(), Attribute::ReadOnly) ||
- Attrs.hasParamAttribute(getArgNo(), Attribute::ReadNone);
+ return Attrs.hasParamAttr(getArgNo(), Attribute::ReadOnly) ||
+ Attrs.hasParamAttr(getArgNo(), Attribute::ReadNone);
}
void Argument::addAttrs(AttrBuilder &B) {
@@ -354,7 +354,7 @@ Function *Function::createWithDefaultAttr(FunctionType *Ty,
B.addAttribute("frame-pointer", "all");
break;
}
- F->addAttributes(AttributeList::FunctionIndex, B);
+ F->addFnAttrs(B);
return F;
}
@@ -529,101 +529,144 @@ void Function::dropAllReferences() {
clearMetadata();
}
-void Function::addAttribute(unsigned i, Attribute::AttrKind Kind) {
- AttributeList PAL = getAttributes();
- PAL = PAL.addAttribute(getContext(), i, Kind);
- setAttributes(PAL);
+void Function::addAttributeAtIndex(unsigned i, Attribute Attr) {
+ AttributeSets = AttributeSets.addAttributeAtIndex(getContext(), i, Attr);
}
-void Function::addAttribute(unsigned i, Attribute Attr) {
- AttributeList PAL = getAttributes();
- PAL = PAL.addAttribute(getContext(), i, Attr);
- setAttributes(PAL);
+void Function::addFnAttr(Attribute::AttrKind Kind) {
+ AttributeSets = AttributeSets.addFnAttribute(getContext(), Kind);
}
-void Function::addAttributes(unsigned i, const AttrBuilder &Attrs) {
- AttributeList PAL = getAttributes();
- PAL = PAL.addAttributes(getContext(), i, Attrs);
- setAttributes(PAL);
+void Function::addFnAttr(StringRef Kind, StringRef Val) {
+ AttributeSets = AttributeSets.addFnAttribute(getContext(), Kind, Val);
+}
+
+void Function::addFnAttr(Attribute Attr) {
+ AttributeSets = AttributeSets.addFnAttribute(getContext(), Attr);
+}
+
+void Function::addFnAttrs(const AttrBuilder &Attrs) {
+ AttributeSets = AttributeSets.addFnAttributes(getContext(), Attrs);
+}
+
+void Function::addRetAttr(Attribute::AttrKind Kind) {
+ AttributeSets = AttributeSets.addRetAttribute(getContext(), Kind);
+}
+
+void Function::addRetAttr(Attribute Attr) {
+ AttributeSets = AttributeSets.addRetAttribute(getContext(), Attr);
+}
+
+void Function::addRetAttrs(const AttrBuilder &Attrs) {
+ AttributeSets = AttributeSets.addRetAttributes(getContext(), Attrs);
}
void Function::addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) {
- AttributeList PAL = getAttributes();
- PAL = PAL.addParamAttribute(getContext(), ArgNo, Kind);
- setAttributes(PAL);
+ AttributeSets = AttributeSets.addParamAttribute(getContext(), ArgNo, Kind);
}
void Function::addParamAttr(unsigned ArgNo, Attribute Attr) {
- AttributeList PAL = getAttributes();
- PAL = PAL.addParamAttribute(getContext(), ArgNo, Attr);
- setAttributes(PAL);
+ AttributeSets = AttributeSets.addParamAttribute(getContext(), ArgNo, Attr);
}
void Function::addParamAttrs(unsigned ArgNo, const AttrBuilder &Attrs) {
- AttributeList PAL = getAttributes();
- PAL = PAL.addParamAttributes(getContext(), ArgNo, Attrs);
- setAttributes(PAL);
+ AttributeSets = AttributeSets.addParamAttributes(getContext(), ArgNo, Attrs);
}
-void Function::removeAttribute(unsigned i, Attribute::AttrKind Kind) {
- AttributeList PAL = getAttributes();
- PAL = PAL.removeAttribute(getContext(), i, Kind);
- setAttributes(PAL);
+void Function::removeAttributeAtIndex(unsigned i, Attribute::AttrKind Kind) {
+ AttributeSets = AttributeSets.removeAttributeAtIndex(getContext(), i, Kind);
}
-void Function::removeAttribute(unsigned i, StringRef Kind) {
- AttributeList PAL = getAttributes();
- PAL = PAL.removeAttribute(getContext(), i, Kind);
- setAttributes(PAL);
+void Function::removeAttributeAtIndex(unsigned i, StringRef Kind) {
+ AttributeSets = AttributeSets.removeAttributeAtIndex(getContext(), i, Kind);
}
-void Function::removeAttributes(unsigned i, const AttrBuilder &Attrs) {
- AttributeList PAL = getAttributes();
- PAL = PAL.removeAttributes(getContext(), i, Attrs);
- setAttributes(PAL);
+void Function::removeFnAttr(Attribute::AttrKind Kind) {
+ AttributeSets = AttributeSets.removeFnAttribute(getContext(), Kind);
+}
+
+void Function::removeFnAttr(StringRef Kind) {
+ AttributeSets = AttributeSets.removeFnAttribute(getContext(), Kind);
+}
+
+void Function::removeFnAttrs(const AttrBuilder &Attrs) {
+ AttributeSets = AttributeSets.removeFnAttributes(getContext(), Attrs);
+}
+
+void Function::removeRetAttr(Attribute::AttrKind Kind) {
+ AttributeSets = AttributeSets.removeRetAttribute(getContext(), Kind);
+}
+
+void Function::removeRetAttr(StringRef Kind) {
+ AttributeSets = AttributeSets.removeRetAttribute(getContext(), Kind);
+}
+
+void Function::removeRetAttrs(const AttrBuilder &Attrs) {
+ AttributeSets = AttributeSets.removeRetAttributes(getContext(), Attrs);
}
void Function::removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) {
- AttributeList PAL = getAttributes();
- PAL = PAL.removeParamAttribute(getContext(), ArgNo, Kind);
- setAttributes(PAL);
+ AttributeSets = AttributeSets.removeParamAttribute(getContext(), ArgNo, Kind);
}
void Function::removeParamAttr(unsigned ArgNo, StringRef Kind) {
- AttributeList PAL = getAttributes();
- PAL = PAL.removeParamAttribute(getContext(), ArgNo, Kind);
- setAttributes(PAL);
+ AttributeSets = AttributeSets.removeParamAttribute(getContext(), ArgNo, Kind);
}
void Function::removeParamAttrs(unsigned ArgNo, const AttrBuilder &Attrs) {
- AttributeList PAL = getAttributes();
- PAL = PAL.removeParamAttributes(getContext(), ArgNo, Attrs);
- setAttributes(PAL);
+ AttributeSets =
+ AttributeSets.removeParamAttributes(getContext(), ArgNo, Attrs);
+}
+
+void Function::addDereferenceableParamAttr(unsigned ArgNo, uint64_t Bytes) {
+ AttributeSets =
+ AttributeSets.addDereferenceableParamAttr(getContext(), ArgNo, Bytes);
}
-void Function::addDereferenceableAttr(unsigned i, uint64_t Bytes) {
- AttributeList PAL = getAttributes();
- PAL = PAL.addDereferenceableAttr(getContext(), i, Bytes);
- setAttributes(PAL);
+bool Function::hasFnAttribute(Attribute::AttrKind Kind) const {
+ return AttributeSets.hasFnAttr(Kind);
}
-void Function::addDereferenceableParamAttr(unsigned ArgNo, uint64_t Bytes) {
- AttributeList PAL = getAttributes();
- PAL = PAL.addDereferenceableParamAttr(getContext(), ArgNo, Bytes);
- setAttributes(PAL);
+bool Function::hasFnAttribute(StringRef Kind) const {
+ return AttributeSets.hasFnAttr(Kind);
+}
+
+bool Function::hasRetAttribute(Attribute::AttrKind Kind) const {
+ return AttributeSets.hasRetAttr(Kind);
+}
+
+bool Function::hasParamAttribute(unsigned ArgNo,
+ Attribute::AttrKind Kind) const {
+ return AttributeSets.hasParamAttr(ArgNo, Kind);
+}
+
+Attribute Function::getAttributeAtIndex(unsigned i,
+ Attribute::AttrKind Kind) const {
+ return AttributeSets.getAttributeAtIndex(i, Kind);
+}
+
+Attribute Function::getAttributeAtIndex(unsigned i, StringRef Kind) const {
+ return AttributeSets.getAttributeAtIndex(i, Kind);
}
-void Function::addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes) {
- AttributeList PAL = getAttributes();
- PAL = PAL.addDereferenceableOrNullAttr(getContext(), i, Bytes);
- setAttributes(PAL);
+Attribute Function::getFnAttribute(Attribute::AttrKind Kind) const {
+ return AttributeSets.getFnAttr(Kind);
+}
+
+Attribute Function::getFnAttribute(StringRef Kind) const {
+ return AttributeSets.getFnAttr(Kind);
+}
+
+/// Gets the specified attribute from the list of attributes.
+Attribute Function::getParamAttribute(unsigned ArgNo,
+ Attribute::AttrKind Kind) const {
+ return AttributeSets.getParamAttr(ArgNo, Kind);
}
void Function::addDereferenceableOrNullParamAttr(unsigned ArgNo,
uint64_t Bytes) {
- AttributeList PAL = getAttributes();
- PAL = PAL.addDereferenceableOrNullParamAttr(getContext(), ArgNo, Bytes);
- setAttributes(PAL);
+ AttributeSets = AttributeSets.addDereferenceableOrNullParamAttr(getContext(),
+ ArgNo, Bytes);
}
DenormalMode Function::getDenormalMode(const fltSemantics &FPType) const {
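A migration-style sketch (the function below is hypothetical): the index-based addAttribute/removeAttribute entry points give way to explicit function-, return- and parameter-scoped helpers that update AttributeSets in place.
#include "llvm/IR/Function.h"
using namespace llvm;
// Hypothetical migration example; assumes F has at least one parameter.
void retagHelper(Function &F) {
  F.addFnAttr(Attribute::NoUnwind);       // was addAttribute(FunctionIndex, ...)
  F.addFnAttr("frame-pointer", "all");    // string function attributes
  F.addParamAttr(0, Attribute::NoUndef);  // parameter helpers keep their names
  if (F.hasFnAttribute(Attribute::OptimizeNone))
    F.removeFnAttr(Attribute::OptimizeNone); // was removeAttribute(FunctionIndex, ...)
}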
@@ -936,7 +979,8 @@ enum IIT_Info {
IIT_BF16 = 48,
IIT_STRUCT9 = 49,
IIT_V256 = 50,
- IIT_AMX = 51
+ IIT_AMX = 51,
+ IIT_PPCF128 = 52
};
static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
@@ -983,6 +1027,9 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
case IIT_F128:
OutputTable.push_back(IITDescriptor::get(IITDescriptor::Quad, 0));
return;
+ case IIT_PPCF128:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::PPCQuad, 0));
+ return;
case IIT_I1:
OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 1));
return;
@@ -1207,6 +1254,7 @@ static Type *DecodeFixedType(ArrayRef<Intrinsic::IITDescriptor> &Infos,
case IITDescriptor::Float: return Type::getFloatTy(Context);
case IITDescriptor::Double: return Type::getDoubleTy(Context);
case IITDescriptor::Quad: return Type::getFP128Ty(Context);
+ case IITDescriptor::PPCQuad: return Type::getPPC_FP128Ty(Context);
case IITDescriptor::Integer:
return IntegerType::get(Context, D.Integer_Width);
@@ -1389,6 +1437,7 @@ static bool matchIntrinsicType(
case IITDescriptor::Float: return !Ty->isFloatTy();
case IITDescriptor::Double: return !Ty->isDoubleTy();
case IITDescriptor::Quad: return !Ty->isFP128Ty();
+ case IITDescriptor::PPCQuad: return !Ty->isPPC_FP128Ty();
case IITDescriptor::Integer: return !Ty->isIntegerTy(D.Integer_Width);
case IITDescriptor::Vector: {
VectorType *VT = dyn_cast<VectorType>(Ty);
@@ -1403,11 +1452,6 @@ static bool matchIntrinsicType(
if (!PT->isOpaque())
return matchIntrinsicType(PT->getElementType(), Infos, ArgTys,
DeferredChecks, IsDeferredCheck);
- // If typed pointers are supported, do not allow using opaque pointer in
- // place of fixed pointer type. This would make the intrinsic signature
- // non-unique.
- if (Ty->getContext().supportsTypedPointers())
- return true;
// Consume IIT descriptors relating to the pointer element type.
while (Infos.front().Kind == IITDescriptor::Pointer)
Infos = Infos.slice(1);
@@ -1525,11 +1569,8 @@ static bool matchIntrinsicType(
if (!ThisArgType || !ReferenceType)
return true;
- if (!ThisArgType->isOpaque())
- return ThisArgType->getElementType() != ReferenceType->getElementType();
- // If typed pointers are supported, do not allow opaque pointer to ensure
- // uniqueness.
- return Ty->getContext().supportsTypedPointers();
+ return !ThisArgType->isOpaqueOrPointeeTypeMatches(
+ ReferenceType->getElementType());
}
case IITDescriptor::VecOfAnyPtrsToElt: {
unsigned RefArgNumber = D.getRefArgNumber();
@@ -1702,8 +1743,8 @@ Optional<Function *> Intrinsic::remangleIntrinsicFunction(Function *F) {
/// and llvm.compiler.used variables.
bool Function::hasAddressTaken(const User **PutOffender,
bool IgnoreCallbackUses,
- bool IgnoreAssumeLikeCalls,
- bool IgnoreLLVMUsed) const {
+ bool IgnoreAssumeLikeCalls, bool IgnoreLLVMUsed,
+ bool IgnoreARCAttachedCall) const {
for (const Use &U : uses()) {
const User *FU = U.getUser();
if (isa<BlockAddress>(FU))
@@ -1747,6 +1788,11 @@ bool Function::hasAddressTaken(const User **PutOffender,
return true;
}
if (!Call->isCallee(&U)) {
+ if (IgnoreARCAttachedCall &&
+ Call->isOperandBundleOfType(LLVMContext::OB_clang_arc_attachedcall,
+ U.getOperandNo()))
+ continue;
+
if (PutOffender)
*PutOffender = FU;
return true;
@@ -1846,10 +1892,9 @@ void Function::setValueSubclassDataBit(unsigned Bit, bool On) {
void Function::setEntryCount(ProfileCount Count,
const DenseSet<GlobalValue::GUID> *S) {
- assert(Count.hasValue());
#if !defined(NDEBUG)
auto PrevCount = getEntryCount();
- assert(!PrevCount.hasValue() || PrevCount.getType() == Count.getType());
+ assert(!PrevCount.hasValue() || PrevCount->getType() == Count.getType());
#endif
auto ImportGUIDs = getImportGUIDs();
@@ -1867,7 +1912,7 @@ void Function::setEntryCount(uint64_t Count, Function::ProfileCountType Type,
setEntryCount(ProfileCount(Count, Type), Imports);
}
-ProfileCount Function::getEntryCount(bool AllowSynthetic) const {
+Optional<ProfileCount> Function::getEntryCount(bool AllowSynthetic) const {
MDNode *MD = getMetadata(LLVMContext::MD_prof);
if (MD && MD->getOperand(0))
if (MDString *MDS = dyn_cast<MDString>(MD->getOperand(0))) {
@@ -1877,7 +1922,7 @@ ProfileCount Function::getEntryCount(bool AllowSynthetic) const {
// A value of -1 is used for SamplePGO when there were no samples.
// Treat this the same as unknown.
if (Count == (uint64_t)-1)
- return ProfileCount::getInvalid();
+ return None;
return ProfileCount(Count, PCT_Real);
} else if (AllowSynthetic &&
MDS->getString().equals("synthetic_function_entry_count")) {
@@ -1886,7 +1931,7 @@ ProfileCount Function::getEntryCount(bool AllowSynthetic) const {
return ProfileCount(Count, PCT_Synthetic);
}
}
- return ProfileCount::getInvalid();
+ return None;
}
DenseSet<GlobalValue::GUID> Function::getImportGUIDs() const {
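A caller-side sketch (the helper is hypothetical): with getEntryCount returning Optional<ProfileCount>, the invalid-count sentinel disappears and callers test the Optional instead.
#include "llvm/IR/Function.h"
using namespace llvm;
// Hypothetical helper: 0 when the function carries no real profile-based
// entry count (pass AllowSynthetic=true to accept synthetic counts too).
uint64_t entryCountOrZero(const Function &F) {
  if (Optional<Function::ProfileCount> Count = F.getEntryCount())
    return Count->getCount();
  return 0;
}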
diff --git a/llvm/lib/IR/GCStrategy.cpp b/llvm/lib/IR/GCStrategy.cpp
index 25dad5bec9ef..f3bc5b74f8fd 100644
--- a/llvm/lib/IR/GCStrategy.cpp
+++ b/llvm/lib/IR/GCStrategy.cpp
@@ -18,3 +18,21 @@ using namespace llvm;
LLVM_INSTANTIATE_REGISTRY(GCRegistry)
GCStrategy::GCStrategy() = default;
+
+std::unique_ptr<GCStrategy> llvm::getGCStrategy(const StringRef Name) {
+ for (auto &S : GCRegistry::entries())
+ if (S.getName() == Name)
+ return S.instantiate();
+
+ if (GCRegistry::begin() == GCRegistry::end()) {
+ // In normal operation, the registry should not be empty. There should
+ // be the builtin GCs if nothing else. The most likely scenario here is
+ // that we got here without running the initializers used by the Registry
+ // itself and its registration mechanism.
+ const std::string error =
+ std::string("unsupported GC: ") + Name.str() +
+ " (did you remember to link and initialize the library?)";
+ report_fatal_error(error);
+ } else
+ report_fatal_error(std::string("unsupported GC: ") + Name.str());
+}
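An illustrative caller (the helper is hypothetical): llvm::getGCStrategy resolves a registered strategy by name, for example the name stored in a function's gc attribute.
#include "llvm/IR/Function.h"
#include "llvm/IR/GCStrategy.h"
#include <cassert>
using namespace llvm;
// Hypothetical helper: report_fatal_error fires inside getGCStrategy when
// the name is not registered.
std::unique_ptr<GCStrategy> strategyFor(const Function &F) {
  assert(F.hasGC() && "function has no GC strategy name");
  return getGCStrategy(F.getGC());
}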
diff --git a/llvm/lib/IR/Globals.cpp b/llvm/lib/IR/Globals.cpp
index b1c6dcc6672d..9f38288095e3 100644
--- a/llvm/lib/IR/Globals.cpp
+++ b/llvm/lib/IR/Globals.cpp
@@ -162,7 +162,7 @@ std::string GlobalValue::getGlobalIdentifier() const {
StringRef GlobalValue::getSection() const {
if (auto *GA = dyn_cast<GlobalAlias>(this)) {
// In general we cannot compute this at the IR level, but we try.
- if (const GlobalObject *GO = GA->getBaseObject())
+ if (const GlobalObject *GO = GA->getAliaseeObject())
return GO->getSection();
return "";
}
@@ -172,7 +172,7 @@ StringRef GlobalValue::getSection() const {
const Comdat *GlobalValue::getComdat() const {
if (auto *GA = dyn_cast<GlobalAlias>(this)) {
// In general we cannot compute this at the IR level, but we try.
- if (const GlobalObject *GO = GA->getBaseObject())
+ if (const GlobalObject *GO = GA->getAliaseeObject())
return const_cast<GlobalObject *>(GO)->getComdat();
return nullptr;
}
@@ -235,7 +235,7 @@ bool GlobalValue::isDeclaration() const {
return F->empty() && !F->isMaterializable();
// Aliases and ifuncs are always definitions.
- assert(isa<GlobalIndirectSymbol>(this));
+ assert(isa<GlobalAlias>(this) || isa<GlobalIFunc>(this));
return false;
}
@@ -280,14 +280,44 @@ bool GlobalObject::canIncreaseAlignment() const {
return true;
}
-const GlobalObject *GlobalValue::getBaseObject() const {
- if (auto *GO = dyn_cast<GlobalObject>(this))
+static const GlobalObject *
+findBaseObject(const Constant *C, DenseSet<const GlobalAlias *> &Aliases) {
+ if (auto *GO = dyn_cast<GlobalObject>(C))
return GO;
- if (auto *GA = dyn_cast<GlobalIndirectSymbol>(this))
- return GA->getBaseObject();
+ if (auto *GA = dyn_cast<GlobalAlias>(C))
+ if (Aliases.insert(GA).second)
+ return findBaseObject(GA->getOperand(0), Aliases);
+ if (auto *CE = dyn_cast<ConstantExpr>(C)) {
+ switch (CE->getOpcode()) {
+ case Instruction::Add: {
+ auto *LHS = findBaseObject(CE->getOperand(0), Aliases);
+ auto *RHS = findBaseObject(CE->getOperand(1), Aliases);
+ if (LHS && RHS)
+ return nullptr;
+ return LHS ? LHS : RHS;
+ }
+ case Instruction::Sub: {
+ if (findBaseObject(CE->getOperand(1), Aliases))
+ return nullptr;
+ return findBaseObject(CE->getOperand(0), Aliases);
+ }
+ case Instruction::IntToPtr:
+ case Instruction::PtrToInt:
+ case Instruction::BitCast:
+ case Instruction::GetElementPtr:
+ return findBaseObject(CE->getOperand(0), Aliases);
+ default:
+ break;
+ }
+ }
return nullptr;
}
+const GlobalObject *GlobalValue::getAliaseeObject() const {
+ DenseSet<const GlobalAlias *> Aliases;
+ return findBaseObject(this, Aliases);
+}
+
bool GlobalValue::isAbsoluteSymbolRef() const {
auto *GO = dyn_cast<GlobalObject>(this);
if (!GO)
@@ -421,63 +451,15 @@ void GlobalVariable::dropAllReferences() {
}
//===----------------------------------------------------------------------===//
-// GlobalIndirectSymbol Implementation
-//===----------------------------------------------------------------------===//
-
-GlobalIndirectSymbol::GlobalIndirectSymbol(Type *Ty, ValueTy VTy,
- unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name,
- Constant *Symbol)
- : GlobalValue(Ty, VTy, &Op<0>(), 1, Linkage, Name, AddressSpace) {
- Op<0>() = Symbol;
-}
-
-static const GlobalObject *
-findBaseObject(const Constant *C, DenseSet<const GlobalAlias *> &Aliases) {
- if (auto *GO = dyn_cast<GlobalObject>(C))
- return GO;
- if (auto *GA = dyn_cast<GlobalAlias>(C))
- if (Aliases.insert(GA).second)
- return findBaseObject(GA->getOperand(0), Aliases);
- if (auto *CE = dyn_cast<ConstantExpr>(C)) {
- switch (CE->getOpcode()) {
- case Instruction::Add: {
- auto *LHS = findBaseObject(CE->getOperand(0), Aliases);
- auto *RHS = findBaseObject(CE->getOperand(1), Aliases);
- if (LHS && RHS)
- return nullptr;
- return LHS ? LHS : RHS;
- }
- case Instruction::Sub: {
- if (findBaseObject(CE->getOperand(1), Aliases))
- return nullptr;
- return findBaseObject(CE->getOperand(0), Aliases);
- }
- case Instruction::IntToPtr:
- case Instruction::PtrToInt:
- case Instruction::BitCast:
- case Instruction::GetElementPtr:
- return findBaseObject(CE->getOperand(0), Aliases);
- default:
- break;
- }
- }
- return nullptr;
-}
-
-const GlobalObject *GlobalIndirectSymbol::getBaseObject() const {
- DenseSet<const GlobalAlias *> Aliases;
- return findBaseObject(getOperand(0), Aliases);
-}
-
-//===----------------------------------------------------------------------===//
// GlobalAlias Implementation
//===----------------------------------------------------------------------===//
GlobalAlias::GlobalAlias(Type *Ty, unsigned AddressSpace, LinkageTypes Link,
const Twine &Name, Constant *Aliasee,
Module *ParentModule)
- : GlobalIndirectSymbol(Ty, Value::GlobalAliasVal, AddressSpace, Link, Name,
- Aliasee) {
+ : GlobalValue(Ty, Value::GlobalAliasVal, &Op<0>(), 1, Link, Name,
+ AddressSpace) {
+ setAliasee(Aliasee);
if (ParentModule)
ParentModule->getAliasList().push_back(this);
}
@@ -521,7 +503,12 @@ void GlobalAlias::eraseFromParent() {
void GlobalAlias::setAliasee(Constant *Aliasee) {
assert((!Aliasee || Aliasee->getType() == getType()) &&
"Alias and aliasee types should match!");
- setIndirectSymbol(Aliasee);
+ Op<0>().set(Aliasee);
+}
+
+const GlobalObject *GlobalAlias::getAliaseeObject() const {
+ DenseSet<const GlobalAlias *> Aliases;
+ return findBaseObject(getOperand(0), Aliases);
}
//===----------------------------------------------------------------------===//
@@ -531,8 +518,9 @@ void GlobalAlias::setAliasee(Constant *Aliasee) {
GlobalIFunc::GlobalIFunc(Type *Ty, unsigned AddressSpace, LinkageTypes Link,
const Twine &Name, Constant *Resolver,
Module *ParentModule)
- : GlobalIndirectSymbol(Ty, Value::GlobalIFuncVal, AddressSpace, Link, Name,
- Resolver) {
+ : GlobalObject(Ty, Value::GlobalIFuncVal, &Op<0>(), 1, Link, Name,
+ AddressSpace) {
+ setResolver(Resolver);
if (ParentModule)
ParentModule->getIFuncList().push_back(this);
}
@@ -550,3 +538,8 @@ void GlobalIFunc::removeFromParent() {
void GlobalIFunc::eraseFromParent() {
getParent()->getIFuncList().erase(getIterator());
}
+
+const Function *GlobalIFunc::getResolverFunction() const {
+ DenseSet<const GlobalAlias *> Aliases;
+ return dyn_cast<Function>(findBaseObject(getResolver(), Aliases));
+}
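A minimal usage sketch of the getAliaseeObject()/getResolverFunction() accessors added in this hunk; the Module M, the symbol names, and the surrounding setup are assumed for illustration only and are not part of the patch.

    // Resolve an alias chain (@b -> @a -> @g) down to its defining GlobalObject.
    if (auto *GA = dyn_cast<GlobalAlias>(M.getNamedValue("b")))
      if (const GlobalObject *GO = GA->getAliaseeObject())
        assert(GO->getName() == "g" && "alias chain resolves to the global");

    // An ifunc's resolver can likewise be chased to the underlying Function.
    if (auto *IF = dyn_cast<GlobalIFunc>(M.getNamedValue("ifn"))) {
      const Function *Resolver = IF->getResolverFunction();
      (void)Resolver; // resolver behind any bitcasts or intermediate aliases
    }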
diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp
index 0f4945bad5ab..98f6ccf81973 100644
--- a/llvm/lib/IR/IRBuilder.cpp
+++ b/llvm/lib/IR/IRBuilder.cpp
@@ -94,11 +94,22 @@ Value *IRBuilderBase::CreateVScale(Constant *Scaling, const Twine &Name) {
}
Value *IRBuilderBase::CreateStepVector(Type *DstType, const Twine &Name) {
- if (isa<ScalableVectorType>(DstType))
- return CreateIntrinsic(Intrinsic::experimental_stepvector, {DstType}, {},
- nullptr, Name);
-
Type *STy = DstType->getScalarType();
+ if (isa<ScalableVectorType>(DstType)) {
+ Type *StepVecType = DstType;
+ // TODO: We expect this special case (element type < 8 bits) to be
+ // temporary - once the intrinsic properly supports < 8 bits this code
+ // can be removed.
+ if (STy->getScalarSizeInBits() < 8)
+ StepVecType =
+ VectorType::get(getInt8Ty(), cast<ScalableVectorType>(DstType));
+ Value *Res = CreateIntrinsic(Intrinsic::experimental_stepvector,
+ {StepVecType}, {}, nullptr, Name);
+ if (StepVecType != DstType)
+ Res = CreateTrunc(Res, DstType);
+ return Res;
+ }
+
unsigned NumEls = cast<FixedVectorType>(DstType)->getNumElements();
// Create a vector of consecutive numbers from zero to VF.
diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp
index 937dc6957806..a4659da7e807 100644
--- a/llvm/lib/IR/Instruction.cpp
+++ b/llvm/lib/IR/Instruction.cpp
@@ -141,6 +141,10 @@ bool Instruction::hasNoSignedWrap() const {
return cast<OverflowingBinaryOperator>(this)->hasNoSignedWrap();
}
+bool Instruction::hasPoisonGeneratingFlags() const {
+ return cast<Operator>(this)->hasPoisonGeneratingFlags();
+}
+
void Instruction::dropPoisonGeneratingFlags() {
switch (getOpcode()) {
case Instruction::Add:
@@ -163,6 +167,8 @@ void Instruction::dropPoisonGeneratingFlags() {
break;
}
// TODO: FastMathFlags!
+
+ assert(!hasPoisonGeneratingFlags() && "must be kept in sync");
}
void Instruction::dropUndefImplyingAttrsAndUnknownMetadata(
@@ -178,9 +184,9 @@ void Instruction::dropUndefImplyingAttrsAndUnknownMetadata(
if (AL.isEmpty())
return;
AttrBuilder UBImplyingAttributes = AttributeFuncs::getUBImplyingAttributes();
- for (unsigned ArgNo = 0; ArgNo < CB->getNumArgOperands(); ArgNo++)
+ for (unsigned ArgNo = 0; ArgNo < CB->arg_size(); ArgNo++)
CB->removeParamAttrs(ArgNo, UBImplyingAttributes);
- CB->removeAttributes(AttributeList::ReturnIndex, UBImplyingAttributes);
+ CB->removeRetAttrs(UBImplyingAttributes);
}
bool Instruction::isExact() const {
@@ -307,20 +313,20 @@ void Instruction::copyIRFlags(const Value *V, bool IncludeWrapFlags) {
if (auto *SrcGEP = dyn_cast<GetElementPtrInst>(V))
if (auto *DestGEP = dyn_cast<GetElementPtrInst>(this))
- DestGEP->setIsInBounds(SrcGEP->isInBounds() | DestGEP->isInBounds());
+ DestGEP->setIsInBounds(SrcGEP->isInBounds() || DestGEP->isInBounds());
}
void Instruction::andIRFlags(const Value *V) {
if (auto *OB = dyn_cast<OverflowingBinaryOperator>(V)) {
if (isa<OverflowingBinaryOperator>(this)) {
- setHasNoSignedWrap(hasNoSignedWrap() & OB->hasNoSignedWrap());
- setHasNoUnsignedWrap(hasNoUnsignedWrap() & OB->hasNoUnsignedWrap());
+ setHasNoSignedWrap(hasNoSignedWrap() && OB->hasNoSignedWrap());
+ setHasNoUnsignedWrap(hasNoUnsignedWrap() && OB->hasNoUnsignedWrap());
}
}
if (auto *PE = dyn_cast<PossiblyExactOperator>(V))
if (isa<PossiblyExactOperator>(this))
- setIsExact(isExact() & PE->isExact());
+ setIsExact(isExact() && PE->isExact());
if (auto *FP = dyn_cast<FPMathOperator>(V)) {
if (isa<FPMathOperator>(this)) {
@@ -332,7 +338,7 @@ void Instruction::andIRFlags(const Value *V) {
if (auto *SrcGEP = dyn_cast<GetElementPtrInst>(V))
if (auto *DestGEP = dyn_cast<GetElementPtrInst>(this))
- DestGEP->setIsInBounds(SrcGEP->isInBounds() & DestGEP->isInBounds());
+ DestGEP->setIsInBounds(SrcGEP->isInBounds() && DestGEP->isInBounds());
}
const char *Instruction::getOpcodeName(unsigned OpCode) {
diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
index 5b01c70dec8d..c42df49d97ea 100644
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp
@@ -318,9 +318,8 @@ bool CallBase::isReturnNonNull() const {
if (hasRetAttr(Attribute::NonNull))
return true;
- if (getDereferenceableBytes(AttributeList::ReturnIndex) > 0 &&
- !NullPointerIsDefined(getCaller(),
- getType()->getPointerAddressSpace()))
+ if (getRetDereferenceableBytes() > 0 &&
+ !NullPointerIsDefined(getCaller(), getType()->getPointerAddressSpace()))
return true;
return false;
@@ -329,11 +328,10 @@ bool CallBase::isReturnNonNull() const {
Value *CallBase::getReturnedArgOperand() const {
unsigned Index;
- if (Attrs.hasAttrSomewhere(Attribute::Returned, &Index) && Index)
+ if (Attrs.hasAttrSomewhere(Attribute::Returned, &Index))
return getArgOperand(Index - AttributeList::FirstArgIndex);
if (const Function *F = getCalledFunction())
- if (F->getAttributes().hasAttrSomewhere(Attribute::Returned, &Index) &&
- Index)
+ if (F->getAttributes().hasAttrSomewhere(Attribute::Returned, &Index))
return getArgOperand(Index - AttributeList::FirstArgIndex);
return nullptr;
@@ -341,24 +339,36 @@ Value *CallBase::getReturnedArgOperand() const {
/// Determine whether the argument or parameter has the given attribute.
bool CallBase::paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const {
- assert(ArgNo < getNumArgOperands() && "Param index out of bounds!");
+ assert(ArgNo < arg_size() && "Param index out of bounds!");
- if (Attrs.hasParamAttribute(ArgNo, Kind))
+ if (Attrs.hasParamAttr(ArgNo, Kind))
return true;
if (const Function *F = getCalledFunction())
- return F->getAttributes().hasParamAttribute(ArgNo, Kind);
+ return F->getAttributes().hasParamAttr(ArgNo, Kind);
return false;
}
bool CallBase::hasFnAttrOnCalledFunction(Attribute::AttrKind Kind) const {
- if (const Function *F = getCalledFunction())
- return F->getAttributes().hasFnAttribute(Kind);
+ Value *V = getCalledOperand();
+ if (auto *CE = dyn_cast<ConstantExpr>(V))
+ if (CE->getOpcode() == BitCast)
+ V = CE->getOperand(0);
+
+ if (auto *F = dyn_cast<Function>(V))
+ return F->getAttributes().hasFnAttr(Kind);
+
return false;
}
bool CallBase::hasFnAttrOnCalledFunction(StringRef Kind) const {
- if (const Function *F = getCalledFunction())
- return F->getAttributes().hasFnAttribute(Kind);
+ Value *V = getCalledOperand();
+ if (auto *CE = dyn_cast<ConstantExpr>(V))
+ if (CE->getOpcode() == BitCast)
+ V = CE->getOperand(0);
+
+ if (auto *F = dyn_cast<Function>(V))
+ return F->getAttributes().hasFnAttr(Kind);
+
return false;
}
@@ -933,7 +943,7 @@ void CallBrInst::updateArgBlockAddresses(unsigned i, BasicBlock *B) {
if (BasicBlock *OldBB = getIndirectDest(i)) {
BlockAddress *Old = BlockAddress::get(OldBB);
BlockAddress *New = BlockAddress::get(B);
- for (unsigned ArgNo = 0, e = getNumArgOperands(); ArgNo != e; ++ArgNo)
+ for (unsigned ArgNo = 0, e = arg_size(); ArgNo != e; ++ArgNo)
if (dyn_cast<BlockAddress>(getArgOperand(ArgNo)) == Old)
setArgOperand(ArgNo, New);
}
@@ -1909,6 +1919,32 @@ bool InsertElementInst::isValidOperands(const Value *Vec, const Value *Elt,
// ShuffleVectorInst Implementation
//===----------------------------------------------------------------------===//
+static Value *createPlaceholderForShuffleVector(Value *V) {
+ assert(V && "Cannot create placeholder of nullptr V");
+ return PoisonValue::get(V->getType());
+}
+
+ShuffleVectorInst::ShuffleVectorInst(Value *V1, Value *Mask, const Twine &Name,
+ Instruction *InsertBefore)
+ : ShuffleVectorInst(V1, createPlaceholderForShuffleVector(V1), Mask, Name,
+ InsertBefore) {}
+
+ShuffleVectorInst::ShuffleVectorInst(Value *V1, Value *Mask, const Twine &Name,
+ BasicBlock *InsertAtEnd)
+ : ShuffleVectorInst(V1, createPlaceholderForShuffleVector(V1), Mask, Name,
+ InsertAtEnd) {}
+
+ShuffleVectorInst::ShuffleVectorInst(Value *V1, ArrayRef<int> Mask,
+ const Twine &Name,
+ Instruction *InsertBefore)
+ : ShuffleVectorInst(V1, createPlaceholderForShuffleVector(V1), Mask, Name,
+ InsertBefore) {}
+
+ShuffleVectorInst::ShuffleVectorInst(Value *V1, ArrayRef<int> Mask,
+ const Twine &Name, BasicBlock *InsertAtEnd)
+ : ShuffleVectorInst(V1, createPlaceholderForShuffleVector(V1), Mask, Name,
+ InsertAtEnd) {}
+
ShuffleVectorInst::ShuffleVectorInst(Value *V1, Value *V2, Value *Mask,
const Twine &Name,
Instruction *InsertBefore)
@@ -2259,6 +2295,80 @@ bool ShuffleVectorInst::isExtractSubvectorMask(ArrayRef<int> Mask,
return false;
}
+bool ShuffleVectorInst::isInsertSubvectorMask(ArrayRef<int> Mask,
+ int NumSrcElts, int &NumSubElts,
+ int &Index) {
+ int NumMaskElts = Mask.size();
+
+ // Don't try to match if we're shuffling to a smaller size.
+ if (NumMaskElts < NumSrcElts)
+ return false;
+
+ // TODO: We don't recognize self-insertion/widening.
+ if (isSingleSourceMaskImpl(Mask, NumSrcElts))
+ return false;
+
+ // Determine which mask elements are attributed to which source.
+ APInt UndefElts = APInt::getZero(NumMaskElts);
+ APInt Src0Elts = APInt::getZero(NumMaskElts);
+ APInt Src1Elts = APInt::getZero(NumMaskElts);
+ bool Src0Identity = true;
+ bool Src1Identity = true;
+
+ for (int i = 0; i != NumMaskElts; ++i) {
+ int M = Mask[i];
+ if (M < 0) {
+ UndefElts.setBit(i);
+ continue;
+ }
+ if (M < NumSrcElts) {
+ Src0Elts.setBit(i);
+ Src0Identity &= (M == i);
+ continue;
+ }
+ Src1Elts.setBit(i);
+ Src1Identity &= (M == (i + NumSrcElts));
+ continue;
+ }
+ assert((Src0Elts | Src1Elts | UndefElts).isAllOnes() &&
+ "unknown shuffle elements");
+ assert(!Src0Elts.isZero() && !Src1Elts.isZero() &&
+ "2-source shuffle not found");
+
+ // Determine lo/hi span ranges.
+ // TODO: How should we handle undefs at the start of subvector insertions?
+ int Src0Lo = Src0Elts.countTrailingZeros();
+ int Src1Lo = Src1Elts.countTrailingZeros();
+ int Src0Hi = NumMaskElts - Src0Elts.countLeadingZeros();
+ int Src1Hi = NumMaskElts - Src1Elts.countLeadingZeros();
+
+ // If src0 is in place, see if the src1 elements are in place within their
+ // own span.
+ if (Src0Identity) {
+ int NumSub1Elts = Src1Hi - Src1Lo;
+ ArrayRef<int> Sub1Mask = Mask.slice(Src1Lo, NumSub1Elts);
+ if (isIdentityMaskImpl(Sub1Mask, NumSrcElts)) {
+ NumSubElts = NumSub1Elts;
+ Index = Src1Lo;
+ return true;
+ }
+ }
+
+ // If src1 is in place, see if the src0 elements are in place within their
+ // own span.
+ if (Src1Identity) {
+ int NumSub0Elts = Src0Hi - Src0Lo;
+ ArrayRef<int> Sub0Mask = Mask.slice(Src0Lo, NumSub0Elts);
+ if (isIdentityMaskImpl(Sub0Mask, NumSrcElts)) {
+ NumSubElts = NumSub0Elts;
+ Index = Src0Lo;
+ return true;
+ }
+ }
+
+ return false;
+}
+
bool ShuffleVectorInst::isIdentityWithPadding() const {
if (isa<UndefValue>(Op<2>()))
return false;
@@ -2326,6 +2436,87 @@ bool ShuffleVectorInst::isConcat() const {
return isIdentityMaskImpl(getShuffleMask(), NumMaskElts);
}
+static bool isReplicationMaskWithParams(ArrayRef<int> Mask,
+ int ReplicationFactor, int VF) {
+ assert(Mask.size() == (unsigned)ReplicationFactor * VF &&
+ "Unexpected mask size.");
+
+ for (int CurrElt : seq(0, VF)) {
+ ArrayRef<int> CurrSubMask = Mask.take_front(ReplicationFactor);
+ assert(CurrSubMask.size() == (unsigned)ReplicationFactor &&
+ "Run out of mask?");
+ Mask = Mask.drop_front(ReplicationFactor);
+ if (!all_of(CurrSubMask, [CurrElt](int MaskElt) {
+ return MaskElt == UndefMaskElem || MaskElt == CurrElt;
+ }))
+ return false;
+ }
+ assert(Mask.empty() && "Did not consume the whole mask?");
+
+ return true;
+}
+
+bool ShuffleVectorInst::isReplicationMask(ArrayRef<int> Mask,
+ int &ReplicationFactor, int &VF) {
+ // The undef-less case is trivial.
+ if (none_of(Mask, [](int MaskElt) { return MaskElt == UndefMaskElem; })) {
+ ReplicationFactor =
+ Mask.take_while([](int MaskElt) { return MaskElt == 0; }).size();
+ if (ReplicationFactor == 0 || Mask.size() % ReplicationFactor != 0)
+ return false;
+ VF = Mask.size() / ReplicationFactor;
+ return isReplicationMaskWithParams(Mask, ReplicationFactor, VF);
+ }
+
+ // However, if the mask contains undefs, we have to enumerate possible tuples
+ // and pick one. There are bounds on the replication factor: [1, mask size]
+ // (where RF=1 is an identity shuffle and RF=mask size is a broadcast shuffle).
+ // Additionally, the mask size is the replication factor multiplied by the
+ // vector size, which further significantly reduces the search space.
+
+ // Before doing that, let's perform a basic sanity check.
+ int Largest = -1;
+ for (int MaskElt : Mask) {
+ if (MaskElt == UndefMaskElem)
+ continue;
+ // Elements must be in non-decreasing order.
+ if (MaskElt < Largest)
+ return false;
+ Largest = std::max(Largest, MaskElt);
+ }
+
+ // Prefer larger replication factor if all else equal.
+ for (int PossibleReplicationFactor :
+ reverse(seq_inclusive<unsigned>(1, Mask.size()))) {
+ if (Mask.size() % PossibleReplicationFactor != 0)
+ continue;
+ int PossibleVF = Mask.size() / PossibleReplicationFactor;
+ if (!isReplicationMaskWithParams(Mask, PossibleReplicationFactor,
+ PossibleVF))
+ continue;
+ ReplicationFactor = PossibleReplicationFactor;
+ VF = PossibleVF;
+ return true;
+ }
+
+ return false;
+}
+
+bool ShuffleVectorInst::isReplicationMask(int &ReplicationFactor,
+ int &VF) const {
+ // Not possible to express a shuffle mask for a scalable vector for this
+ // case.
+ if (isa<ScalableVectorType>(getType()))
+ return false;
+
+ VF = cast<FixedVectorType>(Op<0>()->getType())->getNumElements();
+ if (ShuffleMask.size() % VF != 0)
+ return false;
+ ReplicationFactor = ShuffleMask.size() / VF;
+
+ return isReplicationMaskWithParams(ShuffleMask, ReplicationFactor, VF);
+}
+
//===----------------------------------------------------------------------===//
// InsertValueInst Class
//===----------------------------------------------------------------------===//
@@ -3945,6 +4136,35 @@ bool CmpInst::isSigned(Predicate predicate) {
}
}
+bool ICmpInst::compare(const APInt &LHS, const APInt &RHS,
+ ICmpInst::Predicate Pred) {
+ assert(ICmpInst::isIntPredicate(Pred) && "Only for integer predicates!");
+ switch (Pred) {
+ case ICmpInst::Predicate::ICMP_EQ:
+ return LHS.eq(RHS);
+ case ICmpInst::Predicate::ICMP_NE:
+ return LHS.ne(RHS);
+ case ICmpInst::Predicate::ICMP_UGT:
+ return LHS.ugt(RHS);
+ case ICmpInst::Predicate::ICMP_UGE:
+ return LHS.uge(RHS);
+ case ICmpInst::Predicate::ICMP_ULT:
+ return LHS.ult(RHS);
+ case ICmpInst::Predicate::ICMP_ULE:
+ return LHS.ule(RHS);
+ case ICmpInst::Predicate::ICMP_SGT:
+ return LHS.sgt(RHS);
+ case ICmpInst::Predicate::ICMP_SGE:
+ return LHS.sge(RHS);
+ case ICmpInst::Predicate::ICMP_SLT:
+ return LHS.slt(RHS);
+ case ICmpInst::Predicate::ICMP_SLE:
+ return LHS.sle(RHS);
+ default:
+ llvm_unreachable("Unexpected non-integer predicate.");
+ };
+}
+
CmpInst::Predicate CmpInst::getFlippedSignednessPredicate(Predicate pred) {
assert(CmpInst::isRelational(pred) &&
"Call only with non-equality predicates!");
diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp
index 19942fa187fd..7552906fd07a 100644
--- a/llvm/lib/IR/IntrinsicInst.cpp
+++ b/llvm/lib/IR/IntrinsicInst.cpp
@@ -188,26 +188,26 @@ Value *InstrProfIncrementInst::getStep() const {
}
Optional<RoundingMode> ConstrainedFPIntrinsic::getRoundingMode() const {
- unsigned NumOperands = getNumArgOperands();
+ unsigned NumOperands = arg_size();
Metadata *MD = nullptr;
auto *MAV = dyn_cast<MetadataAsValue>(getArgOperand(NumOperands - 2));
if (MAV)
MD = MAV->getMetadata();
if (!MD || !isa<MDString>(MD))
return None;
- return StrToRoundingMode(cast<MDString>(MD)->getString());
+ return convertStrToRoundingMode(cast<MDString>(MD)->getString());
}
Optional<fp::ExceptionBehavior>
ConstrainedFPIntrinsic::getExceptionBehavior() const {
- unsigned NumOperands = getNumArgOperands();
+ unsigned NumOperands = arg_size();
Metadata *MD = nullptr;
auto *MAV = dyn_cast<MetadataAsValue>(getArgOperand(NumOperands - 1));
if (MAV)
MD = MAV->getMetadata();
if (!MD || !isa<MDString>(MD))
return None;
- return StrToExceptionBehavior(cast<MDString>(MD)->getString());
+ return convertStrToExceptionBehavior(cast<MDString>(MD)->getString());
}
bool ConstrainedFPIntrinsic::isDefaultFPEnvironment() const {
@@ -473,8 +473,17 @@ Function *VPIntrinsic::getDeclarationForParams(Module *M, Intrinsic::ID VPID,
assert(isVPIntrinsic(VPID) && "not a VP intrinsic");
Function *VPFunc;
switch (VPID) {
- default:
- VPFunc = Intrinsic::getDeclaration(M, VPID, Params[0]->getType());
+ default: {
+ Type *OverloadTy = Params[0]->getType();
+ if (VPReductionIntrinsic::isVPReduction(VPID))
+ OverloadTy =
+ Params[*VPReductionIntrinsic::getVectorParamPos(VPID)]->getType();
+
+ VPFunc = Intrinsic::getDeclaration(M, VPID, OverloadTy);
+ break;
+ }
+ case Intrinsic::vp_select:
+ VPFunc = Intrinsic::getDeclaration(M, VPID, {Params[1]->getType()});
break;
case Intrinsic::vp_load:
VPFunc = Intrinsic::getDeclaration(
@@ -504,6 +513,48 @@ Function *VPIntrinsic::getDeclarationForParams(Module *M, Intrinsic::ID VPID,
return VPFunc;
}
+bool VPReductionIntrinsic::isVPReduction(Intrinsic::ID ID) {
+ switch (ID) {
+ default:
+ return false;
+#define HANDLE_VP_REDUCTION(VPID, STARTPOS, VECTORPOS) \
+ case Intrinsic::VPID: \
+ break;
+#include "llvm/IR/VPIntrinsics.def"
+ }
+ return true;
+}
+
+unsigned VPReductionIntrinsic::getVectorParamPos() const {
+ return *VPReductionIntrinsic::getVectorParamPos(getIntrinsicID());
+}
+
+unsigned VPReductionIntrinsic::getStartParamPos() const {
+ return *VPReductionIntrinsic::getStartParamPos(getIntrinsicID());
+}
+
+Optional<unsigned> VPReductionIntrinsic::getVectorParamPos(Intrinsic::ID ID) {
+ switch (ID) {
+#define HANDLE_VP_REDUCTION(VPID, STARTPOS, VECTORPOS) \
+ case Intrinsic::VPID: \
+ return VECTORPOS;
+#include "llvm/IR/VPIntrinsics.def"
+ default:
+ return None;
+ }
+}
+
+Optional<unsigned> VPReductionIntrinsic::getStartParamPos(Intrinsic::ID ID) {
+ switch (ID) {
+#define HANDLE_VP_REDUCTION(VPID, STARTPOS, VECTORPOS) \
+ case Intrinsic::VPID: \
+ return STARTPOS;
+#include "llvm/IR/VPIntrinsics.def"
+ default:
+ return None;
+ }
+}
+
Instruction::BinaryOps BinaryOpIntrinsic::getBinaryOp() const {
switch (getIntrinsicID()) {
case Intrinsic::uadd_with_overflow:
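A sketch of querying the new VPReductionIntrinsic helpers added above; the intrinsic ID shown is only an example, and the snippet is illustrative rather than part of the patch.

    if (VPReductionIntrinsic::isVPReduction(Intrinsic::vp_reduce_add)) {
      // Operand indices of the vector being reduced and of the start value.
      unsigned VecPos =
          *VPReductionIntrinsic::getVectorParamPos(Intrinsic::vp_reduce_add);
      unsigned StartPos =
          *VPReductionIntrinsic::getStartParamPos(Intrinsic::vp_reduce_add);
      (void)VecPos; (void)StartPos;
    }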
diff --git a/llvm/lib/IR/LLVMContext.cpp b/llvm/lib/IR/LLVMContext.cpp
index c4a713db455b..90716d9c81a6 100644
--- a/llvm/lib/IR/LLVMContext.cpp
+++ b/llvm/lib/IR/LLVMContext.cpp
@@ -348,6 +348,12 @@ std::unique_ptr<DiagnosticHandler> LLVMContext::getDiagnosticHandler() {
return std::move(pImpl->DiagHandler);
}
+void LLVMContext::enableOpaquePointers() const {
+ assert(pImpl->PointerTypes.empty() && pImpl->ASPointerTypes.empty() &&
+ "Must be called before creating any pointer types");
+ pImpl->setOpaquePointers(true);
+}
+
bool LLVMContext::supportsTypedPointers() const {
- return !pImpl->ForceOpaquePointers;
+ return !pImpl->getOpaquePointers();
}
diff --git a/llvm/lib/IR/LLVMContextImpl.cpp b/llvm/lib/IR/LLVMContextImpl.cpp
index 99819602c545..ebbf382aea38 100644
--- a/llvm/lib/IR/LLVMContextImpl.cpp
+++ b/llvm/lib/IR/LLVMContextImpl.cpp
@@ -23,9 +23,8 @@
using namespace llvm;
static cl::opt<bool>
- ForceOpaquePointersCL("force-opaque-pointers",
- cl::desc("Force all pointers to be opaque pointers"),
- cl::init(false));
+ OpaquePointersCL("opaque-pointers", cl::desc("Use opaque pointers"),
+ cl::init(false));
LLVMContextImpl::LLVMContextImpl(LLVMContext &C)
: DiagHandler(std::make_unique<DiagnosticHandler>()),
@@ -36,8 +35,7 @@ LLVMContextImpl::LLVMContextImpl(LLVMContext &C)
X86_FP80Ty(C, Type::X86_FP80TyID), FP128Ty(C, Type::FP128TyID),
PPC_FP128Ty(C, Type::PPC_FP128TyID), X86_MMXTy(C, Type::X86_MMXTyID),
X86_AMXTy(C, Type::X86_AMXTyID), Int1Ty(C, 1), Int8Ty(C, 8),
- Int16Ty(C, 16), Int32Ty(C, 32), Int64Ty(C, 64), Int128Ty(C, 128),
- ForceOpaquePointers(ForceOpaquePointersCL) {}
+ Int16Ty(C, 16), Int32Ty(C, 32), Int64Ty(C, 64), Int128Ty(C, 128) {}
LLVMContextImpl::~LLVMContextImpl() {
// NOTE: We need to delete the contents of OwnedModules, but Module's dtor
@@ -55,8 +53,15 @@ LLVMContextImpl::~LLVMContextImpl() {
// Drop references for MDNodes. Do this before Values get deleted to avoid
// unnecessary RAUW when nodes are still unresolved.
- for (auto *I : DistinctMDNodes)
+ for (auto *I : DistinctMDNodes) {
+ // We may have DIArgList nodes that were uniqued, and as they have a custom
+ // implementation of dropAllReferences, it needs to be invoked explicitly.
+ if (auto *AL = dyn_cast<DIArgList>(I)) {
+ AL->dropAllReferences();
+ continue;
+ }
I->dropAllReferences();
+ }
#define HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) \
for (auto *I : CLASS##s) \
I->dropAllReferences();
@@ -227,3 +232,11 @@ OptPassGate &LLVMContextImpl::getOptPassGate() const {
void LLVMContextImpl::setOptPassGate(OptPassGate& OPG) {
this->OPG = &OPG;
}
+
+bool LLVMContextImpl::getOpaquePointers() {
+ if (LLVM_UNLIKELY(!(OpaquePointers.hasValue())))
+ OpaquePointers = OpaquePointersCL;
+ return *OpaquePointers;
+}
+
+void LLVMContextImpl::setOpaquePointers(bool OP) { OpaquePointers = OP; }
diff --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h
index 2ae23fdc95a8..b2909c425846 100644
--- a/llvm/lib/IR/LLVMContextImpl.h
+++ b/llvm/lib/IR/LLVMContextImpl.h
@@ -61,7 +61,9 @@ using DenseMapAPIntKeyInfo = DenseMapInfo<APInt>;
struct DenseMapAPFloatKeyInfo {
static inline APFloat getEmptyKey() { return APFloat(APFloat::Bogus(), 1); }
- static inline APFloat getTombstoneKey() { return APFloat(APFloat::Bogus(), 2); }
+ static inline APFloat getTombstoneKey() {
+ return APFloat(APFloat::Bogus(), 2);
+ }
static unsigned getHashValue(const APFloat &Key) {
return static_cast<unsigned>(hash_value(Key));
@@ -74,46 +76,42 @@ struct DenseMapAPFloatKeyInfo {
struct AnonStructTypeKeyInfo {
struct KeyTy {
- ArrayRef<Type*> ETypes;
+ ArrayRef<Type *> ETypes;
bool isPacked;
- KeyTy(const ArrayRef<Type*>& E, bool P) :
- ETypes(E), isPacked(P) {}
+ KeyTy(const ArrayRef<Type *> &E, bool P) : ETypes(E), isPacked(P) {}
KeyTy(const StructType *ST)
: ETypes(ST->elements()), isPacked(ST->isPacked()) {}
- bool operator==(const KeyTy& that) const {
+ bool operator==(const KeyTy &that) const {
if (isPacked != that.isPacked)
return false;
if (ETypes != that.ETypes)
return false;
return true;
}
- bool operator!=(const KeyTy& that) const {
- return !this->operator==(that);
- }
+ bool operator!=(const KeyTy &that) const { return !this->operator==(that); }
};
- static inline StructType* getEmptyKey() {
- return DenseMapInfo<StructType*>::getEmptyKey();
+ static inline StructType *getEmptyKey() {
+ return DenseMapInfo<StructType *>::getEmptyKey();
}
- static inline StructType* getTombstoneKey() {
- return DenseMapInfo<StructType*>::getTombstoneKey();
+ static inline StructType *getTombstoneKey() {
+ return DenseMapInfo<StructType *>::getTombstoneKey();
}
- static unsigned getHashValue(const KeyTy& Key) {
- return hash_combine(hash_combine_range(Key.ETypes.begin(),
- Key.ETypes.end()),
- Key.isPacked);
+ static unsigned getHashValue(const KeyTy &Key) {
+ return hash_combine(
+ hash_combine_range(Key.ETypes.begin(), Key.ETypes.end()), Key.isPacked);
}
static unsigned getHashValue(const StructType *ST) {
return getHashValue(KeyTy(ST));
}
- static bool isEqual(const KeyTy& LHS, const StructType *RHS) {
+ static bool isEqual(const KeyTy &LHS, const StructType *RHS) {
if (RHS == getEmptyKey() || RHS == getTombstoneKey())
return false;
return LHS == KeyTy(RHS);
@@ -127,16 +125,16 @@ struct AnonStructTypeKeyInfo {
struct FunctionTypeKeyInfo {
struct KeyTy {
const Type *ReturnType;
- ArrayRef<Type*> Params;
+ ArrayRef<Type *> Params;
bool isVarArg;
- KeyTy(const Type* R, const ArrayRef<Type*>& P, bool V) :
- ReturnType(R), Params(P), isVarArg(V) {}
+ KeyTy(const Type *R, const ArrayRef<Type *> &P, bool V)
+ : ReturnType(R), Params(P), isVarArg(V) {}
KeyTy(const FunctionType *FT)
: ReturnType(FT->getReturnType()), Params(FT->params()),
isVarArg(FT->isVarArg()) {}
- bool operator==(const KeyTy& that) const {
+ bool operator==(const KeyTy &that) const {
if (ReturnType != that.ReturnType)
return false;
if (isVarArg != that.isVarArg)
@@ -145,31 +143,28 @@ struct FunctionTypeKeyInfo {
return false;
return true;
}
- bool operator!=(const KeyTy& that) const {
- return !this->operator==(that);
- }
+ bool operator!=(const KeyTy &that) const { return !this->operator==(that); }
};
- static inline FunctionType* getEmptyKey() {
- return DenseMapInfo<FunctionType*>::getEmptyKey();
+ static inline FunctionType *getEmptyKey() {
+ return DenseMapInfo<FunctionType *>::getEmptyKey();
}
- static inline FunctionType* getTombstoneKey() {
- return DenseMapInfo<FunctionType*>::getTombstoneKey();
+ static inline FunctionType *getTombstoneKey() {
+ return DenseMapInfo<FunctionType *>::getTombstoneKey();
}
- static unsigned getHashValue(const KeyTy& Key) {
- return hash_combine(Key.ReturnType,
- hash_combine_range(Key.Params.begin(),
- Key.Params.end()),
- Key.isVarArg);
+ static unsigned getHashValue(const KeyTy &Key) {
+ return hash_combine(
+ Key.ReturnType,
+ hash_combine_range(Key.Params.begin(), Key.Params.end()), Key.isVarArg);
}
static unsigned getHashValue(const FunctionType *FT) {
return getHashValue(KeyTy(FT));
}
- static bool isEqual(const KeyTy& LHS, const FunctionType *RHS) {
+ static bool isEqual(const KeyTy &LHS, const FunctionType *RHS) {
if (RHS == getEmptyKey() || RHS == getTombstoneKey())
return false;
return LHS == KeyTy(RHS);
@@ -412,14 +407,14 @@ template <> struct MDNodeKeyImpl<DIBasicType> {
Encoding(Encoding), Flags(Flags) {}
MDNodeKeyImpl(const DIBasicType *N)
: Tag(N->getTag()), Name(N->getRawName()), SizeInBits(N->getSizeInBits()),
- AlignInBits(N->getAlignInBits()), Encoding(N->getEncoding()), Flags(N->getFlags()) {}
+ AlignInBits(N->getAlignInBits()), Encoding(N->getEncoding()),
+ Flags(N->getFlags()) {}
bool isKeyOf(const DIBasicType *RHS) const {
return Tag == RHS->getTag() && Name == RHS->getRawName() &&
SizeInBits == RHS->getSizeInBits() &&
AlignInBits == RHS->getAlignInBits() &&
- Encoding == RHS->getEncoding() &&
- Flags == RHS->getFlags();
+ Encoding == RHS->getEncoding() && Flags == RHS->getFlags();
}
unsigned getHashValue() const {
@@ -471,23 +466,24 @@ template <> struct MDNodeKeyImpl<DIDerivedType> {
Optional<unsigned> DWARFAddressSpace;
unsigned Flags;
Metadata *ExtraData;
+ Metadata *Annotations;
MDNodeKeyImpl(unsigned Tag, MDString *Name, Metadata *File, unsigned Line,
Metadata *Scope, Metadata *BaseType, uint64_t SizeInBits,
uint32_t AlignInBits, uint64_t OffsetInBits,
Optional<unsigned> DWARFAddressSpace, unsigned Flags,
- Metadata *ExtraData)
+ Metadata *ExtraData, Metadata *Annotations)
: Tag(Tag), Name(Name), File(File), Line(Line), Scope(Scope),
BaseType(BaseType), SizeInBits(SizeInBits), OffsetInBits(OffsetInBits),
AlignInBits(AlignInBits), DWARFAddressSpace(DWARFAddressSpace),
- Flags(Flags), ExtraData(ExtraData) {}
+ Flags(Flags), ExtraData(ExtraData), Annotations(Annotations) {}
MDNodeKeyImpl(const DIDerivedType *N)
: Tag(N->getTag()), Name(N->getRawName()), File(N->getRawFile()),
Line(N->getLine()), Scope(N->getRawScope()),
BaseType(N->getRawBaseType()), SizeInBits(N->getSizeInBits()),
OffsetInBits(N->getOffsetInBits()), AlignInBits(N->getAlignInBits()),
DWARFAddressSpace(N->getDWARFAddressSpace()), Flags(N->getFlags()),
- ExtraData(N->getRawExtraData()) {}
+ ExtraData(N->getRawExtraData()), Annotations(N->getRawAnnotations()) {}
bool isKeyOf(const DIDerivedType *RHS) const {
return Tag == RHS->getTag() && Name == RHS->getRawName() &&
@@ -497,8 +493,8 @@ template <> struct MDNodeKeyImpl<DIDerivedType> {
AlignInBits == RHS->getAlignInBits() &&
OffsetInBits == RHS->getOffsetInBits() &&
DWARFAddressSpace == RHS->getDWARFAddressSpace() &&
- Flags == RHS->getFlags() &&
- ExtraData == RHS->getRawExtraData();
+ Flags == RHS->getFlags() && ExtraData == RHS->getRawExtraData() &&
+ Annotations == RHS->getRawAnnotations();
}
unsigned getHashValue() const {
@@ -525,7 +521,8 @@ template <> struct MDNodeSubsetEqualImpl<DIDerivedType> {
return isODRMember(LHS.Tag, LHS.Scope, LHS.Name, RHS);
}
- static bool isSubsetEqual(const DIDerivedType *LHS, const DIDerivedType *RHS) {
+ static bool isSubsetEqual(const DIDerivedType *LHS,
+ const DIDerivedType *RHS) {
return isODRMember(LHS->getTag(), LHS->getRawScope(), LHS->getRawName(),
RHS);
}
@@ -569,6 +566,7 @@ template <> struct MDNodeKeyImpl<DICompositeType> {
Metadata *Associated;
Metadata *Allocated;
Metadata *Rank;
+ Metadata *Annotations;
MDNodeKeyImpl(unsigned Tag, MDString *Name, Metadata *File, unsigned Line,
Metadata *Scope, Metadata *BaseType, uint64_t SizeInBits,
@@ -577,14 +575,15 @@ template <> struct MDNodeKeyImpl<DICompositeType> {
Metadata *VTableHolder, Metadata *TemplateParams,
MDString *Identifier, Metadata *Discriminator,
Metadata *DataLocation, Metadata *Associated,
- Metadata *Allocated, Metadata *Rank)
+ Metadata *Allocated, Metadata *Rank, Metadata *Annotations)
: Tag(Tag), Name(Name), File(File), Line(Line), Scope(Scope),
BaseType(BaseType), SizeInBits(SizeInBits), OffsetInBits(OffsetInBits),
AlignInBits(AlignInBits), Flags(Flags), Elements(Elements),
RuntimeLang(RuntimeLang), VTableHolder(VTableHolder),
TemplateParams(TemplateParams), Identifier(Identifier),
Discriminator(Discriminator), DataLocation(DataLocation),
- Associated(Associated), Allocated(Allocated), Rank(Rank) {}
+ Associated(Associated), Allocated(Allocated), Rank(Rank),
+ Annotations(Annotations) {}
MDNodeKeyImpl(const DICompositeType *N)
: Tag(N->getTag()), Name(N->getRawName()), File(N->getRawFile()),
Line(N->getLine()), Scope(N->getRawScope()),
@@ -597,7 +596,7 @@ template <> struct MDNodeKeyImpl<DICompositeType> {
Discriminator(N->getRawDiscriminator()),
DataLocation(N->getRawDataLocation()),
Associated(N->getRawAssociated()), Allocated(N->getRawAllocated()),
- Rank(N->getRawRank()) {}
+ Rank(N->getRawRank()), Annotations(N->getRawAnnotations()) {}
bool isKeyOf(const DICompositeType *RHS) const {
return Tag == RHS->getTag() && Name == RHS->getRawName() &&
@@ -614,7 +613,8 @@ template <> struct MDNodeKeyImpl<DICompositeType> {
Discriminator == RHS->getRawDiscriminator() &&
DataLocation == RHS->getRawDataLocation() &&
Associated == RHS->getRawAssociated() &&
- Allocated == RHS->getRawAllocated() && Rank == RHS->getRawRank();
+ Allocated == RHS->getRawAllocated() && Rank == RHS->getRawRank() &&
+ Annotations == RHS->getRawAnnotations();
}
unsigned getHashValue() const {
@@ -623,7 +623,7 @@ template <> struct MDNodeKeyImpl<DICompositeType> {
// collision "most of the time". There is no correctness issue in case of
// collision because of the full check above.
return hash_combine(Name, File, Line, BaseType, Scope, Elements,
- TemplateParams);
+ TemplateParams, Annotations);
}
};
@@ -663,14 +663,13 @@ template <> struct MDNodeKeyImpl<DIFile> {
bool isKeyOf(const DIFile *RHS) const {
return Filename == RHS->getRawFilename() &&
Directory == RHS->getRawDirectory() &&
- Checksum == RHS->getRawChecksum() &&
- Source == RHS->getRawSource();
+ Checksum == RHS->getRawChecksum() && Source == RHS->getRawSource();
}
unsigned getHashValue() const {
- return hash_combine(
- Filename, Directory, Checksum ? Checksum->Kind : 0,
- Checksum ? Checksum->Value : nullptr, Source.getValueOr(nullptr));
+ return hash_combine(Filename, Directory, Checksum ? Checksum->Kind : 0,
+ Checksum ? Checksum->Value : nullptr,
+ Source.getValueOr(nullptr));
}
};
@@ -692,6 +691,7 @@ template <> struct MDNodeKeyImpl<DISubprogram> {
Metadata *Declaration;
Metadata *RetainedNodes;
Metadata *ThrownTypes;
+ Metadata *Annotations;
MDNodeKeyImpl(Metadata *Scope, MDString *Name, MDString *LinkageName,
Metadata *File, unsigned Line, Metadata *Type,
@@ -699,13 +699,14 @@ template <> struct MDNodeKeyImpl<DISubprogram> {
unsigned VirtualIndex, int ThisAdjustment, unsigned Flags,
unsigned SPFlags, Metadata *Unit, Metadata *TemplateParams,
Metadata *Declaration, Metadata *RetainedNodes,
- Metadata *ThrownTypes)
+ Metadata *ThrownTypes, Metadata *Annotations)
: Scope(Scope), Name(Name), LinkageName(LinkageName), File(File),
Line(Line), Type(Type), ScopeLine(ScopeLine),
ContainingType(ContainingType), VirtualIndex(VirtualIndex),
ThisAdjustment(ThisAdjustment), Flags(Flags), SPFlags(SPFlags),
Unit(Unit), TemplateParams(TemplateParams), Declaration(Declaration),
- RetainedNodes(RetainedNodes), ThrownTypes(ThrownTypes) {}
+ RetainedNodes(RetainedNodes), ThrownTypes(ThrownTypes),
+ Annotations(Annotations) {}
MDNodeKeyImpl(const DISubprogram *N)
: Scope(N->getRawScope()), Name(N->getRawName()),
LinkageName(N->getRawLinkageName()), File(N->getRawFile()),
@@ -717,7 +718,8 @@ template <> struct MDNodeKeyImpl<DISubprogram> {
TemplateParams(N->getRawTemplateParams()),
Declaration(N->getRawDeclaration()),
RetainedNodes(N->getRawRetainedNodes()),
- ThrownTypes(N->getRawThrownTypes()) {}
+ ThrownTypes(N->getRawThrownTypes()),
+ Annotations(N->getRawAnnotations()) {}
bool isKeyOf(const DISubprogram *RHS) const {
return Scope == RHS->getRawScope() && Name == RHS->getRawName() &&
@@ -732,7 +734,8 @@ template <> struct MDNodeKeyImpl<DISubprogram> {
TemplateParams == RHS->getRawTemplateParams() &&
Declaration == RHS->getRawDeclaration() &&
RetainedNodes == RHS->getRawRetainedNodes() &&
- ThrownTypes == RHS->getRawThrownTypes();
+ ThrownTypes == RHS->getRawThrownTypes() &&
+ Annotations == RHS->getRawAnnotations();
}
bool isDefinition() const { return SPFlags & DISubprogram::SPFlagDefinition; }
@@ -853,9 +856,7 @@ template <> struct MDNodeKeyImpl<DINamespace> {
ExportSymbols == RHS->getExportSymbols();
}
- unsigned getHashValue() const {
- return hash_combine(Scope, Name);
- }
+ unsigned getHashValue() const { return hash_combine(Scope, Name); }
};
template <> struct MDNodeKeyImpl<DICommonBlock> {
@@ -865,8 +866,8 @@ template <> struct MDNodeKeyImpl<DICommonBlock> {
Metadata *File;
unsigned LineNo;
- MDNodeKeyImpl(Metadata *Scope, Metadata *Decl, MDString *Name,
- Metadata *File, unsigned LineNo)
+ MDNodeKeyImpl(Metadata *Scope, Metadata *Decl, MDString *Name, Metadata *File,
+ unsigned LineNo)
: Scope(Scope), Decl(Decl), Name(Name), File(File), LineNo(LineNo) {}
MDNodeKeyImpl(const DICommonBlock *N)
: Scope(N->getRawScope()), Decl(N->getRawDecl()), Name(N->getRawName()),
@@ -874,8 +875,8 @@ template <> struct MDNodeKeyImpl<DICommonBlock> {
bool isKeyOf(const DICommonBlock *RHS) const {
return Scope == RHS->getRawScope() && Decl == RHS->getRawDecl() &&
- Name == RHS->getRawName() && File == RHS->getRawFile() &&
- LineNo == RHS->getLineNo();
+ Name == RHS->getRawName() && File == RHS->getRawFile() &&
+ LineNo == RHS->getLineNo();
}
unsigned getHashValue() const {
@@ -976,17 +977,19 @@ template <> struct MDNodeKeyImpl<DIGlobalVariable> {
Metadata *StaticDataMemberDeclaration;
Metadata *TemplateParams;
uint32_t AlignInBits;
+ Metadata *Annotations;
MDNodeKeyImpl(Metadata *Scope, MDString *Name, MDString *LinkageName,
Metadata *File, unsigned Line, Metadata *Type,
bool IsLocalToUnit, bool IsDefinition,
Metadata *StaticDataMemberDeclaration, Metadata *TemplateParams,
- uint32_t AlignInBits)
+ uint32_t AlignInBits, Metadata *Annotations)
: Scope(Scope), Name(Name), LinkageName(LinkageName), File(File),
Line(Line), Type(Type), IsLocalToUnit(IsLocalToUnit),
IsDefinition(IsDefinition),
StaticDataMemberDeclaration(StaticDataMemberDeclaration),
- TemplateParams(TemplateParams), AlignInBits(AlignInBits) {}
+ TemplateParams(TemplateParams), AlignInBits(AlignInBits),
+ Annotations(Annotations) {}
MDNodeKeyImpl(const DIGlobalVariable *N)
: Scope(N->getRawScope()), Name(N->getRawName()),
LinkageName(N->getRawLinkageName()), File(N->getRawFile()),
@@ -994,7 +997,7 @@ template <> struct MDNodeKeyImpl<DIGlobalVariable> {
IsLocalToUnit(N->isLocalToUnit()), IsDefinition(N->isDefinition()),
StaticDataMemberDeclaration(N->getRawStaticDataMemberDeclaration()),
TemplateParams(N->getRawTemplateParams()),
- AlignInBits(N->getAlignInBits()) {}
+ AlignInBits(N->getAlignInBits()), Annotations(N->getRawAnnotations()) {}
bool isKeyOf(const DIGlobalVariable *RHS) const {
return Scope == RHS->getRawScope() && Name == RHS->getRawName() &&
@@ -1005,7 +1008,8 @@ template <> struct MDNodeKeyImpl<DIGlobalVariable> {
StaticDataMemberDeclaration ==
RHS->getRawStaticDataMemberDeclaration() &&
TemplateParams == RHS->getRawTemplateParams() &&
- AlignInBits == RHS->getAlignInBits();
+ AlignInBits == RHS->getAlignInBits() &&
+ Annotations == RHS->getRawAnnotations();
}
unsigned getHashValue() const {
@@ -1018,7 +1022,7 @@ template <> struct MDNodeKeyImpl<DIGlobalVariable> {
// TODO: make hashing work fine with such situations
return hash_combine(Scope, Name, LinkageName, File, Line, Type,
IsLocalToUnit, IsDefinition, /* AlignInBits, */
- StaticDataMemberDeclaration);
+ StaticDataMemberDeclaration, Annotations);
}
};
@@ -1031,22 +1035,25 @@ template <> struct MDNodeKeyImpl<DILocalVariable> {
unsigned Arg;
unsigned Flags;
uint32_t AlignInBits;
+ Metadata *Annotations;
MDNodeKeyImpl(Metadata *Scope, MDString *Name, Metadata *File, unsigned Line,
Metadata *Type, unsigned Arg, unsigned Flags,
- uint32_t AlignInBits)
+ uint32_t AlignInBits, Metadata *Annotations)
: Scope(Scope), Name(Name), File(File), Line(Line), Type(Type), Arg(Arg),
- Flags(Flags), AlignInBits(AlignInBits) {}
+ Flags(Flags), AlignInBits(AlignInBits), Annotations(Annotations) {}
MDNodeKeyImpl(const DILocalVariable *N)
: Scope(N->getRawScope()), Name(N->getRawName()), File(N->getRawFile()),
Line(N->getLine()), Type(N->getRawType()), Arg(N->getArg()),
- Flags(N->getFlags()), AlignInBits(N->getAlignInBits()) {}
+ Flags(N->getFlags()), AlignInBits(N->getAlignInBits()),
+ Annotations(N->getRawAnnotations()) {}
bool isKeyOf(const DILocalVariable *RHS) const {
return Scope == RHS->getRawScope() && Name == RHS->getRawName() &&
File == RHS->getRawFile() && Line == RHS->getLine() &&
Type == RHS->getRawType() && Arg == RHS->getArg() &&
- Flags == RHS->getFlags() && AlignInBits == RHS->getAlignInBits();
+ Flags == RHS->getFlags() && AlignInBits == RHS->getAlignInBits() &&
+ Annotations == RHS->getRawAnnotations();
}
unsigned getHashValue() const {
@@ -1057,7 +1064,7 @@ template <> struct MDNodeKeyImpl<DILocalVariable> {
// clang/test/CodeGen/debug-info-257-args.c is an example of this problem,
// generated IR is random for each run and test fails with Align included.
// TODO: make hashing work fine with such situations
- return hash_combine(Scope, Name, File, Line, Type, Arg, Flags);
+ return hash_combine(Scope, Name, File, Line, Type, Arg, Flags, Annotations);
}
};
@@ -1079,9 +1086,7 @@ template <> struct MDNodeKeyImpl<DILabel> {
}
/// Using name and line to get hash value. It should already be mostly unique.
- unsigned getHashValue() const {
- return hash_combine(Scope, Name, Line);
- }
+ unsigned getHashValue() const { return hash_combine(Scope, Name, Line); }
};
template <> struct MDNodeKeyImpl<DIExpression> {
@@ -1155,23 +1160,26 @@ template <> struct MDNodeKeyImpl<DIImportedEntity> {
Metadata *File;
unsigned Line;
MDString *Name;
+ Metadata *Elements;
MDNodeKeyImpl(unsigned Tag, Metadata *Scope, Metadata *Entity, Metadata *File,
- unsigned Line, MDString *Name)
+ unsigned Line, MDString *Name, Metadata *Elements)
: Tag(Tag), Scope(Scope), Entity(Entity), File(File), Line(Line),
- Name(Name) {}
+ Name(Name), Elements(Elements) {}
MDNodeKeyImpl(const DIImportedEntity *N)
: Tag(N->getTag()), Scope(N->getRawScope()), Entity(N->getRawEntity()),
- File(N->getRawFile()), Line(N->getLine()), Name(N->getRawName()) {}
+ File(N->getRawFile()), Line(N->getLine()), Name(N->getRawName()),
+ Elements(N->getRawElements()) {}
bool isKeyOf(const DIImportedEntity *RHS) const {
return Tag == RHS->getTag() && Scope == RHS->getRawScope() &&
Entity == RHS->getRawEntity() && File == RHS->getFile() &&
- Line == RHS->getLine() && Name == RHS->getRawName();
+ Line == RHS->getLine() && Name == RHS->getRawName() &&
+ Elements == RHS->getRawElements();
}
unsigned getHashValue() const {
- return hash_combine(Tag, Scope, Entity, File, Line, Name);
+ return hash_combine(Tag, Scope, Entity, File, Line, Name, Elements);
}
};
@@ -1325,7 +1333,7 @@ class LLVMContextImpl {
public:
/// OwnedModules - The set of modules instantiated in this context, and which
/// will be automatically deleted if this context is deleted.
- SmallPtrSet<Module*, 4> OwnedModules;
+ SmallPtrSet<Module *, 4> OwnedModules;
/// The main remark streamer used by all the other streamers (e.g. IR, MIR,
/// frontends, etc.). This should only be used by the specific streamers, and
@@ -1377,7 +1385,7 @@ public:
DenseMap<Value *, ValueAsMetadata *> ValuesAsMetadata;
DenseMap<Metadata *, MetadataAsValue *> MetadataAsValues;
- DenseMap<const Value*, ValueName*> ValueNames;
+ DenseMap<const Value *, ValueName *> ValueNames;
#define HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) \
DenseSet<CLASS *, CLASS##Info> CLASS##s;
@@ -1412,7 +1420,7 @@ public:
StringMap<std::unique_ptr<ConstantDataSequential>> CDSConstants;
DenseMap<std::pair<const Function *, const BasicBlock *>, BlockAddress *>
- BlockAddresses;
+ BlockAddresses;
DenseMap<const GlobalValue *, DSOLocalEquivalent *> DSOLocalEquivalents;
@@ -1434,22 +1442,19 @@ public:
BumpPtrAllocator Alloc;
UniqueStringSaver Saver{Alloc};
- DenseMap<unsigned, IntegerType*> IntegerTypes;
+ DenseMap<unsigned, IntegerType *> IntegerTypes;
using FunctionTypeSet = DenseSet<FunctionType *, FunctionTypeKeyInfo>;
FunctionTypeSet FunctionTypes;
using StructTypeSet = DenseSet<StructType *, AnonStructTypeKeyInfo>;
StructTypeSet AnonStructTypes;
- StringMap<StructType*> NamedStructTypes;
+ StringMap<StructType *> NamedStructTypes;
unsigned NamedStructTypesUniqueID = 0;
- DenseMap<std::pair<Type *, uint64_t>, ArrayType*> ArrayTypes;
- DenseMap<std::pair<Type *, ElementCount>, VectorType*> VectorTypes;
- // TODO: clean up the following after we no longer support non-opaque pointer
- // types.
- bool ForceOpaquePointers;
- DenseMap<Type*, PointerType*> PointerTypes; // Pointers in AddrSpace = 0
- DenseMap<std::pair<Type*, unsigned>, PointerType*> ASPointerTypes;
+ DenseMap<std::pair<Type *, uint64_t>, ArrayType *> ArrayTypes;
+ DenseMap<std::pair<Type *, ElementCount>, VectorType *> VectorTypes;
+ DenseMap<Type *, PointerType *> PointerTypes; // Pointers in AddrSpace = 0
+ DenseMap<std::pair<Type *, unsigned>, PointerType *> ASPointerTypes;
/// ValueHandles - This map keeps track of all of the value handles that are
/// watching a Value*. The Value::HasValueHandle bit is used to know
@@ -1503,7 +1508,7 @@ public:
/// This saves allocating an additional word in Function for programs which
/// do not use GC (i.e., most programs) at the cost of increased overhead for
/// clients which do use GC.
- DenseMap<const Function*, std::string> GCNames;
+ DenseMap<const Function *, std::string> GCNames;
/// Flag to indicate if Value (other than GlobalValue) retains their name or
/// not.
@@ -1526,7 +1531,15 @@ public:
///
/// The lifetime of the object must be guaranteed to extend as long as the
/// LLVMContext is used by compilation.
- void setOptPassGate(OptPassGate&);
+ void setOptPassGate(OptPassGate &);
+
+ // TODO: clean up the following after we no longer support non-opaque pointer
+ // types.
+ bool getOpaquePointers();
+ void setOpaquePointers(bool OP);
+
+private:
+ Optional<bool> OpaquePointers;
};
} // end namespace llvm
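A minimal sketch of the opaque-pointer switch wired up above; the call order (before any pointer type is created in the context) is the constraint the new assert checks, and the snippet itself is illustrative, not part of the patch.

    LLVMContext Ctx;
    Ctx.enableOpaquePointers();           // asserts if pointer types already exist
    assert(!Ctx.supportsTypedPointers()); // typed pointers are now unavailable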
diff --git a/llvm/lib/IR/LegacyPassManager.cpp b/llvm/lib/IR/LegacyPassManager.cpp
index 32840fdeddf7..7bccf09012ca 100644
--- a/llvm/lib/IR/LegacyPassManager.cpp
+++ b/llvm/lib/IR/LegacyPassManager.cpp
@@ -1351,7 +1351,7 @@ void FunctionPassManager::add(Pass *P) {
///
bool FunctionPassManager::run(Function &F) {
handleAllErrors(F.materialize(), [&](ErrorInfoBase &EIB) {
- report_fatal_error("Error reading bitcode file: " + EIB.message());
+ report_fatal_error(Twine("Error reading bitcode file: ") + EIB.message());
});
return FPM->run(F);
}
diff --git a/llvm/lib/IR/Mangler.cpp b/llvm/lib/IR/Mangler.cpp
index bbdde586e6e0..2399ea27ee9d 100644
--- a/llvm/lib/IR/Mangler.cpp
+++ b/llvm/lib/IR/Mangler.cpp
@@ -99,6 +99,11 @@ static void addByteCountSuffix(raw_ostream &OS, const Function *F,
const unsigned PtrSize = DL.getPointerSize();
for (const Argument &A : F->args()) {
+ // For the purposes of the byte count suffix, structs returned by pointer
+ // do not count as function arguments.
+ if (A.hasStructRetAttr())
+ continue;
+
// 'Dereference' type in case of byval or inalloca parameter attribute.
uint64_t AllocSize = A.hasPassPointeeByValueCopyAttr() ?
A.getPassPointeeByValueCopySize(DL) :
@@ -186,7 +191,7 @@ void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
// Check if the name needs quotes to be safe for the linker to interpret.
static bool canBeUnquotedInDirective(char C) {
- return isAlnum(C) || C == '_' || C == '$' || C == '.' || C == '@';
+ return isAlnum(C) || C == '_' || C == '@';
}
static bool canBeUnquotedInDirective(StringRef Name) {
diff --git a/llvm/lib/IR/Metadata.cpp b/llvm/lib/IR/Metadata.cpp
index 4f87ef537765..ebcc493407cc 100644
--- a/llvm/lib/IR/Metadata.cpp
+++ b/llvm/lib/IR/Metadata.cpp
@@ -345,7 +345,7 @@ ReplaceableMetadataImpl *ReplaceableMetadataImpl::getIfExists(Metadata &MD) {
bool ReplaceableMetadataImpl::isReplaceable(const Metadata &MD) {
if (auto *N = dyn_cast<MDNode>(&MD))
return !N->isResolved();
- return dyn_cast<ValueAsMetadata>(&MD);
+ return isa<ValueAsMetadata>(&MD);
}
static DISubprogram *getLocalFunctionMetadata(Value *V) {
@@ -1367,6 +1367,15 @@ void Instruction::addAnnotationMetadata(StringRef Name) {
setMetadata(LLVMContext::MD_annotation, MD);
}
+AAMDNodes Instruction::getAAMetadata() const {
+ AAMDNodes Result;
+ Result.TBAA = getMetadata(LLVMContext::MD_tbaa);
+ Result.TBAAStruct = getMetadata(LLVMContext::MD_tbaa_struct);
+ Result.Scope = getMetadata(LLVMContext::MD_alias_scope);
+ Result.NoAlias = getMetadata(LLVMContext::MD_noalias);
+ return Result;
+}
+
void Instruction::setAAMetadata(const AAMDNodes &N) {
setMetadata(LLVMContext::MD_tbaa, N.TBAA);
setMetadata(LLVMContext::MD_tbaa_struct, N.TBAAStruct);
diff --git a/llvm/lib/IR/Module.cpp b/llvm/lib/IR/Module.cpp
index 7c18dc0ed299..63ea41fba89a 100644
--- a/llvm/lib/IR/Module.cpp
+++ b/llvm/lib/IR/Module.cpp
@@ -114,6 +114,10 @@ GlobalValue *Module::getNamedValue(StringRef Name) const {
return cast_or_null<GlobalValue>(getValueSymbolTable().lookup(Name));
}
+unsigned Module::getNumNamedValues() const {
+ return getValueSymbolTable().size();
+}
+
/// getMDKindID - Return a unique non-zero ID for the specified metadata kind.
/// This ID is uniqued across modules in the current LLVMContext.
unsigned Module::getMDKindID(StringRef Name) const {
diff --git a/llvm/lib/IR/ModuleSummaryIndex.cpp b/llvm/lib/IR/ModuleSummaryIndex.cpp
index f4ac6caf4f93..31c5cd938d03 100644
--- a/llvm/lib/IR/ModuleSummaryIndex.cpp
+++ b/llvm/lib/IR/ModuleSummaryIndex.cpp
@@ -251,12 +251,13 @@ void ModuleSummaryIndex::propagateAttributes(
bool IsDSOLocal = true;
for (auto &S : P.second.SummaryList) {
if (!isGlobalValueLive(S.get())) {
- // computeDeadSymbols should have marked all copies live. Note that
- // it is possible that there is a GUID collision between internal
- // symbols with the same name in different files of the same name but
- // not enough distinguishing path. Because computeDeadSymbols should
- // conservatively mark all copies live we can assert here that all are
- // dead if any copy is dead.
+ // computeDeadSymbolsAndUpdateIndirectCalls should have marked all
+ // copies live. Note that it is possible that there is a GUID collision
+ // between internal symbols with the same name in different files that
+ // share a name but lack enough distinguishing path. Because
+ // computeDeadSymbolsAndUpdateIndirectCalls should conservatively mark
+ // all copies live, we can assert here that all are dead if any copy is
+ // dead.
assert(llvm::none_of(
P.second.SummaryList,
[&](const std::unique_ptr<GlobalValueSummary> &Summary) {
@@ -446,9 +447,11 @@ static std::string linkageToString(GlobalValue::LinkageTypes LT) {
static std::string fflagsToString(FunctionSummary::FFlags F) {
auto FlagValue = [](unsigned V) { return V ? '1' : '0'; };
- char FlagRep[] = {FlagValue(F.ReadNone), FlagValue(F.ReadOnly),
- FlagValue(F.NoRecurse), FlagValue(F.ReturnDoesNotAlias),
- FlagValue(F.NoInline), FlagValue(F.AlwaysInline), 0};
+ char FlagRep[] = {FlagValue(F.ReadNone), FlagValue(F.ReadOnly),
+ FlagValue(F.NoRecurse), FlagValue(F.ReturnDoesNotAlias),
+ FlagValue(F.NoInline), FlagValue(F.AlwaysInline),
+ FlagValue(F.NoUnwind), FlagValue(F.MayThrow),
+ FlagValue(F.HasUnknownCall), 0};
return FlagRep;
}
diff --git a/llvm/lib/IR/Operator.cpp b/llvm/lib/IR/Operator.cpp
index 18a1c84933e0..cf309ffd6212 100644
--- a/llvm/lib/IR/Operator.cpp
+++ b/llvm/lib/IR/Operator.cpp
@@ -19,6 +19,31 @@
#include "ConstantsContext.h"
namespace llvm {
+bool Operator::hasPoisonGeneratingFlags() const {
+ switch (getOpcode()) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::Shl: {
+ auto *OBO = cast<OverflowingBinaryOperator>(this);
+ return OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap();
+ }
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::AShr:
+ case Instruction::LShr:
+ return cast<PossiblyExactOperator>(this)->isExact();
+ case Instruction::GetElementPtr: {
+ auto *GEP = cast<GEPOperator>(this);
+ // Note: inrange exists on constexpr only
+ return GEP->isInBounds() || GEP->getInRangeIndex() != None;
+ }
+ default:
+ return false;
+ }
+ // TODO: FastMathFlags! (On instructions, but not constexpr)
+}
+
Type *GEPOperator::getSourceElementType() const {
if (auto *I = dyn_cast<GetElementPtrInst>(this))
return I->getSourceElementType();
@@ -190,12 +215,14 @@ bool GEPOperator::collectOffset(
if (STy || ScalableType)
return false;
- // Insert an initial offset of 0 for V iff none exists already, then
- // increment the offset by IndexedSize.
- VariableOffsets.insert({V, APInt(BitWidth, 0)});
APInt IndexedSize =
APInt(BitWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
- VariableOffsets[V] += IndexedSize;
+ // Insert an initial offset of 0 for V iff none exists already, then
+ // increment the offset by IndexedSize.
+ if (!IndexedSize.isZero()) {
+ VariableOffsets.insert({V, APInt(BitWidth, 0)});
+ VariableOffsets[V] += IndexedSize;
+ }
}
return true;
}
diff --git a/llvm/lib/IR/OptBisect.cpp b/llvm/lib/IR/OptBisect.cpp
index 2cf2298e0005..55c0dbad5aab 100644
--- a/llvm/lib/IR/OptBisect.cpp
+++ b/llvm/lib/IR/OptBisect.cpp
@@ -22,14 +22,12 @@
using namespace llvm;
static cl::opt<int> OptBisectLimit("opt-bisect-limit", cl::Hidden,
- cl::init(std::numeric_limits<int>::max()),
- cl::Optional,
+ cl::init(OptBisect::Disabled), cl::Optional,
+ cl::cb<void, int>([](int Limit) {
+ llvm::OptBisector->setLimit(Limit);
+ }),
cl::desc("Maximum optimization to perform"));
-OptBisect::OptBisect() : OptPassGate() {
- BisectEnabled = OptBisectLimit != std::numeric_limits<int>::max();
-}
-
static void printPassMessage(const StringRef &Name, int PassNum,
StringRef TargetDesc, bool Running) {
StringRef Status = Running ? "" : "NOT ";
@@ -38,19 +36,21 @@ static void printPassMessage(const StringRef &Name, int PassNum,
}
bool OptBisect::shouldRunPass(const Pass *P, StringRef IRDescription) {
- assert(BisectEnabled);
+ assert(isEnabled());
return checkPass(P->getPassName(), IRDescription);
}
bool OptBisect::checkPass(const StringRef PassName,
const StringRef TargetDesc) {
- assert(BisectEnabled);
+ assert(isEnabled());
int CurBisectNum = ++LastBisectNum;
- bool ShouldRun = (OptBisectLimit == -1 || CurBisectNum <= OptBisectLimit);
+ bool ShouldRun = (BisectLimit == -1 || CurBisectNum <= BisectLimit);
printPassMessage(PassName, CurBisectNum, TargetDesc, ShouldRun);
return ShouldRun;
}
+const int OptBisect::Disabled;
+
ManagedStatic<OptBisect> llvm::OptBisector;
diff --git a/llvm/lib/IR/PassManager.cpp b/llvm/lib/IR/PassManager.cpp
index 4cf7ab2a602b..d933003ccdf7 100644
--- a/llvm/lib/IR/PassManager.cpp
+++ b/llvm/lib/IR/PassManager.cpp
@@ -10,12 +10,13 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PassManagerImpl.h"
+#include "llvm/Support/CommandLine.h"
using namespace llvm;
+namespace llvm {
// Explicit template instantiations and specialization definitions for core
// template typedefs.
-namespace llvm {
template class AllAnalysesOn<Module>;
template class AllAnalysesOn<Function>;
template class PassManager<Module>;
@@ -91,6 +92,16 @@ bool FunctionAnalysisManagerModuleProxy::Result::invalidate(
}
} // namespace llvm
+void ModuleToFunctionPassAdaptor::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ OS << "function";
+ if (EagerlyInvalidate)
+ OS << "<eager-inv>";
+ OS << "(";
+ Pass->printPipeline(OS, MapClassName2PassName);
+ OS << ")";
+}
+
PreservedAnalyses ModuleToFunctionPassAdaptor::run(Module &M,
ModuleAnalysisManager &AM) {
FunctionAnalysisManager &FAM =
@@ -122,7 +133,7 @@ PreservedAnalyses ModuleToFunctionPassAdaptor::run(Module &M,
// We know that the function pass couldn't have invalidated any other
// function's analyses (that's the contract of a function pass), so
// directly handle the function analysis manager's invalidation here.
- FAM.invalidate(F, PassPA);
+ FAM.invalidate(F, EagerlyInvalidate ? PreservedAnalyses::none() : PassPA);
// Then intersect the preserved set so that invalidation of module
// analyses will eventually occur when the module pass completes.
diff --git a/llvm/lib/IR/ProfileSummary.cpp b/llvm/lib/IR/ProfileSummary.cpp
index 453a278a7f3f..05d5ac2c5ddf 100644
--- a/llvm/lib/IR/ProfileSummary.cpp
+++ b/llvm/lib/IR/ProfileSummary.cpp
@@ -249,7 +249,7 @@ ProfileSummary *ProfileSummary::getFromMD(Metadata *MD) {
PartialProfileRatio);
}
-void ProfileSummary::printSummary(raw_ostream &OS) {
+void ProfileSummary::printSummary(raw_ostream &OS) const {
OS << "Total functions: " << NumFunctions << "\n";
OS << "Maximum function count: " << MaxFunctionCount << "\n";
OS << "Maximum block count: " << MaxCount << "\n";
@@ -257,7 +257,7 @@ void ProfileSummary::printSummary(raw_ostream &OS) {
OS << "Total count: " << TotalCount << "\n";
}
-void ProfileSummary::printDetailedSummary(raw_ostream &OS) {
+void ProfileSummary::printDetailedSummary(raw_ostream &OS) const {
OS << "Detailed summary:\n";
for (const auto &Entry : DetailedSummary) {
OS << Entry.NumCounts << " blocks with count >= " << Entry.MinCount
diff --git a/llvm/lib/IR/PseudoProbe.cpp b/llvm/lib/IR/PseudoProbe.cpp
index bd92c604da2c..101cada77ff9 100644
--- a/llvm/lib/IR/PseudoProbe.cpp
+++ b/llvm/lib/IR/PseudoProbe.cpp
@@ -98,12 +98,4 @@ void setProbeDistributionFactor(Instruction &Inst, float Factor) {
}
}
-void addPseudoProbeAttribute(PseudoProbeInst &Inst,
- PseudoProbeAttributes Attr) {
- IRBuilder<> Builder(&Inst);
- uint32_t OldAttr = Inst.getAttributes()->getZExtValue();
- uint32_t NewAttr = OldAttr | (uint32_t)Attr;
- if (OldAttr != NewAttr)
- Inst.replaceUsesOfWith(Inst.getAttributes(), Builder.getInt32(NewAttr));
-}
} // namespace llvm
diff --git a/llvm/lib/IR/ReplaceConstant.cpp b/llvm/lib/IR/ReplaceConstant.cpp
index fd73a1a8e5af..cfd8deba5a53 100644
--- a/llvm/lib/IR/ReplaceConstant.cpp
+++ b/llvm/lib/IR/ReplaceConstant.cpp
@@ -15,15 +15,9 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/NoFolder.h"
+#include "llvm/IR/ValueMap.h"
namespace llvm {
-// Replace a constant expression by instructions with equivalent operations at
-// a specified location.
-Instruction *createReplacementInstr(ConstantExpr *CE, Instruction *Instr) {
- auto *CEInstr = CE->getAsInstruction();
- CEInstr->insertBefore(Instr);
- return CEInstr;
-}
void convertConstantExprsToInstructions(Instruction *I, ConstantExpr *CE,
SmallPtrSetImpl<Instruction *> *Insts) {
@@ -40,7 +34,8 @@ void convertConstantExprsToInstructions(
Instruction *I,
std::map<Use *, std::vector<std::vector<ConstantExpr *>>> &CEPaths,
SmallPtrSetImpl<Instruction *> *Insts) {
- SmallPtrSet<ConstantExpr *, 8> Visited;
+ ValueMap<ConstantExpr *, Instruction *> Visited;
+
for (Use &U : I->operands()) {
// The operand U is either not a constant expression operand or the
// constant expression paths do not belong to U, ignore U.
@@ -55,24 +50,47 @@ void convertConstantExprsToInstructions(
BI = &(*(BB->getFirstInsertionPt()));
}
- // Go through the paths associated with operand U, and convert all the
- // constant expressions along all paths to corresponding instructions.
+ // Go through all the paths associated with operand U, and convert all the
+ // constant expressions along all the paths to corresponding instructions.
auto *II = I;
auto &Paths = CEPaths[&U];
for (auto &Path : Paths) {
for (auto *CE : Path) {
- if (!Visited.insert(CE).second)
- continue;
- auto *NI = CE->getAsInstruction();
- NI->insertBefore(BI);
+ // Instruction which is equivalent to CE.
+ Instruction *NI = nullptr;
+
+ if (!Visited.count(CE)) {
+        // CE is encountered for the first time; convert it into a corresponding
+ // instruction NI, and appropriately insert NI before the parent
+ // instruction.
+ NI = CE->getAsInstruction(BI);
+
+ // Mark CE as visited by mapping CE to NI.
+ Visited[CE] = NI;
+
+ // If required collect NI.
+ if (Insts)
+ Insts->insert(NI);
+ } else {
+        // We have already encountered CE; the corresponding instruction
+        // already exists, so use it to replace CE.
+ NI = Visited[CE];
+ }
+
+ assert(NI && "Expected an instruction corresponding to constant "
+ "expression.");
+
+ // Replace all uses of constant expression CE by the corresponding
+ // instruction NI within the current parent instruction.
II->replaceUsesOfWith(CE, NI);
- CE->removeDeadConstantUsers();
BI = II = NI;
- if (Insts)
- Insts->insert(NI);
}
}
}
+
+ // Remove all converted constant expressions which are dead by now.
+ for (auto Item : Visited)
+ Item.first->removeDeadConstantUsers();
}
void collectConstantExprPaths(
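
The ReplaceConstant.cpp change above swaps the visited set for a map from each
constant expression to its replacement instruction, so an expression reached
along several paths is lowered exactly once and every later occurrence reuses
that instruction; removeDeadConstantUsers() is then deferred until all paths
have been rewritten. Below is a minimal, self-contained sketch of that caching
pattern; the Expr/Instr types are made-up stand-ins, not the LLVM classes.

// --- illustrative sketch (editorial example, not part of the imported diff) ---
#include <iostream>
#include <map>
#include <string>
#include <vector>

struct Expr  { std::string Text; };   // stands in for ConstantExpr
struct Instr { std::string Text; };   // stands in for the created Instruction

int main() {
  Expr A{"gep @g, 0"}, B{"bitcast @g"};
  // The same expression can show up on more than one path.
  std::vector<Expr *> Paths[] = {{&A, &B}, {&B, &A}};

  std::map<Expr *, Instr *> Visited;  // expression -> already-created instruction
  std::vector<Instr *> Created;       // plays the role of the optional Insts set

  for (auto &Path : Paths) {
    for (Expr *CE : Path) {
      Instr *NI = nullptr;
      auto It = Visited.find(CE);
      if (It == Visited.end()) {
        NI = new Instr{CE->Text};     // first encounter: materialize it once
        Visited[CE] = NI;
        Created.push_back(NI);
      } else {
        NI = It->second;              // later encounter: reuse the earlier one
      }
      std::cout << "use " << NI->Text << '\n';
    }
  }
  std::cout << "created " << Created.size() << " instructions\n"; // prints 2, not 4
  for (Instr *I : Created)
    delete I;
  return 0;
}
// --- end sketch ---
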
diff --git a/llvm/lib/IR/Statepoint.cpp b/llvm/lib/IR/Statepoint.cpp
index bbfbbe489bae..b5916e4937c6 100644
--- a/llvm/lib/IR/Statepoint.cpp
+++ b/llvm/lib/IR/Statepoint.cpp
@@ -26,16 +26,14 @@ StatepointDirectives
llvm::parseStatepointDirectivesFromAttrs(AttributeList AS) {
StatepointDirectives Result;
- Attribute AttrID =
- AS.getAttribute(AttributeList::FunctionIndex, "statepoint-id");
+ Attribute AttrID = AS.getFnAttr("statepoint-id");
uint64_t StatepointID;
if (AttrID.isStringAttribute())
if (!AttrID.getValueAsString().getAsInteger(10, StatepointID))
Result.StatepointID = StatepointID;
uint32_t NumPatchBytes;
- Attribute AttrNumPatchBytes = AS.getAttribute(AttributeList::FunctionIndex,
- "statepoint-num-patch-bytes");
+ Attribute AttrNumPatchBytes = AS.getFnAttr("statepoint-num-patch-bytes");
if (AttrNumPatchBytes.isStringAttribute())
if (!AttrNumPatchBytes.getValueAsString().getAsInteger(10, NumPatchBytes))
Result.NumPatchBytes = NumPatchBytes;
diff --git a/llvm/lib/IR/Type.cpp b/llvm/lib/IR/Type.cpp
index a21998976066..d59d87ad631b 100644
--- a/llvm/lib/IR/Type.cpp
+++ b/llvm/lib/IR/Type.cpp
@@ -66,6 +66,44 @@ bool Type::isOpaquePointerTy() const {
return false;
}
+const fltSemantics &Type::getFltSemantics() const {
+ switch (getTypeID()) {
+ case HalfTyID: return APFloat::IEEEhalf();
+ case BFloatTyID: return APFloat::BFloat();
+ case FloatTyID: return APFloat::IEEEsingle();
+ case DoubleTyID: return APFloat::IEEEdouble();
+ case X86_FP80TyID: return APFloat::x87DoubleExtended();
+ case FP128TyID: return APFloat::IEEEquad();
+ case PPC_FP128TyID: return APFloat::PPCDoubleDouble();
+ default: llvm_unreachable("Invalid floating type");
+ }
+}
+
+bool Type::isIEEE() const {
+ return APFloat::getZero(getFltSemantics()).isIEEE();
+}
+
+Type *Type::getFloatingPointTy(LLVMContext &C, const fltSemantics &S) {
+ Type *Ty;
+ if (&S == &APFloat::IEEEhalf())
+ Ty = Type::getHalfTy(C);
+ else if (&S == &APFloat::BFloat())
+ Ty = Type::getBFloatTy(C);
+ else if (&S == &APFloat::IEEEsingle())
+ Ty = Type::getFloatTy(C);
+ else if (&S == &APFloat::IEEEdouble())
+ Ty = Type::getDoubleTy(C);
+ else if (&S == &APFloat::x87DoubleExtended())
+ Ty = Type::getX86_FP80Ty(C);
+ else if (&S == &APFloat::IEEEquad())
+ Ty = Type::getFP128Ty(C);
+ else {
+ assert(&S == &APFloat::PPCDoubleDouble() && "Unknown FP format");
+ Ty = Type::getPPC_FP128Ty(C);
+ }
+ return Ty;
+}
+
bool Type::canLosslesslyBitCastTo(Type *Ty) const {
// Identity cast means no change so return true
if (this == Ty)
@@ -296,9 +334,7 @@ IntegerType *IntegerType::get(LLVMContext &C, unsigned NumBits) {
return Entry;
}
-APInt IntegerType::getMask() const {
- return APInt::getAllOnesValue(getBitWidth());
-}
+APInt IntegerType::getMask() const { return APInt::getAllOnes(getBitWidth()); }
//===----------------------------------------------------------------------===//
// FunctionType Implementation
@@ -696,8 +732,8 @@ PointerType *PointerType::get(Type *EltTy, unsigned AddressSpace) {
LLVMContextImpl *CImpl = EltTy->getContext().pImpl;
- // Create opaque pointer for pointer to opaque pointer.
- if (CImpl->ForceOpaquePointers || EltTy->isOpaquePointerTy())
+ // Automatically convert typed pointers to opaque pointers.
+ if (CImpl->getOpaquePointers())
return get(EltTy->getContext(), AddressSpace);
// Since AddressSpace #0 is the common case, we special case it.
@@ -711,6 +747,8 @@ PointerType *PointerType::get(Type *EltTy, unsigned AddressSpace) {
PointerType *PointerType::get(LLVMContext &C, unsigned AddressSpace) {
LLVMContextImpl *CImpl = C.pImpl;
+ assert(CImpl->getOpaquePointers() &&
+ "Can only create opaque pointers in opaque pointer mode");
// Since AddressSpace #0 is the common case, we special case it.
PointerType *&Entry =
diff --git a/llvm/lib/IR/TypeFinder.cpp b/llvm/lib/IR/TypeFinder.cpp
index 724b8f6b6ad2..1f757d7dbf4e 100644
--- a/llvm/lib/IR/TypeFinder.cpp
+++ b/llvm/lib/IR/TypeFinder.cpp
@@ -106,11 +106,9 @@ void TypeFinder::incorporateType(Type *Ty) {
StructTypes.push_back(STy);
// Add all unvisited subtypes to worklist for processing
- for (Type::subtype_reverse_iterator I = Ty->subtype_rbegin(),
- E = Ty->subtype_rend();
- I != E; ++I)
- if (VisitedTypes.insert(*I).second)
- TypeWorklist.push_back(*I);
+ for (Type *SubTy : llvm::reverse(Ty->subtypes()))
+ if (VisitedTypes.insert(SubTy).second)
+ TypeWorklist.push_back(SubTy);
} while (!TypeWorklist.empty());
}
diff --git a/llvm/lib/IR/User.cpp b/llvm/lib/IR/User.cpp
index 8837151f2e18..68489075cd88 100644
--- a/llvm/lib/IR/User.cpp
+++ b/llvm/lib/IR/User.cpp
@@ -107,7 +107,7 @@ MutableArrayRef<uint8_t> User::getDescriptor() {
}
bool User::isDroppable() const {
- return isa<AssumeInst>(this);
+ return isa<AssumeInst>(this) || isa<PseudoProbeInst>(this);
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/IR/Value.cpp b/llvm/lib/IR/Value.cpp
index 1c595651b3d7..b475c8327874 100644
--- a/llvm/lib/IR/Value.cpp
+++ b/llvm/lib/IR/Value.cpp
@@ -176,6 +176,18 @@ Use *Value::getSingleUndroppableUse() {
return Result;
}
+User *Value::getUniqueUndroppableUser() {
+ User *Result = nullptr;
+ for (auto *U : users()) {
+ if (!U->isDroppable()) {
+ if (Result && Result != U)
+ return nullptr;
+ Result = U;
+ }
+ }
+ return Result;
+}
+
bool Value::hasNUndroppableUses(unsigned int N) const {
return hasNItems(user_begin(), user_end(), N, isUnDroppableUser);
}
@@ -534,9 +546,7 @@ void Value::replaceUsesWithIf(Value *New,
SmallVector<TrackingVH<Constant>, 8> Consts;
SmallPtrSet<Constant *, 8> Visited;
- for (use_iterator UI = use_begin(), E = use_end(); UI != E;) {
- Use &U = *UI;
- ++UI;
+ for (Use &U : llvm::make_early_inc_range(uses())) {
if (!ShouldReplace(U))
continue;
// Must handle Constants specially, we cannot call replaceUsesOfWith on a
@@ -694,6 +704,7 @@ const Value *Value::stripPointerCastsForAliasAnalysis() const {
const Value *Value::stripAndAccumulateConstantOffsets(
const DataLayout &DL, APInt &Offset, bool AllowNonInbounds,
+ bool AllowInvariantGroup,
function_ref<bool(Value &, APInt &)> ExternalAnalysis) const {
if (!getType()->isPtrOrPtrVectorTy())
return this;
@@ -753,6 +764,8 @@ const Value *Value::stripAndAccumulateConstantOffsets(
} else if (const auto *Call = dyn_cast<CallBase>(V)) {
if (const Value *RV = Call->getReturnedArgOperand())
V = RV;
+ if (AllowInvariantGroup && Call->isLaunderOrStripInvariantGroup())
+ V = Call->getArgOperand(0);
}
assert(V->getType()->isPtrOrPtrVectorTy() && "Unexpected operand type!");
} while (Visited.insert(V).second);
@@ -852,10 +865,9 @@ uint64_t Value::getPointerDereferenceableBytes(const DataLayout &DL,
CanBeNull = true;
}
} else if (const auto *Call = dyn_cast<CallBase>(this)) {
- DerefBytes = Call->getDereferenceableBytes(AttributeList::ReturnIndex);
+ DerefBytes = Call->getRetDereferenceableBytes();
if (DerefBytes == 0) {
- DerefBytes =
- Call->getDereferenceableOrNullBytes(AttributeList::ReturnIndex);
+ DerefBytes = Call->getRetDereferenceableOrNullBytes();
CanBeNull = true;
}
} else if (const LoadInst *LI = dyn_cast<LoadInst>(this)) {
@@ -1014,8 +1026,7 @@ bool Value::isTransitiveUsedByMetadataOnly() const {
llvm::SmallPtrSet<const User *, 32> Visited;
WorkList.insert(WorkList.begin(), user_begin(), user_end());
while (!WorkList.empty()) {
- const User *U = WorkList.back();
- WorkList.pop_back();
+ const User *U = WorkList.pop_back_val();
Visited.insert(U);
// If it is transitively used by a global value or a non-constant value,
// it's obviously not only used by metadata.
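
The Value.cpp hunk above adds getUniqueUndroppableUser(), which ignores
droppable users (assumes and, with the User.cpp change, pseudo probes) and
returns a user only when every remaining use comes from one and the same user.
A short standalone sketch of that rule, using an assumed User stand-in rather
than the LLVM class:

// --- illustrative sketch (editorial example, not part of the imported diff) ---
#include <cassert>
#include <vector>

struct User { bool Droppable; };

const User *getUniqueUndroppableUser(const std::vector<const User *> &Users) {
  const User *Result = nullptr;
  for (const User *U : Users) {
    if (U->Droppable)
      continue;                 // e.g. assumes / pseudo probes
    if (Result && Result != U)
      return nullptr;           // two distinct real users -> not unique
    Result = U;
  }
  return Result;                // null if there were no undroppable users at all
}

int main() {
  User A{false}, B{true}, C{false};
  assert(getUniqueUndroppableUser({&A, &B, &A}) == &A);  // repeated uses by A are fine
  assert(getUniqueUndroppableUser({&A, &C}) == nullptr); // two different users
  assert(getUniqueUndroppableUser({&B}) == nullptr);     // only droppable uses
  return 0;
}
// --- end sketch ---
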
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 758205a39eb3..dc4370d4b6ed 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -415,15 +415,18 @@ public:
for (const GlobalAlias &GA : M.aliases())
visitGlobalAlias(GA);
+ for (const GlobalIFunc &GI : M.ifuncs())
+ visitGlobalIFunc(GI);
+
for (const NamedMDNode &NMD : M.named_metadata())
visitNamedMDNode(NMD);
for (const StringMapEntry<Comdat> &SMEC : M.getComdatSymbolTable())
visitComdat(SMEC.getValue());
- visitModuleFlags(M);
- visitModuleIdents(M);
- visitModuleCommandLines(M);
+ visitModuleFlags();
+ visitModuleIdents();
+ visitModuleCommandLines();
verifyCompileUnits();
@@ -440,6 +443,7 @@ private:
void visitGlobalValue(const GlobalValue &GV);
void visitGlobalVariable(const GlobalVariable &GV);
void visitGlobalAlias(const GlobalAlias &GA);
+ void visitGlobalIFunc(const GlobalIFunc &GI);
void visitAliaseeSubExpr(const GlobalAlias &A, const Constant &C);
void visitAliaseeSubExpr(SmallPtrSetImpl<const GlobalAlias *> &Visited,
const GlobalAlias &A, const Constant &C);
@@ -448,9 +452,9 @@ private:
void visitMetadataAsValue(const MetadataAsValue &MD, Function *F);
void visitValueAsMetadata(const ValueAsMetadata &MD, Function *F);
void visitComdat(const Comdat &C);
- void visitModuleIdents(const Module &M);
- void visitModuleCommandLines(const Module &M);
- void visitModuleFlags(const Module &M);
+ void visitModuleIdents();
+ void visitModuleCommandLines();
+ void visitModuleFlags();
void visitModuleFlag(const MDNode *Op,
DenseMap<const MDString *, const MDNode *> &SeenIDs,
SmallVectorImpl<const MDNode *> &Requirements);
@@ -461,6 +465,8 @@ private:
void visitDereferenceableMetadata(Instruction &I, MDNode *MD);
void visitProfMetadata(Instruction &I, MDNode *MD);
void visitAnnotationMetadata(MDNode *Annotation);
+ void visitAliasScopeMetadata(const MDNode *MD);
+ void visitAliasScopeListMetadata(const MDNode *MD);
template <class Ty> bool isValidMetadataArray(const MDTuple &N);
#define HANDLE_SPECIALIZED_MDNODE_LEAF(CLASS) void visit##CLASS(const CLASS &N);
@@ -547,6 +553,8 @@ private:
void verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
const Value *V, bool IsIntrinsic);
void verifyFunctionMetadata(ArrayRef<std::pair<unsigned, MDNode *>> MDs);
+ template <typename T>
+ void verifyODRTypeAsScopeOperand(const MDNode &MD, T * = nullptr);
void visitConstantExprsRecursively(const Constant *EntryC);
void visitConstantExpr(const ConstantExpr *CE);
@@ -569,6 +577,9 @@ private:
/// declarations share the same calling convention.
void verifyDeoptimizeCallingConvs();
+ void verifyAttachedCallBundle(const CallBase &Call,
+ const OperandBundleUse &BU);
+
/// Verify all-or-nothing property of DIFile source attribute within a CU.
void verifySourceDebugInfo(const DICompileUnit &U, const DIFile &F);
@@ -816,6 +827,21 @@ void Verifier::visitGlobalAlias(const GlobalAlias &GA) {
visitGlobalValue(GA);
}
+void Verifier::visitGlobalIFunc(const GlobalIFunc &GI) {
+ // Pierce through ConstantExprs and GlobalAliases and check that the resolver
+ // has a Function
+ const Function *Resolver = GI.getResolverFunction();
+ Assert(Resolver, "IFunc must have a Function resolver", &GI);
+
+ // Check that the immediate resolver operand (prior to any bitcasts) has the
+ // correct type
+ const Type *ResolverTy = GI.getResolver()->getType();
+ const Type *ResolverFuncTy =
+ GlobalIFunc::getResolverFunctionType(GI.getValueType());
+ Assert(ResolverTy == ResolverFuncTy->getPointerTo(),
+ "IFunc resolver has incorrect type", &GI);
+}
+
void Verifier::visitNamedMDNode(const NamedMDNode &NMD) {
// There used to be various other llvm.dbg.* nodes, but we don't support
// upgrading them and we want to reserve the namespace for future uses.
@@ -834,6 +860,19 @@ void Verifier::visitNamedMDNode(const NamedMDNode &NMD) {
}
}
+template <typename T>
+void Verifier::verifyODRTypeAsScopeOperand(const MDNode &MD, T *) {
+ if (isa<T>(MD)) {
+ if (auto *N = dyn_cast_or_null<DICompositeType>(cast<T>(MD).getScope()))
+      // Of all the supported tags for DICompositeType (see
+      // visitDICompositeType), we know that an enum type cannot be a scope.
+ AssertDI(N->getTag() != dwarf::DW_TAG_enumeration_type,
+ "enum type is not a scope; check enum type ODR "
+ "violation",
+ N, &MD);
+ }
+}
+
void Verifier::visitMDNode(const MDNode &MD, AreDebugLocsAllowed AllowLocs) {
// Only visit each node once. Metadata can be mutually recursive, so this
// avoids infinite recursion here, as well as being an optimization.
@@ -843,6 +882,12 @@ void Verifier::visitMDNode(const MDNode &MD, AreDebugLocsAllowed AllowLocs) {
Assert(&MD.getContext() == &Context,
"MDNode context does not match Module context!", &MD);
+  // Make sure that when a scope operand is an ODR type, ODR type uniquing does
+ // not create invalid debug metadata.
+ // TODO: check that the non-ODR-type scope operand is valid.
+ verifyODRTypeAsScopeOperand<DIType>(MD);
+ verifyODRTypeAsScopeOperand<DILocalScope>(MD);
+
switch (MD.getMetadataID()) {
default:
llvm_unreachable("Invalid MDNode subclass");
@@ -1091,7 +1136,8 @@ void Verifier::visitDICompositeType(const DICompositeType &N) {
N.getTag() == dwarf::DW_TAG_union_type ||
N.getTag() == dwarf::DW_TAG_enumeration_type ||
N.getTag() == dwarf::DW_TAG_class_type ||
- N.getTag() == dwarf::DW_TAG_variant_part,
+ N.getTag() == dwarf::DW_TAG_variant_part ||
+ N.getTag() == dwarf::DW_TAG_namelist,
"invalid tag", &N);
AssertDI(isScope(N.getRawScope()), "invalid scope", &N, N.getRawScope());
@@ -1470,7 +1516,7 @@ void Verifier::visitComdat(const Comdat &C) {
"comdat global value has private linkage", GV);
}
-void Verifier::visitModuleIdents(const Module &M) {
+void Verifier::visitModuleIdents() {
const NamedMDNode *Idents = M.getNamedMetadata("llvm.ident");
if (!Idents)
return;
@@ -1487,7 +1533,7 @@ void Verifier::visitModuleIdents(const Module &M) {
}
}
-void Verifier::visitModuleCommandLines(const Module &M) {
+void Verifier::visitModuleCommandLines() {
const NamedMDNode *CommandLines = M.getNamedMetadata("llvm.commandline");
if (!CommandLines)
return;
@@ -1505,7 +1551,7 @@ void Verifier::visitModuleCommandLines(const Module &M) {
}
}
-void Verifier::visitModuleFlags(const Module &M) {
+void Verifier::visitModuleFlags() {
const NamedMDNode *Flags = M.getModuleFlagsMetadata();
if (!Flags) return;
@@ -1824,9 +1870,8 @@ void Verifier::verifyParameterAttrs(AttributeSet Attrs, Type *Ty,
void Verifier::checkUnsignedBaseTenFuncAttr(AttributeList Attrs, StringRef Attr,
const Value *V) {
- if (Attrs.hasFnAttribute(Attr)) {
- StringRef S = Attrs.getAttribute(AttributeList::FunctionIndex, Attr)
- .getValueAsString();
+ if (Attrs.hasFnAttr(Attr)) {
+ StringRef S = Attrs.getFnAttr(Attr).getValueAsString();
unsigned N;
if (S.getAsInteger(10, N))
CheckFailed("\"" + Attr + "\" takes an unsigned integer: " + S, V);
@@ -1861,7 +1906,7 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
bool SawSwiftError = false;
// Verify return value attributes.
- AttributeSet RetAttrs = Attrs.getRetAttributes();
+ AttributeSet RetAttrs = Attrs.getRetAttrs();
for (Attribute RetAttr : RetAttrs)
Assert(RetAttr.isStringAttribute() ||
Attribute::canUseAsRetAttr(RetAttr.getKindAsEnum()),
@@ -1874,7 +1919,7 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
// Verify parameter attributes.
for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i) {
Type *Ty = FT->getParamType(i);
- AttributeSet ArgAttrs = Attrs.getParamAttributes(i);
+ AttributeSet ArgAttrs = Attrs.getParamAttrs(i);
if (!IsIntrinsic) {
Assert(!ArgAttrs.hasAttribute(Attribute::ImmArg),
@@ -1928,63 +1973,63 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
}
}
- if (!Attrs.hasAttributes(AttributeList::FunctionIndex))
+ if (!Attrs.hasFnAttrs())
return;
- verifyAttributeTypes(Attrs.getFnAttributes(), V);
- for (Attribute FnAttr : Attrs.getFnAttributes())
+ verifyAttributeTypes(Attrs.getFnAttrs(), V);
+ for (Attribute FnAttr : Attrs.getFnAttrs())
Assert(FnAttr.isStringAttribute() ||
Attribute::canUseAsFnAttr(FnAttr.getKindAsEnum()),
"Attribute '" + FnAttr.getAsString() +
"' does not apply to functions!",
V);
- Assert(!(Attrs.hasFnAttribute(Attribute::ReadNone) &&
- Attrs.hasFnAttribute(Attribute::ReadOnly)),
+ Assert(!(Attrs.hasFnAttr(Attribute::ReadNone) &&
+ Attrs.hasFnAttr(Attribute::ReadOnly)),
"Attributes 'readnone and readonly' are incompatible!", V);
- Assert(!(Attrs.hasFnAttribute(Attribute::ReadNone) &&
- Attrs.hasFnAttribute(Attribute::WriteOnly)),
+ Assert(!(Attrs.hasFnAttr(Attribute::ReadNone) &&
+ Attrs.hasFnAttr(Attribute::WriteOnly)),
"Attributes 'readnone and writeonly' are incompatible!", V);
- Assert(!(Attrs.hasFnAttribute(Attribute::ReadOnly) &&
- Attrs.hasFnAttribute(Attribute::WriteOnly)),
+ Assert(!(Attrs.hasFnAttr(Attribute::ReadOnly) &&
+ Attrs.hasFnAttr(Attribute::WriteOnly)),
"Attributes 'readonly and writeonly' are incompatible!", V);
- Assert(!(Attrs.hasFnAttribute(Attribute::ReadNone) &&
- Attrs.hasFnAttribute(Attribute::InaccessibleMemOrArgMemOnly)),
+ Assert(!(Attrs.hasFnAttr(Attribute::ReadNone) &&
+ Attrs.hasFnAttr(Attribute::InaccessibleMemOrArgMemOnly)),
"Attributes 'readnone and inaccessiblemem_or_argmemonly' are "
"incompatible!",
V);
- Assert(!(Attrs.hasFnAttribute(Attribute::ReadNone) &&
- Attrs.hasFnAttribute(Attribute::InaccessibleMemOnly)),
+ Assert(!(Attrs.hasFnAttr(Attribute::ReadNone) &&
+ Attrs.hasFnAttr(Attribute::InaccessibleMemOnly)),
"Attributes 'readnone and inaccessiblememonly' are incompatible!", V);
- Assert(!(Attrs.hasFnAttribute(Attribute::NoInline) &&
- Attrs.hasFnAttribute(Attribute::AlwaysInline)),
+ Assert(!(Attrs.hasFnAttr(Attribute::NoInline) &&
+ Attrs.hasFnAttr(Attribute::AlwaysInline)),
"Attributes 'noinline and alwaysinline' are incompatible!", V);
- if (Attrs.hasFnAttribute(Attribute::OptimizeNone)) {
- Assert(Attrs.hasFnAttribute(Attribute::NoInline),
+ if (Attrs.hasFnAttr(Attribute::OptimizeNone)) {
+ Assert(Attrs.hasFnAttr(Attribute::NoInline),
"Attribute 'optnone' requires 'noinline'!", V);
- Assert(!Attrs.hasFnAttribute(Attribute::OptimizeForSize),
+ Assert(!Attrs.hasFnAttr(Attribute::OptimizeForSize),
"Attributes 'optsize and optnone' are incompatible!", V);
- Assert(!Attrs.hasFnAttribute(Attribute::MinSize),
+ Assert(!Attrs.hasFnAttr(Attribute::MinSize),
"Attributes 'minsize and optnone' are incompatible!", V);
}
- if (Attrs.hasFnAttribute(Attribute::JumpTable)) {
+ if (Attrs.hasFnAttr(Attribute::JumpTable)) {
const GlobalValue *GV = cast<GlobalValue>(V);
Assert(GV->hasGlobalUnnamedAddr(),
"Attribute 'jumptable' requires 'unnamed_addr'", V);
}
- if (Attrs.hasFnAttribute(Attribute::AllocSize)) {
+ if (Attrs.hasFnAttr(Attribute::AllocSize)) {
std::pair<unsigned, Optional<unsigned>> Args =
- Attrs.getAllocSizeArgs(AttributeList::FunctionIndex);
+ Attrs.getFnAttrs().getAllocSizeArgs();
auto CheckParam = [&](StringRef Name, unsigned ParamNo) {
if (ParamNo >= FT->getNumParams()) {
@@ -2009,17 +2054,16 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
return;
}
- if (Attrs.hasFnAttribute(Attribute::VScaleRange)) {
+ if (Attrs.hasFnAttr(Attribute::VScaleRange)) {
std::pair<unsigned, unsigned> Args =
- Attrs.getVScaleRangeArgs(AttributeList::FunctionIndex);
+ Attrs.getFnAttrs().getVScaleRangeArgs();
if (Args.first > Args.second && Args.second != 0)
CheckFailed("'vscale_range' minimum cannot be greater than maximum", V);
}
- if (Attrs.hasFnAttribute("frame-pointer")) {
- StringRef FP = Attrs.getAttribute(AttributeList::FunctionIndex,
- "frame-pointer").getValueAsString();
+ if (Attrs.hasFnAttr("frame-pointer")) {
+ StringRef FP = Attrs.getFnAttr("frame-pointer").getValueAsString();
if (FP != "all" && FP != "non-leaf" && FP != "none")
CheckFailed("invalid value for 'frame-pointer' attribute: " + FP, V);
}
@@ -2168,7 +2212,7 @@ void Verifier::verifyStatepoint(const CallBase &Call) {
Call);
if (TargetFuncType->isVarArg()) {
- AttributeSet ArgAttrs = Attrs.getParamAttributes(5 + i);
+ AttributeSet ArgAttrs = Attrs.getParamAttrs(5 + i);
Assert(!ArgAttrs.hasAttribute(Attribute::StructRet),
"Attribute 'sret' cannot be used for vararg call arguments!",
Call);
@@ -2334,7 +2378,7 @@ void Verifier::visitFunction(const Function &F) {
// On function declarations/definitions, we do not support the builtin
// attribute. We do not check this in VerifyFunctionAttrs since that is
// checking for Attributes that can/can not ever be on functions.
- Assert(!Attrs.hasFnAttribute(Attribute::Builtin),
+ Assert(!Attrs.hasFnAttr(Attribute::Builtin),
"Attribute 'builtin' can only be applied to a callsite.", &F);
Assert(!Attrs.hasAttrSomewhere(Attribute::ElementType),
@@ -2348,7 +2392,7 @@ void Verifier::visitFunction(const Function &F) {
case CallingConv::C:
break;
case CallingConv::X86_INTR: {
- Assert(F.arg_empty() || Attrs.hasParamAttribute(0, Attribute::ByVal),
+ Assert(F.arg_empty() || Attrs.hasParamAttr(0, Attribute::ByVal),
"Calling convention parameter requires byval", &F);
break;
}
@@ -2368,14 +2412,14 @@ void Verifier::visitFunction(const Function &F) {
const unsigned StackAS = DL.getAllocaAddrSpace();
unsigned i = 0;
for (const Argument &Arg : F.args()) {
- Assert(!Attrs.hasParamAttribute(i, Attribute::ByVal),
+ Assert(!Attrs.hasParamAttr(i, Attribute::ByVal),
"Calling convention disallows byval", &F);
- Assert(!Attrs.hasParamAttribute(i, Attribute::Preallocated),
+ Assert(!Attrs.hasParamAttr(i, Attribute::Preallocated),
"Calling convention disallows preallocated", &F);
- Assert(!Attrs.hasParamAttribute(i, Attribute::InAlloca),
+ Assert(!Attrs.hasParamAttr(i, Attribute::InAlloca),
"Calling convention disallows inalloca", &F);
- if (Attrs.hasParamAttribute(i, Attribute::ByRef)) {
+ if (Attrs.hasParamAttr(i, Attribute::ByRef)) {
// FIXME: Should also disallow LDS and GDS, but we don't have the enum
// value here.
Assert(Arg.getType()->getPointerAddressSpace() != StackAS,
@@ -2416,7 +2460,7 @@ void Verifier::visitFunction(const Function &F) {
}
// Check that swifterror argument is only used by loads and stores.
- if (Attrs.hasParamAttribute(i, Attribute::SwiftError)) {
+ if (Attrs.hasParamAttr(i, Attribute::SwiftError)) {
verifySwiftErrorValue(&Arg);
}
++i;
@@ -2523,7 +2567,8 @@ void Verifier::visitFunction(const Function &F) {
// uses.
if (F.isIntrinsic() && F.getParent()->isMaterialized()) {
const User *U;
- if (F.hasAddressTaken(&U))
+ if (F.hasAddressTaken(&U, false, true, false,
+ /*IgnoreARCAttachedCall=*/true))
Assert(false, "Invalid user of intrinsic instruction!", U);
}
@@ -2693,6 +2738,7 @@ void Verifier::visitReturnInst(ReturnInst &RI) {
}
void Verifier::visitSwitchInst(SwitchInst &SI) {
+ Assert(SI.getType()->isVoidTy(), "Switch must have void result type!", &SI);
// Check to make sure that all of the constants in the switch instruction
// have the same type as the switched-on value.
Type *SwitchTy = SI.getCondition()->getType();
@@ -2726,7 +2772,7 @@ void Verifier::visitCallBrInst(CallBrInst &CBI) {
Assert(CBI.getSuccessor(i)->getType()->isLabelTy(),
"Callbr successors must all have pointer type!", &CBI);
for (unsigned i = 0, e = CBI.getNumOperands(); i != e; ++i) {
- Assert(i >= CBI.getNumArgOperands() || !isa<BasicBlock>(CBI.getOperand(i)),
+ Assert(i >= CBI.arg_size() || !isa<BasicBlock>(CBI.getOperand(i)),
"Using an unescaped label as a callbr argument!", &CBI);
if (isa<BasicBlock>(CBI.getOperand(i)))
for (unsigned j = i + 1; j != e; ++j)
@@ -3071,14 +3117,14 @@ void Verifier::visitCallBase(CallBase &Call) {
Assert(Callee->getValueType() == FTy,
"Intrinsic called with incompatible signature", Call);
- if (Attrs.hasFnAttribute(Attribute::Speculatable)) {
+ if (Attrs.hasFnAttr(Attribute::Speculatable)) {
// Don't allow speculatable on call sites, unless the underlying function
// declaration is also speculatable.
Assert(Callee && Callee->isSpeculatable(),
"speculatable attribute may not apply to call sites", Call);
}
- if (Attrs.hasFnAttribute(Attribute::Preallocated)) {
+ if (Attrs.hasFnAttr(Attribute::Preallocated)) {
Assert(Call.getCalledFunction()->getIntrinsicID() ==
Intrinsic::call_preallocated_arg,
"preallocated as a call site attribute can only be on "
@@ -3118,7 +3164,7 @@ void Verifier::visitCallBase(CallBase &Call) {
Call);
}
- if (Attrs.hasParamAttribute(i, Attribute::ImmArg)) {
+ if (Attrs.hasParamAttr(i, Attribute::ImmArg)) {
// Don't allow immarg on call sites, unless the underlying declaration
// also has the matching immarg.
Assert(Callee && Callee->hasParamAttribute(i, Attribute::ImmArg),
@@ -3150,16 +3196,16 @@ void Verifier::visitCallBase(CallBase &Call) {
bool SawReturned = false;
for (unsigned Idx = 0; Idx < FTy->getNumParams(); ++Idx) {
- if (Attrs.hasParamAttribute(Idx, Attribute::Nest))
+ if (Attrs.hasParamAttr(Idx, Attribute::Nest))
SawNest = true;
- if (Attrs.hasParamAttribute(Idx, Attribute::Returned))
+ if (Attrs.hasParamAttr(Idx, Attribute::Returned))
SawReturned = true;
}
// Check attributes on the varargs part.
for (unsigned Idx = FTy->getNumParams(); Idx < Call.arg_size(); ++Idx) {
Type *Ty = Call.getArgOperand(Idx)->getType();
- AttributeSet ArgAttrs = Attrs.getParamAttributes(Idx);
+ AttributeSet ArgAttrs = Attrs.getParamAttrs(Idx);
verifyParameterAttrs(ArgAttrs, Ty, &Call);
if (ArgAttrs.hasAttribute(Attribute::Nest)) {
@@ -3265,17 +3311,10 @@ void Verifier::visitCallBase(CallBase &Call) {
Assert(!FoundAttachedCallBundle,
"Multiple \"clang.arc.attachedcall\" operand bundles", Call);
FoundAttachedCallBundle = true;
+ verifyAttachedCallBundle(Call, BU);
}
}
- if (FoundAttachedCallBundle)
- Assert((FTy->getReturnType()->isPointerTy() ||
- (Call.doesNotReturn() && FTy->getReturnType()->isVoidTy())),
- "a call with operand bundle \"clang.arc.attachedcall\" must call a "
- "function returning a pointer or a non-returning function that has "
- "a void return type",
- Call);
-
// Verify that each inlinable callsite of a debug-info-bearing function in a
// debug-info-bearing function has a debug location attached to it. Failure to
// do so causes assertion failures when the inliner sets up inline scope info.
@@ -3315,7 +3354,7 @@ static bool isTypeCongruent(Type *L, Type *R) {
return PL->getAddressSpace() == PR->getAddressSpace();
}
-static AttrBuilder getParameterABIAttributes(int I, AttributeList Attrs) {
+static AttrBuilder getParameterABIAttributes(unsigned I, AttributeList Attrs) {
static const Attribute::AttrKind ABIAttrs[] = {
Attribute::StructRet, Attribute::ByVal, Attribute::InAlloca,
Attribute::InReg, Attribute::StackAlignment, Attribute::SwiftSelf,
@@ -3323,15 +3362,15 @@ static AttrBuilder getParameterABIAttributes(int I, AttributeList Attrs) {
Attribute::ByRef};
AttrBuilder Copy;
for (auto AK : ABIAttrs) {
- Attribute Attr = Attrs.getParamAttributes(I).getAttribute(AK);
+ Attribute Attr = Attrs.getParamAttrs(I).getAttribute(AK);
if (Attr.isValid())
Copy.addAttribute(Attr);
}
// `align` is ABI-affecting only in combination with `byval` or `byref`.
- if (Attrs.hasParamAttribute(I, Attribute::Alignment) &&
- (Attrs.hasParamAttribute(I, Attribute::ByVal) ||
- Attrs.hasParamAttribute(I, Attribute::ByRef)))
+ if (Attrs.hasParamAttr(I, Attribute::Alignment) &&
+ (Attrs.hasParamAttr(I, Attribute::ByVal) ||
+ Attrs.hasParamAttr(I, Attribute::ByRef)))
Copy.addAlignmentAttr(Attrs.getParamAlignment(I));
return Copy;
}
@@ -3383,12 +3422,12 @@ void Verifier::verifyMustTailCall(CallInst &CI) {
// - Only sret, byval, swiftself, and swiftasync ABI-impacting attributes
// are allowed in swifttailcc call
- for (int I = 0, E = CallerTy->getNumParams(); I != E; ++I) {
+ for (unsigned I = 0, E = CallerTy->getNumParams(); I != E; ++I) {
AttrBuilder ABIAttrs = getParameterABIAttributes(I, CallerAttrs);
SmallString<32> Context{CCName, StringRef(" musttail caller")};
verifyTailCCMustTailAttrs(ABIAttrs, Context);
}
- for (int I = 0, E = CalleeTy->getNumParams(); I != E; ++I) {
+ for (unsigned I = 0, E = CalleeTy->getNumParams(); I != E; ++I) {
AttrBuilder ABIAttrs = getParameterABIAttributes(I, CalleeAttrs);
SmallString<32> Context{CCName, StringRef(" musttail callee")};
verifyTailCCMustTailAttrs(ABIAttrs, Context);
@@ -3406,7 +3445,7 @@ void Verifier::verifyMustTailCall(CallInst &CI) {
Assert(CallerTy->getNumParams() == CalleeTy->getNumParams(),
"cannot guarantee tail call due to mismatched parameter counts",
&CI);
- for (int I = 0, E = CallerTy->getNumParams(); I != E; ++I) {
+ for (unsigned I = 0, E = CallerTy->getNumParams(); I != E; ++I) {
Assert(
isTypeCongruent(CallerTy->getParamType(I), CalleeTy->getParamType(I)),
"cannot guarantee tail call due to mismatched parameter types", &CI);
@@ -3415,7 +3454,7 @@ void Verifier::verifyMustTailCall(CallInst &CI) {
// - All ABI-impacting function attributes, such as sret, byval, inreg,
// returned, preallocated, and inalloca, must match.
- for (int I = 0, E = CallerTy->getNumParams(); I != E; ++I) {
+ for (unsigned I = 0, E = CallerTy->getNumParams(); I != E; ++I) {
AttrBuilder CallerABIAttrs = getParameterABIAttributes(I, CallerAttrs);
AttrBuilder CalleeABIAttrs = getParameterABIAttributes(I, CalleeAttrs);
Assert(CallerABIAttrs == CalleeABIAttrs,
@@ -4347,6 +4386,38 @@ void Verifier::visitAnnotationMetadata(MDNode *Annotation) {
Assert(isa<MDString>(Op.get()), "operands must be strings");
}
+void Verifier::visitAliasScopeMetadata(const MDNode *MD) {
+ unsigned NumOps = MD->getNumOperands();
+ Assert(NumOps >= 2 && NumOps <= 3, "scope must have two or three operands",
+ MD);
+ Assert(MD->getOperand(0).get() == MD || isa<MDString>(MD->getOperand(0)),
+ "first scope operand must be self-referential or string", MD);
+ if (NumOps == 3)
+ Assert(isa<MDString>(MD->getOperand(2)),
+ "third scope operand must be string (if used)", MD);
+
+ MDNode *Domain = dyn_cast<MDNode>(MD->getOperand(1));
+ Assert(Domain != nullptr, "second scope operand must be MDNode", MD);
+
+ unsigned NumDomainOps = Domain->getNumOperands();
+ Assert(NumDomainOps >= 1 && NumDomainOps <= 2,
+ "domain must have one or two operands", Domain);
+ Assert(Domain->getOperand(0).get() == Domain ||
+ isa<MDString>(Domain->getOperand(0)),
+ "first domain operand must be self-referential or string", Domain);
+ if (NumDomainOps == 2)
+ Assert(isa<MDString>(Domain->getOperand(1)),
+ "second domain operand must be string (if used)", Domain);
+}
+
+void Verifier::visitAliasScopeListMetadata(const MDNode *MD) {
+ for (const MDOperand &Op : MD->operands()) {
+ const MDNode *OpMD = dyn_cast<MDNode>(Op);
+ Assert(OpMD != nullptr, "scope list must consist of MDNodes", MD);
+ visitAliasScopeMetadata(OpMD);
+ }
+}
+
/// verifyInstruction - Verify that an instruction is well formed.
///
void Verifier::visitInstruction(Instruction &I) {
@@ -4403,10 +4474,21 @@ void Verifier::visitInstruction(Instruction &I) {
}
if (Function *F = dyn_cast<Function>(I.getOperand(i))) {
+ // This code checks whether the function is used as the operand of a
+ // clang_arc_attachedcall operand bundle.
+ auto IsAttachedCallOperand = [](Function *F, const CallBase *CBI,
+ int Idx) {
+ return CBI && CBI->isOperandBundleOfType(
+ LLVMContext::OB_clang_arc_attachedcall, Idx);
+ };
+
// Check to make sure that the "address of" an intrinsic function is never
- // taken.
- Assert(!F->isIntrinsic() ||
- (CBI && &CBI->getCalledOperandUse() == &I.getOperandUse(i)),
+ // taken. Ignore cases where the address of the intrinsic function is used
+ // as the argument of operand bundle "clang.arc.attachedcall" as those
+ // cases are handled in verifyAttachedCallBundle.
+ Assert((!F->isIntrinsic() ||
+ (CBI && &CBI->getCalledOperandUse() == &I.getOperandUse(i)) ||
+ IsAttachedCallOperand(F, CBI, i)),
"Cannot take the address of an intrinsic!", &I);
Assert(
!F->isIntrinsic() || isa<CallInst>(I) ||
@@ -4420,9 +4502,10 @@ void Verifier::visitInstruction(Instruction &I) {
F->getIntrinsicID() == Intrinsic::experimental_patchpoint_void ||
F->getIntrinsicID() == Intrinsic::experimental_patchpoint_i64 ||
F->getIntrinsicID() == Intrinsic::experimental_gc_statepoint ||
- F->getIntrinsicID() == Intrinsic::wasm_rethrow,
+ F->getIntrinsicID() == Intrinsic::wasm_rethrow ||
+ IsAttachedCallOperand(F, CBI, i),
"Cannot invoke an intrinsic other than donothing, patchpoint, "
- "statepoint, coro_resume or coro_destroy",
+ "statepoint, coro_resume, coro_destroy or clang.arc.attachedcall",
&I);
Assert(F->getParent() == &M, "Referencing function in another module!",
&I, &M, F, F->getParent());
@@ -4471,6 +4554,11 @@ void Verifier::visitInstruction(Instruction &I) {
visitRangeMetadata(I, Range, I.getType());
}
+ if (I.hasMetadata(LLVMContext::MD_invariant_group)) {
+ Assert(isa<LoadInst>(I) || isa<StoreInst>(I),
+ "invariant.group metadata is only for loads and stores", &I);
+ }
+
if (I.getMetadata(LLVMContext::MD_nonnull)) {
Assert(I.getType()->isPointerTy(), "nonnull applies only to pointer types",
&I);
@@ -4489,6 +4577,11 @@ void Verifier::visitInstruction(Instruction &I) {
if (MDNode *TBAA = I.getMetadata(LLVMContext::MD_tbaa))
TBAAVerifyHelper.visitTBAAMetadata(I, TBAA);
+ if (MDNode *MD = I.getMetadata(LLVMContext::MD_noalias))
+ visitAliasScopeListMetadata(MD);
+ if (MDNode *MD = I.getMetadata(LLVMContext::MD_alias_scope))
+ visitAliasScopeListMetadata(MD);
+
if (MDNode *AlignMD = I.getMetadata(LLVMContext::MD_align)) {
Assert(I.getType()->isPointerTy(), "align applies only to pointer types",
&I);
@@ -4599,33 +4692,34 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
for (auto &Elem : Call.bundle_op_infos()) {
Assert(Elem.Tag->getKey() == "ignore" ||
Attribute::isExistingAttribute(Elem.Tag->getKey()),
- "tags must be valid attribute names");
+ "tags must be valid attribute names", Call);
Attribute::AttrKind Kind =
Attribute::getAttrKindFromName(Elem.Tag->getKey());
unsigned ArgCount = Elem.End - Elem.Begin;
if (Kind == Attribute::Alignment) {
Assert(ArgCount <= 3 && ArgCount >= 2,
- "alignment assumptions should have 2 or 3 arguments");
+ "alignment assumptions should have 2 or 3 arguments", Call);
Assert(Call.getOperand(Elem.Begin)->getType()->isPointerTy(),
- "first argument should be a pointer");
+ "first argument should be a pointer", Call);
Assert(Call.getOperand(Elem.Begin + 1)->getType()->isIntegerTy(),
- "second argument should be an integer");
+ "second argument should be an integer", Call);
if (ArgCount == 3)
Assert(Call.getOperand(Elem.Begin + 2)->getType()->isIntegerTy(),
- "third argument should be an integer if present");
+ "third argument should be an integer if present", Call);
return;
}
- Assert(ArgCount <= 2, "to many arguments");
+ Assert(ArgCount <= 2, "too many arguments", Call);
if (Kind == Attribute::None)
break;
if (Attribute::isIntAttrKind(Kind)) {
- Assert(ArgCount == 2, "this attribute should have 2 arguments");
+ Assert(ArgCount == 2, "this attribute should have 2 arguments", Call);
Assert(isa<ConstantInt>(Call.getOperand(Elem.Begin + 1)),
- "the second argument should be a constant integral value");
+ "the second argument should be a constant integral value", Call);
} else if (Attribute::canUseAsParamAttr(Kind)) {
- Assert((ArgCount) == 1, "this attribute should have one argument");
+ Assert((ArgCount) == 1, "this attribute should have one argument",
+ Call);
} else if (Attribute::canUseAsFnAttr(Kind)) {
- Assert((ArgCount) == 0, "this attribute has no argument");
+ Assert((ArgCount) == 0, "this attribute has no argument", Call);
}
}
break;
@@ -4736,7 +4830,7 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
"llvm.call.preallocated.setup");
FoundCall = true;
size_t NumPreallocatedArgs = 0;
- for (unsigned i = 0; i < UseCall->getNumArgOperands(); i++) {
+ for (unsigned i = 0; i < UseCall->arg_size(); i++) {
if (UseCall->paramHasAttr(i, Attribute::Preallocated)) {
++NumPreallocatedArgs;
}
@@ -4834,7 +4928,7 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
Assert(AI && AI->isStaticAlloca(),
"llvm.localescape only accepts static allocas", Call);
}
- FrameEscapeInfo[BB->getParent()].first = Call.getNumArgOperands();
+ FrameEscapeInfo[BB->getParent()].first = Call.arg_size();
SawFrameEscape = true;
break;
}
@@ -4883,7 +4977,7 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
break;
}
case Intrinsic::experimental_gc_relocate: {
- Assert(Call.getNumArgOperands() == 3, "wrong number of arguments", Call);
+ Assert(Call.arg_size() == 3, "wrong number of arguments", Call);
Assert(isa<PointerType>(Call.getType()->getScalarType()),
"gc.relocate must return a pointer or a vector of pointers", Call);
@@ -5017,14 +5111,14 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
case Intrinsic::masked_gather: {
const APInt &Alignment =
cast<ConstantInt>(Call.getArgOperand(1))->getValue();
- Assert(Alignment.isNullValue() || Alignment.isPowerOf2(),
+ Assert(Alignment.isZero() || Alignment.isPowerOf2(),
"masked_gather: alignment must be 0 or a power of 2", Call);
break;
}
case Intrinsic::masked_scatter: {
const APInt &Alignment =
cast<ConstantInt>(Call.getArgOperand(2))->getValue();
- Assert(Alignment.isNullValue() || Alignment.isPowerOf2(),
+ Assert(Alignment.isZero() || Alignment.isPowerOf2(),
"masked_scatter: alignment must be 0 or a power of 2", Call);
break;
}
@@ -5340,7 +5434,7 @@ void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) {
// Compare intrinsics carry an extra predicate metadata operand.
if (isa<ConstrainedFPCmpIntrinsic>(FPI))
NumOperands += 1;
- Assert((FPI.getNumArgOperands() == NumOperands),
+ Assert((FPI.arg_size() == NumOperands),
"invalid arguments for constrained FP intrinsic", &FPI);
switch (FPI.getIntrinsicID()) {
@@ -5643,6 +5737,41 @@ void Verifier::verifyDeoptimizeCallingConvs() {
}
}
+void Verifier::verifyAttachedCallBundle(const CallBase &Call,
+ const OperandBundleUse &BU) {
+ FunctionType *FTy = Call.getFunctionType();
+
+ Assert((FTy->getReturnType()->isPointerTy() ||
+ (Call.doesNotReturn() && FTy->getReturnType()->isVoidTy())),
+ "a call with operand bundle \"clang.arc.attachedcall\" must call a "
+ "function returning a pointer or a non-returning function that has a "
+ "void return type",
+ Call);
+
+ Assert((BU.Inputs.empty() ||
+ (BU.Inputs.size() == 1 && isa<Function>(BU.Inputs.front()))),
+ "operand bundle \"clang.arc.attachedcall\" can take either no "
+ "arguments or one function as an argument",
+ Call);
+
+ if (BU.Inputs.empty())
+ return;
+
+ auto *Fn = cast<Function>(BU.Inputs.front());
+ Intrinsic::ID IID = Fn->getIntrinsicID();
+
+ if (IID) {
+ Assert((IID == Intrinsic::objc_retainAutoreleasedReturnValue ||
+ IID == Intrinsic::objc_unsafeClaimAutoreleasedReturnValue),
+ "invalid function argument", Call);
+ } else {
+ StringRef FnName = Fn->getName();
+ Assert((FnName == "objc_retainAutoreleasedReturnValue" ||
+ FnName == "objc_unsafeClaimAutoreleasedReturnValue"),
+ "invalid function argument", Call);
+ }
+}
+
void Verifier::verifySourceDebugInfo(const DICompileUnit &U, const DIFile &F) {
bool HasSource = F.getSource().hasValue();
if (!HasSourceDebugInfo.count(&U))
@@ -5671,6 +5800,7 @@ void Verifier::verifyNoAliasScopeDecl() {
II);
Assert(ScopeListMD->getNumOperands() == 1,
"!id.scope.list must point to a list with a single scope", II);
+ visitAliasScopeListMetadata(ScopeListMD);
}
// Only check the domination rule when requested. Once all passes have been
@@ -6036,11 +6166,7 @@ static bool isNewFormatTBAATypeNode(llvm::MDNode *Type) {
// In the new format type nodes shall have a reference to the parent type as
// its first operand.
- MDNode *Parent = dyn_cast_or_null<MDNode>(Type->getOperand(0));
- if (!Parent)
- return false;
-
- return true;
+ return isa_and_nonnull<MDNode>(Type->getOperand(0));
}
bool TBAAVerifier::visitTBAAMetadata(Instruction &I, const MDNode *MD) {
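
Among the Verifier.cpp additions above, visitAliasScopeMetadata and
visitAliasScopeListMetadata check the shape of alias-scope metadata: a scope
node has two or three operands, its first operand is self-referential or a
string, and its second operand is a domain node that in turn has one or two
operands of the same form. A simplified standalone sketch of those shape
rules, using an assumed Node type in place of MDNode (the string-only rule for
the optional third operand is omitted here):

// --- illustrative sketch (editorial example, not part of the imported diff) ---
#include <cassert>
#include <string>
#include <variant>
#include <vector>

struct Node;
using Operand = std::variant<const Node *, std::string>;
struct Node { std::vector<Operand> Ops; };

// The first operand must point back at the node itself or be a string.
static bool isSelfOrString(const Node &N, const Operand &Op) {
  if (auto *P = std::get_if<const Node *>(&Op))
    return *P == &N;
  return true;
}

bool isValidScope(const Node &Scope) {
  if (Scope.Ops.size() < 2 || Scope.Ops.size() > 3)
    return false;
  if (!isSelfOrString(Scope, Scope.Ops[0]))
    return false;
  auto *DomainP = std::get_if<const Node *>(&Scope.Ops[1]);
  if (!DomainP || !*DomainP)
    return false;               // second operand must be another node (the domain)
  const Node &Domain = **DomainP;
  if (Domain.Ops.size() < 1 || Domain.Ops.size() > 2)
    return false;
  return isSelfOrString(Domain, Domain.Ops[0]);
}

int main() {
  Node Domain;
  Domain.Ops = {Operand{&Domain}};                       // self-referential domain
  Node Scope;
  Scope.Ops = {Operand{&Scope}, Operand{&Domain}};       // well-formed scope
  assert(isValidScope(Scope));
  Node Bad;
  Bad.Ops = {Operand{std::string("only one operand")}};  // malformed scope
  assert(!isValidScope(Bad));
  return 0;
}
// --- end sketch ---
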
diff --git a/llvm/lib/InterfaceStub/ELFObjHandler.cpp b/llvm/lib/InterfaceStub/ELFObjHandler.cpp
index 112c1cea354a..d41c7d3217d7 100644
--- a/llvm/lib/InterfaceStub/ELFObjHandler.cpp
+++ b/llvm/lib/InterfaceStub/ELFObjHandler.cpp
@@ -367,7 +367,7 @@ Error appendToError(Error Err, StringRef After) {
Stream << Err;
Stream << " " << After;
consumeError(std::move(Err));
- return createError(Stream.str().c_str());
+ return createError(Stream.str());
}
/// This function populates a DynamicEntries struct using an ELFT::DynRange.
diff --git a/llvm/lib/InterfaceStub/IFSHandler.cpp b/llvm/lib/InterfaceStub/IFSHandler.cpp
index d3d351fa2ed4..e6bf09232ce2 100644
--- a/llvm/lib/InterfaceStub/IFSHandler.cpp
+++ b/llvm/lib/InterfaceStub/IFSHandler.cpp
@@ -163,7 +163,7 @@ bool usesTriple(StringRef Buf) {
for (line_iterator I(MemoryBufferRef(Buf, "ELFStub")); !I.is_at_eof(); ++I) {
StringRef Line = (*I).trim();
if (Line.startswith("Target:")) {
- if (Line == "Target:" || (Line.find("{") != Line.npos)) {
+ if (Line == "Target:" || Line.contains("{")) {
return false;
}
}
@@ -327,3 +327,13 @@ void ifs::stripIFSTarget(IFSStub &Stub, bool StripTriple, bool StripArch,
Stub.Target.ObjectFormat.reset();
}
}
+
+void ifs::stripIFSUndefinedSymbols(IFSStub &Stub) {
+ for (auto Iter = Stub.Symbols.begin(); Iter != Stub.Symbols.end();) {
+ if (Iter->Undefined) {
+ Iter = Stub.Symbols.erase(Iter);
+ } else {
+ Iter++;
+ }
+ }
+}
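
The new ifs::stripIFSUndefinedSymbols above removes undefined symbols with the
erase-returns-the-next-iterator idiom, so the loop advances either by erasing
or by incrementing and never skips an element. The same idiom in a
self-contained form, with an assumed Symbol stand-in:

// --- illustrative sketch (editorial example, not part of the imported diff) ---
#include <cassert>
#include <vector>

struct Symbol { bool Undefined; };

void stripUndefined(std::vector<Symbol> &Symbols) {
  for (auto It = Symbols.begin(); It != Symbols.end();) {
    if (It->Undefined)
      It = Symbols.erase(It);   // removes the element and yields its successor
    else
      ++It;
  }
}

int main() {
  std::vector<Symbol> Syms{{true}, {false}, {true}, {false}};
  stripUndefined(Syms);
  assert(Syms.size() == 2);
  return 0;
}
// --- end sketch ---
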
diff --git a/llvm/lib/InterfaceStub/IFSStub.cpp b/llvm/lib/InterfaceStub/IFSStub.cpp
index bbc91ada1ded..008263f8db9f 100644
--- a/llvm/lib/InterfaceStub/IFSStub.cpp
+++ b/llvm/lib/InterfaceStub/IFSStub.cpp
@@ -29,7 +29,7 @@ IFSStub::IFSStub(IFSStub &&Stub) {
Symbols = std::move(Stub.Symbols);
}
-IFSStubTriple::IFSStubTriple(IFSStubTriple const &Stub) {
+IFSStubTriple::IFSStubTriple(IFSStubTriple const &Stub) : IFSStub() {
IfsVersion = Stub.IfsVersion;
Target = Stub.Target;
SoName = Stub.SoName;
@@ -37,7 +37,7 @@ IFSStubTriple::IFSStubTriple(IFSStubTriple const &Stub) {
Symbols = Stub.Symbols;
}
-IFSStubTriple::IFSStubTriple(IFSStub const &Stub) {
+IFSStubTriple::IFSStubTriple(IFSStub const &Stub) : IFSStub() {
IfsVersion = Stub.IfsVersion;
Target = Stub.Target;
SoName = Stub.SoName;
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index 69d500ba9bce..6ce2ed265739 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/LTO/LTO.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
@@ -32,6 +33,7 @@
#include "llvm/LTO/LTOBackend.h"
#include "llvm/LTO/SummaryBasedOptimizations.h"
#include "llvm/Linker/IRMover.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/IRObjectFile.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
@@ -41,7 +43,6 @@
#include "llvm/Support/Path.h"
#include "llvm/Support/SHA1.h"
#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/ThreadPool.h"
#include "llvm/Support/Threading.h"
#include "llvm/Support/TimeProfiler.h"
@@ -536,12 +537,12 @@ void LTO::addModuleToGlobalRes(ArrayRef<InputFile::Symbol> Syms,
auto *ResI = Res.begin();
auto *ResE = Res.end();
(void)ResE;
+ const Triple TT(RegularLTO.CombinedModule->getTargetTriple());
for (const InputFile::Symbol &Sym : Syms) {
assert(ResI != ResE);
SymbolResolution Res = *ResI++;
StringRef Name = Sym.getName();
- Triple TT(RegularLTO.CombinedModule->getTargetTriple());
// Strip the __imp_ prefix from COFF dllimport symbols (similar to the
// way they are handled by lld), otherwise we can end up with two
// global resolutions (one with and one for a copy of the symbol without).
@@ -732,7 +733,7 @@ LTO::addRegularLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
DenseSet<GlobalObject *> AliasedGlobals;
for (auto &GA : M.aliases())
- if (GlobalObject *GO = GA.getBaseObject())
+ if (GlobalObject *GO = GA.getAliaseeObject())
AliasedGlobals.insert(GO);
// In this function we need IR GlobalValues matching the symbols in Syms
@@ -856,10 +857,14 @@ Error LTO::linkRegularLTO(RegularLTOState::AddedModule Mod,
for (GlobalValue *GV : Mod.Keep) {
if (LivenessFromIndex && !ThinLTO.CombinedIndex.isGUIDLive(GV->getGUID())) {
if (Function *F = dyn_cast<Function>(GV)) {
- OptimizationRemarkEmitter ORE(F, nullptr);
- ORE.emit(OptimizationRemark(DEBUG_TYPE, "deadfunction", F)
- << ore::NV("Function", F)
- << " not added to the combined module ");
+ if (DiagnosticOutputFile) {
+ if (Error Err = F->materialize())
+ return Err;
+ OptimizationRemarkEmitter ORE(F, nullptr);
+ ORE.emit(OptimizationRemark(DEBUG_TYPE, "deadfunction", F)
+ << ore::NV("Function", F)
+ << " not added to the combined module ");
+ }
}
continue;
}
@@ -992,7 +997,7 @@ Error LTO::checkPartiallySplit() {
return Error::success();
}
-Error LTO::run(AddStreamFn AddStream, NativeObjectCache Cache) {
+Error LTO::run(AddStreamFn AddStream, FileCache Cache) {
// Compute "dead" symbols, we don't want to import/export these!
DenseSet<GlobalValue::GUID> GUIDPreservedSymbols;
DenseMap<GlobalValue::GUID, PrevailingType> GUIDPrevailingResolutions;
@@ -1048,6 +1053,7 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) {
Conf.RemarksHotnessThreshold);
if (!DiagFileOrErr)
return DiagFileOrErr.takeError();
+ DiagnosticOutputFile = std::move(*DiagFileOrErr);
// Finalize linking of regular LTO modules containing summaries now that
// we have computed liveness information.
@@ -1136,7 +1142,7 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) {
return Err;
}
- return finalizeOptimizationRemarks(std::move(*DiagFileOrErr));
+ return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
}
static const char *libcallRoutineNames[] = {
@@ -1177,7 +1183,7 @@ namespace {
class InProcessThinBackend : public ThinBackendProc {
ThreadPool BackendThreadPool;
AddStreamFn AddStream;
- NativeObjectCache Cache;
+ FileCache Cache;
std::set<GlobalValue::GUID> CfiFunctionDefs;
std::set<GlobalValue::GUID> CfiFunctionDecls;
@@ -1189,7 +1195,7 @@ public:
const Config &Conf, ModuleSummaryIndex &CombinedIndex,
ThreadPoolStrategy ThinLTOParallelism,
const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
- AddStreamFn AddStream, NativeObjectCache Cache)
+ AddStreamFn AddStream, FileCache Cache)
: ThinBackendProc(Conf, CombinedIndex, ModuleToDefinedGVSummaries),
BackendThreadPool(ThinLTOParallelism), AddStream(std::move(AddStream)),
Cache(std::move(Cache)) {
@@ -1202,8 +1208,8 @@ public:
}
Error runThinLTOBackendThread(
- AddStreamFn AddStream, NativeObjectCache Cache, unsigned Task,
- BitcodeModule BM, ModuleSummaryIndex &CombinedIndex,
+ AddStreamFn AddStream, FileCache Cache, unsigned Task, BitcodeModule BM,
+ ModuleSummaryIndex &CombinedIndex,
const FunctionImporter::ImportMapTy &ImportList,
const FunctionImporter::ExportSetTy &ExportList,
const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
@@ -1233,7 +1239,11 @@ public:
computeLTOCacheKey(Key, Conf, CombinedIndex, ModuleID, ImportList,
ExportList, ResolvedODR, DefinedGlobals, CfiFunctionDefs,
CfiFunctionDecls);
- if (AddStreamFn CacheAddStream = Cache(Task, Key))
+ Expected<AddStreamFn> CacheAddStreamOrErr = Cache(Task, Key);
+ if (Error Err = CacheAddStreamOrErr.takeError())
+ return Err;
+ AddStreamFn &CacheAddStream = *CacheAddStreamOrErr;
+ if (CacheAddStream)
return RunThinBackend(CacheAddStream);
return Error::success();
@@ -1295,7 +1305,7 @@ public:
ThinBackend lto::createInProcessThinBackend(ThreadPoolStrategy Parallelism) {
return [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex,
const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
- AddStreamFn AddStream, NativeObjectCache Cache) {
+ AddStreamFn AddStream, FileCache Cache) {
return std::make_unique<InProcessThinBackend>(
Conf, CombinedIndex, Parallelism, ModuleToDefinedGVSummaries, AddStream,
Cache);
@@ -1389,15 +1399,20 @@ ThinBackend lto::createWriteIndexesThinBackend(
raw_fd_ostream *LinkedObjectsFile, IndexWriteCallback OnWrite) {
return [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex,
const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
- AddStreamFn AddStream, NativeObjectCache Cache) {
+ AddStreamFn AddStream, FileCache Cache) {
return std::make_unique<WriteIndexesThinBackend>(
Conf, CombinedIndex, ModuleToDefinedGVSummaries, OldPrefix, NewPrefix,
ShouldEmitImportsFiles, LinkedObjectsFile, OnWrite);
};
}
-Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache,
+Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
+ timeTraceProfilerBegin("ThinLink", StringRef(""));
+ auto TimeTraceScopeExit = llvm::make_scope_exit([]() {
+ if (llvm::timeTraceProfilerEnabled())
+ llvm::timeTraceProfilerEnd();
+ });
if (ThinLTO.ModuleMap.empty())
return Error::success();
@@ -1510,8 +1525,15 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache,
thinLTOResolvePrevailingInIndex(Conf, ThinLTO.CombinedIndex, isPrevailing,
recordNewLinkage, GUIDPreservedSymbols);
+ thinLTOPropagateFunctionAttrs(ThinLTO.CombinedIndex, isPrevailing);
+
generateParamAccessSummary(ThinLTO.CombinedIndex);
+ if (llvm::timeTraceProfilerEnabled())
+ llvm::timeTraceProfilerEnd();
+
+ TimeTraceScopeExit.release();
+
std::unique_ptr<ThinBackendProc> BackendProc =
ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,
AddStream, Cache);
diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp
index 4e4ba4f3a58e..be06556b0c3b 100644
--- a/llvm/lib/LTO/LTOBackend.cpp
+++ b/llvm/lib/LTO/LTOBackend.cpp
@@ -27,6 +27,7 @@
#include "llvm/IR/Verifier.h"
#include "llvm/LTO/LTO.h"
#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/ModuleSymbolTable.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Passes/PassPlugin.h"
@@ -37,7 +38,6 @@
#include "llvm/Support/Path.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/SmallVectorMemoryBuffer.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/ThreadPool.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
@@ -74,7 +74,11 @@ static cl::opt<bool> ThinLTOAssumeMerged(
cl::desc("Assume the input has already undergone ThinLTO function "
"importing and the other pre-optimization pipeline changes."));
-LLVM_ATTRIBUTE_NORETURN static void reportOpenError(StringRef Path, Twine Msg) {
+namespace llvm {
+extern cl::opt<bool> NoPGOWarnMismatch;
+}
+
+[[noreturn]] static void reportOpenError(StringRef Path, Twine Msg) {
errs() << "failed to open " << Path << ": " << Msg << '\n';
errs().flush();
exit(1);
@@ -221,10 +225,13 @@ static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM,
PGOOpt = PGOOptions(Conf.CSIRProfile, "", Conf.ProfileRemapping,
PGOOptions::IRUse, PGOOptions::CSIRUse,
Conf.AddFSDiscriminator);
+ NoPGOWarnMismatch = !Conf.PGOWarnMismatch;
} else if (Conf.AddFSDiscriminator) {
PGOOpt = PGOOptions("", "", "", PGOOptions::NoAction,
PGOOptions::NoCSAction, true);
}
+ if (TM)
+ TM->setPGOOption(PGOOpt);
LoopAnalysisManager LAM;
FunctionAnalysisManager FAM;
@@ -244,18 +251,16 @@ static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM,
TLII->disableAllFunctions();
FAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); });
- AAManager AA;
// Parse a custom AA pipeline if asked to.
if (!Conf.AAPipeline.empty()) {
+ AAManager AA;
if (auto Err = PB.parseAAPipeline(AA, Conf.AAPipeline)) {
- report_fatal_error("unable to parse AA pipeline description '" +
+ report_fatal_error(Twine("unable to parse AA pipeline description '") +
Conf.AAPipeline + "': " + toString(std::move(Err)));
}
- } else {
- AA = PB.buildDefaultAAPipeline();
+ // Register the AA manager first so that our version is the one used.
+ FAM.registerPass([&] { return std::move(AA); });
}
- // Register the AA manager first so that our version is the one used.
- FAM.registerPass([&] { return std::move(AA); });
// Register all the basic analyses with the managers.
PB.registerModuleAnalyses(MAM);
@@ -269,29 +274,29 @@ static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM,
if (!Conf.DisableVerify)
MPM.addPass(VerifierPass());
- PassBuilder::OptimizationLevel OL;
+ OptimizationLevel OL;
switch (OptLevel) {
default:
llvm_unreachable("Invalid optimization level");
case 0:
- OL = PassBuilder::OptimizationLevel::O0;
+ OL = OptimizationLevel::O0;
break;
case 1:
- OL = PassBuilder::OptimizationLevel::O1;
+ OL = OptimizationLevel::O1;
break;
case 2:
- OL = PassBuilder::OptimizationLevel::O2;
+ OL = OptimizationLevel::O2;
break;
case 3:
- OL = PassBuilder::OptimizationLevel::O3;
+ OL = OptimizationLevel::O3;
break;
}
// Parse a custom pipeline if asked to.
if (!Conf.OptPipeline.empty()) {
if (auto Err = PB.parsePassPipeline(MPM, Conf.OptPipeline)) {
- report_fatal_error("unable to parse pass pipeline description '" +
+ report_fatal_error(Twine("unable to parse pass pipeline description '") +
Conf.OptPipeline + "': " + toString(std::move(Err)));
}
} else if (IsThinLTO) {
@@ -387,8 +392,8 @@ static void codegen(const Config &Conf, TargetMachine *TM,
if (!Conf.DwoDir.empty()) {
std::error_code EC;
if (auto EC = llvm::sys::fs::create_directories(Conf.DwoDir))
- report_fatal_error("Failed to create directory " + Conf.DwoDir + ": " +
- EC.message());
+ report_fatal_error(Twine("Failed to create directory ") + Conf.DwoDir +
+ ": " + EC.message());
DwoFile = Conf.DwoDir;
sys::path::append(DwoFile, std::to_string(Task) + ".dwo");
@@ -400,10 +405,14 @@ static void codegen(const Config &Conf, TargetMachine *TM,
std::error_code EC;
DwoOut = std::make_unique<ToolOutputFile>(DwoFile, EC, sys::fs::OF_None);
if (EC)
- report_fatal_error("Failed to open " + DwoFile + ": " + EC.message());
+ report_fatal_error(Twine("Failed to open ") + DwoFile + ": " +
+ EC.message());
}
- auto Stream = AddStream(Task);
+ Expected<std::unique_ptr<CachedFileStream>> StreamOrErr = AddStream(Task);
+ if (Error Err = StreamOrErr.takeError())
+ report_fatal_error(std::move(Err));
+ std::unique_ptr<CachedFileStream> &Stream = *StreamOrErr;
legacy::PassManager CodeGenPasses;
CodeGenPasses.add(
createImmutableModuleSummaryIndexWrapperPass(&CombinedIndex));
@@ -599,7 +608,7 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream,
dropDeadSymbols(Mod, DefinedGlobals, CombinedIndex);
- thinLTOResolvePrevailingInModule(Mod, DefinedGlobals);
+ thinLTOFinalizeInModule(Mod, DefinedGlobals, /*PropagateAttrs=*/true);
if (Conf.PostPromoteModuleHook && !Conf.PostPromoteModuleHook(Task, Mod))
return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
diff --git a/llvm/lib/LTO/LTOCodeGenerator.cpp b/llvm/lib/LTO/LTOCodeGenerator.cpp
index 7bffcbf01b03..088e45c9e8dc 100644
--- a/llvm/lib/LTO/LTOCodeGenerator.cpp
+++ b/llvm/lib/LTO/LTOCodeGenerator.cpp
@@ -44,13 +44,13 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Remarks/HotnessThresholdParser.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Signals.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/YAMLTraits.h"
@@ -245,8 +245,7 @@ bool LTOCodeGenerator::compileOptimizedToFile(const char **Name) {
// make unique temp output file to put generated code
SmallString<128> Filename;
- auto AddStream =
- [&](size_t Task) -> std::unique_ptr<lto::NativeObjectStream> {
+ auto AddStream = [&](size_t Task) -> std::unique_ptr<CachedFileStream> {
StringRef Extension(Config.CGFileType == CGFT_AssemblyFile ? "s" : "o");
int FD;
@@ -255,7 +254,7 @@ bool LTOCodeGenerator::compileOptimizedToFile(const char **Name) {
if (EC)
emitError(EC.message());
- return std::make_unique<lto::NativeObjectStream>(
+ return std::make_unique<CachedFileStream>(
std::make_unique<llvm::raw_fd_ostream>(FD, true));
};
@@ -557,7 +556,7 @@ bool LTOCodeGenerator::optimize() {
return true;
}
-bool LTOCodeGenerator::compileOptimized(lto::AddStreamFn AddStream,
+bool LTOCodeGenerator::compileOptimized(AddStreamFn AddStream,
unsigned ParallelismLevel) {
if (!this->determineTarget())
return false;
diff --git a/llvm/lib/LTO/LTOModule.cpp b/llvm/lib/LTO/LTOModule.cpp
index 155790041a75..4cc1b307c553 100644
--- a/llvm/lib/LTO/LTOModule.cpp
+++ b/llvm/lib/LTO/LTOModule.cpp
@@ -27,6 +27,7 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/IRObjectFile.h"
#include "llvm/Object/MachO.h"
#include "llvm/Object/ObjectFile.h"
@@ -35,7 +36,6 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
@@ -688,3 +688,16 @@ Expected<uint32_t> LTOModule::getMachOCPUType() const {
Expected<uint32_t> LTOModule::getMachOCPUSubType() const {
return MachO::getCPUSubType(Triple(Mod->getTargetTriple()));
}
+
+bool LTOModule::hasCtorDtor() const {
+ for (auto Sym : SymTab.symbols()) {
+ if (auto *GV = Sym.dyn_cast<GlobalValue *>()) {
+ StringRef Name = GV->getName();
+ if (Name.consume_front("llvm.global_")) {
+ if (Name.equals("ctors") || Name.equals("dtors"))
+ return true;
+ }
+ }
+ }
+ return false;
+}
diff --git a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
index 8f0fa933a6a1..9474d8c9dafb 100644
--- a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
+++ b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -14,6 +14,7 @@
#include "llvm/LTO/legacy/ThinLTOCodeGenerator.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -37,6 +38,7 @@
#include "llvm/LTO/LTO.h"
#include "llvm/LTO/SummaryBasedOptimizations.h"
#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/IRObjectFile.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Passes/StandardInstrumentations.h"
@@ -48,12 +50,12 @@
#include "llvm/Support/Path.h"
#include "llvm/Support/SHA1.h"
#include "llvm/Support/SmallVectorMemoryBuffer.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/ThreadPool.h"
#include "llvm/Support/Threading.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/Transforms/IPO/FunctionImport.h"
#include "llvm/Transforms/IPO/Internalize.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
@@ -289,11 +291,6 @@ static void optimizeModuleNewPM(Module &TheModule, TargetMachine &TM,
TLII->disableAllFunctions();
FAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); });
- AAManager AA = PB.buildDefaultAAPipeline();
-
- // Register the AA manager first so that our version is the one used.
- FAM.registerPass([&] { return std::move(AA); });
-
// Register all the basic analyses with the managers.
PB.registerModuleAnalyses(MAM);
PB.registerCGSCCAnalyses(CGAM);
@@ -303,22 +300,22 @@ static void optimizeModuleNewPM(Module &TheModule, TargetMachine &TM,
ModulePassManager MPM;
- PassBuilder::OptimizationLevel OL;
+ OptimizationLevel OL;
switch (OptLevel) {
default:
llvm_unreachable("Invalid optimization level");
case 0:
- OL = PassBuilder::OptimizationLevel::O0;
+ OL = OptimizationLevel::O0;
break;
case 1:
- OL = PassBuilder::OptimizationLevel::O1;
+ OL = OptimizationLevel::O1;
break;
case 2:
- OL = PassBuilder::OptimizationLevel::O2;
+ OL = OptimizationLevel::O2;
break;
case 3:
- OL = PassBuilder::OptimizationLevel::O3;
+ OL = OptimizationLevel::O3;
break;
}
@@ -503,7 +500,7 @@ ProcessThinLTOModule(Module &TheModule, ModuleSummaryIndex &Index,
promoteModule(TheModule, Index, ClearDSOLocalOnDeclarations);
// Apply summary-based prevailing-symbol resolution decisions.
- thinLTOResolvePrevailingInModule(TheModule, DefinedGlobals);
+ thinLTOFinalizeInModule(TheModule, DefinedGlobals, /*PropagateAttrs=*/true);
// Save temps: after promotion.
saveTempBitcode(TheModule, SaveTempsDir, count, ".1.promoted.bc");
@@ -607,7 +604,7 @@ void ThinLTOCodeGenerator::addModule(StringRef Identifier, StringRef Data) {
auto InputOrError = lto::InputFile::create(Buffer);
if (!InputOrError)
- report_fatal_error("ThinLTO cannot create input file: " +
+ report_fatal_error(Twine("ThinLTO cannot create input file: ") +
toString(InputOrError.takeError()));
auto TripleStr = (*InputOrError)->getTargetTriple();
@@ -642,7 +639,7 @@ std::unique_ptr<TargetMachine> TargetMachineBuilder::create() const {
const Target *TheTarget =
TargetRegistry::lookupTarget(TheTriple.str(), ErrMsg);
if (!TheTarget) {
- report_fatal_error("Can't load target for this Triple: " + ErrMsg);
+ report_fatal_error(Twine("Can't load target for this Triple: ") + ErrMsg);
}
// Use MAttr as the default set of features.
@@ -762,8 +759,9 @@ void ThinLTOCodeGenerator::promote(Module &TheModule, ModuleSummaryIndex &Index,
resolvePrevailingInIndex(Index, ResolvedODR, GUIDPreservedSymbols,
PrevailingCopy);
- thinLTOResolvePrevailingInModule(
- TheModule, ModuleToDefinedGVSummaries[ModuleIdentifier]);
+ thinLTOFinalizeInModule(TheModule,
+ ModuleToDefinedGVSummaries[ModuleIdentifier],
+ /*PropagateAttrs=*/false);
// Promote the exported values in the index, so that they are promoted
// in the module.
@@ -937,8 +935,9 @@ void ThinLTOCodeGenerator::internalize(Module &TheModule,
promoteModule(TheModule, Index, /*ClearDSOLocalOnDeclarations=*/false);
// Internalization
- thinLTOResolvePrevailingInModule(
- TheModule, ModuleToDefinedGVSummaries[ModuleIdentifier]);
+ thinLTOFinalizeInModule(TheModule,
+ ModuleToDefinedGVSummaries[ModuleIdentifier],
+ /*PropagateAttrs=*/false);
thinLTOInternalizeModule(TheModule,
ModuleToDefinedGVSummaries[ModuleIdentifier]);
@@ -989,13 +988,18 @@ ThinLTOCodeGenerator::writeGeneratedObject(int count, StringRef CacheEntryPath,
std::error_code Err;
raw_fd_ostream OS(OutputPath, Err, sys::fs::OF_None);
if (Err)
- report_fatal_error("Can't open output '" + OutputPath + "'\n");
+ report_fatal_error(Twine("Can't open output '") + OutputPath + "'\n");
OS << OutputBuffer.getBuffer();
return std::string(OutputPath.str());
}
// Main entry point for the ThinLTO processing
void ThinLTOCodeGenerator::run() {
+ timeTraceProfilerBegin("ThinLink", StringRef(""));
+ auto TimeTraceScopeExit = llvm::make_scope_exit([]() {
+ if (llvm::timeTraceProfilerEnabled())
+ llvm::timeTraceProfilerEnd();
+ });
// Prepare the resulting object vector
assert(ProducedBinaries.empty() && "The generator should not be reused");
if (SavedObjectsDirectoryPath.empty())
@@ -1005,7 +1009,7 @@ void ThinLTOCodeGenerator::run() {
bool IsDir;
sys::fs::is_directory(SavedObjectsDirectoryPath, IsDir);
if (!IsDir)
- report_fatal_error("Unexistent dir: '" + SavedObjectsDirectoryPath + "'");
+ report_fatal_error(Twine("Unexistent dir: '") + SavedObjectsDirectoryPath + "'");
ProducedBinaryFiles.resize(Modules.size());
}
@@ -1124,6 +1128,8 @@ void ThinLTOCodeGenerator::run() {
*Index, IsExported(ExportLists, GUIDPreservedSymbols),
IsPrevailing(PrevailingCopy));
+ thinLTOPropagateFunctionAttrs(*Index, IsPrevailing(PrevailingCopy));
+
// Make sure that every module has an entry in the ExportLists, ImportList,
// GVSummary and ResolvedODR maps to enable threaded access to these maps
// below.
@@ -1141,6 +1147,11 @@ void ThinLTOCodeGenerator::run() {
ModulesVec.push_back(&Mod->getSingleBitcodeModule());
std::vector<int> ModulesOrdering = lto::generateModulesOrdering(ModulesVec);
+ if (llvm::timeTraceProfilerEnabled())
+ llvm::timeTraceProfilerEnd();
+
+ TimeTraceScopeExit.release();
+
// Parallel optimizer + codegen
{
ThreadPool Pool(heavyweight_hardware_concurrency(ThreadCount));
diff --git a/llvm/lib/Linker/IRMover.cpp b/llvm/lib/Linker/IRMover.cpp
index 7bc6f0585921..bad483be197d 100644
--- a/llvm/lib/Linker/IRMover.cpp
+++ b/llvm/lib/Linker/IRMover.cpp
@@ -20,6 +20,7 @@
#include "llvm/IR/TypeFinder.h"
#include "llvm/Object/ModuleSymbolTable.h"
#include "llvm/Support/Error.h"
+#include "llvm/Support/Path.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include <utility>
using namespace llvm;
@@ -491,8 +492,8 @@ class IRLinker {
void linkGlobalVariable(GlobalVariable &Dst, GlobalVariable &Src);
Error linkFunctionBody(Function &Dst, Function &Src);
- void linkIndirectSymbolBody(GlobalIndirectSymbol &Dst,
- GlobalIndirectSymbol &Src);
+ void linkAliasAliasee(GlobalAlias &Dst, GlobalAlias &Src);
+ void linkIFuncResolver(GlobalIFunc &Dst, GlobalIFunc &Src);
Error linkGlobalValueBody(GlobalValue &Dst, GlobalValue &Src);
/// Replace all types in the source AttributeList with the
@@ -503,7 +504,7 @@ class IRLinker {
/// into the destination module.
GlobalVariable *copyGlobalVariableProto(const GlobalVariable *SGVar);
Function *copyFunctionProto(const Function *SF);
- GlobalValue *copyGlobalIndirectSymbolProto(const GlobalIndirectSymbol *SGIS);
+ GlobalValue *copyIndirectSymbolProto(const GlobalValue *SGV);
/// Perform "replace all uses with" operations. These work items need to be
/// performed as part of materialization, but we postpone them to happen after
@@ -605,10 +606,14 @@ Value *IRLinker::materialize(Value *V, bool ForIndirectSymbol) {
} else if (auto *V = dyn_cast<GlobalVariable>(New)) {
if (V->hasInitializer() || V->hasAppendingLinkage())
return New;
- } else {
- auto *IS = cast<GlobalIndirectSymbol>(New);
- if (IS->getIndirectSymbol())
+ } else if (auto *GA = dyn_cast<GlobalAlias>(New)) {
+ if (GA->getAliasee())
+ return New;
+ } else if (auto *GI = dyn_cast<GlobalIFunc>(New)) {
+ if (GI->getResolver())
return New;
+ } else {
+ llvm_unreachable("Invalid GlobalValue type");
}
// If the global is being linked for an indirect symbol, it may have already
@@ -648,12 +653,14 @@ GlobalVariable *IRLinker::copyGlobalVariableProto(const GlobalVariable *SGVar) {
AttributeList IRLinker::mapAttributeTypes(LLVMContext &C, AttributeList Attrs) {
for (unsigned i = 0; i < Attrs.getNumAttrSets(); ++i) {
- for (Attribute::AttrKind TypedAttr :
- {Attribute::ByVal, Attribute::StructRet, Attribute::ByRef,
- Attribute::InAlloca}) {
- if (Attrs.hasAttribute(i, TypedAttr)) {
- if (Type *Ty = Attrs.getAttribute(i, TypedAttr).getValueAsType()) {
- Attrs = Attrs.replaceAttributeType(C, i, TypedAttr, TypeMap.get(Ty));
+ for (int AttrIdx = Attribute::FirstTypeAttr;
+ AttrIdx <= Attribute::LastTypeAttr; AttrIdx++) {
+ Attribute::AttrKind TypedAttr = (Attribute::AttrKind)AttrIdx;
+ if (Attrs.hasAttributeAtIndex(i, TypedAttr)) {
+ if (Type *Ty =
+ Attrs.getAttributeAtIndex(i, TypedAttr).getValueAsType()) {
+ Attrs = Attrs.replaceAttributeTypeAtIndex(C, i, TypedAttr,
+ TypeMap.get(Ty));
break;
}
}
@@ -677,22 +684,28 @@ Function *IRLinker::copyFunctionProto(const Function *SF) {
/// Set up prototypes for any indirect symbols that come over from the source
/// module.
-GlobalValue *
-IRLinker::copyGlobalIndirectSymbolProto(const GlobalIndirectSymbol *SGIS) {
+GlobalValue *IRLinker::copyIndirectSymbolProto(const GlobalValue *SGV) {
// If there is no linkage to be performed or we're linking from the source,
// bring over SGA.
- auto *Ty = TypeMap.get(SGIS->getValueType());
- GlobalIndirectSymbol *GIS;
- if (isa<GlobalAlias>(SGIS))
- GIS = GlobalAlias::create(Ty, SGIS->getAddressSpace(),
- GlobalValue::ExternalLinkage, SGIS->getName(),
- &DstM);
- else
- GIS = GlobalIFunc::create(Ty, SGIS->getAddressSpace(),
- GlobalValue::ExternalLinkage, SGIS->getName(),
- nullptr, &DstM);
- GIS->copyAttributesFrom(SGIS);
- return GIS;
+ auto *Ty = TypeMap.get(SGV->getValueType());
+
+ if (auto *GA = dyn_cast<GlobalAlias>(SGV)) {
+ auto *DGA = GlobalAlias::create(Ty, SGV->getAddressSpace(),
+ GlobalValue::ExternalLinkage,
+ SGV->getName(), &DstM);
+ DGA->copyAttributesFrom(GA);
+ return DGA;
+ }
+
+ if (auto *GI = dyn_cast<GlobalIFunc>(SGV)) {
+ auto *DGI = GlobalIFunc::create(Ty, SGV->getAddressSpace(),
+ GlobalValue::ExternalLinkage,
+ SGV->getName(), nullptr, &DstM);
+ DGI->copyAttributesFrom(GI);
+ return DGI;
+ }
+
+ llvm_unreachable("Invalid source global value type");
}
GlobalValue *IRLinker::copyGlobalValueProto(const GlobalValue *SGV,
@@ -704,7 +717,7 @@ GlobalValue *IRLinker::copyGlobalValueProto(const GlobalValue *SGV,
NewGV = copyFunctionProto(SF);
} else {
if (ForDefinition)
- NewGV = copyGlobalIndirectSymbolProto(cast<GlobalIndirectSymbol>(SGV));
+ NewGV = copyIndirectSymbolProto(SGV);
else if (SGV->getValueType()->isFunctionTy())
NewGV =
Function::Create(cast<FunctionType>(TypeMap.get(SGV->getValueType())),
@@ -1108,10 +1121,12 @@ Error IRLinker::linkFunctionBody(Function &Dst, Function &Src) {
return Error::success();
}
-void IRLinker::linkIndirectSymbolBody(GlobalIndirectSymbol &Dst,
- GlobalIndirectSymbol &Src) {
- Mapper.scheduleMapGlobalIndirectSymbol(Dst, *Src.getIndirectSymbol(),
- IndirectSymbolMCID);
+void IRLinker::linkAliasAliasee(GlobalAlias &Dst, GlobalAlias &Src) {
+ Mapper.scheduleMapGlobalAlias(Dst, *Src.getAliasee(), IndirectSymbolMCID);
+}
+
+void IRLinker::linkIFuncResolver(GlobalIFunc &Dst, GlobalIFunc &Src) {
+ Mapper.scheduleMapGlobalIFunc(Dst, *Src.getResolver(), IndirectSymbolMCID);
}
Error IRLinker::linkGlobalValueBody(GlobalValue &Dst, GlobalValue &Src) {
@@ -1121,7 +1136,11 @@ Error IRLinker::linkGlobalValueBody(GlobalValue &Dst, GlobalValue &Src) {
linkGlobalVariable(cast<GlobalVariable>(Dst), *GVar);
return Error::success();
}
- linkIndirectSymbolBody(cast<GlobalIndirectSymbol>(Dst), cast<GlobalIndirectSymbol>(Src));
+ if (auto *GA = dyn_cast<GlobalAlias>(&Src)) {
+ linkAliasAliasee(cast<GlobalAlias>(Dst), *GA);
+ return Error::success();
+ }
+ linkIFuncResolver(cast<GlobalIFunc>(Dst), cast<GlobalIFunc>(Src));
return Error::success();
}
@@ -1443,7 +1462,39 @@ Error IRLinker::run() {
if (DstM.getDataLayout().isDefault())
DstM.setDataLayout(SrcM->getDataLayout());
- if (SrcM->getDataLayout() != DstM.getDataLayout()) {
+ // Copy the target triple from the source to dest if the dest's is empty.
+ if (DstM.getTargetTriple().empty() && !SrcM->getTargetTriple().empty())
+ DstM.setTargetTriple(SrcM->getTargetTriple());
+
+ Triple SrcTriple(SrcM->getTargetTriple()), DstTriple(DstM.getTargetTriple());
+
+ // During CUDA compilation we have to link with the bitcode supplied with
+ // CUDA. libdevice bitcode either has no data layout set (pre-CUDA-11), or has
+ // a layout that differs from the one used by LLVM/clang (it does not
+ // include i128). Issuing a warning is not very helpful as there's not much
+ // the user can do about it.
+ bool EnableDLWarning = true;
+ bool EnableTripleWarning = true;
+ if (SrcTriple.isNVPTX() && DstTriple.isNVPTX()) {
+ std::string ModuleId = SrcM->getModuleIdentifier();
+ StringRef FileName = llvm::sys::path::filename(ModuleId);
+ bool SrcIsLibDevice =
+ FileName.startswith("libdevice") && FileName.endswith(".10.bc");
+ bool SrcHasLibDeviceDL =
+ (SrcM->getDataLayoutStr().empty() ||
+ SrcM->getDataLayoutStr() == "e-i64:64-v16:16-v32:32-n16:32:64");
+ // libdevice bitcode uses nvptx64-nvidia-gpulibs or just
+ // 'nvptx-unknown-unknown' triple (before CUDA-10.x) and is compatible with
+ // all NVPTX variants.
+ bool SrcHasLibDeviceTriple = (SrcTriple.getVendor() == Triple::NVIDIA &&
+ SrcTriple.getOSName() == "gpulibs") ||
+ (SrcTriple.getVendorName() == "unknown" &&
+ SrcTriple.getOSName() == "unknown");
+ EnableTripleWarning = !(SrcIsLibDevice && SrcHasLibDeviceTriple);
+ EnableDLWarning = !(SrcIsLibDevice && SrcHasLibDeviceDL);
+ }
+
+ if (EnableDLWarning && (SrcM->getDataLayout() != DstM.getDataLayout())) {
emitWarning("Linking two modules of different data layouts: '" +
SrcM->getModuleIdentifier() + "' is '" +
SrcM->getDataLayoutStr() + "' whereas '" +
@@ -1451,13 +1502,7 @@ Error IRLinker::run() {
DstM.getDataLayoutStr() + "'\n");
}
- // Copy the target triple from the source to dest if the dest's is empty.
- if (DstM.getTargetTriple().empty() && !SrcM->getTargetTriple().empty())
- DstM.setTargetTriple(SrcM->getTargetTriple());
-
- Triple SrcTriple(SrcM->getTargetTriple()), DstTriple(DstM.getTargetTriple());
-
- if (!SrcM->getTargetTriple().empty()&&
+ if (EnableTripleWarning && !SrcM->getTargetTriple().empty() &&
!SrcTriple.isCompatibleWith(DstTriple))
emitWarning("Linking two modules of different target triples: '" +
SrcM->getModuleIdentifier() + "' is '" +
diff --git a/llvm/lib/Linker/LinkModules.cpp b/llvm/lib/Linker/LinkModules.cpp
index 97d6f8cd8075..f9f51bf17d95 100644
--- a/llvm/lib/Linker/LinkModules.cpp
+++ b/llvm/lib/Linker/LinkModules.cpp
@@ -24,6 +24,8 @@ using namespace llvm;
namespace {
+enum class LinkFrom { Dst, Src, Both };
+
/// This is an implementation class for the LinkModules function, which is the
/// entrypoint for this file.
class ModuleLinker {
@@ -67,11 +69,11 @@ class ModuleLinker {
Comdat::SelectionKind Src,
Comdat::SelectionKind Dst,
Comdat::SelectionKind &Result,
- bool &LinkFromSrc);
- std::map<const Comdat *, std::pair<Comdat::SelectionKind, bool>>
+ LinkFrom &From);
+ DenseMap<const Comdat *, std::pair<Comdat::SelectionKind, LinkFrom>>
ComdatsChosen;
bool getComdatResult(const Comdat *SrcC, Comdat::SelectionKind &SK,
- bool &LinkFromSrc);
+ LinkFrom &From);
// Keep track of the lazy linked global members of each comdat in source.
DenseMap<const Comdat *, std::vector<GlobalValue *>> LazyComdatMembers;
@@ -103,7 +105,7 @@ class ModuleLinker {
void dropReplacedComdat(GlobalValue &GV,
const DenseSet<const Comdat *> &ReplacedDstComdats);
- bool linkIfNeeded(GlobalValue &GV);
+ bool linkIfNeeded(GlobalValue &GV, SmallVectorImpl<GlobalValue *> &GVToClone);
public:
ModuleLinker(IRMover &Mover, std::unique_ptr<Module> SrcM, unsigned Flags,
@@ -114,7 +116,7 @@ public:
bool run();
};
-}
+} // namespace
static GlobalValue::VisibilityTypes
getMinVisibility(GlobalValue::VisibilityTypes A,
@@ -131,7 +133,7 @@ bool ModuleLinker::getComdatLeader(Module &M, StringRef ComdatName,
const GlobalVariable *&GVar) {
const GlobalValue *GVal = M.getNamedValue(ComdatName);
if (const auto *GA = dyn_cast_or_null<GlobalAlias>(GVal)) {
- GVal = GA->getBaseObject();
+ GVal = GA->getAliaseeObject();
if (!GVal)
// We cannot resolve the size of the aliasee yet.
return emitError("Linking COMDATs named '" + ComdatName +
@@ -151,7 +153,7 @@ bool ModuleLinker::computeResultingSelectionKind(StringRef ComdatName,
Comdat::SelectionKind Src,
Comdat::SelectionKind Dst,
Comdat::SelectionKind &Result,
- bool &LinkFromSrc) {
+ LinkFrom &From) {
Module &DstM = Mover.getModule();
// The ability to mix Comdat::SelectionKind::Any with
// Comdat::SelectionKind::Largest is a behavior that comes from COFF.
@@ -175,11 +177,11 @@ bool ModuleLinker::computeResultingSelectionKind(StringRef ComdatName,
switch (Result) {
case Comdat::SelectionKind::Any:
// Go with Dst.
- LinkFromSrc = false;
+ From = LinkFrom::Dst;
break;
case Comdat::SelectionKind::NoDeduplicate:
- return emitError("Linking COMDATs named '" + ComdatName +
- "': nodeduplicate has been violated!");
+ From = LinkFrom::Both;
+ break;
case Comdat::SelectionKind::ExactMatch:
case Comdat::SelectionKind::Largest:
case Comdat::SelectionKind::SameSize: {
@@ -197,14 +199,14 @@ bool ModuleLinker::computeResultingSelectionKind(StringRef ComdatName,
if (SrcGV->getInitializer() != DstGV->getInitializer())
return emitError("Linking COMDATs named '" + ComdatName +
"': ExactMatch violated!");
- LinkFromSrc = false;
+ From = LinkFrom::Dst;
} else if (Result == Comdat::SelectionKind::Largest) {
- LinkFromSrc = SrcSize > DstSize;
+ From = SrcSize > DstSize ? LinkFrom::Src : LinkFrom::Dst;
} else if (Result == Comdat::SelectionKind::SameSize) {
if (SrcSize != DstSize)
return emitError("Linking COMDATs named '" + ComdatName +
"': SameSize violated!");
- LinkFromSrc = false;
+ From = LinkFrom::Dst;
} else {
llvm_unreachable("unknown selection kind");
}
@@ -217,7 +219,7 @@ bool ModuleLinker::computeResultingSelectionKind(StringRef ComdatName,
bool ModuleLinker::getComdatResult(const Comdat *SrcC,
Comdat::SelectionKind &Result,
- bool &LinkFromSrc) {
+ LinkFrom &From) {
Module &DstM = Mover.getModule();
Comdat::SelectionKind SSK = SrcC->getSelectionKind();
StringRef ComdatName = SrcC->getName();
@@ -226,15 +228,14 @@ bool ModuleLinker::getComdatResult(const Comdat *SrcC,
if (DstCI == ComdatSymTab.end()) {
// Use the comdat if it is only available in one of the modules.
- LinkFromSrc = true;
+ From = LinkFrom::Src;
Result = SSK;
return false;
}
const Comdat *DstC = &DstCI->second;
Comdat::SelectionKind DSK = DstC->getSelectionKind();
- return computeResultingSelectionKind(ComdatName, SSK, DSK, Result,
- LinkFromSrc);
+ return computeResultingSelectionKind(ComdatName, SSK, DSK, Result, From);
}
bool ModuleLinker::shouldLinkFromSource(bool &LinkFromSrc,
@@ -325,7 +326,8 @@ bool ModuleLinker::shouldLinkFromSource(bool &LinkFromSrc,
"': symbol multiply defined!");
}
-bool ModuleLinker::linkIfNeeded(GlobalValue &GV) {
+bool ModuleLinker::linkIfNeeded(GlobalValue &GV,
+ SmallVectorImpl<GlobalValue *> &GVToClone) {
GlobalValue *DGV = getLinkedToGlobal(&GV);
if (shouldLinkOnlyNeeded()) {
@@ -377,17 +379,18 @@ bool ModuleLinker::linkIfNeeded(GlobalValue &GV) {
if (GV.isDeclaration())
return false;
+ LinkFrom ComdatFrom = LinkFrom::Dst;
if (const Comdat *SC = GV.getComdat()) {
- bool LinkFromSrc;
- Comdat::SelectionKind SK;
- std::tie(SK, LinkFromSrc) = ComdatsChosen[SC];
- if (!LinkFromSrc)
+ std::tie(std::ignore, ComdatFrom) = ComdatsChosen[SC];
+ if (ComdatFrom == LinkFrom::Dst)
return false;
}
bool LinkFromSrc = true;
if (DGV && shouldLinkFromSource(LinkFromSrc, *DGV, GV))
return true;
+ if (DGV && ComdatFrom == LinkFrom::Both)
+ GVToClone.push_back(LinkFromSrc ? DGV : &GV);
if (LinkFromSrc)
ValuesToLink.insert(&GV);
return false;
@@ -462,12 +465,12 @@ bool ModuleLinker::run() {
if (ComdatsChosen.count(&C))
continue;
Comdat::SelectionKind SK;
- bool LinkFromSrc;
- if (getComdatResult(&C, SK, LinkFromSrc))
+ LinkFrom From;
+ if (getComdatResult(&C, SK, From))
return true;
- ComdatsChosen[&C] = std::make_pair(SK, LinkFromSrc);
+ ComdatsChosen[&C] = std::make_pair(SK, From);
- if (!LinkFromSrc)
+ if (From != LinkFrom::Src)
continue;
Module::ComdatSymTabType &ComdatSymTab = DstM.getComdatSymbolTable();
@@ -482,20 +485,14 @@ bool ModuleLinker::run() {
// Alias have to go first, since we are not able to find their comdats
// otherwise.
- for (auto I = DstM.alias_begin(), E = DstM.alias_end(); I != E;) {
- GlobalAlias &GV = *I++;
+ for (GlobalAlias &GV : llvm::make_early_inc_range(DstM.aliases()))
dropReplacedComdat(GV, ReplacedDstComdats);
- }
- for (auto I = DstM.global_begin(), E = DstM.global_end(); I != E;) {
- GlobalVariable &GV = *I++;
+ for (GlobalVariable &GV : llvm::make_early_inc_range(DstM.globals()))
dropReplacedComdat(GV, ReplacedDstComdats);
- }
- for (auto I = DstM.begin(), E = DstM.end(); I != E;) {
- Function &GV = *I++;
+ for (Function &GV : llvm::make_early_inc_range(DstM))
dropReplacedComdat(GV, ReplacedDstComdats);
- }
for (GlobalVariable &GV : SrcM->globals())
if (GV.hasLinkOnceLinkage())
@@ -514,18 +511,45 @@ bool ModuleLinker::run() {
// Insert all of the globals in src into the DstM module... without linking
// initializers (which could refer to functions not yet mapped over).
+ SmallVector<GlobalValue *, 0> GVToClone;
for (GlobalVariable &GV : SrcM->globals())
- if (linkIfNeeded(GV))
+ if (linkIfNeeded(GV, GVToClone))
return true;
for (Function &SF : *SrcM)
- if (linkIfNeeded(SF))
+ if (linkIfNeeded(SF, GVToClone))
return true;
for (GlobalAlias &GA : SrcM->aliases())
- if (linkIfNeeded(GA))
+ if (linkIfNeeded(GA, GVToClone))
+ return true;
+
+ for (GlobalIFunc &GI : SrcM->ifuncs())
+ if (linkIfNeeded(GI, GVToClone))
return true;
+ // For a variable in a comdat nodeduplicate, its initializer should be
+ // preserved (its content may be implicitly used by other members) even if
+ // symbol resolution does not pick it. Clone it into an unnamed private
+ // variable.
+ for (GlobalValue *GV : GVToClone) {
+ if (auto *Var = dyn_cast<GlobalVariable>(GV)) {
+ auto *NewVar = new GlobalVariable(*Var->getParent(), Var->getValueType(),
+ Var->isConstant(), Var->getLinkage(),
+ Var->getInitializer());
+ NewVar->copyAttributesFrom(Var);
+ NewVar->setVisibility(GlobalValue::DefaultVisibility);
+ NewVar->setLinkage(GlobalValue::PrivateLinkage);
+ NewVar->setDSOLocal(true);
+ NewVar->setComdat(Var->getComdat());
+ if (Var->getParent() != &Mover.getModule())
+ ValuesToLink.insert(NewVar);
+ } else {
+ emitError("linking '" + GV->getName() +
+ "': non-variables in comdat nodeduplicate are not handled");
+ }
+ }
+
for (unsigned I = 0; I < ValuesToLink.size(); ++I) {
GlobalValue *GV = ValuesToLink[I];
const Comdat *SC = GV->getComdat();
diff --git a/llvm/lib/MC/ConstantPools.cpp b/llvm/lib/MC/ConstantPools.cpp
index d4199025ad77..d8a08a4bd439 100644
--- a/llvm/lib/MC/ConstantPools.cpp
+++ b/llvm/lib/MC/ConstantPools.cpp
@@ -28,7 +28,7 @@ void ConstantPool::emitEntries(MCStreamer &Streamer) {
return;
Streamer.emitDataRegion(MCDR_DataRegion);
for (const ConstantPoolEntry &Entry : Entries) {
- Streamer.emitCodeAlignment(Entry.Size); // align naturally
+ Streamer.emitValueToAlignment(Entry.Size); // align naturally
Streamer.emitLabel(Entry.Label);
Streamer.emitValue(Entry.Value, Entry.Size, Entry.Loc);
}
diff --git a/llvm/lib/MC/ELFObjectWriter.cpp b/llvm/lib/MC/ELFObjectWriter.cpp
index e0ea44626b7f..883735fcc293 100644
--- a/llvm/lib/MC/ELFObjectWriter.cpp
+++ b/llvm/lib/MC/ELFObjectWriter.cpp
@@ -796,7 +796,7 @@ MCSectionELF *ELFWriter::createRelocationSection(MCContext &Ctx,
else
EntrySize = is64Bit() ? sizeof(ELF::Elf64_Rel) : sizeof(ELF::Elf32_Rel);
- unsigned Flags = 0;
+ unsigned Flags = ELF::SHF_INFO_LINK;
if (Sec.getFlags() & ELF::SHF_GROUP)
Flags = ELF::SHF_GROUP;
@@ -1311,6 +1311,7 @@ bool ELFObjectWriter::shouldRelocateWithSymbol(const MCAssembler &Asm,
case MCSymbolRefExpr::VK_GOT:
case MCSymbolRefExpr::VK_PLT:
case MCSymbolRefExpr::VK_GOTPCREL:
+ case MCSymbolRefExpr::VK_GOTPCREL_NORELAX:
case MCSymbolRefExpr::VK_PPC_GOT_LO:
case MCSymbolRefExpr::VK_PPC_GOT_HI:
case MCSymbolRefExpr::VK_PPC_GOT_HA:
diff --git a/llvm/lib/MC/MCAsmInfoGOFF.cpp b/llvm/lib/MC/MCAsmInfoGOFF.cpp
new file mode 100644
index 000000000000..81704ffe4b24
--- /dev/null
+++ b/llvm/lib/MC/MCAsmInfoGOFF.cpp
@@ -0,0 +1,27 @@
+//===- MCAsmInfoGOFF.cpp - MCGOFFAsmInfo properties -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines certain target specific asm properties for GOFF (z/OS)
+/// based targets.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAsmInfoGOFF.h"
+
+using namespace llvm;
+
+void MCAsmInfoGOFF::anchor() {}
+
+MCAsmInfoGOFF::MCAsmInfoGOFF() {
+ Data64bitsDirective = "\t.quad\t";
+ HasDotTypeDotSizeDirective = false;
+ PrivateGlobalPrefix = "@@";
+ PrivateLabelPrefix = "@";
+ ZeroDirective = "\t.space\t";
+}
diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp
index 72f4ee3f33be..154b2d051f34 100644
--- a/llvm/lib/MC/MCAsmStreamer.cpp
+++ b/llvm/lib/MC/MCAsmStreamer.cpp
@@ -30,13 +30,13 @@
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbolXCOFF.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Path.h"
-#include "llvm/Support/TargetRegistry.h"
#include <cctype>
using namespace llvm;
@@ -245,7 +245,7 @@ public:
unsigned ValueSize = 1,
unsigned MaxBytesToEmit = 0) override;
- void emitCodeAlignment(unsigned ByteAlignment,
+ void emitCodeAlignment(unsigned ByteAlignment, const MCSubtargetInfo *STI,
unsigned MaxBytesToEmit = 0) override;
void emitValueToOffset(const MCExpr *Offset,
@@ -1429,6 +1429,7 @@ void MCAsmStreamer::emitValueToAlignment(unsigned ByteAlignment, int64_t Value,
}
void MCAsmStreamer::emitCodeAlignment(unsigned ByteAlignment,
+ const MCSubtargetInfo *STI,
unsigned MaxBytesToEmit) {
// Emit with a text fill value.
emitValueToAlignment(ByteAlignment, MAI->getTextAlignFillValue(),
diff --git a/llvm/lib/MC/MCAssembler.cpp b/llvm/lib/MC/MCAssembler.cpp
index 9ed8d1083a40..d5e9f4fc66bc 100644
--- a/llvm/lib/MC/MCAssembler.cpp
+++ b/llvm/lib/MC/MCAssembler.cpp
@@ -483,6 +483,7 @@ void MCAssembler::writeFragmentPadding(raw_ostream &OS,
"Writing bundle padding for a fragment without instructions");
unsigned TotalLength = BundlePadding + static_cast<unsigned>(FSize);
+ const MCSubtargetInfo *STI = EF.getSubtargetInfo();
if (EF.alignToBundleEnd() && TotalLength > getBundleAlignSize()) {
// If the padding itself crosses a bundle boundary, it must be emitted
// in 2 pieces, since even nop instructions must not cross boundaries.
@@ -493,12 +494,12 @@ void MCAssembler::writeFragmentPadding(raw_ostream &OS,
// ----------------------------
// ^-------------------^ <- TotalLength
unsigned DistanceToBoundary = TotalLength - getBundleAlignSize();
- if (!getBackend().writeNopData(OS, DistanceToBoundary))
+ if (!getBackend().writeNopData(OS, DistanceToBoundary, STI))
report_fatal_error("unable to write NOP sequence of " +
Twine(DistanceToBoundary) + " bytes");
BundlePadding -= DistanceToBoundary;
}
- if (!getBackend().writeNopData(OS, BundlePadding))
+ if (!getBackend().writeNopData(OS, BundlePadding, STI))
report_fatal_error("unable to write NOP sequence of " +
Twine(BundlePadding) + " bytes");
}
@@ -544,7 +545,7 @@ static void writeFragment(raw_ostream &OS, const MCAssembler &Asm,
// bytes left to fill use the Value and ValueSize to fill the rest.
// If we are aligning with nops, ask that target to emit the right data.
if (AF.hasEmitNops()) {
- if (!Asm.getBackend().writeNopData(OS, Count))
+ if (!Asm.getBackend().writeNopData(OS, Count, AF.getSubtargetInfo()))
report_fatal_error("unable to write nop sequence of " +
Twine(Count) + " bytes");
break;
@@ -621,9 +622,11 @@ static void writeFragment(raw_ostream &OS, const MCAssembler &Asm,
case MCFragment::FT_Nops: {
++stats::EmittedNopsFragments;
const MCNopsFragment &NF = cast<MCNopsFragment>(F);
+
int64_t NumBytes = NF.getNumBytes();
int64_t ControlledNopLength = NF.getControlledNopLength();
- int64_t MaximumNopLength = Asm.getBackend().getMaximumNopSize();
+ int64_t MaximumNopLength =
+ Asm.getBackend().getMaximumNopSize(*NF.getSubtargetInfo());
assert(NumBytes > 0 && "Expected positive NOPs fragment size");
assert(ControlledNopLength >= 0 && "Expected non-negative NOP size");
@@ -647,7 +650,8 @@ static void writeFragment(raw_ostream &OS, const MCAssembler &Asm,
uint64_t NumBytesToEmit =
(uint64_t)std::min(NumBytes, ControlledNopLength);
assert(NumBytesToEmit && "try to emit empty NOP instruction");
- if (!Asm.getBackend().writeNopData(OS, NumBytesToEmit)) {
+ if (!Asm.getBackend().writeNopData(OS, NumBytesToEmit,
+ NF.getSubtargetInfo())) {
report_fatal_error("unable to write nop sequence of the remaining " +
Twine(NumBytesToEmit) + " bytes");
break;
@@ -664,7 +668,8 @@ static void writeFragment(raw_ostream &OS, const MCAssembler &Asm,
}
case MCFragment::FT_BoundaryAlign: {
- if (!Asm.getBackend().writeNopData(OS, FragmentSize))
+ const MCBoundaryAlignFragment &BF = cast<MCBoundaryAlignFragment>(F);
+ if (!Asm.getBackend().writeNopData(OS, FragmentSize, BF.getSubtargetInfo()))
report_fatal_error("unable to write nop sequence of " +
Twine(FragmentSize) + " bytes");
break;
diff --git a/llvm/lib/MC/MCDisassembler/Disassembler.cpp b/llvm/lib/MC/MCDisassembler/Disassembler.cpp
index 52ab0b41f539..aaa3b747682c 100644
--- a/llvm/lib/MC/MCDisassembler/Disassembler.cpp
+++ b/llvm/lib/MC/MCDisassembler/Disassembler.cpp
@@ -25,9 +25,9 @@
#include "llvm/MC/MCSchedule.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstddef>
diff --git a/llvm/lib/MC/MCDisassembler/MCRelocationInfo.cpp b/llvm/lib/MC/MCDisassembler/MCRelocationInfo.cpp
index 64e216e0051d..735be23206e4 100644
--- a/llvm/lib/MC/MCDisassembler/MCRelocationInfo.cpp
+++ b/llvm/lib/MC/MCDisassembler/MCRelocationInfo.cpp
@@ -8,7 +8,7 @@
#include "llvm/MC/MCDisassembler/MCRelocationInfo.h"
#include "llvm-c/Disassembler.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/MC/MCDwarf.cpp b/llvm/lib/MC/MCDwarf.cpp
index 27bb7a103165..1c9cfb9042e2 100644
--- a/llvm/lib/MC/MCDwarf.cpp
+++ b/llvm/lib/MC/MCDwarf.cpp
@@ -27,7 +27,6 @@
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/EndianStream.h"
@@ -66,29 +65,6 @@ MCSymbol *mcdwarf::emitListsTableHeaderStart(MCStreamer &S) {
return End;
}
-/// Manage the .debug_line_str section contents, if we use it.
-class llvm::MCDwarfLineStr {
- MCSymbol *LineStrLabel = nullptr;
- StringTableBuilder LineStrings{StringTableBuilder::DWARF};
- bool UseRelocs = false;
-
-public:
- /// Construct an instance that can emit .debug_line_str (for use in a normal
- /// v5 line table).
- explicit MCDwarfLineStr(MCContext &Ctx) {
- UseRelocs = Ctx.getAsmInfo()->doesDwarfUseRelocationsAcrossSections();
- if (UseRelocs)
- LineStrLabel =
- Ctx.getObjectFileInfo()->getDwarfLineStrSection()->getBeginSymbol();
- }
-
- /// Emit a reference to the string.
- void emitRef(MCStreamer *MCOS, StringRef Path);
-
- /// Emit the .debug_line_str section if appropriate.
- void emitSection(MCStreamer *MCOS);
-};
-
static inline uint64_t ScaleAddrDelta(MCContext &Context, uint64_t AddrDelta) {
unsigned MinInsnLength = Context.getAsmInfo()->getMinInstAlignment();
if (MinInsnLength == 1)
@@ -100,6 +76,13 @@ static inline uint64_t ScaleAddrDelta(MCContext &Context, uint64_t AddrDelta) {
return AddrDelta / MinInsnLength;
}
+MCDwarfLineStr::MCDwarfLineStr(MCContext &Ctx) {
+ UseRelocs = Ctx.getAsmInfo()->doesDwarfUseRelocationsAcrossSections();
+ if (UseRelocs)
+ LineStrLabel =
+ Ctx.getObjectFileInfo()->getDwarfLineStrSection()->getBeginSymbol();
+}
+
//
// This is called when an instruction is assembled into the specified section
// and if there is information from the last .loc directive that has yet to have
@@ -158,23 +141,58 @@ makeStartPlusIntExpr(MCContext &Ctx, const MCSymbol &Start, int IntVal) {
return Res;
}
+void MCLineSection::addEndEntry(MCSymbol *EndLabel) {
+ auto *Sec = &EndLabel->getSection();
+ // The line table may be empty; in that case, skip adding an end entry.
+ // There are two cases:
+ // (1) MCAsmStreamer - emitDwarfLocDirective emits a location directive in
+ // place instead of adding a line entry if the target has
+ // usesDwarfFileAndLocDirectives.
+ // (2) MCObjectStreamer - if a function has incomplete debug info where
+ // instructions don't have DILocations, the line entries are missing.
+ auto I = MCLineDivisions.find(Sec);
+ if (I != MCLineDivisions.end()) {
+ auto &Entries = I->second;
+ auto EndEntry = Entries.back();
+ EndEntry.setEndLabel(EndLabel);
+ Entries.push_back(EndEntry);
+ }
+}
+
//
// This emits the Dwarf line table for the specified section from the entries
// in the LineSection.
//
-static inline void emitDwarfLineTable(
+void MCDwarfLineTable::emitOne(
MCStreamer *MCOS, MCSection *Section,
const MCLineSection::MCDwarfLineEntryCollection &LineEntries) {
- unsigned FileNum = 1;
- unsigned LastLine = 1;
- unsigned Column = 0;
- unsigned Flags = DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0;
- unsigned Isa = 0;
- unsigned Discriminator = 0;
- MCSymbol *LastLabel = nullptr;
+
+ unsigned FileNum, LastLine, Column, Flags, Isa, Discriminator;
+ MCSymbol *LastLabel;
+ auto init = [&]() {
+ FileNum = 1;
+ LastLine = 1;
+ Column = 0;
+ Flags = DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0;
+ Isa = 0;
+ Discriminator = 0;
+ LastLabel = nullptr;
+ };
+ init();
// Loop through each MCDwarfLineEntry and encode the dwarf line number table.
+ bool EndEntryEmitted = false;
for (const MCDwarfLineEntry &LineEntry : LineEntries) {
+ MCSymbol *Label = LineEntry.getLabel();
+ const MCAsmInfo *asmInfo = MCOS->getContext().getAsmInfo();
+ if (LineEntry.IsEndEntry) {
+ MCOS->emitDwarfAdvanceLineAddr(INT64_MAX, LastLabel, Label,
+ asmInfo->getCodePointerSize());
+ init();
+ EndEntryEmitted = true;
+ continue;
+ }
+
int64_t LineDelta = static_cast<int64_t>(LineEntry.getLine()) - LastLine;
if (FileNum != LineEntry.getFileNum()) {
@@ -212,12 +230,9 @@ static inline void emitDwarfLineTable(
if (LineEntry.getFlags() & DWARF2_FLAG_EPILOGUE_BEGIN)
MCOS->emitInt8(dwarf::DW_LNS_set_epilogue_begin);
- MCSymbol *Label = LineEntry.getLabel();
-
// At this point we want to emit/create the sequence to encode the delta in
// line numbers and the increment of the address from the previous Label
// and the current Label.
- const MCAsmInfo *asmInfo = MCOS->getContext().getAsmInfo();
MCOS->emitDwarfAdvanceLineAddr(LineDelta, LastLabel, Label,
asmInfo->getCodePointerSize());
@@ -227,7 +242,12 @@ static inline void emitDwarfLineTable(
}
// Generate DWARF line end entry.
- MCOS->emitDwarfLineEndEntry(Section, LastLabel);
+ // We do not need this for DwarfDebug, which explicitly terminates the line
+ // table using ranges whenever the CU or section changes. However, the MC path
+ // does not track ranges, nor does it terminate the line table. In that case,
+ // conservatively use the section end symbol to end the line table.
+ if (!EndEntryEmitted)
+ MCOS->emitDwarfLineEndEntry(Section, LastLabel);
}
//
@@ -522,7 +542,7 @@ void MCDwarfLineTable::emitCU(MCStreamer *MCOS, MCDwarfLineTableParams Params,
// Put out the line tables.
for (const auto &LineSec : MCLineSections.getMCLineEntries())
- emitDwarfLineTable(MCOS, LineSec.first, LineSec.second);
+ emitOne(MCOS, LineSec.first, LineSec.second);
// This is the end of the section, so set the value of the symbol at the end
// of this section (that was used in a previous expression).
diff --git a/llvm/lib/MC/MCELFStreamer.cpp b/llvm/lib/MC/MCELFStreamer.cpp
index 784d66805d63..1ba999a63113 100644
--- a/llvm/lib/MC/MCELFStreamer.cpp
+++ b/llvm/lib/MC/MCELFStreamer.cpp
@@ -29,10 +29,10 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LEB128.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
@@ -88,10 +88,10 @@ void MCELFStreamer::mergeFragment(MCDataFragment *DF,
DF->getContents().append(EF->getContents().begin(), EF->getContents().end());
}
-void MCELFStreamer::InitSections(bool NoExecStack) {
+void MCELFStreamer::initSections(bool NoExecStack, const MCSubtargetInfo &STI) {
MCContext &Ctx = getContext();
SwitchSection(Ctx.getObjectFileInfo()->getTextSection());
- emitCodeAlignment(4);
+ emitCodeAlignment(Ctx.getObjectFileInfo()->getTextSectionAlignment(), &STI);
if (NoExecStack)
SwitchSection(Ctx.getAsmInfo()->getNonexecutableStackSection(Ctx));
@@ -224,6 +224,7 @@ bool MCELFStreamer::emitSymbolAttribute(MCSymbol *S, MCSymbolAttr Attribute) {
case MCSA_ELF_TypeGnuUniqueObject:
Symbol->setType(CombineSymbolTypes(Symbol->getType(), ELF::STT_OBJECT));
Symbol->setBinding(ELF::STB_GNU_UNIQUE);
+ getAssembler().getWriter().markGnuAbi();
break;
case MCSA_Global:
@@ -325,7 +326,7 @@ void MCELFStreamer::emitCommonSymbol(MCSymbol *S, uint64_t Size,
SwitchSection(P.first, P.second);
} else {
if(Symbol->declareCommon(Size, ByteAlignment))
- report_fatal_error("Symbol: " + Symbol->getName() +
+ report_fatal_error(Twine("Symbol: ") + Symbol->getName() +
" redeclared as different type");
}
@@ -500,7 +501,7 @@ void MCELFStreamer::finalizeCGProfileEntry(const MCSymbolRefExpr *&SRE,
*MCOffset, "BFD_RELOC_NONE", SRE, SRE->getLoc(),
*getContext().getSubtargetInfo()))
report_fatal_error("Relocation for CG Profile could not be created: " +
- Err->second);
+ Twine(Err->second));
}
void MCELFStreamer::finalizeCGProfile() {
diff --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp
index 84ec0f6bb57b..10d494b5ac61 100644
--- a/llvm/lib/MC/MCExpr.cpp
+++ b/llvm/lib/MC/MCExpr.cpp
@@ -230,6 +230,7 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
case VK_GOTREL: return "GOTREL";
case VK_PCREL: return "PCREL";
case VK_GOTPCREL: return "GOTPCREL";
+ case VK_GOTPCREL_NORELAX: return "GOTPCREL_NORELAX";
case VK_GOTTPOFF: return "GOTTPOFF";
case VK_INDNTPOFF: return "INDNTPOFF";
case VK_NTPOFF: return "NTPOFF";
@@ -358,6 +359,7 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
case VK_WASM_MBREL: return "MBREL";
case VK_WASM_TLSREL: return "TLSREL";
case VK_WASM_TBREL: return "TBREL";
+ case VK_WASM_GOT_TLS: return "GOT@TLS";
case VK_AMDGPU_GOTPCREL32_LO: return "gotpcrel32@lo";
case VK_AMDGPU_GOTPCREL32_HI: return "gotpcrel32@hi";
case VK_AMDGPU_REL32_LO: return "rel32@lo";
@@ -393,6 +395,7 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) {
.Case("gotrel", VK_GOTREL)
.Case("pcrel", VK_PCREL)
.Case("gotpcrel", VK_GOTPCREL)
+ .Case("gotpcrel_norelax", VK_GOTPCREL_NORELAX)
.Case("gottpoff", VK_GOTTPOFF)
.Case("indntpoff", VK_INDNTPOFF)
.Case("ntpoff", VK_NTPOFF)
@@ -499,6 +502,7 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) {
.Case("tbrel", VK_WASM_TBREL)
.Case("mbrel", VK_WASM_MBREL)
.Case("tlsrel", VK_WASM_TLSREL)
+ .Case("got@tls", VK_WASM_GOT_TLS)
.Case("gotpcrel32@lo", VK_AMDGPU_GOTPCREL32_LO)
.Case("gotpcrel32@hi", VK_AMDGPU_GOTPCREL32_HI)
.Case("rel32@lo", VK_AMDGPU_REL32_LO)
diff --git a/llvm/lib/MC/MCFragment.cpp b/llvm/lib/MC/MCFragment.cpp
index 0f8543f51096..4634de863b2f 100644
--- a/llvm/lib/MC/MCFragment.cpp
+++ b/llvm/lib/MC/MCFragment.cpp
@@ -128,7 +128,11 @@ static bool getSymbolOffsetImpl(const MCAsmLayout &Layout, const MCSymbol &S,
const MCSymbolRefExpr *A = Target.getSymA();
if (A) {
uint64_t ValA;
- if (!getLabelOffset(Layout, A->getSymbol(), ReportError, ValA))
+ // FIXME: On most platforms, `Target`'s component symbols are labels from
+ // having been simplified during evaluation, but on Mach-O they can be
+ // variables due to PR19203. This, and the line below for `B` can be
+ // restored to call `getLabelOffset` when PR19203 is fixed.
+ if (!getSymbolOffsetImpl(Layout, A->getSymbol(), ReportError, ValA))
return false;
Offset += ValA;
}
@@ -136,7 +140,7 @@ static bool getSymbolOffsetImpl(const MCAsmLayout &Layout, const MCSymbol &S,
const MCSymbolRefExpr *B = Target.getSymB();
if (B) {
uint64_t ValB;
- if (!getLabelOffset(Layout, B->getSymbol(), ReportError, ValB))
+ if (!getSymbolOffsetImpl(Layout, B->getSymbol(), ReportError, ValB))
return false;
Offset -= ValB;
}
diff --git a/llvm/lib/MC/MCInstrAnalysis.cpp b/llvm/lib/MC/MCInstrAnalysis.cpp
index a7dc0626d0ab..52b59185c6fc 100644
--- a/llvm/lib/MC/MCInstrAnalysis.cpp
+++ b/llvm/lib/MC/MCInstrAnalysis.cpp
@@ -29,8 +29,14 @@ bool MCInstrAnalysis::evaluateBranch(const MCInst & /*Inst*/, uint64_t /*Addr*/,
return false;
}
-Optional<uint64_t>
-MCInstrAnalysis::evaluateMemoryOperandAddress(const MCInst &Inst, uint64_t Addr,
- uint64_t Size) const {
+Optional<uint64_t> MCInstrAnalysis::evaluateMemoryOperandAddress(
+ const MCInst &Inst, const MCSubtargetInfo *STI, uint64_t Addr,
+ uint64_t Size) const {
return None;
}
+
+Optional<uint64_t>
+MCInstrAnalysis::getMemoryOperandRelocationOffset(const MCInst &Inst,
+ uint64_t Size) const {
+ return None;
+} \ No newline at end of file
diff --git a/llvm/lib/MC/MCMachOStreamer.cpp b/llvm/lib/MC/MCMachOStreamer.cpp
index f0948a184598..aa94b141d8be 100644
--- a/llvm/lib/MC/MCMachOStreamer.cpp
+++ b/llvm/lib/MC/MCMachOStreamer.cpp
@@ -30,9 +30,9 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolMachO.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <vector>
diff --git a/llvm/lib/MC/MCObjectFileInfo.cpp b/llvm/lib/MC/MCObjectFileInfo.cpp
index 7ea1106068b7..d7f85f793c55 100644
--- a/llvm/lib/MC/MCObjectFileInfo.cpp
+++ b/llvm/lib/MC/MCObjectFileInfo.cpp
@@ -896,6 +896,19 @@ void MCObjectFileInfo::initXCOFFMCObjectFileInfo(const Triple &T) {
".rodata", SectionKind::getReadOnly(),
XCOFF::CsectProperties(XCOFF::StorageMappingClass::XMC_RO, XCOFF::XTY_SD),
/* MultiSymbolsAllowed*/ true);
+ ReadOnlySection->setAlignment(Align(4));
+
+ ReadOnly8Section = Ctx->getXCOFFSection(
+ ".rodata.8", SectionKind::getReadOnly(),
+ XCOFF::CsectProperties(XCOFF::StorageMappingClass::XMC_RO, XCOFF::XTY_SD),
+ /* MultiSymbolsAllowed*/ true);
+ ReadOnly8Section->setAlignment(Align(8));
+
+ ReadOnly16Section = Ctx->getXCOFFSection(
+ ".rodata.16", SectionKind::getReadOnly(),
+ XCOFF::CsectProperties(XCOFF::StorageMappingClass::XMC_RO, XCOFF::XTY_SD),
+ /* MultiSymbolsAllowed*/ true);
+ ReadOnly16Section->setAlignment(Align(16));
TLSDataSection = Ctx->getXCOFFSection(
".tdata", SectionKind::getThreadData(),
@@ -968,6 +981,8 @@ void MCObjectFileInfo::initXCOFFMCObjectFileInfo(const Triple &T) {
/* MultiSymbolsAllowed */ true, ".dwmac", XCOFF::SSUBTYP_DWMAC);
}
+MCObjectFileInfo::~MCObjectFileInfo() {}
+
void MCObjectFileInfo::initMCObjectFileInfo(MCContext &MCCtx, bool PIC,
bool LargeCodeModel) {
PositionIndependent = PIC;
diff --git a/llvm/lib/MC/MCObjectStreamer.cpp b/llvm/lib/MC/MCObjectStreamer.cpp
index 2865a2ad80a9..9c86fcc86bcb 100644
--- a/llvm/lib/MC/MCObjectStreamer.cpp
+++ b/llvm/lib/MC/MCObjectStreamer.cpp
@@ -368,7 +368,7 @@ void MCObjectStreamer::emitInstruction(const MCInst &Inst,
"' cannot have instructions");
return;
}
- getAssembler().getBackend().emitInstructionBegin(*this, Inst);
+ getAssembler().getBackend().emitInstructionBegin(*this, Inst, STI);
emitInstructionImpl(Inst, STI);
getAssembler().getBackend().emitInstructionEnd(*this, Inst);
}
@@ -609,9 +609,10 @@ void MCObjectStreamer::emitValueToAlignment(unsigned ByteAlignment,
}
void MCObjectStreamer::emitCodeAlignment(unsigned ByteAlignment,
+ const MCSubtargetInfo *STI,
unsigned MaxBytesToEmit) {
emitValueToAlignment(ByteAlignment, 0, 1, MaxBytesToEmit);
- cast<MCAlignFragment>(getCurrentFragment())->setEmitNops(true);
+ cast<MCAlignFragment>(getCurrentFragment())->setEmitNops(true, STI);
}
void MCObjectStreamer::emitValueToOffset(const MCExpr *Offset,
@@ -835,13 +836,14 @@ void MCObjectStreamer::emitFill(const MCExpr &NumValues, int64_t Size,
}
void MCObjectStreamer::emitNops(int64_t NumBytes, int64_t ControlledNopLength,
- SMLoc Loc) {
+ SMLoc Loc, const MCSubtargetInfo &STI) {
// Emit an NOP fragment.
MCDataFragment *DF = getOrCreateDataFragment();
flushPendingLabels(DF, DF->getContents().size());
assert(getCurrentSectionOnly() && "need a section");
- insert(new MCNopsFragment(NumBytes, ControlledNopLength, Loc));
+
+ insert(new MCNopsFragment(NumBytes, ControlledNopLength, Loc, STI));
}
void MCObjectStreamer::emitFileDirective(StringRef Filename) {
diff --git a/llvm/lib/MC/MCParser/AsmLexer.cpp b/llvm/lib/MC/MCParser/AsmLexer.cpp
index e328ba5315af..bf9b9e916d6f 100644
--- a/llvm/lib/MC/MCParser/AsmLexer.cpp
+++ b/llvm/lib/MC/MCParser/AsmLexer.cpp
@@ -228,6 +228,7 @@ AsmToken AsmLexer::LexLineComment() {
int CurChar = getNextChar();
while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF)
CurChar = getNextChar();
+ const char *NewlinePtr = CurPtr;
if (CurChar == '\r' && CurPtr != CurBuf.end() && *CurPtr == '\n')
++CurPtr;
@@ -235,7 +236,7 @@ AsmToken AsmLexer::LexLineComment() {
if (CommentConsumer) {
CommentConsumer->HandleComment(
SMLoc::getFromPointer(CommentTextStart),
- StringRef(CommentTextStart, CurPtr - 1 - CommentTextStart));
+ StringRef(CommentTextStart, NewlinePtr - 1 - CommentTextStart));
}
IsAtStartOfLine = true;
diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp
index d3cb5ca59bf3..ed9f2066dc20 100644
--- a/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -749,6 +749,7 @@ namespace llvm {
extern MCAsmParserExtension *createDarwinAsmParser();
extern MCAsmParserExtension *createELFAsmParser();
extern MCAsmParserExtension *createCOFFAsmParser();
+extern MCAsmParserExtension *createGOFFAsmParser();
extern MCAsmParserExtension *createXCOFFAsmParser();
extern MCAsmParserExtension *createWasmAsmParser();
@@ -783,7 +784,8 @@ AsmParser::AsmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
PlatformParser.reset(createELFAsmParser());
break;
case MCContext::IsGOFF:
- report_fatal_error("GOFFAsmParser support not implemented yet");
+ PlatformParser.reset(createGOFFAsmParser());
+ break;
case MCContext::IsWasm:
PlatformParser.reset(createWasmAsmParser());
break;
@@ -950,7 +952,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
// Create the initial section, if requested.
if (!NoInitialTextSection)
- Out.InitSections(false);
+ Out.initSections(false, getTargetParser().getSTI());
// Prime the lexer.
Lex();
@@ -1052,18 +1054,21 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
}
}
}
-
// Finalize the output stream if there are no errors and if the client wants
// us to.
- if (!HadError && !NoFinalize)
+ if (!HadError && !NoFinalize) {
+ if (auto *TS = Out.getTargetStreamer())
+ TS->emitConstantPools();
+
Out.Finish(Lexer.getLoc());
+ }
return HadError || getContext().hadError();
}
bool AsmParser::checkForValidSection() {
if (!ParsingMSInlineAsm && !getStreamer().getCurrentSectionOnly()) {
- Out.InitSections(false);
+ Out.initSections(false, getTargetParser().getSTI());
return Error(getTok().getLoc(),
"expected section directive before assembly directive");
}
@@ -3451,7 +3456,8 @@ bool AsmParser::parseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
bool UseCodeAlign = Section->UseCodeAlign();
if ((!HasFillExpr || Lexer.getMAI().getTextAlignFillValue() == FillExpr) &&
ValueSize == 1 && UseCodeAlign) {
- getStreamer().emitCodeAlignment(Alignment, MaxBytesToFill);
+ getStreamer().emitCodeAlignment(Alignment, &getTargetParser().getSTI(),
+ MaxBytesToFill);
} else {
// FIXME: Target specific behavior about how the "extra" bytes are filled.
getStreamer().emitValueToAlignment(Alignment, FillExpr, ValueSize,
diff --git a/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
index 70d69fc8dd32..ddc41d0a08ab 100644
--- a/llvm/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
@@ -502,6 +502,23 @@ static bool hasPrefix(StringRef SectionName, StringRef Prefix) {
return SectionName.startswith(Prefix) || SectionName == Prefix.drop_back();
}
+static bool allowSectionTypeMismatch(const Triple &TT, StringRef SectionName,
+ unsigned Type) {
+ if (TT.getArch() == Triple::x86_64) {
+ // x86-64 psABI names SHT_X86_64_UNWIND as the canonical type for .eh_frame,
+ // but GNU as emits SHT_PROGBITS .eh_frame for .cfi_* directives. Don't
+ // error for SHT_PROGBITS .eh_frame
+ return SectionName == ".eh_frame" && Type == ELF::SHT_PROGBITS;
+ }
+ if (TT.isMIPS()) {
+ // MIPS .debug_* sections should have SHT_MIPS_DWARF section type to
+ // distinguish among sections containing DWARF and ECOFF debug formats,
+ // but in assembly files these sections have SHT_PROGBITS type.
+ return hasPrefix(SectionName, ".debug_") && Type == ELF::SHT_PROGBITS;
+ }
+ return false;
+}
+
bool ELFAsmParser::ParseSectionArguments(bool IsPush, SMLoc loc) {
StringRef SectionName;
@@ -659,11 +676,9 @@ EndStmt:
getContext().getELFSection(SectionName, Type, Flags, Size, GroupName,
IsComdat, UniqueID, LinkedToSym);
getStreamer().SwitchSection(Section, Subsection);
- // x86-64 psABI names SHT_X86_64_UNWIND as the canonical type for .eh_frame,
- // but GNU as emits SHT_PROGBITS .eh_frame for .cfi_* directives. Don't error
- // for SHT_PROGBITS .eh_frame
if (Section->getType() != Type &&
- !(SectionName == ".eh_frame" && Type == ELF::SHT_PROGBITS))
+ !allowSectionTypeMismatch(getContext().getTargetTriple(), SectionName,
+ Type))
Error(loc, "changed section type for " + SectionName + ", expected: 0x" +
utohexstr(Section->getType()));
// Check that flags are used consistently. However, the GNU assembler permits
@@ -815,7 +830,7 @@ bool ELFAsmParser::ParseDirectiveSymver(StringRef, SMLoc) {
if (getParser().parseIdentifier(Name))
return TokError("expected identifier in directive");
- if (Name.find('@') == StringRef::npos)
+ if (!Name.contains('@'))
return TokError("expected a '@' in the name");
bool KeepOriginalSym = !Name.contains("@@@");
if (parseOptionalToken(AsmToken::Comma)) {
diff --git a/llvm/lib/MC/MCParser/GOFFAsmParser.cpp b/llvm/lib/MC/MCParser/GOFFAsmParser.cpp
new file mode 100644
index 000000000000..c2a7eaee8029
--- /dev/null
+++ b/llvm/lib/MC/MCParser/GOFFAsmParser.cpp
@@ -0,0 +1,48 @@
+//===- GOFFAsmParser.cpp - GOFF Assembly Parser ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParserExtension.h"
+#include "llvm/MC/MCSectionGOFF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbolGOFF.h"
+
+using namespace llvm;
+
+namespace {
+
+class GOFFAsmParser : public MCAsmParserExtension {
+ template <bool (GOFFAsmParser::*HandlerMethod)(StringRef, SMLoc)>
+ void addDirectiveHandler(StringRef Directive) {
+ MCAsmParser::ExtensionDirectiveHandler Handler =
+ std::make_pair(this, HandleDirective<GOFFAsmParser, HandlerMethod>);
+
+ getParser().addDirectiveHandler(Directive, Handler);
+ }
+
+public:
+ GOFFAsmParser() {}
+
+ void Initialize(MCAsmParser &Parser) override {
+ // Call the base implementation.
+ this->MCAsmParserExtension::Initialize(Parser);
+ }
+};
+
+} // namespace
+
+namespace llvm {
+
+MCAsmParserExtension *createGOFFAsmParser() { return new GOFFAsmParser; }
+
+} // namespace llvm
diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp
index 7b4d6e529cc2..f1704cef46ac 100644
--- a/llvm/lib/MC/MCParser/MasmParser.cpp
+++ b/llvm/lib/MC/MCParser/MasmParser.cpp
@@ -1319,7 +1319,7 @@ bool MasmParser::enabledGenDwarfForAssembly() {
bool MasmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
// Create the initial section, if requested.
if (!NoInitialTextSection)
- Out.InitSections(false);
+ Out.initSections(false, getTargetParser().getSTI());
// Prime the lexer.
Lex();
@@ -1437,7 +1437,7 @@ bool MasmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
bool MasmParser::checkForValidSection() {
if (!ParsingMSInlineAsm && !getStreamer().getCurrentSectionOnly()) {
- Out.InitSections(false);
+ Out.initSections(false, getTargetParser().getSTI());
return Error(getTok().getLoc(),
"expected section directive before assembly directive");
}
@@ -4772,7 +4772,8 @@ bool MasmParser::emitAlignTo(int64_t Alignment) {
const MCSection *Section = getStreamer().getCurrentSectionOnly();
assert(Section && "must have section to emit alignment");
if (Section->UseCodeAlign()) {
- getStreamer().emitCodeAlignment(Alignment, /*MaxBytesToEmit=*/0);
+ getStreamer().emitCodeAlignment(Alignment, &getTargetParser().getSTI(),
+ /*MaxBytesToEmit=*/0);
} else {
// FIXME: Target specific behavior about how the "extra" bytes are filled.
getStreamer().emitValueToAlignment(Alignment, /*Value=*/0,
diff --git a/llvm/lib/MC/MCPseudoProbe.cpp b/llvm/lib/MC/MCPseudoProbe.cpp
index 731831d3bce3..e35bcec8fe75 100644
--- a/llvm/lib/MC/MCPseudoProbe.cpp
+++ b/llvm/lib/MC/MCPseudoProbe.cpp
@@ -12,10 +12,17 @@
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/LEB128.h"
+#include "llvm/Support/raw_ostream.h"
+#include <limits>
+#include <memory>
+#include <sstream>
#define DEBUG_TYPE "mcpseudoprobe"
using namespace llvm;
+using namespace support;
#ifndef NDEBUG
int MCPseudoProbeTable::DdgPrintIndent = 0;
@@ -69,23 +76,6 @@ void MCPseudoProbe::emit(MCObjectStreamer *MCOS,
});
}
-MCPseudoProbeInlineTree::~MCPseudoProbeInlineTree() {
- for (auto &Inlinee : Inlinees)
- delete Inlinee.second;
-}
-
-MCPseudoProbeInlineTree *
-MCPseudoProbeInlineTree::getOrAddNode(InlineSite Site) {
- auto Iter = Inlinees.find(Site);
- if (Iter == Inlinees.end()) {
- auto *Node = new MCPseudoProbeInlineTree(std::get<0>(Site));
- Inlinees[Site] = Node;
- return Node;
- } else {
- return Iter->second;
- }
-}
-
void MCPseudoProbeInlineTree::addPseudoProbe(
const MCPseudoProbe &Probe, const MCPseudoProbeInlineStack &InlineStack) {
// The function should not be called on the root.
@@ -147,7 +137,7 @@ void MCPseudoProbeInlineTree::emit(MCObjectStreamer *MCOS,
// Emit number of probes in this node
MCOS->emitULEB128IntValue(Probes.size());
// Emit number of direct inlinees
- MCOS->emitULEB128IntValue(Inlinees.size());
+ MCOS->emitULEB128IntValue(Children.size());
// Emit probes in this group
for (const auto &Probe : Probes) {
Probe.emit(MCOS, LastProbe);
@@ -157,7 +147,13 @@ void MCPseudoProbeInlineTree::emit(MCObjectStreamer *MCOS,
assert(Probes.empty() && "Root should not have probes");
}
- // Emit descendent
+  // Emit the descendants in a sorted order.
+  // InlineSite is unique for each child, so the std::map below gives a
+  // deterministic order that does not depend on MCPseudoProbeInlineTree*.
+ std::map<InlineSite, MCPseudoProbeInlineTree *> Inlinees;
+ for (auto Child = Children.begin(); Child != Children.end(); ++Child)
+ Inlinees[Child->first] = Child->second.get();
+
for (const auto &Inlinee : Inlinees) {
if (Guid) {
// Emit probe index
@@ -211,3 +207,361 @@ void MCPseudoProbeTable::emit(MCObjectStreamer *MCOS) {
// Put out the probe.
ProbeSections.emit(MCOS);
}
+
+static StringRef getProbeFNameForGUID(const GUIDProbeFunctionMap &GUID2FuncMAP,
+ uint64_t GUID) {
+ auto It = GUID2FuncMAP.find(GUID);
+ assert(It != GUID2FuncMAP.end() &&
+ "Probe function must exist for a valid GUID");
+ return It->second.FuncName;
+}
+
+void MCPseudoProbeFuncDesc::print(raw_ostream &OS) {
+ OS << "GUID: " << FuncGUID << " Name: " << FuncName << "\n";
+ OS << "Hash: " << FuncHash << "\n";
+}
+
+void MCDecodedPseudoProbe::getInlineContext(
+ SmallVectorImpl<MCPseduoProbeFrameLocation> &ContextStack,
+ const GUIDProbeFunctionMap &GUID2FuncMAP) const {
+ uint32_t Begin = ContextStack.size();
+ MCDecodedPseudoProbeInlineTree *Cur = InlineTree;
+  // It adds each node's inline site to the stack while walking up the tree.
+  // Note that it does not include the probe's own function (the leaf location).
+ while (Cur->hasInlineSite()) {
+ StringRef FuncName =
+ getProbeFNameForGUID(GUID2FuncMAP, std::get<0>(Cur->ISite));
+ ContextStack.emplace_back(
+ MCPseduoProbeFrameLocation(FuncName, std::get<1>(Cur->ISite)));
+ Cur = static_cast<MCDecodedPseudoProbeInlineTree *>(Cur->Parent);
+ }
+ // Make the ContextStack in caller-callee order
+ std::reverse(ContextStack.begin() + Begin, ContextStack.end());
+}
+
+std::string MCDecodedPseudoProbe::getInlineContextStr(
+ const GUIDProbeFunctionMap &GUID2FuncMAP) const {
+ std::ostringstream OContextStr;
+ SmallVector<MCPseduoProbeFrameLocation, 16> ContextStack;
+ getInlineContext(ContextStack, GUID2FuncMAP);
+ for (auto &Cxt : ContextStack) {
+ if (OContextStr.str().size())
+ OContextStr << " @ ";
+ OContextStr << Cxt.first.str() << ":" << Cxt.second;
+ }
+ return OContextStr.str();
+}
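A small example of the string this produces, with assumed function names and callsite indices: if bar is inlined into foo at callsite 3, and foo is inlined into main at callsite 2, then for a probe inside bar:

//   getInlineContext() yields {("main", 2), ("foo", 3)} in caller-to-callee order
//   getInlineContextStr() returns "main:2 @ foo:3"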
+
+static const char *PseudoProbeTypeStr[3] = {"Block", "IndirectCall",
+ "DirectCall"};
+
+void MCDecodedPseudoProbe::print(raw_ostream &OS,
+ const GUIDProbeFunctionMap &GUID2FuncMAP,
+ bool ShowName) const {
+ OS << "FUNC: ";
+ if (ShowName) {
+ StringRef FuncName = getProbeFNameForGUID(GUID2FuncMAP, Guid);
+ OS << FuncName.str() << " ";
+ } else {
+ OS << Guid << " ";
+ }
+ OS << "Index: " << Index << " ";
+ OS << "Type: " << PseudoProbeTypeStr[static_cast<uint8_t>(Type)] << " ";
+ std::string InlineContextStr = getInlineContextStr(GUID2FuncMAP);
+ if (InlineContextStr.size()) {
+ OS << "Inlined: @ ";
+ OS << InlineContextStr;
+ }
+ OS << "\n";
+}
+
+template <typename T> ErrorOr<T> MCPseudoProbeDecoder::readUnencodedNumber() {
+ if (Data + sizeof(T) > End) {
+ return std::error_code();
+ }
+ T Val = endian::readNext<T, little, unaligned>(Data);
+ return ErrorOr<T>(Val);
+}
+
+template <typename T> ErrorOr<T> MCPseudoProbeDecoder::readUnsignedNumber() {
+ unsigned NumBytesRead = 0;
+ uint64_t Val = decodeULEB128(Data, &NumBytesRead);
+ if (Val > std::numeric_limits<T>::max() || (Data + NumBytesRead > End)) {
+ return std::error_code();
+ }
+ Data += NumBytesRead;
+ return ErrorOr<T>(static_cast<T>(Val));
+}
+
+template <typename T> ErrorOr<T> MCPseudoProbeDecoder::readSignedNumber() {
+ unsigned NumBytesRead = 0;
+ int64_t Val = decodeSLEB128(Data, &NumBytesRead);
+ if (Val > std::numeric_limits<T>::max() || (Data + NumBytesRead > End)) {
+ return std::error_code();
+ }
+ Data += NumBytesRead;
+ return ErrorOr<T>(static_cast<T>(Val));
+}
+
+ErrorOr<StringRef> MCPseudoProbeDecoder::readString(uint32_t Size) {
+ StringRef Str(reinterpret_cast<const char *>(Data), Size);
+ if (Data + Size > End) {
+ return std::error_code();
+ }
+ Data += Size;
+ return ErrorOr<StringRef>(Str);
+}
+
+bool MCPseudoProbeDecoder::buildGUID2FuncDescMap(const uint8_t *Start,
+ std::size_t Size) {
+ // The pseudo_probe_desc section has a format like:
+ // .section .pseudo_probe_desc,"",@progbits
+ // .quad -5182264717993193164 // GUID
+ // .quad 4294967295 // Hash
+ // .uleb 3 // Name size
+ // .ascii "foo" // Name
+ // .quad -2624081020897602054
+ // .quad 174696971957
+ // .uleb 34
+ // .ascii "main"
+
+ Data = Start;
+ End = Data + Size;
+
+ while (Data < End) {
+ auto ErrorOrGUID = readUnencodedNumber<uint64_t>();
+ if (!ErrorOrGUID)
+ return false;
+
+ auto ErrorOrHash = readUnencodedNumber<uint64_t>();
+ if (!ErrorOrHash)
+ return false;
+
+ auto ErrorOrNameSize = readUnsignedNumber<uint32_t>();
+ if (!ErrorOrNameSize)
+ return false;
+ uint32_t NameSize = std::move(*ErrorOrNameSize);
+
+ auto ErrorOrName = readString(NameSize);
+ if (!ErrorOrName)
+ return false;
+
+ uint64_t GUID = std::move(*ErrorOrGUID);
+ uint64_t Hash = std::move(*ErrorOrHash);
+ StringRef Name = std::move(*ErrorOrName);
+
+ // Initialize PseudoProbeFuncDesc and populate it into GUID2FuncDescMap
+ GUID2FuncDescMap.emplace(GUID, MCPseudoProbeFuncDesc(GUID, Hash, Name));
+ }
+ assert(Data == End && "Have unprocessed data in pseudo_probe_desc section");
+ return true;
+}
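For readers tracing the decoder above, here is a minimal standalone sketch of an encoder for one pseudo_probe_desc entry. It assumes LLVM's EndianStream.h and LEB128.h helpers; emitDescEntry is a hypothetical name and is not defined by this patch.

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>

// Hypothetical helper mirroring what buildGUID2FuncDescMap() reads back:
// GUID and hash as 64-bit little-endian words, a ULEB128 name length, then
// the raw name bytes.
static void emitDescEntry(llvm::raw_ostream &OS, uint64_t GUID, uint64_t Hash,
                          llvm::StringRef Name) {
  llvm::support::endian::Writer W(OS, llvm::support::little);
  W.write<uint64_t>(GUID);              // read by readUnencodedNumber<uint64_t>()
  W.write<uint64_t>(Hash);              // read by readUnencodedNumber<uint64_t>()
  llvm::encodeULEB128(Name.size(), OS); // read by readUnsignedNumber<uint32_t>()
  OS << Name;                           // read by readString(NameSize)
}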
+
+bool MCPseudoProbeDecoder::buildAddress2ProbeMap(const uint8_t *Start,
+ std::size_t Size) {
+ // The pseudo_probe section encodes an inline forest and each tree has a
+ // format like:
+ // FUNCTION BODY (one for each uninlined function present in the text
+ // section)
+ // GUID (uint64)
+ // GUID of the function
+ // NPROBES (ULEB128)
+ // Number of probes originating from this function.
+ // NUM_INLINED_FUNCTIONS (ULEB128)
+ // Number of callees inlined into this function, aka number of
+ // first-level inlinees
+ // PROBE RECORDS
+ // A list of NPROBES entries. Each entry contains:
+ // INDEX (ULEB128)
+ // TYPE (uint4)
+ // 0 - block probe, 1 - indirect call, 2 - direct call
+ // ATTRIBUTE (uint3)
+ // 1 - tail call, 2 - dangling
+ // ADDRESS_TYPE (uint1)
+ // 0 - code address, 1 - address delta
+ // CODE_ADDRESS (uint64 or ULEB128)
+ // code address or address delta, depending on Flag
+ // INLINED FUNCTION RECORDS
+ // A list of NUM_INLINED_FUNCTIONS entries describing each of the
+ // inlined callees. Each record contains:
+ // INLINE SITE
+ // Index of the callsite probe (ULEB128)
+ // FUNCTION BODY
+ // A FUNCTION BODY entry describing the inlined function.
+
+ Data = Start;
+ End = Data + Size;
+
+ MCDecodedPseudoProbeInlineTree *Root = &DummyInlineRoot;
+ MCDecodedPseudoProbeInlineTree *Cur = &DummyInlineRoot;
+ uint64_t LastAddr = 0;
+ uint32_t Index = 0;
+ // A DFS-based decoding
+ while (Data < End) {
+ if (Root == Cur) {
+ // Use a sequential id for top level inliner.
+ Index = Root->getChildren().size();
+ } else {
+ // Read inline site for inlinees
+ auto ErrorOrIndex = readUnsignedNumber<uint32_t>();
+ if (!ErrorOrIndex)
+ return false;
+ Index = std::move(*ErrorOrIndex);
+ }
+    // Switch to, or add, a new tree node (inlinee).
+ Cur = Cur->getOrAddNode(std::make_tuple(Cur->Guid, Index));
+ // Read guid
+ auto ErrorOrCurGuid = readUnencodedNumber<uint64_t>();
+ if (!ErrorOrCurGuid)
+ return false;
+ Cur->Guid = std::move(*ErrorOrCurGuid);
+ // Read number of probes in the current node.
+ auto ErrorOrNodeCount = readUnsignedNumber<uint32_t>();
+ if (!ErrorOrNodeCount)
+ return false;
+ uint32_t NodeCount = std::move(*ErrorOrNodeCount);
+ // Read number of direct inlinees
+ auto ErrorOrCurChildrenToProcess = readUnsignedNumber<uint32_t>();
+ if (!ErrorOrCurChildrenToProcess)
+ return false;
+ Cur->ChildrenToProcess = std::move(*ErrorOrCurChildrenToProcess);
+ // Read all probes in this node
+ for (std::size_t I = 0; I < NodeCount; I++) {
+ // Read index
+ auto ErrorOrIndex = readUnsignedNumber<uint32_t>();
+ if (!ErrorOrIndex)
+ return false;
+ uint32_t Index = std::move(*ErrorOrIndex);
+ // Read type | flag.
+ auto ErrorOrValue = readUnencodedNumber<uint8_t>();
+ if (!ErrorOrValue)
+ return false;
+ uint8_t Value = std::move(*ErrorOrValue);
+ uint8_t Kind = Value & 0xf;
+ uint8_t Attr = (Value & 0x70) >> 4;
+ // Read address
+ uint64_t Addr = 0;
+ if (Value & 0x80) {
+ auto ErrorOrOffset = readSignedNumber<int64_t>();
+ if (!ErrorOrOffset)
+ return false;
+ int64_t Offset = std::move(*ErrorOrOffset);
+ Addr = LastAddr + Offset;
+ } else {
+ auto ErrorOrAddr = readUnencodedNumber<int64_t>();
+ if (!ErrorOrAddr)
+ return false;
+ Addr = std::move(*ErrorOrAddr);
+ }
+ // Populate Address2ProbesMap
+ auto &Probes = Address2ProbesMap[Addr];
+ Probes.emplace_back(Addr, Cur->Guid, Index, PseudoProbeType(Kind), Attr,
+ Cur);
+ Cur->addProbes(&Probes.back());
+ LastAddr = Addr;
+ }
+
+ // Look for the parent for the next node by subtracting the current
+ // node count from tree counts along the parent chain. The first node
+ // in the chain that has a non-zero tree count is the target.
+ while (Cur != Root) {
+ if (Cur->ChildrenToProcess == 0) {
+ Cur = static_cast<MCDecodedPseudoProbeInlineTree *>(Cur->Parent);
+ if (Cur != Root) {
+ assert(Cur->ChildrenToProcess > 0 &&
+ "Should have some unprocessed nodes");
+ Cur->ChildrenToProcess -= 1;
+ }
+ } else {
+ break;
+ }
+ }
+ }
+
+ assert(Data == End && "Have unprocessed data in pseudo_probe section");
+ assert(Cur == Root &&
+ " Cur should point to root when the forest is fully built up");
+ return true;
+}
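The TYPE, ATTRIBUTE and ADDRESS_TYPE fields from the format comment are packed into the single byte read in the loop above; here is a minimal standalone sketch of the unpacking (PackedProbe and unpackProbeByte are hypothetical names, not part of the patch).

#include <cstdint>

struct PackedProbe {
  uint8_t Kind;     // TYPE: 0 - block probe, 1 - indirect call, 2 - direct call
  uint8_t Attr;     // ATTRIBUTE: 1 - tail call, 2 - dangling
  bool IsAddrDelta; // ADDRESS_TYPE: true - SLEB128 delta, false - raw address
};

static PackedProbe unpackProbeByte(uint8_t Value) {
  PackedProbe P;
  P.Kind = Value & 0xf;                // low 4 bits
  P.Attr = (Value & 0x70) >> 4;        // next 3 bits
  P.IsAddrDelta = (Value & 0x80) != 0; // top bit selects delta encoding
  return P;
}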
+
+void MCPseudoProbeDecoder::printGUID2FuncDescMap(raw_ostream &OS) {
+ OS << "Pseudo Probe Desc:\n";
+ // Make the output deterministic
+ std::map<uint64_t, MCPseudoProbeFuncDesc> OrderedMap(GUID2FuncDescMap.begin(),
+ GUID2FuncDescMap.end());
+ for (auto &I : OrderedMap) {
+ I.second.print(OS);
+ }
+}
+
+void MCPseudoProbeDecoder::printProbeForAddress(raw_ostream &OS,
+ uint64_t Address) {
+ auto It = Address2ProbesMap.find(Address);
+ if (It != Address2ProbesMap.end()) {
+ for (auto &Probe : It->second) {
+ OS << " [Probe]:\t";
+ Probe.print(OS, GUID2FuncDescMap, true);
+ }
+ }
+}
+
+void MCPseudoProbeDecoder::printProbesForAllAddresses(raw_ostream &OS) {
+ std::vector<uint64_t> Addresses;
+ for (auto Entry : Address2ProbesMap)
+ Addresses.push_back(Entry.first);
+ std::sort(Addresses.begin(), Addresses.end());
+ for (auto K : Addresses) {
+ OS << "Address:\t";
+ OS << K;
+ OS << "\n";
+ printProbeForAddress(OS, K);
+ }
+}
+
+const MCDecodedPseudoProbe *
+MCPseudoProbeDecoder::getCallProbeForAddr(uint64_t Address) const {
+ auto It = Address2ProbesMap.find(Address);
+ if (It == Address2ProbesMap.end())
+ return nullptr;
+ const auto &Probes = It->second;
+
+ const MCDecodedPseudoProbe *CallProbe = nullptr;
+ for (const auto &Probe : Probes) {
+ if (Probe.isCall()) {
+ assert(!CallProbe &&
+ "There should be only one call probe corresponding to address "
+ "which is a callsite.");
+ CallProbe = &Probe;
+ }
+ }
+ return CallProbe;
+}
+
+const MCPseudoProbeFuncDesc *
+MCPseudoProbeDecoder::getFuncDescForGUID(uint64_t GUID) const {
+ auto It = GUID2FuncDescMap.find(GUID);
+ assert(It != GUID2FuncDescMap.end() && "Function descriptor doesn't exist");
+ return &It->second;
+}
+
+void MCPseudoProbeDecoder::getInlineContextForProbe(
+ const MCDecodedPseudoProbe *Probe,
+ SmallVectorImpl<MCPseduoProbeFrameLocation> &InlineContextStack,
+ bool IncludeLeaf) const {
+ Probe->getInlineContext(InlineContextStack, GUID2FuncDescMap);
+ if (!IncludeLeaf)
+ return;
+ // Note that the context from probe doesn't include leaf frame,
+ // hence we need to retrieve and prepend leaf if requested.
+ const auto *FuncDesc = getFuncDescForGUID(Probe->getGuid());
+ InlineContextStack.emplace_back(
+ MCPseduoProbeFrameLocation(FuncDesc->FuncName, Probe->getIndex()));
+}
+
+const MCPseudoProbeFuncDesc *MCPseudoProbeDecoder::getInlinerDescForProbe(
+ const MCDecodedPseudoProbe *Probe) const {
+ MCDecodedPseudoProbeInlineTree *InlinerNode = Probe->getInlineTreeNode();
+ if (!InlinerNode->hasInlineSite())
+ return nullptr;
+ return getFuncDescForGUID(std::get<0>(InlinerNode->ISite));
+}
diff --git a/llvm/lib/MC/MCSectionXCOFF.cpp b/llvm/lib/MC/MCSectionXCOFF.cpp
index 648efc14da06..7f7380bf810d 100644
--- a/llvm/lib/MC/MCSectionXCOFF.cpp
+++ b/llvm/lib/MC/MCSectionXCOFF.cpp
@@ -118,6 +118,10 @@ void MCSectionXCOFF::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
bool MCSectionXCOFF::UseCodeAlign() const { return getKind().isText(); }
bool MCSectionXCOFF::isVirtualSection() const {
- assert(isCsect() && "Only csect section can be virtual!");
+  // DWARF sections are never virtual.
+ if (isDwarfSect())
+ return false;
+ assert(isCsect() &&
+ "Handling for isVirtualSection not implemented for this section!");
return XCOFF::XTY_CM == CsectProp->Type;
}
diff --git a/llvm/lib/MC/MCStreamer.cpp b/llvm/lib/MC/MCStreamer.cpp
index fc7fb555f0b9..f4e64b42c817 100644
--- a/llvm/lib/MC/MCStreamer.cpp
+++ b/llvm/lib/MC/MCStreamer.cpp
@@ -53,6 +53,8 @@ void MCTargetStreamer::emitLabel(MCSymbol *Symbol) {}
void MCTargetStreamer::finish() {}
+void MCTargetStreamer::emitConstantPools() {}
+
void MCTargetStreamer::changeSection(const MCSection *CurSection,
MCSection *Section,
const MCExpr *Subsection,
@@ -218,7 +220,7 @@ void MCStreamer::emitFill(uint64_t NumBytes, uint8_t FillValue) {
}
void llvm::MCStreamer::emitNops(int64_t NumBytes, int64_t ControlledNopLen,
- llvm::SMLoc) {}
+ llvm::SMLoc, const MCSubtargetInfo& STI) {}
/// The implementation in this class just redirects to emitFill.
void MCStreamer::emitZeros(uint64_t NumBytes) { emitFill(NumBytes, 0); }
@@ -397,7 +399,7 @@ void MCStreamer::emitEHSymAttributes(const MCSymbol *Symbol,
MCSymbol *EHSymbol) {
}
-void MCStreamer::InitSections(bool NoExecStack) {
+void MCStreamer::initSections(bool NoExecStack, const MCSubtargetInfo &STI) {
SwitchSection(getContext().getObjectFileInfo()->getTextSection());
}
@@ -1198,6 +1200,7 @@ void MCStreamer::emitValueToAlignment(unsigned ByteAlignment, int64_t Value,
unsigned ValueSize,
unsigned MaxBytesToEmit) {}
void MCStreamer::emitCodeAlignment(unsigned ByteAlignment,
+ const MCSubtargetInfo *STI,
unsigned MaxBytesToEmit) {}
void MCStreamer::emitValueToOffset(const MCExpr *Offset, unsigned char Value,
SMLoc Loc) {}
diff --git a/llvm/lib/MC/MCWasmStreamer.cpp b/llvm/lib/MC/MCWasmStreamer.cpp
index e3d2439cef81..90249fb7380a 100644
--- a/llvm/lib/MC/MCWasmStreamer.cpp
+++ b/llvm/lib/MC/MCWasmStreamer.cpp
@@ -26,10 +26,10 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolWasm.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -49,6 +49,27 @@ void MCWasmStreamer::mergeFragment(MCDataFragment *DF, MCDataFragment *EF) {
DF->getContents().append(EF->getContents().begin(), EF->getContents().end());
}
+void MCWasmStreamer::emitLabel(MCSymbol *S, SMLoc Loc) {
+ auto *Symbol = cast<MCSymbolWasm>(S);
+ MCObjectStreamer::emitLabel(Symbol, Loc);
+
+ const MCSectionWasm &Section =
+ static_cast<const MCSectionWasm &>(*getCurrentSectionOnly());
+ if (Section.getSegmentFlags() & wasm::WASM_SEG_FLAG_TLS)
+ Symbol->setTLS();
+}
+
+void MCWasmStreamer::emitLabelAtPos(MCSymbol *S, SMLoc Loc, MCFragment *F,
+ uint64_t Offset) {
+ auto *Symbol = cast<MCSymbolWasm>(S);
+ MCObjectStreamer::emitLabelAtPos(Symbol, Loc, F, Offset);
+
+ const MCSectionWasm &Section =
+ static_cast<const MCSectionWasm &>(*getCurrentSectionOnly());
+ if (Section.getSegmentFlags() & wasm::WASM_SEG_FLAG_TLS)
+ Symbol->setTLS();
+}
+
void MCWasmStreamer::emitAssemblerFlag(MCAssemblerFlag Flag) {
// Let the target do whatever target specific stuff it needs to do.
getAssembler().getBackend().handleAssemblerFlag(Flag);
@@ -117,6 +138,10 @@ bool MCWasmStreamer::emitSymbolAttribute(MCSymbol *S, MCSymbolAttr Attribute) {
Symbol->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
break;
+ case MCSA_ELF_TypeTLS:
+ Symbol->setTLS();
+ break;
+
case MCSA_ELF_TypeObject:
case MCSA_Cold:
break;
@@ -156,6 +181,10 @@ void MCWasmStreamer::emitIdent(StringRef IdentString) {
void MCWasmStreamer::emitInstToFragment(const MCInst &Inst,
const MCSubtargetInfo &STI) {
this->MCObjectStreamer::emitInstToFragment(Inst, STI);
+ MCRelaxableFragment &F = *cast<MCRelaxableFragment>(getCurrentFragment());
+
+ for (auto &Fixup : F.getFixups())
+ fixSymbolsInTLSFixups(Fixup.getValue());
}
void MCWasmStreamer::emitInstToData(const MCInst &Inst,
@@ -166,6 +195,9 @@ void MCWasmStreamer::emitInstToData(const MCInst &Inst,
raw_svector_ostream VecOS(Code);
Assembler.getEmitter().encodeInstruction(Inst, VecOS, Fixups, STI);
+ for (auto &Fixup : Fixups)
+ fixSymbolsInTLSFixups(Fixup.getValue());
+
// Append the encoded instruction to the current data fragment (or create a
// new such fragment if the current fragment is not a data fragment).
MCDataFragment *DF = getOrCreateDataFragment();
@@ -185,16 +217,37 @@ void MCWasmStreamer::finishImpl() {
this->MCObjectStreamer::finishImpl();
}
-MCStreamer *llvm::createWasmStreamer(MCContext &Context,
- std::unique_ptr<MCAsmBackend> &&MAB,
- std::unique_ptr<MCObjectWriter> &&OW,
- std::unique_ptr<MCCodeEmitter> &&CE,
- bool RelaxAll) {
- MCWasmStreamer *S =
- new MCWasmStreamer(Context, std::move(MAB), std::move(OW), std::move(CE));
- if (RelaxAll)
- S->getAssembler().setRelaxAll(true);
- return S;
+void MCWasmStreamer::fixSymbolsInTLSFixups(const MCExpr *expr) {
+ switch (expr->getKind()) {
+ case MCExpr::Target:
+ case MCExpr::Constant:
+ break;
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *be = cast<MCBinaryExpr>(expr);
+ fixSymbolsInTLSFixups(be->getLHS());
+ fixSymbolsInTLSFixups(be->getRHS());
+ break;
+ }
+
+ case MCExpr::SymbolRef: {
+ const MCSymbolRefExpr &symRef = *cast<MCSymbolRefExpr>(expr);
+ switch (symRef.getKind()) {
+ case MCSymbolRefExpr::VK_WASM_TLSREL:
+ case MCSymbolRefExpr::VK_WASM_GOT_TLS:
+ getAssembler().registerSymbol(symRef.getSymbol());
+ cast<MCSymbolWasm>(symRef.getSymbol()).setTLS();
+ break;
+ default:
+ break;
+ }
+ break;
+ }
+
+ case MCExpr::Unary:
+ fixSymbolsInTLSFixups(cast<MCUnaryExpr>(expr)->getSubExpr());
+ break;
+ }
}
void MCWasmStreamer::emitThumbFunc(MCSymbol *Func) {
@@ -215,3 +268,15 @@ void MCWasmStreamer::emitTBSSSymbol(MCSection *Section, MCSymbol *Symbol,
uint64_t Size, unsigned ByteAlignment) {
llvm_unreachable("Wasm doesn't support this directive");
}
+
+MCStreamer *llvm::createWasmStreamer(MCContext &Context,
+ std::unique_ptr<MCAsmBackend> &&MAB,
+ std::unique_ptr<MCObjectWriter> &&OW,
+ std::unique_ptr<MCCodeEmitter> &&CE,
+ bool RelaxAll) {
+ MCWasmStreamer *S =
+ new MCWasmStreamer(Context, std::move(MAB), std::move(OW), std::move(CE));
+ if (RelaxAll)
+ S->getAssembler().setRelaxAll(true);
+ return S;
+}
diff --git a/llvm/lib/MC/MCWin64EH.cpp b/llvm/lib/MC/MCWin64EH.cpp
index de1b0fd3c742..7773d8828931 100644
--- a/llvm/lib/MC/MCWin64EH.cpp
+++ b/llvm/lib/MC/MCWin64EH.cpp
@@ -144,8 +144,8 @@ static void EmitRuntimeFunction(MCStreamer &streamer,
MCContext &context = streamer.getContext();
streamer.emitValueToAlignment(4);
- EmitSymbolRefWithOfs(streamer, info->Function, info->Begin);
- EmitSymbolRefWithOfs(streamer, info->Function, info->End);
+ EmitSymbolRefWithOfs(streamer, info->Begin, info->Begin);
+ EmitSymbolRefWithOfs(streamer, info->Begin, info->End);
streamer.emitValue(MCSymbolRefExpr::create(info->Symbol,
MCSymbolRefExpr::VK_COFF_IMGREL32,
context), 4);
@@ -1073,7 +1073,7 @@ static void ARM64EmitRuntimeFunction(MCStreamer &streamer,
MCContext &context = streamer.getContext();
streamer.emitValueToAlignment(4);
- EmitSymbolRefWithOfs(streamer, info->Function, info->Begin);
+ EmitSymbolRefWithOfs(streamer, info->Begin, info->Begin);
if (info->PackedInfo)
streamer.emitInt32(info->PackedInfo);
else
diff --git a/llvm/lib/MC/MCWinCOFFStreamer.cpp b/llvm/lib/MC/MCWinCOFFStreamer.cpp
index 69dc71b39fd1..0dfe5a5c2bdb 100644
--- a/llvm/lib/MC/MCWinCOFFStreamer.cpp
+++ b/llvm/lib/MC/MCWinCOFFStreamer.cpp
@@ -66,18 +66,19 @@ void MCWinCOFFStreamer::emitInstToData(const MCInst &Inst,
DF->getContents().append(Code.begin(), Code.end());
}
-void MCWinCOFFStreamer::InitSections(bool NoExecStack) {
+void MCWinCOFFStreamer::initSections(bool NoExecStack,
+ const MCSubtargetInfo &STI) {
// FIXME: this is identical to the ELF one.
// This emulates the same behavior of GNU as. This makes it easier
// to compare the output as the major sections are in the same order.
SwitchSection(getContext().getObjectFileInfo()->getTextSection());
- emitCodeAlignment(4);
+ emitCodeAlignment(4, &STI);
SwitchSection(getContext().getObjectFileInfo()->getDataSection());
- emitCodeAlignment(4);
+ emitCodeAlignment(4, &STI);
SwitchSection(getContext().getObjectFileInfo()->getBSSSection());
- emitCodeAlignment(4);
+ emitCodeAlignment(4, &STI);
SwitchSection(getContext().getObjectFileInfo()->getTextSection());
}
diff --git a/llvm/lib/MC/MCXCOFFStreamer.cpp b/llvm/lib/MC/MCXCOFFStreamer.cpp
index ec9e89fac416..90604782de13 100644
--- a/llvm/lib/MC/MCXCOFFStreamer.cpp
+++ b/llvm/lib/MC/MCXCOFFStreamer.cpp
@@ -18,7 +18,7 @@
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionXCOFF.h"
#include "llvm/MC/MCSymbolXCOFF.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/MC/MachObjectWriter.cpp b/llvm/lib/MC/MachObjectWriter.cpp
index 10ae27c2acc2..277d88cf1cd2 100644
--- a/llvm/lib/MC/MachObjectWriter.cpp
+++ b/llvm/lib/MC/MachObjectWriter.cpp
@@ -965,7 +965,7 @@ uint64_t MachObjectWriter::writeObject(MCAssembler &Asm,
// Write the section relocation entries, in reverse order to match 'as'
// (approximately, the exact algorithm is more complicated than this).
std::vector<RelAndSymbol> &Relocs = Relocations[&Sec];
- for (const RelAndSymbol &Rel : make_range(Relocs.rbegin(), Relocs.rend())) {
+ for (const RelAndSymbol &Rel : llvm::reverse(Relocs)) {
W.write<uint32_t>(Rel.MRE.r_word0);
W.write<uint32_t>(Rel.MRE.r_word1);
}
diff --git a/llvm/lib/Support/TargetRegistry.cpp b/llvm/lib/MC/TargetRegistry.cpp
index 1f9c3bbf8229..0948a6b9f1a1 100644
--- a/llvm/lib/Support/TargetRegistry.cpp
+++ b/llvm/lib/MC/TargetRegistry.cpp
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/llvm/lib/MC/WasmObjectWriter.cpp b/llvm/lib/MC/WasmObjectWriter.cpp
index 0dc5c9111db2..636c1d238932 100644
--- a/llvm/lib/MC/WasmObjectWriter.cpp
+++ b/llvm/lib/MC/WasmObjectWriter.cpp
@@ -292,6 +292,8 @@ private:
W->OS << Str;
}
+ void writeStringWithAlignment(const StringRef Str, unsigned Alignment);
+
void writeI32(int32_t val) {
char Buffer[4];
support::endian::write32le(Buffer, val);
@@ -317,7 +319,7 @@ private:
uint32_t writeCodeSection(const MCAssembler &Asm, const MCAsmLayout &Layout,
ArrayRef<WasmFunction> Functions);
uint32_t writeDataSection(const MCAsmLayout &Layout);
- void writeTagSection(ArrayRef<wasm::WasmTagType> Tags);
+ void writeTagSection(ArrayRef<uint32_t> TagTypes);
void writeGlobalSection(ArrayRef<wasm::WasmGlobal> Globals);
void writeTableSection(ArrayRef<wasm::WasmTable> Tables);
void writeRelocSection(uint32_t SectionIndex, StringRef Name,
@@ -362,6 +364,28 @@ void WasmObjectWriter::startSection(SectionBookkeeping &Section,
Section.Index = SectionCount++;
}
+// Write a string with extra padding for trailing alignment.
+// TODO: support alignment at asm and llvm level?
+void WasmObjectWriter::writeStringWithAlignment(const StringRef Str,
+ unsigned Alignment) {
+
+  // Calculate the encoded size of the string length and add padding based on
+  // it and the requested alignment.
+ raw_null_ostream NullOS;
+ uint64_t StrSizeLength = encodeULEB128(Str.size(), NullOS);
+ uint64_t Offset = W->OS.tell() + StrSizeLength + Str.size();
+ uint64_t Paddings = offsetToAlignment(Offset, Align(Alignment));
+ Offset += Paddings;
+
+  // A LEB128 encoding longer than 5 bytes is invalid here.
+ assert((StrSizeLength + Paddings) <= 5 && "too long string to align");
+
+ encodeSLEB128(Str.size(), W->OS, StrSizeLength + Paddings);
+ W->OS << Str;
+
+ assert(W->OS.tell() == Offset && "invalid padding");
+}
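A worked example of the padding computed above, with assumed stream offsets: aligning the 10-byte name "__clangast" to 4 bytes when the stream is currently at offset 0x26.

//   StrSizeLength = 1                            // ULEB128(10) takes one byte
//   Offset = 0x26 + 1 + 10                       // = 0x31 if no padding were added
//   Paddings = offsetToAlignment(0x31, Align(4)) // = 3
//   encodeSLEB128(10, OS, /*PadTo=*/1 + 3)       // length field now takes 4 bytes
//   // the name then ends at 0x26 + 4 + 10 = 0x34, so the section contents
//   // that follow it start on a 4-byte boundary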
+
void WasmObjectWriter::startCustomSection(SectionBookkeeping &Section,
StringRef Name) {
LLVM_DEBUG(dbgs() << "startCustomSection " << Name << "\n");
@@ -371,7 +395,12 @@ void WasmObjectWriter::startCustomSection(SectionBookkeeping &Section,
Section.PayloadOffset = W->OS.tell();
// Custom sections in wasm also have a string identifier.
- writeString(Name);
+ if (Name != "__clangast") {
+ writeString(Name);
+ } else {
+    // The on-disk hashtable in clangast needs to be aligned to 4 bytes.
+ writeStringWithAlignment(Name, 4);
+ }
// The position where the custom section starts.
Section.ContentsOffset = W->OS.tell();
@@ -565,8 +594,14 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm,
SymA->setUsedInReloc();
}
- if (RefA->getKind() == MCSymbolRefExpr::VK_GOT)
+ switch (RefA->getKind()) {
+ case MCSymbolRefExpr::VK_GOT:
+ case MCSymbolRefExpr::VK_WASM_GOT_TLS:
SymA->setUsedInGOT();
+ break;
+ default:
+ break;
+ }
WasmRelocationEntry Rec(FixupOffset, SymA, C, Type, &FixupSection);
LLVM_DEBUG(dbgs() << "WasmReloc: " << Rec << "\n");
@@ -825,8 +860,8 @@ void WasmObjectWriter::writeImportSection(ArrayRef<wasm::WasmImport> Imports,
encodeULEB128(NumElements, W->OS); // initial
break;
case wasm::WASM_EXTERNAL_TAG:
- W->OS << char(Import.Tag.Attribute);
- encodeULEB128(Import.Tag.SigIndex, W->OS);
+ W->OS << char(0); // Reserved 'attribute' field
+ encodeULEB128(Import.SigIndex, W->OS);
break;
default:
llvm_unreachable("unsupported import kind");
@@ -850,17 +885,17 @@ void WasmObjectWriter::writeFunctionSection(ArrayRef<WasmFunction> Functions) {
endSection(Section);
}
-void WasmObjectWriter::writeTagSection(ArrayRef<wasm::WasmTagType> Tags) {
- if (Tags.empty())
+void WasmObjectWriter::writeTagSection(ArrayRef<uint32_t> TagTypes) {
+ if (TagTypes.empty())
return;
SectionBookkeeping Section;
startSection(Section, wasm::WASM_SEC_TAG);
- encodeULEB128(Tags.size(), W->OS);
- for (const wasm::WasmTagType &Tag : Tags) {
- W->OS << char(Tag.Attribute);
- encodeULEB128(Tag.SigIndex, W->OS);
+ encodeULEB128(TagTypes.size(), W->OS);
+ for (uint32_t Index : TagTypes) {
+ W->OS << char(0); // Reserved 'attribute' field
+ encodeULEB128(Index, W->OS);
}
endSection(Section);
@@ -1052,7 +1087,7 @@ uint32_t WasmObjectWriter::writeDataSection(const MCAsmLayout &Layout) {
void WasmObjectWriter::writeRelocSection(
uint32_t SectionIndex, StringRef Name,
std::vector<WasmRelocationEntry> &Relocs) {
- // See: https://github.com/WebAssembly/tool-conventions/blob/master/Linking.md
+ // See: https://github.com/WebAssembly/tool-conventions/blob/main/Linking.md
// for descriptions of the reloc sections.
if (Relocs.empty())
@@ -1340,8 +1375,7 @@ void WasmObjectWriter::prepareImports(
Import.Module = WS.getImportModule();
Import.Field = WS.getImportName();
Import.Kind = wasm::WASM_EXTERNAL_TAG;
- Import.Tag.Attribute = wasm::WASM_TAG_ATTRIBUTE_EXCEPTION;
- Import.Tag.SigIndex = getTagType(WS);
+ Import.SigIndex = getTagType(WS);
Imports.push_back(Import);
assert(WasmIndices.count(&WS) == 0);
WasmIndices[&WS] = NumTagImports++;
@@ -1409,7 +1443,7 @@ uint64_t WasmObjectWriter::writeOneObject(MCAssembler &Asm,
SmallVector<uint32_t, 4> TableElems;
SmallVector<wasm::WasmImport, 4> Imports;
SmallVector<wasm::WasmExport, 4> Exports;
- SmallVector<wasm::WasmTagType, 1> Tags;
+ SmallVector<uint32_t, 2> TagTypes;
SmallVector<wasm::WasmGlobal, 1> Globals;
SmallVector<wasm::WasmTable, 1> Tables;
SmallVector<wasm::WasmSymbolInfo, 4> SymbolInfos;
@@ -1644,16 +1678,15 @@ uint64_t WasmObjectWriter::writeOneObject(MCAssembler &Asm,
LLVM_DEBUG(dbgs() << " -> table index: "
<< WasmIndices.find(&WS)->second << "\n");
} else if (WS.isTag()) {
- // C++ exception symbol (__cpp_exception)
+ // C++ exception symbol (__cpp_exception) or longjmp symbol
+ // (__c_longjmp)
unsigned Index;
if (WS.isDefined()) {
- Index = NumTagImports + Tags.size();
- wasm::WasmTagType Tag;
- Tag.SigIndex = getTagType(WS);
- Tag.Attribute = wasm::WASM_TAG_ATTRIBUTE_EXCEPTION;
+ Index = NumTagImports + TagTypes.size();
+ uint32_t SigIndex = getTagType(WS);
assert(WasmIndices.count(&WS) == 0);
WasmIndices[&WS] = Index;
- Tags.push_back(Tag);
+ TagTypes.push_back(SigIndex);
} else {
// An import; the index was assigned above.
assert(WasmIndices.count(&WS) > 0);
@@ -1747,6 +1780,8 @@ uint64_t WasmObjectWriter::writeOneObject(MCAssembler &Asm,
Flags |= wasm::WASM_SYMBOL_EXPLICIT_NAME;
if (WS.hasExportName())
Flags |= wasm::WASM_SYMBOL_EXPORTED;
+ if (WS.isTLS())
+ Flags |= wasm::WASM_SYMBOL_TLS;
wasm::WasmSymbolInfo Info;
Info.Name = WS.getName();
@@ -1869,7 +1904,7 @@ uint64_t WasmObjectWriter::writeOneObject(MCAssembler &Asm,
writeFunctionSection(Functions);
writeTableSection(Tables);
// Skip the "memory" section; we import the memory instead.
- writeTagSection(Tags);
+ writeTagSection(TagTypes);
writeGlobalSection(Globals);
writeExportSection(Exports);
const MCSymbol *IndirectFunctionTable =
diff --git a/llvm/lib/MC/XCOFFObjectWriter.cpp b/llvm/lib/MC/XCOFFObjectWriter.cpp
index adf0d3eb443c..177253d7a9d7 100644
--- a/llvm/lib/MC/XCOFFObjectWriter.cpp
+++ b/llvm/lib/MC/XCOFFObjectWriter.cpp
@@ -168,6 +168,24 @@ struct CsectSectionEntry : public SectionEntry {
virtual ~CsectSectionEntry() {}
};
+struct DwarfSectionEntry : public SectionEntry {
+ // For DWARF section entry.
+ std::unique_ptr<XCOFFSection> DwarfSect;
+
+ DwarfSectionEntry(StringRef N, int32_t Flags,
+ std::unique_ptr<XCOFFSection> Sect)
+ : SectionEntry(N, Flags | XCOFF::STYP_DWARF), DwarfSect(std::move(Sect)) {
+ assert(DwarfSect->MCSec->isDwarfSect() &&
+ "This should be a DWARF section!");
+ assert(N.size() <= XCOFF::NameSize && "section name too long");
+ memcpy(Name, N.data(), N.size());
+ }
+
+ DwarfSectionEntry(DwarfSectionEntry &&s) = default;
+
+ virtual ~DwarfSectionEntry() {}
+};
+
class XCOFFObjectWriter : public MCObjectWriter {
uint32_t SymbolTableEntryCount = 0;
@@ -213,6 +231,8 @@ class XCOFFObjectWriter : public MCObjectWriter {
std::array<CsectSectionEntry *const, 5> Sections{
{&Text, &Data, &BSS, &TData, &TBSS}};
+ std::vector<DwarfSectionEntry> DwarfSections;
+
CsectGroup &getCsectGroup(const MCSectionXCOFF *MCSec);
virtual void reset() override;
@@ -231,12 +251,21 @@ class XCOFFObjectWriter : public MCObjectWriter {
uint64_t);
void writeSymbolTableEntryForControlSection(const XCOFFSection &, int16_t,
XCOFF::StorageClass);
+ void writeSymbolTableEntryForDwarfSection(const XCOFFSection &, int16_t);
void writeFileHeader();
void writeSectionHeaderTable();
void writeSections(const MCAssembler &Asm, const MCAsmLayout &Layout);
+ void writeSectionForControlSectionEntry(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const CsectSectionEntry &CsectEntry,
+ uint32_t &CurrentAddressLocation);
+ void writeSectionForDwarfSectionEntry(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const DwarfSectionEntry &DwarfEntry,
+ uint32_t &CurrentAddressLocation);
void writeSymbolTable(const MCAsmLayout &Layout);
void writeRelocations();
- void writeRelocation(XCOFFRelocation Reloc, const XCOFFSection &CSection);
+ void writeRelocation(XCOFFRelocation Reloc, const XCOFFSection &Section);
// Called after all the csects and symbols have been processed by
// `executePostLayoutBinding`, this function handles building up the majority
@@ -290,6 +319,8 @@ void XCOFFObjectWriter::reset() {
// Reset any sections we have written to, and empty the section header table.
for (auto *Sec : Sections)
Sec->reset();
+ for (auto &DwarfSec : DwarfSections)
+ DwarfSec.reset();
// Reset states in XCOFFObjectWriter.
SymbolTableEntryCount = 0;
@@ -372,17 +403,32 @@ void XCOFFObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
const auto *MCSec = cast<const MCSectionXCOFF>(&S);
assert(SectionMap.find(MCSec) == SectionMap.end() &&
"Cannot add a section twice.");
- assert(XCOFF::XTY_ER != MCSec->getCSectType() &&
- "An undefined csect should not get registered.");
// If the name does not fit in the storage provided in the symbol table
// entry, add it to the string table.
if (nameShouldBeInStringTable(MCSec->getSymbolTableName()))
Strings.add(MCSec->getSymbolTableName());
-
- CsectGroup &Group = getCsectGroup(MCSec);
- Group.emplace_back(MCSec);
- SectionMap[MCSec] = &Group.back();
+ if (MCSec->isCsect()) {
+    // A new control section. Its CsectSectionEntry should already be statically
+    // generated as Text/Data/BSS/TDATA/TBSS. Add this section to the group of
+ // the CsectSectionEntry.
+ assert(XCOFF::XTY_ER != MCSec->getCSectType() &&
+ "An undefined csect should not get registered.");
+ CsectGroup &Group = getCsectGroup(MCSec);
+ Group.emplace_back(MCSec);
+ SectionMap[MCSec] = &Group.back();
+ } else if (MCSec->isDwarfSect()) {
+ // A new DwarfSectionEntry.
+ std::unique_ptr<XCOFFSection> DwarfSec =
+ std::make_unique<XCOFFSection>(MCSec);
+ SectionMap[MCSec] = DwarfSec.get();
+
+ DwarfSectionEntry SecEntry(MCSec->getName(),
+ MCSec->getDwarfSubtypeFlags().getValue(),
+ std::move(DwarfSec));
+ DwarfSections.push_back(std::move(SecEntry));
+ } else
+      llvm_unreachable("unsupported section type!");
}
for (const MCSymbol &S : Asm.symbols()) {
@@ -443,13 +489,20 @@ void XCOFFObjectWriter::recordRelocation(MCAssembler &Asm,
: SymbolIndexMap[ContainingCsect->getQualNameSymbol()];
};
- auto getVirtualAddress = [this,
- &Layout](const MCSymbol *Sym,
- const MCSectionXCOFF *ContainingCsect) {
- // If Sym is a csect, return csect's address.
- // If Sym is a label, return csect's address + label's offset from the csect.
- return SectionMap[ContainingCsect]->Address +
- (Sym->isDefined() ? Layout.getSymbolOffset(*Sym) : 0);
+ auto getVirtualAddress =
+ [this, &Layout](const MCSymbol *Sym,
+ const MCSectionXCOFF *ContainingSect) -> uint64_t {
+ // A DWARF section.
+ if (ContainingSect->isDwarfSect())
+ return Layout.getSymbolOffset(*Sym);
+
+ // A csect.
+ if (!Sym->isDefined())
+ return SectionMap[ContainingSect]->Address;
+
+ // A label.
+ assert(Sym->isDefined() && "not a valid object that has address!");
+ return SectionMap[ContainingSect]->Address + Layout.getSymbolOffset(*Sym);
};
const MCSymbol *const SymA = &Target.getSymA()->getSymbol();
@@ -538,41 +591,12 @@ void XCOFFObjectWriter::recordRelocation(MCAssembler &Asm,
void XCOFFObjectWriter::writeSections(const MCAssembler &Asm,
const MCAsmLayout &Layout) {
uint32_t CurrentAddressLocation = 0;
- for (const auto *Section : Sections) {
- // Nothing to write for this Section.
- if (Section->Index == SectionEntry::UninitializedIndex ||
- Section->IsVirtual)
- continue;
-
- // There could be a gap (without corresponding zero padding) between
- // sections.
- assert(((CurrentAddressLocation <= Section->Address) ||
- (Section->Flags == XCOFF::STYP_TDATA) ||
- (Section->Flags == XCOFF::STYP_TBSS)) &&
- "CurrentAddressLocation should be less than or equal to section "
- "address if the section is not TData or TBSS.");
-
- CurrentAddressLocation = Section->Address;
-
- for (const auto *Group : Section->Groups) {
- for (const auto &Csect : *Group) {
- if (uint32_t PaddingSize = Csect.Address - CurrentAddressLocation)
- W.OS.write_zeros(PaddingSize);
- if (Csect.Size)
- Asm.writeSectionData(W.OS, Csect.MCSec, Layout);
- CurrentAddressLocation = Csect.Address + Csect.Size;
- }
- }
-
- // The size of the tail padding in a section is the end virtual address of
- // the current section minus the the end virtual address of the last csect
- // in that section.
- if (uint32_t PaddingSize =
- Section->Address + Section->Size - CurrentAddressLocation) {
- W.OS.write_zeros(PaddingSize);
- CurrentAddressLocation += PaddingSize;
- }
- }
+ for (const auto *Section : Sections)
+ writeSectionForControlSectionEntry(Asm, Layout, *Section,
+ CurrentAddressLocation);
+ for (const auto &DwarfSection : DwarfSections)
+ writeSectionForDwarfSectionEntry(Asm, Layout, DwarfSection,
+ CurrentAddressLocation);
}
uint64_t XCOFFObjectWriter::writeObject(MCAssembler &Asm,
@@ -654,6 +678,36 @@ void XCOFFObjectWriter::writeSymbolTableEntryForCsectMemberLabel(
W.write<uint16_t>(0);
}
+void XCOFFObjectWriter::writeSymbolTableEntryForDwarfSection(
+ const XCOFFSection &DwarfSectionRef, int16_t SectionIndex) {
+ assert(DwarfSectionRef.MCSec->isDwarfSect() && "Not a DWARF section!");
+
+ // n_name, n_zeros, n_offset
+ writeSymbolName(DwarfSectionRef.getSymbolTableName());
+ // n_value
+ W.write<uint32_t>(0);
+ // n_scnum
+ W.write<int16_t>(SectionIndex);
+ // n_type
+ W.write<uint16_t>(0);
+ // n_sclass
+ W.write<uint8_t>(XCOFF::C_DWARF);
+ // Always 1 aux entry for now.
+ W.write<uint8_t>(1);
+
+ // Now output the auxiliary entry.
+ // x_scnlen
+ W.write<uint32_t>(DwarfSectionRef.Size);
+ // Reserved
+ W.write<uint32_t>(0);
+ // x_nreloc. Set to 0 for now.
+ W.write<uint32_t>(0);
+ // Reserved
+ W.write<uint32_t>(0);
+ // Reserved
+ W.write<uint16_t>(0);
+}
+
void XCOFFObjectWriter::writeSymbolTableEntryForControlSection(
const XCOFFSection &CSectionRef, int16_t SectionIndex,
XCOFF::StorageClass StorageClass) {
@@ -711,10 +765,10 @@ void XCOFFObjectWriter::writeFileHeader() {
}
void XCOFFObjectWriter::writeSectionHeaderTable() {
- for (const auto *Sec : Sections) {
+ auto writeSectionHeader = [&](const SectionEntry *Sec, bool IsDwarf) {
// Nothing to write for this Section.
if (Sec->Index == SectionEntry::UninitializedIndex)
- continue;
+ return false;
// Write Name.
ArrayRef<char> NameRef(Sec->Name, XCOFF::NameSize);
@@ -722,8 +776,14 @@ void XCOFFObjectWriter::writeSectionHeaderTable() {
// Write the Physical Address and Virtual Address. In an object file these
// are the same.
- W.write<uint32_t>(Sec->Address);
- W.write<uint32_t>(Sec->Address);
+ // We use 0 for DWARF sections' Physical and Virtual Addresses.
+ if (!IsDwarf) {
+ W.write<uint32_t>(Sec->Address);
+ W.write<uint32_t>(Sec->Address);
+ } else {
+ W.write<uint32_t>(0);
+ W.write<uint32_t>(0);
+ }
W.write<uint32_t>(Sec->Size);
W.write<uint32_t>(Sec->FileOffsetToData);
@@ -738,12 +798,25 @@ void XCOFFObjectWriter::writeSectionHeaderTable() {
W.write<uint16_t>(0);
W.write<int32_t>(Sec->Flags);
- }
+
+ return true;
+ };
+
+ for (const auto *CsectSec : Sections)
+ writeSectionHeader(CsectSec, /* IsDwarf */ false);
+ for (const auto &DwarfSec : DwarfSections)
+ writeSectionHeader(&DwarfSec, /* IsDwarf */ true);
}
void XCOFFObjectWriter::writeRelocation(XCOFFRelocation Reloc,
- const XCOFFSection &CSection) {
- W.write<uint32_t>(CSection.Address + Reloc.FixupOffsetInCsect);
+ const XCOFFSection &Section) {
+ if (Section.MCSec->isCsect())
+ W.write<uint32_t>(Section.Address + Reloc.FixupOffsetInCsect);
+ else {
+ // DWARF sections' address is set to 0.
+    assert(Section.MCSec->isDwarfSect() && "unsupported section type!");
+ W.write<uint32_t>(Reloc.FixupOffsetInCsect);
+ }
W.write<uint32_t>(Reloc.SymbolTableIndex);
W.write<uint8_t>(Reloc.SignAndSize);
W.write<uint8_t>(Reloc.Type);
@@ -765,6 +838,10 @@ void XCOFFObjectWriter::writeRelocations() {
}
}
}
+
+ for (const auto &DwarfSection : DwarfSections)
+ for (const auto &Reloc : DwarfSection.DwarfSect->Relocations)
+ writeRelocation(Reloc, *DwarfSection.DwarfSect);
}
void XCOFFObjectWriter::writeSymbolTable(const MCAsmLayout &Layout) {
@@ -819,6 +896,10 @@ void XCOFFObjectWriter::writeSymbolTable(const MCAsmLayout &Layout) {
}
}
}
+
+ for (const auto &DwarfSection : DwarfSections)
+ writeSymbolTableEntryForDwarfSection(*DwarfSection.DwarfSect,
+ DwarfSection.Index);
}
void XCOFFObjectWriter::finalizeSectionInfo() {
@@ -844,11 +925,17 @@ void XCOFFObjectWriter::finalizeSectionInfo() {
}
}
+ for (auto &DwarfSection : DwarfSections)
+ DwarfSection.RelocationCount = DwarfSection.DwarfSect->Relocations.size();
+
// Calculate the file offset to the relocation entries.
uint64_t RawPointer = RelocationEntryOffset;
- for (auto Sec : Sections) {
- if (Sec->Index == SectionEntry::UninitializedIndex || !Sec->RelocationCount)
- continue;
+ auto calcOffsetToRelocations = [&](SectionEntry *Sec, bool IsDwarf) {
+ if (!IsDwarf && Sec->Index == SectionEntry::UninitializedIndex)
+ return false;
+
+ if (!Sec->RelocationCount)
+ return false;
Sec->FileOffsetToRelocations = RawPointer;
const uint32_t RelocationSizeInSec =
@@ -856,7 +943,15 @@ void XCOFFObjectWriter::finalizeSectionInfo() {
RawPointer += RelocationSizeInSec;
if (RawPointer > UINT32_MAX)
report_fatal_error("Relocation data overflowed this object file.");
- }
+
+ return true;
+ };
+
+ for (auto *Sec : Sections)
+ calcOffsetToRelocations(Sec, /* IsDwarf */ false);
+
+ for (auto &DwarfSec : DwarfSections)
+ calcOffsetToRelocations(&DwarfSec, /* IsDwarf */ true);
// TODO Error check that the number of symbol table entries fits in 32-bits
// signed ...
@@ -944,6 +1039,37 @@ void XCOFFObjectWriter::assignAddressesAndIndices(const MCAsmLayout &Layout) {
Section->Size = Address - Section->Address;
}
+ for (auto &DwarfSection : DwarfSections) {
+ assert((SectionIndex <= MaxSectionIndex) && "Section index overflow!");
+
+ XCOFFSection &DwarfSect = *DwarfSection.DwarfSect;
+ const MCSectionXCOFF *MCSec = DwarfSect.MCSec;
+
+ // Section index.
+ DwarfSection.Index = SectionIndex++;
+ SectionCount++;
+
+ // Symbol index.
+ DwarfSect.SymbolTableIndex = SymbolTableIndex;
+ SymbolIndexMap[MCSec->getQualNameSymbol()] = DwarfSect.SymbolTableIndex;
+    // 1 main and 1 auxiliary symbol table entry for the DWARF section.
+ SymbolTableIndex += 2;
+
+    // Section address. Align it to the section alignment.
+    // We use address 0 for DWARF sections' Physical and Virtual Addresses.
+    // This address is used to tell where the section is in the final object.
+ // See writeSectionForDwarfSectionEntry().
+ DwarfSection.Address = DwarfSect.Address =
+ alignTo(Address, MCSec->getAlignment());
+
+ // Section size.
+    // For a DWARF section we must use the real size, which may not be aligned.
+ DwarfSection.Size = DwarfSect.Size = Layout.getSectionAddressSize(MCSec);
+
+    // Align the address to the default alignment for the following section.
+ Address = alignTo(DwarfSect.Address + DwarfSect.Size, DefaultSectionAlign);
+ }
+
SymbolTableEntryCount = SymbolTableIndex;
// Calculate the RawPointer value for each section.
@@ -959,9 +1085,102 @@ void XCOFFObjectWriter::assignAddressesAndIndices(const MCAsmLayout &Layout) {
report_fatal_error("Section raw data overflowed this object file.");
}
+ for (auto &DwarfSection : DwarfSections) {
+    // Addresses of csect sections are always aligned to DefaultSectionAlign,
+    // but the address of a DWARF section is aligned to its section alignment,
+    // which may be bigger than DefaultSectionAlign, so the extra padding has
+    // to be accounted for here.
+ RawPointer =
+ alignTo(RawPointer, DwarfSection.DwarfSect->MCSec->getAlignment());
+
+ DwarfSection.FileOffsetToData = RawPointer;
+    // The size of some section entries, such as DWARF sections, is not
+    // aligned, so RawPointer may not be aligned afterwards.
+ RawPointer += DwarfSection.Size;
+ // Make sure RawPointer is aligned.
+ RawPointer = alignTo(RawPointer, DefaultSectionAlign);
+
+ assert(RawPointer <= UINT32_MAX &&
+ "Section raw data overflowed this object file.");
+ }
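The file-offset bookkeeping in this loop is easier to follow with concrete numbers; an assumed example with RawPointer at 0x2E4, a DWARF section aligned to 8 with Size 0x35, and DefaultSectionAlign taken here to be 4:

//   RawPointer = alignTo(0x2E4, 8)   // = 0x2E8, recorded as FileOffsetToData
//   RawPointer += 0x35               // = 0x31D, unaligned since the real size is used
//   RawPointer = alignTo(0x31D, 4)   // = 0x320, ready for the next section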
+
RelocationEntryOffset = RawPointer;
}
+void XCOFFObjectWriter::writeSectionForControlSectionEntry(
+ const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const CsectSectionEntry &CsectEntry, uint32_t &CurrentAddressLocation) {
+ // Nothing to write for this Section.
+ if (CsectEntry.Index == SectionEntry::UninitializedIndex)
+ return;
+
+  // There could be a gap (without corresponding zero padding) between
+  // sections.
+ assert(((CurrentAddressLocation <= CsectEntry.Address) ||
+ (CsectEntry.Flags == XCOFF::STYP_TDATA) ||
+ (CsectEntry.Flags == XCOFF::STYP_TBSS)) &&
+ "CurrentAddressLocation should be less than or equal to section "
+ "address if the section is not TData or TBSS.");
+
+ CurrentAddressLocation = CsectEntry.Address;
+
+  // For virtual sections there is nothing to write, but we still need to
+  // advance CurrentAddressLocation so that later sections (such as DWARF
+  // sections) get a correct write location.
+ if (CsectEntry.IsVirtual) {
+ CurrentAddressLocation += CsectEntry.Size;
+ return;
+ }
+
+ for (const auto &Group : CsectEntry.Groups) {
+ for (const auto &Csect : *Group) {
+ if (uint32_t PaddingSize = Csect.Address - CurrentAddressLocation)
+ W.OS.write_zeros(PaddingSize);
+ if (Csect.Size)
+ Asm.writeSectionData(W.OS, Csect.MCSec, Layout);
+ CurrentAddressLocation = Csect.Address + Csect.Size;
+ }
+ }
+
+  // The size of the tail padding in a section is the end virtual address of
+  // the current section minus the end virtual address of the last csect
+  // in that section.
+ if (uint32_t PaddingSize =
+ CsectEntry.Address + CsectEntry.Size - CurrentAddressLocation) {
+ W.OS.write_zeros(PaddingSize);
+ CurrentAddressLocation += PaddingSize;
+ }
+}
+
+void XCOFFObjectWriter::writeSectionForDwarfSectionEntry(
+ const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const DwarfSectionEntry &DwarfEntry, uint32_t &CurrentAddressLocation) {
+  // There could be a gap (without corresponding zero padding) between
+  // sections, for example when the DWARF section alignment is bigger than
+  // DefaultSectionAlign.
+ assert(CurrentAddressLocation <= DwarfEntry.Address &&
+ "CurrentAddressLocation should be less than or equal to section "
+ "address.");
+
+ if (uint32_t PaddingSize = DwarfEntry.Address - CurrentAddressLocation)
+ W.OS.write_zeros(PaddingSize);
+
+ if (DwarfEntry.Size)
+ Asm.writeSectionData(W.OS, DwarfEntry.DwarfSect->MCSec, Layout);
+
+ CurrentAddressLocation = DwarfEntry.Address + DwarfEntry.Size;
+
+ // DWARF section size is not aligned to DefaultSectionAlign.
+ // Make sure CurrentAddressLocation is aligned to DefaultSectionAlign.
+ uint32_t Mod = CurrentAddressLocation % DefaultSectionAlign;
+ uint32_t TailPaddingSize = Mod ? DefaultSectionAlign - Mod : 0;
+ if (TailPaddingSize)
+ W.OS.write_zeros(TailPaddingSize);
+
+ CurrentAddressLocation += TailPaddingSize;
+}
+
// Takes the log base 2 of the alignment and shifts the result into the 5 most
// significant bits of a byte, then or's in the csect type into the least
// significant 3 bits.
diff --git a/llvm/lib/MCA/Context.cpp b/llvm/lib/MCA/Context.cpp
index 99d2373588ac..c21ec9e62dff 100644
--- a/llvm/lib/MCA/Context.cpp
+++ b/llvm/lib/MCA/Context.cpp
@@ -74,14 +74,17 @@ Context::createInOrderPipeline(const PipelineOptions &Opts, SourceMgr &SrcMgr,
CustomBehaviour &CB) {
const MCSchedModel &SM = STI.getSchedModel();
auto PRF = std::make_unique<RegisterFile>(SM, MRI, Opts.RegisterFileSize);
+ auto LSU = std::make_unique<LSUnit>(SM, Opts.LoadQueueSize,
+ Opts.StoreQueueSize, Opts.AssumeNoAlias);
// Create the pipeline stages.
auto Entry = std::make_unique<EntryStage>(SrcMgr);
- auto InOrderIssue = std::make_unique<InOrderIssueStage>(STI, *PRF, CB);
+ auto InOrderIssue = std::make_unique<InOrderIssueStage>(STI, *PRF, CB, *LSU);
auto StagePipeline = std::make_unique<Pipeline>();
// Pass the ownership of all the hardware units to this Context.
addHardwareUnit(std::move(PRF));
+ addHardwareUnit(std::move(LSU));
// Build the pipeline.
StagePipeline->appendStage(std::move(Entry));
diff --git a/llvm/lib/MCA/CustomBehaviour.cpp b/llvm/lib/MCA/CustomBehaviour.cpp
index 23211f402927..a9ea8edff059 100644
--- a/llvm/lib/MCA/CustomBehaviour.cpp
+++ b/llvm/lib/MCA/CustomBehaviour.cpp
@@ -24,5 +24,23 @@ unsigned CustomBehaviour::checkCustomHazard(ArrayRef<InstRef> IssuedInst,
return 0;
}
+std::vector<std::unique_ptr<View>>
+CustomBehaviour::getStartViews(llvm::MCInstPrinter &IP,
+ llvm::ArrayRef<llvm::MCInst> Insts) {
+ return std::vector<std::unique_ptr<View>>();
+}
+
+std::vector<std::unique_ptr<View>>
+CustomBehaviour::getPostInstrInfoViews(llvm::MCInstPrinter &IP,
+ llvm::ArrayRef<llvm::MCInst> Insts) {
+ return std::vector<std::unique_ptr<View>>();
+}
+
+std::vector<std::unique_ptr<View>>
+CustomBehaviour::getEndViews(llvm::MCInstPrinter &IP,
+ llvm::ArrayRef<llvm::MCInst> Insts) {
+ return std::vector<std::unique_ptr<View>>();
+}
+
} // namespace mca
} // namespace llvm
diff --git a/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp b/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp
index 81c4f682f63d..474bf84cf891 100644
--- a/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp
+++ b/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp
@@ -288,6 +288,19 @@ void RegisterFile::addRegisterWrite(WriteRef Write,
// If this move has been eliminated, then method tryEliminateMoveOrSwap should
// have already updated all the register mappings.
if (!IsEliminated) {
+ // Check if this is one of multiple writes performed by this
+ // instruction to register RegID.
+ const WriteRef &OtherWrite = RegisterMappings[RegID].first;
+ const WriteState *OtherWS = OtherWrite.getWriteState();
+ if (OtherWS && OtherWrite.getSourceIndex() == Write.getSourceIndex()) {
+ if (OtherWS->getLatency() > WS.getLatency()) {
+ // Conservatively keep the slowest write on RegID.
+ if (ShouldAllocatePhysRegs)
+ allocatePhysRegs(RegisterMappings[RegID].second, UsedPhysRegs);
+ return;
+ }
+ }
+
// Update the mapping for register RegID including its sub-registers.
RegisterMappings[RegID].first = Write;
RegisterMappings[RegID].second.AliasRegID = 0U;
diff --git a/llvm/lib/MCA/InstrBuilder.cpp b/llvm/lib/MCA/InstrBuilder.cpp
index 4067d86930d1..0ab845a4c28f 100644
--- a/llvm/lib/MCA/InstrBuilder.cpp
+++ b/llvm/lib/MCA/InstrBuilder.cpp
@@ -687,7 +687,7 @@ InstrBuilder::createInstruction(const MCInst &MCI) {
if (IsDepBreaking) {
// A mask of all zeroes means: explicit input operands are not
// independent.
- if (Mask.isNullValue()) {
+ if (Mask.isZero()) {
if (!RD.isImplicitRead())
RS.setIndependentFromDef();
} else {
diff --git a/llvm/lib/MCA/Stages/InOrderIssueStage.cpp b/llvm/lib/MCA/Stages/InOrderIssueStage.cpp
index ccf6f20a6737..fa5c0fc66b9e 100644
--- a/llvm/lib/MCA/Stages/InOrderIssueStage.cpp
+++ b/llvm/lib/MCA/Stages/InOrderIssueStage.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/MCA/Stages/InOrderIssueStage.h"
+#include "llvm/MCA/HardwareUnits/LSUnit.h"
#include "llvm/MCA/HardwareUnits/RegisterFile.h"
#include "llvm/MCA/HardwareUnits/RetireControlUnit.h"
#include "llvm/MCA/Instruction.h"
@@ -43,9 +44,10 @@ void StallInfo::cycleEnd() {
}
InOrderIssueStage::InOrderIssueStage(const MCSubtargetInfo &STI,
- RegisterFile &PRF, CustomBehaviour &CB)
- : STI(STI), PRF(PRF), RM(STI.getSchedModel()), CB(CB), NumIssued(), SI(),
- CarryOver(), Bandwidth(), LastWriteBackCycle() {}
+ RegisterFile &PRF, CustomBehaviour &CB,
+ LSUnit &LSU)
+ : STI(STI), PRF(PRF), RM(STI.getSchedModel()), CB(CB), LSU(LSU),
+ NumIssued(), SI(), CarryOver(), Bandwidth(), LastWriteBackCycle() {}
unsigned InOrderIssueStage::getIssueWidth() const {
return STI.getSchedModel().IssueWidth;
@@ -125,6 +127,13 @@ bool InOrderIssueStage::canExecute(const InstRef &IR) {
return false;
}
+ if (IR.getInstruction()->isMemOp() && !LSU.isReady(IR)) {
+ // This load (store) aliases with a preceding store (load). Delay
+ // it until the depenency is cleared.
+ SI.update(IR, /* delay */ 1, StallInfo::StallKind::LOAD_STORE);
+ return false;
+ }
+
if (unsigned CustomStallCycles = CB.checkCustomHazard(IssuedInst, IR)) {
SI.update(IR, CustomStallCycles, StallInfo::StallKind::CUSTOM_STALL);
return false;
@@ -188,6 +197,10 @@ void InOrderIssueStage::notifyInstructionRetired(const InstRef &IR,
}
llvm::Error InOrderIssueStage::execute(InstRef &IR) {
+ Instruction &IS = *IR.getInstruction();
+ if (IS.isMemOp())
+ IS.setLSUTokenID(LSU.dispatch(IR));
+
if (llvm::Error E = tryIssue(IR))
return E;
@@ -222,6 +235,9 @@ llvm::Error InOrderIssueStage::tryIssue(InstRef &IR) {
RM.issueInstruction(Desc, UsedResources);
IS.execute(SourceIndex);
+ if (IS.isMemOp())
+ LSU.onInstructionIssued(IR);
+
// Replace resource masks with valid resource processor IDs.
for (ResourceUse &Use : UsedResources) {
uint64_t Mask = Use.first.first;
@@ -245,6 +261,7 @@ llvm::Error InOrderIssueStage::tryIssue(InstRef &IR) {
// the execution and retirement now.
if (IS.isExecuted()) {
PRF.onInstructionExecuted(&IS);
+ LSU.onInstructionExecuted(IR);
notifyEvent<HWInstructionEvent>(
HWInstructionEvent(HWInstructionEvent::Executed, IR));
LLVM_DEBUG(dbgs() << "[E] Instruction #" << IR << " is executed\n");
@@ -279,6 +296,7 @@ void InOrderIssueStage::updateIssuedInst() {
}
PRF.onInstructionExecuted(&IS);
+ LSU.onInstructionExecuted(IR);
notifyInstructionExecuted(IR);
++NumExecuted;
@@ -324,6 +342,9 @@ void InOrderIssueStage::retireInstruction(InstRef &IR) {
for (const WriteState &WS : IS.getDefs())
PRF.removeRegisterWrite(WS, FreedRegs);
+ if (IS.isMemOp())
+ LSU.onInstructionRetired(IR);
+
notifyInstructionRetired(IR, FreedRegs);
}
@@ -363,6 +384,7 @@ llvm::Error InOrderIssueStage::cycleStart() {
Bandwidth = getIssueWidth();
PRF.cycleStart();
+ LSU.cycleEvent();
// Release consumed resources.
SmallVector<ResourceRef, 4> Freed;
diff --git a/llvm/lib/MCA/Stages/InstructionTables.cpp b/llvm/lib/MCA/Stages/InstructionTables.cpp
index 93e368123066..a842b52dcd39 100644
--- a/llvm/lib/MCA/Stages/InstructionTables.cpp
+++ b/llvm/lib/MCA/Stages/InstructionTables.cpp
@@ -24,7 +24,7 @@ Error InstructionTables::execute(InstRef &IR) {
UsedResources.clear();
// Identify the resources consumed by this instruction.
- for (const std::pair<const uint64_t, ResourceUsage> Resource :
+ for (const std::pair<uint64_t, ResourceUsage> &Resource :
Desc.Resources) {
// Skip zero-cycle resources (i.e., unused resources).
if (!Resource.second.size())
diff --git a/llvm/tools/llvm-mca/Views/View.cpp b/llvm/lib/MCA/View.cpp
index 09d08d3ae007..a56d3a124934 100644
--- a/llvm/tools/llvm-mca/Views/View.cpp
+++ b/llvm/lib/MCA/View.cpp
@@ -11,7 +11,7 @@
///
//===----------------------------------------------------------------------===//
-#include "Views/View.h"
+#include "llvm/MCA/View.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSubtargetInfo.h"
diff --git a/llvm/lib/Object/Archive.cpp b/llvm/lib/Object/Archive.cpp
index 6ff896cf347e..5492692445e7 100644
--- a/llvm/lib/Object/Archive.cpp
+++ b/llvm/lib/Object/Archive.cpp
@@ -418,7 +418,7 @@ Expected<bool> Archive::Child::isThinMember() const {
if (!NameOrErr)
return NameOrErr.takeError();
StringRef Name = NameOrErr.get();
- return Parent->IsThin && Name != "/" && Name != "//";
+ return Parent->IsThin && Name != "/" && Name != "//" && Name != "/SYM64/";
}
Expected<std::string> Archive::Child::getFullName() const {
diff --git a/llvm/lib/Object/COFFModuleDefinition.cpp b/llvm/lib/Object/COFFModuleDefinition.cpp
index 8f29f7a658fd..55ddd3baca2b 100644
--- a/llvm/lib/Object/COFFModuleDefinition.cpp
+++ b/llvm/lib/Object/COFFModuleDefinition.cpp
@@ -80,11 +80,6 @@ static bool isDecorated(StringRef Sym, bool MingwDef) {
(!MingwDef && Sym.contains('@'));
}
-static Error createError(const Twine &Err) {
- return make_error<StringError>(StringRef(Err.str()),
- object_error::parse_failed);
-}
-
class Lexer {
public:
Lexer(StringRef S) : Buf(S) {}
diff --git a/llvm/lib/Object/ELF.cpp b/llvm/lib/Object/ELF.cpp
index ca2ed4449120..84181ae5e501 100644
--- a/llvm/lib/Object/ELF.cpp
+++ b/llvm/lib/Object/ELF.cpp
@@ -246,6 +246,9 @@ StringRef llvm::object::getELFSectionTypeName(uint32_t Machine, unsigned Type) {
STRINGIFY_ENUM_CASE(ELF, SHT_MIPS_ABIFLAGS);
}
break;
+ case ELF::EM_MSP430:
+ switch (Type) { STRINGIFY_ENUM_CASE(ELF, SHT_MSP430_ATTRIBUTES); }
+ break;
case ELF::EM_RISCV:
switch (Type) { STRINGIFY_ENUM_CASE(ELF, SHT_RISCV_ATTRIBUTES); }
break;
@@ -333,40 +336,26 @@ ELFFile<ELFT>::decode_relrs(Elf_Relr_Range relrs) const {
std::vector<Elf_Rel> Relocs;
// Word type: uint32_t for Elf32, and uint64_t for Elf64.
- typedef typename ELFT::uint Word;
-
- // Word size in number of bytes.
- const size_t WordSize = sizeof(Word);
+ using Addr = typename ELFT::uint;
- // Number of bits used for the relocation offsets bitmap.
- // These many relative relocations can be encoded in a single entry.
- const size_t NBits = 8*WordSize - 1;
-
- Word Base = 0;
- for (const Elf_Relr &R : relrs) {
- Word Entry = R;
- if ((Entry&1) == 0) {
+ Addr Base = 0;
+ for (Elf_Relr R : relrs) {
+ typename ELFT::uint Entry = R;
+ if ((Entry & 1) == 0) {
// Even entry: encodes the offset for next relocation.
Rel.r_offset = Entry;
Relocs.push_back(Rel);
// Set base offset for subsequent bitmap entries.
- Base = Entry + WordSize;
- continue;
- }
-
- // Odd entry: encodes bitmap for relocations starting at base.
- Word Offset = Base;
- while (Entry != 0) {
- Entry >>= 1;
- if ((Entry&1) != 0) {
- Rel.r_offset = Offset;
- Relocs.push_back(Rel);
- }
- Offset += WordSize;
+ Base = Entry + sizeof(Addr);
+ } else {
+ // Odd entry: encodes bitmap for relocations starting at base.
+ for (Addr Offset = Base; (Entry >>= 1) != 0; Offset += sizeof(Addr))
+ if ((Entry & 1) != 0) {
+ Rel.r_offset = Offset;
+ Relocs.push_back(Rel);
+ }
+ Base += (CHAR_BIT * sizeof(Entry) - 1) * sizeof(Addr);
}
-
- // Advance base offset by NBits words.
- Base += NBits * WordSize;
}
return Relocs;
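The rewritten loop above decodes SHT_RELR data: an even entry is a literal relocation offset and resets the base, while an odd entry is a bitmap whose set bits (above the tag bit) mark further relocations at word-sized strides from the base. A standalone sketch, not LLVM code, tracing the same logic on a tiny hand-made stream with illustrative values:

#include <cinttypes>
#include <climits>
#include <cstdint>
#include <cstdio>

int main() {
  // Two-entry RELR stream, 8-byte words: an address entry, then a bitmap entry.
  const uint64_t Relrs[] = {0x10008, 0x5};
  const uint64_t WordSize = sizeof(uint64_t);
  uint64_t Base = 0;
  for (uint64_t Entry : Relrs) {
    if ((Entry & 1) == 0) {
      printf("0x%" PRIx64 "\n", Entry); // even entry: literal relocation offset
      Base = Entry + WordSize;
    } else {
      // Odd entry: bit 1 marks Base, bit 2 marks Base + WordSize, and so on.
      for (uint64_t Offset = Base; (Entry >>= 1) != 0; Offset += WordSize)
        if (Entry & 1)
          printf("0x%" PRIx64 "\n", Offset);
      Base += (CHAR_BIT * sizeof(Entry) - 1) * WordSize;
    }
  }
  return 0; // prints 0x10008 and 0x10018
}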
@@ -474,6 +463,14 @@ std::string ELFFile<ELFT>::getDynamicTagAsString(unsigned Arch,
}
break;
+ case ELF::EM_PPC:
+ switch (Type) {
+#define PPC_DYNAMIC_TAG(name, value) DYNAMIC_STRINGIFY_ENUM(name, value)
+#include "llvm/BinaryFormat/DynamicTags.def"
+#undef PPC_DYNAMIC_TAG
+ }
+ break;
+
case ELF::EM_PPC64:
switch (Type) {
#define PPC64_DYNAMIC_TAG(name, value) DYNAMIC_STRINGIFY_ENUM(name, value)
@@ -481,6 +478,14 @@ std::string ELFFile<ELFT>::getDynamicTagAsString(unsigned Arch,
#undef PPC64_DYNAMIC_TAG
}
break;
+
+ case ELF::EM_RISCV:
+ switch (Type) {
+#define RISCV_DYNAMIC_TAG(name, value) DYNAMIC_STRINGIFY_ENUM(name, value)
+#include "llvm/BinaryFormat/DynamicTags.def"
+#undef RISCV_DYNAMIC_TAG
+ }
+ break;
}
#undef DYNAMIC_TAG
switch (Type) {
@@ -488,7 +493,9 @@ std::string ELFFile<ELFT>::getDynamicTagAsString(unsigned Arch,
#define AARCH64_DYNAMIC_TAG(name, value)
#define MIPS_DYNAMIC_TAG(name, value)
#define HEXAGON_DYNAMIC_TAG(name, value)
+#define PPC_DYNAMIC_TAG(name, value)
#define PPC64_DYNAMIC_TAG(name, value)
+#define RISCV_DYNAMIC_TAG(name, value)
// Also ignore marker tags such as DT_HIOS (maps to DT_VERNEEDNUM), etc.
#define DYNAMIC_TAG_MARKER(name, value)
#define DYNAMIC_TAG(name, value) case value: return #name;
@@ -497,7 +504,9 @@ std::string ELFFile<ELFT>::getDynamicTagAsString(unsigned Arch,
#undef AARCH64_DYNAMIC_TAG
#undef MIPS_DYNAMIC_TAG
#undef HEXAGON_DYNAMIC_TAG
+#undef PPC_DYNAMIC_TAG
#undef PPC64_DYNAMIC_TAG
+#undef RISCV_DYNAMIC_TAG
#undef DYNAMIC_TAG_MARKER
#undef DYNAMIC_STRINGIFY_ENUM
default:
@@ -613,14 +622,14 @@ ELFFile<ELFT>::toMappedAddr(uint64_t VAddr, WarningHandler WarnHandler) const {
}
template <class ELFT>
-Expected<std::vector<typename ELFT::BBAddrMap>>
+Expected<std::vector<BBAddrMap>>
ELFFile<ELFT>::decodeBBAddrMap(const Elf_Shdr &Sec) const {
Expected<ArrayRef<uint8_t>> ContentsOrErr = getSectionContents(Sec);
if (!ContentsOrErr)
return ContentsOrErr.takeError();
ArrayRef<uint8_t> Content = *ContentsOrErr;
DataExtractor Data(Content, isLE(), ELFT::Is64Bits ? 8 : 4);
- std::vector<Elf_BBAddrMap> FunctionEntries;
+ std::vector<BBAddrMap> FunctionEntries;
DataExtractor::Cursor Cur(0);
Error ULEBSizeErr = Error::success();
@@ -647,7 +656,7 @@ ELFFile<ELFT>::decodeBBAddrMap(const Elf_Shdr &Sec) const {
while (!ULEBSizeErr && Cur && Cur.tell() < Content.size()) {
uintX_t Address = static_cast<uintX_t>(Data.getAddress(Cur));
uint32_t NumBlocks = ReadULEB128AsUInt32();
- std::vector<typename Elf_BBAddrMap::BBEntry> BBEntries;
+ std::vector<BBAddrMap::BBEntry> BBEntries;
for (uint32_t BlockID = 0; !ULEBSizeErr && Cur && (BlockID < NumBlocks);
++BlockID) {
uint32_t Offset = ReadULEB128AsUInt32();
diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp
index 6613d79ab3d0..50035d6c7523 100644
--- a/llvm/lib/Object/ELFObjectFile.cpp
+++ b/llvm/lib/Object/ELFObjectFile.cpp
@@ -15,6 +15,7 @@
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/ELF.h"
#include "llvm/Object/ELFTypes.h"
#include "llvm/Object/Error.h"
@@ -25,7 +26,6 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/RISCVAttributeParser.h"
#include "llvm/Support/RISCVAttributes.h"
-#include "llvm/Support/TargetRegistry.h"
#include <algorithm>
#include <cstddef>
#include <cstdint>
@@ -538,9 +538,16 @@ void ELFObjectFileBase::setARMSubArch(Triple &TheTriple) const {
case ARMBuildAttrs::v6K:
Triple += "v6k";
break;
- case ARMBuildAttrs::v7:
- Triple += "v7";
+ case ARMBuildAttrs::v7: {
+ Optional<unsigned> ArchProfileAttr =
+ Attributes.getAttributeValue(ARMBuildAttrs::CPU_arch_profile);
+ if (ArchProfileAttr.hasValue() &&
+ ArchProfileAttr.getValue() == ARMBuildAttrs::MicroControllerProfile)
+ Triple += "v7m";
+ else
+ Triple += "v7";
break;
+ }
case ARMBuildAttrs::v6_M:
Triple += "v6m";
break;
@@ -647,3 +654,72 @@ ELFObjectFileBase::getPltAddresses() const {
}
return Result;
}
+
+template <class ELFT>
+static Expected<std::vector<VersionEntry>>
+readDynsymVersionsImpl(const ELFFile<ELFT> &EF,
+ ELFObjectFileBase::elf_symbol_iterator_range Symbols) {
+ using Elf_Shdr = typename ELFT::Shdr;
+ const Elf_Shdr *VerSec = nullptr;
+ const Elf_Shdr *VerNeedSec = nullptr;
+ const Elf_Shdr *VerDefSec = nullptr;
+ // The user should ensure sections() can't fail here.
+ for (const Elf_Shdr &Sec : cantFail(EF.sections())) {
+ if (Sec.sh_type == ELF::SHT_GNU_versym)
+ VerSec = &Sec;
+ else if (Sec.sh_type == ELF::SHT_GNU_verdef)
+ VerDefSec = &Sec;
+ else if (Sec.sh_type == ELF::SHT_GNU_verneed)
+ VerNeedSec = &Sec;
+ }
+ if (!VerSec)
+ return std::vector<VersionEntry>();
+
+ Expected<SmallVector<Optional<VersionEntry>, 0>> MapOrErr =
+ EF.loadVersionMap(VerNeedSec, VerDefSec);
+ if (!MapOrErr)
+ return MapOrErr.takeError();
+
+ std::vector<VersionEntry> Ret;
+ size_t I = 0;
+ for (auto It = Symbols.begin(), E = Symbols.end(); It != E; ++It) {
+ ++I;
+ Expected<const typename ELFT::Versym *> VerEntryOrErr =
+ EF.template getEntry<typename ELFT::Versym>(*VerSec, I);
+ if (!VerEntryOrErr)
+ return createError("unable to read an entry with index " + Twine(I) +
+ " from " + describe(EF, *VerSec) + ": " +
+ toString(VerEntryOrErr.takeError()));
+
+ Expected<uint32_t> FlagsOrErr = It->getFlags();
+ if (!FlagsOrErr)
+ return createError("unable to read flags for symbol with index " +
+ Twine(I) + ": " + toString(FlagsOrErr.takeError()));
+
+ bool IsDefault;
+ Expected<StringRef> VerOrErr = EF.getSymbolVersionByIndex(
+ (*VerEntryOrErr)->vs_index, IsDefault, *MapOrErr,
+ (*FlagsOrErr) & SymbolRef::SF_Undefined);
+ if (!VerOrErr)
+ return createError("unable to get a version for entry " + Twine(I) +
+ " of " + describe(EF, *VerSec) + ": " +
+ toString(VerOrErr.takeError()));
+
+ Ret.push_back({(*VerOrErr).str(), IsDefault});
+ }
+
+ return Ret;
+}
+
+Expected<std::vector<VersionEntry>>
+ELFObjectFileBase::readDynsymVersions() const {
+ elf_symbol_iterator_range Symbols = getDynamicSymbolIterators();
+ if (const auto *Obj = dyn_cast<ELF32LEObjectFile>(this))
+ return readDynsymVersionsImpl(Obj->getELFFile(), Symbols);
+ if (const auto *Obj = dyn_cast<ELF32BEObjectFile>(this))
+ return readDynsymVersionsImpl(Obj->getELFFile(), Symbols);
+ if (const auto *Obj = dyn_cast<ELF64LEObjectFile>(this))
+ return readDynsymVersionsImpl(Obj->getELFFile(), Symbols);
+ return readDynsymVersionsImpl(cast<ELF64BEObjectFile>(this)->getELFFile(),
+ Symbols);
+}
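A brief usage sketch for the new readDynsymVersions() entry point (surrounding setup assumed; it yields one VersionEntry per dynamic symbol, in iteration order, or an empty vector when the object has no SHT_GNU_versym section):

// ElfObj is assumed to be an ELFObjectFileBase * obtained elsewhere.
Expected<std::vector<object::VersionEntry>> VersOrErr =
    ElfObj->readDynsymVersions();
if (!VersOrErr)
  return VersOrErr.takeError();
// VersOrErr->size() equals the number of dynamic symbols; entry I describes
// the version bound to dynamic symbol I (a version string plus a
// default/non-default flag, per the push_back in readDynsymVersionsImpl above).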
diff --git a/llvm/lib/Object/IRObjectFile.cpp b/llvm/lib/Object/IRObjectFile.cpp
index befba5d57127..c653262791cc 100644
--- a/llvm/lib/Object/IRObjectFile.cpp
+++ b/llvm/lib/Object/IRObjectFile.cpp
@@ -18,9 +18,9 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace object;
diff --git a/llvm/lib/Object/IRSymtab.cpp b/llvm/lib/Object/IRSymtab.cpp
index 746b00867157..093ae1bbc267 100644
--- a/llvm/lib/Object/IRSymtab.cpp
+++ b/llvm/lib/Object/IRSymtab.cpp
@@ -41,10 +41,15 @@
using namespace llvm;
using namespace irsymtab;
-static const char *LibcallRoutineNames[] = {
+static const char *PreservedSymbols[] = {
#define HANDLE_LIBCALL(code, name) name,
#include "llvm/IR/RuntimeLibcalls.def"
#undef HANDLE_LIBCALL
+ // These are global variables, so they are listed here instead of in
+ // RuntimeLibcalls.def.
+ // TODO: Are there other such variables?
+ "__ssp_canary_word",
+ "__stack_chk_guard",
};
namespace {
@@ -261,9 +266,9 @@ Error Builder::addSymbol(const ModuleSymbolTable &Msymtab,
setStr(Sym.IRName, GV->getName());
- bool IsBuiltinFunc = llvm::is_contained(LibcallRoutineNames, GV->getName());
+ bool IsPreservedSymbol = llvm::is_contained(PreservedSymbols, GV->getName());
- if (Used.count(GV) || IsBuiltinFunc)
+ if (Used.count(GV) || IsPreservedSymbol)
Sym.Flags |= 1 << storage::Symbol::FB_used;
if (GV->isThreadLocal())
Sym.Flags |= 1 << storage::Symbol::FB_tls;
@@ -283,11 +288,15 @@ Error Builder::addSymbol(const ModuleSymbolTable &Msymtab,
Uncommon().CommonAlign = GVar->getAlignment();
}
- const GlobalObject *Base = GV->getBaseObject();
- if (!Base)
- return make_error<StringError>("Unable to determine comdat of alias!",
- inconvertibleErrorCode());
- if (const Comdat *C = Base->getComdat()) {
+ const GlobalObject *GO = GV->getAliaseeObject();
+ if (!GO) {
+ if (isa<GlobalIFunc>(GV))
+ GO = cast<GlobalIFunc>(GV)->getResolverFunction();
+ if (!GO)
+ return make_error<StringError>("Unable to determine comdat of alias!",
+ inconvertibleErrorCode());
+ }
+ if (const Comdat *C = GO->getComdat()) {
Expected<int> ComdatIndexOrErr = getComdatIndex(C, GV->getParent());
if (!ComdatIndexOrErr)
return ComdatIndexOrErr.takeError();
@@ -312,8 +321,8 @@ Error Builder::addSymbol(const ModuleSymbolTable &Msymtab,
}
}
- if (!Base->getSection().empty())
- setStr(Uncommon().SectionName, Saver.save(Base->getSection()));
+ if (!GO->getSection().empty())
+ setStr(Uncommon().SectionName, Saver.save(GO->getSection()));
return Error::success();
}
diff --git a/llvm/lib/Object/MachOObjectFile.cpp b/llvm/lib/Object/MachOObjectFile.cpp
index 177314a9a790..7501661591f0 100644
--- a/llvm/lib/Object/MachOObjectFile.cpp
+++ b/llvm/lib/Object/MachOObjectFile.cpp
@@ -246,8 +246,8 @@ static Error checkOverlappingElement(std::list<MachOElement> &Elements,
if (Size == 0)
return Error::success();
- for (auto it=Elements.begin() ; it != Elements.end(); ++it) {
- auto E = *it;
+ for (auto it = Elements.begin(); it != Elements.end(); ++it) {
+ const auto &E = *it;
if ((Offset >= E.Offset && Offset < E.Offset + E.Size) ||
(Offset + Size > E.Offset && Offset + Size < E.Offset + E.Size) ||
(Offset <= E.Offset && Offset + Size >= E.Offset + E.Size))
@@ -258,7 +258,7 @@ static Error checkOverlappingElement(std::list<MachOElement> &Elements,
auto nt = it;
nt++;
if (nt != Elements.end()) {
- auto N = *nt;
+ const auto &N = *nt;
if (Offset + Size <= N.Offset) {
Elements.insert(nt, {Offset, Size, Name});
return Error::success();
@@ -2048,6 +2048,46 @@ bool MachOObjectFile::isDebugSection(DataRefImpl Sec) const {
SectionName == "__swift_ast";
}
+namespace {
+template <typename LoadCommandType>
+ArrayRef<uint8_t> getSegmentContents(const MachOObjectFile &Obj,
+ MachOObjectFile::LoadCommandInfo LoadCmd,
+ StringRef SegmentName) {
+ auto SegmentOrErr = getStructOrErr<LoadCommandType>(Obj, LoadCmd.Ptr);
+ if (!SegmentOrErr) {
+ consumeError(SegmentOrErr.takeError());
+ return {};
+ }
+ auto &Segment = SegmentOrErr.get();
+ if (StringRef(Segment.segname, 16).startswith(SegmentName))
+ return arrayRefFromStringRef(Obj.getData().slice(
+ Segment.fileoff, Segment.fileoff + Segment.filesize));
+ return {};
+}
+} // namespace
+
+ArrayRef<uint8_t>
+MachOObjectFile::getSegmentContents(StringRef SegmentName) const {
+ for (auto LoadCmd : load_commands()) {
+ ArrayRef<uint8_t> Contents;
+ switch (LoadCmd.C.cmd) {
+ case MachO::LC_SEGMENT:
+ Contents = ::getSegmentContents<MachO::segment_command>(*this, LoadCmd,
+ SegmentName);
+ break;
+ case MachO::LC_SEGMENT_64:
+ Contents = ::getSegmentContents<MachO::segment_command_64>(*this, LoadCmd,
+ SegmentName);
+ break;
+ default:
+ continue;
+ }
+ if (!Contents.empty())
+ return Contents;
+ }
+ return {};
+}
+
unsigned MachOObjectFile::getSectionID(SectionRef Sec) const {
return Sec.getRawDataRefImpl().d.a;
}
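A usage sketch for the getSegmentContents() helper added above; an empty ArrayRef means the segment was not found or its load command could not be read. The surrounding driver is assumed, only the new API call is the point:

#include "llvm/Object/MachO.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::object;

static Error printTextSegmentSize(StringRef Path) {
  Expected<OwningBinary<ObjectFile>> BinOrErr = ObjectFile::createObjectFile(Path);
  if (!BinOrErr)
    return BinOrErr.takeError();
  if (const auto *MachO = dyn_cast<MachOObjectFile>(BinOrErr->getBinary())) {
    ArrayRef<uint8_t> Text = MachO->getSegmentContents("__TEXT");
    outs() << "__TEXT occupies " << Text.size() << " bytes in the file\n";
  }
  return Error::success();
}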
diff --git a/llvm/lib/Object/ModuleSymbolTable.cpp b/llvm/lib/Object/ModuleSymbolTable.cpp
index 9a79de77af16..954d1f09f4e9 100644
--- a/llvm/lib/Object/ModuleSymbolTable.cpp
+++ b/llvm/lib/Object/ModuleSymbolTable.cpp
@@ -36,6 +36,7 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/SymbolicFile.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
@@ -43,7 +44,6 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
@@ -204,8 +204,9 @@ uint32_t ModuleSymbolTable::getSymbolFlags(Symbol S) const {
if (GVar->isConstant())
Res |= BasicSymbolRef::SF_Const;
}
- if (dyn_cast_or_null<Function>(GV->getBaseObject()))
- Res |= BasicSymbolRef::SF_Executable;
+ if (const GlobalObject *GO = GV->getAliaseeObject())
+ if (isa<Function>(GO) || isa<GlobalIFunc>(GO))
+ Res |= BasicSymbolRef::SF_Executable;
if (isa<GlobalAlias>(GV))
Res |= BasicSymbolRef::SF_Indirect;
if (GV->hasPrivateLinkage())
diff --git a/llvm/lib/Object/Object.cpp b/llvm/lib/Object/Object.cpp
index b486e9f5c9a8..0659cf6a2d41 100644
--- a/llvm/lib/Object/Object.cpp
+++ b/llvm/lib/Object/Object.cpp
@@ -222,8 +222,7 @@ void LLVMMoveToContainingSection(LLVMSectionIteratorRef Sect,
std::string Buf;
raw_string_ostream OS(Buf);
logAllUnhandledErrors(SecOrErr.takeError(), OS);
- OS.flush();
- report_fatal_error(Buf);
+ report_fatal_error(Twine(OS.str()));
}
*unwrap(Sect) = *SecOrErr;
}
@@ -304,8 +303,7 @@ const char *LLVMGetSymbolName(LLVMSymbolIteratorRef SI) {
std::string Buf;
raw_string_ostream OS(Buf);
logAllUnhandledErrors(Ret.takeError(), OS);
- OS.flush();
- report_fatal_error(Buf);
+ report_fatal_error(Twine(OS.str()));
}
return Ret->data();
}
@@ -316,8 +314,7 @@ uint64_t LLVMGetSymbolAddress(LLVMSymbolIteratorRef SI) {
std::string Buf;
raw_string_ostream OS(Buf);
logAllUnhandledErrors(Ret.takeError(), OS);
- OS.flush();
- report_fatal_error(Buf);
+ report_fatal_error(Twine(OS.str()));
}
return *Ret;
}
diff --git a/llvm/lib/Object/ObjectFile.cpp b/llvm/lib/Object/ObjectFile.cpp
index 5c894439ff67..6fd02f3b9592 100644
--- a/llvm/lib/Object/ObjectFile.cpp
+++ b/llvm/lib/Object/ObjectFile.cpp
@@ -55,14 +55,15 @@ bool SectionRef::containsSymbol(SymbolRef S) const {
}
Expected<uint64_t> ObjectFile::getSymbolValue(DataRefImpl Ref) const {
- if (Expected<uint32_t> FlagsOrErr = getSymbolFlags(Ref)) {
- if (*FlagsOrErr & SymbolRef::SF_Undefined)
- return 0;
- if (*FlagsOrErr & SymbolRef::SF_Common)
- return getCommonSymbolSize(Ref);
- } else
+ uint32_t Flags;
+ if (Error E = getSymbolFlags(Ref).moveInto(Flags))
// TODO: Test this error.
- return FlagsOrErr.takeError();
+ return std::move(E);
+
+ if (Flags & SymbolRef::SF_Undefined)
+ return 0;
+ if (Flags & SymbolRef::SF_Common)
+ return getCommonSymbolSize(Ref);
return getSymbolValueImpl(Ref);
}
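The rewrite above uses Expected<T>::moveInto to keep the success path unindented: the value is moved out up front and any Error is returned immediately. A small sketch of the same idiom through the public SymbolRef interface (the caller's handle is assumed):

#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Error.h"
using namespace llvm;

static Expected<uint64_t> valueOrZeroIfUndefined(const object::SymbolRef &Sym) {
  uint32_t Flags;
  if (Error E = Sym.getFlags().moveInto(Flags))
    return std::move(E); // propagate the failure without nesting the happy path
  if (Flags & object::SymbolRef::SF_Undefined)
    return 0;
  return Sym.getValue();
}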
diff --git a/llvm/lib/Object/RelocationResolver.cpp b/llvm/lib/Object/RelocationResolver.cpp
index ab98a2dd2ac1..00a45e2c5d4e 100644
--- a/llvm/lib/Object/RelocationResolver.cpp
+++ b/llvm/lib/Object/RelocationResolver.cpp
@@ -18,7 +18,7 @@ namespace object {
static int64_t getELFAddend(RelocationRef R) {
Expected<int64_t> AddendOrErr = ELFRelocationRef(R).getAddend();
handleAllErrors(AddendOrErr.takeError(), [](const ErrorInfoBase &EI) {
- report_fatal_error(EI.message());
+ report_fatal_error(Twine(EI.message()));
});
return *AddendOrErr;
}
diff --git a/llvm/lib/Object/WasmObjectFile.cpp b/llvm/lib/Object/WasmObjectFile.cpp
index a08c648358c0..6a19b159f3d5 100644
--- a/llvm/lib/Object/WasmObjectFile.cpp
+++ b/llvm/lib/Object/WasmObjectFile.cpp
@@ -286,9 +286,9 @@ WasmObjectFile::WasmObjectFile(MemoryBufferRef Buffer, Error &Err)
return;
}
- WasmSection Sec;
WasmSectionOrderChecker Checker;
while (Ctx.Ptr < Ctx.End) {
+ WasmSection Sec;
if ((Err = readSection(Sec, Ctx, Checker)))
return;
if ((Err = parseSection(Sec)))
@@ -339,7 +339,8 @@ Error WasmObjectFile::parseSection(WasmSection &Sec) {
}
Error WasmObjectFile::parseDylinkSection(ReadContext &Ctx) {
- // See https://github.com/WebAssembly/tool-conventions/blob/master/DynamicLinking.md
+ // Legacy "dylink" section support.
+ // See parseDylink0Section for the current "dylink.0" section parsing.
HasDylinkSection = true;
DylinkInfo.MemorySize = readVaruint32(Ctx);
DylinkInfo.MemoryAlignment = readVaruint32(Ctx);
@@ -349,17 +350,77 @@ Error WasmObjectFile::parseDylinkSection(ReadContext &Ctx) {
while (Count--) {
DylinkInfo.Needed.push_back(readString(Ctx));
}
+
if (Ctx.Ptr != Ctx.End)
return make_error<GenericBinaryError>("dylink section ended prematurely",
object_error::parse_failed);
return Error::success();
}
+Error WasmObjectFile::parseDylink0Section(ReadContext &Ctx) {
+ // See
+ // https://github.com/WebAssembly/tool-conventions/blob/main/DynamicLinking.md
+ HasDylinkSection = true;
+
+ const uint8_t *OrigEnd = Ctx.End;
+ while (Ctx.Ptr < OrigEnd) {
+ Ctx.End = OrigEnd;
+ uint8_t Type = readUint8(Ctx);
+ uint32_t Size = readVaruint32(Ctx);
+ LLVM_DEBUG(dbgs() << "readSubsection type=" << int(Type) << " size=" << Size
+ << "\n");
+ Ctx.End = Ctx.Ptr + Size;
+ uint32_t Count;
+ switch (Type) {
+ case wasm::WASM_DYLINK_MEM_INFO:
+ DylinkInfo.MemorySize = readVaruint32(Ctx);
+ DylinkInfo.MemoryAlignment = readVaruint32(Ctx);
+ DylinkInfo.TableSize = readVaruint32(Ctx);
+ DylinkInfo.TableAlignment = readVaruint32(Ctx);
+ break;
+ case wasm::WASM_DYLINK_NEEDED:
+ Count = readVaruint32(Ctx);
+ while (Count--) {
+ DylinkInfo.Needed.push_back(readString(Ctx));
+ }
+ break;
+ case wasm::WASM_DYLINK_EXPORT_INFO: {
+ uint32_t Count = readVaruint32(Ctx);
+ while (Count--) {
+ DylinkInfo.ExportInfo.push_back({readString(Ctx), readVaruint32(Ctx)});
+ }
+ break;
+ }
+ case wasm::WASM_DYLINK_IMPORT_INFO: {
+ uint32_t Count = readVaruint32(Ctx);
+ while (Count--) {
+ DylinkInfo.ImportInfo.push_back(
+ {readString(Ctx), readString(Ctx), readVaruint32(Ctx)});
+ }
+ break;
+ }
+ default:
+ LLVM_DEBUG(dbgs() << "unknown dylink.0 sub-section: " << Type << "\n");
+ Ctx.Ptr += Size;
+ break;
+ }
+ if (Ctx.Ptr != Ctx.End) {
+ return make_error<GenericBinaryError>(
+ "dylink.0 sub-section ended prematurely", object_error::parse_failed);
+ }
+ }
+
+ if (Ctx.Ptr != Ctx.End)
+ return make_error<GenericBinaryError>("dylink.0 section ended prematurely",
+ object_error::parse_failed);
+ return Error::success();
+}
+
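parseDylink0Section above walks "dylink.0" as a sequence of subsections, each a (type byte, LEB128 size, payload) triple. As a worked illustration of that layout, assuming WASM_DYLINK_MEM_INFO is encoded as type 1 per the tool-conventions document linked above, a MEM_INFO subsection with memory size 64, memory alignment 0, table size 1 and table alignment 0 is the six bytes:

01           ; subsection type: WASM_DYLINK_MEM_INFO (assumed value 1)
04           ; payload size in bytes (LEB128)
40 00 01 00  ; memorysize=64, memoryalignment=0, tablesize=1, tablealignment=0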
Error WasmObjectFile::parseNameSection(ReadContext &Ctx) {
llvm::DenseSet<uint64_t> SeenFunctions;
llvm::DenseSet<uint64_t> SeenGlobals;
llvm::DenseSet<uint64_t> SeenSegments;
- if (FunctionTypes.size() && !SeenCodeSection) {
+ if (Functions.size() && !SeenCodeSection) {
return make_error<GenericBinaryError>("names must come after code section",
object_error::parse_failed);
}
@@ -427,7 +488,7 @@ Error WasmObjectFile::parseNameSection(ReadContext &Ctx) {
Error WasmObjectFile::parseLinkingSection(ReadContext &Ctx) {
HasLinkingSection = true;
- if (FunctionTypes.size() && !SeenCodeSection) {
+ if (Functions.size() && !SeenCodeSection) {
return make_error<GenericBinaryError>(
"linking data must come after code section",
object_error::parse_failed);
@@ -529,7 +590,6 @@ Error WasmObjectFile::parseLinkingSectionSymtab(ReadContext &Ctx) {
const wasm::WasmSignature *Signature = nullptr;
const wasm::WasmGlobalType *GlobalType = nullptr;
const wasm::WasmTableType *TableType = nullptr;
- const wasm::WasmTagType *TagType = nullptr;
Info.Kind = readUint8(Ctx);
Info.Flags = readVaruint32(Ctx);
@@ -545,8 +605,8 @@ Error WasmObjectFile::parseLinkingSectionSymtab(ReadContext &Ctx) {
if (IsDefined) {
Info.Name = readString(Ctx);
unsigned FuncIndex = Info.ElementIndex - NumImportedFunctions;
- Signature = &Signatures[FunctionTypes[FuncIndex]];
wasm::WasmFunction &Function = Functions[FuncIndex];
+ Signature = &Signatures[Function.SigIndex];
if (Function.SymbolName.empty())
Function.SymbolName = Info.Name;
} else {
@@ -674,8 +734,7 @@ Error WasmObjectFile::parseLinkingSectionSymtab(ReadContext &Ctx) {
Info.Name = readString(Ctx);
unsigned TagIndex = Info.ElementIndex - NumImportedTags;
wasm::WasmTag &Tag = Tags[TagIndex];
- Signature = &Signatures[Tag.Type.SigIndex];
- TagType = &Tag.Type;
+ Signature = &Signatures[Tag.SigIndex];
if (Tag.SymbolName.empty())
Tag.SymbolName = Info.Name;
@@ -687,8 +746,7 @@ Error WasmObjectFile::parseLinkingSectionSymtab(ReadContext &Ctx) {
} else {
Info.Name = Import.Field;
}
- TagType = &Import.Tag;
- Signature = &Signatures[TagType->SigIndex];
+ Signature = &Signatures[Import.SigIndex];
if (!Import.Module.empty()) {
Info.ImportModule = Import.Module;
}
@@ -710,7 +768,7 @@ Error WasmObjectFile::parseLinkingSectionSymtab(ReadContext &Ctx) {
object_error::parse_failed);
LinkingData.SymbolTable.emplace_back(Info);
Symbols.emplace_back(LinkingData.SymbolTable.back(), GlobalType, TableType,
- TagType, Signature);
+ Signature);
LLVM_DEBUG(dbgs() << "Adding symbol: " << Symbols.back() << "\n");
}
@@ -984,6 +1042,9 @@ Error WasmObjectFile::parseCustomSection(WasmSection &Sec, ReadContext &Ctx) {
if (Sec.Name == "dylink") {
if (Error Err = parseDylinkSection(Ctx))
return Err;
+ } else if (Sec.Name == "dylink.0") {
+ if (Error Err = parseDylink0Section(Ctx))
+ return Err;
} else if (Sec.Name == "name") {
if (Error Err = parseNameSection(Ctx))
return Err;
@@ -1034,6 +1095,7 @@ Error WasmObjectFile::parseTypeSection(ReadContext &Ctx) {
Error WasmObjectFile::parseImportSection(ReadContext &Ctx) {
uint32_t Count = readVaruint32(Ctx);
+ uint32_t NumTypes = Signatures.size();
Imports.reserve(Count);
for (uint32_t I = 0; I < Count; I++) {
wasm::WasmImport Im;
@@ -1044,6 +1106,9 @@ Error WasmObjectFile::parseImportSection(ReadContext &Ctx) {
case wasm::WASM_EXTERNAL_FUNCTION:
NumImportedFunctions++;
Im.SigIndex = readVaruint32(Ctx);
+ if (Im.SigIndex >= NumTypes)
+ return make_error<GenericBinaryError>("invalid function type",
+ object_error::parse_failed);
break;
case wasm::WASM_EXTERNAL_GLOBAL:
NumImportedGlobals++;
@@ -1067,8 +1132,13 @@ Error WasmObjectFile::parseImportSection(ReadContext &Ctx) {
}
case wasm::WASM_EXTERNAL_TAG:
NumImportedTags++;
- Im.Tag.Attribute = readUint8(Ctx);
- Im.Tag.SigIndex = readVarint32(Ctx);
+ if (readUint8(Ctx) != 0) // Reserved 'attribute' field
+ return make_error<GenericBinaryError>("invalid attribute",
+ object_error::parse_failed);
+ Im.SigIndex = readVaruint32(Ctx);
+ if (Im.SigIndex >= NumTypes)
+ return make_error<GenericBinaryError>("invalid tag type",
+ object_error::parse_failed);
break;
default:
return make_error<GenericBinaryError>("unexpected import kind",
@@ -1084,15 +1154,16 @@ Error WasmObjectFile::parseImportSection(ReadContext &Ctx) {
Error WasmObjectFile::parseFunctionSection(ReadContext &Ctx) {
uint32_t Count = readVaruint32(Ctx);
- FunctionTypes.reserve(Count);
- Functions.resize(Count);
+ Functions.reserve(Count);
uint32_t NumTypes = Signatures.size();
while (Count--) {
uint32_t Type = readVaruint32(Ctx);
if (Type >= NumTypes)
return make_error<GenericBinaryError>("invalid function type",
object_error::parse_failed);
- FunctionTypes.push_back(Type);
+ wasm::WasmFunction F;
+ F.SigIndex = Type;
+ Functions.push_back(F);
}
if (Ctx.Ptr != Ctx.End)
return make_error<GenericBinaryError>("function section ended prematurely",
@@ -1141,11 +1212,18 @@ Error WasmObjectFile::parseTagSection(ReadContext &Ctx) {
TagSection = Sections.size();
uint32_t Count = readVaruint32(Ctx);
Tags.reserve(Count);
+ uint32_t NumTypes = Signatures.size();
while (Count--) {
+ if (readUint8(Ctx) != 0) // Reserved 'attribute' field
+ return make_error<GenericBinaryError>("invalid attribute",
+ object_error::parse_failed);
+ uint32_t Type = readVaruint32(Ctx);
+ if (Type >= NumTypes)
+ return make_error<GenericBinaryError>("invalid tag type",
+ object_error::parse_failed);
wasm::WasmTag Tag;
Tag.Index = NumImportedTags + Tags.size();
- Tag.Type.Attribute = readUint8(Ctx);
- Tag.Type.SigIndex = readVaruint32(Ctx);
+ Tag.SigIndex = Type;
Tags.push_back(Tag);
}
@@ -1216,7 +1294,7 @@ Error WasmObjectFile::parseExportSection(ReadContext &Ctx) {
}
bool WasmObjectFile::isValidFunctionIndex(uint32_t Index) const {
- return Index < NumImportedFunctions + FunctionTypes.size();
+ return Index < NumImportedFunctions + Functions.size();
}
bool WasmObjectFile::isDefinedFunctionIndex(uint32_t Index) const {
@@ -1304,7 +1382,7 @@ Error WasmObjectFile::parseCodeSection(ReadContext &Ctx) {
SeenCodeSection = true;
CodeSection = Sections.size();
uint32_t FunctionCount = readVaruint32(Ctx);
- if (FunctionCount != FunctionTypes.size()) {
+ if (FunctionCount != Functions.size()) {
return make_error<GenericBinaryError>("invalid function count",
object_error::parse_failed);
}
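With the FunctionTypes side table gone (the refactor above stores the type index directly on each function record), a defined function's signature is reached through the function itself. A fragment illustrating the lookup, assuming Index has already passed isDefinedFunctionIndex():

const wasm::WasmFunction &F = Functions[Index - NumImportedFunctions];
const wasm::WasmSignature &Sig = Signatures[F.SigIndex];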
@@ -1793,6 +1871,7 @@ int WasmSectionOrderChecker::getSectionOrder(unsigned ID,
case wasm::WASM_SEC_CUSTOM:
return StringSwitch<unsigned>(CustomSectionName)
.Case("dylink", WASM_SEC_ORDER_DYLINK)
+ .Case("dylink.0", WASM_SEC_ORDER_DYLINK)
.Case("linking", WASM_SEC_ORDER_LINKING)
.StartsWith("reloc.", WASM_SEC_ORDER_RELOC)
.Case("name", WASM_SEC_ORDER_NAME)
diff --git a/llvm/lib/Object/XCOFFObjectFile.cpp b/llvm/lib/Object/XCOFFObjectFile.cpp
index 53447d0c97b2..9b0a5efacba7 100644
--- a/llvm/lib/Object/XCOFFObjectFile.cpp
+++ b/llvm/lib/Object/XCOFFObjectFile.cpp
@@ -69,15 +69,18 @@ bool XCOFFSectionHeader<T>::isReservedSectionType() const {
return getSectionType() & SectionFlagsReservedMask;
}
-bool XCOFFRelocation32::isRelocationSigned() const {
+template <typename AddressType>
+bool XCOFFRelocation<AddressType>::isRelocationSigned() const {
return Info & XR_SIGN_INDICATOR_MASK;
}
-bool XCOFFRelocation32::isFixupIndicated() const {
+template <typename AddressType>
+bool XCOFFRelocation<AddressType>::isFixupIndicated() const {
return Info & XR_FIXUP_INDICATOR_MASK;
}
-uint8_t XCOFFRelocation32::getRelocatedLength() const {
+template <typename AddressType>
+uint8_t XCOFFRelocation<AddressType>::getRelocatedLength() const {
// The relocation encodes the bit length being relocated minus 1. Add back
// the 1 to get the actual length being relocated.
return (Info & XR_BIASED_LENGTH_MASK) + 1;
@@ -146,6 +149,20 @@ const XCOFFFileHeader64 *XCOFFObjectFile::fileHeader64() const {
return static_cast<const XCOFFFileHeader64 *>(FileHeader);
}
+const XCOFFAuxiliaryHeader32 *XCOFFObjectFile::auxiliaryHeader32() const {
+ assert(!is64Bit() && "32-bit interface called on 64-bit object file.");
+ return static_cast<const XCOFFAuxiliaryHeader32 *>(AuxiliaryHeader);
+}
+
+const XCOFFAuxiliaryHeader64 *XCOFFObjectFile::auxiliaryHeader64() const {
+ assert(is64Bit() && "64-bit interface called on a 32-bit object file.");
+ return static_cast<const XCOFFAuxiliaryHeader64 *>(AuxiliaryHeader);
+}
+
+template <typename T> const T *XCOFFObjectFile::sectionHeaderTable() const {
+ return static_cast<const T *>(SectionHeaderTable);
+}
+
const XCOFFSectionHeader32 *
XCOFFObjectFile::sectionHeaderTable32() const {
assert(!is64Bit() && "32-bit interface called on 64-bit object file.");
@@ -183,12 +200,16 @@ XCOFFObjectFile::getStringTableEntry(uint32_t Offset) const {
if (StringTable.Data != nullptr && StringTable.Size > Offset)
return (StringTable.Data + Offset);
- return make_error<GenericBinaryError>("Bad offset for string table entry",
- object_error::parse_failed);
+ return createError("entry with offset 0x" + Twine::utohexstr(Offset) +
+ " in a string table with size 0x" +
+ Twine::utohexstr(StringTable.Size) + " is invalid");
}
StringRef XCOFFObjectFile::getStringTable() const {
- return StringRef(StringTable.Data, StringTable.Size);
+ // If the size is less than or equal to 4, then the string table contains no
+ // string data.
+ return StringRef(StringTable.Data,
+ StringTable.Size <= 4 ? 0 : StringTable.Size);
}
Expected<StringRef>
@@ -210,15 +231,85 @@ uint64_t XCOFFObjectFile::getSymbolValueImpl(DataRefImpl Symb) const {
return toSymbolRef(Symb).getValue();
}
+uint32_t XCOFFObjectFile::getSymbolAlignment(DataRefImpl Symb) const {
+ uint64_t Result = 0;
+ XCOFFSymbolRef XCOFFSym = toSymbolRef(Symb);
+ if (XCOFFSym.isCsectSymbol()) {
+ Expected<XCOFFCsectAuxRef> CsectAuxRefOrError =
+ XCOFFSym.getXCOFFCsectAuxRef();
+ if (!CsectAuxRefOrError)
+ // TODO: report the error up the stack.
+ consumeError(CsectAuxRefOrError.takeError());
+ else
+ Result = 1ULL << CsectAuxRefOrError.get().getAlignmentLog2();
+ }
+ return Result;
+}
+
uint64_t XCOFFObjectFile::getCommonSymbolSizeImpl(DataRefImpl Symb) const {
uint64_t Result = 0;
- llvm_unreachable("Not yet implemented!");
+ XCOFFSymbolRef XCOFFSym = toSymbolRef(Symb);
+ if (XCOFFSym.isCsectSymbol()) {
+ Expected<XCOFFCsectAuxRef> CsectAuxRefOrError =
+ XCOFFSym.getXCOFFCsectAuxRef();
+ if (!CsectAuxRefOrError)
+ // TODO: report the error up the stack.
+ consumeError(CsectAuxRefOrError.takeError());
+ else {
+ XCOFFCsectAuxRef CsectAuxRef = CsectAuxRefOrError.get();
+ assert(CsectAuxRef.getSymbolType() == XCOFF::XTY_CM);
+ Result = CsectAuxRef.getSectionOrLength();
+ }
+ }
return Result;
}
Expected<SymbolRef::Type>
XCOFFObjectFile::getSymbolType(DataRefImpl Symb) const {
- // TODO: Return the correct symbol type.
+ XCOFFSymbolRef XCOFFSym = toSymbolRef(Symb);
+
+ if (XCOFFSym.isFunction())
+ return SymbolRef::ST_Function;
+
+ if (XCOFF::C_FILE == XCOFFSym.getStorageClass())
+ return SymbolRef::ST_File;
+
+ int16_t SecNum = XCOFFSym.getSectionNumber();
+ if (SecNum <= 0)
+ return SymbolRef::ST_Other;
+
+ Expected<DataRefImpl> SecDRIOrErr =
+ getSectionByNum(XCOFFSym.getSectionNumber());
+
+ if (!SecDRIOrErr)
+ return SecDRIOrErr.takeError();
+
+ DataRefImpl SecDRI = SecDRIOrErr.get();
+
+ Expected<StringRef> SymNameOrError = XCOFFSym.getName();
+ if (SymNameOrError) {
+ // The "TOC" symbol is treated as SymbolRef::ST_Other.
+ if (SymNameOrError.get() == "TOC")
+ return SymbolRef::ST_Other;
+
+ // The symbol for a section name is treated as SymbolRef::ST_Other.
+ StringRef SecName;
+ if (is64Bit())
+ SecName = XCOFFObjectFile::toSection64(SecDRIOrErr.get())->getName();
+ else
+ SecName = XCOFFObjectFile::toSection32(SecDRIOrErr.get())->getName();
+
+ if (SecName == SymNameOrError.get())
+ return SymbolRef::ST_Other;
+ } else
+ return SymNameOrError.takeError();
+
+ if (isSectionData(SecDRI) || isSectionBSS(SecDRI))
+ return SymbolRef::ST_Data;
+
+ if (isDebugSection(SecDRI))
+ return SymbolRef::ST_Debug;
+
return SymbolRef::ST_Other;
}
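With getSymbolType() now filled in above, XCOFF symbols can be classified through the generic interface. A small self-contained sketch that counts function symbols in any object file:

#include "llvm/Object/ObjectFile.h"
using namespace llvm;

static Expected<unsigned> countFunctionSymbols(const object::ObjectFile &Obj) {
  unsigned N = 0;
  for (const object::SymbolRef &Sym : Obj.symbols()) {
    Expected<object::SymbolRef::Type> TypeOrErr = Sym.getType();
    if (!TypeOrErr)
      return TypeOrErr.takeError();
    if (*TypeOrErr == object::SymbolRef::ST_Function)
      ++N;
  }
  return N;
}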
@@ -285,8 +376,12 @@ XCOFFObjectFile::getSectionContents(DataRefImpl Sec) const {
const uint8_t * ContentStart = base() + OffsetToRaw;
uint64_t SectionSize = getSectionSize(Sec);
- if (checkOffset(Data, reinterpret_cast<uintptr_t>(ContentStart), SectionSize))
- return make_error<BinaryError>();
+ if (Error E = Binary::checkOffset(
+ Data, reinterpret_cast<uintptr_t>(ContentStart), SectionSize))
+ return createError(
+ toString(std::move(E)) + ": section data with offset 0x" +
+ Twine::utohexstr(OffsetToRaw) + " and size 0x" +
+ Twine::utohexstr(SectionSize) + " goes past the end of the file");
return makeArrayRef(ContentStart,SectionSize);
}
@@ -297,6 +392,43 @@ uint64_t XCOFFObjectFile::getSectionAlignment(DataRefImpl Sec) const {
return Result;
}
+Expected<uintptr_t> XCOFFObjectFile::getLoaderSectionAddress() const {
+ uint64_t OffsetToLoaderSection = 0;
+ uint64_t SizeOfLoaderSection = 0;
+
+ if (is64Bit()) {
+ for (const auto &Sec64 : sections64())
+ if (Sec64.getSectionType() == XCOFF::STYP_LOADER) {
+ OffsetToLoaderSection = Sec64.FileOffsetToRawData;
+ SizeOfLoaderSection = Sec64.SectionSize;
+ break;
+ }
+ } else {
+ for (const auto &Sec32 : sections32())
+ if (Sec32.getSectionType() == XCOFF::STYP_LOADER) {
+ OffsetToLoaderSection = Sec32.FileOffsetToRawData;
+ SizeOfLoaderSection = Sec32.SectionSize;
+ break;
+ }
+ }
+
+ // Not having a loader section is not an error.
+ if (!SizeOfLoaderSection)
+ return 0;
+
+ uintptr_t LoaderSectionStart =
+ reinterpret_cast<uintptr_t>(base() + OffsetToLoaderSection);
+ if (Error E =
+ Binary::checkOffset(Data, LoaderSectionStart, SizeOfLoaderSection))
+ return createError(toString(std::move(E)) +
+ ": loader section with offset 0x" +
+ Twine::utohexstr(OffsetToLoaderSection) +
+ " and size 0x" + Twine::utohexstr(SizeOfLoaderSection) +
+ " goes past the end of the file");
+
+ return LoaderSectionStart;
+}
+
bool XCOFFObjectFile::isSectionCompressed(DataRefImpl Sec) const {
return false;
}
@@ -326,61 +458,112 @@ bool XCOFFObjectFile::isSectionVirtual(DataRefImpl Sec) const {
}
relocation_iterator XCOFFObjectFile::section_rel_begin(DataRefImpl Sec) const {
- if (is64Bit())
- report_fatal_error("64-bit support not implemented yet");
- const XCOFFSectionHeader32 *SectionEntPtr = toSection32(Sec);
- auto RelocationsOrErr = relocations(*SectionEntPtr);
- if (Error E = RelocationsOrErr.takeError())
- return relocation_iterator(RelocationRef());
DataRefImpl Ret;
- Ret.p = reinterpret_cast<uintptr_t>(&*RelocationsOrErr.get().begin());
+ if (is64Bit()) {
+ const XCOFFSectionHeader64 *SectionEntPtr = toSection64(Sec);
+ auto RelocationsOrErr =
+ relocations<XCOFFSectionHeader64, XCOFFRelocation64>(*SectionEntPtr);
+ if (Error E = RelocationsOrErr.takeError()) {
+ // TODO: report the error up the stack.
+ consumeError(std::move(E));
+ return relocation_iterator(RelocationRef());
+ }
+ Ret.p = reinterpret_cast<uintptr_t>(&*RelocationsOrErr.get().begin());
+ } else {
+ const XCOFFSectionHeader32 *SectionEntPtr = toSection32(Sec);
+ auto RelocationsOrErr =
+ relocations<XCOFFSectionHeader32, XCOFFRelocation32>(*SectionEntPtr);
+ if (Error E = RelocationsOrErr.takeError()) {
+ // TODO: report the error up the stack.
+ consumeError(std::move(E));
+ return relocation_iterator(RelocationRef());
+ }
+ Ret.p = reinterpret_cast<uintptr_t>(&*RelocationsOrErr.get().begin());
+ }
return relocation_iterator(RelocationRef(Ret, this));
}
relocation_iterator XCOFFObjectFile::section_rel_end(DataRefImpl Sec) const {
- if (is64Bit())
- report_fatal_error("64-bit support not implemented yet");
- const XCOFFSectionHeader32 *SectionEntPtr = toSection32(Sec);
- auto RelocationsOrErr = relocations(*SectionEntPtr);
- if (Error E = RelocationsOrErr.takeError())
- return relocation_iterator(RelocationRef());
DataRefImpl Ret;
- Ret.p = reinterpret_cast<uintptr_t>(&*RelocationsOrErr.get().end());
+ if (is64Bit()) {
+ const XCOFFSectionHeader64 *SectionEntPtr = toSection64(Sec);
+ auto RelocationsOrErr =
+ relocations<XCOFFSectionHeader64, XCOFFRelocation64>(*SectionEntPtr);
+ if (Error E = RelocationsOrErr.takeError()) {
+ // TODO: report the error up the stack.
+ consumeError(std::move(E));
+ return relocation_iterator(RelocationRef());
+ }
+ Ret.p = reinterpret_cast<uintptr_t>(&*RelocationsOrErr.get().end());
+ } else {
+ const XCOFFSectionHeader32 *SectionEntPtr = toSection32(Sec);
+ auto RelocationsOrErr =
+ relocations<XCOFFSectionHeader32, XCOFFRelocation32>(*SectionEntPtr);
+ if (Error E = RelocationsOrErr.takeError()) {
+ // TODO: report the error up the stack.
+ consumeError(std::move(E));
+ return relocation_iterator(RelocationRef());
+ }
+ Ret.p = reinterpret_cast<uintptr_t>(&*RelocationsOrErr.get().end());
+ }
return relocation_iterator(RelocationRef(Ret, this));
}
void XCOFFObjectFile::moveRelocationNext(DataRefImpl &Rel) const {
- Rel.p = reinterpret_cast<uintptr_t>(viewAs<XCOFFRelocation32>(Rel.p) + 1);
+ if (is64Bit())
+ Rel.p = reinterpret_cast<uintptr_t>(viewAs<XCOFFRelocation64>(Rel.p) + 1);
+ else
+ Rel.p = reinterpret_cast<uintptr_t>(viewAs<XCOFFRelocation32>(Rel.p) + 1);
}
uint64_t XCOFFObjectFile::getRelocationOffset(DataRefImpl Rel) const {
- if (is64Bit())
- report_fatal_error("64-bit support not implemented yet");
- const XCOFFRelocation32 *Reloc = viewAs<XCOFFRelocation32>(Rel.p);
- const XCOFFSectionHeader32 *Sec32 = sectionHeaderTable32();
- const uint32_t RelocAddress = Reloc->VirtualAddress;
- const uint16_t NumberOfSections = getNumberOfSections();
- for (uint16_t i = 0; i < NumberOfSections; ++i) {
- // Find which section this relocation is belonging to, and get the
- // relocation offset relative to the start of the section.
- if (Sec32->VirtualAddress <= RelocAddress &&
- RelocAddress < Sec32->VirtualAddress + Sec32->SectionSize) {
- return RelocAddress - Sec32->VirtualAddress;
+ if (is64Bit()) {
+ const XCOFFRelocation64 *Reloc = viewAs<XCOFFRelocation64>(Rel.p);
+ const XCOFFSectionHeader64 *Sec64 = sectionHeaderTable64();
+ const uint64_t RelocAddress = Reloc->VirtualAddress;
+ const uint16_t NumberOfSections = getNumberOfSections();
+ for (uint16_t I = 0; I < NumberOfSections; ++I) {
+ // Find which section this relocation belongs to, and get the
+ // relocation offset relative to the start of the section.
+ if (Sec64->VirtualAddress <= RelocAddress &&
+ RelocAddress < Sec64->VirtualAddress + Sec64->SectionSize) {
+ return RelocAddress - Sec64->VirtualAddress;
+ }
+ ++Sec64;
+ }
+ } else {
+ const XCOFFRelocation32 *Reloc = viewAs<XCOFFRelocation32>(Rel.p);
+ const XCOFFSectionHeader32 *Sec32 = sectionHeaderTable32();
+ const uint32_t RelocAddress = Reloc->VirtualAddress;
+ const uint16_t NumberOfSections = getNumberOfSections();
+ for (uint16_t I = 0; I < NumberOfSections; ++I) {
+ // Find which section this relocation belongs to, and get the
+ // relocation offset relative to the start of the section.
+ if (Sec32->VirtualAddress <= RelocAddress &&
+ RelocAddress < Sec32->VirtualAddress + Sec32->SectionSize) {
+ return RelocAddress - Sec32->VirtualAddress;
+ }
+ ++Sec32;
}
- ++Sec32;
}
return InvalidRelocOffset;
}
symbol_iterator XCOFFObjectFile::getRelocationSymbol(DataRefImpl Rel) const {
- if (is64Bit())
- report_fatal_error("64-bit support not implemented yet");
- const XCOFFRelocation32 *Reloc = viewAs<XCOFFRelocation32>(Rel.p);
- const uint32_t Index = Reloc->SymbolIndex;
-
- if (Index >= getLogicalNumberOfSymbolTableEntries32())
- return symbol_end();
-
+ uint32_t Index;
+ if (is64Bit()) {
+ const XCOFFRelocation64 *Reloc = viewAs<XCOFFRelocation64>(Rel.p);
+ Index = Reloc->SymbolIndex;
+
+ if (Index >= getNumberOfSymbolTableEntries64())
+ return symbol_end();
+ } else {
+ const XCOFFRelocation32 *Reloc = viewAs<XCOFFRelocation32>(Rel.p);
+ Index = Reloc->SymbolIndex;
+
+ if (Index >= getLogicalNumberOfSymbolTableEntries32())
+ return symbol_end();
+ }
DataRefImpl SymDRI;
SymDRI.p = getSymbolEntryAddressByIndex(Index);
return symbol_iterator(SymbolRef(SymDRI, this));
@@ -388,22 +571,50 @@ symbol_iterator XCOFFObjectFile::getRelocationSymbol(DataRefImpl Rel) const {
uint64_t XCOFFObjectFile::getRelocationType(DataRefImpl Rel) const {
if (is64Bit())
- report_fatal_error("64-bit support not implemented yet");
+ return viewAs<XCOFFRelocation64>(Rel.p)->Type;
return viewAs<XCOFFRelocation32>(Rel.p)->Type;
}
void XCOFFObjectFile::getRelocationTypeName(
DataRefImpl Rel, SmallVectorImpl<char> &Result) const {
- if (is64Bit())
- report_fatal_error("64-bit support not implemented yet");
- const XCOFFRelocation32 *Reloc = viewAs<XCOFFRelocation32>(Rel.p);
- StringRef Res = XCOFF::getRelocationTypeString(Reloc->Type);
+ StringRef Res;
+ if (is64Bit()) {
+ const XCOFFRelocation64 *Reloc = viewAs<XCOFFRelocation64>(Rel.p);
+ Res = XCOFF::getRelocationTypeString(Reloc->Type);
+ } else {
+ const XCOFFRelocation32 *Reloc = viewAs<XCOFFRelocation32>(Rel.p);
+ Res = XCOFF::getRelocationTypeString(Reloc->Type);
+ }
Result.append(Res.begin(), Res.end());
}
Expected<uint32_t> XCOFFObjectFile::getSymbolFlags(DataRefImpl Symb) const {
- uint32_t Result = 0;
- // TODO: Return correct symbol flags.
+ XCOFFSymbolRef XCOFFSym = toSymbolRef(Symb);
+ uint32_t Result = SymbolRef::SF_None;
+
+ if (XCOFFSym.getSectionNumber() == XCOFF::N_ABS)
+ Result |= SymbolRef::SF_Absolute;
+
+ XCOFF::StorageClass SC = XCOFFSym.getStorageClass();
+ if (XCOFF::C_EXT == SC || XCOFF::C_WEAKEXT == SC)
+ Result |= SymbolRef::SF_Global;
+
+ if (XCOFF::C_WEAKEXT == SC)
+ Result |= SymbolRef::SF_Weak;
+
+ if (XCOFFSym.isCsectSymbol()) {
+ Expected<XCOFFCsectAuxRef> CsectAuxEntOrErr =
+ XCOFFSym.getXCOFFCsectAuxRef();
+ if (CsectAuxEntOrErr) {
+ if (CsectAuxEntOrErr.get().getSymbolType() == XCOFF::XTY_CM)
+ Result |= SymbolRef::SF_Common;
+ } else
+ return CsectAuxEntOrErr.takeError();
+ }
+
+ if (XCOFFSym.getSectionNumber() == XCOFF::N_UNDEF)
+ Result |= SymbolRef::SF_Undefined;
+
return Result;
}
@@ -494,7 +705,9 @@ uint16_t XCOFFObjectFile::getMagic() const {
Expected<DataRefImpl> XCOFFObjectFile::getSectionByNum(int16_t Num) const {
if (Num <= 0 || Num > getNumberOfSections())
- return errorCodeToError(object_error::invalid_section_index);
+ return createStringError(object_error::invalid_section_index,
+ "the section index (" + Twine(Num) +
+ ") is invalid");
DataRefImpl DRI;
DRI.p = getWithOffset(getSectionHeaderTableAddress(),
@@ -602,6 +815,25 @@ uint32_t XCOFFObjectFile::getSymbolIndex(uintptr_t SymbolEntPtr) const {
XCOFF::SymbolTableEntrySize;
}
+uint64_t XCOFFObjectFile::getSymbolSize(DataRefImpl Symb) const {
+ uint64_t Result = 0;
+ XCOFFSymbolRef XCOFFSym = toSymbolRef(Symb);
+ if (XCOFFSym.isCsectSymbol()) {
+ Expected<XCOFFCsectAuxRef> CsectAuxRefOrError =
+ XCOFFSym.getXCOFFCsectAuxRef();
+ if (!CsectAuxRefOrError)
+ // TODO: report the error up the stack.
+ consumeError(CsectAuxRefOrError.takeError());
+ else {
+ XCOFFCsectAuxRef CsectAuxRef = CsectAuxRefOrError.get();
+ uint8_t SymType = CsectAuxRef.getSymbolType();
+ if (SymType == XCOFF::XTY_SD || SymType == XCOFF::XTY_CM)
+ Result = CsectAuxRef.getSectionOrLength();
+ }
+ }
+ return Result;
+}
+
uintptr_t XCOFFObjectFile::getSymbolEntryAddressByIndex(uint32_t Index) const {
return getAdvancedSymbolEntryAddress(
reinterpret_cast<uintptr_t>(getPointerToSymbolTable()), Index);
@@ -612,7 +844,9 @@ XCOFFObjectFile::getSymbolNameByIndex(uint32_t Index) const {
const uint32_t NumberOfSymTableEntries = getNumberOfSymbolTableEntries();
if (Index >= NumberOfSymTableEntries)
- return errorCodeToError(object_error::invalid_symbol_index);
+ return createError("symbol index " + Twine(Index) +
+ " exceeds symbol count " +
+ Twine(NumberOfSymTableEntries));
DataRefImpl SymDRI;
SymDRI.p = getSymbolEntryAddressByIndex(Index);
@@ -658,13 +892,16 @@ ArrayRef<XCOFFSectionHeader32> XCOFFObjectFile::sections32() const {
// section header contains the actual count of relocation entries in the s_paddr
// field. STYP_OVRFLO headers contain the section index of their corresponding
// sections as their raw "NumberOfRelocations" field value.
-Expected<uint32_t> XCOFFObjectFile::getLogicalNumberOfRelocationEntries(
- const XCOFFSectionHeader32 &Sec) const {
-
- uint16_t SectionIndex = &Sec - sectionHeaderTable32() + 1;
+template <typename T>
+Expected<uint32_t> XCOFFObjectFile::getNumberOfRelocationEntries(
+ const XCOFFSectionHeader<T> &Sec) const {
+ const T &Section = static_cast<const T &>(Sec);
+ if (is64Bit())
+ return Section.NumberOfRelocations;
- if (Sec.NumberOfRelocations < XCOFF::RelocOverflow)
- return Sec.NumberOfRelocations;
+ uint16_t SectionIndex = &Section - sectionHeaderTable<T>() + 1;
+ if (Section.NumberOfRelocations < XCOFF::RelocOverflow)
+ return Section.NumberOfRelocations;
for (const auto &Sec : sections32()) {
if (Sec.Flags == XCOFF::STYP_OVRFLO &&
Sec.NumberOfRelocations == SectionIndex)
@@ -673,27 +910,31 @@ Expected<uint32_t> XCOFFObjectFile::getLogicalNumberOfRelocationEntries(
return errorCodeToError(object_error::parse_failed);
}
-Expected<ArrayRef<XCOFFRelocation32>>
-XCOFFObjectFile::relocations(const XCOFFSectionHeader32 &Sec) const {
+template <typename Shdr, typename Reloc>
+Expected<ArrayRef<Reloc>> XCOFFObjectFile::relocations(const Shdr &Sec) const {
uintptr_t RelocAddr = getWithOffset(reinterpret_cast<uintptr_t>(FileHeader),
Sec.FileOffsetToRelocationInfo);
- auto NumRelocEntriesOrErr = getLogicalNumberOfRelocationEntries(Sec);
+ auto NumRelocEntriesOrErr = getNumberOfRelocationEntries(Sec);
if (Error E = NumRelocEntriesOrErr.takeError())
return std::move(E);
uint32_t NumRelocEntries = NumRelocEntriesOrErr.get();
-
- static_assert(
- sizeof(XCOFFRelocation32) == XCOFF::RelocationSerializationSize32, "");
+ static_assert((sizeof(Reloc) == XCOFF::RelocationSerializationSize64 ||
+ sizeof(Reloc) == XCOFF::RelocationSerializationSize32),
+ "Relocation structure is incorrect");
auto RelocationOrErr =
- getObject<XCOFFRelocation32>(Data, reinterpret_cast<void *>(RelocAddr),
- NumRelocEntries * sizeof(XCOFFRelocation32));
- if (Error E = RelocationOrErr.takeError())
- return std::move(E);
+ getObject<Reloc>(Data, reinterpret_cast<void *>(RelocAddr),
+ NumRelocEntries * sizeof(Reloc));
+ if (!RelocationOrErr)
+ return createError(
+ toString(RelocationOrErr.takeError()) + ": relocations with offset 0x" +
+ Twine::utohexstr(Sec.FileOffsetToRelocationInfo) + " and size 0x" +
+ Twine::utohexstr(NumRelocEntries * sizeof(Reloc)) +
+ " go past the end of the file");
- const XCOFFRelocation32 *StartReloc = RelocationOrErr.get();
+ const Reloc *StartReloc = RelocationOrErr.get();
- return ArrayRef<XCOFFRelocation32>(StartReloc, StartReloc + NumRelocEntries);
+ return ArrayRef<Reloc>(StartReloc, StartReloc + NumRelocEntries);
}
Expected<XCOFFStringTable>
@@ -716,8 +957,12 @@ XCOFFObjectFile::parseStringTable(const XCOFFObjectFile *Obj, uint64_t Offset) {
auto StringTableOrErr =
getObject<char>(Obj->Data, Obj->base() + Offset, Size);
- if (Error E = StringTableOrErr.takeError())
- return std::move(E);
+ if (!StringTableOrErr)
+ return createError(toString(StringTableOrErr.takeError()) +
+ ": string table with offset 0x" +
+ Twine::utohexstr(Offset) + " and size 0x" +
+ Twine::utohexstr(Size) +
+ " goes past the end of the file");
const char *StringTablePtr = StringTableOrErr.get();
if (StringTablePtr[Size - 1] != '\0')
@@ -726,6 +971,54 @@ XCOFFObjectFile::parseStringTable(const XCOFFObjectFile *Obj, uint64_t Offset) {
return XCOFFStringTable{Size, StringTablePtr};
}
+// This function returns the import file table. Each entry in the import file
+// table consists of: "path_name\0base_name\0archive_member_name\0".
+Expected<StringRef> XCOFFObjectFile::getImportFileTable() const {
+ Expected<uintptr_t> LoaderSectionAddrOrError = getLoaderSectionAddress();
+ if (!LoaderSectionAddrOrError)
+ return LoaderSectionAddrOrError.takeError();
+
+ uintptr_t LoaderSectionAddr = LoaderSectionAddrOrError.get();
+ if (!LoaderSectionAddr)
+ return StringRef();
+
+ uint64_t OffsetToImportFileTable = 0;
+ uint64_t LengthOfImportFileTable = 0;
+ if (is64Bit()) {
+ const LoaderSectionHeader64 *LoaderSec64 =
+ viewAs<LoaderSectionHeader64>(LoaderSectionAddr);
+ OffsetToImportFileTable = LoaderSec64->OffsetToImpid;
+ LengthOfImportFileTable = LoaderSec64->LengthOfImpidStrTbl;
+ } else {
+ const LoaderSectionHeader32 *LoaderSec32 =
+ viewAs<LoaderSectionHeader32>(LoaderSectionAddr);
+ OffsetToImportFileTable = LoaderSec32->OffsetToImpid;
+ LengthOfImportFileTable = LoaderSec32->LengthOfImpidStrTbl;
+ }
+
+ auto ImportTableOrErr = getObject<char>(
+ Data,
+ reinterpret_cast<void *>(LoaderSectionAddr + OffsetToImportFileTable),
+ LengthOfImportFileTable);
+ if (!ImportTableOrErr)
+ return createError(
+ toString(ImportTableOrErr.takeError()) +
+ ": import file table with offset 0x" +
+ Twine::utohexstr(LoaderSectionAddr + OffsetToImportFileTable) +
+ " and size 0x" + Twine::utohexstr(LengthOfImportFileTable) +
+ " goes past the end of the file");
+
+ const char *ImportTablePtr = ImportTableOrErr.get();
+ if (ImportTablePtr[LengthOfImportFileTable - 1] != '\0')
+ return createError(
+ ": import file name table with offset 0x" +
+ Twine::utohexstr(LoaderSectionAddr + OffsetToImportFileTable) +
+ " and size 0x" + Twine::utohexstr(LengthOfImportFileTable) +
+ " must end with a null terminator");
+
+ return StringRef(ImportTablePtr, LengthOfImportFileTable);
+}
+
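A worked illustration of the layout getImportFileTable() returns, with illustrative strings only: the table is a flat run of NUL-separated path/base/member triples, so two entries ("/usr/lib", "libc.a", "shr.o") and ("", "libpthreads.a", "shr_xpg5.o") are stored as

/usr/lib\0libc.a\0shr.o\0\0libpthreads.a\0shr_xpg5.o\0

and LengthOfImpidStrTbl counts every byte including the final NUL, which is exactly what the terminator check above verifies.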
Expected<std::unique_ptr<XCOFFObjectFile>>
XCOFFObjectFile::create(unsigned Type, MemoryBufferRef MBR) {
// Can't use std::make_unique because of the private constructor.
@@ -744,17 +1037,30 @@ XCOFFObjectFile::create(unsigned Type, MemoryBufferRef MBR) {
Obj->FileHeader = FileHeaderOrErr.get();
CurOffset += Obj->getFileHeaderSize();
- // TODO FIXME we don't have support for an optional header yet, so just skip
- // past it.
+
+ if (Obj->getOptionalHeaderSize()) {
+ auto AuxiliaryHeaderOrErr =
+ getObject<void>(Data, Base + CurOffset, Obj->getOptionalHeaderSize());
+ if (Error E = AuxiliaryHeaderOrErr.takeError())
+ return std::move(E);
+ Obj->AuxiliaryHeader = AuxiliaryHeaderOrErr.get();
+ }
+
CurOffset += Obj->getOptionalHeaderSize();
// Parse the section header table if it is present.
if (Obj->getNumberOfSections()) {
- auto SecHeadersOrErr = getObject<void>(Data, Base + CurOffset,
- Obj->getNumberOfSections() *
- Obj->getSectionHeaderSize());
- if (Error E = SecHeadersOrErr.takeError())
- return std::move(E);
+ uint64_t SectionHeadersSize =
+ Obj->getNumberOfSections() * Obj->getSectionHeaderSize();
+ auto SecHeadersOrErr =
+ getObject<void>(Data, Base + CurOffset, SectionHeadersSize);
+ if (!SecHeadersOrErr)
+ return createError(toString(SecHeadersOrErr.takeError()) +
+ ": section headers with offset 0x" +
+ Twine::utohexstr(CurOffset) + " and size 0x" +
+ Twine::utohexstr(SectionHeadersSize) +
+ " go past the end of the file");
+
Obj->SectionHeaderTable = SecHeadersOrErr.get();
}
@@ -773,8 +1079,12 @@ XCOFFObjectFile::create(unsigned Type, MemoryBufferRef MBR) {
NumberOfSymbolTableEntries;
auto SymTableOrErr =
getObject<void *>(Data, Base + CurOffset, SymbolTableSize);
- if (Error E = SymTableOrErr.takeError())
- return std::move(E);
+ if (!SymTableOrErr)
+ return createError(
+ toString(SymTableOrErr.takeError()) + ": symbol table with offset 0x" +
+ Twine::utohexstr(CurOffset) + " and size 0x" +
+ Twine::utohexstr(SymbolTableSize) + " goes past the end of the file");
+
Obj->SymbolTblPtr = SymTableOrErr.get();
CurOffset += SymbolTableSize;
@@ -844,10 +1154,10 @@ Expected<XCOFFCsectAuxRef> XCOFFSymbolRef::getXCOFFCsectAuxRef() const {
if (auto Err = NameOrErr.takeError())
return std::move(Err);
+ uint32_t SymbolIdx = OwningObjectPtr->getSymbolIndex(getEntryAddress());
if (!NumberOfAuxEntries) {
- return createStringError(object_error::parse_failed,
- "csect symbol \"" + *NameOrErr +
- "\" contains no auxiliary entry");
+ return createError("csect symbol \"" + *NameOrErr + "\" with index " +
+ Twine(SymbolIdx) + " contains no auxiliary entry");
}
if (!OwningObjectPtr->is64Bit()) {
@@ -872,9 +1182,9 @@ Expected<XCOFFCsectAuxRef> XCOFFSymbolRef::getXCOFFCsectAuxRef() const {
}
}
- return createStringError(
- object_error::parse_failed,
- "a csect auxiliary entry is not found for symbol \"" + *NameOrErr + "\"");
+ return createError(
+ "a csect auxiliary entry has not been found for symbol \"" + *NameOrErr +
+ "\" with index " + Twine(SymbolIdx));
}
Expected<StringRef> XCOFFSymbolRef::getName() const {
@@ -897,6 +1207,18 @@ Expected<StringRef> XCOFFSymbolRef::getName() const {
template struct XCOFFSectionHeader<XCOFFSectionHeader32>;
template struct XCOFFSectionHeader<XCOFFSectionHeader64>;
+template struct XCOFFRelocation<llvm::support::ubig32_t>;
+template struct XCOFFRelocation<llvm::support::ubig64_t>;
+
+template llvm::Expected<llvm::ArrayRef<llvm::object::XCOFFRelocation64>>
+llvm::object::XCOFFObjectFile::relocations<llvm::object::XCOFFSectionHeader64,
+ llvm::object::XCOFFRelocation64>(
+ llvm::object::XCOFFSectionHeader64 const &) const;
+template llvm::Expected<llvm::ArrayRef<llvm::object::XCOFFRelocation32>>
+llvm::object::XCOFFObjectFile::relocations<llvm::object::XCOFFSectionHeader32,
+ llvm::object::XCOFFRelocation32>(
+ llvm::object::XCOFFSectionHeader32 const &) const;
+
bool doesXCOFFTracebackTableBegin(ArrayRef<uint8_t> Bytes) {
if (Bytes.size() < 4)
return false;
diff --git a/llvm/lib/ObjectYAML/COFFEmitter.cpp b/llvm/lib/ObjectYAML/COFFEmitter.cpp
index 06ce93affd38..5f38ca13cfc2 100644
--- a/llvm/lib/ObjectYAML/COFFEmitter.cpp
+++ b/llvm/lib/ObjectYAML/COFFEmitter.cpp
@@ -170,8 +170,8 @@ static bool layoutOptionalHeader(COFFParser &CP) {
unsigned PEHeaderSize = CP.is64Bit() ? sizeof(object::pe32plus_header)
: sizeof(object::pe32_header);
CP.Obj.Header.SizeOfOptionalHeader =
- PEHeaderSize +
- sizeof(object::data_directory) * (COFF::NUM_DATA_DIRECTORIES + 1);
+ PEHeaderSize + sizeof(object::data_directory) *
+ CP.Obj.OptionalHeader->Header.NumberOfRvaAndSize;
return true;
}
@@ -397,7 +397,7 @@ static uint32_t initializeOptionalHeader(COFFParser &CP, uint16_t Magic,
Header->SizeOfStackCommit = CP.Obj.OptionalHeader->Header.SizeOfStackCommit;
Header->SizeOfHeapReserve = CP.Obj.OptionalHeader->Header.SizeOfHeapReserve;
Header->SizeOfHeapCommit = CP.Obj.OptionalHeader->Header.SizeOfHeapCommit;
- Header->NumberOfRvaAndSize = COFF::NUM_DATA_DIRECTORIES + 1;
+ Header->NumberOfRvaAndSize = CP.Obj.OptionalHeader->Header.NumberOfRvaAndSize;
return BaseOfData;
}
@@ -458,18 +458,20 @@ static bool writeCOFF(COFFParser &CP, raw_ostream &OS) {
PEH.BaseOfData = BaseOfData;
OS.write(reinterpret_cast<char *>(&PEH), sizeof(PEH));
}
- for (const Optional<COFF::DataDirectory> &DD :
- CP.Obj.OptionalHeader->DataDirectories) {
- if (!DD.hasValue()) {
+ for (uint32_t I = 0; I < CP.Obj.OptionalHeader->Header.NumberOfRvaAndSize;
+ ++I) {
+ const Optional<COFF::DataDirectory> *DataDirectories =
+ CP.Obj.OptionalHeader->DataDirectories;
+ uint32_t NumDataDir = sizeof(CP.Obj.OptionalHeader->DataDirectories) /
+ sizeof(Optional<COFF::DataDirectory>);
+ if (I >= NumDataDir || !DataDirectories[I].hasValue()) {
OS << zeros(uint32_t(0));
OS << zeros(uint32_t(0));
} else {
- OS << binary_le(DD->RelativeVirtualAddress);
- OS << binary_le(DD->Size);
+ OS << binary_le(DataDirectories[I]->RelativeVirtualAddress);
+ OS << binary_le(DataDirectories[I]->Size);
}
}
- OS << zeros(uint32_t(0));
- OS << zeros(uint32_t(0));
}
assert(OS.tell() == CP.SectionTableStart);
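As a quick check of the size computation above, assuming the usual PE32+ layout (a 112-byte pe32plus_header and 8-byte data_directory entries): with the default NumberOfRvaAndSize of COFF::NUM_DATA_DIRECTORIES + 1 = 16, SizeOfOptionalHeader comes out to 112 + 16 * 8 = 240 (0xF0), the value linkers normally emit, while a YAML input that sets a smaller NumberOfRvaAndSize now shrinks the header accordingly instead of always writing 16 directory slots.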
diff --git a/llvm/lib/ObjectYAML/COFFYAML.cpp b/llvm/lib/ObjectYAML/COFFYAML.cpp
index 96069c0c590f..6e5cdce89060 100644
--- a/llvm/lib/ObjectYAML/COFFYAML.cpp
+++ b/llvm/lib/ObjectYAML/COFFYAML.cpp
@@ -448,25 +448,27 @@ void MappingTraits<COFFYAML::PEHeader>::mapping(IO &IO,
MappingNormalization<NDLLCharacteristics, uint16_t> NDC(
IO, PH.Header.DLLCharacteristics);
- IO.mapRequired("AddressOfEntryPoint", PH.Header.AddressOfEntryPoint);
- IO.mapRequired("ImageBase", PH.Header.ImageBase);
- IO.mapRequired("SectionAlignment", PH.Header.SectionAlignment);
- IO.mapRequired("FileAlignment", PH.Header.FileAlignment);
- IO.mapRequired("MajorOperatingSystemVersion",
+ IO.mapOptional("AddressOfEntryPoint", PH.Header.AddressOfEntryPoint);
+ IO.mapOptional("ImageBase", PH.Header.ImageBase);
+ IO.mapOptional("SectionAlignment", PH.Header.SectionAlignment, 1);
+ IO.mapOptional("FileAlignment", PH.Header.FileAlignment, 1);
+ IO.mapOptional("MajorOperatingSystemVersion",
PH.Header.MajorOperatingSystemVersion);
- IO.mapRequired("MinorOperatingSystemVersion",
+ IO.mapOptional("MinorOperatingSystemVersion",
PH.Header.MinorOperatingSystemVersion);
- IO.mapRequired("MajorImageVersion", PH.Header.MajorImageVersion);
- IO.mapRequired("MinorImageVersion", PH.Header.MinorImageVersion);
- IO.mapRequired("MajorSubsystemVersion", PH.Header.MajorSubsystemVersion);
- IO.mapRequired("MinorSubsystemVersion", PH.Header.MinorSubsystemVersion);
- IO.mapRequired("Subsystem", NWS->Subsystem);
- IO.mapRequired("DLLCharacteristics", NDC->Characteristics);
- IO.mapRequired("SizeOfStackReserve", PH.Header.SizeOfStackReserve);
- IO.mapRequired("SizeOfStackCommit", PH.Header.SizeOfStackCommit);
- IO.mapRequired("SizeOfHeapReserve", PH.Header.SizeOfHeapReserve);
- IO.mapRequired("SizeOfHeapCommit", PH.Header.SizeOfHeapCommit);
-
+ IO.mapOptional("MajorImageVersion", PH.Header.MajorImageVersion);
+ IO.mapOptional("MinorImageVersion", PH.Header.MinorImageVersion);
+ IO.mapOptional("MajorSubsystemVersion", PH.Header.MajorSubsystemVersion);
+ IO.mapOptional("MinorSubsystemVersion", PH.Header.MinorSubsystemVersion);
+ IO.mapOptional("Subsystem", NWS->Subsystem);
+ IO.mapOptional("DLLCharacteristics", NDC->Characteristics);
+ IO.mapOptional("SizeOfStackReserve", PH.Header.SizeOfStackReserve);
+ IO.mapOptional("SizeOfStackCommit", PH.Header.SizeOfStackCommit);
+ IO.mapOptional("SizeOfHeapReserve", PH.Header.SizeOfHeapReserve);
+ IO.mapOptional("SizeOfHeapCommit", PH.Header.SizeOfHeapCommit);
+
+ IO.mapOptional("NumberOfRvaAndSize", PH.Header.NumberOfRvaAndSize,
+ COFF::NUM_DATA_DIRECTORIES + 1);
IO.mapOptional("ExportTable", PH.DataDirectories[COFF::EXPORT_TABLE]);
IO.mapOptional("ImportTable", PH.DataDirectories[COFF::IMPORT_TABLE]);
IO.mapOptional("ResourceTable", PH.DataDirectories[COFF::RESOURCE_TABLE]);
diff --git a/llvm/lib/ObjectYAML/ELFEmitter.cpp b/llvm/lib/ObjectYAML/ELFEmitter.cpp
index f8f2f0c12020..e378be3892fe 100644
--- a/llvm/lib/ObjectYAML/ELFEmitter.cpp
+++ b/llvm/lib/ObjectYAML/ELFEmitter.cpp
@@ -1380,9 +1380,6 @@ void ELFState<ELFT>::writeSectionContent(
if (!Section.Entries)
return;
- if (!Section.Entries)
- return;
-
for (const ELFYAML::StackSizeEntry &E : *Section.Entries) {
CBA.write<uintX_t>(E.Address, ELFT::TargetEndianness);
SHeader.sh_size += sizeof(uintX_t) + CBA.writeULEB128(E.Size);
@@ -1488,9 +1485,6 @@ void ELFState<ELFT>::writeSectionContent(Elf_Shdr &SHeader,
if (!Section.Bucket)
return;
- if (!Section.Bucket)
- return;
-
CBA.write<uint32_t>(
Section.NBucket.getValueOr(llvm::yaml::Hex64(Section.Bucket->size())),
ELFT::TargetEndianness);
@@ -1663,9 +1657,6 @@ void ELFState<ELFT>::writeSectionContent(Elf_Shdr &SHeader,
if (!Section.Symbols)
return;
- if (!Section.Symbols)
- return;
-
for (StringRef Sym : *Section.Symbols)
SHeader.sh_size +=
CBA.writeULEB128(toSymbolIndex(Sym, Section.Name, /*IsDynamic=*/false));
diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp
index 50821544a687..fdf9aeae1622 100644
--- a/llvm/lib/ObjectYAML/ELFYAML.cpp
+++ b/llvm/lib/ObjectYAML/ELFYAML.cpp
@@ -155,6 +155,13 @@ void ScalarEnumerationTraits<ELFYAML::ELF_NT>::enumeration(
ECase(NT_FREEBSD_PROCSTAT_OSREL);
ECase(NT_FREEBSD_PROCSTAT_PSSTRINGS);
ECase(NT_FREEBSD_PROCSTAT_AUXV);
+ // OpenBSD core note types.
+ ECase(NT_OPENBSD_PROCINFO);
+ ECase(NT_OPENBSD_AUXV);
+ ECase(NT_OPENBSD_REGS);
+ ECase(NT_OPENBSD_FPREGS);
+ ECase(NT_OPENBSD_XFPREGS);
+ ECase(NT_OPENBSD_WCOOKIE);
// AMD specific notes. (Code Object V2)
ECase(NT_AMD_HSA_CODE_OBJECT_VERSION);
ECase(NT_AMD_HSA_HSAIL);
@@ -655,6 +662,9 @@ void ScalarEnumerationTraits<ELFYAML::ELF_SHT>::enumeration(
case ELF::EM_RISCV:
ECase(SHT_RISCV_ATTRIBUTES);
break;
+ case ELF::EM_MSP430:
+ ECase(SHT_MSP430_ATTRIBUTES);
+ break;
default:
// Nothing to do.
break;
@@ -887,6 +897,13 @@ void ScalarEnumerationTraits<ELFYAML::ELF_DYNTAG>::enumeration(
#undef PPC64_DYNAMIC_TAG
#define PPC64_DYNAMIC_TAG(name, value)
break;
+ case ELF::EM_RISCV:
+#undef RISCV_DYNAMIC_TAG
+#define RISCV_DYNAMIC_TAG(name, value) DYNAMIC_TAG(name, value)
+#include "llvm/BinaryFormat/DynamicTags.def"
+#undef RISCV_DYNAMIC_TAG
+#define RISCV_DYNAMIC_TAG(name, value)
+ break;
default:
#include "llvm/BinaryFormat/DynamicTags.def"
break;
@@ -1165,6 +1182,8 @@ struct NormalizedOther {
if (EMachine == ELF::EM_AARCH64)
Map["STO_AARCH64_VARIANT_PCS"] = ELF::STO_AARCH64_VARIANT_PCS;
+ if (EMachine == ELF::EM_RISCV)
+ Map["STO_RISCV_VARIANT_CC"] = ELF::STO_RISCV_VARIANT_CC;
return Map;
}
diff --git a/llvm/lib/ObjectYAML/MachOEmitter.cpp b/llvm/lib/ObjectYAML/MachOEmitter.cpp
index 46e4dd05a737..c653c29ec9a7 100644
--- a/llvm/lib/ObjectYAML/MachOEmitter.cpp
+++ b/llvm/lib/ObjectYAML/MachOEmitter.cpp
@@ -184,6 +184,30 @@ size_t writeLoadCommandData<MachO::rpath_command>(MachOYAML::LoadCommand &LC,
}
template <>
+size_t writeLoadCommandData<MachO::sub_framework_command>(
+ MachOYAML::LoadCommand &LC, raw_ostream &OS, bool IsLittleEndian) {
+ return writePayloadString(LC, OS);
+}
+
+template <>
+size_t writeLoadCommandData<MachO::sub_umbrella_command>(
+ MachOYAML::LoadCommand &LC, raw_ostream &OS, bool IsLittleEndian) {
+ return writePayloadString(LC, OS);
+}
+
+template <>
+size_t writeLoadCommandData<MachO::sub_client_command>(
+ MachOYAML::LoadCommand &LC, raw_ostream &OS, bool IsLittleEndian) {
+ return writePayloadString(LC, OS);
+}
+
+template <>
+size_t writeLoadCommandData<MachO::sub_library_command>(
+ MachOYAML::LoadCommand &LC, raw_ostream &OS, bool IsLittleEndian) {
+ return writePayloadString(LC, OS);
+}
+
+template <>
size_t writeLoadCommandData<MachO::build_version_command>(
MachOYAML::LoadCommand &LC, raw_ostream &OS, bool IsLittleEndian) {
size_t BytesWritten = 0;
@@ -264,6 +288,7 @@ void MachOWriter::writeLoadCommands(raw_ostream &OS) {
}
Error MachOWriter::writeSectionData(raw_ostream &OS) {
+ uint64_t LinkEditOff = 0;
for (auto &LC : Obj.LoadCommands) {
switch (LC.Data.load_command_data.cmd) {
case MachO::LC_SEGMENT:
@@ -273,6 +298,9 @@ Error MachOWriter::writeSectionData(raw_ostream &OS) {
if (0 ==
strncmp(&LC.Data.segment_command_data.segname[0], "__LINKEDIT", 16)) {
FoundLinkEditSeg = true;
+ LinkEditOff = segOff;
+ if (Obj.RawLinkEditSegment)
+ continue;
writeLinkEditData(OS);
}
for (auto &Sec : LC.Sections) {
@@ -320,6 +348,13 @@ Error MachOWriter::writeSectionData(raw_ostream &OS) {
}
}
+ if (Obj.RawLinkEditSegment) {
+ ZeroToOffset(OS, LinkEditOff);
+ if (OS.tell() - fileStart > LinkEditOff || !LinkEditOff)
+ return createStringError(errc::invalid_argument,
+ "section offsets don't line up");
+ Obj.RawLinkEditSegment->writeAsBinary(OS);
+ }
return Error::success();
}
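
The raw __LINKEDIT path above remembers the segment's file offset, zero-fills up to it, and reports an error if earlier data already ran past that point. A condensed sketch of the same check-and-pad step; padToLinkEditOffset is an illustrative name, not an LLVM API:

    #include "llvm/Support/Errc.h"
    #include "llvm/Support/Error.h"
    #include "llvm/Support/raw_ostream.h"

    static llvm::Error padToLinkEditOffset(llvm::raw_ostream &OS,
                                           uint64_t FileStart,
                                           uint64_t LinkEditOff) {
      uint64_t Written = OS.tell() - FileStart;
      // Mirrors the guard above: the offset must be known and not yet passed.
      if (Written > LinkEditOff || !LinkEditOff)
        return llvm::createStringError(llvm::errc::invalid_argument,
                                       "section offsets don't line up");
      OS.write_zeros(LinkEditOff - Written);
      return llvm::Error::success();
    }
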
diff --git a/llvm/lib/ObjectYAML/MachOYAML.cpp b/llvm/lib/ObjectYAML/MachOYAML.cpp
index dce82ab1cada..c9562bd72258 100644
--- a/llvm/lib/ObjectYAML/MachOYAML.cpp
+++ b/llvm/lib/ObjectYAML/MachOYAML.cpp
@@ -110,6 +110,9 @@ void MappingTraits<MachOYAML::Object>::mapping(IO &IO,
Object.DWARF.Is64BitAddrSize = Object.Header.magic == MachO::MH_MAGIC_64 ||
Object.Header.magic == MachO::MH_CIGAM_64;
IO.mapOptional("LoadCommands", Object.LoadCommands);
+
+ if (Object.RawLinkEditSegment || !IO.outputting())
+ IO.mapOptional("__LINKEDIT", Object.RawLinkEditSegment);
if(!Object.LinkEdit.isEmpty() || !IO.outputting())
IO.mapOptional("LinkEditData", Object.LinkEdit);
@@ -234,6 +237,30 @@ void mapLoadCommandData<MachO::dylinker_command>(
}
template <>
+void mapLoadCommandData<MachO::sub_framework_command>(
+ IO &IO, MachOYAML::LoadCommand &LoadCommand) {
+ IO.mapOptional("Content", LoadCommand.Content);
+}
+
+template <>
+void mapLoadCommandData<MachO::sub_umbrella_command>(
+ IO &IO, MachOYAML::LoadCommand &LoadCommand) {
+ IO.mapOptional("Content", LoadCommand.Content);
+}
+
+template <>
+void mapLoadCommandData<MachO::sub_client_command>(
+ IO &IO, MachOYAML::LoadCommand &LoadCommand) {
+ IO.mapOptional("Content", LoadCommand.Content);
+}
+
+template <>
+void mapLoadCommandData<MachO::sub_library_command>(
+ IO &IO, MachOYAML::LoadCommand &LoadCommand) {
+ IO.mapOptional("Content", LoadCommand.Content);
+}
+
+template <>
void mapLoadCommandData<MachO::build_version_command>(
IO &IO, MachOYAML::LoadCommand &LoadCommand) {
IO.mapOptional("Tools", LoadCommand.Tools);
diff --git a/llvm/lib/ObjectYAML/WasmEmitter.cpp b/llvm/lib/ObjectYAML/WasmEmitter.cpp
index 888ba115e2d9..80a8c56f6912 100644
--- a/llvm/lib/ObjectYAML/WasmEmitter.cpp
+++ b/llvm/lib/ObjectYAML/WasmEmitter.cpp
@@ -157,13 +157,24 @@ void WasmWriter::writeInitExpr(raw_ostream &OS,
void WasmWriter::writeSectionContent(raw_ostream &OS,
WasmYAML::DylinkSection &Section) {
writeStringRef(Section.Name, OS);
- encodeULEB128(Section.MemorySize, OS);
- encodeULEB128(Section.MemoryAlignment, OS);
- encodeULEB128(Section.TableSize, OS);
- encodeULEB128(Section.TableAlignment, OS);
- encodeULEB128(Section.Needed.size(), OS);
- for (StringRef Needed : Section.Needed)
- writeStringRef(Needed, OS);
+
+ writeUint8(OS, wasm::WASM_DYLINK_MEM_INFO);
+ SubSectionWriter SubSection(OS);
+ raw_ostream &SubOS = SubSection.getStream();
+ encodeULEB128(Section.MemorySize, SubOS);
+ encodeULEB128(Section.MemoryAlignment, SubOS);
+ encodeULEB128(Section.TableSize, SubOS);
+ encodeULEB128(Section.TableAlignment, SubOS);
+ SubSection.done();
+
+ if (Section.Needed.size()) {
+ writeUint8(OS, wasm::WASM_DYLINK_NEEDED);
+ raw_ostream &SubOS = SubSection.getStream();
+ encodeULEB128(Section.Needed.size(), SubOS);
+ for (StringRef Needed : Section.Needed)
+ writeStringRef(Needed, SubOS);
+ SubSection.done();
+ }
}
void WasmWriter::writeSectionContent(raw_ostream &OS,
@@ -386,8 +397,8 @@ void WasmWriter::writeSectionContent(raw_ostream &OS,
NumImportedGlobals++;
break;
case wasm::WASM_EXTERNAL_TAG:
- writeUint32(OS, Import.TagImport.Attribute);
- writeUint32(OS, Import.TagImport.SigIndex);
+ writeUint8(OS, 0); // Reserved 'attribute' field
+ encodeULEB128(Import.SigIndex, OS);
NumImportedTags++;
break;
case wasm::WASM_EXTERNAL_MEMORY:
@@ -451,16 +462,10 @@ void WasmWriter::writeSectionContent(raw_ostream &OS,
void WasmWriter::writeSectionContent(raw_ostream &OS,
WasmYAML::TagSection &Section) {
- encodeULEB128(Section.Tags.size(), OS);
- uint32_t ExpectedIndex = NumImportedTags;
- for (auto &Tag : Section.Tags) {
- if (Tag.Index != ExpectedIndex) {
- reportError("unexpected tag index: " + Twine(Tag.Index));
- return;
- }
- ++ExpectedIndex;
- encodeULEB128(Tag.Attribute, OS);
- encodeULEB128(Tag.SigIndex, OS);
+ encodeULEB128(Section.TagTypes.size(), OS);
+ for (uint32_t TagType : Section.TagTypes) {
+ writeUint8(OS, 0); // Reserved 'attribute' field
+ encodeULEB128(TagType, OS);
}
}
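
The rewritten dylink writer targets the newer "dylink.0" layout: each piece of data goes into a subsection introduced by a one-byte id (WASM_DYLINK_MEM_INFO, WASM_DYLINK_NEEDED, ...) followed by its ULEB128-encoded payload size. A minimal sketch of that framing with an illustrative helper in place of the SubSectionWriter used above:

    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/LEB128.h"
    #include "llvm/Support/raw_ostream.h"

    // Writes one dylink.0 subsection: id byte, ULEB128 payload size, payload.
    static void writeDylinkSubsection(llvm::raw_ostream &OS, uint8_t Id,
                                      llvm::StringRef Payload) {
      OS << char(Id);
      llvm::encodeULEB128(Payload.size(), OS);
      OS << Payload;
    }
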
diff --git a/llvm/lib/ObjectYAML/WasmYAML.cpp b/llvm/lib/ObjectYAML/WasmYAML.cpp
index 752654ddbbaf..3f0172ebf361 100644
--- a/llvm/lib/ObjectYAML/WasmYAML.cpp
+++ b/llvm/lib/ObjectYAML/WasmYAML.cpp
@@ -55,6 +55,8 @@ static void sectionMapping(IO &IO, WasmYAML::DylinkSection &Section) {
IO.mapRequired("TableSize", Section.TableSize);
IO.mapRequired("TableAlignment", Section.TableAlignment);
IO.mapRequired("Needed", Section.Needed);
+ IO.mapOptional("ImportInfo", Section.ImportInfo);
+ IO.mapOptional("ExportInfo", Section.ExportInfo);
}
static void sectionMapping(IO &IO, WasmYAML::NameSection &Section) {
@@ -122,7 +124,7 @@ static void sectionMapping(IO &IO, WasmYAML::MemorySection &Section) {
static void sectionMapping(IO &IO, WasmYAML::TagSection &Section) {
commonSectionMapping(IO, Section);
- IO.mapOptional("Tags", Section.Tags);
+ IO.mapOptional("TagTypes", Section.TagTypes);
}
static void sectionMapping(IO &IO, WasmYAML::GlobalSection &Section) {
@@ -177,7 +179,7 @@ void MappingTraits<std::unique_ptr<WasmYAML::Section>>::mapping(
} else {
IO.mapRequired("Name", SectionName);
}
- if (SectionName == "dylink") {
+ if (SectionName == "dylink" || SectionName == "dylink.0") {
if (!IO.outputting())
Section.reset(new WasmYAML::DylinkSection());
sectionMapping(IO, *cast<WasmYAML::DylinkSection>(Section.get()));
@@ -391,14 +393,12 @@ void MappingTraits<WasmYAML::Import>::mapping(IO &IO,
IO.mapRequired("Module", Import.Module);
IO.mapRequired("Field", Import.Field);
IO.mapRequired("Kind", Import.Kind);
- if (Import.Kind == wasm::WASM_EXTERNAL_FUNCTION) {
+ if (Import.Kind == wasm::WASM_EXTERNAL_FUNCTION ||
+ Import.Kind == wasm::WASM_EXTERNAL_TAG) {
IO.mapRequired("SigIndex", Import.SigIndex);
} else if (Import.Kind == wasm::WASM_EXTERNAL_GLOBAL) {
IO.mapRequired("GlobalType", Import.GlobalImport.Type);
IO.mapRequired("GlobalMutable", Import.GlobalImport.Mutable);
- } else if (Import.Kind == wasm::WASM_EXTERNAL_TAG) {
- IO.mapRequired("TagAttribute", Import.TagImport.Attribute);
- IO.mapRequired("TagSigIndex", Import.TagImport.SigIndex);
} else if (Import.Kind == wasm::WASM_EXTERNAL_TABLE) {
IO.mapRequired("Table", Import.TableImport);
} else if (Import.Kind == wasm::WASM_EXTERNAL_MEMORY) {
@@ -525,10 +525,17 @@ void MappingTraits<WasmYAML::SymbolInfo>::mapping(IO &IO,
}
}
-void MappingTraits<WasmYAML::Tag>::mapping(IO &IO, WasmYAML::Tag &Tag) {
- IO.mapRequired("Index", Tag.Index);
- IO.mapRequired("Attribute", Tag.Attribute);
- IO.mapRequired("SigIndex", Tag.SigIndex);
+void MappingTraits<WasmYAML::DylinkImportInfo>::mapping(
+ IO &IO, WasmYAML::DylinkImportInfo &Info) {
+ IO.mapRequired("Module", Info.Module);
+ IO.mapRequired("Field", Info.Field);
+ IO.mapRequired("Flags", Info.Flags);
+}
+
+void MappingTraits<WasmYAML::DylinkExportInfo>::mapping(
+ IO &IO, WasmYAML::DylinkExportInfo &Info) {
+ IO.mapRequired("Name", Info.Name);
+ IO.mapRequired("Flags", Info.Flags);
}
void ScalarBitSetTraits<WasmYAML::LimitFlags>::bitset(
@@ -561,6 +568,7 @@ void ScalarBitSetTraits<WasmYAML::SymbolFlags>::bitset(
BCaseMask(EXPORTED, EXPORTED);
BCaseMask(EXPLICIT_NAME, EXPLICIT_NAME);
BCaseMask(NO_STRIP, NO_STRIP);
+ BCaseMask(TLS, TLS);
#undef BCaseMask
}
diff --git a/llvm/lib/ObjectYAML/XCOFFEmitter.cpp b/llvm/lib/ObjectYAML/XCOFFEmitter.cpp
index 14fea5437a32..85d1f82bfafc 100644
--- a/llvm/lib/ObjectYAML/XCOFFEmitter.cpp
+++ b/llvm/lib/ObjectYAML/XCOFFEmitter.cpp
@@ -18,8 +18,9 @@
#include "llvm/ObjectYAML/ObjectYAML.h"
#include "llvm/ObjectYAML/yaml2obj.h"
#include "llvm/Support/EndianStream.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/LEB128.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -33,7 +34,7 @@ class XCOFFWriter {
public:
XCOFFWriter(XCOFFYAML::Object &Obj, raw_ostream &OS, yaml::ErrorHandler EH)
: Obj(Obj), W(OS, support::big), ErrHandler(EH),
- Strings(StringTableBuilder::XCOFF) {
+ StrTblBuilder(StringTableBuilder::XCOFF) {
Is64Bit = Obj.Header.Magic == (llvm::yaml::Hex16)XCOFF::XCOFF64;
}
bool writeXCOFF();
@@ -41,20 +42,24 @@ public:
private:
bool nameShouldBeInStringTable(StringRef SymbolName);
bool initFileHeader(uint64_t CurrentOffset);
+ void initAuxFileHeader();
bool initSectionHeader(uint64_t &CurrentOffset);
bool initRelocations(uint64_t &CurrentOffset);
+ bool initStringTable();
bool assignAddressesAndIndices();
void writeFileHeader();
+ void writeAuxFileHeader();
void writeSectionHeader();
bool writeSectionData();
bool writeRelocations();
bool writeSymbols();
+ void writeStringTable();
XCOFFYAML::Object &Obj;
bool Is64Bit = false;
support::endian::Writer W;
yaml::ErrorHandler ErrHandler;
- StringTableBuilder Strings;
+ StringTableBuilder StrTblBuilder;
uint64_t StartOffset;
  // Map the section name to its corresponding section index.
DenseMap<StringRef, int16_t> SectionIndexMap = {
@@ -62,6 +67,7 @@ private:
{StringRef("N_ABS"), XCOFF::N_ABS},
{StringRef("N_UNDEF"), XCOFF::N_UNDEF}};
XCOFFYAML::FileHeader InitFileHdr = Obj.Header;
+ XCOFFYAML::AuxiliaryHeader InitAuxFileHdr;
std::vector<XCOFFYAML::Section> InitSections = Obj.Sections;
};
@@ -75,7 +81,8 @@ static void writeName(StringRef StrName, support::endian::Writer W) {
}
bool XCOFFWriter::nameShouldBeInStringTable(StringRef SymbolName) {
- return SymbolName.size() > XCOFF::NameSize;
+ // For XCOFF64: The symbol name is always in the string table.
+ return (SymbolName.size() > XCOFF::NameSize) || Is64Bit;
}
bool XCOFFWriter::initRelocations(uint64_t &CurrentOffset) {
@@ -83,8 +90,9 @@ bool XCOFFWriter::initRelocations(uint64_t &CurrentOffset) {
if (!InitSections[I].Relocations.empty()) {
InitSections[I].NumberOfRelocations = InitSections[I].Relocations.size();
InitSections[I].FileOffsetToRelocations = CurrentOffset;
- CurrentOffset += InitSections[I].NumberOfRelocations *
- XCOFF::RelocationSerializationSize32;
+ uint64_t RelSize = Is64Bit ? XCOFF::RelocationSerializationSize64
+ : XCOFF::RelocationSerializationSize32;
+ CurrentOffset += InitSections[I].NumberOfRelocations * RelSize;
if (CurrentOffset > MaxRawDataSize) {
ErrHandler("maximum object size of" + Twine(MaxRawDataSize) +
"exceeded when writing relocation data");
@@ -138,20 +146,79 @@ bool XCOFFWriter::initSectionHeader(uint64_t &CurrentOffset) {
return initRelocations(CurrentOffset);
}
+bool XCOFFWriter::initStringTable() {
+ if (Obj.StrTbl.RawContent) {
+ size_t RawSize = Obj.StrTbl.RawContent->binary_size();
+ if (Obj.StrTbl.Strings || Obj.StrTbl.Length) {
+ ErrHandler(
+ "can't specify Strings or Length when RawContent is specified");
+ return false;
+ }
+ if (Obj.StrTbl.ContentSize && *Obj.StrTbl.ContentSize < RawSize) {
+ ErrHandler("specified ContentSize (" + Twine(*Obj.StrTbl.ContentSize) +
+ ") is less than the RawContent data size (" + Twine(RawSize) +
+ ")");
+ return false;
+ }
+ return true;
+ }
+ if (Obj.StrTbl.ContentSize && *Obj.StrTbl.ContentSize <= 3) {
+ ErrHandler("ContentSize shouldn't be less than 4 without RawContent");
+ return false;
+ }
+
+ // Build the string table.
+ StrTblBuilder.clear();
+
+ if (Obj.StrTbl.Strings) {
+ // All specified strings should be added to the string table.
+ for (StringRef StringEnt : *Obj.StrTbl.Strings)
+ StrTblBuilder.add(StringEnt);
+
+ size_t StrTblIdx = 0;
+ size_t NumOfStrings = Obj.StrTbl.Strings->size();
+ for (XCOFFYAML::Symbol &YamlSym : Obj.Symbols) {
+ if (nameShouldBeInStringTable(YamlSym.SymbolName)) {
+ if (StrTblIdx < NumOfStrings) {
+ // Overwrite the symbol name with the specified string.
+ YamlSym.SymbolName = (*Obj.StrTbl.Strings)[StrTblIdx];
+ ++StrTblIdx;
+ } else
+ // Names that are not overwritten are still stored in the string
+ // table.
+ StrTblBuilder.add(YamlSym.SymbolName);
+ }
+ }
+ } else {
+ for (XCOFFYAML::Symbol &YamlSym : Obj.Symbols) {
+ if (nameShouldBeInStringTable(YamlSym.SymbolName))
+ StrTblBuilder.add(YamlSym.SymbolName);
+ }
+ }
+
+ StrTblBuilder.finalize();
+
+ size_t StrTblSize = StrTblBuilder.getSize();
+ if (Obj.StrTbl.ContentSize && *Obj.StrTbl.ContentSize < StrTblSize) {
+ ErrHandler("specified ContentSize (" + Twine(*Obj.StrTbl.ContentSize) +
+ ") is less than the size of the data that would otherwise be "
+ "written (" +
+ Twine(StrTblSize) + ")");
+ return false;
+ }
+
+ return true;
+}
+
bool XCOFFWriter::initFileHeader(uint64_t CurrentOffset) {
// The default format of the object file is XCOFF32.
InitFileHdr.Magic = XCOFF::XCOFF32;
InitFileHdr.NumberOfSections = Obj.Sections.size();
InitFileHdr.NumberOfSymTableEntries = Obj.Symbols.size();
- for (const XCOFFYAML::Symbol &YamlSym : Obj.Symbols) {
+ for (const XCOFFYAML::Symbol &YamlSym : Obj.Symbols)
// Add the number of auxiliary symbols to the total number.
InitFileHdr.NumberOfSymTableEntries += YamlSym.NumberOfAuxEntries;
- if (nameShouldBeInStringTable(YamlSym.SymbolName))
- Strings.add(YamlSym.SymbolName);
- }
- // Finalize the string table.
- Strings.finalize();
// Calculate SymbolTableOffset for the file header.
if (InitFileHdr.NumberOfSymTableEntries) {
@@ -168,17 +235,87 @@ bool XCOFFWriter::initFileHeader(uint64_t CurrentOffset) {
return true;
}
+void XCOFFWriter::initAuxFileHeader() {
+ InitAuxFileHdr = *Obj.AuxHeader;
+ // In general, an object file might contain multiple sections of a given type,
+ // but in a loadable module, there must be exactly one .text, .data, .bss, and
+ // .loader section. A loadable object might also have one .tdata section and
+ // one .tbss section.
+ // Set these section-related values if not set explicitly. We assume that the
+ // input YAML matches the format of the loadable object, but if multiple input
+ // sections still have the same type, the first section with that type
+ // prevails.
+ for (uint16_t I = 0, E = InitSections.size(); I < E; ++I) {
+ switch (InitSections[I].Flags) {
+ case XCOFF::STYP_TEXT:
+ if (!InitAuxFileHdr.TextSize)
+ InitAuxFileHdr.TextSize = InitSections[I].Size;
+ if (!InitAuxFileHdr.TextStartAddr)
+ InitAuxFileHdr.TextStartAddr = InitSections[I].Address;
+ if (!InitAuxFileHdr.SecNumOfText)
+ InitAuxFileHdr.SecNumOfText = I + 1;
+ break;
+ case XCOFF::STYP_DATA:
+ if (!InitAuxFileHdr.InitDataSize)
+ InitAuxFileHdr.InitDataSize = InitSections[I].Size;
+ if (!InitAuxFileHdr.DataStartAddr)
+ InitAuxFileHdr.DataStartAddr = InitSections[I].Address;
+ if (!InitAuxFileHdr.SecNumOfData)
+ InitAuxFileHdr.SecNumOfData = I + 1;
+ break;
+ case XCOFF::STYP_BSS:
+ if (!InitAuxFileHdr.BssDataSize)
+ InitAuxFileHdr.BssDataSize = InitSections[I].Size;
+ if (!InitAuxFileHdr.SecNumOfBSS)
+ InitAuxFileHdr.SecNumOfBSS = I + 1;
+ break;
+ case XCOFF::STYP_TDATA:
+ if (!InitAuxFileHdr.SecNumOfTData)
+ InitAuxFileHdr.SecNumOfTData = I + 1;
+ break;
+ case XCOFF::STYP_TBSS:
+ if (!InitAuxFileHdr.SecNumOfTBSS)
+ InitAuxFileHdr.SecNumOfTBSS = I + 1;
+ break;
+ case XCOFF::STYP_LOADER:
+ if (!InitAuxFileHdr.SecNumOfLoader)
+ InitAuxFileHdr.SecNumOfLoader = I + 1;
+ break;
+ default:
+ break;
+ }
+ }
+}
+
bool XCOFFWriter::assignAddressesAndIndices() {
- Strings.clear();
+ uint64_t FileHdrSize =
+ Is64Bit ? XCOFF::FileHeaderSize64 : XCOFF::FileHeaderSize32;
+ uint64_t AuxFileHdrSize = 0;
+ if (Obj.AuxHeader)
+ AuxFileHdrSize = Obj.Header.AuxHeaderSize
+ ? Obj.Header.AuxHeaderSize
+ : (Is64Bit ? XCOFF::AuxFileHeaderSize64
+ : XCOFF::AuxFileHeaderSize32);
+ uint64_t SecHdrSize =
+ Is64Bit ? XCOFF::SectionHeaderSize64 : XCOFF::SectionHeaderSize32;
uint64_t CurrentOffset =
- XCOFF::FileHeaderSize32 /* TODO: + auxiliaryHeaderSize() */ +
- InitSections.size() * XCOFF::SectionHeaderSize32;
+ FileHdrSize + AuxFileHdrSize + InitSections.size() * SecHdrSize;
// Calculate section header info.
if (!initSectionHeader(CurrentOffset))
return false;
+ InitFileHdr.AuxHeaderSize = AuxFileHdrSize;
+
// Calculate file header info.
- return initFileHeader(CurrentOffset);
+ if (!initFileHeader(CurrentOffset))
+ return false;
+
+ // Initialize the auxiliary file header.
+ if (Obj.AuxHeader)
+ initAuxFileHeader();
+
+ // Initialize the string table.
+ return initStringTable();
}
void XCOFFWriter::writeFileHeader() {
@@ -186,14 +323,86 @@ void XCOFFWriter::writeFileHeader() {
W.write<uint16_t>(Obj.Header.NumberOfSections ? Obj.Header.NumberOfSections
: InitFileHdr.NumberOfSections);
W.write<int32_t>(Obj.Header.TimeStamp);
- W.write<uint32_t>(Obj.Header.SymbolTableOffset
- ? Obj.Header.SymbolTableOffset
- : InitFileHdr.SymbolTableOffset);
- W.write<int32_t>(Obj.Header.NumberOfSymTableEntries
- ? Obj.Header.NumberOfSymTableEntries
- : InitFileHdr.NumberOfSymTableEntries);
- W.write<uint16_t>(Obj.Header.AuxHeaderSize);
- W.write<uint16_t>(Obj.Header.Flags);
+ if (Is64Bit) {
+ W.write<uint64_t>(Obj.Header.SymbolTableOffset
+ ? Obj.Header.SymbolTableOffset
+ : InitFileHdr.SymbolTableOffset);
+ W.write<uint16_t>(InitFileHdr.AuxHeaderSize);
+ W.write<uint16_t>(Obj.Header.Flags);
+ W.write<int32_t>(Obj.Header.NumberOfSymTableEntries
+ ? Obj.Header.NumberOfSymTableEntries
+ : InitFileHdr.NumberOfSymTableEntries);
+ } else {
+ W.write<uint32_t>(Obj.Header.SymbolTableOffset
+ ? Obj.Header.SymbolTableOffset
+ : InitFileHdr.SymbolTableOffset);
+ W.write<int32_t>(Obj.Header.NumberOfSymTableEntries
+ ? Obj.Header.NumberOfSymTableEntries
+ : InitFileHdr.NumberOfSymTableEntries);
+ W.write<uint16_t>(InitFileHdr.AuxHeaderSize);
+ W.write<uint16_t>(Obj.Header.Flags);
+ }
+}
+
+void XCOFFWriter::writeAuxFileHeader() {
+ W.write<uint16_t>(InitAuxFileHdr.Magic.getValueOr(yaml::Hex16(1)));
+ W.write<uint16_t>(InitAuxFileHdr.Version.getValueOr(yaml::Hex16(1)));
+ if (Is64Bit) {
+ W.OS.write_zeros(4); // Reserved for debugger.
+ W.write<uint64_t>(InitAuxFileHdr.TextStartAddr.getValueOr(yaml::Hex64(0)));
+ W.write<uint64_t>(InitAuxFileHdr.DataStartAddr.getValueOr(yaml::Hex64(0)));
+ W.write<uint64_t>(InitAuxFileHdr.TOCAnchorAddr.getValueOr(yaml::Hex64(0)));
+ } else {
+ W.write<uint32_t>(InitAuxFileHdr.TextSize.getValueOr(yaml::Hex64(0)));
+ W.write<uint32_t>(InitAuxFileHdr.InitDataSize.getValueOr(yaml::Hex64(0)));
+ W.write<uint32_t>(InitAuxFileHdr.BssDataSize.getValueOr(yaml::Hex64(0)));
+ W.write<uint32_t>(InitAuxFileHdr.EntryPointAddr.getValueOr(yaml::Hex64(0)));
+ W.write<uint32_t>(InitAuxFileHdr.TextStartAddr.getValueOr(yaml::Hex64(0)));
+ W.write<uint32_t>(InitAuxFileHdr.DataStartAddr.getValueOr(yaml::Hex64(0)));
+ W.write<uint32_t>(InitAuxFileHdr.TOCAnchorAddr.getValueOr(yaml::Hex64(0)));
+ }
+ W.write<uint16_t>(InitAuxFileHdr.SecNumOfEntryPoint.getValueOr(0));
+ W.write<uint16_t>(InitAuxFileHdr.SecNumOfText.getValueOr(0));
+ W.write<uint16_t>(InitAuxFileHdr.SecNumOfData.getValueOr(0));
+ W.write<uint16_t>(InitAuxFileHdr.SecNumOfTOC.getValueOr(0));
+ W.write<uint16_t>(InitAuxFileHdr.SecNumOfLoader.getValueOr(0));
+ W.write<uint16_t>(InitAuxFileHdr.SecNumOfBSS.getValueOr(0));
+ W.write<uint16_t>(InitAuxFileHdr.MaxAlignOfText.getValueOr(yaml::Hex16(0)));
+ W.write<uint16_t>(InitAuxFileHdr.MaxAlignOfData.getValueOr(yaml::Hex16(0)));
+ W.write<uint16_t>(InitAuxFileHdr.ModuleType.getValueOr(yaml::Hex16(0)));
+ W.write<uint8_t>(InitAuxFileHdr.CpuFlag.getValueOr(yaml::Hex8(0)));
+ W.write<uint8_t>(0); // Reserved for CPU type.
+ if (Is64Bit) {
+ W.write<uint8_t>(InitAuxFileHdr.TextPageSize.getValueOr(yaml::Hex8(0)));
+ W.write<uint8_t>(InitAuxFileHdr.DataPageSize.getValueOr(yaml::Hex8(0)));
+ W.write<uint8_t>(InitAuxFileHdr.StackPageSize.getValueOr(yaml::Hex8(0)));
+ W.write<uint8_t>(
+ InitAuxFileHdr.FlagAndTDataAlignment.getValueOr(yaml::Hex8(0x80)));
+ W.write<uint64_t>(InitAuxFileHdr.TextSize.getValueOr(yaml::Hex64(0)));
+ W.write<uint64_t>(InitAuxFileHdr.InitDataSize.getValueOr(yaml::Hex64(0)));
+ W.write<uint64_t>(InitAuxFileHdr.BssDataSize.getValueOr(yaml::Hex64(0)));
+ W.write<uint64_t>(InitAuxFileHdr.EntryPointAddr.getValueOr(yaml::Hex64(0)));
+ W.write<uint64_t>(InitAuxFileHdr.MaxStackSize.getValueOr(yaml::Hex64(0)));
+ W.write<uint64_t>(InitAuxFileHdr.MaxDataSize.getValueOr(yaml::Hex64(0)));
+ } else {
+ W.write<uint32_t>(InitAuxFileHdr.MaxStackSize.getValueOr(yaml::Hex64(0)));
+ W.write<uint32_t>(InitAuxFileHdr.MaxDataSize.getValueOr(yaml::Hex64(0)));
+ W.OS.write_zeros(4); // Reserved for debugger.
+ W.write<uint8_t>(InitAuxFileHdr.TextPageSize.getValueOr(yaml::Hex8(0)));
+ W.write<uint8_t>(InitAuxFileHdr.DataPageSize.getValueOr(yaml::Hex8(0)));
+ W.write<uint8_t>(InitAuxFileHdr.StackPageSize.getValueOr(yaml::Hex8(0)));
+ W.write<uint8_t>(
+ InitAuxFileHdr.FlagAndTDataAlignment.getValueOr(yaml::Hex8(0)));
+ }
+ W.write<uint16_t>(InitAuxFileHdr.SecNumOfTData.getValueOr(0));
+ W.write<uint16_t>(InitAuxFileHdr.SecNumOfTBSS.getValueOr(0));
+ if (Is64Bit) {
+ W.write<uint16_t>(InitAuxFileHdr.Flag.getValueOr(yaml::Hex16(XCOFF::SHR_SYMTAB)));
+ if (InitFileHdr.AuxHeaderSize > XCOFF::AuxFileHeaderSize64)
+ W.OS.write_zeros(InitFileHdr.AuxHeaderSize - XCOFF::AuxFileHeaderSize64);
+ } else if (InitFileHdr.AuxHeaderSize > XCOFF::AuxFileHeaderSize32) {
+ W.OS.write_zeros(InitFileHdr.AuxHeaderSize - XCOFF::AuxFileHeaderSize32);
+ }
}
void XCOFFWriter::writeSectionHeader() {
@@ -202,22 +411,40 @@ void XCOFFWriter::writeSectionHeader() {
XCOFFYAML::Section DerivedSec = InitSections[I];
writeName(YamlSec.SectionName, W);
// Virtual address is the same as physical address.
- uint32_t SectionAddress =
+ uint64_t SectionAddress =
YamlSec.Address ? YamlSec.Address : DerivedSec.Address;
- W.write<uint32_t>(SectionAddress); // Physical address
- W.write<uint32_t>(SectionAddress); // Virtual address
- W.write<uint32_t>(YamlSec.Size ? YamlSec.Size : DerivedSec.Size);
- W.write<uint32_t>(YamlSec.FileOffsetToData ? YamlSec.FileOffsetToData
- : DerivedSec.FileOffsetToData);
- W.write<uint32_t>(YamlSec.FileOffsetToRelocations
- ? YamlSec.FileOffsetToRelocations
- : DerivedSec.FileOffsetToRelocations);
- W.write<uint32_t>(YamlSec.FileOffsetToLineNumbers);
- W.write<uint16_t>(YamlSec.NumberOfRelocations
- ? YamlSec.NumberOfRelocations
- : DerivedSec.NumberOfRelocations);
- W.write<uint16_t>(YamlSec.NumberOfLineNumbers);
- W.write<int32_t>(YamlSec.Flags);
+ if (Is64Bit) {
+ W.write<uint64_t>(SectionAddress); // Physical address
+ W.write<uint64_t>(SectionAddress); // Virtual address
+ W.write<uint64_t>(YamlSec.Size ? YamlSec.Size : DerivedSec.Size);
+ W.write<uint64_t>(YamlSec.FileOffsetToData ? YamlSec.FileOffsetToData
+ : DerivedSec.FileOffsetToData);
+ W.write<uint64_t>(YamlSec.FileOffsetToRelocations
+ ? YamlSec.FileOffsetToRelocations
+ : DerivedSec.FileOffsetToRelocations);
+ W.write<uint64_t>(YamlSec.FileOffsetToLineNumbers);
+ W.write<uint32_t>(YamlSec.NumberOfRelocations
+ ? YamlSec.NumberOfRelocations
+ : DerivedSec.NumberOfRelocations);
+ W.write<uint32_t>(YamlSec.NumberOfLineNumbers);
+ W.write<int32_t>(YamlSec.Flags);
+ W.OS.write_zeros(4);
+ } else {
+ W.write<uint32_t>(SectionAddress); // Physical address
+ W.write<uint32_t>(SectionAddress); // Virtual address
+ W.write<uint32_t>(YamlSec.Size ? YamlSec.Size : DerivedSec.Size);
+ W.write<uint32_t>(YamlSec.FileOffsetToData ? YamlSec.FileOffsetToData
+ : DerivedSec.FileOffsetToData);
+ W.write<uint32_t>(YamlSec.FileOffsetToRelocations
+ ? YamlSec.FileOffsetToRelocations
+ : DerivedSec.FileOffsetToRelocations);
+ W.write<uint32_t>(YamlSec.FileOffsetToLineNumbers);
+ W.write<uint16_t>(YamlSec.NumberOfRelocations
+ ? YamlSec.NumberOfRelocations
+ : DerivedSec.NumberOfRelocations);
+ W.write<uint16_t>(YamlSec.NumberOfLineNumbers);
+ W.write<int32_t>(YamlSec.Flags);
+ }
}
}
@@ -232,8 +459,7 @@ bool XCOFFWriter::writeSectionData() {
ErrHandler("redundant data was written before section data");
return false;
}
- if (PaddingSize > 0)
- W.OS.write_zeros(PaddingSize);
+ W.OS.write_zeros(PaddingSize);
YamlSec.SectionData.writeAsBinary(W.OS);
}
}
@@ -250,10 +476,12 @@ bool XCOFFWriter::writeRelocations() {
ErrHandler("redundant data was written before relocations");
return false;
}
- if (PaddingSize > 0)
- W.OS.write_zeros(PaddingSize);
+ W.OS.write_zeros(PaddingSize);
for (const XCOFFYAML::Relocation &YamlRel : YamlSec.Relocations) {
- W.write<uint32_t>(YamlRel.VirtualAddress);
+ if (Is64Bit)
+ W.write<uint64_t>(YamlRel.VirtualAddress);
+ else
+ W.write<uint32_t>(YamlRel.VirtualAddress);
W.write<uint32_t>(YamlRel.SymbolIndex);
W.write<uint8_t>(YamlRel.Info);
W.write<uint8_t>(YamlRel.Type);
@@ -270,20 +498,39 @@ bool XCOFFWriter::writeSymbols() {
ErrHandler("redundant data was written before symbols");
return false;
}
- if (PaddingSize > 0)
- W.OS.write_zeros(PaddingSize);
+ W.OS.write_zeros(PaddingSize);
for (const XCOFFYAML::Symbol &YamlSym : Obj.Symbols) {
- if (nameShouldBeInStringTable(YamlSym.SymbolName)) {
- // For XCOFF32: A value of 0 indicates that the symbol name is in the
- // string table.
- W.write<int32_t>(0);
- W.write<uint32_t>(Strings.getOffset(YamlSym.SymbolName));
+ if (Is64Bit) {
+ W.write<uint64_t>(YamlSym.Value);
+ W.write<uint32_t>(StrTblBuilder.getOffset(YamlSym.SymbolName));
+ } else {
+ if (nameShouldBeInStringTable(YamlSym.SymbolName)) {
+ // For XCOFF32: A value of 0 indicates that the symbol name is in the
+ // string table.
+ W.write<int32_t>(0);
+ W.write<uint32_t>(StrTblBuilder.getOffset(YamlSym.SymbolName));
+ } else {
+ writeName(YamlSym.SymbolName, W);
+ }
+ W.write<uint32_t>(YamlSym.Value);
+ }
+ if (YamlSym.SectionName) {
+ if (!SectionIndexMap.count(*YamlSym.SectionName)) {
+ ErrHandler("the SectionName " + *YamlSym.SectionName +
+ " specified in the symbol does not exist");
+ return false;
+ }
+ if (YamlSym.SectionIndex &&
+ SectionIndexMap[*YamlSym.SectionName] != *YamlSym.SectionIndex) {
+ ErrHandler("the SectionName " + *YamlSym.SectionName +
+ " and the SectionIndex (" + Twine(*YamlSym.SectionIndex) +
+ ") refer to different sections");
+ return false;
+ }
+ W.write<int16_t>(SectionIndexMap[*YamlSym.SectionName]);
} else {
- writeName(YamlSym.SymbolName, W);
+ W.write<int16_t>(YamlSym.SectionIndex ? *YamlSym.SectionIndex : 0);
}
- W.write<uint32_t>(YamlSym.Value);
- W.write<int16_t>(
- YamlSym.SectionName.size() ? SectionIndexMap[YamlSym.SectionName] : 0);
W.write<uint16_t>(YamlSym.Type);
W.write<uint8_t>(YamlSym.StorageClass);
W.write<uint8_t>(YamlSym.NumberOfAuxEntries);
@@ -295,21 +542,61 @@ bool XCOFFWriter::writeSymbols() {
// length of each auxiliary entry is the same as a symbol table entry (18
// bytes). The format and quantity of auxiliary entries depend on the
// storage class (n_sclass) and type (n_type) of the symbol table entry.
- W.OS.write_zeros(18);
+ W.OS.write_zeros(XCOFF::SymbolTableEntrySize);
}
}
return true;
}
-bool XCOFFWriter::writeXCOFF() {
- if (Is64Bit) {
- ErrHandler("only XCOFF32 is currently supported");
- return false;
+void XCOFFWriter::writeStringTable() {
+ if (Obj.StrTbl.RawContent) {
+ Obj.StrTbl.RawContent->writeAsBinary(W.OS);
+ if (Obj.StrTbl.ContentSize) {
+ assert(*Obj.StrTbl.ContentSize >= Obj.StrTbl.RawContent->binary_size() &&
+ "Specified ContentSize is less than the RawContent size.");
+ W.OS.write_zeros(*Obj.StrTbl.ContentSize -
+ Obj.StrTbl.RawContent->binary_size());
+ }
+ return;
+ }
+
+ size_t StrTblBuilderSize = StrTblBuilder.getSize();
+ // If neither Length nor ContentSize is specified, write the StrTblBuilder
+ // directly, which contains the auto-generated Length value.
+ if (!Obj.StrTbl.Length && !Obj.StrTbl.ContentSize) {
+ if (StrTblBuilderSize <= 4)
+ return;
+ StrTblBuilder.write(W.OS);
+ return;
+ }
+
+ // Serialize the string table's content to a temporary buffer.
+ std::unique_ptr<WritableMemoryBuffer> Buf =
+ WritableMemoryBuffer::getNewMemBuffer(StrTblBuilderSize);
+ uint8_t *Ptr = reinterpret_cast<uint8_t *>(Buf->getBufferStart());
+ StrTblBuilder.write(Ptr);
+ // Replace the first 4 bytes, which contain the auto-generated Length value,
+ // with the specified value.
+ memset(Ptr, 0, 4);
+ support::endian::write32be(Ptr, Obj.StrTbl.Length ? *Obj.StrTbl.Length
+ : *Obj.StrTbl.ContentSize);
+ // Copy the buffer content to the actual output stream.
+ W.OS.write(Buf->getBufferStart(), Buf->getBufferSize());
+ // Add zeros as padding after strings.
+ if (Obj.StrTbl.ContentSize) {
+ assert(*Obj.StrTbl.ContentSize >= StrTblBuilderSize &&
+ "Specified ContentSize is less than the StringTableBuilder size.");
+ W.OS.write_zeros(*Obj.StrTbl.ContentSize - StrTblBuilderSize);
}
+}
+
+bool XCOFFWriter::writeXCOFF() {
if (!assignAddressesAndIndices())
return false;
StartOffset = W.OS.tell();
writeFileHeader();
+ if (Obj.AuxHeader)
+ writeAuxFileHeader();
if (!Obj.Sections.empty()) {
writeSectionHeader();
if (!writeSectionData())
@@ -319,9 +606,7 @@ bool XCOFFWriter::writeXCOFF() {
}
if (!Obj.Symbols.empty() && !writeSymbols())
return false;
- // Write the string table.
- if (Strings.getSize() > 4)
- Strings.write(W.OS);
+ writeStringTable();
return true;
}
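
When an explicit Length or ContentSize is given, writeStringTable above stages the builder's output in a buffer and overwrites its first four bytes, the auto-computed length stored big-endian, before copying the table to the stream. A compressed sketch of that patch step, assuming the builder has already been finalized; writePatchedStringTable is an illustrative name:

    #include "llvm/MC/StringTableBuilder.h"
    #include "llvm/Support/Endian.h"
    #include "llvm/Support/MemoryBuffer.h"
    #include "llvm/Support/raw_ostream.h"

    static void writePatchedStringTable(llvm::StringTableBuilder &Builder,
                                        uint32_t Length, llvm::raw_ostream &OS) {
      auto Buf = llvm::WritableMemoryBuffer::getNewMemBuffer(Builder.getSize());
      uint8_t *Ptr = reinterpret_cast<uint8_t *>(Buf->getBufferStart());
      Builder.write(Ptr);
      // The first word of an XCOFF string table holds its total length.
      llvm::support::endian::write32be(Ptr, Length);
      OS.write(Buf->getBufferStart(), Buf->getBufferSize());
    }
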
diff --git a/llvm/lib/ObjectYAML/XCOFFYAML.cpp b/llvm/lib/ObjectYAML/XCOFFYAML.cpp
index 73d188e274b1..221cf3b064c0 100644
--- a/llvm/lib/ObjectYAML/XCOFFYAML.cpp
+++ b/llvm/lib/ObjectYAML/XCOFFYAML.cpp
@@ -118,6 +118,37 @@ void MappingTraits<XCOFFYAML::FileHeader>::mapping(
IO.mapOptional("Flags", FileHdr.Flags);
}
+void MappingTraits<XCOFFYAML::AuxiliaryHeader>::mapping(
+ IO &IO, XCOFFYAML::AuxiliaryHeader &AuxHdr) {
+ IO.mapOptional("Magic", AuxHdr.Magic);
+ IO.mapOptional("Version", AuxHdr.Version);
+ IO.mapOptional("TextStartAddr", AuxHdr.TextStartAddr);
+ IO.mapOptional("DataStartAddr", AuxHdr.DataStartAddr);
+ IO.mapOptional("TOCAnchorAddr", AuxHdr.TOCAnchorAddr);
+ IO.mapOptional("TextSectionSize", AuxHdr.TextSize);
+ IO.mapOptional("DataSectionSize", AuxHdr.InitDataSize);
+ IO.mapOptional("BssSectionSize", AuxHdr.BssDataSize);
+ IO.mapOptional("SecNumOfEntryPoint", AuxHdr.SecNumOfEntryPoint);
+ IO.mapOptional("SecNumOfText", AuxHdr.SecNumOfText);
+ IO.mapOptional("SecNumOfData", AuxHdr.SecNumOfData);
+ IO.mapOptional("SecNumOfTOC", AuxHdr.SecNumOfTOC);
+ IO.mapOptional("SecNumOfLoader", AuxHdr.SecNumOfLoader);
+ IO.mapOptional("SecNumOfBSS", AuxHdr.SecNumOfBSS);
+ IO.mapOptional("MaxAlignOfText", AuxHdr.MaxAlignOfText);
+ IO.mapOptional("MaxAlignOfData", AuxHdr.MaxAlignOfData);
+ IO.mapOptional("ModuleType", AuxHdr.CpuFlag);
+ IO.mapOptional("TextPageSize", AuxHdr.TextPageSize);
+ IO.mapOptional("DataPageSize", AuxHdr.DataPageSize);
+ IO.mapOptional("StackPageSize", AuxHdr.StackPageSize);
+ IO.mapOptional("FlagAndTDataAlignment", AuxHdr.FlagAndTDataAlignment);
+ IO.mapOptional("EntryPointAddr", AuxHdr.EntryPointAddr);
+ IO.mapOptional("MaxStackSize", AuxHdr.MaxStackSize);
+ IO.mapOptional("MaxDataSize", AuxHdr.MaxDataSize);
+ IO.mapOptional("SecNumOfTData", AuxHdr.SecNumOfTData);
+ IO.mapOptional("SecNumOfTBSS", AuxHdr.SecNumOfTBSS);
+ IO.mapOptional("Flag", AuxHdr.Flag);
+}
+
void MappingTraits<XCOFFYAML::Relocation>::mapping(IO &IO,
XCOFFYAML::Relocation &R) {
IO.mapOptional("Address", R.VirtualAddress);
@@ -143,19 +174,29 @@ void MappingTraits<XCOFFYAML::Section>::mapping(IO &IO,
}
void MappingTraits<XCOFFYAML::Symbol>::mapping(IO &IO, XCOFFYAML::Symbol &S) {
- IO.mapRequired("Name", S.SymbolName);
+ IO.mapOptional("Name", S.SymbolName);
IO.mapOptional("Value", S.Value);
IO.mapOptional("Section", S.SectionName);
+ IO.mapOptional("SectionIndex", S.SectionIndex);
IO.mapOptional("Type", S.Type);
IO.mapOptional("StorageClass", S.StorageClass);
IO.mapOptional("NumberOfAuxEntries", S.NumberOfAuxEntries);
}
+void MappingTraits<XCOFFYAML::StringTable>::mapping(IO &IO, XCOFFYAML::StringTable &Str) {
+ IO.mapOptional("ContentSize", Str.ContentSize);
+ IO.mapOptional("Length", Str.Length);
+ IO.mapOptional("Strings", Str.Strings);
+ IO.mapOptional("RawContent", Str.RawContent);
+}
+
void MappingTraits<XCOFFYAML::Object>::mapping(IO &IO, XCOFFYAML::Object &Obj) {
IO.mapTag("!XCOFF", true);
IO.mapRequired("FileHeader", Obj.Header);
+ IO.mapOptional("AuxiliaryHeader", Obj.AuxHeader);
IO.mapOptional("Sections", Obj.Sections);
IO.mapOptional("Symbols", Obj.Symbols);
+ IO.mapOptional("StringTable", Obj.StrTbl);
}
} // namespace yaml
diff --git a/llvm/lib/Option/OptTable.cpp b/llvm/lib/Option/OptTable.cpp
index f5bf166e9e65..37c2fcbab181 100644
--- a/llvm/lib/Option/OptTable.cpp
+++ b/llvm/lib/Option/OptTable.cpp
@@ -104,11 +104,11 @@ OptTable::OptTable(ArrayRef<Info> OptionInfos, bool IgnoreCase)
for (unsigned i = 0, e = getNumOptions(); i != e; ++i) {
unsigned Kind = getInfo(i + 1).Kind;
if (Kind == Option::InputClass) {
- assert(!TheInputOptionID && "Cannot have multiple input options!");
- TheInputOptionID = getInfo(i + 1).ID;
+ assert(!InputOptionID && "Cannot have multiple input options!");
+ InputOptionID = getInfo(i + 1).ID;
} else if (Kind == Option::UnknownClass) {
- assert(!TheUnknownOptionID && "Cannot have multiple unknown options!");
- TheUnknownOptionID = getInfo(i + 1).ID;
+ assert(!UnknownOptionID && "Cannot have multiple unknown options!");
+ UnknownOptionID = getInfo(i + 1).ID;
} else if (Kind != Option::GroupClass) {
FirstSearchableIndex = i;
break;
@@ -337,13 +337,14 @@ bool OptTable::addValues(const char *Option, const char *Values) {
// GroupedShortOptions is true, -a matches "-abc" and the argument in Args will
// be updated to "-bc". This overload does not support
// FlagsToInclude/FlagsToExclude or case insensitive options.
-Arg *OptTable::parseOneArgGrouped(InputArgList &Args, unsigned &Index) const {
+std::unique_ptr<Arg> OptTable::parseOneArgGrouped(InputArgList &Args,
+ unsigned &Index) const {
// Anything that doesn't start with PrefixesUnion is an input, as is '-'
// itself.
const char *CStr = Args.getArgString(Index);
StringRef Str(CStr);
if (isInput(PrefixesUnion, Str))
- return new Arg(getOption(TheInputOptionID), Str, Index++, CStr);
+ return std::make_unique<Arg>(getOption(InputOptionID), Str, Index++, CStr);
const Info *End = OptionInfos.data() + OptionInfos.size();
StringRef Name = Str.ltrim(PrefixChars);
@@ -359,8 +360,9 @@ Arg *OptTable::parseOneArgGrouped(InputArgList &Args, unsigned &Index) const {
continue;
Option Opt(Start, this);
- if (Arg *A = Opt.accept(Args, StringRef(Args.getArgString(Index), ArgSize),
- false, Index))
+ if (std::unique_ptr<Arg> A =
+ Opt.accept(Args, StringRef(Args.getArgString(Index), ArgSize),
+ /*GroupedShortOption=*/false, Index))
return A;
// If Opt is a Flag of length 2 (e.g. "-a"), we know it is a prefix of
@@ -375,28 +377,39 @@ Arg *OptTable::parseOneArgGrouped(InputArgList &Args, unsigned &Index) const {
}
if (Fallback) {
Option Opt(Fallback, this);
- if (Arg *A = Opt.accept(Args, Str.substr(0, 2), true, Index)) {
- if (Str.size() == 2)
- ++Index;
- else
- Args.replaceArgString(Index, Twine('-') + Str.substr(2));
+ // Check that the last option isn't a flag wrongly given an argument.
+ if (Str[2] == '=')
+ return std::make_unique<Arg>(getOption(UnknownOptionID), Str, Index++,
+ CStr);
+
+ if (std::unique_ptr<Arg> A = Opt.accept(
+ Args, Str.substr(0, 2), /*GroupedShortOption=*/true, Index)) {
+ Args.replaceArgString(Index, Twine('-') + Str.substr(2));
return A;
}
}
- return new Arg(getOption(TheUnknownOptionID), Str, Index++, CStr);
+  // In the case of an incorrect short option, extract the character and move to
+ // the next one.
+ if (Str[1] != '-') {
+ CStr = Args.MakeArgString(Str.substr(0, 2));
+ Args.replaceArgString(Index, Twine('-') + Str.substr(2));
+ return std::make_unique<Arg>(getOption(UnknownOptionID), CStr, Index, CStr);
+ }
+
+ return std::make_unique<Arg>(getOption(UnknownOptionID), Str, Index++, CStr);
}
-Arg *OptTable::ParseOneArg(const ArgList &Args, unsigned &Index,
- unsigned FlagsToInclude,
- unsigned FlagsToExclude) const {
+std::unique_ptr<Arg> OptTable::ParseOneArg(const ArgList &Args, unsigned &Index,
+ unsigned FlagsToInclude,
+ unsigned FlagsToExclude) const {
unsigned Prev = Index;
const char *Str = Args.getArgString(Index);
// Anything that doesn't start with PrefixesUnion is an input, as is '-'
// itself.
if (isInput(PrefixesUnion, Str))
- return new Arg(getOption(TheInputOptionID), Str, Index++, Str);
+ return std::make_unique<Arg>(getOption(InputOptionID), Str, Index++, Str);
const Info *Start = OptionInfos.data() + FirstSearchableIndex;
const Info *End = OptionInfos.data() + OptionInfos.size();
@@ -430,8 +443,9 @@ Arg *OptTable::ParseOneArg(const ArgList &Args, unsigned &Index,
continue;
// See if this option matches.
- if (Arg *A = Opt.accept(Args, StringRef(Args.getArgString(Index), ArgSize),
- false, Index))
+ if (std::unique_ptr<Arg> A =
+ Opt.accept(Args, StringRef(Args.getArgString(Index), ArgSize),
+ /*GroupedShortOption=*/false, Index))
return A;
// Otherwise, see if this argument was missing values.
@@ -442,9 +456,9 @@ Arg *OptTable::ParseOneArg(const ArgList &Args, unsigned &Index,
// If we failed to find an option and this arg started with /, then it's
// probably an input path.
if (Str[0] == '/')
- return new Arg(getOption(TheInputOptionID), Str, Index++, Str);
+ return std::make_unique<Arg>(getOption(InputOptionID), Str, Index++, Str);
- return new Arg(getOption(TheUnknownOptionID), Str, Index++, Str);
+ return std::make_unique<Arg>(getOption(UnknownOptionID), Str, Index++, Str);
}
InputArgList OptTable::ParseArgs(ArrayRef<const char *> ArgArr,
@@ -472,7 +486,7 @@ InputArgList OptTable::ParseArgs(ArrayRef<const char *> ArgArr,
}
unsigned Prev = Index;
- Arg *A = GroupedShortOptions
+ std::unique_ptr<Arg> A = GroupedShortOptions
? parseOneArgGrouped(Args, Index)
: ParseOneArg(Args, Index, FlagsToInclude, FlagsToExclude);
assert((Index > Prev || GroupedShortOptions) &&
@@ -487,7 +501,7 @@ InputArgList OptTable::ParseArgs(ArrayRef<const char *> ArgArr,
break;
}
- Args.append(A);
+ Args.append(A.release());
}
return Args;
@@ -654,7 +668,7 @@ void OptTable::printHelp(raw_ostream &OS, const char *Usage, const char *Title,
HelpText = getOptionHelpText(Alias.getID());
}
- if (HelpText) {
+ if (HelpText && (strlen(HelpText) != 0)) {
const char *HelpGroup = getOptionHelpGroup(*this, Id);
const std::string &OptName = getOptionHelpName(*this, Id);
GroupedOptionHelp[HelpGroup].push_back({OptName, HelpText});
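
With ParseOneArg and parseOneArgGrouped now returning std::unique_ptr<Arg>, ownership stays explicit until the argument is handed to the list; ParseArgs above releases it into InputArgList::append, which still takes a raw pointer. A small sketch of that caller pattern, assuming an OptTable and an InputArgList set up elsewhere:

    #include "llvm/Option/Arg.h"
    #include "llvm/Option/ArgList.h"
    #include "llvm/Option/OptTable.h"
    #include <cassert>
    #include <memory>

    static void appendParsedArgs(const llvm::opt::OptTable &Table,
                                 llvm::opt::InputArgList &Args) {
      for (unsigned Index = 0, E = Args.getNumInputArgStrings(); Index < E;) {
        unsigned Prev = Index;
        std::unique_ptr<llvm::opt::Arg> A =
            Table.ParseOneArg(Args, Index, /*FlagsToInclude=*/0,
                              /*FlagsToExclude=*/0);
        assert(Index > Prev && "parser did not consume the argument");
        (void)Prev;
        if (A)
          Args.append(A.release()); // the list owns the Arg from here on
      }
    }
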
diff --git a/llvm/lib/Option/Option.cpp b/llvm/lib/Option/Option.cpp
index 68d074b2702e..ebdba8949223 100644
--- a/llvm/lib/Option/Option.cpp
+++ b/llvm/lib/Option/Option.cpp
@@ -106,23 +106,24 @@ bool Option::matches(OptSpecifier Opt) const {
return false;
}
-Arg *Option::acceptInternal(const ArgList &Args, StringRef Spelling,
- unsigned &Index) const {
+std::unique_ptr<Arg> Option::acceptInternal(const ArgList &Args,
+ StringRef Spelling,
+ unsigned &Index) const {
size_t ArgSize = Spelling.size();
switch (getKind()) {
case FlagClass: {
if (ArgSize != strlen(Args.getArgString(Index)))
return nullptr;
- return new Arg(*this, Spelling, Index++);
+ return std::make_unique<Arg>(*this, Spelling, Index++);
}
case JoinedClass: {
const char *Value = Args.getArgString(Index) + ArgSize;
- return new Arg(*this, Spelling, Index++, Value);
+ return std::make_unique<Arg>(*this, Spelling, Index++, Value);
}
case CommaJoinedClass: {
// Always matches.
const char *Str = Args.getArgString(Index) + ArgSize;
- Arg *A = new Arg(*this, Spelling, Index++);
+ auto A = std::make_unique<Arg>(*this, Spelling, Index++);
// Parse out the comma separated values.
const char *Prev = Str;
@@ -158,7 +159,8 @@ Arg *Option::acceptInternal(const ArgList &Args, StringRef Spelling,
Args.getArgString(Index - 1) == nullptr)
return nullptr;
- return new Arg(*this, Spelling, Index - 2, Args.getArgString(Index - 1));
+ return std::make_unique<Arg>(*this, Spelling, Index - 2,
+ Args.getArgString(Index - 1));
case MultiArgClass: {
// Matches iff this is an exact match.
// FIXME: Avoid strlen.
@@ -169,8 +171,8 @@ Arg *Option::acceptInternal(const ArgList &Args, StringRef Spelling,
if (Index > Args.getNumInputArgStrings())
return nullptr;
- Arg *A = new Arg(*this, Spelling, Index - 1 - getNumArgs(),
- Args.getArgString(Index - getNumArgs()));
+ auto A = std::make_unique<Arg>(*this, Spelling, Index - 1 - getNumArgs(),
+ Args.getArgString(Index - getNumArgs()));
for (unsigned i = 1; i != getNumArgs(); ++i)
A->getValues().push_back(Args.getArgString(Index - getNumArgs() + i));
return A;
@@ -180,7 +182,7 @@ Arg *Option::acceptInternal(const ArgList &Args, StringRef Spelling,
// FIXME: Avoid strlen.
if (ArgSize != strlen(Args.getArgString(Index))) {
const char *Value = Args.getArgString(Index) + ArgSize;
- return new Arg(*this, Spelling, Index++, Value);
+ return std::make_unique<Arg>(*this, Spelling, Index++, Value);
}
// Otherwise it must be separate.
@@ -189,7 +191,8 @@ Arg *Option::acceptInternal(const ArgList &Args, StringRef Spelling,
Args.getArgString(Index - 1) == nullptr)
return nullptr;
- return new Arg(*this, Spelling, Index - 2, Args.getArgString(Index - 1));
+ return std::make_unique<Arg>(*this, Spelling, Index - 2,
+ Args.getArgString(Index - 1));
}
case JoinedAndSeparateClass:
// Always matches.
@@ -198,22 +201,22 @@ Arg *Option::acceptInternal(const ArgList &Args, StringRef Spelling,
Args.getArgString(Index - 1) == nullptr)
return nullptr;
- return new Arg(*this, Spelling, Index - 2,
- Args.getArgString(Index - 2) + ArgSize,
- Args.getArgString(Index - 1));
+ return std::make_unique<Arg>(*this, Spelling, Index - 2,
+ Args.getArgString(Index - 2) + ArgSize,
+ Args.getArgString(Index - 1));
case RemainingArgsClass: {
// Matches iff this is an exact match.
// FIXME: Avoid strlen.
if (ArgSize != strlen(Args.getArgString(Index)))
return nullptr;
- Arg *A = new Arg(*this, Spelling, Index++);
+ auto A = std::make_unique<Arg>(*this, Spelling, Index++);
while (Index < Args.getNumInputArgStrings() &&
Args.getArgString(Index) != nullptr)
A->getValues().push_back(Args.getArgString(Index++));
return A;
}
case RemainingArgsJoinedClass: {
- Arg *A = new Arg(*this, Spelling, Index);
+ auto A = std::make_unique<Arg>(*this, Spelling, Index);
if (ArgSize != strlen(Args.getArgString(Index))) {
// An inexact match means there is a joined arg.
A->getValues().push_back(Args.getArgString(Index) + ArgSize);
@@ -230,17 +233,18 @@ Arg *Option::acceptInternal(const ArgList &Args, StringRef Spelling,
}
}
-Arg *Option::accept(const ArgList &Args, StringRef CurArg,
- bool GroupedShortOption, unsigned &Index) const {
- std::unique_ptr<Arg> A(GroupedShortOption && getKind() == FlagClass
- ? new Arg(*this, CurArg, Index)
+std::unique_ptr<Arg> Option::accept(const ArgList &Args, StringRef CurArg,
+ bool GroupedShortOption,
+ unsigned &Index) const {
+ auto A(GroupedShortOption && getKind() == FlagClass
+ ? std::make_unique<Arg>(*this, CurArg, Index)
: acceptInternal(Args, CurArg, Index));
if (!A)
return nullptr;
const Option &UnaliasedOption = getUnaliasedOption();
if (getID() == UnaliasedOption.getID())
- return A.release();
+ return A;
// "A" is an alias for a different flag. For most clients it's more convenient
// if this function returns unaliased Args, so create an unaliased arg for
@@ -259,7 +263,8 @@ Arg *Option::accept(const ArgList &Args, StringRef CurArg,
// Due to this, ArgList::getArgString(A->getIndex()) will return the spelling
// of the aliased arg always, while A->getSpelling() returns either the
// unaliased or the aliased arg, depending on which Arg object it's called on.
- Arg *UnaliasedA = new Arg(UnaliasedOption, UnaliasedSpelling, A->getIndex());
+ auto UnaliasedA =
+ std::make_unique<Arg>(UnaliasedOption, UnaliasedSpelling, A->getIndex());
Arg *RawA = A.get();
UnaliasedA->setAlias(std::move(A));
diff --git a/llvm/lib/Passes/OptimizationLevel.cpp b/llvm/lib/Passes/OptimizationLevel.cpp
new file mode 100644
index 000000000000..a1f8c1e14b1f
--- /dev/null
+++ b/llvm/lib/Passes/OptimizationLevel.cpp
@@ -0,0 +1,30 @@
+//===- OptimizationLevel.cpp ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Passes/OptimizationLevel.h"
+
+using namespace llvm;
+
+const OptimizationLevel OptimizationLevel::O0 = {
+ /*SpeedLevel*/ 0,
+ /*SizeLevel*/ 0};
+const OptimizationLevel OptimizationLevel::O1 = {
+ /*SpeedLevel*/ 1,
+ /*SizeLevel*/ 0};
+const OptimizationLevel OptimizationLevel::O2 = {
+ /*SpeedLevel*/ 2,
+ /*SizeLevel*/ 0};
+const OptimizationLevel OptimizationLevel::O3 = {
+ /*SpeedLevel*/ 3,
+ /*SizeLevel*/ 0};
+const OptimizationLevel OptimizationLevel::Os = {
+ /*SpeedLevel*/ 2,
+ /*SizeLevel*/ 1};
+const OptimizationLevel OptimizationLevel::Oz = {
+ /*SpeedLevel*/ 2,
+ /*SizeLevel*/ 2};
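
With OptimizationLevel split out of PassBuilder into its own header, the level constants are now spelled llvm::OptimizationLevel::O0 through Oz instead of PassBuilder::OptimizationLevel::*. A short sketch of a caller picking up the new spelling, assuming a configured PassBuilder; buildPerModuleDefaultPipeline is the existing entry point:

    #include "llvm/IR/PassManager.h"
    #include "llvm/Passes/OptimizationLevel.h"
    #include "llvm/Passes/PassBuilder.h"

    llvm::ModulePassManager buildDefaultO2(llvm::PassBuilder &PB) {
      // Same entry point as before; only the level's qualified name changed.
      return PB.buildPerModuleDefaultPipeline(llvm::OptimizationLevel::O2);
    }
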
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 79fcc8569b6d..561a881bab0c 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -1,4 +1,4 @@
-//===- Parsing, selection, and construction of pass pipelines -------------===//
+//===- Parsing and selection of pass pipelines ----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -27,6 +27,7 @@
#include "llvm/Analysis/CFLSteensAliasAnalysis.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/CostModel.h"
#include "llvm/Analysis/DDG.h"
#include "llvm/Analysis/DDGPrinter.h"
#include "llvm/Analysis/Delinearization.h"
@@ -109,6 +110,7 @@
#include "llvm/Transforms/IPO/LoopExtractor.h"
#include "llvm/Transforms/IPO/LowerTypeTests.h"
#include "llvm/Transforms/IPO/MergeFunctions.h"
+#include "llvm/Transforms/IPO/ModuleInliner.h"
#include "llvm/Transforms/IPO/OpenMPOpt.h"
#include "llvm/Transforms/IPO/PartialInlining.h"
#include "llvm/Transforms/IPO/SCCP.h"
@@ -241,103 +243,16 @@
using namespace llvm;
-static cl::opt<InliningAdvisorMode> UseInlineAdvisor(
- "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden,
- cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
- cl::values(clEnumValN(InliningAdvisorMode::Default, "default",
- "Heuristics-based inliner version."),
- clEnumValN(InliningAdvisorMode::Development, "development",
- "Use development mode (runtime-loadable model)."),
- clEnumValN(InliningAdvisorMode::Release, "release",
- "Use release mode (AOT-compiled model).")));
-
-static cl::opt<bool> EnableSyntheticCounts(
- "enable-npm-synthetic-counts", cl::init(false), cl::Hidden, cl::ZeroOrMore,
- cl::desc("Run synthetic function entry count generation "
- "pass"));
-
static const Regex DefaultAliasRegex(
"^(default|thinlto-pre-link|thinlto|lto-pre-link|lto)<(O[0123sz])>$");
-/// Flag to enable inline deferral during PGO.
-static cl::opt<bool>
- EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
- cl::Hidden,
- cl::desc("Enable inline deferral during PGO"));
-
-static cl::opt<bool> EnableMemProfiler("enable-mem-prof", cl::init(false),
- cl::Hidden, cl::ZeroOrMore,
- cl::desc("Enable memory profiler"));
-
-static cl::opt<bool> PerformMandatoryInliningsFirst(
- "mandatory-inlining-first", cl::init(true), cl::Hidden, cl::ZeroOrMore,
- cl::desc("Perform mandatory inlinings module-wide, before performing "
- "inlining."));
-
-static cl::opt<bool> EnableO3NonTrivialUnswitching(
- "enable-npm-O3-nontrivial-unswitch", cl::init(true), cl::Hidden,
- cl::ZeroOrMore, cl::desc("Enable non-trivial loop unswitching for -O3"));
-
-PipelineTuningOptions::PipelineTuningOptions() {
- LoopInterleaving = true;
- LoopVectorization = true;
- SLPVectorization = false;
- LoopUnrolling = true;
- ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll;
- LicmMssaOptCap = SetLicmMssaOptCap;
- LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
- CallGraphProfile = true;
- MergeFunctions = false;
-}
-
namespace llvm {
-extern cl::opt<unsigned> MaxDevirtIterations;
-extern cl::opt<bool> EnableConstraintElimination;
-extern cl::opt<bool> EnableFunctionSpecialization;
-extern cl::opt<bool> EnableGVNHoist;
-extern cl::opt<bool> EnableGVNSink;
-extern cl::opt<bool> EnableHotColdSplit;
-extern cl::opt<bool> EnableIROutliner;
-extern cl::opt<bool> EnableOrderFileInstrumentation;
-extern cl::opt<bool> EnableCHR;
-extern cl::opt<bool> EnableLoopInterchange;
-extern cl::opt<bool> EnableUnrollAndJam;
-extern cl::opt<bool> EnableLoopFlatten;
-extern cl::opt<bool> EnableDFAJumpThreading;
-extern cl::opt<bool> RunNewGVN;
-extern cl::opt<bool> RunPartialInlining;
-extern cl::opt<bool> ExtraVectorizerPasses;
-
-extern cl::opt<bool> FlattenedProfileUsed;
-
-extern cl::opt<AttributorRunOption> AttributorRun;
-extern cl::opt<bool> EnableKnowledgeRetention;
-
-extern cl::opt<bool> EnableMatrix;
-
-extern cl::opt<bool> DisablePreInliner;
-extern cl::opt<int> PreInlineThreshold;
+cl::opt<bool> PrintPipelinePasses(
+ "print-pipeline-passes",
+ cl::desc("Print a '-passes' compatible string describing the pipeline "
+ "(best-effort only)."));
} // namespace llvm
-const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O0 = {
- /*SpeedLevel*/ 0,
- /*SizeLevel*/ 0};
-const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O1 = {
- /*SpeedLevel*/ 1,
- /*SizeLevel*/ 0};
-const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O2 = {
- /*SpeedLevel*/ 2,
- /*SizeLevel*/ 0};
-const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O3 = {
- /*SpeedLevel*/ 3,
- /*SizeLevel*/ 0};
-const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::Os = {
- /*SpeedLevel*/ 2,
- /*SizeLevel*/ 1};
-const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::Oz = {
- /*SpeedLevel*/ 2,
- /*SizeLevel*/ 2};
-
namespace {
// The following passes/analyses have custom names, otherwise their name will
@@ -405,6 +320,15 @@ public:
static StringRef name() { return "NoOpFunctionAnalysis"; }
};
+/// No-op loop nest pass which does nothing.
+struct NoOpLoopNestPass : PassInfoMixin<NoOpLoopNestPass> {
+ PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &,
+ LoopStandardAnalysisResults &, LPMUpdater &) {
+ return PreservedAnalyses::all();
+ }
+ static StringRef name() { return "NoOpLoopNestPass"; }
+};
+
/// No-op loop pass which does nothing.
struct NoOpLoopPass : PassInfoMixin<NoOpLoopPass> {
PreservedAnalyses run(Loop &L, LoopAnalysisManager &,
@@ -439,7 +363,8 @@ AnalysisKey NoOpLoopAnalysis::Key;
/// it. This should be updated if new pass instrumentation wants to use the map.
/// We currently only use this for --print-before/after.
bool shouldPopulateClassToPassNames() {
- return !printBeforePasses().empty() || !printAfterPasses().empty();
+ return PrintPipelinePasses || !printBeforePasses().empty() ||
+ !printAfterPasses().empty();
}
} // namespace
@@ -453,6 +378,8 @@ PassBuilder::PassBuilder(TargetMachine *TM, PipelineTuningOptions PTO,
if (PIC && shouldPopulateClassToPassNames()) {
#define MODULE_PASS(NAME, CREATE_PASS) \
PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
+#define MODULE_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
+ PIC->addClassToPassName(CLASS, NAME);
#define MODULE_ANALYSIS(NAME, CREATE_PASS) \
PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
#define FUNCTION_PASS(NAME, CREATE_PASS) \
@@ -461,6 +388,8 @@ PassBuilder::PassBuilder(TargetMachine *TM, PipelineTuningOptions PTO,
PIC->addClassToPassName(CLASS, NAME);
#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \
PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
+#define LOOPNEST_PASS(NAME, CREATE_PASS) \
+ PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
#define LOOP_PASS(NAME, CREATE_PASS) \
PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
#define LOOP_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
@@ -469,18 +398,14 @@ PassBuilder::PassBuilder(TargetMachine *TM, PipelineTuningOptions PTO,
PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
#define CGSCC_PASS(NAME, CREATE_PASS) \
PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
+#define CGSCC_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
+ PIC->addClassToPassName(CLASS, NAME);
#define CGSCC_ANALYSIS(NAME, CREATE_PASS) \
PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
#include "PassRegistry.def"
}
}
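// For context, a sketch of the shape of a *_PASS_WITH_PARAMS record that the
// macros above consume (names are hypothetical, for illustration only):
//   MODULE_PASS_WITH_PARAMS("some-pass", "SomePass",
//                           [](SomePassOptions O) { return SomePass(O); },
//                           parseSomePassOptions, "opt1;opt2")
// CLASS ("SomePass") is registered as the human-readable class name and
// NAME ("some-pass") as the textual pipeline name.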
-void PassBuilder::invokePeepholeEPCallbacks(
- FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) {
- for (auto &C : PeepholeEPCallbacks)
- C(FPM, Level);
-}
-
void PassBuilder::registerModuleAnalyses(ModuleAnalysisManager &MAM) {
#define MODULE_ANALYSIS(NAME, CREATE_PASS) \
MAM.registerPass([&] { return CREATE_PASS; });
@@ -500,6 +425,11 @@ void PassBuilder::registerCGSCCAnalyses(CGSCCAnalysisManager &CGAM) {
}
void PassBuilder::registerFunctionAnalyses(FunctionAnalysisManager &FAM) {
+ // We almost always want the default alias analysis pipeline.
+ // If a user wants a different one, they can register their own before calling
+ // registerFunctionAnalyses().
+ FAM.registerPass([&] { return buildDefaultAAPipeline(); });
+
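// A minimal caller-side sketch of the override mentioned above (assumed usage,
// not part of this patch); registering an AAManager first wins because
// AnalysisManager::registerPass does nothing if the analysis is already
// registered:
//   FunctionAnalysisManager FAM;
//   FAM.registerPass([] {
//     AAManager AA;
//     AA.registerFunctionAnalysis<BasicAA>(); // e.g. only BasicAA
//     return AA;
//   });
//   PB.registerFunctionAnalyses(FAM); // keeps the custom AA pipeline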
#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \
FAM.registerPass([&] { return CREATE_PASS; });
#include "PassRegistry.def"
@@ -517,1518 +447,6 @@ void PassBuilder::registerLoopAnalyses(LoopAnalysisManager &LAM) {
C(LAM);
}
-// Helper to add AnnotationRemarksPass.
-static void addAnnotationRemarksPass(ModulePassManager &MPM) {
- FunctionPassManager FPM;
- FPM.addPass(AnnotationRemarksPass());
- MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
-}
-
-// Helper to check if the current compilation phase is preparing for LTO
-static bool isLTOPreLink(ThinOrFullLTOPhase Phase) {
- return Phase == ThinOrFullLTOPhase::ThinLTOPreLink ||
- Phase == ThinOrFullLTOPhase::FullLTOPreLink;
-}
-
-// TODO: Investigate the cost/benefit of tail call elimination on debugging.
-FunctionPassManager
-PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
- ThinOrFullLTOPhase Phase) {
-
- FunctionPassManager FPM;
-
- // Form SSA out of local memory accesses after breaking apart aggregates into
- // scalars.
- FPM.addPass(SROA());
-
- // Catch trivial redundancies
- FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
-
- // Hoisting of scalars and load expressions.
- FPM.addPass(SimplifyCFGPass());
- FPM.addPass(InstCombinePass());
-
- FPM.addPass(LibCallsShrinkWrapPass());
-
- invokePeepholeEPCallbacks(FPM, Level);
-
- FPM.addPass(SimplifyCFGPass());
-
- // Form canonically associated expression trees, and simplify the trees using
- // basic mathematical properties. For example, this will form (nearly)
- // minimal multiplication trees.
- FPM.addPass(ReassociatePass());
-
- // Add the primary loop simplification pipeline.
- // FIXME: Currently this is split into two loop pass pipelines because we run
- // some function passes in between them. These can and should be removed
- // and/or replaced by scheduling the loop pass equivalents in the correct
- // positions. But those equivalent passes aren't powerful enough yet.
- // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
- // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough to
- // fully replace `SimplifyCFGPass`, and the closest to the other we have is
- // `LoopInstSimplify`.
- LoopPassManager LPM1, LPM2;
-
- // Simplify the loop body. We do this initially to clean up after other loop
- // passes run, either when iterating on a loop or on inner loops with
- // implications on the outer loop.
- LPM1.addPass(LoopInstSimplifyPass());
- LPM1.addPass(LoopSimplifyCFGPass());
-
- // Try to remove as much code from the loop header as possible,
- // to reduce the amount of IR that will have to be duplicated.
- // TODO: Investigate promotion cap for O1.
- LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
-
- LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true,
- isLTOPreLink(Phase)));
- // TODO: Investigate promotion cap for O1.
- LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
- LPM1.addPass(SimpleLoopUnswitchPass());
-
- LPM2.addPass(LoopIdiomRecognizePass());
- LPM2.addPass(IndVarSimplifyPass());
-
- for (auto &C : LateLoopOptimizationsEPCallbacks)
- C(LPM2, Level);
-
- LPM2.addPass(LoopDeletionPass());
-
- if (EnableLoopInterchange)
- LPM2.addPass(LoopInterchangePass());
-
- // Do not enable unrolling in the PreLinkThinLTO phase during sample PGO,
- // because it changes the IR in ways that make profile annotation in the
- // backend compile inaccurate. The normal unroller doesn't pay attention to
- // forced full-unroll attributes, so we need to make sure to still allow the
- // full unroll pass to honor them.
- if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
- PGOOpt->Action != PGOOptions::SampleUse)
- LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
- /* OnlyWhenForced= */ !PTO.LoopUnrolling,
- PTO.ForgetAllSCEVInLoopUnroll));
-
- for (auto &C : LoopOptimizerEndEPCallbacks)
- C(LPM2, Level);
-
- // We provide the opt remark emitter pass for LICM to use. We only need to do
- // this once as it is immutable.
- FPM.addPass(
- RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
- FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
- EnableMSSALoopDependency,
- /*UseBlockFrequencyInfo=*/true));
- FPM.addPass(SimplifyCFGPass());
- FPM.addPass(InstCombinePass());
- if (EnableLoopFlatten)
- FPM.addPass(createFunctionToLoopPassAdaptor(LoopFlattenPass()));
- // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
- // *All* loop passes must preserve it, in order to be able to use it.
- FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
- /*UseMemorySSA=*/false,
- /*UseBlockFrequencyInfo=*/false));
-
- // Delete small array after loop unroll.
- FPM.addPass(SROA());
-
- // Specially optimize memory movement as it doesn't look like dataflow in SSA.
- FPM.addPass(MemCpyOptPass());
-
- // Sparse conditional constant propagation.
- // FIXME: It isn't clear why we do this *after* loop passes rather than
- // before...
- FPM.addPass(SCCPPass());
-
- // Delete dead bit computations (instcombine runs after to fold away the dead
- // computations, and then ADCE will run later to exploit any new DCE
- // opportunities that creates).
- FPM.addPass(BDCEPass());
-
- // Run instcombine after redundancy and dead bit elimination to exploit
- // opportunities opened up by them.
- FPM.addPass(InstCombinePass());
- invokePeepholeEPCallbacks(FPM, Level);
-
- FPM.addPass(CoroElidePass());
-
- for (auto &C : ScalarOptimizerLateEPCallbacks)
- C(FPM, Level);
-
- // Finally, do an expensive DCE pass to catch all the dead code exposed by
- // the simplifications and basic cleanup after all the simplifications.
- // TODO: Investigate if this is too expensive.
- FPM.addPass(ADCEPass());
- FPM.addPass(SimplifyCFGPass());
- FPM.addPass(InstCombinePass());
- invokePeepholeEPCallbacks(FPM, Level);
-
- return FPM;
-}
-
-FunctionPassManager
-PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
- ThinOrFullLTOPhase Phase) {
- assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
-
- // The O1 pipeline has a separate pipeline creation function to simplify
- // construction readability.
- if (Level.getSpeedupLevel() == 1)
- return buildO1FunctionSimplificationPipeline(Level, Phase);
-
- FunctionPassManager FPM;
-
- // Form SSA out of local memory accesses after breaking apart aggregates into
- // scalars.
- FPM.addPass(SROA());
-
- // Catch trivial redundancies
- FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
- if (EnableKnowledgeRetention)
- FPM.addPass(AssumeSimplifyPass());
-
- // Hoisting of scalars and load expressions.
- if (EnableGVNHoist)
- FPM.addPass(GVNHoistPass());
-
- // Global value numbering based sinking.
- if (EnableGVNSink) {
- FPM.addPass(GVNSinkPass());
- FPM.addPass(SimplifyCFGPass());
- }
-
- if (EnableConstraintElimination)
- FPM.addPass(ConstraintEliminationPass());
-
- // Speculative execution if the target has divergent branches; otherwise nop.
- FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
-
- // Optimize based on known information about branches, and cleanup afterward.
- FPM.addPass(JumpThreadingPass());
- FPM.addPass(CorrelatedValuePropagationPass());
-
- FPM.addPass(SimplifyCFGPass());
- if (Level == OptimizationLevel::O3)
- FPM.addPass(AggressiveInstCombinePass());
- FPM.addPass(InstCombinePass());
-
- if (!Level.isOptimizingForSize())
- FPM.addPass(LibCallsShrinkWrapPass());
-
- invokePeepholeEPCallbacks(FPM, Level);
-
- // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
- // using the size value profile. Don't perform this when optimizing for size.
- if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
- !Level.isOptimizingForSize())
- FPM.addPass(PGOMemOPSizeOpt());
-
- FPM.addPass(TailCallElimPass());
- FPM.addPass(SimplifyCFGPass());
-
- // Form canonically associated expression trees, and simplify the trees using
- // basic mathematical properties. For example, this will form (nearly)
- // minimal multiplication trees.
- FPM.addPass(ReassociatePass());
-
- // Add the primary loop simplification pipeline.
- // FIXME: Currently this is split into two loop pass pipelines because we run
- // some function passes in between them. These can and should be removed
- // and/or replaced by scheduling the loop pass equivalents in the correct
- // positions. But those equivalent passes aren't powerful enough yet.
- // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
- // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough to
- // fully replace `SimplifyCFGPass`, and the closest to the other we have is
- // `LoopInstSimplify`.
- LoopPassManager LPM1, LPM2;
-
- // Simplify the loop body. We do this initially to clean up after other loop
- // passes run, either when iterating on a loop or on inner loops with
- // implications on the outer loop.
- LPM1.addPass(LoopInstSimplifyPass());
- LPM1.addPass(LoopSimplifyCFGPass());
-
- // Try to remove as much code from the loop header as possible,
- // to reduce the amount of IR that will have to be duplicated.
- // TODO: Investigate promotion cap for O1.
- LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
-
- // Disable header duplication in loop rotation at -Oz.
- LPM1.addPass(
- LoopRotatePass(Level != OptimizationLevel::Oz, isLTOPreLink(Phase)));
- // TODO: Investigate promotion cap for O1.
- LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
- LPM1.addPass(
- SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3 &&
- EnableO3NonTrivialUnswitching));
- LPM2.addPass(LoopIdiomRecognizePass());
- LPM2.addPass(IndVarSimplifyPass());
-
- for (auto &C : LateLoopOptimizationsEPCallbacks)
- C(LPM2, Level);
-
- LPM2.addPass(LoopDeletionPass());
-
- if (EnableLoopInterchange)
- LPM2.addPass(LoopInterchangePass());
-
- // Do not enable unrolling in the PreLinkThinLTO phase during sample PGO,
- // because it changes the IR in ways that make profile annotation in the
- // backend compile inaccurate. The normal unroller doesn't pay attention to
- // forced full-unroll attributes, so we need to make sure to still allow the
- // full unroll pass to honor them.
- if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
- PGOOpt->Action != PGOOptions::SampleUse)
- LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
- /* OnlyWhenForced= */ !PTO.LoopUnrolling,
- PTO.ForgetAllSCEVInLoopUnroll));
-
- for (auto &C : LoopOptimizerEndEPCallbacks)
- C(LPM2, Level);
-
- // We provide the opt remark emitter pass for LICM to use. We only need to do
- // this once as it is immutable.
- FPM.addPass(
- RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
- FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
- EnableMSSALoopDependency,
- /*UseBlockFrequencyInfo=*/true));
- FPM.addPass(SimplifyCFGPass());
- FPM.addPass(InstCombinePass());
- if (EnableLoopFlatten)
- FPM.addPass(createFunctionToLoopPassAdaptor(LoopFlattenPass()));
- // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
- // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
- // *All* loop passes must preserve it, in order to be able to use it.
- FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
- /*UseMemorySSA=*/false,
- /*UseBlockFrequencyInfo=*/false));
-
- // Delete small array after loop unroll.
- FPM.addPass(SROA());
-
- // Eliminate redundancies.
- FPM.addPass(MergedLoadStoreMotionPass());
- if (RunNewGVN)
- FPM.addPass(NewGVNPass());
- else
- FPM.addPass(GVN());
-
- // Sparse conditional constant propagation.
- // FIXME: It isn't clear why we do this *after* loop passes rather than
- // before...
- FPM.addPass(SCCPPass());
-
- // Delete dead bit computations (instcombine runs after to fold away the dead
- // computations, and then ADCE will run later to exploit any new DCE
- // opportunities that creates).
- FPM.addPass(BDCEPass());
-
- // Run instcombine after redundancy and dead bit elimination to exploit
- // opportunities opened up by them.
- FPM.addPass(InstCombinePass());
- invokePeepholeEPCallbacks(FPM, Level);
-
- // Re-consider control flow based optimizations after redundancy elimination,
- // redo DCE, etc.
- if (EnableDFAJumpThreading && Level.getSizeLevel() == 0)
- FPM.addPass(DFAJumpThreadingPass());
-
- FPM.addPass(JumpThreadingPass());
- FPM.addPass(CorrelatedValuePropagationPass());
-
- // Finally, do an expensive DCE pass to catch all the dead code exposed by
- // the simplifications and basic cleanup after all the simplifications.
- // TODO: Investigate if this is too expensive.
- FPM.addPass(ADCEPass());
-
- // Specially optimize memory movement as it doesn't look like dataflow in SSA.
- FPM.addPass(MemCpyOptPass());
-
- FPM.addPass(DSEPass());
- FPM.addPass(createFunctionToLoopPassAdaptor(
- LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
- EnableMSSALoopDependency, /*UseBlockFrequencyInfo=*/true));
-
- FPM.addPass(CoroElidePass());
-
- for (auto &C : ScalarOptimizerLateEPCallbacks)
- C(FPM, Level);
-
- FPM.addPass(SimplifyCFGPass(
- SimplifyCFGOptions().hoistCommonInsts(true).sinkCommonInsts(true)));
- FPM.addPass(InstCombinePass());
- invokePeepholeEPCallbacks(FPM, Level);
-
- if (EnableCHR && Level == OptimizationLevel::O3 && PGOOpt &&
- (PGOOpt->Action == PGOOptions::IRUse ||
- PGOOpt->Action == PGOOptions::SampleUse))
- FPM.addPass(ControlHeightReductionPass());
-
- return FPM;
-}
-
-void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
- MPM.addPass(CanonicalizeAliasesPass());
- MPM.addPass(NameAnonGlobalPass());
-}
-
-void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
- PassBuilder::OptimizationLevel Level,
- bool RunProfileGen, bool IsCS,
- std::string ProfileFile,
- std::string ProfileRemappingFile) {
- assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
- if (!IsCS && !DisablePreInliner) {
- InlineParams IP;
-
- IP.DefaultThreshold = PreInlineThreshold;
-
- // FIXME: The hint threshold has the same value used by the regular inliner
- // when not optimizing for size. This should probably be lowered after
- // performance testing.
- // FIXME: this comment is cargo-culted from the old pass manager; revisit.
- IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
- ModuleInlinerWrapperPass MIWP(IP);
- CGSCCPassManager &CGPipeline = MIWP.getPM();
-
- FunctionPassManager FPM;
- FPM.addPass(SROA());
- FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
- FPM.addPass(SimplifyCFGPass()); // Merge & remove basic blocks.
- FPM.addPass(InstCombinePass()); // Combine silly sequences.
- invokePeepholeEPCallbacks(FPM, Level);
-
- CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM)));
-
- MPM.addPass(std::move(MIWP));
-
- // Delete anything that is now dead to make sure that we don't instrument
- // dead code. Instrumentation can end up keeping dead code around and
- // dramatically increase code size.
- MPM.addPass(GlobalDCEPass());
- }
-
- if (!RunProfileGen) {
- assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
- MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS));
- // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
- // RequireAnalysisPass for PSI before subsequent non-module passes.
- MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
- return;
- }
-
- // Perform PGO instrumentation.
- MPM.addPass(PGOInstrumentationGen(IsCS));
-
- FunctionPassManager FPM;
- // Disable header duplication in loop rotation at -Oz.
- FPM.addPass(createFunctionToLoopPassAdaptor(
- LoopRotatePass(Level != OptimizationLevel::Oz), EnableMSSALoopDependency,
- /*UseBlockFrequencyInfo=*/false));
- MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
-
- // Add the profile lowering pass.
- InstrProfOptions Options;
- if (!ProfileFile.empty())
- Options.InstrProfileOutput = ProfileFile;
- // Do counter promotion at Level greater than O0.
- Options.DoCounterPromotion = true;
- Options.UseBFIInPromotion = IsCS;
- MPM.addPass(InstrProfiling(Options, IsCS));
-}
-
-void PassBuilder::addPGOInstrPassesForO0(ModulePassManager &MPM,
- bool RunProfileGen, bool IsCS,
- std::string ProfileFile,
- std::string ProfileRemappingFile) {
- if (!RunProfileGen) {
- assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
- MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS));
- // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
- // RequireAnalysisPass for PSI before subsequent non-module passes.
- MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
- return;
- }
-
- // Perform PGO instrumentation.
- MPM.addPass(PGOInstrumentationGen(IsCS));
- // Add the profile lowering pass.
- InstrProfOptions Options;
- if (!ProfileFile.empty())
- Options.InstrProfileOutput = ProfileFile;
- // Do not do counter promotion at O0.
- Options.DoCounterPromotion = false;
- Options.UseBFIInPromotion = IsCS;
- MPM.addPass(InstrProfiling(Options, IsCS));
-}
-
-static InlineParams
-getInlineParamsFromOptLevel(PassBuilder::OptimizationLevel Level) {
- return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel());
-}
-
-ModuleInlinerWrapperPass
-PassBuilder::buildInlinerPipeline(OptimizationLevel Level,
- ThinOrFullLTOPhase Phase) {
- InlineParams IP = getInlineParamsFromOptLevel(Level);
- if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
- PGOOpt->Action == PGOOptions::SampleUse)
- IP.HotCallSiteThreshold = 0;
-
- if (PGOOpt)
- IP.EnableDeferral = EnablePGOInlineDeferral;
-
- ModuleInlinerWrapperPass MIWP(IP, PerformMandatoryInliningsFirst,
- UseInlineAdvisor, MaxDevirtIterations);
-
- // Require the GlobalsAA analysis for the module so we can query it within
- // the CGSCC pipeline.
- MIWP.addModulePass(RequireAnalysisPass<GlobalsAA, Module>());
- // Invalidate AAManager so it can be recreated and pick up the newly available
- // GlobalsAA.
- MIWP.addModulePass(
- createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>()));
-
- // Require the ProfileSummaryAnalysis for the module so we can query it within
- // the inliner pass.
- MIWP.addModulePass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
-
- // Now begin the main postorder CGSCC pipeline.
- // FIXME: The current CGSCC pipeline has its origins in the legacy pass
- // manager and trying to emulate its precise behavior. Much of this doesn't
- // make a lot of sense and we should revisit the core CGSCC structure.
- CGSCCPassManager &MainCGPipeline = MIWP.getPM();
-
- // Note: historically, the PruneEH pass was run first to deduce nounwind and
- // generally clean up exception handling overhead. It isn't clear this is
- // valuable as the inliner doesn't currently care whether it is inlining an
- // invoke or a call.
-
- if (AttributorRun & AttributorRunOption::CGSCC)
- MainCGPipeline.addPass(AttributorCGSCCPass());
-
- // Now deduce any function attributes based on the current code.
- MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
-
- // When at O3 add argument promotion to the pass pipeline.
- // FIXME: It isn't at all clear why this should be limited to O3.
- if (Level == OptimizationLevel::O3)
- MainCGPipeline.addPass(ArgumentPromotionPass());
-
- // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
- // there are no OpenMP runtime calls present in the module.
- if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
- MainCGPipeline.addPass(OpenMPOptCGSCCPass());
-
- for (auto &C : CGSCCOptimizerLateEPCallbacks)
- C(MainCGPipeline, Level);
-
- // Lastly, add the core function simplification pipeline nested inside the
- // CGSCC walk.
- MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
- buildFunctionSimplificationPipeline(Level, Phase)));
-
- MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
-
- return MIWP;
-}
-
-ModulePassManager
-PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
- ThinOrFullLTOPhase Phase) {
- ModulePassManager MPM;
-
- // Place pseudo probe instrumentation as the first pass of the pipeline to
- // minimize the impact of optimization changes.
- if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
- Phase != ThinOrFullLTOPhase::ThinLTOPostLink)
- MPM.addPass(SampleProfileProbePass(TM));
-
- bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
-
- // In ThinLTO mode, when a flattened profile is used, all the available
- // profile information will be annotated in the PreLink phase, so there is
- // no need to load the profile again in PostLink.
- bool LoadSampleProfile =
- HasSampleProfile &&
- !(FlattenedProfileUsed && Phase == ThinOrFullLTOPhase::ThinLTOPostLink);
-
- // During the ThinLTO backend phase we perform early indirect call promotion
- // here, before globalopt. Otherwise imported available_externally functions
- // look unreferenced and are removed. If we are going to load the sample
- // profile then defer until later.
- // TODO: See if we can move later and consolidate with the location where
- // we perform ICP when we are loading a sample profile.
- // TODO: We pass HasSampleProfile (whether there was a sample profile file
- // passed to the compile) to the SamplePGO flag of ICP. This is used to
- // determine whether the new direct calls are annotated with prof metadata.
- // Ideally this should be determined from whether the IR is annotated with
- // sample profile, and not whether a sample profile was provided on the
- // command line. E.g. for flattened profiles where we will not be reloading
- // the sample profile in the ThinLTO backend, we ideally shouldn't have to
- // provide the sample profile file.
- if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile)
- MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
-
- // Do basic inference of function attributes from known properties of system
- // libraries and other oracles.
- MPM.addPass(InferFunctionAttrsPass());
-
- // Create an early function pass manager to cleanup the output of the
- // frontend.
- FunctionPassManager EarlyFPM;
- // Lower llvm.expect to metadata before attempting transforms.
- // Compare/branch metadata may alter the behavior of passes like SimplifyCFG.
- EarlyFPM.addPass(LowerExpectIntrinsicPass());
- EarlyFPM.addPass(SimplifyCFGPass());
- EarlyFPM.addPass(SROA());
- EarlyFPM.addPass(EarlyCSEPass());
- EarlyFPM.addPass(CoroEarlyPass());
- if (Level == OptimizationLevel::O3)
- EarlyFPM.addPass(CallSiteSplittingPass());
-
- // In the SamplePGO ThinLTO backend, we need instcombine before profile
- // annotation to convert bitcasted calls to direct calls so that they can be
- // inlined during the profile annotation preparation step.
- // More details about SamplePGO design can be found in:
- // https://research.google.com/pubs/pub45290.html
- // FIXME: revisit how SampleProfileLoad/Inliner/ICP is structured.
- if (LoadSampleProfile)
- EarlyFPM.addPass(InstCombinePass());
- MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM)));
-
- if (LoadSampleProfile) {
- // Annotate sample profile right after early FPM to ensure freshness of
- // the debug info.
- MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
- PGOOpt->ProfileRemappingFile, Phase));
- // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
- // RequireAnalysisPass for PSI before subsequent non-module passes.
- MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
- // Do not invoke ICP in the LTOPrelink phase as it makes it hard
- // for the profile annotation to be accurate in the LTO backend.
- if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink &&
- Phase != ThinOrFullLTOPhase::FullLTOPreLink)
- // We perform early indirect call promotion here, before globalopt.
- // This is important for the ThinLTO backend phase because otherwise
- // imported available_externally functions look unreferenced and are
- // removed.
- MPM.addPass(
- PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
- }
-
- // Try to perform OpenMP specific optimizations on the module. This is a
- // (quick!) no-op if there are no OpenMP runtime calls present in the module.
- if (Level != OptimizationLevel::O0)
- MPM.addPass(OpenMPOptPass());
-
- if (AttributorRun & AttributorRunOption::MODULE)
- MPM.addPass(AttributorPass());
-
- // Lower type metadata and the type.test intrinsic in the ThinLTO
- // post link pipeline after ICP. This is to enable usage of the type
- // tests in ICP sequences.
- if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink)
- MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
-
- for (auto &C : PipelineEarlySimplificationEPCallbacks)
- C(MPM, Level);
-
- // Specialize functions with IPSCCP.
- if (EnableFunctionSpecialization)
- MPM.addPass(FunctionSpecializationPass());
-
- // Interprocedural constant propagation now that basic cleanup has occurred
- // and prior to optimizing globals.
- // FIXME: This position in the pipeline hasn't been carefully considered in
- // years, it should be re-analyzed.
- MPM.addPass(IPSCCPPass());
-
- // Attach metadata to indirect call sites indicating the set of functions
- // they may target at run-time. This should follow IPSCCP.
- MPM.addPass(CalledValuePropagationPass());
-
- // Optimize globals to try and fold them into constants.
- MPM.addPass(GlobalOptPass());
-
- // Promote any localized globals to SSA registers.
- // FIXME: Should this instead be a run of SROA?
- // FIXME: We should probably run instcombine and simplifycfg afterward to
- // delete control flows that are dead once globals have been folded to
- // constants.
- MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass()));
-
- // Remove any dead arguments exposed by cleanups and constant folding
- // globals.
- MPM.addPass(DeadArgumentEliminationPass());
-
- // Create a small function pass pipeline to cleanup after all the global
- // optimizations.
- FunctionPassManager GlobalCleanupPM;
- GlobalCleanupPM.addPass(InstCombinePass());
- invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
-
- GlobalCleanupPM.addPass(SimplifyCFGPass());
- MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM)));
-
- // Add all the requested passes for instrumentation PGO, if requested.
- if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
- (PGOOpt->Action == PGOOptions::IRInstr ||
- PGOOpt->Action == PGOOptions::IRUse)) {
- addPGOInstrPasses(MPM, Level,
- /* RunProfileGen */ PGOOpt->Action == PGOOptions::IRInstr,
- /* IsCS */ false, PGOOpt->ProfileFile,
- PGOOpt->ProfileRemappingFile);
- MPM.addPass(PGOIndirectCallPromotion(false, false));
- }
- if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
- PGOOpt->CSAction == PGOOptions::CSIRInstr)
- MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile));
-
- // Synthesize function entry counts for non-PGO compilation.
- if (EnableSyntheticCounts && !PGOOpt)
- MPM.addPass(SyntheticCountsPropagation());
-
- MPM.addPass(buildInlinerPipeline(Level, Phase));
-
- if (EnableMemProfiler && Phase != ThinOrFullLTOPhase::ThinLTOPreLink) {
- MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass()));
- MPM.addPass(ModuleMemProfilerPass());
- }
-
- return MPM;
-}
-
-/// TODO: Should LTO cause any differences to this set of passes?
-void PassBuilder::addVectorPasses(OptimizationLevel Level,
- FunctionPassManager &FPM, bool IsFullLTO) {
- FPM.addPass(LoopVectorizePass(
- LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization)));
-
- if (IsFullLTO) {
- // The vectorizer may have significantly shortened a loop body; unroll
- // again. Unroll small loops to hide loop backedge latency and saturate any
- // parallel execution resources of an out-of-order processor. We also then
- // need to clean up redundancies and loop invariant code.
- // FIXME: It would be really good to use a loop-integrated instruction
- // combiner for cleanup here so that the unrolling and LICM can be pipelined
- // across the loop nests.
- // We do UnrollAndJam in a separate LPM to ensure it happens before unrolling.
- if (EnableUnrollAndJam && PTO.LoopUnrolling)
- FPM.addPass(createFunctionToLoopPassAdaptor(
- LoopUnrollAndJamPass(Level.getSpeedupLevel())));
- FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
- Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
- PTO.ForgetAllSCEVInLoopUnroll)));
- FPM.addPass(WarnMissedTransformationsPass());
- }
-
- if (!IsFullLTO) {
- // Eliminate loads by forwarding stores from the previous iteration to loads
- // of the current iteration.
- FPM.addPass(LoopLoadEliminationPass());
- }
- // Cleanup after the loop optimization passes.
- FPM.addPass(InstCombinePass());
-
- if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
- // At higher optimization levels, try to clean up any runtime overlap and
- // alignment checks inserted by the vectorizer. We want to track correlated
- // runtime checks for two inner loops in the same outer loop, fold any
- // common computations, hoist loop-invariant aspects out of any outer loop,
- // and unswitch the runtime checks if possible. Once hoisted, we may have
- // dead (or speculatable) control flows or more combining opportunities.
- FPM.addPass(EarlyCSEPass());
- FPM.addPass(CorrelatedValuePropagationPass());
- FPM.addPass(InstCombinePass());
- LoopPassManager LPM;
- LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
- LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
- OptimizationLevel::O3));
- FPM.addPass(
- RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
- FPM.addPass(createFunctionToLoopPassAdaptor(
- std::move(LPM), EnableMSSALoopDependency,
- /*UseBlockFrequencyInfo=*/true));
- FPM.addPass(SimplifyCFGPass());
- FPM.addPass(InstCombinePass());
- }
-
- // Now that we've formed fast-to-execute loop structures, we do further
- // optimizations. These are run afterward as they might block doing complex
- // analyses and transforms such as what are needed for loop vectorization.
-
- // Cleanup after loop vectorization, etc. Simplification passes like CVP and
- // GVN, loop transforms, and others have already run, so it's now better to
- // convert to more optimized IR using more aggressive simplify CFG options.
- // The extra sinking transform can create larger basic blocks, so do this
- // before SLP vectorization.
- FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
- .forwardSwitchCondToPhi(true)
- .convertSwitchToLookupTable(true)
- .needCanonicalLoops(false)
- .hoistCommonInsts(true)
- .sinkCommonInsts(true)));
-
- if (IsFullLTO) {
- FPM.addPass(SCCPPass());
- FPM.addPass(InstCombinePass());
- FPM.addPass(BDCEPass());
- }
-
- // Optimize parallel scalar instruction chains into SIMD instructions.
- if (PTO.SLPVectorization) {
- FPM.addPass(SLPVectorizerPass());
- if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
- FPM.addPass(EarlyCSEPass());
- }
- }
- // Enhance/cleanup vector code.
- FPM.addPass(VectorCombinePass());
-
- if (!IsFullLTO) {
- FPM.addPass(InstCombinePass());
- // Unroll small loops to hide loop backedge latency and saturate any
- // parallel execution resources of an out-of-order processor. We also then
- // need to clean up redundancies and loop invariant code.
- // FIXME: It would be really good to use a loop-integrated instruction
- // combiner for cleanup here so that the unrolling and LICM can be pipelined
- // across the loop nests.
- // We do UnrollAndJam in a separate LPM to ensure it happens before unrolling.
- if (EnableUnrollAndJam && PTO.LoopUnrolling) {
- FPM.addPass(createFunctionToLoopPassAdaptor(
- LoopUnrollAndJamPass(Level.getSpeedupLevel())));
- }
- FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
- Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
- PTO.ForgetAllSCEVInLoopUnroll)));
- FPM.addPass(WarnMissedTransformationsPass());
- FPM.addPass(InstCombinePass());
- FPM.addPass(
- RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
- FPM.addPass(createFunctionToLoopPassAdaptor(
- LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
- EnableMSSALoopDependency, /*UseBlockFrequencyInfo=*/true));
- }
-
- // Now that we've vectorized and unrolled loops, we may have more refined
- // alignment information, try to re-derive it here.
- FPM.addPass(AlignmentFromAssumptionsPass());
-
- if (IsFullLTO)
- FPM.addPass(InstCombinePass());
-}
-
-ModulePassManager
-PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
- bool LTOPreLink) {
- ModulePassManager MPM;
-
- // Optimize globals now that the module is fully simplified.
- MPM.addPass(GlobalOptPass());
- MPM.addPass(GlobalDCEPass());
-
- // Run partial inlining pass to partially inline functions that have
- // large bodies.
- if (RunPartialInlining)
- MPM.addPass(PartialInlinerPass());
-
- // Remove available_externally function and global definitions since we aren't compiling
- // an object file for later LTO. For LTO we want to preserve these so they
- // are eligible for inlining at link-time. Note if they are unreferenced they
- // will be removed by GlobalDCE later, so this only impacts referenced
- // available externally globals. Eventually they will be suppressed during
- // codegen, but eliminating here enables more opportunity for GlobalDCE as it
- // may make globals referenced by available external functions dead and saves
- // running remaining passes on the eliminated functions. These should be
- // preserved during prelinking for link-time inlining decisions.
- if (!LTOPreLink)
- MPM.addPass(EliminateAvailableExternallyPass());
-
- if (EnableOrderFileInstrumentation)
- MPM.addPass(InstrOrderFilePass());
-
- // Do RPO function attribute inference across the module to forward-propagate
- // attributes where applicable.
- // FIXME: Is this really an optimization rather than a canonicalization?
- MPM.addPass(ReversePostOrderFunctionAttrsPass());
-
- // Do a post-inline PGO instrumentation and use pass. This is a
- // context-sensitive PGO pass. We don't want to do this in the LTOPreLink
- // phase, as cross-module inlining has not been done yet. The
- // context-sensitive instrumentation is added after all the inlining is done.
- if (!LTOPreLink && PGOOpt) {
- if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
- addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true,
- /* IsCS */ true, PGOOpt->CSProfileGenFile,
- PGOOpt->ProfileRemappingFile);
- else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
- addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false,
- /* IsCS */ true, PGOOpt->ProfileFile,
- PGOOpt->ProfileRemappingFile);
- }
-
- // Re-require GlobalsAA here prior to function passes. This is particularly
- // useful as the above will have inlined, DCE'ed, and function-attr
- // propagated everything. We should at this point have a reasonably minimal
- // and richly annotated call graph. By computing aliasing and mod/ref
- // information for all local globals here, the late loop passes and notably
- // the vectorizer will be able to use them to help recognize vectorizable
- // memory operations.
- MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
-
- FunctionPassManager OptimizePM;
- OptimizePM.addPass(Float2IntPass());
- OptimizePM.addPass(LowerConstantIntrinsicsPass());
-
- if (EnableMatrix) {
- OptimizePM.addPass(LowerMatrixIntrinsicsPass());
- OptimizePM.addPass(EarlyCSEPass());
- }
-
- // FIXME: We need to run some loop optimizations to re-rotate loops after
- // simplifycfg and others undo their rotation.
-
- // Optimize the loop execution. These passes operate on entire loop nests
- // rather than on each loop in an inside-out manner, and so they are actually
- // function passes.
-
- for (auto &C : VectorizerStartEPCallbacks)
- C(OptimizePM, Level);
-
- // First rotate loops that may have been un-rotated by prior passes.
- // Disable header duplication at -Oz.
- OptimizePM.addPass(createFunctionToLoopPassAdaptor(
- LoopRotatePass(Level != OptimizationLevel::Oz, LTOPreLink),
- EnableMSSALoopDependency,
- /*UseBlockFrequencyInfo=*/false));
-
- // Distribute loops to allow partial vectorization, i.e. isolate dependences
- // into a separate loop that would otherwise inhibit vectorization. This is
- // currently only performed for loops marked with the metadata
- // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
- OptimizePM.addPass(LoopDistributePass());
-
- // Populates the VFABI attribute with the scalar-to-vector mappings
- // from the TargetLibraryInfo.
- OptimizePM.addPass(InjectTLIMappings());
-
- addVectorPasses(Level, OptimizePM, /* IsFullLTO */ false);
-
- // Split out cold code. Splitting is done late to avoid hiding context from
- // other optimizations and inadvertently regressing performance. The tradeoff
- // is that this has a higher code size cost than splitting early.
- if (EnableHotColdSplit && !LTOPreLink)
- MPM.addPass(HotColdSplittingPass());
-
- // Search the code for structurally similar regions. If enough similar
- // regions can be found where extracting them into their own functions will
- // decrease the size of the program, we extract the regions and deduplicate
- // the structurally similar ones.
- if (EnableIROutliner)
- MPM.addPass(IROutlinerPass());
-
- // Merge functions if requested.
- if (PTO.MergeFunctions)
- MPM.addPass(MergeFunctionsPass());
-
- // The LoopSink pass sinks instructions hoisted by LICM, which serves as a
- // canonicalization pass that enables other optimizations. As a result, the
- // LoopSink pass needs to be a very late IR pass to avoid undoing the LICM
- // results too early.
- OptimizePM.addPass(LoopSinkPass());
-
- // And finally clean up LCSSA form before generating code.
- OptimizePM.addPass(InstSimplifyPass());
-
- // This hoists/decomposes div/rem ops. It should run after other sink/hoist
- // passes to avoid re-sinking, but before SimplifyCFG because it can allow
- // flattening of blocks.
- OptimizePM.addPass(DivRemPairsPass());
-
- // LoopSink (and other loop passes since the last simplifyCFG) might have
- // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
- OptimizePM.addPass(SimplifyCFGPass());
-
- OptimizePM.addPass(CoroCleanupPass());
-
- // Add the core optimizing pipeline.
- MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM)));
-
- for (auto &C : OptimizerLastEPCallbacks)
- C(MPM, Level);
-
- if (PTO.CallGraphProfile)
- MPM.addPass(CGProfilePass());
-
- // Now we need to do some global optimization transforms.
- // FIXME: It would seem like these should come first in the optimization
- // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
- // ordering here.
- MPM.addPass(GlobalDCEPass());
- MPM.addPass(ConstantMergePass());
-
- // TODO: The relative lookup table converter pass caused an issue when full
- // LTO is enabled. See https://reviews.llvm.org/D94355 for more details.
- // Until the issue is fixed, disable this pass during the pre-linking phase.
- if (!LTOPreLink)
- MPM.addPass(RelLookupTableConverterPass());
-
- return MPM;
-}
-
-ModulePassManager
-PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
- bool LTOPreLink) {
- assert(Level != OptimizationLevel::O0 &&
- "Must request optimizations for the default pipeline!");
-
- ModulePassManager MPM;
-
- // Convert @llvm.global.annotations to !annotation metadata.
- MPM.addPass(Annotation2MetadataPass());
-
- // Force any function attributes we want the rest of the pipeline to observe.
- MPM.addPass(ForceFunctionAttrsPass());
-
- // Apply module pipeline start EP callback.
- for (auto &C : PipelineStartEPCallbacks)
- C(MPM, Level);
-
- if (PGOOpt && PGOOpt->DebugInfoForProfiling)
- MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
-
- // Add the core simplification pipeline.
- MPM.addPass(buildModuleSimplificationPipeline(
- Level, LTOPreLink ? ThinOrFullLTOPhase::FullLTOPreLink
- : ThinOrFullLTOPhase::None));
-
- // Now add the optimization pipeline.
- MPM.addPass(buildModuleOptimizationPipeline(Level, LTOPreLink));
-
- if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
- MPM.addPass(PseudoProbeUpdatePass());
-
- // Emit annotation remarks.
- addAnnotationRemarksPass(MPM);
-
- if (LTOPreLink)
- addRequiredLTOPreLinkPasses(MPM);
-
- return MPM;
-}
-
-ModulePassManager
-PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
- assert(Level != OptimizationLevel::O0 &&
- "Must request optimizations for the default pipeline!");
-
- ModulePassManager MPM;
-
- // Convert @llvm.global.annotations to !annotation metadata.
- MPM.addPass(Annotation2MetadataPass());
-
- // Force any function attributes we want the rest of the pipeline to observe.
- MPM.addPass(ForceFunctionAttrsPass());
-
- if (PGOOpt && PGOOpt->DebugInfoForProfiling)
- MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
-
- // Apply module pipeline start EP callback.
- for (auto &C : PipelineStartEPCallbacks)
- C(MPM, Level);
-
- // If we are planning to perform ThinLTO later, we don't bloat the code with
- // unrolling/vectorization/... now. Just simplify the module as much as we
- // can.
- MPM.addPass(buildModuleSimplificationPipeline(
- Level, ThinOrFullLTOPhase::ThinLTOPreLink));
-
- // Run partial inlining pass to partially inline functions that have
- // large bodies.
- // FIXME: It isn't clear whether this is really the right place to run this
- // in ThinLTO. Because there is another canonicalization and simplification
- // phase that will run after the thin link, running this here ends up with
- // less information than will be available later and it may grow functions in
- // ways that aren't beneficial.
- if (RunPartialInlining)
- MPM.addPass(PartialInlinerPass());
-
- // Reduce the size of the IR as much as possible.
- MPM.addPass(GlobalOptPass());
-
- // Module simplification splits coroutines, but does not fully clean up
- // coroutine intrinsics. To ensure ThinLTO optimization passes don't trip up
- // on these, we schedule the cleanup here.
- MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass()));
-
- if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
- MPM.addPass(PseudoProbeUpdatePass());
-
- // Handle OptimizerLastEPCallbacks added by clang on PreLink. Actual
- // optimization is going to be done in PostLink stage, but clang can't
- // add callbacks there in case of in-process ThinLTO called by linker.
- for (auto &C : OptimizerLastEPCallbacks)
- C(MPM, Level);
-
- // Emit annotation remarks.
- addAnnotationRemarksPass(MPM);
-
- addRequiredLTOPreLinkPasses(MPM);
-
- return MPM;
-}
-
-ModulePassManager PassBuilder::buildThinLTODefaultPipeline(
- OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
- ModulePassManager MPM;
-
- // Convert @llvm.global.annotations to !annotation metadata.
- MPM.addPass(Annotation2MetadataPass());
-
- if (ImportSummary) {
- // These passes import type identifier resolutions for whole-program
- // devirtualization and CFI. They must run early because other passes may
- // disturb the specific instruction patterns that these passes look for,
- // creating dependencies on resolutions that may not appear in the summary.
- //
- // For example, GVN may transform the pattern assume(type.test) appearing in
- // two basic blocks into assume(phi(type.test, type.test)), which would
- // transform a dependency on a WPD resolution into a dependency on a type
- // identifier resolution for CFI.
- //
- // Also, WPD has access to more precise information than ICP and can
- // devirtualize more effectively, so it should operate on the IR first.
- //
- // The WPD and LowerTypeTest passes need to run at -O0 to lower type
- // metadata and intrinsics.
- MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
- MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
- }
-
- if (Level == OptimizationLevel::O0) {
- // Run a second time to clean up any type tests left behind by WPD for use
- // in ICP.
- MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
- // Drop available_externally and unreferenced globals. This is necessary
- // with ThinLTO in order to avoid leaving undefined references to dead
- // globals in the object file.
- MPM.addPass(EliminateAvailableExternallyPass());
- MPM.addPass(GlobalDCEPass());
- return MPM;
- }
-
- // Force any function attributes we want the rest of the pipeline to observe.
- MPM.addPass(ForceFunctionAttrsPass());
-
- // Add the core simplification pipeline.
- MPM.addPass(buildModuleSimplificationPipeline(
- Level, ThinOrFullLTOPhase::ThinLTOPostLink));
-
- // Now add the optimization pipeline.
- MPM.addPass(buildModuleOptimizationPipeline(Level));
-
- // Emit annotation remarks.
- addAnnotationRemarksPass(MPM);
-
- return MPM;
-}
-
-ModulePassManager
-PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
- assert(Level != OptimizationLevel::O0 &&
- "Must request optimizations for the default pipeline!");
- // FIXME: We should use a customized pre-link pipeline!
- return buildPerModuleDefaultPipeline(Level,
- /* LTOPreLink */ true);
-}
-
-ModulePassManager
-PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
- ModuleSummaryIndex *ExportSummary) {
- ModulePassManager MPM;
-
- // Convert @llvm.global.annotations to !annotation metadata.
- MPM.addPass(Annotation2MetadataPass());
-
- // Create a function that performs CFI checks for cross-DSO calls with targets
- // in the current module.
- MPM.addPass(CrossDSOCFIPass());
-
- if (Level == OptimizationLevel::O0) {
- // The WPD and LowerTypeTest passes need to run at -O0 to lower type
- // metadata and intrinsics.
- MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
- MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
- // Run a second time to clean up any type tests left behind by WPD for use
- // in ICP.
- MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
-
- // Emit annotation remarks.
- addAnnotationRemarksPass(MPM);
-
- return MPM;
- }
-
- if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
- // Load sample profile before running the LTO optimization pipeline.
- MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
- PGOOpt->ProfileRemappingFile,
- ThinOrFullLTOPhase::FullLTOPostLink));
- // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
- // RequireAnalysisPass for PSI before subsequent non-module passes.
- MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
- }
-
- // Remove unused virtual tables to improve the quality of code generated by
- // whole-program devirtualization and bitset lowering.
- MPM.addPass(GlobalDCEPass());
-
- // Force any function attributes we want the rest of the pipeline to observe.
- MPM.addPass(ForceFunctionAttrsPass());
-
- // Do basic inference of function attributes from known properties of system
- // libraries and other oracles.
- MPM.addPass(InferFunctionAttrsPass());
-
- if (Level.getSpeedupLevel() > 1) {
- FunctionPassManager EarlyFPM;
- EarlyFPM.addPass(CallSiteSplittingPass());
- MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM)));
-
- // Indirect call promotion. This should promote all the targets that are
- // left by the earlier promotion pass that promotes intra-module targets.
- // This two-step promotion is to save the compile time. For LTO, it should
- // produce the same result as if we only do promotion here.
- MPM.addPass(PGOIndirectCallPromotion(
- true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
-
- if (EnableFunctionSpecialization)
- MPM.addPass(FunctionSpecializationPass());
- // Propagate constants at call sites into the functions they call. This
- // opens opportunities for globalopt (and inlining) by substituting function
- // pointers passed as arguments to direct uses of functions.
- MPM.addPass(IPSCCPPass());
-
- // Attach metadata to indirect call sites indicating the set of functions
- // they may target at run-time. This should follow IPSCCP.
- MPM.addPass(CalledValuePropagationPass());
- }
-
- // Now deduce any function attributes based on the current code.
- MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
- PostOrderFunctionAttrsPass()));
-
- // Do RPO function attribute inference across the module to forward-propagate
- // attributes where applicable.
- // FIXME: Is this really an optimization rather than a canonicalization?
- MPM.addPass(ReversePostOrderFunctionAttrsPass());
-
- // Use in-range annotations on GEP indices to split globals where beneficial.
- MPM.addPass(GlobalSplitPass());
-
- // Run whole program optimization of virtual call when the list of callees
- // is fixed.
- MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
-
- // Stop here at -O1.
- if (Level == OptimizationLevel::O1) {
- // The LowerTypeTestsPass needs to run to lower type metadata and the
- // type.test intrinsics. The pass does nothing if CFI is disabled.
- MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
- // Run a second time to clean up any type tests left behind by WPD for use
- // in ICP (which is performed earlier than this in the regular LTO
- // pipeline).
- MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
-
- // Emit annotation remarks.
- addAnnotationRemarksPass(MPM);
-
- return MPM;
- }
-
- // Optimize globals to try and fold them into constants.
- MPM.addPass(GlobalOptPass());
-
- // Promote any localized globals to SSA registers.
- MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass()));
-
- // Linking modules together can lead to duplicate global constants; only
- // keep one copy of each constant.
- MPM.addPass(ConstantMergePass());
-
- // Remove unused arguments from functions.
- MPM.addPass(DeadArgumentEliminationPass());
-
- // Reduce the code after globalopt and ipsccp. Both can open up significant
- // simplification opportunities, and both can propagate functions through
- // function pointers. When this happens, we often have to resolve varargs
- // calls, etc, so let instcombine do this.
- FunctionPassManager PeepholeFPM;
- if (Level == OptimizationLevel::O3)
- PeepholeFPM.addPass(AggressiveInstCombinePass());
- PeepholeFPM.addPass(InstCombinePass());
- invokePeepholeEPCallbacks(PeepholeFPM, Level);
-
- MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM)));
-
- // Note: historically, the PruneEH pass was run first to deduce nounwind and
- // generally clean up exception handling overhead. It isn't clear this is
- // valuable as the inliner doesn't currently care whether it is inlining an
- // invoke or a call.
- // Run the inliner now.
- MPM.addPass(ModuleInlinerWrapperPass(getInlineParamsFromOptLevel(Level)));
-
- // Optimize globals again after we ran the inliner.
- MPM.addPass(GlobalOptPass());
-
- // Garbage collect dead functions.
- // FIXME: Add ArgumentPromotion pass after once it's ported.
- MPM.addPass(GlobalDCEPass());
-
- FunctionPassManager FPM;
- // The IPO Passes may leave cruft around. Clean up after them.
- FPM.addPass(InstCombinePass());
- invokePeepholeEPCallbacks(FPM, Level);
-
- FPM.addPass(JumpThreadingPass(/*InsertFreezeWhenUnfoldingSelect*/ true));
-
- // Do a post-inline PGO instrumentation and use pass. This is a
- // context-sensitive PGO pass.
- if (PGOOpt) {
- if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
- addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true,
- /* IsCS */ true, PGOOpt->CSProfileGenFile,
- PGOOpt->ProfileRemappingFile);
- else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
- addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false,
- /* IsCS */ true, PGOOpt->ProfileFile,
- PGOOpt->ProfileRemappingFile);
- }
-
- // Break up allocas
- FPM.addPass(SROA());
-
- // LTO provides additional opportunities for tailcall elimination due to
- // link-time inlining and visibility of the nocapture attribute.
- FPM.addPass(TailCallElimPass());
-
- // Run a few AA-driven optimizations here and now to clean up the code.
- MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
-
- MPM.addPass(
- createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass()));
-
- // Require the GlobalsAA analysis for the module so we can query it within
- // MainFPM.
- MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
- // Invalidate AAManager so it can be recreated and pick up the newly available
- // GlobalsAA.
- MPM.addPass(
- createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>()));
-
- FunctionPassManager MainFPM;
- MainFPM.addPass(createFunctionToLoopPassAdaptor(
- LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
- EnableMSSALoopDependency, /*UseBlockFrequencyInfo=*/true));
-
- if (RunNewGVN)
- MainFPM.addPass(NewGVNPass());
- else
- MainFPM.addPass(GVN());
-
- // Remove dead memcpy()'s.
- MainFPM.addPass(MemCpyOptPass());
-
- // Nuke dead stores.
- MainFPM.addPass(DSEPass());
- MainFPM.addPass(MergedLoadStoreMotionPass());
-
- // More loops are countable; try to optimize them.
- if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
- MainFPM.addPass(createFunctionToLoopPassAdaptor(LoopFlattenPass()));
-
- if (EnableConstraintElimination)
- MainFPM.addPass(ConstraintEliminationPass());
-
- LoopPassManager LPM;
- LPM.addPass(IndVarSimplifyPass());
- LPM.addPass(LoopDeletionPass());
- // FIXME: Add loop interchange.
-
- // Unroll small loops and perform peeling.
- LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
- /* OnlyWhenForced= */ !PTO.LoopUnrolling,
- PTO.ForgetAllSCEVInLoopUnroll));
- // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
- // *All* loop passes must preserve it, in order to be able to use it.
- MainFPM.addPass(createFunctionToLoopPassAdaptor(
- std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true));
-
- MainFPM.addPass(LoopDistributePass());
-
- addVectorPasses(Level, MainFPM, /* IsFullLTO */ true);
-
- invokePeepholeEPCallbacks(MainFPM, Level);
- MainFPM.addPass(JumpThreadingPass(/*InsertFreezeWhenUnfoldingSelect*/ true));
- MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM)));
-
- // Lower type metadata and the type.test intrinsic. This pass supports
- // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
- // to be run at link time if CFI is enabled. This pass does nothing if
- // CFI is disabled.
- MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
- // Run a second time to clean up any type tests left behind by WPD for use
- // in ICP (which is performed earlier than this in the regular LTO pipeline).
- MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
-
- // Enable splitting late in the FullLTO post-link pipeline. This is done in
- // the same stage in the old pass manager (\ref addLateLTOOptimizationPasses).
- if (EnableHotColdSplit)
- MPM.addPass(HotColdSplittingPass());
-
- // Add late LTO optimization passes.
- // Delete basic blocks, which optimization passes may have killed.
- MPM.addPass(createModuleToFunctionPassAdaptor(
- SimplifyCFGPass(SimplifyCFGOptions().hoistCommonInsts(true))));
-
- // Drop bodies of available_externally objects to improve GlobalDCE.
- MPM.addPass(EliminateAvailableExternallyPass());
-
- // Now that we have optimized the program, discard unreachable functions.
- MPM.addPass(GlobalDCEPass());
-
- if (PTO.MergeFunctions)
- MPM.addPass(MergeFunctionsPass());
-
- // Emit annotation remarks.
- addAnnotationRemarksPass(MPM);
-
- return MPM;
-}
-
-ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
- bool LTOPreLink) {
- assert(Level == OptimizationLevel::O0 &&
- "buildO0DefaultPipeline should only be used with O0");
-
- ModulePassManager MPM;
-
- if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
- PGOOpt->Action == PGOOptions::IRUse))
- addPGOInstrPassesForO0(
- MPM,
- /* RunProfileGen */ (PGOOpt->Action == PGOOptions::IRInstr),
- /* IsCS */ false, PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
-
- for (auto &C : PipelineStartEPCallbacks)
- C(MPM, Level);
-
- if (PGOOpt && PGOOpt->DebugInfoForProfiling)
- MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
-
- for (auto &C : PipelineEarlySimplificationEPCallbacks)
- C(MPM, Level);
-
- // Build a minimal pipeline based on the semantics required by LLVM,
- // which is just that always inlining occurs. Further, disable generating
- // lifetime intrinsics to avoid enabling further optimizations during
- // code generation.
- MPM.addPass(AlwaysInlinerPass(
- /*InsertLifetimeIntrinsics=*/false));
-
- if (PTO.MergeFunctions)
- MPM.addPass(MergeFunctionsPass());
-
- if (EnableMatrix)
- MPM.addPass(
- createModuleToFunctionPassAdaptor(LowerMatrixIntrinsicsPass(true)));
-
- if (!CGSCCOptimizerLateEPCallbacks.empty()) {
- CGSCCPassManager CGPM;
- for (auto &C : CGSCCOptimizerLateEPCallbacks)
- C(CGPM, Level);
- if (!CGPM.isEmpty())
- MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
- }
- if (!LateLoopOptimizationsEPCallbacks.empty()) {
- LoopPassManager LPM;
- for (auto &C : LateLoopOptimizationsEPCallbacks)
- C(LPM, Level);
- if (!LPM.isEmpty()) {
- MPM.addPass(createModuleToFunctionPassAdaptor(
- createFunctionToLoopPassAdaptor(std::move(LPM))));
- }
- }
- if (!LoopOptimizerEndEPCallbacks.empty()) {
- LoopPassManager LPM;
- for (auto &C : LoopOptimizerEndEPCallbacks)
- C(LPM, Level);
- if (!LPM.isEmpty()) {
- MPM.addPass(createModuleToFunctionPassAdaptor(
- createFunctionToLoopPassAdaptor(std::move(LPM))));
- }
- }
- if (!ScalarOptimizerLateEPCallbacks.empty()) {
- FunctionPassManager FPM;
- for (auto &C : ScalarOptimizerLateEPCallbacks)
- C(FPM, Level);
- if (!FPM.isEmpty())
- MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
- }
- if (!VectorizerStartEPCallbacks.empty()) {
- FunctionPassManager FPM;
- for (auto &C : VectorizerStartEPCallbacks)
- C(FPM, Level);
- if (!FPM.isEmpty())
- MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
- }
-
- MPM.addPass(createModuleToFunctionPassAdaptor(CoroEarlyPass()));
- CGSCCPassManager CGPM;
- CGPM.addPass(CoroSplitPass());
- MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
- MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass()));
-
- for (auto &C : OptimizerLastEPCallbacks)
- C(MPM, Level);
-
- if (LTOPreLink)
- addRequiredLTOPreLinkPasses(MPM);
-
- return MPM;
-}
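
Editor's sketch (not part of the imported sources): the O0 pipeline above walks the extension-point callback lists (PipelineStartEPCallbacks, OptimizerLastEPCallbacks, and friends). Assuming the public PassBuilder registration API, an embedder populates those lists roughly like this; the specific passes added are illustrative only.

  #include "llvm/Passes/PassBuilder.h"
  #include "llvm/Transforms/Scalar/SimplifyCFG.h"
  using namespace llvm;

  void registerCallbacks(PassBuilder &PB) {
    // Invoked at the start of both the O0 and the optimized module pipelines.
    PB.registerPipelineStartEPCallback(
        [](ModulePassManager &MPM, OptimizationLevel Level) {
          MPM.addPass(createModuleToFunctionPassAdaptor(SimplifyCFGPass()));
        });
    // Invoked after the main optimization pipeline has been assembled.
    PB.registerOptimizerLastEPCallback(
        [](ModulePassManager &MPM, OptimizationLevel Level) {
          // Late instrumentation or custom cleanup passes would go here.
        });
  }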
-
-AAManager PassBuilder::buildDefaultAAPipeline() {
- AAManager AA;
-
- // The order in which these are registered determines their priority when
- // being queried.
-
- // First we register the basic alias analysis that provides the majority of
- // per-function local AA logic. This is a stateless, on-demand local set of
- // AA techniques.
- AA.registerFunctionAnalysis<BasicAA>();
-
- // Next we query fast, specialized alias analyses that wrap IR-embedded
- // information about aliasing.
- AA.registerFunctionAnalysis<ScopedNoAliasAA>();
- AA.registerFunctionAnalysis<TypeBasedAA>();
-
- // Add support for querying global aliasing information when available.
- // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
- // analysis, all that the `AAManager` can do is query for any *cached*
- // results from `GlobalsAA` through a readonly proxy.
- AA.registerModuleAnalysis<GlobalsAA>();
-
- // Add target-specific alias analyses.
- if (TM)
- TM->registerDefaultAliasAnalyses(AA);
-
- return AA;
-}
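
Editor's sketch (not part of the imported sources): buildDefaultAAPipeline, removed from this file by the import, is the AA stack an embedder installs into a FunctionAnalysisManager. A minimal wiring sketch, assuming the standard registration API:

  #include "llvm/Passes/PassBuilder.h"
  using namespace llvm;

  void setupAliasAnalyses(PassBuilder &PB, FunctionAnalysisManager &FAM) {
    // Override the default AAManager registration with the tuned AA pipeline
    // before the remaining function analyses are registered.
    FAM.registerPass([&] { return PB.buildDefaultAAPipeline(); });
    PB.registerFunctionAnalyses(FAM);
  }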
-
static Optional<int> parseRepeatPassName(StringRef Name) {
if (!Name.consume_front("repeat<") || !Name.consume_back(">"))
return None;
@@ -2140,6 +558,83 @@ Expected<LoopUnrollOptions> parseLoopUnrollOptions(StringRef Params) {
return UnrollOpts;
}
+Expected<bool> parseSinglePassOption(StringRef Params, StringRef OptionName,
+ StringRef PassName) {
+ bool Result = false;
+ while (!Params.empty()) {
+ StringRef ParamName;
+ std::tie(ParamName, Params) = Params.split(';');
+
+ if (ParamName == OptionName) {
+ Result = true;
+ } else {
+ return make_error<StringError>(
+ formatv("invalid {1} pass parameter '{0}' ", ParamName, PassName)
+ .str(),
+ inconvertibleErrorCode());
+ }
+ }
+ return Result;
+}
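
Editor's sketch (not part of the imported sources): parseSinglePassOption and the small wrappers below back the `pass<option>` spelling in textual pipelines. The registered pass names ("inline", "early-cse") are assumed from PassRegistry.def, and "function<eager-inv>" is the new adaptor spelling handled later in this diff.

  #include "llvm/Passes/PassBuilder.h"
  using namespace llvm;

  Error buildFromText(PassBuilder &PB, ModulePassManager &MPM) {
    // "only-mandatory" is consumed by parseInlinerPassOptions, "memssa" by
    // parseEarlyCSEPassOptions; "eager-inv" asks the module-to-function
    // adaptor to eagerly invalidate analyses.
    return PB.parsePassPipeline(
        MPM,
        "cgscc(inline<only-mandatory>),function<eager-inv>(early-cse<memssa>)");
  }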
+
+Expected<bool> parseInlinerPassOptions(StringRef Params) {
+ return parseSinglePassOption(Params, "only-mandatory", "InlinerPass");
+}
+
+Expected<bool> parseEarlyCSEPassOptions(StringRef Params) {
+ return parseSinglePassOption(Params, "memssa", "EarlyCSE");
+}
+
+Expected<bool> parseEntryExitInstrumenterPassOptions(StringRef Params) {
+ return parseSinglePassOption(Params, "post-inline", "EntryExitInstrumenter");
+}
+
+Expected<bool> parseLoopExtractorPassOptions(StringRef Params) {
+ return parseSinglePassOption(Params, "single", "LoopExtractor");
+}
+
+Expected<bool> parseLowerMatrixIntrinsicsPassOptions(StringRef Params) {
+ return parseSinglePassOption(Params, "minimal", "LowerMatrixIntrinsics");
+}
+
+Expected<AddressSanitizerOptions> parseASanPassOptions(StringRef Params) {
+ AddressSanitizerOptions Result;
+ while (!Params.empty()) {
+ StringRef ParamName;
+ std::tie(ParamName, Params) = Params.split(';');
+
+ if (ParamName == "kernel") {
+ Result.CompileKernel = true;
+ } else {
+ return make_error<StringError>(
+ formatv("invalid AddressSanitizer pass parameter '{0}' ", ParamName)
+ .str(),
+ inconvertibleErrorCode());
+ }
+ }
+ return Result;
+}
+
+Expected<HWAddressSanitizerOptions> parseHWASanPassOptions(StringRef Params) {
+ HWAddressSanitizerOptions Result;
+ while (!Params.empty()) {
+ StringRef ParamName;
+ std::tie(ParamName, Params) = Params.split(';');
+
+ if (ParamName == "recover") {
+ Result.Recover = true;
+ } else if (ParamName == "kernel") {
+ Result.CompileKernel = true;
+ } else {
+ return make_error<StringError>(
+ formatv("invalid HWAddressSanitizer pass parameter '{0}' ", ParamName)
+ .str(),
+ inconvertibleErrorCode());
+ }
+ }
+ return Result;
+}
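
Editor's sketch (not part of the imported sources): the option strings the two sanitizer parsers above accept, if exercised from within this translation unit. Parameters are semicolon-separated; unknown tokens are rejected with a StringError.

  static Error demoSanitizerParams() {
    // "kernel;recover" sets both CompileKernel and Recover.
    Expected<HWAddressSanitizerOptions> HWOpts =
        parseHWASanPassOptions("kernel;recover");
    if (!HWOpts)
      return HWOpts.takeError();
    // "kernel" alone sets CompileKernel; a token such as "bogus" would fail.
    Expected<AddressSanitizerOptions> ASanOpts = parseASanPassOptions("kernel");
    if (!ASanOpts)
      return ASanOpts.takeError();
    return Error::success();
  }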
+
Expected<MemorySanitizerOptions> parseMSanPassOptions(StringRef Params) {
MemorySanitizerOptions Result;
while (!Params.empty()) {
@@ -2349,7 +844,7 @@ static bool isModulePassName(StringRef Name, CallbacksT &Callbacks) {
return true;
if (Name == "cgscc")
return true;
- if (Name == "function")
+ if (Name == "function" || Name == "function<eager-inv>")
return true;
// Explicitly handle custom-parsed pass names.
@@ -2359,6 +854,9 @@ static bool isModulePassName(StringRef Name, CallbacksT &Callbacks) {
#define MODULE_PASS(NAME, CREATE_PASS) \
if (Name == NAME) \
return true;
+#define MODULE_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
+ if (checkParametrizedPassName(Name, NAME)) \
+ return true;
#define MODULE_ANALYSIS(NAME, CREATE_PASS) \
if (Name == "require<" NAME ">" || Name == "invalidate<" NAME ">") \
return true;
@@ -2372,7 +870,7 @@ static bool isCGSCCPassName(StringRef Name, CallbacksT &Callbacks) {
// Explicitly handle pass manager names.
if (Name == "cgscc")
return true;
- if (Name == "function")
+ if (Name == "function" || Name == "function<eager-inv>")
return true;
// Explicitly handle custom-parsed pass names.
@@ -2384,6 +882,9 @@ static bool isCGSCCPassName(StringRef Name, CallbacksT &Callbacks) {
#define CGSCC_PASS(NAME, CREATE_PASS) \
if (Name == NAME) \
return true;
+#define CGSCC_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
+ if (checkParametrizedPassName(Name, NAME)) \
+ return true;
#define CGSCC_ANALYSIS(NAME, CREATE_PASS) \
if (Name == "require<" NAME ">" || Name == "invalidate<" NAME ">") \
return true;
@@ -2395,7 +896,7 @@ static bool isCGSCCPassName(StringRef Name, CallbacksT &Callbacks) {
template <typename CallbacksT>
static bool isFunctionPassName(StringRef Name, CallbacksT &Callbacks) {
// Explicitly handle pass manager names.
- if (Name == "function")
+ if (Name == "function" || Name == "function<eager-inv>")
return true;
if (Name == "loop" || Name == "loop-mssa")
return true;
@@ -2419,15 +920,41 @@ static bool isFunctionPassName(StringRef Name, CallbacksT &Callbacks) {
}
template <typename CallbacksT>
-static bool isLoopPassName(StringRef Name, CallbacksT &Callbacks) {
- // Explicitly handle pass manager names.
- if (Name == "loop" || Name == "loop-mssa")
+static bool isLoopNestPassName(StringRef Name, CallbacksT &Callbacks,
+ bool &UseMemorySSA) {
+ UseMemorySSA = false;
+
+ // Explicitly handle custom-parsed pass names.
+ if (parseRepeatPassName(Name))
return true;
+ if (Name == "lnicm") {
+ UseMemorySSA = true;
+ return true;
+ }
+
+#define LOOPNEST_PASS(NAME, CREATE_PASS) \
+ if (Name == NAME) \
+ return true;
+#include "PassRegistry.def"
+
+ return callbacksAcceptPassName<LoopPassManager>(Name, Callbacks);
+}
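
Editor's sketch (not part of the imported sources): the UseMemorySSA out-parameter introduced here feeds the top-level nesting done later in parsePassPipeline (see the hunk further down), so a bare loop-pass name picks the MemorySSA-enabled adaptor when it needs one. The equivalence below is the assumed behavior, not a literal transcript.

  #include "llvm/Passes/PassBuilder.h"
  using namespace llvm;

  void demoLoopNesting() {
    PassBuilder PB;
    ModulePassManager MPM;
    cantFail(PB.parsePassPipeline(MPM, "licm"));
    // ...is treated roughly like:
    //   PB.parsePassPipeline(MPM, "function(loop-mssa(licm))");
  }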
+
+template <typename CallbacksT>
+static bool isLoopPassName(StringRef Name, CallbacksT &Callbacks,
+ bool &UseMemorySSA) {
+ UseMemorySSA = false;
+
// Explicitly handle custom-parsed pass names.
if (parseRepeatPassName(Name))
return true;
+ if (Name == "licm") {
+ UseMemorySSA = true;
+ return true;
+ }
+
#define LOOP_PASS(NAME, CREATE_PASS) \
if (Name == NAME) \
return true;
@@ -2520,11 +1047,12 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM,
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
return Error::success();
}
- if (Name == "function") {
+ if (Name == "function" || Name == "function<eager-inv>") {
FunctionPassManager FPM;
if (auto Err = parseFunctionPassPipeline(FPM, InnerPipeline))
return Err;
- MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+ MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM),
+ Name != "function"));
return Error::success();
}
if (auto Count = parseRepeatPassName(Name)) {
@@ -2599,6 +1127,14 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM,
MPM.addPass(CREATE_PASS); \
return Error::success(); \
}
+#define MODULE_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
+ if (checkParametrizedPassName(Name, NAME)) { \
+ auto Params = parsePassParameters(PARSER, Name, NAME); \
+ if (!Params) \
+ return Params.takeError(); \
+ MPM.addPass(CREATE_PASS(Params.get())); \
+ return Error::success(); \
+ }
#define MODULE_ANALYSIS(NAME, CREATE_PASS) \
if (Name == "require<" NAME ">") { \
MPM.addPass( \
@@ -2616,6 +1152,15 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM,
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(CREATE_PASS)); \
return Error::success(); \
}
+#define CGSCC_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
+ if (checkParametrizedPassName(Name, NAME)) { \
+ auto Params = parsePassParameters(PARSER, Name, NAME); \
+ if (!Params) \
+ return Params.takeError(); \
+ MPM.addPass( \
+ createModuleToPostOrderCGSCCPassAdaptor(CREATE_PASS(Params.get()))); \
+ return Error::success(); \
+ }
#define FUNCTION_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
MPM.addPass(createModuleToFunctionPassAdaptor(CREATE_PASS)); \
@@ -2629,6 +1174,12 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM,
MPM.addPass(createModuleToFunctionPassAdaptor(CREATE_PASS(Params.get()))); \
return Error::success(); \
}
+#define LOOPNEST_PASS(NAME, CREATE_PASS) \
+ if (Name == NAME) { \
+ MPM.addPass(createModuleToFunctionPassAdaptor( \
+ createFunctionToLoopPassAdaptor(CREATE_PASS, false, false))); \
+ return Error::success(); \
+ }
#define LOOP_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
MPM.addPass(createModuleToFunctionPassAdaptor( \
@@ -2670,12 +1221,13 @@ Error PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM,
CGPM.addPass(std::move(NestedCGPM));
return Error::success();
}
- if (Name == "function") {
+ if (Name == "function" || Name == "function<eager-inv>") {
FunctionPassManager FPM;
if (auto Err = parseFunctionPassPipeline(FPM, InnerPipeline))
return Err;
// Add the nested pass manager with the appropriate adaptor.
- CGPM.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM)));
+ CGPM.addPass(
+ createCGSCCToFunctionPassAdaptor(std::move(FPM), Name != "function"));
return Error::success();
}
if (auto Count = parseRepeatPassName(Name)) {
@@ -2710,6 +1262,14 @@ Error PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM,
CGPM.addPass(CREATE_PASS); \
return Error::success(); \
}
+#define CGSCC_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
+ if (checkParametrizedPassName(Name, NAME)) { \
+ auto Params = parsePassParameters(PARSER, Name, NAME); \
+ if (!Params) \
+ return Params.takeError(); \
+ CGPM.addPass(CREATE_PASS(Params.get())); \
+ return Error::success(); \
+ }
#define CGSCC_ANALYSIS(NAME, CREATE_PASS) \
if (Name == "require<" NAME ">") { \
CGPM.addPass(RequireAnalysisPass< \
@@ -2736,6 +1296,12 @@ Error PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM,
CGPM.addPass(createCGSCCToFunctionPassAdaptor(CREATE_PASS(Params.get()))); \
return Error::success(); \
}
+#define LOOPNEST_PASS(NAME, CREATE_PASS) \
+ if (Name == NAME) { \
+ CGPM.addPass(createCGSCCToFunctionPassAdaptor( \
+ createFunctionToLoopPassAdaptor(CREATE_PASS, false, false))); \
+ return Error::success(); \
+ }
#define LOOP_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
CGPM.addPass(createCGSCCToFunctionPassAdaptor( \
@@ -2785,8 +1351,11 @@ Error PassBuilder::parseFunctionPass(FunctionPassManager &FPM,
bool UseMemorySSA = (Name == "loop-mssa");
bool UseBFI = llvm::any_of(
InnerPipeline, [](auto Pipeline) { return Pipeline.Name == "licm"; });
+ bool UseBPI = llvm::any_of(InnerPipeline, [](auto Pipeline) {
+ return Pipeline.Name == "loop-predication";
+ });
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), UseMemorySSA,
- UseBFI));
+ UseBFI, UseBPI));
return Error::success();
}
if (auto Count = parseRepeatPassName(Name)) {
@@ -2837,6 +1406,11 @@ Error PassBuilder::parseFunctionPass(FunctionPassManager &FPM,
// bool UseMemorySSA = !("canon-freeze" || "loop-predication" ||
// "guard-widening");
// The risk is that it may become obsolete if we're not careful.
+#define LOOPNEST_PASS(NAME, CREATE_PASS) \
+ if (Name == NAME) { \
+ FPM.addPass(createFunctionToLoopPassAdaptor(CREATE_PASS, false, false)); \
+ return Error::success(); \
+ }
#define LOOP_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
FPM.addPass(createFunctionToLoopPassAdaptor(CREATE_PASS, false, false)); \
@@ -2895,6 +1469,11 @@ Error PassBuilder::parseLoopPass(LoopPassManager &LPM,
}
// Now expand the basic registered passes from the .inc file.
+#define LOOPNEST_PASS(NAME, CREATE_PASS) \
+ if (Name == NAME) { \
+ LPM.addPass(CREATE_PASS); \
+ return Error::success(); \
+ }
#define LOOP_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
LPM.addPass(CREATE_PASS); \
@@ -3016,13 +1595,20 @@ Error PassBuilder::parsePassPipeline(ModulePassManager &MPM,
StringRef FirstName = Pipeline->front().Name;
if (!isModulePassName(FirstName, ModulePipelineParsingCallbacks)) {
+ bool UseMemorySSA;
if (isCGSCCPassName(FirstName, CGSCCPipelineParsingCallbacks)) {
Pipeline = {{"cgscc", std::move(*Pipeline)}};
} else if (isFunctionPassName(FirstName,
FunctionPipelineParsingCallbacks)) {
Pipeline = {{"function", std::move(*Pipeline)}};
- } else if (isLoopPassName(FirstName, LoopPipelineParsingCallbacks)) {
- Pipeline = {{"function", {{"loop", std::move(*Pipeline)}}}};
+ } else if (isLoopNestPassName(FirstName, LoopPipelineParsingCallbacks,
+ UseMemorySSA)) {
+ Pipeline = {{"function", {{UseMemorySSA ? "loop-mssa" : "loop",
+ std::move(*Pipeline)}}}};
+ } else if (isLoopPassName(FirstName, LoopPipelineParsingCallbacks,
+ UseMemorySSA)) {
+ Pipeline = {{"function", {{UseMemorySSA ? "loop-mssa" : "loop",
+ std::move(*Pipeline)}}}};
} else {
for (auto &C : TopLevelPipelineParsingCallbacks)
if (C(MPM, *Pipeline))
@@ -3172,6 +1758,11 @@ void PassBuilder::printPassNames(raw_ostream &OS) {
#define MODULE_PASS(NAME, CREATE_PASS) printPassName(NAME, OS);
#include "PassRegistry.def"
+ OS << "Module passes with params:\n";
+#define MODULE_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
+ printPassName(NAME, PARAMS, OS);
+#include "PassRegistry.def"
+
OS << "Module analyses:\n";
#define MODULE_ANALYSIS(NAME, CREATE_PASS) printPassName(NAME, OS);
#include "PassRegistry.def"
@@ -3184,6 +1775,11 @@ void PassBuilder::printPassNames(raw_ostream &OS) {
#define CGSCC_PASS(NAME, CREATE_PASS) printPassName(NAME, OS);
#include "PassRegistry.def"
+ OS << "CGSCC passes with params:\n";
+#define CGSCC_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
+ printPassName(NAME, PARAMS, OS);
+#include "PassRegistry.def"
+
OS << "CGSCC analyses:\n";
#define CGSCC_ANALYSIS(NAME, CREATE_PASS) printPassName(NAME, OS);
#include "PassRegistry.def"
@@ -3205,6 +1801,10 @@ void PassBuilder::printPassNames(raw_ostream &OS) {
#define FUNCTION_ALIAS_ANALYSIS(NAME, CREATE_PASS) printPassName(NAME, OS);
#include "PassRegistry.def"
+ OS << "LoopNest passes:\n";
+#define LOOPNEST_PASS(NAME, CREATE_PASS) printPassName(NAME, OS);
+#include "PassRegistry.def"
+
OS << "Loop passes:\n";
#define LOOP_PASS(NAME, CREATE_PASS) printPassName(NAME, OS);
#include "PassRegistry.def"
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
new file mode 100644
index 000000000000..ac5dfdbdd540
--- /dev/null
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -0,0 +1,1798 @@
+//===- Construction of pass pipelines -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file provides the implementation of the PassBuilder based on our
+/// static pass registry as well as related functionality. It also provides
+/// helpers to aid in analyzing, debugging, and testing passes and pass
+/// pipelines.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/CGSCCPassManager.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/InlineAdvisor.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/ScopedNoAliasAA.h"
+#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Passes/OptimizationLevel.h"
+#include "llvm/Passes/PassBuilder.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/PGOOptions.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
+#include "llvm/Transforms/Coroutines/CoroCleanup.h"
+#include "llvm/Transforms/Coroutines/CoroEarly.h"
+#include "llvm/Transforms/Coroutines/CoroElide.h"
+#include "llvm/Transforms/Coroutines/CoroSplit.h"
+#include "llvm/Transforms/IPO/AlwaysInliner.h"
+#include "llvm/Transforms/IPO/Annotation2Metadata.h"
+#include "llvm/Transforms/IPO/ArgumentPromotion.h"
+#include "llvm/Transforms/IPO/Attributor.h"
+#include "llvm/Transforms/IPO/CalledValuePropagation.h"
+#include "llvm/Transforms/IPO/ConstantMerge.h"
+#include "llvm/Transforms/IPO/CrossDSOCFI.h"
+#include "llvm/Transforms/IPO/DeadArgumentElimination.h"
+#include "llvm/Transforms/IPO/ElimAvailExtern.h"
+#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
+#include "llvm/Transforms/IPO/FunctionAttrs.h"
+#include "llvm/Transforms/IPO/GlobalDCE.h"
+#include "llvm/Transforms/IPO/GlobalOpt.h"
+#include "llvm/Transforms/IPO/GlobalSplit.h"
+#include "llvm/Transforms/IPO/HotColdSplitting.h"
+#include "llvm/Transforms/IPO/IROutliner.h"
+#include "llvm/Transforms/IPO/InferFunctionAttrs.h"
+#include "llvm/Transforms/IPO/Inliner.h"
+#include "llvm/Transforms/IPO/LowerTypeTests.h"
+#include "llvm/Transforms/IPO/MergeFunctions.h"
+#include "llvm/Transforms/IPO/ModuleInliner.h"
+#include "llvm/Transforms/IPO/OpenMPOpt.h"
+#include "llvm/Transforms/IPO/PartialInlining.h"
+#include "llvm/Transforms/IPO/SCCP.h"
+#include "llvm/Transforms/IPO/SampleProfile.h"
+#include "llvm/Transforms/IPO/SampleProfileProbe.h"
+#include "llvm/Transforms/IPO/SyntheticCountsPropagation.h"
+#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
+#include "llvm/Transforms/InstCombine/InstCombine.h"
+#include "llvm/Transforms/Instrumentation/CGProfile.h"
+#include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
+#include "llvm/Transforms/Instrumentation/InstrOrderFile.h"
+#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
+#include "llvm/Transforms/Instrumentation/MemProfiler.h"
+#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
+#include "llvm/Transforms/Scalar/ADCE.h"
+#include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h"
+#include "llvm/Transforms/Scalar/AnnotationRemarks.h"
+#include "llvm/Transforms/Scalar/BDCE.h"
+#include "llvm/Transforms/Scalar/CallSiteSplitting.h"
+#include "llvm/Transforms/Scalar/ConstraintElimination.h"
+#include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h"
+#include "llvm/Transforms/Scalar/DFAJumpThreading.h"
+#include "llvm/Transforms/Scalar/DeadStoreElimination.h"
+#include "llvm/Transforms/Scalar/DivRemPairs.h"
+#include "llvm/Transforms/Scalar/EarlyCSE.h"
+#include "llvm/Transforms/Scalar/Float2Int.h"
+#include "llvm/Transforms/Scalar/GVN.h"
+#include "llvm/Transforms/Scalar/IndVarSimplify.h"
+#include "llvm/Transforms/Scalar/InstSimplifyPass.h"
+#include "llvm/Transforms/Scalar/JumpThreading.h"
+#include "llvm/Transforms/Scalar/LICM.h"
+#include "llvm/Transforms/Scalar/LoopDeletion.h"
+#include "llvm/Transforms/Scalar/LoopDistribute.h"
+#include "llvm/Transforms/Scalar/LoopFlatten.h"
+#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
+#include "llvm/Transforms/Scalar/LoopInstSimplify.h"
+#include "llvm/Transforms/Scalar/LoopInterchange.h"
+#include "llvm/Transforms/Scalar/LoopLoadElimination.h"
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
+#include "llvm/Transforms/Scalar/LoopRotation.h"
+#include "llvm/Transforms/Scalar/LoopSimplifyCFG.h"
+#include "llvm/Transforms/Scalar/LoopSink.h"
+#include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h"
+#include "llvm/Transforms/Scalar/LoopUnrollPass.h"
+#include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
+#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
+#include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"
+#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
+#include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"
+#include "llvm/Transforms/Scalar/NewGVN.h"
+#include "llvm/Transforms/Scalar/Reassociate.h"
+#include "llvm/Transforms/Scalar/SCCP.h"
+#include "llvm/Transforms/Scalar/SROA.h"
+#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
+#include "llvm/Transforms/Scalar/SimplifyCFG.h"
+#include "llvm/Transforms/Scalar/SpeculativeExecution.h"
+#include "llvm/Transforms/Scalar/TailRecursionElimination.h"
+#include "llvm/Transforms/Scalar/WarnMissedTransforms.h"
+#include "llvm/Transforms/Utils/AddDiscriminators.h"
+#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
+#include "llvm/Transforms/Utils/CanonicalizeAliases.h"
+#include "llvm/Transforms/Utils/InjectTLIMappings.h"
+#include "llvm/Transforms/Utils/LibCallsShrinkWrap.h"
+#include "llvm/Transforms/Utils/Mem2Reg.h"
+#include "llvm/Transforms/Utils/NameAnonGlobals.h"
+#include "llvm/Transforms/Utils/RelLookupTableConverter.h"
+#include "llvm/Transforms/Utils/SimplifyCFGOptions.h"
+#include "llvm/Transforms/Vectorize/LoopVectorize.h"
+#include "llvm/Transforms/Vectorize/SLPVectorizer.h"
+#include "llvm/Transforms/Vectorize/VectorCombine.h"
+
+using namespace llvm;
+
+static cl::opt<InliningAdvisorMode> UseInlineAdvisor(
+ "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden,
+ cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
+ cl::values(clEnumValN(InliningAdvisorMode::Default, "default",
+ "Heuristics-based inliner version."),
+ clEnumValN(InliningAdvisorMode::Development, "development",
+ "Use development mode (runtime-loadable model)."),
+ clEnumValN(InliningAdvisorMode::Release, "release",
+ "Use release mode (AOT-compiled model).")));
+
+static cl::opt<bool> EnableSyntheticCounts(
+ "enable-npm-synthetic-counts", cl::init(false), cl::Hidden, cl::ZeroOrMore,
+ cl::desc("Run synthetic function entry count generation "
+ "pass"));
+
+/// Flag to enable inline deferral during PGO.
+static cl::opt<bool>
+ EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
+ cl::Hidden,
+ cl::desc("Enable inline deferral during PGO"));
+
+static cl::opt<bool> EnableMemProfiler("enable-mem-prof", cl::init(false),
+ cl::Hidden, cl::ZeroOrMore,
+ cl::desc("Enable memory profiler"));
+
+static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
+ cl::init(false), cl::Hidden,
+ cl::desc("Enable module inliner"));
+
+static cl::opt<bool> PerformMandatoryInliningsFirst(
+ "mandatory-inlining-first", cl::init(true), cl::Hidden, cl::ZeroOrMore,
+ cl::desc("Perform mandatory inlinings module-wide, before performing "
+ "inlining."));
+
+static cl::opt<bool> EnableO3NonTrivialUnswitching(
+ "enable-npm-O3-nontrivial-unswitch", cl::init(true), cl::Hidden,
+ cl::ZeroOrMore, cl::desc("Enable non-trivial loop unswitching for -O3"));
+
+static cl::opt<bool> EnableEagerlyInvalidateAnalyses(
+ "eagerly-invalidate-analyses", cl::init(true), cl::Hidden,
+ cl::desc("Eagerly invalidate more analyses in default pipelines"));
+
+static cl::opt<bool> EnableNoRerunSimplificationPipeline(
+ "enable-no-rerun-simplification-pipeline", cl::init(false), cl::Hidden,
+ cl::desc(
+ "Prevent running the simplification pipeline on a function more "
+ "than once in the case that SCC mutations cause a function to be "
+ "visited multiple times as long as the function has not been changed"));
+
+PipelineTuningOptions::PipelineTuningOptions() {
+ LoopInterleaving = true;
+ LoopVectorization = true;
+ SLPVectorization = false;
+ LoopUnrolling = true;
+ ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll;
+ LicmMssaOptCap = SetLicmMssaOptCap;
+ LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
+ CallGraphProfile = true;
+ MergeFunctions = false;
+ EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses;
+}
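
Editor's sketch (not part of the imported sources): the defaults set above can be overridden by the embedder before constructing a PassBuilder. A minimal sketch, assuming the usual constructor signature; the chosen options and optimization level are illustrative.

  #include "llvm/Passes/PassBuilder.h"
  using namespace llvm;

  ModulePassManager buildTunedO2(TargetMachine *TM) {
    PipelineTuningOptions PTO;   // starts from the defaults set above
    PTO.SLPVectorization = true; // opt into SLP vectorization
    PTO.MergeFunctions = true;   // run MergeFunctionsPass late in the pipeline
    PassBuilder PB(TM, PTO);
    return PB.buildPerModuleDefaultPipeline(OptimizationLevel::O2);
  }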
+
+namespace llvm {
+
+extern cl::opt<unsigned> MaxDevirtIterations;
+extern cl::opt<bool> EnableConstraintElimination;
+extern cl::opt<bool> EnableFunctionSpecialization;
+extern cl::opt<bool> EnableGVNHoist;
+extern cl::opt<bool> EnableGVNSink;
+extern cl::opt<bool> EnableHotColdSplit;
+extern cl::opt<bool> EnableIROutliner;
+extern cl::opt<bool> EnableOrderFileInstrumentation;
+extern cl::opt<bool> EnableCHR;
+extern cl::opt<bool> EnableLoopInterchange;
+extern cl::opt<bool> EnableUnrollAndJam;
+extern cl::opt<bool> EnableLoopFlatten;
+extern cl::opt<bool> EnableDFAJumpThreading;
+extern cl::opt<bool> RunNewGVN;
+extern cl::opt<bool> RunPartialInlining;
+extern cl::opt<bool> ExtraVectorizerPasses;
+
+extern cl::opt<bool> FlattenedProfileUsed;
+
+extern cl::opt<AttributorRunOption> AttributorRun;
+extern cl::opt<bool> EnableKnowledgeRetention;
+
+extern cl::opt<bool> EnableMatrix;
+
+extern cl::opt<bool> DisablePreInliner;
+extern cl::opt<int> PreInlineThreshold;
+} // namespace llvm
+
+void PassBuilder::invokePeepholeEPCallbacks(FunctionPassManager &FPM,
+ OptimizationLevel Level) {
+ for (auto &C : PeepholeEPCallbacks)
+ C(FPM, Level);
+}
+
+// Helper to add AnnotationRemarksPass.
+static void addAnnotationRemarksPass(ModulePassManager &MPM) {
+ FunctionPassManager FPM;
+ FPM.addPass(AnnotationRemarksPass());
+ MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+}
+
+// Helper to check if the current compilation phase is preparing for LTO
+static bool isLTOPreLink(ThinOrFullLTOPhase Phase) {
+ return Phase == ThinOrFullLTOPhase::ThinLTOPreLink ||
+ Phase == ThinOrFullLTOPhase::FullLTOPreLink;
+}
+
+// TODO: Investigate the cost/benefit of tail call elimination on debugging.
+FunctionPassManager
+PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
+ ThinOrFullLTOPhase Phase) {
+
+ FunctionPassManager FPM;
+
+ // Form SSA out of local memory accesses after breaking apart aggregates into
+ // scalars.
+ FPM.addPass(SROAPass());
+
+ // Catch trivial redundancies
+ FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
+
+ // Hoisting of scalars and load expressions.
+ FPM.addPass(SimplifyCFGPass());
+ FPM.addPass(InstCombinePass());
+
+ FPM.addPass(LibCallsShrinkWrapPass());
+
+ invokePeepholeEPCallbacks(FPM, Level);
+
+ FPM.addPass(SimplifyCFGPass());
+
+ // Form canonically associated expression trees, and simplify the trees using
+ // basic mathematical properties. For example, this will form (nearly)
+ // minimal multiplication trees.
+ FPM.addPass(ReassociatePass());
+
+ // Add the primary loop simplification pipeline.
+ // FIXME: Currently this is split into two loop pass pipelines because we run
+ // some function passes in between them. These can and should be removed
+ // and/or replaced by scheduling the loop pass equivalents in the correct
+ // positions. But those equivalent passes aren't powerful enough yet.
+ // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
+  // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough to
+ // fully replace `SimplifyCFGPass`, and the closest to the other we have is
+ // `LoopInstSimplify`.
+ LoopPassManager LPM1, LPM2;
+
+ // Simplify the loop body. We do this initially to clean up after other loop
+ // passes run, either when iterating on a loop or on inner loops with
+ // implications on the outer loop.
+ LPM1.addPass(LoopInstSimplifyPass());
+ LPM1.addPass(LoopSimplifyCFGPass());
+
+ // Try to remove as much code from the loop header as possible,
+  // to reduce the amount of IR that will have to be duplicated.
+ // TODO: Investigate promotion cap for O1.
+ LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
+
+ LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true,
+ isLTOPreLink(Phase)));
+ // TODO: Investigate promotion cap for O1.
+ LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
+ LPM1.addPass(SimpleLoopUnswitchPass());
+
+ LPM2.addPass(LoopIdiomRecognizePass());
+ LPM2.addPass(IndVarSimplifyPass());
+
+ for (auto &C : LateLoopOptimizationsEPCallbacks)
+ C(LPM2, Level);
+
+ LPM2.addPass(LoopDeletionPass());
+
+ if (EnableLoopInterchange)
+ LPM2.addPass(LoopInterchangePass());
+
+  // Do not enable unrolling in the PreLinkThinLTO phase during sample PGO,
+  // because it changes the IR and makes profile annotation in the backend
+  // compile inaccurate. The normal unroller doesn't pay attention to forced
+  // full unroll attributes, so we need to make sure the full unroll pass is
+  // still allowed to pay attention to them.
+ if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
+ PGOOpt->Action != PGOOptions::SampleUse)
+ LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
+ /* OnlyWhenForced= */ !PTO.LoopUnrolling,
+ PTO.ForgetAllSCEVInLoopUnroll));
+
+ for (auto &C : LoopOptimizerEndEPCallbacks)
+ C(LPM2, Level);
+
+ // We provide the opt remark emitter pass for LICM to use. We only need to do
+ // this once as it is immutable.
+ FPM.addPass(
+ RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
+ FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
+ /*UseMemorySSA=*/true,
+ /*UseBlockFrequencyInfo=*/true));
+ FPM.addPass(SimplifyCFGPass());
+ FPM.addPass(InstCombinePass());
+ if (EnableLoopFlatten)
+ FPM.addPass(createFunctionToLoopPassAdaptor(LoopFlattenPass()));
+ // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
+ // *All* loop passes must preserve it, in order to be able to use it.
+ FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
+ /*UseMemorySSA=*/false,
+ /*UseBlockFrequencyInfo=*/false));
+
+ // Delete small array after loop unroll.
+ FPM.addPass(SROAPass());
+
+ // Specially optimize memory movement as it doesn't look like dataflow in SSA.
+ FPM.addPass(MemCpyOptPass());
+
+ // Sparse conditional constant propagation.
+ // FIXME: It isn't clear why we do this *after* loop passes rather than
+ // before...
+ FPM.addPass(SCCPPass());
+
+ // Delete dead bit computations (instcombine runs after to fold away the dead
+ // computations, and then ADCE will run later to exploit any new DCE
+ // opportunities that creates).
+ FPM.addPass(BDCEPass());
+
+ // Run instcombine after redundancy and dead bit elimination to exploit
+ // opportunities opened up by them.
+ FPM.addPass(InstCombinePass());
+ invokePeepholeEPCallbacks(FPM, Level);
+
+ FPM.addPass(CoroElidePass());
+
+ for (auto &C : ScalarOptimizerLateEPCallbacks)
+ C(FPM, Level);
+
+ // Finally, do an expensive DCE pass to catch all the dead code exposed by
+ // the simplifications and basic cleanup after all the simplifications.
+ // TODO: Investigate if this is too expensive.
+ FPM.addPass(ADCEPass());
+ FPM.addPass(SimplifyCFGPass());
+ FPM.addPass(InstCombinePass());
+ invokePeepholeEPCallbacks(FPM, Level);
+
+ return FPM;
+}
+
+FunctionPassManager
+PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
+ ThinOrFullLTOPhase Phase) {
+ assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
+
+ // The O1 pipeline has a separate pipeline creation function to simplify
+ // construction readability.
+ if (Level.getSpeedupLevel() == 1)
+ return buildO1FunctionSimplificationPipeline(Level, Phase);
+
+ FunctionPassManager FPM;
+
+ // Form SSA out of local memory accesses after breaking apart aggregates into
+ // scalars.
+ FPM.addPass(SROAPass());
+
+ // Catch trivial redundancies
+ FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
+ if (EnableKnowledgeRetention)
+ FPM.addPass(AssumeSimplifyPass());
+
+ // Hoisting of scalars and load expressions.
+ if (EnableGVNHoist)
+ FPM.addPass(GVNHoistPass());
+
+ // Global value numbering based sinking.
+ if (EnableGVNSink) {
+ FPM.addPass(GVNSinkPass());
+ FPM.addPass(SimplifyCFGPass());
+ }
+
+ if (EnableConstraintElimination)
+ FPM.addPass(ConstraintEliminationPass());
+
+ // Speculative execution if the target has divergent branches; otherwise nop.
+ FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
+
+ // Optimize based on known information about branches, and cleanup afterward.
+ FPM.addPass(JumpThreadingPass());
+ FPM.addPass(CorrelatedValuePropagationPass());
+
+ FPM.addPass(SimplifyCFGPass());
+ if (Level == OptimizationLevel::O3)
+ FPM.addPass(AggressiveInstCombinePass());
+ FPM.addPass(InstCombinePass());
+
+ if (!Level.isOptimizingForSize())
+ FPM.addPass(LibCallsShrinkWrapPass());
+
+ invokePeepholeEPCallbacks(FPM, Level);
+
+ // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
+ // using the size value profile. Don't perform this when optimizing for size.
+ if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
+ !Level.isOptimizingForSize())
+ FPM.addPass(PGOMemOPSizeOpt());
+
+ FPM.addPass(TailCallElimPass());
+ FPM.addPass(SimplifyCFGPass());
+
+ // Form canonically associated expression trees, and simplify the trees using
+ // basic mathematical properties. For example, this will form (nearly)
+ // minimal multiplication trees.
+ FPM.addPass(ReassociatePass());
+
+ // Add the primary loop simplification pipeline.
+ // FIXME: Currently this is split into two loop pass pipelines because we run
+ // some function passes in between them. These can and should be removed
+ // and/or replaced by scheduling the loop pass equivalents in the correct
+ // positions. But those equivalent passes aren't powerful enough yet.
+ // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
+  // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough to
+ // fully replace `SimplifyCFGPass`, and the closest to the other we have is
+ // `LoopInstSimplify`.
+ LoopPassManager LPM1, LPM2;
+
+ // Simplify the loop body. We do this initially to clean up after other loop
+ // passes run, either when iterating on a loop or on inner loops with
+ // implications on the outer loop.
+ LPM1.addPass(LoopInstSimplifyPass());
+ LPM1.addPass(LoopSimplifyCFGPass());
+
+ // Try to remove as much code from the loop header as possible,
+  // to reduce the amount of IR that will have to be duplicated.
+ // TODO: Investigate promotion cap for O1.
+ LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
+
+ // Disable header duplication in loop rotation at -Oz.
+ LPM1.addPass(
+ LoopRotatePass(Level != OptimizationLevel::Oz, isLTOPreLink(Phase)));
+ // TODO: Investigate promotion cap for O1.
+ LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
+ LPM1.addPass(
+ SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3 &&
+ EnableO3NonTrivialUnswitching));
+ LPM2.addPass(LoopIdiomRecognizePass());
+ LPM2.addPass(IndVarSimplifyPass());
+
+ for (auto &C : LateLoopOptimizationsEPCallbacks)
+ C(LPM2, Level);
+
+ LPM2.addPass(LoopDeletionPass());
+
+ if (EnableLoopInterchange)
+ LPM2.addPass(LoopInterchangePass());
+
+  // Do not enable unrolling in the PreLinkThinLTO phase during sample PGO,
+  // because it changes the IR and makes profile annotation in the backend
+  // compile inaccurate. The normal unroller doesn't pay attention to forced
+  // full unroll attributes, so we need to make sure the full unroll pass is
+  // still allowed to pay attention to them.
+ if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
+ PGOOpt->Action != PGOOptions::SampleUse)
+ LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
+ /* OnlyWhenForced= */ !PTO.LoopUnrolling,
+ PTO.ForgetAllSCEVInLoopUnroll));
+
+ for (auto &C : LoopOptimizerEndEPCallbacks)
+ C(LPM2, Level);
+
+ // We provide the opt remark emitter pass for LICM to use. We only need to do
+ // this once as it is immutable.
+ FPM.addPass(
+ RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
+ FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
+ /*UseMemorySSA=*/true,
+ /*UseBlockFrequencyInfo=*/true));
+ FPM.addPass(SimplifyCFGPass());
+ FPM.addPass(InstCombinePass());
+ if (EnableLoopFlatten)
+ FPM.addPass(createFunctionToLoopPassAdaptor(LoopFlattenPass()));
+ // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
+ // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
+ // *All* loop passes must preserve it, in order to be able to use it.
+ FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
+ /*UseMemorySSA=*/false,
+ /*UseBlockFrequencyInfo=*/false));
+
+ // Delete small array after loop unroll.
+ FPM.addPass(SROAPass());
+
+ // The matrix extension can introduce large vector operations early, which can
+ // benefit from running vector-combine early on.
+ if (EnableMatrix)
+ FPM.addPass(VectorCombinePass(/*ScalarizationOnly=*/true));
+
+ // Eliminate redundancies.
+ FPM.addPass(MergedLoadStoreMotionPass());
+ if (RunNewGVN)
+ FPM.addPass(NewGVNPass());
+ else
+ FPM.addPass(GVNPass());
+
+ // Sparse conditional constant propagation.
+ // FIXME: It isn't clear why we do this *after* loop passes rather than
+ // before...
+ FPM.addPass(SCCPPass());
+
+ // Delete dead bit computations (instcombine runs after to fold away the dead
+ // computations, and then ADCE will run later to exploit any new DCE
+ // opportunities that creates).
+ FPM.addPass(BDCEPass());
+
+ // Run instcombine after redundancy and dead bit elimination to exploit
+ // opportunities opened up by them.
+ FPM.addPass(InstCombinePass());
+ invokePeepholeEPCallbacks(FPM, Level);
+
+ // Re-consider control flow based optimizations after redundancy elimination,
+ // redo DCE, etc.
+ if (EnableDFAJumpThreading && Level.getSizeLevel() == 0)
+ FPM.addPass(DFAJumpThreadingPass());
+
+ FPM.addPass(JumpThreadingPass());
+ FPM.addPass(CorrelatedValuePropagationPass());
+
+ // Finally, do an expensive DCE pass to catch all the dead code exposed by
+ // the simplifications and basic cleanup after all the simplifications.
+ // TODO: Investigate if this is too expensive.
+ FPM.addPass(ADCEPass());
+
+ // Specially optimize memory movement as it doesn't look like dataflow in SSA.
+ FPM.addPass(MemCpyOptPass());
+
+ FPM.addPass(DSEPass());
+ FPM.addPass(createFunctionToLoopPassAdaptor(
+ LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
+ /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
+
+ FPM.addPass(CoroElidePass());
+
+ for (auto &C : ScalarOptimizerLateEPCallbacks)
+ C(FPM, Level);
+
+ FPM.addPass(SimplifyCFGPass(
+ SimplifyCFGOptions().hoistCommonInsts(true).sinkCommonInsts(true)));
+ FPM.addPass(InstCombinePass());
+ invokePeepholeEPCallbacks(FPM, Level);
+
+ if (EnableCHR && Level == OptimizationLevel::O3 && PGOOpt &&
+ (PGOOpt->Action == PGOOptions::IRUse ||
+ PGOOpt->Action == PGOOptions::SampleUse))
+ FPM.addPass(ControlHeightReductionPass());
+
+ return FPM;
+}
+
+void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
+ MPM.addPass(CanonicalizeAliasesPass());
+ MPM.addPass(NameAnonGlobalPass());
+}
+
+void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
+ OptimizationLevel Level, bool RunProfileGen,
+ bool IsCS, std::string ProfileFile,
+ std::string ProfileRemappingFile) {
+ assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
+ if (!IsCS && !DisablePreInliner) {
+ InlineParams IP;
+
+ IP.DefaultThreshold = PreInlineThreshold;
+
+ // FIXME: The hint threshold has the same value used by the regular inliner
+    // when not optimizing for size. This should probably be lowered after
+    // performance testing.
+    // FIXME: this comment is cargo culted from the old pass manager; revisit.
+ IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
+ ModuleInlinerWrapperPass MIWP(IP);
+ CGSCCPassManager &CGPipeline = MIWP.getPM();
+
+ FunctionPassManager FPM;
+ FPM.addPass(SROAPass());
+ FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
+ FPM.addPass(SimplifyCFGPass()); // Merge & remove basic blocks.
+ FPM.addPass(InstCombinePass()); // Combine silly sequences.
+ invokePeepholeEPCallbacks(FPM, Level);
+
+ CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
+ std::move(FPM), PTO.EagerlyInvalidateAnalyses));
+
+ MPM.addPass(std::move(MIWP));
+
+ // Delete anything that is now dead to make sure that we don't instrument
+ // dead code. Instrumentation can end up keeping dead code around and
+ // dramatically increase code size.
+ MPM.addPass(GlobalDCEPass());
+ }
+
+ if (!RunProfileGen) {
+ assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
+ MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS));
+ // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
+ // RequireAnalysisPass for PSI before subsequent non-module passes.
+ MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
+ return;
+ }
+
+ // Perform PGO instrumentation.
+ MPM.addPass(PGOInstrumentationGen(IsCS));
+
+ FunctionPassManager FPM;
+ // Disable header duplication in loop rotation at -Oz.
+ FPM.addPass(createFunctionToLoopPassAdaptor(
+ LoopRotatePass(Level != OptimizationLevel::Oz), /*UseMemorySSA=*/false,
+ /*UseBlockFrequencyInfo=*/false));
+ MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM),
+ PTO.EagerlyInvalidateAnalyses));
+
+ // Add the profile lowering pass.
+ InstrProfOptions Options;
+ if (!ProfileFile.empty())
+ Options.InstrProfileOutput = ProfileFile;
+ // Do counter promotion at Level greater than O0.
+ Options.DoCounterPromotion = true;
+ Options.UseBFIInPromotion = IsCS;
+ MPM.addPass(InstrProfiling(Options, IsCS));
+}
+
+void PassBuilder::addPGOInstrPassesForO0(ModulePassManager &MPM,
+ bool RunProfileGen, bool IsCS,
+ std::string ProfileFile,
+ std::string ProfileRemappingFile) {
+ if (!RunProfileGen) {
+ assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
+ MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS));
+ // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
+ // RequireAnalysisPass for PSI before subsequent non-module passes.
+ MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
+ return;
+ }
+
+ // Perform PGO instrumentation.
+ MPM.addPass(PGOInstrumentationGen(IsCS));
+ // Add the profile lowering pass.
+ InstrProfOptions Options;
+ if (!ProfileFile.empty())
+ Options.InstrProfileOutput = ProfileFile;
+ // Do not do counter promotion at O0.
+ Options.DoCounterPromotion = false;
+ Options.UseBFIInPromotion = IsCS;
+ MPM.addPass(InstrProfiling(Options, IsCS));
+}
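
Editor's sketch (not part of the imported sources): both addPGOInstrPasses and addPGOInstrPassesForO0 are driven by the PGOOptions an embedder hands to PassBuilder. The sketch below assumes the field order in llvm/Support/PGOOptions.h; the profile file name is made up.

  #include "llvm/Passes/PassBuilder.h"
  #include "llvm/Support/PGOOptions.h"
  using namespace llvm;

  ModulePassManager buildPGOInstrPipeline(TargetMachine *TM) {
    // IRInstr requests IR-level instrumentation; IRUse plus an existing
    // profile path would request profile-guided use instead.
    PGOOptions PGOOpt("default.profraw", /*CSProfileGenFile=*/"",
                      /*ProfileRemappingFile=*/"", PGOOptions::IRInstr);
    PassBuilder PB(TM, PipelineTuningOptions(), PGOOpt);
    return PB.buildPerModuleDefaultPipeline(OptimizationLevel::O2);
  }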
+
+static InlineParams getInlineParamsFromOptLevel(OptimizationLevel Level) {
+ return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel());
+}
+
+ModuleInlinerWrapperPass
+PassBuilder::buildInlinerPipeline(OptimizationLevel Level,
+ ThinOrFullLTOPhase Phase) {
+ InlineParams IP = getInlineParamsFromOptLevel(Level);
+ if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
+ PGOOpt->Action == PGOOptions::SampleUse)
+ IP.HotCallSiteThreshold = 0;
+
+ if (PGOOpt)
+ IP.EnableDeferral = EnablePGOInlineDeferral;
+
+ ModuleInlinerWrapperPass MIWP(IP, PerformMandatoryInliningsFirst,
+ UseInlineAdvisor, MaxDevirtIterations);
+
+ // Require the GlobalsAA analysis for the module so we can query it within
+ // the CGSCC pipeline.
+ MIWP.addModulePass(RequireAnalysisPass<GlobalsAA, Module>());
+ // Invalidate AAManager so it can be recreated and pick up the newly available
+ // GlobalsAA.
+ MIWP.addModulePass(
+ createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>()));
+
+ // Require the ProfileSummaryAnalysis for the module so we can query it within
+ // the inliner pass.
+ MIWP.addModulePass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
+
+ // Now begin the main postorder CGSCC pipeline.
+ // FIXME: The current CGSCC pipeline has its origins in the legacy pass
+ // manager and trying to emulate its precise behavior. Much of this doesn't
+ // make a lot of sense and we should revisit the core CGSCC structure.
+ CGSCCPassManager &MainCGPipeline = MIWP.getPM();
+
+ // Note: historically, the PruneEH pass was run first to deduce nounwind and
+ // generally clean up exception handling overhead. It isn't clear this is
+ // valuable as the inliner doesn't currently care whether it is inlining an
+ // invoke or a call.
+
+ if (AttributorRun & AttributorRunOption::CGSCC)
+ MainCGPipeline.addPass(AttributorCGSCCPass());
+
+  // Now deduce any function attributes based on the current code.
+ MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
+
+ // When at O3 add argument promotion to the pass pipeline.
+ // FIXME: It isn't at all clear why this should be limited to O3.
+ if (Level == OptimizationLevel::O3)
+ MainCGPipeline.addPass(ArgumentPromotionPass());
+
+ // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
+ // there are no OpenMP runtime calls present in the module.
+ if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
+ MainCGPipeline.addPass(OpenMPOptCGSCCPass());
+
+ for (auto &C : CGSCCOptimizerLateEPCallbacks)
+ C(MainCGPipeline, Level);
+
+ // Lastly, add the core function simplification pipeline nested inside the
+ // CGSCC walk.
+ MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
+ buildFunctionSimplificationPipeline(Level, Phase),
+ PTO.EagerlyInvalidateAnalyses, EnableNoRerunSimplificationPipeline));
+
+ MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
+
+ if (EnableNoRerunSimplificationPipeline)
+ MIWP.addLateModulePass(createModuleToFunctionPassAdaptor(
+ InvalidateAnalysisPass<ShouldNotRunFunctionPassesAnalysis>()));
+
+ return MIWP;
+}
+
+ModuleInlinerPass
+PassBuilder::buildModuleInlinerPipeline(OptimizationLevel Level,
+ ThinOrFullLTOPhase Phase) {
+ InlineParams IP = getInlineParamsFromOptLevel(Level);
+ if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
+ PGOOpt->Action == PGOOptions::SampleUse)
+ IP.HotCallSiteThreshold = 0;
+
+ if (PGOOpt)
+ IP.EnableDeferral = EnablePGOInlineDeferral;
+
+  // The inline deferral logic is used to avoid losing inlining opportunities
+  // later on. It is helpful in the SCC inliner, where inlining is processed in
+  // bottom-up order. In the module inliner, the inlining order is
+  // priority-based by default, so inline deferral is unnecessary there and we
+  // disable it.
+ IP.EnableDeferral = false;
+
+ return ModuleInlinerPass(IP, UseInlineAdvisor);
+}
+
+ModulePassManager
+PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
+ ThinOrFullLTOPhase Phase) {
+ ModulePassManager MPM;
+
+ // Place pseudo probe instrumentation as the first pass of the pipeline to
+ // minimize the impact of optimization changes.
+ if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
+ Phase != ThinOrFullLTOPhase::ThinLTOPostLink)
+ MPM.addPass(SampleProfileProbePass(TM));
+
+ bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
+
+  // In ThinLTO mode, when a flattened profile is used, all the available
+  // profile information will be annotated in the PreLink phase, so there is
+  // no need to load the profile again in the PostLink phase.
+ bool LoadSampleProfile =
+ HasSampleProfile &&
+ !(FlattenedProfileUsed && Phase == ThinOrFullLTOPhase::ThinLTOPostLink);
+
+ // During the ThinLTO backend phase we perform early indirect call promotion
+ // here, before globalopt. Otherwise imported available_externally functions
+ // look unreferenced and are removed. If we are going to load the sample
+ // profile then defer until later.
+ // TODO: See if we can move later and consolidate with the location where
+ // we perform ICP when we are loading a sample profile.
+ // TODO: We pass HasSampleProfile (whether there was a sample profile file
+ // passed to the compile) to the SamplePGO flag of ICP. This is used to
+ // determine whether the new direct calls are annotated with prof metadata.
+ // Ideally this should be determined from whether the IR is annotated with
+  // sample profile, and not whether a sample profile was provided on the
+ // command line. E.g. for flattened profiles where we will not be reloading
+ // the sample profile in the ThinLTO backend, we ideally shouldn't have to
+ // provide the sample profile file.
+ if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile)
+ MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
+
+ // Do basic inference of function attributes from known properties of system
+ // libraries and other oracles.
+ MPM.addPass(InferFunctionAttrsPass());
+
+ // Create an early function pass manager to cleanup the output of the
+ // frontend.
+ FunctionPassManager EarlyFPM;
+ // Lower llvm.expect to metadata before attempting transforms.
+ // Compare/branch metadata may alter the behavior of passes like SimplifyCFG.
+ EarlyFPM.addPass(LowerExpectIntrinsicPass());
+ EarlyFPM.addPass(SimplifyCFGPass());
+ EarlyFPM.addPass(SROAPass());
+ EarlyFPM.addPass(EarlyCSEPass());
+ EarlyFPM.addPass(CoroEarlyPass());
+ if (Level == OptimizationLevel::O3)
+ EarlyFPM.addPass(CallSiteSplittingPass());
+
+  // In the SamplePGO ThinLTO backend, we need instcombine before profile
+  // annotation to convert bitcasts to direct calls so that they can be inlined
+  // during the profile annotation preparation step.
+ // More details about SamplePGO design can be found in:
+ // https://research.google.com/pubs/pub45290.html
+ // FIXME: revisit how SampleProfileLoad/Inliner/ICP is structured.
+ if (LoadSampleProfile)
+ EarlyFPM.addPass(InstCombinePass());
+ MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM),
+ PTO.EagerlyInvalidateAnalyses));
+
+ if (LoadSampleProfile) {
+ // Annotate sample profile right after early FPM to ensure freshness of
+ // the debug info.
+ MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
+ PGOOpt->ProfileRemappingFile, Phase));
+ // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
+ // RequireAnalysisPass for PSI before subsequent non-module passes.
+ MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
+ // Do not invoke ICP in the LTOPrelink phase as it makes it hard
+ // for the profile annotation to be accurate in the LTO backend.
+ if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink &&
+ Phase != ThinOrFullLTOPhase::FullLTOPreLink)
+ // We perform early indirect call promotion here, before globalopt.
+ // This is important for the ThinLTO backend phase because otherwise
+ // imported available_externally functions look unreferenced and are
+ // removed.
+ MPM.addPass(
+ PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
+ }
+
+ // Try to perform OpenMP specific optimizations on the module. This is a
+ // (quick!) no-op if there are no OpenMP runtime calls present in the module.
+ if (Level != OptimizationLevel::O0)
+ MPM.addPass(OpenMPOptPass());
+
+ if (AttributorRun & AttributorRunOption::MODULE)
+ MPM.addPass(AttributorPass());
+
+ // Lower type metadata and the type.test intrinsic in the ThinLTO
+ // post link pipeline after ICP. This is to enable usage of the type
+ // tests in ICP sequences.
+ if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink)
+ MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
+
+ for (auto &C : PipelineEarlySimplificationEPCallbacks)
+ C(MPM, Level);
+
+ // Specialize functions with IPSCCP.
+ if (EnableFunctionSpecialization && Level == OptimizationLevel::O3)
+ MPM.addPass(FunctionSpecializationPass());
+
+ // Interprocedural constant propagation now that basic cleanup has occurred
+ // and prior to optimizing globals.
+  // FIXME: This position in the pipeline hasn't been carefully considered in
+  // years; it should be re-analyzed.
+ MPM.addPass(IPSCCPPass());
+
+ // Attach metadata to indirect call sites indicating the set of functions
+ // they may target at run-time. This should follow IPSCCP.
+ MPM.addPass(CalledValuePropagationPass());
+
+ // Optimize globals to try and fold them into constants.
+ MPM.addPass(GlobalOptPass());
+
+ // Promote any localized globals to SSA registers.
+  // FIXME: Should this instead be a run of SROA?
+ // FIXME: We should probably run instcombine and simplifycfg afterward to
+ // delete control flows that are dead once globals have been folded to
+ // constants.
+ MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass()));
+
+ // Remove any dead arguments exposed by cleanups and constant folding
+ // globals.
+ MPM.addPass(DeadArgumentEliminationPass());
+
+ // Create a small function pass pipeline to cleanup after all the global
+ // optimizations.
+ FunctionPassManager GlobalCleanupPM;
+ GlobalCleanupPM.addPass(InstCombinePass());
+ invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
+
+ GlobalCleanupPM.addPass(SimplifyCFGPass());
+ MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM),
+ PTO.EagerlyInvalidateAnalyses));
+
+ // Add all the requested passes for instrumentation PGO, if requested.
+ if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
+ (PGOOpt->Action == PGOOptions::IRInstr ||
+ PGOOpt->Action == PGOOptions::IRUse)) {
+ addPGOInstrPasses(MPM, Level,
+ /* RunProfileGen */ PGOOpt->Action == PGOOptions::IRInstr,
+ /* IsCS */ false, PGOOpt->ProfileFile,
+ PGOOpt->ProfileRemappingFile);
+ MPM.addPass(PGOIndirectCallPromotion(false, false));
+ }
+ if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
+ PGOOpt->CSAction == PGOOptions::CSIRInstr)
+ MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile));
+
+ // Synthesize function entry counts for non-PGO compilation.
+ if (EnableSyntheticCounts && !PGOOpt)
+ MPM.addPass(SyntheticCountsPropagation());
+
+ if (EnableModuleInliner)
+ MPM.addPass(buildModuleInlinerPipeline(Level, Phase));
+ else
+ MPM.addPass(buildInlinerPipeline(Level, Phase));
+
+ if (EnableMemProfiler && Phase != ThinOrFullLTOPhase::ThinLTOPreLink) {
+ MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass()));
+ MPM.addPass(ModuleMemProfilerPass());
+ }
+
+ return MPM;
+}
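
Editor's sketch (not part of the imported sources): buildModuleSimplificationPipeline and the related LTO pipelines are normally reached through buildPerModuleDefaultPipeline. The canonical analysis-manager boilerplate an embedder needs before any of these pipelines can run looks roughly like this:

  #include "llvm/Analysis/CGSCCPassManager.h"
  #include "llvm/Analysis/LoopAnalysisManager.h"
  #include "llvm/IR/Module.h"
  #include "llvm/Passes/PassBuilder.h"
  using namespace llvm;

  void optimizeModule(Module &M, TargetMachine *TM) {
    LoopAnalysisManager LAM;
    FunctionAnalysisManager FAM;
    CGSCCAnalysisManager CGAM;
    ModuleAnalysisManager MAM;

    PassBuilder PB(TM);
    PB.registerModuleAnalyses(MAM);
    PB.registerCGSCCAnalyses(CGAM);
    PB.registerFunctionAnalyses(FAM);
    PB.registerLoopAnalyses(LAM);
    PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

    ModulePassManager MPM =
        PB.buildPerModuleDefaultPipeline(OptimizationLevel::O2);
    MPM.run(M, MAM);
  }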
+
+/// TODO: Should LTO cause any differences to this set of passes?
+void PassBuilder::addVectorPasses(OptimizationLevel Level,
+ FunctionPassManager &FPM, bool IsFullLTO) {
+ FPM.addPass(LoopVectorizePass(
+ LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization)));
+
+ if (IsFullLTO) {
+ // The vectorizer may have significantly shortened a loop body; unroll
+ // again. Unroll small loops to hide loop backedge latency and saturate any
+ // parallel execution resources of an out-of-order processor. We also then
+ // need to clean up redundancies and loop invariant code.
+ // FIXME: It would be really good to use a loop-integrated instruction
+ // combiner for cleanup here so that the unrolling and LICM can be pipelined
+ // across the loop nests.
+ // We do UnrollAndJam in a separate LPM to ensure it happens before unrolling.
+ if (EnableUnrollAndJam && PTO.LoopUnrolling)
+ FPM.addPass(createFunctionToLoopPassAdaptor(
+ LoopUnrollAndJamPass(Level.getSpeedupLevel())));
+ FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
+ Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
+ PTO.ForgetAllSCEVInLoopUnroll)));
+ FPM.addPass(WarnMissedTransformationsPass());
+ }
+
+ if (!IsFullLTO) {
+ // Eliminate loads by forwarding stores from the previous iteration to loads
+ // of the current iteration.
+ FPM.addPass(LoopLoadEliminationPass());
+ }
+ // Cleanup after the loop optimization passes.
+ FPM.addPass(InstCombinePass());
+
+ if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
+ // At higher optimization levels, try to clean up any runtime overlap and
+ // alignment checks inserted by the vectorizer. We want to track correlated
+ // runtime checks for two inner loops in the same outer loop, fold any
+ // common computations, hoist loop-invariant aspects out of any outer loop,
+ // and unswitch the runtime checks if possible. Once hoisted, we may have
+ // dead (or speculatable) control flows or more combining opportunities.
+ FPM.addPass(EarlyCSEPass());
+ FPM.addPass(CorrelatedValuePropagationPass());
+ FPM.addPass(InstCombinePass());
+ LoopPassManager LPM;
+ LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
+ LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
+ OptimizationLevel::O3));
+ FPM.addPass(
+ RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
+ FPM.addPass(
+ createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true,
+ /*UseBlockFrequencyInfo=*/true));
+ FPM.addPass(SimplifyCFGPass());
+ FPM.addPass(InstCombinePass());
+ }
+
+ // Now that we've formed fast-to-execute loop structures, we do further
+ // optimizations. These are run afterward as they might block doing complex
+ // analyses and transforms such as those needed for loop vectorization.
+
+ // Clean up after loop vectorization, etc. Simplification passes like CVP and
+ // GVN, loop transforms, and others have already run, so it's now better to
+ // convert to more optimized IR using more aggressive simplify CFG options.
+ // The extra sinking transform can create larger basic blocks, so do this
+ // before SLP vectorization.
+ FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
+ .forwardSwitchCondToPhi(true)
+ .convertSwitchToLookupTable(true)
+ .needCanonicalLoops(false)
+ .hoistCommonInsts(true)
+ .sinkCommonInsts(true)));
+
+ if (IsFullLTO) {
+ FPM.addPass(SCCPPass());
+ FPM.addPass(InstCombinePass());
+ FPM.addPass(BDCEPass());
+ }
+
+ // Optimize parallel scalar instruction chains into SIMD instructions.
+ if (PTO.SLPVectorization) {
+ FPM.addPass(SLPVectorizerPass());
+ if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
+ FPM.addPass(EarlyCSEPass());
+ }
+ }
+ // Enhance/cleanup vector code.
+ FPM.addPass(VectorCombinePass());
+
+ if (!IsFullLTO) {
+ FPM.addPass(InstCombinePass());
+ // Unroll small loops to hide loop backedge latency and saturate any
+ // parallel execution resources of an out-of-order processor. We also then
+ // need to clean up redundancies and loop invariant code.
+ // FIXME: It would be really good to use a loop-integrated instruction
+ // combiner for cleanup here so that the unrolling and LICM can be pipelined
+ // across the loop nests.
+ // We do UnrollAndJam in a separate LPM to ensure it happens before unrolling.
+ if (EnableUnrollAndJam && PTO.LoopUnrolling) {
+ FPM.addPass(createFunctionToLoopPassAdaptor(
+ LoopUnrollAndJamPass(Level.getSpeedupLevel())));
+ }
+ FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
+ Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
+ PTO.ForgetAllSCEVInLoopUnroll)));
+ FPM.addPass(WarnMissedTransformationsPass());
+ FPM.addPass(InstCombinePass());
+ FPM.addPass(
+ RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
+ FPM.addPass(createFunctionToLoopPassAdaptor(
+ LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
+ /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
+ }
+
+ // Now that we've vectorized and unrolled loops, we may have more refined
+ // alignment information; try to re-derive it here.
+ FPM.addPass(AlignmentFromAssumptionsPass());
+
+ if (IsFullLTO)
+ FPM.addPass(InstCombinePass());
+}
+
+ModulePassManager
+PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
+ bool LTOPreLink) {
+ ModulePassManager MPM;
+
+ // Optimize globals now that the module is fully simplified.
+ MPM.addPass(GlobalOptPass());
+ MPM.addPass(GlobalDCEPass());
+
+ // Run partial inlining pass to partially inline functions that have
+ // large bodies.
+ if (RunPartialInlining)
+ MPM.addPass(PartialInlinerPass());
+
+ // Remove available-externally function and global definitions since we aren't compiling
+ // an object file for later LTO. For LTO we want to preserve these so they
+ // are eligible for inlining at link-time. Note if they are unreferenced they
+ // will be removed by GlobalDCE later, so this only impacts referenced
+ // available externally globals. Eventually they will be suppressed during
+ // codegen, but eliminating here enables more opportunity for GlobalDCE as it
+ // may make globals referenced by available external functions dead and saves
+ // running remaining passes on the eliminated functions. These should be
+ // preserved during prelinking for link-time inlining decisions.
+ if (!LTOPreLink)
+ MPM.addPass(EliminateAvailableExternallyPass());
+
+ if (EnableOrderFileInstrumentation)
+ MPM.addPass(InstrOrderFilePass());
+
+ // Do RPO function attribute inference across the module to forward-propagate
+ // attributes where applicable.
+ // FIXME: Is this really an optimization rather than a canonicalization?
+ MPM.addPass(ReversePostOrderFunctionAttrsPass());
+
+ // Do a post-inline PGO instrumentation and use pass. This is a
+ // context-sensitive PGO pass. We don't want to do this in the LTOPreLink
+ // phase as cross-module inlining has not been done yet. The context-sensitive
+ // instrumentation runs after all the inlines are done.
+ if (!LTOPreLink && PGOOpt) {
+ if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
+ addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true,
+ /* IsCS */ true, PGOOpt->CSProfileGenFile,
+ PGOOpt->ProfileRemappingFile);
+ else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
+ addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false,
+ /* IsCS */ true, PGOOpt->ProfileFile,
+ PGOOpt->ProfileRemappingFile);
+ }
+
+ // Re-require GlobalsAA here prior to function passes. This is particularly
+ // useful as the above will have inlined, DCE'ed, and function-attr
+ // propagated everything. We should at this point have a reasonably minimal
+ // and richly annotated call graph. By computing aliasing and mod/ref
+ // information for all local globals here, the late loop passes and notably
+ // the vectorizer will be able to use them to help recognize vectorizable
+ // memory operations.
+ MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
+
+ FunctionPassManager OptimizePM;
+ OptimizePM.addPass(Float2IntPass());
+ OptimizePM.addPass(LowerConstantIntrinsicsPass());
+
+ if (EnableMatrix) {
+ OptimizePM.addPass(LowerMatrixIntrinsicsPass());
+ OptimizePM.addPass(EarlyCSEPass());
+ }
+
+ // FIXME: We need to run some loop optimizations to re-rotate loops after
+ // simplifycfg and others undo their rotation.
+
+ // Optimize the loop execution. These passes operate on entire loop nests
+ // rather than on each loop in an inside-out manner, and so they are actually
+ // function passes.
+
+ for (auto &C : VectorizerStartEPCallbacks)
+ C(OptimizePM, Level);
+
+ LoopPassManager LPM;
+ // First rotate loops that may have been un-rotated by prior passes.
+ // Disable header duplication at -Oz.
+ LPM.addPass(LoopRotatePass(Level != OptimizationLevel::Oz, LTOPreLink));
+ // Some loops may have become dead by now. Try to delete them.
+ // FIXME: See discussion in https://reviews.llvm.org/D112851;
+ // this may need to be revisited once GVN is more powerful.
+ LPM.addPass(LoopDeletionPass());
+ OptimizePM.addPass(createFunctionToLoopPassAdaptor(
+ std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false));
+
+ // Distribute loops to allow partial vectorization, i.e. isolate dependences
+ // into a separate loop that would otherwise inhibit vectorization. This is
+ // currently only performed for loops marked with the metadata
+ // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
+ OptimizePM.addPass(LoopDistributePass());
+
+ // Populates the VFABI attribute with the scalar-to-vector mappings
+ // from the TargetLibraryInfo.
+ OptimizePM.addPass(InjectTLIMappings());
+
+ addVectorPasses(Level, OptimizePM, /* IsFullLTO */ false);
+
+ // Split out cold code. Splitting is done late to avoid hiding context from
+ // other optimizations and inadvertently regressing performance. The tradeoff
+ // is that this has a higher code size cost than splitting early.
+ if (EnableHotColdSplit && !LTOPreLink)
+ MPM.addPass(HotColdSplittingPass());
+
+ // Search the code for similar regions of code. If enough similar regions can
+ // be found where extracting the regions into their own function will decrease
+ // the size of the program, we extract the regions and deduplicate the
+ // structurally similar regions.
+ if (EnableIROutliner)
+ MPM.addPass(IROutlinerPass());
+
+ // Merge functions if requested.
+ if (PTO.MergeFunctions)
+ MPM.addPass(MergeFunctionsPass());
+
+ // The LoopSink pass sinks instructions hoisted by LICM, which serves as a
+ // canonicalization pass that enables other optimizations. As a result, the
+ // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
+ // results too early.
+ OptimizePM.addPass(LoopSinkPass());
+
+ // And finally clean up LCSSA form before generating code.
+ OptimizePM.addPass(InstSimplifyPass());
+
+ // This hoists/decomposes div/rem ops. It should run after other sink/hoist
+ // passes to avoid re-sinking, but before SimplifyCFG because it can allow
+ // flattening of blocks.
+ OptimizePM.addPass(DivRemPairsPass());
+
+ // LoopSink (and other loop passes since the last simplifyCFG) might have
+ // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
+ OptimizePM.addPass(SimplifyCFGPass());
+
+ OptimizePM.addPass(CoroCleanupPass());
+
+ // Add the core optimizing pipeline.
+ MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
+ PTO.EagerlyInvalidateAnalyses));
+
+ for (auto &C : OptimizerLastEPCallbacks)
+ C(MPM, Level);
+
+ if (PTO.CallGraphProfile)
+ MPM.addPass(CGProfilePass());
+
+ // Now we need to do some global optimization transforms.
+ // FIXME: It would seem like these should come first in the optimization
+ // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
+ // ordering here.
+ MPM.addPass(GlobalDCEPass());
+ MPM.addPass(ConstantMergePass());
+
+ // TODO: The relative lookup table converter pass caused an issue when full
+ // LTO is enabled. See https://reviews.llvm.org/D94355 for more details.
+ // Until the issue is fixed, disable this pass during the pre-linking phase.
+ if (!LTOPreLink)
+ MPM.addPass(RelLookupTableConverterPass());
+
+ return MPM;
+}
+
+ModulePassManager
+PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
+ bool LTOPreLink) {
+ assert(Level != OptimizationLevel::O0 &&
+ "Must request optimizations for the default pipeline!");
+
+ ModulePassManager MPM;
+
+ // Convert @llvm.global.annotations to !annotation metadata.
+ MPM.addPass(Annotation2MetadataPass());
+
+ // Force any function attributes we want the rest of the pipeline to observe.
+ MPM.addPass(ForceFunctionAttrsPass());
+
+ // Apply module pipeline start EP callback.
+ for (auto &C : PipelineStartEPCallbacks)
+ C(MPM, Level);
+
+ if (PGOOpt && PGOOpt->DebugInfoForProfiling)
+ MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
+
+ // Add the core simplification pipeline.
+ MPM.addPass(buildModuleSimplificationPipeline(
+ Level, LTOPreLink ? ThinOrFullLTOPhase::FullLTOPreLink
+ : ThinOrFullLTOPhase::None));
+
+ // Now add the optimization pipeline.
+ MPM.addPass(buildModuleOptimizationPipeline(Level, LTOPreLink));
+
+ if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
+ PGOOpt->Action == PGOOptions::SampleUse)
+ MPM.addPass(PseudoProbeUpdatePass());
+
+ // Emit annotation remarks.
+ addAnnotationRemarksPass(MPM);
+
+ if (LTOPreLink)
+ addRequiredLTOPreLinkPasses(MPM);
+
+ return MPM;
+}
+
+ModulePassManager
+PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
+ assert(Level != OptimizationLevel::O0 &&
+ "Must request optimizations for the default pipeline!");
+
+ ModulePassManager MPM;
+
+ // Convert @llvm.global.annotations to !annotation metadata.
+ MPM.addPass(Annotation2MetadataPass());
+
+ // Force any function attributes we want the rest of the pipeline to observe.
+ MPM.addPass(ForceFunctionAttrsPass());
+
+ if (PGOOpt && PGOOpt->DebugInfoForProfiling)
+ MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
+
+ // Apply module pipeline start EP callback.
+ for (auto &C : PipelineStartEPCallbacks)
+ C(MPM, Level);
+
+ // If we are planning to perform ThinLTO later, we don't bloat the code with
+ // unrolling/vectorization/... now. Just simplify the module as much as we
+ // can.
+ MPM.addPass(buildModuleSimplificationPipeline(
+ Level, ThinOrFullLTOPhase::ThinLTOPreLink));
+
+ // Run partial inlining pass to partially inline functions that have
+ // large bodies.
+ // FIXME: It isn't clear whether this is really the right place to run this
+ // in ThinLTO. Because there is another canonicalization and simplification
+ // phase that will run after the thin link, running this here ends up with
+ // less information than will be available later and it may grow functions in
+ // ways that aren't beneficial.
+ if (RunPartialInlining)
+ MPM.addPass(PartialInlinerPass());
+
+ // Reduce the size of the IR as much as possible.
+ MPM.addPass(GlobalOptPass());
+
+ // Module simplification splits coroutines, but does not fully clean up
+ // coroutine intrinsics. To ensure ThinLTO optimization passes don't trip up
+ // on these, we schedule the cleanup here.
+ MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass()));
+
+ if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
+ PGOOpt->Action == PGOOptions::SampleUse)
+ MPM.addPass(PseudoProbeUpdatePass());
+
+ // Handle OptimizerLastEPCallbacks added by clang on PreLink. The actual
+ // optimization is going to be done in the PostLink stage, but clang can't
+ // add callbacks there in the case of in-process ThinLTO called by the linker.
+ for (auto &C : OptimizerLastEPCallbacks)
+ C(MPM, Level);
+
+ // Emit annotation remarks.
+ addAnnotationRemarksPass(MPM);
+
+ addRequiredLTOPreLinkPasses(MPM);
+
+ return MPM;
+}
+
+ModulePassManager PassBuilder::buildThinLTODefaultPipeline(
+ OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
+ ModulePassManager MPM;
+
+ // Convert @llvm.global.annotations to !annotation metadata.
+ MPM.addPass(Annotation2MetadataPass());
+
+ if (ImportSummary) {
+ // These passes import type identifier resolutions for whole-program
+ // devirtualization and CFI. They must run early because other passes may
+ // disturb the specific instruction patterns that these passes look for,
+ // creating dependencies on resolutions that may not appear in the summary.
+ //
+ // For example, GVN may transform the pattern assume(type.test) appearing in
+ // two basic blocks into assume(phi(type.test, type.test)), which would
+ // transform a dependency on a WPD resolution into a dependency on a type
+ // identifier resolution for CFI.
+ //
+ // Also, WPD has access to more precise information than ICP and can
+ // devirtualize more effectively, so it should operate on the IR first.
+ //
+ // The WPD and LowerTypeTest passes need to run at -O0 to lower type
+ // metadata and intrinsics.
+ MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
+ MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
+ }
+
+ if (Level == OptimizationLevel::O0) {
+ // Run a second time to clean up any type tests left behind by WPD for use
+ // in ICP.
+ MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
+ // Drop available_externally and unreferenced globals. This is necessary
+ // with ThinLTO in order to avoid leaving undefined references to dead
+ // globals in the object file.
+ MPM.addPass(EliminateAvailableExternallyPass());
+ MPM.addPass(GlobalDCEPass());
+ return MPM;
+ }
+
+ // Force any function attributes we want the rest of the pipeline to observe.
+ MPM.addPass(ForceFunctionAttrsPass());
+
+ // Add the core simplification pipeline.
+ MPM.addPass(buildModuleSimplificationPipeline(
+ Level, ThinOrFullLTOPhase::ThinLTOPostLink));
+
+ // Now add the optimization pipeline.
+ MPM.addPass(buildModuleOptimizationPipeline(Level));
+
+ // Emit annotation remarks.
+ addAnnotationRemarksPass(MPM);
+
+ return MPM;
+}
+
+ModulePassManager
+PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
+ assert(Level != OptimizationLevel::O0 &&
+ "Must request optimizations for the default pipeline!");
+ // FIXME: We should use a customized pre-link pipeline!
+ return buildPerModuleDefaultPipeline(Level,
+ /* LTOPreLink */ true);
+}
+
+ModulePassManager
+PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
+ ModuleSummaryIndex *ExportSummary) {
+ ModulePassManager MPM;
+
+ // Convert @llvm.global.annotations to !annotation metadata.
+ MPM.addPass(Annotation2MetadataPass());
+
+ // Create a function that performs CFI checks for cross-DSO calls with targets
+ // in the current module.
+ MPM.addPass(CrossDSOCFIPass());
+
+ if (Level == OptimizationLevel::O0) {
+ // The WPD and LowerTypeTest passes need to run at -O0 to lower type
+ // metadata and intrinsics.
+ MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
+ MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
+ // Run a second time to clean up any type tests left behind by WPD for use
+ // in ICP.
+ MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
+
+ // Emit annotation remarks.
+ addAnnotationRemarksPass(MPM);
+
+ return MPM;
+ }
+
+ if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
+ // Load sample profile before running the LTO optimization pipeline.
+ MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
+ PGOOpt->ProfileRemappingFile,
+ ThinOrFullLTOPhase::FullLTOPostLink));
+ // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
+ // RequireAnalysisPass for PSI before subsequent non-module passes.
+ MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
+ }
+
+ // Remove unused virtual tables to improve the quality of code generated by
+ // whole-program devirtualization and bitset lowering.
+ MPM.addPass(GlobalDCEPass());
+
+ // Force any function attributes we want the rest of the pipeline to observe.
+ MPM.addPass(ForceFunctionAttrsPass());
+
+ // Do basic inference of function attributes from known properties of system
+ // libraries and other oracles.
+ MPM.addPass(InferFunctionAttrsPass());
+
+ if (Level.getSpeedupLevel() > 1) {
+ FunctionPassManager EarlyFPM;
+ EarlyFPM.addPass(CallSiteSplittingPass());
+ MPM.addPass(createModuleToFunctionPassAdaptor(
+ std::move(EarlyFPM), PTO.EagerlyInvalidateAnalyses));
+
+ // Indirect call promotion. This should promote all the targets that are
+ // left by the earlier promotion pass that promotes intra-module targets.
+ // This two-step promotion is to save compile time. For LTO, it should
+ // produce the same result as if we only did promotion here.
+ MPM.addPass(PGOIndirectCallPromotion(
+ true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
+
+ if (EnableFunctionSpecialization && Level == OptimizationLevel::O3)
+ MPM.addPass(FunctionSpecializationPass());
+ // Propagate constants at call sites into the functions they call. This
+ // opens opportunities for globalopt (and inlining) by substituting function
+ // pointers passed as arguments to direct uses of functions.
+ MPM.addPass(IPSCCPPass());
+
+ // Attach metadata to indirect call sites indicating the set of functions
+ // they may target at run-time. This should follow IPSCCP.
+ MPM.addPass(CalledValuePropagationPass());
+ }
+
+ // Now deduce any function attributes based on the current code.
+ MPM.addPass(
+ createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass()));
+
+ // Do RPO function attribute inference across the module to forward-propagate
+ // attributes where applicable.
+ // FIXME: Is this really an optimization rather than a canonicalization?
+ MPM.addPass(ReversePostOrderFunctionAttrsPass());
+
+ // Use in-range annotations on GEP indices to split globals where beneficial.
+ MPM.addPass(GlobalSplitPass());
+
+ // Run whole program optimization of virtual call when the list of callees
+ // is fixed.
+ MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
+
+ // Stop here at -O1.
+ if (Level == OptimizationLevel::O1) {
+ // The LowerTypeTestsPass needs to run to lower type metadata and the
+ // type.test intrinsics. The pass does nothing if CFI is disabled.
+ MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
+ // Run a second time to clean up any type tests left behind by WPD for use
+ // in ICP (which is performed earlier than this in the regular LTO
+ // pipeline).
+ MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
+
+ // Emit annotation remarks.
+ addAnnotationRemarksPass(MPM);
+
+ return MPM;
+ }
+
+ // Optimize globals to try and fold them into constants.
+ MPM.addPass(GlobalOptPass());
+
+ // Promote any localized globals to SSA registers.
+ MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass()));
+
+ // Linking modules together can lead to duplicate global constants; only
+ // keep one copy of each constant.
+ MPM.addPass(ConstantMergePass());
+
+ // Remove unused arguments from functions.
+ MPM.addPass(DeadArgumentEliminationPass());
+
+ // Reduce the code after globalopt and ipsccp. Both can open up significant
+ // simplification opportunities, and both can propagate functions through
+ // function pointers. When this happens, we often have to resolve varargs
+ // calls, etc, so let instcombine do this.
+ FunctionPassManager PeepholeFPM;
+ if (Level == OptimizationLevel::O3)
+ PeepholeFPM.addPass(AggressiveInstCombinePass());
+ PeepholeFPM.addPass(InstCombinePass());
+ invokePeepholeEPCallbacks(PeepholeFPM, Level);
+
+ MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM),
+ PTO.EagerlyInvalidateAnalyses));
+
+ // Note: historically, the PruneEH pass was run first to deduce nounwind and
+ // generally clean up exception handling overhead. It isn't clear this is
+ // valuable as the inliner doesn't currently care whether it is inlining an
+ // invoke or a call.
+ // Run the inliner now.
+ MPM.addPass(ModuleInlinerWrapperPass(getInlineParamsFromOptLevel(Level)));
+
+ // Optimize globals again after we ran the inliner.
+ MPM.addPass(GlobalOptPass());
+
+ // Garbage collect dead functions.
+ MPM.addPass(GlobalDCEPass());
+
+ // If we didn't decide to inline a function, check to see if we can
+ // transform it to pass arguments by value instead of by reference.
+ MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(ArgumentPromotionPass()));
+
+ FunctionPassManager FPM;
+ // The IPO Passes may leave cruft around. Clean up after them.
+ FPM.addPass(InstCombinePass());
+ invokePeepholeEPCallbacks(FPM, Level);
+
+ FPM.addPass(JumpThreadingPass(/*InsertFreezeWhenUnfoldingSelect*/ true));
+
+ // Do a post-inline PGO instrumentation and use pass. This is a
+ // context-sensitive PGO pass.
+ if (PGOOpt) {
+ if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
+ addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true,
+ /* IsCS */ true, PGOOpt->CSProfileGenFile,
+ PGOOpt->ProfileRemappingFile);
+ else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
+ addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false,
+ /* IsCS */ true, PGOOpt->ProfileFile,
+ PGOOpt->ProfileRemappingFile);
+ }
+
+ // Break up allocas.
+ FPM.addPass(SROAPass());
+
+ // LTO provides additional opportunities for tailcall elimination due to
+ // link-time inlining and visibility of the nocapture attribute.
+ FPM.addPass(TailCallElimPass());
+
+ // Run a few AA-driven optimizations here and now to clean up the code.
+ MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM),
+ PTO.EagerlyInvalidateAnalyses));
+
+ MPM.addPass(
+ createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass()));
+
+ // Require the GlobalsAA analysis for the module so we can query it within
+ // MainFPM.
+ MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
+ // Invalidate AAManager so it can be recreated and pick up the newly available
+ // GlobalsAA.
+ MPM.addPass(
+ createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>()));
+
+ FunctionPassManager MainFPM;
+ MainFPM.addPass(createFunctionToLoopPassAdaptor(
+ LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
+ /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
+
+ if (RunNewGVN)
+ MainFPM.addPass(NewGVNPass());
+ else
+ MainFPM.addPass(GVNPass());
+
+ // Remove dead memcpy()'s.
+ MainFPM.addPass(MemCpyOptPass());
+
+ // Nuke dead stores.
+ MainFPM.addPass(DSEPass());
+ MainFPM.addPass(MergedLoadStoreMotionPass());
+
+ // More loops are countable; try to optimize them.
+ if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
+ MainFPM.addPass(createFunctionToLoopPassAdaptor(LoopFlattenPass()));
+
+ if (EnableConstraintElimination)
+ MainFPM.addPass(ConstraintEliminationPass());
+
+ LoopPassManager LPM;
+ LPM.addPass(IndVarSimplifyPass());
+ LPM.addPass(LoopDeletionPass());
+ // FIXME: Add loop interchange.
+
+ // Unroll small loops and perform peeling.
+ LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
+ /* OnlyWhenForced= */ !PTO.LoopUnrolling,
+ PTO.ForgetAllSCEVInLoopUnroll));
+ // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
+ // *All* loop passes must preserve it, in order to be able to use it.
+ MainFPM.addPass(createFunctionToLoopPassAdaptor(
+ std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true));
+
+ MainFPM.addPass(LoopDistributePass());
+
+ addVectorPasses(Level, MainFPM, /* IsFullLTO */ true);
+
+ invokePeepholeEPCallbacks(MainFPM, Level);
+ MainFPM.addPass(JumpThreadingPass(/*InsertFreezeWhenUnfoldingSelect*/ true));
+ MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM),
+ PTO.EagerlyInvalidateAnalyses));
+
+ // Lower type metadata and the type.test intrinsic. This pass supports
+ // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
+ // to be run at link time if CFI is enabled. This pass does nothing if
+ // CFI is disabled.
+ MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
+ // Run a second time to clean up any type tests left behind by WPD for use
+ // in ICP (which is performed earlier than this in the regular LTO pipeline).
+ MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
+
+ // Enable splitting late in the FullLTO post-link pipeline. This is done in
+ // the same stage in the old pass manager (\ref addLateLTOOptimizationPasses).
+ if (EnableHotColdSplit)
+ MPM.addPass(HotColdSplittingPass());
+
+ // Add late LTO optimization passes.
+ // Delete basic blocks that optimization passes may have killed.
+ MPM.addPass(createModuleToFunctionPassAdaptor(
+ SimplifyCFGPass(SimplifyCFGOptions().hoistCommonInsts(true))));
+
+ // Drop bodies of available-externally objects to improve GlobalDCE.
+ MPM.addPass(EliminateAvailableExternallyPass());
+
+ // Now that we have optimized the program, discard unreachable functions.
+ MPM.addPass(GlobalDCEPass());
+
+ if (PTO.MergeFunctions)
+ MPM.addPass(MergeFunctionsPass());
+
+ // Emit annotation remarks.
+ addAnnotationRemarksPass(MPM);
+
+ return MPM;
+}
+
+ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
+ bool LTOPreLink) {
+ assert(Level == OptimizationLevel::O0 &&
+ "buildO0DefaultPipeline should only be used with O0");
+
+ ModulePassManager MPM;
+
+ // Perform pseudo probe instrumentation in O0 mode. This is for consistency
+ // between different build modes. For example, an LTO build can be mixed with
+ // an O0 prelink and an O2 postlink. Loading a sample profile in the postlink
+ // will require pseudo probe instrumentation in the prelink.
+ if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
+ MPM.addPass(SampleProfileProbePass(TM));
+
+ if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
+ PGOOpt->Action == PGOOptions::IRUse))
+ addPGOInstrPassesForO0(
+ MPM,
+ /* RunProfileGen */ (PGOOpt->Action == PGOOptions::IRInstr),
+ /* IsCS */ false, PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
+
+ for (auto &C : PipelineStartEPCallbacks)
+ C(MPM, Level);
+
+ if (PGOOpt && PGOOpt->DebugInfoForProfiling)
+ MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
+
+ for (auto &C : PipelineEarlySimplificationEPCallbacks)
+ C(MPM, Level);
+
+ // Build a minimal pipeline based on the semantics required by LLVM,
+ // which is just that always-inlining occurs. Further, disable generating
+ // lifetime intrinsics to avoid enabling further optimizations during
+ // code generation.
+ MPM.addPass(AlwaysInlinerPass(
+ /*InsertLifetimeIntrinsics=*/false));
+
+ if (PTO.MergeFunctions)
+ MPM.addPass(MergeFunctionsPass());
+
+ if (EnableMatrix)
+ MPM.addPass(
+ createModuleToFunctionPassAdaptor(LowerMatrixIntrinsicsPass(true)));
+
+ if (!CGSCCOptimizerLateEPCallbacks.empty()) {
+ CGSCCPassManager CGPM;
+ for (auto &C : CGSCCOptimizerLateEPCallbacks)
+ C(CGPM, Level);
+ if (!CGPM.isEmpty())
+ MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
+ }
+ if (!LateLoopOptimizationsEPCallbacks.empty()) {
+ LoopPassManager LPM;
+ for (auto &C : LateLoopOptimizationsEPCallbacks)
+ C(LPM, Level);
+ if (!LPM.isEmpty()) {
+ MPM.addPass(createModuleToFunctionPassAdaptor(
+ createFunctionToLoopPassAdaptor(std::move(LPM))));
+ }
+ }
+ if (!LoopOptimizerEndEPCallbacks.empty()) {
+ LoopPassManager LPM;
+ for (auto &C : LoopOptimizerEndEPCallbacks)
+ C(LPM, Level);
+ if (!LPM.isEmpty()) {
+ MPM.addPass(createModuleToFunctionPassAdaptor(
+ createFunctionToLoopPassAdaptor(std::move(LPM))));
+ }
+ }
+ if (!ScalarOptimizerLateEPCallbacks.empty()) {
+ FunctionPassManager FPM;
+ for (auto &C : ScalarOptimizerLateEPCallbacks)
+ C(FPM, Level);
+ if (!FPM.isEmpty())
+ MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+ }
+ if (!VectorizerStartEPCallbacks.empty()) {
+ FunctionPassManager FPM;
+ for (auto &C : VectorizerStartEPCallbacks)
+ C(FPM, Level);
+ if (!FPM.isEmpty())
+ MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+ }
+
+ MPM.addPass(createModuleToFunctionPassAdaptor(CoroEarlyPass()));
+ CGSCCPassManager CGPM;
+ CGPM.addPass(CoroSplitPass());
+ MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
+ MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass()));
+
+ for (auto &C : OptimizerLastEPCallbacks)
+ C(MPM, Level);
+
+ if (LTOPreLink)
+ addRequiredLTOPreLinkPasses(MPM);
+
+ return MPM;
+}
+
+AAManager PassBuilder::buildDefaultAAPipeline() {
+ AAManager AA;
+
+ // The order in which these are registered determines their priority when
+ // being queried.
+
+ // First we register the basic alias analysis that provides the majority of
+ // per-function local AA logic. This is a stateless, on-demand local set of
+ // AA techniques.
+ AA.registerFunctionAnalysis<BasicAA>();
+
+ // Next we query fast, specialized alias analyses that wrap IR-embedded
+ // information about aliasing.
+ AA.registerFunctionAnalysis<ScopedNoAliasAA>();
+ AA.registerFunctionAnalysis<TypeBasedAA>();
+
+ // Add support for querying global aliasing information when available.
+ // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
+ // analysis, all that the `AAManager` can do is query for any *cached*
+ // results from `GlobalsAA` through a readonly proxy.
+ AA.registerModuleAnalysis<GlobalsAA>();
+
+ // Add target-specific alias analyses.
+ if (TM)
+ TM->registerDefaultAliasAnalyses(AA);
+
+ return AA;
+}
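
(Illustrative sketch, not part of the imported diff: the pipeline-building code above is typically driven roughly as below. Assumes an existing llvm::Module M; names follow the public PassBuilder API of this LLVM version.)

    #include "llvm/IR/Module.h"
    #include "llvm/IR/PassManager.h"
    #include "llvm/Passes/PassBuilder.h"

    using namespace llvm;

    // Build and run the default O2 module pipeline, wiring up the analysis
    // managers and the default AA pipeline constructed by the code above.
    static void runDefaultO2(Module &M) {
      LoopAnalysisManager LAM;
      FunctionAnalysisManager FAM;
      CGSCCAnalysisManager CGAM;
      ModuleAnalysisManager MAM;

      PassBuilder PB;
      // Register the default alias-analysis pipeline before the function
      // analyses so the AAManager picks it up.
      FAM.registerPass([&] { return PB.buildDefaultAAPipeline(); });
      PB.registerModuleAnalyses(MAM);
      PB.registerCGSCCAnalyses(CGAM);
      PB.registerFunctionAnalyses(FAM);
      PB.registerLoopAnalyses(LAM);
      PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

      ModulePassManager MPM =
          PB.buildPerModuleDefaultPipeline(OptimizationLevel::O2);
      MPM.run(M, MAM);
    }
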
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 7525d59f94a5..c2032b5b8276 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -60,8 +60,6 @@ MODULE_PASS("globaldce", GlobalDCEPass())
MODULE_PASS("globalopt", GlobalOptPass())
MODULE_PASS("globalsplit", GlobalSplitPass())
MODULE_PASS("hotcoldsplit", HotColdSplittingPass())
-MODULE_PASS("hwasan", HWAddressSanitizerPass(false, false))
-MODULE_PASS("khwasan", HWAddressSanitizerPass(true, true))
MODULE_PASS("inferattrs", InferFunctionAttrsPass())
MODULE_PASS("inliner-wrapper", ModuleInlinerWrapperPass())
MODULE_PASS("inliner-wrapper-no-mandatory-first", ModuleInlinerWrapperPass(
@@ -75,7 +73,6 @@ MODULE_PASS("invalidate<all>", InvalidateAllAnalysesPass())
MODULE_PASS("ipsccp", IPSCCPPass())
MODULE_PASS("iroutliner", IROutlinerPass())
MODULE_PASS("print-ir-similarity", IRSimilarityAnalysisPrinterPass(dbgs()))
-MODULE_PASS("loop-extract", LoopExtractorPass())
MODULE_PASS("lowertypetests", LowerTypeTestsPass())
MODULE_PASS("metarenamer", MetaRenamerPass())
MODULE_PASS("mergefunc", MergeFunctionsPass())
@@ -101,7 +98,6 @@ MODULE_PASS("rpo-function-attrs", ReversePostOrderFunctionAttrsPass())
MODULE_PASS("sample-profile", SampleProfileLoaderPass())
MODULE_PASS("scc-oz-module-inliner",
buildInlinerPipeline(OptimizationLevel::Oz, ThinOrFullLTOPhase::None))
-MODULE_PASS("loop-extract-single", LoopExtractorPass(1))
MODULE_PASS("strip", StripSymbolsPass())
MODULE_PASS("strip-dead-debug-info", StripDeadDebugInfoPass())
MODULE_PASS("pseudo-probe", SampleProfileProbePass(TM))
@@ -113,16 +109,43 @@ MODULE_PASS("synthetic-counts-propagation", SyntheticCountsPropagation())
MODULE_PASS("verify", VerifierPass())
MODULE_PASS("wholeprogramdevirt", WholeProgramDevirtPass())
MODULE_PASS("dfsan", DataFlowSanitizerPass())
-MODULE_PASS("asan-module", ModuleAddressSanitizerPass(/*CompileKernel=*/false, false, true, false))
-MODULE_PASS("msan-module", MemorySanitizerPass({}))
-MODULE_PASS("tsan-module", ThreadSanitizerPass())
-MODULE_PASS("kasan-module", ModuleAddressSanitizerPass(/*CompileKernel=*/true, false, true, false))
+MODULE_PASS("msan-module", ModuleMemorySanitizerPass({}))
+MODULE_PASS("module-inline", ModuleInlinerPass())
+MODULE_PASS("tsan-module", ModuleThreadSanitizerPass())
MODULE_PASS("sancov-module", ModuleSanitizerCoveragePass())
MODULE_PASS("memprof-module", ModuleMemProfilerPass())
MODULE_PASS("poison-checking", PoisonCheckingPass())
MODULE_PASS("pseudo-probe-update", PseudoProbeUpdatePass())
#undef MODULE_PASS
+#ifndef MODULE_PASS_WITH_PARAMS
+#define MODULE_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS)
+#endif
+MODULE_PASS_WITH_PARAMS("loop-extract",
+ "LoopExtractorPass",
+ [](bool Single) {
+ if (Single)
+ return LoopExtractorPass(1);
+ return LoopExtractorPass();
+ },
+ parseLoopExtractorPassOptions,
+ "single")
+MODULE_PASS_WITH_PARAMS("hwasan",
+ "HWAddressSanitizerPass",
+ [](HWAddressSanitizerOptions Opts) {
+ return HWAddressSanitizerPass(Opts);
+ },
+ parseHWASanPassOptions,
+ "kernel;recover")
+MODULE_PASS_WITH_PARAMS("asan-module",
+ "ModuleAddressSanitizerPass",
+ [](AddressSanitizerOptions Opts) {
+ return ModuleAddressSanitizerPass(Opts);
+ },
+ parseASanPassOptions,
+ "kernel")
+#undef MODULE_PASS_WITH_PARAMS
+
#ifndef CGSCC_ANALYSIS
#define CGSCC_ANALYSIS(NAME, CREATE_PASS)
#endif
@@ -138,12 +161,23 @@ CGSCC_PASS("argpromotion", ArgumentPromotionPass())
CGSCC_PASS("invalidate<all>", InvalidateAllAnalysesPass())
CGSCC_PASS("function-attrs", PostOrderFunctionAttrsPass())
CGSCC_PASS("attributor-cgscc", AttributorCGSCCPass())
-CGSCC_PASS("inline", InlinerPass())
CGSCC_PASS("openmp-opt-cgscc", OpenMPOptCGSCCPass())
CGSCC_PASS("coro-split", CoroSplitPass())
CGSCC_PASS("no-op-cgscc", NoOpCGSCCPass())
#undef CGSCC_PASS
+#ifndef CGSCC_PASS_WITH_PARAMS
+#define CGSCC_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS)
+#endif
+CGSCC_PASS_WITH_PARAMS("inline",
+ "InlinerPass",
+ [](bool OnlyMandatory) {
+ return InlinerPass(OnlyMandatory);
+ },
+ parseInlinerPassOptions,
+ "only-mandatory")
+#undef CGSCC_PASS_WITH_PARAMS
+
#ifndef FUNCTION_ANALYSIS
#define FUNCTION_ANALYSIS(NAME, CREATE_PASS)
#endif
@@ -167,6 +201,7 @@ FUNCTION_ANALYSIS("regions", RegionInfoAnalysis())
FUNCTION_ANALYSIS("no-op-function", NoOpFunctionAnalysis())
FUNCTION_ANALYSIS("opt-remark-emit", OptimizationRemarkEmitterAnalysis())
FUNCTION_ANALYSIS("scalar-evolution", ScalarEvolutionAnalysis())
+FUNCTION_ANALYSIS("should-not-run-function-passes", ShouldNotRunFunctionPassesAnalysis())
FUNCTION_ANALYSIS("stack-safety-local", StackSafetyAnalysis())
FUNCTION_ANALYSIS("targetlibinfo", TargetLibraryAnalysis())
FUNCTION_ANALYSIS("targetir",
@@ -217,12 +252,8 @@ FUNCTION_PASS("div-rem-pairs", DivRemPairsPass())
FUNCTION_PASS("dse", DSEPass())
FUNCTION_PASS("dot-cfg", CFGPrinterPass())
FUNCTION_PASS("dot-cfg-only", CFGOnlyPrinterPass())
-FUNCTION_PASS("early-cse", EarlyCSEPass(/*UseMemorySSA=*/false))
-FUNCTION_PASS("early-cse-memssa", EarlyCSEPass(/*UseMemorySSA=*/true))
-FUNCTION_PASS("ee-instrument", EntryExitInstrumenterPass(/*PostInlining=*/false))
FUNCTION_PASS("fix-irreducible", FixIrreduciblePass())
FUNCTION_PASS("make-guards-explicit", MakeGuardsExplicitPass())
-FUNCTION_PASS("post-inline-ee-instrument", EntryExitInstrumenterPass(/*PostInlining=*/true))
FUNCTION_PASS("gvn-hoist", GVNHoistPass())
FUNCTION_PASS("gvn-sink", GVNSinkPass())
FUNCTION_PASS("helloworld", HelloWorldPass())
@@ -242,8 +273,6 @@ FUNCTION_PASS("loweratomic", LowerAtomicPass())
FUNCTION_PASS("lower-expect", LowerExpectIntrinsicPass())
FUNCTION_PASS("lower-guard-intrinsic", LowerGuardIntrinsicPass())
FUNCTION_PASS("lower-constant-intrinsics", LowerConstantIntrinsicsPass())
-FUNCTION_PASS("lower-matrix-intrinsics", LowerMatrixIntrinsicsPass())
-FUNCTION_PASS("lower-matrix-intrinsics-minimal", LowerMatrixIntrinsicsPass(true))
FUNCTION_PASS("lower-widenable-condition", LowerWidenableConditionPass())
FUNCTION_PASS("guard-widening", GuardWideningPass())
FUNCTION_PASS("load-store-vectorizer", LoadStoreVectorizerPass())
@@ -273,6 +302,7 @@ FUNCTION_PASS("print", PrintFunctionPass(dbgs()))
FUNCTION_PASS("print<assumptions>", AssumptionPrinterPass(dbgs()))
FUNCTION_PASS("print<block-freq>", BlockFrequencyPrinterPass(dbgs()))
FUNCTION_PASS("print<branch-prob>", BranchProbabilityPrinterPass(dbgs()))
+FUNCTION_PASS("print<cost-model>", CostModelPrinterPass(dbgs()))
FUNCTION_PASS("print<da>", DependenceAnalysisPrinterPass(dbgs()))
FUNCTION_PASS("print<divergence>", DivergenceAnalysisPrinterPass(dbgs()))
FUNCTION_PASS("print<domtree>", DominatorTreePrinterPass(dbgs()))
@@ -286,6 +316,7 @@ FUNCTION_PASS("print<inliner-size-estimator>",
InlineSizeEstimatorAnalysisPrinterPass(dbgs()))
FUNCTION_PASS("print<loops>", LoopPrinterPass(dbgs()))
FUNCTION_PASS("print<memoryssa>", MemorySSAPrinterPass(dbgs()))
+FUNCTION_PASS("print<memoryssa-walker>", MemorySSAWalkerPrinterPass(dbgs()))
FUNCTION_PASS("print<phi-values>", PhiValuesPrinterPass(dbgs()))
FUNCTION_PASS("print<regions>", RegionInfoPrinterPass(dbgs()))
FUNCTION_PASS("print<scalar-evolution>", ScalarEvolutionPrinterPass(dbgs()))
@@ -306,7 +337,7 @@ FUNCTION_PASS("sink", SinkingPass())
FUNCTION_PASS("slp-vectorizer", SLPVectorizerPass())
FUNCTION_PASS("slsr", StraightLineStrengthReducePass())
FUNCTION_PASS("speculative-execution", SpeculativeExecutionPass())
-FUNCTION_PASS("sroa", SROA())
+FUNCTION_PASS("sroa", SROAPass())
FUNCTION_PASS("strip-gc-relocates", StripGCRelocates())
FUNCTION_PASS("structurizecfg", StructurizeCFGPass())
FUNCTION_PASS("tailcallelim", TailCallElimPass())
@@ -322,10 +353,6 @@ FUNCTION_PASS("verify<scalar-evolution>", ScalarEvolutionVerifierPass())
FUNCTION_PASS("view-cfg", CFGViewerPass())
FUNCTION_PASS("view-cfg-only", CFGOnlyViewerPass())
FUNCTION_PASS("transform-warning", WarnMissedTransformationsPass())
-FUNCTION_PASS("asan", AddressSanitizerPass(false, false, false))
-FUNCTION_PASS("kasan", AddressSanitizerPass(true, false, false))
-FUNCTION_PASS("msan", MemorySanitizerPass({}))
-FUNCTION_PASS("kmsan", MemorySanitizerPass({0, false, /*Kernel=*/true}))
FUNCTION_PASS("tsan", ThreadSanitizerPass())
FUNCTION_PASS("memprof", MemProfilerPass())
#undef FUNCTION_PASS
@@ -333,6 +360,27 @@ FUNCTION_PASS("memprof", MemProfilerPass())
#ifndef FUNCTION_PASS_WITH_PARAMS
#define FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS)
#endif
+FUNCTION_PASS_WITH_PARAMS("early-cse",
+ "EarlyCSEPass",
+ [](bool UseMemorySSA) {
+ return EarlyCSEPass(UseMemorySSA);
+ },
+ parseEarlyCSEPassOptions,
+ "memssa")
+FUNCTION_PASS_WITH_PARAMS("ee-instrument",
+ "EntryExitInstrumenterPass",
+ [](bool PostInlining) {
+ return EntryExitInstrumenterPass(PostInlining);
+ },
+ parseEntryExitInstrumenterPassOptions,
+ "post-inline")
+FUNCTION_PASS_WITH_PARAMS("lower-matrix-intrinsics",
+ "LowerMatrixIntrinsicsPass",
+ [](bool Minimal) {
+ return LowerMatrixIntrinsicsPass(Minimal);
+ },
+ parseLowerMatrixIntrinsicsPassOptions,
+ "minimal")
FUNCTION_PASS_WITH_PARAMS("loop-unroll",
"LoopUnrollPass",
[](LoopUnrollOptions Opts) {
@@ -345,6 +393,13 @@ FUNCTION_PASS_WITH_PARAMS("loop-unroll",
"no-profile-peeling;profile-peeling;"
"no-runtime;runtime;"
"no-upperbound;upperbound")
+FUNCTION_PASS_WITH_PARAMS("asan",
+ "AddressSanitizerPass",
+ [](AddressSanitizerOptions Opts) {
+ return AddressSanitizerPass(Opts);
+ },
+ parseASanPassOptions,
+ "kernel")
FUNCTION_PASS_WITH_PARAMS("msan",
"MemorySanitizerPass",
[](MemorySanitizerOptions Opts) {
@@ -381,9 +436,9 @@ FUNCTION_PASS_WITH_PARAMS("mldst-motion",
parseMergedLoadStoreMotionOptions,
"no-split-footer-bb;split-footer-bb")
FUNCTION_PASS_WITH_PARAMS("gvn",
- "GVN",
+ "GVNPass",
[](GVNOptions Opts) {
- return GVN(Opts);
+ return GVNPass(Opts);
},
parseGVNOptions,
"no-pre;pre;"
@@ -399,6 +454,16 @@ FUNCTION_PASS_WITH_PARAMS("print<stack-lifetime>",
"may;must")
#undef FUNCTION_PASS_WITH_PARAMS
+#ifndef LOOPNEST_PASS
+#define LOOPNEST_PASS(NAME, CREATE_PASS)
+#endif
+LOOPNEST_PASS("lnicm", LNICMPass())
+LOOPNEST_PASS("loop-flatten", LoopFlattenPass())
+LOOPNEST_PASS("loop-interchange", LoopInterchangePass())
+LOOPNEST_PASS("loop-unroll-and-jam", LoopUnrollAndJamPass())
+LOOPNEST_PASS("no-op-loopnest", NoOpLoopNestPass())
+#undef LOOPNEST_PASS
+
#ifndef LOOP_ANALYSIS
#define LOOP_ANALYSIS(NAME, CREATE_PASS)
#endif
@@ -416,11 +481,8 @@ LOOP_PASS("canon-freeze", CanonicalizeFreezeInLoopsPass())
LOOP_PASS("dot-ddg", DDGDotPrinterPass())
LOOP_PASS("invalidate<all>", InvalidateAllAnalysesPass())
LOOP_PASS("licm", LICMPass())
-LOOP_PASS("lnicm", LNICMPass())
-LOOP_PASS("loop-flatten", LoopFlattenPass())
LOOP_PASS("loop-idiom", LoopIdiomRecognizePass())
LOOP_PASS("loop-instsimplify", LoopInstSimplifyPass())
-LOOP_PASS("loop-interchange", LoopInterchangePass())
LOOP_PASS("loop-rotate", LoopRotatePass())
LOOP_PASS("no-op-loop", NoOpLoopPass())
LOOP_PASS("print", PrintLoopPass(dbgs()))
@@ -428,7 +490,6 @@ LOOP_PASS("loop-deletion", LoopDeletionPass())
LOOP_PASS("loop-simplifycfg", LoopSimplifyCFGPass())
LOOP_PASS("loop-reduce", LoopStrengthReducePass())
LOOP_PASS("indvars", IndVarSimplifyPass())
-LOOP_PASS("loop-unroll-and-jam", LoopUnrollAndJamPass())
LOOP_PASS("loop-unroll-full", LoopFullUnrollPass())
LOOP_PASS("print-access-info", LoopAccessInfoPrinterPass(dbgs()))
LOOP_PASS("print<ddg>", DDGAnalysisPrinterPass(dbgs()))
diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp
index 5a48923bce8a..8e6be6730ea4 100644
--- a/llvm/lib/Passes/StandardInstrumentations.cpp
+++ b/llvm/lib/Passes/StandardInstrumentations.cpp
@@ -29,10 +29,14 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Program.h"
+#include "llvm/Support/Regex.h"
#include "llvm/Support/raw_ostream.h"
+#include <unordered_map>
#include <unordered_set>
+#include <utility>
#include <vector>
using namespace llvm;
@@ -40,10 +44,11 @@ using namespace llvm;
cl::opt<bool> PreservedCFGCheckerInstrumentation::VerifyPreservedCFG(
"verify-cfg-preserved", cl::Hidden,
#ifdef NDEBUG
- cl::init(false));
+ cl::init(false)
#else
- cl::init(true));
+ cl::init(true)
#endif
+ );
// An option that prints out the IR after passes, similar to
// -print-after-all except that it only prints the IR after passes that
@@ -79,7 +84,9 @@ enum class ChangePrinter {
PrintChangedDiffVerbose,
PrintChangedDiffQuiet,
PrintChangedColourDiffVerbose,
- PrintChangedColourDiffQuiet
+ PrintChangedColourDiffQuiet,
+ PrintChangedDotCfgVerbose,
+ PrintChangedDotCfgQuiet
};
static cl::opt<ChangePrinter> PrintChanged(
"print-changed", cl::desc("Print changed IRs"), cl::Hidden,
@@ -95,6 +102,10 @@ static cl::opt<ChangePrinter> PrintChanged(
"Display patch-like changes with color"),
clEnumValN(ChangePrinter::PrintChangedColourDiffQuiet, "cdiff-quiet",
"Display patch-like changes in quiet mode with color"),
+ clEnumValN(ChangePrinter::PrintChangedDotCfgVerbose, "dot-cfg",
+ "Create a website with graphical changes"),
+ clEnumValN(ChangePrinter::PrintChangedDotCfgQuiet, "dot-cfg-quiet",
+ "Create a website with graphical changes in quiet mode"),
// Sentinel value for unspecified option.
clEnumValN(ChangePrinter::PrintChangedVerbose, "", "")));
@@ -119,6 +130,40 @@ static cl::opt<std::string>
DiffBinary("print-changed-diff-path", cl::Hidden, cl::init("diff"),
cl::desc("system diff used by change reporters"));
+// An option for specifying the dot used by
+// print-changed=[dot-cfg | dot-cfg-quiet]
+static cl::opt<std::string>
+ DotBinary("print-changed-dot-path", cl::Hidden, cl::init("dot"),
+ cl::desc("system dot used by change reporters"));
+
+// An option that determines the colour used for elements that are only
+// in the before part. Must be a colour named in appendix J of
+// https://graphviz.org/pdf/dotguide.pdf
+cl::opt<std::string>
+ BeforeColour("dot-cfg-before-color",
+ cl::desc("Color for dot-cfg before elements."), cl::Hidden,
+ cl::init("red"));
+// An option that determines the colour used for elements that are only
+// in the after part. Must be a colour named in appendix J of
+// https://graphviz.org/pdf/dotguide.pdf
+cl::opt<std::string> AfterColour("dot-cfg-after-color",
+ cl::desc("Color for dot-cfg after elements."),
+ cl::Hidden, cl::init("forestgreen"));
+// An option that determines the colour used for elements that are in both
+// the before and after parts. Must be a colour named in appendix J of
+// https://graphviz.org/pdf/dotguide.pdf
+cl::opt<std::string>
+ CommonColour("dot-cfg-common-color",
+ cl::desc("Color for dot-cfg common elements."), cl::Hidden,
+ cl::init("black"));
+
+// An option that determines where the generated website file (named
+// passes.html) and the associated pdf files (named diff_*.pdf) are saved.
+static cl::opt<std::string> DotCfgDir(
+ "dot-cfg-dir",
+ cl::desc("Generate dot files into specified directory for changed IRs"),
+ cl::Hidden, cl::init("./"));
+
namespace {
// Perform a system based diff between \p Before and \p After, using
@@ -166,7 +211,8 @@ std::string doSystemDiff(StringRef Before, StringRef After,
SmallString<128> ULF =
formatv("--unchanged-line-format={0}", UnchangedLineFormat);
- StringRef Args[] = {"-w", "-d", OLF, NLF, ULF, FileName[0], FileName[1]};
+ StringRef Args[] = {DiffBinary, "-w", "-d", OLF,
+ NLF, ULF, FileName[0], FileName[1]};
Optional<StringRef> Redirects[] = {None, StringRef(FileName[2]), None};
int Result = sys::ExecuteAndWait(*DiffExe, Args, None, Redirects);
if (Result < 0)
@@ -230,10 +276,9 @@ void printIR(raw_ostream &OS, const Function *F) {
OS << *F;
}
-void printIR(raw_ostream &OS, const Module *M,
- bool ShouldPreserveUseListOrder = false) {
+void printIR(raw_ostream &OS, const Module *M) {
if (isFunctionInPrintList("*") || forcePrintModuleIR()) {
- M->print(OS, nullptr, ShouldPreserveUseListOrder);
+ M->print(OS, nullptr);
} else {
for (const auto &F : M->functions()) {
printIR(OS, &F);
@@ -323,21 +368,20 @@ bool shouldPrintIR(Any IR) {
/// Generic IR-printing helper that unpacks a pointer to IRUnit wrapped into
/// llvm::Any and does actual print job.
-void unwrapAndPrint(raw_ostream &OS, Any IR,
- bool ShouldPreserveUseListOrder = false) {
+void unwrapAndPrint(raw_ostream &OS, Any IR) {
if (!shouldPrintIR(IR))
return;
if (forcePrintModuleIR()) {
auto *M = unwrapModule(IR);
assert(M && "should have unwrapped module");
- printIR(OS, M, ShouldPreserveUseListOrder);
+ printIR(OS, M);
return;
}
if (any_isa<const Module *>(IR)) {
const Module *M = any_cast<const Module *>(IR);
- printIR(OS, M, ShouldPreserveUseListOrder);
+ printIR(OS, M);
return;
}
@@ -368,20 +412,46 @@ bool isIgnored(StringRef PassID) {
"DevirtSCCRepeatedPass", "ModuleInlinerWrapperPass"});
}
+std::string makeHTMLReady(StringRef SR) {
+ std::string S;
+ while (true) {
+ StringRef Clean =
+ SR.take_until([](char C) { return C == '<' || C == '>'; });
+ S.append(Clean.str());
+ SR = SR.drop_front(Clean.size());
+ if (SR.size() == 0)
+ return S;
+ S.append(SR[0] == '<' ? "&lt;" : "&gt;");
+ SR = SR.drop_front();
+ }
+ llvm_unreachable("problems converting string to HTML");
+}
+
+// Return the module when that is the appropriate level of comparison for \p IR.
+const Module *getModuleForComparison(Any IR) {
+ if (any_isa<const Module *>(IR))
+ return any_cast<const Module *>(IR);
+ if (any_isa<const LazyCallGraph::SCC *>(IR))
+ return any_cast<const LazyCallGraph::SCC *>(IR)
+ ->begin()
+ ->getFunction()
+ .getParent();
+ return nullptr;
+}
+
} // namespace
-template <typename IRUnitT>
-ChangeReporter<IRUnitT>::~ChangeReporter<IRUnitT>() {
+template <typename T> ChangeReporter<T>::~ChangeReporter<T>() {
assert(BeforeStack.empty() && "Problem with Change Printer stack.");
}
-template <typename IRUnitT>
-bool ChangeReporter<IRUnitT>::isInterestingFunction(const Function &F) {
+template <typename T>
+bool ChangeReporter<T>::isInterestingFunction(const Function &F) {
return isFunctionInPrintList(F.getName());
}
-template <typename IRUnitT>
-bool ChangeReporter<IRUnitT>::isInterestingPass(StringRef PassID) {
+template <typename T>
+bool ChangeReporter<T>::isInterestingPass(StringRef PassID) {
if (isIgnored(PassID))
return false;
@@ -392,8 +462,8 @@ bool ChangeReporter<IRUnitT>::isInterestingPass(StringRef PassID) {
// Return true when this is a pass on IR for which printing
// of changes is desired.
-template <typename IRUnitT>
-bool ChangeReporter<IRUnitT>::isInteresting(Any IR, StringRef PassID) {
+template <typename T>
+bool ChangeReporter<T>::isInteresting(Any IR, StringRef PassID) {
if (!isInterestingPass(PassID))
return false;
if (any_isa<const Function *>(IR))
@@ -401,8 +471,8 @@ bool ChangeReporter<IRUnitT>::isInteresting(Any IR, StringRef PassID) {
return true;
}
-template <typename IRUnitT>
-void ChangeReporter<IRUnitT>::saveIRBeforePass(Any IR, StringRef PassID) {
+template <typename T>
+void ChangeReporter<T>::saveIRBeforePass(Any IR, StringRef PassID) {
// Always need to place something on the stack because invalidated passes
// are not given the IR so it cannot be determined whether the pass was for
// something that was filtered out.
@@ -418,12 +488,12 @@ void ChangeReporter<IRUnitT>::saveIRBeforePass(Any IR, StringRef PassID) {
}
// Save the IR representation on the stack.
- IRUnitT &Data = BeforeStack.back();
+ T &Data = BeforeStack.back();
generateIRRepresentation(IR, PassID, Data);
}
-template <typename IRUnitT>
-void ChangeReporter<IRUnitT>::handleIRAfterPass(Any IR, StringRef PassID) {
+template <typename T>
+void ChangeReporter<T>::handleIRAfterPass(Any IR, StringRef PassID) {
assert(!BeforeStack.empty() && "Unexpected empty stack encountered.");
std::string Name = getIRName(IR);
@@ -436,13 +506,13 @@ void ChangeReporter<IRUnitT>::handleIRAfterPass(Any IR, StringRef PassID) {
handleFiltered(PassID, Name);
} else {
// Get the before rep from the stack
- IRUnitT &Before = BeforeStack.back();
+ T &Before = BeforeStack.back();
// Create the after rep
- IRUnitT After;
+ T After;
generateIRRepresentation(IR, PassID, After);
// Was there a change in IR?
- if (same(Before, After)) {
+ if (Before == After) {
if (VerboseMode)
omitAfter(PassID, Name);
} else
@@ -451,8 +521,8 @@ void ChangeReporter<IRUnitT>::handleIRAfterPass(Any IR, StringRef PassID) {
BeforeStack.pop_back();
}
-template <typename IRUnitT>
-void ChangeReporter<IRUnitT>::handleInvalidatedPass(StringRef PassID) {
+template <typename T>
+void ChangeReporter<T>::handleInvalidatedPass(StringRef PassID) {
assert(!BeforeStack.empty() && "Unexpected empty stack encountered.");
// Always flag it as invalidated as we cannot determine when
@@ -464,8 +534,8 @@ void ChangeReporter<IRUnitT>::handleInvalidatedPass(StringRef PassID) {
BeforeStack.pop_back();
}
-template <typename IRUnitT>
-void ChangeReporter<IRUnitT>::registerRequiredCallbacks(
+template <typename T>
+void ChangeReporter<T>::registerRequiredCallbacks(
PassInstrumentationCallbacks &PIC) {
PIC.registerBeforeNonSkippedPassCallback(
[this](StringRef P, Any IR) { saveIRBeforePass(IR, P); });
@@ -480,50 +550,40 @@ void ChangeReporter<IRUnitT>::registerRequiredCallbacks(
});
}
-ChangedBlockData::ChangedBlockData(const BasicBlock &B)
- : Label(B.getName().str()) {
- raw_string_ostream SS(Body);
- B.print(SS, nullptr, true, true);
-}
-
-template <typename IRUnitT>
-TextChangeReporter<IRUnitT>::TextChangeReporter(bool Verbose)
- : ChangeReporter<IRUnitT>(Verbose), Out(dbgs()) {}
+template <typename T>
+TextChangeReporter<T>::TextChangeReporter(bool Verbose)
+ : ChangeReporter<T>(Verbose), Out(dbgs()) {}
-template <typename IRUnitT>
-void TextChangeReporter<IRUnitT>::handleInitialIR(Any IR) {
+template <typename T> void TextChangeReporter<T>::handleInitialIR(Any IR) {
// Always print the module.
// Unwrap and print directly to avoid filtering problems in general routines.
auto *M = unwrapModule(IR, /*Force=*/true);
assert(M && "Expected module to be unwrapped when forced.");
Out << "*** IR Dump At Start ***\n";
- M->print(Out, nullptr,
- /*ShouldPreserveUseListOrder=*/true);
+ M->print(Out, nullptr);
}
-template <typename IRUnitT>
-void TextChangeReporter<IRUnitT>::omitAfter(StringRef PassID,
- std::string &Name) {
+template <typename T>
+void TextChangeReporter<T>::omitAfter(StringRef PassID, std::string &Name) {
Out << formatv("*** IR Dump After {0} on {1} omitted because no change ***\n",
PassID, Name);
}
-template <typename IRUnitT>
-void TextChangeReporter<IRUnitT>::handleInvalidated(StringRef PassID) {
+template <typename T>
+void TextChangeReporter<T>::handleInvalidated(StringRef PassID) {
Out << formatv("*** IR Pass {0} invalidated ***\n", PassID);
}
-template <typename IRUnitT>
-void TextChangeReporter<IRUnitT>::handleFiltered(StringRef PassID,
- std::string &Name) {
+template <typename T>
+void TextChangeReporter<T>::handleFiltered(StringRef PassID,
+ std::string &Name) {
SmallString<20> Banner =
formatv("*** IR Dump After {0} on {1} filtered out ***\n", PassID, Name);
Out << Banner;
}
-template <typename IRUnitT>
-void TextChangeReporter<IRUnitT>::handleIgnored(StringRef PassID,
- std::string &Name) {
+template <typename T>
+void TextChangeReporter<T>::handleIgnored(StringRef PassID, std::string &Name) {
Out << formatv("*** IR Pass {0} on {1} ignored ***\n", PassID, Name);
}
@@ -538,8 +598,7 @@ void IRChangedPrinter::registerCallbacks(PassInstrumentationCallbacks &PIC) {
void IRChangedPrinter::generateIRRepresentation(Any IR, StringRef PassID,
std::string &Output) {
raw_string_ostream OS(Output);
- unwrapAndPrint(OS, IR,
- /*ShouldPreserveUseListOrder=*/true);
+ unwrapAndPrint(OS, IR);
OS.str();
}
@@ -561,14 +620,10 @@ void IRChangedPrinter::handleAfter(StringRef PassID, std::string &Name,
Out << "*** IR Dump After " << PassID << " on " << Name << " ***\n" << After;
}
-bool IRChangedPrinter::same(const std::string &S1, const std::string &S2) {
- return S1 == S2;
-}
-
-template <typename IRData>
-void OrderedChangedData<IRData>::report(
+template <typename T>
+void OrderedChangedData<T>::report(
const OrderedChangedData &Before, const OrderedChangedData &After,
- function_ref<void(const IRData *, const IRData *)> HandlePair) {
+ function_ref<void(const T *, const T *)> HandlePair) {
const auto &BFD = Before.getData();
const auto &AFD = After.getData();
std::vector<std::string>::const_iterator BI = Before.getOrder().begin();
@@ -576,21 +631,21 @@ void OrderedChangedData<IRData>::report(
std::vector<std::string>::const_iterator AI = After.getOrder().begin();
std::vector<std::string>::const_iterator AE = After.getOrder().end();
- auto handlePotentiallyRemovedIRData = [&](std::string S) {
+ auto HandlePotentiallyRemovedData = [&](std::string S) {
// The order in LLVM may have changed so check if it still exists.
if (!AFD.count(S)) {
// This has been removed.
HandlePair(&BFD.find(*BI)->getValue(), nullptr);
}
};
- auto handleNewIRData = [&](std::vector<const IRData *> &Q) {
+ auto HandleNewData = [&](std::vector<const T *> &Q) {
// Print out any queued up new sections
- for (const IRData *NBI : Q)
+ for (const T *NBI : Q)
HandlePair(nullptr, NBI);
Q.clear();
};
- // Print out the IRData in the after order, with before ones interspersed
+ // Print out the data in the after order, with before ones interspersed
// appropriately (ie, somewhere near where they were in the before list).
// Start at the beginning of both lists. Loop through the
// after list. If an element is common, then advance in the before list
@@ -599,26 +654,26 @@ void OrderedChangedData<IRData>::report(
// common, then enqueue it for reporting. When the after list is exhausted,
// loop through the before list, reporting any removed ones. Finally,
// report the rest of the enqueued new ones.
- std::vector<const IRData *> NewIRDataQueue;
+ std::vector<const T *> NewDataQueue;
while (AI != AE) {
if (!BFD.count(*AI)) {
// This section is new so place it in the queue. This will cause it
// to be reported after deleted sections.
- NewIRDataQueue.emplace_back(&AFD.find(*AI)->getValue());
+ NewDataQueue.emplace_back(&AFD.find(*AI)->getValue());
++AI;
continue;
}
// This section is in both; advance and print out any before-only
// until we get to it.
while (*BI != *AI) {
- handlePotentiallyRemovedIRData(*BI);
+ HandlePotentiallyRemovedData(*BI);
++BI;
}
// Report any new sections that were queued up and waiting.
- handleNewIRData(NewIRDataQueue);
+ HandleNewData(NewDataQueue);
- const IRData &AData = AFD.find(*AI)->getValue();
- const IRData &BData = BFD.find(*AI)->getValue();
+ const T &AData = AFD.find(*AI)->getValue();
+ const T &BData = BFD.find(*AI)->getValue();
HandlePair(&BData, &AData);
++BI;
++AI;
@@ -626,39 +681,42 @@ void OrderedChangedData<IRData>::report(
// Check any remaining before sections to see if they have been removed
while (BI != BE) {
- handlePotentiallyRemovedIRData(*BI);
+ HandlePotentiallyRemovedData(*BI);
++BI;
}
- handleNewIRData(NewIRDataQueue);
+ HandleNewData(NewDataQueue);
}
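// Illustrative sketch, not part of this change: the ordered-merge traversal of
// OrderedChangedData::report above, restated with plain standard containers so
// the reporting order is easy to see. For Before = {A, B, C} and
// After = {A, D, C} the callbacks fire as (A,A), (B,nullptr) for the removal,
// (nullptr,D) for the addition, then (C,C).
#include <functional>
#include <set>
#include <string>
#include <vector>
static void reportSketch(
    const std::vector<std::string> &Before,
    const std::vector<std::string> &After,
    std::function<void(const std::string *, const std::string *)> HandlePair) {
  std::set<std::string> BSet(Before.begin(), Before.end());
  std::set<std::string> ASet(After.begin(), After.end());
  std::vector<const std::string *> NewQueue;
  auto BI = Before.begin(), BE = Before.end();
  for (auto AI = After.begin(), AE = After.end(); AI != AE; ++AI) {
    if (!BSet.count(*AI)) {
      // New in After: queue it so it is reported after any deletions.
      NewQueue.push_back(&*AI);
      continue;
    }
    // Common entry: first report any Before-only entries preceding it.
    while (BI != BE && *BI != *AI) {
      if (!ASet.count(*BI))
        HandlePair(&*BI, nullptr);
      ++BI;
    }
    // Flush queued additions, then report the common pair.
    for (const std::string *N : NewQueue)
      HandlePair(nullptr, N);
    NewQueue.clear();
    HandlePair(&*BI, &*AI);
    ++BI;
  }
  // Anything left in Before that no longer exists, then leftover additions.
  while (BI != BE) {
    if (!ASet.count(*BI))
      HandlePair(&*BI, nullptr);
    ++BI;
  }
  for (const std::string *N : NewQueue)
    HandlePair(nullptr, N);
}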
-void ChangedIRComparer::compare(Any IR, StringRef Prefix, StringRef PassID,
- StringRef Name) {
- if (!getModuleForComparison(IR)) {
- // Not a module so just handle the single function.
- assert(Before.getData().size() == 1 && "Expected only one function.");
- assert(After.getData().size() == 1 && "Expected only one function.");
- handleFunctionCompare(Name, Prefix, PassID, false,
- Before.getData().begin()->getValue(),
- After.getData().begin()->getValue());
+template <typename T>
+void IRComparer<T>::compare(
+ bool CompareModule,
+ std::function<void(bool InModule, unsigned Minor,
+ const FuncDataT<T> &Before, const FuncDataT<T> &After)>
+ CompareFunc) {
+ if (!CompareModule) {
+ // Just handle the single function.
+ assert(Before.getData().size() == 1 && After.getData().size() == 1 &&
+ "Expected only one function.");
+ CompareFunc(false, 0, Before.getData().begin()->getValue(),
+ After.getData().begin()->getValue());
return;
}
- ChangedIRData::report(
- Before, After, [&](const ChangedFuncData *B, const ChangedFuncData *A) {
- ChangedFuncData Missing;
- if (!B)
- B = &Missing;
- else if (!A)
- A = &Missing;
- assert(B != &Missing && A != &Missing &&
- "Both functions cannot be missing.");
- handleFunctionCompare(Name, Prefix, PassID, true, *B, *A);
- });
+ unsigned Minor = 0;
+ FuncDataT<T> Missing("");
+ IRDataT<T>::report(Before, After,
+ [&](const FuncDataT<T> *B, const FuncDataT<T> *A) {
+ assert((B || A) && "Both functions cannot be missing.");
+ if (!B)
+ B = &Missing;
+ else if (!A)
+ A = &Missing;
+ CompareFunc(true, Minor++, *B, *A);
+ });
}
-void ChangedIRComparer::analyzeIR(Any IR, ChangedIRData &Data) {
+template <typename T> void IRComparer<T>::analyzeIR(Any IR, IRDataT<T> &Data) {
if (const Module *M = getModuleForComparison(IR)) {
// Create data for each existing/interesting function in the module.
for (const Function &F : *M)
@@ -678,27 +736,16 @@ void ChangedIRComparer::analyzeIR(Any IR, ChangedIRData &Data) {
generateFunctionData(Data, *F);
}
-const Module *ChangedIRComparer::getModuleForComparison(Any IR) {
- if (any_isa<const Module *>(IR))
- return any_cast<const Module *>(IR);
- if (any_isa<const LazyCallGraph::SCC *>(IR))
- return any_cast<const LazyCallGraph::SCC *>(IR)
- ->begin()
- ->getFunction()
- .getParent();
- return nullptr;
-}
-
-bool ChangedIRComparer::generateFunctionData(ChangedIRData &Data,
- const Function &F) {
+template <typename T>
+bool IRComparer<T>::generateFunctionData(IRDataT<T> &Data, const Function &F) {
if (!F.isDeclaration() && isFunctionInPrintList(F.getName())) {
- ChangedFuncData CFD;
+ FuncDataT<T> FD(F.getEntryBlock().getName().str());
for (const auto &B : F) {
- CFD.getOrder().emplace_back(B.getName());
- CFD.getData().insert({B.getName(), B});
+ FD.getOrder().emplace_back(B.getName());
+ FD.getData().insert({B.getName(), B});
}
Data.getOrder().emplace_back(F.getName());
- Data.getData().insert({F.getName(), CFD});
+ Data.getData().insert({F.getName(), FD});
return true;
}
return false;
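// For orientation (not part of this change; container details come from the
// accompanying header and are summarized here as a mental model only):
// analyzeIR() and generateFunctionData() build a two-level structure,
//   IRDataT<T>   : order + map of function name -> FuncDataT<T>
//   FuncDataT<T> : order + map of block name    -> BlockDataT<T> (label + body)
// which lets OrderedChangedData::report diff the set of functions first and
// then diff each function's basic blocks in the same ordered fashion.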
@@ -792,7 +839,7 @@ bool PrintIRInstrumentation::shouldPrintBeforePass(StringRef PassID) {
return true;
StringRef PassName = PIC->getPassNameForClassName(PassID);
- return llvm::is_contained(printBeforePasses(), PassName);
+ return is_contained(printBeforePasses(), PassName);
}
bool PrintIRInstrumentation::shouldPrintAfterPass(StringRef PassID) {
@@ -800,7 +847,7 @@ bool PrintIRInstrumentation::shouldPrintAfterPass(StringRef PassID) {
return true;
StringRef PassName = PIC->getPassNameForClassName(PassID);
- return llvm::is_contained(printAfterPasses(), PassName);
+ return is_contained(printAfterPasses(), PassName);
}
void PrintIRInstrumentation::registerCallbacks(
@@ -874,14 +921,13 @@ void PrintPassInstrumentation::registerCallbacks(
SpecialPasses.emplace_back("PassAdaptor");
}
- PIC.registerBeforeSkippedPassCallback(
- [this, SpecialPasses](StringRef PassID, Any IR) {
- assert(!isSpecialPass(PassID, SpecialPasses) &&
- "Unexpectedly skipping special pass");
+ PIC.registerBeforeSkippedPassCallback([this, SpecialPasses](StringRef PassID,
+ Any IR) {
+ assert(!isSpecialPass(PassID, SpecialPasses) &&
+ "Unexpectedly skipping special pass");
- print() << "Skipping pass: " << PassID << " on " << getIRName(IR)
- << "\n";
- });
+ print() << "Skipping pass: " << PassID << " on " << getIRName(IR) << "\n";
+ });
PIC.registerBeforeNonSkippedPassCallback([this, SpecialPasses](
StringRef PassID, Any IR) {
if (isSpecialPass(PassID, SpecialPasses))
@@ -1079,19 +1125,18 @@ void PreservedCFGCheckerInstrumentation::registerCallbacks(
report_fatal_error(Twine("CFG unexpectedly changed by ", Pass));
};
- PIC.registerBeforeNonSkippedPassCallback(
- [this, &FAM](StringRef P, Any IR) {
+ PIC.registerBeforeNonSkippedPassCallback([this, &FAM](StringRef P, Any IR) {
#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS
- assert(&PassStack.emplace_back(P));
+ assert(&PassStack.emplace_back(P));
#endif
- (void)this;
- if (!any_isa<const Function *>(IR))
- return;
+ (void)this;
+ if (!any_isa<const Function *>(IR))
+ return;
- const auto *F = any_cast<const Function *>(IR);
- // Make sure a fresh CFG snapshot is available before the pass.
- FAM.getResult<PreservedCFGCheckerAnalysis>(*const_cast<Function *>(F));
- });
+ const auto *F = any_cast<const Function *>(IR);
+ // Make sure a fresh CFG snapshot is available before the pass.
+ FAM.getResult<PreservedCFGCheckerAnalysis>(*const_cast<Function *>(F));
+ });
PIC.registerAfterPassInvalidatedCallback(
[this](StringRef P, const PreservedAnalyses &PassPA) {
@@ -1165,36 +1210,38 @@ void VerifyInstrumentation::registerCallbacks(
InLineChangePrinter::~InLineChangePrinter() {}
void InLineChangePrinter::generateIRRepresentation(Any IR, StringRef PassID,
- ChangedIRData &D) {
- ChangedIRComparer::analyzeIR(IR, D);
+ IRDataT<EmptyData> &D) {
+ IRComparer<EmptyData>::analyzeIR(IR, D);
}
void InLineChangePrinter::handleAfter(StringRef PassID, std::string &Name,
- const ChangedIRData &Before,
- const ChangedIRData &After, Any IR) {
+ const IRDataT<EmptyData> &Before,
+ const IRDataT<EmptyData> &After, Any IR) {
SmallString<20> Banner =
formatv("*** IR Dump After {0} on {1} ***\n", PassID, Name);
Out << Banner;
- ChangedIRComparer(Out, Before, After, UseColour)
- .compare(IR, "", PassID, Name);
+ IRComparer<EmptyData>(Before, After)
+ .compare(getModuleForComparison(IR),
+ [&](bool InModule, unsigned Minor,
+ const FuncDataT<EmptyData> &Before,
+ const FuncDataT<EmptyData> &After) -> void {
+ handleFunctionCompare(Name, "", PassID, " on ", InModule,
+ Minor, Before, After);
+ });
Out << "\n";
}
-bool InLineChangePrinter::same(const ChangedIRData &D1,
- const ChangedIRData &D2) {
- return D1 == D2;
-}
-
-void ChangedIRComparer::handleFunctionCompare(StringRef Name, StringRef Prefix,
- StringRef PassID, bool InModule,
- const ChangedFuncData &Before,
- const ChangedFuncData &After) {
+void InLineChangePrinter::handleFunctionCompare(
+ StringRef Name, StringRef Prefix, StringRef PassID, StringRef Divider,
+ bool InModule, unsigned Minor, const FuncDataT<EmptyData> &Before,
+ const FuncDataT<EmptyData> &After) {
// Print a banner when this is being shown in the context of a module
if (InModule)
Out << "\n*** IR for function " << Name << " ***\n";
- ChangedFuncData::report(
- Before, After, [&](const ChangedBlockData *B, const ChangedBlockData *A) {
+ FuncDataT<EmptyData>::report(
+ Before, After,
+ [&](const BlockDataT<EmptyData> *B, const BlockDataT<EmptyData> *A) {
StringRef BStr = B ? B->getBody() : "\n";
StringRef AStr = A ? A->getBody() : "\n";
const std::string Removed =
@@ -1210,7 +1257,863 @@ void InLineChangePrinter::registerCallbacks(PassInstrumentationCallbacks &PIC) {
PrintChanged == ChangePrinter::PrintChangedDiffQuiet ||
PrintChanged == ChangePrinter::PrintChangedColourDiffVerbose ||
PrintChanged == ChangePrinter::PrintChangedColourDiffQuiet)
- TextChangeReporter<ChangedIRData>::registerRequiredCallbacks(PIC);
+ TextChangeReporter<IRDataT<EmptyData>>::registerRequiredCallbacks(PIC);
+}
+
+namespace {
+
+enum IRChangeDiffType { InBefore, InAfter, IsCommon, NumIRChangeDiffTypes };
+
+// Describe where a given element exists.
+std::string Colours[NumIRChangeDiffTypes];
+
+class DisplayNode;
+class DotCfgDiffDisplayGraph;
+
+// Base class for a node or edge in the dot-cfg-changes graph.
+class DisplayElement {
+public:
+ // Is this in before, after, or both?
+ IRChangeDiffType getType() const { return Type; }
+
+protected:
+ DisplayElement(IRChangeDiffType T) : Type(T) {}
+ const IRChangeDiffType Type;
+};
+
+// An edge representing a transition between basic blocks in the
+// dot-cfg-changes graph.
+class DisplayEdge : public DisplayElement {
+public:
+ DisplayEdge(std::string V, DisplayNode &Node, IRChangeDiffType T)
+ : DisplayElement(T), Value(V), Node(Node) {}
+ // The value on which the transition is made.
+ std::string getValue() const { return Value; }
+ // The node (representing a basic block) reached by this transition.
+ const DisplayNode &getDestinationNode() const { return Node; }
+
+protected:
+ std::string Value;
+ const DisplayNode &Node;
+};
+
+// A node in the dot-cfg-changes graph which represents a basic block.
+class DisplayNode : public DisplayElement {
+public:
+ // \p C is the content for the node, \p T indicates the colour for the
+ // outline of the node
+ DisplayNode(std::string C, IRChangeDiffType T)
+ : DisplayElement(T), Content(C) {}
+
+ // Iterator to the child nodes. Required by GraphWriter.
+ using ChildIterator = std::unordered_set<DisplayNode *>::const_iterator;
+ ChildIterator children_begin() const { return Children.cbegin(); }
+ ChildIterator children_end() const { return Children.cend(); }
+
+ // Iterator for the edges. Required by GraphWriter.
+ using EdgeIterator = std::vector<DisplayEdge *>::const_iterator;
+ EdgeIterator edges_begin() const { return EdgePtrs.cbegin(); }
+ EdgeIterator edges_end() const { return EdgePtrs.cend(); }
+
+ // Create an edge to \p Node on value \p V, with type \p T.
+ void createEdge(StringRef V, DisplayNode &Node, IRChangeDiffType T);
+
+ // Return the content of this node.
+ std::string getContent() const { return Content; }
+
+ // Return the edge to node \p To.
+ const DisplayEdge &getEdge(const DisplayNode &To) const {
+ assert(EdgeMap.find(&To) != EdgeMap.end() && "Expected to find edge.");
+ return *EdgeMap.find(&To)->second;
+ }
+
+ // Return the value for the transition to basic block \p Sink.
+ // Required by GraphWriter.
+ std::string getEdgeSourceLabel(const DisplayNode &Sink) const {
+ return getEdge(Sink).getValue();
+ }
+
+ void createEdgeMap();
+
+protected:
+ const std::string Content;
+
+ // Place to collect all of the edges. Once they are all in the vector,
+ // the vector will not reallocate so then we can use pointers to them,
+ // which are required by the graph writing routines.
+ std::vector<DisplayEdge> Edges;
+
+ std::vector<DisplayEdge *> EdgePtrs;
+ std::unordered_set<DisplayNode *> Children;
+ std::unordered_map<const DisplayNode *, const DisplayEdge *> EdgeMap;
+
+ // Safeguard adding of edges.
+ bool AllEdgesCreated = false;
+};
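// Illustrative sketch only, not part of this change. The two-phase
// construction above (Edges filled first, EdgePtrs/EdgeMap only built in
// createEdgeMap()) avoids the classic dangling-pointer hazard:
//   std::vector<int> V;
//   V.push_back(1);
//   int *P = &V[0];   // fine for now
//   V.push_back(2);   // may reallocate; P can now dangle
// Collect all edges while the vector may still grow, then take addresses once
// AllEdgesCreated guarantees the storage is stable.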
+
+// Class representing a difference display (corresponds to a pdf file).
+class DotCfgDiffDisplayGraph {
+public:
+ DotCfgDiffDisplayGraph(std::string Name) : GraphName(Name) {}
+
+ // Generate the file into \p DotFile.
+ void generateDotFile(StringRef DotFile);
+
+ // Iterator to the nodes. Required by GraphWriter.
+ using NodeIterator = std::vector<DisplayNode *>::const_iterator;
+ NodeIterator nodes_begin() const {
+ assert(NodeGenerationComplete && "Unexpected children iterator creation");
+ return NodePtrs.cbegin();
+ }
+ NodeIterator nodes_end() const {
+ assert(NodeGenerationComplete && "Unexpected children iterator creation");
+ return NodePtrs.cend();
+ }
+
+ // Record the index of the entry node. At this point, we can build up
+ // vectors of pointers that are required by the graph routines.
+ void setEntryNode(unsigned N) {
+ // At this point, there will be no new nodes.
+ assert(!NodeGenerationComplete && "Unexpected node creation");
+ NodeGenerationComplete = true;
+ for (auto &N : Nodes)
+ NodePtrs.emplace_back(&N);
+
+ EntryNode = NodePtrs[N];
+ }
+
+ // Create a node.
+ void createNode(std::string C, IRChangeDiffType T) {
+ assert(!NodeGenerationComplete && "Unexpected node creation");
+ Nodes.emplace_back(C, T);
+ }
+ // Return the node at index \p N to avoid problems with vectors reallocating.
+ DisplayNode &getNode(unsigned N) {
+ assert(N < Nodes.size() && "Node is out of bounds");
+ return Nodes[N];
+ }
+ unsigned size() const {
+ assert(NodeGenerationComplete && "Unexpected children iterator creation");
+ return Nodes.size();
+ }
+
+ // Return the name of the graph. Required by GraphWriter.
+ std::string getGraphName() const { return GraphName; }
+
+ // Return the string representing the differences for basic block \p Node.
+ // Required by GraphWriter.
+ std::string getNodeLabel(const DisplayNode &Node) const {
+ return Node.getContent();
+ }
+
+ // Return a string with colour information for Dot. Required by GraphWriter.
+ std::string getNodeAttributes(const DisplayNode &Node) const {
+ return attribute(Node.getType());
+ }
+
+ // Return a string with colour information for Dot. Required by GraphWriter.
+ std::string getEdgeColorAttr(const DisplayNode &From,
+ const DisplayNode &To) const {
+ return attribute(From.getEdge(To).getType());
+ }
+
+ // Get the starting basic block. Required by GraphWriter.
+ DisplayNode *getEntryNode() const {
+ assert(NodeGenerationComplete && "Unexpected children iterator creation");
+ return EntryNode;
+ }
+
+protected:
+ // Return the string containing the colour to use as a Dot attribute.
+ std::string attribute(IRChangeDiffType T) const;
+
+ bool NodeGenerationComplete = false;
+ const std::string GraphName;
+ std::vector<DisplayNode> Nodes;
+ std::vector<DisplayNode *> NodePtrs;
+ DisplayNode *EntryNode = nullptr;
+};
+
+void DisplayNode::createEdge(StringRef V, DisplayNode &Node,
+ IRChangeDiffType T) {
+ assert(!AllEdgesCreated && "Expected to be able to still create edges.");
+ Edges.emplace_back(V.str(), Node, T);
+ Children.insert(&Node);
+}
+
+void DisplayNode::createEdgeMap() {
+ // No more edges will be added so we can now use pointers to the edges
+ // as the vector will not grow and reallocate.
+ AllEdgesCreated = true;
+ for (auto &E : Edges)
+ EdgeMap.insert({&E.getDestinationNode(), &E});
+}
+
+class DotCfgDiffNode;
+class DotCfgDiff;
+
+// A class representing a basic block in the Dot difference graph.
+class DotCfgDiffNode {
+public:
+ DotCfgDiffNode() = delete;
+
+ // Create a node in Dot difference graph \p G for the basic block described
+ // by \p BD, with type \p T (where it exists).
+ DotCfgDiffNode(DotCfgDiff &G, unsigned N, const BlockDataT<DCData> &BD,
+ IRChangeDiffType T)
+ : Graph(G), N(N), Data{&BD, nullptr}, Type(T) {}
+ DotCfgDiffNode(const DotCfgDiffNode &DN)
+ : Graph(DN.Graph), N(DN.N), Data{DN.Data[0], DN.Data[1]}, Type(DN.Type),
+ EdgesMap(DN.EdgesMap), Children(DN.Children), Edges(DN.Edges) {}
+
+ unsigned getIndex() const { return N; }
+
+ // The label of the basic block
+ StringRef getLabel() const {
+ assert(Data[0] && "Expected Data[0] to be set.");
+ return Data[0]->getLabel();
+ }
+ // Return where this block exists.
+ IRChangeDiffType getType() const { return Type; }
+ // Change this basic block from being only in before to being common.
+ // Save the pointer to \p Other.
+ void setCommon(const BlockDataT<DCData> &Other) {
+ assert(!Data[1] && "Expected only one block datum");
+ Data[1] = &Other;
+ Type = IsCommon;
+ }
+ // Add an edge to \p E of type {\p Value, \p T}.
+ void addEdge(unsigned E, StringRef Value, IRChangeDiffType T) {
+ // This is a new edge or it is an edge being made common.
+ assert((EdgesMap.count(E) == 0 || T == IsCommon) &&
+ "Unexpected edge count and type.");
+ EdgesMap[E] = {Value.str(), T};
+ }
+ // Record the children and create edges.
+ void finalize(DotCfgDiff &G);
+
+ // Return the value and type of the edge to node \p S.
+ std::pair<std::string, IRChangeDiffType> getEdge(const unsigned S) const {
+ assert(EdgesMap.count(S) == 1 && "Expected to find edge.");
+ return EdgesMap.at(S);
+ }
+
+ // Return the string representing the basic block.
+ std::string getBodyContent() const;
+
+ void createDisplayEdges(DotCfgDiffDisplayGraph &Graph, unsigned DisplayNode,
+ std::map<const unsigned, unsigned> &NodeMap) const;
+
+protected:
+ DotCfgDiff &Graph;
+ const unsigned N;
+ const BlockDataT<DCData> *Data[2];
+ IRChangeDiffType Type;
+ std::map<const unsigned, std::pair<std::string, IRChangeDiffType>> EdgesMap;
+ std::vector<unsigned> Children;
+ std::vector<unsigned> Edges;
+};
+
+// Class representing the difference graph between two functions.
+class DotCfgDiff {
+public:
+ // \p Title is the title given to the graph. \p Before and \p After are the
+ // before and after versions of the function, respectively.
+ DotCfgDiff(StringRef Title, const FuncDataT<DCData> &Before,
+ const FuncDataT<DCData> &After);
+
+ DotCfgDiff(const DotCfgDiff &) = delete;
+ DotCfgDiff &operator=(const DotCfgDiff &) = delete;
+
+ DotCfgDiffDisplayGraph createDisplayGraph(StringRef Title,
+ StringRef EntryNodeName);
+
+ // Return a string consisting of the labels for the \p Source and \p Sink.
+ // The combination allows distinguishing changing transitions on the
+ // same value (ie, a transition went to X before and goes to Y after).
+ // Required by GraphWriter.
+ StringRef getEdgeSourceLabel(const unsigned &Source,
+ const unsigned &Sink) const {
+ std::string S =
+ getNode(Source).getLabel().str() + " " + getNode(Sink).getLabel().str();
+ assert(EdgeLabels.count(S) == 1 && "Expected to find edge label.");
+ return EdgeLabels.find(S)->getValue();
+ }
+
+ // Return the number of basic blocks (nodes). Required by GraphWriter.
+ unsigned size() const { return Nodes.size(); }
+
+ const DotCfgDiffNode &getNode(unsigned N) const {
+ assert(N < Nodes.size() && "Unexpected index for node reference");
+ return Nodes[N];
+ }
+
+protected:
+ // Return the string surrounded by HTML to make it the appropriate colour.
+ std::string colourize(std::string S, IRChangeDiffType T) const;
+
+ void createNode(StringRef Label, const BlockDataT<DCData> &BD,
+ IRChangeDiffType T) {
+ unsigned Pos = Nodes.size();
+ Nodes.emplace_back(*this, Pos, BD, T);
+ NodePosition.insert({Label, Pos});
+ }
+
+ // TODO Nodes should probably be a StringMap<DotCfgDiffNode> after the
+ // display graph is separated out, which would remove the need for
+ // NodePosition.
+ std::vector<DotCfgDiffNode> Nodes;
+ StringMap<unsigned> NodePosition;
+ const std::string GraphName;
+
+ StringMap<std::string> EdgeLabels;
+};
+
+std::string DotCfgDiffNode::getBodyContent() const {
+ if (Type == IsCommon) {
+ assert(Data[1] && "Expected Data[1] to be set.");
+
+ StringRef SR[2];
+ for (unsigned I = 0; I < 2; ++I) {
+ SR[I] = Data[I]->getBody();
+ // drop initial '\n' if present
+ if (SR[I][0] == '\n')
+ SR[I] = SR[I].drop_front();
+ // drop predecessors as they can be big and are redundant
+ SR[I] = SR[I].drop_until([](char C) { return C == '\n'; }).drop_front();
+ }
+
+ SmallString<80> OldLineFormat = formatv(
+ "<FONT COLOR=\"{0}\">%l</FONT><BR align=\"left\"/>", Colours[InBefore]);
+ SmallString<80> NewLineFormat = formatv(
+ "<FONT COLOR=\"{0}\">%l</FONT><BR align=\"left\"/>", Colours[InAfter]);
+ SmallString<80> UnchangedLineFormat = formatv(
+ "<FONT COLOR=\"{0}\">%l</FONT><BR align=\"left\"/>", Colours[IsCommon]);
+ std::string Diff = Data[0]->getLabel().str();
+ Diff += ":\n<BR align=\"left\"/>" +
+ doSystemDiff(makeHTMLReady(SR[0]), makeHTMLReady(SR[1]),
+ OldLineFormat, NewLineFormat, UnchangedLineFormat);
+
+ // Diff adds in some empty colour changes which are not valid HTML
+ // so remove them. Colours are all lowercase alpha characters (as
+ // listed in https://graphviz.org/pdf/dotguide.pdf).
+ Regex R("<FONT COLOR=\"\\w+\"></FONT>");
+ while (true) {
+ std::string Error;
+ std::string S = R.sub("", Diff, &Error);
+ if (Error != "")
+ return Error;
+ if (S == Diff)
+ return Diff;
+ Diff = S;
+ }
+ llvm_unreachable("Should not get here");
+ }
+
+ // Put node out in the appropriate colour.
+ assert(!Data[1] && "Data[1] is set unexpectedly.");
+ std::string Body = makeHTMLReady(Data[0]->getBody());
+ const StringRef BS = Body;
+ StringRef BS1 = BS;
+ // Drop leading newline, if present.
+ if (BS.front() == '\n')
+ BS1 = BS1.drop_front(1);
+ // Get label.
+ StringRef Label = BS1.take_until([](char C) { return C == ':'; });
+ // drop predecessors as they can be big and are redundant
+ BS1 = BS1.drop_until([](char C) { return C == '\n'; }).drop_front();
+
+ std::string S = "<FONT COLOR=\"" + Colours[Type] + "\">" + Label.str() + ":";
+
+ // align each line to the left.
+ while (BS1.size()) {
+ S.append("<BR align=\"left\"/>");
+ StringRef Line = BS1.take_until([](char C) { return C == '\n'; });
+ S.append(Line.str());
+ BS1 = BS1.drop_front(Line.size() + 1);
+ }
+ S.append("<BR align=\"left\"/></FONT>");
+ return S;
+}
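// Illustrative input/output only, not part of this change. The fixed-point
// loop above strips the empty colour spans the line diff can emit, e.g.
// (colour names invented)
//   <FONT COLOR="red"></FONT><FONT COLOR="black">  %x = add i32 %a, 1</FONT>
// becomes
//   <FONT COLOR="black">  %x = add i32 %a, 1</FONT>
// Re-running Regex::sub until the string stops changing handles several
// adjacent empty spans; a non-empty Error string from the substitution is
// returned in place of the body.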
+
+std::string DotCfgDiff::colourize(std::string S, IRChangeDiffType T) const {
+ if (S.length() == 0)
+ return S;
+ return "<FONT COLOR=\"" + Colours[T] + "\">" + S + "</FONT>";
+}
+
+std::string DotCfgDiffDisplayGraph::attribute(IRChangeDiffType T) const {
+ return "color=" + Colours[T];
+}
+
+DotCfgDiff::DotCfgDiff(StringRef Title, const FuncDataT<DCData> &Before,
+ const FuncDataT<DCData> &After)
+ : GraphName(Title.str()) {
+ StringMap<IRChangeDiffType> EdgesMap;
+
+ // Handle each basic block in the before IR.
+ for (auto &B : Before.getData()) {
+ StringRef Label = B.getKey();
+ const BlockDataT<DCData> &BD = B.getValue();
+ createNode(Label, BD, InBefore);
+
+ // Create transitions with names made up of the from block label, the value
+ // on which the transition is made and the to block label.
+ for (StringMap<std::string>::const_iterator Sink = BD.getData().begin(),
+ E = BD.getData().end();
+ Sink != E; ++Sink) {
+ std::string Key = (Label + " " + Sink->getKey().str()).str() + " " +
+ BD.getData().getSuccessorLabel(Sink->getKey()).str();
+ EdgesMap.insert({Key, InBefore});
+ }
+ }
+
+ // Handle each basic block in the after IR
+ for (auto &A : After.getData()) {
+ StringRef Label = A.getKey();
+ const BlockDataT<DCData> &BD = A.getValue();
+ unsigned C = NodePosition.count(Label);
+ if (C == 0)
+ // This only exists in the after IR. Create the node.
+ createNode(Label, BD, InAfter);
+ else {
+ assert(C == 1 && "Unexpected multiple nodes.");
+ Nodes[NodePosition[Label]].setCommon(BD);
+ }
+ // Add in the edges between the nodes (as common or only in after).
+ for (StringMap<std::string>::const_iterator Sink = BD.getData().begin(),
+ E = BD.getData().end();
+ Sink != E; ++Sink) {
+ std::string Key = (Label + " " + Sink->getKey().str()).str() + " " +
+ BD.getData().getSuccessorLabel(Sink->getKey()).str();
+ unsigned C = EdgesMap.count(Key);
+ if (C == 0)
+ EdgesMap.insert({Key, InAfter});
+ else {
+ EdgesMap[Key] = IsCommon;
+ }
+ }
+ }
+
+ // Now go through the map of edges and add them to the node.
+ for (auto &E : EdgesMap) {
+ // Extract the source, sink and value from the edge key.
+ StringRef S = E.getKey();
+ auto SP1 = S.rsplit(' ');
+ auto &SourceSink = SP1.first;
+ auto SP2 = SourceSink.split(' ');
+ StringRef Source = SP2.first;
+ StringRef Sink = SP2.second;
+ StringRef Value = SP1.second;
+
+ assert(NodePosition.count(Source) == 1 && "Expected to find node.");
+ DotCfgDiffNode &SourceNode = Nodes[NodePosition[Source]];
+ assert(NodePosition.count(Sink) == 1 && "Expected to find node.");
+ unsigned SinkNode = NodePosition[Sink];
+ IRChangeDiffType T = E.second;
+
+ // Look for an edge from Source to Sink
+ if (EdgeLabels.count(SourceSink) == 0)
+ EdgeLabels.insert({SourceSink, colourize(Value.str(), T)});
+ else {
+ StringRef V = EdgeLabels.find(SourceSink)->getValue();
+ std::string NV = colourize(V.str() + " " + Value.str(), T);
+ T = IsCommon;
+ EdgeLabels[SourceSink] = NV;
+ }
+ SourceNode.addEdge(SinkNode, Value, T);
+ }
+ for (auto &I : Nodes)
+ I.finalize(*this);
+}
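// Worked example with invented labels, not part of this change. For a
// conditional branch from block %entry to block %then on "true", the key built
// in the loops above is the string
//   "entry then true"
// and it is unpacked again right here as
//   rsplit(' ') -> ("entry then", "true")   // SourceSink, Value
//   split(' ')  -> ("entry", "then")        // Source, Sink
// An edge key seen in both the before and after maps is promoted to IsCommon,
// and EdgeLabels accumulates the colourized value string per Source/Sink pair.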
+
+DotCfgDiffDisplayGraph DotCfgDiff::createDisplayGraph(StringRef Title,
+ StringRef EntryNodeName) {
+ assert(NodePosition.count(EntryNodeName) == 1 &&
+ "Expected to find entry block in map.");
+ unsigned Entry = NodePosition[EntryNodeName];
+ assert(Entry < Nodes.size() && "Expected to find entry node");
+ DotCfgDiffDisplayGraph G(Title.str());
+
+ std::map<const unsigned, unsigned> NodeMap;
+
+ int EntryIndex = -1;
+ unsigned Index = 0;
+ for (auto &I : Nodes) {
+ if (I.getIndex() == Entry)
+ EntryIndex = Index;
+ G.createNode(I.getBodyContent(), I.getType());
+ NodeMap.insert({I.getIndex(), Index++});
+ }
+ assert(EntryIndex >= 0 && "Expected entry node index to be set.");
+ G.setEntryNode(EntryIndex);
+
+ for (auto &I : NodeMap) {
+ unsigned SourceNode = I.first;
+ unsigned DisplayNode = I.second;
+ getNode(SourceNode).createDisplayEdges(G, DisplayNode, NodeMap);
+ }
+ return G;
+}
+
+void DotCfgDiffNode::createDisplayEdges(
+ DotCfgDiffDisplayGraph &DisplayGraph, unsigned DisplayNodeIndex,
+ std::map<const unsigned, unsigned> &NodeMap) const {
+
+ DisplayNode &SourceDisplayNode = DisplayGraph.getNode(DisplayNodeIndex);
+
+ for (auto I : Edges) {
+ unsigned SinkNodeIndex = I;
+ IRChangeDiffType Type = getEdge(SinkNodeIndex).second;
+ const DotCfgDiffNode *SinkNode = &Graph.getNode(SinkNodeIndex);
+
+ StringRef Label = Graph.getEdgeSourceLabel(getIndex(), SinkNodeIndex);
+ DisplayNode &SinkDisplayNode = DisplayGraph.getNode(SinkNode->getIndex());
+ SourceDisplayNode.createEdge(Label, SinkDisplayNode, Type);
+ }
+ SourceDisplayNode.createEdgeMap();
+}
+
+void DotCfgDiffNode::finalize(DotCfgDiff &G) {
+ for (auto E : EdgesMap) {
+ Children.emplace_back(E.first);
+ Edges.emplace_back(E.first);
+ }
+}
+
+} // namespace
+
+namespace llvm {
+
+template <> struct GraphTraits<DotCfgDiffDisplayGraph *> {
+ using NodeRef = const DisplayNode *;
+ using ChildIteratorType = DisplayNode::ChildIterator;
+ using nodes_iterator = DotCfgDiffDisplayGraph::NodeIterator;
+ using EdgeRef = const DisplayEdge *;
+ using ChildEdgeIterator = DisplayNode::EdgeIterator;
+
+ static NodeRef getEntryNode(const DotCfgDiffDisplayGraph *G) {
+ return G->getEntryNode();
+ }
+ static ChildIteratorType child_begin(NodeRef N) {
+ return N->children_begin();
+ }
+ static ChildIteratorType child_end(NodeRef N) { return N->children_end(); }
+ static nodes_iterator nodes_begin(const DotCfgDiffDisplayGraph *G) {
+ return G->nodes_begin();
+ }
+ static nodes_iterator nodes_end(const DotCfgDiffDisplayGraph *G) {
+ return G->nodes_end();
+ }
+ static ChildEdgeIterator child_edge_begin(NodeRef N) {
+ return N->edges_begin();
+ }
+ static ChildEdgeIterator child_edge_end(NodeRef N) { return N->edges_end(); }
+ static NodeRef edge_dest(EdgeRef E) { return &E->getDestinationNode(); }
+ static unsigned size(const DotCfgDiffDisplayGraph *G) { return G->size(); }
+};
+
+template <>
+struct DOTGraphTraits<DotCfgDiffDisplayGraph *> : public DefaultDOTGraphTraits {
+ explicit DOTGraphTraits(bool Simple = false)
+ : DefaultDOTGraphTraits(Simple) {}
+
+ static bool renderNodesUsingHTML() { return true; }
+ static std::string getGraphName(const DotCfgDiffDisplayGraph *DiffData) {
+ return DiffData->getGraphName();
+ }
+ static std::string
+ getGraphProperties(const DotCfgDiffDisplayGraph *DiffData) {
+ return "\tsize=\"190, 190\";\n";
+ }
+ static std::string getNodeLabel(const DisplayNode *Node,
+ const DotCfgDiffDisplayGraph *DiffData) {
+ return DiffData->getNodeLabel(*Node);
+ }
+ static std::string getNodeAttributes(const DisplayNode *Node,
+ const DotCfgDiffDisplayGraph *DiffData) {
+ return DiffData->getNodeAttributes(*Node);
+ }
+ static std::string getEdgeSourceLabel(const DisplayNode *From,
+ DisplayNode::ChildIterator &To) {
+ return From->getEdgeSourceLabel(**To);
+ }
+ static std::string getEdgeAttributes(const DisplayNode *From,
+ DisplayNode::ChildIterator &To,
+ const DotCfgDiffDisplayGraph *DiffData) {
+ return DiffData->getEdgeColorAttr(*From, **To);
+ }
+};
+
+} // namespace llvm
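// For orientation (not part of this change): these GraphTraits/DOTGraphTraits
// specializations are what allow the generic llvm::WriteGraph call in
// generateDotFile() below to walk a DotCfgDiffDisplayGraph and emit Graphviz
// source, pulling node bodies from getNodeLabel(), outline colours from
// getNodeAttributes(), and edge colours from getEdgeAttributes().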
+
+namespace {
+
+void DotCfgDiffDisplayGraph::generateDotFile(StringRef DotFile) {
+ std::error_code EC;
+ raw_fd_ostream OutStream(DotFile, EC);
+ if (EC) {
+ errs() << "Error: " << EC.message() << "\n";
+ return;
+ }
+ WriteGraph(OutStream, this, false);
+ OutStream.flush();
+ OutStream.close();
+}
+
+} // namespace
+
+namespace llvm {
+
+DCData::DCData(const BasicBlock &B) {
+ // Build up transition labels.
+ const Instruction *Term = B.getTerminator();
+ if (const BranchInst *Br = dyn_cast<const BranchInst>(Term))
+ if (Br->isUnconditional())
+ addSuccessorLabel(Br->getSuccessor(0)->getName().str(), "");
+ else {
+ addSuccessorLabel(Br->getSuccessor(0)->getName().str(), "true");
+ addSuccessorLabel(Br->getSuccessor(1)->getName().str(), "false");
+ }
+ else if (const SwitchInst *Sw = dyn_cast<const SwitchInst>(Term)) {
+ addSuccessorLabel(Sw->case_default()->getCaseSuccessor()->getName().str(),
+ "default");
+ for (auto &C : Sw->cases()) {
+ assert(C.getCaseValue() && "Expected to find case value.");
+ SmallString<20> Value = formatv("{0}", C.getCaseValue()->getSExtValue());
+ addSuccessorLabel(C.getCaseSuccessor()->getName().str(), Value);
+ }
+ } else
+ for (const_succ_iterator I = succ_begin(&B), E = succ_end(&B); I != E; ++I)
+ addSuccessorLabel((*I)->getName().str(), "");
+}
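// Illustrative only, not part of this change. For
//   br i1 %cmp, label %then, label %else
// the constructor above records successor labels {"then" -> "true",
// "else" -> "false"}; for
//   switch i32 %v, label %default [ i32 0, label %bb0  i32 7, label %bb7 ]
// it records {"default" -> "default", "bb0" -> "0", "bb7" -> "7"}; any other
// terminator gets an empty label for each successor.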
+
+DotCfgChangeReporter::DotCfgChangeReporter(bool Verbose)
+ : ChangeReporter<IRDataT<DCData>>(Verbose) {
+ // Set up the colours based on the hidden options.
+ Colours[InBefore] = BeforeColour;
+ Colours[InAfter] = AfterColour;
+ Colours[IsCommon] = CommonColour;
+}
+
+void DotCfgChangeReporter::handleFunctionCompare(
+ StringRef Name, StringRef Prefix, StringRef PassID, StringRef Divider,
+ bool InModule, unsigned Minor, const FuncDataT<DCData> &Before,
+ const FuncDataT<DCData> &After) {
+ assert(HTML && "Expected outstream to be set");
+ SmallString<8> Extender;
+ SmallString<8> Number;
+ // Handle numbering and file names.
+ if (InModule) {
+ Extender = formatv("{0}_{1}", N, Minor);
+ Number = formatv("{0}.{1}", N, Minor);
+ } else {
+ Extender = formatv("{0}", N);
+ Number = formatv("{0}", N);
+ }
+ // Create a temporary file name for the dot file.
+ SmallVector<char, 128> SV;
+ sys::fs::createUniquePath("cfgdot-%%%%%%.dot", SV, true);
+ std::string DotFile = Twine(SV).str();
+
+ SmallString<20> PDFFileName = formatv("diff_{0}.pdf", Extender);
+ SmallString<200> Text;
+
+ Text = formatv("{0}.{1}{2}{3}{4}", Number, Prefix, makeHTMLReady(PassID),
+ Divider, Name);
+
+ DotCfgDiff Diff(Text, Before, After);
+ std::string EntryBlockName = After.getEntryBlockName();
+ // Use the before entry block if the after entry block was removed.
+ if (EntryBlockName == "")
+ EntryBlockName = Before.getEntryBlockName();
+ assert(EntryBlockName != "" && "Expected to find entry block");
+
+ DotCfgDiffDisplayGraph DG = Diff.createDisplayGraph(Text, EntryBlockName);
+ DG.generateDotFile(DotFile);
+
+ *HTML << genHTML(Text, DotFile, PDFFileName);
+ std::error_code EC = sys::fs::remove(DotFile);
+ if (EC)
+ errs() << "Error: " << EC.message() << "\n";
+}
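// Concrete example of the naming scheme above (pass and function names
// invented for illustration). With N == 3, Minor == 1, InModule == true,
// Prefix == " Pass " and Divider == " on ":
//   Extender = "3_1"  ->  PDFFileName = "diff_3_1.pdf"
//   Number   = "3.1"
//   Text     = "3.1. Pass SimplifyCFGPass on foo"
// The Graphviz source is written to a unique temporary cfgdot-*.dot file and
// deleted again once genHTML() has produced the PDF and the HTML link.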
+
+std::string DotCfgChangeReporter::genHTML(StringRef Text, StringRef DotFile,
+ StringRef PDFFileName) {
+ SmallString<20> PDFFile = formatv("{0}/{1}", DotCfgDir, PDFFileName);
+ // Create the PDF file.
+ static ErrorOr<std::string> DotExe = sys::findProgramByName(DotBinary);
+ if (!DotExe)
+ return "Unable to find dot executable.";
+
+ StringRef Args[] = {DotBinary, "-Tpdf", "-o", PDFFile, DotFile};
+ int Result = sys::ExecuteAndWait(*DotExe, Args, None);
+ if (Result < 0)
+ return "Error executing system dot.";
+
+ // Create the HTML tag referring to the PDF file.
+ SmallString<200> S = formatv(
+ " <a href=\"{0}\" target=\"_blank\">{1}</a><br/>\n", PDFFileName, Text);
+ return S.c_str();
+}
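// Continuing the example above (paths assumed for illustration, not part of
// this change), genHTML() runs the configured dot binary roughly as
//   <DotBinary> -Tpdf -o <DotCfgDir>/diff_3_1.pdf cfgdot-xxxxxx.dot
// and hands back the line that ends up in passes.html:
//   <a href="diff_3_1.pdf" target="_blank">3.1. Pass SimplifyCFGPass on foo</a><br/>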
+
+void DotCfgChangeReporter::handleInitialIR(Any IR) {
+ assert(HTML && "Expected outstream to be set");
+ *HTML << "<button type=\"button\" class=\"collapsible\">0. "
+ << "Initial IR (by function)</button>\n"
+ << "<div class=\"content\">\n"
+ << " <p>\n";
+ // Create representation of IR
+ IRDataT<DCData> Data;
+ IRComparer<DCData>::analyzeIR(IR, Data);
+ // Now compare it against itself, which will have everything the
+ // same and will generate the files.
+ IRComparer<DCData>(Data, Data)
+ .compare(getModuleForComparison(IR),
+ [&](bool InModule, unsigned Minor,
+ const FuncDataT<DCData> &Before,
+ const FuncDataT<DCData> &After) -> void {
+ handleFunctionCompare("", " ", "Initial IR", "", InModule,
+ Minor, Before, After);
+ });
+ *HTML << " </p>\n"
+ << "</div><br/>\n";
+ ++N;
+}
+
+void DotCfgChangeReporter::generateIRRepresentation(Any IR, StringRef PassID,
+ IRDataT<DCData> &Data) {
+ IRComparer<DCData>::analyzeIR(IR, Data);
+}
+
+void DotCfgChangeReporter::omitAfter(StringRef PassID, std::string &Name) {
+ assert(HTML && "Expected outstream to be set");
+ SmallString<20> Banner =
+ formatv(" <a>{0}. Pass {1} on {2} omitted because no change</a><br/>\n",
+ N, makeHTMLReady(PassID), Name);
+ *HTML << Banner;
+ ++N;
+}
+
+void DotCfgChangeReporter::handleAfter(StringRef PassID, std::string &Name,
+ const IRDataT<DCData> &Before,
+ const IRDataT<DCData> &After, Any IR) {
+ assert(HTML && "Expected outstream to be set");
+ IRComparer<DCData>(Before, After)
+ .compare(getModuleForComparison(IR),
+ [&](bool InModule, unsigned Minor,
+ const FuncDataT<DCData> &Before,
+ const FuncDataT<DCData> &After) -> void {
+ handleFunctionCompare(Name, " Pass ", PassID, " on ", InModule,
+ Minor, Before, After);
+ });
+ *HTML << " </p></div>\n";
+ ++N;
+}
+
+void DotCfgChangeReporter::handleInvalidated(StringRef PassID) {
+ assert(HTML && "Expected outstream to be set");
+ SmallString<20> Banner =
+ formatv(" <a>{0}. {1} invalidated</a><br/>\n", N, makeHTMLReady(PassID));
+ *HTML << Banner;
+ ++N;
+}
+
+void DotCfgChangeReporter::handleFiltered(StringRef PassID, std::string &Name) {
+ assert(HTML && "Expected outstream to be set");
+ SmallString<20> Banner =
+ formatv(" <a>{0}. Pass {1} on {2} filtered out</a><br/>\n", N,
+ makeHTMLReady(PassID), Name);
+ *HTML << Banner;
+ ++N;
+}
+
+void DotCfgChangeReporter::handleIgnored(StringRef PassID, std::string &Name) {
+ assert(HTML && "Expected outstream to be set");
+ SmallString<20> Banner = formatv(" <a>{0}. {1} on {2} ignored</a><br/>\n", N,
+ makeHTMLReady(PassID), Name);
+ *HTML << Banner;
+ ++N;
+}
+
+bool DotCfgChangeReporter::initializeHTML() {
+ std::error_code EC;
+ HTML = std::make_unique<raw_fd_ostream>(DotCfgDir + "/passes.html", EC);
+ if (EC) {
+ HTML = nullptr;
+ return false;
+ }
+
+ *HTML << "<!doctype html>"
+ << "<html>"
+ << "<head>"
+ << "<style>.collapsible { "
+ << "background-color: #777;"
+ << " color: white;"
+ << " cursor: pointer;"
+ << " padding: 18px;"
+ << " width: 100%;"
+ << " border: none;"
+ << " text-align: left;"
+ << " outline: none;"
+ << " font-size: 15px;"
+ << "} .active, .collapsible:hover {"
+ << " background-color: #555;"
+ << "} .content {"
+ << " padding: 0 18px;"
+ << " display: none;"
+ << " overflow: hidden;"
+ << " background-color: #f1f1f1;"
+ << "}"
+ << "</style>"
+ << "<title>passes.html</title>"
+ << "</head>\n"
+ << "<body>";
+ return true;
+}
+
+DotCfgChangeReporter::~DotCfgChangeReporter() {
+ if (!HTML)
+ return;
+ *HTML
+ << "<script>var coll = document.getElementsByClassName(\"collapsible\");"
+ << "var i;"
+ << "for (i = 0; i < coll.length; i++) {"
+ << "coll[i].addEventListener(\"click\", function() {"
+ << " this.classList.toggle(\"active\");"
+ << " var content = this.nextElementSibling;"
+ << " if (content.style.display === \"block\"){"
+ << " content.style.display = \"none\";"
+ << " }"
+ << " else {"
+ << " content.style.display= \"block\";"
+ << " }"
+ << " });"
+ << " }"
+ << "</script>"
+ << "</body>"
+ << "</html>\n";
+ HTML->flush();
+ HTML->close();
+}
+
+void DotCfgChangeReporter::registerCallbacks(
+ PassInstrumentationCallbacks &PIC) {
+ if ((PrintChanged == ChangePrinter::PrintChangedDotCfgVerbose ||
+ PrintChanged == ChangePrinter::PrintChangedDotCfgQuiet)) {
+ SmallString<128> OutputDir;
+ sys::fs::expand_tilde(DotCfgDir, OutputDir);
+ sys::fs::make_absolute(OutputDir);
+ assert(!OutputDir.empty() && "expected output dir to be non-empty");
+ DotCfgDir = OutputDir.c_str();
+ if (initializeHTML()) {
+ ChangeReporter<IRDataT<DCData>>::registerRequiredCallbacks(PIC);
+ return;
+ }
+ dbgs() << "Unable to open output stream for -cfg-dot-changed\n";
+ }
}
StandardInstrumentations::StandardInstrumentations(
@@ -1222,6 +2125,8 @@ StandardInstrumentations::StandardInstrumentations(
PrintChanged == ChangePrinter::PrintChangedColourDiffVerbose,
PrintChanged == ChangePrinter::PrintChangedColourDiffVerbose ||
PrintChanged == ChangePrinter::PrintChangedColourDiffQuiet),
+ WebsiteChangeReporter(PrintChanged ==
+ ChangePrinter::PrintChangedDotCfgVerbose),
Verify(DebugLogging), VerifyEach(VerifyEach) {}
void StandardInstrumentations::registerCallbacks(
@@ -1238,14 +2143,17 @@ void StandardInstrumentations::registerCallbacks(
if (VerifyEach)
Verify.registerCallbacks(PIC);
PrintChangedDiff.registerCallbacks(PIC);
+ WebsiteChangeReporter.registerCallbacks(PIC);
}
-namespace llvm {
-
template class ChangeReporter<std::string>;
template class TextChangeReporter<std::string>;
-template class ChangeReporter<ChangedIRData>;
-template class TextChangeReporter<ChangedIRData>;
+template class BlockDataT<EmptyData>;
+template class FuncDataT<EmptyData>;
+template class IRDataT<EmptyData>;
+template class ChangeReporter<IRDataT<EmptyData>>;
+template class TextChangeReporter<IRDataT<EmptyData>>;
+template class IRComparer<EmptyData>;
} // namespace llvm
diff --git a/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp b/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp
index 9fffb249e72d..94bd4807041d 100644
--- a/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp
+++ b/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp
@@ -567,7 +567,8 @@ class VersionedCovMapFuncRecordReader : public CovMapFuncRecordReader {
if (Error Err = CFR->template getFuncName<Endian>(ProfileNames, FuncName))
return Err;
if (FuncName.empty())
- return make_error<InstrProfError>(instrprof_error::malformed);
+ return make_error<InstrProfError>(instrprof_error::malformed,
+ "function name is empty");
++CovMapNumUsedRecords;
Records.emplace_back(Version, FuncName, FuncHash, Mapping,
FileRange.StartingIndex, FileRange.Length);
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index a83b56ed67f1..1168ad27fe52 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -74,53 +74,82 @@ static cl::opt<unsigned> StaticFuncStripDirNamePrefix(
cl::desc("Strip specified level of directory name from source path in "
"the profile counter name for static functions."));
-static std::string getInstrProfErrString(instrprof_error Err) {
+static std::string getInstrProfErrString(instrprof_error Err,
+ const std::string &ErrMsg = "") {
+ std::string Msg;
+ raw_string_ostream OS(Msg);
+
switch (Err) {
case instrprof_error::success:
- return "success";
+ OS << "success";
+ break;
case instrprof_error::eof:
- return "end of File";
+ OS << "end of File";
+ break;
case instrprof_error::unrecognized_format:
- return "unrecognized instrumentation profile encoding format";
+ OS << "unrecognized instrumentation profile encoding format";
+ break;
case instrprof_error::bad_magic:
- return "invalid instrumentation profile data (bad magic)";
+ OS << "invalid instrumentation profile data (bad magic)";
+ break;
case instrprof_error::bad_header:
- return "invalid instrumentation profile data (file header is corrupt)";
+ OS << "invalid instrumentation profile data (file header is corrupt)";
+ break;
case instrprof_error::unsupported_version:
- return "unsupported instrumentation profile format version";
+ OS << "unsupported instrumentation profile format version";
+ break;
case instrprof_error::unsupported_hash_type:
- return "unsupported instrumentation profile hash type";
+ OS << "unsupported instrumentation profile hash type";
+ break;
case instrprof_error::too_large:
- return "too much profile data";
+ OS << "too much profile data";
+ break;
case instrprof_error::truncated:
- return "truncated profile data";
+ OS << "truncated profile data";
+ break;
case instrprof_error::malformed:
- return "malformed instrumentation profile data";
+ OS << "malformed instrumentation profile data";
+ break;
case instrprof_error::invalid_prof:
- return "invalid profile created. Please file a bug "
- "at: " BUG_REPORT_URL
- " and include the profraw files that caused this error.";
+ OS << "invalid profile created. Please file a bug "
+ "at: " BUG_REPORT_URL
+ " and include the profraw files that caused this error.";
+ break;
case instrprof_error::unknown_function:
- return "no profile data available for function";
+ OS << "no profile data available for function";
+ break;
case instrprof_error::hash_mismatch:
- return "function control flow change detected (hash mismatch)";
+ OS << "function control flow change detected (hash mismatch)";
+ break;
case instrprof_error::count_mismatch:
- return "function basic block count change detected (counter mismatch)";
+ OS << "function basic block count change detected (counter mismatch)";
+ break;
case instrprof_error::counter_overflow:
- return "counter overflow";
+ OS << "counter overflow";
+ break;
case instrprof_error::value_site_count_mismatch:
- return "function value site count change detected (counter mismatch)";
+ OS << "function value site count change detected (counter mismatch)";
+ break;
case instrprof_error::compress_failed:
- return "failed to compress data (zlib)";
+ OS << "failed to compress data (zlib)";
+ break;
case instrprof_error::uncompress_failed:
- return "failed to uncompress data (zlib)";
+ OS << "failed to uncompress data (zlib)";
+ break;
case instrprof_error::empty_raw_profile:
- return "empty raw profile file";
+ OS << "empty raw profile file";
+ break;
case instrprof_error::zlib_unavailable:
- return "profile uses zlib compression but the profile reader was built "
- "without zlib support";
+ OS << "profile uses zlib compression but the profile reader was built "
+ "without zlib support";
+ break;
}
- llvm_unreachable("A value of instrprof_error has no message.");
+
+ // If optional error message is not empty, append it to the message.
+ if (!ErrMsg.empty())
+ OS << ": " << ErrMsg;
+
+ return OS.str();
}
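// Illustrative effect of the optional detail message, not part of this
// change's text. With this refactoring,
//   make_error<InstrProfError>(instrprof_error::malformed,
//                              "function name is empty")
// now renders as
//   "malformed instrumentation profile data: function name is empty"
// where it previously produced only the generic
//   "malformed instrumentation profile data".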
namespace {
@@ -217,7 +246,7 @@ void SoftInstrProfErrors::addError(instrprof_error IE) {
}
std::string InstrProfError::message() const {
- return getInstrProfErrString(Err);
+ return getInstrProfErrString(Err, Msg);
}
char InstrProfError::ID = 0;
@@ -878,18 +907,23 @@ static std::unique_ptr<ValueProfData> allocValueProfData(uint32_t TotalSize) {
Error ValueProfData::checkIntegrity() {
if (NumValueKinds > IPVK_Last + 1)
- return make_error<InstrProfError>(instrprof_error::malformed);
- // Total size needs to be mulltiple of quadword size.
+ return make_error<InstrProfError>(
+ instrprof_error::malformed, "number of value profile kinds is invalid");
+ // Total size needs to be a multiple of quadword size.
if (TotalSize % sizeof(uint64_t))
- return make_error<InstrProfError>(instrprof_error::malformed);
+ return make_error<InstrProfError>(
+ instrprof_error::malformed, "total size is not a multiple of quadword");
ValueProfRecord *VR = getFirstValueProfRecord(this);
for (uint32_t K = 0; K < this->NumValueKinds; K++) {
if (VR->Kind > IPVK_Last)
- return make_error<InstrProfError>(instrprof_error::malformed);
+ return make_error<InstrProfError>(instrprof_error::malformed,
+ "value kind is invalid");
VR = getValueProfRecordNext(VR);
if ((char *)VR - (char *)this > (ptrdiff_t)TotalSize)
- return make_error<InstrProfError>(instrprof_error::malformed);
+ return make_error<InstrProfError>(
+ instrprof_error::malformed,
+ "value profile address is greater than total size");
}
return Error::success();
}
@@ -1098,10 +1132,14 @@ bool needsComdatForCounter(const Function &F, const Module &M) {
bool isIRPGOFlagSet(const Module *M) {
auto IRInstrVar =
M->getNamedGlobal(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR));
- if (!IRInstrVar || IRInstrVar->isDeclaration() ||
- IRInstrVar->hasLocalLinkage())
+ if (!IRInstrVar || IRInstrVar->hasLocalLinkage())
return false;
+ // For CSPGO+LTO, this variable might be marked as non-prevailing and we only
+ // have the decl.
+ if (IRInstrVar->isDeclaration())
+ return true;
+
// Check if the flag is set.
if (!IRInstrVar->hasInitializer())
return false;
@@ -1137,8 +1175,8 @@ bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken) {
// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
// aware this is an ir_level profile so it can set the version flag.
-void createIRLevelProfileFlagVar(Module &M, bool IsCS,
- bool InstrEntryBBEnabled) {
+GlobalVariable *createIRLevelProfileFlagVar(Module &M, bool IsCS,
+ bool InstrEntryBBEnabled) {
const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR));
Type *IntTy64 = Type::getInt64Ty(M.getContext());
uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF);
@@ -1155,6 +1193,7 @@ void createIRLevelProfileFlagVar(Module &M, bool IsCS,
IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage);
IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName));
}
+ return IRLevelVersionVariable;
}
// Create the variable for the profile file name.
diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index 8a4470ae207d..b4e8025dbef9 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -204,13 +204,15 @@ TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) {
return success();
}
if (NumValueKinds == 0 || NumValueKinds > IPVK_Last + 1)
- return error(instrprof_error::malformed);
+ return error(instrprof_error::malformed,
+ "number of value kinds is invalid");
Line++;
for (uint32_t VK = 0; VK < NumValueKinds; VK++) {
VP_READ_ADVANCE(ValueKind);
if (ValueKind > IPVK_Last)
- return error(instrprof_error::malformed);
+ return error(instrprof_error::malformed, "value kind is invalid");
+ ;
VP_READ_ADVANCE(NumValueSites);
if (!NumValueSites)
continue;
@@ -268,16 +270,18 @@ Error TextInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) {
if (Line.is_at_end())
return error(instrprof_error::truncated);
if ((Line++)->getAsInteger(0, Record.Hash))
- return error(instrprof_error::malformed);
+ return error(instrprof_error::malformed,
+ "function hash is not a valid integer");
// Read the number of counters.
uint64_t NumCounters;
if (Line.is_at_end())
return error(instrprof_error::truncated);
if ((Line++)->getAsInteger(10, NumCounters))
- return error(instrprof_error::malformed);
+ return error(instrprof_error::malformed,
+ "number of counters is not a valid integer");
if (NumCounters == 0)
- return error(instrprof_error::malformed);
+ return error(instrprof_error::malformed, "number of counters is zero");
// Read each counter and fill our internal storage with the values.
Record.Clear();
@@ -287,7 +291,7 @@ Error TextInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) {
return error(instrprof_error::truncated);
uint64_t Count;
if ((Line++)->getAsInteger(10, Count))
- return error(instrprof_error::malformed);
+ return error(instrprof_error::malformed, "count is invalid");
Record.Counts.push_back(Count);
}
@@ -332,10 +336,12 @@ Error RawInstrProfReader<IntPtrT>::readNextHeader(const char *CurrentPos) {
// If there isn't enough space for another header, this is probably just
// garbage at the end of the file.
if (CurrentPos + sizeof(RawInstrProf::Header) > End)
- return make_error<InstrProfError>(instrprof_error::malformed);
+ return make_error<InstrProfError>(instrprof_error::malformed,
+ "not enough space for another header");
// The writer ensures each profile is padded to start at an aligned address.
if (reinterpret_cast<size_t>(CurrentPos) % alignof(uint64_t))
- return make_error<InstrProfError>(instrprof_error::malformed);
+ return make_error<InstrProfError>(instrprof_error::malformed,
+ "insufficient padding");
// The magic should have the same byte order as in the previous header.
uint64_t Magic = *reinterpret_cast<const uint64_t *>(CurrentPos);
if (Magic != swap(RawInstrProf::getMagic<IntPtrT>()))
@@ -366,6 +372,10 @@ Error RawInstrProfReader<IntPtrT>::readHeader(
if (GET_VERSION(Version) != RawInstrProf::Version)
return error(instrprof_error::unsupported_version);
+ BinaryIdsSize = swap(Header.BinaryIdsSize);
+ if (BinaryIdsSize % sizeof(uint64_t))
+ return error(instrprof_error::bad_header);
+
CountersDelta = swap(Header.CountersDelta);
NamesDelta = swap(Header.NamesDelta);
auto DataSize = swap(Header.DataSize);
@@ -374,7 +384,6 @@ Error RawInstrProfReader<IntPtrT>::readHeader(
auto PaddingBytesAfterCounters = swap(Header.PaddingBytesAfterCounters);
NamesSize = swap(Header.NamesSize);
ValueKindLast = swap(Header.ValueKindLast);
- BinaryIdsSize = swap(Header.BinaryIdsSize);
auto DataSizeInBytes = DataSize * sizeof(RawInstrProf::ProfileData<IntPtrT>);
auto PaddingSize = getNumPaddingBytes(NamesSize);
@@ -402,6 +411,10 @@ Error RawInstrProfReader<IntPtrT>::readHeader(
NamesStart = Start + NamesOffset;
ValueDataStart = reinterpret_cast<const uint8_t *>(Start + ValueDataOffset);
+ const uint8_t *BufferEnd = (const uint8_t *)DataBuffer->getBufferEnd();
+ if (BinaryIdsStart + BinaryIdsSize > BufferEnd)
+ return error(instrprof_error::bad_header);
+
std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>();
if (Error E = createSymtab(*NewSymtab.get()))
return E;
@@ -426,21 +439,46 @@ template <class IntPtrT>
Error RawInstrProfReader<IntPtrT>::readRawCounts(
InstrProfRecord &Record) {
uint32_t NumCounters = swap(Data->NumCounters);
- IntPtrT CounterPtr = Data->CounterPtr;
if (NumCounters == 0)
- return error(instrprof_error::malformed);
+ return error(instrprof_error::malformed, "number of counters is zero");
+ IntPtrT CounterPtr = Data->CounterPtr;
auto *NamesStartAsCounter = reinterpret_cast<const uint64_t *>(NamesStart);
ptrdiff_t MaxNumCounters = NamesStartAsCounter - CountersStart;
// Check bounds. Note that the counter pointer embedded in the data record
// may itself be corrupt.
if (MaxNumCounters < 0 || NumCounters > (uint32_t)MaxNumCounters)
- return error(instrprof_error::malformed);
+ return error(instrprof_error::malformed,
+ "counter pointer is out of bounds");
+
+ // We need to compute the in-buffer counter offset from the in-memory address
+ // distance. The initial CountersDelta is the in-memory address difference
+ // start(__llvm_prf_cnts)-start(__llvm_prf_data), so SrcData->CounterPtr -
+ // CountersDelta computes the offset into the in-buffer counter section.
+ //
+ // CountersDelta decreases as we advance to the next data record.
ptrdiff_t CounterOffset = getCounterOffset(CounterPtr);
- if (CounterOffset < 0 || CounterOffset > MaxNumCounters ||
- ((uint32_t)CounterOffset + NumCounters) > (uint32_t)MaxNumCounters)
- return error(instrprof_error::malformed);
+ CountersDelta -= sizeof(*Data);
+ if (CounterOffset < 0)
+ return error(
+ instrprof_error::malformed,
+ ("counter offset " + Twine(CounterOffset) + " is negative").str());
+
+ if (CounterOffset > MaxNumCounters)
+ return error(instrprof_error::malformed,
+ ("counter offset " + Twine(CounterOffset) +
+ " is greater than the maximum number of counters " +
+ Twine((uint32_t)MaxNumCounters))
+ .str());
+
+ if (((uint32_t)CounterOffset + NumCounters) > (uint32_t)MaxNumCounters)
+ return error(instrprof_error::malformed,
+ ("number of counters " +
+ Twine(((uint32_t)CounterOffset + NumCounters)) +
+ " is greater than the maximum number of counters " +
+ Twine((uint32_t)MaxNumCounters))
+ .str());
auto RawCounts = makeArrayRef(getCounter(CounterOffset), NumCounters);
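// Hedged numeric illustration of the offset arithmetic above; the concrete
// addresses are invented. If __llvm_prf_data starts at 0x1000 and
// __llvm_prf_cnts at 0x3000, the header's initial CountersDelta is 0x2000, so
// a record whose CounterPtr evaluates to 0x2010 maps to byte offset
// 0x2010 - 0x2000 = 0x10 in the in-buffer counter section. Because
// CountersDelta is reduced by sizeof(*Data) after every record, the same
// subtraction remains the right computation for each successive data record as
// the reader walks the data section.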
@@ -512,6 +550,10 @@ Error RawInstrProfReader<IntPtrT>::readNextRecord(NamedInstrProfRecord &Record)
return success();
}
+static size_t RoundUp(size_t size, size_t align) {
+ return (size + align - 1) & ~(align - 1);
+}
+
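// Quick check of the bit trick above (align must be a power of two):
// RoundUp(20, 8) == (20 + 7) & ~7 == 24, and an already-aligned value is
// unchanged, RoundUp(24, 8) == 24. printBinaryIds() below uses it to step over
// the zero-padding that follows each build ID.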
template <class IntPtrT>
Error RawInstrProfReader<IntPtrT>::printBinaryIds(raw_ostream &OS) {
if (BinaryIdsSize == 0)
@@ -519,19 +561,38 @@ Error RawInstrProfReader<IntPtrT>::printBinaryIds(raw_ostream &OS) {
OS << "Binary IDs: \n";
const uint8_t *BI = BinaryIdsStart;
- while (BI < BinaryIdsStart + BinaryIdsSize) {
+ const uint8_t *BIEnd = BinaryIdsStart + BinaryIdsSize;
+ while (BI < BIEnd) {
+ size_t Remaining = BIEnd - BI;
+
+ // There should be enough left to read the binary ID size field.
+ if (Remaining < sizeof(uint64_t))
+ return make_error<InstrProfError>(
+ instrprof_error::malformed,
+ "not enough data to read binary id length");
+
uint64_t BinaryIdLen = swap(*reinterpret_cast<const uint64_t *>(BI));
+
+ // There should be enough left to read the binary ID size field, and the
+ // binary ID.
+ if (Remaining < sizeof(BinaryIdLen) + BinaryIdLen)
+ return make_error<InstrProfError>(
+ instrprof_error::malformed, "not enough data to read binary id data");
+
// Increment by binary id length data type size.
BI += sizeof(BinaryIdLen);
if (BI > (const uint8_t *)DataBuffer->getBufferEnd())
- return make_error<InstrProfError>(instrprof_error::malformed);
+ return make_error<InstrProfError>(
+ instrprof_error::malformed,
+ "binary id that is read is bigger than buffer size");
for (uint64_t I = 0; I < BinaryIdLen; I++)
OS << format("%02x", BI[I]);
OS << "\n";
- // Increment by binary id data length.
- BI += BinaryIdLen;
+ // Increment by binary id data length, rounded to the next 8 bytes. This
+ // accounts for the zero-padding after each build ID.
+ BI += RoundUp(BinaryIdLen, sizeof(uint64_t));
if (BI > (const uint8_t *)DataBuffer->getBufferEnd())
return make_error<InstrProfError>(instrprof_error::malformed);
}
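Each binary ID record in the raw profile is a uint64_t length followed by the ID bytes, zero-padded to the next 8-byte boundary, which is what RoundUp computes above. A small self-contained sketch that walks one such record; the buffer layout is assumed, and host byte order is used instead of the reader's swap():

#include <cstdint>
#include <cstdio>
#include <cstring>
#include <vector>

static size_t RoundUp(size_t Size, size_t Align) {
  return (Size + Align - 1) & ~(Align - 1);
}

int main() {
  // One record: length = 3, payload {0xAA,0xBB,0xCC}, padded to 8 bytes.
  std::vector<uint8_t> Buf(8 + 8, 0);
  uint64_t Len = 3;
  std::memcpy(Buf.data(), &Len, sizeof(Len));
  const uint8_t Payload[3] = {0xAA, 0xBB, 0xCC};
  std::memcpy(Buf.data() + 8, Payload, sizeof(Payload));

  const uint8_t *BI = Buf.data(), *End = BI + Buf.size();
  while (BI + sizeof(uint64_t) <= End) {
    uint64_t IdLen;
    std::memcpy(&IdLen, BI, sizeof(IdLen));
    BI += sizeof(IdLen);
    for (uint64_t I = 0; I < IdLen; ++I)
      std::printf("%02x", BI[I]);
    std::printf("\n");
    BI += RoundUp(IdLen, sizeof(uint64_t)); // skip the zero padding too
  }
  return 0;
}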
@@ -624,7 +685,8 @@ Error InstrProfReaderIndex<HashTableImpl>::getRecords(
Data = (*Iter);
if (Data.empty())
- return make_error<InstrProfError>(instrprof_error::malformed);
+ return make_error<InstrProfError>(instrprof_error::malformed,
+ "profile data is empty");
return Error::success();
}
@@ -638,7 +700,8 @@ Error InstrProfReaderIndex<HashTableImpl>::getRecords(
Data = *RecordIterator;
if (Data.empty())
- return make_error<InstrProfError>(instrprof_error::malformed);
+ return make_error<InstrProfError>(instrprof_error::malformed,
+ "profile data is empty");
return Error::success();
}
@@ -669,7 +732,7 @@ public:
return Underlying.getRecords(FuncName, Data);
}
};
-}
+} // namespace
/// A remapper that applies remappings based on a symbol remapping file.
template <typename HashTableImpl>
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index 987c0b175d3c..492e3541cb5a 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -215,8 +215,7 @@ void InstrProfWriter::overlapRecord(NamedInstrProfRecord &&Other,
InstrProfRecord &Dest = Where->second;
uint64_t ValueCutoff = FuncFilter.ValueCutoff;
- if (!FuncFilter.NameFilter.empty() &&
- Name.find(FuncFilter.NameFilter) != Name.npos)
+ if (!FuncFilter.NameFilter.empty() && Name.contains(FuncFilter.NameFilter))
ValueCutoff = 0;
Dest.overlap(Other, Overlap, FuncLevelOverlap, ValueCutoff);
@@ -272,7 +271,7 @@ static void setSummary(IndexedInstrProf::Summary *TheSummary,
ProfileSummary &PS) {
using namespace IndexedInstrProf;
- std::vector<ProfileSummaryEntry> &Res = PS.getDetailedSummary();
+ const std::vector<ProfileSummaryEntry> &Res = PS.getDetailedSummary();
TheSummary->NumSummaryFields = Summary::NumKinds;
TheSummary->NumCutoffEntries = Res.size();
TheSummary->set(Summary::MaxFunctionCount, PS.getMaxFunctionCount());
diff --git a/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp b/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
index 2ab0f0cbc17a..f54df7b295e3 100644
--- a/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
+++ b/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
@@ -80,7 +80,7 @@ const ArrayRef<uint32_t> ProfileSummaryBuilder::DefaultCutoffs =
DefaultCutoffsData;
const ProfileSummaryEntry &
-ProfileSummaryBuilder::getEntryForPercentile(SummaryEntryVector &DS,
+ProfileSummaryBuilder::getEntryForPercentile(const SummaryEntryVector &DS,
uint64_t Percentile) {
auto It = partition_point(DS, [=](const ProfileSummaryEntry &Entry) {
return Entry.Cutoff < Percentile;
@@ -154,7 +154,8 @@ void ProfileSummaryBuilder::computeDetailedSummary() {
}
}
-uint64_t ProfileSummaryBuilder::getHotCountThreshold(SummaryEntryVector &DS) {
+uint64_t
+ProfileSummaryBuilder::getHotCountThreshold(const SummaryEntryVector &DS) {
auto &HotEntry =
ProfileSummaryBuilder::getEntryForPercentile(DS, ProfileSummaryCutoffHot);
uint64_t HotCountThreshold = HotEntry.MinCount;
@@ -163,7 +164,8 @@ uint64_t ProfileSummaryBuilder::getHotCountThreshold(SummaryEntryVector &DS) {
return HotCountThreshold;
}
-uint64_t ProfileSummaryBuilder::getColdCountThreshold(SummaryEntryVector &DS) {
+uint64_t
+ProfileSummaryBuilder::getColdCountThreshold(const SummaryEntryVector &DS) {
auto &ColdEntry = ProfileSummaryBuilder::getEntryForPercentile(
DS, ProfileSummaryCutoffCold);
uint64_t ColdCountThreshold = ColdEntry.MinCount;
@@ -181,17 +183,17 @@ std::unique_ptr<ProfileSummary> SampleProfileSummaryBuilder::getSummary() {
std::unique_ptr<ProfileSummary>
SampleProfileSummaryBuilder::computeSummaryForProfiles(
- const StringMap<sampleprof::FunctionSamples> &Profiles) {
+ const SampleProfileMap &Profiles) {
assert(NumFunctions == 0 &&
"This can only be called on an empty summary builder");
- StringMap<sampleprof::FunctionSamples> ContextLessProfiles;
- const StringMap<sampleprof::FunctionSamples> *ProfilesToUse = &Profiles;
+ sampleprof::SampleProfileMap ContextLessProfiles;
+ const sampleprof::SampleProfileMap *ProfilesToUse = &Profiles;
// For CSSPGO, context-sensitive profile effectively split a function profile
// into many copies each representing the CFG profile of a particular calling
// context. That makes the count distribution looks more flat as we now have
// more function profiles each with lower counts, which in turn leads to lower
- // hot thresholds. To compensate for that, by defauly we merge context
- // profiles before coumputing profile summary.
+ // hot thresholds. To compensate for that, by default we merge context
+ // profiles before computing profile summary.
if (UseContextLessSummary || (sampleprof::FunctionSamples::ProfileIsCS &&
!UseContextLessSummary.getNumOccurrences())) {
for (const auto &I : Profiles) {
diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp
index 60e707b146d5..fd8fd3b675b7 100644
--- a/llvm/lib/ProfileData/SampleProf.cpp
+++ b/llvm/lib/ProfileData/SampleProf.cpp
@@ -198,6 +198,21 @@ raw_ostream &llvm::sampleprof::operator<<(raw_ostream &OS,
return OS;
}
+void sampleprof::sortFuncProfiles(
+ const SampleProfileMap &ProfileMap,
+ std::vector<NameFunctionSamples> &SortedProfiles) {
+ for (const auto &I : ProfileMap) {
+ assert(I.first == I.second.getContext() && "Inconsistent profile map");
+ SortedProfiles.push_back(std::make_pair(I.second.getContext(), &I.second));
+ }
+ llvm::stable_sort(SortedProfiles, [](const NameFunctionSamples &A,
+ const NameFunctionSamples &B) {
+ if (A.second->getTotalSamples() == B.second->getTotalSamples())
+ return A.first < B.first;
+ return A.second->getTotalSamples() > B.second->getTotalSamples();
+ });
+}
+
unsigned FunctionSamples::getOffset(const DILocation *DIL) {
return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
0xffff;
@@ -230,9 +245,13 @@ const FunctionSamples *FunctionSamples::findFunctionSamples(
else
Discriminator = DIL->getBaseDiscriminator();
+ // Use C++ linkage name if possible.
+ StringRef Name = PrevDIL->getScope()->getSubprogram()->getLinkageName();
+ if (Name.empty())
+ Name = PrevDIL->getScope()->getSubprogram()->getName();
+
S.push_back(
- std::make_pair(LineLocation(getOffset(DIL), Discriminator),
- PrevDIL->getScope()->getSubprogram()->getLinkageName()));
+ std::make_pair(LineLocation(getOffset(DIL), Discriminator), Name));
PrevDIL = DIL;
}
if (S.size() == 0)
@@ -245,7 +264,7 @@ const FunctionSamples *FunctionSamples::findFunctionSamples(
}
void FunctionSamples::findAllNames(DenseSet<StringRef> &NameSet) const {
- NameSet.insert(Name);
+ NameSet.insert(getName());
for (const auto &BS : BodySamples)
for (const auto &TS : BS.second.getCallTargets())
NameSet.insert(TS.getKey());
@@ -316,7 +335,7 @@ std::error_code ProfileSymbolList::read(const uint8_t *Data,
void SampleContextTrimmer::trimAndMergeColdContextProfiles(
uint64_t ColdCountThreshold, bool TrimColdContext, bool MergeColdContext,
- uint32_t ColdContextFrameLength) {
+ uint32_t ColdContextFrameLength, bool TrimBaseProfileOnly) {
if (!TrimColdContext && !MergeColdContext)
return;
@@ -324,25 +343,32 @@ void SampleContextTrimmer::trimAndMergeColdContextProfiles(
if (ColdCountThreshold == 0)
return;
+  // Trimming base profiles only is mainly to honor the preinliner decision.
+  // When MergeColdContext is true, the preinliner decision is not honored
+  // anyway, so turn off TrimBaseProfileOnly.
+ if (MergeColdContext)
+ TrimBaseProfileOnly = false;
+
// Filter the cold profiles from ProfileMap and move them into a tmp
// container
- std::vector<std::pair<StringRef, const FunctionSamples *>> ColdProfiles;
+ std::vector<std::pair<SampleContext, const FunctionSamples *>> ColdProfiles;
for (const auto &I : ProfileMap) {
+ const SampleContext &Context = I.first;
const FunctionSamples &FunctionProfile = I.second;
- if (FunctionProfile.getTotalSamples() >= ColdCountThreshold)
- continue;
- ColdProfiles.emplace_back(I.getKey(), &I.second);
+ if (FunctionProfile.getTotalSamples() < ColdCountThreshold &&
+ (!TrimBaseProfileOnly || Context.isBaseContext()))
+ ColdProfiles.emplace_back(Context, &I.second);
}
// Remove the cold profile from ProfileMap and merge them into
// MergedProfileMap by the last K frames of context
- StringMap<FunctionSamples> MergedProfileMap;
+ SampleProfileMap MergedProfileMap;
for (const auto &I : ColdProfiles) {
if (MergeColdContext) {
- auto Ret = MergedProfileMap.try_emplace(
- I.second->getContext().getContextWithLastKFrames(
- ColdContextFrameLength),
- FunctionSamples());
+ auto MergedContext = I.second->getContext().getContextFrames();
+ if (ColdContextFrameLength < MergedContext.size())
+ MergedContext = MergedContext.take_back(ColdContextFrameLength);
+ auto Ret = MergedProfileMap.emplace(MergedContext, FunctionSamples());
FunctionSamples &MergedProfile = Ret.first->second;
MergedProfile.merge(*I.second);
}
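The merge path above keeps only the last ColdContextFrameLength frames of a cold context before folding it into MergedProfileMap, so cold profiles that share a trailing calling context collapse into one merged profile. A tiny sketch of that trimming rule, with plain strings standing in for SampleContext frames:

#include <cassert>
#include <string>
#include <vector>

// Keep only the trailing K frames of a context, as take_back() does above.
std::vector<std::string> lastKFrames(std::vector<std::string> Context,
                                     size_t K) {
  if (K < Context.size())
    Context.erase(Context.begin(), Context.end() - K);
  return Context;
}

int main() {
  std::vector<std::string> Ctx = {"main", "dispatch", "handler", "leaf"};
  assert((lastKFrames(Ctx, 2) ==
          std::vector<std::string>{"handler", "leaf"}));
  assert(lastKFrames(Ctx, 8) == Ctx); // shorter contexts are kept whole
  return 0;
}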
@@ -353,16 +379,15 @@ void SampleContextTrimmer::trimAndMergeColdContextProfiles(
for (const auto &I : MergedProfileMap) {
// Filter the cold merged profile
if (TrimColdContext && I.second.getTotalSamples() < ColdCountThreshold &&
- ProfileMap.find(I.getKey()) == ProfileMap.end())
+ ProfileMap.find(I.first) == ProfileMap.end())
continue;
// Merge the profile if the original profile exists, otherwise just insert
// as a new profile
- auto Ret = ProfileMap.try_emplace(I.getKey(), FunctionSamples());
+ auto Ret = ProfileMap.emplace(I.first, FunctionSamples());
if (Ret.second) {
- SampleContext FContext(Ret.first->first(), RawContext);
+ SampleContext FContext(Ret.first->first, RawContext);
FunctionSamples &FProfile = Ret.first->second;
FProfile.setContext(FContext);
- FProfile.setName(FContext.getNameWithoutContext());
}
FunctionSamples &OrigProfile = Ret.first->second;
OrigProfile.merge(I.second);
@@ -370,12 +395,12 @@ void SampleContextTrimmer::trimAndMergeColdContextProfiles(
}
void SampleContextTrimmer::canonicalizeContextProfiles() {
- std::vector<StringRef> ProfilesToBeRemoved;
- StringMap<FunctionSamples> ProfilesToBeAdded;
+ std::vector<SampleContext> ProfilesToBeRemoved;
+ SampleProfileMap ProfilesToBeAdded;
for (auto &I : ProfileMap) {
FunctionSamples &FProfile = I.second;
- StringRef ContextStr = FProfile.getNameWithContext();
- if (I.first() == ContextStr)
+ SampleContext &Context = FProfile.getContext();
+ if (I.first == Context)
continue;
// Use the context string from FunctionSamples to update the keys of
@@ -390,10 +415,10 @@ void SampleContextTrimmer::canonicalizeContextProfiles() {
// with different profiles) from the map can cause a conflict if they are
// not handled in a right order. This can be solved by just caching the
// profiles to be added.
- auto Ret = ProfilesToBeAdded.try_emplace(ContextStr, FProfile);
+ auto Ret = ProfilesToBeAdded.emplace(Context, FProfile);
(void)Ret;
assert(Ret.second && "Context conflict during canonicalization");
- ProfilesToBeRemoved.push_back(I.first());
+ ProfilesToBeRemoved.push_back(I.first);
}
for (auto &I : ProfilesToBeRemoved) {
@@ -401,7 +426,7 @@ void SampleContextTrimmer::canonicalizeContextProfiles() {
}
for (auto &I : ProfilesToBeAdded) {
- ProfileMap.try_emplace(I.first(), I.second);
+ ProfileMap.emplace(I.first, I.second);
}
}
diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp
index 6058eddb13dc..c99a19020511 100644
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -53,21 +53,23 @@ using namespace sampleprof;
// For ext-binary format profiles, the flag is set in the summary.
static cl::opt<bool> ProfileIsFSDisciminator(
"profile-isfs", cl::Hidden, cl::init(false),
- cl::desc("Profile uses flow senstive discriminators"));
+ cl::desc("Profile uses flow sensitive discriminators"));
/// Dump the function profile for \p FName.
///
-/// \param FName Name of the function to print.
+/// \param FContext Name + context of the function to print.
/// \param OS Stream to emit the output to.
-void SampleProfileReader::dumpFunctionProfile(StringRef FName,
+void SampleProfileReader::dumpFunctionProfile(SampleContext FContext,
raw_ostream &OS) {
- OS << "Function: " << FName << ": " << Profiles[FName];
+ OS << "Function: " << FContext.toString() << ": " << Profiles[FContext];
}
/// Dump all the function profiles found on stream \p OS.
void SampleProfileReader::dump(raw_ostream &OS) {
- for (const auto &I : Profiles)
- dumpFunctionProfile(I.getKey(), OS);
+ std::vector<NameFunctionSamples> V;
+ sortFuncProfiles(Profiles, V);
+ for (const auto &I : V)
+ dumpFunctionProfile(I.first, OS);
}
/// Parse \p Input as function head.
@@ -249,6 +251,7 @@ std::error_code SampleProfileReaderText::readImpl() {
bool SeenMetadata = false;
ProfileIsFS = ProfileIsFSDisciminator;
+ FunctionSamples::ProfileIsFS = ProfileIsFS;
for (; !LineIt.is_at_eof(); ++LineIt) {
if ((*LineIt)[(*LineIt).find_first_not_of(' ')] == '#')
continue;
@@ -273,12 +276,11 @@ std::error_code SampleProfileReaderText::readImpl() {
return sampleprof_error::malformed;
}
SeenMetadata = false;
- SampleContext FContext(FName);
+ SampleContext FContext(FName, CSNameTable);
if (FContext.hasContext())
++CSProfileCount;
Profiles[FContext] = FunctionSamples();
FunctionSamples &FProfile = Profiles[FContext];
- FProfile.setName(FContext.getNameWithoutContext());
FProfile.setContext(FContext);
MergeResult(Result, FProfile.addTotalSamples(NumSamples));
MergeResult(Result, FProfile.addHeadSamples(NumHeadSamples));
@@ -450,6 +452,13 @@ ErrorOr<StringRef> SampleProfileReaderBinary::readStringFromTable() {
return NameTable[*Idx];
}
+ErrorOr<SampleContext> SampleProfileReaderBinary::readSampleContextFromTable() {
+ auto FName(readStringFromTable());
+ if (std::error_code EC = FName.getError())
+ return EC;
+ return SampleContext(*FName);
+}
+
ErrorOr<StringRef> SampleProfileReaderExtBinaryBase::readStringFromTable() {
if (!FixedLengthMD5)
return SampleProfileReaderBinary::readStringFromTable();
@@ -576,18 +585,16 @@ SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) {
if (std::error_code EC = NumHeadSamples.getError())
return EC;
- auto FName(readStringFromTable());
- if (std::error_code EC = FName.getError())
+ ErrorOr<SampleContext> FContext(readSampleContextFromTable());
+ if (std::error_code EC = FContext.getError())
return EC;
- SampleContext FContext(*FName);
- Profiles[FContext] = FunctionSamples();
- FunctionSamples &FProfile = Profiles[FContext];
- FProfile.setName(FContext.getNameWithoutContext());
- FProfile.setContext(FContext);
+ Profiles[*FContext] = FunctionSamples();
+ FunctionSamples &FProfile = Profiles[*FContext];
+ FProfile.setContext(*FContext);
FProfile.addHeadSamples(*NumHeadSamples);
- if (FContext.hasContext())
+ if (FContext->hasContext())
CSProfileCount++;
if (std::error_code EC = readProfile(FProfile))
@@ -597,6 +604,7 @@ SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) {
std::error_code SampleProfileReaderBinary::readImpl() {
ProfileIsFS = ProfileIsFSDisciminator;
+ FunctionSamples::ProfileIsFS = ProfileIsFS;
while (!at_eof()) {
if (std::error_code EC = readFuncProfile(Data))
return EC;
@@ -605,6 +613,31 @@ std::error_code SampleProfileReaderBinary::readImpl() {
return sampleprof_error::success;
}
+ErrorOr<SampleContextFrames>
+SampleProfileReaderExtBinaryBase::readContextFromTable() {
+ auto ContextIdx = readNumber<uint32_t>();
+ if (std::error_code EC = ContextIdx.getError())
+ return EC;
+ if (*ContextIdx >= CSNameTable->size())
+ return sampleprof_error::truncated_name_table;
+ return (*CSNameTable)[*ContextIdx];
+}
+
+ErrorOr<SampleContext>
+SampleProfileReaderExtBinaryBase::readSampleContextFromTable() {
+ if (ProfileIsCS) {
+ auto FContext(readContextFromTable());
+ if (std::error_code EC = FContext.getError())
+ return EC;
+ return SampleContext(*FContext);
+ } else {
+ auto FName(readStringFromTable());
+ if (std::error_code EC = FName.getError())
+ return EC;
+ return SampleContext(*FName);
+ }
+}
+
std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
const uint8_t *Start, uint64_t Size, const SecHdrTableEntry &Entry) {
Data = Start;
@@ -632,11 +665,17 @@ std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
return EC;
break;
}
+ case SecCSNameTable: {
+ if (std::error_code EC = readCSNameTableSec())
+ return EC;
+ break;
+ }
case SecLBRProfile:
if (std::error_code EC = readFuncProfiles())
return EC;
break;
case SecFuncOffsetTable:
+ FuncOffsetsOrdered = hasSecFlag(Entry, SecFuncOffsetFlags::SecFlagOrdered);
if (std::error_code EC = readFuncOffsetTable())
return EC;
break;
@@ -682,17 +721,27 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncOffsetTable() {
return EC;
FuncOffsetTable.reserve(*Size);
+
+ if (FuncOffsetsOrdered) {
+ OrderedFuncOffsets =
+ std::make_unique<std::vector<std::pair<SampleContext, uint64_t>>>();
+ OrderedFuncOffsets->reserve(*Size);
+ }
+
for (uint32_t I = 0; I < *Size; ++I) {
- auto FName(readStringFromTable());
- if (std::error_code EC = FName.getError())
+ auto FContext(readSampleContextFromTable());
+ if (std::error_code EC = FContext.getError())
return EC;
auto Offset = readNumber<uint64_t>();
if (std::error_code EC = Offset.getError())
return EC;
- FuncOffsetTable[*FName] = *Offset;
+ FuncOffsetTable[*FContext] = *Offset;
+ if (FuncOffsetsOrdered)
+ OrderedFuncOffsets->emplace_back(*FContext, *Offset);
}
+
return sampleprof_error::success;
}
@@ -721,75 +770,77 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
}
}
- if (useMD5()) {
- for (auto Name : FuncsToUse) {
- auto GUID = std::to_string(MD5Hash(Name));
- auto iter = FuncOffsetTable.find(StringRef(GUID));
- if (iter == FuncOffsetTable.end())
- continue;
- const uint8_t *FuncProfileAddr = Start + iter->second;
- assert(FuncProfileAddr < End && "out of LBRProfile section");
- if (std::error_code EC = readFuncProfile(FuncProfileAddr))
- return EC;
+ if (ProfileIsCS) {
+ DenseSet<uint64_t> FuncGuidsToUse;
+ if (useMD5()) {
+ for (auto Name : FuncsToUse)
+ FuncGuidsToUse.insert(Function::getGUID(Name));
}
- } else if (FunctionSamples::ProfileIsCS) {
- // Compute the ordered set of names, so we can
- // get all context profiles under a subtree by
- // iterating through the ordered names.
- struct Comparer {
- // Ignore the closing ']' when ordering context
- bool operator()(const StringRef &L, const StringRef &R) const {
- return L.substr(0, L.size() - 1) < R.substr(0, R.size() - 1);
+
+      // For each function in the current module, load all context profiles
+      // for the function as well as their callee contexts, which can help
+      // profile-guided importing for ThinLTO. This can be achieved by walking
+      // through an ordered context container, where contexts are laid out
+      // as if they were walked in preorder of a context trie. While
+      // traversing the trie, a link to the highest common ancestor node is
+      // kept so that all of its descendants will be loaded.
+ assert(OrderedFuncOffsets.get() &&
+ "func offset table should always be sorted in CS profile");
+ const SampleContext *CommonContext = nullptr;
+ for (const auto &NameOffset : *OrderedFuncOffsets) {
+ const auto &FContext = NameOffset.first;
+ auto FName = FContext.getName();
+      // For a function in the current module, keep its farthest ancestor
+      // context. This can be used to load itself and its child and
+      // sibling contexts.
+ if ((useMD5() && FuncGuidsToUse.count(std::stoull(FName.data()))) ||
+ (!useMD5() && (FuncsToUse.count(FName) ||
+ (Remapper && Remapper->exist(FName))))) {
+ if (!CommonContext || !CommonContext->IsPrefixOf(FContext))
+ CommonContext = &FContext;
}
- };
- std::set<StringRef, Comparer> OrderedNames;
- for (auto Name : FuncOffsetTable) {
- OrderedNames.insert(Name.first);
- }
- // For each function in current module, load all
- // context profiles for the function.
- for (auto NameOffset : FuncOffsetTable) {
- StringRef ContextName = NameOffset.first;
- SampleContext FContext(ContextName);
- auto FuncName = FContext.getNameWithoutContext();
- if (!FuncsToUse.count(FuncName) &&
- (!Remapper || !Remapper->exist(FuncName)))
- continue;
-
- // For each context profile we need, try to load
- // all context profile in the subtree. This can
- // help profile guided importing for ThinLTO.
- auto It = OrderedNames.find(ContextName);
- while (It != OrderedNames.end() &&
- It->startswith(ContextName.substr(0, ContextName.size() - 1))) {
- const uint8_t *FuncProfileAddr = Start + FuncOffsetTable[*It];
+ if (CommonContext == &FContext ||
+ (CommonContext && CommonContext->IsPrefixOf(FContext))) {
+ // Load profile for the current context which originated from
+ // the common ancestor.
+ const uint8_t *FuncProfileAddr = Start + NameOffset.second;
assert(FuncProfileAddr < End && "out of LBRProfile section");
if (std::error_code EC = readFuncProfile(FuncProfileAddr))
return EC;
- // Remove loaded context profile so we won't
- // load it repeatedly.
- It = OrderedNames.erase(It);
}
}
} else {
- for (auto NameOffset : FuncOffsetTable) {
- SampleContext FContext(NameOffset.first);
- auto FuncName = FContext.getNameWithoutContext();
- if (!FuncsToUse.count(FuncName) &&
- (!Remapper || !Remapper->exist(FuncName)))
- continue;
- const uint8_t *FuncProfileAddr = Start + NameOffset.second;
- assert(FuncProfileAddr < End && "out of LBRProfile section");
- if (std::error_code EC = readFuncProfile(FuncProfileAddr))
- return EC;
+ if (useMD5()) {
+ for (auto Name : FuncsToUse) {
+ auto GUID = std::to_string(MD5Hash(Name));
+ auto iter = FuncOffsetTable.find(StringRef(GUID));
+ if (iter == FuncOffsetTable.end())
+ continue;
+ const uint8_t *FuncProfileAddr = Start + iter->second;
+ assert(FuncProfileAddr < End && "out of LBRProfile section");
+ if (std::error_code EC = readFuncProfile(FuncProfileAddr))
+ return EC;
+ }
+ } else {
+ for (auto NameOffset : FuncOffsetTable) {
+ SampleContext FContext(NameOffset.first);
+ auto FuncName = FContext.getName();
+ if (!FuncsToUse.count(FuncName) &&
+ (!Remapper || !Remapper->exist(FuncName)))
+ continue;
+ const uint8_t *FuncProfileAddr = Start + NameOffset.second;
+ assert(FuncProfileAddr < End && "out of LBRProfile section");
+ if (std::error_code EC = readFuncProfile(FuncProfileAddr))
+ return EC;
+ }
}
}
Data = End;
}
assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
"Cannot have both context-sensitive and regular profile");
- assert(ProfileIsCS == (CSProfileCount > 0) &&
+ assert((!CSProfileCount || ProfileIsCS) &&
"Section flag should be consistent with actual profile");
return sampleprof_error::success;
}
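The context-sensitive branch above relies on the func offset table being laid out in preorder of the context trie: for every function of interest it remembers the farthest ancestor context seen so far and loads every subsequent context that ancestor prefixes. A simplified model of that walk, using strings and a textual prefix test in place of SampleContext::IsPrefixOf:

#include <iostream>
#include <set>
#include <string>
#include <vector>

int main() {
  // Preorder of a small context trie.
  std::vector<std::string> Ordered = {
      "main", "main @ foo", "main @ foo @ bar", "main @ baz", "other"};
  std::set<std::string> FuncsToUse = {"foo"};

  auto leafName = [](const std::string &C) {
    size_t Pos = C.rfind(" @ ");
    return Pos == std::string::npos ? C : C.substr(Pos + 3);
  };
  auto isPrefixOf = [](const std::string &A, const std::string &B) {
    return B.size() > A.size() && B.compare(0, A.size(), A) == 0;
  };

  const std::string *Common = nullptr;
  for (const std::string &Ctx : Ordered) {
    // Keep the farthest ancestor context whose leaf is a function we need.
    if (FuncsToUse.count(leafName(Ctx)) &&
        (!Common || !isPrefixOf(*Common, Ctx)))
      Common = &Ctx;
    // Load the ancestor itself and everything it prefixes.
    if (Common && (&Ctx == Common || isPrefixOf(*Common, Ctx)))
      std::cout << "load: " << Ctx << "\n"; // "main @ foo", "main @ foo @ bar"
  }
  return 0;
}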
@@ -885,6 +936,7 @@ std::error_code SampleProfileReaderCompactBinary::readImpl() {
// given a module.
bool LoadFuncsToBeUsed = collectFuncsFromModule();
ProfileIsFS = ProfileIsFSDisciminator;
+ FunctionSamples::ProfileIsFS = ProfileIsFS;
std::vector<uint64_t> OffsetsToUse;
if (!LoadFuncsToBeUsed) {
// load all the function profiles.
@@ -983,22 +1035,62 @@ std::error_code SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5) {
return SampleProfileReaderBinary::readNameTable();
}
+// Read in the CS name table section, which contains a list of context
+// vectors. Each element of a context vector, i.e. a frame, refers to the
+// underlying raw function names that are stored in the name table, as well as
+// a callsite identifier that only makes sense for non-leaf frames.
+std::error_code SampleProfileReaderExtBinaryBase::readCSNameTableSec() {
+ auto Size = readNumber<uint32_t>();
+ if (std::error_code EC = Size.getError())
+ return EC;
+
+ std::vector<SampleContextFrameVector> *PNameVec =
+ new std::vector<SampleContextFrameVector>();
+ PNameVec->reserve(*Size);
+ for (uint32_t I = 0; I < *Size; ++I) {
+ PNameVec->emplace_back(SampleContextFrameVector());
+ auto ContextSize = readNumber<uint32_t>();
+ if (std::error_code EC = ContextSize.getError())
+ return EC;
+ for (uint32_t J = 0; J < *ContextSize; ++J) {
+ auto FName(readStringFromTable());
+ if (std::error_code EC = FName.getError())
+ return EC;
+ auto LineOffset = readNumber<uint64_t>();
+ if (std::error_code EC = LineOffset.getError())
+ return EC;
+
+ if (!isOffsetLegal(*LineOffset))
+ return std::error_code();
+
+ auto Discriminator = readNumber<uint64_t>();
+ if (std::error_code EC = Discriminator.getError())
+ return EC;
+
+ PNameVec->back().emplace_back(
+ FName.get(), LineLocation(LineOffset.get(), Discriminator.get()));
+ }
+ }
+
+ // From this point the underlying object of CSNameTable should be immutable.
+ CSNameTable.reset(PNameVec);
+ return sampleprof_error::success;
+}
+
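Per the comment above, the serialized CS name table is a count of contexts, each holding a frame count followed by (name index, line offset, discriminator) triples. A schematic decoder over plain integers, ignoring the ULEB128 encoding and the name-table lookup, just to make the nesting concrete:

#include <cstdint>
#include <vector>

struct Frame { uint32_t NameIdx; uint64_t LineOffset, Discriminator; };
using Context = std::vector<Frame>;

// Decode: context count, then per context a frame count and its triples.
std::vector<Context> decodeCSNameTable(const std::vector<uint64_t> &S) {
  size_t P = 0;
  auto Next = [&] { return S.at(P++); };
  std::vector<Context> Table(Next());
  for (Context &C : Table) {
    C.resize(Next());
    for (Frame &F : C) {
      F.NameIdx = (uint32_t)Next();
      F.LineOffset = Next();
      F.Discriminator = Next();
    }
  }
  return Table;
}

int main() {
  // One context of two frames: {name 0, line 5, disc 0}, {name 3, line 1, disc 0}.
  auto Table = decodeCSNameTable({1, 2, 0, 5, 0, 3, 1, 0});
  return Table.size() == 1 && Table[0].size() == 2 ? 0 : 1;
}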
std::error_code
SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) {
while (Data < End) {
- auto FName(readStringFromTable());
- if (std::error_code EC = FName.getError())
+ auto FContext(readSampleContextFromTable());
+ if (std::error_code EC = FContext.getError())
return EC;
- SampleContext FContext(*FName);
- bool ProfileInMap = Profiles.count(FContext);
-
+ bool ProfileInMap = Profiles.count(*FContext);
if (ProfileIsProbeBased) {
auto Checksum = readNumber<uint64_t>();
if (std::error_code EC = Checksum.getError())
return EC;
if (ProfileInMap)
- Profiles[FContext].setFunctionHash(*Checksum);
+ Profiles[*FContext].setFunctionHash(*Checksum);
}
if (ProfileHasAttribute) {
@@ -1006,7 +1098,7 @@ SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) {
if (std::error_code EC = Attributes.getError())
return EC;
if (ProfileInMap)
- Profiles[FContext].getContext().setAllAttributes(*Attributes);
+ Profiles[*FContext].getContext().setAllAttributes(*Attributes);
}
}
@@ -1132,6 +1224,16 @@ static std::string getSecFlagsStr(const SecHdrTableEntry &Entry) {
if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator))
Flags.append("fs-discriminator,");
break;
+ case SecFuncOffsetTable:
+ if (hasSecFlag(Entry, SecFuncOffsetFlags::SecFlagOrdered))
+ Flags.append("ordered,");
+ break;
+ case SecFuncMetadata:
+ if (hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased))
+ Flags.append("probe,");
+ if (hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagHasAttribute))
+ Flags.append("attr,");
+ break;
default:
break;
}
diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp
index 535f87968104..78006aab1541 100644
--- a/llvm/lib/ProfileData/SampleProfWriter.cpp
+++ b/llvm/lib/ProfileData/SampleProfWriter.cpp
@@ -41,23 +41,10 @@
using namespace llvm;
using namespace sampleprof;
-std::error_code SampleProfileWriter::writeFuncProfiles(
- const StringMap<FunctionSamples> &ProfileMap) {
- // Sort the ProfileMap by total samples.
- typedef std::pair<StringRef, const FunctionSamples *> NameFunctionSamples;
+std::error_code
+SampleProfileWriter::writeFuncProfiles(const SampleProfileMap &ProfileMap) {
std::vector<NameFunctionSamples> V;
- for (const auto &I : ProfileMap) {
- assert(I.getKey() == I.second.getNameWithContext() &&
- "Inconsistent profile map");
- V.push_back(std::make_pair(I.second.getNameWithContext(), &I.second));
- }
- llvm::stable_sort(
- V, [](const NameFunctionSamples &A, const NameFunctionSamples &B) {
- if (A.second->getTotalSamples() == B.second->getTotalSamples())
- return A.first > B.first;
- return A.second->getTotalSamples() > B.second->getTotalSamples();
- });
-
+ sortFuncProfiles(ProfileMap, V);
for (const auto &I : V) {
if (std::error_code EC = writeSample(*I.second))
return EC;
@@ -65,8 +52,7 @@ std::error_code SampleProfileWriter::writeFuncProfiles(
return sampleprof_error::success;
}
-std::error_code
-SampleProfileWriter::write(const StringMap<FunctionSamples> &ProfileMap) {
+std::error_code SampleProfileWriter::write(const SampleProfileMap &ProfileMap) {
if (std::error_code EC = writeHeader(ProfileMap))
return EC;
@@ -130,8 +116,8 @@ std::error_code SampleProfileWriterExtBinaryBase::addNewSection(
return sampleprof_error::success;
}
-std::error_code SampleProfileWriterExtBinaryBase::write(
- const StringMap<FunctionSamples> &ProfileMap) {
+std::error_code
+SampleProfileWriterExtBinaryBase::write(const SampleProfileMap &ProfileMap) {
if (std::error_code EC = writeHeader(ProfileMap))
return EC;
@@ -146,11 +132,28 @@ std::error_code SampleProfileWriterExtBinaryBase::write(
return sampleprof_error::success;
}
+std::error_code SampleProfileWriterExtBinaryBase::writeContextIdx(
+ const SampleContext &Context) {
+ if (Context.hasContext())
+ return writeCSNameIdx(Context);
+ else
+ return SampleProfileWriterBinary::writeNameIdx(Context.getName());
+}
+
+std::error_code
+SampleProfileWriterExtBinaryBase::writeCSNameIdx(const SampleContext &Context) {
+ const auto &Ret = CSNameTable.find(Context);
+ if (Ret == CSNameTable.end())
+ return sampleprof_error::truncated_name_table;
+ encodeULEB128(Ret->second, *OutputStream);
+ return sampleprof_error::success;
+}
+
std::error_code
SampleProfileWriterExtBinaryBase::writeSample(const FunctionSamples &S) {
uint64_t Offset = OutputStream->tell();
- StringRef Name = S.getNameWithContext();
- FuncOffsetTable[Name] = Offset - SecLBRProfileStart;
+ auto &Context = S.getContext();
+ FuncOffsetTable[Context] = Offset - SecLBRProfileStart;
encodeULEB128(S.getHeadSamples(), *OutputStream);
return writeBody(S);
}
@@ -162,24 +165,42 @@ std::error_code SampleProfileWriterExtBinaryBase::writeFuncOffsetTable() {
encodeULEB128(FuncOffsetTable.size(), OS);
// Write out FuncOffsetTable.
- for (auto Entry : FuncOffsetTable) {
- if (std::error_code EC =
- writeNameIdx(Entry.first, FunctionSamples::ProfileIsCS))
+ auto WriteItem = [&](const SampleContext &Context, uint64_t Offset) {
+ if (std::error_code EC = writeContextIdx(Context))
return EC;
- encodeULEB128(Entry.second, OS);
+ encodeULEB128(Offset, OS);
+ return (std::error_code)sampleprof_error::success;
+ };
+
+ if (FunctionSamples::ProfileIsCS) {
+    // Sort the contexts before writing them out. This helps quickly load all
+    // context profiles for a function as well as their callee contexts, which
+    // can help profile-guided importing for ThinLTO.
+ std::map<SampleContext, uint64_t> OrderedFuncOffsetTable(
+ FuncOffsetTable.begin(), FuncOffsetTable.end());
+ for (const auto &Entry : OrderedFuncOffsetTable) {
+ if (std::error_code EC = WriteItem(Entry.first, Entry.second))
+ return EC;
+ }
+ addSectionFlag(SecFuncOffsetTable, SecFuncOffsetFlags::SecFlagOrdered);
+ } else {
+ for (const auto &Entry : FuncOffsetTable) {
+ if (std::error_code EC = WriteItem(Entry.first, Entry.second))
+ return EC;
+ }
}
+
FuncOffsetTable.clear();
return sampleprof_error::success;
}
std::error_code SampleProfileWriterExtBinaryBase::writeFuncMetadata(
- const StringMap<FunctionSamples> &Profiles) {
+ const SampleProfileMap &Profiles) {
if (!FunctionSamples::ProfileIsProbeBased && !FunctionSamples::ProfileIsCS)
return sampleprof_error::success;
auto &OS = *OutputStream;
for (const auto &Entry : Profiles) {
- if (std::error_code EC = writeNameIdx(Entry.second.getNameWithContext(),
- FunctionSamples::ProfileIsCS))
+ if (std::error_code EC = writeContextIdx(Entry.second.getContext()))
return EC;
if (FunctionSamples::ProfileIsProbeBased)
encodeULEB128(Entry.second.getFunctionHash(), OS);
@@ -195,7 +216,7 @@ std::error_code SampleProfileWriterExtBinaryBase::writeNameTable() {
auto &OS = *OutputStream;
std::set<StringRef> V;
- stablizeNameTable(V);
+ stablizeNameTable(NameTable, V);
// Write out the MD5 name table. We wrote unencoded MD5 so reader can
// retrieve the name using the name index without having to read the
@@ -208,11 +229,10 @@ std::error_code SampleProfileWriterExtBinaryBase::writeNameTable() {
}
std::error_code SampleProfileWriterExtBinaryBase::writeNameTableSection(
- const StringMap<FunctionSamples> &ProfileMap) {
+ const SampleProfileMap &ProfileMap) {
for (const auto &I : ProfileMap) {
- assert(I.first() == I.second.getNameWithContext() &&
- "Inconsistent profile map");
- addName(I.second.getNameWithContext(), FunctionSamples::ProfileIsCS);
+ assert(I.first == I.second.getContext() && "Inconsistent profile map");
+ addContext(I.second.getContext());
addNames(I.second);
}
@@ -220,7 +240,7 @@ std::error_code SampleProfileWriterExtBinaryBase::writeNameTableSection(
// so compiler won't strip the suffix during profile matching after
// seeing the flag in the profile.
for (const auto &I : NameTable) {
- if (I.first.find(FunctionSamples::UniqSuffix) != StringRef::npos) {
+ if (I.first.contains(FunctionSamples::UniqSuffix)) {
addSectionFlag(SecNameTable, SecNameTableFlags::SecFlagUniqSuffix);
break;
}
@@ -231,6 +251,34 @@ std::error_code SampleProfileWriterExtBinaryBase::writeNameTableSection(
return sampleprof_error::success;
}
+std::error_code SampleProfileWriterExtBinaryBase::writeCSNameTableSection() {
+ // Sort the names to make CSNameTable deterministic.
+ std::set<SampleContext> OrderedContexts;
+ for (const auto &I : CSNameTable)
+ OrderedContexts.insert(I.first);
+ assert(OrderedContexts.size() == CSNameTable.size() &&
+ "Unmatched ordered and unordered contexts");
+ uint64_t I = 0;
+ for (auto &Context : OrderedContexts)
+ CSNameTable[Context] = I++;
+
+ auto &OS = *OutputStream;
+ encodeULEB128(OrderedContexts.size(), OS);
+ support::endian::Writer Writer(OS, support::little);
+ for (auto Context : OrderedContexts) {
+ auto Frames = Context.getContextFrames();
+ encodeULEB128(Frames.size(), OS);
+ for (auto &Callsite : Frames) {
+ if (std::error_code EC = writeNameIdx(Callsite.FuncName))
+ return EC;
+ encodeULEB128(Callsite.Location.LineOffset, OS);
+ encodeULEB128(Callsite.Location.Discriminator, OS);
+ }
+ }
+
+ return sampleprof_error::success;
+}
+
std::error_code
SampleProfileWriterExtBinaryBase::writeProfileSymbolListSection() {
if (ProfSymList && ProfSymList->size() > 0)
@@ -241,8 +289,7 @@ SampleProfileWriterExtBinaryBase::writeProfileSymbolListSection() {
}
std::error_code SampleProfileWriterExtBinaryBase::writeOneSection(
- SecType Type, uint32_t LayoutIdx,
- const StringMap<FunctionSamples> &ProfileMap) {
+ SecType Type, uint32_t LayoutIdx, const SampleProfileMap &ProfileMap) {
// The setting of SecFlagCompress should happen before markSectionStart.
if (Type == SecProfileSymbolList && ProfSymList && ProfSymList->toCompress())
setToCompressSection(SecProfileSymbolList);
@@ -266,6 +313,10 @@ std::error_code SampleProfileWriterExtBinaryBase::writeOneSection(
if (auto EC = writeNameTableSection(ProfileMap))
return EC;
break;
+ case SecCSNameTable:
+ if (auto EC = writeCSNameTableSection())
+ return EC;
+ break;
case SecLBRProfile:
SecLBRProfileStart = OutputStream->tell();
if (std::error_code EC = writeFuncProfiles(ProfileMap))
@@ -294,7 +345,7 @@ std::error_code SampleProfileWriterExtBinaryBase::writeOneSection(
}
std::error_code SampleProfileWriterExtBinary::writeDefaultLayout(
- const StringMap<FunctionSamples> &ProfileMap) {
+ const SampleProfileMap &ProfileMap) {
// The const indices passed to writeOneSection below are specifying the
// positions of the sections in SectionHdrLayout. Look at
// initSectionHdrLayout to find out where each section is located in
@@ -303,32 +354,33 @@ std::error_code SampleProfileWriterExtBinary::writeDefaultLayout(
return EC;
if (auto EC = writeOneSection(SecNameTable, 1, ProfileMap))
return EC;
- if (auto EC = writeOneSection(SecLBRProfile, 3, ProfileMap))
+ if (auto EC = writeOneSection(SecCSNameTable, 2, ProfileMap))
+ return EC;
+ if (auto EC = writeOneSection(SecLBRProfile, 4, ProfileMap))
return EC;
- if (auto EC = writeOneSection(SecProfileSymbolList, 4, ProfileMap))
+ if (auto EC = writeOneSection(SecProfileSymbolList, 5, ProfileMap))
return EC;
- if (auto EC = writeOneSection(SecFuncOffsetTable, 2, ProfileMap))
+ if (auto EC = writeOneSection(SecFuncOffsetTable, 3, ProfileMap))
return EC;
- if (auto EC = writeOneSection(SecFuncMetadata, 5, ProfileMap))
+ if (auto EC = writeOneSection(SecFuncMetadata, 6, ProfileMap))
return EC;
return sampleprof_error::success;
}
-static void
-splitProfileMapToTwo(const StringMap<FunctionSamples> &ProfileMap,
- StringMap<FunctionSamples> &ContextProfileMap,
- StringMap<FunctionSamples> &NoContextProfileMap) {
+static void splitProfileMapToTwo(const SampleProfileMap &ProfileMap,
+ SampleProfileMap &ContextProfileMap,
+ SampleProfileMap &NoContextProfileMap) {
for (const auto &I : ProfileMap) {
if (I.second.getCallsiteSamples().size())
- ContextProfileMap.insert({I.first(), I.second});
+ ContextProfileMap.insert({I.first, I.second});
else
- NoContextProfileMap.insert({I.first(), I.second});
+ NoContextProfileMap.insert({I.first, I.second});
}
}
std::error_code SampleProfileWriterExtBinary::writeCtxSplitLayout(
- const StringMap<FunctionSamples> &ProfileMap) {
- StringMap<FunctionSamples> ContextProfileMap, NoContextProfileMap;
+ const SampleProfileMap &ProfileMap) {
+ SampleProfileMap ContextProfileMap, NoContextProfileMap;
splitProfileMapToTwo(ProfileMap, ContextProfileMap, NoContextProfileMap);
if (auto EC = writeOneSection(SecProfSummary, 0, ProfileMap))
@@ -358,7 +410,7 @@ std::error_code SampleProfileWriterExtBinary::writeCtxSplitLayout(
}
std::error_code SampleProfileWriterExtBinary::writeSections(
- const StringMap<FunctionSamples> &ProfileMap) {
+ const SampleProfileMap &ProfileMap) {
std::error_code EC;
if (SecLayout == DefaultLayout)
EC = writeDefaultLayout(ProfileMap);
@@ -369,8 +421,8 @@ std::error_code SampleProfileWriterExtBinary::writeSections(
return EC;
}
-std::error_code SampleProfileWriterCompactBinary::write(
- const StringMap<FunctionSamples> &ProfileMap) {
+std::error_code
+SampleProfileWriterCompactBinary::write(const SampleProfileMap &ProfileMap) {
if (std::error_code EC = SampleProfileWriter::write(ProfileMap))
return EC;
if (std::error_code EC = writeFuncOffsetTable())
@@ -389,7 +441,7 @@ std::error_code SampleProfileWriterCompactBinary::write(
std::error_code SampleProfileWriterText::writeSample(const FunctionSamples &S) {
auto &OS = *OutputStream;
if (FunctionSamples::ProfileIsCS)
- OS << "[" << S.getNameWithContext() << "]:" << S.getTotalSamples();
+ OS << "[" << S.getContext().toString() << "]:" << S.getTotalSamples();
else
OS << S.getName() << ":" << S.getTotalSamples();
@@ -445,27 +497,28 @@ std::error_code SampleProfileWriterText::writeSample(const FunctionSamples &S) {
return sampleprof_error::success;
}
-std::error_code SampleProfileWriterBinary::writeNameIdx(StringRef FName,
- bool IsContextName) {
- std::string BracketedName;
- if (IsContextName) {
- BracketedName = "[" + FName.str() + "]";
- FName = StringRef(BracketedName);
- }
+std::error_code
+SampleProfileWriterBinary::writeContextIdx(const SampleContext &Context) {
+ assert(!Context.hasContext() && "cs profile is not supported");
+ return writeNameIdx(Context.getName());
+}
- const auto &Ret = NameTable.find(FName);
- if (Ret == NameTable.end())
+std::error_code SampleProfileWriterBinary::writeNameIdx(StringRef FName) {
+ auto &NTable = getNameTable();
+ const auto &Ret = NTable.find(FName);
+ if (Ret == NTable.end())
return sampleprof_error::truncated_name_table;
encodeULEB128(Ret->second, *OutputStream);
return sampleprof_error::success;
}
-void SampleProfileWriterBinary::addName(StringRef FName, bool IsContextName) {
- if (IsContextName) {
- auto It = BracketedContextStr.insert("[" + FName.str() + "]");
- FName = StringRef(*It.first);
- }
- NameTable.insert(std::make_pair(FName, 0));
+void SampleProfileWriterBinary::addName(StringRef FName) {
+ auto &NTable = getNameTable();
+ NTable.insert(std::make_pair(FName, 0));
+}
+
+void SampleProfileWriterBinary::addContext(const SampleContext &Context) {
+ addName(Context.getName());
}
void SampleProfileWriterBinary::addNames(const FunctionSamples &S) {
@@ -485,7 +538,19 @@ void SampleProfileWriterBinary::addNames(const FunctionSamples &S) {
}
}
-void SampleProfileWriterBinary::stablizeNameTable(std::set<StringRef> &V) {
+void SampleProfileWriterExtBinaryBase::addContext(
+ const SampleContext &Context) {
+ if (Context.hasContext()) {
+ for (auto &Callsite : Context.getContextFrames())
+ SampleProfileWriterBinary::addName(Callsite.FuncName);
+ CSNameTable.insert(std::make_pair(Context, 0));
+ } else {
+ SampleProfileWriterBinary::addName(Context.getName());
+ }
+}
+
+void SampleProfileWriterBinary::stablizeNameTable(
+ MapVector<StringRef, uint32_t> &NameTable, std::set<StringRef> &V) {
// Sort the names to make NameTable deterministic.
for (const auto &I : NameTable)
V.insert(I.first);
@@ -497,7 +562,7 @@ void SampleProfileWriterBinary::stablizeNameTable(std::set<StringRef> &V) {
std::error_code SampleProfileWriterBinary::writeNameTable() {
auto &OS = *OutputStream;
std::set<StringRef> V;
- stablizeNameTable(V);
+ stablizeNameTable(NameTable, V);
// Write out the name table.
encodeULEB128(NameTable.size(), OS);
@@ -526,8 +591,7 @@ std::error_code SampleProfileWriterCompactBinary::writeFuncOffsetTable() {
// Write out FuncOffsetTable.
for (auto Entry : FuncOffsetTable) {
- if (std::error_code EC =
- writeNameIdx(Entry.first, FunctionSamples::ProfileIsCS))
+ if (std::error_code EC = writeNameIdx(Entry.first))
return EC;
encodeULEB128(Entry.second, OS);
}
@@ -537,7 +601,7 @@ std::error_code SampleProfileWriterCompactBinary::writeFuncOffsetTable() {
std::error_code SampleProfileWriterCompactBinary::writeNameTable() {
auto &OS = *OutputStream;
std::set<StringRef> V;
- stablizeNameTable(V);
+ stablizeNameTable(NameTable, V);
// Write out the name table.
encodeULEB128(NameTable.size(), OS);
@@ -556,8 +620,8 @@ SampleProfileWriterBinary::writeMagicIdent(SampleProfileFormat Format) {
return sampleprof_error::success;
}
-std::error_code SampleProfileWriterBinary::writeHeader(
- const StringMap<FunctionSamples> &ProfileMap) {
+std::error_code
+SampleProfileWriterBinary::writeHeader(const SampleProfileMap &ProfileMap) {
writeMagicIdent(Format);
computeSummary(ProfileMap);
@@ -566,9 +630,8 @@ std::error_code SampleProfileWriterBinary::writeHeader(
// Generate the name table for all the functions referenced in the profile.
for (const auto &I : ProfileMap) {
- assert(I.first() == I.second.getNameWithContext() &&
- "Inconsistent profile map");
- addName(I.first(), FunctionSamples::ProfileIsCS);
+ assert(I.first == I.second.getContext() && "Inconsistent profile map");
+ addContext(I.first);
addNames(I.second);
}
@@ -642,7 +705,7 @@ std::error_code SampleProfileWriterExtBinaryBase::writeSecHdrTable() {
}
std::error_code SampleProfileWriterExtBinaryBase::writeHeader(
- const StringMap<FunctionSamples> &ProfileMap) {
+ const SampleProfileMap &ProfileMap) {
auto &OS = *OutputStream;
FileStart = OS.tell();
writeMagicIdent(Format);
@@ -652,7 +715,7 @@ std::error_code SampleProfileWriterExtBinaryBase::writeHeader(
}
std::error_code SampleProfileWriterCompactBinary::writeHeader(
- const StringMap<FunctionSamples> &ProfileMap) {
+ const SampleProfileMap &ProfileMap) {
support::endian::Writer Writer(*OutputStream, support::little);
if (auto EC = SampleProfileWriterBinary::writeHeader(ProfileMap))
return EC;
@@ -671,7 +734,8 @@ std::error_code SampleProfileWriterBinary::writeSummary() {
encodeULEB128(Summary->getMaxFunctionCount(), OS);
encodeULEB128(Summary->getNumCounts(), OS);
encodeULEB128(Summary->getNumFunctions(), OS);
- std::vector<ProfileSummaryEntry> &Entries = Summary->getDetailedSummary();
+ const std::vector<ProfileSummaryEntry> &Entries =
+ Summary->getDetailedSummary();
encodeULEB128(Entries.size(), OS);
for (auto Entry : Entries) {
encodeULEB128(Entry.Cutoff, OS);
@@ -682,9 +746,7 @@ std::error_code SampleProfileWriterBinary::writeSummary() {
}
std::error_code SampleProfileWriterBinary::writeBody(const FunctionSamples &S) {
auto &OS = *OutputStream;
-
- if (std::error_code EC =
- writeNameIdx(S.getNameWithContext(), FunctionSamples::ProfileIsCS))
+ if (std::error_code EC = writeContextIdx(S.getContext()))
return EC;
encodeULEB128(S.getTotalSamples(), OS);
@@ -803,8 +865,7 @@ SampleProfileWriter::create(std::unique_ptr<raw_ostream> &OS,
return std::move(Writer);
}
-void SampleProfileWriter::computeSummary(
- const StringMap<FunctionSamples> &ProfileMap) {
+void SampleProfileWriter::computeSummary(const SampleProfileMap &ProfileMap) {
SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
Summary = Builder.computeSummaryForProfiles(ProfileMap);
}
diff --git a/llvm/lib/Support/AArch64TargetParser.cpp b/llvm/lib/Support/AArch64TargetParser.cpp
index 2993892097e7..b3136a91e7f5 100644
--- a/llvm/lib/Support/AArch64TargetParser.cpp
+++ b/llvm/lib/Support/AArch64TargetParser.cpp
@@ -98,6 +98,8 @@ bool AArch64::getExtensionFeatures(uint64_t Extensions,
Features.push_back("+sve2-sha3");
if (Extensions & AEK_SVE2BITPERM)
Features.push_back("+sve2-bitperm");
+ if (Extensions & AArch64::AEK_TME)
+ Features.push_back("+tme");
if (Extensions & AEK_RCPC)
Features.push_back("+rcpc");
if (Extensions & AEK_BRBE)
@@ -118,6 +120,8 @@ bool AArch64::getExtensionFeatures(uint64_t Extensions,
bool AArch64::getArchFeatures(AArch64::ArchKind AK,
std::vector<StringRef> &Features) {
+ if (AK == ArchKind::ARMV8A)
+ Features.push_back("+v8a");
if (AK == ArchKind::ARMV8_1A)
Features.push_back("+v8.1a");
if (AK == ArchKind::ARMV8_2A)
@@ -132,6 +136,12 @@ bool AArch64::getArchFeatures(AArch64::ArchKind AK,
Features.push_back("+v8.6a");
if (AK == AArch64::ArchKind::ARMV8_7A)
Features.push_back("+v8.7a");
+ if (AK == AArch64::ArchKind::ARMV9A)
+ Features.push_back("+v9a");
+ if (AK == AArch64::ArchKind::ARMV9_1A)
+ Features.push_back("+v9.1a");
+ if (AK == AArch64::ArchKind::ARMV9_2A)
+ Features.push_back("+v9.2a");
if(AK == AArch64::ArchKind::ARMV8R)
Features.push_back("+v8r");
diff --git a/llvm/lib/Support/APFixedPoint.cpp b/llvm/lib/Support/APFixedPoint.cpp
index 9764dd51f572..61b30b5c5c60 100644
--- a/llvm/lib/Support/APFixedPoint.cpp
+++ b/llvm/lib/Support/APFixedPoint.cpp
@@ -306,7 +306,7 @@ APFixedPoint APFixedPoint::div(const APFixedPoint &Other,
APInt::sdivrem(ThisVal, OtherVal, Result, Rem);
// If the quotient is negative and the remainder is nonzero, round
// towards negative infinity by subtracting epsilon from the result.
- if (ThisVal.isNegative() != OtherVal.isNegative() && !Rem.isNullValue())
+ if (ThisVal.isNegative() != OtherVal.isNegative() && !Rem.isZero())
Result = Result - 1;
} else
Result = ThisVal.udiv(OtherVal);
@@ -381,7 +381,7 @@ void APFixedPoint::toString(SmallVectorImpl<char> &Str) const {
// Add 4 digits to hold the value after multiplying 10 (the radix)
unsigned Width = Val.getBitWidth() + 4;
APInt FractPart = Val.zextOrTrunc(Scale).zext(Width);
- APInt FractPartMask = APInt::getAllOnesValue(Scale).zext(Width);
+ APInt FractPartMask = APInt::getAllOnes(Scale).zext(Width);
APInt RadixInt = APInt(Width, 10);
IntPart.toString(Str, /*Radix=*/10);
diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp
index 7abca8391f70..4b75c9db8526 100644
--- a/llvm/lib/Support/APFloat.cpp
+++ b/llvm/lib/Support/APFloat.cpp
@@ -92,7 +92,7 @@ namespace llvm {
Note: we need to make the value different from semBogus as otherwise
an unsafe optimization may collapse both values to a single address,
and we heavily rely on them having distinct addresses. */
- static const fltSemantics semPPCDoubleDouble = {-1, 0, 0, 0};
+ static const fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128};
/* These are legacy semantics for the fallback, inaccrurate implementation of
IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
@@ -1288,6 +1288,23 @@ IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const {
return cmpEqual;
}
+/* Set the least significant BITS bits of a bignum, clear the
+ rest. */
+static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts,
+ unsigned bits) {
+ unsigned i = 0;
+ while (bits > APInt::APINT_BITS_PER_WORD) {
+ dst[i++] = ~(APInt::WordType)0;
+ bits -= APInt::APINT_BITS_PER_WORD;
+ }
+
+ if (bits)
+ dst[i++] = ~(APInt::WordType)0 >> (APInt::APINT_BITS_PER_WORD - bits);
+
+ while (i < parts)
+ dst[i++] = 0;
+}
+
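For reference, the helper fills whole 64-bit words with ones and then masks the final partial word. A quick check of the mask shape it produces, using an illustrative setLowBits copy over plain uint64_t words (64-bit words are assumed, as for APInt::WordType):

#include <cassert>
#include <cstdint>

static void setLowBits(uint64_t *Dst, unsigned Parts, unsigned Bits) {
  unsigned I = 0;
  while (Bits > 64) { Dst[I++] = ~0ULL; Bits -= 64; }
  if (Bits) Dst[I++] = ~0ULL >> (64 - Bits);
  while (I < Parts) Dst[I++] = 0;
}

int main() {
  uint64_t W[2];
  setLowBits(W, 2, 68); // 64 + 4 low bits set
  assert(W[0] == ~0ULL && W[1] == 0xF);
  setLowBits(W, 2, 0);  // no bits set at all
  assert(W[0] == 0 && W[1] == 0);
  return 0;
}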
/* Handle overflow. Sign is preserved. We either become infinity or
the largest finite number. */
IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
@@ -1303,8 +1320,8 @@ IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
/* Otherwise we become the largest finite number. */
category = fcNormal;
exponent = semantics->maxExponent;
- APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
- semantics->precision);
+ tcSetLeastSignificantBits(significandParts(), partCount(),
+ semantics->precision);
return opInexact;
}
@@ -2412,7 +2429,7 @@ IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts,
else
bits = width - isSigned;
- APInt::tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
+ tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
if (sign && isSigned)
APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
}
@@ -3379,7 +3396,6 @@ double IEEEFloat::convertToDouble() const {
/// exponent = 0, integer bit 1 ("pseudodenormal")
/// At the moment, the first three are treated as NaNs, the last one as Normal.
void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
- assert(api.getBitWidth()==80);
uint64_t i1 = api.getRawData()[0];
uint64_t i2 = api.getRawData()[1];
uint64_t myexponent = (i2 & 0x7fff);
@@ -3411,7 +3427,6 @@ void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
}
void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) {
- assert(api.getBitWidth()==128);
uint64_t i1 = api.getRawData()[0];
uint64_t i2 = api.getRawData()[1];
opStatus fs;
@@ -3435,7 +3450,6 @@ void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) {
}
void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
- assert(api.getBitWidth()==128);
uint64_t i1 = api.getRawData()[0];
uint64_t i2 = api.getRawData()[1];
uint64_t myexponent = (i2 >> 48) & 0x7fff;
@@ -3471,7 +3485,6 @@ void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
}
void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
- assert(api.getBitWidth()==64);
uint64_t i = *api.getRawData();
uint64_t myexponent = (i >> 52) & 0x7ff;
uint64_t mysignificand = i & 0xfffffffffffffLL;
@@ -3500,7 +3513,6 @@ void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
}
void IEEEFloat::initFromFloatAPInt(const APInt &api) {
- assert(api.getBitWidth()==32);
uint32_t i = (uint32_t)*api.getRawData();
uint32_t myexponent = (i >> 23) & 0xff;
uint32_t mysignificand = i & 0x7fffff;
@@ -3529,7 +3541,6 @@ void IEEEFloat::initFromFloatAPInt(const APInt &api) {
}
void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
- assert(api.getBitWidth() == 16);
uint32_t i = (uint32_t)*api.getRawData();
uint32_t myexponent = (i >> 7) & 0xff;
uint32_t mysignificand = i & 0x7f;
@@ -3558,7 +3569,6 @@ void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
}
void IEEEFloat::initFromHalfAPInt(const APInt &api) {
- assert(api.getBitWidth()==16);
uint32_t i = (uint32_t)*api.getRawData();
uint32_t myexponent = (i >> 10) & 0x1f;
uint32_t mysignificand = i & 0x3ff;
@@ -3591,6 +3601,7 @@ void IEEEFloat::initFromHalfAPInt(const APInt &api) {
/// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
/// when the size is anything else).
void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
+ assert(api.getBitWidth() == Sem->sizeInBits);
if (Sem == &semIEEEhalf)
return initFromHalfAPInt(api);
if (Sem == &semBFloat)
@@ -4847,9 +4858,8 @@ APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics,
llvm_unreachable("Unexpected semantics");
}
-APFloat APFloat::getAllOnesValue(const fltSemantics &Semantics,
- unsigned BitWidth) {
- return APFloat(Semantics, APInt::getAllOnesValue(BitWidth));
+APFloat APFloat::getAllOnesValue(const fltSemantics &Semantics) {
+ return APFloat(Semantics, APInt::getAllOnes(Semantics.sizeInBits));
}
void APFloat::print(raw_ostream &OS) const {
diff --git a/llvm/lib/Support/APInt.cpp b/llvm/lib/Support/APInt.cpp
index a8a950f09747..4940b61602d1 100644
--- a/llvm/lib/Support/APInt.cpp
+++ b/llvm/lib/Support/APInt.cpp
@@ -89,7 +89,6 @@ void APInt::initSlowCase(const APInt& that) {
}
void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
- assert(BitWidth && "Bitwidth too small");
assert(bigVal.data() && "Null pointer detected!");
if (isSingleWord())
U.VAL = bigVal[0];
@@ -105,19 +104,17 @@ void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
clearUnusedBits();
}
-APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal)
- : BitWidth(numBits) {
+APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal) : BitWidth(numBits) {
initFromArray(bigVal);
}
APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
- : BitWidth(numBits) {
+ : BitWidth(numBits) {
initFromArray(makeArrayRef(bigVal, numWords));
}
APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
- : BitWidth(numbits) {
- assert(BitWidth && "Bitwidth too small");
+ : BitWidth(numbits) {
fromString(numbits, Str, radix);
}
@@ -140,7 +137,7 @@ void APInt::reallocate(unsigned NewBitWidth) {
U.pVal = getMemory(getNumWords());
}
-void APInt::AssignSlowCase(const APInt& RHS) {
+void APInt::assignSlowCase(const APInt &RHS) {
// Don't do anything for X = X
if (this == &RHS)
return;
@@ -233,27 +230,30 @@ APInt APInt::operator*(const APInt& RHS) const {
return APInt(BitWidth, U.VAL * RHS.U.VAL);
APInt Result(getMemory(getNumWords()), getBitWidth());
-
tcMultiply(Result.U.pVal, U.pVal, RHS.U.pVal, getNumWords());
-
Result.clearUnusedBits();
return Result;
}
-void APInt::AndAssignSlowCase(const APInt& RHS) {
- tcAnd(U.pVal, RHS.U.pVal, getNumWords());
+void APInt::andAssignSlowCase(const APInt &RHS) {
+ WordType *dst = U.pVal, *rhs = RHS.U.pVal;
+ for (size_t i = 0, e = getNumWords(); i != e; ++i)
+ dst[i] &= rhs[i];
}
-void APInt::OrAssignSlowCase(const APInt& RHS) {
- tcOr(U.pVal, RHS.U.pVal, getNumWords());
+void APInt::orAssignSlowCase(const APInt &RHS) {
+ WordType *dst = U.pVal, *rhs = RHS.U.pVal;
+ for (size_t i = 0, e = getNumWords(); i != e; ++i)
+ dst[i] |= rhs[i];
}
-void APInt::XorAssignSlowCase(const APInt& RHS) {
- tcXor(U.pVal, RHS.U.pVal, getNumWords());
+void APInt::xorAssignSlowCase(const APInt &RHS) {
+ WordType *dst = U.pVal, *rhs = RHS.U.pVal;
+ for (size_t i = 0, e = getNumWords(); i != e; ++i)
+ dst[i] ^= rhs[i];
}
-APInt& APInt::operator*=(const APInt& RHS) {
- assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+APInt &APInt::operator*=(const APInt &RHS) {
*this = *this * RHS;
return *this;
}
@@ -268,7 +268,7 @@ APInt& APInt::operator*=(uint64_t RHS) {
return clearUnusedBits();
}
-bool APInt::EqualSlowCase(const APInt& RHS) const {
+bool APInt::equalSlowCase(const APInt &RHS) const {
return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal);
}
@@ -327,12 +327,29 @@ void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
U.pVal[word] = WORDTYPE_MAX;
}
+// Complement a bignum in-place.
+static void tcComplement(APInt::WordType *dst, unsigned parts) {
+ for (unsigned i = 0; i < parts; i++)
+ dst[i] = ~dst[i];
+}
+
/// Toggle every bit to its opposite value.
void APInt::flipAllBitsSlowCase() {
tcComplement(U.pVal, getNumWords());
clearUnusedBits();
}
+/// Concatenate the bits from "NewLSB" onto the bottom of *this. This is
+/// equivalent to:
+/// (this->zext(NewWidth) << NewLSB.getBitWidth()) | NewLSB.zext(NewWidth)
+/// In the slow case, we know the result is large.
+APInt APInt::concatSlowCase(const APInt &NewLSB) const {
+ unsigned NewWidth = getBitWidth() + NewLSB.getBitWidth();
+ APInt Result = NewLSB.zextOrSelf(NewWidth);
+ Result.insertBits(*this, NewLSB.getBitWidth());
+ return Result;
+}
+
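The identity quoted in the comment can be checked with plain integers; the Hi/Lo values and widths below are assumptions for the example, not APInt state:

#include <cassert>
#include <cstdint>

int main() {
  // concat(Hi, Lo) == (Hi << Lo.width) | Lo, in a Hi.width + Lo.width result.
  uint32_t Hi = 0b101, HiBits = 3;
  uint32_t Lo = 0b01, LoBits = 2;
  uint32_t Concat = (Hi << LoBits) | Lo;
  assert(Concat == 0b10101 && HiBits + LoBits == 5);
  return 0;
}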
/// Toggle a given bit to its opposite value whose position is given
/// as "bitPosition".
/// Toggles a given bit to its opposite value.
@@ -343,8 +360,11 @@ void APInt::flipBit(unsigned bitPosition) {
void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
unsigned subBitWidth = subBits.getBitWidth();
- assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth &&
- "Illegal bit insertion");
+ assert((subBitWidth + bitPosition) <= BitWidth && "Illegal bit insertion");
+
+ // inserting no bits is a noop.
+ if (subBitWidth == 0)
+ return;
// Insertion is a direct copy.
if (subBitWidth == BitWidth) {
@@ -424,7 +444,6 @@ void APInt::insertBits(uint64_t subBits, unsigned bitPosition, unsigned numBits)
}
APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
- assert(numBits > 0 && "Can't extract zero bits");
assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
"Illegal bit extraction");
@@ -550,7 +569,7 @@ hash_code llvm::hash_value(const APInt &Arg) {
hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords()));
}
-unsigned DenseMapInfo<APInt>::getHashValue(const APInt &Key) {
+unsigned DenseMapInfo<APInt, void>::getHashValue(const APInt &Key) {
return static_cast<unsigned>(hash_value(Key));
}
@@ -702,6 +721,8 @@ APInt APInt::reverseBits() const {
return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL));
case 8:
return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL));
+ case 0:
+ return *this;
default:
break;
}
@@ -861,7 +882,6 @@ double APInt::roundToDouble(bool isSigned) const {
// Truncate to new width.
APInt APInt::trunc(unsigned width) const {
assert(width < BitWidth && "Invalid APInt Truncate request");
- assert(width && "Can't truncate to 0 bits");
if (width <= APINT_BITS_PER_WORD)
return APInt(width, getRawData()[0]);
@@ -884,7 +904,6 @@ APInt APInt::trunc(unsigned width) const {
// Truncate to new width with unsigned saturation.
APInt APInt::truncUSat(unsigned width) const {
assert(width < BitWidth && "Invalid APInt Truncate request");
- assert(width && "Can't truncate to 0 bits");
// Can we just losslessly truncate it?
if (isIntN(width))
@@ -896,7 +915,6 @@ APInt APInt::truncUSat(unsigned width) const {
// Truncate to new width with signed saturation.
APInt APInt::truncSSat(unsigned width) const {
assert(width < BitWidth && "Invalid APInt Truncate request");
- assert(width && "Can't truncate to 0 bits");
// Can we just losslessly truncate it?
if (isSignedIntN(width))
@@ -1059,6 +1077,8 @@ void APInt::shlSlowCase(unsigned ShiftAmt) {
// Calculate the rotate amount modulo the bit width.
static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) {
+ if (LLVM_UNLIKELY(BitWidth == 0))
+ return 0;
unsigned rotBitWidth = rotateAmt.getBitWidth();
APInt rot = rotateAmt;
if (rotBitWidth < BitWidth) {
@@ -1075,6 +1095,8 @@ APInt APInt::rotl(const APInt &rotateAmt) const {
}
APInt APInt::rotl(unsigned rotateAmt) const {
+ if (LLVM_UNLIKELY(BitWidth == 0))
+ return *this;
rotateAmt %= BitWidth;
if (rotateAmt == 0)
return *this;
@@ -1086,12 +1108,43 @@ APInt APInt::rotr(const APInt &rotateAmt) const {
}
APInt APInt::rotr(unsigned rotateAmt) const {
+ if (BitWidth == 0)
+ return *this;
rotateAmt %= BitWidth;
if (rotateAmt == 0)
return *this;
return lshr(rotateAmt) | shl(BitWidth - rotateAmt);
}
+/// \returns the nearest log base 2 of this APInt. Ties round up.
+///
+/// NOTE: When we have a BitWidth of 1, we define:
+///
+/// log2(0) = UINT32_MAX
+/// log2(1) = 0
+///
+/// to get around any mathematical concerns resulting from
+/// referencing 2 in a space where 2 does not exist.
+unsigned APInt::nearestLogBase2() const {
+ // Special case when we have a bitwidth of 1. If VAL is 1, then we
+ // get 0. If VAL is 0, we get WORDTYPE_MAX which gets truncated to
+ // UINT32_MAX.
+ if (BitWidth == 1)
+ return U.VAL - 1;
+
+ // Handle the zero case.
+ if (isZero())
+ return UINT32_MAX;
+
+ // The non-zero case is handled by computing:
+ //
+ // nearestLogBase2(x) = logBase2(x) + x[logBase2(x)-1].
+ //
+ // where x[i] is referring to the value of the ith bit of x.
+ unsigned lg = logBase2();
+ return lg + unsigned((*this)[lg - 1]);
+}
+
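A few concrete values implied by the formula above, assuming the public APInt::nearestLogBase2() declaration in APInt.h:

#include "llvm/ADT/APInt.h"
#include <cassert>
#include <cstdint>
using namespace llvm;

void nearestLog2Example() {
  assert(APInt(32, 4).nearestLogBase2() == 2);          // exact power of two
  assert(APInt(32, 5).nearestLogBase2() == 2);          // closer to 4 than to 8
  assert(APInt(32, 6).nearestLogBase2() == 3);          // tie between 4 and 8 rounds up
  assert(APInt(32, 7).nearestLogBase2() == 3);          // closer to 8
  assert(APInt(32, 0).nearestLogBase2() == UINT32_MAX); // documented zero case
}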
// Square Root - this method computes and returns the square root of "this".
// Three mechanisms are used for computation. For small values (<= 5 bits),
// a table lookup is done. This gets some performance for common cases. For
@@ -1222,98 +1275,6 @@ APInt APInt::multiplicativeInverse(const APInt& modulo) const {
return std::move(t[i]);
}
-/// Calculate the magic numbers required to implement a signed integer division
-/// by a constant as a sequence of multiplies, adds and shifts. Requires that
-/// the divisor not be 0, 1, or -1. Taken from "Hacker's Delight", Henry S.
-/// Warren, Jr., chapter 10.
-APInt::ms APInt::magic() const {
- const APInt& d = *this;
- unsigned p;
- APInt ad, anc, delta, q1, r1, q2, r2, t;
- APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
- struct ms mag;
-
- ad = d.abs();
- t = signedMin + (d.lshr(d.getBitWidth() - 1));
- anc = t - 1 - t.urem(ad); // absolute value of nc
- p = d.getBitWidth() - 1; // initialize p
- q1 = signedMin.udiv(anc); // initialize q1 = 2p/abs(nc)
- r1 = signedMin - q1*anc; // initialize r1 = rem(2p,abs(nc))
- q2 = signedMin.udiv(ad); // initialize q2 = 2p/abs(d)
- r2 = signedMin - q2*ad; // initialize r2 = rem(2p,abs(d))
- do {
- p = p + 1;
- q1 = q1<<1; // update q1 = 2p/abs(nc)
- r1 = r1<<1; // update r1 = rem(2p/abs(nc))
- if (r1.uge(anc)) { // must be unsigned comparison
- q1 = q1 + 1;
- r1 = r1 - anc;
- }
- q2 = q2<<1; // update q2 = 2p/abs(d)
- r2 = r2<<1; // update r2 = rem(2p/abs(d))
- if (r2.uge(ad)) { // must be unsigned comparison
- q2 = q2 + 1;
- r2 = r2 - ad;
- }
- delta = ad - r2;
- } while (q1.ult(delta) || (q1 == delta && r1 == 0));
-
- mag.m = q2 + 1;
- if (d.isNegative()) mag.m = -mag.m; // resulting magic number
- mag.s = p - d.getBitWidth(); // resulting shift
- return mag;
-}
-
-/// Calculate the magic numbers required to implement an unsigned integer
-/// division by a constant as a sequence of multiplies, adds and shifts.
-/// Requires that the divisor not be 0. Taken from "Hacker's Delight", Henry
-/// S. Warren, Jr., chapter 10.
-/// LeadingZeros can be used to simplify the calculation if the upper bits
-/// of the divided value are known zero.
-APInt::mu APInt::magicu(unsigned LeadingZeros) const {
- const APInt& d = *this;
- unsigned p;
- APInt nc, delta, q1, r1, q2, r2;
- struct mu magu;
- magu.a = 0; // initialize "add" indicator
- APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros);
- APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
- APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
-
- nc = allOnes - (allOnes - d).urem(d);
- p = d.getBitWidth() - 1; // initialize p
- q1 = signedMin.udiv(nc); // initialize q1 = 2p/nc
- r1 = signedMin - q1*nc; // initialize r1 = rem(2p,nc)
- q2 = signedMax.udiv(d); // initialize q2 = (2p-1)/d
- r2 = signedMax - q2*d; // initialize r2 = rem((2p-1),d)
- do {
- p = p + 1;
- if (r1.uge(nc - r1)) {
- q1 = q1 + q1 + 1; // update q1
- r1 = r1 + r1 - nc; // update r1
- }
- else {
- q1 = q1+q1; // update q1
- r1 = r1+r1; // update r1
- }
- if ((r2 + 1).uge(d - r2)) {
- if (q2.uge(signedMax)) magu.a = 1;
- q2 = q2+q2 + 1; // update q2
- r2 = r2+r2 + 1 - d; // update r2
- }
- else {
- if (q2.uge(signedMin)) magu.a = 1;
- q2 = q2+q2; // update q2
- r2 = r2+r2 + 1; // update r2
- }
- delta = d - 1 - r2;
- } while (p < d.getBitWidth()*2 &&
- (q1.ult(delta) || (q1 == delta && r1 == 0)));
- magu.m = q2 + 1; // resulting magic number
- magu.s = p - d.getBitWidth(); // resulting shift
- return magu;
-}
-
/// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
/// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
/// variables here have the same names as in the algorithm. Comments explain
@@ -1984,15 +1945,16 @@ APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
// MININT/-1 --> overflow.
- Overflow = isMinSignedValue() && RHS.isAllOnesValue();
+ Overflow = isMinSignedValue() && RHS.isAllOnes();
return sdiv(RHS);
}
APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
APInt Res = *this * RHS;
- if (*this != 0 && RHS != 0)
- Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
+ if (RHS != 0)
+ Overflow = Res.sdiv(RHS) != *this ||
+ (isMinSignedValue() && RHS.isAllOnes());
else
Overflow = false;
return Res;
@@ -2196,7 +2158,7 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
}
// First, check for a zero value and just short circuit the logic below.
- if (*this == 0) {
+ if (isZero()) {
while (*Prefix) {
Str.push_back(*Prefix);
++Prefix;
@@ -2305,55 +2267,51 @@ void APInt::print(raw_ostream &OS, bool isSigned) const {
static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0,
"Part width must be divisible by 2!");
-/* Some handy functions local to this file. */
-
-/* Returns the integer part with the least significant BITS set.
- BITS cannot be zero. */
+// Returns the integer part with the least significant BITS set.
+// BITS cannot be zero.
static inline APInt::WordType lowBitMask(unsigned bits) {
assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD);
-
return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits);
}
-/* Returns the value of the lower half of PART. */
+/// Returns the value of the lower half of PART.
static inline APInt::WordType lowHalf(APInt::WordType part) {
return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2);
}
-/* Returns the value of the upper half of PART. */
+/// Returns the value of the upper half of PART.
static inline APInt::WordType highHalf(APInt::WordType part) {
return part >> (APInt::APINT_BITS_PER_WORD / 2);
}
-/* Returns the bit number of the most significant set bit of a part.
- If the input number has no bits set -1U is returned. */
+/// Returns the bit number of the most significant set bit of a part.
+/// If the input number has no bits set -1U is returned.
static unsigned partMSB(APInt::WordType value) {
return findLastSet(value, ZB_Max);
}
-/* Returns the bit number of the least significant set bit of a
- part. If the input number has no bits set -1U is returned. */
+/// Returns the bit number of the least significant set bit of a part. If the
+/// input number has no bits set -1U is returned.
static unsigned partLSB(APInt::WordType value) {
return findFirstSet(value, ZB_Max);
}
-/* Sets the least significant part of a bignum to the input value, and
- zeroes out higher parts. */
+/// Sets the least significant part of a bignum to the input value, and zeroes
+/// out higher parts.
void APInt::tcSet(WordType *dst, WordType part, unsigned parts) {
assert(parts > 0);
-
dst[0] = part;
for (unsigned i = 1; i < parts; i++)
dst[i] = 0;
}
-/* Assign one bignum to another. */
+/// Assign one bignum to another.
void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) {
for (unsigned i = 0; i < parts; i++)
dst[i] = src[i];
}
-/* Returns true if a bignum is zero, false otherwise. */
+/// Returns true if a bignum is zero, false otherwise.
bool APInt::tcIsZero(const WordType *src, unsigned parts) {
for (unsigned i = 0; i < parts; i++)
if (src[i])
@@ -2362,28 +2320,27 @@ bool APInt::tcIsZero(const WordType *src, unsigned parts) {
return true;
}
-/* Extract the given bit of a bignum; returns 0 or 1. */
+/// Extract the given bit of a bignum; returns 0 or 1.
int APInt::tcExtractBit(const WordType *parts, unsigned bit) {
return (parts[whichWord(bit)] & maskBit(bit)) != 0;
}
-/* Set the given bit of a bignum. */
+/// Set the given bit of a bignum.
void APInt::tcSetBit(WordType *parts, unsigned bit) {
parts[whichWord(bit)] |= maskBit(bit);
}
-/* Clears the given bit of a bignum. */
+/// Clears the given bit of a bignum.
void APInt::tcClearBit(WordType *parts, unsigned bit) {
parts[whichWord(bit)] &= ~maskBit(bit);
}
-/* Returns the bit number of the least significant set bit of a
- number. If the input number has no bits set -1U is returned. */
+/// Returns the bit number of the least significant set bit of a number. If the
+/// input number has no bits set -1U is returned.
unsigned APInt::tcLSB(const WordType *parts, unsigned n) {
for (unsigned i = 0; i < n; i++) {
if (parts[i] != 0) {
unsigned lsb = partLSB(parts[i]);
-
return lsb + i * APINT_BITS_PER_WORD;
}
}
@@ -2391,8 +2348,8 @@ unsigned APInt::tcLSB(const WordType *parts, unsigned n) {
return -1U;
}
-/* Returns the bit number of the most significant set bit of a number.
- If the input number has no bits set -1U is returned. */
+/// Returns the bit number of the most significant set bit of a number.
+/// If the input number has no bits set -1U is returned.
unsigned APInt::tcMSB(const WordType *parts, unsigned n) {
do {
--n;
@@ -2407,10 +2364,10 @@ unsigned APInt::tcMSB(const WordType *parts, unsigned n) {
return -1U;
}
-/* Copy the bit vector of width srcBITS from SRC, starting at bit
- srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
- the least significant bit of DST. All high bits above srcBITS in
- DST are zero-filled. */
+/// Copy the bit vector of width srcBITS from SRC, starting at bit srcLSB, to
+/// DST, of dstCOUNT parts, such that the bit srcLSB becomes the least
+/// significant bit of DST. All high bits above srcBITS in DST are zero-filled.
void
APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src,
unsigned srcBits, unsigned srcLSB) {
@@ -2418,14 +2375,14 @@ APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src,
assert(dstParts <= dstCount);
unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD;
- tcAssign (dst, src + firstSrcPart, dstParts);
+ tcAssign(dst, src + firstSrcPart, dstParts);
unsigned shift = srcLSB % APINT_BITS_PER_WORD;
- tcShiftRight (dst, dstParts, shift);
+ tcShiftRight(dst, dstParts, shift);
- /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC
- in DST. If this is less that srcBits, append the rest, else
- clear the high bits. */
+ // We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC
+  // in DST. If this is less than srcBits, append the rest, else
+ // clear the high bits.
unsigned n = dstParts * APINT_BITS_PER_WORD - shift;
if (n < srcBits) {
WordType mask = lowBitMask (srcBits - n);
@@ -2436,12 +2393,12 @@ APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src,
dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD);
}
- /* Clear high parts. */
+ // Clear high parts.
while (dstParts < dstCount)
dst[dstParts++] = 0;
}
-/* DST += RHS + C where C is zero or one. Returns the carry flag. */
+/// DST += RHS + C where C is zero or one. Returns the carry flag.
APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs,
WordType c, unsigned parts) {
assert(c <= 1);
@@ -2476,7 +2433,7 @@ APInt::WordType APInt::tcAddPart(WordType *dst, WordType src,
return 1;
}
-/* DST -= RHS + C where C is zero or one. Returns the carry flag. */
+/// DST -= RHS + C where C is zero or one. Returns the carry flag.
APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs,
WordType c, unsigned parts) {
assert(c <= 1);
@@ -2515,47 +2472,39 @@ APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src,
return 1;
}
-/* Negate a bignum in-place. */
+/// Negate a bignum in-place.
void APInt::tcNegate(WordType *dst, unsigned parts) {
tcComplement(dst, parts);
tcIncrement(dst, parts);
}
-/* DST += SRC * MULTIPLIER + CARRY if add is true
- DST = SRC * MULTIPLIER + CARRY if add is false
-
- Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC
- they must start at the same point, i.e. DST == SRC.
-
- If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
- returned. Otherwise DST is filled with the least significant
- DSTPARTS parts of the result, and if all of the omitted higher
- parts were zero return zero, otherwise overflow occurred and
- return one. */
+/// DST += SRC * MULTIPLIER + CARRY if add is true
+/// DST = SRC * MULTIPLIER + CARRY if add is false
+/// Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC
+/// they must start at the same point, i.e. DST == SRC.
+/// If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
+/// returned. Otherwise DST is filled with the least significant
+/// DSTPARTS parts of the result, and if all of the omitted higher
+/// parts were zero return zero, otherwise overflow occurred and
+/// return one.
int APInt::tcMultiplyPart(WordType *dst, const WordType *src,
WordType multiplier, WordType carry,
unsigned srcParts, unsigned dstParts,
bool add) {
- /* Otherwise our writes of DST kill our later reads of SRC. */
+ // Otherwise our writes of DST kill our later reads of SRC.
assert(dst <= src || dst >= src + srcParts);
assert(dstParts <= srcParts + 1);
- /* N loops; minimum of dstParts and srcParts. */
+ // N loops; minimum of dstParts and srcParts.
unsigned n = std::min(dstParts, srcParts);
for (unsigned i = 0; i < n; i++) {
- WordType low, mid, high, srcPart;
-
- /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
-
- This cannot overflow, because
-
- (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
-
- which is less than n^2. */
-
- srcPart = src[i];
-
+ // [LOW, HIGH] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
+ // This cannot overflow, because:
+ // (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
+ // which is less than n^2.
+ WordType srcPart = src[i];
+ WordType low, mid, high;
if (multiplier == 0 || srcPart == 0) {
low = carry;
high = 0;
@@ -2577,14 +2526,14 @@ int APInt::tcMultiplyPart(WordType *dst, const WordType *src,
high++;
low += mid;
- /* Now add carry. */
+ // Now add carry.
if (low + carry < low)
high++;
low += carry;
}
if (add) {
- /* And now DST[i], and store the new low part there. */
+ // And now DST[i], and store the new low part there.
if (low + dst[i] < low)
high++;
dst[i] += low;
@@ -2595,32 +2544,32 @@ int APInt::tcMultiplyPart(WordType *dst, const WordType *src,
}
if (srcParts < dstParts) {
- /* Full multiplication, there is no overflow. */
+ // Full multiplication, there is no overflow.
assert(srcParts + 1 == dstParts);
dst[srcParts] = carry;
return 0;
}
- /* We overflowed if there is carry. */
+ // We overflowed if there is carry.
if (carry)
return 1;
- /* We would overflow if any significant unwritten parts would be
- non-zero. This is true if any remaining src parts are non-zero
- and the multiplier is non-zero. */
+ // We would overflow if any significant unwritten parts would be
+ // non-zero. This is true if any remaining src parts are non-zero
+ // and the multiplier is non-zero.
if (multiplier)
for (unsigned i = dstParts; i < srcParts; i++)
if (src[i])
return 1;
- /* We fitted in the narrow destination. */
+ // We fitted in the narrow destination.
return 0;
}
-/* DST = LHS * RHS, where DST has the same width as the operands and
- is filled with the least significant parts of the result. Returns
- one if overflow occurred, otherwise zero. DST must be disjoint
- from both operands. */
+/// DST = LHS * RHS, where DST has the same width as the operands and
+/// is filled with the least significant parts of the result. Returns
+/// one if overflow occurred, otherwise zero. DST must be disjoint
+/// from both operands.
int APInt::tcMultiply(WordType *dst, const WordType *lhs,
const WordType *rhs, unsigned parts) {
assert(dst != lhs && dst != rhs);
@@ -2640,7 +2589,7 @@ int APInt::tcMultiply(WordType *dst, const WordType *lhs,
void APInt::tcFullMultiply(WordType *dst, const WordType *lhs,
const WordType *rhs, unsigned lhsParts,
unsigned rhsParts) {
- /* Put the narrower number on the LHS for less loops below. */
+  // Put the narrower number on the LHS for fewer loops below.
if (lhsParts > rhsParts)
return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
@@ -2652,16 +2601,15 @@ void APInt::tcFullMultiply(WordType *dst, const WordType *lhs,
tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true);
}
-/* If RHS is zero LHS and REMAINDER are left unchanged, return one.
- Otherwise set LHS to LHS / RHS with the fractional part discarded,
- set REMAINDER to the remainder, return zero. i.e.
-
- OLD_LHS = RHS * LHS + REMAINDER
-
- SCRATCH is a bignum of the same size as the operands and result for
- use by the routine; its contents need not be initialized and are
- destroyed. LHS, REMAINDER and SCRATCH must be distinct.
-*/
+// If RHS is zero, LHS and REMAINDER are left unchanged, return one.
+// Otherwise set LHS to LHS / RHS with the fractional part discarded,
+// set REMAINDER to the remainder, return zero. i.e.
+//
+// OLD_LHS = RHS * LHS + REMAINDER
+//
+// SCRATCH is a bignum of the same size as the operands and result for
+// use by the routine; its contents need not be initialized and are
+// destroyed. LHS, REMAINDER and SCRATCH must be distinct.
int APInt::tcDivide(WordType *lhs, const WordType *rhs,
WordType *remainder, WordType *srhs,
unsigned parts) {
@@ -2680,8 +2628,8 @@ int APInt::tcDivide(WordType *lhs, const WordType *rhs,
tcAssign(remainder, lhs, parts);
tcSet(lhs, 0, parts);
- /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
- the total. */
+ // Loop, subtracting SRHS if REMAINDER is greater and adding that to the
+ // total.
for (;;) {
int compare = tcCompare(remainder, srhs, parts);
if (compare >= 0) {
@@ -2756,31 +2704,7 @@ void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) {
std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE);
}
-/* Bitwise and of two bignums. */
-void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) {
- for (unsigned i = 0; i < parts; i++)
- dst[i] &= rhs[i];
-}
-
-/* Bitwise inclusive or of two bignums. */
-void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) {
- for (unsigned i = 0; i < parts; i++)
- dst[i] |= rhs[i];
-}
-
-/* Bitwise exclusive or of two bignums. */
-void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) {
- for (unsigned i = 0; i < parts; i++)
- dst[i] ^= rhs[i];
-}
-
-/* Complement a bignum in-place. */
-void APInt::tcComplement(WordType *dst, unsigned parts) {
- for (unsigned i = 0; i < parts; i++)
- dst[i] = ~dst[i];
-}
-
-/* Comparison (unsigned) of two bignums. */
+// Comparison (unsigned) of two bignums.
int APInt::tcCompare(const WordType *lhs, const WordType *rhs,
unsigned parts) {
while (parts) {
@@ -2792,23 +2716,6 @@ int APInt::tcCompare(const WordType *lhs, const WordType *rhs,
return 0;
}
-/* Set the least significant BITS bits of a bignum, clear the
- rest. */
-void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts,
- unsigned bits) {
- unsigned i = 0;
- while (bits > APINT_BITS_PER_WORD) {
- dst[i++] = ~(WordType) 0;
- bits -= APINT_BITS_PER_WORD;
- }
-
- if (bits)
- dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits);
-
- while (i < parts)
- dst[i++] = 0;
-}
-
APInt llvm::APIntOps::RoundingUDiv(const APInt &A, const APInt &B,
APInt::Rounding RM) {
// Currently udivrem always rounds down.
@@ -2819,7 +2726,7 @@ APInt llvm::APIntOps::RoundingUDiv(const APInt &A, const APInt &B,
case APInt::Rounding::UP: {
APInt Quo, Rem;
APInt::udivrem(A, B, Quo, Rem);
- if (Rem == 0)
+ if (Rem.isZero())
return Quo;
return Quo + 1;
}
@@ -2834,7 +2741,7 @@ APInt llvm::APIntOps::RoundingSDiv(const APInt &A, const APInt &B,
case APInt::Rounding::UP: {
APInt Quo, Rem;
APInt::sdivrem(A, B, Quo, Rem);
- if (Rem == 0)
+ if (Rem.isZero())
return Quo;
// This algorithm deals with arbitrary rounding mode used by sdivrem.
// We want to check whether the non-integer part of the mathematical value
@@ -2870,7 +2777,7 @@ llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
<< "x + " << C << ", rw:" << RangeWidth << '\n');
// Identify 0 as a (non)solution immediately.
- if (C.sextOrTrunc(RangeWidth).isNullValue() ) {
+ if (C.sextOrTrunc(RangeWidth).isZero()) {
LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n");
return APInt(CoeffWidth, 0);
}
@@ -2932,7 +2839,7 @@ llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
auto RoundUp = [] (const APInt &V, const APInt &A) -> APInt {
assert(A.isStrictlyPositive());
APInt T = V.abs().urem(A);
- if (T.isNullValue())
+ if (T.isZero())
return V;
return V.isNegative() ? V+T : V+(A-T);
};
@@ -3016,7 +2923,7 @@ llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
// can be 0, but cannot be negative.
assert(X.isNonNegative() && "Solution should be non-negative");
- if (!InexactSQ && Rem.isNullValue()) {
+ if (!InexactSQ && Rem.isZero()) {
LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n');
return X;
}
@@ -3032,8 +2939,8 @@ llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
APInt VX = (A*X + B)*X + C;
APInt VY = VX + TwoA*X + A + B;
- bool SignChange = VX.isNegative() != VY.isNegative() ||
- VX.isNullValue() != VY.isNullValue();
+ bool SignChange =
+ VX.isNegative() != VY.isNegative() || VX.isZero() != VY.isZero();
// If the sign did not change between X and X+1, X is not a valid solution.
// This could happen when the actual (exact) roots don't have an integer
// between them, so they would both be contained between X and X+1.
@@ -3055,6 +2962,40 @@ llvm::APIntOps::GetMostSignificantDifferentBit(const APInt &A, const APInt &B) {
return A.getBitWidth() - ((A ^ B).countLeadingZeros() + 1);
}
+APInt llvm::APIntOps::ScaleBitMask(const APInt &A, unsigned NewBitWidth) {
+ unsigned OldBitWidth = A.getBitWidth();
+ assert((((OldBitWidth % NewBitWidth) == 0) ||
+ ((NewBitWidth % OldBitWidth) == 0)) &&
+ "One size should be a multiple of the other one. "
+ "Can't do fractional scaling.");
+
+ // Check for matching bitwidths.
+ if (OldBitWidth == NewBitWidth)
+ return A;
+
+ APInt NewA = APInt::getZero(NewBitWidth);
+
+ // Check for null input.
+ if (A.isZero())
+ return NewA;
+
+ if (NewBitWidth > OldBitWidth) {
+ // Repeat bits.
+ unsigned Scale = NewBitWidth / OldBitWidth;
+ for (unsigned i = 0; i != OldBitWidth; ++i)
+ if (A[i])
+ NewA.setBits(i * Scale, (i + 1) * Scale);
+ } else {
+ // Merge bits - if any old bit is set, then set scale equivalent new bit.
+ unsigned Scale = OldBitWidth / NewBitWidth;
+ for (unsigned i = 0; i != NewBitWidth; ++i)
+ if (!A.extractBits(Scale, i * Scale).isZero())
+ NewA.setBit(i);
+ }
+
+ return NewA;
+}
+
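A short sketch of both scaling directions, assuming ScaleBitMask is declared under llvm::APIntOps in APInt.h:

#include "llvm/ADT/APInt.h"
#include <cassert>
using namespace llvm;

void scaleBitMaskExample() {
  // Widening: every source bit is repeated Scale times.
  APInt Narrow(4, 0b0101);
  assert(APIntOps::ScaleBitMask(Narrow, 8) == APInt(8, 0b00110011));

  // Narrowing: a result bit is set if any bit of its source group is set.
  APInt Wide(8, 0b11000000);
  assert(APIntOps::ScaleBitMask(Wide, 2) == APInt(2, 0b10));
}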
/// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
/// with the integer held in IntVal.
void llvm::StoreIntToMemory(const APInt &IntVal, uint8_t *Dst,
diff --git a/llvm/lib/Support/ARMTargetParser.cpp b/llvm/lib/Support/ARMTargetParser.cpp
index 94b48df27993..4405ed176fe2 100644
--- a/llvm/lib/Support/ARMTargetParser.cpp
+++ b/llvm/lib/Support/ARMTargetParser.cpp
@@ -82,6 +82,10 @@ unsigned ARM::parseArchVersion(StringRef Arch) {
case ArchKind::ARMV8MMainline:
case ArchKind::ARMV8_1MMainline:
return 8;
+ case ArchKind::ARMV9A:
+ case ArchKind::ARMV9_1A:
+ case ArchKind::ARMV9_2A:
+ return 9;
case ArchKind::INVALID:
return 0;
}
@@ -113,6 +117,9 @@ ARM::ProfileKind ARM::parseArchProfile(StringRef Arch) {
case ArchKind::ARMV8_5A:
case ArchKind::ARMV8_6A:
case ArchKind::ARMV8_7A:
+ case ArchKind::ARMV9A:
+ case ArchKind::ARMV9_1A:
+ case ArchKind::ARMV9_2A:
return ProfileKind::A;
case ArchKind::ARMV2:
case ArchKind::ARMV2A:
@@ -158,6 +165,9 @@ StringRef ARM::getArchSynonym(StringRef Arch) {
.Case("v8.6a", "v8.6-a")
.Case("v8.7a", "v8.7-a")
.Case("v8r", "v8-r")
+ .Cases("v9", "v9a", "v9-a")
+ .Case("v9.1a", "v9.1-a")
+ .Case("v9.2a", "v9.2-a")
.Case("v8m.base", "v8-m.base")
.Case("v8m.main", "v8-m.main")
.Case("v8.1m.main", "v8.1-m.main")
@@ -297,7 +307,7 @@ StringRef ARM::getCanonicalArchName(StringRef Arch) {
else if (A.startswith("aarch64")) {
offset = 7;
// AArch64 uses "_be", not "eb" suffix.
- if (A.find("eb") != StringRef::npos)
+ if (A.contains("eb"))
return Error;
if (A.substr(offset, 3) == "_be")
offset += 3;
@@ -323,7 +333,7 @@ StringRef ARM::getCanonicalArchName(StringRef Arch) {
if (A.size() >= 2 && (A[0] != 'v' || !std::isdigit(A[1])))
return Error;
// Can't have an extra 'eb'.
- if (A.find("eb") != StringRef::npos)
+ if (A.contains("eb"))
return Error;
}
diff --git a/llvm/lib/Support/BinaryStreamReader.cpp b/llvm/lib/Support/BinaryStreamReader.cpp
index a0434bdc6115..2fe450db11dd 100644
--- a/llvm/lib/Support/BinaryStreamReader.cpp
+++ b/llvm/lib/Support/BinaryStreamReader.cpp
@@ -72,10 +72,10 @@ Error BinaryStreamReader::readSLEB128(int64_t &Dest) {
}
Error BinaryStreamReader::readCString(StringRef &Dest) {
- uint32_t OriginalOffset = getOffset();
- uint32_t FoundOffset = 0;
+ uint64_t OriginalOffset = getOffset();
+ uint64_t FoundOffset = 0;
while (true) {
- uint32_t ThisOffset = getOffset();
+ uint64_t ThisOffset = getOffset();
ArrayRef<uint8_t> Buffer;
if (auto EC = readLongestContiguousChunk(Buffer))
return EC;
@@ -100,8 +100,8 @@ Error BinaryStreamReader::readCString(StringRef &Dest) {
}
Error BinaryStreamReader::readWideString(ArrayRef<UTF16> &Dest) {
- uint32_t Length = 0;
- uint32_t OriginalOffset = getOffset();
+ uint64_t Length = 0;
+ uint64_t OriginalOffset = getOffset();
const UTF16 *C;
while (true) {
if (auto EC = readObject(C))
@@ -110,7 +110,7 @@ Error BinaryStreamReader::readWideString(ArrayRef<UTF16> &Dest) {
break;
++Length;
}
- uint32_t NewOffset = getOffset();
+ uint64_t NewOffset = getOffset();
setOffset(OriginalOffset);
if (auto EC = readArray(Dest, Length))
@@ -145,7 +145,7 @@ Error BinaryStreamReader::readSubstream(BinarySubstreamRef &Ref,
return readStreamRef(Ref.StreamData, Length);
}
-Error BinaryStreamReader::skip(uint32_t Amount) {
+Error BinaryStreamReader::skip(uint64_t Amount) {
if (Amount > bytesRemaining())
return make_error<BinaryStreamError>(stream_error_code::stream_too_short);
Offset += Amount;
@@ -166,7 +166,7 @@ uint8_t BinaryStreamReader::peek() const {
}
std::pair<BinaryStreamReader, BinaryStreamReader>
-BinaryStreamReader::split(uint32_t Off) const {
+BinaryStreamReader::split(uint64_t Off) const {
assert(getLength() >= Off);
BinaryStreamRef First = Stream.drop_front(Offset);
diff --git a/llvm/lib/Support/BinaryStreamRef.cpp b/llvm/lib/Support/BinaryStreamRef.cpp
index 53e71baad57a..6d79d95e1bf0 100644
--- a/llvm/lib/Support/BinaryStreamRef.cpp
+++ b/llvm/lib/Support/BinaryStreamRef.cpp
@@ -21,15 +21,15 @@ public:
llvm::support::endianness getEndian() const override {
return BBS.getEndian();
}
- Error readBytes(uint32_t Offset, uint32_t Size,
+ Error readBytes(uint64_t Offset, uint64_t Size,
ArrayRef<uint8_t> &Buffer) override {
return BBS.readBytes(Offset, Size, Buffer);
}
- Error readLongestContiguousChunk(uint32_t Offset,
+ Error readLongestContiguousChunk(uint64_t Offset,
ArrayRef<uint8_t> &Buffer) override {
return BBS.readLongestContiguousChunk(Offset, Buffer);
}
- uint32_t getLength() override { return BBS.getLength(); }
+ uint64_t getLength() override { return BBS.getLength(); }
private:
BinaryByteStream BBS;
@@ -44,17 +44,17 @@ public:
llvm::support::endianness getEndian() const override {
return BBS.getEndian();
}
- Error readBytes(uint32_t Offset, uint32_t Size,
+ Error readBytes(uint64_t Offset, uint64_t Size,
ArrayRef<uint8_t> &Buffer) override {
return BBS.readBytes(Offset, Size, Buffer);
}
- Error readLongestContiguousChunk(uint32_t Offset,
+ Error readLongestContiguousChunk(uint64_t Offset,
ArrayRef<uint8_t> &Buffer) override {
return BBS.readLongestContiguousChunk(Offset, Buffer);
}
- uint32_t getLength() override { return BBS.getLength(); }
+ uint64_t getLength() override { return BBS.getLength(); }
- Error writeBytes(uint32_t Offset, ArrayRef<uint8_t> Data) override {
+ Error writeBytes(uint64_t Offset, ArrayRef<uint8_t> Data) override {
return BBS.writeBytes(Offset, Data);
}
Error commit() override { return BBS.commit(); }
@@ -66,8 +66,8 @@ private:
BinaryStreamRef::BinaryStreamRef(BinaryStream &Stream)
: BinaryStreamRefBase(Stream) {}
-BinaryStreamRef::BinaryStreamRef(BinaryStream &Stream, uint32_t Offset,
- Optional<uint32_t> Length)
+BinaryStreamRef::BinaryStreamRef(BinaryStream &Stream, uint64_t Offset,
+ Optional<uint64_t> Length)
: BinaryStreamRefBase(Stream, Offset, Length) {}
BinaryStreamRef::BinaryStreamRef(ArrayRef<uint8_t> Data, endianness Endian)
: BinaryStreamRefBase(std::make_shared<ArrayRefImpl>(Data, Endian), 0,
@@ -76,7 +76,7 @@ BinaryStreamRef::BinaryStreamRef(StringRef Data, endianness Endian)
: BinaryStreamRef(makeArrayRef(Data.bytes_begin(), Data.bytes_end()),
Endian) {}
-Error BinaryStreamRef::readBytes(uint32_t Offset, uint32_t Size,
+Error BinaryStreamRef::readBytes(uint64_t Offset, uint64_t Size,
ArrayRef<uint8_t> &Buffer) const {
if (auto EC = checkOffsetForRead(Offset, Size))
return EC;
@@ -84,7 +84,7 @@ Error BinaryStreamRef::readBytes(uint32_t Offset, uint32_t Size,
}
Error BinaryStreamRef::readLongestContiguousChunk(
- uint32_t Offset, ArrayRef<uint8_t> &Buffer) const {
+ uint64_t Offset, ArrayRef<uint8_t> &Buffer) const {
if (auto EC = checkOffsetForRead(Offset, 1))
return EC;
@@ -94,7 +94,7 @@ Error BinaryStreamRef::readLongestContiguousChunk(
// This StreamRef might refer to a smaller window over a larger stream. In
// that case we will have read out more bytes than we should return, because
// we should not read past the end of the current view.
- uint32_t MaxLength = getLength() - Offset;
+ uint64_t MaxLength = getLength() - Offset;
if (Buffer.size() > MaxLength)
Buffer = Buffer.slice(0, MaxLength);
return Error::success();
@@ -104,8 +104,8 @@ WritableBinaryStreamRef::WritableBinaryStreamRef(WritableBinaryStream &Stream)
: BinaryStreamRefBase(Stream) {}
WritableBinaryStreamRef::WritableBinaryStreamRef(WritableBinaryStream &Stream,
- uint32_t Offset,
- Optional<uint32_t> Length)
+ uint64_t Offset,
+ Optional<uint64_t> Length)
: BinaryStreamRefBase(Stream, Offset, Length) {}
WritableBinaryStreamRef::WritableBinaryStreamRef(MutableArrayRef<uint8_t> Data,
@@ -113,8 +113,7 @@ WritableBinaryStreamRef::WritableBinaryStreamRef(MutableArrayRef<uint8_t> Data,
: BinaryStreamRefBase(std::make_shared<MutableArrayRefImpl>(Data, Endian),
0, Data.size()) {}
-
-Error WritableBinaryStreamRef::writeBytes(uint32_t Offset,
+Error WritableBinaryStreamRef::writeBytes(uint64_t Offset,
ArrayRef<uint8_t> Data) const {
if (auto EC = checkOffsetForWrite(Offset, Data.size()))
return EC;
diff --git a/llvm/lib/Support/BinaryStreamWriter.cpp b/llvm/lib/Support/BinaryStreamWriter.cpp
index 986e18da281d..8c9efa0ed9a9 100644
--- a/llvm/lib/Support/BinaryStreamWriter.cpp
+++ b/llvm/lib/Support/BinaryStreamWriter.cpp
@@ -62,7 +62,7 @@ Error BinaryStreamWriter::writeStreamRef(BinaryStreamRef Ref) {
return writeStreamRef(Ref, Ref.getLength());
}
-Error BinaryStreamWriter::writeStreamRef(BinaryStreamRef Ref, uint32_t Length) {
+Error BinaryStreamWriter::writeStreamRef(BinaryStreamRef Ref, uint64_t Length) {
BinaryStreamReader SrcReader(Ref.slice(0, Length));
// This is a bit tricky. If we just call readBytes, we are requiring that it
// return us the entire stream as a contiguous buffer. There is no guarantee
@@ -80,7 +80,7 @@ Error BinaryStreamWriter::writeStreamRef(BinaryStreamRef Ref, uint32_t Length) {
}
std::pair<BinaryStreamWriter, BinaryStreamWriter>
-BinaryStreamWriter::split(uint32_t Off) const {
+BinaryStreamWriter::split(uint64_t Off) const {
assert(getLength() >= Off);
WritableBinaryStreamRef First = Stream.drop_front(Offset);
@@ -93,7 +93,7 @@ BinaryStreamWriter::split(uint32_t Off) const {
}
Error BinaryStreamWriter::padToAlignment(uint32_t Align) {
- uint32_t NewOffset = alignTo(Offset, Align);
+ uint64_t NewOffset = alignTo(Offset, Align);
if (NewOffset > getLength())
return make_error<BinaryStreamError>(stream_error_code::stream_too_short);
while (Offset < NewOffset)
diff --git a/llvm/lib/LTO/Caching.cpp b/llvm/lib/Support/Caching.cpp
index 75a89e729f43..a2fe37a26617 100644
--- a/llvm/lib/LTO/Caching.cpp
+++ b/llvm/lib/Support/Caching.cpp
@@ -1,4 +1,4 @@
-//===-Caching.cpp - LLVM Link Time Optimizer Cache Handling ---------------===//
+//===-Caching.cpp - LLVM Local File Cache ---------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,18 +6,17 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the Caching for ThinLTO.
+// This file implements the localCache function, which simplifies creating,
+// adding to, and querying a local file system cache. localCache takes care of
+// periodically pruning older files from the cache using a CachePruningPolicy.
//
//===----------------------------------------------------------------------===//
-#include "llvm/LTO/Caching.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Caching.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
-#include "llvm/Support/Process.h"
-#include "llvm/Support/raw_ostream.h"
#if !defined(_MSC_VER) && !defined(__MINGW32__)
#include <unistd.h>
@@ -26,14 +25,21 @@
#endif
using namespace llvm;
-using namespace llvm::lto;
-Expected<NativeObjectCache> lto::localCache(StringRef CacheDirectoryPath,
- AddBufferFn AddBuffer) {
- if (std::error_code EC = sys::fs::create_directories(CacheDirectoryPath))
+Expected<FileCache> llvm::localCache(Twine CacheNameRef,
+ Twine TempFilePrefixRef,
+ Twine CacheDirectoryPathRef,
+ AddBufferFn AddBuffer) {
+ if (std::error_code EC = sys::fs::create_directories(CacheDirectoryPathRef))
return errorCodeToError(EC);
- return [=](unsigned Task, StringRef Key) -> AddStreamFn {
+ // Create local copies which are safely captured-by-copy in lambdas
+ SmallString<64> CacheName, TempFilePrefix, CacheDirectoryPath;
+ CacheNameRef.toVector(CacheName);
+ TempFilePrefixRef.toVector(TempFilePrefix);
+ CacheDirectoryPathRef.toVector(CacheDirectoryPath);
+
+ return [=](unsigned Task, StringRef Key) -> Expected<AddStreamFn> {
// This choice of file name allows the cache to be pruned (see pruneCache()
// in include/llvm/Support/CachePruning.h).
SmallString<64> EntryPath;
@@ -65,12 +71,12 @@ Expected<NativeObjectCache> lto::localCache(StringRef CacheDirectoryPath,
// Since the file is probably being deleted we handle it in the same way as
// if the file did not exist at all.
if (EC != errc::no_such_file_or_directory && EC != errc::permission_denied)
- report_fatal_error(Twine("Failed to open cache file ") + EntryPath +
- ": " + EC.message() + "\n");
+ return createStringError(EC, Twine("Failed to open cache file ") +
+ EntryPath + ": " + EC.message() + "\n");
- // This native object stream is responsible for commiting the resulting
- // file to the cache and calling AddBuffer to add it to the link.
- struct CacheStream : NativeObjectStream {
+    // This file stream is responsible for committing the resulting file to the
+ // cache and calling AddBuffer to add it to the link.
+ struct CacheStream : CachedFileStream {
AddBufferFn AddBuffer;
sys::fs::TempFile TempFile;
std::string EntryPath;
@@ -79,11 +85,14 @@ Expected<NativeObjectCache> lto::localCache(StringRef CacheDirectoryPath,
CacheStream(std::unique_ptr<raw_pwrite_stream> OS, AddBufferFn AddBuffer,
sys::fs::TempFile TempFile, std::string EntryPath,
unsigned Task)
- : NativeObjectStream(std::move(OS)), AddBuffer(std::move(AddBuffer)),
+ : CachedFileStream(std::move(OS)), AddBuffer(std::move(AddBuffer)),
TempFile(std::move(TempFile)), EntryPath(std::move(EntryPath)),
Task(Task) {}
~CacheStream() {
+      // TODO: Manually commit rather than using a non-trivial destructor,
+      // allowing report_fatal_error calls to be replaced with returning an Error.
+
// Make sure the stream is closed before committing it.
OS.reset();
@@ -131,16 +140,17 @@ Expected<NativeObjectCache> lto::localCache(StringRef CacheDirectoryPath,
}
};
- return [=](size_t Task) -> std::unique_ptr<NativeObjectStream> {
+ return [=](size_t Task) -> Expected<std::unique_ptr<CachedFileStream>> {
// Write to a temporary to avoid race condition
SmallString<64> TempFilenameModel;
- sys::path::append(TempFilenameModel, CacheDirectoryPath, "Thin-%%%%%%.tmp.o");
+ sys::path::append(TempFilenameModel, CacheDirectoryPath,
+ TempFilePrefix + "-%%%%%%.tmp.o");
Expected<sys::fs::TempFile> Temp = sys::fs::TempFile::create(
TempFilenameModel, sys::fs::owner_read | sys::fs::owner_write);
- if (!Temp) {
- errs() << "Error: " << toString(Temp.takeError()) << "\n";
- report_fatal_error("ThinLTO: Can't get a temporary file");
- }
+ if (!Temp)
+ return createStringError(errc::io_error,
+ toString(Temp.takeError()) + ": " + CacheName +
+ ": Can't get a temporary file");
// This CacheStream will move the temporary file into the cache when done.
return std::make_unique<CacheStream>(
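Only the shape of the new API is visible in this hunk, so the following is a rough usage sketch rather than the real ThinLTO call site; the cache name, temp-file prefix, and key are placeholders:

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Caching.h"
#include "llvm/Support/Error.h"
using namespace llvm;

Error useCache(StringRef CacheDir, AddBufferFn AddBuffer) {
  // The cache name and temp-file prefix are the new caller-chosen arguments.
  Expected<FileCache> Cache = localCache("MyTool", "my", CacheDir, AddBuffer);
  if (!Cache)
    return Cache.takeError();

  // Looking up an entry now reports failures as Error instead of aborting.
  Expected<AddStreamFn> AddStream = (*Cache)(/*Task=*/0, /*Key=*/"deadbeef");
  if (!AddStream)
    return AddStream.takeError();
  // By convention a null AddStreamFn means the entry was already cached and
  // has been handed to AddBuffer.
  return Error::success();
}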
diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp
index 4ae3ad4c2453..e64934aa90cc 100644
--- a/llvm/lib/Support/CommandLine.cpp
+++ b/llvm/lib/Support/CommandLine.cpp
@@ -1321,12 +1321,20 @@ bool cl::ParseCommandLineOptions(int argc, const char *const *argv,
Errs, LongOptionsUseDoubleDash);
}
+/// Reset all options at least once, so that we can parse different options.
void CommandLineParser::ResetAllOptionOccurrences() {
- // So that we can parse different command lines multiple times in succession
- // we reset all option values to look like they have never been seen before.
+ // Reset all option values to look like they have never been seen before.
+  // Options might be reset twice (they can be referenced in both OptionsMap
+  // and one of the other members), but that does no harm.
for (auto *SC : RegisteredSubCommands) {
for (auto &O : SC->OptionsMap)
O.second->reset();
+ for (Option *O : SC->PositionalOpts)
+ O->reset();
+ for (Option *O : SC->SinkOpts)
+ O->reset();
+ if (SC->ConsumeAfterOpt)
+ SC->ConsumeAfterOpt->reset();
}
}
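A sketch of the re-parse pattern this change supports, assuming the public cl::ResetAllOptionOccurrences() wrapper forwards to this method:

#include "llvm/Support/CommandLine.h"
#include <string>
using namespace llvm;

static cl::opt<std::string> Input(cl::Positional, cl::desc("<input>"));

void parseTwice(int Argc1, const char **Argv1, int Argc2, const char **Argv2) {
  cl::ParseCommandLineOptions(Argc1, Argv1);
  // Positional and sink options are now cleared too, so the second parse
  // starts from a clean slate instead of appending to the first one.
  cl::ResetAllOptionOccurrences();
  cl::ParseCommandLineOptions(Argc2, Argv2);
}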
@@ -2633,6 +2641,7 @@ void cl::AddExtraVersionPrinter(VersionPrinterTy func) {
}
StringMap<Option *> &cl::getRegisteredOptions(SubCommand &Sub) {
+ initCommonOptions();
auto &Subs = GlobalParser->RegisteredSubCommands;
(void)Subs;
assert(is_contained(Subs, &Sub));
diff --git a/llvm/lib/Support/CrashRecoveryContext.cpp b/llvm/lib/Support/CrashRecoveryContext.cpp
index 433d99df5932..b6aaf373a522 100644
--- a/llvm/lib/Support/CrashRecoveryContext.cpp
+++ b/llvm/lib/Support/CrashRecoveryContext.cpp
@@ -428,8 +428,7 @@ bool CrashRecoveryContext::RunSafely(function_ref<void()> Fn) {
#endif // !_MSC_VER
-LLVM_ATTRIBUTE_NORETURN
-void CrashRecoveryContext::HandleExit(int RetCode) {
+[[noreturn]] void CrashRecoveryContext::HandleExit(int RetCode) {
#if defined(_WIN32)
// SEH and VEH
::RaiseException(0xE0000000 | RetCode, 0, 0, NULL);
diff --git a/llvm/lib/Support/DebugOptions.h b/llvm/lib/Support/DebugOptions.h
index 4d5250649f6a..75e557d7d8d7 100644
--- a/llvm/lib/Support/DebugOptions.h
+++ b/llvm/lib/Support/DebugOptions.h
@@ -26,4 +26,4 @@ void initWithColorOptions();
void initDebugOptions();
void initRandomSeedOptions();
-} // namespace llvm \ No newline at end of file
+} // namespace llvm
diff --git a/llvm/lib/Support/DivisionByConstantInfo.cpp b/llvm/lib/Support/DivisionByConstantInfo.cpp
new file mode 100644
index 000000000000..077629670e40
--- /dev/null
+++ b/llvm/lib/Support/DivisionByConstantInfo.cpp
@@ -0,0 +1,107 @@
+//===----- DivisionByConstantInfo.cpp - division by constant -*- C++ -*----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// This file implements support for optimizing divisions by a constant
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/DivisionByConstantInfo.h"
+
+using namespace llvm;
+
+/// Calculate the magic numbers required to implement a signed integer division
+/// by a constant as a sequence of multiplies, adds and shifts. Requires that
+/// the divisor not be 0, 1, or -1. Taken from "Hacker's Delight", Henry S.
+/// Warren, Jr., Chapter 10.
+SignedDivisionByConstantInfo SignedDivisionByConstantInfo::get(const APInt &D) {
+ unsigned P;
+ APInt AD, ANC, Delta, Q1, R1, Q2, R2, T;
+ APInt SignedMin = APInt::getSignedMinValue(D.getBitWidth());
+ struct SignedDivisionByConstantInfo Retval;
+
+ AD = D.abs();
+ T = SignedMin + (D.lshr(D.getBitWidth() - 1));
+ ANC = T - 1 - T.urem(AD); // absolute value of NC
+ P = D.getBitWidth() - 1; // initialize P
+ Q1 = SignedMin.udiv(ANC); // initialize Q1 = 2P/abs(NC)
+ R1 = SignedMin - Q1 * ANC; // initialize R1 = rem(2P,abs(NC))
+ Q2 = SignedMin.udiv(AD); // initialize Q2 = 2P/abs(D)
+ R2 = SignedMin - Q2 * AD; // initialize R2 = rem(2P,abs(D))
+ do {
+ P = P + 1;
+ Q1 = Q1 << 1; // update Q1 = 2P/abs(NC)
+ R1 = R1 << 1; // update R1 = rem(2P/abs(NC))
+ if (R1.uge(ANC)) { // must be unsigned comparison
+ Q1 = Q1 + 1;
+ R1 = R1 - ANC;
+ }
+ Q2 = Q2 << 1; // update Q2 = 2P/abs(D)
+ R2 = R2 << 1; // update R2 = rem(2P/abs(D))
+ if (R2.uge(AD)) { // must be unsigned comparison
+ Q2 = Q2 + 1;
+ R2 = R2 - AD;
+ }
+ Delta = AD - R2;
+ } while (Q1.ult(Delta) || (Q1 == Delta && R1 == 0));
+
+ Retval.Magic = Q2 + 1;
+ if (D.isNegative())
+ Retval.Magic = -Retval.Magic; // resulting magic number
+ Retval.ShiftAmount = P - D.getBitWidth(); // resulting shift
+ return Retval;
+}
+
+/// Calculate the magic numbers required to implement an unsigned integer
+/// division by a constant as a sequence of multiplies, adds and shifts.
+/// Requires that the divisor not be 0. Taken from "Hacker's Delight", Henry
+/// S. Warren, Jr., chapter 10.
+/// LeadingZeros can be used to simplify the calculation if the upper bits
+/// of the divided value are known zero.
+UnsignedDivisonByConstantInfo
+UnsignedDivisonByConstantInfo::get(const APInt &D, unsigned LeadingZeros) {
+ unsigned P;
+ APInt NC, Delta, Q1, R1, Q2, R2;
+ struct UnsignedDivisonByConstantInfo Retval;
+ Retval.IsAdd = 0; // initialize "add" indicator
+ APInt AllOnes = APInt::getAllOnes(D.getBitWidth()).lshr(LeadingZeros);
+ APInt SignedMin = APInt::getSignedMinValue(D.getBitWidth());
+ APInt SignedMax = APInt::getSignedMaxValue(D.getBitWidth());
+
+ NC = AllOnes - (AllOnes - D).urem(D);
+ P = D.getBitWidth() - 1; // initialize P
+ Q1 = SignedMin.udiv(NC); // initialize Q1 = 2P/NC
+ R1 = SignedMin - Q1 * NC; // initialize R1 = rem(2P,NC)
+ Q2 = SignedMax.udiv(D); // initialize Q2 = (2P-1)/D
+ R2 = SignedMax - Q2 * D; // initialize R2 = rem((2P-1),D)
+ do {
+ P = P + 1;
+ if (R1.uge(NC - R1)) {
+ Q1 = Q1 + Q1 + 1; // update Q1
+ R1 = R1 + R1 - NC; // update R1
+ } else {
+ Q1 = Q1 + Q1; // update Q1
+ R1 = R1 + R1; // update R1
+ }
+ if ((R2 + 1).uge(D - R2)) {
+ if (Q2.uge(SignedMax))
+ Retval.IsAdd = 1;
+ Q2 = Q2 + Q2 + 1; // update Q2
+ R2 = R2 + R2 + 1 - D; // update R2
+ } else {
+ if (Q2.uge(SignedMin))
+ Retval.IsAdd = 1;
+ Q2 = Q2 + Q2; // update Q2
+ R2 = R2 + R2 + 1; // update R2
+ }
+ Delta = D - 1 - R2;
+ } while (P < D.getBitWidth() * 2 &&
+ (Q1.ult(Delta) || (Q1 == Delta && R1 == 0)));
+ Retval.Magic = Q2 + 1; // resulting magic number
+ Retval.ShiftAmount = P - D.getBitWidth(); // resulting shift
+ return Retval;
+}
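The APInt::magic() and magicu() routines removed above resurface here as free-standing helpers. A hedged sketch of the replacement entry points; the class spelling UnsignedDivisonByConstantInfo is kept exactly as it appears in this file, and the divide-by-7 constants in the comment are the familiar Hacker's Delight values, quoted as an assumption rather than output of this build:

#include "llvm/ADT/APInt.h"
#include "llvm/Support/DivisionByConstantInfo.h"
using namespace llvm;

void magicExample() {
  APInt D(32, 7);

  // Replacement for the removed APInt::magic().
  SignedDivisionByConstantInfo S = SignedDivisionByConstantInfo::get(D);
  // S.Magic is the multiplier and S.ShiftAmount the post-multiply shift
  // (expected to be 0x92492493 and 2 for a signed 32-bit division by 7).
  (void)S;

  // Replacement for the removed APInt::magicu().
  UnsignedDivisonByConstantInfo U = UnsignedDivisonByConstantInfo::get(D, 0);
  // U.Magic, U.ShiftAmount and U.IsAdd drive the unsigned lowering.
  (void)U;
}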
diff --git a/llvm/lib/Support/Error.cpp b/llvm/lib/Support/Error.cpp
index e7ab4387dfd1..8bfc8ee7a8cc 100644
--- a/llvm/lib/Support/Error.cpp
+++ b/llvm/lib/Support/Error.cpp
@@ -80,8 +80,11 @@ std::error_code inconvertibleErrorCode() {
}
std::error_code FileError::convertToErrorCode() const {
- return std::error_code(static_cast<int>(ErrorErrorCode::FileError),
- *ErrorErrorCat);
+ std::error_code NestedEC = Err->convertToErrorCode();
+ if (NestedEC == inconvertibleErrorCode())
+ return std::error_code(static_cast<int>(ErrorErrorCode::FileError),
+ *ErrorErrorCat);
+ return NestedEC;
}
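A small check of the new behaviour, assuming createFileError and errorToErrorCode from Error.h behave as documented:

#include "llvm/Support/Error.h"
#include <cassert>
#include <system_error>
using namespace llvm;

void fileErrorRoundTrip() {
  std::error_code EC = make_error_code(std::errc::no_such_file_or_directory);
  Error E = createFileError("config.json", EC);
  // The wrapped code now survives the conversion instead of collapsing to the
  // generic FileError sentinel.
  assert(errorToErrorCode(std::move(E)) == EC);
}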
Error errorCodeToError(std::error_code EC) {
@@ -96,7 +99,7 @@ std::error_code errorToErrorCode(Error Err) {
EC = EI.convertToErrorCode();
});
if (EC == inconvertibleErrorCode())
- report_fatal_error(EC.message());
+ report_fatal_error(Twine(EC.message()));
return EC;
}
@@ -144,7 +147,7 @@ void report_fatal_error(Error Err, bool GenCrashDiag) {
raw_string_ostream ErrStream(ErrMsg);
logAllUnhandledErrors(std::move(Err), ErrStream);
}
- report_fatal_error(ErrMsg);
+ report_fatal_error(Twine(ErrMsg));
}
} // end namespace llvm
diff --git a/llvm/lib/Support/ErrorHandling.cpp b/llvm/lib/Support/ErrorHandling.cpp
index ce6344284f06..80c0e00439a5 100644
--- a/llvm/lib/Support/ErrorHandling.cpp
+++ b/llvm/lib/Support/ErrorHandling.cpp
@@ -83,10 +83,6 @@ void llvm::report_fatal_error(const char *Reason, bool GenCrashDiag) {
report_fatal_error(Twine(Reason), GenCrashDiag);
}
-void llvm::report_fatal_error(const std::string &Reason, bool GenCrashDiag) {
- report_fatal_error(Twine(Reason), GenCrashDiag);
-}
-
void llvm::report_fatal_error(StringRef Reason, bool GenCrashDiag) {
report_fatal_error(Twine(Reason), GenCrashDiag);
}
@@ -105,7 +101,7 @@ void llvm::report_fatal_error(const Twine &Reason, bool GenCrashDiag) {
}
if (handler) {
- handler(handlerData, Reason.str(), GenCrashDiag);
+ handler(handlerData, Reason.str().c_str(), GenCrashDiag);
} else {
// Blast the result out to stderr. We don't try hard to make sure this
// succeeds (e.g. handling EINTR) and we can't use errs() here because
@@ -218,11 +214,11 @@ void llvm::llvm_unreachable_internal(const char *msg, const char *file,
#endif
}
-static void bindingsErrorHandler(void *user_data, const std::string& reason,
+static void bindingsErrorHandler(void *user_data, const char *reason,
bool gen_crash_diag) {
LLVMFatalErrorHandler handler =
LLVM_EXTENSION reinterpret_cast<LLVMFatalErrorHandler>(user_data);
- handler(reason.c_str());
+ handler(reason);
}
void LLVMInstallFatalErrorHandler(LLVMFatalErrorHandler Handler) {
@@ -247,7 +243,10 @@ std::error_code llvm::mapWindowsError(unsigned EV) {
switch (EV) {
MAP_ERR_TO_COND(ERROR_ACCESS_DENIED, permission_denied);
MAP_ERR_TO_COND(ERROR_ALREADY_EXISTS, file_exists);
+ MAP_ERR_TO_COND(ERROR_BAD_NETPATH, no_such_file_or_directory);
+ MAP_ERR_TO_COND(ERROR_BAD_PATHNAME, no_such_file_or_directory);
MAP_ERR_TO_COND(ERROR_BAD_UNIT, no_such_device);
+ MAP_ERR_TO_COND(ERROR_BROKEN_PIPE, broken_pipe);
MAP_ERR_TO_COND(ERROR_BUFFER_OVERFLOW, filename_too_long);
MAP_ERR_TO_COND(ERROR_BUSY, device_or_resource_busy);
MAP_ERR_TO_COND(ERROR_BUSY_DRIVE, device_or_resource_busy);
@@ -269,18 +268,20 @@ std::error_code llvm::mapWindowsError(unsigned EV) {
MAP_ERR_TO_COND(ERROR_INVALID_FUNCTION, function_not_supported);
MAP_ERR_TO_COND(ERROR_INVALID_HANDLE, invalid_argument);
MAP_ERR_TO_COND(ERROR_INVALID_NAME, invalid_argument);
+ MAP_ERR_TO_COND(ERROR_INVALID_PARAMETER, invalid_argument);
MAP_ERR_TO_COND(ERROR_LOCK_VIOLATION, no_lock_available);
MAP_ERR_TO_COND(ERROR_LOCKED, no_lock_available);
MAP_ERR_TO_COND(ERROR_NEGATIVE_SEEK, invalid_argument);
MAP_ERR_TO_COND(ERROR_NOACCESS, permission_denied);
MAP_ERR_TO_COND(ERROR_NOT_ENOUGH_MEMORY, not_enough_memory);
MAP_ERR_TO_COND(ERROR_NOT_READY, resource_unavailable_try_again);
+ MAP_ERR_TO_COND(ERROR_NOT_SUPPORTED, not_supported);
MAP_ERR_TO_COND(ERROR_OPEN_FAILED, io_error);
MAP_ERR_TO_COND(ERROR_OPEN_FILES, device_or_resource_busy);
MAP_ERR_TO_COND(ERROR_OUTOFMEMORY, not_enough_memory);
MAP_ERR_TO_COND(ERROR_PATH_NOT_FOUND, no_such_file_or_directory);
- MAP_ERR_TO_COND(ERROR_BAD_NETPATH, no_such_file_or_directory);
MAP_ERR_TO_COND(ERROR_READ_FAULT, io_error);
+ MAP_ERR_TO_COND(ERROR_REPARSE_TAG_INVALID, invalid_argument);
MAP_ERR_TO_COND(ERROR_RETRY, resource_unavailable_try_again);
MAP_ERR_TO_COND(ERROR_SEEK, io_error);
MAP_ERR_TO_COND(ERROR_SHARING_VIOLATION, permission_denied);
diff --git a/llvm/lib/Support/ExtensibleRTTI.cpp b/llvm/lib/Support/ExtensibleRTTI.cpp
index 1c98d1bb8feb..a6a5c196fb35 100644
--- a/llvm/lib/Support/ExtensibleRTTI.cpp
+++ b/llvm/lib/Support/ExtensibleRTTI.cpp
@@ -1,9 +1,8 @@
//===----- lib/Support/ExtensibleRTTI.cpp - ExtensibleRTTI utilities ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Support/FileUtilities.cpp b/llvm/lib/Support/FileUtilities.cpp
index e4a86bb69de4..dbe28e56b2c3 100644
--- a/llvm/lib/Support/FileUtilities.cpp
+++ b/llvm/lib/Support/FileUtilities.cpp
@@ -300,8 +300,7 @@ llvm::Error llvm::writeFileAtomically(
std::function<llvm::Error(llvm::raw_ostream &)> Writer) {
SmallString<128> GeneratedUniqPath;
int TempFD;
- if (sys::fs::createUniqueFile(TempPathModel.str(), TempFD,
- GeneratedUniqPath)) {
+ if (sys::fs::createUniqueFile(TempPathModel, TempFD, GeneratedUniqPath)) {
return llvm::make_error<AtomicFileWriteError>(
atomic_write_error::failed_to_create_uniq_file);
}
@@ -319,8 +318,7 @@ llvm::Error llvm::writeFileAtomically(
atomic_write_error::output_stream_error);
}
- if (sys::fs::rename(/*from=*/GeneratedUniqPath.c_str(),
- /*to=*/FinalPath.str().c_str())) {
+ if (sys::fs::rename(/*from=*/GeneratedUniqPath, /*to=*/FinalPath)) {
return llvm::make_error<AtomicFileWriteError>(
atomic_write_error::failed_to_rename_temp_file);
}
diff --git a/llvm/lib/Support/GraphWriter.cpp b/llvm/lib/Support/GraphWriter.cpp
index b41869aba95f..696e6b7a99d8 100644
--- a/llvm/lib/Support/GraphWriter.cpp
+++ b/llvm/lib/Support/GraphWriter.cpp
@@ -23,11 +23,12 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
-#include <system_error>
#include <string>
+#include <system_error>
#include <vector>
using namespace llvm;
@@ -94,11 +95,8 @@ StringRef llvm::DOT::getColorString(unsigned ColorNumber) {
static std::string replaceIllegalFilenameChars(std::string Filename,
const char ReplacementChar) {
-#ifdef _WIN32
- std::string IllegalChars = "\\/:?\"<>|";
-#else
- std::string IllegalChars = "/";
-#endif
+ std::string IllegalChars =
+ is_style_windows(sys::path::Style::native) ? "\\/:?\"<>|" : "/";
for (char IllegalChar : IllegalChars) {
std::replace(Filename.begin(), Filename.end(), IllegalChar,
diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp
index f873ff06f1f7..7b14616f6fea 100644
--- a/llvm/lib/Support/Host.cpp
+++ b/llvm/lib/Support/Host.cpp
@@ -772,6 +772,22 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
*Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT;
break;
+ // Tigerlake:
+ case 0x8c:
+ case 0x8d:
+ CPU = "tigerlake";
+ *Type = X86::INTEL_COREI7;
+ *Subtype = X86::INTEL_COREI7_TIGERLAKE;
+ break;
+
+ // Alderlake:
+ case 0x97:
+ case 0x9a:
+ CPU = "alderlake";
+ *Type = X86::INTEL_COREI7;
+ *Subtype = X86::INTEL_COREI7_ALDERLAKE;
+ break;
+
// Icelake Xeon:
case 0x6a:
case 0x6c:
@@ -1055,8 +1071,10 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
setFeature(X86::FEATURE_FMA);
if ((ECX >> 19) & 1)
setFeature(X86::FEATURE_SSE4_1);
- if ((ECX >> 20) & 1)
+ if ((ECX >> 20) & 1) {
setFeature(X86::FEATURE_SSE4_2);
+ setFeature(X86::FEATURE_CRC32);
+ }
if ((ECX >> 23) & 1)
setFeature(X86::FEATURE_POPCNT);
if ((ECX >> 25) & 1)
@@ -1338,6 +1356,16 @@ StringRef sys::getHostCPUName() {
return "generic";
}
}
+#elif defined(__riscv)
+StringRef sys::getHostCPUName() {
+#if __riscv_xlen == 64
+ return "generic-rv64";
+#elif __riscv_xlen == 32
+ return "generic-rv32";
+#else
+#error "Unhandled value of __riscv_xlen"
+#endif
+}
#else
StringRef sys::getHostCPUName() { return "generic"; }
namespace llvm {
@@ -1502,6 +1530,7 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
Features["cx16"] = (ECX >> 13) & 1;
Features["sse4.1"] = (ECX >> 19) & 1;
Features["sse4.2"] = (ECX >> 20) & 1;
+ Features["crc32"] = Features["sse4.2"];
Features["movbe"] = (ECX >> 22) & 1;
Features["popcnt"] = (ECX >> 23) & 1;
Features["aes"] = (ECX >> 25) & 1;
@@ -1617,6 +1646,7 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
// For more info, see X86 ISA docs.
Features["pconfig"] = HasLeaf7 && ((EDX >> 18) & 1);
Features["amx-bf16"] = HasLeaf7 && ((EDX >> 22) & 1) && HasAMXSave;
+ Features["avx512fp16"] = HasLeaf7 && ((EDX >> 23) & 1) && HasAVX512Save;
Features["amx-tile"] = HasLeaf7 && ((EDX >> 24) & 1) && HasAMXSave;
Features["amx-int8"] = HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave;
bool HasLeaf7Subleaf1 =
diff --git a/llvm/lib/Support/JSON.cpp b/llvm/lib/Support/JSON.cpp
index dbfd673553f4..17b36ed51850 100644
--- a/llvm/lib/Support/JSON.cpp
+++ b/llvm/lib/Support/JSON.cpp
@@ -109,6 +109,7 @@ void Value::copyFrom(const Value &M) {
case T_Boolean:
case T_Double:
case T_Integer:
+ case T_UINT64:
memcpy(&Union, &M.Union, sizeof(Union));
break;
case T_StringRef:
@@ -133,6 +134,7 @@ void Value::moveFrom(const Value &&M) {
case T_Boolean:
case T_Double:
case T_Integer:
+ case T_UINT64:
memcpy(&Union, &M.Union, sizeof(Union));
break;
case T_StringRef:
@@ -159,6 +161,7 @@ void Value::destroy() {
case T_Boolean:
case T_Double:
case T_Integer:
+ case T_UINT64:
break;
case T_StringRef:
as<StringRef>().~StringRef();
@@ -750,6 +753,8 @@ void llvm::json::OStream::value(const Value &V) {
valueBegin();
if (V.Type == Value::T_Integer)
OS << *V.getAsInteger();
+ else if (V.Type == Value::T_UINT64)
+ OS << *V.getAsUINT64();
else
OS << format("%.*g", std::numeric_limits<double>::max_digits10,
*V.getAsNumber());
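The new T_UINT64 kind lets values above INT64_MAX round-trip without detouring through double. A hedged sketch, assuming json::Value also gained a matching uint64_t constructor next to the getAsUINT64() accessor used above:

#include "llvm/Support/JSON.h"
#include <cassert>
#include <cstdint>
using namespace llvm;

void jsonUint64Example() {
  uint64_t Big = UINT64_MAX; // larger than INT64_MAX, not exact as a double
  json::Value V(Big);
  assert(V.getAsUINT64() && *V.getAsUINT64() == Big);
}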
diff --git a/llvm/lib/Support/KnownBits.cpp b/llvm/lib/Support/KnownBits.cpp
index d997bd85f1e0..90483817c302 100644
--- a/llvm/lib/Support/KnownBits.cpp
+++ b/llvm/lib/Support/KnownBits.cpp
@@ -404,7 +404,7 @@ KnownBits KnownBits::abs(bool IntMinIsPoison) const {
// We only know that the absolute values's MSB will be zero if INT_MIN is
// poison, or there is a set bit that isn't the sign bit (otherwise it could
// be INT_MIN).
- if (IntMinIsPoison || (!One.isNullValue() && !One.isMinSignedValue()))
+ if (IntMinIsPoison || (!One.isZero() && !One.isMinSignedValue()))
KnownAbs.Zero.setSignBit();
// FIXME: Handle known negative input?
@@ -412,10 +412,13 @@ KnownBits KnownBits::abs(bool IntMinIsPoison) const {
return KnownAbs;
}
-KnownBits KnownBits::mul(const KnownBits &LHS, const KnownBits &RHS) {
+KnownBits KnownBits::mul(const KnownBits &LHS, const KnownBits &RHS,
+ bool SelfMultiply) {
unsigned BitWidth = LHS.getBitWidth();
assert(BitWidth == RHS.getBitWidth() && !LHS.hasConflict() &&
!RHS.hasConflict() && "Operand mismatch");
+ assert((!SelfMultiply || (LHS.One == RHS.One && LHS.Zero == RHS.Zero)) &&
+ "Self multiplication knownbits mismatch");
// Compute a conservative estimate for high known-0 bits.
unsigned LeadZ =
@@ -489,6 +492,14 @@ KnownBits KnownBits::mul(const KnownBits &LHS, const KnownBits &RHS) {
Res.Zero.setHighBits(LeadZ);
Res.Zero |= (~BottomKnown).getLoBits(ResultBitsKnown);
Res.One = BottomKnown.getLoBits(ResultBitsKnown);
+
+ // If we're self-multiplying then bit[1] is guaranteed to be zero.
+ if (SelfMultiply && BitWidth > 1) {
+ assert(Res.One[1] == 0 &&
+ "Self-multiplication failed Quadratic Reciprocity!");
+ Res.Zero.setBit(1);
+ }
+
return Res;
}
diff --git a/llvm/lib/Support/LockFileManager.cpp b/llvm/lib/Support/LockFileManager.cpp
index a2b56ab295c4..5fd52999adb5 100644
--- a/llvm/lib/Support/LockFileManager.cpp
+++ b/llvm/lib/Support/LockFileManager.cpp
@@ -35,7 +35,7 @@
#include <unistd.h>
#endif
-#if defined(__APPLE__) && defined(__MAC_OS_X_VERSION_MIN_REQUIRED) && (__MAC_OS_X_VERSION_MIN_REQUIRED > 1050)
+#if defined(__APPLE__) && defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && (__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ > 1050)
#define USE_OSX_GETHOSTUUID 1
#else
#define USE_OSX_GETHOSTUUID 0
diff --git a/llvm/lib/Support/MD5.cpp b/llvm/lib/Support/MD5.cpp
index 5e0b076f176e..9dceb4d418cd 100644
--- a/llvm/lib/Support/MD5.cpp
+++ b/llvm/lib/Support/MD5.cpp
@@ -67,11 +67,11 @@
// SET reads 4 input bytes in little-endian byte order and stores them
// in a properly aligned word in host byte order.
#define SET(n) \
- (block[(n)] = \
- (MD5_u32plus) ptr[(n) * 4] | ((MD5_u32plus) ptr[(n) * 4 + 1] << 8) | \
- ((MD5_u32plus) ptr[(n) * 4 + 2] << 16) | \
- ((MD5_u32plus) ptr[(n) * 4 + 3] << 24))
-#define GET(n) (block[(n)])
+ (InternalState.block[(n)] = (MD5_u32plus)ptr[(n)*4] | \
+ ((MD5_u32plus)ptr[(n)*4 + 1] << 8) | \
+ ((MD5_u32plus)ptr[(n)*4 + 2] << 16) | \
+ ((MD5_u32plus)ptr[(n)*4 + 3] << 24))
+#define GET(n) (InternalState.block[(n)])
using namespace llvm;
@@ -85,10 +85,10 @@ const uint8_t *MD5::body(ArrayRef<uint8_t> Data) {
ptr = Data.data();
- a = this->a;
- b = this->b;
- c = this->c;
- d = this->d;
+ a = InternalState.a;
+ b = InternalState.b;
+ c = InternalState.c;
+ d = InternalState.d;
do {
saved_a = a;
@@ -176,10 +176,10 @@ const uint8_t *MD5::body(ArrayRef<uint8_t> Data) {
ptr += 64;
} while (Size -= 64);
- this->a = a;
- this->b = b;
- this->c = c;
- this->d = d;
+ InternalState.a = a;
+ InternalState.b = b;
+ InternalState.c = c;
+ InternalState.d = d;
return ptr;
}
@@ -193,10 +193,10 @@ void MD5::update(ArrayRef<uint8_t> Data) {
const uint8_t *Ptr = Data.data();
unsigned long Size = Data.size();
- saved_lo = lo;
- if ((lo = (saved_lo + Size) & 0x1fffffff) < saved_lo)
- hi++;
- hi += Size >> 29;
+ saved_lo = InternalState.lo;
+ if ((InternalState.lo = (saved_lo + Size) & 0x1fffffff) < saved_lo)
+ InternalState.hi++;
+ InternalState.hi += Size >> 29;
used = saved_lo & 0x3f;
@@ -204,14 +204,14 @@ void MD5::update(ArrayRef<uint8_t> Data) {
free = 64 - used;
if (Size < free) {
- memcpy(&buffer[used], Ptr, Size);
+ memcpy(&InternalState.buffer[used], Ptr, Size);
return;
}
- memcpy(&buffer[used], Ptr, free);
+ memcpy(&InternalState.buffer[used], Ptr, free);
Ptr = Ptr + free;
Size -= free;
- body(makeArrayRef(buffer, 64));
+ body(makeArrayRef(InternalState.buffer, 64));
}
if (Size >= 64) {
@@ -219,7 +219,7 @@ void MD5::update(ArrayRef<uint8_t> Data) {
Size &= 0x3f;
}
- memcpy(buffer, Ptr, Size);
+ memcpy(InternalState.buffer, Ptr, Size);
}
/// Add the bytes in the StringRef \p Str to the hash.
@@ -235,31 +235,48 @@ void MD5::update(StringRef Str) {
void MD5::final(MD5Result &Result) {
unsigned long used, free;
- used = lo & 0x3f;
+ used = InternalState.lo & 0x3f;
- buffer[used++] = 0x80;
+ InternalState.buffer[used++] = 0x80;
free = 64 - used;
if (free < 8) {
- memset(&buffer[used], 0, free);
- body(makeArrayRef(buffer, 64));
+ memset(&InternalState.buffer[used], 0, free);
+ body(makeArrayRef(InternalState.buffer, 64));
used = 0;
free = 64;
}
- memset(&buffer[used], 0, free - 8);
+ memset(&InternalState.buffer[used], 0, free - 8);
- lo <<= 3;
- support::endian::write32le(&buffer[56], lo);
- support::endian::write32le(&buffer[60], hi);
+ InternalState.lo <<= 3;
+ support::endian::write32le(&InternalState.buffer[56], InternalState.lo);
+ support::endian::write32le(&InternalState.buffer[60], InternalState.hi);
- body(makeArrayRef(buffer, 64));
+ body(makeArrayRef(InternalState.buffer, 64));
- support::endian::write32le(&Result[0], a);
- support::endian::write32le(&Result[4], b);
- support::endian::write32le(&Result[8], c);
- support::endian::write32le(&Result[12], d);
+ support::endian::write32le(&Result[0], InternalState.a);
+ support::endian::write32le(&Result[4], InternalState.b);
+ support::endian::write32le(&Result[8], InternalState.c);
+ support::endian::write32le(&Result[12], InternalState.d);
+}
+
+StringRef MD5::final() {
+ final(Result);
+ return StringRef(reinterpret_cast<char *>(Result.Bytes.data()),
+ Result.Bytes.size());
+}
+
+StringRef MD5::result() {
+ auto StateToRestore = InternalState;
+
+ auto Hash = final();
+
+ // Restore the state
+ InternalState = StateToRestore;
+
+ return Hash;
}
SmallString<32> MD5::MD5Result::digest() const {
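The MD5 changes above fold the running digest state into a single InternalState member so the new result() can snapshot it, finalize a copy, and restore it. A small usage sketch, assuming the StringRef-returning final()/result() overloads declared alongside this change in MD5.h:

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/MD5.h"

void hashIncrementally() {
  llvm::MD5 Hash;
  Hash.update("hello");
  // result() finalizes a copy of the state, so further updates may follow.
  llvm::StringRef Partial = Hash.result(); // 16 raw digest bytes for "hello"
  Hash.update(" world");
  llvm::StringRef Full = Hash.final();     // digest for "hello world"
  (void)Partial;
  (void)Full;
}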
diff --git a/llvm/lib/Support/MSP430AttributeParser.cpp b/llvm/lib/Support/MSP430AttributeParser.cpp
new file mode 100644
index 000000000000..a9948a158fc0
--- /dev/null
+++ b/llvm/lib/Support/MSP430AttributeParser.cpp
@@ -0,0 +1,53 @@
+//===-- MSP430AttributeParser.cpp - MSP430 Attribute Parser ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/MSP430AttributeParser.h"
+#include "llvm/ADT/StringExtras.h"
+
+using namespace llvm;
+using namespace llvm::MSP430Attrs;
+
+constexpr std::array<MSP430AttributeParser::DisplayHandler, 4>
+ MSP430AttributeParser::DisplayRoutines{
+ {{MSP430Attrs::TagISA, &MSP430AttributeParser::parseISA},
+ {MSP430Attrs::TagCodeModel, &MSP430AttributeParser::parseCodeModel},
+ {MSP430Attrs::TagDataModel, &MSP430AttributeParser::parseDataModel},
+ {MSP430Attrs::TagEnumSize, &MSP430AttributeParser::parseEnumSize}}};
+
+Error MSP430AttributeParser::parseISA(AttrType Tag) {
+ static const char *StringVals[] = {"None", "MSP430", "MSP430X"};
+ return parseStringAttribute("ISA", Tag, makeArrayRef(StringVals));
+}
+
+Error MSP430AttributeParser::parseCodeModel(AttrType Tag) {
+ static const char *StringVals[] = {"None", "Small", "Large"};
+ return parseStringAttribute("Code Model", Tag, makeArrayRef(StringVals));
+}
+
+Error MSP430AttributeParser::parseDataModel(AttrType Tag) {
+ static const char *StringVals[] = {"None", "Small", "Large", "Restricted"};
+ return parseStringAttribute("Data Model", Tag, makeArrayRef(StringVals));
+}
+
+Error MSP430AttributeParser::parseEnumSize(AttrType Tag) {
+ static const char *StringVals[] = {"None", "Small", "Integer", "Don't Care"};
+ return parseStringAttribute("Enum Size", Tag, makeArrayRef(StringVals));
+}
+
+Error MSP430AttributeParser::handler(uint64_t Tag, bool &Handled) {
+ Handled = false;
+ for (const DisplayHandler &Disp : DisplayRoutines) {
+ if (uint64_t(Disp.Attribute) != Tag)
+ continue;
+ if (Error E = (this->*Disp.Routine)(static_cast<AttrType>(Tag)))
+ return E;
+ Handled = true;
+ break;
+ }
+ return Error::success();
+}
diff --git a/llvm/lib/Support/MSP430Attributes.cpp b/llvm/lib/Support/MSP430Attributes.cpp
new file mode 100644
index 000000000000..4483a6872559
--- /dev/null
+++ b/llvm/lib/Support/MSP430Attributes.cpp
@@ -0,0 +1,22 @@
+//===-- MSP430Attributes.cpp - MSP430 Attributes --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/MSP430Attributes.h"
+
+using namespace llvm;
+using namespace llvm::MSP430Attrs;
+
+static constexpr TagNameItem TagData[] = {{TagISA, "Tag_ISA"},
+ {TagCodeModel, "Tag_Code_Model"},
+ {TagDataModel, "Tag_Data_Model"},
+ {TagEnumSize, "Tag_Enum_Size"}};
+
+constexpr TagNameMap MSP430AttributeTags{TagData};
+const TagNameMap &llvm::MSP430Attrs::getMSP430AttributeTags() {
+ return MSP430AttributeTags;
+}
diff --git a/llvm/lib/Support/Parallel.cpp b/llvm/lib/Support/Parallel.cpp
index 9a2e1003da5a..71e3a1362f7e 100644
--- a/llvm/lib/Support/Parallel.cpp
+++ b/llvm/lib/Support/Parallel.cpp
@@ -151,7 +151,12 @@ static std::atomic<int> TaskGroupInstances;
// lock, only allow the first TaskGroup to run tasks in parallel. In the scenario
// of nested parallel_for_each(), only the outermost one runs in parallel.
TaskGroup::TaskGroup() : Parallel(TaskGroupInstances++ == 0) {}
-TaskGroup::~TaskGroup() { --TaskGroupInstances; }
+TaskGroup::~TaskGroup() {
+ // We must ensure that all the workloads have finished before decrementing the
+ // instances count.
+ L.sync();
+ --TaskGroupInstances;
+}
void TaskGroup::spawn(std::function<void()> F) {
if (Parallel) {
diff --git a/llvm/lib/Support/Path.cpp b/llvm/lib/Support/Path.cpp
index a724ba2faf93..3957547dfaaa 100644
--- a/llvm/lib/Support/Path.cpp
+++ b/llvm/lib/Support/Path.cpp
@@ -12,6 +12,7 @@
#include "llvm/Support/Path.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Config/config.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Errc.h"
@@ -37,15 +38,16 @@ namespace {
using llvm::sys::path::Style;
inline Style real_style(Style style) {
-#ifdef _WIN32
- return (style == Style::posix) ? Style::posix : Style::windows;
-#else
- return (style == Style::windows) ? Style::windows : Style::posix;
-#endif
+ if (style != Style::native)
+ return style;
+ if (is_style_posix(style))
+ return Style::posix;
+ return LLVM_WINDOWS_PREFER_FORWARD_SLASH ? Style::windows_slash
+ : Style::windows_backslash;
}
inline const char *separators(Style style) {
- if (real_style(style) == Style::windows)
+ if (is_style_windows(style))
return "\\/";
return "/";
}
@@ -66,7 +68,7 @@ namespace {
if (path.empty())
return path;
- if (real_style(style) == Style::windows) {
+ if (is_style_windows(style)) {
// C:
if (path.size() >= 2 &&
std::isalpha(static_cast<unsigned char>(path[0])) && path[1] == ':')
@@ -98,7 +100,7 @@ namespace {
size_t pos = str.find_last_of(separators(style), str.size() - 1);
- if (real_style(style) == Style::windows) {
+ if (is_style_windows(style)) {
if (pos == StringRef::npos)
pos = str.find_last_of(':', str.size() - 2);
}
@@ -113,7 +115,7 @@ namespace {
// directory in str, it returns StringRef::npos.
size_t root_dir_start(StringRef str, Style style) {
// case "c:/"
- if (real_style(style) == Style::windows) {
+ if (is_style_windows(style)) {
if (str.size() > 2 && str[1] == ':' && is_separator(str[2], style))
return 2;
}
@@ -259,7 +261,7 @@ const_iterator &const_iterator::operator++() {
// Root dir.
if (was_net ||
// c:/
- (real_style(S) == Style::windows && Component.endswith(":"))) {
+ (is_style_windows(S) && Component.endswith(":"))) {
Component = Path.substr(Position, 1);
return *this;
}
@@ -348,7 +350,7 @@ StringRef root_path(StringRef path, Style style) {
if (b != e) {
bool has_net =
b->size() > 2 && is_separator((*b)[0], style) && (*b)[1] == (*b)[0];
- bool has_drive = (real_style(style) == Style::windows) && b->endswith(":");
+ bool has_drive = is_style_windows(style) && b->endswith(":");
if (has_net || has_drive) {
if ((++pos != e) && is_separator((*pos)[0], style)) {
@@ -373,7 +375,7 @@ StringRef root_name(StringRef path, Style style) {
if (b != e) {
bool has_net =
b->size() > 2 && is_separator((*b)[0], style) && (*b)[1] == (*b)[0];
- bool has_drive = (real_style(style) == Style::windows) && b->endswith(":");
+ bool has_drive = is_style_windows(style) && b->endswith(":");
if (has_net || has_drive) {
// just {C:,//net}, return the first component.
@@ -390,7 +392,7 @@ StringRef root_directory(StringRef path, Style style) {
if (b != e) {
bool has_net =
b->size() > 2 && is_separator((*b)[0], style) && (*b)[1] == (*b)[0];
- bool has_drive = (real_style(style) == Style::windows) && b->endswith(":");
+ bool has_drive = is_style_windows(style) && b->endswith(":");
if ((has_net || has_drive) &&
// {C:,//net}, skip to the next component.
@@ -497,7 +499,7 @@ void replace_extension(SmallVectorImpl<char> &path, const Twine &extension,
static bool starts_with(StringRef Path, StringRef Prefix,
Style style = Style::native) {
// Windows prefix matching : case and separator insensitive
- if (real_style(style) == Style::windows) {
+ if (is_style_windows(style)) {
if (Path.size() < Prefix.size())
return false;
for (size_t I = 0, E = Prefix.size(); I != E; ++I) {
@@ -548,8 +550,10 @@ void native(const Twine &path, SmallVectorImpl<char> &result, Style style) {
void native(SmallVectorImpl<char> &Path, Style style) {
if (Path.empty())
return;
- if (real_style(style) == Style::windows) {
- std::replace(Path.begin(), Path.end(), '/', '\\');
+ if (is_style_windows(style)) {
+ for (char &Ch : Path)
+ if (is_separator(Ch, style))
+ Ch = preferred_separator(style);
if (Path[0] == '~' && (Path.size() == 1 || is_separator(Path[1], style))) {
SmallString<128> PathHome;
home_directory(PathHome);
@@ -557,14 +561,12 @@ void native(SmallVectorImpl<char> &Path, Style style) {
Path = PathHome;
}
} else {
- for (auto PI = Path.begin(), PE = Path.end(); PI < PE; ++PI)
- if (*PI == '\\')
- *PI = '/';
+ std::replace(Path.begin(), Path.end(), '\\', '/');
}
}
std::string convert_to_slash(StringRef path, Style style) {
- if (real_style(style) != Style::windows)
+ if (is_style_posix(style))
return std::string(path);
std::string s = path.str();
@@ -599,7 +601,7 @@ StringRef extension(StringRef path, Style style) {
bool is_separator(char value, Style style) {
if (value == '/')
return true;
- if (real_style(style) == Style::windows)
+ if (is_style_windows(style))
return value == '\\';
return false;
}
@@ -671,8 +673,7 @@ bool is_absolute(const Twine &path, Style style) {
StringRef p = path.toStringRef(path_storage);
bool rootDir = has_root_directory(p, style);
- bool rootName =
- (real_style(style) != Style::windows) || has_root_name(p, style);
+ bool rootName = is_style_posix(style) || has_root_name(p, style);
return rootDir && rootName;
}
@@ -686,7 +687,7 @@ bool is_absolute_gnu(const Twine &path, Style style) {
if (!p.empty() && is_separator(p.front(), style))
return true;
- if (real_style(style) == Style::windows) {
+ if (is_style_windows(style)) {
// Handle drive letter pattern (a character followed by ':') on Windows.
if (p.size() >= 2 && (p[0] && p[1] == ':'))
return true;
@@ -906,8 +907,7 @@ void make_absolute(const Twine &current_directory,
bool rootName = path::has_root_name(p);
// Already absolute.
- if ((rootName || real_style(Style::native) != Style::windows) &&
- rootDirectory)
+ if ((rootName || is_style_posix(Style::native)) && rootDirectory)
return;
// All of the following conditions will need the current directory.
@@ -1190,6 +1190,10 @@ TempFile &TempFile::operator=(TempFile &&Other) {
FD = Other.FD;
Other.Done = true;
Other.FD = -1;
+#ifdef _WIN32
+ RemoveOnClose = Other.RemoveOnClose;
+ Other.RemoveOnClose = false;
+#endif
return *this;
}
@@ -1204,20 +1208,23 @@ Error TempFile::discard() {
FD = -1;
#ifdef _WIN32
- // On windows closing will remove the file.
- TmpName = "";
- return Error::success();
+ // On Windows, closing will remove the file if we set the delete
+ // disposition. If not, remove it manually.
+ bool Remove = RemoveOnClose;
#else
- // Always try to close and remove.
+ // Always try to remove the file.
+ bool Remove = true;
+#endif
std::error_code RemoveEC;
- if (!TmpName.empty()) {
+ if (Remove && !TmpName.empty()) {
RemoveEC = fs::remove(TmpName);
sys::DontRemoveFileOnSignal(TmpName);
if (!RemoveEC)
TmpName = "";
+ } else {
+ TmpName = "";
}
return errorCodeToError(RemoveEC);
-#endif
}
Error TempFile::keep(const Twine &Name) {
@@ -1228,19 +1235,26 @@ Error TempFile::keep(const Twine &Name) {
// If we can't cancel the delete don't rename.
auto H = reinterpret_cast<HANDLE>(_get_osfhandle(FD));
std::error_code RenameEC = setDeleteDisposition(H, false);
+ bool ShouldDelete = false;
if (!RenameEC) {
RenameEC = rename_handle(H, Name);
// If rename failed because it's cross-device, copy instead
if (RenameEC ==
std::error_code(ERROR_NOT_SAME_DEVICE, std::system_category())) {
RenameEC = copy_file(TmpName, Name);
- setDeleteDisposition(H, true);
+ ShouldDelete = true;
}
}
- // If we can't rename, discard the temporary file.
+ // If we can't rename or copy, discard the temporary file.
if (RenameEC)
- setDeleteDisposition(H, true);
+ ShouldDelete = true;
+ if (ShouldDelete) {
+ if (!RemoveOnClose)
+ setDeleteDisposition(H, true);
+ else
+ remove(TmpName);
+ }
#else
std::error_code RenameEC = fs::rename(TmpName, Name);
if (RenameEC) {
@@ -1250,8 +1264,8 @@ Error TempFile::keep(const Twine &Name) {
if (RenameEC)
remove(TmpName);
}
- sys::DontRemoveFileOnSignal(TmpName);
#endif
+ sys::DontRemoveFileOnSignal(TmpName);
if (!RenameEC)
TmpName = "";
@@ -1273,9 +1287,8 @@ Error TempFile::keep() {
auto H = reinterpret_cast<HANDLE>(_get_osfhandle(FD));
if (std::error_code EC = setDeleteDisposition(H, false))
return errorCodeToError(EC);
-#else
- sys::DontRemoveFileOnSignal(TmpName);
#endif
+ sys::DontRemoveFileOnSignal(TmpName);
TmpName = "";
@@ -1297,14 +1310,22 @@ Expected<TempFile> TempFile::create(const Twine &Model, unsigned Mode,
return errorCodeToError(EC);
TempFile Ret(ResultPath, FD);
-#ifndef _WIN32
- if (sys::RemoveFileOnSignal(ResultPath)) {
+#ifdef _WIN32
+ auto H = reinterpret_cast<HANDLE>(_get_osfhandle(FD));
+ bool SetSignalHandler = false;
+ if (std::error_code EC = setDeleteDisposition(H, true)) {
+ Ret.RemoveOnClose = true;
+ SetSignalHandler = true;
+ }
+#else
+ bool SetSignalHandler = true;
+#endif
+ if (SetSignalHandler && sys::RemoveFileOnSignal(ResultPath)) {
// Make sure we delete the file when RemoveFileOnSignal fails.
consumeError(Ret.discard());
std::error_code EC(errc::operation_not_permitted);
return errorCodeToError(EC);
}
-#endif
return std::move(Ret);
}
} // namespace fs
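The Path.cpp hunks above replace the old '/'-to-'\' loop with separator-aware helpers: native() now rewrites every separator to the style's preferred one, and is_style_windows()/is_style_posix() drive the decisions instead of comparisons against real_style(). A quick sketch of the observable behaviour (illustrative only; the output depends on the style passed):

#include "llvm/ADT/SmallString.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"

void normalizeSeparators() {
  namespace path = llvm::sys::path;
  llvm::SmallString<64> P("tmp/dir\\file.txt");
  // With an explicit Windows style every separator becomes the preferred
  // one; with a POSIX style backslashes are rewritten to '/'.
  path::native(P, path::Style::windows);
  llvm::outs() << P << "\n";
}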
diff --git a/llvm/lib/Support/Process.cpp b/llvm/lib/Support/Process.cpp
index e7e9a8b56f74..547b3b73eec2 100644
--- a/llvm/lib/Support/Process.cpp
+++ b/llvm/lib/Support/Process.cpp
@@ -92,8 +92,7 @@ static bool coreFilesPrevented = !LLVM_ENABLE_CRASH_DUMPS;
bool Process::AreCoreFilesPrevented() { return coreFilesPrevented; }
-LLVM_ATTRIBUTE_NORETURN
-void Process::Exit(int RetCode, bool NoCleanup) {
+[[noreturn]] void Process::Exit(int RetCode, bool NoCleanup) {
if (CrashRecoveryContext *CRC = CrashRecoveryContext::GetCurrent())
CRC->HandleExit(RetCode);
diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp
new file mode 100644
index 000000000000..8e984002f90d
--- /dev/null
+++ b/llvm/lib/Support/RISCVISAInfo.cpp
@@ -0,0 +1,718 @@
+//===-- RISCVISAInfo.cpp - RISCV Arch String Parser --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/RISCVISAInfo.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <array>
+#include <string>
+#include <vector>
+
+using namespace llvm;
+
+namespace {
+/// Represents the major and version number components of a RISC-V extension
+struct RISCVExtensionVersion {
+ unsigned Major;
+ unsigned Minor;
+};
+
+struct RISCVSupportedExtension {
+ const char *Name;
+ /// Supported version.
+ RISCVExtensionVersion Version;
+};
+
+} // end anonymous namespace
+
+static constexpr StringLiteral AllStdExts = "mafdqlcbjtpvn";
+
+static const RISCVSupportedExtension SupportedExtensions[] = {
+ {"i", RISCVExtensionVersion{2, 0}},
+ {"e", RISCVExtensionVersion{1, 9}},
+ {"m", RISCVExtensionVersion{2, 0}},
+ {"a", RISCVExtensionVersion{2, 0}},
+ {"f", RISCVExtensionVersion{2, 0}},
+ {"d", RISCVExtensionVersion{2, 0}},
+ {"c", RISCVExtensionVersion{2, 0}},
+};
+
+static const RISCVSupportedExtension SupportedExperimentalExtensions[] = {
+ {"v", RISCVExtensionVersion{0, 10}},
+ {"zba", RISCVExtensionVersion{1, 0}},
+ {"zbb", RISCVExtensionVersion{1, 0}},
+ {"zbc", RISCVExtensionVersion{1, 0}},
+ {"zbe", RISCVExtensionVersion{0, 93}},
+ {"zbf", RISCVExtensionVersion{0, 93}},
+ {"zbm", RISCVExtensionVersion{0, 93}},
+ {"zbp", RISCVExtensionVersion{0, 93}},
+ {"zbr", RISCVExtensionVersion{0, 93}},
+ {"zbs", RISCVExtensionVersion{1, 0}},
+ {"zbt", RISCVExtensionVersion{0, 93}},
+
+ {"zvamo", RISCVExtensionVersion{0, 10}},
+ {"zvlsseg", RISCVExtensionVersion{0, 10}},
+
+ {"zfhmin", RISCVExtensionVersion{0, 1}},
+ {"zfh", RISCVExtensionVersion{0, 1}},
+};
+
+static bool stripExperimentalPrefix(StringRef &Ext) {
+ return Ext.consume_front("experimental-");
+}
+
+struct FindByName {
+ FindByName(StringRef Ext) : Ext(Ext){};
+ StringRef Ext;
+ bool operator()(const RISCVSupportedExtension &ExtInfo) {
+ return ExtInfo.Name == Ext;
+ }
+};
+
+static Optional<RISCVExtensionVersion> findDefaultVersion(StringRef ExtName) {
+ // Find default version of an extension.
+ // TODO: We might set default version based on profile or ISA spec.
+ for (auto &ExtInfo : {makeArrayRef(SupportedExtensions),
+ makeArrayRef(SupportedExperimentalExtensions)}) {
+ auto ExtensionInfoIterator = llvm::find_if(ExtInfo, FindByName(ExtName));
+
+ if (ExtensionInfoIterator == ExtInfo.end()) {
+ continue;
+ }
+ return ExtensionInfoIterator->Version;
+ }
+ return None;
+}
+
+void RISCVISAInfo::addExtension(StringRef ExtName, unsigned MajorVersion,
+ unsigned MinorVersion) {
+ RISCVExtensionInfo Ext;
+ Ext.ExtName = ExtName.str();
+ Ext.MajorVersion = MajorVersion;
+ Ext.MinorVersion = MinorVersion;
+ Exts[ExtName.str()] = Ext;
+}
+
+static StringRef getExtensionTypeDesc(StringRef Ext) {
+ if (Ext.startswith("sx"))
+ return "non-standard supervisor-level extension";
+ if (Ext.startswith("s"))
+ return "standard supervisor-level extension";
+ if (Ext.startswith("x"))
+ return "non-standard user-level extension";
+ if (Ext.startswith("z"))
+ return "standard user-level extension";
+ return StringRef();
+}
+
+static StringRef getExtensionType(StringRef Ext) {
+ if (Ext.startswith("sx"))
+ return "sx";
+ if (Ext.startswith("s"))
+ return "s";
+ if (Ext.startswith("x"))
+ return "x";
+ if (Ext.startswith("z"))
+ return "z";
+ return StringRef();
+}
+
+static Optional<RISCVExtensionVersion> isExperimentalExtension(StringRef Ext) {
+ auto ExtIterator =
+ llvm::find_if(SupportedExperimentalExtensions, FindByName(Ext));
+ if (ExtIterator == std::end(SupportedExperimentalExtensions))
+ return None;
+
+ return ExtIterator->Version;
+}
+
+bool RISCVISAInfo::isSupportedExtensionFeature(StringRef Ext) {
+ bool IsExperimental = stripExperimentalPrefix(Ext);
+
+ if (IsExperimental)
+ return llvm::any_of(SupportedExperimentalExtensions, FindByName(Ext));
+ else
+ return llvm::any_of(SupportedExtensions, FindByName(Ext));
+}
+
+bool RISCVISAInfo::isSupportedExtension(StringRef Ext) {
+ return llvm::any_of(SupportedExtensions, FindByName(Ext)) ||
+ llvm::any_of(SupportedExperimentalExtensions, FindByName(Ext));
+}
+
+bool RISCVISAInfo::isSupportedExtension(StringRef Ext, unsigned MajorVersion,
+ unsigned MinorVersion) {
+ auto FindByNameAndVersion = [=](const RISCVSupportedExtension &ExtInfo) {
+ return ExtInfo.Name == Ext && (MajorVersion == ExtInfo.Version.Major) &&
+ (MinorVersion == ExtInfo.Version.Minor);
+ };
+ return llvm::any_of(SupportedExtensions, FindByNameAndVersion) ||
+ llvm::any_of(SupportedExperimentalExtensions, FindByNameAndVersion);
+}
+
+bool RISCVISAInfo::hasExtension(StringRef Ext) const {
+ stripExperimentalPrefix(Ext);
+
+ if (!isSupportedExtension(Ext))
+ return false;
+
+ return Exts.count(Ext.str()) != 0;
+}
+
+// Get the rank for a single-letter extension; a lower value means higher
+// priority.
+static int singleLetterExtensionRank(char Ext) {
+ switch (Ext) {
+ case 'i':
+ return -2;
+ case 'e':
+ return -1;
+ default:
+ break;
+ }
+
+ size_t Pos = AllStdExts.find(Ext);
+ int Rank;
+ if (Pos == StringRef::npos)
+ // If we got an unknown extension letter, then give it an alphabetical
+ // order, but after all known standard extensions.
+ Rank = AllStdExts.size() + (Ext - 'a');
+ else
+ Rank = Pos;
+
+ return Rank;
+}
+
+// Get the rank for a multi-letter extension; a lower value means higher
+// priority in the canonical order.
+static int multiLetterExtensionRank(const std::string &ExtName) {
+ assert(ExtName.length() >= 2);
+ int HighOrder;
+ int LowOrder = 0;
+ // The order between multi-char extensions: s -> h -> z -> x.
+ char ExtClass = ExtName[0];
+ switch (ExtClass) {
+ case 's':
+ HighOrder = 0;
+ break;
+ case 'h':
+ HighOrder = 1;
+ break;
+ case 'z':
+ HighOrder = 2;
+ // `z` extension must be sorted by canonical order of second letter.
+ // e.g. zmx has higher rank than zax.
+ LowOrder = singleLetterExtensionRank(ExtName[1]);
+ break;
+ case 'x':
+ HighOrder = 3;
+ break;
+ default:
+ llvm_unreachable("Unknown prefix for multi-char extension");
+ return -1;
+ }
+
+ return (HighOrder << 8) + LowOrder;
+}
+
+// Compare function for extensions.
+// Only the extension names are compared; versions are ignored.
+bool RISCVISAInfo::compareExtension(const std::string &LHS,
+ const std::string &RHS) {
+ size_t LHSLen = LHS.length();
+ size_t RHSLen = RHS.length();
+ if (LHSLen == 1 && RHSLen != 1)
+ return true;
+
+ if (LHSLen != 1 && RHSLen == 1)
+ return false;
+
+ if (LHSLen == 1 && RHSLen == 1)
+ return singleLetterExtensionRank(LHS[0]) <
+ singleLetterExtensionRank(RHS[0]);
+
+ // Both are multi-char ext here.
+ int LHSRank = multiLetterExtensionRank(LHS);
+ int RHSRank = multiLetterExtensionRank(RHS);
+ if (LHSRank != RHSRank)
+ return LHSRank < RHSRank;
+
+ // If the ranks are the same, sort by lexicographic order.
+ return LHS < RHS;
+}
+
+void RISCVISAInfo::toFeatures(
+ std::vector<StringRef> &Features,
+ std::function<StringRef(const Twine &)> StrAlloc) const {
+ for (auto &Ext : Exts) {
+ StringRef ExtName = Ext.first;
+
+ if (ExtName == "i")
+ continue;
+
+ if (ExtName == "zvlsseg") {
+ Features.push_back("+experimental-v");
+ Features.push_back("+experimental-zvlsseg");
+ } else if (ExtName == "zvamo") {
+ Features.push_back("+experimental-v");
+ Features.push_back("+experimental-zvlsseg");
+ Features.push_back("+experimental-zvamo");
+ } else if (isExperimentalExtension(ExtName)) {
+ Features.push_back(StrAlloc("+experimental-" + ExtName));
+ } else {
+ Features.push_back(StrAlloc("+" + ExtName));
+ }
+ }
+}
+
+// Extensions may have a version number, and may be separated by
+// an underscore '_' e.g.: rv32i2_m2.
+// Version number is divided into major and minor version numbers,
+// separated by a 'p'. If the minor version is 0 then 'p0' can be
+// omitted from the version string. E.g., rv32i2p0, rv32i2, rv32i2p1.
+static Error getExtensionVersion(StringRef Ext, StringRef In, unsigned &Major,
+ unsigned &Minor, unsigned &ConsumeLength,
+ bool EnableExperimentalExtension,
+ bool ExperimentalExtensionVersionCheck) {
+ StringRef MajorStr, MinorStr;
+ Major = 0;
+ Minor = 0;
+ ConsumeLength = 0;
+ MajorStr = In.take_while(isDigit);
+ In = In.substr(MajorStr.size());
+
+ if (!MajorStr.empty() && In.consume_front("p")) {
+ MinorStr = In.take_while(isDigit);
+ In = In.substr(MajorStr.size() + 1);
+
+ // Expected 'p' to be followed by minor version number.
+ if (MinorStr.empty()) {
+ return createStringError(
+ errc::invalid_argument,
+ "minor version number missing after 'p' for extension '" + Ext + "'");
+ }
+ }
+
+ if (!MajorStr.empty() && MajorStr.getAsInteger(10, Major))
+ return createStringError(
+ errc::invalid_argument,
+ "Failed to parse major version number for extension '" + Ext + "'");
+
+ if (!MinorStr.empty() && MinorStr.getAsInteger(10, Minor))
+ return createStringError(
+ errc::invalid_argument,
+ "Failed to parse minor version number for extension '" + Ext + "'");
+
+ ConsumeLength = MajorStr.size();
+
+ if (!MinorStr.empty())
+ ConsumeLength += MinorStr.size() + 1 /*'p'*/;
+
+ // Expected multi-character extension with version number to have no
+ // subsequent characters (i.e. must either end string or be followed by
+ // an underscore).
+ if (Ext.size() > 1 && In.size()) {
+ std::string Error =
+ "multi-character extensions must be separated by underscores";
+ return createStringError(errc::invalid_argument, Error);
+ }
+
+ // If this is an experimental extension, require use of the current version number.
+ if (auto ExperimentalExtension = isExperimentalExtension(Ext)) {
+ if (!EnableExperimentalExtension) {
+ std::string Error = "requires '-menable-experimental-extensions' for "
+ "experimental extension '" +
+ Ext.str() + "'";
+ return createStringError(errc::invalid_argument, Error);
+ }
+
+ if (ExperimentalExtensionVersionCheck &&
+ (MajorStr.empty() && MinorStr.empty())) {
+ std::string Error =
+ "experimental extension requires explicit version number `" +
+ Ext.str() + "`";
+ return createStringError(errc::invalid_argument, Error);
+ }
+
+ auto SupportedVers = *ExperimentalExtension;
+ if (ExperimentalExtensionVersionCheck &&
+ (Major != SupportedVers.Major || Minor != SupportedVers.Minor)) {
+ std::string Error = "unsupported version number " + MajorStr.str();
+ if (!MinorStr.empty())
+ Error += "." + MinorStr.str();
+ Error += " for experimental extension '" + Ext.str() +
+ "'(this compiler supports " + utostr(SupportedVers.Major) + "." +
+ utostr(SupportedVers.Minor) + ")";
+ return createStringError(errc::invalid_argument, Error);
+ }
+ return Error::success();
+ }
+
+ // Exception rule for `g`: the ISA spec does not define a clear version
+ // scheme for it.
+ if (Ext == "g")
+ return Error::success();
+
+ if (MajorStr.empty() && MinorStr.empty()) {
+ if (auto DefaultVersion = findDefaultVersion(Ext)) {
+ Major = DefaultVersion->Major;
+ Minor = DefaultVersion->Minor;
+ }
+ // Whether a default version is found or not, return success; assume
+ // verification happens elsewhere.
+ return Error::success();
+ }
+
+ if (RISCVISAInfo::isSupportedExtension(Ext, Major, Minor))
+ return Error::success();
+
+ std::string Error = "unsupported version number " + std::string(MajorStr);
+ if (!MinorStr.empty())
+ Error += "." + MinorStr.str();
+ Error += " for extension '" + Ext.str() + "'";
+ return createStringError(errc::invalid_argument, Error);
+}
+
+llvm::Expected<std::unique_ptr<RISCVISAInfo>>
+RISCVISAInfo::parseFeatures(unsigned XLen,
+ const std::vector<std::string> &Features) {
+ assert(XLen == 32 || XLen == 64);
+ std::unique_ptr<RISCVISAInfo> ISAInfo(new RISCVISAInfo(XLen));
+
+ bool HasE = false;
+ for (auto &Feature : Features) {
+ StringRef ExtName = Feature;
+ bool Experimental = false;
+ assert(ExtName.size() > 1 && (ExtName[0] == '+' || ExtName[0] == '-'));
+ bool Add = ExtName[0] == '+';
+ ExtName = ExtName.drop_front(1); // Drop '+' or '-'
+ Experimental = stripExperimentalPrefix(ExtName);
+ auto ExtensionInfos = Experimental
+ ? makeArrayRef(SupportedExperimentalExtensions)
+ : makeArrayRef(SupportedExtensions);
+ auto ExtensionInfoIterator =
+ llvm::find_if(ExtensionInfos, FindByName(ExtName));
+
+ // Not all features are related to ISA extensions; skip features such as
+ // `relax` or `save-restore`.
+ if (ExtensionInfoIterator == ExtensionInfos.end())
+ continue;
+
+ if (Add) {
+ if (ExtName == "e") {
+ if (XLen != 32)
+ return createStringError(
+ errc::invalid_argument,
+ "standard user-level extension 'e' requires 'rv32'");
+ HasE = true;
+ }
+
+ ISAInfo->addExtension(ExtName, ExtensionInfoIterator->Version.Major,
+ ExtensionInfoIterator->Version.Minor);
+ } else
+ ISAInfo->Exts.erase(ExtName.str());
+ }
+ if (!HasE) {
+ if (auto Version = findDefaultVersion("i"))
+ ISAInfo->addExtension("i", Version->Major, Version->Minor);
+ else
+ llvm_unreachable("Default extension version for 'i' not found?");
+ }
+
+ ISAInfo->updateFLen();
+
+ return std::move(ISAInfo);
+}
+
+llvm::Expected<std::unique_ptr<RISCVISAInfo>>
+RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension,
+ bool ExperimentalExtensionVersionCheck) {
+ // RISC-V ISA strings must be lowercase.
+ if (llvm::any_of(Arch, isupper)) {
+ return createStringError(errc::invalid_argument,
+ "string must be lowercase");
+ }
+
+ bool HasRV64 = Arch.startswith("rv64");
+ // ISA string must begin with rv32 or rv64.
+ if (!(Arch.startswith("rv32") || HasRV64) || (Arch.size() < 5)) {
+ return createStringError(errc::invalid_argument,
+ "string must begin with rv32{i,e,g} or rv64{i,g}");
+ }
+
+ unsigned XLen = HasRV64 ? 64 : 32;
+ std::unique_ptr<RISCVISAInfo> ISAInfo(new RISCVISAInfo(XLen));
+
+ // The canonical order specified in ISA manual.
+ // Ref: Table 22.1 in RISC-V User-Level ISA V2.2
+ StringRef StdExts = AllStdExts;
+ bool HasF = false, HasD = false;
+ char Baseline = Arch[4];
+
+ // First letter should be 'e', 'i' or 'g'.
+ switch (Baseline) {
+ default:
+ return createStringError(errc::invalid_argument,
+ "first letter should be 'e', 'i' or 'g'");
+ case 'e': {
+ // Extension 'e' is not allowed in rv64.
+ if (HasRV64)
+ return createStringError(
+ errc::invalid_argument,
+ "standard user-level extension 'e' requires 'rv32'");
+ break;
+ }
+ case 'i':
+ break;
+ case 'g':
+ // g = imafd
+ StdExts = StdExts.drop_front(4);
+ HasF = true;
+ HasD = true;
+ break;
+ }
+
+ // Skip rvxxx
+ StringRef Exts = Arch.substr(5);
+
+ // Remove multi-letter standard extensions, non-standard extensions and
+ // supervisor-level extensions. They have 'z', 'x', 's', 'sx' prefixes.
+ // Parse them at the end.
+ // Find the very first occurrence of 's', 'x' or 'z'.
+ StringRef OtherExts;
+ size_t Pos = Exts.find_first_of("zsx");
+ if (Pos != StringRef::npos) {
+ OtherExts = Exts.substr(Pos);
+ Exts = Exts.substr(0, Pos);
+ }
+
+ unsigned Major, Minor, ConsumeLength;
+ if (auto E = getExtensionVersion(std::string(1, Baseline), Exts, Major, Minor,
+ ConsumeLength, EnableExperimentalExtension,
+ ExperimentalExtensionVersionCheck))
+ return std::move(E);
+
+ if (Baseline == 'g') {
+ // No matter which version is given to `g`, we always set imafd to their
+ // default versions, since the ISA spec does not define a clear version
+ // scheme for `g`.
+ for (auto Ext : {"i", "m", "a", "f", "d"})
+ if (auto Version = findDefaultVersion(Ext))
+ ISAInfo->addExtension(Ext, Version->Major, Version->Minor);
+ else
+ llvm_unreachable("Default extension version not found?");
+ } else
+ // Baseline is `i` or `e`
+ ISAInfo->addExtension(std::string(1, Baseline), Major, Minor);
+
+ // Consume the base ISA version number and any '_' between rvxxx and the
+ // first extension
+ Exts = Exts.drop_front(ConsumeLength);
+ Exts.consume_front("_");
+
+ // TODO: Use version number when setting target features
+
+ auto StdExtsItr = StdExts.begin();
+ auto StdExtsEnd = StdExts.end();
+ for (auto I = Exts.begin(), E = Exts.end(); I != E;) {
+ char C = *I;
+
+ // Check ISA extensions are specified in the canonical order.
+ while (StdExtsItr != StdExtsEnd && *StdExtsItr != C)
+ ++StdExtsItr;
+
+ if (StdExtsItr == StdExtsEnd) {
+ // Either C contains a valid extension that was not given in
+ // canonical order, or it is an invalid extension.
+ if (StdExts.contains(C)) {
+ return createStringError(
+ errc::invalid_argument,
+ "standard user-level extension not given in canonical order '%c'",
+ C);
+ }
+
+ return createStringError(errc::invalid_argument,
+ "invalid standard user-level extension '%c'", C);
+ }
+
+ // Move to next char to prevent repeated letter.
+ ++StdExtsItr;
+
+ std::string Next;
+ unsigned Major, Minor, ConsumeLength;
+ if (std::next(I) != E)
+ Next = std::string(std::next(I), E);
+ if (auto E = getExtensionVersion(std::string(1, C), Next, Major, Minor,
+ ConsumeLength, EnableExperimentalExtension,
+ ExperimentalExtensionVersionCheck))
+ return std::move(E);
+
+ // The order is OK, so add it to the extension set.
+ // TODO: Use version number when setting target features
+ switch (C) {
+ default:
+ // Currently LLVM supports only "mafdcbv".
+ return createStringError(errc::invalid_argument,
+ "unsupported standard user-level extension '%c'",
+ C);
+ case 'm':
+ ISAInfo->addExtension("m", Major, Minor);
+ break;
+ case 'a':
+ ISAInfo->addExtension("a", Major, Minor);
+ break;
+ case 'f':
+ ISAInfo->addExtension("f", Major, Minor);
+ HasF = true;
+ break;
+ case 'd':
+ ISAInfo->addExtension("d", Major, Minor);
+ HasD = true;
+ break;
+ case 'c':
+ ISAInfo->addExtension("c", Major, Minor);
+ break;
+ case 'v':
+ ISAInfo->addExtension("v", Major, Minor);
+ ISAInfo->addExtension("zvlsseg", Major, Minor);
+ break;
+ }
+ // Consume full extension name and version, including any optional '_'
+ // between this extension and the next
+ ++I;
+ I += ConsumeLength;
+ if (*I == '_')
+ ++I;
+ }
+ // Dependency check.
+ // It's illegal to specify the 'd' (double-precision floating point)
+ // extension without also specifying the 'f' (single precision
+ // floating-point) extension.
+ // TODO: This has been removed in later specs, which specify that D implies F
+ if (HasD && !HasF)
+ return createStringError(errc::invalid_argument,
+ "d requires f extension to also be specified");
+
+ // Additional dependency checks.
+ // TODO: The 'q' extension requires rv64.
+ // TODO: It is illegal to specify 'e' extensions with 'f' and 'd'.
+
+ if (OtherExts.empty())
+ return std::move(ISAInfo);
+
+ // Handle extension types other than the standard general-purpose and
+ // standard user-level extensions.
+ // Parse the ISA string containing non-standard user-level
+ // extensions, standard supervisor-level extensions and
+ // non-standard supervisor-level extensions.
+ // These extensions start with 'z', 'x', 's', 'sx' prefixes, follow a
+ // canonical order, might have a version number (major, minor)
+ // and are separated by a single underscore '_'.
+ // Set the hardware features for the extensions that are supported.
+
+ // Multi-letter extensions are separated by a single underscore
+ // as described in RISC-V User-Level ISA V2.2.
+ SmallVector<StringRef, 8> Split;
+ OtherExts.split(Split, '_');
+
+ SmallVector<StringRef, 8> AllExts;
+ std::array<StringRef, 4> Prefix{"z", "x", "s", "sx"};
+ auto I = Prefix.begin();
+ auto E = Prefix.end();
+
+ for (StringRef Ext : Split) {
+ if (Ext.empty())
+ return createStringError(errc::invalid_argument,
+ "extension name missing after separator '_'");
+
+ StringRef Type = getExtensionType(Ext);
+ StringRef Desc = getExtensionTypeDesc(Ext);
+ auto Pos = Ext.find_if(isDigit);
+ StringRef Name(Ext.substr(0, Pos));
+ StringRef Vers(Ext.substr(Pos));
+
+ if (Type.empty())
+ return createStringError(errc::invalid_argument,
+ "invalid extension prefix '" + Ext + "'");
+
+ // Check ISA extensions are specified in the canonical order.
+ while (I != E && *I != Type)
+ ++I;
+
+ if (I == E)
+ return createStringError(errc::invalid_argument,
+ "%s not given in canonical order '%s'",
+ Desc.str().c_str(), Ext.str().c_str());
+
+ if (Name.size() == Type.size()) {
+ return createStringError(errc::invalid_argument,
+ "%s name missing after '%s'", Desc.str().c_str(),
+ Type.str().c_str());
+ }
+
+ unsigned Major, Minor, ConsumeLength;
+ if (auto E = getExtensionVersion(Name, Vers, Major, Minor, ConsumeLength,
+ EnableExperimentalExtension,
+ ExperimentalExtensionVersionCheck))
+ return std::move(E);
+
+ // Check if duplicated extension.
+ if (llvm::is_contained(AllExts, Name))
+ return createStringError(errc::invalid_argument, "duplicated %s '%s'",
+ Desc.str().c_str(), Name.str().c_str());
+
+ ISAInfo->addExtension(Name, Major, Minor);
+ // Extension format is correct, keep parsing the extensions.
+ // TODO: Save Type, Name, Major, Minor to avoid parsing them later.
+ AllExts.push_back(Name);
+ }
+
+ for (auto Ext : AllExts) {
+ if (!isSupportedExtension(Ext)) {
+ StringRef Desc = getExtensionTypeDesc(getExtensionType(Ext));
+ return createStringError(errc::invalid_argument, "unsupported %s '%s'",
+ Desc.str().c_str(), Ext.str().c_str());
+ }
+ }
+
+ ISAInfo->updateFLen();
+
+ return std::move(ISAInfo);
+}
+
+void RISCVISAInfo::updateFLen() {
+ FLen = 0;
+ // TODO: Handle q extension.
+ if (Exts.count("d"))
+ FLen = 64;
+ else if (Exts.count("f"))
+ FLen = 32;
+}
+
+std::string RISCVISAInfo::toString() const {
+ std::string Buffer;
+ raw_string_ostream Arch(Buffer);
+
+ Arch << "rv" << XLen;
+
+ ListSeparator LS("_");
+ for (auto &Ext : Exts) {
+ StringRef ExtName = Ext.first;
+ auto ExtInfo = Ext.second;
+ Arch << LS << ExtName;
+ Arch << ExtInfo.MajorVersion << "p" << ExtInfo.MinorVersion;
+ }
+
+ return Arch.str();
+}
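A minimal sketch of driving the new parser, assuming the RISCVISAInfo.h header added with this file declares the static parseArchString() and the toString() member shown above:

#include "llvm/Support/Error.h"
#include "llvm/Support/RISCVISAInfo.h"
#include "llvm/Support/raw_ostream.h"

void parseRISCVArch() {
  auto ParseResult = llvm::RISCVISAInfo::parseArchString(
      "rv64imafdc", /*EnableExperimentalExtension=*/false,
      /*ExperimentalExtensionVersionCheck=*/true);
  if (!ParseResult) {
    llvm::errs() << llvm::toString(ParseResult.takeError()) << "\n";
    return;
  }
  // Extensions come back with their default versions filled in; for this
  // input toString() prints something like "rv64i2p0_m2p0_a2p0_f2p0_d2p0_c2p0".
  llvm::outs() << (*ParseResult)->toString() << "\n";
}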
diff --git a/llvm/lib/Support/Signposts.cpp b/llvm/lib/Support/Signposts.cpp
index 49a0b16baa02..58fafb26cdf3 100644
--- a/llvm/lib/Support/Signposts.cpp
+++ b/llvm/lib/Support/Signposts.cpp
@@ -1,23 +1,27 @@
//===-- Signposts.cpp - Interval debug annotations ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/Support/Signposts.h"
#include "llvm/Support/Timer.h"
+#include "llvm/Config/config.h"
#if LLVM_SUPPORT_XCODE_SIGNPOSTS
#include "llvm/ADT/DenseMap.h"
#include "llvm/Support/Mutex.h"
-#endif
+#include <Availability.h>
+#include <os/signpost.h>
+#endif // if LLVM_SUPPORT_XCODE_SIGNPOSTS
using namespace llvm;
#if LLVM_SUPPORT_XCODE_SIGNPOSTS
+#define SIGNPOSTS_AVAILABLE() \
+ __builtin_available(macos 10.14, iOS 12, tvOS 12, watchOS 5, *)
namespace {
os_log_t *LogCreator() {
os_log_t *X = new os_log_t;
@@ -35,13 +39,13 @@ struct LogDeleter {
namespace llvm {
class SignpostEmitterImpl {
using LogPtrTy = std::unique_ptr<os_log_t, LogDeleter>;
+ using LogTy = LogPtrTy::element_type;
LogPtrTy SignpostLog;
DenseMap<const void *, os_signpost_id_t> Signposts;
sys::SmartMutex<true> Mutex;
-public:
- os_log_t &getLogger() const { return *SignpostLog; }
+ LogTy &getLogger() const { return *SignpostLog; }
os_signpost_id_t getSignpostForObject(const void *O) {
sys::SmartScopedLock<true> Lock(Mutex);
const auto &I = Signposts.find(O);
@@ -55,6 +59,7 @@ public:
return Inserted.first->second;
}
+public:
SignpostEmitterImpl() : SignpostLog(LogCreator()) {}
bool isEnabled() const {
@@ -73,7 +78,7 @@ public:
}
}
- void endInterval(const void *O) {
+ void endInterval(const void *O, llvm::StringRef Name) {
if (isEnabled()) {
if (SIGNPOSTS_AVAILABLE()) {
// Both strings used here are required to be constant literal strings.
@@ -119,17 +124,10 @@ void SignpostEmitter::startInterval(const void *O, StringRef Name) {
#endif // if !HAVE_ANY_SIGNPOST_IMPL
}
-#if HAVE_ANY_SIGNPOST_IMPL
-os_log_t &SignpostEmitter::getLogger() const { return Impl->getLogger(); }
-os_signpost_id_t SignpostEmitter::getSignpostForObject(const void *O) {
- return Impl->getSignpostForObject(O);
-}
-#endif
-
-void SignpostEmitter::endInterval(const void *O) {
+void SignpostEmitter::endInterval(const void *O, StringRef Name) {
#if HAVE_ANY_SIGNPOST_IMPL
if (Impl == nullptr)
return;
- Impl->endInterval(O);
+ Impl->endInterval(O, Name);
#endif // if !HAVE_ANY_SIGNPOST_IMPL
}
diff --git a/llvm/lib/Support/SmallVector.cpp b/llvm/lib/Support/SmallVector.cpp
index 0005f7840912..2d7721e4e1fb 100644
--- a/llvm/lib/Support/SmallVector.cpp
+++ b/llvm/lib/Support/SmallVector.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Twine.h"
#include <cstdint>
#ifdef LLVM_ENABLE_EXCEPTIONS
#include <stdexcept>
@@ -19,12 +20,21 @@ using namespace llvm;
// Check that no bytes are wasted and everything is well-aligned.
namespace {
+// These structures may cause binary compat warnings on AIX. Suppress the
+// warning since we are only using these types for the static assertions below.
+#if defined(_AIX)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Waix-compat"
+#endif
struct Struct16B {
alignas(16) void *X;
};
struct Struct32B {
alignas(32) void *X;
};
+#if defined(_AIX)
+#pragma GCC diagnostic pop
+#endif
}
static_assert(sizeof(SmallVector<void *, 0>) ==
sizeof(unsigned) * 2 + sizeof(void *),
@@ -47,8 +57,7 @@ static_assert(sizeof(SmallVector<char, 0>) ==
/// Report that MinSize doesn't fit into this vector's size type. Throws
/// std::length_error or calls report_fatal_error.
-LLVM_ATTRIBUTE_NORETURN
-static void report_size_overflow(size_t MinSize, size_t MaxSize);
+[[noreturn]] static void report_size_overflow(size_t MinSize, size_t MaxSize);
static void report_size_overflow(size_t MinSize, size_t MaxSize) {
std::string Reason = "SmallVector unable to grow. Requested capacity (" +
std::to_string(MinSize) +
@@ -57,13 +66,13 @@ static void report_size_overflow(size_t MinSize, size_t MaxSize) {
#ifdef LLVM_ENABLE_EXCEPTIONS
throw std::length_error(Reason);
#else
- report_fatal_error(Reason);
+ report_fatal_error(Twine(Reason));
#endif
}
/// Report that this vector is already at maximum capacity. Throws
/// std::length_error or calls report_fatal_error.
-LLVM_ATTRIBUTE_NORETURN static void report_at_maximum_capacity(size_t MaxSize);
+[[noreturn]] static void report_at_maximum_capacity(size_t MaxSize);
static void report_at_maximum_capacity(size_t MaxSize) {
std::string Reason =
"SmallVector capacity unable to grow. Already at maximum size " +
@@ -71,7 +80,7 @@ static void report_at_maximum_capacity(size_t MaxSize) {
#ifdef LLVM_ENABLE_EXCEPTIONS
throw std::length_error(Reason);
#else
- report_fatal_error(Reason);
+ report_fatal_error(Twine(Reason));
#endif
}
diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp
index 73f852624a69..1939ed9e9547 100644
--- a/llvm/lib/Support/SpecialCaseList.cpp
+++ b/llvm/lib/Support/SpecialCaseList.cpp
@@ -64,7 +64,7 @@ unsigned SpecialCaseList::Matcher::match(StringRef Query) const {
return It->second;
if (Trigrams.isDefinitelyOut(Query))
return false;
- for (auto& RegExKV : RegExes)
+ for (const auto &RegExKV : RegExes)
if (RegExKV.first->match(Query))
return RegExKV.second;
return 0;
@@ -93,7 +93,7 @@ SpecialCaseList::createOrDie(const std::vector<std::string> &Paths,
std::string Error;
if (auto SCL = create(Paths, FS, Error))
return SCL;
- report_fatal_error(Error);
+ report_fatal_error(Twine(Error));
}
bool SpecialCaseList::createInternal(const std::vector<std::string> &Paths,
@@ -209,7 +209,7 @@ bool SpecialCaseList::inSection(StringRef Section, StringRef Prefix,
unsigned SpecialCaseList::inSectionBlame(StringRef Section, StringRef Prefix,
StringRef Query,
StringRef Category) const {
- for (auto &SectionIter : Sections)
+ for (const auto &SectionIter : Sections)
if (SectionIter.SectionMatcher->match(Section)) {
unsigned Blame =
inSectionBlame(SectionIter.Entries, Prefix, Query, Category);
diff --git a/llvm/lib/Support/TimeProfiler.cpp b/llvm/lib/Support/TimeProfiler.cpp
index 8f2544e9e26d..2b094a4983a0 100644
--- a/llvm/lib/Support/TimeProfiler.cpp
+++ b/llvm/lib/Support/TimeProfiler.cpp
@@ -110,9 +110,8 @@ struct llvm::TimeTraceProfiler {
// templates from within, we only want to add the topmost one. "topmost"
// happens to be the one that doesn't have any currently open entries above
// itself.
- if (std::find_if(++Stack.rbegin(), Stack.rend(), [&](const Entry &Val) {
- return Val.Name == E.Name;
- }) == Stack.rend()) {
+ if (llvm::none_of(llvm::drop_begin(llvm::reverse(Stack)),
+ [&](const Entry &Val) { return Val.Name == E.Name; })) {
auto &CountAndTotal = CountAndTotalPerName[E.Name];
CountAndTotal.first++;
CountAndTotal.second += Duration;
@@ -272,8 +271,9 @@ void llvm::timeTraceProfilerInitialize(unsigned TimeTraceGranularity,
// Called from main thread.
void llvm::timeTraceProfilerCleanup() {
delete TimeTraceProfilerInstance;
+ TimeTraceProfilerInstance = nullptr;
std::lock_guard<std::mutex> Lock(Mu);
- for (auto TTP : *ThreadTimeTraceProfilerInstances)
+ for (auto *TTP : *ThreadTimeTraceProfilerInstances)
delete TTP;
ThreadTimeTraceProfilerInstances->clear();
}
diff --git a/llvm/lib/Support/Timer.cpp b/llvm/lib/Support/Timer.cpp
index f025ecd3d45c..08e1a8a0e0aa 100644
--- a/llvm/lib/Support/Timer.cpp
+++ b/llvm/lib/Support/Timer.cpp
@@ -199,7 +199,7 @@ void Timer::stopTimer() {
Running = false;
Time += TimeRecord::getCurrentTime(false);
Time -= StartTime;
- Signposts->endInterval(this);
+ Signposts->endInterval(this, getName());
}
void Timer::clear() {
@@ -393,8 +393,7 @@ void TimerGroup::PrintQueuedTimers(raw_ostream &OS) {
OS << " --- Name ---\n";
// Loop through all of the timing data, printing it out.
- for (const PrintRecord &Record : make_range(TimersToPrint.rbegin(),
- TimersToPrint.rend())) {
+ for (const PrintRecord &Record : llvm::reverse(TimersToPrint)) {
Record.Time.print(Total, OS);
OS << Record.Description << '\n';
}
diff --git a/llvm/lib/Support/Triple.cpp b/llvm/lib/Support/Triple.cpp
index 88311546354b..b9a92e280576 100644
--- a/llvm/lib/Support/Triple.cpp
+++ b/llvm/lib/Support/Triple.cpp
@@ -67,6 +67,8 @@ StringRef Triple::getArchTypeName(ArchType Kind) {
case sparcv9: return "sparcv9";
case spir64: return "spir64";
case spir: return "spir";
+ case spirv32: return "spirv32";
+ case spirv64: return "spirv64";
case systemz: return "s390x";
case tce: return "tce";
case tcele: return "tcele";
@@ -147,6 +149,10 @@ StringRef Triple::getArchTypePrefix(ArchType Kind) {
case spir:
case spir64: return "spir";
+
+ case spirv32:
+ case spirv64: return "spirv";
+
case kalimba: return "kalimba";
case lanai: return "lanai";
case shave: return "shave";
@@ -323,6 +329,8 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
.Case("hsail64", hsail64)
.Case("spir", spir)
.Case("spir64", spir64)
+ .Case("spirv32", spirv32)
+ .Case("spirv64", spirv64)
.Case("kalimba", kalimba)
.Case("lanai", lanai)
.Case("shave", shave)
@@ -456,6 +464,8 @@ static Triple::ArchType parseArch(StringRef ArchName) {
.Case("hsail64", Triple::hsail64)
.Case("spir", Triple::spir)
.Case("spir64", Triple::spir64)
+ .Case("spirv32", Triple::spirv32)
+ .Case("spirv64", Triple::spirv64)
.StartsWith("kalimba", Triple::kalimba)
.Case("lanai", Triple::lanai)
.Case("renderscript32", Triple::renderscript32)
@@ -653,6 +663,12 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) {
return Triple::ARMSubArch_v8_6a;
case ARM::ArchKind::ARMV8_7A:
return Triple::ARMSubArch_v8_7a;
+ case ARM::ArchKind::ARMV9A:
+ return Triple::ARMSubArch_v9;
+ case ARM::ArchKind::ARMV9_1A:
+ return Triple::ARMSubArch_v9_1a;
+ case ARM::ArchKind::ARMV9_2A:
+ return Triple::ARMSubArch_v9_2a;
case ARM::ArchKind::ARMV8R:
return Triple::ARMSubArch_v8r;
case ARM::ArchKind::ARMV8MBaseline:
@@ -753,6 +769,11 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) {
case Triple::wasm32:
case Triple::wasm64:
return Triple::Wasm;
+
+ case Triple::spirv32:
+ case Triple::spirv64:
+ // TODO: In future this will be Triple::SPIRV.
+ return Triple::UnknownObjectFormat;
}
llvm_unreachable("unknown architecture");
}
@@ -1024,6 +1045,30 @@ StringRef Triple::getArchName() const {
return StringRef(Data).split('-').first; // Isolate first component
}
+StringRef Triple::getArchName(ArchType Kind, SubArchType SubArch) const {
+ switch (Kind) {
+ case Triple::mips:
+ if (SubArch == MipsSubArch_r6)
+ return "mipsisa32r6";
+ break;
+ case Triple::mipsel:
+ if (SubArch == MipsSubArch_r6)
+ return "mipsisa32r6el";
+ break;
+ case Triple::mips64:
+ if (SubArch == MipsSubArch_r6)
+ return "mipsisa64r6";
+ break;
+ case Triple::mips64el:
+ if (SubArch == MipsSubArch_r6)
+ return "mipsisa64r6el";
+ break;
+ default:
+ break;
+ }
+ return getArchTypeName(Kind);
+}
+
StringRef Triple::getVendorName() const {
StringRef Tmp = StringRef(Data).split('-').second; // Strip first component
return Tmp.split('-').first; // Isolate second component
@@ -1205,8 +1250,8 @@ void Triple::setTriple(const Twine &Str) {
*this = Triple(Str);
}
-void Triple::setArch(ArchType Kind) {
- setArchName(getArchTypeName(Kind));
+void Triple::setArch(ArchType Kind, SubArchType SubArch) {
+ setArchName(getArchName(Kind, SubArch));
}
void Triple::setVendor(VendorType Kind) {
@@ -1298,6 +1343,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
case llvm::Triple::sparc:
case llvm::Triple::sparcel:
case llvm::Triple::spir:
+ case llvm::Triple::spirv32:
case llvm::Triple::tce:
case llvm::Triple::tcele:
case llvm::Triple::thumb:
@@ -1324,6 +1370,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
case llvm::Triple::riscv64:
case llvm::Triple::sparcv9:
case llvm::Triple::spir64:
+ case llvm::Triple::spirv64:
case llvm::Triple::systemz:
case llvm::Triple::ve:
case llvm::Triple::wasm64:
@@ -1383,6 +1430,7 @@ Triple Triple::get32BitArchVariant() const {
case Triple::sparc:
case Triple::sparcel:
case Triple::spir:
+ case Triple::spirv32:
case Triple::tce:
case Triple::tcele:
case Triple::thumb:
@@ -1398,8 +1446,12 @@ Triple Triple::get32BitArchVariant() const {
case Triple::amdil64: T.setArch(Triple::amdil); break;
case Triple::hsail64: T.setArch(Triple::hsail); break;
case Triple::le64: T.setArch(Triple::le32); break;
- case Triple::mips64: T.setArch(Triple::mips); break;
- case Triple::mips64el: T.setArch(Triple::mipsel); break;
+ case Triple::mips64:
+ T.setArch(Triple::mips, getSubArch());
+ break;
+ case Triple::mips64el:
+ T.setArch(Triple::mipsel, getSubArch());
+ break;
case Triple::nvptx64: T.setArch(Triple::nvptx); break;
case Triple::ppc64: T.setArch(Triple::ppc); break;
case Triple::ppc64le: T.setArch(Triple::ppcle); break;
@@ -1407,6 +1459,7 @@ Triple Triple::get32BitArchVariant() const {
case Triple::riscv64: T.setArch(Triple::riscv32); break;
case Triple::sparcv9: T.setArch(Triple::sparc); break;
case Triple::spir64: T.setArch(Triple::spir); break;
+ case Triple::spirv64: T.setArch(Triple::spirv32); break;
case Triple::wasm64: T.setArch(Triple::wasm32); break;
case Triple::x86_64: T.setArch(Triple::x86); break;
}
@@ -1451,6 +1504,7 @@ Triple Triple::get64BitArchVariant() const {
case Triple::riscv64:
case Triple::sparcv9:
case Triple::spir64:
+ case Triple::spirv64:
case Triple::systemz:
case Triple::ve:
case Triple::wasm64:
@@ -1464,8 +1518,12 @@ Triple Triple::get64BitArchVariant() const {
case Triple::armeb: T.setArch(Triple::aarch64_be); break;
case Triple::hsail: T.setArch(Triple::hsail64); break;
case Triple::le32: T.setArch(Triple::le64); break;
- case Triple::mips: T.setArch(Triple::mips64); break;
- case Triple::mipsel: T.setArch(Triple::mips64el); break;
+ case Triple::mips:
+ T.setArch(Triple::mips64, getSubArch());
+ break;
+ case Triple::mipsel:
+ T.setArch(Triple::mips64el, getSubArch());
+ break;
case Triple::nvptx: T.setArch(Triple::nvptx64); break;
case Triple::ppc: T.setArch(Triple::ppc64); break;
case Triple::ppcle: T.setArch(Triple::ppc64le); break;
@@ -1473,6 +1531,7 @@ Triple Triple::get64BitArchVariant() const {
case Triple::riscv32: T.setArch(Triple::riscv64); break;
case Triple::sparc: T.setArch(Triple::sparcv9); break;
case Triple::spir: T.setArch(Triple::spir64); break;
+ case Triple::spirv32: T.setArch(Triple::spirv64); break;
case Triple::thumb: T.setArch(Triple::aarch64); break;
case Triple::thumbeb: T.setArch(Triple::aarch64_be); break;
case Triple::wasm32: T.setArch(Triple::wasm64); break;
@@ -1509,6 +1568,8 @@ Triple Triple::getBigEndianArchVariant() const {
case Triple::shave:
case Triple::spir64:
case Triple::spir:
+ case Triple::spirv32:
+ case Triple::spirv64:
case Triple::wasm32:
case Triple::wasm64:
case Triple::x86:
@@ -1526,8 +1587,12 @@ Triple Triple::getBigEndianArchVariant() const {
case Triple::aarch64: T.setArch(Triple::aarch64_be); break;
case Triple::bpfel: T.setArch(Triple::bpfeb); break;
- case Triple::mips64el:T.setArch(Triple::mips64); break;
- case Triple::mipsel: T.setArch(Triple::mips); break;
+ case Triple::mips64el:
+ T.setArch(Triple::mips64, getSubArch());
+ break;
+ case Triple::mipsel:
+ T.setArch(Triple::mips, getSubArch());
+ break;
case Triple::ppcle: T.setArch(Triple::ppc); break;
case Triple::ppc64le: T.setArch(Triple::ppc64); break;
case Triple::sparcel: T.setArch(Triple::sparc); break;
@@ -1559,8 +1624,12 @@ Triple Triple::getLittleEndianArchVariant() const {
case Triple::aarch64_be: T.setArch(Triple::aarch64); break;
case Triple::bpfeb: T.setArch(Triple::bpfel); break;
- case Triple::mips64: T.setArch(Triple::mips64el); break;
- case Triple::mips: T.setArch(Triple::mipsel); break;
+ case Triple::mips64:
+ T.setArch(Triple::mips64el, getSubArch());
+ break;
+ case Triple::mips:
+ T.setArch(Triple::mipsel, getSubArch());
+ break;
case Triple::ppc: T.setArch(Triple::ppcle); break;
case Triple::ppc64: T.setArch(Triple::ppc64le); break;
case Triple::sparc: T.setArch(Triple::sparcel); break;
@@ -1604,6 +1673,8 @@ bool Triple::isLittleEndian() const {
case Triple::sparcel:
case Triple::spir64:
case Triple::spir:
+ case Triple::spirv32:
+ case Triple::spirv64:
case Triple::tcele:
case Triple::thumb:
case Triple::ve:
@@ -1709,6 +1780,7 @@ StringRef Triple::getARMCPUForArch(StringRef MArch) const {
switch (getOS()) {
case llvm::Triple::FreeBSD:
case llvm::Triple::NetBSD:
+ case llvm::Triple::OpenBSD:
if (!MArch.empty() && MArch == "v6")
return "arm1176jzf-s";
if (!MArch.empty() && MArch == "v7")
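The Triple::get32BitArchVariant/get64BitArchVariant and endianness-flip changes above forward getSubArch() into setArch() for the MIPS cases, so ISA-revision sub-architectures are no longer dropped when switching bit width or endianness. A minimal sketch (not part of the patch) of the behaviour this preserves, assuming the two-argument Triple::setArch overload used above:

    // Standalone sketch: converting a MIPS R6 triple to its 64-bit variant
    // should keep the R6 sub-architecture rather than reset it to NoSubArch.
    #include "llvm/ADT/Triple.h"
    #include "llvm/Support/raw_ostream.h"

    int main() {
      llvm::Triple T("mipsisa32r6el-unknown-linux-gnu");
      llvm::Triple T64 = T.get64BitArchVariant();
      // With the sub-arch forwarded, the 64-bit variant stays an R6 triple
      // (e.g. mipsisa64r6el-...); previously the revision was lost.
      llvm::outs() << T64.str() << "\n";
      return 0;
    }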
diff --git a/llvm/lib/Support/Unix/Memory.inc b/llvm/lib/Support/Unix/Memory.inc
index be88e7db1400..b83477e0e4cc 100644
--- a/llvm/lib/Support/Unix/Memory.inc
+++ b/llvm/lib/Support/Unix/Memory.inc
@@ -29,14 +29,6 @@
#include <zircon/syscalls.h>
#endif
-#if defined(__mips__)
-# if defined(__OpenBSD__)
-# include <mips64/sysarch.h>
-# elif !defined(__FreeBSD__)
-# include <sys/cachectl.h>
-# endif
-#endif
-
#if defined(__APPLE__)
extern "C" void sys_icache_invalidate(const void *Addr, size_t len);
#else
diff --git a/llvm/lib/Support/Unix/Path.inc b/llvm/lib/Support/Unix/Path.inc
index c37b3a54644a..19d89db55627 100644
--- a/llvm/lib/Support/Unix/Path.inc
+++ b/llvm/lib/Support/Unix/Path.inc
@@ -39,6 +39,9 @@
#include <mach-o/dyld.h>
#include <sys/attr.h>
#include <copyfile.h>
+#if __has_include(<sys/clonefile.h>)
+#include <sys/clonefile.h>
+#endif
#elif defined(__FreeBSD__)
#include <osreldate.h>
#if __FreeBSD_version >= 1300057
@@ -125,7 +128,8 @@ const file_t kInvalidFile = -1;
#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
defined(__minix) || defined(__FreeBSD_kernel__) || defined(__linux__) || \
- defined(__CYGWIN__) || defined(__DragonFly__) || defined(_AIX) || defined(__GNU__)
+ defined(__CYGWIN__) || defined(__DragonFly__) || defined(_AIX) || defined(__GNU__) || \
+ (defined(__sun__) && defined(__svr4__))
static int
test_dir(char ret[PATH_MAX], const char *dir, const char *bin)
{
@@ -283,6 +287,20 @@ std::string getMainExecutable(const char *argv0, void *MainAddr) {
// Fall back to the classical detection.
if (getprogpath(exe_path, argv0))
return exe_path;
+#elif defined(__sun__) && defined(__svr4__)
+ char exe_path[PATH_MAX];
+ const char *aPath = "/proc/self/execname";
+ if (sys::fs::exists(aPath)) {
+ int fd = open(aPath, O_RDONLY);
+ if (fd == -1)
+ return "";
+ if (read(fd, exe_path, sizeof(exe_path)) < 0)
+ return "";
+ return exe_path;
+ }
+ // Fall back to the classical detection.
+ if (getprogpath(exe_path, argv0) != NULL)
+ return exe_path;
#elif defined(__MVS__)
int token = 0;
W_PSPROC buf;
@@ -1442,22 +1460,37 @@ namespace fs {
/// file descriptor variant of this function still uses the default
/// implementation.
std::error_code copy_file(const Twine &From, const Twine &To) {
- uint32_t Flag = COPYFILE_DATA;
-#if __has_builtin(__builtin_available) && defined(COPYFILE_CLONE)
+ std::string FromS = From.str();
+ std::string ToS = To.str();
+#if __has_builtin(__builtin_available)
if (__builtin_available(macos 10.12, *)) {
- bool IsSymlink;
- if (std::error_code Error = is_symlink_file(From, IsSymlink))
- return Error;
- // COPYFILE_CLONE clones the symlink instead of following it
- // and returns EEXISTS if the target file already exists.
- if (!IsSymlink && !exists(To))
- Flag = COPYFILE_CLONE;
+ // Optimistically try to use clonefile() and handle errors, rather than
+ // calling stat() to see if it'll work.
+ //
+ // Note: It's okay if From is a symlink. In contrast to the behaviour of
+ // copyfile() with COPYFILE_CLONE, clonefile() clones targets (not the
+ // symlink itself) unless the flag CLONE_NOFOLLOW is passed.
+ if (!clonefile(FromS.c_str(), ToS.c_str(), 0))
+ return std::error_code();
+
+ auto Errno = errno;
+ switch (Errno) {
+ case EEXIST: // To already exists.
+ case ENOTSUP: // Device does not support cloning.
+ case EXDEV: // From and To are on different devices.
+ break;
+ default:
+ // Anything else will also break copyfile().
+ return std::error_code(Errno, std::generic_category());
+ }
+
+ // TODO: For EEXIST, profile calling fs::generateUniqueName() and
+ // clonefile() in a retry loop (then rename() on success) before falling
+ // back to copyfile(). Depending on the size of the file this could be
+ // cheaper.
}
#endif
- int Status =
- copyfile(From.str().c_str(), To.str().c_str(), /* State */ NULL, Flag);
-
- if (Status == 0)
+ if (!copyfile(FromS.c_str(), ToS.c_str(), /*State=*/NULL, COPYFILE_DATA))
return std::error_code();
return std::error_code(errno, std::generic_category());
}
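The copy_file() rewrite above tries an APFS clone first and only falls back to a byte copy for the errno values that copyfile() could still handle. A minimal macOS-only sketch of the same pattern, with the __builtin_available(macos 10.12, *) guard from the patch omitted for brevity:

    #include <cerrno>
    #include <copyfile.h>        // copyfile(), COPYFILE_DATA
    #include <sys/clonefile.h>   // clonefile()

    static int cloneOrCopy(const char *From, const char *To) {
      if (clonefile(From, To, 0) == 0)
        return 0;                // clone succeeded, no data was copied
      switch (errno) {
      case EEXIST:               // destination already exists
      case ENOTSUP:              // filesystem cannot clone (e.g. HFS+)
      case EXDEV:                // source and destination on different volumes
        break;                   // fall back to a plain data copy
      default:
        return -1;               // anything else would break copyfile() too
      }
      return copyfile(From, To, /*State=*/nullptr, COPYFILE_DATA);
    }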
diff --git a/llvm/lib/Support/Unix/Process.inc b/llvm/lib/Support/Unix/Process.inc
index 30b957e6a1c4..d3d9fb7d7187 100644
--- a/llvm/lib/Support/Unix/Process.inc
+++ b/llvm/lib/Support/Unix/Process.inc
@@ -461,5 +461,4 @@ unsigned llvm::sys::Process::GetRandomNumber() {
#endif
}
-LLVM_ATTRIBUTE_NORETURN
-void Process::ExitNoCleanup(int RetCode) { _Exit(RetCode); }
+[[noreturn]] void Process::ExitNoCleanup(int RetCode) { _Exit(RetCode); }
diff --git a/llvm/lib/Support/Unix/Program.inc b/llvm/lib/Support/Unix/Program.inc
index be59bb0232de..089342030b97 100644
--- a/llvm/lib/Support/Unix/Program.inc
+++ b/llvm/lib/Support/Unix/Program.inc
@@ -71,7 +71,8 @@ ErrorOr<std::string> sys::findProgramByName(StringRef Name,
assert(!Name.empty() && "Must have a name!");
// Use the given path verbatim if it contains any slashes; this matches
// the behavior of sh(1) and friends.
- if (Name.find('/') != StringRef::npos) return std::string(Name);
+ if (Name.contains('/'))
+ return std::string(Name);
SmallVector<StringRef, 16> EnvironmentPaths;
if (Paths.empty())
diff --git a/llvm/lib/Support/Unix/Unix.h b/llvm/lib/Support/Unix/Unix.h
index 60929139598b..1599241a344a 100644
--- a/llvm/lib/Support/Unix/Unix.h
+++ b/llvm/lib/Support/Unix/Unix.h
@@ -67,11 +67,10 @@ static inline bool MakeErrMsg(
}
// Include StrError(errnum) in a fatal error message.
-LLVM_ATTRIBUTE_NORETURN static inline void ReportErrnumFatal(const char *Msg,
- int errnum) {
+[[noreturn]] static inline void ReportErrnumFatal(const char *Msg, int errnum) {
std::string ErrMsg;
MakeErrMsg(&ErrMsg, Msg, errnum);
- llvm::report_fatal_error(ErrMsg);
+ llvm::report_fatal_error(llvm::Twine(ErrMsg));
}
namespace llvm {
diff --git a/llvm/lib/Support/VirtualFileSystem.cpp b/llvm/lib/Support/VirtualFileSystem.cpp
index 15bb54e61817..9bf0384b5f1b 100644
--- a/llvm/lib/Support/VirtualFileSystem.cpp
+++ b/llvm/lib/Support/VirtualFileSystem.cpp
@@ -32,6 +32,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/FileSystem/UniqueID.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
@@ -193,6 +194,7 @@ public:
bool RequiresNullTerminator,
bool IsVolatile) override;
std::error_code close() override;
+ void setPath(const Twine &Path) override;
};
} // namespace
@@ -228,6 +230,12 @@ std::error_code RealFile::close() {
return EC;
}
+void RealFile::setPath(const Twine &Path) {
+ RealName = Path.str();
+ if (auto Status = status())
+ S = Status.get().copyWithNewName(Status.get(), Path);
+}
+
namespace {
/// A file system according to your operating system.
@@ -442,7 +450,7 @@ std::error_code OverlayFileSystem::isLocal(const Twine &Path, bool &Result) {
std::error_code
OverlayFileSystem::getRealPath(const Twine &Path,
SmallVectorImpl<char> &Output) const {
- for (auto &FS : FSList)
+ for (const auto &FS : FSList)
if (FS->exists(Path))
return FS->getRealPath(Path, Output);
return errc::no_such_file_or_directory;
@@ -638,6 +646,8 @@ public:
}
std::error_code close() override { return {}; }
+
+ void setPath(const Twine &Path) override { RequestedName = Path.str(); }
};
} // namespace
@@ -655,6 +665,9 @@ public:
Status getStatus(const Twine &RequestedName) const {
return Status::copyWithNewName(Stat, RequestedName);
}
+
+ UniqueID getUniqueID() const { return Stat.getUniqueID(); }
+
InMemoryNode *getChild(StringRef Name) {
auto I = Entries.find(Name);
if (I != Entries.end())
@@ -698,10 +711,28 @@ Status getNodeStatus(const InMemoryNode *Node, const Twine &RequestedName) {
} // namespace
} // namespace detail
+// The UniqueID of in-memory files is derived from path and content.
+// This avoids difficulties in creating exactly equivalent in-memory FSes,
+// as often needed in multithreaded programs.
+static sys::fs::UniqueID getUniqueID(hash_code Hash) {
+ return sys::fs::UniqueID(std::numeric_limits<uint64_t>::max(),
+ uint64_t(size_t(Hash)));
+}
+static sys::fs::UniqueID getFileID(sys::fs::UniqueID Parent,
+ llvm::StringRef Name,
+ llvm::StringRef Contents) {
+ return getUniqueID(llvm::hash_combine(Parent.getFile(), Name, Contents));
+}
+static sys::fs::UniqueID getDirectoryID(sys::fs::UniqueID Parent,
+ llvm::StringRef Name) {
+ return getUniqueID(llvm::hash_combine(Parent.getFile(), Name));
+}
+
InMemoryFileSystem::InMemoryFileSystem(bool UseNormalizedPaths)
: Root(new detail::InMemoryDirectory(
- Status("", getNextVirtualUniqueID(), llvm::sys::TimePoint<>(), 0, 0,
- 0, llvm::sys::fs::file_type::directory_file,
+ Status("", getDirectoryID(llvm::sys::fs::UniqueID(), ""),
+ llvm::sys::TimePoint<>(), 0, 0, 0,
+ llvm::sys::fs::file_type::directory_file,
llvm::sys::fs::perms::all_all))),
UseNormalizedPaths(UseNormalizedPaths) {}
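With getDirectoryID()/getFileID() above, in-memory nodes derive their UniqueID from the parent ID, name and contents instead of a global counter, so two identically-built InMemoryFileSystems report identical IDs. A small sketch (not from the patch) of that property:

    #include "llvm/ADT/IntrusiveRefCntPtr.h"
    #include "llvm/Support/MemoryBuffer.h"
    #include "llvm/Support/VirtualFileSystem.h"
    #include <cassert>

    static llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> makeFS() {
      llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS(
          new llvm::vfs::InMemoryFileSystem());
      FS->addFile("/a.h", /*ModificationTime=*/0,
                  llvm::MemoryBuffer::getMemBuffer("int x;\n"));
      return FS;
    }

    int main() {
      auto A = makeFS(), B = makeFS();
      auto SA = A->status("/a.h"), SB = B->status("/a.h");
      assert(SA && SB);
      // Same path and contents now hash to the same UniqueID across instances.
      assert(SA->getUniqueID() == SB->getUniqueID());
      return 0;
    }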
@@ -754,10 +785,14 @@ bool InMemoryFileSystem::addFile(const Twine &P, time_t ModificationTime,
Child.reset(new detail::InMemoryHardLink(P.str(), *HardLinkTarget));
else {
// Create a new file or directory.
- Status Stat(P.str(), getNextVirtualUniqueID(),
- llvm::sys::toTimePoint(ModificationTime), ResolvedUser,
- ResolvedGroup, Buffer->getBufferSize(), ResolvedType,
- ResolvedPerms);
+ Status Stat(
+ P.str(),
+ (ResolvedType == sys::fs::file_type::directory_file)
+ ? getDirectoryID(Dir->getUniqueID(), Name)
+ : getFileID(Dir->getUniqueID(), Name, Buffer->getBuffer()),
+ llvm::sys::toTimePoint(ModificationTime), ResolvedUser,
+ ResolvedGroup, Buffer->getBufferSize(), ResolvedType,
+ ResolvedPerms);
if (ResolvedType == sys::fs::file_type::directory_file) {
Child.reset(new detail::InMemoryDirectory(std::move(Stat)));
} else {
@@ -772,9 +807,9 @@ bool InMemoryFileSystem::addFile(const Twine &P, time_t ModificationTime,
// Create a new directory. Use the path up to here.
Status Stat(
StringRef(Path.str().begin(), Name.end() - Path.str().begin()),
- getNextVirtualUniqueID(), llvm::sys::toTimePoint(ModificationTime),
- ResolvedUser, ResolvedGroup, 0, sys::fs::file_type::directory_file,
- NewDirectoryPerms);
+ getDirectoryID(Dir->getUniqueID(), Name),
+ llvm::sys::toTimePoint(ModificationTime), ResolvedUser, ResolvedGroup,
+ 0, sys::fs::file_type::directory_file, NewDirectoryPerms);
Dir = cast<detail::InMemoryDirectory>(Dir->addChild(
Name, std::make_unique<detail::InMemoryDirectory>(std::move(Stat))));
continue;
@@ -1015,9 +1050,10 @@ static llvm::sys::path::Style getExistingStyle(llvm::StringRef Path) {
// Detect the path style in use by checking the first separator.
llvm::sys::path::Style style = llvm::sys::path::Style::native;
const size_t n = Path.find_first_of("/\\");
+ // Can't distinguish between posix and windows_slash here.
if (n != static_cast<size_t>(-1))
style = (Path[n] == '/') ? llvm::sys::path::Style::posix
- : llvm::sys::path::Style::windows;
+ : llvm::sys::path::Style::windows_backslash;
return style;
}
@@ -1091,6 +1127,7 @@ public:
}
};
+namespace {
/// Directory iterator implementation for \c RedirectingFileSystem's
/// directory remap entries that maps the paths reported by the external
/// file system's directory iterator back to the virtual directory's path.
@@ -1129,6 +1166,7 @@ public:
return EC;
}
};
+} // namespace
llvm::ErrorOr<std::string>
RedirectingFileSystem::getCurrentWorkingDirectory() const {
@@ -1161,8 +1199,10 @@ std::error_code RedirectingFileSystem::isLocal(const Twine &Path_,
}
std::error_code RedirectingFileSystem::makeAbsolute(SmallVectorImpl<char> &Path) const {
+ // is_absolute(..., Style::windows_*) accepts paths with both slash types.
if (llvm::sys::path::is_absolute(Path, llvm::sys::path::Style::posix) ||
- llvm::sys::path::is_absolute(Path, llvm::sys::path::Style::windows))
+ llvm::sys::path::is_absolute(Path,
+ llvm::sys::path::Style::windows_backslash))
return {};
auto WorkingDir = getCurrentWorkingDirectory();
@@ -1173,9 +1213,15 @@ std::error_code RedirectingFileSystem::makeAbsolute(SmallVectorImpl<char> &Path)
// is native and there is no way to override that. Since we know WorkingDir
// is absolute, we can use it to determine which style we actually have and
// append Path ourselves.
- sys::path::Style style = sys::path::Style::windows;
+ sys::path::Style style = sys::path::Style::windows_backslash;
if (sys::path::is_absolute(WorkingDir.get(), sys::path::Style::posix)) {
style = sys::path::Style::posix;
+ } else {
+ // Distinguish between windows_backslash and windows_slash; getExistingStyle
+ // returns posix for a path with windows_slash.
+ if (getExistingStyle(WorkingDir.get()) !=
+ sys::path::Style::windows_backslash)
+ style = sys::path::Style::windows_slash;
}
std::string Result = WorkingDir.get();
@@ -1207,7 +1253,7 @@ directory_iterator RedirectingFileSystem::dir_begin(const Twine &Dir,
}
// Use status to make sure the path exists and refers to a directory.
- ErrorOr<Status> S = status(Path, *Result);
+ ErrorOr<Status> S = status(Path, Dir, *Result);
if (!S) {
if (shouldFallBackToExternalFS(S.getError(), Result->E))
return ExternalFS->dir_begin(Dir, EC);
@@ -1593,8 +1639,9 @@ private:
// which style we have, and use it consistently.
if (sys::path::is_absolute(Name, sys::path::Style::posix)) {
path_style = sys::path::Style::posix;
- } else if (sys::path::is_absolute(Name, sys::path::Style::windows)) {
- path_style = sys::path::Style::windows;
+ } else if (sys::path::is_absolute(Name,
+ sys::path::Style::windows_backslash)) {
+ path_style = sys::path::Style::windows_backslash;
} else {
assert(NameValueNode && "Name presence should be checked earlier");
error(NameValueNode,
@@ -1933,47 +1980,68 @@ RedirectingFileSystem::lookupPathImpl(
return make_error_code(llvm::errc::no_such_file_or_directory);
}
-static Status getRedirectedFileStatus(const Twine &Path, bool UseExternalNames,
+static Status getRedirectedFileStatus(const Twine &OriginalPath,
+ bool UseExternalNames,
Status ExternalStatus) {
Status S = ExternalStatus;
if (!UseExternalNames)
- S = Status::copyWithNewName(S, Path);
+ S = Status::copyWithNewName(S, OriginalPath);
S.IsVFSMapped = true;
return S;
}
ErrorOr<Status> RedirectingFileSystem::status(
- const Twine &Path, const RedirectingFileSystem::LookupResult &Result) {
+ const Twine &CanonicalPath, const Twine &OriginalPath,
+ const RedirectingFileSystem::LookupResult &Result) {
if (Optional<StringRef> ExtRedirect = Result.getExternalRedirect()) {
- ErrorOr<Status> S = ExternalFS->status(*ExtRedirect);
+ SmallString<256> CanonicalRemappedPath((*ExtRedirect).str());
+ if (std::error_code EC = makeCanonical(CanonicalRemappedPath))
+ return EC;
+
+ ErrorOr<Status> S = ExternalFS->status(CanonicalRemappedPath);
if (!S)
return S;
+ S = Status::copyWithNewName(*S, *ExtRedirect);
auto *RE = cast<RedirectingFileSystem::RemapEntry>(Result.E);
- return getRedirectedFileStatus(Path, RE->useExternalName(UseExternalNames),
- *S);
+ return getRedirectedFileStatus(OriginalPath,
+ RE->useExternalName(UseExternalNames), *S);
}
auto *DE = cast<RedirectingFileSystem::DirectoryEntry>(Result.E);
- return Status::copyWithNewName(DE->getStatus(), Path);
+ return Status::copyWithNewName(DE->getStatus(), CanonicalPath);
}
-ErrorOr<Status> RedirectingFileSystem::status(const Twine &Path_) {
- SmallString<256> Path;
- Path_.toVector(Path);
+ErrorOr<Status>
+RedirectingFileSystem::getExternalStatus(const Twine &CanonicalPath,
+ const Twine &OriginalPath) const {
+ if (auto Result = ExternalFS->status(CanonicalPath)) {
+ return Result.get().copyWithNewName(Result.get(), OriginalPath);
+ } else {
+ return Result.getError();
+ }
+}
- if (std::error_code EC = makeCanonical(Path))
+ErrorOr<Status> RedirectingFileSystem::status(const Twine &OriginalPath) {
+ SmallString<256> CanonicalPath;
+ OriginalPath.toVector(CanonicalPath);
+
+ if (std::error_code EC = makeCanonical(CanonicalPath))
return EC;
- ErrorOr<RedirectingFileSystem::LookupResult> Result = lookupPath(Path);
+ ErrorOr<RedirectingFileSystem::LookupResult> Result =
+ lookupPath(CanonicalPath);
if (!Result) {
- if (shouldFallBackToExternalFS(Result.getError()))
- return ExternalFS->status(Path);
+ if (shouldFallBackToExternalFS(Result.getError())) {
+ return getExternalStatus(CanonicalPath, OriginalPath);
+ }
return Result.getError();
}
- ErrorOr<Status> S = status(Path, *Result);
- if (!S && shouldFallBackToExternalFS(S.getError(), Result->E))
- S = ExternalFS->status(Path);
+ ErrorOr<Status> S = status(CanonicalPath, OriginalPath, *Result);
+ if (!S && shouldFallBackToExternalFS(S.getError(), Result->E)) {
+ return getExternalStatus(CanonicalPath, OriginalPath);
+ }
+
return S;
}
@@ -1998,22 +2066,39 @@ public:
}
std::error_code close() override { return InnerFile->close(); }
+
+ void setPath(const Twine &Path) override { S = S.copyWithNewName(S, Path); }
};
} // namespace
ErrorOr<std::unique_ptr<File>>
-RedirectingFileSystem::openFileForRead(const Twine &Path_) {
- SmallString<256> Path;
- Path_.toVector(Path);
+File::getWithPath(ErrorOr<std::unique_ptr<File>> Result, const Twine &P) {
+ if (!Result)
+ return Result;
- if (std::error_code EC = makeCanonical(Path))
+ ErrorOr<std::unique_ptr<File>> F = std::move(*Result);
+ auto Name = F->get()->getName();
+ if (Name && Name.get() != P.str())
+ F->get()->setPath(P);
+ return F;
+}
+
+ErrorOr<std::unique_ptr<File>>
+RedirectingFileSystem::openFileForRead(const Twine &OriginalPath) {
+ SmallString<256> CanonicalPath;
+ OriginalPath.toVector(CanonicalPath);
+
+ if (std::error_code EC = makeCanonical(CanonicalPath))
return EC;
- ErrorOr<RedirectingFileSystem::LookupResult> Result = lookupPath(Path);
+ ErrorOr<RedirectingFileSystem::LookupResult> Result =
+ lookupPath(CanonicalPath);
if (!Result) {
if (shouldFallBackToExternalFS(Result.getError()))
- return ExternalFS->openFileForRead(Path);
+ return File::getWithPath(ExternalFS->openFileForRead(CanonicalPath),
+ OriginalPath);
+
return Result.getError();
}
@@ -2021,12 +2106,18 @@ RedirectingFileSystem::openFileForRead(const Twine &Path_) {
return make_error_code(llvm::errc::invalid_argument);
StringRef ExtRedirect = *Result->getExternalRedirect();
+ SmallString<256> CanonicalRemappedPath(ExtRedirect.str());
+ if (std::error_code EC = makeCanonical(CanonicalRemappedPath))
+ return EC;
+
auto *RE = cast<RedirectingFileSystem::RemapEntry>(Result->E);
- auto ExternalFile = ExternalFS->openFileForRead(ExtRedirect);
+ auto ExternalFile = File::getWithPath(
+ ExternalFS->openFileForRead(CanonicalRemappedPath), ExtRedirect);
if (!ExternalFile) {
if (shouldFallBackToExternalFS(ExternalFile.getError(), Result->E))
- return ExternalFS->openFileForRead(Path);
+ return File::getWithPath(ExternalFS->openFileForRead(CanonicalPath),
+ OriginalPath);
return ExternalFile;
}
@@ -2036,7 +2127,7 @@ RedirectingFileSystem::openFileForRead(const Twine &Path_) {
// FIXME: Update the status with the name and VFSMapped.
Status S = getRedirectedFileStatus(
- Path, RE->useExternalName(UseExternalNames), *ExternalStatus);
+ OriginalPath, RE->useExternalName(UseExternalNames), *ExternalStatus);
return std::unique_ptr<File>(
std::make_unique<FileWithFixedStatus>(std::move(*ExternalFile), S));
}
diff --git a/llvm/lib/Support/Windows/Path.inc b/llvm/lib/Support/Windows/Path.inc
index c1d291731a88..b15e71a9ce2a 100644
--- a/llvm/lib/Support/Windows/Path.inc
+++ b/llvm/lib/Support/Windows/Path.inc
@@ -74,6 +74,11 @@ std::error_code widenPath(const Twine &Path8, SmallVectorImpl<wchar_t> &Path16,
SmallString<MAX_PATH> Path8Str;
Path8.toVector(Path8Str);
+ // If the path is a long path, mangled into forward slashes, normalize
+ // back to backslashes here.
+ if (Path8Str.startswith("//?/"))
+ llvm::sys::path::native(Path8Str, path::Style::windows_backslash);
+
if (std::error_code EC = UTF8ToUTF16(Path8Str, Path16))
return EC;
@@ -100,8 +105,10 @@ std::error_code widenPath(const Twine &Path8, SmallVectorImpl<wchar_t> &Path16,
}
// Remove '.' and '..' because long paths treat these as real path components.
+ // Explicitly use the backslash form here, as we're prepending the \\?\
+ // prefix.
llvm::sys::path::native(Path8Str, path::Style::windows);
- llvm::sys::path::remove_dots(Path8Str, true);
+ llvm::sys::path::remove_dots(Path8Str, true, path::Style::windows);
const StringRef RootName = llvm::sys::path::root_name(Path8Str);
assert(!RootName.empty() &&
@@ -145,6 +152,7 @@ std::string getMainExecutable(const char *argv0, void *MainExecAddr) {
if (UTF16ToUTF8(PathName.data(), PathName.size(), PathNameUTF8))
return "";
+ llvm::sys::path::make_preferred(PathNameUTF8);
return std::string(PathNameUTF8.data());
}
@@ -207,7 +215,13 @@ std::error_code current_path(SmallVectorImpl<char> &result) {
// On success, GetCurrentDirectoryW returns the number of characters not
// including the null-terminator.
cur_path.set_size(len);
- return UTF16ToUTF8(cur_path.begin(), cur_path.size(), result);
+
+ if (std::error_code EC =
+ UTF16ToUTF8(cur_path.begin(), cur_path.size(), result))
+ return EC;
+
+ llvm::sys::path::make_preferred(result);
+ return std::error_code();
}
std::error_code set_current_path(const Twine &path) {
@@ -388,7 +402,11 @@ static std::error_code realPathFromHandle(HANDLE H,
}
// Convert the result from UTF-16 to UTF-8.
- return UTF16ToUTF8(Data, CountChars, RealPath);
+ if (std::error_code EC = UTF16ToUTF8(Data, CountChars, RealPath))
+ return EC;
+
+ llvm::sys::path::make_preferred(RealPath);
+ return std::error_code();
}
std::error_code is_local(int FD, bool &Result) {
@@ -416,8 +434,7 @@ static std::error_code setDeleteDisposition(HANDLE Handle, bool Delete) {
// Check if the file is on a network (non-local) drive. If so, don't
// continue when DeleteFile is true, since it prevents opening the file for
- // writes. Note -- this will leak temporary files on disk, but only when the
- // target file is on a network drive.
+ // writes.
SmallVector<wchar_t, 128> FinalPath;
if (std::error_code EC = realPathFromHandle(Handle, FinalPath))
return EC;
@@ -427,7 +444,7 @@ static std::error_code setDeleteDisposition(HANDLE Handle, bool Delete) {
return EC;
if (!IsLocal)
- return std::error_code();
+ return errc::not_supported;
// The file is on a local drive, we can safely set FILE_DISPOSITION_INFO's
// flag.
@@ -1183,12 +1200,6 @@ Expected<file_t> openNativeFile(const Twine &Name, CreationDisposition Disp,
}
}
- if (Flags & OF_Delete) {
- if ((EC = setDeleteDisposition(Result, true))) {
- ::CloseHandle(Result);
- return errorCodeToError(EC);
- }
- }
return Result;
}
@@ -1414,6 +1425,8 @@ static bool getKnownFolderPath(KNOWNFOLDERID folderId,
bool ok = !UTF16ToUTF8(path, ::wcslen(path), result);
::CoTaskMemFree(path);
+ if (ok)
+ llvm::sys::path::make_preferred(result);
return ok;
}
@@ -1474,6 +1487,7 @@ void system_temp_directory(bool ErasedOnReboot, SmallVectorImpl<char> &Result) {
// Fall back to a system default.
const char *DefaultResult = "C:\\Temp";
Result.append(DefaultResult, DefaultResult + strlen(DefaultResult));
+ llvm::sys::path::make_preferred(Result);
}
} // end namespace path
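Several of the hunks above append llvm::sys::path::make_preferred() after UTF-16 to UTF-8 conversion, so paths reported on Windows consistently use backslashes even when the source used forward slashes. A minimal sketch of what that call does (windows_backslash is the style name introduced by this import):

    #include "llvm/ADT/SmallString.h"
    #include "llvm/Support/Path.h"
    #include "llvm/Support/raw_ostream.h"

    int main() {
      llvm::SmallString<64> P("C:/Users/dev/file.txt");
      // Rewrites '/' to the preferred '\' separator in place.
      llvm::sys::path::make_preferred(
          P, llvm::sys::path::Style::windows_backslash);
      llvm::outs() << P << "\n"; // C:\Users\dev\file.txt
      return 0;
    }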
diff --git a/llvm/lib/Support/Windows/Process.inc b/llvm/lib/Support/Windows/Process.inc
index 6f58c52e0746..6732063b562e 100644
--- a/llvm/lib/Support/Windows/Process.inc
+++ b/llvm/lib/Support/Windows/Process.inc
@@ -261,6 +261,7 @@ windows::GetCommandLineArguments(SmallVectorImpl<const char *> &Args,
EC = GetExecutableName(Filename);
if (EC)
return EC;
+ sys::path::make_preferred(Arg0);
sys::path::append(Arg0, Filename);
Args[0] = Saver.save(Arg0).data();
return std::error_code();
@@ -504,8 +505,7 @@ bool llvm::RunningWindows8OrGreater() {
return GetWindowsOSVersion() >= llvm::VersionTuple(6, 2, 0, 0);
}
-LLVM_ATTRIBUTE_NORETURN
-void Process::ExitNoCleanup(int RetCode) {
+[[noreturn]] void Process::ExitNoCleanup(int RetCode) {
TerminateProcess(GetCurrentProcess(), RetCode);
llvm_unreachable("TerminateProcess doesn't return");
}
diff --git a/llvm/lib/Support/Windows/Program.inc b/llvm/lib/Support/Windows/Program.inc
index 824834c1cbbe..a9cf2db7ec72 100644
--- a/llvm/lib/Support/Windows/Program.inc
+++ b/llvm/lib/Support/Windows/Program.inc
@@ -103,6 +103,7 @@ ErrorOr<std::string> sys::findProgramByName(StringRef Name,
if (U8Result.empty())
return mapWindowsError(::GetLastError());
+ llvm::sys::path::make_preferred(U8Result);
return std::string(U8Result.begin(), U8Result.end());
}
diff --git a/llvm/lib/Support/X86TargetParser.cpp b/llvm/lib/Support/X86TargetParser.cpp
index c9530659caad..ab49ac548f89 100644
--- a/llvm/lib/Support/X86TargetParser.cpp
+++ b/llvm/lib/Support/X86TargetParser.cpp
@@ -11,7 +11,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/X86TargetParser.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
+#include <numeric>
using namespace llvm;
using namespace llvm::X86;
@@ -137,8 +139,8 @@ constexpr FeatureBitset FeaturesNocona =
// Basic 64-bit capable CPU.
constexpr FeatureBitset FeaturesX86_64 = FeaturesPentium4 | Feature64BIT;
constexpr FeatureBitset FeaturesX86_64_V2 = FeaturesX86_64 | FeatureSAHF |
- FeaturePOPCNT | FeatureSSE4_2 |
- FeatureCMPXCHG16B;
+ FeaturePOPCNT | FeatureCRC32 |
+ FeatureSSE4_2 | FeatureCMPXCHG16B;
constexpr FeatureBitset FeaturesX86_64_V3 =
FeaturesX86_64_V2 | FeatureAVX2 | FeatureBMI | FeatureBMI2 | FeatureF16C |
FeatureFMA | FeatureLZCNT | FeatureMOVBE | FeatureXSAVE;
@@ -151,7 +153,7 @@ constexpr FeatureBitset FeaturesCore2 =
FeaturesNocona | FeatureSAHF | FeatureSSSE3;
constexpr FeatureBitset FeaturesPenryn = FeaturesCore2 | FeatureSSE4_1;
constexpr FeatureBitset FeaturesNehalem =
- FeaturesPenryn | FeaturePOPCNT | FeatureSSE4_2;
+ FeaturesPenryn | FeaturePOPCNT | FeatureCRC32 | FeatureSSE4_2;
constexpr FeatureBitset FeaturesWestmere = FeaturesNehalem | FeaturePCLMUL;
constexpr FeatureBitset FeaturesSandyBridge =
FeaturesWestmere | FeatureAVX | FeatureXSAVE | FeatureXSAVEOPT;
@@ -201,11 +203,11 @@ constexpr FeatureBitset FeaturesTigerlake =
FeaturesICLClient | FeatureAVX512VP2INTERSECT | FeatureMOVDIR64B |
FeatureCLWB | FeatureMOVDIRI | FeatureSHSTK | FeatureKL | FeatureWIDEKL;
constexpr FeatureBitset FeaturesSapphireRapids =
- FeaturesICLServer | FeatureAMX_TILE | FeatureAMX_INT8 | FeatureAMX_BF16 |
- FeatureAVX512BF16 | FeatureAVX512VP2INTERSECT | FeatureCLDEMOTE |
- FeatureENQCMD | FeatureMOVDIR64B | FeatureMOVDIRI | FeaturePTWRITE |
- FeatureSERIALIZE | FeatureSHSTK | FeatureTSXLDTRK | FeatureUINTR |
- FeatureWAITPKG | FeatureAVXVNNI;
+ FeaturesICLServer | FeatureAMX_BF16 | FeatureAMX_INT8 | FeatureAMX_TILE |
+ FeatureAVX512BF16 | FeatureAVX512FP16 | FeatureAVX512VP2INTERSECT |
+ FeatureAVXVNNI | FeatureCLDEMOTE | FeatureENQCMD | FeatureMOVDIR64B |
+ FeatureMOVDIRI | FeaturePTWRITE | FeatureSERIALIZE | FeatureSHSTK |
+ FeatureTSXLDTRK | FeatureUINTR | FeatureWAITPKG;
// Intel Atom processors.
// Bonnell has feature parity with Core2 and adds MOVBE.
@@ -254,16 +256,17 @@ constexpr FeatureBitset FeaturesBTVER1 =
FeatureSSE | FeatureSSE2 | FeatureSSE3 | FeatureSSSE3 | FeatureSSE4_A |
FeatureSAHF;
constexpr FeatureBitset FeaturesBTVER2 =
- FeaturesBTVER1 | FeatureAES | FeatureAVX | FeatureBMI | FeatureF16C |
- FeatureMOVBE | FeaturePCLMUL | FeatureXSAVE | FeatureXSAVEOPT;
+ FeaturesBTVER1 | FeatureAES | FeatureAVX | FeatureBMI | FeatureCRC32 |
+ FeatureF16C | FeatureMOVBE | FeaturePCLMUL | FeatureXSAVE | FeatureXSAVEOPT;
// AMD Bulldozer architecture processors.
constexpr FeatureBitset FeaturesBDVER1 =
FeatureX87 | FeatureAES | FeatureAVX | FeatureCMPXCHG8B |
- FeatureCMPXCHG16B | Feature64BIT | FeatureFMA4 | FeatureFXSR | FeatureLWP |
- FeatureLZCNT | FeatureMMX | FeaturePCLMUL | FeaturePOPCNT | FeaturePRFCHW |
- FeatureSAHF | FeatureSSE | FeatureSSE2 | FeatureSSE3 | FeatureSSSE3 |
- FeatureSSE4_1 | FeatureSSE4_2 | FeatureSSE4_A | FeatureXOP | FeatureXSAVE;
+ FeatureCMPXCHG16B | FeatureCRC32 | Feature64BIT | FeatureFMA4 |
+ FeatureFXSR | FeatureLWP | FeatureLZCNT | FeatureMMX | FeaturePCLMUL |
+ FeaturePOPCNT | FeaturePRFCHW | FeatureSAHF | FeatureSSE | FeatureSSE2 |
+ FeatureSSE3 | FeatureSSSE3 | FeatureSSE4_1 | FeatureSSE4_2 | FeatureSSE4_A |
+ FeatureXOP | FeatureXSAVE;
constexpr FeatureBitset FeaturesBDVER2 =
FeaturesBDVER1 | FeatureBMI | FeatureFMA | FeatureF16C | FeatureTBM;
constexpr FeatureBitset FeaturesBDVER3 =
@@ -276,9 +279,9 @@ constexpr FeatureBitset FeaturesBDVER4 = FeaturesBDVER3 | FeatureAVX2 |
constexpr FeatureBitset FeaturesZNVER1 =
FeatureX87 | FeatureADX | FeatureAES | FeatureAVX | FeatureAVX2 |
FeatureBMI | FeatureBMI2 | FeatureCLFLUSHOPT | FeatureCLZERO |
- FeatureCMPXCHG8B | FeatureCMPXCHG16B | Feature64BIT | FeatureF16C |
- FeatureFMA | FeatureFSGSBASE | FeatureFXSR | FeatureLZCNT | FeatureMMX |
- FeatureMOVBE | FeatureMWAITX | FeaturePCLMUL | FeaturePOPCNT |
+ FeatureCMPXCHG8B | FeatureCMPXCHG16B | FeatureCRC32 | Feature64BIT |
+ FeatureF16C | FeatureFMA | FeatureFSGSBASE | FeatureFXSR | FeatureLZCNT |
+ FeatureMMX | FeatureMOVBE | FeatureMWAITX | FeaturePCLMUL | FeaturePOPCNT |
FeaturePRFCHW | FeatureRDRND | FeatureRDSEED | FeatureSAHF | FeatureSHA |
FeatureSSE | FeatureSSE2 | FeatureSSE3 | FeatureSSSE3 | FeatureSSE4_1 |
FeatureSSE4_2 | FeatureSSE4_A | FeatureXSAVE | FeatureXSAVEC |
@@ -470,6 +473,7 @@ constexpr FeatureBitset ImpliedFeaturesCLZERO = {};
constexpr FeatureBitset ImpliedFeaturesCMOV = {};
constexpr FeatureBitset ImpliedFeaturesCMPXCHG16B = {};
constexpr FeatureBitset ImpliedFeaturesCMPXCHG8B = {};
+constexpr FeatureBitset ImpliedFeaturesCRC32 = {};
constexpr FeatureBitset ImpliedFeaturesENQCMD = {};
constexpr FeatureBitset ImpliedFeaturesFSGSBASE = {};
constexpr FeatureBitset ImpliedFeaturesFXSR = {};
@@ -576,6 +580,8 @@ constexpr FeatureBitset ImpliedFeaturesAMX_BF16 = FeatureAMX_TILE;
constexpr FeatureBitset ImpliedFeaturesAMX_INT8 = FeatureAMX_TILE;
constexpr FeatureBitset ImpliedFeaturesHRESET = {};
+static constexpr FeatureBitset ImpliedFeaturesAVX512FP16 =
+ FeatureAVX512BW | FeatureAVX512DQ | FeatureAVX512VL;
// Key Locker Features
constexpr FeatureBitset ImpliedFeaturesKL = FeatureSSE2;
constexpr FeatureBitset ImpliedFeaturesWIDEKL = FeatureKL;
@@ -660,3 +666,45 @@ void llvm::X86::updateImpliedFeatures(
if (ImpliedBits[i] && !FeatureInfos[i].Name.empty())
Features[FeatureInfos[i].Name] = Enabled;
}
+
+uint64_t llvm::X86::getCpuSupportsMask(ArrayRef<StringRef> FeatureStrs) {
+ // Processor features and mapping to processor feature value.
+ uint64_t FeaturesMask = 0;
+ for (const StringRef &FeatureStr : FeatureStrs) {
+ unsigned Feature = StringSwitch<unsigned>(FeatureStr)
+#define X86_FEATURE_COMPAT(ENUM, STR, PRIORITY) \
+ .Case(STR, llvm::X86::FEATURE_##ENUM)
+#include "llvm/Support/X86TargetParser.def"
+ ;
+ FeaturesMask |= (1ULL << Feature);
+ }
+ return FeaturesMask;
+}
+
+unsigned llvm::X86::getFeaturePriority(ProcessorFeatures Feat) {
+#ifndef NDEBUG
+ // Check that priorities are set properly in the .def file. We expect that
+ // "compat" features are assigned non-duplicate consecutive priorities
+ // starting from zero (0, 1, ..., num_features - 1).
+#define X86_FEATURE_COMPAT(ENUM, STR, PRIORITY) PRIORITY,
+ unsigned Priorities[] = {
+#include "llvm/Support/X86TargetParser.def"
+ std::numeric_limits<unsigned>::max() // Need to consume last comma.
+ };
+ std::array<unsigned, array_lengthof(Priorities) - 1> HelperList;
+ std::iota(HelperList.begin(), HelperList.end(), 0);
+ assert(std::is_permutation(HelperList.begin(), HelperList.end(),
+ std::begin(Priorities),
+ std::prev(std::end(Priorities))) &&
+ "Priorities don't form consecutive range!");
+#endif
+
+ switch (Feat) {
+#define X86_FEATURE_COMPAT(ENUM, STR, PRIORITY) \
+ case X86::FEATURE_##ENUM: \
+ return PRIORITY;
+#include "llvm/Support/X86TargetParser.def"
+ default:
+ llvm_unreachable("No Feature Priority for non-CPUSupports Features");
+ }
+}
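getCpuSupportsMask() above folds a list of compat feature names into a bit mask indexed by the FEATURE_* enum, and getFeaturePriority() recovers the priority column of X86TargetParser.def. A hedged usage sketch, assuming the declarations land in llvm/Support/X86TargetParser.h alongside these definitions; note the StringSwitch has no default case, so the names must come from the X86_FEATURE_COMPAT table:

    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/X86TargetParser.h"
    #include "llvm/Support/raw_ostream.h"
    #include <cstdint>

    int main() {
      // Valid compat feature names, e.g. as used by __builtin_cpu_supports().
      llvm::StringRef Feats[] = {"sse4.2", "avx2"};
      uint64_t Mask = llvm::X86::getCpuSupportsMask(Feats);
      llvm::outs() << "mask = " << Mask << "\n";
      return 0;
    }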
diff --git a/llvm/lib/Support/raw_ostream.cpp b/llvm/lib/Support/raw_ostream.cpp
index d4e1c884d125..4590a3d19b0d 100644
--- a/llvm/lib/Support/raw_ostream.cpp
+++ b/llvm/lib/Support/raw_ostream.cpp
@@ -185,7 +185,7 @@ raw_ostream &raw_ostream::write_escaped(StringRef Str,
// Write out the escaped representation.
if (UseHexEscapes) {
*this << '\\' << 'x';
- *this << hexdigit((c >> 4 & 0xF));
+ *this << hexdigit((c >> 4) & 0xF);
*this << hexdigit((c >> 0) & 0xF);
} else {
// Always use a full 3-character octal escape.
@@ -679,7 +679,8 @@ raw_fd_ostream::~raw_fd_ostream() {
// has_error() and clear the error flag with clear_error() before
// destructing raw_ostream objects which may have errors.
if (has_error())
- report_fatal_error("IO failure on output stream: " + error().message(),
+ report_fatal_error(Twine("IO failure on output stream: ") +
+ error().message(),
/*gen_crash_diag=*/false);
}
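The hexdigit change above is purely cosmetic: in C++ the shift operators bind tighter than bitwise AND, so (c >> 4 & 0xF) already parses as ((c >> 4) & 0xF); the added parentheses only make the grouping explicit, and the emitted escapes are unchanged.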
diff --git a/llvm/lib/TableGen/Main.cpp b/llvm/lib/TableGen/Main.cpp
index 0b1024648b66..762255b43136 100644
--- a/llvm/lib/TableGen/Main.cpp
+++ b/llvm/lib/TableGen/Main.cpp
@@ -55,6 +55,10 @@ WriteIfChanged("write-if-changed", cl::desc("Only write output if it changed"));
static cl::opt<bool>
TimePhases("time-phases", cl::desc("Time phases of parser and backend"));
+static cl::opt<bool> NoWarnOnUnusedTemplateArgs(
+ "no-warn-on-unused-template-args",
+ cl::desc("Disable unused template argument warnings."));
+
static int reportError(const char *ProgName, Twine Msg) {
errs() << ProgName << ": " << Msg;
errs().flush();
@@ -107,7 +111,7 @@ int llvm::TableGenMain(const char *argv0, TableGenMainFn *MainFn) {
// it later.
SrcMgr.setIncludeDirs(IncludeDirs);
- TGParser Parser(SrcMgr, MacroNames, Records);
+ TGParser Parser(SrcMgr, MacroNames, Records, NoWarnOnUnusedTemplateArgs);
if (Parser.ParseFile())
return 1;
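The new option is threaded from TableGenMain into TGParser (see the TGParser.h change below), so any tblgen driver built on TableGenMain can opt out of the unused-template-argument diagnostic; for example (illustrative invocation):

    llvm-tblgen -no-warn-on-unused-template-args Foo.td

Left at its default, the parser warns for every class or multiclass template argument that is never referenced in the body.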
diff --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp
index 8663863d968f..eb7d4838a9f6 100644
--- a/llvm/lib/TableGen/Record.cpp
+++ b/llvm/lib/TableGen/Record.cpp
@@ -25,6 +25,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Error.h"
@@ -41,24 +42,70 @@ using namespace llvm;
#define DEBUG_TYPE "tblgen-records"
-static BumpPtrAllocator Allocator;
+//===----------------------------------------------------------------------===//
+// Context
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+namespace detail {
+/// This class contains all of the contextual static state of the Record
+/// classes. This allows for better lifetime management and control of the used
+/// static data.
+struct RecordContext {
+ RecordContext()
+ : AnyRecord(0), TrueBitInit(true, &SharedBitRecTy),
+ FalseBitInit(false, &SharedBitRecTy), StringInitStringPool(Allocator),
+ StringInitCodePool(Allocator), LastRecordID(0) {}
+
+ BumpPtrAllocator Allocator;
+ std::vector<BitsRecTy *> SharedBitsRecTys;
+ BitRecTy SharedBitRecTy;
+ IntRecTy SharedIntRecTy;
+ StringRecTy SharedStringRecTy;
+ DagRecTy SharedDagRecTy;
+
+ RecordRecTy AnyRecord;
+ UnsetInit TheUnsetInit;
+ BitInit TrueBitInit;
+ BitInit FalseBitInit;
+
+ FoldingSet<BitsInit> TheBitsInitPool;
+ std::map<int64_t, IntInit *> TheIntInitPool;
+ StringMap<StringInit *, BumpPtrAllocator &> StringInitStringPool;
+ StringMap<StringInit *, BumpPtrAllocator &> StringInitCodePool;
+ FoldingSet<ListInit> TheListInitPool;
+ FoldingSet<UnOpInit> TheUnOpInitPool;
+ FoldingSet<BinOpInit> TheBinOpInitPool;
+ FoldingSet<TernOpInit> TheTernOpInitPool;
+ FoldingSet<FoldOpInit> TheFoldOpInitPool;
+ FoldingSet<IsAOpInit> TheIsAOpInitPool;
+ DenseMap<std::pair<RecTy *, Init *>, VarInit *> TheVarInitPool;
+ DenseMap<std::pair<TypedInit *, unsigned>, VarBitInit *> TheVarBitInitPool;
+ DenseMap<std::pair<TypedInit *, unsigned>, VarListElementInit *>
+ TheVarListElementInitPool;
+ FoldingSet<VarDefInit> TheVarDefInitPool;
+ DenseMap<std::pair<Init *, StringInit *>, FieldInit *> TheFieldInitPool;
+ FoldingSet<CondOpInit> TheCondOpInitPool;
+ FoldingSet<DagInit> TheDagInitPool;
+
+ unsigned LastRecordID;
+};
+} // namespace detail
+} // namespace llvm
+
+ManagedStatic<detail::RecordContext> Context;
//===----------------------------------------------------------------------===//
// Type implementations
//===----------------------------------------------------------------------===//
-BitRecTy BitRecTy::Shared;
-IntRecTy IntRecTy::Shared;
-StringRecTy StringRecTy::Shared;
-DagRecTy DagRecTy::Shared;
-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void RecTy::dump() const { print(errs()); }
#endif
ListRecTy *RecTy::getListTy() {
if (!ListTy)
- ListTy = new(Allocator) ListRecTy(this);
+ ListTy = new(Context->Allocator) ListRecTy(this);
return ListTy;
}
@@ -69,6 +116,8 @@ bool RecTy::typeIsConvertibleTo(const RecTy *RHS) const {
bool RecTy::typeIsA(const RecTy *RHS) const { return this == RHS; }
+BitRecTy *BitRecTy::get() { return &Context->SharedBitRecTy; }
+
bool BitRecTy::typeIsConvertibleTo(const RecTy *RHS) const{
if (RecTy::typeIsConvertibleTo(RHS) || RHS->getRecTyKind() == IntRecTyKind)
return true;
@@ -78,12 +127,11 @@ bool BitRecTy::typeIsConvertibleTo(const RecTy *RHS) const{
}
BitsRecTy *BitsRecTy::get(unsigned Sz) {
- static std::vector<BitsRecTy*> Shared;
- if (Sz >= Shared.size())
- Shared.resize(Sz + 1);
- BitsRecTy *&Ty = Shared[Sz];
+ if (Sz >= Context->SharedBitsRecTys.size())
+ Context->SharedBitsRecTys.resize(Sz + 1);
+ BitsRecTy *&Ty = Context->SharedBitsRecTys[Sz];
if (!Ty)
- Ty = new(Allocator) BitsRecTy(Sz);
+ Ty = new (Context->Allocator) BitsRecTy(Sz);
return Ty;
}
@@ -104,11 +152,15 @@ bool BitsRecTy::typeIsA(const RecTy *RHS) const {
return false;
}
+IntRecTy *IntRecTy::get() { return &Context->SharedIntRecTy; }
+
bool IntRecTy::typeIsConvertibleTo(const RecTy *RHS) const {
RecTyKind kind = RHS->getRecTyKind();
return kind==BitRecTyKind || kind==BitsRecTyKind || kind==IntRecTyKind;
}
+StringRecTy *StringRecTy::get() { return &Context->SharedStringRecTy; }
+
std::string StringRecTy::getAsString() const {
return "string";
}
@@ -134,6 +186,8 @@ bool ListRecTy::typeIsA(const RecTy *RHS) const {
return false;
}
+DagRecTy *DagRecTy::get() { return &Context->SharedDagRecTy; }
+
std::string DagRecTy::getAsString() const {
return "dag";
}
@@ -146,10 +200,8 @@ static void ProfileRecordRecTy(FoldingSetNodeID &ID,
}
RecordRecTy *RecordRecTy::get(ArrayRef<Record *> UnsortedClasses) {
- if (UnsortedClasses.empty()) {
- static RecordRecTy AnyRecord(0);
- return &AnyRecord;
- }
+ if (UnsortedClasses.empty())
+ return &Context->AnyRecord;
FoldingSet<RecordRecTy> &ThePool =
UnsortedClasses[0]->getRecords().RecordTypePool;
@@ -177,8 +229,8 @@ RecordRecTy *RecordRecTy::get(ArrayRef<Record *> UnsortedClasses) {
}
#endif
- void *Mem = Allocator.Allocate(totalSizeToAlloc<Record *>(Classes.size()),
- alignof(RecordRecTy));
+ void *Mem = Context->Allocator.Allocate(
+ totalSizeToAlloc<Record *>(Classes.size()), alignof(RecordRecTy));
RecordRecTy *Ty = new(Mem) RecordRecTy(Classes.size());
std::uninitialized_copy(Classes.begin(), Classes.end(),
Ty->getTrailingObjects<Record *>());
@@ -283,10 +335,7 @@ void Init::anchor() {}
LLVM_DUMP_METHOD void Init::dump() const { return print(errs()); }
#endif
-UnsetInit *UnsetInit::get() {
- static UnsetInit TheInit;
- return &TheInit;
-}
+UnsetInit *UnsetInit::get() { return &Context->TheUnsetInit; }
Init *UnsetInit::getCastTo(RecTy *Ty) const {
return const_cast<UnsetInit *>(this);
@@ -297,10 +346,7 @@ Init *UnsetInit::convertInitializerTo(RecTy *Ty) const {
}
BitInit *BitInit::get(bool V) {
- static BitInit True(true);
- static BitInit False(false);
-
- return V ? &True : &False;
+ return V ? &Context->TrueBitInit : &Context->FalseBitInit;
}
Init *BitInit::convertInitializerTo(RecTy *Ty) const {
@@ -328,21 +374,19 @@ ProfileBitsInit(FoldingSetNodeID &ID, ArrayRef<Init *> Range) {
}
BitsInit *BitsInit::get(ArrayRef<Init *> Range) {
- static FoldingSet<BitsInit> ThePool;
-
FoldingSetNodeID ID;
ProfileBitsInit(ID, Range);
void *IP = nullptr;
- if (BitsInit *I = ThePool.FindNodeOrInsertPos(ID, IP))
+ if (BitsInit *I = Context->TheBitsInitPool.FindNodeOrInsertPos(ID, IP))
return I;
- void *Mem = Allocator.Allocate(totalSizeToAlloc<Init *>(Range.size()),
- alignof(BitsInit));
+ void *Mem = Context->Allocator.Allocate(
+ totalSizeToAlloc<Init *>(Range.size()), alignof(BitsInit));
BitsInit *I = new(Mem) BitsInit(Range.size());
std::uninitialized_copy(Range.begin(), Range.end(),
I->getTrailingObjects<Init *>());
- ThePool.InsertNode(I, IP);
+ Context->TheBitsInitPool.InsertNode(I, IP);
return I;
}
@@ -446,10 +490,9 @@ Init *BitsInit::resolveReferences(Resolver &R) const {
}
IntInit *IntInit::get(int64_t V) {
- static std::map<int64_t, IntInit*> ThePool;
-
- IntInit *&I = ThePool[V];
- if (!I) I = new(Allocator) IntInit(V);
+ IntInit *&I = Context->TheIntInitPool[V];
+ if (!I)
+ I = new (Context->Allocator) IntInit(V);
return I;
}
@@ -503,7 +546,7 @@ IntInit::convertInitializerBitRange(ArrayRef<unsigned> Bits) const {
}
AnonymousNameInit *AnonymousNameInit::get(unsigned V) {
- return new (Allocator) AnonymousNameInit(V);
+ return new (Context->Allocator) AnonymousNameInit(V);
}
StringInit *AnonymousNameInit::getNameInit() const {
@@ -525,20 +568,12 @@ Init *AnonymousNameInit::resolveReferences(Resolver &R) const {
}
StringInit *StringInit::get(StringRef V, StringFormat Fmt) {
- static StringMap<StringInit*, BumpPtrAllocator &> StringPool(Allocator);
- static StringMap<StringInit*, BumpPtrAllocator &> CodePool(Allocator);
-
- if (Fmt == SF_String) {
- auto &Entry = *StringPool.insert(std::make_pair(V, nullptr)).first;
- if (!Entry.second)
- Entry.second = new (Allocator) StringInit(Entry.getKey(), Fmt);
- return Entry.second;
- } else {
- auto &Entry = *CodePool.insert(std::make_pair(V, nullptr)).first;
- if (!Entry.second)
- Entry.second = new (Allocator) StringInit(Entry.getKey(), Fmt);
- return Entry.second;
- }
+ auto &InitMap = Fmt == SF_String ? Context->StringInitStringPool
+ : Context->StringInitCodePool;
+ auto &Entry = *InitMap.insert(std::make_pair(V, nullptr)).first;
+ if (!Entry.second)
+ Entry.second = new (Context->Allocator) StringInit(Entry.getKey(), Fmt);
+ return Entry.second;
}
Init *StringInit::convertInitializerTo(RecTy *Ty) const {
@@ -559,24 +594,22 @@ static void ProfileListInit(FoldingSetNodeID &ID,
}
ListInit *ListInit::get(ArrayRef<Init *> Range, RecTy *EltTy) {
- static FoldingSet<ListInit> ThePool;
-
FoldingSetNodeID ID;
ProfileListInit(ID, Range, EltTy);
void *IP = nullptr;
- if (ListInit *I = ThePool.FindNodeOrInsertPos(ID, IP))
+ if (ListInit *I = Context->TheListInitPool.FindNodeOrInsertPos(ID, IP))
return I;
assert(Range.empty() || !isa<TypedInit>(Range[0]) ||
cast<TypedInit>(Range[0])->getType()->typeIsConvertibleTo(EltTy));
- void *Mem = Allocator.Allocate(totalSizeToAlloc<Init *>(Range.size()),
- alignof(ListInit));
- ListInit *I = new(Mem) ListInit(Range.size(), EltTy);
+ void *Mem = Context->Allocator.Allocate(
+ totalSizeToAlloc<Init *>(Range.size()), alignof(ListInit));
+ ListInit *I = new (Mem) ListInit(Range.size(), EltTy);
std::uninitialized_copy(Range.begin(), Range.end(),
I->getTrailingObjects<Init *>());
- ThePool.InsertNode(I, IP);
+ Context->TheListInitPool.InsertNode(I, IP);
return I;
}
@@ -696,17 +729,15 @@ ProfileUnOpInit(FoldingSetNodeID &ID, unsigned Opcode, Init *Op, RecTy *Type) {
}
UnOpInit *UnOpInit::get(UnaryOp Opc, Init *LHS, RecTy *Type) {
- static FoldingSet<UnOpInit> ThePool;
-
FoldingSetNodeID ID;
ProfileUnOpInit(ID, Opc, LHS, Type);
void *IP = nullptr;
- if (UnOpInit *I = ThePool.FindNodeOrInsertPos(ID, IP))
+ if (UnOpInit *I = Context->TheUnOpInitPool.FindNodeOrInsertPos(ID, IP))
return I;
- UnOpInit *I = new(Allocator) UnOpInit(Opc, LHS, Type);
- ThePool.InsertNode(I, IP);
+ UnOpInit *I = new (Context->Allocator) UnOpInit(Opc, LHS, Type);
+ Context->TheUnOpInitPool.InsertNode(I, IP);
return I;
}
@@ -860,19 +891,16 @@ ProfileBinOpInit(FoldingSetNodeID &ID, unsigned Opcode, Init *LHS, Init *RHS,
ID.AddPointer(Type);
}
-BinOpInit *BinOpInit::get(BinaryOp Opc, Init *LHS,
- Init *RHS, RecTy *Type) {
- static FoldingSet<BinOpInit> ThePool;
-
+BinOpInit *BinOpInit::get(BinaryOp Opc, Init *LHS, Init *RHS, RecTy *Type) {
FoldingSetNodeID ID;
ProfileBinOpInit(ID, Opc, LHS, RHS, Type);
void *IP = nullptr;
- if (BinOpInit *I = ThePool.FindNodeOrInsertPos(ID, IP))
+ if (BinOpInit *I = Context->TheBinOpInitPool.FindNodeOrInsertPos(ID, IP))
return I;
- BinOpInit *I = new(Allocator) BinOpInit(Opc, LHS, RHS, Type);
- ThePool.InsertNode(I, IP);
+ BinOpInit *I = new (Context->Allocator) BinOpInit(Opc, LHS, RHS, Type);
+ Context->TheBinOpInitPool.InsertNode(I, IP);
return I;
}
@@ -884,7 +912,7 @@ static StringInit *ConcatStringInits(const StringInit *I0,
const StringInit *I1) {
SmallString<80> Concat(I0->getValue());
Concat.append(I1->getValue());
- return StringInit::get(Concat,
+ return StringInit::get(Concat,
StringInit::determineFormat(I0->getFormat(),
I1->getFormat()));
}
@@ -1189,17 +1217,15 @@ ProfileTernOpInit(FoldingSetNodeID &ID, unsigned Opcode, Init *LHS, Init *MHS,
TernOpInit *TernOpInit::get(TernaryOp Opc, Init *LHS, Init *MHS, Init *RHS,
RecTy *Type) {
- static FoldingSet<TernOpInit> ThePool;
-
FoldingSetNodeID ID;
ProfileTernOpInit(ID, Opc, LHS, MHS, RHS, Type);
void *IP = nullptr;
- if (TernOpInit *I = ThePool.FindNodeOrInsertPos(ID, IP))
+ if (TernOpInit *I = Context->TheTernOpInitPool.FindNodeOrInsertPos(ID, IP))
return I;
- TernOpInit *I = new(Allocator) TernOpInit(Opc, LHS, MHS, RHS, Type);
- ThePool.InsertNode(I, IP);
+ TernOpInit *I = new (Context->Allocator) TernOpInit(Opc, LHS, MHS, RHS, Type);
+ Context->TheTernOpInitPool.InsertNode(I, IP);
return I;
}
@@ -1273,8 +1299,8 @@ static Init *FilterHelper(Init *LHS, Init *MHS, Init *RHS, RecTy *Type,
if (!Include)
return nullptr;
if (IntInit *IncludeInt = dyn_cast_or_null<IntInit>(
- Include->convertInitializerTo(IntRecTy::get()))) {
- if (IncludeInt->getValue())
+ Include->convertInitializerTo(IntRecTy::get()))) {
+ if (IncludeInt->getValue())
NewList.push_back(Item);
} else {
return nullptr;
@@ -1482,17 +1508,17 @@ static void ProfileFoldOpInit(FoldingSetNodeID &ID, Init *Start, Init *List,
FoldOpInit *FoldOpInit::get(Init *Start, Init *List, Init *A, Init *B,
Init *Expr, RecTy *Type) {
- static FoldingSet<FoldOpInit> ThePool;
FoldingSetNodeID ID;
ProfileFoldOpInit(ID, Start, List, A, B, Expr, Type);
void *IP = nullptr;
- if (FoldOpInit *I = ThePool.FindNodeOrInsertPos(ID, IP))
+ if (FoldOpInit *I = Context->TheFoldOpInitPool.FindNodeOrInsertPos(ID, IP))
return I;
- FoldOpInit *I = new (Allocator) FoldOpInit(Start, List, A, B, Expr, Type);
- ThePool.InsertNode(I, IP);
+ FoldOpInit *I =
+ new (Context->Allocator) FoldOpInit(Start, List, A, B, Expr, Type);
+ Context->TheFoldOpInitPool.InsertNode(I, IP);
return I;
}
@@ -1547,17 +1573,16 @@ static void ProfileIsAOpInit(FoldingSetNodeID &ID, RecTy *CheckType,
}
IsAOpInit *IsAOpInit::get(RecTy *CheckType, Init *Expr) {
- static FoldingSet<IsAOpInit> ThePool;
FoldingSetNodeID ID;
ProfileIsAOpInit(ID, CheckType, Expr);
void *IP = nullptr;
- if (IsAOpInit *I = ThePool.FindNodeOrInsertPos(ID, IP))
+ if (IsAOpInit *I = Context->TheIsAOpInitPool.FindNodeOrInsertPos(ID, IP))
return I;
- IsAOpInit *I = new (Allocator) IsAOpInit(CheckType, Expr);
- ThePool.InsertNode(I, IP);
+ IsAOpInit *I = new (Context->Allocator) IsAOpInit(CheckType, Expr);
+ Context->TheIsAOpInitPool.InsertNode(I, IP);
return I;
}
@@ -1680,14 +1705,9 @@ VarInit *VarInit::get(StringRef VN, RecTy *T) {
}
VarInit *VarInit::get(Init *VN, RecTy *T) {
- using Key = std::pair<RecTy *, Init *>;
- static DenseMap<Key, VarInit*> ThePool;
-
- Key TheKey(std::make_pair(T, VN));
-
- VarInit *&I = ThePool[TheKey];
+ VarInit *&I = Context->TheVarInitPool[std::make_pair(T, VN)];
if (!I)
- I = new(Allocator) VarInit(VN, T);
+ I = new (Context->Allocator) VarInit(VN, T);
return I;
}
@@ -1709,14 +1729,9 @@ Init *VarInit::resolveReferences(Resolver &R) const {
}
VarBitInit *VarBitInit::get(TypedInit *T, unsigned B) {
- using Key = std::pair<TypedInit *, unsigned>;
- static DenseMap<Key, VarBitInit*> ThePool;
-
- Key TheKey(std::make_pair(T, B));
-
- VarBitInit *&I = ThePool[TheKey];
+ VarBitInit *&I = Context->TheVarBitInitPool[std::make_pair(T, B)];
if (!I)
- I = new(Allocator) VarBitInit(T, B);
+ I = new(Context->Allocator) VarBitInit(T, B);
return I;
}
@@ -1732,15 +1747,11 @@ Init *VarBitInit::resolveReferences(Resolver &R) const {
return const_cast<VarBitInit*>(this);
}
-VarListElementInit *VarListElementInit::get(TypedInit *T,
- unsigned E) {
- using Key = std::pair<TypedInit *, unsigned>;
- static DenseMap<Key, VarListElementInit*> ThePool;
-
- Key TheKey(std::make_pair(T, E));
-
- VarListElementInit *&I = ThePool[TheKey];
- if (!I) I = new(Allocator) VarListElementInit(T, E);
+VarListElementInit *VarListElementInit::get(TypedInit *T, unsigned E) {
+ VarListElementInit *&I =
+ Context->TheVarListElementInitPool[std::make_pair(T, E)];
+ if (!I)
+ I = new (Context->Allocator) VarListElementInit(T, E);
return I;
}
@@ -1800,21 +1811,19 @@ static void ProfileVarDefInit(FoldingSetNodeID &ID,
}
VarDefInit *VarDefInit::get(Record *Class, ArrayRef<Init *> Args) {
- static FoldingSet<VarDefInit> ThePool;
-
FoldingSetNodeID ID;
ProfileVarDefInit(ID, Class, Args);
void *IP = nullptr;
- if (VarDefInit *I = ThePool.FindNodeOrInsertPos(ID, IP))
+ if (VarDefInit *I = Context->TheVarDefInitPool.FindNodeOrInsertPos(ID, IP))
return I;
- void *Mem = Allocator.Allocate(totalSizeToAlloc<Init *>(Args.size()),
- alignof(VarDefInit));
- VarDefInit *I = new(Mem) VarDefInit(Class, Args.size());
+ void *Mem = Context->Allocator.Allocate(totalSizeToAlloc<Init *>(Args.size()),
+ alignof(VarDefInit));
+ VarDefInit *I = new (Mem) VarDefInit(Class, Args.size());
std::uninitialized_copy(Args.begin(), Args.end(),
I->getTrailingObjects<Init *>());
- ThePool.InsertNode(I, IP);
+ Context->TheVarDefInitPool.InsertNode(I, IP);
return I;
}
@@ -1920,13 +1929,9 @@ std::string VarDefInit::getAsString() const {
}
FieldInit *FieldInit::get(Init *R, StringInit *FN) {
- using Key = std::pair<Init *, StringInit *>;
- static DenseMap<Key, FieldInit*> ThePool;
-
- Key TheKey(std::make_pair(R, FN));
-
- FieldInit *&I = ThePool[TheKey];
- if (!I) I = new(Allocator) FieldInit(R, FN);
+ FieldInit *&I = Context->TheFieldInitPool[std::make_pair(R, FN)];
+ if (!I)
+ I = new (Context->Allocator) FieldInit(R, FN);
return I;
}
@@ -1995,23 +2000,22 @@ CondOpInit::get(ArrayRef<Init *> CondRange,
assert(CondRange.size() == ValRange.size() &&
"Number of conditions and values must match!");
- static FoldingSet<CondOpInit> ThePool;
FoldingSetNodeID ID;
ProfileCondOpInit(ID, CondRange, ValRange, Ty);
void *IP = nullptr;
- if (CondOpInit *I = ThePool.FindNodeOrInsertPos(ID, IP))
+ if (CondOpInit *I = Context->TheCondOpInitPool.FindNodeOrInsertPos(ID, IP))
return I;
- void *Mem = Allocator.Allocate(totalSizeToAlloc<Init *>(2*CondRange.size()),
- alignof(BitsInit));
+ void *Mem = Context->Allocator.Allocate(
+ totalSizeToAlloc<Init *>(2 * CondRange.size()), alignof(BitsInit));
CondOpInit *I = new(Mem) CondOpInit(CondRange.size(), Ty);
std::uninitialized_copy(CondRange.begin(), CondRange.end(),
I->getTrailingObjects<Init *>());
std::uninitialized_copy(ValRange.begin(), ValRange.end(),
I->getTrailingObjects<Init *>()+CondRange.size());
- ThePool.InsertNode(I, IP);
+ Context->TheCondOpInitPool.InsertNode(I, IP);
return I;
}
@@ -2113,25 +2117,24 @@ static void ProfileDagInit(FoldingSetNodeID &ID, Init *V, StringInit *VN,
assert(Name == NameRange.end() && "Arg name overflow!");
}
-DagInit *
-DagInit::get(Init *V, StringInit *VN, ArrayRef<Init *> ArgRange,
- ArrayRef<StringInit *> NameRange) {
- static FoldingSet<DagInit> ThePool;
-
+DagInit *DagInit::get(Init *V, StringInit *VN, ArrayRef<Init *> ArgRange,
+ ArrayRef<StringInit *> NameRange) {
FoldingSetNodeID ID;
ProfileDagInit(ID, V, VN, ArgRange, NameRange);
void *IP = nullptr;
- if (DagInit *I = ThePool.FindNodeOrInsertPos(ID, IP))
+ if (DagInit *I = Context->TheDagInitPool.FindNodeOrInsertPos(ID, IP))
return I;
- void *Mem = Allocator.Allocate(totalSizeToAlloc<Init *, StringInit *>(ArgRange.size(), NameRange.size()), alignof(BitsInit));
- DagInit *I = new(Mem) DagInit(V, VN, ArgRange.size(), NameRange.size());
+ void *Mem = Context->Allocator.Allocate(
+ totalSizeToAlloc<Init *, StringInit *>(ArgRange.size(), NameRange.size()),
+ alignof(BitsInit));
+ DagInit *I = new (Mem) DagInit(V, VN, ArgRange.size(), NameRange.size());
std::uninitialized_copy(ArgRange.begin(), ArgRange.end(),
I->getTrailingObjects<Init *>());
std::uninitialized_copy(NameRange.begin(), NameRange.end(),
I->getTrailingObjects<StringInit *>());
- ThePool.InsertNode(I, IP);
+ Context->TheDagInitPool.InsertNode(I, IP);
return I;
}
@@ -2301,8 +2304,6 @@ void RecordVal::print(raw_ostream &OS, bool PrintSem) const {
if (PrintSem) OS << ";\n";
}
-unsigned Record::LastID = 0;
-
void Record::checkName() {
// Ensure the record name has string type.
const TypedInit *TypedName = cast<const TypedInit>(Name);
@@ -2319,10 +2320,12 @@ RecordRecTy *Record::getType() {
DefInit *Record::getDefInit() {
if (!CorrespondingDefInit)
- CorrespondingDefInit = new (Allocator) DefInit(this);
+ CorrespondingDefInit = new (Context->Allocator) DefInit(this);
return CorrespondingDefInit;
}
+unsigned Record::getNewUID() { return Context->LastRecordID++; }
+
void Record::setName(Init *NewName) {
Name = NewName;
checkName();
@@ -2501,7 +2504,7 @@ BitsInit *Record::getValueAsBitsInit(StringRef FieldName) const {
if (BitsInit *BI = dyn_cast<BitsInit>(R->getValue()))
return BI;
- PrintFatalError(getLoc(), "Record `" + getName() + "', field `" + FieldName +
+ PrintFatalError(getLoc(), "Record `" + getName() + "', field `" + FieldName +
"' exists but does not have a bits value");
}
@@ -2513,7 +2516,7 @@ ListInit *Record::getValueAsListInit(StringRef FieldName) const {
if (ListInit *LI = dyn_cast<ListInit>(R->getValue()))
return LI;
- PrintFatalError(getLoc(), "Record `" + getName() + "', field `" + FieldName +
+ PrintFatalError(getLoc(), "Record `" + getName() + "', field `" + FieldName +
"' exists but does not have a list value");
}
@@ -2653,13 +2656,23 @@ void Record::checkRecordAssertions() {
RecordResolver R(*this);
R.setFinal(true);
- for (auto Assertion : getAssertions()) {
+ for (const auto &Assertion : getAssertions()) {
Init *Condition = Assertion.Condition->resolveReferences(R);
Init *Message = Assertion.Message->resolveReferences(R);
CheckAssert(Assertion.Loc, Condition, Message);
}
}
+// Report a warning if the record has unused template arguments.
+void Record::checkUnusedTemplateArgs() {
+ for (const Init *TA : getTemplateArgs()) {
+ const RecordVal *Arg = getValue(TA);
+ if (!Arg->isUsed())
+ PrintWarning(Arg->getLoc(),
+ "unused template argument: " + Twine(Arg->getName()));
+ }
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void RecordKeeper::dump() const { errs() << *this; }
#endif
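The Record.cpp changes above gather the interning pools and shared RecTy singletons that used to live in function-local statics into a single ManagedStatic<RecordContext>, so the state is created on first use and released by llvm_shutdown(). A generic sketch of that pattern (not the RecordContext itself), using only public LLVM support APIs:

    #include "llvm/ADT/StringMap.h"
    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/Allocator.h"
    #include "llvm/Support/ManagedStatic.h"
    #include <algorithm>

    namespace {
    struct InternPool {
      llvm::BumpPtrAllocator Alloc;
      llvm::StringMap<const char *, llvm::BumpPtrAllocator &> Strings{Alloc};
    };
    } // namespace

    static llvm::ManagedStatic<InternPool> Pool;

    // Returns a stable, uniqued copy of S; all storage is reclaimed when
    // llvm_shutdown() destroys the ManagedStatic.
    static const char *intern(llvm::StringRef S) {
      auto &Entry = *Pool->Strings.insert({S, nullptr}).first;
      if (!Entry.second) {
        char *Mem = static_cast<char *>(Pool->Alloc.Allocate(S.size() + 1, 1));
        std::copy(S.begin(), S.end(), Mem);
        Mem[S.size()] = '\0';
        Entry.second = Mem;
      }
      return Entry.second;
    }

    int main() {
      const char *A = intern("spirv64");
      const char *B = intern("spirv64");
      return A == B ? 0 : 1; // identical strings share one allocation
    }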
diff --git a/llvm/lib/TableGen/TGParser.cpp b/llvm/lib/TableGen/TGParser.cpp
index ed7963031b24..6ccca4d69f40 100644
--- a/llvm/lib/TableGen/TGParser.cpp
+++ b/llvm/lib/TableGen/TGParser.cpp
@@ -874,8 +874,9 @@ Init *TGParser::ParseIDValue(Record *CurRec, StringInit *Name, SMLoc NameLoc,
Record *TemplateRec = CurMultiClass ? &CurMultiClass->Rec : CurRec;
if (TemplateRec->isTemplateArg(TemplateArgName)) {
- const RecordVal *RV = TemplateRec->getValue(TemplateArgName);
+ RecordVal *RV = TemplateRec->getValue(TemplateArgName);
assert(RV && "Template arg doesn't exist??");
+ RV->setUsed(true);
return VarInit::get(TemplateArgName, RV->getType());
} else if (Name->getValue() == "NAME") {
return VarInit::get(TemplateArgName, StringRecTy::get());
@@ -3346,7 +3347,12 @@ bool TGParser::ParseClass() {
if (ParseTemplateArgList(CurRec))
return true;
- return ParseObjectBody(CurRec);
+ if (ParseObjectBody(CurRec))
+ return true;
+
+ if (!NoWarnOnUnusedTemplateArgs)
+ CurRec->checkUnusedTemplateArgs();
+ return false;
}
/// ParseLetList - Parse a non-empty list of assignment expressions into a list
@@ -3541,6 +3547,9 @@ bool TGParser::ParseMultiClass() {
PopLocalScope(MulticlassScope);
}
+ if (!NoWarnOnUnusedTemplateArgs)
+ CurMultiClass->Rec.checkUnusedTemplateArgs();
+
CurMultiClass = nullptr;
return false;
}
diff --git a/llvm/lib/TableGen/TGParser.h b/llvm/lib/TableGen/TGParser.h
index 6e3c5186e4f6..00883c858d58 100644
--- a/llvm/lib/TableGen/TGParser.h
+++ b/llvm/lib/TableGen/TGParser.h
@@ -160,10 +160,13 @@ class TGParser {
// exist.
};
+ bool NoWarnOnUnusedTemplateArgs = false;
+
public:
- TGParser(SourceMgr &SM, ArrayRef<std::string> Macros,
- RecordKeeper &records)
- : Lex(SM, Macros), CurMultiClass(nullptr), Records(records) {}
+ TGParser(SourceMgr &SM, ArrayRef<std::string> Macros, RecordKeeper &records,
+ const bool NoWarnOnUnusedTemplateArgs = false)
+ : Lex(SM, Macros), CurMultiClass(nullptr), Records(records),
+ NoWarnOnUnusedTemplateArgs(NoWarnOnUnusedTemplateArgs) {}
/// ParseFile - Main entrypoint for parsing a tblgen file. These parser
/// routines return true on error, or false on success.
diff --git a/llvm/lib/Target/AArch64/AArch64.h b/llvm/lib/Target/AArch64/AArch64.h
index 658d44771e8d..b0dd30c13137 100644
--- a/llvm/lib/Target/AArch64/AArch64.h
+++ b/llvm/lib/Target/AArch64/AArch64.h
@@ -51,6 +51,7 @@ FunctionPass *createAArch64A53Fix835769();
FunctionPass *createFalkorHWPFFixPass();
FunctionPass *createFalkorMarkStridedAccessesPass();
FunctionPass *createAArch64BranchTargetsPass();
+FunctionPass *createAArch64MIPeepholeOptPass();
FunctionPass *createAArch64CleanupLocalDynamicTLSPass();
@@ -82,6 +83,7 @@ void initializeAArch64SLSHardeningPass(PassRegistry&);
void initializeAArch64SpeculationHardeningPass(PassRegistry&);
void initializeAArch64LoadStoreOptPass(PassRegistry&);
void initializeAArch64LowerHomogeneousPrologEpilogPass(PassRegistry &);
+void initializeAArch64MIPeepholeOptPass(PassRegistry &);
void initializeAArch64SIMDInstrOptPass(PassRegistry&);
void initializeAArch64O0PreLegalizerCombinerPass(PassRegistry &);
void initializeAArch64PreLegalizerCombinerPass(PassRegistry&);
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index d8dd9d1b2f91..548e4e0c9389 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -61,6 +61,9 @@ def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true",
def FeatureLSE : SubtargetFeature<"lse", "HasLSE", "true",
"Enable ARMv8.1 Large System Extension (LSE) atomic instructions">;
+def FeatureLSE2 : SubtargetFeature<"lse2", "HasLSE2", "true",
+ "Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules">;
+
def FeatureOutlineAtomics : SubtargetFeature<"outline-atomics", "OutlineAtomics", "true",
"Enable out of line atomics to support LSE instructions">;
@@ -126,8 +129,12 @@ def FeatureExperimentalZeroingPseudos
"merged with destructive operations",
[]>;
+def FeatureUseScalarIncVL : SubtargetFeature<"use-scalar-inc-vl",
+ "UseScalarIncVL", "true", "Prefer inc/dec over add+cnt">;
+
def FeatureSVE2 : SubtargetFeature<"sve2", "HasSVE2", "true",
- "Enable Scalable Vector Extension 2 (SVE2) instructions", [FeatureSVE]>;
+ "Enable Scalable Vector Extension 2 (SVE2) instructions",
+ [FeatureSVE, FeatureUseScalarIncVL]>;
def FeatureSVE2AES : SubtargetFeature<"sve2-aes", "HasSVE2AES", "true",
"Enable AES SVE2 instructions", [FeatureSVE2, FeatureAES]>;
@@ -309,10 +316,6 @@ def FeatureSEL2 : SubtargetFeature<
"sel2", "HasSEL2", "true",
"Enable v8.4-A Secure Exception Level 2 extension">;
-def FeaturePMU : SubtargetFeature<
- "pmu", "HasPMU", "true",
- "Enable v8.4-A PMU extension">;
-
def FeatureTLB_RMI : SubtargetFeature<
"tlb-rmi", "HasTLB_RMI", "true",
"Enable v8.4-A TLB Range and Maintenance Instructions">;
@@ -429,10 +432,13 @@ def FeatureEnhancedCounterVirtualization :
def FeatureRME : SubtargetFeature<"rme", "HasRME",
"true", "Enable Realm Management Extension">;
-// FIXME: SME should only imply the subset of SVE(2) instructions that are
-// legal in streaming mode.
+// A subset of SVE(2) instructions are legal in Streaming SVE execution mode
+// defined by SME.
+def FeatureStreamingSVE : SubtargetFeature<"streaming-sve",
+ "HasStreamingSVE", "true",
+ "Enable subset of SVE(2) instructions for Streaming SVE execution mode">;
def FeatureSME : SubtargetFeature<"sme", "HasSME", "true",
- "Enable Scalable Matrix Extension (SME)", [FeatureSVE2, FeatureBF16]>;
+ "Enable Scalable Matrix Extension (SME)", [FeatureStreamingSVE, FeatureBF16]>;
def FeatureSMEF64 : SubtargetFeature<"sme-f64", "HasSMEF64", "true",
"Enable Scalable Matrix Extension (SME) F64F64 instructions", [FeatureSME]>;
@@ -440,13 +446,24 @@ def FeatureSMEF64 : SubtargetFeature<"sme-f64", "HasSMEF64", "true",
def FeatureSMEI64 : SubtargetFeature<"sme-i64", "HasSMEI64", "true",
"Enable Scalable Matrix Extension (SME) I16I64 instructions", [FeatureSME]>;
+def FeatureAppleA7SysReg : SubtargetFeature<"apple-a7-sysreg", "HasAppleA7SysReg", "true",
+ "Apple A7 (the CPU formerly known as Cyclone)">;
+
+def FeatureEL2VMSA : SubtargetFeature<"el2vmsa", "HasEL2VMSA", "true",
+ "Enable Exception Level 2 Virtual Memory System Architecture">;
+
+def FeatureEL3 : SubtargetFeature<"el3", "HasEL3", "true",
+ "Enable Exception Level 3">;
+
//===----------------------------------------------------------------------===//
// Architectures.
//
+def HasV8_0aOps : SubtargetFeature<"v8a", "HasV8_0aOps", "true",
+ "Support ARM v8.0a instructions", [FeatureEL2VMSA, FeatureEL3]>;
def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true",
- "Support ARM v8.1a instructions", [FeatureCRC, FeatureLSE, FeatureRDM,
- FeaturePAN, FeatureLOR, FeatureVH]>;
+ "Support ARM v8.1a instructions", [HasV8_0aOps, FeatureCRC, FeatureLSE,
+ FeatureRDM, FeaturePAN, FeatureLOR, FeatureVH]>;
def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true",
"Support ARM v8.2a instructions", [HasV8_1aOps, FeaturePsUAO,
@@ -459,8 +476,8 @@ def HasV8_3aOps : SubtargetFeature<"v8.3a", "HasV8_3aOps", "true",
def HasV8_4aOps : SubtargetFeature<"v8.4a", "HasV8_4aOps", "true",
"Support ARM v8.4a instructions", [HasV8_3aOps, FeatureDotProd,
FeatureNV, FeatureMPAM, FeatureDIT,
- FeatureTRACEV8_4, FeatureAM, FeatureSEL2, FeaturePMU, FeatureTLB_RMI,
- FeatureFlagM, FeatureRCPC_IMMO]>;
+ FeatureTRACEV8_4, FeatureAM, FeatureSEL2, FeatureTLB_RMI,
+ FeatureFlagM, FeatureRCPC_IMMO, FeatureLSE2]>;
def HasV8_5aOps : SubtargetFeature<
"v8.5a", "HasV8_5aOps", "true", "Support ARM v8.5a instructions",
@@ -477,6 +494,18 @@ def HasV8_7aOps : SubtargetFeature<
"v8.7a", "HasV8_7aOps", "true", "Support ARM v8.7a instructions",
[HasV8_6aOps, FeatureXS, FeatureWFxT, FeatureHCX]>;
+def HasV9_0aOps : SubtargetFeature<
+ "v9a", "HasV9_0aOps", "true", "Support ARM v9a instructions",
+ [HasV8_5aOps, FeatureSVE2]>;
+
+def HasV9_1aOps : SubtargetFeature<
+ "v9.1a", "HasV9_1aOps", "true", "Support ARM v9.1a instructions",
+ [HasV8_6aOps, HasV9_0aOps]>;
+
+def HasV9_2aOps : SubtargetFeature<
+ "v9.2a", "HasV9_2aOps", "true", "Support ARM v9.2a instructions",
+ [HasV8_7aOps, HasV9_1aOps]>;
+
def HasV8_0rOps : SubtargetFeature<
"v8r", "HasV8_0rOps", "true", "Support ARM v8r instructions",
[//v8.1
@@ -553,7 +582,7 @@ class AArch64Unsupported { list<Predicate> F; }
def SVEUnsupported : AArch64Unsupported {
let F = [HasSVE, HasSVE2, HasSVE2AES, HasSVE2SM4, HasSVE2SHA3,
- HasSVE2BitPerm];
+ HasSVE2BitPerm, HasSVEorStreamingSVE, HasSVE2orStreamingSVE];
}
def PAUnsupported : AArch64Unsupported {
@@ -579,660 +608,553 @@ include "AArch64SchedA64FX.td"
include "AArch64SchedThunderX3T110.td"
include "AArch64SchedTSV110.td"
-def ProcA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35",
- "Cortex-A35 ARM processors", [
- FeatureCRC,
- FeatureCrypto,
- FeatureFPARMv8,
- FeatureNEON,
- FeaturePerfMon
- ]>;
+def TuneA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35",
+ "Cortex-A35 ARM processors">;
-def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53",
+def TuneA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53",
"Cortex-A53 ARM processors", [
+ FeatureFuseAES,
FeatureBalanceFPOps,
- FeatureCRC,
- FeatureCrypto,
FeatureCustomCheapAsMoveHandling,
- FeatureFPARMv8,
- FeatureFuseAES,
- FeatureNEON,
- FeaturePerfMon,
- FeaturePostRAScheduler,
- ]>;
+ FeaturePostRAScheduler]>;
-def ProcA55 : SubtargetFeature<"a55", "ARMProcFamily", "CortexA55",
+def TuneA55 : SubtargetFeature<"a55", "ARMProcFamily", "CortexA55",
"Cortex-A55 ARM processors", [
- HasV8_2aOps,
- FeatureCrypto,
- FeatureFPARMv8,
FeatureFuseAES,
- FeatureNEON,
- FeatureFullFP16,
- FeatureDotProd,
- FeatureRCPC,
- FeaturePerfMon,
FeaturePostRAScheduler,
+ FeatureFuseAddress]>;
+
+def TuneA510 : SubtargetFeature<"a510", "ARMProcFamily", "CortexA510",
+ "Cortex-A510 ARM processors", [
+ FeatureFuseAES,
+ FeaturePostRAScheduler
]>;
-def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
+def TuneA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
"Cortex-A57 ARM processors", [
+ FeatureFuseAES,
FeatureBalanceFPOps,
- FeatureCRC,
- FeatureCrypto,
FeatureCustomCheapAsMoveHandling,
- FeatureFPARMv8,
- FeatureFuseAES,
FeatureFuseLiterals,
- FeatureNEON,
- FeaturePerfMon,
FeaturePostRAScheduler,
- FeaturePredictableSelectIsExpensive
- ]>;
+ FeaturePredictableSelectIsExpensive]>;
-def ProcA65 : SubtargetFeature<"a65", "ARMProcFamily", "CortexA65",
+def TuneA65 : SubtargetFeature<"a65", "ARMProcFamily", "CortexA65",
"Cortex-A65 ARM processors", [
- HasV8_2aOps,
- FeatureCrypto,
- FeatureDotProd,
- FeatureFPARMv8,
- FeatureFullFP16,
- FeatureFuseAddress,
FeatureFuseAES,
- FeatureFuseLiterals,
- FeatureNEON,
- FeatureRAS,
- FeatureRCPC,
- FeatureSSBS,
- ]>;
+ FeatureFuseAddress,
+ FeatureFuseLiterals]>;
-def ProcA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72",
+def TuneA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72",
"Cortex-A72 ARM processors", [
- FeatureCRC,
- FeatureCrypto,
- FeatureFPARMv8,
FeatureFuseAES,
- FeatureFuseLiterals,
- FeatureNEON,
- FeaturePerfMon
- ]>;
+ FeatureFuseLiterals]>;
-def ProcA73 : SubtargetFeature<"a73", "ARMProcFamily", "CortexA73",
+def TuneA73 : SubtargetFeature<"a73", "ARMProcFamily", "CortexA73",
"Cortex-A73 ARM processors", [
- FeatureCRC,
- FeatureCrypto,
- FeatureFPARMv8,
- FeatureFuseAES,
- FeatureNEON,
- FeaturePerfMon
- ]>;
+ FeatureFuseAES]>;
-def ProcA75 : SubtargetFeature<"a75", "ARMProcFamily", "CortexA75",
+def TuneA75 : SubtargetFeature<"a75", "ARMProcFamily", "CortexA75",
"Cortex-A75 ARM processors", [
- HasV8_2aOps,
- FeatureCrypto,
- FeatureFPARMv8,
- FeatureFuseAES,
- FeatureNEON,
- FeatureFullFP16,
- FeatureDotProd,
- FeatureRCPC,
- FeaturePerfMon
- ]>;
+ FeatureFuseAES]>;
-def ProcA76 : SubtargetFeature<"a76", "ARMProcFamily", "CortexA76",
+def TuneA76 : SubtargetFeature<"a76", "ARMProcFamily", "CortexA76",
"Cortex-A76 ARM processors", [
- HasV8_2aOps,
- FeatureFPARMv8,
- FeatureFuseAES,
- FeatureNEON,
- FeatureRCPC,
- FeatureCrypto,
- FeatureFullFP16,
- FeatureDotProd,
- FeatureSSBS
- ]>;
-
-def ProcA77 : SubtargetFeature<"a77", "ARMProcFamily", "CortexA77",
+ FeatureFuseAES]>;
+
+def TuneA77 : SubtargetFeature<"a77", "ARMProcFamily", "CortexA77",
"Cortex-A77 ARM processors", [
- HasV8_2aOps,
- FeatureCmpBccFusion,
- FeatureFPARMv8,
- FeatureFuseAES,
- FeatureNEON, FeatureRCPC,
- FeatureCrypto,
- FeatureFullFP16,
- FeatureDotProd
- ]>;
-
-def ProcA78 : SubtargetFeature<"cortex-a78", "ARMProcFamily",
- "CortexA78",
+ FeatureCmpBccFusion,
+ FeatureFuseAES]>;
+
+def TuneA78 : SubtargetFeature<"a78", "ARMProcFamily", "CortexA78",
"Cortex-A78 ARM processors", [
- HasV8_2aOps,
FeatureCmpBccFusion,
- FeatureCrypto,
- FeatureFPARMv8,
FeatureFuseAES,
- FeatureNEON,
- FeatureRCPC,
- FeaturePerfMon,
- FeaturePostRAScheduler,
- FeatureSPE,
- FeatureFullFP16,
- FeatureSSBS,
- FeatureDotProd]>;
-
-def ProcA78C : SubtargetFeature<"cortex-a78c", "ARMProcFamily",
+ FeaturePostRAScheduler]>;
+
+def TuneA78C : SubtargetFeature<"a78c", "ARMProcFamily",
"CortexA78C",
"Cortex-A78C ARM processors", [
- HasV8_2aOps,
FeatureCmpBccFusion,
- FeatureCrypto,
- FeatureDotProd,
- FeatureFlagM,
- FeatureFP16FML,
- FeatureFPARMv8,
- FeatureFullFP16,
FeatureFuseAES,
- FeatureNEON,
- FeaturePAuth,
- FeaturePerfMon,
- FeaturePostRAScheduler,
- FeatureRCPC,
- FeatureSPE,
- FeatureSSBS]>;
-
-def ProcR82 : SubtargetFeature<"cortex-r82", "ARMProcFamily",
+ FeaturePostRAScheduler]>;
+
+def TuneA710 : SubtargetFeature<"a710", "ARMProcFamily", "CortexA710",
+ "Cortex-A710 ARM processors", [
+ FeatureFuseAES,
+ FeaturePostRAScheduler,
+ FeatureCmpBccFusion]>;
+
+def TuneR82 : SubtargetFeature<"cortex-r82", "ARMProcFamily",
"CortexR82",
- "Cortex-R82 ARM Processors", [
- FeaturePostRAScheduler,
- // All other features are implied by v8_0r ops:
- HasV8_0rOps,
- ]>;
+ "Cortex-R82 ARM processors", [
+ FeaturePostRAScheduler]>;
-def ProcX1 : SubtargetFeature<"cortex-x1", "ARMProcFamily", "CortexX1",
+def TuneX1 : SubtargetFeature<"cortex-x1", "ARMProcFamily", "CortexX1",
"Cortex-X1 ARM processors", [
- HasV8_2aOps,
FeatureCmpBccFusion,
- FeatureCrypto,
- FeatureFPARMv8,
FeatureFuseAES,
- FeatureNEON,
- FeatureRCPC,
- FeaturePerfMon,
+ FeaturePostRAScheduler]>;
+
+def TuneX2 : SubtargetFeature<"cortex-x2", "ARMProcFamily", "CortexX2",
+ "Cortex-X2 ARM processors", [
+ FeatureFuseAES,
FeaturePostRAScheduler,
- FeatureSPE,
- FeatureFullFP16,
- FeatureDotProd]>;
+ FeatureCmpBccFusion]>;
-def ProcA64FX : SubtargetFeature<"a64fx", "ARMProcFamily", "A64FX",
+def TuneA64FX : SubtargetFeature<"a64fx", "ARMProcFamily", "A64FX",
"Fujitsu A64FX processors", [
- HasV8_2aOps,
- FeatureFPARMv8,
- FeatureNEON,
- FeatureSHA2,
- FeaturePerfMon,
- FeatureFullFP16,
- FeatureSVE,
- FeaturePostRAScheduler,
- FeatureComplxNum,
- FeatureAggressiveFMA,
- FeatureArithmeticBccFusion,
- FeaturePredictableSelectIsExpensive
- ]>;
-
-def ProcCarmel : SubtargetFeature<"carmel", "ARMProcFamily", "Carmel",
- "Nvidia Carmel processors", [
- HasV8_2aOps,
- FeatureNEON,
- FeatureCrypto,
- FeatureFullFP16
- ]>;
+ FeaturePostRAScheduler,
+ FeatureAggressiveFMA,
+ FeatureArithmeticBccFusion,
+ FeaturePredictableSelectIsExpensive
+ ]>;
+
+def TuneCarmel : SubtargetFeature<"carmel", "ARMProcFamily", "Carmel",
+ "Nvidia Carmel processors">;
// Note that cyclone does not fuse AES instructions, but newer apple chips do
// perform the fusion and cyclone is used by default when targeting apple OSes.
-def ProcAppleA7 : SubtargetFeature<"apple-a7", "ARMProcFamily", "AppleA7",
- "Apple A7 (the CPU formerly known as Cyclone)", [
- FeatureAlternateSExtLoadCVTF32Pattern,
- FeatureArithmeticBccFusion,
- FeatureArithmeticCbzFusion,
- FeatureCrypto,
- FeatureDisableLatencySchedHeuristic,
- FeatureFPARMv8,
- FeatureFuseAES,
- FeatureFuseCryptoEOR,
- FeatureNEON,
- FeaturePerfMon,
- FeatureZCRegMove,
- FeatureZCZeroing,
- FeatureZCZeroingFPWorkaround
- ]>;
+def TuneAppleA7 : SubtargetFeature<"apple-a7", "ARMProcFamily", "AppleA7",
+ "Apple A7 (the CPU formerly known as Cyclone)", [
+ FeatureAlternateSExtLoadCVTF32Pattern,
+ FeatureArithmeticBccFusion,
+ FeatureArithmeticCbzFusion,
+ FeatureDisableLatencySchedHeuristic,
+ FeatureFuseAES, FeatureFuseCryptoEOR,
+ FeatureZCRegMove,
+ FeatureZCZeroing,
+ FeatureZCZeroingFPWorkaround]
+ >;
-def ProcAppleA10 : SubtargetFeature<"apple-a10", "ARMProcFamily", "AppleA10",
+def TuneAppleA10 : SubtargetFeature<"apple-a10", "ARMProcFamily", "AppleA10",
"Apple A10", [
FeatureAlternateSExtLoadCVTF32Pattern,
FeatureArithmeticBccFusion,
FeatureArithmeticCbzFusion,
- FeatureCrypto,
FeatureDisableLatencySchedHeuristic,
- FeatureFPARMv8,
FeatureFuseAES,
FeatureFuseCryptoEOR,
- FeatureNEON,
- FeaturePerfMon,
FeatureZCRegMove,
- FeatureZCZeroing,
- FeatureCRC,
- FeatureRDM,
- FeaturePAN,
- FeatureLOR,
- FeatureVH,
- ]>;
-
-def ProcAppleA11 : SubtargetFeature<"apple-a11", "ARMProcFamily", "AppleA11",
+ FeatureZCZeroing]
+ >;
+
+def TuneAppleA11 : SubtargetFeature<"apple-a11", "ARMProcFamily", "AppleA11",
"Apple A11", [
FeatureAlternateSExtLoadCVTF32Pattern,
FeatureArithmeticBccFusion,
FeatureArithmeticCbzFusion,
- FeatureCrypto,
FeatureDisableLatencySchedHeuristic,
- FeatureFPARMv8,
FeatureFuseAES,
FeatureFuseCryptoEOR,
- FeatureNEON,
- FeaturePerfMon,
FeatureZCRegMove,
- FeatureZCZeroing,
- FeatureFullFP16,
- HasV8_2aOps
- ]>;
+ FeatureZCZeroing]
+ >;
-def ProcAppleA12 : SubtargetFeature<"apple-a12", "ARMProcFamily", "AppleA12",
+def TuneAppleA12 : SubtargetFeature<"apple-a12", "ARMProcFamily", "AppleA12",
"Apple A12", [
FeatureAlternateSExtLoadCVTF32Pattern,
FeatureArithmeticBccFusion,
FeatureArithmeticCbzFusion,
- FeatureCrypto,
FeatureDisableLatencySchedHeuristic,
- FeatureFPARMv8,
FeatureFuseAES,
FeatureFuseCryptoEOR,
- FeatureNEON,
- FeaturePerfMon,
FeatureZCRegMove,
- FeatureZCZeroing,
- FeatureFullFP16,
- HasV8_3aOps
- ]>;
-
-def ProcAppleA13 : SubtargetFeature<"apple-a13", "ARMProcFamily", "AppleA13",
- "Apple A13", [
- FeatureAlternateSExtLoadCVTF32Pattern,
- FeatureArithmeticBccFusion,
- FeatureArithmeticCbzFusion,
- FeatureCrypto,
- FeatureDisableLatencySchedHeuristic,
- FeatureFPARMv8,
- FeatureFuseAES,
- FeatureFuseCryptoEOR,
- FeatureNEON,
- FeaturePerfMon,
- FeatureZCRegMove,
- FeatureZCZeroing,
- FeatureFullFP16,
- FeatureFP16FML,
- FeatureSHA3,
- HasV8_4aOps
- ]>;
-
-def ProcAppleA14 : SubtargetFeature<"apple-a14", "ARMProcFamily", "AppleA14",
- "Apple A14", [
- FeatureAggressiveFMA,
- FeatureAlternateSExtLoadCVTF32Pattern,
- FeatureAltFPCmp,
- FeatureArithmeticBccFusion,
- FeatureArithmeticCbzFusion,
- FeatureCrypto,
- FeatureDisableLatencySchedHeuristic,
- FeatureFPARMv8,
- FeatureFRInt3264,
- FeatureFuseAddress,
- FeatureFuseAES,
- FeatureFuseArithmeticLogic,
- FeatureFuseCCSelect,
- FeatureFuseCryptoEOR,
- FeatureFuseLiterals,
- FeatureNEON,
- FeaturePerfMon,
- FeatureSpecRestrict,
- FeatureSSBS,
- FeatureSB,
- FeaturePredRes,
- FeatureCacheDeepPersist,
- FeatureZCRegMove,
- FeatureZCZeroing,
- FeatureFullFP16,
- FeatureFP16FML,
- FeatureSHA3,
- HasV8_4aOps
- ]>;
-
-def ProcExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3",
+ FeatureZCZeroing]
+ >;
+
+def TuneAppleA13 : SubtargetFeature<"apple-a13", "ARMProcFamily", "AppleA13",
+ "Apple A13", [
+ FeatureAlternateSExtLoadCVTF32Pattern,
+ FeatureArithmeticBccFusion,
+ FeatureArithmeticCbzFusion,
+ FeatureDisableLatencySchedHeuristic,
+ FeatureFuseAES,
+ FeatureFuseCryptoEOR,
+ FeatureZCRegMove,
+ FeatureZCZeroing]
+ >;
+
+def TuneAppleA14 : SubtargetFeature<"apple-a14", "ARMProcFamily", "AppleA14",
+ "Apple A14", [
+ FeatureAggressiveFMA,
+ FeatureAlternateSExtLoadCVTF32Pattern,
+ FeatureArithmeticBccFusion,
+ FeatureArithmeticCbzFusion,
+ FeatureDisableLatencySchedHeuristic,
+ FeatureFuseAddress,
+ FeatureFuseAES,
+ FeatureFuseArithmeticLogic,
+ FeatureFuseCCSelect,
+ FeatureFuseCryptoEOR,
+ FeatureFuseLiterals,
+ FeatureZCRegMove,
+ FeatureZCZeroing]>;
+
+def TuneExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3",
"Samsung Exynos-M3 processors",
- [FeatureCRC,
- FeatureCrypto,
- FeatureExynosCheapAsMoveHandling,
+ [FeatureExynosCheapAsMoveHandling,
FeatureForce32BitJumpTables,
FeatureFuseAddress,
FeatureFuseAES,
FeatureFuseCCSelect,
FeatureFuseLiterals,
FeatureLSLFast,
- FeaturePerfMon,
FeaturePostRAScheduler,
FeaturePredictableSelectIsExpensive]>;
-def ProcExynosM4 : SubtargetFeature<"exynosm4", "ARMProcFamily", "ExynosM3",
- "Samsung Exynos-M4 processors",
- [HasV8_2aOps,
- FeatureArithmeticBccFusion,
+def TuneExynosM4 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3",
+ "Samsung Exynos-M3 processors",
+ [FeatureArithmeticBccFusion,
FeatureArithmeticCbzFusion,
- FeatureCrypto,
- FeatureDotProd,
FeatureExynosCheapAsMoveHandling,
FeatureForce32BitJumpTables,
- FeatureFullFP16,
FeatureFuseAddress,
FeatureFuseAES,
FeatureFuseArithmeticLogic,
FeatureFuseCCSelect,
FeatureFuseLiterals,
FeatureLSLFast,
- FeaturePerfMon,
FeaturePostRAScheduler,
FeatureZCZeroing]>;
-def ProcKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo",
+def TuneKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo",
"Qualcomm Kryo processors", [
- FeatureCRC,
- FeatureCrypto,
FeatureCustomCheapAsMoveHandling,
- FeatureFPARMv8,
- FeatureNEON,
- FeaturePerfMon,
FeaturePostRAScheduler,
FeaturePredictableSelectIsExpensive,
FeatureZCZeroing,
- FeatureLSLFast
- ]>;
+ FeatureLSLFast]
+ >;
-def ProcFalkor : SubtargetFeature<"falkor", "ARMProcFamily", "Falkor",
+def TuneFalkor : SubtargetFeature<"falkor", "ARMProcFamily", "Falkor",
"Qualcomm Falkor processors", [
- FeatureCRC,
- FeatureCrypto,
FeatureCustomCheapAsMoveHandling,
- FeatureFPARMv8,
- FeatureNEON,
- FeaturePerfMon,
FeaturePostRAScheduler,
FeaturePredictableSelectIsExpensive,
- FeatureRDM,
FeatureZCZeroing,
FeatureLSLFast,
FeatureSlowSTRQro
]>;
-def ProcNeoverseE1 : SubtargetFeature<"neoversee1", "ARMProcFamily",
- "NeoverseE1",
+def TuneNeoverseE1 : SubtargetFeature<"neoversee1", "ARMProcFamily", "NeoverseE1",
"Neoverse E1 ARM processors", [
- HasV8_2aOps,
- FeatureCrypto,
- FeatureDotProd,
- FeatureFPARMv8,
- FeatureFullFP16,
- FeatureNEON,
- FeatureRCPC,
- FeatureSSBS,
FeaturePostRAScheduler,
- FeatureFuseAES,
+ FeatureFuseAES
]>;
-def ProcNeoverseN1 : SubtargetFeature<"neoversen1", "ARMProcFamily",
- "NeoverseN1",
+def TuneNeoverseN1 : SubtargetFeature<"neoversen1", "ARMProcFamily", "NeoverseN1",
"Neoverse N1 ARM processors", [
- HasV8_2aOps,
- FeatureCrypto,
- FeatureDotProd,
- FeatureFPARMv8,
- FeatureFullFP16,
- FeatureNEON,
- FeatureRCPC,
- FeatureSPE,
- FeatureSSBS,
FeaturePostRAScheduler,
- FeatureFuseAES,
+ FeatureFuseAES
]>;
-def ProcNeoverseN2 : SubtargetFeature<"neoversen2", "ARMProcFamily",
- "NeoverseN2",
+def TuneNeoverseN2 : SubtargetFeature<"neoversen2", "ARMProcFamily", "NeoverseN2",
"Neoverse N2 ARM processors", [
- HasV8_5aOps,
- FeatureBF16,
- FeatureETE,
- FeatureMatMulInt8,
- FeatureMTE,
- FeatureSVE2,
- FeatureSVE2BitPerm,
- FeatureTRBE,
FeaturePostRAScheduler,
- FeatureCrypto,
- FeatureFuseAES,
+ FeatureFuseAES
+ ]>;
+def TuneNeoverse512TVB : SubtargetFeature<"neoverse512tvb", "ARMProcFamily", "Neoverse512TVB",
+ "Neoverse 512-TVB ARM processors", [
+ FeaturePostRAScheduler,
+ FeatureFuseAES
]>;
-def ProcNeoverseV1 : SubtargetFeature<"neoversev1", "ARMProcFamily",
- "NeoverseV1",
+def TuneNeoverseV1 : SubtargetFeature<"neoversev1", "ARMProcFamily", "NeoverseV1",
"Neoverse V1 ARM processors", [
- HasV8_4aOps,
- FeatureBF16,
- FeatureCacheDeepPersist,
- FeatureCrypto,
- FeatureFPARMv8,
- FeatureFP16FML,
- FeatureFullFP16,
FeatureFuseAES,
- FeatureMatMulInt8,
- FeatureNEON,
- FeaturePerfMon,
- FeaturePostRAScheduler,
- FeatureRandGen,
- FeatureSPE,
- FeatureSSBS,
- FeatureSVE]>;
+ FeaturePostRAScheduler]>;
-def ProcSaphira : SubtargetFeature<"saphira", "ARMProcFamily", "Saphira",
+def TuneSaphira : SubtargetFeature<"saphira", "ARMProcFamily", "Saphira",
"Qualcomm Saphira processors", [
- FeatureCrypto,
FeatureCustomCheapAsMoveHandling,
- FeatureFPARMv8,
- FeatureNEON,
- FeatureSPE,
- FeaturePerfMon,
FeaturePostRAScheduler,
FeaturePredictableSelectIsExpensive,
FeatureZCZeroing,
- FeatureLSLFast,
- HasV8_4aOps]>;
+ FeatureLSLFast]>;
-def ProcThunderX2T99 : SubtargetFeature<"thunderx2t99", "ARMProcFamily",
- "ThunderX2T99",
+def TuneThunderX2T99 : SubtargetFeature<"thunderx2t99", "ARMProcFamily", "ThunderX2T99",
"Cavium ThunderX2 processors", [
FeatureAggressiveFMA,
- FeatureCRC,
- FeatureCrypto,
- FeatureFPARMv8,
- FeatureArithmeticBccFusion,
- FeatureNEON,
- FeaturePostRAScheduler,
- FeaturePredictableSelectIsExpensive,
- FeatureLSE,
- HasV8_1aOps]>;
-
-def ProcThunderX3T110 : SubtargetFeature<"thunderx3t110", "ARMProcFamily",
- "ThunderX3T110",
- "Marvell ThunderX3 processors", [
- FeatureAggressiveFMA,
- FeatureCRC,
- FeatureCrypto,
- FeatureFPARMv8,
FeatureArithmeticBccFusion,
- FeatureNEON,
FeaturePostRAScheduler,
- FeaturePredictableSelectIsExpensive,
- FeatureLSE,
- FeaturePAuth,
- FeatureBalanceFPOps,
- FeaturePerfMon,
- FeatureStrictAlign,
- HasV8_3aOps]>;
-
-def ProcThunderX : SubtargetFeature<"thunderx", "ARMProcFamily", "ThunderX",
+ FeaturePredictableSelectIsExpensive]>;
+
+def TuneThunderX3T110 : SubtargetFeature<"thunderx3t110", "ARMProcFamily",
+ "ThunderX3T110",
+ "Marvell ThunderX3 processors", [
+ FeatureAggressiveFMA,
+ FeatureArithmeticBccFusion,
+ FeaturePostRAScheduler,
+ FeaturePredictableSelectIsExpensive,
+ FeatureBalanceFPOps,
+ FeatureStrictAlign]>;
+
+def TuneThunderX : SubtargetFeature<"thunderx", "ARMProcFamily", "ThunderX",
"Cavium ThunderX processors", [
- FeatureCRC,
- FeatureCrypto,
- FeatureFPARMv8,
- FeaturePerfMon,
FeaturePostRAScheduler,
- FeaturePredictableSelectIsExpensive,
- FeatureNEON]>;
+ FeaturePredictableSelectIsExpensive]>;
-def ProcThunderXT88 : SubtargetFeature<"thunderxt88", "ARMProcFamily",
+def TuneThunderXT88 : SubtargetFeature<"thunderxt88", "ARMProcFamily",
"ThunderXT88",
"Cavium ThunderX processors", [
- FeatureCRC,
- FeatureCrypto,
- FeatureFPARMv8,
- FeaturePerfMon,
FeaturePostRAScheduler,
- FeaturePredictableSelectIsExpensive,
- FeatureNEON]>;
+ FeaturePredictableSelectIsExpensive]>;
-def ProcThunderXT81 : SubtargetFeature<"thunderxt81", "ARMProcFamily",
+def TuneThunderXT81 : SubtargetFeature<"thunderxt81", "ARMProcFamily",
"ThunderXT81",
"Cavium ThunderX processors", [
- FeatureCRC,
- FeatureCrypto,
- FeatureFPARMv8,
- FeaturePerfMon,
FeaturePostRAScheduler,
- FeaturePredictableSelectIsExpensive,
- FeatureNEON]>;
+ FeaturePredictableSelectIsExpensive]>;
-def ProcThunderXT83 : SubtargetFeature<"thunderxt83", "ARMProcFamily",
+def TuneThunderXT83 : SubtargetFeature<"thunderxt83", "ARMProcFamily",
"ThunderXT83",
"Cavium ThunderX processors", [
- FeatureCRC,
- FeatureCrypto,
- FeatureFPARMv8,
- FeaturePerfMon,
FeaturePostRAScheduler,
- FeaturePredictableSelectIsExpensive,
- FeatureNEON]>;
+ FeaturePredictableSelectIsExpensive]>;
-def ProcTSV110 : SubtargetFeature<"tsv110", "ARMProcFamily", "TSV110",
+def TuneTSV110 : SubtargetFeature<"tsv110", "ARMProcFamily", "TSV110",
"HiSilicon TS-V110 processors", [
- HasV8_2aOps,
- FeatureCrypto,
FeatureCustomCheapAsMoveHandling,
- FeatureFPARMv8,
FeatureFuseAES,
- FeatureNEON,
- FeaturePerfMon,
- FeaturePostRAScheduler,
- FeatureSPE,
- FeatureFullFP16,
- FeatureFP16FML,
- FeatureDotProd]>;
-
-def : ProcessorModel<"generic", NoSchedModel, [
- FeatureFPARMv8,
- FeatureFuseAES,
- FeatureNEON,
- FeaturePerfMon,
- FeaturePostRAScheduler,
-// ETE and TRBE are future architecture extensions. We temporarily enable them
-// by default for users targeting generic AArch64, until it is decided in which
-// armv8.x-a architecture revision they will end up. The extensions do not
-// affect code generated by the compiler and can be used only by explicitly
-// mentioning the new system register names in assembly.
- FeatureETE
- ]>;
-
-def : ProcessorModel<"cortex-a35", CortexA53Model, [ProcA35]>;
-def : ProcessorModel<"cortex-a34", CortexA53Model, [ProcA35]>;
-def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>;
-def : ProcessorModel<"cortex-a55", CortexA55Model, [ProcA55]>;
-def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>;
-def : ProcessorModel<"cortex-a65", CortexA53Model, [ProcA65]>;
-def : ProcessorModel<"cortex-a65ae", CortexA53Model, [ProcA65]>;
-def : ProcessorModel<"cortex-a72", CortexA57Model, [ProcA72]>;
-def : ProcessorModel<"cortex-a73", CortexA57Model, [ProcA73]>;
-def : ProcessorModel<"cortex-a75", CortexA57Model, [ProcA75]>;
-def : ProcessorModel<"cortex-a76", CortexA57Model, [ProcA76]>;
-def : ProcessorModel<"cortex-a76ae", CortexA57Model, [ProcA76]>;
-def : ProcessorModel<"cortex-a77", CortexA57Model, [ProcA77]>;
-def : ProcessorModel<"cortex-a78", CortexA57Model, [ProcA78]>;
-def : ProcessorModel<"cortex-a78c", CortexA57Model, [ProcA78C]>;
-def : ProcessorModel<"cortex-r82", CortexA55Model, [ProcR82]>;
-def : ProcessorModel<"cortex-x1", CortexA57Model, [ProcX1]>;
-def : ProcessorModel<"neoverse-e1", CortexA53Model, [ProcNeoverseE1]>;
-def : ProcessorModel<"neoverse-n1", CortexA57Model, [ProcNeoverseN1]>;
-def : ProcessorModel<"neoverse-n2", CortexA57Model, [ProcNeoverseN2]>;
-def : ProcessorModel<"neoverse-v1", CortexA57Model, [ProcNeoverseV1]>;
-def : ProcessorModel<"exynos-m3", ExynosM3Model, [ProcExynosM3]>;
-def : ProcessorModel<"exynos-m4", ExynosM4Model, [ProcExynosM4]>;
-def : ProcessorModel<"exynos-m5", ExynosM5Model, [ProcExynosM4]>;
-def : ProcessorModel<"falkor", FalkorModel, [ProcFalkor]>;
-def : ProcessorModel<"saphira", FalkorModel, [ProcSaphira]>;
-def : ProcessorModel<"kryo", KryoModel, [ProcKryo]>;
+ FeaturePostRAScheduler]>;
+
+
+def ProcessorFeatures {
+ list<SubtargetFeature> A53 = [HasV8_0aOps, FeatureCRC, FeatureCrypto,
+ FeatureFPARMv8, FeatureNEON, FeaturePerfMon];
+ list<SubtargetFeature> A55 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
+ FeatureNEON, FeatureFullFP16, FeatureDotProd,
+ FeatureRCPC, FeaturePerfMon];
+ list<SubtargetFeature> A510 = [HasV9_0aOps, FeatureNEON, FeaturePerfMon,
+ FeatureMatMulInt8, FeatureBF16, FeatureAM,
+ FeatureMTE, FeatureETE, FeatureSVE2BitPerm,
+ FeatureFP16FML];
+ list<SubtargetFeature> A65 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
+ FeatureNEON, FeatureFullFP16, FeatureDotProd,
+ FeatureRCPC, FeatureSSBS, FeatureRAS];
+ list<SubtargetFeature> A76 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
+ FeatureNEON, FeatureFullFP16, FeatureDotProd,
+ FeatureRCPC, FeatureSSBS];
+ list<SubtargetFeature> A77 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
+ FeatureNEON, FeatureFullFP16, FeatureDotProd,
+ FeatureRCPC];
+ list<SubtargetFeature> A78 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
+ FeatureNEON, FeatureFullFP16, FeatureDotProd,
+ FeatureRCPC, FeaturePerfMon, FeatureSPE,
+ FeatureSSBS];
+ list<SubtargetFeature> A78C = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
+ FeatureNEON, FeatureFullFP16, FeatureDotProd,
+ FeatureFlagM, FeatureFP16FML, FeaturePAuth,
+ FeaturePerfMon, FeatureRCPC, FeatureSPE,
+ FeatureSSBS];
+ list<SubtargetFeature> A710 = [HasV9_0aOps, FeatureNEON, FeaturePerfMon,
+ FeatureETE, FeatureMTE, FeatureFP16FML,
+ FeatureSVE2BitPerm, FeatureBF16, FeatureMatMulInt8];
+ list<SubtargetFeature> R82 = [HasV8_0rOps];
+ list<SubtargetFeature> X1 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
+ FeatureNEON, FeatureRCPC, FeaturePerfMon,
+ FeatureSPE, FeatureFullFP16, FeatureDotProd];
+ list<SubtargetFeature> X2 = [HasV9_0aOps, FeatureNEON, FeaturePerfMon,
+ FeatureMatMulInt8, FeatureBF16, FeatureAM,
+ FeatureMTE, FeatureETE, FeatureSVE2BitPerm,
+ FeatureFP16FML];
+ list<SubtargetFeature> A64FX = [HasV8_2aOps, FeatureFPARMv8, FeatureNEON,
+ FeatureSHA2, FeaturePerfMon, FeatureFullFP16,
+ FeatureSVE, FeatureComplxNum];
+ list<SubtargetFeature> Carmel = [HasV8_2aOps, FeatureNEON, FeatureCrypto,
+ FeatureFullFP16];
+ list<SubtargetFeature> AppleA7 = [HasV8_0aOps, FeatureCrypto, FeatureFPARMv8,
+ FeatureNEON,FeaturePerfMon, FeatureAppleA7SysReg];
+ list<SubtargetFeature> AppleA10 = [HasV8_0aOps, FeatureCrypto, FeatureFPARMv8,
+ FeatureNEON, FeaturePerfMon, FeatureCRC,
+ FeatureRDM, FeaturePAN, FeatureLOR, FeatureVH];
+ list<SubtargetFeature> AppleA11 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
+ FeatureNEON, FeaturePerfMon, FeatureFullFP16];
+ list<SubtargetFeature> AppleA12 = [HasV8_3aOps, FeatureCrypto, FeatureFPARMv8,
+ FeatureNEON, FeaturePerfMon, FeatureFullFP16];
+ list<SubtargetFeature> AppleA13 = [HasV8_4aOps, FeatureCrypto, FeatureFPARMv8,
+ FeatureNEON, FeaturePerfMon, FeatureFullFP16,
+ FeatureFP16FML, FeatureSHA3];
+ list<SubtargetFeature> AppleA14 = [HasV8_4aOps, FeatureCrypto, FeatureFPARMv8,
+ FeatureNEON, FeaturePerfMon, FeatureFRInt3264,
+ FeatureSpecRestrict, FeatureSSBS, FeatureSB,
+ FeaturePredRes, FeatureCacheDeepPersist,
+ FeatureFullFP16, FeatureFP16FML, FeatureSHA3,
+ FeatureAltFPCmp];
+ list<SubtargetFeature> ExynosM3 = [HasV8_0aOps, FeatureCRC, FeatureCrypto,
+ FeaturePerfMon];
+ list<SubtargetFeature> ExynosM4 = [HasV8_2aOps, FeatureCrypto, FeatureDotProd,
+ FeatureFullFP16, FeaturePerfMon];
+ list<SubtargetFeature> Falkor = [HasV8_0aOps, FeatureCRC, FeatureCrypto,
+ FeatureFPARMv8, FeatureNEON, FeaturePerfMon,
+ FeatureRDM];
+ list<SubtargetFeature> NeoverseE1 = [HasV8_2aOps, FeatureCrypto, FeatureDotProd,
+ FeatureFPARMv8, FeatureFullFP16, FeatureNEON,
+ FeatureRCPC, FeatureSSBS];
+ list<SubtargetFeature> NeoverseN1 = [HasV8_2aOps, FeatureCrypto, FeatureDotProd,
+ FeatureFPARMv8, FeatureFullFP16, FeatureNEON,
+ FeatureRCPC, FeatureSPE, FeatureSSBS];
+ list<SubtargetFeature> NeoverseN2 = [HasV8_5aOps, FeatureBF16, FeatureETE,
+ FeatureMatMulInt8, FeatureMTE, FeatureSVE2,
+ FeatureSVE2BitPerm, FeatureTRBE, FeatureCrypto];
+ list<SubtargetFeature> Neoverse512TVB = [HasV8_4aOps, FeatureBF16, FeatureCacheDeepPersist,
+ FeatureCrypto, FeatureFPARMv8, FeatureFP16FML,
+ FeatureFullFP16, FeatureMatMulInt8, FeatureNEON,
+ FeaturePerfMon, FeatureRandGen, FeatureSPE,
+ FeatureSSBS, FeatureSVE];
+ list<SubtargetFeature> NeoverseV1 = [HasV8_4aOps, FeatureBF16, FeatureCacheDeepPersist,
+ FeatureCrypto, FeatureFPARMv8, FeatureFP16FML,
+ FeatureFullFP16, FeatureMatMulInt8, FeatureNEON,
+ FeaturePerfMon, FeatureRandGen, FeatureSPE,
+ FeatureSSBS, FeatureSVE];
+ list<SubtargetFeature> Saphira = [HasV8_4aOps, FeatureCrypto, FeatureFPARMv8,
+ FeatureNEON, FeatureSPE, FeaturePerfMon];
+ list<SubtargetFeature> ThunderX = [HasV8_0aOps, FeatureCRC, FeatureCrypto,
+ FeatureFPARMv8, FeaturePerfMon, FeatureNEON];
+ list<SubtargetFeature> ThunderX2T99 = [HasV8_1aOps, FeatureCRC, FeatureCrypto,
+ FeatureFPARMv8, FeatureNEON, FeatureLSE];
+ list<SubtargetFeature> ThunderX3T110 = [HasV8_3aOps, FeatureCRC, FeatureCrypto,
+ FeatureFPARMv8, FeatureNEON, FeatureLSE,
+ FeaturePAuth, FeaturePerfMon];
+ list<SubtargetFeature> TSV110 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
+ FeatureNEON, FeaturePerfMon, FeatureSPE,
+ FeatureFullFP16, FeatureFP16FML, FeatureDotProd];
+
+ // ETE and TRBE are future architecture extensions. We temporarily enable them
+ // by default for users targeting generic AArch64. The extensions do not
+ // affect code generated by the compiler and can be used only by explicitly
+ // mentioning the new system register names in assembly.
+ list<SubtargetFeature> Generic = [FeatureFPARMv8, FeatureNEON, FeaturePerfMon, FeatureETE];
+}
+
+
+def : ProcessorModel<"generic", CortexA55Model, ProcessorFeatures.Generic,
+ [FeatureFuseAES, FeaturePostRAScheduler]>;
+def : ProcessorModel<"cortex-a35", CortexA53Model, ProcessorFeatures.A53,
+ [TuneA35]>;
+def : ProcessorModel<"cortex-a34", CortexA53Model, ProcessorFeatures.A53,
+ [TuneA35]>;
+def : ProcessorModel<"cortex-a53", CortexA53Model, ProcessorFeatures.A53,
+ [TuneA53]>;
+def : ProcessorModel<"cortex-a55", CortexA55Model, ProcessorFeatures.A55,
+ [TuneA55]>;
+def : ProcessorModel<"cortex-a510", CortexA55Model, ProcessorFeatures.A510,
+ [TuneA510]>;
+def : ProcessorModel<"cortex-a57", CortexA57Model, ProcessorFeatures.A53,
+ [TuneA57]>;
+def : ProcessorModel<"cortex-a65", CortexA53Model, ProcessorFeatures.A65,
+ [TuneA65]>;
+def : ProcessorModel<"cortex-a65ae", CortexA53Model, ProcessorFeatures.A65,
+ [TuneA65]>;
+def : ProcessorModel<"cortex-a72", CortexA57Model, ProcessorFeatures.A53,
+ [TuneA72]>;
+def : ProcessorModel<"cortex-a73", CortexA57Model, ProcessorFeatures.A53,
+ [TuneA73]>;
+def : ProcessorModel<"cortex-a75", CortexA57Model, ProcessorFeatures.A55,
+ [TuneA75]>;
+def : ProcessorModel<"cortex-a76", CortexA57Model, ProcessorFeatures.A76,
+ [TuneA76]>;
+def : ProcessorModel<"cortex-a76ae", CortexA57Model, ProcessorFeatures.A76,
+ [TuneA76]>;
+def : ProcessorModel<"cortex-a77", CortexA57Model, ProcessorFeatures.A77,
+ [TuneA77]>;
+def : ProcessorModel<"cortex-a78", CortexA57Model, ProcessorFeatures.A78,
+ [TuneA78]>;
+def : ProcessorModel<"cortex-a78c", CortexA57Model, ProcessorFeatures.A78C,
+ [TuneA78C]>;
+def : ProcessorModel<"cortex-a710", CortexA57Model, ProcessorFeatures.A710,
+ [TuneA710]>;
+def : ProcessorModel<"cortex-r82", CortexA55Model, ProcessorFeatures.R82,
+ [TuneR82]>;
+def : ProcessorModel<"cortex-x1", CortexA57Model, ProcessorFeatures.X1,
+ [TuneX1]>;
+def : ProcessorModel<"cortex-x2", CortexA57Model, ProcessorFeatures.X2,
+ [TuneX2]>;
+def : ProcessorModel<"neoverse-e1", CortexA53Model,
+ ProcessorFeatures.NeoverseE1, [TuneNeoverseE1]>;
+def : ProcessorModel<"neoverse-n1", CortexA57Model,
+ ProcessorFeatures.NeoverseN1, [TuneNeoverseN1]>;
+def : ProcessorModel<"neoverse-n2", CortexA57Model,
+ ProcessorFeatures.NeoverseN2, [TuneNeoverseN2]>;
+def : ProcessorModel<"neoverse-512tvb", CortexA57Model,
+ ProcessorFeatures.Neoverse512TVB, [TuneNeoverse512TVB]>;
+def : ProcessorModel<"neoverse-v1", CortexA57Model,
+ ProcessorFeatures.NeoverseV1, [TuneNeoverseV1]>;
+def : ProcessorModel<"exynos-m3", ExynosM3Model, ProcessorFeatures.ExynosM3,
+ [TuneExynosM3]>;
+def : ProcessorModel<"exynos-m4", ExynosM4Model, ProcessorFeatures.ExynosM4,
+ [TuneExynosM4]>;
+def : ProcessorModel<"exynos-m5", ExynosM5Model, ProcessorFeatures.ExynosM4,
+ [TuneExynosM4]>;
+def : ProcessorModel<"falkor", FalkorModel, ProcessorFeatures.Falkor,
+ [TuneFalkor]>;
+def : ProcessorModel<"saphira", FalkorModel, ProcessorFeatures.Saphira,
+ [TuneSaphira]>;
+def : ProcessorModel<"kryo", KryoModel, ProcessorFeatures.A53, [TuneKryo]>;
+
// Cavium ThunderX/ThunderX T8X Processors
-def : ProcessorModel<"thunderx", ThunderXT8XModel, [ProcThunderX]>;
-def : ProcessorModel<"thunderxt88", ThunderXT8XModel, [ProcThunderXT88]>;
-def : ProcessorModel<"thunderxt81", ThunderXT8XModel, [ProcThunderXT81]>;
-def : ProcessorModel<"thunderxt83", ThunderXT8XModel, [ProcThunderXT83]>;
+def : ProcessorModel<"thunderx", ThunderXT8XModel, ProcessorFeatures.ThunderX,
+ [TuneThunderX]>;
+def : ProcessorModel<"thunderxt88", ThunderXT8XModel,
+ ProcessorFeatures.ThunderX, [TuneThunderXT88]>;
+def : ProcessorModel<"thunderxt81", ThunderXT8XModel,
+ ProcessorFeatures.ThunderX, [TuneThunderXT81]>;
+def : ProcessorModel<"thunderxt83", ThunderXT8XModel,
+ ProcessorFeatures.ThunderX, [TuneThunderXT83]>;
// Cavium ThunderX2T9X Processors. Formerly Broadcom Vulcan.
-def : ProcessorModel<"thunderx2t99", ThunderX2T99Model, [ProcThunderX2T99]>;
+def : ProcessorModel<"thunderx2t99", ThunderX2T99Model,
+ ProcessorFeatures.ThunderX2T99, [TuneThunderX2T99]>;
// Marvell ThunderX3T110 Processors.
-def : ProcessorModel<"thunderx3t110", ThunderX3T110Model, [ProcThunderX3T110]>;
-def : ProcessorModel<"tsv110", TSV110Model, [ProcTSV110]>;
+def : ProcessorModel<"thunderx3t110", ThunderX3T110Model,
+ ProcessorFeatures.ThunderX3T110, [TuneThunderX3T110]>;
+def : ProcessorModel<"tsv110", TSV110Model, ProcessorFeatures.TSV110,
+ [TuneTSV110]>;
// Support cyclone as an alias for apple-a7 so we can still LTO old bitcode.
-def : ProcessorModel<"cyclone", CycloneModel, [ProcAppleA7]>;
+def : ProcessorModel<"cyclone", CycloneModel, ProcessorFeatures.AppleA7,
+ [TuneAppleA7]>;
// iPhone and iPad CPUs
-def : ProcessorModel<"apple-a7", CycloneModel, [ProcAppleA7]>;
-def : ProcessorModel<"apple-a8", CycloneModel, [ProcAppleA7]>;
-def : ProcessorModel<"apple-a9", CycloneModel, [ProcAppleA7]>;
-def : ProcessorModel<"apple-a10", CycloneModel, [ProcAppleA10]>;
-def : ProcessorModel<"apple-a11", CycloneModel, [ProcAppleA11]>;
-def : ProcessorModel<"apple-a12", CycloneModel, [ProcAppleA12]>;
-def : ProcessorModel<"apple-a13", CycloneModel, [ProcAppleA13]>;
-def : ProcessorModel<"apple-a14", CycloneModel, [ProcAppleA14]>;
+def : ProcessorModel<"apple-a7", CycloneModel, ProcessorFeatures.AppleA7,
+ [TuneAppleA7]>;
+def : ProcessorModel<"apple-a8", CycloneModel, ProcessorFeatures.AppleA7,
+ [TuneAppleA7]>;
+def : ProcessorModel<"apple-a9", CycloneModel, ProcessorFeatures.AppleA7,
+ [TuneAppleA7]>;
+def : ProcessorModel<"apple-a10", CycloneModel, ProcessorFeatures.AppleA10,
+ [TuneAppleA10]>;
+def : ProcessorModel<"apple-a11", CycloneModel, ProcessorFeatures.AppleA11,
+ [TuneAppleA11]>;
+def : ProcessorModel<"apple-a12", CycloneModel, ProcessorFeatures.AppleA12,
+ [TuneAppleA12]>;
+def : ProcessorModel<"apple-a13", CycloneModel, ProcessorFeatures.AppleA13,
+ [TuneAppleA13]>;
+def : ProcessorModel<"apple-a14", CycloneModel, ProcessorFeatures.AppleA14,
+ [TuneAppleA14]>;
// Mac CPUs
-def : ProcessorModel<"apple-m1", CycloneModel, [ProcAppleA14]>;
+def : ProcessorModel<"apple-m1", CycloneModel, ProcessorFeatures.AppleA14,
+ [TuneAppleA14]>;
// watch CPUs.
-def : ProcessorModel<"apple-s4", CycloneModel, [ProcAppleA12]>;
-def : ProcessorModel<"apple-s5", CycloneModel, [ProcAppleA12]>;
+def : ProcessorModel<"apple-s4", CycloneModel, ProcessorFeatures.AppleA12,
+ [TuneAppleA12]>;
+def : ProcessorModel<"apple-s5", CycloneModel, ProcessorFeatures.AppleA12,
+ [TuneAppleA12]>;
// Alias for the latest Apple processor model supported by LLVM.
-def : ProcessorModel<"apple-latest", CycloneModel, [ProcAppleA14]>;
+def : ProcessorModel<"apple-latest", CycloneModel, ProcessorFeatures.AppleA14,
+ [TuneAppleA14]>;
// Fujitsu A64FX
-def : ProcessorModel<"a64fx", A64FXModel, [ProcA64FX]>;
+def : ProcessorModel<"a64fx", A64FXModel, ProcessorFeatures.A64FX,
+ [TuneA64FX]>;
// Nvidia Carmel
-def : ProcessorModel<"carmel", NoSchedModel, [ProcCarmel]>;
+def : ProcessorModel<"carmel", NoSchedModel, ProcessorFeatures.Carmel,
+ [TuneCarmel]>;
//===----------------------------------------------------------------------===//
// Assembly parser
diff --git a/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp b/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp
index e80fe2cada09..7fd51a98ad94 100644
--- a/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp
+++ b/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp
@@ -159,7 +159,7 @@ static MachineInstr *getLastNonPseudo(MachineBasicBlock &MBB,
// If there is no non-pseudo in the current block, loop back around and try
// the previous block (if there is one).
while ((FMBB = getBBFallenThrough(FMBB, TII))) {
- for (MachineInstr &I : make_range(FMBB->rbegin(), FMBB->rend()))
+ for (MachineInstr &I : llvm::reverse(*FMBB))
if (!I.isPseudo())
return &I;
}
diff --git a/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp b/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
index c996d2df8c38..cd67e058a9c1 100644
--- a/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
@@ -377,8 +377,7 @@ void AArch64AdvSIMDScalar::transformInstruction(MachineInstr &MI) {
// processMachineBasicBlock - Main optimization loop.
bool AArch64AdvSIMDScalar::processMachineBasicBlock(MachineBasicBlock *MBB) {
bool Changed = false;
- for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;) {
- MachineInstr &MI = *I++;
+ for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) {
if (isProfitableToTransform(MI)) {
transformInstruction(MI);
Changed = true;
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index adefe3b37ee0..9f527a17d390 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -50,9 +50,9 @@
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
@@ -293,7 +293,7 @@ void AArch64AsmPrinter::emitSled(const MachineInstr &MI, SledKind Kind) {
// ;DATA: higher 32 bits of the address of the trampoline
// LDP X0, X30, [SP], #16 ; pop X0 and the link register from the stack
//
- OutStreamer->emitCodeAlignment(4);
+ OutStreamer->emitCodeAlignment(4, &getSubtargetInfo());
auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
OutStreamer->emitLabel(CurSled);
auto Target = OutContext.createTempSymbol();
@@ -653,6 +653,9 @@ bool AArch64AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode,
case 'x':
Reg = getXRegFromWReg(Reg);
break;
+ case 't':
+ Reg = getXRegFromXRegTuple(Reg);
+ break;
}
O << AArch64InstPrinter::getRegisterName(Reg);
@@ -749,6 +752,10 @@ bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
AArch64::GPR64allRegClass.contains(Reg))
return printAsmMRegister(MO, 'x', O);
+ // If this is an x register tuple, print an x register.
+ if (AArch64::GPR64x8ClassRegClass.contains(Reg))
+ return printAsmMRegister(MO, 't', O);
+
unsigned AltName = AArch64::NoRegAltName;
const TargetRegisterClass *RegClass;
if (AArch64::ZPRRegClass.contains(Reg)) {
diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
index 4b7ce565eb1e..c90601443934 100644
--- a/llvm/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
@@ -50,9 +50,9 @@ def CC_AArch64_AAPCS : CallingConv<[
// "sret" on argument 1 means instance methods.
CCIfInReg<CCIfType<[i64],
- CCIfSRet<CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1], [W0, W1]>>>>>,
+ CCIfSRet<CCIfType<[i64], CCAssignToReg<[X0, X1]>>>>>,
- CCIfSRet<CCIfType<[i64], CCAssignToRegWithShadow<[X8], [W8]>>>,
+ CCIfSRet<CCIfType<[i64], CCAssignToReg<[X8]>>>,
// Put ByVal arguments directly on the stack. Minimum size and alignment of a
// slot is 64-bit.
@@ -64,14 +64,14 @@ def CC_AArch64_AAPCS : CallingConv<[
CCIfNest<CCAssignToReg<[X18]>>,
// Pass SwiftSelf in a callee saved register.
- CCIfSwiftSelf<CCIfType<[i64], CCAssignToRegWithShadow<[X20], [W20]>>>,
+ CCIfSwiftSelf<CCIfType<[i64], CCAssignToReg<[X20]>>>,
// A SwiftError is passed in X21.
- CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X21], [W21]>>>,
+ CCIfSwiftError<CCIfType<[i64], CCAssignToReg<[X21]>>>,
// Pass SwiftAsync in an otherwise callee saved register so that it will be
// preserved for normal function calls.
- CCIfSwiftAsync<CCIfType<[i64], CCAssignToRegWithShadow<[X22], [W22]>>>,
+ CCIfSwiftAsync<CCIfType<[i64], CCAssignToReg<[X22]>>>,
CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,
@@ -90,8 +90,7 @@ def CC_AArch64_AAPCS : CallingConv<[
// Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,
// up to eight each of GPR and FPR.
CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
- CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
- [X0, X1, X2, X3, X4, X5, X6, X7]>>,
+ CCIfType<[i32], CCAssignToReg<[W0, W1, W2, W3, W4, W5, W6, W7]>>,
// i128 is split to two i64s, we can't fit half to register X7.
CCIfType<[i64], CCIfSplit<CCAssignToRegWithShadow<[X0, X2, X4, X6],
[X0, X1, X3, X5]>>>,
@@ -99,19 +98,13 @@ def CC_AArch64_AAPCS : CallingConv<[
// i128 is split to two i64s, and its stack alignment is 16 bytes.
CCIfType<[i64], CCIfSplit<CCAssignToStackWithShadow<8, 16, [X7]>>>,
- CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7],
- [W0, W1, W2, W3, W4, W5, W6, W7]>>,
- CCIfType<[f16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7],
- [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
- CCIfType<[bf16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7],
- [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
- CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7],
- [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
- CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
- [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
+ CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6, X7]>>,
+ CCIfType<[f16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>,
+ CCIfType<[bf16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>,
+ CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>,
+ CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16, v4bf16],
- CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
- [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
+ CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16],
CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
@@ -131,7 +124,7 @@ def RetCC_AArch64_AAPCS : CallingConv<[
CCIfType<[v2f64, v4f32], CCBitConvertToType<v2i64>>,
CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,
- CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X21], [W21]>>>,
+ CCIfSwiftError<CCIfType<[i64], CCAssignToReg<[X21]>>>,
// Big endian vectors must be passed as if they were 1-element vectors so that
// their lanes are in a consistent order.
@@ -141,21 +134,14 @@ def RetCC_AArch64_AAPCS : CallingConv<[
CCBitConvertToType<f128>>>,
CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
- CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
- [X0, X1, X2, X3, X4, X5, X6, X7]>>,
- CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7],
- [W0, W1, W2, W3, W4, W5, W6, W7]>>,
- CCIfType<[f16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7],
- [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
- CCIfType<[bf16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7],
- [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
- CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7],
- [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
- CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
- [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
+ CCIfType<[i32], CCAssignToReg<[W0, W1, W2, W3, W4, W5, W6, W7]>>,
+ CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6, X7]>>,
+ CCIfType<[f16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>,
+ CCIfType<[bf16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>,
+ CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>,
+ CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16, v4bf16],
- CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
- [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
+ CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16],
CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
@@ -195,49 +181,41 @@ def CC_AArch64_DarwinPCS : CallingConv<[
CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>,
// An SRet is passed in X8, not X0 like a normal pointer parameter.
- CCIfSRet<CCIfType<[i64], CCAssignToRegWithShadow<[X8], [W8]>>>,
+ CCIfSRet<CCIfType<[i64], CCAssignToReg<[X8]>>>,
// Put ByVal arguments directly on the stack. Minimum size and alignment of a
// slot is 64-bit.
CCIfByVal<CCPassByVal<8, 8>>,
// Pass SwiftSelf in a callee saved register.
- CCIfSwiftSelf<CCIfType<[i64], CCAssignToRegWithShadow<[X20], [W20]>>>,
+ CCIfSwiftSelf<CCIfType<[i64], CCAssignToReg<[X20]>>>,
// A SwiftError is passed in X21.
- CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X21], [W21]>>>,
+ CCIfSwiftError<CCIfType<[i64], CCAssignToReg<[X21]>>>,
// Pass SwiftAsync in an otherwise callee saved register so that it will be
// preserved for normal function calls.
- CCIfSwiftAsync<CCIfType<[i64], CCAssignToRegWithShadow<[X22], [W22]>>>,
+ CCIfSwiftAsync<CCIfType<[i64], CCAssignToReg<[X22]>>>,
CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,
// Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,
// up to eight each of GPR and FPR.
CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
- CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
- [X0, X1, X2, X3, X4, X5, X6, X7]>>,
+ CCIfType<[i32], CCAssignToReg<[W0, W1, W2, W3, W4, W5, W6, W7]>>,
// i128 is split to two i64s, we can't fit half to register X7.
CCIfType<[i64],
- CCIfSplit<CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6],
- [W0, W1, W2, W3, W4, W5, W6]>>>,
+ CCIfSplit<CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6]>>>,
// i128 is split to two i64s, and its stack alignment is 16 bytes.
CCIfType<[i64], CCIfSplit<CCAssignToStackWithShadow<8, 16, [X7]>>>,
- CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7],
- [W0, W1, W2, W3, W4, W5, W6, W7]>>,
- CCIfType<[f16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7],
- [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
- CCIfType<[bf16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7],
- [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
- CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7],
- [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
- CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
- [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
+ CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6, X7]>>,
+ CCIfType<[f16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>,
+ CCIfType<[bf16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>,
+ CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>,
+ CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16, v4bf16],
- CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
- [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
+ CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16],
CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
@@ -310,8 +288,8 @@ let Entry = 1 in
def CC_AArch64_WebKit_JS : CallingConv<[
// Handle i1, i8, i16, i32, and i64 passing in register X0 (W0).
CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
- CCIfType<[i32], CCAssignToRegWithShadow<[W0], [X0]>>,
- CCIfType<[i64], CCAssignToRegWithShadow<[X0], [W0]>>,
+ CCIfType<[i32], CCAssignToReg<[W0]>>,
+ CCIfType<[i64], CCAssignToReg<[X0]>>,
// Pass the remaining arguments on the stack instead.
CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
@@ -320,14 +298,10 @@ def CC_AArch64_WebKit_JS : CallingConv<[
let Entry = 1 in
def RetCC_AArch64_WebKit_JS : CallingConv<[
- CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
- [X0, X1, X2, X3, X4, X5, X6, X7]>>,
- CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7],
- [W0, W1, W2, W3, W4, W5, W6, W7]>>,
- CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7],
- [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
- CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
- [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>
+ CCIfType<[i32], CCAssignToReg<[W0, W1, W2, W3, W4, W5, W6, W7]>>,
+ CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6, X7]>>,
+ CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>,
+ CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>
]>;
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index d938008a1e07..d2097f7e6ee3 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -189,6 +189,13 @@ def fold_merge_to_zext : GICombineRule<
(apply [{ applyFoldMergeToZext(*${d}, MRI, B, Observer); }])
>;
+def mutate_anyext_to_zext : GICombineRule<
+ (defs root:$d),
+ (match (wip_match_opcode G_ANYEXT):$d,
+ [{ return matchMutateAnyExtToZExt(*${d}, MRI); }]),
+ (apply [{ applyMutateAnyExtToZExt(*${d}, MRI, B, Observer); }])
+>;
+
// Post-legalization combines which should happen at all optimization levels.
// (E.g. ones that facilitate matching for the selector) For example, matching
// pseudos.
@@ -204,7 +211,7 @@ def AArch64PostLegalizerLoweringHelper
def AArch64PostLegalizerCombinerHelper
: GICombinerHelper<"AArch64GenPostLegalizerCombinerHelper",
[copy_prop, erase_undef_store, combines_for_extload,
- sext_trunc_sextload,
+ sext_trunc_sextload, mutate_anyext_to_zext,
hoist_logic_op_with_same_opcode_hands,
redundant_and, xor_of_and_with_same_reg,
extractvecelt_pairwise_add, redundant_or,
@@ -212,6 +219,7 @@ def AArch64PostLegalizerCombinerHelper
form_bitfield_extract, rotate_out_of_range,
icmp_to_true_false_known_bits, merge_unmerge,
select_combines, fold_merge_to_zext,
- constant_fold, identity_combines]> {
+ constant_fold, identity_combines,
+ ptr_add_immed_chain, overlapping_and]> {
let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule";
}
diff --git a/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp b/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp
index e90e8e3da057..533ab3b05de9 100644
--- a/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp
+++ b/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp
@@ -295,10 +295,7 @@ bool AArch64CondBrTuning::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
for (MachineBasicBlock &MBB : MF) {
bool LocalChange = false;
- for (MachineBasicBlock::iterator I = MBB.getFirstTerminator(),
- E = MBB.end();
- I != E; ++I) {
- MachineInstr &MI = *I;
+ for (MachineInstr &MI : MBB.terminators()) {
switch (MI.getOpcode()) {
default:
break;
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index b2eee2845ba9..4c04e04a7d3c 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -937,12 +937,16 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
case AArch64::ORRWrr: Opcode = AArch64::ORRWrs; break;
case AArch64::ORRXrr: Opcode = AArch64::ORRXrs; break;
}
- MachineInstrBuilder MIB1 =
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode),
- MI.getOperand(0).getReg())
- .add(MI.getOperand(1))
- .add(MI.getOperand(2))
- .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
+ MachineFunction &MF = *MBB.getParent();
+ // Try to create new inst without implicit operands added.
+ MachineInstr *NewMI = MF.CreateMachineInstr(
+ TII->get(Opcode), MI.getDebugLoc(), /*NoImplicit=*/true);
+ MBB.insert(MBBI, NewMI);
+ MachineInstrBuilder MIB1(MF, NewMI);
+ MIB1.addReg(MI.getOperand(0).getReg(), RegState::Define)
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
+ .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
transferImpOps(MI, MIB1, MIB1);
MI.eraseFromParent();
return true;
@@ -1049,6 +1053,7 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
case AArch64::MOVaddrEXT: {
// Expand into ADRP + ADD.
Register DstReg = MI.getOperand(0).getReg();
+ assert(DstReg != AArch64::XZR);
MachineInstrBuilder MIB1 =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
.add(MI.getOperand(1));
diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index 9acda17b816f..3dc694df509d 100644
--- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -3483,7 +3483,7 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
return false;
const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
- return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 1);
+ return lowerCallTo(II, IntrMemName, II->arg_size() - 1);
}
case Intrinsic::memset: {
const MemSetInst *MSI = cast<MemSetInst>(II);
@@ -3499,7 +3499,7 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
// address spaces.
return false;
- return lowerCallTo(II, "memset", II->getNumArgOperands() - 1);
+ return lowerCallTo(II, "memset", II->arg_size() - 1);
}
case Intrinsic::sin:
case Intrinsic::cos:
@@ -3533,10 +3533,10 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
}
ArgListTy Args;
- Args.reserve(II->getNumArgOperands());
+ Args.reserve(II->arg_size());
// Populate the argument list.
- for (auto &Arg : II->arg_operands()) {
+ for (auto &Arg : II->args()) {
ArgListEntry Entry;
Entry.Val = Arg;
Entry.Ty = Arg->getType();
@@ -4806,7 +4806,7 @@ bool AArch64FastISel::selectSDiv(const Instruction *I) {
const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
- !(C.isPowerOf2() || (-C).isPowerOf2()))
+ !(C.isPowerOf2() || C.isNegatedPowerOf2()))
return selectBinaryOp(I, ISD::SDIV);
unsigned Lg2 = C.countTrailingZeros();
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index f6a528c0e6fd..b630f4f0df5f 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1158,11 +1158,33 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// ORR is sufficient, it is assumed a Swift kernel would initialize the TBI
// bits so that is still true.
if (HasFP && AFI->hasSwiftAsyncContext()) {
- // ORR x29, x29, #0x1000_0000_0000_0000
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXri), AArch64::FP)
- .addUse(AArch64::FP)
- .addImm(0x1100)
- .setMIFlag(MachineInstr::FrameSetup);
+ switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
+ case SwiftAsyncFramePointerMode::DeploymentBased:
+ if (Subtarget.swiftAsyncContextIsDynamicallySet()) {
+ // The special symbol below is absolute and has a *value* that can be
+ // combined with the frame pointer to signal an extended frame.
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::LOADgot), AArch64::X16)
+ .addExternalSymbol("swift_async_extendedFramePointerFlags",
+ AArch64II::MO_GOT);
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::FP)
+ .addUse(AArch64::FP)
+ .addUse(AArch64::X16)
+ .addImm(Subtarget.isTargetILP32() ? 32 : 0);
+ break;
+ }
+ LLVM_FALLTHROUGH;
+
+ case SwiftAsyncFramePointerMode::Always:
+ // ORR x29, x29, #0x1000_0000_0000_0000
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXri), AArch64::FP)
+ .addUse(AArch64::FP)
+ .addImm(0x1100)
+ .setMIFlag(MachineInstr::FrameSetup);
+ break;
+
+ case SwiftAsyncFramePointerMode::Never:
+ break;
+ }
}
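A standalone illustration of the flag handled above: the ORRXri with the #0x1000_0000_0000_0000 immediate sets bit 60 of x29 to mark an extended Swift async frame, while the deployment-based path ORs in a runtime-provided symbol value instead. This sketch models only the constant-bit case; the helper names are hypothetical.

#include <cstdint>
#include <cstdio>

// Bit 60 of the frame pointer marks a Swift async extended frame; this is
// the bit set by "ORR x29, x29, #0x1000_0000_0000_0000" above.
constexpr uint64_t kExtendedFrameBit = 1ULL << 60;

uint64_t markExtendedFrame(uint64_t FP) { return FP | kExtendedFrameBit; }
bool isExtendedFrame(uint64_t FP) { return (FP & kExtendedFrameBit) != 0; }

int main() {
  uint64_t FP = 0x0000fffff7a10020ULL; // an arbitrary example frame pointer
  std::printf("%d\n", isExtendedFrame(markExtendedFrame(FP))); // prints 1
  return 0;
}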
// All calls are tail calls in GHC calling conv, and functions have no
@@ -1205,7 +1227,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(-NumBytes), TII,
MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
- if (!NeedsWinCFI && needsFrameMoves) {
+ if (needsFrameMoves) {
// Label used to tie together the PROLOG_LABEL and the MachineMoves.
MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
// Encode the stack size of the leaf function.
@@ -1631,7 +1653,8 @@ static void InsertReturnAddressAuth(MachineFunction &MF,
// The AUTIASP instruction assembles to a hint instruction before v8.3a so
  // this instruction can safely be used for any v8a architecture.
// From v8.3a onwards there are optimised authenticate LR and return
- // instructions, namely RETA{A,B}, that can be used instead.
+ // instructions, namely RETA{A,B}, that can be used instead. In this case the
+ // DW_CFA_AARCH64_negate_ra_state can't be emitted.
if (Subtarget.hasPAuth() && MBBI != MBB.end() &&
MBBI->getOpcode() == AArch64::RET_ReallyLR) {
BuildMI(MBB, MBBI, DL,
@@ -1643,6 +1666,12 @@ static void InsertReturnAddressAuth(MachineFunction &MF,
MBB, MBBI, DL,
TII->get(MFI.shouldSignWithBKey() ? AArch64::AUTIBSP : AArch64::AUTIASP))
.setMIFlag(MachineInstr::FrameDestroy);
+
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameDestroy);
}
}
@@ -2472,22 +2501,20 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
BuildMI(MBB, MI, DL, TII.get(AArch64::SEH_Nop))
.setMIFlag(MachineInstr::FrameSetup);
- if (!MF.getFunction().hasFnAttribute(Attribute::NoUnwind)) {
- // Emit a CFI instruction that causes 8 to be subtracted from the value of
- // x18 when unwinding past this frame.
- static const char CFIInst[] = {
- dwarf::DW_CFA_val_expression,
- 18, // register
- 2, // length
- static_cast<char>(unsigned(dwarf::DW_OP_breg18)),
- static_cast<char>(-8) & 0x7f, // addend (sleb128)
- };
- unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createEscape(
- nullptr, StringRef(CFIInst, sizeof(CFIInst))));
- BuildMI(MBB, MI, DL, TII.get(AArch64::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex)
- .setMIFlag(MachineInstr::FrameSetup);
- }
+ // Emit a CFI instruction that causes 8 to be subtracted from the value of
+ // x18 when unwinding past this frame.
+ static const char CFIInst[] = {
+ dwarf::DW_CFA_val_expression,
+ 18, // register
+ 2, // length
+ static_cast<char>(unsigned(dwarf::DW_OP_breg18)),
+ static_cast<char>(-8) & 0x7f, // addend (sleb128)
+ };
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createEscape(
+ nullptr, StringRef(CFIInst, sizeof(CFIInst))));
+ BuildMI(MBB, MI, DL, TII.get(AArch64::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlag(MachineInstr::FrameSetup);
// This instruction also makes x18 live-in to the entry block.
MBB.addLiveIn(AArch64::X18);
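The CFIInst bytes above encode a DWARF expression meaning "the caller's x18 is the current x18 minus 8"; the -8 addend fits in a single SLEB128 byte. A minimal sketch of that one-byte encoding, with an illustrative helper name:

#include <cassert>
#include <cstdint>

// Encode a small value as a one-byte SLEB128, as done for the -8 addend in
// the DW_OP_breg18 expression above. Valid only for -64 <= V <= 63.
uint8_t encodeSLEB128OneByte(int8_t V) {
  assert(V >= -64 && V <= 63 && "value must fit in one SLEB128 byte");
  return static_cast<uint8_t>(V) & 0x7f; // continuation bit (0x80) stays clear
}

int main() {
  assert(encodeSLEB128OneByte(-8) == 0x78); // same as static_cast<char>(-8) & 0x7f
  return 0;
}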
@@ -2509,9 +2536,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
}
return true;
}
- for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE;
- ++RPII) {
- RegPairInfo RPI = *RPII;
+ for (const RegPairInfo &RPI : llvm::reverse(RegPairs)) {
unsigned Reg1 = RPI.Reg1;
unsigned Reg2 = RPI.Reg2;
unsigned StrOpc;
@@ -3512,7 +3537,14 @@ StackOffset AArch64FrameLowering::getFrameIndexReferencePreferSP(
return StackOffset::getFixed(MFI.getObjectOffset(FI));
}
- return getFrameIndexReference(MF, FI, FrameReg);
+ // Go to common code if we cannot provide sp + offset.
+ if (MFI.hasVarSizedObjects() ||
+ MF.getInfo<AArch64FunctionInfo>()->getStackSizeSVE() ||
+ MF.getSubtarget().getRegisterInfo()->hasStackRealignment(MF))
+ return getFrameIndexReference(MF, FI, FrameReg);
+
+ FrameReg = AArch64::SP;
+ return getStackOffset(MF, MFI.getObjectOffset(FI));
}
/// The parent frame offset (aka dispFrame) is only used on X86_64 to retrieve
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index f8adaf36db84..e6d997f91b47 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -67,8 +67,6 @@ public:
bool hasFP(const MachineFunction &MF) const override;
bool hasReservedCallFrame(const MachineFunction &MF) const override;
- bool hasSwiftExtendedFrame(const MachineFunction &MF) const;
-
bool assignCalleeSavedSpillSlots(MachineFunction &MF,
const TargetRegisterInfo *TRI,
std::vector<CalleeSavedInfo> &CSI,
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 17e530a4641d..fe9b2f8883b9 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -167,7 +167,7 @@ public:
case ISD::SPLAT_VECTOR: {
auto Opnd0 = N->getOperand(0);
if (auto CN = dyn_cast<ConstantSDNode>(Opnd0))
- if (CN->isNullValue())
+ if (CN->isZero())
return true;
if (auto CN = dyn_cast<ConstantFPSDNode>(Opnd0))
if (CN->isZero())
@@ -187,7 +187,7 @@ public:
case ISD::SPLAT_VECTOR: {
auto Opnd0 = N->getOperand(0);
if (auto CN = dyn_cast<ConstantSDNode>(Opnd0))
- if (CN->isNullValue())
+ if (CN->isZero())
return true;
if (auto CN = dyn_cast<ConstantFPSDNode>(Opnd0))
if (CN->isZero())
@@ -286,7 +286,8 @@ public:
void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
- unsigned Opc_rr, unsigned Opc_ri);
+ unsigned Opc_rr, unsigned Opc_ri,
+ bool IsIntr = false);
bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
/// SVE Reg+Imm addressing mode.
@@ -1487,7 +1488,7 @@ AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
unsigned Scale, unsigned Opc_ri,
- unsigned Opc_rr) {
+ unsigned Opc_rr, bool IsIntr) {
assert(Scale < 4 && "Invalid scaling value.");
SDLoc DL(N);
EVT VT = N->getValueType(0);
@@ -1497,11 +1498,11 @@ void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
SDValue Base, Offset;
unsigned Opc;
std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
- N, Opc_rr, Opc_ri, N->getOperand(2),
+ N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2),
CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);
- SDValue Ops[] = {N->getOperand(1), // Predicate
- Base, // Memory operand
+ SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate
+ Base, // Memory operand
Offset, Chain};
const EVT ResTys[] = {MVT::Untyped, MVT::Other};
@@ -2167,7 +2168,7 @@ static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
- (SignificantDstMask | SignificantBitsToBeInserted).isAllOnesValue();
+ (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
}
// Look for bits that will be useful for later uses.
@@ -2965,8 +2966,8 @@ static int getIntOperandFromRegisterString(StringRef RegString) {
 // form described in getIntOperandFromRegisterString) or is a named register
// known by the MRS SysReg mapper.
bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
- const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
- const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
+ const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
+ const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
SDLoc DL(N);
int Reg = getIntOperandFromRegisterString(RegString->getString());
@@ -3011,8 +3012,8 @@ bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
 // form described in getIntOperandFromRegisterString) or is a named register
// known by the MSR SysReg mapper.
bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
- const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
- const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
+ const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
+ const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
SDLoc DL(N);
int Reg = getIntOperandFromRegisterString(RegString->getString());
@@ -3152,7 +3153,6 @@ bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SD
Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
return true;
} else if ((ImmVal & 0xFF) == 0) {
- assert((ImmVal >= -32768) && (ImmVal <= 32512));
Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
Imm = CurDAG->getTargetConstant((ImmVal >> 8) & 0xFF, DL, MVT::i32);
return true;
@@ -3521,7 +3521,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
// Materialize zero constants as copies from WZR/XZR. This allows
// the coalescer to propagate these into other instructions.
ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
- if (ConstNode->isNullValue()) {
+ if (ConstNode->isZero()) {
if (VT == MVT::i32) {
SDValue New = CurDAG->getCopyFromReg(
CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
@@ -3895,6 +3895,69 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
case Intrinsic::aarch64_ld64b:
SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
return;
+ case Intrinsic::aarch64_sve_ld2_sret: {
+ if (VT == MVT::nxv16i8) {
+ SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B,
+ true);
+ return;
+ } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
+ (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
+ SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H,
+ true);
+ return;
+ } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
+ SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W,
+ true);
+ return;
+ } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
+ SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D,
+ true);
+ return;
+ }
+ break;
+ }
+ case Intrinsic::aarch64_sve_ld3_sret: {
+ if (VT == MVT::nxv16i8) {
+ SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B,
+ true);
+ return;
+ } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
+ (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
+ SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H,
+ true);
+ return;
+ } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
+ SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W,
+ true);
+ return;
+ } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
+ SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D,
+ true);
+ return;
+ }
+ break;
+ }
+ case Intrinsic::aarch64_sve_ld4_sret: {
+ if (VT == MVT::nxv16i8) {
+ SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B,
+ true);
+ return;
+ } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
+ (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
+ SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H,
+ true);
+ return;
+ } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
+ SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W,
+ true);
+ return;
+ } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
+ SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D,
+ true);
+ return;
+ }
+ break;
+ }
}
} break;
case ISD::INTRINSIC_WO_CHAIN: {
@@ -4987,6 +5050,14 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
SDValue &Base,
SDValue &OffImm) {
const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
+ const DataLayout &DL = CurDAG->getDataLayout();
+
+ if (N.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
+ OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
+ return true;
+ }
if (MemVT == EVT())
return false;
@@ -5010,6 +5081,11 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
return false;
Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
+ }
+
OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
return true;
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index e7282aad05e2..6e9e61c8e7ac 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -246,6 +246,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass);
addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass);
+ if (Subtarget->hasLS64()) {
+ addRegisterClass(MVT::i64x8, &AArch64::GPR64x8ClassRegClass);
+ setOperationAction(ISD::LOAD, MVT::i64x8, Custom);
+ setOperationAction(ISD::STORE, MVT::i64x8, Custom);
+ }
+
if (Subtarget->hasFPARMv8()) {
addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
addRegisterClass(MVT::bf16, &AArch64::FPR16RegClass);
@@ -779,6 +785,13 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::LOAD, MVT::i128, Custom);
setOperationAction(ISD::STORE, MVT::i128, Custom);
+ // Aligned 128-bit loads and stores are single-copy atomic according to the
+ // v8.4a spec.
+ if (Subtarget->hasLSE2()) {
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
+ setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
+ }
+
// 256 bit non-temporal stores can be lowered to STNP. Do this as part of the
// custom lowering, as there are no un-paired non-temporal stores and
// legalization will break up 256 bit inputs.
@@ -882,9 +895,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::UINT_TO_FP);
- // TODO: Do the same for FP_TO_*INT_SAT.
setTargetDAGCombine(ISD::FP_TO_SINT);
setTargetDAGCombine(ISD::FP_TO_UINT);
+ setTargetDAGCombine(ISD::FP_TO_SINT_SAT);
+ setTargetDAGCombine(ISD::FP_TO_UINT_SAT);
setTargetDAGCombine(ISD::FDIV);
// Try and combine setcc with csel
@@ -899,6 +913,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
setTargetDAGCombine(ISD::TRUNCATE);
setTargetDAGCombine(ISD::CONCAT_VECTORS);
+ setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
setTargetDAGCombine(ISD::STORE);
if (Subtarget->supportsAddressTopByteIgnored())
setTargetDAGCombine(ISD::LOAD);
@@ -991,16 +1006,15 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UINT_TO_FP, MVT::v1i64, Expand);
setOperationAction(ISD::FP_ROUND, MVT::v1f64, Expand);
+ setOperationAction(ISD::FP_TO_SINT_SAT, MVT::v1i64, Expand);
+ setOperationAction(ISD::FP_TO_UINT_SAT, MVT::v1i64, Expand);
+
setOperationAction(ISD::MUL, MVT::v1i64, Expand);
  // AArch64 doesn't have direct vector ->f32 conversion instructions for
// elements smaller than i32, so promote the input to i32 first.
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i8, MVT::v4i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i8, MVT::v4i32);
- setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32);
- setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32);
- setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v16i8, MVT::v16i32);
- setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v16i8, MVT::v16i32);
// Similarly, there is no direct i32 -> f64 vector conversion instruction.
setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
@@ -1013,6 +1027,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
if (Subtarget->hasFullFP16()) {
+ setOperationAction(ISD::SINT_TO_FP, MVT::v8i8, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v16i8, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v16i8, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom);
@@ -1020,6 +1038,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
} else {
// when AArch64 doesn't have fullfp16 support, promote the input
// to i32 first.
+ setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32);
+ setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32);
+ setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v16i8, MVT::v16i32);
+ setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v16i8, MVT::v16i32);
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i16, MVT::v4i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i16, MVT::v4i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i16, MVT::v8i32);
@@ -1034,6 +1056,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BITREVERSE, MVT::v4i32, Custom);
setOperationAction(ISD::BITREVERSE, MVT::v1i64, Custom);
setOperationAction(ISD::BITREVERSE, MVT::v2i64, Custom);
+ for (auto VT : {MVT::v1i64, MVT::v2i64}) {
+ setOperationAction(ISD::UMAX, VT, Custom);
+ setOperationAction(ISD::SMAX, VT, Custom);
+ setOperationAction(ISD::UMIN, VT, Custom);
+ setOperationAction(ISD::SMIN, VT, Custom);
+ }
// AArch64 doesn't have MUL.2d:
setOperationAction(ISD::MUL, MVT::v2i64, Expand);
@@ -1260,6 +1288,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::FADD, VT, Custom);
+ setOperationAction(ISD::FCOPYSIGN, VT, Custom);
setOperationAction(ISD::FDIV, VT, Custom);
setOperationAction(ISD::FMA, VT, Custom);
setOperationAction(ISD::FMAXIMUM, VT, Custom);
@@ -1447,6 +1476,8 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT) {
setOperationAction(ISD::FP_TO_SINT, VT, Custom);
setOperationAction(ISD::FP_TO_UINT, VT, Custom);
+ setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
+ setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
if (!VT.isFloatingPoint())
setOperationAction(ISD::ABS, VT, Legal);
@@ -1502,6 +1533,8 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
MVT InnerVT = VT.changeVectorElementType(MVT::i8);
while (InnerVT != VT) {
setTruncStoreAction(VT, InnerVT, Custom);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Custom);
+ setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Custom);
InnerVT = InnerVT.changeVectorElementType(
MVT::getIntegerVT(2 * InnerVT.getScalarSizeInBits()));
}
@@ -1771,6 +1804,11 @@ void AArch64TargetLowering::computeKnownBitsForTargetNode(
Known.Zero = APInt::getHighBitsSet(64, 32);
break;
}
+ case AArch64ISD::ASSERT_ZEXT_BOOL: {
+ Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
+ Known.Zero |= APInt(Known.getBitWidth(), 0xFE);
+ break;
+ }
case ISD::INTRINSIC_W_CHAIN: {
ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
@@ -2023,6 +2061,8 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::LASTA)
MAKE_CASE(AArch64ISD::LASTB)
MAKE_CASE(AArch64ISD::REINTERPRET_CAST)
+ MAKE_CASE(AArch64ISD::LS64_BUILD)
+ MAKE_CASE(AArch64ISD::LS64_EXTRACT)
MAKE_CASE(AArch64ISD::TBL)
MAKE_CASE(AArch64ISD::FADD_PRED)
MAKE_CASE(AArch64ISD::FADDA_PRED)
@@ -2160,6 +2200,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::INDEX_VECTOR)
MAKE_CASE(AArch64ISD::UADDLP)
MAKE_CASE(AArch64ISD::CALL_RVMARKER)
+ MAKE_CASE(AArch64ISD::ASSERT_ZEXT_BOOL)
}
#undef MAKE_CASE
return nullptr;
@@ -2245,9 +2286,15 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
case AArch64::F128CSEL:
return EmitF128CSEL(MI, BB);
+ case TargetOpcode::STATEPOINT:
+    // STATEPOINT is a pseudo instruction with no implicit defs/uses, whereas
+    // the BL call instruction (which the statepoint is eventually lowered to)
+    // has an implicit def. Add this implicit dead def here as a workaround.
+ MI.addOperand(*MI.getMF(), MachineOperand::CreateReg(AArch64::LR, true,
+ true, false, true));
+ LLVM_FALLTHROUGH;
case TargetOpcode::STACKMAP:
case TargetOpcode::PATCHPOINT:
- case TargetOpcode::STATEPOINT:
return emitPatchPoint(MI, BB);
case AArch64::CATCHRET:
@@ -2285,7 +2332,7 @@ static bool isZerosVector(const SDNode *N) {
auto Opnd0 = N->getOperand(0);
auto *CINT = dyn_cast<ConstantSDNode>(Opnd0);
auto *CFP = dyn_cast<ConstantFPSDNode>(Opnd0);
- return (CINT && CINT->isNullValue()) || (CFP && CFP->isZero());
+ return (CINT && CINT->isZero()) || (CFP && CFP->isZero());
}
/// changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64
@@ -2967,9 +3014,9 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
}
}
- if (!Cmp && (RHSC->isNullValue() || RHSC->isOne())) {
+ if (!Cmp && (RHSC->isZero() || RHSC->isOne())) {
if ((Cmp = emitConjunction(DAG, LHS, AArch64CC))) {
- if ((CC == ISD::SETNE) ^ RHSC->isNullValue())
+ if ((CC == ISD::SETNE) ^ RHSC->isZero())
AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
}
}
@@ -3134,14 +3181,14 @@ SDValue AArch64TargetLowering::LowerXOR(SDValue Op, SelectionDAG &DAG) const {
// We can commute the SELECT_CC by inverting the condition. This
// might be needed to make this fit into a CSINV pattern.
- if (CTVal->isAllOnesValue() && CFVal->isNullValue()) {
+ if (CTVal->isAllOnes() && CFVal->isZero()) {
std::swap(TVal, FVal);
std::swap(CTVal, CFVal);
CC = ISD::getSetCCInverse(CC, LHS.getValueType());
}
// If the constants line up, perform the transform!
- if (CTVal->isNullValue() && CFVal->isAllOnesValue()) {
+ if (CTVal->isZero() && CFVal->isAllOnes()) {
SDValue CCVal;
SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
@@ -3364,42 +3411,132 @@ SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
return SDValue();
}
+SDValue
+AArch64TargetLowering::LowerVectorFP_TO_INT_SAT(SDValue Op,
+ SelectionDAG &DAG) const {
+ // AArch64 FP-to-int conversions saturate to the destination element size, so
+ // we can lower common saturating conversions to simple instructions.
+ SDValue SrcVal = Op.getOperand(0);
+ EVT SrcVT = SrcVal.getValueType();
+ EVT DstVT = Op.getValueType();
+ EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+
+ uint64_t SrcElementWidth = SrcVT.getScalarSizeInBits();
+ uint64_t DstElementWidth = DstVT.getScalarSizeInBits();
+ uint64_t SatWidth = SatVT.getScalarSizeInBits();
+ assert(SatWidth <= DstElementWidth &&
+ "Saturation width cannot exceed result width");
+
+ // TODO: Consider lowering to SVE operations, as in LowerVectorFP_TO_INT.
+  // Currently, the `llvm.fpto[su]i.sat.*` intrinsics don't accept scalable
+ // types, so this is hard to reach.
+ if (DstVT.isScalableVector())
+ return SDValue();
+
+ EVT SrcElementVT = SrcVT.getVectorElementType();
+
+ // In the absence of FP16 support, promote f16 to f32 and saturate the result.
+ if (SrcElementVT == MVT::f16 &&
+ (!Subtarget->hasFullFP16() || DstElementWidth > 16)) {
+ MVT F32VT = MVT::getVectorVT(MVT::f32, SrcVT.getVectorNumElements());
+ SrcVal = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), F32VT, SrcVal);
+ SrcVT = F32VT;
+ SrcElementVT = MVT::f32;
+ SrcElementWidth = 32;
+ } else if (SrcElementVT != MVT::f64 && SrcElementVT != MVT::f32 &&
+ SrcElementVT != MVT::f16)
+ return SDValue();
+
+ SDLoc DL(Op);
+ // Cases that we can emit directly.
+ if (SrcElementWidth == DstElementWidth && SrcElementWidth == SatWidth)
+ return DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal,
+ DAG.getValueType(DstVT.getScalarType()));
+
+ // Otherwise we emit a cvt that saturates to a higher BW, and saturate the
+ // result. This is only valid if the legal cvt is larger than the saturate
+ // width. For double, as we don't have MIN/MAX, it can be simpler to scalarize
+ // (at least until sqxtn is selected).
+ if (SrcElementWidth < SatWidth || SrcElementVT == MVT::f64)
+ return SDValue();
+
+ EVT IntVT = SrcVT.changeVectorElementTypeToInteger();
+ SDValue NativeCvt = DAG.getNode(Op.getOpcode(), DL, IntVT, SrcVal,
+ DAG.getValueType(IntVT.getScalarType()));
+ SDValue Sat;
+ if (Op.getOpcode() == ISD::FP_TO_SINT_SAT) {
+ SDValue MinC = DAG.getConstant(
+ APInt::getSignedMaxValue(SatWidth).sextOrSelf(SrcElementWidth), DL,
+ IntVT);
+ SDValue Min = DAG.getNode(ISD::SMIN, DL, IntVT, NativeCvt, MinC);
+ SDValue MaxC = DAG.getConstant(
+ APInt::getSignedMinValue(SatWidth).sextOrSelf(SrcElementWidth), DL,
+ IntVT);
+ Sat = DAG.getNode(ISD::SMAX, DL, IntVT, Min, MaxC);
+ } else {
+ SDValue MinC = DAG.getConstant(
+ APInt::getAllOnesValue(SatWidth).zextOrSelf(SrcElementWidth), DL,
+ IntVT);
+ Sat = DAG.getNode(ISD::UMIN, DL, IntVT, NativeCvt, MinC);
+ }
+
+ return DAG.getNode(ISD::TRUNCATE, DL, DstVT, Sat);
+}
+
SDValue AArch64TargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
SelectionDAG &DAG) const {
// AArch64 FP-to-int conversions saturate to the destination register size, so
// we can lower common saturating conversions to simple instructions.
SDValue SrcVal = Op.getOperand(0);
-
EVT SrcVT = SrcVal.getValueType();
- EVT DstVT = Op.getValueType();
+ if (SrcVT.isVector())
+ return LowerVectorFP_TO_INT_SAT(Op, DAG);
+
+ EVT DstVT = Op.getValueType();
EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
uint64_t SatWidth = SatVT.getScalarSizeInBits();
uint64_t DstWidth = DstVT.getScalarSizeInBits();
assert(SatWidth <= DstWidth && "Saturation width cannot exceed result width");
- // TODO: Support lowering of NEON and SVE conversions.
- if (SrcVT.isVector())
- return SDValue();
-
- // TODO: Saturate to SatWidth explicitly.
- if (SatWidth != DstWidth)
+ // In the absence of FP16 support, promote f16 to f32 and saturate the result.
+ if (SrcVT == MVT::f16 && !Subtarget->hasFullFP16()) {
+ SrcVal = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, SrcVal);
+ SrcVT = MVT::f32;
+ } else if (SrcVT != MVT::f64 && SrcVT != MVT::f32 && SrcVT != MVT::f16)
return SDValue();
- // In the absence of FP16 support, promote f32 to f16, like LowerFP_TO_INT().
- if (SrcVT == MVT::f16 && !Subtarget->hasFullFP16())
- return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
- DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, SrcVal),
- Op.getOperand(1));
-
+ SDLoc DL(Op);
// Cases that we can emit directly.
if ((SrcVT == MVT::f64 || SrcVT == MVT::f32 ||
(SrcVT == MVT::f16 && Subtarget->hasFullFP16())) &&
- (DstVT == MVT::i64 || DstVT == MVT::i32))
- return Op;
+ DstVT == SatVT && (DstVT == MVT::i64 || DstVT == MVT::i32))
+ return DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal,
+ DAG.getValueType(DstVT));
+
+ // Otherwise we emit a cvt that saturates to a higher BW, and saturate the
+ // result. This is only valid if the legal cvt is larger than the saturate
+ // width.
+ if (DstWidth < SatWidth)
+ return SDValue();
- // For all other cases, fall back on the expanded form.
- return SDValue();
+ SDValue NativeCvt =
+ DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal, DAG.getValueType(DstVT));
+ SDValue Sat;
+ if (Op.getOpcode() == ISD::FP_TO_SINT_SAT) {
+ SDValue MinC = DAG.getConstant(
+ APInt::getSignedMaxValue(SatWidth).sextOrSelf(DstWidth), DL, DstVT);
+ SDValue Min = DAG.getNode(ISD::SMIN, DL, DstVT, NativeCvt, MinC);
+ SDValue MaxC = DAG.getConstant(
+ APInt::getSignedMinValue(SatWidth).sextOrSelf(DstWidth), DL, DstVT);
+ Sat = DAG.getNode(ISD::SMAX, DL, DstVT, Min, MaxC);
+ } else {
+ SDValue MinC = DAG.getConstant(
+ APInt::getAllOnesValue(SatWidth).zextOrSelf(DstWidth), DL, DstVT);
+ Sat = DAG.getNode(ISD::UMIN, DL, DstVT, NativeCvt, MinC);
+ }
+
+ return DAG.getNode(ISD::TRUNCATE, DL, DstVT, Sat);
}
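The scalar and vector FP_TO_*INT_SAT paths above share the same idea: let the hardware conversion saturate to the (wider) destination width, then clamp the result to the requested saturation width with SMIN/SMAX (signed) or UMIN (unsigned). A standalone model of the signed clamp, assuming 0 < SatWidth < 64; the function name is illustrative only:

#include <algorithm>
#include <cstdint>

// Clamp an already-converted wide result to the signed range of SatWidth,
// mirroring the SMIN (with the signed max) followed by SMAX (with the signed
// min) emitted above.
int64_t clampToSignedWidth(int64_t NativeCvt, unsigned SatWidth) {
  int64_t Max = (int64_t(1) << (SatWidth - 1)) - 1;
  int64_t Min = -(int64_t(1) << (SatWidth - 1));
  return std::max(Min, std::min(Max, NativeCvt));
}

int main() {
  return clampToSignedWidth(300, 8) == 127 ? 0 : 1; // i8 saturation example
}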
SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
@@ -3938,8 +4075,8 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(AArch64ISD::SPLICE, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
case Intrinsic::aarch64_sve_ptrue:
- return DAG.getNode(AArch64ISD::PTRUE, dl, Op.getValueType(),
- Op.getOperand(1));
+ return getPTrue(DAG, dl, Op.getValueType(),
+ cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
case Intrinsic::aarch64_sve_clz:
return DAG.getNode(AArch64ISD::CTLZ_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
@@ -4004,6 +4141,18 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::aarch64_sve_frecpx:
return DAG.getNode(AArch64ISD::FRECPX_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
+ case Intrinsic::aarch64_sve_frecpe_x:
+ return DAG.getNode(AArch64ISD::FRECPE, dl, Op.getValueType(),
+ Op.getOperand(1));
+ case Intrinsic::aarch64_sve_frecps_x:
+ return DAG.getNode(AArch64ISD::FRECPS, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::aarch64_sve_frsqrte_x:
+ return DAG.getNode(AArch64ISD::FRSQRTE, dl, Op.getValueType(),
+ Op.getOperand(1));
+ case Intrinsic::aarch64_sve_frsqrts_x:
+ return DAG.getNode(AArch64ISD::FRSQRTS, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_fabs:
return DAG.getNode(AArch64ISD::FABS_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
@@ -4153,14 +4302,17 @@ bool AArch64TargetLowering::shouldExtendGSIndex(EVT VT, EVT &EltTy) const {
bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const {
if (VT.getVectorElementType() == MVT::i32 &&
- VT.getVectorElementCount().getKnownMinValue() >= 4)
+ VT.getVectorElementCount().getKnownMinValue() >= 4 &&
+ !VT.isFixedLengthVector())
return true;
return false;
}
bool AArch64TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
- return ExtVal.getValueType().isScalableVector();
+ return ExtVal.getValueType().isScalableVector() ||
+ useSVEForFixedLengthVectorVT(ExtVal.getValueType(),
+ /*OverrideNEON=*/true);
}
unsigned getGatherVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
@@ -4345,9 +4497,17 @@ SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op,
if (IsFixedLength) {
assert(Subtarget->useSVEForFixedLengthVectors() &&
"Cannot lower when not using SVE for fixed vectors");
- IndexVT = getContainerForFixedLengthVector(DAG, IndexVT);
- MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType());
+ if (MemVT.getScalarSizeInBits() <= IndexVT.getScalarSizeInBits()) {
+ IndexVT = getContainerForFixedLengthVector(DAG, IndexVT);
+ MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType());
+ } else {
+ MemVT = getContainerForFixedLengthVector(DAG, MemVT);
+ IndexVT = MemVT.changeTypeToInteger();
+ }
InputVT = DAG.getValueType(MemVT.changeTypeToInteger());
+ Mask = DAG.getNode(
+ ISD::ZERO_EXTEND, DL,
+ VT.changeVectorElementType(IndexVT.getVectorElementType()), Mask);
}
if (PassThru->isUndef() || isZerosVector(PassThru.getNode()))
@@ -4442,8 +4602,13 @@ SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op,
if (IsFixedLength) {
assert(Subtarget->useSVEForFixedLengthVectors() &&
"Cannot lower when not using SVE for fixed vectors");
- IndexVT = getContainerForFixedLengthVector(DAG, IndexVT);
- MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType());
+ if (MemVT.getScalarSizeInBits() <= IndexVT.getScalarSizeInBits()) {
+ IndexVT = getContainerForFixedLengthVector(DAG, IndexVT);
+ MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType());
+ } else {
+ MemVT = getContainerForFixedLengthVector(DAG, MemVT);
+ IndexVT = MemVT.changeTypeToInteger();
+ }
InputVT = DAG.getValueType(MemVT.changeTypeToInteger());
StoreVal =
@@ -4452,6 +4617,9 @@ SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op,
ISD::ANY_EXTEND, DL,
VT.changeVectorElementType(IndexVT.getVectorElementType()), StoreVal);
StoreVal = convertToScalableVector(DAG, IndexVT, StoreVal);
+ Mask = DAG.getNode(
+ ISD::ZERO_EXTEND, DL,
+ VT.changeVectorElementType(IndexVT.getVectorElementType()), Mask);
} else if (VT.isFloatingPoint()) {
// Handle FP data by casting the data so an integer scatter can be used.
EVT StoreValVT = getPackedSVEVectorVT(VT.getVectorElementCount());
@@ -4593,29 +4761,77 @@ SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
return Result;
}
} else if (MemVT == MVT::i128 && StoreNode->isVolatile()) {
- assert(StoreNode->getValue()->getValueType(0) == MVT::i128);
- SDValue Lo =
- DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i64, StoreNode->getValue(),
- DAG.getConstant(0, Dl, MVT::i64));
- SDValue Hi =
- DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i64, StoreNode->getValue(),
- DAG.getConstant(1, Dl, MVT::i64));
- SDValue Result = DAG.getMemIntrinsicNode(
- AArch64ISD::STP, Dl, DAG.getVTList(MVT::Other),
- {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
- StoreNode->getMemoryVT(), StoreNode->getMemOperand());
- return Result;
+ return LowerStore128(Op, DAG);
+ } else if (MemVT == MVT::i64x8) {
+ SDValue Value = StoreNode->getValue();
+ assert(Value->getValueType(0) == MVT::i64x8);
+ SDValue Chain = StoreNode->getChain();
+ SDValue Base = StoreNode->getBasePtr();
+ EVT PtrVT = Base.getValueType();
+ for (unsigned i = 0; i < 8; i++) {
+ SDValue Part = DAG.getNode(AArch64ISD::LS64_EXTRACT, Dl, MVT::i64,
+ Value, DAG.getConstant(i, Dl, MVT::i32));
+ SDValue Ptr = DAG.getNode(ISD::ADD, Dl, PtrVT, Base,
+ DAG.getConstant(i * 8, Dl, PtrVT));
+ Chain = DAG.getStore(Chain, Dl, Part, Ptr, StoreNode->getPointerInfo(),
+ StoreNode->getOriginalAlign());
+ }
+ return Chain;
}
return SDValue();
}
-// Custom lowering for extending v4i8 vector loads.
+/// Lower atomic or volatile 128-bit stores to a single STP instruction.
+SDValue AArch64TargetLowering::LowerStore128(SDValue Op,
+ SelectionDAG &DAG) const {
+ MemSDNode *StoreNode = cast<MemSDNode>(Op);
+ assert(StoreNode->getMemoryVT() == MVT::i128);
+ assert(StoreNode->isVolatile() || StoreNode->isAtomic());
+ assert(!StoreNode->isAtomic() ||
+ StoreNode->getMergedOrdering() == AtomicOrdering::Unordered ||
+ StoreNode->getMergedOrdering() == AtomicOrdering::Monotonic);
+
+ SDValue Value = StoreNode->getOpcode() == ISD::STORE
+ ? StoreNode->getOperand(1)
+ : StoreNode->getOperand(2);
+ SDLoc DL(Op);
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, Value,
+ DAG.getConstant(0, DL, MVT::i64));
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, Value,
+ DAG.getConstant(1, DL, MVT::i64));
+ SDValue Result = DAG.getMemIntrinsicNode(
+ AArch64ISD::STP, DL, DAG.getVTList(MVT::Other),
+ {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
+ StoreNode->getMemoryVT(), StoreNode->getMemOperand());
+ return Result;
+}
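For reference, the Lo/Hi split performed by LowerStore128 corresponds to the following scalar picture, assuming the little-endian layout this code targets (uses the Clang/GCC __int128 extension; names are illustrative):

#include <cstdint>
#include <cstring>

// Split a 128-bit value into two 64-bit halves and store them as a pair,
// which the single STP emitted above does in one instruction.
void store128(void *Addr, unsigned __int128 V) {
  uint64_t Lo = static_cast<uint64_t>(V);
  uint64_t Hi = static_cast<uint64_t>(V >> 64);
  std::memcpy(Addr, &Lo, sizeof(Lo));
  std::memcpy(static_cast<char *>(Addr) + 8, &Hi, sizeof(Hi));
}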
+
SDValue AArch64TargetLowering::LowerLOAD(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
assert(LoadNode && "Expected custom lowering of a load node");
+
+ if (LoadNode->getMemoryVT() == MVT::i64x8) {
+ SmallVector<SDValue, 8> Ops;
+ SDValue Base = LoadNode->getBasePtr();
+ SDValue Chain = LoadNode->getChain();
+ EVT PtrVT = Base.getValueType();
+ for (unsigned i = 0; i < 8; i++) {
+ SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Base,
+ DAG.getConstant(i * 8, DL, PtrVT));
+ SDValue Part = DAG.getLoad(MVT::i64, DL, Chain, Ptr,
+ LoadNode->getPointerInfo(),
+ LoadNode->getOriginalAlign());
+ Ops.push_back(Part);
+ Chain = SDValue(Part.getNode(), 1);
+ }
+ SDValue Loaded = DAG.getNode(AArch64ISD::LS64_BUILD, DL, MVT::i64x8, Ops);
+ return DAG.getMergeValues({Loaded, Chain}, DL);
+ }
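The i64x8 (LS64) load above is expanded into eight ordinary 64-bit loads from consecutive 8-byte offsets; a plain C++ model of that layout, with made-up struct and function names:

#include <cstdint>
#include <cstring>

// A 512-bit LS64 operand modelled as eight u64 parts read from Base + i*8,
// matching the per-part loads chained together above.
struct U64x8 {
  uint64_t Part[8];
};

U64x8 loadI64x8(const void *Base) {
  U64x8 V;
  for (unsigned i = 0; i < 8; ++i)
    std::memcpy(&V.Part[i], static_cast<const char *>(Base) + i * 8, 8);
  return V;
}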
+
+ // Custom lowering for extending v4i8 vector loads.
EVT VT = Op->getValueType(0);
assert((VT == MVT::v4i16 || VT == MVT::v4i32) && "Expected v4i16 or v4i32");
@@ -4777,17 +4993,10 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
case ISD::UDIV:
return LowerDIV(Op, DAG);
case ISD::SMIN:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED,
- /*OverrideNEON=*/true);
case ISD::UMIN:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED,
- /*OverrideNEON=*/true);
case ISD::SMAX:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED,
- /*OverrideNEON=*/true);
case ISD::UMAX:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_PRED,
- /*OverrideNEON=*/true);
+ return LowerMinMax(Op, DAG);
case ISD::SRA:
case ISD::SRL:
case ISD::SHL:
@@ -4835,6 +5044,12 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
/*OverrideNEON=*/true);
case ISD::INTRINSIC_WO_CHAIN:
return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::ATOMIC_STORE:
+ if (cast<MemSDNode>(Op)->getMemoryVT() == MVT::i128) {
+ assert(Subtarget->hasLSE2());
+ return LowerStore128(Op, DAG);
+ }
+ return SDValue();
case ISD::STORE:
return LowerSTORE(Op, DAG);
case ISD::MSTORE:
@@ -5025,8 +5240,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
DenseMap<unsigned, SDValue> CopiedRegs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
- *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
// At this point, Ins[].VT may already be promoted to i32. To correctly
// handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
@@ -5186,10 +5400,9 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
break;
}
- ArgValue = DAG.getExtLoad(
- ExtType, DL, VA.getLocVT(), Chain, FIN,
- MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
- MemVT);
+ ArgValue =
+ DAG.getExtLoad(ExtType, DL, VA.getLocVT(), Chain, FIN,
+ MachinePointerInfo::getFixedStack(MF, FI), MemVT);
}
if (VA.getLocInfo() == CCValAssign::Indirect) {
@@ -5229,6 +5442,19 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
if (Subtarget->isTargetILP32() && Ins[i].Flags.isPointer())
ArgValue = DAG.getNode(ISD::AssertZext, DL, ArgValue.getValueType(),
ArgValue, DAG.getValueType(MVT::i32));
+
+ // i1 arguments are zero-extended to i8 by the caller. Emit a
+ // hint to reflect this.
+ if (Ins[i].isOrigArg()) {
+ Argument *OrigArg = MF.getFunction().getArg(Ins[i].getOrigArgIndex());
+ if (OrigArg->getType()->isIntegerTy(1)) {
+ if (!Ins[i].Flags.isZExt()) {
+ ArgValue = DAG.getNode(AArch64ISD::ASSERT_ZEXT_BOOL, DL,
+ ArgValue.getValueType(), ArgValue);
+ }
+ }
+ }
+
InVals.push_back(ArgValue);
}
}
@@ -5350,13 +5576,11 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass);
SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
- SDValue Store = DAG.getStore(
- Val.getValue(1), DL, Val, FIN,
- IsWin64
- ? MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
- GPRIdx,
- (i - FirstVariadicGPR) * 8)
- : MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 8));
+ SDValue Store =
+ DAG.getStore(Val.getValue(1), DL, Val, FIN,
+ IsWin64 ? MachinePointerInfo::getFixedStack(
+ MF, GPRIdx, (i - FirstVariadicGPR) * 8)
+ : MachinePointerInfo::getStack(MF, i * 8));
MemOps.push_back(Store);
FIN =
DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT));
@@ -5383,9 +5607,8 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass);
SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
- SDValue Store = DAG.getStore(
- Val.getValue(1), DL, Val, FIN,
- MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 16));
+ SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN,
+ MachinePointerInfo::getStack(MF, i * 16));
MemOps.push_back(Store);
FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
DAG.getConstant(16, DL, PtrVT));
@@ -5645,10 +5868,8 @@ SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
ArgChains.push_back(Chain);
// Add a chain value for each stack argument corresponding
- for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(),
- UE = DAG.getEntryNode().getNode()->use_end();
- U != UE; ++U)
- if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
+ for (SDNode *U : DAG.getEntryNode().getNode()->uses())
+ if (LoadSDNode *L = dyn_cast<LoadSDNode>(U))
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
if (FI->getIndex() < 0) {
int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex());
@@ -5670,6 +5891,19 @@ bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
CallCC == CallingConv::Tail || CallCC == CallingConv::SwiftTail;
}
+// Check if the value is zero-extended from i1 to i8
+static bool checkZExtBool(SDValue Arg, const SelectionDAG &DAG) {
+ unsigned SizeInBits = Arg.getValueType().getSizeInBits();
+ if (SizeInBits < 8)
+ return false;
+
+ APInt LowBits(SizeInBits, 0xFF);
+  APInt RequiredZero(SizeInBits, 0xFE);
+  KnownBits Bits = DAG.computeKnownBits(Arg, LowBits, 4);
+  bool ZExtBool = (Bits.Zero & RequiredZero) == RequiredZero;
+ return ZExtBool;
+}
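checkZExtBool above asks, via known bits, whether an i1 argument already satisfies the AAPCS rule that the caller zero-extends it to 8 bits. Reduced to plain integers, the predicate it computes looks like this (helper name is hypothetical):

#include <cstdint>

// The argument already looks like a zero-extended bool when bits 1..7 of its
// low byte are known to be zero, i.e. the low byte can only be 0 or 1.
bool lowByteIsBool(uint64_t KnownZeroMask) {
  const uint64_t RequiredZero = 0xFE; // bits 1..7 must be known zero
  return (KnownZeroMask & RequiredZero) == RequiredZero;
}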
+
/// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain,
/// and add input and output parameter nodes.
SDValue
@@ -5730,8 +5964,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
- *DAG.getContext());
+ CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
if (IsVarArg) {
// Handle fixed and variable vector arguments differently.
@@ -5868,8 +6101,22 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
case CCValAssign::AExt:
if (Outs[i].ArgVT == MVT::i1) {
// AAPCS requires i1 to be zero-extended to 8-bits by the caller.
- Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
- Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i8, Arg);
+ //
+ // Check if we actually have to do this, because the value may
+ // already be zero-extended.
+ //
+ // We cannot just emit a (zext i8 (trunc (assert-zext i8)))
+ // and rely on DAGCombiner to fold this, because the following
+ // (anyext i32) is combined with (zext i8) in DAG.getNode:
+ //
+ // (ext (zext x)) -> (zext x)
+ //
+ // This will give us (zext i32), which we cannot remove, so
+ // try to check this beforehand.
+ if (!checkZExtBool(Arg, DAG)) {
+ Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i8, Arg);
+ }
}
Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
break;
@@ -5902,14 +6149,13 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
StoreSize *= NumParts;
}
- MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
Type *Ty = EVT(VA.getValVT()).getTypeForEVT(*DAG.getContext());
Align Alignment = DAG.getDataLayout().getPrefTypeAlign(Ty);
int FI = MFI.CreateStackObject(StoreSize, Alignment, false);
MFI.setStackID(FI, TargetStackID::ScalableVector);
- MachinePointerInfo MPI =
- MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
+ MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
SDValue Ptr = DAG.getFrameIndex(
FI, DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()));
SDValue SpillSlot = Ptr;
@@ -6004,8 +6250,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
DstAddr = DAG.getFrameIndex(FI, PtrVT);
- DstInfo =
- MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
+ DstInfo = MachinePointerInfo::getFixedStack(MF, FI);
// Make sure any stack arguments overlapping with where we're storing
// are loaded before this eventual operation. Otherwise they'll be
@@ -6015,8 +6260,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
- DstInfo = MachinePointerInfo::getStack(DAG.getMachineFunction(),
- LocMemOffset);
+ DstInfo = MachinePointerInfo::getStack(MF, LocMemOffset);
}
if (Outs[i].Flags.isByVal()) {
@@ -6196,8 +6440,7 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
- *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
CCInfo.AnalyzeReturn(Outs, RetCC);
// Copy the result values into the output registers.
@@ -6274,8 +6517,7 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
}
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
- const MCPhysReg *I =
- TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
+ const MCPhysReg *I = TRI->getCalleeSavedRegsViaCopy(&MF);
if (I) {
for (; *I; ++I) {
if (AArch64::GPR64RegClass.contains(*I))
@@ -6938,6 +7180,30 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
SDValue In2 = Op.getOperand(1);
EVT SrcVT = In2.getValueType();
+ if (VT.isScalableVector()) {
+ if (VT != SrcVT)
+ return SDValue();
+
+ // copysign(x,y) -> (y & SIGN_MASK) | (x & ~SIGN_MASK)
+ //
+ // A possible alternative sequence involves using FNEG_MERGE_PASSTHRU;
+ // maybe useful for copysign operations with mismatched VTs.
+ //
+ // IntVT here is chosen so it's a legal type with the same element width
+ // as the input.
+ EVT IntVT =
+ getPackedSVEVectorVT(VT.getVectorElementType().changeTypeToInteger());
+ unsigned NumBits = VT.getScalarSizeInBits();
+ SDValue SignMask = DAG.getConstant(APInt::getSignMask(NumBits), DL, IntVT);
+ SDValue InvSignMask = DAG.getNOT(DL, SignMask, IntVT);
+ SDValue Sign = DAG.getNode(ISD::AND, DL, IntVT, SignMask,
+ getSVESafeBitCast(IntVT, In2, DAG));
+ SDValue Magnitude = DAG.getNode(ISD::AND, DL, IntVT, InvSignMask,
+ getSVESafeBitCast(IntVT, In1, DAG));
+ SDValue IntResult = DAG.getNode(ISD::OR, DL, IntVT, Sign, Magnitude);
+ return getSVESafeBitCast(VT, IntResult, DAG);
+ }
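The integer-mask formula in the comment above, copysign(x,y) = (y & SIGN_MASK) | (x & ~SIGN_MASK), can be checked on a scalar double; the lowering applies the same masks lane-wise on the bitcast vector. Sketch only, with an illustrative name:

#include <cstdint>
#include <cstring>

// Scalar model of the masked OR used for the scalable-vector FCOPYSIGN:
// take the sign bit from Y and the magnitude bits from X.
double copysignViaMasks(double X, double Y) {
  uint64_t XI, YI;
  std::memcpy(&XI, &X, sizeof(XI));
  std::memcpy(&YI, &Y, sizeof(YI));
  const uint64_t SignMask = uint64_t(1) << 63;
  uint64_t R = (YI & SignMask) | (XI & ~SignMask);
  double Res;
  std::memcpy(&Res, &R, sizeof(Res));
  return Res; // copysignViaMasks(3.0, -1.0) == -3.0
}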
+
if (SrcVT.bitsLT(VT))
In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2);
else if (SrcVT.bitsGT(VT))
@@ -7083,6 +7349,56 @@ SDValue AArch64TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::CTLZ, DL, VT, RBIT);
}
+SDValue AArch64TargetLowering::LowerMinMax(SDValue Op,
+ SelectionDAG &DAG) const {
+
+ EVT VT = Op.getValueType();
+ SDLoc DL(Op);
+ unsigned Opcode = Op.getOpcode();
+ ISD::CondCode CC;
+ switch (Opcode) {
+ default:
+ llvm_unreachable("Wrong instruction");
+ case ISD::SMAX:
+ CC = ISD::SETGT;
+ break;
+ case ISD::SMIN:
+ CC = ISD::SETLT;
+ break;
+ case ISD::UMAX:
+ CC = ISD::SETUGT;
+ break;
+ case ISD::UMIN:
+ CC = ISD::SETULT;
+ break;
+ }
+
+ if (VT.isScalableVector() ||
+ useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true)) {
+ switch (Opcode) {
+ default:
+ llvm_unreachable("Wrong instruction");
+ case ISD::SMAX:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED,
+ /*OverrideNEON=*/true);
+ case ISD::SMIN:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED,
+ /*OverrideNEON=*/true);
+ case ISD::UMAX:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_PRED,
+ /*OverrideNEON=*/true);
+ case ISD::UMIN:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED,
+ /*OverrideNEON=*/true);
+ }
+ }
+
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue Cond = DAG.getSetCC(DL, VT, Op0, Op1, CC);
+ return DAG.getSelect(DL, VT, Cond, Op0, Op1);
+}
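When neither a native NEON instruction nor the SVE predicated form applies, LowerMinMax falls back to a compare feeding a select; the scalar equivalent of that fallback for UMAX is simply the following (illustrative helper):

#include <cstdint>

// setcc (SETUGT) followed by a select, as emitted at the end of LowerMinMax.
uint64_t umax64(uint64_t A, uint64_t B) {
  bool Cond = A > B;   // SETUGT
  return Cond ? A : B; // select
}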
+
SDValue AArch64TargetLowering::LowerBitreverse(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
@@ -7255,8 +7571,8 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
// Check for sign pattern (SELECT_CC setgt, iN lhs, -1, 1, -1) and transform
  // into (OR (ASR lhs, N-1), 1), which requires fewer instructions for the
// supported types.
- if (CC == ISD::SETGT && RHSC && RHSC->isAllOnesValue() && CTVal && CFVal &&
- CTVal->isOne() && CFVal->isAllOnesValue() &&
+ if (CC == ISD::SETGT && RHSC && RHSC->isAllOnes() && CTVal && CFVal &&
+ CTVal->isOne() && CFVal->isAllOnes() &&
LHS.getValueType() == TVal.getValueType()) {
EVT VT = LHS.getValueType();
SDValue Shift =
@@ -7269,11 +7585,11 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
// If both the TVal and the FVal are constants, see if we can swap them in
  // order to form a CSINV or CSINC out of them.
- if (CTVal && CFVal && CTVal->isAllOnesValue() && CFVal->isNullValue()) {
+ if (CTVal && CFVal && CTVal->isAllOnes() && CFVal->isZero()) {
std::swap(TVal, FVal);
std::swap(CTVal, CFVal);
CC = ISD::getSetCCInverse(CC, LHS.getValueType());
- } else if (CTVal && CFVal && CTVal->isOne() && CFVal->isNullValue()) {
+ } else if (CTVal && CFVal && CTVal->isOne() && CFVal->isZero()) {
std::swap(TVal, FVal);
std::swap(CTVal, CFVal);
CC = ISD::getSetCCInverse(CC, LHS.getValueType());
@@ -7352,7 +7668,7 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
// FVal, respectively.
ConstantSDNode *RHSVal = dyn_cast<ConstantSDNode>(RHS);
if (Opcode == AArch64ISD::CSEL && RHSVal && !RHSVal->isOne() &&
- !RHSVal->isNullValue() && !RHSVal->isAllOnesValue()) {
+ !RHSVal->isZero() && !RHSVal->isAllOnes()) {
AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
// Transform "a == C ? C : x" to "a == C ? a : x" and "a != C ? x : C" to
// "a != C ? x : a" to avoid materializing C.
@@ -7425,11 +7741,14 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
SDValue AArch64TargetLowering::LowerVECTOR_SPLICE(SDValue Op,
SelectionDAG &DAG) const {
-
EVT Ty = Op.getValueType();
auto Idx = Op.getConstantOperandAPInt(2);
- if (Idx.sge(-1) && Idx.slt(Ty.getVectorMinNumElements()))
+
+  // This will select to an EXT instruction, which has a maximum immediate
+  // value of 255, hence 2048 bits is the largest vector size we can lower.
+ if (Idx.sge(-1) && Idx.slt(2048 / Ty.getVectorElementType().getSizeInBits()))
return Op;
+
return SDValue();
}
@@ -7937,10 +8256,12 @@ static SDValue getEstimate(const AArch64Subtarget *ST, unsigned Opcode,
SDValue Operand, SelectionDAG &DAG,
int &ExtraSteps) {
EVT VT = Operand.getValueType();
- if (ST->hasNEON() &&
- (VT == MVT::f64 || VT == MVT::v1f64 || VT == MVT::v2f64 ||
- VT == MVT::f32 || VT == MVT::v1f32 ||
- VT == MVT::v2f32 || VT == MVT::v4f32)) {
+ if ((ST->hasNEON() &&
+ (VT == MVT::f64 || VT == MVT::v1f64 || VT == MVT::v2f64 ||
+ VT == MVT::f32 || VT == MVT::v1f32 || VT == MVT::v2f32 ||
+ VT == MVT::v4f32)) ||
+ (ST->hasSVE() &&
+ (VT == MVT::nxv8f16 || VT == MVT::nxv4f32 || VT == MVT::nxv2f64))) {
if (ExtraSteps == TargetLoweringBase::ReciprocalEstimate::Unspecified)
// For the reciprocal estimates, convergence is quadratic, so the number
// of digits is doubled after each iteration. In ARMv8, the accuracy of
@@ -8173,6 +8494,8 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
case 'r':
if (VT.isScalableVector())
return std::make_pair(0U, nullptr);
+ if (Subtarget->hasLS64() && VT.getSizeInBits() == 512)
+ return std::make_pair(0U, &AArch64::GPR64x8ClassRegClass);
if (VT.getFixedSizeInBits() == 64)
return std::make_pair(0U, &AArch64::GPR64commonRegClass);
return std::make_pair(0U, &AArch64::GPR32commonRegClass);
@@ -8260,6 +8583,15 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
return Res;
}
+EVT AArch64TargetLowering::getAsmOperandValueType(const DataLayout &DL,
+ llvm::Type *Ty,
+ bool AllowUnknown) const {
+ if (Subtarget->hasLS64() && Ty->isIntegerTy(512))
+ return EVT(MVT::i64x8);
+
+ return TargetLowering::getAsmOperandValueType(DL, Ty, AllowUnknown);
+}
+
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
void AArch64TargetLowering::LowerAsmOperandForConstraint(
@@ -8618,7 +8950,7 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
Src.WindowBase *= Src.WindowScale;
}
- // Final sanity check before we try to actually produce a shuffle.
+ // Final check before we try to actually produce a shuffle.
LLVM_DEBUG(for (auto Src
: Sources)
assert(Src.ShuffleVec.getValueType() == ShuffleVT););
@@ -9250,8 +9582,11 @@ static SDValue constructDup(SDValue V, int Lane, SDLoc dl, EVT VT,
} else if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
// The lane is incremented by the index of the extract.
// Example: dup v2f32 (extract v4f32 X, 2), 1 --> dup v4f32 X, 3
- Lane += V.getConstantOperandVal(1);
- V = V.getOperand(0);
+ auto VecVT = V.getOperand(0).getValueType();
+ if (VecVT.isFixedLengthVector() && VecVT.getFixedSizeInBits() <= 128) {
+ Lane += V.getConstantOperandVal(1);
+ V = V.getOperand(0);
+ }
} else if (V.getOpcode() == ISD::CONCAT_VECTORS) {
// The lane is decremented if we are splatting from the 2nd operand.
// Example: dup v4i32 (concat v2i32 X, v2i32 Y), 3 --> dup v4i32 Y, 1
@@ -9265,6 +9600,86 @@ static SDValue constructDup(SDValue V, int Lane, SDLoc dl, EVT VT,
return DAG.getNode(Opcode, dl, VT, V, DAG.getConstant(Lane, dl, MVT::i64));
}
+// Return true if we can build a new shuffle mask by checking that every two
+// adjacent values in the parameter mask array are consecutive and start from
+// an even number.
+static bool isWideTypeMask(ArrayRef<int> M, EVT VT,
+ SmallVectorImpl<int> &NewMask) {
+ unsigned NumElts = VT.getVectorNumElements();
+ if (NumElts % 2 != 0)
+ return false;
+
+ NewMask.clear();
+ for (unsigned i = 0; i < NumElts; i += 2) {
+ int M0 = M[i];
+ int M1 = M[i + 1];
+
+ // If both elements are undef, new mask is undef too.
+ if (M0 == -1 && M1 == -1) {
+ NewMask.push_back(-1);
+ continue;
+ }
+
+ if (M0 == -1 && M1 != -1 && (M1 % 2) == 1) {
+ NewMask.push_back(M1 / 2);
+ continue;
+ }
+
+ if (M0 != -1 && (M0 % 2) == 0 && ((M0 + 1) == M1 || M1 == -1)) {
+ NewMask.push_back(M0 / 2);
+ continue;
+ }
+
+ NewMask.clear();
+ return false;
+ }
+
+ assert(NewMask.size() == NumElts / 2 && "Incorrect size for mask!");
+ return true;
+}
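// Illustrative traces of the pairing rule above: for M = <6, 7, 2, 3> both
// pairs start at an even value and are consecutive, so NewMask = <3, 1>; for
// M = <1, 2, 5, 6> the first pair starts at the odd value 1, no branch
// matches, and the function clears NewMask and returns false.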
+
+// Try to widen the element type to get a new mask for a better permutation
+// sequence, so that we can use NEON shuffle instructions such as ZIP1/2,
+// UZP1/2, TRN1/2, REV, INS, etc.
+// For example:
+// shufflevector <4 x i32> %a, <4 x i32> %b,
+// <4 x i32> <i32 6, i32 7, i32 2, i32 3>
+// is equivalent to:
+// shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
+// Finally, we can get:
+// mov v0.d[0], v1.d[1]
+static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ EVT ScalarVT = VT.getVectorElementType();
+ unsigned ElementSize = ScalarVT.getFixedSizeInBits();
+ SDValue V0 = Op.getOperand(0);
+ SDValue V1 = Op.getOperand(1);
+ ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
+
+  // When combining adjacent elements (e.g. two i16s -> i32, two i32s -> i64),
+  // the wider element type must be legal. Thus, ElementSize should not be
+  // larger than 32 bits, and the i1 type is also excluded.
+ if (ElementSize > 32 || ElementSize == 1)
+ return SDValue();
+
+ SmallVector<int, 8> NewMask;
+ if (isWideTypeMask(Mask, VT, NewMask)) {
+ MVT NewEltVT = VT.isFloatingPoint()
+ ? MVT::getFloatingPointVT(ElementSize * 2)
+ : MVT::getIntegerVT(ElementSize * 2);
+ MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
+ if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
+ V0 = DAG.getBitcast(NewVT, V0);
+ V1 = DAG.getBitcast(NewVT, V1);
+ return DAG.getBitcast(VT,
+ DAG.getVectorShuffle(NewVT, DL, V0, V1, NewMask));
+ }
+ }
+
+ return SDValue();
+}
+
SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -9412,6 +9827,9 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
DstLaneV);
}
+ if (SDValue NewSD = tryWidenMaskForShuffle(Op, DAG))
+ return NewSD;
+
// If the shuffle is not directly supported and it has 4 elements, use
// the PerfectShuffle-generated table to synthesize it from other shuffles.
unsigned NumElts = VT.getVectorNumElements();
@@ -9454,9 +9872,10 @@ SDValue AArch64TargetLowering::LowerSPLAT_VECTOR(SDValue Op,
// The only legal i1 vectors are SVE vectors, so we can use SVE-specific
// lowering code.
if (auto *ConstVal = dyn_cast<ConstantSDNode>(SplatVal)) {
+ if (ConstVal->isZero())
+ return SDValue(DAG.getMachineNode(AArch64::PFALSE, dl, VT), 0);
if (ConstVal->isOne())
return getPTrue(DAG, dl, VT, AArch64SVEPredPattern::all);
- // TODO: Add special case for constant false
}
// The general case of i1. There isn't any natural way to do this,
// so we use some trickery with whilelo.
@@ -10007,7 +10426,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
unsigned BitSize = VT.getVectorElementType().getSizeInBits();
APInt Val(BitSize,
Const->getAPIntValue().zextOrTrunc(BitSize).getZExtValue());
- if (Val.isNullValue() || Val.isAllOnesValue())
+ if (Val.isZero() || Val.isAllOnes())
return Op;
}
}
@@ -10311,8 +10730,29 @@ SDValue AArch64TargetLowering::LowerCONCAT_VECTORS(SDValue Op,
isTypeLegal(Op.getValueType()) &&
"Expected legal scalable vector type!");
- if (isTypeLegal(Op.getOperand(0).getValueType()) && Op.getNumOperands() == 2)
- return Op;
+ if (isTypeLegal(Op.getOperand(0).getValueType())) {
+ unsigned NumOperands = Op->getNumOperands();
+ assert(NumOperands > 1 && isPowerOf2_32(NumOperands) &&
+ "Unexpected number of operands in CONCAT_VECTORS");
+
+ if (NumOperands == 2)
+ return Op;
+
+ // Concat each pair of subvectors and pack into the lower half of the array.
+ SmallVector<SDValue> ConcatOps(Op->op_begin(), Op->op_end());
+ while (ConcatOps.size() > 1) {
+ for (unsigned I = 0, E = ConcatOps.size(); I != E; I += 2) {
+ SDValue V1 = ConcatOps[I];
+ SDValue V2 = ConcatOps[I + 1];
+ EVT SubVT = V1.getValueType();
+ EVT PairVT = SubVT.getDoubleNumVectorElementsVT(*DAG.getContext());
+ ConcatOps[I / 2] =
+ DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op), PairVT, V1, V2);
+ }
+ ConcatOps.resize(ConcatOps.size() / 2);
+ }
+ return ConcatOps[0];
+ }
return SDValue();
}
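// Illustrative trace of the pairwise reduction above: for four operands
// V0..V3 of a legal sub-type, the first pass builds concat(V0,V1) and
// concat(V2,V3) in the doubled type, the second pass concatenates those two
// results, and that single node is returned as ConcatOps[0].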
@@ -10432,6 +10872,10 @@ SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
unsigned Size = Op.getValueSizeInBits();
+ // If we don't have legal types yet, do nothing
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(InVT))
+ return SDValue();
+
if (InVT.isScalableVector()) {
// This will be matched by custom code during ISelDAGToDAG.
if (Idx == 0 && isPackedVectorType(InVT, DAG))
@@ -10450,6 +10894,18 @@ SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
InVT.getSizeInBits() == 128)
return Op;
+ if (useSVEForFixedLengthVectorVT(InVT)) {
+ SDLoc DL(Op);
+
+ EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
+ SDValue NewInVec =
+ convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
+
+ SDValue Splice = DAG.getNode(ISD::VECTOR_SPLICE, DL, ContainerVT, NewInVec,
+ NewInVec, DAG.getConstant(Idx, DL, MVT::i64));
+ return convertFromScalableVector(DAG, Op.getValueType(), Splice);
+ }
+
return SDValue();
}
@@ -10465,7 +10921,7 @@ SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
SDLoc DL(Op);
EVT VT = Op.getValueType();
- if (!isTypeLegal(VT) || !VT.isInteger())
+ if (!isTypeLegal(VT))
return SDValue();
SDValue Vec0 = Op.getOperand(0);
@@ -10475,9 +10931,19 @@ SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
if (VT.getVectorElementCount() != (InVT.getVectorElementCount() * 2))
return SDValue();
- // Extend elements of smaller vector...
- EVT WideVT = InVT.widenIntegerVectorElementType(*(DAG.getContext()));
- SDValue ExtVec = DAG.getNode(ISD::ANY_EXTEND, DL, WideVT, Vec1);
+ EVT WideVT;
+ SDValue ExtVec;
+
+ if (VT.isFloatingPoint()) {
+ // The InVT type should be legal. We can safely cast the unpacked
+ // subvector from InVT -> VT.
+ WideVT = VT;
+ ExtVec = getSVESafeBitCast(VT, Vec1, DAG);
+ } else {
+ // Extend elements of smaller vector...
+ WideVT = InVT.widenIntegerVectorElementType(*(DAG.getContext()));
+ ExtVec = DAG.getNode(ISD::ANY_EXTEND, DL, WideVT, Vec1);
+ }
if (Idx == 0) {
SDValue HiVec0 = DAG.getNode(AArch64ISD::UUNPKHI, DL, WideVT, Vec0);
@@ -11085,7 +11551,7 @@ setInfoSVEStN(const AArch64TargetLowering &TLI, const DataLayout &DL,
// memVT is `NumVecs * VT`.
Info.memVT = EVT::getVectorVT(CI.getType()->getContext(), VT.getScalarType(),
EC * NumVecs);
- Info.ptrVal = CI.getArgOperand(CI.getNumArgOperands() - 1);
+ Info.ptrVal = CI.getArgOperand(CI.arg_size() - 1);
Info.offset = 0;
Info.align.reset();
Info.flags = MachineMemOperand::MOStore;
@@ -11123,7 +11589,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
// Conservatively set memVT to the entire set of vectors loaded.
uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
- Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
+ Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
Info.offset = 0;
Info.align.reset();
// volatile loads with NEON intrinsics not supported
@@ -11142,14 +11608,14 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.opc = ISD::INTRINSIC_VOID;
// Conservatively set memVT to the entire set of vectors stored.
unsigned NumElts = 0;
- for (unsigned ArgI = 0, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
- Type *ArgTy = I.getArgOperand(ArgI)->getType();
+ for (const Value *Arg : I.args()) {
+ Type *ArgTy = Arg->getType();
if (!ArgTy->isVectorTy())
break;
NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
}
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
- Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
+ Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
Info.offset = 0;
Info.align.reset();
// volatile stores with NEON intrinsics not supported
@@ -11203,9 +11669,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.ptrVal = I.getArgOperand(1);
Info.offset = 0;
Info.align = DL.getABITypeAlign(PtrTy->getElementType());
- Info.flags = MachineMemOperand::MOLoad;
- if (Intrinsic == Intrinsic::aarch64_sve_ldnt1)
- Info.flags |= MachineMemOperand::MONonTemporal;
+ Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MONonTemporal;
return true;
}
case Intrinsic::aarch64_sve_stnt1: {
@@ -11215,9 +11679,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.ptrVal = I.getArgOperand(2);
Info.offset = 0;
Info.align = DL.getABITypeAlign(PtrTy->getElementType());
- Info.flags = MachineMemOperand::MOStore;
- if (Intrinsic == Intrinsic::aarch64_sve_stnt1)
- Info.flags |= MachineMemOperand::MONonTemporal;
+ Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MONonTemporal;
return true;
}
default:
@@ -11502,7 +11964,7 @@ bool AArch64TargetLowering::shouldSinkOperands(
// can sink them too.
auto Ext1 = cast<Instruction>(I->getOperand(0));
auto Ext2 = cast<Instruction>(I->getOperand(1));
- if (areExtractShuffleVectors(Ext1, Ext2)) {
+ if (areExtractShuffleVectors(Ext1->getOperand(0), Ext2->getOperand(0))) {
Ops.push_back(&Ext1->getOperandUse(0));
Ops.push_back(&Ext2->getOperandUse(0));
}
@@ -11568,10 +12030,10 @@ bool AArch64TargetLowering::hasPairedLoad(EVT LoadedType,
/// A helper function for determining the number of interleaved accesses we
/// will generate when lowering accesses of the given type.
-unsigned
-AArch64TargetLowering::getNumInterleavedAccesses(VectorType *VecTy,
- const DataLayout &DL) const {
- return (DL.getTypeSizeInBits(VecTy) + 127) / 128;
+unsigned AArch64TargetLowering::getNumInterleavedAccesses(
+ VectorType *VecTy, const DataLayout &DL, bool UseScalable) const {
+ unsigned VecSize = UseScalable ? Subtarget->getMinSVEVectorSizeInBits() : 128;
+ return std::max<unsigned>(1, (DL.getTypeSizeInBits(VecTy) + 127) / VecSize);
}
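// Illustrative arithmetic for the computation above: a 512-bit vector needs
// (512 + 127) / 128 = 4 accesses on the 128-bit NEON path, but only
// (512 + 127) / 256 = 2 accesses when UseScalable is set and the minimum SVE
// register size is 256 bits; the std::max guards vectors narrower than one
// register.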
MachineMemOperand::Flags
@@ -11583,24 +12045,63 @@ AArch64TargetLowering::getTargetMMOFlags(const Instruction &I) const {
}
bool AArch64TargetLowering::isLegalInterleavedAccessType(
- VectorType *VecTy, const DataLayout &DL) const {
+ VectorType *VecTy, const DataLayout &DL, bool &UseScalable) const {
unsigned VecSize = DL.getTypeSizeInBits(VecTy);
unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
+ unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();
+
+ UseScalable = false;
// Ensure the number of vector elements is greater than 1.
- if (cast<FixedVectorType>(VecTy)->getNumElements() < 2)
+ if (NumElements < 2)
return false;
// Ensure the element type is legal.
if (ElSize != 8 && ElSize != 16 && ElSize != 32 && ElSize != 64)
return false;
+ if (Subtarget->useSVEForFixedLengthVectors() &&
+ (VecSize % Subtarget->getMinSVEVectorSizeInBits() == 0 ||
+ (VecSize < Subtarget->getMinSVEVectorSizeInBits() &&
+ isPowerOf2_32(NumElements) && VecSize > 128))) {
+ UseScalable = true;
+ return true;
+ }
+
// Ensure the total vector size is 64 or a multiple of 128. Types larger than
// 128 will be split into multiple interleaved accesses.
return VecSize == 64 || VecSize % 128 == 0;
}
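// Illustrative cases for the check above, assuming fixed-length SVE with a
// 256-bit minimum vector length (e.g. -msve-vector-bits=256): a <8 x i32>
// access is 256 bits, a multiple of 256, so UseScalable is set; a <16 x i8>
// access is 128 bits and instead satisfies the 64-or-multiple-of-128 rule at
// the end of the function, with UseScalable left false.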
+static ScalableVectorType *getSVEContainerIRType(FixedVectorType *VTy) {
+ if (VTy->getElementType() == Type::getDoubleTy(VTy->getContext()))
+ return ScalableVectorType::get(VTy->getElementType(), 2);
+
+ if (VTy->getElementType() == Type::getFloatTy(VTy->getContext()))
+ return ScalableVectorType::get(VTy->getElementType(), 4);
+
+ if (VTy->getElementType() == Type::getBFloatTy(VTy->getContext()))
+ return ScalableVectorType::get(VTy->getElementType(), 8);
+
+ if (VTy->getElementType() == Type::getHalfTy(VTy->getContext()))
+ return ScalableVectorType::get(VTy->getElementType(), 8);
+
+ if (VTy->getElementType() == Type::getInt64Ty(VTy->getContext()))
+ return ScalableVectorType::get(VTy->getElementType(), 2);
+
+ if (VTy->getElementType() == Type::getInt32Ty(VTy->getContext()))
+ return ScalableVectorType::get(VTy->getElementType(), 4);
+
+ if (VTy->getElementType() == Type::getInt16Ty(VTy->getContext()))
+ return ScalableVectorType::get(VTy->getElementType(), 8);
+
+ if (VTy->getElementType() == Type::getInt8Ty(VTy->getContext()))
+ return ScalableVectorType::get(VTy->getElementType(), 16);
+
+ llvm_unreachable("Cannot handle input vector type");
+}
+
/// Lower an interleaved load into a ldN intrinsic.
///
/// E.g. Lower an interleaved load (Factor = 2):
@@ -11628,10 +12129,12 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
// Skip if we do not have NEON and skip illegal vector types. We can
// "legalize" wide vector types into multiple interleaved accesses as long as
// the vector types are divisible by 128.
- if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(VTy, DL))
+ bool UseScalable;
+ if (!Subtarget->hasNEON() ||
+ !isLegalInterleavedAccessType(VTy, DL, UseScalable))
return false;
- unsigned NumLoads = getNumInterleavedAccesses(VTy, DL);
+ unsigned NumLoads = getNumInterleavedAccesses(VTy, DL, UseScalable);
auto *FVTy = cast<FixedVectorType>(VTy);
@@ -11642,48 +12145,84 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
FVTy =
FixedVectorType::get(DL.getIntPtrType(EltTy), FVTy->getNumElements());
+ // If we're going to generate more than one load, reset the sub-vector type
+ // to something legal.
+ FVTy = FixedVectorType::get(FVTy->getElementType(),
+ FVTy->getNumElements() / NumLoads);
+
+ auto *LDVTy =
+ UseScalable ? cast<VectorType>(getSVEContainerIRType(FVTy)) : FVTy;
+
IRBuilder<> Builder(LI);
// The base address of the load.
Value *BaseAddr = LI->getPointerOperand();
if (NumLoads > 1) {
- // If we're going to generate more than one load, reset the sub-vector type
- // to something legal.
- FVTy = FixedVectorType::get(FVTy->getElementType(),
- FVTy->getNumElements() / NumLoads);
-
// We will compute the pointer operand of each load from the original base
// address using GEPs. Cast the base address to a pointer to the scalar
// element type.
BaseAddr = Builder.CreateBitCast(
BaseAddr,
- FVTy->getElementType()->getPointerTo(LI->getPointerAddressSpace()));
- }
-
- Type *PtrTy = FVTy->getPointerTo(LI->getPointerAddressSpace());
- Type *Tys[2] = {FVTy, PtrTy};
- static const Intrinsic::ID LoadInts[3] = {Intrinsic::aarch64_neon_ld2,
- Intrinsic::aarch64_neon_ld3,
- Intrinsic::aarch64_neon_ld4};
- Function *LdNFunc =
- Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
+ LDVTy->getElementType()->getPointerTo(LI->getPointerAddressSpace()));
+ }
+
+ Type *PtrTy =
+ UseScalable
+ ? LDVTy->getElementType()->getPointerTo(LI->getPointerAddressSpace())
+ : LDVTy->getPointerTo(LI->getPointerAddressSpace());
+ Type *PredTy = VectorType::get(Type::getInt1Ty(LDVTy->getContext()),
+ LDVTy->getElementCount());
+
+ static const Intrinsic::ID SVELoadIntrs[3] = {
+ Intrinsic::aarch64_sve_ld2_sret, Intrinsic::aarch64_sve_ld3_sret,
+ Intrinsic::aarch64_sve_ld4_sret};
+ static const Intrinsic::ID NEONLoadIntrs[3] = {Intrinsic::aarch64_neon_ld2,
+ Intrinsic::aarch64_neon_ld3,
+ Intrinsic::aarch64_neon_ld4};
+ Function *LdNFunc;
+ if (UseScalable)
+ LdNFunc = Intrinsic::getDeclaration(LI->getModule(),
+ SVELoadIntrs[Factor - 2], {LDVTy});
+ else
+ LdNFunc = Intrinsic::getDeclaration(
+ LI->getModule(), NEONLoadIntrs[Factor - 2], {LDVTy, PtrTy});
// Holds sub-vectors extracted from the load intrinsic return values. The
// sub-vectors are associated with the shufflevector instructions they will
// replace.
DenseMap<ShuffleVectorInst *, SmallVector<Value *, 4>> SubVecs;
+ Value *PTrue = nullptr;
+ if (UseScalable) {
+ unsigned PgPattern =
+ getSVEPredPatternFromNumElements(FVTy->getNumElements());
+ if (Subtarget->getMinSVEVectorSizeInBits() ==
+ Subtarget->getMaxSVEVectorSizeInBits() &&
+ Subtarget->getMinSVEVectorSizeInBits() == DL.getTypeSizeInBits(FVTy))
+ PgPattern = AArch64SVEPredPattern::all;
+
+ auto *PTruePat =
+ ConstantInt::get(Type::getInt32Ty(LDVTy->getContext()), PgPattern);
+ PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, {PredTy},
+ {PTruePat});
+ }
+
for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
// If we're generating more than one load, compute the base address of
// subsequent loads as an offset from the previous.
if (LoadCount > 0)
- BaseAddr = Builder.CreateConstGEP1_32(FVTy->getElementType(), BaseAddr,
+ BaseAddr = Builder.CreateConstGEP1_32(LDVTy->getElementType(), BaseAddr,
FVTy->getNumElements() * Factor);
- CallInst *LdN = Builder.CreateCall(
- LdNFunc, Builder.CreateBitCast(BaseAddr, PtrTy), "ldN");
+ CallInst *LdN;
+ if (UseScalable)
+ LdN = Builder.CreateCall(
+ LdNFunc, {PTrue, Builder.CreateBitCast(BaseAddr, PtrTy)}, "ldN");
+ else
+ LdN = Builder.CreateCall(LdNFunc, Builder.CreateBitCast(BaseAddr, PtrTy),
+ "ldN");
// Extract and store the sub-vectors returned by the load intrinsic.
for (unsigned i = 0; i < Shuffles.size(); i++) {
@@ -11692,11 +12231,17 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
Value *SubVec = Builder.CreateExtractValue(LdN, Index);
+ if (UseScalable)
+ SubVec = Builder.CreateExtractVector(
+ FVTy, SubVec,
+ ConstantInt::get(Type::getInt64Ty(VTy->getContext()), 0));
+
// Convert the integer vector to pointer vector if the element is pointer.
if (EltTy->isPointerTy())
SubVec = Builder.CreateIntToPtr(
SubVec, FixedVectorType::get(SVI->getType()->getElementType(),
FVTy->getNumElements()));
+
SubVecs[SVI].push_back(SubVec);
}
}
@@ -11755,14 +12300,16 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
auto *SubVecTy = FixedVectorType::get(EltTy, LaneLen);
const DataLayout &DL = SI->getModule()->getDataLayout();
+ bool UseScalable;
// Skip if we do not have NEON and skip illegal vector types. We can
// "legalize" wide vector types into multiple interleaved accesses as long as
// the vector types are divisible by 128.
- if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(SubVecTy, DL))
+ if (!Subtarget->hasNEON() ||
+ !isLegalInterleavedAccessType(SubVecTy, DL, UseScalable))
return false;
- unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL);
+ unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL, UseScalable);
Value *Op0 = SVI->getOperand(0);
Value *Op1 = SVI->getOperand(1);
@@ -11783,15 +12330,18 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
SubVecTy = FixedVectorType::get(IntTy, LaneLen);
}
+ // If we're going to generate more than one store, reset the lane length
+ // and sub-vector type to something legal.
+ LaneLen /= NumStores;
+ SubVecTy = FixedVectorType::get(SubVecTy->getElementType(), LaneLen);
+
+ auto *STVTy = UseScalable ? cast<VectorType>(getSVEContainerIRType(SubVecTy))
+ : SubVecTy;
+
// The base address of the store.
Value *BaseAddr = SI->getPointerOperand();
if (NumStores > 1) {
- // If we're going to generate more than one store, reset the lane length
- // and sub-vector type to something legal.
- LaneLen /= NumStores;
- SubVecTy = FixedVectorType::get(SubVecTy->getElementType(), LaneLen);
-
// We will compute the pointer operand of each store from the original base
// address using GEPs. Cast the base address to a pointer to the scalar
// element type.
@@ -11802,13 +12352,42 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
auto Mask = SVI->getShuffleMask();
- Type *PtrTy = SubVecTy->getPointerTo(SI->getPointerAddressSpace());
- Type *Tys[2] = {SubVecTy, PtrTy};
- static const Intrinsic::ID StoreInts[3] = {Intrinsic::aarch64_neon_st2,
- Intrinsic::aarch64_neon_st3,
- Intrinsic::aarch64_neon_st4};
- Function *StNFunc =
- Intrinsic::getDeclaration(SI->getModule(), StoreInts[Factor - 2], Tys);
+ Type *PtrTy =
+ UseScalable
+ ? STVTy->getElementType()->getPointerTo(SI->getPointerAddressSpace())
+ : STVTy->getPointerTo(SI->getPointerAddressSpace());
+ Type *PredTy = VectorType::get(Type::getInt1Ty(STVTy->getContext()),
+ STVTy->getElementCount());
+
+ static const Intrinsic::ID SVEStoreIntrs[3] = {Intrinsic::aarch64_sve_st2,
+ Intrinsic::aarch64_sve_st3,
+ Intrinsic::aarch64_sve_st4};
+ static const Intrinsic::ID NEONStoreIntrs[3] = {Intrinsic::aarch64_neon_st2,
+ Intrinsic::aarch64_neon_st3,
+ Intrinsic::aarch64_neon_st4};
+ Function *StNFunc;
+ if (UseScalable)
+ StNFunc = Intrinsic::getDeclaration(SI->getModule(),
+ SVEStoreIntrs[Factor - 2], {STVTy});
+ else
+ StNFunc = Intrinsic::getDeclaration(
+ SI->getModule(), NEONStoreIntrs[Factor - 2], {STVTy, PtrTy});
+
+ Value *PTrue = nullptr;
+ if (UseScalable) {
+ unsigned PgPattern =
+ getSVEPredPatternFromNumElements(SubVecTy->getNumElements());
+ if (Subtarget->getMinSVEVectorSizeInBits() ==
+ Subtarget->getMaxSVEVectorSizeInBits() &&
+ Subtarget->getMinSVEVectorSizeInBits() ==
+ DL.getTypeSizeInBits(SubVecTy))
+ PgPattern = AArch64SVEPredPattern::all;
+
+ auto *PTruePat =
+ ConstantInt::get(Type::getInt32Ty(STVTy->getContext()), PgPattern);
+ PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, {PredTy},
+ {PTruePat});
+ }
for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
@@ -11816,10 +12395,11 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
// Split the shufflevector operands into sub vectors for the new stN call.
for (unsigned i = 0; i < Factor; i++) {
+ Value *Shuffle;
unsigned IdxI = StoreCount * LaneLen * Factor + i;
if (Mask[IdxI] >= 0) {
- Ops.push_back(Builder.CreateShuffleVector(
- Op0, Op1, createSequentialMask(Mask[IdxI], LaneLen, 0)));
+ Shuffle = Builder.CreateShuffleVector(
+ Op0, Op1, createSequentialMask(Mask[IdxI], LaneLen, 0));
} else {
unsigned StartMask = 0;
for (unsigned j = 1; j < LaneLen; j++) {
@@ -11834,11 +12414,21 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
// In the case of all undefs we're defaulting to using elems from 0
// Note: StartMask cannot be negative, it's checked in
// isReInterleaveMask
- Ops.push_back(Builder.CreateShuffleVector(
- Op0, Op1, createSequentialMask(StartMask, LaneLen, 0)));
+ Shuffle = Builder.CreateShuffleVector(
+ Op0, Op1, createSequentialMask(StartMask, LaneLen, 0));
}
+
+ if (UseScalable)
+ Shuffle = Builder.CreateInsertVector(
+ STVTy, UndefValue::get(STVTy), Shuffle,
+ ConstantInt::get(Type::getInt64Ty(STVTy->getContext()), 0));
+
+ Ops.push_back(Shuffle);
}
+ if (UseScalable)
+ Ops.push_back(PTrue);
+
// If we are generating more than one store, we compute the base address of
// subsequent stores as an offset from the previous.
if (StoreCount > 0)
@@ -11905,8 +12495,7 @@ SDValue AArch64TargetLowering::LowerSVEStructLoad(unsigned Intrinsic,
EVT AArch64TargetLowering::getOptimalMemOpType(
const MemOp &Op, const AttributeList &FuncAttributes) const {
- bool CanImplicitFloat =
- !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat);
+ bool CanImplicitFloat = !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat);
bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
// Only use AdvSIMD to implement memset of 32-byte and above. It would have
@@ -11923,8 +12512,8 @@ EVT AArch64TargetLowering::getOptimalMemOpType(
};
if (CanUseNEON && Op.isMemset() && !IsSmallMemset &&
- AlignmentIsAcceptable(MVT::v2i64, Align(16)))
- return MVT::v2i64;
+ AlignmentIsAcceptable(MVT::v16i8, Align(16)))
+ return MVT::v16i8;
if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, Align(16)))
return MVT::f128;
if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, Align(8)))
@@ -11936,8 +12525,7 @@ EVT AArch64TargetLowering::getOptimalMemOpType(
LLT AArch64TargetLowering::getOptimalMemOpLLT(
const MemOp &Op, const AttributeList &FuncAttributes) const {
- bool CanImplicitFloat =
- !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat);
+ bool CanImplicitFloat = !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat);
bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
// Only use AdvSIMD to implement memset of 32-byte and above. It would have
@@ -11981,6 +12569,33 @@ bool AArch64TargetLowering::isLegalAddImmediate(int64_t Immed) const {
return IsLegal;
}
+// Return false to prevent folding
+// (mul (add x, c1), c2) -> (add (mul x, c2), c2*c1) in DAGCombine,
+// if the folding leads to worse code.
+bool AArch64TargetLowering::isMulAddWithConstProfitable(
+ const SDValue &AddNode, const SDValue &ConstNode) const {
+ // Let the DAGCombiner decide for vector types and large types.
+ const EVT VT = AddNode.getValueType();
+ if (VT.isVector() || VT.getScalarSizeInBits() > 64)
+ return true;
+
+  // It is worse if c1 is a legal add immediate while c1*c2 is not, and c1*c2
+  // has to be materialized with at least two instructions.
+ const ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
+ const ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
+ const int64_t C1 = C1Node->getSExtValue();
+ const APInt C1C2 = C1Node->getAPIntValue() * C2Node->getAPIntValue();
+ if (!isLegalAddImmediate(C1) || isLegalAddImmediate(C1C2.getSExtValue()))
+ return true;
+ SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
+ AArch64_IMM::expandMOVImm(C1C2.getZExtValue(), VT.getSizeInBits(), Insn);
+ if (Insn.size() > 1)
+ return false;
+
+ // Default to true and let the DAGCombiner decide.
+ return true;
+}
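// Illustrative case for the heuristic above (hypothetical constants): with
// c1 = 1, a legal add immediate, and c2 = 0x1234567890, the product c1*c2 is
// not a legal add immediate and expandMOVImm needs more than one instruction
// to materialize it, so returning false blocks the
// (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2) fold; otherwise the
// decision is left to the DAGCombiner.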
+
// Integer comparisons are implemented with ADDS/SUBS, so the range of valid
// immediates is the same as for an add or a sub.
bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Immed) const {
@@ -12100,7 +12715,8 @@ bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
bool AArch64TargetLowering::generateFMAsInMachineCombiner(
EVT VT, CodeGenOpt::Level OptLevel) const {
- return (OptLevel >= CodeGenOpt::Aggressive) && !VT.isScalableVector();
+ return (OptLevel >= CodeGenOpt::Aggressive) && !VT.isScalableVector() &&
+ !useSVEForFixedLengthVectorVT(VT);
}
const MCPhysReg *
@@ -12348,7 +12964,7 @@ AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
// fold (sdiv X, pow2)
EVT VT = N->getValueType(0);
if ((VT != MVT::i32 && VT != MVT::i64) ||
- !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
+ !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
return SDValue();
SDLoc DL(N);
@@ -12505,7 +13121,7 @@ static SDValue performCommonVectorExtendCombine(SDValue VectorShuffle,
DAG.getAnyExtOrTrunc(Extend.getOperand(0), DL, PreExtendType),
DAG.getConstant(0, DL, MVT::i64));
- std::vector<int> ShuffleMask(TargetType.getVectorElementCount().getValue());
+ std::vector<int> ShuffleMask(TargetType.getVectorNumElements());
SDValue VectorShuffleNode =
DAG.getVectorShuffle(PreExtendVT, DL, InsertVectorNode,
@@ -12547,12 +13163,44 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
if (DCI.isBeforeLegalizeOps())
return SDValue();
+ // Canonicalize X*(Y+1) -> X*Y+X and (X+1)*Y -> X*Y+Y,
+  // so that the MachineCombiner pass can later combine the add and mul into madd.
+ // Similarly, X*(1-Y) -> X - X*Y and (1-Y)*X -> X - Y*X.
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue MulOper;
+ unsigned AddSubOpc;
+
+ auto IsAddSubWith1 = [&](SDValue V) -> bool {
+ AddSubOpc = V->getOpcode();
+ if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
+ SDValue Opnd = V->getOperand(1);
+ MulOper = V->getOperand(0);
+ if (AddSubOpc == ISD::SUB)
+ std::swap(Opnd, MulOper);
+ if (auto C = dyn_cast<ConstantSDNode>(Opnd))
+ return C->isOne();
+ }
+ return false;
+ };
+
+ if (IsAddSubWith1(N0)) {
+ SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
+ return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
+ }
+
+ if (IsAddSubWith1(N1)) {
+ SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
+ return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
+ }
+
// The below optimizations require a constant RHS.
- if (!isa<ConstantSDNode>(N->getOperand(1)))
+ if (!isa<ConstantSDNode>(N1))
return SDValue();
- SDValue N0 = N->getOperand(0);
- ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(1));
+ ConstantSDNode *C = cast<ConstantSDNode>(N1);
const APInt &ConstValue = C->getAPIntValue();
// Allow the scaling to be folded into the `cnt` instruction by preventing
@@ -12593,7 +13241,7 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
// and shift+add+shift.
APInt ShiftedConstValue = ConstValue.ashr(TrailingZeroes);
- unsigned ShiftAmt, AddSubOpc;
+ unsigned ShiftAmt;
// Is the shifted value the LHS operand of the add/sub?
bool ShiftValUseIsN0 = true;
// Do we need to negate the result?
@@ -12630,8 +13278,6 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
- SDLoc DL(N);
- EVT VT = N->getValueType(0);
SDValue ShiftedVal = DAG.getNode(ISD::SHL, DL, VT, N0,
DAG.getConstant(ShiftAmt, DL, MVT::i64));
@@ -12757,7 +13403,8 @@ static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
uint32_t FloatBits = FloatTy.getSizeInBits();
- if (FloatBits != 32 && FloatBits != 64)
+ if (FloatBits != 32 && FloatBits != 64 &&
+ (FloatBits != 16 || !Subtarget->hasFullFP16()))
return SDValue();
MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
@@ -12776,27 +13423,20 @@ static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
if (C == -1 || C == 0 || C > Bits)
return SDValue();
- MVT ResTy;
- unsigned NumLanes = Op.getValueType().getVectorNumElements();
- switch (NumLanes) {
- default:
- return SDValue();
- case 2:
- ResTy = FloatBits == 32 ? MVT::v2i32 : MVT::v2i64;
- break;
- case 4:
- ResTy = FloatBits == 32 ? MVT::v4i32 : MVT::v4i64;
- break;
- }
-
- if (ResTy == MVT::v4i64 && DCI.isBeforeLegalizeOps())
+ EVT ResTy = Op.getValueType().changeVectorElementTypeToInteger();
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(ResTy))
return SDValue();
- assert((ResTy != MVT::v4i64 || DCI.isBeforeLegalizeOps()) &&
- "Illegal vector type after legalization");
+ if (N->getOpcode() == ISD::FP_TO_SINT_SAT ||
+ N->getOpcode() == ISD::FP_TO_UINT_SAT) {
+ EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ if (SatVT.getScalarSizeInBits() != IntBits)
+ return SDValue();
+ }
SDLoc DL(N);
- bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
+ bool IsSigned = (N->getOpcode() == ISD::FP_TO_SINT ||
+ N->getOpcode() == ISD::FP_TO_SINT_SAT);
unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfp2fxs
: Intrinsic::aarch64_neon_vcvtfp2fxu;
SDValue FixConv =
@@ -13097,6 +13737,9 @@ static SDValue performSVEAndCombine(SDNode *N,
SDLoc DL(N);
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Dup->getOperand(0));
+ if (!C)
+ return SDValue();
+
uint64_t ExtVal = C->getZExtValue();
// If the mask is fully covered by the unpack, we don't need to push
@@ -13289,7 +13932,7 @@ performVectorTruncateCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
unsigned ElemSizeInBits = VT.getScalarSizeInBits();
APInt CAsAPInt(ElemSizeInBits, C);
- if (CAsAPInt != APInt::getAllOnesValue(ElemSizeInBits))
+ if (CAsAPInt != APInt::getAllOnes(ElemSizeInBits))
return SDValue();
ExtendOpA = Xor.getOperand(0);
@@ -13475,7 +14118,7 @@ static SDValue performConcatVectorsCombine(SDNode *N,
// If we see a (concat_vectors (v1x64 A), (v1x64 A)) it's really a vector
// splat. The indexed instructions are going to be expecting a DUPLANE64, so
// canonicalise to that.
- if (N0 == N1 && VT.getVectorNumElements() == 2) {
+ if (N->getNumOperands() == 2 && N0 == N1 && VT.getVectorNumElements() == 2) {
assert(VT.getScalarSizeInBits() == 64);
return DAG.getNode(AArch64ISD::DUPLANE64, dl, VT, WidenVector(N0, DAG),
DAG.getConstant(0, dl, MVT::i64));
@@ -13490,7 +14133,7 @@ static SDValue performConcatVectorsCombine(SDNode *N,
// becomes
// (bitconvert (concat_vectors (v4i16 (bitconvert LHS)), RHS))
- if (N1Opc != ISD::BITCAST)
+ if (N->getNumOperands() != 2 || N1Opc != ISD::BITCAST)
return SDValue();
SDValue RHS = N1->getOperand(0);
MVT RHSTy = RHS.getValueType().getSimpleVT();
@@ -13509,6 +14152,48 @@ static SDValue performConcatVectorsCombine(SDNode *N,
RHS));
}
+static SDValue
+performInsertSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
+ SelectionDAG &DAG) {
+ SDValue Vec = N->getOperand(0);
+ SDValue SubVec = N->getOperand(1);
+ uint64_t IdxVal = N->getConstantOperandVal(2);
+ EVT VecVT = Vec.getValueType();
+ EVT SubVT = SubVec.getValueType();
+
+ // Only do this for legal fixed vector types.
+ if (!VecVT.isFixedLengthVector() ||
+ !DAG.getTargetLoweringInfo().isTypeLegal(VecVT) ||
+ !DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
+ return SDValue();
+
+ // Ignore widening patterns.
+ if (IdxVal == 0 && Vec.isUndef())
+ return SDValue();
+
+ // Subvector must be half the width and an "aligned" insertion.
+ unsigned NumSubElts = SubVT.getVectorNumElements();
+ if ((SubVT.getSizeInBits() * 2) != VecVT.getSizeInBits() ||
+ (IdxVal != 0 && IdxVal != NumSubElts))
+ return SDValue();
+
+ // Fold insert_subvector -> concat_vectors
+ // insert_subvector(Vec,Sub,lo) -> concat_vectors(Sub,extract(Vec,hi))
+ // insert_subvector(Vec,Sub,hi) -> concat_vectors(extract(Vec,lo),Sub)
+ SDLoc DL(N);
+ SDValue Lo, Hi;
+ if (IdxVal == 0) {
+ Lo = SubVec;
+ Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,
+ DAG.getVectorIdxConstant(NumSubElts, DL));
+ } else {
+ Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,
+ DAG.getVectorIdxConstant(0, DL));
+ Hi = SubVec;
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Lo, Hi);
+}
+
static SDValue tryCombineFixedPointConvert(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
@@ -13611,6 +14296,8 @@ static bool isEssentiallyExtractHighSubvector(SDValue N) {
N = N.getOperand(0);
if (N.getOpcode() != ISD::EXTRACT_SUBVECTOR)
return false;
+ if (N.getOperand(0).getValueType().isScalableVector())
+ return false;
return cast<ConstantSDNode>(N.getOperand(1))->getAPIntValue() ==
N.getOperand(0).getValueType().getVectorNumElements() / 2;
}
@@ -13687,7 +14374,7 @@ static bool isSetCC(SDValue Op, SetCCInfoAndKind &SetCCInfo) {
SetCCInfo.Info.AArch64.CC =
AArch64CC::getInvertedCondCode(SetCCInfo.Info.AArch64.CC);
}
- return TValue->isOne() && FValue->isNullValue();
+ return TValue->isOne() && FValue->isZero();
}
// Returns true if Op is setcc or zext of setcc.
@@ -13765,7 +14452,7 @@ static SDValue performUADDVCombine(SDNode *N, SelectionDAG &DAG) {
auto *LHSN1 = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
auto *RHSN1 = dyn_cast<ConstantSDNode>(RHS->getOperand(1));
- if (!LHSN1 || LHSN1 != RHSN1 || !RHSN1->isNullValue())
+ if (!LHSN1 || LHSN1 != RHSN1 || !RHSN1->isZero())
return SDValue();
SDValue Op1 = LHS->getOperand(0);
@@ -14237,20 +14924,20 @@ static bool isAllActivePredicate(SDValue N) {
// or unpredicated operation, which potentially allows better isel (perhaps
// using immediate forms) or relaxing register reuse requirements.
static SDValue convertMergedOpToPredOp(SDNode *N, unsigned Opc,
- SelectionDAG &DAG,
- bool UnpredOp = false) {
+ SelectionDAG &DAG, bool UnpredOp = false,
+ bool SwapOperands = false) {
assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Expected intrinsic!");
assert(N->getNumOperands() == 4 && "Expected 3 operand intrinsic!");
SDValue Pg = N->getOperand(1);
+ SDValue Op1 = N->getOperand(SwapOperands ? 3 : 2);
+ SDValue Op2 = N->getOperand(SwapOperands ? 2 : 3);
// ISD way to specify an all active predicate.
if (isAllActivePredicate(Pg)) {
if (UnpredOp)
- return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), N->getOperand(2),
- N->getOperand(3));
- else
- return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), Pg,
- N->getOperand(2), N->getOperand(3));
+ return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), Op1, Op2);
+
+ return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), Pg, Op1, Op2);
}
// FUTURE: SplatVector(true)
@@ -14372,6 +15059,8 @@ static SDValue performIntrinsicCombine(SDNode *N,
return convertMergedOpToPredOp(N, ISD::ADD, DAG, true);
case Intrinsic::aarch64_sve_sub:
return convertMergedOpToPredOp(N, ISD::SUB, DAG, true);
+ case Intrinsic::aarch64_sve_subr:
+ return convertMergedOpToPredOp(N, ISD::SUB, DAG, true, true);
case Intrinsic::aarch64_sve_and:
return convertMergedOpToPredOp(N, ISD::AND, DAG, true);
case Intrinsic::aarch64_sve_bic:
@@ -14927,6 +15616,18 @@ static SDValue performSpliceCombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
+static SDValue performUnpackCombine(SDNode *N, SelectionDAG &DAG) {
+ assert((N->getOpcode() == AArch64ISD::UUNPKHI ||
+ N->getOpcode() == AArch64ISD::UUNPKLO) &&
+ "Unexpected Opcode!");
+
+ // uunpklo/hi undef -> undef
+ if (N->getOperand(0).isUndef())
+ return DAG.getUNDEF(N->getValueType(0));
+
+ return SDValue();
+}
+
static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Op0 = N->getOperand(0);
@@ -15169,11 +15870,10 @@ static SDValue foldTruncStoreOfExt(SelectionDAG &DAG, SDNode *N) {
ExtOpCode != ISD::ANY_EXTEND)
return SDValue();
SDValue Orig = Ext->getOperand(0);
- if (Store->getMemoryVT() != Orig->getValueType(0))
+ if (Store->getMemoryVT() != Orig.getValueType())
return SDValue();
return DAG.getStore(Store->getChain(), SDLoc(Store), Orig,
- Store->getBasePtr(), Store->getPointerInfo(),
- Store->getAlign());
+ Store->getBasePtr(), Store->getMemOperand());
}
return SDValue();
@@ -15844,7 +16544,7 @@ static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG) {
MVT::v2i32, MVT::v4i32, MVT::v2i64}),
VT.getSimpleVT().SimpleTy) &&
ISD::isConstantSplatVector(SplatLHS, SplatLHSVal) &&
- SplatLHSVal.isOneValue() && ISD::isConstantSplatVectorAllOnes(CmpRHS) &&
+ SplatLHSVal.isOne() && ISD::isConstantSplatVectorAllOnes(CmpRHS) &&
ISD::isConstantSplatVectorAllOnes(SplatRHS)) {
unsigned NumElts = VT.getVectorNumElements();
SmallVector<SDValue, 8> Ops(
@@ -16544,6 +17244,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performIntToFpCombine(N, DAG, Subtarget);
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT:
return performFpToIntCombine(N, DAG, DCI, Subtarget);
case ISD::FDIV:
return performFDivCombine(N, DAG, DCI, Subtarget);
@@ -16565,6 +17267,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performVectorTruncateCombine(N, DCI, DAG);
case ISD::CONCAT_VECTORS:
return performConcatVectorsCombine(N, DCI, DAG);
+ case ISD::INSERT_SUBVECTOR:
+ return performInsertSubvectorCombine(N, DCI, DAG);
case ISD::SELECT:
return performSelectCombine(N, DCI);
case ISD::VSELECT:
@@ -16592,6 +17296,9 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performNVCASTCombine(N);
case AArch64ISD::SPLICE:
return performSpliceCombine(N, DAG);
+ case AArch64ISD::UUNPKLO:
+ case AArch64ISD::UUNPKHI:
+ return performUnpackCombine(N, DAG);
case AArch64ISD::UZP1:
return performUzpCombine(N, DAG);
case AArch64ISD::SETCC_MERGE_ZERO:
@@ -17212,18 +17919,22 @@ void AArch64TargetLowering::ReplaceNodeResults(
return;
case ISD::FP_TO_UINT:
case ISD::FP_TO_SINT:
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
assert(N->getValueType(0) == MVT::i128 && "unexpected illegal conversion");
// Let normal code take care of it by not adding anything to Results.
return;
case ISD::ATOMIC_CMP_SWAP:
ReplaceCMP_SWAP_128Results(N, Results, DAG, Subtarget);
return;
+ case ISD::ATOMIC_LOAD:
case ISD::LOAD: {
assert(SDValue(N, 0).getValueType() == MVT::i128 &&
"unexpected load's value type");
- LoadSDNode *LoadNode = cast<LoadSDNode>(N);
- if (!LoadNode->isVolatile() || LoadNode->getMemoryVT() != MVT::i128) {
- // Non-volatile loads are optimized later in AArch64's load/store
+ MemSDNode *LoadNode = cast<MemSDNode>(N);
+ if ((!LoadNode->isVolatile() && !LoadNode->isAtomic()) ||
+ LoadNode->getMemoryVT() != MVT::i128) {
+      // Loads that are neither volatile nor atomic are optimized later in AArch64's load/store
// optimizer.
return;
}
@@ -17314,12 +18025,37 @@ AArch64TargetLowering::getPreferredVectorAction(MVT VT) const {
return TargetLoweringBase::getPreferredVectorAction(VT);
}
+// In v8.4a, ldp and stp instructions are guaranteed to be single-copy atomic
+// provided the address is 16-byte aligned.
+bool AArch64TargetLowering::isOpSuitableForLDPSTP(const Instruction *I) const {
+ if (!Subtarget->hasLSE2())
+ return false;
+
+ if (auto LI = dyn_cast<LoadInst>(I))
+ return LI->getType()->getPrimitiveSizeInBits() == 128 &&
+ LI->getAlignment() >= 16;
+
+ if (auto SI = dyn_cast<StoreInst>(I))
+ return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
+ SI->getAlignment() >= 16;
+
+ return false;
+}
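// Illustrative consequence of the predicate above: on an LSE2 target a
// 16-byte-aligned 128-bit atomic load or store is reported as suitable, so
// shouldExpandAtomicLoadInIR/shouldExpandAtomicStoreInIR below leave it
// unexpanded and it can be selected as a single LDP/STP; without LSE2 or
// 16-byte alignment the 128-bit case still goes down the LL/SC or CAS path.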
+
+bool AArch64TargetLowering::shouldInsertFencesForAtomic(
+ const Instruction *I) const {
+ return isOpSuitableForLDPSTP(I);
+}
+
// Loads and stores less than 128-bits are already atomic; ones above that
// are doomed anyway, so defer to the default libcall and blame the OS when
// things go wrong.
bool AArch64TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
- return Size == 128;
+ if (Size != 128)
+ return false;
+
+ return !isOpSuitableForLDPSTP(SI);
}
// Loads and stores less than 128-bits are already atomic; ones above that
@@ -17328,7 +18064,19 @@ bool AArch64TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
TargetLowering::AtomicExpansionKind
AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
unsigned Size = LI->getType()->getPrimitiveSizeInBits();
- return Size == 128 ? AtomicExpansionKind::LLSC : AtomicExpansionKind::None;
+
+ if (Size != 128 || isOpSuitableForLDPSTP(LI))
+ return AtomicExpansionKind::None;
+
+ // At -O0, fast-regalloc cannot cope with the live vregs necessary to
+ // implement atomicrmw without spilling. If the target address is also on the
+ // stack and close enough to the spill slot, this can lead to a situation
+ // where the monitor always gets cleared and the atomic operation can never
+ // succeed. So at -O0 lower this operation to a CAS loop.
+ if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
+ return AtomicExpansionKind::CmpXChg;
+
+ return AtomicExpansionKind::LLSC;
}
// For the real atomic operations, we have ldxr/stxr up to 128 bits,
@@ -17531,7 +18279,7 @@ void AArch64TargetLowering::insertSSPDeclarations(Module &M) const {
Type::getInt8PtrTy(M.getContext()));
if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
F->setCallingConv(CallingConv::Win64);
- F->addAttribute(1, Attribute::AttrKind::InReg);
+ F->addParamAttr(0, Attribute::AttrKind::InReg);
}
return;
}
@@ -17657,7 +18405,7 @@ bool AArch64TargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
// integer division, leaving the division as-is is a loss even in terms of
// size, because it will have to be scalarized, while the alternative code
// sequence can be performed in vector form.
- bool OptSize = Attr.hasFnAttribute(Attribute::MinSize);
+ bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
return OptSize && !VT.isVector();
}
@@ -17759,42 +18507,20 @@ static SDValue getPredicateForFixedLengthVector(SelectionDAG &DAG, SDLoc &DL,
DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
"Expected legal fixed length vector!");
- int PgPattern;
- switch (VT.getVectorNumElements()) {
- default:
- llvm_unreachable("unexpected element count for SVE predicate");
- case 1:
- PgPattern = AArch64SVEPredPattern::vl1;
- break;
- case 2:
- PgPattern = AArch64SVEPredPattern::vl2;
- break;
- case 4:
- PgPattern = AArch64SVEPredPattern::vl4;
- break;
- case 8:
- PgPattern = AArch64SVEPredPattern::vl8;
- break;
- case 16:
- PgPattern = AArch64SVEPredPattern::vl16;
- break;
- case 32:
- PgPattern = AArch64SVEPredPattern::vl32;
- break;
- case 64:
- PgPattern = AArch64SVEPredPattern::vl64;
- break;
- case 128:
- PgPattern = AArch64SVEPredPattern::vl128;
- break;
- case 256:
- PgPattern = AArch64SVEPredPattern::vl256;
- break;
- }
+ unsigned PgPattern =
+ getSVEPredPatternFromNumElements(VT.getVectorNumElements());
+ assert(PgPattern && "Unexpected element count for SVE predicate");
- // TODO: For vectors that are exactly getMaxSVEVectorSizeInBits big, we can
- // use AArch64SVEPredPattern::all, which can enable the use of unpredicated
+ // For vectors that are exactly getMaxSVEVectorSizeInBits big, we can use
+ // AArch64SVEPredPattern::all, which can enable the use of unpredicated
// variants of instructions when available.
+ const auto &Subtarget =
+ static_cast<const AArch64Subtarget &>(DAG.getSubtarget());
+ unsigned MinSVESize = Subtarget.getMinSVEVectorSizeInBits();
+ unsigned MaxSVESize = Subtarget.getMaxSVEVectorSizeInBits();
+ if (MaxSVESize && MinSVESize == MaxSVESize &&
+ MaxSVESize == VT.getSizeInBits())
+ PgPattern = AArch64SVEPredPattern::all;
MVT MaskVT;
switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
@@ -17817,8 +18543,7 @@ static SDValue getPredicateForFixedLengthVector(SelectionDAG &DAG, SDLoc &DL,
break;
}
- return DAG.getNode(AArch64ISD::PTRUE, DL, MaskVT,
- DAG.getTargetConstant(PgPattern, DL, MVT::i64));
+ return getPTrue(DAG, DL, MaskVT, PgPattern);
}
static SDValue getPredicateForScalableVector(SelectionDAG &DAG, SDLoc &DL,
@@ -17898,9 +18623,6 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorMLoadToSVE(
SDValue Op, SelectionDAG &DAG) const {
auto Load = cast<MaskedLoadSDNode>(Op);
- if (Load->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD)
- return SDValue();
-
SDLoc DL(Op);
EVT VT = Op.getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 386e1c2d8400..392e22b68366 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -330,6 +330,10 @@ enum NodeType : unsigned {
// Cast between vectors of the same element type but differ in length.
REINTERPRET_CAST,
+ // Nodes to build an LD64B / ST64B 64-bit quantity out of i64, and vice versa
+ LS64_BUILD,
+ LS64_EXTRACT,
+
LD1_MERGE_ZERO,
LD1S_MERGE_ZERO,
LDNF1_MERGE_ZERO,
@@ -401,6 +405,10 @@ enum NodeType : unsigned {
SSTNT1_PRED,
SSTNT1_INDEX_PRED,
+ // Asserts that a function argument (i32) is zero-extended to i8 by
+ // the caller
+ ASSERT_ZEXT_BOOL,
+
// Strict (exception-raising) floating point comparison
STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
STRICT_FCMPE,
@@ -591,6 +599,9 @@ public:
bool isLegalAddImmediate(int64_t) const override;
bool isLegalICmpImmediate(int64_t) const override;
+ bool isMulAddWithConstProfitable(const SDValue &AddNode,
+ const SDValue &ConstNode) const override;
+
bool shouldConsiderGEPOffsetSplit() const override;
EVT getOptimalMemOpType(const MemOp &Op,
@@ -653,6 +664,9 @@ public:
void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override;
+ bool isOpSuitableForLDPSTP(const Instruction *I) const;
+ bool shouldInsertFencesForAtomic(const Instruction *I) const override;
+
TargetLoweringBase::AtomicExpansionKind
shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
@@ -697,12 +711,11 @@ public:
bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
- const SelectionDAG &DAG) const override {
+ const MachineFunction &MF) const override {
// Do not merge to float value size (128 bits) if no implicit
// float attribute is set.
- bool NoFloat = DAG.getMachineFunction().getFunction().hasFnAttribute(
- Attribute::NoImplicitFloat);
+ bool NoFloat = MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat);
if (NoFloat)
return (MemVT.getSizeInBits() <= 64);
@@ -730,7 +743,9 @@ public:
if (!VT.isVector())
return hasAndNotCompare(Y);
- return VT.getSizeInBits() >= 64; // vector 'bic'
+ TypeSize TS = VT.getSizeInBits();
+ // TODO: We should be able to use bic/bif too for SVE.
+ return !TS.isScalable() && TS.getFixedValue() >= 64; // vector 'bic'
}
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
@@ -786,13 +801,13 @@ public:
/// Returns true if \p VecTy is a legal interleaved access type. This
/// function checks the vector element type and the overall width of the
/// vector.
- bool isLegalInterleavedAccessType(VectorType *VecTy,
- const DataLayout &DL) const;
+ bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL,
+ bool &UseScalable) const;
/// Returns the number of interleaved accesses that will be generated when
/// lowering accesses of the given type.
- unsigned getNumInterleavedAccesses(VectorType *VecTy,
- const DataLayout &DL) const;
+ unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL,
+ bool UseScalable) const;
MachineMemOperand::Flags getTargetMMOFlags(
const Instruction &I) const override;
@@ -824,6 +839,9 @@ public:
bool isAllActivePredicate(SDValue N) const;
EVT getPromotedVTForPredicate(EVT VT) const;
+ EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty,
+ bool AllowUnknown = false) const override;
+
private:
/// Keep a pointer to the AArch64Subtarget around so that we can
/// make the right decision when generating code for different targets.
@@ -854,6 +872,7 @@ private:
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerStore128(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
@@ -959,10 +978,12 @@ private:
SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBitreverse(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVectorFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 9bc2539e95f0..cd4bc8a61a8a 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -721,6 +721,7 @@ def tvecshiftR64 : Operand<i32>, TImmLeaf<i32, [{
let ParserMatchClass = Imm1_64Operand;
}
+def Imm0_0Operand : AsmImmRange<0, 0>;
def Imm0_1Operand : AsmImmRange<0, 1>;
def Imm0_3Operand : AsmImmRange<0, 3>;
def Imm0_7Operand : AsmImmRange<0, 7>;
@@ -845,13 +846,13 @@ def logical_imm64_not : Operand<i64> {
let ParserMatchClass = LogicalImm64NotOperand;
}
-// iXX_imm0_65535 predicates - True if the immediate is in the range [0,65535].
+// immXX_0_65535 predicates - True if the immediate is in the range [0,65535].
let ParserMatchClass = AsmImmRange<0, 65535>, PrintMethod = "printImmHex" in {
-def i32_imm0_65535 : Operand<i32>, TImmLeaf<i32, [{
+def timm32_0_65535 : Operand<i32>, TImmLeaf<i32, [{
return ((uint32_t)Imm) < 65536;
}]>;
-def i64_imm0_65535 : Operand<i64>, TImmLeaf<i64, [{
+def timm64_0_65535 : Operand<i64>, TImmLeaf<i64, [{
return ((uint64_t)Imm) < 65536;
}]>;
}
@@ -955,8 +956,8 @@ def imm0_3 : Operand<i64>, ImmLeaf<i64, [{
let ParserMatchClass = Imm0_3Operand;
}
-// imm32_0_7 predicate - True if the 32-bit immediate is in the range [0,7]
-def imm32_0_7 : Operand<i32>, TImmLeaf<i32, [{
+// timm32_0_7 predicate - True if the 32-bit immediate is in the range [0,7]
+def timm32_0_7 : Operand<i32>, TImmLeaf<i32, [{
return ((uint32_t)Imm) < 8;
}]> {
let ParserMatchClass = Imm0_7Operand;
@@ -1215,6 +1216,18 @@ def fpimm0 : FPImmLeaf<fAny, [{
return Imm.isExactlyValue(+0.0);
}]>;
+def fpimm_half : FPImmLeaf<fAny, [{
+ return Imm.isExactlyValue(+0.5);
+}]>;
+
+def fpimm_one : FPImmLeaf<fAny, [{
+ return Imm.isExactlyValue(+1.0);
+}]>;
+
+def fpimm_two : FPImmLeaf<fAny, [{
+ return Imm.isExactlyValue(+2.0);
+}]>;
+
def gi_fpimm16 : GICustomOperandRenderer<"renderFPImm16">,
GISDNodeXFormEquiv<fpimm16XForm>;
def gi_fpimm32 : GICustomOperandRenderer<"renderFPImm32">,
@@ -1241,12 +1254,15 @@ multiclass VectorIndex<ValueType ty, AsmOperandClass mc, code pred> {
def _timm : AsmVectorIndexOpnd<ty, mc>, TImmLeaf<ty, pred>;
}
+def VectorIndex0Operand : AsmVectorIndex<0, 0>;
def VectorIndex1Operand : AsmVectorIndex<1, 1>;
def VectorIndexBOperand : AsmVectorIndex<0, 15>;
def VectorIndexHOperand : AsmVectorIndex<0, 7>;
def VectorIndexSOperand : AsmVectorIndex<0, 3>;
def VectorIndexDOperand : AsmVectorIndex<0, 1>;
+defm VectorIndex0 : VectorIndex<i64, VectorIndex0Operand,
+ [{ return ((uint64_t)Imm) == 0; }]>;
defm VectorIndex1 : VectorIndex<i64, VectorIndex1Operand,
[{ return ((uint64_t)Imm) == 1; }]>;
defm VectorIndexB : VectorIndex<i64, VectorIndexBOperand,
@@ -1291,6 +1307,37 @@ defm sve_elm_idx_extdup_q
: VectorIndex<i64, SVEVectorIndexExtDupQOperand,
[{ return ((uint64_t)Imm) < 4; }]>;
+def sme_elm_idx0_0 : Operand<i64>, ImmLeaf<i64, [{
+ return ((uint64_t)Imm) == 0;
+}]> {
+ let ParserMatchClass = Imm0_0Operand;
+ let PrintMethod = "printMatrixIndex";
+}
+def sme_elm_idx0_1 : Operand<i64>, ImmLeaf<i64, [{
+ return ((uint64_t)Imm) <= 1;
+}]> {
+ let ParserMatchClass = Imm0_1Operand;
+ let PrintMethod = "printMatrixIndex";
+}
+def sme_elm_idx0_3 : Operand<i64>, ImmLeaf<i64, [{
+ return ((uint64_t)Imm) <= 3;
+}]> {
+ let ParserMatchClass = Imm0_3Operand;
+ let PrintMethod = "printMatrixIndex";
+}
+def sme_elm_idx0_7 : Operand<i64>, ImmLeaf<i64, [{
+ return ((uint64_t)Imm) <= 7;
+}]> {
+ let ParserMatchClass = Imm0_7Operand;
+ let PrintMethod = "printMatrixIndex";
+}
+def sme_elm_idx0_15 : Operand<i64>, ImmLeaf<i64, [{
+ return ((uint64_t)Imm) <= 15;
+}]> {
+ let ParserMatchClass = Imm0_15Operand;
+ let PrintMethod = "printMatrixIndex";
+}
+
// 8-bit immediate for AdvSIMD where 64-bit values of the form:
// aaaaaaaa bbbbbbbb cccccccc dddddddd eeeeeeee ffffffff gggggggg hhhhhhhh
// are encoded as the eight bit value 'abcdefgh'.
@@ -1379,7 +1426,7 @@ class TMSystemINoOperand<bits<4> CRm, string asm, list<dag> pattern>
// System instructions for exit from transactions
class TMSystemException<bits<3> op1, string asm, list<dag> pattern>
- : I<(outs), (ins i64_imm0_65535:$imm), asm, "\t$imm", "", pattern>,
+ : I<(outs), (ins timm64_0_65535:$imm), asm, "\t$imm", "", pattern>,
Sched<[WriteSys]> {
bits<16> imm;
let Inst{31-24} = 0b11010100;
@@ -1703,7 +1750,7 @@ class AuthReturn<bits<3> op, bits<1> M, string asm>
let mayLoad = 1 in
class BaseAuthLoad<bit M, bit W, dag oops, dag iops, string asm,
- string operands, string cstr, Operand opr>
+ string operands, string cstr>
: I<oops, iops, asm, operands, cstr, []>, Sched<[]> {
bits<10> offset;
bits<5> Rn;
@@ -1725,11 +1772,11 @@ class BaseAuthLoad<bit M, bit W, dag oops, dag iops, string asm,
multiclass AuthLoad<bit M, string asm, Operand opr> {
def indexed : BaseAuthLoad<M, 0, (outs GPR64:$Rt),
(ins GPR64sp:$Rn, opr:$offset),
- asm, "\t$Rt, [$Rn, $offset]", "", opr>;
+ asm, "\t$Rt, [$Rn, $offset]", "">;
def writeback : BaseAuthLoad<M, 1, (outs GPR64sp:$wback, GPR64:$Rt),
(ins GPR64sp:$Rn, opr:$offset),
asm, "\t$Rt, [$Rn, $offset]!",
- "$Rn = $wback,@earlyclobber $wback", opr>;
+ "$Rn = $wback,@earlyclobber $wback">;
def : InstAlias<asm # "\t$Rt, [$Rn]",
(!cast<Instruction>(NAME # "indexed") GPR64:$Rt, GPR64sp:$Rn, 0)>;
@@ -1965,10 +2012,10 @@ class OneXRegData<bits<3> opc, string asm, SDPatternOperator node>
let Inst{31} = 1;
}
-class SignAuthOneData<bits<3> opcode_prefix, bits<2> opcode, string asm>
- : I<(outs GPR64:$Rd), (ins GPR64:$src, GPR64sp:$Rn), asm, "\t$Rd, $Rn",
- "$Rd = $src",
- []>,
+class SignAuthOneData<bits<3> opcode_prefix, bits<2> opcode, string asm,
+ SDPatternOperator op>
+ : I<(outs GPR64:$dst), (ins GPR64:$Rd, GPR64sp:$Rn), asm, "\t$Rd, $Rn",
+ "$dst = $Rd", [(set GPR64:$dst, (op GPR64:$Rd, opcode, GPR64sp:$Rn))]>,
Sched<[WriteI, ReadI]> {
bits<5> Rd;
bits<5> Rn;
@@ -1979,9 +2026,11 @@ class SignAuthOneData<bits<3> opcode_prefix, bits<2> opcode, string asm>
let Inst{4-0} = Rd;
}
-class SignAuthZero<bits<3> opcode_prefix, bits<2> opcode, string asm>
- : I<(outs GPR64:$Rd), (ins GPR64:$src), asm, "\t$Rd", "$Rd = $src",
- []>, Sched<[]> {
+class SignAuthZero<bits<3> opcode_prefix, bits<2> opcode, string asm,
+ SDPatternOperator op>
+ : I<(outs GPR64:$dst), (ins GPR64:$Rd), asm, "\t$Rd", "$dst = $Rd",
+ [(set GPR64:$dst, (op GPR64:$Rd, opcode, (i64 0)))]>,
+ Sched<[]> {
bits<5> Rd;
let Inst{31-15} = 0b11011010110000010;
let Inst{14-12} = opcode_prefix;
@@ -2193,16 +2242,14 @@ class BaseMulAccum<bit isSub, bits<3> opc, RegisterClass multype,
let Inst{4-0} = Rd;
}
-multiclass MulAccum<bit isSub, string asm, SDNode AccNode> {
+multiclass MulAccum<bit isSub, string asm> {
// MADD/MSUB generation is decided by MachineCombiner.cpp
- def Wrrr : BaseMulAccum<isSub, 0b000, GPR32, GPR32, asm,
- [/*(set GPR32:$Rd, (AccNode GPR32:$Ra, (mul GPR32:$Rn, GPR32:$Rm)))*/]>,
+ def Wrrr : BaseMulAccum<isSub, 0b000, GPR32, GPR32, asm, []>,
Sched<[WriteIM32, ReadIM, ReadIM, ReadIMA]> {
let Inst{31} = 0;
}
- def Xrrr : BaseMulAccum<isSub, 0b000, GPR64, GPR64, asm,
- [/*(set GPR64:$Rd, (AccNode GPR64:$Ra, (mul GPR64:$Rn, GPR64:$Rm)))*/]>,
+ def Xrrr : BaseMulAccum<isSub, 0b000, GPR64, GPR64, asm, []>,
Sched<[WriteIM64, ReadIM, ReadIM, ReadIMA]> {
let Inst{31} = 1;
}
@@ -3421,8 +3468,8 @@ def ro64 : ROAddrMode<ro_Windexed64, ro_Xindexed64, ro_Wextend64, ro_Xextend64>;
def ro128 : ROAddrMode<ro_Windexed128, ro_Xindexed128, ro_Wextend128,
ro_Xextend128>;
-class LoadStore8RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
- string asm, dag ins, dag outs, list<dag> pat>
+class LoadStore8RO<bits<2> sz, bit V, bits<2> opc, string asm, dag ins,
+ dag outs, list<dag> pat>
: I<ins, outs, asm, "\t$Rt, [$Rn, $Rm, $extend]", "", pat> {
bits<5> Rt;
bits<5> Rn;
@@ -3450,7 +3497,7 @@ class ROInstAlias<string asm, DAGOperand regtype, Instruction INST>
multiclass Load8RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
string asm, ValueType Ty, SDPatternOperator loadop> {
let AddedComplexity = 10 in
- def roW : LoadStore8RO<sz, V, opc, regtype, asm,
+ def roW : LoadStore8RO<sz, V, opc, asm,
(outs regtype:$Rt),
(ins GPR64sp:$Rn, GPR32:$Rm, ro_Wextend8:$extend),
[(set (Ty regtype:$Rt),
@@ -3461,7 +3508,7 @@ multiclass Load8RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
}
let AddedComplexity = 10 in
- def roX : LoadStore8RO<sz, V, opc, regtype, asm,
+ def roX : LoadStore8RO<sz, V, opc, asm,
(outs regtype:$Rt),
(ins GPR64sp:$Rn, GPR64:$Rm, ro_Xextend8:$extend),
[(set (Ty regtype:$Rt),
@@ -3477,30 +3524,30 @@ multiclass Load8RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
multiclass Store8RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
string asm, ValueType Ty, SDPatternOperator storeop> {
let AddedComplexity = 10 in
- def roW : LoadStore8RO<sz, V, opc, regtype, asm, (outs),
+ def roW : LoadStore8RO<sz, V, opc, asm, (outs),
(ins regtype:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend8:$extend),
[(storeop (Ty regtype:$Rt),
(ro_Windexed8 GPR64sp:$Rn, GPR32:$Rm,
ro_Wextend8:$extend))]>,
- Sched<[WriteSTIdx, ReadAdrBase]> {
+ Sched<[WriteSTIdx, ReadST, ReadAdrBase]> {
let Inst{13} = 0b0;
}
let AddedComplexity = 10 in
- def roX : LoadStore8RO<sz, V, opc, regtype, asm, (outs),
+ def roX : LoadStore8RO<sz, V, opc, asm, (outs),
(ins regtype:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend8:$extend),
[(storeop (Ty regtype:$Rt),
(ro_Xindexed8 GPR64sp:$Rn, GPR64:$Rm,
ro_Xextend8:$extend))]>,
- Sched<[WriteSTIdx, ReadAdrBase]> {
+ Sched<[WriteSTIdx, ReadST, ReadAdrBase]> {
let Inst{13} = 0b1;
}
def : ROInstAlias<asm, regtype, !cast<Instruction>(NAME # "roX")>;
}
-class LoadStore16RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
- string asm, dag ins, dag outs, list<dag> pat>
+class LoadStore16RO<bits<2> sz, bit V, bits<2> opc, string asm, dag ins,
+ dag outs, list<dag> pat>
: I<ins, outs, asm, "\t$Rt, [$Rn, $Rm, $extend]", "", pat> {
bits<5> Rt;
bits<5> Rn;
@@ -3524,7 +3571,7 @@ class LoadStore16RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
multiclass Load16RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
string asm, ValueType Ty, SDPatternOperator loadop> {
let AddedComplexity = 10 in
- def roW : LoadStore16RO<sz, V, opc, regtype, asm, (outs regtype:$Rt),
+ def roW : LoadStore16RO<sz, V, opc, asm, (outs regtype:$Rt),
(ins GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend),
[(set (Ty regtype:$Rt),
(loadop (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
@@ -3534,7 +3581,7 @@ multiclass Load16RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
}
let AddedComplexity = 10 in
- def roX : LoadStore16RO<sz, V, opc, regtype, asm, (outs regtype:$Rt),
+ def roX : LoadStore16RO<sz, V, opc, asm, (outs regtype:$Rt),
(ins GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend),
[(set (Ty regtype:$Rt),
(loadop (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
@@ -3549,30 +3596,30 @@ multiclass Load16RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
multiclass Store16RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
string asm, ValueType Ty, SDPatternOperator storeop> {
let AddedComplexity = 10 in
- def roW : LoadStore16RO<sz, V, opc, regtype, asm, (outs),
+ def roW : LoadStore16RO<sz, V, opc, asm, (outs),
(ins regtype:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend),
[(storeop (Ty regtype:$Rt),
(ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
ro_Wextend16:$extend))]>,
- Sched<[WriteSTIdx, ReadAdrBase]> {
+ Sched<[WriteSTIdx, ReadST, ReadAdrBase]> {
let Inst{13} = 0b0;
}
let AddedComplexity = 10 in
- def roX : LoadStore16RO<sz, V, opc, regtype, asm, (outs),
+ def roX : LoadStore16RO<sz, V, opc, asm, (outs),
(ins regtype:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend),
[(storeop (Ty regtype:$Rt),
(ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
ro_Xextend16:$extend))]>,
- Sched<[WriteSTIdx, ReadAdrBase]> {
+ Sched<[WriteSTIdx, ReadST, ReadAdrBase]> {
let Inst{13} = 0b1;
}
def : ROInstAlias<asm, regtype, !cast<Instruction>(NAME # "roX")>;
}
-class LoadStore32RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
- string asm, dag ins, dag outs, list<dag> pat>
+class LoadStore32RO<bits<2> sz, bit V, bits<2> opc, string asm, dag ins,
+ dag outs, list<dag> pat>
: I<ins, outs, asm, "\t$Rt, [$Rn, $Rm, $extend]", "", pat> {
bits<5> Rt;
bits<5> Rn;
@@ -3596,7 +3643,7 @@ class LoadStore32RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
multiclass Load32RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
string asm, ValueType Ty, SDPatternOperator loadop> {
let AddedComplexity = 10 in
- def roW : LoadStore32RO<sz, V, opc, regtype, asm, (outs regtype:$Rt),
+ def roW : LoadStore32RO<sz, V, opc, asm, (outs regtype:$Rt),
(ins GPR64sp:$Rn, GPR32:$Rm, ro_Wextend32:$extend),
[(set (Ty regtype:$Rt),
(loadop (ro_Windexed32 GPR64sp:$Rn, GPR32:$Rm,
@@ -3606,7 +3653,7 @@ multiclass Load32RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
}
let AddedComplexity = 10 in
- def roX : LoadStore32RO<sz, V, opc, regtype, asm, (outs regtype:$Rt),
+ def roX : LoadStore32RO<sz, V, opc, asm, (outs regtype:$Rt),
(ins GPR64sp:$Rn, GPR64:$Rm, ro_Xextend32:$extend),
[(set (Ty regtype:$Rt),
(loadop (ro_Xindexed32 GPR64sp:$Rn, GPR64:$Rm,
@@ -3621,30 +3668,30 @@ multiclass Load32RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
multiclass Store32RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
string asm, ValueType Ty, SDPatternOperator storeop> {
let AddedComplexity = 10 in
- def roW : LoadStore32RO<sz, V, opc, regtype, asm, (outs),
+ def roW : LoadStore32RO<sz, V, opc, asm, (outs),
(ins regtype:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend32:$extend),
[(storeop (Ty regtype:$Rt),
(ro_Windexed32 GPR64sp:$Rn, GPR32:$Rm,
ro_Wextend32:$extend))]>,
- Sched<[WriteSTIdx, ReadAdrBase]> {
+ Sched<[WriteSTIdx, ReadST, ReadAdrBase]> {
let Inst{13} = 0b0;
}
let AddedComplexity = 10 in
- def roX : LoadStore32RO<sz, V, opc, regtype, asm, (outs),
+ def roX : LoadStore32RO<sz, V, opc, asm, (outs),
(ins regtype:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend32:$extend),
[(storeop (Ty regtype:$Rt),
(ro_Xindexed32 GPR64sp:$Rn, GPR64:$Rm,
ro_Xextend32:$extend))]>,
- Sched<[WriteSTIdx, ReadAdrBase]> {
+ Sched<[WriteSTIdx, ReadST, ReadAdrBase]> {
let Inst{13} = 0b1;
}
def : ROInstAlias<asm, regtype, !cast<Instruction>(NAME # "roX")>;
}
-class LoadStore64RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
- string asm, dag ins, dag outs, list<dag> pat>
+class LoadStore64RO<bits<2> sz, bit V, bits<2> opc, string asm, dag ins,
+ dag outs, list<dag> pat>
: I<ins, outs, asm, "\t$Rt, [$Rn, $Rm, $extend]", "", pat> {
bits<5> Rt;
bits<5> Rn;
@@ -3668,7 +3715,7 @@ class LoadStore64RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
multiclass Load64RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
string asm, ValueType Ty, SDPatternOperator loadop> {
let AddedComplexity = 10, mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
- def roW : LoadStore64RO<sz, V, opc, regtype, asm, (outs regtype:$Rt),
+ def roW : LoadStore64RO<sz, V, opc, asm, (outs regtype:$Rt),
(ins GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend),
[(set (Ty regtype:$Rt),
(loadop (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
@@ -3678,7 +3725,7 @@ multiclass Load64RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
}
let AddedComplexity = 10, mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
- def roX : LoadStore64RO<sz, V, opc, regtype, asm, (outs regtype:$Rt),
+ def roX : LoadStore64RO<sz, V, opc, asm, (outs regtype:$Rt),
(ins GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend),
[(set (Ty regtype:$Rt),
(loadop (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
@@ -3693,30 +3740,30 @@ multiclass Load64RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
multiclass Store64RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
string asm, ValueType Ty, SDPatternOperator storeop> {
let AddedComplexity = 10, mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
- def roW : LoadStore64RO<sz, V, opc, regtype, asm, (outs),
+ def roW : LoadStore64RO<sz, V, opc, asm, (outs),
(ins regtype:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend),
[(storeop (Ty regtype:$Rt),
(ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
ro_Wextend64:$extend))]>,
- Sched<[WriteSTIdx, ReadAdrBase]> {
+ Sched<[WriteSTIdx, ReadST, ReadAdrBase]> {
let Inst{13} = 0b0;
}
let AddedComplexity = 10, mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
- def roX : LoadStore64RO<sz, V, opc, regtype, asm, (outs),
+ def roX : LoadStore64RO<sz, V, opc, asm, (outs),
(ins regtype:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend),
[(storeop (Ty regtype:$Rt),
(ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
ro_Xextend64:$extend))]>,
- Sched<[WriteSTIdx, ReadAdrBase]> {
+ Sched<[WriteSTIdx, ReadST, ReadAdrBase]> {
let Inst{13} = 0b1;
}
def : ROInstAlias<asm, regtype, !cast<Instruction>(NAME # "roX")>;
}
-class LoadStore128RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
- string asm, dag ins, dag outs, list<dag> pat>
+class LoadStore128RO<bits<2> sz, bit V, bits<2> opc, string asm, dag ins,
+ dag outs, list<dag> pat>
: I<ins, outs, asm, "\t$Rt, [$Rn, $Rm, $extend]", "", pat> {
bits<5> Rt;
bits<5> Rn;
@@ -3740,7 +3787,7 @@ class LoadStore128RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
multiclass Load128RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
string asm, ValueType Ty, SDPatternOperator loadop> {
let AddedComplexity = 10, mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
- def roW : LoadStore128RO<sz, V, opc, regtype, asm, (outs regtype:$Rt),
+ def roW : LoadStore128RO<sz, V, opc, asm, (outs regtype:$Rt),
(ins GPR64sp:$Rn, GPR32:$Rm, ro_Wextend128:$extend),
[(set (Ty regtype:$Rt),
(loadop (ro_Windexed128 GPR64sp:$Rn, GPR32:$Rm,
@@ -3750,7 +3797,7 @@ multiclass Load128RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
}
let AddedComplexity = 10, mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
- def roX : LoadStore128RO<sz, V, opc, regtype, asm, (outs regtype:$Rt),
+ def roX : LoadStore128RO<sz, V, opc, asm, (outs regtype:$Rt),
(ins GPR64sp:$Rn, GPR64:$Rm, ro_Xextend128:$extend),
[(set (Ty regtype:$Rt),
(loadop (ro_Xindexed128 GPR64sp:$Rn, GPR64:$Rm,
@@ -3763,20 +3810,20 @@ multiclass Load128RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
}
multiclass Store128RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
- string asm, ValueType Ty, SDPatternOperator storeop> {
+ string asm> {
let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
- def roW : LoadStore128RO<sz, V, opc, regtype, asm, (outs),
+ def roW : LoadStore128RO<sz, V, opc, asm, (outs),
(ins regtype:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend128:$extend),
[]>,
- Sched<[WriteSTIdx, ReadAdrBase]> {
+ Sched<[WriteSTIdx, ReadST, ReadAdrBase]> {
let Inst{13} = 0b0;
}
let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
- def roX : LoadStore128RO<sz, V, opc, regtype, asm, (outs),
+ def roX : LoadStore128RO<sz, V, opc, asm, (outs),
(ins regtype:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend128:$extend),
[]>,
- Sched<[WriteSTIdx, ReadAdrBase]> {
+ Sched<[WriteSTIdx, ReadST, ReadAdrBase]> {
let Inst{13} = 0b1;
}
@@ -4466,7 +4513,7 @@ multiclass MemTagStore<bits<2> opc1, string insn> {
let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in
class ExceptionGeneration<bits<3> op1, bits<2> ll, string asm>
- : I<(outs), (ins i32_imm0_65535:$imm), asm, "\t$imm", "", []>,
+ : I<(outs), (ins timm32_0_65535:$imm), asm, "\t$imm", "", []>,
Sched<[WriteSys]> {
bits<16> imm;
let Inst{31-24} = 0b11010100;
@@ -5309,7 +5356,7 @@ class BaseSIMDThreeSameVector<bit Q, bit U, bits<3> size, bits<5> opcode,
: I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm,
"{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind #
"|" # kind # "\t$Rd, $Rn, $Rm|}", "", pattern>,
- Sched<[WriteV]> {
+ Sched<[!if(Q, WriteVq, WriteVd)]> {
bits<5> Rd;
bits<5> Rn;
bits<5> Rm;
@@ -5332,7 +5379,7 @@ class BaseSIMDThreeSameVectorTied<bit Q, bit U, bits<3> size, bits<5> opcode,
: I<(outs regtype:$dst), (ins regtype:$Rd, regtype:$Rn, regtype:$Rm), asm,
"{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind #
"|" # kind # "\t$Rd, $Rn, $Rm}", "$Rd = $dst", pattern>,
- Sched<[WriteV]> {
+ Sched<[!if(Q, WriteVq, WriteVd)]> {
bits<5> Rd;
bits<5> Rn;
bits<5> Rm;
@@ -5351,7 +5398,7 @@ class BaseSIMDThreeSameVectorTied<bit Q, bit U, bits<3> size, bits<5> opcode,
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
class BaseSIMDThreeSameVectorPseudo<RegisterOperand regtype, list<dag> pattern>
: Pseudo<(outs regtype:$dst), (ins regtype:$Rd, regtype:$Rn, regtype:$Rm), pattern>,
- Sched<[WriteV]>;
+ Sched<[!if(!eq(regtype, V128), WriteVq, WriteVd)]>;
multiclass SIMDLogicalThreeVectorPseudo<SDPatternOperator OpNode> {
def v8i8 : BaseSIMDThreeSameVectorPseudo<V64,
@@ -5704,7 +5751,7 @@ class BaseSIMDTwoSameVector<bit Q, bit U, bits<2> size, bits<5> opcode,
: I<(outs regtype:$Rd), (ins regtype:$Rn), asm,
"{\t$Rd" # dstkind # ", $Rn" # srckind #
"|" # dstkind # "\t$Rd, $Rn}", "", pattern>,
- Sched<[WriteV]> {
+ Sched<[!if(Q, WriteVq, WriteVd)]> {
bits<5> Rd;
bits<5> Rn;
let Inst{31} = 0;
@@ -5729,7 +5776,7 @@ class BaseSIMDTwoSameVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode,
: I<(outs regtype:$dst), (ins regtype:$Rd, regtype:$Rn), asm,
"{\t$Rd" # dstkind # ", $Rn" # srckind #
"|" # dstkind # "\t$Rd, $Rn}", "$Rd = $dst", pattern>,
- Sched<[WriteV]> {
+ Sched<[!if(Q, WriteVq, WriteVd)]> {
bits<5> Rd;
bits<5> Rn;
let Inst{31} = 0;
@@ -5775,7 +5822,7 @@ class BaseSIMDVectorLShiftLongBySize<bit Q, bits<2> size,
: I<(outs V128:$Rd), (ins regtype:$Rn), asm,
"{\t$Rd" # dstkind # ", $Rn" # srckind # ", #" # amount #
"|" # dstkind # "\t$Rd, $Rn, #" # amount # "}", "", []>,
- Sched<[WriteV]> {
+ Sched<[WriteVq]> {
bits<5> Rd;
bits<5> Rn;
let Inst{31} = 0;
@@ -6032,7 +6079,7 @@ multiclass SIMDTwoVectorIntToFP<bit U, bit S, bits<5> opc, string asm,
[(set (v2f64 V128:$Rd), (OpNode (v2i64 V128:$Rn)))]>;
}
-
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
class BaseSIMDMixedTwoVector<bit Q, bit U, bits<2> size, bits<5> opcode,
RegisterOperand inreg, RegisterOperand outreg,
string asm, string outkind, string inkind,
@@ -6040,7 +6087,7 @@ class BaseSIMDMixedTwoVector<bit Q, bit U, bits<2> size, bits<5> opcode,
: I<(outs outreg:$Rd), (ins inreg:$Rn), asm,
"{\t$Rd" # outkind # ", $Rn" # inkind #
"|" # outkind # "\t$Rd, $Rn}", "", pattern>,
- Sched<[WriteV]> {
+ Sched<[WriteVq]> {
bits<5> Rd;
bits<5> Rn;
let Inst{31} = 0;
@@ -6055,6 +6102,7 @@ class BaseSIMDMixedTwoVector<bit Q, bit U, bits<2> size, bits<5> opcode,
let Inst{4-0} = Rd;
}
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
class BaseSIMDMixedTwoVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode,
RegisterOperand inreg, RegisterOperand outreg,
string asm, string outkind, string inkind,
@@ -6062,7 +6110,7 @@ class BaseSIMDMixedTwoVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode,
: I<(outs outreg:$dst), (ins outreg:$Rd, inreg:$Rn), asm,
"{\t$Rd" # outkind # ", $Rn" # inkind #
"|" # outkind # "\t$Rd, $Rn}", "$Rd = $dst", pattern>,
- Sched<[WriteV]> {
+ Sched<[WriteVq]> {
bits<5> Rd;
bits<5> Rn;
let Inst{31} = 0;
@@ -6114,7 +6162,7 @@ class BaseSIMDCmpTwoVector<bit Q, bit U, bits<2> size, bits<2> size2,
"{\t$Rd" # kind # ", $Rn" # kind # ", #" # zero #
"|" # kind # "\t$Rd, $Rn, #" # zero # "}", "",
[(set (dty regtype:$Rd), (OpNode (sty regtype:$Rn)))]>,
- Sched<[WriteV]> {
+ Sched<[!if(Q, WriteVq, WriteVd)]> {
bits<5> Rd;
bits<5> Rn;
let Inst{31} = 0;
@@ -6212,7 +6260,7 @@ class BaseSIMDFPCvtTwoVector<bit Q, bit U, bits<2> size, bits<5> opcode,
list<dag> pattern>
: I<(outs outtype:$Rd), (ins intype:$Rn), asm,
!strconcat("\t$Rd", VdTy, ", $Rn", VnTy), "", pattern>,
- Sched<[WriteV]> {
+ Sched<[WriteVq]> {
bits<5> Rd;
bits<5> Rn;
let Inst{31} = 0;
@@ -6227,13 +6275,14 @@ class BaseSIMDFPCvtTwoVector<bit Q, bit U, bits<2> size, bits<5> opcode,
let Inst{4-0} = Rd;
}
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
class BaseSIMDFPCvtTwoVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode,
RegisterOperand outtype, RegisterOperand intype,
string asm, string VdTy, string VnTy,
list<dag> pattern>
: I<(outs outtype:$dst), (ins outtype:$Rd, intype:$Rn), asm,
!strconcat("\t$Rd", VdTy, ", $Rn", VnTy), "$Rd = $dst", pattern>,
- Sched<[WriteV]> {
+ Sched<[WriteVq]> {
bits<5> Rd;
bits<5> Rn;
let Inst{31} = 0;
@@ -6296,7 +6345,7 @@ class BaseSIMDDifferentThreeVector<bit U, bits<3> size, bits<4> opcode,
: I<(outs outtype:$Rd), (ins intype1:$Rn, intype2:$Rm), asm,
"{\t$Rd" # outkind # ", $Rn" # inkind1 # ", $Rm" # inkind2 #
"|" # outkind # "\t$Rd, $Rn, $Rm}", "", pattern>,
- Sched<[WriteV]> {
+ Sched<[WriteVq]> {
bits<5> Rd;
bits<5> Rn;
bits<5> Rm;
@@ -6322,7 +6371,7 @@ class BaseSIMDDifferentThreeVectorTied<bit U, bits<3> size, bits<4> opcode,
: I<(outs outtype:$dst), (ins outtype:$Rd, intype1:$Rn, intype2:$Rm), asm,
"{\t$Rd" # outkind # ", $Rn" # inkind1 # ", $Rm" # inkind2 #
"|" # outkind # "\t$Rd, $Rn, $Rm}", "$Rd = $dst", pattern>,
- Sched<[WriteV]> {
+ Sched<[WriteVq]> {
bits<5> Rd;
bits<5> Rn;
bits<5> Rm;
@@ -6662,7 +6711,7 @@ class BaseSIMDBitwiseExtract<bit size, RegisterOperand regtype, ValueType vty,
"|" # kind # "\t$Rd, $Rn, $Rm, $imm}", "",
[(set (vty regtype:$Rd),
(AArch64ext regtype:$Rn, regtype:$Rm, (i32 imm:$imm)))]>,
- Sched<[WriteV]> {
+ Sched<[!if(size, WriteVq, WriteVd)]> {
bits<5> Rd;
bits<5> Rn;
bits<5> Rm;
@@ -6696,7 +6745,7 @@ class BaseSIMDZipVector<bits<3> size, bits<3> opc, RegisterOperand regtype,
"{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind #
"|" # kind # "\t$Rd, $Rn, $Rm}", "",
[(set (valty regtype:$Rd), (OpNode regtype:$Rn, regtype:$Rm))]>,
- Sched<[WriteV]> {
+ Sched<[!if(!eq(regtype, V128), WriteVq, WriteVd)]> {
bits<5> Rd;
bits<5> Rn;
bits<5> Rm;
@@ -6752,7 +6801,7 @@ class BaseSIMDThreeScalar<bit U, bits<3> size, bits<5> opcode,
list<dag> pattern>
: I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm,
"\t$Rd, $Rn, $Rm", "", pattern>,
- Sched<[WriteV]> {
+ Sched<[WriteVd]> {
bits<5> Rd;
bits<5> Rn;
bits<5> Rm;
@@ -6772,7 +6821,7 @@ class BaseSIMDThreeScalarTied<bit U, bits<2> size, bit R, bits<5> opcode,
dag oops, dag iops, string asm,
list<dag> pattern>
: I<oops, iops, asm, "\t$Rd, $Rn, $Rm", "$Rd = $dst", pattern>,
- Sched<[WriteV]> {
+ Sched<[WriteVd]> {
bits<5> Rd;
bits<5> Rn;
bits<5> Rm;
@@ -6815,8 +6864,7 @@ multiclass SIMDThreeScalarHS<bit U, bits<5> opc, string asm,
def v1i16 : BaseSIMDThreeScalar<U, 0b011, opc, FPR16, asm, []>;
}
-multiclass SIMDThreeScalarHSTied<bit U, bit R, bits<5> opc, string asm,
- SDPatternOperator OpNode = null_frag> {
+multiclass SIMDThreeScalarHSTied<bit U, bit R, bits<5> opc, string asm> {
def v1i32: BaseSIMDThreeScalarTied<U, 0b10, R, opc, (outs FPR32:$dst),
(ins FPR32:$Rd, FPR32:$Rn, FPR32:$Rm),
asm, []>;
@@ -6826,16 +6874,19 @@ multiclass SIMDThreeScalarHSTied<bit U, bit R, bits<5> opc, string asm,
}
multiclass SIMDFPThreeScalar<bit U, bit S, bits<3> opc, string asm,
- SDPatternOperator OpNode = null_frag> {
+ SDPatternOperator OpNode = null_frag,
+ Predicate pred = HasNEON> {
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
+ let Predicates = [pred] in {
def NAME#64 : BaseSIMDThreeScalar<U, {S,0b11}, {0b11,opc}, FPR64, asm,
[(set (f64 FPR64:$Rd), (OpNode (f64 FPR64:$Rn), (f64 FPR64:$Rm)))]>;
def NAME#32 : BaseSIMDThreeScalar<U, {S,0b01}, {0b11,opc}, FPR32, asm,
[(set FPR32:$Rd, (OpNode FPR32:$Rn, FPR32:$Rm))]>;
- let Predicates = [HasNEON, HasFullFP16] in {
+ }
+ let Predicates = [pred, HasFullFP16] in {
def NAME#16 : BaseSIMDThreeScalar<U, {S,0b10}, {0b00,opc}, FPR16, asm,
[(set (f16 FPR16:$Rd), (OpNode (f16 FPR16:$Rn), (f16 FPR16:$Rm)))]>;
- } // Predicates = [HasNEON, HasFullFP16]
+ }
}
def : Pat<(v1f64 (OpNode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
@@ -6863,7 +6914,7 @@ class BaseSIMDThreeScalarMixed<bit U, bits<2> size, bits<5> opcode,
dag oops, dag iops, string asm, string cstr, list<dag> pat>
: I<oops, iops, asm,
"\t$Rd, $Rn, $Rm", cstr, pat>,
- Sched<[WriteV]> {
+ Sched<[WriteVd]> {
bits<5> Rd;
bits<5> Rn;
bits<5> Rm;
@@ -6916,7 +6967,7 @@ class BaseSIMDTwoScalar<bit U, bits<2> size, bits<2> size2, bits<5> opcode,
string asm, list<dag> pat>
: I<(outs regtype:$Rd), (ins regtype2:$Rn), asm,
"\t$Rd, $Rn", "", pat>,
- Sched<[WriteV]> {
+ Sched<[WriteVd]> {
bits<5> Rd;
bits<5> Rn;
let Inst{31-30} = 0b01;
@@ -6938,7 +6989,7 @@ class BaseSIMDTwoScalarTied<bit U, bits<2> size, bits<5> opcode,
string asm, list<dag> pat>
: I<(outs regtype:$dst), (ins regtype:$Rd, regtype2:$Rn), asm,
"\t$Rd, $Rn", "$Rd = $dst", pat>,
- Sched<[WriteV]> {
+ Sched<[WriteVd]> {
bits<5> Rd;
bits<5> Rn;
let Inst{31-30} = 0b01;
@@ -6958,7 +7009,7 @@ class BaseSIMDCmpTwoScalar<bit U, bits<2> size, bits<2> size2, bits<5> opcode,
RegisterClass regtype, string asm, string zero>
: I<(outs regtype:$Rd), (ins regtype:$Rn), asm,
"\t$Rd, $Rn, #" # zero, "", []>,
- Sched<[WriteV]> {
+ Sched<[WriteVd]> {
bits<5> Rd;
bits<5> Rn;
let Inst{31-30} = 0b01;
@@ -6977,7 +7028,7 @@ class BaseSIMDCmpTwoScalar<bit U, bits<2> size, bits<2> size2, bits<5> opcode,
class SIMDInexactCvtTwoScalar<bits<5> opcode, string asm>
: I<(outs FPR32:$Rd), (ins FPR64:$Rn), asm, "\t$Rd, $Rn", "",
[(set (f32 FPR32:$Rd), (int_aarch64_sisd_fcvtxn (f64 FPR64:$Rn)))]>,
- Sched<[WriteV]> {
+ Sched<[WriteVd]> {
bits<5> Rd;
bits<5> Rn;
let Inst{31-17} = 0b011111100110000;
@@ -7025,10 +7076,13 @@ multiclass SIMDTwoScalarD<bit U, bits<5> opc, string asm,
(!cast<Instruction>(NAME # "v1i64") FPR64:$Rn)>;
}
-multiclass SIMDFPTwoScalar<bit U, bit S, bits<5> opc, string asm> {
+multiclass SIMDFPTwoScalar<bit U, bit S, bits<5> opc, string asm,
+ Predicate pred = HasNEON> {
+ let Predicates = [pred] in {
def v1i64 : BaseSIMDTwoScalar<U, {S,1}, 0b00, opc, FPR64, FPR64, asm,[]>;
def v1i32 : BaseSIMDTwoScalar<U, {S,0}, 0b00, opc, FPR32, FPR32, asm,[]>;
- let Predicates = [HasNEON, HasFullFP16] in {
+ }
+ let Predicates = [pred, HasFullFP16] in {
def v1f16 : BaseSIMDTwoScalar<U, {S,1}, 0b11, opc, FPR16, FPR16, asm,[]>;
}
}
@@ -7096,7 +7150,7 @@ class BaseSIMDPairwiseScalar<bit U, bits<2> size, bits<5> opcode,
string asm, string kind>
: I<(outs regtype:$Rd), (ins vectype:$Rn), asm,
"{\t$Rd, $Rn" # kind # "|" # kind # "\t$Rd, $Rn}", "", []>,
- Sched<[WriteV]> {
+ Sched<[WriteVd]> {
bits<5> Rd;
bits<5> Rn;
let Inst{31-30} = 0b01;
@@ -7136,7 +7190,7 @@ class BaseSIMDAcrossLanes<bit Q, bit U, bits<2> size, bits<5> opcode,
string asm, string kind, list<dag> pattern>
: I<(outs regtype:$Rd), (ins vectype:$Rn), asm,
"{\t$Rd, $Rn" # kind # "|" # kind # "\t$Rd, $Rn}", "", pattern>,
- Sched<[WriteV]> {
+ Sched<[!if(Q, WriteVq, WriteVd)]> {
bits<5> Rd;
bits<5> Rn;
let Inst{31} = 0;
@@ -7202,7 +7256,7 @@ multiclass SIMDFPAcrossLanes<bits<5> opcode, bit sz1, string asm,
class BaseSIMDInsDup<bit Q, bit op, dag outs, dag ins, string asm,
string operands, string constraints, list<dag> pattern>
: I<outs, ins, asm, operands, constraints, pattern>,
- Sched<[WriteV]> {
+ Sched<[!if(Q, WriteVq, WriteVd)]> {
bits<5> Rd;
bits<5> Rn;
let Inst{31} = 0;
@@ -7228,7 +7282,7 @@ class SIMDDupFromMain<bit Q, bits<5> imm5, string size, ValueType vectype,
class SIMDDupFromElement<bit Q, string dstkind, string srckind,
ValueType vectype, ValueType insreg,
RegisterOperand vecreg, Operand idxtype,
- ValueType elttype, SDNode OpNode>
+ SDNode OpNode>
: BaseSIMDInsDup<Q, 0, (outs vecreg:$Rd), (ins V128:$Rn, idxtype:$idx), "dup",
"{\t$Rd" # dstkind # ", $Rn" # srckind # "$idx" #
"|" # dstkind # "\t$Rd, $Rn$idx}", "",
@@ -7239,7 +7293,7 @@ class SIMDDupFromElement<bit Q, string dstkind, string srckind,
class SIMDDup64FromElement
: SIMDDupFromElement<1, ".2d", ".d", v2i64, v2i64, V128,
- VectorIndexD, i64, AArch64duplane64> {
+ VectorIndexD, AArch64duplane64> {
bits<1> idx;
let Inst{20} = idx;
let Inst{19-16} = 0b1000;
@@ -7248,7 +7302,7 @@ class SIMDDup64FromElement
class SIMDDup32FromElement<bit Q, string size, ValueType vectype,
RegisterOperand vecreg>
: SIMDDupFromElement<Q, size, ".s", vectype, v4i32, vecreg,
- VectorIndexS, i64, AArch64duplane32> {
+ VectorIndexS, AArch64duplane32> {
bits<2> idx;
let Inst{20-19} = idx;
let Inst{18-16} = 0b100;
@@ -7257,7 +7311,7 @@ class SIMDDup32FromElement<bit Q, string size, ValueType vectype,
class SIMDDup16FromElement<bit Q, string size, ValueType vectype,
RegisterOperand vecreg>
: SIMDDupFromElement<Q, size, ".h", vectype, v8i16, vecreg,
- VectorIndexH, i64, AArch64duplane16> {
+ VectorIndexH, AArch64duplane16> {
bits<3> idx;
let Inst{20-18} = idx;
let Inst{17-16} = 0b10;
@@ -7266,7 +7320,7 @@ class SIMDDup16FromElement<bit Q, string size, ValueType vectype,
class SIMDDup8FromElement<bit Q, string size, ValueType vectype,
RegisterOperand vecreg>
: SIMDDupFromElement<Q, size, ".b", vectype, v16i8, vecreg,
- VectorIndexB, i64, AArch64duplane8> {
+ VectorIndexB, AArch64duplane8> {
bits<4> idx;
let Inst{20-17} = idx;
let Inst{16} = 1;
@@ -7295,6 +7349,25 @@ class SIMDMovAlias<string asm, string size, Instruction inst,
(inst regtype:$dst, V128:$src, idxtype:$idx)>;
multiclass SMov {
+ // SMOV with a vector index of 0 is legal in Scalable Matrix Extension (SME)
+ // streaming mode.
+ let Predicates = [HasNEONorStreamingSVE] in {
+ def vi8to32_idx0 : SIMDSMov<0, ".b", GPR32, VectorIndex0> {
+ let Inst{20-16} = 0b00001;
+ }
+ def vi8to64_idx0 : SIMDSMov<1, ".b", GPR64, VectorIndex0> {
+ let Inst{20-16} = 0b00001;
+ }
+ def vi16to32_idx0 : SIMDSMov<0, ".h", GPR32, VectorIndex0> {
+ let Inst{20-16} = 0b00010;
+ }
+ def vi16to64_idx0 : SIMDSMov<1, ".h", GPR64, VectorIndex0> {
+ let Inst{20-16} = 0b00010;
+ }
+ def vi32to64_idx0 : SIMDSMov<1, ".s", GPR64, VectorIndex0> {
+ let Inst{20-16} = 0b00100;
+ }
+ }
def vi8to32 : SIMDSMov<0, ".b", GPR32, VectorIndexB> {
bits<4> idx;
let Inst{20-17} = idx;
@@ -7323,6 +7396,28 @@ multiclass SMov {
}
multiclass UMov {
+ // UMOV with a vector index of 0 is legal in Scalable Matrix Extension (SME)
+ // streaming mode.
+ let Predicates = [HasNEONorStreamingSVE] in {
+ def vi8_idx0 : SIMDUMov<0, ".b", v16i8, GPR32, VectorIndex0> {
+ let Inst{20-16} = 0b00001;
+ }
+ def vi16_idx0 : SIMDUMov<0, ".h", v8i16, GPR32, VectorIndex0> {
+ let Inst{20-16} = 0b00010;
+ }
+ def vi32_idx0 : SIMDUMov<0, ".s", v4i32, GPR32, VectorIndex0> {
+ let Inst{20-16} = 0b00100;
+ }
+ def vi64_idx0 : SIMDUMov<1, ".d", v2i64, GPR64, VectorIndex0> {
+ let Inst{20-16} = 0b01000;
+ }
+ def : SIMDMovAlias<"mov", ".s",
+ !cast<Instruction>(NAME # vi32_idx0),
+ GPR32, VectorIndex0>;
+ def : SIMDMovAlias<"mov", ".d",
+ !cast<Instruction>(NAME # vi64_idx0),
+ GPR64, VectorIndex0>;
+ }
def vi8 : SIMDUMov<0, ".b", v16i8, GPR32, VectorIndexB> {
bits<4> idx;
let Inst{20-17} = idx;
@@ -7473,7 +7568,7 @@ class BaseSIMDTableLookup<bit Q, bits<2> len, bit op, RegisterOperand vectype,
RegisterOperand listtype, string asm, string kind>
: I<(outs vectype:$Vd), (ins listtype:$Vn, vectype:$Vm), asm,
"\t$Vd" # kind # ", $Vn, $Vm" # kind, "", []>,
- Sched<[WriteV]> {
+ Sched<[!if(Q, WriteVq, WriteVd)]> {
bits<5> Vd;
bits<5> Vn;
bits<5> Vm;
@@ -7494,7 +7589,7 @@ class BaseSIMDTableLookupTied<bit Q, bits<2> len, bit op, RegisterOperand vectyp
RegisterOperand listtype, string asm, string kind>
: I<(outs vectype:$dst), (ins vectype:$Vd, listtype:$Vn, vectype:$Vm), asm,
"\t$Vd" # kind # ", $Vn, $Vm" # kind, "$Vd = $dst", []>,
- Sched<[WriteV]> {
+ Sched<[!if(Q, WriteVq, WriteVd)]> {
bits<5> Vd;
bits<5> Vn;
bits<5> Vm;
@@ -7609,11 +7704,11 @@ multiclass SIMDTableLookupTied<bit op, string asm> {
//----------------------------------------------------------------------------
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
class BaseSIMDScalarCPY<RegisterClass regtype, RegisterOperand vectype,
- string kind, Operand idxtype>
- : I<(outs regtype:$dst), (ins vectype:$src, idxtype:$idx), "mov",
+ string asm, string kind, Operand idxtype>
+ : I<(outs regtype:$dst), (ins vectype:$src, idxtype:$idx), asm,
"{\t$dst, $src" # kind # "$idx" #
"|\t$dst, $src$idx}", "", []>,
- Sched<[WriteV]> {
+ Sched<[WriteVd]> {
bits<5> dst;
bits<5> src;
let Inst{31-21} = 0b01011110000;
@@ -7630,22 +7725,22 @@ class SIMDScalarCPYAlias<string asm, string size, Instruction inst,
multiclass SIMDScalarCPY<string asm> {
- def i8 : BaseSIMDScalarCPY<FPR8, V128, ".b", VectorIndexB> {
+ def i8 : BaseSIMDScalarCPY<FPR8, V128, asm, ".b", VectorIndexB> {
bits<4> idx;
let Inst{20-17} = idx;
let Inst{16} = 1;
}
- def i16 : BaseSIMDScalarCPY<FPR16, V128, ".h", VectorIndexH> {
+ def i16 : BaseSIMDScalarCPY<FPR16, V128, asm, ".h", VectorIndexH> {
bits<3> idx;
let Inst{20-18} = idx;
let Inst{17-16} = 0b10;
}
- def i32 : BaseSIMDScalarCPY<FPR32, V128, ".s", VectorIndexS> {
+ def i32 : BaseSIMDScalarCPY<FPR32, V128, asm, ".s", VectorIndexS> {
bits<2> idx;
let Inst{20-19} = idx;
let Inst{18-16} = 0b100;
}
- def i64 : BaseSIMDScalarCPY<FPR64, V128, ".d", VectorIndexD> {
+ def i64 : BaseSIMDScalarCPY<FPR64, V128, asm, ".d", VectorIndexD> {
bits<1> idx;
let Inst{20} = idx;
let Inst{19-16} = 0b1000;
@@ -7678,7 +7773,7 @@ class BaseSIMDModifiedImm<bit Q, bit op, bit op2, dag oops, dag iops,
string asm, string op_string,
string cstr, list<dag> pattern>
: I<oops, iops, asm, op_string, cstr, pattern>,
- Sched<[WriteV]> {
+ Sched<[!if(Q, WriteVq, WriteVd)]> {
bits<5> Rd;
bits<8> imm8;
let Inst{31} = 0;
@@ -7848,7 +7943,7 @@ class BaseSIMDIndexed<bit Q, bit U, bit Scalar, bits<2> size, bits<4> opc,
asm,
"{\t$Rd" # dst_kind # ", $Rn" # lhs_kind # ", $Rm" # rhs_kind # "$idx" #
"|" # apple_kind # "\t$Rd, $Rn, $Rm$idx}", "", pattern>,
- Sched<[WriteV]> {
+ Sched<[WriteVd]> {
bits<5> Rd;
bits<5> Rn;
bits<5> Rm;
@@ -7878,7 +7973,7 @@ class BaseSIMDIndexedTied<bit Q, bit U, bit Scalar, bits<2> size, bits<4> opc,
(ins dst_reg:$Rd, lhs_reg:$Rn, rhs_reg:$Rm, vec_idx:$idx), asm,
"{\t$Rd" # dst_kind # ", $Rn" # lhs_kind # ", $Rm" # rhs_kind # "$idx" #
"|" # apple_kind # "\t$Rd, $Rn, $Rm$idx}", "$Rd = $dst", pattern>,
- Sched<[WriteV]> {
+ Sched<[WriteVd]> {
bits<5> Rd;
bits<5> Rn;
bits<5> Rm;
@@ -7971,7 +8066,7 @@ class SIMDBF16MLALIndex<bit Q, string asm, SDPatternOperator OpNode>
(v8bf16
(AArch64duplane16 (v8bf16 V128_lo:$Rm),
VectorIndexH:$idx)))))]>,
- Sched<[WriteV]> {
+ Sched<[WriteVq]> {
bits<5> Rd;
bits<5> Rn;
bits<4> Rm;
@@ -8892,7 +8987,7 @@ class BaseSIMDScalarShift<bit U, bits<5> opc, bits<7> fixed_imm,
Operand immtype, string asm, list<dag> pattern>
: I<(outs regtype1:$Rd), (ins regtype2:$Rn, immtype:$imm),
asm, "\t$Rd, $Rn, $imm", "", pattern>,
- Sched<[WriteV]> {
+ Sched<[WriteVd]> {
bits<5> Rd;
bits<5> Rn;
bits<7> imm;
@@ -8912,7 +9007,7 @@ class BaseSIMDScalarShiftTied<bit U, bits<5> opc, bits<7> fixed_imm,
Operand immtype, string asm, list<dag> pattern>
: I<(outs regtype1:$dst), (ins regtype1:$Rd, regtype2:$Rn, immtype:$imm),
asm, "\t$Rd, $Rn, $imm", "$Rd = $dst", pattern>,
- Sched<[WriteV]> {
+ Sched<[WriteVd]> {
bits<5> Rd;
bits<5> Rn;
bits<7> imm;
@@ -9076,7 +9171,7 @@ class BaseSIMDVectorShift<bit Q, bit U, bits<5> opc, bits<7> fixed_imm,
: I<(outs dst_reg:$Rd), (ins src_reg:$Rn, immtype:$imm),
asm, "{\t$Rd" # dst_kind # ", $Rn" # src_kind # ", $imm" #
"|" # dst_kind # "\t$Rd, $Rn, $imm}", "", pattern>,
- Sched<[WriteV]> {
+ Sched<[!if(Q, WriteVq, WriteVd)]> {
bits<5> Rd;
bits<5> Rn;
let Inst{31} = 0;
@@ -9099,7 +9194,7 @@ class BaseSIMDVectorShiftTied<bit Q, bit U, bits<5> opc, bits<7> fixed_imm,
: I<(outs vectype1:$dst), (ins vectype1:$Rd, vectype2:$Rn, immtype:$imm),
asm, "{\t$Rd" # dst_kind # ", $Rn" # src_kind # ", $imm" #
"|" # dst_kind # "\t$Rd, $Rn, $imm}", "$Rd = $dst", pattern>,
- Sched<[WriteV]> {
+ Sched<[!if(Q, WriteVq, WriteVd)]> {
bits<5> Rd;
bits<5> Rn;
let Inst{31} = 0;
@@ -10646,7 +10741,7 @@ class BaseSIMDThreeSameVectorComplex<bit Q, bit U, bits<2> size, bits<3> opcode,
: I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, rottype:$rot), asm,
"{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # ", $rot"
"|" # kind # "\t$Rd, $Rn, $Rm, $rot}", "", pattern>,
- Sched<[WriteV]> {
+ Sched<[!if(Q, WriteVq, WriteVd)]> {
bits<5> Rd;
bits<5> Rn;
bits<5> Rm;
@@ -10720,7 +10815,7 @@ class BaseSIMDThreeSameVectorTiedComplex<bit Q, bit U, bits<2> size,
(ins regtype:$Rd, regtype:$Rn, regtype:$Rm, rottype:$rot), asm,
"{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # ", $rot"
"|" # kind # "\t$Rd, $Rn, $Rm, $rot}", "$Rd = $dst", pattern>,
- Sched<[WriteV]> {
+ Sched<[!if(Q, WriteVq, WriteVd)]> {
bits<5> Rd;
bits<5> Rn;
bits<5> Rm;
@@ -10796,7 +10891,7 @@ class BaseSIMDIndexedTiedComplex<bit Q, bit U, bit Scalar, bits<2> size,
"{\t$Rd" # dst_kind # ", $Rn" # lhs_kind # ", $Rm" # rhs_kind #
"$idx, $rot" # "|" # apple_kind #
"\t$Rd, $Rn, $Rm$idx, $rot}", "$Rd = $dst", pattern>,
- Sched<[WriteV]> {
+ Sched<[!if(Q, WriteVq, WriteVd)]> {
bits<5> Rd;
bits<5> Rn;
bits<5> Rm;
@@ -10822,8 +10917,8 @@ class BaseSIMDIndexedTiedComplex<bit Q, bit U, bit Scalar, bits<2> size,
// The complex instructions index by pairs of elements, so the VectorIndexes
// don't match the lane types, and the index bits are different from those of
// the other classes.
-multiclass SIMDIndexedTiedComplexHSD<bit U, bit opc1, bit opc2, Operand rottype,
- string asm, SDPatternOperator OpNode> {
+multiclass SIMDIndexedTiedComplexHSD<bit opc1, bit opc2, Operand rottype,
+ string asm> {
let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in {
def v4f16_indexed : BaseSIMDIndexedTiedComplex<0, 1, 0, 0b01, opc1, opc2, V64,
V64, V128, VectorIndexD, rottype, asm, ".4h", ".4h",
@@ -10861,7 +10956,7 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
class AESBase<bits<4> opc, string asm, dag outs, dag ins, string cstr,
list<dag> pat>
: I<outs, ins, asm, "{\t$Rd.16b, $Rn.16b|.16b\t$Rd, $Rn}", cstr, pat>,
- Sched<[WriteV]>{
+ Sched<[WriteVq]>{
bits<5> Rd;
bits<5> Rn;
let Inst{31-16} = 0b0100111000101000;
@@ -10887,7 +10982,7 @@ class SHA3OpTiedInst<bits<3> opc, string asm, string dst_lhs_kind,
: I<oops, iops, asm,
"{\t$Rd" # dst_lhs_kind # ", $Rn" # dst_lhs_kind # ", $Rm.4s" #
"|.4s\t$Rd, $Rn, $Rm}", "$Rd = $dst", pat>,
- Sched<[WriteV]>{
+ Sched<[WriteVq]>{
bits<5> Rd;
bits<5> Rn;
bits<5> Rm;
@@ -10927,7 +11022,7 @@ class SHA2OpInst<bits<4> opc, string asm, string kind,
list<dag> pat>
: I<oops, iops, asm, "{\t$Rd" # kind # ", $Rn" # kind #
"|" # kind # "\t$Rd, $Rn}", cstr, pat>,
- Sched<[WriteV]>{
+ Sched<[WriteVq]>{
bits<5> Rd;
bits<5> Rn;
let Inst{31-16} = 0b0101111000101000;
@@ -10950,7 +11045,7 @@ class SHAInstSS<bits<4> opc, string asm, Intrinsic OpNode>
// Armv8.2-A Crypto extensions
class BaseCryptoV82<dag oops, dag iops, string asm, string asmops, string cst,
list<dag> pattern>
- : I <oops, iops, asm, asmops, cst, pattern>, Sched<[WriteV]> {
+ : I <oops, iops, asm, asmops, cst, pattern>, Sched<[WriteVq]> {
bits<5> Vd;
bits<5> Vn;
let Inst{31-25} = 0b1100111;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index b03d421d3e6d..f8f8ee3f1e6c 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1112,8 +1112,8 @@ bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
/// Return true if the comparison instruction can be analyzed.
bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
- Register &SrcReg2, int &CmpMask,
- int &CmpValue) const {
+ Register &SrcReg2, int64_t &CmpMask,
+ int64_t &CmpValue) const {
// The first operand can be a frame index where we'd normally expect a
// register.
assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
@@ -1155,8 +1155,7 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
SrcReg = MI.getOperand(1).getReg();
SrcReg2 = 0;
CmpMask = ~0;
- // FIXME: In order to convert CmpValue to 0 or 1
- CmpValue = MI.getOperand(2).getImm() != 0;
+ CmpValue = MI.getOperand(2).getImm();
return true;
case AArch64::ANDSWri:
case AArch64::ANDSXri:
@@ -1165,14 +1164,9 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
SrcReg = MI.getOperand(1).getReg();
SrcReg2 = 0;
CmpMask = ~0;
- // FIXME:The return val type of decodeLogicalImmediate is uint64_t,
- // while the type of CmpValue is int. When converting uint64_t to int,
- // the high 32 bits of uint64_t will be lost.
- // In fact it causes a bug in spec2006-483.xalancbmk
- // CmpValue is only used to compare with zero in OptimizeCompareInstr
CmpValue = AArch64_AM::decodeLogicalImmediate(
MI.getOperand(2).getImm(),
- MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
+ MI.getOpcode() == AArch64::ANDSWri ? 32 : 64);
return true;
}
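(Illustrative, standalone sketch, not part of the patch: the truncation hazard the removed FIXME describes, and the reason CmpMask/CmpValue are widened to int64_t. decodeLogicalImmediate returns a uint64_t, so narrowing it to int keeps only the low 32 bits on typical targets, and a nonzero 64-bit immediate can then look like zero.)

#include <cassert>
#include <cstdint>

int main() {
  // A 64-bit immediate whose low 32 bits are all zero (1ULL << 32).
  uint64_t Imm = 0x100000000ULL;
  int Narrow = static_cast<int>(Imm);       // old 'int CmpValue': keeps only the low 32 bits
  int64_t Wide = static_cast<int64_t>(Imm); // new 'int64_t CmpValue': value preserved
  assert(Narrow == 0 && Wide != 0);         // the narrowed copy reads as a compare with zero
  return 0;
}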
@@ -1433,8 +1427,8 @@ bool AArch64InstrInfo::optimizePTestInstr(
/// instruction.
/// Only comparison with zero is supported.
bool AArch64InstrInfo::optimizeCompareInstr(
- MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int CmpMask,
- int CmpValue, const MachineRegisterInfo *MRI) const {
+ MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
+ int64_t CmpValue, const MachineRegisterInfo *MRI) const {
assert(CmpInstr.getParent());
assert(MRI);
@@ -1462,10 +1456,6 @@ bool AArch64InstrInfo::optimizeCompareInstr(
if (CmpInstr.getOpcode() == AArch64::PTEST_PP)
return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
- // Continue only if we have a "ri" where immediate is zero.
- // FIXME:CmpValue has already been converted to 0 or 1 in analyzeCompare
- // function.
- assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
if (SrcReg2 != 0)
return false;
@@ -1473,9 +1463,10 @@ bool AArch64InstrInfo::optimizeCompareInstr(
if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
return false;
- if (!CmpValue && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
+ if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
return true;
- return removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
+ return (CmpValue == 0 || CmpValue == 1) &&
+ removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
}
/// Get opcode of S version of Instr.
@@ -2099,10 +2090,8 @@ bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
default:
break;
case TargetOpcode::COPY: {
- // FPR64 copies will by lowered to ORR.16b
Register DstReg = MI.getOperand(0).getReg();
- return (AArch64::FPR64RegClass.contains(DstReg) ||
- AArch64::FPR128RegClass.contains(DstReg));
+ return AArch64::FPR128RegClass.contains(DstReg);
}
case AArch64::ORRv16i8:
if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
@@ -2274,32 +2263,35 @@ unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
case AArch64::STNPSi:
case AArch64::LDG:
case AArch64::STGPi:
+
case AArch64::LD1B_IMM:
- case AArch64::LD1H_IMM:
- case AArch64::LD1W_IMM:
- case AArch64::LD1D_IMM:
- case AArch64::ST1B_IMM:
- case AArch64::ST1H_IMM:
- case AArch64::ST1W_IMM:
- case AArch64::ST1D_IMM:
case AArch64::LD1B_H_IMM:
+ case AArch64::LD1B_S_IMM:
+ case AArch64::LD1B_D_IMM:
case AArch64::LD1SB_H_IMM:
+ case AArch64::LD1SB_S_IMM:
+ case AArch64::LD1SB_D_IMM:
+ case AArch64::LD1H_IMM:
case AArch64::LD1H_S_IMM:
+ case AArch64::LD1H_D_IMM:
case AArch64::LD1SH_S_IMM:
+ case AArch64::LD1SH_D_IMM:
+ case AArch64::LD1W_IMM:
case AArch64::LD1W_D_IMM:
case AArch64::LD1SW_D_IMM:
+ case AArch64::LD1D_IMM:
+
+ case AArch64::ST1B_IMM:
case AArch64::ST1B_H_IMM:
- case AArch64::ST1H_S_IMM:
- case AArch64::ST1W_D_IMM:
- case AArch64::LD1B_S_IMM:
- case AArch64::LD1SB_S_IMM:
- case AArch64::LD1H_D_IMM:
- case AArch64::LD1SH_D_IMM:
case AArch64::ST1B_S_IMM:
- case AArch64::ST1H_D_IMM:
- case AArch64::LD1B_D_IMM:
- case AArch64::LD1SB_D_IMM:
case AArch64::ST1B_D_IMM:
+ case AArch64::ST1H_IMM:
+ case AArch64::ST1H_S_IMM:
+ case AArch64::ST1H_D_IMM:
+ case AArch64::ST1W_IMM:
+ case AArch64::ST1W_D_IMM:
+ case AArch64::ST1D_IMM:
+
case AArch64::LD1RB_IMM:
case AArch64::LD1RB_H_IMM:
case AArch64::LD1RB_S_IMM:
@@ -2316,6 +2308,32 @@ unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
case AArch64::LD1RW_D_IMM:
case AArch64::LD1RSW_IMM:
case AArch64::LD1RD_IMM:
+
+ case AArch64::LDNT1B_ZRI:
+ case AArch64::LDNT1H_ZRI:
+ case AArch64::LDNT1W_ZRI:
+ case AArch64::LDNT1D_ZRI:
+ case AArch64::STNT1B_ZRI:
+ case AArch64::STNT1H_ZRI:
+ case AArch64::STNT1W_ZRI:
+ case AArch64::STNT1D_ZRI:
+
+ case AArch64::LDNF1B_IMM:
+ case AArch64::LDNF1B_H_IMM:
+ case AArch64::LDNF1B_S_IMM:
+ case AArch64::LDNF1B_D_IMM:
+ case AArch64::LDNF1SB_H_IMM:
+ case AArch64::LDNF1SB_S_IMM:
+ case AArch64::LDNF1SB_D_IMM:
+ case AArch64::LDNF1H_IMM:
+ case AArch64::LDNF1H_S_IMM:
+ case AArch64::LDNF1H_D_IMM:
+ case AArch64::LDNF1SH_S_IMM:
+ case AArch64::LDNF1SH_D_IMM:
+ case AArch64::LDNF1W_IMM:
+ case AArch64::LDNF1W_D_IMM:
+ case AArch64::LDNF1SW_D_IMM:
+ case AArch64::LDNF1D_IMM:
return 3;
case AArch64::ADDG:
case AArch64::STGOffset:
@@ -2866,10 +2884,22 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::LD1H_IMM:
case AArch64::LD1W_IMM:
case AArch64::LD1D_IMM:
+ case AArch64::LDNT1B_ZRI:
+ case AArch64::LDNT1H_ZRI:
+ case AArch64::LDNT1W_ZRI:
+ case AArch64::LDNT1D_ZRI:
case AArch64::ST1B_IMM:
case AArch64::ST1H_IMM:
case AArch64::ST1W_IMM:
case AArch64::ST1D_IMM:
+ case AArch64::STNT1B_ZRI:
+ case AArch64::STNT1H_ZRI:
+ case AArch64::STNT1W_ZRI:
+ case AArch64::STNT1D_ZRI:
+ case AArch64::LDNF1B_IMM:
+ case AArch64::LDNF1H_IMM:
+ case AArch64::LDNF1W_IMM:
+ case AArch64::LDNF1D_IMM:
// A full vector's worth of data
// Width = mbytes * elements
Scale = TypeSize::Scalable(16);
@@ -2886,6 +2916,12 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::ST1B_H_IMM:
case AArch64::ST1H_S_IMM:
case AArch64::ST1W_D_IMM:
+ case AArch64::LDNF1B_H_IMM:
+ case AArch64::LDNF1SB_H_IMM:
+ case AArch64::LDNF1H_S_IMM:
+ case AArch64::LDNF1SH_S_IMM:
+ case AArch64::LDNF1W_D_IMM:
+ case AArch64::LDNF1SW_D_IMM:
// A half vector's worth of data
// Width = mbytes * elements
Scale = TypeSize::Scalable(8);
@@ -2899,6 +2935,10 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::LD1SH_D_IMM:
case AArch64::ST1B_S_IMM:
case AArch64::ST1H_D_IMM:
+ case AArch64::LDNF1B_S_IMM:
+ case AArch64::LDNF1SB_S_IMM:
+ case AArch64::LDNF1H_D_IMM:
+ case AArch64::LDNF1SH_D_IMM:
// A quarter vector's worth of data
// Width = mbytes * elements
Scale = TypeSize::Scalable(4);
@@ -2909,6 +2949,8 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::LD1B_D_IMM:
case AArch64::LD1SB_D_IMM:
case AArch64::ST1B_D_IMM:
+ case AArch64::LDNF1B_D_IMM:
+ case AArch64::LDNF1SB_D_IMM:
// An eighth of a vector's worth of data
// Width = mbytes * elements
Scale = TypeSize::Scalable(2);
@@ -3503,77 +3545,37 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (AArch64::FPR64RegClass.contains(DestReg) &&
AArch64::FPR64RegClass.contains(SrcReg)) {
- if (Subtarget.hasNEON()) {
- DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
- &AArch64::FPR128RegClass);
- SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
- &AArch64::FPR128RegClass);
- BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
- .addReg(SrcReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- } else {
- BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- }
+ BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
return;
}
if (AArch64::FPR32RegClass.contains(DestReg) &&
AArch64::FPR32RegClass.contains(SrcReg)) {
- if (Subtarget.hasNEON()) {
- DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
- &AArch64::FPR128RegClass);
- SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
- &AArch64::FPR128RegClass);
- BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
- .addReg(SrcReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- } else {
- BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- }
+ BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
return;
}
if (AArch64::FPR16RegClass.contains(DestReg) &&
AArch64::FPR16RegClass.contains(SrcReg)) {
- if (Subtarget.hasNEON()) {
- DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
- &AArch64::FPR128RegClass);
- SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
- &AArch64::FPR128RegClass);
- BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
- .addReg(SrcReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- } else {
- DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
- &AArch64::FPR32RegClass);
- SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
- &AArch64::FPR32RegClass);
- BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- }
+ DestReg =
+ RI.getMatchingSuperReg(DestReg, AArch64::hsub, &AArch64::FPR32RegClass);
+ SrcReg =
+ RI.getMatchingSuperReg(SrcReg, AArch64::hsub, &AArch64::FPR32RegClass);
+ BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
return;
}
if (AArch64::FPR8RegClass.contains(DestReg) &&
AArch64::FPR8RegClass.contains(SrcReg)) {
- if (Subtarget.hasNEON()) {
- DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
- &AArch64::FPR128RegClass);
- SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
- &AArch64::FPR128RegClass);
- BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
- .addReg(SrcReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- } else {
- DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
- &AArch64::FPR32RegClass);
- SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
- &AArch64::FPR32RegClass);
- BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- }
+ DestReg =
+ RI.getMatchingSuperReg(DestReg, AArch64::bsub, &AArch64::FPR32RegClass);
+ SrcReg =
+ RI.getMatchingSuperReg(SrcReg, AArch64::bsub, &AArch64::FPR32RegClass);
+ BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
return;
}
@@ -4339,6 +4341,10 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
case AArch64::ST1Twov1d:
case AArch64::ST1Threev1d:
case AArch64::ST1Fourv1d:
+ case AArch64::ST1i8:
+ case AArch64::ST1i16:
+ case AArch64::ST1i32:
+ case AArch64::ST1i64:
case AArch64::IRG:
case AArch64::IRGstack:
case AArch64::STGloop:
@@ -4911,6 +4917,55 @@ static bool getFMAPatterns(MachineInstr &Root,
return Found;
}
+static bool getFMULPatterns(MachineInstr &Root,
+ SmallVectorImpl<MachineCombinerPattern> &Patterns) {
+ MachineBasicBlock &MBB = *Root.getParent();
+ bool Found = false;
+
+ auto Match = [&](unsigned Opcode, int Operand,
+ MachineCombinerPattern Pattern) -> bool {
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ MachineOperand &MO = Root.getOperand(Operand);
+ MachineInstr *MI = nullptr;
+ if (MO.isReg() && Register::isVirtualRegister(MO.getReg()))
+ MI = MRI.getUniqueVRegDef(MO.getReg());
+ if (MI && MI->getOpcode() == Opcode) {
+ Patterns.push_back(Pattern);
+ return true;
+ }
+ return false;
+ };
+
+ typedef MachineCombinerPattern MCP;
+
+ switch (Root.getOpcode()) {
+ default:
+ return false;
+ case AArch64::FMULv2f32:
+ Found = Match(AArch64::DUPv2i32lane, 1, MCP::FMULv2i32_indexed_OP1);
+ Found |= Match(AArch64::DUPv2i32lane, 2, MCP::FMULv2i32_indexed_OP2);
+ break;
+ case AArch64::FMULv2f64:
+ Found = Match(AArch64::DUPv2i64lane, 1, MCP::FMULv2i64_indexed_OP1);
+ Found |= Match(AArch64::DUPv2i64lane, 2, MCP::FMULv2i64_indexed_OP2);
+ break;
+ case AArch64::FMULv4f16:
+ Found = Match(AArch64::DUPv4i16lane, 1, MCP::FMULv4i16_indexed_OP1);
+ Found |= Match(AArch64::DUPv4i16lane, 2, MCP::FMULv4i16_indexed_OP2);
+ break;
+ case AArch64::FMULv4f32:
+ Found = Match(AArch64::DUPv4i32lane, 1, MCP::FMULv4i32_indexed_OP1);
+ Found |= Match(AArch64::DUPv4i32lane, 2, MCP::FMULv4i32_indexed_OP2);
+ break;
+ case AArch64::FMULv8f16:
+ Found = Match(AArch64::DUPv8i16lane, 1, MCP::FMULv8i16_indexed_OP1);
+ Found |= Match(AArch64::DUPv8i16lane, 2, MCP::FMULv8i16_indexed_OP2);
+ break;
+ }
+
+ return Found;
+}
+
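(Illustrative sketch, not part of the patch: the rewrite that getFMULPatterns and genIndexedMultiply cooperate to perform, expressed with ACLE NEON intrinsics; requires an AArch64 toolchain and <arm_neon.h>. Both functions compute the same vector, but the second needs no separate DUP.)

#include <arm_neon.h>

// FMUL fed by an explicit lane broadcast, roughly DUPv4i32lane + FMULv4f32.
float32x4_t mul_via_dup(float32x4_t X, float32x4_t Y) {
  return vmulq_f32(X, vdupq_laneq_f32(Y, 1));
}

// The folded form: a single by-element multiply, roughly FMULv4i32_indexed.
float32x4_t mul_by_element(float32x4_t X, float32x4_t Y) {
  return vmulq_laneq_f32(X, Y, 1);
}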
/// Return true when a code sequence can improve throughput. It
/// should be called only for instructions in loops.
/// \param Pattern - combiner pattern
@@ -4974,6 +5029,16 @@ bool AArch64InstrInfo::isThroughputPattern(
case MachineCombinerPattern::FMLSv2f64_OP2:
case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
case MachineCombinerPattern::FMLSv4f32_OP2:
+ case MachineCombinerPattern::FMULv2i32_indexed_OP1:
+ case MachineCombinerPattern::FMULv2i32_indexed_OP2:
+ case MachineCombinerPattern::FMULv2i64_indexed_OP1:
+ case MachineCombinerPattern::FMULv2i64_indexed_OP2:
+ case MachineCombinerPattern::FMULv4i16_indexed_OP1:
+ case MachineCombinerPattern::FMULv4i16_indexed_OP2:
+ case MachineCombinerPattern::FMULv4i32_indexed_OP1:
+ case MachineCombinerPattern::FMULv4i32_indexed_OP2:
+ case MachineCombinerPattern::FMULv8i16_indexed_OP1:
+ case MachineCombinerPattern::FMULv8i16_indexed_OP2:
case MachineCombinerPattern::MULADDv8i8_OP1:
case MachineCombinerPattern::MULADDv8i8_OP2:
case MachineCombinerPattern::MULADDv16i8_OP1:
@@ -5030,6 +5095,8 @@ bool AArch64InstrInfo::getMachineCombinerPatterns(
if (getMaddPatterns(Root, Patterns))
return true;
// Floating point patterns
+ if (getFMULPatterns(Root, Patterns))
+ return true;
if (getFMAPatterns(Root, Patterns))
return true;
@@ -5118,6 +5185,42 @@ genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
return MUL;
}
+/// Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
+static MachineInstr *
+genIndexedMultiply(MachineInstr &Root,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ unsigned IdxDupOp, unsigned MulOpc,
+ const TargetRegisterClass *RC, MachineRegisterInfo &MRI) {
+ assert(((IdxDupOp == 1) || (IdxDupOp == 2)) &&
+ "Invalid index of FMUL operand");
+
+ MachineFunction &MF = *Root.getMF();
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+
+ MachineInstr *Dup =
+ MF.getRegInfo().getUniqueVRegDef(Root.getOperand(IdxDupOp).getReg());
+
+ Register DupSrcReg = Dup->getOperand(1).getReg();
+ MRI.clearKillFlags(DupSrcReg);
+ MRI.constrainRegClass(DupSrcReg, RC);
+
+ unsigned DupSrcLane = Dup->getOperand(2).getImm();
+
+ unsigned IdxMulOp = IdxDupOp == 1 ? 2 : 1;
+ MachineOperand &MulOp = Root.getOperand(IdxMulOp);
+
+ Register ResultReg = Root.getOperand(0).getReg();
+
+ MachineInstrBuilder MIB;
+ MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MulOpc), ResultReg)
+ .add(MulOp)
+ .addReg(DupSrcReg)
+ .addImm(DupSrcLane);
+
+ InsInstrs.push_back(MIB);
+ return &Root;
+}
+
/// genFusedMultiplyAcc - Helper to generate fused multiply accumulate
/// instructions.
///
@@ -5329,15 +5432,15 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
}
uint64_t UImm = SignExtend64(Imm, BitSize);
uint64_t Encoding;
- if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
- MachineInstrBuilder MIB1 =
- BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
- .addReg(ZeroReg)
- .addImm(Encoding);
- InsInstrs.push_back(MIB1);
- InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
- MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
- }
+ if (!AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding))
+ return;
+ MachineInstrBuilder MIB1 =
+ BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
+ .addReg(ZeroReg)
+ .addImm(Encoding);
+ InsInstrs.push_back(MIB1);
+ InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
+ MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
break;
}
case MachineCombinerPattern::MULSUBW_OP1:
@@ -5420,15 +5523,15 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
}
uint64_t UImm = SignExtend64(-Imm, BitSize);
uint64_t Encoding;
- if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
- MachineInstrBuilder MIB1 =
- BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
- .addReg(ZeroReg)
- .addImm(Encoding);
- InsInstrs.push_back(MIB1);
- InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
- MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
- }
+ if (!AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding))
+ return;
+ MachineInstrBuilder MIB1 =
+ BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
+ .addReg(ZeroReg)
+ .addImm(Encoding);
+ InsInstrs.push_back(MIB1);
+ InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
+ MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
break;
}
@@ -6076,12 +6179,50 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
}
break;
}
+ case MachineCombinerPattern::FMULv2i32_indexed_OP1:
+ case MachineCombinerPattern::FMULv2i32_indexed_OP2: {
+ unsigned IdxDupOp =
+ (Pattern == MachineCombinerPattern::FMULv2i32_indexed_OP1) ? 1 : 2;
+ genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i32_indexed,
+ &AArch64::FPR128RegClass, MRI);
+ break;
+ }
+ case MachineCombinerPattern::FMULv2i64_indexed_OP1:
+ case MachineCombinerPattern::FMULv2i64_indexed_OP2: {
+ unsigned IdxDupOp =
+ (Pattern == MachineCombinerPattern::FMULv2i64_indexed_OP1) ? 1 : 2;
+ genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i64_indexed,
+ &AArch64::FPR128RegClass, MRI);
+ break;
+ }
+ case MachineCombinerPattern::FMULv4i16_indexed_OP1:
+ case MachineCombinerPattern::FMULv4i16_indexed_OP2: {
+ unsigned IdxDupOp =
+ (Pattern == MachineCombinerPattern::FMULv4i16_indexed_OP1) ? 1 : 2;
+ genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i16_indexed,
+ &AArch64::FPR128_loRegClass, MRI);
+ break;
+ }
+ case MachineCombinerPattern::FMULv4i32_indexed_OP1:
+ case MachineCombinerPattern::FMULv4i32_indexed_OP2: {
+ unsigned IdxDupOp =
+ (Pattern == MachineCombinerPattern::FMULv4i32_indexed_OP1) ? 1 : 2;
+ genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i32_indexed,
+ &AArch64::FPR128RegClass, MRI);
+ break;
+ }
+ case MachineCombinerPattern::FMULv8i16_indexed_OP1:
+ case MachineCombinerPattern::FMULv8i16_indexed_OP2: {
+ unsigned IdxDupOp =
+ (Pattern == MachineCombinerPattern::FMULv8i16_indexed_OP1) ? 1 : 2;
+ genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv8i16_indexed,
+ &AArch64::FPR128_loRegClass, MRI);
+ break;
+ }
} // end switch (Pattern)
// Record MUL and ADD/SUB for deletion
- // FIXME: This assertion fails in CodeGen/AArch64/tailmerging_in_mbp.ll and
- // CodeGen/AArch64/urem-seteq-nonzero.ll.
- // assert(MUL && "MUL was never set");
- DelInstrs.push_back(MUL);
+ if (MUL)
+ DelInstrs.push_back(MUL);
DelInstrs.push_back(&Root);
}
@@ -6624,13 +6765,8 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
MachineBasicBlock::iterator MBBI = RepeatedSequenceLocs[0].front();
for (unsigned Loc = RepeatedSequenceLocs[0].getStartIdx();
Loc < RepeatedSequenceLocs[0].getEndIdx() + 1; Loc++) {
- const std::vector<MCCFIInstruction> &CFIInstructions =
- RepeatedSequenceLocs[0].getMF()->getFrameInstructions();
- if (MBBI->isCFIInstruction()) {
- unsigned CFIIndex = MBBI->getOperand(0).getCFIIndex();
- MCCFIInstruction CFI = CFIInstructions[CFIIndex];
+ if (MBBI->isCFIInstruction())
CFICount++;
- }
MBBI++;
}
@@ -7212,7 +7348,8 @@ static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB,
.setMIFlags(MachineInstr::FrameSetup);
// If v8.3a features are available we can replace a RET instruction by
- // RETAA or RETAB and omit the AUT instructions
+ // RETAA or RETAB and omit the AUT instructions. In this case the
+ // DW_CFA_AARCH64_negate_ra_state can't be emitted.
if (Subtarget.hasPAuth() && MBBAUT != MBB.end() &&
MBBAUT->getOpcode() == AArch64::RET) {
BuildMI(MBB, MBBAUT, DL,
@@ -7225,6 +7362,11 @@ static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB,
TII->get(ShouldSignReturnAddrWithAKey ? AArch64::AUTIASP
: AArch64::AUTIBSP))
.setMIFlag(MachineInstr::FrameDestroy);
+ unsigned CFIIndexAuth =
+ MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
+ BuildMI(MBB, MBBAUT, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndexAuth)
+ .setMIFlags(MachineInstr::FrameDestroy);
}
}
}
@@ -7401,7 +7543,11 @@ MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
unsigned Reg = findRegisterToSaveLRTo(C);
assert(Reg != 0 && "No callee-saved register available?");
- // Save and restore LR from that register.
+ // LR has to be a live in so that we can save it.
+ if (!MBB.isLiveIn(AArch64::LR))
+ MBB.addLiveIn(AArch64::LR);
+
+ // Save and restore LR from Reg.
Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
.addReg(AArch64::XZR)
.addReg(AArch64::LR)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index e25189e409a3..b2f9e82a7e8b 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -227,12 +227,12 @@ public:
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
/// Return true if the comparison instruction can be analyzed.
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
- Register &SrcReg2, int &CmpMask,
- int &CmpValue) const override;
+ Register &SrcReg2, int64_t &CmpMask,
+ int64_t &CmpValue) const override;
/// optimizeCompareInstr - Convert the instruction supplying the argument to
/// the comparison into one that sets the zero bit in the flags register.
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
- Register SrcReg2, int CmpMask, int CmpValue,
+ Register SrcReg2, int64_t CmpMask, int64_t CmpValue,
const MachineRegisterInfo *MRI) const override;
bool optimizeCondBranch(MachineInstr &MI) const override;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 682cec361728..db8e0c5dac4a 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -27,6 +27,21 @@ def HasV8_6a : Predicate<"Subtarget->hasV8_6aOps()">,
AssemblerPredicate<(all_of HasV8_6aOps), "armv8.6a">;
def HasV8_7a : Predicate<"Subtarget->hasV8_7aOps()">,
AssemblerPredicate<(all_of HasV8_7aOps), "armv8.7a">;
+def HasV9_0a : Predicate<"Subtarget->hasV9_0aOps()">,
+ AssemblerPredicate<(all_of HasV9_0aOps), "armv9-a">;
+def HasV9_1a : Predicate<"Subtarget->hasV9_1aOps()">,
+ AssemblerPredicate<(all_of HasV9_1aOps), "armv9.1a">;
+def HasV9_2a : Predicate<"Subtarget->hasV9_2aOps()">,
+ AssemblerPredicate<(all_of HasV9_2aOps), "armv9.2a">;
+def HasV8_0r : Predicate<"Subtarget->hasV8_0rOps()">,
+ AssemblerPredicate<(all_of HasV8_0rOps), "armv8-r">;
+
+def HasEL2VMSA : Predicate<"Subtarget->hasEL2VMSA()">,
+ AssemblerPredicate<(all_of FeatureEL2VMSA), "el2vmsa">;
+
+def HasEL3 : Predicate<"Subtarget->hasEL3()">,
+ AssemblerPredicate<(all_of FeatureEL3), "el3">;
+
def HasVH : Predicate<"Subtarget->hasVH()">,
AssemblerPredicate<(all_of FeatureVH), "vh">;
@@ -63,9 +78,6 @@ def HasAM : Predicate<"Subtarget->hasAM()">,
def HasSEL2 : Predicate<"Subtarget->hasSEL2()">,
AssemblerPredicate<(all_of FeatureSEL2), "sel2">;
-def HasPMU : Predicate<"Subtarget->hasPMU()">,
- AssemblerPredicate<(all_of FeaturePMU), "pmu">;
-
def HasTLB_RMI : Predicate<"Subtarget->hasTLB_RMI()">,
AssemblerPredicate<(all_of FeatureTLB_RMI), "tlb-rmi">;
@@ -128,6 +140,24 @@ def HasSMEF64 : Predicate<"Subtarget->hasSMEF64()">,
AssemblerPredicate<(all_of FeatureSMEF64), "sme-f64">;
def HasSMEI64 : Predicate<"Subtarget->hasSMEI64()">,
AssemblerPredicate<(all_of FeatureSMEI64), "sme-i64">;
+def HasStreamingSVE : Predicate<"Subtarget->hasStreamingSVE()">,
+ AssemblerPredicate<(all_of FeatureStreamingSVE), "streaming-sve">;
+// A subset of SVE(2) instructions are legal in Streaming SVE execution mode,
+// so they should be enabled if either feature has been specified.
+def HasSVEorStreamingSVE
+ : Predicate<"Subtarget->hasSVE() || Subtarget->hasStreamingSVE()">,
+ AssemblerPredicate<(any_of FeatureSVE, FeatureStreamingSVE),
+ "streaming-sve or sve">;
+def HasSVE2orStreamingSVE
+ : Predicate<"Subtarget->hasSVE2() || Subtarget->hasStreamingSVE()">,
+ AssemblerPredicate<(any_of FeatureSVE2, FeatureStreamingSVE),
+ "streaming-sve or sve2">;
+// A subset of NEON instructions are legal in Streaming SVE execution mode,
+// so they should be enabled if either feature has been specified.
+def HasNEONorStreamingSVE
+ : Predicate<"Subtarget->hasNEON() || Subtarget->hasStreamingSVE()">,
+ AssemblerPredicate<(any_of FeatureNEON, FeatureStreamingSVE),
+ "streaming-sve or neon">;
def HasRCPC : Predicate<"Subtarget->hasRCPC()">,
AssemblerPredicate<(all_of FeatureRCPC), "rcpc">;
def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">,
@@ -180,6 +210,8 @@ def UseNegativeImmediates
: Predicate<"false">, AssemblerPredicate<(all_of (not FeatureNoNegativeImmediates)),
"NegativeImmediates">;
+def UseScalarIncVL : Predicate<"Subtarget->useScalarIncVL()">;
+
def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER",
SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
SDTCisInt<1>]>>;
@@ -673,40 +705,40 @@ let isReMaterializable = 1, isCodeGenOnly = 1 in {
// removed, along with the AArch64Wrapper node.
let AddedComplexity = 10 in
-def LOADgot : Pseudo<(outs GPR64:$dst), (ins i64imm:$addr),
- [(set GPR64:$dst, (AArch64LOADgot tglobaladdr:$addr))]>,
+def LOADgot : Pseudo<(outs GPR64common:$dst), (ins i64imm:$addr),
+ [(set GPR64common:$dst, (AArch64LOADgot tglobaladdr:$addr))]>,
Sched<[WriteLDAdr]>;
// The MOVaddr instruction should match only when the add is not folded
// into a load or store address.
def MOVaddr
- : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
- [(set GPR64:$dst, (AArch64addlow (AArch64adrp tglobaladdr:$hi),
+ : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
+ [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaladdr:$hi),
tglobaladdr:$low))]>,
Sched<[WriteAdrAdr]>;
def MOVaddrJT
- : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
- [(set GPR64:$dst, (AArch64addlow (AArch64adrp tjumptable:$hi),
+ : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
+ [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tjumptable:$hi),
tjumptable:$low))]>,
Sched<[WriteAdrAdr]>;
def MOVaddrCP
- : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
- [(set GPR64:$dst, (AArch64addlow (AArch64adrp tconstpool:$hi),
+ : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
+ [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tconstpool:$hi),
tconstpool:$low))]>,
Sched<[WriteAdrAdr]>;
def MOVaddrBA
- : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
- [(set GPR64:$dst, (AArch64addlow (AArch64adrp tblockaddress:$hi),
+ : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
+ [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tblockaddress:$hi),
tblockaddress:$low))]>,
Sched<[WriteAdrAdr]>;
def MOVaddrTLS
- : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
- [(set GPR64:$dst, (AArch64addlow (AArch64adrp tglobaltlsaddr:$hi),
+ : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
+ [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaltlsaddr:$hi),
tglobaltlsaddr:$low))]>,
Sched<[WriteAdrAdr]>;
def MOVaddrEXT
- : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
- [(set GPR64:$dst, (AArch64addlow (AArch64adrp texternalsym:$hi),
+ : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
+ [(set GPR64common:$dst, (AArch64addlow (AArch64adrp texternalsym:$hi),
texternalsym:$low))]>,
Sched<[WriteAdrAdr]>;
// Normally AArch64addlow either gets folded into a following ldr/str,
@@ -714,8 +746,8 @@ def MOVaddrEXT
// might appear without either of them, so allow lowering it into a plain
// add.
def ADDlowTLS
- : Pseudo<(outs GPR64:$dst), (ins GPR64:$src, i64imm:$low),
- [(set GPR64:$dst, (AArch64addlow GPR64:$src,
+ : Pseudo<(outs GPR64sp:$dst), (ins GPR64sp:$src, i64imm:$low),
+ [(set GPR64sp:$dst, (AArch64addlow GPR64sp:$src,
tglobaltlsaddr:$low))]>,
Sched<[WriteAdr]>;
@@ -855,7 +887,7 @@ defm UDOTlane : SIMDThreeSameVectorDotIndex<1, 0, 0b10, "udot", AArch64udot>;
}
// ARMv8.6-A BFloat
-let Predicates = [HasBF16] in {
+let Predicates = [HasNEON, HasBF16] in {
defm BFDOT : SIMDThreeSameVectorBFDot<1, "bfdot">;
defm BF16DOTlane : SIMDThreeSameVectorBF16DotI<0, "bfdot">;
def BFMMLA : SIMDThreeSameVectorBF16MatrixMul<"bfmmla">;
@@ -865,7 +897,6 @@ def BFMLALBIdx : SIMDBF16MLALIndex<0, "bfmlalb", int_aarch64_neon_bfmlalb>;
def BFMLALTIdx : SIMDBF16MLALIndex<1, "bfmlalt", int_aarch64_neon_bfmlalt>;
def BFCVTN : SIMD_BFCVTN;
def BFCVTN2 : SIMD_BFCVTN2;
-def BFCVT : BF16ToSinglePrecision<"bfcvt">;
// Vector-scalar BFDOT:
// The second source operand of the 64-bit variant of BF16DOTlane is a 128-bit
@@ -885,6 +916,10 @@ def : Pat<(v2f32 (int_aarch64_neon_bfdot
VectorIndexS:$idx)>;
}
+let Predicates = [HasNEONorStreamingSVE, HasBF16] in {
+def BFCVT : BF16ToSinglePrecision<"bfcvt">;
+}
+
// ARMv8.6A AArch64 matrix multiplication
let Predicates = [HasMatMulInt8] in {
def SMMLA : SIMDThreeSameVectorMatMul<0, 0, "smmla", int_aarch64_neon_smmla>;
@@ -958,6 +993,15 @@ def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v8i16>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v4i32>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v2i64>;
+class EOR3_pattern<ValueType VecTy>
+ : Pat<(xor (xor (VecTy V128:$Vn), (VecTy V128:$Vm)), (VecTy V128:$Va)),
+ (EOR3 (VecTy V128:$Vn), (VecTy V128:$Vm), (VecTy V128:$Va))>;
+
+def : EOR3_pattern<v16i8>;
+def : EOR3_pattern<v8i16>;
+def : EOR3_pattern<v4i32>;
+def : EOR3_pattern<v2i64>;
+
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v16i8>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v8i16>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v4i32>;
@@ -1034,8 +1078,7 @@ defm FCMLA : SIMDThreeSameVectorTiedComplexHSD<1, 0b110, complexrotateop,
"fcmla", null_frag>;
defm FCADD : SIMDThreeSameVectorComplexHSD<1, 0b111, complexrotateopodd,
"fcadd", null_frag>;
-defm FCMLA : SIMDIndexedTiedComplexHSD<1, 0, 1, complexrotateop, "fcmla",
- null_frag>;
+defm FCMLA : SIMDIndexedTiedComplexHSD<0, 1, complexrotateop, "fcmla">;
let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in {
def : Pat<(v4f16 (int_aarch64_neon_vcadd_rot90 (v4f16 V64:$Rn), (v4f16 V64:$Rm))),
@@ -1172,23 +1215,25 @@ let Predicates = [HasPAuth] in {
def : InstAlias<"autib1716", (AUTIB1716), 1>;
def : InstAlias<"xpaclri", (XPACLRI), 1>;
- multiclass SignAuth<bits<3> prefix, bits<3> prefix_z, string asm> {
- def IA : SignAuthOneData<prefix, 0b00, !strconcat(asm, "ia")>;
- def IB : SignAuthOneData<prefix, 0b01, !strconcat(asm, "ib")>;
- def DA : SignAuthOneData<prefix, 0b10, !strconcat(asm, "da")>;
- def DB : SignAuthOneData<prefix, 0b11, !strconcat(asm, "db")>;
- def IZA : SignAuthZero<prefix_z, 0b00, !strconcat(asm, "iza")>;
- def DZA : SignAuthZero<prefix_z, 0b10, !strconcat(asm, "dza")>;
- def IZB : SignAuthZero<prefix_z, 0b01, !strconcat(asm, "izb")>;
- def DZB : SignAuthZero<prefix_z, 0b11, !strconcat(asm, "dzb")>;
+ multiclass SignAuth<bits<3> prefix, bits<3> prefix_z, string asm,
+ SDPatternOperator op> {
+ def IA : SignAuthOneData<prefix, 0b00, !strconcat(asm, "ia"), op>;
+ def IB : SignAuthOneData<prefix, 0b01, !strconcat(asm, "ib"), op>;
+ def DA : SignAuthOneData<prefix, 0b10, !strconcat(asm, "da"), op>;
+ def DB : SignAuthOneData<prefix, 0b11, !strconcat(asm, "db"), op>;
+ def IZA : SignAuthZero<prefix_z, 0b00, !strconcat(asm, "iza"), op>;
+ def DZA : SignAuthZero<prefix_z, 0b10, !strconcat(asm, "dza"), op>;
+ def IZB : SignAuthZero<prefix_z, 0b01, !strconcat(asm, "izb"), op>;
+ def DZB : SignAuthZero<prefix_z, 0b11, !strconcat(asm, "dzb"), op>;
}
- defm PAC : SignAuth<0b000, 0b010, "pac">;
- defm AUT : SignAuth<0b001, 0b011, "aut">;
+ defm PAC : SignAuth<0b000, 0b010, "pac", int_ptrauth_sign>;
+ defm AUT : SignAuth<0b001, 0b011, "aut", null_frag>;
def XPACI : ClearAuth<0, "xpaci">;
def XPACD : ClearAuth<1, "xpacd">;
- def PACGA : SignAuthTwoOperand<0b1100, "pacga", null_frag>;
+
+ def PACGA : SignAuthTwoOperand<0b1100, "pacga", int_ptrauth_sign_generic>;
// Combined Instructions
let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
@@ -1272,6 +1317,7 @@ def : InstAlias<"clrex", (CLREX 0xf)>;
def : InstAlias<"isb", (ISB 0xf)>;
def : InstAlias<"ssbb", (DSB 0)>;
def : InstAlias<"pssbb", (DSB 4)>;
+def : InstAlias<"dfb", (DSB 0b1100)>, Requires<[HasV8_0r]>;
def MRS : MRSI;
def MSR : MSRI;
@@ -1325,7 +1371,7 @@ def TSTART : TMSystemI<0b0000, "tstart",
def TCOMMIT : TMSystemINoOperand<0b0000, "tcommit", [(int_aarch64_tcommit)]>;
def TCANCEL : TMSystemException<0b011, "tcancel",
- [(int_aarch64_tcancel i64_imm0_65535:$imm)]>;
+ [(int_aarch64_tcancel timm64_0_65535:$imm)]>;
def TTEST : TMSystemI<0b0001, "ttest", [(set GPR64:$Rt, (int_aarch64_ttest))]> {
let mayLoad = 0;
@@ -1344,12 +1390,12 @@ let PostEncoderMethod = "fixMOVZ" in
defm MOVZ : MoveImmediate<0b10, "movz">;
// First group of aliases covers an implicit "lsl #0".
-def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, i32_imm0_65535:$imm, 0), 0>;
-def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, i32_imm0_65535:$imm, 0), 0>;
-def : InstAlias<"movn $dst, $imm", (MOVNWi GPR32:$dst, i32_imm0_65535:$imm, 0)>;
-def : InstAlias<"movn $dst, $imm", (MOVNXi GPR64:$dst, i32_imm0_65535:$imm, 0)>;
-def : InstAlias<"movz $dst, $imm", (MOVZWi GPR32:$dst, i32_imm0_65535:$imm, 0)>;
-def : InstAlias<"movz $dst, $imm", (MOVZXi GPR64:$dst, i32_imm0_65535:$imm, 0)>;
+def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, timm32_0_65535:$imm, 0), 0>;
+def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, timm32_0_65535:$imm, 0), 0>;
+def : InstAlias<"movn $dst, $imm", (MOVNWi GPR32:$dst, timm32_0_65535:$imm, 0)>;
+def : InstAlias<"movn $dst, $imm", (MOVNXi GPR64:$dst, timm32_0_65535:$imm, 0)>;
+def : InstAlias<"movz $dst, $imm", (MOVZWi GPR32:$dst, timm32_0_65535:$imm, 0)>;
+def : InstAlias<"movz $dst, $imm", (MOVZXi GPR64:$dst, timm32_0_65535:$imm, 0)>;
// Next, we have various ELF relocations with the ":XYZ_g0:sym" syntax.
def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g3:$sym, 48)>;
@@ -1620,8 +1666,8 @@ def : ShiftAlias<"rorv", RORVXr, GPR64>;
// Multiply-add
let AddedComplexity = 5 in {
-defm MADD : MulAccum<0, "madd", add>;
-defm MSUB : MulAccum<1, "msub", sub>;
+defm MADD : MulAccum<0, "madd">;
+defm MSUB : MulAccum<1, "msub">;
def : Pat<(i32 (mul GPR32:$Rn, GPR32:$Rm)),
(MADDWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
@@ -2334,16 +2380,16 @@ def BRK : ExceptionGeneration<0b001, 0b00, "brk">;
}
def DCPS1 : ExceptionGeneration<0b101, 0b01, "dcps1">;
def DCPS2 : ExceptionGeneration<0b101, 0b10, "dcps2">;
-def DCPS3 : ExceptionGeneration<0b101, 0b11, "dcps3">;
+def DCPS3 : ExceptionGeneration<0b101, 0b11, "dcps3">, Requires<[HasEL3]>;
def HLT : ExceptionGeneration<0b010, 0b00, "hlt">;
def HVC : ExceptionGeneration<0b000, 0b10, "hvc">;
-def SMC : ExceptionGeneration<0b000, 0b11, "smc">;
+def SMC : ExceptionGeneration<0b000, 0b11, "smc">, Requires<[HasEL3]>;
def SVC : ExceptionGeneration<0b000, 0b01, "svc">;
// DCPSn defaults to an immediate operand of zero if unspecified.
def : InstAlias<"dcps1", (DCPS1 0)>;
def : InstAlias<"dcps2", (DCPS2 0)>;
-def : InstAlias<"dcps3", (DCPS3 0)>;
+def : InstAlias<"dcps3", (DCPS3 0)>, Requires<[HasEL3]>;
def UDF : UDFType<0, "udf">;
@@ -3114,7 +3160,7 @@ defm STRB : Store8RO< 0b00, 1, 0b00, FPR8Op, "str", untyped, store>;
defm STRH : Store16RO<0b01, 1, 0b00, FPR16Op, "str", f16, store>;
defm STRS : Store32RO<0b10, 1, 0b00, FPR32Op, "str", f32, store>;
defm STRD : Store64RO<0b11, 1, 0b00, FPR64Op, "str", f64, store>;
-defm STRQ : Store128RO<0b00, 1, 0b10, FPR128Op, "str", f128, store>;
+defm STRQ : Store128RO<0b00, 1, 0b10, FPR128Op, "str">;
let Predicates = [UseSTRQro], AddedComplexity = 10 in {
def : Pat<(store (f128 FPR128:$Rt),
@@ -3710,35 +3756,56 @@ defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;
// AArch64's FCVT instructions saturate when out of range.
multiclass FPToIntegerSatPats<SDNode to_int_sat, string INST> {
+ let Predicates = [HasFullFP16] in {
def : Pat<(i32 (to_int_sat f16:$Rn, i32)),
(!cast<Instruction>(INST # UWHr) f16:$Rn)>;
- def : Pat<(i32 (to_int_sat f32:$Rn, i32)),
- (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
- def : Pat<(i32 (to_int_sat f64:$Rn, i32)),
- (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
def : Pat<(i64 (to_int_sat f16:$Rn, i64)),
(!cast<Instruction>(INST # UXHr) f16:$Rn)>;
+ }
+ def : Pat<(i32 (to_int_sat f32:$Rn, i32)),
+ (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
def : Pat<(i64 (to_int_sat f32:$Rn, i64)),
(!cast<Instruction>(INST # UXSr) f32:$Rn)>;
+ def : Pat<(i32 (to_int_sat f64:$Rn, i32)),
+ (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
def : Pat<(i64 (to_int_sat f64:$Rn, i64)),
(!cast<Instruction>(INST # UXDr) f64:$Rn)>;
+
+ let Predicates = [HasFullFP16] in {
+ def : Pat<(i32 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i32:$scale), i32)),
+ (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
+ def : Pat<(i64 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i64:$scale), i64)),
+ (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
+ }
+ def : Pat<(i32 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i32:$scale), i32)),
+ (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
+ def : Pat<(i64 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i64:$scale), i64)),
+ (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
+ def : Pat<(i32 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i32:$scale), i32)),
+ (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
+ def : Pat<(i64 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i64:$scale), i64)),
+ (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
}
defm : FPToIntegerSatPats<fp_to_sint_sat, "FCVTZS">;
defm : FPToIntegerSatPats<fp_to_uint_sat, "FCVTZU">;
multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
+ let Predicates = [HasFullFP16] in {
def : Pat<(i32 (round f16:$Rn)), (!cast<Instruction>(INST # UWHr) $Rn)>;
def : Pat<(i64 (round f16:$Rn)), (!cast<Instruction>(INST # UXHr) $Rn)>;
+ }
def : Pat<(i32 (round f32:$Rn)), (!cast<Instruction>(INST # UWSr) $Rn)>;
def : Pat<(i64 (round f32:$Rn)), (!cast<Instruction>(INST # UXSr) $Rn)>;
def : Pat<(i32 (round f64:$Rn)), (!cast<Instruction>(INST # UWDr) $Rn)>;
def : Pat<(i64 (round f64:$Rn)), (!cast<Instruction>(INST # UXDr) $Rn)>;
+ let Predicates = [HasFullFP16] in {
def : Pat<(i32 (round (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
(!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
def : Pat<(i64 (round (fmul f16:$Rn, fixedpoint_f16_i64:$scale))),
(!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
+ }
def : Pat<(i32 (round (fmul f32:$Rn, fixedpoint_f32_i32:$scale))),
(!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
def : Pat<(i64 (round (fmul f32:$Rn, fixedpoint_f32_i64:$scale))),
@@ -3763,10 +3830,12 @@ multiclass FPToIntegerPats<SDNode to_int, SDNode to_int_sat, SDNode round, strin
(!cast<Instruction>(INST # UXDr) f64:$Rn)>;
// These instructions saturate like fp_to_[su]int_sat.
+ let Predicates = [HasFullFP16] in {
def : Pat<(i32 (to_int_sat (round f16:$Rn), i32)),
(!cast<Instruction>(INST # UWHr) f16:$Rn)>;
def : Pat<(i64 (to_int_sat (round f16:$Rn), i64)),
(!cast<Instruction>(INST # UXHr) f16:$Rn)>;
+ }
def : Pat<(i32 (to_int_sat (round f32:$Rn), i32)),
(!cast<Instruction>(INST # UWSr) f32:$Rn)>;
def : Pat<(i64 (to_int_sat (round f32:$Rn), i64)),
@@ -4127,6 +4196,22 @@ defm FCVTXN : SIMDFPInexactCvtTwoVector<1, 0, 0b10110, "fcvtxn",
defm FCVTZS : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", fp_to_sint>;
defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", fp_to_uint>;
+// AArch64's FCVT instructions saturate when out of range.
+multiclass SIMDTwoVectorFPToIntSatPats<SDNode to_int_sat, string INST> {
+ def : Pat<(v4i16 (to_int_sat v4f16:$Rn, i16)),
+ (!cast<Instruction>(INST # v4f16) v4f16:$Rn)>;
+ def : Pat<(v8i16 (to_int_sat v8f16:$Rn, i16)),
+ (!cast<Instruction>(INST # v8f16) v8f16:$Rn)>;
+ def : Pat<(v2i32 (to_int_sat v2f32:$Rn, i32)),
+ (!cast<Instruction>(INST # v2f32) v2f32:$Rn)>;
+ def : Pat<(v4i32 (to_int_sat v4f32:$Rn, i32)),
+ (!cast<Instruction>(INST # v4f32) v4f32:$Rn)>;
+ def : Pat<(v2i64 (to_int_sat v2f64:$Rn, i64)),
+ (!cast<Instruction>(INST # v2f64) v2f64:$Rn)>;
+}
+defm : SIMDTwoVectorFPToIntSatPats<fp_to_sint_sat, "FCVTZS">;
+defm : SIMDTwoVectorFPToIntSatPats<fp_to_uint_sat, "FCVTZU">;
+
def : Pat<(v4i16 (int_aarch64_neon_fcvtzs v4f16:$Rn)), (FCVTZSv4f16 $Rn)>;
def : Pat<(v8i16 (int_aarch64_neon_fcvtzs v8f16:$Rn)), (FCVTZSv8f16 $Rn)>;
def : Pat<(v2i32 (int_aarch64_neon_fcvtzs v2f32:$Rn)), (FCVTZSv2f32 $Rn)>;
@@ -4606,9 +4691,9 @@ defm FACGT : SIMDThreeScalarFPCmp<1, 1, 0b101, "facgt",
defm FCMEQ : SIMDThreeScalarFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
defm FCMGE : SIMDThreeScalarFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
-defm FMULX : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx>;
-defm FRECPS : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps>;
-defm FRSQRTS : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts>;
+defm FMULX : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx, HasNEONorStreamingSVE>;
+defm FRECPS : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps, HasNEONorStreamingSVE>;
+defm FRSQRTS : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts, HasNEONorStreamingSVE>;
defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>;
defm SQDMULH : SIMDThreeScalarHS< 0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>;
defm SQRDMULH : SIMDThreeScalarHS< 1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
@@ -4707,9 +4792,9 @@ defm FCVTPU : SIMDFPTwoScalar< 1, 1, 0b11010, "fcvtpu">;
def FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">;
defm FCVTZS : SIMDFPTwoScalar< 0, 1, 0b11011, "fcvtzs">;
defm FCVTZU : SIMDFPTwoScalar< 1, 1, 0b11011, "fcvtzu">;
-defm FRECPE : SIMDFPTwoScalar< 0, 1, 0b11101, "frecpe">;
-defm FRECPX : SIMDFPTwoScalar< 0, 1, 0b11111, "frecpx">;
-defm FRSQRTE : SIMDFPTwoScalar< 1, 1, 0b11101, "frsqrte">;
+defm FRECPE : SIMDFPTwoScalar< 0, 1, 0b11101, "frecpe", HasNEONorStreamingSVE>;
+defm FRECPX : SIMDFPTwoScalar< 0, 1, 0b11111, "frecpx", HasNEONorStreamingSVE>;
+defm FRSQRTE : SIMDFPTwoScalar< 1, 1, 0b11101, "frsqrte", HasNEONorStreamingSVE>;
defm NEG : SIMDTwoScalarD< 1, 0b01011, "neg",
UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
defm SCVTF : SIMDFPTwoScalarCVT< 0, 0, 0b11101, "scvtf", AArch64sitof>;
@@ -5211,7 +5296,7 @@ def : Pat<(v16i8 (int_aarch64_neon_tbx1 (v16i8 V128:$Rd),
// AdvSIMD scalar CPY instruction
//----------------------------------------------------------------------------
-defm CPY : SIMDScalarCPY<"cpy">;
+defm CPY : SIMDScalarCPY<"mov">;
//----------------------------------------------------------------------------
// AdvSIMD scalar pairwise instructions
@@ -5693,7 +5778,7 @@ def : Pat<(i32 (vector_extract (v8i16 (insert_subvector undef,
(v4i16 (AArch64uaddv (v4i16 (AArch64uaddlp (v8i8 V64:$op))))),
(i64 0))), (i64 0))),
(EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
- (UADDLVv4i16v V64:$op), ssub), ssub)>;
+ (UADDLVv8i8v V64:$op), hsub), ssub)>;
def : Pat<(i32 (vector_extract (v8i16 (AArch64uaddv (v8i16 (AArch64uaddlp
(v16i8 V128:$op))))), (i64 0))),
(EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
@@ -6964,9 +7049,9 @@ def AESIMCrr : AESInst< 0b0111, "aesimc", int_aarch64_crypto_aesimc>;
// for AES fusion on some CPUs.
let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in {
def AESMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
- Sched<[WriteV]>;
+ Sched<[WriteVq]>;
def AESIMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
- Sched<[WriteV]>;
+ Sched<[WriteVq]>;
}
// Only use constrained versions of AES(I)MC instructions if they are paired with
@@ -8092,6 +8177,20 @@ let AddedComplexity = 10 in {
// FIXME: add SVE dot-product patterns.
}
+// Custom DAG nodes and isel rules to make a 64-byte block out of eight GPRs,
+// so that it can be used as input to inline asm, and vice versa.
+def LS64_BUILD : SDNode<"AArch64ISD::LS64_BUILD", SDTypeProfile<1, 8, []>>;
+def LS64_EXTRACT : SDNode<"AArch64ISD::LS64_EXTRACT", SDTypeProfile<1, 2, []>>;
+def : Pat<(i64x8 (LS64_BUILD GPR64:$x0, GPR64:$x1, GPR64:$x2, GPR64:$x3,
+ GPR64:$x4, GPR64:$x5, GPR64:$x6, GPR64:$x7)),
+ (REG_SEQUENCE GPR64x8Class,
+ $x0, x8sub_0, $x1, x8sub_1, $x2, x8sub_2, $x3, x8sub_3,
+ $x4, x8sub_4, $x5, x8sub_5, $x6, x8sub_6, $x7, x8sub_7)>;
+foreach i = 0-7 in {
+ def : Pat<(i64 (LS64_EXTRACT (i64x8 GPR64x8:$val), (i32 i))),
+ (EXTRACT_SUBREG $val, !cast<SubRegIndex>("x8sub_"#i))>;
+}
+
let Predicates = [HasLS64] in {
def LD64B: LoadStore64B<0b101, "ld64b", (ins GPR64sp:$Rn),
(outs GPR64x8:$Rt)>;
@@ -8114,6 +8213,10 @@ def StoreSwiftAsyncContext
: Pseudo<(outs), (ins GPR64:$ctx, GPR64sp:$base, simm9:$offset),
[]>, Sched<[]>;
+def AArch64AssertZExtBool : SDNode<"AArch64ISD::ASSERT_ZEXT_BOOL", SDT_assert>;
+def : Pat<(AArch64AssertZExtBool GPR32:$op),
+ (i32 GPR32:$op)>;
+
include "AArch64InstrAtomics.td"
include "AArch64SVEInstrInfo.td"
include "AArch64SMEInstrInfo.td"
diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index bf042c83294a..3a836ac33064 100644
--- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -1613,8 +1613,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// If the stored value and the address of the second instruction is
// the same, it needs to be using the updated register and therefore
// it must not be folded.
- bool IsMIRegTheSame =
- getLdStRegOp(MI).getReg() == getLdStBaseOp(MI).getReg();
+ bool IsMIRegTheSame = TRI->regsOverlap(getLdStRegOp(MI).getReg(),
+ getLdStBaseOp(MI).getReg());
if (IsOutOfBounds || IsBaseRegUsed || IsBaseRegModified ||
IsMIRegTheSame) {
LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
diff --git a/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp b/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp
index be19d4953857..487e1f6162b9 100644
--- a/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp
@@ -363,7 +363,7 @@ static bool shouldUseFrameHelper(MachineBasicBlock &MBB,
int InstCount = RegCount / 2;
// Do not use a helper call when not saving LR.
- if (std::find(Regs.begin(), Regs.end(), AArch64::LR) == Regs.end())
+ if (!llvm::is_contained(Regs, AArch64::LR))
return false;
switch (Type) {
diff --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
new file mode 100644
index 000000000000..42db18332f1c
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
@@ -0,0 +1,293 @@
+//===- AArch64MIPeepholeOpt.cpp - AArch64 MI peephole optimization pass ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs the following peephole optimizations at the MIR level.
+//
+// 1. MOVi32imm + ANDWrr ==> ANDWri + ANDWri
+// MOVi64imm + ANDXrr ==> ANDXri + ANDXri
+//
+// The mov pseudo instruction could be expanded to multiple mov instructions
+// later. In this case, we could try to split the constant operand of the mov
+// instruction into two bitmask immediates. This produces two AND instructions
+// instead of multiple `mov` + `and` instructions.
+//
+// 2. Remove redundant ORRWrs which is generated by zero-extend.
+//
+// %3:gpr32 = ORRWrs $wzr, %2, 0
+// %4:gpr64 = SUBREG_TO_REG 0, %3, %subreg.sub_32
+//
+// If the 32-bit form of an AArch64 instruction defines the source operand of
+// the ORRWrs, we can remove the ORRWrs because the upper 32 bits of the source
+// operand are set to zero.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64ExpandImm.h"
+#include "AArch64InstrInfo.h"
+#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-mi-peephole-opt"
+
+namespace {
+
+struct AArch64MIPeepholeOpt : public MachineFunctionPass {
+ static char ID;
+
+ AArch64MIPeepholeOpt() : MachineFunctionPass(ID) {
+ initializeAArch64MIPeepholeOptPass(*PassRegistry::getPassRegistry());
+ }
+
+ const AArch64InstrInfo *TII;
+ MachineLoopInfo *MLI;
+ MachineRegisterInfo *MRI;
+
+ template <typename T>
+ bool visitAND(MachineInstr &MI,
+ SmallSetVector<MachineInstr *, 8> &ToBeRemoved);
+ bool visitORR(MachineInstr &MI,
+ SmallSetVector<MachineInstr *, 8> &ToBeRemoved);
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ StringRef getPassName() const override {
+ return "AArch64 MI Peephole Optimization pass";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+char AArch64MIPeepholeOpt::ID = 0;
+
+} // end anonymous namespace
+
+INITIALIZE_PASS(AArch64MIPeepholeOpt, "aarch64-mi-peephole-opt",
+ "AArch64 MI Peephole Optimization", false, false)
+
+template <typename T>
+static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {
+ T UImm = static_cast<T>(Imm);
+ if (AArch64_AM::isLogicalImmediate(UImm, RegSize))
+ return false;
+
+ // If this immediate can be handled by one instruction, do not split it.
+ SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
+ AArch64_IMM::expandMOVImm(UImm, RegSize, Insn);
+ if (Insn.size() == 1)
+ return false;
+
+  // A bitmask immediate consists of consecutive ones. Say there is a
+  // constant 0b00000000001000000000010000000000 which does not consist of
+  // consecutive ones. We can split it into two bitmask immediates such as
+  // 0b00000000001111111111110000000000 and 0b11111111111000000000011111111111.
+  // ANDing these two bitmask immediates together recovers the original constant.
+ unsigned LowestBitSet = countTrailingZeros(UImm);
+ unsigned HighestBitSet = Log2_64(UImm);
+
+ // Create a mask which is filled with one from the position of lowest bit set
+ // to the position of highest bit set.
+ T NewImm1 = (static_cast<T>(2) << HighestBitSet) -
+ (static_cast<T>(1) << LowestBitSet);
+ // Create a mask which is filled with one outside the position of lowest bit
+ // set and the position of highest bit set.
+ T NewImm2 = UImm | ~NewImm1;
+
+ // If the split value is not valid bitmask immediate, do not split this
+ // constant.
+ if (!AArch64_AM::isLogicalImmediate(NewImm2, RegSize))
+ return false;
+
+ Imm1Enc = AArch64_AM::encodeLogicalImmediate(NewImm1, RegSize);
+ Imm2Enc = AArch64_AM::encodeLogicalImmediate(NewImm2, RegSize);
+ return true;
+}
+
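For reference, a minimal standalone sketch of the mask-splitting arithmetic implemented by splitBitmaskImm above, using the example constant from the comment and assuming the 32-bit case; the file name, variable names, and compiler builtins below are illustrative only and are not taken from the patch:

  // mask_split_sketch.cpp - worked example of splitting a non-encodable AND
  // immediate into two AArch64 logical immediates whose conjunction equals it.
  #include <cassert>
  #include <cstdint>

  int main() {
    uint32_t Imm = 0x00200400u;                 // bits 10 and 21 set; not one run of ones
    unsigned Lowest  = __builtin_ctz(Imm);      // 10
    unsigned Highest = 31 - __builtin_clz(Imm); // 21
    uint32_t Imm1 = (2u << Highest) - (1u << Lowest); // 0x003ffc00: ones from bit 10 to 21
    uint32_t Imm2 = Imm | ~Imm1;                      // 0xffe007ff: Imm plus all bits outside 10..21
    assert((Imm1 & Imm2) == Imm);               // ANDing both masks recovers Imm
    return 0;
  }

Both masks are (possibly rotated) contiguous runs of ones, so each encodes as an ANDWri logical immediate, whereas materializing 0x00200400 itself would typically take a movz/movk pair before the AND.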
+template <typename T>
+bool AArch64MIPeepholeOpt::visitAND(
+ MachineInstr &MI, SmallSetVector<MachineInstr *, 8> &ToBeRemoved) {
+ // Try below transformation.
+ //
+ // MOVi32imm + ANDWrr ==> ANDWri + ANDWri
+ // MOVi64imm + ANDXrr ==> ANDXri + ANDXri
+ //
+  // The mov pseudo instruction could be expanded to multiple mov instructions
+  // later. Let's try to split the constant operand of the mov instruction into
+  // two bitmask immediates. This produces only two AND instructions instead of
+  // multiple mov + and instructions.
+
+ unsigned RegSize = sizeof(T) * 8;
+ assert((RegSize == 32 || RegSize == 64) &&
+ "Invalid RegSize for AND bitmask peephole optimization");
+
+ // Check whether AND's MBB is in loop and the AND is loop invariant.
+ MachineBasicBlock *MBB = MI.getParent();
+ MachineLoop *L = MLI->getLoopFor(MBB);
+ if (L && !L->isLoopInvariant(MI))
+ return false;
+
+ // Check whether AND's operand is MOV with immediate.
+ MachineInstr *MovMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
+ if (!MovMI)
+ return false;
+
+ MachineInstr *SubregToRegMI = nullptr;
+ // If it is SUBREG_TO_REG, check its operand.
+ if (MovMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) {
+ SubregToRegMI = MovMI;
+ MovMI = MRI->getUniqueVRegDef(MovMI->getOperand(2).getReg());
+ if (!MovMI)
+ return false;
+ }
+
+ if (MovMI->getOpcode() != AArch64::MOVi32imm &&
+ MovMI->getOpcode() != AArch64::MOVi64imm)
+ return false;
+
+ // If the MOV has multiple uses, do not split the immediate because it causes
+ // more instructions.
+ if (!MRI->hasOneUse(MovMI->getOperand(0).getReg()))
+ return false;
+
+ if (SubregToRegMI && !MRI->hasOneUse(SubregToRegMI->getOperand(0).getReg()))
+ return false;
+
+ // Split the bitmask immediate into two.
+ T UImm = static_cast<T>(MovMI->getOperand(1).getImm());
+ // For the 32 bit form of instruction, the upper 32 bits of the destination
+ // register are set to zero. If there is SUBREG_TO_REG, set the upper 32 bits
+ // of UImm to zero.
+ if (SubregToRegMI)
+ UImm &= 0xFFFFFFFF;
+ T Imm1Enc;
+ T Imm2Enc;
+ if (!splitBitmaskImm(UImm, RegSize, Imm1Enc, Imm2Enc))
+ return false;
+
+ // Create new AND MIs.
+ DebugLoc DL = MI.getDebugLoc();
+ const TargetRegisterClass *ANDImmRC =
+ (RegSize == 32) ? &AArch64::GPR32spRegClass : &AArch64::GPR64spRegClass;
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ Register NewTmpReg = MRI->createVirtualRegister(ANDImmRC);
+ Register NewDstReg = MRI->createVirtualRegister(ANDImmRC);
+ unsigned Opcode = (RegSize == 32) ? AArch64::ANDWri : AArch64::ANDXri;
+
+ MRI->constrainRegClass(NewTmpReg, MRI->getRegClass(SrcReg));
+ BuildMI(*MBB, MI, DL, TII->get(Opcode), NewTmpReg)
+ .addReg(SrcReg)
+ .addImm(Imm1Enc);
+
+ MRI->constrainRegClass(NewDstReg, MRI->getRegClass(DstReg));
+ BuildMI(*MBB, MI, DL, TII->get(Opcode), NewDstReg)
+ .addReg(NewTmpReg)
+ .addImm(Imm2Enc);
+
+ MRI->replaceRegWith(DstReg, NewDstReg);
+ // replaceRegWith changes MI's definition register. Keep it for SSA form until
+ // deleting MI.
+ MI.getOperand(0).setReg(DstReg);
+
+ ToBeRemoved.insert(&MI);
+ if (SubregToRegMI)
+ ToBeRemoved.insert(SubregToRegMI);
+ ToBeRemoved.insert(MovMI);
+
+ return true;
+}
+
+bool AArch64MIPeepholeOpt::visitORR(
+ MachineInstr &MI, SmallSetVector<MachineInstr *, 8> &ToBeRemoved) {
+  // Check whether this ORR comes from the zero-extend pattern below.
+ //
+ // def : Pat<(i64 (zext GPR32:$src)),
+ // (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>;
+ if (MI.getOperand(3).getImm() != 0)
+ return false;
+
+ if (MI.getOperand(1).getReg() != AArch64::WZR)
+ return false;
+
+ MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
+ if (!SrcMI)
+ return false;
+
+ // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC
+ //
+ // When you use the 32-bit form of an instruction, the upper 32 bits of the
+ // source registers are ignored and the upper 32 bits of the destination
+ // register are set to zero.
+ //
+  // If the 32-bit form of an AArch64 instruction defines the source operand of
+  // the zero-extend, we do not need the zero-extend. Check that the source
+  // instruction's opcode is a real AArch64 instruction; if it is not,
+  // conservatively do not perform the transformation.
+ if (SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END)
+ return false;
+
+ Register DefReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(2).getReg();
+ MRI->replaceRegWith(DefReg, SrcReg);
+ MRI->clearKillFlags(SrcReg);
+ // replaceRegWith changes MI's definition register. Keep it for SSA form until
+ // deleting MI.
+ MI.getOperand(0).setReg(DefReg);
+ ToBeRemoved.insert(&MI);
+
+ LLVM_DEBUG({ dbgs() << "Removed: " << MI << "\n"; });
+
+ return true;
+}
+
+bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
+ MLI = &getAnalysis<MachineLoopInfo>();
+ MRI = &MF.getRegInfo();
+
+ if (!MRI->isSSA())
+ return false;
+
+ bool Changed = false;
+ SmallSetVector<MachineInstr *, 8> ToBeRemoved;
+
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ switch (MI.getOpcode()) {
+ default:
+ break;
+ case AArch64::ANDWrr:
+ Changed = visitAND<uint32_t>(MI, ToBeRemoved);
+ break;
+ case AArch64::ANDXrr:
+ Changed = visitAND<uint64_t>(MI, ToBeRemoved);
+ break;
+ case AArch64::ORRWrs:
+ Changed = visitORR(MI, ToBeRemoved);
+ }
+ }
+ }
+
+ for (MachineInstr *MI : ToBeRemoved)
+ MI->eraseFromParent();
+
+ return Changed;
+}
+
+FunctionPass *llvm::createAArch64MIPeepholeOptPass() {
+ return new AArch64MIPeepholeOpt();
+}
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
index 07dee3ce1fbc..70daf5abf81d 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -732,7 +732,9 @@ def Tuples8X : RegisterTuples<
!foreach(i, [0,1,2,3,4,5,6,7], !cast<SubRegIndex>("x8sub_"#i)),
!foreach(i, [0,1,2,3,4,5,6,7], (trunc (decimate (rotl GPR64, i), 2), 12))>;
-def GPR64x8Class : RegisterClass<"AArch64", [i64], 64, (trunc Tuples8X, 12)>;
+def GPR64x8Class : RegisterClass<"AArch64", [i64x8], 512, (trunc Tuples8X, 12)> {
+ let Size = 512;
+}
def GPR64x8AsmOp : AsmOperandClass {
let Name = "GPR64x8";
let ParserMethod = "tryParseGPR64x8";
@@ -899,16 +901,8 @@ def PPR32 : PPRRegOp<"s", PPRAsmOp32, ElementSizeS, PPR>;
def PPR64 : PPRRegOp<"d", PPRAsmOp64, ElementSizeD, PPR>;
def PPRAsmOp3bAny : PPRAsmOperand<"Predicate3bAny", "PPR_3b", 0>;
-def PPRAsmOp3b8 : PPRAsmOperand<"Predicate3bB", "PPR_3b", 8>;
-def PPRAsmOp3b16 : PPRAsmOperand<"Predicate3bH", "PPR_3b", 16>;
-def PPRAsmOp3b32 : PPRAsmOperand<"Predicate3bS", "PPR_3b", 32>;
-def PPRAsmOp3b64 : PPRAsmOperand<"Predicate3bD", "PPR_3b", 64>;
def PPR3bAny : PPRRegOp<"", PPRAsmOp3bAny, ElementSizeNone, PPR_3b>;
-def PPR3b8 : PPRRegOp<"b", PPRAsmOp3b8, ElementSizeB, PPR_3b>;
-def PPR3b16 : PPRRegOp<"h", PPRAsmOp3b16, ElementSizeH, PPR_3b>;
-def PPR3b32 : PPRRegOp<"s", PPRAsmOp3b32, ElementSizeS, PPR_3b>;
-def PPR3b64 : PPRRegOp<"d", PPRAsmOp3b64, ElementSizeD, PPR_3b>;
//******************************************************************************
diff --git a/llvm/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp b/llvm/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp
index 03b32967a212..80d98d17e1d6 100644
--- a/llvm/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp
+++ b/llvm/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp
@@ -641,7 +641,7 @@ bool AArch64SIMDInstrOpt::processSeqRegInst(MachineInstr *DefiningMI,
StReg[i] = DefiningMI->getOperand(2*i+1).getReg();
StRegKill[i] = getKillRegState(DefiningMI->getOperand(2*i+1).isKill());
- // Sanity check for the other arguments.
+ // Validation check for the other arguments.
if (DefiningMI->getOperand(2*i+2).isImm()) {
switch (DefiningMI->getOperand(2*i+2).getImm()) {
default:
@@ -711,9 +711,7 @@ bool AArch64SIMDInstrOpt::runOnMachineFunction(MachineFunction &MF) {
if (!shouldExitEarly(&MF, OptimizationKind)) {
SmallVector<MachineInstr *, 8> RemoveMIs;
for (MachineBasicBlock &MBB : MF) {
- for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end();
- MII != MIE;) {
- MachineInstr &MI = *MII;
+ for (MachineInstr &MI : MBB) {
bool InstRewrite;
if (OptimizationKind == VectorElem)
InstRewrite = optimizeVectElement(MI) ;
@@ -725,7 +723,6 @@ bool AArch64SIMDInstrOpt::runOnMachineFunction(MachineFunction &MF) {
RemoveMIs.push_back(&MI);
Changed = true;
}
- ++MII;
}
}
for (MachineInstr *MI : RemoveMIs)
diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
index 6a0fa2fc4f4e..aacace64e998 100644
--- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -138,6 +138,6 @@ def REVD_ZPmZ : sve2_int_perm_revd<"revd">;
defm SCLAMP_ZZZ : sve2_clamp<"sclamp", 0b0>;
defm UCLAMP_ZZZ : sve2_clamp<"uclamp", 0b1>;
-defm DUP_PPzPRI : sve2_int_perm_dup_p<"dup">;
+defm PSEL_PPPRI : sve2_int_perm_sel_p<"psel">;
} // End let Predicates = [HasSME]
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 91c3aec30a15..67d8fbb45cf5 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -261,11 +261,6 @@ def AArch64dup_mt : SDNode<"AArch64ISD::DUP_MERGE_PASSTHRU", SDT_AArch64DUP_PRED
def AArch64splice : SDNode<"AArch64ISD::SPLICE", SDT_AArch64Arith>;
-def step_vector_oneuse : PatFrag<(ops node:$idx),
- (step_vector node:$idx), [{
- return N->hasOneUse();
-}]>;
-
def reinterpret_cast : SDNode<"AArch64ISD::REINTERPRET_CAST", SDTUnaryOp>;
def AArch64mul_p_oneuse : PatFrag<(ops node:$pred, node:$src1, node:$src2),
@@ -286,7 +281,9 @@ let Predicates = [HasSVE] in {
defm RDFFR_P : sve_int_rdffr_unpred<"rdffr", int_aarch64_sve_rdffr>;
def SETFFR : sve_int_setffr<"setffr", int_aarch64_sve_setffr>;
def WRFFR : sve_int_wrffr<"wrffr", int_aarch64_sve_wrffr>;
+} // End HasSVE
+let Predicates = [HasSVEorStreamingSVE] in {
defm ADD_ZZZ : sve_int_bin_cons_arit_0<0b000, "add", add>;
defm SUB_ZZZ : sve_int_bin_cons_arit_0<0b001, "sub", sub>;
defm SQADD_ZZZ : sve_int_bin_cons_arit_0<0b100, "sqadd", saddsat>;
@@ -305,13 +302,15 @@ let Predicates = [HasSVE] in {
defm ADD_ZPZZ : sve_int_bin_pred_bhsd<AArch64add_p>;
defm SUB_ZPZZ : sve_int_bin_pred_bhsd<AArch64sub_p>;
+} // End HasSVEorStreamingSVE
- let Predicates = [HasSVE, UseExperimentalZeroingPseudos] in {
- defm ADD_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_add>;
- defm SUB_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_sub>;
- defm SUBR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_subr>;
- }
+let Predicates = [HasSVEorStreamingSVE, UseExperimentalZeroingPseudos] in {
+ defm ADD_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_add>;
+ defm SUB_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_sub>;
+ defm SUBR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_subr>;
+} // End HasSVEorStreamingSVE, UseExperimentalZeroingPseudos
+let Predicates = [HasSVEorStreamingSVE] in {
defm ORR_ZPmZ : sve_int_bin_pred_log<0b000, "orr", int_aarch64_sve_orr>;
defm EOR_ZPmZ : sve_int_bin_pred_log<0b001, "eor", int_aarch64_sve_eor>;
defm AND_ZPmZ : sve_int_bin_pred_log<0b010, "and", int_aarch64_sve_and>;
@@ -403,17 +402,37 @@ let Predicates = [HasSVE] in {
defm SMIN_ZPZZ : sve_int_bin_pred_bhsd<AArch64smin_p>;
defm UMIN_ZPZZ : sve_int_bin_pred_bhsd<AArch64umin_p>;
- defm FRECPE_ZZ : sve_fp_2op_u_zd<0b110, "frecpe", int_aarch64_sve_frecpe_x>;
- defm FRSQRTE_ZZ : sve_fp_2op_u_zd<0b111, "frsqrte", int_aarch64_sve_frsqrte_x>;
+ defm FRECPE_ZZ : sve_fp_2op_u_zd<0b110, "frecpe", AArch64frecpe>;
+ defm FRSQRTE_ZZ : sve_fp_2op_u_zd<0b111, "frsqrte", AArch64frsqrte>;
+
+ defm FADD_ZPmI : sve_fp_2op_i_p_zds<0b000, "fadd", "FADD_ZPZI", sve_fpimm_half_one, fpimm_half, fpimm_one, int_aarch64_sve_fadd>;
+ defm FSUB_ZPmI : sve_fp_2op_i_p_zds<0b001, "fsub", "FSUB_ZPZI", sve_fpimm_half_one, fpimm_half, fpimm_one, int_aarch64_sve_fsub>;
+ defm FMUL_ZPmI : sve_fp_2op_i_p_zds<0b010, "fmul", "FMUL_ZPZI", sve_fpimm_half_two, fpimm_half, fpimm_two, int_aarch64_sve_fmul>;
+ defm FSUBR_ZPmI : sve_fp_2op_i_p_zds<0b011, "fsubr", "FSUBR_ZPZI", sve_fpimm_half_one, fpimm_half, fpimm_one, int_aarch64_sve_fsubr>;
+ defm FMAXNM_ZPmI : sve_fp_2op_i_p_zds<0b100, "fmaxnm", "FMAXNM_ZPZI", sve_fpimm_zero_one, fpimm0, fpimm_one, int_aarch64_sve_fmaxnm>;
+ defm FMINNM_ZPmI : sve_fp_2op_i_p_zds<0b101, "fminnm", "FMINNM_ZPZI", sve_fpimm_zero_one, fpimm0, fpimm_one, int_aarch64_sve_fminnm>;
+ defm FMAX_ZPmI : sve_fp_2op_i_p_zds<0b110, "fmax", "FMAX_ZPZI", sve_fpimm_zero_one, fpimm0, fpimm_one, int_aarch64_sve_fmax>;
+ defm FMIN_ZPmI : sve_fp_2op_i_p_zds<0b111, "fmin", "FMIN_ZPZI", sve_fpimm_zero_one, fpimm0, fpimm_one, int_aarch64_sve_fmin>;
+
+ defm FADD_ZPZI : sve_fp_2op_i_p_zds_hfd<sve_fpimm_half_one, fpimm_half, fpimm_one, AArch64fadd_p>;
+ defm FSUB_ZPZI : sve_fp_2op_i_p_zds_hfd<sve_fpimm_half_one, fpimm_half, fpimm_one, AArch64fsub_p>;
+ defm FMUL_ZPZI : sve_fp_2op_i_p_zds_hfd<sve_fpimm_half_two, fpimm_half, fpimm_two, AArch64fmul_p>;
+ defm FSUBR_ZPZI : sve_fp_2op_i_p_zds_hfd<sve_fpimm_half_one, fpimm_half, fpimm_one>;
+ defm FMAXNM_ZPZI : sve_fp_2op_i_p_zds_hfd<sve_fpimm_zero_one, fpimm0, fpimm_one, AArch64fmaxnm_p>;
+ defm FMINNM_ZPZI : sve_fp_2op_i_p_zds_hfd<sve_fpimm_zero_one, fpimm0, fpimm_one, AArch64fminnm_p>;
+ defm FMAX_ZPZI : sve_fp_2op_i_p_zds_hfd<sve_fpimm_zero_one, fpimm0, fpimm_one, AArch64fmax_p>;
+ defm FMIN_ZPZI : sve_fp_2op_i_p_zds_hfd<sve_fpimm_zero_one, fpimm0, fpimm_one, AArch64fmin_p>;
- defm FADD_ZPmI : sve_fp_2op_i_p_zds<0b000, "fadd", sve_fpimm_half_one>;
- defm FSUB_ZPmI : sve_fp_2op_i_p_zds<0b001, "fsub", sve_fpimm_half_one>;
- defm FMUL_ZPmI : sve_fp_2op_i_p_zds<0b010, "fmul", sve_fpimm_half_two>;
- defm FSUBR_ZPmI : sve_fp_2op_i_p_zds<0b011, "fsubr", sve_fpimm_half_one>;
- defm FMAXNM_ZPmI : sve_fp_2op_i_p_zds<0b100, "fmaxnm", sve_fpimm_zero_one>;
- defm FMINNM_ZPmI : sve_fp_2op_i_p_zds<0b101, "fminnm", sve_fpimm_zero_one>;
- defm FMAX_ZPmI : sve_fp_2op_i_p_zds<0b110, "fmax", sve_fpimm_zero_one>;
- defm FMIN_ZPmI : sve_fp_2op_i_p_zds<0b111, "fmin", sve_fpimm_zero_one>;
+ let Predicates = [HasSVE, UseExperimentalZeroingPseudos] in {
+ defm FADD_ZPZI : sve_fp_2op_i_p_zds_zeroing_hfd<sve_fpimm_half_one, fpimm_half, fpimm_one, int_aarch64_sve_fadd>;
+ defm FSUB_ZPZI : sve_fp_2op_i_p_zds_zeroing_hfd<sve_fpimm_half_one, fpimm_half, fpimm_one, int_aarch64_sve_fsub>;
+ defm FMUL_ZPZI : sve_fp_2op_i_p_zds_zeroing_hfd<sve_fpimm_half_two, fpimm_half, fpimm_two, int_aarch64_sve_fmul>;
+ defm FSUBR_ZPZI : sve_fp_2op_i_p_zds_zeroing_hfd<sve_fpimm_half_one, fpimm_half, fpimm_one, int_aarch64_sve_fsubr>;
+ defm FMAXNM_ZPZI : sve_fp_2op_i_p_zds_zeroing_hfd<sve_fpimm_zero_one, fpimm0, fpimm_one, int_aarch64_sve_fmaxnm>;
+ defm FMINNM_ZPZI : sve_fp_2op_i_p_zds_zeroing_hfd<sve_fpimm_zero_one, fpimm0, fpimm_one, int_aarch64_sve_fminnm>;
+ defm FMAX_ZPZI : sve_fp_2op_i_p_zds_zeroing_hfd<sve_fpimm_zero_one, fpimm0, fpimm_one, int_aarch64_sve_fmax>;
+ defm FMIN_ZPZI : sve_fp_2op_i_p_zds_zeroing_hfd<sve_fpimm_zero_one, fpimm0, fpimm_one, int_aarch64_sve_fmin>;
+ }
defm FADD_ZPmZ : sve_fp_2op_p_zds<0b0000, "fadd", "FADD_ZPZZ", int_aarch64_sve_fadd, DestructiveBinaryComm>;
defm FSUB_ZPmZ : sve_fp_2op_p_zds<0b0001, "fsub", "FSUB_ZPZZ", int_aarch64_sve_fsub, DestructiveBinaryCommWithRev, "FSUBR_ZPmZ">;
@@ -437,31 +456,43 @@ let Predicates = [HasSVE] in {
defm FMAX_ZPZZ : sve_fp_bin_pred_hfd<AArch64fmax_p>;
defm FMIN_ZPZZ : sve_fp_bin_pred_hfd<AArch64fmin_p>;
defm FDIV_ZPZZ : sve_fp_bin_pred_hfd<AArch64fdiv_p>;
-
- let Predicates = [HasSVE, UseExperimentalZeroingPseudos] in {
- defm FADD_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fadd>;
- defm FSUB_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fsub>;
- defm FMUL_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fmul>;
- defm FSUBR_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fsubr>;
- defm FMAXNM_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fmaxnm>;
- defm FMINNM_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fminnm>;
- defm FMAX_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fmax>;
- defm FMIN_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fmin>;
- defm FABD_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fabd>;
- defm FMULX_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fmulx>;
- defm FDIVR_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fdivr>;
- defm FDIV_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fdiv>;
- }
-
+} // End HasSVEorStreamingSVE
+
+let Predicates = [HasSVEorStreamingSVE, UseExperimentalZeroingPseudos] in {
+ defm FADD_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fadd>;
+ defm FSUB_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fsub>;
+ defm FMUL_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fmul>;
+ defm FSUBR_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fsubr>;
+ defm FMAXNM_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fmaxnm>;
+ defm FMINNM_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fminnm>;
+ defm FMAX_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fmax>;
+ defm FMIN_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fmin>;
+ defm FABD_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fabd>;
+ defm FMULX_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fmulx>;
+ defm FDIVR_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fdivr>;
+ defm FDIV_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fdiv>;
+} // End HasSVEorStreamingSVE, UseExperimentalZeroingPseudos
+
+let Predicates = [HasSVEorStreamingSVE] in {
defm FADD_ZZZ : sve_fp_3op_u_zd<0b000, "fadd", fadd, AArch64fadd_p>;
defm FSUB_ZZZ : sve_fp_3op_u_zd<0b001, "fsub", fsub, AArch64fsub_p>;
defm FMUL_ZZZ : sve_fp_3op_u_zd<0b010, "fmul", fmul, AArch64fmul_p>;
+} // End HasSVEorStreamingSVE
+
+let Predicates = [HasSVE] in {
defm FTSMUL_ZZZ : sve_fp_3op_u_zd_ftsmul<0b011, "ftsmul", int_aarch64_sve_ftsmul_x>;
- defm FRECPS_ZZZ : sve_fp_3op_u_zd<0b110, "frecps", int_aarch64_sve_frecps_x>;
- defm FRSQRTS_ZZZ : sve_fp_3op_u_zd<0b111, "frsqrts", int_aarch64_sve_frsqrts_x>;
+} // End HasSVE
+let Predicates = [HasSVEorStreamingSVE] in {
+ defm FRECPS_ZZZ : sve_fp_3op_u_zd<0b110, "frecps", AArch64frecps>;
+ defm FRSQRTS_ZZZ : sve_fp_3op_u_zd<0b111, "frsqrts", AArch64frsqrts>;
+} // End HasSVEorStreamingSVE
+
+let Predicates = [HasSVE] in {
defm FTSSEL_ZZZ : sve_int_bin_cons_misc_0_b<"ftssel", int_aarch64_sve_ftssel_x>;
+} // End HasSVE
+let Predicates = [HasSVEorStreamingSVE] in {
defm FCADD_ZPmZ : sve_fp_fcadd<"fcadd", int_aarch64_sve_fcadd>;
defm FCMLA_ZPmZZ : sve_fp_fcmla<"fcmla", int_aarch64_sve_fcmla>;
@@ -475,10 +506,10 @@ let Predicates = [HasSVE] in {
defm FNMAD_ZPmZZ : sve_fp_3op_p_zds_b<0b10, "fnmad", int_aarch64_sve_fnmad, "FNMLA_ZPmZZ", /*isReverseInstr*/ 1>;
defm FNMSB_ZPmZZ : sve_fp_3op_p_zds_b<0b11, "fnmsb", int_aarch64_sve_fnmsb, "FNMLS_ZPmZZ", /*isReverseInstr*/ 1>;
- defm FMLA_ZPZZZ : sve_fp_3op_p_zds_zx<int_aarch64_sve_fmla, int_aarch64_sve_fmad>;
- defm FMLS_ZPZZZ : sve_fp_3op_p_zds_zx<int_aarch64_sve_fmls, int_aarch64_sve_fmsb>;
- defm FNMLA_ZPZZZ : sve_fp_3op_p_zds_zx<int_aarch64_sve_fnmla, int_aarch64_sve_fnmad>;
- defm FNMLS_ZPZZZ : sve_fp_3op_p_zds_zx<int_aarch64_sve_fnmls, int_aarch64_sve_fnmsb>;
+ defm FMLA_ZPZZZ : sve_fp_3op_p_zds_zx;
+ defm FMLS_ZPZZZ : sve_fp_3op_p_zds_zx;
+ defm FNMLA_ZPZZZ : sve_fp_3op_p_zds_zx;
+ defm FNMLS_ZPZZZ : sve_fp_3op_p_zds_zx;
multiclass fma<ValueType Ty, ValueType PredTy, string Suffix> {
// Zd = Za + Zn * Zm
@@ -516,17 +547,26 @@ let Predicates = [HasSVE] in {
defm : fma<nxv4f32, nxv4i1, "S">;
defm : fma<nxv2f32, nxv2i1, "S">;
defm : fma<nxv2f64, nxv2i1, "D">;
+} // End HasSVEorStreamingSVE
+let Predicates = [HasSVE] in {
defm FTMAD_ZZI : sve_fp_ftmad<"ftmad", int_aarch64_sve_ftmad_x>;
+} // End HasSVE
+let Predicates = [HasSVEorStreamingSVE] in {
defm FMLA_ZZZI : sve_fp_fma_by_indexed_elem<0b0, "fmla", int_aarch64_sve_fmla_lane>;
defm FMLS_ZZZI : sve_fp_fma_by_indexed_elem<0b1, "fmls", int_aarch64_sve_fmls_lane>;
defm FCMLA_ZZZI : sve_fp_fcmla_by_indexed_elem<"fcmla", int_aarch64_sve_fcmla_lane>;
defm FMUL_ZZZI : sve_fp_fmul_by_indexed_elem<"fmul", int_aarch64_sve_fmul_lane>;
+} // End HasSVEorStreamingSVE
+let Predicates = [HasSVE] in {
// SVE floating point reductions.
defm FADDA_VPZ : sve_fp_2op_p_vd<0b000, "fadda", AArch64fadda_p>;
+} // End HasSVE
+
+let Predicates = [HasSVEorStreamingSVE] in {
defm FADDV_VPZ : sve_fp_fast_red<0b000, "faddv", AArch64faddv_p>;
defm FMAXNMV_VPZ : sve_fp_fast_red<0b100, "fmaxnmv", AArch64fmaxnmv_p>;
defm FMINNMV_VPZ : sve_fp_fast_red<0b101, "fminnmv", AArch64fminnmv_p>;
@@ -614,8 +654,13 @@ let Predicates = [HasSVE] in {
defm SEL_ZPZZ : sve_int_sel_vvv<"sel", vselect>;
defm SPLICE_ZPZ : sve_int_perm_splice<"splice", AArch64splice>;
+} // End HasSVEorStreamingSVE
+let Predicates = [HasSVE] in {
defm COMPACT_ZPZ : sve_int_perm_compact<"compact", int_aarch64_sve_compact>;
+} // End HasSVE
+
+let Predicates = [HasSVEorStreamingSVE] in {
defm INSR_ZR : sve_int_perm_insrs<"insr", AArch64insr>;
defm INSR_ZV : sve_int_perm_insrv<"insr", AArch64insr>;
defm EXT_ZZI : sve_int_perm_extract_i<"ext", AArch64ext>;
@@ -639,8 +684,13 @@ let Predicates = [HasSVE] in {
defm MOVPRFX_ZPzZ : sve_int_movprfx_pred_zero<0b000, "movprfx">;
defm MOVPRFX_ZPmZ : sve_int_movprfx_pred_merge<0b001, "movprfx">;
def MOVPRFX_ZZ : sve_int_bin_cons_misc_0_c<0b00000001, "movprfx", ZPRAny>;
+} // End HasSVEorStreamingSVE
+
+let Predicates = [HasSVE] in {
defm FEXPA_ZZ : sve_int_bin_cons_misc_0_c_fexpa<"fexpa", int_aarch64_sve_fexpa_x>;
+} // End HasSVE
+let Predicates = [HasSVEorStreamingSVE] in {
defm BRKPA_PPzPP : sve_int_brkp<0b00, "brkpa", int_aarch64_sve_brkpa_z>;
defm BRKPAS_PPzPP : sve_int_brkp<0b10, "brkpas", null_frag>;
defm BRKPB_PPzPP : sve_int_brkp<0b01, "brkpb", int_aarch64_sve_brkpb_z>;
@@ -752,7 +802,9 @@ let Predicates = [HasSVE] in {
defm LD1SB_S : sve_mem_cld_ss<0b1101, "ld1sb", Z_s, ZPR32, GPR64NoXZRshifted8>;
defm LD1SB_H : sve_mem_cld_ss<0b1110, "ld1sb", Z_h, ZPR16, GPR64NoXZRshifted8>;
defm LD1D : sve_mem_cld_ss<0b1111, "ld1d", Z_d, ZPR64, GPR64NoXZRshifted64>;
+} // End HasSVEorStreamingSVE
+let Predicates = [HasSVE] in {
// non-faulting continuous load with reg+immediate
defm LDNF1B_IMM : sve_mem_cldnf_si<0b0000, "ldnf1b", Z_b, ZPR8>;
defm LDNF1B_H_IMM : sve_mem_cldnf_si<0b0001, "ldnf1b", Z_h, ZPR16>;
@@ -788,7 +840,9 @@ let Predicates = [HasSVE] in {
defm LDFF1SB_S : sve_mem_cldff_ss<0b1101, "ldff1sb", Z_s, ZPR32, GPR64shifted8>;
defm LDFF1SB_H : sve_mem_cldff_ss<0b1110, "ldff1sb", Z_h, ZPR16, GPR64shifted8>;
defm LDFF1D : sve_mem_cldff_ss<0b1111, "ldff1d", Z_d, ZPR64, GPR64shifted64>;
+} // End HasSVE
+let Predicates = [HasSVEorStreamingSVE] in {
// LD(2|3|4) structured loads with reg+immediate
defm LD2B_IMM : sve_mem_eld_si<0b00, 0b01, ZZ_b, "ld2b", simm4s2>;
defm LD3B_IMM : sve_mem_eld_si<0b00, 0b10, ZZZ_b, "ld3b", simm4s3>;
@@ -816,7 +870,9 @@ let Predicates = [HasSVE] in {
def LD2D : sve_mem_eld_ss<0b11, 0b01, ZZ_d, "ld2d", GPR64NoXZRshifted64>;
def LD3D : sve_mem_eld_ss<0b11, 0b10, ZZZ_d, "ld3d", GPR64NoXZRshifted64>;
def LD4D : sve_mem_eld_ss<0b11, 0b11, ZZZZ_d, "ld4d", GPR64NoXZRshifted64>;
+} // End HasSVEorStreamingSVE
+let Predicates = [HasSVE] in {
// Gathers using unscaled 32-bit offsets, e.g.
// ld1h z0.s, p0/z, [x0, z0.s, uxtw]
defm GLD1SB_S : sve_mem_32b_gld_vs_32_unscaled<0b0000, "ld1sb", AArch64ld1s_gather_sxtw_z, AArch64ld1s_gather_uxtw_z, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>;
@@ -928,7 +984,9 @@ let Predicates = [HasSVE] in {
defm GLDFF1W_D : sve_mem_64b_gld_sv_32_scaled<0b1011, "ldff1w", AArch64ldff1_gather_sxtw_scaled_z, AArch64ldff1_gather_uxtw_scaled_z, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>;
defm GLD1D : sve_mem_64b_gld_sv_32_scaled<0b1110, "ld1d", AArch64ld1_gather_sxtw_scaled_z, AArch64ld1_gather_uxtw_scaled_z, ZPR64ExtSXTW64, ZPR64ExtUXTW64, nxv2i64>;
defm GLDFF1D : sve_mem_64b_gld_sv_32_scaled<0b1111, "ldff1d", AArch64ldff1_gather_sxtw_scaled_z, AArch64ldff1_gather_uxtw_scaled_z, ZPR64ExtSXTW64, ZPR64ExtUXTW64, nxv2i64>;
+} // End HasSVE
+let Predicates = [HasSVEorStreamingSVE] in {
// Non-temporal contiguous loads (register + immediate)
defm LDNT1B_ZRI : sve_mem_cldnt_si<0b00, "ldnt1b", Z_b, ZPR8>;
defm LDNT1H_ZRI : sve_mem_cldnt_si<0b01, "ldnt1h", Z_h, ZPR16>;
@@ -964,7 +1022,9 @@ let Predicates = [HasSVE] in {
defm ST1W : sve_mem_cst_ss<0b1010, "st1w", Z_s, ZPR32, GPR64NoXZRshifted32>;
defm ST1W_D : sve_mem_cst_ss<0b1011, "st1w", Z_d, ZPR64, GPR64NoXZRshifted32>;
defm ST1D : sve_mem_cst_ss<0b1111, "st1d", Z_d, ZPR64, GPR64NoXZRshifted64>;
+} // End HasSVEorStreamingSVE
+let Predicates = [HasSVE] in {
// Scatters using unpacked, unscaled 32-bit offsets, e.g.
// st1h z0.d, p0, [x0, z0.d, uxtw]
defm SST1B_D : sve_mem_64b_sst_sv_32_unscaled<0b000, "st1b", AArch64st1_scatter_sxtw, AArch64st1_scatter_uxtw, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>;
@@ -1014,7 +1074,9 @@ let Predicates = [HasSVE] in {
defm SST1H_D_SCALED : sve_mem_sst_sv_64_scaled<0b01, "st1h", AArch64st1_scatter_scaled, ZPR64ExtLSL16, nxv2i16>;
defm SST1W_D_SCALED : sve_mem_sst_sv_64_scaled<0b10, "st1w", AArch64st1_scatter_scaled, ZPR64ExtLSL32, nxv2i32>;
defm SST1D_SCALED : sve_mem_sst_sv_64_scaled<0b11, "st1d", AArch64st1_scatter_scaled, ZPR64ExtLSL64, nxv2i64>;
+} // End HasSVE
+let Predicates = [HasSVEorStreamingSVE] in {
// ST(2|3|4) structured stores (register + immediate)
defm ST2B_IMM : sve_mem_est_si<0b00, 0b01, ZZ_b, "st2b", simm4s2>;
defm ST3B_IMM : sve_mem_est_si<0b00, 0b10, ZZZ_b, "st3b", simm4s3>;
@@ -1073,7 +1135,7 @@ let Predicates = [HasSVE] in {
def PRFS_PRR : sve_mem_prfm_ss<0b101, "prfw", GPR64NoXZRshifted32>;
def PRFD_PRR : sve_mem_prfm_ss<0b111, "prfd", GPR64NoXZRshifted64>;
- multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instruction RegImmInst, Instruction RegRegInst, int scale, ComplexPattern AddrCP> {
+ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instruction RegImmInst, Instruction RegRegInst, ComplexPattern AddrCP> {
// reg + imm
let AddedComplexity = 2 in {
def _reg_imm : Pat<(prefetch (PredTy PPR_3b:$gp), (am_sve_indexed_s6 GPR64sp:$base, simm6s1:$offset), (i32 sve_prfop:$prfop)),
@@ -1091,11 +1153,13 @@ let Predicates = [HasSVE] in {
(RegImmInst sve_prfop:$prfop, PPR_3b:$gp, GPR64:$base, (i64 0))>;
}
- defm : sve_prefetch<int_aarch64_sve_prf, nxv16i1, PRFB_PRI, PRFB_PRR, 0, am_sve_regreg_lsl0>;
- defm : sve_prefetch<int_aarch64_sve_prf, nxv8i1, PRFH_PRI, PRFH_PRR, 1, am_sve_regreg_lsl1>;
- defm : sve_prefetch<int_aarch64_sve_prf, nxv4i1, PRFW_PRI, PRFS_PRR, 2, am_sve_regreg_lsl2>;
- defm : sve_prefetch<int_aarch64_sve_prf, nxv2i1, PRFD_PRI, PRFD_PRR, 3, am_sve_regreg_lsl3>;
+ defm : sve_prefetch<int_aarch64_sve_prf, nxv16i1, PRFB_PRI, PRFB_PRR, am_sve_regreg_lsl0>;
+ defm : sve_prefetch<int_aarch64_sve_prf, nxv8i1, PRFH_PRI, PRFH_PRR, am_sve_regreg_lsl1>;
+ defm : sve_prefetch<int_aarch64_sve_prf, nxv4i1, PRFW_PRI, PRFS_PRR, am_sve_regreg_lsl2>;
+ defm : sve_prefetch<int_aarch64_sve_prf, nxv2i1, PRFD_PRI, PRFD_PRR, am_sve_regreg_lsl3>;
+} // End HasSVEorStreamingSVE
+let Predicates = [HasSVE] in {
// Gather prefetch using scaled 32-bit offsets, e.g.
// prfh pldl1keep, p0, [x0, z0.s, uxtw #1]
defm PRFB_S : sve_mem_32b_prfm_sv_scaled<0b00, "prfb", ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, int_aarch64_sve_prfb_gather_sxtw_index, int_aarch64_sve_prfb_gather_uxtw_index>;
@@ -1153,6 +1217,53 @@ let Predicates = [HasSVE] in {
def : Pat<(nxv2i64 (int_aarch64_sve_adrd nxv2i64:$Op1, nxv2i64:$Op2)),
(ADR_LSL_ZZZ_D_3 $Op1, $Op2)>;
+ // Patterns to generate adr instruction.
+ // adr z0.d, [z0.d, z0.d, uxtw]
+ def : Pat<(add nxv2i64:$Op1,
+ (nxv2i64 (and nxv2i64:$Op2, (nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))))),
+ (ADR_UXTW_ZZZ_D_0 $Op1, $Op2)>;
+ // adr z0.d, [z0.d, z0.d, sxtw]
+ def : Pat<(add nxv2i64:$Op1,
+ (nxv2i64 (sext_inreg nxv2i64:$Op2, nxv2i32))),
+ (ADR_SXTW_ZZZ_D_0 $Op1, $Op2)>;
+
+ // adr z0.s, [z0.s, z0.s, lsl #<shift>]
+ // adr z0.d, [z0.d, z0.d, lsl #<shift>]
+ multiclass adrShiftPat<ValueType Ty, ValueType PredTy, ValueType ShiftTy, Instruction DestAdrIns, int ShiftAmt> {
+ def : Pat<(add Ty:$Op1,
+ (Ty (AArch64lsl_p (PredTy (SVEAllActive)),
+ Ty:$Op2,
+ (Ty (AArch64dup (ShiftTy ShiftAmt)))))),
+ (DestAdrIns $Op1, $Op2)>;
+ }
+ defm : adrShiftPat<nxv2i64, nxv2i1, i64, ADR_LSL_ZZZ_D_1, 1>;
+ defm : adrShiftPat<nxv2i64, nxv2i1, i64, ADR_LSL_ZZZ_D_2, 2>;
+ defm : adrShiftPat<nxv2i64, nxv2i1, i64, ADR_LSL_ZZZ_D_3, 3>;
+ defm : adrShiftPat<nxv4i32, nxv4i1, i32, ADR_LSL_ZZZ_S_1, 1>;
+ defm : adrShiftPat<nxv4i32, nxv4i1, i32, ADR_LSL_ZZZ_S_2, 2>;
+ defm : adrShiftPat<nxv4i32, nxv4i1, i32, ADR_LSL_ZZZ_S_3, 3>;
+
+ // adr z0.d, [z0.d, z0.d, uxtw #<shift>]
+ // adr z0.d, [z0.d, z0.d, sxtw #<shift>]
+ multiclass adrXtwShiftPat<ValueType Ty, ValueType PredTy, int ShiftAmt> {
+ def : Pat<(add Ty:$Op1,
+ (Ty (AArch64lsl_p (PredTy (SVEAllActive)),
+ (Ty (and Ty:$Op2, (Ty (AArch64dup (i64 0xFFFFFFFF))))),
+ (Ty (AArch64dup (i64 ShiftAmt)))))),
+ (!cast<Instruction>("ADR_UXTW_ZZZ_D_"#ShiftAmt) $Op1, $Op2)>;
+
+ def : Pat<(add Ty:$Op1,
+ (Ty (AArch64lsl_p (PredTy (SVEAllActive)),
+ (Ty (sext_inreg Ty:$Op2, nxv2i32)),
+ (Ty (AArch64dup (i64 ShiftAmt)))))),
+ (!cast<Instruction>("ADR_SXTW_ZZZ_D_"#ShiftAmt) $Op1, $Op2)>;
+ }
+ defm : adrXtwShiftPat<nxv2i64, nxv2i1, 1>;
+ defm : adrXtwShiftPat<nxv2i64, nxv2i1, 2>;
+ defm : adrXtwShiftPat<nxv2i64, nxv2i1, 3>;
+} // End HasSVE
+
+let Predicates = [HasSVEorStreamingSVE] in {
defm TBL_ZZZ : sve_int_perm_tbl<"tbl", AArch64tbl>;
defm ZIP1_ZZZ : sve_int_perm_bin_perm_zz<0b000, "zip1", AArch64zip1>;
@@ -1171,17 +1282,52 @@ let Predicates = [HasSVE] in {
// Extract lo/hi halves of legal predicate types.
def : Pat<(nxv2i1 (extract_subvector (nxv4i1 PPR:$Ps), (i64 0))),
- (ZIP1_PPP_S PPR:$Ps, (PFALSE))>;
+ (PUNPKLO_PP PPR:$Ps)>;
def : Pat<(nxv2i1 (extract_subvector (nxv4i1 PPR:$Ps), (i64 2))),
- (ZIP2_PPP_S PPR:$Ps, (PFALSE))>;
+ (PUNPKHI_PP PPR:$Ps)>;
def : Pat<(nxv4i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 0))),
- (ZIP1_PPP_H PPR:$Ps, (PFALSE))>;
+ (PUNPKLO_PP PPR:$Ps)>;
def : Pat<(nxv4i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 4))),
- (ZIP2_PPP_H PPR:$Ps, (PFALSE))>;
+ (PUNPKHI_PP PPR:$Ps)>;
def : Pat<(nxv8i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 0))),
- (ZIP1_PPP_B PPR:$Ps, (PFALSE))>;
+ (PUNPKLO_PP PPR:$Ps)>;
def : Pat<(nxv8i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 8))),
- (ZIP2_PPP_B PPR:$Ps, (PFALSE))>;
+ (PUNPKHI_PP PPR:$Ps)>;
+
+ def : Pat<(nxv2i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 0))),
+ (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps))>;
+ def : Pat<(nxv2i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 2))),
+ (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps))>;
+ def : Pat<(nxv2i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 4))),
+ (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps))>;
+ def : Pat<(nxv2i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 6))),
+ (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps))>;
+
+ def : Pat<(nxv4i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 0))),
+ (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps))>;
+ def : Pat<(nxv4i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 4))),
+ (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps))>;
+ def : Pat<(nxv4i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 8))),
+ (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps))>;
+ def : Pat<(nxv4i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 12))),
+ (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps))>;
+
+ def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 0))),
+ (PUNPKLO_PP (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps)))>;
+ def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 2))),
+ (PUNPKHI_PP (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps)))>;
+ def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 4))),
+ (PUNPKLO_PP (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps)))>;
+ def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 6))),
+ (PUNPKHI_PP (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps)))>;
+ def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 8))),
+ (PUNPKLO_PP (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps)))>;
+ def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 10))),
+ (PUNPKHI_PP (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps)))>;
+ def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 12))),
+ (PUNPKLO_PP (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps)))>;
+ def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 14))),
+ (PUNPKHI_PP (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps)))>;
// Extract subvectors from FP SVE vectors
def : Pat<(nxv2f16 (extract_subvector (nxv4f16 ZPR:$Zs), (i64 0))),
@@ -1206,6 +1352,24 @@ let Predicates = [HasSVE] in {
def : Pat<(nxv4bf16 (extract_subvector (nxv8bf16 ZPR:$Zs), (i64 4))),
(UUNPKHI_ZZ_S ZPR:$Zs)>;
+ def : Pat<(nxv2f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 0))),
+ (UUNPKLO_ZZ_D (UUNPKLO_ZZ_S ZPR:$Zs))>;
+ def : Pat<(nxv2f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 2))),
+ (UUNPKHI_ZZ_D (UUNPKLO_ZZ_S ZPR:$Zs))>;
+ def : Pat<(nxv2f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 4))),
+ (UUNPKLO_ZZ_D (UUNPKHI_ZZ_S ZPR:$Zs))>;
+ def : Pat<(nxv2f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 6))),
+ (UUNPKHI_ZZ_D (UUNPKHI_ZZ_S ZPR:$Zs))>;
+
+ def : Pat<(nxv2bf16 (extract_subvector (nxv8bf16 ZPR:$Zs), (i64 0))),
+ (UUNPKLO_ZZ_D (UUNPKLO_ZZ_S ZPR:$Zs))>;
+ def : Pat<(nxv2bf16 (extract_subvector (nxv8bf16 ZPR:$Zs), (i64 2))),
+ (UUNPKHI_ZZ_D (UUNPKLO_ZZ_S ZPR:$Zs))>;
+ def : Pat<(nxv2bf16 (extract_subvector (nxv8bf16 ZPR:$Zs), (i64 4))),
+ (UUNPKLO_ZZ_D (UUNPKHI_ZZ_S ZPR:$Zs))>;
+ def : Pat<(nxv2bf16 (extract_subvector (nxv8bf16 ZPR:$Zs), (i64 6))),
+ (UUNPKHI_ZZ_D (UUNPKHI_ZZ_S ZPR:$Zs))>;
+
// Concatenate two predicates.
def : Pat<(nxv4i1 (concat_vectors nxv2i1:$p1, nxv2i1:$p2)),
(UZP1_PPP_S $p1, $p2)>;
@@ -1308,16 +1472,18 @@ let Predicates = [HasSVE] in {
defm CNTW_XPiI : sve_int_count<0b100, "cntw", int_aarch64_sve_cntw>;
defm CNTD_XPiI : sve_int_count<0b110, "cntd", int_aarch64_sve_cntd>;
defm CNTP_XPP : sve_int_pcount_pred<0b0000, "cntp", int_aarch64_sve_cntp>;
+}
- defm INCB_XPiI : sve_int_pred_pattern_a<0b000, "incb">;
- defm DECB_XPiI : sve_int_pred_pattern_a<0b001, "decb">;
- defm INCH_XPiI : sve_int_pred_pattern_a<0b010, "inch">;
- defm DECH_XPiI : sve_int_pred_pattern_a<0b011, "dech">;
- defm INCW_XPiI : sve_int_pred_pattern_a<0b100, "incw">;
- defm DECW_XPiI : sve_int_pred_pattern_a<0b101, "decw">;
- defm INCD_XPiI : sve_int_pred_pattern_a<0b110, "incd">;
- defm DECD_XPiI : sve_int_pred_pattern_a<0b111, "decd">;
+ defm INCB_XPiI : sve_int_pred_pattern_a<0b000, "incb", add, int_aarch64_sve_cntb>;
+ defm DECB_XPiI : sve_int_pred_pattern_a<0b001, "decb", sub, int_aarch64_sve_cntb>;
+ defm INCH_XPiI : sve_int_pred_pattern_a<0b010, "inch", add, int_aarch64_sve_cnth>;
+ defm DECH_XPiI : sve_int_pred_pattern_a<0b011, "dech", sub, int_aarch64_sve_cnth>;
+ defm INCW_XPiI : sve_int_pred_pattern_a<0b100, "incw", add, int_aarch64_sve_cntw>;
+ defm DECW_XPiI : sve_int_pred_pattern_a<0b101, "decw", sub, int_aarch64_sve_cntw>;
+ defm INCD_XPiI : sve_int_pred_pattern_a<0b110, "incd", add, int_aarch64_sve_cntd>;
+ defm DECD_XPiI : sve_int_pred_pattern_a<0b111, "decd", sub, int_aarch64_sve_cntd>;
+let Predicates = [HasSVEorStreamingSVE] in {
defm SQINCB_XPiWdI : sve_int_pred_pattern_b_s32<0b00000, "sqincb", int_aarch64_sve_sqincb_n32>;
defm UQINCB_WPiI : sve_int_pred_pattern_b_u32<0b00001, "uqincb", int_aarch64_sve_uqincb_n32>;
defm SQDECB_XPiWdI : sve_int_pred_pattern_b_s32<0b00010, "sqdecb", int_aarch64_sve_sqdecb_n32>;
@@ -1391,10 +1557,10 @@ let Predicates = [HasSVE] in {
defm INCP_ZP : sve_int_count_v<0b10000, "incp">;
defm DECP_ZP : sve_int_count_v<0b10100, "decp">;
- defm INDEX_RR : sve_int_index_rr<"index", step_vector, step_vector_oneuse, AArch64mul_p_oneuse>;
- defm INDEX_IR : sve_int_index_ir<"index", step_vector, step_vector_oneuse, AArch64mul_p, AArch64mul_p_oneuse>;
- defm INDEX_RI : sve_int_index_ri<"index", step_vector, step_vector_oneuse>;
- defm INDEX_II : sve_int_index_ii<"index", step_vector, step_vector_oneuse>;
+ defm INDEX_RR : sve_int_index_rr<"index", AArch64mul_p_oneuse>;
+ defm INDEX_IR : sve_int_index_ir<"index", AArch64mul_p, AArch64mul_p_oneuse>;
+ defm INDEX_RI : sve_int_index_ri<"index">;
+ defm INDEX_II : sve_int_index_ii<"index">;
// Unpredicated shifts
defm ASR_ZZI : sve_int_bin_cons_shift_imm_right<0b00, "asr", AArch64asr_p>;
@@ -1414,14 +1580,16 @@ let Predicates = [HasSVE] in {
defm ASR_ZPZI : sve_int_shift_pred_bhsd<AArch64asr_p, SVEShiftImmR8, SVEShiftImmR16, SVEShiftImmR32, SVEShiftImmR64>;
defm LSR_ZPZI : sve_int_shift_pred_bhsd<AArch64lsr_p, SVEShiftImmR8, SVEShiftImmR16, SVEShiftImmR32, SVEShiftImmR64>;
defm LSL_ZPZI : sve_int_shift_pred_bhsd<AArch64lsl_p, SVEShiftImmL8, SVEShiftImmL16, SVEShiftImmL32, SVEShiftImmL64>;
+} // End HasSVEorStreamingSVE
- let Predicates = [HasSVE, UseExperimentalZeroingPseudos] in {
- defm ASR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_asr>;
- defm LSR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsr>;
- defm LSL_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsl>;
- defm ASRD_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<int_aarch64_sve_asrd>;
- }
+let Predicates = [HasSVEorStreamingSVE, UseExperimentalZeroingPseudos] in {
+ defm ASR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_asr>;
+ defm LSR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsr>;
+ defm LSL_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsl>;
+ defm ASRD_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<int_aarch64_sve_asrd>;
+} // End HasSVEorStreamingSVE, UseExperimentalZeroingPseudos
+let Predicates = [HasSVEorStreamingSVE] in {
defm ASR_ZPmZ : sve_int_bin_pred_shift<0b000, "asr", "ASR_ZPZZ", int_aarch64_sve_asr, "ASRR_ZPmZ">;
defm LSR_ZPmZ : sve_int_bin_pred_shift<0b001, "lsr", "LSR_ZPZZ", int_aarch64_sve_lsr, "LSRR_ZPmZ">;
defm LSL_ZPmZ : sve_int_bin_pred_shift<0b011, "lsl", "LSL_ZPZZ", int_aarch64_sve_lsl, "LSLR_ZPmZ">;
@@ -1536,19 +1704,27 @@ let Predicates = [HasSVE] in {
defm FRINTI_ZPmZ : sve_fp_2op_p_zd_HSD<0b00111, "frinti", AArch64frinti_mt>;
defm FRECPX_ZPmZ : sve_fp_2op_p_zd_HSD<0b01100, "frecpx", AArch64frecpx_mt>;
defm FSQRT_ZPmZ : sve_fp_2op_p_zd_HSD<0b01101, "fsqrt", AArch64fsqrt_mt>;
-
- let Predicates = [HasBF16, HasSVE] in {
- defm BFDOT_ZZZ : sve_bfloat_dot<"bfdot", int_aarch64_sve_bfdot>;
- defm BFDOT_ZZI : sve_bfloat_dot_indexed<"bfdot", int_aarch64_sve_bfdot_lane>;
- defm BFMMLA_ZZZ : sve_bfloat_matmul<"bfmmla", int_aarch64_sve_bfmmla>;
- defm BFMMLA_B_ZZZ : sve_bfloat_matmul_longvecl<0b0, "bfmlalb", int_aarch64_sve_bfmlalb>;
- defm BFMMLA_T_ZZZ : sve_bfloat_matmul_longvecl<0b1, "bfmlalt", int_aarch64_sve_bfmlalt>;
- defm BFMMLA_B_ZZI : sve_bfloat_matmul_longvecl_idx<0b0, "bfmlalb", int_aarch64_sve_bfmlalb_lane>;
- defm BFMMLA_T_ZZI : sve_bfloat_matmul_longvecl_idx<0b1, "bfmlalt", int_aarch64_sve_bfmlalt_lane>;
- defm BFCVT_ZPmZ : sve_bfloat_convert<0b1, "bfcvt", int_aarch64_sve_fcvt_bf16f32>;
- defm BFCVTNT_ZPmZ : sve_bfloat_convert<0b0, "bfcvtnt", int_aarch64_sve_fcvtnt_bf16f32>;
- }
-
+} // End HasSVEorStreamingSVE
+
+let Predicates = [HasBF16, HasSVEorStreamingSVE] in {
+ defm BFDOT_ZZZ : sve_bfloat_dot<"bfdot", int_aarch64_sve_bfdot>;
+ defm BFDOT_ZZI : sve_bfloat_dot_indexed<"bfdot", int_aarch64_sve_bfdot_lane>;
+} // End HasBF16, HasSVEorStreamingSVE
+
+let Predicates = [HasBF16, HasSVE] in {
+ defm BFMMLA_ZZZ : sve_bfloat_matmul<"bfmmla", int_aarch64_sve_bfmmla>;
+} // End HasBF16, HasSVE
+
+let Predicates = [HasBF16, HasSVEorStreamingSVE] in {
+ defm BFMMLA_B_ZZZ : sve_bfloat_matmul_longvecl<0b0, "bfmlalb", int_aarch64_sve_bfmlalb>;
+ defm BFMMLA_T_ZZZ : sve_bfloat_matmul_longvecl<0b1, "bfmlalt", int_aarch64_sve_bfmlalt>;
+ defm BFMMLA_B_ZZI : sve_bfloat_matmul_longvecl_idx<0b0, "bfmlalb", int_aarch64_sve_bfmlalb_lane>;
+ defm BFMMLA_T_ZZI : sve_bfloat_matmul_longvecl_idx<0b1, "bfmlalt", int_aarch64_sve_bfmlalt_lane>;
+ defm BFCVT_ZPmZ : sve_bfloat_convert<0b1, "bfcvt", int_aarch64_sve_fcvt_bf16f32>;
+ defm BFCVTNT_ZPmZ : sve_bfloat_convert<0b0, "bfcvtnt", int_aarch64_sve_fcvtnt_bf16f32>;
+} // End HasBF16, HasSVEorStreamingSVE
+
+let Predicates = [HasSVEorStreamingSVE] in {
// InstAliases
def : InstAlias<"mov $Zd, $Zn",
(ORR_ZZZ ZPR64:$Zd, ZPR64:$Zn, ZPR64:$Zn), 1>;
@@ -1739,6 +1915,72 @@ let Predicates = [HasSVE] in {
def : Pat<(vscale (sve_cntd_imm_neg i32:$imm)), (SUBXrs XZR, (CNTD_XPiI 31, $imm), 0)>;
}
+ let AddedComplexity = 5 in {
+ def : Pat<(add GPR64:$op, (vscale (sve_rdvl_imm i32:$imm))),
+ (ADDVL_XXI GPR64:$op, $imm)>;
+
+ def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_rdvl_imm i32:$imm))))),
+ (i32 (EXTRACT_SUBREG (ADDVL_XXI (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ GPR32:$op, sub_32), $imm),
+ sub_32))>;
+
+ def : Pat<(nxv8i16 (add ZPR:$op, (nxv8i16 (AArch64dup (i32 (trunc (vscale (sve_cnth_imm i32:$imm)))))))),
+ (INCH_ZPiI ZPR:$op, 31, $imm)>;
+ def : Pat<(nxv4i32 (add ZPR:$op, (nxv4i32 (AArch64dup (i32 (trunc (vscale (sve_cntw_imm i32:$imm)))))))),
+ (INCW_ZPiI ZPR:$op, 31, $imm)>;
+ def : Pat<(nxv2i64 (add ZPR:$op, (nxv2i64 (AArch64dup (i64 (vscale (sve_cntd_imm i32:$imm))))))),
+ (INCD_ZPiI ZPR:$op, 31, $imm)>;
+
+ def : Pat<(nxv8i16 (sub ZPR:$op, (nxv8i16 (AArch64dup (i32 (trunc (vscale (sve_cnth_imm i32:$imm)))))))),
+ (DECH_ZPiI ZPR:$op, 31, $imm)>;
+ def : Pat<(nxv4i32 (sub ZPR:$op, (nxv4i32 (AArch64dup (i32 (trunc (vscale (sve_cntw_imm i32:$imm)))))))),
+ (DECW_ZPiI ZPR:$op, 31, $imm)>;
+ def : Pat<(nxv2i64 (sub ZPR:$op, (nxv2i64 (AArch64dup (i64 (vscale (sve_cntd_imm i32:$imm))))))),
+ (DECD_ZPiI ZPR:$op, 31, $imm)>;
+ }
+
+ let Predicates = [HasSVEorStreamingSVE, UseScalarIncVL], AddedComplexity = 5 in {
+ def : Pat<(add GPR64:$op, (vscale (sve_cnth_imm i32:$imm))),
+ (INCH_XPiI GPR64:$op, 31, $imm)>;
+ def : Pat<(add GPR64:$op, (vscale (sve_cntw_imm i32:$imm))),
+ (INCW_XPiI GPR64:$op, 31, $imm)>;
+ def : Pat<(add GPR64:$op, (vscale (sve_cntd_imm i32:$imm))),
+ (INCD_XPiI GPR64:$op, 31, $imm)>;
+
+ def : Pat<(add GPR64:$op, (vscale (sve_cnth_imm_neg i32:$imm))),
+ (DECH_XPiI GPR64:$op, 31, $imm)>;
+ def : Pat<(add GPR64:$op, (vscale (sve_cntw_imm_neg i32:$imm))),
+ (DECW_XPiI GPR64:$op, 31, $imm)>;
+ def : Pat<(add GPR64:$op, (vscale (sve_cntd_imm_neg i32:$imm))),
+ (DECD_XPiI GPR64:$op, 31, $imm)>;
+
+ def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cnth_imm i32:$imm))))),
+ (i32 (EXTRACT_SUBREG (INCH_XPiI (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ GPR32:$op, sub_32), 31, $imm),
+ sub_32))>;
+ def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cntw_imm i32:$imm))))),
+ (i32 (EXTRACT_SUBREG (INCW_XPiI (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ GPR32:$op, sub_32), 31, $imm),
+ sub_32))>;
+ def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cntd_imm i32:$imm))))),
+ (i32 (EXTRACT_SUBREG (INCD_XPiI (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ GPR32:$op, sub_32), 31, $imm),
+ sub_32))>;
+
+ def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cnth_imm_neg i32:$imm))))),
+ (i32 (EXTRACT_SUBREG (DECH_XPiI (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ GPR32:$op, sub_32), 31, $imm),
+ sub_32))>;
+ def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cntw_imm_neg i32:$imm))))),
+ (i32 (EXTRACT_SUBREG (DECW_XPiI (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ GPR32:$op, sub_32), 31, $imm),
+ sub_32))>;
+ def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cntd_imm_neg i32:$imm))))),
+ (i32 (EXTRACT_SUBREG (DECD_XPiI (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ GPR32:$op, sub_32), 31, $imm),
+ sub_32))>;
+ }
+
def : Pat<(add GPR64:$op, (vscale (sve_rdvl_imm i32:$imm))),
(ADDVL_XXI GPR64:$op, $imm)>;
@@ -1864,25 +2106,27 @@ let Predicates = [HasSVE] in {
}
// 2-element contiguous loads
- defm : pred_load<nxv2i64, nxv2i1, zext_masked_load_i8, LD1B_D, LD1B_D_IMM, am_sve_regreg_lsl0>;
- defm : pred_load<nxv2i64, nxv2i1, asext_masked_load_i8, LD1SB_D, LD1SB_D_IMM, am_sve_regreg_lsl0>;
- defm : pred_load<nxv2i64, nxv2i1, zext_masked_load_i16, LD1H_D, LD1H_D_IMM, am_sve_regreg_lsl1>;
- defm : pred_load<nxv2i64, nxv2i1, asext_masked_load_i16, LD1SH_D, LD1SH_D_IMM, am_sve_regreg_lsl1>;
- defm : pred_load<nxv2i64, nxv2i1, zext_masked_load_i32, LD1W_D, LD1W_D_IMM, am_sve_regreg_lsl2>;
- defm : pred_load<nxv2i64, nxv2i1, asext_masked_load_i32, LD1SW_D, LD1SW_D_IMM, am_sve_regreg_lsl2>;
- defm : pred_load<nxv2i64, nxv2i1, nonext_masked_load, LD1D, LD1D_IMM, am_sve_regreg_lsl3>;
- defm : pred_load<nxv2f16, nxv2i1, nonext_masked_load, LD1H_D, LD1H_D_IMM, am_sve_regreg_lsl1>;
- defm : pred_load<nxv2f32, nxv2i1, nonext_masked_load, LD1W_D, LD1W_D_IMM, am_sve_regreg_lsl2>;
- defm : pred_load<nxv2f64, nxv2i1, nonext_masked_load, LD1D, LD1D_IMM, am_sve_regreg_lsl3>;
+ defm : pred_load<nxv2i64, nxv2i1, zext_masked_load_i8, LD1B_D, LD1B_D_IMM, am_sve_regreg_lsl0>;
+ defm : pred_load<nxv2i64, nxv2i1, asext_masked_load_i8, LD1SB_D, LD1SB_D_IMM, am_sve_regreg_lsl0>;
+ defm : pred_load<nxv2i64, nxv2i1, zext_masked_load_i16, LD1H_D, LD1H_D_IMM, am_sve_regreg_lsl1>;
+ defm : pred_load<nxv2i64, nxv2i1, asext_masked_load_i16, LD1SH_D, LD1SH_D_IMM, am_sve_regreg_lsl1>;
+ defm : pred_load<nxv2i64, nxv2i1, zext_masked_load_i32, LD1W_D, LD1W_D_IMM, am_sve_regreg_lsl2>;
+ defm : pred_load<nxv2i64, nxv2i1, asext_masked_load_i32, LD1SW_D, LD1SW_D_IMM, am_sve_regreg_lsl2>;
+ defm : pred_load<nxv2i64, nxv2i1, nonext_masked_load, LD1D, LD1D_IMM, am_sve_regreg_lsl3>;
+ defm : pred_load<nxv2f16, nxv2i1, nonext_masked_load, LD1H_D, LD1H_D_IMM, am_sve_regreg_lsl1>;
+ defm : pred_load<nxv2bf16, nxv2i1, nonext_masked_load, LD1H_D, LD1H_D_IMM, am_sve_regreg_lsl1>;
+ defm : pred_load<nxv2f32, nxv2i1, nonext_masked_load, LD1W_D, LD1W_D_IMM, am_sve_regreg_lsl2>;
+ defm : pred_load<nxv2f64, nxv2i1, nonext_masked_load, LD1D, LD1D_IMM, am_sve_regreg_lsl3>;
// 4-element contiguous loads
- defm : pred_load<nxv4i32, nxv4i1, zext_masked_load_i8, LD1B_S, LD1B_S_IMM, am_sve_regreg_lsl0>;
- defm : pred_load<nxv4i32, nxv4i1, asext_masked_load_i8, LD1SB_S, LD1SB_S_IMM, am_sve_regreg_lsl0>;
- defm : pred_load<nxv4i32, nxv4i1, zext_masked_load_i16, LD1H_S, LD1H_S_IMM, am_sve_regreg_lsl1>;
- defm : pred_load<nxv4i32, nxv4i1, asext_masked_load_i16, LD1SH_S, LD1SH_S_IMM, am_sve_regreg_lsl1>;
- defm : pred_load<nxv4i32, nxv4i1, nonext_masked_load, LD1W, LD1W_IMM, am_sve_regreg_lsl2>;
- defm : pred_load<nxv4f16, nxv4i1, nonext_masked_load, LD1H_S, LD1H_S_IMM, am_sve_regreg_lsl1>;
- defm : pred_load<nxv4f32, nxv4i1, nonext_masked_load, LD1W, LD1W_IMM, am_sve_regreg_lsl2>;
+ defm : pred_load<nxv4i32, nxv4i1, zext_masked_load_i8, LD1B_S, LD1B_S_IMM, am_sve_regreg_lsl0>;
+ defm : pred_load<nxv4i32, nxv4i1, asext_masked_load_i8, LD1SB_S, LD1SB_S_IMM, am_sve_regreg_lsl0>;
+ defm : pred_load<nxv4i32, nxv4i1, zext_masked_load_i16, LD1H_S, LD1H_S_IMM, am_sve_regreg_lsl1>;
+ defm : pred_load<nxv4i32, nxv4i1, asext_masked_load_i16, LD1SH_S, LD1SH_S_IMM, am_sve_regreg_lsl1>;
+ defm : pred_load<nxv4i32, nxv4i1, nonext_masked_load, LD1W, LD1W_IMM, am_sve_regreg_lsl2>;
+ defm : pred_load<nxv4f16, nxv4i1, nonext_masked_load, LD1H_S, LD1H_S_IMM, am_sve_regreg_lsl1>;
+ defm : pred_load<nxv4bf16, nxv4i1, nonext_masked_load, LD1H_S, LD1H_S_IMM, am_sve_regreg_lsl1>;
+ defm : pred_load<nxv4f32, nxv4i1, nonext_masked_load, LD1W, LD1W_IMM, am_sve_regreg_lsl2>;
// 8-element contiguous loads
defm : pred_load<nxv8i16, nxv8i1, zext_masked_load_i8, LD1B_H, LD1B_H_IMM, am_sve_regreg_lsl0>;
@@ -1909,20 +2153,22 @@ let Predicates = [HasSVE] in {
}
// 2-element contiguous stores
- defm : pred_store<nxv2i64, nxv2i1, trunc_masked_store_i8, ST1B_D, ST1B_D_IMM, am_sve_regreg_lsl0>;
- defm : pred_store<nxv2i64, nxv2i1, trunc_masked_store_i16, ST1H_D, ST1H_D_IMM, am_sve_regreg_lsl1>;
- defm : pred_store<nxv2i64, nxv2i1, trunc_masked_store_i32, ST1W_D, ST1W_D_IMM, am_sve_regreg_lsl2>;
- defm : pred_store<nxv2i64, nxv2i1, nontrunc_masked_store, ST1D, ST1D_IMM, am_sve_regreg_lsl3>;
- defm : pred_store<nxv2f16, nxv2i1, nontrunc_masked_store, ST1H_D, ST1H_D_IMM, am_sve_regreg_lsl1>;
- defm : pred_store<nxv2f32, nxv2i1, nontrunc_masked_store, ST1W_D, ST1W_D_IMM, am_sve_regreg_lsl2>;
- defm : pred_store<nxv2f64, nxv2i1, nontrunc_masked_store, ST1D, ST1D_IMM, am_sve_regreg_lsl3>;
+ defm : pred_store<nxv2i64, nxv2i1, trunc_masked_store_i8, ST1B_D, ST1B_D_IMM, am_sve_regreg_lsl0>;
+ defm : pred_store<nxv2i64, nxv2i1, trunc_masked_store_i16, ST1H_D, ST1H_D_IMM, am_sve_regreg_lsl1>;
+ defm : pred_store<nxv2i64, nxv2i1, trunc_masked_store_i32, ST1W_D, ST1W_D_IMM, am_sve_regreg_lsl2>;
+ defm : pred_store<nxv2i64, nxv2i1, nontrunc_masked_store, ST1D, ST1D_IMM, am_sve_regreg_lsl3>;
+ defm : pred_store<nxv2f16, nxv2i1, nontrunc_masked_store, ST1H_D, ST1H_D_IMM, am_sve_regreg_lsl1>;
+ defm : pred_store<nxv2bf16, nxv2i1, nontrunc_masked_store, ST1H_D, ST1H_D_IMM, am_sve_regreg_lsl1>;
+ defm : pred_store<nxv2f32, nxv2i1, nontrunc_masked_store, ST1W_D, ST1W_D_IMM, am_sve_regreg_lsl2>;
+ defm : pred_store<nxv2f64, nxv2i1, nontrunc_masked_store, ST1D, ST1D_IMM, am_sve_regreg_lsl3>;
// 4-element contiguous stores
- defm : pred_store<nxv4i32, nxv4i1, trunc_masked_store_i8, ST1B_S, ST1B_S_IMM, am_sve_regreg_lsl0>;
- defm : pred_store<nxv4i32, nxv4i1, trunc_masked_store_i16, ST1H_S, ST1H_S_IMM, am_sve_regreg_lsl1>;
- defm : pred_store<nxv4i32, nxv4i1, nontrunc_masked_store, ST1W, ST1W_IMM, am_sve_regreg_lsl2>;
- defm : pred_store<nxv4f16, nxv4i1, nontrunc_masked_store, ST1H_S, ST1H_S_IMM, am_sve_regreg_lsl1>;
- defm : pred_store<nxv4f32, nxv4i1, nontrunc_masked_store, ST1W, ST1W_IMM, am_sve_regreg_lsl2>;
+ defm : pred_store<nxv4i32, nxv4i1, trunc_masked_store_i8, ST1B_S, ST1B_S_IMM, am_sve_regreg_lsl0>;
+ defm : pred_store<nxv4i32, nxv4i1, trunc_masked_store_i16, ST1H_S, ST1H_S_IMM, am_sve_regreg_lsl1>;
+ defm : pred_store<nxv4i32, nxv4i1, nontrunc_masked_store, ST1W, ST1W_IMM, am_sve_regreg_lsl2>;
+ defm : pred_store<nxv4f16, nxv4i1, nontrunc_masked_store, ST1H_S, ST1H_S_IMM, am_sve_regreg_lsl1>;
+ defm : pred_store<nxv4bf16, nxv4i1, nontrunc_masked_store, ST1H_S, ST1H_S_IMM, am_sve_regreg_lsl1>;
+ defm : pred_store<nxv4f32, nxv4i1, nontrunc_masked_store, ST1W, ST1W_IMM, am_sve_regreg_lsl2>;
// 8-element contiguous stores
defm : pred_store<nxv8i16, nxv8i1, trunc_masked_store_i8, ST1B_H, ST1B_H_IMM, am_sve_regreg_lsl0>;
@@ -1954,32 +2200,30 @@ let Predicates = [HasSVE] in {
def _imm : Pat<(Store (Ty ZPR:$val), (am_sve_indexed_s4 GPR64sp:$base, simm4s1:$offset)),
(RegImmInst ZPR:$val, (PTrue 31), GPR64sp:$base, simm4s1:$offset)>;
}
- let AddedComplexity = 3 in {
- def _fi : Pat<(Store (Ty ZPR:$val), (am_sve_fi GPR64sp:$base, simm4s1:$offset)),
- (RegImmInst ZPR:$val, (PTrue 31), GPR64sp:$base, simm4s1:$offset)>;
- }
def : Pat<(Store (Ty ZPR:$val), GPR64:$base),
(RegImmInst ZPR:$val, (PTrue 31), GPR64:$base, (i64 0))>;
}
- defm : unpred_store< store, nxv16i8, ST1B, ST1B_IMM, PTRUE_B, am_sve_regreg_lsl0>;
- defm : unpred_store< truncstorevi8, nxv8i16, ST1B_H, ST1B_H_IMM, PTRUE_H, am_sve_regreg_lsl0>;
- defm : unpred_store< truncstorevi8, nxv4i32, ST1B_S, ST1B_S_IMM, PTRUE_S, am_sve_regreg_lsl0>;
- defm : unpred_store< truncstorevi8, nxv2i64, ST1B_D, ST1B_D_IMM, PTRUE_D, am_sve_regreg_lsl0>;
- defm : unpred_store< store, nxv8i16, ST1H, ST1H_IMM, PTRUE_H, am_sve_regreg_lsl1>;
- defm : unpred_store<truncstorevi16, nxv4i32, ST1H_S, ST1H_S_IMM, PTRUE_S, am_sve_regreg_lsl1>;
- defm : unpred_store<truncstorevi16, nxv2i64, ST1H_D, ST1H_D_IMM, PTRUE_D, am_sve_regreg_lsl1>;
- defm : unpred_store< store, nxv4i32, ST1W, ST1W_IMM, PTRUE_S, am_sve_regreg_lsl2>;
- defm : unpred_store<truncstorevi32, nxv2i64, ST1W_D, ST1W_D_IMM, PTRUE_D, am_sve_regreg_lsl2>;
- defm : unpred_store< store, nxv2i64, ST1D, ST1D_IMM, PTRUE_D, am_sve_regreg_lsl3>;
- defm : unpred_store< store, nxv8f16, ST1H, ST1H_IMM, PTRUE_H, am_sve_regreg_lsl1>;
- defm : unpred_store< store, nxv8bf16, ST1H, ST1H_IMM, PTRUE_H, am_sve_regreg_lsl1>;
- defm : unpred_store< store, nxv4f16, ST1H_S, ST1H_S_IMM, PTRUE_S, am_sve_regreg_lsl1>;
- defm : unpred_store< store, nxv2f16, ST1H_D, ST1H_D_IMM, PTRUE_D, am_sve_regreg_lsl1>;
- defm : unpred_store< store, nxv4f32, ST1W, ST1W_IMM, PTRUE_S, am_sve_regreg_lsl2>;
- defm : unpred_store< store, nxv2f32, ST1W_D, ST1W_D_IMM, PTRUE_D, am_sve_regreg_lsl2>;
- defm : unpred_store< store, nxv2f64, ST1D, ST1D_IMM, PTRUE_D, am_sve_regreg_lsl3>;
+ defm : unpred_store< store, nxv16i8, ST1B, ST1B_IMM, PTRUE_B, am_sve_regreg_lsl0>;
+ defm : unpred_store< truncstorevi8, nxv8i16, ST1B_H, ST1B_H_IMM, PTRUE_H, am_sve_regreg_lsl0>;
+ defm : unpred_store< truncstorevi8, nxv4i32, ST1B_S, ST1B_S_IMM, PTRUE_S, am_sve_regreg_lsl0>;
+ defm : unpred_store< truncstorevi8, nxv2i64, ST1B_D, ST1B_D_IMM, PTRUE_D, am_sve_regreg_lsl0>;
+ defm : unpred_store< store, nxv8i16, ST1H, ST1H_IMM, PTRUE_H, am_sve_regreg_lsl1>;
+ defm : unpred_store<truncstorevi16, nxv4i32, ST1H_S, ST1H_S_IMM, PTRUE_S, am_sve_regreg_lsl1>;
+ defm : unpred_store<truncstorevi16, nxv2i64, ST1H_D, ST1H_D_IMM, PTRUE_D, am_sve_regreg_lsl1>;
+ defm : unpred_store< store, nxv4i32, ST1W, ST1W_IMM, PTRUE_S, am_sve_regreg_lsl2>;
+ defm : unpred_store<truncstorevi32, nxv2i64, ST1W_D, ST1W_D_IMM, PTRUE_D, am_sve_regreg_lsl2>;
+ defm : unpred_store< store, nxv2i64, ST1D, ST1D_IMM, PTRUE_D, am_sve_regreg_lsl3>;
+ defm : unpred_store< store, nxv8f16, ST1H, ST1H_IMM, PTRUE_H, am_sve_regreg_lsl1>;
+ defm : unpred_store< store, nxv8bf16, ST1H, ST1H_IMM, PTRUE_H, am_sve_regreg_lsl1>;
+ defm : unpred_store< store, nxv4f16, ST1H_S, ST1H_S_IMM, PTRUE_S, am_sve_regreg_lsl1>;
+ defm : unpred_store< store, nxv4bf16, ST1H_S, ST1H_S_IMM, PTRUE_S, am_sve_regreg_lsl1>;
+ defm : unpred_store< store, nxv2f16, ST1H_D, ST1H_D_IMM, PTRUE_D, am_sve_regreg_lsl1>;
+ defm : unpred_store< store, nxv2bf16, ST1H_D, ST1H_D_IMM, PTRUE_D, am_sve_regreg_lsl1>;
+ defm : unpred_store< store, nxv4f32, ST1W, ST1W_IMM, PTRUE_S, am_sve_regreg_lsl2>;
+ defm : unpred_store< store, nxv2f32, ST1W_D, ST1W_D_IMM, PTRUE_D, am_sve_regreg_lsl2>;
+ defm : unpred_store< store, nxv2f64, ST1D, ST1D_IMM, PTRUE_D, am_sve_regreg_lsl3>;
multiclass unpred_load<PatFrag Load, ValueType Ty, Instruction RegRegInst,
Instruction RegImmInst, Instruction PTrue,
@@ -1992,10 +2236,6 @@ let Predicates = [HasSVE] in {
def _imm: Pat<(Ty (Load (am_sve_indexed_s4 GPR64sp:$base, simm4s1:$offset))),
(RegImmInst (PTrue 31), GPR64sp:$base, simm4s1:$offset)>;
}
- let AddedComplexity = 3 in {
- def _fi : Pat<(Ty (Load (am_sve_fi GPR64sp:$base, simm4s1:$offset))),
- (RegImmInst (PTrue 31), GPR64sp:$base, simm4s1:$offset)>;
- }
def : Pat<(Ty (Load GPR64:$base)),
(RegImmInst (PTrue 31), GPR64:$base, (i64 0))>;
@@ -2026,7 +2266,9 @@ let Predicates = [HasSVE] in {
defm : unpred_load< load, nxv8f16, LD1H, LD1H_IMM, PTRUE_H, am_sve_regreg_lsl1>;
defm : unpred_load< load, nxv8bf16, LD1H, LD1H_IMM, PTRUE_H, am_sve_regreg_lsl1>;
defm : unpred_load< load, nxv4f16, LD1H_S, LD1H_S_IMM, PTRUE_S, am_sve_regreg_lsl1>;
+ defm : unpred_load< load, nxv4bf16, LD1H_S, LD1H_S_IMM, PTRUE_S, am_sve_regreg_lsl1>;
defm : unpred_load< load, nxv2f16, LD1H_D, LD1H_D_IMM, PTRUE_D, am_sve_regreg_lsl1>;
+ defm : unpred_load< load, nxv2bf16, LD1H_D, LD1H_D_IMM, PTRUE_D, am_sve_regreg_lsl1>;
defm : unpred_load< load, nxv4f32, LD1W, LD1W_IMM, PTRUE_S, am_sve_regreg_lsl2>;
defm : unpred_load< load, nxv2f32, LD1W_D, LD1W_D_IMM, PTRUE_D, am_sve_regreg_lsl2>;
defm : unpred_load< load, nxv2f64, LD1D, LD1D_IMM, PTRUE_D, am_sve_regreg_lsl3>;
@@ -2059,9 +2301,6 @@ let Predicates = [HasSVE] in {
}
defm Pat_Store_P16 : unpred_store_predicate<nxv16i1, STR_PXI>;
- defm Pat_Store_P8 : unpred_store_predicate<nxv8i1, STR_PXI>;
- defm Pat_Store_P4 : unpred_store_predicate<nxv4i1, STR_PXI>;
- defm Pat_Store_P2 : unpred_store_predicate<nxv2i1, STR_PXI>;
multiclass unpred_load_predicate<ValueType Ty, Instruction Load> {
def _fi : Pat<(Ty (load (am_sve_fi GPR64sp:$base, simm9:$offset))),
@@ -2072,9 +2311,6 @@ let Predicates = [HasSVE] in {
}
defm Pat_Load_P16 : unpred_load_predicate<nxv16i1, LDR_PXI>;
- defm Pat_Load_P8 : unpred_load_predicate<nxv8i1, LDR_PXI>;
- defm Pat_Load_P4 : unpred_load_predicate<nxv4i1, LDR_PXI>;
- defm Pat_Load_P2 : unpred_load_predicate<nxv2i1, LDR_PXI>;
multiclass ld1<Instruction RegRegInst, Instruction RegImmInst, ValueType Ty,
SDPatternOperator Load, ValueType PredTy, ValueType MemVT, ComplexPattern AddrCP> {
@@ -2122,7 +2358,9 @@ let Predicates = [HasSVE] in {
// 16-element contiguous loads
defm : ld1<LD1B, LD1B_IMM, nxv16i8, AArch64ld1_z, nxv16i1, nxv16i8, am_sve_regreg_lsl0>;
+} // End HasSVEorStreamingSVE
+let Predicates = [HasSVE] in {
multiclass ldnf1<Instruction I, ValueType Ty, SDPatternOperator Load, ValueType PredTy, ValueType MemVT> {
// scalar + immediate (mul vl)
let AddedComplexity = 1 in {
@@ -2203,7 +2441,9 @@ let Predicates = [HasSVE] in {
// 16-element contiguous first faulting loads
defm : ldff1<LDFF1B, nxv16i8, AArch64ldff1_z, nxv16i1, nxv16i8, am_sve_regreg_lsl0>;
+} // End HasSVE
+let Predicates = [HasSVEorStreamingSVE] in {
multiclass st1<Instruction RegRegInst, Instruction RegImmInst, ValueType Ty,
SDPatternOperator Store, ValueType PredTy, ValueType MemVT, ComplexPattern AddrCP> {
// reg + reg
@@ -2400,6 +2640,19 @@ let Predicates = [HasSVE] in {
(i64 (UMOVvi64 (v2i64 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexD:$index))>;
}
+ def : Pat<(sext_inreg (vector_extract (nxv16i8 ZPR:$vec), VectorIndexB:$index), i8),
+ (i32 (SMOVvi8to32 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index))>;
+ def : Pat<(sext_inreg (anyext (vector_extract (nxv16i8 ZPR:$vec), VectorIndexB:$index)), i8),
+ (i64 (SMOVvi8to64 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index))>;
+
+ def : Pat<(sext_inreg (vector_extract (nxv8i16 ZPR:$vec), VectorIndexH:$index), i16),
+ (i32 (SMOVvi16to32 (v8i16 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexH:$index))>;
+ def : Pat<(sext_inreg (anyext (vector_extract (nxv8i16 ZPR:$vec), VectorIndexH:$index)), i16),
+ (i64 (SMOVvi16to64 (v8i16 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexH:$index))>;
+
+ def : Pat<(sext (vector_extract (nxv4i32 ZPR:$vec), VectorIndexS:$index)),
+ (i64 (SMOVvi32to64 (v4i32 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexS:$index))>;
+
// Extract first element from vector.
let AddedComplexity = 2 in {
def : Pat<(vector_extract (nxv16i8 ZPR:$Zs), (i64 0)),
@@ -2425,28 +2678,32 @@ let Predicates = [HasSVE] in {
}
// Splice with lane bigger or equal to 0
- def : Pat<(nxv16i8 (vector_splice (nxv16i8 ZPR:$Z1), (nxv16i8 ZPR:$Z2), (i64 (sve_ext_imm_0_15 i32:$index)))),
- (EXT_ZZI ZPR:$Z1, ZPR:$Z2, sve_ext_imm_0_15:$index)>;
- def : Pat<(nxv8i16 (vector_splice (nxv8i16 ZPR:$Z1), (nxv8i16 ZPR:$Z2), (i64 (sve_ext_imm_0_7 i32:$index)))),
- (EXT_ZZI ZPR:$Z1, ZPR:$Z2, sve_ext_imm_0_7:$index)>;
- def : Pat<(nxv4i32 (vector_splice (nxv4i32 ZPR:$Z1), (nxv4i32 ZPR:$Z2), (i64 (sve_ext_imm_0_3 i32:$index)))),
- (EXT_ZZI ZPR:$Z1, ZPR:$Z2, sve_ext_imm_0_3:$index)>;
- def : Pat<(nxv2i64 (vector_splice (nxv2i64 ZPR:$Z1), (nxv2i64 ZPR:$Z2), (i64 (sve_ext_imm_0_1 i32:$index)))),
- (EXT_ZZI ZPR:$Z1, ZPR:$Z2, sve_ext_imm_0_1:$index)>;
-}
+ def : Pat<(nxv16i8 (vector_splice (nxv16i8 ZPR:$Z1), (nxv16i8 ZPR:$Z2), (i64 (sve_ext_imm_0_255 i32:$index)))),
+ (EXT_ZZI ZPR:$Z1, ZPR:$Z2, sve_ext_imm_0_255:$index)>;
+ def : Pat<(nxv8i16 (vector_splice (nxv8i16 ZPR:$Z1), (nxv8i16 ZPR:$Z2), (i64 (sve_ext_imm_0_127 i32:$index)))),
+ (EXT_ZZI ZPR:$Z1, ZPR:$Z2, sve_ext_imm_0_127:$index)>;
+ def : Pat<(nxv4i32 (vector_splice (nxv4i32 ZPR:$Z1), (nxv4i32 ZPR:$Z2), (i64 (sve_ext_imm_0_63 i32:$index)))),
+ (EXT_ZZI ZPR:$Z1, ZPR:$Z2, sve_ext_imm_0_63:$index)>;
+ def : Pat<(nxv2i64 (vector_splice (nxv2i64 ZPR:$Z1), (nxv2i64 ZPR:$Z2), (i64 (sve_ext_imm_0_31 i32:$index)))),
+ (EXT_ZZI ZPR:$Z1, ZPR:$Z2, sve_ext_imm_0_31:$index)>;
+
+} // End HasSVEorStreamingSVE
let Predicates = [HasSVE, HasMatMulInt8] in {
defm SMMLA_ZZZ : sve_int_matmul<0b00, "smmla", int_aarch64_sve_smmla>;
defm UMMLA_ZZZ : sve_int_matmul<0b11, "ummla", int_aarch64_sve_ummla>;
defm USMMLA_ZZZ : sve_int_matmul<0b10, "usmmla", int_aarch64_sve_usmmla>;
+} // End HasSVE, HasMatMulInt8
+
+let Predicates = [HasSVEorStreamingSVE, HasMatMulInt8] in {
defm USDOT_ZZZ : sve_int_dot_mixed<"usdot", int_aarch64_sve_usdot>;
defm USDOT_ZZZI : sve_int_dot_mixed_indexed<0, "usdot", int_aarch64_sve_usdot_lane>;
defm SUDOT_ZZZI : sve_int_dot_mixed_indexed<1, "sudot", int_aarch64_sve_sudot_lane>;
-}
+} // End HasSVEorStreamingSVE, HasMatMulInt8
let Predicates = [HasSVE, HasMatMulFP32] in {
defm FMMLA_ZZZ_S : sve_fp_matrix_mla<0, "fmmla", ZPR32, int_aarch64_sve_fmmla, nxv4f32>;
-}
+} // End HasSVE, HasMatMulFP32
let Predicates = [HasSVE, HasMatMulFP64] in {
defm FMMLA_ZZZ_D : sve_fp_matrix_mla<1, "fmmla", ZPR64, int_aarch64_sve_fmmla, nxv2f64>;
@@ -2458,15 +2715,18 @@ let Predicates = [HasSVE, HasMatMulFP64] in {
defm LD1RO_H : sve_mem_ldor_ss<0b01, "ld1roh", Z_h, ZPR16, GPR64NoXZRshifted16, nxv8i16, nxv8i1, AArch64ld1ro_z, am_sve_regreg_lsl1>;
defm LD1RO_W : sve_mem_ldor_ss<0b10, "ld1row", Z_s, ZPR32, GPR64NoXZRshifted32, nxv4i32, nxv4i1, AArch64ld1ro_z, am_sve_regreg_lsl2>;
defm LD1RO_D : sve_mem_ldor_ss<0b11, "ld1rod", Z_d, ZPR64, GPR64NoXZRshifted64, nxv2i64, nxv2i1, AArch64ld1ro_z, am_sve_regreg_lsl3>;
+} // End HasSVE, HasMatMulFP64
+
+let Predicates = [HasSVEorStreamingSVE, HasMatMulFP64] in {
defm ZIP1_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b00, 0, "zip1", int_aarch64_sve_zip1q>;
defm ZIP2_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b00, 1, "zip2", int_aarch64_sve_zip2q>;
defm UZP1_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b01, 0, "uzp1", int_aarch64_sve_uzp1q>;
defm UZP2_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b01, 1, "uzp2", int_aarch64_sve_uzp2q>;
defm TRN1_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b11, 0, "trn1", int_aarch64_sve_trn1q>;
defm TRN2_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b11, 1, "trn2", int_aarch64_sve_trn2q>;
-}
+} // End HasSVEorStreamingSVE, HasMatMulFP64
-let Predicates = [HasSVE2] in {
+let Predicates = [HasSVE2orStreamingSVE] in {
// SVE2 integer multiply-add (indexed)
defm MLA_ZZZI : sve2_int_mla_by_indexed_elem<0b01, 0b0, "mla", int_aarch64_sve_mla_lane>;
defm MLS_ZZZI : sve2_int_mla_by_indexed_elem<0b01, 0b1, "mls", int_aarch64_sve_mls_lane>;
@@ -2614,15 +2874,17 @@ let Predicates = [HasSVE2] in {
defm UQSHL_ZPZZ : sve_int_bin_pred_all_active_bhsd<int_aarch64_sve_uqshl>;
defm SQRSHL_ZPZZ : sve_int_bin_pred_all_active_bhsd<int_aarch64_sve_sqrshl>;
defm UQRSHL_ZPZZ : sve_int_bin_pred_all_active_bhsd<int_aarch64_sve_uqrshl>;
+} // End HasSVE2orStreamingSVE
- let Predicates = [HasSVE2, UseExperimentalZeroingPseudos] in {
- defm SQSHL_ZPZI : sve_int_bin_pred_shift_imm_left_zeroing_bhsd<null_frag>;
- defm UQSHL_ZPZI : sve_int_bin_pred_shift_imm_left_zeroing_bhsd<null_frag>;
- defm SRSHR_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<int_aarch64_sve_srshr>;
- defm URSHR_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<int_aarch64_sve_urshr>;
- defm SQSHLU_ZPZI : sve_int_bin_pred_shift_imm_left_zeroing_bhsd<int_aarch64_sve_sqshlu>;
- }
+let Predicates = [HasSVE2orStreamingSVE, UseExperimentalZeroingPseudos] in {
+ defm SQSHL_ZPZI : sve_int_bin_pred_shift_imm_left_zeroing_bhsd<null_frag>;
+ defm UQSHL_ZPZI : sve_int_bin_pred_shift_imm_left_zeroing_bhsd<null_frag>;
+ defm SRSHR_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<int_aarch64_sve_srshr>;
+ defm URSHR_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<int_aarch64_sve_urshr>;
+ defm SQSHLU_ZPZI : sve_int_bin_pred_shift_imm_left_zeroing_bhsd<int_aarch64_sve_sqshlu>;
+} // End HasSVE2orStreamingSVE, UseExperimentalZeroingPseudos
+let Predicates = [HasSVE2orStreamingSVE] in {
// SVE2 predicated shifts
defm SQSHL_ZPmI : sve_int_bin_pred_shift_imm_left_dup<0b0110, "sqshl", "SQSHL_ZPZI", int_aarch64_sve_sqshl>;
defm UQSHL_ZPmI : sve_int_bin_pred_shift_imm_left_dup<0b0111, "uqshl", "UQSHL_ZPZI", int_aarch64_sve_uqshl>;
@@ -2735,11 +2997,15 @@ let Predicates = [HasSVE2] in {
defm SQXTNT_ZZ : sve2_int_sat_extract_narrow_top<0b00, "sqxtnt", int_aarch64_sve_sqxtnt>;
defm UQXTNT_ZZ : sve2_int_sat_extract_narrow_top<0b01, "uqxtnt", int_aarch64_sve_uqxtnt>;
defm SQXTUNT_ZZ : sve2_int_sat_extract_narrow_top<0b10, "sqxtunt", int_aarch64_sve_sqxtunt>;
+} // End HasSVE2orStreamingSVE
+let Predicates = [HasSVE2] in {
// SVE2 character match
defm MATCH_PPzZZ : sve2_char_match<0b0, "match", int_aarch64_sve_match>;
defm NMATCH_PPzZZ : sve2_char_match<0b1, "nmatch", int_aarch64_sve_nmatch>;
+} // End HasSVE2
+let Predicates = [HasSVE2orStreamingSVE] in {
// SVE2 bitwise exclusive-or interleaved
defm EORBT_ZZZ : sve2_bitwise_xor_interleaved<0b0, "eorbt", int_aarch64_sve_eorbt>;
defm EORTB_ZZZ : sve2_bitwise_xor_interleaved<0b1, "eortb", int_aarch64_sve_eortb>;
@@ -2754,13 +3020,17 @@ let Predicates = [HasSVE2] in {
defm SADDLBT_ZZZ : sve2_misc_int_addsub_long_interleaved<0b00, "saddlbt", int_aarch64_sve_saddlbt>;
defm SSUBLBT_ZZZ : sve2_misc_int_addsub_long_interleaved<0b10, "ssublbt", int_aarch64_sve_ssublbt>;
defm SSUBLTB_ZZZ : sve2_misc_int_addsub_long_interleaved<0b11, "ssubltb", int_aarch64_sve_ssubltb>;
+} // End HasSVE2orStreamingSVE
+let Predicates = [HasSVE2] in {
// SVE2 histogram generation (segment)
def HISTSEG_ZZZ : sve2_hist_gen_segment<"histseg", int_aarch64_sve_histseg>;
// SVE2 histogram generation (vector)
defm HISTCNT_ZPzZZ : sve2_hist_gen_vector<"histcnt", int_aarch64_sve_histcnt>;
+} // End HasSVE2
+let Predicates = [HasSVE2orStreamingSVE] in {
// SVE2 floating-point base 2 logarithm as integer
defm FLOGB_ZPmZ : sve2_fp_flogb<"flogb", int_aarch64_sve_flogb>;
@@ -2802,7 +3072,9 @@ let Predicates = [HasSVE2] in {
// SVE2 extract vector (immediate offset, constructive)
def EXT_ZZI_B : sve2_int_perm_extract_i_cons<"ext">;
+} // End HasSVE2orStreamingSVE
+let Predicates = [HasSVE2] in {
// SVE2 non-temporal gather loads
defm LDNT1SB_ZZR_S : sve2_mem_gldnt_vs_32_ptrs<0b00000, "ldnt1sb", AArch64ldnt1s_gather_z, nxv4i8>;
defm LDNT1B_ZZR_S : sve2_mem_gldnt_vs_32_ptrs<0b00001, "ldnt1b", AArch64ldnt1_gather_z, nxv4i8>;
@@ -2817,10 +3089,14 @@ let Predicates = [HasSVE2] in {
defm LDNT1SW_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b11000, "ldnt1sw", AArch64ldnt1s_gather_z, nxv2i32>;
defm LDNT1W_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b11010, "ldnt1w", AArch64ldnt1_gather_z, nxv2i32>;
defm LDNT1D_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b11110, "ldnt1d", AArch64ldnt1_gather_z, nxv2i64>;
+} // End HasSVE2
+let Predicates = [HasSVE2orStreamingSVE] in {
// SVE2 vector splice (constructive)
defm SPLICE_ZPZZ : sve2_int_perm_splice_cons<"splice">;
+} // End HasSVE2orStreamingSVE
+let Predicates = [HasSVE2] in {
// SVE2 non-temporal scatter stores
defm STNT1B_ZZR_S : sve2_mem_sstnt_vs_32_ptrs<0b001, "stnt1b", AArch64stnt1_scatter, nxv4i8>;
defm STNT1H_ZZR_S : sve2_mem_sstnt_vs_32_ptrs<0b011, "stnt1h", AArch64stnt1_scatter, nxv4i16>;
@@ -2830,7 +3106,9 @@ let Predicates = [HasSVE2] in {
defm STNT1H_ZZR_D : sve2_mem_sstnt_vs_64_ptrs<0b010, "stnt1h", AArch64stnt1_scatter, nxv2i16>;
defm STNT1W_ZZR_D : sve2_mem_sstnt_vs_64_ptrs<0b100, "stnt1w", AArch64stnt1_scatter, nxv2i32>;
defm STNT1D_ZZR_D : sve2_mem_sstnt_vs_64_ptrs<0b110, "stnt1d", AArch64stnt1_scatter, nxv2i64>;
+} // End HasSVE2
+let Predicates = [HasSVE2orStreamingSVE] in {
// SVE2 table lookup (three sources)
defm TBL_ZZZZ : sve2_int_perm_tbl<"tbl", int_aarch64_sve_tbl2>;
defm TBX_ZZZ : sve2_int_perm_tbx<"tbx", int_aarch64_sve_tbx>;
@@ -2849,7 +3127,7 @@ let Predicates = [HasSVE2] in {
// SVE2 pointer conflict compare
defm WHILEWR_PXX : sve2_int_while_rr<0b0, "whilewr", "int_aarch64_sve_whilewr">;
defm WHILERW_PXX : sve2_int_while_rr<0b1, "whilerw", "int_aarch64_sve_whilerw">;
-}
+} // End HasSVE2orStreamingSVE
let Predicates = [HasSVE2AES] in {
// SVE2 crypto destructive binary operations
@@ -2865,23 +3143,23 @@ let Predicates = [HasSVE2AES] in {
// to NEON PMULL2 instruction.
defm PMULLB_ZZZ_Q : sve2_wide_int_arith_pmul<0b00, 0b11010, "pmullb", int_aarch64_sve_pmullb_pair>;
defm PMULLT_ZZZ_Q : sve2_wide_int_arith_pmul<0b00, 0b11011, "pmullt", int_aarch64_sve_pmullt_pair>;
-}
+} // End HasSVE2AES
let Predicates = [HasSVE2SM4] in {
// SVE2 crypto constructive binary operations
defm SM4EKEY_ZZZ_S : sve2_crypto_cons_bin_op<0b0, "sm4ekey", ZPR32, int_aarch64_sve_sm4ekey, nxv4i32>;
// SVE2 crypto destructive binary operations
defm SM4E_ZZZ_S : sve2_crypto_des_bin_op<0b10, "sm4e", ZPR32, int_aarch64_sve_sm4e, nxv4i32>;
-}
+} // End HasSVE2SM4
let Predicates = [HasSVE2SHA3] in {
// SVE2 crypto constructive binary operations
defm RAX1_ZZZ_D : sve2_crypto_cons_bin_op<0b1, "rax1", ZPR64, int_aarch64_sve_rax1, nxv2i64>;
-}
+} // End HasSVE2SHA3
let Predicates = [HasSVE2BitPerm] in {
// SVE2 bitwise permute
defm BEXT_ZZZ : sve2_misc_bitwise<0b1100, "bext", int_aarch64_sve_bext_x>;
defm BDEP_ZZZ : sve2_misc_bitwise<0b1101, "bdep", int_aarch64_sve_bdep_x>;
defm BGRP_ZZZ : sve2_misc_bitwise<0b1110, "bgrp", int_aarch64_sve_bgrp_x>;
-}
+} // End HasSVE2BitPerm
diff --git a/llvm/lib/Target/AArch64/AArch64SchedA53.td b/llvm/lib/Target/AArch64/AArch64SchedA53.td
index 65c84b1f39c0..d18a05fda191 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedA53.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedA53.td
@@ -127,7 +127,8 @@ def : WriteRes<WriteFCmp, [A53UnitFPALU]> { let Latency = 6; }
def : WriteRes<WriteFCvt, [A53UnitFPALU]> { let Latency = 6; }
def : WriteRes<WriteFCopy, [A53UnitFPALU]> { let Latency = 6; }
def : WriteRes<WriteFImm, [A53UnitFPALU]> { let Latency = 6; }
-def : WriteRes<WriteV, [A53UnitFPALU]> { let Latency = 6; }
+def : WriteRes<WriteVd, [A53UnitFPALU]> { let Latency = 6; }
+def : WriteRes<WriteVq, [A53UnitFPALU]> { let Latency = 6; }
// FP Mul, Div, Sqrt
def : WriteRes<WriteFMul, [A53UnitFPMDS]> { let Latency = 6; }
@@ -149,6 +150,7 @@ def A53WriteFSqrtDP : SchedWriteRes<[A53UnitFPMDS]> { let Latency = 32;
// No forwarding for these reads.
def : ReadAdvance<ReadExtrHi, 0>;
def : ReadAdvance<ReadAdrBase, 0>;
+def : ReadAdvance<ReadST, 0>;
def : ReadAdvance<ReadVLD, 0>;
// ALU - Most operands in the ALU pipes are not needed for two cycles. Shiftable
diff --git a/llvm/lib/Target/AArch64/AArch64SchedA55.td b/llvm/lib/Target/AArch64/AArch64SchedA55.td
index 0e680078c348..877c4d2ced41 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedA55.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedA55.td
@@ -149,9 +149,11 @@ def : WriteRes<WriteFCmp, [CortexA55UnitFPALU]> { let Latency = 3; }
def : WriteRes<WriteFCvt, [CortexA55UnitFPALU]> { let Latency = 4; }
def : WriteRes<WriteFCopy, [CortexA55UnitFPALU]> { let Latency = 3; }
def : WriteRes<WriteFImm, [CortexA55UnitFPALU]> { let Latency = 3; }
-def : WriteRes<WriteV, [CortexA55UnitFPALU]> { let Latency = 4; }
+def : WriteRes<WriteVd, [CortexA55UnitFPALU]> { let Latency = 4; }
+def : WriteRes<WriteVq, [CortexA55UnitFPALU,CortexA55UnitFPALU]> { let Latency = 4; let BeginGroup = 1; }
// FP ALU specific new schedwrite definitions
+def CortexA55WriteFPALU_F2 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 2;}
def CortexA55WriteFPALU_F3 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 3;}
def CortexA55WriteFPALU_F4 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 4;}
def CortexA55WriteFPALU_F5 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 5;}
@@ -182,6 +184,7 @@ def CortexA55WriteFSqrtDP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency =
def : ReadAdvance<ReadVLD, 0>;
def : ReadAdvance<ReadExtrHi, 1>;
def : ReadAdvance<ReadAdrBase, 1>;
+def : ReadAdvance<ReadST, 1>;
// ALU - ALU input operands are generally needed in EX1. An operand produced in
// in say EX2 can be forwarded for consumption to ALU in EX1, thereby
@@ -330,6 +333,8 @@ def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST4Fourv(8b|4h|2s|1d|16
//---
// Floating Point Conversions, MAC, DIV, SQRT
//---
+def : InstRW<[CortexA55WriteFPALU_F2], (instregex "^DUP(v2i64|v4i32|v8i16|v16i8)")>;
+def : InstRW<[CortexA55WriteFPALU_F2], (instregex "^XTN")>;
def : InstRW<[CortexA55WriteFPALU_F3], (instregex "^FCVT[ALMNPZ][SU](S|U)?(W|X)")>;
def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^FCVT(X)?[ALMNPXZ](S|U|N)?v")>;
diff --git a/llvm/lib/Target/AArch64/AArch64SchedA57.td b/llvm/lib/Target/AArch64/AArch64SchedA57.td
index c1eacca8cc1f..168a762241ca 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedA57.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedA57.td
@@ -96,7 +96,8 @@ def : SchedAlias<WriteFCopy, A57Write_5cyc_1L>;
def : SchedAlias<WriteFImm, A57Write_3cyc_1V>;
def : WriteRes<WriteFMul, [A57UnitV]> { let Latency = 5;}
def : SchedAlias<WriteFDiv, A57Write_17cyc_1W>;
-def : SchedAlias<WriteV, A57Write_3cyc_1V>;
+def : SchedAlias<WriteVd, A57Write_3cyc_1V>;
+def : SchedAlias<WriteVq, A57Write_3cyc_1V>;
def : SchedAlias<WriteVLD, A57Write_5cyc_1L>;
def : SchedAlias<WriteVST, A57Write_1cyc_1S>;
@@ -116,6 +117,7 @@ def : ReadAdvance<ReadIM, 0>;
def : ReadAdvance<ReadIMA, 2, [WriteIM32, WriteIM64]>;
def : ReadAdvance<ReadID, 0>;
def : ReadAdvance<ReadExtrHi, 0>;
+def : ReadAdvance<ReadST, 0>;
def : ReadAdvance<ReadAdrBase, 0>;
def : ReadAdvance<ReadVLD, 0>;
diff --git a/llvm/lib/Target/AArch64/AArch64SchedA64FX.td b/llvm/lib/Target/AArch64/AArch64SchedA64FX.td
index b6741d418ef0..1d25a6c00f95 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedA64FX.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedA64FX.td
@@ -21,7 +21,8 @@ def A64FXModel : SchedMachineModel {
let CompleteModel = 1;
list<Predicate> UnsupportedFeatures =
- [HasSVE2, HasSVE2AES, HasSVE2SM4, HasSVE2SHA3, HasSVE2BitPerm, HasPAuth];
+ [HasSVE2, HasSVE2AES, HasSVE2SM4, HasSVE2SHA3, HasSVE2BitPerm, HasPAuth,
+ HasSVE2orStreamingSVE];
let FullInstRWOverlapCheck = 0;
}
@@ -760,6 +761,7 @@ def : ReadAdvance<ReadIMA, 0>;
def : ReadAdvance<ReadID, 0>;
def : ReadAdvance<ReadExtrHi, 0>;
def : ReadAdvance<ReadAdrBase, 0>;
+def : ReadAdvance<ReadST, 0>;
def : ReadAdvance<ReadVLD, 0>;
//===----------------------------------------------------------------------===//
@@ -1625,7 +1627,11 @@ def : InstRW<[A64FXWrite_FMOV_VG14], (instrs FMOVDXHighr)>;
// ASIMD shift by register, basic, Q-form
// ASIMD shift by register, complex, D-form
// ASIMD shift by register, complex, Q-form
-def : WriteRes<WriteV, [A64FXGI03]> {
+def : WriteRes<WriteVd, [A64FXGI03]> {
+ let Latency = 4;
+ let ResourceCycles = [1];
+}
+def : WriteRes<WriteVq, [A64FXGI03]> {
let Latency = 4;
let ResourceCycles = [1];
}
diff --git a/llvm/lib/Target/AArch64/AArch64SchedCyclone.td b/llvm/lib/Target/AArch64/AArch64SchedCyclone.td
index 11df304a974c..9fbb46919427 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedCyclone.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedCyclone.td
@@ -258,6 +258,7 @@ def CyReadAdrBase : SchedReadVariant<[
SchedVar<ScaledIdxPred, [ReadBaseRS]>, // Read base reg after shifting offset.
SchedVar<NoSchedPred, [ReadDefault]>]>; // Read base reg with no shift.
def : SchedAlias<ReadAdrBase, CyReadAdrBase>; // Map AArch64->Cyclone type.
+def : ReadAdvance<ReadST, 0>;
//---
// 7.8.9,7.8.11. Load/Store, paired
@@ -303,7 +304,8 @@ def : WriteRes<WriteSys, []> {let Latency = -1;}
// 7.9 Vector Unit Instructions
// Simple vector operations take 2 cycles.
-def : WriteRes<WriteV, [CyUnitV]> {let Latency = 2;}
+def : WriteRes<WriteVd, [CyUnitV]> {let Latency = 2;}
+def : WriteRes<WriteVq, [CyUnitV]> {let Latency = 2;}
// Define some longer latency vector op types for Cyclone.
def CyWriteV3 : SchedWriteRes<[CyUnitV]> {let Latency = 3;}
@@ -334,7 +336,7 @@ def : WriteRes<WriteFImm, [CyUnitV]> {let Latency = 2;}
// COPY is handled above in the WriteMov Variant.
def WriteVMov : SchedWriteVariant<[
SchedVar<WriteVMovPred, [WriteX]>,
- SchedVar<NoSchedPred, [WriteV]>]>;
+ SchedVar<NoSchedPred, [WriteVq]>]>;
def : InstRW<[WriteVMov], (instrs ORRv16i8)>;
// FMOVSr,FMOVDr are WriteF.
@@ -354,7 +356,7 @@ def : WriteRes<WriteFCopy, [CyUnitLS]> {
def : InstRW<[WriteLD], (instrs FMOVSWr,FMOVDXr,FMOVDXHighr)>;
// INS V[x],R
-def CyWriteCopyToFPR : WriteSequence<[WriteVLD, WriteV]>;
+def CyWriteCopyToFPR : WriteSequence<[WriteVLD, WriteVq]>;
def : InstRW<[CyWriteCopyToFPR], (instregex "INSv")>;
// SMOV,UMOV R,V[x]
@@ -570,7 +572,7 @@ def : InstRW<[WriteFRSQRTS], (instregex "FRSQRTSv")>;
//---
// FCVT lengthen f16/s32
-def : InstRW<[WriteV], (instrs FCVTSHr,FCVTDHr,FCVTDSr)>;
+def : InstRW<[WriteVq], (instrs FCVTSHr,FCVTDHr,FCVTDSr)>;
// FCVT,FCVTN,FCVTXN
// SCVTF,UCVTF V,V
@@ -680,61 +682,61 @@ def : InstRW<[WriteVLDShuffle],
def : InstRW<[WriteVLDShuffle, WriteAdr],
(instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[WriteVLDShuffle, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteVq],
(instregex "LD2Twov(8b|4h|2s)$")>;
-def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVq],
(instregex "LD2Twov(8b|4h|2s)_POST$")>;
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle],
(instregex "LD2Twov(16b|8h|4s|2d)$")>;
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle],
(instregex "LD2Twov(16b|8h|4s|2d)_POST")>;
-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV],
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVq],
(instregex "LD2i(8|16|32)$")>;
-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV],
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVq],
(instregex "LD2i(8|16|32)_POST")>;
-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV],
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVq],
(instregex "LD2i64$")>;
-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV],
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVq],
(instregex "LD2i64_POST")>;
-def : InstRW<[WriteVLDShuffle, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteVq],
(instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVq],
(instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST")>;
-def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVq],
(instregex "LD3Threev(8b|4h|2s)$")>;
-def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVq],
(instregex "LD3Threev(8b|4h|2s)_POST")>;
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVLDShuffle],
(instregex "LD3Threev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVLDShuffle],
(instregex "LD3Threev(16b|8h|4s|2d)_POST")>;
-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVq, WriteVq],
(instregex "LD3i(8|16|32)$")>;
-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVq, WriteVq],
(instregex "LD3i(8|16|32)_POST")>;
-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteV],
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteVq],
(instregex "LD3i64$")>;
-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteV],
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteVq],
(instregex "LD3i64_POST")>;
-def : InstRW<[WriteVLDShuffle, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteVq, WriteVq],
(instregex "LD3Rv(8b|4h|2s|16b|8h|4s)$")>;
-def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVq, WriteVq],
(instregex "LD3Rv(8b|4h|2s|16b|8h|4s)_POST")>;
-def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVq],
(instrs LD3Rv1d,LD3Rv2d)>;
-def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVq],
(instrs LD3Rv1d_POST,LD3Rv2d_POST)>;
-def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVq, WriteVq],
(instregex "LD4Fourv(8b|4h|2s)$")>;
-def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVq, WriteVq],
(instregex "LD4Fourv(8b|4h|2s)_POST")>;
def : InstRW<[WriteVLDPairShuffle, WriteVLDPairShuffle,
WriteVLDPairShuffle, WriteVLDPairShuffle],
@@ -743,25 +745,25 @@ def : InstRW<[WriteVLDPairShuffle, WriteAdr, WriteVLDPairShuffle,
WriteVLDPairShuffle, WriteVLDPairShuffle],
(instregex "LD4Fourv(16b|8h|4s|2d)_POST")>;
-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVq, WriteVq, WriteVq],
(instregex "LD4i(8|16|32)$")>;
-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVq, WriteVq, WriteVq],
(instregex "LD4i(8|16|32)_POST")>;
-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteVq, WriteVq],
(instrs LD4i64)>;
-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteV],
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteVq],
(instrs LD4i64_POST)>;
-def : InstRW<[WriteVLDShuffle, WriteV, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteVq, WriteVq, WriteVq],
(instregex "LD4Rv(8b|4h|2s|16b|8h|4s)$")>;
-def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVq, WriteVq, WriteVq],
(instregex "LD4Rv(8b|4h|2s|16b|8h|4s)_POST")>;
-def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVq, WriteVq],
(instrs LD4Rv1d,LD4Rv2d)>;
-def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVq, WriteVq],
(instrs LD4Rv1d_POST,LD4Rv2d_POST)>;
//---
diff --git a/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td b/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td
index 6a33258be02c..14df8236504b 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td
@@ -254,7 +254,8 @@ def : WriteRes<WriteVST, [M3UnitS,
let NumMicroOps = 1; }
// ASIMD FP instructions.
-def : WriteRes<WriteV, [M3UnitNALU]> { let Latency = 3; }
+def : WriteRes<WriteVd, [M3UnitNALU]> { let Latency = 3; }
+def : WriteRes<WriteVq, [M3UnitNALU]> { let Latency = 3; }
// Other miscellaneous instructions.
def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
@@ -277,6 +278,7 @@ def : ReadAdvance<ReadID, 0>;
def : ReadAdvance<ReadExtrHi, 0>;
def : ReadAdvance<ReadAdrBase, 0>;
def : ReadAdvance<ReadVLD, 0>;
+def : ReadAdvance<ReadST, 0>;
//===----------------------------------------------------------------------===//
// Finer scheduling model.
diff --git a/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td b/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td
index db066a19b0b6..8f740a9a0d35 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td
@@ -558,7 +558,8 @@ def : SchedAlias<WriteVLD, M4WriteL5>;
def : SchedAlias<WriteVST, M4WriteVST1>;
// ASIMD FP instructions.
-def : SchedAlias<WriteV, M4WriteNALU1>;
+def : SchedAlias<WriteVd, M4WriteNALU1>;
+def : SchedAlias<WriteVq, M4WriteNALU1>;
// Other miscellaneous instructions.
def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
@@ -581,6 +582,7 @@ def : ReadAdvance<ReadID, 0>;
def : ReadAdvance<ReadExtrHi, 0>;
def : ReadAdvance<ReadAdrBase, 0>;
def : ReadAdvance<ReadVLD, 0>;
+def : ReadAdvance<ReadST, 0>;
//===----------------------------------------------------------------------===//
// Finer scheduling model.
diff --git a/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td b/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td
index 0429b6ab2ee2..93e1b66bea03 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td
@@ -594,7 +594,8 @@ def : SchedAlias<WriteVLD, M5WriteL6>;
def : SchedAlias<WriteVST, M5WriteVST1>;
// ASIMD FP instructions.
-def : SchedAlias<WriteV, M5WriteNALU1>;
+def : SchedAlias<WriteVd, M5WriteNALU1>;
+def : SchedAlias<WriteVq, M5WriteNALU1>;
// Other miscellaneous instructions.
def : WriteRes<WriteBarrier, []> { let Latency = 1; }
@@ -616,6 +617,7 @@ def : ReadAdvance<ReadID, 0>;
def : ReadAdvance<ReadExtrHi, 0>;
def : ReadAdvance<ReadAdrBase, 0>;
def : ReadAdvance<ReadVLD, 0>;
+def : ReadAdvance<ReadST, 0>;
//===----------------------------------------------------------------------===//
// Finer scheduling model.
diff --git a/llvm/lib/Target/AArch64/AArch64SchedFalkor.td b/llvm/lib/Target/AArch64/AArch64SchedFalkor.td
index 8bb95e442249..7c9b0afdd169 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedFalkor.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedFalkor.td
@@ -92,7 +92,8 @@ def : WriteRes<WriteFCopy, []> { let Unsupported = 1; }
def : WriteRes<WriteFImm, []> { let Unsupported = 1; }
def : WriteRes<WriteFMul, []> { let Unsupported = 1; }
def : WriteRes<WriteFDiv, []> { let Unsupported = 1; }
-def : WriteRes<WriteV, []> { let Unsupported = 1; }
+def : WriteRes<WriteVd, []> { let Unsupported = 1; }
+def : WriteRes<WriteVq, []> { let Unsupported = 1; }
def : WriteRes<WriteVLD, []> { let Unsupported = 1; }
def : WriteRes<WriteVST, []> { let Unsupported = 1; }
def : WriteRes<WriteSys, []> { let Unsupported = 1; }
@@ -111,6 +112,7 @@ def : ReadAdvance<ReadID, 0>;
def : ReadAdvance<ReadExtrHi, 0>;
def : ReadAdvance<ReadAdrBase, 0>;
def : ReadAdvance<ReadVLD, 0>;
+def : ReadAdvance<ReadST, 0>;
// Detailed Refinements
// -----------------------------------------------------------------------------
diff --git a/llvm/lib/Target/AArch64/AArch64SchedKryo.td b/llvm/lib/Target/AArch64/AArch64SchedKryo.td
index 45964e1ed6de..cc568a2f2f17 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedKryo.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedKryo.td
@@ -95,7 +95,8 @@ def : WriteRes<WriteFMul, [KryoUnitX, KryoUnitX]>
{ let Latency = 6; let NumMicroOps = 2; }
def : WriteRes<WriteFDiv, [KryoUnitXA, KryoUnitY]>
{ let Latency = 12; let NumMicroOps = 2; } // Fragment -1 / NoRSV +1
-def : WriteRes<WriteV, [KryoUnitXY]> { let Latency = 6; }
+def : WriteRes<WriteVd, [KryoUnitXY]> { let Latency = 6; }
+def : WriteRes<WriteVq, [KryoUnitXY]> { let Latency = 6; }
def : WriteRes<WriteVLD, [KryoUnitLS]> { let Latency = 4; }
def : WriteRes<WriteVST, [KryoUnitLS]> { let Latency = 4; }
@@ -117,6 +118,7 @@ def : ReadAdvance<ReadID, 0>;
def : ReadAdvance<ReadExtrHi, 0>;
def : ReadAdvance<ReadAdrBase, 0>;
def : ReadAdvance<ReadVLD, 0>;
+def : ReadAdvance<ReadST, 0>;
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/AArch64SchedTSV110.td b/llvm/lib/Target/AArch64/AArch64SchedTSV110.td
index 438371c1b6a8..77fca22a5f55 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedTSV110.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedTSV110.td
@@ -90,7 +90,8 @@ def : WriteRes<WriteFMul, [TSV110UnitF]> { let Latency = 5; }
// FP Div, Sqrt
def : WriteRes<WriteFDiv, [TSV110UnitFSU1]> { let Latency = 18; }
-def : WriteRes<WriteV, [TSV110UnitF]> { let Latency = 4; }
+def : WriteRes<WriteVd, [TSV110UnitF]> { let Latency = 4; }
+def : WriteRes<WriteVq, [TSV110UnitF]> { let Latency = 4; }
def : WriteRes<WriteVLD, [TSV110UnitFLdSt]> { let Latency = 5; }
def : WriteRes<WriteVST, [TSV110UnitF]> { let Latency = 1; }
@@ -113,6 +114,7 @@ def : ReadAdvance<ReadID, 0>;
def : ReadAdvance<ReadExtrHi, 0>;
def : ReadAdvance<ReadAdrBase, 0>;
def : ReadAdvance<ReadVLD, 0>;
+def : ReadAdvance<ReadST, 0>;
def : InstRW<[WriteI], (instrs COPY)>;
diff --git a/llvm/lib/Target/AArch64/AArch64SchedThunderX.td b/llvm/lib/Target/AArch64/AArch64SchedThunderX.td
index 125eb284cfd1..ff34c0ce9a0c 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedThunderX.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedThunderX.td
@@ -154,7 +154,8 @@ def : WriteRes<WriteFCmp, [THXT8XUnitFPALU]> { let Latency = 6; }
def : WriteRes<WriteFCvt, [THXT8XUnitFPALU]> { let Latency = 6; }
def : WriteRes<WriteFCopy, [THXT8XUnitFPALU]> { let Latency = 6; }
def : WriteRes<WriteFImm, [THXT8XUnitFPALU]> { let Latency = 6; }
-def : WriteRes<WriteV, [THXT8XUnitFPALU]> { let Latency = 6; }
+def : WriteRes<WriteVd, [THXT8XUnitFPALU]> { let Latency = 6; }
+def : WriteRes<WriteVq, [THXT8XUnitFPALU]> { let Latency = 6; }
// FP Mul, Div, Sqrt
def : WriteRes<WriteFMul, [THXT8XUnitFPMDS]> { let Latency = 6; }
@@ -192,6 +193,7 @@ def THXT8XWriteFSqrtDP : SchedWriteRes<[THXT8XUnitFPMDS]> {
def : ReadAdvance<ReadExtrHi, 1>;
def : ReadAdvance<ReadAdrBase, 2>;
def : ReadAdvance<ReadVLD, 2>;
+def : ReadAdvance<ReadST, 2>;
// FIXME: This needs more targeted benchmarking.
// ALU - Most operands in the ALU pipes are not needed for two cycles. Shiftable
diff --git a/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td b/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td
index 8d8675b7ac6f..e4cae97b5524 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td
@@ -362,6 +362,7 @@ def : ReadAdvance<ReadID, 0>;
def : ReadAdvance<ReadExtrHi, 0>;
def : ReadAdvance<ReadAdrBase, 0>;
def : ReadAdvance<ReadVLD, 0>;
+def : ReadAdvance<ReadST, 0>;
//===----------------------------------------------------------------------===//
// 3. Instruction Tables.
@@ -1249,7 +1250,12 @@ def : InstRW<[THX2T99Write_5Cyc_F01], (instrs FMOVXDHighr, FMOVDXHighr)>;
// ASIMD shift by register, basic, Q-form
// ASIMD shift by register, complex, D-form
// ASIMD shift by register, complex, Q-form
-def : WriteRes<WriteV, [THX2T99F01]> {
+def : WriteRes<WriteVd, [THX2T99F01]> {
+ let Latency = 7;
+ let NumMicroOps = 4;
+ let ResourceCycles = [4];
+}
+def : WriteRes<WriteVq, [THX2T99F01]> {
let Latency = 7;
let NumMicroOps = 4;
let ResourceCycles = [4];
diff --git a/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td b/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td
index 00838cc4b9bd..08be2b3a55b3 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td
@@ -621,6 +621,7 @@ def : ReadAdvance<ReadID, 0>;
def : ReadAdvance<ReadExtrHi, 0>;
def : ReadAdvance<ReadAdrBase, 0>;
def : ReadAdvance<ReadVLD, 0>;
+def : ReadAdvance<ReadST, 0>;
//===----------------------------------------------------------------------===//
// 3. Instruction Tables.
@@ -1356,7 +1357,12 @@ def : InstRW<[THX3T110Write_5Cyc_F01], (instrs FMOVXDHighr, FMOVDXHighr)>;
// ASIMD shift by register, basic, Q-form
// ASIMD shift by register, complex, D-form
// ASIMD shift by register, complex, Q-form
-def : WriteRes<WriteV, [THX3T110FP0123]> {
+def : WriteRes<WriteVd, [THX3T110FP0123]> {
+ let Latency = 5;
+ let NumMicroOps = 4;
+ let ResourceCycles = [4];
+}
+def : WriteRes<WriteVq, [THX3T110FP0123]> {
let Latency = 5;
let NumMicroOps = 4;
let ResourceCycles = [4];
diff --git a/llvm/lib/Target/AArch64/AArch64Schedule.td b/llvm/lib/Target/AArch64/AArch64Schedule.td
index 49c0c1782236..b8572c9b4572 100644
--- a/llvm/lib/Target/AArch64/AArch64Schedule.td
+++ b/llvm/lib/Target/AArch64/AArch64Schedule.td
@@ -47,6 +47,7 @@ def WriteAdr : SchedWrite; // Address pre/post increment.
def WriteLDIdx : SchedWrite; // Load from a register index (maybe scaled).
def WriteSTIdx : SchedWrite; // Store to a register index (maybe scaled).
+def ReadST : SchedRead; // Read the stored value.
def ReadAdrBase : SchedRead; // Read the base register of a reg-offset LD/ST.
// Serialized two-level address load.
@@ -76,7 +77,8 @@ def WriteFImm : SchedWrite; // Floating-point immediate.
def WriteFMul : SchedWrite; // Floating-point multiply.
def WriteFDiv : SchedWrite; // Floating-point division.
-def WriteV : SchedWrite; // Vector ops.
+def WriteVd : SchedWrite; // 64bit Vector D ops.
+def WriteVq : SchedWrite; // 128bit Vector Q ops.
def WriteVLD : SchedWrite; // Vector loads.
def WriteVST : SchedWrite; // Vector stores.
@@ -86,9 +88,9 @@ def WriteAtomic : SchedWrite; // Atomic memory operations (CAS, Swap, LDOP)
def ReadVLD : SchedRead;
// Sequential vector load and shuffle.
-def WriteVLDShuffle : WriteSequence<[WriteVLD, WriteV]>;
-def WriteVLDPairShuffle : WriteSequence<[WriteVLD, WriteV, WriteV]>;
+def WriteVLDShuffle : WriteSequence<[WriteVLD, WriteVq]>;
+def WriteVLDPairShuffle : WriteSequence<[WriteVLD, WriteVq, WriteVq]>;
// Store a shuffled vector.
-def WriteVSTShuffle : WriteSequence<[WriteV, WriteVST]>;
-def WriteVSTPairShuffle : WriteSequence<[WriteV, WriteV, WriteVST]>;
+def WriteVSTShuffle : WriteSequence<[WriteVq, WriteVST]>;
+def WriteVSTPairShuffle : WriteSequence<[WriteVq, WriteVq, WriteVST]>;
diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
index 3eb4c04570de..d2d84b2a3f6d 100644
--- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -24,8 +24,10 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset(
ConstantSDNode *SizeValue = dyn_cast<ConstantSDNode>(Size);
const AArch64Subtarget &STI =
DAG.getMachineFunction().getSubtarget<AArch64Subtarget>();
- const char *bzeroName = (V && V->isNullValue())
- ? DAG.getTargetLoweringInfo().getLibcallName(RTLIB::BZERO) : nullptr;
+ const char *bzeroName =
+ (V && V->isZero())
+ ? DAG.getTargetLoweringInfo().getLibcallName(RTLIB::BZERO)
+ : nullptr;
// For small size (< 256), it is not beneficial to use bzero
// instead of memset.
if (bzeroName && (!SizeValue || SizeValue->getZExtValue() > 256)) {
diff --git a/llvm/lib/Target/AArch64/AArch64StackTagging.cpp b/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
index f37fedd50378..5cec4cb66339 100644
--- a/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
+++ b/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
@@ -1,9 +1,8 @@
//===- AArch64StackTagging.cpp - Stack tagging in IR --===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
@@ -652,7 +651,8 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) {
auto TagEnd = [&](Instruction *Node) { untagAlloca(AI, Node, Size); };
if (!DT || !PDT ||
- !forAllReachableExits(*DT, *PDT, Start, End, RetVec, TagEnd))
+ !forAllReachableExits(*DT, *PDT, Start, Info.LifetimeEnd, RetVec,
+ TagEnd))
End->eraseFromParent();
} else {
uint64_t Size = Info.AI->getAllocationSizeInBits(*DL).getValue() / 8;
diff --git a/llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp b/llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp
index 076ed9b13c99..d2488f61eb4b 100644
--- a/llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp
+++ b/llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp
@@ -1,9 +1,8 @@
//===-- AArch64StackTaggingPreRA.cpp --- Stack Tagging for AArch64 -----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -177,20 +176,19 @@ bool AArch64StackTaggingPreRA::mayUseUncheckedLoadStore() {
}
void AArch64StackTaggingPreRA::uncheckUsesOf(unsigned TaggedReg, int FI) {
- for (auto UI = MRI->use_instr_begin(TaggedReg), E = MRI->use_instr_end();
- UI != E;) {
- MachineInstr *UseI = &*(UI++);
- if (isUncheckedLoadOrStoreOpcode(UseI->getOpcode())) {
+ for (MachineInstr &UseI :
+ llvm::make_early_inc_range(MRI->use_instructions(TaggedReg))) {
+ if (isUncheckedLoadOrStoreOpcode(UseI.getOpcode())) {
// FI operand is always the one before the immediate offset.
- unsigned OpIdx = TII->getLoadStoreImmIdx(UseI->getOpcode()) - 1;
- if (UseI->getOperand(OpIdx).isReg() &&
- UseI->getOperand(OpIdx).getReg() == TaggedReg) {
- UseI->getOperand(OpIdx).ChangeToFrameIndex(FI);
- UseI->getOperand(OpIdx).setTargetFlags(AArch64II::MO_TAGGED);
+ unsigned OpIdx = TII->getLoadStoreImmIdx(UseI.getOpcode()) - 1;
+ if (UseI.getOperand(OpIdx).isReg() &&
+ UseI.getOperand(OpIdx).getReg() == TaggedReg) {
+ UseI.getOperand(OpIdx).ChangeToFrameIndex(FI);
+ UseI.getOperand(OpIdx).setTargetFlags(AArch64II::MO_TAGGED);
}
- } else if (UseI->isCopy() &&
- Register::isVirtualRegister(UseI->getOperand(0).getReg())) {
- uncheckUsesOf(UseI->getOperand(0).getReg(), FI);
+ } else if (UseI.isCopy() &&
+ Register::isVirtualRegister(UseI.getOperand(0).getReg())) {
+ uncheckUsesOf(UseI.getOperand(0).getReg(), FI);
}
}
}
@@ -277,8 +275,7 @@ Optional<int> AArch64StackTaggingPreRA::findFirstSlotCandidate() {
WorkList.push_back(RetagReg);
while (!WorkList.empty()) {
- Register UseReg = WorkList.back();
- WorkList.pop_back();
+ Register UseReg = WorkList.pop_back_val();
for (auto &UseI : MRI->use_instructions(UseReg)) {
unsigned Opcode = UseI.getOpcode();
if (Opcode == AArch64::STGOffset || Opcode == AArch64::ST2GOffset ||
diff --git a/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp b/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp
index a94856ef4fba..64f13eab0413 100644
--- a/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp
+++ b/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp
@@ -119,7 +119,7 @@ bool AArch64StorePairSuppress::isNarrowFPStore(const MachineInstr &MI) {
}
bool AArch64StorePairSuppress::runOnMachineFunction(MachineFunction &MF) {
- if (skipFunction(MF.getFunction()))
+ if (skipFunction(MF.getFunction()) || MF.getFunction().hasOptSize())
return false;
const TargetSubtargetInfo &ST = MF.getSubtarget();
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index b22eb3b154f5..d782d6352cbe 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -50,15 +50,17 @@ static cl::opt<bool>
static cl::opt<bool> UseAA("aarch64-use-aa", cl::init(true),
cl::desc("Enable the use of AA during codegen."));
-AArch64Subtarget &
-AArch64Subtarget::initializeSubtargetDependencies(StringRef FS,
- StringRef CPUString) {
+AArch64Subtarget &AArch64Subtarget::initializeSubtargetDependencies(
+ StringRef FS, StringRef CPUString, StringRef TuneCPUString) {
// Determine default and user-specified characteristics
if (CPUString.empty())
CPUString = "generic";
- ParseSubtargetFeatures(CPUString, /*TuneCPU*/ CPUString, FS);
+ if (TuneCPUString.empty())
+ TuneCPUString = CPUString;
+
+ ParseSubtargetFeatures(CPUString, TuneCPUString, FS);
initializeProperties();
return *this;
@@ -98,6 +100,12 @@ void AArch64Subtarget::initializeProperties() {
case CortexX1:
PrefFunctionLogAlignment = 4;
break;
+ case CortexA510:
+ case CortexA710:
+ case CortexX2:
+ PrefFunctionLogAlignment = 4;
+ VScaleForTuning = 1;
+ break;
case A64FX:
CacheLineSize = 256;
PrefFunctionLogAlignment = 3;
@@ -106,6 +114,7 @@ void AArch64Subtarget::initializeProperties() {
PrefetchDistance = 128;
MinPrefetchStride = 1024;
MaxPrefetchIterationsAhead = 4;
+ VScaleForTuning = 4;
break;
case AppleA7:
case AppleA10:
@@ -147,9 +156,20 @@ void AArch64Subtarget::initializeProperties() {
PrefFunctionLogAlignment = 3;
break;
case NeoverseN1:
+ PrefFunctionLogAlignment = 4;
+ break;
case NeoverseN2:
+ PrefFunctionLogAlignment = 4;
+ VScaleForTuning = 1;
+ break;
case NeoverseV1:
PrefFunctionLogAlignment = 4;
+ VScaleForTuning = 2;
+ break;
+ case Neoverse512TVB:
+ PrefFunctionLogAlignment = 4;
+ VScaleForTuning = 1;
+ MaxInterleaveFactor = 4;
break;
case Saphira:
MaxInterleaveFactor = 4;
@@ -197,18 +217,20 @@ void AArch64Subtarget::initializeProperties() {
}
AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU,
+ const std::string &TuneCPU,
const std::string &FS,
const TargetMachine &TM, bool LittleEndian,
unsigned MinSVEVectorSizeInBitsOverride,
unsigned MaxSVEVectorSizeInBitsOverride)
- : AArch64GenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS),
+ : AArch64GenSubtargetInfo(TT, CPU, TuneCPU, FS),
ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()),
CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
IsLittle(LittleEndian),
MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride),
MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT),
- FrameLowering(), InstrInfo(initializeSubtargetDependencies(FS, CPU)),
- TSInfo(), TLInfo(TM, *this) {
+ FrameLowering(),
+ InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU)), TSInfo(),
+ TLInfo(TM, *this) {
if (AArch64::isX18ReservedByDefault(TT))
ReserveXRegister.set(18);
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index e0ef8df6fca9..19db774ccd7b 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -50,6 +50,7 @@ public:
CortexA35,
CortexA53,
CortexA55,
+ CortexA510,
CortexA57,
CortexA65,
CortexA72,
@@ -59,14 +60,17 @@ public:
CortexA77,
CortexA78,
CortexA78C,
+ CortexA710,
CortexR82,
CortexX1,
+ CortexX2,
ExynosM3,
Falkor,
Kryo,
NeoverseE1,
NeoverseN1,
NeoverseN2,
+ Neoverse512TVB,
NeoverseV1,
Saphira,
ThunderX2T99,
@@ -82,6 +86,7 @@ protected:
/// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others.
ARMProcFamilyEnum ARMProcFamily = Others;
+ bool HasV8_0aOps = false;
bool HasV8_1aOps = false;
bool HasV8_2aOps = false;
bool HasV8_3aOps = false;
@@ -89,16 +94,21 @@ protected:
bool HasV8_5aOps = false;
bool HasV8_6aOps = false;
bool HasV8_7aOps = false;
-
+ bool HasV9_0aOps = false;
+ bool HasV9_1aOps = false;
+ bool HasV9_2aOps = false;
bool HasV8_0rOps = false;
- bool HasCONTEXTIDREL2 = false;
+ bool HasCONTEXTIDREL2 = false;
+ bool HasEL2VMSA = false;
+ bool HasEL3 = false;
bool HasFPARMv8 = false;
bool HasNEON = false;
bool HasCrypto = false;
bool HasDotProd = false;
bool HasCRC = false;
bool HasLSE = false;
+ bool HasLSE2 = false;
bool HasRAS = false;
bool HasRDM = false;
bool HasPerfMon = false;
@@ -119,6 +129,7 @@ protected:
// SVE extensions
bool HasSVE = false;
bool UseExperimentalZeroingPseudos = false;
+ bool UseScalarIncVL = false;
// Armv8.2 Crypto extensions
bool HasSM4 = false;
@@ -139,7 +150,6 @@ protected:
bool HasTRACEV8_4 = false;
bool HasAM = false;
bool HasSEL2 = false;
- bool HasPMU = false;
bool HasTLB_RMI = false;
bool HasFlagM = false;
bool HasRCPC_IMMO = false;
@@ -190,6 +200,10 @@ protected:
bool HasSME = false;
bool HasSMEF64 = false;
bool HasSMEI64 = false;
+ bool HasStreamingSVE = false;
+
+ // AppleA7 system register.
+ bool HasAppleA7SysReg = false;
// Future architecture extensions.
bool HasETE = false;
@@ -271,6 +285,7 @@ protected:
unsigned MinSVEVectorSizeInBits;
unsigned MaxSVEVectorSizeInBits;
+ unsigned VScaleForTuning = 2;
/// TargetTriple - What processor and OS we're targeting.
Triple TargetTriple;
@@ -292,7 +307,8 @@ private:
/// passed in feature string so that we can use initializer lists for
/// subtarget initialization.
AArch64Subtarget &initializeSubtargetDependencies(StringRef FS,
- StringRef CPUString);
+ StringRef CPUString,
+ StringRef TuneCPUString);
/// Initialize properties based on the selected processor family.
void initializeProperties();
@@ -301,8 +317,8 @@ public:
/// This constructor initializes the data members to match that
/// of the specified triple.
AArch64Subtarget(const Triple &TT, const std::string &CPU,
- const std::string &FS, const TargetMachine &TM,
- bool LittleEndian,
+ const std::string &TuneCPU, const std::string &FS,
+ const TargetMachine &TM, bool LittleEndian,
unsigned MinSVEVectorSizeInBitsOverride = 0,
unsigned MaxSVEVectorSizeInBitsOverride = 0);
@@ -338,11 +354,15 @@ public:
return ARMProcFamily;
}
+ bool hasV8_0aOps() const { return HasV8_0aOps; }
bool hasV8_1aOps() const { return HasV8_1aOps; }
bool hasV8_2aOps() const { return HasV8_2aOps; }
bool hasV8_3aOps() const { return HasV8_3aOps; }
bool hasV8_4aOps() const { return HasV8_4aOps; }
bool hasV8_5aOps() const { return HasV8_5aOps; }
+ bool hasV9_0aOps() const { return HasV9_0aOps; }
+ bool hasV9_1aOps() const { return HasV9_1aOps; }
+ bool hasV9_2aOps() const { return HasV9_2aOps; }
bool hasV8_0rOps() const { return HasV8_0rOps; }
bool hasZeroCycleRegMove() const { return HasZeroCycleRegMove; }
@@ -375,6 +395,7 @@ public:
bool hasDotProd() const { return HasDotProd; }
bool hasCRC() const { return HasCRC; }
bool hasLSE() const { return HasLSE; }
+ bool hasLSE2() const { return HasLSE2; }
bool hasRAS() const { return HasRAS; }
bool hasRDM() const { return HasRDM; }
bool hasSM4() const { return HasSM4; }
@@ -449,6 +470,8 @@ public:
return UseExperimentalZeroingPseudos;
}
+ bool useScalarIncVL() const { return UseScalarIncVL; }
+
/// CPU has TBI (top byte of addresses is ignored during HW address
/// translation) and OS enables it.
bool supportsAddressTopByteIgnored() const;
@@ -494,6 +517,7 @@ public:
bool hasSME() const { return HasSME; }
bool hasSMEF64() const { return HasSMEF64; }
bool hasSMEI64() const { return HasSMEI64; }
+ bool hasStreamingSVE() const { return HasStreamingSVE; }
bool isLittleEndian() const { return IsLittle; }
@@ -541,10 +565,11 @@ public:
bool hasHCX() const { return HasHCX; }
bool hasLS64() const { return HasLS64; }
bool hasSEL2() const { return HasSEL2; }
- bool hasPMU() const { return HasPMU; }
bool hasTLB_RMI() const { return HasTLB_RMI; }
bool hasFlagM() const { return HasFlagM; }
bool hasRCPC_IMMO() const { return HasRCPC_IMMO; }
+ bool hasEL2VMSA() const { return HasEL2VMSA; }
+ bool hasEL3() const { return HasEL3; }
bool addrSinkUsingGEPs() const override {
// Keeping GEPs inbounds is important for exploiting AArch64
@@ -598,6 +623,31 @@ public:
}
}
+ /// Return whether FrameLowering should always set the "extended frame
+ /// present" bit in FP, or set it based on a symbol in the runtime.
+ bool swiftAsyncContextIsDynamicallySet() const {
+ // Older OS versions (particularly system unwinders) are confused by the
+ // Swift extended frame, so when building code that might be run on them we
+ // must dynamically query the concurrency library to determine whether
+ // extended frames should be flagged as present.
+ const Triple &TT = getTargetTriple();
+
+ unsigned Major, Minor, Micro;
+ TT.getOSVersion(Major, Minor, Micro);
+ switch(TT.getOS()) {
+ default:
+ return false;
+ case Triple::IOS:
+ case Triple::TvOS:
+ return Major < 15;
+ case Triple::WatchOS:
+ return Major < 8;
+ case Triple::MacOSX:
+ case Triple::Darwin:
+ return Major < 12;
+ }
+ }
+
void mirFileLoaded(MachineFunction &MF) const override;
// Return the known range for the bit length of SVE data registers. A value
@@ -614,6 +664,8 @@ public:
}
bool useSVEForFixedLengthVectors() const;
+
+ unsigned getVScaleForTuning() const { return VScaleForTuning; }
};
} // End llvm namespace
diff --git a/llvm/lib/Target/AArch64/AArch64SystemOperands.td b/llvm/lib/Target/AArch64/AArch64SystemOperands.td
index f400916c97c9..f9fe804865a5 100644
--- a/llvm/lib/Target/AArch64/AArch64SystemOperands.td
+++ b/llvm/lib/Target/AArch64/AArch64SystemOperands.td
@@ -586,6 +586,7 @@ class SysReg<string name, bits<2> op0, bits<3> op1, bits<4> crn, bits<4> crm,
let EnumValueField = "Encoding";
string Name = name;
+ string AltName = name;
bits<16> Encoding;
let Encoding{15-14} = op0;
let Encoding{13-11} = op1;
@@ -912,13 +913,19 @@ def : RWSysReg<"HSTR_EL2", 0b11, 0b100, 0b0001, 0b0001, 0b011>;
def : RWSysReg<"HACR_EL2", 0b11, 0b100, 0b0001, 0b0001, 0b111>;
def : RWSysReg<"MDCR_EL3", 0b11, 0b110, 0b0001, 0b0011, 0b001>;
def : RWSysReg<"TTBR0_EL1", 0b11, 0b000, 0b0010, 0b0000, 0b000>;
-def : RWSysReg<"TTBR0_EL2", 0b11, 0b100, 0b0010, 0b0000, 0b000>;
def : RWSysReg<"TTBR0_EL3", 0b11, 0b110, 0b0010, 0b0000, 0b000>;
+
+let Requires = [{ {AArch64::FeatureEL2VMSA} }] in {
+def : RWSysReg<"TTBR0_EL2", 0b11, 0b100, 0b0010, 0b0000, 0b000> {
+ let AltName = "VSCTLR_EL2";
+}
+def : RWSysReg<"VTTBR_EL2", 0b11, 0b100, 0b0010, 0b0001, 0b000>;
+}
+
def : RWSysReg<"TTBR1_EL1", 0b11, 0b000, 0b0010, 0b0000, 0b001>;
def : RWSysReg<"TCR_EL1", 0b11, 0b000, 0b0010, 0b0000, 0b010>;
def : RWSysReg<"TCR_EL2", 0b11, 0b100, 0b0010, 0b0000, 0b010>;
def : RWSysReg<"TCR_EL3", 0b11, 0b110, 0b0010, 0b0000, 0b010>;
-def : RWSysReg<"VTTBR_EL2", 0b11, 0b100, 0b0010, 0b0001, 0b000>;
def : RWSysReg<"VTCR_EL2", 0b11, 0b100, 0b0010, 0b0001, 0b010>;
def : RWSysReg<"DACR32_EL2", 0b11, 0b100, 0b0011, 0b0000, 0b000>;
def : RWSysReg<"SPSR_EL1", 0b11, 0b000, 0b0100, 0b0000, 0b000>;
@@ -970,6 +977,7 @@ def : RWSysReg<"PMUSERENR_EL0", 0b11, 0b011, 0b1001, 0b1110, 0b000>;
def : RWSysReg<"PMINTENSET_EL1", 0b11, 0b000, 0b1001, 0b1110, 0b001>;
def : RWSysReg<"PMINTENCLR_EL1", 0b11, 0b000, 0b1001, 0b1110, 0b010>;
def : RWSysReg<"PMOVSSET_EL0", 0b11, 0b011, 0b1001, 0b1110, 0b011>;
+def : RWSysReg<"PMMIR_EL1", 0b11, 0b000, 0b1001, 0b1110, 0b110>;
def : RWSysReg<"MAIR_EL1", 0b11, 0b000, 0b1010, 0b0010, 0b000>;
def : RWSysReg<"MAIR_EL2", 0b11, 0b100, 0b1010, 0b0010, 0b000>;
def : RWSysReg<"MAIR_EL3", 0b11, 0b110, 0b1010, 0b0010, 0b000>;
@@ -1292,6 +1300,57 @@ def : RWSysReg<"ICH_LR13_EL2", 0b11, 0b100, 0b1100, 0b1101, 0b101>;
def : RWSysReg<"ICH_LR14_EL2", 0b11, 0b100, 0b1100, 0b1101, 0b110>;
def : RWSysReg<"ICH_LR15_EL2", 0b11, 0b100, 0b1100, 0b1101, 0b111>;
+// v8r system registers
+let Requires = [{ {AArch64::HasV8_0rOps} }] in {
+//Virtualization System Control Register
+// Op0 Op1 CRn CRm Op2
+def : RWSysReg<"VSCTLR_EL2", 0b11, 0b100, 0b0010, 0b0000, 0b000> {
+ let AltName = "TTBR0_EL2";
+}
+
+//MPU Type Register
+// Op0 Op1 CRn CRm Op2
+def : RWSysReg<"MPUIR_EL1", 0b11, 0b000, 0b0000, 0b0000, 0b100>;
+def : RWSysReg<"MPUIR_EL2", 0b11, 0b100, 0b0000, 0b0000, 0b100>;
+
+//Protection Region Enable Register
+// Op0 Op1 CRn CRm Op2
+def : RWSysReg<"PRENR_EL1", 0b11, 0b000, 0b0110, 0b0001, 0b001>;
+def : RWSysReg<"PRENR_EL2", 0b11, 0b100, 0b0110, 0b0001, 0b001>;
+
+//Protection Region Selection Register
+// Op0 Op1 CRn CRm Op2
+def : RWSysReg<"PRSELR_EL1", 0b11, 0b000, 0b0110, 0b0010, 0b001>;
+def : RWSysReg<"PRSELR_EL2", 0b11, 0b100, 0b0110, 0b0010, 0b001>;
+
+//Protection Region Base Address Register
+// Op0 Op1 CRn CRm Op2
+def : RWSysReg<"PRBAR_EL1", 0b11, 0b000, 0b0110, 0b1000, 0b000>;
+def : RWSysReg<"PRBAR_EL2", 0b11, 0b100, 0b0110, 0b1000, 0b000>;
+
+//Protection Region Limit Address Register
+// Op0 Op1 CRn CRm Op2
+def : RWSysReg<"PRLAR_EL1", 0b11, 0b000, 0b0110, 0b1000, 0b001>;
+def : RWSysReg<"PRLAR_EL2", 0b11, 0b100, 0b0110, 0b1000, 0b001>;
+
+foreach n = 0-15 in {
+foreach x = 1-2 in {
+//Direct access to Protection Region Base Address Register for the n-th MPU region
+ def : RWSysReg<!strconcat("PRBAR"#n, "_EL"#x),
+ 0b11, 0b000, 0b0110, 0b1000, 0b000>{
+ let Encoding{5-2} = n;
+ let Encoding{13} = !add(x,-1);
+ }
+
+ def : RWSysReg<!strconcat("PRLAR"#n, "_EL"#x),
+ 0b11, 0b000, 0b0110, 0b1000, 0b001>{
+ let Encoding{5-2} = n;
+ let Encoding{13} = !add(x,-1);
+ }
+} //foreach x = 1-2 in
+} //foreach n = 0-15 in
+} //let Requires = [{ {AArch64::HasV8_0rOps} }] in
+
// v8.1a "Privileged Access Never" extension-specific system registers
let Requires = [{ {AArch64::FeaturePAN} }] in
def : RWSysReg<"PAN", 0b11, 0b000, 0b0100, 0b0010, 0b011>;
@@ -1395,7 +1454,9 @@ let Requires = [{ {AArch64::FeatureSEL2} }] in {
// v8.4a "Virtualization secure second stage translation" registers
// Op0 Op1 CRn CRm Op2
def : RWSysReg<"VSTCR_EL2" , 0b11, 0b100, 0b0010, 0b0110, 0b010>;
-def : RWSysReg<"VSTTBR_EL2", 0b11, 0b100, 0b0010, 0b0110, 0b000>;
+def : RWSysReg<"VSTTBR_EL2", 0b11, 0b100, 0b0010, 0b0110, 0b000> {
+ let Requires = [{ {AArch64::HasV8_0aOps} }];
+}
// v8.4a "Virtualization timer" registers
// Op0 Op1 CRn CRm Op2
@@ -1411,12 +1472,6 @@ def : RWSysReg<"CNTHPS_CTL_EL2", 0b11, 0b100, 0b1110, 0b0101, 0b001>;
def : RWSysReg<"SDER32_EL2", 0b11, 0b100, 0b0001, 0b0011, 0b001>;
} // FeatureSEL2
-// v8.4a PMU registers
-// Op0 Op1 CRn CRm Op2
-let Requires = [{ {AArch64::FeaturePMU} }] in {
-def : RWSysReg<"PMMIR_EL1", 0b11, 0b000, 0b1001, 0b1110, 0b110>;
-} // FeaturePMU
-
// v8.4a RAS registers
// Op0 Op1 CRn CRm Op2
def : RWSysReg<"ERXPFGCTL_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b101>;
@@ -1640,7 +1695,7 @@ def : RWSysReg<"PMSNEVFR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b001>;
// Cyclone specific system registers
// Op0 Op1 CRn CRm Op2
-let Requires = [{ {AArch64::ProcAppleA7} }] in
+let Requires = [{ {AArch64::FeatureAppleA7SysReg} }] in
def : RWSysReg<"CPM_IOACC_CTL_EL3", 0b11, 0b111, 0b1111, 0b0010, 0b000>;
// Scalable Matrix Extension (SME)
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 99bcb2f4649a..ce26c62af61a 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -25,6 +25,7 @@
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
+#include "llvm/CodeGen/GlobalISel/LoadStoreOpt.h"
#include "llvm/CodeGen/GlobalISel/Localizer.h"
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/CodeGen/MIRParser/MIParser.h"
@@ -36,10 +37,10 @@
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/CFGuard.h"
@@ -175,6 +176,16 @@ static cl::opt<unsigned> SVEVectorBitsMinOpt(
extern cl::opt<bool> EnableHomogeneousPrologEpilog;
+static cl::opt<bool> EnableGISelLoadStoreOptPreLegal(
+ "aarch64-enable-gisel-ldst-prelegal",
+ cl::desc("Enable GlobalISel's pre-legalizer load/store optimization pass"),
+ cl::init(true), cl::Hidden);
+
+static cl::opt<bool> EnableGISelLoadStoreOptPostLegal(
+ "aarch64-enable-gisel-ldst-postlegal",
+ cl::desc("Enable GlobalISel's post-legalizer load/store optimization pass"),
+ cl::init(false), cl::Hidden);
+
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Target() {
// Register the target.
RegisterTargetMachine<AArch64leTargetMachine> X(getTheAArch64leTarget());
@@ -195,6 +206,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Target() {
initializeAArch64DeadRegisterDefinitionsPass(*PR);
initializeAArch64ExpandPseudoPass(*PR);
initializeAArch64LoadStoreOptPass(*PR);
+ initializeAArch64MIPeepholeOptPass(*PR);
initializeAArch64SIMDInstrOptPass(*PR);
initializeAArch64O0PreLegalizerCombinerPass(*PR);
initializeAArch64PreLegalizerCombinerPass(*PR);
@@ -354,10 +366,13 @@ AArch64TargetMachine::~AArch64TargetMachine() = default;
const AArch64Subtarget *
AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
Attribute CPUAttr = F.getFnAttribute("target-cpu");
+ Attribute TuneAttr = F.getFnAttribute("tune-cpu");
Attribute FSAttr = F.getFnAttribute("target-features");
std::string CPU =
CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU;
+ std::string TuneCPU =
+ TuneAttr.isValid() ? TuneAttr.getValueAsString().str() : CPU;
std::string FS =
FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS;
@@ -398,6 +413,7 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
Key += "SVEMax";
Key += std::to_string(MaxSVEVectorSize);
Key += CPU;
+ Key += TuneCPU;
Key += FS;
auto &I = SubtargetMap[Key];
@@ -406,8 +422,8 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
// creation will depend on the TM and the code generation flags on the
// function that reside in TargetOptions.
resetTargetOptions(F);
- I = std::make_unique<AArch64Subtarget>(TargetTriple, CPU, FS, *this,
- isLittle, MinSVEVectorSize,
+ I = std::make_unique<AArch64Subtarget>(TargetTriple, CPU, TuneCPU, FS,
+ *this, isLittle, MinSVEVectorSize,
MaxSVEVectorSize);
}
return I.get();
@@ -471,6 +487,7 @@ public:
void addIRPasses() override;
bool addPreISel() override;
+ void addCodeGenPrepare() override;
bool addInstSelector() override;
bool addIRTranslator() override;
void addPreLegalizeMachineIR() override;
@@ -479,6 +496,7 @@ public:
bool addRegBankSelect() override;
void addPreGlobalInstructionSelect() override;
bool addGlobalInstructionSelect() override;
+ void addMachineSSAOptimization() override;
bool addILPOpts() override;
void addPreRegAlloc() override;
void addPostRegAlloc() override;
@@ -597,6 +615,12 @@ bool AArch64PassConfig::addPreISel() {
return false;
}
+void AArch64PassConfig::addCodeGenPrepare() {
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createTypePromotionPass());
+ TargetPassConfig::addCodeGenPrepare();
+}
+
bool AArch64PassConfig::addInstSelector() {
addPass(createAArch64ISelDag(getAArch64TargetMachine(), getOptLevel()));
@@ -617,8 +641,11 @@ bool AArch64PassConfig::addIRTranslator() {
void AArch64PassConfig::addPreLegalizeMachineIR() {
if (getOptLevel() == CodeGenOpt::None)
addPass(createAArch64O0PreLegalizerCombiner());
- else
+ else {
addPass(createAArch64PreLegalizerCombiner());
+ if (EnableGISelLoadStoreOptPreLegal)
+ addPass(new LoadStoreOpt());
+ }
}
bool AArch64PassConfig::addLegalizeMachineIR() {
@@ -628,8 +655,11 @@ bool AArch64PassConfig::addLegalizeMachineIR() {
void AArch64PassConfig::addPreRegBankSelect() {
bool IsOptNone = getOptLevel() == CodeGenOpt::None;
- if (!IsOptNone)
+ if (!IsOptNone) {
addPass(createAArch64PostLegalizerCombiner(IsOptNone));
+ if (EnableGISelLoadStoreOptPostLegal)
+ addPass(new LoadStoreOpt());
+ }
addPass(createAArch64PostLegalizerLowering());
}
@@ -649,6 +679,14 @@ bool AArch64PassConfig::addGlobalInstructionSelect() {
return false;
}
+void AArch64PassConfig::addMachineSSAOptimization() {
+ // Run default MachineSSAOptimization first.
+ TargetPassConfig::addMachineSSAOptimization();
+
+ if (TM->getOptLevel() != CodeGenOpt::None)
+ addPass(createAArch64MIPeepholeOptPass());
+}
+
bool AArch64PassConfig::addILPOpts() {
if (EnableCondOpt)
addPass(createAArch64ConditionOptimizerPass());
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 01236aa6b527..63d6fa5bbb26 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -9,11 +9,13 @@
#include "AArch64TargetTransformInfo.h"
#include "AArch64ExpandImm.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/PatternMatch.h"
@@ -220,19 +222,15 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
auto *RetTy = ICA.getReturnType();
switch (ICA.getID()) {
case Intrinsic::umin:
- case Intrinsic::umax: {
- auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
- // umin(x,y) -> sub(x,usubsat(x,y))
- // umax(x,y) -> add(x,usubsat(y,x))
- if (LT.second == MVT::v2i64)
- return LT.first * 2;
- LLVM_FALLTHROUGH;
- }
+ case Intrinsic::umax:
case Intrinsic::smin:
case Intrinsic::smax: {
static const auto ValidMinMaxTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
MVT::v8i16, MVT::v2i32, MVT::v4i32};
auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
+ // v2i64 types get converted to cmp+bif hence the cost of 2
+ if (LT.second == MVT::v2i64)
+ return LT.first * 2;
if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; }))
return LT.first;
break;
@@ -291,13 +289,15 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
const auto LegalisationCost = TLI->getTypeLegalizationCost(DL, RetTy);
const auto *Entry =
CostTableLookup(BitreverseTbl, ICA.getID(), LegalisationCost.second);
- // Cost Model is using the legal type(i32) that i8 and i16 will be converted
- // to +1 so that we match the actual lowering cost
- if (TLI->getValueType(DL, RetTy, true) == MVT::i8 ||
- TLI->getValueType(DL, RetTy, true) == MVT::i16)
- return LegalisationCost.first * Entry->Cost + 1;
- if (Entry)
+ if (Entry) {
+ // Cost Model is using the legal type(i32) that i8 and i16 will be
+ // converted to +1 so that we match the actual lowering cost
+ if (TLI->getValueType(DL, RetTy, true) == MVT::i8 ||
+ TLI->getValueType(DL, RetTy, true) == MVT::i16)
+ return LegalisationCost.first * Entry->Cost + 1;
+
return LegalisationCost.first * Entry->Cost;
+ }
break;
}
case Intrinsic::ctpop: {
@@ -440,6 +440,18 @@ static Optional<Instruction *> instCombineSVEDup(InstCombiner &IC,
return IC.replaceInstUsesWith(II, Insert);
}
+static Optional<Instruction *> instCombineSVEDupX(InstCombiner &IC,
+ IntrinsicInst &II) {
+ // Replace DupX with a regular IR splat.
+ IRBuilder<> Builder(II.getContext());
+ Builder.SetInsertPoint(&II);
+ auto *RetTy = cast<ScalableVectorType>(II.getType());
+ Value *Splat =
+ Builder.CreateVectorSplat(RetTy->getElementCount(), II.getArgOperand(0));
+ Splat->takeName(&II);
+ return IC.replaceInstUsesWith(II, Splat);
+}
+
static Optional<Instruction *> instCombineSVECmpNE(InstCombiner &IC,
IntrinsicInst &II) {
LLVMContext &Ctx = II.getContext();
@@ -457,12 +469,9 @@ static Optional<Instruction *> instCombineSVECmpNE(InstCombiner &IC,
return None;
// Check that we have a compare of zero..
- auto *DupX = dyn_cast<IntrinsicInst>(II.getArgOperand(2));
- if (!DupX || DupX->getIntrinsicID() != Intrinsic::aarch64_sve_dup_x)
- return None;
-
- auto *DupXArg = dyn_cast<ConstantInt>(DupX->getArgOperand(0));
- if (!DupXArg || !DupXArg->isZero())
+ auto *SplatValue =
+ dyn_cast_or_null<ConstantInt>(getSplatValue(II.getArgOperand(2)));
+ if (!SplatValue || !SplatValue->isZero())
return None;
// ..against a dupq
@@ -547,14 +556,34 @@ static Optional<Instruction *> instCombineSVECmpNE(InstCombiner &IC,
static Optional<Instruction *> instCombineSVELast(InstCombiner &IC,
IntrinsicInst &II) {
+ IRBuilder<> Builder(II.getContext());
+ Builder.SetInsertPoint(&II);
Value *Pg = II.getArgOperand(0);
Value *Vec = II.getArgOperand(1);
- bool IsAfter = II.getIntrinsicID() == Intrinsic::aarch64_sve_lasta;
+ auto IntrinsicID = II.getIntrinsicID();
+ bool IsAfter = IntrinsicID == Intrinsic::aarch64_sve_lasta;
// lastX(splat(X)) --> X
if (auto *SplatVal = getSplatValue(Vec))
return IC.replaceInstUsesWith(II, SplatVal);
+ // If x and/or y is a splat value then:
+ // lastX (binop (x, y)) --> binop(lastX(x), lastX(y))
+ Value *LHS, *RHS;
+ if (match(Vec, m_OneUse(m_BinOp(m_Value(LHS), m_Value(RHS))))) {
+ if (isSplatValue(LHS) || isSplatValue(RHS)) {
+ auto *OldBinOp = cast<BinaryOperator>(Vec);
+ auto OpC = OldBinOp->getOpcode();
+ auto *NewLHS =
+ Builder.CreateIntrinsic(IntrinsicID, {Vec->getType()}, {Pg, LHS});
+ auto *NewRHS =
+ Builder.CreateIntrinsic(IntrinsicID, {Vec->getType()}, {Pg, RHS});
+ auto *NewBinOp = BinaryOperator::CreateWithCopiedFlags(
+ OpC, NewLHS, NewRHS, OldBinOp, OldBinOp->getName(), &II);
+ return IC.replaceInstUsesWith(II, NewBinOp);
+ }
+ }
+
auto *C = dyn_cast<Constant>(Pg);
if (IsAfter && C && C->isNullValue()) {
// The intrinsic is extracting lane 0 so use an extract instead.
@@ -576,39 +605,11 @@ static Optional<Instruction *> instCombineSVELast(InstCombiner &IC,
cast<ConstantInt>(IntrPG->getOperand(0))->getZExtValue();
// Can the intrinsic's predicate be converted to a known constant index?
- unsigned Idx;
- switch (PTruePattern) {
- default:
+ unsigned MinNumElts = getNumElementsFromSVEPredPattern(PTruePattern);
+ if (!MinNumElts)
return None;
- case AArch64SVEPredPattern::vl1:
- Idx = 0;
- break;
- case AArch64SVEPredPattern::vl2:
- Idx = 1;
- break;
- case AArch64SVEPredPattern::vl3:
- Idx = 2;
- break;
- case AArch64SVEPredPattern::vl4:
- Idx = 3;
- break;
- case AArch64SVEPredPattern::vl5:
- Idx = 4;
- break;
- case AArch64SVEPredPattern::vl6:
- Idx = 5;
- break;
- case AArch64SVEPredPattern::vl7:
- Idx = 6;
- break;
- case AArch64SVEPredPattern::vl8:
- Idx = 7;
- break;
- case AArch64SVEPredPattern::vl16:
- Idx = 15;
- break;
- }
+ unsigned Idx = MinNumElts - 1;
// Increment the index if extracting the element after the last active
// predicate element.
if (IsAfter)
@@ -661,26 +662,9 @@ instCombineSVECntElts(InstCombiner &IC, IntrinsicInst &II, unsigned NumElts) {
return IC.replaceInstUsesWith(II, VScale);
}
- unsigned MinNumElts = 0;
- switch (Pattern) {
- default:
- return None;
- case AArch64SVEPredPattern::vl1:
- case AArch64SVEPredPattern::vl2:
- case AArch64SVEPredPattern::vl3:
- case AArch64SVEPredPattern::vl4:
- case AArch64SVEPredPattern::vl5:
- case AArch64SVEPredPattern::vl6:
- case AArch64SVEPredPattern::vl7:
- case AArch64SVEPredPattern::vl8:
- MinNumElts = Pattern;
- break;
- case AArch64SVEPredPattern::vl16:
- MinNumElts = 16;
- break;
- }
+ unsigned MinNumElts = getNumElementsFromSVEPredPattern(Pattern);
- return NumElts >= MinNumElts
+ return MinNumElts && NumElts >= MinNumElts
? Optional<Instruction *>(IC.replaceInstUsesWith(
II, ConstantInt::get(II.getType(), MinNumElts)))
: None;
@@ -711,6 +695,116 @@ static Optional<Instruction *> instCombineSVEPTest(InstCombiner &IC,
return None;
}
+static Optional<Instruction *> instCombineSVEVectorFMLA(InstCombiner &IC,
+ IntrinsicInst &II) {
+ // fold (fadd p a (fmul p b c)) -> (fma p a b c)
+ Value *P = II.getOperand(0);
+ Value *A = II.getOperand(1);
+ auto FMul = II.getOperand(2);
+ Value *B, *C;
+ if (!match(FMul, m_Intrinsic<Intrinsic::aarch64_sve_fmul>(
+ m_Specific(P), m_Value(B), m_Value(C))))
+ return None;
+
+ if (!FMul->hasOneUse())
+ return None;
+
+ llvm::FastMathFlags FAddFlags = II.getFastMathFlags();
+ // Stop the combine when the flags on the inputs differ in case dropping flags
+ // would lead to us missing out on more beneficial optimizations.
+ if (FAddFlags != cast<CallInst>(FMul)->getFastMathFlags())
+ return None;
+ if (!FAddFlags.allowContract())
+ return None;
+
+ IRBuilder<> Builder(II.getContext());
+ Builder.SetInsertPoint(&II);
+ auto FMLA = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_fmla,
+ {II.getType()}, {P, A, B, C}, &II);
+ FMLA->setFastMathFlags(FAddFlags);
+ return IC.replaceInstUsesWith(II, FMLA);
+}
+
+static Optional<Instruction *>
+instCombineSVELD1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL) {
+ IRBuilder<> Builder(II.getContext());
+ Builder.SetInsertPoint(&II);
+
+ Value *Pred = II.getOperand(0);
+ Value *PtrOp = II.getOperand(1);
+ Type *VecTy = II.getType();
+ Value *VecPtr = Builder.CreateBitCast(PtrOp, VecTy->getPointerTo());
+
+ if (match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
+ m_ConstantInt<AArch64SVEPredPattern::all>()))) {
+ LoadInst *Load = Builder.CreateLoad(VecTy, VecPtr);
+ return IC.replaceInstUsesWith(II, Load);
+ }
+
+ CallInst *MaskedLoad =
+ Builder.CreateMaskedLoad(VecTy, VecPtr, PtrOp->getPointerAlignment(DL),
+ Pred, ConstantAggregateZero::get(VecTy));
+ return IC.replaceInstUsesWith(II, MaskedLoad);
+}
+
+static Optional<Instruction *>
+instCombineSVEST1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL) {
+ IRBuilder<> Builder(II.getContext());
+ Builder.SetInsertPoint(&II);
+
+ Value *VecOp = II.getOperand(0);
+ Value *Pred = II.getOperand(1);
+ Value *PtrOp = II.getOperand(2);
+ Value *VecPtr =
+ Builder.CreateBitCast(PtrOp, VecOp->getType()->getPointerTo());
+
+ if (match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
+ m_ConstantInt<AArch64SVEPredPattern::all>()))) {
+ Builder.CreateStore(VecOp, VecPtr);
+ return IC.eraseInstFromFunction(II);
+ }
+
+ Builder.CreateMaskedStore(VecOp, VecPtr, PtrOp->getPointerAlignment(DL),
+ Pred);
+ return IC.eraseInstFromFunction(II);
+}
+
+static Instruction::BinaryOps intrinsicIDToBinOpCode(unsigned Intrinsic) {
+ switch (Intrinsic) {
+ case Intrinsic::aarch64_sve_fmul:
+ return Instruction::BinaryOps::FMul;
+ case Intrinsic::aarch64_sve_fadd:
+ return Instruction::BinaryOps::FAdd;
+ case Intrinsic::aarch64_sve_fsub:
+ return Instruction::BinaryOps::FSub;
+ default:
+ return Instruction::BinaryOpsEnd;
+ }
+}
+
+static Optional<Instruction *> instCombineSVEVectorBinOp(InstCombiner &IC,
+ IntrinsicInst &II) {
+ auto *OpPredicate = II.getOperand(0);
+ auto BinOpCode = intrinsicIDToBinOpCode(II.getIntrinsicID());
+ if (BinOpCode == Instruction::BinaryOpsEnd ||
+ !match(OpPredicate, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
+ m_ConstantInt<AArch64SVEPredPattern::all>())))
+ return None;
+ IRBuilder<> Builder(II.getContext());
+ Builder.SetInsertPoint(&II);
+ Builder.setFastMathFlags(II.getFastMathFlags());
+ auto BinOp =
+ Builder.CreateBinOp(BinOpCode, II.getOperand(1), II.getOperand(2));
+ return IC.replaceInstUsesWith(II, BinOp);
+}
+
+static Optional<Instruction *> instCombineSVEVectorFAdd(InstCombiner &IC,
+ IntrinsicInst &II) {
+ if (auto FMLA = instCombineSVEVectorFMLA(IC, II))
+ return FMLA;
+ return instCombineSVEVectorBinOp(IC, II);
+}
+
static Optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC,
IntrinsicInst &II) {
auto *OpPredicate = II.getOperand(0);
@@ -720,14 +814,11 @@ static Optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC,
IRBuilder<> Builder(II.getContext());
Builder.SetInsertPoint(&II);
- // Return true if a given instruction is an aarch64_sve_dup_x intrinsic call
- // with a unit splat value, false otherwise.
- auto IsUnitDupX = [](auto *I) {
- auto *IntrI = dyn_cast<IntrinsicInst>(I);
- if (!IntrI || IntrI->getIntrinsicID() != Intrinsic::aarch64_sve_dup_x)
+ // Return true if a given instruction is a unit splat value, false otherwise.
+ auto IsUnitSplat = [](auto *I) {
+ auto *SplatValue = getSplatValue(I);
+ if (!SplatValue)
return false;
-
- auto *SplatValue = IntrI->getOperand(0);
return match(SplatValue, m_FPOne()) || match(SplatValue, m_One());
};
@@ -744,10 +835,10 @@ static Optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC,
// The OpMultiplier variable should always point to the dup (if any), so
// swap if necessary.
- if (IsUnitDup(OpMultiplicand) || IsUnitDupX(OpMultiplicand))
+ if (IsUnitDup(OpMultiplicand) || IsUnitSplat(OpMultiplicand))
std::swap(OpMultiplier, OpMultiplicand);
- if (IsUnitDupX(OpMultiplier)) {
+ if (IsUnitSplat(OpMultiplier)) {
// [f]mul pg (dupx 1) %n => %n
OpMultiplicand->takeName(&II);
return IC.replaceInstUsesWith(II, OpMultiplicand);
@@ -763,22 +854,40 @@ static Optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC,
}
}
- return None;
+ return instCombineSVEVectorBinOp(IC, II);
}
+static Optional<Instruction *> instCombineSVEUnpack(InstCombiner &IC,
+ IntrinsicInst &II) {
+ IRBuilder<> Builder(II.getContext());
+ Builder.SetInsertPoint(&II);
+ Value *UnpackArg = II.getArgOperand(0);
+ auto *RetTy = cast<ScalableVectorType>(II.getType());
+ bool IsSigned = II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpkhi ||
+ II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpklo;
+
+ // Hi = uunpkhi(splat(X)) --> Hi = splat(extend(X))
+ // Lo = uunpklo(splat(X)) --> Lo = splat(extend(X))
+ if (auto *ScalarArg = getSplatValue(UnpackArg)) {
+ ScalarArg =
+ Builder.CreateIntCast(ScalarArg, RetTy->getScalarType(), IsSigned);
+ Value *NewVal =
+ Builder.CreateVectorSplat(RetTy->getElementCount(), ScalarArg);
+ NewVal->takeName(&II);
+ return IC.replaceInstUsesWith(II, NewVal);
+ }
+
+ return None;
+}
static Optional<Instruction *> instCombineSVETBL(InstCombiner &IC,
IntrinsicInst &II) {
auto *OpVal = II.getOperand(0);
auto *OpIndices = II.getOperand(1);
VectorType *VTy = cast<VectorType>(II.getType());
- // Check whether OpIndices is an aarch64_sve_dup_x intrinsic call with
- // constant splat value < minimal element count of result.
- auto *DupXIntrI = dyn_cast<IntrinsicInst>(OpIndices);
- if (!DupXIntrI || DupXIntrI->getIntrinsicID() != Intrinsic::aarch64_sve_dup_x)
- return None;
-
- auto *SplatValue = dyn_cast<ConstantInt>(DupXIntrI->getOperand(0));
+ // Check whether OpIndices is a constant splat value < minimal element count
+ // of result.
+ auto *SplatValue = dyn_cast_or_null<ConstantInt>(getSplatValue(OpIndices));
if (!SplatValue ||
SplatValue->getValue().uge(VTy->getElementCount().getKnownMinValue()))
return None;
@@ -795,6 +904,115 @@ static Optional<Instruction *> instCombineSVETBL(InstCombiner &IC,
return IC.replaceInstUsesWith(II, VectorSplat);
}
+static Optional<Instruction *> instCombineSVETupleGet(InstCombiner &IC,
+ IntrinsicInst &II) {
+ // Try to remove sequences of tuple get/set.
+ Value *SetTuple, *SetIndex, *SetValue;
+ auto *GetTuple = II.getArgOperand(0);
+ auto *GetIndex = II.getArgOperand(1);
+ // Check that we have tuple_get(GetTuple, GetIndex) where GetTuple is a
+ // call to tuple_set i.e. tuple_set(SetTuple, SetIndex, SetValue).
+ // Make sure that the types of the current intrinsic and SetValue match
+ // in order to safely remove the sequence.
+ if (!match(GetTuple,
+ m_Intrinsic<Intrinsic::aarch64_sve_tuple_set>(
+ m_Value(SetTuple), m_Value(SetIndex), m_Value(SetValue))) ||
+ SetValue->getType() != II.getType())
+ return None;
+ // Case where we get the same index right after setting it.
+ // tuple_get(tuple_set(SetTuple, SetIndex, SetValue), GetIndex) --> SetValue
+ if (GetIndex == SetIndex)
+ return IC.replaceInstUsesWith(II, SetValue);
+ // If we are getting a different index than what was set in the tuple_set
+  // intrinsic, we can just set the input tuple to the one up in the chain.
+ // tuple_get(tuple_set(SetTuple, SetIndex, SetValue), GetIndex)
+ // --> tuple_get(SetTuple, GetIndex)
+ return IC.replaceOperand(II, 0, SetTuple);
+}
+
+static Optional<Instruction *> instCombineSVEZip(InstCombiner &IC,
+ IntrinsicInst &II) {
+ // zip1(uzp1(A, B), uzp2(A, B)) --> A
+ // zip2(uzp1(A, B), uzp2(A, B)) --> B
+ Value *A, *B;
+ if (match(II.getArgOperand(0),
+ m_Intrinsic<Intrinsic::aarch64_sve_uzp1>(m_Value(A), m_Value(B))) &&
+ match(II.getArgOperand(1), m_Intrinsic<Intrinsic::aarch64_sve_uzp2>(
+ m_Specific(A), m_Specific(B))))
+ return IC.replaceInstUsesWith(
+ II, (II.getIntrinsicID() == Intrinsic::aarch64_sve_zip1 ? A : B));
+
+ return None;
+}
+
+static Optional<Instruction *> instCombineLD1GatherIndex(InstCombiner &IC,
+ IntrinsicInst &II) {
+ Value *Mask = II.getOperand(0);
+ Value *BasePtr = II.getOperand(1);
+ Value *Index = II.getOperand(2);
+ Type *Ty = II.getType();
+ Type *BasePtrTy = BasePtr->getType();
+ Value *PassThru = ConstantAggregateZero::get(Ty);
+
+ // Contiguous gather => masked load.
+ // (sve.ld1.gather.index Mask BasePtr (sve.index IndexBase 1))
+ // => (masked.load (gep BasePtr IndexBase) Align Mask zeroinitializer)
+ Value *IndexBase;
+ if (match(Index, m_Intrinsic<Intrinsic::aarch64_sve_index>(
+ m_Value(IndexBase), m_SpecificInt(1)))) {
+ IRBuilder<> Builder(II.getContext());
+ Builder.SetInsertPoint(&II);
+
+ Align Alignment =
+ BasePtr->getPointerAlignment(II.getModule()->getDataLayout());
+
+ Type *VecPtrTy = PointerType::getUnqual(Ty);
+ Value *Ptr = Builder.CreateGEP(BasePtrTy->getPointerElementType(), BasePtr,
+ IndexBase);
+ Ptr = Builder.CreateBitCast(Ptr, VecPtrTy);
+ CallInst *MaskedLoad =
+ Builder.CreateMaskedLoad(Ty, Ptr, Alignment, Mask, PassThru);
+ MaskedLoad->takeName(&II);
+ return IC.replaceInstUsesWith(II, MaskedLoad);
+ }
+
+ return None;
+}
+
+static Optional<Instruction *> instCombineST1ScatterIndex(InstCombiner &IC,
+ IntrinsicInst &II) {
+ Value *Val = II.getOperand(0);
+ Value *Mask = II.getOperand(1);
+ Value *BasePtr = II.getOperand(2);
+ Value *Index = II.getOperand(3);
+ Type *Ty = Val->getType();
+ Type *BasePtrTy = BasePtr->getType();
+
+ // Contiguous scatter => masked store.
+  // (sve.st1.scatter.index Value Mask BasePtr (sve.index IndexBase 1))
+ // => (masked.store Value (gep BasePtr IndexBase) Align Mask)
+ Value *IndexBase;
+ if (match(Index, m_Intrinsic<Intrinsic::aarch64_sve_index>(
+ m_Value(IndexBase), m_SpecificInt(1)))) {
+ IRBuilder<> Builder(II.getContext());
+ Builder.SetInsertPoint(&II);
+
+ Align Alignment =
+ BasePtr->getPointerAlignment(II.getModule()->getDataLayout());
+
+ Value *Ptr = Builder.CreateGEP(BasePtrTy->getPointerElementType(), BasePtr,
+ IndexBase);
+ Type *VecPtrTy = PointerType::getUnqual(Ty);
+ Ptr = Builder.CreateBitCast(Ptr, VecPtrTy);
+
+ (void)Builder.CreateMaskedStore(Val, Ptr, Alignment, Mask);
+
+ return IC.eraseInstFromFunction(II);
+ }
+
+ return None;
+}
+
Optional<Instruction *>
AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
IntrinsicInst &II) const {
@@ -806,6 +1024,8 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
return instCombineConvertFromSVBool(IC, II);
case Intrinsic::aarch64_sve_dup:
return instCombineSVEDup(IC, II);
+ case Intrinsic::aarch64_sve_dup_x:
+ return instCombineSVEDupX(IC, II);
case Intrinsic::aarch64_sve_cmpne:
case Intrinsic::aarch64_sve_cmpne_wide:
return instCombineSVECmpNE(IC, II);
@@ -829,8 +1049,30 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
case Intrinsic::aarch64_sve_mul:
case Intrinsic::aarch64_sve_fmul:
return instCombineSVEVectorMul(IC, II);
+ case Intrinsic::aarch64_sve_fadd:
+ return instCombineSVEVectorFAdd(IC, II);
+ case Intrinsic::aarch64_sve_fsub:
+ return instCombineSVEVectorBinOp(IC, II);
case Intrinsic::aarch64_sve_tbl:
return instCombineSVETBL(IC, II);
+ case Intrinsic::aarch64_sve_uunpkhi:
+ case Intrinsic::aarch64_sve_uunpklo:
+ case Intrinsic::aarch64_sve_sunpkhi:
+ case Intrinsic::aarch64_sve_sunpklo:
+ return instCombineSVEUnpack(IC, II);
+ case Intrinsic::aarch64_sve_tuple_get:
+ return instCombineSVETupleGet(IC, II);
+ case Intrinsic::aarch64_sve_zip1:
+ case Intrinsic::aarch64_sve_zip2:
+ return instCombineSVEZip(IC, II);
+ case Intrinsic::aarch64_sve_ld1_gather_index:
+ return instCombineLD1GatherIndex(IC, II);
+ case Intrinsic::aarch64_sve_st1_scatter_index:
+ return instCombineST1ScatterIndex(IC, II);
+ case Intrinsic::aarch64_sve_ld1:
+ return instCombineSVELD1(IC, II, DL);
+ case Intrinsic::aarch64_sve_st1:
+ return instCombineSVEST1(IC, II, DL);
}
return None;
@@ -1393,9 +1635,13 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
return (Cost + 1) * LT.first;
case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FNEG:
// These nodes are marked as 'custom' just to lower them to SVE.
// We know said lowering will incur no additional cost.
- if (isa<FixedVectorType>(Ty) && !Ty->getScalarType()->isFP128Ty())
+ if (!Ty->getScalarType()->isFP128Ty())
return (Cost + 2) * LT.first;
return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
@@ -1525,8 +1771,7 @@ AArch64TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
InstructionCost AArch64TTIImpl::getGatherScatterOpCost(
unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) {
-
- if (!isa<ScalableVectorType>(DataTy))
+ if (useNeonVector(DataTy))
return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
Alignment, CostKind, I);
auto *VT = cast<VectorType>(DataTy);
@@ -1623,9 +1868,10 @@ InstructionCost AArch64TTIImpl::getInterleavedMemoryOpCost(
// ldN/stN only support legal vector types of size 64 or 128 in bits.
// Accesses having vector types that are a multiple of 128 bits can be
// matched to more than one ldN/stN instruction.
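+ // Illustrative note: a legal 128-bit sub-vector needs one ldN/stN
+ // (cost == Factor); a sub-vector spanning N * 128 bits is matched to
+ // N ldN/stN instructions (cost == N * Factor).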
+ bool UseScalable;
if (NumElts % Factor == 0 &&
- TLI->isLegalInterleavedAccessType(SubVecTy, DL))
- return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL);
+ TLI->isLegalInterleavedAccessType(SubVecTy, DL, UseScalable))
+ return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL, UseScalable);
}
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
@@ -1705,9 +1951,12 @@ getFalkorUnrollingPreferences(Loop *L, ScalarEvolution &SE,
}
void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
- TTI::UnrollingPreferences &UP) {
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE) {
// Enable partial unrolling and runtime unrolling.
- BaseT::getUnrollingPreferences(L, SE, UP);
+ BaseT::getUnrollingPreferences(L, SE, UP, ORE);
+
+ UP.UpperBound = true;
// For inner loop, it is more likely to be a hot one, and the runtime check
// can be promoted out from LICM pass, so the overhead is less, let's try
@@ -1749,7 +1998,6 @@ void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
!ST->getSchedModel().isOutOfOrder()) {
UP.Runtime = true;
UP.Partial = true;
- UP.UpperBound = true;
UP.UnrollRemainder = true;
UP.DefaultUnrollRuntimeCount = 4;
@@ -1775,7 +2023,7 @@ Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
StructType *ST = dyn_cast<StructType>(ExpectedType);
if (!ST)
return nullptr;
- unsigned NumElts = Inst->getNumArgOperands() - 1;
+ unsigned NumElts = Inst->arg_size() - 1;
if (ST->getNumElements() != NumElts)
return nullptr;
for (unsigned i = 0, e = NumElts; i != e; ++i) {
@@ -1816,7 +2064,7 @@ bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
case Intrinsic::aarch64_neon_st4:
Info.ReadMem = false;
Info.WriteMem = true;
- Info.PtrVal = Inst->getArgOperand(Inst->getNumArgOperands() - 1);
+ Info.PtrVal = Inst->getArgOperand(Inst->arg_size() - 1);
break;
}
@@ -1892,6 +2140,8 @@ bool AArch64TTIImpl::isLegalToVectorizeReduction(
case RecurKind::UMax:
case RecurKind::FMin:
case RecurKind::FMax:
+ case RecurKind::SelectICmp:
+ case RecurKind::SelectFCmp:
return true;
default:
return false;
@@ -1902,23 +2152,23 @@ InstructionCost
AArch64TTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
bool IsUnsigned,
TTI::TargetCostKind CostKind) {
- if (!isa<ScalableVectorType>(Ty))
+ std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+
+ if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
return BaseT::getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind);
- assert((isa<ScalableVectorType>(Ty) && isa<ScalableVectorType>(CondTy)) &&
- "Both vector needs to be scalable");
- std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+ assert((isa<ScalableVectorType>(Ty) == isa<ScalableVectorType>(CondTy)) &&
+ "Both vector needs to be equally scalable");
+
InstructionCost LegalizationCost = 0;
if (LT.first > 1) {
Type *LegalVTy = EVT(LT.second).getTypeForEVT(Ty->getContext());
- unsigned CmpOpcode =
- Ty->isFPOrFPVectorTy() ? Instruction::FCmp : Instruction::ICmp;
- LegalizationCost =
- getCmpSelInstrCost(CmpOpcode, LegalVTy, LegalVTy,
- CmpInst::BAD_ICMP_PREDICATE, CostKind) +
- getCmpSelInstrCost(Instruction::Select, LegalVTy, LegalVTy,
- CmpInst::BAD_ICMP_PREDICATE, CostKind);
- LegalizationCost *= LT.first - 1;
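+ // Legalization splits the type into LT.first parts; combining them
+ // pairwise takes LT.first - 1 min/max operations, each costed via the
+ // matching intrinsic below.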
+ unsigned MinMaxOpcode =
+ Ty->isFPOrFPVectorTy()
+ ? Intrinsic::maxnum
+ : (IsUnsigned ? Intrinsic::umin : Intrinsic::smin);
+ IntrinsicCostAttributes Attrs(MinMaxOpcode, LegalVTy, {LegalVTy, LegalVTy});
+ LegalizationCost = getIntrinsicInstrCost(Attrs, CostKind) * (LT.first - 1);
}
return LegalizationCost + /*Cost of horizontal reduction*/ 2;
@@ -1954,8 +2204,13 @@ AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
Optional<FastMathFlags> FMF,
TTI::TargetCostKind CostKind) {
if (TTI::requiresOrderedReduction(FMF)) {
- if (!isa<ScalableVectorType>(ValTy))
- return BaseT::getArithmeticReductionCost(Opcode, ValTy, FMF, CostKind);
+ if (auto *FixedVTy = dyn_cast<FixedVectorType>(ValTy)) {
+ InstructionCost BaseCost =
+ BaseT::getArithmeticReductionCost(Opcode, ValTy, FMF, CostKind);
+ // Add on extra cost to reflect the extra overhead on some CPUs. We still
+ // end up vectorizing for more computationally intensive loops.
+ return BaseCost + FixedVTy->getNumElements();
+ }
if (Opcode != Instruction::FAdd)
return InstructionCost::getInvalid();
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index d55fd5b4f815..d1e8cd204b3a 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -125,10 +125,8 @@ public:
return ST->getMinVectorRegisterBitWidth();
}
- Optional<unsigned> getMaxVScale() const {
- if (ST->hasSVE())
- return AArch64::SVEMaxBitsPerVector / AArch64::SVEBitsPerBlock;
- return BaseT::getMaxVScale();
+ Optional<unsigned> getVScaleForTuning() const {
+ return ST->getVScaleForTuning();
}
/// Try to return an estimate cost factor that can be used as a multiplier
@@ -138,9 +136,8 @@ public:
unsigned getMaxNumElements(ElementCount VF) const {
if (!VF.isScalable())
return VF.getFixedValue();
- Optional<unsigned> MaxNumVScale = getMaxVScale();
- assert(MaxNumVScale && "Expected valid max vscale value");
- return *MaxNumVScale * VF.getKnownMinValue();
+
+ return VF.getKnownMinValue() * ST->getVScaleForTuning();
}
unsigned getMaxInterleaveFactor(unsigned VF);
@@ -180,8 +177,7 @@ public:
InstructionCost getSpliceCost(VectorType *Tp, int Index);
InstructionCost getArithmeticInstrCost(
- unsigned Opcode, Type *Ty,
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+ unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
@@ -209,7 +205,8 @@ public:
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
- TTI::UnrollingPreferences &UP);
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE);
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP);
@@ -229,7 +226,7 @@ public:
if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())
return true;
- if (Ty->isIntegerTy(1) || Ty->isIntegerTy(8) || Ty->isIntegerTy(16) ||
+ if (Ty->isIntegerTy(8) || Ty->isIntegerTy(16) ||
Ty->isIntegerTy(32) || Ty->isIntegerTy(64))
return true;
@@ -244,8 +241,7 @@ public:
if (isa<FixedVectorType>(DataType) && !ST->useSVEForFixedLengthVectors())
return false; // Fall back to scalarization of masked operations.
- return !DataType->getScalarType()->isIntegerTy(1) &&
- isElementTypeLegalForScalableVector(DataType->getScalarType());
+ return isElementTypeLegalForScalableVector(DataType->getScalarType());
}
bool isLegalMaskedLoad(Type *DataType, Align Alignment) {
@@ -266,8 +262,7 @@ public:
DataTypeFVTy->getNumElements() < 2))
return false;
- return !DataType->getScalarType()->isIntegerTy(1) &&
- isElementTypeLegalForScalableVector(DataType->getScalarType());
+ return isElementTypeLegalForScalableVector(DataType->getScalarType());
}
bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
@@ -295,10 +290,11 @@ public:
return BaseT::isLegalNTStore(DataType, Alignment);
}
+ bool enableOrderedReductions() const { return true; }
+
InstructionCost getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
- Align Alignment, unsigned AddressSpace,
- TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
+ Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond = false, bool UseMaskForGaps = false);
bool
@@ -316,9 +312,9 @@ public:
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
ElementCount VF) const;
- InstructionCost getArithmeticReductionCost(
- unsigned Opcode, VectorType *Ty, Optional<FastMathFlags> FMF,
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput);
+ InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
+ Optional<FastMathFlags> FMF,
+ TTI::TargetCostKind CostKind);
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
ArrayRef<int> Mask, int Index,
diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index f27e9b2ef0f0..6d3aea2721de 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -6,13 +6,13 @@
//
//===----------------------------------------------------------------------===//
+#include "AArch64InstrInfo.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64InstPrinter.h"
#include "MCTargetDesc/AArch64MCExpr.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "MCTargetDesc/AArch64TargetStreamer.h"
#include "TargetInfo/AArch64TargetInfo.h"
-#include "AArch64InstrInfo.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
@@ -40,15 +40,15 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCTargetOptions.h"
-#include "llvm/MC/SubtargetFeature.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetParser.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cctype>
@@ -1511,7 +1511,7 @@ public:
}
bool isAdrpLabel() const {
- // Validation was handled during parsing, so we just sanity check that
+ // Validation was handled during parsing, so we just verify that
// something didn't go haywire.
if (!isImm())
return false;
@@ -1527,7 +1527,7 @@ public:
}
bool isAdrLabel() const {
- // Validation was handled during parsing, so we just sanity check that
+ // Validation was handled during parsing, so we just verify that
// something didn't go haywire.
if (!isImm())
return false;
@@ -2672,8 +2672,7 @@ unsigned AArch64AsmParser::matchRegisterNameAlias(StringRef Name,
/// the register is added to the operand list.
OperandMatchResultTy
AArch64AsmParser::tryParseScalarRegister(unsigned &RegNum) {
- MCAsmParser &Parser = getParser();
- const AsmToken &Tok = Parser.getTok();
+ const AsmToken &Tok = getTok();
if (Tok.isNot(AsmToken::Identifier))
return MatchOperand_NoMatch;
@@ -2683,22 +2682,21 @@ AArch64AsmParser::tryParseScalarRegister(unsigned &RegNum) {
return MatchOperand_NoMatch;
RegNum = Reg;
- Parser.Lex(); // Eat identifier token.
+ Lex(); // Eat identifier token.
return MatchOperand_Success;
}
/// tryParseSysCROperand - Try to parse a system instruction CR operand name.
OperandMatchResultTy
AArch64AsmParser::tryParseSysCROperand(OperandVector &Operands) {
- MCAsmParser &Parser = getParser();
SMLoc S = getLoc();
- if (Parser.getTok().isNot(AsmToken::Identifier)) {
+ if (getTok().isNot(AsmToken::Identifier)) {
Error(S, "Expected cN operand where 0 <= N <= 15");
return MatchOperand_ParseFail;
}
- StringRef Tok = Parser.getTok().getIdentifier();
+ StringRef Tok = getTok().getIdentifier();
if (Tok[0] != 'c' && Tok[0] != 'C') {
Error(S, "Expected cN operand where 0 <= N <= 15");
return MatchOperand_ParseFail;
@@ -2711,7 +2709,7 @@ AArch64AsmParser::tryParseSysCROperand(OperandVector &Operands) {
return MatchOperand_ParseFail;
}
- Parser.Lex(); // Eat identifier token.
+ Lex(); // Eat identifier token.
Operands.push_back(
AArch64Operand::CreateSysCR(CRNum, S, getLoc(), getContext()));
return MatchOperand_Success;
@@ -2721,9 +2719,8 @@ AArch64AsmParser::tryParseSysCROperand(OperandVector &Operands) {
template <bool IsSVEPrefetch>
OperandMatchResultTy
AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) {
- MCAsmParser &Parser = getParser();
SMLoc S = getLoc();
- const AsmToken &Tok = Parser.getTok();
+ const AsmToken &Tok = getTok();
auto LookupByName = [](StringRef N) {
if (IsSVEPrefetch) {
@@ -2783,16 +2780,15 @@ AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) {
Operands.push_back(AArch64Operand::CreatePrefetch(
*PRFM, Tok.getString(), S, getContext()));
- Parser.Lex(); // Eat identifier token.
+ Lex(); // Eat identifier token.
return MatchOperand_Success;
}
/// tryParsePSBHint - Try to parse a PSB operand, mapped to Hint command
OperandMatchResultTy
AArch64AsmParser::tryParsePSBHint(OperandVector &Operands) {
- MCAsmParser &Parser = getParser();
SMLoc S = getLoc();
- const AsmToken &Tok = Parser.getTok();
+ const AsmToken &Tok = getTok();
if (Tok.isNot(AsmToken::Identifier)) {
TokError("invalid operand for instruction");
return MatchOperand_ParseFail;
@@ -2806,16 +2802,15 @@ AArch64AsmParser::tryParsePSBHint(OperandVector &Operands) {
Operands.push_back(AArch64Operand::CreatePSBHint(
PSB->Encoding, Tok.getString(), S, getContext()));
- Parser.Lex(); // Eat identifier token.
+ Lex(); // Eat identifier token.
return MatchOperand_Success;
}
/// tryParseBTIHint - Try to parse a BTI operand, mapped to Hint command
OperandMatchResultTy
AArch64AsmParser::tryParseBTIHint(OperandVector &Operands) {
- MCAsmParser &Parser = getParser();
SMLoc S = getLoc();
- const AsmToken &Tok = Parser.getTok();
+ const AsmToken &Tok = getTok();
if (Tok.isNot(AsmToken::Identifier)) {
TokError("invalid operand for instruction");
return MatchOperand_ParseFail;
@@ -2829,7 +2824,7 @@ AArch64AsmParser::tryParseBTIHint(OperandVector &Operands) {
Operands.push_back(AArch64Operand::CreateBTIHint(
BTI->Encoding, Tok.getString(), S, getContext()));
- Parser.Lex(); // Eat identifier token.
+ Lex(); // Eat identifier token.
return MatchOperand_Success;
}
@@ -2837,12 +2832,11 @@ AArch64AsmParser::tryParseBTIHint(OperandVector &Operands) {
/// instruction.
OperandMatchResultTy
AArch64AsmParser::tryParseAdrpLabel(OperandVector &Operands) {
- MCAsmParser &Parser = getParser();
SMLoc S = getLoc();
const MCExpr *Expr = nullptr;
- if (Parser.getTok().is(AsmToken::Hash)) {
- Parser.Lex(); // Eat hash token.
+ if (getTok().is(AsmToken::Hash)) {
+ Lex(); // Eat hash token.
}
if (parseSymbolicImmVal(Expr))
@@ -2894,11 +2888,11 @@ AArch64AsmParser::tryParseAdrLabel(OperandVector &Operands) {
const MCExpr *Expr = nullptr;
// Leave anything with a bracket to the default for SVE
- if (getParser().getTok().is(AsmToken::LBrac))
+ if (getTok().is(AsmToken::LBrac))
return MatchOperand_NoMatch;
- if (getParser().getTok().is(AsmToken::Hash))
- getParser().Lex(); // Eat hash token.
+ if (getTok().is(AsmToken::Hash))
+ Lex(); // Eat hash token.
if (parseSymbolicImmVal(Expr))
return MatchOperand_ParseFail;
@@ -2927,7 +2921,6 @@ AArch64AsmParser::tryParseAdrLabel(OperandVector &Operands) {
template<bool AddFPZeroAsLiteral>
OperandMatchResultTy
AArch64AsmParser::tryParseFPImm(OperandVector &Operands) {
- MCAsmParser &Parser = getParser();
SMLoc S = getLoc();
bool Hash = parseOptionalToken(AsmToken::Hash);
@@ -2935,7 +2928,7 @@ AArch64AsmParser::tryParseFPImm(OperandVector &Operands) {
// Handle negation, as that still comes through as a separate token.
bool isNegative = parseOptionalToken(AsmToken::Minus);
- const AsmToken &Tok = Parser.getTok();
+ const AsmToken &Tok = getTok();
if (!Tok.is(AsmToken::Real) && !Tok.is(AsmToken::Integer)) {
if (!Hash)
return MatchOperand_NoMatch;
@@ -2974,7 +2967,7 @@ AArch64AsmParser::tryParseFPImm(OperandVector &Operands) {
RealVal, *StatusOrErr == APFloat::opOK, S, getContext()));
}
- Parser.Lex(); // Eat the token.
+ Lex(); // Eat the token.
return MatchOperand_Success;
}
@@ -2983,51 +2976,50 @@ AArch64AsmParser::tryParseFPImm(OperandVector &Operands) {
/// a shift suffix, for example '#1, lsl #12'.
OperandMatchResultTy
AArch64AsmParser::tryParseImmWithOptionalShift(OperandVector &Operands) {
- MCAsmParser &Parser = getParser();
SMLoc S = getLoc();
- if (Parser.getTok().is(AsmToken::Hash))
- Parser.Lex(); // Eat '#'
- else if (Parser.getTok().isNot(AsmToken::Integer))
+ if (getTok().is(AsmToken::Hash))
+ Lex(); // Eat '#'
+ else if (getTok().isNot(AsmToken::Integer))
// Operand should start from # or should be integer, emit error otherwise.
return MatchOperand_NoMatch;
const MCExpr *Imm = nullptr;
if (parseSymbolicImmVal(Imm))
return MatchOperand_ParseFail;
- else if (Parser.getTok().isNot(AsmToken::Comma)) {
+ else if (getTok().isNot(AsmToken::Comma)) {
Operands.push_back(
AArch64Operand::CreateImm(Imm, S, getLoc(), getContext()));
return MatchOperand_Success;
}
// Eat ','
- Parser.Lex();
+ Lex();
// The optional operand must be "lsl #N" where N is non-negative.
- if (!Parser.getTok().is(AsmToken::Identifier) ||
- !Parser.getTok().getIdentifier().equals_insensitive("lsl")) {
+ if (!getTok().is(AsmToken::Identifier) ||
+ !getTok().getIdentifier().equals_insensitive("lsl")) {
Error(getLoc(), "only 'lsl #+N' valid after immediate");
return MatchOperand_ParseFail;
}
// Eat 'lsl'
- Parser.Lex();
+ Lex();
parseOptionalToken(AsmToken::Hash);
- if (Parser.getTok().isNot(AsmToken::Integer)) {
+ if (getTok().isNot(AsmToken::Integer)) {
Error(getLoc(), "only 'lsl #+N' valid after immediate");
return MatchOperand_ParseFail;
}
- int64_t ShiftAmount = Parser.getTok().getIntVal();
+ int64_t ShiftAmount = getTok().getIntVal();
if (ShiftAmount < 0) {
Error(getLoc(), "positive shift amount required");
return MatchOperand_ParseFail;
}
- Parser.Lex(); // Eat the number
+ Lex(); // Eat the number
// Just in case the optional lsl #0 is used for immediates other than zero.
if (ShiftAmount == 0 && Imm != nullptr) {
@@ -3085,16 +3077,15 @@ AArch64CC::CondCode AArch64AsmParser::parseCondCodeString(StringRef Cond) {
/// parseCondCode - Parse a Condition Code operand.
bool AArch64AsmParser::parseCondCode(OperandVector &Operands,
bool invertCondCode) {
- MCAsmParser &Parser = getParser();
SMLoc S = getLoc();
- const AsmToken &Tok = Parser.getTok();
+ const AsmToken &Tok = getTok();
assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
StringRef Cond = Tok.getString();
AArch64CC::CondCode CC = parseCondCodeString(Cond);
if (CC == AArch64CC::Invalid)
return TokError("invalid condition code");
- Parser.Lex(); // Eat identifier token.
+ Lex(); // Eat identifier token.
if (invertCondCode) {
if (CC == AArch64CC::AL || CC == AArch64CC::NV)
@@ -3109,8 +3100,7 @@ bool AArch64AsmParser::parseCondCode(OperandVector &Operands,
OperandMatchResultTy
AArch64AsmParser::tryParseSVCR(OperandVector &Operands) {
- MCAsmParser &Parser = getParser();
- const AsmToken &Tok = Parser.getTok();
+ const AsmToken &Tok = getTok();
SMLoc S = getLoc();
if (Tok.isNot(AsmToken::Identifier)) {
@@ -3125,20 +3115,19 @@ AArch64AsmParser::tryParseSVCR(OperandVector &Operands) {
Operands.push_back(
AArch64Operand::CreateSVCR(PStateImm, Tok.getString(), S, getContext()));
- Parser.Lex(); // Eat identifier token.
+ Lex(); // Eat identifier token.
return MatchOperand_Success;
}
OperandMatchResultTy
AArch64AsmParser::tryParseMatrixRegister(OperandVector &Operands) {
- MCAsmParser &Parser = getParser();
- const AsmToken &Tok = Parser.getTok();
+ const AsmToken &Tok = getTok();
SMLoc S = getLoc();
StringRef Name = Tok.getString();
if (Name.equals_insensitive("za")) {
- Parser.Lex(); // eat "za"
+ Lex(); // eat "za"
Operands.push_back(AArch64Operand::CreateMatrixRegister(
AArch64::ZA, /*ElementWidth=*/0, MatrixKind::Array, S, getLoc(),
getContext()));
@@ -3176,7 +3165,7 @@ AArch64AsmParser::tryParseMatrixRegister(OperandVector &Operands) {
}
unsigned ElementWidth = KindRes->second;
- Parser.Lex();
+ Lex();
Operands.push_back(AArch64Operand::CreateMatrixRegister(
Reg, ElementWidth, Kind, S, getLoc(), getContext()));
@@ -3194,8 +3183,7 @@ AArch64AsmParser::tryParseMatrixRegister(OperandVector &Operands) {
/// them if present.
OperandMatchResultTy
AArch64AsmParser::tryParseOptionalShiftExtend(OperandVector &Operands) {
- MCAsmParser &Parser = getParser();
- const AsmToken &Tok = Parser.getTok();
+ const AsmToken &Tok = getTok();
std::string LowerID = Tok.getString().lower();
AArch64_AM::ShiftExtendType ShOp =
StringSwitch<AArch64_AM::ShiftExtendType>(LowerID)
@@ -3218,7 +3206,7 @@ AArch64AsmParser::tryParseOptionalShiftExtend(OperandVector &Operands) {
return MatchOperand_NoMatch;
SMLoc S = Tok.getLoc();
- Parser.Lex();
+ Lex();
bool Hash = parseOptionalToken(AsmToken::Hash);
@@ -3241,9 +3229,8 @@ AArch64AsmParser::tryParseOptionalShiftExtend(OperandVector &Operands) {
// Make sure we do actually have a number, identifier or a parenthesized
// expression.
SMLoc E = getLoc();
- if (!Parser.getTok().is(AsmToken::Integer) &&
- !Parser.getTok().is(AsmToken::LParen) &&
- !Parser.getTok().is(AsmToken::Identifier)) {
+ if (!getTok().is(AsmToken::Integer) && !getTok().is(AsmToken::LParen) &&
+ !getTok().is(AsmToken::Identifier)) {
Error(E, "expected integer shift amount");
return MatchOperand_ParseFail;
}
@@ -3309,6 +3296,8 @@ static const struct Extension {
};
static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {
+ if (FBS[AArch64::HasV8_0aOps])
+ Str += "ARMv8a";
if (FBS[AArch64::HasV8_1aOps])
Str += "ARMv8.1a";
else if (FBS[AArch64::HasV8_2aOps])
@@ -3323,6 +3312,14 @@ static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {
Str += "ARMv8.6a";
else if (FBS[AArch64::HasV8_7aOps])
Str += "ARMv8.7a";
+ else if (FBS[AArch64::HasV9_0aOps])
+ Str += "ARMv9-a";
+ else if (FBS[AArch64::HasV9_1aOps])
+ Str += "ARMv9.1a";
+ else if (FBS[AArch64::HasV9_2aOps])
+ Str += "ARMv9.2a";
+ else if (FBS[AArch64::HasV8_0rOps])
+ Str += "ARMv8r";
else {
SmallVector<std::string, 2> ExtMatches;
for (const auto& Ext : ExtensionMap) {
@@ -3358,14 +3355,13 @@ void AArch64AsmParser::createSysAlias(uint16_t Encoding, OperandVector &Operands
/// the SYS instruction. Parse them specially so that we create a SYS MCInst.
bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
OperandVector &Operands) {
- if (Name.find('.') != StringRef::npos)
+ if (Name.contains('.'))
return TokError("invalid operand");
Mnemonic = Name;
Operands.push_back(AArch64Operand::CreateToken("sys", NameLoc, getContext()));
- MCAsmParser &Parser = getParser();
- const AsmToken &Tok = Parser.getTok();
+ const AsmToken &Tok = getTok();
StringRef Op = Tok.getString();
SMLoc S = Tok.getLoc();
@@ -3376,7 +3372,7 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
else if (!IC->haveFeatures(getSTI().getFeatureBits())) {
std::string Str("IC " + std::string(IC->Name) + " requires: ");
setRequiredFeatureString(IC->getRequiredFeatures(), Str);
- return TokError(Str.c_str());
+ return TokError(Str);
}
createSysAlias(IC->Encoding, Operands, S);
} else if (Mnemonic == "dc") {
@@ -3386,7 +3382,7 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
else if (!DC->haveFeatures(getSTI().getFeatureBits())) {
std::string Str("DC " + std::string(DC->Name) + " requires: ");
setRequiredFeatureString(DC->getRequiredFeatures(), Str);
- return TokError(Str.c_str());
+ return TokError(Str);
}
createSysAlias(DC->Encoding, Operands, S);
} else if (Mnemonic == "at") {
@@ -3396,7 +3392,7 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
else if (!AT->haveFeatures(getSTI().getFeatureBits())) {
std::string Str("AT " + std::string(AT->Name) + " requires: ");
setRequiredFeatureString(AT->getRequiredFeatures(), Str);
- return TokError(Str.c_str());
+ return TokError(Str);
}
createSysAlias(AT->Encoding, Operands, S);
} else if (Mnemonic == "tlbi") {
@@ -3406,7 +3402,7 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
else if (!TLBI->haveFeatures(getSTI().getFeatureBits())) {
std::string Str("TLBI " + std::string(TLBI->Name) + " requires: ");
setRequiredFeatureString(TLBI->getRequiredFeatures(), Str);
- return TokError(Str.c_str());
+ return TokError(Str);
}
createSysAlias(TLBI->Encoding, Operands, S);
} else if (Mnemonic == "cfp" || Mnemonic == "dvp" || Mnemonic == "cpp") {
@@ -3417,7 +3413,7 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
std::string Str(
Mnemonic.upper() + std::string(PRCTX->Name) + " requires: ");
setRequiredFeatureString(PRCTX->getRequiredFeatures(), Str);
- return TokError(Str.c_str());
+ return TokError(Str);
}
uint16_t PRCTX_Op2 =
Mnemonic == "cfp" ? 4 :
@@ -3428,7 +3424,7 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
createSysAlias(PRCTX->Encoding << 3 | PRCTX_Op2 , Operands, S);
}
- Parser.Lex(); // Eat operand.
+ Lex(); // Eat operand.
bool ExpectRegister = (Op.lower().find("all") == StringRef::npos);
bool HasRegister = false;
@@ -3454,7 +3450,7 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
OperandMatchResultTy
AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
- const AsmToken &Tok = Parser.getTok();
+ const AsmToken &Tok = getTok();
if (Mnemonic == "tsb" && Tok.isNot(AsmToken::Identifier)) {
TokError("'csync' operand expected");
@@ -3519,15 +3515,14 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) {
Operands.push_back(AArch64Operand::CreateBarrier(
DB ? DB->Encoding : TSB->Encoding, Tok.getString(), getLoc(),
getContext(), false /*hasnXSModifier*/));
- Parser.Lex(); // Consume the option
+ Lex(); // Consume the option
return MatchOperand_Success;
}
OperandMatchResultTy
AArch64AsmParser::tryParseBarriernXSOperand(OperandVector &Operands) {
- MCAsmParser &Parser = getParser();
- const AsmToken &Tok = Parser.getTok();
+ const AsmToken &Tok = getTok();
assert(Mnemonic == "dsb" && "Instruction does not accept nXS operands");
if (Mnemonic != "dsb")
@@ -3574,15 +3569,14 @@ AArch64AsmParser::tryParseBarriernXSOperand(OperandVector &Operands) {
Operands.push_back(
AArch64Operand::CreateBarrier(DB->Encoding, Tok.getString(), getLoc(),
getContext(), true /*hasnXSModifier*/));
- Parser.Lex(); // Consume the option
+ Lex(); // Consume the option
return MatchOperand_Success;
}
OperandMatchResultTy
AArch64AsmParser::tryParseSysReg(OperandVector &Operands) {
- MCAsmParser &Parser = getParser();
- const AsmToken &Tok = Parser.getTok();
+ const AsmToken &Tok = getTok();
if (Tok.isNot(AsmToken::Identifier))
return MatchOperand_NoMatch;
@@ -3606,15 +3600,14 @@ AArch64AsmParser::tryParseSysReg(OperandVector &Operands) {
Operands.push_back(
AArch64Operand::CreateSysReg(Tok.getString(), getLoc(), MRSReg, MSRReg,
PStateImm, getContext()));
- Parser.Lex(); // Eat identifier
+ Lex(); // Eat identifier
return MatchOperand_Success;
}
/// tryParseNeonVectorRegister - Parse a vector register operand.
bool AArch64AsmParser::tryParseNeonVectorRegister(OperandVector &Operands) {
- MCAsmParser &Parser = getParser();
- if (Parser.getTok().isNot(AsmToken::Identifier))
+ if (getTok().isNot(AsmToken::Identifier))
return true;
SMLoc S = getLoc();
@@ -3675,8 +3668,7 @@ AArch64AsmParser::tryParseVectorIndex(OperandVector &Operands) {
OperandMatchResultTy
AArch64AsmParser::tryParseVectorRegister(unsigned &Reg, StringRef &Kind,
RegKind MatchKind) {
- MCAsmParser &Parser = getParser();
- const AsmToken &Tok = Parser.getTok();
+ const AsmToken &Tok = getTok();
if (Tok.isNot(AsmToken::Identifier))
return MatchOperand_NoMatch;
@@ -3696,7 +3688,7 @@ AArch64AsmParser::tryParseVectorRegister(unsigned &Reg, StringRef &Kind,
return MatchOperand_ParseFail;
}
}
- Parser.Lex(); // Eat the register token.
+ Lex(); // Eat the register token.
Reg = RegNum;
return MatchOperand_Success;
@@ -3733,8 +3725,7 @@ AArch64AsmParser::tryParseSVEPredicateVector(OperandVector &Operands) {
}
// Not all predicates are followed by a '/m' or '/z'.
- MCAsmParser &Parser = getParser();
- if (Parser.getTok().isNot(AsmToken::Slash))
+ if (getTok().isNot(AsmToken::Slash))
return MatchOperand_Success;
// But when they do they shouldn't have an element type suffix.
@@ -3746,10 +3737,10 @@ AArch64AsmParser::tryParseSVEPredicateVector(OperandVector &Operands) {
// Add a literal slash as operand
Operands.push_back(AArch64Operand::CreateToken("/", getLoc(), getContext()));
- Parser.Lex(); // Eat the slash.
+ Lex(); // Eat the slash.
// Zeroing or merging?
- auto Pred = Parser.getTok().getString().lower();
+ auto Pred = getTok().getString().lower();
if (Pred != "z" && Pred != "m") {
Error(getLoc(), "expecting 'm' or 'z' predication");
return MatchOperand_ParseFail;
@@ -3759,7 +3750,7 @@ AArch64AsmParser::tryParseSVEPredicateVector(OperandVector &Operands) {
const char *ZM = Pred == "z" ? "z" : "m";
Operands.push_back(AArch64Operand::CreateToken(ZM, getLoc(), getContext()));
- Parser.Lex(); // Eat zero/merge token.
+ Lex(); // Eat zero/merge token.
return MatchOperand_Success;
}
@@ -3777,17 +3768,16 @@ bool AArch64AsmParser::parseRegister(OperandVector &Operands) {
}
bool AArch64AsmParser::parseSymbolicImmVal(const MCExpr *&ImmVal) {
- MCAsmParser &Parser = getParser();
bool HasELFModifier = false;
AArch64MCExpr::VariantKind RefKind;
if (parseOptionalToken(AsmToken::Colon)) {
HasELFModifier = true;
- if (Parser.getTok().isNot(AsmToken::Identifier))
+ if (getTok().isNot(AsmToken::Identifier))
return TokError("expect relocation specifier in operand after ':'");
- std::string LowerCase = Parser.getTok().getIdentifier().lower();
+ std::string LowerCase = getTok().getIdentifier().lower();
RefKind = StringSwitch<AArch64MCExpr::VariantKind>(LowerCase)
.Case("lo12", AArch64MCExpr::VK_LO12)
.Case("abs_g3", AArch64MCExpr::VK_ABS_G3)
@@ -3840,7 +3830,7 @@ bool AArch64AsmParser::parseSymbolicImmVal(const MCExpr *&ImmVal) {
if (RefKind == AArch64MCExpr::VK_INVALID)
return TokError("expect relocation specifier in operand after ':'");
- Parser.Lex(); // Eat identifier
+ Lex(); // Eat identifier
if (parseToken(AsmToken::Colon, "expect ':' after relocation specifier"))
return true;
@@ -3857,14 +3847,11 @@ bool AArch64AsmParser::parseSymbolicImmVal(const MCExpr *&ImmVal) {
OperandMatchResultTy
AArch64AsmParser::tryParseMatrixTileList(OperandVector &Operands) {
- MCAsmParser &Parser = getParser();
-
- if (Parser.getTok().isNot(AsmToken::LCurly))
+ if (getTok().isNot(AsmToken::LCurly))
return MatchOperand_NoMatch;
- auto ParseMatrixTile = [this, &Parser](unsigned &Reg,
- unsigned &ElementWidth) {
- StringRef Name = Parser.getTok().getString();
+ auto ParseMatrixTile = [this](unsigned &Reg, unsigned &ElementWidth) {
+ StringRef Name = getTok().getString();
size_t DotPosition = Name.find('.');
if (DotPosition == StringRef::npos)
return MatchOperand_NoMatch;
@@ -3882,13 +3869,13 @@ AArch64AsmParser::tryParseMatrixTileList(OperandVector &Operands) {
}
ElementWidth = KindRes->second;
Reg = RegNum;
- Parser.Lex(); // Eat the register.
+ Lex(); // Eat the register.
return MatchOperand_Success;
};
SMLoc S = getLoc();
- auto LCurly = Parser.getTok();
- Parser.Lex(); // Eat left bracket token.
+ auto LCurly = getTok();
+ Lex(); // Eat left bracket token.
// Empty matrix list
if (parseOptionalToken(AsmToken::RCurly)) {
@@ -3898,8 +3885,8 @@ AArch64AsmParser::tryParseMatrixTileList(OperandVector &Operands) {
}
// Try parse {za} alias early
- if (Parser.getTok().getString().equals_insensitive("za")) {
- Parser.Lex(); // Eat 'za'
+ if (getTok().getString().equals_insensitive("za")) {
+ Lex(); // Eat 'za'
if (parseToken(AsmToken::RCurly, "'}' expected"))
return MatchOperand_ParseFail;
@@ -3914,7 +3901,7 @@ AArch64AsmParser::tryParseMatrixTileList(OperandVector &Operands) {
unsigned FirstReg, ElementWidth;
auto ParseRes = ParseMatrixTile(FirstReg, ElementWidth);
if (ParseRes != MatchOperand_Success) {
- Parser.getLexer().UnLex(LCurly);
+ getLexer().UnLex(LCurly);
return ParseRes;
}
@@ -3974,13 +3961,13 @@ OperandMatchResultTy
AArch64AsmParser::tryParseVectorList(OperandVector &Operands,
bool ExpectMatch) {
MCAsmParser &Parser = getParser();
- if (!Parser.getTok().is(AsmToken::LCurly))
+ if (!getTok().is(AsmToken::LCurly))
return MatchOperand_NoMatch;
// Wrapper around parse function
- auto ParseVector = [this, &Parser](unsigned &Reg, StringRef &Kind, SMLoc Loc,
- bool NoMatchIsError) {
- auto RegTok = Parser.getTok();
+ auto ParseVector = [this](unsigned &Reg, StringRef &Kind, SMLoc Loc,
+ bool NoMatchIsError) {
+ auto RegTok = getTok();
auto ParseRes = tryParseVectorRegister(Reg, Kind, VectorKind);
if (ParseRes == MatchOperand_Success) {
if (parseVectorKind(Kind, VectorKind))
@@ -4000,8 +3987,8 @@ AArch64AsmParser::tryParseVectorList(OperandVector &Operands,
};
SMLoc S = getLoc();
- auto LCurly = Parser.getTok();
- Parser.Lex(); // Eat left bracket token.
+ auto LCurly = getTok();
+ Lex(); // Eat left bracket token.
StringRef Kind;
unsigned FirstReg;
@@ -4117,7 +4104,7 @@ AArch64AsmParser::tryParseGPR64sp0Operand(OperandVector &Operands) {
parseOptionalToken(AsmToken::Hash);
- if (getParser().getTok().isNot(AsmToken::Integer)) {
+ if (getTok().isNot(AsmToken::Integer)) {
Error(getLoc(), "index must be absent or #0");
return MatchOperand_ParseFail;
}
@@ -4145,14 +4132,14 @@ AArch64AsmParser::tryParseGPROperand(OperandVector &Operands) {
return Res;
// No shift/extend is the default.
- if (!ParseShiftExtend || getParser().getTok().isNot(AsmToken::Comma)) {
+ if (!ParseShiftExtend || getTok().isNot(AsmToken::Comma)) {
Operands.push_back(AArch64Operand::CreateReg(
RegNum, RegKind::Scalar, StartLoc, getLoc(), getContext(), EqTy));
return MatchOperand_Success;
}
// Eat the comma
- getParser().Lex();
+ Lex();
// Match the shift
SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> ExtOpnd;
@@ -4178,23 +4165,23 @@ bool AArch64AsmParser::parseOptionalMulOperand(OperandVector &Operands) {
bool NextIsVL =
Parser.getLexer().peekTok().getString().equals_insensitive("vl");
bool NextIsHash = Parser.getLexer().peekTok().is(AsmToken::Hash);
- if (!Parser.getTok().getString().equals_insensitive("mul") ||
+ if (!getTok().getString().equals_insensitive("mul") ||
!(NextIsVL || NextIsHash))
return true;
Operands.push_back(
AArch64Operand::CreateToken("mul", getLoc(), getContext()));
- Parser.Lex(); // Eat the "mul"
+ Lex(); // Eat the "mul"
if (NextIsVL) {
Operands.push_back(
AArch64Operand::CreateToken("vl", getLoc(), getContext()));
- Parser.Lex(); // Eat the "vl"
+ Lex(); // Eat the "vl"
return false;
}
if (NextIsHash) {
- Parser.Lex(); // Eat the #
+ Lex(); // Eat the #
SMLoc S = getLoc();
// Parse immediate operand.
@@ -4212,8 +4199,7 @@ bool AArch64AsmParser::parseOptionalMulOperand(OperandVector &Operands) {
}
bool AArch64AsmParser::parseKeywordOperand(OperandVector &Operands) {
- MCAsmParser &Parser = getParser();
- auto Tok = Parser.getTok();
+ auto Tok = getTok();
if (Tok.isNot(AsmToken::Identifier))
return true;
@@ -4225,7 +4211,7 @@ bool AArch64AsmParser::parseKeywordOperand(OperandVector &Operands) {
Operands.push_back(
AArch64Operand::CreateToken(Keyword, Tok.getLoc(), getContext()));
- Parser.Lex();
+ Lex();
return false;
}
@@ -4264,7 +4250,7 @@ bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode,
case AsmToken::LBrac: {
Operands.push_back(
AArch64Operand::CreateToken("[", getLoc(), getContext()));
- Parser.Lex(); // Eat '['
+ Lex(); // Eat '['
// There's no comma after a '[', so we can parse the next operand
// immediately.
@@ -4276,7 +4262,7 @@ bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode,
Operands.push_back(
AArch64Operand::CreateToken("{", getLoc(), getContext()));
- Parser.Lex(); // Eat '{'
+ Lex(); // Eat '{'
// There's no comma after a '{', so we can parse the next operand
// immediately.
@@ -4332,18 +4318,18 @@ bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode,
// Parse a negative sign
bool isNegative = false;
- if (Parser.getTok().is(AsmToken::Minus)) {
+ if (getTok().is(AsmToken::Minus)) {
isNegative = true;
// We need to consume this token only when we have a Real, otherwise
// we let parseSymbolicImmVal take care of it
if (Parser.getLexer().peekTok().is(AsmToken::Real))
- Parser.Lex();
+ Lex();
}
// The only Real that should come through here is a literal #0.0 for
// the fcmp[e] r, #0.0 instructions. They expect raw token operands,
// so convert the value.
- const AsmToken &Tok = Parser.getTok();
+ const AsmToken &Tok = getTok();
if (Tok.is(AsmToken::Real)) {
APFloat RealVal(APFloat::IEEEdouble(), Tok.getString());
uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
@@ -4353,7 +4339,7 @@ bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode,
return TokError("unexpected floating point literal");
else if (IntVal != 0 || isNegative)
return TokError("expected floating-point constant #0.0");
- Parser.Lex(); // Eat the token.
+ Lex(); // Eat the token.
Operands.push_back(AArch64Operand::CreateToken("#0", S, getContext()));
Operands.push_back(AArch64Operand::CreateToken(".0", S, getContext()));
@@ -4372,7 +4358,7 @@ bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode,
SMLoc Loc = getLoc();
if (Mnemonic != "ldr") // only parse for ldr pseudo (e.g. ldr r0, =val)
return TokError("unexpected token in operand");
- Parser.Lex(); // Eat '='
+ Lex(); // Eat '='
const MCExpr *SubExprVal;
if (getParser().parseExpression(SubExprVal))
return true;
@@ -4431,11 +4417,10 @@ bool AArch64AsmParser::parseImmExpr(int64_t &Out) {
}
bool AArch64AsmParser::parseComma() {
- if (check(getParser().getTok().isNot(AsmToken::Comma), getLoc(),
- "expected comma"))
+ if (check(getTok().isNot(AsmToken::Comma), getLoc(), "expected comma"))
return true;
// Eat the comma
- getParser().Lex();
+ Lex();
return false;
}
@@ -4507,7 +4492,6 @@ bool AArch64AsmParser::regsEqual(const MCParsedAsmOperand &Op1,
bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info,
StringRef Name, SMLoc NameLoc,
OperandVector &Operands) {
- MCAsmParser &Parser = getParser();
Name = StringSwitch<StringRef>(Name.lower())
.Case("beq", "b.eq")
.Case("bne", "b.ne")
@@ -4530,8 +4514,8 @@ bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info,
.Default(Name);
// First check for the AArch64-specific .req directive.
- if (Parser.getTok().is(AsmToken::Identifier) &&
- Parser.getTok().getIdentifier().lower() == ".req") {
+ if (getTok().is(AsmToken::Identifier) &&
+ getTok().getIdentifier().lower() == ".req") {
parseDirectiveReq(Name, NameLoc);
// We always return 'error' for this, as we're done with this
 // statement and don't need to match the 'instruction'.
@@ -5084,6 +5068,8 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode,
return Error(Loc, "index must be a multiple of 8 in range [0, 32760].");
case Match_InvalidMemoryIndexed16:
return Error(Loc, "index must be a multiple of 16 in range [0, 65520].");
+ case Match_InvalidImm0_0:
+ return Error(Loc, "immediate must be 0.");
case Match_InvalidImm0_1:
return Error(Loc, "immediate must be an integer in range [0, 1].");
case Match_InvalidImm0_3:
@@ -5128,6 +5114,8 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode,
case Match_InvalidSVECpyImm64:
return Error(Loc, "immediate must be an integer in range [-128, 127] or a "
"multiple of 256 in range [-32768, 32512]");
+ case Match_InvalidIndexRange0_0:
+ return Error(Loc, "expected lane specifier '[0]'");
case Match_InvalidIndexRange1_1:
return Error(Loc, "expected lane specifier '[1]'");
case Match_InvalidIndexRange0_15:
@@ -5256,14 +5244,6 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode,
return Error(Loc, "invalid predicate register.");
case Match_InvalidSVEPredicate3bAnyReg:
return Error(Loc, "invalid restricted predicate register, expected p0..p7 (without element suffix)");
- case Match_InvalidSVEPredicate3bBReg:
- return Error(Loc, "invalid restricted predicate register, expected p0.b..p7.b");
- case Match_InvalidSVEPredicate3bHReg:
- return Error(Loc, "invalid restricted predicate register, expected p0.h..p7.h");
- case Match_InvalidSVEPredicate3bSReg:
- return Error(Loc, "invalid restricted predicate register, expected p0.s..p7.s");
- case Match_InvalidSVEPredicate3bDReg:
- return Error(Loc, "invalid restricted predicate register, expected p0.d..p7.d");
case Match_InvalidSVEExactFPImmOperandHalfOne:
return Error(Loc, "Invalid floating point constant, expected 0.5 or 1.0.");
case Match_InvalidSVEExactFPImmOperandHalfTwo:
@@ -5724,6 +5704,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_InvalidMemoryIndexedSImm9:
case Match_InvalidMemoryIndexed16SImm9:
case Match_InvalidMemoryIndexed8SImm10:
+ case Match_InvalidImm0_0:
case Match_InvalidImm0_1:
case Match_InvalidImm0_3:
case Match_InvalidImm0_7:
@@ -5745,6 +5726,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_InvalidSVECpyImm16:
case Match_InvalidSVECpyImm32:
case Match_InvalidSVECpyImm64:
+ case Match_InvalidIndexRange0_0:
case Match_InvalidIndexRange1_1:
case Match_InvalidIndexRange0_15:
case Match_InvalidIndexRange0_7:
@@ -5811,10 +5793,6 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_InvalidSVEPredicateSReg:
case Match_InvalidSVEPredicateDReg:
case Match_InvalidSVEPredicate3bAnyReg:
- case Match_InvalidSVEPredicate3bBReg:
- case Match_InvalidSVEPredicate3bHReg:
- case Match_InvalidSVEPredicate3bSReg:
- case Match_InvalidSVEPredicate3bDReg:
case Match_InvalidSVEExactFPImmOperandHalfOne:
case Match_InvalidSVEExactFPImmOperandHalfTwo:
case Match_InvalidSVEExactFPImmOperandZeroOne:
@@ -5958,6 +5936,9 @@ static void ExpandCryptoAEK(AArch64::ArchKind ArchKind,
case AArch64::ArchKind::ARMV8_5A:
case AArch64::ArchKind::ARMV8_6A:
case AArch64::ArchKind::ARMV8_7A:
+ case AArch64::ArchKind::ARMV9A:
+ case AArch64::ArchKind::ARMV9_1A:
+ case AArch64::ArchKind::ARMV9_2A:
case AArch64::ArchKind::ARMV8R:
RequestedExtensions.push_back("sm4");
RequestedExtensions.push_back("sha3");
@@ -5980,6 +5961,9 @@ static void ExpandCryptoAEK(AArch64::ArchKind ArchKind,
case AArch64::ArchKind::ARMV8_5A:
case AArch64::ArchKind::ARMV8_6A:
case AArch64::ArchKind::ARMV8_7A:
+ case AArch64::ArchKind::ARMV9A:
+ case AArch64::ArchKind::ARMV9_1A:
+ case AArch64::ArchKind::ARMV9_2A:
RequestedExtensions.push_back("nosm4");
RequestedExtensions.push_back("nosha3");
RequestedExtensions.push_back("nosha2");
@@ -6206,12 +6190,12 @@ bool AArch64AsmParser::parseDirectiveTLSDescCall(SMLoc L) {
/// The number of arguments depends on the loh identifier.
bool AArch64AsmParser::parseDirectiveLOH(StringRef IDVal, SMLoc Loc) {
MCLOHType Kind;
- if (getParser().getTok().isNot(AsmToken::Identifier)) {
- if (getParser().getTok().isNot(AsmToken::Integer))
+ if (getTok().isNot(AsmToken::Identifier)) {
+ if (getTok().isNot(AsmToken::Integer))
return TokError("expected an identifier or a number in directive");
// We successfully get a numeric value for the identifier.
// Check if it is valid.
- int64_t Id = getParser().getTok().getIntVal();
+ int64_t Id = getTok().getIntVal();
if (Id <= -1U && !isValidMCLOHType(Id))
return TokError("invalid numeric identifier in directive");
Kind = (MCLOHType)Id;
@@ -6265,8 +6249,7 @@ bool AArch64AsmParser::parseDirectiveLtorg(SMLoc L) {
/// parseDirectiveReq
/// ::= name .req registername
bool AArch64AsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
- MCAsmParser &Parser = getParser();
- Parser.Lex(); // Eat the '.req' token.
+ Lex(); // Eat the '.req' token.
SMLoc SRegLoc = getLoc();
RegKind RegisterKind = RegKind::Scalar;
unsigned RegNum;
@@ -6329,11 +6312,10 @@ bool AArch64AsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
/// parseDirectiveUneq
/// ::= .unreq registername
bool AArch64AsmParser::parseDirectiveUnreq(SMLoc L) {
- MCAsmParser &Parser = getParser();
if (getTok().isNot(AsmToken::Identifier))
return TokError("unexpected input in .unreq directive.");
- RegisterReqs.erase(Parser.getTok().getIdentifier().lower());
- Parser.Lex(); // Eat the identifier.
+ RegisterReqs.erase(getTok().getIdentifier().lower());
+ Lex(); // Eat the identifier.
return parseToken(AsmToken::EndOfStatement);
}
@@ -6357,9 +6339,7 @@ bool AArch64AsmParser::parseDirectiveCFIBKeyFrame() {
/// parseDirectiveVariantPCS
/// ::= .variant_pcs symbolname
bool AArch64AsmParser::parseDirectiveVariantPCS(SMLoc L) {
- MCAsmParser &Parser = getParser();
-
- const AsmToken &Tok = Parser.getTok();
+ const AsmToken &Tok = getTok();
if (Tok.isNot(AsmToken::Identifier))
return TokError("expected symbol name");
@@ -6369,7 +6349,7 @@ bool AArch64AsmParser::parseDirectiveVariantPCS(SMLoc L) {
if (!Sym)
return TokError("unknown symbol");
- Parser.Lex(); // Eat the symbol
+ Lex(); // Eat the symbol
if (parseEOL())
return true;
@@ -6741,7 +6721,7 @@ AArch64AsmParser::tryParseGPRSeqPair(OperandVector &Operands) {
SMLoc S = getLoc();
- if (getParser().getTok().isNot(AsmToken::Identifier)) {
+ if (getTok().isNot(AsmToken::Identifier)) {
Error(S, "expected register");
return MatchOperand_ParseFail;
}
@@ -6773,12 +6753,12 @@ AArch64AsmParser::tryParseGPRSeqPair(OperandVector &Operands) {
return MatchOperand_ParseFail;
}
- if (getParser().getTok().isNot(AsmToken::Comma)) {
+ if (getTok().isNot(AsmToken::Comma)) {
Error(getLoc(), "expected comma");
return MatchOperand_ParseFail;
}
// Eat the comma
- getParser().Lex();
+ Lex();
SMLoc E = getLoc();
unsigned SecondReg;
@@ -6833,7 +6813,7 @@ AArch64AsmParser::tryParseSVEDataVector(OperandVector &Operands) {
unsigned ElementWidth = KindRes->second;
// No shift/extend is the default.
- if (!ParseShiftExtend || getParser().getTok().isNot(AsmToken::Comma)) {
+ if (!ParseShiftExtend || getTok().isNot(AsmToken::Comma)) {
Operands.push_back(AArch64Operand::CreateVectorReg(
RegNum, RegKind::SVEDataVector, ElementWidth, S, S, getContext()));
@@ -6844,7 +6824,7 @@ AArch64AsmParser::tryParseSVEDataVector(OperandVector &Operands) {
}
// Eat the comma
- getParser().Lex();
+ Lex();
// Match the shift
SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> ExtOpnd;
@@ -6866,7 +6846,7 @@ AArch64AsmParser::tryParseSVEPattern(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SMLoc SS = getLoc();
- const AsmToken &TokE = Parser.getTok();
+ const AsmToken &TokE = getTok();
bool IsHash = TokE.is(AsmToken::Hash);
if (!IsHash && TokE.isNot(AsmToken::Identifier))
@@ -6874,7 +6854,7 @@ AArch64AsmParser::tryParseSVEPattern(OperandVector &Operands) {
int64_t Pattern;
if (IsHash) {
- Parser.Lex(); // Eat hash
+ Lex(); // Eat hash
// Parse the immediate operand.
const MCExpr *ImmVal;
@@ -6893,7 +6873,7 @@ AArch64AsmParser::tryParseSVEPattern(OperandVector &Operands) {
if (!Pat)
return MatchOperand_NoMatch;
- Parser.Lex();
+ Lex();
Pattern = Pat->Encoding;
assert(Pattern >= 0 && Pattern < 32);
}
diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index 1ed8a80a4600..96d410e42be2 100644
--- a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -21,10 +21,10 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
#include <algorithm>
#include <memory>
@@ -225,13 +225,12 @@ static DecodeStatus DecodeXSeqPairsClassRegisterClass(MCInst &Inst,
unsigned RegNo,
uint64_t Addr,
const void *Decoder);
-static DecodeStatus DecodeSVELogicalImmInstruction(llvm::MCInst &Inst,
- uint32_t insn,
+static DecodeStatus DecodeSVELogicalImmInstruction(MCInst &Inst, uint32_t insn,
uint64_t Address,
const void *Decoder);
-template<int Bits>
-static DecodeStatus DecodeSImm(llvm::MCInst &Inst, uint64_t Imm,
- uint64_t Address, const void *Decoder);
+template <int Bits>
+static DecodeStatus DecodeSImm(MCInst &Inst, uint64_t Imm, uint64_t Address,
+ const void *Decoder);
template <int ElementWidth>
static DecodeStatus DecodeImm8OptLsl(MCInst &Inst, unsigned Imm,
uint64_t Addr, const void *Decoder);
@@ -324,6 +323,33 @@ DecodeStatus AArch64Disassembler::getInstruction(MCInst &MI, uint64_t &Size,
// ^ insert implicit 8-bit element tile
MI.insert(MI.begin()+2, MCOperand::createReg(AArch64::ZAB0));
break;
+ case AArch64::LD1_MXIPXX_H_Q:
+ case AArch64::LD1_MXIPXX_V_Q:
+ case AArch64::ST1_MXIPXX_H_Q:
+ case AArch64::ST1_MXIPXX_V_Q:
+ // 128-bit loads/stores have an implicit zero vector index.
+ MI.insert(MI.begin()+2, MCOperand::createImm(0));
+ break;
+ // 128-bit mova has an implicit zero vector index.
+ case AArch64::INSERT_MXIPZ_H_Q:
+ case AArch64::INSERT_MXIPZ_V_Q:
+ MI.insert(MI.begin()+2, MCOperand::createImm(0));
+ break;
+ case AArch64::EXTRACT_ZPMXI_H_Q:
+ case AArch64::EXTRACT_ZPMXI_V_Q:
+ MI.addOperand(MCOperand::createImm(0));
+ break;
+ case AArch64::SMOVvi8to32_idx0:
+ case AArch64::SMOVvi8to64_idx0:
+ case AArch64::SMOVvi16to32_idx0:
+ case AArch64::SMOVvi16to64_idx0:
+ case AArch64::SMOVvi32to64_idx0:
+ case AArch64::UMOVvi8_idx0:
+ case AArch64::UMOVvi16_idx0:
+ case AArch64::UMOVvi32_idx0:
+ case AArch64::UMOVvi64_idx0:
+ MI.addOperand(MCOperand::createImm(0));
+ break;
}
if (Result != MCDisassembler::Fail)
@@ -366,23 +392,14 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Disassembler() {
createAArch64ExternalSymbolizer);
}
-static const unsigned FPR128DecoderTable[] = {
- AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
- AArch64::Q5, AArch64::Q6, AArch64::Q7, AArch64::Q8, AArch64::Q9,
- AArch64::Q10, AArch64::Q11, AArch64::Q12, AArch64::Q13, AArch64::Q14,
- AArch64::Q15, AArch64::Q16, AArch64::Q17, AArch64::Q18, AArch64::Q19,
- AArch64::Q20, AArch64::Q21, AArch64::Q22, AArch64::Q23, AArch64::Q24,
- AArch64::Q25, AArch64::Q26, AArch64::Q27, AArch64::Q28, AArch64::Q29,
- AArch64::Q30, AArch64::Q31
-};
-
static DecodeStatus DecodeFPR128RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Addr,
const void *Decoder) {
if (RegNo > 31)
return Fail;
- unsigned Register = FPR128DecoderTable[RegNo];
+ unsigned Register =
+ AArch64MCRegisterClasses[AArch64::FPR128RegClassID].getRegister(RegNo);
Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
@@ -395,107 +412,63 @@ static DecodeStatus DecodeFPR128_loRegisterClass(MCInst &Inst, unsigned RegNo,
return DecodeFPR128RegisterClass(Inst, RegNo, Addr, Decoder);
}
-static const unsigned FPR64DecoderTable[] = {
- AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
- AArch64::D5, AArch64::D6, AArch64::D7, AArch64::D8, AArch64::D9,
- AArch64::D10, AArch64::D11, AArch64::D12, AArch64::D13, AArch64::D14,
- AArch64::D15, AArch64::D16, AArch64::D17, AArch64::D18, AArch64::D19,
- AArch64::D20, AArch64::D21, AArch64::D22, AArch64::D23, AArch64::D24,
- AArch64::D25, AArch64::D26, AArch64::D27, AArch64::D28, AArch64::D29,
- AArch64::D30, AArch64::D31
-};
-
static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Addr,
const void *Decoder) {
if (RegNo > 31)
return Fail;
- unsigned Register = FPR64DecoderTable[RegNo];
+ unsigned Register =
+ AArch64MCRegisterClasses[AArch64::FPR64RegClassID].getRegister(RegNo);
Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
-static const unsigned FPR32DecoderTable[] = {
- AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
- AArch64::S5, AArch64::S6, AArch64::S7, AArch64::S8, AArch64::S9,
- AArch64::S10, AArch64::S11, AArch64::S12, AArch64::S13, AArch64::S14,
- AArch64::S15, AArch64::S16, AArch64::S17, AArch64::S18, AArch64::S19,
- AArch64::S20, AArch64::S21, AArch64::S22, AArch64::S23, AArch64::S24,
- AArch64::S25, AArch64::S26, AArch64::S27, AArch64::S28, AArch64::S29,
- AArch64::S30, AArch64::S31
-};
-
static DecodeStatus DecodeFPR32RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Addr,
const void *Decoder) {
if (RegNo > 31)
return Fail;
- unsigned Register = FPR32DecoderTable[RegNo];
+ unsigned Register =
+ AArch64MCRegisterClasses[AArch64::FPR32RegClassID].getRegister(RegNo);
Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
-static const unsigned FPR16DecoderTable[] = {
- AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
- AArch64::H5, AArch64::H6, AArch64::H7, AArch64::H8, AArch64::H9,
- AArch64::H10, AArch64::H11, AArch64::H12, AArch64::H13, AArch64::H14,
- AArch64::H15, AArch64::H16, AArch64::H17, AArch64::H18, AArch64::H19,
- AArch64::H20, AArch64::H21, AArch64::H22, AArch64::H23, AArch64::H24,
- AArch64::H25, AArch64::H26, AArch64::H27, AArch64::H28, AArch64::H29,
- AArch64::H30, AArch64::H31
-};
-
static DecodeStatus DecodeFPR16RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Addr,
const void *Decoder) {
if (RegNo > 31)
return Fail;
- unsigned Register = FPR16DecoderTable[RegNo];
+ unsigned Register =
+ AArch64MCRegisterClasses[AArch64::FPR16RegClassID].getRegister(RegNo);
Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
-static const unsigned FPR8DecoderTable[] = {
- AArch64::B0, AArch64::B1, AArch64::B2, AArch64::B3, AArch64::B4,
- AArch64::B5, AArch64::B6, AArch64::B7, AArch64::B8, AArch64::B9,
- AArch64::B10, AArch64::B11, AArch64::B12, AArch64::B13, AArch64::B14,
- AArch64::B15, AArch64::B16, AArch64::B17, AArch64::B18, AArch64::B19,
- AArch64::B20, AArch64::B21, AArch64::B22, AArch64::B23, AArch64::B24,
- AArch64::B25, AArch64::B26, AArch64::B27, AArch64::B28, AArch64::B29,
- AArch64::B30, AArch64::B31
-};
-
static DecodeStatus DecodeFPR8RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Addr,
const void *Decoder) {
if (RegNo > 31)
return Fail;
- unsigned Register = FPR8DecoderTable[RegNo];
+ unsigned Register =
+ AArch64MCRegisterClasses[AArch64::FPR8RegClassID].getRegister(RegNo);
Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
-static const unsigned GPR64DecoderTable[] = {
- AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
- AArch64::X5, AArch64::X6, AArch64::X7, AArch64::X8, AArch64::X9,
- AArch64::X10, AArch64::X11, AArch64::X12, AArch64::X13, AArch64::X14,
- AArch64::X15, AArch64::X16, AArch64::X17, AArch64::X18, AArch64::X19,
- AArch64::X20, AArch64::X21, AArch64::X22, AArch64::X23, AArch64::X24,
- AArch64::X25, AArch64::X26, AArch64::X27, AArch64::X28, AArch64::FP,
- AArch64::LR, AArch64::XZR
-};
-
static DecodeStatus DecodeGPR64commonRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Addr,
const void *Decoder) {
if (RegNo > 30)
return Fail;
- unsigned Register = GPR64DecoderTable[RegNo];
+ unsigned Register =
+ AArch64MCRegisterClasses[AArch64::GPR64commonRegClassID].getRegister(
+ RegNo);
Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
@@ -506,26 +479,12 @@ static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst, unsigned RegNo,
if (RegNo > 31)
return Fail;
- unsigned Register = GPR64DecoderTable[RegNo];
+ unsigned Register =
+ AArch64MCRegisterClasses[AArch64::GPR64RegClassID].getRegister(RegNo);
Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
-static const unsigned GPR64x8DecoderTable[] = {
- AArch64::X0_X1_X2_X3_X4_X5_X6_X7,
- AArch64::X2_X3_X4_X5_X6_X7_X8_X9,
- AArch64::X4_X5_X6_X7_X8_X9_X10_X11,
- AArch64::X6_X7_X8_X9_X10_X11_X12_X13,
- AArch64::X8_X9_X10_X11_X12_X13_X14_X15,
- AArch64::X10_X11_X12_X13_X14_X15_X16_X17,
- AArch64::X12_X13_X14_X15_X16_X17_X18_X19,
- AArch64::X14_X15_X16_X17_X18_X19_X20_X21,
- AArch64::X16_X17_X18_X19_X20_X21_X22_X23,
- AArch64::X18_X19_X20_X21_X22_X23_X24_X25,
- AArch64::X20_X21_X22_X23_X24_X25_X26_X27,
- AArch64::X22_X23_X24_X25_X26_X27_X28_FP,
-};
-
static DecodeStatus DecodeGPR64x8ClassRegisterClass(MCInst &Inst,
unsigned RegNo,
uint64_t Address,
@@ -535,7 +494,9 @@ static DecodeStatus DecodeGPR64x8ClassRegisterClass(MCInst &Inst,
if (RegNo & 1)
return Fail;
- unsigned Register = GPR64x8DecoderTable[RegNo >> 1];
+ unsigned Register =
+ AArch64MCRegisterClasses[AArch64::GPR64x8ClassRegClassID].getRegister(
+ RegNo >> 1);
Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
@@ -545,17 +506,12 @@ static DecodeStatus DecodeGPR64spRegisterClass(MCInst &Inst, unsigned RegNo,
const void *Decoder) {
if (RegNo > 31)
return Fail;
- unsigned Register = GPR64DecoderTable[RegNo];
- if (Register == AArch64::XZR)
- Register = AArch64::SP;
+ unsigned Register =
+ AArch64MCRegisterClasses[AArch64::GPR64spRegClassID].getRegister(RegNo);
Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
-static const unsigned MatrixIndexGPR32_12_15DecoderTable[] = {
- AArch64::W12, AArch64::W13, AArch64::W14, AArch64::W15
-};
-
static DecodeStatus DecodeMatrixIndexGPR32_12_15RegisterClass(MCInst &Inst,
unsigned RegNo,
uint64_t Addr,
@@ -563,28 +519,21 @@ static DecodeStatus DecodeMatrixIndexGPR32_12_15RegisterClass(MCInst &Inst,
if (RegNo > 3)
return Fail;
- unsigned Register = MatrixIndexGPR32_12_15DecoderTable[RegNo];
+ unsigned Register =
+ AArch64MCRegisterClasses[AArch64::MatrixIndexGPR32_12_15RegClassID]
+ .getRegister(RegNo);
Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
-static const unsigned GPR32DecoderTable[] = {
- AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
- AArch64::W5, AArch64::W6, AArch64::W7, AArch64::W8, AArch64::W9,
- AArch64::W10, AArch64::W11, AArch64::W12, AArch64::W13, AArch64::W14,
- AArch64::W15, AArch64::W16, AArch64::W17, AArch64::W18, AArch64::W19,
- AArch64::W20, AArch64::W21, AArch64::W22, AArch64::W23, AArch64::W24,
- AArch64::W25, AArch64::W26, AArch64::W27, AArch64::W28, AArch64::W29,
- AArch64::W30, AArch64::WZR
-};
-
static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Addr,
const void *Decoder) {
if (RegNo > 31)
return Fail;
- unsigned Register = GPR32DecoderTable[RegNo];
+ unsigned Register =
+ AArch64MCRegisterClasses[AArch64::GPR32RegClassID].getRegister(RegNo);
Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
@@ -595,22 +544,11 @@ static DecodeStatus DecodeGPR32spRegisterClass(MCInst &Inst, unsigned RegNo,
if (RegNo > 31)
return Fail;
- unsigned Register = GPR32DecoderTable[RegNo];
- if (Register == AArch64::WZR)
- Register = AArch64::WSP;
+ unsigned Register =
+ AArch64MCRegisterClasses[AArch64::GPR32spRegClassID].getRegister(RegNo);
Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
-static const unsigned ZPRDecoderTable[] = {
- AArch64::Z0, AArch64::Z1, AArch64::Z2, AArch64::Z3,
- AArch64::Z4, AArch64::Z5, AArch64::Z6, AArch64::Z7,
- AArch64::Z8, AArch64::Z9, AArch64::Z10, AArch64::Z11,
- AArch64::Z12, AArch64::Z13, AArch64::Z14, AArch64::Z15,
- AArch64::Z16, AArch64::Z17, AArch64::Z18, AArch64::Z19,
- AArch64::Z20, AArch64::Z21, AArch64::Z22, AArch64::Z23,
- AArch64::Z24, AArch64::Z25, AArch64::Z26, AArch64::Z27,
- AArch64::Z28, AArch64::Z29, AArch64::Z30, AArch64::Z31
-};
static DecodeStatus DecodeZPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
@@ -618,7 +556,8 @@ static DecodeStatus DecodeZPRRegisterClass(MCInst &Inst, unsigned RegNo,
if (RegNo > 31)
return Fail;
- unsigned Register = ZPRDecoderTable[RegNo];
+ unsigned Register =
+ AArch64MCRegisterClasses[AArch64::ZPRRegClassID].getRegister(RegNo);
Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
@@ -639,71 +578,35 @@ static DecodeStatus DecodeZPR_3bRegisterClass(MCInst &Inst, unsigned RegNo,
return DecodeZPRRegisterClass(Inst, RegNo, Address, Decoder);
}
-static const unsigned ZZDecoderTable[] = {
- AArch64::Z0_Z1, AArch64::Z1_Z2, AArch64::Z2_Z3, AArch64::Z3_Z4,
- AArch64::Z4_Z5, AArch64::Z5_Z6, AArch64::Z6_Z7, AArch64::Z7_Z8,
- AArch64::Z8_Z9, AArch64::Z9_Z10, AArch64::Z10_Z11, AArch64::Z11_Z12,
- AArch64::Z12_Z13, AArch64::Z13_Z14, AArch64::Z14_Z15, AArch64::Z15_Z16,
- AArch64::Z16_Z17, AArch64::Z17_Z18, AArch64::Z18_Z19, AArch64::Z19_Z20,
- AArch64::Z20_Z21, AArch64::Z21_Z22, AArch64::Z22_Z23, AArch64::Z23_Z24,
- AArch64::Z24_Z25, AArch64::Z25_Z26, AArch64::Z26_Z27, AArch64::Z27_Z28,
- AArch64::Z28_Z29, AArch64::Z29_Z30, AArch64::Z30_Z31, AArch64::Z31_Z0
-};
-
static DecodeStatus DecodeZPR2RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void* Decoder) {
if (RegNo > 31)
return Fail;
- unsigned Register = ZZDecoderTable[RegNo];
+ unsigned Register =
+ AArch64MCRegisterClasses[AArch64::ZPR2RegClassID].getRegister(RegNo);
Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
-static const unsigned ZZZDecoderTable[] = {
- AArch64::Z0_Z1_Z2, AArch64::Z1_Z2_Z3, AArch64::Z2_Z3_Z4,
- AArch64::Z3_Z4_Z5, AArch64::Z4_Z5_Z6, AArch64::Z5_Z6_Z7,
- AArch64::Z6_Z7_Z8, AArch64::Z7_Z8_Z9, AArch64::Z8_Z9_Z10,
- AArch64::Z9_Z10_Z11, AArch64::Z10_Z11_Z12, AArch64::Z11_Z12_Z13,
- AArch64::Z12_Z13_Z14, AArch64::Z13_Z14_Z15, AArch64::Z14_Z15_Z16,
- AArch64::Z15_Z16_Z17, AArch64::Z16_Z17_Z18, AArch64::Z17_Z18_Z19,
- AArch64::Z18_Z19_Z20, AArch64::Z19_Z20_Z21, AArch64::Z20_Z21_Z22,
- AArch64::Z21_Z22_Z23, AArch64::Z22_Z23_Z24, AArch64::Z23_Z24_Z25,
- AArch64::Z24_Z25_Z26, AArch64::Z25_Z26_Z27, AArch64::Z26_Z27_Z28,
- AArch64::Z27_Z28_Z29, AArch64::Z28_Z29_Z30, AArch64::Z29_Z30_Z31,
- AArch64::Z30_Z31_Z0, AArch64::Z31_Z0_Z1
-};
-
static DecodeStatus DecodeZPR3RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void* Decoder) {
if (RegNo > 31)
return Fail;
- unsigned Register = ZZZDecoderTable[RegNo];
+ unsigned Register =
+ AArch64MCRegisterClasses[AArch64::ZPR3RegClassID].getRegister(RegNo);
Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
-static const unsigned ZZZZDecoderTable[] = {
- AArch64::Z0_Z1_Z2_Z3, AArch64::Z1_Z2_Z3_Z4, AArch64::Z2_Z3_Z4_Z5,
- AArch64::Z3_Z4_Z5_Z6, AArch64::Z4_Z5_Z6_Z7, AArch64::Z5_Z6_Z7_Z8,
- AArch64::Z6_Z7_Z8_Z9, AArch64::Z7_Z8_Z9_Z10, AArch64::Z8_Z9_Z10_Z11,
- AArch64::Z9_Z10_Z11_Z12, AArch64::Z10_Z11_Z12_Z13, AArch64::Z11_Z12_Z13_Z14,
- AArch64::Z12_Z13_Z14_Z15, AArch64::Z13_Z14_Z15_Z16, AArch64::Z14_Z15_Z16_Z17,
- AArch64::Z15_Z16_Z17_Z18, AArch64::Z16_Z17_Z18_Z19, AArch64::Z17_Z18_Z19_Z20,
- AArch64::Z18_Z19_Z20_Z21, AArch64::Z19_Z20_Z21_Z22, AArch64::Z20_Z21_Z22_Z23,
- AArch64::Z21_Z22_Z23_Z24, AArch64::Z22_Z23_Z24_Z25, AArch64::Z23_Z24_Z25_Z26,
- AArch64::Z24_Z25_Z26_Z27, AArch64::Z25_Z26_Z27_Z28, AArch64::Z26_Z27_Z28_Z29,
- AArch64::Z27_Z28_Z29_Z30, AArch64::Z28_Z29_Z30_Z31, AArch64::Z29_Z30_Z31_Z0,
- AArch64::Z30_Z31_Z0_Z1, AArch64::Z31_Z0_Z1_Z2
-};
-
static DecodeStatus DecodeZPR4RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void* Decoder) {
if (RegNo > 31)
return Fail;
- unsigned Register = ZZZZDecoderTable[RegNo];
+ unsigned Register =
+ AArch64MCRegisterClasses[AArch64::ZPR4RegClassID].getRegister(RegNo);
Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
@@ -741,19 +644,13 @@ static DecodeStatus DecodeMatrixTile(MCInst &Inst, unsigned RegNo,
return Success;
}
-static const unsigned PPRDecoderTable[] = {
- AArch64::P0, AArch64::P1, AArch64::P2, AArch64::P3,
- AArch64::P4, AArch64::P5, AArch64::P6, AArch64::P7,
- AArch64::P8, AArch64::P9, AArch64::P10, AArch64::P11,
- AArch64::P12, AArch64::P13, AArch64::P14, AArch64::P15
-};
-
static DecodeStatus DecodePPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Addr, const void *Decoder) {
if (RegNo > 15)
return Fail;
- unsigned Register = PPRDecoderTable[RegNo];
+ unsigned Register =
+ AArch64MCRegisterClasses[AArch64::PPRRegClassID].getRegister(RegNo);
Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
@@ -768,157 +665,64 @@ static DecodeStatus DecodePPR_3bRegisterClass(MCInst &Inst, unsigned RegNo,
return DecodePPRRegisterClass(Inst, RegNo, Addr, Decoder);
}
-static const unsigned VectorDecoderTable[] = {
- AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
- AArch64::Q5, AArch64::Q6, AArch64::Q7, AArch64::Q8, AArch64::Q9,
- AArch64::Q10, AArch64::Q11, AArch64::Q12, AArch64::Q13, AArch64::Q14,
- AArch64::Q15, AArch64::Q16, AArch64::Q17, AArch64::Q18, AArch64::Q19,
- AArch64::Q20, AArch64::Q21, AArch64::Q22, AArch64::Q23, AArch64::Q24,
- AArch64::Q25, AArch64::Q26, AArch64::Q27, AArch64::Q28, AArch64::Q29,
- AArch64::Q30, AArch64::Q31
-};
-
-static DecodeStatus DecodeVectorRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Addr,
- const void *Decoder) {
- if (RegNo > 31)
- return Fail;
-
- unsigned Register = VectorDecoderTable[RegNo];
- Inst.addOperand(MCOperand::createReg(Register));
- return Success;
-}
-
-static const unsigned QQDecoderTable[] = {
- AArch64::Q0_Q1, AArch64::Q1_Q2, AArch64::Q2_Q3, AArch64::Q3_Q4,
- AArch64::Q4_Q5, AArch64::Q5_Q6, AArch64::Q6_Q7, AArch64::Q7_Q8,
- AArch64::Q8_Q9, AArch64::Q9_Q10, AArch64::Q10_Q11, AArch64::Q11_Q12,
- AArch64::Q12_Q13, AArch64::Q13_Q14, AArch64::Q14_Q15, AArch64::Q15_Q16,
- AArch64::Q16_Q17, AArch64::Q17_Q18, AArch64::Q18_Q19, AArch64::Q19_Q20,
- AArch64::Q20_Q21, AArch64::Q21_Q22, AArch64::Q22_Q23, AArch64::Q23_Q24,
- AArch64::Q24_Q25, AArch64::Q25_Q26, AArch64::Q26_Q27, AArch64::Q27_Q28,
- AArch64::Q28_Q29, AArch64::Q29_Q30, AArch64::Q30_Q31, AArch64::Q31_Q0
-};
-
static DecodeStatus DecodeQQRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Addr, const void *Decoder) {
if (RegNo > 31)
return Fail;
- unsigned Register = QQDecoderTable[RegNo];
+ unsigned Register =
+ AArch64MCRegisterClasses[AArch64::QQRegClassID].getRegister(RegNo);
Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
-static const unsigned QQQDecoderTable[] = {
- AArch64::Q0_Q1_Q2, AArch64::Q1_Q2_Q3, AArch64::Q2_Q3_Q4,
- AArch64::Q3_Q4_Q5, AArch64::Q4_Q5_Q6, AArch64::Q5_Q6_Q7,
- AArch64::Q6_Q7_Q8, AArch64::Q7_Q8_Q9, AArch64::Q8_Q9_Q10,
- AArch64::Q9_Q10_Q11, AArch64::Q10_Q11_Q12, AArch64::Q11_Q12_Q13,
- AArch64::Q12_Q13_Q14, AArch64::Q13_Q14_Q15, AArch64::Q14_Q15_Q16,
- AArch64::Q15_Q16_Q17, AArch64::Q16_Q17_Q18, AArch64::Q17_Q18_Q19,
- AArch64::Q18_Q19_Q20, AArch64::Q19_Q20_Q21, AArch64::Q20_Q21_Q22,
- AArch64::Q21_Q22_Q23, AArch64::Q22_Q23_Q24, AArch64::Q23_Q24_Q25,
- AArch64::Q24_Q25_Q26, AArch64::Q25_Q26_Q27, AArch64::Q26_Q27_Q28,
- AArch64::Q27_Q28_Q29, AArch64::Q28_Q29_Q30, AArch64::Q29_Q30_Q31,
- AArch64::Q30_Q31_Q0, AArch64::Q31_Q0_Q1
-};
-
static DecodeStatus DecodeQQQRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Addr, const void *Decoder) {
if (RegNo > 31)
return Fail;
- unsigned Register = QQQDecoderTable[RegNo];
+ unsigned Register =
+ AArch64MCRegisterClasses[AArch64::QQQRegClassID].getRegister(RegNo);
Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
-static const unsigned QQQQDecoderTable[] = {
- AArch64::Q0_Q1_Q2_Q3, AArch64::Q1_Q2_Q3_Q4, AArch64::Q2_Q3_Q4_Q5,
- AArch64::Q3_Q4_Q5_Q6, AArch64::Q4_Q5_Q6_Q7, AArch64::Q5_Q6_Q7_Q8,
- AArch64::Q6_Q7_Q8_Q9, AArch64::Q7_Q8_Q9_Q10, AArch64::Q8_Q9_Q10_Q11,
- AArch64::Q9_Q10_Q11_Q12, AArch64::Q10_Q11_Q12_Q13, AArch64::Q11_Q12_Q13_Q14,
- AArch64::Q12_Q13_Q14_Q15, AArch64::Q13_Q14_Q15_Q16, AArch64::Q14_Q15_Q16_Q17,
- AArch64::Q15_Q16_Q17_Q18, AArch64::Q16_Q17_Q18_Q19, AArch64::Q17_Q18_Q19_Q20,
- AArch64::Q18_Q19_Q20_Q21, AArch64::Q19_Q20_Q21_Q22, AArch64::Q20_Q21_Q22_Q23,
- AArch64::Q21_Q22_Q23_Q24, AArch64::Q22_Q23_Q24_Q25, AArch64::Q23_Q24_Q25_Q26,
- AArch64::Q24_Q25_Q26_Q27, AArch64::Q25_Q26_Q27_Q28, AArch64::Q26_Q27_Q28_Q29,
- AArch64::Q27_Q28_Q29_Q30, AArch64::Q28_Q29_Q30_Q31, AArch64::Q29_Q30_Q31_Q0,
- AArch64::Q30_Q31_Q0_Q1, AArch64::Q31_Q0_Q1_Q2
-};
-
static DecodeStatus DecodeQQQQRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Addr,
const void *Decoder) {
if (RegNo > 31)
return Fail;
- unsigned Register = QQQQDecoderTable[RegNo];
+ unsigned Register =
+ AArch64MCRegisterClasses[AArch64::QQQQRegClassID].getRegister(RegNo);
Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
-static const unsigned DDDecoderTable[] = {
- AArch64::D0_D1, AArch64::D1_D2, AArch64::D2_D3, AArch64::D3_D4,
- AArch64::D4_D5, AArch64::D5_D6, AArch64::D6_D7, AArch64::D7_D8,
- AArch64::D8_D9, AArch64::D9_D10, AArch64::D10_D11, AArch64::D11_D12,
- AArch64::D12_D13, AArch64::D13_D14, AArch64::D14_D15, AArch64::D15_D16,
- AArch64::D16_D17, AArch64::D17_D18, AArch64::D18_D19, AArch64::D19_D20,
- AArch64::D20_D21, AArch64::D21_D22, AArch64::D22_D23, AArch64::D23_D24,
- AArch64::D24_D25, AArch64::D25_D26, AArch64::D26_D27, AArch64::D27_D28,
- AArch64::D28_D29, AArch64::D29_D30, AArch64::D30_D31, AArch64::D31_D0
-};
-
static DecodeStatus DecodeDDRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Addr, const void *Decoder) {
if (RegNo > 31)
return Fail;
- unsigned Register = DDDecoderTable[RegNo];
+ unsigned Register =
+ AArch64MCRegisterClasses[AArch64::DDRegClassID].getRegister(RegNo);
Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
-static const unsigned DDDDecoderTable[] = {
- AArch64::D0_D1_D2, AArch64::D1_D2_D3, AArch64::D2_D3_D4,
- AArch64::D3_D4_D5, AArch64::D4_D5_D6, AArch64::D5_D6_D7,
- AArch64::D6_D7_D8, AArch64::D7_D8_D9, AArch64::D8_D9_D10,
- AArch64::D9_D10_D11, AArch64::D10_D11_D12, AArch64::D11_D12_D13,
- AArch64::D12_D13_D14, AArch64::D13_D14_D15, AArch64::D14_D15_D16,
- AArch64::D15_D16_D17, AArch64::D16_D17_D18, AArch64::D17_D18_D19,
- AArch64::D18_D19_D20, AArch64::D19_D20_D21, AArch64::D20_D21_D22,
- AArch64::D21_D22_D23, AArch64::D22_D23_D24, AArch64::D23_D24_D25,
- AArch64::D24_D25_D26, AArch64::D25_D26_D27, AArch64::D26_D27_D28,
- AArch64::D27_D28_D29, AArch64::D28_D29_D30, AArch64::D29_D30_D31,
- AArch64::D30_D31_D0, AArch64::D31_D0_D1
-};
-
static DecodeStatus DecodeDDDRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Addr, const void *Decoder) {
if (RegNo > 31)
return Fail;
- unsigned Register = DDDDecoderTable[RegNo];
+ unsigned Register =
+ AArch64MCRegisterClasses[AArch64::DDDRegClassID].getRegister(RegNo);
Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
-static const unsigned DDDDDecoderTable[] = {
- AArch64::D0_D1_D2_D3, AArch64::D1_D2_D3_D4, AArch64::D2_D3_D4_D5,
- AArch64::D3_D4_D5_D6, AArch64::D4_D5_D6_D7, AArch64::D5_D6_D7_D8,
- AArch64::D6_D7_D8_D9, AArch64::D7_D8_D9_D10, AArch64::D8_D9_D10_D11,
- AArch64::D9_D10_D11_D12, AArch64::D10_D11_D12_D13, AArch64::D11_D12_D13_D14,
- AArch64::D12_D13_D14_D15, AArch64::D13_D14_D15_D16, AArch64::D14_D15_D16_D17,
- AArch64::D15_D16_D17_D18, AArch64::D16_D17_D18_D19, AArch64::D17_D18_D19_D20,
- AArch64::D18_D19_D20_D21, AArch64::D19_D20_D21_D22, AArch64::D20_D21_D22_D23,
- AArch64::D21_D22_D23_D24, AArch64::D22_D23_D24_D25, AArch64::D23_D24_D25_D26,
- AArch64::D24_D25_D26_D27, AArch64::D25_D26_D27_D28, AArch64::D26_D27_D28_D29,
- AArch64::D27_D28_D29_D30, AArch64::D28_D29_D30_D31, AArch64::D29_D30_D31_D0,
- AArch64::D30_D31_D0_D1, AArch64::D31_D0_D1_D2
-};
-
static DecodeStatus DecodeDDDDRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Addr,
const void *Decoder) {
if (RegNo > 31)
return Fail;
- unsigned Register = DDDDDecoderTable[RegNo];
+ unsigned Register =
+ AArch64MCRegisterClasses[AArch64::DDDDRegClassID].getRegister(RegNo);
Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
@@ -1776,7 +1580,7 @@ static DecodeStatus DecodeModImmInstruction(MCInst &Inst, uint32_t insn,
if (Inst.getOpcode() == AArch64::MOVID)
DecodeFPR64RegisterClass(Inst, Rd, Addr, Decoder);
else
- DecodeVectorRegisterClass(Inst, Rd, Addr, Decoder);
+ DecodeFPR128RegisterClass(Inst, Rd, Addr, Decoder);
Inst.addOperand(MCOperand::createImm(imm));
@@ -1813,8 +1617,8 @@ static DecodeStatus DecodeModImmTiedInstruction(MCInst &Inst, uint32_t insn,
imm |= fieldFromInstruction(insn, 5, 5);
// Tied operands added twice.
- DecodeVectorRegisterClass(Inst, Rd, Addr, Decoder);
- DecodeVectorRegisterClass(Inst, Rd, Addr, Decoder);
+ DecodeFPR128RegisterClass(Inst, Rd, Addr, Decoder);
+ DecodeFPR128RegisterClass(Inst, Rd, Addr, Decoder);
Inst.addOperand(MCOperand::createImm(imm));
Inst.addOperand(MCOperand::createImm((cmode & 6) << 2));
@@ -1980,8 +1784,7 @@ static DecodeStatus DecodeXSeqPairsClassRegisterClass(MCInst &Inst,
RegNo, Addr, Decoder);
}
-static DecodeStatus DecodeSVELogicalImmInstruction(llvm::MCInst &Inst,
- uint32_t insn,
+static DecodeStatus DecodeSVELogicalImmInstruction(MCInst &Inst, uint32_t insn,
uint64_t Addr,
const void *Decoder) {
unsigned Zdn = fieldFromInstruction(insn, 0, 5);
@@ -1997,9 +1800,9 @@ static DecodeStatus DecodeSVELogicalImmInstruction(llvm::MCInst &Inst,
return Success;
}
-template<int Bits>
-static DecodeStatus DecodeSImm(llvm::MCInst &Inst, uint64_t Imm,
- uint64_t Address, const void *Decoder) {
+template <int Bits>
+static DecodeStatus DecodeSImm(MCInst &Inst, uint64_t Imm, uint64_t Address,
+ const void *Decoder) {
if (Imm & ~((1LL << Bits) - 1))
return Fail;
diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp b/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
index 3f815ac8c3d0..5b6f06f8dbb4 100644
--- a/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
+++ b/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
@@ -34,7 +34,9 @@ getVariant(uint64_t LLVMDisassembler_VariantKind) {
case LLVMDisassembler_VariantKind_ARM64_GOTPAGEOFF:
return MCSymbolRefExpr::VK_GOTPAGEOFF;
case LLVMDisassembler_VariantKind_ARM64_TLVP:
+ return MCSymbolRefExpr::VK_TLVPPAGE;
case LLVMDisassembler_VariantKind_ARM64_TLVOFF:
+ return MCSymbolRefExpr::VK_TLVPPAGEOFF;
default:
llvm_unreachable("bad LLVMDisassembler_VariantKind");
}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
index 28b234b180fc..ac08ee8ae8dd 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -156,7 +156,7 @@ struct IncomingArgHandler : public CallLowering::IncomingValueHandler {
}
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign &VA) override {
+ CCValAssign VA) override {
markPhysRegUsed(PhysReg);
IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
}
@@ -181,7 +181,18 @@ struct IncomingArgHandler : public CallLowering::IncomingValueHandler {
auto MMO = MF.getMachineMemOperand(
MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, LocTy,
inferAlignFromPtrInfo(MF, MPO));
- MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
+
+ switch (VA.getLocInfo()) {
+ case CCValAssign::LocInfo::ZExt:
+ MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, ValVReg, Addr, *MMO);
+ return;
+ case CCValAssign::LocInfo::SExt:
+ MIRBuilder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, ValVReg, Addr, *MMO);
+ return;
+ default:
+ MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
+ return;
+ }
}
/// How the physical register gets marked varies between formal
@@ -270,7 +281,7 @@ struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler {
}
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign &VA) override {
+ CCValAssign VA) override {
MIB.addUse(PhysReg, RegState::Implicit);
Register ExtReg = extendRegister(ValVReg, VA);
MIRBuilder.buildCopy(PhysReg, ExtReg);
@@ -376,11 +387,9 @@ bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
MVT NewVT = TLI.getRegisterTypeForCallingConv(Ctx, CC, SplitEVTs[i]);
if (EVT(NewVT) != SplitEVTs[i]) {
unsigned ExtendOp = TargetOpcode::G_ANYEXT;
- if (F.getAttributes().hasAttribute(AttributeList::ReturnIndex,
- Attribute::SExt))
+ if (F.getAttributes().hasRetAttr(Attribute::SExt))
ExtendOp = TargetOpcode::G_SEXT;
- else if (F.getAttributes().hasAttribute(AttributeList::ReturnIndex,
- Attribute::ZExt))
+ else if (F.getAttributes().hasRetAttr(Attribute::ZExt))
ExtendOp = TargetOpcode::G_ZEXT;
LLT NewLLT(NewVT);
@@ -522,6 +531,7 @@ bool AArch64CallLowering::lowerFormalArguments(
auto &DL = F.getParent()->getDataLayout();
SmallVector<ArgInfo, 8> SplitArgs;
+ SmallVector<std::pair<Register, Register>> BoolArgs;
unsigned i = 0;
for (auto &Arg : F.args()) {
if (DL.getTypeStoreSize(Arg.getType()).isZero())
@@ -530,6 +540,22 @@ bool AArch64CallLowering::lowerFormalArguments(
ArgInfo OrigArg{VRegs[i], Arg, i};
setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, F);
+ // i1 arguments are zero-extended to i8 by the caller. Emit a
+ // hint to reflect this.
+ if (OrigArg.Ty->isIntegerTy(1)) {
+ assert(OrigArg.Regs.size() == 1 &&
+ MRI.getType(OrigArg.Regs[0]).getSizeInBits() == 1 &&
+ "Unexpected registers used for i1 arg");
+
+ if (!OrigArg.Flags[0].isZExt()) {
+ // Lower i1 argument as i8, and insert AssertZExt + Trunc later.
+ Register OrigReg = OrigArg.Regs[0];
+ Register WideReg = MRI.createGenericVirtualRegister(LLT::scalar(8));
+ OrigArg.Regs[0] = WideReg;
+ BoolArgs.push_back({OrigReg, WideReg});
+ }
+ }
+
if (Arg.hasAttribute(Attribute::SwiftAsync))
MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
@@ -550,6 +576,18 @@ bool AArch64CallLowering::lowerFormalArguments(
F.getCallingConv(), F.isVarArg()))
return false;
+ if (!BoolArgs.empty()) {
+ for (auto &KV : BoolArgs) {
+ Register OrigReg = KV.first;
+ Register WideReg = KV.second;
+ LLT WideTy = MRI.getType(WideReg);
+ assert(MRI.getType(OrigReg).getScalarSizeInBits() == 1 &&
+ "Unexpected bit size of a bool arg");
+ MIRBuilder.buildTrunc(
+ OrigReg, MIRBuilder.buildAssertZExt(WideTy, WideReg, 1).getReg(0));
+ }
+ }
+
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
uint64_t StackOffset = Assigner.StackOffset;
if (F.isVarArg()) {
@@ -1042,8 +1080,19 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
for (auto &OrigArg : Info.OrigArgs) {
splitToValueTypes(OrigArg, OutArgs, DL, Info.CallConv);
// AAPCS requires that we zero-extend i1 to 8 bits by the caller.
- if (OrigArg.Ty->isIntegerTy(1))
- OutArgs.back().Flags[0].setZExt();
+ if (OrigArg.Ty->isIntegerTy(1)) {
+ ArgInfo &OutArg = OutArgs.back();
+ assert(OutArg.Regs.size() == 1 &&
+ MRI.getType(OutArg.Regs[0]).getSizeInBits() == 1 &&
+ "Unexpected registers used for i1 arg");
+
+ // We cannot use a ZExt ArgInfo flag here, because it will
+ // zero-extend the argument to i32 instead of just i8.
+ OutArg.Regs[0] =
+ MIRBuilder.buildZExt(LLT::scalar(8), OutArg.Regs[0]).getReg(0);
+ LLVMContext &Ctx = MF.getFunction().getContext();
+ OutArg.Ty = Type::getInt8Ty(Ctx);
+ }
}
SmallVector<ArgInfo, 8> InArgs;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
index 08d1c987dc3b..38afc5deb42f 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
@@ -26,7 +26,7 @@ AArch64GISelUtils::getAArch64VectorSplat(const MachineInstr &MI,
return None;
Register Src = MI.getOperand(1).getReg();
if (auto ValAndVReg =
- getConstantVRegValWithLookThrough(MI.getOperand(1).getReg(), MRI))
+ getAnyConstantVRegValWithLookThrough(MI.getOperand(1).getReg(), MRI))
return RegOrConstant(ValAndVReg->Value.getSExtValue());
return RegOrConstant(Src);
}
@@ -56,7 +56,7 @@ bool AArch64GISelUtils::isCMN(const MachineInstr *MaybeSub,
!CmpInst::isEquality(Pred))
return false;
auto MaybeZero =
- getConstantVRegValWithLookThrough(MaybeSub->getOperand(1).getReg(), MRI);
+ getIConstantVRegValWithLookThrough(MaybeSub->getOperand(1).getReg(), MRI);
return MaybeZero && MaybeZero->Value.getZExtValue() == 0;
}
@@ -68,7 +68,8 @@ bool AArch64GISelUtils::tryEmitBZero(MachineInstr &MI,
auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
if (!TLI.getLibcallName(RTLIB::BZERO))
return false;
- auto Zero = getConstantVRegValWithLookThrough(MI.getOperand(1).getReg(), MRI);
+ auto Zero =
+ getIConstantVRegValWithLookThrough(MI.getOperand(1).getReg(), MRI);
if (!Zero || Zero->Value.getSExtValue() != 0)
return false;
@@ -78,8 +79,8 @@ bool AArch64GISelUtils::tryEmitBZero(MachineInstr &MI,
if (!MinSize) {
// If the size is known, check it. If it is not known, assume using bzero is
// better.
- if (auto Size =
- getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI)) {
+ if (auto Size = getIConstantVRegValWithLookThrough(
+ MI.getOperand(2).getReg(), MRI)) {
if (Size->Value.getSExtValue() <= 256)
return false;
}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index a98248438e40..e090d87d59a2 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -22,6 +22,7 @@
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/Optional.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
@@ -163,6 +164,9 @@ private:
bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI);
bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
MachineRegisterInfo &MRI);
+ /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
+ /// SUBREG_TO_REG.
+ bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
@@ -171,6 +175,14 @@ private:
bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);
+
+ /// Helper function to select vector load intrinsics like
+ /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
+ /// \p Opc is the opcode that the selected instruction should use.
+ /// \p NumVecs is the number of vector destinations for the instruction.
+ /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
+ bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
+ MachineInstr &I);
bool selectIntrinsicWithSideEffects(MachineInstr &I,
MachineRegisterInfo &MRI);
bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
@@ -181,6 +193,7 @@ private:
bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
+ bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
unsigned emitConstantPoolEntry(const Constant *CPVal,
MachineFunction &MF) const;
@@ -263,13 +276,9 @@ private:
const RegisterBank &DstRB, LLT ScalarTy,
Register VecReg, unsigned LaneIdx,
MachineIRBuilder &MIRBuilder) const;
-
- /// Emit a CSet for an integer compare.
- ///
- /// \p DefReg and \p SrcReg are expected to be 32-bit scalar registers.
- MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
- MachineIRBuilder &MIRBuilder,
- Register SrcReg = AArch64::WZR) const;
+ MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
+ AArch64CC::CondCode Pred,
+ MachineIRBuilder &MIRBuilder) const;
/// Emit a CSet for a FP compare.
///
/// \p Dst is expected to be a 32-bit scalar register.
@@ -367,18 +376,15 @@ private:
return selectAddrModeWRO(Root, Width / 8);
}
- ComplexRendererFns selectShiftedRegister(MachineOperand &Root) const;
+ ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
+ bool AllowROR = false) const;
ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
return selectShiftedRegister(Root);
}
ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
- // TODO: selectShiftedRegister should allow for rotates on logical shifts.
- // For now, make them the same. The only difference between the two is that
- // logical shifts are allowed to fold in rotates. Otherwise, these are
- // functionally the same.
- return selectShiftedRegister(Root);
+ return selectShiftedRegister(Root, true);
}
/// Given an extend instruction, determine the correct shift-extend type for
@@ -496,14 +502,18 @@ getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
}
if (RB.getID() == AArch64::FPRRegBankID) {
- if (Ty.getSizeInBits() <= 16)
+ switch (Ty.getSizeInBits()) {
+ case 8:
+ return &AArch64::FPR8RegClass;
+ case 16:
return &AArch64::FPR16RegClass;
- if (Ty.getSizeInBits() == 32)
+ case 32:
return &AArch64::FPR32RegClass;
- if (Ty.getSizeInBits() == 64)
+ case 64:
return &AArch64::FPR64RegClass;
- if (Ty.getSizeInBits() == 128)
+ case 128:
return &AArch64::FPR128RegClass;
+ }
return nullptr;
}
@@ -652,7 +662,7 @@ static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
Immed = Root.getCImm()->getZExtValue();
else if (Root.isReg()) {
auto ValAndVReg =
- getConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
+ getIConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
if (!ValAndVReg)
return None;
Immed = ValAndVReg->Value.getSExtValue();
@@ -810,6 +820,8 @@ static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
return isStore ? AArch64::STRSui : AArch64::LDRSui;
case 64:
return isStore ? AArch64::STRDui : AArch64::LDRDui;
+ case 128:
+ return isStore ? AArch64::STRQui : AArch64::LDRQui;
}
break;
}
@@ -1195,8 +1207,8 @@ AArch64InstructionSelector::emitSelect(Register Dst, Register True,
&Optimized]() {
if (Optimized)
return false;
- auto TrueCst = getConstantVRegValWithLookThrough(True, MRI);
- auto FalseCst = getConstantVRegValWithLookThrough(False, MRI);
+ auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI);
+ auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI);
if (!TrueCst && !FalseCst)
return false;
@@ -1301,6 +1313,7 @@ static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
MachineRegisterInfo &MRI) {
assert(Reg.isValid() && "Expected valid register!");
+ bool HasZext = false;
while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
unsigned Opc = MI->getOpcode();
@@ -1314,6 +1327,9 @@ static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
// on the truncated x is the same as the bit number on x.
if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
Opc == TargetOpcode::G_TRUNC) {
+ if (Opc == TargetOpcode::G_ZEXT)
+ HasZext = true;
+
Register NextReg = MI->getOperand(1).getReg();
// Did we find something worth folding?
if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
@@ -1334,16 +1350,20 @@ static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
case TargetOpcode::G_XOR: {
TestReg = MI->getOperand(1).getReg();
Register ConstantReg = MI->getOperand(2).getReg();
- auto VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI);
+ auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
if (!VRegAndVal) {
// AND commutes, check the other side for a constant.
// FIXME: Can we canonicalize the constant so that it's always on the
// same side at some point earlier?
std::swap(ConstantReg, TestReg);
- VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI);
+ VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
+ }
+ if (VRegAndVal) {
+ if (HasZext)
+ C = VRegAndVal->Value.getZExtValue();
+ else
+ C = VRegAndVal->Value.getSExtValue();
}
- if (VRegAndVal)
- C = VRegAndVal->Value.getSExtValue();
break;
}
case TargetOpcode::G_ASHR:
@@ -1351,7 +1371,7 @@ static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
case TargetOpcode::G_SHL: {
TestReg = MI->getOperand(1).getReg();
auto VRegAndVal =
- getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
+ getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
if (VRegAndVal)
C = VRegAndVal->Value.getSExtValue();
break;
@@ -1479,7 +1499,7 @@ bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
// Check if the AND has a constant on its RHS which we can use as a mask.
// If it's a power of 2, then it's the same as checking a specific bit.
// (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
- auto MaybeBit = getConstantVRegValWithLookThrough(
+ auto MaybeBit = getIConstantVRegValWithLookThrough(
AndInst.getOperand(2).getReg(), *MIB.getMRI());
if (!MaybeBit)
return false;
@@ -1555,7 +1575,7 @@ bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
Register RHS = ICmp.getOperand(3).getReg();
// We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
- auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
+ auto VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
// When we can emit a TB(N)Z, prefer that.
@@ -1590,7 +1610,7 @@ bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
if (ICmpInst::isEquality(Pred)) {
if (!VRegAndVal) {
std::swap(RHS, LHS);
- VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
+ VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
}
@@ -2049,7 +2069,7 @@ bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
// selector which will match the register variant.
assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
const auto &MO = I.getOperand(2);
- auto VRegAndVal = getConstantVRegVal(MO.getReg(), MRI);
+ auto VRegAndVal = getIConstantVRegVal(MO.getReg(), MRI);
if (!VRegAndVal)
return false;
@@ -2131,7 +2151,7 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
// Before selecting a DUP instruction, check if it is better selected as a
// MOV or load from a constant pool.
Register Src = I.getOperand(1).getReg();
- auto ValAndVReg = getConstantVRegValWithLookThrough(Src, MRI);
+ auto ValAndVReg = getIConstantVRegValWithLookThrough(Src, MRI);
if (!ValAndVReg)
return false;
LLVMContext &Ctx = MF.getFunction().getContext();
@@ -2145,17 +2165,14 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
I.eraseFromParent();
return true;
}
- case TargetOpcode::G_BR: {
- // If the branch jumps to the fallthrough block, don't bother emitting it.
- // Only do this for -O0 for a good code size improvement, because when
- // optimizations are enabled we want to leave this choice to
- // MachineBlockPlacement.
- bool EnableOpt = MF.getTarget().getOptLevel() != CodeGenOpt::None;
- if (EnableOpt || !MBB.isLayoutSuccessor(I.getOperand(0).getMBB()))
- return false;
- I.eraseFromParent();
- return true;
- }
+ case TargetOpcode::G_SEXT:
+ // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
+ // over a normal extend.
+ if (selectUSMovFromExtend(I, MRI))
+ return true;
+ return false;
+ case TargetOpcode::G_BR:
+ return false;
case TargetOpcode::G_SHL:
return earlySelectSHL(I, MRI);
case TargetOpcode::G_CONSTANT: {
@@ -2192,27 +2209,55 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
// fold the add into the cset for the cmp by using cinc.
//
// FIXME: This would probably be a lot nicer in PostLegalizerLowering.
- Register X = I.getOperand(1).getReg();
-
- // Only handle scalars. Scalar G_ICMP is only legal for s32, so bail out
- // early if we see it.
- LLT Ty = MRI.getType(X);
- if (Ty.isVector() || Ty.getSizeInBits() != 32)
+ Register AddDst = I.getOperand(0).getReg();
+ Register AddLHS = I.getOperand(1).getReg();
+ Register AddRHS = I.getOperand(2).getReg();
+ // Only handle scalars.
+ LLT Ty = MRI.getType(AddLHS);
+ if (Ty.isVector())
return false;
-
- Register CmpReg = I.getOperand(2).getReg();
- MachineInstr *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, CmpReg, MRI);
+ // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
+ // bits.
+ unsigned Size = Ty.getSizeInBits();
+ if (Size != 32 && Size != 64)
+ return false;
+ auto MatchCmp = [&](Register Reg) -> MachineInstr * {
+ if (!MRI.hasOneNonDBGUse(Reg))
+ return nullptr;
+ // If the LHS of the add is 32 bits, then we want to fold a 32-bit
+ // compare.
+ if (Size == 32)
+ return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI);
+ // We model scalar compares using 32-bit destinations right now.
+ // If it's a 64-bit compare, it'll have 64-bit sources.
+ Register ZExt;
+ if (!mi_match(Reg, MRI,
+ m_OneNonDBGUse(m_GZExt(m_OneNonDBGUse(m_Reg(ZExt))))))
+ return nullptr;
+ auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI);
+ if (!Cmp ||
+ MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
+ return nullptr;
+ return Cmp;
+ };
+ // Try to match
+ // z + (cmp pred, x, y)
+ MachineInstr *Cmp = MatchCmp(AddRHS);
if (!Cmp) {
- std::swap(X, CmpReg);
- Cmp = getOpcodeDef(TargetOpcode::G_ICMP, CmpReg, MRI);
+ // (cmp pred, x, y) + z
+ std::swap(AddLHS, AddRHS);
+ Cmp = MatchCmp(AddRHS);
if (!Cmp)
return false;
}
- auto Pred =
- static_cast<CmpInst::Predicate>(Cmp->getOperand(1).getPredicate());
- emitIntegerCompare(Cmp->getOperand(2), Cmp->getOperand(3),
- Cmp->getOperand(1), MIB);
- emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIB, X);
+ auto &PredOp = Cmp->getOperand(1);
+ auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
+ const AArch64CC::CondCode InvCC =
+ changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
+ MIB.setInstrAndDebugLoc(I);
+ emitIntegerCompare(/*LHS=*/Cmp->getOperand(2),
+ /*RHS=*/Cmp->getOperand(3), PredOp, MIB);
+ emitCSINC(/*Dst=*/AddDst, /*Src =*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB);
I.eraseFromParent();
return true;
}
@@ -2352,10 +2397,10 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
unsigned Size = Ty.getSizeInBits();
unsigned Opc = OpcTable[IsSigned][Size == 64];
auto Cst1 =
- getConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
+ getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
assert(Cst1 && "Should have gotten a constant for src 1?");
auto Cst2 =
- getConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
+ getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
assert(Cst2 && "Should have gotten a constant for src 2?");
auto LSB = Cst1->Value.getZExtValue();
auto Width = Cst2->Value.getZExtValue();
@@ -2456,10 +2501,10 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
// FIXME: Redundant check, but even less readable when factored out.
if (isFP) {
- if (Ty != s32 && Ty != s64 && Ty != s128) {
+ if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
- << " constant, expected: " << s32 << " or " << s64
- << " or " << s128 << '\n');
+ << " constant, expected: " << s16 << " or " << s32
+ << " or " << s64 << " or " << s128 << '\n');
return false;
}
@@ -2493,23 +2538,20 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
}
}
- // We allow G_CONSTANT of types < 32b.
- const unsigned MovOpc =
- DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
-
if (isFP) {
- // Either emit a FMOV, or emit a copy to emit a normal mov.
- const TargetRegisterClass &GPRRC =
- DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
- const TargetRegisterClass &FPRRC =
- DefSize == 32 ? AArch64::FPR32RegClass
- : (DefSize == 64 ? AArch64::FPR64RegClass
- : AArch64::FPR128RegClass);
-
- // For 64b values, emit a constant pool load instead.
- // For s32, use a cp load if we have optsize/minsize.
- if (DefSize == 64 || DefSize == 128 ||
- (DefSize == 32 && shouldOptForSize(&MF))) {
+ const TargetRegisterClass &FPRRC = *getMinClassForRegBank(RB, DefSize);
+ // For 16, 64, and 128b values, emit a constant pool load.
+ switch (DefSize) {
+ default:
+ llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
+ case 32:
+ // For s32, use a cp load if we have optsize/minsize.
+ if (!shouldOptForSize(&MF))
+ break;
+ LLVM_FALLTHROUGH;
+ case 16:
+ case 64:
+ case 128: {
auto *FPImm = I.getOperand(1).getFPImm();
auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
if (!LoadMI) {
@@ -2520,9 +2562,13 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
I.eraseFromParent();
return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
}
+ }
- // Nope. Emit a copy and use a normal mov instead.
- const Register DefGPRReg = MRI.createVirtualRegister(&GPRRC);
+ // Either emit a FMOV, or emit a copy to emit a normal mov.
+ assert(DefSize == 32 &&
+ "Expected constant pool loads for all sizes other than 32!");
+ const Register DefGPRReg =
+ MRI.createVirtualRegister(&AArch64::GPR32RegClass);
MachineOperand &RegOp = I.getOperand(0);
RegOp.setReg(DefGPRReg);
MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
@@ -2545,6 +2591,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
I.getOperand(1).ChangeToImmediate(Val);
}
+ const unsigned MovOpc =
+ DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
I.setDesc(TII.get(MovOpc));
constrainSelectedInstRegOperands(I, TII, TRI, RBI);
return true;
@@ -2693,8 +2741,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
case TargetOpcode::G_ZEXTLOAD:
case TargetOpcode::G_LOAD:
case TargetOpcode::G_STORE: {
+ GLoadStore &LdSt = cast<GLoadStore>(I);
bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
- LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
+ LLT PtrTy = MRI.getType(LdSt.getPointerReg());
if (PtrTy != LLT::pointer(0, 64)) {
LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
@@ -2702,26 +2751,33 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
return false;
}
- auto &MemOp = **I.memoperands_begin();
- uint64_t MemSizeInBytes = MemOp.getSize();
- unsigned MemSizeInBits = MemSizeInBytes * 8;
- AtomicOrdering Order = MemOp.getSuccessOrdering();
+ uint64_t MemSizeInBytes = LdSt.getMemSize();
+ unsigned MemSizeInBits = LdSt.getMemSizeInBits();
+ AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
// Need special instructions for atomics that affect ordering.
if (Order != AtomicOrdering::NotAtomic &&
Order != AtomicOrdering::Unordered &&
Order != AtomicOrdering::Monotonic) {
- assert(I.getOpcode() != TargetOpcode::G_ZEXTLOAD);
+ assert(!isa<GZExtLoad>(LdSt));
if (MemSizeInBytes > 64)
return false;
- if (I.getOpcode() == TargetOpcode::G_LOAD) {
+ if (isa<GLoad>(LdSt)) {
static unsigned Opcodes[] = {AArch64::LDARB, AArch64::LDARH,
AArch64::LDARW, AArch64::LDARX};
I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
} else {
static unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
AArch64::STLRW, AArch64::STLRX};
+ Register ValReg = LdSt.getReg(0);
+ if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
+ // Emit a subreg copy of 32 bits.
+ Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+ MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
+ .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32);
+ I.getOperand(0).setReg(NewVal);
+ }
I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
}
constrainSelectedInstRegOperands(I, TII, TRI, RBI);
@@ -2729,22 +2785,64 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
}
#ifndef NDEBUG
- const Register PtrReg = I.getOperand(1).getReg();
+ const Register PtrReg = LdSt.getPointerReg();
const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
- // Sanity-check the pointer register.
+ // Check that the pointer register is valid.
assert(PtrRB.getID() == AArch64::GPRRegBankID &&
"Load/Store pointer operand isn't a GPR");
assert(MRI.getType(PtrReg).isPointer() &&
"Load/Store pointer operand isn't a pointer");
#endif
- const Register ValReg = I.getOperand(0).getReg();
+ const Register ValReg = LdSt.getReg(0);
+ const LLT ValTy = MRI.getType(ValReg);
const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
+ // The code below doesn't support truncating stores, so we need to split it
+ // again.
+ if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
+ unsigned SubReg;
+ LLT MemTy = LdSt.getMMO().getMemoryType();
+ auto *RC = getRegClassForTypeOnBank(MemTy, RB, RBI);
+ if (!getSubRegForClass(RC, TRI, SubReg))
+ return false;
+
+ // Generate a subreg copy.
+ auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
+ .addReg(ValReg, 0, SubReg)
+ .getReg(0);
+ RBI.constrainGenericRegister(Copy, *RC, MRI);
+ LdSt.getOperand(0).setReg(Copy);
+ } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
+ // If this is an any-extending load from the FPR bank, split it into a regular
+ // load + extend.
+ if (RB.getID() == AArch64::FPRRegBankID) {
+ unsigned SubReg;
+ LLT MemTy = LdSt.getMMO().getMemoryType();
+ auto *RC = getRegClassForTypeOnBank(MemTy, RB, RBI);
+ if (!getSubRegForClass(RC, TRI, SubReg))
+ return false;
+ Register OldDst = LdSt.getReg(0);
+ Register NewDst =
+ MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType());
+ LdSt.getOperand(0).setReg(NewDst);
+ MRI.setRegBank(NewDst, RB);
+ // Generate a SUBREG_TO_REG to extend it.
+ MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));
+ MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
+ .addImm(0)
+ .addUse(NewDst)
+ .addImm(SubReg);
+ auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB, RBI);
+ RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
+ MIB.setInstr(LdSt);
+ }
+ }
+
// Helper lambda for partially selecting I. Either returns the original
// instruction with an updated opcode, or a new instruction.
auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
- bool IsStore = I.getOpcode() == TargetOpcode::G_STORE;
+ bool IsStore = isa<GStore>(I);
const unsigned NewOpc =
selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
if (NewOpc == I.getOpcode())
@@ -2761,7 +2859,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
// Folded something. Create a new instruction and return it.
auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
- IsStore ? NewInst.addUse(ValReg) : NewInst.addDef(ValReg);
+ Register CurValReg = I.getOperand(0).getReg();
+ IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
NewInst.cloneMemRefs(I);
for (auto &Fn : *AddrModeFns)
Fn(NewInst);
@@ -2775,9 +2874,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
// If we're storing a 0, use WZR/XZR.
if (Opcode == TargetOpcode::G_STORE) {
- auto CVal = getConstantVRegValWithLookThrough(
- LoadStore->getOperand(0).getReg(), MRI, /*LookThroughInstrs = */ true,
- /*HandleFConstants = */ false);
+ auto CVal = getIConstantVRegValWithLookThrough(
+ LoadStore->getOperand(0).getReg(), MRI);
if (CVal && CVal->Value == 0) {
switch (LoadStore->getOpcode()) {
case AArch64::STRWui:
@@ -2897,17 +2995,15 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
// false, so to get the increment when it's true, we need to use the
// inverse. In this case, we want to increment when carry is set.
Register ZReg = AArch64::WZR;
- auto CsetMI = MIB.buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
- {ZReg, ZReg})
- .addImm(getInvertedCondCode(OpAndCC.second));
- constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
+ emitCSINC(/*Dst=*/I.getOperand(1).getReg(), /*Src1=*/ZReg, /*Src2=*/ZReg,
+ getInvertedCondCode(OpAndCC.second), MIB);
I.eraseFromParent();
return true;
}
case TargetOpcode::G_PTRMASK: {
Register MaskReg = I.getOperand(2).getReg();
- Optional<int64_t> MaskVal = getConstantVRegSExtVal(MaskReg, MRI);
+ Optional<int64_t> MaskVal = getIConstantVRegSExtVal(MaskReg, MRI);
// TODO: Implement arbitrary cases
if (!MaskVal || !isShiftedMask_64(*MaskVal))
return false;
@@ -2991,7 +3087,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
if (Opcode == TargetOpcode::G_PTRTOINT) {
assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
I.setDesc(TII.get(TargetOpcode::COPY));
- return true;
+ return selectCopy(I, TII, MRI, TRI, RBI);
}
}
@@ -2999,6 +3095,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
}
case TargetOpcode::G_ANYEXT: {
+ if (selectUSMovFromExtend(I, MRI))
+ return true;
+
const Register DstReg = I.getOperand(0).getReg();
const Register SrcReg = I.getOperand(1).getReg();
@@ -3045,6 +3144,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
case TargetOpcode::G_ZEXT:
case TargetOpcode::G_SEXT_INREG:
case TargetOpcode::G_SEXT: {
+ if (selectUSMovFromExtend(I, MRI))
+ return true;
+
unsigned Opcode = I.getOpcode();
const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
const Register DefReg = I.getOperand(0).getReg();
@@ -3231,9 +3333,11 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
}
auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
- emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1),
- MIB);
- emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIB);
+ const AArch64CC::CondCode InvCC =
+ changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
+ emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), MIB);
+ emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR,
+ /*Src2=*/AArch64::WZR, InvCC, MIB);
I.eraseFromParent();
return true;
}
@@ -3839,6 +3943,10 @@ static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
// Choose a lane copy opcode and subregister based off of the size of the
// vector's elements.
switch (EltSize) {
+ case 8:
+ CopyOpc = AArch64::CPYi8;
+ ExtractSubReg = AArch64::bsub;
+ break;
case 16:
CopyOpc = AArch64::CPYi16;
ExtractSubReg = AArch64::hsub;
@@ -3942,7 +4050,7 @@ bool AArch64InstructionSelector::selectExtractElt(
}
// Find the index to extract from.
- auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
+ auto VRegAndVal = getIConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
if (!VRegAndVal)
return false;
unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
@@ -4164,6 +4272,13 @@ MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
.addConstantPoolIndex(CPIdx, 0,
AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
break;
+ case 2:
+ LoadMI =
+ &*MIRBuilder
+ .buildInstr(AArch64::LDRHui, {&AArch64::FPR16RegClass}, {Adrp})
+ .addConstantPoolIndex(CPIdx, 0,
+ AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
+ break;
default:
LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
<< *CPVal->getType());
@@ -4326,7 +4441,7 @@ AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
{AArch64::ANDSXrr, AArch64::ANDSWrr}};
// ANDS needs a logical immediate for its immediate form. Check if we can
// fold one in.
- if (auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
+ if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
int64_t Imm = ValAndVReg->Value.getSExtValue();
if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
@@ -4368,25 +4483,19 @@ MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
"Expected a 32-bit scalar register?");
#endif
- const Register ZeroReg = AArch64::WZR;
- auto EmitCSet = [&](Register CsetDst, AArch64CC::CondCode CC) {
- auto CSet =
- MIRBuilder.buildInstr(AArch64::CSINCWr, {CsetDst}, {ZeroReg, ZeroReg})
- .addImm(getInvertedCondCode(CC));
- constrainSelectedInstRegOperands(*CSet, TII, TRI, RBI);
- return &*CSet;
- };
-
+ const Register ZReg = AArch64::WZR;
AArch64CC::CondCode CC1, CC2;
changeFCMPPredToAArch64CC(Pred, CC1, CC2);
+ auto InvCC1 = AArch64CC::getInvertedCondCode(CC1);
if (CC2 == AArch64CC::AL)
- return EmitCSet(Dst, CC1);
-
+ return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1,
+ MIRBuilder);
const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
Register Def1Reg = MRI.createVirtualRegister(RC);
Register Def2Reg = MRI.createVirtualRegister(RC);
- EmitCSet(Def1Reg, CC1);
- EmitCSet(Def2Reg, CC2);
+ auto InvCC2 = AArch64CC::getInvertedCondCode(CC2);
+ emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, MIRBuilder);
+ emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC2, MIRBuilder);
auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
return &*OrMI;
@@ -4495,16 +4604,25 @@ MachineInstr *AArch64InstructionSelector::emitVectorConcat(
}
MachineInstr *
-AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
- MachineIRBuilder &MIRBuilder,
- Register SrcReg) const {
- // CSINC increments the result when the predicate is false. Invert it.
- const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
- CmpInst::getInversePredicate((CmpInst::Predicate)Pred));
- auto I = MIRBuilder.buildInstr(AArch64::CSINCWr, {DefReg}, {SrcReg, SrcReg})
- .addImm(InvCC);
- constrainSelectedInstRegOperands(*I, TII, TRI, RBI);
- return &*I;
+AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
+ Register Src2, AArch64CC::CondCode Pred,
+ MachineIRBuilder &MIRBuilder) const {
+ auto &MRI = *MIRBuilder.getMRI();
+ const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Dst);
+ // If we used a register class, then this won't necessarily have an LLT.
+ // Compute the size based off whether or not we have a class or bank.
+ unsigned Size;
+ if (const auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>())
+ Size = TRI.getRegSizeInBits(*RC);
+ else
+ Size = MRI.getType(Dst).getSizeInBits();
+ // Some opcodes use s1.
+ assert(Size <= 64 && "Expected 64 bits or less only!");
+ static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
+ unsigned Opc = OpcTable[Size == 64];
+ auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
+ constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI);
+ return &*CSINC;
}
std::pair<MachineInstr *, AArch64CC::CondCode>
@@ -4671,7 +4789,7 @@ MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
if (!CmpInst::isUnsigned(P) && LHSDef &&
LHSDef->getOpcode() == TargetOpcode::G_AND) {
// Make sure that the RHS is 0.
- auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI);
+ auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
if (!ValAndVReg || ValAndVReg->Value != 0)
return nullptr;
@@ -4792,6 +4910,71 @@ MachineInstr *AArch64InstructionSelector::emitLaneInsert(
return InsElt;
}
+bool AArch64InstructionSelector::selectUSMovFromExtend(
+ MachineInstr &MI, MachineRegisterInfo &MRI) {
+ if (MI.getOpcode() != TargetOpcode::G_SEXT &&
+ MI.getOpcode() != TargetOpcode::G_ZEXT &&
+ MI.getOpcode() != TargetOpcode::G_ANYEXT)
+ return false;
+ bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT;
+ const Register DefReg = MI.getOperand(0).getReg();
+ const LLT DstTy = MRI.getType(DefReg);
+ unsigned DstSize = DstTy.getSizeInBits();
+
+ if (DstSize != 32 && DstSize != 64)
+ return false;
+
+ MachineInstr *Extract = getOpcodeDef(TargetOpcode::G_EXTRACT_VECTOR_ELT,
+ MI.getOperand(1).getReg(), MRI);
+ int64_t Lane;
+ if (!Extract || !mi_match(Extract->getOperand(2).getReg(), MRI, m_ICst(Lane)))
+ return false;
+ Register Src0 = Extract->getOperand(1).getReg();
+
+ const LLT &VecTy = MRI.getType(Src0);
+
+ if (VecTy.getSizeInBits() != 128) {
+ const MachineInstr *ScalarToVector = emitScalarToVector(
+ VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);
+ assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!");
+ Src0 = ScalarToVector->getOperand(0).getReg();
+ }
+
+ unsigned Opcode;
+ if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32)
+ Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
+ else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16)
+ Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
+ else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8)
+ Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
+ else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16)
+ Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
+ else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8)
+ Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
+ else
+ llvm_unreachable("Unexpected type combo for S/UMov!");
+
+ // We may need to generate one of these, depending on the type and sign of the
+ // input:
+ // DstReg = SMOV Src0, Lane;
+ // NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32;
+ MachineInstr *ExtI = nullptr;
+ if (DstSize == 64 && !IsSigned) {
+ Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+ MIB.buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane);
+ ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
+ .addImm(0)
+ .addUse(NewReg)
+ .addImm(AArch64::sub_32);
+ RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
+ } else
+ ExtI = MIB.buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane);
+
+ constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
+ MI.eraseFromParent();
+ return true;
+}
+
bool AArch64InstructionSelector::selectInsertElt(MachineInstr &I,
MachineRegisterInfo &MRI) {
assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
@@ -4811,7 +4994,7 @@ bool AArch64InstructionSelector::selectInsertElt(MachineInstr &I,
// Find the definition of the index. Bail out if it's not defined by a
// G_CONSTANT.
Register IdxReg = I.getOperand(3).getReg();
- auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI);
+ auto VRegAndVal = getIConstantVRegValWithLookThrough(IdxReg, MRI);
if (!VRegAndVal)
return false;
unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
@@ -4936,6 +5119,47 @@ bool AArch64InstructionSelector::tryOptConstantBuildVec(
return true;
}
+bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
+ MachineInstr &I, MachineRegisterInfo &MRI) {
+ // Given:
+ // %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
+ //
+ // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt.
+ Register Dst = I.getOperand(0).getReg();
+ Register EltReg = I.getOperand(1).getReg();
+ LLT EltTy = MRI.getType(EltReg);
+ // If the index isn't on the same bank as its elements, then this can't be a
+ // SUBREG_TO_REG.
+ const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
+ const RegisterBank &DstRB = *RBI.getRegBank(Dst, MRI, TRI);
+ if (EltRB != DstRB)
+ return false;
+ if (any_of(make_range(I.operands_begin() + 2, I.operands_end()),
+ [&MRI](const MachineOperand &Op) {
+ return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(),
+ MRI);
+ }))
+ return false;
+ unsigned SubReg;
+ const TargetRegisterClass *EltRC =
+ getMinClassForRegBank(EltRB, EltTy.getSizeInBits());
+ if (!EltRC)
+ return false;
+ const TargetRegisterClass *DstRC =
+ getMinClassForRegBank(DstRB, MRI.getType(Dst).getSizeInBits());
+ if (!DstRC)
+ return false;
+ if (!getSubRegForClass(EltRC, TRI, SubReg))
+ return false;
+ auto SubregToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})
+ .addImm(0)
+ .addUse(EltReg)
+ .addImm(SubReg);
+ I.eraseFromParent();
+ constrainSelectedInstRegOperands(*SubregToReg, TII, TRI, RBI);
+ return RBI.constrainGenericRegister(Dst, *DstRC, MRI);
+}
+
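[Editor's note: a standalone sketch, assumed shape only, of the undef check in tryOptBuildVecToSubregToReg above: a G_BUILD_VECTOR can become a single SUBREG_TO_REG only when every source after the first is an implicit def.]

#include <algorithm>
#include <vector>

enum class SrcKind { Element, ImplicitDef };

static bool allTrailingSourcesUndef(const std::vector<SrcKind> &Srcs) {
  // Srcs[0] is the element being inserted; every remaining source must be undef.
  return Srcs.size() > 1 &&
         std::all_of(Srcs.begin() + 1, Srcs.end(),
                     [](SrcKind K) { return K == SrcKind::ImplicitDef; });
}

int main() {
  std::vector<SrcKind> Ok = {SrcKind::Element, SrcKind::ImplicitDef,
                             SrcKind::ImplicitDef, SrcKind::ImplicitDef};
  std::vector<SrcKind> NotOk = {SrcKind::Element, SrcKind::Element,
                                SrcKind::ImplicitDef, SrcKind::ImplicitDef};
  return allTrailingSourcesUndef(Ok) && !allTrailingSourcesUndef(NotOk) ? 0 : 1;
}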
bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
MachineRegisterInfo &MRI) {
assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
@@ -4947,6 +5171,9 @@ bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
if (tryOptConstantBuildVec(I, DstTy, MRI))
return true;
+ if (tryOptBuildVecToSubregToReg(I, MRI))
+ return true;
+
if (EltSize < 16 || EltSize > 64)
return false; // Don't support all element types yet.
const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
@@ -5013,24 +5240,45 @@ bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
return true;
}
-/// Helper function to find an intrinsic ID on an a MachineInstr. Returns the
-/// ID if it exists, and 0 otherwise.
-static unsigned findIntrinsicID(MachineInstr &I) {
- auto IntrinOp = find_if(I.operands(), [&](const MachineOperand &Op) {
- return Op.isIntrinsicID();
- });
- if (IntrinOp == I.operands_end())
- return 0;
- return IntrinOp->getIntrinsicID();
+bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc,
+ unsigned NumVecs,
+ MachineInstr &I) {
+ assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
+ assert(Opc && "Expected an opcode?");
+ assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
+ auto &MRI = *MIB.getMRI();
+ LLT Ty = MRI.getType(I.getOperand(0).getReg());
+ unsigned Size = Ty.getSizeInBits();
+ assert((Size == 64 || Size == 128) &&
+ "Destination must be 64 bits or 128 bits?");
+ unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
+ auto Ptr = I.getOperand(I.getNumOperands() - 1).getReg();
+ assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?");
+ auto Load = MIB.buildInstr(Opc, {Ty}, {Ptr});
+ Load.cloneMemRefs(I);
+ constrainSelectedInstRegOperands(*Load, TII, TRI, RBI);
+ Register SelectedLoadDst = Load->getOperand(0).getReg();
+ for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
+ auto Vec = MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(Idx)}, {})
+ .addReg(SelectedLoadDst, 0, SubReg + Idx);
+ // Emit the subreg copies and immediately select them.
+ // FIXME: We should refactor our copy code into an emitCopy helper and
+ // clean up uses of this pattern elsewhere in the selector.
+ selectCopy(*Vec, TII, MRI, TRI, RBI);
+ }
+ return true;
}
bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
MachineInstr &I, MachineRegisterInfo &MRI) {
// Find the intrinsic ID.
- unsigned IntrinID = findIntrinsicID(I);
- if (!IntrinID)
- return false;
+ unsigned IntrinID = I.getIntrinsicID();
+ const LLT S8 = LLT::scalar(8);
+ const LLT S16 = LLT::scalar(16);
+ const LLT S32 = LLT::scalar(32);
+ const LLT S64 = LLT::scalar(64);
+ const LLT P0 = LLT::pointer(0, 64);
// Select the instruction.
switch (IntrinID) {
default:
@@ -5055,16 +5303,59 @@ bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
MIB.buildInstr(AArch64::BRK, {}, {})
.addImm(I.getOperand(1).getImm() | ('U' << 8));
break;
+ case Intrinsic::aarch64_neon_ld2: {
+ LLT Ty = MRI.getType(I.getOperand(0).getReg());
+ unsigned Opc = 0;
+ if (Ty == LLT::fixed_vector(8, S8))
+ Opc = AArch64::LD2Twov8b;
+ else if (Ty == LLT::fixed_vector(16, S8))
+ Opc = AArch64::LD2Twov16b;
+ else if (Ty == LLT::fixed_vector(4, S16))
+ Opc = AArch64::LD2Twov4h;
+ else if (Ty == LLT::fixed_vector(8, S16))
+ Opc = AArch64::LD2Twov8h;
+ else if (Ty == LLT::fixed_vector(2, S32))
+ Opc = AArch64::LD2Twov2s;
+ else if (Ty == LLT::fixed_vector(4, S32))
+ Opc = AArch64::LD2Twov4s;
+ else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
+ Opc = AArch64::LD2Twov2d;
+ else if (Ty == S64 || Ty == P0)
+ Opc = AArch64::LD1Twov1d;
+ else
+ llvm_unreachable("Unexpected type for ld2!");
+ selectVectorLoadIntrinsic(Opc, 2, I);
+ break;
+ }
+ case Intrinsic::aarch64_neon_ld4: {
+ LLT Ty = MRI.getType(I.getOperand(0).getReg());
+ unsigned Opc = 0;
+ if (Ty == LLT::fixed_vector(8, S8))
+ Opc = AArch64::LD4Fourv8b;
+ else if (Ty == LLT::fixed_vector(16, S8))
+ Opc = AArch64::LD4Fourv16b;
+ else if (Ty == LLT::fixed_vector(4, S16))
+ Opc = AArch64::LD4Fourv4h;
+ else if (Ty == LLT::fixed_vector(8, S16))
+ Opc = AArch64::LD4Fourv8h;
+ else if (Ty == LLT::fixed_vector(2, S32))
+ Opc = AArch64::LD4Fourv2s;
+ else if (Ty == LLT::fixed_vector(4, S32))
+ Opc = AArch64::LD4Fourv4s;
+ else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
+ Opc = AArch64::LD4Fourv2d;
+ else if (Ty == S64 || Ty == P0)
+ Opc = AArch64::LD1Fourv1d;
+ else
+ llvm_unreachable("Unexpected type for ld4!");
+ selectVectorLoadIntrinsic(Opc, 4, I);
+ break;
+ }
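[Editor's note: a standalone illustrative sketch of the ld2/ld4 dispatch above. The fixed-vector type of the first result picks the concrete LDn instruction; the strings stand in for the AArch64::LD2Twov*/LD4Fourv* enumerators and do not cover the scalar LD1*v1d fallback.]

#include <cstdio>
#include <string>

static std::string pickLdNOpcode(unsigned NumVecs, unsigned NumElts,
                                 unsigned EltBits) {
  std::string Prefix = NumVecs == 2 ? "LD2Twov" : "LD4Fourv";
  if (EltBits == 8 && (NumElts == 8 || NumElts == 16))
    return Prefix + std::to_string(NumElts) + "b";
  if (EltBits == 16 && (NumElts == 4 || NumElts == 8))
    return Prefix + std::to_string(NumElts) + "h";
  if (EltBits == 32 && (NumElts == 2 || NumElts == 4))
    return Prefix + std::to_string(NumElts) + "s";
  if (EltBits == 64 && NumElts == 2)
    return Prefix + "2d";
  return "<unsupported>";
}

int main() {
  // <4 x s32> with aarch64_neon_ld2 selects LD2Twov4s, matching the code above.
  std::printf("%s\n", pickLdNOpcode(2, 4, 32).c_str());
}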
case Intrinsic::aarch64_neon_st2: {
Register Src1 = I.getOperand(1).getReg();
Register Src2 = I.getOperand(2).getReg();
Register Ptr = I.getOperand(3).getReg();
LLT Ty = MRI.getType(Src1);
- const LLT S8 = LLT::scalar(8);
- const LLT S16 = LLT::scalar(16);
- const LLT S32 = LLT::scalar(32);
- const LLT S64 = LLT::scalar(64);
- const LLT P0 = LLT::pointer(0, 64);
unsigned Opc;
if (Ty == LLT::fixed_vector(8, S8))
Opc = AArch64::ST2Twov8b;
@@ -5100,9 +5391,7 @@ bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
MachineRegisterInfo &MRI) {
- unsigned IntrinID = findIntrinsicID(I);
- if (!IntrinID)
- return false;
+ unsigned IntrinID = I.getIntrinsicID();
switch (IntrinID) {
default:
@@ -5146,6 +5435,33 @@ bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
I.eraseFromParent();
return true;
}
+ case Intrinsic::ptrauth_sign: {
+ Register DstReg = I.getOperand(0).getReg();
+ Register ValReg = I.getOperand(2).getReg();
+ uint64_t Key = I.getOperand(3).getImm();
+ Register DiscReg = I.getOperand(4).getReg();
+ auto DiscVal = getIConstantVRegVal(DiscReg, MRI);
+ bool IsDiscZero = DiscVal.hasValue() && DiscVal->isNullValue();
+
+ if (Key > 3)
+ return false;
+
+ unsigned Opcodes[][4] = {
+ {AArch64::PACIA, AArch64::PACIB, AArch64::PACDA, AArch64::PACDB},
+ {AArch64::PACIZA, AArch64::PACIZB, AArch64::PACDZA, AArch64::PACDZB}};
+ unsigned Opcode = Opcodes[IsDiscZero][Key];
+
+ auto PAC = MIB.buildInstr(Opcode, {DstReg}, {ValReg});
+
+ if (!IsDiscZero) {
+ PAC.addUse(DiscReg);
+ RBI.constrainGenericRegister(DiscReg, AArch64::GPR64spRegClass, MRI);
+ }
+
+ RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
+ I.eraseFromParent();
+ return true;
+ }
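[Editor's note: a standalone sketch, not the patch itself, of the two-dimensional opcode table for ptrauth.sign above. The row is "is the discriminator the constant zero", the column is the key; zero discriminators select the *Z* variants, which take no discriminator operand.]

#include <cstdio>

static const char *pickPACOpcode(bool IsDiscZero, unsigned Key) {
  static const char *Opcodes[2][4] = {
      {"PACIA", "PACIB", "PACDA", "PACDB"},
      {"PACIZA", "PACIZB", "PACDZA", "PACDZB"}};
  if (Key > 3)
    return nullptr; // Only keys 0-3 (IA, IB, DA, DB) are valid.
  return Opcodes[IsDiscZero][Key];
}

int main() {
  // Key 2 (DA) with a zero discriminator selects PACDZA.
  std::printf("%s\n", pickPACOpcode(/*IsDiscZero=*/true, /*Key=*/2));
}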
case Intrinsic::frameaddress:
case Intrinsic::returnaddress: {
MachineFunction &MF = *I.getParent()->getParent();
@@ -5403,7 +5719,7 @@ AArch64InstructionSelector::selectExtendedSHL(
// constant is the RHS.
Register OffsetReg = OffsetInst->getOperand(1).getReg();
Register ConstantReg = OffsetInst->getOperand(2).getReg();
- auto ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
+ auto ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
if (!ValAndVReg) {
// We didn't get a constant on the RHS. If the opcode is a shift, then
// we're done.
@@ -5412,7 +5728,7 @@ AArch64InstructionSelector::selectExtendedSHL(
// If we have a G_MUL, we can use either register. Try looking at the RHS.
std::swap(OffsetReg, ConstantReg);
- ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
+ ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
if (!ValAndVReg)
return None;
}
@@ -5580,7 +5896,7 @@ AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
// mov x0, wide
// ldr x2, [base, x0]
auto ValAndVReg =
- getConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI);
+ getIConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI);
if (ValAndVReg) {
unsigned Scale = Log2_32(SizeInBytes);
int64_t ImmOff = ValAndVReg->Value.getSExtValue();
@@ -5839,7 +6155,6 @@ AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
/// Given a shift instruction, return the correct shift type for that
/// instruction.
static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
- // TODO: Handle AArch64_AM::ROR
switch (MI.getOpcode()) {
default:
return AArch64_AM::InvalidShiftExtend;
@@ -5849,15 +6164,16 @@ static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
return AArch64_AM::LSR;
case TargetOpcode::G_ASHR:
return AArch64_AM::ASR;
+ case TargetOpcode::G_ROTR:
+ return AArch64_AM::ROR;
}
}
/// Select a "shifted register" operand. If the value is not shifted, set the
/// shift operand to a default value of "lsl 0".
-///
-/// TODO: Allow shifted register to be rotated in logical instructions.
InstructionSelector::ComplexRendererFns
-AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root) const {
+AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,
+ bool AllowROR) const {
if (!Root.isReg())
return None;
MachineRegisterInfo &MRI =
@@ -5865,14 +6181,14 @@ AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root) const {
// Check if the operand is defined by an instruction which corresponds to
// a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
- //
- // TODO: Handle AArch64_AM::ROR for logical instructions.
MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
if (!ShiftInst)
return None;
AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);
if (ShType == AArch64_AM::InvalidShiftExtend)
return None;
+ if (ShType == AArch64_AM::ROR && !AllowROR)
+ return None;
if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI))
return None;
@@ -6045,7 +6361,7 @@ void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
"Expected G_CONSTANT");
Optional<int64_t> CstVal =
- getConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI);
+ getIConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI);
assert(CstVal && "Expected constant value");
MIB.addImm(CstVal.getValue());
}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 08e4a119127c..1524aa5eb0ec 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -16,6 +16,7 @@
#include "AArch64Subtarget.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -23,6 +24,7 @@
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/MathExtras.h"
@@ -34,6 +36,7 @@ using namespace llvm;
using namespace LegalizeActions;
using namespace LegalizeMutations;
using namespace LegalityPredicates;
+using namespace MIPatternMatch;
AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
: ST(&ST) {
@@ -45,7 +48,6 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
const LLT s32 = LLT::scalar(32);
const LLT s64 = LLT::scalar(64);
const LLT s128 = LLT::scalar(128);
- const LLT s256 = LLT::scalar(256);
const LLT v16s8 = LLT::fixed_vector(16, 8);
const LLT v8s8 = LLT::fixed_vector(8, 8);
const LLT v4s8 = LLT::fixed_vector(4, 8);
@@ -80,8 +82,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder({G_IMPLICIT_DEF, G_FREEZE})
.legalFor({p0, s1, s8, s16, s32, s64})
.legalFor(PackedVectorAllTypeList)
+ .widenScalarToNextPow2(0)
.clampScalar(0, s8, s64)
- .widenScalarToNextPow2(0, 8)
.fewerElementsIf(
[=](const LegalityQuery &Query) {
return Query.Types[0].isVector() &&
@@ -95,16 +97,22 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
return std::make_pair(0, EltTy);
});
- getActionDefinitionsBuilder(G_PHI).legalFor({p0, s16, s32, s64})
+ getActionDefinitionsBuilder(G_PHI)
+ .legalFor({p0, s16, s32, s64})
.legalFor(PackedVectorAllTypeList)
+ .widenScalarToNextPow2(0)
.clampScalar(0, s16, s64)
- .widenScalarToNextPow2(0);
+ // Maximum: sN * k = 128
+ .clampMaxNumElements(0, s8, 16)
+ .clampMaxNumElements(0, s16, 8)
+ .clampMaxNumElements(0, s32, 4)
+ .clampMaxNumElements(0, s64, 2)
+ .clampMaxNumElements(0, p0, 2);
getActionDefinitionsBuilder(G_BSWAP)
.legalFor({s32, s64, v4s32, v2s32, v2s64})
- .clampScalar(0, s32, s64)
.widenScalarToNextPow2(0)
- .customIf(typeIs(0, v2s16)); // custom lower as G_REV32 + G_LSHR
+ .clampScalar(0, s32, s64);
getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
.legalFor({s32, s64, v2s32, v4s32, v4s16, v8s16, v16s8, v8s8})
@@ -114,8 +122,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
},
0)
.legalFor({v2s64})
- .clampScalar(0, s32, s64)
.widenScalarToNextPow2(0)
+ .clampScalar(0, s32, s64)
.clampNumElements(0, v2s32, v4s32)
.clampNumElements(0, v2s64, v2s64)
.moreElementsToNextPow2(0);
@@ -161,11 +169,22 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.scalarize(0);
getActionDefinitionsBuilder({G_SREM, G_UREM, G_SDIVREM, G_UDIVREM})
- .lowerFor({s1, s8, s16, s32, s64});
+ .lowerFor({s1, s8, s16, s32, s64, v2s64, v4s32, v2s32})
+ .widenScalarOrEltToNextPow2(0)
+ .clampScalarOrElt(0, s32, s64)
+ .clampNumElements(0, v2s32, v4s32)
+ .clampNumElements(0, v2s64, v2s64)
+ .moreElementsToNextPow2(0);
+
- getActionDefinitionsBuilder({G_SMULO, G_UMULO}).lowerFor({{s64, s1}});
+ getActionDefinitionsBuilder({G_SMULO, G_UMULO})
+ .widenScalarToNextPow2(0, /*Min = */ 32)
+ .clampScalar(0, s32, s64)
+ .lowerIf(typeIs(1, s1));
- getActionDefinitionsBuilder({G_SMULH, G_UMULH}).legalFor({s32, s64});
+ getActionDefinitionsBuilder({G_SMULH, G_UMULH})
+ .legalFor({s64, v8s16, v16s8, v4s32})
+ .lower();
getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
.legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
@@ -184,7 +203,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.widenScalarToNextPow2(0);
getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FNEG})
- .legalFor({s32, s64, v2s64, v4s32, v2s32})
+ .legalFor({MinFPScalar, s32, s64, v2s64, v4s32, v2s32})
+ .clampScalar(0, MinFPScalar, s64)
.clampNumElements(0, v2s32, v4s32)
.clampNumElements(0, v2s64, v2s64);
@@ -220,42 +240,25 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.libcallFor({s32, s64, v2s32, v4s32, v2s64});
getActionDefinitionsBuilder(G_INSERT)
- .unsupportedIf([=](const LegalityQuery &Query) {
- return Query.Types[0].getSizeInBits() <= Query.Types[1].getSizeInBits();
- })
- .legalIf([=](const LegalityQuery &Query) {
- const LLT &Ty0 = Query.Types[0];
- const LLT &Ty1 = Query.Types[1];
- if (Ty0 != s32 && Ty0 != s64 && Ty0 != p0)
- return false;
- return isPowerOf2_32(Ty1.getSizeInBits()) &&
- (Ty1.getSizeInBits() == 1 || Ty1.getSizeInBits() >= 8);
- })
- .clampScalar(0, s32, s64)
+ .legalIf(all(typeInSet(0, {s32, s64, p0}),
+ typeInSet(1, {s1, s8, s16, s32}), smallerThan(1, 0)))
.widenScalarToNextPow2(0)
+ .clampScalar(0, s32, s64)
+ .widenScalarToNextPow2(1)
+ .minScalar(1, s8)
.maxScalarIf(typeInSet(0, {s32}), 1, s16)
- .maxScalarIf(typeInSet(0, {s64}), 1, s32)
- .widenScalarToNextPow2(1);
+ .maxScalarIf(typeInSet(0, {s64, p0}), 1, s32);
getActionDefinitionsBuilder(G_EXTRACT)
- .unsupportedIf([=](const LegalityQuery &Query) {
- return Query.Types[0].getSizeInBits() >= Query.Types[1].getSizeInBits();
- })
- .legalIf([=](const LegalityQuery &Query) {
- const LLT &Ty0 = Query.Types[0];
- const LLT &Ty1 = Query.Types[1];
- if (Ty1 != s32 && Ty1 != s64 && Ty1 != s128)
- return false;
- if (Ty1 == p0)
- return true;
- return isPowerOf2_32(Ty0.getSizeInBits()) &&
- (Ty0.getSizeInBits() == 1 || Ty0.getSizeInBits() >= 8);
- })
- .clampScalar(1, s32, s128)
+ .legalIf(all(typeInSet(0, {s16, s32, s64, p0}),
+ typeInSet(1, {s32, s64, s128, p0}), smallerThan(0, 1)))
.widenScalarToNextPow2(1)
+ .clampScalar(1, s32, s128)
+ .widenScalarToNextPow2(0)
+ .minScalar(0, s16)
.maxScalarIf(typeInSet(1, {s32}), 0, s16)
- .maxScalarIf(typeInSet(1, {s64}), 0, s32)
- .widenScalarToNextPow2(0);
+ .maxScalarIf(typeInSet(1, {s64, p0}), 0, s32)
+ .maxScalarIf(typeInSet(1, {s128}), 0, s64);
getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
.lowerIf(atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered))
@@ -268,8 +271,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
{s64, p0, s64, 8},
{p0, p0, s64, 8},
{v2s32, p0, s64, 8}})
- .clampScalar(0, s32, s64)
.widenScalarToNextPow2(0)
+ .clampScalar(0, s32, s64)
// TODO: We could support sum-of-pow2's but the lowering code doesn't know
// how to do that yet.
.unsupportedIfMemSizeNotPow2()
@@ -285,6 +288,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
};
getActionDefinitionsBuilder(G_LOAD)
+ .customIf([=](const LegalityQuery &Query) {
+ return Query.Types[0] == s128 &&
+ Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
+ })
.legalForTypesWithMemDesc({{s8, p0, s8, 8},
{s16, p0, s16, 8},
{s32, p0, s32, 8},
@@ -300,9 +307,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
{v2s64, p0, s128, 8}})
// These extends are also legal
.legalForTypesWithMemDesc({{s32, p0, s8, 8}, {s32, p0, s16, 8}})
- .clampScalar(0, s8, s64)
+ .widenScalarToNextPow2(0, /* MinSize = */8)
.lowerIfMemSizeNotPow2()
- .widenScalarToNextPow2(0)
+ .clampScalar(0, s8, s64)
.narrowScalarIf([=](const LegalityQuery &Query) {
// Clamp extending load results to 32-bits.
return Query.Types[0].isScalar() &&
@@ -318,10 +325,15 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.clampMaxNumElements(0, s16, 8)
.clampMaxNumElements(0, s32, 4)
.clampMaxNumElements(0, s64, 2)
+ .clampMaxNumElements(0, p0, 2)
.customIf(IsPtrVecPred)
.scalarizeIf(typeIs(0, v2s16), 0);
getActionDefinitionsBuilder(G_STORE)
+ .customIf([=](const LegalityQuery &Query) {
+ return Query.Types[0] == s128 &&
+ Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
+ })
.legalForTypesWithMemDesc({{s8, p0, s8, 8},
{s16, p0, s8, 8}, // truncstorei8 from s16
{s32, p0, s8, 8}, // truncstorei8 from s32
@@ -353,6 +365,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.clampMaxNumElements(0, s16, 8)
.clampMaxNumElements(0, s32, 4)
.clampMaxNumElements(0, s64, 2)
+ .clampMaxNumElements(0, p0, 2)
.lowerIfMemSizeNotPow2()
.customIf(IsPtrVecPred)
.scalarizeIf(typeIs(0, v2s16), 0);
@@ -360,8 +373,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
// Constants
getActionDefinitionsBuilder(G_CONSTANT)
.legalFor({p0, s8, s16, s32, s64})
- .clampScalar(0, s8, s64)
- .widenScalarToNextPow2(0);
+ .widenScalarToNextPow2(0)
+ .clampScalar(0, s8, s64);
getActionDefinitionsBuilder(G_FCONSTANT)
.legalIf([=](const LegalityQuery &Query) {
const auto &Ty = Query.Types[0];
@@ -383,6 +396,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
{v8s16, v8s16},
{v8s8, v8s8},
{v16s8, v16s8}})
+ .widenScalarOrEltToNextPow2(1)
.clampScalar(1, s32, s64)
.clampScalar(0, s32, s32)
.minScalarEltSameAsIf(
@@ -399,7 +413,6 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.minScalarOrEltIf(
[=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0,
s64)
- .widenScalarOrEltToNextPow2(1)
.clampNumElements(0, v2s32, v4s32);
// Extensions
@@ -459,10 +472,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
// Conversions
getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
.legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
- .clampScalar(0, s32, s64)
.widenScalarToNextPow2(0)
- .clampScalar(1, s32, s64)
- .widenScalarToNextPow2(1);
+ .clampScalar(0, s32, s64)
+ .widenScalarToNextPow2(1)
+ .clampScalar(1, s32, s64);
getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
.legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
@@ -477,8 +490,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_SELECT)
.legalFor({{s32, s1}, {s64, s1}, {p0, s1}})
- .clampScalar(0, s32, s64)
.widenScalarToNextPow2(0)
+ .clampScalar(0, s32, s64)
.minScalarEltSameAsIf(all(isVector(0), isVector(1)), 1, 0)
.lowerIf(isVector(0));
@@ -492,6 +505,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_PTRTOINT)
.legalForCartesianProduct({s1, s8, s16, s32, s64}, {p0})
+ .legalFor({{v2s64, v2p0}})
.maxScalar(0, s64)
.widenScalarToNextPow2(0, /*Min*/ 8);
@@ -544,76 +558,30 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
-
- auto notValidElt = [](const LegalityQuery &Query, unsigned TypeIdx) {
- const LLT &Ty = Query.Types[TypeIdx];
- if (Ty.isVector()) {
- const LLT &EltTy = Ty.getElementType();
- if (EltTy.getSizeInBits() < 8 || EltTy.getSizeInBits() > 64)
- return true;
- if (!isPowerOf2_32(EltTy.getSizeInBits()))
- return true;
- }
- return false;
- };
-
- // FIXME: This rule is horrible, but specifies the same as what we had
- // before with the particularly strange definitions removed (e.g.
- // s8 = G_MERGE_VALUES s32, s32).
- // Part of the complexity comes from these ops being extremely flexible. For
- // example, you can build/decompose vectors with it, concatenate vectors,
- // etc. and in addition to this you can also bitcast with it at the same
- // time. We've been considering breaking it up into multiple ops to make it
- // more manageable throughout the backend.
getActionDefinitionsBuilder(Op)
- // Break up vectors with weird elements into scalars
- .fewerElementsIf(
- [=](const LegalityQuery &Query) { return notValidElt(Query, 0); },
- scalarize(0))
- .fewerElementsIf(
- [=](const LegalityQuery &Query) { return notValidElt(Query, 1); },
- scalarize(1))
- // Clamp the big scalar to s8-s128 and make it a power of 2.
- .clampScalar(BigTyIdx, s8, s128)
- .widenScalarIf(
- [=](const LegalityQuery &Query) {
- const LLT &Ty = Query.Types[BigTyIdx];
- return !isPowerOf2_32(Ty.getSizeInBits()) &&
- Ty.getSizeInBits() % 64 != 0;
- },
- [=](const LegalityQuery &Query) {
- // Pick the next power of 2, or a multiple of 64 over 128.
- // Whichever is smaller.
- const LLT &Ty = Query.Types[BigTyIdx];
- unsigned NewSizeInBits = 1
- << Log2_32_Ceil(Ty.getSizeInBits() + 1);
- if (NewSizeInBits >= 256) {
- unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1);
- if (RoundedTo < NewSizeInBits)
- NewSizeInBits = RoundedTo;
- }
- return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits));
- })
- // Clamp the little scalar to s8-s256 and make it a power of 2. It's not
- // worth considering the multiples of 64 since 2*192 and 2*384 are not
- // valid.
- .clampScalar(LitTyIdx, s8, s256)
- .widenScalarToNextPow2(LitTyIdx, /*Min*/ 8)
- // So at this point, we have s8, s16, s32, s64, s128, s192, s256, s384,
- // s512, <X x s8>, <X x s16>, <X x s32>, or <X x s64>.
- // At this point it's simple enough to accept the legal types.
- .legalIf([=](const LegalityQuery &Query) {
- const LLT &BigTy = Query.Types[BigTyIdx];
- const LLT &LitTy = Query.Types[LitTyIdx];
- if (BigTy.isVector() && BigTy.getSizeInBits() < 32)
+ .widenScalarToNextPow2(LitTyIdx, 8)
+ .widenScalarToNextPow2(BigTyIdx, 32)
+ .clampScalar(LitTyIdx, s8, s64)
+ .clampScalar(BigTyIdx, s32, s128)
+ .legalIf([=](const LegalityQuery &Q) {
+ switch (Q.Types[BigTyIdx].getSizeInBits()) {
+ case 32:
+ case 64:
+ case 128:
+ break;
+ default:
return false;
- if (LitTy.isVector() && LitTy.getSizeInBits() < 32)
+ }
+ switch (Q.Types[LitTyIdx].getSizeInBits()) {
+ case 8:
+ case 16:
+ case 32:
+ case 64:
+ return true;
+ default:
return false;
- return BigTy.getSizeInBits() % LitTy.getSizeInBits() == 0;
- })
- // Any vectors left are the wrong size. Scalarize them.
- .scalarize(0)
- .scalarize(1);
+ }
+ });
}
getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
@@ -626,7 +594,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
const LLT &VecTy = Query.Types[1];
return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 ||
VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32 ||
- VecTy == v16s8 || VecTy == v2s32 || VecTy == v2p0;
+ VecTy == v8s8 || VecTy == v16s8 || VecTy == v2s32 ||
+ VecTy == v2p0;
})
.minScalarOrEltIf(
[=](const LegalityQuery &Query) {
@@ -671,6 +640,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
{v2s64, s64}})
.clampNumElements(0, v4s32, v4s32)
.clampNumElements(0, v2s64, v2s64)
+ .minScalarOrElt(0, s8)
.minScalarSameAs(1, 0);
getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).lower();
@@ -682,7 +652,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).lower();
// TODO: Custom lowering for v2s32, v4s32, v2s64.
- getActionDefinitionsBuilder(G_BITREVERSE).legalFor({s32, s64, v8s8, v16s8});
+ getActionDefinitionsBuilder(G_BITREVERSE)
+ .legalFor({s32, s64, v8s8, v16s8})
+ .widenScalarToNextPow2(0, /*Min = */ 32)
+ .clampScalar(0, s32, s64);
getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF).lower();
@@ -716,7 +689,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.clampNumElements(0, v2s64, v2s64);
getActionDefinitionsBuilder(G_CONCAT_VECTORS)
- .legalFor({{v4s32, v2s32}, {v8s16, v4s16}});
+ .legalFor({{v4s32, v2s32}, {v8s16, v4s16}, {v16s8, v8s8}});
getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({{p0}, {s64}});
@@ -748,6 +721,28 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.clampMaxNumElements(1, s32, 4)
.lower();
+ getActionDefinitionsBuilder(
+ {G_VECREDUCE_OR, G_VECREDUCE_AND, G_VECREDUCE_XOR})
+ // Try to break down into smaller vectors as long as they're at least 64
+ // bits. This lets us use vector operations for some parts of the
+ // reduction.
+ .fewerElementsIf(
+ [=](const LegalityQuery &Q) {
+ LLT SrcTy = Q.Types[1];
+ if (SrcTy.isScalar())
+ return false;
+ if (!isPowerOf2_32(SrcTy.getNumElements()))
+ return false;
+ // We can usually perform 64b vector operations.
+ return SrcTy.getSizeInBits() > 64;
+ },
+ [=](const LegalityQuery &Q) {
+ LLT SrcTy = Q.Types[1];
+ return std::make_pair(1, SrcTy.divide(2));
+ })
+ .scalarize(1)
+ .lower();
+
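[Editor's note: a standalone sketch of the fewerElementsIf rule above. While the source of a logical VECREDUCE is a power-of-two vector wider than 64 bits, it is split in half so most of the reduction can run as 64-bit vector operations.]

#include <cstdio>

static bool isPowerOf2(unsigned N) { return N && !(N & (N - 1)); }

static void printReductionSteps(unsigned NumElts, unsigned EltBits) {
  // Mirrors the predicate: not scalar, power-of-two lane count, wider than 64b.
  while (NumElts > 1 && isPowerOf2(NumElts) && NumElts * EltBits > 64) {
    NumElts /= 2; // SrcTy.divide(2): same element type, half the lanes.
    std::printf("-> <%u x s%u>\n", NumElts, EltBits);
  }
}

int main() {
  // <16 x s16> (256 bits) halves to <8 x s16>, then to <4 x s16> (64 bits).
  printReductionSteps(16, 16);
}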
getActionDefinitionsBuilder({G_UADDSAT, G_USUBSAT})
.lowerIf([=](const LegalityQuery &Q) { return Q.Types[0].isScalar(); });
@@ -764,7 +759,6 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder({G_SBFX, G_UBFX})
.customFor({{s32, s32}, {s64, s64}});
- // TODO: Custom legalization for s128
// TODO: Use generic lowering when custom lowering is not possible.
auto always = [=](const LegalityQuery &Q) { return true; };
getActionDefinitionsBuilder(G_CTPOP)
@@ -775,12 +769,27 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.maxScalarEltSameAsIf(always, 1, 0)
.customFor({{s32, s32},
{s64, s64},
+ {s128, s128},
{v2s64, v2s64},
{v2s32, v2s32},
{v4s32, v4s32},
{v4s16, v4s16},
{v8s16, v8s16}});
+ // TODO: Vector types.
+ getActionDefinitionsBuilder({G_SADDSAT, G_SSUBSAT}).lowerIf(isScalar(0));
+
+ // TODO: Vector types.
+ getActionDefinitionsBuilder({G_FMAXNUM, G_FMINNUM})
+ .legalFor({MinFPScalar, s32, s64})
+ .libcallFor({s128})
+ .minScalar(0, MinFPScalar);
+
+ // TODO: Libcall support for s128.
+ // TODO: s16 should be legal with full FP16 support.
+ getActionDefinitionsBuilder({G_LROUND, G_LLROUND})
+ .legalFor({{s64, s32}, {s64, s64}});
+
getLegacyLegalizerInfo().computeTables();
verify(*ST.getInstrInfo());
}
@@ -799,8 +808,6 @@ bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
case TargetOpcode::G_LOAD:
case TargetOpcode::G_STORE:
return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
- case TargetOpcode::G_BSWAP:
- return legalizeBSwap(MI, MRI, MIRBuilder);
case TargetOpcode::G_SHL:
case TargetOpcode::G_ASHR:
case TargetOpcode::G_LSHR:
@@ -948,6 +955,37 @@ bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
MachineInstr &MI) const {
+ switch (MI.getIntrinsicID()) {
+ case Intrinsic::vacopy: {
+ unsigned PtrSize = ST->isTargetILP32() ? 4 : 8;
+ unsigned VaListSize =
+ (ST->isTargetDarwin() || ST->isTargetWindows())
+ ? PtrSize
+ : ST->isTargetILP32() ? 20 : 32;
+
+ MachineFunction &MF = *MI.getMF();
+ auto Val = MF.getRegInfo().createGenericVirtualRegister(
+ LLT::scalar(VaListSize * 8));
+ MachineIRBuilder MIB(MI);
+ MIB.buildLoad(Val, MI.getOperand(2),
+ *MF.getMachineMemOperand(MachinePointerInfo(),
+ MachineMemOperand::MOLoad,
+ VaListSize, Align(PtrSize)));
+ MIB.buildStore(Val, MI.getOperand(1),
+ *MF.getMachineMemOperand(MachinePointerInfo(),
+ MachineMemOperand::MOStore,
+ VaListSize, Align(PtrSize)));
+ MI.eraseFromParent();
+ return true;
+ }
+ case Intrinsic::get_dynamic_area_offset: {
+ MachineIRBuilder &MIB = Helper.MIRBuilder;
+ MIB.buildConstant(MI.getOperand(0).getReg(), 0);
+ MI.eraseFromParent();
+ return true;
+ }
+ }
+
return true;
}
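[Editor's note: a standalone sketch, under the same assumptions as the vacopy case above, of how the legalization sizes the block copy: Darwin and Windows use a single pointer as va_list, while the AAPCS va_list struct is 32 bytes (20 on ILP32).]

#include <cstdio>

static unsigned vaListSizeInBytes(bool IsDarwin, bool IsWindows, bool IsILP32) {
  unsigned PtrSize = IsILP32 ? 4 : 8;
  if (IsDarwin || IsWindows)
    return PtrSize;
  return IsILP32 ? 20 : 32;
}

int main() {
  std::printf("linux lp64: %u bytes\n", vaListSizeInBytes(false, false, false)); // 32
  std::printf("darwin:     %u bytes\n", vaListSizeInBytes(true, false, false));  // 8
}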
@@ -960,7 +998,7 @@ bool AArch64LegalizerInfo::legalizeShlAshrLshr(
// If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
// imported patterns can select it later. Either way, it will be legal.
Register AmtReg = MI.getOperand(2).getReg();
- auto VRegAndVal = getConstantVRegValWithLookThrough(AmtReg, MRI);
+ auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI);
if (!VRegAndVal)
return true;
// Check the shift amount is in range for an immediate form.
@@ -974,6 +1012,20 @@ bool AArch64LegalizerInfo::legalizeShlAshrLshr(
return true;
}
+static void matchLDPSTPAddrMode(Register Root, Register &Base, int &Offset,
+ MachineRegisterInfo &MRI) {
+ Base = Root;
+ Offset = 0;
+
+ Register NewBase;
+ int64_t NewOffset;
+ if (mi_match(Root, MRI, m_GPtrAdd(m_Reg(NewBase), m_ICst(NewOffset))) &&
+ isShiftedInt<7, 3>(NewOffset)) {
+ Base = NewBase;
+ Offset = NewOffset;
+ }
+}
+
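[Editor's note: a standalone sketch of the isShiftedInt<7, 3> test used in matchLDPSTPAddrMode above. LDP/STP take a signed 7-bit immediate scaled by 8, so a pointer offset can only be folded if it is a multiple of 8 in the range [-512, 504].]

#include <cstdint>
#include <cstdio>

static bool fitsLdpStpImm(int64_t Offset) {
  return (Offset % 8) == 0 && Offset >= -512 && Offset <= 504;
}

int main() {
  std::printf("%d %d %d\n",
              fitsLdpStpImm(16),  // 1: encoded as immediate 2 (16 / 8)
              fitsLdpStpImm(504), // 1: largest representable positive offset
              fitsLdpStpImm(20)); // 0: not a multiple of 8
}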
// FIXME: This should be removed and replaced with the generic bitcast legalize
// action.
bool AArch64LegalizerInfo::legalizeLoadStore(
@@ -993,6 +1045,36 @@ bool AArch64LegalizerInfo::legalizeLoadStore(
Register ValReg = MI.getOperand(0).getReg();
const LLT ValTy = MRI.getType(ValReg);
+ if (ValTy == LLT::scalar(128)) {
+ assert((*MI.memoperands_begin())->getSuccessOrdering() ==
+ AtomicOrdering::Monotonic ||
+ (*MI.memoperands_begin())->getSuccessOrdering() ==
+ AtomicOrdering::Unordered);
+ assert(ST->hasLSE2() && "ldp/stp not single copy atomic without +lse2");
+ LLT s64 = LLT::scalar(64);
+ MachineInstrBuilder NewI;
+ if (MI.getOpcode() == TargetOpcode::G_LOAD) {
+ NewI = MIRBuilder.buildInstr(AArch64::LDPXi, {s64, s64}, {});
+ MIRBuilder.buildMerge(ValReg, {NewI->getOperand(0), NewI->getOperand(1)});
+ } else {
+ auto Split = MIRBuilder.buildUnmerge(s64, MI.getOperand(0));
+ NewI = MIRBuilder.buildInstr(
+ AArch64::STPXi, {}, {Split->getOperand(0), Split->getOperand(1)});
+ }
+ Register Base;
+ int Offset;
+ matchLDPSTPAddrMode(MI.getOperand(1).getReg(), Base, Offset, MRI);
+ NewI.addUse(Base);
+ NewI.addImm(Offset / 8);
+
+ NewI.cloneMemRefs(MI);
+ constrainSelectedInstRegOperands(*NewI, *ST->getInstrInfo(),
+ *MRI.getTargetRegisterInfo(),
+ *ST->getRegBankInfo());
+ MI.eraseFromParent();
+ return true;
+ }
+
if (!ValTy.isVector() || !ValTy.getElementType().isPointer() ||
ValTy.getElementType().getAddressSpace() != 0) {
LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
@@ -1015,46 +1097,6 @@ bool AArch64LegalizerInfo::legalizeLoadStore(
return true;
}
-bool AArch64LegalizerInfo::legalizeBSwap(MachineInstr &MI,
- MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const {
- assert(MI.getOpcode() == TargetOpcode::G_BSWAP);
-
- // The <2 x half> case needs special lowering because there isn't an
- // instruction that does that directly. Instead, we widen to <8 x i8>
- // and emit a G_REV32 followed by a G_LSHR knowing that instruction selection
- // will later match them as:
- //
- // rev32.8b v0, v0
- // ushr.2s v0, v0, #16
- //
- // We could emit those here directly, but it seems better to keep things as
- // generic as possible through legalization, and avoid committing layering
- // violations by legalizing & selecting here at the same time.
-
- Register ValReg = MI.getOperand(1).getReg();
- assert(LLT::fixed_vector(2, 16) == MRI.getType(ValReg));
- const LLT v2s32 = LLT::fixed_vector(2, 32);
- const LLT v8s8 = LLT::fixed_vector(8, 8);
- const LLT s32 = LLT::scalar(32);
-
- auto Undef = MIRBuilder.buildUndef(v8s8);
- auto Insert =
- MIRBuilder
- .buildInstr(TargetOpcode::INSERT_SUBREG, {v8s8}, {Undef, ValReg})
- .addImm(AArch64::ssub);
- auto Rev32 = MIRBuilder.buildInstr(AArch64::G_REV32, {v8s8}, {Insert});
- auto Bitcast = MIRBuilder.buildBitcast(v2s32, Rev32);
- auto Amt = MIRBuilder.buildConstant(v2s32, 16);
- auto UShr =
- MIRBuilder.buildInstr(TargetOpcode::G_LSHR, {v2s32}, {Bitcast, Amt});
- auto Zero = MIRBuilder.buildConstant(s32, 0);
- auto Extract = MIRBuilder.buildExtractVectorElement(s32, UShr, Zero);
- MIRBuilder.buildBitcast({MI.getOperand(0).getReg()}, Extract);
- MI.eraseFromParent();
- return true;
-}
-
bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder) const {
@@ -1107,8 +1149,8 @@ bool AArch64LegalizerInfo::legalizeBitfieldExtract(
MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
// Only legal if we can select immediate forms.
// TODO: Lower this otherwise.
- return getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI) &&
- getConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
+ return getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI) &&
+ getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
}
bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
@@ -1151,8 +1193,7 @@ bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
// v8s16,v4s32,v2s64 -> v16i8
LLT VTy = Size == 128 ? LLT::fixed_vector(16, 8) : LLT::fixed_vector(8, 8);
if (Ty.isScalar()) {
- // TODO: Handle s128.
- assert((Size == 32 || Size == 64) && "Expected only 32 or 64 bit scalars!");
+ assert((Size == 32 || Size == 64 || Size == 128) &&
+        "Expected only 32, 64, or 128 bit scalars!");
if (Size == 32) {
Val = MIRBuilder.buildZExt(LLT::scalar(64), Val).getReg(0);
}
@@ -1198,7 +1239,7 @@ bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
}
// Post-conditioning.
- if (Ty.isScalar() && Size == 64)
+ if (Ty.isScalar() && (Size == 64 || Size == 128))
MIRBuilder.buildZExt(Dst, UADD);
else
UADD->getOperand(0).setReg(Dst);
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
index 78fc24559d71..35456d95dc2b 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
@@ -35,8 +35,6 @@ public:
MachineInstr &MI) const override;
private:
- bool legalizeBSwap(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const;
bool legalizeVaArg(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder) const;
bool legalizeLoadStore(MachineInstr &MI, MachineRegisterInfo &MRI,
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
index b700c3760a58..a9b3792e0118 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -55,7 +55,7 @@ bool matchExtractVecEltPairwiseAdd(
Register Src2 = MI.getOperand(2).getReg();
LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
- auto Cst = getConstantVRegValWithLookThrough(Src2, MRI);
+ auto Cst = getIConstantVRegValWithLookThrough(Src2, MRI);
if (!Cst || Cst->Value != 0)
return false;
// SDAG also checks for FullFP16, but this looks to be beneficial anyway.
@@ -129,7 +129,7 @@ bool matchAArch64MulConstCombine(
const LLT Ty = MRI.getType(LHS);
// The below optimizations require a constant RHS.
- auto Const = getConstantVRegValWithLookThrough(RHS, MRI);
+ auto Const = getIConstantVRegValWithLookThrough(RHS, MRI);
if (!Const)
return false;
@@ -262,6 +262,33 @@ void applyFoldMergeToZext(MachineInstr &MI, MachineRegisterInfo &MRI,
Observer.changedInstr(MI);
}
+/// \returns True if a G_ANYEXT instruction \p MI should be mutated to a G_ZEXT
+/// instruction.
+static bool matchMutateAnyExtToZExt(MachineInstr &MI, MachineRegisterInfo &MRI) {
+ // If this is coming from a scalar compare then we can use a G_ZEXT instead of
+ // a G_ANYEXT:
+ //
+ // %cmp:_(s32) = G_[I|F]CMP ... <-- produces 0/1.
+ // %ext:_(s64) = G_ANYEXT %cmp(s32)
+ //
+ // By doing this, we can leverage more KnownBits combines.
+ assert(MI.getOpcode() == TargetOpcode::G_ANYEXT);
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ return MRI.getType(Dst).isScalar() &&
+ mi_match(Src, MRI,
+ m_any_of(m_GICmp(m_Pred(), m_Reg(), m_Reg()),
+ m_GFCmp(m_Pred(), m_Reg(), m_Reg())));
+}
+
+static void applyMutateAnyExtToZExt(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B,
+ GISelChangeObserver &Observer) {
+ Observer.changingInstr(MI);
+ MI.setDesc(B.getTII().get(TargetOpcode::G_ZEXT));
+ Observer.changedInstr(MI);
+}
+
#define AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
#include "AArch64GenPostLegalizeGICombiner.inc"
#undef AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
index 84ecb4ba6964..3ff67d188822 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -527,7 +527,7 @@ tryAdjustICmpImmAndPred(Register RHS, CmpInst::Predicate P,
// If the RHS is not a constant, or the RHS is already a valid arithmetic
// immediate, then there is nothing to change.
- auto ValAndVReg = getConstantVRegValWithLookThrough(RHS, MRI);
+ auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS, MRI);
if (!ValAndVReg)
return None;
uint64_t C = ValAndVReg->Value.getZExtValue();
@@ -757,7 +757,7 @@ static unsigned getCmpOperandFoldingProfit(Register CmpOp,
if (MI.getOpcode() != TargetOpcode::G_AND)
return false;
auto ValAndVReg =
- getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
+ getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
if (!ValAndVReg)
return false;
uint64_t Mask = ValAndVReg->Value.getZExtValue();
@@ -774,7 +774,7 @@ static unsigned getCmpOperandFoldingProfit(Register CmpOp,
return 0;
auto MaybeShiftAmt =
- getConstantVRegValWithLookThrough(Def->getOperand(2).getReg(), MRI);
+ getIConstantVRegValWithLookThrough(Def->getOperand(2).getReg(), MRI);
if (!MaybeShiftAmt)
return 0;
uint64_t ShiftAmt = MaybeShiftAmt->Value.getZExtValue();
@@ -814,7 +814,7 @@ static bool trySwapICmpOperands(MachineInstr &MI,
// Don't swap if there's a constant on the RHS, because we know we can fold
// that.
Register RHS = MI.getOperand(3).getReg();
- auto RHSCst = getConstantVRegValWithLookThrough(RHS, MRI);
+ auto RHSCst = getIConstantVRegValWithLookThrough(RHS, MRI);
if (RHSCst && isLegalArithImmed(RHSCst->Value.getSExtValue()))
return false;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
index 9efbcbb0065b..d3f4130d2ba1 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
@@ -146,8 +146,8 @@ static bool matchFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
for (auto &UseInstr : MRI.use_nodbg_instructions(Dst)) {
if (UseInstr.getOpcode() != TargetOpcode::G_PTR_ADD)
return false;
- auto Cst =
- getConstantVRegValWithLookThrough(UseInstr.getOperand(2).getReg(), MRI);
+ auto Cst = getIConstantVRegValWithLookThrough(
+ UseInstr.getOperand(2).getReg(), MRI);
if (!Cst)
return false;
MinOffset = std::min(MinOffset, Cst->Value.getZExtValue());
@@ -220,6 +220,121 @@ static bool applyFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
return true;
}
+static bool tryToSimplifyUADDO(MachineInstr &MI, MachineIRBuilder &B,
+ CombinerHelper &Helper,
+ GISelChangeObserver &Observer) {
+ // Try to simplify G_UADDO with 8 or 16 bit operands to wide G_ADD and TBNZ if
+ // result is only used in the no-overflow case. It is restricted to cases
+ // where we know that the high-bits of the operands are 0. If there's an
+ // overflow, then the 9th or 17th bit must be set, which can be checked
+ // using TBNZ.
+ //
+ // Change (for UADDOs on 8 and 16 bits):
+ //
+ // %z0 = G_ASSERT_ZEXT _
+ // %op0 = G_TRUNC %z0
+ // %z1 = G_ASSERT_ZEXT _
+ // %op1 = G_TRUNC %z1
+ // %val, %cond = G_UADDO %op0, %op1
+ // G_BRCOND %cond, %error.bb
+ //
+ // error.bb:
+ // (no successors and no uses of %val)
+ //
+ // To:
+ //
+ // %z0 = G_ASSERT_ZEXT _
+ // %z1 = G_ASSERT_ZEXT _
+ // %add = G_ADD %z0, %z1
+ // %val = G_TRUNC %add
+ // %bit = G_AND %add, 1 << scalar-size-in-bits(%op1)
+ // %cond = G_ICMP NE, %bit, 0
+ // G_BRCOND %cond, %error.bb
+
+ auto &MRI = *B.getMRI();
+
+ MachineOperand *DefOp0 = MRI.getOneDef(MI.getOperand(2).getReg());
+ MachineOperand *DefOp1 = MRI.getOneDef(MI.getOperand(3).getReg());
+ Register Op0Wide;
+ Register Op1Wide;
+ if (!mi_match(DefOp0->getParent(), MRI, m_GTrunc(m_Reg(Op0Wide))) ||
+ !mi_match(DefOp1->getParent(), MRI, m_GTrunc(m_Reg(Op1Wide))))
+ return false;
+ LLT WideTy0 = MRI.getType(Op0Wide);
+ LLT WideTy1 = MRI.getType(Op1Wide);
+ Register ResVal = MI.getOperand(0).getReg();
+ LLT OpTy = MRI.getType(ResVal);
+ MachineInstr *Op0WideDef = MRI.getVRegDef(Op0Wide);
+ MachineInstr *Op1WideDef = MRI.getVRegDef(Op1Wide);
+
+ unsigned OpTySize = OpTy.getScalarSizeInBits();
+ // First check that the G_TRUNCs feeding the G_UADDO are no-ops, because the
+ // inputs have been zero-extended.
+ if (Op0WideDef->getOpcode() != TargetOpcode::G_ASSERT_ZEXT ||
+ Op1WideDef->getOpcode() != TargetOpcode::G_ASSERT_ZEXT ||
+ OpTySize != Op0WideDef->getOperand(2).getImm() ||
+ OpTySize != Op1WideDef->getOperand(2).getImm())
+ return false;
+
+ // Only scalar UADDO with either 8 or 16 bit operands are handled.
+ if (!WideTy0.isScalar() || !WideTy1.isScalar() || WideTy0 != WideTy1 ||
+ OpTySize >= WideTy0.getScalarSizeInBits() ||
+ (OpTySize != 8 && OpTySize != 16))
+ return false;
+
+ // The overflow-status result must be used by a branch only.
+ Register ResStatus = MI.getOperand(1).getReg();
+ if (!MRI.hasOneNonDBGUse(ResStatus))
+ return false;
+ MachineInstr *CondUser = &*MRI.use_instr_nodbg_begin(ResStatus);
+ if (CondUser->getOpcode() != TargetOpcode::G_BRCOND)
+ return false;
+
+ // Make sure the computed result is only used in the no-overflow blocks.
+ MachineBasicBlock *CurrentMBB = MI.getParent();
+ MachineBasicBlock *FailMBB = CondUser->getOperand(1).getMBB();
+ if (!FailMBB->succ_empty() || CondUser->getParent() != CurrentMBB)
+ return false;
+ if (any_of(MRI.use_nodbg_instructions(ResVal),
+ [&MI, FailMBB, CurrentMBB](MachineInstr &I) {
+ return &MI != &I &&
+ (I.getParent() == FailMBB || I.getParent() == CurrentMBB);
+ }))
+ return false;
+
+ // Remove the G_UADDO.
+ B.setInstrAndDebugLoc(*MI.getNextNode());
+ MI.eraseFromParent();
+
+ // Emit wide add.
+ Register AddDst = MRI.cloneVirtualRegister(Op0Wide);
+ B.buildInstr(TargetOpcode::G_ADD, {AddDst}, {Op0Wide, Op1Wide});
+
+ // Emit check of the 9th or 17th bit and update users (the branch). This will
+ // later be folded to TBNZ.
+ Register CondBit = MRI.cloneVirtualRegister(Op0Wide);
+ B.buildAnd(
+ CondBit, AddDst,
+ B.buildConstant(LLT::scalar(32), OpTySize == 8 ? 1 << 8 : 1 << 16));
+ B.buildICmp(CmpInst::ICMP_NE, ResStatus, CondBit,
+ B.buildConstant(LLT::scalar(32), 0));
+
+ // Update ZEXts users of the result value. Because all uses are in the
+ // no-overflow case, we know that the top bits are 0 and we can ignore ZExts.
+ B.buildZExtOrTrunc(ResVal, AddDst);
+ for (MachineOperand &U : make_early_inc_range(MRI.use_operands(ResVal))) {
+ Register WideReg;
+ if (mi_match(U.getParent(), MRI, m_GZExt(m_Reg(WideReg)))) {
+ auto OldR = U.getParent()->getOperand(0).getReg();
+ Observer.erasingInstr(*U.getParent());
+ U.getParent()->eraseFromParent();
+ Helper.replaceRegWith(MRI, OldR, AddDst);
+ }
+ }
+
+ return true;
+}
+
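[Editor's note: a standalone sketch of the idea behind tryToSimplifyUADDO above. When both 8/16-bit operands are known zero-extended in a wider register, unsigned overflow is exactly the 9th (or 17th) bit of the wide sum, so the G_UADDO can become a plain wide add plus a single-bit test that later folds to TBNZ.]

#include <cassert>
#include <cstdint>

struct NarrowAdd {
  uint32_t Value; // Truncated result of the add.
  bool Overflow;  // Did the narrow add wrap?
};

static NarrowAdd addWithOverflowBit(uint32_t ZExtA, uint32_t ZExtB,
                                    unsigned NarrowBits) {
  assert(NarrowBits == 8 || NarrowBits == 16);
  uint32_t Wide = ZExtA + ZExtB;            // Wide G_ADD on zero-extended inputs.
  bool Overflow = (Wide >> NarrowBits) & 1; // Bit 8 or bit 16 of the sum.
  uint32_t Mask = (1u << NarrowBits) - 1;
  return {Wide & Mask, Overflow};
}

int main() {
  NarrowAdd R = addWithOverflowBit(200, 100, 8); // 300 wraps in 8 bits.
  assert(R.Overflow && R.Value == 44);
  NarrowAdd S = addWithOverflowBit(200, 40, 8);  // 240 fits in 8 bits.
  assert(!S.Overflow && S.Value == 240);
  return 0;
}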
class AArch64PreLegalizerCombinerHelperState {
protected:
CombinerHelper &Helper;
@@ -272,6 +387,8 @@ bool AArch64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
return Helper.tryCombineConcatVectors(MI);
case TargetOpcode::G_SHUFFLE_VECTOR:
return Helper.tryCombineShuffleVector(MI);
+ case TargetOpcode::G_UADDO:
+ return tryToSimplifyUADDO(MI, B, Helper, Observer);
case TargetOpcode::G_MEMCPY_INLINE:
return Helper.tryEmitMemcpyInline(MI);
case TargetOpcode::G_MEMCPY:
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 8c34027f7bb3..40ddf6a94f73 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -13,8 +13,11 @@
#include "AArch64RegisterBankInfo.h"
#include "AArch64InstrInfo.h"
+#include "AArch64RegisterInfo.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
@@ -271,6 +274,7 @@ AArch64RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
case AArch64::WSeqPairsClassRegClassID:
case AArch64::XSeqPairsClassRegClassID:
case AArch64::MatrixIndexGPR32_12_15RegClassID:
+ case AArch64::GPR64_with_sub_32_in_MatrixIndexGPR32_12_15RegClassID:
return getRegBank(AArch64::GPRRegBankID);
case AArch64::CCRRegClassID:
return getRegBank(AArch64::CCRegBankID);
@@ -424,6 +428,8 @@ static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) {
case TargetOpcode::G_FRINT:
case TargetOpcode::G_INTRINSIC_TRUNC:
case TargetOpcode::G_INTRINSIC_ROUND:
+ case TargetOpcode::G_FMAXNUM:
+ case TargetOpcode::G_FMINNUM:
return true;
}
return false;
@@ -529,6 +535,8 @@ bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
case TargetOpcode::G_FPTOSI:
case TargetOpcode::G_FPTOUI:
case TargetOpcode::G_FCMP:
+ case TargetOpcode::G_LROUND:
+ case TargetOpcode::G_LLROUND:
return true;
default:
break;
@@ -747,24 +755,33 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// for the greedy mode the cost of the cross bank copy will
// offset this number.
// FIXME: Should be derived from the scheduling model.
- if (OpRegBankIdx[0] != PMI_FirstGPR)
+ if (OpRegBankIdx[0] != PMI_FirstGPR) {
Cost = 2;
- else
- // Check if that load feeds fp instructions.
- // In that case, we want the default mapping to be on FPR
- // instead of blind map every scalar to GPR.
- for (const MachineInstr &UseMI :
- MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) {
- // If we have at least one direct use in a FP instruction,
- // assume this was a floating point load in the IR.
- // If it was not, we would have had a bitcast before
- // reaching that instruction.
- // Int->FP conversion operations are also captured in onlyDefinesFP().
- if (onlyUsesFP(UseMI, MRI, TRI) || onlyDefinesFP(UseMI, MRI, TRI)) {
- OpRegBankIdx[0] = PMI_FirstFPR;
- break;
- }
- }
+ break;
+ }
+
+ if (cast<GLoad>(MI).isAtomic()) {
+ // Atomics always use GPR destinations. Don't refine any further.
+ OpRegBankIdx[0] = PMI_FirstGPR;
+ break;
+ }
+
+ // Check if that load feeds fp instructions.
+ // In that case, we want the default mapping to be on FPR
+ // instead of blind map every scalar to GPR.
+ if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
+ [&](const MachineInstr &UseMI) {
+ // If we have at least one direct use in a FP instruction,
+ // assume this was a floating point load in the IR. If it was
+ // not, we would have had a bitcast before reaching that
+ // instruction.
+ //
+ // Int->FP conversion operations are also captured in
+ // onlyDefinesFP().
+ return onlyUsesFP(UseMI, MRI, TRI) ||
+ onlyDefinesFP(UseMI, MRI, TRI);
+ }))
+ OpRegBankIdx[0] = PMI_FirstFPR;
break;
case TargetOpcode::G_STORE:
// Check if that store is fed by fp instructions.
@@ -957,6 +974,12 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
}
break;
}
+ case TargetOpcode::G_LROUND:
+ case TargetOpcode::G_LLROUND: {
+ // Source is always floating point and destination is always integer.
+ OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
+ break;
+ }
}
// Finally construct the computed mapping.
diff --git a/llvm/lib/Target/AArch64/GISel/select-saddo.mir b/llvm/lib/Target/AArch64/GISel/select-saddo.mir
deleted file mode 100644
index 6f05bd7ac838..000000000000
--- a/llvm/lib/Target/AArch64/GISel/select-saddo.mir
+++ /dev/null
@@ -1,158 +0,0 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -verify-machineinstrs -mtriple aarch64-unknown-uknown -global-isel -run-pass=instruction-select %s -o - | FileCheck %s
-
-...
----
-name: saddo_s32
-alignment: 4
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-body: |
- bb.1.entry:
- liveins: $w0, $w1, $x2
-
- ; CHECK-LABEL: name: saddo_s32
- ; CHECK: liveins: $w0, $w1, $x2
- ; CHECK: %reg0:gpr32 = COPY $w0
- ; CHECK: %reg1:gpr32 = COPY $w1
- ; CHECK: %saddo:gpr32 = ADDSWrr %reg0, %reg1, implicit-def $nzcv
- ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
- ; CHECK: $w0 = COPY %saddo
- ; CHECK: RET_ReallyLR implicit $w0
- %reg0:gpr(s32) = COPY $w0
- %reg1:gpr(s32) = COPY $w1
- %saddo:gpr(s32), %4:gpr(s1) = G_SADDO %reg0, %reg1
- $w0 = COPY %saddo(s32)
- RET_ReallyLR implicit $w0
-
-...
----
-name: saddo_s64
-alignment: 4
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-body: |
- bb.1.entry:
- liveins: $x0, $x1, $x2
-
- ; CHECK-LABEL: name: saddo_s64
- ; CHECK: liveins: $x0, $x1, $x2
- ; CHECK: %reg0:gpr64 = COPY $x0
- ; CHECK: %reg1:gpr64 = COPY $x1
- ; CHECK: %saddo:gpr64 = ADDSXrr %reg0, %reg1, implicit-def $nzcv
- ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
- ; CHECK: $x0 = COPY %saddo
- ; CHECK: RET_ReallyLR implicit $x0
- %reg0:gpr(s64) = COPY $x0
- %reg1:gpr(s64) = COPY $x1
- %saddo:gpr(s64), %4:gpr(s1) = G_SADDO %reg0, %reg1
- $x0 = COPY %saddo(s64)
- RET_ReallyLR implicit $x0
-
-...
----
-name: saddo_s32_imm
-alignment: 4
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-body: |
- bb.1.entry:
- liveins: $w0, $w1, $x2
- ; Check that we get ADDSWri when we can fold in a constant.
- ;
- ; CHECK-LABEL: name: saddo_s32_imm
- ; CHECK: liveins: $w0, $w1, $x2
- ; CHECK: %copy:gpr32sp = COPY $w0
- ; CHECK: %saddo:gpr32 = ADDSWri %copy, 16, 0, implicit-def $nzcv
- ; CHECK: %overflow:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
- ; CHECK: $w0 = COPY %saddo
- ; CHECK: RET_ReallyLR implicit $w0
- %copy:gpr(s32) = COPY $w0
- %constant:gpr(s32) = G_CONSTANT i32 16
- %saddo:gpr(s32), %overflow:gpr(s1) = G_SADDO %copy, %constant
- $w0 = COPY %saddo(s32)
- RET_ReallyLR implicit $w0
-
-...
----
-name: saddo_s32_shifted
-alignment: 4
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-body: |
- bb.1.entry:
- liveins: $w0, $w1, $x2
- ; Check that we get ADDSWrs when we can fold in a shift.
- ;
- ; CHECK-LABEL: name: saddo_s32_shifted
- ; CHECK: liveins: $w0, $w1, $x2
- ; CHECK: %reg0:gpr32 = COPY $w0
- ; CHECK: %reg1:gpr32 = COPY $w1
- ; CHECK: %add:gpr32 = ADDSWrs %reg0, %reg1, 16, implicit-def $nzcv
- ; CHECK: %overflow:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
- ; CHECK: $w0 = COPY %add
- ; CHECK: RET_ReallyLR implicit $w0
- %reg0:gpr(s32) = COPY $w0
- %reg1:gpr(s32) = COPY $w1
- %constant:gpr(s32) = G_CONSTANT i32 16
- %shift:gpr(s32) = G_SHL %reg1(s32), %constant(s32)
- %add:gpr(s32), %overflow:gpr(s1) = G_SADDO %reg0, %shift
- $w0 = COPY %add(s32)
- RET_ReallyLR implicit $w0
-
-...
----
-name: saddo_s32_neg_imm
-alignment: 4
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-body: |
- bb.1.entry:
- liveins: $w0, $w1, $x2
- ; Check that we get SUBSWri when we can fold in a negative constant.
- ;
- ; CHECK-LABEL: name: saddo_s32_neg_imm
- ; CHECK: liveins: $w0, $w1, $x2
- ; CHECK: %copy:gpr32sp = COPY $w0
- ; CHECK: %add:gpr32 = SUBSWri %copy, 16, 0, implicit-def $nzcv
- ; CHECK: %overflow:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
- ; CHECK: $w0 = COPY %add
- ; CHECK: RET_ReallyLR implicit $w0
- %copy:gpr(s32) = COPY $w0
- %constant:gpr(s32) = G_CONSTANT i32 -16
- %add:gpr(s32), %overflow:gpr(s1) = G_SADDO %copy, %constant
- $w0 = COPY %add(s32)
- RET_ReallyLR implicit $w0
-
-...
----
-name: saddo_arith_extended
-alignment: 4
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-body: |
- bb.1.entry:
- liveins: $w0, $x0
- ; Check that we get ADDSXrx.
- ; CHECK-LABEL: name: saddo_arith_extended
- ; CHECK: liveins: $w0, $x0
- ; CHECK: %reg0:gpr64sp = COPY $x0
- ; CHECK: %reg1:gpr32 = COPY $w0
- ; CHECK: %add:gpr64 = ADDSXrx %reg0, %reg1, 18, implicit-def $nzcv
- ; CHECK: %flags:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
- ; CHECK: $x0 = COPY %add
- ; CHECK: RET_ReallyLR implicit $x0
- %reg0:gpr(s64) = COPY $x0
- %reg1:gpr(s32) = COPY $w0
- %ext:gpr(s64) = G_ZEXT %reg1(s32)
- %cst:gpr(s64) = G_CONSTANT i64 2
- %shift:gpr(s64) = G_SHL %ext, %cst(s64)
- %add:gpr(s64), %flags:gpr(s1) = G_SADDO %reg0, %shift
- $x0 = COPY %add(s64)
- RET_ReallyLR implicit $x0
diff --git a/llvm/lib/Target/AArch64/GISel/select-ssubo.mir b/llvm/lib/Target/AArch64/GISel/select-ssubo.mir
deleted file mode 100644
index f6b1794645f7..000000000000
--- a/llvm/lib/Target/AArch64/GISel/select-ssubo.mir
+++ /dev/null
@@ -1,158 +0,0 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -verify-machineinstrs -mtriple aarch64-unknown-uknown -global-isel -run-pass=instruction-select %s -o - | FileCheck %s
-
-...
----
-name: ssubo_s32
-alignment: 4
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-body: |
- bb.1.entry:
- liveins: $w0, $w1, $x2
-
- ; CHECK-LABEL: name: ssubo_s32
- ; CHECK: liveins: $w0, $w1, $x2
- ; CHECK: %reg0:gpr32 = COPY $w0
- ; CHECK: %reg1:gpr32 = COPY $w1
- ; CHECK: %ssubo:gpr32 = SUBSWrr %reg0, %reg1, implicit-def $nzcv
- ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
- ; CHECK: $w0 = COPY %ssubo
- ; CHECK: RET_ReallyLR implicit $w0
- %reg0:gpr(s32) = COPY $w0
- %reg1:gpr(s32) = COPY $w1
- %ssubo:gpr(s32), %4:gpr(s1) = G_SSUBO %reg0, %reg1
- $w0 = COPY %ssubo(s32)
- RET_ReallyLR implicit $w0
-
-...
----
-name: ssubo_s64
-alignment: 4
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-body: |
- bb.1.entry:
- liveins: $x0, $x1, $x2
-
- ; CHECK-LABEL: name: ssubo_s64
- ; CHECK: liveins: $x0, $x1, $x2
- ; CHECK: %reg0:gpr64 = COPY $x0
- ; CHECK: %reg1:gpr64 = COPY $x1
- ; CHECK: %ssubo:gpr64 = SUBSXrr %reg0, %reg1, implicit-def $nzcv
- ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
- ; CHECK: $x0 = COPY %ssubo
- ; CHECK: RET_ReallyLR implicit $x0
- %reg0:gpr(s64) = COPY $x0
- %reg1:gpr(s64) = COPY $x1
- %ssubo:gpr(s64), %4:gpr(s1) = G_SSUBO %reg0, %reg1
- $x0 = COPY %ssubo(s64)
- RET_ReallyLR implicit $x0
-
-...
----
-name: ssubo_s32_imm
-alignment: 4
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-body: |
- bb.1.entry:
- liveins: $w0, $w1, $x2
- ; Check that we get SUBSWri when we can fold in a constant.
- ;
- ; CHECK-LABEL: name: ssubo_s32_imm
- ; CHECK: liveins: $w0, $w1, $x2
- ; CHECK: %copy:gpr32sp = COPY $w0
- ; CHECK: %ssubo:gpr32 = SUBSWri %copy, 16, 0, implicit-def $nzcv
- ; CHECK: %overflow:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
- ; CHECK: $w0 = COPY %ssubo
- ; CHECK: RET_ReallyLR implicit $w0
- %copy:gpr(s32) = COPY $w0
- %constant:gpr(s32) = G_CONSTANT i32 16
- %ssubo:gpr(s32), %overflow:gpr(s1) = G_SSUBO %copy, %constant
- $w0 = COPY %ssubo(s32)
- RET_ReallyLR implicit $w0
-
-...
----
-name: ssubo_s32_shifted
-alignment: 4
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-body: |
- bb.1.entry:
- liveins: $w0, $w1, $x2
- ; Check that we get SUBSWrs when we can fold in a shift.
- ;
- ; CHECK-LABEL: name: ssubo_s32_shifted
- ; CHECK: liveins: $w0, $w1, $x2
- ; CHECK: %reg0:gpr32 = COPY $w0
- ; CHECK: %reg1:gpr32 = COPY $w1
- ; CHECK: %sub:gpr32 = SUBSWrs %reg0, %reg1, 16, implicit-def $nzcv
- ; CHECK: %overflow:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
- ; CHECK: $w0 = COPY %sub
- ; CHECK: RET_ReallyLR implicit $w0
- %reg0:gpr(s32) = COPY $w0
- %reg1:gpr(s32) = COPY $w1
- %constant:gpr(s32) = G_CONSTANT i32 16
- %shift:gpr(s32) = G_SHL %reg1(s32), %constant(s32)
- %sub:gpr(s32), %overflow:gpr(s1) = G_SSUBO %reg0, %shift
- $w0 = COPY %sub(s32)
- RET_ReallyLR implicit $w0
-
-...
----
-name: ssubo_s32_neg_imm
-alignment: 4
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-body: |
- bb.1.entry:
- liveins: $w0, $w1, $x2
- ; Check that we get ADDSWri when we can fold in a negative constant.
- ;
- ; CHECK-LABEL: name: ssubo_s32_neg_imm
- ; CHECK: liveins: $w0, $w1, $x2
- ; CHECK: %copy:gpr32sp = COPY $w0
- ; CHECK: %sub:gpr32 = ADDSWri %copy, 16, 0, implicit-def $nzcv
- ; CHECK: %overflow:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
- ; CHECK: $w0 = COPY %sub
- ; CHECK: RET_ReallyLR implicit $w0
- %copy:gpr(s32) = COPY $w0
- %constant:gpr(s32) = G_CONSTANT i32 -16
- %sub:gpr(s32), %overflow:gpr(s1) = G_SSUBO %copy, %constant
- $w0 = COPY %sub(s32)
- RET_ReallyLR implicit $w0
-
-...
----
-name: ssubo_arith_extended
-alignment: 4
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-body: |
- bb.1.entry:
- liveins: $w0, $x0
- ; Check that we get SUBSXrx.
- ; CHECK-LABEL: name: ssubo_arith_extended
- ; CHECK: liveins: $w0, $x0
- ; CHECK: %reg0:gpr64sp = COPY $x0
- ; CHECK: %reg1:gpr32 = COPY $w0
- ; CHECK: %sub:gpr64 = SUBSXrx %reg0, %reg1, 18, implicit-def $nzcv
- ; CHECK: %flags:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
- ; CHECK: $x0 = COPY %sub
- ; CHECK: RET_ReallyLR implicit $x0
- %reg0:gpr(s64) = COPY $x0
- %reg1:gpr(s32) = COPY $w0
- %ext:gpr(s64) = G_ZEXT %reg1(s32)
- %cst:gpr(s64) = G_CONSTANT i64 2
- %shift:gpr(s64) = G_SHL %ext, %cst(s64)
- %sub:gpr(s64), %flags:gpr(s1) = G_SSUBO %reg0, %shift
- $x0 = COPY %sub(s64)
- RET_ReallyLR implicit $x0
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
index c3e74757675b..876526093591 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
@@ -13,6 +13,7 @@
#ifndef LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64ADDRESSINGMODES_H
#define LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64ADDRESSINGMODES_H
+#include "AArch64ExpandImm.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/bit.h"
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index 290fe88a8cec..dbb8e85713cb 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -24,9 +24,9 @@
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
namespace {
@@ -92,7 +92,8 @@ public:
const MCAsmLayout &Layout) const override;
void relaxInstruction(MCInst &Inst,
const MCSubtargetInfo &STI) const override;
- bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
+ bool writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const override;
unsigned getFixupKindContainereSizeInBytes(unsigned Kind) const;
@@ -159,8 +160,11 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, const MCValue &Target,
return AdrImmBits(Value & 0x1fffffULL);
case AArch64::fixup_aarch64_pcrel_adrp_imm21:
assert(!IsResolved);
- if (TheTriple.isOSBinFormatCOFF())
+ if (TheTriple.isOSBinFormatCOFF()) {
+ if (!isInt<21>(SignedValue))
+ Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
return AdrImmBits(Value & 0x1fffffULL);
+ }
return AdrImmBits((Value & 0x1fffff000ULL) >> 12);
case AArch64::fixup_aarch64_ldr_pcrel_imm19:
case AArch64::fixup_aarch64_pcrel_branch19:
@@ -456,7 +460,8 @@ void AArch64AsmBackend::relaxInstruction(MCInst &Inst,
llvm_unreachable("AArch64AsmBackend::relaxInstruction() unimplemented");
}
-bool AArch64AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
+bool AArch64AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const {
// If the count is not 4-byte aligned, we must be writing data into the text
// section (otherwise we have unaligned instructions, and thus have far
// bigger problems), so just write zeros instead.
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
index cd1bfed9d40d..ee0870d9ef7a 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
@@ -1026,11 +1026,11 @@ void AArch64InstPrinter::printAddSubImm(const MCInst *MI, unsigned OpNum,
unsigned Shift =
AArch64_AM::getShiftValue(MI->getOperand(OpNum + 1).getImm());
O << '#' << formatImm(Val);
- if (Shift != 0)
+ if (Shift != 0) {
printShifter(MI, OpNum + 1, STI, O);
-
- if (CommentStream)
- *CommentStream << '=' << formatImm(Val << Shift) << '\n';
+ if (CommentStream)
+ *CommentStream << '=' << formatImm(Val << Shift) << '\n';
+ }
} else {
assert(MO.isExpr() && "Unexpected operand type!");
MO.getExpr()->print(O, &MAI);
@@ -1450,6 +1450,12 @@ void AArch64InstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum,
O << "[" << MI->getOperand(OpNum).getImm() << "]";
}
+void AArch64InstPrinter::printMatrixIndex(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ O << MI->getOperand(OpNum).getImm();
+}
+
void AArch64InstPrinter::printAlignedLabel(const MCInst *MI, uint64_t Address,
unsigned OpNum,
const MCSubtargetInfo &STI,
@@ -1539,6 +1545,28 @@ void AArch64InstPrinter::printBarriernXSOption(const MCInst *MI, unsigned OpNo,
O << "#" << Val;
}
+static bool isValidSysReg(const AArch64SysReg::SysReg *Reg, bool Read,
+ const MCSubtargetInfo &STI) {
+ return (Reg && (Read ? Reg->Readable : Reg->Writeable) &&
+ Reg->haveFeatures(STI.getFeatureBits()));
+}
+
+// Looks up a system register either by encoding or by name. Some system
+// registers share the same encoding between different architectures,
+// therefore a tablegen lookup by encoding will return an entry regardless
+// of the register's predication on a specific subtarget feature. To work
+// around this problem we keep an alternative name for such registers and
+// look them up by that name if the first lookup was unsuccessful.
+static const AArch64SysReg::SysReg *lookupSysReg(unsigned Val, bool Read,
+ const MCSubtargetInfo &STI) {
+ const AArch64SysReg::SysReg *Reg = AArch64SysReg::lookupSysRegByEncoding(Val);
+
+ if (Reg && !isValidSysReg(Reg, Read, STI))
+ Reg = AArch64SysReg::lookupSysRegByName(Reg->AltName);
+
+ return Reg;
+}
+
void AArch64InstPrinter::printMRSSystemRegister(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
@@ -1558,8 +1586,9 @@ void AArch64InstPrinter::printMRSSystemRegister(const MCInst *MI, unsigned OpNo,
return;
}
- const AArch64SysReg::SysReg *Reg = AArch64SysReg::lookupSysRegByEncoding(Val);
- if (Reg && Reg->Readable && Reg->haveFeatures(STI.getFeatureBits()))
+ const AArch64SysReg::SysReg *Reg = lookupSysReg(Val, true /*Read*/, STI);
+
+ if (isValidSysReg(Reg, true /*Read*/, STI))
O << Reg->Name;
else
O << AArch64SysReg::genericRegisterString(Val);
@@ -1584,8 +1613,9 @@ void AArch64InstPrinter::printMSRSystemRegister(const MCInst *MI, unsigned OpNo,
return;
}
- const AArch64SysReg::SysReg *Reg = AArch64SysReg::lookupSysRegByEncoding(Val);
- if (Reg && Reg->Writeable && Reg->haveFeatures(STI.getFeatureBits()))
+ const AArch64SysReg::SysReg *Reg = lookupSysReg(Val, false /*Read*/, STI);
+
+ if (isValidSysReg(Reg, false /*Read*/, STI))
O << Reg->Name;
else
O << AArch64SysReg::genericRegisterString(Val);
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
index 9ec74a1bc7b6..d36fb30a0ce6 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
@@ -161,6 +161,8 @@ protected:
void printVectorIndex(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
+ void printMatrixIndex(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printAdrpLabel(const MCInst *MI, uint64_t Address, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
void printBarrierOption(const MCInst *MI, unsigned OpNum,
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
index 3c2df1621e11..90688f1a3e83 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -26,9 +26,9 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -57,7 +57,16 @@ createAArch64MCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
CPU = "apple-a12";
}
- return createAArch64MCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS);
+ // Most of the NEON instruction set isn't supported in streaming mode on SME
+ // targets, so disable NEON unless explicitly requested.
+ bool RequestedNEON = FS.contains("neon");
+ bool RequestedStreamingSVE = FS.contains("streaming-sve");
+ MCSubtargetInfo *STI =
+ createAArch64MCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS);
+ if (RequestedStreamingSVE && !RequestedNEON &&
+ STI->hasFeature(AArch64::FeatureNEON))
+ STI->ToggleFeature(AArch64::FeatureNEON);
+ return STI;
}
void AArch64_MC::initLLVMToCVRegMapping(MCRegisterInfo *MRI) {
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
index c84c313c1db0..941226b83e44 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
@@ -62,9 +62,6 @@ MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S,
MCInstPrinter *InstPrint,
bool isVerboseAsm);
-MCTargetStreamer *createAArch64ObjectTargetStreamer(MCStreamer &S,
- const MCSubtargetInfo &STI);
-
namespace AArch64_MC {
void initLLVMToCVRegMapping(MCRegisterInfo *MRI);
}
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
index 557603c24ba5..cf1a60643efd 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
@@ -48,11 +48,13 @@ void AArch64TargetStreamer::emitCurrentConstantPool() {
ConstantPools->emitForCurrentSection(Streamer);
}
+void AArch64TargetStreamer::emitConstantPools() {
+ ConstantPools->emitAll(Streamer);
+}
+
// finish() - write out any non-empty assembler constant pools and
// write out note.gnu.properties if needed.
void AArch64TargetStreamer::finish() {
- ConstantPools->emitAll(Streamer);
-
if (MarkBTIProperty)
emitNoteSection(ELF::GNU_PROPERTY_AARCH64_FEATURE_1_BTI);
}
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
index 9b030775094c..86c7baf8f429 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
@@ -23,6 +23,7 @@ public:
~AArch64TargetStreamer() override;
void finish() override;
+ void emitConstantPools() override;
/// Callback used to implement the ldr= pseudo.
/// Add a new entry to the constant pool for the current section and return an
diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 62089166f4b7..41f2cead4cf8 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -180,26 +180,18 @@ class sme_mem_ld_ss_base<bit Q, bit V, bits<2> msz, dag outs, dag ins,
let mayLoad = 1;
}
-class sme_mem_ld_ss_inst_BHSD<bits<2> msz, string mnemonic,
- MatrixTileVectorOperand tile_ty, bit is_col,
- Operand imm_ty, RegisterOperand gpr_ty>
+class sme_mem_ld_ss_inst<bit Q, bits<2> msz, string mnemonic,
+ MatrixTileVectorOperand tile_ty, bit is_col,
+ Operand imm_ty, RegisterOperand gpr_ty>
: sme_mem_ld_ss_base<
- 0b0, is_col, msz, (outs tile_ty:$ZAt),
+ Q, is_col, msz, (outs tile_ty:$ZAt),
(ins MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn,
gpr_ty:$Rm),
mnemonic, "\t\\{$ZAt[$Rv, $imm]\\}, $Pg/z, [$Rn, $Rm]">;
-class sme_mem_ld_ss_inst_Q<string mnemonic, MatrixTileVectorOperand tile_ty,
- bit is_col>
- : sme_mem_ld_ss_base<
- 0b1, is_col, 0b11, (outs tile_ty:$ZAt),
- (ins MatrixIndexGPR32Op12_15:$Rv, PPR3bAny:$Pg, GPR64sp:$Rn,
- GPR64shifted128:$Rm),
- mnemonic, "\t\\{$ZAt[$Rv]\\}, $Pg/z, [$Rn, $Rm]">;
-
-multiclass sme_mem_ss_aliases_BHSD<string mnemonic, Instruction inst,
- MatrixTileVectorOperand tile_ty, Operand imm_ty,
- RegisterOperand gpr_ty,
+multiclass sme_mem_ss_aliases_base<string mnemonic, Instruction inst,
+ MatrixTileVectorOperand tile_ty,
+ Operand imm_ty, RegisterOperand gpr_ty,
string pg_suffix=""> {
def : InstAlias<mnemonic # "\t$ZAt[$Rv, $imm], $Pg" # pg_suffix # ", [$Rn, $Rm]",
(inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn, gpr_ty:$Rm), 0>;
@@ -210,35 +202,23 @@ multiclass sme_mem_ss_aliases_BHSD<string mnemonic, Instruction inst,
(inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 0>;
}
-multiclass sme_mem_ss_aliases_Q<string mnemonic, Instruction inst,
- MatrixTileVectorOperand tile_ty,
- string pg_suffix=""> {
- def : InstAlias<mnemonic # "\t$ZAt[$Rv], $Pg" # pg_suffix # ", [$Rn, $Rm]",
- (inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, PPR3bAny:$Pg, GPR64sp:$Rn, GPR64shifted128:$Rm), 0>;
- // Default XZR offset aliases
- def : InstAlias<mnemonic # "\t\\{$ZAt[$Rv]\\}, $Pg" # pg_suffix # ", [$Rn]",
- (inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 2>;
- def : InstAlias<mnemonic # "\t$ZAt[$Rv], $Pg" # pg_suffix # ", [$Rn]",
- (inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 0>;
-}
-
multiclass sme_mem_ss_aliases<string mnemonic, string inst, bit is_col,
string pg_suffix=""> {
- defm : sme_mem_ss_aliases_BHSD<mnemonic # "b", !cast<Instruction>(inst # _B),
+ defm : sme_mem_ss_aliases_base<mnemonic # "b", !cast<Instruction>(inst # _B),
!if(is_col, TileVectorOpV8, TileVectorOpH8),
- imm0_15, GPR64shifted8, pg_suffix>;
- defm : sme_mem_ss_aliases_BHSD<mnemonic # "h", !cast<Instruction>(inst # _H),
+ sme_elm_idx0_15, GPR64shifted8, pg_suffix>;
+ defm : sme_mem_ss_aliases_base<mnemonic # "h", !cast<Instruction>(inst # _H),
!if(is_col, TileVectorOpV16, TileVectorOpH16),
- imm0_7, GPR64shifted16, pg_suffix>;
- defm : sme_mem_ss_aliases_BHSD<mnemonic # "w", !cast<Instruction>(inst # _S),
+ sme_elm_idx0_7, GPR64shifted16, pg_suffix>;
+ defm : sme_mem_ss_aliases_base<mnemonic # "w", !cast<Instruction>(inst # _S),
!if(is_col, TileVectorOpV32, TileVectorOpH32),
- imm0_3, GPR64shifted32, pg_suffix>;
- defm : sme_mem_ss_aliases_BHSD<mnemonic # "d", !cast<Instruction>(inst # _D),
+ sme_elm_idx0_3, GPR64shifted32, pg_suffix>;
+ defm : sme_mem_ss_aliases_base<mnemonic # "d", !cast<Instruction>(inst # _D),
!if(is_col, TileVectorOpV64, TileVectorOpH64),
- imm0_1, GPR64shifted64, pg_suffix>;
- defm : sme_mem_ss_aliases_Q <mnemonic # "q", !cast<Instruction>(inst # _Q),
+ sme_elm_idx0_1, GPR64shifted64, pg_suffix>;
+ defm : sme_mem_ss_aliases_base<mnemonic # "q", !cast<Instruction>(inst # _Q),
!if(is_col, TileVectorOpV128, TileVectorOpH128),
- pg_suffix>;
+ sme_elm_idx0_0, GPR64shifted128, pg_suffix>;
}
multiclass sme_mem_ld_ss_aliases<string inst, bit is_col> {
@@ -246,44 +226,39 @@ multiclass sme_mem_ld_ss_aliases<string inst, bit is_col> {
}
multiclass sme_mem_ld_v_ss<string mnemonic, bit is_col> {
- def _B : sme_mem_ld_ss_inst_BHSD<0b00, mnemonic # "b",
- !if(is_col, TileVectorOpV8,
- TileVectorOpH8),
- is_col, imm0_15, GPR64shifted8> {
+ def _B : sme_mem_ld_ss_inst<0b0, 0b00, mnemonic # "b",
+ !if(is_col, TileVectorOpV8, TileVectorOpH8),
+ is_col, sme_elm_idx0_15, GPR64shifted8> {
bits<4> imm;
let Inst{3-0} = imm;
}
- def _H : sme_mem_ld_ss_inst_BHSD<0b01, mnemonic # "h",
- !if(is_col, TileVectorOpV16,
- TileVectorOpH16),
- is_col, imm0_7, GPR64shifted16> {
+ def _H : sme_mem_ld_ss_inst<0b0, 0b01, mnemonic # "h",
+ !if(is_col, TileVectorOpV16, TileVectorOpH16),
+ is_col, sme_elm_idx0_7, GPR64shifted16> {
bits<1> ZAt;
bits<3> imm;
let Inst{3} = ZAt;
let Inst{2-0} = imm;
}
- def _S : sme_mem_ld_ss_inst_BHSD<0b10, mnemonic # "w",
- !if(is_col, TileVectorOpV32,
- TileVectorOpH32),
- is_col, imm0_3, GPR64shifted32> {
+ def _S : sme_mem_ld_ss_inst<0b0, 0b10, mnemonic # "w",
+ !if(is_col, TileVectorOpV32, TileVectorOpH32),
+ is_col, sme_elm_idx0_3, GPR64shifted32> {
bits<2> ZAt;
bits<2> imm;
let Inst{3-2} = ZAt;
let Inst{1-0} = imm;
}
- def _D : sme_mem_ld_ss_inst_BHSD<0b11, mnemonic # "d",
- !if(is_col, TileVectorOpV64,
- TileVectorOpH64),
- is_col, imm0_1, GPR64shifted64> {
+ def _D : sme_mem_ld_ss_inst<0b0, 0b11, mnemonic # "d",
+ !if(is_col, TileVectorOpV64, TileVectorOpH64),
+ is_col, sme_elm_idx0_1, GPR64shifted64> {
bits<3> ZAt;
bits<1> imm;
let Inst{3-1} = ZAt;
let Inst{0} = imm;
}
- def _Q : sme_mem_ld_ss_inst_Q<mnemonic # "q",
- !if(is_col, TileVectorOpV128,
- TileVectorOpH128),
- is_col> {
+ def _Q : sme_mem_ld_ss_inst<0b1, 0b11, mnemonic # "q",
+ !if(is_col, TileVectorOpV128, TileVectorOpH128),
+ is_col, sme_elm_idx0_0, GPR64shifted128> {
bits<4> ZAt;
let Inst{3-0} = ZAt;
}
@@ -322,66 +297,53 @@ class sme_mem_st_ss_base<bit Q, bit V, bits<2> msz, dag ins,
let hasSideEffects = 1;
}
-class sme_mem_st_ss_inst_BHSD<bits<2> msz, string mnemonic,
- MatrixTileVectorOperand tile_ty, bit is_col,
- Operand imm_ty, RegisterOperand gpr_ty>
+class sme_mem_st_ss_inst<bit Q, bits<2> msz, string mnemonic,
+ MatrixTileVectorOperand tile_ty, bit is_col,
+ Operand imm_ty, RegisterOperand gpr_ty>
: sme_mem_st_ss_base<
- 0b0, is_col, msz,
+ Q, is_col, msz,
(ins tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg,
GPR64sp:$Rn, gpr_ty:$Rm),
mnemonic, "\t\\{$ZAt[$Rv, $imm]\\}, $Pg, [$Rn, $Rm]">;
-class sme_mem_st_ss_inst_Q<string mnemonic, MatrixTileVectorOperand tile_ty,
- bit is_col>
- : sme_mem_st_ss_base<
- 0b1, is_col, 0b11,
- (ins tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, PPR3bAny:$Pg,
- GPR64sp:$Rn, GPR64shifted128:$Rm),
- mnemonic, "\t\\{$ZAt[$Rv]\\}, $Pg, [$Rn, $Rm]">;
-
multiclass sme_mem_st_ss_aliases<string inst, bit is_col> {
defm NAME : sme_mem_ss_aliases<"st1", inst, is_col>;
}
multiclass sme_mem_st_v_ss<string mnemonic, bit is_col> {
- def _B : sme_mem_st_ss_inst_BHSD<0b00, mnemonic # "b",
- !if(is_col, TileVectorOpV8,
- TileVectorOpH8),
- is_col, imm0_15, GPR64shifted8> {
+ def _B : sme_mem_st_ss_inst<0b0, 0b00, mnemonic # "b",
+ !if(is_col, TileVectorOpV8, TileVectorOpH8),
+ is_col, sme_elm_idx0_15, GPR64shifted8> {
bits<4> imm;
let Inst{3-0} = imm;
}
- def _H : sme_mem_st_ss_inst_BHSD<0b01, mnemonic # "h",
- !if(is_col, TileVectorOpV16,
- TileVectorOpH16),
- is_col, imm0_7, GPR64shifted16> {
+ def _H : sme_mem_st_ss_inst<0b0, 0b01, mnemonic # "h",
+ !if(is_col, TileVectorOpV16, TileVectorOpH16),
+ is_col, sme_elm_idx0_7, GPR64shifted16> {
bits<1> ZAt;
bits<3> imm;
let Inst{3} = ZAt;
let Inst{2-0} = imm;
}
- def _S : sme_mem_st_ss_inst_BHSD<0b10, mnemonic # "w",
- !if(is_col, TileVectorOpV32,
- TileVectorOpH32),
- is_col, imm0_3, GPR64shifted32> {
+ def _S : sme_mem_st_ss_inst<0b0, 0b10, mnemonic # "w",
+ !if(is_col, TileVectorOpV32, TileVectorOpH32),
+ is_col, sme_elm_idx0_3, GPR64shifted32> {
bits<2> ZAt;
bits<2> imm;
let Inst{3-2} = ZAt;
let Inst{1-0} = imm;
}
- def _D : sme_mem_st_ss_inst_BHSD<0b11, mnemonic # "d",
- !if(is_col, TileVectorOpV64,
- TileVectorOpH64),
- is_col, imm0_1, GPR64shifted64> {
+ def _D : sme_mem_st_ss_inst<0b0, 0b11, mnemonic # "d",
+ !if(is_col, TileVectorOpV64, TileVectorOpH64),
+ is_col, sme_elm_idx0_1, GPR64shifted64> {
bits<3> ZAt;
bits<1> imm;
let Inst{3-1} = ZAt;
let Inst{0} = imm;
}
- def _Q : sme_mem_st_ss_inst_Q<mnemonic # "q",
- !if(is_col, TileVectorOpV128,
- TileVectorOpH128),
- is_col> {
+ def _Q : sme_mem_st_ss_inst<0b1, 0b11, mnemonic # "q",
+ !if(is_col, TileVectorOpV128, TileVectorOpH128),
+ is_col, sme_elm_idx0_0, GPR64shifted128> {
bits<4> ZAt;
let Inst{3-0} = ZAt;
}
@@ -423,13 +385,13 @@ multiclass sme_spill_fill<bit isStore, dag outs, dag ins, string opcodestr> {
def : InstAlias<opcodestr # "\t$ZAt[$Rv, $imm4], [$Rn]",
(!cast<Instruction>(NAME) MatrixOp:$ZAt,
- MatrixIndexGPR32Op12_15:$Rv, imm0_15:$imm4, GPR64sp:$Rn, 0), 1>;
+ MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_15:$imm4, GPR64sp:$Rn, 0), 1>;
}
multiclass sme_spill<string opcodestr> {
defm NAME : sme_spill_fill<0b1, (outs),
(ins MatrixOp:$ZAt, MatrixIndexGPR32Op12_15:$Rv,
- imm0_15:$imm4, GPR64sp:$Rn,
+ sme_elm_idx0_15:$imm4, GPR64sp:$Rn,
imm0_15:$offset),
opcodestr>;
}
@@ -437,7 +399,7 @@ multiclass sme_spill<string opcodestr> {
multiclass sme_fill<string opcodestr> {
defm NAME : sme_spill_fill<0b0, (outs MatrixOp:$ZAt),
(ins MatrixIndexGPR32Op12_15:$Rv,
- imm0_15:$imm4, GPR64sp:$Rn,
+ sme_elm_idx0_15:$imm4, GPR64sp:$Rn,
imm0_15:$offset),
opcodestr>;
}
@@ -463,60 +425,54 @@ class sme_vector_to_tile_base<bit Q, bit V, bits<2> sz, dag outs, dag ins,
let Inst{4} = 0b0;
}
-class sme_vector_to_tile_inst<bits<2> sz, MatrixTileVectorOperand tile_ty,
+class sme_vector_to_tile_inst<bit Q, bits<2> sz, MatrixTileVectorOperand tile_ty,
bit is_col, Operand imm_ty, ZPRRegOp zpr_ty,
string mnemonic>
- : sme_vector_to_tile_base<0b0, is_col, sz, (outs tile_ty:$ZAd),
+ : sme_vector_to_tile_base<Q, is_col, sz, (outs tile_ty:$ZAd),
(ins MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, zpr_ty:$Zn),
mnemonic, "\t$ZAd[$Rv, $imm], $Pg/m, $Zn">;
-class sme_vector_to_tile_inst_Q<MatrixTileVectorOperand tile_ty,
- bit is_col, string mnemonic>
- : sme_vector_to_tile_base<0b1, is_col, 0b11, (outs tile_ty:$ZAd),
- (ins MatrixIndexGPR32Op12_15:$Rv, PPR3bAny:$Pg, ZPR128:$Zn),
- mnemonic, "\t$ZAd[$Rv], $Pg/m, $Zn">;
-
multiclass sme_vector_to_tile_aliases<Instruction inst,
MatrixTileVectorOperand tile_ty,
ZPRRegOp zpr_ty, Operand imm_ty> {
def : InstAlias<"mov\t$ZAd[$Rv, $imm], $Pg/m, $Zn",
- (inst tile_ty:$ZAd, MatrixIndexGPR32Op12_15:$Rv, imm0_15:$imm, PPR3bAny:$Pg, zpr_ty:$Zn), 1>;
+ (inst tile_ty:$ZAd, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, zpr_ty:$Zn), 1>;
}
multiclass sme_vector_v_to_tile<string mnemonic, bit is_col> {
- def _B : sme_vector_to_tile_inst<0b00, !if(is_col, TileVectorOpV8,
- TileVectorOpH8),
- is_col, imm0_15, ZPR8, mnemonic> {
+ def _B : sme_vector_to_tile_inst<0b0, 0b00, !if(is_col, TileVectorOpV8,
+ TileVectorOpH8),
+ is_col, sme_elm_idx0_15, ZPR8, mnemonic> {
bits<4> imm;
let Inst{3-0} = imm;
}
- def _H : sme_vector_to_tile_inst<0b01, !if(is_col, TileVectorOpV16,
- TileVectorOpH16),
- is_col, imm0_7, ZPR16, mnemonic> {
+ def _H : sme_vector_to_tile_inst<0b0, 0b01, !if(is_col, TileVectorOpV16,
+ TileVectorOpH16),
+ is_col, sme_elm_idx0_7, ZPR16, mnemonic> {
bits<1> ZAd;
bits<3> imm;
let Inst{3} = ZAd;
let Inst{2-0} = imm;
}
- def _S : sme_vector_to_tile_inst<0b10, !if(is_col, TileVectorOpV32,
- TileVectorOpH32),
- is_col, imm0_3, ZPR32, mnemonic> {
+ def _S : sme_vector_to_tile_inst<0b0, 0b10, !if(is_col, TileVectorOpV32,
+ TileVectorOpH32),
+ is_col, sme_elm_idx0_3, ZPR32, mnemonic> {
bits<2> ZAd;
bits<2> imm;
let Inst{3-2} = ZAd;
let Inst{1-0} = imm;
}
- def _D : sme_vector_to_tile_inst<0b11, !if(is_col, TileVectorOpV64,
- TileVectorOpH64),
- is_col, imm0_1, ZPR64, mnemonic> {
+ def _D : sme_vector_to_tile_inst<0b0, 0b11, !if(is_col, TileVectorOpV64,
+ TileVectorOpH64),
+ is_col, sme_elm_idx0_1, ZPR64, mnemonic> {
bits<3> ZAd;
bits<1> imm;
let Inst{3-1} = ZAd;
let Inst{0} = imm;
}
- def _Q : sme_vector_to_tile_inst_Q<!if(is_col, TileVectorOpV128,
- TileVectorOpH128),
- is_col, mnemonic> {
+ def _Q : sme_vector_to_tile_inst<0b1, 0b11, !if(is_col, TileVectorOpV128,
+ TileVectorOpH128),
+ is_col, sme_elm_idx0_0, ZPR128, mnemonic> {
bits<4> ZAd;
bits<1> imm;
let Inst{3-0} = ZAd;
@@ -525,26 +481,23 @@ multiclass sme_vector_v_to_tile<string mnemonic, bit is_col> {
defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _B),
!if(is_col, TileVectorOpV8,
TileVectorOpH8),
- ZPR8, imm0_15>;
+ ZPR8, sme_elm_idx0_15>;
defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _H),
!if(is_col, TileVectorOpV16,
TileVectorOpH16),
- ZPR16, imm0_7>;
+ ZPR16, sme_elm_idx0_7>;
defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _S),
!if(is_col, TileVectorOpV32,
TileVectorOpH32),
- ZPR32, imm0_3>;
+ ZPR32, sme_elm_idx0_3>;
defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _D),
!if(is_col, TileVectorOpV64,
TileVectorOpH64),
- ZPR64, imm0_1>;
-
- def : InstAlias<"mov\t$ZAd[$Rv], $Pg/m, $Zn",
- (!cast<Instruction>(NAME # _Q) !if(is_col,
- TileVectorOpV128,
- TileVectorOpH128):$ZAd,
- MatrixIndexGPR32Op12_15:$Rv,
- PPR3bAny:$Pg, ZPR128:$Zn), 1>;
+ ZPR64, sme_elm_idx0_1>;
+ defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _Q),
+ !if(is_col, TileVectorOpV128,
+ TileVectorOpH128),
+ ZPR128, sme_elm_idx0_0>;
}
multiclass sme_vector_to_tile<string mnemonic> {
@@ -569,19 +522,13 @@ class sme_tile_to_vector_base<bit Q, bit V, bits<2> sz, dag outs, dag ins,
let Inst{4-0} = Zd;
}
-class sme_tile_to_vector_inst<bits<2> sz, ZPRRegOp zpr_ty,
+class sme_tile_to_vector_inst<bit Q, bits<2> sz, ZPRRegOp zpr_ty,
MatrixTileVectorOperand tile_ty,
bit is_col, Operand imm_ty, string mnemonic>
- : sme_tile_to_vector_base<0b0, is_col, sz, (outs zpr_ty:$Zd),
+ : sme_tile_to_vector_base<Q, is_col, sz, (outs zpr_ty:$Zd),
(ins PPR3bAny:$Pg, tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm),
mnemonic, "\t$Zd, $Pg/m, $ZAn[$Rv, $imm]">;
-class sme_tile_to_vector_inst_Q<MatrixTileVectorOperand tile_ty,
- bit is_col, string mnemonic>
- : sme_tile_to_vector_base<0b1, is_col, 0b11, (outs ZPR128:$Zd),
- (ins PPR3bAny:$Pg, tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rv),
- mnemonic, "\t$Zd, $Pg/m, $ZAn[$Rv]">;
-
multiclass sme_tile_to_vector_aliases<Instruction inst, ZPRRegOp zpr_ty,
MatrixTileVectorOperand tile_ty,
Operand imm_ty > {
@@ -590,62 +537,58 @@ multiclass sme_tile_to_vector_aliases<Instruction inst, ZPRRegOp zpr_ty,
}
multiclass sme_tile_to_vector_v<string mnemonic, bit is_col> {
- def _B : sme_tile_to_vector_inst<0b00, ZPR8, !if(is_col, TileVectorOpV8,
- TileVectorOpH8),
- is_col, imm0_15, mnemonic> {
+ def _B : sme_tile_to_vector_inst<0b0, 0b00, ZPR8, !if(is_col, TileVectorOpV8,
+ TileVectorOpH8),
+ is_col, sme_elm_idx0_15, mnemonic> {
bits<4> imm;
let Inst{8-5} = imm;
}
- def _H : sme_tile_to_vector_inst<0b01, ZPR16, !if(is_col, TileVectorOpV16,
- TileVectorOpH16),
- is_col, imm0_7, mnemonic> {
+ def _H : sme_tile_to_vector_inst<0b0, 0b01, ZPR16, !if(is_col, TileVectorOpV16,
+ TileVectorOpH16),
+ is_col, sme_elm_idx0_7, mnemonic> {
bits<1> ZAn;
bits<3> imm;
let Inst{8} = ZAn;
let Inst{7-5} = imm;
}
- def _S : sme_tile_to_vector_inst<0b10, ZPR32, !if(is_col, TileVectorOpV32,
- TileVectorOpH32),
- is_col, imm0_3, mnemonic> {
+ def _S : sme_tile_to_vector_inst<0b0, 0b10, ZPR32, !if(is_col, TileVectorOpV32,
+ TileVectorOpH32),
+ is_col, sme_elm_idx0_3, mnemonic> {
bits<2> ZAn;
bits<2> imm;
let Inst{8-7} = ZAn;
let Inst{6-5} = imm;
}
- def _D : sme_tile_to_vector_inst<0b11, ZPR64, !if(is_col, TileVectorOpV64,
- TileVectorOpH64),
- is_col, imm0_1, mnemonic> {
+ def _D : sme_tile_to_vector_inst<0b0, 0b11, ZPR64, !if(is_col, TileVectorOpV64,
+ TileVectorOpH64),
+ is_col, sme_elm_idx0_1, mnemonic> {
bits<3> ZAn;
bits<1> imm;
let Inst{8-6} = ZAn;
let Inst{5} = imm;
}
- def _Q : sme_tile_to_vector_inst_Q<!if(is_col, TileVectorOpV128,
- TileVectorOpH128),
- is_col, mnemonic> {
+ def _Q : sme_tile_to_vector_inst<0b1, 0b11, ZPR128, !if(is_col, TileVectorOpV128,
+ TileVectorOpH128),
+ is_col, sme_elm_idx0_0, mnemonic> {
bits<4> ZAn;
let Inst{8-5} = ZAn;
}
defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _B), ZPR8,
!if(is_col, TileVectorOpV8,
- TileVectorOpH8), imm0_15>;
+ TileVectorOpH8), sme_elm_idx0_15>;
defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _H), ZPR16,
!if(is_col, TileVectorOpV16,
- TileVectorOpH16), imm0_7>;
+ TileVectorOpH16), sme_elm_idx0_7>;
defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _S), ZPR32,
!if(is_col, TileVectorOpV32,
- TileVectorOpH32), imm0_3>;
+ TileVectorOpH32), sme_elm_idx0_3>;
defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _D), ZPR64,
!if(is_col, TileVectorOpV64,
- TileVectorOpH64), imm0_1>;
-
- def : InstAlias<"mov\t$Zd, $Pg/m, $ZAn[$Rv]",
- (!cast<Instruction>(NAME # _Q) ZPR128:$Zd, PPR3bAny:$Pg,
- !if(is_col,
- TileVectorOpV128,
- TileVectorOpH128):$ZAn,
- MatrixIndexGPR32Op12_15:$Rv), 1>;
+ TileVectorOpH64), sme_elm_idx0_1>;
+ defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _Q), ZPR128,
+ !if(is_col, TileVectorOpV128,
+ TileVectorOpH128), sme_elm_idx0_0>;
}
multiclass sme_tile_to_vector<string mnemonic> {
@@ -736,57 +679,48 @@ multiclass sve2_clamp<string asm, bit U> {
def _D : sve2_clamp<asm, 0b11, U, ZPR64>;
}
-class sve2_int_perm_dup_p<string asm, PPRRegOp ppr_ty, Operand imm_ty>
- : I<(outs ppr_ty:$Pd), (ins PPRAny:$Pg, ppr_ty:$Pn,
- MatrixIndexGPR32Op12_15:$Rm, imm_ty:$imm),
- asm, "\t$Pd, $Pg/z, $Pn[$Rm, $imm]", "", []>,
+class sve2_int_perm_sel_p<string asm, PPRRegOp ppr_ty, Operand imm_ty>
+ : I<(outs PPRAny:$Pd), (ins PPRAny:$Pn, ppr_ty:$Pm,
+ MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm),
+ asm, "\t$Pd, $Pn, $Pm[$Rv, $imm]", "", []>,
Sched<[]> {
- bits<2> Rm;
- bits<4> Pg;
+ bits<2> Rv;
bits<4> Pn;
+ bits<4> Pm;
bits<4> Pd;
let Inst{31-24} = 0b00100101;
let Inst{21} = 0b1;
- let Inst{17-16} = Rm;
+ let Inst{17-16} = Rv;
let Inst{15-14} = 0b01;
- let Inst{13-10} = Pg;
+ let Inst{13-10} = Pn;
let Inst{9} = 0b0;
- let Inst{8-5} = Pn;
+ let Inst{8-5} = Pm;
let Inst{4} = 0b0;
let Inst{3-0} = Pd;
}
-multiclass sve2_int_perm_dup_p<string asm> {
- def _B : sve2_int_perm_dup_p<asm, PPR8, imm0_15> {
+multiclass sve2_int_perm_sel_p<string asm> {
+ def _B : sve2_int_perm_sel_p<asm, PPR8, sme_elm_idx0_15> {
bits<4> imm;
let Inst{23-22} = imm{3-2};
let Inst{20-19} = imm{1-0};
let Inst{18} = 0b1;
}
- def _H : sve2_int_perm_dup_p<asm, PPR16, imm0_7> {
+ def _H : sve2_int_perm_sel_p<asm, PPR16, sme_elm_idx0_7> {
bits<3> imm;
let Inst{23-22} = imm{2-1};
let Inst{20} = imm{0};
let Inst{19-18} = 0b10;
}
- def _S : sve2_int_perm_dup_p<asm, PPR32, imm0_3> {
+ def _S : sve2_int_perm_sel_p<asm, PPR32, sme_elm_idx0_3> {
bits<2> imm;
let Inst{23-22} = imm{1-0};
let Inst{20-18} = 0b100;
}
- def _D : sve2_int_perm_dup_p<asm, PPR64, imm0_1> {
+ def _D : sve2_int_perm_sel_p<asm, PPR64, sme_elm_idx0_1> {
bits<1> imm;
let Inst{23} = imm;
let Inst{22} = 0b1;
let Inst{20-18} = 0b000;
}
-
- def : InstAlias<"dup\t$Pd, $Pg/z, $Pn[$Rm]",
- (!cast<Instruction>(NAME # _B) PPR8:$Pd, PPRAny:$Pg, PPR8:$Pn, MatrixIndexGPR32Op12_15:$Rm, 0), 1>;
- def : InstAlias<"dup\t$Pd, $Pg/z, $Pn[$Rm]",
- (!cast<Instruction>(NAME # _H) PPR16:$Pd, PPRAny:$Pg, PPR16:$Pn, MatrixIndexGPR32Op12_15:$Rm, 0), 1>;
- def : InstAlias<"dup\t$Pd, $Pg/z, $Pn[$Rm]",
- (!cast<Instruction>(NAME # _S) PPR32:$Pd, PPRAny:$Pg, PPR32:$Pn, MatrixIndexGPR32Op12_15:$Rm, 0), 1>;
- def : InstAlias<"dup\t$Pd, $Pg/z, $Pn[$Rm]",
- (!cast<Instruction>(NAME # _D) PPR64:$Pd, PPRAny:$Pg, PPR64:$Pn, MatrixIndexGPR32Op12_15:$Rm, 0), 1>;
}
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 02d3a765a802..010ffa1502de 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -264,16 +264,22 @@ def sve_cnt_mul_imm : ComplexPattern<i32, 1, "SelectCntImm<1, 16, 1, false>">;
def sve_cnt_shl_imm : ComplexPattern<i32, 1, "SelectCntImm<1, 16, 1, true>">;
-def sve_ext_imm_0_1 : ComplexPattern<i32, 1, "SelectEXTImm<1, 8>">;
-def sve_ext_imm_0_3 : ComplexPattern<i32, 1, "SelectEXTImm<3, 4>">;
-def sve_ext_imm_0_7 : ComplexPattern<i32, 1, "SelectEXTImm<7, 2>">;
-def sve_ext_imm_0_15 : ComplexPattern<i32, 1, "SelectEXTImm<15, 1>">;
+def sve_ext_imm_0_31 : ComplexPattern<i32, 1, "SelectEXTImm<31, 8>">;
+def sve_ext_imm_0_63 : ComplexPattern<i32, 1, "SelectEXTImm<63, 4>">;
+def sve_ext_imm_0_127 : ComplexPattern<i32, 1, "SelectEXTImm<127, 2>">;
+def sve_ext_imm_0_255 : ComplexPattern<i32, 1, "SelectEXTImm<255, 1>">;
def int_aarch64_sve_cntp_oneuse : PatFrag<(ops node:$pred, node:$src2),
(int_aarch64_sve_cntp node:$pred, node:$src2), [{
return N->hasOneUse();
}]>;
+def step_vector_oneuse : PatFrag<(ops node:$idx),
+ (step_vector node:$idx), [{
+ return N->hasOneUse();
+}]>;
+
+
//===----------------------------------------------------------------------===//
// SVE PTrue - These are used extensively throughout the pattern matching so
// it's important we define them first.
@@ -321,7 +327,7 @@ multiclass sve_int_ptrue<bits<3> opc, string asm, SDPatternOperator op> {
def SDT_AArch64PTrue : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
def AArch64ptrue : SDNode<"AArch64ISD::PTRUE", SDT_AArch64PTrue>;
-let Predicates = [HasSVE] in {
+let Predicates = [HasSVEorStreamingSVE] in {
defm PTRUE : sve_int_ptrue<0b000, "ptrue", AArch64ptrue>;
defm PTRUES : sve_int_ptrue<0b001, "ptrues", null_frag>;
}
@@ -484,6 +490,21 @@ class SVE_Shift_DupImm_All_Active_Pat<ValueType vt, SDPatternOperator op,
: Pat<(vt (op (pt (SVEAllActive)), vt:$Rn, (vt (AArch64dup (it (cast i32:$imm)))))),
(inst $Rn, i32:$imm)>;
+class SVE_2_Op_Fp_Imm_Pat<ValueType vt, SDPatternOperator op,
+ ValueType pt, ValueType it,
+ FPImmLeaf immL, int imm,
+ Instruction inst>
+: Pat<(vt (op (pt PPR_3b:$Pg), (vt ZPR:$Zs1), (vt (AArch64dup (it immL))))),
+ (inst $Pg, $Zs1, imm)>;
+
+class SVE_2_Op_Fp_Imm_Pat_Zero<ValueType vt, SDPatternOperator op,
+ ValueType pt, ValueType it,
+ FPImmLeaf immL, int imm,
+ Instruction inst>
+: Pat<(vt (op pt:$Pg, (vselect pt:$Pg, vt:$Zs1, (SVEDup0)),
+ (vt (AArch64dup (it immL))))),
+ (inst $Pg, $Zs1, imm)>;
+
//
// Pseudo -> Instruction mappings
//
@@ -621,6 +642,8 @@ class sve_int_pfirst_next<bits<2> sz8_64, bits<5> opc, string asm,
let Constraints = "$Pdn = $_Pdn";
let Defs = [NZCV];
+ let isPTestLike = 1;
+ let ElementSize = pprty.ElementSize;
}
multiclass sve_int_pfirst<bits<5> opc, string asm, SDPatternOperator op> {
@@ -912,13 +935,43 @@ class sve_int_pred_pattern_a<bits<3> opc, string asm>
let Constraints = "$Rdn = $_Rdn";
}
-multiclass sve_int_pred_pattern_a<bits<3> opc, string asm> {
- def NAME : sve_int_pred_pattern_a<opc, asm>;
+multiclass sve_int_pred_pattern_a<bits<3> opc, string asm,
+ SDPatternOperator op,
+ SDPatternOperator opcnt> {
+ let Predicates = [HasSVEorStreamingSVE] in {
+ def NAME : sve_int_pred_pattern_a<opc, asm>;
+
+ def : InstAlias<asm # "\t$Rdn, $pattern",
+ (!cast<Instruction>(NAME) GPR64:$Rdn, sve_pred_enum:$pattern, 1), 1>;
+ def : InstAlias<asm # "\t$Rdn",
+ (!cast<Instruction>(NAME) GPR64:$Rdn, 0b11111, 1), 2>;
+ }
- def : InstAlias<asm # "\t$Rdn, $pattern",
- (!cast<Instruction>(NAME) GPR64:$Rdn, sve_pred_enum:$pattern, 1), 1>;
- def : InstAlias<asm # "\t$Rdn",
- (!cast<Instruction>(NAME) GPR64:$Rdn, 0b11111, 1), 2>;
+ let Predicates = [HasSVEorStreamingSVE, UseScalarIncVL] in {
+ def : Pat<(i64 (op GPR64:$Rdn, (opcnt sve_pred_enum:$pattern))),
+ (!cast<Instruction>(NAME) GPR64:$Rdn, sve_pred_enum:$pattern, 1)>;
+
+ def : Pat<(i64 (op GPR64:$Rdn, (mul (opcnt sve_pred_enum:$pattern), (sve_cnt_mul_imm i32:$imm)))),
+ (!cast<Instruction>(NAME) GPR64:$Rdn, sve_pred_enum:$pattern, $imm)>;
+
+ def : Pat<(i64 (op GPR64:$Rdn, (shl (opcnt sve_pred_enum:$pattern), (i64 (sve_cnt_shl_imm i32:$imm))))),
+ (!cast<Instruction>(NAME) GPR64:$Rdn, sve_pred_enum:$pattern, $imm)>;
+
+ def : Pat<(i32 (op GPR32:$Rdn, (i32 (trunc (opcnt (sve_pred_enum:$pattern)))))),
+ (i32 (EXTRACT_SUBREG (!cast<Instruction>(NAME) (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ GPR32:$Rdn, sub_32), sve_pred_enum:$pattern, 1),
+ sub_32))>;
+
+ def : Pat<(i32 (op GPR32:$Rdn, (mul (i32 (trunc (opcnt (sve_pred_enum:$pattern)))), (sve_cnt_mul_imm i32:$imm)))),
+ (i32 (EXTRACT_SUBREG (!cast<Instruction>(NAME) (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ GPR32:$Rdn, sub_32), sve_pred_enum:$pattern, $imm),
+ sub_32))>;
+
+ def : Pat<(i32 (op GPR32:$Rdn, (shl (i32 (trunc (opcnt (sve_pred_enum:$pattern)))), (i64 (sve_cnt_shl_imm i32:$imm))))),
+ (i32 (EXTRACT_SUBREG (!cast<Instruction>(NAME) (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ GPR32:$Rdn, sub_32), sve_pred_enum:$pattern, $imm),
+ sub_32))>;
+ }
}
class sve_int_pred_pattern_b<bits<5> opc, string asm, RegisterOperand dt,
@@ -1270,10 +1323,15 @@ multiclass sve_int_perm_reverse_z<string asm, SDPatternOperator op> {
def : SVE_1_Op_Pat<nxv4i32, op, nxv4i32, !cast<Instruction>(NAME # _S)>;
def : SVE_1_Op_Pat<nxv2i64, op, nxv2i64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_1_Op_Pat<nxv2f16, op, nxv2f16, !cast<Instruction>(NAME # _D)>;
+ def : SVE_1_Op_Pat<nxv4f16, op, nxv4f16, !cast<Instruction>(NAME # _S)>;
def : SVE_1_Op_Pat<nxv8f16, op, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Pat<nxv2f32, op, nxv2f32, !cast<Instruction>(NAME # _D)>;
def : SVE_1_Op_Pat<nxv4f32, op, nxv4f32, !cast<Instruction>(NAME # _S)>;
def : SVE_1_Op_Pat<nxv2f64, op, nxv2f64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_1_Op_Pat<nxv2bf16, op, nxv2bf16, !cast<Instruction>(NAME # _D)>;
+ def : SVE_1_Op_Pat<nxv4bf16, op, nxv4bf16, !cast<Instruction>(NAME # _S)>;
def : SVE_1_Op_Pat<nxv8bf16, op, nxv8bf16, !cast<Instruction>(NAME # _H)>;
}
@@ -1707,10 +1765,19 @@ class sve_fp_2op_i_p_zds<bits<2> sz, bits<3> opc, string asm,
let ElementSize = zprty.ElementSize;
}
-multiclass sve_fp_2op_i_p_zds<bits<3> opc, string asm, Operand imm_ty> {
- def _H : sve_fp_2op_i_p_zds<0b01, opc, asm, ZPR16, imm_ty>;
- def _S : sve_fp_2op_i_p_zds<0b10, opc, asm, ZPR32, imm_ty>;
- def _D : sve_fp_2op_i_p_zds<0b11, opc, asm, ZPR64, imm_ty>;
+multiclass sve_fp_2op_i_p_zds<bits<3> opc, string asm, string Ps, Operand imm_ty, FPImmLeaf A, FPImmLeaf B, SDPatternOperator op> {
+ let DestructiveInstType = DestructiveBinaryImm in {
+ def _H : SVEPseudo2Instr<Ps # _H, 1>, sve_fp_2op_i_p_zds<0b01, opc, asm, ZPR16, imm_ty>;
+ def _S : SVEPseudo2Instr<Ps # _S, 1>, sve_fp_2op_i_p_zds<0b10, opc, asm, ZPR32, imm_ty>;
+ def _D : SVEPseudo2Instr<Ps # _D, 1>, sve_fp_2op_i_p_zds<0b11, opc, asm, ZPR64, imm_ty>;
+ }
+
+ def : SVE_2_Op_Fp_Imm_Pat<nxv8f16, op, nxv8i1, f16, A, 0, !cast<Instruction>(NAME # "_H")>;
+ def : SVE_2_Op_Fp_Imm_Pat<nxv8f16, op, nxv8i1, f16, B, 1, !cast<Instruction>(NAME # "_H")>;
+ def : SVE_2_Op_Fp_Imm_Pat<nxv4f32, op, nxv4i1, f32, A, 0, !cast<Instruction>(NAME # "_S")>;
+ def : SVE_2_Op_Fp_Imm_Pat<nxv4f32, op, nxv4i1, f32, B, 1, !cast<Instruction>(NAME # "_S")>;
+ def : SVE_2_Op_Fp_Imm_Pat<nxv2f64, op, nxv2i1, f64, A, 0, !cast<Instruction>(NAME # "_D")>;
+ def : SVE_2_Op_Fp_Imm_Pat<nxv2f64, op, nxv2i1, f64, B, 1, !cast<Instruction>(NAME # "_D")>;
}
class sve_fp_2op_p_zds<bits<2> sz, bits<4> opc, string asm,
@@ -1775,7 +1842,7 @@ multiclass sve_fp_2op_p_zds_zeroing_hsd<SDPatternOperator op> {
}
class sve_fp_ftmad<bits<2> sz, string asm, ZPRRegOp zprty>
-: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, zprty:$Zm, imm32_0_7:$imm3),
+: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, zprty:$Zm, timm32_0_7:$imm3),
asm, "\t$Zdn, $_Zdn, $Zm, $imm3",
"",
[]>, Sched<[]> {
@@ -1800,12 +1867,46 @@ multiclass sve_fp_ftmad<string asm, SDPatternOperator op> {
def _S : sve_fp_ftmad<0b10, asm, ZPR32>;
def _D : sve_fp_ftmad<0b11, asm, ZPR64>;
- def : Pat<(nxv8f16 (op (nxv8f16 ZPR16:$Zn), (nxv8f16 ZPR16:$Zm), (i32 imm32_0_7:$imm))),
- (!cast<Instruction>(NAME # _H) ZPR16:$Zn, ZPR16:$Zm, imm32_0_7:$imm)>;
- def : Pat<(nxv4f32 (op (nxv4f32 ZPR32:$Zn), (nxv4f32 ZPR32:$Zm), (i32 imm32_0_7:$imm))),
- (!cast<Instruction>(NAME # _S) ZPR32:$Zn, ZPR32:$Zm, imm32_0_7:$imm)>;
- def : Pat<(nxv2f64 (op (nxv2f64 ZPR64:$Zn), (nxv2f64 ZPR64:$Zm), (i32 imm32_0_7:$imm))),
- (!cast<Instruction>(NAME # _D) ZPR64:$Zn, ZPR64:$Zm, imm32_0_7:$imm)>;
+ def : Pat<(nxv8f16 (op (nxv8f16 ZPR16:$Zn), (nxv8f16 ZPR16:$Zm), (i32 timm32_0_7:$imm))),
+ (!cast<Instruction>(NAME # _H) ZPR16:$Zn, ZPR16:$Zm, timm32_0_7:$imm)>;
+ def : Pat<(nxv4f32 (op (nxv4f32 ZPR32:$Zn), (nxv4f32 ZPR32:$Zm), (i32 timm32_0_7:$imm))),
+ (!cast<Instruction>(NAME # _S) ZPR32:$Zn, ZPR32:$Zm, timm32_0_7:$imm)>;
+ def : Pat<(nxv2f64 (op (nxv2f64 ZPR64:$Zn), (nxv2f64 ZPR64:$Zm), (i32 timm32_0_7:$imm))),
+ (!cast<Instruction>(NAME # _D) ZPR64:$Zn, ZPR64:$Zm, timm32_0_7:$imm)>;
+}
+
+multiclass sve_fp_2op_i_p_zds_hfd<Operand imm_ty, FPImmLeaf A, FPImmLeaf B, SDPatternOperator ir_op = null_frag> {
+ def _UNDEF_H : PredTwoOpImmPseudo<NAME # _H, ZPR16, imm_ty, FalseLanesUndef>;
+ def _UNDEF_S : PredTwoOpImmPseudo<NAME # _S, ZPR32, imm_ty, FalseLanesUndef>;
+ def _UNDEF_D : PredTwoOpImmPseudo<NAME # _D, ZPR64, imm_ty, FalseLanesUndef>;
+
+ def : SVE_2_Op_Fp_Imm_Pat<nxv8f16, ir_op, nxv8i1, f16, A, 0, !cast<Instruction>(NAME # "_UNDEF_H")>;
+ def : SVE_2_Op_Fp_Imm_Pat<nxv8f16, ir_op, nxv8i1, f16, B, 1, !cast<Instruction>(NAME # "_UNDEF_H")>;
+ def : SVE_2_Op_Fp_Imm_Pat<nxv4f16, ir_op, nxv4i1, f16, A, 0, !cast<Instruction>(NAME # "_UNDEF_H")>;
+ def : SVE_2_Op_Fp_Imm_Pat<nxv4f16, ir_op, nxv4i1, f16, B, 1, !cast<Instruction>(NAME # "_UNDEF_H")>;
+ def : SVE_2_Op_Fp_Imm_Pat<nxv2f16, ir_op, nxv2i1, f16, A, 0, !cast<Instruction>(NAME # "_UNDEF_H")>;
+ def : SVE_2_Op_Fp_Imm_Pat<nxv2f16, ir_op, nxv2i1, f16, B, 1, !cast<Instruction>(NAME # "_UNDEF_H")>;
+ def : SVE_2_Op_Fp_Imm_Pat<nxv4f32, ir_op, nxv4i1, f32, A, 0, !cast<Instruction>(NAME # "_UNDEF_S")>;
+ def : SVE_2_Op_Fp_Imm_Pat<nxv4f32, ir_op, nxv4i1, f32, B, 1, !cast<Instruction>(NAME # "_UNDEF_S")>;
+ def : SVE_2_Op_Fp_Imm_Pat<nxv2f32, ir_op, nxv2i1, f32, A, 0, !cast<Instruction>(NAME # "_UNDEF_S")>;
+ def : SVE_2_Op_Fp_Imm_Pat<nxv2f32, ir_op, nxv2i1, f32, B, 1, !cast<Instruction>(NAME # "_UNDEF_S")>;
+ def : SVE_2_Op_Fp_Imm_Pat<nxv2f64, ir_op, nxv2i1, f64, A, 0, !cast<Instruction>(NAME # "_UNDEF_D")>;
+ def : SVE_2_Op_Fp_Imm_Pat<nxv2f64, ir_op, nxv2i1, f64, B, 1, !cast<Instruction>(NAME # "_UNDEF_D")>;
+}
+
+multiclass sve_fp_2op_i_p_zds_zeroing_hfd<Operand imm_ty, FPImmLeaf A, FPImmLeaf B, SDPatternOperator op> {
+ def _ZERO_H : PredTwoOpImmPseudo<NAME # _H, ZPR16, imm_ty, FalseLanesZero>;
+ def _ZERO_S : PredTwoOpImmPseudo<NAME # _S, ZPR32, imm_ty, FalseLanesZero>;
+ def _ZERO_D : PredTwoOpImmPseudo<NAME # _D, ZPR64, imm_ty, FalseLanesZero>;
+
+ let AddedComplexity = 2 in {
+ def : SVE_2_Op_Fp_Imm_Pat_Zero<nxv8f16, op, nxv8i1, f16, A, 0, !cast<Instruction>(NAME # "_ZERO_H")>;
+ def : SVE_2_Op_Fp_Imm_Pat_Zero<nxv8f16, op, nxv8i1, f16, B, 1, !cast<Instruction>(NAME # "_ZERO_H")>;
+ def : SVE_2_Op_Fp_Imm_Pat_Zero<nxv4f32, op, nxv4i1, f32, A, 0, !cast<Instruction>(NAME # "_ZERO_S")>;
+ def : SVE_2_Op_Fp_Imm_Pat_Zero<nxv4f32, op, nxv4i1, f32, B, 1, !cast<Instruction>(NAME # "_ZERO_S")>;
+ def : SVE_2_Op_Fp_Imm_Pat_Zero<nxv2f64, op, nxv2i1, f64, A, 0, !cast<Instruction>(NAME # "_ZERO_D")>;
+ def : SVE_2_Op_Fp_Imm_Pat_Zero<nxv2f64, op, nxv2i1, f64, B, 1, !cast<Instruction>(NAME # "_ZERO_D")>;
+ }
}
//===----------------------------------------------------------------------===//
@@ -1938,7 +2039,7 @@ multiclass sve_fp_3op_p_zds_b<bits<2> opc, string asm, SDPatternOperator op,
def : SVE_4_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
-multiclass sve_fp_3op_p_zds_zx<SDPatternOperator op, SDPatternOperator rev_op> {
+multiclass sve_fp_3op_p_zds_zx {
def _UNDEF_H : PredThreeOpPseudo<NAME # _H, ZPR16, FalseLanesUndef>;
def _UNDEF_S : PredThreeOpPseudo<NAME # _S, ZPR32, FalseLanesUndef>;
def _UNDEF_D : PredThreeOpPseudo<NAME # _D, ZPR64, FalseLanesUndef>;
@@ -2433,7 +2534,7 @@ class sve_fp_2op_p_zd<bits<7> opc, string asm, RegisterOperand i_zprtype,
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
- let DestructiveInstType = DestructiveOther;
+ let DestructiveInstType = DestructiveUnaryPassthru;
let ElementSize = Sz;
}
@@ -2482,9 +2583,12 @@ multiclass sve_fp_2op_p_zdr<bits<7> opc, string asm,
}
multiclass sve_fp_2op_p_zd_HSD<bits<5> opc, string asm, SDPatternOperator op> {
- def _H : sve_fp_2op_p_zd<{ 0b01, opc }, asm, ZPR16, ZPR16, ElementSizeH>;
- def _S : sve_fp_2op_p_zd<{ 0b10, opc }, asm, ZPR32, ZPR32, ElementSizeS>;
- def _D : sve_fp_2op_p_zd<{ 0b11, opc }, asm, ZPR64, ZPR64, ElementSizeD>;
+ def _H : sve_fp_2op_p_zd<{ 0b01, opc }, asm, ZPR16, ZPR16, ElementSizeH>,
+ SVEPseudo2Instr<NAME # _H, 1>;
+ def _S : sve_fp_2op_p_zd<{ 0b10, opc }, asm, ZPR32, ZPR32, ElementSizeS>,
+ SVEPseudo2Instr<NAME # _S, 1>;
+ def _D : sve_fp_2op_p_zd<{ 0b11, opc }, asm, ZPR64, ZPR64, ElementSizeD>,
+ SVEPseudo2Instr<NAME # _D, 1>;
def : SVE_1_Op_Passthru_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
def : SVE_1_Op_Passthru_Pat<nxv4f16, op, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H)>;
@@ -2492,6 +2596,17 @@ multiclass sve_fp_2op_p_zd_HSD<bits<5> opc, string asm, SDPatternOperator op> {
def : SVE_1_Op_Passthru_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
def : SVE_1_Op_Passthru_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
def : SVE_1_Op_Passthru_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
+
+ def _UNDEF_H : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
+ def _UNDEF_S : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
+ def _UNDEF_D : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
+
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _UNDEF_H)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv4f16, op, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _UNDEF_H)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv2f16, op, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _UNDEF_H)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _UNDEF_S)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _UNDEF_S)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _UNDEF_D)>;
}
multiclass sve2_fp_flogb<string asm, SDPatternOperator op> {
@@ -4986,7 +5101,7 @@ class sve_int_index_ii<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Inst{4-0} = Zd;
}
-multiclass sve_int_index_ii<string asm, SDPatternOperator step_vector, SDPatternOperator step_vector_oneuse> {
+multiclass sve_int_index_ii<string asm> {
def _B : sve_int_index_ii<0b00, asm, ZPR8, simm5_8b>;
def _H : sve_int_index_ii<0b01, asm, ZPR16, simm5_16b>;
def _S : sve_int_index_ii<0b10, asm, ZPR32, simm5_32b>;
@@ -5029,7 +5144,7 @@ class sve_int_index_ir<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Inst{4-0} = Zd;
}
-multiclass sve_int_index_ir<string asm, SDPatternOperator step_vector, SDPatternOperator step_vector_oneuse, SDPatternOperator mulop, SDPatternOperator muloneuseop> {
+multiclass sve_int_index_ir<string asm, SDPatternOperator mulop, SDPatternOperator muloneuseop> {
def _B : sve_int_index_ir<0b00, asm, ZPR8, GPR32, simm5_8b>;
def _H : sve_int_index_ir<0b01, asm, ZPR16, GPR32, simm5_16b>;
def _S : sve_int_index_ir<0b10, asm, ZPR32, GPR32, simm5_32b>;
@@ -5096,7 +5211,7 @@ class sve_int_index_ri<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Inst{4-0} = Zd;
}
-multiclass sve_int_index_ri<string asm, SDPatternOperator step_vector, SDPatternOperator step_vector_oneuse> {
+multiclass sve_int_index_ri<string asm> {
def _B : sve_int_index_ri<0b00, asm, ZPR8, GPR32, simm5_8b>;
def _H : sve_int_index_ri<0b01, asm, ZPR16, GPR32, simm5_16b>;
def _S : sve_int_index_ri<0b10, asm, ZPR32, GPR32, simm5_32b>;
@@ -5130,7 +5245,7 @@ class sve_int_index_rr<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Inst{4-0} = Zd;
}
-multiclass sve_int_index_rr<string asm, SDPatternOperator step_vector, SDPatternOperator step_vector_oneuse, SDPatternOperator mulop> {
+multiclass sve_int_index_rr<string asm, SDPatternOperator mulop> {
def _B : sve_int_index_rr<0b00, asm, ZPR8, GPR32>;
def _H : sve_int_index_rr<0b01, asm, ZPR16, GPR32>;
def _S : sve_int_index_rr<0b10, asm, ZPR32, GPR32>;
@@ -8333,3 +8448,4 @@ multiclass sve_int_bin_pred_all_active_bhsd<SDPatternOperator op> {
def : SVE_2_Op_Pred_All_Active_Pt<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, !cast<Pseudo>(NAME # _UNDEF_S)>;
def : SVE_2_Op_Pred_All_Active_Pt<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Pseudo>(NAME # _UNDEF_D)>;
}
+
diff --git a/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp b/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
index 79dcca8f8458..e72dccdc4b78 100644
--- a/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
+++ b/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
@@ -1,9 +1,8 @@
//===----- SVEIntrinsicOpts - SVE ACLE Intrinsics Opts --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -59,6 +58,10 @@ private:
bool coalescePTrueIntrinsicCalls(BasicBlock &BB,
SmallSetVector<IntrinsicInst *, 4> &PTrues);
bool optimizePTrueIntrinsicCalls(SmallSetVector<Function *, 4> &Functions);
+ bool optimizePredicateStore(Instruction *I);
+ bool optimizePredicateLoad(Instruction *I);
+
+ bool optimizeInstructions(SmallSetVector<Function *, 4> &Functions);
/// Operates at the function-scope. I.e., optimizations are applied local to
/// the functions themselves.
@@ -276,11 +279,166 @@ bool SVEIntrinsicOpts::optimizePTrueIntrinsicCalls(
return Changed;
}
+// This is done in SVEIntrinsicOpts rather than InstCombine so that we introduce
+// scalable stores as late as possible
+bool SVEIntrinsicOpts::optimizePredicateStore(Instruction *I) {
+ auto *F = I->getFunction();
+ auto Attr = F->getFnAttribute(Attribute::VScaleRange);
+ if (!Attr.isValid())
+ return false;
+
+ unsigned MinVScale, MaxVScale;
+ std::tie(MinVScale, MaxVScale) = Attr.getVScaleRangeArgs();
+ // The transform needs to know the exact runtime length of scalable vectors
+ if (MinVScale != MaxVScale || MinVScale == 0)
+ return false;
+
+ auto *PredType =
+ ScalableVectorType::get(Type::getInt1Ty(I->getContext()), 16);
+ auto *FixedPredType =
+ FixedVectorType::get(Type::getInt8Ty(I->getContext()), MinVScale * 2);
+
+ // If we have a store..
+ auto *Store = dyn_cast<StoreInst>(I);
+ if (!Store || !Store->isSimple())
+ return false;
+
+ // ..that is storing a predicate vector sized worth of bits..
+ if (Store->getOperand(0)->getType() != FixedPredType)
+ return false;
+
+ // ..where the value stored comes from a vector extract..
+ auto *IntrI = dyn_cast<IntrinsicInst>(Store->getOperand(0));
+ if (!IntrI ||
+ IntrI->getIntrinsicID() != Intrinsic::experimental_vector_extract)
+ return false;
+
+ // ..that is extracting from index 0..
+ if (!cast<ConstantInt>(IntrI->getOperand(1))->isZero())
+ return false;
+
+ // ..where the value being extract from comes from a bitcast
+ auto *BitCast = dyn_cast<BitCastInst>(IntrI->getOperand(0));
+ if (!BitCast)
+ return false;
+
+ // ..and the bitcast is casting from predicate type
+ if (BitCast->getOperand(0)->getType() != PredType)
+ return false;
+
+ IRBuilder<> Builder(I->getContext());
+ Builder.SetInsertPoint(I);
+
+ auto *PtrBitCast = Builder.CreateBitCast(
+ Store->getPointerOperand(),
+ PredType->getPointerTo(Store->getPointerAddressSpace()));
+ Builder.CreateStore(BitCast->getOperand(0), PtrBitCast);
+
+ Store->eraseFromParent();
+ if (IntrI->getNumUses() == 0)
+ IntrI->eraseFromParent();
+ if (BitCast->getNumUses() == 0)
+ BitCast->eraseFromParent();
+
+ return true;
+}
+
+// This is done in SVEIntrinsicOpts rather than InstCombine so that we introduce
+// scalable loads as late as possible
+bool SVEIntrinsicOpts::optimizePredicateLoad(Instruction *I) {
+ auto *F = I->getFunction();
+ auto Attr = F->getFnAttribute(Attribute::VScaleRange);
+ if (!Attr.isValid())
+ return false;
+
+ unsigned MinVScale, MaxVScale;
+ std::tie(MinVScale, MaxVScale) = Attr.getVScaleRangeArgs();
+ // The transform needs to know the exact runtime length of scalable vectors
+ if (MinVScale != MaxVScale || MinVScale == 0)
+ return false;
+
+ auto *PredType =
+ ScalableVectorType::get(Type::getInt1Ty(I->getContext()), 16);
+ auto *FixedPredType =
+ FixedVectorType::get(Type::getInt8Ty(I->getContext()), MinVScale * 2);
+
+ // If we have a bitcast..
+ auto *BitCast = dyn_cast<BitCastInst>(I);
+ if (!BitCast || BitCast->getType() != PredType)
+ return false;
+
+ // ..whose operand is a vector_insert..
+ auto *IntrI = dyn_cast<IntrinsicInst>(BitCast->getOperand(0));
+ if (!IntrI ||
+ IntrI->getIntrinsicID() != Intrinsic::experimental_vector_insert)
+ return false;
+
+ // ..that is inserting into index zero of an undef vector..
+ if (!isa<UndefValue>(IntrI->getOperand(0)) ||
+ !cast<ConstantInt>(IntrI->getOperand(2))->isZero())
+ return false;
+
+ // ..where the value inserted comes from a load..
+ auto *Load = dyn_cast<LoadInst>(IntrI->getOperand(1));
+ if (!Load || !Load->isSimple())
+ return false;
+
+ // ..that is loading a predicate vector sized worth of bits..
+ if (Load->getType() != FixedPredType)
+ return false;
+
+ IRBuilder<> Builder(I->getContext());
+ Builder.SetInsertPoint(Load);
+
+ auto *PtrBitCast = Builder.CreateBitCast(
+ Load->getPointerOperand(),
+ PredType->getPointerTo(Load->getPointerAddressSpace()));
+ auto *LoadPred = Builder.CreateLoad(PredType, PtrBitCast);
+
+ BitCast->replaceAllUsesWith(LoadPred);
+ BitCast->eraseFromParent();
+ if (IntrI->getNumUses() == 0)
+ IntrI->eraseFromParent();
+ if (Load->getNumUses() == 0)
+ Load->eraseFromParent();
+
+ return true;
+}
+
+bool SVEIntrinsicOpts::optimizeInstructions(
+ SmallSetVector<Function *, 4> &Functions) {
+ bool Changed = false;
+
+ for (auto *F : Functions) {
+ DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>(*F).getDomTree();
+
+ // Traverse the DT with an rpo walk so we see defs before uses, allowing
+ // simplification to be done incrementally.
+ BasicBlock *Root = DT->getRoot();
+ ReversePostOrderTraversal<BasicBlock *> RPOT(Root);
+ for (auto *BB : RPOT) {
+ for (Instruction &I : make_early_inc_range(*BB)) {
+ switch (I.getOpcode()) {
+ case Instruction::Store:
+ Changed |= optimizePredicateStore(&I);
+ break;
+ case Instruction::BitCast:
+ Changed |= optimizePredicateLoad(&I);
+ break;
+ }
+ }
+ }
+ }
+
+ return Changed;
+}
+
bool SVEIntrinsicOpts::optimizeFunctions(
SmallSetVector<Function *, 4> &Functions) {
bool Changed = false;
Changed |= optimizePTrueIntrinsicCalls(Functions);
+ Changed |= optimizeInstructions(Functions);
return Changed;
}
@@ -297,6 +455,8 @@ bool SVEIntrinsicOpts::runOnModule(Module &M) {
continue;
switch (F.getIntrinsicID()) {
+ case Intrinsic::experimental_vector_extract:
+ case Intrinsic::experimental_vector_insert:
case Intrinsic::aarch64_sve_ptrue:
for (User *U : F.users())
Functions.insert(cast<Instruction>(U)->getFunction());
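The predicate load/store rewrites above only fire when the vscale_range function attribute pins vscale to a single non-zero value, because the fixed-width container type (<MinVScale * 2 x i8>) must cover exactly the same number of bits as a <vscale x 16 x i1> predicate at run time. A minimal standalone sketch of that gate, reusing the same Attribute API the patch already calls (the helper name is hypothetical):

#include "llvm/ADT/Optional.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include <tuple>
using namespace llvm;

// Return the exact runtime vscale if the vscale_range attribute pins it,
// or None when the transform above must bail out.
static Optional<unsigned> getExactVScale(const Function &F) {
  auto Attr = F.getFnAttribute(Attribute::VScaleRange);
  if (!Attr.isValid())
    return None;                       // no vscale_range attribute at all
  unsigned MinVScale, MaxVScale;
  std::tie(MinVScale, MaxVScale) = Attr.getVScaleRangeArgs();
  if (MinVScale != MaxVScale || MinVScale == 0)
    return None;                       // range is not a single known value
  return MinVScale;                    // e.g. 1 for 128-bit SVE vectors
}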
diff --git a/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp b/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp
index a6796742117b..52c88fd0218d 100644
--- a/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp
+++ b/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#include "TargetInfo/AArch64TargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
Target &llvm::getTheAArch64leTarget() {
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index ce6866154242..caee2acd2606 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -106,6 +106,25 @@ inline static unsigned getXRegFromWReg(unsigned Reg) {
return Reg;
}
+inline static unsigned getXRegFromXRegTuple(unsigned RegTuple) {
+ switch (RegTuple) {
+ case AArch64::X0_X1_X2_X3_X4_X5_X6_X7: return AArch64::X0;
+ case AArch64::X2_X3_X4_X5_X6_X7_X8_X9: return AArch64::X2;
+ case AArch64::X4_X5_X6_X7_X8_X9_X10_X11: return AArch64::X4;
+ case AArch64::X6_X7_X8_X9_X10_X11_X12_X13: return AArch64::X6;
+ case AArch64::X8_X9_X10_X11_X12_X13_X14_X15: return AArch64::X8;
+ case AArch64::X10_X11_X12_X13_X14_X15_X16_X17: return AArch64::X10;
+ case AArch64::X12_X13_X14_X15_X16_X17_X18_X19: return AArch64::X12;
+ case AArch64::X14_X15_X16_X17_X18_X19_X20_X21: return AArch64::X14;
+ case AArch64::X16_X17_X18_X19_X20_X21_X22_X23: return AArch64::X16;
+ case AArch64::X18_X19_X20_X21_X22_X23_X24_X25: return AArch64::X18;
+ case AArch64::X20_X21_X22_X23_X24_X25_X26_X27: return AArch64::X20;
+ case AArch64::X22_X23_X24_X25_X26_X27_X28_FP: return AArch64::X22;
+ }
+ // For anything else, return it unchanged.
+ return RegTuple;
+}
+
static inline unsigned getBRegFromDReg(unsigned Reg) {
switch (Reg) {
case AArch64::D0: return AArch64::B0;
@@ -435,6 +454,60 @@ namespace AArch64SVEPredPattern {
#include "AArch64GenSystemOperands.inc"
}
+/// Return the number of active elements for a VL1 to VL256 predicate pattern,
+/// zero for all other patterns.
+inline unsigned getNumElementsFromSVEPredPattern(unsigned Pattern) {
+ switch (Pattern) {
+ default:
+ return 0;
+ case AArch64SVEPredPattern::vl1:
+ case AArch64SVEPredPattern::vl2:
+ case AArch64SVEPredPattern::vl3:
+ case AArch64SVEPredPattern::vl4:
+ case AArch64SVEPredPattern::vl5:
+ case AArch64SVEPredPattern::vl6:
+ case AArch64SVEPredPattern::vl7:
+ case AArch64SVEPredPattern::vl8:
+ return Pattern;
+ case AArch64SVEPredPattern::vl16:
+ return 16;
+ case AArch64SVEPredPattern::vl32:
+ return 32;
+ case AArch64SVEPredPattern::vl64:
+ return 64;
+ case AArch64SVEPredPattern::vl128:
+ return 128;
+ case AArch64SVEPredPattern::vl256:
+ return 256;
+ }
+}
+
+/// Return the specific VL predicate pattern based on the number of elements.
+inline unsigned getSVEPredPatternFromNumElements(unsigned MinNumElts) {
+ switch (MinNumElts) {
+ default:
+ llvm_unreachable("unexpected element count for SVE predicate");
+ case 1:
+ return AArch64SVEPredPattern::vl1;
+ case 2:
+ return AArch64SVEPredPattern::vl2;
+ case 4:
+ return AArch64SVEPredPattern::vl4;
+ case 8:
+ return AArch64SVEPredPattern::vl8;
+ case 16:
+ return AArch64SVEPredPattern::vl16;
+ case 32:
+ return AArch64SVEPredPattern::vl32;
+ case 64:
+ return AArch64SVEPredPattern::vl64;
+ case 128:
+ return AArch64SVEPredPattern::vl128;
+ case 256:
+ return AArch64SVEPredPattern::vl256;
+ }
+}
+
namespace AArch64ExactFPImm {
struct ExactFPImm {
const char *Name;
@@ -552,6 +625,7 @@ AArch64StringToVectorLayout(StringRef LayoutStr) {
namespace AArch64SysReg {
struct SysReg {
const char *Name;
+ const char *AltName;
unsigned Encoding;
bool Readable;
bool Writeable;
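For the element counts that getSVEPredPatternFromNumElements accepts (powers of two from 1 to 256), the two helpers added above round-trip; any other count hits its llvm_unreachable, so callers are expected to validate the count first. A small usage sketch under that assumption, using the target-internal header path the backend uses elsewhere:

#include "Utils/AArch64BaseInfo.h"
#include <cassert>
using namespace llvm;

// Sanity-check that vlN patterns and element counts map back and forth.
static void checkPredPatternRoundTrip() {
  for (unsigned N : {1u, 2u, 4u, 8u, 16u, 32u, 64u, 128u, 256u}) {
    unsigned Pattern = getSVEPredPatternFromNumElements(N);
    assert(getNumElementsFromSVEPredPattern(Pattern) == N);
  }
}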
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index ca088e63e03c..958e8c9e5bc5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -15,17 +15,7 @@
namespace llvm {
-class FunctionPass;
-class GCNTargetMachine;
-class ImmutablePass;
-class MachineFunctionPass;
-class ModulePass;
-class Pass;
-class Target;
class TargetMachine;
-class TargetOptions;
-class PassRegistry;
-class Module;
// GlobalISel passes
void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &);
@@ -35,16 +25,6 @@ FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone);
FunctionPass *createAMDGPURegBankCombiner(bool IsOptNone);
void initializeAMDGPURegBankCombinerPass(PassRegistry &);
-// R600 Passes
-FunctionPass *createR600VectorRegMerger();
-FunctionPass *createR600ExpandSpecialInstrsPass();
-FunctionPass *createR600EmitClauseMarkers();
-FunctionPass *createR600ClauseMergePass();
-FunctionPass *createR600Packetizer();
-FunctionPass *createR600ControlFlowFinalizer();
-FunctionPass *createAMDGPUCFGStructurizerPass();
-FunctionPass *createR600ISelDag(TargetMachine *TM, CodeGenOpt::Level OptLevel);
-
// SI Passes
FunctionPass *createGCNDPPCombinePass();
FunctionPass *createSIAnnotateControlFlowPass();
@@ -114,10 +94,23 @@ ModulePass *createAMDGPUFixFunctionBitcastsPass();
void initializeAMDGPUFixFunctionBitcastsPass(PassRegistry &);
extern char &AMDGPUFixFunctionBitcastsID;
+ModulePass *createAMDGPUCtorDtorLoweringPass();
+void initializeAMDGPUCtorDtorLoweringPass(PassRegistry &);
+extern char &AMDGPUCtorDtorLoweringID;
+
FunctionPass *createAMDGPULowerKernelArgumentsPass();
void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &);
extern char &AMDGPULowerKernelArgumentsID;
+FunctionPass *createAMDGPUPromoteKernelArgumentsPass();
+void initializeAMDGPUPromoteKernelArgumentsPass(PassRegistry &);
+extern char &AMDGPUPromoteKernelArgumentsID;
+
+struct AMDGPUPromoteKernelArgumentsPass
+ : PassInfoMixin<AMDGPUPromoteKernelArgumentsPass> {
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
ModulePass *createAMDGPULowerKernelAttributesPass();
void initializeAMDGPULowerKernelAttributesPass(PassRegistry &);
extern char &AMDGPULowerKernelAttributesID;
@@ -172,21 +165,6 @@ extern char &AMDGPURewriteOutArgumentsID;
void initializeGCNDPPCombinePass(PassRegistry &);
extern char &GCNDPPCombineID;
-void initializeR600ClauseMergePassPass(PassRegistry &);
-extern char &R600ClauseMergePassID;
-
-void initializeR600ControlFlowFinalizerPass(PassRegistry &);
-extern char &R600ControlFlowFinalizerID;
-
-void initializeR600ExpandSpecialInstrsPassPass(PassRegistry &);
-extern char &R600ExpandSpecialInstrsPassID;
-
-void initializeR600VectorRegMergerPass(PassRegistry &);
-extern char &R600VectorRegMergerID;
-
-void initializeR600PacketizerPass(PassRegistry &);
-extern char &R600PacketizerID;
-
void initializeSIFoldOperandsPass(PassRegistry &);
extern char &SIFoldOperandsID;
@@ -278,7 +256,6 @@ private:
bool GlobalOpt;
};
-ModulePass *createR600OpenCLImageTypeLoweringPass();
FunctionPass *createAMDGPUAnnotateUniformValues();
ModulePass *createAMDGPUPrintfRuntimeBinding();
@@ -390,9 +367,9 @@ namespace AMDGPUAS {
BUFFER_FAT_POINTER = 7, ///< Address space for 160-bit buffer fat pointers.
- /// Address space for direct addressible parameter memory (CONST0).
+ /// Address space for direct addressable parameter memory (CONST0).
PARAM_D_ADDRESS = 6,
- /// Address space for indirect addressible parameter memory (VTX1).
+ /// Address space for indirect addressable parameter memory (VTX1).
PARAM_I_ADDRESS = 7,
// Do not re-order the CONSTANT_BUFFER_* enums. Several places depend on
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 7991f3d2a6b2..e606f0e8fc3c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -18,7 +18,6 @@ def p4 : PtrValueType<i64, 4>;
def p5 : PtrValueType<i32, 5>;
def p6 : PtrValueType<i32, 6>;
-
class BoolToList<bit Value> {
list<int> ret = !if(Value, [1]<int>, []<int>);
}
@@ -416,7 +415,7 @@ def FeatureDPP : SubtargetFeature<"dpp",
"Support DPP (Data Parallel Primitives) extension"
>;
-// DPP8 allows arbitrary cross-lane swizzling withing groups of 8 lanes.
+// DPP8 allows arbitrary cross-lane swizzling within groups of 8 lanes.
def FeatureDPP8 : SubtargetFeature<"dpp8",
"HasDPP8",
"true",
@@ -1351,7 +1350,7 @@ def NotLDSRequiresM0Init : Predicate<"!Subtarget->ldsRequiresM0Init()">;
def HasDSAddTid : Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9">,
AssemblerPredicate<(all_of FeatureGFX9Insts)>;
-def HasLDSFPAtomics : Predicate<"Subtarget->hasLDSFPAtomics()">,
+def HasLDSFPAtomicAdd : Predicate<"Subtarget->hasLDSFPAtomicAdd()">,
AssemblerPredicate<(all_of FeatureGFX8Insts)>;
def HasAddNoCarryInsts : Predicate<"Subtarget->hasAddNoCarry()">,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
index 88b88a04a7d1..dd3eb3849eac 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
@@ -10,6 +10,7 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUAliasAnalysis.h"
+#include "AMDGPU.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Instructions.h"
@@ -37,6 +38,10 @@ ImmutablePass *llvm::createAMDGPUExternalAAWrapperPass() {
return new AMDGPUExternalAAWrapper();
}
+AMDGPUAAWrapperPass::AMDGPUAAWrapperPass() : ImmutablePass(ID) {
+ initializeAMDGPUAAWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
void AMDGPUAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
}
@@ -94,7 +99,7 @@ AliasResult AMDGPUAAResult::alias(const MemoryLocation &LocA,
getUnderlyingObject(A.Ptr->stripPointerCastsForAliasAnalysis());
if (const LoadInst *LI = dyn_cast<LoadInst>(ObjA)) {
// If a generic pointer is loaded from the constant address space, it
- // could only be a GLOBAL or CONSTANT one as that address space is soley
+ // could only be a GLOBAL or CONSTANT one as that address space is solely
// prepared on the host side, where only GLOBAL or CONSTANT variables are
// visible. Note that this even holds for regular functions.
if (LI->getPointerAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
index 44de40d4aa7f..22be014813b0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
@@ -12,13 +12,11 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUALIASANALYSIS_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUALIASANALYSIS_H
-#include "AMDGPU.h"
#include "llvm/Analysis/AliasAnalysis.h"
namespace llvm {
class DataLayout;
-class MDNode;
class MemoryLocation;
/// A simple AA result that uses TBAA metadata to answer queries.
@@ -67,9 +65,7 @@ class AMDGPUAAWrapperPass : public ImmutablePass {
public:
static char ID;
- AMDGPUAAWrapperPass() : ImmutablePass(ID) {
- initializeAMDGPUAAWrapperPassPass(*PassRegistry::getPassRegistry());
- }
+ AMDGPUAAWrapperPass();
AMDGPUAAResult &getResult() { return *Result; }
const AMDGPUAAResult &getResult() const { return *Result; }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
index 2af9fc955875..2e24e9f929d2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
@@ -15,6 +15,7 @@
#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/CodeGen/CommandFlags.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
@@ -70,7 +71,7 @@ recursivelyVisitUsers(GlobalValue &GV,
// and just let us hit the error when we can't handle this.
//
// Unfortunately, clang adds noinline to all functions at -O0. We have
- // to override this here. until that's fixed.
+ // to override this here until that's fixed.
F->removeFnAttr(Attribute::NoInline);
FuncsToAlwaysInline.insert(F);
@@ -90,9 +91,13 @@ static bool alwaysInlineImpl(Module &M, bool GlobalOpt) {
SmallPtrSet<Function *, 8> FuncsToAlwaysInline;
SmallPtrSet<Function *, 8> FuncsToNoInline;
+ Triple TT(M.getTargetTriple());
for (GlobalAlias &A : M.aliases()) {
if (Function* F = dyn_cast<Function>(A.getAliasee())) {
+ if (TT.getArch() == Triple::amdgcn &&
+ A.getLinkage() != GlobalValue::InternalLinkage)
+ continue;
A.replaceAllUsesWith(F);
AliasesToRemove.push_back(&A);
}
@@ -122,7 +127,7 @@ static bool alwaysInlineImpl(Module &M, bool GlobalOpt) {
unsigned AS = GV.getAddressSpace();
if ((AS == AMDGPUAS::REGION_ADDRESS) ||
(AS == AMDGPUAS::LOCAL_ADDRESS &&
- !AMDGPUTargetMachine::EnableLowerModuleLDS))
+ (!AMDGPUTargetMachine::EnableLowerModuleLDS || !GV.hasInitializer())))
recursivelyVisitUsers(GV, FuncsToAlwaysInline);
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
index af6dfc07eb50..52791dfd9d93 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
@@ -6,8 +6,9 @@
//
//===----------------------------------------------------------------------===//
//
-/// \file This pass adds target attributes to functions which use intrinsics
-/// which will impact calling convention lowering.
+/// \file This pass propagates the uniform-work-group-size attribute from
+/// kernels to leaf functions when possible. It also adds additional attributes
+/// to hint ABI lowering optimizations later.
//
//===----------------------------------------------------------------------===//
@@ -25,22 +26,11 @@
using namespace llvm;
namespace {
-static constexpr StringLiteral ImplicitAttrNames[] = {
- // X ids unnecessarily propagated to kernels.
- "amdgpu-work-item-id-x", "amdgpu-work-item-id-y",
- "amdgpu-work-item-id-z", "amdgpu-work-group-id-x",
- "amdgpu-work-group-id-y", "amdgpu-work-group-id-z",
- "amdgpu-dispatch-ptr", "amdgpu-dispatch-id",
- "amdgpu-queue-ptr", "amdgpu-implicitarg-ptr"};
-
class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
private:
const TargetMachine *TM = nullptr;
- SmallVector<CallGraphNode*, 8> NodeList;
bool addFeatureAttributes(Function &F);
- bool processUniformWorkGroupAttribute();
- bool propagateUniformWorkGroupAttribute(Function &Caller, Function &Callee);
public:
static char ID;
@@ -58,12 +48,6 @@ public:
AU.setPreservesAll();
CallGraphSCCPass::getAnalysisUsage(AU);
}
-
- static bool visitConstantExpr(const ConstantExpr *CE);
- static bool visitConstantExprsRecursively(
- const Constant *EntryC,
- SmallPtrSet<const Constant *, 8> &ConstantExprVisited, bool IsFunc,
- bool HasApertureRegs);
};
} // end anonymous namespace
@@ -75,212 +59,11 @@ char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID;
INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
"Add AMDGPU function attributes", false, false)
-
-// The queue ptr is only needed when casting to flat, not from it.
-static bool castRequiresQueuePtr(unsigned SrcAS) {
- return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
-}
-
-static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) {
- return castRequiresQueuePtr(ASC->getSrcAddressSpace());
-}
-
-static bool isDSAddress(const Constant *C) {
- const GlobalValue *GV = dyn_cast<GlobalValue>(C);
- if (!GV)
- return false;
- unsigned AS = GV->getAddressSpace();
- return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
-}
-
-bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) {
- if (CE->getOpcode() == Instruction::AddrSpaceCast) {
- unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
- return castRequiresQueuePtr(SrcAS);
- }
-
- return false;
-}
-
-bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
- const Constant *EntryC,
- SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
- bool IsFunc, bool HasApertureRegs) {
-
- if (!ConstantExprVisited.insert(EntryC).second)
- return false;
-
- SmallVector<const Constant *, 16> Stack;
- Stack.push_back(EntryC);
-
- while (!Stack.empty()) {
- const Constant *C = Stack.pop_back_val();
-
- // We need to trap on DS globals in non-entry functions.
- if (IsFunc && isDSAddress(C))
- return true;
-
- // Check this constant expression.
- if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
- if (!HasApertureRegs && visitConstantExpr(CE))
- return true;
- }
-
- // Visit all sub-expressions.
- for (const Use &U : C->operands()) {
- const auto *OpC = dyn_cast<Constant>(U);
- if (!OpC)
- continue;
-
- if (!ConstantExprVisited.insert(OpC).second)
- continue;
-
- Stack.push_back(OpC);
- }
- }
-
- return false;
-}
-
-// We do not need to note the x workitem or workgroup id because they are always
-// initialized.
-//
-// TODO: We should not add the attributes if the known compile time workgroup
-// size is 1 for y/z.
-static StringRef intrinsicToAttrName(Intrinsic::ID ID,
- bool &NonKernelOnly,
- bool &IsQueuePtr) {
- switch (ID) {
- case Intrinsic::amdgcn_workitem_id_x:
- NonKernelOnly = true;
- return "amdgpu-work-item-id-x";
- case Intrinsic::amdgcn_workgroup_id_x:
- NonKernelOnly = true;
- return "amdgpu-work-group-id-x";
- case Intrinsic::amdgcn_workitem_id_y:
- case Intrinsic::r600_read_tidig_y:
- return "amdgpu-work-item-id-y";
- case Intrinsic::amdgcn_workitem_id_z:
- case Intrinsic::r600_read_tidig_z:
- return "amdgpu-work-item-id-z";
- case Intrinsic::amdgcn_workgroup_id_y:
- case Intrinsic::r600_read_tgid_y:
- return "amdgpu-work-group-id-y";
- case Intrinsic::amdgcn_workgroup_id_z:
- case Intrinsic::r600_read_tgid_z:
- return "amdgpu-work-group-id-z";
- case Intrinsic::amdgcn_dispatch_ptr:
- return "amdgpu-dispatch-ptr";
- case Intrinsic::amdgcn_dispatch_id:
- return "amdgpu-dispatch-id";
- case Intrinsic::amdgcn_kernarg_segment_ptr:
- return "amdgpu-kernarg-segment-ptr";
- case Intrinsic::amdgcn_implicitarg_ptr:
- return "amdgpu-implicitarg-ptr";
- case Intrinsic::amdgcn_queue_ptr:
- case Intrinsic::amdgcn_is_shared:
- case Intrinsic::amdgcn_is_private:
- // TODO: Does not require queue ptr on gfx9+
- case Intrinsic::trap:
- case Intrinsic::debugtrap:
- IsQueuePtr = true;
- return "amdgpu-queue-ptr";
- default:
- return "";
- }
-}
-
-static bool handleAttr(Function &Parent, const Function &Callee,
- StringRef Name) {
- if (Callee.hasFnAttribute(Name)) {
- Parent.addFnAttr(Name);
- return true;
- }
- return false;
-}
-
-static void copyFeaturesToFunction(Function &Parent, const Function &Callee,
- bool &NeedQueuePtr) {
- if (handleAttr(Parent, Callee, "amdgpu-queue-ptr"))
- NeedQueuePtr = true;
-
- for (StringRef AttrName : ImplicitAttrNames)
- handleAttr(Parent, Callee, AttrName);
-}
-
-bool AMDGPUAnnotateKernelFeatures::processUniformWorkGroupAttribute() {
- bool Changed = false;
-
- for (auto *Node : reverse(NodeList)) {
- Function *Caller = Node->getFunction();
-
- for (auto I : *Node) {
- Function *Callee = std::get<1>(I)->getFunction();
- if (Callee)
- Changed = propagateUniformWorkGroupAttribute(*Caller, *Callee);
- }
- }
-
- return Changed;
-}
-
-bool AMDGPUAnnotateKernelFeatures::propagateUniformWorkGroupAttribute(
- Function &Caller, Function &Callee) {
-
- // Check for externally defined function
- if (!Callee.hasExactDefinition()) {
- Callee.addFnAttr("uniform-work-group-size", "false");
- if (!Caller.hasFnAttribute("uniform-work-group-size"))
- Caller.addFnAttr("uniform-work-group-size", "false");
-
- return true;
- }
- // Check if the Caller has the attribute
- if (Caller.hasFnAttribute("uniform-work-group-size")) {
- // Check if the value of the attribute is true
- if (Caller.getFnAttribute("uniform-work-group-size")
- .getValueAsString().equals("true")) {
- // Propagate the attribute to the Callee, if it does not have it
- if (!Callee.hasFnAttribute("uniform-work-group-size")) {
- Callee.addFnAttr("uniform-work-group-size", "true");
- return true;
- }
- } else {
- Callee.addFnAttr("uniform-work-group-size", "false");
- return true;
- }
- } else {
- // If the attribute is absent, set it as false
- Caller.addFnAttr("uniform-work-group-size", "false");
- Callee.addFnAttr("uniform-work-group-size", "false");
- return true;
- }
- return false;
-}
-
bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
- const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(F);
- bool HasApertureRegs = ST.hasApertureRegs();
- SmallPtrSet<const Constant *, 8> ConstantExprVisited;
-
bool HaveStackObjects = false;
bool Changed = false;
- bool NeedQueuePtr = false;
bool HaveCall = false;
- bool HasIndirectCall = false;
bool IsFunc = !AMDGPU::isEntryFunctionCC(F.getCallingConv());
- CallingConv::ID CC = F.getCallingConv();
- bool CallingConvSupportsAllImplicits = (CC != CallingConv::AMDGPU_Gfx);
-
- // If this function hasAddressTaken() = true
- // then add all attributes corresponding to the implicit args.
- if (CallingConvSupportsAllImplicits &&
- F.hasAddressTaken(nullptr, true, true, true)) {
- for (StringRef AttrName : ImplicitAttrNames) {
- F.addFnAttr(AttrName);
- }
- Changed = true;
- }
for (BasicBlock &BB : F) {
for (Instruction &I : BB) {
@@ -293,65 +76,23 @@ bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
const Function *Callee =
dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
- // Note the occurence of indirect call.
+ // Note the occurrence of indirect call.
if (!Callee) {
- if (!CB->isInlineAsm()) {
- HasIndirectCall = true;
+ if (!CB->isInlineAsm())
HaveCall = true;
- }
+
continue;
}
Intrinsic::ID IID = Callee->getIntrinsicID();
if (IID == Intrinsic::not_intrinsic) {
HaveCall = true;
- copyFeaturesToFunction(F, *Callee, NeedQueuePtr);
Changed = true;
- } else {
- bool NonKernelOnly = false;
-
- if (!IsFunc && IID == Intrinsic::amdgcn_kernarg_segment_ptr) {
- F.addFnAttr("amdgpu-kernarg-segment-ptr");
- } else {
- StringRef AttrName = intrinsicToAttrName(IID, NonKernelOnly,
- NeedQueuePtr);
- if (!AttrName.empty() && (IsFunc || !NonKernelOnly)) {
- F.addFnAttr(AttrName);
- Changed = true;
- }
- }
- }
- }
-
- if (NeedQueuePtr || (!IsFunc && HasApertureRegs))
- continue;
-
- if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
- if (!HasApertureRegs && castRequiresQueuePtr(ASC)) {
- NeedQueuePtr = true;
- continue;
- }
- }
-
- for (const Use &U : I.operands()) {
- const auto *OpC = dyn_cast<Constant>(U);
- if (!OpC)
- continue;
-
- if (visitConstantExprsRecursively(OpC, ConstantExprVisited, IsFunc,
- HasApertureRegs)) {
- NeedQueuePtr = true;
- break;
}
}
}
}
- if (NeedQueuePtr) {
- F.addFnAttr("amdgpu-queue-ptr");
- Changed = true;
- }
-
// TODO: We could refine this to captured pointers that could possibly be
// accessed by flat instructions. For now this is mostly a poor way of
// estimating whether there are calls before argument lowering.
@@ -365,28 +106,6 @@ bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
Changed = true;
}
- // This pass cannot copy attributes from callees to callers
- // if there is an indirect call and in thus such cases,
- // hasAddressTaken() would be false for kernels and functions
- // making an indirect call (if they are themselves not indirectly called).
- // We must tag all such kernels/functions with all implicits attributes
- // for correctness.
- // e.g.
- // 1. Kernel K1 makes an indirect call to function F1.
- // Without detecting an indirect call in K1, this pass will not
- // add all implicit args to K1 (which is incorrect).
- // 2. Kernel K1 makes direct call to F1 which makes indirect call to function
- // F2.
- // Without detecting an indirect call in F1 (whose hasAddressTaken() is
- // false), the pass will not add all implicit args to F1 (which is
- // essential for correctness).
- if (CallingConvSupportsAllImplicits && HasIndirectCall) {
- for (StringRef AttrName : ImplicitAttrNames) {
- F.addFnAttr(AttrName);
- }
- Changed = true;
- }
-
return Changed;
}
@@ -394,14 +113,6 @@ bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) {
bool Changed = false;
for (CallGraphNode *I : SCC) {
- // Build a list of CallGraphNodes from most number of uses to least
- if (I->getNumReferences())
- NodeList.push_back(I);
- else {
- processUniformWorkGroupAttribute();
- NodeList.clear();
- }
-
Function *F = I->getFunction();
// Ignore functions with graphics calling conventions, these are currently
// not allowed to have kernel arguments.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index cbc4ab212566..bb2e723f4ab0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -32,8 +32,8 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
@@ -678,7 +678,8 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
GCNSubtarget::MaxWaveScratchSize / STM.getWavefrontSize();
if (ProgInfo.ScratchSize > MaxScratchPerWorkitem) {
DiagnosticInfoStackSize DiagStackSize(MF.getFunction(),
- ProgInfo.ScratchSize, DS_Error);
+ ProgInfo.ScratchSize,
+ MaxScratchPerWorkitem, DS_Error);
MF.getFunction().getContext().diagnose(DiagStackSize);
}
@@ -697,11 +698,9 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
if (ProgInfo.NumSGPR > MaxAddressableNumSGPRs) {
// This can happen due to a compiler bug or when using inline asm.
LLVMContext &Ctx = MF.getFunction().getContext();
- DiagnosticInfoResourceLimit Diag(MF.getFunction(),
- "addressable scalar registers",
- ProgInfo.NumSGPR, DS_Error,
- DK_ResourceLimit,
- MaxAddressableNumSGPRs);
+ DiagnosticInfoResourceLimit Diag(
+ MF.getFunction(), "addressable scalar registers", ProgInfo.NumSGPR,
+ MaxAddressableNumSGPRs, DS_Error, DK_ResourceLimit);
Ctx.diagnose(Diag);
ProgInfo.NumSGPR = MaxAddressableNumSGPRs - 1;
}
@@ -717,18 +716,72 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
unsigned WaveDispatchNumSGPR = 0, WaveDispatchNumVGPR = 0;
if (isShader(F.getCallingConv())) {
+ bool IsPixelShader =
+ F.getCallingConv() == CallingConv::AMDGPU_PS && !STM.isAmdHsaOS();
+
+ // Calculate the number of VGPR registers based on the SPI input registers
+ uint32_t InputEna = 0;
+ uint32_t InputAddr = 0;
+ unsigned LastEna = 0;
+
+ if (IsPixelShader) {
+ // Note for IsPixelShader:
+ // By this stage, all enabled inputs are tagged in InputAddr as well.
+ // We will use InputAddr to determine whether the input counts against the
+ // VGPR total and only use the InputEnable to determine the last input
+ // that is relevant; if extra arguments are used, then we have to honour
+ // the InputAddr for any intermediate non-enabled inputs.
+ InputEna = MFI->getPSInputEnable();
+ InputAddr = MFI->getPSInputAddr();
+
+ // We only need to consider input args up to the last used arg.
+ assert((InputEna || InputAddr) &&
+ "PSInputAddr and PSInputEnable should "
+ "never both be 0 for AMDGPU_PS shaders");
+ // There are some rare circumstances where InputAddr is non-zero and
+ // InputEna can be set to 0. In this case we default to setting LastEna
+ // to 1.
+ LastEna = InputEna ? findLastSet(InputEna) + 1 : 1;
+ }
+
// FIXME: We should be using the number of registers determined during
// calling convention lowering to legalize the types.
const DataLayout &DL = F.getParent()->getDataLayout();
+ unsigned PSArgCount = 0;
+ unsigned IntermediateVGPR = 0;
for (auto &Arg : F.args()) {
unsigned NumRegs = (DL.getTypeSizeInBits(Arg.getType()) + 31) / 32;
- if (Arg.hasAttribute(Attribute::InReg))
+ if (Arg.hasAttribute(Attribute::InReg)) {
WaveDispatchNumSGPR += NumRegs;
- else
- WaveDispatchNumVGPR += NumRegs;
+ } else {
+ // If this is a PS shader and we're processing the PS Input args (first
+ // 16 VGPR), use the InputEna and InputAddr bits to define how many
+ // VGPRs are actually used.
+ // Any extra VGPR arguments are handled as normal arguments (and
+ // contribute to the VGPR count whether they're used or not).
+ if (IsPixelShader && PSArgCount < 16) {
+ if ((1 << PSArgCount) & InputAddr) {
+ if (PSArgCount < LastEna)
+ WaveDispatchNumVGPR += NumRegs;
+ else
+ IntermediateVGPR += NumRegs;
+ }
+ PSArgCount++;
+ } else {
+ // If there are extra arguments we have to include the allocation for
+ // the non-used (but enabled with InputAddr) input arguments
+ if (IntermediateVGPR) {
+ WaveDispatchNumVGPR += IntermediateVGPR;
+ IntermediateVGPR = 0;
+ }
+ WaveDispatchNumVGPR += NumRegs;
+ }
+ }
}
ProgInfo.NumSGPR = std::max(ProgInfo.NumSGPR, WaveDispatchNumSGPR);
- ProgInfo.NumVGPR = std::max(ProgInfo.NumVGPR, WaveDispatchNumVGPR);
+ ProgInfo.NumArchVGPR = std::max(ProgInfo.NumVGPR, WaveDispatchNumVGPR);
+ ProgInfo.NumVGPR =
+ Info.getTotalNumVGPRs(STM, Info.NumAGPR, ProgInfo.NumArchVGPR);
}
// Adjust number of registers used to meet default/requested minimum/maximum
@@ -745,11 +798,9 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
// This can happen due to a compiler bug or when using inline asm to use
// the registers which are usually reserved for vcc etc.
LLVMContext &Ctx = MF.getFunction().getContext();
- DiagnosticInfoResourceLimit Diag(MF.getFunction(),
- "scalar registers",
- ProgInfo.NumSGPR, DS_Error,
- DK_ResourceLimit,
- MaxAddressableNumSGPRs);
+ DiagnosticInfoResourceLimit Diag(MF.getFunction(), "scalar registers",
+ ProgInfo.NumSGPR, MaxAddressableNumSGPRs,
+ DS_Error, DK_ResourceLimit);
Ctx.diagnose(Diag);
ProgInfo.NumSGPR = MaxAddressableNumSGPRs;
ProgInfo.NumSGPRsForWavesPerEU = MaxAddressableNumSGPRs;
@@ -766,14 +817,16 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
if (MFI->getNumUserSGPRs() > STM.getMaxNumUserSGPRs()) {
LLVMContext &Ctx = MF.getFunction().getContext();
DiagnosticInfoResourceLimit Diag(MF.getFunction(), "user SGPRs",
- MFI->getNumUserSGPRs(), DS_Error);
+ MFI->getNumUserSGPRs(),
+ STM.getMaxNumUserSGPRs(), DS_Error);
Ctx.diagnose(Diag);
}
if (MFI->getLDSSize() > static_cast<unsigned>(STM.getLocalMemorySize())) {
LLVMContext &Ctx = MF.getFunction().getContext();
DiagnosticInfoResourceLimit Diag(MF.getFunction(), "local memory",
- MFI->getLDSSize(), DS_Error);
+ MFI->getLDSSize(),
+ STM.getLocalMemorySize(), DS_Error);
Ctx.diagnose(Diag);
}
@@ -1039,6 +1092,7 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
// kernarg_segment_alignment is specified as log of the alignment.
// The minimum alignment is 16.
+ // FIXME: The metadata treats the minimum as 4?
Out.kernarg_segment_alignment = Log2(std::max(Align(16), MaxKernArgAlign));
}
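The PS-input accounting in getSIProgramInfo above is easy to misread: an input that is routed in InputAddr but sits at or beyond LastEna is only charged to the VGPR total once a later argument (beyond the first 16) forces the intermediate slots to be materialized. A hedged standalone model of that loop, with hypothetical names and findLastSet replaced by an explicit scan:

#include <vector>

// Model of the wave-dispatch VGPR count for pixel-shader input arguments.
// ArgRegs holds the 32-bit register count of each non-inreg argument.
static unsigned countPSInputVGPRs(unsigned InputEna, unsigned InputAddr,
                                  const std::vector<unsigned> &ArgRegs) {
  unsigned LastEna = 0;
  for (unsigned B = 0; B < 32; ++B)
    if (InputEna & (1u << B))
      LastEna = B + 1;                 // findLastSet(InputEna) + 1
  if (!InputEna)
    LastEna = 1;                       // rare InputAddr-only case

  unsigned NumVGPR = 0, Intermediate = 0, PSArgCount = 0;
  for (unsigned NumRegs : ArgRegs) {
    if (PSArgCount < 16) {
      if ((1u << PSArgCount) & InputAddr) {
        if (PSArgCount < LastEna)
          NumVGPR += NumRegs;          // enabled input, always counted
        else
          Intermediate += NumRegs;     // counted only if an extra arg follows
      }
      ++PSArgCount;
    } else {
      NumVGPR += Intermediate + NumRegs; // extra args pull intermediates in
      Intermediate = 0;
    }
  }
  return NumVGPR;
}

// With InputEna = 0b001, InputAddr = 0b111 and three one-register inputs,
// only the first input counts: countPSInputVGPRs(0b001, 0b111, {1, 1, 1}) == 1.
// A 17th argument would add the two intermediate registers back to the total.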
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index d3a555bc228f..d5c60aa3be7d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -26,7 +26,6 @@ struct AMDGPUResourceUsageAnalysis;
class AMDGPUTargetStreamer;
class MCCodeEmitter;
class MCOperand;
-class GCNSubtarget;
namespace AMDGPU {
namespace HSAMD {
@@ -55,9 +54,6 @@ private:
void getSIProgramInfo(SIProgramInfo &Out, const MachineFunction &MF);
void getAmdKernelCode(amd_kernel_code_t &Out, const SIProgramInfo &KernelInfo,
const MachineFunction &MF) const;
- void findNumUsedRegistersSI(const MachineFunction &MF,
- unsigned &NumSGPR,
- unsigned &NumVGPR) const;
/// Emit register usage information so that the GPU driver
/// can correctly setup the GPU state.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
index 3e9fdcb1618e..1e2cf3890d0a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
@@ -541,7 +541,7 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
if (NeedResult)
ExclScan = buildShiftRight(B, NewV, Identity);
- // Read the value from the last lane, which has accumlated the values of
+ // Read the value from the last lane, which has accumulated the values of
// each active lane in the wavefront. This will be our new value which we
// will provide to the atomic operation.
Value *const LastLaneIdx = B.getInt32(ST->getWavefrontSize() - 1);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 61b1d22edc33..f0aadab3302f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -22,48 +22,71 @@
using namespace llvm;
-static constexpr StringLiteral ImplicitAttrNames[] = {
- // X ids unnecessarily propagated to kernels.
- "amdgpu-work-item-id-x", "amdgpu-work-item-id-y",
- "amdgpu-work-item-id-z", "amdgpu-work-group-id-x",
- "amdgpu-work-group-id-y", "amdgpu-work-group-id-z",
- "amdgpu-dispatch-ptr", "amdgpu-dispatch-id",
- "amdgpu-queue-ptr", "amdgpu-implicitarg-ptr"};
+enum ImplicitArgumentMask {
+ NOT_IMPLICIT_INPUT = 0,
+
+ // SGPRs
+ DISPATCH_PTR = 1 << 0,
+ QUEUE_PTR = 1 << 1,
+ DISPATCH_ID = 1 << 2,
+ IMPLICIT_ARG_PTR = 1 << 3,
+ WORKGROUP_ID_X = 1 << 4,
+ WORKGROUP_ID_Y = 1 << 5,
+ WORKGROUP_ID_Z = 1 << 6,
+
+ // VGPRS:
+ WORKITEM_ID_X = 1 << 7,
+ WORKITEM_ID_Y = 1 << 8,
+ WORKITEM_ID_Z = 1 << 9,
+ ALL_ARGUMENT_MASK = (1 << 10) - 1
+};
+
+static constexpr std::pair<ImplicitArgumentMask,
+ StringLiteral> ImplicitAttrs[] = {
+ {DISPATCH_PTR, "amdgpu-no-dispatch-ptr"},
+ {QUEUE_PTR, "amdgpu-no-queue-ptr"},
+ {DISPATCH_ID, "amdgpu-no-dispatch-id"},
+ {IMPLICIT_ARG_PTR, "amdgpu-no-implicitarg-ptr"},
+ {WORKGROUP_ID_X, "amdgpu-no-workgroup-id-x"},
+ {WORKGROUP_ID_Y, "amdgpu-no-workgroup-id-y"},
+ {WORKGROUP_ID_Z, "amdgpu-no-workgroup-id-z"},
+ {WORKITEM_ID_X, "amdgpu-no-workitem-id-x"},
+ {WORKITEM_ID_Y, "amdgpu-no-workitem-id-y"},
+ {WORKITEM_ID_Z, "amdgpu-no-workitem-id-z"}
+};
// We do not need to note the x workitem or workgroup id because they are always
// initialized.
//
// TODO: We should not add the attributes if the known compile time workgroup
// size is 1 for y/z.
-static StringRef intrinsicToAttrName(Intrinsic::ID ID, bool &NonKernelOnly,
- bool &IsQueuePtr) {
+static ImplicitArgumentMask
+intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &IsQueuePtr) {
switch (ID) {
case Intrinsic::amdgcn_workitem_id_x:
NonKernelOnly = true;
- return "amdgpu-work-item-id-x";
+ return WORKITEM_ID_X;
case Intrinsic::amdgcn_workgroup_id_x:
NonKernelOnly = true;
- return "amdgpu-work-group-id-x";
+ return WORKGROUP_ID_X;
case Intrinsic::amdgcn_workitem_id_y:
case Intrinsic::r600_read_tidig_y:
- return "amdgpu-work-item-id-y";
+ return WORKITEM_ID_Y;
case Intrinsic::amdgcn_workitem_id_z:
case Intrinsic::r600_read_tidig_z:
- return "amdgpu-work-item-id-z";
+ return WORKITEM_ID_Z;
case Intrinsic::amdgcn_workgroup_id_y:
case Intrinsic::r600_read_tgid_y:
- return "amdgpu-work-group-id-y";
+ return WORKGROUP_ID_Y;
case Intrinsic::amdgcn_workgroup_id_z:
case Intrinsic::r600_read_tgid_z:
- return "amdgpu-work-group-id-z";
+ return WORKGROUP_ID_Z;
case Intrinsic::amdgcn_dispatch_ptr:
- return "amdgpu-dispatch-ptr";
+ return DISPATCH_PTR;
case Intrinsic::amdgcn_dispatch_id:
- return "amdgpu-dispatch-id";
- case Intrinsic::amdgcn_kernarg_segment_ptr:
- return "amdgpu-kernarg-segment-ptr";
+ return DISPATCH_ID;
case Intrinsic::amdgcn_implicitarg_ptr:
- return "amdgpu-implicitarg-ptr";
+ return IMPLICIT_ARG_PTR;
case Intrinsic::amdgcn_queue_ptr:
case Intrinsic::amdgcn_is_shared:
case Intrinsic::amdgcn_is_private:
@@ -71,9 +94,9 @@ static StringRef intrinsicToAttrName(Intrinsic::ID ID, bool &NonKernelOnly,
case Intrinsic::trap:
case Intrinsic::debugtrap:
IsQueuePtr = true;
- return "amdgpu-queue-ptr";
+ return QUEUE_PTR;
default:
- return "";
+ return NOT_IMPLICIT_INPUT;
}
}
@@ -89,6 +112,7 @@ static bool isDSAddress(const Constant *C) {
return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
}
+namespace {
class AMDGPUInformationCache : public InformationCache {
public:
AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
@@ -105,6 +129,17 @@ public:
return ST.hasApertureRegs();
}
+ std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) {
+ const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
+ return ST.getFlatWorkGroupSizes(F);
+ }
+
+ std::pair<unsigned, unsigned>
+ getMaximumFlatWorkGroupRange(const Function &F) {
+ const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
+ return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
+ }
+
private:
/// Check if the ConstantExpr \p CE requires queue ptr attribute.
static bool visitConstExpr(const ConstantExpr *CE) {
@@ -163,8 +198,11 @@ private:
DenseMap<const Constant *, uint8_t> ConstantStatus;
};
-struct AAAMDAttributes : public StateWrapper<BooleanState, AbstractAttribute> {
- using Base = StateWrapper<BooleanState, AbstractAttribute>;
+struct AAAMDAttributes : public StateWrapper<
+ BitIntegerState<uint16_t, ALL_ARGUMENT_MASK, 0>, AbstractAttribute> {
+ using Base = StateWrapper<BitIntegerState<uint16_t, ALL_ARGUMENT_MASK, 0>,
+ AbstractAttribute>;
+
AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
/// Create an abstract attribute view for the position \p IRP.
@@ -183,24 +221,24 @@ struct AAAMDAttributes : public StateWrapper<BooleanState, AbstractAttribute> {
return (AA->getIdAddr() == &ID);
}
- virtual const DenseSet<StringRef> &getAttributes() const = 0;
-
/// Unique ID (due to the unique address)
static const char ID;
};
const char AAAMDAttributes::ID = 0;
-struct AAAMDWorkGroupSize
+struct AAUniformWorkGroupSize
: public StateWrapper<BooleanState, AbstractAttribute> {
using Base = StateWrapper<BooleanState, AbstractAttribute>;
- AAAMDWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
+ AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
/// Create an abstract attribute view for the position \p IRP.
- static AAAMDWorkGroupSize &createForPosition(const IRPosition &IRP,
- Attributor &A);
+ static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP,
+ Attributor &A);
/// See AbstractAttribute::getName().
- const std::string getName() const override { return "AAAMDWorkGroupSize"; }
+ const std::string getName() const override {
+ return "AAUniformWorkGroupSize";
+ }
/// See AbstractAttribute::getIdAddr().
const char *getIdAddr() const override { return &ID; }
@@ -214,11 +252,11 @@ struct AAAMDWorkGroupSize
/// Unique ID (due to the unique address)
static const char ID;
};
-const char AAAMDWorkGroupSize::ID = 0;
+const char AAUniformWorkGroupSize::ID = 0;
-struct AAAMDWorkGroupSizeFunction : public AAAMDWorkGroupSize {
- AAAMDWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
- : AAAMDWorkGroupSize(IRP, A) {}
+struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
+ AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
+ : AAUniformWorkGroupSize(IRP, A) {}
void initialize(Attributor &A) override {
Function *F = getAssociatedFunction();
@@ -244,10 +282,10 @@ struct AAAMDWorkGroupSizeFunction : public AAAMDWorkGroupSize {
auto CheckCallSite = [&](AbstractCallSite CS) {
Function *Caller = CS.getInstruction()->getFunction();
- LLVM_DEBUG(dbgs() << "[AAAMDWorkGroupSize] Call " << Caller->getName()
+ LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName()
<< "->" << getAssociatedFunction()->getName() << "\n");
- const auto &CallerInfo = A.getAAFor<AAAMDWorkGroupSize>(
+ const auto &CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
*this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
Change = Change | clampStateAndIndicateChange(this->getState(),
@@ -286,11 +324,13 @@ struct AAAMDWorkGroupSizeFunction : public AAAMDWorkGroupSize {
void trackStatistics() const override {}
};
-AAAMDWorkGroupSize &AAAMDWorkGroupSize::createForPosition(const IRPosition &IRP,
- Attributor &A) {
+AAUniformWorkGroupSize &
+AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP,
+ Attributor &A) {
if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
- return *new (A.Allocator) AAAMDWorkGroupSizeFunction(IRP, A);
- llvm_unreachable("AAAMDWorkGroupSize is only valid for function position");
+ return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A);
+ llvm_unreachable(
+ "AAUniformWorkGroupSize is only valid for function position");
}
struct AAAMDAttributesFunction : public AAAMDAttributes {
@@ -299,14 +339,13 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
void initialize(Attributor &A) override {
Function *F = getAssociatedFunction();
- CallingConv::ID CC = F->getCallingConv();
- bool CallingConvSupportsAllImplicits = (CC != CallingConv::AMDGPU_Gfx);
+ for (auto Attr : ImplicitAttrs) {
+ if (F->hasFnAttribute(Attr.second))
+ addKnownBits(Attr.first);
+ }
- // Don't add attributes to instrinsics
- if (F->isIntrinsic()) {
- indicatePessimisticFixpoint();
+ if (F->isDeclaration())
return;
- }
// Ignore functions with graphics calling conventions, these are currently
// not allowed to have kernel arguments.
@@ -314,94 +353,47 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
indicatePessimisticFixpoint();
return;
}
-
- for (StringRef Attr : ImplicitAttrNames) {
- if (F->hasFnAttribute(Attr))
- Attributes.insert(Attr);
- }
-
- // TODO: We shouldn't need this in the future.
- if (CallingConvSupportsAllImplicits &&
- F->hasAddressTaken(nullptr, true, true, true)) {
- for (StringRef AttrName : ImplicitAttrNames) {
- Attributes.insert(AttrName);
- }
- }
}
ChangeStatus updateImpl(Attributor &A) override {
Function *F = getAssociatedFunction();
- ChangeStatus Change = ChangeStatus::UNCHANGED;
- bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
- CallingConv::ID CC = F->getCallingConv();
- bool CallingConvSupportsAllImplicits = (CC != CallingConv::AMDGPU_Gfx);
- auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
-
- auto AddAttribute = [&](StringRef AttrName) {
- if (Attributes.insert(AttrName).second)
- Change = ChangeStatus::CHANGED;
- };
+ // The current assumed state used to determine a change.
+ auto OrigAssumed = getAssumed();
// Check for Intrinsics and propagate attributes.
const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
*this, this->getIRPosition(), DepClassTy::REQUIRED);
+ if (AAEdges.hasNonAsmUnknownCallee())
+ return indicatePessimisticFixpoint();
- // We have to assume that we can reach a function with these attributes.
- // We do not consider inline assembly as a unknown callee.
- if (CallingConvSupportsAllImplicits && AAEdges.hasNonAsmUnknownCallee()) {
- for (StringRef AttrName : ImplicitAttrNames) {
- AddAttribute(AttrName);
- }
- }
+ bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
+ auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
bool NeedsQueuePtr = false;
- bool HasCall = false;
+
for (Function *Callee : AAEdges.getOptimisticEdges()) {
Intrinsic::ID IID = Callee->getIntrinsicID();
- if (IID != Intrinsic::not_intrinsic) {
- if (!IsNonEntryFunc && IID == Intrinsic::amdgcn_kernarg_segment_ptr) {
- AddAttribute("amdgpu-kernarg-segment-ptr");
- continue;
- }
-
- bool NonKernelOnly = false;
- StringRef AttrName =
- intrinsicToAttrName(IID, NonKernelOnly, NeedsQueuePtr);
-
- if (!AttrName.empty() && (IsNonEntryFunc || !NonKernelOnly))
- AddAttribute(AttrName);
-
+ if (IID == Intrinsic::not_intrinsic) {
+ const AAAMDAttributes &AAAMD = A.getAAFor<AAAMDAttributes>(
+ *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
+ *this &= AAAMD;
continue;
}
- HasCall = true;
- const AAAMDAttributes &AAAMD = A.getAAFor<AAAMDAttributes>(
- *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
- const DenseSet<StringRef> &CalleeAttributes = AAAMD.getAttributes();
- // Propagate implicit attributes from called function.
- for (StringRef AttrName : ImplicitAttrNames)
- if (CalleeAttributes.count(AttrName))
- AddAttribute(AttrName);
+ bool NonKernelOnly = false;
+ ImplicitArgumentMask AttrMask =
+ intrinsicToAttrMask(IID, NonKernelOnly, NeedsQueuePtr);
+ if (AttrMask != NOT_IMPLICIT_INPUT) {
+ if ((IsNonEntryFunc || !NonKernelOnly))
+ removeAssumedBits(AttrMask);
+ }
}
- HasCall |= AAEdges.hasUnknownCallee();
- if (!IsNonEntryFunc && HasCall)
- AddAttribute("amdgpu-calls");
-
- // Check the function body.
- auto CheckAlloca = [&](Instruction &I) {
- AddAttribute("amdgpu-stack-objects");
- return false;
- };
-
- bool UsedAssumedInformation = false;
- A.checkForAllInstructions(CheckAlloca, *this, {Instruction::Alloca},
- UsedAssumedInformation);
-
// If we found that we need amdgpu-queue-ptr, nothing else to do.
- if (NeedsQueuePtr || Attributes.count("amdgpu-queue-ptr")) {
- AddAttribute("amdgpu-queue-ptr");
- return Change;
+ if (NeedsQueuePtr) {
+ removeAssumedBits(QUEUE_PTR);
+ return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED :
+ ChangeStatus::UNCHANGED;
}
auto CheckAddrSpaceCasts = [&](Instruction &I) {
@@ -419,60 +411,68 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
// instructions, try it first.
// amdgpu-queue-ptr is not needed if aperture regs is present.
- if (!HasApertureRegs)
+ if (!HasApertureRegs) {
+ bool UsedAssumedInformation = false;
A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
{Instruction::AddrSpaceCast},
UsedAssumedInformation);
+ }
// If we found that we need amdgpu-queue-ptr, nothing else to do.
if (NeedsQueuePtr) {
- AddAttribute("amdgpu-queue-ptr");
- return Change;
+ removeAssumedBits(QUEUE_PTR);
+ return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED :
+ ChangeStatus::UNCHANGED;
}
- if (!IsNonEntryFunc && HasApertureRegs)
- return Change;
+ if (!IsNonEntryFunc && HasApertureRegs) {
+ return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED :
+ ChangeStatus::UNCHANGED;
+ }
for (BasicBlock &BB : *F) {
for (Instruction &I : BB) {
for (const Use &U : I.operands()) {
if (const auto *C = dyn_cast<Constant>(U)) {
if (InfoCache.needsQueuePtr(C, *F)) {
- AddAttribute("amdgpu-queue-ptr");
- return Change;
+ removeAssumedBits(QUEUE_PTR);
+ return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED :
+ ChangeStatus::UNCHANGED;
}
}
}
}
}
- return Change;
+ return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED :
+ ChangeStatus::UNCHANGED;
}
ChangeStatus manifest(Attributor &A) override {
SmallVector<Attribute, 8> AttrList;
LLVMContext &Ctx = getAssociatedFunction()->getContext();
- for (StringRef AttrName : Attributes)
- AttrList.push_back(Attribute::get(Ctx, AttrName));
+ for (auto Attr : ImplicitAttrs) {
+ if (isKnown(Attr.first))
+ AttrList.push_back(Attribute::get(Ctx, Attr.second));
+ }
return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
/* ForceReplace */ true);
}
const std::string getAsStr() const override {
- return "AMDInfo[" + std::to_string(Attributes.size()) + "]";
- }
-
- const DenseSet<StringRef> &getAttributes() const override {
- return Attributes;
+ std::string Str;
+ raw_string_ostream OS(Str);
+ OS << "AMDInfo[";
+ for (auto Attr : ImplicitAttrs)
+ OS << ' ' << Attr.second;
+ OS << " ]";
+ return OS.str();
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {}
-
-private:
- DenseSet<StringRef> Attributes;
};
AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
@@ -482,6 +482,118 @@ AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
llvm_unreachable("AAAMDAttributes is only valid for function position");
}
+/// Propagate amdgpu-flat-work-group-size attribute.
+struct AAAMDFlatWorkGroupSize
+ : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
+ using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;
+ AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
+ : Base(IRP, 32) {}
+
+ /// See AbstractAttribute::getState(...).
+ IntegerRangeState &getState() override { return *this; }
+ const IntegerRangeState &getState() const override { return *this; }
+
+ void initialize(Attributor &A) override {
+ Function *F = getAssociatedFunction();
+ auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
+ unsigned MinGroupSize, MaxGroupSize;
+ std::tie(MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes(*F);
+ intersectKnown(
+ ConstantRange(APInt(32, MinGroupSize), APInt(32, MaxGroupSize + 1)));
+ }
+
+ ChangeStatus updateImpl(Attributor &A) override {
+ ChangeStatus Change = ChangeStatus::UNCHANGED;
+
+ auto CheckCallSite = [&](AbstractCallSite CS) {
+ Function *Caller = CS.getInstruction()->getFunction();
+ LLVM_DEBUG(dbgs() << "[AAAMDFlatWorkGroupSize] Call " << Caller->getName()
+ << "->" << getAssociatedFunction()->getName() << '\n');
+
+ const auto &CallerInfo = A.getAAFor<AAAMDFlatWorkGroupSize>(
+ *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
+
+ Change |=
+ clampStateAndIndicateChange(this->getState(), CallerInfo.getState());
+
+ return true;
+ };
+
+ bool AllCallSitesKnown = true;
+ if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
+ return indicatePessimisticFixpoint();
+
+ return Change;
+ }
+
+ ChangeStatus manifest(Attributor &A) override {
+ SmallVector<Attribute, 8> AttrList;
+ Function *F = getAssociatedFunction();
+ LLVMContext &Ctx = F->getContext();
+
+ auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
+ unsigned Min, Max;
+ std::tie(Min, Max) = InfoCache.getMaximumFlatWorkGroupRange(*F);
+
+ // Don't add the attribute if it's the implied default.
+ if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max)
+ return ChangeStatus::UNCHANGED;
+
+ SmallString<10> Buffer;
+ raw_svector_ostream OS(Buffer);
+ OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
+
+ AttrList.push_back(
+ Attribute::get(Ctx, "amdgpu-flat-work-group-size", OS.str()));
+ return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
+ /* ForceReplace */ true);
+ }
+
+ const std::string getAsStr() const override {
+ std::string Str;
+ raw_string_ostream OS(Str);
+ OS << "AMDFlatWorkGroupSize[";
+ OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
+ OS << ']';
+ return OS.str();
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {}
+
+ /// Create an abstract attribute view for the position \p IRP.
+ static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,
+ Attributor &A);
+
+ /// See AbstractAttribute::getName()
+ const std::string getName() const override {
+ return "AAAMDFlatWorkGroupSize";
+ }
+
+ /// See AbstractAttribute::getIdAddr()
+ const char *getIdAddr() const override { return &ID; }
+
+ /// This function should return true if the type of the \p AA is
+ /// AAAMDFlatWorkGroupSize
+ static bool classof(const AbstractAttribute *AA) {
+ return (AA->getIdAddr() == &ID);
+ }
+
+ /// Unique ID (due to the unique address)
+ static const char ID;
+};
+
+const char AAAMDFlatWorkGroupSize::ID = 0;
+
+AAAMDFlatWorkGroupSize &
+AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
+ Attributor &A) {
+ if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
+ return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);
+ llvm_unreachable(
+ "AAAMDFlatWorkGroupSize is only valid for function position");
+}
+
class AMDGPUAttributor : public ModulePass {
public:
AMDGPUAttributor() : ModulePass(ID) {}
@@ -500,17 +612,28 @@ public:
bool runOnModule(Module &M) override {
SetVector<Function *> Functions;
AnalysisGetter AG;
- for (Function &F : M)
- Functions.insert(&F);
+ for (Function &F : M) {
+ if (!F.isIntrinsic())
+ Functions.insert(&F);
+ }
CallGraphUpdater CGUpdater;
BumpPtrAllocator Allocator;
AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM);
- Attributor A(Functions, InfoCache, CGUpdater);
+ DenseSet<const char *> Allowed(
+ {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
+ &AAAMDFlatWorkGroupSize::ID, &AACallEdges::ID});
+
+ Attributor A(Functions, InfoCache, CGUpdater, &Allowed);
for (Function &F : M) {
- A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
- A.getOrCreateAAFor<AAAMDWorkGroupSize>(IRPosition::function(F));
+ if (!F.isIntrinsic()) {
+ A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
+ A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(F));
+ if (!AMDGPU::isEntryFunctionCC(F.getCallingConv())) {
+ A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(F));
+ }
+ }
}
ChangeStatus Change = A.run();
@@ -521,6 +644,7 @@ public:
TargetMachine *TM;
static char ID;
};
+} // namespace
char AMDGPUAttributor::ID = 0;
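Note the polarity flip in this rewrite: the old pass added attributes such as amdgpu-work-item-id-x when an implicit input was needed, whereas the new amdgpu-no-* attributes assert an input is provably unused, so consumers skip plumbing it through. A minimal sketch of the caller-side check, mirroring the AMDGPUCallLowering changes in the next file (CB is the call site's CallBase):

#include "llvm/IR/InstrTypes.h"
using namespace llvm;

// True when the implicit workitem-id-x input still has to be passed: absence
// of the amdgpu-no-* attribute means the callee might read the value.
static bool callNeedsWorkItemIDX(const CallBase &CB) {
  return !CB.hasFnAttr("amdgpu-no-workitem-id-x");
}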
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index b9faad453aba..43928d7c2a09 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -60,7 +60,7 @@ struct AMDGPUOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
}
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign &VA) override {
+ CCValAssign VA) override {
Register ExtReg = extendRegisterMin32(*this, ValVReg, VA);
// If this is a scalar return, insert a readfirstlane just in case the value
@@ -103,7 +103,7 @@ struct AMDGPUIncomingArgHandler : public CallLowering::IncomingValueHandler {
}
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign &VA) override {
+ CCValAssign VA) override {
markPhysRegUsed(PhysReg);
if (VA.getLocVT().getSizeInBits() < 32) {
@@ -203,7 +203,7 @@ struct AMDGPUOutgoingArgHandler : public AMDGPUOutgoingValueHandler {
}
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign &VA) override {
+ CCValAssign VA) override {
MIB.addUse(PhysReg, RegState::Implicit);
Register ExtReg = extendRegisterMin32(*this, ValVReg, VA);
MIRBuilder.buildCopy(PhysReg, ExtReg);
@@ -236,7 +236,7 @@ AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
: CallLowering(&TLI) {
}
-// FIXME: Compatability shim
+// FIXME: Compatibility shim
static ISD::NodeType extOpcodeToISDExtOpcode(unsigned MIOpc) {
switch (MIOpc) {
case TargetOpcode::G_SEXT:
@@ -355,14 +355,23 @@ bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &B, const Value *Val,
auto const &ST = MF.getSubtarget<GCNSubtarget>();
- unsigned ReturnOpc =
- IsShader ? AMDGPU::SI_RETURN_TO_EPILOG : AMDGPU::S_SETPC_B64_return;
+ unsigned ReturnOpc = 0;
+ if (IsShader)
+ ReturnOpc = AMDGPU::SI_RETURN_TO_EPILOG;
+ else if (CC == CallingConv::AMDGPU_Gfx)
+ ReturnOpc = AMDGPU::S_SETPC_B64_return_gfx;
+ else
+ ReturnOpc = AMDGPU::S_SETPC_B64_return;
auto Ret = B.buildInstrNoInsert(ReturnOpc);
Register ReturnAddrVReg;
if (ReturnOpc == AMDGPU::S_SETPC_B64_return) {
ReturnAddrVReg = MRI.createVirtualRegister(&AMDGPU::CCR_SGPR_64RegClass);
Ret.addUse(ReturnAddrVReg);
+ } else if (ReturnOpc == AMDGPU::S_SETPC_B64_return_gfx) {
+ ReturnAddrVReg =
+ MRI.createVirtualRegister(&AMDGPU::Gfx_CCR_SGPR_64RegClass);
+ Ret.addUse(ReturnAddrVReg);
}
if (!FLI.CanLowerReturn)
@@ -370,7 +379,8 @@ bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &B, const Value *Val,
else if (!lowerReturnVal(B, Val, VRegs, Ret))
return false;
- if (ReturnOpc == AMDGPU::S_SETPC_B64_return) {
+ if (ReturnOpc == AMDGPU::S_SETPC_B64_return ||
+ ReturnOpc == AMDGPU::S_SETPC_B64_return_gfx) {
const SIRegisterInfo *TRI = ST.getRegisterInfo();
Register LiveInReturn = MF.addLiveIn(TRI->getReturnAddressReg(MF),
&AMDGPU::SGPR_64RegClass);
@@ -753,6 +763,11 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
CallLoweringInfo &Info) const {
MachineFunction &MF = MIRBuilder.getMF();
+ // If there's no call site, this doesn't correspond to a call from the IR and
+ // doesn't need implicit inputs.
+ if (!Info.CB)
+ return true;
+
const AMDGPUFunctionArgInfo *CalleeArgInfo
= &AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;
@@ -773,17 +788,32 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
AMDGPUFunctionArgInfo::WORKGROUP_ID_Z
};
+ static constexpr StringLiteral ImplicitAttrNames[] = {
+ "amdgpu-no-dispatch-ptr",
+ "amdgpu-no-queue-ptr",
+ "amdgpu-no-implicitarg-ptr",
+ "amdgpu-no-dispatch-id",
+ "amdgpu-no-workgroup-id-x",
+ "amdgpu-no-workgroup-id-y",
+ "amdgpu-no-workgroup-id-z"
+ };
+
MachineRegisterInfo &MRI = MF.getRegInfo();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const AMDGPULegalizerInfo *LI
= static_cast<const AMDGPULegalizerInfo*>(ST.getLegalizerInfo());
+ unsigned I = 0;
for (auto InputID : InputRegs) {
const ArgDescriptor *OutgoingArg;
const TargetRegisterClass *ArgRC;
LLT ArgTy;
+ // If the callee does not use the attribute value, skip copying the value.
+ if (Info.CB->hasFnAttr(ImplicitAttrNames[I++]))
+ continue;
+
std::tie(OutgoingArg, ArgRC, ArgTy) =
CalleeArgInfo->getPreloadedValue(InputID);
if (!OutgoingArg)
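The ImplicitAttrNames table is kept in the same order as InputRegs so the loop can index both in lock step: when the call site (or callee) carries the matching amdgpu-no-* attribute, the caller skips materializing that implicit input entirely. A small sketch of the check, not part of the patch:

// Sketch: returns true when the implicit input still has to be passed.
// CallBase::hasFnAttr also looks through to the callee's attributes.
static bool calleeNeedsImplicitInput(const CallBase &CB, StringRef NoAttr) {
  return !CB.hasFnAttr(NoAttr);
}
// e.g. calleeNeedsImplicitInput(*Info.CB, "amdgpu-no-dispatch-ptr")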
@@ -843,16 +873,22 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
const ArgDescriptor *IncomingArgZ = std::get<0>(WorkitemIDZ);
const LLT S32 = LLT::scalar(32);
+ const bool NeedWorkItemIDX = !Info.CB->hasFnAttr("amdgpu-no-workitem-id-x");
+ const bool NeedWorkItemIDY = !Info.CB->hasFnAttr("amdgpu-no-workitem-id-y");
+ const bool NeedWorkItemIDZ = !Info.CB->hasFnAttr("amdgpu-no-workitem-id-z");
+
// If incoming ids are not packed we need to pack them.
// FIXME: Should consider known workgroup size to eliminate known 0 cases.
Register InputReg;
- if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo->WorkItemIDX) {
+ if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo->WorkItemIDX &&
+ NeedWorkItemIDX) {
InputReg = MRI.createGenericVirtualRegister(S32);
LI->loadInputValue(InputReg, MIRBuilder, IncomingArgX,
std::get<1>(WorkitemIDX), std::get<2>(WorkitemIDX));
}
- if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo->WorkItemIDY) {
+ if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo->WorkItemIDY &&
+ NeedWorkItemIDY) {
Register Y = MRI.createGenericVirtualRegister(S32);
LI->loadInputValue(Y, MIRBuilder, IncomingArgY, std::get<1>(WorkitemIDY),
std::get<2>(WorkitemIDY));
@@ -861,7 +897,8 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
InputReg = InputReg ? MIRBuilder.buildOr(S32, InputReg, Y).getReg(0) : Y;
}
- if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo->WorkItemIDZ) {
+ if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo->WorkItemIDZ &&
+ NeedWorkItemIDZ) {
Register Z = MRI.createGenericVirtualRegister(S32);
LI->loadInputValue(Z, MIRBuilder, IncomingArgZ, std::get<1>(WorkitemIDZ),
std::get<2>(WorkitemIDZ));
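For reference, the three workitem IDs are forwarded to callees packed into a single 32-bit register, x in bits [9:0], y in [19:10] and z in [29:20] (10 bits each, matching the 1024-thread workgroup limit), which is why each component is only OR'd into InputReg when the corresponding amdgpu-no-workitem-id-* attribute is absent. A standalone model of the packing (illustrative, not from the patch):

#include <cstdint>

// Packed outgoing workitem-ID layout: x | y << 10 | z << 20.
static uint32_t packWorkitemIDs(uint32_t X, uint32_t Y, uint32_t Z) {
  return (X & 0x3ff) | ((Y & 0x3ff) << 10) | ((Z & 0x3ff) << 20);
}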
@@ -870,7 +907,7 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
InputReg = InputReg ? MIRBuilder.buildOr(S32, InputReg, Z).getReg(0) : Z;
}
- if (!InputReg) {
+ if (!InputReg && (NeedWorkItemIDX || NeedWorkItemIDY || NeedWorkItemIDZ)) {
InputReg = MRI.createGenericVirtualRegister(S32);
// Workitem ids are already packed, any of present incoming arguments will
@@ -883,7 +920,9 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
}
if (OutgoingArg->isRegister()) {
- ArgRegs.emplace_back(OutgoingArg->getRegister(), InputReg);
+ if (InputReg)
+ ArgRegs.emplace_back(OutgoingArg->getRegister(), InputReg);
+
if (!CCInfo.AllocateReg(OutgoingArg->getRegister()))
report_fatal_error("failed to allocate implicit input argument");
} else {
@@ -903,7 +942,9 @@ getAssignFnsForCC(CallingConv::ID CC, const SITargetLowering &TLI) {
static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect,
bool IsTailCall) {
- return IsTailCall ? AMDGPU::SI_TCRETURN : AMDGPU::SI_CALL;
+ assert(!(IsIndirect && IsTailCall) && "Indirect calls can't be tail calls, "
+ "because the address can be divergent");
+ return IsTailCall ? AMDGPU::SI_TCRETURN : AMDGPU::G_SI_CALL;
}
// Add operands to call instruction to track the callee.
@@ -1033,6 +1074,11 @@ bool AMDGPUCallLowering::isEligibleForTailCallOptimization(
if (!Info.IsTailCall)
return false;
+ // Indirect calls can't be tail calls, because the address can be divergent.
+ // TODO Check divergence info if the call really is divergent.
+ if (Info.Callee.isReg())
+ return false;
+
MachineFunction &MF = B.getMF();
const Function &CallerF = MF.getFunction();
CallingConv::ID CalleeCC = Info.CallConv;
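Both changes above encode the same rule: an indirect call's target lives in a register and may be divergent across the wave, so it is never considered for the tail-call path (hence the new assert in getCallOpcode and the early bail-out here). A condensed sketch of the guard, illustrative only and assuming the surrounding GlobalISel headers:

// Sketch: a register callee means the target is only known at run time and
// may differ per lane, so fall back to a normal (non-tail) call sequence.
static bool mayConsiderTailCall(const CallLowering::CallLoweringInfo &Info) {
  return Info.IsTailCall && !Info.Callee.isReg();
}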
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
index 90b52395b76c..1682d43ae671 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
@@ -20,11 +20,13 @@ def CC_SI_Gfx : CallingConv<[
// 0-3 are reserved for the stack buffer descriptor
// 30-31 are reserved for the return address
// 32 is reserved for the stack pointer
+ // 33 is reserved for the frame pointer
+ // 34 is reserved for the base pointer
CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<[
SGPR4, SGPR5, SGPR6, SGPR7,
SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,
- SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29,
+ SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29
]>>>,
CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<[
@@ -41,17 +43,6 @@ def RetCC_SI_Gfx : CallingConv<[
CCIfType<[i1], CCPromoteToType<i32>>,
CCIfType<[i1, i16], CCIfExtend<CCPromoteToType<i32>>>,
- // 0-3 are reserved for the stack buffer descriptor
- // 32 is reserved for the stack pointer
- CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<[
- SGPR4, SGPR5, SGPR6, SGPR7,
- SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
- SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,
- SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29, SGPR30, SGPR31,
- SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39,
- SGPR40, SGPR41, SGPR42, SGPR43
- ]>>>,
-
CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<[
VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
@@ -139,14 +130,6 @@ def RetCC_SI_Shader : CallingConv<[
]>>
]>;
-def CSR_AMDGPU_VGPRs_24_255 : CalleeSavedRegs<
- (sequence "VGPR%u", 24, 255)
->;
-
-def CSR_AMDGPU_VGPRs_32_255 : CalleeSavedRegs<
- (sequence "VGPR%u", 32, 255)
->;
-
def CSR_AMDGPU_VGPRs : CalleeSavedRegs<
// The CSRs & scratch-registers are interleaved at a split boundary of 8.
(add (sequence "VGPR%u", 40, 47),
@@ -173,6 +156,14 @@ def CSR_AMDGPU_SGPRs_32_105 : CalleeSavedRegs<
(sequence "SGPR%u", 32, 105)
>;
+def CSR_AMDGPU_SI_Gfx_SGPRs_4_29 : CalleeSavedRegs<
+ (sequence "SGPR%u", 4, 29)
+>;
+
+def CSR_AMDGPU_SI_Gfx_SGPRs_64_105 : CalleeSavedRegs<
+ (sequence "SGPR%u", 64, 105)
+>;
+
// Just to get the regmask, not for calling convention purposes.
def CSR_AMDGPU_AllVGPRs : CalleeSavedRegs<
(sequence "VGPR%u", 0, 255)
@@ -198,6 +189,14 @@ def CSR_AMDGPU_HighRegs_With_AGPRs : CalleeSavedRegs<
(add CSR_AMDGPU_HighRegs, CSR_AMDGPU_AGPRs_32_255)
>;
+def CSR_AMDGPU_SI_Gfx : CalleeSavedRegs<
+ (add CSR_AMDGPU_VGPRs, CSR_AMDGPU_SI_Gfx_SGPRs_4_29, CSR_AMDGPU_SI_Gfx_SGPRs_64_105)
+>;
+
+def CSR_AMDGPU_SI_Gfx_With_AGPRs : CalleeSavedRegs<
+ (add CSR_AMDGPU_SI_Gfx, CSR_AMDGPU_AGPRs_32_255)
+>;
+
def CSR_AMDGPU_NoRegs : CalleeSavedRegs<(add)>;
// Calling convention for leaf functions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 60e79c2c6c2f..a55729586b8d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -148,11 +148,15 @@ class AMDGPUCodeGenPrepare : public FunctionPass,
/// \returns True.
bool promoteUniformBitreverseToI32(IntrinsicInst &I) const;
-
+ /// \returns The minimum number of bits needed to store the value of \p Op as an
+ /// unsigned integer. Truncating to this size and then zero-extending to
+ /// ScalarSize will not change the value.
unsigned numBitsUnsigned(Value *Op, unsigned ScalarSize) const;
+
+ /// \returns The minimum number of bits needed to store the value of \p Op as a
+ /// signed integer. Truncating to this size and then sign-extending to
+ /// ScalarSize will not change the value.
unsigned numBitsSigned(Value *Op, unsigned ScalarSize) const;
- bool isI24(Value *V, unsigned ScalarSize) const;
- bool isU24(Value *V, unsigned ScalarSize) const;
/// Replace mul instructions with llvm.amdgcn.mul.u24 or llvm.amdgcn.mul.s24.
/// SelectionDAG has an issue where an and asserting the bits are known
@@ -451,17 +455,7 @@ unsigned AMDGPUCodeGenPrepare::numBitsSigned(Value *Op,
unsigned ScalarSize) const {
// In order for this to be a signed 24-bit value, bit 23 must
// be a sign bit.
- return ScalarSize - ComputeNumSignBits(Op, *DL, 0, AC);
-}
-
-bool AMDGPUCodeGenPrepare::isI24(Value *V, unsigned ScalarSize) const {
- return ScalarSize >= 24 && // Types less than 24-bit should be treated
- // as unsigned 24-bit values.
- numBitsSigned(V, ScalarSize) < 24;
-}
-
-bool AMDGPUCodeGenPrepare::isU24(Value *V, unsigned ScalarSize) const {
- return numBitsUnsigned(V, ScalarSize) <= 24;
+ return ScalarSize - ComputeNumSignBits(Op, *DL, 0, AC) + 1;
}
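Sanity check of the new formula (not from the patch): a value that is the sign extension of a 10-bit quantity in an i32 has 23 known sign bits, so 32 - 23 + 1 = 10 bits are needed. The old form lacked the +1, which is why the callers below now compare against <= 24 instead of < 24.

// Minimum signed width = total width - redundant sign bits + 1 (the sign bit).
constexpr unsigned minSignedBits(unsigned ScalarSize, unsigned NumSignBits) {
  return ScalarSize - NumSignBits + 1;
}
static_assert(minSignedBits(32, 23) == 10, "sign-extended 10-bit value");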
static void extractValues(IRBuilder<> &Builder,
@@ -489,6 +483,34 @@ static Value *insertValues(IRBuilder<> &Builder,
return NewVal;
}
+// Returns 24-bit or 48-bit (as per `NumBits` and `Size`) mul of `LHS` and
+// `RHS`. `NumBits` is the number of KnownBits of the result and `Size` is the
+// width of the original destination.
+static Value *getMul24(IRBuilder<> &Builder, Value *LHS, Value *RHS,
+ unsigned Size, unsigned NumBits, bool IsSigned) {
+ if (Size <= 32 || NumBits <= 32) {
+ Intrinsic::ID ID =
+ IsSigned ? Intrinsic::amdgcn_mul_i24 : Intrinsic::amdgcn_mul_u24;
+ return Builder.CreateIntrinsic(ID, {}, {LHS, RHS});
+ }
+
+ assert(NumBits <= 48);
+
+ Intrinsic::ID LoID =
+ IsSigned ? Intrinsic::amdgcn_mul_i24 : Intrinsic::amdgcn_mul_u24;
+ Intrinsic::ID HiID =
+ IsSigned ? Intrinsic::amdgcn_mulhi_i24 : Intrinsic::amdgcn_mulhi_u24;
+
+ Value *Lo = Builder.CreateIntrinsic(LoID, {}, {LHS, RHS});
+ Value *Hi = Builder.CreateIntrinsic(HiID, {}, {LHS, RHS});
+
+ IntegerType *I64Ty = Builder.getInt64Ty();
+ Lo = Builder.CreateZExtOrTrunc(Lo, I64Ty);
+ Hi = Builder.CreateZExtOrTrunc(Hi, I64Ty);
+
+ return Builder.CreateOr(Lo, Builder.CreateShl(Hi, 32));
+}
+
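A plain-arithmetic model of the 48-bit path in getMul24 (a sketch, not part of the patch): mul_u24 produces the low 32 bits of the 24x24 product and mulhi_u24 produces the bits above bit 31, so OR-ing hi << 32 with lo reassembles the full product.

#include <cstdint>

static uint64_t mul24Wide(uint32_t A, uint32_t B) {
  uint64_t Prod = uint64_t(A & 0xffffff) * uint64_t(B & 0xffffff); // fits in 48 bits
  uint32_t Lo = uint32_t(Prod);        // what llvm.amdgcn.mul.u24 yields
  uint32_t Hi = uint32_t(Prod >> 32);  // what llvm.amdgcn.mulhi.u24 yields
  return (uint64_t(Hi) << 32) | Lo;
}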
bool AMDGPUCodeGenPrepare::replaceMulWithMul24(BinaryOperator &I) const {
if (I.getOpcode() != Instruction::Mul)
return false;
@@ -507,13 +529,17 @@ bool AMDGPUCodeGenPrepare::replaceMulWithMul24(BinaryOperator &I) const {
IRBuilder<> Builder(&I);
Builder.SetCurrentDebugLocation(I.getDebugLoc());
- Intrinsic::ID IntrID = Intrinsic::not_intrinsic;
+ unsigned LHSBits = 0, RHSBits = 0;
+ bool IsSigned = false;
+
+ if (ST->hasMulU24() && (LHSBits = numBitsUnsigned(LHS, Size)) <= 24 &&
+ (RHSBits = numBitsUnsigned(RHS, Size)) <= 24) {
+ IsSigned = false;
+
+ } else if (ST->hasMulI24() && (LHSBits = numBitsSigned(LHS, Size)) <= 24 &&
+ (RHSBits = numBitsSigned(RHS, Size)) <= 24) {
+ IsSigned = true;
- // TODO: Should this try to match mulhi24?
- if (ST->hasMulU24() && isU24(LHS, Size) && isU24(RHS, Size)) {
- IntrID = Intrinsic::amdgcn_mul_u24;
- } else if (ST->hasMulI24() && isI24(LHS, Size) && isI24(RHS, Size)) {
- IntrID = Intrinsic::amdgcn_mul_i24;
} else
return false;
@@ -523,27 +549,26 @@ bool AMDGPUCodeGenPrepare::replaceMulWithMul24(BinaryOperator &I) const {
extractValues(Builder, LHSVals, LHS);
extractValues(Builder, RHSVals, RHS);
-
IntegerType *I32Ty = Builder.getInt32Ty();
- FunctionCallee Intrin = Intrinsic::getDeclaration(Mod, IntrID);
for (int I = 0, E = LHSVals.size(); I != E; ++I) {
Value *LHS, *RHS;
- if (IntrID == Intrinsic::amdgcn_mul_u24) {
- LHS = Builder.CreateZExtOrTrunc(LHSVals[I], I32Ty);
- RHS = Builder.CreateZExtOrTrunc(RHSVals[I], I32Ty);
- } else {
+ if (IsSigned) {
LHS = Builder.CreateSExtOrTrunc(LHSVals[I], I32Ty);
RHS = Builder.CreateSExtOrTrunc(RHSVals[I], I32Ty);
+ } else {
+ LHS = Builder.CreateZExtOrTrunc(LHSVals[I], I32Ty);
+ RHS = Builder.CreateZExtOrTrunc(RHSVals[I], I32Ty);
}
- Value *Result = Builder.CreateCall(Intrin, {LHS, RHS});
+ Value *Result =
+ getMul24(Builder, LHS, RHS, Size, LHSBits + RHSBits, IsSigned);
- if (IntrID == Intrinsic::amdgcn_mul_u24) {
- ResultVals.push_back(Builder.CreateZExtOrTrunc(Result,
- LHSVals[I]->getType()));
+ if (IsSigned) {
+ ResultVals.push_back(
+ Builder.CreateSExtOrTrunc(Result, LHSVals[I]->getType()));
} else {
- ResultVals.push_back(Builder.CreateSExtOrTrunc(Result,
- LHSVals[I]->getType()));
+ ResultVals.push_back(
+ Builder.CreateZExtOrTrunc(Result, LHSVals[I]->getType()));
}
}
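The NumBits value handed to getMul24 is LHSBits + RHSBits because an m-bit by n-bit product never needs more than m + n bits, so two operands that each fit in 24 bits always produce a result that fits in the 48 bits the lo/hi intrinsic pair can deliver. A quick check (illustrative):

// (2^20 - 1) * (2^18 - 1) < 2^38: a 20-bit by 18-bit product fits in 38 bits.
static_assert((((1ull << 20) - 1) * ((1ull << 18) - 1)) < (1ull << 38),
              "m-bit * n-bit fits in m+n bits");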
@@ -816,7 +841,7 @@ bool AMDGPUCodeGenPrepare::visitXor(BinaryOperator &I) {
if (!RHS || !IntrinsicCall || RHS->getSExtValue() != -1)
return visitBinaryOperator(I);
- // Check if the Call is an intrinsic intruction to amdgcn_class intrinsic
+ // Check if the Call is an intrinsic instruction to amdgcn_class intrinsic
// has only one use
if (IntrinsicCall->getIntrinsicID() != Intrinsic::amdgcn_class ||
!IntrinsicCall->hasOneUse())
@@ -1314,7 +1339,7 @@ bool AMDGPUCodeGenPrepare::visitLoadInst(LoadInst &I) {
ConstantInt *Lower =
mdconst::extract<ConstantInt>(Range->getOperand(0));
- if (Lower->getValue().isNullValue()) {
+ if (Lower->isNullValue()) {
WidenLoad->setMetadata(LLVMContext::MD_range, nullptr);
} else {
Metadata *LowAndHigh[] = {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index c6273adca50f..df2f9a0fa3a9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -64,26 +64,36 @@ def remove_fcanonicalize : GICombineRule<
[{ return PostLegalizerHelper.matchRemoveFcanonicalize(*${fcanonicalize}, ${matchinfo}); }]),
(apply [{ Helper.replaceSingleDefInstWithReg(*${fcanonicalize}, ${matchinfo}); }])>;
+def foldable_fneg_matchdata : GIDefMatchData<"MachineInstr *">;
+
+def foldable_fneg : GICombineRule<
+ (defs root:$ffn, foldable_fneg_matchdata:$matchinfo),
+ (match (wip_match_opcode G_FNEG):$ffn,
+ [{ return Helper.matchFoldableFneg(*${ffn}, ${matchinfo}); }]),
+ (apply [{ Helper.applyFoldableFneg(*${ffn}, ${matchinfo}); }])>;
+
// Combines which should only apply on SI/VI
def gfx6gfx7_combines : GICombineGroup<[fcmp_select_to_fmin_fmax_legacy]>;
def AMDGPUPreLegalizerCombinerHelper: GICombinerHelper<
- "AMDGPUGenPreLegalizerCombinerHelper", [all_combines, clamp_i64_to_i16]> {
+ "AMDGPUGenPreLegalizerCombinerHelper",
+ [all_combines, clamp_i64_to_i16, foldable_fneg]> {
let DisableRuleOption = "amdgpuprelegalizercombiner-disable-rule";
let StateClass = "AMDGPUPreLegalizerCombinerHelperState";
+ let AdditionalArguments = [];
}
def AMDGPUPostLegalizerCombinerHelper: GICombinerHelper<
"AMDGPUGenPostLegalizerCombinerHelper",
[all_combines, gfx6gfx7_combines,
- uchar_to_float, cvt_f32_ubyteN, remove_fcanonicalize]> {
+ uchar_to_float, cvt_f32_ubyteN, remove_fcanonicalize, foldable_fneg]> {
let DisableRuleOption = "amdgpupostlegalizercombiner-disable-rule";
let StateClass = "AMDGPUPostLegalizerCombinerHelperState";
let AdditionalArguments = [];
}
def AMDGPURegBankCombinerHelper : GICombinerHelper<
- "AMDGPUGenRegBankCombinerHelper", [zext_trunc_fold, int_minmax_to_med3]> {
+ "AMDGPUGenRegBankCombinerHelper", [zext_trunc_fold, int_minmax_to_med3, ptr_add_immed_chain]> {
let DisableRuleOption = "amdgpuregbankcombiner-disable-rule";
let StateClass = "AMDGPURegBankCombinerHelperState";
let AdditionalArguments = [];
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp
new file mode 100644
index 000000000000..301e6f6d6f42
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp
@@ -0,0 +1,382 @@
+//=== lib/CodeGen/GlobalISel/AMDGPUCombinerHelper.cpp ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUCombinerHelper.h"
+#include "GCNSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+using namespace MIPatternMatch;
+
+LLVM_READNONE
+static bool fnegFoldsIntoMI(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ case AMDGPU::G_FADD:
+ case AMDGPU::G_FSUB:
+ case AMDGPU::G_FMUL:
+ case AMDGPU::G_FMA:
+ case AMDGPU::G_FMAD:
+ case AMDGPU::G_FMINNUM:
+ case AMDGPU::G_FMAXNUM:
+ case AMDGPU::G_FMINNUM_IEEE:
+ case AMDGPU::G_FMAXNUM_IEEE:
+ case AMDGPU::G_FSIN:
+ case AMDGPU::G_FPEXT:
+ case AMDGPU::G_INTRINSIC_TRUNC:
+ case AMDGPU::G_FPTRUNC:
+ case AMDGPU::G_FRINT:
+ case AMDGPU::G_FNEARBYINT:
+ case AMDGPU::G_INTRINSIC_ROUND:
+ case AMDGPU::G_INTRINSIC_ROUNDEVEN:
+ case AMDGPU::G_FCANONICALIZE:
+ case AMDGPU::G_AMDGPU_RCP_IFLAG:
+ case AMDGPU::G_AMDGPU_FMIN_LEGACY:
+ case AMDGPU::G_AMDGPU_FMAX_LEGACY:
+ return true;
+ case AMDGPU::G_INTRINSIC: {
+ unsigned IntrinsicID = MI.getIntrinsicID();
+ switch (IntrinsicID) {
+ case Intrinsic::amdgcn_rcp:
+ case Intrinsic::amdgcn_rcp_legacy:
+ case Intrinsic::amdgcn_sin:
+ case Intrinsic::amdgcn_fmul_legacy:
+ case Intrinsic::amdgcn_fmed3:
+ case Intrinsic::amdgcn_fma_legacy:
+ return true;
+ default:
+ return false;
+ }
+ }
+ default:
+ return false;
+ }
+}
+
+/// \returns true if the operation will definitely need to use a 64-bit
+/// encoding, and thus will use a VOP3 encoding regardless of the source
+/// modifiers.
+LLVM_READONLY
+static bool opMustUseVOP3Encoding(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI) {
+ return MI.getNumOperands() >
+ (MI.getOpcode() == AMDGPU::G_INTRINSIC ? 4u : 3u) ||
+ MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits() == 64;
+}
+
+// Most FP instructions support source modifiers.
+LLVM_READONLY
+static bool hasSourceMods(const MachineInstr &MI) {
+ if (!MI.memoperands().empty())
+ return false;
+
+ switch (MI.getOpcode()) {
+ case AMDGPU::COPY:
+ case AMDGPU::G_SELECT:
+ case AMDGPU::G_FDIV:
+ case AMDGPU::G_FREM:
+ case TargetOpcode::INLINEASM:
+ case TargetOpcode::INLINEASM_BR:
+ case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
+ case AMDGPU::G_BITCAST:
+ case AMDGPU::G_ANYEXT:
+ case AMDGPU::G_BUILD_VECTOR:
+ case AMDGPU::G_BUILD_VECTOR_TRUNC:
+ case AMDGPU::G_PHI:
+ return false;
+ case AMDGPU::G_INTRINSIC: {
+ unsigned IntrinsicID = MI.getIntrinsicID();
+ switch (IntrinsicID) {
+ case Intrinsic::amdgcn_interp_p1:
+ case Intrinsic::amdgcn_interp_p2:
+ case Intrinsic::amdgcn_interp_mov:
+ case Intrinsic::amdgcn_interp_p1_f16:
+ case Intrinsic::amdgcn_interp_p2_f16:
+ case Intrinsic::amdgcn_div_scale:
+ return false;
+ default:
+ return true;
+ }
+ }
+ default:
+ return true;
+ }
+}
+
+static bool allUsesHaveSourceMods(MachineInstr &MI, MachineRegisterInfo &MRI,
+ unsigned CostThreshold = 4) {
+ // Some users (such as 3-operand FMA/MAD) must use a VOP3 encoding, and thus
+ // it is truly free to use a source modifier in all cases. If there are
+ // multiple users but each one would require a VOP3 encoding, there will be
+ // a code size increase. Try to avoid increasing code size unless we know it
+ // will save on the instruction count.
+ unsigned NumMayIncreaseSize = 0;
+ Register Dst = MI.getOperand(0).getReg();
+ for (const MachineInstr &Use : MRI.use_nodbg_instructions(Dst)) {
+ if (!hasSourceMods(Use))
+ return false;
+
+ if (!opMustUseVOP3Encoding(Use, MRI)) {
+ if (++NumMayIncreaseSize > CostThreshold)
+ return false;
+ }
+ }
+ return true;
+}
+
+static bool mayIgnoreSignedZero(MachineInstr &MI) {
+ const TargetOptions &Options = MI.getMF()->getTarget().Options;
+ return Options.NoSignedZerosFPMath || MI.getFlag(MachineInstr::MIFlag::FmNsz);
+}
+
+static bool isInv2Pi(const APFloat &APF) {
+ static const APFloat KF16(APFloat::IEEEhalf(), APInt(16, 0x3118));
+ static const APFloat KF32(APFloat::IEEEsingle(), APInt(32, 0x3e22f983));
+ static const APFloat KF64(APFloat::IEEEdouble(),
+ APInt(64, 0x3fc45f306dc9c882));
+
+ return APF.bitwiseIsEqual(KF16) || APF.bitwiseIsEqual(KF32) ||
+ APF.bitwiseIsEqual(KF64);
+}
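isInv2Pi compares against the exact bit patterns of 1/(2*pi) in half, single and double precision because subtargets with hasInv2PiInlineImm can encode that constant (like 0.0) as a free inline operand, while its negation needs a literal. A quick decode of the 32-bit pattern above (illustrative, not from the patch):

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  uint32_t Bits = 0x3e22f983; // the KF32 pattern used above
  float F;
  std::memcpy(&F, &Bits, sizeof(F));
  std::printf("%.9g\n", F);   // ~0.159155, i.e. 1/(2*pi)
}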
+
+// 0 and 1.0 / (2.0 * pi) do not have inline immediates, so there is an
+// additional cost to negate them.
+static bool isConstantCostlierToNegate(MachineInstr &MI, Register Reg,
+ MachineRegisterInfo &MRI) {
+ Optional<FPValueAndVReg> FPValReg;
+ if (mi_match(Reg, MRI, m_GFCstOrSplat(FPValReg))) {
+ if (FPValReg->Value.isZero() && !FPValReg->Value.isNegative())
+ return true;
+
+ const GCNSubtarget &ST = MI.getMF()->getSubtarget<GCNSubtarget>();
+ if (ST.hasInv2PiInlineImm() && isInv2Pi(FPValReg->Value))
+ return true;
+ }
+ return false;
+}
+
+static unsigned inverseMinMax(unsigned Opc) {
+ switch (Opc) {
+ case AMDGPU::G_FMAXNUM:
+ return AMDGPU::G_FMINNUM;
+ case AMDGPU::G_FMINNUM:
+ return AMDGPU::G_FMAXNUM;
+ case AMDGPU::G_FMAXNUM_IEEE:
+ return AMDGPU::G_FMINNUM_IEEE;
+ case AMDGPU::G_FMINNUM_IEEE:
+ return AMDGPU::G_FMAXNUM_IEEE;
+ case AMDGPU::G_AMDGPU_FMAX_LEGACY:
+ return AMDGPU::G_AMDGPU_FMIN_LEGACY;
+ case AMDGPU::G_AMDGPU_FMIN_LEGACY:
+ return AMDGPU::G_AMDGPU_FMAX_LEGACY;
+ default:
+ llvm_unreachable("invalid min/max opcode");
+ }
+}
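inverseMinMax exists because negation reverses the ordering, -max(a, b) == min(-a, -b) and -min(a, b) == max(-a, -b), so the fold negates both operands and swaps the opcode. A quick check (illustrative; NaN handling stays with the matching IEEE/legacy opcode family above):

#include <algorithm>
#include <cassert>

int main() {
  float A = 2.5f, B = -7.0f;
  assert(-std::max(A, B) == std::min(-A, -B));
  assert(-std::min(A, B) == std::max(-A, -B));
}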
+
+bool AMDGPUCombinerHelper::matchFoldableFneg(MachineInstr &MI,
+ MachineInstr *&MatchInfo) {
+ Register Src = MI.getOperand(1).getReg();
+ MatchInfo = MRI.getVRegDef(Src);
+
+ // If the input has multiple uses and we can either fold the negate down, or
+ // the other uses cannot, give up. This both prevents unprofitable
+ // transformations and infinite loops: we won't repeatedly try to fold around
+ // a negate that has no 'good' form.
+ if (MRI.hasOneNonDBGUse(Src)) {
+ if (allUsesHaveSourceMods(MI, MRI, 0))
+ return false;
+ } else {
+ if (fnegFoldsIntoMI(*MatchInfo) &&
+ (allUsesHaveSourceMods(MI, MRI) ||
+ !allUsesHaveSourceMods(*MatchInfo, MRI)))
+ return false;
+ }
+
+ switch (MatchInfo->getOpcode()) {
+ case AMDGPU::G_FMINNUM:
+ case AMDGPU::G_FMAXNUM:
+ case AMDGPU::G_FMINNUM_IEEE:
+ case AMDGPU::G_FMAXNUM_IEEE:
+ case AMDGPU::G_AMDGPU_FMIN_LEGACY:
+ case AMDGPU::G_AMDGPU_FMAX_LEGACY:
+ // 0 doesn't have a negated inline immediate.
+ return !isConstantCostlierToNegate(*MatchInfo,
+ MatchInfo->getOperand(2).getReg(), MRI);
+ case AMDGPU::G_FADD:
+ case AMDGPU::G_FSUB:
+ case AMDGPU::G_FMA:
+ case AMDGPU::G_FMAD:
+ return mayIgnoreSignedZero(*MatchInfo);
+ case AMDGPU::G_FMUL:
+ case AMDGPU::G_FPEXT:
+ case AMDGPU::G_INTRINSIC_TRUNC:
+ case AMDGPU::G_FPTRUNC:
+ case AMDGPU::G_FRINT:
+ case AMDGPU::G_FNEARBYINT:
+ case AMDGPU::G_INTRINSIC_ROUND:
+ case AMDGPU::G_INTRINSIC_ROUNDEVEN:
+ case AMDGPU::G_FSIN:
+ case AMDGPU::G_FCANONICALIZE:
+ case AMDGPU::G_AMDGPU_RCP_IFLAG:
+ return true;
+ case AMDGPU::G_INTRINSIC: {
+ unsigned IntrinsicID = MatchInfo->getIntrinsicID();
+ switch (IntrinsicID) {
+ case Intrinsic::amdgcn_rcp:
+ case Intrinsic::amdgcn_rcp_legacy:
+ case Intrinsic::amdgcn_sin:
+ case Intrinsic::amdgcn_fmul_legacy:
+ case Intrinsic::amdgcn_fmed3:
+ return true;
+ case Intrinsic::amdgcn_fma_legacy:
+ return mayIgnoreSignedZero(*MatchInfo);
+ default:
+ return false;
+ }
+ }
+ default:
+ return false;
+ }
+}
+
+void AMDGPUCombinerHelper::applyFoldableFneg(MachineInstr &MI,
+ MachineInstr *&MatchInfo) {
+ // Transform:
+ // %A = inst %Op1, ...
+ // %B = fneg %A
+ //
+ // into:
+ //
+ // (if %A has one use, specifically fneg above)
+ // %B = inst (maybe fneg %Op1), ...
+ //
+ // (if %A has multiple uses)
+ // %B = inst (maybe fneg %Op1), ...
+ // %A = fneg %B
+
+ // Replace register in operand with a register holding negated value.
+ auto NegateOperand = [&](MachineOperand &Op) {
+ Register Reg = Op.getReg();
+ if (!mi_match(Reg, MRI, m_GFNeg(m_Reg(Reg))))
+ Reg = Builder.buildFNeg(MRI.getType(Reg), Reg).getReg(0);
+ replaceRegOpWith(MRI, Op, Reg);
+ };
+
+ // Replace either register in operands with a register holding negated value.
+ auto NegateEitherOperand = [&](MachineOperand &X, MachineOperand &Y) {
+ Register XReg = X.getReg();
+ Register YReg = Y.getReg();
+ if (mi_match(XReg, MRI, m_GFNeg(m_Reg(XReg))))
+ replaceRegOpWith(MRI, X, XReg);
+ else if (mi_match(YReg, MRI, m_GFNeg(m_Reg(YReg))))
+ replaceRegOpWith(MRI, Y, YReg);
+ else {
+ YReg = Builder.buildFNeg(MRI.getType(YReg), YReg).getReg(0);
+ replaceRegOpWith(MRI, Y, YReg);
+ }
+ };
+
+ Builder.setInstrAndDebugLoc(*MatchInfo);
+
+ // Negate appropriate operands so that resulting value of MatchInfo is
+ // negated.
+ switch (MatchInfo->getOpcode()) {
+ case AMDGPU::G_FADD:
+ case AMDGPU::G_FSUB:
+ NegateOperand(MatchInfo->getOperand(1));
+ NegateOperand(MatchInfo->getOperand(2));
+ break;
+ case AMDGPU::G_FMUL:
+ NegateEitherOperand(MatchInfo->getOperand(1), MatchInfo->getOperand(2));
+ break;
+ case AMDGPU::G_FMINNUM:
+ case AMDGPU::G_FMAXNUM:
+ case AMDGPU::G_FMINNUM_IEEE:
+ case AMDGPU::G_FMAXNUM_IEEE:
+ case AMDGPU::G_AMDGPU_FMIN_LEGACY:
+ case AMDGPU::G_AMDGPU_FMAX_LEGACY: {
+ NegateOperand(MatchInfo->getOperand(1));
+ NegateOperand(MatchInfo->getOperand(2));
+ unsigned Opposite = inverseMinMax(MatchInfo->getOpcode());
+ replaceOpcodeWith(*MatchInfo, Opposite);
+ break;
+ }
+ case AMDGPU::G_FMA:
+ case AMDGPU::G_FMAD:
+ NegateEitherOperand(MatchInfo->getOperand(1), MatchInfo->getOperand(2));
+ NegateOperand(MatchInfo->getOperand(3));
+ break;
+ case AMDGPU::G_FPEXT:
+ case AMDGPU::G_INTRINSIC_TRUNC:
+ case AMDGPU::G_FRINT:
+ case AMDGPU::G_FNEARBYINT:
+ case AMDGPU::G_INTRINSIC_ROUND:
+ case AMDGPU::G_INTRINSIC_ROUNDEVEN:
+ case AMDGPU::G_FSIN:
+ case AMDGPU::G_FCANONICALIZE:
+ case AMDGPU::G_AMDGPU_RCP_IFLAG:
+ case AMDGPU::G_FPTRUNC:
+ NegateOperand(MatchInfo->getOperand(1));
+ break;
+ case AMDGPU::G_INTRINSIC: {
+ unsigned IntrinsicID = MatchInfo->getIntrinsicID();
+ switch (IntrinsicID) {
+ case Intrinsic::amdgcn_rcp:
+ case Intrinsic::amdgcn_rcp_legacy:
+ case Intrinsic::amdgcn_sin:
+ NegateOperand(MatchInfo->getOperand(2));
+ break;
+ case Intrinsic::amdgcn_fmul_legacy:
+ NegateEitherOperand(MatchInfo->getOperand(2), MatchInfo->getOperand(3));
+ break;
+ case Intrinsic::amdgcn_fmed3:
+ NegateOperand(MatchInfo->getOperand(2));
+ NegateOperand(MatchInfo->getOperand(3));
+ NegateOperand(MatchInfo->getOperand(4));
+ break;
+ case Intrinsic::amdgcn_fma_legacy:
+ NegateEitherOperand(MatchInfo->getOperand(2), MatchInfo->getOperand(3));
+ NegateOperand(MatchInfo->getOperand(4));
+ break;
+ default:
+ llvm_unreachable("folding fneg not supported for this intrinsic");
+ }
+ break;
+ }
+ default:
+ llvm_unreachable("folding fneg not supported for this instruction");
+ }
+
+ Register Dst = MI.getOperand(0).getReg();
+ Register MatchInfoDst = MatchInfo->getOperand(0).getReg();
+
+ if (MRI.hasOneNonDBGUse(MatchInfoDst)) {
+ // MatchInfo now has negated value so use that instead of old Dst.
+ replaceRegWith(MRI, Dst, MatchInfoDst);
+ } else {
+ // We want to swap all uses of Dst with uses of MatchInfoDst and vice versa
+ // but replaceRegWith will replace defs as well. It is easier to replace one
+ // def with a new register.
+ LLT Type = MRI.getType(Dst);
+ Register NegatedMatchInfo = MRI.createGenericVirtualRegister(Type);
+ replaceRegOpWith(MRI, MatchInfo->getOperand(0), NegatedMatchInfo);
+
+ // MatchInfo now has negated value so use that instead of old Dst.
+ replaceRegWith(MRI, Dst, NegatedMatchInfo);
+
+ // Recreate non negated value for other uses of old MatchInfoDst
+ Builder.setInstrAndDebugLoc(MI);
+ Builder.buildFNeg(MatchInfoDst, NegatedMatchInfo, MI.getFlags());
+ }
+
+ MI.eraseFromParent();
+ return;
+}
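Source-level picture of what this combine buys (a sketch, not from the patch): the explicit fneg disappears into the producer, where the hardware applies negation as a free source modifier. The G_FADD/G_FSUB/G_FMA/G_FMAD cases are gated on mayIgnoreSignedZero because the two forms differ exactly when the result is a signed zero.

// -(x + y) vs (-x) + (-y): identical except for the sign of a zero result,
// e.g. X = 1.0f, Y = -1.0f gives -0.0f for the first form and +0.0f for the
// second, which is why the fold requires nsz / no-signed-zeros.
float before(float X, float Y) { return -(X + Y); }
float after(float X, float Y) { return (-X) + (-Y); }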
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h
new file mode 100644
index 000000000000..1d4747136bf7
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h
@@ -0,0 +1,26 @@
+//=== lib/CodeGen/GlobalISel/AMDGPUCombinerHelper.h -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This contains common combine transformations that may be used in a combine
+/// pass.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/Combiner.h"
+#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+
+using namespace llvm;
+
+class AMDGPUCombinerHelper : public CombinerHelper {
+public:
+ using CombinerHelper::CombinerHelper;
+
+ bool matchFoldableFneg(MachineInstr &MI, MachineInstr *&MatchInfo);
+ void applyFoldableFneg(MachineInstr &MI, MachineInstr *&MatchInfo);
+};
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp
new file mode 100644
index 000000000000..04bf623bfa46
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp
@@ -0,0 +1,95 @@
+//===-- AMDGPUCtorDtorLowering.cpp - Handle global ctors and dtors --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This pass creates a unified init and fini kernel with the required metadata
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-lower-ctor-dtor"
+
+namespace {
+class AMDGPUCtorDtorLowering final : public ModulePass {
+ bool runOnModule(Module &M) override;
+
+public:
+ Function *createInitOrFiniKernelFunction(Module &M, bool IsCtor) {
+ StringRef InitOrFiniKernelName = "amdgcn.device.init";
+ if (!IsCtor)
+ InitOrFiniKernelName = "amdgcn.device.fini";
+
+ Function *InitOrFiniKernel = Function::createWithDefaultAttr(
+ FunctionType::get(Type::getVoidTy(M.getContext()), false),
+ GlobalValue::ExternalLinkage, 0, InitOrFiniKernelName, &M);
+ BasicBlock *InitOrFiniKernelBB =
+ BasicBlock::Create(M.getContext(), "", InitOrFiniKernel);
+ ReturnInst::Create(M.getContext(), InitOrFiniKernelBB);
+
+ InitOrFiniKernel->setCallingConv(CallingConv::AMDGPU_KERNEL);
+ if (IsCtor)
+ InitOrFiniKernel->addFnAttr("device-init");
+ else
+ InitOrFiniKernel->addFnAttr("device-fini");
+ return InitOrFiniKernel;
+ }
+
+ bool createInitOrFiniKernel(Module &M, GlobalVariable *GV, bool IsCtor) {
+ if (!GV)
+ return false;
+ ConstantArray *GA = dyn_cast<ConstantArray>(GV->getInitializer());
+ if (!GA || GA->getNumOperands() == 0)
+ return false;
+ Function *InitOrFiniKernel = createInitOrFiniKernelFunction(M, IsCtor);
+ IRBuilder<> IRB(InitOrFiniKernel->getEntryBlock().getTerminator());
+ for (Value *V : GA->operands()) {
+ auto *CS = cast<ConstantStruct>(V);
+ if (Function *F = dyn_cast<Function>(CS->getOperand(1))) {
+ FunctionCallee Ctor =
+ M.getOrInsertFunction(F->getName(), IRB.getVoidTy());
+ IRB.CreateCall(Ctor);
+ }
+ }
+ appendToUsed(M, {InitOrFiniKernel});
+ return true;
+ }
+
+ static char ID;
+ AMDGPUCtorDtorLowering() : ModulePass(ID) {}
+};
+} // End anonymous namespace
+
+char AMDGPUCtorDtorLowering::ID = 0;
+char &llvm::AMDGPUCtorDtorLoweringID = AMDGPUCtorDtorLowering::ID;
+INITIALIZE_PASS(AMDGPUCtorDtorLowering, DEBUG_TYPE,
+ "Lower ctors and dtors for AMDGPU", false, false)
+
+ModulePass *llvm::createAMDGPUCtorDtorLoweringPass() {
+ return new AMDGPUCtorDtorLowering();
+}
+
+bool AMDGPUCtorDtorLowering::runOnModule(Module &M) {
+ bool Modified = false;
+ Modified |=
+ createInitOrFiniKernel(M, M.getGlobalVariable("llvm.global_ctors"),
+ /*IsCtor =*/true);
+ Modified |=
+ createInitOrFiniKernel(M, M.getGlobalVariable("llvm.global_dtors"),
+ /*IsCtor =*/false);
+ return Modified;
+}
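For context (not part of the patch): llvm.global_ctors and llvm.global_dtors are appending-linkage arrays of { i32 priority, void ()* fn, i8* data } entries, one per global constructor or destructor in the module. The pass above folds each list into a single amdgpu_kernel (amdgcn.device.init / amdgcn.device.fini) that calls the entries in order and tags it with device-init / device-fini so the HSA metadata streamer can mark it accordingly; a language runtime is then expected to launch those kernels around normal kernel execution. A C++ source fragment that would produce such an entry (illustrative):

// A global with a non-trivial constructor lowers to an llvm.global_ctors
// entry; after this pass, amdgcn.device.init is what actually runs it.
struct DeviceTable {
  DeviceTable() { /* fill lookup tables, etc. */ }
};
DeviceTable Table;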
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUFeatures.td b/llvm/lib/Target/AMDGPU/AMDGPUFeatures.td
index db00f8f711a3..3533087bbfd1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUFeatures.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUFeatures.td
@@ -25,7 +25,6 @@ class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
"The size of local memory in bytes"
>;
-def FeatureLocalMemorySize0 : SubtargetFeatureLocalMemorySize<0>;
def FeatureLocalMemorySize32768 : SubtargetFeatureLocalMemorySize<32768>;
def FeatureLocalMemorySize65536 : SubtargetFeatureLocalMemorySize<65536>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 521c8f261a00..12cef2774aaf 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -159,6 +159,7 @@ def : GINodeEquiv<G_ATOMICRMW_UMAX, atomic_load_umax_glue>;
def : GINodeEquiv<G_ATOMICRMW_FADD, atomic_load_fadd_glue>;
def : GINodeEquiv<G_AMDGPU_FFBH_U32, AMDGPUffbh_u32_impl>;
+def : GINodeEquiv<G_AMDGPU_FFBL_B32, AMDGPUffbl_b32_impl>;
def : GINodeEquiv<G_AMDGPU_FMIN_LEGACY, AMDGPUfmin_legacy>;
def : GINodeEquiv<G_AMDGPU_FMAX_LEGACY, AMDGPUfmax_legacy>;
def : GINodeEquiv<G_AMDGPU_RCP_IFLAG, AMDGPUrcp_iflag>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
index 8eeda7b67b73..b9c59f4c615a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
@@ -113,7 +113,7 @@ MetadataStreamerV2::getAddressSpaceQualifier(
ValueKind MetadataStreamerV2::getValueKind(Type *Ty, StringRef TypeQual,
StringRef BaseTypeName) const {
- if (TypeQual.find("pipe") != StringRef::npos)
+ if (TypeQual.contains("pipe"))
return ValueKind::Pipe;
return StringSwitch<ValueKind>(BaseTypeName)
@@ -201,10 +201,11 @@ MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF,
Align MaxKernArgAlign;
HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F,
MaxKernArgAlign);
+ HSACodeProps.mKernargSegmentAlign =
+ std::max(MaxKernArgAlign, Align(4)).value();
+
HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
- HSACodeProps.mKernargSegmentAlign =
- std::max(MaxKernArgAlign, Align(4)).value();
HSACodeProps.mWavefrontSize = STM.getWavefrontSize();
HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR;
HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR;
@@ -533,7 +534,7 @@ MetadataStreamerV3::getAddressSpaceQualifier(unsigned AddressSpace) const {
StringRef MetadataStreamerV3::getValueKind(Type *Ty, StringRef TypeQual,
StringRef BaseTypeName) const {
- if (TypeQual.find("pipe") != StringRef::npos)
+ if (TypeQual.contains("pipe"))
return "pipe";
return StringSwitch<StringRef>(BaseTypeName)
@@ -665,6 +666,10 @@ void MetadataStreamerV3::emitKernelAttrs(const Function &Func,
Func.getFnAttribute("runtime-handle").getValueAsString().str(),
/*Copy=*/true);
}
+ if (Func.hasFnAttribute("device-init"))
+ Kern[".kind"] = Kern.getDocument()->getNode("init");
+ else if (Func.hasFnAttribute("device-fini"))
+ Kern[".kind"] = Kern.getDocument()->getNode("fini");
}
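This pairs with the ctor/dtor lowering pass earlier in the diff: any kernel carrying the device-init or device-fini string attribute gets .kind: init or .kind: fini in the HSA notes, which is how the runtime finds the generated kernels. Sketch of the producer side (illustrative, mirrors createInitOrFiniKernelFunction above):

// Tag a kernel so the metadata streamer emits ".kind: init" for it.
static void markAsDeviceInit(Function &F) {
  F.setCallingConv(CallingConv::AMDGPU_KERNEL);
  F.addFnAttr("device-init");
}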
void MetadataStreamerV3::emitKernelArgs(const Function &Func,
@@ -794,7 +799,8 @@ void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func,
if (!HiddenArgNumBytes)
return;
- auto &DL = Func.getParent()->getDataLayout();
+ const Module *M = Func.getParent();
+ auto &DL = M->getDataLayout();
auto Int64Ty = Type::getInt64Ty(Func.getContext());
if (HiddenArgNumBytes >= 8)
@@ -810,16 +816,16 @@ void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func,
auto Int8PtrTy =
Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
- // Emit "printf buffer" argument if printf is used, otherwise emit dummy
- // "none" argument.
+ // Emit "printf buffer" argument if printf is used, emit "hostcall buffer"
+ // if "hostcall" module flag is set, otherwise emit dummy "none" argument.
if (HiddenArgNumBytes >= 32) {
- if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
+ if (M->getNamedMetadata("llvm.printf.fmts"))
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
Args);
- else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) {
+ else if (M->getModuleFlag("amdgpu_hostcall")) {
// The printf runtime binding pass should have ensured that hostcall and
// printf are not used in the same module.
- assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts"));
+ assert(!M->getNamedMetadata("llvm.printf.fmts"));
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
Args);
} else
@@ -862,6 +868,8 @@ MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF,
Kern.getDocument()->getNode(ProgramInfo.LDSSize);
Kern[".private_segment_fixed_size"] =
Kern.getDocument()->getNode(ProgramInfo.ScratchSize);
+
+ // FIXME: The metadata treats the minimum as 16?
Kern[".kernarg_segment_align"] =
Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value());
Kern[".wavefront_size"] =
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
index 4824b4cf37c7..af5dae1cd8c0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
@@ -15,7 +15,6 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUHSAMETADATASTREAMER_H
#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUHSAMETADATASTREAMER_H
-#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/BinaryFormat/MsgPackDocument.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/Alignment.h"
@@ -33,6 +32,11 @@ struct SIProgramInfo;
class Type;
namespace AMDGPU {
+
+namespace IsaInfo {
+class AMDGPUTargetID;
+}
+
namespace HSAMD {
class MetadataStreamer {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index a3106ded1e38..cee56ee97294 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -11,8 +11,11 @@
//
//===----------------------------------------------------------------------===//
+#include "AMDGPUISelDAGToDAG.h"
#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
+#include "MCTargetDesc/R600MCTargetDesc.h"
+#include "R600RegisterInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -32,287 +35,12 @@
using namespace llvm;
-namespace llvm {
-
-class R600InstrInfo;
-
-} // end namespace llvm
-
//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//
namespace {
-static bool isNullConstantOrUndef(SDValue V) {
- if (V.isUndef())
- return true;
-
- ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
- return Const != nullptr && Const->isNullValue();
-}
-
-static bool getConstantValue(SDValue N, uint32_t &Out) {
- // This is only used for packed vectors, where ussing 0 for undef should
- // always be good.
- if (N.isUndef()) {
- Out = 0;
- return true;
- }
-
- if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
- Out = C->getAPIntValue().getSExtValue();
- return true;
- }
-
- if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
- Out = C->getValueAPF().bitcastToAPInt().getSExtValue();
- return true;
- }
-
- return false;
-}
-
-// TODO: Handle undef as zero
-static SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG,
- bool Negate = false) {
- assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2);
- uint32_t LHSVal, RHSVal;
- if (getConstantValue(N->getOperand(0), LHSVal) &&
- getConstantValue(N->getOperand(1), RHSVal)) {
- SDLoc SL(N);
- uint32_t K = Negate ?
- (-LHSVal & 0xffff) | (-RHSVal << 16) :
- (LHSVal & 0xffff) | (RHSVal << 16);
- return DAG.getMachineNode(AMDGPU::S_MOV_B32, SL, N->getValueType(0),
- DAG.getTargetConstant(K, SL, MVT::i32));
- }
-
- return nullptr;
-}
-
-static SDNode *packNegConstantV2I16(const SDNode *N, SelectionDAG &DAG) {
- return packConstantV2I16(N, DAG, true);
-}
-
-/// AMDGPU specific code to select AMDGPU machine instructions for
-/// SelectionDAG operations.
-class AMDGPUDAGToDAGISel : public SelectionDAGISel {
- // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
- // make the right decision when generating code for different targets.
- const GCNSubtarget *Subtarget;
-
- // Default FP mode for the current function.
- AMDGPU::SIModeRegisterDefaults Mode;
-
- bool EnableLateStructurizeCFG;
-
- // Instructions that will be lowered with a final instruction that zeros the
- // high result bits.
- bool fp16SrcZerosHighBits(unsigned Opc) const;
-
-public:
- explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
- CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
- : SelectionDAGISel(*TM, OptLevel) {
- EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
- }
- ~AMDGPUDAGToDAGISel() override = default;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AMDGPUArgumentUsageInfo>();
- AU.addRequired<LegacyDivergenceAnalysis>();
-#ifdef EXPENSIVE_CHECKS
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
-#endif
- SelectionDAGISel::getAnalysisUsage(AU);
- }
-
- bool matchLoadD16FromBuildVector(SDNode *N) const;
-
- bool runOnMachineFunction(MachineFunction &MF) override;
- void PreprocessISelDAG() override;
- void Select(SDNode *N) override;
- StringRef getPassName() const override;
- void PostprocessISelDAG() override;
-
-protected:
- void SelectBuildVector(SDNode *N, unsigned RegClassID);
-
-private:
- std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
- bool isNoNanSrc(SDValue N) const;
- bool isInlineImmediate(const SDNode *N, bool Negated = false) const;
- bool isNegInlineImmediate(const SDNode *N) const {
- return isInlineImmediate(N, true);
- }
-
- bool isInlineImmediate16(int64_t Imm) const {
- return AMDGPU::isInlinableLiteral16(Imm, Subtarget->hasInv2PiInlineImm());
- }
-
- bool isInlineImmediate32(int64_t Imm) const {
- return AMDGPU::isInlinableLiteral32(Imm, Subtarget->hasInv2PiInlineImm());
- }
-
- bool isInlineImmediate64(int64_t Imm) const {
- return AMDGPU::isInlinableLiteral64(Imm, Subtarget->hasInv2PiInlineImm());
- }
-
- bool isInlineImmediate(const APFloat &Imm) const {
- return Subtarget->getInstrInfo()->isInlineConstant(Imm);
- }
-
- bool isVGPRImm(const SDNode *N) const;
- bool isUniformLoad(const SDNode *N) const;
- bool isUniformBr(const SDNode *N) const;
-
- bool isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS,
- SDValue &RHS) const;
-
- MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;
-
- SDNode *glueCopyToOp(SDNode *N, SDValue NewChain, SDValue Glue) const;
- SDNode *glueCopyToM0(SDNode *N, SDValue Val) const;
- SDNode *glueCopyToM0LDSInit(SDNode *N) const;
-
- const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
- virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
- virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
- bool isDSOffsetLegal(SDValue Base, unsigned Offset) const;
- bool isDSOffset2Legal(SDValue Base, unsigned Offset0, unsigned Offset1,
- unsigned Size) const;
- bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
- bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
- SDValue &Offset1) const;
- bool SelectDS128Bit8ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
- SDValue &Offset1) const;
- bool SelectDSReadWrite2(SDValue Ptr, SDValue &Base, SDValue &Offset0,
- SDValue &Offset1, unsigned Size) const;
- bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
- SDValue &SOffset, SDValue &Offset, SDValue &Offen,
- SDValue &Idxen, SDValue &Addr64) const;
- bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
- SDValue &SOffset, SDValue &Offset) const;
- bool SelectMUBUFScratchOffen(SDNode *Parent,
- SDValue Addr, SDValue &RSrc, SDValue &VAddr,
- SDValue &SOffset, SDValue &ImmOffset) const;
- bool SelectMUBUFScratchOffset(SDNode *Parent,
- SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
- SDValue &Offset) const;
-
- bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
- SDValue &Offset) const;
-
- bool SelectFlatOffsetImpl(SDNode *N, SDValue Addr, SDValue &VAddr,
- SDValue &Offset, uint64_t FlatVariant) const;
- bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
- SDValue &Offset) const;
- bool SelectGlobalOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
- SDValue &Offset) const;
- bool SelectScratchOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
- SDValue &Offset) const;
- bool SelectGlobalSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
- SDValue &VOffset, SDValue &Offset) const;
- bool SelectScratchSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
- SDValue &Offset) const;
-
- bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
- bool &Imm) const;
- SDValue Expand32BitAddress(SDValue Addr) const;
- bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
- bool &Imm) const;
- bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
- bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
- bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
- bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
- bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
- bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
-
- bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
- bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods,
- bool AllowAbs = true) const;
- bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
- bool SelectVOP3BMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
- bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
- bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
- SDValue &Clamp, SDValue &Omod) const;
- bool SelectVOP3BMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
- SDValue &Clamp, SDValue &Omod) const;
- bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
- SDValue &Clamp, SDValue &Omod) const;
-
- bool SelectVOP3OMods(SDValue In, SDValue &Src,
- SDValue &Clamp, SDValue &Omod) const;
-
- bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
-
- bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
-
- bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
- bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
- bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
-
- SDValue getHi16Elt(SDValue In) const;
-
- SDValue getMaterializedScalarImm32(int64_t Val, const SDLoc &DL) const;
-
- void SelectADD_SUB_I64(SDNode *N);
- void SelectAddcSubb(SDNode *N);
- void SelectUADDO_USUBO(SDNode *N);
- void SelectDIV_SCALE(SDNode *N);
- void SelectMAD_64_32(SDNode *N);
- void SelectFMA_W_CHAIN(SDNode *N);
- void SelectFMUL_W_CHAIN(SDNode *N);
-
- SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
- uint32_t Offset, uint32_t Width);
- void SelectS_BFEFromShifts(SDNode *N);
- void SelectS_BFE(SDNode *N);
- bool isCBranchSCC(const SDNode *N) const;
- void SelectBRCOND(SDNode *N);
- void SelectFMAD_FMA(SDNode *N);
- void SelectATOMIC_CMP_SWAP(SDNode *N);
- void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
- void SelectDS_GWS(SDNode *N, unsigned IntrID);
- void SelectInterpP1F16(SDNode *N);
- void SelectINTRINSIC_W_CHAIN(SDNode *N);
- void SelectINTRINSIC_WO_CHAIN(SDNode *N);
- void SelectINTRINSIC_VOID(SDNode *N);
-
-protected:
- // Include the pieces autogenerated from the target description.
-#include "AMDGPUGenDAGISel.inc"
-};
-
-class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
- const R600Subtarget *Subtarget;
-
- bool isConstantLoad(const MemSDNode *N, int cbID) const;
- bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
- bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
- SDValue& Offset);
-public:
- explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
- AMDGPUDAGToDAGISel(TM, OptLevel) {}
-
- void Select(SDNode *N) override;
-
- bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
- SDValue &Offset) override;
- bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
- SDValue &Offset) override;
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- void PreprocessISelDAG() override {}
-
-protected:
- // Include the pieces autogenerated from the target description.
-#include "R600GenDAGISel.inc"
-};
-
static SDValue stripBitcast(SDValue Val) {
return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
}
@@ -351,7 +79,7 @@ static bool isExtractHiElt(SDValue In, SDValue &Out) {
static SDValue stripExtractLoElt(SDValue In) {
if (In.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
if (ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(In.getOperand(1))) {
- if (Idx->isNullValue() && In.getValueSizeInBits() <= 32)
+ if (Idx->isZero() && In.getValueSizeInBits() <= 32)
return In.getOperand(0);
}
}
@@ -386,11 +114,11 @@ FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
return new AMDGPUDAGToDAGISel(TM, OptLevel);
}
-/// This pass converts a legalized DAG into a R600-specific
-// DAG, ready for instruction scheduling.
-FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
- CodeGenOpt::Level OptLevel) {
- return new R600DAGToDAGISel(TM, OptLevel);
+AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(
+ TargetMachine *TM /*= nullptr*/,
+ CodeGenOpt::Level OptLevel /*= CodeGenOpt::Default*/)
+ : SelectionDAGISel(*TM, OptLevel) {
+ EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
}
bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
@@ -468,6 +196,16 @@ bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(unsigned Opc) const {
}
}
+void AMDGPUDAGToDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AMDGPUArgumentUsageInfo>();
+ AU.addRequired<LegacyDivergenceAnalysis>();
+#ifdef EXPENSIVE_CHECKS
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+#endif
+ SelectionDAGISel::getAnalysisUsage(AU);
+}
+
bool AMDGPUDAGToDAGISel::matchLoadD16FromBuildVector(SDNode *N) const {
assert(Subtarget->d16PreservesUnusedBits());
MVT VT = N->getValueType(0).getSimpleVT();
@@ -903,8 +641,8 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
uint32_t OffsetVal = Offset->getZExtValue();
uint32_t WidthVal = Width->getZExtValue();
- ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
- SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
+ ReplaceNode(N, getBFE32(Signed, SDLoc(N), N->getOperand(0), OffsetVal,
+ WidthVal));
return;
}
case AMDGPUISD::DIV_SCALE: {
@@ -1207,7 +945,14 @@ void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
Ops[8] = N->getOperand(0);
Ops[9] = N->getOperand(4);
- CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32_e64, N->getVTList(), Ops);
+ // If there are no source modifiers, prefer fmac over fma because it can use
+ // the smaller VOP2 encoding.
+ bool UseFMAC = Subtarget->hasDLInsts() &&
+ cast<ConstantSDNode>(Ops[0])->isZero() &&
+ cast<ConstantSDNode>(Ops[2])->isZero() &&
+ cast<ConstantSDNode>(Ops[4])->isZero();
+ unsigned Opcode = UseFMAC ? AMDGPU::V_FMAC_F32_e64 : AMDGPU::V_FMA_F32_e64;
+ CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), Ops);
}
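Background for the hunk above (not from the patch): V_FMAC_F32 is the destructive two-operand form, D = S0 * S1 + D, so it fits the 32-bit VOP2 encoding but cannot carry abs/neg source modifiers or clamp/omod, which is why the switch only happens when all three modifier operands are zero and the subtarget has the instruction (hasDLInsts).

// Behavioral sketch of the two encodings:
float fma_vop3(float A, float B, float C) { return A * B + C; } // 64-bit VOP3, modifiers allowed
void fmac_vop2(float A, float B, float &D) { D = A * B + D; }   // 32-bit VOP2, D doubles as the addend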
void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
@@ -1707,7 +1452,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
!cast<ConstantSDNode>(Idxen)->getSExtValue() &&
!cast<ConstantSDNode>(Addr64)->getSExtValue()) {
uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
- APInt::getAllOnesValue(32).getZExtValue(); // Size
+ APInt::getAllOnes(32).getZExtValue(); // Size
SDLoc DL(Addr);
const SITargetLowering& Lowering =
@@ -2202,9 +1947,17 @@ bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
return true;
}
-SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
+SDNode *AMDGPUDAGToDAGISel::getBFE32(bool IsSigned, const SDLoc &DL,
SDValue Val, uint32_t Offset,
uint32_t Width) {
+ if (Val->isDivergent()) {
+ unsigned Opcode = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
+ SDValue Off = CurDAG->getTargetConstant(Offset, DL, MVT::i32);
+ SDValue W = CurDAG->getTargetConstant(Width, DL, MVT::i32);
+
+ return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, Off, W);
+ }
+ unsigned Opcode = IsSigned ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
// Transformation function, pack the offset and width of a BFE into
// the format expected by the S_BFE_I32 / S_BFE_U32. In the second
// source, bits [5:0] contain the offset and bits [22:16] the width.
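The scalar path packs both field parameters into the second source operand: offset in bits [5:0] and width in bits [22:16]. For example, extracting an 8-bit field starting at bit 4 encodes as (8 << 16) | 4 = 0x80004 (illustrative check, not from the patch):

#include <cstdint>

constexpr uint32_t packSBFEOperand(uint32_t Offset, uint32_t Width) {
  return (Width << 16) | (Offset & 0x3f);
}
static_assert(packSBFEOperand(4, 8) == 0x80004, "8-bit field at bit 4");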
@@ -2229,10 +1982,8 @@ void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
if (0 < BVal && BVal <= CVal && CVal < 32) {
bool Signed = N->getOpcode() == ISD::SRA;
- unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
-
- ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
- 32 - CVal));
+ ReplaceNode(N, getBFE32(Signed, SDLoc(N), Shl.getOperand(0), CVal - BVal,
+ 32 - CVal));
return;
}
}
@@ -2255,9 +2006,8 @@ void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
if (isMask_32(MaskVal)) {
uint32_t WidthVal = countPopulation(MaskVal);
-
- ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
- Srl.getOperand(0), ShiftVal, WidthVal));
+ ReplaceNode(N, getBFE32(false, SDLoc(N), Srl.getOperand(0), ShiftVal,
+ WidthVal));
return;
}
}
@@ -2277,9 +2027,8 @@ void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
if (isMask_32(MaskVal)) {
uint32_t WidthVal = countPopulation(MaskVal);
-
- ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
- And.getOperand(0), ShiftVal, WidthVal));
+ ReplaceNode(N, getBFE32(false, SDLoc(N), And.getOperand(0), ShiftVal,
+ WidthVal));
return;
}
}
@@ -2306,7 +2055,7 @@ void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
break;
unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
- ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
+ ReplaceNode(N, getBFE32(true, SDLoc(N), Src.getOperand(0),
Amt->getZExtValue(), Width));
return;
}
@@ -3111,128 +2860,3 @@ void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
CurDAG->RemoveDeadNodes();
} while (IsModified);
}
-
-bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
- Subtarget = &MF.getSubtarget<R600Subtarget>();
- return SelectionDAGISel::runOnMachineFunction(MF);
-}
-
-bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
- if (!N->readMem())
- return false;
- if (CbId == -1)
- return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
- N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
-
- return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
-}
-
-bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
- SDValue& IntPtr) {
- if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
- IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
- true);
- return true;
- }
- return false;
-}
-
-bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
- SDValue& BaseReg, SDValue &Offset) {
- if (!isa<ConstantSDNode>(Addr)) {
- BaseReg = Addr;
- Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
- return true;
- }
- return false;
-}
-
-void R600DAGToDAGISel::Select(SDNode *N) {
- unsigned int Opc = N->getOpcode();
- if (N->isMachineOpcode()) {
- N->setNodeId(-1);
- return; // Already selected.
- }
-
- switch (Opc) {
- default: break;
- case AMDGPUISD::BUILD_VERTICAL_VECTOR:
- case ISD::SCALAR_TO_VECTOR:
- case ISD::BUILD_VECTOR: {
- EVT VT = N->getValueType(0);
- unsigned NumVectorElts = VT.getVectorNumElements();
- unsigned RegClassID;
- // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
- // that adds a 128 bits reg copy when going through TwoAddressInstructions
- // pass. We want to avoid 128 bits copies as much as possible because they
- // can't be bundled by our scheduler.
- switch(NumVectorElts) {
- case 2: RegClassID = R600::R600_Reg64RegClassID; break;
- case 4:
- if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
- RegClassID = R600::R600_Reg128VerticalRegClassID;
- else
- RegClassID = R600::R600_Reg128RegClassID;
- break;
- default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
- }
- SelectBuildVector(N, RegClassID);
- return;
- }
- }
-
- SelectCode(N);
-}
-
-bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
- SDValue &Offset) {
- ConstantSDNode *C;
- SDLoc DL(Addr);
-
- if ((C = dyn_cast<ConstantSDNode>(Addr))) {
- Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
- Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
- } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
- (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
- Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
- Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
- } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
- (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
- Base = Addr.getOperand(0);
- Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
- } else {
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
- }
-
- return true;
-}
-
-bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
- SDValue &Offset) {
- ConstantSDNode *IMMOffset;
-
- if (Addr.getOpcode() == ISD::ADD
- && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
- && isInt<16>(IMMOffset->getZExtValue())) {
-
- Base = Addr.getOperand(0);
- Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
- MVT::i32);
- return true;
- // If the pointer address is constant, we can move it to the offset field.
- } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
- && isInt<16>(IMMOffset->getZExtValue())) {
- Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
- SDLoc(CurDAG->getEntryNode()),
- R600::ZERO, MVT::i32);
- Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
- MVT::i32);
- return true;
- }
-
- // Default case, no offset
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
- return true;
-}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
new file mode 100644
index 000000000000..c1d9673f067e
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -0,0 +1,256 @@
+//===-- AMDGPUISelDAGToDAG.h - A dag to dag inst selector for AMDGPU ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// Defines an instruction selector for the AMDGPU target.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUISELDAGTODAG_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPUISELDAGTODAG_H
+
+#include "GCNSubtarget.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+namespace {
+
+static inline bool isNullConstantOrUndef(SDValue V) {
+ if (V.isUndef())
+ return true;
+
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
+ return Const != nullptr && Const->isZero();
+}
+
+static inline bool getConstantValue(SDValue N, uint32_t &Out) {
+ // This is only used for packed vectors, where using 0 for undef should
+ // always be good.
+ if (N.isUndef()) {
+ Out = 0;
+ return true;
+ }
+
+ if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
+ Out = C->getAPIntValue().getSExtValue();
+ return true;
+ }
+
+ if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
+ Out = C->getValueAPF().bitcastToAPInt().getSExtValue();
+ return true;
+ }
+
+ return false;
+}
+
+// TODO: Handle undef as zero
+static inline SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG,
+ bool Negate = false) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2);
+ uint32_t LHSVal, RHSVal;
+ if (getConstantValue(N->getOperand(0), LHSVal) &&
+ getConstantValue(N->getOperand(1), RHSVal)) {
+ SDLoc SL(N);
+ uint32_t K = Negate ? (-LHSVal & 0xffff) | (-RHSVal << 16)
+ : (LHSVal & 0xffff) | (RHSVal << 16);
+ return DAG.getMachineNode(AMDGPU::S_MOV_B32, SL, N->getValueType(0),
+ DAG.getTargetConstant(K, SL, MVT::i32));
+ }
+
+ return nullptr;
+}
+
+static inline SDNode *packNegConstantV2I16(const SDNode *N, SelectionDAG &DAG) {
+ return packConstantV2I16(N, DAG, true);
+}
+} // namespace
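
For reference, the packing arithmetic performed by packConstantV2I16 can be sketched as plain host code (hypothetical name; the real helper emits an S_MOV_B32 of the packed immediate rather than returning an integer):

#include <cstdint>

// Pack two 16-bit lanes into one 32-bit immediate, optionally negating both
// lanes first, mirroring (LHSVal & 0xffff) | (RHSVal << 16) above.
static uint32_t packV2I16(uint32_t LHSVal, uint32_t RHSVal, bool Negate = false) {
  if (Negate) {
    LHSVal = -LHSVal;
    RHSVal = -RHSVal;
  }
  return (LHSVal & 0xffff) | (RHSVal << 16);
}

// packV2I16(0x0001, 0x0002) == 0x00020001; lane 0 sits in the low half.
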
+
+/// AMDGPU specific code to select AMDGPU machine instructions for
+/// SelectionDAG operations.
+class AMDGPUDAGToDAGISel : public SelectionDAGISel {
+ // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
+ // make the right decision when generating code for different targets.
+ const GCNSubtarget *Subtarget;
+
+ // Default FP mode for the current function.
+ AMDGPU::SIModeRegisterDefaults Mode;
+
+ bool EnableLateStructurizeCFG;
+
+ // Instructions that will be lowered with a final instruction that zeros the
+ // high result bits.
+ bool fp16SrcZerosHighBits(unsigned Opc) const;
+
+public:
+ explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
+ CodeGenOpt::Level OptLevel = CodeGenOpt::Default);
+ ~AMDGPUDAGToDAGISel() override = default;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ bool matchLoadD16FromBuildVector(SDNode *N) const;
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ void PreprocessISelDAG() override;
+ void Select(SDNode *N) override;
+ StringRef getPassName() const override;
+ void PostprocessISelDAG() override;
+
+protected:
+ void SelectBuildVector(SDNode *N, unsigned RegClassID);
+
+private:
+ std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
+ bool isNoNanSrc(SDValue N) const;
+ bool isInlineImmediate(const SDNode *N, bool Negated = false) const;
+ bool isNegInlineImmediate(const SDNode *N) const {
+ return isInlineImmediate(N, true);
+ }
+
+ bool isInlineImmediate16(int64_t Imm) const {
+ return AMDGPU::isInlinableLiteral16(Imm, Subtarget->hasInv2PiInlineImm());
+ }
+
+ bool isInlineImmediate32(int64_t Imm) const {
+ return AMDGPU::isInlinableLiteral32(Imm, Subtarget->hasInv2PiInlineImm());
+ }
+
+ bool isInlineImmediate64(int64_t Imm) const {
+ return AMDGPU::isInlinableLiteral64(Imm, Subtarget->hasInv2PiInlineImm());
+ }
+
+ bool isInlineImmediate(const APFloat &Imm) const {
+ return Subtarget->getInstrInfo()->isInlineConstant(Imm);
+ }
+
+ bool isVGPRImm(const SDNode *N) const;
+ bool isUniformLoad(const SDNode *N) const;
+ bool isUniformBr(const SDNode *N) const;
+
+ bool isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS,
+ SDValue &RHS) const;
+
+ MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;
+
+ SDNode *glueCopyToOp(SDNode *N, SDValue NewChain, SDValue Glue) const;
+ SDNode *glueCopyToM0(SDNode *N, SDValue Val) const;
+ SDNode *glueCopyToM0LDSInit(SDNode *N) const;
+
+ const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
+ virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
+ virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
+ bool isDSOffsetLegal(SDValue Base, unsigned Offset) const;
+ bool isDSOffset2Legal(SDValue Base, unsigned Offset0, unsigned Offset1,
+ unsigned Size) const;
+ bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
+ bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
+ SDValue &Offset1) const;
+ bool SelectDS128Bit8ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
+ SDValue &Offset1) const;
+ bool SelectDSReadWrite2(SDValue Ptr, SDValue &Base, SDValue &Offset0,
+ SDValue &Offset1, unsigned Size) const;
+ bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
+ SDValue &SOffset, SDValue &Offset, SDValue &Offen,
+ SDValue &Idxen, SDValue &Addr64) const;
+ bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
+ SDValue &SOffset, SDValue &Offset) const;
+ bool SelectMUBUFScratchOffen(SDNode *Parent, SDValue Addr, SDValue &RSrc,
+ SDValue &VAddr, SDValue &SOffset,
+ SDValue &ImmOffset) const;
+ bool SelectMUBUFScratchOffset(SDNode *Parent, SDValue Addr, SDValue &SRsrc,
+ SDValue &Soffset, SDValue &Offset) const;
+
+ bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
+ SDValue &Offset) const;
+
+ bool SelectFlatOffsetImpl(SDNode *N, SDValue Addr, SDValue &VAddr,
+ SDValue &Offset, uint64_t FlatVariant) const;
+ bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
+ SDValue &Offset) const;
+ bool SelectGlobalOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
+ SDValue &Offset) const;
+ bool SelectScratchOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
+ SDValue &Offset) const;
+ bool SelectGlobalSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
+ SDValue &VOffset, SDValue &Offset) const;
+ bool SelectScratchSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
+ SDValue &Offset) const;
+
+ bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
+ bool &Imm) const;
+ SDValue Expand32BitAddress(SDValue Addr) const;
+ bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
+ bool &Imm) const;
+ bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
+ bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
+ bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
+ bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
+ bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
+ bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
+
+ bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods,
+ bool AllowAbs = true) const;
+ bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3BMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
+ bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
+ SDValue &Clamp, SDValue &Omod) const;
+ bool SelectVOP3BMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
+ SDValue &Clamp, SDValue &Omod) const;
+ bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
+ SDValue &Clamp, SDValue &Omod) const;
+
+ bool SelectVOP3OMods(SDValue In, SDValue &Src, SDValue &Clamp,
+ SDValue &Omod) const;
+
+ bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+
+ bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+
+ bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
+ unsigned &Mods) const;
+ bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+
+ SDValue getHi16Elt(SDValue In) const;
+
+ SDValue getMaterializedScalarImm32(int64_t Val, const SDLoc &DL) const;
+
+ void SelectADD_SUB_I64(SDNode *N);
+ void SelectAddcSubb(SDNode *N);
+ void SelectUADDO_USUBO(SDNode *N);
+ void SelectDIV_SCALE(SDNode *N);
+ void SelectMAD_64_32(SDNode *N);
+ void SelectFMA_W_CHAIN(SDNode *N);
+ void SelectFMUL_W_CHAIN(SDNode *N);
+ SDNode *getBFE32(bool IsSigned, const SDLoc &DL, SDValue Val, uint32_t Offset,
+ uint32_t Width);
+ void SelectS_BFEFromShifts(SDNode *N);
+ void SelectS_BFE(SDNode *N);
+ bool isCBranchSCC(const SDNode *N) const;
+ void SelectBRCOND(SDNode *N);
+ void SelectFMAD_FMA(SDNode *N);
+ void SelectATOMIC_CMP_SWAP(SDNode *N);
+ void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
+ void SelectDS_GWS(SDNode *N, unsigned IntrID);
+ void SelectInterpP1F16(SDNode *N);
+ void SelectINTRINSIC_W_CHAIN(SDNode *N);
+ void SelectINTRINSIC_WO_CHAIN(SDNode *N);
+ void SelectINTRINSIC_VOID(SDNode *N);
+
+protected:
+ // Include the pieces autogenerated from the target description.
+#include "AMDGPUGenDAGISel.inc"
+};
+
+#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUISELDAGTODAG_H
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index d68488ccb342..523fa2d3724b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -45,17 +45,13 @@ EVT AMDGPUTargetLowering::getEquivalentMemType(LLVMContext &Ctx, EVT VT) {
}
unsigned AMDGPUTargetLowering::numBitsUnsigned(SDValue Op, SelectionDAG &DAG) {
- EVT VT = Op.getValueType();
- KnownBits Known = DAG.computeKnownBits(Op);
- return VT.getSizeInBits() - Known.countMinLeadingZeros();
+ return DAG.computeKnownBits(Op).countMaxActiveBits();
}
unsigned AMDGPUTargetLowering::numBitsSigned(SDValue Op, SelectionDAG &DAG) {
- EVT VT = Op.getValueType();
-
// In order for this to be a signed 24-bit value, bit 23 must
// be a sign bit.
- return VT.getSizeInBits() - DAG.ComputeNumSignBits(Op);
+ return DAG.ComputeMinSignedBits(Op);
}
AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
@@ -1042,7 +1038,7 @@ CCAssignFn *AMDGPUCallLowering::CCAssignFnForReturn(CallingConv::ID CC,
/// In order to correctly lower the arguments we need to know the size of each
/// argument. Since Ins[x].VT gives us the size of the register that will
/// hold the value, we need to look at Ins[x].ArgVT to see the 'real' type
-/// for the orignal function argument so that we can deduce the correct memory
+/// for the original function argument so that we can deduce the correct memory
/// type to use for Ins[x]. In most cases the correct memory type will be
/// Ins[x].ArgVT. However, this will not always be the case. If, for example,
/// we have a kernel argument of type v8i8, this argument will be split into
@@ -1210,10 +1206,8 @@ SDValue AMDGPUTargetLowering::addTokenForArgument(SDValue Chain,
ArgChains.push_back(Chain);
// Add a chain value for each stack argument corresponding
- for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(),
- UE = DAG.getEntryNode().getNode()->use_end();
- U != UE; ++U) {
- if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U)) {
+ for (SDNode *U : DAG.getEntryNode().getNode()->uses()) {
+ if (LoadSDNode *L = dyn_cast<LoadSDNode>(U)) {
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr())) {
if (FI->getIndex() < 0) {
int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex());
@@ -1334,14 +1328,6 @@ void AMDGPUTargetLowering::ReplaceNodeResults(SDNode *N,
}
}
-bool AMDGPUTargetLowering::hasDefinedInitializer(const GlobalValue *GV) {
- const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
- if (!GVar || !GVar->hasInitializer())
- return false;
-
- return !isa<UndefValue>(GVar->getInitializer());
-}
-
SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
SDValue Op,
SelectionDAG &DAG) const {
@@ -1378,16 +1364,11 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
"Do not know what to do with an non-zero offset");
// TODO: We could emit code to handle the initialization somewhere.
- if (!hasDefinedInitializer(GV)) {
- unsigned Offset = MFI->allocateLDSGlobal(DL, *cast<GlobalVariable>(GV));
- return DAG.getConstant(Offset, SDLoc(Op), Op.getValueType());
- }
+ // We ignore the initializer for now and legalize it to allow selection.
+ // The initializer will be rejected during assembly emission anyway.
+ unsigned Offset = MFI->allocateLDSGlobal(DL, *cast<GlobalVariable>(GV));
+ return DAG.getConstant(Offset, SDLoc(Op), Op.getValueType());
}
-
- const Function &Fn = DAG.getMachineFunction().getFunction();
- DiagnosticInfoUnsupported BadInit(
- Fn, "unsupported initializer for address space", SDLoc(Op).getDebugLoc());
- DAG.getContext()->diagnose(BadInit);
return SDValue();
}
@@ -1856,6 +1837,9 @@ void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op,
}
if (isTypeLegal(MVT::i64)) {
+ // The algorithm here is based on ideas from "Software Integer Division",
+ // Tom Rodeheffer, August 2008.
+
MachineFunction &MF = DAG.getMachineFunction();
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
@@ -1890,37 +1874,35 @@ void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op,
SDValue Zero1 = DAG.getConstant(0, DL, MVT::i1);
SDVTList HalfCarryVT = DAG.getVTList(HalfVT, MVT::i1);
+ // First round of UNR (Unsigned integer Newton-Raphson).
SDValue Neg_RHS = DAG.getNode(ISD::SUB, DL, VT, Zero64, RHS);
SDValue Mullo1 = DAG.getNode(ISD::MUL, DL, VT, Neg_RHS, Rcp64);
SDValue Mulhi1 = DAG.getNode(ISD::MULHU, DL, VT, Rcp64, Mullo1);
SDValue Mulhi1_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mulhi1,
Zero);
- SDValue Mulhi1_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mulhi1,
- One);
-
+ SDValue Mulhi1_Hi =
+ DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mulhi1, One);
SDValue Add1_Lo = DAG.getNode(ISD::ADDCARRY, DL, HalfCarryVT, Rcp_Lo,
Mulhi1_Lo, Zero1);
SDValue Add1_Hi = DAG.getNode(ISD::ADDCARRY, DL, HalfCarryVT, Rcp_Hi,
Mulhi1_Hi, Add1_Lo.getValue(1));
- SDValue Add1_HiNc = DAG.getNode(ISD::ADD, DL, HalfVT, Rcp_Hi, Mulhi1_Hi);
SDValue Add1 = DAG.getBitcast(VT,
DAG.getBuildVector(MVT::v2i32, DL, {Add1_Lo, Add1_Hi}));
+ // Second round of UNR.
SDValue Mullo2 = DAG.getNode(ISD::MUL, DL, VT, Neg_RHS, Add1);
SDValue Mulhi2 = DAG.getNode(ISD::MULHU, DL, VT, Add1, Mullo2);
SDValue Mulhi2_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mulhi2,
Zero);
- SDValue Mulhi2_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mulhi2,
- One);
-
+ SDValue Mulhi2_Hi =
+ DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mulhi2, One);
SDValue Add2_Lo = DAG.getNode(ISD::ADDCARRY, DL, HalfCarryVT, Add1_Lo,
Mulhi2_Lo, Zero1);
- SDValue Add2_HiC = DAG.getNode(ISD::ADDCARRY, DL, HalfCarryVT, Add1_HiNc,
- Mulhi2_Hi, Add1_Lo.getValue(1));
- SDValue Add2_Hi = DAG.getNode(ISD::ADDCARRY, DL, HalfCarryVT, Add2_HiC,
- Zero, Add2_Lo.getValue(1));
+ SDValue Add2_Hi = DAG.getNode(ISD::ADDCARRY, DL, HalfCarryVT, Add1_Hi,
+ Mulhi2_Hi, Add2_Lo.getValue(1));
SDValue Add2 = DAG.getBitcast(VT,
DAG.getBuildVector(MVT::v2i32, DL, {Add2_Lo, Add2_Hi}));
+
SDValue Mulhi3 = DAG.getNode(ISD::MULHU, DL, VT, LHS, Add2);
SDValue Mul3 = DAG.getNode(ISD::MUL, DL, VT, RHS, Mulhi3);
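
The two 'UNR' rounds above are the classic Newton-Raphson reciprocal refinement x1 = x0 * (2 - d * x0); the DAG code carries it out in fixed point, split into 32-bit halves. A minimal floating-point sketch of the same iteration (hypothetical names):

#include <cstdio>

// One Newton-Raphson step roughly doubles the number of correct bits in the
// reciprocal estimate; two steps take a coarse seed close to full precision.
static double refineReciprocal(double d, double x0) {
  return x0 * (2.0 - d * x0);
}

int main() {
  double d = 7.0;
  double x = 0.125;                 // coarse seed for 1/7
  x = refineReciprocal(d, x);       // first round
  x = refineReciprocal(d, x);       // second round
  std::printf("%.9f\n", x);         // converges toward 1/7 = 0.142857...
}
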
@@ -2211,13 +2193,10 @@ SDValue AMDGPUTargetLowering::LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const {
assert(Op.getValueType() == MVT::f64);
const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
- const SDValue One = DAG.getConstant(1, SL, MVT::i32);
-
- SDValue VecSrc = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Src);
// Extract the upper half, since this is where we will find the sign and
// exponent.
- SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, VecSrc, One);
+ SDValue Hi = getHiHalf64(Src, DAG);
SDValue Exp = extractF64Exponent(Hi, SL, DAG);
@@ -2380,72 +2359,50 @@ static bool isCttzOpc(unsigned Opc) {
SDValue AMDGPUTargetLowering::LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue Src = Op.getOperand(0);
- bool ZeroUndef = Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF ||
- Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF;
-
- unsigned ISDOpc, NewOpc;
- if (isCtlzOpc(Op.getOpcode())) {
- ISDOpc = ISD::CTLZ_ZERO_UNDEF;
- NewOpc = AMDGPUISD::FFBH_U32;
- } else if (isCttzOpc(Op.getOpcode())) {
- ISDOpc = ISD::CTTZ_ZERO_UNDEF;
- NewOpc = AMDGPUISD::FFBL_B32;
- } else
- llvm_unreachable("Unexpected OPCode!!!");
-
-
- if (ZeroUndef && Src.getValueType() == MVT::i32)
- return DAG.getNode(NewOpc, SL, MVT::i32, Src);
- SDValue Vec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Src);
-
- const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
- const SDValue One = DAG.getConstant(1, SL, MVT::i32);
-
- SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, Zero);
- SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, One);
-
- EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(),
- *DAG.getContext(), MVT::i32);
-
- SDValue HiOrLo = isCtlzOpc(Op.getOpcode()) ? Hi : Lo;
- SDValue Hi0orLo0 = DAG.getSetCC(SL, SetCCVT, HiOrLo, Zero, ISD::SETEQ);
-
- SDValue OprLo = DAG.getNode(ISDOpc, SL, MVT::i32, Lo);
- SDValue OprHi = DAG.getNode(ISDOpc, SL, MVT::i32, Hi);
-
- const SDValue Bits32 = DAG.getConstant(32, SL, MVT::i32);
- SDValue Add, NewOpr;
- if (isCtlzOpc(Op.getOpcode())) {
- Add = DAG.getNode(ISD::ADD, SL, MVT::i32, OprLo, Bits32);
- // ctlz(x) = hi_32(x) == 0 ? ctlz(lo_32(x)) + 32 : ctlz(hi_32(x))
- NewOpr = DAG.getNode(ISD::SELECT, SL, MVT::i32, Hi0orLo0, Add, OprHi);
- } else {
- Add = DAG.getNode(ISD::ADD, SL, MVT::i32, OprHi, Bits32);
- // cttz(x) = lo_32(x) == 0 ? cttz(hi_32(x)) + 32 : cttz(lo_32(x))
- NewOpr = DAG.getNode(ISD::SELECT, SL, MVT::i32, Hi0orLo0, Add, OprLo);
+ assert(isCtlzOpc(Op.getOpcode()) || isCttzOpc(Op.getOpcode()));
+ bool Ctlz = isCtlzOpc(Op.getOpcode());
+ unsigned NewOpc = Ctlz ? AMDGPUISD::FFBH_U32 : AMDGPUISD::FFBL_B32;
+
+ bool ZeroUndef = Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF ||
+ Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF;
+
+ if (Src.getValueType() == MVT::i32) {
+ // (ctlz hi:lo) -> (umin (ffbh src), 32)
+ // (cttz hi:lo) -> (umin (ffbl src), 32)
+ // (ctlz_zero_undef src) -> (ffbh src)
+ // (cttz_zero_undef src) -> (ffbl src)
+ SDValue NewOpr = DAG.getNode(NewOpc, SL, MVT::i32, Src);
+ if (!ZeroUndef) {
+ const SDValue Const32 = DAG.getConstant(32, SL, MVT::i32);
+ NewOpr = DAG.getNode(ISD::UMIN, SL, MVT::i32, NewOpr, Const32);
+ }
+ return NewOpr;
}
- if (!ZeroUndef) {
- // Test if the full 64-bit input is zero.
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = split64BitValue(Src, DAG);
- // FIXME: DAG combines turn what should be an s_and_b64 into a v_or_b32,
- // which we probably don't want.
- SDValue LoOrHi = isCtlzOpc(Op.getOpcode()) ? Lo : Hi;
- SDValue Lo0OrHi0 = DAG.getSetCC(SL, SetCCVT, LoOrHi, Zero, ISD::SETEQ);
- SDValue SrcIsZero = DAG.getNode(ISD::AND, SL, SetCCVT, Lo0OrHi0, Hi0orLo0);
+ SDValue OprLo = DAG.getNode(NewOpc, SL, MVT::i32, Lo);
+ SDValue OprHi = DAG.getNode(NewOpc, SL, MVT::i32, Hi);
- // TODO: If i64 setcc is half rate, it can result in 1 fewer instruction
- // with the same cycles, otherwise it is slower.
- // SDValue SrcIsZero = DAG.getSetCC(SL, SetCCVT, Src,
- // DAG.getConstant(0, SL, MVT::i64), ISD::SETEQ);
+ // (ctlz hi:lo) -> (umin3 (ffbh hi), (uaddsat (ffbh lo), 32), 64)
+ // (cttz hi:lo) -> (umin3 (uaddsat (ffbl hi), 32), (ffbl lo), 64)
+ // (ctlz_zero_undef hi:lo) -> (umin (ffbh hi), (add (ffbh lo), 32))
+ // (cttz_zero_undef hi:lo) -> (umin (add (ffbl hi), 32), (ffbl lo))
- const SDValue Bits32 = DAG.getConstant(64, SL, MVT::i32);
+ unsigned AddOpc = ZeroUndef ? ISD::ADD : ISD::UADDSAT;
+ const SDValue Const32 = DAG.getConstant(32, SL, MVT::i32);
+ if (Ctlz)
+ OprLo = DAG.getNode(AddOpc, SL, MVT::i32, OprLo, Const32);
+ else
+ OprHi = DAG.getNode(AddOpc, SL, MVT::i32, OprHi, Const32);
- // The instruction returns -1 for 0 input, but the defined intrinsic
- // behavior is to return the number of bits.
- NewOpr = DAG.getNode(ISD::SELECT, SL, MVT::i32,
- SrcIsZero, Bits32, NewOpr);
+ SDValue NewOpr;
+ NewOpr = DAG.getNode(ISD::UMIN, SL, MVT::i32, OprLo, OprHi);
+ if (!ZeroUndef) {
+ const SDValue Const64 = DAG.getConstant(64, SL, MVT::i32);
+ NewOpr = DAG.getNode(ISD::UMIN, SL, MVT::i32, NewOpr, Const64);
}
return DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i64, NewOpr);
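
A host-side sketch of the 64-bit ctlz formula from the comments above (hypothetical names, assuming GCC/Clang's __builtin_clz). ffbh mimics the hardware convention of returning -1 (all ones) for a zero input, which is what lets a single umin against 64 handle the all-zero case:

#include <algorithm>
#include <cstdint>

// Hardware-style ffbh: count leading zeros, all ones when the input is 0.
static uint32_t ffbh(uint32_t v) {
  return v ? uint32_t(__builtin_clz(v)) : 0xffffffffu;
}

static uint32_t uaddsat(uint32_t a, uint32_t b) {
  uint64_t s = uint64_t(a) + b;
  return s > 0xffffffffu ? 0xffffffffu : uint32_t(s);
}

// (ctlz hi:lo) -> (umin3 (ffbh hi), (uaddsat (ffbh lo), 32), 64)
static uint32_t ctlz64(uint64_t x) {
  uint32_t Hi = uint32_t(x >> 32), Lo = uint32_t(x);
  uint32_t R = std::min(ffbh(Hi), uaddsat(ffbh(Lo), 32));
  return std::min<uint32_t>(R, 64);
}
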
@@ -2453,87 +2410,128 @@ SDValue AMDGPUTargetLowering::LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) cons
SDValue AMDGPUTargetLowering::LowerINT_TO_FP32(SDValue Op, SelectionDAG &DAG,
bool Signed) const {
- // Unsigned
- // cul2f(ulong u)
- //{
- // uint lz = clz(u);
- // uint e = (u != 0) ? 127U + 63U - lz : 0;
- // u = (u << lz) & 0x7fffffffffffffffUL;
- // ulong t = u & 0xffffffffffUL;
- // uint v = (e << 23) | (uint)(u >> 40);
- // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
- // return as_float(v + r);
- //}
- // Signed
- // cl2f(long l)
- //{
- // long s = l >> 63;
- // float r = cul2f((l + s) ^ s);
- // return s ? -r : r;
- //}
+ // The regular method of converting a 64-bit integer to float roughly consists of
+ // 2 steps: normalization and rounding. In fact, after normalization, the
+ // conversion from a 64-bit integer to a float is essentially the same as the
+ // one from a 32-bit integer. The only difference is that it has more
+ // trailing bits to be rounded. To leverage the native 32-bit conversion, a
+ // 64-bit integer could be preprocessed and fit into a 32-bit integer then
+ // converted into the correct float number. The basic steps for the unsigned
+ // conversion are illustrated in the following pseudo code:
+ //
+ // f32 uitofp(i64 u) {
+ // i32 hi, lo = split(u);
+ // // Only count the leading zeros in hi as we have native support of the
+ // // conversion from i32 to f32. If hi is all 0s, the conversion is
+ // // reduced to a 32-bit one automatically.
+ // i32 shamt = clz(hi); // Return 32 if hi is all 0s.
+ // u <<= shamt;
+ // hi, lo = split(u);
+ // hi |= (lo != 0) ? 1 : 0; // Adjust rounding bit in hi based on lo.
+ // // convert it as a 32-bit integer and scale the result back.
+ // return uitofp(hi) * 2^(32 - shamt);
+ // }
+ //
+ // The signed one follows the same principle but uses 'ffbh_i32' to count its
+ // sign bits instead. If 'ffbh_i32' is not available, its absolute value is
+ // converted instead, followed by negation based on its sign bit.
SDLoc SL(Op);
SDValue Src = Op.getOperand(0);
- SDValue L = Src;
- SDValue S;
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = split64BitValue(Src, DAG);
+ SDValue Sign;
+ SDValue ShAmt;
+ if (Signed && Subtarget->isGCN()) {
+ // We also need to consider the sign bit in Lo if Hi has just sign bits,
+ // i.e. Hi is 0 or -1. However, that only needs to take the MSB into
+ // account. That is, the maximal shift is
+ // - 32 if Lo and Hi have opposite signs;
+ // - 33 if Lo and Hi have the same sign.
+ //
+ // Or, MaxShAmt = 33 + OppositeSign, where
+ //
+ // OppositeSign is defined as ((Lo ^ Hi) >> 31), which is
+ // - -1 if Lo and Hi have opposite signs; and
+ // - 0 otherwise.
+ //
+ // All in all, ShAmt is calculated as
+ //
+ // umin(sffbh(Hi), 33 + (Lo^Hi)>>31) - 1.
+ //
+ // or
+ //
+ // umin(sffbh(Hi) - 1, 32 + (Lo^Hi)>>31).
+ //
+ // to reduce the critical path.
+ SDValue OppositeSign = DAG.getNode(
+ ISD::SRA, SL, MVT::i32, DAG.getNode(ISD::XOR, SL, MVT::i32, Lo, Hi),
+ DAG.getConstant(31, SL, MVT::i32));
+ SDValue MaxShAmt =
+ DAG.getNode(ISD::ADD, SL, MVT::i32, DAG.getConstant(32, SL, MVT::i32),
+ OppositeSign);
+ // Count the leading sign bits.
+ ShAmt = DAG.getNode(AMDGPUISD::FFBH_I32, SL, MVT::i32, Hi);
+ // Unlike the unsigned conversion, the shift should be one bit less to
+ // preserve the sign bit.
+ ShAmt = DAG.getNode(ISD::SUB, SL, MVT::i32, ShAmt,
+ DAG.getConstant(1, SL, MVT::i32));
+ ShAmt = DAG.getNode(ISD::UMIN, SL, MVT::i32, ShAmt, MaxShAmt);
+ } else {
+ if (Signed) {
+ // Without 'ffbh_i32', only leading zeros could be counted. Take the
+ // absolute value first.
+ Sign = DAG.getNode(ISD::SRA, SL, MVT::i64, Src,
+ DAG.getConstant(63, SL, MVT::i64));
+ SDValue Abs =
+ DAG.getNode(ISD::XOR, SL, MVT::i64,
+ DAG.getNode(ISD::ADD, SL, MVT::i64, Src, Sign), Sign);
+ std::tie(Lo, Hi) = split64BitValue(Abs, DAG);
+ }
+ // Count the leading zeros.
+ ShAmt = DAG.getNode(ISD::CTLZ, SL, MVT::i32, Hi);
+ // The shift amount for signed integers is [0, 32].
+ }
+ // Normalize the given 64-bit integer.
+ SDValue Norm = DAG.getNode(ISD::SHL, SL, MVT::i64, Src, ShAmt);
+ // Split it again.
+ std::tie(Lo, Hi) = split64BitValue(Norm, DAG);
+ // Calculate the adjust bit for rounding.
+ // (lo != 0) ? 1 : 0 => (lo >= 1) ? 1 : 0 => umin(1, lo)
+ SDValue Adjust = DAG.getNode(ISD::UMIN, SL, MVT::i32,
+ DAG.getConstant(1, SL, MVT::i32), Lo);
+ // Get the 32-bit normalized integer.
+ Norm = DAG.getNode(ISD::OR, SL, MVT::i32, Hi, Adjust);
+ // Convert the normalized 32-bit integer into f32.
+ unsigned Opc =
+ (Signed && Subtarget->isGCN()) ? ISD::SINT_TO_FP : ISD::UINT_TO_FP;
+ SDValue FVal = DAG.getNode(Opc, SL, MVT::f32, Norm);
+
+ // Finally, scale the converted value back, since the original 64-bit
+ // integer was converted as a 32-bit one.
+ ShAmt = DAG.getNode(ISD::SUB, SL, MVT::i32, DAG.getConstant(32, SL, MVT::i32),
+ ShAmt);
+ // On GCN, use LDEXP directly.
+ if (Subtarget->isGCN())
+ return DAG.getNode(AMDGPUISD::LDEXP, SL, MVT::f32, FVal, ShAmt);
+
+ // Otherwise, align 'ShAmt' to the exponent part and add it into the exponent
+ // part directly to emulate the multiplication of 2^ShAmt. That 8-bit
+ // exponent is enough to avoid overflowing into the sign bit.
+ SDValue Exp = DAG.getNode(ISD::SHL, SL, MVT::i32, ShAmt,
+ DAG.getConstant(23, SL, MVT::i32));
+ SDValue IVal =
+ DAG.getNode(ISD::ADD, SL, MVT::i32,
+ DAG.getNode(ISD::BITCAST, SL, MVT::i32, FVal), Exp);
if (Signed) {
- const SDValue SignBit = DAG.getConstant(63, SL, MVT::i64);
- S = DAG.getNode(ISD::SRA, SL, MVT::i64, L, SignBit);
-
- SDValue LPlusS = DAG.getNode(ISD::ADD, SL, MVT::i64, L, S);
- L = DAG.getNode(ISD::XOR, SL, MVT::i64, LPlusS, S);
+ // Set the sign bit.
+ Sign = DAG.getNode(ISD::SHL, SL, MVT::i32,
+ DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, Sign),
+ DAG.getConstant(31, SL, MVT::i32));
+ IVal = DAG.getNode(ISD::OR, SL, MVT::i32, IVal, Sign);
}
-
- EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(),
- *DAG.getContext(), MVT::f32);
-
-
- SDValue ZeroI32 = DAG.getConstant(0, SL, MVT::i32);
- SDValue ZeroI64 = DAG.getConstant(0, SL, MVT::i64);
- SDValue LZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SL, MVT::i64, L);
- LZ = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LZ);
-
- SDValue K = DAG.getConstant(127U + 63U, SL, MVT::i32);
- SDValue E = DAG.getSelect(SL, MVT::i32,
- DAG.getSetCC(SL, SetCCVT, L, ZeroI64, ISD::SETNE),
- DAG.getNode(ISD::SUB, SL, MVT::i32, K, LZ),
- ZeroI32);
-
- SDValue U = DAG.getNode(ISD::AND, SL, MVT::i64,
- DAG.getNode(ISD::SHL, SL, MVT::i64, L, LZ),
- DAG.getConstant((-1ULL) >> 1, SL, MVT::i64));
-
- SDValue T = DAG.getNode(ISD::AND, SL, MVT::i64, U,
- DAG.getConstant(0xffffffffffULL, SL, MVT::i64));
-
- SDValue UShl = DAG.getNode(ISD::SRL, SL, MVT::i64,
- U, DAG.getConstant(40, SL, MVT::i64));
-
- SDValue V = DAG.getNode(ISD::OR, SL, MVT::i32,
- DAG.getNode(ISD::SHL, SL, MVT::i32, E, DAG.getConstant(23, SL, MVT::i32)),
- DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, UShl));
-
- SDValue C = DAG.getConstant(0x8000000000ULL, SL, MVT::i64);
- SDValue RCmp = DAG.getSetCC(SL, SetCCVT, T, C, ISD::SETUGT);
- SDValue TCmp = DAG.getSetCC(SL, SetCCVT, T, C, ISD::SETEQ);
-
- SDValue One = DAG.getConstant(1, SL, MVT::i32);
-
- SDValue VTrunc1 = DAG.getNode(ISD::AND, SL, MVT::i32, V, One);
-
- SDValue R = DAG.getSelect(SL, MVT::i32,
- RCmp,
- One,
- DAG.getSelect(SL, MVT::i32, TCmp, VTrunc1, ZeroI32));
- R = DAG.getNode(ISD::ADD, SL, MVT::i32, V, R);
- R = DAG.getNode(ISD::BITCAST, SL, MVT::f32, R);
-
- if (!Signed)
- return R;
-
- SDValue RNeg = DAG.getNode(ISD::FNEG, SL, MVT::f32, R);
- return DAG.getSelect(SL, MVT::f32, DAG.getSExtOrTrunc(S, SL, SetCCVT), RNeg, R);
+ return DAG.getNode(ISD::BITCAST, SL, MVT::f32, IVal);
}
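
A host-side sketch of the unsigned path in the pseudo code above (hypothetical name, assuming GCC/Clang's __builtin_clz and the float overload of std::ldexp): normalize on the high word only, fold any nonzero low bits into the rounding bit, convert natively, then scale back:

#include <cmath>
#include <cstdint>

static float uitofp64(uint64_t u) {
  uint32_t hi = uint32_t(u >> 32);
  unsigned shamt = hi ? unsigned(__builtin_clz(hi)) : 32;  // clz of hi only
  uint64_t norm = u << shamt;
  hi = uint32_t(norm >> 32);
  uint32_t lo = uint32_t(norm);
  hi |= (lo != 0) ? 1u : 0u;                      // adjust the rounding bit
  return std::ldexp(float(hi), 32 - int(shamt));  // scale back by 2^(32-shamt)
}
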
SDValue AMDGPUTargetLowering::LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG,
@@ -2541,12 +2539,8 @@ SDValue AMDGPUTargetLowering::LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG,
SDLoc SL(Op);
SDValue Src = Op.getOperand(0);
- SDValue BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Src);
-
- SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, BC,
- DAG.getConstant(0, SL, MVT::i32));
- SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, BC,
- DAG.getConstant(1, SL, MVT::i32));
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = split64BitValue(Src, DAG);
SDValue CvtHi = DAG.getNode(Signed ? ISD::SINT_TO_FP : ISD::UINT_TO_FP,
SL, MVT::f64, Hi);
@@ -2878,7 +2872,7 @@ static bool isI24(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
return VT.getSizeInBits() >= 24 && // Types less than 24-bit should be treated
// as unsigned 24-bit values.
- AMDGPUTargetLowering::numBitsSigned(Op, DAG) < 24;
+ AMDGPUTargetLowering::numBitsSigned(Op, DAG) <= 24;
}
static SDValue simplifyMul24(SDNode *Node24,
@@ -2892,8 +2886,22 @@ static SDValue simplifyMul24(SDNode *Node24,
unsigned NewOpcode = Node24->getOpcode();
if (IsIntrin) {
unsigned IID = cast<ConstantSDNode>(Node24->getOperand(0))->getZExtValue();
- NewOpcode = IID == Intrinsic::amdgcn_mul_i24 ?
- AMDGPUISD::MUL_I24 : AMDGPUISD::MUL_U24;
+ switch (IID) {
+ case Intrinsic::amdgcn_mul_i24:
+ NewOpcode = AMDGPUISD::MUL_I24;
+ break;
+ case Intrinsic::amdgcn_mul_u24:
+ NewOpcode = AMDGPUISD::MUL_U24;
+ break;
+ case Intrinsic::amdgcn_mulhi_i24:
+ NewOpcode = AMDGPUISD::MULHI_I24;
+ break;
+ case Intrinsic::amdgcn_mulhi_u24:
+ NewOpcode = AMDGPUISD::MULHI_U24;
+ break;
+ default:
+ llvm_unreachable("Expected 24-bit mul intrinsic");
+ }
}
APInt Demanded = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 24);
@@ -3102,6 +3110,8 @@ SDValue AMDGPUTargetLowering::performIntrinsicWOChainCombine(
switch (IID) {
case Intrinsic::amdgcn_mul_i24:
case Intrinsic::amdgcn_mul_u24:
+ case Intrinsic::amdgcn_mulhi_i24:
+ case Intrinsic::amdgcn_mulhi_u24:
return simplifyMul24(N, DCI);
case Intrinsic::amdgcn_fract:
case Intrinsic::amdgcn_rsq:
@@ -3281,11 +3291,9 @@ SDValue AMDGPUTargetLowering::performSrlCombine(SDNode *N,
// srl i64:x, C for C >= 32
// =>
// build_pair (srl hi_32(x), C - 32), 0
- SDValue One = DAG.getConstant(1, SL, MVT::i32);
SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
- SDValue VecOp = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, LHS);
- SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, VecOp, One);
+ SDValue Hi = getHiHalf64(LHS, DAG);
SDValue NewConst = DAG.getConstant(ShiftAmt - 32, SL, MVT::i32);
SDValue NewShift = DAG.getNode(ISD::SRL, SL, MVT::i32, Hi, NewConst);
@@ -3355,7 +3363,7 @@ SDValue AMDGPUTargetLowering::performTruncateCombine(
KnownBits Known = DAG.computeKnownBits(Amt);
unsigned Size = VT.getScalarSizeInBits();
if ((Known.isConstant() && Known.getConstant().ule(Size)) ||
- (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size))) {
+ (Known.countMaxActiveBits() <= Log2_32(Size))) {
EVT MidVT = VT.isVector() ?
EVT::getVectorVT(*DAG.getContext(), MVT::i32,
VT.getVectorNumElements()) : MVT::i32;
@@ -3522,7 +3530,7 @@ SDValue AMDGPUTargetLowering::performMulhuCombine(SDNode *N,
static bool isNegativeOne(SDValue Val) {
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val))
- return C->isAllOnesValue();
+ return C->isAllOnes();
return false;
}
@@ -3557,7 +3565,7 @@ SDValue AMDGPUTargetLowering::performCtlz_CttzCombine(const SDLoc &SL, SDValue C
SDValue LHS, SDValue RHS,
DAGCombinerInfo &DCI) const {
ConstantSDNode *CmpRhs = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
- if (!CmpRhs || !CmpRhs->isNullValue())
+ if (!CmpRhs || !CmpRhs->isZero())
return SDValue();
SelectionDAG &DAG = DCI.DAG;
@@ -4341,6 +4349,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(TC_RETURN)
NODE_NAME_CASE(TRAP)
NODE_NAME_CASE(RET_FLAG)
+ NODE_NAME_CASE(RET_GFX_FLAG)
NODE_NAME_CASE(RETURN_TO_EPILOG)
NODE_NAME_CASE(ENDPGM)
NODE_NAME_CASE(DWORDADDR)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index e61021d451f8..03632ac18598 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -35,9 +35,15 @@ private:
SDValue getFFBX_U32(SelectionDAG &DAG, SDValue Op, const SDLoc &DL, unsigned Opc) const;
public:
+ /// \returns The minimum number of bits needed to store the value of \Op as an
+ /// unsigned integer. Truncating to this size and then zero-extending to the
+ /// original size will not change the value.
static unsigned numBitsUnsigned(SDValue Op, SelectionDAG &DAG);
+
+ /// \returns The minimum number of bits needed to store the value of \Op as a
+ /// signed integer. Truncating to this size and then sign-extending to the
+ /// original size will not change the value.
static unsigned numBitsSigned(SDValue Op, SelectionDAG &DAG);
- static bool hasDefinedInitializer(const GlobalValue *GV);
protected:
SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
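
A scalar illustration of what the two documented helpers compute, for a fully-known 32-bit value (hypothetical names, assuming GCC/Clang's __builtin_clz):

#include <cstdint>

static unsigned numBitsUnsigned32(uint32_t V) {
  return V ? 32 - unsigned(__builtin_clz(V)) : 0;
}

static unsigned numBitsSigned32(int32_t V) {
  // Smallest width that still sign-extends back to V: drop redundant copies
  // of the sign bit, then add the sign bit itself.
  uint32_t Magnitude = V < 0 ? ~uint32_t(V) : uint32_t(V);
  return numBitsUnsigned32(Magnitude) + 1;
}

// numBitsUnsigned32(5) == 3, numBitsSigned32(5) == 4, numBitsSigned32(-1) == 1.
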
@@ -337,7 +343,7 @@ namespace AMDGPUISD {
enum NodeType : unsigned {
// AMDIL ISD Opcodes
FIRST_NUMBER = ISD::BUILTIN_OP_END,
- UMUL, // 32bit unsigned multiplication
+ UMUL, // 32bit unsigned multiplication
BRANCH_COND,
// End AMDIL ISD Opcodes
@@ -360,6 +366,9 @@ enum NodeType : unsigned {
// Return with values from a non-entry function.
RET_FLAG,
+ // Return with values from a non-entry function (AMDGPU_Gfx CC).
+ RET_GFX_FLAG,
+
DWORDADDR,
FRACT,
@@ -416,10 +425,10 @@ enum NodeType : unsigned {
DOT4,
CARRY,
BORROW,
- BFE_U32, // Extract range of bits with zero extension to 32-bits.
- BFE_I32, // Extract range of bits with sign extension to 32-bits.
- BFI, // (src0 & src1) | (~src0 & src2)
- BFM, // Insert a range of bits into a 32-bit word.
+ BFE_U32, // Extract range of bits with zero extension to 32-bits.
+ BFE_I32, // Extract range of bits with sign extension to 32-bits.
+ BFI, // (src0 & src1) | (~src0 & src2)
+ BFM, // Insert a range of bits into a 32-bit word.
FFBH_U32, // ctlz with -1 if input is zero.
FFBH_I32,
FFBL_B32, // cttz with -1 if input is zero.
@@ -528,7 +537,6 @@ enum NodeType : unsigned {
LAST_AMDGPU_ISD_NUMBER
};
-
} // End namespace AMDGPUISD
} // End namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 06aa0055e4bb..88b4ec53a2a0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -17,7 +17,6 @@
#include "AMDGPUInstrInfo.h"
#include "AMDGPUTargetTransformInfo.h"
#include "GCNSubtarget.h"
-#include "R600Subtarget.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
@@ -149,7 +148,7 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
Function *I =
Intrinsic::getDeclaration(II.getModule(), II.getIntrinsicID(), ArgTys);
- SmallVector<Value *, 8> Args(II.arg_operands());
+ SmallVector<Value *, 8> Args(II.args());
unsigned EndIndex =
OnlyDerivatives ? ImageDimIntr->CoordStart : ImageDimIntr->VAddrEnd;
@@ -440,7 +439,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
if (!CWidth || !COffset)
break;
- // The case of Width == 0 is handled above, which makes this tranformation
+ // The case of Width == 0 is handled above, which makes this transformation
// safe. If Width == 0, then the ashr and lshr instructions become poison
// value since the shift amount would be equal to the bit size.
assert(Width != 0);
@@ -586,8 +585,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
MDNode *MD = MDNode::get(II.getContext(), MDArgs);
Value *Args[] = {MetadataAsValue::get(II.getContext(), MD)};
CallInst *NewCall = IC.Builder.CreateCall(NewF, Args);
- NewCall->addAttribute(AttributeList::FunctionIndex,
- Attribute::Convergent);
+ NewCall->addFnAttr(Attribute::Convergent);
NewCall->takeName(&II);
return IC.replaceInstUsesWith(II, NewCall);
}
@@ -712,8 +710,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
MDNode *MD = MDNode::get(II.getContext(), MDArgs);
Value *Args[] = {MetadataAsValue::get(II.getContext(), MD)};
CallInst *NewCall = IC.Builder.CreateCall(NewF, Args);
- NewCall->addAttribute(AttributeList::FunctionIndex,
- Attribute::Convergent);
+ NewCall->addFnAttr(Attribute::Convergent);
NewCall->takeName(&II);
return IC.replaceInstUsesWith(II, NewCall);
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
index 8e7a6a7029c6..b1263618c5db 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
@@ -76,8 +76,8 @@ struct ImageDimIntrinsicInfo {
};
const ImageDimIntrinsicInfo *getImageDimIntrinsicInfo(unsigned Intr);
-const ImageDimIntrinsicInfo *getImageDimInstrinsicByBaseOpcode(unsigned BaseOpcode,
- unsigned Dim);
+const ImageDimIntrinsicInfo *
+getImageDimIntrinsicByBaseOpcode(unsigned BaseOpcode, unsigned Dim);
} // end AMDGPU namespace
} // End llvm namespace
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index 0f9cb712f820..391dc8428539 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -103,9 +103,6 @@ def AMDGPUconstdata_ptr : SDNode<
// This argument to this node is a dword address.
def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;
-// Force dependencies for vector trunc stores
-def R600dummy_chain : SDNode<"AMDGPUISD::DUMMY_CHAIN", SDTNone, [SDNPHasChain]>;
-
def AMDGPUcos_impl : SDNode<"AMDGPUISD::COS_HW", SDTFPUnaryOp>;
def AMDGPUsin_impl : SDNode<"AMDGPUISD::SIN_HW", SDTFPUnaryOp>;
// out = a - floor(a)
@@ -282,11 +279,18 @@ def AMDGPUmul_i24_impl : SDNode<"AMDGPUISD::MUL_I24", SDTIntBinOp,
[SDNPCommutative, SDNPAssociative]
>;
-def AMDGPUmulhi_u24 : SDNode<"AMDGPUISD::MULHI_U24", SDTIntBinOp,
- [SDNPCommutative, SDNPAssociative]
+// mulhi24 yields the high-order 16 bits of the 48-bit result. Here's an example
+// that shows mulhi24 is not associative:
+//
+// Given a = 0x10002, b = c = 0xffffff:
+// mulhi24(mulhi24(a, b), c) = mulhi24(0x100, 0xffffff) = 0
+// Which is not equal to:
+// mulhi24(a, mulhi24(b, c)) = mulhi24(0x10002, 0xffff) = 1
+def AMDGPUmulhi_u24_impl : SDNode<"AMDGPUISD::MULHI_U24", SDTIntBinOp,
+ [SDNPCommutative]
>;
-def AMDGPUmulhi_i24 : SDNode<"AMDGPUISD::MULHI_I24", SDTIntBinOp,
- [SDNPCommutative, SDNPAssociative]
+def AMDGPUmulhi_i24_impl : SDNode<"AMDGPUISD::MULHI_I24", SDTIntBinOp,
+ [SDNPCommutative]
>;
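
The worked example in the comment above can be checked with a small host-side model of mulhi24 (hypothetical name): multiply the low 24 bits of each operand and keep bits [47:32] of the 48-bit product.

#include <cassert>
#include <cstdint>

static uint32_t mulhi24(uint32_t a, uint32_t b) {
  uint64_t p = uint64_t(a & 0xffffff) * uint64_t(b & 0xffffff);
  return uint32_t(p >> 32);  // high-order 16 bits of the 48-bit product
}

int main() {
  uint32_t a = 0x10002, b = 0xffffff, c = 0xffffff;
  assert(mulhi24(mulhi24(a, b), c) == 0);
  assert(mulhi24(a, mulhi24(b, c)) == 1);  // differs, so not associative
}
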
def AMDGPUmad_u24 : SDNode<"AMDGPUISD::MAD_U24", AMDGPUDTIntTernaryOp,
@@ -329,11 +333,6 @@ def AMDGPUExportOp : SDTypeProfile<0, 8, [
]>;
-def R600ExportOp : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>;
-
-def R600_EXPORT: SDNode<"AMDGPUISD::R600_EXPORT", R600ExportOp,
- [SDNPHasChain, SDNPSideEffect]>;
-
//===----------------------------------------------------------------------===//
// Flow Control Profile Types
//===----------------------------------------------------------------------===//
@@ -360,6 +359,10 @@ def AMDGPUret_flag : SDNode<"AMDGPUISD::RET_FLAG", SDTypeProfile<0, 1, [SDTCisPt
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
>;
+def AMDGPUret_gfx_flag : SDNode<"AMDGPUISD::RET_GFX_FLAG", SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
+>;
+
//===----------------------------------------------------------------------===//
// Intrinsic/Custom node compatibility PatFrags
@@ -443,6 +446,14 @@ def AMDGPUmul_i24 : PatFrags<(ops node:$src0, node:$src1),
[(int_amdgcn_mul_i24 node:$src0, node:$src1),
(AMDGPUmul_i24_impl node:$src0, node:$src1)]>;
+def AMDGPUmulhi_u24 : PatFrags<(ops node:$src0, node:$src1),
+ [(int_amdgcn_mulhi_u24 node:$src0, node:$src1),
+ (AMDGPUmulhi_u24_impl node:$src0, node:$src1)]>;
+
+def AMDGPUmulhi_i24 : PatFrags<(ops node:$src0, node:$src1),
+ [(int_amdgcn_mulhi_i24 node:$src0, node:$src1),
+ (AMDGPUmulhi_i24_impl node:$src0, node:$src1)]>;
+
def AMDGPUbfe_i32 : PatFrags<(ops node:$src0, node:$src1, node:$src2),
[(int_amdgcn_sbfe node:$src0, node:$src1, node:$src2),
(AMDGPUbfe_i32_impl node:$src0, node:$src1, node:$src2)]>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 323aaaf70cd4..28cb2fc57ac7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -24,6 +24,7 @@
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
#define DEBUG_TYPE "amdgpu-isel"
@@ -140,7 +141,7 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
= TRI.getConstrainedRegClassForOperand(Src, *MRI);
Optional<ValueAndVReg> ConstVal =
- getConstantVRegValWithLookThrough(SrcReg, *MRI, true, true);
+ getIConstantVRegValWithLookThrough(SrcReg, *MRI, true);
if (ConstVal) {
unsigned MovOpc =
STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
@@ -608,11 +609,10 @@ bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR_TRUNC(
const DebugLoc &DL = MI.getDebugLoc();
MachineBasicBlock *BB = MI.getParent();
- auto ConstSrc1 =
- getConstantVRegValWithLookThrough(Src1, *MRI, true, true, true);
+ auto ConstSrc1 = getAnyConstantVRegValWithLookThrough(Src1, *MRI, true, true);
if (ConstSrc1) {
auto ConstSrc0 =
- getConstantVRegValWithLookThrough(Src0, *MRI, true, true, true);
+ getAnyConstantVRegValWithLookThrough(Src0, *MRI, true, true);
if (ConstSrc0) {
const int64_t K0 = ConstSrc0->Value.getSExtValue();
const int64_t K1 = ConstSrc1->Value.getSExtValue();
@@ -844,7 +844,7 @@ bool AMDGPUInstructionSelector::selectWritelane(MachineInstr &MI) const {
auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_WRITELANE_B32), VDst);
Optional<ValueAndVReg> ConstSelect =
- getConstantVRegValWithLookThrough(LaneSelect, *MRI, true, true);
+ getIConstantVRegValWithLookThrough(LaneSelect, *MRI);
if (ConstSelect) {
// The selector has to be an inline immediate, so we can use whatever for
// the other operands.
@@ -853,7 +853,7 @@ bool AMDGPUInstructionSelector::selectWritelane(MachineInstr &MI) const {
maskTrailingOnes<uint64_t>(STI.getWavefrontSizeLog2()));
} else {
Optional<ValueAndVReg> ConstVal =
- getConstantVRegValWithLookThrough(Val, *MRI, true, true);
+ getIConstantVRegValWithLookThrough(Val, *MRI);
// If the value written is an inline immediate, we can get away without a
// copy to m0.
@@ -928,7 +928,7 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
case Intrinsic::amdgcn_if_break: {
MachineBasicBlock *BB = I.getParent();
- // FIXME: Manually selecting to avoid dealiing with the SReg_1 trick
+ // FIXME: Manually selecting to avoid dealing with the SReg_1 trick
// SelectionDAG uses for wave32 vs wave64.
BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
.add(I.getOperand(0))
@@ -1130,7 +1130,7 @@ bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const {
return false;
Optional<ValueAndVReg> Arg =
- getConstantVRegValWithLookThrough(I.getOperand(2).getReg(), *MRI, true);
+ getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), *MRI);
if (Arg.hasValue()) {
const int64_t Value = Arg.getValue().Value.getSExtValue();
@@ -1242,7 +1242,7 @@ bool AMDGPUInstructionSelector::selectReturnAddress(MachineInstr &I) const {
}
bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const {
- // FIXME: Manually selecting to avoid dealiing with the SReg_1 trick
+ // FIXME: Manually selecting to avoid dealing with the SReg_1 trick
// SelectionDAG uses for wave32 vs wave64.
MachineBasicBlock *BB = MI.getParent();
BuildMI(*BB, &MI, MI.getDebugLoc(), TII.get(AMDGPU::SI_END_CF))
@@ -1826,8 +1826,9 @@ bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
.add(I.getOperand(2))
.add(I.getOperand(3));
- bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI) |
- constrainSelectedInstRegOperands(*CopySCC, TII, TRI, RBI);
+ bool Ret = false;
+ Ret |= constrainSelectedInstRegOperands(*Select, TII, TRI, RBI);
+ Ret |= constrainSelectedInstRegOperands(*CopySCC, TII, TRI, RBI);
I.eraseFromParent();
return Ret;
}
@@ -2387,7 +2388,7 @@ void AMDGPUInstructionSelector::initM0(MachineInstr &I) const {
STI.ldsRequiresM0Init()) {
MachineBasicBlock *BB = I.getParent();
- // If DS instructions require M0 initializtion, insert it before selecting.
+ // If DS instructions require M0 initialization, insert it before selecting.
BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
.addImm(-1);
}
@@ -2465,6 +2466,27 @@ bool AMDGPUInstructionSelector::selectG_AMDGPU_ATOMIC_CMPXCHG(
return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
}
+static bool isVCmpResult(Register Reg, MachineRegisterInfo &MRI) {
+ if (Reg.isPhysical())
+ return false;
+
+ MachineInstr &MI = *MRI.getUniqueVRegDef(Reg);
+ const unsigned Opcode = MI.getOpcode();
+
+ if (Opcode == AMDGPU::COPY)
+ return isVCmpResult(MI.getOperand(1).getReg(), MRI);
+
+ if (Opcode == AMDGPU::G_AND || Opcode == AMDGPU::G_OR ||
+ Opcode == AMDGPU::G_XOR)
+ return isVCmpResult(MI.getOperand(1).getReg(), MRI) &&
+ isVCmpResult(MI.getOperand(2).getReg(), MRI);
+
+ if (Opcode == TargetOpcode::G_INTRINSIC)
+ return MI.getIntrinsicID() == Intrinsic::amdgcn_class;
+
+ return Opcode == AMDGPU::G_ICMP || Opcode == AMDGPU::G_FCMP;
+}
+
bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
MachineBasicBlock *BB = I.getParent();
MachineOperand &CondOp = I.getOperand(0);
@@ -2488,11 +2510,22 @@ bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
BrOpcode = AMDGPU::S_CBRANCH_SCC1;
ConstrainRC = &AMDGPU::SReg_32RegClass;
} else {
- // FIXME: Do we have to insert an and with exec here, like in SelectionDAG?
- // We sort of know that a VCC producer based on the register bank, that ands
- // inactive lanes with 0. What if there was a logical operation with vcc
- // producers in different blocks/with different exec masks?
// FIXME: Should scc->vcc copies and with exec?
+
+ // Unless the value of CondReg is the result of a V_CMP* instruction, we
+ // need to insert an AND with exec.
+ if (!isVCmpResult(CondReg, *MRI)) {
+ const bool Is64 = STI.isWave64();
+ const unsigned Opcode = Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
+ const Register Exec = Is64 ? AMDGPU::EXEC : AMDGPU::EXEC_LO;
+
+ Register TmpReg = MRI->createVirtualRegister(TRI.getBoolRC());
+ BuildMI(*BB, &I, DL, TII.get(Opcode), TmpReg)
+ .addReg(CondReg)
+ .addReg(Exec);
+ CondReg = TmpReg;
+ }
+
CondPhysReg = TRI.getVCC();
BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
ConstrainRC = TRI.getBoolRC();
@@ -3216,6 +3249,9 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
case AMDGPU::G_SBFX:
case AMDGPU::G_UBFX:
return selectG_SBFX_UBFX(I);
+ case AMDGPU::G_SI_CALL:
+ I.setDesc(TII.get(AMDGPU::SI_CALL));
+ return true;
default:
return selectImpl(I, *CoverageInfo);
}
@@ -3977,8 +4013,8 @@ AMDGPUInstructionSelector::getPtrBaseWithConstantOffset(
return {Root, 0};
MachineOperand &RHS = RootI->getOperand(2);
- Optional<ValueAndVReg> MaybeOffset
- = getConstantVRegValWithLookThrough(RHS.getReg(), MRI, true);
+ Optional<ValueAndVReg> MaybeOffset =
+ getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
if (!MaybeOffset)
return {Root, 0};
return {RootI->getOperand(1).getReg(), MaybeOffset->Value.getSExtValue()};
@@ -4306,8 +4342,8 @@ AMDGPUInstructionSelector::selectMUBUFOffsetAtomic(MachineOperand &Root) const {
/// Get an immediate that must be 32-bits, and treated as zero extended.
static Optional<uint64_t> getConstantZext32Val(Register Reg,
const MachineRegisterInfo &MRI) {
- // getConstantVRegVal sexts any values, so see if that matters.
- Optional<int64_t> OffsetVal = getConstantVRegSExtVal(Reg, MRI);
+ // getIConstantVRegVal sexts any values, so see if that matters.
+ Optional<int64_t> OffsetVal = getIConstantVRegSExtVal(Reg, MRI);
if (!OffsetVal || !isInt<32>(*OffsetVal))
return None;
return Lo_32(*OffsetVal);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index cb05a1cb6369..b70e6883bae2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -14,10 +14,7 @@
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
-#include "llvm/CodeGen/Register.h"
#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/IntrinsicsAMDGPU.h"
namespace {
#define GET_GLOBALISEL_PREDICATE_BITSET
@@ -135,7 +132,6 @@ private:
bool hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const;
void getAddrModeInfo(const MachineInstr &Load, const MachineRegisterInfo &MRI,
SmallVectorImpl<GEPInfo> &AddrInfo) const;
- bool selectSMRD(MachineInstr &I, ArrayRef<GEPInfo> AddrInfo) const;
void initM0(MachineInstr &I) const;
bool selectG_LOAD_STORE_ATOMICRMW(MachineInstr &I) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 119c4089d6c2..bad9f6265b36 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -237,6 +237,36 @@ def select_oneuse : HasOneUseTernaryOp<select>;
def AMDGPUmul_u24_oneuse : HasOneUseBinOp<AMDGPUmul_u24>;
def AMDGPUmul_i24_oneuse : HasOneUseBinOp<AMDGPUmul_i24>;
+//===----------------------------------------------------------------------===//
+// PatFrags for shifts
+//===----------------------------------------------------------------------===//
+
+// Constrained shift PatFrags.
+foreach width = [16, 32, 64] in {
+defvar mask = !sub(width, 1);
+
+def cshl_#width : PatFrags<(ops node:$src0, node:$src1),
+ [(shl node:$src0, node:$src1), (shl node:$src0, (and node:$src1, mask))]>;
+defvar cshl = !cast<SDPatternOperator>("cshl_"#width);
+def cshl_#width#_oneuse : HasOneUseBinOp<cshl>;
+def clshl_rev_#width : PatFrag <(ops node:$src0, node:$src1),
+ (cshl $src1, $src0)>;
+
+def csrl_#width : PatFrags<(ops node:$src0, node:$src1),
+ [(srl node:$src0, node:$src1), (srl node:$src0, (and node:$src1, mask))]>;
+defvar csrl = !cast<SDPatternOperator>("csrl_"#width);
+def csrl_#width#_oneuse : HasOneUseBinOp<csrl>;
+def clshr_rev_#width : PatFrag <(ops node:$src0, node:$src1),
+ (csrl $src1, $src0)>;
+
+def csra_#width : PatFrags<(ops node:$src0, node:$src1),
+ [(sra node:$src0, node:$src1), (sra node:$src0, (and node:$src1, mask))]>;
+defvar csra = !cast<SDPatternOperator>("csra_"#width);
+def csra_#width#_oneuse : HasOneUseBinOp<csra>;
+def cashr_rev_#width : PatFrag <(ops node:$src0, node:$src1),
+ (csra $src1, $src0)>;
+} // end foreach width
+
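
The constrained forms above let the same patterns match both a plain shift and a shift whose amount was pre-masked with width-1; that is safe because the hardware shift instructions only consume the low log2(width) bits of the amount. A one-line statement of the 32-bit case in plain C++ (hypothetical name):

#include <cstdint>

// The instruction reads only amt % 32, so masking the amount beforehand
// selects the same result as the unmasked shift for any in-range amount.
static uint32_t cshl32(uint32_t x, uint32_t amt) {
  return x << (amt & 31);
}
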
def srl_16 : PatFrag<
(ops node:$src0), (srl_oneuse node:$src0, (i32 16))
>;
@@ -422,6 +452,16 @@ def zextloadi16_#as : PatFrag<(ops node:$ptr), (zextload node:$ptr)> {
let MemoryVT = i16;
}
+def atomic_load_8_#as : PatFrag<(ops node:$ptr), (atomic_load_8 node:$ptr)> {
+ let IsAtomic = 1;
+ let MemoryVT = i8;
+}
+
+def atomic_load_16_#as : PatFrag<(ops node:$ptr), (atomic_load_16 node:$ptr)> {
+ let IsAtomic = 1;
+ let MemoryVT = i16;
+}
+
def atomic_load_32_#as : PatFrag<(ops node:$ptr), (atomic_load_32 node:$ptr)> {
let IsAtomic = 1;
let MemoryVT = i32;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
index 4971b010870d..9e86bd0c2b97 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
@@ -95,10 +95,8 @@ bool AMDGPULateCodeGenPrepare::runOnFunction(Function &F) {
bool Changed = false;
for (auto &BB : F)
- for (auto BI = BB.begin(), BE = BB.end(); BI != BE; /*EMPTY*/) {
- Instruction *I = &*BI++;
- Changed |= visit(*I);
- }
+ for (Instruction &I : llvm::make_early_inc_range(BB))
+ Changed |= visit(I);
return Changed;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index c1a9b30a509e..1f898f2ba8b3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -59,7 +59,7 @@ static LLT getPow2ScalarType(LLT Ty) {
return LLT::scalar(Pow2Bits);
}
-/// \returs true if this is an odd sized vector which should widen by adding an
+/// \returns true if this is an odd sized vector which should widen by adding an
/// additional element. This is mostly to handle <3 x s16> -> <4 x s16>. This
/// excludes s1 vectors, which should always be scalarized.
static LegalityPredicate isSmallOddVector(unsigned TypeIdx) {
@@ -532,10 +532,11 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
// Full set of gfx9 features.
getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL})
.legalFor({S32, S16, V2S16})
- .clampScalar(0, S16, S32)
+ .minScalar(0, S16)
.clampMaxNumElements(0, S16, 2)
- .scalarize(0)
- .widenScalarToNextPow2(0, 32);
+ .widenScalarToNextMultipleOf(0, 32)
+ .maxScalar(0, S32)
+ .scalarize(0);
getActionDefinitionsBuilder({G_UADDSAT, G_USUBSAT, G_SADDSAT, G_SSUBSAT})
.legalFor({S32, S16, V2S16}) // Clamp modifier
@@ -547,9 +548,10 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
} else if (ST.has16BitInsts()) {
getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL})
.legalFor({S32, S16})
- .clampScalar(0, S16, S32)
- .scalarize(0)
- .widenScalarToNextPow2(0, 32); // FIXME: min should be 16
+ .minScalar(0, S16)
+ .widenScalarToNextMultipleOf(0, 32)
+ .maxScalar(0, S32)
+ .scalarize(0);
// Technically the saturating operations require clamp bit support, but this
// was introduced at the same time as 16-bit operations.
@@ -569,6 +571,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
} else {
getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL})
.legalFor({S32})
+ .widenScalarToNextMultipleOf(0, 32)
.clampScalar(0, S32, S32)
.scalarize(0);
@@ -603,7 +606,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
auto &Mulh = getActionDefinitionsBuilder({G_UMULH, G_SMULH})
.legalFor({S32})
- .maxScalarOrElt(0, S32);
+ .maxScalar(0, S32);
if (ST.hasVOP3PInsts()) {
Mulh
@@ -812,10 +815,9 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
// TODO: Split s1->s64 during regbankselect for VALU.
auto &IToFP = getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
- .legalFor({{S32, S32}, {S64, S32}, {S16, S32}})
- .lowerFor({{S32, S64}})
- .lowerIf(typeIs(1, S1))
- .customFor({{S64, S64}});
+ .legalFor({{S32, S32}, {S64, S32}, {S16, S32}})
+ .lowerIf(typeIs(1, S1))
+ .customFor({{S32, S64}, {S64, S64}});
if (ST.has16BitInsts())
IToFP.legalFor({{S16, S16}});
IToFP.clampScalar(1, S32, S64)
@@ -941,7 +943,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.clampScalar(1, S32, S64)
.widenScalarToNextPow2(0, 32)
.widenScalarToNextPow2(1, 32)
- .lower();
+ .custom();
// The 64-bit versions produce 32-bit results, but only on the SALU.
getActionDefinitionsBuilder({G_CTLZ_ZERO_UNDEF, G_CTTZ_ZERO_UNDEF})
@@ -1266,7 +1268,6 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
// May need relegalization for the scalars.
return std::make_pair(0, EltTy);
})
- .lowerIfMemSizeNotPow2()
.minScalar(0, S32)
.narrowScalarIf(isWideScalarExtLoadTruncStore(0), changeTo(0, S32))
.widenScalarToNextPow2(0)
@@ -1318,7 +1319,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
}
auto &Atomic = getActionDefinitionsBuilder(G_ATOMICRMW_FADD);
- if (ST.hasLDSFPAtomics()) {
+ if (ST.hasLDSFPAtomicAdd()) {
Atomic.legalFor({{S32, LocalPtr}, {S32, RegionPtr}});
if (ST.hasGFX90AInsts())
Atomic.legalFor({{S64, LocalPtr}});
@@ -1628,6 +1629,10 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.clampScalar(0, S32, S64)
.lower();
+ getActionDefinitionsBuilder({G_ROTR, G_ROTL})
+ .scalarize(0)
+ .lower();
+
// TODO: Only Try to form v2s16 with legal packed instructions.
getActionDefinitionsBuilder(G_FSHR)
.legalFor({{S32, S32}})
@@ -1681,6 +1686,9 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
// TODO: Implement
G_FMINIMUM, G_FMAXIMUM}).lower();
+ getActionDefinitionsBuilder({G_MEMCPY, G_MEMCPY_INLINE, G_MEMMOVE, G_MEMSET})
+ .lower();
+
getActionDefinitionsBuilder({G_VASTART, G_VAARG, G_BRJT, G_JUMP_TABLE,
G_INDEXED_LOAD, G_INDEXED_SEXTLOAD,
G_INDEXED_ZEXTLOAD, G_INDEXED_STORE})
@@ -1760,6 +1768,9 @@ bool AMDGPULegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
return legalizeFFloor(MI, MRI, B);
case TargetOpcode::G_BUILD_VECTOR:
return legalizeBuildVector(MI, MRI, B);
+ case TargetOpcode::G_CTLZ:
+ case TargetOpcode::G_CTTZ:
+ return legalizeCTLZ_CTTZ(MI, MRI, B);
default:
return false;
}
@@ -2065,23 +2076,53 @@ bool AMDGPULegalizerInfo::legalizeITOFP(
const LLT S64 = LLT::scalar(64);
const LLT S32 = LLT::scalar(32);
- assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S64);
+ assert(MRI.getType(Src) == S64);
auto Unmerge = B.buildUnmerge({S32, S32}, Src);
+ auto ThirtyTwo = B.buildConstant(S32, 32);
- auto CvtHi = Signed ?
- B.buildSITOFP(S64, Unmerge.getReg(1)) :
- B.buildUITOFP(S64, Unmerge.getReg(1));
+ if (MRI.getType(Dst) == S64) {
+ auto CvtHi = Signed ? B.buildSITOFP(S64, Unmerge.getReg(1))
+ : B.buildUITOFP(S64, Unmerge.getReg(1));
- auto CvtLo = B.buildUITOFP(S64, Unmerge.getReg(0));
+ auto CvtLo = B.buildUITOFP(S64, Unmerge.getReg(0));
+ auto LdExp = B.buildIntrinsic(Intrinsic::amdgcn_ldexp, {S64}, false)
+ .addUse(CvtHi.getReg(0))
+ .addUse(ThirtyTwo.getReg(0));
- auto ThirtyTwo = B.buildConstant(S32, 32);
- auto LdExp = B.buildIntrinsic(Intrinsic::amdgcn_ldexp, {S64}, false)
- .addUse(CvtHi.getReg(0))
- .addUse(ThirtyTwo.getReg(0));
+ // TODO: Should this propagate fast-math-flags?
+ B.buildFAdd(Dst, LdExp, CvtLo);
+ MI.eraseFromParent();
+ return true;
+ }
- // TODO: Should this propagate fast-math-flags?
- B.buildFAdd(Dst, LdExp, CvtLo);
+ assert(MRI.getType(Dst) == S32);
+
+ auto One = B.buildConstant(S32, 1);
+
+ MachineInstrBuilder ShAmt;
+ if (Signed) {
+ auto ThirtyOne = B.buildConstant(S32, 31);
+ auto X = B.buildXor(S32, Unmerge.getReg(0), Unmerge.getReg(1));
+ auto OppositeSign = B.buildAShr(S32, X, ThirtyOne);
+ auto MaxShAmt = B.buildAdd(S32, ThirtyTwo, OppositeSign);
+ auto LS = B.buildIntrinsic(Intrinsic::amdgcn_sffbh, {S32},
+ /*HasSideEffects=*/false)
+ .addUse(Unmerge.getReg(1));
+ auto LS2 = B.buildSub(S32, LS, One);
+ ShAmt = B.buildUMin(S32, LS2, MaxShAmt);
+ } else
+ ShAmt = B.buildCTLZ(S32, Unmerge.getReg(1));
+ auto Norm = B.buildShl(S64, Src, ShAmt);
+ auto Unmerge2 = B.buildUnmerge({S32, S32}, Norm);
+ auto Adjust = B.buildUMin(S32, One, Unmerge2.getReg(0));
+ auto Norm2 = B.buildOr(S32, Unmerge2.getReg(1), Adjust);
+ auto FVal = Signed ? B.buildSITOFP(S32, Norm2) : B.buildUITOFP(S32, Norm2);
+ auto Scale = B.buildSub(S32, ThirtyTwo, ShAmt);
+ B.buildIntrinsic(Intrinsic::amdgcn_ldexp, ArrayRef<Register>{Dst},
+ /*HasSideEffects=*/false)
+ .addUse(FVal.getReg(0))
+ .addUse(Scale.getReg(0));
MI.eraseFromParent();
return true;
}
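
The new 32-bit-destination path above is the classic normalize-and-scale conversion: count the leading zeros of the high word, shift the 64-bit value so its most significant set bit reaches the top, fold the shifted-out low bits into a sticky bit, convert the top 32 bits, then rescale with ldexp. A rough host-side reference for the unsigned case, useful for checking the arithmetic (this is not the GISel code itself; __builtin_clzll assumes a GCC/Clang host):

  #include <algorithm>
  #include <cmath>
  #include <cstdint>

  // Reference u64 -> f32 conversion mirroring the lowering sequence:
  // shift amount = min(32, clz), sticky bit from the shifted-out low half,
  // final scale = 32 - shift amount.
  static float u64ToF32Reference(uint64_t V) {
    if (V == 0)
      return 0.0f;
    unsigned ShAmt = std::min(32u, (unsigned)__builtin_clzll(V)); // ~ G_CTLZ of the high word
    uint64_t Norm = V << ShAmt;                                   // buildShl
    uint32_t Hi = (uint32_t)(Norm >> 32);
    uint32_t Sticky = (uint32_t)Norm ? 1u : 0u;                   // buildUMin(One, low half)
    float F = (float)(Hi | Sticky);                               // 32-bit G_UITOFP
    return std::ldexp(F, 32 - (int)ShAmt);                        // amdgcn.ldexp scale
  }

The sticky bit is what preserves round-to-nearest-even: the low 32 bits only matter as "zero or not" once the top word carries the leading 24 significant bits.
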
@@ -2183,9 +2224,9 @@ bool AMDGPULegalizerInfo::legalizeExtractVectorElt(
// FIXME: Artifact combiner probably should have replaced the truncated
// constant before this, so we shouldn't need
- // getConstantVRegValWithLookThrough.
+ // getIConstantVRegValWithLookThrough.
Optional<ValueAndVReg> MaybeIdxVal =
- getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
+ getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
if (!MaybeIdxVal) // Dynamic case will be selected to register indexing.
return true;
const int64_t IdxVal = MaybeIdxVal->Value.getSExtValue();
@@ -2215,9 +2256,9 @@ bool AMDGPULegalizerInfo::legalizeInsertVectorElt(
// FIXME: Artifact combiner probably should have replaced the truncated
// constant before this, so we shouldn't need
- // getConstantVRegValWithLookThrough.
+ // getIConstantVRegValWithLookThrough.
Optional<ValueAndVReg> MaybeIdxVal =
- getConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
+ getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
if (!MaybeIdxVal) // Dynamic case will be selected to register indexing.
return true;
@@ -2379,43 +2420,36 @@ bool AMDGPULegalizerInfo::legalizeGlobalValue(
}
// TODO: We could emit code to handle the initialization somewhere.
- if (!AMDGPUTargetLowering::hasDefinedInitializer(GV)) {
- const SITargetLowering *TLI = ST.getTargetLowering();
- if (!TLI->shouldUseLDSConstAddress(GV)) {
- MI.getOperand(1).setTargetFlags(SIInstrInfo::MO_ABS32_LO);
- return true; // Leave in place;
- }
+ // We ignore the initializer for now and legalize it to allow selection.
+ // The initializer will anyway get errored out during assembly emission.
+ const SITargetLowering *TLI = ST.getTargetLowering();
+ if (!TLI->shouldUseLDSConstAddress(GV)) {
+ MI.getOperand(1).setTargetFlags(SIInstrInfo::MO_ABS32_LO);
+ return true; // Leave in place;
+ }
- if (AS == AMDGPUAS::LOCAL_ADDRESS && GV->hasExternalLinkage()) {
- Type *Ty = GV->getValueType();
- // HIP uses an unsized array `extern __shared__ T s[]` or similar
- // zero-sized type in other languages to declare the dynamic shared
- // memory which size is not known at the compile time. They will be
- // allocated by the runtime and placed directly after the static
- // allocated ones. They all share the same offset.
- if (B.getDataLayout().getTypeAllocSize(Ty).isZero()) {
- // Adjust alignment for that dynamic shared memory array.
- MFI->setDynLDSAlign(B.getDataLayout(), *cast<GlobalVariable>(GV));
- LLT S32 = LLT::scalar(32);
- auto Sz =
- B.buildIntrinsic(Intrinsic::amdgcn_groupstaticsize, {S32}, false);
- B.buildIntToPtr(DstReg, Sz);
- MI.eraseFromParent();
- return true;
- }
+ if (AS == AMDGPUAS::LOCAL_ADDRESS && GV->hasExternalLinkage()) {
+ Type *Ty = GV->getValueType();
+ // HIP uses an unsized array `extern __shared__ T s[]` or similar
+ // zero-sized type in other languages to declare the dynamic shared
+ // memory which size is not known at the compile time. They will be
+ // allocated by the runtime and placed directly after the static
+ // allocated ones. They all share the same offset.
+ if (B.getDataLayout().getTypeAllocSize(Ty).isZero()) {
+ // Adjust alignment for that dynamic shared memory array.
+ MFI->setDynLDSAlign(B.getDataLayout(), *cast<GlobalVariable>(GV));
+ LLT S32 = LLT::scalar(32);
+ auto Sz =
+ B.buildIntrinsic(Intrinsic::amdgcn_groupstaticsize, {S32}, false);
+ B.buildIntToPtr(DstReg, Sz);
+ MI.eraseFromParent();
+ return true;
}
-
- B.buildConstant(
- DstReg,
- MFI->allocateLDSGlobal(B.getDataLayout(), *cast<GlobalVariable>(GV)));
- MI.eraseFromParent();
- return true;
}
- const Function &Fn = MF.getFunction();
- DiagnosticInfoUnsupported BadInit(
- Fn, "unsupported initializer for address space", MI.getDebugLoc());
- Fn.getContext().diagnose(BadInit);
+ B.buildConstant(DstReg, MFI->allocateLDSGlobal(B.getDataLayout(),
+ *cast<GlobalVariable>(GV)));
+ MI.eraseFromParent();
return true;
}
@@ -2446,7 +2480,7 @@ bool AMDGPULegalizerInfo::legalizeGlobalValue(
buildPCRelGlobalAddress(GOTAddr, PtrTy, B, GV, 0, SIInstrInfo::MO_GOTPCREL32);
if (Ty.getSizeInBits() == 32) {
- // Truncate if this is a 32-bit constant adrdess.
+ // Truncate if this is a 32-bit constant address.
auto Load = B.buildLoad(PtrTy, GOTAddr, *GOTMMO);
B.buildExtract(DstReg, Load, 0);
} else
@@ -2745,11 +2779,32 @@ bool AMDGPULegalizerInfo::legalizeBuildVector(
return true;
}
+// Legalize ctlz/cttz to ffbh/ffbl instead of the default legalization to
+// ctlz/cttz_zero_undef. This allows us to fix up the result for the zero input
+// case with a single min instruction instead of a compare+select.
+bool AMDGPULegalizerInfo::legalizeCTLZ_CTTZ(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const {
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(Dst);
+ LLT SrcTy = MRI.getType(Src);
+
+ unsigned NewOpc = MI.getOpcode() == AMDGPU::G_CTLZ
+ ? AMDGPU::G_AMDGPU_FFBH_U32
+ : AMDGPU::G_AMDGPU_FFBL_B32;
+ auto Tmp = B.buildInstr(NewOpc, {DstTy}, {Src});
+ B.buildUMin(Dst, Tmp, B.buildConstant(DstTy, SrcTy.getSizeInBits()));
+
+ MI.eraseFromParent();
+ return true;
+}
+
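+
+ As the comment above notes, the zero-input fix-up needs only a single unsigned min against the bit width, because the ffbh/ffbl-style instructions return 0xffffffff when no bit is set. A small scalar sketch of the resulting semantics (illustrative only; __builtin_clz assumes a GCC/Clang host):
+
+   #include <cstdint>
+
+   // ffbh-style count-leading-zeros: all-ones for a zero input, then one
+   // unsigned min clamps it to the bit width, matching generic ctlz.
+   static uint32_t ctlz32ViaFfbh(uint32_t X) {
+     uint32_t Ffbh = X ? (uint32_t)__builtin_clz(X) : 0xffffffffu;
+     return Ffbh < 32u ? Ffbh : 32u; // the G_UMIN emitted by legalizeCTLZ_CTTZ
+   }
+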
// Check that this is a G_XOR x, -1
static bool isNot(const MachineRegisterInfo &MRI, const MachineInstr &MI) {
if (MI.getOpcode() != TargetOpcode::G_XOR)
return false;
- auto ConstVal = getConstantVRegSExtVal(MI.getOperand(2).getReg(), MRI);
+ auto ConstVal = getIConstantVRegSExtVal(MI.getOperand(2).getReg(), MRI);
return ConstVal && *ConstVal == -1;
}
@@ -2770,7 +2825,7 @@ verifyCFIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI, MachineInstr *&Br,
return nullptr;
// We're deleting the def of this value, so we need to remove it.
- UseMI->eraseFromParent();
+ eraseInstr(*UseMI, MRI);
UseMI = &*MRI.use_instr_nodbg_begin(NegatedCond);
Negated = true;
@@ -2836,6 +2891,20 @@ bool AMDGPULegalizerInfo::loadInputValue(
LLT ArgTy;
std::tie(Arg, ArgRC, ArgTy) = MFI->getPreloadedValue(ArgType);
+ if (!Arg) {
+ if (ArgType == AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR) {
+ // The intrinsic may appear when we have a 0 sized kernarg segment, in which
+ // case the pointer argument may be missing and we use null.
+ B.buildConstant(DstReg, 0);
+ return true;
+ }
+
+ // It's undefined behavior if a function marked with the amdgpu-no-*
+ // attributes uses the corresponding intrinsic.
+ B.buildUndef(DstReg);
+ return true;
+ }
+
if (!Arg->isRegister() || !Arg->getRegister().isValid())
return false; // TODO: Handle these
return loadInputValue(DstReg, B, Arg, ArgRC, ArgTy);
@@ -2913,7 +2982,7 @@ void AMDGPULegalizerInfo::legalizeUnsignedDIV_REM32Impl(MachineIRBuilder &B,
B.buildSelect(DstRemReg, Cond, B.buildSub(S32, R, Y), R);
}
-// Build integer reciprocal sequence arounud V_RCP_IFLAG_F32
+// Build integer reciprocal sequence around V_RCP_IFLAG_F32
//
// Return lo, hi of result
//
@@ -2982,7 +3051,6 @@ void AMDGPULegalizerInfo::legalizeUnsignedDIV_REM64Impl(MachineIRBuilder &B,
auto Add1_Lo = B.buildUAddo(S32, S1, RcpLo, MulHi1_Lo);
auto Add1_Hi = B.buildUAdde(S32, S1, RcpHi, MulHi1_Hi, Add1_Lo.getReg(1));
- auto Add1_HiNc = B.buildAdd(S32, RcpHi, MulHi1_Hi);
auto Add1 = B.buildMerge(S64, {Add1_Lo, Add1_Hi});
auto MulLo2 = B.buildMul(S64, NegDenom, Add1);
@@ -2993,9 +3061,7 @@ void AMDGPULegalizerInfo::legalizeUnsignedDIV_REM64Impl(MachineIRBuilder &B,
auto Zero32 = B.buildConstant(S32, 0);
auto Add2_Lo = B.buildUAddo(S32, S1, Add1_Lo, MulHi2_Lo);
- auto Add2_HiC =
- B.buildUAdde(S32, S1, Add1_HiNc, MulHi2_Hi, Add1_Lo.getReg(1));
- auto Add2_Hi = B.buildUAdde(S32, S1, Add2_HiC, Zero32, Add2_Lo.getReg(1));
+ auto Add2_Hi = B.buildUAdde(S32, S1, Add1_Hi, MulHi2_Hi, Add2_Lo.getReg(1));
auto Add2 = B.buildMerge(S64, {Add2_Lo, Add2_Hi});
auto UnmergeNumer = B.buildUnmerge(S32, Numer);
@@ -3701,11 +3767,11 @@ void AMDGPULegalizerInfo::updateBufferMMO(MachineMemOperand *MMO,
unsigned ImmOffset, Register VIndex,
MachineRegisterInfo &MRI) const {
Optional<ValueAndVReg> MaybeVOffsetVal =
- getConstantVRegValWithLookThrough(VOffset, MRI);
+ getIConstantVRegValWithLookThrough(VOffset, MRI);
Optional<ValueAndVReg> MaybeSOffsetVal =
- getConstantVRegValWithLookThrough(SOffset, MRI);
+ getIConstantVRegValWithLookThrough(SOffset, MRI);
Optional<ValueAndVReg> MaybeVIndexVal =
- getConstantVRegValWithLookThrough(VIndex, MRI);
+ getIConstantVRegValWithLookThrough(VIndex, MRI);
// If the combined VOffset + SOffset + ImmOffset + strided VIndex is constant,
// update the MMO with that offset. The stride is unknown so we can only do
// this if VIndex is constant 0.
@@ -4246,8 +4312,8 @@ static void convertImageAddrToPacked(MachineIRBuilder &B, MachineInstr &MI,
/// to exposes all register repacking to the legalizer/combiners. We also don't
/// want a selected instrution entering RegBankSelect. In order to avoid
/// defining a multitude of intermediate image instructions, directly hack on
-/// the intrinsic's arguments. In cases like a16 addreses, this requires padding
-/// now unnecessary arguments with $noreg.
+/// the intrinsic's arguments. In cases like a16 addresses, this requires
+/// padding now unnecessary arguments with $noreg.
bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
MachineInstr &MI, MachineIRBuilder &B, GISelChangeObserver &Observer,
const AMDGPU::ImageDimIntrinsicInfo *Intr) const {
@@ -4339,8 +4405,8 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
if (ConstantLod->isZero() || ConstantLod->isNegative()) {
// Set new opcode to _lz variant of _l, and change the intrinsic ID.
const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
- AMDGPU::getImageDimInstrinsicByBaseOpcode(LZMappingInfo->LZ,
- Intr->Dim);
+ AMDGPU::getImageDimIntrinsicByBaseOpcode(LZMappingInfo->LZ,
+ Intr->Dim);
// The starting indexes should remain in the same place.
--CorrectedNumVAddrs;
@@ -4518,7 +4584,7 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
MI.getOperand(0).setReg(NewResultReg);
// In the IR, TFE is supposed to be used with a 2 element struct return
- // type. The intruction really returns these two values in one contiguous
+ // type. The instruction really returns these two values in one contiguous
// register, with one additional dword beyond the loaded data. Rewrite the
// return type to use a single register result.
@@ -4730,7 +4796,7 @@ bool AMDGPULegalizerInfo::legalizeTrapHsa(
bool AMDGPULegalizerInfo::legalizeDebugTrapIntrinsic(
MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const {
- // Is non-HSA path or trap-handler disabled? then, report a warning
+ // Is non-HSA path or trap-handler disabled? Then, report a warning
// accordingly
if (!ST.isTrapHandlerEnabled() ||
ST.getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbi::AMDHSA) {
@@ -4771,12 +4837,27 @@ bool AMDGPULegalizerInfo::legalizeBVHIntrinsic(MachineInstr &MI,
return false;
}
- bool IsA16 = MRI.getType(RayDir).getElementType().getSizeInBits() == 16;
- bool Is64 = MRI.getType(NodePtr).getSizeInBits() == 64;
- unsigned Opcode = IsA16 ? Is64 ? AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_nsa
- : AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16_nsa
- : Is64 ? AMDGPU::IMAGE_BVH64_INTERSECT_RAY_nsa
- : AMDGPU::IMAGE_BVH_INTERSECT_RAY_nsa;
+ const bool IsA16 = MRI.getType(RayDir).getElementType().getSizeInBits() == 16;
+ const bool Is64 = MRI.getType(NodePtr).getSizeInBits() == 64;
+ const unsigned NumVDataDwords = 4;
+ const unsigned NumVAddrDwords = IsA16 ? (Is64 ? 9 : 8) : (Is64 ? 12 : 11);
+ const bool UseNSA =
+ ST.hasNSAEncoding() && NumVAddrDwords <= ST.getNSAMaxSize();
+ const unsigned BaseOpcodes[2][2] = {
+ {AMDGPU::IMAGE_BVH_INTERSECT_RAY, AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16},
+ {AMDGPU::IMAGE_BVH64_INTERSECT_RAY,
+ AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16}};
+ int Opcode;
+ if (UseNSA) {
+ Opcode =
+ AMDGPU::getMIMGOpcode(BaseOpcodes[Is64][IsA16], AMDGPU::MIMGEncGfx10NSA,
+ NumVDataDwords, NumVAddrDwords);
+ } else {
+ Opcode = AMDGPU::getMIMGOpcode(BaseOpcodes[Is64][IsA16],
+ AMDGPU::MIMGEncGfx10Default, NumVDataDwords,
+ PowerOf2Ceil(NumVAddrDwords));
+ }
+ assert(Opcode != -1);
SmallVector<Register, 12> Ops;
if (Is64) {
@@ -4813,6 +4894,14 @@ bool AMDGPULegalizerInfo::legalizeBVHIntrinsic(MachineInstr &MI,
packLanes(RayInvDir);
}
+ if (!UseNSA) {
+ // Build a single vector containing all the operands so far prepared.
+ LLT OpTy = LLT::fixed_vector(Ops.size(), 32);
+ Register MergedOps = B.buildMerge(OpTy, Ops).getReg(0);
+ Ops.clear();
+ Ops.push_back(MergedOps);
+ }
+
auto MIB = B.buildInstr(AMDGPU::G_AMDGPU_INTRIN_BVH_INTERSECT_RAY)
.addDef(DstReg)
.addImm(Opcode);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index d4fefd89b487..7faf0436f995 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -89,6 +89,8 @@ public:
bool legalizeBuildVector(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
+ bool legalizeCTLZ_CTTZ(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
bool loadInputValue(Register DstReg, MachineIRBuilder &B,
const ArgDescriptor *Arg,
@@ -107,8 +109,8 @@ public:
Register Den) const;
void legalizeUnsignedDIV_REM64Impl(MachineIRBuilder &B, Register DstDivReg,
- Register DstRemReg, Register Numer,
- Register Denom) const;
+ Register DstRemReg, Register Num,
+ Register Den) const;
bool legalizeSignedDIV_REM(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
index 1ee6933bd7ff..49cf6db5197f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -54,15 +54,14 @@ private:
bool useNativeFunc(const StringRef F) const;
- // Return a pointer (pointer expr) to the function if function defintion with
+ // Return a pointer (pointer expr) to the function if function definition with
// "FuncName" exists. It may create a new function prototype in pre-link mode.
FunctionCallee getFunction(Module *M, const FuncInfo &fInfo);
// Replace a normal function with its native version.
bool replaceWithNative(CallInst *CI, const FuncInfo &FInfo);
- bool parseFunctionName(const StringRef& FMangledName,
- FuncInfo *FInfo=nullptr /*out*/);
+ bool parseFunctionName(const StringRef &FMangledName, FuncInfo &FInfo);
bool TDOFold(CallInst *CI, const FuncInfo &FInfo);
@@ -87,9 +86,9 @@ private:
bool sincosUseNative(CallInst *aCI, const FuncInfo &FInfo);
// evaluate calls if calls' arguments are constants.
- bool evaluateScalarMathFunc(FuncInfo &FInfo, double& Res0,
+ bool evaluateScalarMathFunc(const FuncInfo &FInfo, double& Res0,
double& Res1, Constant *copr0, Constant *copr1, Constant *copr2);
- bool evaluateCall(CallInst *aCI, FuncInfo &FInfo);
+ bool evaluateCall(CallInst *aCI, const FuncInfo &FInfo);
// exp
bool fold_exp(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
@@ -116,7 +115,8 @@ private:
bool fold_sincos(CallInst * CI, IRBuilder<> &B, AliasAnalysis * AA);
// __read_pipe/__write_pipe
- bool fold_read_write_pipe(CallInst *CI, IRBuilder<> &B, FuncInfo &FInfo);
+ bool fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
+ const FuncInfo &FInfo);
// llvm.amdgcn.wavefrontsize
bool fold_wavefrontsize(CallInst *CI, IRBuilder<> &B);
@@ -125,7 +125,7 @@ private:
BasicBlock::iterator getEntryIns(CallInst * UI);
// Insert an Alloc instruction.
AllocaInst* insertAlloca(CallInst * UI, IRBuilder<> &B, const char *prefix);
- // Get a scalar native builtin signle argument FP function
+ // Get a scalar native builtin single argument FP function
FunctionCallee getNativeFunction(Module *M, const FuncInfo &FInfo);
protected:
@@ -466,9 +466,9 @@ FunctionCallee AMDGPULibCalls::getFunction(Module *M, const FuncInfo &fInfo) {
: AMDGPULibFunc::getFunction(M, fInfo);
}
-bool AMDGPULibCalls::parseFunctionName(const StringRef& FMangledName,
- FuncInfo *FInfo) {
- return AMDGPULibFunc::parse(FMangledName, *FInfo);
+bool AMDGPULibCalls::parseFunctionName(const StringRef &FMangledName,
+ FuncInfo &FInfo) {
+ return AMDGPULibFunc::parse(FMangledName, FInfo);
}
bool AMDGPULibCalls::isUnsafeMath(const CallInst *CI) const {
@@ -529,7 +529,7 @@ bool AMDGPULibCalls::useNative(CallInst *aCI) {
Function *Callee = aCI->getCalledFunction();
FuncInfo FInfo;
- if (!parseFunctionName(Callee->getName(), &FInfo) || !FInfo.isMangled() ||
+ if (!parseFunctionName(Callee->getName(), FInfo) || !FInfo.isMangled() ||
FInfo.getPrefix() != AMDGPULibFunc::NOPFX ||
getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()) ||
!(AllNative || useNativeFunc(FInfo.getName()))) {
@@ -558,7 +558,7 @@ bool AMDGPULibCalls::useNative(CallInst *aCI) {
// for such cases where N is the size in bytes of the type (N = 1, 2, 4, 8, ...,
// 128). The same for __read_pipe_4, write_pipe_2, and write_pipe_4.
bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
- FuncInfo &FInfo) {
+ const FuncInfo &FInfo) {
auto *Callee = CI->getCalledFunction();
if (!Callee->isDeclaration())
return false;
@@ -567,7 +567,7 @@ bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
auto *M = Callee->getParent();
auto &Ctx = M->getContext();
std::string Name = std::string(Callee->getName());
- auto NumArg = CI->getNumArgOperands();
+ auto NumArg = CI->arg_size();
if (NumArg != 4 && NumArg != 6)
return false;
auto *PacketSize = CI->getArgOperand(NumArg - 2);
@@ -584,7 +584,7 @@ bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
PtrElemTy = Type::getIntNTy(Ctx, Size * 8);
else
PtrElemTy = FixedVectorType::get(Type::getInt64Ty(Ctx), Size / 8);
- unsigned PtrArgLoc = CI->getNumArgOperands() - 3;
+ unsigned PtrArgLoc = CI->arg_size() - 3;
auto PtrArg = CI->getArgOperand(PtrArgLoc);
unsigned PtrArgAS = PtrArg->getType()->getPointerAddressSpace();
auto *PtrTy = llvm::PointerType::get(PtrElemTy, PtrArgAS);
@@ -644,11 +644,11 @@ bool AMDGPULibCalls::fold(CallInst *CI, AliasAnalysis *AA) {
}
FuncInfo FInfo;
- if (!parseFunctionName(Callee->getName(), &FInfo))
+ if (!parseFunctionName(Callee->getName(), FInfo))
return false;
// Further check the number of arguments to see if they match.
- if (CI->getNumArgOperands() != FInfo.getNumArgs())
+ if (CI->arg_size() != FInfo.getNumArgs())
return false;
if (TDOFold(CI, FInfo))
@@ -660,7 +660,7 @@ bool AMDGPULibCalls::fold(CallInst *CI, AliasAnalysis *AA) {
if (isUnsafeMath(CI) && evaluateCall(CI, FInfo))
return true;
- // Specilized optimizations for each function call
+ // Specialized optimizations for each function call
switch (FInfo.getId()) {
case AMDGPULibFunc::EI_RECIP:
// skip vector function
@@ -1231,7 +1231,7 @@ bool AMDGPULibCalls::fold_fma_mad(CallInst *CI, IRBuilder<> &B,
return false;
}
-// Get a scalar native builtin signle argument FP function
+// Get a scalar native builtin single argument FP function
FunctionCallee AMDGPULibCalls::getNativeFunction(Module *M,
const FuncInfo &FInfo) {
if (getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()))
@@ -1371,8 +1371,7 @@ bool AMDGPULibCalls::fold_wavefrontsize(CallInst *CI, IRBuilder<> &B) {
StringRef CPU = TM->getTargetCPU();
StringRef Features = TM->getTargetFeatureString();
if ((CPU.empty() || CPU.equals_insensitive("generic")) &&
- (Features.empty() ||
- Features.find_insensitive("wavefrontsize") == StringRef::npos))
+ (Features.empty() || !Features.contains_insensitive("wavefrontsize")))
return false;
Function *F = CI->getParent()->getParent();
@@ -1410,7 +1409,7 @@ AllocaInst* AMDGPULibCalls::insertAlloca(CallInst *UI, IRBuilder<> &B,
return Alloc;
}
-bool AMDGPULibCalls::evaluateScalarMathFunc(FuncInfo &FInfo,
+bool AMDGPULibCalls::evaluateScalarMathFunc(const FuncInfo &FInfo,
double& Res0, double& Res1,
Constant *copr0, Constant *copr1,
Constant *copr2) {
@@ -1605,8 +1604,8 @@ bool AMDGPULibCalls::evaluateScalarMathFunc(FuncInfo &FInfo,
return false;
}
-bool AMDGPULibCalls::evaluateCall(CallInst *aCI, FuncInfo &FInfo) {
- int numArgs = (int)aCI->getNumArgOperands();
+bool AMDGPULibCalls::evaluateCall(CallInst *aCI, const FuncInfo &FInfo) {
+ int numArgs = (int)aCI->arg_size();
if (numArgs > 3)
return false;
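
Several call sites in this file switch from CallInst::getNumArgOperands() to CallBase::arg_size(); both count the call's arguments (excluding the callee operand), the latter being the preferred spelling. A tiny sketch of the replacement pattern (the helper name is illustrative):

  #include "llvm/IR/InstrTypes.h"
  using namespace llvm;

  // evaluateCall-style guard: bail out on calls with more than three arguments.
  static bool hasAtMostThreeArgs(const CallBase &CB) {
    return CB.arg_size() <= 3; // previously: CB.getNumArgOperands() <= 3
  }
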
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp
index 32262ea75fd3..aa7c7ff2e388 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp
@@ -352,7 +352,7 @@ const unsigned UnmangledFuncInfo::TableSize =
static AMDGPULibFunc::Param getRetType(AMDGPULibFunc::EFuncId id,
const AMDGPULibFunc::Param (&Leads)[2]) {
AMDGPULibFunc::Param Res = Leads[0];
- // TBD - This switch may require to be extended for other intriniscs
+ // TBD - This switch may require to be extended for other intrinsics
switch (id) {
case AMDGPULibFunc::EI_SINCOS:
Res.PtrKind = AMDGPULibFunc::BYVALUE;
@@ -455,7 +455,8 @@ AMDGPULibFunc::Param ParamIterator::getNextParam() {
break;
}
- default: llvm_unreachable("Unhandeled param rule");
+ default:
+ llvm_unreachable("Unhandled param rule");
}
}
++Index;
@@ -747,7 +748,8 @@ static const char *getItaniumTypeName(AMDGPULibFunc::EType T) {
case AMDGPULibFunc::IMG3D: return "11ocl_image3d";
case AMDGPULibFunc::SAMPLER: return "11ocl_sampler";
case AMDGPULibFunc::EVENT: return "9ocl_event";
- default: llvm_unreachable("Unhandeled param type");
+ default:
+ llvm_unreachable("Unhandled param type");
}
return nullptr;
}
@@ -761,7 +763,7 @@ namespace {
// substitution candidates from the grammar, but are explicitly excluded:
// 1. <builtin-type> other than vendor extended types ..."
-// For the purpose of functions the following productions make sence for the
+// For the purpose of functions the following productions make sense for the
// substitution:
// <type> ::= <builtin-type>
// ::= <class-enum-type>
@@ -774,11 +776,11 @@ namespace {
// using <class-enum-type> production rule they're not used for substitution
// because clang consider them as builtin types.
//
-// DvNN_ type is GCC extension for vectors and is a subject for the substitution.
-
+// DvNN_ type is GCC extension for vectors and is a subject for the
+// substitution.
class ItaniumMangler {
- SmallVector<AMDGPULibFunc::Param, 10> Str; // list of accumulated substituions
+ SmallVector<AMDGPULibFunc::Param, 10> Str; // list of accumulated substitutions
bool UseAddrSpace;
int findSubst(const AMDGPULibFunc::Param& P) const {
@@ -902,7 +904,7 @@ static Type* getIntrinsicParamType(
case AMDGPULibFunc::EVENT:
T = StructType::create(C,"ocl_event")->getPointerTo(); break;
default:
- llvm_unreachable("Unhandeled param type");
+ llvm_unreachable("Unhandled param type");
return nullptr;
}
if (P.VectorSize > 1)
@@ -990,10 +992,8 @@ FunctionCallee AMDGPULibFunc::getOrInsertFunction(Module *M,
} else {
AttributeList Attr;
LLVMContext &Ctx = M->getContext();
- Attr = Attr.addAttribute(Ctx, AttributeList::FunctionIndex,
- Attribute::ReadOnly);
- Attr = Attr.addAttribute(Ctx, AttributeList::FunctionIndex,
- Attribute::NoUnwind);
+ Attr = Attr.addFnAttribute(Ctx, Attribute::ReadOnly);
+ Attr = Attr.addFnAttribute(Ctx, Attribute::NoUnwind);
C = M->getOrInsertFunction(FuncName, FuncTy, Attr);
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
index 714e74faaf13..b700dd5aa301 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
@@ -76,9 +76,8 @@ bool AMDGPULowerIntrinsics::expandMemIntrinsicUses(Function &F) {
Intrinsic::ID ID = F.getIntrinsicID();
bool Changed = false;
- for (auto I = F.user_begin(), E = F.user_end(); I != E;) {
- Instruction *Inst = cast<Instruction>(*I);
- ++I;
+ for (User *U : llvm::make_early_inc_range(F.users())) {
+ Instruction *Inst = cast<Instruction>(U);
switch (ID) {
case Intrinsic::memcpy: {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
index 0f157e53c3db..c34c12ab9fec 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
@@ -82,9 +82,9 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
Builder.CreateIntrinsic(Intrinsic::amdgcn_kernarg_segment_ptr, {}, {},
nullptr, F.getName() + ".kernarg.segment");
- KernArgSegment->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
- KernArgSegment->addAttribute(AttributeList::ReturnIndex,
- Attribute::getWithDereferenceableBytes(Ctx, TotalKernArgSize));
+ KernArgSegment->addRetAttr(Attribute::NonNull);
+ KernArgSegment->addRetAttr(
+ Attribute::getWithDereferenceableBytes(Ctx, TotalKernArgSize));
unsigned AS = KernArgSegment->getType()->getPointerAddressSpace();
uint64_t ExplicitArgOffset = 0;
@@ -232,8 +232,7 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
}
}
- KernArgSegment->addAttribute(
- AttributeList::ReturnIndex,
+ KernArgSegment->addRetAttr(
Attribute::getWithAlignment(Ctx, std::max(KernArgBaseAlign, MaxAlign)));
return true;
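
The attribute updates above use the newer CallBase::addRetAttr entry point instead of addAttribute(AttributeList::ReturnIndex, ...). A minimal sketch of tagging a call's returned pointer the same way (the helper and its size parameter are illustrative):

  #include "llvm/IR/Attributes.h"
  #include "llvm/IR/InstrTypes.h"
  using namespace llvm;

  // Mark a call's returned pointer as non-null and dereferenceable.
  static void markReturnedPointer(CallBase &CB, uint64_t Bytes) {
    LLVMContext &Ctx = CB.getContext();
    CB.addRetAttr(Attribute::NonNull);
    CB.addRetAttr(Attribute::getWithDereferenceableBytes(Ctx, Bytes));
  }
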
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
index 70ecea8dbc3e..12d6d35a6917 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -42,6 +42,7 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
@@ -118,7 +119,7 @@ class AMDGPULowerModuleLDS : public ModulePass {
// The llvm.amdgcn.module.lds instance is implicitly used by all kernels
// that might call a function which accesses a field within it. This is
// presently approximated to 'all kernels' if there are any such functions
- // in the module. This implicit use is reified as an explicit use here so
+ // in the module. This implicit use is redefined as an explicit use here so
// that later passes, specifically PromoteAlloca, account for the required
// memory without any knowledge of this transform.
@@ -162,6 +163,9 @@ public:
bool Changed = processUsedLDS(M);
for (Function &F : M.functions()) {
+ if (F.isDeclaration())
+ continue;
+
// Only lower compute kernels' LDS.
if (!AMDGPU::isKernel(F.getCallingConv()))
continue;
@@ -282,6 +286,21 @@ private:
// so remove the variables from these lists before replaceAllUsesWith
removeFromUsedLists(M, LocalVars);
+ // Create alias.scope metadata and the matching noalias lists. Each field in
+ // the new structure does not alias any of the other fields.
+ SmallVector<MDNode *> AliasScopes;
+ SmallVector<Metadata *> NoAliasList;
+ if (LocalVars.size() > 1) {
+ MDBuilder MDB(Ctx);
+ AliasScopes.reserve(LocalVars.size());
+ MDNode *Domain = MDB.createAnonymousAliasScopeDomain();
+ for (size_t I = 0; I < LocalVars.size(); I++) {
+ MDNode *Scope = MDB.createAnonymousAliasScope(Domain);
+ AliasScopes.push_back(Scope);
+ }
+ NoAliasList.append(&AliasScopes[1], AliasScopes.end());
+ }
+
// Replace uses of ith variable with a constantexpr to the ith field of the
// instance that will be allocated by AMDGPUMachineFunction
Type *I32 = Type::getInt32Ty(Ctx);
@@ -313,7 +332,15 @@ private:
uint64_t Off = DL.getStructLayout(LDSTy)->getElementOffset(I);
Align A = commonAlignment(StructAlign, Off);
- refineUsesAlignment(GEP, A, DL);
+
+ if (I)
+ NoAliasList[I - 1] = AliasScopes[I - 1];
+ MDNode *NoAlias =
+ NoAliasList.empty() ? nullptr : MDNode::get(Ctx, NoAliasList);
+ MDNode *AliasScope =
+ AliasScopes.empty() ? nullptr : MDNode::get(Ctx, {AliasScopes[I]});
+
+ refineUsesAlignmentAndAA(GEP, A, DL, AliasScope, NoAlias);
}
// Mark kernels with asm that reads the address of the allocated structure
@@ -323,23 +350,39 @@ private:
if (!F) {
IRBuilder<> Builder(Ctx);
SmallPtrSet<Function *, 32> Kernels;
- for (auto &I : M.functions()) {
- Function *Func = &I;
- if (AMDGPU::isKernelCC(Func) && !Kernels.contains(Func)) {
- markUsedByKernel(Builder, Func, SGV);
- Kernels.insert(Func);
+ for (Function &Func : M.functions()) {
+ if (Func.isDeclaration())
+ continue;
+
+ if (AMDGPU::isKernelCC(&Func) && !Kernels.contains(&Func)) {
+ markUsedByKernel(Builder, &Func, SGV);
+ Kernels.insert(&Func);
}
}
}
return true;
}
- void refineUsesAlignment(Value *Ptr, Align A, const DataLayout &DL,
- unsigned MaxDepth = 5) {
- if (!MaxDepth || A == 1)
+ void refineUsesAlignmentAndAA(Value *Ptr, Align A, const DataLayout &DL,
+ MDNode *AliasScope, MDNode *NoAlias,
+ unsigned MaxDepth = 5) {
+ if (!MaxDepth || (A == 1 && !AliasScope))
return;
for (User *U : Ptr->users()) {
+ if (auto *I = dyn_cast<Instruction>(U)) {
+ if (AliasScope && I->mayReadOrWriteMemory()) {
+ MDNode *AS = I->getMetadata(LLVMContext::MD_alias_scope);
+ AS = (AS ? MDNode::getMostGenericAliasScope(AS, AliasScope)
+ : AliasScope);
+ I->setMetadata(LLVMContext::MD_alias_scope, AS);
+
+ MDNode *NA = I->getMetadata(LLVMContext::MD_noalias);
+ NA = (NA ? MDNode::intersect(NA, NoAlias) : NoAlias);
+ I->setMetadata(LLVMContext::MD_noalias, NA);
+ }
+ }
+
if (auto *LI = dyn_cast<LoadInst>(U)) {
LI->setAlignment(std::max(A, LI->getAlign()));
continue;
@@ -364,17 +407,19 @@ private:
if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType());
APInt Off(BitWidth, 0);
- if (GEP->getPointerOperand() == Ptr &&
- GEP->accumulateConstantOffset(DL, Off)) {
- Align GA = commonAlignment(A, Off.getLimitedValue());
- refineUsesAlignment(GEP, GA, DL, MaxDepth - 1);
+ if (GEP->getPointerOperand() == Ptr) {
+ Align GA;
+ if (GEP->accumulateConstantOffset(DL, Off))
+ GA = commonAlignment(A, Off.getLimitedValue());
+ refineUsesAlignmentAndAA(GEP, GA, DL, AliasScope, NoAlias,
+ MaxDepth - 1);
}
continue;
}
if (auto *I = dyn_cast<Instruction>(U)) {
if (I->getOpcode() == Instruction::BitCast ||
I->getOpcode() == Instruction::AddrSpaceCast)
- refineUsesAlignment(I, A, DL, MaxDepth - 1);
+ refineUsesAlignmentAndAA(I, A, DL, AliasScope, NoAlias, MaxDepth - 1);
}
}
}
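
The new refineUsesAlignmentAndAA walk attaches scoped-noalias metadata so later passes know the packed LDS fields cannot overlap: one anonymous domain, one scope per field, and each memory access gets its own scope in !alias.scope plus every other field's scope in !noalias. A condensed sketch of that tagging step, assuming the per-field scopes were already created with MDBuilder (helper name illustrative, not the pass's code):

  #include "llvm/ADT/ArrayRef.h"
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/IR/Instruction.h"
  #include "llvm/IR/Metadata.h"
  using namespace llvm;

  // Tag an access to field FieldIdx: it carries its own scope and is declared
  // noalias with respect to every other field's scope.
  static void tagFieldAccess(Instruction &I, ArrayRef<MDNode *> FieldScopes,
                             unsigned FieldIdx) {
    LLVMContext &Ctx = I.getContext();
    SmallVector<Metadata *, 8> Others;
    for (unsigned J = 0, E = FieldScopes.size(); J != E; ++J)
      if (J != FieldIdx)
        Others.push_back(FieldScopes[J]);
    I.setMetadata(LLVMContext::MD_alias_scope,
                  MDNode::get(Ctx, {FieldScopes[FieldIdx]}));
    if (!Others.empty())
      I.setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, Others));
  }
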
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
index 3dd27f1996d6..3fad7e192195 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -12,11 +12,11 @@
//===----------------------------------------------------------------------===//
//
+#include "AMDGPUMCInstLower.h"
#include "AMDGPUAsmPrinter.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUInstPrinter.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "R600AsmPrinter.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/IR/Constants.h"
@@ -34,36 +34,6 @@
using namespace llvm;
-namespace {
-
-class AMDGPUMCInstLower {
- MCContext &Ctx;
- const TargetSubtargetInfo &ST;
- const AsmPrinter &AP;
-
-public:
- AMDGPUMCInstLower(MCContext &ctx, const TargetSubtargetInfo &ST,
- const AsmPrinter &AP);
-
- bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const;
-
- /// Lower a MachineInstr to an MCInst
- void lower(const MachineInstr *MI, MCInst &OutMI) const;
-
-};
-
-class R600MCInstLower : public AMDGPUMCInstLower {
-public:
- R600MCInstLower(MCContext &ctx, const R600Subtarget &ST,
- const AsmPrinter &AP);
-
- /// Lower a MachineInstr to an MCInst
- void lower(const MachineInstr *MI, MCInst &OutMI) const;
-};
-
-
-} // End anonymous namespace
-
#include "AMDGPUGenMCPseudoLowering.inc"
AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx,
@@ -150,7 +120,8 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
// FIXME: Should be able to handle this with emitPseudoExpansionLowering. We
// need to select it to the subtarget specific version, and there's no way to
// do that with a single pseudo source operation.
- if (Opcode == AMDGPU::S_SETPC_B64_return)
+ if (Opcode == AMDGPU::S_SETPC_B64_return ||
+ Opcode == AMDGPU::S_SETPC_B64_return_gfx)
Opcode = AMDGPU::S_SETPC_B64;
else if (Opcode == AMDGPU::SI_CALL) {
// SI_CALL is just S_SWAPPC_B64 with an additional operand to track the
@@ -194,30 +165,6 @@ bool AMDGPUAsmPrinter::lowerOperand(const MachineOperand &MO,
return MCInstLowering.lowerOperand(MO, MCOp);
}
-static const MCExpr *lowerAddrSpaceCast(const TargetMachine &TM,
- const Constant *CV,
- MCContext &OutContext) {
- // TargetMachine does not support llvm-style cast. Use C++-style cast.
- // This is safe since TM is always of type AMDGPUTargetMachine or its
- // derived class.
- auto &AT = static_cast<const AMDGPUTargetMachine&>(TM);
- auto *CE = dyn_cast<ConstantExpr>(CV);
-
- // Lower null pointers in private and local address space.
- // Clang generates addrspacecast for null pointers in private and local
- // address space, which needs to be lowered.
- if (CE && CE->getOpcode() == Instruction::AddrSpaceCast) {
- auto Op = CE->getOperand(0);
- auto SrcAddr = Op->getType()->getPointerAddressSpace();
- if (Op->isNullValue() && AT.getNullPointerValue(SrcAddr) == 0) {
- auto DstAddr = CE->getType()->getPointerAddressSpace();
- return MCConstantExpr::create(AT.getNullPointerValue(DstAddr),
- OutContext);
- }
- }
- return nullptr;
-}
-
const MCExpr *AMDGPUAsmPrinter::lowerConstant(const Constant *CV) {
if (const MCExpr *E = lowerAddrSpaceCast(TM, CV, OutContext))
return E;
@@ -267,12 +214,18 @@ void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) {
return;
}
+ if (MI->isMetaInstruction()) {
+ if (isVerbose())
+ OutStreamer->emitRawComment(" meta instruction");
+ return;
+ }
+
MCInst TmpInst;
MCInstLowering.lower(MI, TmpInst);
EmitToStreamer(*OutStreamer, TmpInst);
#ifdef EXPENSIVE_CHECKS
- // Sanity-check getInstSizeInBytes on explicitly specified CPUs (it cannot
+ // Check getInstSizeInBytes on explicitly specified CPUs (it cannot
// work correctly for the generic CPU).
//
// The isPseudo check really shouldn't be here, but unfortunately there are
@@ -325,47 +278,3 @@ void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) {
}
}
}
-
-R600MCInstLower::R600MCInstLower(MCContext &Ctx, const R600Subtarget &ST,
- const AsmPrinter &AP) :
- AMDGPUMCInstLower(Ctx, ST, AP) { }
-
-void R600MCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
- OutMI.setOpcode(MI->getOpcode());
- for (const MachineOperand &MO : MI->explicit_operands()) {
- MCOperand MCOp;
- lowerOperand(MO, MCOp);
- OutMI.addOperand(MCOp);
- }
-}
-
-void R600AsmPrinter::emitInstruction(const MachineInstr *MI) {
- const R600Subtarget &STI = MF->getSubtarget<R600Subtarget>();
- R600MCInstLower MCInstLowering(OutContext, STI, *this);
-
- StringRef Err;
- if (!STI.getInstrInfo()->verifyInstruction(*MI, Err)) {
- LLVMContext &C = MI->getParent()->getParent()->getFunction().getContext();
- C.emitError("Illegal instruction detected: " + Err);
- MI->print(errs());
- }
-
- if (MI->isBundle()) {
- const MachineBasicBlock *MBB = MI->getParent();
- MachineBasicBlock::const_instr_iterator I = ++MI->getIterator();
- while (I != MBB->instr_end() && I->isInsideBundle()) {
- emitInstruction(&*I);
- ++I;
- }
- } else {
- MCInst TmpInst;
- MCInstLowering.lower(MI, TmpInst);
- EmitToStreamer(*OutStreamer, TmpInst);
- }
-}
-
-const MCExpr *R600AsmPrinter::lowerConstant(const Constant *CV) {
- if (const MCExpr *E = lowerAddrSpaceCast(TM, CV, OutContext))
- return E;
- return AsmPrinter::lowerConstant(CV);
-}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.h b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.h
new file mode 100644
index 000000000000..0e43b4fe9461
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.h
@@ -0,0 +1,69 @@
+//===- AMDGPUMCInstLower.h - Lower AMDGPU MachineInstr to an MCInst -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Header for lowering AMDGPU MachineInstrs to their corresponding MCInsts.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUMCINSTLOWER_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPUMCINSTLOWER_H
+
+#include "AMDGPUTargetMachine.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/Support/Casting.h"
+
+namespace llvm {
+class AsmPrinter;
+class MCContext;
+} // namespace llvm
+
+using namespace llvm;
+
+class AMDGPUMCInstLower {
+ MCContext &Ctx;
+ const TargetSubtargetInfo &ST;
+ const AsmPrinter &AP;
+
+public:
+ AMDGPUMCInstLower(MCContext &ctx, const TargetSubtargetInfo &ST,
+ const AsmPrinter &AP);
+
+ bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const;
+
+ /// Lower a MachineInstr to an MCInst
+ void lower(const MachineInstr *MI, MCInst &OutMI) const;
+};
+
+namespace {
+static inline const MCExpr *lowerAddrSpaceCast(const TargetMachine &TM,
+ const Constant *CV,
+ MCContext &OutContext) {
+ // TargetMachine does not support llvm-style cast. Use C++-style cast.
+ // This is safe since TM is always of type AMDGPUTargetMachine or its
+ // derived class.
+ auto &AT = static_cast<const AMDGPUTargetMachine &>(TM);
+ auto *CE = dyn_cast<ConstantExpr>(CV);
+
+ // Lower null pointers in private and local address space.
+ // Clang generates addrspacecast for null pointers in private and local
+ // address space, which needs to be lowered.
+ if (CE && CE->getOpcode() == Instruction::AddrSpaceCast) {
+ auto Op = CE->getOperand(0);
+ auto SrcAddr = Op->getType()->getPointerAddressSpace();
+ if (Op->isNullValue() && AT.getNullPointerValue(SrcAddr) == 0) {
+ auto DstAddr = CE->getType()->getPointerAddressSpace();
+ return MCConstantExpr::create(AT.getNullPointerValue(DstAddr),
+ OutContext);
+ }
+ }
+ return nullptr;
+}
+} // namespace
+#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUMCINSTLOWER_H
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h
index a61f1f7b8182..47faa6c72481 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h
@@ -16,18 +16,12 @@
#ifndef LLVM_LIB_TARGET_AMDGPUMIRFORMATTER_H
#define LLVM_LIB_TARGET_AMDGPUMIRFORMATTER_H
-#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MIRFormatter.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cstdint>
namespace llvm {
class MachineFunction;
-class MachineInstr;
struct PerFunctionMIParsingState;
-struct SlotMapping;
class AMDGPUMIRFormatter final : public MIRFormatter {
public:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
index 697513b5db7a..5d4b007f11e6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
@@ -194,7 +194,7 @@ bool PHILinearize::findSourcesFromMBB(MachineBasicBlock *SourceMBB,
}
void PHILinearize::addDest(unsigned DestReg, const DebugLoc &DL) {
- assert(findPHIInfoElement(DestReg) == nullptr && "Dest already exsists");
+ assert(findPHIInfoElement(DestReg) == nullptr && "Dest already exists");
PHISourcesT EmptySet;
PHIInfoElementT *NewElement = new PHIInfoElementT();
NewElement->DestReg = DestReg;
@@ -606,7 +606,7 @@ MRT::initializeMRT(MachineFunction &MF, const MachineRegionInfo *RegionInfo,
DenseMap<MachineRegion *, RegionMRT *> &RegionMap) {
for (auto &MFI : MF) {
MachineBasicBlock *ExitMBB = &MFI;
- if (ExitMBB->succ_size() == 0) {
+ if (ExitMBB->succ_empty()) {
return ExitMBB;
}
}
@@ -748,10 +748,8 @@ void LinearizedRegion::storeLiveOuts(MachineBasicBlock *MBB,
// If we have a successor with a PHI, source coming from this MBB we have to
// add the register as live out
- for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
- E = MBB->succ_end();
- SI != E; ++SI) {
- for (auto &II : *(*SI)) {
+ for (MachineBasicBlock *Succ : MBB->successors()) {
+ for (auto &II : *Succ) {
if (II.isPHI()) {
MachineInstr &PHI = II;
int numPreds = getPHINumInputs(PHI);
@@ -760,7 +758,7 @@ void LinearizedRegion::storeLiveOuts(MachineBasicBlock *MBB,
unsigned PHIReg = getPHISourceReg(PHI, i);
LLVM_DEBUG(dbgs()
<< "Add LiveOut (PhiSource " << printMBBReference(*MBB)
- << " -> " << printMBBReference(*(*SI))
+ << " -> " << printMBBReference(*Succ)
<< "): " << printReg(PHIReg, TRI) << "\n");
addLiveOut(PHIReg);
}
@@ -813,7 +811,7 @@ void LinearizedRegion::storeLiveOuts(RegionMRT *Region,
} else {
LinearizedRegion *SubRegion = CI->getRegionMRT()->getLinearizedRegion();
// We should be limited to only store registers that are live out from the
- // lineaized region
+ // linearized region
for (auto MBBI : SubRegion->MBBs) {
storeMBBLiveOuts(MBBI, MRI, TRI, PHIInfo, TopRegion);
}
@@ -896,7 +894,7 @@ void LinearizedRegion::replaceRegister(unsigned Register,
assert(Register != NewRegister && "Cannot replace a reg with itself");
LLVM_DEBUG(
- dbgs() << "Pepareing to replace register (region): "
+ dbgs() << "Preparing to replace register (region): "
<< printReg(Register, MRI->getTargetRegisterInfo()) << " with "
<< printReg(NewRegister, MRI->getTargetRegisterInfo()) << "\n");
@@ -1073,7 +1071,6 @@ private:
const SIInstrInfo *TII;
const TargetRegisterInfo *TRI;
MachineRegisterInfo *MRI;
- unsigned BBSelectRegister;
PHILinearize PHIInfo;
DenseMap<MachineBasicBlock *, MachineBasicBlock *> FallthroughMap;
RegionMRT *RMRT;
@@ -1125,8 +1122,6 @@ private:
void transformSimpleIfRegion(RegionMRT *Region);
- void eliminateDeadBranchOperands(MachineBasicBlock::instr_iterator &II);
-
void insertUnconditionalBranch(MachineBasicBlock *MBB,
MachineBasicBlock *Dest,
const DebugLoc &DL = DebugLoc());
@@ -1238,11 +1233,7 @@ bool AMDGPUMachineCFGStructurizer::regionIsSimpleIf(RegionMRT *Region) {
return false;
}
- for (MachineBasicBlock::const_succ_iterator SI = Entry->succ_begin(),
- E = Entry->succ_end();
- SI != E; ++SI) {
- MachineBasicBlock *Current = *SI;
-
+ for (MachineBasicBlock *Current : Entry->successors()) {
if (Current == Succ) {
FoundBypass = true;
} else if ((Current->succ_size() == 1) &&
@@ -1280,10 +1271,7 @@ static void fixRegionTerminator(RegionMRT *Region) {
auto Exit = LRegion->getExit();
SmallPtrSet<MachineBasicBlock *, 2> Successors;
- for (MachineBasicBlock::const_succ_iterator SI = Exit->succ_begin(),
- SE = Exit->succ_end();
- SI != SE; ++SI) {
- MachineBasicBlock *Succ = *SI;
+ for (MachineBasicBlock *Succ : Exit->successors()) {
if (LRegion->contains(Succ)) {
// Do not allow re-assign
assert(InternalSucc == nullptr);
@@ -1404,7 +1392,7 @@ void AMDGPUMachineCFGStructurizer::extractKilledPHIs(MachineBasicBlock *MBB) {
MachineInstr &Instr = *I;
if (Instr.isPHI()) {
unsigned PHIDestReg = getPHIDestReg(Instr);
- LLVM_DEBUG(dbgs() << "Extractking killed phi:\n");
+ LLVM_DEBUG(dbgs() << "Extracting killed phi:\n");
LLVM_DEBUG(Instr.dump());
PHIs.insert(&Instr);
PHIInfo.addDest(PHIDestReg, Instr.getDebugLoc());
@@ -1589,11 +1577,9 @@ void AMDGPUMachineCFGStructurizer::replaceLiveOutRegs(
// Check if register is live out of the basic block
MachineBasicBlock *DefMBB = getDefInstr(Reg)->getParent();
- for (auto UI = MRI->use_begin(Reg), E = MRI->use_end(); UI != E; ++UI) {
- if ((*UI).getParent()->getParent() != DefMBB) {
+ for (const MachineOperand &MO : MRI->use_operands(Reg))
+ if (MO.getParent()->getParent() != DefMBB)
IsDead = false;
- }
- }
LLVM_DEBUG(dbgs() << "Register " << printReg(Reg, TRI) << " is "
<< (IsDead ? "dead" : "alive")
@@ -1686,7 +1672,7 @@ void AMDGPUMachineCFGStructurizer::insertUnconditionalBranch(MachineBasicBlock *
static MachineBasicBlock *getSingleExitNode(MachineFunction &MF) {
MachineBasicBlock *result = nullptr;
for (auto &MFI : MF) {
- if (MFI.succ_size() == 0) {
+ if (MFI.succ_empty()) {
if (result == nullptr) {
result = &MFI;
} else {
@@ -1770,34 +1756,27 @@ static void removeExternalCFGSuccessors(MachineBasicBlock *MBB) {
static void removeExternalCFGEdges(MachineBasicBlock *StartMBB,
MachineBasicBlock *EndMBB) {
- // We have to check against the StartMBB successor becasuse a
+ // We have to check against the StartMBB successor because a
// structurized region with a loop will have the entry block split,
// and the backedge will go to the entry successor.
DenseSet<std::pair<MachineBasicBlock *, MachineBasicBlock *>> Succs;
unsigned SuccSize = StartMBB->succ_size();
if (SuccSize > 0) {
MachineBasicBlock *StartMBBSucc = *(StartMBB->succ_begin());
- for (MachineBasicBlock::succ_iterator PI = EndMBB->succ_begin(),
- E = EndMBB->succ_end();
- PI != E; ++PI) {
+ for (MachineBasicBlock *Succ : EndMBB->successors()) {
// Either we have a back-edge to the entry block, or a back-edge to the
// successor of the entry block since the block may be split.
- if ((*PI) != StartMBB &&
- !((*PI) == StartMBBSucc && StartMBB != EndMBB && SuccSize == 1)) {
+ if (Succ != StartMBB &&
+ !(Succ == StartMBBSucc && StartMBB != EndMBB && SuccSize == 1)) {
Succs.insert(
- std::pair<MachineBasicBlock *, MachineBasicBlock *>(EndMBB, *PI));
+ std::pair<MachineBasicBlock *, MachineBasicBlock *>(EndMBB, Succ));
}
}
}
- for (MachineBasicBlock::pred_iterator PI = StartMBB->pred_begin(),
- E = StartMBB->pred_end();
- PI != E; ++PI) {
- if ((*PI) != EndMBB) {
- Succs.insert(
- std::pair<MachineBasicBlock *, MachineBasicBlock *>(*PI, StartMBB));
- }
- }
+ for (MachineBasicBlock *Pred : StartMBB->predecessors())
+ if (Pred != EndMBB)
+ Succs.insert(std::make_pair(Pred, StartMBB));
for (auto SI : Succs) {
std::pair<MachineBasicBlock *, MachineBasicBlock *> Edge = SI;
@@ -1815,14 +1794,9 @@ MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfBlock(
MachineBasicBlock *IfBB = MF->CreateMachineBasicBlock();
if (InheritPreds) {
- for (MachineBasicBlock::pred_iterator PI = CodeBBStart->pred_begin(),
- E = CodeBBStart->pred_end();
- PI != E; ++PI) {
- if ((*PI) != CodeBBEnd) {
- MachineBasicBlock *Pred = (*PI);
+ for (MachineBasicBlock *Pred : CodeBBStart->predecessors())
+ if (Pred != CodeBBEnd)
Pred->addSuccessor(IfBB);
- }
- }
}
removeExternalCFGEdges(CodeBBStart, CodeBBEnd);
@@ -1872,9 +1846,8 @@ void AMDGPUMachineCFGStructurizer::ensureCondIsNotKilled(
return;
Register CondReg = Cond[0].getReg();
- for (auto UI = MRI->use_begin(CondReg), E = MRI->use_end(); UI != E; ++UI) {
- (*UI).setIsKill(false);
- }
+ for (MachineOperand &MO : MRI->use_operands(CondReg))
+ MO.setIsKill(false);
}
void AMDGPUMachineCFGStructurizer::rewriteCodeBBTerminator(MachineBasicBlock *CodeBB,
@@ -2018,7 +1991,7 @@ void AMDGPUMachineCFGStructurizer::rewriteLiveOutRegs(MachineBasicBlock *IfBB,
LLVM_DEBUG(dbgs() << "LiveOut: " << printReg(LI, TRI));
if (!containsDef(CodeBB, InnerRegion, LI) ||
(!IsSingleBB && (getDefInstr(LI)->getParent() == LRegion->getExit()))) {
- // If the register simly lives through the CodeBB, we don't have
+ // If the register simply lives through the CodeBB, we don't have
// to rewrite anything since the register is not defined in this
// part of the code.
LLVM_DEBUG(dbgs() << "- through");
@@ -2028,14 +2001,14 @@ void AMDGPUMachineCFGStructurizer::rewriteLiveOutRegs(MachineBasicBlock *IfBB,
unsigned Reg = LI;
if (/*!PHIInfo.isSource(Reg) &&*/ Reg != InnerRegion->getBBSelectRegOut()) {
// If the register is live out, we do want to create a phi,
- // unless it is from the Exit block, becasuse in that case there
+ // unless it is from the Exit block, because in that case there
// is already a PHI, and no need to create a new one.
// If the register is just a live out def and not part of a phi
// chain, we need to create a PHI node to handle the if region,
// and replace all uses outside of the region with the new dest
// register, unless it is the outgoing BB select register. We have
- // already creaed phi nodes for these.
+ // already created phi nodes for these.
const TargetRegisterClass *RegClass = MRI->getRegClass(Reg);
Register PHIDestReg = MRI->createVirtualRegister(RegClass);
Register IfSourceReg = MRI->createVirtualRegister(RegClass);
@@ -2569,11 +2542,9 @@ static void removeOldExitPreds(RegionMRT *Region) {
static bool mbbHasBackEdge(MachineBasicBlock *MBB,
SmallPtrSet<MachineBasicBlock *, 8> &MBBs) {
- for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI) {
- if (MBBs.contains(*SI)) {
+ for (MachineBasicBlock *Succ : MBB->successors())
+ if (MBBs.contains(Succ))
return true;
- }
- }
return false;
}
@@ -2591,11 +2562,9 @@ static bool containsNewBackedge(MRT *Tree,
}
} else {
RegionMRT *Region = Tree->getRegionMRT();
- SetVector<MRT *> *Children = Region->getChildren();
- for (auto CI = Children->rbegin(), CE = Children->rend(); CI != CE; ++CI) {
- if (containsNewBackedge(*CI, MBBs))
+ for (MRT *C : llvm::reverse(*Region->getChildren()))
+ if (containsNewBackedge(C, MBBs))
return true;
- }
}
return false;
}
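
Most of the structurizer changes above are mechanical modernizations: explicit succ_iterator/pred_iterator and use_iterator loops become range-based loops over MachineBasicBlock::successors()/predecessors() and MachineRegisterInfo::use_operands(). A small standalone sketch of the idiom mbbHasBackEdge uses after the rewrite (a reimplementation for illustration, not the file's code):

  #include "llvm/ADT/SmallPtrSet.h"
  #include "llvm/CodeGen/MachineBasicBlock.h"
  using namespace llvm;

  // True if any successor of MBB is already in the given block set, i.e. the
  // block closes a back-edge into the region being built.
  static bool hasSuccessorIn(MachineBasicBlock &MBB,
                             const SmallPtrSetImpl<MachineBasicBlock *> &Blocks) {
    for (MachineBasicBlock *Succ : MBB.successors())
      if (Blocks.contains(Succ))
        return true;
    return false;
  }
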
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
index d27eb68ca74b..5a5a5d213a1a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
@@ -13,7 +13,7 @@
//
// In LLVM CodeGen the runtime-handle metadata will be translated to
// RuntimeHandle metadata in code object. Runtime allocates a global buffer
-// for each kernel with RuntimeHandel metadata and saves the kernel address
+// for each kernel with RuntimeHandle metadata and saves the kernel address
// required for the AQL packet into the buffer. __enqueue_kernel function
// in device library knows that the invoke function pointer in the block
// literal is actually runtime handle and loads the kernel address from it
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
index 728be811afae..fc984d2dda64 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
+#include "AMDGPUCombinerHelper.h"
#include "AMDGPULegalizerInfo.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
@@ -34,10 +35,11 @@ protected:
MachineIRBuilder &B;
MachineFunction &MF;
MachineRegisterInfo &MRI;
- CombinerHelper &Helper;
+ AMDGPUCombinerHelper &Helper;
public:
- AMDGPUPostLegalizerCombinerHelper(MachineIRBuilder &B, CombinerHelper &Helper)
+ AMDGPUPostLegalizerCombinerHelper(MachineIRBuilder &B,
+ AMDGPUCombinerHelper &Helper)
: B(B), MF(B.getMF()), MRI(*B.getMRI()), Helper(Helper){};
struct FMinFMaxLegacyInfo {
@@ -257,12 +259,12 @@ bool AMDGPUPostLegalizerCombinerHelper::matchRemoveFcanonicalize(
class AMDGPUPostLegalizerCombinerHelperState {
protected:
- CombinerHelper &Helper;
+ AMDGPUCombinerHelper &Helper;
AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper;
public:
AMDGPUPostLegalizerCombinerHelperState(
- CombinerHelper &Helper,
+ AMDGPUCombinerHelper &Helper,
AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper)
: Helper(Helper), PostLegalizerHelper(PostLegalizerHelper) {}
};
@@ -300,7 +302,7 @@ public:
bool AMDGPUPostLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
MachineInstr &MI,
MachineIRBuilder &B) const {
- CombinerHelper Helper(Observer, B, KB, MDT, LInfo);
+ AMDGPUCombinerHelper Helper(Observer, B, KB, MDT, LInfo);
AMDGPUPostLegalizerCombinerHelper PostLegalizerHelper(B, Helper);
AMDGPUGenPostLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper,
PostLegalizerHelper);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
index 13f09ab8f164..c029046ab65f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
+#include "AMDGPUCombinerHelper.h"
#include "AMDGPULegalizerInfo.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
@@ -34,10 +35,11 @@ protected:
MachineIRBuilder &B;
MachineFunction &MF;
MachineRegisterInfo &MRI;
- CombinerHelper &Helper;
+ AMDGPUCombinerHelper &Helper;
public:
- AMDGPUPreLegalizerCombinerHelper(MachineIRBuilder &B, CombinerHelper &Helper)
+ AMDGPUPreLegalizerCombinerHelper(MachineIRBuilder &B,
+ AMDGPUCombinerHelper &Helper)
: B(B), MF(B.getMF()), MRI(*B.getMRI()), Helper(Helper){};
struct ClampI64ToI16MatchInfo {
@@ -154,12 +156,12 @@ void AMDGPUPreLegalizerCombinerHelper::applyClampI64ToI16(
class AMDGPUPreLegalizerCombinerHelperState {
protected:
- CombinerHelper &Helper;
+ AMDGPUCombinerHelper &Helper;
AMDGPUPreLegalizerCombinerHelper &PreLegalizerHelper;
public:
AMDGPUPreLegalizerCombinerHelperState(
- CombinerHelper &Helper,
+ AMDGPUCombinerHelper &Helper,
AMDGPUPreLegalizerCombinerHelper &PreLegalizerHelper)
: Helper(Helper), PreLegalizerHelper(PreLegalizerHelper) {}
};
@@ -196,17 +198,15 @@ public:
bool AMDGPUPreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
MachineInstr &MI,
MachineIRBuilder &B) const {
- CombinerHelper Helper(Observer, B, KB, MDT);
+ AMDGPUCombinerHelper Helper(Observer, B, KB, MDT);
AMDGPUPreLegalizerCombinerHelper PreLegalizerHelper(B, Helper);
AMDGPUGenPreLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper,
PreLegalizerHelper);
- if (Generated.tryCombineAll(Observer, MI, B, Helper))
+ if (Generated.tryCombineAll(Observer, MI, B))
return true;
switch (MI.getOpcode()) {
- case TargetOpcode::G_MEMCPY_INLINE:
- return Helper.tryEmitMemcpyInline(MI);
case TargetOpcode::G_CONCAT_VECTORS:
return Helper.tryCombineConcatVectors(MI);
case TargetOpcode::G_SHUFFLE_VECTOR:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
index 7b6959b56145..d560d2043f42 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
@@ -149,11 +149,11 @@ bool AMDGPUPrintfRuntimeBindingImpl::lowerPrintfForGpu(Module &M) {
IRBuilder<> Builder(Ctx);
Type *I32Ty = Type::getInt32Ty(Ctx);
unsigned UniqID = 0;
- // NB: This is important for this string size to be divizable by 4
+  // NB: It is important for this string size to be divisible by 4
const char NonLiteralStr[4] = "???";
for (auto CI : Printfs) {
- unsigned NumOps = CI->getNumArgOperands();
+ unsigned NumOps = CI->arg_size();
SmallString<16> OpConvSpecifiers;
Value *Op = CI->getArgOperand(0);
@@ -201,10 +201,10 @@ bool AMDGPUPrintfRuntimeBindingImpl::lowerPrintfForGpu(Module &M) {
std::string AStreamHolder;
raw_string_ostream Sizes(AStreamHolder);
int Sum = DWORD_ALIGN;
- Sizes << CI->getNumArgOperands() - 1;
+ Sizes << CI->arg_size() - 1;
Sizes << ':';
- for (unsigned ArgCount = 1; ArgCount < CI->getNumArgOperands() &&
- ArgCount <= OpConvSpecifiers.size();
+ for (unsigned ArgCount = 1;
+ ArgCount < CI->arg_size() && ArgCount <= OpConvSpecifiers.size();
ArgCount++) {
Value *Arg = CI->getArgOperand(ArgCount);
Type *ArgType = Arg->getType();
@@ -330,7 +330,7 @@ bool AMDGPUPrintfRuntimeBindingImpl::lowerPrintfForGpu(Module &M) {
M.getOrInsertFunction(StringRef("__printf_alloc"), FTy_alloc, Attr);
LLVM_DEBUG(dbgs() << "Printf metadata = " << Sizes.str() << '\n');
- std::string fmtstr = itostr(++UniqID) + ":" + Sizes.str().c_str();
+ std::string fmtstr = itostr(++UniqID) + ":" + Sizes.str();
MDString *fmtStrArray = MDString::get(Ctx, fmtstr);
// Instead of creating global variables, the
@@ -389,8 +389,8 @@ bool AMDGPUPrintfRuntimeBindingImpl::lowerPrintfForGpu(Module &M) {
Type *Int32Ty = Type::getInt32Ty(Ctx);
Type *Int64Ty = Type::getInt64Ty(Ctx);
- for (unsigned ArgCount = 1; ArgCount < CI->getNumArgOperands() &&
- ArgCount <= OpConvSpecifiers.size();
+ for (unsigned ArgCount = 1;
+ ArgCount < CI->arg_size() && ArgCount <= OpConvSpecifiers.size();
ArgCount++) {
Value *Arg = CI->getArgOperand(ArgCount);
Type *ArgType = Arg->getType();
@@ -524,7 +524,7 @@ bool AMDGPUPrintfRuntimeBindingImpl::lowerPrintfForGpu(Module &M) {
LLVM_DEBUG(dbgs() << "inserting store to printf buffer:\n"
<< *StBuff << '\n');
(void)StBuff;
- if (I + 1 == E && ArgCount + 1 == CI->getNumArgOperands())
+ if (I + 1 == E && ArgCount + 1 == CI->arg_size())
break;
BufferIdx = GetElementPtrInst::Create(I8Ty, BufferIdx, BuffOffset,
"PrintBuffNextPtr", Brnch);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 3f1f21a33f7e..3ec5dd7e0eff 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -21,6 +21,7 @@
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetMachine.h"
+#include "Utils/AMDGPUBaseInfo.h"
#define DEBUG_TYPE "amdgpu-promote-alloca"
@@ -176,6 +177,10 @@ bool AMDGPUPromoteAllocaImpl::run(Function &F) {
if (IsAMDGCN) {
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
MaxVGPRs = ST.getMaxNumVGPRs(ST.getWavesPerEU(F).first);
+ // A non-entry function has only 32 caller preserved registers.
+ // Do not promote alloca which will force spilling.
+ if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))
+ MaxVGPRs = std::min(MaxVGPRs, 32u);
} else {
MaxVGPRs = 128;
}
@@ -200,7 +205,7 @@ bool AMDGPUPromoteAllocaImpl::run(Function &F) {
std::pair<Value *, Value *>
AMDGPUPromoteAllocaImpl::getLocalSizeYZ(IRBuilder<> &Builder) {
- const Function &F = *Builder.GetInsertBlock()->getParent();
+ Function &F = *Builder.GetInsertBlock()->getParent();
const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);
if (!IsAMDHSA) {
@@ -256,11 +261,12 @@ AMDGPUPromoteAllocaImpl::getLocalSizeYZ(IRBuilder<> &Builder) {
= Intrinsic::getDeclaration(Mod, Intrinsic::amdgcn_dispatch_ptr);
CallInst *DispatchPtr = Builder.CreateCall(DispatchPtrFn, {});
- DispatchPtr->addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
- DispatchPtr->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
+ DispatchPtr->addRetAttr(Attribute::NoAlias);
+ DispatchPtr->addRetAttr(Attribute::NonNull);
+ F.removeFnAttr("amdgpu-no-dispatch-ptr");
// Size of the dispatch packet struct.
- DispatchPtr->addDereferenceableAttr(AttributeList::ReturnIndex, 64);
+ DispatchPtr->addDereferenceableRetAttr(64);
Type *I32Ty = Type::getInt32Ty(Mod->getContext());
Value *CastDispatchPtr = Builder.CreateBitCast(
@@ -268,7 +274,7 @@ AMDGPUPromoteAllocaImpl::getLocalSizeYZ(IRBuilder<> &Builder) {
// We could do a single 64-bit load here, but it's likely that the basic
// 32-bit and extract sequence is already present, and it is probably easier
- // to CSE this. The loads should be mergable later anyway.
+ // to CSE this. The loads should be mergeable later anyway.
Value *GEPXY = Builder.CreateConstInBoundsGEP1_64(I32Ty, CastDispatchPtr, 1);
LoadInst *LoadXY = Builder.CreateAlignedLoad(I32Ty, GEPXY, Align(4));
@@ -288,23 +294,27 @@ AMDGPUPromoteAllocaImpl::getLocalSizeYZ(IRBuilder<> &Builder) {
Value *AMDGPUPromoteAllocaImpl::getWorkitemID(IRBuilder<> &Builder,
unsigned N) {
- const AMDGPUSubtarget &ST =
- AMDGPUSubtarget::get(TM, *Builder.GetInsertBlock()->getParent());
+ Function *F = Builder.GetInsertBlock()->getParent();
+ const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, *F);
Intrinsic::ID IntrID = Intrinsic::not_intrinsic;
+ StringRef AttrName;
switch (N) {
case 0:
IntrID = IsAMDGCN ? (Intrinsic::ID)Intrinsic::amdgcn_workitem_id_x
: (Intrinsic::ID)Intrinsic::r600_read_tidig_x;
+ AttrName = "amdgpu-no-workitem-id-x";
break;
case 1:
IntrID = IsAMDGCN ? (Intrinsic::ID)Intrinsic::amdgcn_workitem_id_y
: (Intrinsic::ID)Intrinsic::r600_read_tidig_y;
+ AttrName = "amdgpu-no-workitem-id-y";
break;
case 2:
IntrID = IsAMDGCN ? (Intrinsic::ID)Intrinsic::amdgcn_workitem_id_z
: (Intrinsic::ID)Intrinsic::r600_read_tidig_z;
+ AttrName = "amdgpu-no-workitem-id-z";
break;
default:
llvm_unreachable("invalid dimension");
@@ -313,6 +323,7 @@ Value *AMDGPUPromoteAllocaImpl::getWorkitemID(IRBuilder<> &Builder,
Function *WorkitemIdFn = Intrinsic::getDeclaration(Mod, IntrID);
CallInst *CI = Builder.CreateCall(WorkitemIdFn);
ST.makeLIDRangeMetadata(CI);
+ F->removeFnAttr(AttrName);
return CI;
}
@@ -1065,9 +1076,9 @@ bool AMDGPUPromoteAllocaImpl::handleAlloca(AllocaInst &I, bool SufficientLDS) {
MI->getRawSource(), MI->getSourceAlign(),
MI->getLength(), MI->isVolatile());
- for (unsigned I = 1; I != 3; ++I) {
- if (uint64_t Bytes = Intr->getDereferenceableBytes(I)) {
- B->addDereferenceableAttr(I, Bytes);
+ for (unsigned I = 0; I != 2; ++I) {
+ if (uint64_t Bytes = Intr->getParamDereferenceableBytes(I)) {
+ B->addDereferenceableParamAttr(I, Bytes);
}
}
@@ -1101,6 +1112,10 @@ bool promoteAllocasToVector(Function &F, TargetMachine &TM) {
if (TM.getTargetTriple().getArch() == Triple::amdgcn) {
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
MaxVGPRs = ST.getMaxNumVGPRs(ST.getWavesPerEU(F).first);
+ // A non-entry function has only 32 caller preserved registers.
+ // Do not promote alloca which will force spilling.
+ if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))
+ MaxVGPRs = std::min(MaxVGPRs, 32u);
} else {
MaxVGPRs = 128;
}
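Both hunks above apply the same cap: a device (non-entry) function only has 32 caller-preserved VGPRs, so vectorizing an alloca beyond that budget would force spills. A minimal sketch of the budget selection under that assumption (the helper is illustrative, not part of the pass):

#include <algorithm>

static unsigned vgprBudget(bool IsEntryFunction, unsigned MaxFromWavesPerEU) {
  unsigned MaxVGPRs = MaxFromWavesPerEU; // e.g. ST.getMaxNumVGPRs(...)
  if (!IsEntryFunction)                  // callees keep only 32 VGPRs live
    MaxVGPRs = std::min(MaxVGPRs, 32u);
  return MaxVGPRs;
}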
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp
new file mode 100644
index 000000000000..01d03d17ec47
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp
@@ -0,0 +1,195 @@
+//===-- AMDGPUPromoteKernelArguments.cpp ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This pass recursively promotes generic pointer arguments of a kernel
+/// into the global address space.
+///
+/// The pass walks the kernel's pointer arguments and the loads from them. If a
+/// loaded value is a pointer and the loaded pointer is not modified in the
+/// kernel before the load, the loaded pointer is promoted to the global
+/// address space. The process then continues recursively.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/InitializePasses.h"
+
+#define DEBUG_TYPE "amdgpu-promote-kernel-arguments"
+
+using namespace llvm;
+
+namespace {
+
+class AMDGPUPromoteKernelArguments : public FunctionPass {
+ MemorySSA *MSSA;
+
+ Instruction *ArgCastInsertPt;
+
+ SmallVector<Value *> Ptrs;
+
+ void enqueueUsers(Value *Ptr);
+
+ bool promotePointer(Value *Ptr);
+
+public:
+ static char ID;
+
+ AMDGPUPromoteKernelArguments() : FunctionPass(ID) {}
+
+ bool run(Function &F, MemorySSA &MSSA);
+
+ bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MemorySSAWrapperPass>();
+ AU.setPreservesAll();
+ }
+};
+
+} // end anonymous namespace
+
+void AMDGPUPromoteKernelArguments::enqueueUsers(Value *Ptr) {
+ SmallVector<User *> PtrUsers(Ptr->users());
+
+ while (!PtrUsers.empty()) {
+ Instruction *U = dyn_cast<Instruction>(PtrUsers.pop_back_val());
+ if (!U)
+ continue;
+
+ switch (U->getOpcode()) {
+ default:
+ break;
+ case Instruction::Load: {
+ LoadInst *LD = cast<LoadInst>(U);
+ PointerType *PT = dyn_cast<PointerType>(LD->getType());
+ if (!PT ||
+ (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS &&
+ PT->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS &&
+ PT->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS) ||
+ LD->getPointerOperand()->stripInBoundsOffsets() != Ptr)
+ break;
+ const MemoryAccess *MA = MSSA->getWalker()->getClobberingMemoryAccess(LD);
+      // TODO: This load can probably be promoted to the constant address space.
+ if (MSSA->isLiveOnEntryDef(MA))
+ Ptrs.push_back(LD);
+ break;
+ }
+ case Instruction::GetElementPtr:
+ case Instruction::AddrSpaceCast:
+ case Instruction::BitCast:
+ if (U->getOperand(0)->stripInBoundsOffsets() == Ptr)
+ PtrUsers.append(U->user_begin(), U->user_end());
+ break;
+ }
+ }
+}
+
+bool AMDGPUPromoteKernelArguments::promotePointer(Value *Ptr) {
+ enqueueUsers(Ptr);
+
+ PointerType *PT = cast<PointerType>(Ptr->getType());
+ if (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS)
+ return false;
+
+ bool IsArg = isa<Argument>(Ptr);
+ IRBuilder<> B(IsArg ? ArgCastInsertPt
+ : &*std::next(cast<Instruction>(Ptr)->getIterator()));
+
+  // Cast the pointer to the global address space and back to flat, and let the
+  // Infer Address Spaces pass do all the necessary rewriting.
+ PointerType *NewPT =
+ PointerType::getWithSamePointeeType(PT, AMDGPUAS::GLOBAL_ADDRESS);
+ Value *Cast =
+ B.CreateAddrSpaceCast(Ptr, NewPT, Twine(Ptr->getName(), ".global"));
+ Value *CastBack =
+ B.CreateAddrSpaceCast(Cast, PT, Twine(Ptr->getName(), ".flat"));
+ Ptr->replaceUsesWithIf(CastBack,
+ [Cast](Use &U) { return U.getUser() != Cast; });
+
+ return true;
+}
+
+// skip allocas
+static BasicBlock::iterator getInsertPt(BasicBlock &BB) {
+ BasicBlock::iterator InsPt = BB.getFirstInsertionPt();
+ for (BasicBlock::iterator E = BB.end(); InsPt != E; ++InsPt) {
+ AllocaInst *AI = dyn_cast<AllocaInst>(&*InsPt);
+
+ // If this is a dynamic alloca, the value may depend on the loaded kernargs,
+ // so loads will need to be inserted before it.
+ if (!AI || !AI->isStaticAlloca())
+ break;
+ }
+
+ return InsPt;
+}
+
+bool AMDGPUPromoteKernelArguments::run(Function &F, MemorySSA &MSSA) {
+ if (skipFunction(F))
+ return false;
+
+ CallingConv::ID CC = F.getCallingConv();
+ if (CC != CallingConv::AMDGPU_KERNEL || F.arg_empty())
+ return false;
+
+ ArgCastInsertPt = &*getInsertPt(*F.begin());
+ this->MSSA = &MSSA;
+
+ for (Argument &Arg : F.args()) {
+ if (Arg.use_empty())
+ continue;
+
+ PointerType *PT = dyn_cast<PointerType>(Arg.getType());
+ if (!PT || (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS &&
+ PT->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS &&
+ PT->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS))
+ continue;
+
+ Ptrs.push_back(&Arg);
+ }
+
+ bool Changed = false;
+ while (!Ptrs.empty()) {
+ Value *Ptr = Ptrs.pop_back_val();
+ Changed |= promotePointer(Ptr);
+ }
+
+ return Changed;
+}
+
+bool AMDGPUPromoteKernelArguments::runOnFunction(Function &F) {
+ MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
+ return run(F, MSSA);
+}
+
+INITIALIZE_PASS_BEGIN(AMDGPUPromoteKernelArguments, DEBUG_TYPE,
+ "AMDGPU Promote Kernel Arguments", false, false)
+INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
+INITIALIZE_PASS_END(AMDGPUPromoteKernelArguments, DEBUG_TYPE,
+ "AMDGPU Promote Kernel Arguments", false, false)
+
+char AMDGPUPromoteKernelArguments::ID = 0;
+
+FunctionPass *llvm::createAMDGPUPromoteKernelArgumentsPass() {
+ return new AMDGPUPromoteKernelArguments();
+}
+
+PreservedAnalyses
+AMDGPUPromoteKernelArgumentsPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
+ if (AMDGPUPromoteKernelArguments().run(F, MSSA)) {
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ PA.preserve<MemorySSAAnalysis>();
+ return PA;
+ }
+ return PreservedAnalyses::all();
+}
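A hedged usage sketch for the new pass with the new pass manager. It assumes AMDGPUPromoteKernelArgumentsPass is declared in AMDGPU.h in namespace llvm as the PreservedAnalyses-returning wrapper defined above, and that MemorySSAAnalysis has been registered with the FunctionAnalysisManager (for example through PassBuilder::registerFunctionAnalyses), since the pass fetches it via getResult<MemorySSAAnalysis>:

#include "AMDGPU.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"

static void promoteKernelArgs(llvm::Function &F,
                              llvm::FunctionAnalysisManager &FAM) {
  llvm::FunctionPassManager FPM;
  FPM.addPass(llvm::AMDGPUPromoteKernelArgumentsPass());
  FPM.run(F, FAM); // MemorySSA is obtained inside the pass through FAM
}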
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp
index 0e4c26170a8f..dafbeaeaec52 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp
@@ -9,7 +9,7 @@
/// \file
/// \brief This pass propagates attributes from kernels to the non-entry
/// functions. Most of the library functions were not compiled for specific ABI,
-/// yet will be correctly compiled if proper attrbutes are propagated from the
+/// yet will be correctly compiled if proper attributes are propagated from the
/// caller.
///
/// The pass analyzes call graph and propagates ABI target features through the
@@ -17,7 +17,7 @@
///
/// It can run in two modes: as a function or module pass. A function pass
/// simply propagates attributes. A module pass clones functions if there are
-/// callers with different ABI. If a function is clonned all call sites will
+/// callers with different ABI. If a function is cloned all call sites will
/// be updated to use a correct clone.
///
/// A function pass is limited in functionality but can run early in the
@@ -55,10 +55,7 @@ static constexpr const FeatureBitset TargetFeatures = {
// Attributes to propagate.
// TODO: Support conservative min/max merging instead of cloning.
-static constexpr const char* AttributeNames[] = {
- "amdgpu-waves-per-eu",
- "amdgpu-flat-work-group-size"
-};
+static constexpr const char *AttributeNames[] = {"amdgpu-waves-per-eu"};
static constexpr unsigned NumAttr =
sizeof(AttributeNames) / sizeof(AttributeNames[0]);
@@ -149,7 +146,7 @@ public:
bool process(Module &M);
};
-// Allows to propagate attributes early, but no clonning is allowed as it must
+// Allows propagating attributes early, but no cloning is allowed as it must
// be a function pass to run before any optimizations.
// TODO: We shall only need a one instance of module pass, but that needs to be
// in the linker pipeline which is currently not possible.
@@ -168,7 +165,7 @@ public:
bool runOnFunction(Function &F) override;
};
-// Allows to propagate attributes with clonning but does that late in the
+// Allows propagating attributes with cloning, but does that late in the
// pipeline.
class AMDGPUPropagateAttributesLate : public ModulePass {
const TargetMachine *TM;
@@ -212,10 +209,10 @@ AMDGPUPropagateAttributes::findFunction(const FnProperties &PropsNeeded,
bool AMDGPUPropagateAttributes::process(Module &M) {
for (auto &F : M.functions())
- if (AMDGPU::isEntryFunctionCC(F.getCallingConv()))
+ if (AMDGPU::isKernel(F.getCallingConv()))
Roots.insert(&F);
- return process();
+ return Roots.empty() ? false : process();
}
bool AMDGPUPropagateAttributes::process(Function &F) {
@@ -228,8 +225,7 @@ bool AMDGPUPropagateAttributes::process() {
SmallSet<Function *, 32> NewRoots;
SmallSet<Function *, 32> Replaced;
- if (Roots.empty())
- return false;
+ assert(!Roots.empty());
Module &M = *(*Roots.begin())->getParent();
do {
@@ -273,7 +269,7 @@ bool AMDGPUPropagateAttributes::process() {
if (!NewF) {
const FnProperties NewProps = CalleeProps.adjustToCaller(CallerProps);
if (!AllowClone) {
- // This may set different features on different iteartions if
+ // This may set different features on different iterations if
// there is a contradiction in callers' attributes. In this case
// we rely on a second pass running on Module, which is allowed
// to clone.
@@ -383,7 +379,7 @@ bool AMDGPUPropagateAttributesEarly::runOnFunction(Function &F) {
TM = &TPC->getTM<TargetMachine>();
}
- if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))
+ if (!AMDGPU::isKernel(F.getCallingConv()))
return false;
return AMDGPUPropagateAttributes(TM, false).process(F);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
index 4e12e5cd8f65..d7dc9ee4117b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
@@ -57,9 +57,9 @@ public:
MinMaxMedOpc getMinMaxPair(unsigned Opc);
- template <class m_Cst>
+ template <class m_Cst, typename CstTy>
bool matchMed(MachineInstr &MI, MachineRegisterInfo &MRI, MinMaxMedOpc MMMOpc,
- Register &Val, Register &K0, Register &K1);
+ Register &Val, CstTy &K0, CstTy &K1);
bool matchIntMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo);
void applyMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo);
@@ -83,11 +83,11 @@ AMDGPURegBankCombinerHelper::getMinMaxPair(unsigned Opc) {
}
}
-template <class m_Cst>
+template <class m_Cst, typename CstTy>
bool AMDGPURegBankCombinerHelper::matchMed(MachineInstr &MI,
MachineRegisterInfo &MRI,
MinMaxMedOpc MMMOpc, Register &Val,
- Register &K0, Register &K1) {
+ CstTy &K0, CstTy &K1) {
// 4 operand commutes of: min(max(Val, K0), K1).
// Find K1 from outer instr: min(max(...), K1) or min(K1, max(...)).
// Find K0 and Val from inner instr: max(K0, Val) or max(Val, K0).
@@ -115,19 +115,18 @@ bool AMDGPURegBankCombinerHelper::matchIntMinMaxToMed3(
return false;
MinMaxMedOpc OpcodeTriple = getMinMaxPair(MI.getOpcode());
- Register Val, K0, K1;
+ Register Val;
+ Optional<ValueAndVReg> K0, K1;
// Match min(max(Val, K0), K1) or max(min(Val, K1), K0). Then see if K0 <= K1.
- if (!matchMed<ICstRegMatch>(MI, MRI, OpcodeTriple, Val, K0, K1))
+ if (!matchMed<GCstAndRegMatch>(MI, MRI, OpcodeTriple, Val, K0, K1))
return false;
- const APInt &K0_Imm = getConstantIntVRegVal(K0, MRI)->getValue();
- const APInt &K1_Imm = getConstantIntVRegVal(K1, MRI)->getValue();
- if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_SMED3 && K0_Imm.sgt(K1_Imm))
+ if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_SMED3 && K0->Value.sgt(K1->Value))
return false;
- if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_UMED3 && K0_Imm.ugt(K1_Imm))
+ if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_UMED3 && K0->Value.ugt(K1->Value))
return false;
- MatchInfo = {OpcodeTriple.Med, Val, K0, K1};
+ MatchInfo = {OpcodeTriple.Med, Val, K0->VReg, K1->VReg};
return true;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 0e4005627e02..ab3ce980c3f6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -58,7 +58,7 @@
///
/// We avoid trying to solve this problem in RegBankSelect. Any VALU G_*
/// operation should have its source operands all mapped to VGPRs (except for
-/// VCC), inserting copies from any SGPR operands. This the most trival legal
+/// VCC), inserting copies from any SGPR operands. This is the most trivial legal
/// mapping. Anything beyond the simplest 1:1 instruction selection would be too
/// complicated to solve here. Every optimization pattern or instruction
/// selected to multiple outputs would have to enforce this rule, and there
@@ -118,7 +118,7 @@ public:
Opc == AMDGPU::G_SEXT) {
// LegalizerHelper wants to use the basic legalization artifacts when
// widening etc. We don't handle selection with vcc in artifact sources,
- // so we need to use a sslect instead to handle these properly.
+ // so we need to use a select instead to handle these properly.
Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = MI.getOperand(1).getReg();
const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, *RBI.TRI);
@@ -282,7 +282,7 @@ AMDGPURegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
// VCC-like use.
if (TRI->isSGPRClass(&RC)) {
// FIXME: This probably came from a copy from a physical register, which
- // should be inferrrable from the copied to-type. We don't have many boolean
+ // should be inferable from the copied to-type. We don't have many boolean
// physical register constraints so just assume a normal SGPR for now.
if (!Ty.isValid())
return AMDGPU::SGPRRegBank;
@@ -734,23 +734,6 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
const int OrigRangeSize = std::distance(Range.begin(), Range.end());
#endif
- for (MachineInstr &MI : Range) {
- for (MachineOperand &Def : MI.defs()) {
- if (MRI.use_nodbg_empty(Def.getReg()))
- continue;
-
- LLT ResTy = MRI.getType(Def.getReg());
- const RegisterBank *DefBank = getRegBank(Def.getReg(), MRI, *TRI);
- ResultRegs.push_back(Def.getReg());
- Register InitReg = B.buildUndef(ResTy).getReg(0);
- Register PhiReg = MRI.createGenericVirtualRegister(ResTy);
- InitResultRegs.push_back(InitReg);
- PhiRegs.push_back(PhiReg);
- MRI.setRegBank(PhiReg, *DefBank);
- MRI.setRegBank(InitReg, *DefBank);
- }
- }
-
Register SaveExecReg = MRI.createVirtualRegister(WaveRC);
Register InitSaveExecReg = MRI.createVirtualRegister(WaveRC);
@@ -894,23 +877,26 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
bool Is64 = OpSize % 64 == 0;
- LLT UnmergeTy = OpSize % 64 == 0 ? LLT::scalar(64) : LLT::scalar(32);
- unsigned CmpOp = OpSize % 64 == 0 ? AMDGPU::V_CMP_EQ_U64_e64
- : AMDGPU::V_CMP_EQ_U32_e64;
-
- // The compares can be done as 64-bit, but the extract needs to be done
- // in 32-bit pieces.
+ unsigned UnmergeTySize = Is64 ? 64 : 32;
+ unsigned CmpOp =
+ Is64 ? AMDGPU::V_CMP_EQ_U64_e64 : AMDGPU::V_CMP_EQ_U32_e64;
// Insert the unmerge before the loop.
B.setMBB(MBB);
- auto Unmerge = B.buildUnmerge(UnmergeTy, OpReg);
+ unsigned NumPieces = OpSize / UnmergeTySize;
+ SmallVector<Register, 8> UnmergePieces;
+ if (NumPieces == 1) {
+ UnmergePieces.push_back(OpReg);
+ } else {
+ LLT UnmergeTy = LLT::scalar(UnmergeTySize);
+ MachineInstrBuilder Unmerge = B.buildUnmerge(UnmergeTy, OpReg);
+ for (unsigned PieceIdx = 0; PieceIdx != NumPieces; ++PieceIdx)
+ UnmergePieces.push_back(Unmerge.getReg(PieceIdx));
+ }
B.setInstr(*I);
- unsigned NumPieces = Unmerge->getNumOperands() - 1;
- for (unsigned PieceIdx = 0; PieceIdx != NumPieces; ++PieceIdx) {
- Register UnmergePiece = Unmerge.getReg(PieceIdx);
-
+ for (Register UnmergePiece : UnmergePieces) {
Register CurrentLaneOpReg;
if (Is64) {
Register CurrentLaneOpRegLo = MRI.createGenericVirtualRegister(S32);
@@ -985,12 +971,14 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
if (OpTy.isVector()) {
auto Merge = B.buildBuildVector(OpTy, ReadlanePieces);
Op.setReg(Merge.getReg(0));
- } else {
+ MRI.setRegBank(Op.getReg(), AMDGPU::SGPRRegBank);
+ } else if (ReadlanePieces.size() > 1) {
auto Merge = B.buildMerge(OpTy, ReadlanePieces);
Op.setReg(Merge.getReg(0));
+ MRI.setRegBank(Op.getReg(), AMDGPU::SGPRRegBank);
+ } else {
+ Op.setReg(ReadlanePieces[0]);
}
-
- MRI.setRegBank(Op.getReg(), AMDGPU::SGPRRegBank);
}
// Make sure we don't re-process this register again.
@@ -998,8 +986,6 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
}
}
- B.setInsertPt(*LoopBB, LoopBB->end());
-
// Update EXEC, save the original EXEC value to VCC.
B.buildInstr(AndSaveExecOpc)
.addDef(NewExec)
@@ -1007,6 +993,8 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
MRI.setSimpleHint(NewExec, CondReg);
+ B.setInsertPt(*LoopBB, LoopBB->end());
+
// Update EXEC, switch all done bits to 0 and all todo bits to 1.
B.buildInstr(XorTermOpc)
.addDef(ExecReg)
@@ -1017,8 +1005,7 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
// s_cbranch_scc0?
// Loop back to V_READFIRSTLANE_B32 if there are still variants to cover.
- B.buildInstr(AMDGPU::S_CBRANCH_EXECNZ)
- .addMBB(LoopBB);
+ B.buildInstr(AMDGPU::SI_WATERFALL_LOOP).addMBB(LoopBB);
// Save the EXEC mask before the loop.
BuildMI(MBB, MBB.end(), DL, TII->get(MovTermOpc), SaveExecReg)
@@ -1336,7 +1323,7 @@ static unsigned setBufferOffsets(MachineIRBuilder &B,
const LLT S32 = LLT::scalar(32);
MachineRegisterInfo *MRI = B.getMRI();
- if (Optional<int64_t> Imm = getConstantVRegSExtVal(CombinedOffset, *MRI)) {
+ if (Optional<int64_t> Imm = getIConstantVRegSExtVal(CombinedOffset, *MRI)) {
uint32_t SOffset, ImmOffset;
if (AMDGPU::splitMUBUFOffset(*Imm, SOffset, ImmOffset, &RBI.Subtarget,
Alignment)) {
@@ -1430,7 +1417,7 @@ bool AMDGPURegisterBankInfo::applyMappingSBufferLoad(
OffsetBank == &AMDGPU::SGPRRegBank)
return true; // Legal mapping
- // FIXME: 96-bit case was widened during legalize. We neeed to narrow it back
+ // FIXME: 96-bit case was widened during legalize. We need to narrow it back
// here but don't have an MMO.
unsigned LoadSize = Ty.getSizeInBits();
@@ -1455,7 +1442,7 @@ bool AMDGPURegisterBankInfo::applyMappingSBufferLoad(
VOffset, SOffset, ImmOffset, Alignment);
// TODO: 96-bit loads were widened to 128-bit results. Shrink the result if we
- // can, but we neeed to track an MMO for that.
+ // can, but we need to track an MMO for that.
const unsigned MemSize = (Ty.getSizeInBits() + 7) / 8;
const Align MemAlign(4); // FIXME: ABI type alignment?
MachineMemOperand *BaseMMO = MF.getMachineMemOperand(
@@ -1569,7 +1556,7 @@ bool AMDGPURegisterBankInfo::applyMappingBFE(const OperandsMapper &OpdMapper,
// A 64-bit bitfield extract uses the 32-bit bitfield extract instructions
// if the width is a constant.
- if (auto ConstWidth = getConstantVRegValWithLookThrough(WidthReg, MRI)) {
+ if (auto ConstWidth = getIConstantVRegValWithLookThrough(WidthReg, MRI)) {
// Use the 32-bit bitfield extract instruction if the width is a constant.
// Depending on the width size, use either the low or high 32-bits.
auto Zero = B.buildConstant(S32, 0);
@@ -1775,97 +1762,6 @@ AMDGPURegisterBankInfo::splitBufferOffsets(MachineIRBuilder &B,
return {BaseReg, C1};
}
-static bool isZero(Register Reg, MachineRegisterInfo &MRI) {
- int64_t C;
- return mi_match(Reg, MRI, m_ICst(C)) && C == 0;
-}
-
-static unsigned extractCPol(unsigned CachePolicy) {
- return CachePolicy & AMDGPU::CPol::ALL;
-}
-
-static unsigned extractSWZ(unsigned CachePolicy) {
- return (CachePolicy >> 3) & 1;
-}
-
-
-MachineInstr *
-AMDGPURegisterBankInfo::selectStoreIntrinsic(MachineIRBuilder &B,
- MachineInstr &MI) const {
- MachineRegisterInfo &MRI = *B.getMRI();
- executeInWaterfallLoop(B, MI, MRI, {2, 4});
-
- // FIXME: DAG lowering brokenly changes opcode based on FP vs. integer.
-
- Register VData = MI.getOperand(1).getReg();
- LLT Ty = MRI.getType(VData);
-
- int EltSize = Ty.getScalarSizeInBits();
- int Size = Ty.getSizeInBits();
-
- // FIXME: Broken integer truncstore.
- if (EltSize != 32)
- report_fatal_error("unhandled intrinsic store");
-
- // FIXME: Verifier should enforce 1 MMO for these intrinsics.
- const int MemSize = (*MI.memoperands_begin())->getSize();
-
-
- Register RSrc = MI.getOperand(2).getReg();
- Register VOffset = MI.getOperand(3).getReg();
- Register SOffset = MI.getOperand(4).getReg();
- unsigned CachePolicy = MI.getOperand(5).getImm();
-
- unsigned ImmOffset;
- std::tie(VOffset, ImmOffset) = splitBufferOffsets(B, VOffset);
-
- const bool Offen = !isZero(VOffset, MRI);
-
- unsigned Opc = AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact;
- switch (8 * MemSize) {
- case 8:
- Opc = Offen ? AMDGPU::BUFFER_STORE_BYTE_OFFEN_exact :
- AMDGPU::BUFFER_STORE_BYTE_OFFSET_exact;
- break;
- case 16:
- Opc = Offen ? AMDGPU::BUFFER_STORE_SHORT_OFFEN_exact :
- AMDGPU::BUFFER_STORE_SHORT_OFFSET_exact;
- break;
- default:
- Opc = Offen ? AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact :
- AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact;
- if (Size > 32)
- Opc = AMDGPU::getMUBUFOpcode(Opc, Size / 32);
- break;
- }
-
-
- // Set the insertion point back to the instruction in case it was moved into a
- // loop.
- B.setInstr(MI);
-
- MachineInstrBuilder MIB = B.buildInstr(Opc)
- .addUse(VData);
-
- if (Offen)
- MIB.addUse(VOffset);
-
- MIB.addUse(RSrc)
- .addUse(SOffset)
- .addImm(ImmOffset)
- .addImm(extractCPol(CachePolicy))
- .addImm(0) // tfe: FIXME: Remove from inst
- .addImm(extractSWZ(CachePolicy))
- .cloneMemRefs(MI);
-
- // FIXME: We need a way to report failure from applyMappingImpl.
- // Insert constrain copies before inserting the loop.
- if (!constrainSelectedInstRegOperands(*MIB, *TII, *TRI, *this))
- report_fatal_error("failed to constrain selected store intrinsic");
-
- return MIB;
-}
-
bool AMDGPURegisterBankInfo::buildVCopy(MachineIRBuilder &B, Register DstReg,
Register SrcReg) const {
MachineRegisterInfo &MRI = *B.getMRI();
@@ -2153,7 +2049,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
// The standard handling only considers the result register bank for
// phis. For VCC, blindly inserting a copy when the phi is lowered will
// produce an invalid copy. We can only copy with some kind of compare to
- // get a vector boolean result. Insert a regitser bank copy that will be
+ // get a vector boolean result. Insert a register bank copy that will be
// correctly lowered to a compare.
MachineIRBuilder B(*MI.getParent()->getParent());
@@ -2491,9 +2387,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
return;
}
case AMDGPU::G_CTPOP:
- case AMDGPU::G_BITREVERSE:
- case AMDGPU::G_CTLZ_ZERO_UNDEF:
- case AMDGPU::G_CTTZ_ZERO_UNDEF: {
+ case AMDGPU::G_BITREVERSE: {
const RegisterBank *DstBank =
OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
if (DstBank == &AMDGPU::SGPRRegBank)
@@ -2515,6 +2409,48 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
llvm_unreachable("narrowScalar should have succeeded");
return;
}
+ case AMDGPU::G_AMDGPU_FFBH_U32:
+ case AMDGPU::G_AMDGPU_FFBL_B32:
+ case AMDGPU::G_CTLZ_ZERO_UNDEF:
+ case AMDGPU::G_CTTZ_ZERO_UNDEF: {
+ const RegisterBank *DstBank =
+ OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
+ if (DstBank == &AMDGPU::SGPRRegBank)
+ break;
+
+ Register SrcReg = MI.getOperand(1).getReg();
+ const LLT S32 = LLT::scalar(32);
+ LLT Ty = MRI.getType(SrcReg);
+ if (Ty == S32)
+ break;
+
+ // We can narrow this more efficiently than Helper can by using ffbh/ffbl
+ // which return -1 when the input is zero:
+ // (ctlz_zero_undef hi:lo) -> (umin (ffbh hi), (add (ffbh lo), 32))
+ // (cttz_zero_undef hi:lo) -> (umin (add (ffbl hi), 32), (ffbl lo))
+ // (ffbh hi:lo) -> (umin (ffbh hi), (uaddsat (ffbh lo), 32))
+ // (ffbl hi:lo) -> (umin (uaddsat (ffbh hi), 32), (ffbh lo))
+ ApplyRegBankMapping ApplyVALU(*this, MRI, &AMDGPU::VGPRRegBank);
+ MachineIRBuilder B(MI, ApplyVALU);
+ SmallVector<Register, 2> SrcRegs(OpdMapper.getVRegs(1));
+ unsigned NewOpc = Opc == AMDGPU::G_CTLZ_ZERO_UNDEF
+ ? (unsigned)AMDGPU::G_AMDGPU_FFBH_U32
+ : Opc == AMDGPU::G_CTTZ_ZERO_UNDEF
+ ? (unsigned)AMDGPU::G_AMDGPU_FFBL_B32
+ : Opc;
+ unsigned Idx = NewOpc == AMDGPU::G_AMDGPU_FFBH_U32;
+ auto X = B.buildInstr(NewOpc, {S32}, {SrcRegs[Idx]});
+ auto Y = B.buildInstr(NewOpc, {S32}, {SrcRegs[Idx ^ 1]});
+ unsigned AddOpc =
+ Opc == AMDGPU::G_CTLZ_ZERO_UNDEF || Opc == AMDGPU::G_CTTZ_ZERO_UNDEF
+ ? AMDGPU::G_ADD
+ : AMDGPU::G_UADDSAT;
+ Y = B.buildInstr(AddOpc, {S32}, {Y, B.buildConstant(S32, 32)});
+ Register DstReg = MI.getOperand(0).getReg();
+ B.buildUMin(DstReg, X, Y);
+ MI.eraseFromParent();
+ return;
+ }
case AMDGPU::G_SEXT:
case AMDGPU::G_ZEXT:
case AMDGPU::G_ANYEXT: {
@@ -3034,6 +2970,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
}
case AMDGPU::G_AMDGPU_INTRIN_BVH_INTERSECT_RAY: {
unsigned N = MI.getNumExplicitOperands() - 2;
+ applyDefaultMapping(OpdMapper);
executeInWaterfallLoop(MI, MRI, { N });
return;
}
@@ -3095,6 +3032,101 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
}
break;
}
+ case AMDGPU::G_SI_CALL: {
+ // Use a set to avoid extra readfirstlanes in the case where multiple
+ // operands are the same register.
+ SmallSet<Register, 4> SGPROperandRegs;
+
+ if (!collectWaterfallOperands(SGPROperandRegs, MI, MRI, {1}))
+ break;
+
+ // Move all copies to physical SGPRs that are used by the call instruction
+    // into the loop block. Search backwards from the call to the
+    // ADJCALLSTACKUP for these copies.
+ unsigned FrameSetupOpcode = AMDGPU::ADJCALLSTACKUP;
+ unsigned FrameDestroyOpcode = AMDGPU::ADJCALLSTACKDOWN;
+
+ // Move all non-copies before the copies, so that a complete range can be
+ // moved into the waterfall loop.
+ SmallVector<MachineInstr *, 4> NonCopyInstrs;
+ // Count of NonCopyInstrs found until the current LastCopy.
+ unsigned NonCopyInstrsLen = 0;
+ MachineBasicBlock::iterator Start(&MI);
+ MachineBasicBlock::iterator LastCopy = Start;
+ MachineBasicBlock *MBB = MI.getParent();
+ const SIMachineFunctionInfo *Info =
+ MBB->getParent()->getInfo<SIMachineFunctionInfo>();
+ while (Start->getOpcode() != FrameSetupOpcode) {
+ --Start;
+ bool IsCopy = false;
+ if (Start->getOpcode() == AMDGPU::COPY) {
+ auto &Dst = Start->getOperand(0);
+ if (Dst.isReg()) {
+ Register Reg = Dst.getReg();
+ if (Reg.isPhysical() && MI.readsRegister(Reg, TRI)) {
+ IsCopy = true;
+ } else {
+ // Also move the copy from the scratch rsrc descriptor into the loop
+ // to allow it to be optimized away.
+ auto &Src = Start->getOperand(1);
+ if (Src.isReg()) {
+ Reg = Src.getReg();
+ IsCopy = Info->getScratchRSrcReg() == Reg;
+ }
+ }
+ }
+ }
+
+ if (IsCopy) {
+ LastCopy = Start;
+ NonCopyInstrsLen = NonCopyInstrs.size();
+ } else {
+ NonCopyInstrs.push_back(&*Start);
+ }
+ }
+ NonCopyInstrs.resize(NonCopyInstrsLen);
+
+ for (auto *NonCopy : reverse(NonCopyInstrs)) {
+ MBB->splice(LastCopy, MBB, NonCopy->getIterator());
+ }
+ Start = LastCopy;
+
+ // Do the same for copies after the loop
+ NonCopyInstrs.clear();
+ NonCopyInstrsLen = 0;
+ MachineBasicBlock::iterator End(&MI);
+ LastCopy = End;
+ while (End->getOpcode() != FrameDestroyOpcode) {
+ ++End;
+ bool IsCopy = false;
+ if (End->getOpcode() == AMDGPU::COPY) {
+ auto &Src = End->getOperand(1);
+ if (Src.isReg()) {
+ Register Reg = Src.getReg();
+ IsCopy = Reg.isPhysical() && MI.modifiesRegister(Reg, TRI);
+ }
+ }
+
+ if (IsCopy) {
+ LastCopy = End;
+ NonCopyInstrsLen = NonCopyInstrs.size();
+ } else {
+ NonCopyInstrs.push_back(&*End);
+ }
+ }
+ NonCopyInstrs.resize(NonCopyInstrsLen);
+
+ End = LastCopy;
+ ++LastCopy;
+ for (auto *NonCopy : reverse(NonCopyInstrs)) {
+ MBB->splice(LastCopy, MBB, NonCopy->getIterator());
+ }
+
+ ++End;
+ MachineIRBuilder B(*Start);
+ executeInWaterfallLoop(B, make_range(Start, End), SGPROperandRegs, MRI);
+ break;
+ }
case AMDGPU::G_LOAD:
case AMDGPU::G_ZEXTLOAD:
case AMDGPU::G_SEXTLOAD: {
@@ -3290,7 +3322,7 @@ AMDGPURegisterBankInfo::getImageMapping(const MachineRegisterInfo &MRI,
return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping), NumOps);
}
-/// Return the mapping for a pointer arugment.
+/// Return the mapping for a pointer argument.
const RegisterBankInfo::ValueMapping *
AMDGPURegisterBankInfo::getValueMappingForPtr(const MachineRegisterInfo &MRI,
Register PtrReg) const {
@@ -3620,7 +3652,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_INTRINSIC_TRUNC:
case AMDGPU::G_BSWAP: // TODO: Somehow expand for scalar?
case AMDGPU::G_FSHR: // TODO: Expand for scalar
- case AMDGPU::G_AMDGPU_FFBH_U32:
case AMDGPU::G_AMDGPU_FMIN_LEGACY:
case AMDGPU::G_AMDGPU_FMAX_LEGACY:
case AMDGPU::G_AMDGPU_RCP_IFLAG:
@@ -3726,8 +3757,16 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[0] = OpdsMapping[1] = AMDGPU::getValueMapping(BankID, Size);
break;
}
+ case AMDGPU::G_AMDGPU_FFBH_U32:
+ case AMDGPU::G_AMDGPU_FFBL_B32:
case AMDGPU::G_CTLZ_ZERO_UNDEF:
- case AMDGPU::G_CTTZ_ZERO_UNDEF:
+ case AMDGPU::G_CTTZ_ZERO_UNDEF: {
+ unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+ unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI);
+ OpdsMapping[0] = AMDGPU::getValueMapping(BankID, 32);
+ OpdsMapping[1] = AMDGPU::getValueMappingSGPR64Only(BankID, Size);
+ break;
+ }
case AMDGPU::G_CTPOP: {
unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI);
@@ -4033,6 +4072,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_mbcnt_hi:
case Intrinsic::amdgcn_mul_u24:
case Intrinsic::amdgcn_mul_i24:
+ case Intrinsic::amdgcn_mulhi_u24:
+ case Intrinsic::amdgcn_mulhi_i24:
case Intrinsic::amdgcn_lerp:
case Intrinsic::amdgcn_sad_u8:
case Intrinsic::amdgcn_msad_u8:
@@ -4254,8 +4295,17 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
unsigned N = MI.getNumExplicitOperands() - 2;
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 128);
OpdsMapping[N] = getSGPROpMapping(MI.getOperand(N).getReg(), MRI, *TRI);
- for (unsigned I = 2; I < N; ++I)
- OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
+ if (N == 3) {
+ // Sequential form: all operands combined into VGPR256/VGPR512
+ unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
+ if (Size > 256)
+ Size = 512;
+ OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
+ } else {
+ // NSA form
+ for (unsigned I = 2; I < N; ++I)
+ OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
+ }
break;
}
case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
@@ -4447,6 +4497,23 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break;
}
+ case AMDGPU::G_SI_CALL: {
+ OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64);
+ // Lie and claim everything is legal, even though some need to be
+ // SGPRs. applyMapping will have to deal with it as a waterfall loop.
+ OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
+
+ // Allow anything for implicit arguments
+ for (unsigned I = 4; I < MI.getNumOperands(); ++I) {
+ if (MI.getOperand(I).isReg()) {
+ Register Reg = MI.getOperand(I).getReg();
+ auto OpBank = getRegBankID(Reg, MRI);
+ unsigned Size = getSizeInBits(Reg, MRI, *TRI);
+ OpdsMapping[I] = AMDGPU::getValueMapping(OpBank, Size);
+ }
+ }
+ break;
+ }
case AMDGPU::G_LOAD:
case AMDGPU::G_ZEXTLOAD:
case AMDGPU::G_SEXTLOAD:
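The CTLZ/CTTZ narrowing added earlier in this file relies on FFBH/FFBL returning -1 (all ones) for a zero input, so a plain umin picks the correct half. A standalone sketch of the 64-bit CTLZ_ZERO_UNDEF expansion in ordinary C++ (an emulation for illustration only, not LLVM code):

#include <algorithm>
#include <cassert>
#include <cstdint>

// Models V_FFBH_U32: count of leading zeros, -1 (all ones) when the input is 0.
static uint32_t ffbh32(uint32_t X) {
  if (X == 0)
    return UINT32_MAX;
  uint32_t N = 0;
  while (!(X & 0x80000000u)) {
    X <<= 1;
    ++N;
  }
  return N;
}

// (ctlz_zero_undef hi:lo) -> (umin (ffbh hi), (add (ffbh lo), 32)); the plain
// add is acceptable here because the input is known to be non-zero, and any
// wrap on the lo half can only happen when the hi half already wins the umin.
static uint32_t ctlz64(uint32_t Hi, uint32_t Lo) {
  return std::min(ffbh32(Hi), ffbh32(Lo) + 32);
}

int main() {
  assert(ctlz64(0x00000001u, 0xFFFFFFFFu) == 31); // 0x00000001FFFFFFFF
  assert(ctlz64(0x00000000u, 0x00008000u) == 48); // 0x0000000000008000
  return 0;
}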
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
index 7e051e4a5424..2b9d0923ab49 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
@@ -89,9 +89,6 @@ public:
std::pair<Register, unsigned>
splitBufferOffsets(MachineIRBuilder &B, Register Offset) const;
- MachineInstr *selectStoreIntrinsic(MachineIRBuilder &B,
- MachineInstr &MI) const;
-
/// See RegisterBankInfo::applyMapping.
void applyMappingImpl(const OperandsMapper &OpdMapper) const override;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp
index dabb4d006d99..d55bf3917e9c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp
@@ -130,11 +130,9 @@ class ReplaceLDSUseImpl {
std::vector<GlobalVariable *> LDSGlobals = AMDGPU::findVariablesToLower(M);
// Remove LDS which don't qualify for replacement.
- LDSGlobals.erase(std::remove_if(LDSGlobals.begin(), LDSGlobals.end(),
- [&](GlobalVariable *GV) {
- return shouldIgnorePointerReplacement(GV);
- }),
- LDSGlobals.end());
+ llvm::erase_if(LDSGlobals, [&](GlobalVariable *GV) {
+ return shouldIgnorePointerReplacement(GV);
+ });
return LDSGlobals;
}
@@ -142,7 +140,7 @@ class ReplaceLDSUseImpl {
// Returns true if uses of given LDS global within non-kernel functions should
// be keep as it is without pointer replacement.
bool shouldIgnorePointerReplacement(GlobalVariable *GV) {
- // LDS whose size is very small and doesn`t exceed pointer size is not worth
+ // LDS whose size is very small and doesn't exceed pointer size is not worth
// replacing.
if (DL.getTypeAllocSize(GV->getValueType()) <= 2)
return true;
@@ -158,7 +156,7 @@ class ReplaceLDSUseImpl {
// Insert new global LDS pointer which points to LDS.
GlobalVariable *createLDSPointer(GlobalVariable *GV) {
- // LDS pointer which points to LDS is already created? return it.
+ // LDS pointer which points to LDS is already created? Return it.
auto PointerEntry = LDSToPointer.insert(std::make_pair(GV, nullptr));
if (!PointerEntry.second)
return PointerEntry.first->second;
@@ -185,7 +183,7 @@ class ReplaceLDSUseImpl {
// Split entry basic block in such a way that only lane 0 of each wave does
// the LDS pointer initialization, and return newly created basic block.
BasicBlock *activateLaneZero(Function *K) {
- // If the entry basic block of kernel K is already splitted, then return
+ // If the entry basic block of kernel K is already split, then return
// newly created basic block.
auto BasicBlockEntry = KernelToInitBB.insert(std::make_pair(K, nullptr));
if (!BasicBlockEntry.second)
@@ -204,7 +202,7 @@ class ReplaceLDSUseImpl {
BasicBlock *NBB = SplitBlockAndInsertIfThen(Cond, WB, false)->getParent();
- // Mark that the entry basic block of kernel K is splitted.
+ // Mark that the entry basic block of kernel K is split.
KernelToInitBB[K] = NBB;
return NBB;
@@ -235,7 +233,7 @@ class ReplaceLDSUseImpl {
}
// We have created an LDS pointer for LDS, and initialized it to point-to LDS
- // within all relevent kernels. Now replace all the uses of LDS within
+ // within all relevant kernels. Now replace all the uses of LDS within
// non-kernel functions by LDS pointer.
void replaceLDSUseByPointer(GlobalVariable *GV, GlobalVariable *LDSPointer) {
SmallVector<User *, 8> LDSUsers(GV->users());
@@ -268,8 +266,8 @@ class ReplaceLDSUseImpl {
convertConstantExprsToInstructions(I, CE, &UserInsts);
}
- // Go through all the user instrutions, if LDS exist within them as an
- // operand, then replace it by replace instruction.
+ // Go through all the user instructions, if LDS exist within them as
+ // an operand, then replace it by replace instruction.
for (auto *II : UserInsts) {
auto *ReplaceInst = getReplacementInst(F, GV, LDSPointer);
II->replaceUsesOfWith(GV, ReplaceInst);
@@ -373,7 +371,7 @@ bool ReplaceLDSUseImpl::replaceLDSUse(GlobalVariable *GV) {
return false;
// We have created an LDS pointer for LDS, and initialized it to point-to LDS
- // within all relevent kernels. Now replace all the uses of LDS within
+ // within all relevant kernels. Now replace all the uses of LDS within
// non-kernel functions by LDS pointer.
replaceLDSUseByPointer(GV, LDSPointer);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
index ef46e53b7460..cb511e5e3483 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
@@ -29,6 +29,8 @@
#include "SIMachineFunctionInfo.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -61,7 +63,8 @@ static const Function *getCalleeFunction(const MachineOperand &Op) {
assert(Op.getImm() == 0);
return nullptr;
}
-
+ if (auto *GA = dyn_cast<GlobalAlias>(Op.getGlobal()))
+ return cast<Function>(GA->getOperand(0));
return cast<Function>(Op.getGlobal());
}
@@ -83,10 +86,15 @@ int32_t AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo::getTotalNumSGPRs(
}
int32_t AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo::getTotalNumVGPRs(
+ const GCNSubtarget &ST, int32_t ArgNumAGPR, int32_t ArgNumVGPR) const {
+ if (ST.hasGFX90AInsts() && ArgNumAGPR)
+ return alignTo(ArgNumVGPR, 4) + ArgNumAGPR;
+ return std::max(ArgNumVGPR, ArgNumAGPR);
+}
+
+int32_t AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo::getTotalNumVGPRs(
const GCNSubtarget &ST) const {
- if (ST.hasGFX90AInsts() && NumAGPR)
- return alignTo(NumVGPR, 4) + NumAGPR;
- return std::max(NumVGPR, NumAGPR);
+ return getTotalNumVGPRs(ST, NumAGPR, NumVGPR);
}
bool AMDGPUResourceUsageAnalysis::runOnSCC(CallGraphSCC &SCC) {
@@ -444,6 +452,25 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
if (!IsIndirect)
I = CallGraphResourceInfo.find(Callee);
+ // FIXME: Call site could have norecurse on it
+ if (!Callee || !Callee->doesNotRecurse()) {
+ Info.HasRecursion = true;
+
+ // TODO: If we happen to know there is no stack usage in the
+ // callgraph, we don't need to assume an infinitely growing stack.
+ if (!MI.isReturn()) {
+ // We don't need to assume an unknown stack size for tail calls.
+
+ // FIXME: This only benefits in the case where the kernel does not
+ // directly call the tail called function. If a kernel directly
+ // calls a tail recursive function, we'll assume maximum stack size
+ // based on the regular call instruction.
+ CalleeFrameSize =
+ std::max(CalleeFrameSize,
+ static_cast<uint64_t>(AssumedStackSizeForExternalCall));
+ }
+ }
+
if (IsIndirect || I == CallGraphResourceInfo.end()) {
CalleeFrameSize =
std::max(CalleeFrameSize,
@@ -468,10 +495,6 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
Info.HasRecursion |= I->second.HasRecursion;
Info.HasIndirectCall |= I->second.HasIndirectCall;
}
-
- // FIXME: Call site could have norecurse on it
- if (!Callee || !Callee->doesNotRecurse())
- Info.HasRecursion = true;
}
}
}
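A worked example of the combined register accounting introduced by the new getTotalNumVGPRs overload above; the helper simply restates the formula from the patch (gfx90a aligns the VGPR count to a multiple of 4 before adding the AGPRs, other targets take the maximum of the two counts):

#include <algorithm>
#include <cstdint>

static int32_t totalNumVGPRs(bool HasGFX90AInsts, int32_t NumAGPR,
                             int32_t NumVGPR) {
  if (HasGFX90AInsts && NumAGPR)
    return (NumVGPR + 3) / 4 * 4 + NumAGPR; // alignTo(NumVGPR, 4) + NumAGPR
  return std::max(NumVGPR, NumAGPR);
}

// totalNumVGPRs(/*gfx90a=*/true,  /*AGPR=*/6, /*VGPR=*/10) == 12 + 6 == 18
// totalNumVGPRs(/*gfx90a=*/false, /*AGPR=*/6, /*VGPR=*/10) == 10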
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.h b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.h
index 832e8119e444..b0a2d3bffc62 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.h
@@ -17,7 +17,6 @@
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/IR/ValueMap.h"
namespace llvm {
@@ -44,6 +43,10 @@ public:
bool HasIndirectCall = false;
int32_t getTotalNumSGPRs(const GCNSubtarget &ST) const;
+  // The total number of VGPRs is actually a combination of AGPRs and VGPRs,
+  // depending on the architecture and some alignment constraints.
+ int32_t getTotalNumVGPRs(const GCNSubtarget &ST, int32_t NumAGPR,
+ int32_t NumVGPR) const;
int32_t getTotalNumVGPRs(const GCNSubtarget &ST) const;
};
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
index e2aafa25142e..45f7c2f369bd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
@@ -142,8 +142,8 @@ bool AMDGPURewriteOutArguments::checkArgumentUses(Value &Arg) const {
// Special case handle structs with single members. It is useful to handle
// some casts between structs and non-structs, but we can't bitcast
- // directly between them. directly bitcast between them. Blender uses
- // some casts that look like { <3 x float> }* to <4 x float>*
+ // directly between them. Blender uses some casts that look like
+ // { <3 x float> }* to <4 x float>*
if ((SrcEltTy->isStructTy() && (SrcEltTy->getStructNumElements() != 1)))
return false;
@@ -259,7 +259,7 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
// Keep retrying if we are able to successfully eliminate an argument. This
// helps with cases with multiple arguments which may alias, such as in a
- // sincos implemntation. If we have 2 stores to arguments, on the first
+ // sincos implementation. If we have 2 stores to arguments, on the first
// attempt the MDA query will succeed for the second store but not the
// first. On the second iteration we've removed that out clobbering argument
// (by effectively moving it into another function) and will find the second
@@ -357,7 +357,7 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
RetAttrs.addAttribute(Attribute::SExt);
RetAttrs.addAttribute(Attribute::ZExt);
RetAttrs.addAttribute(Attribute::NoAlias);
- NewFunc->removeAttributes(AttributeList::ReturnIndex, RetAttrs);
+ NewFunc->removeRetAttrs(RetAttrs);
// TODO: How to preserve metadata?
// Move the body of the function into the new rewritten function, and replace
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 0c5020dccecd..0655b4342ba1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -12,12 +12,12 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUSubtarget.h"
-#include "AMDGPU.h"
#include "AMDGPUCallLowering.h"
#include "AMDGPUInstructionSelector.h"
#include "AMDGPULegalizerInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "AMDGPUTargetMachine.h"
+#include "R600Subtarget.h"
#include "SIMachineFunctionInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallString.h"
@@ -38,10 +38,7 @@ using namespace llvm;
#define GET_SUBTARGETINFO_CTOR
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenSubtargetInfo.inc"
-#define GET_SUBTARGETINFO_TARGET_DESC
-#define GET_SUBTARGETINFO_CTOR
#undef AMDGPUSubtarget
-#include "R600GenSubtargetInfo.inc"
static cl::opt<bool> DisablePowerSched(
"amdgpu-disable-power-sched",
@@ -64,19 +61,6 @@ static cl::opt<bool> UseAA("amdgpu-use-aa-in-codegen",
GCNSubtarget::~GCNSubtarget() = default;
-R600Subtarget &
-R600Subtarget::initializeSubtargetDependencies(const Triple &TT,
- StringRef GPU, StringRef FS) {
- SmallString<256> FullFS("+promote-alloca,");
- FullFS += FS;
- ParseSubtargetFeatures(GPU, /*TuneCPU*/ GPU, FullFS);
-
- HasMulU24 = getGeneration() >= EVERGREEN;
- HasMulI24 = hasCaymanISA();
-
- return *this;
-}
-
GCNSubtarget &
GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
StringRef GPU, StringRef FS) {
@@ -98,12 +82,12 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
FullFS += "+enable-prt-strict-null,"; // This is overridden by a disable in FS
// Disable mutually exclusive bits.
- if (FS.find_insensitive("+wavefrontsize") != StringRef::npos) {
- if (FS.find_insensitive("wavefrontsize16") == StringRef::npos)
+ if (FS.contains_insensitive("+wavefrontsize")) {
+ if (!FS.contains_insensitive("wavefrontsize16"))
FullFS += "-wavefrontsize16,";
- if (FS.find_insensitive("wavefrontsize32") == StringRef::npos)
+ if (!FS.contains_insensitive("wavefrontsize32"))
FullFS += "-wavefrontsize32,";
- if (FS.find_insensitive("wavefrontsize64") == StringRef::npos)
+ if (!FS.contains_insensitive("wavefrontsize64"))
FullFS += "-wavefrontsize64,";
}
@@ -549,13 +533,10 @@ std::pair<unsigned, unsigned> AMDGPUSubtarget::getFlatWorkGroupSizes(
}
std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(
- const Function &F) const {
+ const Function &F, std::pair<unsigned, unsigned> FlatWorkGroupSizes) const {
// Default minimum/maximum number of waves per execution unit.
std::pair<unsigned, unsigned> Default(1, getMaxWavesPerEU());
- // Default/requested minimum/maximum flat work group sizes.
- std::pair<unsigned, unsigned> FlatWorkGroupSizes = getFlatWorkGroupSizes(F);
-
// If minimum/maximum flat work group sizes were explicitly requested using
// "amdgpu-flat-work-group-size" attribute, then set default minimum/maximum
// number of waves per execution unit to values implied by requested
@@ -563,8 +544,6 @@ std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(
unsigned MinImpliedByFlatWorkGroupSize =
getWavesPerEUForWorkGroup(FlatWorkGroupSizes.second);
Default.first = MinImpliedByFlatWorkGroupSize;
- bool RequestedFlatWorkGroupSize =
- F.hasFnAttribute("amdgpu-flat-work-group-size");
// Requested minimum/maximum number of waves per execution unit.
std::pair<unsigned, unsigned> Requested = AMDGPU::getIntegerPairAttribute(
@@ -581,8 +560,7 @@ std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(
// Make sure requested values are compatible with values implied by requested
// minimum/maximum flat work group sizes.
- if (RequestedFlatWorkGroupSize &&
- Requested.first < MinImpliedByFlatWorkGroupSize)
+ if (Requested.first < MinImpliedByFlatWorkGroupSize)
return Default;
return Requested;
@@ -710,6 +688,7 @@ unsigned AMDGPUSubtarget::getKernArgSegmentSize(const Function &F,
if (ImplicitBytes != 0) {
const Align Alignment = getAlignmentForImplicitArgPtr();
TotalSize = alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes;
+ MaxAlign = std::max(MaxAlign, Alignment);
}
// Being able to dereference past the end is useful for emitting scalar loads.
@@ -721,23 +700,6 @@ AMDGPUDwarfFlavour AMDGPUSubtarget::getAMDGPUDwarfFlavour() const {
: AMDGPUDwarfFlavour::Wave64;
}
-R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
- const TargetMachine &TM) :
- R600GenSubtargetInfo(TT, GPU, /*TuneCPU*/GPU, FS),
- AMDGPUSubtarget(TT),
- InstrInfo(*this),
- FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
- FMA(false),
- CaymanISA(false),
- CFALUBug(false),
- HasVertexCache(false),
- R600ALUInst(false),
- FP64(false),
- TexVTXClauseSize(0),
- Gen(R600),
- TLInfo(TM, initializeSubtargetDependencies(TT, GPU, FS)),
- InstrItins(getInstrItineraryForCPU(GPU)) { }
-
void GCNSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
unsigned NumRegionInstrs) const {
// Track register pressure so the scheduler can try to decrease
@@ -805,7 +767,7 @@ GCNSubtarget::getBaseReservedNumSGPRs(const bool HasFlatScratchInit) const {
if (getGeneration() >= AMDGPUSubtarget::GFX10)
return 2; // VCC. FLAT_SCRATCH and XNACK are no longer in SGPRs.
- if (HasFlatScratchInit) {
+ if (HasFlatScratchInit || HasArchitectedFlatScratch) {
if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
return 6; // FLAT_SCRATCH, XNACK, VCC (in that order).
if (getGeneration() == AMDGPUSubtarget::SEA_ISLANDS)
@@ -1003,6 +965,13 @@ void GCNSubtarget::adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use,
--Lat;
}
Dep.setLatency(Lat);
+ } else if (Dep.getLatency() == 0 && Dep.getReg() == AMDGPU::VCC_LO) {
+ // Work around the fact that SIInstrInfo::fixImplicitOperands modifies
+ // implicit operands which come from the MCInstrDesc, which can fool
+ // ScheduleDAGInstrs::addPhysRegDataDeps into treating them as implicit
+ // pseudo operands.
+ Dep.setLatency(InstrInfo.getSchedModel().computeOperandLatency(
+ DefI, DefOpIdx, UseI, UseOpIdx));
}
}
@@ -1052,7 +1021,7 @@ struct FillMFMAShadowMutation : ScheduleDAGMutation {
return true;
}
- // Link as much SALU intructions in chain as possible. Return the size
+ // Link as many SALU instructions in chain as possible. Return the size
// of the chain. Links up to MaxChain instructions.
unsigned linkSALUChain(SUnit *From, SUnit *To, unsigned MaxChain,
SmallPtrSetImpl<SUnit *> &Visited) const {
@@ -1136,6 +1105,11 @@ void GCNSubtarget::getPostRAMutations(
Mutations.push_back(std::make_unique<FillMFMAShadowMutation>(&InstrInfo));
}
+std::unique_ptr<ScheduleDAGMutation>
+GCNSubtarget::createFillMFMAShadowMutation(const TargetInstrInfo *TII) const {
+ return std::make_unique<FillMFMAShadowMutation>(&InstrInfo);
+}
+
const AMDGPUSubtarget &AMDGPUSubtarget::get(const MachineFunction &MF) {
if (MF.getTarget().getTargetTriple().getArch() == Triple::amdgcn)
return static_cast<const AMDGPUSubtarget&>(MF.getSubtarget<GCNSubtarget>());
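The AMDGPUSubtarget.cpp hunks above stop checking for an explicit "amdgpu-flat-work-group-size" attribute, so a requested waves-per-EU range is now always validated against the minimum implied by the flat work group size; they also make getKernArgSegmentSize bump MaxAlign when implicit argument bytes are appended, and work around a zero-latency VCC_LO dependence in adjustSchedDependency. A minimal sketch of the clamping policy under simplified names (not the subtarget code itself):

#include <utility>

// Reject a requested waves-per-EU range that is incompatible with the
// minimum implied by the flat work group size; otherwise honour it.
static std::pair<unsigned, unsigned>
clampWavesPerEU(std::pair<unsigned, unsigned> Requested,
                std::pair<unsigned, unsigned> Default,
                unsigned MinImpliedByFlatWorkGroupSize) {
  if (Requested.first < MinImpliedByFlatWorkGroupSize)
    return Default; // fall back to the defaults derived earlier
  return Requested;
}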
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index b160cdf3a97a..88ed4b2b7a24 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -91,7 +91,18 @@ public:
/// be converted to integer, violate subtarget's specifications, or are not
/// compatible with minimum/maximum number of waves limited by flat work group
/// size, register usage, and/or lds usage.
- std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const;
+ std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const {
+ // Default/requested minimum/maximum flat work group sizes.
+ std::pair<unsigned, unsigned> FlatWorkGroupSizes = getFlatWorkGroupSizes(F);
+ return getWavesPerEU(F, FlatWorkGroupSizes);
+ }
+
+ /// Overload which uses the specified values for the flat work group sizes,
/// rather than querying the function itself. \p FlatWorkGroupSizes should
+ /// correspond to the function's value for getFlatWorkGroupSizes.
+ std::pair<unsigned, unsigned>
+ getWavesPerEU(const Function &F,
+ std::pair<unsigned, unsigned> FlatWorkGroupSizes) const;
/// Return the amount of LDS that can be used that will not restrict the
/// occupancy lower than WaveCount.
@@ -240,7 +251,7 @@ public:
uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const;
unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const;
- /// \returns Corresponsing DWARF register number mapping flavour for the
+ /// \returns Corresponding DWARF register number mapping flavour for the
/// \p WavefrontSize.
AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const;
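The header change above turns getWavesPerEU(F) into an inline wrapper that first fetches the flat work group sizes and then forwards to a new overload taking them as a parameter, so callers that already hold the pair can skip a second attribute query. A caller-side sketch under that assumption (the helper below is illustrative, not part of the patch):

// Query both ranges with a single read of the function's attributes.
static std::pair<unsigned, unsigned>
wavesPerEUFor(const AMDGPUSubtarget &ST, const Function &F) {
  std::pair<unsigned, unsigned> FWGS = ST.getFlatWorkGroupSizes(F);
  return ST.getWavesPerEU(F, FWGS);
}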
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index e4485f87fb79..de11676279f2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -8,7 +8,7 @@
//
/// \file
/// The AMDGPU target machine contains all of the hardware specific
-/// information needed to emit code for R600 and SI GPUs.
+/// information needed to emit code for SI+ GPUs.
//
//===----------------------------------------------------------------------===//
@@ -21,7 +21,8 @@
#include "AMDGPUTargetTransformInfo.h"
#include "GCNIterativeScheduler.h"
#include "GCNSchedStrategy.h"
-#include "R600MachineScheduler.h"
+#include "R600.h"
+#include "R600TargetMachine.h"
#include "SIMachineFunctionInfo.h"
#include "SIMachineScheduler.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
@@ -35,11 +36,13 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Passes/PassBuilder.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/IPO/GlobalDCE.h"
@@ -162,12 +165,6 @@ static VGPRRegisterRegAlloc fastRegAllocVGPR(
"fast", "fast register allocator", createFastVGPRRegisterAllocator);
}
-
-static cl::opt<bool> EnableR600StructurizeCFG(
- "r600-ir-structurize",
- cl::desc("Use StructurizeCFG IR pass"),
- cl::init(true));
-
static cl::opt<bool> EnableSROA(
"amdgpu-sroa",
cl::desc("Run SROA after promote alloca pass"),
@@ -184,12 +181,6 @@ OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden,
cl::desc("Run pre-RA exec mask optimizations"),
cl::init(true));
-static cl::opt<bool> EnableR600IfConvert(
- "r600-if-convert",
- cl::desc("Use if conversion pass"),
- cl::ReallyHidden,
- cl::init(true));
-
// Option to disable vectorizer for tests.
static cl::opt<bool> EnableLoadStoreVectorizer(
"amdgpu-load-store-vectorizer",
@@ -240,13 +231,6 @@ static cl::opt<bool, true> LateCFGStructurize(
cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG),
cl::Hidden);
-static cl::opt<bool, true> EnableAMDGPUFunctionCallsOpt(
- "amdgpu-function-calls",
- cl::desc("Enable AMDGPU function call support"),
- cl::location(AMDGPUTargetMachine::EnableFunctionCalls),
- cl::init(true),
- cl::Hidden);
-
static cl::opt<bool, true> EnableAMDGPUFixedFunctionABIOpt(
"amdgpu-fixed-function-abi",
cl::desc("Enable all implicit function arguments"),
@@ -324,6 +308,11 @@ static cl::opt<bool> EnablePreRAOptimizations(
cl::desc("Enable Pre-RA optimizations pass"), cl::init(true),
cl::Hidden);
+static cl::opt<bool> EnablePromoteKernelArguments(
+ "amdgpu-enable-promote-kernel-arguments",
+ cl::desc("Enable promotion of flat kernel pointer arguments to global"),
+ cl::Hidden, cl::init(true));
+
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
// Register the target
RegisterTargetMachine<R600TargetMachine> X(getTheAMDGPUTarget());
@@ -349,6 +338,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeSIOptimizeVGPRLiveRangePass(*PR);
initializeSILoadStoreOptimizerPass(*PR);
initializeAMDGPUFixFunctionBitcastsPass(*PR);
+ initializeAMDGPUCtorDtorLoweringPass(*PR);
initializeAMDGPUAlwaysInlinePass(*PR);
initializeAMDGPUAttributorPass(*PR);
initializeAMDGPUAnnotateKernelFeaturesPass(*PR);
@@ -356,6 +346,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeAMDGPUArgumentUsageInfoPass(*PR);
initializeAMDGPUAtomicOptimizerPass(*PR);
initializeAMDGPULowerKernelArgumentsPass(*PR);
+ initializeAMDGPUPromoteKernelArgumentsPass(*PR);
initializeAMDGPULowerKernelAttributesPass(*PR);
initializeAMDGPULowerIntrinsicsPass(*PR);
initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(*PR);
@@ -400,10 +391,6 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
return std::make_unique<AMDGPUTargetObjectFile>();
}
-static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) {
- return new ScheduleDAGMILive(C, std::make_unique<R600SchedStrategy>());
-}
-
static ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C) {
return new SIScheduleDAGMI(C);
}
@@ -441,10 +428,6 @@ createIterativeILPMachineScheduler(MachineSchedContext *C) {
}
static MachineSchedRegistry
-R600SchedRegistry("r600", "Run R600's custom scheduler",
- createR600MachineScheduler);
-
-static MachineSchedRegistry
SISchedRegistry("si", "Run SI's custom scheduler",
createSIMachineScheduler);
@@ -542,7 +525,9 @@ StringRef AMDGPUTargetMachine::getFeatureString(const Function &F) const {
/// Predicate for Internalize pass.
static bool mustPreserveGV(const GlobalValue &GV) {
if (const Function *F = dyn_cast<Function>(&GV))
- return F->isDeclaration() || AMDGPU::isEntryFunctionCC(F->getCallingConv());
+ return F->isDeclaration() || F->getName().startswith("__asan_") ||
+ F->getName().startswith("__sanitizer_") ||
+ AMDGPU::isEntryFunctionCC(F->getCallingConv());
GV.removeDeadConstantUsers();
return !GV.use_empty();
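The mustPreserveGV hunk above additionally keeps functions named __asan_* and __sanitizer_* alive across internalization, presumably so sanitizer runtime entry points survive Internalize/GlobalDCE on device code. A tiny sketch of just the name test (the predicate below is illustrative, not the patch's function):

#include "llvm/ADT/StringRef.h"

// True for symbols the sanitizer runtimes are expected to resolve.
static bool isSanitizerRuntimeSymbol(llvm::StringRef Name) {
  return Name.startswith("__asan_") || Name.startswith("__sanitizer_");
}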
@@ -556,6 +541,8 @@ void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
bool EarlyInline = EarlyInlineAll && EnableOpt && !EnableFunctionCalls;
bool AMDGPUAA = EnableAMDGPUAliasAnalysis && EnableOpt;
bool LibCallSimplify = EnableLibCallSimplify && EnableOpt;
+ bool PromoteKernelArguments =
+ EnablePromoteKernelArguments && getOptLevel() > CodeGenOpt::Less;
if (EnableFunctionCalls) {
delete Builder.Inliner;
@@ -597,7 +584,14 @@ void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
Builder.addExtension(
PassManagerBuilder::EP_CGSCCOptimizerLate,
- [EnableOpt](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
+ [EnableOpt, PromoteKernelArguments](const PassManagerBuilder &,
+ legacy::PassManagerBase &PM) {
+ // Add promote kernel arguments pass to the opt pipeline right before
+ // infer address spaces which is needed to do actual address space
+ // rewriting.
+ if (PromoteKernelArguments)
+ PM.add(createAMDGPUPromoteKernelArgumentsPass());
+
// Add infer address spaces pass to the opt pipeline after inlining
// but before SROA to increase SROA opportunities.
PM.add(createInferAddressSpacesPass());
@@ -674,6 +668,10 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
PM.addPass(AMDGPUPropagateAttributesEarlyPass(*this));
return true;
}
+ if (PassName == "amdgpu-promote-kernel-arguments") {
+ PM.addPass(AMDGPUPromoteKernelArgumentsPass());
+ return true;
+ }
return false;
});
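The hunk above lets the new promotion pass be named in a textual pipeline (for example -passes=amdgpu-promote-kernel-arguments) by extending the target's pipeline-parsing callback. A generic sketch of that mechanism, assuming a PassBuilder &PB in scope and a placeholder MyFunctionPass that is not part of this patch:

// Map a textual pass name onto a pass instance so that
// "-passes=my-function-pass" resolves to it.
PB.registerPipelineParsingCallback(
    [](llvm::StringRef Name, llvm::FunctionPassManager &FPM,
       llvm::ArrayRef<llvm::PassBuilder::PipelineElement>) {
      if (Name == "my-function-pass") {
        FPM.addPass(MyFunctionPass()); // placeholder pass
        return true;
      }
      return false;
    });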
@@ -690,19 +688,18 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
});
PB.registerPipelineStartEPCallback(
- [this](ModulePassManager &PM, PassBuilder::OptimizationLevel Level) {
+ [this](ModulePassManager &PM, OptimizationLevel Level) {
FunctionPassManager FPM;
FPM.addPass(AMDGPUPropagateAttributesEarlyPass(*this));
FPM.addPass(AMDGPUUseNativeCallsPass());
- if (EnableLibCallSimplify &&
- Level != PassBuilder::OptimizationLevel::O0)
+ if (EnableLibCallSimplify && Level != OptimizationLevel::O0)
FPM.addPass(AMDGPUSimplifyLibCallsPass(*this));
PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
});
PB.registerPipelineEarlySimplificationEPCallback(
- [this](ModulePassManager &PM, PassBuilder::OptimizationLevel Level) {
- if (Level == PassBuilder::OptimizationLevel::O0)
+ [this](ModulePassManager &PM, OptimizationLevel Level) {
+ if (Level == OptimizationLevel::O0)
return;
PM.addPass(AMDGPUUnifyMetadataPass());
@@ -720,12 +717,19 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
});
PB.registerCGSCCOptimizerLateEPCallback(
- [this](CGSCCPassManager &PM, PassBuilder::OptimizationLevel Level) {
- if (Level == PassBuilder::OptimizationLevel::O0)
+ [this](CGSCCPassManager &PM, OptimizationLevel Level) {
+ if (Level == OptimizationLevel::O0)
return;
FunctionPassManager FPM;
+ // Add promote kernel arguments pass to the opt pipeline right before
+ // infer address spaces which is needed to do actual address space
+ // rewriting.
+ if (Level.getSpeedupLevel() > OptimizationLevel::O1.getSpeedupLevel() &&
+ EnablePromoteKernelArguments)
+ FPM.addPass(AMDGPUPromoteKernelArgumentsPass());
+
// Add infer address spaces pass to the opt pipeline after inlining
// but before SROA to increase SROA opportunities.
FPM.addPass(InferAddressSpacesPass());
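In the CGSCC late extension point above, the promotion pass is gated on the numeric speedup level rather than a fixed enum compare: it runs only when the requested speedup exceeds O1's, which covers O2 and O3 as well as the size-oriented levels. A standalone sketch of that predicate:

#include "llvm/Passes/PassBuilder.h"

// True at O2/O3 and at Os/Oz, false at O0/O1.
static bool runsPromoteKernelArgs(llvm::OptimizationLevel Level) {
  return Level.getSpeedupLevel() >
         llvm::OptimizationLevel::O1.getSpeedupLevel();
}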
@@ -734,7 +738,7 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
// anything, and before other cleanup optimizations.
FPM.addPass(AMDGPULowerKernelAttributesPass());
- if (Level != PassBuilder::OptimizationLevel::O0) {
+ if (Level != OptimizationLevel::O0) {
// Promote alloca to vector before SROA and loop unroll. If we
// manage to eliminate allocas before unroll we may choose to unroll
// less.
@@ -745,45 +749,6 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
});
}
-//===----------------------------------------------------------------------===//
-// R600 Target Machine (R600 -> Cayman)
-//===----------------------------------------------------------------------===//
-
-R600TargetMachine::R600TargetMachine(const Target &T, const Triple &TT,
- StringRef CPU, StringRef FS,
- TargetOptions Options,
- Optional<Reloc::Model> RM,
- Optional<CodeModel::Model> CM,
- CodeGenOpt::Level OL, bool JIT)
- : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {
- setRequiresStructuredCFG(true);
-
- // Override the default since calls aren't supported for r600.
- if (EnableFunctionCalls &&
- EnableAMDGPUFunctionCallsOpt.getNumOccurrences() == 0)
- EnableFunctionCalls = false;
-}
-
-const R600Subtarget *R600TargetMachine::getSubtargetImpl(
- const Function &F) const {
- StringRef GPU = getGPUName(F);
- StringRef FS = getFeatureString(F);
-
- SmallString<128> SubtargetKey(GPU);
- SubtargetKey.append(FS);
-
- auto &I = SubtargetMap[SubtargetKey];
- if (!I) {
- // This needs to be done before we create a new subtarget since any
- // creation will depend on the TM and the code generation flags on the
- // function that reside in TargetOptions.
- resetTargetOptions(F);
- I = std::make_unique<R600Subtarget>(TargetTriple, GPU, FS, *this);
- }
-
- return I.get();
-}
-
int64_t AMDGPUTargetMachine::getNullPointerValue(unsigned AddrSpace) {
return (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
AddrSpace == AMDGPUAS::PRIVATE_ADDRESS ||
@@ -817,9 +782,31 @@ unsigned AMDGPUTargetMachine::getAssumedAddrSpace(const Value *V) const {
return AMDGPUAS::GLOBAL_ADDRESS;
}
-TargetTransformInfo
-R600TargetMachine::getTargetTransformInfo(const Function &F) {
- return TargetTransformInfo(R600TTIImpl(this, F));
+std::pair<const Value *, unsigned>
+AMDGPUTargetMachine::getPredicatedAddrSpace(const Value *V) const {
+ if (auto *II = dyn_cast<IntrinsicInst>(V)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::amdgcn_is_shared:
+ return std::make_pair(II->getArgOperand(0), AMDGPUAS::LOCAL_ADDRESS);
+ case Intrinsic::amdgcn_is_private:
+ return std::make_pair(II->getArgOperand(0), AMDGPUAS::PRIVATE_ADDRESS);
+ default:
+ break;
+ }
+ return std::make_pair(nullptr, -1);
+ }
+ // Check the global pointer predication based on
// (!is_shared(p) && !is_private(p)). Note that logical 'and' is commutative and
+ // the order of 'is_shared' and 'is_private' is not significant.
+ Value *Ptr;
+ if (match(
+ const_cast<Value *>(V),
+ m_c_And(m_Not(m_Intrinsic<Intrinsic::amdgcn_is_shared>(m_Value(Ptr))),
+ m_Not(m_Intrinsic<Intrinsic::amdgcn_is_private>(
+ m_Deferred(Ptr))))))
+ return std::make_pair(Ptr, AMDGPUAS::GLOBAL_ADDRESS);
+
+ return std::make_pair(nullptr, -1);
}
//===----------------------------------------------------------------------===//
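getPredicatedAddrSpace, added above, recognises two shapes: a direct amdgcn.is.shared or amdgcn.is.private call, and the compound (!is_shared(p) && !is_private(p)) test that implies a global pointer. The compound match leans on two PatternMatch idioms: m_c_And accepts the operands of the 'and' in either order, and m_Deferred(Ptr) requires the second intrinsic to see the same value the first one bound. A minimal sketch of just that match (the wrapper function is illustrative only):

#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/PatternMatch.h"

using namespace llvm;
using namespace llvm::PatternMatch;

// Returns true and sets Ptr when V is '!is_shared(p) && !is_private(p)'.
static bool matchesGlobalPredicate(Value *V, Value *&Ptr) {
  return match(V, m_c_And(m_Not(m_Intrinsic<Intrinsic::amdgcn_is_shared>(
                              m_Value(Ptr))),
                          m_Not(m_Intrinsic<Intrinsic::amdgcn_is_private>(
                              m_Deferred(Ptr)))));
}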
@@ -834,7 +821,8 @@ GCNTargetMachine::GCNTargetMachine(const Target &T, const Triple &TT,
CodeGenOpt::Level OL, bool JIT)
: AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
-const GCNSubtarget *GCNTargetMachine::getSubtargetImpl(const Function &F) const {
+const TargetSubtargetInfo *
+GCNTargetMachine::getSubtargetImpl(const Function &F) const {
StringRef GPU = getGPUName(F);
StringRef FS = getFeatureString(F);
@@ -864,76 +852,11 @@ GCNTargetMachine::getTargetTransformInfo(const Function &F) {
// AMDGPU Pass Setup
//===----------------------------------------------------------------------===//
-namespace {
-
-class AMDGPUPassConfig : public TargetPassConfig {
-public:
- AMDGPUPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
- : TargetPassConfig(TM, PM) {
- // Exceptions and StackMaps are not supported, so these passes will never do
- // anything.
- disablePass(&StackMapLivenessID);
- disablePass(&FuncletLayoutID);
- // Garbage collection is not supported.
- disablePass(&GCLoweringID);
- disablePass(&ShadowStackGCLoweringID);
- }
-
- AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
- return getTM<AMDGPUTargetMachine>();
- }
-
- ScheduleDAGInstrs *
- createMachineScheduler(MachineSchedContext *C) const override {
- ScheduleDAGMILive *DAG = createGenericSchedLive(C);
- DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
- return DAG;
- }
-
- void addEarlyCSEOrGVNPass();
- void addStraightLineScalarOptimizationPasses();
- void addIRPasses() override;
- void addCodeGenPrepare() override;
- bool addPreISel() override;
- bool addInstSelector() override;
- bool addGCPasses() override;
-
- std::unique_ptr<CSEConfigBase> getCSEConfig() const override;
-
- /// Check if a pass is enabled given \p Opt option. The option always
- /// overrides defaults if explicitely used. Otherwise its default will
- /// be used given that a pass shall work at an optimization \p Level
- /// minimum.
- bool isPassEnabled(const cl::opt<bool> &Opt,
- CodeGenOpt::Level Level = CodeGenOpt::Default) const {
- if (Opt.getNumOccurrences())
- return Opt;
- if (TM->getOptLevel() < Level)
- return false;
- return Opt;
- }
-};
-
-std::unique_ptr<CSEConfigBase> AMDGPUPassConfig::getCSEConfig() const {
+std::unique_ptr<CSEConfigBase> llvm::AMDGPUPassConfig::getCSEConfig() const {
return getStandardCSEConfigForOpt(TM->getOptLevel());
}
-class R600PassConfig final : public AMDGPUPassConfig {
-public:
- R600PassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
- : AMDGPUPassConfig(TM, PM) {}
-
- ScheduleDAGInstrs *createMachineScheduler(
- MachineSchedContext *C) const override {
- return createR600MachineScheduler(C);
- }
-
- bool addPreISel() override;
- bool addInstSelector() override;
- void addPreRegAlloc() override;
- void addPreSched2() override;
- void addPreEmitPass() override;
-};
+namespace {
class GCNPassConfig final : public AMDGPUPassConfig {
public:
@@ -943,6 +866,7 @@ public:
// allow calls without EnableAMDGPUFunctionCalls if they are marked
// noinline, so this is always required.
setRequiresCodeGenSCCOrder(true);
+ substitutePass(&PostRASchedulerID, &PostMachineSchedulerID);
}
GCNTargetMachine &getGCNTargetMachine() const {
@@ -952,6 +876,15 @@ public:
ScheduleDAGInstrs *
createMachineScheduler(MachineSchedContext *C) const override;
+ ScheduleDAGInstrs *
+ createPostMachineScheduler(MachineSchedContext *C) const override {
+ ScheduleDAGMI *DAG = createGenericSchedPostRA(C);
+ const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
+ DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
+ DAG->addMutation(ST.createFillMFMAShadowMutation(DAG->TII));
+ return DAG;
+ }
+
bool addPreISel() override;
void addMachineSSAOptimization() override;
bool addILPOpts() override;
@@ -982,6 +915,17 @@ public:
} // end anonymous namespace
+AMDGPUPassConfig::AMDGPUPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {
+ // Exceptions and StackMaps are not supported, so these passes will never do
+ // anything.
+ disablePass(&StackMapLivenessID);
+ disablePass(&FuncletLayoutID);
+ // Garbage collection is not supported.
+ disablePass(&GCLoweringID);
+ disablePass(&ShadowStackGCLoweringID);
+}
+
void AMDGPUPassConfig::addEarlyCSEOrGVNPass() {
if (getOptLevel() == CodeGenOpt::Aggressive)
addPass(createGVNPass());
@@ -993,7 +937,7 @@ void AMDGPUPassConfig::addStraightLineScalarOptimizationPasses() {
addPass(createLICMPass());
addPass(createSeparateConstOffsetFromGEPPass());
addPass(createSpeculativeExecutionPass());
- // ReassociateGEPs exposes more opportunites for SLSR. See
+ // ReassociateGEPs exposes more opportunities for SLSR. See
// the example in reassociate-geps-and-slsr.ll.
addPass(createStraightLineStrengthReducePass());
// SeparateConstOffsetFromGEP and SLSR creates common expressions which GVN or
@@ -1015,6 +959,7 @@ void AMDGPUPassConfig::addIRPasses() {
disablePass(&PatchableFunctionID);
addPass(createAMDGPUPrintfRuntimeBinding());
+ addPass(createAMDGPUCtorDtorLoweringPass());
// This must occur before inlining, as the inliner will not look through
// bitcast calls.
@@ -1100,8 +1045,13 @@ void AMDGPUPassConfig::addIRPasses() {
}
void AMDGPUPassConfig::addCodeGenPrepare() {
- if (TM->getTargetTriple().getArch() == Triple::amdgcn)
+ if (TM->getTargetTriple().getArch() == Triple::amdgcn) {
+ addPass(createAMDGPUAttributorPass());
+
+ // FIXME: This pass adds 2 hacky attributes that can be replaced with an
+ // analysis, and should be removed.
addPass(createAMDGPUAnnotateKernelFeaturesPass());
+ }
if (TM->getTargetTriple().getArch() == Triple::amdgcn &&
EnableLowerKernelArguments)
@@ -1126,8 +1076,7 @@ bool AMDGPUPassConfig::addPreISel() {
}
bool AMDGPUPassConfig::addInstSelector() {
- // Defer the verifier until FinalizeISel.
- addPass(createAMDGPUISelDag(&getAMDGPUTargetMachine(), getOptLevel()), false);
+ addPass(createAMDGPUISelDag(&getAMDGPUTargetMachine(), getOptLevel()));
return false;
}
@@ -1136,44 +1085,11 @@ bool AMDGPUPassConfig::addGCPasses() {
return false;
}
-//===----------------------------------------------------------------------===//
-// R600 Pass Setup
-//===----------------------------------------------------------------------===//
-
-bool R600PassConfig::addPreISel() {
- AMDGPUPassConfig::addPreISel();
-
- if (EnableR600StructurizeCFG)
- addPass(createStructurizeCFGPass());
- return false;
-}
-
-bool R600PassConfig::addInstSelector() {
- addPass(createR600ISelDag(&getAMDGPUTargetMachine(), getOptLevel()));
- return false;
-}
-
-void R600PassConfig::addPreRegAlloc() {
- addPass(createR600VectorRegMerger());
-}
-
-void R600PassConfig::addPreSched2() {
- addPass(createR600EmitClauseMarkers(), false);
- if (EnableR600IfConvert)
- addPass(&IfConverterID, false);
- addPass(createR600ClauseMergePass(), false);
-}
-
-void R600PassConfig::addPreEmitPass() {
- addPass(createAMDGPUCFGStructurizerPass(), false);
- addPass(createR600ExpandSpecialInstrsPass(), false);
- addPass(&FinalizeMachineBundlesID, false);
- addPass(createR600Packetizer(), false);
- addPass(createR600ControlFlowFinalizer(), false);
-}
-
-TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) {
- return new R600PassConfig(*this, PM);
+llvm::ScheduleDAGInstrs *
+AMDGPUPassConfig::createMachineScheduler(MachineSchedContext *C) const {
+ ScheduleDAGMILive *DAG = createGenericSchedLive(C);
+ DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
+ return DAG;
}
//===----------------------------------------------------------------------===//
@@ -1311,7 +1227,7 @@ void GCNPassConfig::addFastRegAlloc() {
// This must be run immediately after phi elimination and before
// TwoAddressInstructions, otherwise the processing of the tied operand of
// SI_ELSE will introduce a copy of the tied operand source after the else.
- insertPass(&PHIEliminationID, &SILowerControlFlowID, false);
+ insertPass(&PHIEliminationID, &SILowerControlFlowID);
insertPass(&TwoAddressInstructionPassID, &SIWholeQuadModeID);
insertPass(&TwoAddressInstructionPassID, &SIPreAllocateWWMRegsID);
@@ -1341,11 +1257,11 @@ void GCNPassConfig::addOptimizedRegAlloc() {
// the register in LiveVariables, this would trigger a failure in verifier,
// we should fix it and enable the verifier.
if (OptVGPRLiveRange)
- insertPass(&LiveVariablesID, &SIOptimizeVGPRLiveRangeID, false);
+ insertPass(&LiveVariablesID, &SIOptimizeVGPRLiveRangeID);
// This must be run immediately after phi elimination and before
// TwoAddressInstructions, otherwise the processing of the tied operand of
// SI_ELSE will introduce a copy of the tied operand source after the else.
- insertPass(&PHIEliminationID, &SILowerControlFlowID, false);
+ insertPass(&PHIEliminationID, &SILowerControlFlowID);
if (EnableDCEInRA)
insertPass(&DetectDeadLanesID, &DeadMachineInstructionElimID);
@@ -1418,7 +1334,7 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
// Commit allocated register changes. This is mostly necessary because too
// many things rely on the use lists of the physical registers, such as the
// verifier. This is only necessary with allocators which use LiveIntervals,
- // since FastRegAlloc does the replacments itself.
+ // since FastRegAlloc does the replacements itself.
addPass(createVirtRegRewriter(false));
// Equivalent of PEI for SGPRs.
@@ -1440,6 +1356,8 @@ void GCNPassConfig::addPostRegAlloc() {
}
void GCNPassConfig::addPreSched2() {
+ if (TM->getOptLevel() > CodeGenOpt::None)
+ addPass(createSIShrinkInstructionsPass());
addPass(&SIPostRABundlerID);
}
@@ -1447,9 +1365,6 @@ void GCNPassConfig::addPreEmitPass() {
addPass(createSIMemoryLegalizerPass());
addPass(createSIInsertWaitcntsPass());
- if (TM->getOptLevel() > CodeGenOpt::None)
- addPass(createSIShrinkInstructionsPass());
-
addPass(createSIModeRegisterPass());
if (getOptLevel() > CodeGenOpt::None)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 1bfe026d080c..0ff2db2a52d9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -15,11 +15,14 @@
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETMACHINE_H
#include "GCNSubtarget.h"
-#include "R600Subtarget.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Target/TargetMachine.h"
+#include <utility>
namespace llvm {
+class ScheduleDAGMILive;
+
//===----------------------------------------------------------------------===//
// AMDGPU Target Machine (R600+)
//===----------------------------------------------------------------------===//
@@ -61,31 +64,9 @@ public:
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
unsigned getAssumedAddrSpace(const Value *V) const override;
-};
-
-//===----------------------------------------------------------------------===//
-// R600 Target Machine (R600 -> Cayman)
-//===----------------------------------------------------------------------===//
-
-class R600TargetMachine final : public AMDGPUTargetMachine {
-private:
- mutable StringMap<std::unique_ptr<R600Subtarget>> SubtargetMap;
-public:
- R600TargetMachine(const Target &T, const Triple &TT, StringRef CPU,
- StringRef FS, TargetOptions Options,
- Optional<Reloc::Model> RM, Optional<CodeModel::Model> CM,
- CodeGenOpt::Level OL, bool JIT);
-
- TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
-
- const R600Subtarget *getSubtargetImpl(const Function &) const override;
-
- TargetTransformInfo getTargetTransformInfo(const Function &F) override;
-
- bool isMachineVerifierClean() const override {
- return false;
- }
+ std::pair<const Value *, unsigned>
+ getPredicatedAddrSpace(const Value *V) const override;
};
//===----------------------------------------------------------------------===//
@@ -104,7 +85,7 @@ public:
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
- const GCNSubtarget *getSubtargetImpl(const Function &) const override;
+ const TargetSubtargetInfo *getSubtargetImpl(const Function &) const override;
TargetTransformInfo getTargetTransformInfo(const Function &F) override;
@@ -121,6 +102,45 @@ public:
SMRange &SourceRange) const override;
};
+//===----------------------------------------------------------------------===//
+// AMDGPU Pass Setup
+//===----------------------------------------------------------------------===//
+
+class AMDGPUPassConfig : public TargetPassConfig {
+public:
+ AMDGPUPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM);
+
+ AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
+ return getTM<AMDGPUTargetMachine>();
+ }
+
+ ScheduleDAGInstrs *
+ createMachineScheduler(MachineSchedContext *C) const override;
+
+ void addEarlyCSEOrGVNPass();
+ void addStraightLineScalarOptimizationPasses();
+ void addIRPasses() override;
+ void addCodeGenPrepare() override;
+ bool addPreISel() override;
+ bool addInstSelector() override;
+ bool addGCPasses() override;
+
+ std::unique_ptr<CSEConfigBase> getCSEConfig() const override;
+
+ /// Check if a pass is enabled given \p Opt option. The option always
+ /// overrides defaults if explicitly used. Otherwise its default will
+ /// be used given that a pass shall work at an optimization \p Level
+ /// minimum.
+ bool isPassEnabled(const cl::opt<bool> &Opt,
+ CodeGenOpt::Level Level = CodeGenOpt::Default) const {
+ if (Opt.getNumOccurrences())
+ return Opt;
+ if (TM->getOptLevel() < Level)
+ return false;
+ return Opt;
+ }
+};
+
} // end namespace llvm
#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETMACHINE_H
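The relocated AMDGPUPassConfig above exposes isPassEnabled to both pass configs; its policy is that an explicitly passed option always wins, and otherwise the option's default value counts only at or above the stated minimum optimization level. A standalone restatement of that policy with simplified types (it mirrors the helper, it is not the helper):

#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"

// An explicit -foo / -foo=false on the command line overrides everything;
// otherwise the option's default only applies when the level is high enough.
static bool isEnabled(const llvm::cl::opt<bool> &Opt,
                      llvm::CodeGenOpt::Level Current,
                      llvm::CodeGenOpt::Level Min) {
  if (Opt.getNumOccurrences())
    return Opt;
  if (Current < Min)
    return false;
  return Opt;
}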
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 63f449f7a726..ecdbdf613a53 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -16,10 +16,11 @@
#include "AMDGPUTargetTransformInfo.h"
#include "AMDGPUTargetMachine.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/KnownBits.h"
@@ -101,7 +102,8 @@ AMDGPUTTIImpl::AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
TLI(ST->getTargetLowering()) {}
void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
- TTI::UnrollingPreferences &UP) {
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE) {
const Function &F = *L->getHeader()->getParent();
UP.Threshold = AMDGPU::getIntegerAttribute(F, "amdgpu-unroll-threshold", 300);
UP.MaxCount = std::numeric_limits<unsigned>::max();
@@ -503,7 +505,7 @@ bool GCNTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
Info.Ordering = static_cast<AtomicOrdering>(OrderingVal);
Info.ReadMem = true;
Info.WriteMem = true;
- Info.IsVolatile = !Volatile->isNullValue();
+ Info.IsVolatile = !Volatile->isZero();
return true;
}
default:
@@ -1224,8 +1226,9 @@ unsigned GCNTTIImpl::adjustInliningThreshold(const CallBase *CB) const {
}
void GCNTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
- TTI::UnrollingPreferences &UP) {
- CommonTTI.getUnrollingPreferences(L, SE, UP);
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE) {
+ CommonTTI.getUnrollingPreferences(L, SE, UP, ORE);
}
void GCNTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
@@ -1239,122 +1242,3 @@ int GCNTTIImpl::get64BitInstrCost(TTI::TargetCostKind CostKind) const {
: ST->hasHalfRate64Ops() ? getHalfRateInstrCost(CostKind)
: getQuarterRateInstrCost(CostKind);
}
-
-R600TTIImpl::R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
- : BaseT(TM, F.getParent()->getDataLayout()),
- ST(static_cast<const R600Subtarget *>(TM->getSubtargetImpl(F))),
- TLI(ST->getTargetLowering()), CommonTTI(TM, F) {}
-
-unsigned R600TTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
- return 4 * 128; // XXX - 4 channels. Should these count as vector instead?
-}
-
-unsigned R600TTIImpl::getNumberOfRegisters(bool Vec) const {
- return getHardwareNumberOfRegisters(Vec);
-}
-
-TypeSize
-R600TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
- return TypeSize::getFixed(32);
-}
-
-unsigned R600TTIImpl::getMinVectorRegisterBitWidth() const {
- return 32;
-}
-
-unsigned R600TTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
- if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS ||
- AddrSpace == AMDGPUAS::CONSTANT_ADDRESS)
- return 128;
- if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
- AddrSpace == AMDGPUAS::REGION_ADDRESS)
- return 64;
- if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS)
- return 32;
-
- if ((AddrSpace == AMDGPUAS::PARAM_D_ADDRESS ||
- AddrSpace == AMDGPUAS::PARAM_I_ADDRESS ||
- (AddrSpace >= AMDGPUAS::CONSTANT_BUFFER_0 &&
- AddrSpace <= AMDGPUAS::CONSTANT_BUFFER_15)))
- return 128;
- llvm_unreachable("unhandled address space");
-}
-
-bool R600TTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
- Align Alignment,
- unsigned AddrSpace) const {
- // We allow vectorization of flat stores, even though we may need to decompose
- // them later if they may access private memory. We don't have enough context
- // here, and legalization can handle it.
- return (AddrSpace != AMDGPUAS::PRIVATE_ADDRESS);
-}
-
-bool R600TTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
- Align Alignment,
- unsigned AddrSpace) const {
- return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
-}
-
-bool R600TTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
- Align Alignment,
- unsigned AddrSpace) const {
- return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
-}
-
-unsigned R600TTIImpl::getMaxInterleaveFactor(unsigned VF) {
- // Disable unrolling if the loop is not vectorized.
- // TODO: Enable this again.
- if (VF == 1)
- return 1;
-
- return 8;
-}
-
-InstructionCost R600TTIImpl::getCFInstrCost(unsigned Opcode,
- TTI::TargetCostKind CostKind,
- const Instruction *I) {
- if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency)
- return Opcode == Instruction::PHI ? 0 : 1;
-
- // XXX - For some reason this isn't called for switch.
- switch (Opcode) {
- case Instruction::Br:
- case Instruction::Ret:
- return 10;
- default:
- return BaseT::getCFInstrCost(Opcode, CostKind, I);
- }
-}
-
-InstructionCost R600TTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
- unsigned Index) {
- switch (Opcode) {
- case Instruction::ExtractElement:
- case Instruction::InsertElement: {
- unsigned EltSize
- = DL.getTypeSizeInBits(cast<VectorType>(ValTy)->getElementType());
- if (EltSize < 32) {
- return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
- }
-
- // Extracts are just reads of a subregister, so are free. Inserts are
- // considered free because we don't want to have any cost for scalarizing
- // operations, and we don't have to copy into a different register class.
-
- // Dynamic indexing isn't free and is best avoided.
- return Index == ~0u ? 2 : 0;
- }
- default:
- return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
- }
-}
-
-void R600TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
- TTI::UnrollingPreferences &UP) {
- CommonTTI.getUnrollingPreferences(L, SE, UP);
-}
-
-void R600TTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
- TTI::PeelingPreferences &PP) {
- CommonTTI.getPeelingPreferences(L, SE, PP);
-}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index 37c0756eb7a8..e901b5c5747d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -18,18 +18,14 @@
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
namespace llvm {
-class AMDGPUTargetLowering;
class AMDGPUTargetMachine;
class GCNSubtarget;
class InstCombiner;
class Loop;
-class R600Subtarget;
class ScalarEvolution;
class SITargetLowering;
class Type;
@@ -53,7 +49,8 @@ public:
explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F);
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
- TTI::UnrollingPreferences &UP);
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE);
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP);
@@ -82,24 +79,21 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
return TargetTransformInfo::TCC_Basic;
}
- static inline int getHalfRateInstrCost(
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) {
+ static inline int getHalfRateInstrCost(TTI::TargetCostKind CostKind) {
return CostKind == TTI::TCK_CodeSize ? 2
: 2 * TargetTransformInfo::TCC_Basic;
}
// TODO: The size is usually 8 bytes, but takes 4x as many cycles. Maybe
// should be 2 or 4.
- static inline int getQuarterRateInstrCost(
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) {
+ static inline int getQuarterRateInstrCost(TTI::TargetCostKind CostKind) {
return CostKind == TTI::TCK_CodeSize ? 2
: 4 * TargetTransformInfo::TCC_Basic;
}
// On some parts, normal fp64 operations are half rate, and others
// quarter. This also applies to some integer operations.
- int get64BitInstrCost(
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
+ int get64BitInstrCost(TTI::TargetCostKind CostKind) const;
public:
explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F);
@@ -108,7 +102,8 @@ public:
bool useGPUDivergenceAnalysis() const;
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
- TTI::UnrollingPreferences &UP);
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE);
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP);
@@ -154,8 +149,7 @@ public:
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
InstructionCost getArithmeticInstrCost(
- unsigned Opcode, Type *Ty,
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+ unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
@@ -184,6 +178,12 @@ public:
bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
Intrinsic::ID IID) const;
+
+ bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const {
+ return AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS &&
+ AS != AMDGPUAS::PRIVATE_ADDRESS;
+ }
+
Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
Value *NewV) const;
@@ -213,51 +213,13 @@ public:
InstructionCost getArithmeticReductionCost(
unsigned Opcode, VectorType *Ty, Optional<FastMathFlags> FMF,
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput);
+ TTI::TargetCostKind CostKind);
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind);
InstructionCost getMinMaxReductionCost(
VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput);
-};
-
-class R600TTIImpl final : public BasicTTIImplBase<R600TTIImpl> {
- using BaseT = BasicTTIImplBase<R600TTIImpl>;
- using TTI = TargetTransformInfo;
-
- friend BaseT;
-
- const R600Subtarget *ST;
- const AMDGPUTargetLowering *TLI;
- AMDGPUTTIImpl CommonTTI;
-
-public:
- explicit R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F);
-
- const R600Subtarget *getST() const { return ST; }
- const AMDGPUTargetLowering *getTLI() const { return TLI; }
-
- void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
- TTI::UnrollingPreferences &UP);
- void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
- TTI::PeelingPreferences &PP);
- unsigned getHardwareNumberOfRegisters(bool Vec) const;
- unsigned getNumberOfRegisters(bool Vec) const;
- TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind Vector) const;
- unsigned getMinVectorRegisterBitWidth() const;
- unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
- bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, Align Alignment,
- unsigned AddrSpace) const;
- bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
- unsigned AddrSpace) const;
- bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
- unsigned AddrSpace) const;
- unsigned getMaxInterleaveFactor(unsigned VF);
- InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
- const Instruction *I = nullptr);
- InstructionCost getVectorInstrCost(unsigned Opcode, Type *ValTy,
- unsigned Index);
+ TTI::TargetCostKind CostKind);
};
} // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
index 4e3d5fdc012d..c6751f98fe6a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
@@ -232,7 +232,7 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
BranchInst::Create(LoopHeaderBB, DummyReturnBB, BoolTrue, BB);
Updates.push_back({DominatorTree::Insert, BB, DummyReturnBB});
} else { // Conditional branch.
- SmallVector<BasicBlock *, 2> Successors(succ_begin(BB), succ_end(BB));
+ SmallVector<BasicBlock *, 2> Successors(successors(BB));
// Create a new transition block to hold the conditional branch.
BasicBlock *TransitionBB = BB->splitBasicBlock(BI, "TransitionBlock");
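The one-line change above swaps the succ_begin/succ_end iterator pair for llvm::successors(BB), the range helper over a block's terminator successors; the snapshot into a SmallVector is unchanged. A tiny equivalence sketch:

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"

using namespace llvm;

// Both initializations collect the same successors of BB.
static void snapshot(BasicBlock *BB) {
  SmallVector<BasicBlock *, 2> ViaIterators(succ_begin(BB), succ_end(BB));
  SmallVector<BasicBlock *, 2> ViaRange(successors(BB));
  (void)ViaIterators;
  (void)ViaRange;
}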
diff --git a/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp b/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
index 56befe4ed0d0..1a9255f3240f 100644
--- a/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
@@ -6,8 +6,8 @@
//
//==-----------------------------------------------------------------------===//
-#include "AMDGPU.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "MCTargetDesc/R600MCTargetDesc.h"
+#include "R600.h"
#include "R600RegisterInfo.h"
#include "R600Subtarget.h"
#include "llvm/ADT/SCCIterator.h"
@@ -127,6 +127,10 @@ public:
bool prepare();
bool runOnMachineFunction(MachineFunction &MF) override {
+ // FIXME: This pass causes verification failures.
+ MF.getProperties().set(
+ MachineFunctionProperties::Property::FailsVerification);
+
TII = MF.getSubtarget<R600Subtarget>().getInstrInfo();
TRI = &TII->getRegisterInfo();
LLVM_DEBUG(MF.dump(););
@@ -245,7 +249,7 @@ protected:
int loopendPatternMatch();
int mergeLoop(MachineLoop *LoopRep);
- /// return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in
+ /// return true iff src1Blk->succ_empty() && src1Blk and src2Blk are in
/// the same loop with LoopLandInfo without explicitly keeping track of
/// loopContBlks and loopBreakBlks, this is a method to get the information.
bool isSameloopDetachedContbreak(MachineBasicBlock *Src1MBB,
@@ -571,12 +575,9 @@ bool AMDGPUCFGStructurizer::isUncondBranch(MachineInstr *MI) {
DebugLoc AMDGPUCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) {
//get DebugLoc from the first MachineBasicBlock instruction with debug info
DebugLoc DL;
- for (MachineBasicBlock::iterator It = MBB->begin(); It != MBB->end();
- ++It) {
- MachineInstr *instr = &(*It);
- if (instr->getDebugLoc())
- DL = instr->getDebugLoc();
- }
+ for (MachineInstr &MI : *MBB)
+ if (MI.getDebugLoc())
+ DL = MI.getDebugLoc();
return DL;
}
@@ -617,7 +618,7 @@ MachineInstr *AMDGPUCFGStructurizer::getReturnInstr(MachineBasicBlock *MBB) {
bool AMDGPUCFGStructurizer::isReturnBlock(MachineBasicBlock *MBB) {
MachineInstr *MI = getReturnInstr(MBB);
- bool IsReturn = (MBB->succ_size() == 0);
+ bool IsReturn = MBB->succ_empty();
if (MI)
assert(IsReturn);
else if (IsReturn)
@@ -628,9 +629,8 @@ bool AMDGPUCFGStructurizer::isReturnBlock(MachineBasicBlock *MBB) {
void AMDGPUCFGStructurizer::cloneSuccessorList(MachineBasicBlock *DstMBB,
MachineBasicBlock *SrcMBB) {
- for (MachineBasicBlock::succ_iterator It = SrcMBB->succ_begin(),
- iterEnd = SrcMBB->succ_end(); It != iterEnd; ++It)
- DstMBB->addSuccessor(*It); // *iter's predecessor is also taken care of
+ for (MachineBasicBlock *Succ : SrcMBB->successors())
+ DstMBB->addSuccessor(Succ); // *iter's predecessor is also taken care of
}
MachineBasicBlock *AMDGPUCFGStructurizer::clone(MachineBasicBlock *MBB) {
@@ -808,7 +808,7 @@ bool AMDGPUCFGStructurizer::run() {
MachineBasicBlock *EntryMBB =
*GraphTraits<MachineFunction *>::nodes_begin(FuncRep);
- if (EntryMBB->succ_size() == 0) {
+ if (EntryMBB->succ_empty()) {
Finish = true;
LLVM_DEBUG(dbgs() << "Reduce to one block\n";);
} else {
@@ -1054,7 +1054,7 @@ int AMDGPUCFGStructurizer::mergeLoop(MachineLoop *LoopRep) {
bool AMDGPUCFGStructurizer::isSameloopDetachedContbreak(
MachineBasicBlock *Src1MBB, MachineBasicBlock *Src2MBB) {
- if (Src1MBB->succ_size() == 0) {
+ if (Src1MBB->succ_empty()) {
MachineLoop *LoopRep = MLI->getLoopFor(Src1MBB);
if (LoopRep&& LoopRep == MLI->getLoopFor(Src2MBB)) {
MachineBasicBlock *&TheEntry = LLInfoMap[LoopRep];
@@ -1319,12 +1319,9 @@ int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
insertInstrBefore(I, R600::ENDIF);
// put initReg = 2 to other predecessors of landBlk
- for (MachineBasicBlock::pred_iterator PI = LandBlk->pred_begin(),
- PE = LandBlk->pred_end(); PI != PE; ++PI) {
- MachineBasicBlock *MBB = *PI;
+ for (MachineBasicBlock *MBB : LandBlk->predecessors())
if (MBB != TrueMBB && MBB != FalseMBB)
report_fatal_error("Extra register needed to handle CFG");
- }
}
LLVM_DEBUG(
dbgs() << "result from improveSimpleJumpintoIf: ";
@@ -1393,7 +1390,7 @@ void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI,
MBB->splice(I, FalseMBB, FalseMBB->begin(),
FalseMBB->end());
MBB->removeSuccessor(FalseMBB, true);
- if (LandMBB && FalseMBB->succ_size() != 0)
+ if (LandMBB && !FalseMBB->succ_empty())
FalseMBB->removeSuccessor(LandMBB, true);
retireBlock(FalseMBB);
MLI->removeBlock(FalseMBB);
@@ -1639,8 +1636,7 @@ void AMDGPUCFGStructurizer::retireBlock(MachineBasicBlock *MBB) {
SrcBlkInfo = new BlockInformation();
SrcBlkInfo->IsRetired = true;
- assert(MBB->succ_size() == 0 && MBB->pred_size() == 0
- && "can't retire block yet");
+ assert(MBB->succ_empty() && MBB->pred_empty() && "can't retire block yet");
}
INITIALIZE_PASS_BEGIN(AMDGPUCFGStructurizer, "amdgpustructurizer",
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 00032c7d4ea5..4acd77a9d5d2 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -28,12 +28,12 @@
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/TargetParser.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
using namespace llvm::AMDGPU;
@@ -1542,7 +1542,7 @@ private:
bool validateOpSel(const MCInst &Inst);
bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
bool validateVccOperand(unsigned Reg) const;
- bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands);
+ bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
bool validateAGPRLdSt(const MCInst &Inst) const;
bool validateVGPRAlign(const MCInst &Inst) const;
@@ -1715,6 +1715,7 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
switch (OperandType) {
case AMDGPU::OPERAND_REG_IMM_INT32:
case AMDGPU::OPERAND_REG_IMM_FP32:
+ case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
case AMDGPU::OPERAND_REG_INLINE_C_INT32:
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
@@ -1723,6 +1724,7 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
case AMDGPU::OPERAND_REG_IMM_V2FP32:
case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
case AMDGPU::OPERAND_REG_IMM_V2INT32:
+ case AMDGPU::OPERAND_KIMM32:
return &APFloat::IEEEsingle();
case AMDGPU::OPERAND_REG_IMM_INT64:
case AMDGPU::OPERAND_REG_IMM_FP64:
@@ -1732,6 +1734,7 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
return &APFloat::IEEEdouble();
case AMDGPU::OPERAND_REG_IMM_INT16:
case AMDGPU::OPERAND_REG_IMM_FP16:
+ case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
@@ -1742,6 +1745,7 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
case AMDGPU::OPERAND_REG_IMM_V2INT16:
case AMDGPU::OPERAND_REG_IMM_V2FP16:
+ case AMDGPU::OPERAND_KIMM16:
return &APFloat::IEEEhalf();
default:
llvm_unreachable("unsupported fp type");
@@ -2017,12 +2021,14 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
case AMDGPU::OPERAND_REG_IMM_INT32:
case AMDGPU::OPERAND_REG_IMM_FP32:
+ case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
case AMDGPU::OPERAND_REG_INLINE_C_INT32:
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
case AMDGPU::OPERAND_REG_IMM_INT16:
case AMDGPU::OPERAND_REG_IMM_FP16:
+ case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
@@ -2036,7 +2042,9 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
case AMDGPU::OPERAND_REG_IMM_V2FP32:
case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
- case AMDGPU::OPERAND_REG_IMM_V2INT32: {
+ case AMDGPU::OPERAND_REG_IMM_V2INT32:
+ case AMDGPU::OPERAND_KIMM32:
+ case AMDGPU::OPERAND_KIMM16: {
bool lost;
APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
// Convert literal to single precision
@@ -2062,6 +2070,7 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
switch (OpTy) {
case AMDGPU::OPERAND_REG_IMM_INT32:
case AMDGPU::OPERAND_REG_IMM_FP32:
+ case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
case AMDGPU::OPERAND_REG_INLINE_C_INT32:
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
@@ -2101,6 +2110,7 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
case AMDGPU::OPERAND_REG_IMM_INT16:
case AMDGPU::OPERAND_REG_IMM_FP16:
+ case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
@@ -2128,6 +2138,14 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
Inst.addOperand(MCOperand::createImm(Val));
return;
}
+ case AMDGPU::OPERAND_KIMM32:
+ Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
+ setImmKindNone();
+ return;
+ case AMDGPU::OPERAND_KIMM16:
+ Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
+ setImmKindNone();
+ return;
default:
llvm_unreachable("invalid operand size");
}
@@ -3250,7 +3268,8 @@ AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
SIInstrFlags::SDWA)) {
// Check special imm operands (used by madmk, etc)
if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
- ++ConstantBusUseCount;
+ ++NumLiterals;
+ LiteralSize = 4;
}
SmallDenseSet<unsigned> SGPRsUsed;
@@ -3290,7 +3309,7 @@ AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
// An instruction may use only one literal.
// This has been validated on the previous step.
- // See validateVOP3Literal.
+ // See validateVOPLiteral.
// This literal may be used as more than one operand.
// If all these operands are of the same size,
// this literal counts as one scalar value.
@@ -3981,26 +4000,29 @@ bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
(FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
}
-// VOP3 literal is only allowed in GFX10+ and only one can be used
-bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst,
- const OperandVector &Operands) {
+// Only one unique literal can be used. VOP3 literals are only allowed in GFX10+
+bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
+ const OperandVector &Operands) {
unsigned Opcode = Inst.getOpcode();
const MCInstrDesc &Desc = MII.get(Opcode);
- if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
+ const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
+ if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
+ ImmIdx == -1)
return true;
const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
- const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
+ const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx};
unsigned NumExprs = 0;
unsigned NumLiterals = 0;
uint32_t LiteralValue;
for (int OpIdx : OpIndices) {
- if (OpIdx == -1) break;
+ if (OpIdx == -1)
+ continue;
const MCOperand &MO = Inst.getOperand(OpIdx);
if (!MO.isImm() && !MO.isExpr())
@@ -4030,7 +4052,7 @@ bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst,
if (!NumLiterals)
return true;
- if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
+ if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
Error(getLitLoc(Operands), "literal operands are not supported");
return false;
}
@@ -4202,7 +4224,7 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
"only one literal operand is allowed");
return false;
}
- if (!validateVOP3Literal(Inst, Operands)) {
+ if (!validateVOPLiteral(Inst, Operands)) {
return false;
}
if (!validateConstantBusLimitations(Inst, Operands)) {
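The assembler changes above generalise validateVOP3Literal into validateVOPLiteral, which now also inspects the named 'imm' operand used by madmk-style encodings, and teach addLiteralImmOperand to emit OPERAND_KIMM32/OPERAND_KIMM16 literals by keeping only the low bits of the parsed value. A small sketch of that narrowing, assuming the literal is already held in an APInt:

#include "llvm/ADT/APInt.h"
#include <cstdint>

// Keep only the low Bits (32 or 16 for KIMM32/KIMM16) and zero-extend.
static uint64_t narrowKImm(const llvm::APInt &Literal, unsigned Bits) {
  return Literal.getLoBits(Bits).getZExtValue();
}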
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index 5f43aa8388ee..d3644db7cf8b 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -55,10 +55,6 @@ class MTBUFGetBaseOpcode<string Op> {
!subst("FORMAT_XYZW", "FORMAT_X", Op)));
}
-class getMTBUFElements<string Op> {
- int ret = 1;
-}
-
class MTBUF_Pseudo <string opName, dag outs, dag ins,
string asmOps, list<dag> pattern=[]> :
@@ -223,8 +219,7 @@ class MTBUF_Load_Pseudo <string opName,
}
multiclass MTBUF_Pseudo_Loads<string opName, RegisterClass vdataClass,
- int elems, ValueType load_vt = i32,
- SDPatternOperator ld = null_frag> {
+ int elems> {
def _OFFSET : MTBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass, elems>,
MTBUFAddr64Table<0, NAME>;
@@ -265,8 +260,7 @@ class MTBUF_Store_Pseudo <string opName,
}
multiclass MTBUF_Pseudo_Stores<string opName, RegisterClass vdataClass,
- int elems, ValueType store_vt = i32,
- SDPatternOperator st = null_frag> {
+ int elems> {
def _OFFSET : MTBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass, elems>,
MTBUFAddr64Table<0, NAME>;
@@ -541,7 +535,6 @@ multiclass MUBUF_Pseudo_Load_Pats<string BaseInst, ValueType load_vt = i32, SDPa
// opcode because it needs an N+1 register class dest register.
multiclass MUBUF_Pseudo_Loads<string opName,
ValueType load_vt = i32,
- SDPatternOperator ld = null_frag,
bit TiedDest = 0,
bit isLds = 0> {
@@ -565,11 +558,9 @@ multiclass MUBUF_Pseudo_Loads<string opName,
}
}
-multiclass MUBUF_Pseudo_Loads_Lds<string opName, ValueType load_vt = i32,
- SDPatternOperator ld_nolds = null_frag,
- SDPatternOperator ld_lds = null_frag> {
- defm NAME : MUBUF_Pseudo_Loads<opName, load_vt, ld_nolds>;
- defm _LDS : MUBUF_Pseudo_Loads<opName, load_vt, ld_lds, 0, 1>;
+multiclass MUBUF_Pseudo_Loads_Lds<string opName, ValueType load_vt = i32> {
+ defm NAME : MUBUF_Pseudo_Loads<opName, load_vt>;
+ defm _LDS : MUBUF_Pseudo_Loads<opName, load_vt, 0, 1>;
}
class MUBUF_Store_Pseudo <string opName,
@@ -742,7 +733,6 @@ class MUBUF_AtomicRet_Pseudo<string opName, int addrKind,
multiclass MUBUF_Pseudo_Atomics_NO_RTN <string opName,
RegisterClass vdataClass,
ValueType vdataType,
- SDPatternOperator atomic,
bit isFP = isFloatType<vdataType>.ret> {
let FPAtomic = isFP in
def _OFFSET : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.Offset, vdataClass>,
@@ -796,7 +786,7 @@ multiclass MUBUF_Pseudo_Atomics <string opName,
RegisterClass vdataClass,
ValueType vdataType,
SDPatternOperator atomic> :
- MUBUF_Pseudo_Atomics_NO_RTN<opName, vdataClass, vdataType, atomic>,
+ MUBUF_Pseudo_Atomics_NO_RTN<opName, vdataClass, vdataType>,
MUBUF_Pseudo_Atomics_RTN<opName, vdataClass, vdataType, atomic>;
@@ -924,13 +914,13 @@ defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORDX4", v4i32, load_global>;
// in at least GFX8+ chips. See Bug 37653.
let SubtargetPredicate = isGFX8GFX9 in {
defm BUFFER_LOAD_DWORDX2_LDS : MUBUF_Pseudo_Loads <
- "buffer_load_dwordx2", v2i32, null_frag, 0, 1
+ "buffer_load_dwordx2", v2i32, 0, 1
>;
defm BUFFER_LOAD_DWORDX3_LDS : MUBUF_Pseudo_Loads <
- "buffer_load_dwordx3", v3i32, null_frag, 0, 1
+ "buffer_load_dwordx3", v3i32, 0, 1
>;
defm BUFFER_LOAD_DWORDX4_LDS : MUBUF_Pseudo_Loads <
- "buffer_load_dwordx4", v4i32, null_frag, 0, 1
+ "buffer_load_dwordx4", v4i32, 0, 1
>;
}
@@ -1076,27 +1066,27 @@ defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Pseudo_Atomics <
let SubtargetPredicate = HasD16LoadStore in {
defm BUFFER_LOAD_UBYTE_D16 : MUBUF_Pseudo_Loads <
- "buffer_load_ubyte_d16", i32, null_frag, 1
+ "buffer_load_ubyte_d16", i32, 1
>;
defm BUFFER_LOAD_UBYTE_D16_HI : MUBUF_Pseudo_Loads <
- "buffer_load_ubyte_d16_hi", i32, null_frag, 1
+ "buffer_load_ubyte_d16_hi", i32, 1
>;
defm BUFFER_LOAD_SBYTE_D16 : MUBUF_Pseudo_Loads <
- "buffer_load_sbyte_d16", i32, null_frag, 1
+ "buffer_load_sbyte_d16", i32, 1
>;
defm BUFFER_LOAD_SBYTE_D16_HI : MUBUF_Pseudo_Loads <
- "buffer_load_sbyte_d16_hi", i32, null_frag, 1
+ "buffer_load_sbyte_d16_hi", i32, 1
>;
defm BUFFER_LOAD_SHORT_D16 : MUBUF_Pseudo_Loads <
- "buffer_load_short_d16", i32, null_frag, 1
+ "buffer_load_short_d16", i32, 1
>;
defm BUFFER_LOAD_SHORT_D16_HI : MUBUF_Pseudo_Loads <
- "buffer_load_short_d16_hi", i32, null_frag, 1
+ "buffer_load_short_d16_hi", i32, 1
>;
defm BUFFER_STORE_BYTE_D16_HI : MUBUF_Pseudo_Stores <
@@ -1121,10 +1111,10 @@ def BUFFER_WBINVL1 : MUBUF_Invalidate <"buffer_wbinvl1",
let SubtargetPredicate = HasAtomicFaddInsts in {
defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Pseudo_Atomics_NO_RTN <
- "buffer_atomic_add_f32", VGPR_32, f32, atomic_load_fadd_global_noret_32
+ "buffer_atomic_add_f32", VGPR_32, f32
>;
defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Pseudo_Atomics_NO_RTN <
- "buffer_atomic_pk_add_f16", VGPR_32, v2f16, atomic_load_fadd_v2f16_global_noret_32
+ "buffer_atomic_pk_add_f16", VGPR_32, v2f16
>;
let OtherPredicates = [isGFX90APlus] in {
@@ -1438,6 +1428,13 @@ defm : BufferAtomicPatterns<SIbuffer_atomic_xor, i64, "BUFFER_ATOMIC_XOR_X2">;
defm : BufferAtomicPatterns<SIbuffer_atomic_inc, i64, "BUFFER_ATOMIC_INC_X2">;
defm : BufferAtomicPatterns<SIbuffer_atomic_dec, i64, "BUFFER_ATOMIC_DEC_X2">;
+let SubtargetPredicate = isGFX6GFX7GFX10 in {
+ defm : BufferAtomicPatterns<SIbuffer_atomic_fmin, f32, "BUFFER_ATOMIC_FMIN">;
+ defm : BufferAtomicPatterns<SIbuffer_atomic_fmax, f32, "BUFFER_ATOMIC_FMAX">;
+ defm : BufferAtomicPatterns<SIbuffer_atomic_fmin, f64, "BUFFER_ATOMIC_FMIN_X2">;
+ defm : BufferAtomicPatterns<SIbuffer_atomic_fmax, f64, "BUFFER_ATOMIC_FMAX_X2">;
+}
+
class NoUseBufferAtomic<SDPatternOperator Op, ValueType vt> : PatFrag <
(ops node:$src0, node:$src1, node:$src2, node:$src3, node:$src4, node:$src5, node:$src6, node:$src7),
(vt (Op $src0, $src1, $src2, $src3, $src4, $src5, $src6, $src7)),
diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td
index ad9528ece7d0..104b5160b985 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -428,11 +428,10 @@ defm DS_AND_B32 : DS_1A1D_NORET_mc<"ds_and_b32">;
defm DS_OR_B32 : DS_1A1D_NORET_mc<"ds_or_b32">;
defm DS_XOR_B32 : DS_1A1D_NORET_mc<"ds_xor_b32">;
-let SubtargetPredicate = HasLDSFPAtomics in {
+let SubtargetPredicate = HasLDSFPAtomicAdd in {
defm DS_ADD_F32 : DS_1A1D_NORET_mc<"ds_add_f32">;
}
-// FIXME: Are these really present pre-gfx8?
defm DS_MIN_F32 : DS_1A1D_NORET_mc<"ds_min_f32">;
defm DS_MAX_F32 : DS_1A1D_NORET_mc<"ds_max_f32">;
@@ -493,7 +492,7 @@ defm DS_MAX_F64 : DS_1A1D_NORET_mc<"ds_max_f64", VReg_64>;
defm DS_ADD_RTN_U32 : DS_1A1D_RET_mc<"ds_add_rtn_u32", VGPR_32, "ds_add_u32">;
-let SubtargetPredicate = HasLDSFPAtomics in {
+let SubtargetPredicate = HasLDSFPAtomicAdd in {
defm DS_ADD_RTN_F32 : DS_1A1D_RET_mc<"ds_add_rtn_f32", VGPR_32, "ds_add_f32">;
}
defm DS_SUB_RTN_U32 : DS_1A1D_RET_mc<"ds_sub_rtn_u32", VGPR_32, "ds_sub_u32">;
@@ -665,7 +664,7 @@ def DS_BPERMUTE_B32 : DS_1A1D_PERMUTE <"ds_bpermute_b32",
} // let SubtargetPredicate = isGFX8Plus
-let SubtargetPredicate = HasLDSFPAtomics, OtherPredicates = [HasDsSrc2Insts] in {
+let SubtargetPredicate = HasLDSFPAtomicAdd, OtherPredicates = [HasDsSrc2Insts] in {
def DS_ADD_SRC2_F32 : DS_1A<"ds_add_src2_f32">;
}
@@ -715,6 +714,10 @@ foreach vt = Reg32Types.types in {
defm : DSReadPat_mc <DS_READ_B32, vt, "load_local">;
}
+defm : DSReadPat_mc <DS_READ_U8, i16, "atomic_load_8_local">;
+defm : DSReadPat_mc <DS_READ_U8, i32, "atomic_load_8_local">;
+defm : DSReadPat_mc <DS_READ_U16, i16, "atomic_load_16_local">;
+defm : DSReadPat_mc <DS_READ_U16, i32, "atomic_load_16_local">;
defm : DSReadPat_mc <DS_READ_B32, i32, "atomic_load_32_local">;
defm : DSReadPat_mc <DS_READ_B64, i64, "atomic_load_64_local">;
@@ -775,6 +778,10 @@ foreach vt = Reg32Types.types in {
defm : DSWritePat_mc <DS_WRITE_B32, vt, "store_local">;
}
+defm : DSAtomicWritePat_mc <DS_WRITE_B8, i16, "atomic_store_local_8">;
+defm : DSAtomicWritePat_mc <DS_WRITE_B8, i32, "atomic_store_local_8">;
+defm : DSAtomicWritePat_mc <DS_WRITE_B16, i16, "atomic_store_local_16">;
+defm : DSAtomicWritePat_mc <DS_WRITE_B16, i32, "atomic_store_local_16">;
defm : DSAtomicWritePat_mc <DS_WRITE_B32, i32, "atomic_store_local_32">;
defm : DSAtomicWritePat_mc <DS_WRITE_B64, i64, "atomic_store_local_64">;
@@ -933,11 +940,11 @@ defm : DSAtomicRetPat_mc<DS_MIN_RTN_I32, i32, "atomic_load_min">;
defm : DSAtomicRetPat_mc<DS_MAX_RTN_I32, i32, "atomic_load_max">;
defm : DSAtomicRetPat_mc<DS_MIN_RTN_U32, i32, "atomic_load_umin">;
defm : DSAtomicRetPat_mc<DS_MAX_RTN_U32, i32, "atomic_load_umax">;
-defm : DSAtomicCmpXChg_mc<DS_CMPST_RTN_B32, i32, "atomic_cmp_swap">;
-
-let SubtargetPredicate = HasLDSFPAtomics in {
defm : DSAtomicRetPat_mc<DS_MIN_RTN_F32, f32, "atomic_load_fmin">;
defm : DSAtomicRetPat_mc<DS_MAX_RTN_F32, f32, "atomic_load_fmax">;
+defm : DSAtomicCmpXChg_mc<DS_CMPST_RTN_B32, i32, "atomic_cmp_swap">;
+
+let SubtargetPredicate = HasLDSFPAtomicAdd in {
defm : DSAtomicRetPat_mc<DS_ADD_RTN_F32, f32, "atomic_load_fadd">;
}
@@ -954,6 +961,8 @@ defm : DSAtomicRetPat_mc<DS_MIN_RTN_I64, i64, "atomic_load_min">;
defm : DSAtomicRetPat_mc<DS_MAX_RTN_I64, i64, "atomic_load_max">;
defm : DSAtomicRetPat_mc<DS_MIN_RTN_U64, i64, "atomic_load_umin">;
defm : DSAtomicRetPat_mc<DS_MAX_RTN_U64, i64, "atomic_load_umax">;
+defm : DSAtomicRetPat_mc<DS_MIN_RTN_F64, f64, "atomic_load_fmin">;
+defm : DSAtomicRetPat_mc<DS_MAX_RTN_F64, f64, "atomic_load_fmax">;
defm : DSAtomicCmpXChg_mc<DS_CMPST_RTN_B64, i64, "atomic_cmp_swap">;
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index fe62b8590fa0..e2186d4d533e 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -25,8 +25,9 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -264,6 +265,34 @@ static DecodeStatus decodeOperand_VReg_1024(MCInst &Inst,
return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW1024, Imm));
}
+static DecodeStatus decodeOperand_f32kimm(MCInst &Inst, unsigned Imm,
+ uint64_t Addr, const void *Decoder) {
+ const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+ return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
+}
+
+static DecodeStatus decodeOperand_f16kimm(MCInst &Inst, unsigned Imm,
+ uint64_t Addr, const void *Decoder) {
+ const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+ return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
+}
+
+static DecodeStatus decodeOperand_VS_16_Deferred(MCInst &Inst, unsigned Imm,
+ uint64_t Addr,
+ const void *Decoder) {
+ const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+ return addOperand(
+ Inst, DAsm->decodeSrcOp(llvm::AMDGPUDisassembler::OPW16, Imm, true));
+}
+
+static DecodeStatus decodeOperand_VS_32_Deferred(MCInst &Inst, unsigned Imm,
+ uint64_t Addr,
+ const void *Decoder) {
+ const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+ return addOperand(
+ Inst, DAsm->decodeSrcOp(llvm::AMDGPUDisassembler::OPW32, Imm, true));
+}
+
static bool IsAGPROperand(const MCInst &Inst, int OpIdx,
const MCRegisterInfo *MRI) {
if (OpIdx < 0)
@@ -626,6 +655,11 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
}
+ int ImmLitIdx =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::imm);
+ if (Res && ImmLitIdx != -1)
+ Res = convertFMAanyK(MI, ImmLitIdx);
+
// if the opcode was not recognized we'll assume a Size of 4 bytes
// (unless there are fewer bytes left)
Size = Res ? (MaxInstBytesNum - Bytes.size())
@@ -693,22 +727,21 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
int D16Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
AMDGPU::OpName::d16);
+ const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
+ const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
+ AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
+
assert(VDataIdx != -1);
- if (DMaskIdx == -1 || TFEIdx == -1) {// intersect_ray
+ if (BaseOpcode->BVH) {
+ // Add A16 operand for intersect_ray instructions
if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16) > -1) {
- assert(MI.getOpcode() == AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16_sa ||
- MI.getOpcode() == AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16_nsa ||
- MI.getOpcode() == AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_sa ||
- MI.getOpcode() == AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_nsa);
addOperand(MI, MCOperand::createImm(1));
}
return MCDisassembler::Success;
}
- const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
bool IsAtomic = (VDstIdx != -1);
bool IsGather4 = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::Gather4;
-
bool IsNSA = false;
unsigned AddrSize = Info->VAddrDwords;
@@ -717,8 +750,6 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
int A16Idx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16);
- const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
- AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
const AMDGPU::MIMGDimInfo *Dim =
AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm());
const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());
@@ -813,6 +844,24 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
return MCDisassembler::Success;
}
+DecodeStatus AMDGPUDisassembler::convertFMAanyK(MCInst &MI,
+ int ImmLitIdx) const {
+ assert(HasLiteral && "Should have decoded a literal");
+ const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
+ unsigned DescNumOps = Desc.getNumOperands();
+ assert(DescNumOps == MI.getNumOperands());
+ for (unsigned I = 0; I < DescNumOps; ++I) {
+ auto &Op = MI.getOperand(I);
+ auto OpType = Desc.OpInfo[I].OperandType;
+ bool IsDeferredOp = (OpType == AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED ||
+ OpType == AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED);
+ if (Op.isImm() && Op.getImm() == AMDGPU::EncValues::LITERAL_CONST &&
+ IsDeferredOp)
+ Op.setImm(Literal);
+ }
+ return MCDisassembler::Success;
+}
+
const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
return getContext().getRegisterInfo()->
getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
@@ -1022,6 +1071,18 @@ MCOperand AMDGPUDisassembler::decodeOperand_SReg_512(unsigned Val) const {
return decodeDstOp(OPW512, Val);
}
+// Decode Literals for insts which always have a literal in the encoding
+MCOperand
+AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
+ if (HasLiteral) {
+ if (Literal != Val)
+ return errOperand(Val, "More than one unique literal is illegal");
+ }
+ HasLiteral = true;
+ Literal = Val;
+ return MCOperand::createImm(Literal);
+}
+
MCOperand AMDGPUDisassembler::decodeLiteralConstant() const {
// For now all literal constants are supposed to be unsigned integer
// ToDo: deal with signed/unsigned 64-bit integer constants
@@ -1235,7 +1296,8 @@ int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
}
-MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val) const {
+MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val,
+ bool MandatoryLiteral) const {
using namespace AMDGPU::EncValues;
assert(Val < 1024); // enum10
@@ -1264,8 +1326,13 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val) c
if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)
return decodeFPImmed(Width, Val);
- if (Val == LITERAL_CONST)
- return decodeLiteralConstant();
+ if (Val == LITERAL_CONST) {
+ if (MandatoryLiteral)
+ // Keep a sentinel value for deferred setting
+ return MCOperand::createImm(LITERAL_CONST);
+ else
+ return decodeLiteralConstant();
+ }
switch (Width) {
case OPW32:
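
The decoder changes above rely on the hardware rule that an instruction may carry at most one unique 32-bit literal: decodeMandatoryLiteralConstant caches the first literal and rejects a different second one, while decodeSrcOp with MandatoryLiteral set returns a LITERAL_CONST sentinel that convertFMAanyK later overwrites with the cached literal for the deferred FP32/FP16 operands. A minimal standalone sketch of the single-literal rule (illustrative only; the names below are not the disassembler's API):

#include <cstdint>
#include <optional>

// Sketch of the rule enforced by decodeMandatoryLiteralConstant: an instruction
// may reference at most one unique 32-bit literal, so the first literal seen is
// cached and any later, different value is rejected.
struct LiteralTracker {
  std::optional<uint32_t> Literal; // literal cached for the current instruction

  // Returns the literal to use, or std::nullopt for an illegal second literal.
  std::optional<uint32_t> decodeMandatoryLiteral(uint32_t Val) {
    if (Literal && *Literal != Val)
      return std::nullopt; // "More than one unique literal is illegal"
    Literal = Val;
    return Val;
  }
};
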
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index dc879ec5ad88..eea6074d5281 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -87,6 +87,7 @@ public:
DecodeStatus decodeCOMPUTE_PGM_RSRC2(uint32_t FourByteBuffer,
raw_string_ostream &KdStream) const;
+ DecodeStatus convertFMAanyK(MCInst &MI, int ImmLitIdx) const;
DecodeStatus convertSDWAInst(MCInst &MI) const;
DecodeStatus convertDPP8Inst(MCInst &MI) const;
DecodeStatus convertMIMGInst(MCInst &MI) const;
@@ -150,9 +151,11 @@ public:
static MCOperand decodeIntImmed(unsigned Imm);
static MCOperand decodeFPImmed(OpWidthTy Width, unsigned Imm);
+ MCOperand decodeMandatoryLiteralConstant(unsigned Imm) const;
MCOperand decodeLiteralConstant() const;
- MCOperand decodeSrcOp(const OpWidthTy Width, unsigned Val) const;
+ MCOperand decodeSrcOp(const OpWidthTy Width, unsigned Val,
+ bool MandatoryLiteral = false) const;
MCOperand decodeDstOp(const OpWidthTy Width, unsigned Val) const;
MCOperand decodeSpecialReg32(unsigned Val) const;
MCOperand decodeSpecialReg64(unsigned Val) const;
diff --git a/llvm/lib/Target/AMDGPU/EvergreenInstructions.td b/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
index 596c3d7baea0..12224cb3f797 100644
--- a/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
+++ b/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
@@ -303,16 +303,16 @@ def : EGPat<(v4i32:$dst_gpr (vtx_id1_load ADDRVTX_READ:$src_gpr)),
let SubtargetPredicate = isEGorCayman in {
-multiclass AtomicPat<Instruction inst_ret, Instruction inst_noret,
- SDPatternOperator node_ret, SDPatternOperator node_noret> {
+multiclass AtomicPat<Instruction inst_noret,
+ SDPatternOperator node_noret> {
// FIXME: Add _RTN version. We need per WI scratch location to store the old value
// EXTRACT_SUBREG here is dummy, we know the node has no uses
def : EGOrCaymanPat<(i32 (node_noret i32:$ptr, i32:$data)),
(EXTRACT_SUBREG (inst_noret
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), $data, sub0), $ptr), sub1)>;
}
-multiclass AtomicIncDecPat<Instruction inst_ret, Instruction inst_noret,
- SDPatternOperator node_ret, SDPatternOperator node_noret, int C> {
+multiclass AtomicIncDecPat<Instruction inst_noret,
+ SDPatternOperator node_noret, int C> {
// FIXME: Add _RTN version. We need per WI scratch location to store the old value
// EXTRACT_SUBREG here is dummy, we know the node has no uses
def : EGOrCaymanPat<(i32 (node_noret i32:$ptr, C)),
@@ -330,47 +330,33 @@ def : EGOrCaymanPat<(i32 (atomic_cmp_swap_global_noret i32:$ptr, i32:$cmp, i32:$
$data, sub0),
$ptr), sub1)>;
-defm AtomicSwapPat : AtomicPat <RAT_ATOMIC_XCHG_INT_RTN,
- RAT_ATOMIC_XCHG_INT_NORET,
- atomic_swap_global_ret_32,
+defm AtomicSwapPat : AtomicPat <RAT_ATOMIC_XCHG_INT_NORET,
atomic_swap_global_noret_32>;
-defm AtomicAddPat : AtomicPat <RAT_ATOMIC_ADD_RTN, RAT_ATOMIC_ADD_NORET,
- atomic_load_add_global_ret_32, atomic_load_add_global_noret_32>;
-defm AtomicSubPat : AtomicPat <RAT_ATOMIC_SUB_RTN, RAT_ATOMIC_SUB_NORET,
- atomic_load_sub_global_ret_32, atomic_load_sub_global_noret_32>;
-defm AtomicMinPat : AtomicPat <RAT_ATOMIC_MIN_INT_RTN,
- RAT_ATOMIC_MIN_INT_NORET,
- atomic_load_min_global_ret_32, atomic_load_min_global_noret_32>;
-defm AtomicUMinPat : AtomicPat <RAT_ATOMIC_MIN_UINT_RTN,
- RAT_ATOMIC_MIN_UINT_NORET,
- atomic_load_umin_global_ret_32, atomic_load_umin_global_noret_32>;
-defm AtomicMaxPat : AtomicPat <RAT_ATOMIC_MAX_INT_RTN,
- RAT_ATOMIC_MAX_INT_NORET,
- atomic_load_max_global_ret_32, atomic_load_max_global_noret_32>;
-defm AtomicUMaxPat : AtomicPat <RAT_ATOMIC_MAX_UINT_RTN,
- RAT_ATOMIC_MAX_UINT_NORET,
- atomic_load_umax_global_ret_32, atomic_load_umax_global_noret_32>;
-defm AtomicAndPat : AtomicPat <RAT_ATOMIC_AND_RTN, RAT_ATOMIC_AND_NORET,
- atomic_load_and_global_ret_32, atomic_load_and_global_noret_32>;
-defm AtomicOrPat : AtomicPat <RAT_ATOMIC_OR_RTN, RAT_ATOMIC_OR_NORET,
- atomic_load_or_global_ret_32, atomic_load_or_global_noret_32>;
-defm AtomicXorPat : AtomicPat <RAT_ATOMIC_XOR_RTN, RAT_ATOMIC_XOR_NORET,
- atomic_load_xor_global_ret_32, atomic_load_xor_global_noret_32>;
-defm AtomicIncAddPat : AtomicIncDecPat <RAT_ATOMIC_INC_UINT_RTN,
- RAT_ATOMIC_INC_UINT_NORET,
- atomic_load_add_global_ret_32,
+defm AtomicAddPat : AtomicPat <RAT_ATOMIC_ADD_NORET,
+ atomic_load_add_global_noret_32>;
+defm AtomicSubPat : AtomicPat <RAT_ATOMIC_SUB_NORET,
+ atomic_load_sub_global_noret_32>;
+defm AtomicMinPat : AtomicPat <RAT_ATOMIC_MIN_INT_NORET,
+ atomic_load_min_global_noret_32>;
+defm AtomicUMinPat : AtomicPat <RAT_ATOMIC_MIN_UINT_NORET,
+ atomic_load_umin_global_noret_32>;
+defm AtomicMaxPat : AtomicPat <RAT_ATOMIC_MAX_INT_NORET,
+ atomic_load_max_global_noret_32>;
+defm AtomicUMaxPat : AtomicPat <RAT_ATOMIC_MAX_UINT_NORET,
+ atomic_load_umax_global_noret_32>;
+defm AtomicAndPat : AtomicPat <RAT_ATOMIC_AND_NORET,
+ atomic_load_and_global_noret_32>;
+defm AtomicOrPat : AtomicPat <RAT_ATOMIC_OR_NORET,
+ atomic_load_or_global_noret_32>;
+defm AtomicXorPat : AtomicPat <RAT_ATOMIC_XOR_NORET,
+ atomic_load_xor_global_noret_32>;
+defm AtomicIncAddPat : AtomicIncDecPat <RAT_ATOMIC_INC_UINT_NORET,
atomic_load_add_global_noret_32, 1>;
-defm AtomicIncSubPat : AtomicIncDecPat <RAT_ATOMIC_INC_UINT_RTN,
- RAT_ATOMIC_INC_UINT_NORET,
- atomic_load_sub_global_ret_32,
+defm AtomicIncSubPat : AtomicIncDecPat <RAT_ATOMIC_INC_UINT_NORET,
atomic_load_sub_global_noret_32, -1>;
-defm AtomicDecAddPat : AtomicIncDecPat <RAT_ATOMIC_DEC_UINT_RTN,
- RAT_ATOMIC_DEC_UINT_NORET,
- atomic_load_add_global_ret_32,
+defm AtomicDecAddPat : AtomicIncDecPat <RAT_ATOMIC_DEC_UINT_NORET,
atomic_load_add_global_noret_32, -1>;
-defm AtomicDecSubPat : AtomicIncDecPat <RAT_ATOMIC_DEC_UINT_RTN,
- RAT_ATOMIC_DEC_UINT_NORET,
- atomic_load_sub_global_ret_32,
+defm AtomicDecSubPat : AtomicIncDecPat <RAT_ATOMIC_DEC_UINT_NORET,
atomic_load_sub_global_noret_32, 1>;
// Should be predicated on FeatureFP64
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 90f26e514f54..bb0aa648ff90 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -203,7 +203,7 @@ multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass, bit Ha
}
class FLAT_Global_Load_AddTid_Pseudo <string opName, RegisterClass regClass,
- bit HasTiedOutput = 0, bit HasSignedOffset = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
+ bit HasTiedOutput = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
opName,
(outs regClass:$vdst),
!con(!if(EnableSaddr, (ins SReg_64:$saddr), (ins)),
@@ -224,10 +224,10 @@ class FLAT_Global_Load_AddTid_Pseudo <string opName, RegisterClass regClass,
}
multiclass FLAT_Global_Load_AddTid_Pseudo<string opName, RegisterClass regClass,
- bit HasTiedOutput = 0, bit HasSignedOffset = 0> {
- def "" : FLAT_Global_Load_AddTid_Pseudo<opName, regClass, HasTiedOutput, HasSignedOffset>,
+ bit HasTiedOutput = 0> {
+ def "" : FLAT_Global_Load_AddTid_Pseudo<opName, regClass, HasTiedOutput>,
GlobalSaddrTable<0, opName>;
- def _SADDR : FLAT_Global_Load_AddTid_Pseudo<opName, regClass, HasTiedOutput, HasSignedOffset, 1>,
+ def _SADDR : FLAT_Global_Load_AddTid_Pseudo<opName, regClass, HasTiedOutput, 1>,
GlobalSaddrTable<1, opName>;
}
@@ -241,7 +241,7 @@ multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
}
class FLAT_Global_Store_AddTid_Pseudo <string opName, RegisterClass vdataClass,
- bit HasSignedOffset = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
+ bit EnableSaddr = 0> : FLAT_Pseudo<
opName,
(outs),
!con(!if(EnableSaddr, (ins vdataClass:$vdata, SReg_64:$saddr), (ins vdataClass:$vdata)),
@@ -258,11 +258,10 @@ class FLAT_Global_Store_AddTid_Pseudo <string opName, RegisterClass vdataClass,
let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");
}
-multiclass FLAT_Global_Store_AddTid_Pseudo<string opName, RegisterClass regClass,
- bit HasSignedOffset = 0> {
- def "" : FLAT_Global_Store_AddTid_Pseudo<opName, regClass, HasSignedOffset>,
+multiclass FLAT_Global_Store_AddTid_Pseudo<string opName, RegisterClass regClass> {
+ def "" : FLAT_Global_Store_AddTid_Pseudo<opName, regClass>,
GlobalSaddrTable<0, opName>;
- def _SADDR : FLAT_Global_Store_AddTid_Pseudo<opName, regClass, HasSignedOffset, 1>,
+ def _SADDR : FLAT_Global_Store_AddTid_Pseudo<opName, regClass, 1>,
GlobalSaddrTable<1, opName>;
}
@@ -353,8 +352,6 @@ class FLAT_AtomicNoRet_Pseudo<string opName, dag outs, dag ins,
let mayStore = 1;
let has_glc = 0;
let glcValue = 0;
- let has_dlc = 0;
- let dlcValue = 0;
let has_vdst = 0;
let has_sccb = 1;
let sccbValue = 0;
@@ -368,7 +365,6 @@ class FLAT_AtomicRet_Pseudo<string opName, dag outs, dag ins,
let hasPostISelHook = 1;
let has_vdst = 1;
let glcValue = 1;
- let dlcValue = 0;
let sccbValue = 0;
let IsAtomicNoRet = 0;
let IsAtomicRet = 1;
@@ -412,7 +408,6 @@ multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<
string opName,
RegisterClass vdst_rc,
ValueType vt,
- SDPatternOperator atomic = null_frag,
ValueType data_vt = vt,
RegisterClass data_rc = vdst_rc,
bit isFP = isFloatType<data_vt>.ret,
@@ -483,11 +478,10 @@ multiclass FLAT_Global_Atomic_Pseudo<
RegisterClass vdst_rc,
ValueType vt,
SDPatternOperator atomic_rtn = null_frag,
- SDPatternOperator atomic_no_rtn = null_frag,
ValueType data_vt = vt,
RegisterClass data_rc = vdst_rc> {
let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
- defm "" : FLAT_Global_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, atomic_no_rtn, data_vt, data_rc>;
+ defm "" : FLAT_Global_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, data_vt, data_rc>;
defm "" : FLAT_Global_Atomic_Pseudo_RTN<opName, vdst_rc, vt, atomic_rtn, data_vt, data_rc>;
}
}
@@ -668,12 +662,11 @@ defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Global_Store_Pseudo <"global_store_short_d
let is_flat_global = 1 in {
defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap",
- VGPR_32, i32, AMDGPUatomic_cmp_swap_global_32, null_frag,
+ VGPR_32, i32, AMDGPUatomic_cmp_swap_global_32,
v2i32, VReg_64>;
defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap_x2",
VReg_64, i64, AMDGPUatomic_cmp_swap_global_64,
- null_frag,
v2i64, VReg_128>;
defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_swap",
@@ -786,17 +779,17 @@ defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_shor
let SubtargetPredicate = isGFX10Plus, is_flat_global = 1 in {
defm GLOBAL_ATOMIC_FCMPSWAP :
- FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap", VGPR_32, f32>;
+ FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap", VGPR_32, f32, null_frag, v2f32, VReg_64>;
defm GLOBAL_ATOMIC_FMIN :
- FLAT_Global_Atomic_Pseudo<"global_atomic_fmin", VGPR_32, f32>;
+ FLAT_Global_Atomic_Pseudo<"global_atomic_fmin", VGPR_32, f32, int_amdgcn_global_atomic_fmin>;
defm GLOBAL_ATOMIC_FMAX :
- FLAT_Global_Atomic_Pseudo<"global_atomic_fmax", VGPR_32, f32>;
+ FLAT_Global_Atomic_Pseudo<"global_atomic_fmax", VGPR_32, f32, int_amdgcn_global_atomic_fmax>;
defm GLOBAL_ATOMIC_FCMPSWAP_X2 :
- FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap_x2", VReg_64, f64>;
+ FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap_x2", VReg_64, f64, null_frag, v2f64, VReg_128>;
defm GLOBAL_ATOMIC_FMIN_X2 :
- FLAT_Global_Atomic_Pseudo<"global_atomic_fmin_x2", VReg_64, f64>;
+ FLAT_Global_Atomic_Pseudo<"global_atomic_fmin_x2", VReg_64, f64, int_amdgcn_global_atomic_fmin>;
defm GLOBAL_ATOMIC_FMAX_X2 :
- FLAT_Global_Atomic_Pseudo<"global_atomic_fmax_x2", VReg_64, f64>;
+ FLAT_Global_Atomic_Pseudo<"global_atomic_fmax_x2", VReg_64, f64, int_amdgcn_global_atomic_fmax>;
} // End SubtargetPredicate = isGFX10Plus, is_flat_global = 1
let is_flat_global = 1 in {
@@ -1237,6 +1230,13 @@ defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SWAP_X2", atomic_swap_global_64, i64
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP_X2", AMDGPUatomic_cmp_swap_global_64, i64, v2i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR_X2", atomic_load_xor_global_64, i64>;
+let OtherPredicates = [isGFX10Plus] in {
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN", atomic_load_fmin_global_32, f32>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX", atomic_load_fmax_global_32, f32>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN_X2", atomic_load_fmin_global_64, f64>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX_X2", atomic_load_fmax_global_64, f64>;
+}
+
let OtherPredicates = [HasAtomicFaddInsts] in {
defm : GlobalFLATNoRtnAtomicPats <GLOBAL_ATOMIC_ADD_F32, atomic_load_fadd_global_noret_32, f32>;
defm : GlobalFLATNoRtnAtomicPats <GLOBAL_ATOMIC_PK_ADD_F16, atomic_load_fadd_v2f16_global_noret_32, v2f16>;
diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
index 2bf365168048..a8c85ec4e5ea 100644
--- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -612,8 +612,7 @@ bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
for (auto &MBB : MF) {
- for (auto I = MBB.rbegin(), E = MBB.rend(); I != E;) {
- auto &MI = *I++;
+ for (MachineInstr &MI : llvm::make_early_inc_range(llvm::reverse(MBB))) {
if (MI.getOpcode() == AMDGPU::V_MOV_B32_dpp && combineDPPMov(MI)) {
Changed = true;
++NumDPPMovsCombined;
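
The rewritten loop uses llvm::make_early_inc_range over the reversed block so combineDPPMov can erase the current instruction without invalidating the traversal. A plain-C++ sketch of the pattern that make_early_inc_range automates (a forward std::list is used here for simplicity; the real pass walks an intrusive instruction list in reverse):

#include <iostream>
#include <list>

int main() {
  std::list<int> L = {3, -1, 4, -1, 5};
  for (auto It = L.begin(), E = L.end(); It != E;) {
    auto Cur = It++;        // step past the element first, as make_early_inc_range does
    if (*Cur < 0)
      L.erase(Cur);         // erasing Cur leaves It valid in a std::list
  }
  for (int V : L)
    std::cout << V << ' ';  // prints: 3 4 5
  std::cout << '\n';
}
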
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index bc2fb1e9770c..ff5d0b0af6a4 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -349,20 +349,16 @@ void GCNHazardRecognizer::AdvanceCycle() {
return;
}
- // Do not track non-instructions which do not affect the wait states.
- // If included, these instructions can lead to buffer overflow such that
- // detectable hazards are missed.
- if (CurrCycleInstr->isMetaInstruction()) {
- CurrCycleInstr = nullptr;
- return;
- }
-
if (CurrCycleInstr->isBundle()) {
processBundle();
return;
}
unsigned NumWaitStates = TII.getNumWaitStates(*CurrCycleInstr);
+ if (!NumWaitStates) {
+ CurrCycleInstr = nullptr;
+ return;
+ }
// Keep track of emitted instructions
EmittedInstrs.push_front(CurrCycleInstr);
@@ -409,7 +405,7 @@ static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
if (IsHazard(*I))
return WaitStates;
- if (I->isInlineAsm() || I->isMetaInstruction())
+ if (I->isInlineAsm())
continue;
WaitStates += SIInstrInfo::getNumWaitStates(*I);
@@ -1549,7 +1545,7 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
}
int GCNHazardRecognizer::checkMAILdStHazards(MachineInstr *MI) {
- // On gfx90a+ releveant hazards are checked in checkMAIVALUHazards()
+ // On gfx90a+ relevant hazards are checked in checkMAIVALUHazards()
if (!ST.hasMAIInsts() || ST.hasGFX90AInsts())
return 0;
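
With the meta-instruction special case gone, AdvanceCycle now filters on the wait-state count itself, so anything that consumes no wait states stays out of the emitted-instruction window and cannot push real hazards out of it. A rough standalone sketch of that bookkeeping (illustrative; the window size and element type are assumptions, the real recognizer stores MachineInstr pointers):

#include <cstddef>
#include <deque>

// Track only instructions that consume wait states, keeping a bounded window.
struct WaitStateWindow {
  static constexpr std::size_t MaxTracked = 16; // assumed window size
  std::deque<unsigned> Emitted;                 // wait states per tracked instruction

  void advance(unsigned NumWaitStates) {
    if (NumWaitStates == 0)
      return;                                   // zero-latency entries would only dilute the window
    Emitted.push_front(NumWaitStates);
    while (Emitted.size() > MaxTracked)
      Emitted.pop_back();
  }
};
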
diff --git a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
index f3f9eb53355f..86924667084d 100644
--- a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
@@ -188,7 +188,7 @@ public:
printRegion(dbgs(), Rgn.Begin, Rgn.End, Sch.LIS, 2));
Sch.BaseClass::schedule();
- // Unfortunatelly placeDebugValues incorrectly modifies RegionEnd, restore
+ // Unfortunately placeDebugValues incorrectly modifies RegionEnd, restore
Sch.RegionEnd = Rgn.End;
//assert(Rgn.End == Sch.RegionEnd);
Rgn.Begin = Sch.RegionBegin;
@@ -280,7 +280,7 @@ GCNIterativeScheduler::getSchedulePressure(const Region &R,
return RPTracker.moveMaxPressure();
}
-void GCNIterativeScheduler::enterRegion(MachineBasicBlock *BB, // overriden
+void GCNIterativeScheduler::enterRegion(MachineBasicBlock *BB, // overridden
MachineBasicBlock::iterator Begin,
MachineBasicBlock::iterator End,
unsigned NumRegionInstrs) {
@@ -293,7 +293,7 @@ void GCNIterativeScheduler::enterRegion(MachineBasicBlock *BB, // overriden
}
}
-void GCNIterativeScheduler::schedule() { // overriden
+void GCNIterativeScheduler::schedule() { // overridden
// do nothing
LLVM_DEBUG(printLivenessInfo(dbgs(), RegionBegin, RegionEnd, LIS);
if (!Regions.empty() && Regions.back()->Begin == RegionBegin) {
@@ -304,7 +304,7 @@ void GCNIterativeScheduler::schedule() { // overriden
<< '\n';);
}
-void GCNIterativeScheduler::finalizeSchedule() { // overriden
+void GCNIterativeScheduler::finalizeSchedule() { // overridden
if (Regions.empty())
return;
switch (Strategy) {
@@ -391,8 +391,8 @@ void GCNIterativeScheduler::scheduleRegion(Region &R, Range &&Schedule,
// and already interleaved with debug values
if (!std::is_same<decltype(*Schedule.begin()), MachineInstr*>::value) {
placeDebugValues();
- // Unfortunatelly placeDebugValues incorrectly modifies RegionEnd, restore
- //assert(R.End == RegionEnd);
+ // Unfortunately placeDebugValues incorrectly modifies RegionEnd, restore
+ // assert(R.End == RegionEnd);
RegionEnd = R.End;
}
diff --git a/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp
index 443472a3b99a..e82d7362a342 100644
--- a/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// This file defines and imlements the class GCNMinRegScheduler, which
+/// This file defines and implements the class GCNMinRegScheduler, which
/// implements an experimental, simple scheduler whose main goal is to learn
/// ways about consuming less possible registers for a region.
///
diff --git a/llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp b/llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp
index a51399d7da5f..a906a4207758 100644
--- a/llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
//
/// \file
-/// This pass combines split register tuple initialization into a single psuedo:
+/// This pass combines split register tuple initialization into a single pseudo:
///
/// undef %0.sub1:sreg_64 = S_MOV_B32 1
/// %0.sub0:sreg_64 = S_MOV_B32 2
@@ -40,6 +40,7 @@ namespace {
class GCNPreRAOptimizations : public MachineFunctionPass {
private:
const SIInstrInfo *TII;
+ const SIRegisterInfo *TRI;
MachineRegisterInfo *MRI;
LiveIntervals *LIS;
@@ -85,32 +86,107 @@ bool GCNPreRAOptimizations::processReg(Register Reg) {
MachineInstr *Def0 = nullptr;
MachineInstr *Def1 = nullptr;
uint64_t Init = 0;
+ bool Changed = false;
+ SmallSet<Register, 32> ModifiedRegs;
+ bool IsAGPRDst = TRI->isAGPRClass(MRI->getRegClass(Reg));
for (MachineInstr &I : MRI->def_instructions(Reg)) {
- if (I.getOpcode() != AMDGPU::S_MOV_B32 || I.getOperand(0).getReg() != Reg ||
- !I.getOperand(1).isImm() || I.getNumOperands() != 2)
- return false;
-
- switch (I.getOperand(0).getSubReg()) {
+ switch (I.getOpcode()) {
default:
return false;
- case AMDGPU::sub0:
- if (Def0)
+ case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
+ break;
+ case AMDGPU::COPY: {
+ // Some subtargets cannot do an AGPR to AGPR copy directly, and need an
+ // intermediate temporary VGPR register. Try to find the defining
+ // accvgpr_write to avoid temporary registers.
+
+ if (!IsAGPRDst)
return false;
- Def0 = &I;
- Init |= I.getOperand(1).getImm() & 0xffffffff;
+
+ Register SrcReg = I.getOperand(1).getReg();
+
+ if (!SrcReg.isVirtual())
+ break;
+
+ // Check if source of copy is from another AGPR.
+ bool IsAGPRSrc = TRI->isAGPRClass(MRI->getRegClass(SrcReg));
+ if (!IsAGPRSrc)
+ break;
+
+ // def_instructions() does not look at subregs, so it may give us a
+ // different instruction that defines the same vreg but a different subreg;
+ // we therefore have to check the subreg manually.
+ Register SrcSubReg = I.getOperand(1).getSubReg();
+ for (auto &Def : MRI->def_instructions(SrcReg)) {
+ if (SrcSubReg != Def.getOperand(0).getSubReg())
+ continue;
+
+ if (Def.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64) {
+ MachineOperand DefSrcMO = Def.getOperand(1);
+
+ // Immediates are not an issue and can be propagated in
+ // postrapseudos pass. Only handle cases where defining
+ // accvgpr_write source is a vreg.
+ if (DefSrcMO.isReg() && DefSrcMO.getReg().isVirtual()) {
+ // Propagate source reg of accvgpr write to this copy instruction
+ I.getOperand(1).setReg(DefSrcMO.getReg());
+ I.getOperand(1).setSubReg(DefSrcMO.getSubReg());
+
+ // Reg uses were changed, collect unique set of registers to update
+ // live intervals at the end.
+ ModifiedRegs.insert(DefSrcMO.getReg());
+ ModifiedRegs.insert(SrcReg);
+
+ Changed = true;
+ }
+
+ // Found the defining accvgpr_write, stop looking any further.
+ break;
+ }
+ }
break;
- case AMDGPU::sub1:
- if (Def1)
+ }
+ case AMDGPU::S_MOV_B32:
+ if (I.getOperand(0).getReg() != Reg || !I.getOperand(1).isImm() ||
+ I.getNumOperands() != 2)
return false;
- Def1 = &I;
- Init |= static_cast<uint64_t>(I.getOperand(1).getImm()) << 32;
+
+ switch (I.getOperand(0).getSubReg()) {
+ default:
+ return false;
+ case AMDGPU::sub0:
+ if (Def0)
+ return false;
+ Def0 = &I;
+ Init |= I.getOperand(1).getImm() & 0xffffffff;
+ break;
+ case AMDGPU::sub1:
+ if (Def1)
+ return false;
+ Def1 = &I;
+ Init |= static_cast<uint64_t>(I.getOperand(1).getImm()) << 32;
+ break;
+ }
break;
}
}
+ // For AGPR reg, check if live intervals need to be updated.
+ if (IsAGPRDst) {
+ if (Changed) {
+ for (Register RegToUpdate : ModifiedRegs) {
+ LIS->removeInterval(RegToUpdate);
+ LIS->createAndComputeVirtRegInterval(RegToUpdate);
+ }
+ }
+
+ return Changed;
+ }
+
+ // For SGPR reg, check if we can combine instructions.
if (!Def0 || !Def1 || Def0->getParent() != Def1->getParent())
- return false;
+ return Changed;
LLVM_DEBUG(dbgs() << "Combining:\n " << *Def0 << " " << *Def1
<< " =>\n");
@@ -144,7 +220,7 @@ bool GCNPreRAOptimizations::runOnMachineFunction(MachineFunction &MF) {
TII = ST.getInstrInfo();
MRI = &MF.getRegInfo();
LIS = &getAnalysis<LiveIntervals>();
- const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ TRI = ST.getRegisterInfo();
bool Changed = false;
@@ -153,8 +229,10 @@ bool GCNPreRAOptimizations::runOnMachineFunction(MachineFunction &MF) {
if (!LIS->hasInterval(Reg))
continue;
const TargetRegisterClass *RC = MRI->getRegClass(Reg);
- if (RC->MC->getSizeInBits() != 64 || !TRI->isSGPRClass(RC))
+ if ((RC->MC->getSizeInBits() != 64 || !TRI->isSGPRClass(RC)) &&
+ (ST.hasGFX90AInsts() || !TRI->isAGPRClass(RC)))
continue;
+
Changed |= processReg(Reg);
}
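
For the SGPR case the pass still packs the immediates written to sub0 and sub1 into the single 64-bit value that the combined pseudo materializes. A minimal sketch of that packing, independent of the pass (the function name is illustrative):

#include <cassert>
#include <cstdint>

// Pack the sub0 (low) and sub1 (high) 32-bit immediates into one 64-bit init value.
static uint64_t packInit(uint32_t Sub0Imm, uint32_t Sub1Imm) {
  uint64_t Init = 0;
  Init |= static_cast<uint64_t>(Sub0Imm);       // low 32 bits
  Init |= static_cast<uint64_t>(Sub1Imm) << 32; // high 32 bits
  return Init;
}

int main() {
  // Mirrors the S_MOV_B32 2 (sub0) / S_MOV_B32 1 (sub1) example in the pass header.
  assert(packInit(2, 1) == 0x0000000100000002ULL);
}
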
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 0212b8e17641..75855a7a4f9c 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -26,32 +26,36 @@ GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
void GCNMaxOccupancySchedStrategy::initialize(ScheduleDAGMI *DAG) {
GenericScheduler::initialize(DAG);
- const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
-
MF = &DAG->MF;
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
// FIXME: This is also necessary, because some passes that run after
// scheduling and before regalloc increase register pressure.
- const int ErrorMargin = 3;
-
- SGPRExcessLimit = Context->RegClassInfo
- ->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass) - ErrorMargin;
- VGPRExcessLimit = Context->RegClassInfo
- ->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass) - ErrorMargin;
- if (TargetOccupancy) {
- SGPRCriticalLimit = ST.getMaxNumSGPRs(TargetOccupancy, true);
- VGPRCriticalLimit = ST.getMaxNumVGPRs(TargetOccupancy);
- } else {
- SGPRCriticalLimit = SRI->getRegPressureSetLimit(DAG->MF,
- AMDGPU::RegisterPressureSets::SReg_32);
- VGPRCriticalLimit = SRI->getRegPressureSetLimit(DAG->MF,
- AMDGPU::RegisterPressureSets::VGPR_32);
- }
-
- SGPRCriticalLimit -= ErrorMargin;
- VGPRCriticalLimit -= ErrorMargin;
+ const unsigned ErrorMargin = 3;
+
+ SGPRExcessLimit =
+ Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
+ VGPRExcessLimit =
+ Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);
+
+ SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
+ // Set the initial TargetOccupancy to the maximum occupancy that we can
+ // achieve for this function. This effectively sets a lower bound on the
+ // 'Critical' register limits in the scheduler.
+ TargetOccupancy = MFI.getOccupancy();
+ SGPRCriticalLimit =
+ std::min(ST.getMaxNumSGPRs(TargetOccupancy, true), SGPRExcessLimit);
+ VGPRCriticalLimit =
+ std::min(ST.getMaxNumVGPRs(TargetOccupancy), VGPRExcessLimit);
+
+ // Subtract error margin from register limits and avoid overflow.
+ SGPRCriticalLimit =
+ std::min(SGPRCriticalLimit - ErrorMargin, SGPRCriticalLimit);
+ VGPRCriticalLimit =
+ std::min(VGPRCriticalLimit - ErrorMargin, VGPRCriticalLimit);
+ SGPRExcessLimit = std::min(SGPRExcessLimit - ErrorMargin, SGPRExcessLimit);
+ VGPRExcessLimit = std::min(VGPRExcessLimit - ErrorMargin, VGPRExcessLimit);
}
void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
@@ -117,7 +121,7 @@ void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU
// Register pressure is considered 'CRITICAL' if it is approaching a value
// that would reduce the wave occupancy for the execution unit. When
- // register pressure is 'CRITICAL', increading SGPR and VGPR pressure both
+ // register pressure is 'CRITICAL', increasing SGPR and VGPR pressure both
// has the same cost, so we don't need to prefer one over the other.
int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
@@ -361,14 +365,18 @@ void GCNScheduleDAGMILive::schedule() {
LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n");
return;
}
- unsigned Occ = MFI.getOccupancy();
- unsigned WavesAfter = std::min(Occ, PressureAfter.getOccupancy(ST));
- unsigned WavesBefore = std::min(Occ, PressureBefore.getOccupancy(ST));
+
+ unsigned WavesAfter =
+ std::min(S.TargetOccupancy, PressureAfter.getOccupancy(ST));
+ unsigned WavesBefore =
+ std::min(S.TargetOccupancy, PressureBefore.getOccupancy(ST));
LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore
<< ", after " << WavesAfter << ".\n");
- // We could not keep current target occupancy because of the just scheduled
- // region. Record new occupancy for next scheduling cycle.
+ // We may not be able to keep the current target occupancy because of the just
+ // scheduled region. We might still be able to revert scheduling if the
+ // occupancy before was higher, or if the current schedule has register
+ // pressure higher than the excess limits which could lead to more spilling.
unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);
// Allow memory bound functions to drop to 4 waves if not limited by an
// attribute.
@@ -378,6 +386,7 @@ void GCNScheduleDAGMILive::schedule() {
<< MFI.getMinAllowedOccupancy() << " waves\n");
NewOccupancy = WavesAfter;
}
+
if (NewOccupancy < MinOccupancy) {
MinOccupancy = NewOccupancy;
MFI.limitOccupancy(MinOccupancy);
@@ -394,6 +403,11 @@ void GCNScheduleDAGMILive::schedule() {
RegionsWithHighRP[RegionIdx] = true;
}
+ // If this condition is true, then either the occupancy before and after
+ // scheduling is the same, or we are allowing the occupancy to drop because
+ // the function is memory bound. Even if we are OK with the current occupancy,
+ // we still need to verify that we will not introduce any extra chance of
+ // spilling.
if (WavesAfter >= MinOccupancy) {
if (Stage == UnclusteredReschedule &&
!PressureAfter.less(ST, PressureBefore)) {
@@ -540,7 +554,6 @@ GCNScheduleDAGMILive::getBBLiveInMap() const {
}
void GCNScheduleDAGMILive::finalizeSchedule() {
- GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl;
LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");
LiveIns.resize(Regions.size());
@@ -586,8 +599,6 @@ void GCNScheduleDAGMILive::finalizeSchedule() {
dbgs()
<< "Retrying function scheduling with lowest recorded occupancy "
<< MinOccupancy << ".\n");
-
- S.setTargetOccupancy(MinOccupancy);
}
}
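
The new limit setup subtracts the error margin through std::min(Limit - Margin, Limit): if Limit is smaller than the margin, the unsigned subtraction wraps to a large value and std::min falls back to the unmodified limit. A standalone illustration of that guard (not the scheduler's code):

#include <algorithm>
#include <cassert>

// Subtract Margin from Limit without letting the unsigned value wrap below zero:
// if Limit < Margin the subtraction wraps to a huge number and std::min keeps Limit.
static unsigned subtractMarginClamped(unsigned Limit, unsigned Margin) {
  return std::min(Limit - Margin, Limit);
}

int main() {
  assert(subtractMarginClamped(100, 3) == 97);
  assert(subtractMarginClamped(2, 3) == 2); // would underflow, so the limit is kept
}
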
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index 15eba3f5eac0..53d6ff0aa731 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -54,7 +54,7 @@ class GCNMaxOccupancySchedStrategy final : public GenericScheduler {
// before a region scheduling to know if the region had such clusters.
bool HasClusteredNodes;
- // schedule() have seen a an excess register pressure and had to track
+ // schedule() has seen excess register pressure and had to track
// register pressure for actual scheduling heuristics.
bool HasExcessPressure;
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index bd0c40081c01..d8bc0b2df2bd 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -21,13 +21,6 @@
#include "SIInstrInfo.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
-namespace llvm {
-
-class MCInst;
-class MCInstrInfo;
-
-} // namespace llvm
-
#define GET_SUBTARGETINFO_HEADER
#include "AMDGPUGenSubtargetInfo.inc"
@@ -104,7 +97,6 @@ protected:
bool FP64;
bool FMA;
bool MIMG_R128;
- bool IsGCN;
bool CIInsts;
bool GFX8Insts;
bool GFX9Insts;
@@ -172,13 +164,8 @@ protected:
bool HasArchitectedFlatScratch;
bool AddNoCarryInsts;
bool HasUnpackedD16VMem;
- bool R600ALUInst;
- bool CaymanISA;
- bool CFALUBug;
bool LDSMisalignedBug;
bool HasMFMAInlineLiteralBug;
- bool HasVertexCache;
- short TexVTXClauseSize;
bool UnalignedBufferAccess;
bool UnalignedDSAccess;
bool HasPackedTID;
@@ -272,7 +259,7 @@ public:
return (Generation)Gen;
}
- /// Return the number of high bits known to be zero fror a frame index.
+ /// Return the number of high bits known to be zero for a frame index.
unsigned getKnownHighZeroBitsForFrameIndex() const {
return countLeadingZeros(MaxWaveScratchSize) + getWavefrontSizeLog2();
}
@@ -612,7 +599,7 @@ public:
}
/// Return if most LDS instructions have an m0 use that require m0 to be
- /// iniitalized.
+ /// initialized.
bool ldsRequiresM0Init() const {
return getGeneration() < GFX9;
}
@@ -753,7 +740,7 @@ public:
}
// Scratch is allocated in 256 dword per wave blocks for the entire
- // wavefront. When viewed from the perspecive of an arbitrary workitem, this
+ // wavefront. When viewed from the perspective of an arbitrary workitem, this
// is 4-byte aligned.
//
// Only 4-byte alignment is really needed to access anything. Transformations
@@ -818,9 +805,7 @@ public:
return HasScalarAtomics;
}
- bool hasLDSFPAtomics() const {
- return GFX8Insts;
- }
+ bool hasLDSFPAtomicAdd() const { return GFX8Insts; }
/// \returns true if the subtarget has the v_permlanex16_b32 instruction.
bool hasPermLaneX16() const { return getGeneration() >= GFX10; }
@@ -1139,6 +1124,9 @@ public:
std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
const override;
+ std::unique_ptr<ScheduleDAGMutation>
+ createFillMFMAShadowMutation(const TargetInstrInfo *TII) const;
+
bool isWave32() const {
return getWavefrontSize() == 32;
}
diff --git a/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp b/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp
new file mode 100644
index 000000000000..f3f664f7972a
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp
@@ -0,0 +1,361 @@
+//===------------------ AMDGPUCustomBehaviour.cpp ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements methods from the AMDGPUCustomBehaviour class.
+///
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUCustomBehaviour.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIInstrInfo.h"
+#include "TargetInfo/AMDGPUTargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/WithColor.h"
+
+namespace llvm {
+namespace mca {
+
+void AMDGPUInstrPostProcess::postProcessInstruction(
+ std::unique_ptr<Instruction> &Inst, const MCInst &MCI) {
+ switch (MCI.getOpcode()) {
+ case AMDGPU::S_WAITCNT:
+ case AMDGPU::S_WAITCNT_EXPCNT:
+ case AMDGPU::S_WAITCNT_LGKMCNT:
+ case AMDGPU::S_WAITCNT_VMCNT:
+ case AMDGPU::S_WAITCNT_VSCNT:
+ case AMDGPU::S_WAITCNT_EXPCNT_gfx10:
+ case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:
+ case AMDGPU::S_WAITCNT_VMCNT_gfx10:
+ case AMDGPU::S_WAITCNT_VSCNT_gfx10:
+ case AMDGPU::S_WAITCNT_gfx10:
+ case AMDGPU::S_WAITCNT_gfx6_gfx7:
+ case AMDGPU::S_WAITCNT_vi:
+ return processWaitCnt(Inst, MCI);
+ }
+}
+
+// s_waitcnt instructions encode important information as immediate operands
+// which are lost during the MCInst -> mca::Instruction lowering.
+void AMDGPUInstrPostProcess::processWaitCnt(std::unique_ptr<Instruction> &Inst,
+ const MCInst &MCI) {
+ for (int Idx = 0, N = MCI.size(); Idx < N; Idx++) {
+ MCAOperand Op;
+ const MCOperand &MCOp = MCI.getOperand(Idx);
+ if (MCOp.isReg()) {
+ Op = MCAOperand::createReg(MCOp.getReg());
+ } else if (MCOp.isImm()) {
+ Op = MCAOperand::createImm(MCOp.getImm());
+ }
+ Op.setIndex(Idx);
+ Inst->addOperand(Op);
+ }
+}
+
+AMDGPUCustomBehaviour::AMDGPUCustomBehaviour(const MCSubtargetInfo &STI,
+ const mca::SourceMgr &SrcMgr,
+ const MCInstrInfo &MCII)
+ : CustomBehaviour(STI, SrcMgr, MCII) {
+ generateWaitCntInfo();
+}
+
+unsigned AMDGPUCustomBehaviour::checkCustomHazard(ArrayRef<InstRef> IssuedInst,
+ const InstRef &IR) {
+ const Instruction &Inst = *IR.getInstruction();
+ unsigned Opcode = Inst.getOpcode();
+
+ // llvm-mca is generally run on fully compiled assembly so we wouldn't see any
+ // pseudo instructions here. However, there are plans for the future to make
+ // it possible to use mca within backend passes. As such, I have left the
+ // pseudo version of s_waitcnt within this switch statement.
+ switch (Opcode) {
+ default:
+ return 0;
+ case AMDGPU::S_WAITCNT: // This instruction
+ case AMDGPU::S_WAITCNT_EXPCNT:
+ case AMDGPU::S_WAITCNT_LGKMCNT:
+ case AMDGPU::S_WAITCNT_VMCNT:
+ case AMDGPU::S_WAITCNT_VSCNT: // to this instruction are all pseudo.
+ case AMDGPU::S_WAITCNT_EXPCNT_gfx10:
+ case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:
+ case AMDGPU::S_WAITCNT_VMCNT_gfx10:
+ case AMDGPU::S_WAITCNT_VSCNT_gfx10:
+ case AMDGPU::S_WAITCNT_gfx10:
+ case AMDGPU::S_WAITCNT_gfx6_gfx7:
+ case AMDGPU::S_WAITCNT_vi:
+ // s_endpgm also behaves as if there is an implicit
+ // s_waitcnt 0, but I'm not sure if it would be appropriate
+ // to model this in llvm-mca based on how the iterations work
+ // while simulating the pipeline over and over.
+ return handleWaitCnt(IssuedInst, IR);
+ }
+
+ return 0;
+}
+
+unsigned AMDGPUCustomBehaviour::handleWaitCnt(ArrayRef<InstRef> IssuedInst,
+ const InstRef &IR) {
+ // Currently, all s_waitcnt instructions are handled except s_waitcnt_depctr.
+ // I do not know how that instruction works so I did not attempt to model it.
+ // set the max values to begin
+ unsigned Vmcnt = 63;
+ unsigned Expcnt = 7;
+ unsigned Lgkmcnt = 31;
+ unsigned Vscnt = 63;
+ unsigned CurrVmcnt = 0;
+ unsigned CurrExpcnt = 0;
+ unsigned CurrLgkmcnt = 0;
+ unsigned CurrVscnt = 0;
+ unsigned CyclesToWaitVm = ~0U;
+ unsigned CyclesToWaitExp = ~0U;
+ unsigned CyclesToWaitLgkm = ~0U;
+ unsigned CyclesToWaitVs = ~0U;
+
+ computeWaitCnt(IR, Vmcnt, Expcnt, Lgkmcnt, Vscnt);
+
+ // We will now look at each of the currently executing instructions
+ // to find out if this wait instruction still needs to wait.
+ for (auto I = IssuedInst.begin(), E = IssuedInst.end(); I != E; I++) {
+ const InstRef &PrevIR = *I;
+ const Instruction &PrevInst = *PrevIR.getInstruction();
+ const unsigned PrevInstIndex = PrevIR.getSourceIndex() % SrcMgr.size();
+ const WaitCntInfo &PrevInstWaitInfo = InstrWaitCntInfo[PrevInstIndex];
+ const int CyclesLeft = PrevInst.getCyclesLeft();
+ assert(CyclesLeft != UNKNOWN_CYCLES &&
+ "We should know how many cycles are left for this instruction");
+ if (PrevInstWaitInfo.VmCnt) {
+ CurrVmcnt++;
+ if ((unsigned)CyclesLeft < CyclesToWaitVm)
+ CyclesToWaitVm = CyclesLeft;
+ }
+ if (PrevInstWaitInfo.ExpCnt) {
+ CurrExpcnt++;
+ if ((unsigned)CyclesLeft < CyclesToWaitExp)
+ CyclesToWaitExp = CyclesLeft;
+ }
+ if (PrevInstWaitInfo.LgkmCnt) {
+ CurrLgkmcnt++;
+ if ((unsigned)CyclesLeft < CyclesToWaitLgkm)
+ CyclesToWaitLgkm = CyclesLeft;
+ }
+ if (PrevInstWaitInfo.VsCnt) {
+ CurrVscnt++;
+ if ((unsigned)CyclesLeft < CyclesToWaitVs)
+ CyclesToWaitVs = CyclesLeft;
+ }
+ }
+
+ unsigned CyclesToWait = ~0U;
+ if (CurrVmcnt > Vmcnt && CyclesToWaitVm < CyclesToWait)
+ CyclesToWait = CyclesToWaitVm;
+ if (CurrExpcnt > Expcnt && CyclesToWaitExp < CyclesToWait)
+ CyclesToWait = CyclesToWaitExp;
+ if (CurrLgkmcnt > Lgkmcnt && CyclesToWaitLgkm < CyclesToWait)
+ CyclesToWait = CyclesToWaitLgkm;
+ if (CurrVscnt > Vscnt && CyclesToWaitVs < CyclesToWait)
+ CyclesToWait = CyclesToWaitVs;
+
+ // We may underestimate how many cycles we need to wait, but this
+ // isn't a big deal. Our return value is just how many cycles until
+ // this function gets run again. So as long as we don't overestimate
+ // the wait time, we'll still end up stalling at this instruction
+ // for the correct number of cycles.
+
+ if (CyclesToWait == ~0U)
+ return 0;
+ return CyclesToWait;
+}
+
+void AMDGPUCustomBehaviour::computeWaitCnt(const InstRef &IR, unsigned &Vmcnt,
+ unsigned &Expcnt, unsigned &Lgkmcnt,
+ unsigned &Vscnt) {
+ AMDGPU::IsaVersion IV = AMDGPU::getIsaVersion(STI.getCPU());
+ const Instruction &Inst = *IR.getInstruction();
+ unsigned Opcode = Inst.getOpcode();
+
+ switch (Opcode) {
+ case AMDGPU::S_WAITCNT_EXPCNT_gfx10:
+ case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:
+ case AMDGPU::S_WAITCNT_VMCNT_gfx10:
+ case AMDGPU::S_WAITCNT_VSCNT_gfx10: {
+ // Should probably be checking for nullptr
+ // here, but I'm not sure how I should handle the case
+ // where we see a nullptr.
+ const MCAOperand *OpReg = Inst.getOperand(0);
+ const MCAOperand *OpImm = Inst.getOperand(1);
+ assert(OpReg && OpReg->isReg() && "First operand should be a register.");
+ assert(OpImm && OpImm->isImm() && "Second operand should be an immediate.");
+ if (OpReg->getReg() != AMDGPU::SGPR_NULL) {
+ // Instruction is using a real register.
+ // Since we can't know what value this register will have,
+ // we can't compute what the value of this wait should be.
+ WithColor::warning() << "The register component of "
+ << MCII.getName(Opcode) << " will be completely "
+ << "ignored. So the wait may not be accurate.\n";
+ }
+ switch (Opcode) {
+ // Redundant switch so I don't have to repeat the code above
+ // for each case. There are more clever ways to avoid this
+ // extra switch and anyone can feel free to implement one of them.
+ case AMDGPU::S_WAITCNT_EXPCNT_gfx10:
+ Expcnt = OpImm->getImm();
+ break;
+ case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:
+ Lgkmcnt = OpImm->getImm();
+ break;
+ case AMDGPU::S_WAITCNT_VMCNT_gfx10:
+ Vmcnt = OpImm->getImm();
+ break;
+ case AMDGPU::S_WAITCNT_VSCNT_gfx10:
+ Vscnt = OpImm->getImm();
+ break;
+ }
+ return;
+ }
+ case AMDGPU::S_WAITCNT_gfx10:
+ case AMDGPU::S_WAITCNT_gfx6_gfx7:
+ case AMDGPU::S_WAITCNT_vi:
+ unsigned WaitCnt = Inst.getOperand(0)->getImm();
+ AMDGPU::decodeWaitcnt(IV, WaitCnt, Vmcnt, Expcnt, Lgkmcnt);
+ return;
+ }
+}
+
+void AMDGPUCustomBehaviour::generateWaitCntInfo() {
+ // The core logic from this function is taken from
+ // SIInsertWaitcnts::updateEventWaitcntAfter() In that pass, the instructions
+ // that are being looked at are in the MachineInstr format, whereas we have
+ // access to the MCInst format. The side effects of this are that we can't use
+ // the mayAccessVMEMThroughFlat(Inst) or mayAccessLDSThroughFlat(Inst)
+ // functions. Therefore, we conservatively assume that these functions will
+ // return true. This may cause a few instructions to be incorrectly tagged
+ // with an extra CNT. However, these are instructions that do interact with at
+ // least one CNT so giving them an extra CNT shouldn't cause issues in most
+ // scenarios.
+ AMDGPU::IsaVersion IV = AMDGPU::getIsaVersion(STI.getCPU());
+ InstrWaitCntInfo.resize(SrcMgr.size());
+
+ int Index = 0;
+ for (auto I = SrcMgr.begin(), E = SrcMgr.end(); I != E; ++I, ++Index) {
+ const std::unique_ptr<Instruction> &Inst = *I;
+ unsigned Opcode = Inst->getOpcode();
+ const MCInstrDesc &MCID = MCII.get(Opcode);
+ if ((MCID.TSFlags & SIInstrFlags::DS) &&
+ (MCID.TSFlags & SIInstrFlags::LGKM_CNT)) {
+ InstrWaitCntInfo[Index].LgkmCnt = true;
+ if (isAlwaysGDS(Opcode) || hasModifiersSet(Inst, AMDGPU::OpName::gds))
+ InstrWaitCntInfo[Index].ExpCnt = true;
+ } else if (MCID.TSFlags & SIInstrFlags::FLAT) {
+ // We conservatively assume that mayAccessVMEMThroughFlat(Inst)
+ // and mayAccessLDSThroughFlat(Inst) would both return true for this
+ // instruction. We have to do this because those functions use
+ // information about the memory operands that we don't have access to.
+ InstrWaitCntInfo[Index].LgkmCnt = true;
+ if (!STI.hasFeature(AMDGPU::FeatureVscnt))
+ InstrWaitCntInfo[Index].VmCnt = true;
+ else if (MCID.mayLoad() && !(MCID.TSFlags & SIInstrFlags::IsAtomicNoRet))
+ InstrWaitCntInfo[Index].VmCnt = true;
+ else
+ InstrWaitCntInfo[Index].VsCnt = true;
+ } else if (isVMEM(MCID) && !AMDGPU::getMUBUFIsBufferInv(Opcode)) {
+ if (!STI.hasFeature(AMDGPU::FeatureVscnt))
+ InstrWaitCntInfo[Index].VmCnt = true;
+ else if ((MCID.mayLoad() &&
+ !(MCID.TSFlags & SIInstrFlags::IsAtomicNoRet)) ||
+ ((MCID.TSFlags & SIInstrFlags::MIMG) && !MCID.mayLoad() &&
+ !MCID.mayStore()))
+ InstrWaitCntInfo[Index].VmCnt = true;
+ else if (MCID.mayStore())
+ InstrWaitCntInfo[Index].VsCnt = true;
+
+ // (IV.Major < 7) is meant to represent
+ // GCNTarget.vmemWriteNeedsExpWaitcnt()
+ // which is defined as
+ // { return getGeneration() < SEA_ISLANDS; }
+ if (IV.Major < 7 &&
+ (MCID.mayStore() || (MCID.TSFlags & SIInstrFlags::IsAtomicRet)))
+ InstrWaitCntInfo[Index].ExpCnt = true;
+ } else if (MCID.TSFlags & SIInstrFlags::SMRD) {
+ InstrWaitCntInfo[Index].LgkmCnt = true;
+ } else if (MCID.TSFlags & SIInstrFlags::EXP) {
+ InstrWaitCntInfo[Index].ExpCnt = true;
+ } else {
+ switch (Opcode) {
+ case AMDGPU::S_SENDMSG:
+ case AMDGPU::S_SENDMSGHALT:
+ case AMDGPU::S_MEMTIME:
+ case AMDGPU::S_MEMREALTIME:
+ InstrWaitCntInfo[Index].LgkmCnt = true;
+ break;
+ }
+ }
+ }
+}
+
+// taken from SIInstrInfo::isVMEM()
+bool AMDGPUCustomBehaviour::isVMEM(const MCInstrDesc &MCID) {
+ return MCID.TSFlags & SIInstrFlags::MUBUF ||
+ MCID.TSFlags & SIInstrFlags::MTBUF ||
+ MCID.TSFlags & SIInstrFlags::MIMG;
+}
+
+// taken from SIInstrInfo::hasModifiersSet()
+bool AMDGPUCustomBehaviour::hasModifiersSet(
+ const std::unique_ptr<Instruction> &Inst, unsigned OpName) const {
+ int Idx = AMDGPU::getNamedOperandIdx(Inst->getOpcode(), OpName);
+ if (Idx == -1)
+ return false;
+
+ const MCAOperand *Op = Inst->getOperand(Idx);
+ if (Op == nullptr || !Op->isImm() || !Op->getImm())
+ return false;
+
+ return true;
+}
+
+// taken from SIInstrInfo::isAlwaysGDS()
+bool AMDGPUCustomBehaviour::isAlwaysGDS(uint16_t Opcode) const {
+ return Opcode == AMDGPU::DS_ORDERED_COUNT || Opcode == AMDGPU::DS_GWS_INIT ||
+ Opcode == AMDGPU::DS_GWS_SEMA_V || Opcode == AMDGPU::DS_GWS_SEMA_BR ||
+ Opcode == AMDGPU::DS_GWS_SEMA_P ||
+ Opcode == AMDGPU::DS_GWS_SEMA_RELEASE_ALL ||
+ Opcode == AMDGPU::DS_GWS_BARRIER;
+}
+
+} // namespace mca
+} // namespace llvm
+
+using namespace llvm;
+using namespace mca;
+
+static CustomBehaviour *
+createAMDGPUCustomBehaviour(const MCSubtargetInfo &STI,
+ const mca::SourceMgr &SrcMgr,
+ const MCInstrInfo &MCII) {
+ return new AMDGPUCustomBehaviour(STI, SrcMgr, MCII);
+}
+
+static InstrPostProcess *
+createAMDGPUInstrPostProcess(const MCSubtargetInfo &STI,
+ const MCInstrInfo &MCII) {
+ return new AMDGPUInstrPostProcess(STI, MCII);
+}
+
+/// Extern function to initialize the targets for the AMDGPU backend
+
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTargetMCA() {
+ TargetRegistry::RegisterCustomBehaviour(getTheAMDGPUTarget(),
+ createAMDGPUCustomBehaviour);
+ TargetRegistry::RegisterInstrPostProcess(getTheAMDGPUTarget(),
+ createAMDGPUInstrPostProcess);
+
+ TargetRegistry::RegisterCustomBehaviour(getTheGCNTarget(),
+ createAMDGPUCustomBehaviour);
+ TargetRegistry::RegisterInstrPostProcess(getTheGCNTarget(),
+ createAMDGPUInstrPostProcess);
+}
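
handleWaitCnt only stalls the s_waitcnt when some counter's pending count exceeds what the instruction tolerates, and then for the fewest cycles among the offending counters. A condensed, standalone sketch of that selection (field and function names are assumptions, not the llvm-mca API):

#include <algorithm>
#include <array>
#include <limits>

// For each counter kind: how many tracked instructions are still outstanding,
// the maximum the s_waitcnt tolerates, and the soonest one of them retires.
struct CounterState {
  unsigned Pending = 0;
  unsigned Allowed = 0;
  unsigned CyclesToRetire = std::numeric_limits<unsigned>::max();
};

// Returns 0 if the wait is already satisfied, otherwise the number of cycles
// until the earliest offending counter can make progress.
static unsigned cyclesToStall(const std::array<CounterState, 4> &Counters) {
  unsigned CyclesToWait = std::numeric_limits<unsigned>::max();
  for (const CounterState &C : Counters)
    if (C.Pending > C.Allowed)
      CyclesToWait = std::min(CyclesToWait, C.CyclesToRetire);
  return CyclesToWait == std::numeric_limits<unsigned>::max() ? 0 : CyclesToWait;
}
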
diff --git a/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.h b/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.h
new file mode 100644
index 000000000000..56650515bd0a
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.h
@@ -0,0 +1,103 @@
+//===------------------- AMDGPUCustomBehaviour.h ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines the AMDGPUCustomBehaviour class which inherits from
+/// CustomBehaviour. This class is used by the tool llvm-mca to enforce
+/// target specific behaviour that is not expressed well enough in the
+/// scheduling model for mca to enforce it automatically.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_MCA_AMDGPUCUSTOMBEHAVIOUR_H
+#define LLVM_LIB_TARGET_AMDGPU_MCA_AMDGPUCUSTOMBEHAVIOUR_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MCA/CustomBehaviour.h"
+#include "llvm/Support/TargetParser.h"
+
+namespace llvm {
+namespace mca {
+
+class AMDGPUInstrPostProcess : public InstrPostProcess {
+ void processWaitCnt(std::unique_ptr<Instruction> &Inst, const MCInst &MCI);
+
+public:
+ AMDGPUInstrPostProcess(const MCSubtargetInfo &STI, const MCInstrInfo &MCII)
+ : InstrPostProcess(STI, MCII) {}
+
+ ~AMDGPUInstrPostProcess() {}
+
+ void postProcessInstruction(std::unique_ptr<Instruction> &Inst,
+ const MCInst &MCI) override;
+};
+
+struct WaitCntInfo {
+ bool VmCnt = false;
+ bool ExpCnt = false;
+ bool LgkmCnt = false;
+ bool VsCnt = false;
+};
+
+class AMDGPUCustomBehaviour : public CustomBehaviour {
+  /// Whenever MCA would like to dispatch an s_waitcnt instruction,
+  /// we must check all the instructions that are still executing to see if
+  /// they modify the same CNT that we need to wait for. This vector
+  /// gets built in the constructor and contains one WaitCntInfo struct
+  /// for each instruction within the SrcManager. Each element
+  /// tells us which CNTs that instruction may interact with.
+  /// We conservatively assume some instructions interact with more
+  /// CNTs than they do in reality, so we will occasionally wait
+  /// longer than necessary, but we should never wait for less time than required.
+ std::vector<WaitCntInfo> InstrWaitCntInfo;
+
+  /// This method gets called from the constructor and is
+  /// where we set up the InstrWaitCntInfo vector.
+ /// The core logic for determining which CNTs an instruction
+ /// interacts with is taken from SIInsertWaitcnts::updateEventWaitcntAfter().
+ /// Unfortunately, some of the logic from that function is not available to us
+  /// in this scope, so we conservatively end up assuming that some
+ /// instructions interact with more CNTs than they do in reality.
+ void generateWaitCntInfo();
+ /// Helper function used in generateWaitCntInfo()
+ bool hasModifiersSet(const std::unique_ptr<Instruction> &Inst,
+ unsigned OpName) const;
+ /// Helper function used in generateWaitCntInfo()
+ bool isAlwaysGDS(uint16_t Opcode) const;
+ /// Helper function used in generateWaitCntInfo()
+ bool isVMEM(const MCInstrDesc &MCID);
+ /// This method gets called from checkCustomHazard when mca is attempting to
+ /// dispatch an s_waitcnt instruction (or one of its variants). The method
+ /// looks at each of the instructions that are still executing in the pipeline
+ /// to determine if the waitcnt should force a wait.
+ unsigned handleWaitCnt(ArrayRef<InstRef> IssuedInst, const InstRef &IR);
+ /// Based on the type of s_waitcnt instruction we are looking at, and what its
+ /// operands are, this method will set the values for each of the cnt
+ /// references provided as arguments.
+ void computeWaitCnt(const InstRef &IR, unsigned &Vmcnt, unsigned &Expcnt,
+ unsigned &Lgkmcnt, unsigned &Vscnt);
+
+public:
+ AMDGPUCustomBehaviour(const MCSubtargetInfo &STI,
+ const mca::SourceMgr &SrcMgr, const MCInstrInfo &MCII);
+
+ ~AMDGPUCustomBehaviour() {}
+ /// This method is used to determine if an instruction
+ /// should be allowed to be dispatched. The return value is
+ /// how many cycles until the instruction can be dispatched.
+ /// This method is called after MCA has already checked for
+  /// register and hardware dependencies, so this method should only
+ /// implement custom behaviour and dependencies that are not picked up
+ /// by MCA naturally.
+ unsigned checkCustomHazard(ArrayRef<InstRef> IssuedInst,
+ const InstRef &IR) override;
+};
+} // namespace mca
+} // namespace llvm
+
+#endif
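
For readers following the header comments above, here is a standalone sketch of the conservative stall rule they describe: if any in-flight instruction might touch a counter the s_waitcnt waits on, dispatch is delayed. All names are illustrative; the real logic lives in checkCustomHazard()/handleWaitCnt() in the .cpp file earlier in this patch.

#include <vector>

// Illustrative only: mirrors the documented behaviour, not the actual
// implementation. A non-zero return means "retry in that many cycles",
// matching the contract described for checkCustomHazard().
struct WaitCntInfoSketch {
  bool VmCnt = false, ExpCnt = false, LgkmCnt = false, VsCnt = false;
};

static unsigned
stallCyclesForWaitCnt(const std::vector<WaitCntInfoSketch> &Info,
                      const std::vector<unsigned> &InFlightIds, bool WaitsVm,
                      bool WaitsExp, bool WaitsLgkm, bool WaitsVs) {
  for (unsigned Id : InFlightIds) {
    const WaitCntInfoSketch &W = Info[Id];
    // Conservative: any overlap between the counters this instruction may
    // touch and the counters the s_waitcnt waits on forces a stall.
    if ((WaitsVm && W.VmCnt) || (WaitsExp && W.ExpCnt) ||
        (WaitsLgkm && W.LgkmCnt) || (WaitsVs && W.VsCnt))
      return 1;
  }
  return 0; // Safe to dispatch the s_waitcnt now.
}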
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
index dd0db6c7b655..50318a59225d 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -15,8 +15,8 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/EndianStream.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
using namespace llvm::AMDGPU;
@@ -44,7 +44,8 @@ public:
const MCSubtargetInfo &STI) const override;
unsigned getMinimumNopSize() const override;
- bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
+ bool writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const override;
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
};
@@ -169,7 +170,8 @@ unsigned AMDGPUAsmBackend::getMinimumNopSize() const {
return 4;
}
-bool AMDGPUAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
+bool AMDGPUAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const {
// If the count is not 4-byte aligned, we must be writing data into the text
// section (otherwise we have unaligned instructions, and thus have far
// bigger problems), so just write zeros instead.
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h
index b56f75132135..e09e2dca1b47 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h
@@ -19,10 +19,9 @@ namespace llvm {
class MCAsmBackend;
class MCCodeEmitter;
class MCContext;
-class MCSubtargetInfo;
class MCELFStreamer;
-class Triple;
class MCObjectWriter;
+class Triple;
MCELFStreamer *createAMDGPUELFStreamer(const Triple &T, MCContext &Context,
std::unique_ptr<MCAsmBackend> MAB,
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index 9ba0ffbced3d..b68b4b12e750 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -605,6 +605,7 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
switch (OpTy) {
case AMDGPU::OPERAND_REG_IMM_INT32:
case AMDGPU::OPERAND_REG_IMM_FP32:
+ case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
case AMDGPU::OPERAND_REG_INLINE_C_INT32:
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
@@ -631,6 +632,7 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
case AMDGPU::OPERAND_REG_IMM_FP16:
+ case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
printImmediate16(Op.getImm(), STI, O);
break;
case AMDGPU::OPERAND_REG_IMM_V2INT16:
@@ -1451,208 +1453,3 @@ void AMDGPUInstPrinter::printEndpgm(const MCInst *MI, unsigned OpNo,
}
#include "AMDGPUGenAsmWriter.inc"
-
-void R600InstPrinter::printInst(const MCInst *MI, uint64_t Address,
- StringRef Annot, const MCSubtargetInfo &STI,
- raw_ostream &O) {
- O.flush();
- printInstruction(MI, Address, O);
- printAnnotation(O, Annot);
-}
-
-void R600InstPrinter::printAbs(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- AMDGPUInstPrinter::printIfSet(MI, OpNo, O, '|');
-}
-
-void R600InstPrinter::printBankSwizzle(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- int BankSwizzle = MI->getOperand(OpNo).getImm();
- switch (BankSwizzle) {
- case 1:
- O << "BS:VEC_021/SCL_122";
- break;
- case 2:
- O << "BS:VEC_120/SCL_212";
- break;
- case 3:
- O << "BS:VEC_102/SCL_221";
- break;
- case 4:
- O << "BS:VEC_201";
- break;
- case 5:
- O << "BS:VEC_210";
- break;
- default:
- break;
- }
-}
-
-void R600InstPrinter::printClamp(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- AMDGPUInstPrinter::printIfSet(MI, OpNo, O, "_SAT");
-}
-
-void R600InstPrinter::printCT(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- unsigned CT = MI->getOperand(OpNo).getImm();
- switch (CT) {
- case 0:
- O << 'U';
- break;
- case 1:
- O << 'N';
- break;
- default:
- break;
- }
-}
-
-void R600InstPrinter::printKCache(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- int KCacheMode = MI->getOperand(OpNo).getImm();
- if (KCacheMode > 0) {
- int KCacheBank = MI->getOperand(OpNo - 2).getImm();
- O << "CB" << KCacheBank << ':';
- int KCacheAddr = MI->getOperand(OpNo + 2).getImm();
- int LineSize = (KCacheMode == 1) ? 16 : 32;
- O << KCacheAddr * 16 << '-' << KCacheAddr * 16 + LineSize;
- }
-}
-
-void R600InstPrinter::printLast(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- AMDGPUInstPrinter::printIfSet(MI, OpNo, O, "*", " ");
-}
-
-void R600InstPrinter::printLiteral(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- const MCOperand &Op = MI->getOperand(OpNo);
- assert(Op.isImm() || Op.isExpr());
- if (Op.isImm()) {
- int64_t Imm = Op.getImm();
- O << Imm << '(' << BitsToFloat(Imm) << ')';
- }
- if (Op.isExpr()) {
- Op.getExpr()->print(O << '@', &MAI);
- }
-}
-
-void R600InstPrinter::printNeg(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- AMDGPUInstPrinter::printIfSet(MI, OpNo, O, '-');
-}
-
-void R600InstPrinter::printOMOD(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- switch (MI->getOperand(OpNo).getImm()) {
- default: break;
- case 1:
- O << " * 2.0";
- break;
- case 2:
- O << " * 4.0";
- break;
- case 3:
- O << " / 2.0";
- break;
- }
-}
-
-void R600InstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- printOperand(MI, OpNo, O);
- O << ", ";
- printOperand(MI, OpNo + 1, O);
-}
-
-void R600InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- if (OpNo >= MI->getNumOperands()) {
- O << "/*Missing OP" << OpNo << "*/";
- return;
- }
-
- const MCOperand &Op = MI->getOperand(OpNo);
- if (Op.isReg()) {
- switch (Op.getReg()) {
- // This is the default predicate state, so we don't need to print it.
- case R600::PRED_SEL_OFF:
- break;
-
- default:
- O << getRegisterName(Op.getReg());
- break;
- }
- } else if (Op.isImm()) {
- O << Op.getImm();
- } else if (Op.isDFPImm()) {
- // We special case 0.0 because otherwise it will be printed as an integer.
- if (Op.getDFPImm() == 0.0)
- O << "0.0";
- else {
- O << bit_cast<double>(Op.getDFPImm());
- }
- } else if (Op.isExpr()) {
- const MCExpr *Exp = Op.getExpr();
- Exp->print(O, &MAI);
- } else {
- O << "/*INV_OP*/";
- }
-}
-
-void R600InstPrinter::printRel(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- AMDGPUInstPrinter::printIfSet(MI, OpNo, O, '+');
-}
-
-void R600InstPrinter::printRSel(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- unsigned Sel = MI->getOperand(OpNo).getImm();
- switch (Sel) {
- case 0:
- O << 'X';
- break;
- case 1:
- O << 'Y';
- break;
- case 2:
- O << 'Z';
- break;
- case 3:
- O << 'W';
- break;
- case 4:
- O << '0';
- break;
- case 5:
- O << '1';
- break;
- case 7:
- O << '_';
- break;
- default:
- break;
- }
-}
-
-void R600InstPrinter::printUpdateExecMask(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- AMDGPUInstPrinter::printIfSet(MI, OpNo, O, "ExecMask,");
-}
-
-void R600InstPrinter::printUpdatePred(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- AMDGPUInstPrinter::printIfSet(MI, OpNo, O, "Pred,");
-}
-
-void R600InstPrinter::printWrite(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- const MCOperand &Op = MI->getOperand(OpNo);
- if (Op.getImm() == 0) {
- O << " (MASKED)";
- }
-}
-
-#include "R600GenAsmWriter.inc"
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
index 3cb4fcb28cb0..71db0beba0b6 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
@@ -240,36 +240,6 @@ protected:
raw_ostream &O);
};
-class R600InstPrinter : public MCInstPrinter {
-public:
- R600InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
- const MCRegisterInfo &MRI)
- : MCInstPrinter(MAI, MII, MRI) {}
-
- void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
- const MCSubtargetInfo &STI, raw_ostream &O) override;
- std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
- void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
- static const char *getRegisterName(unsigned RegNo);
-
- void printAbs(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printBankSwizzle(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printClamp(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printCT(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printKCache(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printLast(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printLiteral(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printNeg(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printOMOD(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printRel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printRSel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printUpdateExecMask(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printUpdatePred(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printWrite(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-};
-
} // End namespace llvm
#endif
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
index f3d945cc0764..93bec8aaadfd 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
//
/// \file
-/// CodeEmitter interface for R600 and SI codegen.
+/// CodeEmitter interface for SI codegen.
//
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h
index 1a7ca7e1a330..53c724f2211a 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
//
/// \file
-/// CodeEmitter interface for R600 and SI codegen.
+/// CodeEmitter interface for SI codegen.
//
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
index 34b2cd1fc1e4..1f917cd91b47 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@@ -16,7 +16,8 @@
#include "AMDGPUInstPrinter.h"
#include "AMDGPUMCAsmInfo.h"
#include "AMDGPUTargetStreamer.h"
-#include "SIDefines.h"
+#include "R600InstPrinter.h"
+#include "R600MCTargetDesc.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCCodeEmitter.h"
@@ -26,10 +27,9 @@
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectWriter.h"
-#include "llvm/MC/MCRegister.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
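
With R600InstPrinter split out of AMDGPUInstPrinter, the MC instruction printer factory in this file presumably has to pick between the two based on the triple. A sketch under that assumption follows; the function name is illustrative, while the callback signature matches what TargetRegistry::RegisterMCInstPrinter expects.

#include "AMDGPUInstPrinter.h"
#include "R600InstPrinter.h"
#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCInstPrinter.h"

// Hypothetical factory: choose the R600 printer for r600 triples and the
// GCN printer otherwise.
static llvm::MCInstPrinter *
createInstPrinterSketch(const llvm::Triple &T, unsigned SyntaxVariant,
                        const llvm::MCAsmInfo &MAI,
                        const llvm::MCInstrInfo &MII,
                        const llvm::MCRegisterInfo &MRI) {
  if (T.getArch() == llvm::Triple::r600)
    return new llvm::R600InstPrinter(MAI, MII, MRI);
  return new llvm::AMDGPUInstPrinter(MAI, MII, MRI);
}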
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
index 71b44a509108..e5cce6045c8c 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
@@ -18,6 +18,7 @@
#include <memory>
namespace llvm {
+class Target;
class MCAsmBackend;
class MCCodeEmitter;
class MCContext;
@@ -26,20 +27,11 @@ class MCObjectTargetWriter;
class MCRegisterInfo;
class MCSubtargetInfo;
class MCTargetOptions;
-class StringRef;
-class Target;
-class Triple;
-class raw_pwrite_stream;
enum AMDGPUDwarfFlavour : unsigned { Wave64 = 0, Wave32 = 1 };
MCRegisterInfo *createGCNMCRegisterInfo(AMDGPUDwarfFlavour DwarfFlavour);
-MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
- MCContext &Ctx);
-MCInstrInfo *createR600MCInstrInfo();
-
MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx);
@@ -57,23 +49,12 @@ createAMDGPUELFObjectWriter(bool Is64Bit, uint8_t OSABI,
#define GET_REGINFO_ENUM
#include "AMDGPUGenRegisterInfo.inc"
-#define GET_REGINFO_ENUM
-#include "R600GenRegisterInfo.inc"
-
#define GET_INSTRINFO_ENUM
#define GET_INSTRINFO_OPERAND_ENUM
#define GET_INSTRINFO_SCHED_ENUM
#include "AMDGPUGenInstrInfo.inc"
-#define GET_INSTRINFO_ENUM
-#define GET_INSTRINFO_OPERAND_ENUM
-#define GET_INSTRINFO_SCHED_ENUM
-#include "R600GenInstrInfo.inc"
-
#define GET_SUBTARGETINFO_ENUM
#include "AMDGPUGenSubtargetInfo.inc"
-#define GET_SUBTARGETINFO_ENUM
-#include "R600GenSubtargetInfo.inc"
-
#endif
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
index cef34a5e5a59..a857fd00a855 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
@@ -17,13 +17,8 @@ struct amd_kernel_code_t;
namespace llvm {
-class DataLayout;
-class Function;
class MCELFStreamer;
class MCSymbol;
-class MDNode;
-class Module;
-class Type;
class formatted_raw_ostream;
namespace AMDGPU {
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600InstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600InstPrinter.cpp
new file mode 100644
index 000000000000..f77ed1faf029
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600InstPrinter.cpp
@@ -0,0 +1,224 @@
+//===-- R600InstPrinter.cpp - AMDGPU MC Inst -> ASM ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// \file
+//===----------------------------------------------------------------------===//
+
+#include "R600InstPrinter.h"
+#include "AMDGPUInstPrinter.h"
+#include "R600MCTargetDesc.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+void R600InstPrinter::printInst(const MCInst *MI, uint64_t Address,
+ StringRef Annot, const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ O.flush();
+ printInstruction(MI, Address, O);
+ printAnnotation(O, Annot);
+}
+
+void R600InstPrinter::printAbs(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ AMDGPUInstPrinter::printIfSet(MI, OpNo, O, '|');
+}
+
+void R600InstPrinter::printBankSwizzle(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ int BankSwizzle = MI->getOperand(OpNo).getImm();
+ switch (BankSwizzle) {
+ case 1:
+ O << "BS:VEC_021/SCL_122";
+ break;
+ case 2:
+ O << "BS:VEC_120/SCL_212";
+ break;
+ case 3:
+ O << "BS:VEC_102/SCL_221";
+ break;
+ case 4:
+ O << "BS:VEC_201";
+ break;
+ case 5:
+ O << "BS:VEC_210";
+ break;
+ default:
+ break;
+ }
+}
+
+void R600InstPrinter::printClamp(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ AMDGPUInstPrinter::printIfSet(MI, OpNo, O, "_SAT");
+}
+
+void R600InstPrinter::printCT(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ unsigned CT = MI->getOperand(OpNo).getImm();
+ switch (CT) {
+ case 0:
+ O << 'U';
+ break;
+ case 1:
+ O << 'N';
+ break;
+ default:
+ break;
+ }
+}
+
+void R600InstPrinter::printKCache(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ int KCacheMode = MI->getOperand(OpNo).getImm();
+ if (KCacheMode > 0) {
+ int KCacheBank = MI->getOperand(OpNo - 2).getImm();
+ O << "CB" << KCacheBank << ':';
+ int KCacheAddr = MI->getOperand(OpNo + 2).getImm();
+ int LineSize = (KCacheMode == 1) ? 16 : 32;
+ O << KCacheAddr * 16 << '-' << KCacheAddr * 16 + LineSize;
+ }
+}
+
+void R600InstPrinter::printLast(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ AMDGPUInstPrinter::printIfSet(MI, OpNo, O, "*", " ");
+}
+
+void R600InstPrinter::printLiteral(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ assert(Op.isImm() || Op.isExpr());
+ if (Op.isImm()) {
+ int64_t Imm = Op.getImm();
+ O << Imm << '(' << BitsToFloat(Imm) << ')';
+ }
+ if (Op.isExpr()) {
+ Op.getExpr()->print(O << '@', &MAI);
+ }
+}
+
+void R600InstPrinter::printNeg(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ AMDGPUInstPrinter::printIfSet(MI, OpNo, O, '-');
+}
+
+void R600InstPrinter::printOMOD(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ switch (MI->getOperand(OpNo).getImm()) {
+ default:
+ break;
+ case 1:
+ O << " * 2.0";
+ break;
+ case 2:
+ O << " * 4.0";
+ break;
+ case 3:
+ O << " / 2.0";
+ break;
+ }
+}
+
+void R600InstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printOperand(MI, OpNo, O);
+ O << ", ";
+ printOperand(MI, OpNo + 1, O);
+}
+
+void R600InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (OpNo >= MI->getNumOperands()) {
+ O << "/*Missing OP" << OpNo << "*/";
+ return;
+ }
+
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ switch (Op.getReg()) {
+ // This is the default predicate state, so we don't need to print it.
+ case R600::PRED_SEL_OFF:
+ break;
+
+ default:
+ O << getRegisterName(Op.getReg());
+ break;
+ }
+ } else if (Op.isImm()) {
+ O << Op.getImm();
+ } else if (Op.isDFPImm()) {
+ // We special case 0.0 because otherwise it will be printed as an integer.
+ if (Op.getDFPImm() == 0.0)
+ O << "0.0";
+ else {
+ O << bit_cast<double>(Op.getDFPImm());
+ }
+ } else if (Op.isExpr()) {
+ const MCExpr *Exp = Op.getExpr();
+ Exp->print(O, &MAI);
+ } else {
+ O << "/*INV_OP*/";
+ }
+}
+
+void R600InstPrinter::printRel(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ AMDGPUInstPrinter::printIfSet(MI, OpNo, O, '+');
+}
+
+void R600InstPrinter::printRSel(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned Sel = MI->getOperand(OpNo).getImm();
+ switch (Sel) {
+ case 0:
+ O << 'X';
+ break;
+ case 1:
+ O << 'Y';
+ break;
+ case 2:
+ O << 'Z';
+ break;
+ case 3:
+ O << 'W';
+ break;
+ case 4:
+ O << '0';
+ break;
+ case 5:
+ O << '1';
+ break;
+ case 7:
+ O << '_';
+ break;
+ default:
+ break;
+ }
+}
+
+void R600InstPrinter::printUpdateExecMask(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ AMDGPUInstPrinter::printIfSet(MI, OpNo, O, "ExecMask,");
+}
+
+void R600InstPrinter::printUpdatePred(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ AMDGPUInstPrinter::printIfSet(MI, OpNo, O, "Pred,");
+}
+
+void R600InstPrinter::printWrite(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.getImm() == 0) {
+ O << " (MASKED)";
+ }
+}
+
+#include "R600GenAsmWriter.inc"
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600InstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600InstPrinter.h
new file mode 100644
index 000000000000..6c88ffd1514b
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600InstPrinter.h
@@ -0,0 +1,48 @@
+//===-- R600InstPrinter.h - AMDGPU MC Inst -> ASM interface -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_R600INSTPRINTER_H
+#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_R600INSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+
+class R600InstPrinter : public MCInstPrinter {
+public:
+ R600InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
+
+ void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
+ const MCSubtargetInfo &STI, raw_ostream &O) override;
+ std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
+ void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+
+ void printAbs(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printBankSwizzle(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printClamp(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printCT(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printKCache(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printLast(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printLiteral(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printNeg(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printOMOD(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printRel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printRSel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printUpdateExecMask(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printUpdatePred(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printWrite(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+};
+
+} // End namespace llvm
+
+#endif
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
index bbca8cbb742c..6fe192e95e72 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -13,7 +13,7 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "MCTargetDesc/R600MCTargetDesc.h"
#include "R600Defines.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp
index a4809af29daa..269209a12175 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "AMDGPUMCTargetDesc.h"
+#include "R600MCTargetDesc.h"
#include "llvm/MC/MCInstrInfo.h"
using namespace llvm;
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.h
new file mode 100644
index 000000000000..fc52cb33824f
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.h
@@ -0,0 +1,44 @@
+//===-- R600MCTargetDesc.h - R600 Target Descriptions -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Provides R600 specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_R600MCTARGETDESC_H
+#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_R600MCTARGETDESC_H
+
+#include <cstdint>
+
+namespace llvm {
+class MCCodeEmitter;
+class MCContext;
+class MCInstrInfo;
+class MCRegisterInfo;
+
+MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx);
+MCInstrInfo *createR600MCInstrInfo();
+
+} // namespace llvm
+
+#define GET_REGINFO_ENUM
+#include "R600GenRegisterInfo.inc"
+
+#define GET_INSTRINFO_ENUM
+#define GET_INSTRINFO_OPERAND_ENUM
+#define GET_INSTRINFO_SCHED_ENUM
+#include "R600GenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "R600GenSubtargetInfo.inc"
+
+#endif
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
index dbce4b2e872c..77f219aaa3ab 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -233,6 +233,7 @@ uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO,
switch (OpInfo.OperandType) {
case AMDGPU::OPERAND_REG_IMM_INT32:
case AMDGPU::OPERAND_REG_IMM_FP32:
+ case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
case AMDGPU::OPERAND_REG_INLINE_C_INT32:
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
@@ -255,6 +256,7 @@ uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO,
case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
return getLit16IntEncoding(static_cast<uint16_t>(Imm), STI);
case AMDGPU::OPERAND_REG_IMM_FP16:
+ case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
// FIXME Is this correct? What do inline immediates do on SI for f16 src
@@ -277,6 +279,9 @@ uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO,
uint32_t Encoding = getLit16Encoding(Lo16, STI);
return Encoding;
}
+ case AMDGPU::OPERAND_KIMM32:
+ case AMDGPU::OPERAND_KIMM16:
+ return MO.getImm();
default:
llvm_unreachable("invalid operand size");
}
@@ -341,7 +346,13 @@ void SIMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
(bytes > 4 && !STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal]))
return;
- // Check for additional literals in SRC0/1/2 (Op 1/2/3)
+  // Do not emit literals from SISrc operands for instructions with mandatory literals.
+ int ImmLitIdx =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::imm);
+ if (ImmLitIdx != -1)
+ return;
+
+ // Check for additional literals
for (unsigned i = 0, e = Desc.getNumOperands(); i < e; ++i) {
// Check if this operand should be encoded as [SV]Src
@@ -536,8 +547,7 @@ uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
if (AMDGPU::isSISrcOperand(Desc, OpNo)) {
uint32_t Enc = getLitEncoding(MO, Desc.OpInfo[OpNo], STI);
- if (Enc != ~0U &&
- (Enc != 255 || Desc.getSize() == 4 || Desc.getSize() == 8))
+ if (Enc != ~0U)
return Enc;
} else if (MO.isImm())
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index bacb790aac62..6dd886367302 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -43,6 +43,7 @@ class MIMGBaseOpcode : PredicateControl {
bit HasD16 = 0;
bit IsAtomicRet = 0;
bit MSAA = 0;
+ bit BVH = 0;
}
def MIMGBaseOpcode : GenericEnum {
@@ -54,7 +55,7 @@ def MIMGBaseOpcodesTable : GenericTable {
let CppTypeName = "MIMGBaseOpcodeInfo";
let Fields = ["BaseOpcode", "Store", "Atomic", "AtomicX2", "Sampler",
"Gather4", "NumExtraArgs", "Gradients", "G16", "Coordinates",
- "LodOrClampOrMip", "HasD16", "MSAA"];
+ "LodOrClampOrMip", "HasD16", "MSAA", "BVH"];
string TypeOf_BaseOpcode = "MIMGBaseOpcode";
let PrimaryKey = ["BaseOpcode"];
@@ -872,6 +873,14 @@ multiclass MIMG_Gather <mimgopc op, AMDGPUSampleVariant sample, bit wqm = 0,
multiclass MIMG_Gather_WQM <mimgopc op, AMDGPUSampleVariant sample>
: MIMG_Gather<op, sample, 1>;
+class MIMG_IntersectRay_Helper<bit Is64, bit A16> {
+ int num_addrs = !if(Is64, !if(A16, 9, 12), !if(A16, 8, 11));
+  // TODO: MIMGAddrSize will choose VReg_512, which is a 16-register tuple,
+  // when we only need 9, 11 or 12 registers depending on the A16 field and ptr size.
+ RegisterClass RegClass = MIMGAddrSize<num_addrs, 0>.RegClass;
+ int VAddrDwords = !srl(RegClass.Size, 5);
+}
+
class MIMG_IntersectRay_gfx10<mimgopc op, string opcode, RegisterClass AddrRC, bit A16>
: MIMG_gfx10<op.BASE, (outs VReg_128:$vdata), "AMDGPU"> {
@@ -890,8 +899,11 @@ class MIMG_IntersectRay_nsa_gfx10<mimgopc op, string opcode, int num_addrs, bit
let AsmString = opcode#" $vdata, "#nsah.AddrAsm#", $srsrc"#!if(A16, "$a16", "");
}
-multiclass MIMG_IntersectRay<mimgopc op, string opcode, int num_addrs, bit A16> {
- def "" : MIMGBaseOpcode;
+multiclass MIMG_IntersectRay<mimgopc op, string opcode, bit Is64, bit A16> {
+ defvar info = MIMG_IntersectRay_Helper<Is64, A16>;
+ def "" : MIMGBaseOpcode {
+ let BVH = 1;
+ }
let SubtargetPredicate = HasGFX10_AEncoding,
AssemblerPredicate = HasGFX10_AEncoding,
AsmMatchConverter = !if(A16, "cvtIntersectRay", ""),
@@ -908,13 +920,11 @@ multiclass MIMG_IntersectRay<mimgopc op, string opcode, int num_addrs, bit A16>
d16 = 0,
BaseOpcode = !cast<MIMGBaseOpcode>(NAME),
VDataDwords = 4 in {
- // TODO: MIMGAddrSize will choose VReg_512 which is a 16 register tuple,
- // when we only need 9, 11 or 12 depending on A16 field and ptr size.
- def "_sa" : MIMG_IntersectRay_gfx10<op, opcode, MIMGAddrSize<num_addrs, 0>.RegClass, A16> {
- let VAddrDwords = !srl(MIMGAddrSize<num_addrs, 0>.RegClass.Size, 5);
+ def _sa_gfx10 : MIMG_IntersectRay_gfx10<op, opcode, info.RegClass, A16> {
+ let VAddrDwords = info.VAddrDwords;
}
- def _nsa : MIMG_IntersectRay_nsa_gfx10<op, opcode, num_addrs, A16> {
- let VAddrDwords = num_addrs;
+ def _nsa_gfx10 : MIMG_IntersectRay_nsa_gfx10<op, opcode, info.num_addrs, A16> {
+ let VAddrDwords = info.num_addrs;
}
}
}
@@ -949,7 +959,7 @@ defm IMAGE_ATOMIC_OR : MIMG_Atomic <mimgopc<0x19>, "image_atomic_or">
defm IMAGE_ATOMIC_XOR : MIMG_Atomic <mimgopc<0x1a>, "image_atomic_xor">;
defm IMAGE_ATOMIC_INC : MIMG_Atomic <mimgopc<0x1b>, "image_atomic_inc">;
defm IMAGE_ATOMIC_DEC : MIMG_Atomic <mimgopc<0x1c>, "image_atomic_dec">;
-defm IMAGE_ATOMIC_FCMPSWAP : MIMG_Atomic <mimgopc<0x1d, MIMG.NOP>, "image_atomic_fcmpswap", 0, 1>;
+defm IMAGE_ATOMIC_FCMPSWAP : MIMG_Atomic <mimgopc<0x1d, MIMG.NOP>, "image_atomic_fcmpswap", 1, 1>;
defm IMAGE_ATOMIC_FMIN : MIMG_Atomic <mimgopc<0x1e, MIMG.NOP>, "image_atomic_fmin", 0, 1>;
defm IMAGE_ATOMIC_FMAX : MIMG_Atomic <mimgopc<0x1f, MIMG.NOP>, "image_atomic_fmax", 0, 1>;
@@ -1045,10 +1055,10 @@ defm IMAGE_SAMPLE_C_CD_CL_O_G16 : MIMG_Sampler <mimgopc<0xef>, AMDGPUSample_c_cd
let SubtargetPredicate = HasGFX10_AEncoding in
defm IMAGE_MSAA_LOAD_X : MIMG_NoSampler <mimgopc<0x80>, "image_msaa_load", 1, 0, 0, 1>;
-defm IMAGE_BVH_INTERSECT_RAY : MIMG_IntersectRay<mimgopc<0xe6>, "image_bvh_intersect_ray", 11, 0>;
-defm IMAGE_BVH_INTERSECT_RAY_a16 : MIMG_IntersectRay<mimgopc<0xe6>, "image_bvh_intersect_ray", 8, 1>;
-defm IMAGE_BVH64_INTERSECT_RAY : MIMG_IntersectRay<mimgopc<0xe7>, "image_bvh64_intersect_ray", 12, 0>;
-defm IMAGE_BVH64_INTERSECT_RAY_a16 : MIMG_IntersectRay<mimgopc<0xe7>, "image_bvh64_intersect_ray", 9, 1>;
+defm IMAGE_BVH_INTERSECT_RAY : MIMG_IntersectRay<mimgopc<0xe6>, "image_bvh_intersect_ray", 0, 0>;
+defm IMAGE_BVH_INTERSECT_RAY_a16 : MIMG_IntersectRay<mimgopc<0xe6>, "image_bvh_intersect_ray", 0, 1>;
+defm IMAGE_BVH64_INTERSECT_RAY : MIMG_IntersectRay<mimgopc<0xe7>, "image_bvh64_intersect_ray", 1, 0>;
+defm IMAGE_BVH64_INTERSECT_RAY_a16 : MIMG_IntersectRay<mimgopc<0xe7>, "image_bvh64_intersect_ray", 1, 1>;
/********** ========================================= **********/
/********** Table of dimension-aware image intrinsics **********/
@@ -1098,7 +1108,7 @@ def ImageDimIntrinsicTable : GenericTable {
let PrimaryKeyEarlyOut = 1;
}
-def getImageDimInstrinsicByBaseOpcode : SearchIndex {
+def getImageDimIntrinsicByBaseOpcode : SearchIndex {
let Table = ImageDimIntrinsicTable;
let Key = ["BaseOpcode", "Dim"];
}
diff --git a/llvm/lib/Target/AMDGPU/R600.h b/llvm/lib/Target/AMDGPU/R600.h
new file mode 100644
index 000000000000..2b483ae63da9
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/R600.h
@@ -0,0 +1,50 @@
+//===-- R600.h - MachineFunction passes hw codegen ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_R600_H
+#define LLVM_LIB_TARGET_AMDGPU_R600_H
+
+#include "llvm/Support/CodeGen.h"
+
+namespace llvm {
+
+class FunctionPass;
+class TargetMachine;
+class ModulePass;
+class PassRegistry;
+
+// R600 Passes
+FunctionPass *createR600VectorRegMerger();
+FunctionPass *createR600ExpandSpecialInstrsPass();
+FunctionPass *createR600EmitClauseMarkers();
+FunctionPass *createR600ClauseMergePass();
+FunctionPass *createR600Packetizer();
+FunctionPass *createR600ControlFlowFinalizer();
+FunctionPass *createAMDGPUCFGStructurizerPass();
+FunctionPass *createR600ISelDag(TargetMachine *TM, CodeGenOpt::Level OptLevel);
+ModulePass *createR600OpenCLImageTypeLoweringPass();
+
+void initializeR600ClauseMergePassPass(PassRegistry &);
+extern char &R600ClauseMergePassID;
+
+void initializeR600ControlFlowFinalizerPass(PassRegistry &);
+extern char &R600ControlFlowFinalizerID;
+
+void initializeR600ExpandSpecialInstrsPassPass(PassRegistry &);
+extern char &R600ExpandSpecialInstrsPassID;
+
+void initializeR600VectorRegMergerPass(PassRegistry &);
+extern char &R600VectorRegMergerID;
+
+void initializeR600PacketizerPass(PassRegistry &);
+extern char &R600PacketizerID;
+
+} // End namespace llvm
+
+#endif
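
The initializeR600* declarations above are only useful once something calls them. Below is a minimal sketch of the usual pattern, assuming this runs during target initialization; the wrapper function name is illustrative, and the pass initializers are exactly the ones declared in R600.h above.

#include "R600.h"
#include "llvm/PassRegistry.h"

// Illustrative wrapper: register the R600 machine passes with the global
// pass registry so options like -stop-after can refer to them by name.
static void initializeR600CodeGenPasses() {
  llvm::PassRegistry &PR = *llvm::PassRegistry::getPassRegistry();
  llvm::initializeR600ClauseMergePassPass(PR);
  llvm::initializeR600ControlFlowFinalizerPass(PR);
  llvm::initializeR600ExpandSpecialInstrsPassPass(PR);
  llvm::initializeR600VectorRegMergerPass(PR);
  llvm::initializeR600PacketizerPass(PR);
}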
diff --git a/llvm/lib/Target/AMDGPU/R600.td b/llvm/lib/Target/AMDGPU/R600.td
index 1d11da969474..45bc955d4f4c 100644
--- a/llvm/lib/Target/AMDGPU/R600.td
+++ b/llvm/lib/Target/AMDGPU/R600.td
@@ -34,6 +34,7 @@ def ALU_NULL : FuncUnit;
include "AMDGPUFeatures.td"
include "R600Schedule.td"
include "R600Processors.td"
+include "R600InstrInfo.td"
include "AMDGPUInstrInfo.td"
include "AMDGPUInstructions.td"
include "R600Instructions.td"
diff --git a/llvm/lib/Target/AMDGPU/R600AsmPrinter.cpp b/llvm/lib/Target/AMDGPU/R600AsmPrinter.cpp
index a96fc7ef234e..c19e3c41485e 100644
--- a/llvm/lib/Target/AMDGPU/R600AsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/R600AsmPrinter.cpp
@@ -15,7 +15,7 @@
//===----------------------------------------------------------------------===//
#include "R600AsmPrinter.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "MCTargetDesc/R600MCTargetDesc.h"
#include "R600Defines.h"
#include "R600MachineFunctionInfo.h"
#include "R600Subtarget.h"
@@ -129,4 +129,3 @@ bool R600AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
return false;
}
-
diff --git a/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp b/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp
index a19d00b62502..1d93165f9eec 100644
--- a/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp
@@ -12,8 +12,8 @@
/// It needs to be called after IfCvt for best results.
//===----------------------------------------------------------------------===//
-#include "AMDGPU.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "MCTargetDesc/R600MCTargetDesc.h"
+#include "R600.h"
#include "R600Subtarget.h"
using namespace llvm;
diff --git a/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp b/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
index ca1e61393e9a..29c37c706138 100644
--- a/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#include "AMDGPU.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "MCTargetDesc/R600MCTargetDesc.h"
+#include "R600.h"
#include "R600MachineFunctionInfo.h"
#include "R600Subtarget.h"
#include <set>
diff --git a/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp b/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
index 664e134889e9..d5eaa33ef964 100644
--- a/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
+++ b/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
@@ -13,8 +13,8 @@
/// initiated by CF_ALU instructions.
//===----------------------------------------------------------------------===//
-#include "AMDGPU.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "MCTargetDesc/R600MCTargetDesc.h"
+#include "R600.h"
#include "R600Defines.h"
#include "R600Subtarget.h"
diff --git a/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp b/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
index 81dc91ab922f..838a497b4df1 100644
--- a/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
@@ -13,8 +13,8 @@
//
//===----------------------------------------------------------------------===//
-#include "AMDGPU.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "MCTargetDesc/R600MCTargetDesc.h"
+#include "R600.h"
#include "R600Defines.h"
#include "R600Subtarget.h"
diff --git a/llvm/lib/Target/AMDGPU/R600ISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/R600ISelDAGToDAG.cpp
new file mode 100644
index 000000000000..9f842e91c0f3
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/R600ISelDAGToDAG.cpp
@@ -0,0 +1,184 @@
+//===-- R600ISelDAGToDAG.cpp - A dag to dag inst selector for R600 --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// Defines an instruction selector for the R600 subtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUISelDAGToDAG.h"
+#include "MCTargetDesc/R600MCTargetDesc.h"
+#include "R600.h"
+#include "R600Subtarget.h"
+#include "llvm/Analysis/ValueTracking.h"
+
+class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
+ const R600Subtarget *Subtarget;
+
+ bool isConstantLoad(const MemSDNode *N, int cbID) const;
+ bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue &IntPtr);
+ bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
+ SDValue &Offset);
+
+public:
+ explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel)
+ : AMDGPUDAGToDAGISel(TM, OptLevel) {}
+
+ void Select(SDNode *N) override;
+
+ bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
+ SDValue &Offset) override;
+ bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
+ SDValue &Offset) override;
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void PreprocessISelDAG() override {}
+
+protected:
+ // Include the pieces autogenerated from the target description.
+#include "R600GenDAGISel.inc"
+};
+
+bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
+ Subtarget = &MF.getSubtarget<R600Subtarget>();
+ return SelectionDAGISel::runOnMachineFunction(MF);
+}
+
+bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
+ if (!N->readMem())
+ return false;
+ if (CbId == -1)
+ return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
+ N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
+
+ return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
+}
+
+bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
+ SDValue &IntPtr) {
+ if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
+ IntPtr =
+ CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr), true);
+ return true;
+ }
+ return false;
+}
+
+bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
+ SDValue &BaseReg,
+ SDValue &Offset) {
+ if (!isa<ConstantSDNode>(Addr)) {
+ BaseReg = Addr;
+ Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
+ return true;
+ }
+ return false;
+}
+
+void R600DAGToDAGISel::Select(SDNode *N) {
+ unsigned int Opc = N->getOpcode();
+ if (N->isMachineOpcode()) {
+ N->setNodeId(-1);
+ return; // Already selected.
+ }
+
+ switch (Opc) {
+ default:
+ break;
+ case AMDGPUISD::BUILD_VERTICAL_VECTOR:
+ case ISD::SCALAR_TO_VECTOR:
+ case ISD::BUILD_VECTOR: {
+ EVT VT = N->getValueType(0);
+ unsigned NumVectorElts = VT.getVectorNumElements();
+ unsigned RegClassID;
+    // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
+    // that adds a 128-bit register copy when going through the
+    // TwoAddressInstructions pass. We want to avoid 128-bit copies as much as
+    // possible because they can't be bundled by our scheduler.
+ switch (NumVectorElts) {
+ case 2:
+ RegClassID = R600::R600_Reg64RegClassID;
+ break;
+ case 4:
+ if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
+ RegClassID = R600::R600_Reg128VerticalRegClassID;
+ else
+ RegClassID = R600::R600_Reg128RegClassID;
+ break;
+ default:
+ llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
+ }
+ SelectBuildVector(N, RegClassID);
+ return;
+ }
+ }
+
+ SelectCode(N);
+}
+
+bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
+ ConstantSDNode *C;
+ SDLoc DL(Addr);
+
+ if ((C = dyn_cast<ConstantSDNode>(Addr))) {
+ Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
+ Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
+ } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
+ (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
+ Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
+ Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
+ } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
+ (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
+ Base = Addr.getOperand(0);
+ Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
+ } else {
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
+ }
+
+ return true;
+}
+
+bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
+ ConstantSDNode *IMMOffset;
+
+ if (Addr.getOpcode() == ISD::ADD &&
+ (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) &&
+ isInt<16>(IMMOffset->getZExtValue())) {
+
+ Base = Addr.getOperand(0);
+ Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
+ MVT::i32);
+ return true;
+ // If the pointer address is constant, we can move it to the offset field.
+ } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr)) &&
+ isInt<16>(IMMOffset->getZExtValue())) {
+ Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
+ SDLoc(CurDAG->getEntryNode()), R600::ZERO,
+ MVT::i32);
+ Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
+ MVT::i32);
+ return true;
+ }
+
+ // Default case, no offset
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
+ return true;
+}
+
+/// This pass converts a legalized DAG into an R600-specific
+/// DAG, ready for instruction scheduling.
+FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
+ CodeGenOpt::Level OptLevel) {
+ return new R600DAGToDAGISel(TM, OptLevel);
+}
diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index 002ef1801448..0215eb9f9fea 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -13,11 +13,12 @@
#include "R600ISelLowering.h"
#include "AMDGPU.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "MCTargetDesc/R600MCTargetDesc.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "R600Subtarget.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
@@ -335,7 +336,9 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
*BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_LITERAL_X);
int Idx = TII->getOperandIdx(*MIB, R600::OpName::literal);
//TODO: Ugh this is rather ugly
- MIB->getOperand(Idx) = MI.getOperand(1);
+ const MachineOperand &MO = MI.getOperand(1);
+ MIB->getOperand(Idx).ChangeToGA(MO.getGlobal(), MO.getOffset(),
+ MO.getTargetFlags());
break;
}
@@ -827,7 +830,7 @@ SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
bool R600TargetLowering::isZero(SDValue Op) const {
if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
- return Cst->isNullValue();
+ return Cst->isZero();
} else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
return CstFP->isZero();
} else {
@@ -923,7 +926,7 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
std::swap(LHS, RHS);
CC = DAG.getCondCode(CCSwapped);
} else {
- // Try inverting the conditon and then swapping the operands
+ // Try inverting the condition and then swapping the operands
ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT);
CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
@@ -1564,7 +1567,7 @@ EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
}
bool R600TargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
- const SelectionDAG &DAG) const {
+ const MachineFunction &MF) const {
// Local and Private addresses do not handle vectors. Limit to i32
if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS)) {
return (MemVT.getSizeInBits() <= 32);
diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.h b/llvm/lib/Target/AMDGPU/R600ISelLowering.h
index 920cf3cd97ef..f9a9a6127322 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.h
@@ -15,6 +15,7 @@
#define LLVM_LIB_TARGET_AMDGPU_R600ISELLOWERING_H
#include "AMDGPUISelLowering.h"
+#include "llvm/CodeGen/MachineFunction.h"
namespace llvm {
@@ -47,7 +48,7 @@ public:
EVT VT) const override;
bool canMergeStoresTo(unsigned AS, EVT MemVT,
- const SelectionDAG &DAG) const override;
+ const MachineFunction &MF) const override;
bool allowsMisalignedMemoryAccesses(
EVT VT, unsigned AS, Align Alignment,
diff --git a/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp b/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
index 7a623f3e304e..a7ebf72315cb 100644
--- a/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
@@ -13,7 +13,8 @@
#include "R600InstrInfo.h"
#include "AMDGPU.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "MCTargetDesc/R600MCTargetDesc.h"
+#include "R600.h"
#include "R600Defines.h"
#include "R600Subtarget.h"
#include "llvm/ADT/SmallSet.h"
diff --git a/llvm/lib/Target/AMDGPU/R600InstrInfo.h b/llvm/lib/Target/AMDGPU/R600InstrInfo.h
index 1e249c6348f1..fc567f1a1fca 100644
--- a/llvm/lib/Target/AMDGPU/R600InstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/R600InstrInfo.h
@@ -175,7 +175,7 @@ public:
int *BytesAdded = nullptr) const override;
unsigned removeBranch(MachineBasicBlock &MBB,
- int *BytesRemvoed = nullptr) const override;
+ int *BytesRemoved = nullptr) const override;
bool isPredicated(const MachineInstr &MI) const override;
@@ -211,7 +211,7 @@ public:
bool expandPostRAPseudo(MachineInstr &MI) const override;
- /// Reserve the registers that may be accesed using indirect addressing.
+ /// Reserve the registers that may be accessed using indirect addressing.
void reserveIndirectRegisters(BitVector &Reserved,
const MachineFunction &MF,
const R600RegisterInfo &TRI) const;
@@ -220,7 +220,7 @@ public:
/// \p Channel
///
/// We model indirect addressing using a virtual address space that can be
- /// accesed with loads and stores. The "Indirect Address" is the memory
+ /// accessed with loads and stores. The "Indirect Address" is the memory
/// address in this virtual address space that maps to the given \p RegIndex
/// and \p Channel.
unsigned calculateIndirectAddress(unsigned RegIndex, unsigned Channel) const;
diff --git a/llvm/lib/Target/AMDGPU/R600InstrInfo.td b/llvm/lib/Target/AMDGPU/R600InstrInfo.td
new file mode 100644
index 000000000000..92320748c497
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/R600InstrInfo.td
@@ -0,0 +1,23 @@
+//===-- R600InstrInfo.td - R600 DAG nodes ------------------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains DAG node definitions for the R600 target.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// R600 DAG Nodes
+//
+
+// Force dependencies for vector trunc stores
+def R600dummy_chain : SDNode<"AMDGPUISD::DUMMY_CHAIN", SDTNone, [SDNPHasChain]>;
+
+def R600ExportOp : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>;
+
+def R600_EXPORT: SDNode<"AMDGPUISD::R600_EXPORT", R600ExportOp,
+ [SDNPHasChain, SDNPSideEffect]>;
diff --git a/llvm/lib/Target/AMDGPU/R600Instructions.td b/llvm/lib/Target/AMDGPU/R600Instructions.td
index 055e2de59ea1..4487864888b6 100644
--- a/llvm/lib/Target/AMDGPU/R600Instructions.td
+++ b/llvm/lib/Target/AMDGPU/R600Instructions.td
@@ -74,8 +74,6 @@ def FRAMEri : Operand<iPTR> {
let MIOperandInfo = (ops R600_Reg32:$ptr, i32imm:$index);
}
-def ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [], []>;
-def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>;
def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>;
def ADDRGA_CONST_OFFSET : ComplexPattern<i32, 1, "SelectGlobalValueConstantOffset", [], []>;
def ADDRGA_VAR_OFFSET : ComplexPattern<i32, 2, "SelectGlobalValueVariableOffset", [], []>;
@@ -212,16 +210,6 @@ class R600_3OP <bits<5> inst, string opName, list<dag> pattern,
let Inst{63-32} = Word1;
}
-class R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern,
- InstrItinClass itin = VecALU> :
- InstR600 <(outs R600_Reg32:$dst),
- ins,
- asm,
- pattern,
- itin>;
-
-
-
} // End mayLoad = 1, mayStore = 0, hasSideEffects = 0
class EG_CF_RAT <bits <8> cfinst, bits <6> ratinst, bits<4> ratid, bits<4> mask,
@@ -815,7 +803,7 @@ def DUMMY_CHAIN : R600WrapperInst <
let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in {
-class MOV_IMM <ValueType vt, Operand immType> : R600WrapperInst <
+class MOV_IMM <Operand immType> : R600WrapperInst <
(outs R600_Reg32:$dst),
(ins immType:$imm),
"",
@@ -826,20 +814,20 @@ class MOV_IMM <ValueType vt, Operand immType> : R600WrapperInst <
} // end let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1
-def MOV_IMM_I32 : MOV_IMM<i32, i32imm>;
+def MOV_IMM_I32 : MOV_IMM<i32imm>;
def : R600Pat <
(imm:$val),
(MOV_IMM_I32 imm:$val)
>;
-def MOV_IMM_GLOBAL_ADDR : MOV_IMM<iPTR, i32imm>;
+def MOV_IMM_GLOBAL_ADDR : MOV_IMM<i32imm>;
def : R600Pat <
(AMDGPUconstdata_ptr tglobaladdr:$addr),
(MOV_IMM_GLOBAL_ADDR tglobaladdr:$addr)
>;
-def MOV_IMM_F32 : MOV_IMM<f32, f32imm>;
+def MOV_IMM_F32 : MOV_IMM<f32imm>;
def : R600Pat <
(fpimm:$val),
(MOV_IMM_F32 fpimm:$val)
@@ -1358,7 +1346,7 @@ let Predicates = [isR600] in {
//===----------------------------------------------------------------------===//
-// Regist loads and stores - for indirect addressing
+// Register loads and stores - for indirect addressing
//===----------------------------------------------------------------------===//
let Namespace = "R600" in {
diff --git a/llvm/lib/Target/AMDGPU/R600MCInstLower.cpp b/llvm/lib/Target/AMDGPU/R600MCInstLower.cpp
new file mode 100644
index 000000000000..8f7807a2b472
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/R600MCInstLower.cpp
@@ -0,0 +1,73 @@
+//===- R600MCInstLower.cpp - Lower R600 MachineInstr to an MCInst ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Code to lower R600 MachineInstrs to their corresponding MCInst.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#include "AMDGPUMCInstLower.h"
+#include "R600AsmPrinter.h"
+#include "R600Subtarget.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+
+class R600MCInstLower : public AMDGPUMCInstLower {
+public:
+ R600MCInstLower(MCContext &ctx, const R600Subtarget &ST,
+ const AsmPrinter &AP);
+
+ /// Lower a MachineInstr to an MCInst
+ void lower(const MachineInstr *MI, MCInst &OutMI) const;
+};
+
+R600MCInstLower::R600MCInstLower(MCContext &Ctx, const R600Subtarget &ST,
+ const AsmPrinter &AP)
+ : AMDGPUMCInstLower(Ctx, ST, AP) {}
+
+void R600MCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
+ OutMI.setOpcode(MI->getOpcode());
+ for (const MachineOperand &MO : MI->explicit_operands()) {
+ MCOperand MCOp;
+ lowerOperand(MO, MCOp);
+ OutMI.addOperand(MCOp);
+ }
+}
+
+void R600AsmPrinter::emitInstruction(const MachineInstr *MI) {
+ const R600Subtarget &STI = MF->getSubtarget<R600Subtarget>();
+ R600MCInstLower MCInstLowering(OutContext, STI, *this);
+
+ StringRef Err;
+ if (!STI.getInstrInfo()->verifyInstruction(*MI, Err)) {
+ LLVMContext &C = MI->getParent()->getParent()->getFunction().getContext();
+ C.emitError("Illegal instruction detected: " + Err);
+ MI->print(errs());
+ }
+
+ if (MI->isBundle()) {
+ const MachineBasicBlock *MBB = MI->getParent();
+ MachineBasicBlock::const_instr_iterator I = ++MI->getIterator();
+ while (I != MBB->instr_end() && I->isInsideBundle()) {
+ emitInstruction(&*I);
+ ++I;
+ }
+ } else {
+ MCInst TmpInst;
+ MCInstLowering.lower(MI, TmpInst);
+ EmitToStreamer(*OutStreamer, TmpInst);
+ }
+}
+
+const MCExpr *R600AsmPrinter::lowerConstant(const Constant *CV) {
+ if (const MCExpr *E = lowerAddrSpaceCast(TM, CV, OutContext))
+ return E;
+ return AsmPrinter::lowerConstant(CV);
+}
diff --git a/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp b/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp
index f85a68706287..36acfafa72aa 100644
--- a/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp
+++ b/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp
@@ -12,7 +12,7 @@
//===----------------------------------------------------------------------===//
#include "R600MachineScheduler.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "MCTargetDesc/R600MCTargetDesc.h"
#include "R600Subtarget.h"
using namespace llvm;
@@ -29,7 +29,7 @@ void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
MRI = &DAG->MRI;
CurInstKind = IDOther;
CurEmitted = 0;
- OccupedSlotsMask = 31;
+ OccupiedSlotsMask = 31;
InstKindLimit[IDAlu] = TII->getMaxAlusPerClause();
InstKindLimit[IDOther] = 32;
InstKindLimit[IDFetch] = ST.getTexVTXClauseSize();
@@ -138,7 +138,7 @@ void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
if (NextInstKind != CurInstKind) {
LLVM_DEBUG(dbgs() << "Instruction Type Switch\n");
if (NextInstKind != IDAlu)
- OccupedSlotsMask |= 31;
+ OccupiedSlotsMask |= 31;
CurEmitted = 0;
CurInstKind = NextInstKind;
}
@@ -339,10 +339,10 @@ void R600SchedStrategy::LoadAlu() {
void R600SchedStrategy::PrepareNextSlot() {
LLVM_DEBUG(dbgs() << "New Slot\n");
- assert (OccupedSlotsMask && "Slot wasn't filled");

- OccupedSlotsMask = 0;
-// if (HwGen == AMDGPUSubtarget::NORTHERN_ISLANDS)
-// OccupedSlotsMask |= 16;
+ assert(OccupiedSlotsMask && "Slot wasn't filled");
+ OccupiedSlotsMask = 0;
+ // if (HwGen == AMDGPUSubtarget::NORTHERN_ISLANDS)
+ // OccupiedSlotsMask |= 16;
InstructionsGroupCandidate.clear();
LoadAlu();
}
@@ -400,41 +400,41 @@ unsigned R600SchedStrategy::AvailablesAluCount() const {
SUnit* R600SchedStrategy::pickAlu() {
while (AvailablesAluCount() || !Pending[IDAlu].empty()) {
- if (!OccupedSlotsMask) {
+ if (!OccupiedSlotsMask) {
// Bottom up scheduling: predX must come first
if (!AvailableAlus[AluPredX].empty()) {
- OccupedSlotsMask |= 31;
+ OccupiedSlotsMask |= 31;
return PopInst(AvailableAlus[AluPredX], false);
}
// Flush physical reg copies (RA will discard them)
if (!AvailableAlus[AluDiscarded].empty()) {
- OccupedSlotsMask |= 31;
+ OccupiedSlotsMask |= 31;
return PopInst(AvailableAlus[AluDiscarded], false);
}
// If there is a T_XYZW alu available, use it
if (!AvailableAlus[AluT_XYZW].empty()) {
- OccupedSlotsMask |= 15;
+ OccupiedSlotsMask |= 15;
return PopInst(AvailableAlus[AluT_XYZW], false);
}
}
- bool TransSlotOccuped = OccupedSlotsMask & 16;
- if (!TransSlotOccuped && VLIW5) {
+ bool TransSlotOccupied = OccupiedSlotsMask & 16;
+ if (!TransSlotOccupied && VLIW5) {
if (!AvailableAlus[AluTrans].empty()) {
- OccupedSlotsMask |= 16;
+ OccupiedSlotsMask |= 16;
return PopInst(AvailableAlus[AluTrans], false);
}
SUnit *SU = AttemptFillSlot(3, true);
if (SU) {
- OccupedSlotsMask |= 16;
+ OccupiedSlotsMask |= 16;
return SU;
}
}
for (int Chan = 3; Chan > -1; --Chan) {
- bool isOccupied = OccupedSlotsMask & (1 << Chan);
+ bool isOccupied = OccupiedSlotsMask & (1 << Chan);
if (!isOccupied) {
SUnit *SU = AttemptFillSlot(Chan, false);
if (SU) {
- OccupedSlotsMask |= (1 << Chan);
+ OccupiedSlotsMask |= (1 << Chan);
InstructionsGroupCandidate.push_back(SU->getInstr());
return SU;
}
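The renamed OccupiedSlotsMask is a five-bit occupancy mask for an R600 VLIW bundle: bits 0-3 track the X/Y/Z/W channels and bit 4 the trans slot, so the literals 15, 16 and 31 used above mean "all vector slots", "trans slot only" and "everything". A small standalone sketch of that encoding (constants and names mirror the literals above but are otherwise illustrative):

#include <cassert>
#include <cstdio>

constexpr int VectorSlots = 15; // bits 0-3: X, Y, Z, W channels
constexpr int TransSlot   = 16; // bit 4: the trans ALU slot
constexpr int AllSlots    = 31; // every slot in the bundle

bool channelFree(int Mask, int Chan) {
  assert(Chan >= 0 && Chan <= 3);
  return (Mask & (1 << Chan)) == 0;
}

int main() {
  int Mask = 0;
  Mask |= VectorSlots;                                       // a T_XYZW op fills X-W
  std::printf("trans free: %d\n", (Mask & TransSlot) == 0);  // 1
  Mask |= TransSlot;                                         // trans slot taken too
  std::printf("bundle full: %d\n", Mask == AllSlots);        // 1
  std::printf("chan 2 free: %d\n", channelFree(Mask, 2));    // 0
}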
diff --git a/llvm/lib/Target/AMDGPU/R600MachineScheduler.h b/llvm/lib/Target/AMDGPU/R600MachineScheduler.h
index abcc37f8400d..f3fd71d470ba 100644
--- a/llvm/lib/Target/AMDGPU/R600MachineScheduler.h
+++ b/llvm/lib/Target/AMDGPU/R600MachineScheduler.h
@@ -63,7 +63,7 @@ class R600SchedStrategy final : public MachineSchedStrategy {
int InstKindLimit[IDLast];
- int OccupedSlotsMask;
+ int OccupiedSlotsMask;
public:
R600SchedStrategy() = default;
diff --git a/llvm/lib/Target/AMDGPU/R600OpenCLImageTypeLoweringPass.cpp b/llvm/lib/Target/AMDGPU/R600OpenCLImageTypeLoweringPass.cpp
index 8f1a069c232d..ac6a3581e255 100644
--- a/llvm/lib/Target/AMDGPU/R600OpenCLImageTypeLoweringPass.cpp
+++ b/llvm/lib/Target/AMDGPU/R600OpenCLImageTypeLoweringPass.cpp
@@ -24,7 +24,7 @@
//
//===----------------------------------------------------------------------===//
-#include "AMDGPU.h"
+#include "R600.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Constants.h"
@@ -86,7 +86,7 @@ GetFunctionFromMDNode(MDNode *Node) {
if (!F)
return nullptr;
- // Sanity checks.
+ // Validation checks.
size_t ExpectNumArgNodeOps = F->arg_size() + 1;
for (size_t i = 0; i < NumKernelArgMDNodes; ++i) {
MDNode *ArgNode = dyn_cast_or_null<MDNode>(Node->getOperand(i + 1));
diff --git a/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp b/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
index 8f19a3e478e8..1a723279dc9f 100644
--- a/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
+++ b/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
@@ -26,8 +26,8 @@
/// to reduce MOV count.
//===----------------------------------------------------------------------===//
-#include "AMDGPU.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "MCTargetDesc/R600MCTargetDesc.h"
+#include "R600.h"
#include "R600Defines.h"
#include "R600Subtarget.h"
#include "llvm/CodeGen/MachineDominators.h"
diff --git a/llvm/lib/Target/AMDGPU/R600Packetizer.cpp b/llvm/lib/Target/AMDGPU/R600Packetizer.cpp
index eaac938b098a..e858bba2983c 100644
--- a/llvm/lib/Target/AMDGPU/R600Packetizer.cpp
+++ b/llvm/lib/Target/AMDGPU/R600Packetizer.cpp
@@ -13,8 +13,8 @@
//
//===----------------------------------------------------------------------===//
-#include "AMDGPU.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "MCTargetDesc/R600MCTargetDesc.h"
+#include "R600.h"
#include "R600Subtarget.h"
#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/MachineDominators.h"
diff --git a/llvm/lib/Target/AMDGPU/R600Processors.td b/llvm/lib/Target/AMDGPU/R600Processors.td
index fff884e4848e..8cf8edd1254f 100644
--- a/llvm/lib/Target/AMDGPU/R600Processors.td
+++ b/llvm/lib/Target/AMDGPU/R600Processors.td
@@ -45,11 +45,11 @@ class R600SubtargetFeatureGeneration <string Value, string FeatureName,
SubtargetFeatureGeneration <Value, FeatureName, "R600Subtarget", Implies>;
def FeatureR600 : R600SubtargetFeatureGeneration<"R600", "r600",
- [FeatureR600ALUInst, FeatureFetchLimit8, FeatureLocalMemorySize0]
+ [FeatureR600ALUInst, FeatureFetchLimit8]
>;
def FeatureR700 : R600SubtargetFeatureGeneration<"R700", "r700",
- [FeatureFetchLimit16, FeatureLocalMemorySize0]
+ [FeatureFetchLimit16]
>;
def FeatureEvergreen : R600SubtargetFeatureGeneration<"EVERGREEN", "evergreen",
diff --git a/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp b/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp
index e4f7d89bf4c9..99a1a8e9871a 100644
--- a/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp
@@ -12,7 +12,7 @@
//===----------------------------------------------------------------------===//
#include "R600RegisterInfo.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "MCTargetDesc/R600MCTargetDesc.h"
#include "R600Defines.h"
#include "R600Subtarget.h"
diff --git a/llvm/lib/Target/AMDGPU/R600Subtarget.cpp b/llvm/lib/Target/AMDGPU/R600Subtarget.cpp
new file mode 100644
index 000000000000..20c1ce7266dd
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/R600Subtarget.cpp
@@ -0,0 +1,46 @@
+//===-- R600Subtarget.cpp - R600 Subtarget Information --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Implements the R600 specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "R600Subtarget.h"
+#include "MCTargetDesc/R600MCTargetDesc.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "r600-subtarget"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "R600GenSubtargetInfo.inc"
+
+R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
+ const TargetMachine &TM)
+ : R600GenSubtargetInfo(TT, GPU, /*TuneCPU*/ GPU, FS), AMDGPUSubtarget(TT),
+ InstrInfo(*this),
+ FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
+ FMA(false), CaymanISA(false), CFALUBug(false), HasVertexCache(false),
+ R600ALUInst(false), FP64(false), TexVTXClauseSize(0), Gen(R600),
+ TLInfo(TM, initializeSubtargetDependencies(TT, GPU, FS)),
+ InstrItins(getInstrItineraryForCPU(GPU)) {}
+
+R600Subtarget &R600Subtarget::initializeSubtargetDependencies(const Triple &TT,
+ StringRef GPU,
+ StringRef FS) {
+ SmallString<256> FullFS("+promote-alloca,");
+ FullFS += FS;
+ ParseSubtargetFeatures(GPU, /*TuneCPU*/ GPU, FullFS);
+
+ HasMulU24 = getGeneration() >= EVERGREEN;
+ HasMulI24 = hasCaymanISA();
+
+ return *this;
+}
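initializeSubtargetDependencies builds the feature string by prepending "+promote-alloca," to whatever the caller passed, parses the result, and only then derives flags such as HasMulU24 from the parsed generation. A rough standalone sketch of that ordering, with a toy splitter standing in for ParseSubtargetFeatures (feature names here are illustrative):

#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Toy stand-in for ParseSubtargetFeatures: split a comma-separated
// "+feature" list into tokens.
std::vector<std::string> parseFeatures(const std::string &FS) {
  std::vector<std::string> Out;
  std::stringstream SS(FS);
  std::string Tok;
  while (std::getline(SS, Tok, ','))
    if (!Tok.empty())
      Out.push_back(Tok);
  return Out;
}

int main() {
  std::string FS = "+fp64";                     // what the caller asked for
  std::string FullFS = "+promote-alloca," + FS; // always-on feature goes first
  for (const std::string &F : parseFeatures(FullFS))
    std::cout << F << '\n';                     // +promote-alloca, then +fp64
  // Derived flags are computed only after parsing, as in the real
  // constructor (e.g. HasMulU24 depends on the parsed generation).
}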
diff --git a/llvm/lib/Target/AMDGPU/R600Subtarget.h b/llvm/lib/Target/AMDGPU/R600Subtarget.h
index 07238da18c67..94403b88f21a 100644
--- a/llvm/lib/Target/AMDGPU/R600Subtarget.h
+++ b/llvm/lib/Target/AMDGPU/R600Subtarget.h
@@ -23,7 +23,6 @@
namespace llvm {
-class MCInst;
class MCInstrInfo;
} // namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/R600TargetMachine.cpp b/llvm/lib/Target/AMDGPU/R600TargetMachine.cpp
new file mode 100644
index 000000000000..39dad45425fc
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/R600TargetMachine.cpp
@@ -0,0 +1,143 @@
+//===-- R600TargetMachine.cpp - TargetMachine for hw codegen targets-------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// The AMDGPU-R600 target machine contains all of the hardware specific
+/// information needed to emit code for R600 GPUs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "R600TargetMachine.h"
+#include "AMDGPUTargetMachine.h"
+#include "R600.h"
+#include "R600MachineScheduler.h"
+#include "R600TargetTransformInfo.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+ EnableR600StructurizeCFG("r600-ir-structurize",
+ cl::desc("Use StructurizeCFG IR pass"),
+ cl::init(true));
+
+static cl::opt<bool> EnableR600IfConvert("r600-if-convert",
+ cl::desc("Use if conversion pass"),
+ cl::ReallyHidden, cl::init(true));
+
+static cl::opt<bool, true> EnableAMDGPUFunctionCallsOpt(
+ "amdgpu-function-calls", cl::desc("Enable AMDGPU function call support"),
+ cl::location(AMDGPUTargetMachine::EnableFunctionCalls), cl::init(true),
+ cl::Hidden);
+
+static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) {
+ return new ScheduleDAGMILive(C, std::make_unique<R600SchedStrategy>());
+}
+
+static MachineSchedRegistry R600SchedRegistry("r600",
+ "Run R600's custom scheduler",
+ createR600MachineScheduler);
+
+//===----------------------------------------------------------------------===//
+// R600 Target Machine (R600 -> Cayman)
+//===----------------------------------------------------------------------===//
+
+R600TargetMachine::R600TargetMachine(const Target &T, const Triple &TT,
+ StringRef CPU, StringRef FS,
+ TargetOptions Options,
+ Optional<Reloc::Model> RM,
+ Optional<CodeModel::Model> CM,
+ CodeGenOpt::Level OL, bool JIT)
+ : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {
+ setRequiresStructuredCFG(true);
+
+ // Override the default since calls aren't supported for r600.
+ if (EnableFunctionCalls &&
+ EnableAMDGPUFunctionCallsOpt.getNumOccurrences() == 0)
+ EnableFunctionCalls = false;
+}
+
+const TargetSubtargetInfo *
+R600TargetMachine::getSubtargetImpl(const Function &F) const {
+ StringRef GPU = getGPUName(F);
+ StringRef FS = getFeatureString(F);
+
+ SmallString<128> SubtargetKey(GPU);
+ SubtargetKey.append(FS);
+
+ auto &I = SubtargetMap[SubtargetKey];
+ if (!I) {
+ // This needs to be done before we create a new subtarget since any
+ // creation will depend on the TM and the code generation flags on the
+ // function that reside in TargetOptions.
+ resetTargetOptions(F);
+ I = std::make_unique<R600Subtarget>(TargetTriple, GPU, FS, *this);
+ }
+
+ return I.get();
+}
+
+TargetTransformInfo
+R600TargetMachine::getTargetTransformInfo(const Function &F) {
+ return TargetTransformInfo(R600TTIImpl(this, F));
+}
+
+class R600PassConfig final : public AMDGPUPassConfig {
+public:
+ R600PassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
+ : AMDGPUPassConfig(TM, PM) {}
+
+ ScheduleDAGInstrs *
+ createMachineScheduler(MachineSchedContext *C) const override {
+ return createR600MachineScheduler(C);
+ }
+
+ bool addPreISel() override;
+ bool addInstSelector() override;
+ void addPreRegAlloc() override;
+ void addPreSched2() override;
+ void addPreEmitPass() override;
+};
+
+//===----------------------------------------------------------------------===//
+// R600 Pass Setup
+//===----------------------------------------------------------------------===//
+
+bool R600PassConfig::addPreISel() {
+ AMDGPUPassConfig::addPreISel();
+
+ if (EnableR600StructurizeCFG)
+ addPass(createStructurizeCFGPass());
+ return false;
+}
+
+bool R600PassConfig::addInstSelector() {
+ addPass(createR600ISelDag(&getAMDGPUTargetMachine(), getOptLevel()));
+ return false;
+}
+
+void R600PassConfig::addPreRegAlloc() { addPass(createR600VectorRegMerger()); }
+
+void R600PassConfig::addPreSched2() {
+ addPass(createR600EmitClauseMarkers());
+ if (EnableR600IfConvert)
+ addPass(&IfConverterID);
+ addPass(createR600ClauseMergePass());
+}
+
+void R600PassConfig::addPreEmitPass() {
+ addPass(createAMDGPUCFGStructurizerPass());
+ addPass(createR600ExpandSpecialInstrsPass());
+ addPass(&FinalizeMachineBundlesID);
+ addPass(createR600Packetizer());
+ addPass(createR600ControlFlowFinalizer());
+}
+
+TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new R600PassConfig(*this, PM);
+}
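getSubtargetImpl above caches one R600Subtarget per distinct GPU-plus-feature-string key, so per-function target attributes do not rebuild the subtarget on every query. A minimal sketch of that memoization pattern with ordinary standard-library types (the key format and class names are illustrative, not the in-tree ones):

#include <iostream>
#include <map>
#include <memory>
#include <string>

struct Subtarget { // stand-in for R600Subtarget
  std::string GPU, Features;
};

class TargetMachineLike {
  // mutable because the real getSubtargetImpl is a const member function
  mutable std::map<std::string, std::unique_ptr<Subtarget>> SubtargetMap;

public:
  const Subtarget *getSubtarget(const std::string &GPU,
                                const std::string &FS) const {
    auto &Slot = SubtargetMap[GPU + FS]; // key = GPU name + feature string
    if (!Slot)                           // first request: build and cache
      Slot = std::make_unique<Subtarget>(Subtarget{GPU, FS});
    return Slot.get();
  }
};

int main() {
  TargetMachineLike TM;
  const Subtarget *A = TM.getSubtarget("cypress", "+fp64");
  const Subtarget *B = TM.getSubtarget("cypress", "+fp64");
  std::cout << (A == B) << '\n'; // 1: the cached instance is reused
}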
diff --git a/llvm/lib/Target/AMDGPU/R600TargetMachine.h b/llvm/lib/Target/AMDGPU/R600TargetMachine.h
new file mode 100644
index 000000000000..0ccbca3c68b1
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/R600TargetMachine.h
@@ -0,0 +1,48 @@
+//===-- R600TargetMachine.h - AMDGPU TargetMachine Interface ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// The AMDGPU TargetMachine interface definition for hw codegen targets.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_R600TARGETMACHINE_H
+#define LLVM_LIB_TARGET_AMDGPU_R600TARGETMACHINE_H
+
+#include "AMDGPUTargetMachine.h"
+#include "R600Subtarget.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+// R600 Target Machine (R600 -> Cayman)
+//===----------------------------------------------------------------------===//
+
+class R600TargetMachine final : public AMDGPUTargetMachine {
+private:
+ mutable StringMap<std::unique_ptr<R600Subtarget>> SubtargetMap;
+
+public:
+ R600TargetMachine(const Target &T, const Triple &TT, StringRef CPU,
+ StringRef FS, TargetOptions Options,
+ Optional<Reloc::Model> RM, Optional<CodeModel::Model> CM,
+ CodeGenOpt::Level OL, bool JIT);
+
+ TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+
+ const TargetSubtargetInfo *getSubtargetImpl(const Function &) const override;
+
+ TargetTransformInfo getTargetTransformInfo(const Function &F) override;
+
+ bool isMachineVerifierClean() const override { return false; }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_R600TARGETMACHINE_H
diff --git a/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp
new file mode 100644
index 000000000000..365c005b2503
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp
@@ -0,0 +1,142 @@
+//===- R600TargetTransformInfo.cpp - AMDGPU specific TTI pass -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// \file
+// This file implements a TargetTransformInfo analysis pass specific to the
+// R600 target machine. It uses the target's detailed information to provide
+// more precise answers to certain TTI queries, while letting the target
+// independent and default TTI implementations handle the rest.
+//
+//===----------------------------------------------------------------------===//
+
+#include "R600TargetTransformInfo.h"
+#include "AMDGPU.h"
+#include "AMDGPUTargetMachine.h"
+#include "R600Subtarget.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "R600tti"
+
+R600TTIImpl::R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
+ : BaseT(TM, F.getParent()->getDataLayout()),
+ ST(static_cast<const R600Subtarget *>(TM->getSubtargetImpl(F))),
+ TLI(ST->getTargetLowering()), CommonTTI(TM, F) {}
+
+unsigned R600TTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
+ return 4 * 128; // XXX - 4 channels. Should these count as vector instead?
+}
+
+unsigned R600TTIImpl::getNumberOfRegisters(bool Vec) const {
+ return getHardwareNumberOfRegisters(Vec);
+}
+
+TypeSize
+R600TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
+ return TypeSize::getFixed(32);
+}
+
+unsigned R600TTIImpl::getMinVectorRegisterBitWidth() const { return 32; }
+
+unsigned R600TTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
+ if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS ||
+ AddrSpace == AMDGPUAS::CONSTANT_ADDRESS)
+ return 128;
+ if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
+ AddrSpace == AMDGPUAS::REGION_ADDRESS)
+ return 64;
+ if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS)
+ return 32;
+
+ if ((AddrSpace == AMDGPUAS::PARAM_D_ADDRESS ||
+ AddrSpace == AMDGPUAS::PARAM_I_ADDRESS ||
+ (AddrSpace >= AMDGPUAS::CONSTANT_BUFFER_0 &&
+ AddrSpace <= AMDGPUAS::CONSTANT_BUFFER_15)))
+ return 128;
+ llvm_unreachable("unhandled address space");
+}
+
+bool R600TTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
+ Align Alignment,
+ unsigned AddrSpace) const {
+ // We allow vectorization of flat stores, even though we may need to decompose
+ // them later if they may access private memory. We don't have enough context
+ // here, and legalization can handle it.
+ return (AddrSpace != AMDGPUAS::PRIVATE_ADDRESS);
+}
+
+bool R600TTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
+ Align Alignment,
+ unsigned AddrSpace) const {
+ return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
+}
+
+bool R600TTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
+ Align Alignment,
+ unsigned AddrSpace) const {
+ return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
+}
+
+unsigned R600TTIImpl::getMaxInterleaveFactor(unsigned VF) {
+ // Disable unrolling if the loop is not vectorized.
+ // TODO: Enable this again.
+ if (VF == 1)
+ return 1;
+
+ return 8;
+}
+
+InstructionCost R600TTIImpl::getCFInstrCost(unsigned Opcode,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I) {
+ if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency)
+ return Opcode == Instruction::PHI ? 0 : 1;
+
+ // XXX - For some reason this isn't called for switch.
+ switch (Opcode) {
+ case Instruction::Br:
+ case Instruction::Ret:
+ return 10;
+ default:
+ return BaseT::getCFInstrCost(Opcode, CostKind, I);
+ }
+}
+
+InstructionCost R600TTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
+ unsigned Index) {
+ switch (Opcode) {
+ case Instruction::ExtractElement:
+ case Instruction::InsertElement: {
+ unsigned EltSize =
+ DL.getTypeSizeInBits(cast<VectorType>(ValTy)->getElementType());
+ if (EltSize < 32) {
+ return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
+ }
+
+ // Extracts are just reads of a subregister, so are free. Inserts are
+ // considered free because we don't want to have any cost for scalarizing
+ // operations, and we don't have to copy into a different register class.
+
+ // Dynamic indexing isn't free and is best avoided.
+ return Index == ~0u ? 2 : 0;
+ }
+ default:
+ return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
+ }
+}
+
+void R600TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE) {
+ CommonTTI.getUnrollingPreferences(L, SE, UP, ORE);
+}
+
+void R600TTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP) {
+ CommonTTI.getPeelingPreferences(L, SE, PP);
+}
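getVectorInstrCost above treats extracts and inserts of 32-bit (and wider) elements as free subregister accesses, charges 2 only when the index is dynamic (Index == ~0u), and defers smaller elements to the base implementation. A small standalone sketch of just that cost rule (the fallback value of 1 is illustrative, standing in for BaseT::getVectorInstrCost):

#include <cstdio>

// Cost rule mirrored from R600TTIImpl::getVectorInstrCost for
// extract/insert element.
unsigned vectorInstrCost(unsigned EltSizeInBits, unsigned Index,
                         unsigned BaseCost = 1) {
  if (EltSizeInBits < 32)
    return BaseCost;        // small elements: fall back to the generic cost
  return Index == ~0u ? 2   // dynamic index: not free, best avoided
                      : 0;  // constant index: just a subregister read/write
}

int main() {
  std::printf("%u\n", vectorInstrCost(32, 1));   // 0: free extract
  std::printf("%u\n", vectorInstrCost(32, ~0u)); // 2: dynamic index
  std::printf("%u\n", vectorInstrCost(16, 0));   // 1: generic fallback
}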
diff --git a/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.h b/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.h
new file mode 100644
index 000000000000..544292bc4fd9
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.h
@@ -0,0 +1,69 @@
+//===- R600TargetTransformInfo.h - R600 specific TTI --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file describes a TargetTransformInfo::Concept conforming object specific to the
+/// R600 target machine. It uses the target's detailed information to
+/// provide more precise answers to certain TTI queries, while letting the
+/// target independent and default TTI implementations handle the rest.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_R600TARGETTRANSFORMINFO_H
+#define LLVM_LIB_TARGET_AMDGPU_R600TARGETTRANSFORMINFO_H
+
+#include "AMDGPUTargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+
+namespace llvm {
+
+class R600Subtarget;
+class AMDGPUTargetLowering;
+
+class R600TTIImpl final : public BasicTTIImplBase<R600TTIImpl> {
+ using BaseT = BasicTTIImplBase<R600TTIImpl>;
+ using TTI = TargetTransformInfo;
+
+ friend BaseT;
+
+ const R600Subtarget *ST;
+ const AMDGPUTargetLowering *TLI;
+ AMDGPUTTIImpl CommonTTI;
+
+public:
+ explicit R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F);
+
+ const R600Subtarget *getST() const { return ST; }
+ const AMDGPUTargetLowering *getTLI() const { return TLI; }
+
+ void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE);
+ void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP);
+ unsigned getHardwareNumberOfRegisters(bool Vec) const;
+ unsigned getNumberOfRegisters(bool Vec) const;
+ TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind Vector) const;
+ unsigned getMinVectorRegisterBitWidth() const;
+ unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
+ bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, Align Alignment,
+ unsigned AddrSpace) const;
+ bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
+ unsigned AddrSpace) const;
+ bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
+ unsigned AddrSpace) const;
+ unsigned getMaxInterleaveFactor(unsigned VF);
+ InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr);
+ InstructionCost getVectorInstrCost(unsigned Opcode, Type *ValTy,
+ unsigned Index);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_R600TARGETTRANSFORMINFO_H
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index d3c0d792804d..777744f08cde 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -7,13 +7,20 @@
/// \file
//===----------------------------------------------------------------------===//
-#include "llvm/MC/MCInstrDesc.h"
-
#ifndef LLVM_LIB_TARGET_AMDGPU_SIDEFINES_H
#define LLVM_LIB_TARGET_AMDGPU_SIDEFINES_H
+#include "llvm/MC/MCInstrDesc.h"
+
namespace llvm {
+// This needs to be kept in sync with the field bits in SIRegisterClass.
+enum SIRCFlags : uint8_t {
+ // For vector registers.
+ HasVGPR = 1 << 0,
+ HasAGPR = 1 << 1
+}; // enum SIRCFlags
+
namespace SIInstrFlags {
// This needs to be kept in sync with the field bits in InstSI.
enum : uint64_t {
@@ -132,64 +139,67 @@ enum ClassFlags : unsigned {
}
namespace AMDGPU {
- enum OperandType : unsigned {
- /// Operands with register or 32-bit immediate
- OPERAND_REG_IMM_INT32 = MCOI::OPERAND_FIRST_TARGET,
- OPERAND_REG_IMM_INT64,
- OPERAND_REG_IMM_INT16,
- OPERAND_REG_IMM_FP32,
- OPERAND_REG_IMM_FP64,
- OPERAND_REG_IMM_FP16,
- OPERAND_REG_IMM_V2FP16,
- OPERAND_REG_IMM_V2INT16,
- OPERAND_REG_IMM_V2INT32,
- OPERAND_REG_IMM_V2FP32,
-
- /// Operands with register or inline constant
- OPERAND_REG_INLINE_C_INT16,
- OPERAND_REG_INLINE_C_INT32,
- OPERAND_REG_INLINE_C_INT64,
- OPERAND_REG_INLINE_C_FP16,
- OPERAND_REG_INLINE_C_FP32,
- OPERAND_REG_INLINE_C_FP64,
- OPERAND_REG_INLINE_C_V2INT16,
- OPERAND_REG_INLINE_C_V2FP16,
- OPERAND_REG_INLINE_C_V2INT32,
- OPERAND_REG_INLINE_C_V2FP32,
-
- /// Operands with an AccVGPR register or inline constant
- OPERAND_REG_INLINE_AC_INT16,
- OPERAND_REG_INLINE_AC_INT32,
- OPERAND_REG_INLINE_AC_FP16,
- OPERAND_REG_INLINE_AC_FP32,
- OPERAND_REG_INLINE_AC_FP64,
- OPERAND_REG_INLINE_AC_V2INT16,
- OPERAND_REG_INLINE_AC_V2FP16,
- OPERAND_REG_INLINE_AC_V2INT32,
- OPERAND_REG_INLINE_AC_V2FP32,
-
- OPERAND_REG_IMM_FIRST = OPERAND_REG_IMM_INT32,
- OPERAND_REG_IMM_LAST = OPERAND_REG_IMM_V2FP32,
-
- OPERAND_REG_INLINE_C_FIRST = OPERAND_REG_INLINE_C_INT16,
- OPERAND_REG_INLINE_C_LAST = OPERAND_REG_INLINE_AC_V2FP32,
-
- OPERAND_REG_INLINE_AC_FIRST = OPERAND_REG_INLINE_AC_INT16,
- OPERAND_REG_INLINE_AC_LAST = OPERAND_REG_INLINE_AC_V2FP32,
-
- OPERAND_SRC_FIRST = OPERAND_REG_IMM_INT32,
- OPERAND_SRC_LAST = OPERAND_REG_INLINE_C_LAST,
-
- // Operand for source modifiers for VOP instructions
- OPERAND_INPUT_MODS,
-
- // Operand for SDWA instructions
- OPERAND_SDWA_VOPC_DST,
-
- /// Operand with 32-bit immediate that uses the constant bus.
- OPERAND_KIMM32,
- OPERAND_KIMM16
- };
+enum OperandType : unsigned {
+ /// Operands with register or 32-bit immediate
+ OPERAND_REG_IMM_INT32 = MCOI::OPERAND_FIRST_TARGET,
+ OPERAND_REG_IMM_INT64,
+ OPERAND_REG_IMM_INT16,
+ OPERAND_REG_IMM_FP32,
+ OPERAND_REG_IMM_FP64,
+ OPERAND_REG_IMM_FP16,
+ OPERAND_REG_IMM_FP16_DEFERRED,
+ OPERAND_REG_IMM_FP32_DEFERRED,
+ OPERAND_REG_IMM_V2FP16,
+ OPERAND_REG_IMM_V2INT16,
+ OPERAND_REG_IMM_V2INT32,
+ OPERAND_REG_IMM_V2FP32,
+
+ /// Operands with register or inline constant
+ OPERAND_REG_INLINE_C_INT16,
+ OPERAND_REG_INLINE_C_INT32,
+ OPERAND_REG_INLINE_C_INT64,
+ OPERAND_REG_INLINE_C_FP16,
+ OPERAND_REG_INLINE_C_FP32,
+ OPERAND_REG_INLINE_C_FP64,
+ OPERAND_REG_INLINE_C_V2INT16,
+ OPERAND_REG_INLINE_C_V2FP16,
+ OPERAND_REG_INLINE_C_V2INT32,
+ OPERAND_REG_INLINE_C_V2FP32,
+
+ /// Operand with 32-bit immediate that uses the constant bus.
+ OPERAND_KIMM32,
+ OPERAND_KIMM16,
+
+ /// Operands with an AccVGPR register or inline constant
+ OPERAND_REG_INLINE_AC_INT16,
+ OPERAND_REG_INLINE_AC_INT32,
+ OPERAND_REG_INLINE_AC_FP16,
+ OPERAND_REG_INLINE_AC_FP32,
+ OPERAND_REG_INLINE_AC_FP64,
+ OPERAND_REG_INLINE_AC_V2INT16,
+ OPERAND_REG_INLINE_AC_V2FP16,
+ OPERAND_REG_INLINE_AC_V2INT32,
+ OPERAND_REG_INLINE_AC_V2FP32,
+
+ OPERAND_REG_IMM_FIRST = OPERAND_REG_IMM_INT32,
+ OPERAND_REG_IMM_LAST = OPERAND_REG_IMM_V2FP32,
+
+ OPERAND_REG_INLINE_C_FIRST = OPERAND_REG_INLINE_C_INT16,
+ OPERAND_REG_INLINE_C_LAST = OPERAND_REG_INLINE_AC_V2FP32,
+
+ OPERAND_REG_INLINE_AC_FIRST = OPERAND_REG_INLINE_AC_INT16,
+ OPERAND_REG_INLINE_AC_LAST = OPERAND_REG_INLINE_AC_V2FP32,
+
+ OPERAND_SRC_FIRST = OPERAND_REG_IMM_INT32,
+ OPERAND_SRC_LAST = OPERAND_REG_INLINE_C_LAST,
+
+ // Operand for source modifiers for VOP instructions
+ OPERAND_INPUT_MODS,
+
+ // Operand for SDWA instructions
+ OPERAND_SDWA_VOPC_DST
+
+};
}
// Input operand modifiers bit-masks
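The new SIRCFlags values are single-bit flags kept in sync with the register-class field bits, so a register class can advertise VGPR and AGPR support independently and can set both bits at once. A tiny standalone sketch of how such flags compose and are tested (the helper functions are illustrative, not in-tree APIs):

#include <cstdint>
#include <cstdio>

// Same values as the SIRCFlags enum added above.
enum SIRCFlags : uint8_t { HasVGPR = 1 << 0, HasAGPR = 1 << 1 };

bool hasVGPR(uint8_t Flags) { return Flags & HasVGPR; }
bool hasAGPR(uint8_t Flags) { return Flags & HasAGPR; }

int main() {
  uint8_t VectorOnly = HasVGPR;
  uint8_t AccAndVector = HasVGPR | HasAGPR; // a combined VGPR/AGPR class
  std::printf("%d %d\n", hasVGPR(VectorOnly), hasAGPR(VectorOnly));     // 1 0
  std::printf("%d %d\n", hasVGPR(AccAndVector), hasAGPR(AccAndVector)); // 1 1
}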
diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index d5c56bf2a321..cf93a63f26a0 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -585,10 +585,43 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
case AMDGPU::SOFT_WQM:
case AMDGPU::STRICT_WWM: {
Register DstReg = MI.getOperand(0).getReg();
-
const TargetRegisterClass *SrcRC, *DstRC;
std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, *MRI);
+ if (MI.isCopy()) {
+ Register SrcReg = MI.getOperand(1).getReg();
+ if (SrcReg == AMDGPU::SCC) {
+ Register SCCCopy = MRI->createVirtualRegister(
+ TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID));
+ I = BuildMI(*MI.getParent(),
+ std::next(MachineBasicBlock::iterator(MI)),
+ MI.getDebugLoc(),
+ TII->get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
+ : AMDGPU::S_CSELECT_B64),
+ SCCCopy)
+ .addImm(-1)
+ .addImm(0);
+ I = BuildMI(*MI.getParent(), std::next(I), I->getDebugLoc(),
+ TII->get(AMDGPU::COPY), DstReg)
+ .addReg(SCCCopy);
+ MI.eraseFromParent();
+ continue;
+ } else if (DstReg == AMDGPU::SCC) {
+ unsigned Opcode =
+ ST.isWave64() ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
+ Register Exec = ST.isWave64() ? AMDGPU::EXEC : AMDGPU::EXEC_LO;
+ Register Tmp = MRI->createVirtualRegister(TRI->getBoolRC());
+ I = BuildMI(*MI.getParent(),
+ std::next(MachineBasicBlock::iterator(MI)),
+ MI.getDebugLoc(), TII->get(Opcode))
+ .addReg(Tmp, getDefRegState(true))
+ .addReg(SrcReg)
+ .addReg(Exec);
+ MI.eraseFromParent();
+ continue;
+ }
+ }
+
if (!DstReg.isVirtual()) {
// If the destination register is a physical register there isn't
// really much we can do to fix this.
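The new code above materializes copies that involve the SCC status bit: a copy out of SCC becomes an S_CSELECT of -1/0 into a boolean register, and a copy into SCC becomes an S_AND of the source with EXEC, with the 32- or 64-bit opcode picked by wave size. A small standalone sketch of just that opcode selection (the strings are mnemonics for illustration, not generated machine code):

#include <cstdio>

// Pick the scalar opcode used when rewriting an SCC copy, mirroring the
// wave32/wave64 choice made in SIFixSGPRCopies.
const char *sccCopyOpcode(bool CopyFromSCC, bool IsWave32) {
  if (CopyFromSCC)                  // SCC -> vreg: select -1 or 0
    return IsWave32 ? "S_CSELECT_B32" : "S_CSELECT_B64";
  return IsWave32 ? "S_AND_B32"     // vreg -> SCC: AND with EXEC,
                  : "S_AND_B64";    //   which sets SCC as a side effect
}

int main() {
  std::printf("%s\n", sccCopyOpcode(true, false)); // S_CSELECT_B64
  std::printf("%s\n", sccCopyOpcode(false, true)); // S_AND_B32
}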
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index ad910522ba90..a3a0e9c9b9ac 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -228,7 +228,7 @@ static bool updateOperand(FoldCandidate &Fold,
MachineOperand &Mod = MI->getOperand(ModIdx);
unsigned Val = Mod.getImm();
if (!(Val & SISrcMods::OP_SEL_0) && (Val & SISrcMods::OP_SEL_1)) {
- // Only apply the following transformation if that operand requries
+ // Only apply the following transformation if that operand requires
// a packed immediate.
switch (TII.get(Opcode).OpInfo[OpNo].OperandType) {
case AMDGPU::OPERAND_REG_IMM_V2FP16:
@@ -452,7 +452,7 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
const SIRegisterInfo &SRI = TII->getRegisterInfo();
// Fine if the operand can be encoded as an inline constant
- if (OpToFold->isImm()) {
+ if (TII->isLiteralConstantLike(*OpToFold, OpInfo)) {
if (!SRI.opCanUseInlineConstant(OpInfo.OperandType) ||
!TII->isInlineConstant(*OpToFold, OpInfo)) {
// Otherwise check for another constant
@@ -646,7 +646,7 @@ void SIFoldOperands::foldOperand(
return;
if (frameIndexMayFold(TII, *UseMI, UseOpIdx, OpToFold)) {
- // Sanity check that this is a stack access.
+ // Verify that this is a stack access.
// FIXME: Should probably use stack pseudos before frame lowering.
if (TII->isMUBUF(*UseMI)) {
@@ -688,7 +688,7 @@ void SIFoldOperands::foldOperand(
// Don't fold into a copy to a physical register with the same class. Doing
// so would interfere with the register coalescer's logic which would avoid
- // redundant initalizations.
+ // redundant initializations.
if (DestReg.isPhysical() && SrcRC->contains(DestReg))
return;
@@ -902,7 +902,7 @@ void SIFoldOperands::foldOperand(
tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);
// FIXME: We could try to change the instruction from 64-bit to 32-bit
- // to enable more folding opportunites. The shrink operands pass
+ // to enable more folding opportunities. The shrink operands pass
// already does this.
return;
}
@@ -1388,6 +1388,13 @@ bool SIFoldOperands::tryFoldClamp(MachineInstr &MI) {
DefClamp->setImm(1);
MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());
MI.eraseFromParent();
+
+ // Use of output modifiers forces VOP3 encoding for a VOP2 mac/fmac
+ // instruction, so we might as well convert it to the more flexible VOP3-only
+ // mad/fma form.
+ if (TII->convertToThreeAddress(*Def, nullptr, nullptr))
+ Def->eraseFromParent();
+
return true;
}
@@ -1526,6 +1533,13 @@ bool SIFoldOperands::tryFoldOMod(MachineInstr &MI) {
DefOMod->setImm(OMod);
MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());
MI.eraseFromParent();
+
+ // Use of output modifiers forces VOP3 encoding for a VOP2 mac/fmac
+ // instruction, so we might as well convert it to the more flexible VOP3-only
+ // mad/fma form.
+ if (TII->convertToThreeAddress(*Def, nullptr, nullptr))
+ Def->eraseFromParent();
+
return true;
}
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index c9883d38e08c..882b9a203755 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -125,8 +125,8 @@ static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
const SIMachineFunctionInfo &FuncInfo,
LivePhysRegs &LiveRegs, MachineFunction &MF,
MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, Register SpillReg,
- int FI) {
+ MachineBasicBlock::iterator I, const DebugLoc &DL,
+ Register SpillReg, int FI) {
unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
: AMDGPU::BUFFER_STORE_DWORD_OFFSET;
@@ -136,7 +136,7 @@ static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FI),
FrameInfo.getObjectAlign(FI));
LiveRegs.addReg(SpillReg);
- TRI.buildSpillLoadStore(MBB, I, Opc, FI, SpillReg, true,
+ TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, true,
FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr,
&LiveRegs);
LiveRegs.removeReg(SpillReg);
@@ -147,8 +147,8 @@ static void buildEpilogRestore(const GCNSubtarget &ST,
const SIMachineFunctionInfo &FuncInfo,
LivePhysRegs &LiveRegs, MachineFunction &MF,
MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, Register SpillReg,
- int FI) {
+ MachineBasicBlock::iterator I,
+ const DebugLoc &DL, Register SpillReg, int FI) {
unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
: AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
@@ -157,7 +157,7 @@ static void buildEpilogRestore(const GCNSubtarget &ST,
MachineMemOperand *MMO = MF.getMachineMemOperand(
PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FI),
FrameInfo.getObjectAlign(FI));
- TRI.buildSpillLoadStore(MBB, I, Opc, FI, SpillReg, false,
+ TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, false,
FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr,
&LiveRegs);
}
@@ -258,9 +258,10 @@ void SIFrameLowering::emitEntryFunctionFlatScratchInit(
// Mask the offset in [47:0] of the descriptor
const MCInstrDesc &SAndB32 = TII->get(AMDGPU::S_AND_B32);
- BuildMI(MBB, I, DL, SAndB32, FlatScrInitHi)
+ auto And = BuildMI(MBB, I, DL, SAndB32, FlatScrInitHi)
.addReg(FlatScrInitHi)
.addImm(0xffff);
+ And->getOperand(3).setIsDead(); // Mark SCC as dead.
} else {
Register FlatScratchInitReg =
MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT);
@@ -280,9 +281,12 @@ void SIFrameLowering::emitEntryFunctionFlatScratchInit(
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
.addReg(FlatScrInitLo)
.addReg(ScratchWaveOffsetReg);
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), FlatScrInitHi)
+ auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32),
+ FlatScrInitHi)
.addReg(FlatScrInitHi)
.addImm(0);
+ Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
+
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
addReg(FlatScrInitLo).
addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_LO |
@@ -298,9 +302,11 @@ void SIFrameLowering::emitEntryFunctionFlatScratchInit(
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
.addReg(FlatScrInitLo)
.addReg(ScratchWaveOffsetReg);
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), AMDGPU::FLAT_SCR_HI)
+ auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32),
+ AMDGPU::FLAT_SCR_HI)
.addReg(FlatScrInitHi)
.addImm(0);
+ Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
return;
}
@@ -318,9 +324,11 @@ void SIFrameLowering::emitEntryFunctionFlatScratchInit(
.addReg(ScratchWaveOffsetReg);
// Convert offset to 256-byte units.
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32), AMDGPU::FLAT_SCR_HI)
+ auto LShr = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32),
+ AMDGPU::FLAT_SCR_HI)
.addReg(FlatScrInitLo, RegState::Kill)
.addImm(8);
+ LShr->getOperand(3).setIsDead(true); // Mark SCC as dead.
}
// Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
@@ -419,9 +427,6 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
- // FIXME: Hack to not crash in situations which emitted an error.
- if (!PreloadedScratchWaveOffsetReg)
- return;
// We need to do the replacement of the private segment buffer register even
// if there are no stack objects. There could be stores to undef or a
@@ -467,7 +472,8 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
// chosen by SITargetLowering::allocateSystemSGPRs, COPY the scratch
// wave offset to a free SGPR.
Register ScratchWaveOffsetReg;
- if (TRI->isSubRegisterEq(ScratchRsrcReg, PreloadedScratchWaveOffsetReg)) {
+ if (PreloadedScratchWaveOffsetReg &&
+ TRI->isSubRegisterEq(ScratchRsrcReg, PreloadedScratchWaveOffsetReg)) {
ArrayRef<MCPhysReg> AllSGPRs = TRI->getAllSGPR32(MF);
unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
AllSGPRs = AllSGPRs.slice(
@@ -485,7 +491,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
} else {
ScratchWaveOffsetReg = PreloadedScratchWaveOffsetReg;
}
- assert(ScratchWaveOffsetReg);
+ assert(ScratchWaveOffsetReg || !PreloadedScratchWaveOffsetReg);
if (requiresStackPointerReference(MF)) {
Register SPReg = MFI->getStackPtrOffsetReg();
@@ -506,7 +512,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
(!allStackObjectsAreDead(FrameInfo) && ST.enableFlatScratch()));
if ((NeedsFlatScratchInit || ScratchRsrcReg) &&
- !ST.flatScratchIsArchitected()) {
+ PreloadedScratchWaveOffsetReg && !ST.flatScratchIsArchitected()) {
MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
}
@@ -660,10 +666,11 @@ void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
.addReg(ScratchRsrcSub0)
.addReg(ScratchWaveOffsetReg)
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1)
+ auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1)
.addReg(ScratchRsrcSub1)
.addImm(0)
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+ Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
}
bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
@@ -720,7 +727,9 @@ static Register buildScratchExecCopy(LivePhysRegs &LiveRegs,
const unsigned OrSaveExec =
ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
- BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), ScratchExecCopy).addImm(-1);
+ auto SaveExec = BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), ScratchExecCopy)
+ .addImm(-1);
+ SaveExec->getOperand(3).setIsDead(); // Mark SCC as dead.
return ScratchExecCopy;
}
@@ -776,7 +785,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI,
/*IsProlog*/ true);
- buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, Reg.VGPR,
+ buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, Reg.VGPR,
*Reg.FI);
}
@@ -791,7 +800,8 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
ScratchExecCopy =
buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ true);
- buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, VGPR, *FI);
+ buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, VGPR,
+ *FI);
}
if (ScratchExecCopy) {
@@ -817,7 +827,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
.addReg(FramePtrReg);
- buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR,
+ buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, TmpVGPR,
FramePtrFI);
}
@@ -835,7 +845,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
.addReg(BasePtrReg);
- buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR,
+ buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, TmpVGPR,
BasePtrFI);
}
@@ -927,10 +937,11 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
.addReg(StackPtrReg)
.addImm((Alignment - 1) * getScratchScaleFactor(ST))
.setMIFlag(MachineInstr::FrameSetup);
- BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
+ auto And = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
.addReg(FramePtrReg, RegState::Kill)
.addImm(-Alignment * getScratchScaleFactor(ST))
.setMIFlag(MachineInstr::FrameSetup);
+ And->getOperand(3).setIsDead(); // Mark SCC as dead.
FuncInfo->setIsStackRealigned(true);
} else if ((HasFP = hasFP(MF))) {
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
@@ -949,18 +960,22 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
}
if (HasFP && RoundedSize != 0) {
- BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
+ auto Add = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
.addReg(StackPtrReg)
.addImm(RoundedSize * getScratchScaleFactor(ST))
.setMIFlag(MachineInstr::FrameSetup);
+ Add->getOperand(3).setIsDead(); // Mark SCC as dead.
}
assert((!HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy ||
FuncInfo->FramePointerSaveIndex)) &&
"Needed to save FP but didn't save it anywhere");
+ // If we allow spilling to AGPRs, we may have saved the FP but then spilled
+ // everything into AGPRs instead of the stack.
assert((HasFP || (!FuncInfo->SGPRForFPSaveRestoreCopy &&
- !FuncInfo->FramePointerSaveIndex)) &&
+ !FuncInfo->FramePointerSaveIndex) ||
+ EnableSpillVGPRToAGPR) &&
"Saved FP but didn't need it");
assert((!HasBP || (FuncInfo->SGPRForBPSaveRestoreCopy ||
@@ -1000,10 +1015,11 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
Optional<int> BPSaveIndex = FuncInfo->BasePointerSaveIndex;
if (RoundedSize != 0 && hasFP(MF)) {
- BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
+ auto Add = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
.addReg(StackPtrReg)
.addImm(-static_cast<int64_t>(RoundedSize * getScratchScaleFactor(ST)))
.setMIFlag(MachineInstr::FrameDestroy);
+ Add->getOperand(3).setIsDead(); // Mark SCC as dead.
}
if (FuncInfo->SGPRForFPSaveRestoreCopy) {
@@ -1028,8 +1044,8 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
if (!TmpVGPR)
report_fatal_error("failed to find free scratch register");
- buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR,
- FramePtrFI);
+ buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL,
+ TmpVGPR, FramePtrFI);
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), FramePtrReg)
.addReg(TmpVGPR, RegState::Kill);
} else {
@@ -1054,8 +1070,8 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
if (!TmpVGPR)
report_fatal_error("failed to find free scratch register");
- buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR,
- BasePtrFI);
+ buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL,
+ TmpVGPR, BasePtrFI);
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), BasePtrReg)
.addReg(TmpVGPR, RegState::Kill);
} else {
@@ -1080,8 +1096,8 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
ScratchExecCopy =
buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ false);
- buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, Reg.VGPR,
- *Reg.FI);
+ buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL,
+ Reg.VGPR, *Reg.FI);
}
for (const auto &Reg : FuncInfo->WWMReservedRegs) {
@@ -1094,7 +1110,8 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
ScratchExecCopy =
buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ false);
- buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, VGPR, *FI);
+ buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, VGPR,
+ *FI);
}
if (ScratchExecCopy) {
@@ -1154,11 +1171,7 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
bool SeenDbgInstr = false;
for (MachineBasicBlock &MBB : MF) {
- MachineBasicBlock::iterator Next;
- for (auto I = MBB.begin(), E = MBB.end(); I != E; I = Next) {
- MachineInstr &MI = *I;
- Next = std::next(I);
-
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
if (MI.isDebugInstr())
SeenDbgInstr = true;
@@ -1199,7 +1212,6 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
if (MI.isDebugValue() && MI.getOperand(0).isFI() &&
SpillFIs[MI.getOperand(0).getIndex()]) {
MI.getOperand(0).ChangeToRegister(Register(), false /*isDef*/);
- MI.getOperand(0).setIsDebug();
}
}
}
@@ -1301,10 +1313,13 @@ void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
// If clearing VGPRs changed the mask, we will have some CSR VGPR spills.
const bool HaveAnyCSRVGPR = SavedRegs != AllSavedRegs;
- // We have to anticipate introducing CSR VGPR spills if we don't have any
- // stack objects already, since we require an FP if there is a call and stack.
+ // We have to anticipate introducing CSR VGPR spills, or a spill of the
+ // caller-saved VGPR reserved for SGPR spills (we now always create a stack
+ // entry for it), even if we don't have any stack objects yet, since we
+ // require an FP whenever there is a call and a stack.
MachineFrameInfo &FrameInfo = MF.getFrameInfo();
- const bool WillHaveFP = FrameInfo.hasCalls() && HaveAnyCSRVGPR;
+ const bool WillHaveFP =
+ FrameInfo.hasCalls() && (HaveAnyCSRVGPR || MFI->VGPRReservedForSGPRSpill);
// FP will be specially managed like SP.
if (WillHaveFP || hasFP(MF))
@@ -1373,9 +1388,10 @@ MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
Amount *= getScratchScaleFactor(ST);
if (IsDestroy)
Amount = -Amount;
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), SPReg)
+ auto Add = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), SPReg)
.addReg(SPReg)
.addImm(Amount);
+ Add->getOperand(3).setIsDead(); // Mark SCC as dead.
} else if (CalleePopAmount != 0) {
llvm_unreachable("is this used?");
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index d98acfc6c532..519c5b936536 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -19,10 +19,12 @@
#include "SIRegisterInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -465,11 +467,15 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
if (!Subtarget->hasBCNT(64))
setOperationAction(ISD::CTPOP, MVT::i64, Expand);
- if (Subtarget->hasFFBH())
+ if (Subtarget->hasFFBH()) {
+ setOperationAction(ISD::CTLZ, MVT::i32, Custom);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);
+ }
- if (Subtarget->hasFFBL())
+ if (Subtarget->hasFFBL()) {
+ setOperationAction(ISD::CTTZ, MVT::i32, Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom);
+ }
// We only really have 32-bit BFE instructions (and 16-bit on VI).
//
@@ -1061,7 +1067,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
AMDGPU::lookupRsrcIntrinsic(IntrID)) {
AttributeList Attr = Intrinsic::getAttributes(CI.getContext(),
(Intrinsic::ID)IntrID);
- if (Attr.hasFnAttribute(Attribute::ReadNone))
+ if (Attr.hasFnAttr(Attribute::ReadNone))
return false;
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
@@ -1076,7 +1082,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
}
Info.flags = MachineMemOperand::MODereferenceable;
- if (Attr.hasFnAttribute(Attribute::ReadOnly)) {
+ if (Attr.hasFnAttr(Attribute::ReadOnly)) {
unsigned DMaskLanes = 4;
if (RsrcIntr->IsImage) {
@@ -1100,7 +1106,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
// FIXME: What does alignment mean for an image?
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.flags |= MachineMemOperand::MOLoad;
- } else if (Attr.hasFnAttribute(Attribute::WriteOnly)) {
+ } else if (Attr.hasFnAttr(Attribute::WriteOnly)) {
Info.opc = ISD::INTRINSIC_VOID;
Type *DataTy = CI.getArgOperand(0)->getType();
@@ -1423,7 +1429,7 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
}
bool SITargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
- const SelectionDAG &DAG) const {
+ const MachineFunction &MF) const {
if (AS == AMDGPUAS::GLOBAL_ADDRESS || AS == AMDGPUAS::FLAT_ADDRESS) {
return (MemVT.getSizeInBits() <= 4 * 32);
} else if (AS == AMDGPUAS::PRIVATE_ADDRESS) {
@@ -1657,12 +1663,17 @@ SDValue SITargetLowering::lowerKernArgParameterPtr(SelectionDAG &DAG,
const ArgDescriptor *InputPtrReg;
const TargetRegisterClass *RC;
LLT ArgTy;
+ MVT PtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
std::tie(InputPtrReg, RC, ArgTy) =
Info->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
+ // We may not have the kernarg segment argument if we have no kernel
+ // arguments.
+ if (!InputPtrReg)
+ return DAG.getConstant(0, SL, PtrVT);
+
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
- MVT PtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
SDValue BasePtr = DAG.getCopyFromReg(Chain, SL,
MRI.getLiveInVirtReg(InputPtrReg->getRegister()), PtrVT);
@@ -1808,6 +1819,19 @@ SDValue SITargetLowering::getPreloadedValue(SelectionDAG &DAG,
LLT Ty;
std::tie(Reg, RC, Ty) = MFI.getPreloadedValue(PVID);
+ if (!Reg) {
+ if (PVID == AMDGPUFunctionArgInfo::PreloadedValue::KERNARG_SEGMENT_PTR) {
+ // It's possible for a kernarg intrinsic call to appear in a kernel with
+ // no allocated segment, in which case we do not add the user sgpr
+ // argument, so just return null.
+ return DAG.getConstant(0, SDLoc(), VT);
+ }
+
+ // It's undefined behavior if a function marked with the amdgpu-no-*
+ // attributes uses the corresponding intrinsic.
+ return DAG.getUNDEF(VT);
+ }
+
return CreateLiveInRegister(DAG, RC, Reg->getRegister(), VT);
}
@@ -2023,31 +2047,33 @@ void SITargetLowering::allocateSpecialInputSGPRs(
SIMachineFunctionInfo &Info) const {
auto &ArgInfo = Info.getArgInfo();
- // TODO: Unify handling with private memory pointers.
+ // We need to allocate these in place regardless of their use.
+ const bool IsFixed = AMDGPUTargetMachine::EnableFixedFunctionABI;
- if (Info.hasDispatchPtr())
+ // TODO: Unify handling with private memory pointers.
+ if (IsFixed || Info.hasDispatchPtr())
allocateSGPR64Input(CCInfo, ArgInfo.DispatchPtr);
- if (Info.hasQueuePtr())
+ if (IsFixed || Info.hasQueuePtr())
allocateSGPR64Input(CCInfo, ArgInfo.QueuePtr);
// Implicit arg ptr takes the place of the kernarg segment pointer. This is a
// constant offset from the kernarg segment.
- if (Info.hasImplicitArgPtr())
+ if (IsFixed || Info.hasImplicitArgPtr())
allocateSGPR64Input(CCInfo, ArgInfo.ImplicitArgPtr);
- if (Info.hasDispatchID())
+ if (IsFixed || Info.hasDispatchID())
allocateSGPR64Input(CCInfo, ArgInfo.DispatchID);
// flat_scratch_init is not applicable for non-kernel functions.
- if (Info.hasWorkGroupIDX())
+ if (IsFixed || Info.hasWorkGroupIDX())
allocateSGPR32Input(CCInfo, ArgInfo.WorkGroupIDX);
- if (Info.hasWorkGroupIDY())
+ if (IsFixed || Info.hasWorkGroupIDY())
allocateSGPR32Input(CCInfo, ArgInfo.WorkGroupIDY);
- if (Info.hasWorkGroupIDZ())
+ if (IsFixed || Info.hasWorkGroupIDZ())
allocateSGPR32Input(CCInfo, ArgInfo.WorkGroupIDZ);
}
@@ -2590,9 +2616,12 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
SDValue ReturnAddrReg = CreateLiveInRegister(
DAG, &AMDGPU::SReg_64RegClass, TRI->getReturnAddressReg(MF), MVT::i64);
- SDValue ReturnAddrVirtualReg = DAG.getRegister(
- MF.getRegInfo().createVirtualRegister(&AMDGPU::CCR_SGPR_64RegClass),
- MVT::i64);
+ SDValue ReturnAddrVirtualReg =
+ DAG.getRegister(MF.getRegInfo().createVirtualRegister(
+ CallConv != CallingConv::AMDGPU_Gfx
+ ? &AMDGPU::CCR_SGPR_64RegClass
+ : &AMDGPU::Gfx_CCR_SGPR_64RegClass),
+ MVT::i64);
Chain =
DAG.getCopyToReg(Chain, DL, ReturnAddrVirtualReg, ReturnAddrReg, Flag);
Flag = Chain.getValue(1);
@@ -2655,8 +2684,15 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
RetOps.push_back(Flag);
unsigned Opc = AMDGPUISD::ENDPGM;
- if (!IsWaveEnd)
- Opc = IsShader ? AMDGPUISD::RETURN_TO_EPILOG : AMDGPUISD::RET_FLAG;
+ if (!IsWaveEnd) {
+ if (IsShader)
+ Opc = AMDGPUISD::RETURN_TO_EPILOG;
+ else if (CallConv == CallingConv::AMDGPU_Gfx)
+ Opc = AMDGPUISD::RET_GFX_FLAG;
+ else
+ Opc = AMDGPUISD::RET_FLAG;
+ }
+
return DAG.getNode(Opc, DL, MVT::Other, RetOps);
}
@@ -2747,21 +2783,28 @@ void SITargetLowering::passSpecialInputs(
// TODO: Unify with private memory register handling. This is complicated by
// the fact that at least in kernels, the input argument is not necessarily
// in the same location as the input.
- AMDGPUFunctionArgInfo::PreloadedValue InputRegs[] = {
- AMDGPUFunctionArgInfo::DISPATCH_PTR,
- AMDGPUFunctionArgInfo::QUEUE_PTR,
- AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR,
- AMDGPUFunctionArgInfo::DISPATCH_ID,
- AMDGPUFunctionArgInfo::WORKGROUP_ID_X,
- AMDGPUFunctionArgInfo::WORKGROUP_ID_Y,
- AMDGPUFunctionArgInfo::WORKGROUP_ID_Z
+ static constexpr std::pair<AMDGPUFunctionArgInfo::PreloadedValue,
+ StringLiteral> ImplicitAttrs[] = {
+ {AMDGPUFunctionArgInfo::DISPATCH_PTR, "amdgpu-no-dispatch-ptr"},
+ {AMDGPUFunctionArgInfo::QUEUE_PTR, "amdgpu-no-queue-ptr"},
+ {AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR, "amdgpu-no-implicitarg-ptr"},
+ {AMDGPUFunctionArgInfo::DISPATCH_ID, "amdgpu-no-dispatch-id"},
+ {AMDGPUFunctionArgInfo::WORKGROUP_ID_X, "amdgpu-no-workgroup-id-x"},
+ {AMDGPUFunctionArgInfo::WORKGROUP_ID_Y, "amdgpu-no-workgroup-id-y"},
+ {AMDGPUFunctionArgInfo::WORKGROUP_ID_Z, "amdgpu-no-workgroup-id-z"}
};
- for (auto InputID : InputRegs) {
+ for (auto Attr : ImplicitAttrs) {
const ArgDescriptor *OutgoingArg;
const TargetRegisterClass *ArgRC;
LLT ArgTy;
+ AMDGPUFunctionArgInfo::PreloadedValue InputID = Attr.first;
+
+ // If the callee does not use the attribute value, skip copying the value.
+ if (CLI.CB->hasFnAttr(Attr.second))
+ continue;
+
std::tie(OutgoingArg, ArgRC, ArgTy) =
CalleeArgInfo->getPreloadedValue(InputID);
if (!OutgoingArg)
@@ -2780,11 +2823,14 @@ void SITargetLowering::passSpecialInputs(
if (IncomingArg) {
InputReg = loadInputValue(DAG, ArgRC, ArgVT, DL, *IncomingArg);
- } else {
+ } else if (InputID == AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR) {
// The implicit arg ptr is special because it doesn't have a corresponding
// input for kernels, and is computed from the kernarg segment pointer.
- assert(InputID == AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR);
InputReg = getImplicitArgPtr(DAG, DL);
+ } else {
+ // We may have proven the input wasn't needed, even though the ABI still
+ // requires it. We just need to allocate the register appropriately.
+ InputReg = DAG.getUNDEF(ArgVT);
}
if (OutgoingArg->isRegister()) {
@@ -2827,11 +2873,17 @@ void SITargetLowering::passSpecialInputs(
SDValue InputReg;
SDLoc SL;
+ const bool NeedWorkItemIDX = !CLI.CB->hasFnAttr("amdgpu-no-workitem-id-x");
+ const bool NeedWorkItemIDY = !CLI.CB->hasFnAttr("amdgpu-no-workitem-id-y");
+ const bool NeedWorkItemIDZ = !CLI.CB->hasFnAttr("amdgpu-no-workitem-id-z");
+
// If incoming ids are not packed we need to pack them.
- if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo->WorkItemIDX)
+ if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo->WorkItemIDX &&
+ NeedWorkItemIDX)
InputReg = loadInputValue(DAG, ArgRC, MVT::i32, DL, *IncomingArgX);
- if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo->WorkItemIDY) {
+ if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo->WorkItemIDY &&
+ NeedWorkItemIDY) {
SDValue Y = loadInputValue(DAG, ArgRC, MVT::i32, DL, *IncomingArgY);
Y = DAG.getNode(ISD::SHL, SL, MVT::i32, Y,
DAG.getShiftAmountConstant(10, MVT::i32, SL));
@@ -2839,7 +2891,8 @@ void SITargetLowering::passSpecialInputs(
DAG.getNode(ISD::OR, SL, MVT::i32, InputReg, Y) : Y;
}
- if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo->WorkItemIDZ) {
+ if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo->WorkItemIDZ &&
+ NeedWorkItemIDZ) {
SDValue Z = loadInputValue(DAG, ArgRC, MVT::i32, DL, *IncomingArgZ);
Z = DAG.getNode(ISD::SHL, SL, MVT::i32, Z,
DAG.getShiftAmountConstant(20, MVT::i32, SL));
@@ -2847,7 +2900,7 @@ void SITargetLowering::passSpecialInputs(
DAG.getNode(ISD::OR, SL, MVT::i32, InputReg, Z) : Z;
}
- if (!InputReg.getNode()) {
+ if (!InputReg && (NeedWorkItemIDX || NeedWorkItemIDY || NeedWorkItemIDZ)) {
    // Workitem ids are already packed; any of the present incoming arguments
// will carry all required fields.
ArgDescriptor IncomingArg = ArgDescriptor::createArg(
@@ -2858,13 +2911,17 @@ void SITargetLowering::passSpecialInputs(
}
if (OutgoingArg->isRegister()) {
- RegsToPass.emplace_back(OutgoingArg->getRegister(), InputReg);
+ if (InputReg)
+ RegsToPass.emplace_back(OutgoingArg->getRegister(), InputReg);
+
CCInfo.AllocateReg(OutgoingArg->getRegister());
} else {
unsigned SpecialArgOffset = CCInfo.AllocateStack(4, Align(4));
- SDValue ArgStore = storeStackInputValue(DAG, DL, Chain, InputReg,
- SpecialArgOffset);
- MemOpChains.push_back(ArgStore);
+ if (InputReg) {
+ SDValue ArgStore = storeStackInputValue(DAG, DL, Chain, InputReg,
+ SpecialArgOffset);
+ MemOpChains.push_back(ArgStore);
+ }
}
}
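
The hunks above pack the three work-item IDs into a single 32-bit register (X in bits 0-9, Y shifted into bits 10-19, Z into bits 20-29) and only materialize the value when the call is not marked with the corresponding amdgpu-no-workitem-id-* attribute. A standalone sketch of the packed layout this code assumes (illustrative helper names, not part of the patch):

#include <cstdint>

// Pack/unpack the 10-bit work-item ID fields used above.
static uint32_t packWorkItemIDs(uint32_t X, uint32_t Y, uint32_t Z) {
  return (X & 0x3ff) | ((Y & 0x3ff) << 10) | ((Z & 0x3ff) << 20);
}

static uint32_t unpackWorkItemID(uint32_t Packed, unsigned Dim) {
  return (Packed >> (10 * Dim)) & 0x3ff;  // Dim: 0 = X, 1 = Y, 2 = Z
}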
@@ -4091,7 +4148,10 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
}
const TargetRegisterClass *Src2RC = MRI.getRegClass(Src2.getReg());
- if (TRI->getRegSizeInBits(*Src2RC) == 64) {
+ unsigned WaveSize = TRI->getRegSizeInBits(*Src2RC);
+ assert(WaveSize == 64 || WaveSize == 32);
+
+ if (WaveSize == 64) {
if (ST.hasScalarCompareEq64()) {
BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_CMP_LG_U64))
.addReg(Src2.getReg())
@@ -4121,8 +4181,13 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
BuildMI(*BB, MII, DL, TII->get(Opc), Dest.getReg()).add(Src0).add(Src1);
- BuildMI(*BB, MII, DL, TII->get(AMDGPU::COPY), CarryDest.getReg())
- .addReg(AMDGPU::SCC);
+ unsigned SelOpc =
+ (WaveSize == 64) ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
+
+ BuildMI(*BB, MII, DL, TII->get(SelOpc), CarryDest.getReg())
+ .addImm(-1)
+ .addImm(0);
+
MI.eraseFromParent();
return BB;
}
@@ -4261,6 +4326,13 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
MI.eraseFromParent();
return BB;
}
+ case AMDGPU::V_ADDC_U32_e32:
+ case AMDGPU::V_SUBB_U32_e32:
+ case AMDGPU::V_SUBBREV_U32_e32:
+ // These instructions have an implicit use of vcc which counts towards the
+ // constant bus limit.
+ TII->legalizeOperands(MI);
+ return BB;
case AMDGPU::DS_GWS_INIT:
case AMDGPU::DS_GWS_SEMA_BR:
case AMDGPU::DS_GWS_BARRIER:
@@ -4818,7 +4890,7 @@ static SDValue lowerBALLOTIntrinsic(const SITargetLowering &TLI, SDNode *N,
}
if (const ConstantSDNode *Arg = dyn_cast<ConstantSDNode>(Src)) {
// (ballot 0) -> 0
- if (Arg->isNullValue())
+ if (Arg->isZero())
return DAG.getConstant(0, SL, VT);
// (ballot 1) -> EXEC/EXEC_LO
@@ -5266,9 +5338,18 @@ SDValue SITargetLowering::lowerTrapHsaQueuePtr(
MachineFunction &MF = DAG.getMachineFunction();
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
Register UserSGPR = Info->getQueuePtrUserSGPR();
- assert(UserSGPR != AMDGPU::NoRegister);
- SDValue QueuePtr = CreateLiveInRegister(
- DAG, &AMDGPU::SReg_64RegClass, UserSGPR, MVT::i64);
+
+ SDValue QueuePtr;
+ if (UserSGPR == AMDGPU::NoRegister) {
+ // We probably are in a function incorrectly marked with
+ // amdgpu-no-queue-ptr. This is undefined. We don't want to delete the trap,
+ // so just use a null pointer.
+ QueuePtr = DAG.getConstant(0, SL, MVT::i64);
+ } else {
+ QueuePtr = CreateLiveInRegister(
+ DAG, &AMDGPU::SReg_64RegClass, UserSGPR, MVT::i64);
+ }
+
SDValue SGPR01 = DAG.getRegister(AMDGPU::SGPR0_SGPR1, MVT::i64);
SDValue ToReg = DAG.getCopyToReg(Chain, SL, SGPR01,
QueuePtr, SDValue());
@@ -5345,7 +5426,11 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL,
MachineFunction &MF = DAG.getMachineFunction();
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
Register UserSGPR = Info->getQueuePtrUserSGPR();
- assert(UserSGPR != AMDGPU::NoRegister);
+ if (UserSGPR == AMDGPU::NoRegister) {
+ // We probably are in a function incorrectly marked with
+ // amdgpu-no-queue-ptr. This is undefined.
+ return DAG.getUNDEF(MVT::i32);
+ }
SDValue QueuePtr = CreateLiveInRegister(
DAG, &AMDGPU::SReg_64RegClass, UserSGPR, MVT::i64);
@@ -5936,6 +6021,9 @@ static SDValue constructRetValue(SelectionDAG &DAG,
EVT LegalReqRetVT = ReqRetVT;
if (!ReqRetVT.isVector()) {
+ if (!Data.getValueType().isInteger())
+ Data = DAG.getNode(ISD::BITCAST, DL,
+ Data.getValueType().changeTypeToInteger(), Data);
Data = DAG.getNode(ISD::TRUNCATE, DL, ReqRetVT.changeTypeToInteger(), Data);
} else {
// We need to widen the return vector to a legal type
@@ -6124,7 +6212,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
if (MIPMappingInfo) {
if (auto *ConstantLod = dyn_cast<ConstantSDNode>(
Op.getOperand(ArgOffset + Intr->MipIndex))) {
- if (ConstantLod->isNullValue()) {
+ if (ConstantLod->isZero()) {
IntrOpcode = MIPMappingInfo->NONMIP; // set new opcode to variant without _mip
VAddrEnd--; // remove 'mip'
}
@@ -6659,7 +6747,7 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
// intrinsic has the numerator as the first operand to match a normal
// division operation.
- SDValue Src0 = Param->isAllOnesValue() ? Numerator : Denominator;
+ SDValue Src0 = Param->isAllOnes() ? Numerator : Denominator;
return DAG.getNode(AMDGPUISD::DIV_SCALE, DL, Op->getVTList(), Src0,
Denominator, Numerator);
@@ -6793,7 +6881,7 @@ static void updateBufferMMO(MachineMemOperand *MMO, SDValue VOffset,
}
if (VIndex && (!isa<ConstantSDNode>(VIndex) ||
- !cast<ConstantSDNode>(VIndex)->isNullValue())) {
+ !cast<ConstantSDNode>(VIndex)->isZero())) {
// The strided index component of the address is not known to be zero, so we
// cannot represent it in the MMO. Give up.
MMO->setValue((Value *)nullptr);
@@ -7341,7 +7429,6 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op->getVTList(), Ops, VT, M->getMemOperand());
}
case Intrinsic::amdgcn_image_bvh_intersect_ray: {
- SDLoc DL(Op);
MemSDNode *M = cast<MemSDNode>(Op);
SDValue NodePtr = M->getOperand(2);
SDValue RayExtent = M->getOperand(3);
@@ -7360,12 +7447,27 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
return SDValue();
}
- bool IsA16 = RayDir.getValueType().getVectorElementType() == MVT::f16;
- bool Is64 = NodePtr.getValueType() == MVT::i64;
- unsigned Opcode = IsA16 ? Is64 ? AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_nsa
- : AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16_nsa
- : Is64 ? AMDGPU::IMAGE_BVH64_INTERSECT_RAY_nsa
- : AMDGPU::IMAGE_BVH_INTERSECT_RAY_nsa;
+ const bool IsA16 = RayDir.getValueType().getVectorElementType() == MVT::f16;
+ const bool Is64 = NodePtr.getValueType() == MVT::i64;
+ const unsigned NumVDataDwords = 4;
+ const unsigned NumVAddrDwords = IsA16 ? (Is64 ? 9 : 8) : (Is64 ? 12 : 11);
+ const bool UseNSA = Subtarget->hasNSAEncoding() &&
+ NumVAddrDwords <= Subtarget->getNSAMaxSize();
+ const unsigned BaseOpcodes[2][2] = {
+ {AMDGPU::IMAGE_BVH_INTERSECT_RAY, AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16},
+ {AMDGPU::IMAGE_BVH64_INTERSECT_RAY,
+ AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16}};
+ int Opcode;
+ if (UseNSA) {
+ Opcode = AMDGPU::getMIMGOpcode(BaseOpcodes[Is64][IsA16],
+ AMDGPU::MIMGEncGfx10NSA, NumVDataDwords,
+ NumVAddrDwords);
+ } else {
+ Opcode = AMDGPU::getMIMGOpcode(
+ BaseOpcodes[Is64][IsA16], AMDGPU::MIMGEncGfx10Default, NumVDataDwords,
+ PowerOf2Ceil(NumVAddrDwords));
+ }
+ assert(Opcode != -1);
SmallVector<SDValue, 16> Ops;
@@ -7405,6 +7507,20 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
packLanes(RayOrigin, true);
packLanes(RayDir, true);
packLanes(RayInvDir, false);
+
+ if (!UseNSA) {
+ // Build a single vector containing all the operands so far prepared.
+ if (NumVAddrDwords > 8) {
+ SDValue Undef = DAG.getUNDEF(MVT::i32);
+ Ops.append(16 - Ops.size(), Undef);
+ }
+ assert(Ops.size() == 8 || Ops.size() == 16);
+ SDValue MergedOps = DAG.getBuildVector(
+ Ops.size() == 16 ? MVT::v16i32 : MVT::v8i32, DL, Ops);
+ Ops.clear();
+ Ops.push_back(MergedOps);
+ }
+
Ops.push_back(TDescr);
if (IsA16)
Ops.push_back(DAG.getTargetConstant(1, DL, MVT::i1));
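
The opcode selection above sizes the vaddr operands per the table in the hunk and, for the non-NSA fallback, merges them into a single vector padded to a power of two. A small illustrative summary (plain C++, helper names are made up):

// vaddr dword count for image_bvh_intersect_ray, as computed above.
static unsigned numVAddrDwords(bool Is64, bool IsA16) {
  return IsA16 ? (Is64 ? 9 : 8) : (Is64 ? 12 : 11);
}

// Non-NSA encodings take one v8i32/v16i32 operand, so the address operands
// are padded with undef up to the next power of two (8 or 16).
static unsigned nonNSAVectorDwords(unsigned NumVAddrDwords) {
  unsigned P = 1;
  while (P < NumVAddrDwords)
    P <<= 1;
  return P;
}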
@@ -7610,7 +7726,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
Op.getOperand(0) // Chain
};
- unsigned Opc = Done->isNullValue() ? AMDGPU::EXP : AMDGPU::EXP_DONE;
+ unsigned Opc = Done->isZero() ? AMDGPU::EXP : AMDGPU::EXP_DONE;
return SDValue(DAG.getMachineNode(Opc, DL, Op->getVTList(), Ops), 0);
}
case Intrinsic::amdgcn_s_barrier: {
@@ -8241,6 +8357,16 @@ SDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
SDValue Cond = Op.getOperand(0);
+ if (Subtarget->hasScalarCompareEq64() && Op->getOperand(0)->hasOneUse() &&
+ !Op->isDivergent()) {
+ if (VT == MVT::i64)
+ return Op;
+ SDValue LHS = DAG.getNode(ISD::BITCAST, DL, MVT::i64, Op.getOperand(1));
+ SDValue RHS = DAG.getNode(ISD::BITCAST, DL, MVT::i64, Op.getOperand(2));
+ return DAG.getNode(ISD::BITCAST, DL, VT,
+ DAG.getSelect(DL, MVT::i64, Cond, LHS, RHS));
+ }
+
SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
SDValue One = DAG.getConstant(1, DL, MVT::i32);
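
The new fast path above keeps a uniform, single-use 64-bit select as an i64 operation by bitcasting non-integer operands to i64, selecting, and casting back. A self-contained illustration of why that round-trip preserves the value (illustrative only, not part of the patch):

#include <cstdint>
#include <cstring>

static double selectViaI64(bool Cond, double A, double B) {
  uint64_t Ai, Bi;
  std::memcpy(&Ai, &A, sizeof(Ai));  // bitcast to i64
  std::memcpy(&Bi, &B, sizeof(Bi));
  uint64_t R = Cond ? Ai : Bi;       // 64-bit select
  double Out;
  std::memcpy(&Out, &R, sizeof(Out));  // bitcast back
  return Out;                          // same bits as Cond ? A : B
}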
@@ -9358,7 +9484,8 @@ SDValue SITargetLowering::performOrCombine(SDNode *N,
const ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (CRHS) {
if (SDValue Split
- = splitBinaryBitConstantOp(DCI, SDLoc(N), ISD::OR, LHS, CRHS))
+ = splitBinaryBitConstantOp(DCI, SDLoc(N), ISD::OR,
+ N->getOperand(0), CRHS))
return Split;
}
@@ -9445,7 +9572,7 @@ SDValue SITargetLowering::performClassCombine(SDNode *N,
// fp_class x, 0 -> false
if (const ConstantSDNode *CMask = dyn_cast<ConstantSDNode>(Mask)) {
- if (CMask->isNullValue())
+ if (CMask->isZero())
return DAG.getConstant(0, SDLoc(N), MVT::i1);
}
@@ -10348,7 +10475,7 @@ SDValue SITargetLowering::performAddCombine(SDNode *N,
return getMad64_32(DAG, SL, VT, MulLHS, MulRHS, AddRHS, false);
}
- if (numBitsSigned(MulLHS, DAG) < 32 && numBitsSigned(MulRHS, DAG) < 32) {
+ if (numBitsSigned(MulLHS, DAG) <= 32 && numBitsSigned(MulRHS, DAG) <= 32) {
MulLHS = DAG.getSExtOrTrunc(MulLHS, SL, MVT::i32);
MulRHS = DAG.getSExtOrTrunc(MulRHS, SL, MVT::i32);
AddRHS = DAG.getSExtOrTrunc(AddRHS, SL, MVT::i64);
@@ -10434,7 +10561,7 @@ SDValue SITargetLowering::performSubCombine(SDNode *N,
if (LHS.getOpcode() == ISD::SUBCARRY) {
// sub (subcarry x, 0, cc), y => subcarry x, y, cc
auto C = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
- if (!C || !C->isNullValue())
+ if (!C || !C->isZero())
return SDValue();
SDValue Args[] = { LHS.getOperand(0), RHS, LHS.getOperand(2) };
return DAG.getNode(ISD::SUBCARRY, SDLoc(N), LHS->getVTList(), Args);
@@ -10657,20 +10784,20 @@ SDValue SITargetLowering::performSetCCCombine(SDNode *N,
// setcc (sext from i1 cc), -1, eq|sle|uge) => cc
// setcc (sext from i1 cc), 0, eq|sge|ule) => not cc => xor cc, -1
// setcc (sext from i1 cc), 0, ne|ugt|slt) => cc
- if ((CRHS->isAllOnesValue() &&
+ if ((CRHS->isAllOnes() &&
(CC == ISD::SETNE || CC == ISD::SETGT || CC == ISD::SETULT)) ||
- (CRHS->isNullValue() &&
+ (CRHS->isZero() &&
(CC == ISD::SETEQ || CC == ISD::SETGE || CC == ISD::SETULE)))
return DAG.getNode(ISD::XOR, SL, MVT::i1, LHS.getOperand(0),
DAG.getConstant(-1, SL, MVT::i1));
- if ((CRHS->isAllOnesValue() &&
+ if ((CRHS->isAllOnes() &&
(CC == ISD::SETEQ || CC == ISD::SETLE || CC == ISD::SETUGE)) ||
- (CRHS->isNullValue() &&
+ (CRHS->isZero() &&
(CC == ISD::SETNE || CC == ISD::SETUGT || CC == ISD::SETLT)))
return LHS.getOperand(0);
}
- uint64_t CRHSVal = CRHS->getZExtValue();
+ const APInt &CRHSVal = CRHS->getAPIntValue();
if ((CC == ISD::SETEQ || CC == ISD::SETNE) &&
LHS.getOpcode() == ISD::SELECT &&
isa<ConstantSDNode>(LHS.getOperand(1)) &&
@@ -10682,8 +10809,8 @@ SDValue SITargetLowering::performSetCCCombine(SDNode *N,
// setcc (select cc, CT, CF), CF, ne => cc
// setcc (select cc, CT, CF), CT, ne => xor cc, -1
// setcc (select cc, CT, CF), CT, eq => cc
- uint64_t CT = LHS.getConstantOperandVal(1);
- uint64_t CF = LHS.getConstantOperandVal(2);
+ const APInt &CT = LHS.getConstantOperandAPInt(1);
+ const APInt &CF = LHS.getConstantOperandAPInt(2);
if ((CF == CRHSVal && CC == ISD::SETEQ) ||
(CT == CRHSVal && CC == ISD::SETNE))
@@ -10747,7 +10874,7 @@ SDValue SITargetLowering::performCvtF32UByteNCombine(SDNode *N,
// cvt_f32_ubyte1 (srl x, 16) -> cvt_f32_ubyte3 x
// cvt_f32_ubyte0 (srl x, 8) -> cvt_f32_ubyte1 x
if (auto *C = dyn_cast<ConstantSDNode>(Shift.getOperand(1))) {
- Shift = DAG.getZExtOrTrunc(Shift.getOperand(0),
+ SDValue Shifted = DAG.getZExtOrTrunc(Shift.getOperand(0),
SDLoc(Shift.getOperand(0)), MVT::i32);
unsigned ShiftOffset = 8 * Offset;
@@ -10758,7 +10885,7 @@ SDValue SITargetLowering::performCvtF32UByteNCombine(SDNode *N,
if (ShiftOffset < 32 && (ShiftOffset % 8) == 0) {
return DAG.getNode(AMDGPUISD::CVT_F32_UBYTE0 + ShiftOffset / 8, SL,
- MVT::f32, Shift);
+ MVT::f32, Shifted);
}
}
}
@@ -12086,6 +12213,25 @@ static bool fpModeMatchesGlobalFPAtomicMode(const AtomicRMWInst *RMW) {
TargetLowering::AtomicExpansionKind
SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
+
+ auto ReportUnsafeHWInst = [&](TargetLowering::AtomicExpansionKind Kind) {
+ OptimizationRemarkEmitter ORE(RMW->getFunction());
+ LLVMContext &Ctx = RMW->getFunction()->getContext();
+ SmallVector<StringRef> SSNs;
+ Ctx.getSyncScopeNames(SSNs);
+ auto MemScope = SSNs[RMW->getSyncScopeID()].empty()
+ ? "system"
+ : SSNs[RMW->getSyncScopeID()];
+ ORE.emit([&]() {
+ return OptimizationRemark(DEBUG_TYPE, "Passed", RMW)
+ << "Hardware instruction generated for atomic "
+ << RMW->getOperationName(RMW->getOperation())
+ << " operation at memory scope " << MemScope
+ << " due to an unsafe request.";
+ });
+ return Kind;
+ };
+
switch (RMW->getOperation()) {
case AtomicRMWInst::FAdd: {
Type *Ty = RMW->getType();
@@ -12120,28 +12266,30 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
SSID == RMW->getContext().getOrInsertSyncScopeID("one-as"))
return AtomicExpansionKind::CmpXChg;
- return AtomicExpansionKind::None;
+ return ReportUnsafeHWInst(AtomicExpansionKind::None);
}
if (AS == AMDGPUAS::FLAT_ADDRESS)
return AtomicExpansionKind::CmpXChg;
- return RMW->use_empty() ? AtomicExpansionKind::None
+ return RMW->use_empty() ? ReportUnsafeHWInst(AtomicExpansionKind::None)
: AtomicExpansionKind::CmpXChg;
}
  // DS FP atomics do respect the denormal mode, but the rounding mode is fixed
// to round-to-nearest-even.
// The only exception is DS_ADD_F64 which never flushes regardless of mode.
- if (AS == AMDGPUAS::LOCAL_ADDRESS && Subtarget->hasLDSFPAtomics()) {
+ if (AS == AMDGPUAS::LOCAL_ADDRESS && Subtarget->hasLDSFPAtomicAdd()) {
if (!Ty->isDoubleTy())
return AtomicExpansionKind::None;
- return (fpModeMatchesGlobalFPAtomicMode(RMW) ||
- RMW->getFunction()
- ->getFnAttribute("amdgpu-unsafe-fp-atomics")
- .getValueAsString() == "true")
- ? AtomicExpansionKind::None
+ if (fpModeMatchesGlobalFPAtomicMode(RMW))
+ return AtomicExpansionKind::None;
+
+ return RMW->getFunction()
+ ->getFnAttribute("amdgpu-unsafe-fp-atomics")
+ .getValueAsString() == "true"
+ ? ReportUnsafeHWInst(AtomicExpansionKind::None)
: AtomicExpansionKind::CmpXChg;
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index f3d34267a81d..1e48c96ad3c8 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -16,6 +16,7 @@
#include "AMDGPUISelLowering.h"
#include "AMDGPUArgumentUsageInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
namespace llvm {
@@ -267,7 +268,7 @@ public:
Instruction *I = nullptr) const override;
bool canMergeStoresTo(unsigned AS, EVT MemVT,
- const SelectionDAG &DAG) const override;
+ const MachineFunction &MF) const override;
bool allowsMisalignedMemoryAccessesImpl(
unsigned Size, unsigned AddrSpace, Align Alignment,
diff --git a/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp b/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
index 7ba20eb6027b..125f006a1d1d 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
@@ -58,6 +58,8 @@ enum HardClauseType {
// Internal instructions, which are allowed in the middle of a hard clause,
// except for s_waitcnt.
HARDCLAUSE_INTERNAL,
+  // Meta instructions that do not result in any ISA instructions, such as
+  // KILL.
+ HARDCLAUSE_IGNORE,
// Instructions that are not allowed in a hard clause: SALU, export, branch,
// message, GDS, s_waitcnt and anything else not mentioned above.
HARDCLAUSE_ILLEGAL,
@@ -100,6 +102,8 @@ public:
// It's safe to treat the rest as illegal.
if (MI.getOpcode() == AMDGPU::S_NOP)
return HARDCLAUSE_INTERNAL;
+ if (MI.isMetaInstruction())
+ return HARDCLAUSE_IGNORE;
return HARDCLAUSE_ILLEGAL;
}
@@ -112,25 +116,25 @@ public:
// The last non-internal instruction in the clause.
MachineInstr *Last = nullptr;
// The length of the clause including any internal instructions in the
- // middle or after the end of the clause.
+ // middle (but not at the end) of the clause.
unsigned Length = 0;
+  // Internal instructions at the end of a clause should not be included in
+ // the clause. Count them in TrailingInternalLength until a new memory
+ // instruction is added.
+ unsigned TrailingInternalLength = 0;
// The base operands of *Last.
SmallVector<const MachineOperand *, 4> BaseOps;
};
bool emitClause(const ClauseInfo &CI, const SIInstrInfo *SII) {
- // Get the size of the clause excluding any internal instructions at the
- // end.
- unsigned Size =
- std::distance(CI.First->getIterator(), CI.Last->getIterator()) + 1;
- if (Size < 2)
+ if (CI.First == CI.Last)
return false;
- assert(Size <= 64 && "Hard clause is too long!");
+ assert(CI.Length <= 64 && "Hard clause is too long!");
auto &MBB = *CI.First->getParent();
auto ClauseMI =
BuildMI(MBB, *CI.First, DebugLoc(), SII->get(AMDGPU::S_CLAUSE))
- .addImm(Size - 1);
+ .addImm(CI.Length - 1);
finalizeBundle(MBB, ClauseMI->getIterator(),
std::next(CI.Last->getIterator()));
return true;
@@ -168,6 +172,7 @@ public:
if (CI.Length == 64 ||
(CI.Length && Type != HARDCLAUSE_INTERNAL &&
+ Type != HARDCLAUSE_IGNORE &&
(Type != CI.Type ||
// Note that we lie to shouldClusterMemOps about the size of the
// cluster. When shouldClusterMemOps is called from the machine
@@ -182,14 +187,20 @@ public:
if (CI.Length) {
// Extend the current clause.
- ++CI.Length;
- if (Type != HARDCLAUSE_INTERNAL) {
- CI.Last = &MI;
- CI.BaseOps = std::move(BaseOps);
+ if (Type != HARDCLAUSE_IGNORE) {
+ if (Type == HARDCLAUSE_INTERNAL) {
+ ++CI.TrailingInternalLength;
+ } else {
+ ++CI.Length;
+ CI.Length += CI.TrailingInternalLength;
+ CI.TrailingInternalLength = 0;
+ CI.Last = &MI;
+ CI.BaseOps = std::move(BaseOps);
+ }
}
} else if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
// Start a new clause.
- CI = ClauseInfo{Type, &MI, &MI, 1, std::move(BaseOps)};
+ CI = ClauseInfo{Type, &MI, &MI, 1, 0, std::move(BaseOps)};
}
}
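
The clause bookkeeping above reduces to a small invariant: internal instructions count toward the emitted clause length only once a later memory instruction extends the clause past them, so trailing internal instructions never inflate the s_clause immediate. A minimal sketch of that logic (illustrative names, not part of the patch):

struct ClauseLengthTracker {
  unsigned Length = 0;            // length reported in s_clause
  unsigned TrailingInternal = 0;  // internal instructions not yet covered

  void addMemoryInstr() {
    // A new memory instruction absorbs any internal instructions between it
    // and the previous memory instruction.
    Length += 1 + TrailingInternal;
    TrailingInternal = 0;
  }
  void addInternalInstr() { ++TrailingInternal; }
};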
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 7d6f79922d2e..f4e5771d2a2a 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -73,7 +73,7 @@ public:
// Class of object that encapsulates latest instruction counter score
// associated with the operand. Used for determining whether
-// s_waitcnt instruction needs to be emited.
+// s_waitcnt instruction needs to be emitted.
#define CNT_MASK(t) (1u << (t))
@@ -963,6 +963,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
// with knowledge of the called routines.
if (MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
MI.getOpcode() == AMDGPU::S_SETPC_B64_return ||
+ MI.getOpcode() == AMDGPU::S_SETPC_B64_return_gfx ||
(MI.isReturn() && MI.isCall() && !callWaitsOnFunctionEntry(MI))) {
Wait = Wait.combined(AMDGPU::Waitcnt::allZero(ST->hasVscnt()));
}
@@ -1686,17 +1687,13 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
bool HaveScalarStores = false;
- for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE;
- ++BI) {
- MachineBasicBlock &MBB = *BI;
-
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E;
- ++I) {
- if (!HaveScalarStores && TII->isScalarStore(*I))
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ if (!HaveScalarStores && TII->isScalarStore(MI))
HaveScalarStores = true;
- if (I->getOpcode() == AMDGPU::S_ENDPGM ||
- I->getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG)
+ if (MI.getOpcode() == AMDGPU::S_ENDPGM ||
+ MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG)
EndPgmBlocks.push_back(&MBB);
}
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 7ab0f7a100c5..4a928123b68f 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -19,8 +19,10 @@
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/IR/DiagnosticInfo.h"
@@ -108,7 +110,7 @@ static bool nodesHaveSameOperandValue(SDNode *N0, SDNode* N1, unsigned OpName) {
bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
AAResults *AA) const {
- if (isVOP1(MI) || isVOP2(MI) || isVOP3(MI) || isSDWA(MI)) {
+ if (isVOP1(MI) || isVOP2(MI) || isVOP3(MI) || isSDWA(MI) || isSALU(MI)) {
// Normally VALU use of exec would block the rematerialization, but that
// is OK in this case to have an implicit exec read as all VALU do.
// We really want all of the generic logic for this except for this.
@@ -116,6 +118,10 @@ bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
// Another potential implicit use is mode register. The core logic of
// the RA will not attempt rematerialization if mode is set anywhere
// in the function, otherwise it is safe since mode is not changed.
+
+  // There is a difference from the generic method, which does not allow
+  // rematerialization if there are virtual register uses. We allow this,
+  // therefore this method includes SOP instructions as well.
return !MI.hasImplicitDef() &&
MI.getNumImplicitOperands() == MI.getDesc().getNumImplicitUses() &&
!MI.mayRaiseFPException();
@@ -1637,10 +1643,20 @@ void SIInstrInfo::insertReturn(MachineBasicBlock &MBB) const {
unsigned SIInstrInfo::getNumWaitStates(const MachineInstr &MI) {
switch (MI.getOpcode()) {
- default: return 1; // FIXME: Do wait states equal cycles?
+ default:
+ if (MI.isMetaInstruction())
+ return 0;
+ return 1; // FIXME: Do wait states equal cycles?
case AMDGPU::S_NOP:
return MI.getOperand(0).getImm() + 1;
+
+ // FIXME: Any other pseudo instruction?
+ // SI_RETURN_TO_EPILOG is a fallthrough to code outside of the function. The
+  // hazard, even if one exists, won't really be visible. Should we handle it?
+ case AMDGPU::SI_MASKED_UNREACHABLE:
+ case AMDGPU::WAVE_BARRIER:
+ return 0;
}
}
@@ -1889,7 +1905,7 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
.addImm(AMDGPU::VGPRIndexMode::DST_ENABLE);
SetOn->getOperand(3).setIsUndef();
- const MCInstrDesc &OpDesc = get(AMDGPU::V_MOV_B32_indirect);
+ const MCInstrDesc &OpDesc = get(AMDGPU::V_MOV_B32_indirect_write);
MachineInstrBuilder MIB =
BuildMI(MBB, MI, DL, OpDesc)
.addReg(RI.getSubReg(VecReg, SubReg), RegState::Undef)
@@ -1929,11 +1945,10 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
.addImm(AMDGPU::VGPRIndexMode::SRC0_ENABLE);
SetOn->getOperand(3).setIsUndef();
- BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32))
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_indirect_read))
.addDef(Dst)
.addReg(RI.getSubReg(VecReg, SubReg), RegState::Undef)
- .addReg(VecReg, RegState::Implicit | (IsUndef ? RegState::Undef : 0))
- .addReg(AMDGPU::M0, RegState::Implicit);
+ .addReg(VecReg, RegState::Implicit | (IsUndef ? RegState::Undef : 0));
MachineInstr *SetOff = BuildMI(MBB, MI, DL, get(AMDGPU::S_SET_GPR_IDX_OFF));
@@ -2208,15 +2223,17 @@ MachineBasicBlock *SIInstrInfo::getBranchDestBlock(
return MI.getOperand(0).getMBB();
}
-unsigned SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
- MachineBasicBlock &DestBB,
- const DebugLoc &DL,
- int64_t BrOffset,
- RegScavenger *RS) const {
+void SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock &DestBB,
+ MachineBasicBlock &RestoreBB,
+ const DebugLoc &DL, int64_t BrOffset,
+ RegScavenger *RS) const {
assert(RS && "RegScavenger required for long branching");
assert(MBB.empty() &&
"new block should be inserted for expanding unconditional branch");
assert(MBB.pred_size() == 1);
+ assert(RestoreBB.empty() &&
+ "restore block should be inserted for restoring clobbered registers");
MachineFunction *MF = MBB.getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
@@ -2253,14 +2270,6 @@ unsigned SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
BuildMI(&MBB, DL, get(AMDGPU::S_SETPC_B64))
.addReg(PCReg);
- auto ComputeBlockSize = [](const TargetInstrInfo *TII,
- const MachineBasicBlock &MBB) {
- unsigned Size = 0;
- for (const MachineInstr &MI : MBB)
- Size += TII->getInstSizeInBytes(MI);
- return Size;
- };
-
// FIXME: If spilling is necessary, this will fail because this scavenger has
// no emergency stack slots. It is non-trivial to spill in this situation,
// because the restore code needs to be specially placed after the
@@ -2299,22 +2308,34 @@ unsigned SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
RS->enterBasicBlockEnd(MBB);
Register Scav = RS->scavengeRegisterBackwards(
- AMDGPU::SReg_64RegClass,
- MachineBasicBlock::iterator(GetPC), false, 0);
- MRI.replaceRegWith(PCReg, Scav);
- MRI.clearVirtRegs();
- RS->setRegUsed(Scav);
+ AMDGPU::SReg_64RegClass, MachineBasicBlock::iterator(GetPC),
+ /* RestoreAfter */ false, 0, /* AllowSpill */ false);
+ if (Scav) {
+ RS->setRegUsed(Scav);
+ MRI.replaceRegWith(PCReg, Scav);
+ MRI.clearVirtRegs();
+ } else {
+    // Since spilling an SGPR requires a VGPR, we reuse the temporary VGPR's
+    // slot for the SGPR spill.
+ const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS);
+ MRI.replaceRegWith(PCReg, AMDGPU::SGPR0_SGPR1);
+ MRI.clearVirtRegs();
+ }
+ MCSymbol *DestLabel = Scav ? DestBB.getSymbol() : RestoreBB.getSymbol();
// Now, the distance could be defined.
auto *Offset = MCBinaryExpr::createSub(
- MCSymbolRefExpr::create(DestBB.getSymbol(), MCCtx),
+ MCSymbolRefExpr::create(DestLabel, MCCtx),
MCSymbolRefExpr::create(PostGetPCLabel, MCCtx), MCCtx);
// Add offset assignments.
auto *Mask = MCConstantExpr::create(0xFFFFFFFFULL, MCCtx);
OffsetLo->setVariableValue(MCBinaryExpr::createAnd(Offset, Mask, MCCtx));
auto *ShAmt = MCConstantExpr::create(32, MCCtx);
OffsetHi->setVariableValue(MCBinaryExpr::createAShr(Offset, ShAmt, MCCtx));
- return ComputeBlockSize(this, MBB);
+
+ return;
}
unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {
@@ -2443,16 +2464,15 @@ bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
unsigned SIInstrInfo::removeBranch(MachineBasicBlock &MBB,
int *BytesRemoved) const {
- MachineBasicBlock::iterator I = MBB.getFirstTerminator();
-
unsigned Count = 0;
unsigned RemovedSize = 0;
- while (I != MBB.end()) {
- MachineBasicBlock::iterator Next = std::next(I);
- RemovedSize += getInstSizeInBytes(*I);
- I->eraseFromParent();
- ++Count;
- I = Next;
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB.terminators())) {
+ // Skip over artificial terminators when removing instructions.
+ if (MI.isBranch() || MI.isReturn()) {
+ RemovedSize += getInstSizeInBytes(MI);
+ MI.eraseFromParent();
+ ++Count;
+ }
}
if (BytesRemoved)
@@ -2691,18 +2711,11 @@ void SIInstrInfo::insertSelect(MachineBasicBlock &MBB,
}
}
-bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) const {
+bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) {
switch (MI.getOpcode()) {
case AMDGPU::V_MOV_B32_e32:
case AMDGPU::V_MOV_B32_e64:
- case AMDGPU::V_MOV_B64_PSEUDO: {
- // If there are additional implicit register operands, this may be used for
- // register indexing so the source register operand isn't simply copied.
- unsigned NumOps = MI.getDesc().getNumOperands() +
- MI.getDesc().getNumImplicitUses();
-
- return MI.getNumOperands() == NumOps;
- }
+ case AMDGPU::V_MOV_B64_PSEUDO:
case AMDGPU::S_MOV_B32:
case AMDGPU::S_MOV_B64:
case AMDGPU::COPY:
@@ -3069,16 +3082,24 @@ bool SIInstrInfo::areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
return false;
}
-static int64_t getFoldableImm(const MachineOperand* MO) {
+static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI,
+ int64_t &Imm) {
+ if (Reg.isPhysical())
+ return false;
+ auto *Def = MRI.getUniqueVRegDef(Reg);
+ if (Def && SIInstrInfo::isFoldableCopy(*Def) && Def->getOperand(1).isImm()) {
+ Imm = Def->getOperand(1).getImm();
+ return true;
+ }
+ return false;
+}
+
+static bool getFoldableImm(const MachineOperand *MO, int64_t &Imm) {
if (!MO->isReg())
return false;
const MachineFunction *MF = MO->getParent()->getParent()->getParent();
const MachineRegisterInfo &MRI = MF->getRegInfo();
- auto Def = MRI.getUniqueVRegDef(MO->getReg());
- if (Def && Def->getOpcode() == AMDGPU::V_MOV_B32_e32 &&
- Def->getOperand(1).isImm())
- return Def->getOperand(1).getImm();
- return AMDGPU::NoRegister;
+ return getFoldableImm(MO->getReg(), MRI, Imm);
}
static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI,
@@ -3093,9 +3114,9 @@ static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI,
}
}
-MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
- MachineInstr &MI,
- LiveVariables *LV) const {
+MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
+ LiveVariables *LV,
+ LiveIntervals *LIS) const {
unsigned Opc = MI.getOpcode();
bool IsF16 = false;
bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64 ||
@@ -3145,50 +3166,58 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp);
const MachineOperand *Omod = getNamedOperand(MI, AMDGPU::OpName::omod);
MachineInstrBuilder MIB;
+ MachineBasicBlock &MBB = *MI.getParent();
if (!Src0Mods && !Src1Mods && !Clamp && !Omod && !IsF64 &&
// If we have an SGPR input, we will violate the constant bus restriction.
(ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() ||
- !RI.isSGPRReg(MBB->getParent()->getRegInfo(), Src0->getReg()))) {
- if (auto Imm = getFoldableImm(Src2)) {
+ !RI.isSGPRReg(MBB.getParent()->getRegInfo(), Src0->getReg()))) {
+ int64_t Imm;
+ if (getFoldableImm(Src2, Imm)) {
unsigned NewOpc =
IsFMA ? (IsF16 ? AMDGPU::V_FMAAK_F16 : AMDGPU::V_FMAAK_F32)
: (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32);
if (pseudoToMCOpcode(NewOpc) != -1) {
- MIB = BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc))
+ MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))
.add(*Dst)
.add(*Src0)
.add(*Src1)
.addImm(Imm);
updateLiveVariables(LV, MI, *MIB);
+ if (LIS)
+ LIS->ReplaceMachineInstrInMaps(MI, *MIB);
return MIB;
}
}
unsigned NewOpc = IsFMA
? (IsF16 ? AMDGPU::V_FMAMK_F16 : AMDGPU::V_FMAMK_F32)
: (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);
- if (auto Imm = getFoldableImm(Src1)) {
+ if (getFoldableImm(Src1, Imm)) {
if (pseudoToMCOpcode(NewOpc) != -1) {
- MIB = BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc))
+ MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))
.add(*Dst)
.add(*Src0)
.addImm(Imm)
.add(*Src2);
updateLiveVariables(LV, MI, *MIB);
+ if (LIS)
+ LIS->ReplaceMachineInstrInMaps(MI, *MIB);
return MIB;
}
}
- if (auto Imm = getFoldableImm(Src0)) {
+ if (getFoldableImm(Src0, Imm)) {
if (pseudoToMCOpcode(NewOpc) != -1 &&
isOperandLegal(
MI, AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::src0),
Src1)) {
- MIB = BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc))
+ MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))
.add(*Dst)
.add(*Src1)
.addImm(Imm)
.add(*Src2);
updateLiveVariables(LV, MI, *MIB);
+ if (LIS)
+ LIS->ReplaceMachineInstrInMaps(MI, *MIB);
return MIB;
}
}
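
The conversions above choose between two folded forms depending on which source operand the foldable immediate came from. Judging by the operand order in the BuildMI calls, their arithmetic is roughly as follows (illustrative sketch ignoring modifiers and exact rounding; not part of the patch):

// madak/fmaak fold the addend into a literal: dst = src0 * src1 + imm.
static float madakLike(float Src0, float Src1, float Imm) {
  return Src0 * Src1 + Imm;
}

// madmk/fmamk fold one multiplicand into a literal: dst = src0 * imm + src2.
static float madmkLike(float Src0, float Imm, float Src2) {
  return Src0 * Imm + Src2;
}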
@@ -3201,7 +3230,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
if (pseudoToMCOpcode(NewOpc) == -1)
return nullptr;
- MIB = BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc))
+ MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))
.add(*Dst)
.addImm(Src0Mods ? Src0Mods->getImm() : 0)
.add(*Src0)
@@ -3212,6 +3241,8 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
.addImm(Clamp ? Clamp->getImm() : 0)
.addImm(Omod ? Omod->getImm() : 0);
updateLiveVariables(LV, MI, *MIB);
+ if (LIS)
+ LIS->ReplaceMachineInstrInMaps(MI, *MIB);
return MIB;
}
@@ -3382,6 +3413,7 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
switch (OperandType) {
case AMDGPU::OPERAND_REG_IMM_INT32:
case AMDGPU::OPERAND_REG_IMM_FP32:
+ case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
case AMDGPU::OPERAND_REG_INLINE_C_INT32:
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
case AMDGPU::OPERAND_REG_IMM_V2FP32:
@@ -3420,6 +3452,7 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
// This suffers the same problem as the scalar 16-bit cases.
return AMDGPU::isInlinableIntLiteralV216(Imm);
case AMDGPU::OPERAND_REG_IMM_FP16:
+ case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_AC_FP16: {
if (isInt<16>(Imm) || isUInt<16>(Imm)) {
@@ -3440,6 +3473,9 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
uint32_t Trunc = static_cast<uint32_t>(Imm);
return AMDGPU::isInlinableLiteralV216(Trunc, ST.hasInv2PiInlineImm());
}
+ case AMDGPU::OPERAND_KIMM32:
+ case AMDGPU::OPERAND_KIMM16:
+ return false;
default:
llvm_unreachable("invalid bitwidth");
}
@@ -3566,11 +3602,13 @@ bool SIInstrInfo::canShrink(const MachineInstr &MI,
// Additional verification is needed for sdst/src2.
return true;
}
- case AMDGPU::V_MAC_F32_e64:
case AMDGPU::V_MAC_F16_e64:
- case AMDGPU::V_FMAC_F32_e64:
+ case AMDGPU::V_MAC_F32_e64:
+ case AMDGPU::V_MAC_LEGACY_F32_e64:
case AMDGPU::V_FMAC_F16_e64:
+ case AMDGPU::V_FMAC_F32_e64:
case AMDGPU::V_FMAC_F64_e64:
+ case AMDGPU::V_FMAC_LEGACY_F32_e64:
if (!Src2->isReg() || !RI.isVGPR(MRI, Src2->getReg()) ||
hasModifiersSet(MI, AMDGPU::OpName::src2_modifiers))
return false;
@@ -3813,6 +3851,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
break;
case AMDGPU::OPERAND_REG_IMM_INT32:
case AMDGPU::OPERAND_REG_IMM_FP32:
+ case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
break;
case AMDGPU::OPERAND_REG_INLINE_C_INT32:
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
@@ -4472,20 +4511,20 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32;
case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
- case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32;
- case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e32;
- case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e32;
- case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e32;
- case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e32;
- case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e32;
- case AMDGPU::S_CMP_EQ_U32: return AMDGPU::V_CMP_EQ_U32_e32;
- case AMDGPU::S_CMP_LG_U32: return AMDGPU::V_CMP_NE_U32_e32;
- case AMDGPU::S_CMP_GT_U32: return AMDGPU::V_CMP_GT_U32_e32;
- case AMDGPU::S_CMP_GE_U32: return AMDGPU::V_CMP_GE_U32_e32;
- case AMDGPU::S_CMP_LT_U32: return AMDGPU::V_CMP_LT_U32_e32;
- case AMDGPU::S_CMP_LE_U32: return AMDGPU::V_CMP_LE_U32_e32;
- case AMDGPU::S_CMP_EQ_U64: return AMDGPU::V_CMP_EQ_U64_e32;
- case AMDGPU::S_CMP_LG_U64: return AMDGPU::V_CMP_NE_U64_e32;
+ case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e64;
+ case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e64;
+ case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e64;
+ case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e64;
+ case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e64;
+ case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e64;
+ case AMDGPU::S_CMP_EQ_U32: return AMDGPU::V_CMP_EQ_U32_e64;
+ case AMDGPU::S_CMP_LG_U32: return AMDGPU::V_CMP_NE_U32_e64;
+ case AMDGPU::S_CMP_GT_U32: return AMDGPU::V_CMP_GT_U32_e64;
+ case AMDGPU::S_CMP_GE_U32: return AMDGPU::V_CMP_GE_U32_e64;
+ case AMDGPU::S_CMP_LT_U32: return AMDGPU::V_CMP_LT_U32_e64;
+ case AMDGPU::S_CMP_LE_U32: return AMDGPU::V_CMP_LE_U32_e64;
+ case AMDGPU::S_CMP_EQ_U64: return AMDGPU::V_CMP_EQ_U64_e64;
+ case AMDGPU::S_CMP_LG_U64: return AMDGPU::V_CMP_NE_U64_e64;
case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
@@ -4963,13 +5002,13 @@ void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI,
continue;
}
- if (RI.hasAGPRs(MRI.getRegClass(MO.getReg())) &&
+ if (RI.hasAGPRs(RI.getRegClassForReg(MRI, MO.getReg())) &&
!isOperandLegal(MI, Idx, &MO)) {
legalizeOpWithMove(MI, Idx);
continue;
}
- if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
+ if (!RI.isSGPRClass(RI.getRegClassForReg(MRI, MO.getReg())))
continue; // VGPRs are legal
// We can use one SGPR in each VOP3 instruction prior to GFX10
@@ -5165,8 +5204,7 @@ void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB,
return;
Register DstReg = MRI.createVirtualRegister(DstRC);
- MachineInstr *Copy =
- BuildMI(InsertMBB, I, DL, get(AMDGPU::COPY), DstReg).add(Op);
+ auto Copy = BuildMI(InsertMBB, I, DL, get(AMDGPU::COPY), DstReg).add(Op);
Op.setReg(DstReg);
Op.setSubReg(0);
@@ -5188,7 +5226,7 @@ void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB,
}
if (!RI.isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) &&
!ImpDef)
- Copy->addOperand(MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
+ Copy.addReg(AMDGPU::EXEC, RegState::Implicit);
}
// Emit the actual waterfall loop, executing the wrapped instruction for each
@@ -5897,18 +5935,18 @@ MachineBasicBlock *SIInstrInfo::moveToVALU(MachineInstr &TopInst,
continue;
case AMDGPU::S_CBRANCH_SCC0:
- case AMDGPU::S_CBRANCH_SCC1:
- // Clear unused bits of vcc
- if (ST.isWave32())
- BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(AMDGPU::S_AND_B32),
- AMDGPU::VCC_LO)
- .addReg(AMDGPU::EXEC_LO)
- .addReg(AMDGPU::VCC_LO);
- else
- BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(AMDGPU::S_AND_B64),
- AMDGPU::VCC)
- .addReg(AMDGPU::EXEC)
- .addReg(AMDGPU::VCC);
+ case AMDGPU::S_CBRANCH_SCC1: {
+ // Clear unused bits of vcc
+ Register CondReg = Inst.getOperand(1).getReg();
+ bool IsSCC = CondReg == AMDGPU::SCC;
+ Register VCC = RI.getVCC();
+ Register EXEC = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
+ unsigned Opc = ST.isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
+ BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(Opc), VCC)
+ .addReg(EXEC)
+ .addReg(IsSCC ? VCC : CondReg);
+ Inst.RemoveOperand(1);
+ }
break;
case AMDGPU::S_BFE_U64:
@@ -6016,12 +6054,43 @@ MachineBasicBlock *SIInstrInfo::moveToVALU(MachineInstr &TopInst,
continue;
case AMDGPU::S_CSELECT_B32:
+ lowerSelect32(Worklist, Inst, MDT);
+ Inst.eraseFromParent();
+ continue;
case AMDGPU::S_CSELECT_B64:
- lowerSelect(Worklist, Inst, MDT);
+ splitSelect64(Worklist, Inst, MDT);
Inst.eraseFromParent();
continue;
+ case AMDGPU::S_CMP_EQ_I32:
+ case AMDGPU::S_CMP_LG_I32:
+ case AMDGPU::S_CMP_GT_I32:
+ case AMDGPU::S_CMP_GE_I32:
+ case AMDGPU::S_CMP_LT_I32:
+ case AMDGPU::S_CMP_LE_I32:
+ case AMDGPU::S_CMP_EQ_U32:
+ case AMDGPU::S_CMP_LG_U32:
+ case AMDGPU::S_CMP_GT_U32:
+ case AMDGPU::S_CMP_GE_U32:
+ case AMDGPU::S_CMP_LT_U32:
+ case AMDGPU::S_CMP_LE_U32:
+ case AMDGPU::S_CMP_EQ_U64:
+ case AMDGPU::S_CMP_LG_U64: {
+ const MCInstrDesc &NewDesc = get(NewOpcode);
+ Register CondReg = MRI.createVirtualRegister(RI.getWaveMaskRegClass());
+ MachineInstr *NewInstr =
+ BuildMI(*MBB, Inst, Inst.getDebugLoc(), NewDesc, CondReg)
+ .add(Inst.getOperand(0))
+ .add(Inst.getOperand(1));
+ legalizeOperands(*NewInstr, MDT);
+ int SCCIdx = Inst.findRegisterDefOperandIdx(AMDGPU::SCC);
+ MachineOperand SCCOp = Inst.getOperand(SCCIdx);
+ addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
+ Inst.eraseFromParent();
+ }
+ continue;
}
+
if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
// We cannot move this instruction to the VALU, so we should try to
// legalize its operands instead.
@@ -6167,8 +6236,8 @@ SIInstrInfo::moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst,
return std::make_pair(false, nullptr);
}
-void SIInstrInfo::lowerSelect(SetVectorType &Worklist, MachineInstr &Inst,
- MachineDominatorTree *MDT) const {
+void SIInstrInfo::lowerSelect32(SetVectorType &Worklist, MachineInstr &Inst,
+ MachineDominatorTree *MDT) const {
MachineBasicBlock &MBB = *Inst.getParent();
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
@@ -6181,47 +6250,51 @@ void SIInstrInfo::lowerSelect(SetVectorType &Worklist, MachineInstr &Inst,
MachineOperand &Cond = Inst.getOperand(3);
Register SCCSource = Cond.getReg();
- // Find SCC def, and if that is a copy (SCC = COPY reg) then use reg instead.
- if (!Cond.isUndef()) {
- for (MachineInstr &CandI :
- make_range(std::next(MachineBasicBlock::reverse_iterator(Inst)),
- Inst.getParent()->rend())) {
- if (CandI.findRegisterDefOperandIdx(AMDGPU::SCC, false, false, &RI) !=
- -1) {
- if (CandI.isCopy() && CandI.getOperand(0).getReg() == AMDGPU::SCC) {
- SCCSource = CandI.getOperand(1).getReg();
- }
- break;
- }
- }
- }
+ bool IsSCC = (SCCSource == AMDGPU::SCC);
// If this is a trivial select where the condition is effectively not SCC
// (SCCSource is a source of copy to SCC), then the select is semantically
// equivalent to copying SCCSource. Hence, there is no need to create
// V_CNDMASK, we can just use that and bail out.
- if ((SCCSource != AMDGPU::SCC) && Src0.isImm() && (Src0.getImm() == -1) &&
- Src1.isImm() && (Src1.getImm() == 0)) {
+ if (!IsSCC && Src0.isImm() && (Src0.getImm() == -1) && Src1.isImm() &&
+ (Src1.getImm() == 0)) {
MRI.replaceRegWith(Dest.getReg(), SCCSource);
return;
}
- const TargetRegisterClass *TC = ST.getWavefrontSize() == 64
- ? &AMDGPU::SReg_64_XEXECRegClass
- : &AMDGPU::SReg_32_XM0_XEXECRegClass;
+ const TargetRegisterClass *TC =
+ RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
+
Register CopySCC = MRI.createVirtualRegister(TC);
- if (SCCSource == AMDGPU::SCC) {
- // Insert a trivial select instead of creating a copy, because a copy from
- // SCC would semantically mean just copying a single bit, but we may need
- // the result to be a vector condition mask that needs preserving.
- unsigned Opcode = (ST.getWavefrontSize() == 64) ? AMDGPU::S_CSELECT_B64
- : AMDGPU::S_CSELECT_B32;
- auto NewSelect =
- BuildMI(MBB, MII, DL, get(Opcode), CopySCC).addImm(-1).addImm(0);
- NewSelect->getOperand(3).setIsUndef(Cond.isUndef());
- } else {
- BuildMI(MBB, MII, DL, get(AMDGPU::COPY), CopySCC).addReg(SCCSource);
+ if (IsSCC) {
+    // Now look for the closest SCC def and, if it is a copy, replace
+    // SCCSource with the COPY's source register.
+ bool CopyFound = false;
+ for (MachineInstr &CandI :
+ make_range(std::next(MachineBasicBlock::reverse_iterator(Inst)),
+ Inst.getParent()->rend())) {
+ if (CandI.findRegisterDefOperandIdx(AMDGPU::SCC, false, false, &RI) !=
+ -1) {
+ if (CandI.isCopy() && CandI.getOperand(0).getReg() == AMDGPU::SCC) {
+ BuildMI(MBB, MII, DL, get(AMDGPU::COPY), CopySCC)
+ .addReg(CandI.getOperand(1).getReg());
+ CopyFound = true;
+ }
+ break;
+ }
+ }
+ if (!CopyFound) {
+ // SCC def is not a copy
+ // Insert a trivial select instead of creating a copy, because a copy from
+ // SCC would semantically mean just copying a single bit, but we may need
+ // the result to be a vector condition mask that needs preserving.
+ unsigned Opcode = (ST.getWavefrontSize() == 64) ? AMDGPU::S_CSELECT_B64
+ : AMDGPU::S_CSELECT_B32;
+ auto NewSelect =
+ BuildMI(MBB, MII, DL, get(Opcode), CopySCC).addImm(-1).addImm(0);
+ NewSelect->getOperand(3).setIsUndef(Cond.isUndef());
+ }
}
Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
@@ -6232,13 +6305,102 @@ void SIInstrInfo::lowerSelect(SetVectorType &Worklist, MachineInstr &Inst,
.add(Src1) // False
.addImm(0)
.add(Src0) // True
- .addReg(CopySCC);
+ .addReg(IsSCC ? CopySCC : SCCSource);
MRI.replaceRegWith(Dest.getReg(), ResultReg);
legalizeOperands(*UpdatedInst, MDT);
addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
}
+void SIInstrInfo::splitSelect64(SetVectorType &Worklist, MachineInstr &Inst,
+ MachineDominatorTree *MDT) const {
+ // Split S_CSELECT_B64 into a pair of S_CSELECT_B32 and lower them
+ // further.
+ const DebugLoc &DL = Inst.getDebugLoc();
+ MachineBasicBlock::iterator MII = Inst;
+ MachineBasicBlock &MBB = *Inst.getParent();
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+
+ // Get the original operands.
+ MachineOperand &Dest = Inst.getOperand(0);
+ MachineOperand &Src0 = Inst.getOperand(1);
+ MachineOperand &Src1 = Inst.getOperand(2);
+ MachineOperand &Cond = Inst.getOperand(3);
+
+ Register SCCSource = Cond.getReg();
+ bool IsSCC = (SCCSource == AMDGPU::SCC);
+
+ // If this is a trivial select where the condition is effectively not SCC
+ // (SCCSource is a source of copy to SCC), then the select is semantically
+ // equivalent to copying SCCSource. Hence, there is no need to create
+ // V_CNDMASK, we can just use that and bail out.
+ if (!IsSCC && (Src0.isImm() && Src0.getImm() == -1) &&
+ (Src1.isImm() && Src1.getImm() == 0)) {
+ MRI.replaceRegWith(Dest.getReg(), SCCSource);
+ return;
+ }
+
+ // Prepare the split destination.
+ Register FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
+ Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+
+ // Split the source operands.
+ const TargetRegisterClass *Src0RC = nullptr;
+ const TargetRegisterClass *Src0SubRC = nullptr;
+ if (Src0.isReg()) {
+ Src0RC = MRI.getRegClass(Src0.getReg());
+ Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
+ }
+ const TargetRegisterClass *Src1RC = nullptr;
+ const TargetRegisterClass *Src1SubRC = nullptr;
+ if (Src1.isReg()) {
+ Src1RC = MRI.getRegClass(Src1.getReg());
+ Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0);
+ }
+ // Split lo.
+ MachineOperand SrcReg0Sub0 =
+ buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, AMDGPU::sub0, Src0SubRC);
+ MachineOperand SrcReg1Sub0 =
+ buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, AMDGPU::sub0, Src1SubRC);
+ // Split hi.
+ MachineOperand SrcReg0Sub1 =
+ buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, AMDGPU::sub1, Src0SubRC);
+ MachineOperand SrcReg1Sub1 =
+ buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, AMDGPU::sub1, Src1SubRC);
+ // Select the lo part.
+ MachineInstr *LoHalf =
+ BuildMI(MBB, MII, DL, get(AMDGPU::S_CSELECT_B32), DestSub0)
+ .add(SrcReg0Sub0)
+ .add(SrcReg1Sub0);
+ // Replace the condition operand with the original one.
+ LoHalf->getOperand(3).setReg(SCCSource);
+ Worklist.insert(LoHalf);
+ // Select the hi part.
+ MachineInstr *HiHalf =
+ BuildMI(MBB, MII, DL, get(AMDGPU::S_CSELECT_B32), DestSub1)
+ .add(SrcReg0Sub1)
+ .add(SrcReg1Sub1);
+ // Replace the condition operand with the original one.
+ HiHalf->getOperand(3).setReg(SCCSource);
+ Worklist.insert(HiHalf);
+ // Merge them back to the original 64-bit one.
+ BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
+ .addReg(DestSub0)
+ .addImm(AMDGPU::sub0)
+ .addReg(DestSub1)
+ .addImm(AMDGPU::sub1);
+ MRI.replaceRegWith(Dest.getReg(), FullDestReg);
+
+ // Try to legalize the operands in case we need to swap the order to keep
+ // it valid.
+ legalizeOperands(*LoHalf, MDT);
+ legalizeOperands(*HiHalf, MDT);
+
+ // Move all users of this moved value.
+ addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
+}
+
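
splitSelect64 relies on a 64-bit select distributing over its two 32-bit halves. A self-contained sketch of that equivalence (illustrative only, not part of the patch):

#include <cstdint>

static uint64_t select64ViaHalves(bool Cond, uint64_t A, uint64_t B) {
  uint32_t Lo = Cond ? uint32_t(A) : uint32_t(B);
  uint32_t Hi = Cond ? uint32_t(A >> 32) : uint32_t(B >> 32);
  return (uint64_t(Hi) << 32) | Lo;  // same result as Cond ? A : B
}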
void SIInstrInfo::lowerScalarAbs(SetVectorType &Worklist,
MachineInstr &Inst) const {
MachineBasicBlock &MBB = *Inst.getParent();
@@ -6823,8 +6985,8 @@ void SIInstrInfo::movePackToVALU(SetVectorType &Worklist,
void SIInstrInfo::addSCCDefUsersToVALUWorklist(MachineOperand &Op,
MachineInstr &SCCDefInst,
- SetVectorType &Worklist) const {
- bool SCCUsedImplicitly = false;
+ SetVectorType &Worklist,
+ Register NewCond) const {
// Ensure that def inst defines SCC, which is still live.
assert(Op.isReg() && Op.getReg() == AMDGPU::SCC && Op.isDef() &&
@@ -6836,33 +6998,18 @@ void SIInstrInfo::addSCCDefUsersToVALUWorklist(MachineOperand &Op,
make_range(std::next(MachineBasicBlock::iterator(SCCDefInst)),
SCCDefInst.getParent()->end())) {
// Check if SCC is used first.
- if (MI.findRegisterUseOperandIdx(AMDGPU::SCC, false, &RI) != -1) {
+ int SCCIdx = MI.findRegisterUseOperandIdx(AMDGPU::SCC, false, &RI);
+ if (SCCIdx != -1) {
if (MI.isCopy()) {
MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
Register DestReg = MI.getOperand(0).getReg();
- for (auto &User : MRI.use_nodbg_instructions(DestReg)) {
- if ((User.getOpcode() == AMDGPU::S_ADD_CO_PSEUDO) ||
- (User.getOpcode() == AMDGPU::S_SUB_CO_PSEUDO)) {
- User.getOperand(4).setReg(RI.getVCC());
- Worklist.insert(&User);
- } else if (User.getOpcode() == AMDGPU::V_CNDMASK_B32_e64) {
- User.getOperand(5).setReg(RI.getVCC());
- // No need to add to Worklist.
- }
- }
+ MRI.replaceRegWith(DestReg, NewCond);
CopyToDelete.push_back(&MI);
} else {
- if (MI.getOpcode() == AMDGPU::S_CSELECT_B32 ||
- MI.getOpcode() == AMDGPU::S_CSELECT_B64) {
- // This is an implicit use of SCC and it is really expected by
- // the SCC users to handle.
- // We cannot preserve the edge to the user so add the explicit
- // copy: SCC = COPY VCC.
- // The copy will be cleaned up during the processing of the user
- // in lowerSelect.
- SCCUsedImplicitly = true;
- }
+
+ if (NewCond.isValid())
+ MI.getOperand(SCCIdx).setReg(NewCond);
Worklist.insert(&MI);
}
@@ -6873,12 +7020,6 @@ void SIInstrInfo::addSCCDefUsersToVALUWorklist(MachineOperand &Op,
}
for (auto &Copy : CopyToDelete)
Copy->eraseFromParent();
-
- if (SCCUsedImplicitly) {
- BuildMI(*SCCDefInst.getParent(), std::next(SCCDefInst.getIterator()),
- SCCDefInst.getDebugLoc(), get(AMDGPU::COPY), AMDGPU::SCC)
- .addReg(RI.getVCC());
- }
}
// Instructions that use SCC may be converted to VALU instructions. When that
@@ -7171,31 +7312,19 @@ unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
return Size;
}
- // 4-byte instructions may have a 32-bit literal encoded after them. Check
- // operands that coud ever be literals.
+ // Instructions may have a 32-bit literal encoded after them. Check
+ // operands that could ever be literals.
if (isVALU(MI) || isSALU(MI)) {
- int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
- if (Src0Idx == -1)
- return DescSize; // No operands.
-
- if (isLiteralConstantLike(MI.getOperand(Src0Idx), Desc.OpInfo[Src0Idx]))
- return isVOP3(MI) ? 12 : (DescSize + 4);
-
- int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
- if (Src1Idx == -1)
+ if (isDPP(MI))
return DescSize;
-
- if (isLiteralConstantLike(MI.getOperand(Src1Idx), Desc.OpInfo[Src1Idx]))
- return isVOP3(MI) ? 12 : (DescSize + 4);
-
- int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
- if (Src2Idx == -1)
- return DescSize;
-
- if (isLiteralConstantLike(MI.getOperand(Src2Idx), Desc.OpInfo[Src2Idx]))
- return isVOP3(MI) ? 12 : (DescSize + 4);
-
- return DescSize;
+ bool HasLiteral = false;
+ for (int I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) {
+ if (isLiteralConstant(MI, I)) {
+ HasLiteral = true;
+ break;
+ }
+ }
+ return HasLiteral ? DescSize + 4 : DescSize;
}
// Check whether we have extra NSA words.
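
The simplified size computation above scans for any explicit literal operand instead of checking the three named sources individually. Stated as a standalone rule (the 4-byte literal size is the one assumed by the surrounding code; the helper is illustrative):

// VALU/SALU encodings grow by at most one 32-bit literal; DPP forms never
// carry a literal, so they keep their base size.
static unsigned valuOrSaluSize(unsigned DescSize, bool IsDPP, bool HasLiteral) {
  if (IsDPP)
    return DescSize;
  return HasLiteral ? DescSize + 4 : DescSize;
}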
@@ -7283,19 +7412,16 @@ void SIInstrInfo::convertNonUniformLoopRegion(
Register BackEdgeReg = MRI.createVirtualRegister(RI.getBoolRC());
MachineInstrBuilder HeaderPHIBuilder =
BuildMI(*(MF), Branch->getDebugLoc(), get(TargetOpcode::PHI), DstReg);
- for (MachineBasicBlock::pred_iterator PI = LoopEntry->pred_begin(),
- E = LoopEntry->pred_end();
- PI != E; ++PI) {
- if (*PI == LoopEnd) {
+ for (MachineBasicBlock *PMBB : LoopEntry->predecessors()) {
+ if (PMBB == LoopEnd) {
HeaderPHIBuilder.addReg(BackEdgeReg);
} else {
- MachineBasicBlock *PMBB = *PI;
Register ZeroReg = MRI.createVirtualRegister(RI.getBoolRC());
materializeImmediate(*PMBB, PMBB->getFirstTerminator(), DebugLoc(),
ZeroReg, 0);
HeaderPHIBuilder.addReg(ZeroReg);
}
- HeaderPHIBuilder.addMBB(*PI);
+ HeaderPHIBuilder.addMBB(PMBB);
}
MachineInstr *HeaderPhi = HeaderPHIBuilder;
MachineInstr *SIIFBREAK = BuildMI(*(MF), Branch->getDebugLoc(),
@@ -7340,6 +7466,20 @@ SIInstrInfo::CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const
return new GCNHazardRecognizer(MF);
}
+// Called during:
+// - pre-RA scheduling and post-RA scheduling
+ScheduleHazardRecognizer *
+SIInstrInfo::CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAGMI *DAG) const {
+ // Borrowed from Arm Target
+ // We would like to restrict this hazard recognizer to only
+ // post-RA scheduling; we can tell that we're post-RA because we don't
+ // track VRegLiveness.
+ if (!DAG->hasVRegLiveness())
+ return new GCNHazardRecognizer(DAG->MF);
+ return TargetInstrInfo::CreateTargetMIHazardRecognizer(II, DAG);
+}
+
std::pair<unsigned, unsigned>
SIInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
return std::make_pair(TF & MO_MASK, TF & ~MO_MASK);
@@ -7919,3 +8059,209 @@ unsigned SIInstrInfo::getDSShaderTypeValue(const MachineFunction &MF) {
return 0;
}
}
+
+bool SIInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
+ Register &SrcReg2, int64_t &CmpMask,
+ int64_t &CmpValue) const {
+ if (!MI.getOperand(0).isReg() || MI.getOperand(0).getSubReg())
+ return false;
+
+ switch (MI.getOpcode()) {
+ default:
+ break;
+ case AMDGPU::S_CMP_EQ_U32:
+ case AMDGPU::S_CMP_EQ_I32:
+ case AMDGPU::S_CMP_LG_U32:
+ case AMDGPU::S_CMP_LG_I32:
+ case AMDGPU::S_CMP_LT_U32:
+ case AMDGPU::S_CMP_LT_I32:
+ case AMDGPU::S_CMP_GT_U32:
+ case AMDGPU::S_CMP_GT_I32:
+ case AMDGPU::S_CMP_LE_U32:
+ case AMDGPU::S_CMP_LE_I32:
+ case AMDGPU::S_CMP_GE_U32:
+ case AMDGPU::S_CMP_GE_I32:
+ case AMDGPU::S_CMP_EQ_U64:
+ case AMDGPU::S_CMP_LG_U64:
+ SrcReg = MI.getOperand(0).getReg();
+ if (MI.getOperand(1).isReg()) {
+ if (MI.getOperand(1).getSubReg())
+ return false;
+ SrcReg2 = MI.getOperand(1).getReg();
+ CmpValue = 0;
+ } else if (MI.getOperand(1).isImm()) {
+ SrcReg2 = Register();
+ CmpValue = MI.getOperand(1).getImm();
+ } else {
+ return false;
+ }
+ CmpMask = ~0;
+ return true;
+ case AMDGPU::S_CMPK_EQ_U32:
+ case AMDGPU::S_CMPK_EQ_I32:
+ case AMDGPU::S_CMPK_LG_U32:
+ case AMDGPU::S_CMPK_LG_I32:
+ case AMDGPU::S_CMPK_LT_U32:
+ case AMDGPU::S_CMPK_LT_I32:
+ case AMDGPU::S_CMPK_GT_U32:
+ case AMDGPU::S_CMPK_GT_I32:
+ case AMDGPU::S_CMPK_LE_U32:
+ case AMDGPU::S_CMPK_LE_I32:
+ case AMDGPU::S_CMPK_GE_U32:
+ case AMDGPU::S_CMPK_GE_I32:
+ SrcReg = MI.getOperand(0).getReg();
+ SrcReg2 = Register();
+ CmpValue = MI.getOperand(1).getImm();
+ CmpMask = ~0;
+ return true;
+ }
+
+ return false;
+}
+
+bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
+ Register SrcReg2, int64_t CmpMask,
+ int64_t CmpValue,
+ const MachineRegisterInfo *MRI) const {
+ if (!SrcReg || SrcReg.isPhysical())
+ return false;
+
+ if (SrcReg2 && !getFoldableImm(SrcReg2, *MRI, CmpValue))
+ return false;
+
+ const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
+ this](int64_t ExpectedValue, unsigned SrcSize,
+ bool IsReversable, bool IsSigned) -> bool {
+ // s_cmp_eq_u32 (s_and_b32 $src, 1 << n), 1 << n => s_and_b32 $src, 1 << n
+ // s_cmp_eq_i32 (s_and_b32 $src, 1 << n), 1 << n => s_and_b32 $src, 1 << n
+ // s_cmp_ge_u32 (s_and_b32 $src, 1 << n), 1 << n => s_and_b32 $src, 1 << n
+ // s_cmp_ge_i32 (s_and_b32 $src, 1 << n), 1 << n => s_and_b32 $src, 1 << n
+ // s_cmp_eq_u64 (s_and_b64 $src, 1 << n), 1 << n => s_and_b64 $src, 1 << n
+ // s_cmp_lg_u32 (s_and_b32 $src, 1 << n), 0 => s_and_b32 $src, 1 << n
+ // s_cmp_lg_i32 (s_and_b32 $src, 1 << n), 0 => s_and_b32 $src, 1 << n
+ // s_cmp_gt_u32 (s_and_b32 $src, 1 << n), 0 => s_and_b32 $src, 1 << n
+ // s_cmp_gt_i32 (s_and_b32 $src, 1 << n), 0 => s_and_b32 $src, 1 << n
+ // s_cmp_lg_u64 (s_and_b64 $src, 1 << n), 0 => s_and_b64 $src, 1 << n
+ //
+ // Signed ge/gt are not used for the sign bit.
+ //
+ // If result of the AND is unused except in the compare:
+ // s_and_b(32|64) $src, 1 << n => s_bitcmp1_b(32|64) $src, n
+ //
+ // s_cmp_eq_u32 (s_and_b32 $src, 1 << n), 0 => s_bitcmp0_b32 $src, n
+ // s_cmp_eq_i32 (s_and_b32 $src, 1 << n), 0 => s_bitcmp0_b32 $src, n
+ // s_cmp_eq_u64 (s_and_b64 $src, 1 << n), 0 => s_bitcmp0_b64 $src, n
+ // s_cmp_lg_u32 (s_and_b32 $src, 1 << n), 1 << n => s_bitcmp0_b32 $src, n
+ // s_cmp_lg_i32 (s_and_b32 $src, 1 << n), 1 << n => s_bitcmp0_b32 $src, n
+ // s_cmp_lg_u64 (s_and_b64 $src, 1 << n), 1 << n => s_bitcmp0_b64 $src, n
+
+ MachineInstr *Def = MRI->getUniqueVRegDef(SrcReg);
+ if (!Def || Def->getParent() != CmpInstr.getParent())
+ return false;
+
+ if (Def->getOpcode() != AMDGPU::S_AND_B32 &&
+ Def->getOpcode() != AMDGPU::S_AND_B64)
+ return false;
+
+ int64_t Mask;
+ const auto isMask = [&Mask, SrcSize](const MachineOperand *MO) -> bool {
+ if (MO->isImm())
+ Mask = MO->getImm();
+ else if (!getFoldableImm(MO, Mask))
+ return false;
+ Mask &= maxUIntN(SrcSize);
+ return isPowerOf2_64(Mask);
+ };
+
+ MachineOperand *SrcOp = &Def->getOperand(1);
+ if (isMask(SrcOp))
+ SrcOp = &Def->getOperand(2);
+ else if (isMask(&Def->getOperand(2)))
+ SrcOp = &Def->getOperand(1);
+ else
+ return false;
+
+ unsigned BitNo = countTrailingZeros((uint64_t)Mask);
+ if (IsSigned && BitNo == SrcSize - 1)
+ return false;
+
+ ExpectedValue <<= BitNo;
+
+ bool IsReversedCC = false;
+ if (CmpValue != ExpectedValue) {
+ if (!IsReversable)
+ return false;
+ IsReversedCC = CmpValue == (ExpectedValue ^ Mask);
+ if (!IsReversedCC)
+ return false;
+ }
+
+ Register DefReg = Def->getOperand(0).getReg();
+ if (IsReversedCC && !MRI->hasOneNonDBGUse(DefReg))
+ return false;
+
+ for (auto I = std::next(Def->getIterator()), E = CmpInstr.getIterator();
+ I != E; ++I) {
+ if (I->modifiesRegister(AMDGPU::SCC, &RI) ||
+ I->killsRegister(AMDGPU::SCC, &RI))
+ return false;
+ }
+
+ MachineOperand *SccDef = Def->findRegisterDefOperand(AMDGPU::SCC);
+ SccDef->setIsDead(false);
+ CmpInstr.eraseFromParent();
+
+ if (!MRI->use_nodbg_empty(DefReg)) {
+ assert(!IsReversedCC);
+ return true;
+ }
+
+  // The AND result has no other uses, so replace the AND itself with an S_BITCMP.
+ MachineBasicBlock *MBB = Def->getParent();
+
+ unsigned NewOpc = (SrcSize == 32) ? IsReversedCC ? AMDGPU::S_BITCMP0_B32
+ : AMDGPU::S_BITCMP1_B32
+ : IsReversedCC ? AMDGPU::S_BITCMP0_B64
+ : AMDGPU::S_BITCMP1_B64;
+
+ BuildMI(*MBB, Def, Def->getDebugLoc(), get(NewOpc))
+ .add(*SrcOp)
+ .addImm(BitNo);
+ Def->eraseFromParent();
+
+ return true;
+ };
+
+ switch (CmpInstr.getOpcode()) {
+ default:
+ break;
+ case AMDGPU::S_CMP_EQ_U32:
+ case AMDGPU::S_CMP_EQ_I32:
+ case AMDGPU::S_CMPK_EQ_U32:
+ case AMDGPU::S_CMPK_EQ_I32:
+ return optimizeCmpAnd(1, 32, true, false);
+ case AMDGPU::S_CMP_GE_U32:
+ case AMDGPU::S_CMPK_GE_U32:
+ return optimizeCmpAnd(1, 32, false, false);
+ case AMDGPU::S_CMP_GE_I32:
+ case AMDGPU::S_CMPK_GE_I32:
+ return optimizeCmpAnd(1, 32, false, true);
+ case AMDGPU::S_CMP_EQ_U64:
+ return optimizeCmpAnd(1, 64, true, false);
+ case AMDGPU::S_CMP_LG_U32:
+ case AMDGPU::S_CMP_LG_I32:
+ case AMDGPU::S_CMPK_LG_U32:
+ case AMDGPU::S_CMPK_LG_I32:
+ return optimizeCmpAnd(0, 32, true, false);
+ case AMDGPU::S_CMP_GT_U32:
+ case AMDGPU::S_CMPK_GT_U32:
+ return optimizeCmpAnd(0, 32, false, false);
+ case AMDGPU::S_CMP_GT_I32:
+ case AMDGPU::S_CMPK_GT_I32:
+ return optimizeCmpAnd(0, 32, false, true);
+ case AMDGPU::S_CMP_LG_U64:
+ return optimizeCmpAnd(0, 64, true, false);
+ }
+
+ return false;
+}
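To make the optimizeCmpAnd fold concrete, take n = 2 (mask 1 << 2 = 4) with the AND result otherwise unused; in the notation of the comments above:

  s_cmp_eq_u32 (s_and_b32 $src, 4), 4  =>  s_bitcmp1_b32 $src, 2

The compare is erased and, because the AND has no remaining uses, the AND itself is replaced by an S_BITCMP1 that sets SCC directly from bit 2 of $src. If the AND result is still used elsewhere, only the compare is deleted and the AND's implicit SCC definition is simply marked live.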
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index fc5e5be03541..dd9ea2b53ca2 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -78,8 +78,11 @@ private:
moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst,
MachineDominatorTree *MDT = nullptr) const;
- void lowerSelect(SetVectorType &Worklist, MachineInstr &Inst,
- MachineDominatorTree *MDT = nullptr) const;
+ void lowerSelect32(SetVectorType &Worklist, MachineInstr &Inst,
+ MachineDominatorTree *MDT = nullptr) const;
+
+ void splitSelect64(SetVectorType &Worklist, MachineInstr &Inst,
+ MachineDominatorTree *MDT = nullptr) const;
void lowerScalarAbs(SetVectorType &Worklist,
MachineInstr &Inst) const;
@@ -122,7 +125,8 @@ private:
void addSCCDefUsersToVALUWorklist(MachineOperand &Op,
MachineInstr &SCCDefInst,
- SetVectorType &Worklist) const;
+ SetVectorType &Worklist,
+ Register NewCond = Register()) const;
void addSCCDefsToVALUWorklist(MachineOperand &Op,
SetVectorType &Worklist) const;
@@ -271,11 +275,10 @@ public:
MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;
- unsigned insertIndirectBranch(MachineBasicBlock &MBB,
- MachineBasicBlock &NewDestBB,
- const DebugLoc &DL,
- int64_t BrOffset,
- RegScavenger *RS = nullptr) const override;
+ void insertIndirectBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock &NewDestBB,
+ MachineBasicBlock &RestoreBB, const DebugLoc &DL,
+ int64_t BrOffset, RegScavenger *RS) const override;
bool analyzeBranchImpl(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
@@ -315,6 +318,14 @@ public:
Register DstReg, ArrayRef<MachineOperand> Cond,
Register TrueReg, Register FalseReg) const;
+ bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
+ Register &SrcReg2, int64_t &CmpMask,
+ int64_t &CmpValue) const override;
+
+ bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
+ Register SrcReg2, int64_t CmpMask, int64_t CmpValue,
+ const MachineRegisterInfo *MRI) const override;
+
unsigned getAddressSpaceForPseudoSourceKind(
unsigned Kind) const override;
@@ -322,16 +333,15 @@ public:
areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
const MachineInstr &MIb) const override;
- bool isFoldableCopy(const MachineInstr &MI) const;
+ static bool isFoldableCopy(const MachineInstr &MI);
bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg,
MachineRegisterInfo *MRI) const final;
unsigned getMachineCSELookAheadLimit() const override { return 500; }
- MachineInstr *convertToThreeAddress(MachineFunction::iterator &MBB,
- MachineInstr &MI,
- LiveVariables *LV) const override;
+ MachineInstr *convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
+ LiveIntervals *LIS) const override;
bool isSchedulingBoundary(const MachineInstr &MI,
const MachineBasicBlock *MBB,
@@ -1036,6 +1046,10 @@ public:
ScheduleHazardRecognizer *
CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const override;
+ ScheduleHazardRecognizer *
+ CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAGMI *DAG) const override;
+
bool isBasicBlockPrologue(const MachineInstr &MI) const override;
MachineInstr *createPHIDestinationCopy(MachineBasicBlock &MBB,
@@ -1119,6 +1133,8 @@ public:
}
static unsigned getDSShaderTypeValue(const MachineFunction &MF);
+
+ const TargetSchedModel &getSchedModel() const { return SchedModel; }
};
/// \brief Returns true if a reg:subreg pair P has a TRC class
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 25b647d34ec1..8c24268e379e 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -333,6 +333,18 @@ def load_glue : PatFrag <(ops node:$ptr), (unindexedload_glue node:$ptr)> {
let IsNonExtLoad = 1;
}
+def atomic_load_8_glue : PatFrag<(ops node:$ptr),
+ (AMDGPUatomic_ld_glue node:$ptr)> {
+ let IsAtomic = 1;
+ let MemoryVT = i8;
+}
+
+def atomic_load_16_glue : PatFrag<(ops node:$ptr),
+ (AMDGPUatomic_ld_glue node:$ptr)> {
+ let IsAtomic = 1;
+ let MemoryVT = i16;
+}
+
def atomic_load_32_glue : PatFrag<(ops node:$ptr),
(AMDGPUatomic_ld_glue node:$ptr)> {
let IsAtomic = 1;
@@ -423,6 +435,14 @@ def load_align16_local_m0 : PatFrag<(ops node:$ptr),
} // End IsLoad = 1
let IsAtomic = 1, AddressSpaces = LoadAddress_local.AddrSpaces in {
+def atomic_load_8_local_m0 : PatFrag<(ops node:$ptr),
+ (atomic_load_8_glue node:$ptr)> {
+ let MemoryVT = i8;
+}
+def atomic_load_16_local_m0 : PatFrag<(ops node:$ptr),
+ (atomic_load_16_glue node:$ptr)> {
+ let MemoryVT = i16;
+}
def atomic_load_32_local_m0 : PatFrag<(ops node:$ptr),
(atomic_load_32_glue node:$ptr)> {
let MemoryVT = i32;
@@ -509,6 +529,18 @@ def store_align16_local_m0 : PatFrag <(ops node:$value, node:$ptr),
let AddressSpaces = StoreAddress_local.AddrSpaces in {
+def atomic_store_local_8_m0 : PatFrag <
+ (ops node:$value, node:$ptr),
+ (AMDGPUatomic_st_glue node:$value, node:$ptr)> {
+ let IsAtomic = 1;
+ let MemoryVT = i8;
+}
+def atomic_store_local_16_m0 : PatFrag <
+ (ops node:$value, node:$ptr),
+ (AMDGPUatomic_st_glue node:$value, node:$ptr)> {
+ let IsAtomic = 1;
+ let MemoryVT = i16;
+}
def atomic_store_local_32_m0 : PatFrag <
(ops node:$value, node:$ptr),
(AMDGPUatomic_st_glue node:$value, node:$ptr)> {
@@ -527,15 +559,7 @@ def atomic_store_local_64_m0 : PatFrag <
def si_setcc_uniform : PatFrag <
(ops node:$lhs, node:$rhs, node:$cond),
(setcc node:$lhs, node:$rhs, node:$cond), [{
- for (SDNode *Use : N->uses()) {
- if (Use->isMachineOpcode() || Use->getOpcode() != ISD::CopyToReg)
- return false;
-
- unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
- if (Reg != AMDGPU::SCC)
- return false;
- }
- return true;
+ return !N->isDivergent();
}]>;
//===----------------------------------------------------------------------===//
@@ -1181,6 +1205,7 @@ class kimmOperand<ValueType vt> : Operand<vt> {
let OperandType = "OPERAND_KIMM"#vt.Size;
let PrintMethod = "printU"#vt.Size#"ImmOperand";
let ParserMatchClass = !cast<AsmOperandClass>("KImmFP"#vt.Size#"MatchClass");
+ let DecoderMethod = "decodeOperand_f"#vt.Size#"kimm";
}
// 32-bit VALU immediate operand that uses the constant bus.
@@ -1864,8 +1889,8 @@ class getAsm64 <bit HasDst, int NumSrcArgs, bit HasIntClamp, bit HasModifiers,
// Returns the assembly string for the inputs and outputs of a VOP3P
// instruction.
-class getAsmVOP3P <bit HasDst, int NumSrcArgs, bit HasModifiers,
- bit HasClamp, ValueType DstVT = i32> {
+class getAsmVOP3P <int NumSrcArgs, bit HasModifiers,
+ bit HasClamp> {
string dst = "$vdst";
string src0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,");
string src1 = !if(!eq(NumSrcArgs, 1), "",
@@ -1883,7 +1908,6 @@ class getAsmVOP3P <bit HasDst, int NumSrcArgs, bit HasModifiers,
class getAsmVOP3OpSel <int NumSrcArgs,
bit HasClamp,
- bit HasOMod,
bit Src0HasMods,
bit Src1HasMods,
bit Src2HasMods> {
@@ -2026,8 +2050,7 @@ class getHasSDWA <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
);
}
-class getHasDPP <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
- ValueType Src1VT = i32> {
+class getHasDPP <int NumSrcArgs> {
bit ret = !if(!eq(NumSrcArgs, 3),
0, // NumSrcArgs == 3 - No DPP for VOP3
1);
@@ -2035,14 +2058,14 @@ class getHasDPP <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
class getHasExt64BitDPP <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
ValueType Src1VT = i32> {
- bit ret = !and(getHasDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret,
+ bit ret = !and(getHasDPP<NumSrcArgs>.ret,
getHas64BitOps<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret);
}
// Function that checks if instruction supports DPP and SDWA
class getHasExt <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
ValueType Src1VT = i32> {
- bit ret = !or(getHasDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret,
+ bit ret = !or(getHasDPP<NumSrcArgs>.ret,
getHasSDWA<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret);
}
@@ -2146,7 +2169,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0,
field bit HasSrc2Mods = !if(HasModifiers, !or(HasSrc2FloatMods, HasSrc2IntMods), 0);
field bit HasExt = getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
- field bit HasExtDPP = getHasDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
+ field bit HasExtDPP = getHasDPP<NumSrcArgs>.ret;
field bit HasExt64BitDPP = getHasExt64BitDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
field bit HasExtSDWA = getHasSDWA<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
field bit HasExtSDWA9 = HasExtSDWA;
@@ -2197,9 +2220,9 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0,
field string Asm32 = getAsm32<HasDst, NumSrcArgs, DstVT>.ret;
field string Asm64 = getAsm64<HasDst, NumSrcArgs, HasIntClamp, HasModifiers, HasOMod, DstVT>.ret;
- field string AsmVOP3P = getAsmVOP3P<HasDst, NumSrcArgs, HasModifiers, HasClamp, DstVT>.ret;
+ field string AsmVOP3P = getAsmVOP3P<NumSrcArgs, HasModifiers, HasClamp>.ret;
field string AsmVOP3OpSel = getAsmVOP3OpSel<NumSrcArgs,
- HasClamp, HasOMod,
+ HasClamp,
HasSrc0FloatMods,
HasSrc1FloatMods,
HasSrc2FloatMods>.ret;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index fbf4634bfc94..d5f9cb8ba493 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1017,22 +1017,33 @@ def : GCNPat <
/********** Extraction, Insertion, Building and Casting **********/
/********** ============================================ **********/
-foreach Index = 0-2 in {
- def Extract_Element_v2i32_#Index : Extract_Element <
- i32, v2i32, Index, !cast<SubRegIndex>(sub#Index)
+// Special case for 2-element vectors. REG_SEQUENCE produces better code
+// than an INSERT_SUBREG.
+multiclass Insert_Element_V2<RegisterClass RC, ValueType elem_type, ValueType vec_type> {
+ def : GCNPat <
+ (insertelt vec_type:$vec, elem_type:$elem, 0),
+ (REG_SEQUENCE RC, $elem, sub0, (elem_type (EXTRACT_SUBREG $vec, sub1)), sub1)
>;
- def Insert_Element_v2i32_#Index : Insert_Element <
+
+ def : GCNPat <
+ (insertelt vec_type:$vec, elem_type:$elem, 1),
+ (REG_SEQUENCE RC, (elem_type (EXTRACT_SUBREG $vec, sub0)), sub0, $elem, sub1)
+ >;
+}
+
+foreach Index = 0-1 in {
+ def Extract_Element_v2i32_#Index : Extract_Element <
i32, v2i32, Index, !cast<SubRegIndex>(sub#Index)
>;
def Extract_Element_v2f32_#Index : Extract_Element <
f32, v2f32, Index, !cast<SubRegIndex>(sub#Index)
>;
- def Insert_Element_v2f32_#Index : Insert_Element <
- f32, v2f32, Index, !cast<SubRegIndex>(sub#Index)
- >;
}
+defm : Insert_Element_V2 <SReg_64, i32, v2i32>;
+defm : Insert_Element_V2 <SReg_64, f32, v2f32>;
+
foreach Index = 0-2 in {
def Extract_Element_v3i32_#Index : Extract_Element <
i32, v3i32, Index, !cast<SubRegIndex>(sub#Index)
@@ -1860,40 +1871,92 @@ def : GCNPat <
// Conversion Patterns
//===----------------------------------------------------------------------===//
-def : GCNPat<(i32 (sext_inreg i32:$src, i1)),
+class UniformSextInreg<ValueType VT> : PatFrag<
+ (ops node:$src),
+ (sext_inreg $src, VT),
+ [{ return !N->isDivergent(); }]>;
+
+def : GCNPat<(i32 (UniformSextInreg<i1> i32:$src)),
(S_BFE_I32 i32:$src, (i32 65536))>; // 0 | 1 << 16
// Handle sext_inreg in i64
def : GCNPat <
- (i64 (sext_inreg i64:$src, i1)),
+ (i64 (UniformSextInreg<i1> i64:$src)),
(S_BFE_I64 i64:$src, (i32 0x10000)) // 0 | 1 << 16
>;
def : GCNPat <
- (i16 (sext_inreg i16:$src, i1)),
+ (i16 (UniformSextInreg<i1> i16:$src)),
(S_BFE_I32 $src, (i32 0x00010000)) // 0 | 1 << 16
>;
def : GCNPat <
- (i16 (sext_inreg i16:$src, i8)),
+ (i16 (UniformSextInreg<i8> i16:$src)),
(S_BFE_I32 $src, (i32 0x80000)) // 0 | 8 << 16
>;
def : GCNPat <
- (i64 (sext_inreg i64:$src, i8)),
+ (i64 (UniformSextInreg<i8> i64:$src)),
(S_BFE_I64 i64:$src, (i32 0x80000)) // 0 | 8 << 16
>;
def : GCNPat <
- (i64 (sext_inreg i64:$src, i16)),
+ (i64 (UniformSextInreg<i16> i64:$src)),
(S_BFE_I64 i64:$src, (i32 0x100000)) // 0 | 16 << 16
>;
def : GCNPat <
- (i64 (sext_inreg i64:$src, i32)),
+ (i64 (UniformSextInreg<i32> i64:$src)),
(S_BFE_I64 i64:$src, (i32 0x200000)) // 0 | 32 << 16
>;
+
+class DivergentSextInreg<ValueType VT> : PatFrag<
+ (ops node:$src),
+ (sext_inreg $src, VT),
+ [{ return N->isDivergent(); }]>;
+
+def : GCNPat<(i32 (DivergentSextInreg<i1> i32:$src)),
+ (V_BFE_I32_e64 i32:$src, (i32 0), (i32 1))>;
+
+def : GCNPat <
+ (i16 (DivergentSextInreg<i1> i16:$src)),
+ (V_BFE_I32_e64 $src, (i32 0), (i32 1)) // 0 | 1 << 16
+>;
+
+def : GCNPat <
+ (i16 (DivergentSextInreg<i8> i16:$src)),
+ (V_BFE_I32_e64 $src, (i32 0), (i32 8)) // 0 | 8 << 16
+>;
+
+def : GCNPat <
+ (i64 (DivergentSextInreg<i1> i64:$src)),
+ (REG_SEQUENCE VReg_64,
+ (V_BFE_I32_e64 (i32 (EXTRACT_SUBREG i64:$src, sub0)), (i32 0), (i32 1)), sub0,
+ (V_ASHRREV_I32_e32 (i32 31), (V_BFE_I32_e64 (i32 (EXTRACT_SUBREG i64:$src, sub0)), (i32 0), (i32 1))), sub1)
+>;
+
+def : GCNPat <
+ (i64 (DivergentSextInreg<i8> i64:$src)),
+ (REG_SEQUENCE VReg_64,
+ (V_BFE_I32_e64 (i32 (EXTRACT_SUBREG i64:$src, sub0)), (i32 0), (i32 8)/* 0 | 8 << 16 */), sub0,
+ (V_ASHRREV_I32_e32 (i32 31), (V_BFE_I32_e64 (i32 (EXTRACT_SUBREG i64:$src, sub0)), (i32 0), (i32 8))), sub1)
+>;
+
+def : GCNPat <
+ (i64 (DivergentSextInreg<i16> i64:$src)),
+ (REG_SEQUENCE VReg_64,
+ (V_BFE_I32_e64 (i32 (EXTRACT_SUBREG i64:$src, sub0)), (i32 0), (i32 16)/* 0 | 16 << 16 */), sub0,
+ (V_ASHRREV_I32_e32 (i32 31), (V_BFE_I32_e64 (i32 (EXTRACT_SUBREG i64:$src, sub0)), (i32 0), (i32 16))), sub1)
+>;
+
+def : GCNPat <
+ (i64 (DivergentSextInreg<i32> i64:$src)),
+ (REG_SEQUENCE VReg_64,
+ (i32 (EXTRACT_SUBREG i64:$src, sub0)), sub0,
+ (V_ASHRREV_I32_e32 (i32 31), (i32 (EXTRACT_SUBREG i64:$src, sub0))), sub1)
+>;
+
def : GCNPat <
(i64 (zext i32:$src)),
(REG_SEQUENCE SReg_64, $src, sub0, (S_MOV_B32 (i32 0)), sub1)
@@ -2097,6 +2160,22 @@ def : GCNPat <
>;
def : GCNPat <
+ (i1 (UniformUnaryFrag<trunc> i32:$a)),
+ (S_CMP_EQ_U32 (S_AND_B32 (i32 1), $a), (i32 1))
+>;
+
+def : GCNPat <
+ (i1 (UniformUnaryFrag<trunc> i16:$a)),
+ (S_CMP_EQ_U32 (S_AND_B32 (i32 1), $a), (i32 1))
+>;
+
+def : GCNPat <
+ (i1 (UniformUnaryFrag<trunc> i64:$a)),
+ (S_CMP_EQ_U32 (S_AND_B32 (i32 1),
+ (i32 (EXTRACT_SUBREG $a, sub0))), (i32 1))
+>;
+
+def : GCNPat <
(i1 (trunc i32:$a)),
(V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1), $a), (i32 1))
>;
@@ -2278,31 +2357,37 @@ let SubtargetPredicate = NotHasMinMaxDenormModes in {
let OtherPredicates = [HasDLInsts] in {
+// Don't allow source modifiers. If there are any source modifiers then it's
+// better to select fma instead of fmac.
def : GCNPat <
- (fma (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)),
- (f32 (VOP3Mods f32:$src1, i32:$src1_modifiers)),
+ (fma (f32 (VOP3NoMods f32:$src0)),
+ (f32 (VOP3NoMods f32:$src1)),
(f32 (VOP3NoMods f32:$src2))),
- (V_FMAC_F32_e64 $src0_modifiers, $src0, $src1_modifiers, $src1,
+ (V_FMAC_F32_e64 SRCMODS.NONE, $src0, SRCMODS.NONE, $src1,
SRCMODS.NONE, $src2)
>;
} // End OtherPredicates = [HasDLInsts]
let SubtargetPredicate = isGFX10Plus in
+// Don't allow source modifiers. If there are any source modifiers then it's
+// better to select fma instead of fmac.
def : GCNPat <
- (fma (f16 (VOP3Mods f32:$src0, i32:$src0_modifiers)),
- (f16 (VOP3Mods f32:$src1, i32:$src1_modifiers)),
+ (fma (f16 (VOP3NoMods f32:$src0)),
+ (f16 (VOP3NoMods f32:$src1)),
(f16 (VOP3NoMods f32:$src2))),
- (V_FMAC_F16_e64 $src0_modifiers, $src0, $src1_modifiers, $src1,
+ (V_FMAC_F16_e64 SRCMODS.NONE, $src0, SRCMODS.NONE, $src1,
SRCMODS.NONE, $src2)
>;
let SubtargetPredicate = isGFX90APlus in
+// Don't allow source modifiers. If there are any source modifiers then it's
+// better to select fma instead of fmac.
def : GCNPat <
- (fma (f64 (VOP3Mods0 f64:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)),
- (f64 (VOP3Mods f64:$src1, i32:$src1_modifiers)),
+ (fma (f64 (VOP3NoMods f64:$src0)),
+ (f64 (VOP3NoMods f64:$src1)),
(f64 (VOP3NoMods f64:$src2))),
- (V_FMAC_F64_e64 $src0_modifiers, $src0, $src1_modifiers, $src1,
- SRCMODS.NONE, $src2, $clamp, $omod)
+ (V_FMAC_F64_e64 SRCMODS.NONE, $src0, SRCMODS.NONE, $src1,
+ SRCMODS.NONE, $src2)
>;
// COPY is workaround tablegen bug from multiple outputs
@@ -2656,12 +2741,20 @@ class AMDGPUGenericInstruction : GenericInstruction {
let Namespace = "AMDGPU";
}
+// Returns -1 if the input is zero.
def G_AMDGPU_FFBH_U32 : AMDGPUGenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$src);
let hasSideEffects = 0;
}
+// Returns -1 if the input is zero.
+def G_AMDGPU_FFBL_B32 : AMDGPUGenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type1:$src);
+ let hasSideEffects = 0;
+}
+
def G_AMDGPU_RCP_IFLAG : AMDGPUGenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$src);
@@ -2854,3 +2947,16 @@ def G_AMDGPU_INTRIN_BVH_INTERSECT_RAY : AMDGPUGenericInstruction {
let mayLoad = 1;
let mayStore = 0;
}
+
+// Generic instruction for SI_CALL, so we can select the register bank and insert a waterfall loop
+// if necessary.
+def G_SI_CALL : AMDGPUGenericInstruction {
+ let OutOperandList = (outs SReg_64:$dst);
+ let InOperandList = (ins type0:$src0, unknown:$callee);
+ let Size = 4;
+ let isCall = 1;
+ let UseNamedOperandTable = 1;
+ let SchedRW = [WriteBranch];
+ // TODO: Should really base this on the call target
+ let isConvergent = 1;
+}
diff --git a/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp b/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp
index d560b477b8ba..4fa8ec711134 100644
--- a/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp
@@ -140,11 +140,7 @@ bool SILateBranchLowering::runOnMachineFunction(MachineFunction &MF) {
bool MadeChange = false;
for (MachineBasicBlock &MBB : MF) {
- MachineBasicBlock::iterator I, Next;
- for (I = MBB.begin(); I != MBB.end(); I = Next) {
- Next = std::next(I);
- MachineInstr &MI = *I;
-
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
switch (MI.getOpcode()) {
case AMDGPU::S_BRANCH:
// Optimize out branches to the next block.
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index 493c1ad87f93..34cbb49dcd16 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -146,7 +146,7 @@ class SILoadStoreOptimizer : public MachineFunctionPass {
if (!AddrOp->isReg())
return false;
- // TODO: We should be able to merge physical reg addreses.
+ // TODO: We should be able to merge physical reg addresses.
if (AddrOp->getReg().isPhysical())
return false;
@@ -303,6 +303,8 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
return 2;
case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
return 4;
+ case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM:
+ return 8;
case AMDGPU::DS_READ_B32: LLVM_FALLTHROUGH;
case AMDGPU::DS_READ_B32_gfx9: LLVM_FALLTHROUGH;
case AMDGPU::DS_WRITE_B32: LLVM_FALLTHROUGH;
@@ -343,6 +345,9 @@ static InstClassEnum getInstClass(unsigned Opc, const SIInstrInfo &TII) {
if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr) == -1 &&
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0) == -1)
return UNKNOWN;
+ // Ignore BVH instructions
+ if (AMDGPU::getMIMGBaseOpcode(Opc)->BVH)
+ return UNKNOWN;
// TODO: Support IMAGE_GET_RESINFO and IMAGE_GET_LOD.
if (TII.get(Opc).mayStore() || !TII.get(Opc).mayLoad() ||
TII.isGather4(Opc))
@@ -369,6 +374,7 @@ static InstClassEnum getInstClass(unsigned Opc, const SIInstrInfo &TII) {
case AMDGPU::S_BUFFER_LOAD_DWORD_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
+ case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM:
return S_BUFFER_LOAD_IMM;
case AMDGPU::DS_READ_B32:
case AMDGPU::DS_READ_B32_gfx9:
@@ -380,15 +386,6 @@ static InstClassEnum getInstClass(unsigned Opc, const SIInstrInfo &TII) {
case AMDGPU::DS_WRITE_B64:
case AMDGPU::DS_WRITE_B64_gfx9:
return DS_WRITE;
- case AMDGPU::IMAGE_BVH_INTERSECT_RAY_sa:
- case AMDGPU::IMAGE_BVH64_INTERSECT_RAY_sa:
- case AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16_sa:
- case AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_sa:
- case AMDGPU::IMAGE_BVH_INTERSECT_RAY_nsa:
- case AMDGPU::IMAGE_BVH64_INTERSECT_RAY_nsa:
- case AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16_nsa:
- case AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_nsa:
- return UNKNOWN;
}
}
@@ -419,6 +416,7 @@ static unsigned getInstSubclass(unsigned Opc, const SIInstrInfo &TII) {
case AMDGPU::S_BUFFER_LOAD_DWORD_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
+ case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM:
return AMDGPU::S_BUFFER_LOAD_DWORD_IMM;
}
}
@@ -469,6 +467,7 @@ static AddressRegs getRegs(unsigned Opc, const SIInstrInfo &TII) {
case AMDGPU::S_BUFFER_LOAD_DWORD_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
+ case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM:
Result.SBase = true;
return Result;
case AMDGPU::DS_READ_B32:
@@ -653,7 +652,7 @@ static bool canMoveInstsAcrossMemOp(MachineInstr &MemOp,
}
// This function assumes that \p A and \p B have are identical except for
-// size and offset, and they referecne adjacent memory.
+// size and offset, and they reference adjacent memory.
static MachineMemOperand *combineKnownAdjacentMMOs(MachineFunction &MF,
const MachineMemOperand *A,
const MachineMemOperand *B) {
@@ -863,6 +862,7 @@ bool SILoadStoreOptimizer::widthsFit(const GCNSubtarget &STM,
return false;
case 2:
case 4:
+ case 8:
return true;
}
}
@@ -1529,45 +1529,62 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI,
return AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM;
case 4:
return AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM;
+ case 8:
+ return AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM;
}
case MIMG:
- assert("No overlaps" && (countPopulation(CI.DMask | Paired.DMask) == Width));
+ assert((countPopulation(CI.DMask | Paired.DMask) == Width) &&
+ "No overlaps");
return AMDGPU::getMaskedMIMGOp(CI.I->getOpcode(), Width);
}
}
std::pair<unsigned, unsigned>
-SILoadStoreOptimizer::getSubRegIdxs(const CombineInfo &CI, const CombineInfo &Paired) {
+SILoadStoreOptimizer::getSubRegIdxs(const CombineInfo &CI,
+ const CombineInfo &Paired) {
- if (CI.Width == 0 || Paired.Width == 0 || CI.Width + Paired.Width > 4)
- return std::make_pair(0, 0);
+ assert(CI.Width != 0 && Paired.Width != 0 && "Width cannot be zero");
bool ReverseOrder;
if (CI.InstClass == MIMG) {
- assert((countPopulation(CI.DMask | Paired.DMask) == CI.Width + Paired.Width) &&
- "No overlaps");
+ assert(
+ (countPopulation(CI.DMask | Paired.DMask) == CI.Width + Paired.Width) &&
+ "No overlaps");
ReverseOrder = CI.DMask > Paired.DMask;
} else
ReverseOrder = CI.Offset > Paired.Offset;
- static const unsigned Idxs[4][4] = {
- {AMDGPU::sub0, AMDGPU::sub0_sub1, AMDGPU::sub0_sub1_sub2, AMDGPU::sub0_sub1_sub2_sub3},
- {AMDGPU::sub1, AMDGPU::sub1_sub2, AMDGPU::sub1_sub2_sub3, 0},
- {AMDGPU::sub2, AMDGPU::sub2_sub3, 0, 0},
- {AMDGPU::sub3, 0, 0, 0},
- };
unsigned Idx0;
unsigned Idx1;
- assert(CI.Width >= 1 && CI.Width <= 3);
- assert(Paired.Width >= 1 && Paired.Width <= 3);
+ if (CI.Width + Paired.Width > 4) {
+ assert(CI.Width == 4 && Paired.Width == 4);
- if (ReverseOrder) {
- Idx1 = Idxs[0][Paired.Width - 1];
- Idx0 = Idxs[Paired.Width][CI.Width - 1];
+ if (ReverseOrder) {
+ Idx1 = AMDGPU::sub0_sub1_sub2_sub3;
+ Idx0 = AMDGPU::sub4_sub5_sub6_sub7;
+ } else {
+ Idx0 = AMDGPU::sub0_sub1_sub2_sub3;
+ Idx1 = AMDGPU::sub4_sub5_sub6_sub7;
+ }
} else {
- Idx0 = Idxs[0][CI.Width - 1];
- Idx1 = Idxs[CI.Width][Paired.Width - 1];
+ static const unsigned Idxs[4][4] = {
+ {AMDGPU::sub0, AMDGPU::sub0_sub1, AMDGPU::sub0_sub1_sub2, AMDGPU::sub0_sub1_sub2_sub3},
+ {AMDGPU::sub1, AMDGPU::sub1_sub2, AMDGPU::sub1_sub2_sub3, 0},
+ {AMDGPU::sub2, AMDGPU::sub2_sub3, 0, 0},
+ {AMDGPU::sub3, 0, 0, 0},
+ };
+
+ assert(CI.Width >= 1 && CI.Width <= 3);
+ assert(Paired.Width >= 1 && Paired.Width <= 3);
+
+ if (ReverseOrder) {
+ Idx1 = Idxs[0][Paired.Width - 1];
+ Idx0 = Idxs[Paired.Width][CI.Width - 1];
+ } else {
+ Idx0 = Idxs[0][CI.Width - 1];
+ Idx1 = Idxs[CI.Width][Paired.Width - 1];
+ }
}
return std::make_pair(Idx0, Idx1);
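As a worked example of the newly allowed 4 + 4 case: merging two S_BUFFER_LOAD_DWORDX4_IMM loads into one DWORDX8 with CI.Offset < Paired.Offset returns Idx0 = sub0_sub1_sub2_sub3 and Idx1 = sub4_sub5_sub6_sub7, so users of the lower-offset load are rewired to the low half of the 8-dword result and users of the higher-offset load to the high half.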
@@ -2048,7 +2065,7 @@ SILoadStoreOptimizer::collectMergeableInsts(
// adjacent to each other in the list, which will make it easier to find
// matches.
MergeList.sort(
- [] (const CombineInfo &A, CombineInfo &B) {
+ [] (const CombineInfo &A, const CombineInfo &B) {
return A.Offset < B.Offset;
});
++I;
@@ -2140,7 +2157,7 @@ SILoadStoreOptimizer::optimizeInstsWithSameBaseAddr(
MachineBasicBlock::iterator NewMI =
mergeSBufferLoadImmPair(CI, Paired, InstsToMove);
CI.setMI(NewMI, *TII, *STM);
- OptimizeListAgain |= (CI.Width + Paired.Width) < 16;
+ OptimizeListAgain |= (CI.Width + Paired.Width) < 8;
break;
}
case BUFFER_LOAD: {
diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index 0f2836e1e7fb..3168bcd53eda 100644
--- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -13,7 +13,7 @@
/// All control flow is handled using predicated instructions and
/// a predicate stack. Each Scalar ALU controls the operations of 64 Vector
/// ALUs. The Scalar ALU can update the predicate for any of the Vector ALUs
-/// by writting to the 64-bit EXEC register (each bit corresponds to a
+/// by writing to the 64-bit EXEC register (each bit corresponds to a
/// single vector ALU). Typically, for predicates, a vector ALU will write
/// to its bit of the VCC register (like EXEC VCC is 64-bits, one for each
/// Vector ALU) and then the ScalarALU will AND the VCC register with the
@@ -38,7 +38,8 @@
/// %vgpr0 = V_ADD_F32 %vgpr0, %vgpr0 // Do the IF block of the branch
///
/// label0:
-/// %sgpr0 = S_OR_SAVEEXEC_B64 %sgpr0 // Restore the exec mask for the Then block
+/// %sgpr0 = S_OR_SAVEEXEC_B64 %sgpr0 // Restore the exec mask for the Then
+/// // block
/// %exec = S_XOR_B64 %sgpr0, %exec // Update the exec mask
/// S_BRANCH_EXECZ label1 // Use our branch optimization
/// // instruction again.
@@ -52,6 +53,8 @@
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
using namespace llvm;
@@ -69,6 +72,8 @@ private:
const SIRegisterInfo *TRI = nullptr;
const SIInstrInfo *TII = nullptr;
LiveIntervals *LIS = nullptr;
+ LiveVariables *LV = nullptr;
+ MachineDominatorTree *MDT = nullptr;
MachineRegisterInfo *MRI = nullptr;
SetVector<MachineInstr*> LoweredEndCf;
DenseSet<Register> LoweredIf;
@@ -141,6 +146,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
// Should preserve the same set that TwoAddressInstructions does.
+ AU.addPreserved<MachineDominatorTree>();
AU.addPreserved<SlotIndexes>();
AU.addPreserved<LiveIntervals>();
AU.addPreservedID(LiveVariablesID);
@@ -234,6 +240,8 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) {
BuildMI(MBB, I, DL, TII->get(AndOpc), Tmp)
.addReg(CopyReg)
.add(Cond);
+ if (LV)
+ LV->replaceKillInstruction(Cond.getReg(), MI, *And);
setImpSCCDefDead(*And, true);
@@ -251,6 +259,8 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) {
MachineInstr *SetExec =
BuildMI(MBB, I, DL, TII->get(MovTermOpc), Exec)
.addReg(Tmp, RegState::Kill);
+ if (LV)
+ LV->getVarInfo(Tmp).Kills.push_back(SetExec);
// Skip ahead to the unconditional branch in case there are other terminators
// present.
@@ -304,6 +314,8 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
MachineInstr *OrSaveExec =
BuildMI(MBB, Start, DL, TII->get(OrSaveExecOpc), SaveReg)
.add(MI.getOperand(1)); // Saved EXEC
+ if (LV)
+ LV->replaceKillInstruction(MI.getOperand(1).getReg(), MI, *OrSaveExec);
MachineBasicBlock *DestBB = MI.getOperand(2).getMBB();
@@ -377,15 +389,22 @@ void SILowerControlFlow::emitIfBreak(MachineInstr &MI) {
And = BuildMI(MBB, &MI, DL, TII->get(AndOpc), AndReg)
.addReg(Exec)
.add(MI.getOperand(1));
+ if (LV)
+ LV->replaceKillInstruction(MI.getOperand(1).getReg(), MI, *And);
Or = BuildMI(MBB, &MI, DL, TII->get(OrOpc), Dst)
.addReg(AndReg)
.add(MI.getOperand(2));
if (LIS)
LIS->createAndComputeVirtRegInterval(AndReg);
- } else
+ } else {
Or = BuildMI(MBB, &MI, DL, TII->get(OrOpc), Dst)
.add(MI.getOperand(1))
.add(MI.getOperand(2));
+ if (LV)
+ LV->replaceKillInstruction(MI.getOperand(1).getReg(), MI, *Or);
+ }
+ if (LV)
+ LV->replaceKillInstruction(MI.getOperand(2).getReg(), MI, *Or);
if (LIS) {
if (And)
@@ -471,6 +490,14 @@ MachineBasicBlock *SILowerControlFlow::emitEndCf(MachineInstr &MI) {
MachineBasicBlock *SplitBB = &MBB;
if (NeedBlockSplit) {
SplitBB = MBB.splitAt(MI, /*UpdateLiveIns*/true, LIS);
+ if (MDT && SplitBB != &MBB) {
+ MachineDomTreeNode *MBBNode = (*MDT)[&MBB];
+ SmallVector<MachineDomTreeNode *> Children(MBBNode->begin(),
+ MBBNode->end());
+ MachineDomTreeNode *SplitBBNode = MDT->addNewBlock(SplitBB, &MBB);
+ for (MachineDomTreeNode *Child : Children)
+ MDT->changeImmediateDominator(Child, SplitBBNode);
+ }
Opcode = OrTermrOpc;
InsPt = MI;
}
@@ -479,6 +506,8 @@ MachineBasicBlock *SILowerControlFlow::emitEndCf(MachineInstr &MI) {
BuildMI(MBB, InsPt, DL, TII->get(Opcode), Exec)
.addReg(Exec)
.add(MI.getOperand(0));
+ if (LV)
+ LV->replaceKillInstruction(MI.getOperand(0).getReg(), MI, *NewMI);
LoweredEndCf.insert(NewMI);
@@ -570,7 +599,12 @@ void SILowerControlFlow::optimizeEndCf() {
LLVM_DEBUG(dbgs() << "Skip redundant "; MI->dump());
if (LIS)
LIS->RemoveMachineInstrFromMaps(*MI);
+ Register Reg;
+ if (LV)
+ Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::src1)->getReg();
MI->eraseFromParent();
+ if (LV)
+ LV->recomputeForSingleDefVirtReg(Reg);
removeMBBifRedundant(MBB);
}
}
@@ -686,6 +720,8 @@ void SILowerControlFlow::lowerInitExec(MachineBasicBlock *MBB,
auto BfeMI = BuildMI(*MBB, FirstMI, DL, TII->get(AMDGPU::S_BFE_U32), CountReg)
.addReg(InputReg)
.addImm((MI.getOperand(1).getImm() & Mask) | 0x70000);
+ if (LV)
+ LV->recomputeForSingleDefVirtReg(InputReg);
auto BfmMI =
BuildMI(*MBB, FirstMI, DL,
TII->get(IsWave32 ? AMDGPU::S_BFM_B32 : AMDGPU::S_BFM_B64), Exec)
@@ -694,6 +730,8 @@ void SILowerControlFlow::lowerInitExec(MachineBasicBlock *MBB,
auto CmpMI = BuildMI(*MBB, FirstMI, DL, TII->get(AMDGPU::S_CMP_EQ_U32))
.addReg(CountReg, RegState::Kill)
.addImm(WavefrontSize);
+ if (LV)
+ LV->getVarInfo(CountReg).Kills.push_back(CmpMI);
auto CmovMI =
BuildMI(*MBB, FirstMI, DL,
TII->get(IsWave32 ? AMDGPU::S_CMOV_B32 : AMDGPU::S_CMOV_B64),
@@ -719,23 +757,6 @@ void SILowerControlFlow::lowerInitExec(MachineBasicBlock *MBB,
}
bool SILowerControlFlow::removeMBBifRedundant(MachineBasicBlock &MBB) {
- auto GetFallThroughSucc = [=](MachineBasicBlock *B) -> MachineBasicBlock * {
- auto *S = B->getNextNode();
- if (!S)
- return nullptr;
- if (B->isSuccessor(S)) {
- // The only fallthrough candidate
- MachineBasicBlock::iterator I(B->getFirstInstrTerminator());
- MachineBasicBlock::iterator E = B->end();
- for (; I != E; I++) {
- if (I->isBranch() && TII->getBranchDestBlock(*I) == S)
- // We have unoptimized branch to layout successor
- return nullptr;
- }
- }
- return S;
- };
-
for (auto &I : MBB.instrs()) {
if (!I.isDebugInstr() && !I.isUnconditionalBranch())
return false;
@@ -748,7 +769,7 @@ bool SILowerControlFlow::removeMBBifRedundant(MachineBasicBlock &MBB) {
while (!MBB.predecessors().empty()) {
MachineBasicBlock *P = *MBB.pred_begin();
- if (GetFallThroughSucc(P) == &MBB)
+ if (P->getFallThrough() == &MBB)
FallThrough = P;
P->ReplaceUsesOfBlockWith(&MBB, Succ);
}
@@ -757,10 +778,19 @@ bool SILowerControlFlow::removeMBBifRedundant(MachineBasicBlock &MBB) {
for (auto &I : MBB.instrs())
LIS->RemoveMachineInstrFromMaps(I);
}
+ if (MDT) {
+    // If Succ, the single successor of MBB, is dominated by MBB, the MDT needs
+    // updating by changing Succ's idom to MBB's idom; otherwise, MBB must be a
+    // leaf node in the MDT and can be erased directly.
+ if (MDT->dominates(&MBB, Succ))
+ MDT->changeImmediateDominator(MDT->getNode(Succ),
+ MDT->getNode(&MBB)->getIDom());
+ MDT->eraseNode(&MBB);
+ }
MBB.clear();
MBB.eraseFromParent();
if (FallThrough && !FallThrough->isLayoutSuccessor(Succ)) {
- if (!GetFallThroughSucc(Succ)) {
+ if (!Succ->canFallThrough()) {
MachineFunction *MF = FallThrough->getParent();
MachineFunction::iterator FallThroughPos(FallThrough);
MF->splice(std::next(FallThroughPos), Succ);
@@ -780,6 +810,9 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
// This doesn't actually need LiveIntervals, but we can preserve them.
LIS = getAnalysisIfAvailable<LiveIntervals>();
+ // This doesn't actually need LiveVariables, but we can preserve them.
+ LV = getAnalysisIfAvailable<LiveVariables>();
+ MDT = getAnalysisIfAvailable<MachineDominatorTree>();
MRI = &MF.getRegInfo();
BoolRC = TRI->getBoolRC();
diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
index 38b9d85b653b..55196fe334e6 100644
--- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
@@ -35,7 +35,6 @@ class SILowerSGPRSpills : public MachineFunctionPass {
private:
const SIRegisterInfo *TRI = nullptr;
const SIInstrInfo *TII = nullptr;
- VirtRegMap *VRM = nullptr;
LiveIntervals *LIS = nullptr;
// Save and Restore blocks of the current function. Typically there is a
@@ -289,7 +288,6 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
TII = ST.getInstrInfo();
TRI = &TII->getRegisterInfo();
- VRM = getAnalysisIfAvailable<VirtRegMap>();
LIS = getAnalysisIfAvailable<LiveIntervals>();
assert(SaveBlocks.empty() && RestoreBlocks.empty());
@@ -334,11 +332,7 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
BitVector SpillFIs(MFI.getObjectIndexEnd(), false);
for (MachineBasicBlock &MBB : MF) {
- MachineBasicBlock::iterator Next;
- for (auto I = MBB.begin(), E = MBB.end(); I != E; I = Next) {
- MachineInstr &MI = *I;
- Next = std::next(I);
-
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
if (!TII->isSGPRSpill(MI))
continue;
@@ -369,11 +363,17 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
if (MI.isDebugValue() && MI.getOperand(0).isFI() &&
SpillFIs[MI.getOperand(0).getIndex()]) {
MI.getOperand(0).ChangeToRegister(Register(), false /*isDef*/);
- MI.getOperand(0).setIsDebug();
}
}
}
+  // All frame indices that are dead by now should be removed from the
+  // function frame. Otherwise, later pass(es) such as "stack slot coloring"
+  // may re-map the freed frame index ids, which in turn would corrupt the
+  // bookkeeping of "frame index to VGPR lane".
+ FuncInfo->removeDeadFrameIndices(MFI);
+
MadeChange = true;
} else if (FuncInfo->VGPRReservedForSGPRSpill) {
FuncInfo->removeVGPRForSGPRSpill(FuncInfo->VGPRReservedForSGPRSpill, MF);
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 85cfe36df16a..c4007f56f350 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -67,9 +67,11 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
const bool UseFixedABI = AMDGPUTargetMachine::EnableFixedFunctionABI &&
CC != CallingConv::AMDGPU_Gfx &&
(!isEntryFunction() || HasCalls);
+ const bool IsKernel = CC == CallingConv::AMDGPU_KERNEL ||
+ CC == CallingConv::SPIR_KERNEL;
- if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
- if (!F.arg_empty())
+ if (IsKernel) {
+ if (!F.arg_empty() || ST.getImplicitArgNumBytes(F) != 0)
KernargSegmentPtr = true;
WorkGroupIDX = true;
WorkItemIDX = true;
@@ -94,45 +96,76 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
ArgDescriptor::createRegister(ScratchRSrcReg);
}
- if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
+ if (!F.hasFnAttribute("amdgpu-no-implicitarg-ptr"))
ImplicitArgPtr = true;
} else {
- if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) {
- KernargSegmentPtr = true;
- MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
- MaxKernArgAlign);
- }
+ ImplicitArgPtr = false;
+ MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
+ MaxKernArgAlign);
}
+ bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
+ if (isAmdHsaOrMesa && !ST.enableFlatScratch())
+ PrivateSegmentBuffer = true;
+ else if (ST.isMesaGfxShader(F))
+ ImplicitBufferPtr = true;
+
if (UseFixedABI) {
+ DispatchPtr = true;
+ QueuePtr = true;
+ ImplicitArgPtr = true;
WorkGroupIDX = true;
WorkGroupIDY = true;
WorkGroupIDZ = true;
WorkItemIDX = true;
WorkItemIDY = true;
WorkItemIDZ = true;
- ImplicitArgPtr = true;
- } else {
- if (F.hasFnAttribute("amdgpu-work-group-id-x"))
+
+ // FIXME: We don't need this?
+ DispatchID = true;
+ } else if (!AMDGPU::isGraphics(CC)) {
+ if (IsKernel || !F.hasFnAttribute("amdgpu-no-workgroup-id-x"))
WorkGroupIDX = true;
- if (F.hasFnAttribute("amdgpu-work-group-id-y"))
+ if (!F.hasFnAttribute("amdgpu-no-workgroup-id-y"))
WorkGroupIDY = true;
- if (F.hasFnAttribute("amdgpu-work-group-id-z"))
+ if (!F.hasFnAttribute("amdgpu-no-workgroup-id-z"))
WorkGroupIDZ = true;
- if (F.hasFnAttribute("amdgpu-work-item-id-x"))
+ if (IsKernel || !F.hasFnAttribute("amdgpu-no-workitem-id-x"))
WorkItemIDX = true;
- if (F.hasFnAttribute("amdgpu-work-item-id-y"))
+ if (!F.hasFnAttribute("amdgpu-no-workitem-id-y"))
WorkItemIDY = true;
- if (F.hasFnAttribute("amdgpu-work-item-id-z"))
+ if (!F.hasFnAttribute("amdgpu-no-workitem-id-z"))
WorkItemIDZ = true;
+
+ if (!F.hasFnAttribute("amdgpu-no-dispatch-ptr"))
+ DispatchPtr = true;
+
+ if (!F.hasFnAttribute("amdgpu-no-queue-ptr"))
+ QueuePtr = true;
+
+ if (!F.hasFnAttribute("amdgpu-no-dispatch-id"))
+ DispatchID = true;
}
+  // FIXME: This attribute is a hack; we just need an analysis on the function
+ // to look for allocas.
bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects");
+
+ // TODO: This could be refined a lot. The attribute is a poor way of
+ // detecting calls or stack objects that may require it before argument
+ // lowering.
+ if (ST.hasFlatAddressSpace() && isEntryFunction() &&
+ (isAmdHsaOrMesa || ST.enableFlatScratch()) &&
+ (HasCalls || HasStackObjects || ST.enableFlatScratch()) &&
+ !ST.flatScratchIsArchitected()) {
+ FlatScratchInit = true;
+ }
+
if (isEntryFunction()) {
// X, XY, and XYZ are the only supported combinations, so make sure Y is
// enabled if Z is.
@@ -150,44 +183,6 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
}
}
- bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
- if (isAmdHsaOrMesa) {
- if (!ST.enableFlatScratch())
- PrivateSegmentBuffer = true;
-
- if (UseFixedABI) {
- DispatchPtr = true;
- QueuePtr = true;
-
- // FIXME: We don't need this?
- DispatchID = true;
- } else {
- if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
- DispatchPtr = true;
-
- if (F.hasFnAttribute("amdgpu-queue-ptr"))
- QueuePtr = true;
-
- if (F.hasFnAttribute("amdgpu-dispatch-id"))
- DispatchID = true;
- }
- } else if (ST.isMesaGfxShader(F)) {
- ImplicitBufferPtr = true;
- }
-
- if (UseFixedABI || F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
- KernargSegmentPtr = true;
-
- // TODO: This could be refined a lot. The attribute is a poor way of
- // detecting calls or stack objects that may require it before argument
- // lowering.
- if (ST.hasFlatAddressSpace() && isEntryFunction() &&
- (isAmdHsaOrMesa || ST.enableFlatScratch()) &&
- (HasCalls || HasStackObjects || ST.enableFlatScratch()) &&
- !ST.flatScratchIsArchitected()) {
- FlatScratchInit = true;
- }
-
Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
StringRef S = A.getValueAsString();
if (!S.empty())
@@ -426,7 +421,7 @@ bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
OtherUsedRegs.set(Reg);
SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
- for (unsigned I = 0; I < NumLanes; ++I) {
+ for (int I = NumLanes - 1; I >= 0; --I) {
NextSpillReg = std::find_if(
NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
@@ -447,10 +442,16 @@ bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
}
void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) {
- // The FP & BP spills haven't been inserted yet, so keep them around.
- for (auto &R : SGPRToVGPRSpills) {
- if (R.first != FramePointerSaveIndex && R.first != BasePointerSaveIndex)
+  // Remove dead frame indices from the function frame, but keep the FP & BP
+  // indices since their spills haven't been inserted yet. Also remove those
+  // indices from the `SGPRToVGPRSpills` map; otherwise, any re-mapping of the
+  // freed frame indices by later pass(es) like "stack slot coloring" could
+  // corrupt the frame-index-to-VGPR-lane bookkeeping.
+ for (auto &R : make_early_inc_range(SGPRToVGPRSpills)) {
+ if (R.first != FramePointerSaveIndex && R.first != BasePointerSaveIndex) {
MFI.RemoveStackObject(R.first);
+ SGPRToVGPRSpills.erase(R.first);
+ }
}
// All other SPGRs must be allocated on the default stack, so reset the stack
@@ -650,3 +651,38 @@ bool SIMachineFunctionInfo::removeVGPRForSGPRSpill(Register ReservedVGPR,
}
return false;
}
+
+bool SIMachineFunctionInfo::usesAGPRs(const MachineFunction &MF) const {
+ if (UsesAGPRs)
+ return *UsesAGPRs;
+
+ if (!AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv()) ||
+ MF.getFrameInfo().hasCalls()) {
+ UsesAGPRs = true;
+ return true;
+ }
+
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
+ const Register Reg = Register::index2VirtReg(I);
+ const TargetRegisterClass *RC = MRI.getRegClassOrNull(Reg);
+ if (RC && SIRegisterInfo::isAGPRClass(RC)) {
+ UsesAGPRs = true;
+ return true;
+ } else if (!RC && !MRI.use_empty(Reg) && MRI.getType(Reg).isValid()) {
+      // Defer caching UsesAGPRs; the function may not be regbank selected yet.
+ return true;
+ }
+ }
+
+ for (MCRegister Reg : AMDGPU::AGPR_32RegClass) {
+ if (MRI.isPhysRegUsed(Reg)) {
+ UsesAGPRs = true;
+ return true;
+ }
+ }
+
+ UsesAGPRs = false;
+ return false;
+}
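A hypothetical caller (not actual SIRegisterInfo code) could use the new query to skip AGPR-related bookkeeping for functions that provably never touch AGPRs:

  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (!FuncInfo->usesAGPRs(MF))
    return false; // no AGPR handling needed for this function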
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index fb6d4f8841ab..c305bc20e40d 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -26,9 +26,9 @@ namespace llvm {
class MachineFrameInfo;
class MachineFunction;
-class TargetRegisterClass;
class SIMachineFunctionInfo;
class SIRegisterInfo;
+class TargetRegisterClass;
class AMDGPUPseudoSourceValue : public PseudoSourceValue {
public:
@@ -433,6 +433,8 @@ private:
// Current recorded maximum possible occupancy.
unsigned Occupancy;
+ mutable Optional<bool> UsesAGPRs;
+
MCPhysReg getNextUserSGPR() const;
MCPhysReg getNextSystemSGPR() const;
@@ -946,6 +948,9 @@ public:
Occupancy = Limit;
limitOccupancy(MF);
}
+
+ // \returns true if a function needs or may need AGPRs.
+ bool usesAGPRs(const MachineFunction &MF) const;
};
} // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp b/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
index 278dd05b049c..5590d84cc3ab 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
@@ -403,7 +403,7 @@ void SIScheduleBlock::schedule(MachineBasicBlock::iterator BeginBlock,
}
// TODO: compute InternalAdditionnalPressure.
- InternalAdditionnalPressure.resize(TopPressure.MaxSetPressure.size());
+ InternalAdditionalPressure.resize(TopPressure.MaxSetPressure.size());
// Check everything is right.
#ifndef NDEBUG
diff --git a/llvm/lib/Target/AMDGPU/SIMachineScheduler.h b/llvm/lib/Target/AMDGPU/SIMachineScheduler.h
index a2f5a1453d6a..ac34a748edbc 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineScheduler.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineScheduler.h
@@ -25,6 +25,8 @@ namespace llvm {
class SIInstrInfo;
class SIRegisterInfo;
+class SIScheduleDAGMI;
+class SIScheduleBlockCreator;
enum SIScheduleCandReason {
NoCand,
@@ -48,9 +50,6 @@ struct SISchedulerCandidate {
void setRepeat(SIScheduleCandReason R) { RepeatReasonSet |= (1 << R); }
};
-class SIScheduleDAGMI;
-class SIScheduleBlockCreator;
-
enum SIScheduleBlockLinkKind {
NoData,
Data
@@ -73,7 +72,7 @@ class SIScheduleBlock {
// store the live virtual and real registers.
// We do care only of SGPR32 and VGPR32 and do track only virtual registers.
// Pressure of additional registers required inside the block.
- std::vector<unsigned> InternalAdditionnalPressure;
+ std::vector<unsigned> InternalAdditionalPressure;
// Pressure of input and output registers
std::vector<unsigned> LiveInPressure;
std::vector<unsigned> LiveOutPressure;
@@ -154,8 +153,8 @@ public:
// Needs the block to be scheduled inside
// TODO: find a way to compute it.
- std::vector<unsigned> &getInternalAdditionnalRegUsage() {
- return InternalAdditionnalPressure;
+ std::vector<unsigned> &getInternalAdditionalRegUsage() {
+ return InternalAdditionalPressure;
}
std::set<unsigned> &getInRegs() { return LiveInRegs; }
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index 71be73c2f0e4..29f072ca1e6c 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -126,8 +126,7 @@ private:
(OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
SIAtomicAddrSpace::NONE &&
(InstrAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
- SIAtomicAddrSpace::NONE &&
- !isStrongerThan(FailureOrdering, Ordering));
+ SIAtomicAddrSpace::NONE);
// There is also no cross address space ordering if the ordering
// address space is the same as the instruction address space and
@@ -369,7 +368,7 @@ protected:
public:
- SIGfx6CacheControl(const GCNSubtarget &ST) : SICacheControl(ST) {};
+ SIGfx6CacheControl(const GCNSubtarget &ST) : SICacheControl(ST) {}
bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
@@ -410,7 +409,7 @@ public:
class SIGfx7CacheControl : public SIGfx6CacheControl {
public:
- SIGfx7CacheControl(const GCNSubtarget &ST) : SIGfx6CacheControl(ST) {};
+ SIGfx7CacheControl(const GCNSubtarget &ST) : SIGfx6CacheControl(ST) {}
bool insertAcquire(MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
@@ -422,7 +421,7 @@ public:
class SIGfx90ACacheControl : public SIGfx7CacheControl {
public:
- SIGfx90ACacheControl(const GCNSubtarget &ST) : SIGfx7CacheControl(ST) {};
+ SIGfx90ACacheControl(const GCNSubtarget &ST) : SIGfx7CacheControl(ST) {}
bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
@@ -471,7 +470,7 @@ protected:
public:
- SIGfx10CacheControl(const GCNSubtarget &ST) : SIGfx7CacheControl(ST) {};
+ SIGfx10CacheControl(const GCNSubtarget &ST) : SIGfx7CacheControl(ST) {}
bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
@@ -651,14 +650,11 @@ Optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
}
SSID = IsSyncScopeInclusion.getValue() ? SSID : MMO->getSyncScopeID();
- Ordering = isStrongerThan(Ordering, OpOrdering)
- ? Ordering
- : MMO->getSuccessOrdering();
+ Ordering = getMergedAtomicOrdering(Ordering, OpOrdering);
assert(MMO->getFailureOrdering() != AtomicOrdering::Release &&
MMO->getFailureOrdering() != AtomicOrdering::AcquireRelease);
FailureOrdering =
- isStrongerThan(FailureOrdering, MMO->getFailureOrdering()) ?
- FailureOrdering : MMO->getFailureOrdering();
+ getMergedAtomicOrdering(FailureOrdering, MMO->getFailureOrdering());
}
}
@@ -859,7 +855,7 @@ bool SIGfx6CacheControl::enableVolatileAndOrNonTemporal(
// instructions. The latter are always marked as volatile so cannot sensibly
// handle it as do not want to pessimize all atomics. Also they do not support
// the nontemporal attribute.
- assert( Op == SIMemOp::LOAD || Op == SIMemOp::STORE);
+ assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);
bool Changed = false;
@@ -1035,8 +1031,8 @@ bool SIGfx6CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
SIAtomicAddrSpace AddrSpace,
bool IsCrossAddrSpaceOrdering,
Position Pos) const {
- return insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
- IsCrossAddrSpaceOrdering, Pos);
+ return insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
+ IsCrossAddrSpaceOrdering, Pos);
}
bool SIGfx7CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
@@ -1108,7 +1104,8 @@ bool SIGfx90ACacheControl::enableLoadCacheBypass(
// different CUs. Therefore need to bypass the L1 which is per CU.
// Otherwise in non-threadgroup split mode all waves of a work-group are
// on the same CU, and so the L1 does not need to be bypassed.
- if (ST.isTgSplitEnabled()) Changed |= enableGLCBit(MI);
+ if (ST.isTgSplitEnabled())
+ Changed |= enableGLCBit(MI);
break;
case SIAtomicScope::WAVEFRONT:
case SIAtomicScope::SINGLETHREAD:
@@ -1204,14 +1201,13 @@ bool SIGfx90ACacheControl::enableVolatileAndOrNonTemporal(
// instructions. The latter are always marked as volatile so cannot sensibly
// handle it as do not want to pessimize all atomics. Also they do not support
// the nontemporal attribute.
- assert( Op == SIMemOp::LOAD || Op == SIMemOp::STORE);
+ assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);
bool Changed = false;
if (IsVolatile) {
- if (Op == SIMemOp::LOAD) {
+ if (Op == SIMemOp::LOAD)
Changed |= enableGLCBit(MI);
- }
// Ensure operation has completed at system scope to cause all volatile
// operations to be visible outside the program in a global order. Do not
@@ -1398,7 +1394,8 @@ bool SIGfx10CacheControl::enableLoadCacheBypass(
// the WGP. Therefore need to bypass the L0 which is per CU. Otherwise in
// CU mode all waves of a work-group are on the same CU, and so the L0
// does not need to be bypassed.
- if (!ST.isCuModeEnabled()) Changed |= enableGLCBit(MI);
+ if (!ST.isCuModeEnabled())
+ Changed |= enableGLCBit(MI);
break;
case SIAtomicScope::WAVEFRONT:
case SIAtomicScope::SINGLETHREAD:
@@ -1432,12 +1429,11 @@ bool SIGfx10CacheControl::enableVolatileAndOrNonTemporal(
// instructions. The latter are always marked as volatile so cannot sensibly
// handle it as do not want to pessimize all atomics. Also they do not support
// the nontemporal attribute.
- assert( Op == SIMemOp::LOAD || Op == SIMemOp::STORE);
+ assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);
bool Changed = false;
if (IsVolatile) {
-
if (Op == SIMemOp::LOAD) {
Changed |= enableGLCBit(MI);
Changed |= enableDLCBit(MI);
diff --git a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
index 3d659eca47db..69eab762f05c 100644
--- a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
+++ b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
@@ -225,7 +225,7 @@ void SIModeRegister::processBlockPhase1(MachineBasicBlock &MBB,
// RequirePending is used to indicate whether we are collecting the initial
// requirements for the block, and need to defer the first InsertionPoint to
// Phase 3. It is set to false once we have set FirstInsertionPoint, or when
- // we discover an explict setreg that means this block doesn't have any
+ // we discover an explicit setreg that means this block doesn't have any
// initial requirements.
bool RequirePending = true;
Status IPChange;
@@ -373,12 +373,8 @@ void SIModeRegister::processBlockPhase2(MachineBasicBlock &MBB,
BlockInfo[ThisBlock]->Exit = TmpStatus;
// Add the successors to the work list so we can propagate the changed exit
// status.
- for (MachineBasicBlock::succ_iterator S = MBB.succ_begin(),
- E = MBB.succ_end();
- S != E; S = std::next(S)) {
- MachineBasicBlock &B = *(*S);
- Phase2List.push(&B);
- }
+ for (MachineBasicBlock *Succ : MBB.successors())
+ Phase2List.push(Succ);
}
BlockInfo[ThisBlock]->ExitSet = ExitSet;
if (RevisitRequired)
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp
index 307c9eba9d3b..6bf6c45d8cf6 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp
@@ -11,7 +11,7 @@
/// structures and waterfall loops.
///
/// When we do structurization, we usually transform an if-else into two
-/// sucessive if-then (with a flow block to do predicate inversion). Consider a
+/// successive if-then (with a flow block to do predicate inversion). Consider a
/// simple case after structurization: A divergent value %a was defined before
/// if-else and used in both THEN (use in THEN is optional) and ELSE part:
/// bb.if:
diff --git a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index 7d7a753bb333..6a698348d389 100644
--- a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -365,7 +365,7 @@ bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
if (Dst &&
DstUnused->getImm() == AMDGPU::SDWA::DstUnused::UNUSED_PRESERVE) {
- // This will work if the tied src is acessing WORD_0, and the dst is
+ // This will work if the tied src is accessing WORD_0, and the dst is
// writing WORD_1. Modifiers don't matter because all the bits that
// would be impacted are being overwritten by the dst.
// Any other case will not work.
diff --git a/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp b/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp
index e05aafe5e291..13a6a718f4f2 100644
--- a/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp
@@ -8,7 +8,7 @@
//
/// \file
/// This pass creates bundles of memory instructions to protect adjacent loads
-/// and stores from beeing rescheduled apart from each other post-RA.
+/// and stores from being rescheduled apart from each other post-RA.
///
//===----------------------------------------------------------------------===//
@@ -90,6 +90,9 @@ bool SIPostRABundler::isDependentLoad(const MachineInstr &MI) const {
void SIPostRABundler::collectUsedRegUnits(const MachineInstr &MI,
BitVector &UsedRegUnits) const {
+ if (MI.isDebugInstr())
+ return;
+
for (const MachineOperand &Op : MI.operands()) {
if (!Op.isReg() || !Op.readsReg())
continue;
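The early return added above keeps debug instructions out of the register-unit scan. A self-contained sketch of the same guard, assuming nothing beyond the public MachineInstr API (the helper name is hypothetical):

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"

// Sketch only: DBG_VALUE and other debug instructions must never influence
// codegen decisions, so operand scans typically bail out on them first.
static bool readsAnyRegister(const llvm::MachineInstr &MI) {
  if (MI.isDebugInstr())
    return false;
  for (const llvm::MachineOperand &Op : MI.operands())
    if (Op.isReg() && Op.readsReg())
      return true;
  return false;
}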
diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
index dce0f4b0df5f..d1b8e217471e 100644
--- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -174,7 +174,7 @@ bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const {
MI.setDesc(TII->get(AMDGPU::S_BRANCH));
} else if (IsVCCZ && MaskValue == 0) {
// Will always branch
- // Remove all succesors shadowed by new unconditional branch
+ // Remove all successors shadowed by new unconditional branch
MachineBasicBlock *Parent = MI.getParent();
SmallVector<MachineInstr *, 4> ToRemove;
bool Found = false;
@@ -257,10 +257,8 @@ bool SIPreEmitPeephole::optimizeSetGPR(MachineInstr &First,
})) {
// The only exception allowed here is another indirect vector move
// with the same mode.
- if (!IdxOn ||
- !((I->getOpcode() == AMDGPU::V_MOV_B32_e32 &&
- I->hasRegisterImplicitUseOperand(AMDGPU::M0)) ||
- I->getOpcode() == AMDGPU::V_MOV_B32_indirect))
+ if (!IdxOn || !(I->getOpcode() == AMDGPU::V_MOV_B32_indirect_write ||
+ I->getOpcode() == AMDGPU::V_MOV_B32_indirect_read))
return false;
}
}
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index bba5bf7fdbc3..bfbe84f696f8 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -97,7 +97,7 @@ struct SGPRSpillBuilder {
unsigned EltSize = 4;
RegScavenger *RS;
- MachineBasicBlock &MBB;
+ MachineBasicBlock *MBB;
MachineFunction &MF;
SIMachineFunctionInfo &MFI;
const SIInstrInfo &TII;
@@ -110,9 +110,14 @@ struct SGPRSpillBuilder {
SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII,
bool IsWave32, MachineBasicBlock::iterator MI, int Index,
RegScavenger *RS)
- : SuperReg(MI->getOperand(0).getReg()), MI(MI),
- IsKill(MI->getOperand(0).isKill()), DL(MI->getDebugLoc()), Index(Index),
- RS(RS), MBB(*MI->getParent()), MF(*MBB.getParent()),
+ : SGPRSpillBuilder(TRI, TII, IsWave32, MI, MI->getOperand(0).getReg(),
+ MI->getOperand(0).isKill(), Index, RS) {}
+
+ SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII,
+ bool IsWave32, MachineBasicBlock::iterator MI, Register Reg,
+ bool IsKill, int Index, RegScavenger *RS)
+ : SuperReg(Reg), MI(MI), IsKill(IsKill), DL(MI->getDebugLoc()),
+ Index(Index), RS(RS), MBB(MI->getParent()), MF(*MBB->getParent()),
MFI(*MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI),
IsWave32(IsWave32) {
const TargetRegisterClass *RC = TRI.getPhysRegClass(SuperReg);
@@ -189,8 +194,9 @@ struct SGPRSpillBuilder {
if (SavedExecReg) {
RS->setRegUsed(SavedExecReg);
// Set exec to needed lanes
- BuildMI(MBB, MI, DL, TII.get(MovOpc), SavedExecReg).addReg(ExecReg);
- auto I = BuildMI(MBB, MI, DL, TII.get(MovOpc), ExecReg).addImm(VGPRLanes);
+ BuildMI(*MBB, MI, DL, TII.get(MovOpc), SavedExecReg).addReg(ExecReg);
+ auto I =
+ BuildMI(*MBB, MI, DL, TII.get(MovOpc), ExecReg).addImm(VGPRLanes);
if (!TmpVGPRLive)
I.addReg(TmpVGPR, RegState::ImplicitDefine);
// Spill needed lanes
@@ -201,7 +207,7 @@ struct SGPRSpillBuilder {
TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false,
/*IsKill*/ false);
// Spill inactive lanes
- auto I = BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
+ auto I = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
if (!TmpVGPRLive)
I.addReg(TmpVGPR, RegState::ImplicitDefine);
TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false);
@@ -224,7 +230,7 @@ struct SGPRSpillBuilder {
TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true,
/*IsKill*/ false);
// Restore exec
- auto I = BuildMI(MBB, MI, DL, TII.get(MovOpc), ExecReg)
+ auto I = BuildMI(*MBB, MI, DL, TII.get(MovOpc), ExecReg)
.addReg(SavedExecReg, RegState::Kill);
// Add an implicit use of the load so it is not dead.
// FIXME This inserts an unnecessary waitcnt
@@ -235,7 +241,7 @@ struct SGPRSpillBuilder {
// Restore inactive lanes
TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true,
/*IsKill*/ false);
- auto I = BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
+ auto I = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
if (!TmpVGPRLive) {
I.addReg(TmpVGPR, RegState::ImplicitKill);
}
@@ -261,11 +267,17 @@ struct SGPRSpillBuilder {
TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad,
/*IsKill*/ false);
// Spill inactive lanes
- BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
+ BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad);
- BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
+ BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
}
}
+
+ void setMI(MachineBasicBlock *NewMBB, MachineBasicBlock::iterator NewMI) {
+ assert(MBB->getParent() == &MF);
+ MI = NewMI;
+ MBB = NewMBB;
+ }
};
} // namespace llvm
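The builder above now keeps its insertion block as a pointer and gains setMI(), so the same spill state can later be re-pointed at a different block (spillEmergencySGPR further down in this diff uses that to emit the restore in RestoreMBB). A reduced sketch of the pattern, with hypothetical names and only the members needed to show the idea:

#include "llvm/CodeGen/MachineBasicBlock.h"
#include <cassert>

// Sketch only: holding the block as a pointer rather than a reference lets
// the cursor be re-targeted after construction, e.g. to emit a matching
// restore sequence in a dedicated restore block.
struct SpillCursorSketch {
  llvm::MachineBasicBlock *MBB;
  llvm::MachineBasicBlock::iterator MI;

  explicit SpillCursorSketch(llvm::MachineBasicBlock::iterator InsertPt)
      : MBB(InsertPt->getParent()), MI(InsertPt) {}

  void setMI(llvm::MachineBasicBlock *NewMBB,
             llvm::MachineBasicBlock::iterator NewMI) {
    assert(NewMBB && "insertion block must exist");
    MBB = NewMBB;
    MI = NewMI;
  }
};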
@@ -348,10 +360,13 @@ const MCPhysReg *SIRegisterInfo::getCalleeSavedRegs(
case CallingConv::C:
case CallingConv::Fast:
case CallingConv::Cold:
- case CallingConv::AMDGPU_Gfx:
return MF->getSubtarget<GCNSubtarget>().hasGFX90AInsts()
? CSR_AMDGPU_HighRegs_With_AGPRs_SaveList
: CSR_AMDGPU_HighRegs_SaveList;
+ case CallingConv::AMDGPU_Gfx:
+ return MF->getSubtarget<GCNSubtarget>().hasGFX90AInsts()
+ ? CSR_AMDGPU_SI_Gfx_With_AGPRs_SaveList
+ : CSR_AMDGPU_SI_Gfx_SaveList;
default: {
// Dummy to not crash RegisterClassInfo.
static const MCPhysReg NoCalleeSavedReg = AMDGPU::NoRegister;
@@ -371,10 +386,13 @@ const uint32_t *SIRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
case CallingConv::C:
case CallingConv::Fast:
case CallingConv::Cold:
- case CallingConv::AMDGPU_Gfx:
return MF.getSubtarget<GCNSubtarget>().hasGFX90AInsts()
? CSR_AMDGPU_HighRegs_With_AGPRs_RegMask
: CSR_AMDGPU_HighRegs_RegMask;
+ case CallingConv::AMDGPU_Gfx:
+ return MF.getSubtarget<GCNSubtarget>().hasGFX90AInsts()
+ ? CSR_AMDGPU_SI_Gfx_With_AGPRs_RegMask
+ : CSR_AMDGPU_SI_Gfx_RegMask;
default:
return nullptr;
}
@@ -501,18 +519,36 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
reserveRegisterTuples(Reserved, Reg);
}
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
- // TODO: In an entry function without calls and AGPRs used it is possible
- // to use the whole register budget for VGPRs. Even more it shall
- // be possible to estimate maximum AGPR/VGPR pressure and split
- // register file accordingly.
- if (ST.hasGFX90AInsts())
- MaxNumVGPRs /= 2;
+ unsigned MaxNumAGPRs = MaxNumVGPRs;
unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
+
+ if (ST.hasGFX90AInsts()) {
+ // In an entry function without calls and AGPRs used it is possible to use
+ // the whole register budget for VGPRs.
+
+ // TODO: it shall be possible to estimate maximum AGPR/VGPR pressure and
+ // split register file accordingly.
+ if (MFI->usesAGPRs(MF)) {
+ MaxNumVGPRs /= 2;
+ MaxNumAGPRs = MaxNumVGPRs;
+ } else {
+ if (MaxNumVGPRs > TotalNumVGPRs) {
+ MaxNumAGPRs = MaxNumVGPRs - TotalNumVGPRs;
+ MaxNumVGPRs = TotalNumVGPRs;
+ } else
+ MaxNumAGPRs = 0;
+ }
+ }
+
for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
reserveRegisterTuples(Reserved, Reg);
- Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
+ }
+
+ for (unsigned i = MaxNumAGPRs; i < TotalNumVGPRs; ++i) {
+ unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
reserveRegisterTuples(Reserved, Reg);
}
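The reworked reservation code above splits the gfx90a register budget between VGPRs and AGPRs only when the function actually uses AGPRs. A small worked sketch of that arithmetic (standalone; the function name is made up, and TotalNumVGPRs would be 256 for the VGPR_32 class):

#include <utility>

// Sketch only: on subtargets where AGPRs and VGPRs share one budget, either
// halve it when AGPRs are in use, or give the whole budget to VGPRs and let
// any overflow beyond the addressable VGPRs spill over into AGPRs.
static std::pair<unsigned, unsigned>
splitRegisterBudget(unsigned MaxNumVGPRs, unsigned TotalNumVGPRs,
                    bool UsesAGPRs) {
  unsigned MaxNumAGPRs;
  if (UsesAGPRs) {
    MaxNumVGPRs /= 2;                          // e.g. 512 -> 256 + 256
    MaxNumAGPRs = MaxNumVGPRs;
  } else if (MaxNumVGPRs > TotalNumVGPRs) {
    MaxNumAGPRs = MaxNumVGPRs - TotalNumVGPRs; // budget overflow goes to AGPRs
    MaxNumVGPRs = TotalNumVGPRs;
  } else {
    MaxNumAGPRs = 0;                           // whole budget fits in VGPRs
  }
  return {MaxNumVGPRs, MaxNumAGPRs};
}

Registers at or above the resulting maxima are then reserved, which is what the two loops following the split do in the patch.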
@@ -536,8 +572,6 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
}
}
- const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
-
Register ScratchRSrcReg = MFI->getScratchRSrcReg();
if (ScratchRSrcReg != AMDGPU::NoRegister) {
// Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need
@@ -801,6 +835,14 @@ const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
return &AMDGPU::VGPR_32RegClass;
}
+const TargetRegisterClass *
+SIRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
+ if (isAGPRClass(RC) && !ST.hasGFX90AInsts())
+ return getEquivalentVGPRClass(RC);
+
+ return RC;
+}
+
static unsigned getNumSubRegsForSpillOp(unsigned Op) {
switch (Op) {
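The getCrossCopyRegClass override added above reports that, on subtargets without the gfx90a instructions, copies involving AGPR classes should be routed through the equivalent VGPR class. A hedged sketch of that decision shape (names are illustrative, not the actual override):

#include "llvm/CodeGen/TargetRegisterInfo.h"

// Sketch only: report the class a cross-class copy should go through. If the
// class is an AGPR class and the subtarget has no direct AGPR moves, route
// the copy through the equivalent VGPR class; otherwise copy within RC.
static const llvm::TargetRegisterClass *
crossCopyClassSketch(const llvm::TargetRegisterClass *RC, bool IsAGPRClass,
                     bool HasDirectAGPRCopies,
                     const llvm::TargetRegisterClass *EquivalentVGPRClass) {
  if (IsAGPRClass && !HasDirectAGPRCopies)
    return EquivalentVGPRClass;
  return RC;
}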
@@ -1037,7 +1079,7 @@ static unsigned getFlatScratchSpillOpcode(const SIInstrInfo *TII,
}
void SIRegisterInfo::buildSpillLoadStore(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL,
unsigned LoadStoreOp, int Index, Register ValueReg, bool IsKill,
MCRegister ScratchOffsetReg, int64_t InstOffset, MachineMemOperand *MMO,
RegScavenger *RS, LivePhysRegs *LiveRegs) const {
@@ -1049,7 +1091,6 @@ void SIRegisterInfo::buildSpillLoadStore(
const SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>();
const MCInstrDesc *Desc = &TII->get(LoadStoreOp);
- const DebugLoc &DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
bool IsStore = Desc->mayStore();
bool IsFlat = TII->isFLATScratch(LoadStoreOp);
@@ -1177,9 +1218,19 @@ void SIRegisterInfo::buildSpillLoadStore(
bool NeedSuperRegDef = e > 1 && IsStore && i == 0;
bool NeedSuperRegImpOperand = e > 1;
- unsigned Lane = RegOffset / 4;
- unsigned LaneE = (RegOffset + EltSize) / 4;
- for ( ; Lane != LaneE; ++Lane) {
+ // Remaining element size to spill into memory after some parts of it
+ // spilled into either AGPRs or VGPRs.
+ unsigned RemEltSize = EltSize;
+
+ // AGPRs to spill VGPRs and vice versa are allocated in a reverse order,
+ // starting from the last lane. In case if a register cannot be completely
+ // spilled into another register that will ensure its alignment does not
+ // change. For targets with VGPR alignment requirement this is important
+ // in case of flat scratch usage as we might get a scratch_load or
+ // scratch_store of an unaligned register otherwise.
+ for (int LaneS = (RegOffset + EltSize) / 4 - 1, Lane = LaneS,
+ LaneE = RegOffset / 4;
+ Lane >= LaneE; --Lane) {
bool IsSubReg = e > 1 || EltSize > 4;
Register Sub = IsSubReg
? Register(getSubReg(ValueReg, getSubRegFromChannel(Lane)))
@@ -1187,33 +1238,29 @@ void SIRegisterInfo::buildSpillLoadStore(
auto MIB = spillVGPRtoAGPR(ST, MBB, MI, Index, Lane, Sub, IsKill);
if (!MIB.getInstr())
break;
- if (NeedSuperRegDef || (IsSubReg && IsStore && Lane == 0)) {
+ if (NeedSuperRegDef || (IsSubReg && IsStore && Lane == LaneS && !i)) {
MIB.addReg(ValueReg, RegState::ImplicitDefine);
NeedSuperRegDef = false;
}
if (IsSubReg || NeedSuperRegImpOperand) {
NeedSuperRegImpOperand = true;
unsigned State = SrcDstRegState;
- if (Lane + 1 != LaneE)
+ if (Lane != LaneE)
State &= ~RegState::Kill;
MIB.addReg(ValueReg, RegState::Implicit | State);
}
+ RemEltSize -= 4;
}
- if (Lane == LaneE) // Fully spilled into AGPRs.
+ if (!RemEltSize) // Fully spilled into AGPRs.
continue;
- // Offset in bytes from the beginning of the ValueReg to its portion we
- // still need to spill. It may differ from RegOffset if a portion of
- // current SubReg has been already spilled into AGPRs by the loop above.
- unsigned RemRegOffset = Lane * 4;
- unsigned RemEltSize = EltSize - (RemRegOffset - RegOffset);
if (RemEltSize != EltSize) { // Partially spilled to AGPRs
assert(IsFlat && EltSize > 4);
unsigned NumRegs = RemEltSize / 4;
SubReg = Register(getSubReg(ValueReg,
- getSubRegFromChannel(RemRegOffset / 4, NumRegs)));
+ getSubRegFromChannel(RegOffset / 4, NumRegs)));
unsigned Opc = getFlatScratchSpillOpcode(TII, LoadStoreOp, RemEltSize);
Desc = &TII->get(Opc);
}
@@ -1240,10 +1287,10 @@ void SIRegisterInfo::buildSpillLoadStore(
SubReg = TmpReg;
}
- MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(RemRegOffset);
+ MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(RegOffset);
MachineMemOperand *NewMMO =
MF->getMachineMemOperand(PInfo, MMO->getFlags(), RemEltSize,
- commonAlignment(Alignment, RemRegOffset));
+ commonAlignment(Alignment, RegOffset));
auto MIB =
BuildMI(MBB, MI, DL, *Desc)
@@ -1257,7 +1304,7 @@ void SIRegisterInfo::buildSpillLoadStore(
} else {
MIB.addReg(SOffset, SOffsetRegState);
}
- MIB.addImm(Offset + RemRegOffset)
+ MIB.addImm(Offset + RegOffset)
.addImm(0); // cpol
if (!IsFlat)
MIB.addImm(0) // tfe
@@ -1307,13 +1354,13 @@ void SIRegisterInfo::buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index,
if (IsLoad) {
unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
: AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
- buildSpillLoadStore(SB.MBB, SB.MI, Opc, Index, SB.TmpVGPR, false, FrameReg,
- Offset * SB.EltSize, MMO, SB.RS);
+ buildSpillLoadStore(*SB.MBB, SB.MI, SB.DL, Opc, Index, SB.TmpVGPR, false,
+ FrameReg, Offset * SB.EltSize, MMO, SB.RS);
} else {
unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
: AMDGPU::BUFFER_STORE_DWORD_OFFSET;
- buildSpillLoadStore(SB.MBB, SB.MI, Opc, Index, SB.TmpVGPR, IsKill, FrameReg,
- Offset * SB.EltSize, MMO, SB.RS);
+ buildSpillLoadStore(*SB.MBB, SB.MI, SB.DL, Opc, Index, SB.TmpVGPR, IsKill,
+ FrameReg, Offset * SB.EltSize, MMO, SB.RS);
// This only ever adds one VGPR spill
SB.MFI.addToSpilledVGPRs(1);
}
@@ -1336,6 +1383,10 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
SB.SuperReg != SB.MFI.getFrameOffsetReg()));
if (SpillToVGPR) {
+
+ assert(SB.NumSubRegs == VGPRSpills.size() &&
+ "Num of VGPR lanes should be equal to num of SGPRs spilled");
+
for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {
Register SubReg =
SB.NumSubRegs == 1
@@ -1347,8 +1398,8 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
// Mark the "old value of vgpr" input undef only if this is the first sgpr
// spill to this specific vgpr in the first basic block.
- auto MIB = BuildMI(SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32),
- Spill.VGPR)
+ auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
+ SB.TII.get(AMDGPU::V_WRITELANE_B32), Spill.VGPR)
.addReg(SubReg, getKillRegState(UseKill))
.addImm(Spill.Lane)
.addReg(Spill.VGPR);
@@ -1394,7 +1445,7 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
: Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
MachineInstrBuilder WriteLane =
- BuildMI(SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32),
+ BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32),
SB.TmpVGPR)
.addReg(SubReg, SubKillState)
.addImm(i % PVD.PerVGPR)
@@ -1456,10 +1507,10 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
: Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
- auto MIB =
- BuildMI(SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32), SubReg)
- .addReg(Spill.VGPR)
- .addImm(Spill.Lane);
+ auto MIB = BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32),
+ SubReg)
+ .addReg(Spill.VGPR)
+ .addImm(Spill.Lane);
if (SB.NumSubRegs > 1 && i == 0)
MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
if (LIS) {
@@ -1490,7 +1541,7 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
: Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
bool LastSubReg = (i + 1 == e);
- auto MIB = BuildMI(SB.MBB, MI, SB.DL,
+ auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
SB.TII.get(AMDGPU::V_READLANE_B32), SubReg)
.addReg(SB.TmpVGPR, getKillRegState(LastSubReg))
.addImm(i);
@@ -1516,6 +1567,75 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
return true;
}
+bool SIRegisterInfo::spillEmergencySGPR(MachineBasicBlock::iterator MI,
+ MachineBasicBlock &RestoreMBB,
+ Register SGPR, RegScavenger *RS) const {
+ SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, SGPR, false, 0,
+ RS);
+ SB.prepare();
+ // Generate the spill of SGPR to SB.TmpVGPR.
+ unsigned SubKillState = getKillRegState((SB.NumSubRegs == 1) && SB.IsKill);
+ auto PVD = SB.getPerVGPRData();
+ for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
+ unsigned TmpVGPRFlags = RegState::Undef;
+ // Write sub registers into the VGPR
+ for (unsigned i = Offset * PVD.PerVGPR,
+ e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
+ i < e; ++i) {
+ Register SubReg =
+ SB.NumSubRegs == 1
+ ? SB.SuperReg
+ : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
+
+ MachineInstrBuilder WriteLane =
+ BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32),
+ SB.TmpVGPR)
+ .addReg(SubReg, SubKillState)
+ .addImm(i % PVD.PerVGPR)
+ .addReg(SB.TmpVGPR, TmpVGPRFlags);
+ TmpVGPRFlags = 0;
+ // There could be undef components of a spilled super register.
+ // TODO: Can we detect this and skip the spill?
+ if (SB.NumSubRegs > 1) {
+ // The last implicit use of the SB.SuperReg carries the "Kill" flag.
+ unsigned SuperKillState = 0;
+ if (i + 1 == SB.NumSubRegs)
+ SuperKillState |= getKillRegState(SB.IsKill);
+ WriteLane.addReg(SB.SuperReg, RegState::Implicit | SuperKillState);
+ }
+ }
+ // Don't need to write VGPR out.
+ }
+
+ // Restore clobbered registers in the specified restore block.
+ MI = RestoreMBB.end();
+ SB.setMI(&RestoreMBB, MI);
+ // Generate the restore of SGPR from SB.TmpVGPR.
+ for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
+ // Don't need to load VGPR in.
+ // Unpack lanes
+ for (unsigned i = Offset * PVD.PerVGPR,
+ e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
+ i < e; ++i) {
+ Register SubReg =
+ SB.NumSubRegs == 1
+ ? SB.SuperReg
+ : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
+ bool LastSubReg = (i + 1 == e);
+ auto MIB = BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32),
+ SubReg)
+ .addReg(SB.TmpVGPR, getKillRegState(LastSubReg))
+ .addImm(i);
+ if (SB.NumSubRegs > 1 && i == 0)
+ MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
+ }
+ }
+ SB.restore();
+
+ SB.MFI.addToSpilledSGPRs(SB.NumSubRegs);
+ return false;
+}
+
/// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
/// a VGPR and the stack slot can be safely eliminated when all other users are
/// handled.
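The spillEmergencySGPR routine added above packs the parts of an SGPR tuple into VGPR lanes with v_writelane_b32 and unpacks them again with v_readlane_b32 in the restore block. A small worked sketch of the lane arithmetic it relies on (standalone helpers; names are made up, and PerVGPR is assumed to be the wave size, 32 or 64):

#include <utility>

// Sketch only: an SGPR tuple with NumSubRegs 32-bit parts is spread across
// VGPR lanes, PerVGPR lanes per VGPR. Part i lands in VGPR i / PerVGPR at
// lane i % PerVGPR.
static std::pair<unsigned, unsigned> laneForSubReg(unsigned I,
                                                   unsigned PerVGPR) {
  return {I / PerVGPR, I % PerVGPR};
}

static unsigned numVGPRsNeeded(unsigned NumSubRegs, unsigned PerVGPR) {
  return (NumSubRegs + PerVGPR - 1) / PerVGPR; // round up
}

For example, spilling an 8-dword SGPR tuple in wave32 mode would occupy lanes 0..7 of a single temporary VGPR.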
@@ -1632,7 +1752,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
: AMDGPU::BUFFER_STORE_DWORD_OFFSET;
auto *MBB = MI->getParent();
buildSpillLoadStore(
- *MBB, MI, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
+ *MBB, MI, DL, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
*MI->memoperands_begin(), RS);
MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
@@ -1668,7 +1788,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
: AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
auto *MBB = MI->getParent();
buildSpillLoadStore(
- *MBB, MI, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
+ *MBB, MI, DL, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
*MI->memoperands_begin(), RS);
MI->eraseFromParent();
@@ -2152,34 +2272,6 @@ bool SIRegisterInfo::isSGPRReg(const MachineRegisterInfo &MRI,
return isSGPRClass(RC);
}
-// TODO: It might be helpful to have some target specific flags in
-// TargetRegisterClass to mark which classes are VGPRs to make this trivial.
-bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
- unsigned Size = getRegSizeInBits(*RC);
- if (Size == 16) {
- return getCommonSubClass(&AMDGPU::VGPR_LO16RegClass, RC) != nullptr ||
- getCommonSubClass(&AMDGPU::VGPR_HI16RegClass, RC) != nullptr;
- }
- const TargetRegisterClass *VRC = getVGPRClassForBitWidth(Size);
- if (!VRC) {
- assert(Size < 32 && "Invalid register class size");
- return false;
- }
- return getCommonSubClass(VRC, RC) != nullptr;
-}
-
-bool SIRegisterInfo::hasAGPRs(const TargetRegisterClass *RC) const {
- unsigned Size = getRegSizeInBits(*RC);
- if (Size < 16)
- return false;
- const TargetRegisterClass *ARC = getAGPRClassForBitWidth(Size);
- if (!ARC) {
- assert(getVGPRClassForBitWidth(Size) && "Invalid register class size");
- return false;
- }
- return getCommonSubClass(ARC, RC) != nullptr;
-}
-
const TargetRegisterClass *
SIRegisterInfo::getEquivalentVGPRClass(const TargetRegisterClass *SRC) const {
unsigned Size = getRegSizeInBits(*SRC);
@@ -2321,7 +2413,7 @@ bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
Register Reg) const {
const TargetRegisterClass *RC = getRegClassForReg(MRI, Reg);
// Registers without classes are unaddressable, SGPR-like registers.
- return RC && hasVGPRs(RC);
+ return RC && isVGPRClass(RC);
}
bool SIRegisterInfo::isAGPR(const MachineRegisterInfo &MRI,
@@ -2329,7 +2421,7 @@ bool SIRegisterInfo::isAGPR(const MachineRegisterInfo &MRI,
const TargetRegisterClass *RC = getRegClassForReg(MRI, Reg);
// Registers without classes are unaddressable, SGPR-like registers.
- return RC && hasAGPRs(RC);
+ return RC && isAGPRClass(RC);
}
bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
@@ -2427,8 +2519,10 @@ SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
if (const RegisterBank *RB = RCOrRB.dyn_cast<const RegisterBank*>())
return getRegClassForTypeOnBank(MRI.getType(MO.getReg()), *RB, MRI);
- const TargetRegisterClass *RC = RCOrRB.get<const TargetRegisterClass*>();
- return getAllocatableClass(RC);
+ if (const auto *RC = RCOrRB.dyn_cast<const TargetRegisterClass *>())
+ return getAllocatableClass(RC);
+
+ return nullptr;
}
MCRegister SIRegisterInfo::getVCC() const {
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index 2a92051e5fb2..8d90ddb1cf4c 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -17,6 +17,8 @@
#define GET_REGINFO_HEADER
#include "AMDGPUGenRegisterInfo.inc"
+#include "SIDefines.h"
+
namespace llvm {
class GCNSubtarget;
@@ -24,7 +26,6 @@ class LiveIntervals;
class LivePhysRegs;
class RegisterBank;
struct SGPRSpillBuilder;
-class SIMachineFunctionInfo;
class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
private:
@@ -108,6 +109,13 @@ public:
const TargetRegisterClass *getPointerRegClass(
const MachineFunction &MF, unsigned Kind = 0) const override;
+ /// Returns a legal register class to copy a register in the specified class
+ /// to or from. If it is possible to copy the register directly without using
+ /// a cross register class copy, return the specified RC. Returns NULL if it
+ /// is not possible to copy between two registers of the specified class.
+ const TargetRegisterClass *
+ getCrossCopyRegClass(const TargetRegisterClass *RC) const override;
+
void buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index, int Offset,
bool IsLoad, bool IsKill = true) const;
@@ -122,6 +130,10 @@ public:
LiveIntervals *LIS = nullptr,
bool OnlyToVGPR = false) const;
+ bool spillEmergencySGPR(MachineBasicBlock::iterator MI,
+ MachineBasicBlock &RestoreMBB, Register SGPR,
+ RegScavenger *RS) const;
+
void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
unsigned FIOperandNum,
RegScavenger *RS) const override;
@@ -151,7 +163,7 @@ public:
const TargetRegisterClass *getPhysRegClass(MCRegister Reg) const;
/// \returns true if this class contains only SGPR registers
- bool isSGPRClass(const TargetRegisterClass *RC) const {
+ static bool isSGPRClass(const TargetRegisterClass *RC) {
return !hasVGPRs(RC) && !hasAGPRs(RC);
}
@@ -162,19 +174,28 @@ public:
bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const;
+ /// \returns true if this class contains only VGPR registers
+ static bool isVGPRClass(const TargetRegisterClass *RC) {
+ return hasVGPRs(RC) && !hasAGPRs(RC);
+ }
+
/// \returns true if this class contains only AGPR registers
- bool isAGPRClass(const TargetRegisterClass *RC) const {
+ static bool isAGPRClass(const TargetRegisterClass *RC) {
return hasAGPRs(RC) && !hasVGPRs(RC);
}
/// \returns true if this class contains VGPR registers.
- bool hasVGPRs(const TargetRegisterClass *RC) const;
+ static bool hasVGPRs(const TargetRegisterClass *RC) {
+ return RC->TSFlags & SIRCFlags::HasVGPR;
+ }
/// \returns true if this class contains AGPR registers.
- bool hasAGPRs(const TargetRegisterClass *RC) const;
+ static bool hasAGPRs(const TargetRegisterClass *RC) {
+ return RC->TSFlags & SIRCFlags::HasAGPR;
+ }
/// \returns true if this class contains any vector registers.
- bool hasVectorRegisters(const TargetRegisterClass *RC) const {
+ static bool hasVectorRegisters(const TargetRegisterClass *RC) {
return hasVGPRs(RC) || hasAGPRs(RC);
}
@@ -350,10 +371,11 @@ public:
// For creating spill instructions during frame lowering, where no scavenger
// is available, LiveRegs can be used.
void buildSpillLoadStore(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI, unsigned LoadStoreOp,
- int Index, Register ValueReg, bool ValueIsKill,
- MCRegister ScratchOffsetReg, int64_t InstrOffset,
- MachineMemOperand *MMO, RegScavenger *RS,
+ MachineBasicBlock::iterator MI, const DebugLoc &DL,
+ unsigned LoadStoreOp, int Index, Register ValueReg,
+ bool ValueIsKill, MCRegister ScratchOffsetReg,
+ int64_t InstrOffset, MachineMemOperand *MMO,
+ RegScavenger *RS,
LivePhysRegs *LiveRegs = nullptr) const;
};
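With the header changes above, the VGPR/AGPR class queries become static bit tests on the TableGen-emitted TSFlags field instead of sub-class searches. A self-contained sketch of the scheme (the enumerator values mirror the TSFlags{0}/TSFlags{1} assignment in SIRegisterInfo.td below, but the names here are illustrative):

#include <cstdint>

namespace sketch {
// Sketch only: one bit per property, kept in sync with the .td definitions.
enum : uint8_t { HasVGPR = 1 << 0, HasAGPR = 1 << 1 };

struct RegClassInfo {
  uint8_t TSFlags = 0;
};

inline bool hasVGPRs(const RegClassInfo &RC) { return RC.TSFlags & HasVGPR; }
inline bool hasAGPRs(const RegClassInfo &RC) { return RC.TSFlags & HasAGPR; }
inline bool isVGPRClass(const RegClassInfo &RC) {
  return hasVGPRs(RC) && !hasAGPRs(RC); // pure VGPR class, e.g. VGPR_32
}
inline bool isAGPRClass(const RegClassInfo &RC) {
  return hasAGPRs(RC) && !hasVGPRs(RC); // pure AGPR class, e.g. AGPR_32
}
} // namespace sketch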
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 6e3c4e8775f3..cf1d90484228 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -126,8 +126,16 @@ class SIReg <string n, bits<16> regIdx = 0> :
let HWEncoding = regIdx;
}
-class SIRegWithSubRegs <string n, list<Register> subregs, bits<16> regIdx> :
- RegisterWithSubRegs<n, subregs> {
+// For register classes that use TSFlags.
+class SIRegisterClass <string n, list<ValueType> rTypes, int Align, dag rList>
+ : RegisterClass <n, rTypes, Align, rList> {
+ // For vector register classes.
+ field bit HasVGPR = 0;
+ field bit HasAGPR = 0;
+
+ // These need to be kept in sync with the enum SIRCFlags.
+ let TSFlags{0} = HasVGPR;
+ let TSFlags{1} = HasAGPR;
}
multiclass SIRegLoHi16 <string n, bits<16> regIdx, bit ArtificialHigh = 1,
@@ -490,14 +498,15 @@ class RegisterTypes<list<ValueType> reg_types> {
def Reg16Types : RegisterTypes<[i16, f16]>;
def Reg32Types : RegisterTypes<[i32, f32, v2i16, v2f16, p2, p3, p5, p6]>;
-def VGPR_LO16 : RegisterClass<"AMDGPU", Reg16Types.types, 16,
+let HasVGPR = 1 in {
+def VGPR_LO16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
(add (sequence "VGPR%u_LO16", 0, 255))> {
let AllocationPriority = 1;
let Size = 16;
let GeneratePressureSet = 0;
}
-def VGPR_HI16 : RegisterClass<"AMDGPU", Reg16Types.types, 16,
+def VGPR_HI16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
(add (sequence "VGPR%u_HI16", 0, 255))> {
let AllocationPriority = 1;
let Size = 16;
@@ -506,12 +515,13 @@ def VGPR_HI16 : RegisterClass<"AMDGPU", Reg16Types.types, 16,
// VGPR 32-bit registers
// i16/f16 only on VI+
-def VGPR_32 : RegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg16Types.types), 32,
+def VGPR_32 : SIRegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg16Types.types), 32,
(add (sequence "VGPR%u", 0, 255))> {
let AllocationPriority = 1;
let Size = 32;
let Weight = 1;
}
+} // End HasVGPR = 1
// VGPR 64-bit registers
def VGPR_64 : SIRegisterTuples<getSubRegs<2>.ret, VGPR_32, 255, 1, 2, "v">;
@@ -540,7 +550,8 @@ def VGPR_512 : SIRegisterTuples<getSubRegs<16>.ret, VGPR_32, 255, 1, 16, "v">;
// VGPR 1024-bit registers
def VGPR_1024 : SIRegisterTuples<getSubRegs<32>.ret, VGPR_32, 255, 1, 32, "v">;
-def AGPR_LO16 : RegisterClass<"AMDGPU", Reg16Types.types, 16,
+let HasAGPR = 1 in {
+def AGPR_LO16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
(add (sequence "AGPR%u_LO16", 0, 255))> {
let isAllocatable = 0;
let Size = 16;
@@ -548,12 +559,13 @@ def AGPR_LO16 : RegisterClass<"AMDGPU", Reg16Types.types, 16,
}
// AccVGPR 32-bit registers
-def AGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
+def AGPR_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add (sequence "AGPR%u", 0, 255))> {
let AllocationPriority = 1;
let Size = 32;
let Weight = 1;
}
+} // End HasAGPR = 1
// AGPR 64-bit registers
def AGPR_64 : SIRegisterTuples<getSubRegs<2>.ret, AGPR_32, 255, 1, 2, "a">;
@@ -679,6 +691,14 @@ def CCR_SGPR_64 : RegisterClass<"AMDGPU", SGPR_64.RegTypes, 32,
let AllocationPriority = SGPR_64.AllocationPriority;
}
+// Call clobbered 64-bit SGPRs for AMDGPU_Gfx CC
+def Gfx_CCR_SGPR_64 : RegisterClass<"AMDGPU", SGPR_64.RegTypes, 32,
+ (add (trunc (shl SGPR_64, 15), 1), // s[30:31]
+ (trunc (shl SGPR_64, 18), 14))> { // s[36:37]-s[s62:63]
+ let CopyCost = SGPR_64.CopyCost;
+ let AllocationPriority = SGPR_64.AllocationPriority;
+}
+
def TTMP_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32,
(add TTMP_64Regs)> {
let isAllocatable = 0;
@@ -748,14 +768,15 @@ defm "" : SRegClass<8, 19, [v8i32, v8f32, v4i64, v4f64], SGPR_256Regs, TTMP_256R
defm "" : SRegClass<16, 20, [v16i32, v16f32, v8i64, v8f64], SGPR_512Regs, TTMP_512Regs>;
defm "" : SRegClass<32, 21, [v32i32, v32f32, v16i64, v16f64], SGPR_1024Regs>;
-def VRegOrLds_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
+def VRegOrLds_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add VGPR_32, LDS_DIRECT_CLASS)> {
let isAllocatable = 0;
+ let HasVGPR = 1;
}
// Register class for all vector registers (VGPRs + Interpolation Registers)
class VRegClassBase<int numRegs, list<ValueType> regTypes, dag regList> :
- RegisterClass<"AMDGPU", regTypes, 32, regList> {
+ SIRegisterClass<"AMDGPU", regTypes, 32, regList> {
let Size = !mul(numRegs, 32);
// Requires n v_mov_b32 to copy
@@ -767,11 +788,13 @@ class VRegClassBase<int numRegs, list<ValueType> regTypes, dag regList> :
// Define a register tuple class, along with one requiring an even
// aligned base register.
multiclass VRegClass<int numRegs, list<ValueType> regTypes, dag regList> {
- // Define the regular class.
- def "" : VRegClassBase<numRegs, regTypes, regList>;
+ let HasVGPR = 1 in {
+ // Define the regular class.
+ def "" : VRegClassBase<numRegs, regTypes, regList>;
- // Define 2-aligned variant
- def _Align2 : VRegClassBase<numRegs, regTypes, (decimate regList, 2)>;
+ // Define 2-aligned variant
+ def _Align2 : VRegClassBase<numRegs, regTypes, (decimate regList, 2)>;
+ }
}
defm VReg_64 : VRegClass<2, [i64, f64, v2i32, v2f32, v4f16, v4i16, p0, p1, p4],
@@ -787,7 +810,7 @@ defm VReg_512 : VRegClass<16, [v16i32, v16f32, v8i64, v8f64], (add VGPR_512)>;
defm VReg_1024 : VRegClass<32, [v32i32, v32f32, v16i64, v16f64], (add VGPR_1024)>;
multiclass ARegClass<int numRegs, list<ValueType> regTypes, dag regList> {
- let CopyCost = !add(numRegs, numRegs, 1) in {
+ let CopyCost = !add(numRegs, numRegs, 1), HasAGPR = 1 in {
// Define the regular class.
def "" : VRegClassBase<numRegs, regTypes, regList>;
@@ -811,7 +834,7 @@ defm AReg_1024 : ARegClass<32, [v32i32, v32f32, v16i64, v16f64], (add AGPR_1024)
// This is not a real register. This is just to have a register to add
// to VReg_1 that does not alias any real register that would
-// introduce inferred register classess.
+// introduce inferred register classes.
def ARTIFICIAL_VGPR : SIReg <"invalid vgpr", 0> {
let isArtificial = 1;
}
@@ -823,44 +846,53 @@ let GeneratePressureSet = 0 in {
// on an empty register set, but also sorts register classes based on
// the number of registerss in them. Add only one register so this is
// sorted to the end and not preferred over VGPR_32.
-def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add ARTIFICIAL_VGPR)> {
+def VReg_1 : SIRegisterClass<"AMDGPU", [i1], 32, (add ARTIFICIAL_VGPR)> {
let Size = 1;
+ let HasVGPR = 1;
}
-def VS_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
+def VS_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add VGPR_32, SReg_32, LDS_DIRECT_CLASS)> {
let isAllocatable = 0;
+ let HasVGPR = 1;
}
-def VS_64 : RegisterClass<"AMDGPU", [i64, f64, v2f32], 32, (add VReg_64, SReg_64)> {
+def VS_64 : SIRegisterClass<"AMDGPU", [i64, f64, v2f32], 32, (add VReg_64, SReg_64)> {
let isAllocatable = 0;
+ let HasVGPR = 1;
}
-def AV_32 : RegisterClass<"AMDGPU", VGPR_32.RegTypes, 32,
+def AV_32 : SIRegisterClass<"AMDGPU", VGPR_32.RegTypes, 32,
(add AGPR_32, VGPR_32)> {
let isAllocatable = 0;
+ let HasVGPR = 1;
+ let HasAGPR = 1;
}
-def AV_64 : RegisterClass<"AMDGPU", VReg_64.RegTypes, 32,
+def AV_64 : SIRegisterClass<"AMDGPU", VReg_64.RegTypes, 32,
(add AReg_64, VReg_64)> {
let isAllocatable = 0;
+ let HasVGPR = 1;
+ let HasAGPR = 1;
}
} // End GeneratePressureSet = 0
-def AV_96 : RegisterClass<"AMDGPU", VReg_96.RegTypes, 32,
+let HasVGPR = 1, HasAGPR = 1 in {
+def AV_96 : SIRegisterClass<"AMDGPU", VReg_96.RegTypes, 32,
(add AReg_96, VReg_96)> {
let isAllocatable = 0;
}
-def AV_128 : RegisterClass<"AMDGPU", VReg_128.RegTypes, 32,
+def AV_128 : SIRegisterClass<"AMDGPU", VReg_128.RegTypes, 32,
(add AReg_128, VReg_128)> {
let isAllocatable = 0;
}
-def AV_160 : RegisterClass<"AMDGPU", VReg_160.RegTypes, 32,
+def AV_160 : SIRegisterClass<"AMDGPU", VReg_160.RegTypes, 32,
(add AReg_160, VReg_160)> {
let isAllocatable = 0;
}
+} // End HasVGPR = 1, HasAGPR = 1
//===----------------------------------------------------------------------===//
// Register operands
@@ -996,6 +1028,30 @@ def VSrc_128 : RegisterOperand<VReg_128> {
}
//===----------------------------------------------------------------------===//
+// VSrc_*_Deferred Operands with an SGPR, VGPR or a 32-bit immediate for use
+// with FMAMK/FMAAK
+//===----------------------------------------------------------------------===//
+
+multiclass SIRegOperand32_Deferred <string rc, string MatchName, string opType,
+ string rc_suffix = "_32"> {
+ let OperandNamespace = "AMDGPU" in {
+ def _f16_Deferred : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
+ let OperandType = opType#"_FP16_DEFERRED";
+ let ParserMatchClass = RegImmMatcher<MatchName#"F16">;
+ let DecoderMethod = "decodeOperand_" # rc # "_16_Deferred";
+ }
+
+ def _f32_Deferred : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
+ let OperandType = opType#"_FP32_DEFERRED";
+ let ParserMatchClass = RegImmMatcher<MatchName#"F32">;
+ let DecoderMethod = "decodeOperand_" # rc # "_32_Deferred";
+ }
+ }
+}
+
+defm VSrc : SIRegOperand32_Deferred<"VS", "VSrc", "OPERAND_REG_IMM">;
+
+//===----------------------------------------------------------------------===//
// VRegSrc_* Operands with a VGPR
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/SISchedule.td b/llvm/lib/Target/AMDGPU/SISchedule.td
index b24c061af7ab..0792b303b830 100644
--- a/llvm/lib/Target/AMDGPU/SISchedule.td
+++ b/llvm/lib/Target/AMDGPU/SISchedule.td
@@ -137,6 +137,7 @@ def MIReadVGPR : SchedReadVariant<[
// The latency values are 1 / (operations / cycle) / 4.
multiclass SICommonWriteRes {
+ let RetireOOO = 1 in { // llvm-mca specific flag
def : HWWriteRes<WriteBranch, [HWBranch], 8>;
def : HWWriteRes<WriteExport, [HWExport], 4>;
def : HWWriteRes<WriteLDS, [HWLGKM], 5>; // Can be between 2 and 64
@@ -159,6 +160,7 @@ multiclass SICommonWriteRes {
def : HWWriteRes<Write8PassMAI, [HWXDL], 8>;
let ResourceCycles = [16] in
def : HWWriteRes<Write16PassMAI, [HWXDL], 16>;
+ } // End RetireOOO = 1
def : ReadAdvance<MIVGPRRead, -2>;
@@ -182,6 +184,7 @@ let SchedModel = SIFullSpeedModel in {
defm : SICommonWriteRes;
+let RetireOOO = 1 in { // llvm-mca specific flag
def : HWVALUWriteRes<Write64Bit, 2>;
def : HWVALUWriteRes<WriteIntMul, 4>;
def : HWVALUWriteRes<WriteFloatFMA, 1>;
@@ -189,6 +192,7 @@ def : HWVALUWriteRes<WriteDouble, 4>;
def : HWVALUWriteRes<WriteDoubleAdd, 2>;
def : HWVALUWriteRes<WriteDoubleCvt, 4>;
def : HWVALUWriteRes<WriteTrans64, 4>;
+} // End RetireOOO = 1
def : InstRW<[WriteCopy], (instrs COPY)>;
@@ -198,6 +202,7 @@ let SchedModel = SIQuarterSpeedModel in {
defm : SICommonWriteRes;
+let RetireOOO = 1 in { // llvm-mca specific flag
def : HWVALUWriteRes<Write64Bit, 2>;
def : HWVALUWriteRes<WriteIntMul, 4>;
def : HWVALUWriteRes<WriteFloatFMA, 16>;
@@ -205,6 +210,7 @@ def : HWVALUWriteRes<WriteDouble, 16>;
def : HWVALUWriteRes<WriteDoubleAdd, 8>;
def : HWVALUWriteRes<WriteDoubleCvt, 4>;
def : HWVALUWriteRes<WriteTrans64, 16>;
+} // End RetireOOO = 1
def : InstRW<[WriteCopy], (instrs COPY)>;
def : InstRW<[Write64Bit, MIReadVGPR], (instregex "^V_ACCVGPR_WRITE_B32_e64$")>;
@@ -218,6 +224,7 @@ let SchedModel = SIDPFullSpeedModel in {
defm : SICommonWriteRes;
+let RetireOOO = 1 in { // llvm-mca specific flag
def : HWVALUWriteRes<WriteFloatFMA, 1>;
def : HWVALUWriteRes<WriteDouble, 1>;
def : HWVALUWriteRes<WriteDoubleAdd, 1>;
@@ -225,6 +232,7 @@ def : HWVALUWriteRes<WriteDoubleCvt, 1>;
def : HWVALUWriteRes<WriteTrans64, 4>;
def : HWVALUWriteRes<WriteIntMul, 1>;
def : HWVALUWriteRes<Write64Bit, 1>;
+} // End RetireOOO = 1
def : InstRW<[WriteCopy], (instrs COPY)>;
def : InstRW<[Write64Bit], (instregex "^V_ACCVGPR_WRITE_B32_e64$")>;
@@ -240,6 +248,7 @@ let SchedModel = GFX10SpeedModel in {
// The latency values are 1 / (operations / cycle).
// Add 1 stall cycle for VGPR read.
+let RetireOOO = 1 in { // llvm-mca specific flag
def : HWWriteRes<Write32Bit, [HWVALU, HWRC], 5>;
def : HWWriteRes<WriteFloatCvt, [HWVALU, HWRC], 5>;
def : HWWriteRes<Write64Bit, [HWVALU, HWRC], 6>;
@@ -259,6 +268,7 @@ def : HWWriteRes<WriteSALU, [HWSALU, HWRC], 2>;
def : HWWriteRes<WriteSMEM, [HWLGKM, HWRC], 20>;
def : HWWriteRes<WriteVMEM, [HWVMEM, HWRC], 320>;
def : HWWriteRes<WriteBarrier, [HWBranch], 2000>;
+} // End RetireOOO = 1
def : InstRW<[WriteCopy], (instrs COPY)>;
diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 45dd57ea1be4..3a372d4519fb 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -188,7 +188,7 @@ static void shrinkScalarCompare(const SIInstrInfo *TII, MachineInstr &MI) {
return;
// eq/ne is special because the imm16 can be treated as signed or unsigned,
- // and initially selectd to the unsigned versions.
+ // and initially selected to the unsigned versions.
if (SOPKOpc == AMDGPU::S_CMPK_EQ_U32 || SOPKOpc == AMDGPU::S_CMPK_LG_U32) {
bool HasUImm;
if (isKImmOrKUImmOperand(TII, Src1, HasUImm)) {
@@ -810,6 +810,10 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
// Copy extra operands not present in the instruction definition.
copyExtraImplicitOps(*Inst32, MF, MI);
+ // Copy deadness from the old explicit vcc def to the new implicit def.
+ if (SDst && SDst->isDead())
+ Inst32->findRegisterDefOperand(VCCReg)->setIsDead();
+
MI.eraseFromParent();
foldImmediates(*Inst32, TII, MRI);
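The two lines added above carry the dead flag of the old explicit VCC definition over to the implicit VCC definition on the shrunk 32-bit encoding. A hedged sketch of the same propagation, using only public MachineInstr/MachineOperand calls (the helper name is made up, and the null check is extra caution not present in the patch):

#include "llvm/CodeGen/MachineInstr.h"

// Sketch only: if the old explicit def of Reg was dead, mark the new
// instruction's def of Reg dead as well, so later passes do not treat the
// register as live past this point.
static void copyDeadFlag(const llvm::MachineOperand *OldDef,
                         llvm::MachineInstr &NewMI, llvm::Register Reg) {
  if (OldDef && OldDef->isDead())
    if (llvm::MachineOperand *NewDef = NewMI.findRegisterDefOperand(Reg))
      NewDef->setIsDead();
}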
diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index 38548eaf9478..6f63f686635a 100644
--- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -1029,11 +1029,8 @@ void SIWholeQuadMode::lowerBlock(MachineBasicBlock &MBB) {
SmallVector<MachineInstr *, 4> SplitPoints;
char State = BI.InitialState;
- auto II = MBB.getFirstNonPHI(), IE = MBB.end();
- while (II != IE) {
- auto Next = std::next(II);
- MachineInstr &MI = *II;
-
+ for (MachineInstr &MI : llvm::make_early_inc_range(
+ llvm::make_range(MBB.getFirstNonPHI(), MBB.end()))) {
if (StateTransition.count(&MI))
State = StateTransition[&MI];
@@ -1051,8 +1048,6 @@ void SIWholeQuadMode::lowerBlock(MachineBasicBlock &MBB) {
}
if (SplitPoint)
SplitPoints.push_back(SplitPoint);
-
- II = Next;
}
// Perform splitting after instruction scan to simplify iteration.
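The loop rewrite above uses llvm::make_early_inc_range, which advances the iterator before the body runs and so replaces the manual Next = std::next(II) bookkeeping that was removed. A minimal sketch of the idiom (function and predicate are hypothetical; erasing the current instruction inside the body is safe with this range):

#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineBasicBlock.h"

// Sketch only: walk the non-PHI instructions of a block and erase KILL
// pseudos; early_inc_range keeps the loop valid across eraseFromParent().
static void eraseKillPseudosSketch(llvm::MachineBasicBlock &MBB) {
  for (llvm::MachineInstr &MI : llvm::make_early_inc_range(
           llvm::make_range(MBB.getFirstNonPHI(), MBB.end()))) {
    if (MI.isKill())
      MI.eraseFromParent();
  }
}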
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index e9697017aac0..61ecc13620a1 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -265,6 +265,7 @@ def S_SETPC_B64 : SOP1_1 <"s_setpc_b64">;
let isReturn = 1 in {
// Define variant marked as return rather than branch.
def S_SETPC_B64_return : SOP1_1<"", CCR_SGPR_64, [(AMDGPUret_flag i64:$src0)]>;
+def S_SETPC_B64_return_gfx : SOP1_1<"", Gfx_CCR_SGPR_64, [(AMDGPUret_gfx_flag i64:$src0)]>;
}
} // End isTerminator = 1, isBarrier = 1
@@ -517,9 +518,10 @@ let Uses = [SCC] in {
def S_CSELECT_B32 : SOP2_32 <"s_cselect_b32",
[(set i32:$sdst, (SelectPat<select> i32:$src0, i32:$src1))]
>;
+ def S_CSELECT_B64 : SOP2_64 <"s_cselect_b64",
+ [(set i64:$sdst, (SelectPat<select> i64:$src0, i64:$src1))]
+ >;
}
-
- def S_CSELECT_B64 : SOP2_64 <"s_cselect_b64">;
} // End Uses = [SCC]
let Defs = [SCC] in {
@@ -557,19 +559,19 @@ def S_XNOR_B64 : SOP2_64 <"s_xnor_b64",
>;
def S_NAND_B32 : SOP2_32 <"s_nand_b32",
- [(set i32:$sdst, (not (and_oneuse i32:$src0, i32:$src1)))]
+ [(set i32:$sdst, (UniformUnaryFrag<not> (and_oneuse i32:$src0, i32:$src1)))]
>;
def S_NAND_B64 : SOP2_64 <"s_nand_b64",
- [(set i64:$sdst, (not (and_oneuse i64:$src0, i64:$src1)))]
+ [(set i64:$sdst, (UniformUnaryFrag<not> (and_oneuse i64:$src0, i64:$src1)))]
>;
def S_NOR_B32 : SOP2_32 <"s_nor_b32",
- [(set i32:$sdst, (not (or_oneuse i32:$src0, i32:$src1)))]
+ [(set i32:$sdst, (UniformUnaryFrag<not> (or_oneuse i32:$src0, i32:$src1)))]
>;
def S_NOR_B64 : SOP2_64 <"s_nor_b64",
- [(set i64:$sdst, (not (or_oneuse i64:$src0, i64:$src1)))]
+ [(set i64:$sdst, (UniformUnaryFrag<not> (or_oneuse i64:$src0, i64:$src1)))]
>;
} // End isCommutable = 1
@@ -597,22 +599,22 @@ let AddedComplexity = 1 in {
let Defs = [SCC] in {
// TODO: b64 versions require VOP3 change since v_lshlrev_b64 is VOP3
def S_LSHL_B32 : SOP2_32 <"s_lshl_b32",
- [(set SReg_32:$sdst, (UniformBinFrag<shl> (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]
+ [(set SReg_32:$sdst, (UniformBinFrag<cshl_32> (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]
>;
def S_LSHL_B64 : SOP2_64_32 <"s_lshl_b64",
- [(set SReg_64:$sdst, (UniformBinFrag<shl> (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))]
+ [(set SReg_64:$sdst, (UniformBinFrag<cshl_64> (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))]
>;
def S_LSHR_B32 : SOP2_32 <"s_lshr_b32",
- [(set SReg_32:$sdst, (UniformBinFrag<srl> (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]
+ [(set SReg_32:$sdst, (UniformBinFrag<csrl_32> (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]
>;
def S_LSHR_B64 : SOP2_64_32 <"s_lshr_b64",
- [(set SReg_64:$sdst, (UniformBinFrag<srl> (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))]
+ [(set SReg_64:$sdst, (UniformBinFrag<csrl_64> (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))]
>;
def S_ASHR_I32 : SOP2_32 <"s_ashr_i32",
- [(set SReg_32:$sdst, (UniformBinFrag<sra> (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]
+ [(set SReg_32:$sdst, (UniformBinFrag<csra_32> (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]
>;
def S_ASHR_I64 : SOP2_64_32 <"s_ashr_i64",
- [(set SReg_64:$sdst, (UniformBinFrag<sra> (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))]
+ [(set SReg_64:$sdst, (UniformBinFrag<csra_64> (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))]
>;
} // End Defs = [SCC]
@@ -621,9 +623,8 @@ def S_BFM_B32 : SOP2_32 <"s_bfm_b32",
[(set i32:$sdst, (UniformBinFrag<AMDGPUbfm> i32:$src0, i32:$src1))]>;
def S_BFM_B64 : SOP2_64_32_32 <"s_bfm_b64">;
-// TODO: S_MUL_I32 require V_MUL_LO_I32 from VOP3 change
def S_MUL_I32 : SOP2_32 <"s_mul_i32",
- [(set i32:$sdst, (mul i32:$src0, i32:$src1))]> {
+ [(set i32:$sdst, (UniformBinFrag<mul> i32:$src0, i32:$src1))]> {
let isCommutable = 1;
}
} // End isReMaterializable = 1
@@ -713,7 +714,7 @@ class SOPK_Pseudo <string opName, dag outs, dag ins,
bits<1> has_sdst = 1;
}
-class SOPK_Real<bits<5> op, SOPK_Pseudo ps> :
+class SOPK_Real<SOPK_Pseudo ps> :
InstSI <ps.OutOperandList, ps.InOperandList,
ps.Mnemonic # " " # ps.AsmOperands, []> {
let SALU = 1;
@@ -739,7 +740,7 @@ class SOPK_Real<bits<5> op, SOPK_Pseudo ps> :
}
class SOPK_Real32<bits<5> op, SOPK_Pseudo ps> :
- SOPK_Real <op, ps>,
+ SOPK_Real <ps>,
Enc32 {
let Inst{15-0} = simm16;
let Inst{22-16} = !if(ps.has_sdst, sdst, ?);
@@ -748,7 +749,7 @@ class SOPK_Real32<bits<5> op, SOPK_Pseudo ps> :
}
class SOPK_Real64<bits<5> op, SOPK_Pseudo ps> :
- SOPK_Real<op, ps>,
+ SOPK_Real<ps>,
Enc64 {
let Inst{15-0} = simm16;
let Inst{22-16} = !if(ps.has_sdst, sdst, ?);
@@ -1107,7 +1108,7 @@ class SOPPRelaxTable <bit isRelaxed, string keyName, string gfxip> {
}
//spaces inserted in realname on instantiation of this record to allow s_endpgm to omit whitespace
-class SOPP_Real<bits<7> op, SOPP_Pseudo ps, string real_name = ps.Mnemonic> :
+class SOPP_Real<SOPP_Pseudo ps, string real_name = ps.Mnemonic> :
InstSI <ps.OutOperandList, ps.InOperandList,
real_name # ps.AsmOperands, []> {
let SALU = 1;
@@ -1127,14 +1128,14 @@ class SOPP_Real<bits<7> op, SOPP_Pseudo ps, string real_name = ps.Mnemonic> :
bits <16> simm16;
}
-class SOPP_Real_32 <bits<7> op, SOPP_Pseudo ps, string real_name = ps.Mnemonic> : SOPP_Real<op, ps, real_name>,
+class SOPP_Real_32 <bits<7> op, SOPP_Pseudo ps, string real_name = ps.Mnemonic> : SOPP_Real<ps, real_name>,
Enc32 {
let Inst{15-0} = !if(ps.fixed_imm, ps.simm16, simm16);
let Inst{22-16} = op;
let Inst{31-23} = 0x17f;
}
-class SOPP_Real_64 <bits<7> op, SOPP_Pseudo ps, string real_name = ps.Mnemonic> : SOPP_Real<op, ps, real_name>,
+class SOPP_Real_64 <bits<7> op, SOPP_Pseudo ps, string real_name = ps.Mnemonic> : SOPP_Real<ps, real_name>,
Enc64 {
// encoding
let Inst{15-0} = !if(ps.fixed_imm, ps.simm16, simm16);
diff --git a/llvm/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp b/llvm/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp
index 9ec437760c0a..7573af597056 100644
--- a/llvm/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp
@@ -11,7 +11,7 @@
//===----------------------------------------------------------------------===//
#include "TargetInfo/AMDGPUTargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 29bbf50cbfdc..9da7b9f5145d 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -450,16 +450,16 @@ std::string AMDGPUTargetID::toString() const {
} else if (Processor == "gfx801") {
if (!isXnackOnOrAny())
report_fatal_error(
- "AMD GPU code object V2 does not support processor " + Processor +
- " without XNACK");
+ "AMD GPU code object V2 does not support processor " +
+ Twine(Processor) + " without XNACK");
} else if (Processor == "gfx802") {
} else if (Processor == "gfx803") {
} else if (Processor == "gfx805") {
} else if (Processor == "gfx810") {
if (!isXnackOnOrAny())
report_fatal_error(
- "AMD GPU code object V2 does not support processor " + Processor +
- " without XNACK");
+ "AMD GPU code object V2 does not support processor " +
+ Twine(Processor) + " without XNACK");
} else if (Processor == "gfx900") {
if (isXnackOnOrAny())
Processor = "gfx901";
@@ -475,11 +475,12 @@ std::string AMDGPUTargetID::toString() const {
} else if (Processor == "gfx90c") {
if (isXnackOnOrAny())
report_fatal_error(
- "AMD GPU code object V2 does not support processor " + Processor +
- " with XNACK being ON or ANY");
+ "AMD GPU code object V2 does not support processor " +
+ Twine(Processor) + " with XNACK being ON or ANY");
} else {
report_fatal_error(
- "AMD GPU code object V2 does not support processor " + Processor);
+ "AMD GPU code object V2 does not support processor " +
+ Twine(Processor));
}
break;
case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
@@ -671,7 +672,8 @@ unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
if (XNACKUsed)
ExtraSGPRs = 4;
- if (FlatScrUsed)
+ if (FlatScrUsed ||
+ STI->getFeatureBits().test(AMDGPU::FeatureArchitectedFlatScratch))
ExtraSGPRs = 6;
}
@@ -1572,8 +1574,10 @@ bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
unsigned OpType = Desc.OpInfo[OpNo].OperandType;
switch (OpType) {
case AMDGPU::OPERAND_REG_IMM_FP32:
+ case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
case AMDGPU::OPERAND_REG_IMM_FP64:
case AMDGPU::OPERAND_REG_IMM_FP16:
+ case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
case AMDGPU::OPERAND_REG_IMM_V2FP16:
case AMDGPU::OPERAND_REG_IMM_V2INT16:
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
@@ -1825,8 +1829,8 @@ bool isArgPassedInSGPR(const Argument *A) {
case CallingConv::AMDGPU_Gfx:
// For non-compute shaders, SGPR inputs are marked with either inreg or byval.
// Everything else is in VGPRs.
- return F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) ||
- F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::ByVal);
+ return F->getAttributes().hasParamAttr(A->getArgNo(), Attribute::InReg) ||
+ F->getAttributes().hasParamAttr(A->getArgNo(), Attribute::ByVal);
default:
// TODO: Should calls support inreg for SGPR inputs?
return false;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 72c872dec5ba..061c74c0ace6 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -292,9 +292,13 @@ struct MIMGBaseOpcodeInfo {
bool LodOrClampOrMip;
bool HasD16;
bool MSAA;
+ bool BVH;
};
LLVM_READONLY
+const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc);
+
+LLVM_READONLY
const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);
struct MIMGDimInfo {
@@ -767,7 +771,7 @@ bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);
/// Is this floating-point operand?
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);
-/// Does this opearnd support only inlinable literals?
+/// Does this operand support only inlinable literals?
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);
/// Get the size in bits of a register from the register class \p RC.
@@ -785,6 +789,7 @@ inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
switch (OpInfo.OperandType) {
case AMDGPU::OPERAND_REG_IMM_INT32:
case AMDGPU::OPERAND_REG_IMM_FP32:
+ case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
case AMDGPU::OPERAND_REG_INLINE_C_INT32:
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
@@ -793,6 +798,8 @@ inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
case AMDGPU::OPERAND_REG_IMM_V2FP32:
case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
+ case AMDGPU::OPERAND_KIMM32:
+ case AMDGPU::OPERAND_KIMM16: // mandatory literal is always size 4
return 4;
case AMDGPU::OPERAND_REG_IMM_INT64:
@@ -804,6 +811,7 @@ inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
case AMDGPU::OPERAND_REG_IMM_INT16:
case AMDGPU::OPERAND_REG_IMM_FP16:
+ case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp
index da8fcf3900bb..2e4d83fbbc39 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "AMDGPULDSUtils.h"
+#include "AMDGPU.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SetVector.h"
@@ -68,6 +69,11 @@ class CollectReachableCallees {
if (!VisitedCGNodes.insert(CGN).second)
continue;
+ // Ignore call graph node which does not have associated function or
+ // associated function is not a definition.
+ if (!CGN->getFunction() || CGN->getFunction()->isDeclaration())
+ continue;
+
for (auto GI = CGN->begin(), GE = CGN->end(); GI != GE; ++GI) {
auto *RCB = cast<CallBase>(GI->first.getValue());
auto *RCGN = GI->second;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h
index ffcafb9b76ce..d1c9229bc336 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h
@@ -13,7 +13,6 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPULDSUTILS_H
#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPULDSUTILS_H
-#include "AMDGPU.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/Constants.h"
@@ -49,7 +48,7 @@ Align getAlign(DataLayout const &DL, const GlobalVariable *GV);
/// as an use within some instruction (either from kernel or from non-kernel).
bool hasUserInstruction(const GlobalValue *GV);
-/// \returns true if an LDS global requres lowering to a module LDS structure
+/// \returns true if an LDS global requires lowering to a module LDS structure
/// if \p F is not given. If \p F is given it must be a kernel and function
/// \returns true if an LDS global is directly used from that kernel and it
/// is safe to replace its uses with a kernel LDS structure member.
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 35d5fe13ad30..48548d8b6722 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -860,16 +860,25 @@ defm V_CVT_NORM_U16_F16 : VOP1_Real_vi<0x4e>;
defm V_ACCVGPR_MOV_B32 : VOP1Only_Real_vi<0x52>;
+let VOP1 = 1, SubtargetPredicate = isGFX8GFX9, Uses = [EXEC, M0] in {
+
// Copy of v_mov_b32 with $vdst as a use operand for use with VGPR
// indexing mode. vdst can't be treated as a def for codegen purposes,
// and an implicit use and def of the super register should be added.
-def V_MOV_B32_indirect : VPseudoInstSI<(outs),
+def V_MOV_B32_indirect_write : VPseudoInstSI<(outs),
(ins getVALUDstForVT<i32>.ret:$vdst, getVOPSrc0ForVT<i32>.ret:$src0)>,
PseudoInstExpansion<(V_MOV_B32_e32_vi getVALUDstForVT<i32>.ret:$vdst,
- getVOPSrc0ForVT<i32>.ret:$src0)> {
- let VOP1 = 1;
- let SubtargetPredicate = isGFX8GFX9;
-}
+ getVOPSrc0ForVT<i32>.ret:$src0)>;
+
+// Copy of v_mov_b32 for use with VGPR indexing mode. An implicit use of the
+// super register should be added.
+def V_MOV_B32_indirect_read : VPseudoInstSI<
+ (outs getVALUDstForVT<i32>.ret:$vdst),
+ (ins getVOPSrc0ForVT<i32>.ret:$src0)>,
+ PseudoInstExpansion<(V_MOV_B32_e32_vi getVALUDstForVT<i32>.ret:$vdst,
+ getVOPSrc0ForVT<i32>.ret:$src0)>;
+
+} // End VOP1 = 1, SubtargetPredicate = isGFX8GFX9, Uses = [M0]
let OtherPredicates = [isGFX8Plus] in {
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 7860b7e7f8a6..8d232ffe4114 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -154,8 +154,6 @@ multiclass VOP2Inst_e64<string opName,
multiclass VOP2Inst_sdwa<string opName,
VOPProfile P,
- SDPatternOperator node = null_frag,
- string revOp = opName,
bit GFX9Renamed = 0> {
let renamedInGFX9 = GFX9Renamed in {
foreach _ = BoolToList<P.HasExtSDWA>.ret in
@@ -170,7 +168,7 @@ multiclass VOP2Inst<string opName,
bit GFX9Renamed = 0> :
VOP2Inst_e32<opName, P, node, revOp, GFX9Renamed>,
VOP2Inst_e64<opName, P, node, revOp, GFX9Renamed>,
- VOP2Inst_sdwa<opName, P, node, revOp, GFX9Renamed> {
+ VOP2Inst_sdwa<opName, P, GFX9Renamed> {
let renamedInGFX9 = GFX9Renamed in {
foreach _ = BoolToList<P.HasExtDPP>.ret in
def _dpp : VOP2_DPP_Pseudo <opName, P>;
@@ -188,7 +186,7 @@ multiclass VOP2bInst <string opName,
let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in {
def _e32 : VOP2_Pseudo <opName, P, VOPPatOrNull<node,P>.ret>,
Commutable_REV<revOp#"_e32", !eq(revOp, opName)> {
- let usesCustomInserter = !eq(P.NumSrcArgs, 2);
+ let usesCustomInserter = true;
}
foreach _ = BoolToList<P.HasExtSDWA>.ret in
@@ -272,12 +270,11 @@ multiclass VOP2eInstAliases<VOP2_Pseudo ps, VOP2_Real inst> {
class VOP_MADAK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm);
field dag Ins32 = !if(!eq(vt.Size, 32),
- (ins VCSrc_f32:$src0, VGPR_32:$src1, ImmOpType:$imm),
- (ins VCSrc_f16:$src0, VGPR_32:$src1, ImmOpType:$imm));
+ (ins VSrc_f32_Deferred:$src0, VGPR_32:$src1, ImmOpType:$imm),
+ (ins VSrc_f16_Deferred:$src0, VGPR_32:$src1, ImmOpType:$imm));
+ field string Asm32 = "$vdst, $src0, $src1, $imm";
field bit HasExt = 0;
let IsSingle = 1;
-
- field string Asm32 = "$vdst, $src0, $src1, $imm";
}
def VOP_MADAK_F16 : VOP_MADAK <f16>;
@@ -285,11 +282,10 @@ def VOP_MADAK_F32 : VOP_MADAK <f32>;
class VOP_MADMK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm);
- field dag Ins32 = (ins VCSrc_f32:$src0, ImmOpType:$imm, VGPR_32:$src1);
+ field dag Ins32 = (ins VSrc_f32_Deferred:$src0, ImmOpType:$imm, VGPR_32:$src1);
+ field string Asm32 = "$vdst, $src0, $imm, $src1";
field bit HasExt = 0;
let IsSingle = 1;
-
- field string Asm32 = "$vdst, $src0, $imm, $src1";
}
def VOP_MADMK_F16 : VOP_MADMK <f16>;
@@ -496,18 +492,18 @@ defm V_SUBREV_F32 : VOP2Inst <"v_subrev_f32", VOP_F32_F32_F32, null_frag, "v_sub
defm V_MUL_LEGACY_F32 : VOP2Inst <"v_mul_legacy_f32", VOP_F32_F32_F32, AMDGPUfmul_legacy>;
defm V_MUL_F32 : VOP2Inst <"v_mul_f32", VOP_F32_F32_F32, any_fmul>;
defm V_MUL_I32_I24 : VOP2Inst <"v_mul_i32_i24", VOP_I32_I32_I32_ARITH, AMDGPUmul_i24>;
-defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmulhi_i24>;
+defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_I32_I32_I32, AMDGPUmulhi_i24>;
defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_I32_I32_I32_ARITH, AMDGPUmul_u24>;
-defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmulhi_u24>;
+defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_I32_I32_I32, AMDGPUmulhi_u24>;
defm V_MIN_F32 : VOP2Inst <"v_min_f32", VOP_F32_F32_F32, fminnum_like>;
defm V_MAX_F32 : VOP2Inst <"v_max_f32", VOP_F32_F32_F32, fmaxnum_like>;
defm V_MIN_I32 : VOP2Inst <"v_min_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smin>;
defm V_MAX_I32 : VOP2Inst <"v_max_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smax>;
defm V_MIN_U32 : VOP2Inst <"v_min_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umin>;
defm V_MAX_U32 : VOP2Inst <"v_max_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umax>;
-defm V_LSHRREV_B32 : VOP2Inst <"v_lshrrev_b32", VOP_I32_I32_I32, lshr_rev, "v_lshr_b32">;
-defm V_ASHRREV_I32 : VOP2Inst <"v_ashrrev_i32", VOP_I32_I32_I32, ashr_rev, "v_ashr_i32">;
-defm V_LSHLREV_B32 : VOP2Inst <"v_lshlrev_b32", VOP_I32_I32_I32, lshl_rev, "v_lshl_b32">;
+defm V_LSHRREV_B32 : VOP2Inst <"v_lshrrev_b32", VOP_I32_I32_I32, clshr_rev_32, "v_lshr_b32">;
+defm V_ASHRREV_I32 : VOP2Inst <"v_ashrrev_i32", VOP_I32_I32_I32, cashr_rev_32, "v_ashr_i32">;
+defm V_LSHLREV_B32 : VOP2Inst <"v_lshlrev_b32", VOP_I32_I32_I32, clshl_rev_32, "v_lshl_b32">;
defm V_AND_B32 : VOP2Inst <"v_and_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, and>;
defm V_OR_B32 : VOP2Inst <"v_or_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, or>;
defm V_XOR_B32 : VOP2Inst <"v_xor_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, xor>;
@@ -582,9 +578,9 @@ defm V_MAX_LEGACY_F32 : VOP2Inst <"v_max_legacy_f32", VOP_F32_F32_F32, AMDGPUfma
let isCommutable = 1 in {
let SubtargetPredicate = isGFX6GFX7 in {
-defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, srl>;
-defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, sra>;
-defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, shl>;
+defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, csrl_32>;
+defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, csra_32>;
+defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, cshl_32>;
} // End SubtargetPredicate = isGFX6GFX7
} // End isCommutable = 1
} // End isReMaterializable = 1
@@ -609,9 +605,9 @@ class DivergentClampingBinOp<SDPatternOperator Op, VOP_Pseudo Inst> :
)
>;
-def : DivergentBinOp<srl, V_LSHRREV_B32_e64>;
-def : DivergentBinOp<sra, V_ASHRREV_I32_e64>;
-def : DivergentBinOp<shl, V_LSHLREV_B32_e64>;
+def : DivergentBinOp<csrl_32, V_LSHRREV_B32_e64>;
+def : DivergentBinOp<csra_32, V_ASHRREV_I32_e64>;
+def : DivergentBinOp<cshl_32, V_LSHLREV_B32_e64>;
let SubtargetPredicate = HasAddNoCarryInsts in {
def : DivergentClampingBinOp<add, V_ADD_U32_e64>;
@@ -652,9 +648,9 @@ def V_MADMK_F16 : VOP2_Pseudo <"v_madmk_f16", VOP_MADMK_F16, [], "">;
defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", VOP_F16_F16_I32, AMDGPUldexp>;
} // End FPDPRounding = 1
-defm V_LSHLREV_B16 : VOP2Inst <"v_lshlrev_b16", VOP_I16_I16_I16, lshl_rev>;
-defm V_LSHRREV_B16 : VOP2Inst <"v_lshrrev_b16", VOP_I16_I16_I16, lshr_rev>;
-defm V_ASHRREV_I16 : VOP2Inst <"v_ashrrev_i16", VOP_I16_I16_I16, ashr_rev>;
+defm V_LSHLREV_B16 : VOP2Inst <"v_lshlrev_b16", VOP_I16_I16_I16, clshl_rev_16>;
+defm V_LSHRREV_B16 : VOP2Inst <"v_lshrrev_b16", VOP_I16_I16_I16, clshr_rev_16>;
+defm V_ASHRREV_I16 : VOP2Inst <"v_ashrrev_i16", VOP_I16_I16_I16, cashr_rev_16>;
let isCommutable = 1 in {
let FPDPRounding = 1 in {
@@ -856,9 +852,9 @@ defm : Arithmetic_i16_0Hi_Pats<smin, V_MIN_I16_e64>;
defm : Arithmetic_i16_0Hi_Pats<smax, V_MAX_I16_e64>;
defm : Arithmetic_i16_0Hi_Pats<umin, V_MIN_U16_e64>;
defm : Arithmetic_i16_0Hi_Pats<umax, V_MAX_U16_e64>;
-defm : Arithmetic_i16_0Hi_Pats<lshl_rev, V_LSHLREV_B16_e64>;
-defm : Arithmetic_i16_0Hi_Pats<lshr_rev, V_LSHRREV_B16_e64>;
-defm : Arithmetic_i16_0Hi_Pats<ashr_rev, V_ASHRREV_I16_e64>;
+defm : Arithmetic_i16_0Hi_Pats<clshl_rev_16, V_LSHLREV_B16_e64>;
+defm : Arithmetic_i16_0Hi_Pats<clshr_rev_16, V_LSHRREV_B16_e64>;
+defm : Arithmetic_i16_0Hi_Pats<cashr_rev_16, V_ASHRREV_I16_e64>;
} // End Predicates = [Has16BitInsts, isGFX7GFX8GFX9]
def : ZExt_i16_i1_Pat<zext>;
@@ -927,7 +923,7 @@ class VOP2_DPP16<bits<6> op, VOP2_DPP_Pseudo ps,
SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX10>;
class VOP2_DPP8<bits<6> op, VOP2_Pseudo ps,
- string opName = ps.OpName, VOPProfile p = ps.Pfl> :
+ VOPProfile p = ps.Pfl> :
VOP_DPP8<ps.OpName, p> {
let hasSideEffects = ps.hasSideEffects;
let Defs = ps.Defs;
@@ -1123,14 +1119,14 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
multiclass VOP2be_Real_dpp8_gfx10<bits<6> op, string opName, string asmName> {
foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
def _dpp8_gfx10 :
- VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32"), asmName> {
+ VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
let AsmString = asmName # !subst(", vcc", "", AsmDPP8);
let DecoderNamespace = "DPP8";
}
foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
def _dpp8_w32_gfx10 :
- VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32"), asmName> {
+ VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP8);
let isAsmParserOnly = 1;
@@ -1138,7 +1134,7 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
}
foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
def _dpp8_w64_gfx10 :
- VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32"), asmName> {
+ VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
let AsmString = asmName # AsmDPP8;
let isAsmParserOnly = 1;
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index ee3b87f487d0..494e3aeb6d55 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -120,11 +120,11 @@ class getVOP3MAIPat<VOPProfile P, SDPatternOperator node> {
}
// Consistently gives instructions a _e64 suffix.
-multiclass VOP3Inst_Pseudo_Wrapper<string opName, VOPProfile P, list<dag> pattern = [], bit VOP3Only = 0> {
- def _e64 : VOP3_Pseudo<opName, P, pattern, VOP3Only>;
+multiclass VOP3Inst_Pseudo_Wrapper<string opName, VOPProfile P, list<dag> pattern = []> {
+ def _e64 : VOP3_Pseudo<opName, P, pattern>;
}
-class VOP3InstBase<string OpName, VOPProfile P, SDPatternOperator node = null_frag, bit VOP3Only = 0> :
+class VOP3InstBase<string OpName, VOPProfile P, SDPatternOperator node = null_frag> :
VOP3_Pseudo<OpName, P,
!if(P.HasOpSel,
!if(P.HasModifiers,
@@ -137,7 +137,7 @@ class VOP3InstBase<string OpName, VOPProfile P, SDPatternOperator node = null_fr
!if (P.IsMAI,
getVOP3MAIPat<P, node>.ret,
getVOP3Pat<P, node>.ret)))),
- VOP3Only, 0, P.HasOpSel> {
+ 0, P.HasOpSel> {
let IntClamp = P.HasIntClamp;
let AsmMatchConverter =
@@ -148,8 +148,8 @@ class VOP3InstBase<string OpName, VOPProfile P, SDPatternOperator node = null_fr
""));
}
-multiclass VOP3Inst<string OpName, VOPProfile P, SDPatternOperator node = null_frag, bit VOP3Only = 0> {
- def _e64 : VOP3InstBase<OpName, P, node, VOP3Only>;
+multiclass VOP3Inst<string OpName, VOPProfile P, SDPatternOperator node = null_frag> {
+ def _e64 : VOP3InstBase<OpName, P, node>;
}
// Special case for v_div_fmas_{f32|f64}, since it seems to be the
@@ -296,15 +296,15 @@ defm V_LERP_U8 : VOP3Inst <"v_lerp_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_a
let SchedRW = [WriteDoubleAdd] in {
let FPDPRounding = 1 in {
defm V_FMA_F64 : VOP3Inst <"v_fma_f64", VOP3_Profile<VOP_F64_F64_F64_F64>, any_fma>;
-defm V_ADD_F64 : VOP3Inst <"v_add_f64", VOP3_Profile<VOP_F64_F64_F64>, any_fadd, 1>;
-defm V_MUL_F64 : VOP3Inst <"v_mul_f64", VOP3_Profile<VOP_F64_F64_F64>, fmul, 1>;
+defm V_ADD_F64 : VOP3Inst <"v_add_f64", VOP3_Profile<VOP_F64_F64_F64>, any_fadd>;
+defm V_MUL_F64 : VOP3Inst <"v_mul_f64", VOP3_Profile<VOP_F64_F64_F64>, fmul>;
} // End FPDPRounding = 1
-defm V_MIN_F64 : VOP3Inst <"v_min_f64", VOP3_Profile<VOP_F64_F64_F64>, fminnum_like, 1>;
-defm V_MAX_F64 : VOP3Inst <"v_max_f64", VOP3_Profile<VOP_F64_F64_F64>, fmaxnum_like, 1>;
+defm V_MIN_F64 : VOP3Inst <"v_min_f64", VOP3_Profile<VOP_F64_F64_F64>, fminnum_like>;
+defm V_MAX_F64 : VOP3Inst <"v_max_f64", VOP3_Profile<VOP_F64_F64_F64>, fmaxnum_like>;
} // End SchedRW = [WriteDoubleAdd]
let SchedRW = [WriteIntMul] in {
-defm V_MUL_LO_U32 : VOP3Inst <"v_mul_lo_u32", VOP3_Profile<VOP_I32_I32_I32>, mul>;
+defm V_MUL_LO_U32 : VOP3Inst <"v_mul_lo_u32", VOP3_Profile<VOP_I32_I32_I32>, DivergentBinFrag<mul>>;
defm V_MUL_HI_U32 : VOP3Inst <"v_mul_hi_u32", VOP3_Profile<VOP_I32_I32_I32>, mulhu>;
defm V_MUL_LO_I32 : VOP3Inst <"v_mul_lo_i32", VOP3_Profile<VOP_I32_I32_I32>>;
defm V_MUL_HI_I32 : VOP3Inst <"v_mul_hi_i32", VOP3_Profile<VOP_I32_I32_I32>, mulhs>;
@@ -371,18 +371,18 @@ defm V_DIV_FIXUP_F32 : VOP3Inst <"v_div_fixup_f32", VOP3_Profile<VOP_F32_F32_F32
let SchedRW = [WriteDoubleAdd], FPDPRounding = 1 in {
defm V_DIV_FIXUP_F64 : VOP3Inst <"v_div_fixup_f64", VOP3_Profile<VOP_F64_F64_F64_F64>, AMDGPUdiv_fixup>;
- defm V_LDEXP_F64 : VOP3Inst <"v_ldexp_f64", VOP3_Profile<VOP_F64_F64_I32>, AMDGPUldexp, 1>;
+ defm V_LDEXP_F64 : VOP3Inst <"v_ldexp_f64", VOP3_Profile<VOP_F64_F64_I32>, AMDGPUldexp>;
} // End SchedRW = [WriteDoubleAdd], FPDPRounding = 1
} // End isReMaterializable = 1
let mayRaiseFPException = 0 in { // Seems suspicious but manual doesn't say it does.
let SchedRW = [WriteFloatFMA, WriteSALU] in
- defm V_DIV_SCALE_F32 : VOP3Inst_Pseudo_Wrapper <"v_div_scale_f32", VOP3b_F32_I1_F32_F32_F32, [], 1> ;
+ defm V_DIV_SCALE_F32 : VOP3Inst_Pseudo_Wrapper <"v_div_scale_f32", VOP3b_F32_I1_F32_F32_F32> ;
// Double precision division pre-scale.
let SchedRW = [WriteDouble, WriteSALU], FPDPRounding = 1 in
- defm V_DIV_SCALE_F64 : VOP3Inst_Pseudo_Wrapper <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64, [], 1>;
+ defm V_DIV_SCALE_F64 : VOP3Inst_Pseudo_Wrapper <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64>;
} // End mayRaiseFPException = 0
let isReMaterializable = 1 in
@@ -400,15 +400,15 @@ defm V_TRIG_PREOP_F64 : VOP3Inst <"v_trig_preop_f64", VOP3_Profile<VOP_F64_F64_I
let SchedRW = [Write64Bit] in {
let SubtargetPredicate = isGFX6GFX7 in {
- defm V_LSHL_B64 : VOP3Inst <"v_lshl_b64", VOP3_Profile<VOP_I64_I64_I32>, shl>;
- defm V_LSHR_B64 : VOP3Inst <"v_lshr_b64", VOP3_Profile<VOP_I64_I64_I32>, srl>;
- defm V_ASHR_I64 : VOP3Inst <"v_ashr_i64", VOP3_Profile<VOP_I64_I64_I32>, sra>;
+ defm V_LSHL_B64 : VOP3Inst <"v_lshl_b64", VOP3_Profile<VOP_I64_I64_I32>, cshl_64>;
+ defm V_LSHR_B64 : VOP3Inst <"v_lshr_b64", VOP3_Profile<VOP_I64_I64_I32>, csrl_64>;
+ defm V_ASHR_I64 : VOP3Inst <"v_ashr_i64", VOP3_Profile<VOP_I64_I64_I32>, csra_64>;
} // End SubtargetPredicate = isGFX6GFX7
let SubtargetPredicate = isGFX8Plus in {
- defm V_LSHLREV_B64 : VOP3Inst <"v_lshlrev_b64", VOP3_Profile<VOP_I64_I32_I64>, lshl_rev>;
- defm V_LSHRREV_B64 : VOP3Inst <"v_lshrrev_b64", VOP3_Profile<VOP_I64_I32_I64>, lshr_rev>;
- defm V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile<VOP_I64_I32_I64>, ashr_rev>;
+ defm V_LSHLREV_B64 : VOP3Inst <"v_lshlrev_b64", VOP3_Profile<VOP_I64_I32_I64>, clshl_rev_64>;
+ defm V_LSHRREV_B64 : VOP3Inst <"v_lshrrev_b64", VOP3_Profile<VOP_I64_I32_I64>, clshr_rev_64>;
+ defm V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile<VOP_I64_I32_I64>, cashr_rev_64>;
} // End SubtargetPredicate = isGFX8Plus
} // End SchedRW = [Write64Bit]
} // End isReMaterializable = 1
@@ -528,7 +528,7 @@ def V_INTERP_MOV_F32_e64 : VOP3Interp <"v_interp_mov_f32", VOP3_INTERP_MOV>;
let Predicates = [Has16BitInsts, isGFX6GFX7GFX8GFX9] in {
multiclass Ternary_i16_Pats <SDPatternOperator op1, SDPatternOperator op2,
- Instruction inst, SDPatternOperator op3> {
+ Instruction inst> {
def : GCNPat <
(op2 (op1 i16:$src0, i16:$src1), i16:$src2),
(inst i16:$src0, i16:$src1, i16:$src2, (i1 0))
@@ -536,15 +536,15 @@ def : GCNPat <
}
-defm: Ternary_i16_Pats<mul, add, V_MAD_U16_e64, zext>;
-defm: Ternary_i16_Pats<mul, add, V_MAD_I16_e64, sext>;
+defm: Ternary_i16_Pats<mul, add, V_MAD_U16_e64>;
+defm: Ternary_i16_Pats<mul, add, V_MAD_I16_e64>;
} // End Predicates = [Has16BitInsts, isGFX6GFX7GFX8GFX9]
let Predicates = [Has16BitInsts, isGFX10Plus] in {
multiclass Ternary_i16_Pats_gfx9<SDPatternOperator op1, SDPatternOperator op2,
- Instruction inst, SDPatternOperator op3> {
+ Instruction inst> {
def : GCNPat <
(op2 (op1 i16:$src0, i16:$src1), i16:$src2),
(inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, SRCMODS.NONE, $src2, DSTCLAMP.NONE)
@@ -552,8 +552,8 @@ def : GCNPat <
}
-defm: Ternary_i16_Pats_gfx9<mul, add, V_MAD_U16_gfx9_e64, zext>;
-defm: Ternary_i16_Pats_gfx9<mul, add, V_MAD_I16_gfx9_e64, sext>;
+defm: Ternary_i16_Pats_gfx9<mul, add, V_MAD_U16_gfx9_e64>;
+defm: Ternary_i16_Pats_gfx9<mul, add, V_MAD_I16_gfx9_e64>;
} // End Predicates = [Has16BitInsts, isGFX10Plus]
@@ -656,10 +656,10 @@ class ThreeOp_i32_Pats <SDPatternOperator op1, SDPatternOperator op2, Instructio
(inst VSrc_b32:$src0, VSrc_b32:$src1, VSrc_b32:$src2)
>;
-def : ThreeOp_i32_Pats<shl, add, V_LSHL_ADD_U32_e64>;
-def : ThreeOp_i32_Pats<add, shl, V_ADD_LSHL_U32_e64>;
+def : ThreeOp_i32_Pats<cshl_32, add, V_LSHL_ADD_U32_e64>;
+def : ThreeOp_i32_Pats<add, cshl_32, V_ADD_LSHL_U32_e64>;
def : ThreeOp_i32_Pats<add, add, V_ADD3_U32_e64>;
-def : ThreeOp_i32_Pats<shl, or, V_LSHL_OR_B32_e64>;
+def : ThreeOp_i32_Pats<cshl_32, or, V_LSHL_OR_B32_e64>;
def : ThreeOp_i32_Pats<and, or, V_AND_OR_B32_e64>;
def : ThreeOp_i32_Pats<or, or, V_OR3_B32_e64>;
def : ThreeOp_i32_Pats<xor, add, V_XAD_U32_e64>;
@@ -667,6 +667,14 @@ def : ThreeOp_i32_Pats<xor, add, V_XAD_U32_e64>;
def : VOPBinOpClampPat<saddsat, V_ADD_I32_e64, i32>;
def : VOPBinOpClampPat<ssubsat, V_SUB_I32_e64, i32>;
+def : GCNPat<(getDivergentFrag<or>.ret (or_oneuse i64:$src0, i64:$src1), i64:$src2),
+ (REG_SEQUENCE VReg_64,
+ (V_OR3_B32_e64 (i32 (EXTRACT_SUBREG $src0, sub0)),
+ (i32 (EXTRACT_SUBREG $src1, sub0)),
+ (i32 (EXTRACT_SUBREG $src2, sub0))), sub0,
+ (V_OR3_B32_e64 (i32 (EXTRACT_SUBREG $src0, sub1)),
+ (i32 (EXTRACT_SUBREG $src1, sub1)),
+ (i32 (EXTRACT_SUBREG $src2, sub1))), sub1)>;
// FIXME: Probably should hardcode clamp bit in pseudo and avoid this.
class OpSelBinOpClampPat<SDPatternOperator node,
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 48f5eb1dc272..32222b3eb93c 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -50,8 +50,7 @@ multiclass VOP3PInst<string OpName, VOPProfile P,
// Non-packed instructions that use the VOP3P encoding.
// VOP3 neg/abs and VOP3P opsel/opsel_hi modifiers are allowed.
-multiclass VOP3_VOP3PInst<string OpName, VOP3P_Mix_Profile P,
- SDPatternOperator node = null_frag> {
+multiclass VOP3_VOP3PInst<string OpName, VOP3P_Mix_Profile P> {
def NAME : VOP3P_Pseudo<OpName, P> {
let Constraints = !if(P.UseTiedOutput, "$vdst = $vdst_in", "");
let DisableEncoding = !if(P.UseTiedOutput, "$vdst_in", "");
@@ -83,9 +82,9 @@ defm V_PK_MAX_U16 : VOP3PInst<"v_pk_max_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16
defm V_PK_SUB_U16 : VOP3PInst<"v_pk_sub_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>>;
defm V_PK_SUB_I16 : VOP3PInst<"v_pk_sub_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, sub>;
-defm V_PK_LSHLREV_B16 : VOP3PInst<"v_pk_lshlrev_b16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, lshl_rev>;
-defm V_PK_ASHRREV_I16 : VOP3PInst<"v_pk_ashrrev_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, ashr_rev>;
-defm V_PK_LSHRREV_B16 : VOP3PInst<"v_pk_lshrrev_b16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, lshr_rev>;
+defm V_PK_LSHLREV_B16 : VOP3PInst<"v_pk_lshlrev_b16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, clshl_rev_16>;
+defm V_PK_ASHRREV_I16 : VOP3PInst<"v_pk_ashrrev_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, cashr_rev_16>;
+defm V_PK_LSHRREV_B16 : VOP3PInst<"v_pk_lshrrev_b16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, clshr_rev_16>;
let SubtargetPredicate = HasVOP3PInsts in {
@@ -113,7 +112,6 @@ def : VOP3PSatPat<ssubsat, V_PK_SUB_I16>;
} // End SubtargetPredicate = HasVOP3PInsts
multiclass MadFmaMixPats<SDPatternOperator fma_like,
- Instruction mix_inst,
Instruction mixlo_inst,
Instruction mixhi_inst> {
def : GCNPat <
@@ -192,7 +190,7 @@ defm V_MAD_MIXHI_F16 : VOP3_VOP3PInst<"v_mad_mixhi_f16", VOP3P_Mix_Profile<VOP_F
} // End FPDPRounding = 1
}
-defm : MadFmaMixPats<fmad, V_MAD_MIX_F32, V_MAD_MIXLO_F16, V_MAD_MIXHI_F16>;
+defm : MadFmaMixPats<fmad, V_MAD_MIXLO_F16, V_MAD_MIXHI_F16>;
} // End SubtargetPredicate = HasMadMixInsts
@@ -211,7 +209,7 @@ defm V_FMA_MIXHI_F16 : VOP3_VOP3PInst<"v_fma_mixhi_f16", VOP3P_Mix_Profile<VOP_F
} // End FPDPRounding = 1
}
-defm : MadFmaMixPats<fma, V_FMA_MIX_F32, V_FMA_MIXLO_F16, V_FMA_MIXHI_F16>;
+defm : MadFmaMixPats<fma, V_FMA_MIXLO_F16, V_FMA_MIXHI_F16>;
}
// Defines patterns that extract signed 4bit from each Idx[0].
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index 5f6f664ea3e7..a3eccf13cd71 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -57,8 +57,7 @@ class VOP_Pseudo <string opName, string suffix, VOPProfile P, dag outs, dag ins,
}
class VOP3Common <dag outs, dag ins, string asm = "",
- list<dag> pattern = [], bit HasMods = 0,
- bit VOP3Only = 0> :
+ list<dag> pattern = [], bit HasMods = 0> :
VOPAnyCommon <outs, ins, asm, pattern> {
// Using complex patterns gives VOP3 patterns a very high complexity rating,
@@ -83,7 +82,7 @@ class VOP3Common <dag outs, dag ins, string asm = "",
}
class VOP3_Pseudo <string opName, VOPProfile P, list<dag> pattern = [],
- bit VOP3Only = 0, bit isVOP3P = 0, bit isVop3OpSel = 0> :
+ bit isVOP3P = 0, bit isVop3OpSel = 0> :
VOP_Pseudo <opName, "_e64", P, P.Outs64,
!if(isVop3OpSel,
P.InsVOP3OpSel,
@@ -136,7 +135,7 @@ class VOP3_Pseudo <string opName, VOPProfile P, list<dag> pattern = [],
}
class VOP3P_Pseudo <string opName, VOPProfile P, list<dag> pattern = []> :
- VOP3_Pseudo<opName, P, pattern, 1, 1> {
+ VOP3_Pseudo<opName, P, pattern, 1> {
let VOP3P = 1;
}
@@ -760,10 +759,11 @@ class getNumNodeArgs<SDPatternOperator Op> {
int ret = TP.NumOperands;
}
-
class getDivergentFrag<SDPatternOperator Op> {
+ assert !or(!isa<SDNode>(Op), !isa<PatFrags>(Op)), "Expected SDNode or PatFrags";
- int NumSrcArgs = getNumNodeArgs<Op>.ret;
+ int NumSrcArgs = !if(!isa<SDNode>(Op), getNumNodeArgs<Op>.ret,
+ !size(!cast<PatFrags>(Op).Operands));
PatFrag ret = PatFrag <
!if(!eq(NumSrcArgs, 1),
(ops node:$src0),
diff --git a/llvm/lib/Target/ARC/ARCAsmPrinter.cpp b/llvm/lib/Target/ARC/ARCAsmPrinter.cpp
index 025b920ff7b4..0390c01eecb1 100644
--- a/llvm/lib/Target/ARC/ARCAsmPrinter.cpp
+++ b/llvm/lib/Target/ARC/ARCAsmPrinter.cpp
@@ -22,7 +22,7 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/llvm/lib/Target/ARC/ARCExpandPseudos.cpp b/llvm/lib/Target/ARC/ARCExpandPseudos.cpp
index a1646d17605f..84bb6cac2876 100644
--- a/llvm/lib/Target/ARC/ARCExpandPseudos.cpp
+++ b/llvm/lib/Target/ARC/ARCExpandPseudos.cpp
@@ -13,6 +13,7 @@
#include "ARCInstrInfo.h"
#include "ARCRegisterInfo.h"
#include "ARCSubtarget.h"
+#include "MCTargetDesc/ARCInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -34,7 +35,9 @@ public:
StringRef getPassName() const override { return "ARC Expand Pseudos"; }
private:
- void ExpandStore(MachineFunction &, MachineBasicBlock::iterator);
+ void expandStore(MachineFunction &, MachineBasicBlock::iterator);
+ void expandCTLZ(MachineFunction &, MachineBasicBlock::iterator);
+ void expandCTTZ(MachineFunction &, MachineBasicBlock::iterator);
const ARCInstrInfo *TII;
};
@@ -56,11 +59,11 @@ static unsigned getMappedOp(unsigned PseudoOp) {
}
}
-void ARCExpandPseudos::ExpandStore(MachineFunction &MF,
+void ARCExpandPseudos::expandStore(MachineFunction &MF,
MachineBasicBlock::iterator SII) {
MachineInstr &SI = *SII;
- unsigned AddrReg = MF.getRegInfo().createVirtualRegister(&ARC::GPR32RegClass);
- unsigned AddOpc =
+ Register AddrReg = MF.getRegInfo().createVirtualRegister(&ARC::GPR32RegClass);
+ Register AddOpc =
isUInt<6>(SI.getOperand(2).getImm()) ? ARC::ADD_rru6 : ARC::ADD_rrlimm;
BuildMI(*SI.getParent(), SI, SI.getDebugLoc(), TII->get(AddOpc), AddrReg)
.addReg(SI.getOperand(1).getReg())
@@ -73,10 +76,62 @@ void ARCExpandPseudos::ExpandStore(MachineFunction &MF,
SI.eraseFromParent();
}
+void ARCExpandPseudos::expandCTLZ(MachineFunction &MF,
+ MachineBasicBlock::iterator MII) {
+ // Expand:
+ // %R2<def> = CTLZ %R0, %STATUS<imp-def>
+ // To:
+ // %R2<def> = FLS_f_rr %R0, %STATUS<imp-def>
+ // %R2<def,tied1> = MOV_cc_ru6 %R2<tied0>, 32, pred:1, %STATUS<imp-use>
+ // %R2<def,tied1> = RSUB_cc_rru6 %R2<tied0>, 31, pred:2, %STATUS<imp-use>
+ MachineInstr &MI = *MII;
+ const MachineOperand &Dest = MI.getOperand(0);
+ const MachineOperand &Src = MI.getOperand(1);
+ Register Ra = MF.getRegInfo().createVirtualRegister(&ARC::GPR32RegClass);
+ Register Rb = MF.getRegInfo().createVirtualRegister(&ARC::GPR32RegClass);
+
+ BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(ARC::FLS_f_rr), Ra)
+ .add(Src);
+ BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(ARC::MOV_cc_ru6), Rb)
+ .addImm(32)
+ .addImm(ARCCC::EQ)
+ .addReg(Ra);
+ BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(ARC::RSUB_cc_rru6))
+ .add(Dest)
+ .addImm(31)
+ .addImm(ARCCC::NE)
+ .addReg(Rb);
+
+ MI.eraseFromParent();
+}
+
+void ARCExpandPseudos::expandCTTZ(MachineFunction &MF,
+ MachineBasicBlock::iterator MII) {
+ // Expand:
+ // %R0<def> = CTTZ %R0<kill>, %STATUS<imp-def>
+ // To:
+ // %R0<def> = FFS_f_rr %R0<kill>, %STATUS<imp-def>
+ // %R0<def,tied1> = MOVcc_ru6 %R0<tied0>, 32, pred:1, %STATUS<imp-use>
+ MachineInstr &MI = *MII;
+ const MachineOperand &Dest = MI.getOperand(0);
+ const MachineOperand &Src = MI.getOperand(1);
+ Register R = MF.getRegInfo().createVirtualRegister(&ARC::GPR32RegClass);
+
+ BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(ARC::FFS_f_rr), R)
+ .add(Src);
+ BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(ARC::MOV_cc_ru6))
+ .add(Dest)
+ .addImm(32)
+ .addImm(ARCCC::EQ)
+ .addReg(R);
+
+ MI.eraseFromParent();
+}
+
bool ARCExpandPseudos::runOnMachineFunction(MachineFunction &MF) {
const ARCSubtarget *STI = &MF.getSubtarget<ARCSubtarget>();
TII = STI->getInstrInfo();
- bool ExpandedStore = false;
+ bool Expanded = false;
for (auto &MBB : MF) {
MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
while (MBBI != E) {
@@ -85,8 +140,16 @@ bool ARCExpandPseudos::runOnMachineFunction(MachineFunction &MF) {
case ARC::ST_FAR:
case ARC::STH_FAR:
case ARC::STB_FAR:
- ExpandStore(MF, MBBI);
- ExpandedStore = true;
+ expandStore(MF, MBBI);
+ Expanded = true;
+ break;
+ case ARC::CTLZ:
+ expandCTLZ(MF, MBBI);
+ Expanded = true;
+ break;
+ case ARC::CTTZ:
+ expandCTTZ(MF, MBBI);
+ Expanded = true;
break;
default:
break;
@@ -94,7 +157,7 @@ bool ARCExpandPseudos::runOnMachineFunction(MachineFunction &MF) {
MBBI = NMBBI;
}
}
- return ExpandedStore;
+ return Expanded;
}
FunctionPass *llvm::createARCExpandPseudosPass() {
diff --git a/llvm/lib/Target/ARC/ARCISelLowering.cpp b/llvm/lib/Target/ARC/ARCISelLowering.cpp
index ca33f5297471..7fd08f70ea3b 100644
--- a/llvm/lib/Target/ARC/ARCISelLowering.cpp
+++ b/llvm/lib/Target/ARC/ARCISelLowering.cpp
@@ -68,6 +68,31 @@ static ARCCC::CondCode ISDCCtoARCCC(ISD::CondCode isdCC) {
}
}
+void ARCTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const {
+ LLVM_DEBUG(dbgs() << "[ARC-ISEL] ReplaceNodeResults ");
+ LLVM_DEBUG(N->dump(&DAG));
+ LLVM_DEBUG(dbgs() << "; use_count=" << N->use_size() << "\n");
+
+ switch (N->getOpcode()) {
+ case ISD::READCYCLECOUNTER:
+ if (N->getValueType(0) == MVT::i64) {
+      // We read TIMER0 and zero-extend it to 64 bits, as the intrinsic
+      // requires.
+ SDValue V =
+ DAG.getNode(ISD::READCYCLECOUNTER, SDLoc(N),
+ DAG.getVTList(MVT::i32, MVT::Other), N->getOperand(0));
+ SDValue Op = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), MVT::i64, V);
+ Results.push_back(Op);
+ Results.push_back(V.getValue(1));
+ }
+ break;
+ default:
+ break;
+ }
+}
+
ARCTargetLowering::ARCTargetLowering(const TargetMachine &TM,
const ARCSubtarget &Subtarget)
: TargetLowering(TM), Subtarget(Subtarget) {
@@ -96,6 +121,11 @@ ARCTargetLowering::ARCTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SMAX, MVT::i32, Legal);
setOperationAction(ISD::SMIN, MVT::i32, Legal);
+ setOperationAction(ISD::ADDC, MVT::i32, Legal);
+ setOperationAction(ISD::ADDE, MVT::i32, Legal);
+ setOperationAction(ISD::SUBC, MVT::i32, Legal);
+ setOperationAction(ISD::SUBE, MVT::i32, Legal);
+
// Need barrel shifter.
setOperationAction(ISD::SHL, MVT::i32, Legal);
setOperationAction(ISD::SRA, MVT::i32, Legal);
@@ -135,6 +165,15 @@ ARCTargetLowering::ARCTargetLowering(const TargetMachine &TM,
// Sign extend inreg
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
+
+ // TODO: Predicate these with `options.hasBitScan() ? Legal : Expand`
+ // when the HasBitScan predicate is available.
+ setOperationAction(ISD::CTLZ, MVT::i32, Legal);
+ setOperationAction(ISD::CTTZ, MVT::i32, Legal);
+
+ setOperationAction(ISD::READCYCLECOUNTER, MVT::i32, Legal);
+ setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
+ isTypeLegal(MVT::i64) ? Legal : Custom);
}
const char *ARCTargetLowering::getTargetNodeName(unsigned Opcode) const {
@@ -495,7 +534,7 @@ SDValue ARCTargetLowering::LowerCallArguments(
CFRegNode.push_back(ArgIn.getValue(ArgIn->getNumValues() - 1));
}
} else {
- // sanity check
+ // Only arguments passed on the stack should make it here.
assert(VA.isMemLoc());
// Load the argument to a virtual register
unsigned ObjSize = VA.getLocVT().getStoreSize();
@@ -761,6 +800,13 @@ SDValue ARCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return LowerJumpTable(Op, DAG);
case ISD::VASTART:
return LowerVASTART(Op, DAG);
+ case ISD::READCYCLECOUNTER:
+ // As of LLVM 3.8, the lowering code insists that we customize it even
+ // though we've declared the i32 version as legal. This is because it only
+ // thinks i64 is the truly supported version. We've already converted the
+ // i64 version to a widened i32.
+ assert(Op.getSimpleValueType() == MVT::i32);
+ return Op;
default:
llvm_unreachable("unimplemented operand");
}
diff --git a/llvm/lib/Target/ARC/ARCISelLowering.h b/llvm/lib/Target/ARC/ARCISelLowering.h
index 4b72bfdaee9c..e070ed8752cc 100644
--- a/llvm/lib/Target/ARC/ARCISelLowering.h
+++ b/llvm/lib/Target/ARC/ARCISelLowering.h
@@ -77,6 +77,9 @@ public:
private:
const ARCSubtarget &Subtarget;
+ void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const override;
+
// Lower Operand helpers
SDValue LowerCallArguments(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
diff --git a/llvm/lib/Target/ARC/ARCInstrFormats.td b/llvm/lib/Target/ARC/ARCInstrFormats.td
index 5f539c92c745..2a109cc0f764 100644
--- a/llvm/lib/Target/ARC/ARCInstrFormats.td
+++ b/llvm/lib/Target/ARC/ARCInstrFormats.td
@@ -261,32 +261,6 @@ class F32_SOP_RR<bits<5> major, bits<6> subop, bit F, dag outs, dag ins,
let Inst{5-0} = subop;
}
-// Single Operand Immediate Instructions.
-// 1-register, unsigned 6-bit immediate Single Operand instruction with
-// condition code.
-// |26|25|24|23|22|21|20|19|18|17|16|15|14|13|12|11|10|9|8|7|6|5|4|3|2|1|0|
-// |B[2-0] | 1| 1| subop| F|B[5-3] |U6 |1|cc |
-class F32_SOP_CC_RU6<bits<5> major, bits<6> subop, bit F, dag outs, dag ins,
- string asmstr, list<dag> pattern> :
- InstARC<4, outs, ins, asmstr, pattern> {
-
- bits<5> cc;
- bits<6> U6;
- bits<6> B;
-
- let Inst{31-27} = major;
- let Inst{26-24} = B{2-0};
- let Inst{23-22} = 0b11;
- let Inst{21-16} = subop;
- let Inst{15} = F;
- let Inst{14-12} = B{5-3};
- let Inst{11-6} = U6;
- let Inst{5} = 1;
- let Inst{4-0} = cc;
-
- let DecoderMethod = "DecodeCCRU6Instruction";
-}
-
// Dual Operand Instructions. Inst[21-16] specifies the specific operation
// for this format.
@@ -353,6 +327,31 @@ class F32_DOP_RU6<bits<5> major, bits<6> subop, bit F, dag outs, dag ins,
let Inst{5-0} = A;
}
+// 1-register, unsigned 6-bit immediate Dual Operand instruction with
+// condition code.
+// |26|25|24|23|22|21|20|19|18|17|16|15|14|13|12|11|10|9|8|7|6|5|4|3|2|1|0|
+// |B[2-0] | 1| 1| subop| F|B[5-3] |U6 |1|cc |
+class F32_DOP_CC_RU6<bits<5> major, bits<6> subop, bit F, dag outs, dag ins,
+ string asmstr, list<dag> pattern> :
+ InstARC<4, outs, ins, asmstr, pattern> {
+
+ bits<5> cc;
+ bits<6> U6;
+ bits<6> B;
+
+ let Inst{31-27} = major;
+ let Inst{26-24} = B{2-0};
+ let Inst{23-22} = 0b11;
+ let Inst{21-16} = subop;
+ let Inst{15} = F;
+ let Inst{14-12} = B{5-3};
+ let Inst{11-6} = U6;
+ let Inst{5} = 1;
+ let Inst{4-0} = cc;
+
+ let DecoderMethod = "DecodeCCRU6Instruction";
+}
+
// 2-register, unsigned 6-bit immediate Dual Operand instruction with
// condition code. This instruction uses B as the first 2 operands
// (i.e, add.cc B, B, u6).
@@ -364,7 +363,6 @@ class F32_DOP_CC_RRU6<bits<5> major, bits<6> subop, bit F, dag outs, dag ins,
bits<5> cc;
bits<6> U6;
bits<6> B;
- bits<6> A;
let Inst{31-27} = major;
let Inst{26-24} = B{2-0};
@@ -397,6 +395,50 @@ class F32_DOP_RS12<bits<5> major, bits<6> subop, bit F, dag outs, dag ins,
let Inst{5-0} = S12{11-6};
}
+// 1-register, signed 12-bit immediate Dual Operand instruction.
+// This instruction uses B as the first operand (i.e., lr B, [%count0]).
+// |26|25|24|23|22|21|20|19|18|17|16|15|14|13|12|11|10|9|8|7|6|5|4|3|2|1|0|
+// |B[2-0] | 1| 0| subop| F|B[5-3] |S12[5-0] |S12[11-6] |
+class F32_SOP_RS12<bits<5> major, bits<6> subop, bit F, dag outs, dag ins,
+ string asmstr, list<dag> pattern> :
+ InstARC<4, outs, ins, asmstr, pattern> {
+ bits<6> B;
+ bits<12> S12;
+
+ let Inst{31-27} = major;
+ let Inst{26-24} = B{2-0};
+ let Inst{23-22} = 0b10;
+ let Inst{21-16} = subop;
+ let Inst{15} = F;
+ let Inst{14-12} = B{5-3};
+ let Inst{11-6} = S12{5-0};
+ let Inst{5-0} = S12{11-6};
+
+ let DecoderMethod = "DecodeSOPwithRS12";
+}
+
+// 1-register, unsigned 6-bit immediate Dual Operand instruction.
+// This instruction uses B as the first operand.
+// |26|25|24|23|22|21|20|19|18|17|16|15|14|13|12|11|10|9|8|7|6|5|4|3|2|1|0|
+// |B[2-0] | 0| 1| subop| F|B[5-3] |U6 |0|0|0|0|0|0|
+class F32_SOP_RU6<bits<5> major, bits<6> subop, bit F, dag outs, dag ins,
+ string asmstr, list<dag> pattern> :
+ InstARC<4, outs, ins, asmstr, pattern> {
+ bits<6> B;
+ bits<6> U6;
+
+ let Inst{31-27} = major;
+ let Inst{26-24} = B{2-0};
+ let Inst{23-22} = 0b01;
+ let Inst{21-16} = subop;
+ let Inst{15} = F;
+ let Inst{14-12} = B{5-3};
+ let Inst{11-6} = U6;
+ let Inst{5-0} = 0;
+
+ let DecoderMethod = "DecodeSOPwithRU6";
+}
+
// 2-register, 32-bit immediate (LImm) Dual Operand instruction.
// This instruction has the 32-bit immediate in bits 32-63, and
// 62 in the C register operand slot, but is otherwise F32_DOP_RR.
diff --git a/llvm/lib/Target/ARC/ARCInstrInfo.cpp b/llvm/lib/Target/ARC/ARCInstrInfo.cpp
index 527f239c2643..6e8190ee7209 100644
--- a/llvm/lib/Target/ARC/ARCInstrInfo.cpp
+++ b/llvm/lib/Target/ARC/ARCInstrInfo.cpp
@@ -18,8 +18,8 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -43,8 +43,8 @@ enum TSFlagsConstants {
// Pin the vtable to this file.
void ARCInstrInfo::anchor() {}
-ARCInstrInfo::ARCInstrInfo()
- : ARCGenInstrInfo(ARC::ADJCALLSTACKDOWN, ARC::ADJCALLSTACKUP), RI() {}
+ARCInstrInfo::ARCInstrInfo(const ARCSubtarget &ST)
+ : ARCGenInstrInfo(ARC::ADJCALLSTACKDOWN, ARC::ADJCALLSTACKUP), RI(ST) {}
static bool isZeroImm(const MachineOperand &Op) {
return Op.isImm() && Op.getImm() == 0;
@@ -99,7 +99,7 @@ unsigned ARCInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
}
/// Return the inverse of passed condition, i.e. turning COND_E to COND_NE.
-static ARCCC::CondCode GetOppositeBranchCondition(ARCCC::CondCode CC) {
+static ARCCC::CondCode getOppositeBranchCondition(ARCCC::CondCode CC) {
switch (CC) {
default:
llvm_unreachable("Illegal condition code!");
@@ -280,23 +280,23 @@ unsigned ARCInstrInfo::removeBranch(MachineBasicBlock &MBB,
void ARCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
- const DebugLoc &dl, MCRegister DestReg,
+ const DebugLoc &DL, MCRegister DestReg,
MCRegister SrcReg, bool KillSrc) const {
assert(ARC::GPR32RegClass.contains(SrcReg) &&
"Only GPR32 src copy supported.");
assert(ARC::GPR32RegClass.contains(DestReg) &&
"Only GPR32 dest copy supported.");
- BuildMI(MBB, I, dl, get(ARC::MOV_rr), DestReg)
+ BuildMI(MBB, I, DL, get(ARC::MOV_rr), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
}
void ARCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
- Register SrcReg, bool isKill,
+ Register SrcReg, bool IsKill,
int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- DebugLoc dl = MBB.findDebugLoc(I);
+ DebugLoc DL = MBB.findDebugLoc(I);
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -312,8 +312,8 @@ void ARCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
"Only support GPR32 stores to stack now.");
LLVM_DEBUG(dbgs() << "Created store reg=" << printReg(SrcReg, TRI)
<< " to FrameIndex=" << FrameIndex << "\n");
- BuildMI(MBB, I, dl, get(ARC::ST_rs9))
- .addReg(SrcReg, getKillRegState(isKill))
+ BuildMI(MBB, I, DL, get(ARC::ST_rs9))
+ .addReg(SrcReg, getKillRegState(IsKill))
.addFrameIndex(FrameIndex)
.addImm(0)
.addMemOperand(MMO);
@@ -324,7 +324,7 @@ void ARCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
Register DestReg, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- DebugLoc dl = MBB.findDebugLoc(I);
+ DebugLoc DL = MBB.findDebugLoc(I);
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = MF.getFrameInfo();
MachineMemOperand *MMO = MF.getMachineMemOperand(
@@ -339,7 +339,7 @@ void ARCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
"Only support GPR32 stores to stack now.");
LLVM_DEBUG(dbgs() << "Created load reg=" << printReg(DestReg, TRI)
<< " from FrameIndex=" << FrameIndex << "\n");
- BuildMI(MBB, I, dl, get(ARC::LD_rs9))
+ BuildMI(MBB, I, DL, get(ARC::LD_rs9))
.addReg(DestReg, RegState::Define)
.addFrameIndex(FrameIndex)
.addImm(0)
@@ -350,7 +350,7 @@ void ARCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
bool ARCInstrInfo::reverseBranchCondition(
SmallVectorImpl<MachineOperand> &Cond) const {
assert((Cond.size() == 3) && "Invalid ARC branch condition!");
- Cond[2].setImm(GetOppositeBranchCondition((ARCCC::CondCode)Cond[2].getImm()));
+ Cond[2].setImm(getOppositeBranchCondition((ARCCC::CondCode)Cond[2].getImm()));
return false;
}
@@ -358,9 +358,9 @@ MachineBasicBlock::iterator
ARCInstrInfo::loadImmediate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, unsigned Reg,
uint64_t Value) const {
- DebugLoc dl = MBB.findDebugLoc(MI);
+ DebugLoc DL = MBB.findDebugLoc(MI);
if (isInt<12>(Value)) {
- return BuildMI(MBB, MI, dl, get(ARC::MOV_rs12), Reg)
+ return BuildMI(MBB, MI, DL, get(ARC::MOV_rs12), Reg)
.addImm(Value)
.getInstr();
}
@@ -371,7 +371,7 @@ unsigned ARCInstrInfo::insertBranch(MachineBasicBlock &MBB,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
ArrayRef<MachineOperand> Cond,
- const DebugLoc &dl, int *BytesAdded) const {
+ const DebugLoc &DL, int *BytesAdded) const {
assert(!BytesAdded && "Code size not handled.");
// Shouldn't be a fall through.
@@ -380,11 +380,11 @@ unsigned ARCInstrInfo::insertBranch(MachineBasicBlock &MBB,
"ARC branch conditions have two components!");
if (Cond.empty()) {
- BuildMI(&MBB, dl, get(ARC::BR)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(ARC::BR)).addMBB(TBB);
return 1;
}
int BccOpc = Cond[1].isImm() ? ARC::BRcc_ru6_p : ARC::BRcc_rr_p;
- MachineInstrBuilder MIB = BuildMI(&MBB, dl, get(BccOpc));
+ MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(BccOpc));
MIB.addMBB(TBB);
for (unsigned i = 0; i < 3; i++) {
MIB.add(Cond[i]);
@@ -396,7 +396,7 @@ unsigned ARCInstrInfo::insertBranch(MachineBasicBlock &MBB,
}
// Two-way conditional branch.
- BuildMI(&MBB, dl, get(ARC::BR)).addMBB(FBB);
+ BuildMI(&MBB, DL, get(ARC::BR)).addMBB(FBB);
return 2;
}
diff --git a/llvm/lib/Target/ARC/ARCInstrInfo.h b/llvm/lib/Target/ARC/ARCInstrInfo.h
index 4f6122daf91f..ebc02a93b124 100644
--- a/llvm/lib/Target/ARC/ARCInstrInfo.h
+++ b/llvm/lib/Target/ARC/ARCInstrInfo.h
@@ -28,7 +28,7 @@ class ARCInstrInfo : public ARCGenInstrInfo {
virtual void anchor();
public:
- ARCInstrInfo();
+ ARCInstrInfo(const ARCSubtarget &);
const ARCRegisterInfo &getRegisterInfo() const { return RI; }
@@ -57,19 +57,19 @@ public:
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
- const DebugLoc &dl,
+ const DebugLoc &,
int *BytesAdded = nullptr) const override;
unsigned removeBranch(MachineBasicBlock &MBB,
int *BytesRemoved = nullptr) const override;
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- const DebugLoc &dl, MCRegister DestReg, MCRegister SrcReg,
+ const DebugLoc &, MCRegister DestReg, MCRegister SrcReg,
bool KillSrc) const override;
void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, Register SrcReg,
- bool isKill, int FrameIndex,
+ bool IsKill, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
diff --git a/llvm/lib/Target/ARC/ARCInstrInfo.td b/llvm/lib/Target/ARC/ARCInstrInfo.td
index ea3e41621323..4a0bc5cf7421 100644
--- a/llvm/lib/Target/ARC/ARCInstrInfo.td
+++ b/llvm/lib/Target/ARC/ARCInstrInfo.td
@@ -45,7 +45,6 @@ def SDT_ARCCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32>,
def SDT_ARCCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
SDTCisVT<1, i32> ]>;
-
// Global Address.
def ARCGAWrapper : SDNode<"ARCISD::GAWRAPPER", SDT_ARCmov, []>;
@@ -80,6 +79,12 @@ def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_ARCCallSeqEnd,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
//===----------------------------------------------------------------------===//
+// Instruction predicates
+//===----------------------------------------------------------------------===//
+
+def HasNorm : Predicate<"Subtarget->hasNorm()">;
+
+//===----------------------------------------------------------------------===//
// Instruction Pattern Stuff
//===----------------------------------------------------------------------===//
@@ -128,6 +133,19 @@ def STB_FAR : PseudoInstARC<(outs), (ins GPR32:$dst, MEMrlimm:$addr),
"STB_FAR $dst, $addr",
[(truncstorei8 GPR32:$dst, AddrModeFar:$addr)]>;
+// TODO: Add `Requires<[HasBitScan]>` predicate to these when available.
+let Defs = [STATUS32] in {
+ def CTLZ : PseudoInstARC<(outs GPR32:$A),
+ (ins GPR32:$B),
+ "error.fls $A, $B",
+ [(set GPR32:$A, (ctlz i32:$B))]>;
+
+ def CTTZ : PseudoInstARC<(outs GPR32:$A),
+ (ins GPR32:$B),
+ "error.ffs $A, $B",
+ [(set GPR32:$A, (cttz i32:$B))]>;
+}
+
//===----------------------------------------------------------------------===//
// Instruction Generation multiclasses.
// Generate many variants of a single instruction with a single defining
@@ -252,6 +270,19 @@ multiclass MultiPat<SDPatternOperator InFrag,
def _rrlimm : Pat<(InFrag i32:$B, imm32:$LImm), (RRLImm i32:$B, imm32:$LImm)>;
}
+// NOTE: This could be specialized later with a custom `PrintMethod` for
+// displaying the aux register name. E.g. `[%count0]` instead of [33].
+def AuxReg : Operand<i32>;
+
+def LR_rs12 : F32_SOP_RS12<0b00100, 0b101010, 0,
+ (outs GPR32:$B), (ins AuxReg:$C),
+ "lr\t$B, [$C]", []>;
+def LR_ru6 : F32_SOP_RU6<0b00100, 0b101010, 0,
+ (outs GPR32:$B), (ins AuxReg:$C),
+ "lr\t$B, [$C]", []>;
+
+def : Pat<(i32 readcyclecounter), (LR_rs12 0x21)>; // read timer
+
// ---------------------------------------------------------------------------
// Instruction definitions and patterns for 3 operand binary instructions.
// ---------------------------------------------------------------------------
@@ -276,6 +307,10 @@ defm MPY : ArcBinaryGEN4Inst<0b011010, "mpy",1>;
defm MPYM : ArcBinaryGEN4Inst<0b011011, "mpym",1>;
defm MPYMU : ArcBinaryGEN4Inst<0b011100, "mpymu",1>;
defm SETEQ : ArcBinaryGEN4Inst<0b111000, "seteq",1>;
+let Uses=[STATUS32], isAsCheapAsAMove=0, isReMaterializable=0 in {
+ defm ADC : ArcBinaryGEN4Inst<0b000001, "adc",1>;
+ defm SBC : ArcBinaryGEN4Inst<0b000011, "sbc">;
+}
// Patterns for 3 operand binary instructions.
defm : MultiPat<add, ADD_rrr, ADD_rru6, ADD_rrlimm>;
@@ -293,6 +328,11 @@ defm : MultiPat<mul, MPY_rrr, MPY_rru6, MPY_rrlimm>;
defm : MultiPat<mulhs, MPYM_rrr, MPYM_rru6, MPYM_rrlimm>;
defm : MultiPat<mulhu, MPYMU_rrr, MPYMU_rru6, MPYMU_rrlimm>;
+defm : MultiPat<addc, ADD_f_rrr, ADD_f_rru6, ADD_f_rrlimm>;
+defm : MultiPat<adde, ADC_f_rrr, ADC_f_rru6, ADC_f_rrlimm>;
+defm : MultiPat<subc, SUB_f_rrr, SUB_f_rru6, SUB_f_rrlimm>;
+defm : MultiPat<sube, SBC_f_rrr, SBC_f_rru6, SBC_f_rrlimm>;
+
// ---------------------------------------------------------------------------
// Unary Instruction definitions.
// ---------------------------------------------------------------------------
@@ -301,8 +341,14 @@ defm SEXB : ArcUnaryGEN4Inst<0b000101, "sexb">;
defm SEXH : ArcUnaryGEN4Inst<0b000110, "sexh">;
// Extension unary instruction definitions.
+defm FFS : ArcUnaryEXT5Inst<0b010010, "ffs">;
defm FLS : ArcUnaryEXT5Inst<0b010011, "fls">;
+let Predicates=[HasNorm] in {
+ defm NORM : ArcUnaryEXT5Inst<0b000001,"norm">;
+ defm NORMH : ArcUnaryEXT5Inst<0b001000,"normh">;
+}
+
// General Unary Instruction fragments.
def : Pat<(sext_inreg i32:$a, i8), (SEXB_rr i32:$a)>;
def : Pat<(sext_inreg i32:$a, i16), (SEXH_rr i32:$a)>;
@@ -337,24 +383,30 @@ def MOV_ru6 : F32_DOP_RU6<0b00100, 0b001010, 0,
(outs GPR32:$B), (ins immU6:$U6),
"mov\t$B, $U6", []>;
+def MOV_f_ru6 : F32_DOP_RU6<0b00100, 0b001010, 1,
+ (outs GPR32:$B), (ins u6:$U6),
+ "mov.f\t$B, $U6", []> {
+ let isAsCheapAsAMove=1;
+ let Defs = [STATUS32];
+}
+
def cmov : PatFrag<(ops node:$op1, node:$op2, node:$cc),
(ARCcmov $op1, $op2, $cc)>;
-let Uses = [STATUS32] in {
- def MOVcc : F32_DOP_CC_RR<0b00100, 0b001010, 0,
- (outs GPR32:$B),
- (ins GPR32:$C, GPR32:$fval, cmovpred:$cc),
- !strconcat("mov.", "$cc\t$B, $C"),
- [(set GPR32:$B, (cmov i32:$C, i32:$fval, cmovpred:$cc))]> {
- let Constraints = "$B = $fval";
- }
-
- def MOVcc_ru6 : F32_SOP_CC_RU6<0b00100, 0b001010, 0,
- (outs GPR32:$b), (ins u6:$c, CCOp:$cc, GPR32:$b2),
- "mov.$cc\t$b, $c", []> {
- let isAsCheapAsAMove=0;
- let isPredicable=1;
- let isReMaterializable=0;
- let Constraints="$b2 = $b";
+let Uses = [STATUS32], isAsCheapAsAMove = 1, isPredicable=1,
+ isReMaterializable = 0, Constraints = "$B = $B2" in {
+ def MOV_cc : F32_DOP_CC_RR<0b00100, 0b001010, 0,
+ (outs GPR32:$B), (ins GPR32:$C, GPR32:$B2, cmovpred:$cc),
+ "mov.$cc\t$B, $C",
+ [(set GPR32:$B, (cmov i32:$C, i32:$B2, cmovpred:$cc))]>;
+
+ def MOV_cc_ru6 : F32_DOP_CC_RU6<0b00100, 0b001010, 0,
+ (outs GPR32:$B), (ins u6:$C, CCOp:$cc, GPR32:$B2),
+ "mov.$cc\t$B, $C", []>;
+
+ def MOV_cc_f_ru6 : F32_DOP_CC_RU6<0b00100, 0b001010, 1,
+ (outs GPR32:$B), (ins u6:$C, CCOp:$cc, GPR32:$B2),
+ "mov.$cc.f\t$B, $C", []> {
+ let Defs = [STATUS32];
}
}
diff --git a/llvm/lib/Target/ARC/ARCOptAddrMode.cpp b/llvm/lib/Target/ARC/ARCOptAddrMode.cpp
index 232a7be2a9f5..c956f00b628d 100644
--- a/llvm/lib/Target/ARC/ARCOptAddrMode.cpp
+++ b/llvm/lib/Target/ARC/ARCOptAddrMode.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -33,6 +34,16 @@ using namespace llvm;
#define DEBUG_TYPE "arc-addr-mode"
namespace llvm {
+
+static cl::opt<unsigned> ArcKillAddrMode("arc-kill-addr-mode", cl::init(0),
+ cl::ReallyHidden, cl::ZeroOrMore);
+
+#define DUMP_BEFORE() ((ArcKillAddrMode & 0x0001) != 0)
+#define DUMP_AFTER() ((ArcKillAddrMode & 0x0002) != 0)
+#define VIEW_BEFORE() ((ArcKillAddrMode & 0x0004) != 0)
+#define VIEW_AFTER() ((ArcKillAddrMode & 0x0008) != 0)
+#define KILL_PASS() ((ArcKillAddrMode & 0x0010) != 0)
+
FunctionPass *createARCOptAddrMode();
void initializeARCOptAddrModePass(PassRegistry &);
} // end namespace llvm
@@ -73,9 +84,9 @@ private:
// instruction \p To
bool canHoistLoadStoreTo(MachineInstr *Ldst, MachineInstr *To);
- // Returns true if load/store instruction \p Ldst can be sunk down
- // to instruction \p To
- bool canSinkLoadStoreTo(MachineInstr *Ldst, MachineInstr *To);
+ // // Returns true if load/store instruction \p Ldst can be sunk down
+ // // to instruction \p To
+ // bool canSinkLoadStoreTo(MachineInstr *Ldst, MachineInstr *To);
// Check if instructions \p Ldst and \p Add can be moved to become adjacent
// If they can return instruction which need not to move.
@@ -413,30 +424,30 @@ bool ARCOptAddrMode::canHoistLoadStoreTo(MachineInstr *Ldst, MachineInstr *To) {
return true;
}
-bool ARCOptAddrMode::canSinkLoadStoreTo(MachineInstr *Ldst, MachineInstr *To) {
- // Can only sink load/store within same BB
- if (Ldst->getParent() != To->getParent())
- return false;
- MachineBasicBlock::const_iterator MI(Ldst), ME(To),
- End(Ldst->getParent()->end());
-
- bool IsStore = Ldst->mayStore();
- bool IsLoad = Ldst->mayLoad();
-
- Register ValReg = IsLoad ? Ldst->getOperand(0).getReg() : Register();
- for (; MI != ME && MI != End; ++MI) {
- if (MI->isDebugValue())
- continue;
- if (MI->mayStore() || MI->isCall() || MI->isInlineAsm() ||
- MI->hasUnmodeledSideEffects())
- return false;
- if (IsStore && MI->mayLoad())
- return false;
- if (ValReg && MI->readsVirtualRegister(ValReg))
- return false;
- }
- return true;
-}
+// bool ARCOptAddrMode::canSinkLoadStoreTo(MachineInstr *Ldst, MachineInstr *To) {
+// // Can only sink load/store within same BB
+// if (Ldst->getParent() != To->getParent())
+// return false;
+// MachineBasicBlock::const_iterator MI(Ldst), ME(To),
+// End(Ldst->getParent()->end());
+
+// bool IsStore = Ldst->mayStore();
+// bool IsLoad = Ldst->mayLoad();
+
+// Register ValReg = IsLoad ? Ldst->getOperand(0).getReg() : Register();
+// for (; MI != ME && MI != End; ++MI) {
+// if (MI->isDebugValue())
+// continue;
+// if (MI->mayStore() || MI->isCall() || MI->isInlineAsm() ||
+// MI->hasUnmodeledSideEffects())
+// return false;
+// if (IsStore && MI->mayLoad())
+// return false;
+// if (ValReg && MI->readsVirtualRegister(ValReg))
+// return false;
+// }
+// return true;
+// }
void ARCOptAddrMode::changeToAddrMode(MachineInstr &Ldst, unsigned NewOpcode,
unsigned NewBase,
@@ -485,9 +496,16 @@ bool ARCOptAddrMode::processBasicBlock(MachineBasicBlock &MBB) {
}
bool ARCOptAddrMode::runOnMachineFunction(MachineFunction &MF) {
- if (skipFunction(MF.getFunction()))
+ if (skipFunction(MF.getFunction()) || KILL_PASS())
return false;
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ if (DUMP_BEFORE())
+ MF.dump();
+#endif
+ if (VIEW_BEFORE())
+ MF.viewCFG();
+
AST = &MF.getSubtarget<ARCSubtarget>();
AII = AST->getInstrInfo();
MRI = &MF.getRegInfo();
@@ -496,6 +514,13 @@ bool ARCOptAddrMode::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
for (auto &MBB : MF)
Changed |= processBasicBlock(MBB);
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ if (DUMP_AFTER())
+ MF.dump();
+#endif
+ if (VIEW_AFTER())
+ MF.viewCFG();
return Changed;
}
diff --git a/llvm/lib/Target/ARC/ARCRegisterInfo.cpp b/llvm/lib/Target/ARC/ARCRegisterInfo.cpp
index fb84dd9b266a..91ddd7fe36e1 100644
--- a/llvm/lib/Target/ARC/ARCRegisterInfo.cpp
+++ b/llvm/lib/Target/ARC/ARCRegisterInfo.cpp
@@ -35,19 +35,19 @@ using namespace llvm;
#define GET_REGINFO_TARGET_DESC
#include "ARCGenRegisterInfo.inc"
-static void ReplaceFrameIndex(MachineBasicBlock::iterator II,
+static void replaceFrameIndex(MachineBasicBlock::iterator II,
const ARCInstrInfo &TII, unsigned Reg,
unsigned FrameReg, int Offset, int StackSize,
int ObjSize, RegScavenger *RS, int SPAdj) {
assert(RS && "Need register scavenger.");
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
- DebugLoc dl = MI.getDebugLoc();
+ DebugLoc DL = MI.getDebugLoc();
unsigned BaseReg = FrameReg;
unsigned KillState = 0;
if (MI.getOpcode() == ARC::LD_rs9 && (Offset >= 256 || Offset < -256)) {
// Loads can always be reached with LD_rlimm.
- BuildMI(MBB, II, dl, TII.get(ARC::LD_rlimm), Reg)
+ BuildMI(MBB, II, DL, TII.get(ARC::LD_rlimm), Reg)
.addReg(BaseReg)
.addImm(Offset)
.addMemOperand(*MI.memoperands_begin());
@@ -72,7 +72,7 @@ static void ReplaceFrameIndex(MachineBasicBlock::iterator II,
RS->setRegUsed(BaseReg);
}
unsigned AddOpc = isUInt<6>(Offset) ? ARC::ADD_rru6 : ARC::ADD_rrlimm;
- BuildMI(MBB, II, dl, TII.get(AddOpc))
+ BuildMI(MBB, II, DL, TII.get(AddOpc))
.addReg(BaseReg, RegState::Define)
.addReg(FrameReg)
.addImm(Offset);
@@ -90,7 +90,7 @@ static void ReplaceFrameIndex(MachineBasicBlock::iterator II,
case ARC::LDB_rs9:
case ARC::LDB_X_rs9:
LLVM_DEBUG(dbgs() << "Building LDFI\n");
- BuildMI(MBB, II, dl, TII.get(MI.getOpcode()), Reg)
+ BuildMI(MBB, II, DL, TII.get(MI.getOpcode()), Reg)
.addReg(BaseReg, KillState)
.addImm(Offset)
.addMemOperand(*MI.memoperands_begin());
@@ -103,7 +103,7 @@ static void ReplaceFrameIndex(MachineBasicBlock::iterator II,
LLVM_FALLTHROUGH;
case ARC::STB_rs9:
LLVM_DEBUG(dbgs() << "Building STFI\n");
- BuildMI(MBB, II, dl, TII.get(MI.getOpcode()))
+ BuildMI(MBB, II, DL, TII.get(MI.getOpcode()))
.addReg(Reg, getKillRegState(MI.getOperand(0).isKill()))
.addReg(BaseReg, KillState)
.addImm(Offset)
@@ -111,7 +111,7 @@ static void ReplaceFrameIndex(MachineBasicBlock::iterator II,
break;
case ARC::GETFI:
LLVM_DEBUG(dbgs() << "Building GETFI\n");
- BuildMI(MBB, II, dl,
+ BuildMI(MBB, II, DL,
TII.get(isUInt<6>(Offset) ? ARC::ADD_rru6 : ARC::ADD_rrlimm))
.addReg(Reg, RegState::Define)
.addReg(FrameReg)
@@ -125,7 +125,8 @@ static void ReplaceFrameIndex(MachineBasicBlock::iterator II,
MBB.erase(II);
}
-ARCRegisterInfo::ARCRegisterInfo() : ARCGenRegisterInfo(ARC::BLINK) {}
+ARCRegisterInfo::ARCRegisterInfo(const ARCSubtarget &ST)
+ : ARCGenRegisterInfo(ARC::BLINK), ST(ST) {}
bool ARCRegisterInfo::needsFrameMoves(const MachineFunction &MF) {
return MF.needsFrameMoves();
@@ -145,6 +146,7 @@ BitVector ARCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(ARC::R25);
Reserved.set(ARC::BLINK);
Reserved.set(ARC::FP);
+
return Reserved;
}
@@ -214,7 +216,7 @@ void ARCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
"FP Offset not in bounds.");
}
}
- ReplaceFrameIndex(II, TII, Reg, getFrameRegister(MF), Offset, StackSize,
+ replaceFrameIndex(II, TII, Reg, getFrameRegister(MF), Offset, StackSize,
ObjSize, RS, SPAdj);
}
diff --git a/llvm/lib/Target/ARC/ARCRegisterInfo.h b/llvm/lib/Target/ARC/ARCRegisterInfo.h
index f8bca11fdbc8..b1ae6b69398f 100644
--- a/llvm/lib/Target/ARC/ARCRegisterInfo.h
+++ b/llvm/lib/Target/ARC/ARCRegisterInfo.h
@@ -21,10 +21,13 @@
namespace llvm {
class TargetInstrInfo;
+class ARCSubtarget;
struct ARCRegisterInfo : public ARCGenRegisterInfo {
+ const ARCSubtarget &ST;
+
public:
- ARCRegisterInfo();
+ ARCRegisterInfo(const ARCSubtarget &);
/// Code Generation virtual methods...
diff --git a/llvm/lib/Target/ARC/ARCRegisterInfo.td b/llvm/lib/Target/ARC/ARCRegisterInfo.td
index 5f2bc7974dde..4b686e4bda64 100644
--- a/llvm/lib/Target/ARC/ARCRegisterInfo.td
+++ b/llvm/lib/Target/ARC/ARCRegisterInfo.td
@@ -21,56 +21,56 @@ class Core<int num, string n, list<string>altNames=[]> : ARCReg<n, altNames> {
let HWEncoding = num;
}
-class Status<string n> : ARCReg<n, []> {
+// Auxiliary register
+class Aux<int num, string n, list<string> altNames=[]> : ARCReg<n, altNames> {
+ let HWEncoding = num;
}
// Integer registers
-def R0 : Core< 0, "%r0">, DwarfRegNum<[0]>;
-def R1 : Core< 1, "%r1">, DwarfRegNum<[1]>;
-def R2 : Core< 2, "%r2">, DwarfRegNum<[2]>;
-def R3 : Core< 3, "%r3">, DwarfRegNum<[3]>;
+foreach i = 0 - 3 in
+ def R#i : Core<i, "%r"#i>, DwarfRegNum<[i]>;
+
let CostPerUse=[1] in {
-def R4 : Core< 4, "%r4">, DwarfRegNum<[4]>;
-def R5 : Core< 5, "%r5">, DwarfRegNum<[5]>;
-def R6 : Core< 6, "%r6">, DwarfRegNum<[6]>;
-def R7 : Core< 7, "%r7">, DwarfRegNum<[7]>;
-def R8 : Core< 8, "%r8">, DwarfRegNum<[8]>;
-def R9 : Core< 9, "%r9">, DwarfRegNum<[9]>;
-def R10 : Core<10, "%r10">, DwarfRegNum<[10]>;
-def R11 : Core<11, "%r11">, DwarfRegNum<[11]>;
+ foreach i = 4 - 11 in
+ def R#i : Core<i, "%r"#i>, DwarfRegNum<[i]>;
}
-def R12 : Core<12, "%r12">, DwarfRegNum<[12]>;
-def R13 : Core<13, "%r13">, DwarfRegNum<[13]>;
-def R14 : Core<14, "%r14">, DwarfRegNum<[14]>;
-def R15 : Core<15, "%r15">, DwarfRegNum<[15]>;
+
+foreach i = 12 - 15 in
+ def R#i : Core<i, "%r"#i>, DwarfRegNum<[i]>;
let CostPerUse=[1] in {
-def R16 : Core<16, "%r16">, DwarfRegNum<[16]>;
-def R17 : Core<17, "%r17">, DwarfRegNum<[17]>;
-def R18 : Core<18, "%r18">, DwarfRegNum<[18]>;
-def R19 : Core<19, "%r19">, DwarfRegNum<[19]>;
-def R20 : Core<20, "%r20">, DwarfRegNum<[20]>;
-def R21 : Core<21, "%r21">, DwarfRegNum<[21]>;
-def R22 : Core<22, "%r22">, DwarfRegNum<[22]>;
-def R23 : Core<23, "%r23">, DwarfRegNum<[23]>;
-def R24 : Core<24, "%r24">, DwarfRegNum<[24]>;
-def R25 : Core<25, "%r25">, DwarfRegNum<[25]>;
-def GP : Core<26, "%gp",["%r26"]>, DwarfRegNum<[26]>;
-def FP : Core<27, "%fp", ["%r27"]>, DwarfRegNum<[27]>;
-def SP : Core<28, "%sp", ["%r28"]>, DwarfRegNum<[28]>;
-def ILINK : Core<29, "%ilink">, DwarfRegNum<[29]>;
-def R30 : Core<30, "%r30">, DwarfRegNum<[30]>;
-def BLINK: Core<31, "%blink">, DwarfRegNum<[31]>;
-
-def STATUS32 : Status<"status32">, DwarfRegNum<[32]>;
+
+ foreach i = 16 - 25 in
+ def R#i : Core<i, "%r"#i>, DwarfRegNum<[i]>;
+
+ def GP : Core<26, "%gp",["%r26"]>, DwarfRegNum<[26]>;
+ def FP : Core<27, "%fp", ["%r27"]>, DwarfRegNum<[27]>;
+ def SP : Core<28, "%sp", ["%r28"]>, DwarfRegNum<[28]>;
+ def ILINK : Core<29, "%ilink">, DwarfRegNum<[29]>;
+ def R30 : Core<30, "%r30">, DwarfRegNum<[30]>;
+ def BLINK : Core<31, "%blink">, DwarfRegNum<[31]>;
+
+ // Define extended core registers R32..R63
+ foreach i = 32 - 63 in
+ def R#i : Core<i, "%r"#i>, DwarfRegNum<[i]>;
+}
+
+// Auxiliary registers
+let CostPerUse=[1] in {
+ def STATUS32 : Aux<10, "status32">; // No DwarfRegNum defined in the ARC ABI
}
-// Register classes.
-//
def GPR32: RegisterClass<"ARC", [i32], 32,
- (add R0, R1, R2, R3,
- R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, R17, R18, R19,
- R20, R21, R22, R23, R24, R25, GP, FP, SP, ILINK, R30, BLINK)>;
+ (add (sequence "R%u", 0, 25), GP, FP, SP, ILINK, R30, BLINK, (sequence "R%u", 32, 63))> {
+ let AltOrders=[(add (sequence "R%u", 0, 25), GP, FP, SP, ILINK, R30, BLINK)];
+ let AltOrderSelect = [{
+    // The value returned from this C++ code block selects the allocation
+    // order:
+    // 0 is the default order (all Core32 regs), 1 is AltOrders[0],
+    // 2 is AltOrders[1], and so on.
+ return 1;
+ }];
+}
def SREG : RegisterClass<"ARC", [i32], 1, (add STATUS32)>;
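The foreach loops and the (sequence ...) operator introduced above are shorthand for the explicit per-register definitions the old file spelled out. As a minimal sketch, using only the names visible in this hunk, the first loop

    foreach i = 0 - 3 in
      def R#i : Core<i, "%r"#i>, DwarfRegNum<[i]>;

expands to the four definitions it replaces:

    def R0 : Core<0, "%r0">, DwarfRegNum<[0]>;
    def R1 : Core<1, "%r1">, DwarfRegNum<[1]>;
    def R2 : Core<2, "%r2">, DwarfRegNum<[2]>;
    def R3 : Core<3, "%r3">, DwarfRegNum<[3]>;

Likewise, (sequence "R%u", 0, 25) in the GPR32 class stands for (add R0, R1, ..., R25).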
diff --git a/llvm/lib/Target/ARC/ARCSubtarget.cpp b/llvm/lib/Target/ARC/ARCSubtarget.cpp
index 409dd2a98ab4..641c56b06870 100644
--- a/llvm/lib/Target/ARC/ARCSubtarget.cpp
+++ b/llvm/lib/Target/ARC/ARCSubtarget.cpp
@@ -12,7 +12,7 @@
#include "ARCSubtarget.h"
#include "ARC.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
@@ -26,5 +26,5 @@ void ARCSubtarget::anchor() {}
ARCSubtarget::ARCSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS, const TargetMachine &TM)
- : ARCGenSubtargetInfo(TT, CPU, /*TuneCPU=*/CPU, FS), FrameLowering(*this),
- TLInfo(TM, *this) {}
+ : ARCGenSubtargetInfo(TT, CPU, /*TuneCPU=*/CPU, FS), InstrInfo(*this),
+ FrameLowering(*this), TLInfo(TM, *this) {}
diff --git a/llvm/lib/Target/ARC/ARCSubtarget.h b/llvm/lib/Target/ARC/ARCSubtarget.h
index 6a4856221b8f..f3429677deeb 100644
--- a/llvm/lib/Target/ARC/ARCSubtarget.h
+++ b/llvm/lib/Target/ARC/ARCSubtarget.h
@@ -29,14 +29,15 @@ class StringRef;
class TargetMachine;
class ARCSubtarget : public ARCGenSubtargetInfo {
- bool Xnorm = false;
-
virtual void anchor();
ARCInstrInfo InstrInfo;
ARCFrameLowering FrameLowering;
ARCTargetLowering TLInfo;
SelectionDAGTargetInfo TSInfo;
+ // ARC processor extensions
+ bool Xnorm = false;
+
public:
/// This constructor initializes the data members to match that
/// of the specified triple.
diff --git a/llvm/lib/Target/ARC/ARCTargetMachine.cpp b/llvm/lib/Target/ARC/ARCTargetMachine.cpp
index b8c8949e18dd..52f74b729ff7 100644
--- a/llvm/lib/Target/ARC/ARCTargetMachine.cpp
+++ b/llvm/lib/Target/ARC/ARCTargetMachine.cpp
@@ -16,7 +16,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/ARC/Disassembler/ARCDisassembler.cpp b/llvm/lib/Target/ARC/Disassembler/ARCDisassembler.cpp
index b7033d0972b9..bb5336931932 100644
--- a/llvm/lib/Target/ARC/Disassembler/ARCDisassembler.cpp
+++ b/llvm/lib/Target/ARC/Disassembler/ARCDisassembler.cpp
@@ -21,7 +21,7 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
@@ -107,6 +107,12 @@ static DecodeStatus DecodeStLImmInstruction(MCInst &, uint64_t, uint64_t,
static DecodeStatus DecodeLdRLImmInstruction(MCInst &, uint64_t, uint64_t,
const void *);
+static DecodeStatus DecodeSOPwithRS12(MCInst &, uint64_t, uint64_t,
+ const void *);
+
+static DecodeStatus DecodeSOPwithRU6(MCInst &, uint64_t, uint64_t,
+ const void *);
+
static DecodeStatus DecodeCCRU6Instruction(MCInst &, uint64_t, uint64_t,
const void *);
@@ -304,13 +310,36 @@ static DecodeStatus DecodeCCRU6Instruction(MCInst &Inst, uint64_t Insn,
DstB = decodeBField(Insn);
DecodeGPR32RegisterClass(Inst, DstB, Address, Decoder);
using Field = decltype(Insn);
- Field U6Field = fieldFromInstruction(Insn, 6, 11);
+ Field U6Field = fieldFromInstruction(Insn, 6, 6);
Inst.addOperand(MCOperand::createImm(U6Field));
Field CCField = fieldFromInstruction(Insn, 0, 4);
Inst.addOperand(MCOperand::createImm(CCField));
return MCDisassembler::Success;
}
+static DecodeStatus DecodeSOPwithRU6(MCInst &Inst, uint64_t Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned DstB = decodeBField(Insn);
+ DecodeGPR32RegisterClass(Inst, DstB, Address, Decoder);
+ using Field = decltype(Insn);
+ Field U6 = fieldFromInstruction(Insn, 6, 6);
+ Inst.addOperand(MCOperand::createImm(U6));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeSOPwithRS12(MCInst &Inst, uint64_t Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned DstB = decodeBField(Insn);
+ DecodeGPR32RegisterClass(Inst, DstB, Address, Decoder);
+ using Field = decltype(Insn);
+ Field Lower = fieldFromInstruction(Insn, 6, 6);
+ Field Upper = fieldFromInstruction(Insn, 0, 5);
+ Field Sign = fieldFromInstruction(Insn, 5, 1) ? -1 : 1;
+ Field Result = Sign * ((Upper << 6) + Lower);
+ Inst.addOperand(MCOperand::createImm(Result));
+ return MCDisassembler::Success;
+}
+
DecodeStatus ARCDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes,
uint64_t Address,
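To make the bit arithmetic in the new DecodeSOPwithRS12 easier to follow, here is a self-contained restatement of the same extraction with a worked value; the helper name and example are made up for illustration, and fieldFromInstruction(Insn, Start, Size) simply returns bits [Start, Start+Size).

    #include <cstdint>

    // The s12 immediate is stored split: insn[11:6] holds the low 6 bits,
    // insn[4:0] the high 5 bits, and insn[5] the sign, mirroring the decoder above.
    static int64_t decodeRS12(uint64_t Insn) {
      uint64_t Lower = (Insn >> 6) & 0x3f;          // fieldFromInstruction(Insn, 6, 6)
      uint64_t Upper = Insn & 0x1f;                 // fieldFromInstruction(Insn, 0, 5)
      int64_t Sign = ((Insn >> 5) & 0x1) ? -1 : 1;  // fieldFromInstruction(Insn, 5, 1)
      return Sign * int64_t((Upper << 6) + Lower);
    }

    // Example: Upper = 1, Lower = 2 with the sign bit set gives -(1 * 64 + 2) = -66.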
diff --git a/llvm/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.cpp b/llvm/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.cpp
index 358ee6002f80..d4f74fa77fc4 100644
--- a/llvm/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.cpp
+++ b/llvm/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.cpp
@@ -19,9 +19,9 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/ARC/TargetInfo/ARCTargetInfo.cpp b/llvm/lib/Target/ARC/TargetInfo/ARCTargetInfo.cpp
index d4a74e1c4174..91d56bb6b86d 100644
--- a/llvm/lib/Target/ARC/TargetInfo/ARCTargetInfo.cpp
+++ b/llvm/lib/Target/ARC/TargetInfo/ARCTargetInfo.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#include "TargetInfo/ARCTargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/ARM/A15SDOptimizer.cpp b/llvm/lib/Target/ARM/A15SDOptimizer.cpp
index bb81233cf803..f4d0f4a6d6b0 100644
--- a/llvm/lib/Target/ARM/A15SDOptimizer.cpp
+++ b/llvm/lib/Target/ARM/A15SDOptimizer.cpp
@@ -182,8 +182,7 @@ void A15SDOptimizer::eraseInstrWithNoUses(MachineInstr *MI) {
Front.push_back(MI);
while (Front.size() != 0) {
- MI = Front.back();
- Front.pop_back();
+ MI = Front.pop_back_val();
// MI is already known to be dead. We need to see
// if other instructions can also be removed.
@@ -621,9 +620,8 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) {
// Collect all the uses of this MI's DPR def for updating later.
SmallVector<MachineOperand*, 8> Uses;
Register DPRDefReg = MI->getOperand(0).getReg();
- for (MachineRegisterInfo::use_iterator I = MRI->use_begin(DPRDefReg),
- E = MRI->use_end(); I != E; ++I)
- Uses.push_back(&*I);
+ for (MachineOperand &MO : MRI->use_operands(DPRDefReg))
+ Uses.push_back(&MO);
// We can optimize this.
unsigned NewReg = optimizeSDPattern(MI);
diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td
index 5c1bed14c941..8cbd80f1bf65 100644
--- a/llvm/lib/Target/ARM/ARM.td
+++ b/llvm/lib/Target/ARM/ARM.td
@@ -437,6 +437,11 @@ def FeatureLOB : SubtargetFeature<"lob", "HasLOB", "true",
"Enable Low Overhead Branch "
"extensions">;
+def FeatureFixCMSE_CVE_2021_35465 : SubtargetFeature<"fix-cmse-cve-2021-35465",
+ "FixCMSE_CVE_2021_35465", "true",
+                                     "Mitigate against the CVE-2021-35465 "
+                                     "security vulnerability">;
+
//===----------------------------------------------------------------------===//
// ARM architecture class
//
@@ -539,6 +544,18 @@ def HasV8_7aOps : SubtargetFeature<"v8.7a", "HasV8_7aOps", "true",
"Support ARM v8.7a instructions",
[HasV8_6aOps]>;
+def HasV9_0aOps : SubtargetFeature<"v9a", "HasV9_0aOps", "true",
+ "Support ARM v9a instructions",
+ [HasV8_5aOps]>;
+
+def HasV9_1aOps : SubtargetFeature<"v9.1a", "HasV9_1aOps", "true",
+ "Support ARM v9.1a instructions",
+ [HasV8_6aOps, HasV9_0aOps]>;
+
+def HasV9_2aOps : SubtargetFeature<"v9.2a", "HasV9_2aOps", "true",
+ "Support ARM v9.2a instructions",
+ [HasV8_7aOps, HasV9_1aOps]>;
+
def HasV8_1MMainlineOps : SubtargetFeature<
"v8.1m.main", "HasV8_1MMainlineOps", "true",
"Support ARM v8-1M Mainline instructions",
@@ -619,6 +636,8 @@ def ProcA78 : SubtargetFeature<"cortex-a78", "ARMProcFamily", "CortexA78",
"Cortex-A78 ARM processors", []>;
def ProcA78C : SubtargetFeature<"a78c", "ARMProcFamily", "CortexA78C",
"Cortex-A78C ARM processors", []>;
+def ProcA710 : SubtargetFeature<"cortex-a710", "ARMProcFamily",
+ "CortexA710", "Cortex-A710 ARM processors", []>;
def ProcX1 : SubtargetFeature<"cortex-x1", "ARMProcFamily", "CortexX1",
"Cortex-X1 ARM processors", []>;
@@ -867,6 +886,43 @@ def ARMv87a : Architecture<"armv8.7-a", "ARMv87a", [HasV8_7aOps,
FeatureRAS,
FeatureDotProd]>;
+def ARMv9a : Architecture<"armv9-a", "ARMv9a", [HasV9_0aOps,
+ FeatureAClass,
+ FeatureDB,
+ FeatureFPARMv8,
+ FeatureNEON,
+ FeatureDSP,
+ FeatureTrustZone,
+ FeatureMP,
+ FeatureVirtualization,
+ FeatureCRC,
+ FeatureRAS,
+ FeatureDotProd]>;
+def ARMv91a : Architecture<"armv9.1-a", "ARMv91a", [HasV9_1aOps,
+ FeatureAClass,
+ FeatureDB,
+ FeatureFPARMv8,
+ FeatureNEON,
+ FeatureDSP,
+ FeatureTrustZone,
+ FeatureMP,
+ FeatureVirtualization,
+ FeatureCRC,
+ FeatureRAS,
+ FeatureDotProd]>;
+def ARMv92a : Architecture<"armv9.2-a", "ARMv92a", [HasV9_2aOps,
+ FeatureAClass,
+ FeatureDB,
+ FeatureFPARMv8,
+ FeatureNEON,
+ FeatureDSP,
+ FeatureTrustZone,
+ FeatureMP,
+ FeatureVirtualization,
+ FeatureCRC,
+ FeatureRAS,
+ FeatureDotProd]>;
+
def ARMv8r : Architecture<"armv8-r", "ARMv8r", [HasV8Ops,
FeatureRClass,
FeatureDB,
@@ -1213,7 +1269,8 @@ def : ProcessorModel<"cortex-m33", CortexM4Model, [ARMv8mMainline,
FeatureHasSlowFPVMLx,
FeatureHasSlowFPVFMx,
FeatureUseMISched,
- FeatureHasNoBranchPredictor]>;
+ FeatureHasNoBranchPredictor,
+ FeatureFixCMSE_CVE_2021_35465]>;
def : ProcessorModel<"cortex-m35p", CortexM4Model, [ARMv8mMainline,
FeatureDSP,
@@ -1222,7 +1279,8 @@ def : ProcessorModel<"cortex-m35p", CortexM4Model, [ARMv8mMainline,
FeatureHasSlowFPVMLx,
FeatureHasSlowFPVFMx,
FeatureUseMISched,
- FeatureHasNoBranchPredictor]>;
+ FeatureHasNoBranchPredictor,
+ FeatureFixCMSE_CVE_2021_35465]>;
def : ProcessorModel<"cortex-m55", CortexM4Model, [ARMv81mMainline,
FeatureDSP,
@@ -1231,7 +1289,8 @@ def : ProcessorModel<"cortex-m55", CortexM4Model, [ARMv81mMainline,
FeatureHasNoBranchPredictor,
FeaturePrefLoopAlign32,
FeatureHasSlowFPVMLx,
- HasMVEFloatOps]>;
+ HasMVEFloatOps,
+ FeatureFixCMSE_CVE_2021_35465]>;
def : ProcNoItin<"cortex-a32", [ARMv8a,
FeatureHWDivThumb,
@@ -1323,6 +1382,14 @@ def : ProcNoItin<"cortex-a78c", [ARMv82a, ProcA78C,
FeatureDotProd,
FeatureFullFP16]>;
+def : ProcNoItin<"cortex-a710", [ARMv9a, ProcA710,
+ FeatureHWDivThumb,
+ FeatureHWDivARM,
+ FeatureFP16FML,
+ FeatureBF16,
+ FeatureMatMulInt8,
+ FeatureSB]>;
+
def : ProcNoItin<"cortex-x1", [ARMv82a, ProcX1,
FeatureHWDivThumb,
FeatureHWDivARM,
diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index ba594b7f0935..9901b86b0e87 100644
--- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -41,11 +41,11 @@
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ARMBuildAttributes.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetParser.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -1291,9 +1291,6 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) {
MCTargetStreamer &TS = *OutStreamer->getTargetStreamer();
ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS);
- const MachineFunction &MF = *MI->getParent()->getParent();
- const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
-
// If we just ended a constant pool, mark it as such.
if (InConstantPool && MI->getOpcode() != ARM::CONSTPOOL_ENTRY) {
OutStreamer->emitDataRegion(MCDR_DataRegionEnd);
@@ -1742,7 +1739,7 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) {
// FIXME: Ideally we could vary the LDRB index based on the padding
// between the sequence and jump table, however that relies on MCExprs
// for load indexes which are currently not supported.
- OutStreamer->emitCodeAlignment(4);
+ OutStreamer->emitCodeAlignment(4, &getSubtargetInfo());
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tADDhirr)
.addReg(Idx)
.addReg(Idx)
@@ -2035,6 +2032,9 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) {
.addImm(ARMCC::AL)
.addReg(0));
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
+
if (STI.isTargetDarwin() || STI.isTargetWindows()) {
// These platforms always use the same frame register
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::LDRi12)
@@ -2080,6 +2080,9 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) {
Register SrcReg = MI->getOperand(0).getReg();
Register ScratchReg = MI->getOperand(1).getReg();
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
+
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tLDRi)
.addReg(ScratchReg)
.addReg(SrcReg)
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 9b058ff7dbcb..2d981be4cfc1 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -173,8 +173,9 @@ CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
return MHR;
}
-MachineInstr *ARMBaseInstrInfo::convertToThreeAddress(
- MachineFunction::iterator &MFI, MachineInstr &MI, LiveVariables *LV) const {
+MachineInstr *
+ARMBaseInstrInfo::convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
+ LiveIntervals *LIS) const {
// FIXME: Thumb2 support.
if (!EnableARM3Addr)
@@ -336,9 +337,9 @@ MachineInstr *ARMBaseInstrInfo::convertToThreeAddress(
}
}
- MachineBasicBlock::iterator MBBI = MI.getIterator();
- MFI->insert(MBBI, NewMIs[1]);
- MFI->insert(MBBI, NewMIs[0]);
+ MachineBasicBlock &MBB = *MI.getParent();
+ MBB.insert(MI, NewMIs[1]);
+ MBB.insert(MI, NewMIs[0]);
return NewMIs[0];
}
@@ -867,6 +868,7 @@ void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB,
void llvm::addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB) {
MIB.addImm(ARMVCC::None);
MIB.addReg(0);
+ MIB.addReg(0); // tp_reg
}
void llvm::addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB,
@@ -878,6 +880,7 @@ void llvm::addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB,
void llvm::addPredicatedMveVpredNOp(MachineInstrBuilder &MIB, unsigned Cond) {
MIB.addImm(Cond);
MIB.addReg(ARM::VPR, RegState::Implicit);
+ MIB.addReg(0); // tp_reg
}
void llvm::addPredicatedMveVpredROp(MachineInstrBuilder &MIB,
@@ -914,7 +917,7 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.hasFP64())
Opc = ARM::VMOVD;
else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
- Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
+ Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MQPRCopy;
if (Opc) {
MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
@@ -923,7 +926,7 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MIB.addReg(SrcReg, getKillRegState(KillSrc));
if (Opc == ARM::MVE_VORR)
addUnpredicatedMveVpredROp(MIB, DestReg);
- else
+ else if (Opc != ARM::MQPRCopy)
MIB.add(predOps(ARMCC::AL));
return;
}
@@ -1241,7 +1244,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
llvm_unreachable("Unknown reg class!");
break;
case 32:
- if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
+ if (ARM::QQPRRegClass.hasSubClassEq(RC) ||
+ ARM::MQQPRRegClass.hasSubClassEq(RC) ||
+ ARM::DQuadRegClass.hasSubClassEq(RC)) {
if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
Subtarget.hasNEON()) {
// FIXME: It's possible to only store part of the QQ register if the
@@ -1252,6 +1257,11 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
.addReg(SrcReg, getKillRegState(isKill))
.addMemOperand(MMO)
.add(predOps(ARMCC::AL));
+ } else if (Subtarget.hasMVEIntegerOps()) {
+ BuildMI(MBB, I, DebugLoc(), get(ARM::MQQPRStore))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI)
+ .addMemOperand(MMO);
} else {
MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(),
get(ARM::VSTMDIA))
@@ -1267,7 +1277,13 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
llvm_unreachable("Unknown reg class!");
break;
case 64:
- if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
+ if (ARM::MQQQQPRRegClass.hasSubClassEq(RC) &&
+ Subtarget.hasMVEIntegerOps()) {
+ BuildMI(MBB, I, DebugLoc(), get(ARM::MQQQQPRStore))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI)
+ .addMemOperand(MMO);
+ } else if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMDIA))
.addFrameIndex(FI)
.add(predOps(ARMCC::AL))
@@ -1328,6 +1344,13 @@ unsigned ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
return MI.getOperand(0).getReg();
}
break;
+ case ARM::MQQPRStore:
+ case ARM::MQQQQPRStore:
+ if (MI.getOperand(1).isFI()) {
+ FrameIndex = MI.getOperand(1).getIndex();
+ return MI.getOperand(0).getReg();
+ }
+ break;
}
return 0;
@@ -1473,31 +1496,42 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
llvm_unreachable("Unknown reg class!");
break;
case 32:
- if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
- if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
- Subtarget.hasNEON()) {
- BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
- .addFrameIndex(FI)
- .addImm(16)
- .addMemOperand(MMO)
- .add(predOps(ARMCC::AL));
- } else {
- MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
- .addFrameIndex(FI)
- .add(predOps(ARMCC::AL))
- .addMemOperand(MMO);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
- if (Register::isPhysicalRegister(DestReg))
- MIB.addReg(DestReg, RegState::ImplicitDefine);
- }
- } else
- llvm_unreachable("Unknown reg class!");
- break;
+ if (ARM::QQPRRegClass.hasSubClassEq(RC) ||
+ ARM::MQQPRRegClass.hasSubClassEq(RC) ||
+ ARM::DQuadRegClass.hasSubClassEq(RC)) {
+ if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
+ Subtarget.hasNEON()) {
+ BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
+ .addFrameIndex(FI)
+ .addImm(16)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
+ } else if (Subtarget.hasMVEIntegerOps()) {
+ BuildMI(MBB, I, DL, get(ARM::MQQPRLoad), DestReg)
+ .addFrameIndex(FI)
+ .addMemOperand(MMO);
+ } else {
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
+ .addFrameIndex(FI)
+ .add(predOps(ARMCC::AL))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
+ if (Register::isPhysicalRegister(DestReg))
+ MIB.addReg(DestReg, RegState::ImplicitDefine);
+ }
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
case 64:
- if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
+ if (ARM::MQQQQPRRegClass.hasSubClassEq(RC) &&
+ Subtarget.hasMVEIntegerOps()) {
+ BuildMI(MBB, I, DL, get(ARM::MQQQQPRLoad), DestReg)
+ .addFrameIndex(FI)
+ .addMemOperand(MMO);
+ } else if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
.addFrameIndex(FI)
.add(predOps(ARMCC::AL))
@@ -1566,6 +1600,13 @@ unsigned ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
return MI.getOperand(0).getReg();
}
break;
+ case ARM::MQQPRLoad:
+ case ARM::MQQQQPRLoad:
+ if (MI.getOperand(1).isFI()) {
+ FrameIndex = MI.getOperand(1).getIndex();
+ return MI.getOperand(0).getReg();
+ }
+ break;
}
return 0;
@@ -1642,8 +1683,6 @@ void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
if (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD) {
- assert(getSubtarget().getTargetTriple().isOSBinFormatMachO() &&
- "LOAD_STACK_GUARD currently supported only for MachO.");
expandLoadStackGuard(MI);
MI.getParent()->erase(MI);
return true;
@@ -2331,9 +2370,13 @@ ARMBaseInstrInfo::optimizeSelect(MachineInstr &MI,
// Find new register class to use.
MachineOperand FalseReg = MI.getOperand(Invert ? 2 : 1);
+ MachineOperand TrueReg = MI.getOperand(Invert ? 1 : 2);
Register DestReg = MI.getOperand(0).getReg();
- const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
- if (!MRI.constrainRegClass(DestReg, PreviousClass))
+ const TargetRegisterClass *FalseClass = MRI.getRegClass(FalseReg.getReg());
+ const TargetRegisterClass *TrueClass = MRI.getRegClass(TrueReg.getReg());
+ if (!MRI.constrainRegClass(DestReg, FalseClass))
+ return nullptr;
+ if (!MRI.constrainRegClass(DestReg, TrueClass))
return nullptr;
// Create a new predicated version of DefMI.
@@ -2760,8 +2803,8 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
/// compares against in CmpValue. Return true if the comparison instruction
/// can be analyzed.
bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
- Register &SrcReg2, int &CmpMask,
- int &CmpValue) const {
+ Register &SrcReg2, int64_t &CmpMask,
+ int64_t &CmpValue) const {
switch (MI.getOpcode()) {
default: break;
case ARM::CMPri:
@@ -2832,7 +2875,8 @@ inline static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes CC) {
/// This function can be extended later on.
inline static bool isRedundantFlagInstr(const MachineInstr *CmpI,
Register SrcReg, Register SrcReg2,
- int ImmValue, const MachineInstr *OI,
+ int64_t ImmValue,
+ const MachineInstr *OI,
bool &IsThumb1) {
if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
(OI->getOpcode() == ARM::SUBrr || OI->getOpcode() == ARM::t2SUBrr) &&
@@ -2967,8 +3011,8 @@ static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) {
/// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
/// condition code of instructions which use the flags.
bool ARMBaseInstrInfo::optimizeCompareInstr(
- MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int CmpMask,
- int CmpValue, const MachineRegisterInfo *MRI) const {
+ MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
+ int64_t CmpValue, const MachineRegisterInfo *MRI) const {
// Get the unique definition of SrcReg.
MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
if (!MI) return false;
@@ -3220,9 +3264,8 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
// live-out. If it is live-out, do not optimize.
if (!isSafe) {
MachineBasicBlock *MBB = CmpInstr.getParent();
- for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
- SE = MBB->succ_end(); SI != SE; ++SI)
- if ((*SI)->isLiveIn(ARM::CPSR))
+ for (MachineBasicBlock *Succ : MBB->successors())
+ if (Succ->isLiveIn(ARM::CPSR))
return false;
}
@@ -3255,7 +3298,7 @@ bool ARMBaseInstrInfo::shouldSink(const MachineInstr &MI) const {
MachineBasicBlock::const_iterator Next = &MI;
++Next;
Register SrcReg, SrcReg2;
- int CmpMask, CmpValue;
+ int64_t CmpMask, CmpValue;
bool IsThumb1;
if (Next != MI.getParent()->end() &&
analyzeCompare(*Next, SrcReg, SrcReg2, CmpMask, CmpValue) &&
@@ -4839,8 +4882,6 @@ bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI,
return true;
}
-// LoadStackGuard has so far only been implemented for MachO. Different code
-// sequence is needed for other targets.
void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
unsigned LoadImmOpc,
unsigned LoadOpc) const {
@@ -4850,27 +4891,70 @@ void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
MachineBasicBlock &MBB = *MI->getParent();
DebugLoc DL = MI->getDebugLoc();
Register Reg = MI->getOperand(0).getReg();
- const GlobalValue *GV =
- cast<GlobalValue>((*MI->memoperands_begin())->getValue());
MachineInstrBuilder MIB;
+ unsigned int Offset = 0;
+
+ if (LoadImmOpc == ARM::MRC || LoadImmOpc == ARM::t2MRC) {
+ assert(Subtarget.isReadTPHard() &&
+ "TLS stack protector requires hardware TLS register");
+
+ BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
+ .addImm(15)
+ .addImm(0)
+ .addImm(13)
+ .addImm(0)
+ .addImm(3)
+ .add(predOps(ARMCC::AL));
- BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
- .addGlobalAddress(GV, 0, ARMII::MO_NONLAZY);
+ Module &M = *MBB.getParent()->getFunction().getParent();
+ Offset = M.getStackProtectorGuardOffset();
+ if (Offset & ~0xfffU) {
+ // The offset won't fit in the LDR's 12-bit immediate field, so emit an
+ // extra ADD to cover the delta. This gives us a guaranteed 8 additional
+ // bits, resulting in a range of 0 to +1 MiB for the guard offset.
+ unsigned AddOpc = (LoadImmOpc == ARM::MRC) ? ARM::ADDri : ARM::t2ADDri;
+ BuildMI(MBB, MI, DL, get(AddOpc), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addImm(Offset & ~0xfffU)
+ .add(predOps(ARMCC::AL))
+ .addReg(0);
+ Offset &= 0xfffU;
+ }
+ } else {
+ const GlobalValue *GV =
+ cast<GlobalValue>((*MI->memoperands_begin())->getValue());
+ bool IsIndirect = Subtarget.isGVIndirectSymbol(GV);
+
+ unsigned TargetFlags = ARMII::MO_NO_FLAG;
+ if (Subtarget.isTargetMachO()) {
+ TargetFlags |= ARMII::MO_NONLAZY;
+ } else if (Subtarget.isTargetCOFF()) {
+ if (GV->hasDLLImportStorageClass())
+ TargetFlags |= ARMII::MO_DLLIMPORT;
+ else if (IsIndirect)
+ TargetFlags |= ARMII::MO_COFFSTUB;
+ } else if (Subtarget.isGVInGOT(GV)) {
+ TargetFlags |= ARMII::MO_GOT;
+ }
- if (Subtarget.isGVIndirectSymbol(GV)) {
- MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
- MIB.addReg(Reg, RegState::Kill).addImm(0);
- auto Flags = MachineMemOperand::MOLoad |
- MachineMemOperand::MODereferenceable |
- MachineMemOperand::MOInvariant;
- MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
- MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, Align(4));
- MIB.addMemOperand(MMO).add(predOps(ARMCC::AL));
+ BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
+ .addGlobalAddress(GV, 0, TargetFlags);
+
+ if (IsIndirect) {
+ MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
+ MIB.addReg(Reg, RegState::Kill).addImm(0);
+ auto Flags = MachineMemOperand::MOLoad |
+ MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant;
+ MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
+ MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, Align(4));
+ MIB.addMemOperand(MMO).add(predOps(ARMCC::AL));
+ }
}
MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
MIB.addReg(Reg, RegState::Kill)
- .addImm(0)
+ .addImm(Offset)
.cloneMemRefs(*MI)
.add(predOps(ARMCC::AL));
}
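The offset-splitting logic in the new TLS stack-protector path above exists because an LDR immediate only covers 12 bits. A small worked sketch of the same masking, with a made-up offset value (plain C++ arithmetic, not the MachineInstr-building code):

    #include <cstdint>
    #include <cstdio>

    int main() {
      unsigned Offset = 0x1a30;            // hypothetical guard offset (6704 bytes)
      unsigned AddImm = Offset & ~0xfffU;  // 0x1000: emitted as a preceding ADDri
      unsigned LdrImm = Offset & 0xfffU;   // 0xa30:  fits in the LDR's imm12 field
      std::printf("ADD #%#x then LDR #%#x\n", AddImm, LdrImm);
      return 0;
    }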
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index 0ebba0d9fdd5..db9320962e81 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -120,9 +120,8 @@ public:
// if there is not such an opcode.
virtual unsigned getUnindexedOpcode(unsigned Opc) const = 0;
- MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI,
- MachineInstr &MI,
- LiveVariables *LV) const override;
+ MachineInstr *convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
+ LiveIntervals *LIS) const override;
virtual const ARMBaseRegisterInfo &getRegisterInfo() const = 0;
const ARMSubtarget &getSubtarget() const { return Subtarget; }
@@ -289,15 +288,15 @@ public:
/// compares against in CmpValue. Return true if the comparison instruction
/// can be analyzed.
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
- Register &SrcReg2, int &CmpMask,
- int &CmpValue) const override;
+ Register &SrcReg2, int64_t &CmpMask,
+ int64_t &CmpValue) const override;
/// optimizeCompareInstr - Convert the instruction to set the zero flag so
/// that we can remove a "comparison with zero"; Remove a redundant CMP
/// instruction if the flags can be updated in the same way by an earlier
/// instruction such as SUB.
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
- Register SrcReg2, int CmpMask, int CmpValue,
+ Register SrcReg2, int64_t CmpMask, int64_t CmpValue,
const MachineRegisterInfo *MRI) const override;
bool analyzeSelect(const MachineInstr &MI,
diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 4883e5693f87..b53efe58e8de 100644
--- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -263,6 +263,13 @@ ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
case ARM::QQQQPRRegClassID:
if (MF.getSubtarget<ARMSubtarget>().hasNEON())
return Super;
+ break;
+ case ARM::MQPRRegClassID:
+ case ARM::MQQPRRegClassID:
+ case ARM::MQQQQPRRegClassID:
+ if (MF.getSubtarget<ARMSubtarget>().hasMVEIntegerOps())
+ return Super;
+ break;
}
Super = *I++;
} while (Super);
@@ -928,4 +935,4 @@ bool ARMBaseRegisterInfo::shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
return TargetRegisterInfo::shouldRewriteCopySrc(DefRC, DefSubReg,
SrcRC, SrcSubReg);
-} \ No newline at end of file
+}
diff --git a/llvm/lib/Target/ARM/ARMBlockPlacement.cpp b/llvm/lib/Target/ARM/ARMBlockPlacement.cpp
index 5ea47f529b23..ddbd6702e528 100644
--- a/llvm/lib/Target/ARM/ARMBlockPlacement.cpp
+++ b/llvm/lib/Target/ARM/ARMBlockPlacement.cpp
@@ -31,6 +31,8 @@ private:
const ARMBaseInstrInfo *TII;
std::unique_ptr<ARMBasicBlockUtils> BBUtils = nullptr;
MachineLoopInfo *MLI = nullptr;
+ // A list of WLS instructions that need to be reverted to DLS.
+ SmallVector<MachineInstr *> RevertedWhileLoops;
public:
static char ID;
@@ -41,9 +43,9 @@ public:
bool blockIsBefore(MachineBasicBlock *BB, MachineBasicBlock *Other);
bool fixBackwardsWLS(MachineLoop *ML);
bool processPostOrderLoops(MachineLoop *ML);
+ bool revertWhileToDoLoop(MachineInstr *WLS);
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
AU.addRequired<MachineLoopInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -82,6 +84,66 @@ static MachineInstr *findWLS(MachineLoop *ML) {
return nullptr;
}
+// Revert a WhileLoopStart to an equivalent DoLoopStart and branch. Note that
+// because of the branches this requires an extra block to be created.
+bool ARMBlockPlacement::revertWhileToDoLoop(MachineInstr *WLS) {
+ // lr = t2WhileLoopStartTP r0, r1, TgtBB
+ // t2Br Ph
+ // ->
+ // cmp r0, 0
+ // brcc TgtBB
+ // block2:
+ // LR = t2DoLoopStartTP r0, r1
+ // t2Br Ph
+ MachineBasicBlock *Preheader = WLS->getParent();
+ assert(WLS != &Preheader->back());
+ assert(WLS->getNextNode() == &Preheader->back());
+ MachineInstr *Br = &Preheader->back();
+ assert(Br->getOpcode() == ARM::t2B);
+ assert(Br->getOperand(1).getImm() == 14);
+
+ // Clear the kill flags, as the cmp/bcc will no longer kill any operands.
+ WLS->getOperand(1).setIsKill(false);
+ if (WLS->getOpcode() == ARM::t2WhileLoopStartTP)
+ WLS->getOperand(2).setIsKill(false);
+
+ // Create the new block
+ MachineBasicBlock *NewBlock = Preheader->getParent()->CreateMachineBasicBlock(
+ Preheader->getBasicBlock());
+ Preheader->getParent()->insert(++Preheader->getIterator(), NewBlock);
+ // Move the Br to it
+ Br->removeFromParent();
+ NewBlock->insert(NewBlock->end(), Br);
+ // And setup the successors correctly.
+ Preheader->replaceSuccessor(Br->getOperand(0).getMBB(), NewBlock);
+ NewBlock->addSuccessor(Br->getOperand(0).getMBB());
+
+ // Create a new DLS to replace the WLS
+ MachineInstrBuilder MIB =
+ BuildMI(*NewBlock, Br, WLS->getDebugLoc(),
+ TII->get(WLS->getOpcode() == ARM::t2WhileLoopStartTP
+ ? ARM::t2DoLoopStartTP
+ : ARM::t2DoLoopStart));
+ MIB.add(WLS->getOperand(0));
+ MIB.add(WLS->getOperand(1));
+ if (WLS->getOpcode() == ARM::t2WhileLoopStartTP)
+ MIB.add(WLS->getOperand(2));
+
+ LLVM_DEBUG(dbgs() << DEBUG_PREFIX
+ << "Reverting While Loop to Do Loop: " << *WLS << "\n");
+
+ RevertWhileLoopStartLR(WLS, TII, ARM::t2Bcc, true);
+
+ LivePhysRegs LiveRegs;
+ computeAndAddLiveIns(LiveRegs, *NewBlock);
+
+ Preheader->getParent()->RenumberBlocks();
+ BBUtils->computeAllBlockSizes();
+ BBUtils->adjustBBOffsetsAfter(Preheader);
+
+ return true;
+}
+
/// Checks if loop has a backwards branching WLS, and if possible, fixes it.
/// This requires checking the predecessor (i.e. the preheader or its predecessor)
/// for a WLS and if its loopExit/target is before it.
@@ -125,11 +187,10 @@ bool ARMBlockPlacement::fixBackwardsWLS(MachineLoop *ML) {
// TODO: Analyse the blocks to make a decision if it would be worth
// moving Preheader even if we'd introduce a backwards WLS
if (WLSTarget == Predecessor) {
- LLVM_DEBUG(
- dbgs() << DEBUG_PREFIX
- << "Can't move Predecessor"
- "block as it would convert a WLS from forward to a "
- "backwards branching WLS\n");
+ LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Can't move Predecessor block as "
+ << "it would convert a WLS from forward to a "
+ << "backwards branching WLS\n");
+ RevertedWhileLoops.push_back(WlsInstr);
return false;
}
}
@@ -162,11 +223,16 @@ bool ARMBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
BBUtils->computeAllBlockSizes();
BBUtils->adjustBBOffsetsAfter(&MF.front());
bool Changed = false;
+ RevertedWhileLoops.clear();
// Find loops with a backwards branching WLS and fix if possible.
for (auto *ML : *MLI)
Changed |= processPostOrderLoops(ML);
+ // Revert any While loops still out of range to DLS loops.
+ for (auto *WlsInstr : RevertedWhileLoops)
+ Changed |= revertWhileToDoLoop(WlsInstr);
+
return Changed;
}
@@ -199,18 +265,22 @@ void ARMBlockPlacement::moveBasicBlock(MachineBasicBlock *BB,
assert(From->isSuccessor(To) &&
"'To' is expected to be a successor of 'From'");
MachineInstr &Terminator = *(--From->terminators().end());
- if (!Terminator.isUnconditionalBranch()) {
- // The BB doesn't have an unconditional branch so it relied on
- // fall-through. Fix by adding an unconditional branch to the moved BB.
- MachineInstrBuilder MIB =
- BuildMI(From, Terminator.getDebugLoc(), TII->get(ARM::t2B));
- MIB.addMBB(To);
- MIB.addImm(ARMCC::CondCodes::AL);
- MIB.addReg(ARM::NoRegister);
- LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Adding unconditional branch from "
- << From->getName() << " to " << To->getName() << ": "
- << *MIB.getInstr());
- }
+ if (!TII->isPredicated(Terminator) &&
+ (isUncondBranchOpcode(Terminator.getOpcode()) ||
+ isIndirectBranchOpcode(Terminator.getOpcode()) ||
+ isJumpTableBranchOpcode(Terminator.getOpcode()) ||
+ Terminator.isReturn()))
+ return;
+ // The BB doesn't have an unconditional branch so it relied on
+ // fall-through. Fix by adding an unconditional branch to the moved BB.
+ MachineInstrBuilder MIB =
+ BuildMI(From, Terminator.getDebugLoc(), TII->get(ARM::t2B));
+ MIB.addMBB(To);
+ MIB.addImm(ARMCC::CondCodes::AL);
+ MIB.addReg(ARM::NoRegister);
+ LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Adding unconditional branch from "
+ << From->getName() << " to " << To->getName() << ": "
+ << *MIB.getInstr());
};
// Fix fall-through to the moved BB from the one that used to be before it.
@@ -225,5 +295,5 @@ void ARMBlockPlacement::moveBasicBlock(MachineBasicBlock *BB,
F->RenumberBlocks();
BBUtils->computeAllBlockSizes();
- BBUtils->adjustBBOffsetsAfter(&F->front());
+ BBUtils->adjustBBOffsetsAfter(BB);
}
diff --git a/llvm/lib/Target/ARM/ARMCallLowering.cpp b/llvm/lib/Target/ARM/ARMCallLowering.cpp
index aff7ec8d2ed6..81ec4d09a408 100644
--- a/llvm/lib/Target/ARM/ARMCallLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMCallLowering.cpp
@@ -45,6 +45,7 @@
#include <algorithm>
#include <cassert>
#include <cstdint>
+#include <functional>
#include <utility>
using namespace llvm;
@@ -109,7 +110,7 @@ struct ARMOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
}
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign &VA) override {
+ CCValAssign VA) override {
assert(VA.isRegLoc() && "Value shouldn't be assigned to reg");
assert(VA.getLocReg() == PhysReg && "Assigning to the wrong reg?");
@@ -130,7 +131,8 @@ struct ARMOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
}
unsigned assignCustomValue(CallLowering::ArgInfo &Arg,
- ArrayRef<CCValAssign> VAs) override {
+ ArrayRef<CCValAssign> VAs,
+ std::function<void()> *Thunk) override {
assert(Arg.Regs.size() == 1 && "Can't handle multiple regs yet");
CCValAssign VA = VAs[0];
@@ -158,9 +160,15 @@ struct ARMOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
if (!IsLittle)
std::swap(NewRegs[0], NewRegs[1]);
+ if (Thunk) {
+ *Thunk = [=]() {
+ assignValueToReg(NewRegs[0], VA.getLocReg(), VA);
+ assignValueToReg(NewRegs[1], NextVA.getLocReg(), NextVA);
+ };
+ return 1;
+ }
assignValueToReg(NewRegs[0], VA.getLocReg(), VA);
assignValueToReg(NewRegs[1], NextVA.getLocReg(), NextVA);
-
return 1;
}
@@ -273,7 +281,7 @@ struct ARMIncomingValueHandler : public CallLowering::IncomingValueHandler {
}
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign &VA) override {
+ CCValAssign VA) override {
assert(VA.isRegLoc() && "Value shouldn't be assigned to reg");
assert(VA.getLocReg() == PhysReg && "Assigning to the wrong reg?");
@@ -298,7 +306,8 @@ struct ARMIncomingValueHandler : public CallLowering::IncomingValueHandler {
}
unsigned assignCustomValue(ARMCallLowering::ArgInfo &Arg,
- ArrayRef<CCValAssign> VAs) override {
+ ArrayRef<CCValAssign> VAs,
+ std::function<void()> *Thunk) override {
assert(Arg.Regs.size() == 1 && "Can't handle multiple regs yet");
CCValAssign VA = VAs[0];
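The new Thunk out-parameter above lets assignCustomValue postpone the two half-register copies instead of emitting them immediately. A minimal sketch of that deferral pattern, with invented names (this is not the real CallLowering interface, only the shape of the change):

    #include <functional>
    #include <cstdio>

    // When the caller passes a Thunk pointer, the handler packages the copies
    // into a lambda instead of performing them; the caller runs the thunk later,
    // once all other arguments have been handled.
    static void assignSplitValue(unsigned LoReg, unsigned HiReg,
                                 std::function<void()> *Thunk) {
      auto DoCopies = [LoReg, HiReg]() {
        std::printf("copy low half -> r%u, high half -> r%u\n", LoReg, HiReg);
      };
      if (Thunk) {
        *Thunk = DoCopies; // defer the copies
        return;
      }
      DoCopies();          // or perform them right away
    }

    int main() {
      std::function<void()> Thunk;
      assignSplitValue(0, 1, &Thunk); // deferred
      Thunk();                        // executed later by the caller
      return 0;
    }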
diff --git a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
index e15826fa6159..121558276c3e 100644
--- a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -18,6 +18,7 @@
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMBaseInfo.h"
+#include "MVETailPredUtils.h"
#include "Thumb2InstrInfo.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/DenseMap.h"
@@ -340,12 +341,12 @@ LLVM_DUMP_METHOD void ARMConstantIslands::dumpBBs() {
// Align blocks where the previous block does not fall through. This may add
// extra NOPs but they will not be executed. It uses the PrefLoopAlignment as a
// measure of how much to align, and only runs at CodeGenOpt::Aggressive.
-static bool AlignBlocks(MachineFunction *MF) {
+static bool AlignBlocks(MachineFunction *MF, const ARMSubtarget *STI) {
if (MF->getTarget().getOptLevel() != CodeGenOpt::Aggressive ||
MF->getFunction().hasOptSize())
return false;
- auto *TLI = MF->getSubtarget().getTargetLowering();
+ auto *TLI = STI->getTargetLowering();
const Align Alignment = TLI->getPrefLoopAlignment();
if (Alignment < 4)
return false;
@@ -357,7 +358,25 @@ static bool AlignBlocks(MachineFunction *MF) {
Changed = true;
MBB.setAlignment(Alignment);
}
+
PrevCanFallthough = MBB.canFallThrough();
+
+      // For LOBs, the ARMLowOverheadLoops pass may remove the unconditional
+ // branch later in the pipeline.
+ if (STI->hasLOB()) {
+ for (const auto &MI : reverse(MBB.terminators())) {
+ if (MI.getOpcode() == ARM::t2B &&
+ MI.getOperand(0).getMBB() == MBB.getNextNode())
+ continue;
+ if (isLoopStart(MI) || MI.getOpcode() == ARM::t2LoopEnd ||
+ MI.getOpcode() == ARM::t2LoopEndDec) {
+ PrevCanFallthough = true;
+ break;
+ }
+ // Any other terminator - nothing to do
+ break;
+ }
+ }
}
return Changed;
@@ -406,7 +425,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
}
// Align any non-fallthrough blocks
- MadeChange |= AlignBlocks(MF);
+ MadeChange |= AlignBlocks(MF, STI);
// Perform the initial placement of the constant pool entries. To start with,
// we put them all at the end of the function.
diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 2167ad5d7467..a8f09969e948 100644
--- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -69,6 +69,7 @@ namespace {
void ExpandLaneOp(MachineBasicBlock::iterator &MBBI);
void ExpandVTBL(MachineBasicBlock::iterator &MBBI,
unsigned Opc, bool IsExt);
+ void ExpandMQQPRLoadStore(MachineBasicBlock::iterator &MBBI);
void ExpandMOV32BitImm(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI);
void CMSEClearGPRegs(MachineBasicBlock &MBB,
@@ -887,6 +888,43 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
LLVM_DEBUG(dbgs() << "To: "; MIB.getInstr()->dump(););
}
+void ARMExpandPseudo::ExpandMQQPRLoadStore(MachineBasicBlock::iterator &MBBI) {
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock &MBB = *MI.getParent();
+ unsigned NewOpc =
+ MI.getOpcode() == ARM::MQQPRStore || MI.getOpcode() == ARM::MQQQQPRStore
+ ? ARM::VSTMDIA
+ : ARM::VLDMDIA;
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
+
+ unsigned Flags = getKillRegState(MI.getOperand(0).isKill()) |
+ getDefRegState(MI.getOperand(0).isDef());
+ Register SrcReg = MI.getOperand(0).getReg();
+
+ // Copy the destination register.
+ MIB.add(MI.getOperand(1));
+ MIB.add(predOps(ARMCC::AL));
+ MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_0), Flags);
+ MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_1), Flags);
+ MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_2), Flags);
+ MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_3), Flags);
+ if (MI.getOpcode() == ARM::MQQQQPRStore ||
+ MI.getOpcode() == ARM::MQQQQPRLoad) {
+ MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_4), Flags);
+ MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_5), Flags);
+ MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_6), Flags);
+ MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_7), Flags);
+ }
+
+ if (NewOpc == ARM::VSTMDIA)
+ MIB.addReg(SrcReg, RegState::Implicit);
+
+ TransferImpOps(MI, MIB, MIB);
+ MIB.cloneMemRefs(MI);
+ MI.eraseFromParent();
+}
+
static bool IsAnAddressOperand(const MachineOperand &MO) {
// This check is overly conservative. Unless we are certain that the machine
// operand is not a symbol reference, we return that it is a symbol reference.
@@ -1295,7 +1333,7 @@ void ARMExpandPseudo::CMSESaveClearFPRegs(
const LivePhysRegs &LiveRegs, SmallVectorImpl<unsigned> &ScratchRegs) {
if (STI->hasV8_1MMainlineOps())
CMSESaveClearFPRegsV81(MBB, MBBI, DL, LiveRegs);
- else
+ else if (STI->hasV8MMainlineOps())
CMSESaveClearFPRegsV8(MBB, MBBI, DL, LiveRegs, ScratchRegs);
}
@@ -1303,8 +1341,6 @@ void ARMExpandPseudo::CMSESaveClearFPRegs(
void ARMExpandPseudo::CMSESaveClearFPRegsV8(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
const LivePhysRegs &LiveRegs, SmallVectorImpl<unsigned> &ScratchRegs) {
- if (!STI->hasFPRegs())
- return;
// Store an available register for FPSCR clearing
assert(!ScratchRegs.empty());
@@ -1358,7 +1394,11 @@ void ARMExpandPseudo::CMSESaveClearFPRegsV8(
bool passesFPReg = (!NonclearedFPRegs.empty() || !ClearedFPRegs.empty());
- // Lazy store all fp registers to the stack
+ if (passesFPReg)
+ assert(STI->hasFPRegs() && "Subtarget needs fpregs");
+
+ // Lazy store all fp registers to the stack.
+ // This executes as NOP in the absence of floating-point support.
MachineInstrBuilder VLSTM = BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM))
.addReg(ARM::SP)
.add(predOps(ARMCC::AL));
@@ -1486,15 +1526,18 @@ void ARMExpandPseudo::CMSERestoreFPRegs(
SmallVectorImpl<unsigned> &AvailableRegs) {
if (STI->hasV8_1MMainlineOps())
CMSERestoreFPRegsV81(MBB, MBBI, DL, AvailableRegs);
- else
+ else if (STI->hasV8MMainlineOps())
CMSERestoreFPRegsV8(MBB, MBBI, DL, AvailableRegs);
}
void ARMExpandPseudo::CMSERestoreFPRegsV8(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
SmallVectorImpl<unsigned> &AvailableRegs) {
- if (!STI->hasFPRegs())
- return;
+
+ // Keep a scratch register for the mitigation sequence.
+ unsigned ScratchReg = ARM::NoRegister;
+ if (STI->fixCMSE_CVE_2021_35465())
+ ScratchReg = AvailableRegs.pop_back_val();
// Use AvailableRegs to store the fp regs
std::vector<std::tuple<unsigned, unsigned, unsigned>> ClearedFPRegs;
@@ -1536,24 +1579,64 @@ void ARMExpandPseudo::CMSERestoreFPRegsV8(
}
}
+ bool returnsFPReg = (!NonclearedFPRegs.empty() || !ClearedFPRegs.empty());
+
+ if (returnsFPReg)
+ assert(STI->hasFPRegs() && "Subtarget needs fpregs");
+
// Push FP regs that cannot be restored via normal registers on the stack
for (unsigned Reg : NonclearedFPRegs) {
if (ARM::DPR_VFP2RegClass.contains(Reg))
- BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTRD), Reg)
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTRD))
+ .addReg(Reg)
.addReg(ARM::SP)
.addImm((Reg - ARM::D0) * 2)
.add(predOps(ARMCC::AL));
else if (ARM::SPRRegClass.contains(Reg))
- BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTRS), Reg)
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTRS))
+ .addReg(Reg)
.addReg(ARM::SP)
.addImm(Reg - ARM::S0)
.add(predOps(ARMCC::AL));
}
- // Lazy load fp regs from stack
- BuildMI(MBB, MBBI, DL, TII->get(ARM::VLLDM))
- .addReg(ARM::SP)
- .add(predOps(ARMCC::AL));
+ // Lazy load fp regs from stack.
+ // This executes as NOP in the absence of floating-point support.
+ MachineInstrBuilder VLLDM = BuildMI(MBB, MBBI, DL, TII->get(ARM::VLLDM))
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL));
+
+ if (STI->fixCMSE_CVE_2021_35465()) {
+ auto Bundler = MIBundleBuilder(MBB, VLLDM);
+ // Read the CONTROL register.
+ Bundler.append(BuildMI(*MBB.getParent(), DL, TII->get(ARM::t2MRS_M))
+ .addReg(ScratchReg, RegState::Define)
+ .addImm(20)
+ .add(predOps(ARMCC::AL)));
+ // Check bit 3 (SFPA).
+ Bundler.append(BuildMI(*MBB.getParent(), DL, TII->get(ARM::t2TSTri))
+ .addReg(ScratchReg)
+ .addImm(8)
+ .add(predOps(ARMCC::AL)));
+ // Emit the IT block.
+ Bundler.append(BuildMI(*MBB.getParent(), DL, TII->get(ARM::t2IT))
+ .addImm(ARMCC::NE)
+ .addImm(8));
+    // If SFPA is clear, skip ahead to the VLLDM; otherwise execute an
+    // instruction which has no functional effect apart from causing
+    // context creation: vmovne s0, s0. In the absence of an FPU we emit
+    // .inst.w 0xeeb00a40, which is defined as a NOP if not executed.
+ if (STI->hasFPRegs())
+ Bundler.append(BuildMI(*MBB.getParent(), DL, TII->get(ARM::VMOVS))
+ .addReg(ARM::S0, RegState::Define)
+ .addReg(ARM::S0, RegState::Undef)
+ .add(predOps(ARMCC::NE)));
+ else
+ Bundler.append(BuildMI(*MBB.getParent(), DL, TII->get(ARM::INLINEASM))
+ .addExternalSymbol(".inst.w 0xeeb00a40")
+ .addImm(InlineAsm::Extra_HasSideEffects));
+ finalizeBundle(MBB, Bundler.begin(), Bundler.end());
+ }
// Restore all FP registers via normal registers
for (const auto &Regs : ClearedFPRegs) {
@@ -1594,6 +1677,12 @@ void ARMExpandPseudo::CMSERestoreFPRegsV81(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
SmallVectorImpl<unsigned> &AvailableRegs) {
if (!definesOrUsesFPReg(*MBBI)) {
+ if (STI->fixCMSE_CVE_2021_35465()) {
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VSCCLRMS))
+ .add(predOps(ARMCC::AL))
+ .addReg(ARM::VPR, RegState::Define);
+ }
+
// Load FP registers from stack.
BuildMI(MBB, MBBI, DL, TII->get(ARM::VLLDM))
.addReg(ARM::SP)
@@ -1647,7 +1736,7 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
"CMP_SWAP not expected to be custom expanded for Thumb1");
assert((UxtOp == 0 || UxtOp == ARM::tUXTB || UxtOp == ARM::tUXTH) &&
"ARMv8-M.baseline does not have t2UXTB/t2UXTH");
- assert(ARM::tGPRRegClass.contains(DesiredReg) &&
+ assert((UxtOp == 0 || ARM::tGPRRegClass.contains(DesiredReg)) &&
"DesiredReg used for UXT op must be tGPR");
}
@@ -2916,6 +3005,13 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true;
case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true); return true;
+ case ARM::MQQPRLoad:
+ case ARM::MQQPRStore:
+ case ARM::MQQQQPRLoad:
+ case ARM::MQQQQPRStore:
+ ExpandMQQPRLoadStore(MBBI);
+ return true;
+
case ARM::tCMP_SWAP_8:
assert(STI->isThumb());
return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREXB, ARM::t2STREXB, ARM::tUXTB,
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 9c7055deaaf8..2b83a292db76 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -79,6 +79,10 @@ public:
void Select(SDNode *N) override;
+  /// Return true, as some complex patterns (like those that call
+  /// canExtractShiftFromMul) can modify the DAG in place.
+ bool ComplexPatternFuncMutatesDAG() const override { return true; }
+
bool hasNoVMLxHazardUse(SDNode *N) const;
bool isShifterOpProfitable(const SDValue &Shift,
ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
@@ -406,11 +410,9 @@ void ARMDAGToDAGISel::PreprocessISelDAG() {
return;
bool isThumb2 = Subtarget->isThumb();
- for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
- E = CurDAG->allnodes_end(); I != E; ) {
- SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
-
- if (N->getOpcode() != ISD::ADD)
+ // We use make_early_inc_range to avoid invalidation issues.
+ for (SDNode &N : llvm::make_early_inc_range(CurDAG->allnodes())) {
+ if (N.getOpcode() != ISD::ADD)
continue;
// Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
@@ -422,8 +424,8 @@ void ARMDAGToDAGISel::PreprocessISelDAG() {
// operand of 'add' and the 'and' and 'srl' would become a bits extraction
// node (UBFX).
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
+ SDValue N0 = N.getOperand(0);
+ SDValue N1 = N.getOperand(1);
unsigned And_imm = 0;
if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
@@ -480,7 +482,7 @@ void ARMDAGToDAGISel::PreprocessISelDAG() {
CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
- CurDAG->UpdateNodeOperands(N, N0, N1);
+ CurDAG->UpdateNodeOperands(&N, N0, N1);
}
}
@@ -1121,7 +1123,7 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
SDValue &Offset) {
if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
- if (!NC || !NC->isNullValue())
+ if (!NC || !NC->isZero())
return false;
Base = Offset = N;
@@ -1818,8 +1820,11 @@ bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
else
return false;
- SDValue Ops[] = {Base, NewOffset,
- CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32), PredReg,
+ SDValue Ops[] = {Base,
+ NewOffset,
+ CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32),
+ PredReg,
+ CurDAG->getRegister(0, MVT::i32), // tp_reg
Chain};
SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
N->getValueType(0), MVT::Other, Ops);
@@ -2525,6 +2530,7 @@ void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
SDValue PredicateMask) {
Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
Ops.push_back(PredicateMask);
+ Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
}
template <typename SDValueVector>
@@ -2533,6 +2539,7 @@ void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
SDValue Inactive) {
Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
Ops.push_back(PredicateMask);
+ Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
Ops.push_back(Inactive);
}
@@ -2540,6 +2547,7 @@ template <typename SDValueVector>
void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc) {
Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
Ops.push_back(CurDAG->getRegister(0, MVT::i32));
+ Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
}
template <typename SDValueVector>
@@ -2547,6 +2555,7 @@ void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
EVT InactiveTy) {
Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
Ops.push_back(CurDAG->getRegister(0, MVT::i32));
+ Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
Ops.push_back(SDValue(
CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, InactiveTy), 0));
}
@@ -3545,7 +3554,7 @@ void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
return;
SDValue Zero = N->getOperand(1);
- if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() ||
+ if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isZero() ||
And->getOpcode() != ISD::AND)
return;
SDValue X = And.getOperand(0);
@@ -5495,8 +5504,8 @@ static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to construct as operands for the node.
bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
- const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
- const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
+ const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
+ const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
bool IsThumb2 = Subtarget->isThumb2();
SDLoc DL(N);
@@ -5610,8 +5619,8 @@ bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to use in the nodes
bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
- const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
- const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
+ const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
+ const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
bool IsThumb2 = Subtarget->isThumb2();
SDLoc DL(N);
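The PreprocessISelDAG loop above switches to llvm::make_early_inc_range so that the node being visited can be updated without invalidating the traversal. For anyone unfamiliar with the helper, the same idiom written out manually on a plain std::list looks roughly like this:

    #include <list>

    // Advance the iterator before mutating the current element, so changes to
    // (or removal of) that element cannot invalidate the loop's own position.
    void eraseEvens(std::list<int> &L) {
      for (auto It = L.begin(); It != L.end();) {
        auto Cur = It++;   // what make_early_inc_range does for you
        if (*Cur % 2 == 0)
          L.erase(Cur);
      }
    }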
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 900113244e41..e7e10ce07a44 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -55,6 +55,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -300,6 +301,9 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
setOperationAction(ISD::UINT_TO_FP, VT, Expand);
setOperationAction(ISD::FP_TO_SINT, VT, Expand);
setOperationAction(ISD::FP_TO_UINT, VT, Expand);
+ } else {
+ setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
+ setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
}
// Pre and Post inc are supported on loads and stores
@@ -544,6 +548,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setLibcallName(RTLIB::SRL_I128, nullptr);
setLibcallName(RTLIB::SRA_I128, nullptr);
setLibcallName(RTLIB::MUL_I128, nullptr);
+ setLibcallName(RTLIB::MULO_I64, nullptr);
+ setLibcallName(RTLIB::MULO_I128, nullptr);
// RTLIB
if (Subtarget->isAAPCS_ABI() &&
@@ -741,6 +747,12 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
Subtarget->hasFPRegs()) {
addRegisterClass(MVT::f32, &ARM::SPRRegClass);
addRegisterClass(MVT::f64, &ARM::DPRRegClass);
+
+ setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);
+ setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
+
if (!Subtarget->hasVFP2Base())
setAllExpand(MVT::f32);
if (!Subtarget->hasFP64())
@@ -981,6 +993,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
setTargetDAGCombine(ISD::BUILD_VECTOR);
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
+ setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
@@ -1851,12 +1864,18 @@ ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
// v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
// load / store 4 to 8 consecutive NEON D registers, or 2 to 4 consecutive
// MVE Q registers.
- if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
+ if (Subtarget->hasNEON()) {
if (VT == MVT::v4i64)
return &ARM::QQPRRegClass;
if (VT == MVT::v8i64)
return &ARM::QQQQPRRegClass;
}
+ if (Subtarget->hasMVEIntegerOps()) {
+ if (VT == MVT::v4i64)
+ return &ARM::MQQPRRegClass;
+ if (VT == MVT::v8i64)
+ return &ARM::MQQQQPRRegClass;
+ }
return TargetLowering::getRegClassFor(VT);
}
@@ -2287,7 +2306,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
bool PreferIndirect = false;
// Determine whether this is a non-secure function call.
- if (CLI.CB && CLI.CB->getAttributes().hasFnAttribute("cmse_nonsecure_call"))
+ if (CLI.CB && CLI.CB->getAttributes().hasFnAttr("cmse_nonsecure_call"))
isCmseNSCall = true;
// Disable tail calls if they're not supported.
@@ -3259,26 +3278,24 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
SDNode *VMov = Copy;
// f64 returned in a pair of GPRs.
SmallPtrSet<SDNode*, 2> Copies;
- for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
- UI != UE; ++UI) {
- if (UI->getOpcode() != ISD::CopyToReg)
+ for (SDNode *U : VMov->uses()) {
+ if (U->getOpcode() != ISD::CopyToReg)
return false;
- Copies.insert(*UI);
+ Copies.insert(U);
}
if (Copies.size() > 2)
return false;
- for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
- UI != UE; ++UI) {
- SDValue UseChain = UI->getOperand(0);
+ for (SDNode *U : VMov->uses()) {
+ SDValue UseChain = U->getOperand(0);
if (Copies.count(UseChain.getNode()))
// Second CopyToReg
- Copy = *UI;
+ Copy = U;
else {
// We are at the top of this chain.
// If the copy has a glue operand, we conservatively assume it
// isn't safe to perform a tail call.
- if (UI->getOperand(UI->getNumOperands()-1).getValueType() == MVT::Glue)
+ if (U->getOperand(U->getNumOperands() - 1).getValueType() == MVT::Glue)
return false;
// First CopyToReg
TCChain = UseChain;
@@ -3301,10 +3318,9 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
}
bool HasRet = false;
- for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
- UI != UE; ++UI) {
- if (UI->getOpcode() != ARMISD::RET_FLAG &&
- UI->getOpcode() != ARMISD::INTRET_FLAG)
+ for (const SDNode *U : Copy->uses()) {
+ if (U->getOpcode() != ARMISD::RET_FLAG &&
+ U->getOpcode() != ARMISD::INTRET_FLAG)
return false;
HasRet = true;
}
@@ -3782,7 +3798,7 @@ static SDValue promoteToConstantPool(const ARMTargetLowering *TLI,
bool ARMTargetLowering::isReadOnly(const GlobalValue *GV) const {
if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
- if (!(GV = GA->getBaseObject()))
+ if (!(GV = GA->getAliaseeObject()))
return false;
if (const auto *V = dyn_cast<GlobalVariable>(GV))
return V->isConstant();
@@ -4517,7 +4533,7 @@ SDValue ARMTargetLowering::LowerFormalArguments(
InVals.push_back(ArgValue);
} else { // VA.isRegLoc()
- // sanity check
+ // Only arguments passed on the stack should make it here.
assert(VA.isMemLoc());
assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
@@ -5811,6 +5827,43 @@ SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
return Op;
}
+static SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) {
+ EVT VT = Op.getValueType();
+ EVT ToVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ EVT FromVT = Op.getOperand(0).getValueType();
+
+ if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f32)
+ return Op;
+ if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f64 &&
+ Subtarget->hasFP64())
+ return Op;
+ if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f16 &&
+ Subtarget->hasFullFP16())
+ return Op;
+ if (VT == MVT::v4i32 && ToVT == MVT::i32 && FromVT == MVT::v4f32 &&
+ Subtarget->hasMVEFloatOps())
+ return Op;
+ if (VT == MVT::v8i16 && ToVT == MVT::i16 && FromVT == MVT::v8f16 &&
+ Subtarget->hasMVEFloatOps())
+ return Op;
+
+ if (FromVT != MVT::v4f32 && FromVT != MVT::v8f16)
+ return SDValue();
+
+ SDLoc DL(Op);
+ bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
+ unsigned BW = ToVT.getScalarSizeInBits() - IsSigned;
+ SDValue CVT = DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
+ DAG.getValueType(VT.getScalarType()));
+ SDValue Max = DAG.getNode(IsSigned ? ISD::SMIN : ISD::UMIN, DL, VT, CVT,
+ DAG.getConstant((1 << BW) - 1, DL, VT));
+ if (IsSigned)
+ Max = DAG.getNode(ISD::SMAX, DL, VT, Max,
+ DAG.getConstant(-(1 << BW), DL, VT));
+ return Max;
+}
+
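The clamp values built in LowerFP_TO_INT_SAT above follow directly from the
width of the saturation type; a minimal standalone sketch of the same
arithmetic (illustrative only, not the LLVM lowering itself, and assuming a
scalar type well below 63 bits):

#include <cstdint>
#include <utility>

// Returns {Min, Max} clamp bounds for a saturating FP-to-int conversion to a
// ScalarBits-wide integer; BW mirrors "ScalarSizeInBits - IsSigned" above.
std::pair<int64_t, int64_t> fpToIntSatBounds(unsigned ScalarBits, bool IsSigned) {
  unsigned BW = ScalarBits - (IsSigned ? 1u : 0u);
  int64_t Max = (int64_t(1) << BW) - 1;             // UMIN/SMIN clamp value
  int64_t Min = IsSigned ? -(int64_t(1) << BW) : 0; // SMAX clamp value (signed only)
  return {Min, Max};
}

// For example, fpToIntSatBounds(16, true) yields {-32768, 32767}.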
static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
SDLoc dl(Op);
@@ -7660,7 +7713,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
unsigned SplatBitSize;
bool HasAnyUndefs;
if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
- if (SplatUndef.isAllOnesValue())
+ if (SplatUndef.isAllOnes())
return DAG.getUNDEF(VT);
if ((ST->hasNEON() && SplatBitSize <= 64) ||
@@ -8052,7 +8105,7 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
Src.WindowBase *= Src.WindowScale;
}
- // Final sanity check before we try to actually produce a shuffle.
+ // Final check before we try to actually produce a shuffle.
LLVM_DEBUG(for (auto Src
: Sources)
assert(Src.ShuffleVec.getValueType() == ShuffleVT););
@@ -8175,7 +8228,7 @@ bool ARMTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
isVTBLMask(M, VT) ||
isNEONTwoResultShuffleMask(M, VT, WhichResult, isV_UNDEF)))
return true;
- else if (Subtarget->hasNEON() && (VT == MVT::v8i16 || VT == MVT::v16i8) &&
+ else if ((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
isReverseMask(M, VT))
return true;
else if (Subtarget->hasMVEIntegerOps() &&
@@ -8268,21 +8321,23 @@ static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
}
-static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op,
- SelectionDAG &DAG) {
+static SDValue LowerReverse_VECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
SDLoc DL(Op);
- SDValue OpLHS = Op.getOperand(0);
- EVT VT = OpLHS.getValueType();
+ EVT VT = Op.getValueType();
- assert((VT == MVT::v8i16 || VT == MVT::v16i8) &&
+ assert((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
"Expect an v8i16/v16i8 type");
- OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, OpLHS);
- // For a v16i8 type: After the VREV, we have got <8, ...15, 8, ..., 0>. Now,
+ SDValue OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, Op.getOperand(0));
+ // For a v16i8 type: After the VREV, we have got <7, ..., 0, 15, ..., 8>. Now,
// extract the first 8 bytes into the top double word and the last 8 bytes
- // into the bottom double word. The v8i16 case is similar.
- unsigned ExtractNum = (VT == MVT::v16i8) ? 8 : 4;
- return DAG.getNode(ARMISD::VEXT, DL, VT, OpLHS, OpLHS,
- DAG.getConstant(ExtractNum, DL, MVT::i32));
+ // into the bottom double word, through a new vector shuffle that will be
+ // turned into a VEXT on Neon, or a couple of VMOVDs on MVE.
+ std::vector<int> NewMask;
+ for (unsigned i = 0; i < VT.getVectorNumElements() / 2; i++)
+ NewMask.push_back(VT.getVectorNumElements() / 2 + i);
+ for (unsigned i = 0; i < VT.getVectorNumElements() / 2; i++)
+ NewMask.push_back(i);
+ return DAG.getVectorShuffle(VT, DL, OpLHS, OpLHS, NewMask);
}
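The rewritten LowerReverse_VECTOR_SHUFFLE above swaps the two halves of the
VREV64 result through a shuffle mask; a standalone sketch of that mask
construction (illustrative only, assuming an even element count):

#include <vector>

// Builds the mask <N/2, ..., N-1, 0, ..., N/2-1> that moves the top half of a
// vector in front of the bottom half.
std::vector<int> makeHalfSwapMask(unsigned NumElts) {
  std::vector<int> Mask;
  for (unsigned i = 0; i < NumElts / 2; ++i)
    Mask.push_back(NumElts / 2 + i); // top half first
  for (unsigned i = 0; i < NumElts / 2; ++i)
    Mask.push_back(i);               // then the bottom half
  return Mask;
}

// makeHalfSwapMask(16) gives <8, ..., 15, 0, ..., 7>, matching the v16i8 case.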
static EVT getVectorTyFromPredicateVector(EVT VT) {
@@ -8704,8 +8759,9 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(ISD::BITCAST, dl, VT, Val);
}
- if (ST->hasNEON() && (VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT))
- return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG);
+ if ((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
+ isReverseMask(ShuffleMask, VT))
+ return LowerReverse_VECTOR_SHUFFLE(Op, DAG);
if (ST->hasNEON() && VT == MVT::v8i8)
if (SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG))
@@ -8822,54 +8878,68 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG,
static SDValue LowerCONCAT_VECTORS_i1(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *ST) {
- SDValue V1 = Op.getOperand(0);
- SDValue V2 = Op.getOperand(1);
SDLoc dl(Op);
- EVT VT = Op.getValueType();
- EVT Op1VT = V1.getValueType();
- EVT Op2VT = V2.getValueType();
- unsigned NumElts = VT.getVectorNumElements();
-
- assert(Op1VT == Op2VT && "Operand types don't match!");
- assert(VT.getScalarSizeInBits() == 1 &&
+ assert(Op.getValueType().getScalarSizeInBits() == 1 &&
+ "Unexpected custom CONCAT_VECTORS lowering");
+ assert(isPowerOf2_32(Op.getNumOperands()) &&
"Unexpected custom CONCAT_VECTORS lowering");
assert(ST->hasMVEIntegerOps() &&
"CONCAT_VECTORS lowering only supported for MVE");
- SDValue NewV1 = PromoteMVEPredVector(dl, V1, Op1VT, DAG);
- SDValue NewV2 = PromoteMVEPredVector(dl, V2, Op2VT, DAG);
-
- // We now have Op1 + Op2 promoted to vectors of integers, where v8i1 gets
- // promoted to v8i16, etc.
-
- MVT ElType = getVectorTyFromPredicateVector(VT).getScalarType().getSimpleVT();
-
- // Extract the vector elements from Op1 and Op2 one by one and truncate them
- // to be the right size for the destination. For example, if Op1 is v4i1 then
- // the promoted vector is v4i32. The result of concatentation gives a v8i1,
- // which when promoted is v8i16. That means each i32 element from Op1 needs
- // truncating to i16 and inserting in the result.
- EVT ConcatVT = MVT::getVectorVT(ElType, NumElts);
- SDValue ConVec = DAG.getNode(ISD::UNDEF, dl, ConcatVT);
- auto ExractInto = [&DAG, &dl](SDValue NewV, SDValue ConVec, unsigned &j) {
- EVT NewVT = NewV.getValueType();
- EVT ConcatVT = ConVec.getValueType();
- for (unsigned i = 0, e = NewVT.getVectorNumElements(); i < e; i++, j++) {
- SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV,
- DAG.getIntPtrConstant(i, dl));
- ConVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ConcatVT, ConVec, Elt,
- DAG.getConstant(j, dl, MVT::i32));
- }
- return ConVec;
+ auto ConcatPair = [&](SDValue V1, SDValue V2) {
+ EVT Op1VT = V1.getValueType();
+ EVT Op2VT = V2.getValueType();
+ assert(Op1VT == Op2VT && "Operand types don't match!");
+ EVT VT = Op1VT.getDoubleNumVectorElementsVT(*DAG.getContext());
+
+ SDValue NewV1 = PromoteMVEPredVector(dl, V1, Op1VT, DAG);
+ SDValue NewV2 = PromoteMVEPredVector(dl, V2, Op2VT, DAG);
+
+ // We now have Op1 + Op2 promoted to vectors of integers, where v8i1 gets
+ // promoted to v8i16, etc.
+ MVT ElType =
+ getVectorTyFromPredicateVector(VT).getScalarType().getSimpleVT();
+ unsigned NumElts = 2 * Op1VT.getVectorNumElements();
+
+ // Extract the vector elements from Op1 and Op2 one by one and truncate them
+ // to be the right size for the destination. For example, if Op1 is v4i1
+ // then the promoted vector is v4i32. The result of concatenation gives a
+ // v8i1, which when promoted is v8i16. That means each i32 element from Op1
+ // needs truncating to i16 and inserting in the result.
+ EVT ConcatVT = MVT::getVectorVT(ElType, NumElts);
+ SDValue ConVec = DAG.getNode(ISD::UNDEF, dl, ConcatVT);
+ auto ExtractInto = [&DAG, &dl](SDValue NewV, SDValue ConVec, unsigned &j) {
+ EVT NewVT = NewV.getValueType();
+ EVT ConcatVT = ConVec.getValueType();
+ for (unsigned i = 0, e = NewVT.getVectorNumElements(); i < e; i++, j++) {
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV,
+ DAG.getIntPtrConstant(i, dl));
+ ConVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ConcatVT, ConVec, Elt,
+ DAG.getConstant(j, dl, MVT::i32));
+ }
+ return ConVec;
+ };
+ unsigned j = 0;
+ ConVec = ExtractInto(NewV1, ConVec, j);
+ ConVec = ExtractInto(NewV2, ConVec, j);
+
+ // Now return the result of comparing the subvector with zero,
+ // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1.
+ return DAG.getNode(ARMISD::VCMPZ, dl, VT, ConVec,
+ DAG.getConstant(ARMCC::NE, dl, MVT::i32));
};
- unsigned j = 0;
- ConVec = ExractInto(NewV1, ConVec, j);
- ConVec = ExractInto(NewV2, ConVec, j);
- // Now return the result of comparing the subvector with zero,
- // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1.
- return DAG.getNode(ARMISD::VCMPZ, dl, VT, ConVec,
- DAG.getConstant(ARMCC::NE, dl, MVT::i32));
+ // Concat each pair of subvectors and pack into the lower half of the array.
+ SmallVector<SDValue> ConcatOps(Op->op_begin(), Op->op_end());
+ while (ConcatOps.size() > 1) {
+ for (unsigned I = 0, E = ConcatOps.size(); I != E; I += 2) {
+ SDValue V1 = ConcatOps[I];
+ SDValue V2 = ConcatOps[I + 1];
+ ConcatOps[I / 2] = ConcatPair(V1, V2);
+ }
+ ConcatOps.resize(ConcatOps.size() / 2);
+ }
+ return ConcatOps[0];
}
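The loop above reduces an arbitrary power-of-two operand count to nested
pairwise concatenations; a standalone sketch of that reduction shape, using
strings in place of SDValues (illustrative only):

#include <string>
#include <vector>

std::string concatPairwise(std::vector<std::string> Ops) {
  // Combine adjacent operands and pack the results into the low half of the
  // array until a single value remains, as LowerCONCAT_VECTORS_i1 does.
  while (Ops.size() > 1) {
    for (unsigned I = 0, E = Ops.size(); I != E; I += 2)
      Ops[I / 2] = "(" + Ops[I] + " ++ " + Ops[I + 1] + ")";
    Ops.resize(Ops.size() / 2);
  }
  return Ops[0];
}

// concatPairwise({"a", "b", "c", "d"}) returns "((a ++ b) ++ (c ++ d))".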
static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG,
@@ -9069,7 +9139,7 @@ static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)
return true;
} else {
- if (Hi0->isNullValue() && Hi1->isNullValue())
+ if (Hi0->isZero() && Hi1->isZero())
return true;
}
return false;
@@ -10140,6 +10210,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::STRICT_FP_TO_UINT:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT: return LowerFP_TO_INT_SAT(Op, DAG, Subtarget);
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
@@ -10326,6 +10398,10 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::ZERO_EXTEND:
Res = LowerVectorExtend(N, DAG, Subtarget);
break;
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT:
+ Res = LowerFP_TO_INT_SAT(SDValue(N, 0), DAG, Subtarget);
+ break;
}
if (Res.getNode())
Results.push_back(Res);
@@ -10877,10 +10953,9 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
static
MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
- for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
- E = MBB->succ_end(); I != E; ++I)
- if (*I != Succ)
- return *I;
+ for (MachineBasicBlock *S : MBB->successors())
+ if (S != Succ)
+ return S;
llvm_unreachable("Expecting a BB with two successors!");
}
@@ -11378,13 +11453,9 @@ static bool checkAndUpdateCPSRKill(MachineBasicBlock::iterator SelectItr,
// If we hit the end of the block, check whether CPSR is live into a
// successor.
if (miI == BB->end()) {
- for (MachineBasicBlock::succ_iterator sItr = BB->succ_begin(),
- sEnd = BB->succ_end();
- sItr != sEnd; ++sItr) {
- MachineBasicBlock* succ = *sItr;
- if (succ->isLiveIn(ARM::CPSR))
+ for (MachineBasicBlock *Succ : BB->successors())
+ if (Succ->isLiveIn(ARM::CPSR))
return false;
- }
}
// We found a def, or hit the end of the basic block and CPSR wasn't live
@@ -11487,6 +11558,7 @@ static void genTPLoopBody(MachineBasicBlock *TpLoopBody,
BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VCTP8), VccrReg)
.addUse(PredCounterPhiReg)
.addImm(ARMVCC::None)
+ .addReg(0)
.addReg(0);
BuildMI(TpLoopBody, Dl, TII->get(ARM::t2SUBri), RemainingElementsReg)
@@ -11505,7 +11577,8 @@ static void genTPLoopBody(MachineBasicBlock *TpLoopBody,
.addReg(SrcPhiReg)
.addImm(16)
.addImm(ARMVCC::Then)
- .addUse(VccrReg);
+ .addUse(VccrReg)
+ .addReg(0);
} else
SrcValueReg = OpSrcReg;
@@ -11515,7 +11588,8 @@ static void genTPLoopBody(MachineBasicBlock *TpLoopBody,
.addReg(DestPhiReg)
.addImm(16)
.addImm(ARMVCC::Then)
- .addUse(VccrReg);
+ .addUse(VccrReg)
+ .addReg(0);
// Add the pseudoInstrs for decrementing the loop counter and marking the
// end:t2DoLoopDec and t2DoLoopEnd
@@ -12103,8 +12177,7 @@ static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes,
// When looking for a 0 constant, N can be zext or sext.
OtherOp = DAG.getConstant(1, dl, VT);
else
- OtherOp = DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), dl,
- VT);
+ OtherOp = DAG.getAllOnesConstant(dl, VT);
return true;
}
}
@@ -12696,7 +12769,7 @@ static SDValue PerformAddcSubcCombine(SDNode *N,
const ARMSubtarget *Subtarget) {
SelectionDAG &DAG(DCI.DAG);
- if (N->getOpcode() == ARMISD::SUBC) {
+ if (N->getOpcode() == ARMISD::SUBC && N->hasAnyUseOfValue(1)) {
// (SUBC (ADDE 0, 0, C), 1) -> C
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
@@ -12868,6 +12941,9 @@ static SDValue PerformVQDMULHCombine(SDNode *N, SelectionDAG &DAG) {
SDValue Shft;
ConstantSDNode *Clamp;
+ if (!VT.isVector() || VT.getScalarSizeInBits() > 64)
+ return SDValue();
+
if (N->getOpcode() == ISD::SMIN) {
Shft = N->getOperand(0);
Clamp = isConstOrConstSplat(N->getOperand(1));
@@ -13008,19 +13084,15 @@ static SDValue PerformVSELECTCombine(SDNode *N,
}
static SDValue PerformABSCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI,
- const ARMSubtarget *Subtarget) {
- SDValue res;
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
SelectionDAG &DAG = DCI.DAG;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.isOperationLegal(N->getOpcode(), N->getValueType(0)))
return SDValue();
- if (!TLI.expandABS(N, res, DAG))
- return SDValue();
-
- return res;
+ return TLI.expandABS(N, DAG);
}
/// PerformADDECombine - Target-specific dag combine transform from
@@ -13064,13 +13136,166 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
return SDValue();
}
+static SDValue TryDistrubutionADDVecReduce(SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDLoc dl(N);
+
+ auto IsVecReduce = [](SDValue Op) {
+ switch (Op.getOpcode()) {
+ case ISD::VECREDUCE_ADD:
+ case ARMISD::VADDVs:
+ case ARMISD::VADDVu:
+ case ARMISD::VMLAVs:
+ case ARMISD::VMLAVu:
+ return true;
+ }
+ return false;
+ };
+
+ auto DistrubuteAddAddVecReduce = [&](SDValue N0, SDValue N1) {
+ // Distribute add(X, add(vecreduce(Y), vecreduce(Z))) ->
+ // add(add(X, vecreduce(Y)), vecreduce(Z))
+ // to make better use of vaddva style instructions.
+ if (VT == MVT::i32 && N1.getOpcode() == ISD::ADD && !IsVecReduce(N0) &&
+ IsVecReduce(N1.getOperand(0)) && IsVecReduce(N1.getOperand(1)) &&
+ !isa<ConstantSDNode>(N0)) {
+ SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, N0, N1.getOperand(0));
+ return DAG.getNode(ISD::ADD, dl, VT, Add0, N1.getOperand(1));
+ }
+ // And turn add(add(A, reduce(B)), add(C, reduce(D))) ->
+ // add(add(add(A, C), reduce(B)), reduce(D))
+ if (VT == MVT::i32 && N0.getOpcode() == ISD::ADD &&
+ N1.getOpcode() == ISD::ADD) {
+ unsigned N0RedOp = 0;
+ if (!IsVecReduce(N0.getOperand(N0RedOp))) {
+ N0RedOp = 1;
+ if (!IsVecReduce(N0.getOperand(N0RedOp)))
+ return SDValue();
+ }
+
+ unsigned N1RedOp = 0;
+ if (!IsVecReduce(N1.getOperand(N1RedOp)))
+ N1RedOp = 1;
+ if (!IsVecReduce(N1.getOperand(N1RedOp)))
+ return SDValue();
+
+ SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, N0.getOperand(1 - N0RedOp),
+ N1.getOperand(1 - N1RedOp));
+ SDValue Add1 =
+ DAG.getNode(ISD::ADD, dl, VT, Add0, N0.getOperand(N0RedOp));
+ return DAG.getNode(ISD::ADD, dl, VT, Add1, N1.getOperand(N1RedOp));
+ }
+ return SDValue();
+ };
+ if (SDValue R = DistrubuteAddAddVecReduce(N0, N1))
+ return R;
+ if (SDValue R = DistrubuteAddAddVecReduce(N1, N0))
+ return R;
+
+ // Distribute add(vecreduce(load(Y)), vecreduce(load(Z)))
+ // Or add(add(X, vecreduce(load(Y))), vecreduce(load(Z)))
+ // by ascending load offsets. This can help cores prefetch if the order of
+ // loads is more predictable.
+ auto DistrubuteVecReduceLoad = [&](SDValue N0, SDValue N1, bool IsForward) {
+ // Check if two reductions are known to load data where one is before/after
+ // another. Return negative if N0 loads data before N1, positive if N1 is
+ // before N0 and 0 otherwise if nothing is known.
+ auto IsKnownOrderedLoad = [&](SDValue N0, SDValue N1) {
+ // Look through to the first operand of a MUL, for the VMLA case.
+ // Currently only looks at the first operand, in the hope they are equal.
+ if (N0.getOpcode() == ISD::MUL)
+ N0 = N0.getOperand(0);
+ if (N1.getOpcode() == ISD::MUL)
+ N1 = N1.getOperand(0);
+
+ // The two operands must be plain, non-indexed loads on the same chain; if
+ // so, compare their decomposed base and offset as described above.
+ LoadSDNode *Load0 = dyn_cast<LoadSDNode>(N0);
+ LoadSDNode *Load1 = dyn_cast<LoadSDNode>(N1);
+ if (!Load0 || !Load1 || Load0->getChain() != Load1->getChain() ||
+ !Load0->isSimple() || !Load1->isSimple() || Load0->isIndexed() ||
+ Load1->isIndexed())
+ return 0;
+
+ auto BaseLocDecomp0 = BaseIndexOffset::match(Load0, DAG);
+ auto BaseLocDecomp1 = BaseIndexOffset::match(Load1, DAG);
+
+ if (!BaseLocDecomp0.getBase() ||
+ BaseLocDecomp0.getBase() != BaseLocDecomp1.getBase() ||
+ !BaseLocDecomp0.hasValidOffset() || !BaseLocDecomp1.hasValidOffset())
+ return 0;
+ if (BaseLocDecomp0.getOffset() < BaseLocDecomp1.getOffset())
+ return -1;
+ if (BaseLocDecomp0.getOffset() > BaseLocDecomp1.getOffset())
+ return 1;
+ return 0;
+ };
+
+ SDValue X;
+ if (N0.getOpcode() == ISD::ADD) {
+ if (IsVecReduce(N0.getOperand(0)) && IsVecReduce(N0.getOperand(1))) {
+ int IsBefore = IsKnownOrderedLoad(N0.getOperand(0).getOperand(0),
+ N0.getOperand(1).getOperand(0));
+ if (IsBefore < 0) {
+ X = N0.getOperand(0);
+ N0 = N0.getOperand(1);
+ } else if (IsBefore > 0) {
+ X = N0.getOperand(1);
+ N0 = N0.getOperand(0);
+ } else
+ return SDValue();
+ } else if (IsVecReduce(N0.getOperand(0))) {
+ X = N0.getOperand(1);
+ N0 = N0.getOperand(0);
+ } else if (IsVecReduce(N0.getOperand(1))) {
+ X = N0.getOperand(0);
+ N0 = N0.getOperand(1);
+ } else
+ return SDValue();
+ } else if (IsForward && IsVecReduce(N0) && IsVecReduce(N1) &&
+ IsKnownOrderedLoad(N0.getOperand(0), N1.getOperand(0)) < 0) {
+ // Note this is backward to how you would expect. We create
+ // add(reduce(load + 16), reduce(load + 0)) so that the
+ // add(reduce(load+16), X) is combined into VADDVA(X, load+16)), leaving
+ // the X as VADDV(load + 0)
+ return DAG.getNode(ISD::ADD, dl, VT, N1, N0);
+ } else
+ return SDValue();
+
+ if (!IsVecReduce(N0) || !IsVecReduce(N1))
+ return SDValue();
+
+ if (IsKnownOrderedLoad(N1.getOperand(0), N0.getOperand(0)) >= 0)
+ return SDValue();
+
+ // Switch from add(add(X, N0), N1) to add(add(X, N1), N0)
+ SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, X, N1);
+ return DAG.getNode(ISD::ADD, dl, VT, Add0, N0);
+ };
+ if (SDValue R = DistrubuteVecReduceLoad(N0, N1, true))
+ return R;
+ if (SDValue R = DistrubuteVecReduceLoad(N1, N0, false))
+ return R;
+ return SDValue();
+}
+
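TryDistrubutionADDVecReduce above only reassociates integer adds so that each
partial sum can later fold into a vaddva-style accumulating instruction; the
value itself is unchanged. A standalone illustration of the identity (not the
DAG code):

// add(X, add(reduce(Y), reduce(Z))) is rewritten as
// add(add(X, reduce(Y)), reduce(Z)); plain integer associativity, so for any
// scalars x + (ry + rz) == (x + ry) + rz.
int distributeAddVecReduce(int X, int ReduceY, int ReduceZ) {
  return (X + ReduceY) + ReduceZ;
}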
static SDValue PerformADDVecReduce(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) {
- if (!Subtarget->hasMVEIntegerOps() || N->getValueType(0) != MVT::i64)
+ if (!Subtarget->hasMVEIntegerOps())
return SDValue();
+ if (SDValue R = TryDistrubutionADDVecReduce(N, DAG))
+ return R;
+
+ EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
+ SDLoc dl(N);
+
+ if (VT != MVT::i64)
+ return SDValue();
// We are looking for a i64 add of a VADDLVx. Due to these being i64's, this
// will look like:
@@ -13090,7 +13315,6 @@ static SDValue PerformADDVecReduce(SDNode *N, SelectionDAG &DAG,
NB->getOperand(1) != SDValue(VecRed.getNode(), 1))
return SDValue();
- SDLoc dl(N);
if (VecRed->getOpcode() == OpcodeA) {
// add(NA, VADDLVA(Inp), Y) -> VADDLVA(add(NA, Inp), Y)
SDValue Inp = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64,
@@ -14732,6 +14956,7 @@ PerformExtractEltToVMOVRRD(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
SDValue Op0 = Ext.getOperand(0);
EVT VecVT = Op0.getValueType();
+ unsigned ResNo = Op0.getResNo();
unsigned Lane = Ext.getConstantOperandVal(1);
if (VecVT.getVectorNumElements() != 4)
return SDValue();
@@ -14740,7 +14965,8 @@ PerformExtractEltToVMOVRRD(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
auto OtherIt = find_if(Op0->uses(), [&](SDNode *V) {
return V->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
isa<ConstantSDNode>(V->getOperand(1)) &&
- V->getConstantOperandVal(1) == Lane + 1;
+ V->getConstantOperandVal(1) == Lane + 1 &&
+ V->getOperand(0).getResNo() == ResNo;
});
if (OtherIt == Op0->uses().end())
return SDValue();
@@ -14884,6 +15110,47 @@ static SDValue FlattenVectorShuffle(ShuffleVectorSDNode *N, SelectionDAG &DAG) {
Op0->getOperand(0), Op1->getOperand(0));
}
+static SDValue
+PerformInsertSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
+ SDValue Vec = N->getOperand(0);
+ SDValue SubVec = N->getOperand(1);
+ uint64_t IdxVal = N->getConstantOperandVal(2);
+ EVT VecVT = Vec.getValueType();
+ EVT SubVT = SubVec.getValueType();
+
+ // Only do this for legal fixed vector types.
+ if (!VecVT.isFixedLengthVector() ||
+ !DCI.DAG.getTargetLoweringInfo().isTypeLegal(VecVT) ||
+ !DCI.DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
+ return SDValue();
+
+ // Ignore widening patterns.
+ if (IdxVal == 0 && Vec.isUndef())
+ return SDValue();
+
+ // Subvector must be half the width and an "aligned" insertion.
+ unsigned NumSubElts = SubVT.getVectorNumElements();
+ if ((SubVT.getSizeInBits() * 2) != VecVT.getSizeInBits() ||
+ (IdxVal != 0 && IdxVal != NumSubElts))
+ return SDValue();
+
+ // Fold insert_subvector -> concat_vectors
+ // insert_subvector(Vec,Sub,lo) -> concat_vectors(Sub,extract(Vec,hi))
+ // insert_subvector(Vec,Sub,hi) -> concat_vectors(extract(Vec,lo),Sub)
+ SDLoc DL(N);
+ SDValue Lo, Hi;
+ if (IdxVal == 0) {
+ Lo = SubVec;
+ Hi = DCI.DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,
+ DCI.DAG.getVectorIdxConstant(NumSubElts, DL));
+ } else {
+ Lo = DCI.DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,
+ DCI.DAG.getVectorIdxConstant(0, DL));
+ Hi = SubVec;
+ }
+ return DCI.DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Lo, Hi);
+}
+
// shuffle(MVETrunc(x, y)) -> VMOVN(x, y)
static SDValue PerformShuffleVMOVNCombine(ShuffleVectorSDNode *N,
SelectionDAG &DAG) {
@@ -14965,6 +15232,390 @@ static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
DAG.getUNDEF(VT), NewMask);
}
+/// Load/store instruction that can be merged with a base address
+/// update
+struct BaseUpdateTarget {
+ SDNode *N;
+ bool isIntrinsic;
+ bool isStore;
+ unsigned AddrOpIdx;
+};
+
+struct BaseUpdateUser {
+ /// Instruction that updates a pointer
+ SDNode *N;
+ /// Pointer increment operand
+ SDValue Inc;
+ /// Pointer increment value if it is a constant, or 0 otherwise
+ unsigned ConstInc;
+};
+
+static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target,
+ struct BaseUpdateUser &User,
+ bool SimpleConstIncOnly,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SelectionDAG &DAG = DCI.DAG;
+ SDNode *N = Target.N;
+ MemSDNode *MemN = cast<MemSDNode>(N);
+ SDLoc dl(N);
+
+ // Find the new opcode for the updating load/store.
+ bool isLoadOp = true;
+ bool isLaneOp = false;
+ // Workaround for vst1x and vld1x intrinsics which do not have alignment
+ // as an operand.
+ bool hasAlignment = true;
+ unsigned NewOpc = 0;
+ unsigned NumVecs = 0;
+ if (Target.isIntrinsic) {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ switch (IntNo) {
+ default:
+ llvm_unreachable("unexpected intrinsic for Neon base update");
+ case Intrinsic::arm_neon_vld1:
+ NewOpc = ARMISD::VLD1_UPD;
+ NumVecs = 1;
+ break;
+ case Intrinsic::arm_neon_vld2:
+ NewOpc = ARMISD::VLD2_UPD;
+ NumVecs = 2;
+ break;
+ case Intrinsic::arm_neon_vld3:
+ NewOpc = ARMISD::VLD3_UPD;
+ NumVecs = 3;
+ break;
+ case Intrinsic::arm_neon_vld4:
+ NewOpc = ARMISD::VLD4_UPD;
+ NumVecs = 4;
+ break;
+ case Intrinsic::arm_neon_vld1x2:
+ NewOpc = ARMISD::VLD1x2_UPD;
+ NumVecs = 2;
+ hasAlignment = false;
+ break;
+ case Intrinsic::arm_neon_vld1x3:
+ NewOpc = ARMISD::VLD1x3_UPD;
+ NumVecs = 3;
+ hasAlignment = false;
+ break;
+ case Intrinsic::arm_neon_vld1x4:
+ NewOpc = ARMISD::VLD1x4_UPD;
+ NumVecs = 4;
+ hasAlignment = false;
+ break;
+ case Intrinsic::arm_neon_vld2dup:
+ NewOpc = ARMISD::VLD2DUP_UPD;
+ NumVecs = 2;
+ break;
+ case Intrinsic::arm_neon_vld3dup:
+ NewOpc = ARMISD::VLD3DUP_UPD;
+ NumVecs = 3;
+ break;
+ case Intrinsic::arm_neon_vld4dup:
+ NewOpc = ARMISD::VLD4DUP_UPD;
+ NumVecs = 4;
+ break;
+ case Intrinsic::arm_neon_vld2lane:
+ NewOpc = ARMISD::VLD2LN_UPD;
+ NumVecs = 2;
+ isLaneOp = true;
+ break;
+ case Intrinsic::arm_neon_vld3lane:
+ NewOpc = ARMISD::VLD3LN_UPD;
+ NumVecs = 3;
+ isLaneOp = true;
+ break;
+ case Intrinsic::arm_neon_vld4lane:
+ NewOpc = ARMISD::VLD4LN_UPD;
+ NumVecs = 4;
+ isLaneOp = true;
+ break;
+ case Intrinsic::arm_neon_vst1:
+ NewOpc = ARMISD::VST1_UPD;
+ NumVecs = 1;
+ isLoadOp = false;
+ break;
+ case Intrinsic::arm_neon_vst2:
+ NewOpc = ARMISD::VST2_UPD;
+ NumVecs = 2;
+ isLoadOp = false;
+ break;
+ case Intrinsic::arm_neon_vst3:
+ NewOpc = ARMISD::VST3_UPD;
+ NumVecs = 3;
+ isLoadOp = false;
+ break;
+ case Intrinsic::arm_neon_vst4:
+ NewOpc = ARMISD::VST4_UPD;
+ NumVecs = 4;
+ isLoadOp = false;
+ break;
+ case Intrinsic::arm_neon_vst2lane:
+ NewOpc = ARMISD::VST2LN_UPD;
+ NumVecs = 2;
+ isLoadOp = false;
+ isLaneOp = true;
+ break;
+ case Intrinsic::arm_neon_vst3lane:
+ NewOpc = ARMISD::VST3LN_UPD;
+ NumVecs = 3;
+ isLoadOp = false;
+ isLaneOp = true;
+ break;
+ case Intrinsic::arm_neon_vst4lane:
+ NewOpc = ARMISD::VST4LN_UPD;
+ NumVecs = 4;
+ isLoadOp = false;
+ isLaneOp = true;
+ break;
+ case Intrinsic::arm_neon_vst1x2:
+ NewOpc = ARMISD::VST1x2_UPD;
+ NumVecs = 2;
+ isLoadOp = false;
+ hasAlignment = false;
+ break;
+ case Intrinsic::arm_neon_vst1x3:
+ NewOpc = ARMISD::VST1x3_UPD;
+ NumVecs = 3;
+ isLoadOp = false;
+ hasAlignment = false;
+ break;
+ case Intrinsic::arm_neon_vst1x4:
+ NewOpc = ARMISD::VST1x4_UPD;
+ NumVecs = 4;
+ isLoadOp = false;
+ hasAlignment = false;
+ break;
+ }
+ } else {
+ isLaneOp = true;
+ switch (N->getOpcode()) {
+ default:
+ llvm_unreachable("unexpected opcode for Neon base update");
+ case ARMISD::VLD1DUP:
+ NewOpc = ARMISD::VLD1DUP_UPD;
+ NumVecs = 1;
+ break;
+ case ARMISD::VLD2DUP:
+ NewOpc = ARMISD::VLD2DUP_UPD;
+ NumVecs = 2;
+ break;
+ case ARMISD::VLD3DUP:
+ NewOpc = ARMISD::VLD3DUP_UPD;
+ NumVecs = 3;
+ break;
+ case ARMISD::VLD4DUP:
+ NewOpc = ARMISD::VLD4DUP_UPD;
+ NumVecs = 4;
+ break;
+ case ISD::LOAD:
+ NewOpc = ARMISD::VLD1_UPD;
+ NumVecs = 1;
+ isLaneOp = false;
+ break;
+ case ISD::STORE:
+ NewOpc = ARMISD::VST1_UPD;
+ NumVecs = 1;
+ isLaneOp = false;
+ isLoadOp = false;
+ break;
+ }
+ }
+
+ // Find the size of memory referenced by the load/store.
+ EVT VecTy;
+ if (isLoadOp) {
+ VecTy = N->getValueType(0);
+ } else if (Target.isIntrinsic) {
+ VecTy = N->getOperand(Target.AddrOpIdx + 1).getValueType();
+ } else {
+ assert(Target.isStore &&
+ "Node has to be a load, a store, or an intrinsic!");
+ VecTy = N->getOperand(1).getValueType();
+ }
+
+ bool isVLDDUPOp =
+ NewOpc == ARMISD::VLD1DUP_UPD || NewOpc == ARMISD::VLD2DUP_UPD ||
+ NewOpc == ARMISD::VLD3DUP_UPD || NewOpc == ARMISD::VLD4DUP_UPD;
+
+ unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
+ if (isLaneOp || isVLDDUPOp)
+ NumBytes /= VecTy.getVectorNumElements();
+
+ if (NumBytes >= 3 * 16 && User.ConstInc != NumBytes) {
+ // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
+ // separate instructions that make it harder to use a non-constant update.
+ return false;
+ }
+
+ if (SimpleConstIncOnly && User.ConstInc != NumBytes)
+ return false;
+
+ // OK, we found an ADD we can fold into the base update.
+ // Now, create a _UPD node, taking care of not breaking alignment.
+
+ EVT AlignedVecTy = VecTy;
+ unsigned Alignment = MemN->getAlignment();
+
+ // If this is a less-than-standard-aligned load/store, change the type to
+ // match the standard alignment.
+ // The alignment is overlooked when selecting _UPD variants; and it's
+ // easier to introduce bitcasts here than fix that.
+ // There are 3 ways to get to this base-update combine:
+ // - intrinsics: they are assumed to be properly aligned (to the standard
+ // alignment of the memory type), so we don't need to do anything.
+ // - ARMISD::VLDx nodes: they are only generated from the aforementioned
+ // intrinsics, so, likewise, there's nothing to do.
+ // - generic load/store instructions: the alignment is specified as an
+ // explicit operand, rather than implicitly as the standard alignment
+ // of the memory type (like the intrinsics). We need to change the
+ // memory type to match the explicit alignment. That way, we don't
+ // generate non-standard-aligned ARMISD::VLDx nodes.
+ if (isa<LSBaseSDNode>(N)) {
+ if (Alignment == 0)
+ Alignment = 1;
+ if (Alignment < VecTy.getScalarSizeInBits() / 8) {
+ MVT EltTy = MVT::getIntegerVT(Alignment * 8);
+ assert(NumVecs == 1 && "Unexpected multi-element generic load/store.");
+ assert(!isLaneOp && "Unexpected generic load/store lane.");
+ unsigned NumElts = NumBytes / (EltTy.getSizeInBits() / 8);
+ AlignedVecTy = MVT::getVectorVT(EltTy, NumElts);
+ }
+ // Don't set an explicit alignment on regular load/stores that we want
+ // to transform to VLD/VST 1_UPD nodes.
+ // This matches the behavior of regular load/stores, which only get an
+ // explicit alignment if the MMO alignment is larger than the standard
+ // alignment of the memory type.
+ // Intrinsics, however, always get an explicit alignment, set to the
+ // alignment of the MMO.
+ Alignment = 1;
+ }
+
+ // Create the new updating load/store node.
+ // First, create an SDVTList for the new updating node's results.
+ EVT Tys[6];
+ unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
+ unsigned n;
+ for (n = 0; n < NumResultVecs; ++n)
+ Tys[n] = AlignedVecTy;
+ Tys[n++] = MVT::i32;
+ Tys[n] = MVT::Other;
+ SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs + 2));
+
+ // Then, gather the new node's operands.
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(N->getOperand(0)); // incoming chain
+ Ops.push_back(N->getOperand(Target.AddrOpIdx));
+ Ops.push_back(User.Inc);
+
+ if (StoreSDNode *StN = dyn_cast<StoreSDNode>(N)) {
+ // Try to match the intrinsic's signature
+ Ops.push_back(StN->getValue());
+ } else {
+ // Loads (and of course intrinsics) match the intrinsics' signature,
+ // so just add all but the alignment operand.
+ unsigned LastOperand =
+ hasAlignment ? N->getNumOperands() - 1 : N->getNumOperands();
+ for (unsigned i = Target.AddrOpIdx + 1; i < LastOperand; ++i)
+ Ops.push_back(N->getOperand(i));
+ }
+
+ // For all node types, the alignment operand is always the last one.
+ Ops.push_back(DAG.getConstant(Alignment, dl, MVT::i32));
+
+ // If this is a non-standard-aligned STORE, the penultimate operand is the
+ // stored value. Bitcast it to the aligned type.
+ if (AlignedVecTy != VecTy && N->getOpcode() == ISD::STORE) {
+ SDValue &StVal = Ops[Ops.size() - 2];
+ StVal = DAG.getNode(ISD::BITCAST, dl, AlignedVecTy, StVal);
+ }
+
+ EVT LoadVT = isLaneOp ? VecTy.getVectorElementType() : AlignedVecTy;
+ SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, Ops, LoadVT,
+ MemN->getMemOperand());
+
+ // Update the uses.
+ SmallVector<SDValue, 5> NewResults;
+ for (unsigned i = 0; i < NumResultVecs; ++i)
+ NewResults.push_back(SDValue(UpdN.getNode(), i));
+
+ // If this is a non-standard-aligned LOAD, the first result is the loaded
+ // value. Bitcast it to the expected result type.
+ if (AlignedVecTy != VecTy && N->getOpcode() == ISD::LOAD) {
+ SDValue &LdVal = NewResults[0];
+ LdVal = DAG.getNode(ISD::BITCAST, dl, VecTy, LdVal);
+ }
+
+ NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1)); // chain
+ DCI.CombineTo(N, NewResults);
+ DCI.CombineTo(User.N, SDValue(UpdN.getNode(), NumResultVecs));
+
+ return true;
+}
+
+// If (opcode ptr inc) is an ADD-like instruction, return the
+// increment value. Otherwise return 0.
+static unsigned getPointerConstIncrement(unsigned Opcode, SDValue Ptr,
+ SDValue Inc, const SelectionDAG &DAG) {
+ ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode());
+ if (!CInc)
+ return 0;
+
+ switch (Opcode) {
+ case ARMISD::VLD1_UPD:
+ case ISD::ADD:
+ return CInc->getZExtValue();
+ case ISD::OR: {
+ if (DAG.haveNoCommonBitsSet(Ptr, Inc)) {
+ // (OR ptr inc) is the same as (ADD ptr inc)
+ return CInc->getZExtValue();
+ }
+ return 0;
+ }
+ default:
+ return 0;
+ }
+}
+
+static bool findPointerConstIncrement(SDNode *N, SDValue *Ptr, SDValue *CInc) {
+ switch (N->getOpcode()) {
+ case ISD::ADD:
+ case ISD::OR: {
+ if (isa<ConstantSDNode>(N->getOperand(1))) {
+ *Ptr = N->getOperand(0);
+ *CInc = N->getOperand(1);
+ return true;
+ }
+ return false;
+ }
+ case ARMISD::VLD1_UPD: {
+ if (isa<ConstantSDNode>(N->getOperand(2))) {
+ *Ptr = N->getOperand(1);
+ *CInc = N->getOperand(2);
+ return true;
+ }
+ return false;
+ }
+ default:
+ return false;
+ }
+}
+
+static bool isValidBaseUpdate(SDNode *N, SDNode *User) {
+ // Check that the add is independent of the load/store.
+ // Otherwise, folding it would create a cycle. Search through Addr
+ // as well, since the User may not be a direct user of Addr and
+ // only share a base pointer.
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 16> Worklist;
+ Worklist.push_back(N);
+ Worklist.push_back(User);
+ if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
+ SDNode::hasPredecessorHelper(User, Visited, Worklist))
+ return false;
+ return true;
+}
+
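getPointerConstIncrement above accepts (or ptr, inc) in place of (add ptr,
inc) when DAG.haveNoCommonBitsSet proves the operands share no set bits; a
standalone check of that identity (illustrative values only):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t Base = 0x1000; // aligned base, low bits clear
  uint32_t Inc = 0x8;     // increment fits entirely in the clear low bits
  assert((Base & Inc) == 0);          // no common bits set
  assert((Base | Inc) == Base + Inc); // so OR and ADD produce the same address
  return 0;
}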
/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,
/// NEON load/store intrinsics, and generic vector load/stores, to merge
/// base address updates.
@@ -14972,237 +15623,89 @@ static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
/// The caller is assumed to have checked legality.
static SDValue CombineBaseUpdate(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
- SelectionDAG &DAG = DCI.DAG;
const bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
const bool isStore = N->getOpcode() == ISD::STORE;
const unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1);
+ BaseUpdateTarget Target = {N, isIntrinsic, isStore, AddrOpIdx};
+
SDValue Addr = N->getOperand(AddrOpIdx);
- MemSDNode *MemN = cast<MemSDNode>(N);
- SDLoc dl(N);
+
+ SmallVector<BaseUpdateUser, 8> BaseUpdates;
// Search for a use of the address operand that is an increment.
for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
SDNode *User = *UI;
- if (User->getOpcode() != ISD::ADD ||
- UI.getUse().getResNo() != Addr.getResNo())
- continue;
-
- // Check that the add is independent of the load/store. Otherwise, folding
- // it would create a cycle. We can avoid searching through Addr as it's a
- // predecessor to both.
- SmallPtrSet<const SDNode *, 32> Visited;
- SmallVector<const SDNode *, 16> Worklist;
- Visited.insert(Addr.getNode());
- Worklist.push_back(N);
- Worklist.push_back(User);
- if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
- SDNode::hasPredecessorHelper(User, Visited, Worklist))
+ if (UI.getUse().getResNo() != Addr.getResNo() ||
+ User->getNumOperands() != 2)
continue;
- // Find the new opcode for the updating load/store.
- bool isLoadOp = true;
- bool isLaneOp = false;
- // Workaround for vst1x and vld1x intrinsics which do not have alignment
- // as an operand.
- bool hasAlignment = true;
- unsigned NewOpc = 0;
- unsigned NumVecs = 0;
- if (isIntrinsic) {
- unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
- switch (IntNo) {
- default: llvm_unreachable("unexpected intrinsic for Neon base update");
- case Intrinsic::arm_neon_vld1: NewOpc = ARMISD::VLD1_UPD;
- NumVecs = 1; break;
- case Intrinsic::arm_neon_vld2: NewOpc = ARMISD::VLD2_UPD;
- NumVecs = 2; break;
- case Intrinsic::arm_neon_vld3: NewOpc = ARMISD::VLD3_UPD;
- NumVecs = 3; break;
- case Intrinsic::arm_neon_vld4: NewOpc = ARMISD::VLD4_UPD;
- NumVecs = 4; break;
- case Intrinsic::arm_neon_vld1x2: NewOpc = ARMISD::VLD1x2_UPD;
- NumVecs = 2; hasAlignment = false; break;
- case Intrinsic::arm_neon_vld1x3: NewOpc = ARMISD::VLD1x3_UPD;
- NumVecs = 3; hasAlignment = false; break;
- case Intrinsic::arm_neon_vld1x4: NewOpc = ARMISD::VLD1x4_UPD;
- NumVecs = 4; hasAlignment = false; break;
- case Intrinsic::arm_neon_vld2dup: NewOpc = ARMISD::VLD2DUP_UPD;
- NumVecs = 2; break;
- case Intrinsic::arm_neon_vld3dup: NewOpc = ARMISD::VLD3DUP_UPD;
- NumVecs = 3; break;
- case Intrinsic::arm_neon_vld4dup: NewOpc = ARMISD::VLD4DUP_UPD;
- NumVecs = 4; break;
- case Intrinsic::arm_neon_vld2lane: NewOpc = ARMISD::VLD2LN_UPD;
- NumVecs = 2; isLaneOp = true; break;
- case Intrinsic::arm_neon_vld3lane: NewOpc = ARMISD::VLD3LN_UPD;
- NumVecs = 3; isLaneOp = true; break;
- case Intrinsic::arm_neon_vld4lane: NewOpc = ARMISD::VLD4LN_UPD;
- NumVecs = 4; isLaneOp = true; break;
- case Intrinsic::arm_neon_vst1: NewOpc = ARMISD::VST1_UPD;
- NumVecs = 1; isLoadOp = false; break;
- case Intrinsic::arm_neon_vst2: NewOpc = ARMISD::VST2_UPD;
- NumVecs = 2; isLoadOp = false; break;
- case Intrinsic::arm_neon_vst3: NewOpc = ARMISD::VST3_UPD;
- NumVecs = 3; isLoadOp = false; break;
- case Intrinsic::arm_neon_vst4: NewOpc = ARMISD::VST4_UPD;
- NumVecs = 4; isLoadOp = false; break;
- case Intrinsic::arm_neon_vst2lane: NewOpc = ARMISD::VST2LN_UPD;
- NumVecs = 2; isLoadOp = false; isLaneOp = true; break;
- case Intrinsic::arm_neon_vst3lane: NewOpc = ARMISD::VST3LN_UPD;
- NumVecs = 3; isLoadOp = false; isLaneOp = true; break;
- case Intrinsic::arm_neon_vst4lane: NewOpc = ARMISD::VST4LN_UPD;
- NumVecs = 4; isLoadOp = false; isLaneOp = true; break;
- case Intrinsic::arm_neon_vst1x2: NewOpc = ARMISD::VST1x2_UPD;
- NumVecs = 2; isLoadOp = false; hasAlignment = false; break;
- case Intrinsic::arm_neon_vst1x3: NewOpc = ARMISD::VST1x3_UPD;
- NumVecs = 3; isLoadOp = false; hasAlignment = false; break;
- case Intrinsic::arm_neon_vst1x4: NewOpc = ARMISD::VST1x4_UPD;
- NumVecs = 4; isLoadOp = false; hasAlignment = false; break;
- }
- } else {
- isLaneOp = true;
- switch (N->getOpcode()) {
- default: llvm_unreachable("unexpected opcode for Neon base update");
- case ARMISD::VLD1DUP: NewOpc = ARMISD::VLD1DUP_UPD; NumVecs = 1; break;
- case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break;
- case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break;
- case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break;
- case ISD::LOAD: NewOpc = ARMISD::VLD1_UPD;
- NumVecs = 1; isLaneOp = false; break;
- case ISD::STORE: NewOpc = ARMISD::VST1_UPD;
- NumVecs = 1; isLaneOp = false; isLoadOp = false; break;
- }
- }
-
- // Find the size of memory referenced by the load/store.
- EVT VecTy;
- if (isLoadOp) {
- VecTy = N->getValueType(0);
- } else if (isIntrinsic) {
- VecTy = N->getOperand(AddrOpIdx+1).getValueType();
- } else {
- assert(isStore && "Node has to be a load, a store, or an intrinsic!");
- VecTy = N->getOperand(1).getValueType();
- }
-
- bool isVLDDUPOp =
- NewOpc == ARMISD::VLD1DUP_UPD || NewOpc == ARMISD::VLD2DUP_UPD ||
- NewOpc == ARMISD::VLD3DUP_UPD || NewOpc == ARMISD::VLD4DUP_UPD;
-
- unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
- if (isLaneOp || isVLDDUPOp)
- NumBytes /= VecTy.getVectorNumElements();
-
- // If the increment is a constant, it must match the memory ref size.
- SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
- ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode());
- if (NumBytes >= 3 * 16 && (!CInc || CInc->getZExtValue() != NumBytes)) {
- // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
- // separate instructions that make it harder to use a non-constant update.
- continue;
- }
+ SDValue Inc = User->getOperand(UI.getOperandNo() == 1 ? 0 : 1);
+ unsigned ConstInc =
+ getPointerConstIncrement(User->getOpcode(), Addr, Inc, DCI.DAG);
- // OK, we found an ADD we can fold into the base update.
- // Now, create a _UPD node, taking care of not breaking alignment.
-
- EVT AlignedVecTy = VecTy;
- unsigned Alignment = MemN->getAlignment();
-
- // If this is a less-than-standard-aligned load/store, change the type to
- // match the standard alignment.
- // The alignment is overlooked when selecting _UPD variants; and it's
- // easier to introduce bitcasts here than fix that.
- // There are 3 ways to get to this base-update combine:
- // - intrinsics: they are assumed to be properly aligned (to the standard
- // alignment of the memory type), so we don't need to do anything.
- // - ARMISD::VLDx nodes: they are only generated from the aforementioned
- // intrinsics, so, likewise, there's nothing to do.
- // - generic load/store instructions: the alignment is specified as an
- // explicit operand, rather than implicitly as the standard alignment
- // of the memory type (like the intrisics). We need to change the
- // memory type to match the explicit alignment. That way, we don't
- // generate non-standard-aligned ARMISD::VLDx nodes.
- if (isa<LSBaseSDNode>(N)) {
- if (Alignment == 0)
- Alignment = 1;
- if (Alignment < VecTy.getScalarSizeInBits() / 8) {
- MVT EltTy = MVT::getIntegerVT(Alignment * 8);
- assert(NumVecs == 1 && "Unexpected multi-element generic load/store.");
- assert(!isLaneOp && "Unexpected generic load/store lane.");
- unsigned NumElts = NumBytes / (EltTy.getSizeInBits() / 8);
- AlignedVecTy = MVT::getVectorVT(EltTy, NumElts);
- }
- // Don't set an explicit alignment on regular load/stores that we want
- // to transform to VLD/VST 1_UPD nodes.
- // This matches the behavior of regular load/stores, which only get an
- // explicit alignment if the MMO alignment is larger than the standard
- // alignment of the memory type.
- // Intrinsics, however, always get an explicit alignment, set to the
- // alignment of the MMO.
- Alignment = 1;
- }
+ if (ConstInc || User->getOpcode() == ISD::ADD)
+ BaseUpdates.push_back({User, Inc, ConstInc});
+ }
- // Create the new updating load/store node.
- // First, create an SDVTList for the new updating node's results.
- EVT Tys[6];
- unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
- unsigned n;
- for (n = 0; n < NumResultVecs; ++n)
- Tys[n] = AlignedVecTy;
- Tys[n++] = MVT::i32;
- Tys[n] = MVT::Other;
- SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs+2));
+ // If the address is a constant pointer increment itself, find
+ // another constant increment that has the same base operand
+ SDValue Base;
+ SDValue CInc;
+ if (findPointerConstIncrement(Addr.getNode(), &Base, &CInc)) {
+ unsigned Offset =
+ getPointerConstIncrement(Addr->getOpcode(), Base, CInc, DCI.DAG);
+ for (SDNode::use_iterator UI = Base->use_begin(), UE = Base->use_end();
+ UI != UE; ++UI) {
- // Then, gather the new node's operands.
- SmallVector<SDValue, 8> Ops;
- Ops.push_back(N->getOperand(0)); // incoming chain
- Ops.push_back(N->getOperand(AddrOpIdx));
- Ops.push_back(Inc);
+ SDNode *User = *UI;
+ if (UI.getUse().getResNo() != Base.getResNo() || User == Addr.getNode() ||
+ User->getNumOperands() != 2)
+ continue;
- if (StoreSDNode *StN = dyn_cast<StoreSDNode>(N)) {
- // Try to match the intrinsic's signature
- Ops.push_back(StN->getValue());
- } else {
- // Loads (and of course intrinsics) match the intrinsics' signature,
- // so just add all but the alignment operand.
- unsigned LastOperand =
- hasAlignment ? N->getNumOperands() - 1 : N->getNumOperands();
- for (unsigned i = AddrOpIdx + 1; i < LastOperand; ++i)
- Ops.push_back(N->getOperand(i));
- }
+ SDValue UserInc = User->getOperand(UI.getOperandNo() == 0 ? 1 : 0);
+ unsigned UserOffset =
+ getPointerConstIncrement(User->getOpcode(), Base, UserInc, DCI.DAG);
- // For all node types, the alignment operand is always the last one.
- Ops.push_back(DAG.getConstant(Alignment, dl, MVT::i32));
+ if (!UserOffset || UserOffset <= Offset)
+ continue;
- // If this is a non-standard-aligned STORE, the penultimate operand is the
- // stored value. Bitcast it to the aligned type.
- if (AlignedVecTy != VecTy && N->getOpcode() == ISD::STORE) {
- SDValue &StVal = Ops[Ops.size()-2];
- StVal = DAG.getNode(ISD::BITCAST, dl, AlignedVecTy, StVal);
+ unsigned NewConstInc = UserOffset - Offset;
+ SDValue NewInc = DCI.DAG.getConstant(NewConstInc, SDLoc(N), MVT::i32);
+ BaseUpdates.push_back({User, NewInc, NewConstInc});
}
+ }
- EVT LoadVT = isLaneOp ? VecTy.getVectorElementType() : AlignedVecTy;
- SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, Ops, LoadVT,
- MemN->getMemOperand());
-
- // Update the uses.
- SmallVector<SDValue, 5> NewResults;
- for (unsigned i = 0; i < NumResultVecs; ++i)
- NewResults.push_back(SDValue(UpdN.getNode(), i));
-
- // If this is an non-standard-aligned LOAD, the first result is the loaded
- // value. Bitcast it to the expected result type.
- if (AlignedVecTy != VecTy && N->getOpcode() == ISD::LOAD) {
- SDValue &LdVal = NewResults[0];
- LdVal = DAG.getNode(ISD::BITCAST, dl, VecTy, LdVal);
+ // Try to fold the load/store with an update that matches memory
+ // access size. This should work well for sequential loads.
+ //
+ // Filter out invalid updates as well.
+ unsigned NumValidUpd = BaseUpdates.size();
+ for (unsigned I = 0; I < NumValidUpd;) {
+ BaseUpdateUser &User = BaseUpdates[I];
+ if (!isValidBaseUpdate(N, User.N)) {
+ --NumValidUpd;
+ std::swap(BaseUpdates[I], BaseUpdates[NumValidUpd]);
+ continue;
}
- NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain
- DCI.CombineTo(N, NewResults);
- DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
-
- break;
+ if (TryCombineBaseUpdate(Target, User, /*SimpleConstIncOnly=*/true, DCI))
+ return SDValue();
+ ++I;
+ }
+ BaseUpdates.resize(NumValidUpd);
+
+ // Try to fold with other users. Non-constant updates are considered
+ // first, and constant updates are sorted to not break a sequence of
+ // strided accesses (if there is any).
+ std::sort(BaseUpdates.begin(), BaseUpdates.end(),
+ [](BaseUpdateUser &LHS, BaseUpdateUser &RHS) {
+ return LHS.ConstInc < RHS.ConstInc;
+ });
+ for (BaseUpdateUser &User : BaseUpdates) {
+ if (TryCombineBaseUpdate(Target, User, /*SimpleConstIncOnly=*/false, DCI))
+ return SDValue();
}
return SDValue();
}
@@ -15502,11 +16005,12 @@ static SDValue PerformVDUPCombine(SDNode *N, SelectionDAG &DAG,
}
static SDValue PerformLOADCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI) {
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
EVT VT = N->getValueType(0);
// If this is a legal vector load, try to combine it into a VLD1_UPD.
- if (ISD::isNormalLoad(N) && VT.isVector() &&
+ if (Subtarget->hasNEON() && ISD::isNormalLoad(N) && VT.isVector() &&
DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
return CombineBaseUpdate(N, DCI);
@@ -15976,6 +16480,15 @@ static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG,
SDValue N0 = N->getOperand(0);
SDLoc dl(N);
+ // Try to turn vecreduce_add(add(x, y)) into vecreduce(x) + vecreduce(y)
+ if (ResVT == MVT::i32 && N0.getOpcode() == ISD::ADD &&
+ (N0.getValueType() == MVT::v4i32 || N0.getValueType() == MVT::v8i16 ||
+ N0.getValueType() == MVT::v16i8)) {
+ SDValue Red0 = DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, N0.getOperand(0));
+ SDValue Red1 = DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, N0.getOperand(1));
+ return DAG.getNode(ISD::ADD, dl, ResVT, Red0, Red1);
+ }
+
// We are looking for something that will have illegal types if left alone,
// but that we can convert to a single instruction under MVE. For example
// vecreduce_add(sext(A, v8i32)) => VADDV.s16 A
@@ -16124,38 +16637,8 @@ static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG,
SDValue(Node.getNode(), 1));
};
- if (SDValue A = IsVADDV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}))
- return DAG.getNode(ARMISD::VADDVs, dl, ResVT, A);
- if (SDValue A = IsVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}))
- return DAG.getNode(ARMISD::VADDVu, dl, ResVT, A);
- if (SDValue A = IsVADDV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v4i32}))
- return Create64bitNode(ARMISD::VADDLVs, {A});
- if (SDValue A = IsVADDV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v4i32}))
- return Create64bitNode(ARMISD::VADDLVu, {A});
- if (SDValue A = IsVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}))
- return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
- DAG.getNode(ARMISD::VADDVs, dl, MVT::i32, A));
- if (SDValue A = IsVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}))
- return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
- DAG.getNode(ARMISD::VADDVu, dl, MVT::i32, A));
-
- SDValue Mask;
- if (SDValue A = IsPredVADDV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask))
- return DAG.getNode(ARMISD::VADDVps, dl, ResVT, A, Mask);
- if (SDValue A = IsPredVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask))
- return DAG.getNode(ARMISD::VADDVpu, dl, ResVT, A, Mask);
- if (SDValue A = IsPredVADDV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v4i32}, Mask))
- return Create64bitNode(ARMISD::VADDLVps, {A, Mask});
- if (SDValue A = IsPredVADDV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v4i32}, Mask))
- return Create64bitNode(ARMISD::VADDLVpu, {A, Mask});
- if (SDValue A = IsPredVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, Mask))
- return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
- DAG.getNode(ARMISD::VADDVps, dl, MVT::i32, A, Mask));
- if (SDValue A = IsPredVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, Mask))
- return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
- DAG.getNode(ARMISD::VADDVpu, dl, MVT::i32, A, Mask));
-
SDValue A, B;
+ SDValue Mask;
if (IsVMLAV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B))
return DAG.getNode(ARMISD::VMLAVs, dl, ResVT, A, B);
if (IsVMLAV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B))
@@ -16192,6 +16675,36 @@ static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
DAG.getNode(ARMISD::VMLAVpu, dl, MVT::i32, A, B, Mask));
+ if (SDValue A = IsVADDV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}))
+ return DAG.getNode(ARMISD::VADDVs, dl, ResVT, A);
+ if (SDValue A = IsVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}))
+ return DAG.getNode(ARMISD::VADDVu, dl, ResVT, A);
+ if (SDValue A = IsVADDV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v4i32}))
+ return Create64bitNode(ARMISD::VADDLVs, {A});
+ if (SDValue A = IsVADDV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v4i32}))
+ return Create64bitNode(ARMISD::VADDLVu, {A});
+ if (SDValue A = IsVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}))
+ return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
+ DAG.getNode(ARMISD::VADDVs, dl, MVT::i32, A));
+ if (SDValue A = IsVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}))
+ return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
+ DAG.getNode(ARMISD::VADDVu, dl, MVT::i32, A));
+
+ if (SDValue A = IsPredVADDV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask))
+ return DAG.getNode(ARMISD::VADDVps, dl, ResVT, A, Mask);
+ if (SDValue A = IsPredVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask))
+ return DAG.getNode(ARMISD::VADDVpu, dl, ResVT, A, Mask);
+ if (SDValue A = IsPredVADDV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v4i32}, Mask))
+ return Create64bitNode(ARMISD::VADDLVps, {A, Mask});
+ if (SDValue A = IsPredVADDV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v4i32}, Mask))
+ return Create64bitNode(ARMISD::VADDLVpu, {A, Mask});
+ if (SDValue A = IsPredVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, Mask))
+ return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
+ DAG.getNode(ARMISD::VADDVps, dl, MVT::i32, A, Mask));
+ if (SDValue A = IsPredVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, Mask))
+ return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
+ DAG.getNode(ARMISD::VADDVpu, dl, MVT::i32, A, Mask));
+
// Some complications. We can get a case where the two inputs of the mul are
// the same, then the output sext will have been helpfully converted to a
// zext. Turn it back.
@@ -16978,7 +17491,7 @@ static SDValue SearchLoopIntrinsic(SDValue N, ISD::CondCode &CC, int &Imm,
auto *Const = dyn_cast<ConstantSDNode>(N.getOperand(1));
if (!Const)
return SDValue();
- if (Const->isNullValue())
+ if (Const->isZero())
Imm = 0;
else if (Const->isOne())
Imm = 1;
@@ -17030,7 +17543,7 @@ static SDValue PerformHWLoopCombine(SDNode *N,
Cond = N->getOperand(2);
Dest = N->getOperand(4);
if (auto *Const = dyn_cast<ConstantSDNode>(N->getOperand(3))) {
- if (!Const->isOne() && !Const->isNullValue())
+ if (!Const->isOne() && !Const->isZero())
return SDValue();
Imm = Const->getZExtValue();
} else
@@ -17685,6 +18198,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::EXTRACT_VECTOR_ELT:
return PerformExtractEltCombine(N, DCI, Subtarget);
case ISD::SIGN_EXTEND_INREG: return PerformSignExtendInregCombine(N, DCI.DAG);
+ case ISD::INSERT_SUBVECTOR: return PerformInsertSubvectorCombine(N, DCI);
case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI, Subtarget);
case ARMISD::VDUP: return PerformVDUPCombine(N, DCI.DAG, Subtarget);
@@ -17710,9 +18224,12 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SMAX:
case ISD::UMAX:
return PerformMinMaxCombine(N, DCI.DAG, Subtarget);
- case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG);
- case ARMISD::BRCOND: return PerformBRCONDCombine(N, DCI.DAG);
- case ISD::LOAD: return PerformLOADCombine(N, DCI);
+ case ARMISD::CMOV:
+ return PerformCMOVCombine(N, DCI.DAG);
+ case ARMISD::BRCOND:
+ return PerformBRCONDCombine(N, DCI.DAG);
+ case ISD::LOAD:
+ return PerformLOADCombine(N, DCI, Subtarget);
case ARMISD::VLD1DUP:
case ARMISD::VLD2DUP:
case ARMISD::VLD3DUP:
@@ -17929,7 +18446,7 @@ EVT ARMTargetLowering::getOptimalMemOpType(
const MemOp &Op, const AttributeList &FuncAttributes) const {
// See if we can use NEON instructions for this...
if ((Op.isMemcpy() || Op.isZeroMemset()) && Subtarget->hasNEON() &&
- !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
+ !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
bool Fast;
if (Op.size() >= 16 &&
(Op.isAligned(Align(16)) ||
@@ -18086,18 +18603,27 @@ bool ARMTargetLowering::shouldSinkOperands(Instruction *I,
switch (II->getIntrinsicID()) {
case Intrinsic::fma:
return !IsFMS(I);
+ case Intrinsic::sadd_sat:
+ case Intrinsic::uadd_sat:
case Intrinsic::arm_mve_add_predicated:
case Intrinsic::arm_mve_mul_predicated:
case Intrinsic::arm_mve_qadd_predicated:
+ case Intrinsic::arm_mve_vhadd:
case Intrinsic::arm_mve_hadd_predicated:
+ case Intrinsic::arm_mve_vqdmull:
case Intrinsic::arm_mve_vqdmull_predicated:
+ case Intrinsic::arm_mve_vqdmulh:
case Intrinsic::arm_mve_qdmulh_predicated:
+ case Intrinsic::arm_mve_vqrdmulh:
case Intrinsic::arm_mve_qrdmulh_predicated:
case Intrinsic::arm_mve_fma_predicated:
return true;
+ case Intrinsic::ssub_sat:
+ case Intrinsic::usub_sat:
case Intrinsic::arm_mve_sub_predicated:
case Intrinsic::arm_mve_qsub_predicated:
case Intrinsic::arm_mve_hsub_predicated:
+ case Intrinsic::arm_mve_vhsub:
return Operand == 1;
default:
return false;
@@ -18508,6 +19034,31 @@ bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const {
return AbsImm >= 0 && AbsImm <= 255;
}
+// Return false to prevent folding
+// (mul (add r, c0), c1) -> (add (mul r, c1), c0*c1) in DAGCombine,
+// if the folding leads to worse code.
+bool ARMTargetLowering::isMulAddWithConstProfitable(
+ const SDValue &AddNode, const SDValue &ConstNode) const {
+ // Let the DAGCombiner decide for vector types and large types.
+ const EVT VT = AddNode.getValueType();
+ if (VT.isVector() || VT.getScalarSizeInBits() > 32)
+ return true;
+
+ // It is worse if c0 is legal add immediate, while c1*c0 is not
+ // and has to be composed by at least two instructions.
+ const ConstantSDNode *C0Node = cast<ConstantSDNode>(AddNode.getOperand(1));
+ const ConstantSDNode *C1Node = cast<ConstantSDNode>(ConstNode);
+ const int64_t C0 = C0Node->getSExtValue();
+ APInt CA = C0Node->getAPIntValue() * C1Node->getAPIntValue();
+ if (!isLegalAddImmediate(C0) || isLegalAddImmediate(CA.getSExtValue()))
+ return true;
+ if (ConstantMaterializationCost((unsigned)CA.getZExtValue(), Subtarget) > 1)
+ return false;
+
+ // Default to true and let the DAGCombiner decide.
+ return true;
+}
+
static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
bool isSEXTLoad, SDValue &Base,
SDValue &Offset, bool &isInc,
@@ -19015,8 +19566,8 @@ bool ARMTargetLowering::SimplifyDemandedBitsForTargetNode(
if (Op.getResNo() == 0 && !Op->hasAnyUseOfValue(1) &&
isa<ConstantSDNode>(Op->getOperand(2))) {
unsigned ShAmt = Op->getConstantOperandVal(2);
- if (ShAmt < 32 && OriginalDemandedBits.isSubsetOf(
- APInt::getAllOnesValue(32) << (32 - ShAmt)))
+ if (ShAmt < 32 && OriginalDemandedBits.isSubsetOf(APInt::getAllOnes(32)
+ << (32 - ShAmt)))
return TLO.CombineTo(
Op, TLO.DAG.getNode(
ISD::SHL, SDLoc(Op), MVT::i32, Op.getOperand(1),
@@ -19760,7 +20311,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
- Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
+ Value *AlignArg = I.getArgOperand(I.arg_size() - 1);
Info.align = cast<ConstantInt>(AlignArg)->getMaybeAlignValue();
// volatile loads with NEON intrinsics not supported
Info.flags = MachineMemOperand::MOLoad;
@@ -19774,7 +20325,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
- Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
+ Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
Info.offset = 0;
Info.align.reset();
// volatile loads with NEON intrinsics not supported
@@ -19792,7 +20343,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
// Conservatively set memVT to the entire set of vectors stored.
auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
unsigned NumElts = 0;
- for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
+ for (unsigned ArgI = 1, ArgE = I.arg_size(); ArgI < ArgE; ++ArgI) {
Type *ArgTy = I.getArgOperand(ArgI)->getType();
if (!ArgTy->isVectorTy())
break;
@@ -19801,7 +20352,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
- Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
+ Value *AlignArg = I.getArgOperand(I.arg_size() - 1);
Info.align = cast<ConstantInt>(AlignArg)->getMaybeAlignValue();
// volatile stores with NEON intrinsics not supported
Info.flags = MachineMemOperand::MOStore;
@@ -19814,7 +20365,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
// Conservatively set memVT to the entire set of vectors stored.
auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
unsigned NumElts = 0;
- for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
+ for (unsigned ArgI = 1, ArgE = I.arg_size(); ArgI < ArgE; ++ArgI) {
Type *ArgTy = I.getArgOperand(ArgI)->getType();
if (!ArgTy->isVectorTy())
break;
@@ -20128,10 +20679,7 @@ bool ARMTargetLowering::shouldInsertFencesForAtomic(
return InsertFencesForAtomic;
}
-// This has so far only been implemented for MachO.
-bool ARMTargetLowering::useLoadStackGuardNode() const {
- return Subtarget->isTargetMachO();
-}
+bool ARMTargetLowering::useLoadStackGuardNode() const { return true; }
void ARMTargetLowering::insertSSPDeclarations(Module &M) const {
if (!Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
@@ -20146,7 +20694,7 @@ void ARMTargetLowering::insertSSPDeclarations(Module &M) const {
"__security_check_cookie", Type::getVoidTy(M.getContext()),
Type::getInt8PtrTy(M.getContext()));
if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee()))
- F->addAttribute(1, Attribute::AttrKind::InReg);
+ F->addParamAttr(0, Attribute::AttrKind::InReg);
}
Value *ARMTargetLowering::getSDagStackGuard(const Module &M) const {
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 844b7d4f1707..0fddd58e178e 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -680,7 +680,7 @@ class VectorType;
unsigned &Cost) const override;
bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
- const SelectionDAG &DAG) const override {
+ const MachineFunction &MF) const override {
// Do not merge to larger than i32.
return (MemVT.getSizeInBits() <= 32);
}
@@ -712,6 +712,9 @@ class VectorType;
Align Alignment,
const DataLayout &DL) const;
+ bool isMulAddWithConstProfitable(const SDValue &AddNode,
+ const SDValue &ConstNode) const override;
+
bool alignLoopsWithOptSize() const override;
/// Returns the number of interleaved accesses that will be generated when
diff --git a/llvm/lib/Target/ARM/ARMInstrCDE.td b/llvm/lib/Target/ARM/ARMInstrCDE.td
index 0e97668e2e01..54e27a6be558 100644
--- a/llvm/lib/Target/ARM/ARMInstrCDE.td
+++ b/llvm/lib/Target/ARM/ARMInstrCDE.td
@@ -612,14 +612,14 @@ multiclass VCXPredicatedPat_m<MVEVectorVTInfo VTI> {
(VTI.Vec MQPR:$inactive), timm:$imm,
(VTI.Pred VCCR:$pred))),
(VTI.Vec (CDE_VCX1_vec p_imm:$coproc, imm_12b:$imm, ARMVCCThen,
- (VTI.Pred VCCR:$pred),
+ (VTI.Pred VCCR:$pred), zero_reg,
(VTI.Vec MQPR:$inactive)))>;
def : Pat<(VTI.Vec (int_arm_cde_vcx1qa_predicated timm:$coproc,
(VTI.Vec MQPR:$acc), timm:$imm,
(VTI.Pred VCCR:$pred))),
(VTI.Vec (CDE_VCX1A_vec p_imm:$coproc, (VTI.Vec MQPR:$acc),
imm_12b:$imm, ARMVCCThen,
- (VTI.Pred VCCR:$pred)))>;
+ (VTI.Pred VCCR:$pred), zero_reg))>;
def : Pat<(VTI.Vec (int_arm_cde_vcx2q_predicated timm:$coproc,
(VTI.Vec MQPR:$inactive),
@@ -627,7 +627,7 @@ multiclass VCXPredicatedPat_m<MVEVectorVTInfo VTI> {
(VTI.Pred VCCR:$pred))),
(VTI.Vec (CDE_VCX2_vec p_imm:$coproc, (v16i8 MQPR:$n),
imm_7b:$imm, ARMVCCThen,
- (VTI.Pred VCCR:$pred),
+ (VTI.Pred VCCR:$pred), zero_reg,
(VTI.Vec MQPR:$inactive)))>;
def : Pat<(VTI.Vec (int_arm_cde_vcx2qa_predicated timm:$coproc,
(VTI.Vec MQPR:$acc),
@@ -635,7 +635,7 @@ multiclass VCXPredicatedPat_m<MVEVectorVTInfo VTI> {
(VTI.Pred VCCR:$pred))),
(VTI.Vec (CDE_VCX2A_vec p_imm:$coproc, (VTI.Vec MQPR:$acc),
(v16i8 MQPR:$n), timm:$imm, ARMVCCThen,
- (VTI.Pred VCCR:$pred)))>;
+ (VTI.Pred VCCR:$pred), zero_reg))>;
def : Pat<(VTI.Vec (int_arm_cde_vcx3q_predicated timm:$coproc,
(VTI.Vec MQPR:$inactive),
@@ -645,7 +645,7 @@ multiclass VCXPredicatedPat_m<MVEVectorVTInfo VTI> {
(VTI.Vec (CDE_VCX3_vec p_imm:$coproc, (v16i8 MQPR:$n),
(v16i8 MQPR:$m),
imm_4b:$imm, ARMVCCThen,
- (VTI.Pred VCCR:$pred),
+ (VTI.Pred VCCR:$pred), zero_reg,
(VTI.Vec MQPR:$inactive)))>;
def : Pat<(VTI.Vec (int_arm_cde_vcx3qa_predicated timm:$coproc,
(VTI.Vec MQPR:$acc),
@@ -654,7 +654,7 @@ multiclass VCXPredicatedPat_m<MVEVectorVTInfo VTI> {
(VTI.Vec (CDE_VCX3A_vec p_imm:$coproc, (VTI.Vec MQPR:$acc),
(v16i8 MQPR:$n), (v16i8 MQPR:$m),
imm_4b:$imm, ARMVCCThen,
- (VTI.Pred VCCR:$pred)))>;
+ (VTI.Pred VCCR:$pred), zero_reg))>;
}
let Predicates = [HasCDE, HasMVEInt] in
diff --git a/llvm/lib/Target/ARM/ARMInstrFormats.td b/llvm/lib/Target/ARM/ARMInstrFormats.td
index 85da7c5a535e..de351372abf2 100644
--- a/llvm/lib/Target/ARM/ARMInstrFormats.td
+++ b/llvm/lib/Target/ARM/ARMInstrFormats.td
@@ -249,10 +249,10 @@ def VPTPredROperand : AsmOperandClass {
// Base class for both kinds of vpred.
class vpred_ops<dag extra_op, dag extra_mi> : OperandWithDefaultOps<OtherVT,
- !con((ops (i32 0), (i32 zero_reg)), extra_op)> {
+ !con((ops (i32 0), (i32 zero_reg), (i32 zero_reg)), extra_op)> {
let PrintMethod = "printVPTPredicateOperand";
let OperandNamespace = "ARM";
- let MIOperandInfo = !con((ops i32imm:$cond, VCCR:$cond_reg), extra_mi);
+ let MIOperandInfo = !con((ops i32imm:$cond, VCCR:$cond_reg, GPRlr:$tp_reg), extra_mi);
// For convenience, we provide a string value that can be appended
// to the constraints string. It's empty for vpred_n, and for
@@ -408,6 +408,7 @@ class InstTemplate<AddrMode am, int sz, IndexMode im,
// in an IT block).
bit thumbArithFlagSetting = 0;
+ bits<2> VecSize = 0;
bit validForTailPredication = 0;
bit retainsPreviousHalfElement = 0;
bit horizontalReduction = 0;
@@ -428,6 +429,7 @@ class InstTemplate<AddrMode am, int sz, IndexMode im,
let TSFlags{21} = retainsPreviousHalfElement;
let TSFlags{22} = horizontalReduction;
let TSFlags{23} = doubleWidthResult;
+ let TSFlags{25-24} = VecSize;
let Constraints = cstr;
let Itinerary = itin;
@@ -1385,8 +1387,8 @@ class ThumbXI<dag oops, dag iops, AddrMode am, int sz,
}
class T2I<dag oops, dag iops, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
- : Thumb2I<oops, iops, AddrModeNone, 4, itin, opc, asm, "", pattern>;
+ string opc, string asm, list<dag> pattern, AddrMode am = AddrModeNone>
+ : Thumb2I<oops, iops, am, 4, itin, opc, asm, "", pattern>;
class T2Ii12<dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: Thumb2I<oops, iops, AddrModeT2_i12, 4, itin, opc, asm, "",pattern>;
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.cpp b/llvm/lib/Target/ARM/ARMInstrInfo.cpp
index 3c6c6960b80f..5dee5e04af81 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.cpp
@@ -95,8 +95,17 @@ void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI) const {
MachineFunction &MF = *MI->getParent()->getParent();
const ARMSubtarget &Subtarget = MF.getSubtarget<ARMSubtarget>();
const TargetMachine &TM = MF.getTarget();
+ Module &M = *MF.getFunction().getParent();
- if (!Subtarget.useMovt()) {
+ if (M.getStackProtectorGuard() == "tls") {
+ expandLoadStackGuardBase(MI, ARM::MRC, ARM::LDRi12);
+ return;
+ }
+
+ const GlobalValue *GV =
+ cast<GlobalValue>((*MI->memoperands_begin())->getValue());
+
+ if (!Subtarget.useMovt() || Subtarget.isGVInGOT(GV)) {
if (TM.isPositionIndependent())
expandLoadStackGuardBase(MI, ARM::LDRLIT_ga_pcrel, ARM::LDRi12);
else
@@ -109,9 +118,6 @@ void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI) const {
return;
}
- const GlobalValue *GV =
- cast<GlobalValue>((*MI->memoperands_begin())->getValue());
-
if (!Subtarget.isGVIndirectSymbol(GV)) {
expandLoadStackGuardBase(MI, ARM::MOV_ga_pcrel, ARM::LDRi12);
return;
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index 7466cecb9b33..7d0bc756e882 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -1252,7 +1252,7 @@ def addrmode5_pre : AddrMode5 {
// addrmode5fp16 := reg +/- imm8*2
//
def AddrMode5FP16AsmOperand : AsmOperandClass { let Name = "AddrMode5FP16"; }
-class AddrMode5FP16 : Operand<i32>,
+class AddrMode5FP16 : MemOperand,
ComplexPattern<i32, 2, "SelectAddrMode5FP16", []> {
let EncoderMethod = "getAddrMode5FP16OpValue";
let DecoderMethod = "DecodeAddrMode5FP16Operand";
@@ -1589,7 +1589,7 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc,
let TwoOperandAliasConstraint = "$Rn = $Rd" in
multiclass AsI1_rbin_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- SDNode opnode, bit Commutable = 0> {
+ SDNode opnode> {
// The register-immediate version is re-materializable. This is useful
// in particular for taking the address of a local.
let isReMaterializable = 1 in {
@@ -1693,9 +1693,8 @@ multiclass AsI1_bin_s_irs<InstrItinClass iii, InstrItinClass iir,
/// AsI1_rbin_s_is - Same as AsI1_bin_s_irs, except selection DAG
/// operands are reversed.
let hasPostISelHook = 1, Defs = [CPSR] in {
-multiclass AsI1_rbin_s_is<InstrItinClass iii, InstrItinClass iir,
- InstrItinClass iis, SDNode opnode,
- bit Commutable = 0> {
+multiclass AsI1_rbin_s_is<InstrItinClass iii,
+ InstrItinClass iis, SDNode opnode> {
def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, mod_imm:$imm, pred:$p),
4, iii,
[(set GPR:$Rd, CPSR, (opnode mod_imm:$imm, GPR:$Rn))]>,
@@ -3853,7 +3852,7 @@ defm RSB : AsI1_rbin_irs<0b0011, "rsb",
// FIXME: Eliminate them if we can write def : Pat patterns which defines
// CPSR and the implicit def of CPSR is not needed.
-defm RSBS : AsI1_rbin_s_is<IIC_iALUi, IIC_iALUr, IIC_iALUsr, ARMsubc>;
+defm RSBS : AsI1_rbin_s_is<IIC_iALUi, IIC_iALUsr, ARMsubc>;
defm RSC : AI1_rsc_irs<0b0111, "rsc", ARMsube>;
@@ -5391,14 +5390,16 @@ def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
}
class ACI<dag oops, dag iops, string opc, string asm,
- list<dag> pattern, IndexMode im = IndexModeNone>
- : I<oops, iops, AddrModeNone, 4, im, BrFrm, NoItinerary,
+ list<dag> pattern, IndexMode im = IndexModeNone,
+ AddrMode am = AddrModeNone>
+ : I<oops, iops, am, 4, im, BrFrm, NoItinerary,
opc, asm, "", pattern> {
let Inst{27-25} = 0b110;
}
class ACInoP<dag oops, dag iops, string opc, string asm,
- list<dag> pattern, IndexMode im = IndexModeNone>
- : InoP<oops, iops, AddrModeNone, 4, im, BrFrm, NoItinerary,
+ list<dag> pattern, IndexMode im = IndexModeNone,
+ AddrMode am = AddrModeNone>
+ : InoP<oops, iops, am, 4, im, BrFrm, NoItinerary,
opc, asm, "", pattern> {
let Inst{31-28} = 0b1111;
let Inst{27-25} = 0b110;
@@ -5407,7 +5408,8 @@ class ACInoP<dag oops, dag iops, string opc, string asm,
let DecoderNamespace = "CoProc" in {
multiclass LdStCop<bit load, bit Dbit, string asm, list<dag> pattern> {
def _OFFSET : ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr),
- asm, "\t$cop, $CRd, $addr", pattern> {
+ asm, "\t$cop, $CRd, $addr", pattern, IndexModeNone,
+ AddrMode5> {
bits<13> addr;
bits<4> cop;
bits<4> CRd;
@@ -5478,7 +5480,8 @@ multiclass LdStCop<bit load, bit Dbit, string asm, list<dag> pattern> {
}
multiclass LdSt2Cop<bit load, bit Dbit, string asm, list<dag> pattern> {
def _OFFSET : ACInoP<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr),
- asm, "\t$cop, $CRd, $addr", pattern> {
+ asm, "\t$cop, $CRd, $addr", pattern, IndexModeNone,
+ AddrMode5> {
bits<13> addr;
bits<4> cop;
bits<4> CRd;
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index 372893814092..697730037277 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -97,7 +97,7 @@ def VecList2QAsmOperand : AsmOperandClass {
"q-registers in range [q0,q7]";
}
-def VecList2Q : RegisterOperand<QQPR, "printMVEVectorListTwoQ"> {
+def VecList2Q : RegisterOperand<MQQPR, "printMVEVectorListTwoQ"> {
let ParserMatchClass = VecList2QAsmOperand;
let PrintMethod = "printMVEVectorList<2>";
}
@@ -110,7 +110,7 @@ def VecList4QAsmOperand : AsmOperandClass {
"q-registers in range [q0,q7]";
}
-def VecList4Q : RegisterOperand<QQQQPR, "printMVEVectorListFourQ"> {
+def VecList4Q : RegisterOperand<MQQQQPR, "printMVEVectorListFourQ"> {
let ParserMatchClass = VecList4QAsmOperand;
let PrintMethod = "printMVEVectorList<4>";
}
@@ -332,7 +332,7 @@ multiclass MVE_TwoOpPattern<MVEVectorVTInfo VTI, SDPatternOperator Op, Intrinsic
(VTI.Vec MQPR:$Qn))),
(VTI.Vec MQPR:$inactive))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- ARMVCCThen, (VTI.Pred VCCR:$mask),
+ ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg,
(VTI.Vec MQPR:$inactive)))>;
// Optionally with the select folded through the op
@@ -341,7 +341,7 @@ multiclass MVE_TwoOpPattern<MVEVectorVTInfo VTI, SDPatternOperator Op, Intrinsic
(VTI.Vec MQPR:$Qn),
(VTI.Vec IdentityVec))))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- ARMVCCThen, (VTI.Pred VCCR:$mask),
+ ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg,
(VTI.Vec MQPR:$Qm)))>;
}
@@ -350,7 +350,7 @@ multiclass MVE_TwoOpPattern<MVEVectorVTInfo VTI, SDPatternOperator Op, Intrinsic
PredOperands,
(? (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive)))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- ARMVCCThen, (VTI.Pred VCCR:$mask),
+ ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg,
(VTI.Vec MQPR:$inactive)))>;
}
@@ -368,7 +368,7 @@ multiclass MVE_TwoOpPatternDup<MVEVectorVTInfo VTI, SDPatternOperator Op, Intrin
(VTI.Vec (ARMvdup rGPR:$Rn)))),
(VTI.Vec MQPR:$inactive))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qm), rGPR:$Rn,
- ARMVCCThen, (VTI.Pred VCCR:$mask),
+ ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg,
(VTI.Vec MQPR:$inactive)))>;
// Optionally with the select folded through the op
@@ -377,7 +377,7 @@ multiclass MVE_TwoOpPatternDup<MVEVectorVTInfo VTI, SDPatternOperator Op, Intrin
(ARMvdup rGPR:$Rn),
(VTI.Vec IdentityVec))))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qm), rGPR:$Rn,
- ARMVCCThen, (VTI.Pred VCCR:$mask),
+ ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg,
(VTI.Vec MQPR:$Qm)))>;
}
@@ -386,19 +386,20 @@ multiclass MVE_TwoOpPatternDup<MVEVectorVTInfo VTI, SDPatternOperator Op, Intrin
PredOperands,
(? (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive)))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qm), rGPR:$Rn,
- ARMVCCThen, (VTI.Pred VCCR:$mask),
+ ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg,
(VTI.Vec MQPR:$inactive)))>;
}
// --------- Start of base classes for the instructions themselves
class MVE_MI<dag oops, dag iops, InstrItinClass itin, string asm,
- string ops, string cstr, list<dag> pattern>
+ string ops, string cstr, bits<2> vecsize, list<dag> pattern>
: Thumb2XI<oops, iops, AddrModeNone, 4, itin, !strconcat(asm, "\t", ops), cstr,
pattern>,
Requires<[HasMVEInt]> {
let D = MVEDomain;
let DecoderNamespace = "MVE";
+ let VecSize = vecsize;
}
// MVE_p is used for most predicated instructions, to add the cluster
@@ -406,22 +407,22 @@ class MVE_MI<dag oops, dag iops, InstrItinClass itin, string asm,
// the input predicate register.
class MVE_p<dag oops, dag iops, InstrItinClass itin, string iname,
string suffix, string ops, vpred_ops vpred, string cstr,
- list<dag> pattern=[]>
+ bits<2> vecsize, list<dag> pattern=[]>
: MVE_MI<oops, !con(iops, (ins vpred:$vp)), itin,
// If the instruction has a suffix, like vadd.f32, then the
// VPT predication suffix goes before the dot, so the full
// name has to be "vadd${vp}.f32".
!strconcat(iname, "${vp}",
!if(!eq(suffix, ""), "", !strconcat(".", suffix))),
- ops, !strconcat(cstr, vpred.vpred_constraint), pattern> {
+ ops, !strconcat(cstr, vpred.vpred_constraint), vecsize, pattern> {
let Inst{31-29} = 0b111;
let Inst{27-26} = 0b11;
}
class MVE_f<dag oops, dag iops, InstrItinClass itin, string iname,
string suffix, string ops, vpred_ops vpred, string cstr,
- list<dag> pattern=[]>
- : MVE_p<oops, iops, itin, iname, suffix, ops, vpred, cstr, pattern> {
+ bits<2> vecsize, list<dag> pattern=[]>
+ : MVE_p<oops, iops, itin, iname, suffix, ops, vpred, cstr, vecsize, pattern> {
let Predicates = [HasMVEFloat];
}
@@ -599,11 +600,11 @@ def MVE_URSHRL : MVE_ScalarShiftDRegImm<"urshrl", 0b01, 0b1>;
class MVE_rDest<dag oops, dag iops, InstrItinClass itin,
string iname, string suffix,
- string ops, string cstr, list<dag> pattern=[]>
+ string ops, string cstr, bits<2> vecsize, list<dag> pattern=[]>
// Always use vpred_n and not vpred_r: with the output register being
// a GPR and not a vector register, there can't be any question of
// what to put in its inactive lanes.
- : MVE_p<oops, iops, itin, iname, suffix, ops, vpred_n, cstr, pattern> {
+ : MVE_p<oops, iops, itin, iname, suffix, ops, vpred_n, cstr, vecsize, pattern> {
let Inst{25-23} = 0b101;
let Inst{11-9} = 0b111;
@@ -613,7 +614,7 @@ class MVE_rDest<dag oops, dag iops, InstrItinClass itin,
class MVE_VABAV<string suffix, bit U, bits<2> size>
: MVE_rDest<(outs rGPR:$Rda), (ins rGPR:$Rda_src, MQPR:$Qn, MQPR:$Qm),
NoItinerary, "vabav", suffix, "$Rda, $Qn, $Qm", "$Rda = $Rda_src",
- []> {
+ size, []> {
bits<4> Qm;
bits<4> Qn;
bits<4> Rda;
@@ -652,7 +653,7 @@ multiclass MVE_VABAV_m<MVEVectorVTInfo VTI> {
(VTI.Pred VCCR:$mask))),
(i32 (Inst (i32 rGPR:$Rda_src),
(VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
- ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
+ ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg))>;
}
}
@@ -666,7 +667,7 @@ defm MVE_VABAVu32 : MVE_VABAV_m<MVE_v4u32>;
class MVE_VADDV<string iname, string suffix, dag iops, string cstr,
bit A, bit U, bits<2> size, list<dag> pattern=[]>
: MVE_rDest<(outs tGPREven:$Rda), iops, NoItinerary,
- iname, suffix, "$Rda, $Qm", cstr, pattern> {
+ iname, suffix, "$Rda, $Qm", cstr, size, pattern> {
bits<3> Qm;
bits<4> Rda;
@@ -710,11 +711,11 @@ multiclass MVE_VADDV_A<MVEVectorVTInfo VTI> {
def : Pat<(i32 (vecreduce_add (VTI.Vec (vselect (VTI.Pred VCCR:$pred),
(VTI.Vec MQPR:$vec),
(VTI.Vec ARMimmAllZerosV))))),
- (i32 (InstN $vec, ARMVCCThen, $pred))>;
+ (i32 (InstN $vec, ARMVCCThen, $pred, zero_reg))>;
def : Pat<(i32 (ARMVADDVu (VTI.Vec MQPR:$vec))),
(i32 (InstN $vec))>;
def : Pat<(i32 (ARMVADDVpu (VTI.Vec MQPR:$vec), (VTI.Pred VCCR:$pred))),
- (i32 (InstN $vec, ARMVCCThen, $pred))>;
+ (i32 (InstN $vec, ARMVCCThen, $pred, zero_reg))>;
def : Pat<(i32 (add (i32 (vecreduce_add (VTI.Vec MQPR:$vec))),
(i32 tGPREven:$acc))),
(i32 (InstA $acc, $vec))>;
@@ -722,13 +723,13 @@ multiclass MVE_VADDV_A<MVEVectorVTInfo VTI> {
(VTI.Vec MQPR:$vec),
(VTI.Vec ARMimmAllZerosV))))),
(i32 tGPREven:$acc))),
- (i32 (InstA $acc, $vec, ARMVCCThen, $pred))>;
+ (i32 (InstA $acc, $vec, ARMVCCThen, $pred, zero_reg))>;
def : Pat<(i32 (add (i32 (ARMVADDVu (VTI.Vec MQPR:$vec))),
(i32 tGPREven:$acc))),
(i32 (InstA $acc, $vec))>;
def : Pat<(i32 (add (i32 (ARMVADDVpu (VTI.Vec MQPR:$vec), (VTI.Pred VCCR:$pred))),
(i32 tGPREven:$acc))),
- (i32 (InstA $acc, $vec, ARMVCCThen, $pred))>;
+ (i32 (InstA $acc, $vec, ARMVCCThen, $pred, zero_reg))>;
} else {
def : Pat<(i32 (ARMVADDVs (VTI.Vec MQPR:$vec))),
(i32 (InstN $vec))>;
@@ -736,21 +737,21 @@ multiclass MVE_VADDV_A<MVEVectorVTInfo VTI> {
(i32 tGPREven:$acc))),
(i32 (InstA $acc, $vec))>;
def : Pat<(i32 (ARMVADDVps (VTI.Vec MQPR:$vec), (VTI.Pred VCCR:$pred))),
- (i32 (InstN $vec, ARMVCCThen, $pred))>;
+ (i32 (InstN $vec, ARMVCCThen, $pred, zero_reg))>;
def : Pat<(i32 (add (i32 (ARMVADDVps (VTI.Vec MQPR:$vec), (VTI.Pred VCCR:$pred))),
(i32 tGPREven:$acc))),
- (i32 (InstA $acc, $vec, ARMVCCThen, $pred))>;
+ (i32 (InstA $acc, $vec, ARMVCCThen, $pred, zero_reg))>;
}
def : Pat<(i32 (int_arm_mve_addv_predicated (VTI.Vec MQPR:$vec),
(i32 VTI.Unsigned),
(VTI.Pred VCCR:$pred))),
- (i32 (InstN $vec, ARMVCCThen, $pred))>;
+ (i32 (InstN $vec, ARMVCCThen, $pred, zero_reg))>;
def : Pat<(i32 (add (int_arm_mve_addv_predicated (VTI.Vec MQPR:$vec),
(i32 VTI.Unsigned),
(VTI.Pred VCCR:$pred)),
(i32 tGPREven:$acc))),
- (i32 (InstA $acc, $vec, ARMVCCThen, $pred))>;
+ (i32 (InstA $acc, $vec, ARMVCCThen, $pred, zero_reg))>;
}
}
@@ -764,7 +765,7 @@ defm MVE_VADDVu32 : MVE_VADDV_A<MVE_v4u32>;
class MVE_VADDLV<string iname, string suffix, dag iops, string cstr,
bit A, bit U, list<dag> pattern=[]>
: MVE_rDest<(outs tGPREven:$RdaLo, tGPROdd:$RdaHi), iops, NoItinerary, iname,
- suffix, "$RdaLo, $RdaHi, $Qm", cstr, pattern> {
+ suffix, "$RdaLo, $RdaHi, $Qm", cstr, 0b10, pattern> {
bits<3> Qm;
bits<4> RdaLo;
bits<4> RdaHi;
@@ -821,11 +822,11 @@ multiclass MVE_VADDLV_A<MVEVectorVTInfo VTI> {
def : Pat<(ARMVADDLVA tGPREven:$acclo, tGPROdd:$acchi, (v4i32 MQPR:$vec)),
(InstA tGPREven:$acclo, tGPROdd:$acchi, (v4i32 MQPR:$vec))>;
def : Pat<(ARMVADDLVp (v4i32 MQPR:$vec), (VTI.Pred VCCR:$pred)),
- (InstN (v4i32 MQPR:$vec), ARMVCCThen, (VTI.Pred VCCR:$pred))>;
+ (InstN (v4i32 MQPR:$vec), ARMVCCThen, (VTI.Pred VCCR:$pred), zero_reg)>;
def : Pat<(ARMVADDLVAp tGPREven:$acclo, tGPROdd:$acchi, (v4i32 MQPR:$vec),
(VTI.Pred VCCR:$pred)),
(InstA tGPREven:$acclo, tGPROdd:$acchi, (v4i32 MQPR:$vec),
- ARMVCCThen, (VTI.Pred VCCR:$pred))>;
+ ARMVCCThen, (VTI.Pred VCCR:$pred), zero_reg)>;
}
}
@@ -836,7 +837,7 @@ class MVE_VMINMAXNMV<string iname, string suffix, bit sz,
bit bit_17, bit bit_7, list<dag> pattern=[]>
: MVE_rDest<(outs rGPR:$RdaDest), (ins rGPR:$RdaSrc, MQPR:$Qm),
NoItinerary, iname, suffix, "$RdaSrc, $Qm",
- "$RdaDest = $RdaSrc", pattern> {
+ "$RdaDest = $RdaSrc", !if(sz, 0b01, 0b10), pattern> {
bits<3> Qm;
bits<4> RdaDest;
@@ -876,7 +877,7 @@ multiclass MVE_VMINMAXNMV_p<string iname, bit notAbs, bit isMin,
(VTI.Pred VCCR:$pred))),
(COPY_TO_REGCLASS (Inst (COPY_TO_REGCLASS ScalarReg:$prev, rGPR),
(VTI.Vec MQPR:$vec),
- ARMVCCThen, (VTI.Pred VCCR:$pred)),
+ ARMVCCThen, (VTI.Pred VCCR:$pred), zero_reg),
ScalarReg)>;
}
}
@@ -897,7 +898,7 @@ defm MVE_VMAXNMAV: MVE_VMINMAXNMV_fty<"vmaxnmav", 0, 0, "int_arm_mve_maxnmav">;
class MVE_VMINMAXV<string iname, string suffix, bit U, bits<2> size,
bit bit_17, bit bit_7, list<dag> pattern=[]>
: MVE_rDest<(outs rGPR:$RdaDest), (ins rGPR:$RdaSrc, MQPR:$Qm), NoItinerary,
- iname, suffix, "$RdaSrc, $Qm", "$RdaDest = $RdaSrc", pattern> {
+ iname, suffix, "$RdaSrc, $Qm", "$RdaDest = $RdaSrc", size, pattern> {
bits<3> Qm;
bits<4> RdaDest;
@@ -931,7 +932,7 @@ multiclass MVE_VMINMAXV_p<string iname, bit notAbs, bit isMin,
(i32 (Inst (i32 rGPR:$prev), (VTI.Vec MQPR:$vec)))>;
def : Pat<(i32 !con(args, (pred_intr (VTI.Pred VCCR:$pred)))),
(i32 (Inst (i32 rGPR:$prev), (VTI.Vec MQPR:$vec),
- ARMVCCThen, (VTI.Pred VCCR:$pred)))>;
+ ARMVCCThen, (VTI.Pred VCCR:$pred), zero_reg))>;
}
}
@@ -1020,9 +1021,10 @@ defm MVE_VMINAV : MVE_VMINMAXAV_ty<"vminav", 1, "int_arm_mve_minav">;
defm MVE_VMAXAV : MVE_VMINMAXAV_ty<"vmaxav", 0, "int_arm_mve_maxav">;
class MVE_VMLAMLSDAV<string iname, string suffix, dag iops, string cstr,
- bit sz, bit bit_28, bit A, bit X, bit bit_8, bit bit_0>
+ bit sz, bit bit_28, bit A, bit X, bit bit_8, bit bit_0,
+ bits<2> vecsize>
: MVE_rDest<(outs tGPREven:$RdaDest), iops, NoItinerary, iname, suffix,
- "$RdaDest, $Qn, $Qm", cstr, []> {
+ "$RdaDest, $Qn, $Qm", cstr, vecsize, []> {
bits<4> RdaDest;
bits<3> Qm;
bits<3> Qn;
@@ -1050,11 +1052,11 @@ multiclass MVE_VMLAMLSDAV_A<string iname, string x, MVEVectorVTInfo VTI,
bit sz, bit bit_28, bit X, bit bit_8, bit bit_0> {
def ""#x#VTI.Suffix : MVE_VMLAMLSDAV<iname # x, VTI.Suffix,
(ins MQPR:$Qn, MQPR:$Qm), "",
- sz, bit_28, 0b0, X, bit_8, bit_0>;
+ sz, bit_28, 0b0, X, bit_8, bit_0, VTI.Size>;
def "a"#x#VTI.Suffix : MVE_VMLAMLSDAV<iname # "a" # x, VTI.Suffix,
(ins tGPREven:$RdaSrc, MQPR:$Qn, MQPR:$Qm),
"$RdaDest = $RdaSrc",
- sz, bit_28, 0b1, X, bit_8, bit_0>;
+ sz, bit_28, 0b1, X, bit_8, bit_0, VTI.Size>;
let Predicates = [HasMVEInt] in {
def : Pat<(i32 (int_arm_mve_vmldava
(i32 VTI.Unsigned),
@@ -1074,7 +1076,7 @@ multiclass MVE_VMLAMLSDAV_A<string iname, string x, MVEVectorVTInfo VTI,
(VTI.Pred VCCR:$mask))),
(i32 (!cast<Instruction>(NAME # x # VTI.Suffix)
(VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
- ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
+ ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg))>;
def : Pat<(i32 (int_arm_mve_vmldava
(i32 VTI.Unsigned),
@@ -1096,7 +1098,7 @@ multiclass MVE_VMLAMLSDAV_A<string iname, string x, MVEVectorVTInfo VTI,
(i32 (!cast<Instruction>(NAME # "a" # x # VTI.Suffix)
(i32 tGPREven:$RdaSrc),
(VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
- ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
+ ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg))>;
}
}
@@ -1200,47 +1202,47 @@ let Predicates = [HasMVEInt] in {
def : Pat<(i32 (vecreduce_add (vselect (v4i1 VCCR:$pred),
(mul (v4i32 MQPR:$src1), (v4i32 MQPR:$src2)),
(v4i32 ARMimmAllZerosV)))),
- (i32 (MVE_VMLADAVu32 $src1, $src2, ARMVCCThen, $pred))>;
+ (i32 (MVE_VMLADAVu32 $src1, $src2, ARMVCCThen, $pred, zero_reg))>;
def : Pat<(i32 (vecreduce_add (vselect (v8i1 VCCR:$pred),
(mul (v8i16 MQPR:$src1), (v8i16 MQPR:$src2)),
(v8i16 ARMimmAllZerosV)))),
- (i32 (MVE_VMLADAVu16 $src1, $src2, ARMVCCThen, $pred))>;
+ (i32 (MVE_VMLADAVu16 $src1, $src2, ARMVCCThen, $pred, zero_reg))>;
def : Pat<(i32 (ARMVMLAVps (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred))),
- (i32 (MVE_VMLADAVs16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred))>;
+ (i32 (MVE_VMLADAVs16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred, zero_reg))>;
def : Pat<(i32 (ARMVMLAVpu (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred))),
- (i32 (MVE_VMLADAVu16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred))>;
+ (i32 (MVE_VMLADAVu16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred, zero_reg))>;
def : Pat<(i32 (vecreduce_add (vselect (v16i1 VCCR:$pred),
(mul (v16i8 MQPR:$src1), (v16i8 MQPR:$src2)),
(v16i8 ARMimmAllZerosV)))),
- (i32 (MVE_VMLADAVu8 $src1, $src2, ARMVCCThen, $pred))>;
+ (i32 (MVE_VMLADAVu8 $src1, $src2, ARMVCCThen, $pred, zero_reg))>;
def : Pat<(i32 (ARMVMLAVps (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), (v16i1 VCCR:$pred))),
- (i32 (MVE_VMLADAVs8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), ARMVCCThen, $pred))>;
+ (i32 (MVE_VMLADAVs8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), ARMVCCThen, $pred, zero_reg))>;
def : Pat<(i32 (ARMVMLAVpu (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), (v16i1 VCCR:$pred))),
- (i32 (MVE_VMLADAVu8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), ARMVCCThen, $pred))>;
+ (i32 (MVE_VMLADAVu8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), ARMVCCThen, $pred, zero_reg))>;
def : Pat<(i32 (add (i32 (vecreduce_add (vselect (v4i1 VCCR:$pred),
(mul (v4i32 MQPR:$src1), (v4i32 MQPR:$src2)),
(v4i32 ARMimmAllZerosV)))),
(i32 tGPREven:$src3))),
- (i32 (MVE_VMLADAVau32 $src3, $src1, $src2, ARMVCCThen, $pred))>;
+ (i32 (MVE_VMLADAVau32 $src3, $src1, $src2, ARMVCCThen, $pred, zero_reg))>;
def : Pat<(i32 (add (i32 (vecreduce_add (vselect (v8i1 VCCR:$pred),
(mul (v8i16 MQPR:$src1), (v8i16 MQPR:$src2)),
(v8i16 ARMimmAllZerosV)))),
(i32 tGPREven:$src3))),
- (i32 (MVE_VMLADAVau16 $src3, $src1, $src2, ARMVCCThen, $pred))>;
+ (i32 (MVE_VMLADAVau16 $src3, $src1, $src2, ARMVCCThen, $pred, zero_reg))>;
def : Pat<(i32 (add (ARMVMLAVps (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred)), tGPREven:$Rd)),
- (i32 (MVE_VMLADAVas16 tGPREven:$Rd, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred))>;
+ (i32 (MVE_VMLADAVas16 tGPREven:$Rd, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred, zero_reg))>;
def : Pat<(i32 (add (ARMVMLAVpu (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred)), tGPREven:$Rd)),
- (i32 (MVE_VMLADAVau16 tGPREven:$Rd, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred))>;
+ (i32 (MVE_VMLADAVau16 tGPREven:$Rd, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred, zero_reg))>;
def : Pat<(i32 (add (i32 (vecreduce_add (vselect (v16i1 VCCR:$pred),
(mul (v16i8 MQPR:$src1), (v16i8 MQPR:$src2)),
(v16i8 ARMimmAllZerosV)))),
(i32 tGPREven:$src3))),
- (i32 (MVE_VMLADAVau8 $src3, $src1, $src2, ARMVCCThen, $pred))>;
+ (i32 (MVE_VMLADAVau8 $src3, $src1, $src2, ARMVCCThen, $pred, zero_reg))>;
def : Pat<(i32 (add (ARMVMLAVps (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), (v16i1 VCCR:$pred)), tGPREven:$Rd)),
- (i32 (MVE_VMLADAVas8 tGPREven:$Rd, (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), ARMVCCThen, $pred))>;
+ (i32 (MVE_VMLADAVas8 tGPREven:$Rd, (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), ARMVCCThen, $pred, zero_reg))>;
def : Pat<(i32 (add (ARMVMLAVpu (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), (v16i1 VCCR:$pred)), tGPREven:$Rd)),
- (i32 (MVE_VMLADAVau8 tGPREven:$Rd, (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), ARMVCCThen, $pred))>;
+ (i32 (MVE_VMLADAVau8 tGPREven:$Rd, (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), ARMVCCThen, $pred, zero_reg))>;
}
// vmlav aliases vmladav
@@ -1255,9 +1257,9 @@ foreach acc = ["", "a"] in {
// Base class for VMLALDAV and VMLSLDAV, VRMLALDAVH, VRMLSLDAVH
class MVE_VMLALDAVBase<string iname, string suffix, dag iops, string cstr,
bit sz, bit bit_28, bit A, bit X, bit bit_8, bit bit_0,
- list<dag> pattern=[]>
+ bits<2> vecsize, list<dag> pattern=[]>
: MVE_rDest<(outs tGPREven:$RdaLoDest, tGPROdd:$RdaHiDest), iops, NoItinerary,
- iname, suffix, "$RdaLoDest, $RdaHiDest, $Qn, $Qm", cstr, pattern> {
+ iname, suffix, "$RdaLoDest, $RdaHiDest, $Qn, $Qm", cstr, vecsize, pattern> {
bits<4> RdaLoDest;
bits<4> RdaHiDest;
bits<3> Qm;
@@ -1285,35 +1287,35 @@ class MVE_VMLALDAVBase<string iname, string suffix, dag iops, string cstr,
}
multiclass MVE_VMLALDAVBase_A<string iname, string x, string suffix,
- bit sz, bit bit_28, bit X, bit bit_8, bit bit_0,
- list<dag> pattern=[]> {
+ bit sz, bit bit_28, bit X, bit bit_8, bit bit_0,
+ bits<2> vecsize, list<dag> pattern=[]> {
def ""#x#suffix : MVE_VMLALDAVBase<
iname # x, suffix, (ins MQPR:$Qn, MQPR:$Qm), "",
- sz, bit_28, 0b0, X, bit_8, bit_0, pattern>;
+ sz, bit_28, 0b0, X, bit_8, bit_0, vecsize, pattern>;
def "a"#x#suffix : MVE_VMLALDAVBase<
iname # "a" # x, suffix,
(ins tGPREven:$RdaLoSrc, tGPROdd:$RdaHiSrc, MQPR:$Qn, MQPR:$Qm),
"$RdaLoDest = $RdaLoSrc,$RdaHiDest = $RdaHiSrc",
- sz, bit_28, 0b1, X, bit_8, bit_0, pattern>;
+ sz, bit_28, 0b1, X, bit_8, bit_0, vecsize, pattern>;
}
multiclass MVE_VMLALDAVBase_AX<string iname, string suffix, bit sz, bit bit_28,
- bit bit_8, bit bit_0, list<dag> pattern=[]> {
+ bit bit_8, bit bit_0, bits<2> vecsize, list<dag> pattern=[]> {
defm "" : MVE_VMLALDAVBase_A<iname, "", suffix, sz,
- bit_28, 0b0, bit_8, bit_0, pattern>;
+ bit_28, 0b0, bit_8, bit_0, vecsize, pattern>;
defm "" : MVE_VMLALDAVBase_A<iname, "x", suffix, sz,
- bit_28, 0b1, bit_8, bit_0, pattern>;
+ bit_28, 0b1, bit_8, bit_0, vecsize, pattern>;
}
-multiclass MVE_VRMLALDAVH_multi<string suffix, list<dag> pattern=[]> {
- defm "" : MVE_VMLALDAVBase_AX<"vrmlaldavh", "s"#suffix,
- 0b0, 0b0, 0b1, 0b0, pattern>;
- defm "" : MVE_VMLALDAVBase_A<"vrmlaldavh", "", "u"#suffix,
- 0b0, 0b1, 0b0, 0b1, 0b0, pattern>;
+multiclass MVE_VRMLALDAVH_multi<MVEVectorVTInfo VTI, list<dag> pattern=[]> {
+ defm "" : MVE_VMLALDAVBase_AX<"vrmlaldavh", "s"#VTI.BitsSuffix,
+ 0b0, 0b0, 0b1, 0b0, VTI.Size, pattern>;
+ defm "" : MVE_VMLALDAVBase_A<"vrmlaldavh", "", "u"#VTI.BitsSuffix,
+ 0b0, 0b1, 0b0, 0b1, 0b0, VTI.Size, pattern>;
}
-defm MVE_VRMLALDAVH : MVE_VRMLALDAVH_multi<"32">;
+defm MVE_VRMLALDAVH : MVE_VRMLALDAVH_multi<MVE_v4i32>;
// vrmlalvh aliases for vrmlaldavh
def : MVEInstAlias<"vrmlalvh${vp}.s32\t$RdaLo, $RdaHi, $Qn, $Qm",
@@ -1333,14 +1335,15 @@ def : MVEInstAlias<"vrmlalvha${vp}.u32\t$RdaLo, $RdaHi, $Qn, $Qm",
tGPREven:$RdaLo, tGPROdd:$RdaHi,
MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>;
-multiclass MVE_VMLALDAV_multi<string suffix, bit sz, list<dag> pattern=[]> {
- defm "" : MVE_VMLALDAVBase_AX<"vmlaldav", "s"#suffix, sz, 0b0, 0b0, 0b0, pattern>;
- defm "" : MVE_VMLALDAVBase_A<"vmlaldav", "", "u"#suffix,
- sz, 0b1, 0b0, 0b0, 0b0, pattern>;
+multiclass MVE_VMLALDAV_multi<MVEVectorVTInfo VTI, list<dag> pattern=[]> {
+ defm "" : MVE_VMLALDAVBase_AX<"vmlaldav", "s"#VTI.BitsSuffix,
+ VTI.Size{1}, 0b0, 0b0, 0b0, VTI.Size, pattern>;
+ defm "" : MVE_VMLALDAVBase_A<"vmlaldav", "", "u"#VTI.BitsSuffix,
+ VTI.Size{1}, 0b1, 0b0, 0b0, 0b0, VTI.Size, pattern>;
}
-defm MVE_VMLALDAV : MVE_VMLALDAV_multi<"16", 0b0>;
-defm MVE_VMLALDAV : MVE_VMLALDAV_multi<"32", 0b1>;
+defm MVE_VMLALDAV : MVE_VMLALDAV_multi<MVE_v8i16>;
+defm MVE_VMLALDAV : MVE_VMLALDAV_multi<MVE_v4i32>;
let Predicates = [HasMVEInt] in {
def : Pat<(ARMVMLALVs (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)),
@@ -1363,22 +1366,22 @@ let Predicates = [HasMVEInt] in {
// Predicated
def : Pat<(ARMVMLALVps (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), (v4i1 VCCR:$pred)),
- (MVE_VMLALDAVs32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), ARMVCCThen, $pred)>;
+ (MVE_VMLALDAVs32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), ARMVCCThen, $pred, zero_reg)>;
def : Pat<(ARMVMLALVpu (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), (v4i1 VCCR:$pred)),
- (MVE_VMLALDAVu32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), ARMVCCThen, $pred)>;
+ (MVE_VMLALDAVu32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), ARMVCCThen, $pred, zero_reg)>;
def : Pat<(ARMVMLALVps (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred)),
- (MVE_VMLALDAVs16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred)>;
+ (MVE_VMLALDAVs16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred, zero_reg)>;
def : Pat<(ARMVMLALVpu (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred)),
- (MVE_VMLALDAVu16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred)>;
+ (MVE_VMLALDAVu16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred, zero_reg)>;
def : Pat<(ARMVMLALVAps tGPREven:$Rda, tGPROdd:$Rdb, (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), (v4i1 VCCR:$pred)),
- (MVE_VMLALDAVas32 tGPREven:$Rda, tGPROdd:$Rdb, (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), ARMVCCThen, $pred)>;
+ (MVE_VMLALDAVas32 tGPREven:$Rda, tGPROdd:$Rdb, (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), ARMVCCThen, $pred, zero_reg)>;
def : Pat<(ARMVMLALVApu tGPREven:$Rda, tGPROdd:$Rdb, (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), (v4i1 VCCR:$pred)),
- (MVE_VMLALDAVau32 tGPREven:$Rda, tGPROdd:$Rdb, (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), ARMVCCThen, $pred)>;
+ (MVE_VMLALDAVau32 tGPREven:$Rda, tGPROdd:$Rdb, (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), ARMVCCThen, $pred, zero_reg)>;
def : Pat<(ARMVMLALVAps tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred)),
- (MVE_VMLALDAVas16 tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred)>;
+ (MVE_VMLALDAVas16 tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred, zero_reg)>;
def : Pat<(ARMVMLALVApu tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred)),
- (MVE_VMLALDAVau16 tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred)>;
+ (MVE_VMLALDAVau16 tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred, zero_reg)>;
}
// vmlalv aliases vmlaldav
@@ -1393,22 +1396,22 @@ foreach acc = ["", "a"] in {
}
multiclass MVE_VMLSLDAV_multi<string iname, string suffix, bit sz,
- bit bit_28, list<dag> pattern=[]> {
- defm "" : MVE_VMLALDAVBase_AX<iname, suffix, sz, bit_28, 0b0, 0b1, pattern>;
+ bit bit_28, bits<2> vecsize, list<dag> pattern=[]> {
+ defm "" : MVE_VMLALDAVBase_AX<iname, suffix, sz, bit_28, 0b0, 0b1, vecsize, pattern>;
}
-defm MVE_VMLSLDAV : MVE_VMLSLDAV_multi<"vmlsldav", "s16", 0b0, 0b0>;
-defm MVE_VMLSLDAV : MVE_VMLSLDAV_multi<"vmlsldav", "s32", 0b1, 0b0>;
-defm MVE_VRMLSLDAVH : MVE_VMLSLDAV_multi<"vrmlsldavh", "s32", 0b0, 0b1>;
+defm MVE_VMLSLDAV : MVE_VMLSLDAV_multi<"vmlsldav", "s16", 0b0, 0b0, 0b01>;
+defm MVE_VMLSLDAV : MVE_VMLSLDAV_multi<"vmlsldav", "s32", 0b1, 0b0, 0b10>;
+defm MVE_VRMLSLDAVH : MVE_VMLSLDAV_multi<"vrmlsldavh", "s32", 0b0, 0b1, 0b10>;
// end of mve_rDest instructions
// start of mve_comp instructions
class MVE_comp<InstrItinClass itin, string iname, string suffix,
- string cstr, list<dag> pattern=[]>
+ string cstr, bits<2> vecsize, list<dag> pattern=[]>
: MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), itin, iname, suffix,
- "$Qd, $Qn, $Qm", vpred_r, cstr, pattern> {
+ "$Qd, $Qn, $Qm", vpred_r, cstr, vecsize, pattern> {
bits<4> Qd;
bits<4> Qn;
bits<4> Qm;
@@ -1425,25 +1428,26 @@ class MVE_comp<InstrItinClass itin, string iname, string suffix,
let Inst{0} = 0b0;
}
-class MVE_VMINMAXNM<string iname, string suffix, bit sz, bit bit_21,
+class MVE_VMINMAXNM<string iname, string suffix, bits<2> sz, bit bit_21,
list<dag> pattern=[]>
- : MVE_comp<NoItinerary, iname, suffix, "", pattern> {
+ : MVE_comp<NoItinerary, iname, suffix, "", sz, pattern> {
let Inst{28} = 0b1;
let Inst{25-24} = 0b11;
let Inst{23} = 0b0;
let Inst{21} = bit_21;
- let Inst{20} = sz;
+ let Inst{20} = sz{0};
let Inst{11} = 0b1;
let Inst{8} = 0b1;
let Inst{6} = 0b1;
let Inst{4} = 0b1;
let Predicates = [HasMVEFloat];
+ let validForTailPredication = 1;
}
multiclass MVE_VMINMAXNM_m<string iname, bit bit_4, MVEVectorVTInfo VTI, SDNode Op, Intrinsic PredInt> {
- def "" : MVE_VMINMAXNM<iname, VTI.Suffix, VTI.Size{0}, bit_4>;
+ def "" : MVE_VMINMAXNM<iname, VTI.Suffix, VTI.Size, bit_4>;
let Predicates = [HasMVEFloat] in {
defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? (i32 0)), !cast<Instruction>(NAME)>;
@@ -1458,7 +1462,7 @@ defm MVE_VMINNMf16 : MVE_VMINMAXNM_m<"vminnm", 0b1, MVE_v8f16, fminnum, int_arm_
class MVE_VMINMAX<string iname, string suffix, bit U, bits<2> size,
bit bit_4, list<dag> pattern=[]>
- : MVE_comp<NoItinerary, iname, suffix, "", pattern> {
+ : MVE_comp<NoItinerary, iname, suffix, "", size, pattern> {
let Inst{28} = U;
let Inst{25-24} = 0b11;
@@ -1504,8 +1508,8 @@ defm MVE_VMAXu32 : MVE_VMAX<MVE_v4u32>;
// start of mve_bit instructions
class MVE_bit_arith<dag oops, dag iops, string iname, string suffix,
- string ops, string cstr, list<dag> pattern=[]>
- : MVE_p<oops, iops, NoItinerary, iname, suffix, ops, vpred_r, cstr, pattern> {
+ string ops, string cstr, bits<2> vecsize, list<dag> pattern=[]>
+ : MVE_p<oops, iops, NoItinerary, iname, suffix, ops, vpred_r, cstr, vecsize, pattern> {
bits<4> Qd;
bits<4> Qm;
@@ -1516,7 +1520,7 @@ class MVE_bit_arith<dag oops, dag iops, string iname, string suffix,
}
def MVE_VBIC : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm),
- "vbic", "", "$Qd, $Qn, $Qm", ""> {
+ "vbic", "", "$Qd, $Qn, $Qm", "", 0b00> {
bits<4> Qn;
let Inst{28} = 0b0;
@@ -1532,9 +1536,10 @@ def MVE_VBIC : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm),
let validForTailPredication = 1;
}
-class MVE_VREV<string iname, string suffix, bits<2> size, bits<2> bit_8_7, string cstr="">
+class MVE_VREV<string iname, string suffix, bits<2> size, bits<2> bit_8_7,
+ bits<2> vecsize, string cstr="">
: MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qm), iname,
- suffix, "$Qd, $Qm", cstr> {
+ suffix, "$Qd, $Qm", cstr, vecsize> {
let Inst{28} = 0b1;
let Inst{25-23} = 0b111;
@@ -1548,14 +1553,14 @@ class MVE_VREV<string iname, string suffix, bits<2> size, bits<2> bit_8_7, strin
let Inst{0} = 0b0;
}
-def MVE_VREV64_8 : MVE_VREV<"vrev64", "8", 0b00, 0b00, "@earlyclobber $Qd">;
-def MVE_VREV64_16 : MVE_VREV<"vrev64", "16", 0b01, 0b00, "@earlyclobber $Qd">;
-def MVE_VREV64_32 : MVE_VREV<"vrev64", "32", 0b10, 0b00, "@earlyclobber $Qd">;
+def MVE_VREV64_8 : MVE_VREV<"vrev64", "8", 0b00, 0b00, 0b11, "@earlyclobber $Qd">;
+def MVE_VREV64_16 : MVE_VREV<"vrev64", "16", 0b01, 0b00, 0b11, "@earlyclobber $Qd">;
+def MVE_VREV64_32 : MVE_VREV<"vrev64", "32", 0b10, 0b00, 0b11, "@earlyclobber $Qd">;
-def MVE_VREV32_8 : MVE_VREV<"vrev32", "8", 0b00, 0b01>;
-def MVE_VREV32_16 : MVE_VREV<"vrev32", "16", 0b01, 0b01>;
+def MVE_VREV32_8 : MVE_VREV<"vrev32", "8", 0b00, 0b01, 0b10>;
+def MVE_VREV32_16 : MVE_VREV<"vrev32", "16", 0b01, 0b01, 0b10>;
-def MVE_VREV16_8 : MVE_VREV<"vrev16", "8", 0b00, 0b10>;
+def MVE_VREV16_8 : MVE_VREV<"vrev16", "8", 0b00, 0b10, 0b01>;
let Predicates = [HasMVEInt] in {
def : Pat<(v8i16 (bswap (v8i16 MQPR:$src))),
@@ -1574,7 +1579,7 @@ multiclass MVE_VREV_basic_patterns<int revbits, list<MVEVectorVTInfo> VTIs,
def : Pat<(VTI.Vec (int_arm_mve_vrev_predicated (VTI.Vec MQPR:$src),
revbits, (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive))),
(VTI.Vec (Inst (VTI.Vec MQPR:$src), ARMVCCThen,
- (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive)))>;
+ (VTI.Pred VCCR:$pred), zero_reg, (VTI.Vec MQPR:$inactive)))>;
}
}
@@ -1590,7 +1595,7 @@ let Predicates = [HasMVEInt] in {
}
def MVE_VMVN : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qm),
- "vmvn", "", "$Qd, $Qm", ""> {
+ "vmvn", "", "$Qd, $Qm", "", 0b00> {
let Inst{28} = 0b1;
let Inst{25-23} = 0b111;
let Inst{21-16} = 0b110000;
@@ -1607,13 +1612,13 @@ let Predicates = [HasMVEInt] in {
def : Pat<(VTI.Vec (int_arm_mve_mvn_predicated (VTI.Vec MQPR:$val1),
(VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive))),
(VTI.Vec (MVE_VMVN (VTI.Vec MQPR:$val1), ARMVCCThen,
- (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive)))>;
+ (VTI.Pred VCCR:$pred), zero_reg, (VTI.Vec MQPR:$inactive)))>;
}
}
class MVE_bit_ops<string iname, bits<2> bit_21_20, bit bit_28>
: MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm),
- iname, "", "$Qd, $Qn, $Qm", ""> {
+ iname, "", "$Qd, $Qn, $Qm", "", 0b00> {
bits<4> Qn;
let Inst{28} = bit_28;
@@ -1684,9 +1689,9 @@ let Predicates = [HasMVEInt] in {
int_arm_mve_orn_predicated, (? ), MVE_VORN>;
}
-class MVE_bit_cmode<string iname, string suffix, bit halfword, dag inOps>
+class MVE_bit_cmode<string iname, string suffix, bit halfword, dag inOps, bits<2> vecsize>
: MVE_p<(outs MQPR:$Qd), inOps, NoItinerary,
- iname, suffix, "$Qd, $imm", vpred_n, "$Qd = $Qd_src"> {
+ iname, suffix, "$Qd, $imm", vpred_n, "$Qd = $Qd_src", vecsize> {
bits<12> imm;
bits<4> Qd;
@@ -1709,7 +1714,7 @@ class MVE_bit_cmode<string iname, string suffix, bit halfword, dag inOps>
multiclass MVE_bit_cmode_p<string iname, bit opcode,
MVEVectorVTInfo VTI, Operand imm_type, SDNode op> {
def "" : MVE_bit_cmode<iname, VTI.Suffix, VTI.Size{0},
- (ins MQPR:$Qd_src, imm_type:$imm)> {
+ (ins MQPR:$Qd_src, imm_type:$imm), VTI.Size> {
let Inst{5} = opcode;
let validForTailPredication = 1;
}
@@ -1723,7 +1728,7 @@ multiclass MVE_bit_cmode_p<string iname, bit opcode,
def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
UnpredPat, (VTI.Vec MQPR:$src))),
(VTI.Vec (Inst (VTI.Vec MQPR:$src), imm_type:$simm,
- ARMVCCThen, (VTI.Pred VCCR:$pred)))>;
+ ARMVCCThen, (VTI.Pred VCCR:$pred), zero_reg))>;
}
}
@@ -1801,6 +1806,7 @@ class MVE_VMOV_lane_32<MVE_VMOV_lane_direction dir>
let Inst{16} = Idx{1};
let Inst{21} = Idx{0};
+ let VecSize = 0b10;
let Predicates = [HasFPRegsV8_1M];
}
@@ -1812,6 +1818,8 @@ class MVE_VMOV_lane_16<string suffix, bit U, MVE_VMOV_lane_direction dir>
let Inst{16} = Idx{2};
let Inst{21} = Idx{1};
let Inst{6} = Idx{0};
+
+ let VecSize = 0b01;
}
class MVE_VMOV_lane_8<string suffix, bit U, MVE_VMOV_lane_direction dir>
@@ -1822,6 +1830,8 @@ class MVE_VMOV_lane_8<string suffix, bit U, MVE_VMOV_lane_direction dir>
let Inst{21} = Idx{2};
let Inst{6} = Idx{1};
let Inst{5} = Idx{0};
+
+ let VecSize = 0b00;
}
def MVE_VMOV_from_lane_32 : MVE_VMOV_lane_32< MVE_VMOV_from_lane>;
@@ -1932,7 +1942,7 @@ let Predicates = [HasMVEInt] in {
class MVE_int<string iname, string suffix, bits<2> size, list<dag> pattern=[]>
: MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), NoItinerary,
- iname, suffix, "$Qd, $Qn, $Qm", vpred_r, "", pattern> {
+ iname, suffix, "$Qd, $Qn, $Qm", vpred_r, "", size, pattern> {
bits<4> Qd;
bits<4> Qn;
bits<4> Qm;
@@ -2205,7 +2215,7 @@ multiclass MVE_VRHADD_m<MVEVectorVTInfo VTI,
(i32 VTI.Unsigned), (VTI.Pred VCCR:$mask),
(VTI.Vec MQPR:$inactive))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- ARMVCCThen, (VTI.Pred VCCR:$mask),
+ ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg,
(VTI.Vec MQPR:$inactive)))>;
}
}
@@ -2293,7 +2303,7 @@ multiclass MVE_VHADD_m<MVEVectorVTInfo VTI,
def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), (i32 VTI.Unsigned),
(VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- ARMVCCThen, (VTI.Pred VCCR:$mask),
+ ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg,
(VTI.Vec MQPR:$inactive)))>;
}
}
@@ -2334,7 +2344,7 @@ multiclass MVE_VHSUB_m<MVEVectorVTInfo VTI,
(i32 VTI.Unsigned), (VTI.Pred VCCR:$mask),
(VTI.Vec MQPR:$inactive))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- ARMVCCThen, (VTI.Pred VCCR:$mask),
+ ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg,
(VTI.Vec MQPR:$inactive)))>;
}
}
@@ -2350,9 +2360,9 @@ defm MVE_VHSUBu8 : MVE_VHSUB<MVE_v16u8, subnuw, ARMvshruImm>;
defm MVE_VHSUBu16 : MVE_VHSUB<MVE_v8u16, subnuw, ARMvshruImm>;
defm MVE_VHSUBu32 : MVE_VHSUB<MVE_v4u32, subnuw, ARMvshruImm>;
-class MVE_VDUP<string suffix, bit B, bit E, list<dag> pattern=[]>
+class MVE_VDUP<string suffix, bit B, bit E, bits<2> vecsize, list<dag> pattern=[]>
: MVE_p<(outs MQPR:$Qd), (ins rGPR:$Rt), NoItinerary,
- "vdup", suffix, "$Qd, $Rt", vpred_r, "", pattern> {
+ "vdup", suffix, "$Qd, $Rt", vpred_r, "", vecsize, pattern> {
bits<4> Qd;
bits<4> Rt;
@@ -2371,9 +2381,9 @@ class MVE_VDUP<string suffix, bit B, bit E, list<dag> pattern=[]>
let validForTailPredication = 1;
}
-def MVE_VDUP32 : MVE_VDUP<"32", 0b0, 0b0>;
-def MVE_VDUP16 : MVE_VDUP<"16", 0b0, 0b1>;
-def MVE_VDUP8 : MVE_VDUP<"8", 0b1, 0b0>;
+def MVE_VDUP32 : MVE_VDUP<"32", 0b0, 0b0, 0b10>;
+def MVE_VDUP16 : MVE_VDUP<"16", 0b0, 0b1, 0b01>;
+def MVE_VDUP8 : MVE_VDUP<"8", 0b1, 0b0, 0b00>;
let Predicates = [HasMVEInt] in {
def : Pat<(v16i8 (ARMvdup (i32 rGPR:$elem))),
@@ -2392,27 +2402,27 @@ let Predicates = [HasMVEInt] in {
def : Pat<(v16i8 (vselect (v16i1 VCCR:$pred),
(v16i8 (ARMvdup (i32 rGPR:$elem))),
(v16i8 MQPR:$inactive))),
- (MVE_VDUP8 rGPR:$elem, ARMVCCThen, (v16i1 VCCR:$pred),
+ (MVE_VDUP8 rGPR:$elem, ARMVCCThen, (v16i1 VCCR:$pred), zero_reg,
(v16i8 MQPR:$inactive))>;
def : Pat<(v8i16 (vselect (v8i1 VCCR:$pred),
(v8i16 (ARMvdup (i32 rGPR:$elem))),
(v8i16 MQPR:$inactive))),
- (MVE_VDUP16 rGPR:$elem, ARMVCCThen, (v8i1 VCCR:$pred),
+ (MVE_VDUP16 rGPR:$elem, ARMVCCThen, (v8i1 VCCR:$pred), zero_reg,
(v8i16 MQPR:$inactive))>;
def : Pat<(v4i32 (vselect (v4i1 VCCR:$pred),
(v4i32 (ARMvdup (i32 rGPR:$elem))),
(v4i32 MQPR:$inactive))),
- (MVE_VDUP32 rGPR:$elem, ARMVCCThen, (v4i1 VCCR:$pred),
+ (MVE_VDUP32 rGPR:$elem, ARMVCCThen, (v4i1 VCCR:$pred), zero_reg,
(v4i32 MQPR:$inactive))>;
def : Pat<(v4f32 (vselect (v4i1 VCCR:$pred),
(v4f32 (ARMvdup (i32 rGPR:$elem))),
(v4f32 MQPR:$inactive))),
- (MVE_VDUP32 rGPR:$elem, ARMVCCThen, (v4i1 VCCR:$pred),
+ (MVE_VDUP32 rGPR:$elem, ARMVCCThen, (v4i1 VCCR:$pred), zero_reg,
(v4f32 MQPR:$inactive))>;
def : Pat<(v8f16 (vselect (v8i1 VCCR:$pred),
(v8f16 (ARMvdup (i32 rGPR:$elem))),
(v8f16 MQPR:$inactive))),
- (MVE_VDUP16 rGPR:$elem, ARMVCCThen, (v8i1 VCCR:$pred),
+ (MVE_VDUP16 rGPR:$elem, ARMVCCThen, (v8i1 VCCR:$pred), zero_reg,
(v8f16 MQPR:$inactive))>;
}
@@ -2420,7 +2430,7 @@ let Predicates = [HasMVEInt] in {
class MVEIntSingleSrc<string iname, string suffix, bits<2> size,
list<dag> pattern=[]>
: MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qm), NoItinerary,
- iname, suffix, "$Qd, $Qm", vpred_r, "", pattern> {
+ iname, suffix, "$Qd, $Qm", vpred_r, "", size, pattern> {
bits<4> Qd;
bits<4> Qm;
@@ -2460,7 +2470,7 @@ multiclass MVE_VCLSCLZ_p<string opname, bit opcode, MVEVectorVTInfo VTI,
def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$val), (VTI.Pred VCCR:$pred),
(VTI.Vec MQPR:$inactive))),
(VTI.Vec (Inst (VTI.Vec MQPR:$val), ARMVCCThen,
- (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive)))>;
+ (VTI.Pred VCCR:$pred), zero_reg, (VTI.Vec MQPR:$inactive)))>;
}
}
@@ -2506,7 +2516,7 @@ multiclass MVE_VABSNEG_int_m<string iname, bit negate, bit saturate,
def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$v), (VTI.Pred VCCR:$mask),
(VTI.Vec MQPR:$inactive))),
- (VTI.Vec (Inst $v, ARMVCCThen, $mask, $inactive))>;
+ (VTI.Vec (Inst $v, ARMVCCThen, $mask, zero_reg, $inactive))>;
}
}
@@ -2565,9 +2575,9 @@ defm MVE_VQABSNEG_Ps32 : vqabsneg_pattern<MVE_v4i32,
MVE_VQABSs32, MVE_VQNEGs32>;
class MVE_mod_imm<string iname, string suffix, bits<4> cmode, bit op,
- dag iops, list<dag> pattern=[]>
+ dag iops, bits<2> vecsize, list<dag> pattern=[]>
: MVE_p<(outs MQPR:$Qd), iops, NoItinerary, iname, suffix, "$Qd, $imm",
- vpred_r, "", pattern> {
+ vpred_r, "", vecsize, pattern> {
bits<13> imm;
bits<4> Qd;
@@ -2590,21 +2600,21 @@ class MVE_mod_imm<string iname, string suffix, bits<4> cmode, bit op,
let isReMaterializable = 1 in {
let isAsCheapAsAMove = 1 in {
-def MVE_VMOVimmi8 : MVE_mod_imm<"vmov", "i8", {1,1,1,0}, 0b0, (ins nImmSplatI8:$imm)>;
-def MVE_VMOVimmi16 : MVE_mod_imm<"vmov", "i16", {1,0,?,0}, 0b0, (ins nImmSplatI16:$imm)> {
+def MVE_VMOVimmi8 : MVE_mod_imm<"vmov", "i8", {1,1,1,0}, 0b0, (ins nImmSplatI8:$imm), 0b00>;
+def MVE_VMOVimmi16 : MVE_mod_imm<"vmov", "i16", {1,0,?,0}, 0b0, (ins nImmSplatI16:$imm), 0b01> {
let Inst{9} = imm{9};
}
-def MVE_VMOVimmi32 : MVE_mod_imm<"vmov", "i32", {?,?,?,?}, 0b0, (ins nImmVMOVI32:$imm)> {
+def MVE_VMOVimmi32 : MVE_mod_imm<"vmov", "i32", {?,?,?,?}, 0b0, (ins nImmVMOVI32:$imm), 0b10> {
let Inst{11-8} = imm{11-8};
}
-def MVE_VMOVimmi64 : MVE_mod_imm<"vmov", "i64", {1,1,1,0}, 0b1, (ins nImmSplatI64:$imm)>;
-def MVE_VMOVimmf32 : MVE_mod_imm<"vmov", "f32", {1,1,1,1}, 0b0, (ins nImmVMOVF32:$imm)>;
+def MVE_VMOVimmi64 : MVE_mod_imm<"vmov", "i64", {1,1,1,0}, 0b1, (ins nImmSplatI64:$imm), 0b11>;
+def MVE_VMOVimmf32 : MVE_mod_imm<"vmov", "f32", {1,1,1,1}, 0b0, (ins nImmVMOVF32:$imm), 0b10>;
} // let isAsCheapAsAMove = 1
-def MVE_VMVNimmi16 : MVE_mod_imm<"vmvn", "i16", {1,0,?,0}, 0b1, (ins nImmSplatI16:$imm)> {
+def MVE_VMVNimmi16 : MVE_mod_imm<"vmvn", "i16", {1,0,?,0}, 0b1, (ins nImmSplatI16:$imm), 0b01> {
let Inst{9} = imm{9};
}
-def MVE_VMVNimmi32 : MVE_mod_imm<"vmvn", "i32", {?,?,?,?}, 0b1, (ins nImmVMOVI32:$imm)> {
+def MVE_VMVNimmi32 : MVE_mod_imm<"vmvn", "i32", {?,?,?,?}, 0b1, (ins nImmVMOVI32:$imm), 0b10> {
let Inst{11-8} = imm{11-8};
}
} // let isReMaterializable = 1
@@ -2630,18 +2640,18 @@ let Predicates = [HasMVEInt] in {
def : Pat<(v8i16 (vselect (v8i1 VCCR:$pred), (ARMvmvnImm timm:$simm),
MQPR:$inactive)),
(v8i16 (MVE_VMVNimmi16 nImmSplatI16:$simm,
- ARMVCCThen, VCCR:$pred, MQPR:$inactive))>;
+ ARMVCCThen, VCCR:$pred, zero_reg, MQPR:$inactive))>;
def : Pat<(v4i32 (vselect (v4i1 VCCR:$pred), (ARMvmvnImm timm:$simm),
MQPR:$inactive)),
(v4i32 (MVE_VMVNimmi32 nImmSplatI32:$simm,
- ARMVCCThen, VCCR:$pred, MQPR:$inactive))>;
+ ARMVCCThen, VCCR:$pred, zero_reg, MQPR:$inactive))>;
}
class MVE_VMINMAXA<string iname, string suffix, bits<2> size,
bit bit_12, list<dag> pattern=[]>
: MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qd_src, MQPR:$Qm),
NoItinerary, iname, suffix, "$Qd, $Qm", vpred_n, "$Qd = $Qd_src",
- pattern> {
+ size, pattern> {
bits<4> Qd;
bits<4> Qm;
@@ -2675,7 +2685,7 @@ multiclass MVE_VMINMAXA_m<string iname, MVEVectorVTInfo VTI,
def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qd), (VTI.Vec MQPR:$Qm),
(VTI.Pred VCCR:$mask))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qd), (VTI.Vec MQPR:$Qm),
- ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
+ ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg))>;
}
}
@@ -2700,7 +2710,7 @@ defm MVE_VMAXAs32 : MVE_VMAXA<MVE_v4s32>;
def MVE_VSHLC : MVE_p<(outs rGPR:$RdmDest, MQPR:$Qd),
(ins MQPR:$QdSrc, rGPR:$RdmSrc, long_shift:$imm),
NoItinerary, "vshlc", "", "$QdSrc, $RdmSrc, $imm",
- vpred_n, "$RdmDest = $RdmSrc,$Qd = $QdSrc"> {
+ vpred_n, "$RdmDest = $RdmSrc,$Qd = $QdSrc", 0b10> {
bits<5> imm;
bits<4> Qd;
bits<4> RdmDest;
@@ -2717,8 +2727,8 @@ def MVE_VSHLC : MVE_p<(outs rGPR:$RdmDest, MQPR:$Qd),
class MVE_shift_imm<dag oops, dag iops, string iname, string suffix,
string ops, vpred_ops vpred, string cstr,
- list<dag> pattern=[]>
- : MVE_p<oops, iops, NoItinerary, iname, suffix, ops, vpred, cstr, pattern> {
+ bits<2> vecsize, list<dag> pattern=[]>
+ : MVE_p<oops, iops, NoItinerary, iname, suffix, ops, vpred, cstr, vecsize, pattern> {
bits<4> Qd;
bits<4> Qm;
@@ -2732,7 +2742,7 @@ class MVE_VMOVL<string iname, string suffix, bits<2> sz, bit U, bit top,
list<dag> pattern=[]>
: MVE_shift_imm<(outs MQPR:$Qd), (ins MQPR:$Qm),
iname, suffix, "$Qd, $Qm", vpred_r, "",
- pattern> {
+ sz, pattern> {
let Inst{28} = U;
let Inst{25-23} = 0b101;
let Inst{21} = 0b1;
@@ -2756,7 +2766,7 @@ multiclass MVE_VMOVL_m<bit top, string chr, MVEVectorVTInfo OutVTI,
(OutVTI.Pred VCCR:$pred),
(OutVTI.Vec MQPR:$inactive))),
(OutVTI.Vec (Inst (InVTI.Vec MQPR:$src), ARMVCCThen,
- (OutVTI.Pred VCCR:$pred),
+ (OutVTI.Pred VCCR:$pred), zero_reg,
(OutVTI.Vec MQPR:$inactive)))>;
}
@@ -2798,9 +2808,9 @@ let Predicates = [HasMVEInt] in {
class MVE_VSHLL_imm<string iname, string suffix, bit U, bit th,
- Operand immtype, list<dag> pattern=[]>
+ Operand immtype, bits<2> vecsize, list<dag> pattern=[]>
: MVE_shift_imm<(outs MQPR:$Qd), (ins MQPR:$Qm, immtype:$imm),
- iname, suffix, "$Qd, $Qm, $imm", vpred_r, "", pattern> {
+ iname, suffix, "$Qd, $Qm, $imm", vpred_r, "", vecsize, pattern> {
let Inst{28} = U;
let Inst{25-23} = 0b101;
let Inst{21} = 0b1;
@@ -2821,7 +2831,7 @@ class MVE_VSHLL_imm<string iname, string suffix, bit U, bit th,
class MVE_VSHLL_imm8<string iname, string suffix,
bit U, bit th, list<dag> pattern=[]>
- : MVE_VSHLL_imm<iname, suffix, U, th, mve_shift_imm1_7, pattern> {
+ : MVE_VSHLL_imm<iname, suffix, U, th, mve_shift_imm1_7, 0b01, pattern> {
bits<3> imm;
let Inst{20-19} = 0b01;
let Inst{18-16} = imm;
@@ -2829,7 +2839,7 @@ class MVE_VSHLL_imm8<string iname, string suffix,
class MVE_VSHLL_imm16<string iname, string suffix,
bit U, bit th, list<dag> pattern=[]>
- : MVE_VSHLL_imm<iname, suffix, U, th, mve_shift_imm1_15, pattern> {
+ : MVE_VSHLL_imm<iname, suffix, U, th, mve_shift_imm1_15, 0b10, pattern> {
bits<4> imm;
let Inst{20} = 0b1;
let Inst{19-16} = imm;
@@ -2847,7 +2857,7 @@ def MVE_VSHLL_immu16th : MVE_VSHLL_imm16<"vshllt", "u16", 0b1, 0b1>;
class MVE_VSHLL_by_lane_width<string iname, string suffix, bits<2> size,
bit U, string ops, list<dag> pattern=[]>
: MVE_shift_imm<(outs MQPR:$Qd), (ins MQPR:$Qm),
- iname, suffix, ops, vpred_r, "", pattern> {
+ iname, suffix, ops, vpred_r, "", !if(size, 0b10, 0b01), pattern> {
let Inst{28} = U;
let Inst{25-23} = 0b100;
let Inst{21-20} = 0b11;
@@ -2894,14 +2904,14 @@ multiclass MVE_VSHLL_patterns<MVEVectorVTInfo VTI, int top> {
(VTI.DblPred VCCR:$mask),
(VTI.DblVec MQPR:$inactive))),
(VTI.DblVec (inst_imm (VTI.Vec MQPR:$src), imm:$imm,
- ARMVCCThen, (VTI.DblPred VCCR:$mask),
+ ARMVCCThen, (VTI.DblPred VCCR:$mask), zero_reg,
(VTI.DblVec MQPR:$inactive)))>;
def : Pat<(VTI.DblVec (pred_int (VTI.Vec MQPR:$src), (i32 VTI.LaneBits),
(i32 VTI.Unsigned), (i32 top),
(VTI.DblPred VCCR:$mask),
(VTI.DblVec MQPR:$inactive))),
(VTI.DblVec (inst_lw (VTI.Vec MQPR:$src), ARMVCCThen,
- (VTI.DblPred VCCR:$mask),
+ (VTI.DblPred VCCR:$mask), zero_reg,
(VTI.DblVec MQPR:$inactive)))>;
}
@@ -2909,15 +2919,15 @@ foreach VTI = [MVE_v16s8, MVE_v8s16, MVE_v16u8, MVE_v8u16] in
foreach top = [0, 1] in
defm : MVE_VSHLL_patterns<VTI, top>;
-class MVE_shift_imm_partial<Operand imm, string iname, string suffix>
+class MVE_shift_imm_partial<Operand imm, string iname, string suffix, bits<2> vecsize>
: MVE_shift_imm<(outs MQPR:$Qd), (ins MQPR:$QdSrc, MQPR:$Qm, imm:$imm),
- iname, suffix, "$Qd, $Qm, $imm", vpred_n, "$Qd = $QdSrc"> {
+ iname, suffix, "$Qd, $Qm, $imm", vpred_n, "$Qd = $QdSrc", vecsize> {
Operand immediateType = imm;
}
class MVE_VxSHRN<string iname, string suffix, bit bit_12, bit bit_28,
- Operand imm, list<dag> pattern=[]>
- : MVE_shift_imm_partial<imm, iname, suffix> {
+ Operand imm, bits<2> vecsize>
+ : MVE_shift_imm_partial<imm, iname, suffix, vecsize> {
bits<5> imm;
let Inst{28} = bit_28;
@@ -2932,35 +2942,35 @@ class MVE_VxSHRN<string iname, string suffix, bit bit_12, bit bit_28,
let retainsPreviousHalfElement = 1;
}
-def MVE_VRSHRNi16bh : MVE_VxSHRN<"vrshrnb", "i16", 0b0, 0b1, shr_imm8> {
+def MVE_VRSHRNi16bh : MVE_VxSHRN<"vrshrnb", "i16", 0b0, 0b1, shr_imm8, 0b01> {
let Inst{20-19} = 0b01;
}
-def MVE_VRSHRNi16th : MVE_VxSHRN<"vrshrnt", "i16", 0b1, 0b1, shr_imm8> {
+def MVE_VRSHRNi16th : MVE_VxSHRN<"vrshrnt", "i16", 0b1, 0b1, shr_imm8, 0b01> {
let Inst{20-19} = 0b01;
}
-def MVE_VRSHRNi32bh : MVE_VxSHRN<"vrshrnb", "i32", 0b0, 0b1, shr_imm16> {
+def MVE_VRSHRNi32bh : MVE_VxSHRN<"vrshrnb", "i32", 0b0, 0b1, shr_imm16, 0b10> {
let Inst{20} = 0b1;
}
-def MVE_VRSHRNi32th : MVE_VxSHRN<"vrshrnt", "i32", 0b1, 0b1, shr_imm16> {
+def MVE_VRSHRNi32th : MVE_VxSHRN<"vrshrnt", "i32", 0b1, 0b1, shr_imm16, 0b10> {
let Inst{20} = 0b1;
}
-def MVE_VSHRNi16bh : MVE_VxSHRN<"vshrnb", "i16", 0b0, 0b0, shr_imm8> {
+def MVE_VSHRNi16bh : MVE_VxSHRN<"vshrnb", "i16", 0b0, 0b0, shr_imm8, 0b01> {
let Inst{20-19} = 0b01;
}
-def MVE_VSHRNi16th : MVE_VxSHRN<"vshrnt", "i16", 0b1, 0b0, shr_imm8> {
+def MVE_VSHRNi16th : MVE_VxSHRN<"vshrnt", "i16", 0b1, 0b0, shr_imm8, 0b01> {
let Inst{20-19} = 0b01;
}
-def MVE_VSHRNi32bh : MVE_VxSHRN<"vshrnb", "i32", 0b0, 0b0, shr_imm16> {
+def MVE_VSHRNi32bh : MVE_VxSHRN<"vshrnb", "i32", 0b0, 0b0, shr_imm16, 0b10> {
let Inst{20} = 0b1;
}
-def MVE_VSHRNi32th : MVE_VxSHRN<"vshrnt", "i32", 0b1, 0b0, shr_imm16> {
+def MVE_VSHRNi32th : MVE_VxSHRN<"vshrnt", "i32", 0b1, 0b0, shr_imm16, 0b10> {
let Inst{20} = 0b1;
}
class MVE_VxQRSHRUN<string iname, string suffix, bit bit_28, bit bit_12,
- Operand imm, list<dag> pattern=[]>
- : MVE_shift_imm_partial<imm, iname, suffix> {
+ Operand imm, bits<2> vecsize>
+ : MVE_shift_imm_partial<imm, iname, suffix, vecsize> {
bits<5> imm;
let Inst{28} = bit_28;
@@ -2976,42 +2986,42 @@ class MVE_VxQRSHRUN<string iname, string suffix, bit bit_28, bit bit_12,
}
def MVE_VQRSHRUNs16bh : MVE_VxQRSHRUN<
- "vqrshrunb", "s16", 0b1, 0b0, shr_imm8> {
+ "vqrshrunb", "s16", 0b1, 0b0, shr_imm8, 0b01> {
let Inst{20-19} = 0b01;
}
def MVE_VQRSHRUNs16th : MVE_VxQRSHRUN<
- "vqrshrunt", "s16", 0b1, 0b1, shr_imm8> {
+ "vqrshrunt", "s16", 0b1, 0b1, shr_imm8, 0b01> {
let Inst{20-19} = 0b01;
}
def MVE_VQRSHRUNs32bh : MVE_VxQRSHRUN<
- "vqrshrunb", "s32", 0b1, 0b0, shr_imm16> {
+ "vqrshrunb", "s32", 0b1, 0b0, shr_imm16, 0b10> {
let Inst{20} = 0b1;
}
def MVE_VQRSHRUNs32th : MVE_VxQRSHRUN<
- "vqrshrunt", "s32", 0b1, 0b1, shr_imm16> {
+ "vqrshrunt", "s32", 0b1, 0b1, shr_imm16, 0b10> {
let Inst{20} = 0b1;
}
def MVE_VQSHRUNs16bh : MVE_VxQRSHRUN<
- "vqshrunb", "s16", 0b0, 0b0, shr_imm8> {
+ "vqshrunb", "s16", 0b0, 0b0, shr_imm8, 0b01> {
let Inst{20-19} = 0b01;
}
def MVE_VQSHRUNs16th : MVE_VxQRSHRUN<
- "vqshrunt", "s16", 0b0, 0b1, shr_imm8> {
+ "vqshrunt", "s16", 0b0, 0b1, shr_imm8, 0b01> {
let Inst{20-19} = 0b01;
}
def MVE_VQSHRUNs32bh : MVE_VxQRSHRUN<
- "vqshrunb", "s32", 0b0, 0b0, shr_imm16> {
+ "vqshrunb", "s32", 0b0, 0b0, shr_imm16, 0b10> {
let Inst{20} = 0b1;
}
def MVE_VQSHRUNs32th : MVE_VxQRSHRUN<
- "vqshrunt", "s32", 0b0, 0b1, shr_imm16> {
+ "vqshrunt", "s32", 0b0, 0b1, shr_imm16, 0b10> {
let Inst{20} = 0b1;
}
class MVE_VxQRSHRN<string iname, string suffix, bit bit_0, bit bit_12,
- Operand imm, list<dag> pattern=[]>
- : MVE_shift_imm_partial<imm, iname, suffix> {
+ Operand imm, bits<2> vecsize>
+ : MVE_shift_imm_partial<imm, iname, suffix, vecsize> {
bits<5> imm;
let Inst{25-23} = 0b101;
@@ -3026,19 +3036,19 @@ class MVE_VxQRSHRN<string iname, string suffix, bit bit_0, bit bit_12,
}
multiclass MVE_VxQRSHRN_types<string iname, bit bit_0, bit bit_12> {
- def s16 : MVE_VxQRSHRN<iname, "s16", bit_0, bit_12, shr_imm8> {
+ def s16 : MVE_VxQRSHRN<iname, "s16", bit_0, bit_12, shr_imm8, 0b01> {
let Inst{28} = 0b0;
let Inst{20-19} = 0b01;
}
- def u16 : MVE_VxQRSHRN<iname, "u16", bit_0, bit_12, shr_imm8> {
+ def u16 : MVE_VxQRSHRN<iname, "u16", bit_0, bit_12, shr_imm8, 0b01> {
let Inst{28} = 0b1;
let Inst{20-19} = 0b01;
}
- def s32 : MVE_VxQRSHRN<iname, "s32", bit_0, bit_12, shr_imm16> {
+ def s32 : MVE_VxQRSHRN<iname, "s32", bit_0, bit_12, shr_imm16, 0b10> {
let Inst{28} = 0b0;
let Inst{20} = 0b1;
}
- def u32 : MVE_VxQRSHRN<iname, "u32", bit_0, bit_12, shr_imm16> {
+ def u32 : MVE_VxQRSHRN<iname, "u32", bit_0, bit_12, shr_imm16, 0b10> {
let Inst{28} = 0b1;
let Inst{20} = 0b1;
}
@@ -3062,7 +3072,7 @@ multiclass MVE_VSHRN_patterns<MVE_shift_imm_partial inst,
(OutVTI.Vec outparams)>;
def : Pat<(OutVTI.Vec !con(inparams, (int_arm_mve_vshrn_predicated
(InVTI.Pred VCCR:$pred)))),
- (OutVTI.Vec !con(outparams, (? ARMVCCThen, VCCR:$pred)))>;
+ (OutVTI.Vec !con(outparams, (? ARMVCCThen, VCCR:$pred, zero_reg)))>;
}
defm : MVE_VSHRN_patterns<MVE_VSHRNi16bh, MVE_v16s8, MVE_v8s16, 0,0,0>;
@@ -3113,7 +3123,7 @@ defm : MVE_VSHRN_patterns<MVE_VQRSHRUNs32th, MVE_v8u16, MVE_v4s32, 1,1,1>;
class MVE_shift_by_vec<string iname, string suffix, bit U,
bits<2> size, bit bit_4, bit bit_8>
: MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qm, MQPR:$Qn), NoItinerary,
- iname, suffix, "$Qd, $Qm, $Qn", vpred_r, "", []> {
+ iname, suffix, "$Qd, $Qm, $Qn", vpred_r, "", size, []> {
// Shift instructions which take a vector of shift counts
bits<4> Qd;
bits<4> Qm;
@@ -3152,7 +3162,7 @@ multiclass MVE_shift_by_vec_p<string iname, MVEVectorVTInfo VTI, bit q, bit r> {
(i32 q), (i32 r), (i32 VTI.Unsigned),
(VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
(VTI.Vec (Inst (VTI.Vec MQPR:$in), (VTI.Vec MQPR:$sh),
- ARMVCCThen, (VTI.Pred VCCR:$mask),
+ ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg,
(VTI.Vec MQPR:$inactive)))>;
}
@@ -3188,8 +3198,8 @@ let Predicates = [HasMVEInt] in {
class MVE_shift_with_imm<string iname, string suffix, dag oops, dag iops,
string ops, vpred_ops vpred, string cstr,
- list<dag> pattern=[]>
- : MVE_p<oops, iops, NoItinerary, iname, suffix, ops, vpred, cstr, pattern> {
+ bits<2> vecsize, list<dag> pattern=[]>
+ : MVE_p<oops, iops, NoItinerary, iname, suffix, ops, vpred, cstr, vecsize, pattern> {
bits<4> Qd;
bits<4> Qm;
@@ -3212,10 +3222,10 @@ class MVE_shift_with_imm<string iname, string suffix, dag oops, dag iops,
dag unsignedFlag = (?);
}
-class MVE_VSxI_imm<string iname, string suffix, bit bit_8, Operand immType>
+class MVE_VSxI_imm<string iname, string suffix, bit bit_8, Operand immType, bits<2> vecsize>
: MVE_shift_with_imm<iname, suffix, (outs MQPR:$Qd),
(ins MQPR:$Qd_src, MQPR:$Qm, immType:$imm),
- "$Qd, $Qm, $imm", vpred_n, "$Qd = $Qd_src"> {
+ "$Qd, $Qm, $imm", vpred_n, "$Qd = $Qd_src", vecsize> {
bits<6> imm;
let Inst{28} = 0b1;
let Inst{25-24} = 0b11;
@@ -3227,27 +3237,27 @@ class MVE_VSxI_imm<string iname, string suffix, bit bit_8, Operand immType>
Operand immediateType = immType;
}
-def MVE_VSRIimm8 : MVE_VSxI_imm<"vsri", "8", 0b0, shr_imm8> {
+def MVE_VSRIimm8 : MVE_VSxI_imm<"vsri", "8", 0b0, shr_imm8, 0b00> {
let Inst{21-19} = 0b001;
}
-def MVE_VSRIimm16 : MVE_VSxI_imm<"vsri", "16", 0b0, shr_imm16> {
+def MVE_VSRIimm16 : MVE_VSxI_imm<"vsri", "16", 0b0, shr_imm16, 0b01> {
let Inst{21-20} = 0b01;
}
-def MVE_VSRIimm32 : MVE_VSxI_imm<"vsri", "32", 0b0, shr_imm32> {
+def MVE_VSRIimm32 : MVE_VSxI_imm<"vsri", "32", 0b0, shr_imm32, 0b10> {
let Inst{21} = 0b1;
}
-def MVE_VSLIimm8 : MVE_VSxI_imm<"vsli", "8", 0b1, imm0_7> {
+def MVE_VSLIimm8 : MVE_VSxI_imm<"vsli", "8", 0b1, imm0_7, 0b00> {
let Inst{21-19} = 0b001;
}
-def MVE_VSLIimm16 : MVE_VSxI_imm<"vsli", "16", 0b1, imm0_15> {
+def MVE_VSLIimm16 : MVE_VSxI_imm<"vsli", "16", 0b1, imm0_15, 0b01> {
let Inst{21-20} = 0b01;
}
-def MVE_VSLIimm32 : MVE_VSxI_imm<"vsli", "32", 0b1,imm0_31> {
+def MVE_VSLIimm32 : MVE_VSxI_imm<"vsli", "32", 0b1,imm0_31, 0b10> {
let Inst{21} = 0b1;
}
@@ -3263,7 +3273,7 @@ multiclass MVE_VSxI_patterns<MVE_VSxI_imm inst, string name,
def : Pat<(VTI.Vec !setdagop(inparams, unpred_int)),
(VTI.Vec outparams)>;
def : Pat<(VTI.Vec !con(inparams, (pred_int (VTI.Pred VCCR:$pred)))),
- (VTI.Vec !con(outparams, (? ARMVCCThen, VCCR:$pred)))>;
+ (VTI.Vec !con(outparams, (? ARMVCCThen, VCCR:$pred, zero_reg)))>;
}
defm : MVE_VSxI_patterns<MVE_VSLIimm8, "vsli", MVE_v16i8>;
@@ -3276,7 +3286,7 @@ defm : MVE_VSxI_patterns<MVE_VSRIimm32, "vsri", MVE_v4i32>;
class MVE_VQSHL_imm<MVEVectorVTInfo VTI_, Operand immType>
: MVE_shift_with_imm<"vqshl", VTI_.Suffix, (outs MQPR:$Qd),
(ins MQPR:$Qm, immType:$imm), "$Qd, $Qm, $imm",
- vpred_r, ""> {
+ vpred_r, "", VTI_.Size> {
bits<6> imm;
let Inst{28} = VTI_.Unsigned;
@@ -3316,7 +3326,7 @@ let unpred_int = int_arm_mve_vqshl_imm,
class MVE_VQSHLU_imm<MVEVectorVTInfo VTI_, Operand immType>
: MVE_shift_with_imm<"vqshlu", VTI_.Suffix, (outs MQPR:$Qd),
(ins MQPR:$Qm, immType:$imm), "$Qd, $Qm, $imm",
- vpred_r, ""> {
+ vpred_r, "", VTI_.Size> {
bits<6> imm;
let Inst{28} = 0b1;
@@ -3346,7 +3356,7 @@ let unpred_int = int_arm_mve_vqshlu_imm,
class MVE_VRSHR_imm<MVEVectorVTInfo VTI_, Operand immType>
: MVE_shift_with_imm<"vrshr", VTI_.Suffix, (outs MQPR:$Qd),
(ins MQPR:$Qm, immType:$imm), "$Qd, $Qm, $imm",
- vpred_r, ""> {
+ vpred_r, "", VTI_.Size> {
bits<6> imm;
let Inst{28} = VTI_.Unsigned;
@@ -3400,7 +3410,7 @@ multiclass MVE_shift_imm_patterns<MVE_shift_with_imm inst> {
(inst.VTI.Vec MQPR:$inactive)))),
(inst.VTI.Vec (inst (inst.VTI.Vec MQPR:$src),
inst.immediateType:$imm,
- ARMVCCThen, (inst.VTI.Pred VCCR:$mask),
+ ARMVCCThen, (inst.VTI.Pred VCCR:$mask), zero_reg,
(inst.VTI.Vec MQPR:$inactive)))>;
}
@@ -3420,10 +3430,10 @@ defm : MVE_shift_imm_patterns<MVE_VRSHR_immu16>;
defm : MVE_shift_imm_patterns<MVE_VRSHR_imms32>;
defm : MVE_shift_imm_patterns<MVE_VRSHR_immu32>;
-class MVE_VSHR_imm<string suffix, dag imm>
+class MVE_VSHR_imm<string suffix, dag imm, bits<2> vecsize>
: MVE_shift_with_imm<"vshr", suffix, (outs MQPR:$Qd),
!con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
- vpred_r, ""> {
+ vpred_r, "", vecsize> {
bits<6> imm;
let Inst{25-24} = 0b11;
@@ -3431,40 +3441,40 @@ class MVE_VSHR_imm<string suffix, dag imm>
let Inst{10-8} = 0b000;
}
-def MVE_VSHR_imms8 : MVE_VSHR_imm<"s8", (ins shr_imm8:$imm)> {
+def MVE_VSHR_imms8 : MVE_VSHR_imm<"s8", (ins shr_imm8:$imm), 0b00> {
let Inst{28} = 0b0;
let Inst{21-19} = 0b001;
}
-def MVE_VSHR_immu8 : MVE_VSHR_imm<"u8", (ins shr_imm8:$imm)> {
+def MVE_VSHR_immu8 : MVE_VSHR_imm<"u8", (ins shr_imm8:$imm), 0b00> {
let Inst{28} = 0b1;
let Inst{21-19} = 0b001;
}
-def MVE_VSHR_imms16 : MVE_VSHR_imm<"s16", (ins shr_imm16:$imm)> {
+def MVE_VSHR_imms16 : MVE_VSHR_imm<"s16", (ins shr_imm16:$imm), 0b01> {
let Inst{28} = 0b0;
let Inst{21-20} = 0b01;
}
-def MVE_VSHR_immu16 : MVE_VSHR_imm<"u16", (ins shr_imm16:$imm)> {
+def MVE_VSHR_immu16 : MVE_VSHR_imm<"u16", (ins shr_imm16:$imm), 0b01> {
let Inst{28} = 0b1;
let Inst{21-20} = 0b01;
}
-def MVE_VSHR_imms32 : MVE_VSHR_imm<"s32", (ins shr_imm32:$imm)> {
+def MVE_VSHR_imms32 : MVE_VSHR_imm<"s32", (ins shr_imm32:$imm), 0b10> {
let Inst{28} = 0b0;
let Inst{21} = 0b1;
}
-def MVE_VSHR_immu32 : MVE_VSHR_imm<"u32", (ins shr_imm32:$imm)> {
+def MVE_VSHR_immu32 : MVE_VSHR_imm<"u32", (ins shr_imm32:$imm), 0b10> {
let Inst{28} = 0b1;
let Inst{21} = 0b1;
}
-class MVE_VSHL_imm<string suffix, dag imm>
+class MVE_VSHL_imm<string suffix, dag imm, bits<2> vecsize>
: MVE_shift_with_imm<"vshl", suffix, (outs MQPR:$Qd),
!con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
- vpred_r, ""> {
+ vpred_r, "", vecsize> {
bits<6> imm;
let Inst{28} = 0b0;
@@ -3473,15 +3483,15 @@ class MVE_VSHL_imm<string suffix, dag imm>
let Inst{10-8} = 0b101;
}
-def MVE_VSHL_immi8 : MVE_VSHL_imm<"i8", (ins imm0_7:$imm)> {
+def MVE_VSHL_immi8 : MVE_VSHL_imm<"i8", (ins imm0_7:$imm), 0b00> {
let Inst{21-19} = 0b001;
}
-def MVE_VSHL_immi16 : MVE_VSHL_imm<"i16", (ins imm0_15:$imm)> {
+def MVE_VSHL_immi16 : MVE_VSHL_imm<"i16", (ins imm0_15:$imm), 0b01> {
let Inst{21-20} = 0b01;
}
-def MVE_VSHL_immi32 : MVE_VSHL_imm<"i32", (ins imm0_31:$imm)> {
+def MVE_VSHL_immi32 : MVE_VSHL_imm<"i32", (ins imm0_31:$imm), 0b10> {
let Inst{21} = 0b1;
}
@@ -3497,7 +3507,7 @@ multiclass MVE_immediate_shift_patterns_inner<
(pred_int (VTI.Pred VCCR:$mask),
(VTI.Vec MQPR:$inactive)))),
(VTI.Vec (inst (VTI.Vec MQPR:$src), imm_operand_type:$imm,
- ARMVCCThen, (VTI.Pred VCCR:$mask),
+ ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg,
(VTI.Vec MQPR:$inactive)))>;
}
@@ -3525,8 +3535,8 @@ let Predicates = [HasMVEInt] in {
// start of MVE Floating Point instructions
class MVE_float<string iname, string suffix, dag oops, dag iops, string ops,
- vpred_ops vpred, string cstr, list<dag> pattern=[]>
- : MVE_f<oops, iops, NoItinerary, iname, suffix, ops, vpred, cstr, pattern> {
+ vpred_ops vpred, string cstr, bits<2> vecsize, list<dag> pattern=[]>
+ : MVE_f<oops, iops, NoItinerary, iname, suffix, ops, vpred, cstr, vecsize, pattern> {
bits<4> Qm;
let Inst{12} = 0b0;
@@ -3539,7 +3549,7 @@ class MVE_float<string iname, string suffix, dag oops, dag iops, string ops,
class MVE_VRINT<string rmode, bits<3> op, string suffix, bits<2> size,
list<dag> pattern=[]>
: MVE_float<!strconcat("vrint", rmode), suffix, (outs MQPR:$Qd),
- (ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", pattern> {
+ (ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", size, pattern> {
bits<4> Qd;
let Inst{28} = 0b1;
@@ -3568,7 +3578,7 @@ multiclass MVE_VRINT_m<MVEVectorVTInfo VTI, string suffix, bits<3> opcode,
def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$val), (VTI.Pred VCCR:$pred),
(VTI.Vec MQPR:$inactive))),
(VTI.Vec (Inst (VTI.Vec MQPR:$val), ARMVCCThen,
- (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive)))>;
+ (VTI.Pred VCCR:$pred), zero_reg, (VTI.Vec MQPR:$inactive)))>;
}
}
@@ -3586,16 +3596,16 @@ defm MVE_VRINTf32 : MVE_VRINT_ops<MVE_v4f32>;
class MVEFloatArithNeon<string iname, string suffix, bit size,
dag oops, dag iops, string ops,
- vpred_ops vpred, string cstr, list<dag> pattern=[]>
- : MVE_float<iname, suffix, oops, iops, ops, vpred, cstr, pattern> {
+ vpred_ops vpred, string cstr, bits<2> vecsize, list<dag> pattern=[]>
+ : MVE_float<iname, suffix, oops, iops, ops, vpred, cstr, vecsize, pattern> {
let Inst{20} = size;
let Inst{16} = 0b0;
}
-class MVE_VMUL_fp<string iname, string suffix, bit size, list<dag> pattern=[]>
- : MVEFloatArithNeon<iname, suffix, size, (outs MQPR:$Qd),
+class MVE_VMUL_fp<string iname, string suffix, bits<2> size, list<dag> pattern=[]>
+ : MVEFloatArithNeon<iname, suffix, size{0}, (outs MQPR:$Qd),
(ins MQPR:$Qn, MQPR:$Qm), "$Qd, $Qn, $Qm", vpred_r, "",
- pattern> {
+ size, pattern> {
bits<4> Qd;
bits<4> Qn;
@@ -3611,9 +3621,9 @@ class MVE_VMUL_fp<string iname, string suffix, bit size, list<dag> pattern=[]>
let validForTailPredication = 1;
}
-multiclass MVE_VMULT_fp_m<string iname, bit bit_21, MVEVectorVTInfo VTI,
+multiclass MVE_VMULT_fp_m<string iname, MVEVectorVTInfo VTI,
SDNode Op, Intrinsic PredInt> {
- def "" : MVE_VMUL_fp<iname, VTI.Suffix, VTI.Size{0}>;
+ def "" : MVE_VMUL_fp<iname, VTI.Suffix, VTI.Size>;
defvar Inst = !cast<Instruction>(NAME);
let Predicates = [HasMVEFloat] in {
@@ -3622,15 +3632,15 @@ multiclass MVE_VMULT_fp_m<string iname, bit bit_21, MVEVectorVTInfo VTI,
}
multiclass MVE_VMUL_fp_m<MVEVectorVTInfo VTI>
- : MVE_VMULT_fp_m<"vmul", 0, VTI, fmul, int_arm_mve_mul_predicated>;
+ : MVE_VMULT_fp_m<"vmul", VTI, fmul, int_arm_mve_mul_predicated>;
defm MVE_VMULf32 : MVE_VMUL_fp_m<MVE_v4f32>;
defm MVE_VMULf16 : MVE_VMUL_fp_m<MVE_v8f16>;
-class MVE_VCMLA<string suffix, bit size>
- : MVEFloatArithNeon<"vcmla", suffix, size, (outs MQPR:$Qd),
+class MVE_VCMLA<string suffix, bits<2> size>
+ : MVEFloatArithNeon<"vcmla", suffix, size{1}, (outs MQPR:$Qd),
(ins MQPR:$Qd_src, MQPR:$Qn, MQPR:$Qm, complexrotateop:$rot),
- "$Qd, $Qn, $Qm, $rot", vpred_n, "$Qd = $Qd_src", []> {
+ "$Qd, $Qn, $Qm, $rot", vpred_n, "$Qd = $Qd_src", size, []> {
bits<4> Qd;
bits<4> Qn;
bits<2> rot;
@@ -3647,8 +3657,8 @@ class MVE_VCMLA<string suffix, bit size>
let Inst{4} = 0b0;
}
-multiclass MVE_VCMLA_m<MVEVectorVTInfo VTI, bit size> {
- def "" : MVE_VCMLA<VTI.Suffix, size>;
+multiclass MVE_VCMLA_m<MVEVectorVTInfo VTI> {
+ def "" : MVE_VCMLA<VTI.Suffix, VTI.Size>;
defvar Inst = !cast<Instruction>(NAME);
let Predicates = [HasMVEFloat] in {
@@ -3665,21 +3675,21 @@ multiclass MVE_VCMLA_m<MVEVectorVTInfo VTI, bit size> {
(VTI.Pred VCCR:$mask))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src), (VTI.Vec MQPR:$Qn),
(VTI.Vec MQPR:$Qm), imm:$rot,
- ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
+ ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg))>;
}
}
-defm MVE_VCMLAf16 : MVE_VCMLA_m<MVE_v8f16, 0b0>;
-defm MVE_VCMLAf32 : MVE_VCMLA_m<MVE_v4f32, 0b1>;
+defm MVE_VCMLAf16 : MVE_VCMLA_m<MVE_v8f16>;
+defm MVE_VCMLAf32 : MVE_VCMLA_m<MVE_v4f32>;
-class MVE_VADDSUBFMA_fp<string iname, string suffix, bit size, bit bit_4,
+class MVE_VADDSUBFMA_fp<string iname, string suffix, bits<2> size, bit bit_4,
bit bit_8, bit bit_21, dag iops=(ins),
vpred_ops vpred=vpred_r, string cstr="",
list<dag> pattern=[]>
- : MVEFloatArithNeon<iname, suffix, size, (outs MQPR:$Qd),
+ : MVEFloatArithNeon<iname, suffix, size{0}, (outs MQPR:$Qd),
!con(iops, (ins MQPR:$Qn, MQPR:$Qm)), "$Qd, $Qn, $Qm",
- vpred, cstr, pattern> {
+ vpred, cstr, size, pattern> {
bits<4> Qd;
bits<4> Qn;
@@ -3697,7 +3707,7 @@ class MVE_VADDSUBFMA_fp<string iname, string suffix, bit size, bit bit_4,
}
multiclass MVE_VFMA_fp_multi<string iname, bit fms, MVEVectorVTInfo VTI> {
- def "" : MVE_VADDSUBFMA_fp<iname, VTI.Suffix, VTI.Size{0}, 0b1, 0b0, fms,
+ def "" : MVE_VADDSUBFMA_fp<iname, VTI.Suffix, VTI.Size, 0b1, 0b0, fms,
(ins MQPR:$Qd_src), vpred_n, "$Qd = $Qd_src">;
defvar Inst = !cast<Instruction>(NAME);
defvar pred_int = int_arm_mve_fma_predicated;
@@ -3713,20 +3723,20 @@ multiclass MVE_VFMA_fp_multi<string iname, bit fms, MVEVectorVTInfo VTI> {
def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
(VTI.Vec (fma (fneg m1), m2, add)),
add)),
- (Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
+ (Inst $add, $m1, $m2, ARMVCCThen, $pred, zero_reg)>;
def : Pat<(VTI.Vec (pred_int (fneg m1), m2, add, pred)),
- (Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
+ (Inst $add, $m1, $m2, ARMVCCThen, $pred, zero_reg)>;
def : Pat<(VTI.Vec (pred_int m1, (fneg m2), add, pred)),
- (Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
+ (Inst $add, $m1, $m2, ARMVCCThen, $pred, zero_reg)>;
} else {
def : Pat<(VTI.Vec (fma m1, m2, add)),
(Inst $add, $m1, $m2)>;
def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
(VTI.Vec (fma m1, m2, add)),
add)),
- (Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
+ (Inst $add, $m1, $m2, ARMVCCThen, $pred, zero_reg)>;
def : Pat<(VTI.Vec (pred_int m1, m2, add, pred)),
- (Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
+ (Inst $add, $m1, $m2, ARMVCCThen, $pred, zero_reg)>;
}
}
}
@@ -3738,7 +3748,7 @@ defm MVE_VFMSf16 : MVE_VFMA_fp_multi<"vfms", 1, MVE_v8f16>;
multiclass MVE_VADDSUB_fp_m<string iname, bit bit_21, MVEVectorVTInfo VTI,
SDNode Op, Intrinsic PredInt> {
- def "" : MVE_VADDSUBFMA_fp<iname, VTI.Suffix, VTI.Size{0}, 0, 1, bit_21> {
+ def "" : MVE_VADDSUBFMA_fp<iname, VTI.Suffix, VTI.Size, 0, 1, bit_21> {
let validForTailPredication = 1;
}
defvar Inst = !cast<Instruction>(NAME);
@@ -3759,10 +3769,10 @@ defm MVE_VADDf16 : MVE_VADD_fp_m<MVE_v8f16>;
defm MVE_VSUBf32 : MVE_VSUB_fp_m<MVE_v4f32>;
defm MVE_VSUBf16 : MVE_VSUB_fp_m<MVE_v8f16>;
-class MVE_VCADD<string suffix, bit size, string cstr="">
- : MVEFloatArithNeon<"vcadd", suffix, size, (outs MQPR:$Qd),
+class MVE_VCADD<string suffix, bits<2> size, string cstr="">
+ : MVEFloatArithNeon<"vcadd", suffix, size{1}, (outs MQPR:$Qd),
(ins MQPR:$Qn, MQPR:$Qm, complexrotateopodd:$rot),
- "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, []> {
+ "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, size, []> {
bits<4> Qd;
bits<4> Qn;
bit rot;
@@ -3780,8 +3790,8 @@ class MVE_VCADD<string suffix, bit size, string cstr="">
let Inst{4} = 0b0;
}
-multiclass MVE_VCADD_m<MVEVectorVTInfo VTI, bit size, string cstr=""> {
- def "" : MVE_VCADD<VTI.Suffix, size, cstr>;
+multiclass MVE_VCADD_m<MVEVectorVTInfo VTI, string cstr=""> {
+ def "" : MVE_VCADD<VTI.Suffix, VTI.Size, cstr>;
defvar Inst = !cast<Instruction>(NAME);
let Predicates = [HasMVEFloat] in {
@@ -3795,18 +3805,18 @@ multiclass MVE_VCADD_m<MVEVectorVTInfo VTI, bit size, string cstr=""> {
(VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
(VTI.Pred VCCR:$mask))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
- imm:$rot, ARMVCCThen, (VTI.Pred VCCR:$mask),
+ imm:$rot, ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg,
(VTI.Vec MQPR:$inactive)))>;
}
}
-defm MVE_VCADDf16 : MVE_VCADD_m<MVE_v8f16, 0b0>;
-defm MVE_VCADDf32 : MVE_VCADD_m<MVE_v4f32, 0b1, "@earlyclobber $Qd">;
+defm MVE_VCADDf16 : MVE_VCADD_m<MVE_v8f16>;
+defm MVE_VCADDf32 : MVE_VCADD_m<MVE_v4f32, "@earlyclobber $Qd">;
-class MVE_VABD_fp<string suffix, bit size>
+class MVE_VABD_fp<string suffix, bits<2> size>
: MVE_float<"vabd", suffix, (outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm),
- "$Qd, $Qn, $Qm", vpred_r, ""> {
+ "$Qd, $Qn, $Qm", vpred_r, "", size> {
bits<4> Qd;
bits<4> Qn;
@@ -3814,7 +3824,7 @@ class MVE_VABD_fp<string suffix, bit size>
let Inst{25-23} = 0b110;
let Inst{22} = Qd{3};
let Inst{21} = 0b1;
- let Inst{20} = size;
+ let Inst{20} = size{0};
let Inst{19-17} = Qn{2-0};
let Inst{16} = 0b0;
let Inst{15-13} = Qd{2-0};
@@ -3826,7 +3836,7 @@ class MVE_VABD_fp<string suffix, bit size>
multiclass MVE_VABDT_fp_m<MVEVectorVTInfo VTI,
Intrinsic unpred_int, Intrinsic pred_int> {
- def "" : MVE_VABD_fp<VTI.Suffix, VTI.Size{0}>;
+ def "" : MVE_VABD_fp<VTI.Suffix, VTI.Size>;
defvar Inst = !cast<Instruction>(NAME);
let Predicates = [HasMVEFloat] in {
@@ -3837,7 +3847,7 @@ multiclass MVE_VABDT_fp_m<MVEVectorVTInfo VTI,
(i32 0), (VTI.Pred VCCR:$mask),
(VTI.Vec MQPR:$inactive))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- ARMVCCThen, (VTI.Pred VCCR:$mask),
+ ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg,
(VTI.Vec MQPR:$inactive)))>;
}
}
@@ -3846,7 +3856,7 @@ multiclass MVE_VABD_fp_m<MVEVectorVTInfo VTI>
: MVE_VABDT_fp_m<VTI, int_arm_mve_vabd, int_arm_mve_abd_predicated>;
defm MVE_VABDf32 : MVE_VABD_fp_m<MVE_v4f32>;
-defm MVE_VABDf16 : MVE_VABD_fp_m<MVE_v8f16>;
+defm MVE_VABDf16 : MVE_VABD_fp_m<MVE_v8f16>;
let Predicates = [HasMVEFloat] in {
def : Pat<(v8f16 (fabs (fsub (v8f16 MQPR:$Qm), (v8f16 MQPR:$Qn)))),
@@ -3859,7 +3869,7 @@ class MVE_VCVT_fix<string suffix, bit fsi, bit U, bit op,
Operand imm_operand_type>
: MVE_float<"vcvt", suffix,
(outs MQPR:$Qd), (ins MQPR:$Qm, imm_operand_type:$imm6),
- "$Qd, $Qm, $imm6", vpred_r, "", []> {
+ "$Qd, $Qm, $imm6", vpred_r, "", !if(fsi, 0b10, 0b01), []> {
bits<4> Qd;
bits<6> imm6;
@@ -3913,7 +3923,7 @@ multiclass MVE_VCVT_fix_patterns<Instruction Inst, bit U, MVEVectorVTInfo DestVT
imm:$scale,
(DestVTI.Pred VCCR:$mask))),
(DestVTI.Vec (Inst (SrcVTI.Vec MQPR:$Qm), imm:$scale,
- ARMVCCThen, (DestVTI.Pred VCCR:$mask),
+ ARMVCCThen, (DestVTI.Pred VCCR:$mask), zero_reg,
(DestVTI.Vec MQPR:$inactive)))>;
}
}
@@ -3942,7 +3952,7 @@ defm MVE_VCVTu32f32_fix : MVE_VCVT_fix_f32_m<0b1, 0b1, MVE_v4u32, MVE_v4f32>;
class MVE_VCVT_fp_int_anpm<string suffix, bits<2> size, bit op, string anpm,
bits<2> rm, list<dag> pattern=[]>
: MVE_float<!strconcat("vcvt", anpm), suffix, (outs MQPR:$Qd),
- (ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", pattern> {
+ (ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", size, pattern> {
bits<4> Qd;
let Inst{28} = 0b1;
@@ -3976,7 +3986,7 @@ multiclass MVE_VCVT_fp_int_anpm_inner<MVEVectorVTInfo Int, MVEVectorVTInfo Flt,
def : Pat<(Int.Vec (PredIntr (i32 Int.Unsigned), (Int.Vec MQPR:$inactive),
(Flt.Vec MQPR:$in), (Flt.Pred VCCR:$pred))),
(Int.Vec (Inst (Flt.Vec MQPR:$in), ARMVCCThen,
- (Flt.Pred VCCR:$pred), (Int.Vec MQPR:$inactive)))>;
+ (Flt.Pred VCCR:$pred), zero_reg, (Int.Vec MQPR:$inactive)))>;
}
}
@@ -3999,7 +4009,7 @@ defm MVE_VCVTu32f32 : MVE_VCVT_fp_int_anpm_outer<MVE_v4u32, MVE_v4f32>;
class MVE_VCVT_fp_int<string suffix, bits<2> size, bit toint, bit unsigned,
list<dag> pattern=[]>
: MVE_float<"vcvt", suffix, (outs MQPR:$Qd),
- (ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", pattern> {
+ (ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", size, pattern> {
bits<4> Qd;
let Inst{28} = 0b1;
@@ -4032,7 +4042,7 @@ multiclass MVE_VCVT_fp_int_m<MVEVectorVTInfo Dest, MVEVectorVTInfo Src,
(Src.Vec MQPR:$src), (i32 Unsigned),
(Src.Pred VCCR:$mask), (Dest.Vec MQPR:$inactive))),
(Dest.Vec (Inst (Src.Vec MQPR:$src), ARMVCCThen,
- (Src.Pred VCCR:$mask),
+ (Src.Pred VCCR:$mask), zero_reg,
(Dest.Vec MQPR:$inactive)))>;
}
}
@@ -4048,10 +4058,21 @@ defm MVE_VCVTf16u16n : MVE_VCVT_fp_int_m<MVE_v8f16, MVE_v8u16, uint_to_fp>;
defm MVE_VCVTf32s32n : MVE_VCVT_fp_int_m<MVE_v4f32, MVE_v4s32, sint_to_fp>;
defm MVE_VCVTf32u32n : MVE_VCVT_fp_int_m<MVE_v4f32, MVE_v4u32, uint_to_fp>;
+let Predicates = [HasMVEFloat] in {
+ def : Pat<(v4i32 (fp_to_sint_sat v4f32:$src, i32)),
+ (MVE_VCVTs32f32z v4f32:$src)>;
+ def : Pat<(v4i32 (fp_to_uint_sat v4f32:$src, i32)),
+ (MVE_VCVTu32f32z v4f32:$src)>;
+ def : Pat<(v8i16 (fp_to_sint_sat v8f16:$src, i16)),
+ (MVE_VCVTs16f16z v8f16:$src)>;
+ def : Pat<(v8i16 (fp_to_uint_sat v8f16:$src, i16)),
+ (MVE_VCVTu16f16z v8f16:$src)>;
+}
+
class MVE_VABSNEG_fp<string iname, string suffix, bits<2> size, bit negate,
list<dag> pattern=[]>
: MVE_float<iname, suffix, (outs MQPR:$Qd),
- (ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", pattern> {
+ (ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", size, pattern> {
bits<4> Qd;
let Inst{28} = 0b1;
@@ -4077,7 +4098,7 @@ multiclass MVE_VABSNEG_fp_m<string iname, SDNode unpred_op, Intrinsic pred_int,
(VTI.Vec (Inst $v))>;
def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$v), (VTI.Pred VCCR:$mask),
(VTI.Vec MQPR:$inactive))),
- (VTI.Vec (Inst $v, ARMVCCThen, $mask, $inactive))>;
+ (VTI.Vec (Inst $v, ARMVCCThen, $mask, zero_reg, $inactive))>;
}
}
@@ -4090,15 +4111,15 @@ defm MVE_VNEGf16 : MVE_VABSNEG_fp_m<"vneg", fneg, int_arm_mve_neg_predicated,
defm MVE_VNEGf32 : MVE_VABSNEG_fp_m<"vneg", fneg, int_arm_mve_neg_predicated,
MVE_v4f32, 1>;
-class MVE_VMAXMINNMA<string iname, string suffix, bit size, bit bit_12,
+class MVE_VMAXMINNMA<string iname, string suffix, bits<2> size, bit bit_12,
list<dag> pattern=[]>
: MVE_f<(outs MQPR:$Qd), (ins MQPR:$Qd_src, MQPR:$Qm),
NoItinerary, iname, suffix, "$Qd, $Qm", vpred_n, "$Qd = $Qd_src",
- pattern> {
+ size, pattern> {
bits<4> Qd;
bits<4> Qm;
- let Inst{28} = size;
+ let Inst{28} = size{0};
let Inst{25-23} = 0b100;
let Inst{22} = Qd{3};
let Inst{21-16} = 0b111111;
@@ -4111,12 +4132,13 @@ class MVE_VMAXMINNMA<string iname, string suffix, bit size, bit bit_12,
let Inst{0} = 0b1;
let isCommutable = 1;
+ let validForTailPredication = 1;
}
multiclass MVE_VMAXMINNMA_m<string iname, MVEVectorVTInfo VTI,
SDNode unpred_op, Intrinsic pred_int,
bit bit_12> {
- def "" : MVE_VMAXMINNMA<iname, VTI.Suffix, VTI.Size{0}, bit_12>;
+ def "" : MVE_VMAXMINNMA<iname, VTI.Suffix, VTI.Size, bit_12>;
defvar Inst = !cast<Instruction>(NAME);
let Predicates = [HasMVEInt] in {
@@ -4129,7 +4151,7 @@ multiclass MVE_VMAXMINNMA_m<string iname, MVEVectorVTInfo VTI,
def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qd), (VTI.Vec MQPR:$Qm),
(VTI.Pred VCCR:$mask))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qd), (VTI.Vec MQPR:$Qm),
- ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
+ ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg))>;
}
}
@@ -4150,9 +4172,9 @@ defm MVE_VMINNMAf16 : MVE_VMINNMA<MVE_v8f16, 0b1>;
// start of MVE compares
class MVE_VCMPqq<string suffix, bit bit_28, bits<2> bits_21_20,
- VCMPPredicateOperand predtype, list<dag> pattern=[]>
+ VCMPPredicateOperand predtype, bits<2> vecsize, list<dag> pattern=[]>
: MVE_p<(outs VCCR:$P0), (ins MQPR:$Qn, MQPR:$Qm, predtype:$fc),
- NoItinerary, "vcmp", suffix, "$fc, $Qn, $Qm", vpred_n, "", pattern> {
+ NoItinerary, "vcmp", suffix, "$fc, $Qn, $Qm", vpred_n, "", vecsize, pattern> {
// Base class for comparing two vector registers
bits<3> fc;
bits<4> Qn;
@@ -4187,24 +4209,24 @@ class MVE_VCMPqq<string suffix, bit bit_28, bits<2> bits_21_20,
}
class MVE_VCMPqqf<string suffix, bit size>
- : MVE_VCMPqq<suffix, size, 0b11, pred_basic_fp> {
+ : MVE_VCMPqq<suffix, size, 0b11, pred_basic_fp, !if(size, 0b01, 0b10)> {
let Predicates = [HasMVEFloat];
}
class MVE_VCMPqqi<string suffix, bits<2> size>
- : MVE_VCMPqq<suffix, 0b1, size, pred_basic_i> {
+ : MVE_VCMPqq<suffix, 0b1, size, pred_basic_i, size> {
let Inst{12} = 0b0;
let Inst{0} = 0b0;
}
class MVE_VCMPqqu<string suffix, bits<2> size>
- : MVE_VCMPqq<suffix, 0b1, size, pred_basic_u> {
+ : MVE_VCMPqq<suffix, 0b1, size, pred_basic_u, size> {
let Inst{12} = 0b0;
let Inst{0} = 0b1;
}
class MVE_VCMPqqs<string suffix, bits<2> size>
- : MVE_VCMPqq<suffix, 0b1, size, pred_basic_s> {
+ : MVE_VCMPqq<suffix, 0b1, size, pred_basic_s, size> {
let Inst{12} = 0b1;
}
@@ -4224,9 +4246,9 @@ def MVE_VCMPs16 : MVE_VCMPqqs<"s16", 0b01>;
def MVE_VCMPs32 : MVE_VCMPqqs<"s32", 0b10>;
class MVE_VCMPqr<string suffix, bit bit_28, bits<2> bits_21_20,
- VCMPPredicateOperand predtype, list<dag> pattern=[]>
+ VCMPPredicateOperand predtype, bits<2> vecsize, list<dag> pattern=[]>
: MVE_p<(outs VCCR:$P0), (ins MQPR:$Qn, GPRwithZR:$Rm, predtype:$fc),
- NoItinerary, "vcmp", suffix, "$fc, $Qn, $Rm", vpred_n, "", pattern> {
+ NoItinerary, "vcmp", suffix, "$fc, $Qn, $Rm", vpred_n, "", vecsize, pattern> {
// Base class for comparing a vector register with a scalar
bits<3> fc;
bits<4> Qn;
@@ -4252,24 +4274,24 @@ class MVE_VCMPqr<string suffix, bit bit_28, bits<2> bits_21_20,
}
class MVE_VCMPqrf<string suffix, bit size>
- : MVE_VCMPqr<suffix, size, 0b11, pred_basic_fp> {
+ : MVE_VCMPqr<suffix, size, 0b11, pred_basic_fp, !if(size, 0b01, 0b10)> {
let Predicates = [HasMVEFloat];
}
class MVE_VCMPqri<string suffix, bits<2> size>
- : MVE_VCMPqr<suffix, 0b1, size, pred_basic_i> {
+ : MVE_VCMPqr<suffix, 0b1, size, pred_basic_i, size> {
let Inst{12} = 0b0;
let Inst{5} = 0b0;
}
class MVE_VCMPqru<string suffix, bits<2> size>
- : MVE_VCMPqr<suffix, 0b1, size, pred_basic_u> {
+ : MVE_VCMPqr<suffix, 0b1, size, pred_basic_u, size> {
let Inst{12} = 0b0;
let Inst{5} = 0b1;
}
class MVE_VCMPqrs<string suffix, bits<2> size>
- : MVE_VCMPqr<suffix, 0b1, size, pred_basic_s> {
+ : MVE_VCMPqr<suffix, 0b1, size, pred_basic_s, size> {
let Inst{12} = 0b1;
}
@@ -4297,11 +4319,11 @@ multiclass unpred_vcmp_z<string suffix, PatLeaf fc> {
(v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), ZR, fc))>;
def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmpz (v16i8 MQPR:$v1), fc)))),
- (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1))>;
+ (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1, zero_reg))>;
def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmpz (v8i16 MQPR:$v1), fc)))),
- (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1))>;
+ (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1, zero_reg))>;
def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmpz (v4i32 MQPR:$v1), fc)))),
- (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1))>;
+ (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1, zero_reg))>;
}
multiclass unpred_vcmp_r<string suffix, PatLeaf fc> {
@@ -4320,18 +4342,18 @@ multiclass unpred_vcmp_r<string suffix, PatLeaf fc> {
(v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 rGPR:$v2), fc))>;
def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc)))),
- (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8") (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
+ (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8") (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1, zero_reg))>;
def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc)))),
- (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16") (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
+ (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16") (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1, zero_reg))>;
def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc)))),
- (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32") (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
+ (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32") (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1, zero_reg))>;
def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 (ARMvdup rGPR:$v2)), fc)))),
- (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), (i32 rGPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
+ (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), (i32 rGPR:$v2), fc, ARMVCCThen, VCCR:$p1, zero_reg))>;
def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 (ARMvdup rGPR:$v2)), fc)))),
- (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), (i32 rGPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
+ (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), (i32 rGPR:$v2), fc, ARMVCCThen, VCCR:$p1, zero_reg))>;
def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 (ARMvdup rGPR:$v2)), fc)))),
- (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 rGPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
+ (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 rGPR:$v2), fc, ARMVCCThen, VCCR:$p1, zero_reg))>;
}
multiclass unpred_vcmpf_z<PatLeaf fc> {
@@ -4341,9 +4363,9 @@ multiclass unpred_vcmpf_z<PatLeaf fc> {
(v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, fc))>;
def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmpz (v8f16 MQPR:$v1), fc)))),
- (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1))>;
+ (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1, zero_reg))>;
def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmpz (v4f32 MQPR:$v1), fc)))),
- (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1))>;
+ (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1, zero_reg))>;
}
multiclass unpred_vcmpf_r<PatLeaf fc> {
@@ -4358,14 +4380,14 @@ multiclass unpred_vcmpf_r<PatLeaf fc> {
(v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 rGPR:$v2), fc))>;
def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc)))),
- (v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
+ (v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1, zero_reg))>;
def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc)))),
- (v4i1 (MVE_VCMPf32 (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
+ (v4i1 (MVE_VCMPf32 (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1, zero_reg))>;
def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 (ARMvdup rGPR:$v2)), fc)))),
- (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), (i32 rGPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
+ (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), (i32 rGPR:$v2), fc, ARMVCCThen, VCCR:$p1, zero_reg))>;
def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup rGPR:$v2)), fc)))),
- (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 rGPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
+ (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 rGPR:$v2), fc, ARMVCCThen, VCCR:$p1, zero_reg))>;
}
let Predicates = [HasMVEInt] in {
@@ -4477,9 +4499,9 @@ let Predicates = [HasMVEInt] in {
class MVE_qDest_qSrc<string iname, string suffix, dag oops, dag iops,
string ops, vpred_ops vpred, string cstr,
- list<dag> pattern=[]>
+ bits<2> vecsize, list<dag> pattern=[]>
: MVE_p<oops, iops, NoItinerary, iname, suffix,
- ops, vpred, cstr, pattern> {
+ ops, vpred, cstr, vecsize, pattern> {
bits<4> Qd;
bits<4> Qm;
@@ -4494,10 +4516,11 @@ class MVE_qDest_qSrc<string iname, string suffix, dag oops, dag iops,
}
class MVE_VQxDMLxDH<string iname, bit exch, bit round, bit subtract,
- string suffix, bits<2> size, string cstr="", list<dag> pattern=[]>
+ string suffix, bits<2> size, string cstr="",
+ list<dag> pattern=[]>
: MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
(ins MQPR:$Qd_src, MQPR:$Qn, MQPR:$Qm), "$Qd, $Qn, $Qm",
- vpred_n, "$Qd = $Qd_src"#cstr, pattern> {
+ vpred_n, "$Qd = $Qd_src"#cstr, size, pattern> {
bits<4> Qn;
let Inst{28} = subtract;
@@ -4528,7 +4551,7 @@ multiclass MVE_VQxDMLxDH_p<string iname, bit exch, bit round, bit subtract,
(? (VTI.Pred VCCR:$pred)))),
(VTI.Vec (Inst (VTI.Vec MQPR:$a), (VTI.Vec MQPR:$b),
(VTI.Vec MQPR:$c),
- ARMVCCThen, (VTI.Pred VCCR:$pred)))>;
+ ARMVCCThen, (VTI.Pred VCCR:$pred), zero_reg))>;
}
multiclass MVE_VQxDMLxDH_multi<string iname, bit exch,
@@ -4547,14 +4570,15 @@ defm MVE_VQDMLSDHX : MVE_VQxDMLxDH_multi<"vqdmlsdhx", 0b1, 0b0, 0b1>;
defm MVE_VQRDMLSDH : MVE_VQxDMLxDH_multi<"vqrdmlsdh", 0b0, 0b1, 0b1>;
defm MVE_VQRDMLSDHX : MVE_VQxDMLxDH_multi<"vqrdmlsdhx", 0b1, 0b1, 0b1>;
-class MVE_VCMUL<string iname, string suffix, bit size, string cstr="">
+class MVE_VCMUL<string iname, string suffix, bits<2> size, string cstr="">
: MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
(ins MQPR:$Qn, MQPR:$Qm, complexrotateop:$rot),
- "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, []> {
+ "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, size,
+ []> {
bits<4> Qn;
bits<2> rot;
- let Inst{28} = size;
+ let Inst{28} = size{1};
let Inst{21-20} = 0b11;
let Inst{19-17} = Qn{2-0};
let Inst{16} = 0b0;
@@ -4567,8 +4591,8 @@ class MVE_VCMUL<string iname, string suffix, bit size, string cstr="">
}
multiclass MVE_VCMUL_m<string iname, MVEVectorVTInfo VTI,
- bit size, string cstr=""> {
- def "" : MVE_VCMUL<iname, VTI.Suffix, size, cstr>;
+ string cstr=""> {
+ def "" : MVE_VCMUL<iname, VTI.Suffix, VTI.Size, cstr>;
defvar Inst = !cast<Instruction>(NAME);
let Predicates = [HasMVEFloat] in {
@@ -4582,20 +4606,20 @@ multiclass MVE_VCMUL_m<string iname, MVEVectorVTInfo VTI,
(VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
(VTI.Pred VCCR:$mask))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
- imm:$rot, ARMVCCThen, (VTI.Pred VCCR:$mask),
+ imm:$rot, ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg,
(VTI.Vec MQPR:$inactive)))>;
}
}
-defm MVE_VCMULf16 : MVE_VCMUL_m<"vcmul", MVE_v8f16, 0b0>;
-defm MVE_VCMULf32 : MVE_VCMUL_m<"vcmul", MVE_v4f32, 0b1, "@earlyclobber $Qd">;
+defm MVE_VCMULf16 : MVE_VCMUL_m<"vcmul", MVE_v8f16>;
+defm MVE_VCMULf32 : MVE_VCMUL_m<"vcmul", MVE_v4f32, "@earlyclobber $Qd">;
class MVE_VMULL<string iname, string suffix, bit bit_28, bits<2> bits_21_20,
- bit T, string cstr, list<dag> pattern=[]>
+ bit T, string cstr, bits<2> vecsize, list<dag> pattern=[]>
: MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
(ins MQPR:$Qn, MQPR:$Qm), "$Qd, $Qn, $Qm",
- vpred_r, cstr, pattern> {
+ vpred_r, cstr, vecsize, pattern> {
bits<4> Qd;
bits<4> Qn;
bits<4> Qm;
@@ -4614,9 +4638,9 @@ class MVE_VMULL<string iname, string suffix, bit bit_28, bits<2> bits_21_20,
multiclass MVE_VMULL_m<MVEVectorVTInfo VTI,
SDPatternOperator unpred_op, Intrinsic pred_int,
- bit Top, string cstr=""> {
+ bit Top, bits<2> vecsize, string cstr=""> {
def "" : MVE_VMULL<"vmull" # !if(Top, "t", "b"), VTI.Suffix, VTI.Unsigned,
- VTI.Size, Top, cstr>;
+ VTI.Size, Top, cstr, vecsize>;
defvar Inst = !cast<Instruction>(NAME);
let Predicates = [HasMVEInt] in {
@@ -4634,7 +4658,7 @@ multiclass MVE_VMULL_m<MVEVectorVTInfo VTI,
uflag, (? (i32 Top), (VTI.DblPred VCCR:$mask),
(VTI.DblVec MQPR:$inactive)))),
(VTI.DblVec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- ARMVCCThen, (VTI.DblPred VCCR:$mask),
+ ARMVCCThen, (VTI.DblPred VCCR:$mask), zero_reg,
(VTI.DblVec MQPR:$inactive)))>;
}
}
@@ -4643,43 +4667,43 @@ multiclass MVE_VMULL_m<MVEVectorVTInfo VTI,
// the unsigned bit switches to encoding the size.
defm MVE_VMULLBs8 : MVE_VMULL_m<MVE_v16s8, int_arm_mve_vmull,
- int_arm_mve_mull_int_predicated, 0b0>;
+ int_arm_mve_mull_int_predicated, 0b0, 0b01>;
defm MVE_VMULLTs8 : MVE_VMULL_m<MVE_v16s8, int_arm_mve_vmull,
- int_arm_mve_mull_int_predicated, 0b1>;
+ int_arm_mve_mull_int_predicated, 0b1, 0b01>;
defm MVE_VMULLBs16 : MVE_VMULL_m<MVE_v8s16, int_arm_mve_vmull,
- int_arm_mve_mull_int_predicated, 0b0>;
+ int_arm_mve_mull_int_predicated, 0b0, 0b10>;
defm MVE_VMULLTs16 : MVE_VMULL_m<MVE_v8s16, int_arm_mve_vmull,
- int_arm_mve_mull_int_predicated, 0b1>;
+ int_arm_mve_mull_int_predicated, 0b1, 0b10>;
defm MVE_VMULLBs32 : MVE_VMULL_m<MVE_v4s32, int_arm_mve_vmull,
- int_arm_mve_mull_int_predicated, 0b0,
+ int_arm_mve_mull_int_predicated, 0b0, 0b11,
"@earlyclobber $Qd">;
defm MVE_VMULLTs32 : MVE_VMULL_m<MVE_v4s32, int_arm_mve_vmull,
- int_arm_mve_mull_int_predicated, 0b1,
+ int_arm_mve_mull_int_predicated, 0b1, 0b11,
"@earlyclobber $Qd">;
defm MVE_VMULLBu8 : MVE_VMULL_m<MVE_v16u8, int_arm_mve_vmull,
- int_arm_mve_mull_int_predicated, 0b0>;
+ int_arm_mve_mull_int_predicated, 0b0, 0b01>;
defm MVE_VMULLTu8 : MVE_VMULL_m<MVE_v16u8, int_arm_mve_vmull,
- int_arm_mve_mull_int_predicated, 0b1>;
+ int_arm_mve_mull_int_predicated, 0b1, 0b01>;
defm MVE_VMULLBu16 : MVE_VMULL_m<MVE_v8u16, int_arm_mve_vmull,
- int_arm_mve_mull_int_predicated, 0b0>;
+ int_arm_mve_mull_int_predicated, 0b0, 0b10>;
defm MVE_VMULLTu16 : MVE_VMULL_m<MVE_v8u16, int_arm_mve_vmull,
- int_arm_mve_mull_int_predicated, 0b1>;
+ int_arm_mve_mull_int_predicated, 0b1, 0b10>;
defm MVE_VMULLBu32 : MVE_VMULL_m<MVE_v4u32, int_arm_mve_vmull,
- int_arm_mve_mull_int_predicated, 0b0,
+ int_arm_mve_mull_int_predicated, 0b0, 0b11,
"@earlyclobber $Qd">;
defm MVE_VMULLTu32 : MVE_VMULL_m<MVE_v4u32, int_arm_mve_vmull,
- int_arm_mve_mull_int_predicated, 0b1,
+ int_arm_mve_mull_int_predicated, 0b1, 0b11,
"@earlyclobber $Qd">;
defm MVE_VMULLBp8 : MVE_VMULL_m<MVE_v16p8, int_arm_mve_vmull_poly,
- int_arm_mve_mull_poly_predicated, 0b0>;
+ int_arm_mve_mull_poly_predicated, 0b0, 0b01>;
defm MVE_VMULLTp8 : MVE_VMULL_m<MVE_v16p8, int_arm_mve_vmull_poly,
- int_arm_mve_mull_poly_predicated, 0b1>;
+ int_arm_mve_mull_poly_predicated, 0b1, 0b01>;
defm MVE_VMULLBp16 : MVE_VMULL_m<MVE_v8p16, int_arm_mve_vmull_poly,
- int_arm_mve_mull_poly_predicated, 0b0>;
+ int_arm_mve_mull_poly_predicated, 0b0, 0b10>;
defm MVE_VMULLTp16 : MVE_VMULL_m<MVE_v8p16, int_arm_mve_vmull_poly,
- int_arm_mve_mull_poly_predicated, 0b1>;
+ int_arm_mve_mull_poly_predicated, 0b1, 0b10>;
let Predicates = [HasMVEInt] in {
def : Pat<(v2i64 (ARMvmulls (v4i32 MQPR:$src1), (v4i32 MQPR:$src2))),
@@ -4729,7 +4753,7 @@ class MVE_VxMULH<string iname, string suffix, bit U, bits<2> size, bit round,
list<dag> pattern=[]>
: MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
(ins MQPR:$Qn, MQPR:$Qm), "$Qd, $Qn, $Qm",
- vpred_r, "", pattern> {
+ vpred_r, "", size, pattern> {
bits<4> Qn;
let Inst{28} = U;
@@ -4759,7 +4783,7 @@ multiclass MVE_VxMULH_m<string iname, MVEVectorVTInfo VTI, SDNode unpred_op,
(i32 VTI.Unsigned), (VTI.Pred VCCR:$mask),
(VTI.Vec MQPR:$inactive))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- ARMVCCThen, (VTI.Pred VCCR:$mask),
+ ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg,
(VTI.Vec MQPR:$inactive)))>;
}
@@ -4794,7 +4818,7 @@ class MVE_VxMOVxN<string iname, string suffix, bit bit_28, bit bit_17,
bits<2> size, bit T, list<dag> pattern=[]>
: MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
(ins MQPR:$Qd_src, MQPR:$Qm), "$Qd, $Qm",
- vpred_n, "$Qd = $Qd_src", pattern> {
+ vpred_n, "$Qd = $Qd_src", !if(size, 0b10, 0b01), pattern> {
let Inst{28} = bit_28;
let Inst{21-20} = 0b11;
@@ -4854,7 +4878,7 @@ multiclass MVE_VMOVN_p<Instruction Inst, bit top,
(InVTI.Pred VCCR:$pred))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src),
(InVTI.Vec MQPR:$Qm),
- ARMVCCThen, (InVTI.Pred VCCR:$pred)))>;
+ ARMVCCThen, (InVTI.Pred VCCR:$pred), zero_reg))>;
}
defm : MVE_VMOVN_p<MVE_VMOVNi32bh, 0, MVE_v8i16, MVE_v4i32>;
@@ -4876,7 +4900,7 @@ multiclass MVE_VQMOVN_p<Instruction Inst, bit outU, bit inU, bit top,
(InVTI.Pred VCCR:$pred))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src),
(InVTI.Vec MQPR:$Qm),
- ARMVCCThen, (InVTI.Pred VCCR:$pred)))>;
+ ARMVCCThen, (InVTI.Pred VCCR:$pred), zero_reg))>;
}
defm : MVE_VQMOVN_p<MVE_VQMOVNs32bh, 0, 0, 0, MVE_v8i16, MVE_v4i32>;
@@ -4939,7 +4963,7 @@ class MVE_VCVT_ff<string iname, string suffix, bit op, bit T,
dag iops_extra, vpred_ops vpred, string cstr>
: MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
!con(iops_extra, (ins MQPR:$Qm)), "$Qd, $Qm",
- vpred, cstr, []> {
+ vpred, cstr, 0b10, []> {
let Inst{28} = op;
let Inst{21-16} = 0b111111;
let Inst{12} = T;
@@ -4968,7 +4992,7 @@ multiclass MVE_VCVT_f2h_m<string iname, int half> {
(v8f16 MQPR:$Qd_src), (v4f32 MQPR:$Qm), (i32 half),
(v4i1 VCCR:$mask))),
(v8f16 (Inst (v8f16 MQPR:$Qd_src), (v4f32 MQPR:$Qm),
- ARMVCCThen, (v4i1 VCCR:$mask)))>;
+ ARMVCCThen, (v4i1 VCCR:$mask), zero_reg))>;
def : Pat<(v8f16 (MVEvcvtn (v8f16 MQPR:$Qd_src), (v4f32 MQPR:$Qm), (i32 half))),
(v8f16 (Inst (v8f16 MQPR:$Qd_src), (v4f32 MQPR:$Qm)))>;
@@ -4986,7 +5010,7 @@ multiclass MVE_VCVT_h2f_m<string iname, int half> {
(v4f32 MQPR:$inactive), (v8f16 MQPR:$Qm), (i32 half),
(v4i1 VCCR:$mask))),
(v4f32 (Inst (v8f16 MQPR:$Qm), ARMVCCThen,
- (v4i1 VCCR:$mask), (v4f32 MQPR:$inactive)))>;
+ (v4i1 VCCR:$mask), zero_reg, (v4f32 MQPR:$inactive)))>;
def : Pat<(v4f32 (MVEvcvtl (v8f16 MQPR:$Qm), (i32 half))),
(v4f32 (Inst (v8f16 MQPR:$Qm)))>;
@@ -5002,7 +5026,7 @@ class MVE_VxCADD<string iname, string suffix, bits<2> size, bit halve,
string cstr="">
: MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
(ins MQPR:$Qn, MQPR:$Qm, complexrotateopodd:$rot),
- "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, []> {
+ "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, size, []> {
bits<4> Qn;
bit rot;
@@ -5032,7 +5056,7 @@ multiclass MVE_VxCADD_m<string iname, MVEVectorVTInfo VTI,
(VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
(VTI.Pred VCCR:$mask))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
- imm:$rot, ARMVCCThen, (VTI.Pred VCCR:$mask),
+ imm:$rot, ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg,
(VTI.Vec MQPR:$inactive)))>;
}
@@ -5050,7 +5074,7 @@ class MVE_VADCSBC<string iname, bit I, bit subtract,
dag carryin, list<dag> pattern=[]>
: MVE_qDest_qSrc<iname, "i32", (outs MQPR:$Qd, cl_FPSCR_NZCV:$carryout),
!con((ins MQPR:$Qn, MQPR:$Qm), carryin),
- "$Qd, $Qn, $Qm", vpred_r, "", pattern> {
+ "$Qd, $Qn, $Qm", vpred_r, "", 0b10, pattern> {
bits<4> Qn;
let Inst{28} = subtract;
@@ -5077,7 +5101,7 @@ class MVE_VQDMULL<string iname, string suffix, bit size, bit T,
string cstr="", list<dag> pattern=[]>
: MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
(ins MQPR:$Qn, MQPR:$Qm), "$Qd, $Qn, $Qm",
- vpred_r, cstr, pattern> {
+ vpred_r, cstr, !if(size, 0b10, 0b01), pattern> {
bits<4> Qn;
let Inst{28} = size;
@@ -5108,7 +5132,7 @@ multiclass MVE_VQDMULL_m<string iname, MVEVectorVTInfo VTI, bit size, bit T,
(i32 T), (VTI.DblPred VCCR:$mask),
(VTI.DblVec MQPR:$inactive))),
(VTI.DblVec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- ARMVCCThen, (VTI.DblPred VCCR:$mask),
+ ARMVCCThen, (VTI.DblPred VCCR:$mask), zero_reg,
(VTI.DblVec MQPR:$inactive)))>;
}
}
@@ -5125,10 +5149,9 @@ defm MVE_VQDMULLs32 : MVE_VQDMULL_halves<MVE_v4s32, 0b1, "@earlyclobber $Qd">;
// start of mve_qDest_rSrc
-class MVE_qr_base<dag oops, dag iops, InstrItinClass itin, string iname,
- string suffix, string ops, vpred_ops vpred, string cstr,
- list<dag> pattern=[]>
- : MVE_p<oops, iops, NoItinerary, iname, suffix, ops, vpred, cstr, pattern> {
+class MVE_qr_base<dag oops, dag iops, string iname, string suffix, string ops,
+ vpred_ops vpred, string cstr, bits<2> vecsize, list<dag> pattern=[]>
+ : MVE_p<oops, iops, NoItinerary, iname, suffix, ops, vpred, cstr, vecsize, pattern> {
bits<4> Qd;
bits<4> Qn;
bits<4> Rm;
@@ -5144,19 +5167,19 @@ class MVE_qr_base<dag oops, dag iops, InstrItinClass itin, string iname,
let Inst{3-0} = Rm{3-0};
}
-class MVE_qDest_rSrc<string iname, string suffix, string cstr="", list<dag> pattern=[]>
+class MVE_qDest_rSrc<string iname, string suffix, string cstr="", bits<2> vecsize, list<dag> pattern=[]>
: MVE_qr_base<(outs MQPR:$Qd), (ins MQPR:$Qn, rGPR:$Rm),
- NoItinerary, iname, suffix, "$Qd, $Qn, $Rm", vpred_r, cstr,
- pattern>;
+ iname, suffix, "$Qd, $Qn, $Rm", vpred_r, cstr,
+ vecsize, pattern>;
-class MVE_qDestSrc_rSrc<string iname, string suffix, list<dag> pattern=[]>
+class MVE_qDestSrc_rSrc<string iname, string suffix, bits<2> vecsize, list<dag> pattern=[]>
: MVE_qr_base<(outs MQPR:$Qd), (ins MQPR:$Qd_src, MQPR:$Qn, rGPR:$Rm),
- NoItinerary, iname, suffix, "$Qd, $Qn, $Rm", vpred_n, "$Qd = $Qd_src",
- pattern>;
+ iname, suffix, "$Qd, $Qn, $Rm", vpred_n, "$Qd = $Qd_src",
+ vecsize, pattern>;
-class MVE_qDest_single_rSrc<string iname, string suffix, list<dag> pattern=[]>
+class MVE_qDest_single_rSrc<string iname, string suffix, bits<2> vecsize, list<dag> pattern=[]>
: MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qd_src, rGPR:$Rm), NoItinerary, iname,
- suffix, "$Qd, $Rm", vpred_n, "$Qd = $Qd_src", pattern> {
+ suffix, "$Qd, $Rm", vpred_n, "$Qd = $Qd_src", vecsize, pattern> {
bits<4> Qd;
bits<4> Rm;
@@ -5187,14 +5210,14 @@ multiclass MVE_vec_scalar_int_pat_m<Instruction inst, MVEVectorVTInfo VTI,
(pred_op (VTI.Pred VCCR:$mask),
(VTI.Vec MQPR:$inactive)))),
(VTI.Vec (inst (VTI.Vec MQPR:$Qm), (i32 rGPR:$val),
- ARMVCCThen, (VTI.Pred VCCR:$mask),
+ ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg,
(VTI.Vec MQPR:$inactive)))>;
}
}
class MVE_VADDSUB_qr<string iname, string suffix, bits<2> size,
bit bit_5, bit bit_12, bit bit_16, bit bit_28>
- : MVE_qDest_rSrc<iname, suffix, ""> {
+ : MVE_qDest_rSrc<iname, suffix, "", size> {
let Inst{28} = bit_28;
let Inst{21-20} = size;
@@ -5262,7 +5285,7 @@ defm MVE_VQSUB_qr_u32 : MVE_VQSUB_qr_m<MVE_v4u32, usubsat>;
class MVE_VQDMULL_qr<string iname, string suffix, bit size,
bit T, string cstr="", list<dag> pattern=[]>
- : MVE_qDest_rSrc<iname, suffix, cstr, pattern> {
+ : MVE_qDest_rSrc<iname, suffix, cstr, !if(size, 0b10, 0b01), pattern> {
let Inst{28} = size;
let Inst{21-20} = 0b11;
@@ -5293,7 +5316,7 @@ multiclass MVE_VQDMULL_qr_m<string iname, MVEVectorVTInfo VTI, bit size,
(VTI.DblPred VCCR:$mask),
(VTI.DblVec MQPR:$inactive))),
(VTI.DblVec (Inst (VTI.Vec MQPR:$Qm), (i32 rGPR:$val),
- ARMVCCThen, (VTI.DblPred VCCR:$mask),
+ ARMVCCThen, (VTI.DblPred VCCR:$mask), zero_reg,
(VTI.DblVec MQPR:$inactive)))>;
}
}
@@ -5307,12 +5330,12 @@ defm MVE_VQDMULL_qr_s16 : MVE_VQDMULL_qr_halves<MVE_v8s16, 0b0>;
defm MVE_VQDMULL_qr_s32 : MVE_VQDMULL_qr_halves<MVE_v4s32, 0b1, "@earlyclobber $Qd">;
class MVE_VxADDSUB_qr<string iname, string suffix,
- bit bit_28, bits<2> bits_21_20, bit subtract,
- list<dag> pattern=[]>
- : MVE_qDest_rSrc<iname, suffix, "", pattern> {
+ bit bit_28, bits<2> size, bit subtract,
+ bits<2> vecsize, list<dag> pattern=[]>
+ : MVE_qDest_rSrc<iname, suffix, "", vecsize, pattern> {
let Inst{28} = bit_28;
- let Inst{21-20} = bits_21_20;
+ let Inst{21-20} = size;
let Inst{16} = 0b0;
let Inst{12} = subtract;
let Inst{8} = 0b1;
@@ -5322,7 +5345,7 @@ class MVE_VxADDSUB_qr<string iname, string suffix,
multiclass MVE_VHADDSUB_qr_m<string iname, MVEVectorVTInfo VTI, bit subtract,
Intrinsic unpred_int, Intrinsic pred_int> {
- def "" : MVE_VxADDSUB_qr<iname, VTI.Suffix, VTI.Unsigned, VTI.Size, subtract>;
+ def "" : MVE_VxADDSUB_qr<iname, VTI.Suffix, VTI.Unsigned, VTI.Size, subtract, VTI.Size>;
defm : MVE_vec_scalar_int_pat_m<!cast<Instruction>(NAME),
VTI, unpred_int, pred_int, 1, 1>;
}
@@ -5351,7 +5374,7 @@ defm MVE_VHSUB_qr_u32 : MVE_VHSUB_qr_m<MVE_v4u32>;
multiclass MVE_VADDSUB_qr_f<string iname, MVEVectorVTInfo VTI, bit subtract,
SDNode Op, Intrinsic PredInt> {
- def "" : MVE_VxADDSUB_qr<iname, VTI.Suffix, VTI.Size{0}, 0b11, subtract>;
+ def "" : MVE_VxADDSUB_qr<iname, VTI.Suffix, VTI.Size{0}, 0b11, subtract, VTI.Size>;
defm : MVE_TwoOpPatternDup<VTI, Op, PredInt, (? ),
!cast<Instruction>(NAME)>;
}
@@ -5370,7 +5393,7 @@ let Predicates = [HasMVEFloat] in {
class MVE_VxSHL_qr<string iname, string suffix, bit U, bits<2> size,
bit bit_7, bit bit_17, list<dag> pattern=[]>
- : MVE_qDest_single_rSrc<iname, suffix, pattern> {
+ : MVE_qDest_single_rSrc<iname, suffix, size, pattern> {
let Inst{28} = U;
let Inst{25-23} = 0b100;
@@ -5398,7 +5421,7 @@ multiclass MVE_VxSHL_qr_p<string iname, MVEVectorVTInfo VTI, bit q, bit r> {
(i32 q), (i32 r), (i32 VTI.Unsigned),
(VTI.Pred VCCR:$mask))),
(VTI.Vec (Inst (VTI.Vec MQPR:$in), (i32 rGPR:$sh),
- ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
+ ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg))>;
}
multiclass MVE_VxSHL_qr_types<string iname, bit bit_7, bit bit_17> {
@@ -5432,7 +5455,7 @@ let Predicates = [HasMVEInt] in {
}
class MVE_VBRSR<string iname, string suffix, bits<2> size, list<dag> pattern=[]>
- : MVE_qDest_rSrc<iname, suffix, "", pattern> {
+ : MVE_qDest_rSrc<iname, suffix, "", size, pattern> {
let Inst{28} = 0b1;
let Inst{21-20} = size;
@@ -5457,7 +5480,7 @@ multiclass MVE_VBRSR_pat_m<MVEVectorVTInfo VTI, Instruction Inst> {
(VTI.Vec MQPR:$Qn), (i32 rGPR:$Rm),
(VTI.Pred VCCR:$mask))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qn), (i32 rGPR:$Rm),
- ARMVCCThen, (VTI.Pred VCCR:$mask),
+ ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg,
(VTI.Vec MQPR:$inactive)))>;
}
@@ -5482,7 +5505,7 @@ let Predicates = [HasMVEFloat] in {
}
class MVE_VMUL_qr_int<string iname, string suffix, bits<2> size>
- : MVE_qDest_rSrc<iname, suffix, ""> {
+ : MVE_qDest_rSrc<iname, suffix, "", size> {
let Inst{28} = 0b0;
let Inst{21-20} = size;
@@ -5506,11 +5529,11 @@ defm MVE_VMUL_qr_i16 : MVE_VMUL_qr_int_m<MVE_v8i16>;
defm MVE_VMUL_qr_i32 : MVE_VMUL_qr_int_m<MVE_v4i32>;
class MVE_VxxMUL_qr<string iname, string suffix,
- bit bit_28, bits<2> bits_21_20, list<dag> pattern=[]>
- : MVE_qDest_rSrc<iname, suffix, "", pattern> {
+ bit bit_28, bits<2> size, bits<2> vecsize, list<dag> pattern=[]>
+ : MVE_qDest_rSrc<iname, suffix, "", vecsize, pattern> {
let Inst{28} = bit_28;
- let Inst{21-20} = bits_21_20;
+ let Inst{21-20} = size;
let Inst{16} = 0b1;
let Inst{12} = 0b0;
let Inst{8} = 0b0;
@@ -5520,7 +5543,7 @@ class MVE_VxxMUL_qr<string iname, string suffix,
multiclass MVE_VxxMUL_qr_m<string iname, MVEVectorVTInfo VTI, bit bit_28,
PatFrag Op, Intrinsic int_unpred, Intrinsic int_pred> {
- def "" : MVE_VxxMUL_qr<iname, VTI.Suffix, bit_28, VTI.Size>;
+ def "" : MVE_VxxMUL_qr<iname, VTI.Suffix, bit_28, VTI.Size, VTI.Size>;
let Predicates = [HasMVEInt] in {
defm : MVE_TwoOpPatternDup<VTI, Op, int_pred, (? ), !cast<Instruction>(NAME)>;
@@ -5546,7 +5569,7 @@ defm MVE_VQRDMULH_qr_s32 : MVE_VQRDMULH_qr_m<MVE_v4s32>;
multiclass MVE_VxxMUL_qr_f_m<MVEVectorVTInfo VTI> {
let validForTailPredication = 1 in
- def "" : MVE_VxxMUL_qr<"vmul", VTI.Suffix, VTI.Size{0}, 0b11>;
+ def "" : MVE_VxxMUL_qr<"vmul", VTI.Suffix, VTI.Size{0}, 0b11, VTI.Size>;
defm : MVE_TwoOpPatternDup<VTI, fmul, int_arm_mve_mul_predicated, (? ),
!cast<Instruction>(NAME)>;
}
@@ -5558,8 +5581,8 @@ let Predicates = [HasMVEFloat] in {
class MVE_VFMAMLA_qr<string iname, string suffix,
bit bit_28, bits<2> bits_21_20, bit S,
- list<dag> pattern=[]>
- : MVE_qDestSrc_rSrc<iname, suffix, pattern> {
+ bits<2> vecsize, list<dag> pattern=[]>
+ : MVE_qDestSrc_rSrc<iname, suffix, vecsize, pattern> {
let Inst{28} = bit_28;
let Inst{21-20} = bits_21_20;
@@ -5574,7 +5597,7 @@ class MVE_VFMAMLA_qr<string iname, string suffix,
multiclass MVE_VMLA_qr_multi<string iname, MVEVectorVTInfo VTI,
bit scalar_addend> {
def "": MVE_VFMAMLA_qr<iname, VTI.Suffix, VTI.Unsigned, VTI.Size,
- scalar_addend>;
+ scalar_addend, VTI.Size>;
defvar Inst = !cast<Instruction>(NAME);
defvar pred_int = !cast<Intrinsic>("int_arm_mve_" # iname # "_n_predicated");
defvar v1 = (VTI.Vec MQPR:$v1);
@@ -5596,7 +5619,7 @@ multiclass MVE_VMLA_qr_multi<string iname, MVEVectorVTInfo VTI,
}
def : Pat<(VTI.Vec (pred_int v1, v2, s, pred)),
- (VTI.Vec (Inst v1, v2, s, ARMVCCThen, pred))>;
+ (VTI.Vec (Inst v1, v2, s, ARMVCCThen, pred, zero_reg))>;
}
}
@@ -5616,7 +5639,7 @@ defm MVE_VMLAS_qr_u32 : MVE_VMLA_qr_multi<"vmlas", MVE_v4u32, 0b1>;
multiclass MVE_VFMA_qr_multi<string iname, MVEVectorVTInfo VTI,
bit scalar_addend> {
- def "": MVE_VFMAMLA_qr<iname, VTI.Suffix, VTI.Size{0}, 0b11, scalar_addend>;
+ def "": MVE_VFMAMLA_qr<iname, VTI.Suffix, VTI.Size{0}, 0b11, scalar_addend, VTI.Size>;
defvar Inst = !cast<Instruction>(NAME);
defvar pred_int = int_arm_mve_fma_predicated;
defvar v1 = (VTI.Vec MQPR:$v1);
@@ -5632,9 +5655,9 @@ multiclass MVE_VFMA_qr_multi<string iname, MVEVectorVTInfo VTI,
def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
(VTI.Vec (fma v1, v2, vs)),
v1)),
- (VTI.Vec (Inst v1, v2, is, ARMVCCThen, $pred))>;
+ (VTI.Vec (Inst v1, v2, is, ARMVCCThen, $pred, zero_reg))>;
def : Pat<(VTI.Vec (pred_int v1, v2, vs, pred)),
- (VTI.Vec (Inst v1, v2, is, ARMVCCThen, pred))>;
+ (VTI.Vec (Inst v1, v2, is, ARMVCCThen, pred, zero_reg))>;
} else {
def : Pat<(VTI.Vec (fma v1, vs, v2)),
(VTI.Vec (Inst v2, v1, is))>;
@@ -5643,15 +5666,15 @@ multiclass MVE_VFMA_qr_multi<string iname, MVEVectorVTInfo VTI,
def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
(VTI.Vec (fma vs, v2, v1)),
v1)),
- (VTI.Vec (Inst v1, v2, is, ARMVCCThen, $pred))>;
+ (VTI.Vec (Inst v1, v2, is, ARMVCCThen, $pred, zero_reg))>;
def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
(VTI.Vec (fma v2, vs, v1)),
v1)),
- (VTI.Vec (Inst v1, v2, is, ARMVCCThen, $pred))>;
+ (VTI.Vec (Inst v1, v2, is, ARMVCCThen, $pred, zero_reg))>;
def : Pat<(VTI.Vec (pred_int v1, vs, v2, pred)),
- (VTI.Vec (Inst v2, v1, is, ARMVCCThen, pred))>;
+ (VTI.Vec (Inst v2, v1, is, ARMVCCThen, pred, zero_reg))>;
def : Pat<(VTI.Vec (pred_int vs, v1, v2, pred)),
- (VTI.Vec (Inst v2, v1, is, ARMVCCThen, pred))>;
+ (VTI.Vec (Inst v2, v1, is, ARMVCCThen, pred, zero_reg))>;
}
}
}
@@ -5665,7 +5688,7 @@ let Predicates = [HasMVEFloat] in {
class MVE_VQDMLAH_qr<string iname, string suffix, bit U, bits<2> size,
bit bit_5, bit bit_12, list<dag> pattern=[]>
- : MVE_qDestSrc_rSrc<iname, suffix, pattern> {
+ : MVE_qDestSrc_rSrc<iname, suffix, size, pattern> {
let Inst{28} = U;
let Inst{21-20} = size;
@@ -5691,7 +5714,7 @@ multiclass MVE_VQDMLAH_qr_multi<string iname, MVEVectorVTInfo VTI,
(i32 rGPR:$s), (VTI.Pred VCCR:$pred))),
(VTI.Vec (Inst (VTI.Vec MQPR:$v1), (VTI.Vec MQPR:$v2),
(i32 rGPR:$s), ARMVCCThen,
- (VTI.Pred VCCR:$pred)))>;
+ (VTI.Pred VCCR:$pred), zero_reg))>;
}
}
@@ -5710,7 +5733,7 @@ class MVE_VxDUP<string iname, string suffix, bits<2> size, bit bit_12,
ValueType VT, SDPatternOperator vxdup>
: MVE_p<(outs MQPR:$Qd, tGPREven:$Rn),
(ins tGPREven:$Rn_src, MVE_VIDUP_imm:$imm), NoItinerary,
- iname, suffix, "$Qd, $Rn, $imm", vpred_r, "$Rn = $Rn_src",
+ iname, suffix, "$Qd, $Rn, $imm", vpred_r, "$Rn = $Rn_src", size,
[(set (VT MQPR:$Qd), (i32 tGPREven:$Rn),
(vxdup (i32 tGPREven:$Rn_src), (i32 imm:$imm)))]> {
bits<4> Qd;
@@ -5745,7 +5768,7 @@ class MVE_VxWDUP<string iname, string suffix, bits<2> size, bit bit_12,
list<dag> pattern=[]>
: MVE_p<(outs MQPR:$Qd, tGPREven:$Rn),
(ins tGPREven:$Rn_src, tGPROdd:$Rm, MVE_VIDUP_imm:$imm), NoItinerary,
- iname, suffix, "$Qd, $Rn, $Rm, $imm", vpred_r, "$Rn = $Rn_src",
+ iname, suffix, "$Qd, $Rn, $Rm, $imm", vpred_r, "$Rn = $Rn_src", size,
pattern> {
bits<4> Qd;
bits<4> Rm;
@@ -5780,7 +5803,7 @@ def MVE_VDWDUPu32 : MVE_VxWDUP<"vdwdup", "u32", 0b10, 0b1>;
let isReMaterializable = 1 in
class MVE_VCTPInst<string suffix, bits<2> size, list<dag> pattern=[]>
: MVE_p<(outs VCCR:$P0), (ins rGPR:$Rn), NoItinerary, "vctp", suffix,
- "$Rn", vpred_n, "", pattern> {
+ "$Rn", vpred_n, "", size, pattern> {
bits<4> Rn;
let Inst{28-27} = 0b10;
@@ -5804,7 +5827,7 @@ multiclass MVE_VCTP<MVEVectorVTInfo VTI, Intrinsic intr> {
def : Pat<(intr rGPR:$Rn),
(VTI.Pred (Inst rGPR:$Rn))>;
def : Pat<(and (intr rGPR:$Rn), (VTI.Pred VCCR:$mask)),
- (VTI.Pred (Inst rGPR:$Rn, ARMVCCThen, VCCR:$mask))>;
+ (VTI.Pred (Inst rGPR:$Rn, ARMVCCThen, VCCR:$mask, zero_reg))>;
}
}
@@ -5837,6 +5860,7 @@ class MVE_VMOV_64bit<dag oops, dag iops, bit to_qreg, string ops, string cstr>
let Inst{4} = idx2;
let Inst{3-0} = Rt{3-0};
+ let VecSize = 0b10;
let hasSideEffects = 0;
}
@@ -5925,7 +5949,7 @@ class MVE_vldst24_base<bit writeback, bit fourregs, bits<2> stage, bits<2> size,
bit load, dag Oops, dag loadIops, dag wbIops,
string iname, string ops,
string cstr, list<dag> pattern=[]>
- : MVE_MI<Oops, !con(loadIops, wbIops), NoItinerary, iname, ops, cstr, pattern> {
+ : MVE_MI<Oops, !con(loadIops, wbIops), NoItinerary, iname, ops, cstr, size, pattern> {
bits<4> VQd;
bits<4> Rn;
@@ -6037,13 +6061,13 @@ multiclass MVE_vst24_patterns<int lanesize, ValueType VT> {
def : Pat<(int_arm_mve_vst2q i32:$addr,
(VT MQPR:$v0), (VT MQPR:$v1), (i32 stage)),
(!cast<Instruction>("MVE_VST2"#stage#"_"#lanesize)
- (REG_SEQUENCE QQPR, VT:$v0, qsub_0, VT:$v1, qsub_1),
+ (REG_SEQUENCE MQQPR, VT:$v0, qsub_0, VT:$v1, qsub_1),
t2_addr_offset_none:$addr)>;
foreach stage = [0,1] in
def : Pat<(i32 (MVEVST2UPD i32:$addr, (i32 32),
(VT MQPR:$v0), (VT MQPR:$v1), (i32 stage))),
(i32 (!cast<Instruction>("MVE_VST2"#stage#"_"#lanesize#_wb)
- (REG_SEQUENCE QQPR, VT:$v0, qsub_0, VT:$v1, qsub_1),
+ (REG_SEQUENCE MQQPR, VT:$v0, qsub_0, VT:$v1, qsub_1),
t2_addr_offset_none:$addr))>;
foreach stage = [0,1,2,3] in
@@ -6051,16 +6075,16 @@ multiclass MVE_vst24_patterns<int lanesize, ValueType VT> {
(VT MQPR:$v0), (VT MQPR:$v1),
(VT MQPR:$v2), (VT MQPR:$v3), (i32 stage)),
(!cast<Instruction>("MVE_VST4"#stage#"_"#lanesize)
- (REG_SEQUENCE QQQQPR, VT:$v0, qsub_0, VT:$v1, qsub_1,
- VT:$v2, qsub_2, VT:$v3, qsub_3),
+ (REG_SEQUENCE MQQQQPR, VT:$v0, qsub_0, VT:$v1, qsub_1,
+ VT:$v2, qsub_2, VT:$v3, qsub_3),
t2_addr_offset_none:$addr)>;
foreach stage = [0,1,2,3] in
def : Pat<(i32 (MVEVST4UPD i32:$addr, (i32 64),
(VT MQPR:$v0), (VT MQPR:$v1),
(VT MQPR:$v2), (VT MQPR:$v3), (i32 stage))),
(i32 (!cast<Instruction>("MVE_VST4"#stage#"_"#lanesize#_wb)
- (REG_SEQUENCE QQQQPR, VT:$v0, qsub_0, VT:$v1, qsub_1,
- VT:$v2, qsub_2, VT:$v3, qsub_3),
+ (REG_SEQUENCE MQQQQPR, VT:$v0, qsub_0, VT:$v1, qsub_1,
+ VT:$v2, qsub_2, VT:$v3, qsub_3),
t2_addr_offset_none:$addr))>;
}
defm : MVE_vst24_patterns<8, v16i8>;
@@ -6123,8 +6147,8 @@ def MVE_memD: MVE_memsz<0b11, 3, ?, "d", ["", "u", "s", "f"]>;
// input values.
class MVE_VLDRSTR_base<MVE_ldst_direction dir, bit U, bit P, bit W, bit opc,
dag oops, dag iops, string asm, string suffix,
- string ops, string cstr, list<dag> pattern=[]>
- : MVE_p<oops, iops, NoItinerary, asm, suffix, ops, vpred_n, cstr, pattern> {
+ string ops, string cstr, bits<2> vecsize, list<dag> pattern=[]>
+ : MVE_p<oops, iops, NoItinerary, asm, suffix, ops, vpred_n, cstr, vecsize, pattern> {
bits<3> Qd;
let Inst{28} = U;
@@ -6160,12 +6184,14 @@ class MVE_VLDRSTR_base<MVE_ldst_direction dir, bit U, bit P, bit W, bit opc,
class MVE_VLDRSTR_cs<MVE_ldst_direction dir, MVE_memsz memsz, bit P, bit W,
dag oops, dag iops, string asm, string suffix,
IndexMode im, string ops, string cstr>
- : MVE_VLDRSTR_base<dir, 0, P, W, 1, oops, iops, asm, suffix, ops, cstr> {
+ : MVE_VLDRSTR_base<dir, 0, P, W, 1, oops, iops, asm, suffix, ops, cstr, memsz.encoding> {
bits<12> addr;
let Inst{23} = addr{7};
let Inst{19-16} = addr{11-8};
let Inst{8-7} = memsz.encoding;
let Inst{6-0} = addr{6-0};
+
+ let IM = im;
}
// Contiguous, widening/narrowing
@@ -6173,7 +6199,7 @@ class MVE_VLDRSTR_cw<MVE_ldst_direction dir, MVE_memsz memsz, bit U,
bit P, bit W, bits<2> size, dag oops, dag iops,
string asm, string suffix, IndexMode im,
string ops, string cstr>
- : MVE_VLDRSTR_base<dir, U, P, W, 0, oops, iops, asm, suffix, ops, cstr> {
+ : MVE_VLDRSTR_base<dir, U, P, W, 0, oops, iops, asm, suffix, ops, cstr, size> {
bits<11> addr;
let Inst{23} = addr{7};
let Inst{19} = memsz.encoding{0}; // enough to tell 16- from 32-bit
@@ -6290,7 +6316,7 @@ class MVE_VLDRSTR_rq<MVE_ldst_direction dir, MVE_memsz memsz, bit U,
bits<2> size, bit os, string asm, string suffix, int shift>
: MVE_VLDRSTR_base<dir, U, 0b0, 0b0, 0, dir.Oops,
!con(dir.Iops, (ins mve_addr_rq_shift<shift>:$addr)),
- asm, suffix, "$Qd, $addr", dir.cstr> {
+ asm, suffix, "$Qd, $addr", dir.cstr, size> {
bits<7> addr;
let Inst{23} = 0b1;
let Inst{19-16} = addr{6-3};
@@ -6336,9 +6362,9 @@ multiclass MVE_VLDR_rq_w<MVE_memsz memsz, list<MVEVectorVTInfo> VTIs> {
def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), memsz.TypeBits, memsz.shift, UnsignedFlag)),
(VTI.Vec (Inst GPR:$base, MQPR:$offsets))>;
def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), memsz.TypeBits, 0, UnsignedFlag, (VTI.Pred VCCR:$pred))),
- (VTI.Vec (InstU GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred))>;
+ (VTI.Vec (InstU GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred, zero_reg))>;
def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), memsz.TypeBits, memsz.shift, UnsignedFlag, (VTI.Pred VCCR:$pred))),
- (VTI.Vec (Inst GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred))>;
+ (VTI.Vec (Inst GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred, zero_reg))>;
}
}
multiclass MVE_VLDR_rq_b<list<MVEVectorVTInfo> VTIs> {
@@ -6350,7 +6376,7 @@ multiclass MVE_VLDR_rq_b<list<MVEVectorVTInfo> VTIs> {
def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), 8, 0, VTI.Unsigned)),
(VTI.Vec (Inst GPR:$base, MQPR:$offsets))>;
def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), 8, 0, VTI.Unsigned, (VTI.Pred VCCR:$pred))),
- (VTI.Vec (Inst GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred))>;
+ (VTI.Vec (Inst GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred, zero_reg))>;
}
}
multiclass MVE_VSTR_rq_w<MVE_memsz memsz, list<MVEVectorVTInfo> VTIs> {
@@ -6365,9 +6391,9 @@ multiclass MVE_VSTR_rq_w<MVE_memsz memsz, list<MVEVectorVTInfo> VTIs> {
def : Pat<(int_arm_mve_vstr_scatter_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), memsz.TypeBits, memsz.shift),
(Inst MQPR:$data, GPR:$base, MQPR:$offsets)>;
def : Pat<(int_arm_mve_vstr_scatter_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), memsz.TypeBits, 0, (VTI.Pred VCCR:$pred)),
- (InstU MQPR:$data, GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred)>;
+ (InstU MQPR:$data, GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred, zero_reg)>;
def : Pat<(int_arm_mve_vstr_scatter_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), memsz.TypeBits, memsz.shift, (VTI.Pred VCCR:$pred)),
- (Inst MQPR:$data, GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred)>;
+ (Inst MQPR:$data, GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred, zero_reg)>;
}
}
multiclass MVE_VSTR_rq_b<list<MVEVectorVTInfo> VTIs> {
@@ -6379,7 +6405,7 @@ multiclass MVE_VSTR_rq_b<list<MVEVectorVTInfo> VTIs> {
def : Pat<(int_arm_mve_vstr_scatter_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), 8, 0),
(Inst MQPR:$data, GPR:$base, MQPR:$offsets)>;
def : Pat<(int_arm_mve_vstr_scatter_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), 8, 0, (VTI.Pred VCCR:$pred)),
- (Inst MQPR:$data, GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred)>;
+ (Inst MQPR:$data, GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred, zero_reg)>;
}
}
@@ -6423,7 +6449,7 @@ class MVE_VLDRSTR_qi<MVE_ldst_direction dir, MVE_memsz memsz, bit W, dag wbops,
string asm, string wbAsm, string suffix, string cstr = "">
: MVE_VLDRSTR_base<dir, 1, 1, W, 1, !con(wbops, dir.Oops),
!con(dir.Iops, (ins mve_addr_q_shift<memsz.shift>:$addr)),
- asm, suffix, "$Qd, $addr" # wbAsm, cstr # dir.cstr> {
+ asm, suffix, "$Qd, $addr" # wbAsm, cstr # dir.cstr, memsz.encoding> {
bits<11> addr;
let Inst{23} = addr{7};
let Inst{19-17} = addr{10-8};
@@ -6460,7 +6486,7 @@ multiclass MVE_VLDR_qi<MVE_memsz memsz, MVEVectorVTInfo AVTI,
def : Pat<(DVTI.Vec (int_arm_mve_vldr_gather_base_predicated
(AVTI.Vec MQPR:$addr), (i32 imm:$offset), (AVTI.Pred VCCR:$pred))),
(DVTI.Vec (Inst (AVTI.Vec MQPR:$addr), (i32 imm:$offset),
- ARMVCCThen, VCCR:$pred))>;
+ ARMVCCThen, VCCR:$pred, zero_reg))>;
}
}
multiclass MVE_VSTR_qi<MVE_memsz memsz, MVEVectorVTInfo AVTI,
@@ -6478,7 +6504,7 @@ multiclass MVE_VSTR_qi<MVE_memsz memsz, MVEVectorVTInfo AVTI,
def : Pat<(int_arm_mve_vstr_scatter_base_predicated
(AVTI.Vec MQPR:$addr), (i32 imm:$offset), (DVTI.Vec MQPR:$data), (AVTI.Pred VCCR:$pred)),
(Inst (DVTI.Vec MQPR:$data), (AVTI.Vec MQPR:$addr),
- (i32 imm:$offset), ARMVCCThen, VCCR:$pred)>;
+ (i32 imm:$offset), ARMVCCThen, VCCR:$pred, zero_reg)>;
def : Pat<(AVTI.Vec (int_arm_mve_vstr_scatter_base_wb
(AVTI.Vec MQPR:$addr), (i32 imm:$offset), (DVTI.Vec MQPR:$data))),
(AVTI.Vec (InstPre (DVTI.Vec MQPR:$data), (AVTI.Vec MQPR:$addr),
@@ -6486,7 +6512,7 @@ multiclass MVE_VSTR_qi<MVE_memsz memsz, MVEVectorVTInfo AVTI,
def : Pat<(AVTI.Vec (int_arm_mve_vstr_scatter_base_wb_predicated
(AVTI.Vec MQPR:$addr), (i32 imm:$offset), (DVTI.Vec MQPR:$data), (AVTI.Pred VCCR:$pred))),
(AVTI.Vec (InstPre (DVTI.Vec MQPR:$data), (AVTI.Vec MQPR:$addr),
- (i32 imm:$offset), ARMVCCThen, VCCR:$pred))>;
+ (i32 imm:$offset), ARMVCCThen, VCCR:$pred, zero_reg))>;
}
}
@@ -6532,7 +6558,7 @@ foreach suffix = memsz.suffixes in {
// end of MVE predicable load/store
class MVE_VPT<string suffix, bits<2> size, dag iops, string asm, list<dag> pattern=[]>
- : MVE_MI<(outs ), iops, NoItinerary, !strconcat("vpt", "${Mk}", ".", suffix), asm, "", pattern> {
+ : MVE_MI<(outs ), iops, NoItinerary, !strconcat("vpt", "${Mk}", ".", suffix), asm, "", size, pattern> {
bits<3> fc;
bits<4> Mk;
bits<3> Qn;
@@ -6642,7 +6668,7 @@ def MVE_VPTv16s8r : MVE_VPTt2s<"s8", 0b00>;
class MVE_VPTf<string suffix, bit size, dag iops, string asm, list<dag> pattern=[]>
: MVE_MI<(outs ), iops, NoItinerary, !strconcat("vpt", "${Mk}", ".", suffix), asm,
- "", pattern> {
+ "", !if(size, 0b01, 0b10), pattern> {
bits<3> fc;
bits<4> Mk;
bits<3> Qn;
@@ -6695,7 +6721,7 @@ def MVE_VPTv4f32r : MVE_VPTft2<"f32", 0b0>;
def MVE_VPTv8f16r : MVE_VPTft2<"f16", 0b1>;
def MVE_VPST : MVE_MI<(outs ), (ins vpt_mask:$Mk), NoItinerary,
- !strconcat("vpst", "${Mk}"), "", "", []> {
+ !strconcat("vpst", "${Mk}"), "", "", 0b00, []> {
bits<4> Mk;
let Inst{31-23} = 0b111111100;
@@ -6712,7 +6738,7 @@ def MVE_VPST : MVE_MI<(outs ), (ins vpt_mask:$Mk), NoItinerary,
}
def MVE_VPSEL : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), NoItinerary,
- "vpsel", "", "$Qd, $Qn, $Qm", vpred_n, "", []> {
+ "vpsel", "", "$Qd, $Qn, $Qm", vpred_n, "", 0b00, []> {
bits<4> Qn;
bits<4> Qd;
bits<4> Qm;
@@ -6741,71 +6767,71 @@ def : MVEInstAlias<"vpsel${vp}." # suffix # "\t$Qd, $Qn, $Qm",
let Predicates = [HasMVEInt] in {
def : Pat<(v16i8 (vselect (v16i1 VCCR:$pred), (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))),
- (v16i8 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred))>;
+ (v16i8 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred, zero_reg))>;
def : Pat<(v8i16 (vselect (v8i1 VCCR:$pred), (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))),
- (v8i16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred))>;
+ (v8i16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred, zero_reg))>;
def : Pat<(v4i32 (vselect (v4i1 VCCR:$pred), (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))),
- (v4i32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred))>;
+ (v4i32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred, zero_reg))>;
def : Pat<(v8f16 (vselect (v8i1 VCCR:$pred), (v8f16 MQPR:$v1), (v8f16 MQPR:$v2))),
- (v8f16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred))>;
+ (v8f16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred, zero_reg))>;
def : Pat<(v4f32 (vselect (v4i1 VCCR:$pred), (v4f32 MQPR:$v1), (v4f32 MQPR:$v2))),
- (v4f32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred))>;
+ (v4f32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred, zero_reg))>;
def : Pat<(v16i8 (vselect (v16i8 MQPR:$pred), (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))),
(v16i8 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone,
- (MVE_VCMPi8 (v16i8 MQPR:$pred), (MVE_VMOVimmi8 0), ARMCCne)))>;
+ (MVE_VCMPi8 (v16i8 MQPR:$pred), (MVE_VMOVimmi8 0), ARMCCne), zero_reg))>;
def : Pat<(v8i16 (vselect (v8i16 MQPR:$pred), (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))),
(v8i16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone,
- (MVE_VCMPi16 (v8i16 MQPR:$pred), (MVE_VMOVimmi16 0), ARMCCne)))>;
+ (MVE_VCMPi16 (v8i16 MQPR:$pred), (MVE_VMOVimmi16 0), ARMCCne), zero_reg))>;
def : Pat<(v4i32 (vselect (v4i32 MQPR:$pred), (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))),
(v4i32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone,
- (MVE_VCMPi32 (v4i32 MQPR:$pred), (MVE_VMOVimmi32 0), ARMCCne)))>;
+ (MVE_VCMPi32 (v4i32 MQPR:$pred), (MVE_VMOVimmi32 0), ARMCCne), zero_reg))>;
def : Pat<(v8f16 (vselect (v8i16 MQPR:$pred), (v8f16 MQPR:$v1), (v8f16 MQPR:$v2))),
(v8f16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone,
- (MVE_VCMPi16 (v8i16 MQPR:$pred), (MVE_VMOVimmi16 0), ARMCCne)))>;
+ (MVE_VCMPi16 (v8i16 MQPR:$pred), (MVE_VMOVimmi16 0), ARMCCne), zero_reg))>;
def : Pat<(v4f32 (vselect (v4i32 MQPR:$pred), (v4f32 MQPR:$v1), (v4f32 MQPR:$v2))),
(v4f32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone,
- (MVE_VCMPi32 (v4i32 MQPR:$pred), (MVE_VMOVimmi32 0), ARMCCne)))>;
+ (MVE_VCMPi32 (v4i32 MQPR:$pred), (MVE_VMOVimmi32 0), ARMCCne), zero_reg))>;
// Pred <-> Int
def : Pat<(v16i8 (zext (v16i1 VCCR:$pred))),
- (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 1), (MVE_VMOVimmi8 0), ARMVCCNone, VCCR:$pred))>;
+ (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 1), (MVE_VMOVimmi8 0), ARMVCCNone, VCCR:$pred, zero_reg))>;
def : Pat<(v8i16 (zext (v8i1 VCCR:$pred))),
- (v8i16 (MVE_VPSEL (MVE_VMOVimmi16 1), (MVE_VMOVimmi16 0), ARMVCCNone, VCCR:$pred))>;
+ (v8i16 (MVE_VPSEL (MVE_VMOVimmi16 1), (MVE_VMOVimmi16 0), ARMVCCNone, VCCR:$pred, zero_reg))>;
def : Pat<(v4i32 (zext (v4i1 VCCR:$pred))),
- (v4i32 (MVE_VPSEL (MVE_VMOVimmi32 1), (MVE_VMOVimmi32 0), ARMVCCNone, VCCR:$pred))>;
+ (v4i32 (MVE_VPSEL (MVE_VMOVimmi32 1), (MVE_VMOVimmi32 0), ARMVCCNone, VCCR:$pred, zero_reg))>;
def : Pat<(v16i8 (sext (v16i1 VCCR:$pred))),
- (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi8 0), ARMVCCNone, VCCR:$pred))>;
+ (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi8 0), ARMVCCNone, VCCR:$pred, zero_reg))>;
def : Pat<(v8i16 (sext (v8i1 VCCR:$pred))),
- (v8i16 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi16 0), ARMVCCNone, VCCR:$pred))>;
+ (v8i16 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi16 0), ARMVCCNone, VCCR:$pred, zero_reg))>;
def : Pat<(v4i32 (sext (v4i1 VCCR:$pred))),
- (v4i32 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi32 0), ARMVCCNone, VCCR:$pred))>;
+ (v4i32 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi32 0), ARMVCCNone, VCCR:$pred, zero_reg))>;
def : Pat<(v16i8 (anyext (v16i1 VCCR:$pred))),
- (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 1), (MVE_VMOVimmi8 0), ARMVCCNone, VCCR:$pred))>;
+ (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 1), (MVE_VMOVimmi8 0), ARMVCCNone, VCCR:$pred, zero_reg))>;
def : Pat<(v8i16 (anyext (v8i1 VCCR:$pred))),
- (v8i16 (MVE_VPSEL (MVE_VMOVimmi16 1), (MVE_VMOVimmi16 0), ARMVCCNone, VCCR:$pred))>;
+ (v8i16 (MVE_VPSEL (MVE_VMOVimmi16 1), (MVE_VMOVimmi16 0), ARMVCCNone, VCCR:$pred, zero_reg))>;
def : Pat<(v4i32 (anyext (v4i1 VCCR:$pred))),
- (v4i32 (MVE_VPSEL (MVE_VMOVimmi32 1), (MVE_VMOVimmi32 0), ARMVCCNone, VCCR:$pred))>;
+ (v4i32 (MVE_VPSEL (MVE_VMOVimmi32 1), (MVE_VMOVimmi32 0), ARMVCCNone, VCCR:$pred, zero_reg))>;
}
let Predicates = [HasMVEFloat] in {
// Pred <-> Float
// 112 is 1.0 in float
def : Pat<(v4f32 (uint_to_fp (v4i1 VCCR:$pred))),
- (v4f32 (MVE_VPSEL (v4f32 (MVE_VMOVimmf32 112)), (v4f32 (MVE_VMOVimmi32 0)), ARMVCCNone, VCCR:$pred))>;
+ (v4f32 (MVE_VPSEL (v4f32 (MVE_VMOVimmf32 112)), (v4f32 (MVE_VMOVimmi32 0)), ARMVCCNone, VCCR:$pred, zero_reg))>;
  // 2620 is 1.0 in half
def : Pat<(v8f16 (uint_to_fp (v8i1 VCCR:$pred))),
- (v8f16 (MVE_VPSEL (v8f16 (MVE_VMOVimmi16 2620)), (v8f16 (MVE_VMOVimmi16 0)), ARMVCCNone, VCCR:$pred))>;
+ (v8f16 (MVE_VPSEL (v8f16 (MVE_VMOVimmi16 2620)), (v8f16 (MVE_VMOVimmi16 0)), ARMVCCNone, VCCR:$pred, zero_reg))>;
// 240 is -1.0 in float
def : Pat<(v4f32 (sint_to_fp (v4i1 VCCR:$pred))),
- (v4f32 (MVE_VPSEL (v4f32 (MVE_VMOVimmf32 240)), (v4f32 (MVE_VMOVimmi32 0)), ARMVCCNone, VCCR:$pred))>;
+ (v4f32 (MVE_VPSEL (v4f32 (MVE_VMOVimmf32 240)), (v4f32 (MVE_VMOVimmi32 0)), ARMVCCNone, VCCR:$pred, zero_reg))>;
// 2748 is -1.0 in half
def : Pat<(v8f16 (sint_to_fp (v8i1 VCCR:$pred))),
- (v8f16 (MVE_VPSEL (v8f16 (MVE_VMOVimmi16 2748)), (v8f16 (MVE_VMOVimmi16 0)), ARMVCCNone, VCCR:$pred))>;
+ (v8f16 (MVE_VPSEL (v8f16 (MVE_VMOVimmi16 2748)), (v8f16 (MVE_VMOVimmi16 0)), ARMVCCNone, VCCR:$pred, zero_reg))>;
def : Pat<(v4i1 (fp_to_uint (v4f32 MQPR:$v1))),
(v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, ARMCCne))>;
@@ -6818,7 +6844,7 @@ let Predicates = [HasMVEFloat] in {
}
def MVE_VPNOT : MVE_p<(outs VCCR:$P0), (ins VCCR:$P0_in), NoItinerary,
- "vpnot", "", "", vpred_n, "", []> {
+ "vpnot", "", "", vpred_n, "", 0b00, []> {
let Inst{31-0} = 0b11111110001100010000111101001101;
let Unpredictable{19-17} = 0b111;
let Unpredictable{12} = 0b1;
@@ -6930,6 +6956,37 @@ def MVE_LCTP : MVE_loltp_end<(outs), (ins pred:$p), "lctp${p}", ""> {
}
+// Pseudo instructions for lowering MQQPR and MQQQQPR stack spills and reloads.
+// They are equivalent to VLDMDIA/VSTMDIA with a single reg, as opposed to multiple
+// dreg subregs.
+
+let Predicates = [HasMVEInt], AM = AddrMode4 in {
+let mayStore = 1, hasSideEffects = 0 in {
+ def MQQPRStore : t2PseudoInst<(outs), (ins MQQPR:$val, GPRnopc:$ptr),
+ 4, NoItinerary, []>;
+ def MQQQQPRStore : t2PseudoInst<(outs), (ins MQQQQPR:$val, GPRnopc:$ptr),
+ 4, NoItinerary, []>;
+}
+let mayLoad = 1, hasSideEffects = 0 in {
+ def MQQPRLoad : t2PseudoInst<(outs MQQPR:$val), (ins GPRnopc:$ptr),
+ 4, NoItinerary, []>;
+ def MQQQQPRLoad : t2PseudoInst<(outs MQQQQPR:$val), (ins GPRnopc:$ptr),
+ 4, NoItinerary, []>;
+}
+}
+
+// Pseudo for lowering MVE Q register COPYs. These will usually get converted
+// to a "MVE_VORR dst, src, src", but may behave differently in tail-predicated
+// loops to ensure the whole register is copied, not a subset from a
+// tail-predicated MVE_VORR. In the event we cannot prove an MVE_VORR is valid,
+// it will become a pair of VMOVD instructions, one for each half of the Q register.
+let Predicates = [HasMVEInt], hasSideEffects = 0, isMoveReg = 1,
+ D = MVEDomain in {
+ def MQPRCopy : t2PseudoInst<(outs MQPR:$dst), (ins MQPR:$src),
+ 8, NoItinerary, []>;
+}
+
+
//===----------------------------------------------------------------------===//
// Patterns
//===----------------------------------------------------------------------===//
@@ -7142,7 +7199,7 @@ class MVE_vector_store_typed<ValueType Ty, Instruction RegImmInst,
class MVE_vector_maskedstore_typed<ValueType Ty, Instruction RegImmInst,
PatFrag StoreKind, int shift>
: Pat<(StoreKind (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr, VCCR:$pred),
- (RegImmInst (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr, ARMVCCThen, VCCR:$pred)>;
+ (RegImmInst (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr, ARMVCCThen, VCCR:$pred, zero_reg)>;
multiclass MVE_vector_store<Instruction RegImmInst, PatFrag StoreKind,
int shift> {
@@ -7163,7 +7220,7 @@ class MVE_vector_load_typed<ValueType Ty, Instruction RegImmInst,
class MVE_vector_maskedload_typed<ValueType Ty, Instruction RegImmInst,
PatFrag LoadKind, int shift>
: Pat<(Ty (LoadKind t2addrmode_imm7<shift>:$addr, VCCR:$pred, (Ty (ARMvmovImm (i32 0))))),
- (Ty (RegImmInst t2addrmode_imm7<shift>:$addr, ARMVCCThen, VCCR:$pred))>;
+ (Ty (RegImmInst t2addrmode_imm7<shift>:$addr, ARMVCCThen, VCCR:$pred, zero_reg))>;
multiclass MVE_vector_load<Instruction RegImmInst, PatFrag LoadKind,
int shift> {
@@ -7184,7 +7241,7 @@ class MVE_vector_offset_store_typed<ValueType Ty, Instruction Opcode,
class MVE_vector_offset_maskedstore_typed<ValueType Ty, Instruction Opcode,
PatFrag StoreKind, int shift>
: Pat<(StoreKind (Ty MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<shift>:$addr, VCCR:$pred),
- (Opcode MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<shift>:$addr, ARMVCCThen, VCCR:$pred)>;
+ (Opcode MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<shift>:$addr, ARMVCCThen, VCCR:$pred, zero_reg)>;
multiclass MVE_vector_offset_store<Instruction RegImmInst, PatFrag StoreKind,
int shift> {
@@ -7314,11 +7371,11 @@ multiclass MVEExtLoadStore<Instruction LoadSInst, Instruction LoadUInst, string
// Masked trunc stores
def : Pat<(!cast<PatFrag>("aligned_truncmaskedst"#Amble) (VT MQPR:$val), taddrmode_imm7<Shift>:$addr, VCCR:$pred),
- (!cast<Instruction>(StoreInst) MQPR:$val, taddrmode_imm7<Shift>:$addr, ARMVCCThen, VCCR:$pred)>;
+ (!cast<Instruction>(StoreInst) MQPR:$val, taddrmode_imm7<Shift>:$addr, ARMVCCThen, VCCR:$pred, zero_reg)>;
def : Pat<(!cast<PatFrag>("aligned_post_truncmaskedst"#Amble) (VT MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<Shift>:$addr, VCCR:$pred),
- (!cast<Instruction>(StoreInst#"_post") MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<Shift>:$addr, ARMVCCThen, VCCR:$pred)>;
+ (!cast<Instruction>(StoreInst#"_post") MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<Shift>:$addr, ARMVCCThen, VCCR:$pred, zero_reg)>;
def : Pat<(!cast<PatFrag>("aligned_pre_truncmaskedst"#Amble) (VT MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<Shift>:$addr, VCCR:$pred),
- (!cast<Instruction>(StoreInst#"_pre") MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<Shift>:$addr, ARMVCCThen, VCCR:$pred)>;
+ (!cast<Instruction>(StoreInst#"_pre") MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<Shift>:$addr, ARMVCCThen, VCCR:$pred, zero_reg)>;
// Ext loads
def : Pat<(VT (!cast<PatFrag>("aligned_extload"#Amble) taddrmode_imm7<Shift>:$addr)),
@@ -7330,11 +7387,11 @@ multiclass MVEExtLoadStore<Instruction LoadSInst, Instruction LoadUInst, string
// Masked ext loads
def : Pat<(VT (!cast<PatFrag>("aligned_extmaskedload"#Amble) taddrmode_imm7<Shift>:$addr, VCCR:$pred, (VT (ARMvmovImm (i32 0))))),
- (VT (LoadUInst taddrmode_imm7<Shift>:$addr, ARMVCCThen, VCCR:$pred))>;
+ (VT (LoadUInst taddrmode_imm7<Shift>:$addr, ARMVCCThen, VCCR:$pred, zero_reg))>;
def : Pat<(VT (!cast<PatFrag>("aligned_sextmaskedload"#Amble) taddrmode_imm7<Shift>:$addr, VCCR:$pred, (VT (ARMvmovImm (i32 0))))),
- (VT (LoadSInst taddrmode_imm7<Shift>:$addr, ARMVCCThen, VCCR:$pred))>;
+ (VT (LoadSInst taddrmode_imm7<Shift>:$addr, ARMVCCThen, VCCR:$pred, zero_reg))>;
def : Pat<(VT (!cast<PatFrag>("aligned_zextmaskedload"#Amble) taddrmode_imm7<Shift>:$addr, VCCR:$pred, (VT (ARMvmovImm (i32 0))))),
- (VT (LoadUInst taddrmode_imm7<Shift>:$addr, ARMVCCThen, VCCR:$pred))>;
+ (VT (LoadUInst taddrmode_imm7<Shift>:$addr, ARMVCCThen, VCCR:$pred, zero_reg))>;
}
let Predicates = [HasMVEInt] in {
diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td
index 3ca6704c17b9..aaf3280ea150 100644
--- a/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -2735,8 +2735,11 @@ class N3VDIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
string Dt, ValueType ResTy, ValueType OpTy,
SDPatternOperator IntOp, bit Commutable>
: N3Vnp<op27_23, op21_20, op11_8, op6, op4,
- (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
- [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin, OpcodeStr, Dt,
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
+ let isCommutable = Commutable;
+}
+
class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
@@ -2789,19 +2792,22 @@ class N3VQIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
SDPatternOperator IntOp, bit Commutable>
: N3Vnp<op27_23, op21_20, op11_8, op6, op4,
(outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr, Dt,
- [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
+ let isCommutable = Commutable;
+}
// Same as N3VQIntnp but with Vd as a src register.
class N3VQInt3np<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
bit op4, Format f, InstrItinClass itin, string OpcodeStr,
string Dt, ValueType ResTy, ValueType OpTy,
- SDPatternOperator IntOp, bit Commutable>
+ SDPatternOperator IntOp>
: N3Vnp<op27_23, op21_20, op11_8, op6, op4,
(outs QPR:$Vd), (ins QPR:$src, QPR:$Vn, QPR:$Vm),
f, itin, OpcodeStr, Dt,
[(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vn),
(OpTy QPR:$Vm))))]> {
let Constraints = "$src = $Vd";
+ let isCommutable = 0;
}
class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
@@ -3118,7 +3124,10 @@ class N3VLIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
SDPatternOperator IntOp, bit Commutable>
: N3Vnp<op27_23, op21_20, op11_8, op6, op4,
(outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
- [(set QPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
+ let isCommutable = Commutable;
+}
+
class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
@@ -4041,7 +4050,7 @@ multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
}
multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
- string baseOpc, SDNode OpNode> {
+ SDNode OpNode> {
// 64-bit vector types.
def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
@@ -4987,7 +4996,7 @@ class BaseN3VCP8ComplexTiedLane64<bit op4, bit s, bit q, InstrItinClass itin,
}
multiclass N3VCP8ComplexTied<bit op21, bit op4,
- string OpcodeStr, SDPatternOperator Op> {
+ string OpcodeStr> {
let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
def v4f16 : BaseN3VCP8ComplexTied<op21, op4, 0, 0, IIC_VMACD, (outs DPR:$Vd),
(ins DPR:$src1, DPR:$Vn, DPR:$Vm, complexrotateop:$rot),
@@ -5007,7 +5016,7 @@ multiclass N3VCP8ComplexTied<bit op21, bit op4,
}
multiclass N3VCP8ComplexOdd<bit op23, bit op21, bit op4,
- string OpcodeStr, SDPatternOperator Op> {
+ string OpcodeStr> {
let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
def v4f16 : BaseN3VCP8ComplexOdd<op23, op21, op4, 0, 0, IIC_VMACD,
(outs DPR:$Vd),
@@ -5032,8 +5041,7 @@ multiclass N3VCP8ComplexOdd<bit op23, bit op21, bit op4,
// These instructions index by pairs of lanes, so the VectorIndexes are twice
// as wide as the data types.
-multiclass N3VCP8ComplexTiedLane<bit op4, string OpcodeStr,
- SDPatternOperator Op> {
+multiclass N3VCP8ComplexTiedLane<bit op4, string OpcodeStr> {
let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
def v4f16_indexed : BaseN3VCP8ComplexTiedLane32<op4, 0, 0, IIC_VMACD,
(outs DPR:$Vd),
@@ -5060,9 +5068,9 @@ multiclass N3VCP8ComplexTiedLane<bit op4, string OpcodeStr,
}
}
-defm VCMLA : N3VCP8ComplexTied<1, 0, "vcmla", null_frag>;
-defm VCADD : N3VCP8ComplexOdd<1, 0, 0, "vcadd", null_frag>;
-defm VCMLA : N3VCP8ComplexTiedLane<0, "vcmla", null_frag>;
+defm VCMLA : N3VCP8ComplexTied<1, 0, "vcmla">;
+defm VCADD : N3VCP8ComplexOdd<1, 0, 0, "vcadd">;
+defm VCMLA : N3VCP8ComplexTiedLane<0, "vcmla">;
let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
def : Pat<(v4f16 (int_arm_neon_vcadd_rot90 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm))),
@@ -5991,9 +5999,9 @@ def : Pat<(v2i64 (ARMvshlu (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
defm VSHLi : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", ARMvshlImm>;
// VSHR : Vector Shift Right (Immediate)
-defm VSHRs : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs",
+defm VSHRs : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s",
ARMvshrsImm>;
-defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu",
+defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u",
ARMvshruImm>;
// VSHLL : Vector Shift Left Long
@@ -6061,9 +6069,9 @@ defm VRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
"vrshl", "u", int_arm_neon_vrshiftu>;
// VRSHR : Vector Rounding Shift Right
-defm VRSHRs : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", "VRSHRs",
+defm VRSHRs : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s",
NEONvrshrsImm>;
-defm VRSHRu : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", "VRSHRu",
+defm VRSHRu : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u",
NEONvrshruImm>;
// VRSHRN : Vector Rounding Shift Right and Narrow
@@ -6438,6 +6446,18 @@ def : Pat<(ARMvgetlaneu (v8i16 QPR:$src), imm:$lane),
(VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
(DSubReg_i16_reg imm:$lane))),
(SubReg_i16_lane imm:$lane))>;
+def : Pat<(ARMvgetlaneu (v8f16 QPR:$src), imm:$lane),
+ (VGETLNu16 (v4f16 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane))>;
+def : Pat<(ARMvgetlaneu (v4f16 DPR:$src), imm:$lane),
+ (VGETLNu16 (v4f16 DPR:$src), imm:$lane)>;
+def : Pat<(ARMvgetlaneu (v8bf16 QPR:$src), imm:$lane),
+ (VGETLNu16 (v4bf16 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane))>;
+def : Pat<(ARMvgetlaneu (v4bf16 DPR:$src), imm:$lane),
+ (VGETLNu16 (v4bf16 DPR:$src), imm:$lane)>;
}
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
(VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
@@ -7074,7 +7094,7 @@ class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
: N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd),
- (ins QPR:$Vn, QPR:$Vm, imm0_15:$index), NVExtFrm,
+ (ins QPR:$Vn, QPR:$Vm, immTy:$index), NVExtFrm,
IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
[(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
(Ty QPR:$Vm), imm:$index)))]> {
@@ -7337,7 +7357,7 @@ let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
!strconcat("sha", op), "32", v4i32, v4i32, Int>;
class N3SHA3Op<string op, bits<5> op27_23, bits<2> op21_20, SDPatternOperator Int>
: N3VQInt3np<op27_23, op21_20, 0b1100, 1, 0, N3RegFrm, NoItinerary,
- !strconcat("sha", op), "32", v4i32, v4i32, Int, 0>;
+ !strconcat("sha", op), "32", v4i32, v4i32, Int>;
}
let Predicates = [HasV8, HasAES] in {
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb.td b/llvm/lib/Target/ARM/ARMInstrThumb.td
index ef07b2839bc9..bf717a4056e9 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb.td
@@ -168,6 +168,7 @@ def thumb_cb_target : Operand<OtherVT> {
let EncoderMethod = "getThumbCBTargetOpValue";
let DecoderMethod = "DecodeThumbCmpBROperand";
}
+} // OperandType = "OPERAND_PCREL"
// t_addrmode_pc := <label> => pc + imm8 * 4
//
@@ -177,7 +178,6 @@ def t_addrmode_pc : MemOperand {
let PrintMethod = "printThumbLdrLabelOperand";
let ParserMatchClass = ThumbMemPC;
}
-}
// t_addrmode_rr := reg + reg
//
@@ -1520,6 +1520,7 @@ def tTBH_JT : tPseudoInst<(outs),
let isCall = 1, Defs = [R0, R12, LR, CPSR], Uses = [SP] in
def tTPsoft : tPseudoInst<(outs), (ins), 4, IIC_Br,
[(set R0, ARMthread_pointer)]>,
+ Requires<[IsThumb, IsReadTPSoft]>,
Sched<[WriteBr]>;
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td
index e7eed2a0bbb1..783db9dde17f 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -200,7 +200,7 @@ def t2addrmode_imm12 : MemOperand,
}
// t2ldrlabel := imm12
-def t2ldrlabel : Operand<i32> {
+def t2ldrlabel : MemOperand {
let EncoderMethod = "getAddrModeImm12OpValue";
let PrintMethod = "printThumbLdrLabelOperand";
}
@@ -1927,7 +1927,7 @@ def : InstAlias<"pli${p}.w\t$addr",
// pci variant is very similar to i12, but supports negative offsets
// from the PC. Only PLD and PLI have pci variants (not PLDW)
-class T2Iplpci<bits<1> inst, string opc> : T2Iso<(outs), (ins t2ldrlabel:$addr),
+class T2Iplpci<bits<1> inst, string opc> : T2Ipc<(outs), (ins t2ldrlabel:$addr),
IIC_Preload, opc, "\t$addr",
[(ARMPreload (ARMWrapper tconstpool:$addr),
(i32 0), (i32 inst))]>, Sched<[WritePreLd]> {
@@ -4274,8 +4274,9 @@ def t2ABS : PseudoInst<(outs rGPR:$dst), (ins rGPR:$src),
//===----------------------------------------------------------------------===//
// Coprocessor load/store -- for disassembly only
//
-class T2CI<bits<4> op31_28, dag oops, dag iops, string opc, string asm, list<dag> pattern>
- : T2I<oops, iops, NoItinerary, opc, asm, pattern> {
+class T2CI<bits<4> op31_28, dag oops, dag iops, string opc, string asm,
+ list<dag> pattern, AddrMode am = AddrModeNone>
+ : T2I<oops, iops, NoItinerary, opc, asm, pattern, am> {
let Inst{31-28} = op31_28;
let Inst{27-25} = 0b110;
}
@@ -4283,7 +4284,7 @@ class T2CI<bits<4> op31_28, dag oops, dag iops, string opc, string asm, list<dag
multiclass t2LdStCop<bits<4> op31_28, bit load, bit Dbit, string asm, list<dag> pattern> {
def _OFFSET : T2CI<op31_28,
(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr),
- asm, "\t$cop, $CRd, $addr", pattern> {
+ asm, "\t$cop, $CRd, $addr", pattern, AddrMode5> {
bits<13> addr;
bits<4> cop;
bits<4> CRd;
@@ -4670,6 +4671,9 @@ def t2CDP2 : T2Cop<0b1111, (outs), (ins p_imm:$cop, imm0_15:$opc1,
}
+// Reading thread pointer from coprocessor register
+def : T2Pat<(ARMthread_pointer), (t2MRC 15, 0, 13, 0, 3)>,
+ Requires<[IsThumb2, IsReadTPHard]>;
//===----------------------------------------------------------------------===//
// ARMv8.1 Privilege Access Never extension
diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td
index bcd6433a579b..9d1bfa414dff 100644
--- a/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -1600,9 +1600,13 @@ def VTOSIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011,
let Predicates=[HasVFP2, HasDPVFP] in {
def : VFPPat<(i32 (fp_to_sint (f64 DPR:$a))),
(COPY_TO_REGCLASS (VTOSIZD DPR:$a), GPR)>;
+ def : VFPPat<(i32 (fp_to_sint_sat (f64 DPR:$a), i32)),
+ (COPY_TO_REGCLASS (VTOSIZD DPR:$a), GPR)>;
def : VFPPat<(alignedstore32 (i32 (fp_to_sint (f64 DPR:$a))), addrmode5:$ptr),
(VSTRS (VTOSIZD DPR:$a), addrmode5:$ptr)>;
+ def : VFPPat<(alignedstore32 (i32 (fp_to_sint_sat (f64 DPR:$a), i32)), addrmode5:$ptr),
+ (VSTRS (VTOSIZD DPR:$a), addrmode5:$ptr)>;
}
def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
@@ -1619,10 +1623,15 @@ def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
def : VFPNoNEONPat<(i32 (fp_to_sint SPR:$a)),
(COPY_TO_REGCLASS (VTOSIZS SPR:$a), GPR)>;
+def : VFPPat<(i32 (fp_to_sint_sat SPR:$a, i32)),
+ (COPY_TO_REGCLASS (VTOSIZS SPR:$a), GPR)>;
def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_sint (f32 SPR:$a))),
addrmode5:$ptr),
(VSTRS (VTOSIZS SPR:$a), addrmode5:$ptr)>;
+def : VFPPat<(alignedstore32 (i32 (fp_to_sint_sat (f32 SPR:$a), i32)),
+ addrmode5:$ptr),
+ (VSTRS (VTOSIZS SPR:$a), addrmode5:$ptr)>;
def VTOSIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1101, 0b1001,
(outs SPR:$Sd), (ins HPR:$Sm),
@@ -1635,6 +1644,8 @@ def VTOSIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1101, 0b1001,
def : VFPNoNEONPat<(i32 (fp_to_sint (f16 HPR:$a))),
(COPY_TO_REGCLASS (VTOSIZH (f16 HPR:$a)), GPR)>;
+def : VFPPat<(i32 (fp_to_sint_sat (f16 HPR:$a), i32)),
+ (COPY_TO_REGCLASS (VTOSIZH (f16 HPR:$a)), GPR)>;
def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
(outs SPR:$Sd), (ins DPR:$Dm),
@@ -1647,9 +1658,13 @@ def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
let Predicates=[HasVFP2, HasDPVFP] in {
def : VFPPat<(i32 (fp_to_uint (f64 DPR:$a))),
(COPY_TO_REGCLASS (VTOUIZD DPR:$a), GPR)>;
+ def : VFPPat<(i32 (fp_to_uint_sat (f64 DPR:$a), i32)),
+ (COPY_TO_REGCLASS (VTOUIZD DPR:$a), GPR)>;
def : VFPPat<(alignedstore32 (i32 (fp_to_uint (f64 DPR:$a))), addrmode5:$ptr),
(VSTRS (VTOUIZD DPR:$a), addrmode5:$ptr)>;
+ def : VFPPat<(alignedstore32 (i32 (fp_to_uint_sat (f64 DPR:$a), i32)), addrmode5:$ptr),
+ (VSTRS (VTOUIZD DPR:$a), addrmode5:$ptr)>;
}
def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
@@ -1666,10 +1681,15 @@ def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
def : VFPNoNEONPat<(i32 (fp_to_uint SPR:$a)),
(COPY_TO_REGCLASS (VTOUIZS SPR:$a), GPR)>;
+def : VFPPat<(i32 (fp_to_uint_sat SPR:$a, i32)),
+ (COPY_TO_REGCLASS (VTOUIZS SPR:$a), GPR)>;
def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_uint (f32 SPR:$a))),
addrmode5:$ptr),
(VSTRS (VTOUIZS SPR:$a), addrmode5:$ptr)>;
+def : VFPPat<(alignedstore32 (i32 (fp_to_uint_sat (f32 SPR:$a), i32)),
+ addrmode5:$ptr),
+ (VSTRS (VTOUIZS SPR:$a), addrmode5:$ptr)>;
def VTOUIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1100, 0b1001,
(outs SPR:$Sd), (ins HPR:$Sm),
@@ -1682,6 +1702,8 @@ def VTOUIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1100, 0b1001,
def : VFPNoNEONPat<(i32 (fp_to_uint (f16 HPR:$a))),
(COPY_TO_REGCLASS (VTOUIZH (f16 HPR:$a)), GPR)>;
+def : VFPPat<(i32 (fp_to_uint_sat (f16 HPR:$a), i32)),
+ (COPY_TO_REGCLASS (VTOUIZH (f16 HPR:$a)), GPR)>;
// And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR.
let Uses = [FPSCR] in {
diff --git a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index fd06bfdf352c..6e259b1baf97 100644
--- a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -564,7 +564,7 @@ void ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
}
// End of block was reached.
- if (MBB.succ_size() > 0) {
+ if (!MBB.succ_empty()) {
// FIXME: Because of a bug, live registers are sometimes missing from
// the successor blocks' live-in sets. This means we can't trust that
// information and *always* have to reset at the end of a block.
@@ -587,7 +587,7 @@ unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
}
for (unsigned Reg : RegClassInfo.getOrder(&RegClass))
- if (!LiveRegs.contains(Reg))
+ if (LiveRegs.available(MF->getRegInfo(), Reg))
return Reg;
return 0;
}
@@ -2476,8 +2476,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
}
} else {
for (unsigned i = 0; i != NumMove; ++i) {
- MachineInstr *Op = Ops.back();
- Ops.pop_back();
+ MachineInstr *Op = Ops.pop_back_val();
MBB->splice(InsertPos, MBB, Op);
}
}
@@ -2811,6 +2810,7 @@ static MachineInstr *createPostIncLoadStore(MachineInstr *MI, int Offset,
.addImm(Offset)
.add(MI->getOperand(3))
.add(MI->getOperand(4))
+ .add(MI->getOperand(5))
.cloneMemRefs(*MI);
case ARMII::AddrModeT2_i8:
if (MI->mayLoad()) {
diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
index ea41442857f3..3874db5792d6 100644
--- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
+++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
@@ -97,7 +97,15 @@ static bool isDomainMVE(MachineInstr *MI) {
return Domain == ARMII::DomainMVE;
}
+static int getVecSize(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ uint64_t Flags = MCID.TSFlags;
+ return (Flags & ARMII::VecSize) >> ARMII::VecSizeShift;
+}
+
static bool shouldInspect(MachineInstr &MI) {
+ if (MI.isDebugInstr())
+ return false;
return isDomainMVE(&MI) || isVectorPredicate(&MI) || hasVPRUse(MI);
}
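The getVecSize helper introduced above decodes the new two-bit vector-size field that this patch packs into each MVE instruction's TSFlags (the .td changes thread it through as a new size/vecsize parameter or set it directly, e.g. "let VecSize = 0b10" in MVE_VMOV_64bit, and 0b01 vs 0b10 for f16 vs f32 in MVE_VPTf). A minimal, self-contained sketch of the mask-and-shift decoding follows; the shift position is an assumption for illustration, not the real ARMII::VecSizeShift value.

#include <cstdint>

// Assumed field layout, for illustration only; the real constants are the
// ARMII::VecSize mask and ARMII::VecSizeShift used by getVecSize() above.
constexpr unsigned VecSizeShift = 20;
constexpr uint64_t VecSizeMask  = 0x3ull << VecSizeShift;

// Recover the two-bit lane-size code from a packed TSFlags word.
static unsigned decodeVecSize(uint64_t TSFlags) {
  return (TSFlags & VecSizeMask) >> VecSizeShift;
}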
@@ -368,9 +376,11 @@ namespace {
MachineInstr *Dec = nullptr;
MachineInstr *End = nullptr;
MachineOperand TPNumElements;
- SmallVector<MachineInstr*, 4> VCTPs;
- SmallPtrSet<MachineInstr*, 4> ToRemove;
- SmallPtrSet<MachineInstr*, 4> BlockMasksToRecompute;
+ SmallVector<MachineInstr *, 4> VCTPs;
+ SmallPtrSet<MachineInstr *, 4> ToRemove;
+ SmallPtrSet<MachineInstr *, 4> BlockMasksToRecompute;
+ SmallPtrSet<MachineInstr *, 4> DoubleWidthResultInstrs;
+ SmallPtrSet<MachineInstr *, 4> VMOVCopies;
bool Revert = false;
bool CannotTailPredicate = false;
@@ -730,6 +740,20 @@ bool LowOverheadLoop::ValidateTailPredicate() {
return false;
}
+  // For any DoubleWidthResultInstrs we found whilst scanning instructions, the
+  // output they compute must be no wider than the lanes the VCTP mask operates
+  // on. The VecSize of a DoubleWidthResult instruction is the larger vector
+  // size (the size it extends into), so it is only valid under a VCTP whose
+  // VecSize is at least as large.
+ unsigned VCTPVecSize = getVecSize(*VCTP);
+ for (MachineInstr *MI : DoubleWidthResultInstrs) {
+ unsigned InstrVecSize = getVecSize(*MI);
+ if (InstrVecSize > VCTPVecSize) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Double width result larger than VCTP "
+ << "VecSize:\n" << *MI);
+ return false;
+ }
+ }
+
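A hypothetical worked example of the bail-out above, under the same lane-size encoding (a larger code means wider lanes): a widening instruction whose result VecSize is the 32-bit code is accepted under a VCTP32 but rejected under a VCTP16. The sketch below only mirrors the comparison; the encoding values are assumptions.

// Mirrors the InstrVecSize > VCTPVecSize rejection in ValidateTailPredicate.
static bool doubleWidthResultAllowed(unsigned VCTPVecSize, unsigned InstrVecSize) {
  return InstrVecSize <= VCTPVecSize;
}
// doubleWidthResultAllowed(/*VCTP32*/ 2, /*widened result*/ 2) -> true
// doubleWidthResultAllowed(/*VCTP16*/ 1, /*widened result*/ 2) -> false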
// Check that the value change of the element count is what we expect and
// that the predication will be equivalent. For this we need:
// NumElements = NumElements - VectorWidth. The sub will be a sub immediate
@@ -880,6 +904,10 @@ static bool producesFalseLanesZero(MachineInstr &MI,
continue;
if (!isRegInClass(MO, QPRs) && AllowScalars)
continue;
+ // Skip the lr predicate reg
+ int PIdx = llvm::findFirstVPTPredOperandIdx(MI);
+ if (PIdx != -1 && (int)MI.getOperandNo(&MO) == PIdx + 2)
+ continue;
// Check that this instruction will produce zeros in its false lanes:
// - If it only consumes false lanes zero or constant 0 (vmov #0)
@@ -927,6 +955,8 @@ bool LowOverheadLoop::ValidateLiveOuts() {
SmallPtrSet<MachineInstr *, 4> Predicated;
MachineBasicBlock *Header = ML.getHeader();
+ LLVM_DEBUG(dbgs() << "ARM Loops: Validating Live outs\n");
+
for (auto &MI : *Header) {
if (!shouldInspect(MI))
continue;
@@ -944,12 +974,25 @@ bool LowOverheadLoop::ValidateLiveOuts() {
FalseLanesZero.insert(&MI);
else if (MI.getNumDefs() == 0)
continue;
- else if (!isPredicated && retainsOrReduces)
+ else if (!isPredicated && retainsOrReduces) {
+ LLVM_DEBUG(dbgs() << " Unpredicated instruction that retainsOrReduces: " << MI);
return false;
- else if (!isPredicated)
+ } else if (!isPredicated && MI.getOpcode() != ARM::MQPRCopy)
FalseLanesUnknown.insert(&MI);
}
+ LLVM_DEBUG({
+ dbgs() << " Predicated:\n";
+ for (auto *I : Predicated)
+ dbgs() << " " << *I;
+ dbgs() << " FalseLanesZero:\n";
+ for (auto *I : FalseLanesZero)
+ dbgs() << " " << *I;
+ dbgs() << " FalseLanesUnknown:\n";
+ for (auto *I : FalseLanesUnknown)
+ dbgs() << " " << *I;
+ });
+
auto HasPredicatedUsers = [this](MachineInstr *MI, const MachineOperand &MO,
SmallPtrSetImpl<MachineInstr *> &Predicated) {
SmallPtrSet<MachineInstr *, 2> Uses;
@@ -973,7 +1016,7 @@ bool LowOverheadLoop::ValidateLiveOuts() {
if (!isRegInClass(MO, QPRs) || !MO.isDef())
continue;
if (!HasPredicatedUsers(MI, MO, Predicated)) {
- LLVM_DEBUG(dbgs() << "ARM Loops: Found an unknown def of : "
+ LLVM_DEBUG(dbgs() << " Found an unknown def of : "
<< TRI.getRegAsmName(MO.getReg()) << " at " << *MI);
NonPredicated.insert(MI);
break;
@@ -993,8 +1036,10 @@ bool LowOverheadLoop::ValidateLiveOuts() {
for (const MachineBasicBlock::RegisterMaskPair &RegMask : ExitBB->liveins()) {
// TODO: Instead of blocking predication, we could move the vctp to the exit
    // block and calculate its operand there or in the preheader.
- if (RegMask.PhysReg == ARM::VPR)
+ if (RegMask.PhysReg == ARM::VPR) {
+ LLVM_DEBUG(dbgs() << " VPR is live in to the exit block.");
return false;
+ }
// Check Q-regs that are live in the exit blocks. We don't collect scalars
// because they won't be affected by lane predication.
if (QPRs->contains(RegMask.PhysReg))
@@ -1007,10 +1052,20 @@ bool LowOverheadLoop::ValidateLiveOuts() {
// any VPT predicated instruction is predicated upon VCTP. Any live-out
// instruction needs to be predicated, so check this here. The instructions
  // in NonPredicated have been found to be a reduction whose legality we can
-  // ensure.
- for (auto *MI : LiveOutMIs) {
- if (NonPredicated.count(MI) && FalseLanesUnknown.contains(MI)) {
- LLVM_DEBUG(dbgs() << "ARM Loops: Unable to handle live out: " << *MI);
+  // ensure. Any MQPRCopy found will need to validate its input as if it was
+ // live out.
+ SmallVector<MachineInstr *> Worklist(LiveOutMIs.begin(), LiveOutMIs.end());
+ while (!Worklist.empty()) {
+ MachineInstr *MI = Worklist.pop_back_val();
+ if (MI->getOpcode() == ARM::MQPRCopy) {
+ VMOVCopies.insert(MI);
+ MachineInstr *CopySrc =
+ RDA.getUniqueReachingMIDef(MI, MI->getOperand(1).getReg());
+ if (CopySrc)
+ Worklist.push_back(CopySrc);
+ } else if (NonPredicated.count(MI) && FalseLanesUnknown.contains(MI)) {
+ LLVM_DEBUG(dbgs() << " Unable to handle live out: " << *MI);
+ VMOVCopies.clear();
return false;
}
}
@@ -1121,7 +1176,7 @@ static bool ValidateMVEStore(MachineInstr *MI, MachineLoop *ML) {
return false;
int FI = GetFrameIndex(MI->memoperands().front());
- MachineFrameInfo FrameInfo = MI->getParent()->getParent()->getFrameInfo();
+ auto &FrameInfo = MI->getParent()->getParent()->getFrameInfo();
if (FI == -1 || !FrameInfo.isSpillSlotObjectIndex(FI))
return false;
@@ -1211,8 +1266,15 @@ bool LowOverheadLoop::ValidateMVEInst(MachineInstr *MI) {
bool RequiresExplicitPredication =
(MCID.TSFlags & ARMII::ValidForTailPredication) == 0;
if (isDomainMVE(MI) && RequiresExplicitPredication) {
- LLVM_DEBUG(if (!IsUse)
- dbgs() << "ARM Loops: Can't tail predicate: " << *MI);
+ if (MI->getOpcode() == ARM::MQPRCopy)
+ return true;
+ if (!IsUse && producesDoubleWidthResult(*MI)) {
+ DoubleWidthResultInstrs.insert(MI);
+ return true;
+ }
+
+ LLVM_DEBUG(if (!IsUse) dbgs()
+ << "ARM Loops: Can't tail predicate: " << *MI);
return IsUse;
}
@@ -1689,6 +1751,31 @@ void ARMLowOverheadLoops::Expand(LowOverheadLoop &LoLoop) {
}
};
+ // And VMOVCopies need to become 2xVMOVD for tail predication to be valid.
+  // Any other MQPRCopy can be converted to an MVE_VORR later on.
+ auto ExpandVMOVCopies = [this](SmallPtrSet<MachineInstr *, 4> &VMOVCopies) {
+ for (auto *MI : VMOVCopies) {
+ LLVM_DEBUG(dbgs() << "Converting copy to VMOVD: " << *MI);
+ assert(MI->getOpcode() == ARM::MQPRCopy && "Only expected MQPRCOPY!");
+ MachineBasicBlock *MBB = MI->getParent();
+ Register Dst = MI->getOperand(0).getReg();
+ Register Src = MI->getOperand(1).getReg();
+ auto MIB1 = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::VMOVD),
+ ARM::D0 + (Dst - ARM::Q0) * 2)
+ .addReg(ARM::D0 + (Src - ARM::Q0) * 2)
+ .add(predOps(ARMCC::AL));
+ (void)MIB1;
+ LLVM_DEBUG(dbgs() << " into " << *MIB1);
+ auto MIB2 = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::VMOVD),
+ ARM::D0 + (Dst - ARM::Q0) * 2 + 1)
+ .addReg(ARM::D0 + (Src - ARM::Q0) * 2 + 1)
+ .add(predOps(ARMCC::AL));
+ LLVM_DEBUG(dbgs() << " and " << *MIB2);
+ (void)MIB2;
+ MI->eraseFromParent();
+ }
+ };
+
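The register arithmetic in ExpandVMOVCopies above relies on each MVE Q register aliasing two consecutive D registers, which is why both VMOVDs are built from ARM::D0 + (Q - ARM::Q0) * 2 and that value plus one. A standalone sketch of the mapping with made-up register numbers (plain indices, not the LLVM register enum values):

#include <cstdio>

int main() {
  unsigned SrcQ = 1, DstQ = 4;                        // hypothetical copy Q4 <- Q1
  unsigned SrcDLo = SrcQ * 2, SrcDHi = SrcQ * 2 + 1;  // Q1 aliases D2 and D3
  unsigned DstDLo = DstQ * 2, DstDHi = DstQ * 2 + 1;  // Q4 aliases D8 and D9
  std::printf("vmov.f64 d%u, d%u\n", DstDLo, SrcDLo); // copy the low half
  std::printf("vmov.f64 d%u, d%u\n", DstDHi, SrcDHi); // copy the high half
  return 0;
}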
if (LoLoop.Revert) {
if (isWhileLoopStart(*LoLoop.Start))
RevertWhile(LoLoop.Start);
@@ -1699,6 +1786,7 @@ void ARMLowOverheadLoops::Expand(LowOverheadLoop &LoLoop) {
else
RevertLoopEnd(LoLoop.End, RevertLoopDec(LoLoop.Dec));
} else {
+ ExpandVMOVCopies(LoLoop.VMOVCopies);
LoLoop.Start = ExpandLoopStart(LoLoop);
if (LoLoop.Start)
RemoveDeadBranch(LoLoop.Start);
@@ -1743,6 +1831,7 @@ bool ARMLowOverheadLoops::RevertNonLoops() {
SmallVector<MachineInstr*, 4> Decs;
SmallVector<MachineInstr*, 4> Ends;
SmallVector<MachineInstr *, 4> EndDecs;
+ SmallVector<MachineInstr *, 4> MQPRCopies;
for (auto &I : MBB) {
if (isLoopStart(I))
@@ -1753,9 +1842,12 @@ bool ARMLowOverheadLoops::RevertNonLoops() {
Ends.push_back(&I);
else if (I.getOpcode() == ARM::t2LoopEndDec)
EndDecs.push_back(&I);
+ else if (I.getOpcode() == ARM::MQPRCopy)
+ MQPRCopies.push_back(&I);
}
- if (Starts.empty() && Decs.empty() && Ends.empty() && EndDecs.empty())
+ if (Starts.empty() && Decs.empty() && Ends.empty() && EndDecs.empty() &&
+ MQPRCopies.empty())
continue;
Changed = true;
@@ -1773,6 +1865,17 @@ bool ARMLowOverheadLoops::RevertNonLoops() {
RevertLoopEnd(End);
for (auto *End : EndDecs)
RevertLoopEndDec(End);
+ for (auto *MI : MQPRCopies) {
+ LLVM_DEBUG(dbgs() << "Converting copy to VORR: " << *MI);
+ assert(MI->getOpcode() == ARM::MQPRCopy && "Only expected MQPRCOPY!");
+ MachineBasicBlock *MBB = MI->getParent();
+ auto MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::MVE_VORR),
+ MI->getOperand(0).getReg())
+ .add(MI->getOperand(1))
+ .add(MI->getOperand(1));
+ addUnpredicatedMveVpredROp(MIB, MI->getOperand(0).getReg());
+ MI->eraseFromParent();
+ }
}
return Changed;
}
diff --git a/llvm/lib/Target/ARM/ARMMCInstLower.cpp b/llvm/lib/Target/ARM/ARMMCInstLower.cpp
index e4b022968431..2030fab6217d 100644
--- a/llvm/lib/Target/ARM/ARMMCInstLower.cpp
+++ b/llvm/lib/Target/ARM/ARMMCInstLower.cpp
@@ -194,7 +194,7 @@ void ARMAsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind)
// BLX ip
// POP{ r0, lr }
//
- OutStreamer->emitCodeAlignment(4);
+ OutStreamer->emitCodeAlignment(4, &getSubtargetInfo());
auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
OutStreamer->emitLabel(CurSled);
auto Target = OutContext.createTempSymbol();
diff --git a/llvm/lib/Target/ARM/ARMRegisterInfo.td b/llvm/lib/Target/ARM/ARMRegisterInfo.td
index b37988232127..9752b3166b45 100644
--- a/llvm/lib/Target/ARM/ARMRegisterInfo.td
+++ b/llvm/lib/Target/ARM/ARMRegisterInfo.td
@@ -66,6 +66,8 @@ def ssub_10 : ComposedSubRegIndex<dsub_5, ssub_0>;
def ssub_11 : ComposedSubRegIndex<dsub_5, ssub_1>;
def ssub_12 : ComposedSubRegIndex<dsub_6, ssub_0>;
def ssub_13 : ComposedSubRegIndex<dsub_6, ssub_1>;
+def ssub_14 : ComposedSubRegIndex<dsub_7, ssub_0>;
+def ssub_15 : ComposedSubRegIndex<dsub_7, ssub_1>;
def gsub_0 : SubRegIndex<32>;
def gsub_1 : SubRegIndex<32, 32>;
@@ -555,6 +557,9 @@ def QQPR : RegisterClass<"ARM", [v4i64], 256, (add Tuples2Q)> {
let AltOrderSelect = [{ return 1; }];
}
+// Same as QQPR but for MVE, containing the 7 register pairs made up from Q0-Q7.
+def MQQPR : RegisterClass<"ARM", [v4i64], 256, (trunc QQPR, 7)>;
+
// Tuples of 4 D regs that isn't also a pair of Q regs.
def TuplesOE4D : RegisterTuples<[dsub_0, dsub_1, dsub_2, dsub_3],
[(decimate (shl DPR, 1), 2),
@@ -578,6 +583,9 @@ def QQQQPR : RegisterClass<"ARM", [v8i64], 256, (add Tuples2QQ)> {
let AltOrderSelect = [{ return 1; }];
}
+// Same as QQQQPR but for MVE, containing the 5 register quads made up from Q0-Q7.
+def MQQQQPR : RegisterClass<"ARM", [v8i64], 256, (trunc QQQQPR, 5)>;
+
// Pseudo-registers representing 2-spaced consecutive D registers.
def Tuples2DSpc : RegisterTuples<[dsub_0, dsub_2],
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp
index 90f1b693fec6..36c4bbaafcbf 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -295,6 +295,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
case CortexA77:
case CortexA78:
case CortexA78C:
+ case CortexA710:
case CortexR4:
case CortexR4F:
case CortexR5:
@@ -389,7 +390,13 @@ bool ARMSubtarget::enableMachineScheduler() const {
return useMachineScheduler();
}
-bool ARMSubtarget::enableSubRegLiveness() const { return EnableSubRegLiveness; }
+bool ARMSubtarget::enableSubRegLiveness() const {
+ if (EnableSubRegLiveness.getNumOccurrences())
+ return EnableSubRegLiveness;
+  // Enable SubRegLiveness for MVE to better optimize the s subregs of mqpr
+  // regs and the q subregs of qqqqpr regs.
+ return hasMVEIntegerOps();
+}
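The new enableSubRegLiveness body uses a common LLVM idiom: an explicit command-line setting wins, otherwise a subtarget feature supplies the default. A self-contained sketch of the idiom with an invented option name (the real EnableSubRegLiveness cl::opt is presumably declared earlier in ARMSubtarget.cpp):

#include "llvm/Support/CommandLine.h"

// Invented flag, purely to illustrate the override-or-default pattern.
static llvm::cl::opt<bool> EnableExampleFeature("example-enable-feature",
                                                llvm::cl::Hidden,
                                                llvm::cl::init(false));

static bool enableExampleFeature(bool SubtargetDefault) {
  if (EnableExampleFeature.getNumOccurrences()) // was the flag given explicitly?
    return EnableExampleFeature;                // honour the user's choice
  return SubtargetDefault;                      // otherwise use the subtarget default
}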
// This overrides the PostRAScheduler bit in the SchedModel for any CPU.
bool ARMSubtarget::enablePostRAScheduler() const {
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
index a8a9ae66b4ab..5e1217b6a468 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -65,6 +65,7 @@ protected:
CortexA77,
CortexA78,
CortexA78C,
+ CortexA710,
CortexA8,
CortexA9,
CortexM3,
@@ -124,6 +125,9 @@ protected:
ARMv8mMainline,
ARMv8r,
ARMv81mMainline,
+ ARMv9a,
+ ARMv91a,
+ ARMv92a,
};
public:
@@ -170,6 +174,9 @@ protected:
bool HasV8_5aOps = false;
bool HasV8_6aOps = false;
bool HasV8_7aOps = false;
+ bool HasV9_0aOps = false;
+ bool HasV9_1aOps = false;
+ bool HasV9_2aOps = false;
bool HasV8MBaselineOps = false;
bool HasV8MMainlineOps = false;
bool HasV8_1MMainlineOps = false;
@@ -468,6 +475,9 @@ protected:
/// cannot be encoded. For example, ADD r0, r1, #FFFFFFFF -> SUB r0, r1, #1.
bool NegativeImmediates = true;
+  /// Mitigate against the CVE-2021-35465 security vulnerability.
+ bool FixCMSE_CVE_2021_35465 = false;
+
/// Harden against Straight Line Speculation for Returns and Indirect
/// Branches.
bool HardenSlsRetBr = false;
@@ -618,6 +628,9 @@ public:
bool hasV8_5aOps() const { return HasV8_5aOps; }
bool hasV8_6aOps() const { return HasV8_6aOps; }
bool hasV8_7aOps() const { return HasV8_7aOps; }
+ bool hasV9_0aOps() const { return HasV9_0aOps; }
+ bool hasV9_1aOps() const { return HasV9_1aOps; }
+ bool hasV9_2aOps() const { return HasV9_2aOps; }
bool hasV8MBaselineOps() const { return HasV8MBaselineOps; }
bool hasV8MMainlineOps() const { return HasV8MMainlineOps; }
bool hasV8_1MMainlineOps() const { return HasV8_1MMainlineOps; }
@@ -780,14 +793,7 @@ public:
// ARM Targets that support EHABI exception handling standard
// Darwin uses SjLj. Other targets might need more checks.
bool isTargetEHABICompatible() const {
- return (TargetTriple.getEnvironment() == Triple::EABI ||
- TargetTriple.getEnvironment() == Triple::GNUEABI ||
- TargetTriple.getEnvironment() == Triple::MuslEABI ||
- TargetTriple.getEnvironment() == Triple::EABIHF ||
- TargetTriple.getEnvironment() == Triple::GNUEABIHF ||
- TargetTriple.getEnvironment() == Triple::MuslEABIHF ||
- isTargetAndroid()) &&
- !isTargetDarwin() && !isTargetWindows();
+ return TargetTriple.isTargetEHABICompatible();
}
bool isTargetHardFloat() const;
@@ -934,6 +940,8 @@ public:
unsigned PhysReg) const override;
unsigned getGPRAllocationOrder(const MachineFunction &MF) const;
+ bool fixCMSE_CVE_2021_35465() const { return FixCMSE_CVE_2021_35465; }
+
bool hardenSlsRetBr() const { return HardenSlsRetBr; }
bool hardenSlsBlr() const { return HardenSlsBlr; }
bool hardenSlsNoComdat() const { return HardenSlsNoComdat; }
diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
index ae7ea7c2f415..833c7effd31c 100644
--- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -38,12 +38,12 @@
#include "llvm/IR/Attributes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetParser.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/CFGuard.h"
@@ -541,7 +541,6 @@ void ARMPassConfig::addPreSched2() {
return !MF.getSubtarget<ARMSubtarget>().isThumb1Only();
}));
}
- addPass(createMVEVPTBlockPass());
addPass(createThumb2ITBlockPass());
// Add both scheduling passes to give the subtarget an opportunity to pick
@@ -551,6 +550,7 @@ void ARMPassConfig::addPreSched2() {
addPass(&PostRASchedulerID);
}
+ addPass(createMVEVPTBlockPass());
addPass(createARMIndirectThunks());
addPass(createARMSLSHardeningPass());
}
diff --git a/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp b/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp
index b03bff92f373..8c5438f7093b 100644
--- a/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -54,6 +54,16 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
}
}
+const MCRegister ARMElfTargetObjectFile::getStaticBase() const {
+ return ARM::R9;
+}
+
+const MCExpr *ARMElfTargetObjectFile::
+getIndirectSymViaRWPI(const MCSymbol *Sym) const {
+ return MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_ARM_SBREL,
+ getContext());
+}
+
const MCExpr *ARMElfTargetObjectFile::getTTypeGlobalReference(
const GlobalValue *GV, unsigned Encoding, const TargetMachine &TM,
MachineModuleInfo *MMI, MCStreamer &Streamer) const {
diff --git a/llvm/lib/Target/ARM/ARMTargetObjectFile.h b/llvm/lib/Target/ARM/ARMTargetObjectFile.h
index 7b15dcc61f56..8b13198fe144 100644
--- a/llvm/lib/Target/ARM/ARMTargetObjectFile.h
+++ b/llvm/lib/Target/ARM/ARMTargetObjectFile.h
@@ -11,6 +11,7 @@
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCRegister.h"
namespace llvm {
@@ -23,6 +24,10 @@ public:
void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
+ const MCRegister getStaticBase() const override;
+
+ const MCExpr *getIndirectSymViaRWPI(const MCSymbol *Sym) const override;
+
const MCExpr *getTTypeGlobalReference(const GlobalValue *GV,
unsigned Encoding,
const TargetMachine &TM,
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index cf7456e9e4f5..88de84a4fd78 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -149,7 +149,7 @@ ARMTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
Align MemAlign =
getKnownAlignment(II.getArgOperand(0), IC.getDataLayout(), &II,
&IC.getAssumptionCache(), &IC.getDominatorTree());
- unsigned AlignArg = II.getNumArgOperands() - 1;
+ unsigned AlignArg = II.arg_size() - 1;
Value *AlignArgOp = II.getArgOperand(AlignArg);
MaybeAlign Align = cast<ConstantInt>(AlignArgOp)->getMaybeAlignValue();
if (Align && *Align < MemAlign) {
@@ -175,7 +175,7 @@ ARMTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
PatternMatch::m_Constant(XorMask))) &&
II.getType() == ArgArg->getType()) {
if (auto *CI = dyn_cast<ConstantInt>(XorMask)) {
- if (CI->getValue().trunc(16).isAllOnesValue()) {
+ if (CI->getValue().trunc(16).isAllOnes()) {
auto TrueVector = IC.Builder.CreateVectorSplat(
cast<FixedVectorType>(II.getType())->getNumElements(),
IC.Builder.getTrue());
@@ -248,6 +248,48 @@ ARMTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
return None;
}
+Optional<Value *> ARMTTIImpl::simplifyDemandedVectorEltsIntrinsic(
+ InstCombiner &IC, IntrinsicInst &II, APInt OrigDemandedElts,
+ APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3,
+ std::function<void(Instruction *, unsigned, APInt, APInt &)>
+ SimplifyAndSetOp) const {
+
+ // Compute the demanded bits for a narrowing MVE intrinsic. The TopOpc is the
+ // opcode specifying a Top/Bottom instruction, which can change between
+ // instructions.
+  auto SimplifyNarrowInstrTopBottom = [&](unsigned TopOpc) {
+ unsigned NumElts = cast<FixedVectorType>(II.getType())->getNumElements();
+ unsigned IsTop = cast<ConstantInt>(II.getOperand(TopOpc))->getZExtValue();
+
+    // Only the odd/even lanes of operand 0 will be demanded, depending on
+    // whether this is a top/bottom instruction.
+ APInt DemandedElts =
+ APInt::getSplat(NumElts, IsTop ? APInt::getLowBitsSet(2, 1)
+ : APInt::getHighBitsSet(2, 1));
+ SimplifyAndSetOp(&II, 0, OrigDemandedElts & DemandedElts, UndefElts);
+ // The other lanes will be defined from the inserted elements.
+ UndefElts &= APInt::getSplat(NumElts, !IsTop ? APInt::getLowBitsSet(2, 1)
+ : APInt::getHighBitsSet(2, 1));
+ return None;
+ };
+
+ switch (II.getIntrinsicID()) {
+ default:
+ break;
+ case Intrinsic::arm_mve_vcvt_narrow:
+ SimplifyNarrowInstrTopBottom(2);
+ break;
+ case Intrinsic::arm_mve_vqmovn:
+ SimplifyNarrowInstrTopBottom(4);
+ break;
+ case Intrinsic::arm_mve_vshrn:
+ SimplifyNarrowInstrTopBottom(7);
+ break;
+ }
+
+ return None;
+}
+
InstructionCost ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
TTI::TargetCostKind CostKind) {
assert(Ty->isIntegerTy());
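For reference, a small standalone sketch (plain unsigned arithmetic standing in for APInt; an 8-element intrinsic is assumed) of the demanded-lane mask that SimplifyNarrowInstrTopBottom computes above:

// Sketch only: replicate the 2-bit top/bottom pattern across all lanes, as
// APInt::getSplat does in the hook above.
#include <cstdio>

int main() {
  const unsigned NumElts = 8;
  for (unsigned IsTop = 0; IsTop <= 1; ++IsTop) {
    // getLowBitsSet(2, 1) == 0b01, getHighBitsSet(2, 1) == 0b10.
    unsigned Pattern = IsTop ? 0x1u : 0x2u;
    unsigned Demanded = 0;
    for (unsigned I = 0; I < NumElts; I += 2)
      Demanded |= Pattern << I;
    // Prints 0xaa (odd lanes) for bottom, then 0x55 (even lanes) for top.
    std::printf("IsTop=%u -> demanded lane mask 0x%02x\n", IsTop, Demanded);
  }
  return 0;
}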
@@ -300,7 +342,7 @@ static bool isSSATMinMaxPattern(Instruction *Inst, const APInt &Imm) {
if (InstSPF == SPF_SMAX &&
PatternMatch::match(RHS, PatternMatch::m_ConstantInt(C)) &&
- C->getValue() == Imm && Imm.isNegative() && (-Imm).isPowerOf2()) {
+ C->getValue() == Imm && Imm.isNegative() && Imm.isNegatedPowerOf2()) {
auto isSSatMin = [&](Value *MinInst) {
if (isa<SelectInst>(MinInst)) {
@@ -368,7 +410,7 @@ InstructionCost ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
}
// xor a, -1 can always be folded to MVN
- if (Opcode == Instruction::Xor && Imm.isAllOnesValue())
+ if (Opcode == Instruction::Xor && Imm.isAllOnes())
return 0;
// Ensures negative constant of min(max()) or max(min()) patterns that
@@ -381,6 +423,14 @@ InstructionCost ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
return 0;
}
+ // We can convert <= -1 to < 0, which is generally quite cheap.
+ if (Inst && Opcode == Instruction::ICmp && Idx == 1 && Imm.isAllOnesValue()) {
+ ICmpInst::Predicate Pred = cast<ICmpInst>(Inst)->getPredicate();
+ if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLE)
+ return std::min(getIntImmCost(Imm, Ty, CostKind),
+ getIntImmCost(Imm + 1, Ty, CostKind));
+ }
+
return getIntImmCost(Imm, Ty, CostKind);
}
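The comment above relies on a simple signed-integer identity; a quick standalone check (sample values only, not code from the patch):

// Standalone check: "x <= -1" and "x < 0" are the same predicate for signed
// integers, so pricing both Imm and Imm + 1 and taking the cheaper is sound.
#include <cassert>
#include <climits>

int main() {
  const long long Samples[] = {LLONG_MIN, -2, -1, 0, 1, LLONG_MAX};
  for (long long X : Samples)
    assert((X <= -1) == (X < 0));
  return 0;
}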
@@ -1623,13 +1673,24 @@ ARMTTIImpl::getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned,
TTI::TargetCostKind CostKind) {
EVT ValVT = TLI->getValueType(DL, ValTy);
EVT ResVT = TLI->getValueType(DL, ResTy);
+
if (ST->hasMVEIntegerOps() && ValVT.isSimple() && ResVT.isSimple()) {
std::pair<InstructionCost, MVT> LT =
TLI->getTypeLegalizationCost(DL, ValTy);
- if ((LT.second == MVT::v16i8 && ResVT.getSizeInBits() <= 32) ||
- (LT.second == MVT::v8i16 &&
- ResVT.getSizeInBits() <= (IsMLA ? 64 : 32)) ||
- (LT.second == MVT::v4i32 && ResVT.getSizeInBits() <= 64))
+
+ // The legal cases are:
+ // VADDV u/s 8/16/32
+ // VMLAV u/s 8/16/32
+ // VADDLV u/s 32
+ // VMLALV u/s 16/32
+    // Codegen currently cannot always handle larger-than-legal vectors very
+    // well, especially for predicated reductions where the mask needs to be
+    // split, so restrict to 128-bit or smaller input types.
+ unsigned RevVTSize = ResVT.getSizeInBits();
+ if (ValVT.getSizeInBits() <= 128 &&
+ ((LT.second == MVT::v16i8 && RevVTSize <= 32) ||
+ (LT.second == MVT::v8i16 && RevVTSize <= (IsMLA ? 64u : 32u)) ||
+ (LT.second == MVT::v4i32 && RevVTSize <= 64)))
return ST->getMVEVectorCostFactor(CostKind) * LT.first;
}
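A rough illustration (example shapes chosen here; the TLI legalization step the real hook performs is ignored) of how the tightened guard above classifies a few reduction shapes:

// Sketch only: mirror the size checks on the input width, element width,
// accumulator width and MLA-ness of an extended add reduction.
#include <cstdio>

struct Shape {
  unsigned InputBits, EltBits, ResultBits;
  bool IsMLA;
};

int main() {
  const Shape Shapes[] = {
      {128, 8, 32, false},  // v16i8 -> i32 VADDV: MVE cost factor
      {128, 16, 64, true},  // v8i16 -> i64 VMLALV: MVE cost factor
      {256, 32, 64, false}, // 256-bit input: rejected by the <= 128 check
  };
  for (const Shape &S : Shapes) {
    bool UseMVECost =
        S.InputBits <= 128 &&
        ((S.EltBits == 8 && S.ResultBits <= 32) ||
         (S.EltBits == 16 && S.ResultBits <= (S.IsMLA ? 64u : 32u)) ||
         (S.EltBits == 32 && S.ResultBits <= 64));
    std::printf("%u-bit input, i%u elts -> i%u: %s\n", S.InputBits, S.EltBits,
                S.ResultBits, UseMVECost ? "MVE cost factor" : "default cost");
  }
  return 0;
}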
@@ -1949,6 +2010,20 @@ static bool canTailPredicateInstruction(Instruction &I, int &ICmpCount) {
// we simply count the icmps, i.e. there should only be 1 for the backedge.
if (isa<ICmpInst>(&I) && ++ICmpCount > 1)
return false;
+ // FIXME: This is a workaround for poor cost modelling. Min/Max intrinsics are
+ // not currently canonical, but soon will be. Code without them uses icmp, and
+ // so is not tail predicated as per the condition above. In order to get the
+ // same performance we treat min and max the same as an icmp for tailpred
+  // purposes for the moment (we often rely on non-tailpred and higher VFs to
+  // pick more optimal instructions like VQDMULH. They need to be recognized
+ // directly by the vectorizer).
+ if (auto *II = dyn_cast<IntrinsicInst>(&I))
+ if ((II->getIntrinsicID() == Intrinsic::smin ||
+ II->getIntrinsicID() == Intrinsic::smax ||
+ II->getIntrinsicID() == Intrinsic::umin ||
+ II->getIntrinsicID() == Intrinsic::umax) &&
+ ++ICmpCount > 1)
+ return false;
if (isa<FCmpInst>(&I))
return false;
@@ -2035,8 +2110,9 @@ static bool canTailPredicateLoop(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
return false;
}
if (isa<StoreInst>(I) || isa<LoadInst>(I)) {
- Value *Ptr = isa<LoadInst>(I) ? I.getOperand(0) : I.getOperand(1);
- int64_t NextStride = getPtrStride(PSE, Ptr, L);
+ Value *Ptr = getLoadStorePointerOperand(&I);
+ Type *AccessTy = getLoadStoreType(&I);
+ int64_t NextStride = getPtrStride(PSE, AccessTy, Ptr, L);
if (NextStride == 1) {
// TODO: for now only allow consecutive strides of 1. We could support
// other strides as long as it is uniform, but let's keep it simple
@@ -2055,8 +2131,7 @@ static bool canTailPredicateLoop(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
// least if they are loop invariant.
// TODO: Loop variant strides should in theory work, too, but
// this requires further testing.
- const SCEV *PtrScev =
- replaceSymbolicStrideSCEV(PSE, llvm::ValueToValueMap(), Ptr);
+ const SCEV *PtrScev = PSE.getSE()->getSCEV(Ptr);
if (auto AR = dyn_cast<SCEVAddRecExpr>(PtrScev)) {
const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());
if (PSE.getSE()->isLoopInvariant(Step, L))
@@ -2135,14 +2210,15 @@ bool ARMTTIImpl::emitGetActiveLaneMask() const {
return true;
}
void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
- TTI::UnrollingPreferences &UP) {
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE) {
// Enable upper bound unrolling universally, not dependent upon the conditions
// below.
UP.UpperBound = true;
// Only currently enable these preferences for M-Class cores.
if (!ST->isMClass())
- return BasicTTIImplBase::getUnrollingPreferences(L, SE, UP);
+ return BasicTTIImplBase::getUnrollingPreferences(L, SE, UP, ORE);
// Disable loop unrolling for Oz and Os.
UP.OptSizeThreshold = 0;
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index 889940534ce5..a56886d4fc11 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -120,6 +120,11 @@ public:
Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
IntrinsicInst &II) const;
+ Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
+ InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
+ APInt &UndefElts2, APInt &UndefElts3,
+ std::function<void(Instruction *, unsigned, APInt, APInt &)>
+ SimplifyAndSetOp) const;
/// \name Scalar TTI Implementations
/// @{
@@ -226,8 +231,7 @@ public:
const SCEV *Ptr);
InstructionCost getArithmeticInstrCost(
- unsigned Opcode, Type *Ty,
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+ unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
TTI::OperandValueKind Op1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Op2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
@@ -246,8 +250,7 @@ public:
InstructionCost getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
- Align Alignment, unsigned AddressSpace,
- TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
+ Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond = false, bool UseMaskForGaps = false);
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
@@ -279,7 +282,8 @@ public:
DominatorTree *DT,
const LoopAccessInfo *LAI);
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
- TTI::UnrollingPreferences &UP);
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE);
bool emitGetActiveLaneMask() const;
diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index e410fe0aeff2..64d2e1bfa9b2 100644
--- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -6,15 +6,15 @@
//
//===----------------------------------------------------------------------===//
-#include "ARMFeatures.h"
#include "ARMBaseInstrInfo.h"
-#include "Utils/ARMBaseInfo.h"
+#include "ARMFeatures.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "MCTargetDesc/ARMInstPrinter.h"
#include "MCTargetDesc/ARMMCExpr.h"
#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "TargetInfo/ARMTargetInfo.h"
+#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/None.h"
@@ -22,8 +22,8 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
@@ -44,6 +44,7 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ARMBuildAttributes.h"
#include "llvm/Support/ARMEHABI.h"
#include "llvm/Support/Casting.h"
@@ -53,7 +54,6 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetParser.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
@@ -2478,14 +2478,15 @@ public:
}
void addVPTPredNOperands(MCInst &Inst, unsigned N) const {
- assert(N == 2 && "Invalid number of operands!");
+ assert(N == 3 && "Invalid number of operands!");
Inst.addOperand(MCOperand::createImm(unsigned(getVPTPred())));
unsigned RegNum = getVPTPred() == ARMVCC::None ? 0: ARM::P0;
Inst.addOperand(MCOperand::createReg(RegNum));
+ Inst.addOperand(MCOperand::createReg(0));
}
void addVPTPredROperands(MCInst &Inst, unsigned N) const {
- assert(N == 3 && "Invalid number of operands!");
+ assert(N == 4 && "Invalid number of operands!");
addVPTPredNOperands(Inst, N-1);
unsigned RegNum;
if (getVPTPred() == ARMVCC::None) {
@@ -3343,16 +3344,16 @@ public:
// regs) or q0-q4 (for 4)
//
// The MVE instructions taking a register range of this kind will
- // need an operand in the QQPR or QQQQPR class, representing the
+ // need an operand in the MQQPR or MQQQQPR class, representing the
// entire range as a unit. So we must translate into that class,
// by finding the index of the base register in the MQPR reg
// class, and returning the super-register at the corresponding
// index in the target class.
const MCRegisterClass *RC_in = &ARMMCRegisterClasses[ARM::MQPRRegClassID];
- const MCRegisterClass *RC_out = (VectorList.Count == 2) ?
- &ARMMCRegisterClasses[ARM::QQPRRegClassID] :
- &ARMMCRegisterClasses[ARM::QQQQPRRegClassID];
+ const MCRegisterClass *RC_out =
+ (VectorList.Count == 2) ? &ARMMCRegisterClasses[ARM::MQQPRRegClassID]
+ : &ARMMCRegisterClasses[ARM::MQQQQPRRegClassID];
unsigned I, E = RC_out->getNumRegs();
for (I = 0; I < E; I++)
@@ -10960,7 +10961,7 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// Only after the instruction is fully processed, we can validate it
if (wasInITBlock && hasV8Ops() && isThumb() &&
- !isV8EligibleForIT(&Inst)) {
+ !isV8EligibleForIT(&Inst) && !getTargetOptions().MCNoDeprecatedWarn) {
Warning(IDLoc, "deprecated instruction in IT block");
}
}
@@ -11777,13 +11778,13 @@ bool ARMAsmParser::parseDirectiveEven(SMLoc L) {
return true;
if (!Section) {
- getStreamer().InitSections(false);
+ getStreamer().initSections(false, getSTI());
Section = getStreamer().getCurrentSectionOnly();
}
assert(Section && "must have section to emit alignment");
if (Section->UseCodeAlign())
- getStreamer().emitCodeAlignment(2);
+ getStreamer().emitCodeAlignment(2, &getSTI());
else
getStreamer().emitValueToAlignment(2);
@@ -11985,7 +11986,7 @@ bool ARMAsmParser::parseDirectiveAlign(SMLoc L) {
const MCSection *Section = getStreamer().getCurrentSectionOnly();
assert(Section && "must have section to emit alignment");
if (Section->UseCodeAlign())
- getStreamer().emitCodeAlignment(4, 0);
+ getStreamer().emitCodeAlignment(4, &getSTI(), 0);
else
getStreamer().emitValueToAlignment(4, 0, 1, 0);
return false;
diff --git a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 51fd45034534..9caef9f09ea9 100644
--- a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -19,10 +19,10 @@
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
@@ -227,10 +227,12 @@ static DecodeStatus DecodeQPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeMQPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeQQPRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeQQQQPRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeMQQPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeMQQQQPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
static DecodeStatus DecodeDPairRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeDPairSpacedRegisterClass(MCInst &Inst,
@@ -852,12 +854,15 @@ ARMDisassembler::AddThumbPredicate(MCInst &MI) const {
VCCI = MI.insert(VCCI, MCOperand::createImm(VCC));
++VCCI;
if (VCC == ARMVCC::None)
- MI.insert(VCCI, MCOperand::createReg(0));
+ VCCI = MI.insert(VCCI, MCOperand::createReg(0));
else
- MI.insert(VCCI, MCOperand::createReg(ARM::P0));
+ VCCI = MI.insert(VCCI, MCOperand::createReg(ARM::P0));
+ ++VCCI;
+ VCCI = MI.insert(VCCI, MCOperand::createReg(0));
+ ++VCCI;
if (OpInfo[VCCPos].OperandType == ARM::OPERAND_VPRED_R) {
int TiedOp = ARMInsts[MI.getOpcode()].getOperandConstraint(
- VCCPos + 2, MCOI::TIED_TO);
+ VCCPos + 3, MCOI::TIED_TO);
assert(TiedOp >= 0 &&
"Inactive register in vpred_r is not tied to an output!");
// Copy the operand to ensure it's not invalidated when MI grows.
@@ -6154,9 +6159,9 @@ static const uint16_t QQPRDecoderTable[] = {
ARM::Q4_Q5, ARM::Q5_Q6, ARM::Q6_Q7
};
-static DecodeStatus DecodeQQPRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus DecodeMQQPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
if (RegNo > 6)
return MCDisassembler::Fail;
@@ -6170,9 +6175,9 @@ static const uint16_t QQQQPRDecoderTable[] = {
ARM::Q3_Q4_Q5_Q6, ARM::Q4_Q5_Q6_Q7
};
-static DecodeStatus DecodeQQQQPRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus DecodeMQQQQPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
if (RegNo > 4)
return MCDisassembler::Fail;
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index 9f7327f792c7..851acea94022 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -48,9 +48,10 @@ public:
} // end anonymous namespace
Optional<MCFixupKind> ARMAsmBackend::getFixupKind(StringRef Name) const {
- if (!STI.getTargetTriple().isOSBinFormatELF())
- return None;
+ return None;
+}
+Optional<MCFixupKind> ARMAsmBackendELF::getFixupKind(StringRef Name) const {
unsigned Type = llvm::StringSwitch<unsigned>(Name)
#define ELF_RELOC(X, Y) .Case(#X, Y)
#include "llvm/BinaryFormat/ELFRelocs/ARM.def"
@@ -330,7 +331,7 @@ void ARMAsmBackend::relaxInstruction(MCInst &Inst,
const MCSubtargetInfo &STI) const {
unsigned RelaxedOp = getRelaxedOpcode(Inst.getOpcode(), STI);
- // Sanity check w/ diagnostic if we get here w/ a bogus instruction.
+ // Return a diagnostic if we get here w/ a bogus instruction.
if (RelaxedOp == Inst.getOpcode()) {
SmallString<256> Tmp;
raw_svector_ostream OS(Tmp);
@@ -357,14 +358,15 @@ void ARMAsmBackend::relaxInstruction(MCInst &Inst,
Inst.setOpcode(RelaxedOp);
}
-bool ARMAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
+bool ARMAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const {
const uint16_t Thumb1_16bitNopEncoding = 0x46c0; // using MOV r8,r8
const uint16_t Thumb2_16bitNopEncoding = 0xbf00; // NOP
const uint32_t ARMv4_NopEncoding = 0xe1a00000; // using MOV r0,r0
const uint32_t ARMv6T2_NopEncoding = 0xe320f000; // NOP
if (isThumb()) {
const uint16_t nopEncoding =
- hasNOP() ? Thumb2_16bitNopEncoding : Thumb1_16bitNopEncoding;
+ hasNOP(STI) ? Thumb2_16bitNopEncoding : Thumb1_16bitNopEncoding;
uint64_t NumNops = Count / 2;
for (uint64_t i = 0; i != NumNops; ++i)
support::endian::write(OS, nopEncoding, Endian);
@@ -374,7 +376,7 @@ bool ARMAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
}
// ARM mode
const uint32_t nopEncoding =
- hasNOP() ? ARMv6T2_NopEncoding : ARMv4_NopEncoding;
+ hasNOP(STI) ? ARMv6T2_NopEncoding : ARMv4_NopEncoding;
uint64_t NumNops = Count / 4;
for (uint64_t i = 0; i != NumNops; ++i)
support::endian::write(OS, nopEncoding, Endian);
@@ -1300,11 +1302,12 @@ static MCAsmBackend *createARMAsmBackend(const Target &T,
return new ARMAsmBackendDarwin(T, STI, MRI);
case Triple::COFF:
assert(TheTriple.isOSWindows() && "non-Windows ARM COFF is not supported");
- return new ARMAsmBackendWinCOFF(T, STI);
+ return new ARMAsmBackendWinCOFF(T, STI.getTargetTriple().isThumb());
case Triple::ELF:
assert(TheTriple.isOSBinFormatELF() && "using ELF for non-ELF target");
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
- return new ARMAsmBackendELF(T, STI, OSABI, Endian);
+ return new ARMAsmBackendELF(T, STI.getTargetTriple().isThumb(), OSABI,
+ Endian);
}
}
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
index 38c7b30769b3..9b0c8c084161 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
@@ -13,29 +13,23 @@
#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
namespace llvm {
class ARMAsmBackend : public MCAsmBackend {
- // The STI from the target triple the MCAsmBackend was instantiated with
- // note that MCFragments may have a different local STI that should be
- // used in preference.
- const MCSubtargetInfo &STI;
bool isThumbMode; // Currently emitting Thumb code.
public:
- ARMAsmBackend(const Target &T, const MCSubtargetInfo &STI,
- support::endianness Endian)
- : MCAsmBackend(Endian), STI(STI),
- isThumbMode(STI.getTargetTriple().isThumb()) {}
+ ARMAsmBackend(const Target &T, bool isThumb, support::endianness Endian)
+ : MCAsmBackend(Endian), isThumbMode(isThumb) {}
unsigned getNumFixupKinds() const override {
return ARM::NumTargetFixupKinds;
}
- // FIXME: this should be calculated per fragment as the STI may be
- // different.
- bool hasNOP() const { return STI.getFeatureBits()[ARM::HasV6T2Ops]; }
+ bool hasNOP(const MCSubtargetInfo *STI) const {
+ return STI->getFeatureBits()[ARM::HasV6T2Ops];
+ }
Optional<MCFixupKind> getFixupKind(StringRef Name) const override;
@@ -69,7 +63,8 @@ public:
void relaxInstruction(MCInst &Inst,
const MCSubtargetInfo &STI) const override;
- bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
+ bool writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const override;
void handleAssemblerFlag(MCAssemblerFlag Flag) override;
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
index e27bb134670f..85013b5f099a 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
@@ -21,8 +21,8 @@ public:
const MachO::CPUSubTypeARM Subtype;
ARMAsmBackendDarwin(const Target &T, const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI)
- : ARMAsmBackend(T, STI, support::little), MRI(MRI),
- TT(STI.getTargetTriple()),
+ : ARMAsmBackend(T, STI.getTargetTriple().isThumb(), support::little),
+ MRI(MRI), TT(STI.getTargetTriple()),
Subtype((MachO::CPUSubTypeARM)cantFail(
MachO::getCPUSubType(STI.getTargetTriple()))) {}
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h
index 5d735114d441..2431c4865b64 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h
@@ -19,14 +19,16 @@ namespace {
class ARMAsmBackendELF : public ARMAsmBackend {
public:
uint8_t OSABI;
- ARMAsmBackendELF(const Target &T, const MCSubtargetInfo &STI, uint8_t OSABI,
+ ARMAsmBackendELF(const Target &T, bool isThumb, uint8_t OSABI,
support::endianness Endian)
- : ARMAsmBackend(T, STI, Endian), OSABI(OSABI) {}
+ : ARMAsmBackend(T, isThumb, Endian), OSABI(OSABI) {}
std::unique_ptr<MCObjectTargetWriter>
createObjectTargetWriter() const override {
return createARMELFObjectWriter(OSABI);
}
+
+ Optional<MCFixupKind> getFixupKind(StringRef Name) const override;
};
}
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h
index 62eb1d73a2ce..6e447df9e4cb 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h
@@ -16,8 +16,8 @@ using namespace llvm;
namespace {
class ARMAsmBackendWinCOFF : public ARMAsmBackend {
public:
- ARMAsmBackendWinCOFF(const Target &T, const MCSubtargetInfo &STI)
- : ARMAsmBackend(T, STI, support::little) {}
+ ARMAsmBackendWinCOFF(const Target &T, bool isThumb)
+ : ARMAsmBackend(T, isThumb, support::little) {}
std::unique_ptr<MCObjectTargetWriter>
createObjectTargetWriter() const override {
return createARMWinCOFFObjectWriter();
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
index ecd96114e8a4..43f7575df6db 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -408,6 +408,14 @@ namespace ARMII {
// its input, typically reading from the top/bottom halves of the input(s).
DoubleWidthResult = 1 << 23,
+ // The vector element size for MVE instructions. 00 = i8, 01 = i16, 10 = i32
+  // and 11 = i64. This is the largest type if multiple are present, so an
+  // MVE_VMOVLs8bh is size 01 = i16, as it extends from an i8 to an i16. There
+  // are some caveats so it cannot be used blindly, such as exchanging VMLADAVAs
+  // and complex instructions, which may use different input lanes.
+ VecSizeShift = 24,
+ VecSize = 3 << VecSizeShift,
+
//===------------------------------------------------------------------===//
// Code domain.
DomainShift = 15,
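A small standalone sketch (illustrative flag word, not a real MCInstrDesc) of packing the new 2-bit element-size field into TSFlags and reading it back:

// Sketch only: encode element size i16 (pattern 01) at bit 24 and decode it,
// matching the VecSizeShift/VecSize definitions above.
#include <cstdint>
#include <cstdio>

int main() {
  const unsigned VecSizeShift = 24;
  const uint64_t VecSize = UINT64_C(3) << VecSizeShift;

  uint64_t TSFlags = UINT64_C(0x1) << VecSizeShift; // 00=i8 01=i16 10=i32 11=i64
  unsigned SizeField = (TSFlags & VecSize) >> VecSizeShift;
  std::printf("vector element type: i%u\n", 8u << SizeField); // prints i16
  return 0;
}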
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 12076b8c49c1..896b104e8d97 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -785,6 +785,9 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() {
case ARM::ArchKind::ARMV8_4A:
case ARM::ArchKind::ARMV8_5A:
case ARM::ArchKind::ARMV8_6A:
+ case ARM::ArchKind::ARMV9A:
+ case ARM::ArchKind::ARMV9_1A:
+ case ARM::ArchKind::ARMV9_2A:
S.setAttributeItem(CPU_arch_profile, ApplicationProfile, false);
S.setAttributeItem(ARM_ISA_use, Allowed, false);
S.setAttributeItem(THUMB_ISA_use, AllowThumb32, false);
@@ -1056,7 +1059,7 @@ inline void ARMELFStreamer::SwitchToEHSection(StringRef Prefix,
// Switch to .ARM.extab or .ARM.exidx section
SwitchSection(EHSection);
- emitCodeAlignment(4);
+ emitValueToAlignment(4, 0, 1, 0);
}
inline void ARMELFStreamer::SwitchToExTabSection(const MCSymbol &FnStart) {
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
index 40e8e244e312..77c0e3522911 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
@@ -91,7 +91,7 @@ ARMCOFFMCAsmInfoMicrosoft::ARMCOFFMCAsmInfoMicrosoft() {
ExceptionsType = ExceptionHandling::WinEH;
PrivateGlobalPrefix = "$M";
PrivateLabelPrefix = "$M";
- CommentString = ";";
+ CommentString = "@";
// Conditional Thumb 4-byte instructions can have an implicit IT.
MaxInstLength = 6;
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index ced48ccc9883..5ecacdab390f 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -1138,6 +1138,7 @@ getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx,
// representation for the complex operand in the .td file. This isn't just
// style, unfortunately. As-is, we can't represent the distinct encoding
// for #-0.
+ assert(((Imm8 & 0x3) == 0) && "Not a valid immediate!");
uint32_t Binary = (Imm8 >> 2) & 0xff;
// Immediate is always encoded as positive. The 'U' bit controls add vs sub.
if (isAdd)
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 87cce08b1ce4..05e5a473a3c6 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -27,9 +27,9 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetParser.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -441,8 +441,201 @@ public:
}
return false;
}
+
+ Optional<uint64_t> evaluateMemoryOperandAddress(const MCInst &Inst,
+ const MCSubtargetInfo *STI,
+ uint64_t Addr,
+ uint64_t Size) const override;
};
+} // namespace
+
+static Optional<uint64_t>
+// NOLINTNEXTLINE(readability-identifier-naming)
+evaluateMemOpAddrForAddrMode_i12(const MCInst &Inst, const MCInstrDesc &Desc,
+ unsigned MemOpIndex, uint64_t Addr) {
+ if (MemOpIndex + 1 >= Desc.getNumOperands())
+ return None;
+
+ const MCOperand &MO1 = Inst.getOperand(MemOpIndex);
+ const MCOperand &MO2 = Inst.getOperand(MemOpIndex + 1);
+ if (!MO1.isReg() || MO1.getReg() != ARM::PC || !MO2.isImm())
+ return None;
+
+ int32_t OffImm = (int32_t)MO2.getImm();
+ // Special value for #-0. All others are normal.
+ if (OffImm == INT32_MIN)
+ OffImm = 0;
+ return Addr + OffImm;
+}
+
+static Optional<uint64_t> evaluateMemOpAddrForAddrMode3(const MCInst &Inst,
+ const MCInstrDesc &Desc,
+ unsigned MemOpIndex,
+ uint64_t Addr) {
+ if (MemOpIndex + 2 >= Desc.getNumOperands())
+ return None;
+
+ const MCOperand &MO1 = Inst.getOperand(MemOpIndex);
+ const MCOperand &MO2 = Inst.getOperand(MemOpIndex + 1);
+ const MCOperand &MO3 = Inst.getOperand(MemOpIndex + 2);
+ if (!MO1.isReg() || MO1.getReg() != ARM::PC || MO2.getReg() || !MO3.isImm())
+ return None;
+
+ unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm());
+ ARM_AM::AddrOpc Op = ARM_AM::getAM3Op(MO3.getImm());
+
+ if (Op == ARM_AM::sub)
+ return Addr - ImmOffs;
+ return Addr + ImmOffs;
+}
+
+static Optional<uint64_t> evaluateMemOpAddrForAddrMode5(const MCInst &Inst,
+ const MCInstrDesc &Desc,
+ unsigned MemOpIndex,
+ uint64_t Addr) {
+ if (MemOpIndex + 1 >= Desc.getNumOperands())
+ return None;
+
+ const MCOperand &MO1 = Inst.getOperand(MemOpIndex);
+ const MCOperand &MO2 = Inst.getOperand(MemOpIndex + 1);
+ if (!MO1.isReg() || MO1.getReg() != ARM::PC || !MO2.isImm())
+ return None;
+
+ unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm());
+ ARM_AM::AddrOpc Op = ARM_AM::getAM5Op(MO2.getImm());
+
+ if (Op == ARM_AM::sub)
+ return Addr - ImmOffs * 4;
+ return Addr + ImmOffs * 4;
+}
+
+static Optional<uint64_t>
+evaluateMemOpAddrForAddrMode5FP16(const MCInst &Inst, const MCInstrDesc &Desc,
+ unsigned MemOpIndex, uint64_t Addr) {
+ if (MemOpIndex + 1 >= Desc.getNumOperands())
+ return None;
+
+ const MCOperand &MO1 = Inst.getOperand(MemOpIndex);
+ const MCOperand &MO2 = Inst.getOperand(MemOpIndex + 1);
+ if (!MO1.isReg() || MO1.getReg() != ARM::PC || !MO2.isImm())
+ return None;
+
+ unsigned ImmOffs = ARM_AM::getAM5FP16Offset(MO2.getImm());
+ ARM_AM::AddrOpc Op = ARM_AM::getAM5FP16Op(MO2.getImm());
+
+ if (Op == ARM_AM::sub)
+ return Addr - ImmOffs * 2;
+ return Addr + ImmOffs * 2;
+}
+
+static Optional<uint64_t>
+// NOLINTNEXTLINE(readability-identifier-naming)
+evaluateMemOpAddrForAddrModeT2_i8s4(const MCInst &Inst, const MCInstrDesc &Desc,
+ unsigned MemOpIndex, uint64_t Addr) {
+ if (MemOpIndex + 1 >= Desc.getNumOperands())
+ return None;
+
+ const MCOperand &MO1 = Inst.getOperand(MemOpIndex);
+ const MCOperand &MO2 = Inst.getOperand(MemOpIndex + 1);
+ if (!MO1.isReg() || MO1.getReg() != ARM::PC || !MO2.isImm())
+ return None;
+
+ int32_t OffImm = (int32_t)MO2.getImm();
+ assert(((OffImm & 0x3) == 0) && "Not a valid immediate!");
+
+ // Special value for #-0. All others are normal.
+ if (OffImm == INT32_MIN)
+ OffImm = 0;
+ return Addr + OffImm;
+}
+
+static Optional<uint64_t>
+// NOLINTNEXTLINE(readability-identifier-naming)
+evaluateMemOpAddrForAddrModeT2_pc(const MCInst &Inst, const MCInstrDesc &Desc,
+ unsigned MemOpIndex, uint64_t Addr) {
+ const MCOperand &MO1 = Inst.getOperand(MemOpIndex);
+ if (!MO1.isImm())
+ return None;
+
+ int32_t OffImm = (int32_t)MO1.getImm();
+
+ // Special value for #-0. All others are normal.
+ if (OffImm == INT32_MIN)
+ OffImm = 0;
+ return Addr + OffImm;
+}
+
+static Optional<uint64_t>
+// NOLINTNEXTLINE(readability-identifier-naming)
+evaluateMemOpAddrForAddrModeT1_s(const MCInst &Inst, const MCInstrDesc &Desc,
+ unsigned MemOpIndex, uint64_t Addr) {
+ return evaluateMemOpAddrForAddrModeT2_pc(Inst, Desc, MemOpIndex, Addr);
+}
+
+Optional<uint64_t> ARMMCInstrAnalysis::evaluateMemoryOperandAddress(
+ const MCInst &Inst, const MCSubtargetInfo *STI, uint64_t Addr,
+ uint64_t Size) const {
+ const MCInstrDesc &Desc = Info->get(Inst.getOpcode());
+
+ // Only load instructions can have PC-relative memory addressing.
+ if (!Desc.mayLoad())
+ return None;
+
+ // PC-relative addressing does not update the base register.
+ uint64_t TSFlags = Desc.TSFlags;
+ unsigned IndexMode =
+ (TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
+ if (IndexMode != ARMII::IndexModeNone)
+ return None;
+
+ // Find the memory addressing operand in the instruction.
+ unsigned OpIndex = Desc.NumDefs;
+ while (OpIndex < Desc.getNumOperands() &&
+ Desc.OpInfo[OpIndex].OperandType != MCOI::OPERAND_MEMORY)
+ ++OpIndex;
+ if (OpIndex == Desc.getNumOperands())
+ return None;
+
+ // Base address for PC-relative addressing is always 32-bit aligned.
+ Addr &= ~0x3;
+
+ // For ARM instructions the PC offset is 8 bytes, for Thumb instructions it
+ // is 4 bytes.
+ switch (Desc.TSFlags & ARMII::FormMask) {
+ default:
+ Addr += 8;
+ break;
+ case ARMII::ThumbFrm:
+ Addr += 4;
+ break;
+ // VLDR* instructions share the same opcode (and thus the same form) for Arm
+  // and Thumb. Take the slightly longer route through STI in that case.
+ case ARMII::VFPLdStFrm:
+ Addr += STI->getFeatureBits()[ARM::ModeThumb] ? 4 : 8;
+ break;
+ }
+
+  // Evaluate the address depending on the addressing mode.
+ unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
+ switch (AddrMode) {
+ default:
+ return None;
+ case ARMII::AddrMode_i12:
+ return evaluateMemOpAddrForAddrMode_i12(Inst, Desc, OpIndex, Addr);
+ case ARMII::AddrMode3:
+ return evaluateMemOpAddrForAddrMode3(Inst, Desc, OpIndex, Addr);
+ case ARMII::AddrMode5:
+ return evaluateMemOpAddrForAddrMode5(Inst, Desc, OpIndex, Addr);
+ case ARMII::AddrMode5FP16:
+ return evaluateMemOpAddrForAddrMode5FP16(Inst, Desc, OpIndex, Addr);
+ case ARMII::AddrModeT2_i8s4:
+ return evaluateMemOpAddrForAddrModeT2_i8s4(Inst, Desc, OpIndex, Addr);
+ case ARMII::AddrModeT2_pc:
+ return evaluateMemOpAddrForAddrModeT2_pc(Inst, Desc, OpIndex, Addr);
+ case ARMII::AddrModeT1_s:
+ return evaluateMemOpAddrForAddrModeT1_s(Inst, Desc, OpIndex, Addr);
+ }
}
static MCInstrAnalysis *createARMMCInstrAnalysis(const MCInstrInfo *Info) {
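For illustration, a minimal sketch (hypothetical instruction address and immediate) of the address arithmetic the new evaluateMemoryOperandAddress hook performs for an ARM-mode AddrMode_i12 PC-relative load:

// Sketch only: word-align the instruction address, add the PC read-ahead
// (8 bytes in ARM mode, 4 in Thumb mode), then apply the signed immediate.
#include <cstdint>
#include <cstdio>

int main() {
  uint64_t Addr = 0x8006; // hypothetical address of the load instruction
  int32_t OffImm = 0x10;  // hypothetical #imm12 operand
  bool IsThumb = false;

  uint64_t Base = Addr & ~UINT64_C(0x3); // PC-relative base is 32-bit aligned
  Base += IsThumb ? 4 : 8;               // PC reads ahead of the instruction
  std::printf("loads from 0x%llx\n",     // prints 0x801c for these values
              (unsigned long long)(Base + OffImm));
  return 0;
}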
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
index 1fee354cad93..3e4c97630af6 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
@@ -43,7 +43,9 @@ void ARMTargetStreamer::emitCurrentConstantPool() {
}
// finish() - write out any non-empty assembler constant pools.
-void ARMTargetStreamer::finish() { ConstantPools->emitAll(Streamer); }
+void ARMTargetStreamer::emitConstantPools() {
+ ConstantPools->emitAll(Streamer);
+}
// reset() - Reset any state
void ARMTargetStreamer::reset() {}
diff --git a/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp b/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp
index 4981b8051657..cfd275bc0621 100644
--- a/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp
+++ b/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp
@@ -76,6 +76,7 @@ public:
private:
LoopInfo *LI = nullptr;
+ const DataLayout *DL;
// Check this is a valid gather with correct alignment
bool isLegalTypeAndAlignment(unsigned NumElements, unsigned ElemSize,
@@ -149,10 +150,10 @@ private:
bool optimiseOffsets(Value *Offsets, BasicBlock *BB, LoopInfo *LI);
// Pushes the given add out of the loop
void pushOutAdd(PHINode *&Phi, Value *OffsSecondOperand, unsigned StartIndex);
- // Pushes the given mul out of the loop
- void pushOutMul(PHINode *&Phi, Value *IncrementPerRound,
- Value *OffsSecondOperand, unsigned LoopIncrement,
- IRBuilder<> &Builder);
+ // Pushes the given mul or shl out of the loop
+ void pushOutMulShl(unsigned Opc, PHINode *&Phi, Value *IncrementPerRound,
+ Value *OffsSecondOperand, unsigned LoopIncrement,
+ IRBuilder<> &Builder);
};
} // end anonymous namespace
@@ -335,14 +336,15 @@ int MVEGatherScatterLowering::computeScale(unsigned GEPElemSize,
Optional<int64_t> MVEGatherScatterLowering::getIfConst(const Value *V) {
const Constant *C = dyn_cast<Constant>(V);
- if (C != nullptr)
+ if (C && C->getSplatValue())
return Optional<int64_t>{C->getUniqueInteger().getSExtValue()};
if (!isa<Instruction>(V))
return Optional<int64_t>{};
const Instruction *I = cast<Instruction>(V);
- if (I->getOpcode() == Instruction::Add ||
- I->getOpcode() == Instruction::Mul) {
+ if (I->getOpcode() == Instruction::Add || I->getOpcode() == Instruction::Or ||
+ I->getOpcode() == Instruction::Mul ||
+ I->getOpcode() == Instruction::Shl) {
Optional<int64_t> Op0 = getIfConst(I->getOperand(0));
Optional<int64_t> Op1 = getIfConst(I->getOperand(1));
if (!Op0 || !Op1)
@@ -351,18 +353,30 @@ Optional<int64_t> MVEGatherScatterLowering::getIfConst(const Value *V) {
return Optional<int64_t>{Op0.getValue() + Op1.getValue()};
if (I->getOpcode() == Instruction::Mul)
return Optional<int64_t>{Op0.getValue() * Op1.getValue()};
+ if (I->getOpcode() == Instruction::Shl)
+ return Optional<int64_t>{Op0.getValue() << Op1.getValue()};
+ if (I->getOpcode() == Instruction::Or)
+ return Optional<int64_t>{Op0.getValue() | Op1.getValue()};
}
return Optional<int64_t>{};
}
+// Return true if I is an Or instruction that is equivalent to an add, due to
+// the operands having no common bits set.
+static bool isAddLikeOr(Instruction *I, const DataLayout &DL) {
+ return I->getOpcode() == Instruction::Or &&
+ haveNoCommonBitsSet(I->getOperand(0), I->getOperand(1), DL);
+}
+
std::pair<Value *, int64_t>
MVEGatherScatterLowering::getVarAndConst(Value *Inst, int TypeScale) {
std::pair<Value *, int64_t> ReturnFalse =
std::pair<Value *, int64_t>(nullptr, 0);
- // At this point, the instruction we're looking at must be an add or we
- // bail out
+ // At this point, the instruction we're looking at must be an add or an
+ // add-like-or.
Instruction *Add = dyn_cast<Instruction>(Inst);
- if (Add == nullptr || Add->getOpcode() != Instruction::Add)
+ if (Add == nullptr ||
+ (Add->getOpcode() != Instruction::Add && !isAddLikeOr(Add, *DL)))
return ReturnFalse;
Value *Summand;
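The add-like-or handling above rests on a bit-level fact; a small standalone check (illustrative operands, not code from the patch):

// Standalone check: when two operands share no set bits, "or" and "add"
// produce the same result, which is what haveNoCommonBitsSet() guarantees
// for the offsets this pass rewrites.
#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X = 0; X < 1024; ++X) {
    uint32_t Hi = X << 2; // low two bits always clear
    uint32_t Lo = 3;      // only the low two bits set
    assert((Hi | Lo) == (Hi + Lo));
  }
  return 0;
}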
@@ -737,10 +751,9 @@ Instruction *MVEGatherScatterLowering::tryCreateIncrementingGatScat(
// The gep was in charge of making sure the offsets are scaled correctly
// - calculate that factor so it can be applied by hand
- DataLayout DT = I->getParent()->getParent()->getParent()->getDataLayout();
int TypeScale =
- computeScale(DT.getTypeSizeInBits(GEP->getOperand(0)->getType()),
- DT.getTypeSizeInBits(GEP->getType()) /
+ computeScale(DL->getTypeSizeInBits(GEP->getOperand(0)->getType()),
+ DL->getTypeSizeInBits(GEP->getType()) /
cast<FixedVectorType>(GEP->getType())->getNumElements());
if (TypeScale == -1)
return nullptr;
@@ -888,11 +901,11 @@ void MVEGatherScatterLowering::pushOutAdd(PHINode *&Phi,
Phi->removeIncomingValue(StartIndex);
}
-void MVEGatherScatterLowering::pushOutMul(PHINode *&Phi,
- Value *IncrementPerRound,
- Value *OffsSecondOperand,
- unsigned LoopIncrement,
- IRBuilder<> &Builder) {
+void MVEGatherScatterLowering::pushOutMulShl(unsigned Opcode, PHINode *&Phi,
+ Value *IncrementPerRound,
+ Value *OffsSecondOperand,
+ unsigned LoopIncrement,
+ IRBuilder<> &Builder) {
LLVM_DEBUG(dbgs() << "masked gathers/scatters: optimising mul instruction\n");
// Create a new scalar add outside of the loop and transform it to a splat
@@ -901,12 +914,13 @@ void MVEGatherScatterLowering::pushOutMul(PHINode *&Phi,
Phi->getIncomingBlock(LoopIncrement == 1 ? 0 : 1)->back());
// Create a new index
- Value *StartIndex = BinaryOperator::Create(
- Instruction::Mul, Phi->getIncomingValue(LoopIncrement == 1 ? 0 : 1),
- OffsSecondOperand, "PushedOutMul", InsertionPoint);
+ Value *StartIndex =
+ BinaryOperator::Create((Instruction::BinaryOps)Opcode,
+ Phi->getIncomingValue(LoopIncrement == 1 ? 0 : 1),
+ OffsSecondOperand, "PushedOutMul", InsertionPoint);
Instruction *Product =
- BinaryOperator::Create(Instruction::Mul, IncrementPerRound,
+ BinaryOperator::Create((Instruction::BinaryOps)Opcode, IncrementPerRound,
OffsSecondOperand, "Product", InsertionPoint);
// Increment NewIndex by Product instead of the multiplication
Instruction *NewIncrement = BinaryOperator::Create(
@@ -923,7 +937,7 @@ void MVEGatherScatterLowering::pushOutMul(PHINode *&Phi,
// Check whether all usages of this instruction are as offsets of
// gathers/scatters or simple arithmetics only used by gathers/scatters
-static bool hasAllGatScatUsers(Instruction *I) {
+static bool hasAllGatScatUsers(Instruction *I, const DataLayout &DL) {
if (I->hasNUses(0)) {
return false;
}
@@ -936,8 +950,10 @@ static bool hasAllGatScatUsers(Instruction *I) {
return Gatscat;
} else {
unsigned OpCode = cast<Instruction>(U)->getOpcode();
- if ((OpCode == Instruction::Add || OpCode == Instruction::Mul) &&
- hasAllGatScatUsers(cast<Instruction>(U))) {
+ if ((OpCode == Instruction::Add || OpCode == Instruction::Mul ||
+ OpCode == Instruction::Shl ||
+ isAddLikeOr(cast<Instruction>(U), DL)) &&
+ hasAllGatScatUsers(cast<Instruction>(U), DL)) {
continue;
}
return false;
@@ -955,14 +971,15 @@ bool MVEGatherScatterLowering::optimiseOffsets(Value *Offsets, BasicBlock *BB,
if (!isa<Instruction>(Offsets))
return false;
Instruction *Offs = cast<Instruction>(Offsets);
- if (Offs->getOpcode() != Instruction::Add &&
- Offs->getOpcode() != Instruction::Mul)
+ if (Offs->getOpcode() != Instruction::Add && !isAddLikeOr(Offs, *DL) &&
+ Offs->getOpcode() != Instruction::Mul &&
+ Offs->getOpcode() != Instruction::Shl)
return false;
Loop *L = LI->getLoopFor(BB);
if (L == nullptr)
return false;
if (!Offs->hasOneUse()) {
- if (!hasAllGatScatUsers(Offs))
+ if (!hasAllGatScatUsers(Offs, *DL))
return false;
}
@@ -1060,11 +1077,13 @@ bool MVEGatherScatterLowering::optimiseOffsets(Value *Offsets, BasicBlock *BB,
switch (Offs->getOpcode()) {
case Instruction::Add:
+ case Instruction::Or:
pushOutAdd(NewPhi, OffsSecondOperand, IncrementingBlock == 1 ? 0 : 1);
break;
case Instruction::Mul:
- pushOutMul(NewPhi, IncrementPerRound, OffsSecondOperand, IncrementingBlock,
- Builder);
+ case Instruction::Shl:
+ pushOutMulShl(Offs->getOpcode(), NewPhi, IncrementPerRound,
+ OffsSecondOperand, IncrementingBlock, Builder);
break;
default:
return false;
@@ -1182,8 +1201,7 @@ bool MVEGatherScatterLowering::optimiseAddress(Value *Address, BasicBlock *BB,
if (!GEP)
return false;
bool Changed = false;
- if (GEP->hasOneUse() &&
- dyn_cast<GetElementPtrInst>(GEP->getPointerOperand())) {
+ if (GEP->hasOneUse() && isa<GetElementPtrInst>(GEP->getPointerOperand())) {
IRBuilder<> Builder(GEP->getContext());
Builder.SetInsertPoint(GEP);
Builder.SetCurrentDebugLocation(GEP->getDebugLoc());
@@ -1214,6 +1232,7 @@ bool MVEGatherScatterLowering::runOnFunction(Function &F) {
if (!ST->hasMVEIntegerOps())
return false;
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ DL = &F.getParent()->getDataLayout();
SmallVector<IntrinsicInst *, 4> Gathers;
SmallVector<IntrinsicInst *, 4> Scatters;
diff --git a/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp b/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp
index 6fa5402096a6..dc58b5427425 100644
--- a/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp
+++ b/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp
@@ -40,6 +40,11 @@ MergeEndDec("arm-enable-merge-loopenddec", cl::Hidden,
cl::desc("Enable merging Loop End and Dec instructions."),
cl::init(true));
+static cl::opt<bool>
+SetLRPredicate("arm-set-lr-predicate", cl::Hidden,
+ cl::desc("Enable setting lr as a predicate in tail predication regions."),
+ cl::init(true));
+
namespace {
class MVETPAndVPTOptimisations : public MachineFunctionPass {
public:
@@ -434,10 +439,14 @@ bool MVETPAndVPTOptimisations::ConvertTailPredLoop(MachineLoop *ML,
return false;
SmallVector<MachineInstr *, 4> VCTPs;
- for (MachineBasicBlock *BB : ML->blocks())
+ SmallVector<MachineInstr *, 4> MVEInstrs;
+ for (MachineBasicBlock *BB : ML->blocks()) {
for (MachineInstr &MI : *BB)
if (isVCTP(&MI))
VCTPs.push_back(&MI);
+ else if (findFirstVPTPredOperandIdx(MI) != -1)
+ MVEInstrs.push_back(&MI);
+ }
if (VCTPs.empty()) {
LLVM_DEBUG(dbgs() << " no VCTPs\n");
@@ -510,6 +519,16 @@ bool MVETPAndVPTOptimisations::ConvertTailPredLoop(MachineLoop *ML,
MRI->constrainRegClass(CountReg, &ARM::rGPRRegClass);
LoopStart->eraseFromParent();
+ if (SetLRPredicate) {
+    // Each instruction in the loop needs to use LR from the Phi as its
+    // predicate.
+ Register LR = LoopPhi->getOperand(0).getReg();
+ for (MachineInstr *MI : MVEInstrs) {
+ int Idx = findFirstVPTPredOperandIdx(*MI);
+ MI->getOperand(Idx + 2).setReg(LR);
+ }
+ }
+
return true;
}
@@ -991,6 +1010,7 @@ bool MVETPAndVPTOptimisations::ConvertVPSEL(MachineBasicBlock &MBB) {
.add(MI.getOperand(1))
.addImm(ARMVCC::Then)
.add(MI.getOperand(4))
+ .add(MI.getOperand(5))
.add(MI.getOperand(2));
// Silence unused variable warning in release builds.
(void)MIBuilder;
diff --git a/llvm/lib/Target/ARM/MVETailPredication.cpp b/llvm/lib/Target/ARM/MVETailPredication.cpp
index cf9e2484bab5..6a5bc9284266 100644
--- a/llvm/lib/Target/ARM/MVETailPredication.cpp
+++ b/llvm/lib/Target/ARM/MVETailPredication.cpp
@@ -293,14 +293,18 @@ bool MVETailPredication::IsSafeActiveMask(IntrinsicInst *ActiveLaneMask,
// Check for equality of TC and Ceil by calculating SCEV expression
// TC - Ceil and test it for zero.
//
- bool Zero = SE->getMinusSCEV(
- SE->getBackedgeTakenCount(L),
- SE->getUDivExpr(SE->getAddExpr(SE->getMulExpr(Ceil, VW),
- SE->getNegativeSCEV(VW)),
- VW))
- ->isZero();
-
- if (!Zero) {
+ const SCEV *Sub =
+ SE->getMinusSCEV(SE->getBackedgeTakenCount(L),
+ SE->getUDivExpr(SE->getAddExpr(SE->getMulExpr(Ceil, VW),
+ SE->getNegativeSCEV(VW)),
+ VW));
+
+  // Use context-sensitive facts about the path to the loop to refine the
+  // result. This matters because the backedge-taken count can incorporate
+  // context-sensitive reasoning, while our RHS just above doesn't.
+ Sub = SE->applyLoopGuards(Sub, L);
+
+ if (!Sub->isZero()) {
LLVM_DEBUG(dbgs() << "ARM TP: possible overflow in sub expression.\n");
return false;
}
diff --git a/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp b/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
index a7f7d75e356e..4d514f3ca444 100644
--- a/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
+++ b/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#include "TargetInfo/ARMTargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
Target &llvm::getTheARMLETarget() {
diff --git a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
index ccd272a8617d..e4e95f63f0a6 100644
--- a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -582,10 +582,10 @@ bool Thumb1FrameLowering::needPopSpecialFixUp(const MachineFunction &MF) const {
static void findTemporariesForLR(const BitVector &GPRsNoLRSP,
const BitVector &PopFriendly,
const LivePhysRegs &UsedRegs, unsigned &PopReg,
- unsigned &TmpReg) {
+ unsigned &TmpReg, MachineRegisterInfo &MRI) {
PopReg = TmpReg = 0;
for (auto Reg : GPRsNoLRSP.set_bits()) {
- if (!UsedRegs.contains(Reg)) {
+ if (UsedRegs.available(MRI, Reg)) {
// Remember the first pop-friendly register and exit.
if (PopFriendly.test(Reg)) {
PopReg = Reg;
@@ -693,7 +693,8 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
GPRsNoLRSP.reset(ARM::LR);
GPRsNoLRSP.reset(ARM::SP);
GPRsNoLRSP.reset(ARM::PC);
- findTemporariesForLR(GPRsNoLRSP, PopFriendly, UsedRegs, PopReg, TemporaryReg);
+ findTemporariesForLR(GPRsNoLRSP, PopFriendly, UsedRegs, PopReg, TemporaryReg,
+ MF.getRegInfo());
// If we couldn't find a pop-friendly register, try restoring LR before
// popping the other callee-saved registers, so we could use one of them as a
@@ -704,7 +705,8 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
PrevMBBI--;
if (PrevMBBI->getOpcode() == ARM::tPOP) {
UsedRegs.stepBackward(*PrevMBBI);
- findTemporariesForLR(GPRsNoLRSP, PopFriendly, UsedRegs, PopReg, TemporaryReg);
+ findTemporariesForLR(GPRsNoLRSP, PopFriendly, UsedRegs, PopReg,
+ TemporaryReg, MF.getRegInfo());
if (PopReg) {
MBBI = PrevMBBI;
UseLDRSP = true;
diff --git a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
index cf5eb4b4c0f1..4b18f5e20d40 100644
--- a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -135,6 +135,10 @@ void Thumb1InstrInfo::expandLoadStackGuard(
MachineBasicBlock::iterator MI) const {
MachineFunction &MF = *MI->getParent()->getParent();
const TargetMachine &TM = MF.getTarget();
+
+ assert(MF.getFunction().getParent()->getStackProtectorGuard() != "tls" &&
+ "TLS stack protector not supported for Thumb1 targets");
+
if (TM.isPositionIndependent())
expandLoadStackGuardBase(MI, ARM::tLDRLIT_ga_pcrel, ARM::tLDRi);
else
diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
index 5204e3b03e9e..bdb167a08e61 100644
--- a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -250,7 +250,19 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
void Thumb2InstrInfo::expandLoadStackGuard(
MachineBasicBlock::iterator MI) const {
MachineFunction &MF = *MI->getParent()->getParent();
- if (MF.getTarget().isPositionIndependent())
+ Module &M = *MF.getFunction().getParent();
+
+ if (M.getStackProtectorGuard() == "tls") {
+ expandLoadStackGuardBase(MI, ARM::t2MRC, ARM::t2LDRi12);
+ return;
+ }
+
+ const GlobalValue *GV =
+ cast<GlobalValue>((*MI->memoperands_begin())->getValue());
+
+ if (MF.getSubtarget<ARMSubtarget>().isGVInGOT(GV))
+ expandLoadStackGuardBase(MI, ARM::tLDRLIT_ga_pcrel, ARM::t2LDRi12);
+ else if (MF.getTarget().isPositionIndependent())
expandLoadStackGuardBase(MI, ARM::t2MOV_ga_pcrel, ARM::t2LDRi12);
else
expandLoadStackGuardBase(MI, ARM::t2MOVi32imm, ARM::t2LDRi12);
@@ -792,8 +804,12 @@ void llvm::recomputeVPTBlockMask(MachineInstr &Instr) {
MachineBasicBlock::iterator Iter = ++Instr.getIterator(),
End = Instr.getParent()->end();
+ while (Iter != End && Iter->isDebugInstr())
+ ++Iter;
+
// Verify that the instruction after the VPT/VPST is predicated (it should
// be), and skip it.
+ assert(Iter != End && "Expected some instructions in any VPT block");
assert(
getVPTInstrPredicate(*Iter) == ARMVCC::Then &&
"VPT/VPST should be followed by an instruction with a 'then' predicate!");
@@ -802,6 +818,10 @@ void llvm::recomputeVPTBlockMask(MachineInstr &Instr) {
// Iterate over the predicated instructions, updating the BlockMask as we go.
ARM::PredBlockMask BlockMask = ARM::PredBlockMask::T;
while (Iter != End) {
+ if (Iter->isDebugInstr()) {
+ ++Iter;
+ continue;
+ }
ARMVCC::VPTCodes Pred = getVPTInstrPredicate(*Iter);
if (Pred == ARMVCC::None)
break;
diff --git a/llvm/lib/Target/AVR/AVR.h b/llvm/lib/Target/AVR/AVR.h
index 7332307c07a3..143c339c0664 100644
--- a/llvm/lib/Target/AVR/AVR.h
+++ b/llvm/lib/Target/AVR/AVR.h
@@ -32,8 +32,8 @@ FunctionPass *createAVRDynAllocaSRPass();
FunctionPass *createAVRBranchSelectionPass();
void initializeAVRShiftExpandPass(PassRegistry &);
-void initializeAVRExpandPseudoPass(PassRegistry&);
-void initializeAVRRelaxMemPass(PassRegistry&);
+void initializeAVRExpandPseudoPass(PassRegistry &);
+void initializeAVRRelaxMemPass(PassRegistry &);
/// Contains the AVR backend.
namespace AVR {
diff --git a/llvm/lib/Target/AVR/AVR.td b/llvm/lib/Target/AVR/AVR.td
index 53768f99df3b..22ffc4a368ad 100644
--- a/llvm/lib/Target/AVR/AVR.td
+++ b/llvm/lib/Target/AVR/AVR.td
@@ -45,8 +45,8 @@ include "AVRCallingConv.td"
//===---------------------------------------------------------------------===//
def AVRAsmWriter : AsmWriter {
- string AsmWriterClassName = "InstPrinter";
- bit isMCAsmWriter = 1;
+ string AsmWriterClassName = "InstPrinter";
+ bit isMCAsmWriter = 1;
}
//===---------------------------------------------------------------------===//
@@ -71,10 +71,9 @@ def AVRAsmParserVariant : AsmParserVariant {
//===---------------------------------------------------------------------===//
def AVR : Target {
- let InstructionSet = AVRInstrInfo;
- let AssemblyWriters = [AVRAsmWriter];
+ let InstructionSet = AVRInstrInfo;
+ let AssemblyWriters = [AVRAsmWriter];
- let AssemblyParsers = [AVRAsmParser];
+ let AssemblyParsers = [AVRAsmParser];
let AssemblyParserVariants = [AVRAsmParserVariant];
}
-
diff --git a/llvm/lib/Target/AVR/AVRAsmPrinter.cpp b/llvm/lib/Target/AVR/AVRAsmPrinter.cpp
index e8a13c712210..259ab1bc7aec 100644
--- a/llvm/lib/Target/AVR/AVRAsmPrinter.cpp
+++ b/llvm/lib/Target/AVR/AVRAsmPrinter.cpp
@@ -24,11 +24,12 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Mangler.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "avr-asm-printer"
@@ -38,9 +39,8 @@ namespace llvm {
/// An AVR assembly code printer.
class AVRAsmPrinter : public AsmPrinter {
public:
- AVRAsmPrinter(TargetMachine &TM,
- std::unique_ptr<MCStreamer> Streamer)
- : AsmPrinter(TM, std::move(Streamer)), MRI(*TM.getMCRegisterInfo()) { }
+ AVRAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
+ : AsmPrinter(TM, std::move(Streamer)), MRI(*TM.getMCRegisterInfo()) {}
StringRef getPassName() const override { return "AVR Assembly Printer"; }
@@ -56,8 +56,13 @@ public:
const MCExpr *lowerConstant(const Constant *CV) override;
+ void emitXXStructor(const DataLayout &DL, const Constant *CV) override;
+
+ bool doFinalization(Module &M) override;
+
private:
const MCRegisterInfo &MRI;
+ bool EmittedStructorSymbolAttrs = false;
};
void AVRAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
@@ -139,9 +144,8 @@ bool AVRAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
bool AVRAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
unsigned OpNum, const char *ExtraCode,
raw_ostream &O) {
- if (ExtraCode && ExtraCode[0]) {
- llvm_unreachable("This branch is not implemented yet");
- }
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier
const MachineOperand &MO = MI->getOperand(OpNum);
(void)MO;
@@ -193,9 +197,47 @@ const MCExpr *AVRAsmPrinter::lowerConstant(const Constant *CV) {
return AsmPrinter::lowerConstant(CV);
}
+void AVRAsmPrinter::emitXXStructor(const DataLayout &DL, const Constant *CV) {
+ if (!EmittedStructorSymbolAttrs) {
+ OutStreamer->emitRawComment(
+ " Emitting these undefined symbol references causes us to link the"
+ " libgcc code that runs our constructors/destructors");
+ OutStreamer->emitRawComment(" This matches GCC's behavior");
+
+ MCSymbol *CtorsSym = OutContext.getOrCreateSymbol("__do_global_ctors");
+ OutStreamer->emitSymbolAttribute(CtorsSym, MCSA_Global);
+
+ MCSymbol *DtorsSym = OutContext.getOrCreateSymbol("__do_global_dtors");
+ OutStreamer->emitSymbolAttribute(DtorsSym, MCSA_Global);
+
+ EmittedStructorSymbolAttrs = true;
+ }
+
+ AsmPrinter::emitXXStructor(DL, CV);
+}
+
+bool AVRAsmPrinter::doFinalization(Module &M) {
+ MCSymbol *DoCopyData = OutContext.getOrCreateSymbol("__do_copy_data");
+ MCSymbol *DoClearBss = OutContext.getOrCreateSymbol("__do_clear_bss");
+
+ // FIXME: We can disable __do_copy_data if there are no static RAM variables.
+
+ OutStreamer->emitRawComment(
+ " Declaring this symbol tells the CRT that it should");
+ OutStreamer->emitRawComment(
+ "copy all variables from program memory to RAM on startup");
+ OutStreamer->emitSymbolAttribute(DoCopyData, MCSA_Global);
+
+ OutStreamer->emitRawComment(
+ " Declaring this symbol tells the CRT that it should");
+ OutStreamer->emitRawComment("clear the zeroed data section on startup");
+ OutStreamer->emitSymbolAttribute(DoClearBss, MCSA_Global);
+
+ return AsmPrinter::doFinalization(M);
+}
+
} // end of namespace llvm
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAVRAsmPrinter() {
llvm::RegisterAsmPrinter<llvm::AVRAsmPrinter> X(llvm::getTheAVRTarget());
}
-
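
[Editor's note, not part of the diff] The AVRAsmPrinter changes above make the backend emit assembler-level references that pull in the AVR runtime's startup helpers: emitXXStructor declares __do_global_ctors/__do_global_dtors as global the first time a static constructor or destructor is printed (guarded by EmittedStructorSymbolAttrs), and doFinalization always declares __do_copy_data and __do_clear_bss, matching what avr-gcc emits. The snippet below is a simplified, self-contained model of that emit-once pattern; the StartupSymbolEmitter class is invented for illustration and is not part of the MC layer.

#include <iostream>
#include <string>
#include <vector>

// Simplified model of the AVR startup-symbol emission (illustration only).
class StartupSymbolEmitter {
  bool EmittedStructorSymbols = false;
  std::vector<std::string> Directives;

public:
  // Called once per emitted static constructor/destructor entry.
  void emitStructor() {
    if (!EmittedStructorSymbols) {
      // Referencing these undefined symbols links in the libgcc code that
      // runs constructors/destructors, matching GCC's behavior.
      Directives.push_back(".global __do_global_ctors");
      Directives.push_back(".global __do_global_dtors");
      EmittedStructorSymbols = true;
    }
  }

  // Called once at the end of the module.
  void finalize() {
    // Ask the CRT to copy .data from flash to RAM and to clear .bss.
    Directives.push_back(".global __do_copy_data");
    Directives.push_back(".global __do_clear_bss");
  }

  void print() const {
    for (const std::string &D : Directives)
      std::cout << D << '\n';
  }
};

int main() {
  StartupSymbolEmitter E;
  E.emitStructor();
  E.emitStructor(); // must not duplicate the ctor/dtor declarations
  E.finalize();
  E.print();
  return 0;
}
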
diff --git a/llvm/lib/Target/AVR/AVRCallingConv.td b/llvm/lib/Target/AVR/AVRCallingConv.td
index 65545e531a88..87874c5c50b2 100644
--- a/llvm/lib/Target/AVR/AVRCallingConv.td
+++ b/llvm/lib/Target/AVR/AVRCallingConv.td
@@ -14,9 +14,8 @@
//===----------------------------------------------------------------------===//
// Special return value calling convention for runtime functions.
-def RetCC_AVR_BUILTIN : CallingConv
-<[
- CCIfType<[i8], CCAssignToReg<[R24,R25]>>,
+def RetCC_AVR_BUILTIN : CallingConv<[
+ CCIfType<[i8], CCAssignToReg<[R24, R25]>>,
CCIfType<[i16], CCAssignToReg<[R23R22, R25R24]>>
]>;
@@ -27,8 +26,7 @@ def RetCC_AVR_BUILTIN : CallingConv
// The calling conventions are implemented in custom C++ code
// Calling convention for variadic functions.
-def ArgCC_AVR_Vararg : CallingConv
-<[
+def ArgCC_AVR_Vararg : CallingConv<[
// i16 are always passed through the stack with an alignment of 1.
CCAssignToStack<2, 1>
]>;
@@ -38,4 +36,4 @@ def ArgCC_AVR_Vararg : CallingConv
//===----------------------------------------------------------------------===//
def CSR_Normal : CalleeSavedRegs<(add R29, R28, (sequence "R%u", 17, 2))>;
-def CSR_Interrupts : CalleeSavedRegs<(add (sequence "R%u", 31, 0))>;
+def CSR_Interrupts : CalleeSavedRegs<(add(sequence "R%u", 31, 0))>;
diff --git a/llvm/lib/Target/AVR/AVRDevices.td b/llvm/lib/Target/AVR/AVRDevices.td
index 9507aa40c3d8..7ad0fe904a81 100644
--- a/llvm/lib/Target/AVR/AVRDevices.td
+++ b/llvm/lib/Target/AVR/AVRDevices.td
@@ -7,19 +7,18 @@
// In reality, avr1 (no SRAM) has one variant each of `LD` and `ST`.
// avr2 (with SRAM) adds the rest of the variants.
-
// A feature set aggregates features, grouping them. We don't want to create a
// new member in AVRSubtarget (to store a value) for each set because we do not
// care if the set is supported, only the subfeatures inside the set. We fix
// this by simply setting the same dummy member for all feature sets, which is
// then ignored.
class FeatureSet<string name, string desc, list<SubtargetFeature> i>
- : SubtargetFeature<name, "m_FeatureSetDummy", "true", desc, i>;
+ : SubtargetFeature<name, "m_FeatureSetDummy", "true", desc, i>;
// A family of microcontrollers, defining a set of supported features.
class Family<string name, list<SubtargetFeature> i>
- : FeatureSet<name, !strconcat("The device is a part of the ",
- name, " family"), i>;
+ : FeatureSet<
+ name, !strconcat("The device is a part of the ", name, " family"), i>;
// The device has SRAM, and supports the bare minimum of
// SRAM-relevant instructions.
@@ -32,122 +31,122 @@ class Family<string name, list<SubtargetFeature> i>
// `LDS Rd, K`
// `STS k, Rr`
// `PUSH`/`POP`
-def FeatureSRAM : SubtargetFeature<"sram", "m_hasSRAM", "true",
- "The device has random access memory">;
+def FeatureSRAM : SubtargetFeature<"sram", "m_hasSRAM", "true",
+ "The device has random access memory">;
// The device supports the `JMP k` and `CALL k` instructions.
-def FeatureJMPCALL : SubtargetFeature<"jmpcall", "m_hasJMPCALL", "true",
- "The device supports the `JMP` and "
- "`CALL` instructions">;
-
+def FeatureJMPCALL : SubtargetFeature<"jmpcall", "m_hasJMPCALL", "true",
+ "The device supports the `JMP` and "
+ "`CALL` instructions">;
// The device supports the indirect branches `IJMP` and `ICALL`.
-def FeatureIJMPCALL : SubtargetFeature<"ijmpcall", "m_hasIJMPCALL",
- "true",
- "The device supports `IJMP`/`ICALL`"
- "instructions">;
+def FeatureIJMPCALL : SubtargetFeature<"ijmpcall", "m_hasIJMPCALL", "true",
+ "The device supports `IJMP`/`ICALL`"
+ "instructions">;
// The device supports the extended indirect branches `EIJMP` and `EICALL`.
-def FeatureEIJMPCALL : SubtargetFeature<"eijmpcall", "m_hasEIJMPCALL",
- "true", "The device supports the "
- "`EIJMP`/`EICALL` instructions">;
+def FeatureEIJMPCALL : SubtargetFeature<"eijmpcall", "m_hasEIJMPCALL", "true",
+ "The device supports the "
+ "`EIJMP`/`EICALL` instructions">;
// The device supports `ADDI Rd, K`, `SUBI Rd, K`.
-def FeatureADDSUBIW : SubtargetFeature<"addsubiw", "m_hasADDSUBIW",
- "true", "Enable 16-bit register-immediate "
- "addition and subtraction instructions">;
+def FeatureADDSUBIW : SubtargetFeature<"addsubiw", "m_hasADDSUBIW", "true",
+ "Enable 16-bit register-immediate "
+ "addition and subtraction instructions">;
// The device has an 8-bit stack pointer (SP) register.
-def FeatureSmallStack : SubtargetFeature<"smallstack", "m_hasSmallStack",
- "true", "The device has an 8-bit "
- "stack pointer">;
+def FeatureSmallStack
+ : SubtargetFeature<"smallstack", "m_hasSmallStack", "true",
+ "The device has an 8-bit "
+ "stack pointer">;
// The device supports the 16-bit GPR pair MOVW instruction.
-def FeatureMOVW : SubtargetFeature<"movw", "m_hasMOVW", "true",
- "The device supports the 16-bit MOVW "
- "instruction">;
+def FeatureMOVW : SubtargetFeature<"movw", "m_hasMOVW", "true",
+ "The device supports the 16-bit MOVW "
+ "instruction">;
// The device supports the `LPM` instruction, with implied destination being r0.
-def FeatureLPM : SubtargetFeature<"lpm", "m_hasLPM", "true",
+def FeatureLPM : SubtargetFeature<"lpm", "m_hasLPM", "true",
"The device supports the `LPM` instruction">;
// The device supports the `LPM Rd, Z[+]` instruction.
-def FeatureLPMX : SubtargetFeature<"lpmx", "m_hasLPMX", "true",
- "The device supports the `LPM Rd, Z[+]` "
- "instruction">;
+def FeatureLPMX : SubtargetFeature<"lpmx", "m_hasLPMX", "true",
+ "The device supports the `LPM Rd, Z[+]` "
+ "instruction">;
// The device supports the `ELPM` instruction.
-def FeatureELPM : SubtargetFeature<"elpm", "m_hasELPM", "true",
- "The device supports the ELPM instruction">;
+def FeatureELPM : SubtargetFeature<"elpm", "m_hasELPM", "true",
+ "The device supports the ELPM instruction">;
// The device supports the `ELPM Rd, Z[+]` instructions.
-def FeatureELPMX : SubtargetFeature<"elpmx", "m_hasELPMX", "true",
- "The device supports the `ELPM Rd, Z[+]` "
- "instructions">;
+def FeatureELPMX : SubtargetFeature<"elpmx", "m_hasELPMX", "true",
+ "The device supports the `ELPM Rd, Z[+]` "
+ "instructions">;
// The device supports the `SPM` instruction.
-def FeatureSPM : SubtargetFeature<"spm", "m_hasSPM", "true",
+def FeatureSPM : SubtargetFeature<"spm", "m_hasSPM", "true",
"The device supports the `SPM` instruction">;
// The device supports the `SPM Z+` instruction.
-def FeatureSPMX : SubtargetFeature<"spmx", "m_hasSPMX", "true",
- "The device supports the `SPM Z+` "
- "instruction">;
+def FeatureSPMX : SubtargetFeature<"spmx", "m_hasSPMX", "true",
+ "The device supports the `SPM Z+` "
+ "instruction">;
// The device supports the `DES k` instruction.
-def FeatureDES : SubtargetFeature<"des", "m_hasDES", "true",
+def FeatureDES : SubtargetFeature<"des", "m_hasDES", "true",
"The device supports the `DES k` encryption "
"instruction">;
// The device supports the Read-Write-Modify instructions
// XCH, LAS, LAC, and LAT.
-def FeatureRMW : SubtargetFeature<"rmw", "m_supportsRMW", "true",
+def FeatureRMW : SubtargetFeature<"rmw", "m_supportsRMW", "true",
"The device supports the read-write-modify "
"instructions: XCH, LAS, LAC, LAT">;
// The device supports the `[F]MUL[S][U]` family of instructions.
-def FeatureMultiplication : SubtargetFeature<"mul", "m_supportsMultiplication",
- "true", "The device supports the "
- "multiplication instructions">;
+def FeatureMultiplication
+ : SubtargetFeature<"mul", "m_supportsMultiplication", "true",
+ "The device supports the "
+ "multiplication instructions">;
// The device supports the `BREAK` instruction.
-def FeatureBREAK : SubtargetFeature<"break", "m_hasBREAK", "true",
- "The device supports the `BREAK` debugging "
- "instruction">;
+def FeatureBREAK : SubtargetFeature<"break", "m_hasBREAK", "true",
+ "The device supports the `BREAK` debugging "
+ "instruction">;
// The device has instruction encodings specific to the Tiny core.
-def FeatureTinyEncoding : SubtargetFeature<"tinyencoding",
- "m_hasTinyEncoding", "true",
- "The device has Tiny core specific "
- "instruction encodings">;
+def FeatureTinyEncoding
+ : SubtargetFeature<"tinyencoding", "m_hasTinyEncoding", "true",
+ "The device has Tiny core specific "
+ "instruction encodings">;
// The device has CPU registers mapped in data address space
-def FeatureMMR : SubtargetFeature<"memmappedregs", "m_hasMemMappedGPR",
- "true", "The device has CPU registers "
+def FeatureMMR : SubtargetFeature<"memmappedregs", "m_hasMemMappedGPR", "true",
+ "The device has CPU registers "
"mapped in data address space">;
-class ELFArch<string name> : SubtargetFeature<"", "ELFArch",
- !strconcat("ELF::",name), "">;
+class ELFArch<string name>
+ : SubtargetFeature<"", "ELFArch", !strconcat("ELF::", name), "">;
// ELF e_flags architecture values
-def ELFArchAVR1 : ELFArch<"EF_AVR_ARCH_AVR1">;
-def ELFArchAVR2 : ELFArch<"EF_AVR_ARCH_AVR2">;
-def ELFArchAVR25 : ELFArch<"EF_AVR_ARCH_AVR25">;
-def ELFArchAVR3 : ELFArch<"EF_AVR_ARCH_AVR3">;
-def ELFArchAVR31 : ELFArch<"EF_AVR_ARCH_AVR31">;
-def ELFArchAVR35 : ELFArch<"EF_AVR_ARCH_AVR35">;
-def ELFArchAVR4 : ELFArch<"EF_AVR_ARCH_AVR4">;
-def ELFArchAVR5 : ELFArch<"EF_AVR_ARCH_AVR5">;
-def ELFArchAVR51 : ELFArch<"EF_AVR_ARCH_AVR51">;
-def ELFArchAVR6 : ELFArch<"EF_AVR_ARCH_AVR6">;
-def ELFArchTiny : ELFArch<"EF_AVR_ARCH_AVRTINY">;
-def ELFArchXMEGA1 : ELFArch<"EF_AVR_ARCH_XMEGA1">;
-def ELFArchXMEGA2 : ELFArch<"EF_AVR_ARCH_XMEGA2">;
-def ELFArchXMEGA3 : ELFArch<"EF_AVR_ARCH_XMEGA3">;
-def ELFArchXMEGA4 : ELFArch<"EF_AVR_ARCH_XMEGA4">;
-def ELFArchXMEGA5 : ELFArch<"EF_AVR_ARCH_XMEGA5">;
-def ELFArchXMEGA6 : ELFArch<"EF_AVR_ARCH_XMEGA6">;
-def ELFArchXMEGA7 : ELFArch<"EF_AVR_ARCH_XMEGA7">;
+def ELFArchAVR1 : ELFArch<"EF_AVR_ARCH_AVR1">;
+def ELFArchAVR2 : ELFArch<"EF_AVR_ARCH_AVR2">;
+def ELFArchAVR25 : ELFArch<"EF_AVR_ARCH_AVR25">;
+def ELFArchAVR3 : ELFArch<"EF_AVR_ARCH_AVR3">;
+def ELFArchAVR31 : ELFArch<"EF_AVR_ARCH_AVR31">;
+def ELFArchAVR35 : ELFArch<"EF_AVR_ARCH_AVR35">;
+def ELFArchAVR4 : ELFArch<"EF_AVR_ARCH_AVR4">;
+def ELFArchAVR5 : ELFArch<"EF_AVR_ARCH_AVR5">;
+def ELFArchAVR51 : ELFArch<"EF_AVR_ARCH_AVR51">;
+def ELFArchAVR6 : ELFArch<"EF_AVR_ARCH_AVR6">;
+def ELFArchTiny : ELFArch<"EF_AVR_ARCH_AVRTINY">;
+def ELFArchXMEGA1 : ELFArch<"EF_AVR_ARCH_XMEGA1">;
+def ELFArchXMEGA2 : ELFArch<"EF_AVR_ARCH_XMEGA2">;
+def ELFArchXMEGA3 : ELFArch<"EF_AVR_ARCH_XMEGA3">;
+def ELFArchXMEGA4 : ELFArch<"EF_AVR_ARCH_XMEGA4">;
+def ELFArchXMEGA5 : ELFArch<"EF_AVR_ARCH_XMEGA5">;
+def ELFArchXMEGA6 : ELFArch<"EF_AVR_ARCH_XMEGA6">;
+def ELFArchXMEGA7 : ELFArch<"EF_AVR_ARCH_XMEGA7">;
//===---------------------------------------------------------------------===//
// AVR Families
@@ -155,68 +154,64 @@ def ELFArchXMEGA7 : ELFArch<"EF_AVR_ARCH_XMEGA7">;
// The device has at least the bare minimum that **every** single AVR
// device should have.
-def FamilyAVR0 : Family<"avr0", []>;
+def FamilyAVR0 : Family<"avr0", []>;
-def FamilyAVR1 : Family<"avr1", [FamilyAVR0, FeatureLPM, FeatureMMR]>;
+def FamilyAVR1 : Family<"avr1", [FamilyAVR0, FeatureLPM, FeatureMMR]>;
-def FamilyAVR2 : Family<"avr2",
- [FamilyAVR1, FeatureIJMPCALL, FeatureADDSUBIW,
- FeatureSRAM]>;
+def FamilyAVR2
+ : Family<"avr2",
+ [FamilyAVR1, FeatureIJMPCALL, FeatureADDSUBIW, FeatureSRAM]>;
-def FamilyAVR25 : Family<"avr25",
- [FamilyAVR2, FeatureMOVW, FeatureLPMX,
- FeatureSPM, FeatureBREAK]>;
+def FamilyAVR25
+ : Family<"avr25",
+ [FamilyAVR2, FeatureMOVW, FeatureLPMX, FeatureSPM, FeatureBREAK]>;
-def FamilyAVR3 : Family<"avr3",
- [FamilyAVR2, FeatureJMPCALL]>;
+def FamilyAVR3 : Family<"avr3", [FamilyAVR2, FeatureJMPCALL]>;
-def FamilyAVR31 : Family<"avr31",
- [FamilyAVR3, FeatureELPM]>;
+def FamilyAVR31 : Family<"avr31", [FamilyAVR3, FeatureELPM]>;
-def FamilyAVR35 : Family<"avr35",
- [FamilyAVR3, FeatureMOVW, FeatureLPMX,
- FeatureSPM, FeatureBREAK]>;
+def FamilyAVR35
+ : Family<"avr35",
+ [FamilyAVR3, FeatureMOVW, FeatureLPMX, FeatureSPM, FeatureBREAK]>;
-def FamilyAVR4 : Family<"avr4",
- [FamilyAVR2, FeatureMultiplication,
- FeatureMOVW, FeatureLPMX, FeatureSPM,
- FeatureBREAK]>;
+def FamilyAVR4 : Family<"avr4", [
+ FamilyAVR2, FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM,
+ FeatureBREAK
+]>;
-def FamilyAVR5 : Family<"avr5",
- [FamilyAVR3, FeatureMultiplication,
- FeatureMOVW, FeatureLPMX, FeatureSPM,
- FeatureBREAK]>;
+def FamilyAVR5 : Family<"avr5", [
+ FamilyAVR3, FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM,
+ FeatureBREAK
+]>;
-def FamilyAVR51 : Family<"avr51",
- [FamilyAVR5, FeatureELPM, FeatureELPMX]>;
+def FamilyAVR51 : Family<"avr51", [FamilyAVR5, FeatureELPM, FeatureELPMX]>;
-def FamilyAVR6 : Family<"avr6",
- [FamilyAVR51]>;
+def FamilyAVR6 : Family<"avr6", [FamilyAVR51]>;
-def FamilyTiny : Family<"avrtiny",
- [FamilyAVR0, FeatureBREAK, FeatureSRAM,
- FeatureTinyEncoding]>;
+def FamilyTiny
+ : Family<"avrtiny",
+ [FamilyAVR0, FeatureBREAK, FeatureSRAM, FeatureTinyEncoding]>;
-def FamilyXMEGA : Family<"xmega",
- [FamilyAVR0, FeatureLPM, FeatureIJMPCALL, FeatureADDSUBIW,
- FeatureSRAM, FeatureJMPCALL, FeatureMultiplication,
- FeatureMOVW, FeatureLPMX, FeatureSPM,
- FeatureBREAK, FeatureEIJMPCALL, FeatureSPMX,
- FeatureDES, FeatureELPM, FeatureELPMX]>;
+def FamilyXMEGA : Family<"xmega", [
+ FamilyAVR0, FeatureLPM, FeatureIJMPCALL, FeatureADDSUBIW, FeatureSRAM,
+ FeatureJMPCALL, FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM,
+ FeatureBREAK, FeatureEIJMPCALL, FeatureSPMX, FeatureDES, FeatureELPM,
+ FeatureELPMX
+]>;
-def FamilyXMEGAU : Family<"xmegau",
- [FamilyXMEGA, FeatureRMW]>;
+def FamilyXMEGAU : Family<"xmegau", [FamilyXMEGA, FeatureRMW]>;
-def FeatureSetSpecial : FeatureSet<"special",
- "Enable use of the entire instruction "
- "set - used for debugging",
- [FeatureSRAM, FeatureJMPCALL,
- FeatureIJMPCALL, FeatureEIJMPCALL,
- FeatureADDSUBIW, FeatureMOVW,
- FeatureLPM, FeatureLPMX, FeatureELPM,
- FeatureELPMX, FeatureSPM, FeatureSPMX,
- FeatureDES, FeatureRMW,
- FeatureMultiplication, FeatureBREAK, FeatureMMR]>;
+def FeatureSetSpecial
+ : FeatureSet<"special",
+ "Enable use of the entire instruction "
+ "set - used for debugging",
+ [
+ FeatureSRAM, FeatureJMPCALL, FeatureIJMPCALL,
+ FeatureEIJMPCALL, FeatureADDSUBIW, FeatureMOVW, FeatureLPM,
+ FeatureLPMX, FeatureELPM, FeatureELPMX, FeatureSPM,
+ FeatureSPMX, FeatureDES, FeatureRMW, FeatureMultiplication,
+ FeatureBREAK, FeatureMMR
+ ]>;
//===---------------------------------------------------------------------===//
// AVR microcontrollers supported.
@@ -224,284 +219,307 @@ def FeatureSetSpecial : FeatureSet<"special",
class Device<string Name, Family Fam, ELFArch Arch,
list<SubtargetFeature> ExtraFeatures = []>
- : Processor<Name, NoItineraries, !listconcat([Fam,Arch],ExtraFeatures)>;
+ : Processor<Name, NoItineraries, !listconcat([Fam, Arch], ExtraFeatures)>;
// Generic MCUs
// Note that several versions of GCC have strange ELF architecture
// settings for backwards compatibility - see `gas/config/tc-avr.c`
// in AVR binutils. We do not replicate this.
-def : Device<"avr1", FamilyAVR1, ELFArchAVR1>;
-def : Device<"avr2", FamilyAVR2, ELFArchAVR2>;
-def : Device<"avr25", FamilyAVR25, ELFArchAVR25>;
-def : Device<"avr3", FamilyAVR3, ELFArchAVR3>;
-def : Device<"avr31", FamilyAVR31, ELFArchAVR31>;
-def : Device<"avr35", FamilyAVR35, ELFArchAVR35>;
-def : Device<"avr4", FamilyAVR4, ELFArchAVR4>;
-def : Device<"avr5", FamilyAVR5, ELFArchAVR5>;
-def : Device<"avr51", FamilyAVR51, ELFArchAVR51>;
-def : Device<"avr6", FamilyAVR6, ELFArchAVR6>;
-def : Device<"avrxmega1", FamilyXMEGA, ELFArchXMEGA1>;
-def : Device<"avrxmega2", FamilyXMEGA, ELFArchXMEGA2>;
-def : Device<"avrxmega3", FamilyXMEGA, ELFArchXMEGA3>;
-def : Device<"avrxmega4", FamilyXMEGA, ELFArchXMEGA4>;
-def : Device<"avrxmega5", FamilyXMEGA, ELFArchXMEGA5>;
-def : Device<"avrxmega6", FamilyXMEGA, ELFArchXMEGA6>;
-def : Device<"avrxmega7", FamilyXMEGA, ELFArchXMEGA7>;
-def : Device<"avrtiny", FamilyTiny, ELFArchTiny>;
+def : Device<"avr1", FamilyAVR1, ELFArchAVR1>;
+def : Device<"avr2", FamilyAVR2, ELFArchAVR2>;
+def : Device<"avr25", FamilyAVR25, ELFArchAVR25>;
+def : Device<"avr3", FamilyAVR3, ELFArchAVR3>;
+def : Device<"avr31", FamilyAVR31, ELFArchAVR31>;
+def : Device<"avr35", FamilyAVR35, ELFArchAVR35>;
+def : Device<"avr4", FamilyAVR4, ELFArchAVR4>;
+def : Device<"avr5", FamilyAVR5, ELFArchAVR5>;
+def : Device<"avr51", FamilyAVR51, ELFArchAVR51>;
+def : Device<"avr6", FamilyAVR6, ELFArchAVR6>;
+def : Device<"avrxmega1", FamilyXMEGA, ELFArchXMEGA1>;
+def : Device<"avrxmega2", FamilyXMEGA, ELFArchXMEGA2>;
+def : Device<"avrxmega3", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"avrxmega4", FamilyXMEGA, ELFArchXMEGA4>;
+def : Device<"avrxmega5", FamilyXMEGA, ELFArchXMEGA5>;
+def : Device<"avrxmega6", FamilyXMEGA, ELFArchXMEGA6>;
+def : Device<"avrxmega7", FamilyXMEGA, ELFArchXMEGA7>;
+def : Device<"avrtiny", FamilyTiny, ELFArchTiny>;
// Specific MCUs
-def : Device<"at90s1200", FamilyAVR0, ELFArchAVR1>;
-def : Device<"attiny11", FamilyAVR1, ELFArchAVR1>;
-def : Device<"attiny12", FamilyAVR1, ELFArchAVR1>;
-def : Device<"attiny15", FamilyAVR1, ELFArchAVR1>;
-def : Device<"attiny28", FamilyAVR1, ELFArchAVR1>;
-def : Device<"at90s2313", FamilyAVR2, ELFArchAVR2>;
-def : Device<"at90s2323", FamilyAVR2, ELFArchAVR2>;
-def : Device<"at90s2333", FamilyAVR2, ELFArchAVR2>;
-def : Device<"at90s2343", FamilyAVR2, ELFArchAVR2>;
-def : Device<"attiny22", FamilyAVR2, ELFArchAVR2>;
-def : Device<"attiny26", FamilyAVR2, ELFArchAVR2, [FeatureLPMX]>;
-def : Device<"at86rf401", FamilyAVR2, ELFArchAVR25,
- [FeatureMOVW, FeatureLPMX]>;
-def : Device<"at90s4414", FamilyAVR2, ELFArchAVR2>;
-def : Device<"at90s4433", FamilyAVR2, ELFArchAVR2>;
-def : Device<"at90s4434", FamilyAVR2, ELFArchAVR2>;
-def : Device<"at90s8515", FamilyAVR2, ELFArchAVR2>;
-def : Device<"at90c8534", FamilyAVR2, ELFArchAVR2>;
-def : Device<"at90s8535", FamilyAVR2, ELFArchAVR2>;
-def : Device<"ata5272", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny13", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny13a", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny2313", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny2313a", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny24", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny24a", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny4313", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny44", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny44a", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny84", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny84a", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny25", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny45", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny85", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny261", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny261a", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny441", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny461", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny461a", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny841", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny861", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny861a", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny87", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny43u", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny48", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny88", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny828", FamilyAVR25, ELFArchAVR25>;
-def : Device<"at43usb355", FamilyAVR3, ELFArchAVR3>;
-def : Device<"at76c711", FamilyAVR3, ELFArchAVR3>;
-def : Device<"atmega103", FamilyAVR31, ELFArchAVR31>;
-def : Device<"at43usb320", FamilyAVR31, ELFArchAVR31>;
-def : Device<"attiny167", FamilyAVR35, ELFArchAVR35>;
-def : Device<"at90usb82", FamilyAVR35, ELFArchAVR35>;
-def : Device<"at90usb162", FamilyAVR35, ELFArchAVR35>;
-def : Device<"ata5505", FamilyAVR35, ELFArchAVR35>;
-def : Device<"atmega8u2", FamilyAVR35, ELFArchAVR35>;
-def : Device<"atmega16u2", FamilyAVR35, ELFArchAVR35>;
-def : Device<"atmega32u2", FamilyAVR35, ELFArchAVR35>;
-def : Device<"attiny1634", FamilyAVR35, ELFArchAVR35>;
-def : Device<"atmega8", FamilyAVR2, ELFArchAVR4,
+def : Device<"at90s1200", FamilyAVR0, ELFArchAVR1>;
+def : Device<"attiny11", FamilyAVR1, ELFArchAVR1>;
+def : Device<"attiny12", FamilyAVR1, ELFArchAVR1>;
+def : Device<"attiny15", FamilyAVR1, ELFArchAVR1>;
+def : Device<"attiny28", FamilyAVR1, ELFArchAVR1>;
+def : Device<"at90s2313", FamilyAVR2, ELFArchAVR2>;
+def : Device<"at90s2323", FamilyAVR2, ELFArchAVR2>;
+def : Device<"at90s2333", FamilyAVR2, ELFArchAVR2>;
+def : Device<"at90s2343", FamilyAVR2, ELFArchAVR2>;
+def : Device<"attiny22", FamilyAVR2, ELFArchAVR2>;
+def : Device<"attiny26", FamilyAVR2, ELFArchAVR2, [FeatureLPMX]>;
+def : Device<"at86rf401", FamilyAVR2, ELFArchAVR25, [FeatureMOVW, FeatureLPMX]>;
+def : Device<"at90s4414", FamilyAVR2, ELFArchAVR2>;
+def : Device<"at90s4433", FamilyAVR2, ELFArchAVR2>;
+def : Device<"at90s4434", FamilyAVR2, ELFArchAVR2>;
+def : Device<"at90s8515", FamilyAVR2, ELFArchAVR2>;
+def : Device<"at90c8534", FamilyAVR2, ELFArchAVR2>;
+def : Device<"at90s8535", FamilyAVR2, ELFArchAVR2>;
+def : Device<"ata5272", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny13", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny13a", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny2313", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny2313a", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny24", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny24a", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny4313", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny44", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny44a", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny84", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny84a", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny25", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny45", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny85", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny261", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny261a", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny441", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny461", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny461a", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny841", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny861", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny861a", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny87", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny43u", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny48", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny88", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny828", FamilyAVR25, ELFArchAVR25>;
+def : Device<"at43usb355", FamilyAVR3, ELFArchAVR3>;
+def : Device<"at76c711", FamilyAVR3, ELFArchAVR3>;
+def : Device<"atmega103", FamilyAVR31, ELFArchAVR31>;
+def : Device<"at43usb320", FamilyAVR31, ELFArchAVR31>;
+def : Device<"attiny167", FamilyAVR35, ELFArchAVR35>;
+def : Device<"at90usb82", FamilyAVR35, ELFArchAVR35>;
+def : Device<"at90usb162", FamilyAVR35, ELFArchAVR35>;
+def : Device<"ata5505", FamilyAVR35, ELFArchAVR35>;
+def : Device<"atmega8u2", FamilyAVR35, ELFArchAVR35>;
+def : Device<"atmega16u2", FamilyAVR35, ELFArchAVR35>;
+def : Device<"atmega32u2", FamilyAVR35, ELFArchAVR35>;
+def : Device<"attiny1634", FamilyAVR35, ELFArchAVR35>;
+def : Device<"atmega8", FamilyAVR2, ELFArchAVR4,
[FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM]>;
-def : Device<"ata6289", FamilyAVR4, ELFArchAVR4>;
-def : Device<"atmega8a", FamilyAVR2, ELFArchAVR4,
+def : Device<"ata6289", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega8a", FamilyAVR2, ELFArchAVR4,
[FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM]>;
-def : Device<"ata6285", FamilyAVR4, ELFArchAVR4>;
-def : Device<"ata6286", FamilyAVR4, ELFArchAVR4>;
-def : Device<"atmega48", FamilyAVR4, ELFArchAVR4>;
-def : Device<"atmega48a", FamilyAVR4, ELFArchAVR4>;
-def : Device<"atmega48pa", FamilyAVR4, ELFArchAVR4>;
-def : Device<"atmega48pb", FamilyAVR4, ELFArchAVR4>;
-def : Device<"atmega48p", FamilyAVR4, ELFArchAVR4>;
-def : Device<"atmega88", FamilyAVR4, ELFArchAVR4>;
-def : Device<"atmega88a", FamilyAVR4, ELFArchAVR4>;
-def : Device<"atmega88p", FamilyAVR4, ELFArchAVR4>;
-def : Device<"atmega88pa", FamilyAVR4, ELFArchAVR4>;
-def : Device<"atmega88pb", FamilyAVR4, ELFArchAVR4>;
-def : Device<"atmega8515", FamilyAVR2, ELFArchAVR4,
+def : Device<"ata6285", FamilyAVR4, ELFArchAVR4>;
+def : Device<"ata6286", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega48", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega48a", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega48pa", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega48pb", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega48p", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega88", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega88a", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega88p", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega88pa", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega88pb", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega8515", FamilyAVR2, ELFArchAVR4,
[FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM]>;
-def : Device<"atmega8535", FamilyAVR2, ELFArchAVR4,
+def : Device<"atmega8535", FamilyAVR2, ELFArchAVR4,
[FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM]>;
-def : Device<"atmega8hva", FamilyAVR4, ELFArchAVR4>;
-def : Device<"at90pwm1", FamilyAVR4, ELFArchAVR4>;
-def : Device<"at90pwm2", FamilyAVR4, ELFArchAVR4>;
-def : Device<"at90pwm2b", FamilyAVR4, ELFArchAVR4>;
-def : Device<"at90pwm3", FamilyAVR4, ELFArchAVR4>;
-def : Device<"at90pwm3b", FamilyAVR4, ELFArchAVR4>;
-def : Device<"at90pwm81", FamilyAVR4, ELFArchAVR4>;
-def : Device<"ata5790", FamilyAVR5, ELFArchAVR5>;
-def : Device<"ata5795", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega16", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega16a", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega161", FamilyAVR3, ELFArchAVR5,
+def : Device<"atmega8hva", FamilyAVR4, ELFArchAVR4>;
+def : Device<"at90pwm1", FamilyAVR4, ELFArchAVR4>;
+def : Device<"at90pwm2", FamilyAVR4, ELFArchAVR4>;
+def : Device<"at90pwm2b", FamilyAVR4, ELFArchAVR4>;
+def : Device<"at90pwm3", FamilyAVR4, ELFArchAVR4>;
+def : Device<"at90pwm3b", FamilyAVR4, ELFArchAVR4>;
+def : Device<"at90pwm81", FamilyAVR4, ELFArchAVR4>;
+def : Device<"ata5790", FamilyAVR5, ELFArchAVR5>;
+def : Device<"ata5795", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega16", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega16a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega161", FamilyAVR3, ELFArchAVR5,
[FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM]>;
-def : Device<"atmega162", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega163", FamilyAVR3, ELFArchAVR5,
+def : Device<"atmega162", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega163", FamilyAVR3, ELFArchAVR5,
[FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM]>;
-def : Device<"atmega164a", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega164p", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega164pa", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega165", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega165a", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega165p", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega165pa", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega168", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega168a", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega168p", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega168pa", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega168pb", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega169", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega169a", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega169p", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega169pa", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega32", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega32a", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega323", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega324a", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega324p", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega324pa", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega324pb", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega325", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega325a", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega325p", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega325pa", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega3250", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega3250a", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega3250p", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega3250pa", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega328", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega328p", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega328pb", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega329", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega329a", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega329p", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega329pa", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega3290", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega3290a", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega3290p", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega3290pa", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega406", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega64", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega64a", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega640", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega644", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega644a", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega644p", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega644pa", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega645", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega645a", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega645p", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega649", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega649a", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega649p", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega6450", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega6450a", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega6450p", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega6490", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega6490a", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega6490p", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega64rfr2", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega644rfr2", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega16hva", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega16hva2", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega16hvb", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega16hvbrevb", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega32hvb", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega32hvbrevb", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega64hve", FamilyAVR5, ELFArchAVR5>;
-def : Device<"at90can32", FamilyAVR5, ELFArchAVR5>;
-def : Device<"at90can64", FamilyAVR5, ELFArchAVR5>;
-def : Device<"at90pwm161", FamilyAVR5, ELFArchAVR5>;
-def : Device<"at90pwm216", FamilyAVR5, ELFArchAVR5>;
-def : Device<"at90pwm316", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega32c1", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega64c1", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega16m1", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega32m1", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega64m1", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega16u4", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega32u4", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega32u6", FamilyAVR5, ELFArchAVR5>;
-def : Device<"at90usb646", FamilyAVR5, ELFArchAVR5>;
-def : Device<"at90usb647", FamilyAVR5, ELFArchAVR5>;
-def : Device<"at90scr100", FamilyAVR5, ELFArchAVR5>;
-def : Device<"at94k", FamilyAVR3, ELFArchAVR5,
+def : Device<"atmega164a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega164p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega164pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega165", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega165a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega165p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega165pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega168", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega168a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega168p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega168pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega168pb", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega169", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega169a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega169p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega169pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega32", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega32a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega323", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega324a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega324p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega324pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega324pb", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega325", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega325a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega325p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega325pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega3250", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega3250a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega3250p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega3250pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega328", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega328p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega328pb", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega329", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega329a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega329p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega329pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega3290", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega3290a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega3290p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega3290pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega406", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega64", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega64a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega640", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega644", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega644a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega644p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega644pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega645", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega645a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega645p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega649", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega649a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega649p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega6450", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega6450a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega6450p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega6490", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega6490a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega6490p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega64rfr2", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega644rfr2", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega16hva", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega16hva2", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega16hvb", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega16hvbrevb", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega32hvb", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega32hvbrevb", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega64hve", FamilyAVR5, ELFArchAVR5>;
+def : Device<"at90can32", FamilyAVR5, ELFArchAVR5>;
+def : Device<"at90can64", FamilyAVR5, ELFArchAVR5>;
+def : Device<"at90pwm161", FamilyAVR5, ELFArchAVR5>;
+def : Device<"at90pwm216", FamilyAVR5, ELFArchAVR5>;
+def : Device<"at90pwm316", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega32c1", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega64c1", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega16m1", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega32m1", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega64m1", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega16u4", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega32u4", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega32u6", FamilyAVR5, ELFArchAVR5>;
+def : Device<"at90usb646", FamilyAVR5, ELFArchAVR5>;
+def : Device<"at90usb647", FamilyAVR5, ELFArchAVR5>;
+def : Device<"at90scr100", FamilyAVR5, ELFArchAVR5>;
+def : Device<"at94k", FamilyAVR3, ELFArchAVR5,
[FeatureMultiplication, FeatureMOVW, FeatureLPMX]>;
-def : Device<"m3000", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega128", FamilyAVR51, ELFArchAVR51>;
-def : Device<"atmega128a", FamilyAVR51, ELFArchAVR51>;
-def : Device<"atmega1280", FamilyAVR51, ELFArchAVR51>;
-def : Device<"atmega1281", FamilyAVR51, ELFArchAVR51>;
-def : Device<"atmega1284", FamilyAVR51, ELFArchAVR51>;
-def : Device<"atmega1284p", FamilyAVR51, ELFArchAVR51>;
-def : Device<"atmega128rfa1", FamilyAVR51, ELFArchAVR51>;
-def : Device<"atmega128rfr2", FamilyAVR51, ELFArchAVR51>;
-def : Device<"atmega1284rfr2", FamilyAVR51, ELFArchAVR51>;
-def : Device<"at90can128", FamilyAVR51, ELFArchAVR51>;
-def : Device<"at90usb1286", FamilyAVR51, ELFArchAVR51>;
-def : Device<"at90usb1287", FamilyAVR51, ELFArchAVR51>;
-def : Device<"atmega2560", FamilyAVR6, ELFArchAVR6>;
-def : Device<"atmega2561", FamilyAVR6, ELFArchAVR6>;
-def : Device<"atmega256rfr2", FamilyAVR6, ELFArchAVR6>;
-def : Device<"atmega2564rfr2", FamilyAVR6, ELFArchAVR6>;
-def : Device<"atxmega16a4", FamilyXMEGA, ELFArchXMEGA2>;
-def : Device<"atxmega16a4u", FamilyXMEGAU, ELFArchXMEGA2>;
-def : Device<"atxmega16c4", FamilyXMEGAU, ELFArchXMEGA2>;
-def : Device<"atxmega16d4", FamilyXMEGA, ELFArchXMEGA2>;
-def : Device<"atxmega32a4", FamilyXMEGA, ELFArchXMEGA2>;
-def : Device<"atxmega32a4u", FamilyXMEGAU, ELFArchXMEGA2>;
-def : Device<"atxmega32c4", FamilyXMEGAU, ELFArchXMEGA2>;
-def : Device<"atxmega32d4", FamilyXMEGA, ELFArchXMEGA2>;
-def : Device<"atxmega32e5", FamilyXMEGAU, ELFArchXMEGA2>;
-def : Device<"atxmega16e5", FamilyXMEGAU, ELFArchXMEGA2>;
-def : Device<"atxmega8e5", FamilyXMEGAU, ELFArchXMEGA2>;
-def : Device<"atxmega32x1", FamilyXMEGA, ELFArchXMEGA2>;
-def : Device<"atxmega64a3", FamilyXMEGA, ELFArchXMEGA4>;
-def : Device<"atxmega64a3u", FamilyXMEGAU, ELFArchXMEGA4>;
-def : Device<"atxmega64a4u", FamilyXMEGAU, ELFArchXMEGA4>;
-def : Device<"atxmega64b1", FamilyXMEGAU, ELFArchXMEGA4>;
-def : Device<"atxmega64b3", FamilyXMEGAU, ELFArchXMEGA4>;
-def : Device<"atxmega64c3", FamilyXMEGAU, ELFArchXMEGA4>;
-def : Device<"atxmega64d3", FamilyXMEGA, ELFArchXMEGA4>;
-def : Device<"atxmega64d4", FamilyXMEGA, ELFArchXMEGA4>;
-def : Device<"atxmega64a1", FamilyXMEGA, ELFArchXMEGA5>;
-def : Device<"atxmega64a1u", FamilyXMEGAU, ELFArchXMEGA5>;
-def : Device<"atxmega128a3", FamilyXMEGA, ELFArchXMEGA6>;
-def : Device<"atxmega128a3u", FamilyXMEGAU, ELFArchXMEGA6>;
-def : Device<"atxmega128b1", FamilyXMEGAU, ELFArchXMEGA6>;
-def : Device<"atxmega128b3", FamilyXMEGAU, ELFArchXMEGA6>;
-def : Device<"atxmega128c3", FamilyXMEGAU, ELFArchXMEGA6>;
-def : Device<"atxmega128d3", FamilyXMEGA, ELFArchXMEGA6>;
-def : Device<"atxmega128d4", FamilyXMEGA, ELFArchXMEGA6>;
-def : Device<"atxmega192a3", FamilyXMEGA, ELFArchXMEGA6>;
-def : Device<"atxmega192a3u", FamilyXMEGAU, ELFArchXMEGA6>;
-def : Device<"atxmega192c3", FamilyXMEGAU, ELFArchXMEGA6>;
-def : Device<"atxmega192d3", FamilyXMEGA, ELFArchXMEGA6>;
-def : Device<"atxmega256a3", FamilyXMEGA, ELFArchXMEGA6>;
-def : Device<"atxmega256a3u", FamilyXMEGAU, ELFArchXMEGA6>;
-def : Device<"atxmega256a3b", FamilyXMEGA, ELFArchXMEGA6>;
-def : Device<"atxmega256a3bu", FamilyXMEGAU, ELFArchXMEGA6>;
-def : Device<"atxmega256c3", FamilyXMEGAU, ELFArchXMEGA6>;
-def : Device<"atxmega256d3", FamilyXMEGA, ELFArchXMEGA6>;
-def : Device<"atxmega384c3", FamilyXMEGAU, ELFArchXMEGA6>;
-def : Device<"atxmega384d3", FamilyXMEGA, ELFArchXMEGA6>;
-def : Device<"atxmega128a1", FamilyXMEGA, ELFArchXMEGA7>;
-def : Device<"atxmega128a1u", FamilyXMEGAU, ELFArchXMEGA7>;
-def : Device<"atxmega128a4u", FamilyXMEGAU, ELFArchXMEGA7>;
-def : Device<"attiny4", FamilyTiny, ELFArchTiny>;
-def : Device<"attiny5", FamilyTiny, ELFArchTiny>;
-def : Device<"attiny9", FamilyTiny, ELFArchTiny>;
-def : Device<"attiny10", FamilyTiny, ELFArchTiny>;
-def : Device<"attiny20", FamilyTiny, ELFArchTiny>;
-def : Device<"attiny40", FamilyTiny, ELFArchTiny>;
-def : Device<"attiny102", FamilyTiny, ELFArchTiny>;
-def : Device<"attiny104", FamilyTiny, ELFArchTiny>;
-
+def : Device<"m3000", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega128", FamilyAVR51, ELFArchAVR51>;
+def : Device<"atmega128a", FamilyAVR51, ELFArchAVR51>;
+def : Device<"atmega1280", FamilyAVR51, ELFArchAVR51>;
+def : Device<"atmega1281", FamilyAVR51, ELFArchAVR51>;
+def : Device<"atmega1284", FamilyAVR51, ELFArchAVR51>;
+def : Device<"atmega1284p", FamilyAVR51, ELFArchAVR51>;
+def : Device<"atmega128rfa1", FamilyAVR51, ELFArchAVR51>;
+def : Device<"atmega128rfr2", FamilyAVR51, ELFArchAVR51>;
+def : Device<"atmega1284rfr2", FamilyAVR51, ELFArchAVR51>;
+def : Device<"at90can128", FamilyAVR51, ELFArchAVR51>;
+def : Device<"at90usb1286", FamilyAVR51, ELFArchAVR51>;
+def : Device<"at90usb1287", FamilyAVR51, ELFArchAVR51>;
+def : Device<"atmega2560", FamilyAVR6, ELFArchAVR6>;
+def : Device<"atmega2561", FamilyAVR6, ELFArchAVR6>;
+def : Device<"atmega256rfr2", FamilyAVR6, ELFArchAVR6>;
+def : Device<"atmega2564rfr2", FamilyAVR6, ELFArchAVR6>;
+def : Device<"atxmega16a4", FamilyXMEGA, ELFArchXMEGA2>;
+def : Device<"atxmega16a4u", FamilyXMEGAU, ELFArchXMEGA2>;
+def : Device<"atxmega16c4", FamilyXMEGAU, ELFArchXMEGA2>;
+def : Device<"atxmega16d4", FamilyXMEGA, ELFArchXMEGA2>;
+def : Device<"atxmega32a4", FamilyXMEGA, ELFArchXMEGA2>;
+def : Device<"atxmega32a4u", FamilyXMEGAU, ELFArchXMEGA2>;
+def : Device<"atxmega32c4", FamilyXMEGAU, ELFArchXMEGA2>;
+def : Device<"atxmega32d4", FamilyXMEGA, ELFArchXMEGA2>;
+def : Device<"atxmega32e5", FamilyXMEGAU, ELFArchXMEGA2>;
+def : Device<"atxmega16e5", FamilyXMEGAU, ELFArchXMEGA2>;
+def : Device<"atxmega8e5", FamilyXMEGAU, ELFArchXMEGA2>;
+def : Device<"atxmega32x1", FamilyXMEGA, ELFArchXMEGA2>;
+def : Device<"atxmega64a3", FamilyXMEGA, ELFArchXMEGA4>;
+def : Device<"atxmega64a3u", FamilyXMEGAU, ELFArchXMEGA4>;
+def : Device<"atxmega64a4u", FamilyXMEGAU, ELFArchXMEGA4>;
+def : Device<"atxmega64b1", FamilyXMEGAU, ELFArchXMEGA4>;
+def : Device<"atxmega64b3", FamilyXMEGAU, ELFArchXMEGA4>;
+def : Device<"atxmega64c3", FamilyXMEGAU, ELFArchXMEGA4>;
+def : Device<"atxmega64d3", FamilyXMEGA, ELFArchXMEGA4>;
+def : Device<"atxmega64d4", FamilyXMEGA, ELFArchXMEGA4>;
+def : Device<"atxmega64a1", FamilyXMEGA, ELFArchXMEGA5>;
+def : Device<"atxmega64a1u", FamilyXMEGAU, ELFArchXMEGA5>;
+def : Device<"atxmega128a3", FamilyXMEGA, ELFArchXMEGA6>;
+def : Device<"atxmega128a3u", FamilyXMEGAU, ELFArchXMEGA6>;
+def : Device<"atxmega128b1", FamilyXMEGAU, ELFArchXMEGA6>;
+def : Device<"atxmega128b3", FamilyXMEGAU, ELFArchXMEGA6>;
+def : Device<"atxmega128c3", FamilyXMEGAU, ELFArchXMEGA6>;
+def : Device<"atxmega128d3", FamilyXMEGA, ELFArchXMEGA6>;
+def : Device<"atxmega128d4", FamilyXMEGA, ELFArchXMEGA6>;
+def : Device<"atxmega192a3", FamilyXMEGA, ELFArchXMEGA6>;
+def : Device<"atxmega192a3u", FamilyXMEGAU, ELFArchXMEGA6>;
+def : Device<"atxmega192c3", FamilyXMEGAU, ELFArchXMEGA6>;
+def : Device<"atxmega192d3", FamilyXMEGA, ELFArchXMEGA6>;
+def : Device<"atxmega256a3", FamilyXMEGA, ELFArchXMEGA6>;
+def : Device<"atxmega256a3u", FamilyXMEGAU, ELFArchXMEGA6>;
+def : Device<"atxmega256a3b", FamilyXMEGA, ELFArchXMEGA6>;
+def : Device<"atxmega256a3bu", FamilyXMEGAU, ELFArchXMEGA6>;
+def : Device<"atxmega256c3", FamilyXMEGAU, ELFArchXMEGA6>;
+def : Device<"atxmega256d3", FamilyXMEGA, ELFArchXMEGA6>;
+def : Device<"atxmega384c3", FamilyXMEGAU, ELFArchXMEGA6>;
+def : Device<"atxmega384d3", FamilyXMEGA, ELFArchXMEGA6>;
+def : Device<"atxmega128a1", FamilyXMEGA, ELFArchXMEGA7>;
+def : Device<"atxmega128a1u", FamilyXMEGAU, ELFArchXMEGA7>;
+def : Device<"atxmega128a4u", FamilyXMEGAU, ELFArchXMEGA7>;
+def : Device<"attiny4", FamilyTiny, ELFArchTiny>;
+def : Device<"attiny5", FamilyTiny, ELFArchTiny>;
+def : Device<"attiny9", FamilyTiny, ELFArchTiny>;
+def : Device<"attiny10", FamilyTiny, ELFArchTiny>;
+def : Device<"attiny20", FamilyTiny, ELFArchTiny>;
+def : Device<"attiny40", FamilyTiny, ELFArchTiny>;
+def : Device<"attiny102", FamilyTiny, ELFArchTiny>;
+def : Device<"attiny104", FamilyTiny, ELFArchTiny>;
+def : Device<"attiny202", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"attiny402", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"attiny204", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"attiny404", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"attiny804", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"attiny1604", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"attiny406", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"attiny806", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"attiny1606", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"attiny807", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"attiny1607", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"attiny212", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"attiny412", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"attiny214", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"attiny414", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"attiny814", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"attiny1614", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"attiny416", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"attiny816", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"attiny1616", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"attiny3216", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"attiny417", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"attiny817", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"attiny1617", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"attiny3217", FamilyXMEGA, ELFArchXMEGA3>;
diff --git a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
index f9f91f50c9d5..cb85d73772c5 100644
--- a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
@@ -70,25 +70,24 @@ private:
return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(Opcode), DstReg);
}
- MachineRegisterInfo &getRegInfo(Block &MBB) { return MBB.getParent()->getRegInfo(); }
+ MachineRegisterInfo &getRegInfo(Block &MBB) {
+ return MBB.getParent()->getRegInfo();
+ }
bool expandArith(unsigned OpLo, unsigned OpHi, Block &MBB, BlockIt MBBI);
bool expandLogic(unsigned Op, Block &MBB, BlockIt MBBI);
bool expandLogicImm(unsigned Op, Block &MBB, BlockIt MBBI);
bool isLogicImmOpRedundant(unsigned Op, unsigned ImmVal) const;
- template<typename Func>
- bool expandAtomic(Block &MBB, BlockIt MBBI, Func f);
+ template <typename Func> bool expandAtomic(Block &MBB, BlockIt MBBI, Func f);
- template<typename Func>
+ template <typename Func>
bool expandAtomicBinaryOp(unsigned Opcode, Block &MBB, BlockIt MBBI, Func f);
bool expandAtomicBinaryOp(unsigned Opcode, Block &MBB, BlockIt MBBI);
- bool expandAtomicArithmeticOp(unsigned MemOpcode,
- unsigned ArithOpcode,
- Block &MBB,
- BlockIt MBBI);
+ bool expandAtomicArithmeticOp(unsigned MemOpcode, unsigned ArithOpcode,
+ Block &MBB, BlockIt MBBI);
/// Specific shift implementation.
bool expandLSLB7Rd(Block &MBB, BlockIt MBBI);
@@ -150,8 +149,8 @@ bool AVRExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
return Modified;
}
-bool AVRExpandPseudo::
-expandArith(unsigned OpLo, unsigned OpHi, Block &MBB, BlockIt MBBI) {
+bool AVRExpandPseudo::expandArith(unsigned OpLo, unsigned OpHi, Block &MBB,
+ BlockIt MBBI) {
MachineInstr &MI = *MBBI;
Register SrcLoReg, SrcHiReg, DstLoReg, DstHiReg;
Register DstReg = MI.getOperand(0).getReg();
@@ -164,14 +163,15 @@ expandArith(unsigned OpLo, unsigned OpHi, Block &MBB, BlockIt MBBI) {
TRI->splitReg(DstReg, DstLoReg, DstHiReg);
buildMI(MBB, MBBI, OpLo)
- .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstLoReg, getKillRegState(DstIsKill))
- .addReg(SrcLoReg, getKillRegState(SrcIsKill));
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstLoReg, getKillRegState(DstIsKill))
+ .addReg(SrcLoReg, getKillRegState(SrcIsKill));
- auto MIBHI = buildMI(MBB, MBBI, OpHi)
- .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstHiReg, getKillRegState(DstIsKill))
- .addReg(SrcHiReg, getKillRegState(SrcIsKill));
+ auto MIBHI =
+ buildMI(MBB, MBBI, OpHi)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstHiReg, getKillRegState(DstIsKill))
+ .addReg(SrcHiReg, getKillRegState(SrcIsKill));
if (ImpIsDead)
MIBHI->getOperand(3).setIsDead();
@@ -183,8 +183,7 @@ expandArith(unsigned OpLo, unsigned OpHi, Block &MBB, BlockIt MBBI) {
return true;
}
-bool AVRExpandPseudo::
-expandLogic(unsigned Op, Block &MBB, BlockIt MBBI) {
+bool AVRExpandPseudo::expandLogic(unsigned Op, Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
Register SrcLoReg, SrcHiReg, DstLoReg, DstHiReg;
Register DstReg = MI.getOperand(0).getReg();
@@ -196,18 +195,20 @@ expandLogic(unsigned Op, Block &MBB, BlockIt MBBI) {
TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg);
TRI->splitReg(DstReg, DstLoReg, DstHiReg);
- auto MIBLO = buildMI(MBB, MBBI, Op)
- .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstLoReg, getKillRegState(DstIsKill))
- .addReg(SrcLoReg, getKillRegState(SrcIsKill));
+ auto MIBLO =
+ buildMI(MBB, MBBI, Op)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstLoReg, getKillRegState(DstIsKill))
+ .addReg(SrcLoReg, getKillRegState(SrcIsKill));
// SREG is always implicitly dead
MIBLO->getOperand(3).setIsDead();
- auto MIBHI = buildMI(MBB, MBBI, Op)
- .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstHiReg, getKillRegState(DstIsKill))
- .addReg(SrcHiReg, getKillRegState(SrcIsKill));
+ auto MIBHI =
+ buildMI(MBB, MBBI, Op)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstHiReg, getKillRegState(DstIsKill))
+ .addReg(SrcHiReg, getKillRegState(SrcIsKill));
if (ImpIsDead)
MIBHI->getOperand(3).setIsDead();
@@ -216,8 +217,8 @@ expandLogic(unsigned Op, Block &MBB, BlockIt MBBI) {
return true;
}
-bool AVRExpandPseudo::
- isLogicImmOpRedundant(unsigned Op, unsigned ImmVal) const {
+bool AVRExpandPseudo::isLogicImmOpRedundant(unsigned Op,
+ unsigned ImmVal) const {
// ANDI Rd, 0xff is redundant.
if (Op == AVR::ANDIRdK && ImmVal == 0xff)
@@ -230,8 +231,7 @@ bool AVRExpandPseudo::
return false;
}
-bool AVRExpandPseudo::
-expandLogicImm(unsigned Op, Block &MBB, BlockIt MBBI) {
+bool AVRExpandPseudo::expandLogicImm(unsigned Op, Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
Register DstLoReg, DstHiReg;
Register DstReg = MI.getOperand(0).getReg();
@@ -244,20 +244,22 @@ expandLogicImm(unsigned Op, Block &MBB, BlockIt MBBI) {
TRI->splitReg(DstReg, DstLoReg, DstHiReg);
if (!isLogicImmOpRedundant(Op, Lo8)) {
- auto MIBLO = buildMI(MBB, MBBI, Op)
- .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstLoReg, getKillRegState(SrcIsKill))
- .addImm(Lo8);
+ auto MIBLO =
+ buildMI(MBB, MBBI, Op)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstLoReg, getKillRegState(SrcIsKill))
+ .addImm(Lo8);
// SREG is always implicitly dead
MIBLO->getOperand(3).setIsDead();
}
if (!isLogicImmOpRedundant(Op, Hi8)) {
- auto MIBHI = buildMI(MBB, MBBI, Op)
- .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstHiReg, getKillRegState(SrcIsKill))
- .addImm(Hi8);
+ auto MIBHI =
+ buildMI(MBB, MBBI, Op)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstHiReg, getKillRegState(SrcIsKill))
+ .addImm(Hi8);
if (ImpIsDead)
MIBHI->getOperand(3).setIsDead();
@@ -292,13 +294,15 @@ bool AVRExpandPseudo::expand<AVR::SUBIWRdK>(Block &MBB, BlockIt MBBI) {
bool ImpIsDead = MI.getOperand(3).isDead();
TRI->splitReg(DstReg, DstLoReg, DstHiReg);
- auto MIBLO = buildMI(MBB, MBBI, AVR::SUBIRdK)
- .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstLoReg, getKillRegState(SrcIsKill));
+ auto MIBLO =
+ buildMI(MBB, MBBI, AVR::SUBIRdK)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstLoReg, getKillRegState(SrcIsKill));
- auto MIBHI = buildMI(MBB, MBBI, AVR::SBCIRdK)
- .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstHiReg, getKillRegState(SrcIsKill));
+ auto MIBHI =
+ buildMI(MBB, MBBI, AVR::SBCIRdK)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstHiReg, getKillRegState(SrcIsKill));
switch (MI.getOperand(2).getType()) {
case MachineOperand::MO_GlobalAddress: {
@@ -349,18 +353,20 @@ bool AVRExpandPseudo::expand<AVR::SBCIWRdK>(Block &MBB, BlockIt MBBI) {
unsigned OpHi = AVR::SBCIRdK;
TRI->splitReg(DstReg, DstLoReg, DstHiReg);
- auto MIBLO = buildMI(MBB, MBBI, OpLo)
- .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstLoReg, getKillRegState(SrcIsKill))
- .addImm(Lo8);
+ auto MIBLO =
+ buildMI(MBB, MBBI, OpLo)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstLoReg, getKillRegState(SrcIsKill))
+ .addImm(Lo8);
// SREG is always implicitly killed
MIBLO->getOperand(4).setIsKill();
- auto MIBHI = buildMI(MBB, MBBI, OpHi)
- .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstHiReg, getKillRegState(SrcIsKill))
- .addImm(Hi8);
+ auto MIBHI =
+ buildMI(MBB, MBBI, OpHi)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstHiReg, getKillRegState(SrcIsKill))
+ .addImm(Hi8);
if (ImpIsDead)
MIBHI->getOperand(3).setIsDead();
@@ -409,16 +415,18 @@ bool AVRExpandPseudo::expand<AVR::COMWRd>(Block &MBB, BlockIt MBBI) {
unsigned OpHi = AVR::COMRd;
TRI->splitReg(DstReg, DstLoReg, DstHiReg);
- auto MIBLO = buildMI(MBB, MBBI, OpLo)
- .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstLoReg, getKillRegState(DstIsKill));
+ auto MIBLO =
+ buildMI(MBB, MBBI, OpLo)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstLoReg, getKillRegState(DstIsKill));
// SREG is always implicitly dead
MIBLO->getOperand(2).setIsDead();
- auto MIBHI = buildMI(MBB, MBBI, OpHi)
- .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstHiReg, getKillRegState(DstIsKill));
+ auto MIBHI =
+ buildMI(MBB, MBBI, OpHi)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstHiReg, getKillRegState(DstIsKill));
if (ImpIsDead)
MIBHI->getOperand(2).setIsDead();
@@ -481,12 +489,12 @@ bool AVRExpandPseudo::expand<AVR::CPWRdRr>(Block &MBB, BlockIt MBBI) {
// Low part
buildMI(MBB, MBBI, OpLo)
- .addReg(DstLoReg, getKillRegState(DstIsKill))
- .addReg(SrcLoReg, getKillRegState(SrcIsKill));
+ .addReg(DstLoReg, getKillRegState(DstIsKill))
+ .addReg(SrcLoReg, getKillRegState(SrcIsKill));
auto MIBHI = buildMI(MBB, MBBI, OpHi)
- .addReg(DstHiReg, getKillRegState(DstIsKill))
- .addReg(SrcHiReg, getKillRegState(SrcIsKill));
+ .addReg(DstHiReg, getKillRegState(DstIsKill))
+ .addReg(SrcHiReg, getKillRegState(SrcIsKill));
if (ImpIsDead)
MIBHI->getOperand(2).setIsDead();
@@ -513,15 +521,15 @@ bool AVRExpandPseudo::expand<AVR::CPCWRdRr>(Block &MBB, BlockIt MBBI) {
TRI->splitReg(DstReg, DstLoReg, DstHiReg);
auto MIBLO = buildMI(MBB, MBBI, OpLo)
- .addReg(DstLoReg, getKillRegState(DstIsKill))
- .addReg(SrcLoReg, getKillRegState(SrcIsKill));
+ .addReg(DstLoReg, getKillRegState(DstIsKill))
+ .addReg(SrcLoReg, getKillRegState(SrcIsKill));
// SREG is always implicitly killed
MIBLO->getOperand(3).setIsKill();
auto MIBHI = buildMI(MBB, MBBI, OpHi)
- .addReg(DstHiReg, getKillRegState(DstIsKill))
- .addReg(SrcHiReg, getKillRegState(SrcIsKill));
+ .addReg(DstHiReg, getKillRegState(DstIsKill))
+ .addReg(SrcHiReg, getKillRegState(SrcIsKill));
if (ImpIsDead)
MIBHI->getOperand(2).setIsDead();
@@ -543,11 +551,13 @@ bool AVRExpandPseudo::expand<AVR::LDIWRdK>(Block &MBB, BlockIt MBBI) {
unsigned OpHi = AVR::LDIRdK;
TRI->splitReg(DstReg, DstLoReg, DstHiReg);
- auto MIBLO = buildMI(MBB, MBBI, OpLo)
- .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead));
+ auto MIBLO =
+ buildMI(MBB, MBBI, OpLo)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead));
- auto MIBHI = buildMI(MBB, MBBI, OpHi)
- .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead));
+ auto MIBHI =
+ buildMI(MBB, MBBI, OpHi)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead));
switch (MI.getOperand(1).getType()) {
case MachineOperand::MO_GlobalAddress: {
@@ -592,11 +602,13 @@ bool AVRExpandPseudo::expand<AVR::LDSWRdK>(Block &MBB, BlockIt MBBI) {
unsigned OpHi = AVR::LDSRdK;
TRI->splitReg(DstReg, DstLoReg, DstHiReg);
- auto MIBLO = buildMI(MBB, MBBI, OpLo)
- .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead));
+ auto MIBLO =
+ buildMI(MBB, MBBI, OpLo)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead));
- auto MIBHI = buildMI(MBB, MBBI, OpHi)
- .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead));
+ auto MIBHI =
+ buildMI(MBB, MBBI, OpHi)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead));
switch (MI.getOperand(1).getType()) {
case MachineOperand::MO_GlobalAddress: {
@@ -656,9 +668,9 @@ bool AVRExpandPseudo::expand<AVR::LDWRdPtr>(Block &MBB, BlockIt MBBI) {
// Load high byte.
auto MIBHI = buildMI(MBB, MBBI, OpHi)
- .addReg(CurDstHiReg, RegState::Define)
- .addReg(SrcReg, getKillRegState(SrcIsKill))
- .addImm(1);
+ .addReg(CurDstHiReg, RegState::Define)
+ .addReg(SrcReg, getKillRegState(SrcIsKill))
+ .addImm(1);
if (TmpReg) {
// Move the high byte into the final destination.
@@ -689,15 +701,17 @@ bool AVRExpandPseudo::expand<AVR::LDWRdPtrPi>(Block &MBB, BlockIt MBBI) {
assert(DstReg != SrcReg && "SrcReg and DstReg cannot be the same");
- auto MIBLO = buildMI(MBB, MBBI, OpLo)
- .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(SrcReg, RegState::Define)
- .addReg(SrcReg, RegState::Kill);
+ auto MIBLO =
+ buildMI(MBB, MBBI, OpLo)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(SrcReg, RegState::Define)
+ .addReg(SrcReg, RegState::Kill);
- auto MIBHI = buildMI(MBB, MBBI, OpHi)
- .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(SrcReg, RegState::Define | getDeadRegState(SrcIsDead))
- .addReg(SrcReg, RegState::Kill);
+ auto MIBHI =
+ buildMI(MBB, MBBI, OpHi)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(SrcReg, RegState::Define | getDeadRegState(SrcIsDead))
+ .addReg(SrcReg, RegState::Kill);
MIBLO.setMemRefs(MI.memoperands());
MIBHI.setMemRefs(MI.memoperands());
@@ -720,15 +734,17 @@ bool AVRExpandPseudo::expand<AVR::LDWRdPtrPd>(Block &MBB, BlockIt MBBI) {
assert(DstReg != SrcReg && "SrcReg and DstReg cannot be the same");
- auto MIBHI = buildMI(MBB, MBBI, OpHi)
- .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(SrcReg, RegState::Define)
- .addReg(SrcReg, RegState::Kill);
+ auto MIBHI =
+ buildMI(MBB, MBBI, OpHi)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(SrcReg, RegState::Define)
+ .addReg(SrcReg, RegState::Kill);
- auto MIBLO = buildMI(MBB, MBBI, OpLo)
- .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(SrcReg, RegState::Define | getDeadRegState(SrcIsDead))
- .addReg(SrcReg, RegState::Kill);
+ auto MIBLO =
+ buildMI(MBB, MBBI, OpLo)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(SrcReg, RegState::Define | getDeadRegState(SrcIsDead))
+ .addReg(SrcReg, RegState::Kill);
MIBLO.setMemRefs(MI.memoperands());
MIBHI.setMemRefs(MI.memoperands());
@@ -750,8 +766,8 @@ bool AVRExpandPseudo::expand<AVR::LDDWRdPtrQ>(Block &MBB, BlockIt MBBI) {
unsigned OpHi = AVR::LDDRdPtrQ;
TRI->splitReg(DstReg, DstLoReg, DstHiReg);
- // Since we add 1 to the Imm value for the high byte below, and 63 is the highest Imm value
- // allowed for the instruction, 62 is the limit here.
+ // Since we add 1 to the Imm value for the high byte below, and 63 is the
+ // highest Imm value allowed for the instruction, 62 is the limit here.
assert(Imm <= 62 && "Offset is out of range");
// Use a temporary register if src and dst registers are the same.
@@ -763,9 +779,9 @@ bool AVRExpandPseudo::expand<AVR::LDDWRdPtrQ>(Block &MBB, BlockIt MBBI) {
// Load low byte.
auto MIBLO = buildMI(MBB, MBBI, OpLo)
- .addReg(CurDstLoReg, RegState::Define)
- .addReg(SrcReg)
- .addImm(Imm);
+ .addReg(CurDstLoReg, RegState::Define)
+ .addReg(SrcReg)
+ .addImm(Imm);
// Push low byte onto stack if necessary.
if (TmpReg)
@@ -773,9 +789,9 @@ bool AVRExpandPseudo::expand<AVR::LDDWRdPtrQ>(Block &MBB, BlockIt MBBI) {
// Load high byte.
auto MIBHI = buildMI(MBB, MBBI, OpHi)
- .addReg(CurDstHiReg, RegState::Define)
- .addReg(SrcReg, getKillRegState(SrcIsKill))
- .addImm(Imm + 1);
+ .addReg(CurDstHiReg, RegState::Define)
+ .addReg(SrcReg, getKillRegState(SrcIsKill))
+ .addImm(Imm + 1);
if (TmpReg) {
// Move the high byte into the final destination.
@@ -813,8 +829,8 @@ bool AVRExpandPseudo::expand<AVR::LPMWRdZ>(Block &MBB, BlockIt MBBI) {
// Load low byte.
auto MIBLO = buildMI(MBB, MBBI, OpLo)
- .addReg(CurDstLoReg, RegState::Define)
- .addReg(SrcReg);
+ .addReg(CurDstLoReg, RegState::Define)
+ .addReg(SrcReg);
// Push low byte onto stack if necessary.
if (TmpReg)
@@ -822,8 +838,8 @@ bool AVRExpandPseudo::expand<AVR::LPMWRdZ>(Block &MBB, BlockIt MBBI) {
// Load high byte.
auto MIBHI = buildMI(MBB, MBBI, OpHi)
- .addReg(CurDstHiReg, RegState::Define)
- .addReg(SrcReg, getKillRegState(SrcIsKill));
+ .addReg(CurDstHiReg, RegState::Define)
+ .addReg(SrcReg, getKillRegState(SrcIsKill));
if (TmpReg) {
// Move the high byte into the final destination.
@@ -845,15 +861,15 @@ bool AVRExpandPseudo::expand<AVR::LPMWRdZPi>(Block &MBB, BlockIt MBBI) {
llvm_unreachable("wide LPMPi is unimplemented");
}
-template<typename Func>
+template <typename Func>
bool AVRExpandPseudo::expandAtomic(Block &MBB, BlockIt MBBI, Func f) {
// Remove the pseudo instruction.
MachineInstr &MI = *MBBI;
// Store the SREG.
buildMI(MBB, MBBI, AVR::INRdA)
- .addReg(SCRATCH_REGISTER, RegState::Define)
- .addImm(SREG_ADDR);
+ .addReg(SCRATCH_REGISTER, RegState::Define)
+ .addImm(SREG_ADDR);
// Disable exceptions.
buildMI(MBB, MBBI, AVR::BCLRs).addImm(7); // CLI
@@ -861,58 +877,52 @@ bool AVRExpandPseudo::expandAtomic(Block &MBB, BlockIt MBBI, Func f) {
f(MI);
// Restore the status reg.
- buildMI(MBB, MBBI, AVR::OUTARr)
- .addImm(SREG_ADDR)
- .addReg(SCRATCH_REGISTER);
+ buildMI(MBB, MBBI, AVR::OUTARr).addImm(SREG_ADDR).addReg(SCRATCH_REGISTER);
MI.eraseFromParent();
return true;
}
-template<typename Func>
-bool AVRExpandPseudo::expandAtomicBinaryOp(unsigned Opcode,
- Block &MBB,
- BlockIt MBBI,
- Func f) {
+template <typename Func>
+bool AVRExpandPseudo::expandAtomicBinaryOp(unsigned Opcode, Block &MBB,
+ BlockIt MBBI, Func f) {
return expandAtomic(MBB, MBBI, [&](MachineInstr &MI) {
- auto Op1 = MI.getOperand(0);
- auto Op2 = MI.getOperand(1);
+ auto Op1 = MI.getOperand(0);
+ auto Op2 = MI.getOperand(1);
- MachineInstr &NewInst =
- *buildMI(MBB, MBBI, Opcode).add(Op1).add(Op2).getInstr();
- f(NewInst);
+ MachineInstr &NewInst =
+ *buildMI(MBB, MBBI, Opcode).add(Op1).add(Op2).getInstr();
+ f(NewInst);
});
}
-bool AVRExpandPseudo::expandAtomicBinaryOp(unsigned Opcode,
- Block &MBB,
+bool AVRExpandPseudo::expandAtomicBinaryOp(unsigned Opcode, Block &MBB,
BlockIt MBBI) {
return expandAtomicBinaryOp(Opcode, MBB, MBBI, [](MachineInstr &MI) {});
}
bool AVRExpandPseudo::expandAtomicArithmeticOp(unsigned Width,
- unsigned ArithOpcode,
- Block &MBB,
+ unsigned ArithOpcode, Block &MBB,
BlockIt MBBI) {
return expandAtomic(MBB, MBBI, [&](MachineInstr &MI) {
- auto DstReg = MI.getOperand(0).getReg();
- auto PtrOp = MI.getOperand(1);
- auto SrcReg = MI.getOperand(2).getReg();
+ auto DstReg = MI.getOperand(0).getReg();
+ auto PtrOp = MI.getOperand(1);
+ auto SrcReg = MI.getOperand(2).getReg();
- unsigned LoadOpcode = (Width == 8) ? AVR::LDRdPtr : AVR::LDWRdPtr;
- unsigned StoreOpcode = (Width == 8) ? AVR::STPtrRr : AVR::STWPtrRr;
+ unsigned LoadOpcode = (Width == 8) ? AVR::LDRdPtr : AVR::LDWRdPtr;
+ unsigned StoreOpcode = (Width == 8) ? AVR::STPtrRr : AVR::STWPtrRr;
- // FIXME: this returns the new value (after the operation), not the old
- // value as the atomicrmw instruction is supposed to do!
+ // FIXME: this returns the new value (after the operation), not the old
+ // value as the atomicrmw instruction is supposed to do!
- // Create the load
- buildMI(MBB, MBBI, LoadOpcode, DstReg).addReg(PtrOp.getReg());
+ // Create the load
+ buildMI(MBB, MBBI, LoadOpcode, DstReg).addReg(PtrOp.getReg());
- // Create the arithmetic op
- buildMI(MBB, MBBI, ArithOpcode, DstReg).addReg(DstReg).addReg(SrcReg);
+ // Create the arithmetic op
+ buildMI(MBB, MBBI, ArithOpcode, DstReg).addReg(DstReg).addReg(SrcReg);
- // Create the store
- buildMI(MBB, MBBI, StoreOpcode).add(PtrOp).addReg(DstReg);
+ // Create the store
+ buildMI(MBB, MBBI, StoreOpcode).add(PtrOp).addReg(DstReg);
});
}
@@ -924,8 +934,7 @@ Register AVRExpandPseudo::scavengeGPR8(MachineInstr &MI) {
RS.forward(MI);
BitVector Candidates =
- TRI->getAllocatableSet
- (*MBB.getParent(), &AVR::GPR8RegClass);
+ TRI->getAllocatableSet(*MBB.getParent(), &AVR::GPR8RegClass);
// Exclude all the registers being used by the instruction.
for (MachineOperand &MO : MI.operands()) {
@@ -942,77 +951,77 @@ Register AVRExpandPseudo::scavengeGPR8(MachineInstr &MI) {
return Reg;
}
-template<>
+template <>
bool AVRExpandPseudo::expand<AVR::AtomicLoad8>(Block &MBB, BlockIt MBBI) {
return expandAtomicBinaryOp(AVR::LDRdPtr, MBB, MBBI);
}
-template<>
+template <>
bool AVRExpandPseudo::expand<AVR::AtomicLoad16>(Block &MBB, BlockIt MBBI) {
return expandAtomicBinaryOp(AVR::LDWRdPtr, MBB, MBBI);
}
-template<>
+template <>
bool AVRExpandPseudo::expand<AVR::AtomicStore8>(Block &MBB, BlockIt MBBI) {
return expandAtomicBinaryOp(AVR::STPtrRr, MBB, MBBI);
}
-template<>
+template <>
bool AVRExpandPseudo::expand<AVR::AtomicStore16>(Block &MBB, BlockIt MBBI) {
return expandAtomicBinaryOp(AVR::STWPtrRr, MBB, MBBI);
}
-template<>
+template <>
bool AVRExpandPseudo::expand<AVR::AtomicLoadAdd8>(Block &MBB, BlockIt MBBI) {
return expandAtomicArithmeticOp(8, AVR::ADDRdRr, MBB, MBBI);
}
-template<>
+template <>
bool AVRExpandPseudo::expand<AVR::AtomicLoadAdd16>(Block &MBB, BlockIt MBBI) {
return expandAtomicArithmeticOp(16, AVR::ADDWRdRr, MBB, MBBI);
}
-template<>
+template <>
bool AVRExpandPseudo::expand<AVR::AtomicLoadSub8>(Block &MBB, BlockIt MBBI) {
return expandAtomicArithmeticOp(8, AVR::SUBRdRr, MBB, MBBI);
}
-template<>
+template <>
bool AVRExpandPseudo::expand<AVR::AtomicLoadSub16>(Block &MBB, BlockIt MBBI) {
return expandAtomicArithmeticOp(16, AVR::SUBWRdRr, MBB, MBBI);
}
-template<>
+template <>
bool AVRExpandPseudo::expand<AVR::AtomicLoadAnd8>(Block &MBB, BlockIt MBBI) {
return expandAtomicArithmeticOp(8, AVR::ANDRdRr, MBB, MBBI);
}
-template<>
+template <>
bool AVRExpandPseudo::expand<AVR::AtomicLoadAnd16>(Block &MBB, BlockIt MBBI) {
return expandAtomicArithmeticOp(16, AVR::ANDWRdRr, MBB, MBBI);
}
-template<>
+template <>
bool AVRExpandPseudo::expand<AVR::AtomicLoadOr8>(Block &MBB, BlockIt MBBI) {
return expandAtomicArithmeticOp(8, AVR::ORRdRr, MBB, MBBI);
}
-template<>
+template <>
bool AVRExpandPseudo::expand<AVR::AtomicLoadOr16>(Block &MBB, BlockIt MBBI) {
return expandAtomicArithmeticOp(16, AVR::ORWRdRr, MBB, MBBI);
}
-template<>
+template <>
bool AVRExpandPseudo::expand<AVR::AtomicLoadXor8>(Block &MBB, BlockIt MBBI) {
return expandAtomicArithmeticOp(8, AVR::EORRdRr, MBB, MBBI);
}
-template<>
+template <>
bool AVRExpandPseudo::expand<AVR::AtomicLoadXor16>(Block &MBB, BlockIt MBBI) {
return expandAtomicArithmeticOp(16, AVR::EORWRdRr, MBB, MBBI);
}
-template<>
+template <>
bool AVRExpandPseudo::expand<AVR::AtomicFence>(Block &MBB, BlockIt MBBI) {
// On AVR, there is only one core and so atomic fences do nothing.
MBBI->eraseFromParent();
@@ -1077,15 +1086,15 @@ bool AVRExpandPseudo::expand<AVR::STWPtrRr>(Block &MBB, BlockIt MBBI) {
unsigned OpHi = AVR::STDPtrQRr;
TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg);
- //:TODO: need to reverse this order like inw and stsw?
+ //: TODO: need to reverse this order like inw and stsw?
auto MIBLO = buildMI(MBB, MBBI, OpLo)
- .addReg(DstReg, getUndefRegState(DstIsUndef))
- .addReg(SrcLoReg, getKillRegState(SrcIsKill));
+ .addReg(DstReg, getUndefRegState(DstIsUndef))
+ .addReg(SrcLoReg, getKillRegState(SrcIsKill));
auto MIBHI = buildMI(MBB, MBBI, OpHi)
- .addReg(DstReg, getUndefRegState(DstIsUndef))
- .addImm(1)
- .addReg(SrcHiReg, getKillRegState(SrcIsKill));
+ .addReg(DstReg, getUndefRegState(DstIsUndef))
+ .addImm(1)
+ .addReg(SrcHiReg, getKillRegState(SrcIsKill));
MIBLO.setMemRefs(MI.memoperands());
MIBHI.setMemRefs(MI.memoperands());
@@ -1110,16 +1119,17 @@ bool AVRExpandPseudo::expand<AVR::STWPtrPiRr>(Block &MBB, BlockIt MBBI) {
assert(DstReg != SrcReg && "SrcReg and DstReg cannot be the same");
auto MIBLO = buildMI(MBB, MBBI, OpLo)
- .addReg(DstReg, RegState::Define)
- .addReg(DstReg, RegState::Kill)
- .addReg(SrcLoReg, getKillRegState(SrcIsKill))
- .addImm(Imm);
+ .addReg(DstReg, RegState::Define)
+ .addReg(DstReg, RegState::Kill)
+ .addReg(SrcLoReg, getKillRegState(SrcIsKill))
+ .addImm(Imm);
- auto MIBHI = buildMI(MBB, MBBI, OpHi)
- .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstReg, RegState::Kill)
- .addReg(SrcHiReg, getKillRegState(SrcIsKill))
- .addImm(Imm);
+ auto MIBHI =
+ buildMI(MBB, MBBI, OpHi)
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg, RegState::Kill)
+ .addReg(SrcHiReg, getKillRegState(SrcIsKill))
+ .addImm(Imm);
MIBLO.setMemRefs(MI.memoperands());
MIBHI.setMemRefs(MI.memoperands());
@@ -1144,16 +1154,17 @@ bool AVRExpandPseudo::expand<AVR::STWPtrPdRr>(Block &MBB, BlockIt MBBI) {
assert(DstReg != SrcReg && "SrcReg and DstReg cannot be the same");
auto MIBHI = buildMI(MBB, MBBI, OpHi)
- .addReg(DstReg, RegState::Define)
- .addReg(DstReg, RegState::Kill)
- .addReg(SrcHiReg, getKillRegState(SrcIsKill))
- .addImm(Imm);
+ .addReg(DstReg, RegState::Define)
+ .addReg(DstReg, RegState::Kill)
+ .addReg(SrcHiReg, getKillRegState(SrcIsKill))
+ .addImm(Imm);
- auto MIBLO = buildMI(MBB, MBBI, OpLo)
- .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstReg, RegState::Kill)
- .addReg(SrcLoReg, getKillRegState(SrcIsKill))
- .addImm(Imm);
+ auto MIBLO =
+ buildMI(MBB, MBBI, OpLo)
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg, RegState::Kill)
+ .addReg(SrcLoReg, getKillRegState(SrcIsKill))
+ .addImm(Imm);
MIBLO.setMemRefs(MI.memoperands());
MIBHI.setMemRefs(MI.memoperands());
@@ -1175,19 +1186,19 @@ bool AVRExpandPseudo::expand<AVR::STDWPtrQRr>(Block &MBB, BlockIt MBBI) {
unsigned OpHi = AVR::STDPtrQRr;
TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg);
- // Since we add 1 to the Imm value for the high byte below, and 63 is the highest Imm value
- // allowed for the instruction, 62 is the limit here.
+ // Since we add 1 to the Imm value for the high byte below, and 63 is the
+ // highest Imm value allowed for the instruction, 62 is the limit here.
assert(Imm <= 62 && "Offset is out of range");
auto MIBLO = buildMI(MBB, MBBI, OpLo)
- .addReg(DstReg)
- .addImm(Imm)
- .addReg(SrcLoReg, getKillRegState(SrcIsKill));
+ .addReg(DstReg)
+ .addImm(Imm)
+ .addReg(SrcLoReg, getKillRegState(SrcIsKill));
auto MIBHI = buildMI(MBB, MBBI, OpHi)
- .addReg(DstReg, getKillRegState(DstIsKill))
- .addImm(Imm + 1)
- .addReg(SrcHiReg, getKillRegState(SrcIsKill));
+ .addReg(DstReg, getKillRegState(DstIsKill))
+ .addImm(Imm + 1)
+ .addReg(SrcHiReg, getKillRegState(SrcIsKill));
MIBLO.setMemRefs(MI.memoperands());
MIBHI.setMemRefs(MI.memoperands());
@@ -1207,17 +1218,19 @@ bool AVRExpandPseudo::expand<AVR::INWRdA>(Block &MBB, BlockIt MBBI) {
unsigned OpHi = AVR::INRdA;
TRI->splitReg(DstReg, DstLoReg, DstHiReg);
- // Since we add 1 to the Imm value for the high byte below, and 63 is the highest Imm value
- // allowed for the instruction, 62 is the limit here.
+ // Since we add 1 to the Imm value for the high byte below, and 63 is the
+ // highest Imm value allowed for the instruction, 62 is the limit here.
assert(Imm <= 62 && "Address is out of range");
- auto MIBLO = buildMI(MBB, MBBI, OpLo)
- .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
- .addImm(Imm);
+ auto MIBLO =
+ buildMI(MBB, MBBI, OpLo)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addImm(Imm);
- auto MIBHI = buildMI(MBB, MBBI, OpHi)
- .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
- .addImm(Imm + 1);
+ auto MIBHI =
+ buildMI(MBB, MBBI, OpHi)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addImm(Imm + 1);
MIBLO.setMemRefs(MI.memoperands());
MIBHI.setMemRefs(MI.memoperands());
@@ -1237,18 +1250,18 @@ bool AVRExpandPseudo::expand<AVR::OUTWARr>(Block &MBB, BlockIt MBBI) {
unsigned OpHi = AVR::OUTARr;
TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg);
- // Since we add 1 to the Imm value for the high byte below, and 63 is the highest Imm value
- // allowed for the instruction, 62 is the limit here.
+ // Since we add 1 to the Imm value for the high byte below, and 63 is the
+ // highest Imm value allowed for the instruction, 62 is the limit here.
assert(Imm <= 62 && "Address is out of range");
// 16 bit I/O writes need the high byte first
auto MIBHI = buildMI(MBB, MBBI, OpHi)
- .addImm(Imm + 1)
- .addReg(SrcHiReg, getKillRegState(SrcIsKill));
+ .addImm(Imm + 1)
+ .addReg(SrcHiReg, getKillRegState(SrcIsKill));
auto MIBLO = buildMI(MBB, MBBI, OpLo)
- .addImm(Imm)
- .addReg(SrcLoReg, getKillRegState(SrcIsKill));
+ .addImm(Imm)
+ .addReg(SrcLoReg, getKillRegState(SrcIsKill));
MIBLO.setMemRefs(MI.memoperands());
MIBHI.setMemRefs(MI.memoperands());
@@ -1270,13 +1283,13 @@ bool AVRExpandPseudo::expand<AVR::PUSHWRr>(Block &MBB, BlockIt MBBI) {
// Low part
buildMI(MBB, MBBI, OpLo)
- .addReg(SrcLoReg, getKillRegState(SrcIsKill))
- .setMIFlags(Flags);
+ .addReg(SrcLoReg, getKillRegState(SrcIsKill))
+ .setMIFlags(Flags);
// High part
buildMI(MBB, MBBI, OpHi)
- .addReg(SrcHiReg, getKillRegState(SrcIsKill))
- .setMIFlags(Flags);
+ .addReg(SrcHiReg, getKillRegState(SrcIsKill))
+ .setMIFlags(Flags);
MI.eraseFromParent();
return true;
@@ -1319,15 +1332,15 @@ bool AVRExpandPseudo::expand<AVR::ROLBRd>(Block &MBB, BlockIt MBBI) {
// Shift part
buildMI(MBB, MBBI, OpShift)
- .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstReg)
- .addReg(DstReg);
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg)
+ .addReg(DstReg);
// Add the carry bit
auto MIB = buildMI(MBB, MBBI, OpCarry)
- .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstReg)
- .addReg(ZERO_REGISTER);
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg)
+ .addReg(ZERO_REGISTER);
// SREG is always implicitly killed
MIB->getOperand(2).setIsKill();
@@ -1378,14 +1391,15 @@ bool AVRExpandPseudo::expand<AVR::LSLWRd>(Block &MBB, BlockIt MBBI) {
// Low part
buildMI(MBB, MBBI, OpLo)
- .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstLoReg)
- .addReg(DstLoReg, getKillRegState(DstIsKill));
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstLoReg)
+ .addReg(DstLoReg, getKillRegState(DstIsKill));
- auto MIBHI = buildMI(MBB, MBBI, OpHi)
- .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstHiReg)
- .addReg(DstHiReg, getKillRegState(DstIsKill));
+ auto MIBHI =
+ buildMI(MBB, MBBI, OpHi)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstHiReg)
+ .addReg(DstHiReg, getKillRegState(DstIsKill));
if (ImpIsDead)
MIBHI->getOperand(3).setIsDead();
@@ -1554,12 +1568,13 @@ bool AVRExpandPseudo::expand<AVR::LSRWRd>(Block &MBB, BlockIt MBBI) {
// High part
buildMI(MBB, MBBI, OpHi)
- .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstHiReg, getKillRegState(DstIsKill));
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstHiReg, getKillRegState(DstIsKill));
- auto MIBLO = buildMI(MBB, MBBI, OpLo)
- .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstLoReg, getKillRegState(DstIsKill));
+ auto MIBLO =
+ buildMI(MBB, MBBI, OpLo)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstLoReg, getKillRegState(DstIsKill));
if (ImpIsDead)
MIBLO->getOperand(2).setIsDead();
@@ -1740,12 +1755,13 @@ bool AVRExpandPseudo::expand<AVR::ASRWRd>(Block &MBB, BlockIt MBBI) {
// High part
buildMI(MBB, MBBI, OpHi)
- .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstHiReg, getKillRegState(DstIsKill));
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstHiReg, getKillRegState(DstIsKill));
- auto MIBLO = buildMI(MBB, MBBI, OpLo)
- .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstLoReg, getKillRegState(DstIsKill));
+ auto MIBLO =
+ buildMI(MBB, MBBI, OpLo)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstLoReg, getKillRegState(DstIsKill));
if (ImpIsDead)
MIBLO->getOperand(2).setIsDead();
@@ -1817,7 +1833,8 @@ bool AVRExpandPseudo::expandLSLB7Rd(Block &MBB, BlockIt MBBI) {
buildMI(MBB, MBBI, AVR::RORRd)
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstReg, getKillRegState(DstIsKill))
- ->getOperand(3).setIsUndef(true);
+ ->getOperand(3)
+ .setIsUndef(true);
buildMI(MBB, MBBI, AVR::EORRdRr)
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
@@ -1867,7 +1884,8 @@ bool AVRExpandPseudo::expandLSRB7Rd(Block &MBB, BlockIt MBBI) {
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstReg, getKillRegState(DstIsKill))
.addReg(DstReg, getKillRegState(DstIsKill))
- ->getOperand(4).setIsUndef(true);
+ ->getOperand(4)
+ .setIsUndef(true);
buildMI(MBB, MBBI, AVR::EORRdRr)
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
@@ -1918,10 +1936,11 @@ bool AVRExpandPseudo::expandASRB7Rd(Block &MBB, BlockIt MBBI) {
.addReg(DstReg, getKillRegState(DstIsKill))
.addReg(DstReg, getKillRegState(DstIsKill));
- auto MIRRC = buildMI(MBB, MBBI, AVR::SBCRdRr)
- .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstReg, getKillRegState(DstIsKill))
- .addReg(DstReg, getKillRegState(DstIsKill));
+ auto MIRRC =
+ buildMI(MBB, MBBI, AVR::SBCRdRr)
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg, getKillRegState(DstIsKill))
+ .addReg(DstReg, getKillRegState(DstIsKill));
if (ImpIsDead)
MIRRC->getOperand(3).setIsDead();
@@ -1970,9 +1989,10 @@ template <> bool AVRExpandPseudo::expand<AVR::SEXT>(Block &MBB, BlockIt MBBI) {
TRI->splitReg(DstReg, DstLoReg, DstHiReg);
if (SrcReg != DstLoReg) {
- auto MOV = buildMI(MBB, MBBI, AVR::MOVRdRr)
- .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(SrcReg);
+ auto MOV =
+ buildMI(MBB, MBBI, AVR::MOVRdRr)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(SrcReg);
if (SrcReg == DstHiReg) {
MOV->getOperand(1).setIsKill();
@@ -1981,19 +2001,20 @@ template <> bool AVRExpandPseudo::expand<AVR::SEXT>(Block &MBB, BlockIt MBBI) {
if (SrcReg != DstHiReg) {
buildMI(MBB, MBBI, AVR::MOVRdRr)
- .addReg(DstHiReg, RegState::Define)
- .addReg(SrcReg, getKillRegState(SrcIsKill));
+ .addReg(DstHiReg, RegState::Define)
+ .addReg(SrcReg, getKillRegState(SrcIsKill));
}
buildMI(MBB, MBBI, AVR::ADDRdRr) // LSL Rd <==> ADD Rd, Rr
- .addReg(DstHiReg, RegState::Define)
- .addReg(DstHiReg)
- .addReg(DstHiReg, RegState::Kill);
+ .addReg(DstHiReg, RegState::Define)
+ .addReg(DstHiReg)
+ .addReg(DstHiReg, RegState::Kill);
- auto SBC = buildMI(MBB, MBBI, AVR::SBCRdRr)
- .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstHiReg, RegState::Kill)
- .addReg(DstHiReg, RegState::Kill);
+ auto SBC =
+ buildMI(MBB, MBBI, AVR::SBCRdRr)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstHiReg, RegState::Kill)
+ .addReg(DstHiReg, RegState::Kill);
if (ImpIsDead)
SBC->getOperand(3).setIsDead();
@@ -2025,14 +2046,15 @@ template <> bool AVRExpandPseudo::expand<AVR::ZEXT>(Block &MBB, BlockIt MBBI) {
if (SrcReg != DstLoReg) {
buildMI(MBB, MBBI, AVR::MOVRdRr)
- .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(SrcReg, getKillRegState(SrcIsKill));
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(SrcReg, getKillRegState(SrcIsKill));
}
- auto EOR = buildMI(MBB, MBBI, AVR::EORRdRr)
- .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstHiReg, RegState::Kill | RegState::Undef)
- .addReg(DstHiReg, RegState::Kill | RegState::Undef);
+ auto EOR =
+ buildMI(MBB, MBBI, AVR::EORRdRr)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstHiReg, RegState::Kill | RegState::Undef)
+ .addReg(DstHiReg, RegState::Kill | RegState::Undef);
if (ImpIsDead)
EOR->getOperand(3).setIsDead();
@@ -2054,15 +2076,15 @@ bool AVRExpandPseudo::expand<AVR::SPREAD>(Block &MBB, BlockIt MBBI) {
// Low part
buildMI(MBB, MBBI, OpLo)
- .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
- .addImm(0x3d)
- .setMIFlags(Flags);
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addImm(0x3d)
+ .setMIFlags(Flags);
// High part
buildMI(MBB, MBBI, OpHi)
- .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
- .addImm(0x3e)
- .setMIFlags(Flags);
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addImm(0x3e)
+ .setMIFlags(Flags);
MI.eraseFromParent();
return true;
@@ -2078,26 +2100,26 @@ bool AVRExpandPseudo::expand<AVR::SPWRITE>(Block &MBB, BlockIt MBBI) {
TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg);
buildMI(MBB, MBBI, AVR::INRdA)
- .addReg(AVR::R0, RegState::Define)
- .addImm(SREG_ADDR)
- .setMIFlags(Flags);
+ .addReg(AVR::R0, RegState::Define)
+ .addImm(SREG_ADDR)
+ .setMIFlags(Flags);
buildMI(MBB, MBBI, AVR::BCLRs).addImm(0x07).setMIFlags(Flags);
buildMI(MBB, MBBI, AVR::OUTARr)
- .addImm(0x3e)
- .addReg(SrcHiReg, getKillRegState(SrcIsKill))
- .setMIFlags(Flags);
+ .addImm(0x3e)
+ .addReg(SrcHiReg, getKillRegState(SrcIsKill))
+ .setMIFlags(Flags);
buildMI(MBB, MBBI, AVR::OUTARr)
- .addImm(SREG_ADDR)
- .addReg(AVR::R0, RegState::Kill)
- .setMIFlags(Flags);
+ .addImm(SREG_ADDR)
+ .addReg(AVR::R0, RegState::Kill)
+ .setMIFlags(Flags);
buildMI(MBB, MBBI, AVR::OUTARr)
- .addImm(0x3d)
- .addReg(SrcLoReg, getKillRegState(SrcIsKill))
- .setMIFlags(Flags);
+ .addImm(0x3d)
+ .addReg(SrcLoReg, getKillRegState(SrcIsKill))
+ .setMIFlags(Flags);
MI.eraseFromParent();
return true;
@@ -2107,8 +2129,8 @@ bool AVRExpandPseudo::expandMI(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
int Opcode = MBBI->getOpcode();
-#define EXPAND(Op) \
- case Op: \
+#define EXPAND(Op) \
+ case Op: \
return expand<Op>(MBB, MI)
switch (Opcode) {
@@ -2132,7 +2154,7 @@ bool AVRExpandPseudo::expandMI(Block &MBB, BlockIt MBBI) {
EXPAND(AVR::LDWRdPtr);
EXPAND(AVR::LDWRdPtrPi);
EXPAND(AVR::LDWRdPtrPd);
- case AVR::LDDWRdYQ: //:FIXME: remove this once PR13375 gets fixed
+ case AVR::LDDWRdYQ: //: FIXME: remove this once PR13375 gets fixed
EXPAND(AVR::LDDWRdPtrQ);
EXPAND(AVR::LPMWRdZ);
EXPAND(AVR::LPMWRdZPi);
@@ -2184,8 +2206,8 @@ bool AVRExpandPseudo::expandMI(Block &MBB, BlockIt MBBI) {
} // end of anonymous namespace
-INITIALIZE_PASS(AVRExpandPseudo, "avr-expand-pseudo",
- AVR_EXPAND_PSEUDO_NAME, false, false)
+INITIALIZE_PASS(AVRExpandPseudo, "avr-expand-pseudo", AVR_EXPAND_PSEUDO_NAME,
+ false, false)
namespace llvm {
FunctionPass *createAVRExpandPseudoPass() { return new AVRExpandPseudo(); }
diff --git a/llvm/lib/Target/AVR/AVRFrameLowering.cpp b/llvm/lib/Target/AVR/AVRFrameLowering.cpp
index 89ed30e8bcdb..672611ea2234 100644
--- a/llvm/lib/Target/AVR/AVRFrameLowering.cpp
+++ b/llvm/lib/Target/AVR/AVRFrameLowering.cpp
@@ -111,9 +111,8 @@ void AVRFrameLowering::emitPrologue(MachineFunction &MF,
.setMIFlag(MachineInstr::FrameSetup);
// Mark the FramePtr as live-in in every block except the entry.
- for (MachineFunction::iterator I = std::next(MF.begin()), E = MF.end();
- I != E; ++I) {
- I->addLiveIn(AVR::R29R28);
+ for (MachineBasicBlock &MBBJ : llvm::drop_begin(MF)) {
+ MBBJ.addLiveIn(AVR::R29R28);
}
if (!FrameSize) {
@@ -304,16 +303,16 @@ static void fixStackStores(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const TargetInstrInfo &TII, Register FP) {
// Iterate through the BB until we hit a call instruction or we reach the end.
- for (auto I = MI, E = MBB.end(); I != E && !I->isCall();) {
- MachineBasicBlock::iterator NextMI = std::next(I);
- MachineInstr &MI = *I;
- unsigned Opcode = I->getOpcode();
+ for (MachineInstr &MI :
+ llvm::make_early_inc_range(llvm::make_range(MI, MBB.end()))) {
+ if (MI.isCall())
+ break;
+
+ unsigned Opcode = MI.getOpcode();
// Only care of pseudo store instructions where SP is the base pointer.
- if (Opcode != AVR::STDSPQRr && Opcode != AVR::STDWSPQRr) {
- I = NextMI;
+ if (Opcode != AVR::STDSPQRr && Opcode != AVR::STDWSPQRr)
continue;
- }
assert(MI.getOperand(0).getReg() == AVR::SP &&
"Invalid register, should be SP!");
@@ -325,8 +324,6 @@ static void fixStackStores(MachineBasicBlock &MBB,
MI.setDesc(TII.get(STOpc));
MI.getOperand(0).setReg(FP);
-
- I = NextMI;
}
}
@@ -361,13 +358,13 @@ MachineBasicBlock::iterator AVRFrameLowering::eliminateCallFramePseudoInstr(
// values, etc) is tricky and thus left to be optimized in the future.
BuildMI(MBB, MI, DL, TII.get(AVR::SPREAD), AVR::R31R30).addReg(AVR::SP);
- MachineInstr *New = BuildMI(MBB, MI, DL, TII.get(AVR::SUBIWRdK), AVR::R31R30)
- .addReg(AVR::R31R30, RegState::Kill)
- .addImm(Amount);
+ MachineInstr *New =
+ BuildMI(MBB, MI, DL, TII.get(AVR::SUBIWRdK), AVR::R31R30)
+ .addReg(AVR::R31R30, RegState::Kill)
+ .addImm(Amount);
New->getOperand(3).setIsDead();
- BuildMI(MBB, MI, DL, TII.get(AVR::SPWRITE), AVR::SP)
- .addReg(AVR::R31R30);
+ BuildMI(MBB, MI, DL, TII.get(AVR::SPWRITE), AVR::SP).addReg(AVR::R31R30);
// Make sure the remaining stack stores are converted to real store
// instructions.
@@ -536,4 +533,3 @@ char AVRDynAllocaSR::ID = 0;
FunctionPass *createAVRDynAllocaSRPass() { return new AVRDynAllocaSR(); }
} // end of namespace llvm
-
diff --git a/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp b/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
index df382d553753..7ec2629ab45d 100644
--- a/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
@@ -156,9 +156,9 @@ bool AVRDAGToDAGISel::selectIndexedLoad(SDNode *N) {
return false;
}
- SDNode *ResNode = CurDAG->getMachineNode(Opcode, SDLoc(N), VT,
- PtrVT, MVT::Other,
- LD->getBasePtr(), LD->getChain());
+ SDNode *ResNode =
+ CurDAG->getMachineNode(Opcode, SDLoc(N), VT, PtrVT, MVT::Other,
+ LD->getBasePtr(), LD->getChain());
ReplaceUses(N, ResNode);
CurDAG->RemoveDeadNode(N);
@@ -199,12 +199,11 @@ unsigned AVRDAGToDAGISel::selectIndexedProgMemLoad(const LoadSDNode *LD,
return Opcode;
}
-bool AVRDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
- unsigned ConstraintCode,
- std::vector<SDValue> &OutOps) {
+bool AVRDAGToDAGISel::SelectInlineAsmMemoryOperand(
+ const SDValue &Op, unsigned ConstraintCode, std::vector<SDValue> &OutOps) {
assert((ConstraintCode == InlineAsm::Constraint_m ||
- ConstraintCode == InlineAsm::Constraint_Q) &&
- "Unexpected asm memory constraint");
+ ConstraintCode == InlineAsm::Constraint_Q) &&
+ "Unexpected asm memory constraint");
MachineRegisterInfo &RI = MF->getRegInfo();
const AVRSubtarget &STI = MF->getSubtarget<AVRSubtarget>();
@@ -276,7 +275,8 @@ bool AVRDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
}
if (ImmNode->getValueType(0) != MVT::i8) {
- Disp = CurDAG->getTargetConstant(ImmNode->getAPIntValue().getZExtValue(), dl, MVT::i8);
+ Disp = CurDAG->getTargetConstant(
+ ImmNode->getAPIntValue().getZExtValue(), dl, MVT::i8);
} else {
Disp = ImmOp;
}
@@ -309,11 +309,10 @@ template <> bool AVRDAGToDAGISel::select<ISD::FrameIndex>(SDNode *N) {
// effective address of the final stack slot.
int FI = cast<FrameIndexSDNode>(N)->getIndex();
SDValue TFI =
- CurDAG->getTargetFrameIndex(FI, getTargetLowering()->getPointerTy(DL));
+ CurDAG->getTargetFrameIndex(FI, getTargetLowering()->getPointerTy(DL));
- CurDAG->SelectNodeTo(N, AVR::FRMIDX,
- getTargetLowering()->getPointerTy(DL), TFI,
- CurDAG->getTargetConstant(0, SDLoc(N), MVT::i16));
+ CurDAG->SelectNodeTo(N, AVR::FRMIDX, getTargetLowering()->getPointerTy(DL),
+ TFI, CurDAG->getTargetConstant(0, SDLoc(N), MVT::i16));
return true;
}
@@ -380,8 +379,8 @@ template <> bool AVRDAGToDAGISel::select<ISD::LOAD>(SDNode *N) {
// Check if the opcode can be converted into an indexed load.
if (unsigned LPMOpc = selectIndexedProgMemLoad(LD, VT)) {
// It is legal to fold the load into an indexed load.
- ResNode = CurDAG->getMachineNode(LPMOpc, DL, VT, MVT::i16, MVT::Other, Ptr,
- RegZ);
+ ResNode =
+ CurDAG->getMachineNode(LPMOpc, DL, VT, MVT::i16, MVT::Other, Ptr, RegZ);
ReplaceUses(SDValue(N, 1), SDValue(ResNode, 1));
} else {
// Selecting an indexed load is not legal, fallback to a normal load.
@@ -391,8 +390,8 @@ template <> bool AVRDAGToDAGISel::select<ISD::LOAD>(SDNode *N) {
Ptr, RegZ);
break;
case MVT::i16:
- ResNode = CurDAG->getMachineNode(AVR::LPMWRdZ, DL, MVT::i16,
- MVT::Other, Ptr, RegZ);
+ ResNode = CurDAG->getMachineNode(AVR::LPMWRdZ, DL, MVT::i16, MVT::Other,
+ Ptr, RegZ);
ReplaceUses(SDValue(N, 1), SDValue(ResNode, 1));
break;
default:
@@ -441,7 +440,7 @@ template <> bool AVRDAGToDAGISel::select<AVRISD::CALL>(SDNode *N) {
Ops.push_back(Chain.getValue(1));
SDNode *ResNode =
- CurDAG->getMachineNode(AVR::ICALL, DL, MVT::Other, MVT::Glue, Ops);
+ CurDAG->getMachineNode(AVR::ICALL, DL, MVT::Other, MVT::Glue, Ops);
ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0));
ReplaceUses(SDValue(N, 1), SDValue(ResNode, 1));
@@ -532,16 +531,23 @@ bool AVRDAGToDAGISel::trySelect(SDNode *N) {
switch (Opcode) {
// Nodes we fully handle.
- case ISD::FrameIndex: return select<ISD::FrameIndex>(N);
- case ISD::BRIND: return select<ISD::BRIND>(N);
+ case ISD::FrameIndex:
+ return select<ISD::FrameIndex>(N);
+ case ISD::BRIND:
+ return select<ISD::BRIND>(N);
case ISD::UMUL_LOHI:
- case ISD::SMUL_LOHI: return selectMultiplication(N);
+ case ISD::SMUL_LOHI:
+ return selectMultiplication(N);
// Nodes we handle partially. Other cases are autogenerated
- case ISD::STORE: return select<ISD::STORE>(N);
- case ISD::LOAD: return select<ISD::LOAD>(N);
- case AVRISD::CALL: return select<AVRISD::CALL>(N);
- default: return false;
+ case ISD::STORE:
+ return select<ISD::STORE>(N);
+ case ISD::LOAD:
+ return select<ISD::LOAD>(N);
+ case AVRISD::CALL:
+ return select<AVRISD::CALL>(N);
+ default:
+ return false;
}
}
@@ -551,4 +557,3 @@ FunctionPass *createAVRISelDag(AVRTargetMachine &TM,
}
} // end of namespace llvm
-
diff --git a/llvm/lib/Target/AVR/AVRISelLowering.cpp b/llvm/lib/Target/AVR/AVRISelLowering.cpp
index 58a7aed91cdf..a6f2afb87102 100644
--- a/llvm/lib/Target/AVR/AVRISelLowering.cpp
+++ b/llvm/lib/Target/AVR/AVRISelLowering.cpp
@@ -13,8 +13,8 @@
#include "AVRISelLowering.h"
-#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -232,8 +232,8 @@ AVRTargetLowering::AVRTargetLowering(const AVRTargetMachine &TM,
}
const char *AVRTargetLowering::getTargetNodeName(unsigned Opcode) const {
-#define NODE(name) \
- case AVRISD::name: \
+#define NODE(name) \
+ case AVRISD::name: \
return #name
switch (Opcode) {
@@ -269,7 +269,7 @@ EVT AVRTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
}
SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const {
- //:TODO: this function has to be completely rewritten to produce optimal
+ //: TODO: this function has to be completely rewritten to produce optimal
// code, for now it's producing very long but correct code.
unsigned Opc8;
const SDNode *N = Op.getNode();
@@ -527,7 +527,8 @@ SDValue AVRTargetLowering::getAVRCmp(SDValue LHS, SDValue RHS,
assert((LHS.getSimpleValueType() == RHS.getSimpleValueType()) &&
"LHS and RHS have different types");
assert(((LHS.getSimpleValueType() == MVT::i16) ||
- (LHS.getSimpleValueType() == MVT::i8)) && "invalid comparison type");
+ (LHS.getSimpleValueType() == MVT::i8)) &&
+ "invalid comparison type");
SDValue Cmp;
@@ -856,7 +857,8 @@ void AVRTargetLowering::ReplaceNodeResults(SDNode *N,
/// by AM is legal for this target, for a load/store of the specified type.
bool AVRTargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
- unsigned AS, Instruction *I) const {
+ unsigned AS,
+ Instruction *I) const {
int64_t Offs = AM.BaseOffs;
// Allow absolute addresses.
@@ -1003,14 +1005,13 @@ static const MCPhysReg RegList8[] = {
AVR::R19, AVR::R18, AVR::R17, AVR::R16, AVR::R15, AVR::R14,
AVR::R13, AVR::R12, AVR::R11, AVR::R10, AVR::R9, AVR::R8};
static const MCPhysReg RegList16[] = {
- AVR::R26R25, AVR::R25R24, AVR::R24R23, AVR::R23R22,
- AVR::R22R21, AVR::R21R20, AVR::R20R19, AVR::R19R18,
- AVR::R18R17, AVR::R17R16, AVR::R16R15, AVR::R15R14,
- AVR::R14R13, AVR::R13R12, AVR::R12R11, AVR::R11R10,
- AVR::R10R9, AVR::R9R8};
+ AVR::R26R25, AVR::R25R24, AVR::R24R23, AVR::R23R22, AVR::R22R21,
+ AVR::R21R20, AVR::R20R19, AVR::R19R18, AVR::R18R17, AVR::R17R16,
+ AVR::R16R15, AVR::R15R14, AVR::R14R13, AVR::R13R12, AVR::R12R11,
+ AVR::R11R10, AVR::R10R9, AVR::R9R8};
static_assert(array_lengthof(RegList8) == array_lengthof(RegList16),
- "8-bit and 16-bit register arrays must be of equal length");
+ "8-bit and 16-bit register arrays must be of equal length");
/// Analyze incoming and outgoing function arguments. We need custom C++ code
/// to handle special constraints in the ABI.
@@ -1084,10 +1085,11 @@ analyzeArguments(TargetLowering::CallLoweringInfo *CLI, const Function *F,
/// Count the total number of bytes needed to pass or return these arguments.
template <typename ArgT>
-static unsigned getTotalArgumentsSizeInBytes(const SmallVectorImpl<ArgT> &Args) {
+static unsigned
+getTotalArgumentsSizeInBytes(const SmallVectorImpl<ArgT> &Args) {
unsigned TotalBytes = 0;
- for (const ArgT& Arg : Args) {
+ for (const ArgT &Arg : Args) {
TotalBytes += Arg.VT.getStoreSize();
}
return TotalBytes;
@@ -1102,7 +1104,8 @@ static void analyzeReturnValues(const SmallVectorImpl<ArgT> &Args,
unsigned NumArgs = Args.size();
unsigned TotalBytes = getTotalArgumentsSizeInBytes(Args);
// CanLowerReturn() guarantees this assertion.
- assert(TotalBytes <= 8 && "return values greater than 8 bytes cannot be lowered");
+ assert(TotalBytes <= 8 &&
+ "return values greater than 8 bytes cannot be lowered");
// GCC-ABI says that the size is rounded up to the next even number,
// but actually once it is more than 4 it will always round up to 8.
@@ -1197,7 +1200,7 @@ SDValue AVRTargetLowering::LowerFormalArguments(
InVals.push_back(ArgValue);
} else {
- // Sanity check.
+ // Only arguments passed on the stack should make it here.
assert(VA.isMemLoc());
EVT LocVT = VA.getLocVT();
@@ -1406,8 +1409,8 @@ SDValue AVRTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
///
SDValue AVRTargetLowering::LowerCallResult(
SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const {
+ const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
+ SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
@@ -1488,17 +1491,14 @@ AVRTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
// Don't emit the ret/reti instruction when the naked attribute is present in
// the function being compiled.
- if (MF.getFunction().getAttributes().hasAttribute(
- AttributeList::FunctionIndex, Attribute::Naked)) {
+ if (MF.getFunction().getAttributes().hasFnAttr(Attribute::Naked)) {
return Chain;
}
const AVRMachineFunctionInfo *AFI = MF.getInfo<AVRMachineFunctionInfo>();
unsigned RetOpc =
- AFI->isInterruptOrSignalHandler()
- ? AVRISD::RETI_FLAG
- : AVRISD::RET_FLAG;
+ AFI->isInterruptOrSignalHandler() ? AVRISD::RETI_FLAG : AVRISD::RET_FLAG;
RetOps[0] = Chain; // Update chain.
@@ -1572,8 +1572,10 @@ MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI,
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction::iterator I;
- for (I = BB->getIterator(); I != F->end() && &(*I) != BB; ++I);
- if (I != F->end()) ++I;
+ for (I = BB->getIterator(); I != F->end() && &(*I) != BB; ++I)
+ ;
+ if (I != F->end())
+ ++I;
// Create loop block.
MachineBasicBlock *LoopBB = F->CreateMachineBasicBlock(LLVM_BB);
@@ -1636,8 +1638,7 @@ MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI,
.addReg(ShiftReg2)
.addMBB(LoopBB);
- BuildMI(CheckBB, dl, TII.get(AVR::DECRd), ShiftAmtReg2)
- .addReg(ShiftAmtReg);
+ BuildMI(CheckBB, dl, TII.get(AVR::DECRd), ShiftAmtReg2).addReg(ShiftAmtReg);
BuildMI(CheckBB, dl, TII.get(AVR::BRPLk)).addMBB(LoopBB);
MI.eraseFromParent(); // The pseudo instruction is gone now.
@@ -1725,8 +1726,10 @@ AVRTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *falseMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineFunction::iterator I;
- for (I = MF->begin(); I != MF->end() && &(*I) != MBB; ++I);
- if (I != MF->end()) ++I;
+ for (I = MF->begin(); I != MF->end() && &(*I) != MBB; ++I)
+ ;
+ if (I != MF->end())
+ ++I;
MF->insert(I, trueMBB);
MF->insert(I, falseMBB);
@@ -1748,11 +1751,12 @@ AVRTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
falseMBB->addSuccessor(trueMBB);
// Set up the Phi node to determine where we came from
- BuildMI(*trueMBB, trueMBB->begin(), dl, TII.get(AVR::PHI), MI.getOperand(0).getReg())
- .addReg(MI.getOperand(1).getReg())
- .addMBB(MBB)
- .addReg(MI.getOperand(2).getReg())
- .addMBB(falseMBB) ;
+ BuildMI(*trueMBB, trueMBB->begin(), dl, TII.get(AVR::PHI),
+ MI.getOperand(0).getReg())
+ .addReg(MI.getOperand(1).getReg())
+ .addMBB(MBB)
+ .addReg(MI.getOperand(2).getReg())
+ .addMBB(falseMBB);
MI.eraseFromParent(); // The pseudo instruction is gone now.
return trueMBB;
@@ -1779,9 +1783,12 @@ AVRTargetLowering::getConstraintType(StringRef Constraint) const {
case 'w': // Special upper register pairs
return C_RegisterClass;
case 't': // Temporary register
- case 'x': case 'X': // Pointer register pair X
- case 'y': case 'Y': // Pointer register pair Y
- case 'z': case 'Z': // Pointer register pair Z
+ case 'x':
+ case 'X': // Pointer register pair X
+ case 'y':
+ case 'Y': // Pointer register pair Y
+ case 'z':
+ case 'Z': // Pointer register pair Z
return C_Register;
case 'Q': // A memory address based on Y or Z pointer with displacement.
return C_Memory;
@@ -1842,9 +1849,12 @@ AVRTargetLowering::getSingleConstraintMatchWeight(
case 'q':
case 't':
case 'w':
- case 'x': case 'X':
- case 'y': case 'Y':
- case 'z': case 'Z':
+ case 'x':
+ case 'X':
+ case 'y':
+ case 'Y':
+ case 'z':
+ case 'Z':
weight = CW_SpecificReg;
break;
case 'G':
diff --git a/llvm/lib/Target/AVR/AVRISelLowering.h b/llvm/lib/Target/AVR/AVRISelLowering.h
index 8130cf045fa8..3ae036b66bcb 100644
--- a/llvm/lib/Target/AVR/AVRISelLowering.h
+++ b/llvm/lib/Target/AVR/AVRISelLowering.h
@@ -133,11 +133,11 @@ public:
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
- Register getRegisterByName(const char* RegName, LLT VT,
+ Register getRegisterByName(const char *RegName, LLT VT,
const MachineFunction &MF) const override;
- bool shouldSplitFunctionArgumentsAsLittleEndian(const DataLayout &DL)
- const override {
+ bool shouldSplitFunctionArgumentsAsLittleEndian(
+ const DataLayout &DL) const override {
return false;
}
@@ -179,7 +179,6 @@ private:
SmallVectorImpl<SDValue> &InVals) const;
protected:
-
const AVRSubtarget &Subtarget;
private:
diff --git a/llvm/lib/Target/AVR/AVRInstrFormats.td b/llvm/lib/Target/AVR/AVRInstrFormats.td
index 6eb49076efb0..2bcbcdfbf925 100644
--- a/llvm/lib/Target/AVR/AVRInstrFormats.td
+++ b/llvm/lib/Target/AVR/AVRInstrFormats.td
@@ -11,8 +11,8 @@
//===----------------------------------------------------------------------===//
// A generic AVR instruction.
-class AVRInst<dag outs, dag ins, string asmstr, list<dag> pattern> : Instruction
-{
+class AVRInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : Instruction {
let Namespace = "AVR";
dag OutOperandList = outs;
@@ -25,8 +25,7 @@ class AVRInst<dag outs, dag ins, string asmstr, list<dag> pattern> : Instruction
/// A 16-bit AVR instruction.
class AVRInst16<dag outs, dag ins, string asmstr, list<dag> pattern>
- : AVRInst<outs, ins, asmstr, pattern>
-{
+ : AVRInst<outs, ins, asmstr, pattern> {
field bits<16> Inst;
let Size = 2;
@@ -34,8 +33,7 @@ class AVRInst16<dag outs, dag ins, string asmstr, list<dag> pattern>
/// a 32-bit AVR instruction.
class AVRInst32<dag outs, dag ins, string asmstr, list<dag> pattern>
- : AVRInst<outs, ins, asmstr, pattern>
-{
+ : AVRInst<outs, ins, asmstr, pattern> {
field bits<32> Inst;
let Size = 4;
@@ -50,8 +48,7 @@ class AVRInst32<dag outs, dag ins, string asmstr, list<dag> pattern>
// is defined as a pseudo instruction. In AVRExpandPseudoInsts.cpp,
// the instruction is then replaced by two add instructions - one for each byte.
class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
- : AVRInst16<outs, ins, asmstr, pattern>
-{
+ : AVRInst16<outs, ins, asmstr, pattern> {
let Pattern = pattern;
let isPseudo = 1;
@@ -67,28 +64,26 @@ class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
// (Accepts all registers)
//===----------------------------------------------------------------------===//
class FRdRr<bits<4> opcode, bits<2> f, dag outs, dag ins, string asmstr,
- list<dag> pattern> : AVRInst16<outs, ins, asmstr, pattern>
-{
+ list<dag> pattern> : AVRInst16<outs, ins, asmstr, pattern> {
bits<5> rd;
bits<5> rr;
- let Inst{15-12} = opcode;
- let Inst{11-10} = f;
+ let Inst{15 - 12} = opcode;
+ let Inst{11 - 10} = f;
let Inst{9} = rr{4};
- let Inst{8-4} = rd;
- let Inst{3-0} = rr{3-0};
+ let Inst{8 - 4} = rd;
+ let Inst{3 - 0} = rr{3 - 0};
}
class FTST<bits<4> opcode, bits<2> f, dag outs, dag ins, string asmstr,
- list<dag> pattern> : AVRInst16<outs, ins, asmstr, pattern>
-{
+ list<dag> pattern> : AVRInst16<outs, ins, asmstr, pattern> {
bits<5> rd;
- let Inst{15-12} = opcode;
- let Inst{11-10} = f;
+ let Inst{15 - 12} = opcode;
+ let Inst{11 - 10} = f;
let Inst{9} = rd{4};
- let Inst{8-4} = rd;
- let Inst{3-0} = rd{3-0};
+ let Inst{8 - 4} = rd;
+ let Inst{3 - 0} = rd{3 - 0};
}
//===----------------------------------------------------------------------===//
@@ -96,19 +91,18 @@ class FTST<bits<4> opcode, bits<2> f, dag outs, dag ins, string asmstr,
// <|1001|001r|rrrr|0ttt>
//===----------------------------------------------------------------------===//
class FZRd<bits<3> t, dag outs, dag ins, string asmstr, list<dag> pattern>
- : AVRInst16<outs, ins, asmstr, pattern>
-{
+ : AVRInst16<outs, ins, asmstr, pattern> {
bits<5> rd;
- let Inst{15-12} = 0b1001;
+ let Inst{15 - 12} = 0b1001;
- let Inst{11-9} = 0b001;
+ let Inst{11 - 9} = 0b001;
let Inst{8} = rd{4};
- let Inst{7-4} = rd{3-0};
+ let Inst{7 - 4} = rd{3 - 0};
let Inst{3} = 0;
- let Inst{2-0} = t;
+ let Inst{2 - 0} = t;
}
//===----------------------------------------------------------------------===//
@@ -119,15 +113,14 @@ class FZRd<bits<3> t, dag outs, dag ins, string asmstr, list<dag> pattern>
// (Only accepts r16-r31)
//===----------------------------------------------------------------------===//
class FRdK<bits<4> opcode, dag outs, dag ins, string asmstr, list<dag> pattern>
- : AVRInst16<outs, ins, asmstr, pattern>
-{
+ : AVRInst16<outs, ins, asmstr, pattern> {
bits<4> rd;
bits<8> k;
- let Inst{15-12} = opcode;
- let Inst{11-8} = k{7-4};
- let Inst{7-4} = rd{3-0};
- let Inst{3-0} = k{3-0};
+ let Inst{15 - 12} = opcode;
+ let Inst{11 - 8} = k{7 - 4};
+ let Inst{7 - 4} = rd{3 - 0};
+ let Inst{3 - 0} = k{3 - 0};
let isAsCheapAsAMove = 1;
}
@@ -140,14 +133,13 @@ class FRdK<bits<4> opcode, dag outs, dag ins, string asmstr, list<dag> pattern>
// (Accepts all registers)
//===----------------------------------------------------------------------===//
class FRd<bits<4> opcode, bits<7> f, dag outs, dag ins, string asmstr,
- list<dag> pattern> : AVRInst16<outs, ins, asmstr, pattern>
-{
+ list<dag> pattern> : AVRInst16<outs, ins, asmstr, pattern> {
bits<5> d;
- let Inst{15-12} = opcode;
- let Inst{11-9} = f{6-4};
- let Inst{8-4} = d;
- let Inst{3-0} = f{3-0};
+ let Inst{15 - 12} = opcode;
+ let Inst{11 - 9} = f{6 - 4};
+ let Inst{8 - 4} = d;
+ let Inst{3 - 0} = f{3 - 0};
let DecoderMethod = "decodeFRd";
}
@@ -160,23 +152,22 @@ class FRd<bits<4> opcode, bits<7> f, dag outs, dag ins, string asmstr,
// p = pointer register (1 bit) [1 for Y, 0 for Z]
//===----------------------------------------------------------------------===//
class FSTDLDD<bit type, dag outs, dag ins, string asmstr, list<dag> pattern>
- : AVRInst16<outs, ins, asmstr, pattern>
-{
+ : AVRInst16<outs, ins, asmstr, pattern> {
bits<7> memri;
bits<5> reg; // the GP register
- let Inst{15-14} = 0b10;
+ let Inst{15 - 14} = 0b10;
let Inst{13} = memri{5};
let Inst{12} = 0;
- let Inst{11-10} = memri{4-3};
+ let Inst{11 - 10} = memri{4 - 3};
let Inst{9} = type;
let Inst{8} = reg{4};
- let Inst{7-4} = reg{3-0};
+ let Inst{7 - 4} = reg{3 - 0};
let Inst{3} = memri{6};
- let Inst{2-0} = memri{2-0};
+ let Inst{2 - 0} = memri{2 - 0};
}
//===---------------------------------------------------------------------===//
@@ -190,26 +181,24 @@ class FSTDLDD<bit type, dag outs, dag ins, string asmstr, list<dag> pattern>
// Note that the bit labelled 'i' above does not follow a simple pattern,
// so there exists a post encoder method to set it manually.
//===---------------------------------------------------------------------===//
-class FSTLD<bit type, bits<2> mode, dag outs, dag ins,
- string asmstr, list<dag> pattern>
- : AVRInst16<outs, ins, asmstr, pattern>
-{
+class FSTLD<bit type, bits<2> mode, dag outs, dag ins, string asmstr,
+ list<dag> pattern> : AVRInst16<outs, ins, asmstr, pattern> {
bits<2> ptrreg;
bits<5> reg;
- let Inst{15-13} = 0b100;
+ let Inst{15 - 13} = 0b100;
// This bit varies depending on the arguments and the mode.
// We have a post encoder method to set this bit manually.
let Inst{12} = 0;
- let Inst{11-10} = 0b00;
+ let Inst{11 - 10} = 0b00;
let Inst{9} = type;
let Inst{8} = reg{4};
- let Inst{7-4} = reg{3-0};
+ let Inst{7 - 4} = reg{3 - 0};
- let Inst{3-2} = ptrreg{1-0};
- let Inst{1-0} = mode{1-0};
+ let Inst{3 - 2} = ptrreg{1 - 0};
+ let Inst{1 - 0} = mode{1 - 0};
let PostEncoderMethod = "loadStorePostEncoder";
}
@@ -223,22 +212,21 @@ class FSTLD<bit type, bits<2> mode, dag outs, dag ins,
// p = is postincrement
//===---------------------------------------------------------------------===//
class FLPMX<bit e, bit p, dag outs, dag ins, string asmstr, list<dag> pattern>
- : AVRInst16<outs, ins, asmstr, pattern>
-{
- bits<5> reg;
+ : AVRInst16<outs, ins, asmstr, pattern> {
+ bits<5> reg;
- let Inst{15-12} = 0b1001;
+ let Inst{15 - 12} = 0b1001;
- let Inst{11-9} = 0b000;
- let Inst{8} = reg{4};
+ let Inst{11 - 9} = 0b000;
+ let Inst{8} = reg{4};
- let Inst{7-4} = reg{3-0};
+ let Inst{7 - 4} = reg{3 - 0};
- let Inst{3-2} = 0b01;
- let Inst{1} = e;
- let Inst{0} = p;
+ let Inst{3 - 2} = 0b01;
+ let Inst{1} = e;
+ let Inst{0} = p;
- let DecoderMethod = "decodeFLPMX";
+ let DecoderMethod = "decodeFLPMX";
}
//===----------------------------------------------------------------------===//
@@ -248,14 +236,13 @@ class FLPMX<bit e, bit p, dag outs, dag ins, string asmstr, list<dag> pattern>
// (Only accepts even registers)
//===----------------------------------------------------------------------===//
class FMOVWRdRr<dag outs, dag ins, string asmstr, list<dag> pattern>
- : AVRInst16<outs, ins, asmstr, pattern>
-{
+ : AVRInst16<outs, ins, asmstr, pattern> {
bits<5> d;
bits<5> r;
- let Inst{15-8} = 0b00000001;
- let Inst{7-4} = d{4-1};
- let Inst{3-0} = r{4-1};
+ let Inst{15 - 8} = 0b00000001;
+ let Inst{7 - 4} = d{4 - 1};
+ let Inst{3 - 0} = r{4 - 1};
let DecoderMethod = "decodeFMOVWRdRr";
}
@@ -267,15 +254,14 @@ class FMOVWRdRr<dag outs, dag ins, string asmstr, list<dag> pattern>
// (Only accepts r16-r31)
//===----------------------------------------------------------------------===//
class FMUL2RdRr<bit f, dag outs, dag ins, string asmstr, list<dag> pattern>
- : AVRInst16<outs, ins, asmstr, pattern>
-{
- bits<5> rd; // accept 5 bits but only encode the lower 4
- bits<5> rr; // accept 5 bits but only encode the lower 4
+ : AVRInst16<outs, ins, asmstr, pattern> {
+ bits<5> rd; // accept 5 bits but only encode the lower 4
+ bits<5> rr; // accept 5 bits but only encode the lower 4
- let Inst{15-9} = 0b0000001;
+ let Inst{15 - 9} = 0b0000001;
let Inst{8} = f;
- let Inst{7-4} = rd{3-0};
- let Inst{3-0} = rr{3-0};
+ let Inst{7 - 4} = rd{3 - 0};
+ let Inst{3 - 0} = rr{3 - 0};
let DecoderMethod = "decodeFMUL2RdRr";
}
@@ -291,21 +277,19 @@ class FMUL2RdRr<bit f, dag outs, dag ins, string asmstr, list<dag> pattern>
// ddd = destination register
// rrr = source register
class FFMULRdRr<bits<2> f, dag outs, dag ins, string asmstr, list<dag> pattern>
- : AVRInst16<outs, ins, asmstr, pattern>
-{
+ : AVRInst16<outs, ins, asmstr, pattern> {
bits<3> rd;
bits<3> rr;
- let Inst{15-8} = 0b00000011;
+ let Inst{15 - 8} = 0b00000011;
let Inst{7} = f{1};
- let Inst{6-4} = rd;
+ let Inst{6 - 4} = rd;
let Inst{3} = f{0};
- let Inst{2-0} = rr;
+ let Inst{2 - 0} = rr;
let DecoderMethod = "decodeFFMULRdRr";
}
-
//===----------------------------------------------------------------------===//
// Arithmetic word instructions (ADIW / SBIW): <|1001|011f|kkdd|kkkk|>
// f = secondary opcode = 1 bit
@@ -314,16 +298,15 @@ class FFMULRdRr<bits<2> f, dag outs, dag ins, string asmstr, list<dag> pattern>
// (Only accepts r25:24 r27:26 r29:28 r31:30)
//===----------------------------------------------------------------------===//
class FWRdK<bit f, dag outs, dag ins, string asmstr, list<dag> pattern>
- : AVRInst16<outs, ins, asmstr, pattern>
-{
- bits<5> dst; // accept 5 bits but only encode bits 1 and 2
+ : AVRInst16<outs, ins, asmstr, pattern> {
+ bits<5> dst; // accept 5 bits but only encode bits 1 and 2
bits<6> k;
- let Inst{15-9} = 0b1001011;
+ let Inst{15 - 9} = 0b1001011;
let Inst{8} = f;
- let Inst{7-6} = k{5-4};
- let Inst{5-4} = dst{2-1};
- let Inst{3-0} = k{3-0};
+ let Inst{7 - 6} = k{5 - 4};
+ let Inst{5 - 4} = dst{2 - 1};
+ let Inst{3 - 0} = k{3 - 0};
let DecoderMethod = "decodeFWRdK";
}
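
The FWRdK layout <|1001|011f|kkdd|kkkk|> can be read directly off the Inst{...} assignments above; here is a small standalone C++ sketch of that packing (not part of the patch, the operands in main are only an example):

#include <cassert>
#include <cstdint>
#include <cstdio>

// dst contributes only bits 2-1 of its register number, so r24/r26/r28/r30
// map to dd = 00/01/10/11, matching the "(Only accepts ...)" note above.
static uint16_t encodeFWRdK(bool f, uint8_t dst, uint8_t k) {
  assert(k < 64 && "k is a 6-bit immediate");
  uint16_t inst = 0b1001011u << 9;            // Inst{15-9}
  inst |= uint16_t(f) << 8;                   // Inst{8}   = f
  inst |= uint16_t((k >> 4) & 0x3) << 6;      // Inst{7-6} = k{5-4}
  inst |= uint16_t((dst >> 1) & 0x3) << 4;    // Inst{5-4} = dst{2-1}
  inst |= uint16_t(k & 0xF);                  // Inst{3-0} = k{3-0}
  return inst;
}

int main() {
  // adiw r25:r24, 1 (ADIWRdK uses f = 0): prints 9601.
  std::printf("%04x\n", unsigned(encodeFWRdK(false, 24, 1)));
  return 0;
}
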
@@ -335,15 +318,14 @@ class FWRdK<bit f, dag outs, dag ins, string asmstr, list<dag> pattern>
// (Accepts all registers)
//===----------------------------------------------------------------------===//
class FIORdA<dag outs, dag ins, string asmstr, list<dag> pattern>
- : AVRInst16<outs, ins, asmstr, pattern>
-{
+ : AVRInst16<outs, ins, asmstr, pattern> {
bits<5> d;
bits<6> A;
- let Inst{15-11} = 0b10110;
- let Inst{10-9} = A{5-4};
- let Inst{8-4} = d;
- let Inst{3-0} = A{3-0};
+ let Inst{15 - 11} = 0b10110;
+ let Inst{10 - 9} = A{5 - 4};
+ let Inst{8 - 4} = d;
+ let Inst{3 - 0} = A{3 - 0};
let DecoderMethod = "decodeFIORdA";
}
@@ -355,15 +337,14 @@ class FIORdA<dag outs, dag ins, string asmstr, list<dag> pattern>
// (Accepts all registers)
//===----------------------------------------------------------------------===//
class FIOARr<dag outs, dag ins, string asmstr, list<dag> pattern>
- : AVRInst16<outs, ins, asmstr, pattern>
-{
+ : AVRInst16<outs, ins, asmstr, pattern> {
bits<6> A;
bits<5> r;
- let Inst{15-11} = 0b10111;
- let Inst{10-9} = A{5-4};
- let Inst{8-4} = r;
- let Inst{3-0} = A{3-0};
+ let Inst{15 - 11} = 0b10111;
+ let Inst{10 - 9} = A{5 - 4};
+ let Inst{8 - 4} = r;
+ let Inst{3 - 0} = A{3 - 0};
let DecoderMethod = "decodeFIOARr";
}
@@ -376,20 +357,19 @@ class FIOARr<dag outs, dag ins, string asmstr, list<dag> pattern>
// b = bit number
//===----------------------------------------------------------------------===//
class FIOBIT<bits<2> t, dag outs, dag ins, string asmstr, list<dag> pattern>
- : AVRInst16<outs, ins, asmstr, pattern>
-{
+ : AVRInst16<outs, ins, asmstr, pattern> {
bits<5> A;
bits<3> b;
- let Inst{15-12} = 0b1001;
+ let Inst{15 - 12} = 0b1001;
- let Inst{11-10} = 0b10;
- let Inst{9-8} = t;
+ let Inst{11 - 10} = 0b10;
+ let Inst{9 - 8} = t;
- let Inst{7-4} = A{4-1};
+ let Inst{7 - 4} = A{4 - 1};
let Inst{3} = A{0};
- let Inst{2-0} = b{2-0};
+ let Inst{2 - 0} = b{2 - 0};
let DecoderMethod = "decodeFIOBIT";
}
@@ -402,21 +382,20 @@ class FIOBIT<bits<2> t, dag outs, dag ins, string asmstr, list<dag> pattern>
// b = bit
//===----------------------------------------------------------------------===//
class FRdB<bits<2> t, dag outs, dag ins, string asmstr, list<dag> pattern>
- : AVRInst16<outs, ins, asmstr, pattern>
-{
+ : AVRInst16<outs, ins, asmstr, pattern> {
bits<5> rd;
bits<3> b;
- let Inst{15-12} = 0b1111;
+ let Inst{15 - 12} = 0b1111;
let Inst{11} = 0b1;
- let Inst{10-9} = t;
+ let Inst{10 - 9} = t;
let Inst{8} = rd{4};
- let Inst{7-4} = rd{3-0};
+ let Inst{7 - 4} = rd{3 - 0};
let Inst{3} = 0;
- let Inst{2-0} = b;
+ let Inst{2 - 0} = b;
}
// Special encoding for the `DES K` instruction.
@@ -425,17 +404,16 @@ class FRdB<bits<2> t, dag outs, dag ins, string asmstr, list<dag> pattern>
//
// KKKK = 4 bit immediate
class FDES<dag outs, dag ins, string asmstr, list<dag> pattern>
- : AVRInst16<outs, ins, asmstr, pattern>
-{
+ : AVRInst16<outs, ins, asmstr, pattern> {
bits<4> k;
- let Inst{15-12} = 0b1001;
+ let Inst{15 - 12} = 0b1001;
- let Inst{11-8} = 0b0100;
+ let Inst{11 - 8} = 0b0100;
- let Inst{7-4} = k;
+ let Inst{7 - 4} = k;
- let Inst{3-0} = 0b1011;
+ let Inst{3 - 0} = 0b1011;
}
//===----------------------------------------------------------------------===//
@@ -444,15 +422,14 @@ class FDES<dag outs, dag ins, string asmstr, list<dag> pattern>
// k = constant address = 7 bits
// s = bit in status register = 3 bits
//===----------------------------------------------------------------------===//
-class FBRsk<bit f, bits<3> s, dag outs, dag ins, string asmstr, list<dag> pattern>
- : AVRInst16<outs, ins, asmstr, pattern>
-{
+class FBRsk<bit f, bits<3> s, dag outs, dag ins, string asmstr,
+ list<dag> pattern> : AVRInst16<outs, ins, asmstr, pattern> {
bits<7> k;
- let Inst{15-11} = 0b11110;
+ let Inst{15 - 11} = 0b11110;
let Inst{10} = f;
- let Inst{9-3} = k;
- let Inst{2-0} = s;
+ let Inst{9 - 3} = k;
+ let Inst{2 - 0} = s;
}
//===----------------------------------------------------------------------===//
@@ -460,14 +437,12 @@ class FBRsk<bit f, bits<3> s, dag outs, dag ins, string asmstr, list<dag> patter
//===----------------------------------------------------------------------===//
class F16<bits<16> opcode, dag outs, dag ins, string asmstr, list<dag> pattern>
- : AVRInst16<outs, ins, asmstr, pattern>
-{
+ : AVRInst16<outs, ins, asmstr, pattern> {
let Inst = opcode;
}
class F32<bits<32> opcode, dag outs, dag ins, string asmstr, list<dag> pattern>
- : AVRInst32<outs, ins, asmstr, pattern>
-{
+ : AVRInst32<outs, ins, asmstr, pattern> {
let Inst = opcode;
}
@@ -477,13 +452,12 @@ class F32<bits<32> opcode, dag outs, dag ins, string asmstr, list<dag> pattern>
// k = constant address = 12 bits
//===----------------------------------------------------------------------===//
class FBRk<bit f, dag outs, dag ins, string asmstr, list<dag> pattern>
- : AVRInst16<outs, ins, asmstr, pattern>
-{
+ : AVRInst16<outs, ins, asmstr, pattern> {
bits<12> k;
- let Inst{15-13} = 0b110;
+ let Inst{15 - 13} = 0b110;
let Inst{12} = f;
- let Inst{11-0} = k;
+ let Inst{11 - 0} = k;
}
//===----------------------------------------------------------------------===//
@@ -492,14 +466,13 @@ class FBRk<bit f, dag outs, dag ins, string asmstr, list<dag> pattern>
// k = constant address = 22 bits
//===----------------------------------------------------------------------===//
class F32BRk<bits<3> f, dag outs, dag ins, string asmstr, list<dag> pattern>
- : AVRInst32<outs, ins, asmstr, pattern>
-{
+ : AVRInst32<outs, ins, asmstr, pattern> {
bits<22> k;
- let Inst{31-25} = 0b1001010;
- let Inst{24-20} = k{21-17};
- let Inst{19-17} = f;
- let Inst{16-0} = k{16-0};
+ let Inst{31 - 25} = 0b1001010;
+ let Inst{24 - 20} = k{21 - 17};
+ let Inst{19 - 17} = f;
+ let Inst{16 - 0} = k{16 - 0};
}
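
For the 32-bit call/jump format the 22-bit word address is split around the secondary opcode f; a standalone C++ sketch of that split (illustrative only, the value in main is an assumption):

#include <cassert>
#include <cstdint>
#include <cstdio>

// f is 0b110 for JMPk and 0b111 for CALLk further down in this file.
static uint32_t encodeF32BRk(uint8_t f, uint32_t k) {
  assert(f < 8 && k < (1u << 22));
  uint32_t inst = 0b1001010u << 25;     // Inst{31-25}
  inst |= ((k >> 17) & 0x1Fu) << 20;    // Inst{24-20} = k{21-17}
  inst |= uint32_t(f) << 17;            // Inst{19-17} = f
  inst |= k & 0x1FFFFu;                 // Inst{16-0}  = k{16-0}
  return inst;
}

int main() {
  // jmp 0 -> prints 940c0000.
  std::printf("%08x\n", unsigned(encodeF32BRk(0b110, 0)));
  return 0;
}
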
//===----------------------------------------------------------------------===//
@@ -510,38 +483,36 @@ class F32BRk<bits<3> f, dag outs, dag ins, string asmstr, list<dag> pattern>
// (Accepts all registers)
//===----------------------------------------------------------------------===//
class F32DM<bit f, dag outs, dag ins, string asmstr, list<dag> pattern>
- : AVRInst32<outs, ins, asmstr, pattern>
-{
+ : AVRInst32<outs, ins, asmstr, pattern> {
bits<5> rd;
bits<16> k;
- let Inst{31-28} = 0b1001;
+ let Inst{31 - 28} = 0b1001;
- let Inst{27-26} = 0b00;
+ let Inst{27 - 26} = 0b00;
let Inst{25} = f;
let Inst{24} = rd{4};
- let Inst{23-20} = rd{3-0};
+ let Inst{23 - 20} = rd{3 - 0};
- let Inst{19-16} = 0b0000;
+ let Inst{19 - 16} = 0b0000;
- let Inst{15-0} = k;
+ let Inst{15 - 0} = k;
}
// <|1001|0100|bfff|1000>
class FS<bit b, dag outs, dag ins, string asmstr, list<dag> pattern>
- : AVRInst16<outs, ins, asmstr, pattern>
-{
+ : AVRInst16<outs, ins, asmstr, pattern> {
bits<3> s;
- let Inst{15-12} = 0b1001;
+ let Inst{15 - 12} = 0b1001;
- let Inst{11-8} = 0b0100;
+ let Inst{11 - 8} = 0b0100;
let Inst{7} = b;
- let Inst{6-4} = s;
+ let Inst{6 - 4} = s;
- let Inst{3-0} = 0b1000;
+ let Inst{3 - 0} = 0b1000;
}
// Set/clr bit in status flag instructions/
@@ -549,48 +520,42 @@ class FS<bit b, dag outs, dag ins, string asmstr, list<dag> pattern>
// ---------------------
// <|1111|0fkk|kkkk|ksss>
class FSK<bit f, dag outs, dag ins, string asmstr, list<dag> pattern>
- : AVRInst16<outs, ins, asmstr, pattern>
-{
+ : AVRInst16<outs, ins, asmstr, pattern> {
bits<7> k;
bits<3> s;
- let Inst{15-12} = 0b1111;
+ let Inst{15 - 12} = 0b1111;
let Inst{11} = 0;
let Inst{10} = f;
- let Inst{9-8} = k{6-5};
+ let Inst{9 - 8} = k{6 - 5};
- let Inst{7-4} = k{4-1};
+ let Inst{7 - 4} = k{4 - 1};
let Inst{3} = k{0};
- let Inst{2-0} = s;
+ let Inst{2 - 0} = s;
}
class ExtensionPseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
- : Pseudo<outs, ins, asmstr, pattern>
-{
+ : Pseudo<outs, ins, asmstr, pattern> {
let Defs = [SREG];
}
class StorePseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
- : Pseudo<outs, ins, asmstr, pattern>
-{
+ : Pseudo<outs, ins, asmstr, pattern> {
let Defs = [SP];
}
class SelectPseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
- : Pseudo<outs, ins, asmstr, pattern>
-{
+ : Pseudo<outs, ins, asmstr, pattern> {
let usesCustomInserter = 1;
let Uses = [SREG];
}
class ShiftPseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
- : Pseudo<outs, ins, asmstr, pattern>
-{
+ : Pseudo<outs, ins, asmstr, pattern> {
let usesCustomInserter = 1;
let Defs = [SREG];
}
-
diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.cpp b/llvm/lib/Target/AVR/AVRInstrInfo.cpp
index 06f07696bde3..798d08393eae 100644
--- a/llvm/lib/Target/AVR/AVRInstrInfo.cpp
+++ b/llvm/lib/Target/AVR/AVRInstrInfo.cpp
@@ -20,9 +20,9 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
#include "AVR.h"
#include "AVRMachineFunctionInfo.h"
@@ -55,13 +55,13 @@ void AVRInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Register DestLo, DestHi, SrcLo, SrcHi;
TRI.splitReg(DestReg, DestLo, DestHi);
- TRI.splitReg(SrcReg, SrcLo, SrcHi);
+ TRI.splitReg(SrcReg, SrcLo, SrcHi);
// Copy each individual register with the `MOV` instruction.
BuildMI(MBB, MI, DL, get(AVR::MOVRdRr), DestLo)
- .addReg(SrcLo, getKillRegState(KillSrc));
+ .addReg(SrcLo, getKillRegState(KillSrc));
BuildMI(MBB, MI, DL, get(AVR::MOVRdRr), DestHi)
- .addReg(SrcHi, getKillRegState(KillSrc));
+ .addReg(SrcHi, getKillRegState(KillSrc));
}
} else {
if (AVR::GPR8RegClass.contains(DestReg, SrcReg)) {
@@ -83,7 +83,7 @@ unsigned AVRInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
int &FrameIndex) const {
switch (MI.getOpcode()) {
case AVR::LDDRdPtrQ:
- case AVR::LDDWRdYQ: { //:FIXME: remove this once PR13375 gets fixed
+ case AVR::LDDWRdYQ: { //: FIXME: remove this once PR13375 gets fixed
if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
MI.getOperand(2).getImm() == 0) {
FrameIndex = MI.getOperand(1).getIndex();
@@ -179,7 +179,7 @@ void AVRInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
Opcode = AVR::LDDRdPtrQ;
} else if (TRI->isTypeLegalForClass(*RC, MVT::i16)) {
// Opcode = AVR::LDDWRdPtrQ;
- //:FIXME: remove this once PR13375 gets fixed
+ //: FIXME: remove this once PR13375 gets fixed
Opcode = AVR::LDDWRdYQ;
} else {
llvm_unreachable("Cannot load this register from a stack slot!");
@@ -289,7 +289,7 @@ bool AVRInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
}
// Handle unconditional branches.
- //:TODO: add here jmp
+ //: TODO: add here jmp
if (I->getOpcode() == AVR::RJMPk) {
UnCondBrIter = I;
@@ -399,9 +399,9 @@ unsigned AVRInstrInfo::insertBranch(MachineBasicBlock &MBB,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
ArrayRef<MachineOperand> Cond,
- const DebugLoc &DL,
- int *BytesAdded) const {
- if (BytesAdded) *BytesAdded = 0;
+ const DebugLoc &DL, int *BytesAdded) const {
+ if (BytesAdded)
+ *BytesAdded = 0;
// Shouldn't be a fall through.
assert(TBB && "insertBranch must not be told to insert a fallthrough");
@@ -421,13 +421,15 @@ unsigned AVRInstrInfo::insertBranch(MachineBasicBlock &MBB,
AVRCC::CondCodes CC = (AVRCC::CondCodes)Cond[0].getImm();
auto &CondMI = *BuildMI(&MBB, DL, getBrCond(CC)).addMBB(TBB);
- if (BytesAdded) *BytesAdded += getInstSizeInBytes(CondMI);
+ if (BytesAdded)
+ *BytesAdded += getInstSizeInBytes(CondMI);
++Count;
if (FBB) {
// Two-way Conditional branch. Insert the second branch.
auto &MI = *BuildMI(&MBB, DL, get(AVR::RJMPk)).addMBB(FBB);
- if (BytesAdded) *BytesAdded += getInstSizeInBytes(MI);
+ if (BytesAdded)
+ *BytesAdded += getInstSizeInBytes(MI);
++Count;
}
@@ -436,7 +438,8 @@ unsigned AVRInstrInfo::insertBranch(MachineBasicBlock &MBB,
unsigned AVRInstrInfo::removeBranch(MachineBasicBlock &MBB,
int *BytesRemoved) const {
- if (BytesRemoved) *BytesRemoved = 0;
+ if (BytesRemoved)
+ *BytesRemoved = 0;
MachineBasicBlock::iterator I = MBB.end();
unsigned Count = 0;
@@ -446,7 +449,7 @@ unsigned AVRInstrInfo::removeBranch(MachineBasicBlock &MBB,
if (I->isDebugInstr()) {
continue;
}
- //:TODO: add here the missing jmp instructions once they are implemented
+ //: TODO: add here the missing jmp instructions once they are implemented
// like jmp, {e}ijmp, and other cond branches, ...
if (I->getOpcode() != AVR::RJMPk &&
getCondFromBranchOpc(I->getOpcode()) == AVRCC::COND_INVALID) {
@@ -454,7 +457,8 @@ unsigned AVRInstrInfo::removeBranch(MachineBasicBlock &MBB,
}
// Remove the branch.
- if (BytesRemoved) *BytesRemoved += getInstSizeInBytes(*I);
+ if (BytesRemoved)
+ *BytesRemoved += getInstSizeInBytes(*I);
I->eraseFromParent();
I = MBB.end();
++Count;
@@ -490,7 +494,8 @@ unsigned AVRInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
case TargetOpcode::INLINEASM:
case TargetOpcode::INLINEASM_BR: {
const MachineFunction &MF = *MI.getParent()->getParent();
- const AVRTargetMachine &TM = static_cast<const AVRTargetMachine&>(MF.getTarget());
+ const AVRTargetMachine &TM =
+ static_cast<const AVRTargetMachine &>(MF.getTarget());
const AVRSubtarget &STI = MF.getSubtarget<AVRSubtarget>();
const TargetInstrInfo &TII = *STI.getInstrInfo();
@@ -555,20 +560,19 @@ bool AVRInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
}
}
-unsigned AVRInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
- MachineBasicBlock &NewDestBB,
- const DebugLoc &DL,
- int64_t BrOffset,
- RegScavenger *RS) const {
- // This method inserts a *direct* branch (JMP), despite its name.
- // LLVM calls this method to fixup unconditional branches; it never calls
- // insertBranch or some hypothetical "insertDirectBranch".
- // See lib/CodeGen/RegisterRelaxation.cpp for details.
- // We end up here when a jump is too long for a RJMP instruction.
- auto &MI = *BuildMI(&MBB, DL, get(AVR::JMPk)).addMBB(&NewDestBB);
-
- return getInstSizeInBytes(MI);
+void AVRInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock &NewDestBB,
+ MachineBasicBlock &RestoreBB,
+ const DebugLoc &DL, int64_t BrOffset,
+ RegScavenger *RS) const {
+ // This method inserts a *direct* branch (JMP), despite its name.
+ // LLVM calls this method to fixup unconditional branches; it never calls
+ // insertBranch or some hypothetical "insertDirectBranch".
+ // See lib/CodeGen/RegisterRelaxation.cpp for details.
+ // We end up here when a jump is too long for a RJMP instruction.
+ BuildMI(&MBB, DL, get(AVR::JMPk)).addMBB(&NewDestBB);
+
+ return;
}
} // end of namespace llvm
-
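
The comment above explains when this hook runs: branch relaxation reaches it only once a jump no longer fits the relative RJMP encoding and must become a 32-bit JMPk. A rough standalone sketch of that range test, assuming the usual AVR reading of FBRk's 12-bit field as a signed word displacement (the backend's actual check lives in isBranchOffsetInRange):

#include <cstdint>
#include <cstdio>

// True when a PC-relative byte offset still fits RJMP's 12-bit field;
// anything larger needs the 32-bit JMP that insertIndirectBranch emits.
static bool fitsInRjmp(int64_t byteOffset) {
  const int64_t words = byteOffset / 2; // AVR branch targets count 16-bit words
  return words >= -2048 && words <= 2047;
}

int main() {
  std::printf("%d %d\n", fitsInRjmp(4094), fitsInRjmp(4096)); // prints "1 0"
  return 0;
}
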
diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.h b/llvm/lib/Target/AVR/AVRInstrInfo.h
index 11f45865de54..6d0596642fa1 100644
--- a/llvm/lib/Target/AVR/AVRInstrInfo.h
+++ b/llvm/lib/Target/AVR/AVRInstrInfo.h
@@ -107,11 +107,11 @@ public:
bool isBranchOffsetInRange(unsigned BranchOpc,
int64_t BrOffset) const override;
- unsigned insertIndirectBranch(MachineBasicBlock &MBB,
- MachineBasicBlock &NewDestBB,
- const DebugLoc &DL,
- int64_t BrOffset,
- RegScavenger *RS) const override;
+ void insertIndirectBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock &NewDestBB,
+ MachineBasicBlock &RestoreBB, const DebugLoc &DL,
+ int64_t BrOffset, RegScavenger *RS) const override;
+
private:
const AVRRegisterInfo RI;
};
diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.td b/llvm/lib/Target/AVR/AVRInstrInfo.td
index c7c9656d3bfb..c7f423292da0 100644
--- a/llvm/lib/Target/AVR/AVRInstrInfo.td
+++ b/llvm/lib/Target/AVR/AVRInstrInfo.td
@@ -20,12 +20,13 @@ def SDT_AVRCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i16>, SDTCisVT<1, i16>]>;
def SDT_AVRCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i16>, SDTCisVT<1, i16>]>;
def SDT_AVRCall : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>;
def SDT_AVRWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
-def SDT_AVRBrcond : SDTypeProfile<0, 2,
- [SDTCisVT<0, OtherVT>, SDTCisVT<1, i8>]>;
+def SDT_AVRBrcond
+ : SDTypeProfile<0, 2, [SDTCisVT<0, OtherVT>, SDTCisVT<1, i8>]>;
def SDT_AVRCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
def SDT_AVRTst : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
-def SDT_AVRSelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>,
- SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
+def SDT_AVRSelectCC
+ : SDTypeProfile<1, 3,
+ [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
//===----------------------------------------------------------------------===//
// AVR Specific Node Definitions
@@ -46,12 +47,12 @@ def AVRcall : SDNode<"AVRISD::CALL", SDT_AVRCall,
def AVRWrapper : SDNode<"AVRISD::WRAPPER", SDT_AVRWrapper>;
-def AVRbrcond : SDNode<"AVRISD::BRCOND", SDT_AVRBrcond,
- [SDNPHasChain, SDNPInGlue]>;
+def AVRbrcond
+ : SDNode<"AVRISD::BRCOND", SDT_AVRBrcond, [SDNPHasChain, SDNPInGlue]>;
def AVRcmp : SDNode<"AVRISD::CMP", SDT_AVRCmp, [SDNPOutGlue]>;
def AVRcmpc : SDNode<"AVRISD::CMPC", SDT_AVRCmp, [SDNPInGlue, SDNPOutGlue]>;
def AVRtst : SDNode<"AVRISD::TST", SDT_AVRTst, [SDNPOutGlue]>;
-def AVRselectcc: SDNode<"AVRISD::SELECT_CC", SDT_AVRSelectCC, [SDNPInGlue]>;
+def AVRselectcc : SDNode<"AVRISD::SELECT_CC", SDT_AVRSelectCC, [SDNPInGlue]>;
// Shift nodes.
def AVRlsl : SDNode<"AVRISD::LSL", SDTIntUnaryOp>;
@@ -80,29 +81,31 @@ def AVRSwap : SDNode<"AVRISD::SWAP", SDTIntUnaryOp>;
// AVR Operands, Complex Patterns and Transformations Definitions.
//===----------------------------------------------------------------------===//
-def imm8_neg_XFORM : SDNodeXForm<imm,
-[{
- return CurDAG->getTargetConstant(-N->getAPIntValue(), SDLoc(N), MVT::i8);
-}]>;
+def imm8_neg_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(
+ -N->getAPIntValue(), SDLoc(N), MVT::i8);
+ }]>;
-def imm16_neg_XFORM : SDNodeXForm<imm,
-[{
- return CurDAG->getTargetConstant(-N->getAPIntValue(), SDLoc(N), MVT::i16);
-}]>;
+def imm16_neg_XFORM
+ : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(-N->getAPIntValue(),
+ SDLoc(N), MVT::i16);
+ }]>;
-def imm0_63_neg : PatLeaf<(imm),
-[{
- int64_t val = -N->getSExtValue();
- return val >= 0 && val < 64;
-}], imm16_neg_XFORM>;
+def imm0_63_neg : PatLeaf<(imm), [{
+ int64_t val = -N->getSExtValue();
+ return val >= 0 && val < 64;
+ }],
+ imm16_neg_XFORM>;
def uimm6 : PatLeaf<(imm), [{ return isUInt<6>(N->getZExtValue()); }]>;
// imm_com8_XFORM - Return the complement of a imm_com8 value
-def imm_com8_XFORM : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(~((uint8_t)N->getZExtValue()), SDLoc(N),
- MVT::i8);
-}]>;
+def imm_com8_XFORM
+ : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(
+ ~((uint8_t) N->getZExtValue()), SDLoc(N), MVT::i8);
+ }]>;
// imm_com8 - Match an immediate that is a complement
// of a 8-bit immediate.
@@ -110,59 +113,55 @@ def imm_com8_XFORM : SDNodeXForm<imm, [{
// only used on aliases (Pat<> and InstAlias<>). The actual encoding
// is handled by the destination instructions, which use imm_com8.
def imm_com8_asmoperand : AsmOperandClass { let Name = "ImmCom8"; }
-def imm_com8 : Operand<i8> {
- let ParserMatchClass = imm_com8_asmoperand;
-}
-
-def ioaddr_XFORM : SDNodeXForm<imm,
-[{
- uint8_t offset = Subtarget->getIORegisterOffset();
- return CurDAG->getTargetConstant(uint8_t(N->getZExtValue()) - offset,
- SDLoc(N), MVT::i8);
-}]>;
-
-def iobitpos8_XFORM : SDNodeXForm<imm,
-[{
- return CurDAG->getTargetConstant(Log2_32(uint8_t(N->getZExtValue())),
- SDLoc(N), MVT::i8);
-}]>;
-
-def iobitposn8_XFORM : SDNodeXForm<imm,
-[{
- return CurDAG->getTargetConstant(Log2_32(uint8_t(~N->getZExtValue())),
- SDLoc(N), MVT::i8);
-}]>;
-
-def ioaddr8 : PatLeaf<(imm),
-[{
- uint8_t offset = Subtarget->getIORegisterOffset();
- uint64_t val = N->getZExtValue() - offset;
- return val < 0x40;
-}], ioaddr_XFORM>;
-
-def lowioaddr8 : PatLeaf<(imm),
-[{
- uint8_t offset = Subtarget->getIORegisterOffset();
- uint64_t val = N->getZExtValue() - offset;
- return val < 0x20;
-}], ioaddr_XFORM>;
-
-def ioaddr16 : PatLeaf<(imm),
-[{
- uint8_t offset = Subtarget->getIORegisterOffset();
- uint64_t val = N->getZExtValue() - offset;
- return val < 0x3f;
-}], ioaddr_XFORM>;
-
-def iobitpos8 : PatLeaf<(imm),
-[{
- return isPowerOf2_32(uint8_t(N->getZExtValue()));
-}], iobitpos8_XFORM>;
-
-def iobitposn8 : PatLeaf<(imm),
-[{
- return isPowerOf2_32(uint8_t(~N->getZExtValue()));
-}], iobitposn8_XFORM>;
+def imm_com8 : Operand<i8> { let ParserMatchClass = imm_com8_asmoperand; }
+
+def ioaddr_XFORM
+ : SDNodeXForm<imm, [{
+ uint8_t offset = Subtarget->getIORegisterOffset();
+ return CurDAG->getTargetConstant(
+ uint8_t(N->getZExtValue()) - offset, SDLoc(N), MVT::i8);
+ }]>;
+
+def iobitpos8_XFORM
+ : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(
+ Log2_32(uint8_t(N->getZExtValue())), SDLoc(N), MVT::i8);
+ }]>;
+
+def iobitposn8_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(
+ Log2_32(uint8_t(~N->getZExtValue())),
+ SDLoc(N), MVT::i8);
+ }]>;
+
+def ioaddr8 : PatLeaf<(imm), [{
+ uint8_t offset = Subtarget->getIORegisterOffset();
+ uint64_t val = N->getZExtValue() - offset;
+ return val < 0x40;
+ }],
+ ioaddr_XFORM>;
+
+def lowioaddr8 : PatLeaf<(imm), [{
+ uint8_t offset = Subtarget->getIORegisterOffset();
+ uint64_t val = N->getZExtValue() - offset;
+ return val < 0x20;
+ }],
+ ioaddr_XFORM>;
+
+def ioaddr16 : PatLeaf<(imm), [{
+ uint8_t offset = Subtarget->getIORegisterOffset();
+ uint64_t val = N->getZExtValue() - offset;
+ return val < 0x3f;
+ }],
+ ioaddr_XFORM>;
+
+def iobitpos8
+ : PatLeaf<(imm), [{ return isPowerOf2_32(uint8_t(N->getZExtValue())); }],
+ iobitpos8_XFORM>;
+
+def iobitposn8
+ : PatLeaf<(imm), [{ return isPowerOf2_32(uint8_t(~N->getZExtValue())); }],
+ iobitposn8_XFORM>;
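
The predicates and transforms above turn memory-mapped IO addresses into port numbers (by subtracting the subtarget's IO register offset) and single-bit masks into bit indices. A standalone C++ model of that behaviour, not the LLVM classes themselves; kIORegisterOffset = 0x20 is an assumed example value (classic AVRs map IO space at data addresses 0x20-0x5f), whereas the backend asks Subtarget->getIORegisterOffset():

#include <cassert>
#include <cstdint>
#include <cstdio>

constexpr uint64_t kIORegisterOffset = 0x20; // assumed, see note above

// Mirrors ioaddr8: usable by IN/OUT if, after removing the offset, the value
// fits the 6-bit port field (underflow wraps and fails, exactly like the
// unsigned arithmetic in the predicate above).
static bool isIOAddr8(uint64_t addr) { return addr - kIORegisterOffset < 0x40; }

// Mirrors lowioaddr8: SBI/CBI-style instructions only reach the low 32 ports.
static bool isLowIOAddr8(uint64_t addr) { return addr - kIORegisterOffset < 0x20; }

// Mirrors iobitpos8_XFORM: a single-bit mask becomes its bit index.
static unsigned ioBitPos(uint8_t mask) {
  assert(mask != 0 && (mask & (mask - 1)) == 0 && "expected a power of two");
  unsigned pos = 0;
  while ((mask & 1) == 0) {
    mask >>= 1;
    ++pos;
  }
  return pos;
}

int main() {
  std::printf("%d %d %u\n", isIOAddr8(0x25), isLowIOAddr8(0x5f), ioBitPos(0x40));
  return 0; // prints "1 0 6"
}
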
def MemriAsmOperand : AsmOperandClass {
let Name = "Memri";
@@ -170,8 +169,7 @@ def MemriAsmOperand : AsmOperandClass {
}
/// Address operand for `reg+imm` used by STD and LDD.
-def memri : Operand<iPTR>
-{
+def memri : Operand<iPTR> {
let MIOperandInfo = (ops PTRDISPREGS, i16imm);
let PrintMethod = "printMemri";
@@ -181,60 +179,47 @@ def memri : Operand<iPTR>
}
// Address operand for `SP+imm` used by STD{W}SPQRr
-def memspi : Operand<iPTR>
-{
- let MIOperandInfo = (ops GPRSP, i16imm);
-}
+def memspi : Operand<iPTR> { let MIOperandInfo = (ops GPRSP, i16imm); }
-def relbrtarget_7 : Operand<OtherVT>
-{
- let PrintMethod = "printPCRelImm";
- let EncoderMethod = "encodeRelCondBrTarget<AVR::fixup_7_pcrel>";
+def relbrtarget_7 : Operand<OtherVT> {
+ let PrintMethod = "printPCRelImm";
+ let EncoderMethod = "encodeRelCondBrTarget<AVR::fixup_7_pcrel>";
}
-def brtarget_13 : Operand<OtherVT>
-{
- let PrintMethod = "printPCRelImm";
- let EncoderMethod = "encodeRelCondBrTarget<AVR::fixup_13_pcrel>";
+def brtarget_13 : Operand<OtherVT> {
+ let PrintMethod = "printPCRelImm";
+ let EncoderMethod = "encodeRelCondBrTarget<AVR::fixup_13_pcrel>";
}
// The target of a 22 or 16-bit call/jmp instruction.
-def call_target : Operand<iPTR>
-{
- let EncoderMethod = "encodeCallTarget";
- let DecoderMethod = "decodeCallTarget";
+def call_target : Operand<iPTR> {
+ let EncoderMethod = "encodeCallTarget";
+ let DecoderMethod = "decodeCallTarget";
}
// A 16-bit address (which can lead to an R_AVR_16 relocation).
-def imm16 : Operand<i16>
-{
- let EncoderMethod = "encodeImm<AVR::fixup_16, 2>";
-}
+def imm16 : Operand<i16> { let EncoderMethod = "encodeImm<AVR::fixup_16, 2>"; }
/// A 6-bit immediate used in the ADIW/SBIW instructions.
-def imm_arith6 : Operand<i16>
-{
- let EncoderMethod = "encodeImm<AVR::fixup_6_adiw, 0>";
+def imm_arith6 : Operand<i16> {
+ let EncoderMethod = "encodeImm<AVR::fixup_6_adiw, 0>";
}
/// An 8-bit immediate inside an instruction with the same format
/// as the `LDI` instruction (the `FRdK` format).
-def imm_ldi8 : Operand<i8>
-{
- let EncoderMethod = "encodeImm<AVR::fixup_ldi, 0>";
+def imm_ldi8 : Operand<i8> {
+ let EncoderMethod = "encodeImm<AVR::fixup_ldi, 0>";
}
/// A 5-bit port number used in SBIC and friends (the `FIOBIT` format).
-def imm_port5 : Operand<i8>
-{
- let EncoderMethod = "encodeImm<AVR::fixup_port5, 0>";
+def imm_port5 : Operand<i8> {
+ let EncoderMethod = "encodeImm<AVR::fixup_port5, 0>";
}
/// A 6-bit port number used in the `IN` instruction and friends (the
/// `FIORdA` format.
-def imm_port6 : Operand<i8>
-{
- let EncoderMethod = "encodeImm<AVR::fixup_port6, 0>";
+def imm_port6 : Operand<i8> {
+ let EncoderMethod = "encodeImm<AVR::fixup_port6, 0>";
}
// Addressing mode pattern reg+imm6
@@ -243,91 +228,85 @@ def addr : ComplexPattern<iPTR, 2, "SelectAddr", [], [SDNPWantRoot]>;
// AsmOperand class for a pointer register.
// Used with the LD/ST family of instructions.
// See FSTLD in AVRInstrFormats.td
-def PtrRegAsmOperand : AsmOperandClass
-{
- let Name = "Reg";
-}
+def PtrRegAsmOperand : AsmOperandClass { let Name = "Reg"; }
// A special operand type for the LD/ST instructions.
// It converts the pointer register number into a two-bit field used in the
// instruction.
-def LDSTPtrReg : Operand<i16>
-{
- let MIOperandInfo = (ops PTRREGS);
- let EncoderMethod = "encodeLDSTPtrReg";
+def LDSTPtrReg : Operand<i16> {
+ let MIOperandInfo = (ops PTRREGS);
+ let EncoderMethod = "encodeLDSTPtrReg";
- let ParserMatchClass = PtrRegAsmOperand;
+ let ParserMatchClass = PtrRegAsmOperand;
}
// A special operand type for the LDD/STD instructions.
// It behaves identically to the LD/ST version, except restricts
// the pointer registers to Y and Z.
-def LDDSTDPtrReg : Operand<i16>
-{
- let MIOperandInfo = (ops PTRDISPREGS);
- let EncoderMethod = "encodeLDSTPtrReg";
+def LDDSTDPtrReg : Operand<i16> {
+ let MIOperandInfo = (ops PTRDISPREGS);
+ let EncoderMethod = "encodeLDSTPtrReg";
- let ParserMatchClass = PtrRegAsmOperand;
+ let ParserMatchClass = PtrRegAsmOperand;
}
//===----------------------------------------------------------------------===//
// AVR predicates for subtarget features
//===----------------------------------------------------------------------===//
-def HasSRAM : Predicate<"Subtarget->hasSRAM()">,
- AssemblerPredicate<(all_of FeatureSRAM)>;
+def HasSRAM : Predicate<"Subtarget->hasSRAM()">,
+ AssemblerPredicate<(all_of FeatureSRAM)>;
-def HasJMPCALL : Predicate<"Subtarget->hasJMPCALL()">,
- AssemblerPredicate<(all_of FeatureJMPCALL)>;
+def HasJMPCALL : Predicate<"Subtarget->hasJMPCALL()">,
+ AssemblerPredicate<(all_of FeatureJMPCALL)>;
-def HasIJMPCALL : Predicate<"Subtarget->hasIJMPCALL()">,
- AssemblerPredicate<(all_of FeatureIJMPCALL)>;
+def HasIJMPCALL : Predicate<"Subtarget->hasIJMPCALL()">,
+ AssemblerPredicate<(all_of FeatureIJMPCALL)>;
-def HasEIJMPCALL : Predicate<"Subtarget->hasEIJMPCALL()">,
- AssemblerPredicate<(all_of FeatureEIJMPCALL)>;
+def HasEIJMPCALL : Predicate<"Subtarget->hasEIJMPCALL()">,
+ AssemblerPredicate<(all_of FeatureEIJMPCALL)>;
-def HasADDSUBIW : Predicate<"Subtarget->hasADDSUBIW()">,
- AssemblerPredicate<(all_of FeatureADDSUBIW)>;
+def HasADDSUBIW : Predicate<"Subtarget->hasADDSUBIW()">,
+ AssemblerPredicate<(all_of FeatureADDSUBIW)>;
-def HasSmallStack : Predicate<"Subtarget->HasSmallStack()">,
- AssemblerPredicate<(all_of FeatureSmallStack)>;
+def HasSmallStack : Predicate<"Subtarget->HasSmallStack()">,
+ AssemblerPredicate<(all_of FeatureSmallStack)>;
-def HasMOVW : Predicate<"Subtarget->hasMOVW()">,
- AssemblerPredicate<(all_of FeatureMOVW)>;
+def HasMOVW : Predicate<"Subtarget->hasMOVW()">,
+ AssemblerPredicate<(all_of FeatureMOVW)>;
-def HasLPM : Predicate<"Subtarget->hasLPM()">,
- AssemblerPredicate<(all_of FeatureLPM)>;
+def HasLPM : Predicate<"Subtarget->hasLPM()">,
+ AssemblerPredicate<(all_of FeatureLPM)>;
-def HasLPMX : Predicate<"Subtarget->hasLPMX()">,
- AssemblerPredicate<(all_of FeatureLPMX)>;
+def HasLPMX : Predicate<"Subtarget->hasLPMX()">,
+ AssemblerPredicate<(all_of FeatureLPMX)>;
-def HasELPM : Predicate<"Subtarget->hasELPM()">,
- AssemblerPredicate<(all_of FeatureELPM)>;
+def HasELPM : Predicate<"Subtarget->hasELPM()">,
+ AssemblerPredicate<(all_of FeatureELPM)>;
-def HasELPMX : Predicate<"Subtarget->hasELPMX()">,
- AssemblerPredicate<(all_of FeatureELPMX)>;
+def HasELPMX : Predicate<"Subtarget->hasELPMX()">,
+ AssemblerPredicate<(all_of FeatureELPMX)>;
-def HasSPM : Predicate<"Subtarget->hasSPM()">,
- AssemblerPredicate<(all_of FeatureSPM)>;
+def HasSPM : Predicate<"Subtarget->hasSPM()">,
+ AssemblerPredicate<(all_of FeatureSPM)>;
-def HasSPMX : Predicate<"Subtarget->hasSPMX()">,
- AssemblerPredicate<(all_of FeatureSPMX)>;
+def HasSPMX : Predicate<"Subtarget->hasSPMX()">,
+ AssemblerPredicate<(all_of FeatureSPMX)>;
-def HasDES : Predicate<"Subtarget->hasDES()">,
- AssemblerPredicate<(all_of FeatureDES)>;
+def HasDES : Predicate<"Subtarget->hasDES()">,
+ AssemblerPredicate<(all_of FeatureDES)>;
-def SupportsRMW : Predicate<"Subtarget->supportsRMW()">,
- AssemblerPredicate<(all_of FeatureRMW)>;
+def SupportsRMW : Predicate<"Subtarget->supportsRMW()">,
+ AssemblerPredicate<(all_of FeatureRMW)>;
def SupportsMultiplication : Predicate<"Subtarget->supportsMultiplication()">,
- AssemblerPredicate<(all_of FeatureMultiplication)>;
+ AssemblerPredicate<(all_of FeatureMultiplication)>;
-def HasBREAK : Predicate<"Subtarget->hasBREAK()">,
- AssemblerPredicate<(all_of FeatureBREAK)>;
+def HasBREAK : Predicate<"Subtarget->hasBREAK()">,
+ AssemblerPredicate<(all_of FeatureBREAK)>;
def HasTinyEncoding : Predicate<"Subtarget->hasTinyEncoding()">,
- AssemblerPredicate<(all_of FeatureTinyEncoding)>;
-
+ AssemblerPredicate<(all_of FeatureTinyEncoding)>;
// AVR specific condition code. These correspond to AVR_*_COND in
// AVRInstrInfo.td. They must be kept in synch.
@@ -340,7 +319,6 @@ def AVR_COND_LO : PatLeaf<(i8 5)>;
def AVR_COND_MI : PatLeaf<(i8 6)>;
def AVR_COND_PL : PatLeaf<(i8 7)>;
-
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// AVR Instruction list
@@ -352,43 +330,49 @@ def AVR_COND_PL : PatLeaf<(i8 7)>;
// pointer before prolog-epilog rewriting occurs.
// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
// sub / add which can clobber SREG.
-let Defs = [SP, SREG],
-Uses = [SP] in
-{
+let Defs = [SP, SREG], Uses = [SP] in {
def ADJCALLSTACKDOWN : Pseudo<(outs),
- (ins i16imm:$amt, i16imm:$amt2),
- "#ADJCALLSTACKDOWN",
- [(AVRcallseq_start timm:$amt, timm:$amt2)]>;
+ (ins i16imm
+ : $amt, i16imm
+ : $amt2),
+ "#ADJCALLSTACKDOWN", [(AVRcallseq_start timm
+ : $amt, timm
+ : $amt2)]>;
// R31R30 is used to update SP. It is normally free because it is a
// call-clobbered register but it is necessary to set it as a def as the
// register allocator might use it in rare cases (for rematerialization, it
// seems). hasSideEffects needs to be set to true so this instruction isn't
// considered dead.
- let Defs = [R31R30],
- hasSideEffects=1 in
- def ADJCALLSTACKUP : Pseudo<(outs),
- (ins i16imm:$amt1, i16imm:$amt2),
- "#ADJCALLSTACKUP",
- [(AVRcallseq_end timm:$amt1, timm:$amt2)]>;
+ let Defs = [R31R30], hasSideEffects = 1 in def ADJCALLSTACKUP
+ : Pseudo<(outs),
+ (ins i16imm
+ : $amt1, i16imm
+ : $amt2),
+ "#ADJCALLSTACKUP", [(AVRcallseq_end timm
+ : $amt1, timm
+ : $amt2)]>;
}
//===----------------------------------------------------------------------===//
// Addition
//===----------------------------------------------------------------------===//
-let isCommutable = 1,
-Constraints = "$src = $rd",
-Defs = [SREG] in
-{
+let isCommutable = 1, Constraints = "$src = $rd", Defs = [SREG] in {
// ADD Rd, Rr
// Adds two 8-bit registers.
- def ADDRdRr : FRdRr<0b0000,
- 0b11,
- (outs GPR8:$rd),
- (ins GPR8:$src, GPR8:$rr),
- "add\t$rd, $rr",
- [(set i8:$rd, (add i8:$src, i8:$rr)),
- (implicit SREG)]>;
+ def ADDRdRr
+ : FRdRr<0b0000, 0b11,
+ (outs GPR8
+ : $rd),
+ (ins GPR8
+ : $src, GPR8
+ : $rr),
+ "add\t$rd, $rr",
+ [(set i8
+ : $rd, (add i8
+ : $src, i8
+ : $rr)),
+ (implicit SREG)]>;
// ADDW Rd+1:Rd, Rr+1:Rr
// Pseudo instruction to add four 8-bit registers as two 16-bit values.
@@ -396,22 +380,34 @@ Defs = [SREG] in
// Expands to:
// add Rd, Rr
// adc Rd+1, Rr+1
- def ADDWRdRr : Pseudo<(outs DREGS:$rd),
- (ins DREGS:$src, DREGS:$rr),
- "addw\t$rd, $rr",
- [(set i16:$rd, (add i16:$src, i16:$rr)),
- (implicit SREG)]>;
+ def ADDWRdRr
+ : Pseudo<(outs DREGS
+ : $rd),
+ (ins DREGS
+ : $src, DREGS
+ : $rr),
+ "addw\t$rd, $rr",
+ [(set i16
+ : $rd, (add i16
+ : $src, i16
+ : $rr)),
+ (implicit SREG)]>;
// ADC Rd, Rr
// Adds two 8-bit registers with carry.
- let Uses = [SREG] in
- def ADCRdRr : FRdRr<0b0001,
- 0b11,
- (outs GPR8:$rd),
- (ins GPR8:$src, GPR8:$rr),
- "adc\t$rd, $rr",
- [(set i8:$rd, (adde i8:$src, i8:$rr)),
- (implicit SREG)]>;
+ let Uses = [SREG] in def ADCRdRr
+ : FRdRr<0b0001, 0b11,
+ (outs GPR8
+ : $rd),
+ (ins GPR8
+ : $src, GPR8
+ : $rr),
+ "adc\t$rd, $rr",
+ [(set i8
+ : $rd, (adde i8
+ : $src, i8
+ : $rr)),
+ (implicit SREG)]>;
// ADCW Rd+1:Rd, Rr+1:Rr
// Pseudo instruction to add four 8-bit registers as two 16-bit values with
@@ -420,39 +416,56 @@ Defs = [SREG] in
// Expands to:
// adc Rd, Rr
// adc Rd+1, Rr+1
- let Uses = [SREG] in
- def ADCWRdRr : Pseudo<(outs DREGS:$rd),
- (ins DREGS:$src, DREGS:$rr),
- "adcw\t$rd, $rr",
- [(set i16:$rd, (adde i16:$src, i16:$rr)),
- (implicit SREG)]>;
+ let Uses = [SREG] in def ADCWRdRr : Pseudo<(outs DREGS
+ : $rd),
+ (ins DREGS
+ : $src, DREGS
+ : $rr),
+ "adcw\t$rd, $rr", [
+ (set i16
+ : $rd, (adde i16
+ : $src, i16
+ : $rr)),
+ (implicit SREG)
+ ]>;
// AIDW Rd, k
// Adds an immediate 6-bit value K to Rd, placing the result in Rd.
- def ADIWRdK : FWRdK<0b0,
- (outs IWREGS:$rd),
- (ins IWREGS:$src, imm_arith6:$k),
- "adiw\t$rd, $k",
- [(set i16:$rd, (add i16:$src, uimm6:$k)),
- (implicit SREG)]>,
- Requires<[HasADDSUBIW]>;
+ def ADIWRdK
+ : FWRdK<0b0,
+ (outs IWREGS
+ : $rd),
+ (ins IWREGS
+ : $src, imm_arith6
+ : $k),
+ "adiw\t$rd, $k",
+ [(set i16
+ : $rd, (add i16
+ : $src, uimm6
+ : $k)),
+ (implicit SREG)]>,
+ Requires<[HasADDSUBIW]>;
}
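
The wide pseudo instructions in this block are documented as expanding to a plain add on the low byte followed by the with-carry form on the high byte ("add Rd, Rr; adc Rd+1, Rr+1"). A small standalone C++ model of that expansion, not the backend's expander:

#include <cstdint>
#include <cstdio>

struct Flags { bool carry = false; };

// One 8-bit add, optionally consuming an incoming carry and producing a new one.
static uint8_t add8(uint8_t a, uint8_t b, bool carryIn, Flags &f) {
  const unsigned sum = unsigned(a) + b + (carryIn ? 1u : 0u);
  f.carry = sum > 0xFF;
  return uint8_t(sum);
}

// "addw Rd, Rr" as the comment describes it: the carry ripples from the low
// half into the high half.
static uint16_t addw(uint16_t a, uint16_t b) {
  Flags f;
  const uint8_t lo = add8(uint8_t(a), uint8_t(b), false, f);             // add
  const uint8_t hi = add8(uint8_t(a >> 8), uint8_t(b >> 8), f.carry, f); // adc
  return uint16_t((hi << 8) | lo);
}

int main() {
  std::printf("%04x\n", unsigned(addw(0x00ff, 0x0001))); // prints 0100
  return 0;
}
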
//===----------------------------------------------------------------------===//
// Subtraction
//===----------------------------------------------------------------------===//
-let Constraints = "$src = $rd",
-Defs = [SREG] in
-{
+let Constraints = "$src = $rd", Defs = [SREG] in {
// SUB Rd, Rr
// Subtracts the 8-bit value of Rr from Rd and places the value in Rd.
- def SUBRdRr : FRdRr<0b0001,
- 0b10,
- (outs GPR8:$rd),
- (ins GPR8:$src, GPR8:$rr),
- "sub\t$rd, $rr",
- [(set i8:$rd, (sub i8:$src, i8:$rr)),
- (implicit SREG)]>;
+ def SUBRdRr
+ : FRdRr<0b0001, 0b10,
+ (outs GPR8
+ : $rd),
+ (ins GPR8
+ : $src, GPR8
+ : $rr),
+ "sub\t$rd, $rr",
+ [(set i8
+ : $rd, (sub i8
+ : $src, i8
+ : $rr)),
+ (implicit SREG)]>;
// SUBW Rd+1:Rd, Rr+1:Rr
// Subtracts two 16-bit values and places the result into Rd.
@@ -460,295 +473,429 @@ Defs = [SREG] in
// Expands to:
// sub Rd, Rr
// sbc Rd+1, Rr+1
- def SUBWRdRr : Pseudo<(outs DREGS:$rd),
- (ins DREGS:$src, DREGS:$rr),
- "subw\t$rd, $rr",
- [(set i16:$rd, (sub i16:$src, i16:$rr)),
- (implicit SREG)]>;
-
- def SUBIRdK : FRdK<0b0101,
- (outs LD8:$rd),
- (ins LD8:$src, imm_ldi8:$k),
- "subi\t$rd, $k",
- [(set i8:$rd, (sub i8:$src, imm:$k)),
- (implicit SREG)]>;
+ def SUBWRdRr
+ : Pseudo<(outs DREGS
+ : $rd),
+ (ins DREGS
+ : $src, DREGS
+ : $rr),
+ "subw\t$rd, $rr",
+ [(set i16
+ : $rd, (sub i16
+ : $src, i16
+ : $rr)),
+ (implicit SREG)]>;
+
+ def SUBIRdK
+ : FRdK<0b0101,
+ (outs LD8
+ : $rd),
+ (ins LD8
+ : $src, imm_ldi8
+ : $k),
+ "subi\t$rd, $k",
+ [(set i8
+ : $rd, (sub i8
+ : $src, imm
+ : $k)),
+ (implicit SREG)]>;
// SUBIW Rd+1:Rd, K+1:K
//
// Expands to:
// subi Rd, K
// sbci Rd+1, K+1
- def SUBIWRdK : Pseudo<(outs DLDREGS:$rd),
- (ins DLDREGS:$src, i16imm:$rr),
- "subiw\t$rd, $rr",
- [(set i16:$rd, (sub i16:$src, imm:$rr)),
- (implicit SREG)]>;
-
- def SBIWRdK : FWRdK<0b1,
- (outs IWREGS:$rd),
- (ins IWREGS:$src, imm_arith6:$k),
- "sbiw\t$rd, $k",
- [(set i16:$rd, (sub i16:$src, uimm6:$k)),
- (implicit SREG)]>,
- Requires<[HasADDSUBIW]>;
+ def SUBIWRdK
+ : Pseudo<(outs DLDREGS
+ : $rd),
+ (ins DLDREGS
+ : $src, i16imm
+ : $rr),
+ "subiw\t$rd, $rr",
+ [(set i16
+ : $rd, (sub i16
+ : $src, imm
+ : $rr)),
+ (implicit SREG)]>;
+
+ def SBIWRdK
+ : FWRdK<0b1,
+ (outs IWREGS
+ : $rd),
+ (ins IWREGS
+ : $src, imm_arith6
+ : $k),
+ "sbiw\t$rd, $k",
+ [(set i16
+ : $rd, (sub i16
+ : $src, uimm6
+ : $k)),
+ (implicit SREG)]>,
+ Requires<[HasADDSUBIW]>;
// Subtract with carry operations which must read the carry flag in SREG.
- let Uses = [SREG] in
- {
- def SBCRdRr : FRdRr<0b0000,
- 0b10,
- (outs GPR8:$rd),
- (ins GPR8:$src, GPR8:$rr),
- "sbc\t$rd, $rr",
- [(set i8:$rd, (sube i8:$src, i8:$rr)),
- (implicit SREG)]>;
+ let Uses = [SREG] in {
+ def SBCRdRr
+ : FRdRr<0b0000, 0b10,
+ (outs GPR8
+ : $rd),
+ (ins GPR8
+ : $src, GPR8
+ : $rr),
+ "sbc\t$rd, $rr",
+ [(set i8
+ : $rd, (sube i8
+ : $src, i8
+ : $rr)),
+ (implicit SREG)]>;
// SBCW Rd+1:Rd, Rr+1:Rr
//
// Expands to:
// sbc Rd, Rr
// sbc Rd+1, Rr+1
- def SBCWRdRr : Pseudo<(outs DREGS:$rd),
- (ins DREGS:$src, DREGS:$rr),
- "sbcw\t$rd, $rr",
- [(set i16:$rd, (sube i16:$src, i16:$rr)),
- (implicit SREG)]>;
-
- def SBCIRdK : FRdK<0b0100,
- (outs LD8:$rd),
- (ins LD8:$src, imm_ldi8:$k),
- "sbci\t$rd, $k",
- [(set i8:$rd, (sube i8:$src, imm:$k)),
- (implicit SREG)]>;
+ def SBCWRdRr : Pseudo<(outs DREGS
+ : $rd),
+ (ins DREGS
+ : $src, DREGS
+ : $rr),
+ "sbcw\t$rd, $rr", [
+ (set i16
+ : $rd, (sube i16
+ : $src, i16
+ : $rr)),
+ (implicit SREG)
+ ]>;
+
+ def SBCIRdK
+ : FRdK<0b0100,
+ (outs LD8
+ : $rd),
+ (ins LD8
+ : $src, imm_ldi8
+ : $k),
+ "sbci\t$rd, $k",
+ [(set i8
+ : $rd, (sube i8
+ : $src, imm
+ : $k)),
+ (implicit SREG)]>;
// SBCIW Rd+1:Rd, K+1:K
// sbci Rd, K
// sbci Rd+1, K+1
- def SBCIWRdK : Pseudo<(outs DLDREGS:$rd),
- (ins DLDREGS:$src, i16imm:$rr),
- "sbciw\t$rd, $rr",
- [(set i16:$rd, (sube i16:$src, imm:$rr)),
- (implicit SREG)]>;
+ def SBCIWRdK : Pseudo<(outs DLDREGS
+ : $rd),
+ (ins DLDREGS
+ : $src, i16imm
+ : $rr),
+ "sbciw\t$rd, $rr", [
+ (set i16
+ : $rd, (sube i16
+ : $src, imm
+ : $rr)),
+ (implicit SREG)
+ ]>;
}
}
//===----------------------------------------------------------------------===//
// Increment and Decrement
//===----------------------------------------------------------------------===//
-let Constraints = "$src = $rd",
-Defs = [SREG] in
-{
- def INCRd : FRd<0b1001,
- 0b0100011,
- (outs GPR8:$rd),
- (ins GPR8:$src),
- "inc\t$rd",
- [(set i8:$rd, (add i8:$src, 1)), (implicit SREG)]>;
-
- def DECRd : FRd<0b1001,
- 0b0101010,
- (outs GPR8:$rd),
- (ins GPR8:$src),
- "dec\t$rd",
- [(set i8:$rd, (add i8:$src, -1)), (implicit SREG)]>;
+let Constraints = "$src = $rd", Defs = [SREG] in {
+ def INCRd
+ : FRd<0b1001, 0b0100011,
+ (outs GPR8
+ : $rd),
+ (ins GPR8
+ : $src),
+ "inc\t$rd", [(set i8
+ : $rd, (add i8
+ : $src, 1)),
+ (implicit SREG)]>;
+
+ def DECRd
+ : FRd<0b1001, 0b0101010,
+ (outs GPR8
+ : $rd),
+ (ins GPR8
+ : $src),
+ "dec\t$rd", [(set i8
+ : $rd, (add i8
+ : $src, -1)),
+ (implicit SREG)]>;
}
//===----------------------------------------------------------------------===//
// Multiplication
//===----------------------------------------------------------------------===//
-let isCommutable = 1,
-Defs = [R1, R0, SREG] in
-{
+let isCommutable = 1, Defs = [R1, R0, SREG] in {
// MUL Rd, Rr
// Multiplies Rd by Rr and places the result into R1:R0.
let usesCustomInserter = 1 in {
- def MULRdRr : FRdRr<0b1001, 0b11,
- (outs),
- (ins GPR8:$lhs, GPR8:$rhs),
+ def MULRdRr : FRdRr<0b1001, 0b11, (outs),
+ (ins GPR8
+ : $lhs, GPR8
+ : $rhs),
"mul\t$lhs, $rhs",
[/*(set R1, R0, (smullohi i8:$lhs, i8:$rhs))*/]>,
- Requires<[SupportsMultiplication]>;
+ Requires<[SupportsMultiplication]>;
- def MULSRdRr : FMUL2RdRr<0,
- (outs),
- (ins LD8:$lhs, LD8:$rhs),
- "muls\t$lhs, $rhs",
- []>,
+ def MULSRdRr : FMUL2RdRr<0, (outs),
+ (ins LD8
+ : $lhs, LD8
+ : $rhs),
+ "muls\t$lhs, $rhs", []>,
Requires<[SupportsMultiplication]>;
}
- def MULSURdRr : FMUL2RdRr<1,
- (outs),
- (ins LD8lo:$lhs, LD8lo:$rhs),
- "mulsu\t$lhs, $rhs",
- []>,
+ def MULSURdRr : FMUL2RdRr<1, (outs),
+ (ins LD8lo
+ : $lhs, LD8lo
+ : $rhs),
+ "mulsu\t$lhs, $rhs", []>,
Requires<[SupportsMultiplication]>;
- def FMUL : FFMULRdRr<0b01,
- (outs),
- (ins LD8lo:$lhs, LD8lo:$rhs),
- "fmul\t$lhs, $rhs",
- []>,
+ def FMUL : FFMULRdRr<0b01, (outs),
+ (ins LD8lo
+ : $lhs, LD8lo
+ : $rhs),
+ "fmul\t$lhs, $rhs", []>,
Requires<[SupportsMultiplication]>;
- def FMULS : FFMULRdRr<0b10,
- (outs),
- (ins LD8lo:$lhs, LD8lo:$rhs),
- "fmuls\t$lhs, $rhs",
- []>,
+ def FMULS : FFMULRdRr<0b10, (outs),
+ (ins LD8lo
+ : $lhs, LD8lo
+ : $rhs),
+ "fmuls\t$lhs, $rhs", []>,
Requires<[SupportsMultiplication]>;
- def FMULSU : FFMULRdRr<0b11,
- (outs),
- (ins LD8lo:$lhs, LD8lo:$rhs),
- "fmulsu\t$lhs, $rhs",
- []>,
+ def FMULSU : FFMULRdRr<0b11, (outs),
+ (ins LD8lo
+ : $lhs, LD8lo
+ : $rhs),
+ "fmulsu\t$lhs, $rhs", []>,
Requires<[SupportsMultiplication]>;
}
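
The Defs list above pins the MUL family's result to the fixed register pair R1:R0; a tiny standalone model of that contract (a sketch of the semantics, not how the backend lowers it):

#include <cstdint>
#include <cstdio>

// The 16-bit product of two 8-bit operands lands in R1 (high byte) and
// R0 (low byte), which is why the block above lists both, plus SREG, in Defs.
static void mul8(uint8_t lhs, uint8_t rhs, uint8_t &r1, uint8_t &r0) {
  const uint16_t product = uint16_t(lhs) * rhs;
  r1 = uint8_t(product >> 8);
  r0 = uint8_t(product);
}

int main() {
  uint8_t r1 = 0, r0 = 0;
  mul8(200, 3, r1, r0);          // 600 = 0x0258
  std::printf("%d %d\n", r1, r0); // prints "2 88"
  return 0;
}
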
-let Defs = [R15, R14, R13, R12, R11, R10, R9,
- R8, R7, R6, R5, R4, R3, R2, R1, R0] in
-def DESK : FDES<(outs),
- (ins i8imm:$k),
- "des\t$k",
- []>,
- Requires<[HasDES]>;
+let Defs =
+ [R15, R14, R13, R12, R11, R10, R9, R8, R7, R6, R5, R4, R3, R2, R1,
+ R0] in def DESK : FDES<(outs),
+ (ins i8imm
+ : $k),
+ "des\t$k", []>,
+ Requires<[HasDES]>;
//===----------------------------------------------------------------------===//
// Logic
//===----------------------------------------------------------------------===//
-let Constraints = "$src = $rd",
-Defs = [SREG] in
-{
+let Constraints = "$src = $rd", Defs = [SREG] in {
// Register-Register logic instructions (which have the
// property of commutativity).
- let isCommutable = 1 in
- {
- def ANDRdRr : FRdRr<0b0010,
- 0b00,
- (outs GPR8:$rd),
- (ins GPR8:$src, GPR8:$rr),
- "and\t$rd, $rr",
- [(set i8:$rd, (and i8:$src, i8:$rr)),
- (implicit SREG)]>;
+ let isCommutable = 1 in {
+ def ANDRdRr
+ : FRdRr<0b0010, 0b00,
+ (outs GPR8
+ : $rd),
+ (ins GPR8
+ : $src, GPR8
+ : $rr),
+ "and\t$rd, $rr",
+ [(set i8
+ : $rd, (and i8
+ : $src, i8
+ : $rr)),
+ (implicit SREG)]>;
// ANDW Rd+1:Rd, Rr+1:Rr
//
// Expands to:
// and Rd, Rr
// and Rd+1, Rr+1
- def ANDWRdRr : Pseudo<(outs DREGS:$rd),
- (ins DREGS:$src, DREGS:$rr),
- "andw\t$rd, $rr",
- [(set i16:$rd, (and i16:$src, i16:$rr)),
- (implicit SREG)]>;
-
- def ORRdRr : FRdRr<0b0010,
- 0b10,
- (outs GPR8:$rd),
- (ins GPR8:$src, GPR8:$rr),
- "or\t$rd, $rr",
- [(set i8:$rd, (or i8:$src, i8:$rr)),
- (implicit SREG)]>;
+ def ANDWRdRr : Pseudo<(outs DREGS
+ : $rd),
+ (ins DREGS
+ : $src, DREGS
+ : $rr),
+ "andw\t$rd, $rr", [
+ (set i16
+ : $rd, (and i16
+ : $src, i16
+ : $rr)),
+ (implicit SREG)
+ ]>;
+
+ def ORRdRr
+ : FRdRr<0b0010, 0b10,
+ (outs GPR8
+ : $rd),
+ (ins GPR8
+ : $src, GPR8
+ : $rr),
+ "or\t$rd, $rr",
+ [(set i8
+ : $rd, (or i8
+ : $src, i8
+ : $rr)),
+ (implicit SREG)]>;
// ORW Rd+1:Rd, Rr+1:Rr
//
// Expands to:
// or Rd, Rr
// or Rd+1, Rr+1
- def ORWRdRr : Pseudo<(outs DREGS:$rd),
- (ins DREGS:$src, DREGS:$rr),
- "orw\t$rd, $rr",
- [(set i16:$rd, (or i16:$src, i16:$rr)),
- (implicit SREG)]>;
-
- def EORRdRr : FRdRr<0b0010,
- 0b01,
- (outs GPR8:$rd),
- (ins GPR8:$src, GPR8:$rr),
- "eor\t$rd, $rr",
- [(set i8:$rd, (xor i8:$src, i8:$rr)),
- (implicit SREG)]>;
+ def ORWRdRr : Pseudo<(outs DREGS
+ : $rd),
+ (ins DREGS
+ : $src, DREGS
+ : $rr),
+ "orw\t$rd, $rr", [
+ (set i16
+ : $rd, (or i16
+ : $src, i16
+ : $rr)),
+ (implicit SREG)
+ ]>;
+
+ def EORRdRr
+ : FRdRr<0b0010, 0b01,
+ (outs GPR8
+ : $rd),
+ (ins GPR8
+ : $src, GPR8
+ : $rr),
+ "eor\t$rd, $rr",
+ [(set i8
+ : $rd, (xor i8
+ : $src, i8
+ : $rr)),
+ (implicit SREG)]>;
// EORW Rd+1:Rd, Rr+1:Rr
//
// Expands to:
// eor Rd, Rr
// eor Rd+1, Rr+1
- def EORWRdRr : Pseudo<(outs DREGS:$rd),
- (ins DREGS:$src, DREGS:$rr),
- "eorw\t$rd, $rr",
- [(set i16:$rd, (xor i16:$src, i16:$rr)),
- (implicit SREG)]>;
+ def EORWRdRr : Pseudo<(outs DREGS
+ : $rd),
+ (ins DREGS
+ : $src, DREGS
+ : $rr),
+ "eorw\t$rd, $rr", [
+ (set i16
+ : $rd, (xor i16
+ : $src, i16
+ : $rr)),
+ (implicit SREG)
+ ]>;
}
- def ANDIRdK : FRdK<0b0111,
- (outs LD8:$rd),
- (ins LD8:$src, imm_ldi8:$k),
- "andi\t$rd, $k",
- [(set i8:$rd, (and i8:$src, imm:$k)),
- (implicit SREG)]>;
+ def ANDIRdK
+ : FRdK<0b0111,
+ (outs LD8
+ : $rd),
+ (ins LD8
+ : $src, imm_ldi8
+ : $k),
+ "andi\t$rd, $k",
+ [(set i8
+ : $rd, (and i8
+ : $src, imm
+ : $k)),
+ (implicit SREG)]>;
// ANDI Rd+1:Rd, K+1:K
//
// Expands to:
// andi Rd, K
// andi Rd+1, K+1
- def ANDIWRdK : Pseudo<(outs DLDREGS:$rd),
- (ins DLDREGS:$src, i16imm:$k),
- "andiw\t$rd, $k",
- [(set i16:$rd, (and i16:$src, imm:$k)),
- (implicit SREG)]>;
-
- def ORIRdK : FRdK<0b0110,
- (outs LD8:$rd),
- (ins LD8:$src, imm_ldi8:$k),
- "ori\t$rd, $k",
- [(set i8:$rd, (or i8:$src, imm:$k)),
- (implicit SREG)]>;
+ def ANDIWRdK
+ : Pseudo<(outs DLDREGS
+ : $rd),
+ (ins DLDREGS
+ : $src, i16imm
+ : $k),
+ "andiw\t$rd, $k",
+ [(set i16
+ : $rd, (and i16
+ : $src, imm
+ : $k)),
+ (implicit SREG)]>;
+
+ def ORIRdK
+ : FRdK<0b0110,
+ (outs LD8
+ : $rd),
+ (ins LD8
+ : $src, imm_ldi8
+ : $k),
+ "ori\t$rd, $k",
+ [(set i8
+ : $rd, (or i8
+ : $src, imm
+ : $k)),
+ (implicit SREG)]>;
// ORIW Rd+1:Rd, K+1,K
//
// Expands to:
// ori Rd, K
// ori Rd+1, K+1
- def ORIWRdK : Pseudo<(outs DLDREGS:$rd),
- (ins DLDREGS:$src, i16imm:$rr),
- "oriw\t$rd, $rr",
- [(set i16:$rd, (or i16:$src, imm:$rr)),
- (implicit SREG)]>;
+ def ORIWRdK
+ : Pseudo<(outs DLDREGS
+ : $rd),
+ (ins DLDREGS
+ : $src, i16imm
+ : $rr),
+ "oriw\t$rd, $rr",
+ [(set i16
+ : $rd, (or i16
+ : $src, imm
+ : $rr)),
+ (implicit SREG)]>;
}
//===----------------------------------------------------------------------===//
// One's/Two's Complement
//===----------------------------------------------------------------------===//
-let Constraints = "$src = $rd",
-Defs = [SREG] in
-{
- def COMRd : FRd<0b1001,
- 0b0100000,
- (outs GPR8:$rd),
- (ins GPR8:$src),
- "com\t$rd",
- [(set i8:$rd, (not i8:$src)), (implicit SREG)]>;
+let Constraints = "$src = $rd", Defs = [SREG] in {
+ def COMRd
+ : FRd<0b1001, 0b0100000,
+ (outs GPR8
+ : $rd),
+ (ins GPR8
+ : $src),
+ "com\t$rd", [(set i8
+ : $rd, (not i8
+ : $src)),
+ (implicit SREG)]>;
// COMW Rd+1:Rd
//
// Expands to:
// com Rd
// com Rd+1
- def COMWRd : Pseudo<(outs DREGS:$rd),
- (ins DREGS:$src),
+ def COMWRd : Pseudo<(outs DREGS
+ : $rd),
+ (ins DREGS
+ : $src),
"comw\t$rd",
- [(set i16:$rd, (not i16:$src)), (implicit SREG)]>;
+ [(set i16
+ : $rd, (not i16
+ : $src)),
+ (implicit SREG)]>;
- def NEGRd : FRd<0b1001,
- 0b0100001,
- (outs GPR8:$rd),
- (ins GPR8:$src),
- "neg\t$rd",
- [(set i8:$rd, (ineg i8:$src)), (implicit SREG)]>;
+ def NEGRd
+ : FRd<0b1001, 0b0100001,
+ (outs GPR8
+ : $rd),
+ (ins GPR8
+ : $src),
+ "neg\t$rd", [(set i8
+ : $rd, (ineg i8
+ : $src)),
+ (implicit SREG)]>;
// NEGW Rd+1:Rd
//
@@ -756,155 +903,126 @@ Defs = [SREG] in
// neg Rd+1
// neg Rd
// sbc Rd+1, r1
- def NEGWRd : Pseudo<(outs DREGS:$rd),
- (ins DREGS:$src),
+ def NEGWRd : Pseudo<(outs DREGS
+ : $rd),
+ (ins DREGS
+ : $src),
"negw\t$rd",
- [(set i16:$rd, (ineg i16:$src)), (implicit SREG)]>;
+ [(set i16
+ : $rd, (ineg i16
+ : $src)),
+ (implicit SREG)]>;
}
// TST Rd
// Test for zero of minus.
// This operation is identical to a `Rd AND Rd`.
-def : InstAlias<"tst\t$rd", (ANDRdRr GPR8:$rd, GPR8:$rd)>;
+def : InstAlias<"tst\t$rd", (ANDRdRr GPR8 : $rd, GPR8 : $rd)>;
// SBR Rd, K
//
// Mnemonic alias to 'ORI Rd, K'. Same bit pattern, same operands,
// same everything.
def : InstAlias<"sbr\t$rd, $k",
- (ORIRdK LD8:$rd, imm_ldi8:$k),
+ (ORIRdK LD8
+ : $rd, imm_ldi8
+ : $k),
/* Disable display, so we don't override ORI */ 0>;
//===----------------------------------------------------------------------===//
// Jump instructions
//===----------------------------------------------------------------------===//
-let isBarrier = 1,
-isBranch = 1,
-isTerminator = 1 in
-{
- def RJMPk : FBRk<0,
- (outs),
- (ins brtarget_13:$target),
- "rjmp\t$target",
- [(br bb:$target)]>;
+let isBarrier = 1, isBranch = 1, isTerminator = 1 in {
+ def RJMPk : FBRk<0, (outs),
+ (ins brtarget_13
+ : $target),
+ "rjmp\t$target", [(br bb
+ : $target)]>;
let isIndirectBranch = 1,
- Uses = [R31R30] in
- def IJMP : F16<0b1001010000001001,
- (outs),
- (ins),
- "ijmp",
- []>,
- Requires<[HasIJMPCALL]>;
+ Uses = [R31R30] in def IJMP
+ : F16<0b1001010000001001, (outs), (ins), "ijmp", []>,
+ Requires<[HasIJMPCALL]>;
let isIndirectBranch = 1,
- Uses = [R31R30] in
- def EIJMP : F16<0b1001010000011001,
- (outs),
- (ins),
- "eijmp",
- []>,
- Requires<[HasEIJMPCALL]>;
-
- def JMPk : F32BRk<0b110,
- (outs),
- (ins call_target:$k),
- "jmp\t$k",
- []>,
+ Uses = [R31R30] in def EIJMP
+ : F16<0b1001010000011001, (outs), (ins), "eijmp", []>,
+ Requires<[HasEIJMPCALL]>;
+
+ def JMPk : F32BRk<0b110, (outs),
+ (ins call_target
+ : $k),
+ "jmp\t$k", []>,
Requires<[HasJMPCALL]>;
}
//===----------------------------------------------------------------------===//
// Call instructions
//===----------------------------------------------------------------------===//
-let isCall = 1 in
-{
+let isCall = 1 in {
// SP is marked as a use to prevent stack-pointer assignments that appear
// immediately before calls from potentially appearing dead.
- let Uses = [SP] in
- def RCALLk : FBRk<1,
- (outs),
- (ins brtarget_13:$target),
- "rcall\t$target",
- []>;
+ let Uses = [SP] in def RCALLk : FBRk<1, (outs),
+ (ins brtarget_13
+ : $target),
+ "rcall\t$target", []>;
// SP is marked as a use to prevent stack-pointer assignments that appear
// immediately before calls from potentially appearing dead.
- let Uses = [SP, R31R30] in
- def ICALL : F16<0b1001010100001001,
- (outs),
- (ins variable_ops),
- "icall",
- []>,
- Requires<[HasIJMPCALL]>;
+ let Uses = [SP, R31R30] in def ICALL
+ : F16<0b1001010100001001, (outs), (ins variable_ops), "icall", []>,
+ Requires<[HasIJMPCALL]>;
// SP is marked as a use to prevent stack-pointer assignments that appear
// immediately before calls from potentially appearing dead.
- let Uses = [SP, R31R30] in
- def EICALL : F16<0b1001010100011001,
- (outs),
- (ins variable_ops),
- "eicall",
- []>,
- Requires<[HasEIJMPCALL]>;
+ let Uses = [SP, R31R30] in def EICALL
+ : F16<0b1001010100011001, (outs), (ins variable_ops), "eicall", []>,
+ Requires<[HasEIJMPCALL]>;
// SP is marked as a use to prevent stack-pointer assignments that appear
// immediately before calls from potentially appearing dead.
//
- //:TODO: the imm field can be either 16 or 22 bits in devices with more
+ //: TODO: the imm field can be either 16 or 22 bits in devices with more
// than 64k of ROM, fix it once we support the largest devices.
- let Uses = [SP] in
- def CALLk : F32BRk<0b111,
- (outs),
- (ins call_target:$k),
- "call\t$k",
- [(AVRcall imm:$k)]>,
- Requires<[HasJMPCALL]>;
+ let Uses = [SP] in def CALLk : F32BRk<0b111, (outs),
+ (ins call_target
+ : $k),
+ "call\t$k", [(AVRcall imm
+ : $k)]>,
+ Requires<[HasJMPCALL]>;
}
//===----------------------------------------------------------------------===//
// Return instructions.
//===----------------------------------------------------------------------===//
-let isTerminator = 1,
-isReturn = 1,
-isBarrier = 1 in
-{
- def RET : F16<0b1001010100001000,
- (outs),
- (ins),
- "ret",
- [(AVRretflag)]>;
-
- def RETI : F16<0b1001010100011000,
- (outs),
- (ins),
- "reti",
- [(AVRretiflag)]>;
+let isTerminator = 1, isReturn = 1, isBarrier = 1 in {
+ def RET : F16<0b1001010100001000, (outs), (ins), "ret", [(AVRretflag)]>;
+
+ def RETI : F16<0b1001010100011000, (outs), (ins), "reti", [(AVRretiflag)]>;
}
//===----------------------------------------------------------------------===//
// Compare operations.
//===----------------------------------------------------------------------===//
-let Defs = [SREG] in
-{
+let Defs = [SREG] in {
// CPSE Rd, Rr
// Compare Rd and Rr, skipping the next instruction if they are equal.
- let isBarrier = 1,
- isBranch = 1,
- isTerminator = 1 in
- def CPSE : FRdRr<0b0001,
- 0b00,
- (outs),
- (ins GPR8:$rd, GPR8:$rr),
- "cpse\t$rd, $rr",
- []>;
-
- def CPRdRr : FRdRr<0b0001,
- 0b01,
- (outs),
- (ins GPR8:$rd, GPR8:$rr),
- "cp\t$rd, $rr",
- [(AVRcmp i8:$rd, i8:$rr), (implicit SREG)]>;
+ let isBarrier = 1, isBranch = 1,
+ isTerminator = 1 in def CPSE : FRdRr<0b0001, 0b00, (outs),
+ (ins GPR8
+ : $rd, GPR8
+ : $rr),
+ "cpse\t$rd, $rr", []>;
+
+ def CPRdRr
+ : FRdRr<0b0001, 0b01, (outs),
+ (ins GPR8
+ : $rd, GPR8
+ : $rr),
+ "cp\t$rd, $rr", [(AVRcmp i8
+ : $rd, i8
+ : $rr),
+ (implicit SREG)]>;
// CPW Rd+1:Rd, Rr+1:Rr
//
@@ -912,251 +1030,256 @@ let Defs = [SREG] in
// cp Rd, Rr
// cpc Rd+1, Rr+1
def CPWRdRr : Pseudo<(outs),
- (ins DREGS:$src, DREGS:$src2),
+ (ins DREGS
+ : $src, DREGS
+ : $src2),
"cpw\t$src, $src2",
- [(AVRcmp i16:$src, i16:$src2), (implicit SREG)]>;
+ [(AVRcmp i16
+ : $src, i16
+ : $src2),
+ (implicit SREG)]>;
- let Uses = [SREG] in
- def CPCRdRr : FRdRr<0b0000,
- 0b01,
- (outs),
- (ins GPR8:$rd, GPR8:$rr),
- "cpc\t$rd, $rr",
- [(AVRcmpc i8:$rd, i8:$rr), (implicit SREG)]>;
+ let Uses = [SREG] in def CPCRdRr
+ : FRdRr<0b0000, 0b01, (outs),
+ (ins GPR8
+ : $rd, GPR8
+ : $rr),
+ "cpc\t$rd, $rr", [(AVRcmpc i8
+ : $rd, i8
+ : $rr),
+ (implicit SREG)]>;
// CPCW Rd+1:Rd. Rr+1:Rr
//
// Expands to:
// cpc Rd, Rr
// cpc Rd+1, Rr+1
- let Uses = [SREG] in
- def CPCWRdRr : Pseudo<(outs),
- (ins DREGS:$src, DREGS:$src2),
- "cpcw\t$src, $src2",
- [(AVRcmpc i16:$src, i16:$src2), (implicit SREG)]>;
+ let Uses = [SREG] in def CPCWRdRr
+ : Pseudo<(outs),
+ (ins DREGS
+ : $src, DREGS
+ : $src2),
+ "cpcw\t$src, $src2",
+ [(AVRcmpc i16
+ : $src, i16
+ : $src2),
+ (implicit SREG)]>;
// CPI Rd, K
// Compares a register with an 8 bit immediate.
- def CPIRdK : FRdK<0b0011,
- (outs),
- (ins LD8:$rd, imm_ldi8:$k),
- "cpi\t$rd, $k",
- [(AVRcmp i8:$rd, imm:$k), (implicit SREG)]>;
+ def CPIRdK
+ : FRdK<0b0011, (outs),
+ (ins LD8
+ : $rd, imm_ldi8
+ : $k),
+ "cpi\t$rd, $k", [(AVRcmp i8
+ : $rd, imm
+ : $k),
+ (implicit SREG)]>;
}
//===----------------------------------------------------------------------===//
// Register conditional skipping/branching operations.
//===----------------------------------------------------------------------===//
-let isBranch = 1,
-isTerminator = 1 in
-{
+let isBranch = 1, isTerminator = 1 in {
// Conditional skipping on GPR register bits, and
// conditional skipping on IO register bits.
- let isBarrier = 1 in
- {
- def SBRCRrB : FRdB<0b10,
- (outs),
- (ins GPR8:$rr, i8imm:$b),
- "sbrc\t$rr, $b",
- []>;
-
- def SBRSRrB : FRdB<0b11,
- (outs),
- (ins GPR8:$rr, i8imm:$b),
- "sbrs\t$rr, $b",
- []>;
-
- def SBICAb : FIOBIT<0b01,
- (outs),
- (ins imm_port5:$a, i8imm:$b),
- "sbic\t$a, $b",
- []>;
-
- def SBISAb : FIOBIT<0b11,
- (outs),
- (ins imm_port5:$a, i8imm:$b),
- "sbis\t$a, $b",
- []>;
+ let isBarrier = 1 in {
+ def SBRCRrB : FRdB<0b10, (outs),
+ (ins GPR8
+ : $rr, i8imm
+ : $b),
+ "sbrc\t$rr, $b", []>;
+
+ def SBRSRrB : FRdB<0b11, (outs),
+ (ins GPR8
+ : $rr, i8imm
+ : $b),
+ "sbrs\t$rr, $b", []>;
+
+ def SBICAb : FIOBIT<0b01, (outs),
+ (ins imm_port5
+ : $a, i8imm
+ : $b),
+ "sbic\t$a, $b", []>;
+
+ def SBISAb : FIOBIT<0b11, (outs),
+ (ins imm_port5
+ : $a, i8imm
+ : $b),
+ "sbis\t$a, $b", []>;
}
// Relative branches on status flag bits.
- let Uses = [SREG] in
- {
+ let Uses = [SREG] in {
// BRBS s, k
// Branch if `s` flag in status register is set.
- def BRBSsk : FSK<0,
- (outs),
- (ins i8imm:$s, relbrtarget_7:$k),
- "brbs\t$s, $k",
- []>;
+ def BRBSsk : FSK<0, (outs),
+ (ins i8imm
+ : $s, relbrtarget_7
+ : $k),
+ "brbs\t$s, $k", []>;
// BRBC s, k
// Branch if `s` flag in status register is clear.
- def BRBCsk : FSK<1,
- (outs),
- (ins i8imm:$s, relbrtarget_7:$k),
- "brbc\t$s, $k",
- []>;
+ def BRBCsk : FSK<1, (outs),
+ (ins i8imm
+ : $s, relbrtarget_7
+ : $k),
+ "brbc\t$s, $k", []>;
}
}
-
// BRCS k
// Branch if carry flag is set
-def : InstAlias<"brcs\t$k", (BRBSsk 0, relbrtarget_7:$k)>;
+def : InstAlias<"brcs\t$k", (BRBSsk 0, relbrtarget_7 : $k)>;
// BRCC k
// Branch if carry flag is clear
-def : InstAlias<"brcc\t$k", (BRBCsk 0, relbrtarget_7:$k)>;
+def : InstAlias<"brcc\t$k", (BRBCsk 0, relbrtarget_7 : $k)>;
// BRHS k
// Branch if half carry flag is set
-def : InstAlias<"brhs\t$k", (BRBSsk 5, relbrtarget_7:$k)>;
+def : InstAlias<"brhs\t$k", (BRBSsk 5, relbrtarget_7 : $k)>;
// BRHC k
// Branch if half carry flag is clear
-def : InstAlias<"brhc\t$k", (BRBCsk 5, relbrtarget_7:$k)>;
+def : InstAlias<"brhc\t$k", (BRBCsk 5, relbrtarget_7 : $k)>;
// BRTS k
// Branch if the T flag is set
-def : InstAlias<"brts\t$k", (BRBSsk 6, relbrtarget_7:$k)>;
+def : InstAlias<"brts\t$k", (BRBSsk 6, relbrtarget_7 : $k)>;
// BRTC k
// Branch if the T flag is clear
-def : InstAlias<"brtc\t$k", (BRBCsk 6, relbrtarget_7:$k)>;
+def : InstAlias<"brtc\t$k", (BRBCsk 6, relbrtarget_7 : $k)>;
// BRVS k
// Branch if the overflow flag is set
-def : InstAlias<"brvs\t$k", (BRBSsk 3, relbrtarget_7:$k)>;
+def : InstAlias<"brvs\t$k", (BRBSsk 3, relbrtarget_7 : $k)>;
// BRVC k
// Branch if the overflow flag is clear
-def : InstAlias<"brvc\t$k", (BRBCsk 3, relbrtarget_7:$k)>;
+def : InstAlias<"brvc\t$k", (BRBCsk 3, relbrtarget_7 : $k)>;
// BRIE k
// Branch if the global interrupt flag is enabled
-def : InstAlias<"brie\t$k", (BRBSsk 7, relbrtarget_7:$k)>;
+def : InstAlias<"brie\t$k", (BRBSsk 7, relbrtarget_7 : $k)>;
// BRID k
// Branch if the global interrupt flag is disabled
-def : InstAlias<"brid\t$k", (BRBCsk 7, relbrtarget_7:$k)>;
+def : InstAlias<"brid\t$k", (BRBCsk 7, relbrtarget_7 : $k)>;
//===----------------------------------------------------------------------===//
// PC-relative conditional branches
//===----------------------------------------------------------------------===//
// Based on status register. We cannot simplify these into instruction aliases
// because we also need to be able to specify a pattern to match for ISel.
-let isBranch = 1,
-isTerminator = 1,
-Uses = [SREG] in
-{
- def BREQk : FBRsk<0,
- 0b001,
- (outs),
- (ins relbrtarget_7:$target),
- "breq\t$target",
- [(AVRbrcond bb:$target, AVR_COND_EQ)]>;
-
- def BRNEk : FBRsk<1,
- 0b001,
- (outs),
- (ins relbrtarget_7:$target),
- "brne\t$target",
- [(AVRbrcond bb:$target, AVR_COND_NE)]>;
-
-
- def BRSHk : FBRsk<1,
- 0b000,
- (outs),
- (ins relbrtarget_7:$target),
- "brsh\t$target",
- [(AVRbrcond bb:$target, AVR_COND_SH)]>;
-
- def BRLOk : FBRsk<0,
- 0b000,
- (outs),
- (ins relbrtarget_7:$target),
- "brlo\t$target",
- [(AVRbrcond bb:$target, AVR_COND_LO)]>;
-
- def BRMIk : FBRsk<0,
- 0b010,
- (outs),
- (ins relbrtarget_7:$target),
- "brmi\t$target",
- [(AVRbrcond bb:$target, AVR_COND_MI)]>;
-
- def BRPLk : FBRsk<1,
- 0b010,
- (outs),
- (ins relbrtarget_7:$target),
- "brpl\t$target",
- [(AVRbrcond bb:$target, AVR_COND_PL)]>;
-
- def BRGEk : FBRsk<1,
- 0b100,
- (outs),
- (ins relbrtarget_7:$target),
- "brge\t$target",
- [(AVRbrcond bb:$target, AVR_COND_GE)]>;
-
- def BRLTk : FBRsk<0,
- 0b100,
- (outs),
- (ins relbrtarget_7:$target),
- "brlt\t$target",
- [(AVRbrcond bb:$target, AVR_COND_LT)]>;
+let isBranch = 1, isTerminator = 1, Uses = [SREG] in {
+ def BREQk : FBRsk<0, 0b001, (outs),
+ (ins relbrtarget_7
+ : $target),
+ "breq\t$target", [(AVRbrcond bb
+ : $target, AVR_COND_EQ)]>;
+
+ def BRNEk : FBRsk<1, 0b001, (outs),
+ (ins relbrtarget_7
+ : $target),
+ "brne\t$target", [(AVRbrcond bb
+ : $target, AVR_COND_NE)]>;
+
+ def BRSHk : FBRsk<1, 0b000, (outs),
+ (ins relbrtarget_7
+ : $target),
+ "brsh\t$target", [(AVRbrcond bb
+ : $target, AVR_COND_SH)]>;
+
+ def BRLOk : FBRsk<0, 0b000, (outs),
+ (ins relbrtarget_7
+ : $target),
+ "brlo\t$target", [(AVRbrcond bb
+ : $target, AVR_COND_LO)]>;
+
+ def BRMIk : FBRsk<0, 0b010, (outs),
+ (ins relbrtarget_7
+ : $target),
+ "brmi\t$target", [(AVRbrcond bb
+ : $target, AVR_COND_MI)]>;
+
+ def BRPLk : FBRsk<1, 0b010, (outs),
+ (ins relbrtarget_7
+ : $target),
+ "brpl\t$target", [(AVRbrcond bb
+ : $target, AVR_COND_PL)]>;
+
+ def BRGEk : FBRsk<1, 0b100, (outs),
+ (ins relbrtarget_7
+ : $target),
+ "brge\t$target", [(AVRbrcond bb
+ : $target, AVR_COND_GE)]>;
+
+ def BRLTk : FBRsk<0, 0b100, (outs),
+ (ins relbrtarget_7
+ : $target),
+ "brlt\t$target", [(AVRbrcond bb
+ : $target, AVR_COND_LT)]>;
}
//===----------------------------------------------------------------------===//
// Data transfer instructions
//===----------------------------------------------------------------------===//
// 8 and 16-bit register move instructions.
-let hasSideEffects = 0 in
-{
- def MOVRdRr : FRdRr<0b0010,
- 0b11,
- (outs GPR8:$rd),
- (ins GPR8:$rr),
- "mov\t$rd, $rr",
- []>;
-
- def MOVWRdRr : FMOVWRdRr<(outs DREGS:$dst),
- (ins DREGS:$src),
- "movw\t$dst, $src",
- []>,
+let hasSideEffects = 0 in {
+ def MOVRdRr : FRdRr<0b0010, 0b11,
+ (outs GPR8
+ : $rd),
+ (ins GPR8
+ : $rr),
+ "mov\t$rd, $rr", []>;
+
+ def MOVWRdRr : FMOVWRdRr<(outs DREGS
+ : $dst),
+ (ins DREGS
+ : $src),
+ "movw\t$dst, $src", []>,
Requires<[HasMOVW]>;
}
// Load immediate values into registers.
-let isReMaterializable = 1 in
-{
+let isReMaterializable = 1 in {
def LDIRdK : FRdK<0b1110,
- (outs LD8:$rd),
- (ins imm_ldi8:$k),
- "ldi\t$rd, $k",
- [(set i8:$rd, imm:$k)]>;
+ (outs LD8
+ : $rd),
+ (ins imm_ldi8
+ : $k),
+ "ldi\t$rd, $k", [(set i8
+ : $rd, imm
+ : $k)]>;
// LDIW Rd+1:Rd, K+1:K
//
// Expands to:
// ldi Rd, K
// ldi Rd+1, K+1
- def LDIWRdK : Pseudo<(outs DLDREGS:$dst),
- (ins i16imm:$src),
- "ldiw\t$dst, $src",
- [(set i16:$dst, imm:$src)]>;
+ def LDIWRdK : Pseudo<(outs DLDREGS
+ : $dst),
+ (ins i16imm
+ : $src),
+ "ldiw\t$dst, $src", [(set i16
+ : $dst, imm
+ : $src)]>;
}
// Load from data space into register.
-let canFoldAsLoad = 1,
-isReMaterializable = 1 in
-{
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
def LDSRdK : F32DM<0b0,
- (outs GPR8:$rd),
- (ins imm16:$k),
- "lds\t$rd, $k",
- [(set i8:$rd, (load imm:$k))]>,
+ (outs GPR8
+ : $rd),
+ (ins imm16
+ : $k),
+ "lds\t$rd, $k", [(set i8
+ : $rd, (load imm
+ : $k))]>,
Requires<[HasSRAM]>;
// LDSW Rd+1:Rd, K+1:K
@@ -1164,23 +1287,26 @@ isReMaterializable = 1 in
// Expands to:
// lds Rd, (K+1:K)
// lds Rd+1 (K+1:K) + 1
- def LDSWRdK : Pseudo<(outs DREGS:$dst),
- (ins i16imm:$src),
- "ldsw\t$dst, $src",
- [(set i16:$dst, (load imm:$src))]>,
+ def LDSWRdK : Pseudo<(outs DREGS
+ : $dst),
+ (ins i16imm
+ : $src),
+ "ldsw\t$dst, $src", [(set i16
+ : $dst, (load imm
+ : $src))]>,
Requires<[HasSRAM]>;
}
// Indirect loads.
-let canFoldAsLoad = 1,
-isReMaterializable = 1 in
-{
- def LDRdPtr : FSTLD<0,
- 0b00,
- (outs GPR8:$reg),
- (ins LDSTPtrReg:$ptrreg),
- "ld\t$reg, $ptrreg",
- [(set GPR8:$reg, (load i16:$ptrreg))]>,
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+ def LDRdPtr : FSTLD<0, 0b00,
+ (outs GPR8
+ : $reg),
+ (ins LDSTPtrReg
+ : $ptrreg),
+ "ld\t$reg, $ptrreg", [(set GPR8
+ : $reg, (load i16
+ : $ptrreg))]>,
Requires<[HasSRAM]>;
// LDW Rd+1:Rd, P
@@ -1188,43 +1314,48 @@ isReMaterializable = 1 in
// Expands to:
// ld Rd, P
// ldd Rd+1, P+1
- let Constraints = "@earlyclobber $reg" in
- def LDWRdPtr : Pseudo<(outs DREGS:$reg),
- (ins PTRDISPREGS:$ptrreg),
- "ldw\t$reg, $ptrreg",
- [(set i16:$reg, (load i16:$ptrreg))]>,
- Requires<[HasSRAM]>;
+ let Constraints = "@earlyclobber $reg" in def LDWRdPtr
+ : Pseudo<(outs DREGS
+ : $reg),
+ (ins PTRDISPREGS
+ : $ptrreg),
+ "ldw\t$reg, $ptrreg", [(set i16
+ : $reg, (load i16
+ : $ptrreg))]>,
+ Requires<[HasSRAM]>;
}
// Indirect loads (with postincrement or predecrement).
-let mayLoad = 1,
-hasSideEffects = 0,
-Constraints = "$ptrreg = $base_wb,@earlyclobber $reg" in
-{
- def LDRdPtrPi : FSTLD<0,
- 0b01,
- (outs GPR8:$reg, PTRREGS:$base_wb),
- (ins LDSTPtrReg:$ptrreg),
- "ld\t$reg, $ptrreg+",
- []>,
+let mayLoad = 1, hasSideEffects = 0,
+ Constraints = "$ptrreg = $base_wb,@earlyclobber $reg" in {
+ def LDRdPtrPi : FSTLD<0, 0b01,
+ (outs GPR8
+ : $reg, PTRREGS
+ : $base_wb),
+ (ins LDSTPtrReg
+ : $ptrreg),
+ "ld\t$reg, $ptrreg+", []>,
Requires<[HasSRAM]>;
// LDW Rd+1:Rd, P+
// Expands to:
// ld Rd, P+
// ld Rd+1, P+
- def LDWRdPtrPi : Pseudo<(outs DREGS:$reg, PTRREGS:$base_wb),
- (ins PTRREGS:$ptrreg),
- "ldw\t$reg, $ptrreg+",
- []>,
+ def LDWRdPtrPi : Pseudo<(outs DREGS
+ : $reg, PTRREGS
+ : $base_wb),
+ (ins PTRREGS
+ : $ptrreg),
+ "ldw\t$reg, $ptrreg+", []>,
Requires<[HasSRAM]>;
- def LDRdPtrPd : FSTLD<0,
- 0b10,
- (outs GPR8:$reg, PTRREGS:$base_wb),
- (ins LDSTPtrReg:$ptrreg),
- "ld\t$reg, -$ptrreg",
- []>,
+ def LDRdPtrPd : FSTLD<0, 0b10,
+ (outs GPR8
+ : $reg, PTRREGS
+ : $base_wb),
+ (ins LDSTPtrReg
+ : $ptrreg),
+ "ld\t$reg, -$ptrreg", []>,
Requires<[HasSRAM]>;
// LDW Rd+1:Rd, -P
@@ -1232,36 +1363,42 @@ Constraints = "$ptrreg = $base_wb,@earlyclobber $reg" in
// Expands to:
// ld Rd+1, -P
// ld Rd, -P
- def LDWRdPtrPd : Pseudo<(outs DREGS:$reg, PTRREGS:$base_wb),
- (ins PTRREGS:$ptrreg),
- "ldw\t$reg, -$ptrreg",
- []>,
+ def LDWRdPtrPd : Pseudo<(outs DREGS
+ : $reg, PTRREGS
+ : $base_wb),
+ (ins PTRREGS
+ : $ptrreg),
+ "ldw\t$reg, -$ptrreg", []>,
Requires<[HasSRAM]>;
}
// Load indirect with displacement operations.
-let canFoldAsLoad = 1,
-isReMaterializable = 1 in
-{
- let Constraints = "@earlyclobber $reg" in
- def LDDRdPtrQ : FSTDLDD<0,
- (outs GPR8:$reg),
- (ins memri:$memri),
- "ldd\t$reg, $memri",
- [(set i8:$reg, (load addr:$memri))]>,
- Requires<[HasSRAM]>;
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+ let Constraints = "@earlyclobber $reg" in def LDDRdPtrQ
+ : FSTDLDD<0,
+ (outs GPR8
+ : $reg),
+ (ins memri
+ : $memri),
+ "ldd\t$reg, $memri", [(set i8
+ : $reg, (load addr
+ : $memri))]>,
+ Requires<[HasSRAM]>;
// LDDW Rd+1:Rd, P+q
//
// Expands to:
// ldd Rd, P+q
// ldd Rd+1, P+q+1
- let Constraints = "@earlyclobber $dst" in
- def LDDWRdPtrQ : Pseudo<(outs DREGS_WITHOUT_YZ_WORKAROUND:$dst),
- (ins memri:$memri),
- "lddw\t$dst, $memri",
- [(set i16:$dst, (load addr:$memri))]>,
- Requires<[HasSRAM]>;
+ let Constraints = "@earlyclobber $dst" in def LDDWRdPtrQ
+ : Pseudo<(outs DREGS_WITHOUT_YZ_WORKAROUND
+ : $dst),
+ (ins memri
+ : $memri),
+ "lddw\t$dst, $memri", [(set i16
+ : $dst, (load addr
+ : $memri))]>,
+ Requires<[HasSRAM]>;
// An identical pseudo instruction to LDDWRdPtrQ, except restricted to the Y
// register and without the @earlyclobber flag.
@@ -1270,7 +1407,8 @@ isReMaterializable = 1 in
// being able to handle the expansion of a COPY into a machine instruction
// that has an earlyclobber flag. This is because the register allocator will
// try to expand a copy from a register slot into an earlyclobber instruction.
- // Instructions that are earlyclobber need to be in a dedicated earlyclobber slot.
+ // Instructions that are earlyclobber need to be in a dedicated earlyclobber
+ // slot.
//
// This pseudo instruction can be used prior to AVR pseudo expansion in order to
// get a frame index load without directly using earlyclobber instructions.
@@ -1279,30 +1417,44 @@ isReMaterializable = 1 in
//
// This instruction may be removed once PR13375 is fixed.
let mayLoad = 1,
- hasSideEffects = 0 in
- def LDDWRdYQ : Pseudo<(outs DREGS:$dst),
- (ins memri:$memri),
- "lddw\t$dst, $memri",
- []>,
- Requires<[HasSRAM]>;
+ hasSideEffects = 0 in def LDDWRdYQ : Pseudo<(outs DREGS
+ : $dst),
+ (ins memri
+ : $memri),
+ "lddw\t$dst, $memri", []>,
+ Requires<[HasSRAM]>;
}
-class AtomicLoad<PatFrag Op, RegisterClass DRC,
- RegisterClass PTRRC> :
- Pseudo<(outs DRC:$rd), (ins PTRRC:$rr), "atomic_op",
- [(set DRC:$rd, (Op i16:$rr))]>;
-
-class AtomicStore<PatFrag Op, RegisterClass DRC,
- RegisterClass PTRRC> :
- Pseudo<(outs), (ins PTRRC:$rd, DRC:$rr), "atomic_op",
- [(Op i16:$rd, DRC:$rr)]>;
-
-let Constraints = "@earlyclobber $rd" in
-class AtomicLoadOp<PatFrag Op, RegisterClass DRC,
- RegisterClass PTRRC> :
- Pseudo<(outs DRC:$rd), (ins PTRRC:$rr, DRC:$operand),
- "atomic_op",
- [(set DRC:$rd, (Op i16:$rr, DRC:$operand))]>;
+class AtomicLoad<PatFrag Op, RegisterClass DRC, RegisterClass PTRRC>
+ : Pseudo<(outs DRC
+ : $rd),
+ (ins PTRRC
+ : $rr),
+ "atomic_op", [(set DRC
+ : $rd, (Op i16
+ : $rr))]>;
+
+class AtomicStore<PatFrag Op, RegisterClass DRC, RegisterClass PTRRC>
+ : Pseudo<(outs),
+ (ins PTRRC
+ : $rd, DRC
+ : $rr),
+ "atomic_op", [(Op i16
+ : $rd, DRC
+ : $rr)]>;
+
+let Constraints =
+ "@earlyclobber $rd" in class AtomicLoadOp<PatFrag Op, RegisterClass DRC,
+ RegisterClass PTRRC>
+ : Pseudo<(outs DRC
+ : $rd),
+ (ins PTRRC
+ : $rr, DRC
+ : $operand),
+ "atomic_op", [(set DRC
+ : $rd, (Op i16
+ : $rr, DRC
+ : $operand))]>;
// FIXME: I think 16-bit atomic binary ops need to mark
// r0 as clobbered.
@@ -1318,34 +1470,36 @@ class AtomicLoadOp<PatFrag Op, RegisterClass DRC,
// 16-bit operations use 16-bit load/store postincrement instructions,
// which require PTRDISPREGS.
-def AtomicLoad8 : AtomicLoad<atomic_load_8, GPR8, PTRREGS>;
-def AtomicLoad16 : AtomicLoad<atomic_load_16, DREGS, PTRDISPREGS>;
+def AtomicLoad8 : AtomicLoad<atomic_load_8, GPR8, PTRREGS>;
+def AtomicLoad16 : AtomicLoad<atomic_load_16, DREGS, PTRDISPREGS>;
-def AtomicStore8 : AtomicStore<atomic_store_8, GPR8, PTRREGS>;
+def AtomicStore8 : AtomicStore<atomic_store_8, GPR8, PTRREGS>;
def AtomicStore16 : AtomicStore<atomic_store_16, DREGS, PTRDISPREGS>;
class AtomicLoadOp8<PatFrag Op> : AtomicLoadOp<Op, GPR8, PTRREGS>;
class AtomicLoadOp16<PatFrag Op> : AtomicLoadOp<Op, DREGS, PTRDISPREGS>;
-def AtomicLoadAdd8 : AtomicLoadOp8<atomic_load_add_8>;
+def AtomicLoadAdd8 : AtomicLoadOp8<atomic_load_add_8>;
def AtomicLoadAdd16 : AtomicLoadOp16<atomic_load_add_16>;
-def AtomicLoadSub8 : AtomicLoadOp8<atomic_load_sub_8>;
+def AtomicLoadSub8 : AtomicLoadOp8<atomic_load_sub_8>;
def AtomicLoadSub16 : AtomicLoadOp16<atomic_load_sub_16>;
-def AtomicLoadAnd8 : AtomicLoadOp8<atomic_load_and_8>;
+def AtomicLoadAnd8 : AtomicLoadOp8<atomic_load_and_8>;
def AtomicLoadAnd16 : AtomicLoadOp16<atomic_load_and_16>;
-def AtomicLoadOr8 : AtomicLoadOp8<atomic_load_or_8>;
-def AtomicLoadOr16 : AtomicLoadOp16<atomic_load_or_16>;
-def AtomicLoadXor8 : AtomicLoadOp8<atomic_load_xor_8>;
+def AtomicLoadOr8 : AtomicLoadOp8<atomic_load_or_8>;
+def AtomicLoadOr16 : AtomicLoadOp16<atomic_load_or_16>;
+def AtomicLoadXor8 : AtomicLoadOp8<atomic_load_xor_8>;
def AtomicLoadXor16 : AtomicLoadOp16<atomic_load_xor_16>;
-def AtomicFence : Pseudo<(outs), (ins), "atomic_fence",
- [(atomic_fence timm, timm)]>;
+def AtomicFence
+ : Pseudo<(outs), (ins), "atomic_fence", [(atomic_fence timm, timm)]>;
// Indirect store from register to data space.
-def STSKRr : F32DM<0b1,
- (outs),
- (ins imm16:$k, GPR8:$rd),
- "sts\t$k, $rd",
- [(store i8:$rd, imm:$k)]>,
+def STSKRr : F32DM<0b1, (outs),
+ (ins imm16
+ : $k, GPR8
+ : $rd),
+ "sts\t$k, $rd", [(store i8
+ : $rd, imm
+ : $k)]>,
Requires<[HasSRAM]>;
// STSW K+1:K, Rr+1:Rr
@@ -1354,20 +1508,24 @@ def STSKRr : F32DM<0b1,
// sts Rr+1, (K+1:K) + 1
// sts Rr, (K+1:K)
def STSWKRr : Pseudo<(outs),
- (ins i16imm:$dst, DREGS:$src),
- "stsw\t$dst, $src",
- [(store i16:$src, imm:$dst)]>,
+ (ins i16imm
+ : $dst, DREGS
+ : $src),
+ "stsw\t$dst, $src", [(store i16
+ : $src, imm
+ : $dst)]>,
Requires<[HasSRAM]>;
// Indirect stores.
// ST P, Rr
// Stores the value of Rr into the location addressed by pointer P.
-def STPtrRr : FSTLD<1,
- 0b00,
- (outs),
- (ins LDSTPtrReg:$ptrreg, GPR8:$reg),
- "st\t$ptrreg, $reg",
- [(store GPR8:$reg, i16:$ptrreg)]>,
+def STPtrRr : FSTLD<1, 0b00, (outs),
+ (ins LDSTPtrReg
+ : $ptrreg, GPR8
+ : $reg),
+ "st\t$ptrreg, $reg", [(store GPR8
+ : $reg, i16
+ : $ptrreg)]>,
Requires<[HasSRAM]>;
// STW P, Rr+1:Rr
@@ -1377,25 +1535,32 @@ def STPtrRr : FSTLD<1,
// st P, Rr
// std P+1, Rr+1
def STWPtrRr : Pseudo<(outs),
- (ins PTRDISPREGS:$ptrreg, DREGS:$reg),
- "stw\t$ptrreg, $reg",
- [(store i16:$reg, i16:$ptrreg)]>,
+ (ins PTRDISPREGS
+ : $ptrreg, DREGS
+ : $reg),
+ "stw\t$ptrreg, $reg", [(store i16
+ : $reg, i16
+ : $ptrreg)]>,
Requires<[HasSRAM]>;
// Indirect stores (with postincrement or predecrement).
-let Constraints = "$ptrreg = $base_wb,@earlyclobber $base_wb" in
-{
+let Constraints = "$ptrreg = $base_wb,@earlyclobber $base_wb" in {
// ST P+, Rr
// Stores the value of Rr into the location addressed by pointer P.
// Post increments P.
- def STPtrPiRr : FSTLD<1,
- 0b01,
- (outs LDSTPtrReg:$base_wb),
- (ins LDSTPtrReg:$ptrreg, GPR8:$reg, i8imm:$offs),
- "st\t$ptrreg+, $reg",
- [(set i16:$base_wb,
- (post_store GPR8:$reg, i16:$ptrreg, imm:$offs))]>,
+ def STPtrPiRr : FSTLD<1, 0b01,
+ (outs LDSTPtrReg
+ : $base_wb),
+ (ins LDSTPtrReg
+ : $ptrreg, GPR8
+ : $reg, i8imm
+ : $offs),
+ "st\t$ptrreg+, $reg", [(set i16
+ : $base_wb, (post_store GPR8
+ : $reg, i16
+ : $ptrreg, imm
+ : $offs))]>,
Requires<[HasSRAM]>;
// STW P+, Rr+1:Rr
@@ -1405,23 +1570,34 @@ let Constraints = "$ptrreg = $base_wb,@earlyclobber $base_wb" in
// Expands to:
// st P+, Rr
// st P+, Rr+1
- def STWPtrPiRr : Pseudo<(outs PTRREGS:$base_wb),
- (ins PTRREGS:$ptrreg, DREGS:$trh, i8imm:$offs),
- "stw\t$ptrreg+, $trh",
- [(set PTRREGS:$base_wb,
- (post_store DREGS:$trh, PTRREGS:$ptrreg, imm:$offs))]>,
+ def STWPtrPiRr : Pseudo<(outs PTRREGS
+ : $base_wb),
+ (ins PTRREGS
+ : $ptrreg, DREGS
+ : $trh, i8imm
+ : $offs),
+ "stw\t$ptrreg+, $trh", [(set PTRREGS
+ : $base_wb, (post_store DREGS
+ : $trh, PTRREGS
+ : $ptrreg, imm
+ : $offs))]>,
Requires<[HasSRAM]>;
// ST -P, Rr
// Stores the value of Rr into the location addressed by pointer P.
// Pre decrements P.
- def STPtrPdRr : FSTLD<1,
- 0b10,
- (outs LDSTPtrReg:$base_wb),
- (ins LDSTPtrReg:$ptrreg, GPR8:$reg, i8imm:$offs),
- "st\t-$ptrreg, $reg",
- [(set i16:$base_wb,
- (pre_store GPR8:$reg, i16:$ptrreg, imm:$offs))]>,
+ def STPtrPdRr : FSTLD<1, 0b10,
+ (outs LDSTPtrReg
+ : $base_wb),
+ (ins LDSTPtrReg
+ : $ptrreg, GPR8
+ : $reg, i8imm
+ : $offs),
+ "st\t-$ptrreg, $reg", [(set i16
+ : $base_wb, (pre_store GPR8
+ : $reg, i16
+ : $ptrreg, imm
+ : $offs))]>,
Requires<[HasSRAM]>;
// STW -P, Rr+1:Rr
@@ -1431,11 +1607,17 @@ let Constraints = "$ptrreg = $base_wb,@earlyclobber $base_wb" in
// Expands to:
// st -P, Rr+1
// st -P, Rr
- def STWPtrPdRr : Pseudo<(outs PTRREGS:$base_wb),
- (ins PTRREGS:$ptrreg, DREGS:$reg, i8imm:$offs),
- "stw\t-$ptrreg, $reg",
- [(set PTRREGS:$base_wb,
- (pre_store i16:$reg, i16:$ptrreg, imm:$offs))]>,
+ def STWPtrPdRr : Pseudo<(outs PTRREGS
+ : $base_wb),
+ (ins PTRREGS
+ : $ptrreg, DREGS
+ : $reg, i8imm
+ : $offs),
+ "stw\t-$ptrreg, $reg", [(set PTRREGS
+ : $base_wb, (pre_store i16
+ : $reg, i16
+ : $ptrreg, imm
+ : $offs))]>,
Requires<[HasSRAM]>;
}
@@ -1443,11 +1625,13 @@ let Constraints = "$ptrreg = $base_wb,@earlyclobber $base_wb" in
// STD P+q, Rr
// Stores the value of Rr into the location addressed by pointer P with a
// displacement of q. Does not modify P.
-def STDPtrQRr : FSTDLDD<1,
- (outs),
- (ins memri:$memri, GPR8:$reg),
- "std\t$memri, $reg",
- [(store i8:$reg, addr:$memri)]>,
+def STDPtrQRr : FSTDLDD<1, (outs),
+ (ins memri
+ : $memri, GPR8
+ : $reg),
+ "std\t$memri, $reg", [(store i8
+ : $reg, addr
+ : $memri)]>,
Requires<[HasSRAM]>;
// STDW P+q, Rr+1:Rr
@@ -1458,206 +1642,192 @@ def STDPtrQRr : FSTDLDD<1,
// std P+q, Rr
// std P+q+1, Rr+1
def STDWPtrQRr : Pseudo<(outs),
- (ins memri:$memri, DREGS:$src),
- "stdw\t$memri, $src",
- [(store i16:$src, addr:$memri)]>,
+ (ins memri
+ : $memri, DREGS
+ : $src),
+ "stdw\t$memri, $src", [(store i16
+ : $src, addr
+ : $memri)]>,
Requires<[HasSRAM]>;
-
// Load program memory operations.
-let canFoldAsLoad = 1,
-isReMaterializable = 1,
-mayLoad = 1,
-hasSideEffects = 0 in
-{
+let canFoldAsLoad = 1, isReMaterializable = 1, mayLoad = 1,
+ hasSideEffects = 0 in {
let Defs = [R0],
- Uses = [R31R30] in
- def LPM : F16<0b1001010111001000,
- (outs),
- (ins),
- "lpm",
- []>,
- Requires<[HasLPM]>;
-
- def LPMRdZ : FLPMX<0,
- 0,
- (outs GPR8:$dst),
- (ins ZREG:$z),
- "lpm\t$dst, $z",
- []>,
+ Uses = [R31R30] in def LPM
+ : F16<0b1001010111001000, (outs), (ins), "lpm", []>,
+ Requires<[HasLPM]>;
+
+ def LPMRdZ : FLPMX<0, 0,
+ (outs GPR8
+ : $dst),
+ (ins ZREG
+ : $z),
+ "lpm\t$dst, $z", []>,
Requires<[HasLPMX]>;
// Load program memory, while postincrementing the Z register.
- let Defs = [R31R30] in
- {
- def LPMRdZPi : FLPMX<0,
- 1,
- (outs GPR8:$dst),
- (ins ZREG:$z),
- "lpm\t$dst, $z+",
- []>,
+ let Defs = [R31R30] in {
+ def LPMRdZPi : FLPMX<0, 1,
+ (outs GPR8
+ : $dst),
+ (ins ZREG
+ : $z),
+ "lpm\t$dst, $z+", []>,
Requires<[HasLPMX]>;
- def LPMWRdZ : Pseudo<(outs DREGS:$dst),
- (ins ZREG:$z),
- "lpmw\t$dst, $z",
- []>,
+ def LPMWRdZ : Pseudo<(outs DREGS
+ : $dst),
+ (ins ZREG
+ : $z),
+ "lpmw\t$dst, $z", []>,
Requires<[HasLPMX]>;
- def LPMWRdZPi : Pseudo<(outs DREGS:$dst),
- (ins ZREG:$z),
- "lpmw\t$dst, $z+",
- []>,
+ def LPMWRdZPi : Pseudo<(outs DREGS
+ : $dst),
+ (ins ZREG
+ : $z),
+ "lpmw\t$dst, $z+", []>,
Requires<[HasLPMX]>;
}
}
// Extended load program memory operations.
-let mayLoad = 1,
-hasSideEffects = 0 in
-{
+let mayLoad = 1, hasSideEffects = 0 in {
let Defs = [R0],
- Uses = [R31R30] in
- def ELPM : F16<0b1001010111011000,
- (outs),
- (ins),
- "elpm",
- []>,
- Requires<[HasELPM]>;
-
- def ELPMRdZ : FLPMX<1,
- 0,
- (outs GPR8:$dst),
- (ins ZREG:$z),
- "elpm\t$dst, $z",
- []>,
+ Uses = [R31R30] in def ELPM
+ : F16<0b1001010111011000, (outs), (ins), "elpm", []>,
+ Requires<[HasELPM]>;
+
+ def ELPMRdZ : FLPMX<1, 0,
+ (outs GPR8
+ : $dst),
+ (ins ZREG
+ : $z),
+ "elpm\t$dst, $z", []>,
Requires<[HasELPMX]>;
- let Defs = [R31R30] in
- def ELPMRdZPi : FLPMX<1,
- 1,
- (outs GPR8:$dst),
- (ins ZREG: $z),
- "elpm\t$dst, $z+",
- []>,
- Requires<[HasELPMX]>;
+ let Defs = [R31R30] in def ELPMRdZPi : FLPMX<1, 1,
+ (outs GPR8
+ : $dst),
+ (ins ZREG
+ : $z),
+ "elpm\t$dst, $z+", []>,
+ Requires<[HasELPMX]>;
}
// Store program memory operations.
-let Uses = [R1, R0] in
-{
- let Uses = [R31R30, R1, R0] in
- def SPM : F16<0b1001010111101000,
- (outs),
- (ins),
- "spm",
- []>,
- Requires<[HasSPM]>;
-
- let Defs = [R31R30] in
- def SPMZPi : F16<0b1001010111111000,
- (outs),
- (ins ZREG:$z),
- "spm $z+",
- []>,
- Requires<[HasSPMX]>;
+let Uses = [R1, R0] in {
+ let Uses = [R31R30, R1, R0] in def SPM
+ : F16<0b1001010111101000, (outs), (ins), "spm", []>,
+ Requires<[HasSPM]>;
+
+ let Defs = [R31R30] in def SPMZPi : F16<0b1001010111111000, (outs),
+ (ins ZREG
+ : $z),
+ "spm $z+", []>,
+ Requires<[HasSPMX]>;
}
// Read data from IO location operations.
-let canFoldAsLoad = 1,
-isReMaterializable = 1 in
-{
- def INRdA : FIORdA<(outs GPR8:$dst),
- (ins imm_port6:$src),
- "in\t$dst, $src",
- [(set i8:$dst, (load ioaddr8:$src))]>;
-
- def INWRdA : Pseudo<(outs DREGS:$dst),
- (ins imm_port6:$src),
- "inw\t$dst, $src",
- [(set i16:$dst, (load ioaddr16:$src))]>;
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+ def INRdA : FIORdA<(outs GPR8
+ : $dst),
+ (ins imm_port6
+ : $src),
+ "in\t$dst, $src", [(set i8
+ : $dst, (load ioaddr8
+ : $src))]>;
+
+ def INWRdA : Pseudo<(outs DREGS
+ : $dst),
+ (ins imm_port6
+ : $src),
+ "inw\t$dst, $src", [(set i16
+ : $dst, (load ioaddr16
+ : $src))]>;
}
// Write data to IO location operations.
def OUTARr : FIOARr<(outs),
- (ins imm_port6:$dst, GPR8:$src),
- "out\t$dst, $src",
- [(store i8:$src, ioaddr8:$dst)]>;
+ (ins imm_port6
+ : $dst, GPR8
+ : $src),
+ "out\t$dst, $src", [(store i8
+ : $src, ioaddr8
+ : $dst)]>;
def OUTWARr : Pseudo<(outs),
- (ins imm_port6:$dst, DREGS:$src),
- "outw\t$dst, $src",
- [(store i16:$src, ioaddr16:$dst)]>;
+ (ins imm_port6
+ : $dst, DREGS
+ : $src),
+ "outw\t$dst, $src", [(store i16
+ : $src, ioaddr16
+ : $dst)]>;
// Stack push/pop operations.
-let Defs = [SP],
-Uses = [SP],
-hasSideEffects = 0 in
-{
+let Defs = [SP], Uses = [SP], hasSideEffects = 0 in {
// Stack push operations.
- let mayStore = 1 in
- {
- def PUSHRr : FRd<0b1001,
- 0b0011111,
- (outs),
- (ins GPR8:$reg),
- "push\t$reg",
- []>,
+ let mayStore = 1 in {
+ def PUSHRr : FRd<0b1001, 0b0011111, (outs),
+ (ins GPR8
+ : $reg),
+ "push\t$reg", []>,
Requires<[HasSRAM]>;
def PUSHWRr : Pseudo<(outs),
- (ins DREGS:$reg),
- "pushw\t$reg",
- []>,
+ (ins DREGS
+ : $reg),
+ "pushw\t$reg", []>,
Requires<[HasSRAM]>;
}
// Stack pop operations.
- let mayLoad = 1 in
- {
- def POPRd : FRd<0b1001,
- 0b0001111,
- (outs GPR8:$reg),
- (ins),
- "pop\t$reg",
- []>,
+ let mayLoad = 1 in {
+ def POPRd : FRd<0b1001, 0b0001111,
+ (outs GPR8
+ : $reg),
+ (ins), "pop\t$reg", []>,
Requires<[HasSRAM]>;
- def POPWRd : Pseudo<(outs DREGS:$reg),
- (ins),
- "popw\t$reg",
- []>,
+ def POPWRd : Pseudo<(outs DREGS
+ : $reg),
+ (ins), "popw\t$reg", []>,
Requires<[HasSRAM]>;
}
}
// Read-Write-Modify (RMW) instructions.
def XCHZRd : FZRd<0b100,
- (outs GPR8:$rd),
- (ins ZREG:$z),
- "xch\t$z, $rd",
- []>,
+ (outs GPR8
+ : $rd),
+ (ins ZREG
+ : $z),
+ "xch\t$z, $rd", []>,
Requires<[SupportsRMW]>;
def LASZRd : FZRd<0b101,
- (outs GPR8:$rd),
- (ins ZREG:$z),
- "las\t$z, $rd",
- []>,
+ (outs GPR8
+ : $rd),
+ (ins ZREG
+ : $z),
+ "las\t$z, $rd", []>,
Requires<[SupportsRMW]>;
def LACZRd : FZRd<0b110,
- (outs GPR8:$rd),
- (ins ZREG:$z),
- "lac\t$z, $rd",
- []>,
+ (outs GPR8
+ : $rd),
+ (ins ZREG
+ : $z),
+ "lac\t$z, $rd", []>,
Requires<[SupportsRMW]>;
def LATZRd : FZRd<0b111,
- (outs GPR8:$rd),
- (ins ZREG:$z),
- "lat\t$z, $rd",
- []>,
+ (outs GPR8
+ : $rd),
+ (ins ZREG
+ : $z),
+ "lat\t$z, $rd", []>,
Requires<[SupportsRMW]>;
//===----------------------------------------------------------------------===//
@@ -1665,187 +1835,277 @@ def LATZRd : FZRd<0b111,
//===----------------------------------------------------------------------===//
// Bit shift/rotate operations.
-let Constraints = "$src = $rd",
-Defs = [SREG] in
-{
+let Constraints = "$src = $rd", Defs = [SREG] in {
// 8-bit LSL is an alias of ADD Rd, Rd
- def LSLWRd : Pseudo<(outs DREGS:$rd),
- (ins DREGS:$src),
+ def LSLWRd : Pseudo<(outs DREGS
+ : $rd),
+ (ins DREGS
+ : $src),
"lslw\t$rd",
- [(set i16:$rd, (AVRlsl i16:$src)), (implicit SREG)]>;
-
- def LSLWNRd : Pseudo<(outs DLDREGS:$rd),
- (ins DREGS:$src, imm16:$bits),
- "lslwn\t$rd, $bits",
- [(set i16:$rd, (AVRlslwn i16:$src, imm:$bits)),
- (implicit SREG)]>;
-
- def LSLBNRd : Pseudo<(outs LD8:$rd),
- (ins GPR8:$src, imm_ldi8:$bits),
- "lslbn\t$rd, $bits",
- [(set i8:$rd, (AVRlslbn i8:$src, imm:$bits)),
- (implicit SREG)]>;
+ [(set i16
+ : $rd, (AVRlsl i16
+ : $src)),
+ (implicit SREG)]>;
- def LSRRd : FRd<0b1001,
- 0b0100110,
- (outs GPR8:$rd),
- (ins GPR8:$src),
- "lsr\t$rd",
- [(set i8:$rd, (AVRlsr i8:$src)), (implicit SREG)]>;
+ def LSLWNRd : Pseudo<(outs DLDREGS
+ : $rd),
+ (ins DREGS
+ : $src, imm16
+ : $bits),
+ "lslwn\t$rd, $bits", [
+ (set i16
+ : $rd, (AVRlslwn i16
+ : $src, imm
+ : $bits)),
+ (implicit SREG)
+ ]>;
+
+ def LSLBNRd : Pseudo<(outs LD8
+ : $rd),
+ (ins GPR8
+ : $src, imm_ldi8
+ : $bits),
+ "lslbn\t$rd, $bits", [
+ (set i8
+ : $rd, (AVRlslbn i8
+ : $src, imm
+ : $bits)),
+ (implicit SREG)
+ ]>;
+
+ def LSRRd
+ : FRd<0b1001, 0b0100110,
+ (outs GPR8
+ : $rd),
+ (ins GPR8
+ : $src),
+ "lsr\t$rd", [(set i8
+ : $rd, (AVRlsr i8
+ : $src)),
+ (implicit SREG)]>;
- def LSRWRd : Pseudo<(outs DREGS:$rd),
- (ins DREGS:$src),
+ def LSRWRd : Pseudo<(outs DREGS
+ : $rd),
+ (ins DREGS
+ : $src),
"lsrw\t$rd",
- [(set i16:$rd, (AVRlsr i16:$src)), (implicit SREG)]>;
-
- def LSRWNRd : Pseudo<(outs DLDREGS:$rd),
- (ins DREGS:$src, imm16:$bits),
- "lsrwn\t$rd, $bits",
- [(set i16:$rd, (AVRlsrwn i16:$src, imm:$bits)),
- (implicit SREG)]>;
-
- def LSRBNRd : Pseudo<(outs LD8:$rd),
- (ins GPR8:$src, imm_ldi8:$bits),
- "lsrbn\t$rd, $bits",
- [(set i8:$rd, (AVRlsrbn i8:$src, imm:$bits)),
- (implicit SREG)]>;
-
- def ASRRd : FRd<0b1001,
- 0b0100101,
- (outs GPR8:$rd),
- (ins GPR8:$src),
- "asr\t$rd",
- [(set i8:$rd, (AVRasr i8:$src)), (implicit SREG)]>;
-
- def ASRWNRd : Pseudo<(outs DLDREGS:$rd),
- (ins DREGS:$src, imm16:$bits),
- "asrwn\t$rd, $bits",
- [(set i16:$rd, (AVRasrwn i16:$src, imm:$bits)),
- (implicit SREG)]>;
+ [(set i16
+ : $rd, (AVRlsr i16
+ : $src)),
+ (implicit SREG)]>;
- def ASRBNRd : Pseudo<(outs LD8:$rd),
- (ins GPR8:$src, imm_ldi8:$bits),
- "asrbn\t$rd, $bits",
- [(set i8:$rd, (AVRasrbn i8:$src, imm:$bits)),
- (implicit SREG)]>;
+ def LSRWNRd : Pseudo<(outs DLDREGS
+ : $rd),
+ (ins DREGS
+ : $src, imm16
+ : $bits),
+ "lsrwn\t$rd, $bits", [
+ (set i16
+ : $rd, (AVRlsrwn i16
+ : $src, imm
+ : $bits)),
+ (implicit SREG)
+ ]>;
+
+ def LSRBNRd : Pseudo<(outs LD8
+ : $rd),
+ (ins GPR8
+ : $src, imm_ldi8
+ : $bits),
+ "lsrbn\t$rd, $bits", [
+ (set i8
+ : $rd, (AVRlsrbn i8
+ : $src, imm
+ : $bits)),
+ (implicit SREG)
+ ]>;
+
+ def ASRRd
+ : FRd<0b1001, 0b0100101,
+ (outs GPR8
+ : $rd),
+ (ins GPR8
+ : $src),
+ "asr\t$rd", [(set i8
+ : $rd, (AVRasr i8
+ : $src)),
+ (implicit SREG)]>;
- def ASRWRd : Pseudo<(outs DREGS:$rd),
- (ins DREGS:$src),
+ def ASRWNRd : Pseudo<(outs DLDREGS
+ : $rd),
+ (ins DREGS
+ : $src, imm16
+ : $bits),
+ "asrwn\t$rd, $bits", [
+ (set i16
+ : $rd, (AVRasrwn i16
+ : $src, imm
+ : $bits)),
+ (implicit SREG)
+ ]>;
+
+ def ASRBNRd : Pseudo<(outs LD8
+ : $rd),
+ (ins GPR8
+ : $src, imm_ldi8
+ : $bits),
+ "asrbn\t$rd, $bits", [
+ (set i8
+ : $rd, (AVRasrbn i8
+ : $src, imm
+ : $bits)),
+ (implicit SREG)
+ ]>;
+
+ def ASRWRd : Pseudo<(outs DREGS
+ : $rd),
+ (ins DREGS
+ : $src),
"asrw\t$rd",
- [(set i16:$rd, (AVRasr i16:$src)), (implicit SREG)]>;
+ [(set i16
+ : $rd, (AVRasr i16
+ : $src)),
+ (implicit SREG)]>;
- def ROLBRd : Pseudo<(outs GPR8:$rd),
- (ins GPR8:$src),
+ def ROLBRd : Pseudo<(outs GPR8
+ : $rd),
+ (ins GPR8
+ : $src),
"rolb\t$rd",
- [(set i8:$rd, (AVRrol i8:$src)), (implicit SREG)]>;
+ [(set i8
+ : $rd, (AVRrol i8
+ : $src)),
+ (implicit SREG)]>;
- def RORBRd : Pseudo<(outs GPR8:$rd),
- (ins GPR8:$src),
+ def RORBRd : Pseudo<(outs GPR8
+ : $rd),
+ (ins GPR8
+ : $src),
"rorb\t$rd",
- [(set i8:$rd, (AVRror i8:$src)), (implicit SREG)]>;
+ [(set i8
+ : $rd, (AVRror i8
+ : $src)),
+ (implicit SREG)]>;
// Bit rotate operations.
- let Uses = [SREG] in
- {
-
- def ROLWRd : Pseudo<(outs DREGS:$rd),
- (ins DREGS:$src),
- "rolw\t$rd",
- [(set i16:$rd, (AVRrol i16:$src)), (implicit SREG)]>;
-
- def RORRd : FRd<0b1001,
- 0b0100111,
- (outs GPR8:$rd),
- (ins GPR8:$src),
- "ror\t$rd",
- []>;
-
- def RORWRd : Pseudo<(outs DREGS:$rd),
- (ins DREGS:$src),
- "rorw\t$rd",
- [(set i16:$rd, (AVRror i16:$src)), (implicit SREG)]>;
+ let Uses = [SREG] in {
+
+ def ROLWRd
+ : Pseudo<(outs DREGS
+ : $rd),
+ (ins DREGS
+ : $src),
+ "rolw\t$rd",
+ [(set i16
+ : $rd, (AVRrol i16
+ : $src)),
+ (implicit SREG)]>;
+
+ def RORRd : FRd<0b1001, 0b0100111,
+ (outs GPR8
+ : $rd),
+ (ins GPR8
+ : $src),
+ "ror\t$rd", []>;
+
+ def RORWRd
+ : Pseudo<(outs DREGS
+ : $rd),
+ (ins DREGS
+ : $src),
+ "rorw\t$rd",
+ [(set i16
+ : $rd, (AVRror i16
+ : $src)),
+ (implicit SREG)]>;
}
}
// SWAP Rd
// Swaps the high and low nibbles in a register.
-let Constraints = "$src = $rd" in
-def SWAPRd : FRd<0b1001,
- 0b0100010,
- (outs GPR8:$rd),
- (ins GPR8:$src),
- "swap\t$rd",
- [(set i8:$rd, (AVRSwap i8:$src))]>;
+let Constraints =
+ "$src = $rd" in def SWAPRd : FRd<0b1001, 0b0100010,
+ (outs GPR8
+ : $rd),
+ (ins GPR8
+ : $src),
+ "swap\t$rd", [(set i8
+ : $rd, (AVRSwap i8
+ : $src))]>;
// IO register bit set/clear operations.
-//:TODO: add patterns when popcount(imm)==2 to be expanded with 2 sbi/cbi
+//: TODO: add patterns when popcount(imm)==2 to be expanded with 2 sbi/cbi
// instead of in+ori+out which requires one more instr.
-def SBIAb : FIOBIT<0b10,
- (outs),
- (ins imm_port5:$addr, i8imm:$bit),
- "sbi\t$addr, $bit",
- [(store (or (i8 (load lowioaddr8:$addr)), iobitpos8:$bit),
- lowioaddr8:$addr)]>;
-
-def CBIAb : FIOBIT<0b00,
- (outs),
- (ins imm_port5:$addr, i8imm:$bit),
- "cbi\t$addr, $bit",
- [(store (and (i8 (load lowioaddr8:$addr)), iobitposn8:$bit),
- lowioaddr8:$addr)]>;
+def SBIAb : FIOBIT<0b10, (outs),
+ (ins imm_port5
+ : $addr, i8imm
+ : $bit),
+ "sbi\t$addr, $bit", [(store(or(i8(load lowioaddr8
+ : $addr)),
+ iobitpos8
+ : $bit),
+ lowioaddr8
+ : $addr)]>;
+
+def CBIAb : FIOBIT<0b00, (outs),
+ (ins imm_port5
+ : $addr, i8imm
+ : $bit),
+ "cbi\t$addr, $bit", [(store(and(i8(load lowioaddr8
+ : $addr)),
+ iobitposn8
+ : $bit),
+ lowioaddr8
+ : $addr)]>;
// Status register bit load/store operations.
-let Defs = [SREG] in
-def BST : FRdB<0b01,
- (outs),
- (ins GPR8:$rd, i8imm:$b),
- "bst\t$rd, $b",
- []>;
+let Defs = [SREG] in def BST : FRdB<0b01, (outs),
+ (ins GPR8
+ : $rd, i8imm
+ : $b),
+ "bst\t$rd, $b", []>;
let Constraints = "$src = $rd",
-Uses = [SREG] in
-def BLD : FRdB<0b00,
- (outs GPR8:$rd),
- (ins GPR8:$src, i8imm:$b),
- "bld\t$rd, $b",
- []>;
+ Uses = [SREG] in def BLD : FRdB<0b00,
+ (outs GPR8
+ : $rd),
+ (ins GPR8
+ : $src, i8imm
+ : $b),
+ "bld\t$rd, $b", []>;
-def CBR : InstAlias<"cbr\t$rd, $k", (ANDIRdK LD8:$rd, imm_com8:$k), 0>;
+def CBR : InstAlias<"cbr\t$rd, $k", (ANDIRdK LD8 : $rd, imm_com8 : $k), 0>;
// CLR Rd
// Alias for EOR Rd, Rd
// -------------
// Clears all bits in a register.
-def CLR : InstAlias<"clr\t$rd", (EORRdRr GPR8:$rd, GPR8:$rd)>;
+def CLR : InstAlias<"clr\t$rd", (EORRdRr GPR8 : $rd, GPR8 : $rd)>;
// LSL Rd
// Alias for ADD Rd, Rd
// --------------
// Logical shift left one bit.
-def LSL : InstAlias<"lsl\t$rd", (ADDRdRr GPR8:$rd, GPR8:$rd)>;
+def LSL : InstAlias<"lsl\t$rd", (ADDRdRr GPR8 : $rd, GPR8 : $rd)>;
-def ROL : InstAlias<"rol\t$rd", (ADCRdRr GPR8:$rd, GPR8:$rd)>;
+def ROL : InstAlias<"rol\t$rd", (ADCRdRr GPR8 : $rd, GPR8 : $rd)>;
// SER Rd
// Alias for LDI Rd, 0xff
// ---------
// Sets all bits in a register.
-def : InstAlias<"ser\t$rd", (LDIRdK LD8:$rd, 0xff), 0>;
-
-let Defs = [SREG] in
-def BSETs : FS<0,
- (outs),
- (ins i8imm:$s),
- "bset\t$s",
- []>;
-
-let Defs = [SREG] in
-def BCLRs : FS<1,
- (outs),
- (ins i8imm:$s),
- "bclr\t$s",
- []>;
+def : InstAlias<"ser\t$rd", (LDIRdK LD8 : $rd, 0xff), 0>;
+
+let Defs = [SREG] in def BSETs : FS<0, (outs),
+ (ins i8imm
+ : $s),
+ "bset\t$s", []>;
+
+let Defs = [SREG] in def BCLRs : FS<1, (outs),
+ (ins i8imm
+ : $s),
+ "bclr\t$s", []>;
// Set/clear aliases for the carry (C) status flag (bit 0).
def : InstAlias<"sec", (BSETs 0)>;
@@ -1887,284 +2147,353 @@ def : InstAlias<"cli", (BCLRs 7)>;
// Breakpoint instruction
// ---------
// <|1001|0101|1001|1000>
-def BREAK : F16<0b1001010110011000,
- (outs),
- (ins),
- "break",
- []>,
+def BREAK : F16<0b1001010110011000, (outs), (ins), "break", []>,
Requires<[HasBREAK]>;
// NOP
// No-operation instruction
// ---------
// <|0000|0000|0000|0000>
-def NOP : F16<0b0000000000000000,
- (outs),
- (ins),
- "nop",
- []>;
+def NOP : F16<0b0000000000000000, (outs), (ins), "nop", []>;
// SLEEP
// Sleep instruction
// ---------
// <|1001|0101|1000|1000>
-def SLEEP : F16<0b1001010110001000,
- (outs),
- (ins),
- "sleep",
- []>;
+def SLEEP : F16<0b1001010110001000, (outs), (ins), "sleep", []>;
// WDR
// Watchdog reset
// ---------
// <|1001|0101|1010|1000>
-def WDR : F16<0b1001010110101000,
- (outs),
- (ins),
- "wdr",
- []>;
+def WDR : F16<0b1001010110101000, (outs), (ins), "wdr", []>;
//===----------------------------------------------------------------------===//
// Pseudo instructions for later expansion
//===----------------------------------------------------------------------===//
-//:TODO: Optimize this for wider types AND optimize the following code
+//: TODO: Optimize this for wider types AND optimize the following code
// compile int foo(char a, char b, char c, char d) {return d+b;}
// looks like a missed sext_inreg opportunity.
-def SEXT : ExtensionPseudo<
- (outs DREGS:$dst),
- (ins GPR8:$src),
- "sext\t$dst, $src",
- [(set i16:$dst, (sext i8:$src)), (implicit SREG)]
->;
-
-def ZEXT : ExtensionPseudo<
- (outs DREGS:$dst),
- (ins GPR8:$src),
- "zext\t$dst, $src",
- [(set i16:$dst, (zext i8:$src)), (implicit SREG)]
->;
+def SEXT
+ : ExtensionPseudo<(outs DREGS
+ : $dst),
+ (ins GPR8
+ : $src),
+ "sext\t$dst, $src",
+ [(set i16
+ : $dst, (sext i8
+ : $src)),
+ (implicit SREG)]>;
+
+def ZEXT
+ : ExtensionPseudo<(outs DREGS
+ : $dst),
+ (ins GPR8
+ : $src),
+ "zext\t$dst, $src",
+ [(set i16
+ : $dst, (zext i8
+ : $src)),
+ (implicit SREG)]>;
// This pseudo gets expanded into a movw+adiw thus it clobbers SREG.
let Defs = [SREG],
- hasSideEffects = 0 in
-def FRMIDX : Pseudo<(outs DLDREGS:$dst),
- (ins DLDREGS:$src, i16imm:$src2),
- "frmidx\t$dst, $src, $src2",
- []>;
+ hasSideEffects = 0 in def FRMIDX : Pseudo<(outs DLDREGS
+ : $dst),
+ (ins DLDREGS
+ : $src, i16imm
+ : $src2),
+ "frmidx\t$dst, $src, $src2", []>;
// This pseudo is either converted to a regular store or a push which clobbers
// SP.
-def STDSPQRr : StorePseudo<
- (outs),
- (ins memspi:$dst, GPR8:$src),
- "stdstk\t$dst, $src",
- [(store i8:$src, addr:$dst)]
->;
+def STDSPQRr : StorePseudo<(outs),
+ (ins memspi
+ : $dst, GPR8
+ : $src),
+ "stdstk\t$dst, $src", [(store i8
+ : $src, addr
+ : $dst)]>;
// This pseudo is either converted to a regular store or a push which clobbers
// SP.
-def STDWSPQRr : StorePseudo<
- (outs),
- (ins memspi:$dst, DREGS:$src),
- "stdwstk\t$dst, $src",
- [(store i16:$src, addr:$dst)]
->;
+def STDWSPQRr : StorePseudo<(outs),
+ (ins memspi
+ : $dst, DREGS
+ : $src),
+ "stdwstk\t$dst, $src", [(store i16
+ : $src, addr
+ : $dst)]>;
// SP read/write pseudos.
-let hasSideEffects = 0 in
-{
- let Uses = [SP] in
- def SPREAD : Pseudo<
- (outs DREGS:$dst),
- (ins GPRSP:$src),
- "spread\t$dst, $src",
- []
- >;
-
- let Defs = [SP] in
- def SPWRITE : Pseudo<
- (outs GPRSP:$dst),
- (ins DREGS:$src),
- "spwrite\t$dst, $src",
- []>;
+let hasSideEffects = 0 in {
+ let Uses = [SP] in def SPREAD : Pseudo<(outs DREGS
+ : $dst),
+ (ins GPRSP
+ : $src),
+ "spread\t$dst, $src", []>;
+
+ let Defs = [SP] in def SPWRITE : Pseudo<(outs GPRSP
+ : $dst),
+ (ins DREGS
+ : $src),
+ "spwrite\t$dst, $src", []>;
}
-def Select8 : SelectPseudo<
- (outs GPR8:$dst),
- (ins GPR8:$src, GPR8:$src2, i8imm:$cc),
- "# Select8 PSEUDO",
- [(set i8:$dst, (AVRselectcc i8:$src, i8:$src2, imm:$cc))]
->;
-
-def Select16 : SelectPseudo<
- (outs DREGS:$dst),
- (ins DREGS:$src, DREGS:$src2, i8imm:$cc),
- "# Select16 PSEUDO",
- [(set i16:$dst, (AVRselectcc i16:$src, i16:$src2, imm:$cc))]
->;
-
-def Lsl8 : ShiftPseudo<
- (outs GPR8:$dst),
- (ins GPR8:$src, GPR8:$cnt),
- "# Lsl8 PSEUDO",
- [(set i8:$dst, (AVRlslLoop i8:$src, i8:$cnt))]
->;
-
-def Lsl16 : ShiftPseudo<
- (outs DREGS:$dst),
- (ins DREGS:$src, GPR8:$cnt),
- "# Lsl16 PSEUDO",
- [(set i16:$dst, (AVRlslLoop i16:$src, i8:$cnt))]
->;
-
-def Lsr8 : ShiftPseudo<
- (outs GPR8:$dst),
- (ins GPR8:$src, GPR8:$cnt),
- "# Lsr8 PSEUDO",
- [(set i8:$dst, (AVRlsrLoop i8:$src, i8:$cnt))]
->;
-
-def Lsr16 : ShiftPseudo<
- (outs DREGS:$dst),
- (ins DREGS:$src, GPR8:$cnt),
- "# Lsr16 PSEUDO",
- [(set i16:$dst, (AVRlsrLoop i16:$src, i8:$cnt))]
->;
-
-def Rol8 : ShiftPseudo<
- (outs GPR8:$dst),
- (ins GPR8:$src, GPR8:$cnt),
- "# Rol8 PSEUDO",
- [(set i8:$dst, (AVRrolLoop i8:$src, i8:$cnt))]
->;
-
-def Rol16 : ShiftPseudo<
- (outs DREGS:$dst),
- (ins DREGS:$src, GPR8:$cnt),
- "# Rol16 PSEUDO",
- [(set i16:$dst, (AVRrolLoop i16:$src, i8:$cnt))]
->;
-
-def Ror8 : ShiftPseudo<
- (outs GPR8:$dst),
- (ins GPR8:$src, GPR8:$cnt),
- "# Ror8 PSEUDO",
- [(set i8:$dst, (AVRrorLoop i8:$src, i8:$cnt))]
->;
-
-def Ror16 : ShiftPseudo<
- (outs DREGS:$dst),
- (ins DREGS:$src, GPR8:$cnt),
- "# Ror16 PSEUDO",
- [(set i16:$dst, (AVRrorLoop i16:$src, i8:$cnt))]
->;
-
-def Asr8 : ShiftPseudo<
- (outs GPR8:$dst),
- (ins GPR8:$src, GPR8:$cnt),
- "# Asr8 PSEUDO",
- [(set i8:$dst, (AVRasrLoop i8:$src, i8:$cnt))]
->;
-
-def Asr16 : ShiftPseudo<
- (outs DREGS:$dst),
- (ins DREGS:$src, GPR8:$cnt),
- "# Asr16 PSEUDO",
- [(set i16:$dst, (AVRasrLoop i16:$src, i8:$cnt))]
->;
-
+def Select8 : SelectPseudo<(outs GPR8
+ : $dst),
+ (ins GPR8
+ : $src, GPR8
+ : $src2, i8imm
+ : $cc),
+ "# Select8 PSEUDO", [(set i8
+ : $dst, (AVRselectcc i8
+ : $src, i8
+ : $src2, imm
+ : $cc))]>;
+
+def Select16 : SelectPseudo<(outs DREGS
+ : $dst),
+ (ins DREGS
+ : $src, DREGS
+ : $src2, i8imm
+ : $cc),
+ "# Select16 PSEUDO", [(set i16
+ : $dst, (AVRselectcc i16
+ : $src, i16
+ : $src2, imm
+ : $cc))]>;
+
+def Lsl8 : ShiftPseudo<(outs GPR8
+ : $dst),
+ (ins GPR8
+ : $src, GPR8
+ : $cnt),
+ "# Lsl8 PSEUDO", [(set i8
+ : $dst, (AVRlslLoop i8
+ : $src, i8
+ : $cnt))]>;
+
+def Lsl16 : ShiftPseudo<(outs DREGS
+ : $dst),
+ (ins DREGS
+ : $src, GPR8
+ : $cnt),
+ "# Lsl16 PSEUDO", [(set i16
+ : $dst, (AVRlslLoop i16
+ : $src, i8
+ : $cnt))]>;
+
+def Lsr8 : ShiftPseudo<(outs GPR8
+ : $dst),
+ (ins GPR8
+ : $src, GPR8
+ : $cnt),
+ "# Lsr8 PSEUDO", [(set i8
+ : $dst, (AVRlsrLoop i8
+ : $src, i8
+ : $cnt))]>;
+
+def Lsr16 : ShiftPseudo<(outs DREGS
+ : $dst),
+ (ins DREGS
+ : $src, GPR8
+ : $cnt),
+ "# Lsr16 PSEUDO", [(set i16
+ : $dst, (AVRlsrLoop i16
+ : $src, i8
+ : $cnt))]>;
+
+def Rol8 : ShiftPseudo<(outs GPR8
+ : $dst),
+ (ins GPR8
+ : $src, GPR8
+ : $cnt),
+ "# Rol8 PSEUDO", [(set i8
+ : $dst, (AVRrolLoop i8
+ : $src, i8
+ : $cnt))]>;
+
+def Rol16 : ShiftPseudo<(outs DREGS
+ : $dst),
+ (ins DREGS
+ : $src, GPR8
+ : $cnt),
+ "# Rol16 PSEUDO", [(set i16
+ : $dst, (AVRrolLoop i16
+ : $src, i8
+ : $cnt))]>;
+
+def Ror8 : ShiftPseudo<(outs GPR8
+ : $dst),
+ (ins GPR8
+ : $src, GPR8
+ : $cnt),
+ "# Ror8 PSEUDO", [(set i8
+ : $dst, (AVRrorLoop i8
+ : $src, i8
+ : $cnt))]>;
+
+def Ror16 : ShiftPseudo<(outs DREGS
+ : $dst),
+ (ins DREGS
+ : $src, GPR8
+ : $cnt),
+ "# Ror16 PSEUDO", [(set i16
+ : $dst, (AVRrorLoop i16
+ : $src, i8
+ : $cnt))]>;
+
+def Asr8 : ShiftPseudo<(outs GPR8
+ : $dst),
+ (ins GPR8
+ : $src, GPR8
+ : $cnt),
+ "# Asr8 PSEUDO", [(set i8
+ : $dst, (AVRasrLoop i8
+ : $src, i8
+ : $cnt))]>;
+
+def Asr16 : ShiftPseudo<(outs DREGS
+ : $dst),
+ (ins DREGS
+ : $src, GPR8
+ : $cnt),
+ "# Asr16 PSEUDO", [(set i16
+ : $dst, (AVRasrLoop i16
+ : $src, i8
+ : $cnt))]>;
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//
-//:TODO: look in x86InstrCompiler.td for odd encoding trick related to
+//: TODO: look in x86InstrCompiler.td for odd encoding trick related to
// add x, 128 -> sub x, -128. Clang is emitting an eor for this (ldi+eor)
// the add instruction always writes the carry flag
-def : Pat<(addc i8:$src, i8:$src2),
- (ADDRdRr i8:$src, i8:$src2)>;
-def : Pat<(addc DREGS:$src, DREGS:$src2),
- (ADDWRdRr DREGS:$src, DREGS:$src2)>;
+def : Pat<(addc i8 : $src, i8 : $src2), (ADDRdRr i8 : $src, i8 : $src2)>;
+def : Pat<(addc DREGS
+ : $src, DREGS
+ : $src2),
+ (ADDWRdRr DREGS
+ : $src, DREGS
+ : $src2)>;
// all sub instruction variants always write the carry flag
-def : Pat<(subc i8:$src, i8:$src2),
- (SUBRdRr i8:$src, i8:$src2)>;
-def : Pat<(subc i16:$src, i16:$src2),
- (SUBWRdRr i16:$src, i16:$src2)>;
-def : Pat<(subc i8:$src, imm:$src2),
- (SUBIRdK i8:$src, imm:$src2)>;
-def : Pat<(subc i16:$src, imm:$src2),
- (SUBIWRdK i16:$src, imm:$src2)>;
+def : Pat<(subc i8 : $src, i8 : $src2), (SUBRdRr i8 : $src, i8 : $src2)>;
+def : Pat<(subc i16 : $src, i16 : $src2), (SUBWRdRr i16 : $src, i16 : $src2)>;
+def : Pat<(subc i8 : $src, imm : $src2), (SUBIRdK i8 : $src, imm : $src2)>;
+def : Pat<(subc i16 : $src, imm : $src2), (SUBIWRdK i16 : $src, imm : $src2)>;
// These patterns convert add (x, -imm) to sub (x, imm) since we don't have
// any add with imm instructions. Also take care of the adiw/sbiw instructions.
-def : Pat<(add i16:$src1, imm0_63_neg:$src2),
- (SBIWRdK i16:$src1, (imm0_63_neg:$src2))>;
-def : Pat<(add i16:$src1, imm:$src2),
- (SUBIWRdK i16:$src1, (imm16_neg_XFORM imm:$src2))>;
-def : Pat<(addc i16:$src1, imm:$src2),
- (SUBIWRdK i16:$src1, (imm16_neg_XFORM imm:$src2))>;
-
-def : Pat<(add i8:$src1, imm:$src2),
- (SUBIRdK i8:$src1, (imm8_neg_XFORM imm:$src2))>;
-def : Pat<(addc i8:$src1, imm:$src2),
- (SUBIRdK i8:$src1, (imm8_neg_XFORM imm:$src2))>;
-def : Pat<(adde i8:$src1, imm:$src2),
- (SBCIRdK i8:$src1, (imm8_neg_XFORM imm:$src2))>;
+def : Pat<(add i16
+ : $src1, imm0_63_neg
+ : $src2),
+ (SBIWRdK i16
+ : $src1, (imm0_63_neg
+ : $src2))>;
+def : Pat<(add i16
+ : $src1, imm
+ : $src2),
+ (SUBIWRdK i16
+ : $src1, (imm16_neg_XFORM imm
+ : $src2))>;
+def : Pat<(addc i16
+ : $src1, imm
+ : $src2),
+ (SUBIWRdK i16
+ : $src1, (imm16_neg_XFORM imm
+ : $src2))>;
+
+def : Pat<(add i8
+ : $src1, imm
+ : $src2),
+ (SUBIRdK i8
+ : $src1, (imm8_neg_XFORM imm
+ : $src2))>;
+def : Pat<(addc i8
+ : $src1, imm
+ : $src2),
+ (SUBIRdK i8
+ : $src1, (imm8_neg_XFORM imm
+ : $src2))>;
+def : Pat<(adde i8
+ : $src1, imm
+ : $src2),
+ (SBCIRdK i8
+ : $src1, (imm8_neg_XFORM imm
+ : $src2))>;
// Calls.
-def : Pat<(AVRcall (i16 tglobaladdr:$dst)),
- (CALLk tglobaladdr:$dst)>;
-def : Pat<(AVRcall (i16 texternalsym:$dst)),
- (CALLk texternalsym:$dst)>;
+def : Pat<(AVRcall(i16 tglobaladdr : $dst)), (CALLk tglobaladdr : $dst)>;
+def : Pat<(AVRcall(i16 texternalsym : $dst)), (CALLk texternalsym : $dst)>;
// `anyext`
-def : Pat<(i16 (anyext i8:$src)),
- (INSERT_SUBREG (i16 (IMPLICIT_DEF)), i8:$src, sub_lo)>;
+def : Pat<(i16(anyext i8
+ : $src)),
+ (INSERT_SUBREG(i16(IMPLICIT_DEF)), i8
+ : $src, sub_lo)>;
// `trunc`
-def : Pat<(i8 (trunc i16:$src)),
- (EXTRACT_SUBREG i16:$src, sub_lo)>;
+def : Pat<(i8(trunc i16 : $src)), (EXTRACT_SUBREG i16 : $src, sub_lo)>;
// sext_inreg
-def : Pat<(sext_inreg i16:$src, i8),
- (SEXT (i8 (EXTRACT_SUBREG i16:$src, sub_lo)))>;
+def : Pat<(sext_inreg i16
+ : $src, i8),
+ (SEXT(i8(EXTRACT_SUBREG i16
+ : $src, sub_lo)))>;
// GlobalAddress
-def : Pat<(i16 (AVRWrapper tglobaladdr:$dst)),
- (LDIWRdK tglobaladdr:$dst)>;
-def : Pat<(add i16:$src, (AVRWrapper tglobaladdr:$src2)),
- (SUBIWRdK i16:$src, tglobaladdr:$src2)>;
-def : Pat<(i8 (load (AVRWrapper tglobaladdr:$dst))),
- (LDSRdK tglobaladdr:$dst)>;
-def : Pat<(i16 (load (AVRWrapper tglobaladdr:$dst))),
- (LDSWRdK tglobaladdr:$dst)>;
-def : Pat<(store i8:$src, (i16 (AVRWrapper tglobaladdr:$dst))),
- (STSKRr tglobaladdr:$dst, i8:$src)>;
-def : Pat<(store i16:$src, (i16 (AVRWrapper tglobaladdr:$dst))),
- (STSWKRr tglobaladdr:$dst, i16:$src)>;
+def : Pat<(i16(AVRWrapper tglobaladdr : $dst)), (LDIWRdK tglobaladdr : $dst)>;
+def : Pat<(add i16
+ : $src, (AVRWrapper tglobaladdr
+ : $src2)),
+ (SUBIWRdK i16
+ : $src, tglobaladdr
+ : $src2)>;
+def : Pat<(i8(load(AVRWrapper tglobaladdr
+ : $dst))),
+ (LDSRdK tglobaladdr
+ : $dst)>;
+def : Pat<(i16(load(AVRWrapper tglobaladdr
+ : $dst))),
+ (LDSWRdK tglobaladdr
+ : $dst)>;
+def : Pat<(store i8
+ : $src, (i16(AVRWrapper tglobaladdr
+ : $dst))),
+ (STSKRr tglobaladdr
+ : $dst, i8
+ : $src)>;
+def : Pat<(store i16
+ : $src, (i16(AVRWrapper tglobaladdr
+ : $dst))),
+ (STSWKRr tglobaladdr
+ : $dst, i16
+ : $src)>;
// BlockAddress
-def : Pat<(i16 (AVRWrapper tblockaddress:$dst)),
- (LDIWRdK tblockaddress:$dst)>;
+def : Pat<(i16(AVRWrapper tblockaddress
+ : $dst)),
+ (LDIWRdK tblockaddress
+ : $dst)>;
-def : Pat<(i8 (trunc (AVRlsrwn DLDREGS:$src, (i16 8)))),
- (EXTRACT_SUBREG DREGS:$src, sub_hi)>;
+def : Pat<(i8(trunc(AVRlsrwn DLDREGS
+ : $src, (i16 8)))),
+ (EXTRACT_SUBREG DREGS
+ : $src, sub_hi)>;
// :FIXME: DAGCombiner produces an shl node after legalization from this sequence:
// BR_JT -> (mul x, 2) -> (shl x, 1)
-def : Pat<(shl i16:$src1, (i8 1)),
- (LSLWRd i16:$src1)>;
+def : Pat<(shl i16 : $src1, (i8 1)), (LSLWRd i16 : $src1)>;
// Lowering of 'tst' node to 'TST' instruction.
// TST is an alias of AND Rd, Rd.
-def : Pat<(AVRtst i8:$rd),
- (ANDRdRr GPR8:$rd, GPR8:$rd)>;
+def : Pat<(AVRtst i8 : $rd), (ANDRdRr GPR8 : $rd, GPR8 : $rd)>;
// Lowering of 'lsl' node to 'LSL' instruction.
// LSL is an alias of 'ADD Rd, Rd'
-def : Pat<(AVRlsl i8:$rd),
- (ADDRdRr GPR8:$rd, GPR8:$rd)>;
-
+def : Pat<(AVRlsl i8 : $rd), (ADDRdRr GPR8 : $rd, GPR8 : $rd)>;
diff --git a/llvm/lib/Target/AVR/AVRMCInstLower.cpp b/llvm/lib/Target/AVR/AVRMCInstLower.cpp
index 49a318762b63..2b8711656139 100644
--- a/llvm/lib/Target/AVR/AVRMCInstLower.cpp
+++ b/llvm/lib/Target/AVR/AVRMCInstLower.cpp
@@ -29,7 +29,9 @@ MCOperand AVRMCInstLower::lowerSymbolOperand(const MachineOperand &MO,
const MCExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx);
bool IsNegated = false;
- if (TF & AVRII::MO_NEG) { IsNegated = true; }
+ if (TF & AVRII::MO_NEG) {
+ IsNegated = true;
+ }
if (!MO.isJTI() && MO.getOffset()) {
Expr = MCBinaryExpr::createAdd(
@@ -59,7 +61,8 @@ MCOperand AVRMCInstLower::lowerSymbolOperand(const MachineOperand &MO,
return MCOperand::createExpr(Expr);
}
-void AVRMCInstLower::lowerInstruction(const MachineInstr &MI, MCInst &OutMI) const {
+void AVRMCInstLower::lowerInstruction(const MachineInstr &MI,
+ MCInst &OutMI) const {
OutMI.setOpcode(MI.getOpcode());
for (MachineOperand const &MO : MI.operands()) {
@@ -108,4 +111,3 @@ void AVRMCInstLower::lowerInstruction(const MachineInstr &MI, MCInst &OutMI) con
}
} // end of namespace llvm
-
diff --git a/llvm/lib/Target/AVR/AVRMCInstLower.h b/llvm/lib/Target/AVR/AVRMCInstLower.h
index 5e0f42ac16a7..7ad6d472ad87 100644
--- a/llvm/lib/Target/AVR/AVRMCInstLower.h
+++ b/llvm/lib/Target/AVR/AVRMCInstLower.h
@@ -39,4 +39,3 @@ private:
} // end namespace llvm
#endif // LLVM_AVR_MCINST_LOWER_H
-
diff --git a/llvm/lib/Target/AVR/AVRMachineFunctionInfo.h b/llvm/lib/Target/AVR/AVRMachineFunctionInfo.h
index 5432fac122ef..8b1c247eb6a7 100644
--- a/llvm/lib/Target/AVR/AVRMachineFunctionInfo.h
+++ b/llvm/lib/Target/AVR/AVRMachineFunctionInfo.h
@@ -55,8 +55,10 @@ public:
CalleeSavedFrameSize(0), VarArgsFrameIndex(0) {
unsigned CallConv = MF.getFunction().getCallingConv();
- this->IsInterruptHandler = CallConv == CallingConv::AVR_INTR || MF.getFunction().hasFnAttribute("interrupt");
- this->IsSignalHandler = CallConv == CallingConv::AVR_SIGNAL || MF.getFunction().hasFnAttribute("signal");
+ this->IsInterruptHandler = CallConv == CallingConv::AVR_INTR ||
+ MF.getFunction().hasFnAttribute("interrupt");
+ this->IsSignalHandler = CallConv == CallingConv::AVR_SIGNAL ||
+ MF.getFunction().hasFnAttribute("signal");
}
bool getHasSpills() const { return HasSpills; }
@@ -69,7 +71,9 @@ public:
void setHasStackArgs(bool B) { HasStackArgs = B; }
/// Checks if the function is some form of interrupt service routine.
- bool isInterruptOrSignalHandler() const { return isInterruptHandler() || isSignalHandler(); }
+ bool isInterruptOrSignalHandler() const {
+ return isInterruptHandler() || isSignalHandler();
+ }
bool isInterruptHandler() const { return IsInterruptHandler; }
bool isSignalHandler() const { return IsSignalHandler; }
@@ -81,6 +85,6 @@ public:
void setVarArgsFrameIndex(int Idx) { VarArgsFrameIndex = Idx; }
};
-} // end llvm namespace
+} // namespace llvm
#endif // LLVM_AVR_MACHINE_FUNCTION_INFO_H
diff --git a/llvm/lib/Target/AVR/AVRRegisterInfo.cpp b/llvm/lib/Target/AVR/AVRRegisterInfo.cpp
index 2a4905ce2461..1886debaf492 100644
--- a/llvm/lib/Target/AVR/AVRRegisterInfo.cpp
+++ b/llvm/lib/Target/AVR/AVRRegisterInfo.cpp
@@ -17,8 +17,8 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/Function.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/IR/Function.h"
#include "AVR.h"
#include "AVRInstrInfo.h"
@@ -37,9 +37,8 @@ const uint16_t *
AVRRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
const AVRMachineFunctionInfo *AFI = MF->getInfo<AVRMachineFunctionInfo>();
- return AFI->isInterruptOrSignalHandler()
- ? CSR_Interrupts_SaveList
- : CSR_Normal_SaveList;
+ return AFI->isInterruptOrSignalHandler() ? CSR_Interrupts_SaveList
+ : CSR_Normal_SaveList;
}
const uint32_t *
@@ -47,9 +46,8 @@ AVRRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const {
const AVRMachineFunctionInfo *AFI = MF.getInfo<AVRMachineFunctionInfo>();
- return AFI->isInterruptOrSignalHandler()
- ? CSR_Interrupts_RegMask
- : CSR_Normal_RegMask;
+ return AFI->isInterruptOrSignalHandler() ? CSR_Interrupts_RegMask
+ : CSR_Normal_RegMask;
}
BitVector AVRRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
@@ -207,7 +205,8 @@ void AVRRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// If the offset is too big we have to adjust and restore the frame pointer
// to materialize a valid load/store with displacement.
- //:TODO: consider using only one adiw/sbiw chain for more than one frame index
+ //: TODO: consider using only one adiw/sbiw chain for more than one frame
+ //: index
if (Offset > 62) {
unsigned AddOpc = AVR::ADIWRdK, SubOpc = AVR::SBIWRdK;
int AddOffset = Offset - 63 + 1;
@@ -276,18 +275,16 @@ void AVRRegisterInfo::splitReg(Register Reg, Register &LoReg,
HiReg = getSubReg(Reg, AVR::sub_hi);
}
-bool AVRRegisterInfo::shouldCoalesce(MachineInstr *MI,
- const TargetRegisterClass *SrcRC,
- unsigned SubReg,
- const TargetRegisterClass *DstRC,
- unsigned DstSubReg,
- const TargetRegisterClass *NewRC,
- LiveIntervals &LIS) const {
- if(this->getRegClass(AVR::PTRDISPREGSRegClassID)->hasSubClassEq(NewRC)) {
+bool AVRRegisterInfo::shouldCoalesce(
+ MachineInstr *MI, const TargetRegisterClass *SrcRC, unsigned SubReg,
+ const TargetRegisterClass *DstRC, unsigned DstSubReg,
+ const TargetRegisterClass *NewRC, LiveIntervals &LIS) const {
+ if (this->getRegClass(AVR::PTRDISPREGSRegClassID)->hasSubClassEq(NewRC)) {
return false;
}
- return TargetRegisterInfo::shouldCoalesce(MI, SrcRC, SubReg, DstRC, DstSubReg, NewRC, LIS);
+ return TargetRegisterInfo::shouldCoalesce(MI, SrcRC, SubReg, DstRC, DstSubReg,
+ NewRC, LIS);
}
} // end of namespace llvm
diff --git a/llvm/lib/Target/AVR/AVRRegisterInfo.h b/llvm/lib/Target/AVR/AVRRegisterInfo.h
index 23439f2fe195..fa27d9283209 100644
--- a/llvm/lib/Target/AVR/AVRRegisterInfo.h
+++ b/llvm/lib/Target/AVR/AVRRegisterInfo.h
@@ -51,12 +51,9 @@ public:
/// \param Reg A 16-bit register to split.
void splitReg(Register Reg, Register &LoReg, Register &HiReg) const;
- bool shouldCoalesce(MachineInstr *MI,
- const TargetRegisterClass *SrcRC,
- unsigned SubReg,
- const TargetRegisterClass *DstRC,
- unsigned DstSubReg,
- const TargetRegisterClass *NewRC,
+ bool shouldCoalesce(MachineInstr *MI, const TargetRegisterClass *SrcRC,
+ unsigned SubReg, const TargetRegisterClass *DstRC,
+ unsigned DstSubReg, const TargetRegisterClass *NewRC,
LiveIntervals &LIS) const override;
};
diff --git a/llvm/lib/Target/AVR/AVRRegisterInfo.td b/llvm/lib/Target/AVR/AVRRegisterInfo.td
index 1948fcbaf75a..bb4e86ca0536 100644
--- a/llvm/lib/Target/AVR/AVRRegisterInfo.td
+++ b/llvm/lib/Target/AVR/AVRRegisterInfo.td
@@ -11,12 +11,8 @@
//===----------------------------------------------------------------------===//
// 8-bit General purpose register definition.
-class AVRReg<bits<16> num,
- string name,
- list<Register> subregs = [],
- list<string> altNames = []>
- : RegisterWithSubRegs<name, subregs>
-{
+class AVRReg<bits<16> num, string name, list<Register> subregs = [],
+ list<string> altNames = []> : RegisterWithSubRegs<name, subregs> {
field bits<16> Num = num;
let HWEncoding = num;
@@ -26,31 +22,27 @@ class AVRReg<bits<16> num,
}
// Subregister indices.
-let Namespace = "AVR" in
-{
+let Namespace = "AVR" in {
def sub_lo : SubRegIndex<8>;
def sub_hi : SubRegIndex<8, 8>;
}
-let Namespace = "AVR" in {
- def ptr : RegAltNameIndex;
-}
-
+let Namespace = "AVR" in { def ptr : RegAltNameIndex; }
//===----------------------------------------------------------------------===//
// 8-bit general purpose registers
//===----------------------------------------------------------------------===//
-def R0 : AVRReg<0, "r0">, DwarfRegNum<[0]>;
-def R1 : AVRReg<1, "r1">, DwarfRegNum<[1]>;
-def R2 : AVRReg<2, "r2">, DwarfRegNum<[2]>;
-def R3 : AVRReg<3, "r3">, DwarfRegNum<[3]>;
-def R4 : AVRReg<4, "r4">, DwarfRegNum<[4]>;
-def R5 : AVRReg<5, "r5">, DwarfRegNum<[5]>;
-def R6 : AVRReg<6, "r6">, DwarfRegNum<[6]>;
-def R7 : AVRReg<7, "r7">, DwarfRegNum<[7]>;
-def R8 : AVRReg<8, "r8">, DwarfRegNum<[8]>;
-def R9 : AVRReg<9, "r9">, DwarfRegNum<[9]>;
+def R0 : AVRReg<0, "r0">, DwarfRegNum<[0]>;
+def R1 : AVRReg<1, "r1">, DwarfRegNum<[1]>;
+def R2 : AVRReg<2, "r2">, DwarfRegNum<[2]>;
+def R3 : AVRReg<3, "r3">, DwarfRegNum<[3]>;
+def R4 : AVRReg<4, "r4">, DwarfRegNum<[4]>;
+def R5 : AVRReg<5, "r5">, DwarfRegNum<[5]>;
+def R6 : AVRReg<6, "r6">, DwarfRegNum<[6]>;
+def R7 : AVRReg<7, "r7">, DwarfRegNum<[7]>;
+def R8 : AVRReg<8, "r8">, DwarfRegNum<[8]>;
+def R9 : AVRReg<9, "r9">, DwarfRegNum<[9]>;
def R10 : AVRReg<10, "r10">, DwarfRegNum<[10]>;
def R11 : AVRReg<11, "r11">, DwarfRegNum<[11]>;
def R12 : AVRReg<12, "r12">, DwarfRegNum<[12]>;
@@ -76,19 +68,17 @@ def R31 : AVRReg<31, "r31", [], ["zh"]>, DwarfRegNum<[31]>;
def SPL : AVRReg<32, "SPL">, DwarfRegNum<[32]>;
def SPH : AVRReg<33, "SPH">, DwarfRegNum<[33]>;
-let SubRegIndices = [sub_lo, sub_hi],
-CoveredBySubRegs = 1 in
-{
+let SubRegIndices = [sub_lo, sub_hi], CoveredBySubRegs = 1 in {
// 16 bit GPR pairs.
- def SP : AVRReg<32, "SP", [SPL, SPH]>, DwarfRegNum<[32]>;
+ def SP : AVRReg<32, "SP", [SPL, SPH]>, DwarfRegNum<[32]>;
// The pointer registers (X,Y,Z) are a special case because they
// are printed as a `high:low` pair when a DREG is expected,
// but printed using `X`, `Y`, `Z` when a pointer register is expected.
let RegAltNameIndices = [ptr] in {
- def R31R30 : AVRReg<30, "r31:r30", [R30, R31], ["Z"]>, DwarfRegNum<[30]>;
- def R29R28 : AVRReg<28, "r29:r28", [R28, R29], ["Y"]>, DwarfRegNum<[28]>;
- def R27R26 : AVRReg<26, "r27:r26", [R26, R27], ["X"]>, DwarfRegNum<[26]>;
+ def R31R30 : AVRReg<30, "r31:r30", [R30, R31], ["Z"]>, DwarfRegNum<[30]>;
+ def R29R28 : AVRReg<28, "r29:r28", [R28, R29], ["Y"]>, DwarfRegNum<[28]>;
+ def R27R26 : AVRReg<26, "r27:r26", [R26, R27], ["X"]>, DwarfRegNum<[26]>;
}
def R25R24 : AVRReg<24, "r25:r24", [R24, R25]>, DwarfRegNum<[24]>;
def R23R22 : AVRReg<22, "r23:r22", [R22, R23]>, DwarfRegNum<[22]>;
@@ -98,11 +88,11 @@ CoveredBySubRegs = 1 in
def R15R14 : AVRReg<14, "r15:r14", [R14, R15]>, DwarfRegNum<[14]>;
def R13R12 : AVRReg<12, "r13:r12", [R12, R13]>, DwarfRegNum<[12]>;
def R11R10 : AVRReg<10, "r11:r10", [R10, R11]>, DwarfRegNum<[10]>;
- def R9R8 : AVRReg<8, "r9:r8", [R8, R9]>, DwarfRegNum<[8]>;
- def R7R6 : AVRReg<6, "r7:r6", [R6, R7]>, DwarfRegNum<[6]>;
- def R5R4 : AVRReg<4, "r5:r4", [R4, R5]>, DwarfRegNum<[4]>;
- def R3R2 : AVRReg<2, "r3:r2", [R2, R3]>, DwarfRegNum<[2]>;
- def R1R0 : AVRReg<0, "r1:r0", [R0, R1]>, DwarfRegNum<[0]>;
+ def R9R8 : AVRReg<8, "r9:r8", [R8, R9]>, DwarfRegNum<[8]>;
+ def R7R6 : AVRReg<6, "r7:r6", [R6, R7]>, DwarfRegNum<[6]>;
+ def R5R4 : AVRReg<4, "r5:r4", [R4, R5]>, DwarfRegNum<[4]>;
+ def R3R2 : AVRReg<2, "r3:r2", [R2, R3]>, DwarfRegNum<[2]>;
+ def R1R0 : AVRReg<0, "r1:r0", [R0, R1]>, DwarfRegNum<[0]>;
// Pseudo registers for unaligned i16
def R26R25 : AVRReg<25, "r26:r25", [R25, R26]>, DwarfRegNum<[25]>;
@@ -113,7 +103,7 @@ CoveredBySubRegs = 1 in
def R16R15 : AVRReg<15, "r16:r15", [R15, R16]>, DwarfRegNum<[15]>;
def R14R13 : AVRReg<13, "r14:r13", [R13, R14]>, DwarfRegNum<[13]>;
def R12R11 : AVRReg<11, "r12:r11", [R11, R12]>, DwarfRegNum<[11]>;
- def R10R9 : AVRReg<9, "r10:r9", [R9, R10]>, DwarfRegNum<[9]>;
+ def R10R9 : AVRReg<9, "r10:r9", [R9, R10]>, DwarfRegNum<[9]>;
}
//===----------------------------------------------------------------------===//
@@ -122,81 +112,71 @@ CoveredBySubRegs = 1 in
// Main 8-bit register class.
def GPR8 : RegisterClass<"AVR", [i8], 8,
- (
- // Return value and argument registers.
- add R24, R25, R18, R19, R20, R21, R22, R23,
- // Scratch registers.
- R30, R31, R26, R27,
- // Callee saved registers.
- R28, R29, R17, R16, R15, R14, R13, R12, R11, R10,
- R9, R8, R7, R6, R5, R4, R3, R2, R0, R1
- )>;
+ (
+ // Return value and argument registers.
+ add R24, R25, R18, R19, R20, R21, R22, R23,
+ // Scratch registers.
+ R30, R31, R26, R27,
+ // Callee saved registers.
+ R28, R29, R17, R16, R15, R14, R13, R12, R11, R10,
+ R9, R8, R7, R6, R5, R4, R3, R2, R0, R1)>;
// Simple lower registers r0..r15
def GPR8lo : RegisterClass<"AVR", [i8], 8,
- (
- add R15, R14, R13, R12, R11, R10, R9, R8, R7, R6, R5, R4, R3, R2, R0, R1
- )>;
+ (add R15, R14, R13, R12, R11, R10, R9, R8, R7, R6,
+ R5, R4, R3, R2, R0, R1)>;
// 8-bit register class for instructions which take immediates.
def LD8 : RegisterClass<"AVR", [i8], 8,
- (
- // Return value and arguments.
- add R24, R25, R18, R19, R20, R21, R22, R23,
- // Scratch registers.
- R30, R31, R26, R27,
- // Callee saved registers.
- R28, R29, R17, R16
- )>;
+ (
+ // Return value and arguments.
+ add R24, R25, R18, R19, R20, R21, R22, R23,
+ // Scratch registers.
+ R30, R31, R26, R27,
+ // Callee saved registers.
+ R28, R29, R17, R16)>;
// Simple lower registers r16..r23
def LD8lo : RegisterClass<"AVR", [i8], 8,
- (
- add R23, R22, R21, R20, R19, R18, R17, R16
- )>;
+ (add R23, R22, R21, R20, R19, R18, R17, R16)>;
// Main 16-bit pair register class.
def DREGS : RegisterClass<"AVR", [i16], 8,
- (
- // Return value and arguments.
- add R25R24, R19R18, R21R20, R23R22,
- // Scratch registers.
- R31R30, R27R26,
- // Callee saved registers.
- R29R28, R17R16, R15R14, R13R12, R11R10,
- R9R8, R7R6, R5R4, R3R2, R1R0,
- // Pseudo regs for unaligned 16-bits
- R26R25, R24R23, R22R21,
- R20R19, R18R17, R16R15,
- R14R13, R12R11, R10R9
- )>;
+ (
+ // Return value and arguments.
+ add R25R24, R19R18, R21R20, R23R22,
+ // Scratch registers.
+ R31R30, R27R26,
+ // Callee saved registers.
+ R29R28, R17R16, R15R14, R13R12, R11R10, R9R8,
+ R7R6, R5R4, R3R2, R1R0,
+ // Pseudo regs for unaligned 16-bits
+ R26R25, R24R23, R22R21, R20R19, R18R17, R16R15,
+ R14R13, R12R11, R10R9)>;
// Lower 16-bit pair registers in R0..R15, only used in inline assembly.
-def DREGSlo : RegisterClass<"AVR", [i16], 8,
- (
- add R15R14, R13R12, R11R10, R9R8, R7R6, R5R4, R3R2, R1R0
- )>;
+def DREGSlo
+ : RegisterClass<"AVR", [i16], 8,
+ (add R15R14, R13R12, R11R10, R9R8, R7R6, R5R4, R3R2, R1R0)>;
// Lower 16-bit pair registers in r16..r23, only used in inline assembly.
def DREGSLD8lo : RegisterClass<"AVR", [i16], 8,
- (
- // Return value and arguments.
- add R19R18, R21R20, R23R22,
- // Callee saved registers.
- R17R16
- )>;
+ (
+ // Return value and arguments.
+ add R19R18, R21R20, R23R22,
+ // Callee saved registers.
+ R17R16)>;
// 16-bit pair register class for movw
def DREGSMOVW : RegisterClass<"AVR", [i16], 8,
- (
- // Return value and arguments.
- add R25R24, R19R18, R21R20, R23R22,
- // Scratch registers.
- R31R30, R27R26,
- // Callee saved registers.
- R29R28, R17R16, R15R14, R13R12, R11R10,
- R9R8, R7R6, R5R4, R3R2, R1R0
- )>;
+ (
+ // Return value and arguments.
+ add R25R24, R19R18, R21R20, R23R22,
+ // Scratch registers.
+ R31R30, R27R26,
+ // Callee saved registers.
+ R29R28, R17R16, R15R14, R13R12, R11R10, R9R8,
+ R7R6, R5R4, R3R2, R1R0)>;
// The 16-bit DREGS register class, excluding the Z pointer register.
//
@@ -207,66 +187,59 @@ def DREGSMOVW : RegisterClass<"AVR", [i16], 8,
 // cannot use Z; it's simply a workaround for a regalloc bug.
//
// More information can be found in PR39553.
-def DREGS_WITHOUT_YZ_WORKAROUND : RegisterClass<"AVR", [i16], 8,
- (
- // Return value and arguments.
- add R25R24, R19R18, R21R20, R23R22,
- // Scratch registers.
- R27R26,
- // Callee saved registers.
- R17R16, R15R14, R13R12, R11R10,
- R9R8, R7R6, R5R4, R3R2, R1R0
- )>;
+def DREGS_WITHOUT_YZ_WORKAROUND
+ : RegisterClass<"AVR", [i16], 8,
+ (
+ // Return value and arguments.
+ add R25R24, R19R18, R21R20, R23R22,
+ // Scratch registers.
+ R27R26,
+ // Callee saved registers.
+ R17R16, R15R14, R13R12, R11R10, R9R8, R7R6, R5R4, R3R2,
+ R1R0)>;
// 16-bit register class for immediate instructions.
def DLDREGS : RegisterClass<"AVR", [i16], 8,
- (
- // Return value and arguments.
- add R25R24, R19R18, R21R20, R23R22,
- // Scratch registers.
- R31R30, R27R26,
- // Callee saved registers.
- R29R28, R17R16
- )>;
+ (
+ // Return value and arguments.
+ add R25R24, R19R18, R21R20, R23R22,
+ // Scratch registers.
+ R31R30, R27R26,
+ // Callee saved registers.
+ R29R28, R17R16)>;
// 16-bit register class for the adiw/sbiw instructions.
def IWREGS : RegisterClass<"AVR", [i16], 8,
- (
- // Return value and arguments.
- add R25R24,
- // Scratch registers.
- R31R30, R27R26,
- // Callee saved registers.
- R29R28
- )>;
+ (
+ // Return value and arguments.
+ add R25R24,
+ // Scratch registers.
+ R31R30, R27R26,
+ // Callee saved registers.
+ R29R28)>;
// 16-bit register class for the ld and st instructions.
 // AKA X, Y, and Z
def PTRREGS : RegisterClass<"AVR", [i16], 8,
- (
- add R27R26, // X
- R29R28, // Y
- R31R30 // Z
- ), ptr>;
+ (add R27R26, // X
+ R29R28, // Y
+ R31R30 // Z
+ ),
+ ptr>;
// 16-bit register class for the ldd and std instructions.
// AKA Y and Z.
-def PTRDISPREGS : RegisterClass<"AVR", [i16], 8,
- (
- add R31R30, R29R28
- ), ptr>;
+def PTRDISPREGS : RegisterClass<"AVR", [i16], 8, (add R31R30, R29R28), ptr>;
// We have a bunch of instructions with an explicit Z register argument. We
// model this using a register class containing only the Z register.
def ZREG : RegisterClass<"AVR", [i16], 8, (add R31R30)>;
// Register class used for the stack read pseudo instruction.
-def GPRSP: RegisterClass<"AVR", [i16], 8, (add SP)>;
+def GPRSP : RegisterClass<"AVR", [i16], 8, (add SP)>;
// Status register.
def SREG : AVRReg<14, "FLAGS">, DwarfRegNum<[88]>;
-def CCR : RegisterClass<"AVR", [i8], 8, (add SREG)>
-{
- let CopyCost = -1; // Don't allow copying of status registers
+def CCR : RegisterClass<"AVR", [i8], 8, (add SREG)> {
+ let CopyCost = -1; // Don't allow copying of status registers
}
-
diff --git a/llvm/lib/Target/AVR/AVRRelaxMemOperations.cpp b/llvm/lib/Target/AVR/AVRRelaxMemOperations.cpp
index 7d2d19de7578..76f29eb9f369 100644
--- a/llvm/lib/Target/AVR/AVRRelaxMemOperations.cpp
+++ b/llvm/lib/Target/AVR/AVRRelaxMemOperations.cpp
@@ -84,8 +84,7 @@ bool AVRRelaxMem::runOnBasicBlock(Block &MBB) {
return Modified;
}
-template <>
-bool AVRRelaxMem::relax<AVR::STDWPtrQRr>(Block &MBB, BlockIt MBBI) {
+template <> bool AVRRelaxMem::relax<AVR::STDWPtrQRr>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
MachineOperand &Ptr = MI.getOperand(0);
@@ -96,24 +95,23 @@ bool AVRRelaxMem::relax<AVR::STDWPtrQRr>(Block &MBB, BlockIt MBBI) {
if (Imm > 63) {
// Push the previous state of the pointer register.
// This instruction must preserve the value.
- buildMI(MBB, MBBI, AVR::PUSHWRr)
- .addReg(Ptr.getReg());
+ buildMI(MBB, MBBI, AVR::PUSHWRr).addReg(Ptr.getReg());
// Add the immediate to the pointer register.
buildMI(MBB, MBBI, AVR::SBCIWRdK)
- .addReg(Ptr.getReg(), RegState::Define)
- .addReg(Ptr.getReg())
- .addImm(-Imm);
+ .addReg(Ptr.getReg(), RegState::Define)
+ .addReg(Ptr.getReg())
+ .addImm(-Imm);
// Store the value in the source register to the address
// pointed to by the pointer register.
buildMI(MBB, MBBI, AVR::STWPtrRr)
- .addReg(Ptr.getReg())
- .addReg(Src.getReg(), getKillRegState(Src.isKill()));
+ .addReg(Ptr.getReg())
+ .addReg(Src.getReg(), getKillRegState(Src.isKill()));
// Pop the original state of the pointer register.
buildMI(MBB, MBBI, AVR::POPWRd)
- .addDef(Ptr.getReg(), getKillRegState(Ptr.isKill()));
+ .addDef(Ptr.getReg(), getKillRegState(Ptr.isKill()));
MI.removeFromParent();
}
@@ -125,21 +123,19 @@ bool AVRRelaxMem::runOnInstruction(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
int Opcode = MBBI->getOpcode();
-#define RELAX(Op) \
- case Op: \
+#define RELAX(Op) \
+ case Op: \
return relax<Op>(MBB, MI)
- switch (Opcode) {
- RELAX(AVR::STDWPtrQRr);
- }
+ switch (Opcode) { RELAX(AVR::STDWPtrQRr); }
#undef RELAX
return false;
}
} // end of anonymous namespace
-INITIALIZE_PASS(AVRRelaxMem, "avr-relax-mem",
- AVR_RELAX_MEM_OPS_NAME, false, false)
+INITIALIZE_PASS(AVRRelaxMem, "avr-relax-mem", AVR_RELAX_MEM_OPS_NAME, false,
+ false)
namespace llvm {
diff --git a/llvm/lib/Target/AVR/AVRSubtarget.cpp b/llvm/lib/Target/AVR/AVRSubtarget.cpp
index 601865120491..990e1c57e63f 100644
--- a/llvm/lib/Target/AVR/AVRSubtarget.cpp
+++ b/llvm/lib/Target/AVR/AVRSubtarget.cpp
@@ -13,7 +13,7 @@
#include "AVRSubtarget.h"
#include "llvm/BinaryFormat/ELF.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
#include "AVR.h"
#include "AVRTargetMachine.h"
diff --git a/llvm/lib/Target/AVR/AVRSubtarget.h b/llvm/lib/Target/AVR/AVRSubtarget.h
index 7d49e43a83f5..90b9cd4da7c1 100644
--- a/llvm/lib/Target/AVR/AVRSubtarget.h
+++ b/llvm/lib/Target/AVR/AVRSubtarget.h
@@ -39,10 +39,18 @@ public:
const AVRTargetMachine &TM);
const AVRInstrInfo *getInstrInfo() const override { return &InstrInfo; }
- const TargetFrameLowering *getFrameLowering() const override { return &FrameLowering; }
- const AVRTargetLowering *getTargetLowering() const override { return &TLInfo; }
- const AVRSelectionDAGInfo *getSelectionDAGInfo() const override { return &TSInfo; }
- const AVRRegisterInfo *getRegisterInfo() const override { return &InstrInfo.getRegisterInfo(); }
+ const TargetFrameLowering *getFrameLowering() const override {
+ return &FrameLowering;
+ }
+ const AVRTargetLowering *getTargetLowering() const override {
+ return &TLInfo;
+ }
+ const AVRSelectionDAGInfo *getSelectionDAGInfo() const override {
+ return &TSInfo;
+ }
+ const AVRRegisterInfo *getRegisterInfo() const override {
+ return &InstrInfo.getRegisterInfo();
+ }
/// Parses a subtarget feature string, setting appropriate options.
/// \note Definition of function is auto generated by `tblgen`.
@@ -84,7 +92,6 @@ public:
}
private:
-
/// The ELF e_flags architecture.
unsigned ELFArch;
diff --git a/llvm/lib/Target/AVR/AVRTargetMachine.cpp b/llvm/lib/Target/AVR/AVRTargetMachine.cpp
index 5be4260ce035..65740f7c2306 100644
--- a/llvm/lib/Target/AVR/AVRTargetMachine.cpp
+++ b/llvm/lib/Target/AVR/AVRTargetMachine.cpp
@@ -16,7 +16,7 @@
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
#include "AVR.h"
#include "AVRTargetObjectFile.h"
@@ -25,7 +25,8 @@
namespace llvm {
-static const char *AVRDataLayout = "e-P1-p:16:8-i8:8-i16:8-i32:8-i64:8-f32:8-f64:8-n8-a:8";
+static const char *AVRDataLayout =
+ "e-P1-p:16:8-i8:8-i16:8-i32:8-i64:8-f32:8-f64:8-n8-a:8";
/// Processes a CPU name.
static StringRef getCPU(StringRef CPU) {
diff --git a/llvm/lib/Target/AVR/AVRTargetMachine.h b/llvm/lib/Target/AVR/AVRTargetMachine.h
index f9015c8741ea..54669eda060c 100644
--- a/llvm/lib/Target/AVR/AVRTargetMachine.h
+++ b/llvm/lib/Target/AVR/AVRTargetMachine.h
@@ -29,8 +29,7 @@ class AVRTargetMachine : public LLVMTargetMachine {
public:
AVRTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
- Optional<Reloc::Model> RM,
- Optional<CodeModel::Model> CM,
+ Optional<Reloc::Model> RM, Optional<CodeModel::Model> CM,
CodeGenOpt::Level OL, bool JIT);
const AVRSubtarget *getSubtargetImpl() const;
@@ -42,10 +41,6 @@ public:
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
- bool isMachineVerifierClean() const override {
- return false;
- }
-
private:
std::unique_ptr<TargetLoweringObjectFile> TLOF;
AVRSubtarget SubTarget;
diff --git a/llvm/lib/Target/AVR/AVRTargetObjectFile.cpp b/llvm/lib/Target/AVR/AVRTargetObjectFile.cpp
index 14206cdb8276..c7715ca1f51b 100644
--- a/llvm/lib/Target/AVR/AVRTargetObjectFile.cpp
+++ b/llvm/lib/Target/AVR/AVRTargetObjectFile.cpp
@@ -24,10 +24,8 @@ void AVRTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM) {
Ctx.getELFSection(".progmem.data", ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
}
-MCSection *
-AVRTargetObjectFile::SelectSectionForGlobal(const GlobalObject *GO,
- SectionKind Kind,
- const TargetMachine &TM) const {
+MCSection *AVRTargetObjectFile::SelectSectionForGlobal(
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
// Global values in flash memory are placed in the progmem.data section
// unless they already have a user assigned section.
if (AVR::isProgramMemoryAddress(GO) && !GO->hasSection() && Kind.isReadOnly())
@@ -37,4 +35,3 @@ AVRTargetObjectFile::SelectSectionForGlobal(const GlobalObject *GO,
return Base::SelectSectionForGlobal(GO, Kind, TM);
}
} // end of namespace llvm
-
diff --git a/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp b/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
index 19f769270569..95ecd28200ba 100644
--- a/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
+++ b/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
@@ -25,9 +25,9 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/TargetRegistry.h"
#include <sstream>
@@ -170,9 +170,11 @@ public:
}
bool isImmCom8() const {
- if (!isImm()) return false;
+ if (!isImm())
+ return false;
const auto *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
+ if (!CE)
+ return false;
int64_t Value = CE->getValue();
return isUInt<8>(Value);
}
@@ -322,11 +324,16 @@ bool AVRAsmParser::MatchAndEmitInstruction(SMLoc Loc, unsigned &Opcode,
MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm);
switch (MatchResult) {
- case Match_Success: return emit(Inst, Loc, Out);
- case Match_MissingFeature: return missingFeature(Loc, ErrorInfo);
- case Match_InvalidOperand: return invalidOperand(Loc, Operands, ErrorInfo);
- case Match_MnemonicFail: return Error(Loc, "invalid instruction");
- default: return true;
+ case Match_Success:
+ return emit(Inst, Loc, Out);
+ case Match_MissingFeature:
+ return missingFeature(Loc, ErrorInfo);
+ case Match_InvalidOperand:
+ return invalidOperand(Loc, Operands, ErrorInfo);
+ case Match_MnemonicFail:
+ return Error(Loc, "invalid instruction");
+ default:
+ return true;
}
}
@@ -440,8 +447,7 @@ bool AVRAsmParser::tryParseRelocExpression(OperandVector &Operands) {
tokens[1].getKind() == AsmToken::Minus)) {
AsmToken::TokenKind CurTok = Parser.getLexer().getKind();
- if (CurTok == AsmToken::Minus ||
- tokens[1].getKind() == AsmToken::Minus) {
+ if (CurTok == AsmToken::Minus || tokens[1].getKind() == AsmToken::Minus) {
isNegated = true;
} else {
assert(CurTok == AsmToken::Plus);
@@ -461,7 +467,7 @@ bool AVRAsmParser::tryParseRelocExpression(OperandVector &Operands) {
return true;
}
StringRef ModifierName = Parser.getTok().getString();
- ModifierKind = AVRMCExpr::getKindByName(ModifierName.str().c_str());
+ ModifierKind = AVRMCExpr::getKindByName(ModifierName);
if (ModifierKind != AVRMCExpr::VK_AVR_None) {
Parser.Lex();
@@ -469,7 +475,7 @@ bool AVRAsmParser::tryParseRelocExpression(OperandVector &Operands) {
if (Parser.getTok().getString() == GENERATE_STUBS &&
Parser.getTok().getKind() == AsmToken::Identifier) {
std::string GSModName = ModifierName.str() + "_" + GENERATE_STUBS;
- ModifierKind = AVRMCExpr::getKindByName(GSModName.c_str());
+ ModifierKind = AVRMCExpr::getKindByName(GSModName);
if (ModifierKind != AVRMCExpr::VK_AVR_None)
Parser.Lex(); // Eat gs modifier name
}
@@ -498,8 +504,8 @@ bool AVRAsmParser::tryParseRelocExpression(OperandVector &Operands) {
assert(Parser.getTok().getKind() == AsmToken::RParen);
Parser.Lex(); // Eat closing parenthesis
- MCExpr const *Expression = AVRMCExpr::create(ModifierKind, InnerExpression,
- isNegated, getContext());
+ MCExpr const *Expression =
+ AVRMCExpr::create(ModifierKind, InnerExpression, isNegated, getContext());
SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
Operands.push_back(AVROperand::CreateImm(Expression, S, E));
@@ -552,8 +558,7 @@ bool AVRAsmParser::parseOperand(OperandVector &Operands) {
return true;
}
-OperandMatchResultTy
-AVRAsmParser::parseMemriOperand(OperandVector &Operands) {
+OperandMatchResultTy AVRAsmParser::parseMemriOperand(OperandVector &Operands) {
LLVM_DEBUG(dbgs() << "parseMemriOperand()\n");
SMLoc E, S;
@@ -620,7 +625,8 @@ bool AVRAsmParser::ParseInstruction(ParseInstructionInfo &Info,
bool first = true;
while (getLexer().isNot(AsmToken::EndOfStatement)) {
- if (!first) eatComma();
+ if (!first)
+ eatComma();
first = false;
@@ -670,7 +676,7 @@ bool AVRAsmParser::parseLiteralValues(unsigned SizeInBytes, SMLoc L) {
Tokens[1].getKind() == AsmToken::Identifier) {
MCSymbol *Symbol = getContext().getOrCreateSymbol(".text");
AVRStreamer.emitValueForModiferKind(Symbol, SizeInBytes, L,
- AVRMCExpr::VK_AVR_None);
+ AVRMCExpr::VK_AVR_None);
return false;
}
@@ -678,7 +684,7 @@ bool AVRAsmParser::parseLiteralValues(unsigned SizeInBytes, SMLoc L) {
Parser.getLexer().peekTok().getKind() == AsmToken::LParen) {
StringRef ModifierName = Parser.getTok().getString();
AVRMCExpr::VariantKind ModifierKind =
- AVRMCExpr::getKindByName(ModifierName.str().c_str());
+ AVRMCExpr::getKindByName(ModifierName);
if (ModifierKind != AVRMCExpr::VK_AVR_None) {
Parser.Lex();
Parser.Lex(); // Eat the modifier and parenthesis
@@ -722,7 +728,7 @@ unsigned AVRAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
int64_t RegNum = Const->getValue();
std::ostringstream RegName;
RegName << "r" << RegNum;
- RegNum = MatchRegisterName(RegName.str().c_str());
+ RegNum = MatchRegisterName(RegName.str());
if (RegNum != AVR::NoRegister) {
Op.makeReg(RegNum);
if (validateOperandClass(Op, Expected) == Match_Success) {
diff --git a/llvm/lib/Target/AVR/Disassembler/AVRDisassembler.cpp b/llvm/lib/Target/AVR/Disassembler/AVRDisassembler.cpp
index 8e7251a74dfd..9dcd370b9f1e 100644
--- a/llvm/lib/Target/AVR/Disassembler/AVRDisassembler.cpp
+++ b/llvm/lib/Target/AVR/Disassembler/AVRDisassembler.cpp
@@ -21,7 +21,7 @@
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
@@ -42,7 +42,7 @@ public:
ArrayRef<uint8_t> Bytes, uint64_t Address,
raw_ostream &CStream) const override;
};
-}
+} // namespace
static MCDisassembler *createAVRDisassembler(const Target &T,
const MCSubtargetInfo &STI,
@@ -50,7 +50,6 @@ static MCDisassembler *createAVRDisassembler(const Target &T,
return new AVRDisassembler(STI, Ctx);
}
-
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAVRDisassembler() {
// Register the disassembler.
TargetRegistry::RegisterMCDisassembler(getTheAVRTarget(),
@@ -58,18 +57,16 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAVRDisassembler() {
}
static const uint16_t GPRDecoderTable[] = {
- AVR::R0, AVR::R1, AVR::R2, AVR::R3,
- AVR::R4, AVR::R5, AVR::R6, AVR::R7,
- AVR::R8, AVR::R9, AVR::R10, AVR::R11,
- AVR::R12, AVR::R13, AVR::R14, AVR::R15,
- AVR::R16, AVR::R17, AVR::R18, AVR::R19,
- AVR::R20, AVR::R21, AVR::R22, AVR::R23,
- AVR::R24, AVR::R25, AVR::R26, AVR::R27,
- AVR::R28, AVR::R29, AVR::R30, AVR::R31,
+ AVR::R0, AVR::R1, AVR::R2, AVR::R3, AVR::R4, AVR::R5, AVR::R6,
+ AVR::R7, AVR::R8, AVR::R9, AVR::R10, AVR::R11, AVR::R12, AVR::R13,
+ AVR::R14, AVR::R15, AVR::R16, AVR::R17, AVR::R18, AVR::R19, AVR::R20,
+ AVR::R21, AVR::R22, AVR::R23, AVR::R24, AVR::R25, AVR::R26, AVR::R27,
+ AVR::R28, AVR::R29, AVR::R30, AVR::R31,
};
static DecodeStatus DecodeGPR8RegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const void *Decoder) {
if (RegNo > 31)
return MCDisassembler::Fail;
@@ -79,39 +76,41 @@ static DecodeStatus DecodeGPR8RegisterClass(MCInst &Inst, unsigned RegNo,
}
static DecodeStatus DecodeLD8RegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const void *Decoder) {
if (RegNo > 15)
return MCDisassembler::Fail;
- unsigned Register = GPRDecoderTable[RegNo+16];
+ unsigned Register = GPRDecoderTable[RegNo + 16];
Inst.addOperand(MCOperand::createReg(Register));
return MCDisassembler::Success;
}
static DecodeStatus DecodePTRREGSRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const void *Decoder) {
// Note: this function must be defined but does not seem to be called.
assert(false && "unimplemented: PTRREGS register class");
return MCDisassembler::Success;
}
-static DecodeStatus decodeFIOARr(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+static DecodeStatus decodeFIOARr(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder);
-static DecodeStatus decodeFIORdA(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+static DecodeStatus decodeFIORdA(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder);
-static DecodeStatus decodeFIOBIT(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+static DecodeStatus decodeFIOBIT(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder);
static DecodeStatus decodeCallTarget(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus decodeFRd(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+static DecodeStatus decodeFRd(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder);
-static DecodeStatus decodeFLPMX(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+static DecodeStatus decodeFLPMX(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder);
static DecodeStatus decodeFFMULRdRr(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
@@ -119,40 +118,42 @@ static DecodeStatus decodeFFMULRdRr(MCInst &Inst, unsigned Insn,
static DecodeStatus decodeFMOVWRdRr(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus decodeFWRdK(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+static DecodeStatus decodeFWRdK(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder);
static DecodeStatus decodeFMUL2RdRr(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
#include "AVRGenDisassemblerTables.inc"
-static DecodeStatus decodeFIOARr(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus decodeFIOARr(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
unsigned addr = 0;
addr |= fieldFromInstruction(Insn, 0, 4);
addr |= fieldFromInstruction(Insn, 9, 2) << 4;
unsigned reg = fieldFromInstruction(Insn, 4, 5);
Inst.addOperand(MCOperand::createImm(addr));
- if (DecodeGPR8RegisterClass(Inst, reg, Address, Decoder) == MCDisassembler::Fail)
+ if (DecodeGPR8RegisterClass(Inst, reg, Address, Decoder) ==
+ MCDisassembler::Fail)
return MCDisassembler::Fail;
return MCDisassembler::Success;
}
-static DecodeStatus decodeFIORdA(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus decodeFIORdA(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
unsigned addr = 0;
addr |= fieldFromInstruction(Insn, 0, 4);
addr |= fieldFromInstruction(Insn, 9, 2) << 4;
unsigned reg = fieldFromInstruction(Insn, 4, 5);
- if (DecodeGPR8RegisterClass(Inst, reg, Address, Decoder) == MCDisassembler::Fail)
+ if (DecodeGPR8RegisterClass(Inst, reg, Address, Decoder) ==
+ MCDisassembler::Fail)
return MCDisassembler::Fail;
Inst.addOperand(MCOperand::createImm(addr));
return MCDisassembler::Success;
}
-static DecodeStatus decodeFIOBIT(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus decodeFIOBIT(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
unsigned addr = fieldFromInstruction(Insn, 3, 5);
unsigned b = fieldFromInstruction(Insn, 0, 3);
Inst.addOperand(MCOperand::createImm(addr));
@@ -168,16 +169,17 @@ static DecodeStatus decodeCallTarget(MCInst &Inst, unsigned Field,
return MCDisassembler::Success;
}
-static DecodeStatus decodeFRd(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus decodeFRd(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
unsigned d = fieldFromInstruction(Insn, 4, 5);
- if (DecodeGPR8RegisterClass(Inst, d, Address, Decoder) == MCDisassembler::Fail)
+ if (DecodeGPR8RegisterClass(Inst, d, Address, Decoder) ==
+ MCDisassembler::Fail)
return MCDisassembler::Fail;
return MCDisassembler::Success;
}
-static DecodeStatus decodeFLPMX(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus decodeFLPMX(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
if (decodeFRd(Inst, Insn, Address, Decoder) == MCDisassembler::Fail)
return MCDisassembler::Fail;
Inst.addOperand(MCOperand::createReg(AVR::R31R30));
@@ -188,9 +190,11 @@ static DecodeStatus decodeFFMULRdRr(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
unsigned d = fieldFromInstruction(Insn, 4, 3) + 16;
unsigned r = fieldFromInstruction(Insn, 0, 3) + 16;
- if (DecodeGPR8RegisterClass(Inst, d, Address, Decoder) == MCDisassembler::Fail)
+ if (DecodeGPR8RegisterClass(Inst, d, Address, Decoder) ==
+ MCDisassembler::Fail)
return MCDisassembler::Fail;
- if (DecodeGPR8RegisterClass(Inst, r, Address, Decoder) == MCDisassembler::Fail)
+ if (DecodeGPR8RegisterClass(Inst, r, Address, Decoder) ==
+ MCDisassembler::Fail)
return MCDisassembler::Fail;
return MCDisassembler::Success;
}
@@ -199,22 +203,26 @@ static DecodeStatus decodeFMOVWRdRr(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
unsigned r = fieldFromInstruction(Insn, 4, 4) * 2;
unsigned d = fieldFromInstruction(Insn, 0, 4) * 2;
- if (DecodeGPR8RegisterClass(Inst, r, Address, Decoder) == MCDisassembler::Fail)
+ if (DecodeGPR8RegisterClass(Inst, r, Address, Decoder) ==
+ MCDisassembler::Fail)
return MCDisassembler::Fail;
- if (DecodeGPR8RegisterClass(Inst, d, Address, Decoder) == MCDisassembler::Fail)
+ if (DecodeGPR8RegisterClass(Inst, d, Address, Decoder) ==
+ MCDisassembler::Fail)
return MCDisassembler::Fail;
return MCDisassembler::Success;
}
-static DecodeStatus decodeFWRdK(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus decodeFWRdK(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
unsigned d = fieldFromInstruction(Insn, 4, 2) * 2 + 24; // starts at r24:r25
unsigned k = 0;
k |= fieldFromInstruction(Insn, 0, 4);
k |= fieldFromInstruction(Insn, 6, 2) << 4;
- if (DecodeGPR8RegisterClass(Inst, d, Address, Decoder) == MCDisassembler::Fail)
+ if (DecodeGPR8RegisterClass(Inst, d, Address, Decoder) ==
+ MCDisassembler::Fail)
return MCDisassembler::Fail;
- if (DecodeGPR8RegisterClass(Inst, d, Address, Decoder) == MCDisassembler::Fail)
+ if (DecodeGPR8RegisterClass(Inst, d, Address, Decoder) ==
+ MCDisassembler::Fail)
return MCDisassembler::Fail;
Inst.addOperand(MCOperand::createImm(k));
return MCDisassembler::Success;
@@ -224,9 +232,11 @@ static DecodeStatus decodeFMUL2RdRr(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
unsigned rd = fieldFromInstruction(Insn, 4, 4) + 16;
unsigned rr = fieldFromInstruction(Insn, 0, 4) + 16;
- if (DecodeGPR8RegisterClass(Inst, rd, Address, Decoder) == MCDisassembler::Fail)
+ if (DecodeGPR8RegisterClass(Inst, rd, Address, Decoder) ==
+ MCDisassembler::Fail)
return MCDisassembler::Fail;
- if (DecodeGPR8RegisterClass(Inst, rr, Address, Decoder) == MCDisassembler::Fail)
+ if (DecodeGPR8RegisterClass(Inst, rr, Address, Decoder) ==
+ MCDisassembler::Fail)
return MCDisassembler::Fail;
return MCDisassembler::Success;
}
@@ -253,7 +263,8 @@ static DecodeStatus readInstruction32(ArrayRef<uint8_t> Bytes, uint64_t Address,
}
Size = 4;
- Insn = (Bytes[0] << 16) | (Bytes[1] << 24) | (Bytes[2] << 0) | (Bytes[3] << 8);
+ Insn =
+ (Bytes[0] << 16) | (Bytes[1] << 24) | (Bytes[2] << 0) | (Bytes[3] << 8);
return MCDisassembler::Success;
}
@@ -261,9 +272,12 @@ static DecodeStatus readInstruction32(ArrayRef<uint8_t> Bytes, uint64_t Address,
static const uint8_t *getDecoderTable(uint64_t Size) {
switch (Size) {
- case 2: return DecoderTable16;
- case 4: return DecoderTable32;
- default: llvm_unreachable("instructions must be 16 or 32-bits");
+ case 2:
+ return DecoderTable16;
+ case 4:
+ return DecoderTable32;
+ default:
+ llvm_unreachable("instructions must be 16 or 32-bits");
}
}
@@ -279,11 +293,12 @@ DecodeStatus AVRDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
{
Result = readInstruction16(Bytes, Address, Size, Insn);
- if (Result == MCDisassembler::Fail) return MCDisassembler::Fail;
+ if (Result == MCDisassembler::Fail)
+ return MCDisassembler::Fail;
// Try to auto-decode a 16-bit instruction.
- Result = decodeInstruction(getDecoderTable(Size), Instr,
- Insn, Address, this, STI);
+ Result = decodeInstruction(getDecoderTable(Size), Instr, Insn, Address,
+ this, STI);
if (Result != MCDisassembler::Fail)
return Result;
@@ -293,10 +308,11 @@ DecodeStatus AVRDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
{
Result = readInstruction32(Bytes, Address, Size, Insn);
- if (Result == MCDisassembler::Fail) return MCDisassembler::Fail;
+ if (Result == MCDisassembler::Fail)
+ return MCDisassembler::Fail;
- Result = decodeInstruction(getDecoderTable(Size), Instr, Insn,
- Address, this, STI);
+ Result = decodeInstruction(getDecoderTable(Size), Instr, Insn, Address,
+ this, STI);
if (Result != MCDisassembler::Fail) {
return Result;
@@ -308,4 +324,3 @@ DecodeStatus AVRDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
typedef DecodeStatus (*DecodeFunc)(MCInst &MI, unsigned insn, uint64_t Address,
const void *Decoder);
-
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp b/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
index 49840672bf9a..a3a4d63932c0 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
@@ -44,7 +44,7 @@ static void signed_width(unsigned Width, uint64_t Value,
int64_t Max = maxIntN(Width);
Diagnostic += " (expected an integer in the range " + std::to_string(Min) +
- " to " + std::to_string(Max) + ")";
+ " to " + std::to_string(Max) + ")";
if (Ctx) {
Ctx->reportFatalError(Fixup.getLoc(), Diagnostic);
@@ -62,8 +62,8 @@ static void unsigned_width(unsigned Width, uint64_t Value,
int64_t Max = maxUIntN(Width);
- Diagnostic += " (expected an integer in the range 0 to " +
- std::to_string(Max) + ")";
+ Diagnostic +=
+ " (expected an integer in the range 0 to " + std::to_string(Max) + ")";
if (Ctx) {
Ctx->reportFatalError(Fixup.getLoc(), Diagnostic);
@@ -233,15 +233,14 @@ static void ms8(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
ldi::fixup(Size, Fixup, Value, Ctx);
}
-} // end of ldi namespace
-} // end of adjust namespace
+} // namespace ldi
+} // namespace adjust
namespace llvm {
 // Prepare the value for the target space.
void AVRAsmBackend::adjustFixupValue(const MCFixup &Fixup,
- const MCValue &Target,
- uint64_t &Value,
+ const MCValue &Target, uint64_t &Value,
MCContext *Ctx) const {
// The size of the fixup in bits.
uint64_t Size = AVRAsmBackend::getFixupKindInfo(Fixup.getKind()).TargetSize;
@@ -280,7 +279,8 @@ void AVRAsmBackend::adjustFixupValue(const MCFixup &Fixup,
break;
case AVR::fixup_hh8_ldi:
case AVR::fixup_hh8_ldi_pm:
- if (Kind == AVR::fixup_hh8_ldi_pm) adjust::pm(Value);
+ if (Kind == AVR::fixup_hh8_ldi_pm)
+ adjust::pm(Value);
adjust::ldi::hh8(Size, Fixup, Value, Ctx);
break;
@@ -290,21 +290,24 @@ void AVRAsmBackend::adjustFixupValue(const MCFixup &Fixup,
case AVR::fixup_lo8_ldi_neg:
case AVR::fixup_lo8_ldi_pm_neg:
- if (Kind == AVR::fixup_lo8_ldi_pm_neg) adjust::pm(Value);
+ if (Kind == AVR::fixup_lo8_ldi_pm_neg)
+ adjust::pm(Value);
adjust::ldi::neg(Value);
adjust::ldi::lo8(Size, Fixup, Value, Ctx);
break;
case AVR::fixup_hi8_ldi_neg:
case AVR::fixup_hi8_ldi_pm_neg:
- if (Kind == AVR::fixup_hi8_ldi_pm_neg) adjust::pm(Value);
+ if (Kind == AVR::fixup_hi8_ldi_pm_neg)
+ adjust::pm(Value);
adjust::ldi::neg(Value);
adjust::ldi::hi8(Size, Fixup, Value, Ctx);
break;
case AVR::fixup_hh8_ldi_neg:
case AVR::fixup_hh8_ldi_pm_neg:
- if (Kind == AVR::fixup_hh8_ldi_pm_neg) adjust::pm(Value);
+ if (Kind == AVR::fixup_hh8_ldi_pm_neg)
+ adjust::pm(Value);
adjust::ldi::neg(Value);
adjust::ldi::hh8(Size, Fixup, Value, Ctx);
@@ -455,7 +458,8 @@ MCFixupKindInfo const &AVRAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
return Infos[Kind - FirstTargetFixupKind];
}
-bool AVRAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
+bool AVRAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const {
// If the count is not 2-byte aligned, we must be writing data into the text
// section (otherwise we have unaligned instructions, and thus have far
// bigger problems), so just write zeros instead.
@@ -468,8 +472,9 @@ bool AVRAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
bool AVRAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
const MCFixup &Fixup,
const MCValue &Target) {
- switch ((unsigned) Fixup.getKind()) {
- default: return false;
+ switch ((unsigned)Fixup.getKind()) {
+ default:
+ return false;
// Fixups which should always be recorded as relocations.
case AVR::fixup_7_pcrel:
case AVR::fixup_13_pcrel:
@@ -485,4 +490,3 @@ MCAsmBackend *createAVRAsmBackend(const Target &T, const MCSubtargetInfo &STI,
}
} // end of namespace llvm
-
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h b/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h
index 46dc914adf78..ea7fc30ab9d0 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h
@@ -55,7 +55,8 @@ public:
return false;
}
- bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
+ bool writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const override;
bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target) override;
@@ -67,4 +68,3 @@ private:
} // end namespace llvm
#endif // LLVM_AVR_ASM_BACKEND_H
-
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp b/llvm/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp
index bedf68db08ca..b90e103794da 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp
@@ -27,21 +27,18 @@ public:
virtual ~AVRELFObjectWriter() {}
- unsigned getRelocType(MCContext &Ctx,
- const MCValue &Target,
- const MCFixup &Fixup,
- bool IsPCRel) const override;
+ unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
+ const MCFixup &Fixup, bool IsPCRel) const override;
};
AVRELFObjectWriter::AVRELFObjectWriter(uint8_t OSABI)
: MCELFObjectTargetWriter(false, OSABI, ELF::EM_AVR, true) {}
-unsigned AVRELFObjectWriter::getRelocType(MCContext &Ctx,
- const MCValue &Target,
+unsigned AVRELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
const MCFixup &Fixup,
bool IsPCRel) const {
MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant();
- switch ((unsigned) Fixup.getKind()) {
+ switch ((unsigned)Fixup.getKind()) {
case FK_Data_1:
switch (Modifier) {
default:
@@ -158,4 +155,3 @@ std::unique_ptr<MCObjectTargetWriter> createAVRELFObjectWriter(uint8_t OSABI) {
}
} // end of namespace llvm
-
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp b/llvm/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp
index 6d126ed622aa..85933d6b9bb9 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp
@@ -53,8 +53,7 @@ static unsigned getEFlagsForFeatureSet(const FeatureBitset &Features) {
return EFlags;
}
-AVRELFStreamer::AVRELFStreamer(MCStreamer &S,
- const MCSubtargetInfo &STI)
+AVRELFStreamer::AVRELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI)
: AVRTargetStreamer(S) {
MCAssembler &MCA = getStreamer().getAssembler();
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRFixupKinds.h b/llvm/lib/Target/AVR/MCTargetDesc/AVRFixupKinds.h
index a0dd1dc8ac3e..1f7a926edb5c 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRFixupKinds.h
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRFixupKinds.h
@@ -141,7 +141,7 @@ namespace fixups {
template <typename T> inline void adjustBranchTarget(T &val) { val >>= 1; }
} // end of namespace fixups
-}
-} // end of namespace llvm::AVR
+} // namespace AVR
+} // namespace llvm
#endif // LLVM_AVR_FIXUP_KINDS_H
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.cpp b/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.cpp
index 42fac5e2e000..d68e73ce0bb1 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.cpp
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.cpp
@@ -172,7 +172,8 @@ void AVRInstPrinter::printPCRelImm(const MCInst *MI, unsigned OpNo,
void AVRInstPrinter::printMemri(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
- assert(MI->getOperand(OpNo).isReg() && "Expected a register for the first operand");
+ assert(MI->getOperand(OpNo).isReg() &&
+ "Expected a register for the first operand");
const MCOperand &OffsetOp = MI->getOperand(OpNo + 1);
@@ -195,4 +196,3 @@ void AVRInstPrinter::printMemri(const MCInst *MI, unsigned OpNo,
}
} // end of namespace llvm
-
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.h b/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.h
index 8976ef28f3dc..11f55f6d253b 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.h
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.h
@@ -56,4 +56,3 @@ private:
} // end namespace llvm
#endif // LLVM_AVR_INST_PRINTER_H
-
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp
index 50872d6d7a92..9754ff7f1146 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp
@@ -75,7 +75,7 @@ AVRMCCodeEmitter::loadStorePostEncoder(const MCInst &MI, unsigned EncodedValue,
// check whether either of the registers are the X pointer register.
bool IsRegX = MI.getOperand(0).getReg() == AVR::R27R26 ||
- MI.getOperand(1).getReg() == AVR::R27R26;
+ MI.getOperand(1).getReg() == AVR::R27R26;
bool IsPredec = Opcode == AVR::LDRdPtrPd || Opcode == AVR::STPtrPdRr;
bool IsPostinc = Opcode == AVR::LDRdPtrPi || Opcode == AVR::STPtrPiRr;
@@ -96,8 +96,8 @@ AVRMCCodeEmitter::encodeRelCondBrTarget(const MCInst &MI, unsigned OpNo,
const MCOperand &MO = MI.getOperand(OpNo);
if (MO.isExpr()) {
- Fixups.push_back(MCFixup::create(0, MO.getExpr(),
- MCFixupKind(Fixup), MI.getLoc()));
+ Fixups.push_back(
+ MCFixup::create(0, MO.getExpr(), MCFixupKind(Fixup), MI.getLoc()));
return 0;
}
@@ -119,9 +119,12 @@ unsigned AVRMCCodeEmitter::encodeLDSTPtrReg(const MCInst &MI, unsigned OpNo,
assert(MO.isReg());
switch (MO.getReg()) {
- case AVR::R27R26: return 0x03; // X: 0b11
- case AVR::R29R28: return 0x02; // Y: 0b10
- case AVR::R31R30: return 0x00; // Z: 0b00
+ case AVR::R27R26:
+ return 0x03; // X: 0b11
+ case AVR::R29R28:
+ return 0x02; // Y: 0b10
+ case AVR::R31R30:
+ return 0x00; // Z: 0b00
default:
llvm_unreachable("invalid pointer register");
}
@@ -159,7 +162,7 @@ unsigned AVRMCCodeEmitter::encodeMemri(const MCInst &MI, unsigned OpNo,
} else if (OffsetOp.isExpr()) {
OffsetBits = 0;
Fixups.push_back(MCFixup::create(0, OffsetOp.getExpr(),
- MCFixupKind(AVR::fixup_6), MI.getLoc()));
+ MCFixupKind(AVR::fixup_6), MI.getLoc()));
} else {
llvm_unreachable("invalid value for offset");
}
@@ -193,7 +196,8 @@ unsigned AVRMCCodeEmitter::encodeImm(const MCInst &MI, unsigned OpNo,
}
MCFixupKind FixupKind = static_cast<MCFixupKind>(Fixup);
- Fixups.push_back(MCFixup::create(Offset, MO.getExpr(), FixupKind, MI.getLoc()));
+ Fixups.push_back(
+ MCFixup::create(Offset, MO.getExpr(), FixupKind, MI.getLoc()));
return 0;
}
@@ -251,8 +255,10 @@ unsigned AVRMCCodeEmitter::getMachineOpValue(const MCInst &MI,
const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
- if (MO.isReg()) return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg());
- if (MO.isImm()) return static_cast<unsigned>(MO.getImm());
+ if (MO.isReg())
+ return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg());
+ if (MO.isImm())
+ return static_cast<unsigned>(MO.getImm());
if (MO.isDFPImm())
return static_cast<unsigned>(bit_cast<double>(MO.getDFPImm()));
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h
index 2e24d885c155..1bfa79f26b27 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h
@@ -109,7 +109,6 @@ private:
MCContext &Ctx;
};
-} // end namespace of llvm.
+} // namespace llvm
#endif // LLVM_AVR_CODE_EMITTER_H
-
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCELFStreamer.cpp b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCELFStreamer.cpp
index 0743344bc1ed..5f2a5a82e41d 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCELFStreamer.cpp
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCELFStreamer.cpp
@@ -13,8 +13,8 @@
#include "MCTargetDesc/AVRMCELFStreamer.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSymbol.h"
#define DEBUG_TYPE "avrmcelfstreamer"
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp
index a4f8787e5667..7e735ffa6cec 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp
@@ -19,16 +19,15 @@ namespace llvm {
namespace {
const struct ModifierEntry {
- const char * const Spelling;
+ const char *const Spelling;
AVRMCExpr::VariantKind VariantKind;
} ModifierNames[] = {
{"lo8", AVRMCExpr::VK_AVR_LO8}, {"hi8", AVRMCExpr::VK_AVR_HI8},
{"hh8", AVRMCExpr::VK_AVR_HH8}, // synonym with hlo8
{"hlo8", AVRMCExpr::VK_AVR_HH8}, {"hhi8", AVRMCExpr::VK_AVR_HHI8},
- {"pm", AVRMCExpr::VK_AVR_PM},
- {"pm_lo8", AVRMCExpr::VK_AVR_PM_LO8}, {"pm_hi8", AVRMCExpr::VK_AVR_PM_HI8},
- {"pm_hh8", AVRMCExpr::VK_AVR_PM_HH8},
+ {"pm", AVRMCExpr::VK_AVR_PM}, {"pm_lo8", AVRMCExpr::VK_AVR_PM_LO8},
+ {"pm_hi8", AVRMCExpr::VK_AVR_PM_HI8}, {"pm_hh8", AVRMCExpr::VK_AVR_PM_HH8},
{"lo8_gs", AVRMCExpr::VK_AVR_LO8_GS}, {"hi8_gs", AVRMCExpr::VK_AVR_HI8_GS},
{"gs", AVRMCExpr::VK_AVR_GS},
@@ -81,7 +80,8 @@ bool AVRMCExpr::evaluateAsRelocatableImpl(MCValue &Result,
if (Value.isAbsolute()) {
Result = MCValue::get(evaluateAsInt64(Value.getConstant()));
} else {
- if (!Layout) return false;
+ if (!Layout)
+ return false;
MCContext &Context = Layout->getAssembler().getContext();
const MCSymbolRefExpr *Sym = Value.getSymA();
@@ -219,4 +219,3 @@ AVRMCExpr::VariantKind AVRMCExpr::getKindByName(StringRef Name) {
}
} // end of namespace llvm
-
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCExpr.h b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCExpr.h
index e35385ebd90a..68589763f29a 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCExpr.h
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCExpr.h
@@ -34,7 +34,7 @@ public:
VK_AVR_LO8_GS, ///< Corresponds to `lo8(gs())`.
VK_AVR_HI8_GS, ///< Corresponds to `hi8(gs())`.
- VK_AVR_GS, ///< Corresponds to `gs()`.
+ VK_AVR_GS, ///< Corresponds to `gs()`.
};
public:
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp
index 95f4465924cc..cdfe4a21105d 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp
@@ -10,21 +10,21 @@
//
//===----------------------------------------------------------------------===//
+#include "AVRMCTargetDesc.h"
#include "AVRELFStreamer.h"
#include "AVRInstPrinter.h"
#include "AVRMCAsmInfo.h"
#include "AVRMCELFStreamer.h"
-#include "AVRMCTargetDesc.h"
#include "AVRTargetStreamer.h"
#include "TargetInfo/AVRTargetInfo.h"
#include "llvm/MC/MCAsmBackend.h"
-#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
#include "AVRGenInstrInfo.inc"
@@ -108,7 +108,8 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAVRTargetMC() {
createAVRMCInstPrinter);
// Register the MC Code Emitter
- TargetRegistry::RegisterMCCodeEmitter(getTheAVRTarget(), createAVRMCCodeEmitter);
+ TargetRegistry::RegisterMCCodeEmitter(getTheAVRTarget(),
+ createAVRMCCodeEmitter);
// Register the obj streamer
TargetRegistry::RegisterELFStreamer(getTheAVRTarget(), createMCStreamer);
@@ -124,4 +125,3 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAVRTargetMC() {
// Register the asm backend (as little endian).
TargetRegistry::RegisterMCAsmBackend(getTheAVRTarget(), createAVRAsmBackend);
}
-
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.cpp b/llvm/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.cpp
index eccd343d79ab..56e0e7810466 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.cpp
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.cpp
@@ -21,23 +21,4 @@ AVRTargetStreamer::AVRTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
AVRTargetAsmStreamer::AVRTargetAsmStreamer(MCStreamer &S)
: AVRTargetStreamer(S) {}
-void AVRTargetStreamer::finish() {
- MCStreamer &OS = getStreamer();
- MCContext &Context = OS.getContext();
-
- MCSymbol *DoCopyData = Context.getOrCreateSymbol("__do_copy_data");
- MCSymbol *DoClearBss = Context.getOrCreateSymbol("__do_clear_bss");
-
- // FIXME: We can disable __do_copy_data if there are no static RAM variables.
-
- OS.emitRawComment(" Declaring this symbol tells the CRT that it should");
- OS.emitRawComment("copy all variables from program memory to RAM on startup");
- OS.emitSymbolAttribute(DoCopyData, MCSA_Global);
-
- OS.emitRawComment(" Declaring this symbol tells the CRT that it should");
- OS.emitRawComment("clear the zeroed data section on startup");
- OS.emitSymbolAttribute(DoClearBss, MCSA_Global);
-}
-
} // end namespace llvm
-
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.h b/llvm/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.h
index 5c4d1a22f6c6..b8b1454a2b8d 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.h
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.h
@@ -18,8 +18,6 @@ class MCStreamer;
class AVRTargetStreamer : public MCTargetStreamer {
public:
explicit AVRTargetStreamer(MCStreamer &S);
-
- void finish() override;
};
/// A target streamer for textual AVR assembly code.
diff --git a/llvm/lib/Target/AVR/TargetInfo/AVRTargetInfo.cpp b/llvm/lib/Target/AVR/TargetInfo/AVRTargetInfo.cpp
index 69b509b33e88..dd61add1526c 100644
--- a/llvm/lib/Target/AVR/TargetInfo/AVRTargetInfo.cpp
+++ b/llvm/lib/Target/AVR/TargetInfo/AVRTargetInfo.cpp
@@ -7,16 +7,15 @@
//===----------------------------------------------------------------------===//
#include "TargetInfo/AVRTargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
namespace llvm {
Target &getTheAVRTarget() {
static Target TheAVRTarget;
return TheAVRTarget;
}
-}
+} // namespace llvm
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAVRTargetInfo() {
llvm::RegisterTarget<llvm::Triple::avr> X(llvm::getTheAVRTarget(), "avr",
"Atmel AVR Microcontroller", "AVR");
}
-
diff --git a/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp b/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
index 57488bc28f98..50298bf5e943 100644
--- a/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
+++ b/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
@@ -19,8 +19,8 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/BPF/BPF.h b/llvm/lib/Target/BPF/BPF.h
index a98a3e08d5de..89990f7e15c2 100644
--- a/llvm/lib/Target/BPF/BPF.h
+++ b/llvm/lib/Target/BPF/BPF.h
@@ -21,6 +21,7 @@ ModulePass *createBPFCheckAndAdjustIR();
FunctionPass *createBPFAbstractMemberAccess(BPFTargetMachine *TM);
FunctionPass *createBPFPreserveDIType();
+FunctionPass *createBPFIRPeephole();
FunctionPass *createBPFISelDag(BPFTargetMachine &TM);
FunctionPass *createBPFMISimplifyPatchablePass();
FunctionPass *createBPFMIPeepholePass();
@@ -33,6 +34,7 @@ void initializeBPFCheckAndAdjustIRPass(PassRegistry&);
void initializeBPFAbstractMemberAccessLegacyPassPass(PassRegistry &);
void initializeBPFPreserveDITypePass(PassRegistry&);
+void initializeBPFIRPeepholePass(PassRegistry&);
void initializeBPFMISimplifyPatchablePass(PassRegistry&);
void initializeBPFMIPeepholePass(PassRegistry&);
void initializeBPFMIPeepholeTruncElimPass(PassRegistry&);
@@ -57,6 +59,13 @@ public:
static bool isRequired() { return true; }
};
+class BPFIRPeepholePass : public PassInfoMixin<BPFIRPeepholePass> {
+public:
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+ static bool isRequired() { return true; }
+};
+
class BPFAdjustOptPass : public PassInfoMixin<BPFAdjustOptPass> {
public:
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
diff --git a/llvm/lib/Target/BPF/BPFAdjustOpt.cpp b/llvm/lib/Target/BPF/BPFAdjustOpt.cpp
index 7088d55e1a71..69d0bca0bd77 100644
--- a/llvm/lib/Target/BPF/BPFAdjustOpt.cpp
+++ b/llvm/lib/Target/BPF/BPFAdjustOpt.cpp
@@ -15,6 +15,7 @@
#include "BPFTargetMachine.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicsBPF.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
@@ -66,6 +67,7 @@ private:
Module *M;
SmallVector<PassThroughInfo, 16> PassThroughs;
+ bool adjustICmpToBuiltin();
void adjustBasicBlock(BasicBlock &BB);
bool serializeICMPCrossBB(BasicBlock &BB);
void adjustInst(Instruction &I);
@@ -85,14 +87,72 @@ ModulePass *llvm::createBPFAdjustOpt() { return new BPFAdjustOpt(); }
bool BPFAdjustOpt::runOnModule(Module &M) { return BPFAdjustOptImpl(&M).run(); }
bool BPFAdjustOptImpl::run() {
+ bool Changed = adjustICmpToBuiltin();
+
for (Function &F : *M)
for (auto &BB : F) {
adjustBasicBlock(BB);
for (auto &I : BB)
adjustInst(I);
}
+ return insertPassThrough() || Changed;
+}
+
+// Commit acabad9ff6bf ("[InstCombine] try to canonicalize icmp with
+// trunc op into mask and cmp") added a transformation that converts
+// "(conv)a < power_2_const" to "a & <const>" in certain cases. The BPF
+// kernel verifier has to handle the resulting code conservatively, which
+// may cause it to reject otherwise legitimate programs.
+// Here, we change the related icmp code to a builtin which will be
+// restored to the original icmp code later, in order to prevent that
+// InstCombine transformation.
+bool BPFAdjustOptImpl::adjustICmpToBuiltin() {
+ bool Changed = false;
+ ICmpInst *ToBeDeleted = nullptr;
+ for (Function &F : *M)
+ for (auto &BB : F)
+ for (auto &I : BB) {
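+        // Defer erasing the rewritten icmp to the next loop iteration so
+        // that the basic-block instruction iterator is not invalidated.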
+ if (ToBeDeleted) {
+ ToBeDeleted->eraseFromParent();
+ ToBeDeleted = nullptr;
+ }
+
+ auto *Icmp = dyn_cast<ICmpInst>(&I);
+ if (!Icmp)
+ continue;
+
+ Value *Op0 = Icmp->getOperand(0);
+ if (!isa<TruncInst>(Op0))
+ continue;
+
+ auto ConstOp1 = dyn_cast<ConstantInt>(Icmp->getOperand(1));
+ if (!ConstOp1)
+ continue;
+
+ auto ConstOp1Val = ConstOp1->getValue().getZExtValue();
+ auto Op = Icmp->getPredicate();
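+        // Only handle unsigned compares against a power-of-two boundary:
+        // ULT/UGE require ConstOp1Val itself to be a power of two, while
+        // ULE/UGT require ConstOp1Val + 1 to be a power of two.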
+ if (Op == ICmpInst::ICMP_ULT || Op == ICmpInst::ICMP_UGE) {
+ if ((ConstOp1Val - 1) & ConstOp1Val)
+ continue;
+ } else if (Op == ICmpInst::ICMP_ULE || Op == ICmpInst::ICMP_UGT) {
+ if (ConstOp1Val & (ConstOp1Val + 1))
+ continue;
+ } else {
+ continue;
+ }
+
+ Constant *Opcode =
+ ConstantInt::get(Type::getInt32Ty(BB.getContext()), Op);
+ Function *Fn = Intrinsic::getDeclaration(
+ M, Intrinsic::bpf_compare, {Op0->getType(), ConstOp1->getType()});
+ auto *NewInst = CallInst::Create(Fn, {Opcode, Op0, ConstOp1});
+ BB.getInstList().insert(I.getIterator(), NewInst);
+ Icmp->replaceAllUsesWith(NewInst);
+ Changed = true;
+ ToBeDeleted = Icmp;
+ }
- return insertPassThrough();
+ return Changed;
}
bool BPFAdjustOptImpl::insertPassThrough() {
diff --git a/llvm/lib/Target/BPF/BPFAsmPrinter.cpp b/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
index 37950e105bdc..d6145f53c170 100644
--- a/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
+++ b/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
@@ -27,7 +27,7 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp b/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp
index 5239218ad003..cf1bc3f7c5bc 100644
--- a/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp
+++ b/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp
@@ -46,6 +46,7 @@ private:
void checkIR(Module &M);
bool adjustIR(Module &M);
bool removePassThroughBuiltin(Module &M);
+ bool removeCompareBuiltin(Module &M);
};
} // End anonymous namespace
@@ -120,8 +121,50 @@ bool BPFCheckAndAdjustIR::removePassThroughBuiltin(Module &M) {
return Changed;
}
+bool BPFCheckAndAdjustIR::removeCompareBuiltin(Module &M) {
+  // Remove __builtin_bpf_compare()'s, which are used to prevent
+  // certain IR optimizations. Now that the major IR optimizations are
+  // done, remove them.
+ bool Changed = false;
+ CallInst *ToBeDeleted = nullptr;
+ for (Function &F : M)
+ for (auto &BB : F)
+ for (auto &I : BB) {
+ if (ToBeDeleted) {
+ ToBeDeleted->eraseFromParent();
+ ToBeDeleted = nullptr;
+ }
+
+ auto *Call = dyn_cast<CallInst>(&I);
+ if (!Call)
+ continue;
+ auto *GV = dyn_cast<GlobalValue>(Call->getCalledOperand());
+ if (!GV)
+ continue;
+ if (!GV->getName().startswith("llvm.bpf.compare"))
+ continue;
+
+ Changed = true;
+ Value *Arg0 = Call->getArgOperand(0);
+ Value *Arg1 = Call->getArgOperand(1);
+ Value *Arg2 = Call->getArgOperand(2);
+
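+        // Arg0 carries the original ICmp predicate as a constant; rebuild
+        // the plain compare from it now that IR optimizations are finished.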
+ auto OpVal = cast<ConstantInt>(Arg0)->getValue().getZExtValue();
+ CmpInst::Predicate Opcode = (CmpInst::Predicate)OpVal;
+
+ auto *ICmp = new ICmpInst(Opcode, Arg1, Arg2);
+ BB.getInstList().insert(Call->getIterator(), ICmp);
+
+ Call->replaceAllUsesWith(ICmp);
+ ToBeDeleted = Call;
+ }
+ return Changed;
+}
+
bool BPFCheckAndAdjustIR::adjustIR(Module &M) {
- return removePassThroughBuiltin(M);
+ bool Changed = removePassThroughBuiltin(M);
+ Changed = removeCompareBuiltin(M) || Changed;
+ return Changed;
}
bool BPFCheckAndAdjustIR::runOnModule(Module &M) {
diff --git a/llvm/lib/Target/BPF/BPFIRPeephole.cpp b/llvm/lib/Target/BPF/BPFIRPeephole.cpp
new file mode 100644
index 000000000000..d6a70012dca0
--- /dev/null
+++ b/llvm/lib/Target/BPF/BPFIRPeephole.cpp
@@ -0,0 +1,118 @@
+//===------------ BPFIRPeephole.cpp - IR Peephole Transformation ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// IR level peephole optimization, specifically removing @llvm.stacksave() and
+// @llvm.stackrestore().
+//
+//===----------------------------------------------------------------------===//
+
+#include "BPF.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+
+#define DEBUG_TYPE "bpf-ir-peephole"
+
+using namespace llvm;
+
+namespace {
+
+static bool BPFIRPeepholeImpl(Function &F) {
+ LLVM_DEBUG(dbgs() << "******** BPF IR Peephole ********\n");
+
+ bool Changed = false;
+ Instruction *ToErase = nullptr;
+ for (auto &BB : F) {
+ for (auto &I : BB) {
+ // The following code pattern is handled:
+ // %3 = call i8* @llvm.stacksave()
+ // store i8* %3, i8** %saved_stack, align 8
+ // ...
+ // %4 = load i8*, i8** %saved_stack, align 8
+ // call void @llvm.stackrestore(i8* %4)
+ // ...
+ // The goal is to remove the above four instructions so that no
+ // instruction references r11 (the stack pointer) when there is
+ // ultimately no variable-length stack allocation. InstCombine also
+ // tries to remove these instructions when it can prove this safe
+ // (constant alloca etc.), but depending on the code pattern it may
+ // still miss some.
+ //
+ // By removing these instructions unconditionally, we are fine when
+ // the alloca size is constant. Otherwise, SelectionDAG will complain
+ // since BPF does not support dynamic stack allocation yet.
+ if (ToErase) {
+ ToErase->eraseFromParent();
+ ToErase = nullptr;
+ }
+
+ if (auto *Call = dyn_cast<CallInst>(&I)) {
+ if (auto *GV = dyn_cast<GlobalValue>(Call->getCalledOperand())) {
+ if (!GV->getName().equals("llvm.stacksave"))
+ continue;
+ if (!Call->hasOneUser())
+ continue;
+ auto *Inst = cast<Instruction>(*Call->user_begin());
+ LLVM_DEBUG(dbgs() << "Remove:"; I.dump());
+ LLVM_DEBUG(dbgs() << "Remove:"; Inst->dump(); dbgs() << '\n');
+ Changed = true;
+ Inst->eraseFromParent();
+ ToErase = &I;
+ }
+ continue;
+ }
+
+ if (auto *LD = dyn_cast<LoadInst>(&I)) {
+ if (!LD->hasOneUser())
+ continue;
+ auto *Call = dyn_cast<CallInst>(*LD->user_begin());
+ if (!Call)
+ continue;
+ auto *GV = dyn_cast<GlobalValue>(Call->getCalledOperand());
+ if (!GV)
+ continue;
+ if (!GV->getName().equals("llvm.stackrestore"))
+ continue;
+ LLVM_DEBUG(dbgs() << "Remove:"; I.dump());
+ LLVM_DEBUG(dbgs() << "Remove:"; Call->dump(); dbgs() << '\n');
+ Changed = true;
+ Call->eraseFromParent();
+ ToErase = &I;
+ }
+ }
+ }
+
+ return Changed;
+}
+
+class BPFIRPeephole final : public FunctionPass {
+ bool runOnFunction(Function &F) override;
+
+public:
+ static char ID;
+ BPFIRPeephole() : FunctionPass(ID) {}
+};
+} // End anonymous namespace
+
+char BPFIRPeephole::ID = 0;
+INITIALIZE_PASS(BPFIRPeephole, DEBUG_TYPE, "BPF IR Peephole", false, false)
+
+FunctionPass *llvm::createBPFIRPeephole() { return new BPFIRPeephole(); }
+
+bool BPFIRPeephole::runOnFunction(Function &F) { return BPFIRPeepholeImpl(F); }
+
+PreservedAnalyses BPFIRPeepholePass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ return BPFIRPeepholeImpl(F) ? PreservedAnalyses::none()
+ : PreservedAnalyses::all();
+}
diff --git a/llvm/lib/Target/BPF/BPFISelLowering.cpp b/llvm/lib/Target/BPF/BPFISelLowering.cpp
index c543dfcfca95..90723ac04f64 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.cpp
+++ b/llvm/lib/Target/BPF/BPFISelLowering.cpp
@@ -822,7 +822,7 @@ BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
BuildMI(BB, DL, TII.get(NewCC)).addReg(LHS).addReg(RHS).addMBB(Copy1MBB);
} else {
int64_t imm32 = MI.getOperand(2).getImm();
- // sanity check before we build J*_ri instruction.
+ // Check before we build J*_ri instruction.
assert (isInt<32>(imm32));
BuildMI(BB, DL, TII.get(NewCC))
.addReg(LHS).addImm(imm32).addMBB(Copy1MBB);
@@ -859,3 +859,25 @@ MVT BPFTargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
EVT VT) const {
return (getHasAlu32() && VT == MVT::i32) ? MVT::i32 : MVT::i64;
}
+
+bool BPFTargetLowering::isLegalAddressingMode(const DataLayout &DL,
+ const AddrMode &AM, Type *Ty,
+ unsigned AS,
+ Instruction *I) const {
+ // No global is ever allowed as a base.
+ if (AM.BaseGV)
+ return false;
+
+ switch (AM.Scale) {
+ case 0: // "r+i" or just "i", depending on HasBaseReg.
+ break;
+ case 1:
+ if (!AM.HasBaseReg) // allow "r+i".
+ break;
+ return false; // disallow "r+r" or "r+r+i".
+ default:
+ return false;
+ }
+
+ return true;
+}
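TargetLowering::AddrMode, which the new hook receives, describes a candidate address of the form BaseGV + BaseOffs + BaseReg + Scale*ScaleReg; with the override above, BPF keeps only the register-plus-immediate shapes and rejects anything that needs a global base or a second (scaled) register. The caller below is hypothetical and is only meant to show how the AddrMode fields map onto the cases in the switch.

// Hypothetical caller of the hook (not part of this patch). With the BPF
// override, "r+i" (HasBaseReg, Scale == 0) is accepted, while modes with a
// BaseGV, or with a scaled register on top of a base register ("r+r"), are
// rejected.
#include "llvm/CodeGen/TargetLowering.h"

using namespace llvm;

static bool canFoldRegPlusImm(const TargetLowering &TLI, const DataLayout &DL,
                              Type *AccessTy, unsigned AddrSpace) {
  TargetLowering::AddrMode AM;
  AM.BaseGV = nullptr;  // no global base
  AM.HasBaseReg = true; // base register present
  AM.BaseOffs = 8;      // constant displacement
  AM.Scale = 0;         // no scaled index register
  return TLI.isLegalAddressingMode(DL, AM, AccessTy, AddrSpace); // true on BPF
}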
diff --git a/llvm/lib/Target/BPF/BPFISelLowering.h b/llvm/lib/Target/BPF/BPFISelLowering.h
index d5007425a7f8..dcc53019db75 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.h
+++ b/llvm/lib/Target/BPF/BPFISelLowering.h
@@ -130,6 +130,10 @@ private:
return false;
}
+ bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
+ Type *Ty, unsigned AS,
+ Instruction *I = nullptr) const override;
+
// isTruncateFree - Return true if it's free to truncate a value of
// type Ty1 to type Ty2. e.g. On BPF at alu32 mode, it's free to truncate
// a i64 value in register R1 to i32 by referencing its sub-register W1.
diff --git a/llvm/lib/Target/BPF/BPFMIChecking.cpp b/llvm/lib/Target/BPF/BPFMIChecking.cpp
index 4e24e3d911b8..eb8c48ac49de 100644
--- a/llvm/lib/Target/BPF/BPFMIChecking.cpp
+++ b/llvm/lib/Target/BPF/BPFMIChecking.cpp
@@ -164,7 +164,7 @@ bool BPFMIPreEmitChecking::processAtomicInsts(void) {
DebugLoc Empty;
const DebugLoc &DL = MI.getDebugLoc();
if (DL != Empty)
- report_fatal_error("line " + std::to_string(DL.getLine()) +
+ report_fatal_error(Twine("line ") + std::to_string(DL.getLine()) +
": Invalid usage of the XADD return value", false);
else
report_fatal_error("Invalid usage of the XADD return value", false);
diff --git a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
index ae1f5ea21c12..7e829ea43e89 100644
--- a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
+++ b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
@@ -97,15 +97,13 @@ void BPFMISimplifyPatchable::checkADDrr(MachineRegisterInfo *MRI,
// Go through all uses of %1 as in %1 = ADD_rr %2, %3
const MachineOperand Op0 = Inst->getOperand(0);
- auto Begin = MRI->use_begin(Op0.getReg()), End = MRI->use_end();
- decltype(End) NextI;
- for (auto I = Begin; I != End; I = NextI) {
- NextI = std::next(I);
+ for (MachineOperand &MO :
+ llvm::make_early_inc_range(MRI->use_operands(Op0.getReg()))) {
// The candidate needs to have a unique definition.
- if (!MRI->getUniqueVRegDef(I->getReg()))
+ if (!MRI->getUniqueVRegDef(MO.getReg()))
continue;
- MachineInstr *DefInst = I->getParent();
+ MachineInstr *DefInst = MO.getParent();
unsigned Opcode = DefInst->getOpcode();
unsigned COREOp;
if (Opcode == BPF::LDB || Opcode == BPF::LDH || Opcode == BPF::LDW ||
@@ -131,7 +129,7 @@ void BPFMISimplifyPatchable::checkADDrr(MachineRegisterInfo *MRI,
Opcode == BPF::STD || Opcode == BPF::STB32 || Opcode == BPF::STH32 ||
Opcode == BPF::STW32) {
const MachineOperand &Opnd = DefInst->getOperand(0);
- if (Opnd.isReg() && Opnd.getReg() == I->getReg())
+ if (Opnd.isReg() && Opnd.getReg() == MO.getReg())
continue;
}
diff --git a/llvm/lib/Target/BPF/BPFRegisterInfo.td b/llvm/lib/Target/BPF/BPFRegisterInfo.td
index 88dec063be70..abeef5dc8aad 100644
--- a/llvm/lib/Target/BPF/BPFRegisterInfo.td
+++ b/llvm/lib/Target/BPF/BPFRegisterInfo.td
@@ -36,7 +36,7 @@ foreach I = 0-11 in {
}
// Register classes.
-def GPR32 : RegisterClass<"BPF", [i32], 32, (add
+def GPR32 : RegisterClass<"BPF", [i32], 64, (add
(sequence "W%u", 1, 9),
W0, // Return value
W11, // Stack Ptr
diff --git a/llvm/lib/Target/BPF/BPFSubtarget.cpp b/llvm/lib/Target/BPF/BPFSubtarget.cpp
index fac02e6476b7..77e3cd393f87 100644
--- a/llvm/lib/Target/BPF/BPFSubtarget.cpp
+++ b/llvm/lib/Target/BPF/BPFSubtarget.cpp
@@ -12,8 +12,8 @@
#include "BPFSubtarget.h"
#include "BPF.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Host.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/BPF/BPFTargetMachine.cpp b/llvm/lib/Target/BPF/BPFTargetMachine.cpp
index 5b0431293dc2..2fb76ab5c440 100644
--- a/llvm/lib/Target/BPF/BPFTargetMachine.cpp
+++ b/llvm/lib/Target/BPF/BPFTargetMachine.cpp
@@ -20,9 +20,9 @@
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/Scalar.h"
@@ -43,6 +43,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeBPFTarget() {
PassRegistry &PR = *PassRegistry::getPassRegistry();
initializeBPFAbstractMemberAccessLegacyPassPass(PR);
initializeBPFPreserveDITypePass(PR);
+ initializeBPFIRPeepholePass(PR);
initializeBPFAdjustOptPass(PR);
initializeBPFCheckAndAdjustIRPass(PR);
initializeBPFMIPeepholePass(PR);
@@ -107,6 +108,7 @@ void BPFTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
[&](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
PM.add(createBPFAbstractMemberAccess(this));
PM.add(createBPFPreserveDIType());
+ PM.add(createBPFIRPeephole());
});
Builder.addExtension(
@@ -124,18 +126,19 @@ void BPFTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
void BPFTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
PB.registerPipelineStartEPCallback(
- [=](ModulePassManager &MPM, PassBuilder::OptimizationLevel) {
+ [=](ModulePassManager &MPM, OptimizationLevel) {
FunctionPassManager FPM;
FPM.addPass(BPFAbstractMemberAccessPass(this));
FPM.addPass(BPFPreserveDITypePass());
+ FPM.addPass(BPFIRPeepholePass());
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
});
PB.registerPeepholeEPCallback([=](FunctionPassManager &FPM,
- PassBuilder::OptimizationLevel Level) {
+ OptimizationLevel Level) {
FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().hoistCommonInsts(true)));
});
PB.registerPipelineEarlySimplificationEPCallback(
- [=](ModulePassManager &MPM, PassBuilder::OptimizationLevel) {
+ [=](ModulePassManager &MPM, OptimizationLevel) {
MPM.addPass(BPFAdjustOptPass());
});
}
diff --git a/llvm/lib/Target/BPF/BPFTargetTransformInfo.h b/llvm/lib/Target/BPF/BPFTargetTransformInfo.h
index 3bc5556a62f4..6b86bf6e6cc1 100644
--- a/llvm/lib/Target/BPF/BPFTargetTransformInfo.h
+++ b/llvm/lib/Target/BPF/BPFTargetTransformInfo.h
@@ -54,6 +54,23 @@ public:
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
I);
}
+
+ InstructionCost getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
+ TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
+ TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
+ TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
+ TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
+ ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
+ const Instruction *CxtI = nullptr) {
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ if (ISD == ISD::ADD && CostKind == TTI::TCK_RecipThroughput)
+ return SCEVCheapExpansionBudget.getValue() + 1;
+
+ return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
+ Opd2Info, Opd1PropInfo,
+ Opd2PropInfo);
+ }
};
} // end namespace llvm
diff --git a/llvm/lib/Target/BPF/BTF.def b/llvm/lib/Target/BPF/BTF.def
index 66cf2c90ead4..0ae4194bc512 100644
--- a/llvm/lib/Target/BPF/BTF.def
+++ b/llvm/lib/Target/BPF/BTF.def
@@ -31,5 +31,7 @@ HANDLE_BTF_KIND(13, FUNC_PROTO)
HANDLE_BTF_KIND(14, VAR)
HANDLE_BTF_KIND(15, DATASEC)
HANDLE_BTF_KIND(16, FLOAT)
+HANDLE_BTF_KIND(17, DECL_TAG)
+HANDLE_BTF_KIND(18, TYPE_TAG)
#undef HANDLE_BTF_KIND
diff --git a/llvm/lib/Target/BPF/BTF.h b/llvm/lib/Target/BPF/BTF.h
index ad3dcc14c38a..e54b97cd49a9 100644
--- a/llvm/lib/Target/BPF/BTF.h
+++ b/llvm/lib/Target/BPF/BTF.h
@@ -113,7 +113,7 @@ struct CommonType {
/// "Size" tells the size of the type it is describing.
///
/// "Type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
- /// FUNC, FUNC_PROTO and VAR.
+ /// FUNC, FUNC_PROTO, VAR, DECL_TAG and TYPE_TAG.
/// "Type" is a type_id referring to another type.
union {
uint32_t Size;
diff --git a/llvm/lib/Target/BPF/BTFDebug.cpp b/llvm/lib/Target/BPF/BTFDebug.cpp
index c1f8ea99b959..0c510686a13b 100644
--- a/llvm/lib/Target/BPF/BTFDebug.cpp
+++ b/llvm/lib/Target/BPF/BTFDebug.cpp
@@ -43,7 +43,7 @@ void BTFTypeBase::emitType(MCStreamer &OS) {
BTFTypeDerived::BTFTypeDerived(const DIDerivedType *DTy, unsigned Tag,
bool NeedsFixup)
- : DTy(DTy), NeedsFixup(NeedsFixup) {
+ : DTy(DTy), NeedsFixup(NeedsFixup), Name(DTy->getName()) {
switch (Tag) {
case dwarf::DW_TAG_pointer_type:
Kind = BTF::BTF_KIND_PTR;
@@ -66,14 +66,23 @@ BTFTypeDerived::BTFTypeDerived(const DIDerivedType *DTy, unsigned Tag,
BTFType.Info = Kind << 24;
}
+/// Used by DW_TAG_pointer_type only.
+BTFTypeDerived::BTFTypeDerived(unsigned NextTypeId, unsigned Tag,
+ StringRef Name)
+ : DTy(nullptr), NeedsFixup(false), Name(Name) {
+ Kind = BTF::BTF_KIND_PTR;
+ BTFType.Info = Kind << 24;
+ BTFType.Type = NextTypeId;
+}
+
void BTFTypeDerived::completeType(BTFDebug &BDebug) {
if (IsCompleted)
return;
IsCompleted = true;
- BTFType.NameOff = BDebug.addString(DTy->getName());
+ BTFType.NameOff = BDebug.addString(Name);
- if (NeedsFixup)
+ if (NeedsFixup || !DTy)
return;
// The base type for PTR/CONST/VOLATILE could be void.
@@ -386,6 +395,55 @@ void BTFTypeFloat::completeType(BTFDebug &BDebug) {
BTFType.NameOff = BDebug.addString(Name);
}
+BTFTypeDeclTag::BTFTypeDeclTag(uint32_t BaseTypeId, int ComponentIdx,
+ StringRef Tag)
+ : Tag(Tag) {
+ Kind = BTF::BTF_KIND_DECL_TAG;
+ BTFType.Info = Kind << 24;
+ BTFType.Type = BaseTypeId;
+ Info = ComponentIdx;
+}
+
+void BTFTypeDeclTag::completeType(BTFDebug &BDebug) {
+ if (IsCompleted)
+ return;
+ IsCompleted = true;
+
+ BTFType.NameOff = BDebug.addString(Tag);
+}
+
+void BTFTypeDeclTag::emitType(MCStreamer &OS) {
+ BTFTypeBase::emitType(OS);
+ OS.emitInt32(Info);
+}
+
+BTFTypeTypeTag::BTFTypeTypeTag(uint32_t NextTypeId, StringRef Tag)
+ : DTy(nullptr), Tag(Tag) {
+ Kind = BTF::BTF_KIND_TYPE_TAG;
+ BTFType.Info = Kind << 24;
+ BTFType.Type = NextTypeId;
+}
+
+BTFTypeTypeTag::BTFTypeTypeTag(const DIDerivedType *DTy, StringRef Tag)
+ : DTy(DTy), Tag(Tag) {
+ Kind = BTF::BTF_KIND_TYPE_TAG;
+ BTFType.Info = Kind << 24;
+}
+
+void BTFTypeTypeTag::completeType(BTFDebug &BDebug) {
+ if (IsCompleted)
+ return;
+ IsCompleted = true;
+ BTFType.NameOff = BDebug.addString(Tag);
+ if (DTy) {
+ const DIType *ResolvedType = DTy->getBaseType();
+ if (!ResolvedType)
+ BTFType.Type = 0;
+ else
+ BTFType.Type = BDebug.getTypeId(ResolvedType);
+ }
+}
+
uint32_t BTFStringTable::addString(StringRef S) {
// Check whether the string already exists.
for (auto &OffsetM : OffsetToIdMap) {
@@ -475,6 +533,25 @@ void BTFDebug::visitSubroutineType(
}
}
+void BTFDebug::processDeclAnnotations(DINodeArray Annotations,
+ uint32_t BaseTypeId,
+ int ComponentIdx) {
+ if (!Annotations)
+ return;
+
+ for (const Metadata *Annotation : Annotations->operands()) {
+ const MDNode *MD = cast<MDNode>(Annotation);
+ const MDString *Name = cast<MDString>(MD->getOperand(0));
+ if (!Name->getString().equals("btf_decl_tag"))
+ continue;
+
+ const MDString *Value = cast<MDString>(MD->getOperand(1));
+ auto TypeEntry = std::make_unique<BTFTypeDeclTag>(BaseTypeId, ComponentIdx,
+ Value->getString());
+ addType(std::move(TypeEntry));
+ }
+}
+
/// Handle structure/union types.
void BTFDebug::visitStructType(const DICompositeType *CTy, bool IsStruct,
uint32_t &TypeId) {
@@ -498,9 +575,17 @@ void BTFDebug::visitStructType(const DICompositeType *CTy, bool IsStruct,
StructTypes.push_back(TypeEntry.get());
TypeId = addType(std::move(TypeEntry), CTy);
+ // Check struct/union annotations
+ processDeclAnnotations(CTy->getAnnotations(), TypeId, -1);
+
// Visit all struct members.
- for (const auto *Element : Elements)
- visitTypeEntry(cast<DIDerivedType>(Element));
+ int FieldNo = 0;
+ for (const auto *Element : Elements) {
+ const auto Elem = cast<DIDerivedType>(Element);
+ visitTypeEntry(Elem);
+ processDeclAnnotations(Elem->getAnnotations(), TypeId, FieldNo);
+ FieldNo++;
+ }
}
void BTFDebug::visitArrayType(const DICompositeType *CTy, uint32_t &TypeId) {
@@ -609,11 +694,49 @@ void BTFDebug::visitDerivedType(const DIDerivedType *DTy, uint32_t &TypeId,
}
}
- if (Tag == dwarf::DW_TAG_pointer_type || Tag == dwarf::DW_TAG_typedef ||
- Tag == dwarf::DW_TAG_const_type || Tag == dwarf::DW_TAG_volatile_type ||
- Tag == dwarf::DW_TAG_restrict_type) {
+ if (Tag == dwarf::DW_TAG_pointer_type) {
+ SmallVector<const MDString *, 4> MDStrs;
+ DINodeArray Annots = DTy->getAnnotations();
+ if (Annots) {
+ // For a type such as "int __tag1 __tag2 *p", MDStrs will contain
+ // [__tag1, __tag2].
+ for (const Metadata *Annotations : Annots->operands()) {
+ const MDNode *MD = cast<MDNode>(Annotations);
+ const MDString *Name = cast<MDString>(MD->getOperand(0));
+ if (!Name->getString().equals("btf_type_tag"))
+ continue;
+ MDStrs.push_back(cast<MDString>(MD->getOperand(1)));
+ }
+ }
+
+ if (MDStrs.size() > 0) {
+ // With MDStrs [__tag1, __tag2], the output type chain looks like
+ // PTR -> __tag2 -> __tag1 -> BaseType
+ // Below, we construct the BTF types in the order __tag1, __tag2, PTR.
+ auto TypeEntry =
+ std::make_unique<BTFTypeTypeTag>(DTy, MDStrs[0]->getString());
+ unsigned TmpTypeId = addType(std::move(TypeEntry));
+ for (unsigned I = 1; I < MDStrs.size(); I++) {
+ const MDString *Value = MDStrs[I];
+ TypeEntry =
+ std::make_unique<BTFTypeTypeTag>(TmpTypeId, Value->getString());
+ TmpTypeId = addType(std::move(TypeEntry));
+ }
+ auto TypeDEntry =
+ std::make_unique<BTFTypeDerived>(TmpTypeId, Tag, DTy->getName());
+ TypeId = addType(std::move(TypeDEntry), DTy);
+ } else {
+ auto TypeEntry = std::make_unique<BTFTypeDerived>(DTy, Tag, false);
+ TypeId = addType(std::move(TypeEntry), DTy);
+ }
+ } else if (Tag == dwarf::DW_TAG_typedef || Tag == dwarf::DW_TAG_const_type ||
+ Tag == dwarf::DW_TAG_volatile_type ||
+ Tag == dwarf::DW_TAG_restrict_type) {
auto TypeEntry = std::make_unique<BTFTypeDerived>(DTy, Tag, false);
TypeId = addType(std::move(TypeEntry), DTy);
+ if (Tag == dwarf::DW_TAG_typedef)
+ processDeclAnnotations(DTy->getAnnotations(), TypeId, -1);
} else if (Tag != dwarf::DW_TAG_member) {
return;
}
@@ -783,7 +906,9 @@ void BTFDebug::emitBTFSection() {
return;
MCContext &Ctx = OS.getContext();
- OS.SwitchSection(Ctx.getELFSection(".BTF", ELF::SHT_PROGBITS, 0));
+ MCSectionELF *Sec = Ctx.getELFSection(".BTF", ELF::SHT_PROGBITS, 0);
+ Sec->setAlignment(Align(4));
+ OS.SwitchSection(Sec);
// Emit header.
emitCommonHeader();
@@ -821,7 +946,9 @@ void BTFDebug::emitBTFExtSection() {
return;
MCContext &Ctx = OS.getContext();
- OS.SwitchSection(Ctx.getELFSection(".BTF.ext", ELF::SHT_PROGBITS, 0));
+ MCSectionELF *Sec = Ctx.getELFSection(".BTF.ext", ELF::SHT_PROGBITS, 0);
+ Sec->setAlignment(Align(4));
+ OS.SwitchSection(Sec);
// Emit header.
emitCommonHeader();
@@ -964,6 +1091,17 @@ void BTFDebug::beginFunctionImpl(const MachineFunction *MF) {
std::make_unique<BTFTypeFunc>(SP->getName(), ProtoTypeId, Scope);
uint32_t FuncTypeId = addType(std::move(FuncTypeEntry));
+ // Process argument annotations.
+ for (const DINode *DN : SP->getRetainedNodes()) {
+ if (const auto *DV = dyn_cast<DILocalVariable>(DN)) {
+ uint32_t Arg = DV->getArg();
+ if (Arg)
+ processDeclAnnotations(DV->getAnnotations(), FuncTypeId, Arg - 1);
+ }
+ }
+
+ processDeclAnnotations(SP->getAnnotations(), FuncTypeId, -1);
+
for (const auto &TypeEntry : TypeEntries)
TypeEntry->completeType(*this);
@@ -1176,11 +1314,13 @@ void BTFDebug::processGlobals(bool ProcessingMapDef) {
continue;
uint32_t GVTypeId = 0;
+ DIGlobalVariable *DIGlobal = nullptr;
for (auto *GVE : GVs) {
+ DIGlobal = GVE->getVariable();
if (SecName.startswith(".maps"))
- visitMapDefType(GVE->getVariable()->getType(), GVTypeId);
+ visitMapDefType(DIGlobal->getType(), GVTypeId);
else
- visitTypeEntry(GVE->getVariable()->getType(), GVTypeId, false, false);
+ visitTypeEntry(DIGlobal->getType(), GVTypeId, false, false);
break;
}
@@ -1212,6 +1352,8 @@ void BTFDebug::processGlobals(bool ProcessingMapDef) {
std::make_unique<BTFKindVar>(Global.getName(), GVTypeId, GVarInfo);
uint32_t VarId = addType(std::move(VarEntry));
+ processDeclAnnotations(DIGlobal->getAnnotations(), VarId, -1);
+
// An empty SecName means an extern variable without section attribute.
if (SecName.empty())
continue;
@@ -1306,6 +1448,9 @@ void BTFDebug::processFuncPrototypes(const Function *F) {
auto FuncTypeEntry =
std::make_unique<BTFTypeFunc>(SP->getName(), ProtoTypeId, Scope);
uint32_t FuncId = addType(std::move(FuncTypeEntry));
+
+ processDeclAnnotations(SP->getAnnotations(), FuncId, -1);
+
if (F->hasSection()) {
StringRef SecName = F->getSection();
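To make the new annotation plumbing concrete: each __attribute__((btf_decl_tag("..."))) recorded by the front end becomes a BTF_KIND_DECL_TAG entry pointing at the tagged struct, field, variable, function or argument, with the component index selecting the field or argument (-1 for the declaration as a whole), while btf_type_tag annotations on pointer types are emitted as the BTF_KIND_TYPE_TAG chain built in visitDerivedType(). The declarations below are invented for illustration; only the attribute spellings come from the code above.

// Illustrative declarations only; the struct, function and tag names are made up.
#define __decl_tag(x) __attribute__((btf_decl_tag(x)))

struct __decl_tag("kernel_only") map_value { // DECL_TAG, component index -1
  int counter __decl_tag("per_cpu");         // DECL_TAG, component index 0
  long flags;
};

int scratch __decl_tag("scratch_var");       // DECL_TAG on a global, index -1

int handler(int ctx __decl_tag("ctx"))       // DECL_TAG on argument 1, index 0
    __decl_tag("prog");                      // DECL_TAG on the function, index -1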
diff --git a/llvm/lib/Target/BPF/BTFDebug.h b/llvm/lib/Target/BPF/BTFDebug.h
index 2fdcf8500b7f..7c30675c553c 100644
--- a/llvm/lib/Target/BPF/BTFDebug.h
+++ b/llvm/lib/Target/BPF/BTFDebug.h
@@ -64,9 +64,11 @@ public:
class BTFTypeDerived : public BTFTypeBase {
const DIDerivedType *DTy;
bool NeedsFixup;
+ StringRef Name;
public:
BTFTypeDerived(const DIDerivedType *Ty, unsigned Tag, bool NeedsFixup);
+ BTFTypeDerived(unsigned NextTypeId, unsigned Tag, StringRef Name);
void completeType(BTFDebug &BDebug) override;
void emitType(MCStreamer &OS) override;
void setPointeeType(uint32_t PointeeType);
@@ -204,6 +206,28 @@ public:
void completeType(BTFDebug &BDebug) override;
};
+/// Handle decl tags.
+class BTFTypeDeclTag : public BTFTypeBase {
+ uint32_t Info;
+ StringRef Tag;
+
+public:
+ BTFTypeDeclTag(uint32_t BaseTypeId, int ComponentId, StringRef Tag);
+ uint32_t getSize() override { return BTFTypeBase::getSize() + 4; }
+ void completeType(BTFDebug &BDebug) override;
+ void emitType(MCStreamer &OS) override;
+};
+
+class BTFTypeTypeTag : public BTFTypeBase {
+ const DIDerivedType *DTy;
+ StringRef Tag;
+
+public:
+ BTFTypeTypeTag(uint32_t NextTypeId, StringRef Tag);
+ BTFTypeTypeTag(const DIDerivedType *DTy, StringRef Tag);
+ void completeType(BTFDebug &BDebug) override;
+};
+
/// String table.
class BTFStringTable {
/// String table size in bytes.
@@ -313,6 +337,10 @@ class BTFDebug : public DebugHandlerBase {
/// Generate types for function prototypes.
void processFuncPrototypes(const Function *);
+ /// Generate types for decl annotations.
+ void processDeclAnnotations(DINodeArray Annotations, uint32_t BaseTypeId,
+ int ComponentId);
+
/// Generate one field relocation record.
void generatePatchImmReloc(const MCSymbol *ORSym, uint32_t RootId,
const GlobalVariable *, bool IsAma);
diff --git a/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp b/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
index 3a1492743bf4..3f643d47f934 100644
--- a/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
+++ b/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
@@ -18,8 +18,8 @@
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/TargetRegistry.h"
#include <cstdint>
using namespace llvm;
diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp b/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
index 6687dbe25364..bacd00360f82 100644
--- a/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
+++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
@@ -43,12 +43,14 @@ public:
unsigned getNumFixupKinds() const override { return 1; }
- bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
+ bool writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const override;
};
} // end anonymous namespace
-bool BPFAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
+bool BPFAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const {
if ((Count % 8) != 0)
return false;
diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
index 8fb7d7e89f09..5a1e251cd29c 100644
--- a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
+++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
@@ -18,8 +18,8 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Host.h"
-#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
#include "BPFGenInstrInfo.inc"
diff --git a/llvm/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp b/llvm/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp
index 49eb9ad62c56..d7cdcae916aa 100644
--- a/llvm/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp
+++ b/llvm/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#include "TargetInfo/BPFTargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp b/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp
index f2a381190fe7..ebc04b40d428 100644
--- a/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp
+++ b/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp
@@ -1,12 +1,12 @@
-//===-- CSKYAsmParser.cpp - Parse CSKY assembly to MCInst instructions --===//
+//===---- CSKYAsmParser.cpp - Parse CSKY assembly to MCInst instructions --===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/CSKYInstPrinter.h"
#include "MCTargetDesc/CSKYMCExpr.h"
#include "MCTargetDesc/CSKYMCTargetDesc.h"
#include "TargetInfo/CSKYTargetInfo.h"
@@ -20,10 +20,14 @@
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "csky-asm-parser"
using namespace llvm;
@@ -32,6 +36,8 @@ struct CSKYOperand;
class CSKYAsmParser : public MCTargetAsmParser {
+ const MCRegisterInfo *MRI;
+
bool generateImmOutOfRangeError(OperandVector &Operands, uint64_t ErrorInfo,
int64_t Lower, int64_t Upper, Twine Msg);
@@ -52,6 +58,9 @@ class CSKYAsmParser : public MCTargetAsmParser {
OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
SMLoc &EndLoc) override;
+ bool processInstruction(MCInst &Inst, SMLoc IDLoc, OperandVector &Operands,
+ MCStreamer &Out);
+
// Auto-generated instruction matching functions
#define GET_ASSEMBLER_HEADER
#include "CSKYGenAsmMatcher.inc"
@@ -61,12 +70,18 @@ class CSKYAsmParser : public MCTargetAsmParser {
OperandMatchResultTy parseBaseRegImm(OperandVector &Operands);
OperandMatchResultTy parseCSKYSymbol(OperandVector &Operands);
OperandMatchResultTy parseConstpoolSymbol(OperandVector &Operands);
+ OperandMatchResultTy parseDataSymbol(OperandVector &Operands);
+ OperandMatchResultTy parsePSRFlag(OperandVector &Operands);
+ OperandMatchResultTy parseRegSeq(OperandVector &Operands);
+ OperandMatchResultTy parseRegList(OperandVector &Operands);
bool parseOperand(OperandVector &Operands, StringRef Mnemonic);
public:
enum CSKYMatchResultTy {
Match_Dummy = FIRST_TARGET_MATCH_RESULT_TY,
+ Match_RequiresSameSrcAndDst,
+ Match_InvalidRegOutOfRange,
#define GET_OPERAND_DIAGNOSTIC_TYPES
#include "CSKYGenAsmMatcher.inc"
#undef GET_OPERAND_DIAGNOSTIC_TYPES
@@ -81,10 +96,14 @@ public:
/// Instances of this class represent a parsed machine instruction.
struct CSKYOperand : public MCParsedAsmOperand {
+
enum KindTy {
Token,
Register,
Immediate,
+ RegisterSeq,
+ CPOP,
+ RegisterList
} Kind;
struct RegOp {
@@ -95,11 +114,34 @@ struct CSKYOperand : public MCParsedAsmOperand {
const MCExpr *Val;
};
+ struct ConstpoolOp {
+ const MCExpr *Val;
+ };
+
+ struct RegSeqOp {
+ unsigned RegNumFrom;
+ unsigned RegNumTo;
+ };
+
+ struct RegListOp {
+ unsigned List1From = 0;
+ unsigned List1To = 0;
+ unsigned List2From = 0;
+ unsigned List2To = 0;
+ unsigned List3From = 0;
+ unsigned List3To = 0;
+ unsigned List4From = 0;
+ unsigned List4To = 0;
+ };
+
SMLoc StartLoc, EndLoc;
union {
StringRef Tok;
RegOp Reg;
ImmOp Imm;
+ ConstpoolOp CPool;
+ RegSeqOp RegSeq;
+ RegListOp RegList;
};
CSKYOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
@@ -113,18 +155,31 @@ public:
case Register:
Reg = o.Reg;
break;
+ case RegisterSeq:
+ RegSeq = o.RegSeq;
+ break;
+ case CPOP:
+ CPool = o.CPool;
+ break;
case Immediate:
Imm = o.Imm;
break;
case Token:
Tok = o.Tok;
break;
+ case RegisterList:
+ RegList = o.RegList;
+ break;
}
}
bool isToken() const override { return Kind == Token; }
bool isReg() const override { return Kind == Register; }
bool isImm() const override { return Kind == Immediate; }
+ bool isRegisterSeq() const { return Kind == RegisterSeq; }
+ bool isRegisterList() const { return Kind == RegisterList; }
+ bool isConstPoolOp() const { return Kind == CPOP; }
+
bool isMem() const override { return false; }
static bool evaluateConstantImm(const MCExpr *Expr, int64_t &Imm) {
@@ -163,29 +218,132 @@ public:
return IsConstantImm && isShiftedInt<num, shift>(Imm);
}
+ bool isUImm1() const { return isUImm<1>(); }
bool isUImm2() const { return isUImm<2>(); }
+ bool isUImm3() const { return isUImm<3>(); }
+ bool isUImm4() const { return isUImm<4>(); }
bool isUImm5() const { return isUImm<5>(); }
+ bool isUImm6() const { return isUImm<6>(); }
+ bool isUImm7() const { return isUImm<7>(); }
+ bool isUImm8() const { return isUImm<8>(); }
bool isUImm12() const { return isUImm<12>(); }
bool isUImm16() const { return isUImm<16>(); }
-
+ bool isUImm20() const { return isUImm<20>(); }
+ bool isUImm24() const { return isUImm<24>(); }
+
+ bool isOImm3() const { return isOImm<3>(); }
+ bool isOImm4() const { return isOImm<4>(); }
+ bool isOImm5() const { return isOImm<5>(); }
+ bool isOImm6() const { return isOImm<6>(); }
+ bool isOImm8() const { return isOImm<8>(); }
bool isOImm12() const { return isOImm<12>(); }
bool isOImm16() const { return isOImm<16>(); }
+ bool isSImm8() const { return isSImm<8>(); }
+
+ bool isUImm5Shift1() { return isUImm<5, 1>(); }
+ bool isUImm5Shift2() { return isUImm<5, 2>(); }
+ bool isUImm7Shift1() { return isUImm<7, 1>(); }
+ bool isUImm7Shift2() { return isUImm<7, 2>(); }
+ bool isUImm7Shift3() { return isUImm<7, 3>(); }
+ bool isUImm8Shift2() { return isUImm<8, 2>(); }
+ bool isUImm8Shift3() { return isUImm<8, 3>(); }
+ bool isUImm8Shift8() { return isUImm<8, 8>(); }
+ bool isUImm8Shift16() { return isUImm<8, 16>(); }
+ bool isUImm8Shift24() { return isUImm<8, 24>(); }
bool isUImm12Shift1() { return isUImm<12, 1>(); }
bool isUImm12Shift2() { return isUImm<12, 2>(); }
+ bool isUImm16Shift8() { return isUImm<16, 8>(); }
+ bool isUImm16Shift16() { return isUImm<16, 16>(); }
+ bool isUImm24Shift8() { return isUImm<24, 8>(); }
bool isSImm16Shift1() { return isSImm<16, 1>(); }
- bool isCSKYSymbol() const {
+ bool isCSKYSymbol() const { return isImm(); }
+
+ bool isConstpool() const { return isConstPoolOp(); }
+ bool isDataSymbol() const { return isConstPoolOp(); }
+
+ bool isSPOperand() const {
+ if (!isReg())
+ return false;
+ return getReg() == CSKY::R14;
+ }
+
+ bool isPSRFlag() const {
int64_t Imm;
- // Must be of 'immediate' type but not a constant.
- return isImm() && !evaluateConstantImm(getImm(), Imm);
+ // Must be of 'immediate' type and a constant.
+ if (!isImm() || !evaluateConstantImm(getImm(), Imm))
+ return false;
+
+ return isUInt<5>(Imm);
+ }
+
+ template <unsigned MIN, unsigned MAX> bool isRegSeqTemplate() const {
+ if (!isRegisterSeq())
+ return false;
+
+ std::pair<unsigned, unsigned> regSeq = getRegSeq();
+
+ return MIN <= regSeq.first && regSeq.first <= regSeq.second &&
+ regSeq.second <= MAX;
+ }
+
+ bool isRegSeq() const { return isRegSeqTemplate<CSKY::R0, CSKY::R31>(); }
+
+ static bool isLegalRegList(unsigned from, unsigned to) {
+ if (from == 0 && to == 0)
+ return true;
+
+ if (from == to) {
+ if (from != CSKY::R4 && from != CSKY::R15 && from != CSKY::R16 &&
+ from != CSKY::R28)
+ return false;
+
+ return true;
+ } else {
+ if (from != CSKY::R4 && from != CSKY::R16)
+ return false;
+
+ if (from == CSKY::R4 && to > CSKY::R4 && to < CSKY::R12)
+ return true;
+ else if (from == CSKY::R16 && to > CSKY::R16 && to < CSKY::R18)
+ return true;
+ else
+ return false;
+ }
+ }
+
+ bool isRegList() const {
+ if (!isRegisterList())
+ return false;
+
+ auto regList = getRegList();
+
+ if (!isLegalRegList(regList.List1From, regList.List1To))
+ return false;
+ if (!isLegalRegList(regList.List2From, regList.List2To))
+ return false;
+ if (!isLegalRegList(regList.List3From, regList.List3To))
+ return false;
+ if (!isLegalRegList(regList.List4From, regList.List4To))
+ return false;
+
+ return true;
}
- bool isConstpoolSymbol() const {
+ bool isExtImm6() {
+ if (!isImm())
+ return false;
+
int64_t Imm;
- // Must be of 'immediate' type but not a constant.
- return isImm() && !evaluateConstantImm(getImm(), Imm);
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm);
+ if (!IsConstantImm)
+ return false;
+
+ int uimm4 = Imm & 0xf;
+
+ return isShiftedUInt<6, 0>(Imm) && uimm4 >= 0 && uimm4 <= 14;
}
/// Gets location of the first token of this operand.
@@ -198,23 +356,64 @@ public:
return Reg.RegNum;
}
+ std::pair<unsigned, unsigned> getRegSeq() const {
+ assert(Kind == RegisterSeq && "Invalid type access!");
+ return std::pair<unsigned, unsigned>(RegSeq.RegNumFrom, RegSeq.RegNumTo);
+ }
+
+ RegListOp getRegList() const {
+ assert(Kind == RegisterList && "Invalid type access!");
+ return RegList;
+ }
+
const MCExpr *getImm() const {
assert(Kind == Immediate && "Invalid type access!");
return Imm.Val;
}
+ const MCExpr *getConstpoolOp() const {
+ assert(Kind == CPOP && "Invalid type access!");
+ return CPool.Val;
+ }
+
StringRef getToken() const {
assert(Kind == Token && "Invalid type access!");
return Tok;
}
void print(raw_ostream &OS) const override {
+ auto RegName = [](unsigned Reg) {
+ if (Reg)
+ return CSKYInstPrinter::getRegisterName(Reg);
+ else
+ return "noreg";
+ };
+
switch (Kind) {
+ case CPOP:
+ OS << *getConstpoolOp();
+ break;
case Immediate:
OS << *getImm();
break;
- case Register:
- OS << "<register x" << getReg() << ">";
+ case KindTy::Register:
+ OS << "<register " << RegName(getReg()) << ">";
+ break;
+ case RegisterSeq:
+ OS << "<register-seq ";
+ OS << RegName(getRegSeq().first) << "-" << RegName(getRegSeq().second)
+ << ">";
+ break;
+ case RegisterList:
+ OS << "<register-list ";
+ OS << RegName(getRegList().List1From) << "-"
+ << RegName(getRegList().List1To) << ",";
+ OS << RegName(getRegList().List2From) << "-"
+ << RegName(getRegList().List2To) << ",";
+ OS << RegName(getRegList().List3From) << "-"
+ << RegName(getRegList().List3To) << ",";
+ OS << RegName(getRegList().List4From) << "-"
+ << RegName(getRegList().List4To);
break;
case Token:
OS << "'" << getToken() << "'";
@@ -239,6 +438,51 @@ public:
return Op;
}
+ static std::unique_ptr<CSKYOperand> createRegSeq(unsigned RegNoFrom,
+ unsigned RegNoTo, SMLoc S) {
+ auto Op = std::make_unique<CSKYOperand>(RegisterSeq);
+ Op->RegSeq.RegNumFrom = RegNoFrom;
+ Op->RegSeq.RegNumTo = RegNoTo;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static std::unique_ptr<CSKYOperand>
+ createRegList(SmallVector<unsigned, 4> reglist, SMLoc S) {
+ auto Op = std::make_unique<CSKYOperand>(RegisterList);
+ Op->RegList.List1From = 0;
+ Op->RegList.List1To = 0;
+ Op->RegList.List2From = 0;
+ Op->RegList.List2To = 0;
+ Op->RegList.List3From = 0;
+ Op->RegList.List3To = 0;
+ Op->RegList.List4From = 0;
+ Op->RegList.List4To = 0;
+
+ for (unsigned i = 0; i < reglist.size(); i += 2) {
+ if (Op->RegList.List1From == 0) {
+ Op->RegList.List1From = reglist[i];
+ Op->RegList.List1To = reglist[i + 1];
+ } else if (Op->RegList.List2From == 0) {
+ Op->RegList.List2From = reglist[i];
+ Op->RegList.List2To = reglist[i + 1];
+ } else if (Op->RegList.List3From == 0) {
+ Op->RegList.List3From = reglist[i];
+ Op->RegList.List3To = reglist[i + 1];
+ } else if (Op->RegList.List4From == 0) {
+ Op->RegList.List4From = reglist[i];
+ Op->RegList.List4To = reglist[i + 1];
+ } else {
+ assert(0);
+ }
+ }
+
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
static std::unique_ptr<CSKYOperand> createImm(const MCExpr *Val, SMLoc S,
SMLoc E) {
auto Op = std::make_unique<CSKYOperand>(Immediate);
@@ -248,6 +492,15 @@ public:
return Op;
}
+ static std::unique_ptr<CSKYOperand> createConstpoolOp(const MCExpr *Val,
+ SMLoc S, SMLoc E) {
+ auto Op = std::make_unique<CSKYOperand>(CPOP);
+ Op->CPool.Val = Val;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
void addExpr(MCInst &Inst, const MCExpr *Expr) const {
assert(Expr && "Expr shouldn't be null!");
if (auto *CE = dyn_cast<MCConstantExpr>(Expr))
@@ -266,6 +519,70 @@ public:
assert(N == 1 && "Invalid number of operands!");
addExpr(Inst, getImm());
}
+
+ void addConstpoolOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createExpr(getConstpoolOp()));
+ }
+
+ void addRegSeqOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ auto regSeq = getRegSeq();
+
+ Inst.addOperand(MCOperand::createReg(regSeq.first));
+ Inst.addOperand(MCOperand::createReg(regSeq.second));
+ }
+
+ static unsigned getListValue(unsigned ListFrom, unsigned ListTo) {
+ if (ListFrom == ListTo && ListFrom == CSKY::R15)
+ return (1 << 4);
+ else if (ListFrom == ListTo && ListFrom == CSKY::R28)
+ return (1 << 8);
+ else if (ListFrom == CSKY::R4)
+ return ListTo - ListFrom + 1;
+ else if (ListFrom == CSKY::R16)
+ return ((ListTo - ListFrom + 1) << 5);
+ else
+ return 0;
+ }
+
+ void addRegListOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ auto regList = getRegList();
+
+ unsigned V = 0;
+
+ unsigned T = getListValue(regList.List1From, regList.List1To);
+ if (T != 0)
+ V = V | T;
+
+ T = getListValue(regList.List2From, regList.List2To);
+ if (T != 0)
+ V = V | T;
+
+ T = getListValue(regList.List3From, regList.List3To);
+ if (T != 0)
+ V = V | T;
+
+ T = getListValue(regList.List4From, regList.List4To);
+ if (T != 0)
+ V = V | T;
+
+ Inst.addOperand(MCOperand::createImm(V));
+ }
+
+ bool isValidForTie(const CSKYOperand &Other) const {
+ if (Kind != Other.Kind)
+ return false;
+
+ switch (Kind) {
+ default:
+ llvm_unreachable("Unexpected kind");
+ return false;
+ case Register:
+ return Reg.RegNum == Other.Reg.RegNum;
+ }
+ }
};
} // end anonymous namespace.
@@ -299,9 +616,7 @@ bool CSKYAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
default:
break;
case Match_Success:
- Inst.setLoc(IDLoc);
- Out.emitInstruction(Inst, getSTI());
- return false;
+ return processInstruction(Inst, IDLoc, Operands, Out);
case Match_MissingFeature: {
assert(MissingFeatures.any() && "Unknown missing features!");
ListSeparator LS;
@@ -347,26 +662,79 @@ bool CSKYAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
switch (Result) {
default:
break;
+ case Match_InvalidSImm8:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, -(1 << 7),
+ (1 << 7) - 1);
+ case Match_InvalidOImm3:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, 1, (1 << 3));
+ case Match_InvalidOImm4:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, 1, (1 << 4));
+ case Match_InvalidOImm5:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, 1, (1 << 5));
+ case Match_InvalidOImm6:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, 1, (1 << 6));
+ case Match_InvalidOImm8:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, 1, (1 << 8));
case Match_InvalidOImm12:
return generateImmOutOfRangeError(Operands, ErrorInfo, 1, (1 << 12));
case Match_InvalidOImm16:
return generateImmOutOfRangeError(Operands, ErrorInfo, 1, (1 << 16));
+ case Match_InvalidUImm1:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 1) - 1);
case Match_InvalidUImm2:
return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 2) - 1);
+ case Match_InvalidUImm3:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 3) - 1);
+ case Match_InvalidUImm4:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 4) - 1);
case Match_InvalidUImm5:
return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 5) - 1);
+ case Match_InvalidUImm6:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 6) - 1);
+ case Match_InvalidUImm7:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 7) - 1);
+ case Match_InvalidUImm8:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 8) - 1);
case Match_InvalidUImm12:
return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 12) - 1);
+ case Match_InvalidUImm16:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 16) - 1);
+ case Match_InvalidUImm5Shift1:
+ return generateImmOutOfRangeError(
+ Operands, ErrorInfo, 0, (1 << 5) - 2,
+ "immediate must be a multiple of 2 bytes in the range");
case Match_InvalidUImm12Shift1:
return generateImmOutOfRangeError(
Operands, ErrorInfo, 0, (1 << 12) - 2,
"immediate must be a multiple of 2 bytes in the range");
+ case Match_InvalidUImm5Shift2:
+ return generateImmOutOfRangeError(
+ Operands, ErrorInfo, 0, (1 << 5) - 4,
+ "immediate must be a multiple of 4 bytes in the range");
+ case Match_InvalidUImm7Shift1:
+ return generateImmOutOfRangeError(
+ Operands, ErrorInfo, 0, (1 << 7) - 2,
+ "immediate must be a multiple of 2 bytes in the range");
+ case Match_InvalidUImm7Shift2:
+ return generateImmOutOfRangeError(
+ Operands, ErrorInfo, 0, (1 << 7) - 4,
+ "immediate must be a multiple of 4 bytes in the range");
+ case Match_InvalidUImm8Shift2:
+ return generateImmOutOfRangeError(
+ Operands, ErrorInfo, 0, (1 << 8) - 4,
+ "immediate must be a multiple of 4 bytes in the range");
+ case Match_InvalidUImm8Shift3:
+ return generateImmOutOfRangeError(
+ Operands, ErrorInfo, 0, (1 << 8) - 8,
+ "immediate must be a multiple of 8 bytes in the range");
+ case Match_InvalidUImm8Shift8:
+ return generateImmOutOfRangeError(
+ Operands, ErrorInfo, 0, (1 << 8) - 256,
+ "immediate must be a multiple of 256 bytes in the range");
case Match_InvalidUImm12Shift2:
return generateImmOutOfRangeError(
Operands, ErrorInfo, 0, (1 << 12) - 4,
"immediate must be a multiple of 4 bytes in the range");
- case Match_InvalidUImm16:
- return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 16) - 1);
case Match_InvalidCSKYSymbol: {
SMLoc ErrorLoc = ((CSKYOperand &)*Operands[ErrorInfo]).getStartLoc();
return Error(ErrorLoc, "operand must be a symbol name");
@@ -375,15 +743,68 @@ bool CSKYAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
SMLoc ErrorLoc = ((CSKYOperand &)*Operands[ErrorInfo]).getStartLoc();
return Error(ErrorLoc, "operand must be a constpool symbol name");
}
+ case Match_InvalidPSRFlag: {
+ SMLoc ErrorLoc = ((CSKYOperand &)*Operands[ErrorInfo]).getStartLoc();
+ return Error(ErrorLoc, "psrset operand is not valid");
+ }
+ case Match_InvalidRegSeq: {
+ SMLoc ErrorLoc = ((CSKYOperand &)*Operands[ErrorInfo]).getStartLoc();
+ return Error(ErrorLoc, "Register sequence is not valid");
}
-
+ case Match_InvalidRegOutOfRange: {
+ SMLoc ErrorLoc = ((CSKYOperand &)*Operands[ErrorInfo]).getStartLoc();
+ return Error(ErrorLoc, "register is out of range");
+ }
+ case Match_InvalidSPOperand: {
+ SMLoc ErrorLoc = ((CSKYOperand &)*Operands[ErrorInfo]).getStartLoc();
+ return Error(ErrorLoc, "operand must be sp register");
+ }
+ case Match_RequiresSameSrcAndDst: {
+ SMLoc ErrorLoc = ((CSKYOperand &)*Operands[ErrorInfo]).getStartLoc();
+ return Error(ErrorLoc, "src and dst operand must be same");
+ }
+ case Match_InvalidRegList: {
+ SMLoc ErrorLoc = ((CSKYOperand &)*Operands[ErrorInfo]).getStartLoc();
+ return Error(ErrorLoc, "invalid register list");
+ }
+ }
+ LLVM_DEBUG(dbgs() << "Result = " << Result);
llvm_unreachable("Unknown match type detected!");
}
+bool CSKYAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
+ OperandVector &Operands,
+ MCStreamer &Out) {
+
+ if (Inst.getOpcode() == CSKY::LDQ32 || Inst.getOpcode() == CSKY::STQ32) {
+ if (Inst.getOperand(1).getReg() != CSKY::R4 ||
+ Inst.getOperand(2).getReg() != CSKY::R7) {
+ return Error(IDLoc, "Register sequence is not valid. 'r4-r7' expected");
+ }
+ Inst.setOpcode(Inst.getOpcode() == CSKY::LDQ32 ? CSKY::LDM32 : CSKY::STM32);
+ Out.emitInstruction(Inst, getSTI());
+ return false;
+ } else if (Inst.getOpcode() == CSKY::SEXT32 ||
+ Inst.getOpcode() == CSKY::ZEXT32) {
+ if (Inst.getOperand(2).getImm() < Inst.getOperand(3).getImm())
+ return Error(IDLoc, "msb must be greater or equal to lsb");
+ } else if (Inst.getOpcode() == CSKY::INS32) {
+ if (Inst.getOperand(3).getImm() < Inst.getOperand(4).getImm())
+ return Error(IDLoc, "msb must be greater or equal to lsb");
+ } else if (Inst.getOpcode() == CSKY::IDLY32) {
+ if (Inst.getOperand(0).getImm() > 32 || Inst.getOperand(0).getImm() < 0)
+ return Error(IDLoc, "n must be in range [0,32]");
+ }
+
+ Out.emitInstruction(Inst, getSTI());
+ return false;
+}
+
// Attempts to match Name as a register (either using the default name or
// alternative ABI names), setting RegNo to the matching register. Upon
// failure, returns true and sets RegNo to 0.
-static bool matchRegisterNameHelper(MCRegister &RegNo, StringRef Name) {
+static bool matchRegisterNameHelper(const MCSubtargetInfo &STI,
+ MCRegister &RegNo, StringRef Name) {
RegNo = MatchRegisterName(Name);
if (RegNo == CSKY::NoRegister)
@@ -399,12 +820,12 @@ bool CSKYAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
EndLoc = Tok.getEndLoc();
StringRef Name = getLexer().getTok().getIdentifier();
- if (!matchRegisterNameHelper((MCRegister &)RegNo, Name)) {
+ if (!matchRegisterNameHelper(getSTI(), (MCRegister &)RegNo, Name)) {
getParser().Lex(); // Eat identifier token.
return false;
}
- return Error(StartLoc, "invalid register name");
+ return MatchOperand_NoMatch;
}
OperandMatchResultTy CSKYAsmParser::parseRegister(OperandVector &Operands) {
@@ -418,7 +839,7 @@ OperandMatchResultTy CSKYAsmParser::parseRegister(OperandVector &Operands) {
StringRef Name = getLexer().getTok().getIdentifier();
MCRegister RegNo;
- if (matchRegisterNameHelper((MCRegister &)RegNo, Name))
+ if (matchRegisterNameHelper(getSTI(), (MCRegister &)RegNo, Name))
return MatchOperand_NoMatch;
getLexer().Lex();
@@ -439,7 +860,13 @@ OperandMatchResultTy CSKYAsmParser::parseBaseRegImm(OperandVector &Operands) {
if (parseRegister(Operands) != MatchOperand_Success) {
getLexer().UnLex(Tok);
Operands.pop_back();
- return MatchOperand_ParseFail;
+ return MatchOperand_NoMatch;
+ }
+
+ if (getLexer().is(AsmToken::RParen)) {
+ Operands.push_back(CSKYOperand::createToken(")", getLoc()));
+ getParser().Lex(); // Eat ')'
+ return MatchOperand_Success;
}
if (getLexer().isNot(AsmToken::Comma)) {
@@ -495,8 +922,10 @@ OperandMatchResultTy CSKYAsmParser::parseImmediate(OperandVector &Operands) {
const MCExpr *IdVal;
SMLoc S = getLoc();
- if (getParser().parseExpression(IdVal))
+ if (getParser().parseExpression(IdVal)) {
+ Error(getLoc(), "unknown expression");
return MatchOperand_ParseFail;
+ }
SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
Operands.push_back(CSKYOperand::createImm(IdVal, S, E));
@@ -517,17 +946,26 @@ bool CSKYAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
return true;
// Attempt to parse token as register
- if (parseRegister(Operands) == MatchOperand_Success)
+ auto Res = parseRegister(Operands);
+ if (Res == MatchOperand_Success)
return false;
+ else if (Res == MatchOperand_ParseFail)
+ return true;
// Attempt to parse token as (register, imm)
- if (getLexer().is(AsmToken::LParen))
- if (parseBaseRegImm(Operands) == MatchOperand_Success)
+ if (getLexer().is(AsmToken::LParen)) {
+ Res = parseBaseRegImm(Operands);
+ if (Res == MatchOperand_Success)
return false;
+ else if (Res == MatchOperand_ParseFail)
+ return true;
+ }
- // Attempt to parse token as a imm.
- if (parseImmediate(Operands) == MatchOperand_Success)
+ Res = parseImmediate(Operands);
+ if (Res == MatchOperand_Success)
return false;
+ else if (Res == MatchOperand_ParseFail)
+ return true;
// Finally we have exhausted all options and must declare defeat.
Error(getLoc(), "unknown operand");
@@ -537,16 +975,20 @@ bool CSKYAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
OperandMatchResultTy CSKYAsmParser::parseCSKYSymbol(OperandVector &Operands) {
SMLoc S = getLoc();
SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
+ const MCExpr *Res;
if (getLexer().getKind() != AsmToken::Identifier)
return MatchOperand_NoMatch;
StringRef Identifier;
- if (getParser().parseIdentifier(Identifier))
+ AsmToken Tok = getLexer().getTok();
+
+ if (getParser().parseIdentifier(Identifier)) {
+ Error(getLoc(), "unknown identifier");
return MatchOperand_ParseFail;
+ }
CSKYMCExpr::VariantKind Kind = CSKYMCExpr::VK_CSKY_None;
-
if (Identifier.consume_back("@GOT"))
Kind = CSKYMCExpr::VK_CSKY_GOT;
else if (Identifier.consume_back("@GOTOFF"))
@@ -555,44 +997,377 @@ OperandMatchResultTy CSKYAsmParser::parseCSKYSymbol(OperandVector &Operands) {
Kind = CSKYMCExpr::VK_CSKY_PLT;
else if (Identifier.consume_back("@GOTPC"))
Kind = CSKYMCExpr::VK_CSKY_GOTPC;
+ else if (Identifier.consume_back("@TLSGD32"))
+ Kind = CSKYMCExpr::VK_CSKY_TLSGD;
+ else if (Identifier.consume_back("@GOTTPOFF"))
+ Kind = CSKYMCExpr::VK_CSKY_TLSIE;
+ else if (Identifier.consume_back("@TPOFF"))
+ Kind = CSKYMCExpr::VK_CSKY_TLSLE;
+ else if (Identifier.consume_back("@TLSLDM32"))
+ Kind = CSKYMCExpr::VK_CSKY_TLSLDM;
+ else if (Identifier.consume_back("@TLSLDO32"))
+ Kind = CSKYMCExpr::VK_CSKY_TLSLDO;
+
+ MCSymbol *Sym = getContext().getInlineAsmLabel(Identifier);
+
+ if (!Sym)
+ Sym = getContext().getOrCreateSymbol(Identifier);
+
+ if (Sym->isVariable()) {
+ const MCExpr *V = Sym->getVariableValue(/*SetUsed=*/false);
+ if (!isa<MCSymbolRefExpr>(V)) {
+ getLexer().UnLex(Tok); // Put back if it's not a bare symbol.
+ Error(getLoc(), "unknown symbol");
+ return MatchOperand_ParseFail;
+ }
+ Res = V;
+ } else
+ Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
- MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
- const MCExpr *Res =
- MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
+ MCBinaryExpr::Opcode Opcode;
+ switch (getLexer().getKind()) {
+ default:
+ if (Kind != CSKYMCExpr::VK_CSKY_None)
+ Res = CSKYMCExpr::create(Res, Kind, getContext());
- if (Kind != CSKYMCExpr::VK_CSKY_None)
- Res = CSKYMCExpr::create(Res, Kind, getContext());
+ Operands.push_back(CSKYOperand::createImm(Res, S, E));
+ return MatchOperand_Success;
+ case AsmToken::Plus:
+ Opcode = MCBinaryExpr::Add;
+ break;
+ case AsmToken::Minus:
+ Opcode = MCBinaryExpr::Sub;
+ break;
+ }
+ getLexer().Lex(); // eat + or -
+
+ const MCExpr *Expr;
+ if (getParser().parseExpression(Expr)) {
+ Error(getLoc(), "unknown expression");
+ return MatchOperand_ParseFail;
+ }
+ Res = MCBinaryExpr::create(Opcode, Res, Expr, getContext());
Operands.push_back(CSKYOperand::createImm(Res, S, E));
return MatchOperand_Success;
}
+OperandMatchResultTy CSKYAsmParser::parseDataSymbol(OperandVector &Operands) {
+ SMLoc S = getLoc();
+ SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
+ const MCExpr *Res;
+
+ if (getLexer().getKind() != AsmToken::LBrac)
+ return MatchOperand_NoMatch;
+
+ getLexer().Lex(); // Eat '['.
+
+ if (getLexer().getKind() != AsmToken::Identifier) {
+ const MCExpr *Expr;
+ if (getParser().parseExpression(Expr)) {
+ Error(getLoc(), "unknown expression");
+ return MatchOperand_ParseFail;
+ }
+
+ if (getLexer().getKind() != AsmToken::RBrac) {
+ Error(getLoc(), "expected ]");
+ return MatchOperand_ParseFail;
+ }
+
+ getLexer().Lex(); // Eat ']'.
+
+ Operands.push_back(CSKYOperand::createConstpoolOp(Expr, S, E));
+ return MatchOperand_Success;
+ }
+
+ AsmToken Tok = getLexer().getTok();
+ StringRef Identifier;
+
+ if (getParser().parseIdentifier(Identifier)) {
+ Error(getLoc(), "unknown identifier " + Identifier);
+ return MatchOperand_ParseFail;
+ }
+
+ CSKYMCExpr::VariantKind Kind = CSKYMCExpr::VK_CSKY_None;
+ if (Identifier.consume_back("@GOT"))
+ Kind = CSKYMCExpr::VK_CSKY_GOT_IMM18_BY4;
+ else if (Identifier.consume_back("@PLT"))
+ Kind = CSKYMCExpr::VK_CSKY_PLT_IMM18_BY4;
+
+ MCSymbol *Sym = getContext().getInlineAsmLabel(Identifier);
+
+ if (!Sym)
+ Sym = getContext().getOrCreateSymbol(Identifier);
+
+ if (Sym->isVariable()) {
+ const MCExpr *V = Sym->getVariableValue(/*SetUsed=*/false);
+ if (!isa<MCSymbolRefExpr>(V)) {
+ getLexer().UnLex(Tok); // Put back if it's not a bare symbol.
+ Error(getLoc(), "unknown symbol");
+ return MatchOperand_ParseFail;
+ }
+ Res = V;
+ } else {
+ Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
+ }
+
+ MCBinaryExpr::Opcode Opcode;
+ switch (getLexer().getKind()) {
+ default:
+ Error(getLoc(), "unknown symbol");
+ return MatchOperand_ParseFail;
+ case AsmToken::RBrac:
+
+ getLexer().Lex(); // Eat ']'.
+
+ if (Kind != CSKYMCExpr::VK_CSKY_None)
+ Res = CSKYMCExpr::create(Res, Kind, getContext());
+
+ Operands.push_back(CSKYOperand::createConstpoolOp(Res, S, E));
+ return MatchOperand_Success;
+ case AsmToken::Plus:
+ Opcode = MCBinaryExpr::Add;
+ break;
+ case AsmToken::Minus:
+ Opcode = MCBinaryExpr::Sub;
+ break;
+ }
+
+ getLexer().Lex(); // eat + or -
+
+ const MCExpr *Expr;
+ if (getParser().parseExpression(Expr)) {
+ Error(getLoc(), "unknown expression");
+ return MatchOperand_ParseFail;
+ }
+
+ if (getLexer().getKind() != AsmToken::RBrac) {
+ Error(getLoc(), "expected ']'");
+ return MatchOperand_ParseFail;
+ }
+
+ getLexer().Lex(); // Eat ']'.
+
+ Res = MCBinaryExpr::create(Opcode, Res, Expr, getContext());
+ Operands.push_back(CSKYOperand::createConstpoolOp(Res, S, E));
+ return MatchOperand_Success;
+}
+
OperandMatchResultTy
CSKYAsmParser::parseConstpoolSymbol(OperandVector &Operands) {
SMLoc S = getLoc();
SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
+ const MCExpr *Res;
if (getLexer().getKind() != AsmToken::LBrac)
return MatchOperand_NoMatch;
getLexer().Lex(); // Eat '['.
- if (getLexer().getKind() != AsmToken::Identifier)
- return MatchOperand_NoMatch;
+ if (getLexer().getKind() != AsmToken::Identifier) {
+ const MCExpr *Expr;
+ if (getParser().parseExpression(Expr)) {
+ Error(getLoc(), "unknown expression");
+ return MatchOperand_ParseFail;
+ }
+
+ if (getLexer().getKind() != AsmToken::RBrac) {
+ Error(getLoc(), "expected ']'");
+ return MatchOperand_ParseFail;
+ }
+
+ getLexer().Lex(); // Eat ']'.
+
+ Operands.push_back(CSKYOperand::createConstpoolOp(Expr, S, E));
+ return MatchOperand_Success;
+ }
+ AsmToken Tok = getLexer().getTok();
StringRef Identifier;
- if (getParser().parseIdentifier(Identifier))
+
+ if (getParser().parseIdentifier(Identifier)) {
+ Error(getLoc(), "unknown identifier");
return MatchOperand_ParseFail;
+ }
- if (getLexer().getKind() != AsmToken::RBrac)
- return MatchOperand_NoMatch;
+ MCSymbol *Sym = getContext().getInlineAsmLabel(Identifier);
+
+ if (!Sym)
+ Sym = getContext().getOrCreateSymbol(Identifier);
+
+ if (Sym->isVariable()) {
+ const MCExpr *V = Sym->getVariableValue(/*SetUsed=*/false);
+ if (!isa<MCSymbolRefExpr>(V)) {
+ getLexer().UnLex(Tok); // Put back if it's not a bare symbol.
+ Error(getLoc(), "unknown symbol");
+ return MatchOperand_ParseFail;
+ }
+ Res = V;
+ } else {
+ Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
+ }
+
+ MCBinaryExpr::Opcode Opcode;
+ switch (getLexer().getKind()) {
+ default:
+ Error(getLoc(), "unknown symbol");
+ return MatchOperand_ParseFail;
+ case AsmToken::RBrac:
+
+ getLexer().Lex(); // Eat ']'.
+
+ Operands.push_back(CSKYOperand::createConstpoolOp(Res, S, E));
+ return MatchOperand_Success;
+ case AsmToken::Plus:
+ Opcode = MCBinaryExpr::Add;
+ break;
+ case AsmToken::Minus:
+ Opcode = MCBinaryExpr::Sub;
+ break;
+ }
+
+ getLexer().Lex(); // eat + or -
+
+ const MCExpr *Expr;
+ if (getParser().parseExpression(Expr)) {
+ Error(getLoc(), "unknown expression");
+ return MatchOperand_ParseFail;
+ }
+
+ if (getLexer().getKind() != AsmToken::RBrac) {
+ Error(getLoc(), "expected ']'");
+ return MatchOperand_ParseFail;
+ }
getLexer().Lex(); // Eat ']'.
- MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
- const MCExpr *Res =
- MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
- Operands.push_back(CSKYOperand::createImm(Res, S, E));
+ Res = MCBinaryExpr::create(Opcode, Res, Expr, getContext());
+ Operands.push_back(CSKYOperand::createConstpoolOp(Res, S, E));
+ return MatchOperand_Success;
+}
+
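+// Parse a comma-separated list of PSR flag names (sie, ee, ie, fe, af) and
+// fold them into a single immediate bit-mask operand.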
+OperandMatchResultTy CSKYAsmParser::parsePSRFlag(OperandVector &Operands) {
+ SMLoc S = getLoc();
+ SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
+
+ unsigned Flag = 0;
+
+ while (getLexer().isNot(AsmToken::EndOfStatement)) {
+ StringRef Identifier;
+ if (getParser().parseIdentifier(Identifier)) {
+ Error(getLoc(), "unknown identifier " + Identifier);
+ return MatchOperand_ParseFail;
+ }
+
+ if (Identifier == "sie")
+ Flag = (1 << 4) | Flag;
+ else if (Identifier == "ee")
+ Flag = (1 << 3) | Flag;
+ else if (Identifier == "ie")
+ Flag = (1 << 2) | Flag;
+ else if (Identifier == "fe")
+ Flag = (1 << 1) | Flag;
+ else if (Identifier == "af")
+ Flag = (1 << 0) | Flag;
+ else {
+ Error(getLoc(), "expected " + Identifier);
+ return MatchOperand_ParseFail;
+ }
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ if (getLexer().is(AsmToken::Comma)) {
+ getLexer().Lex(); // eat ','
+ } else {
+ Error(getLoc(), "expected ,");
+ return MatchOperand_ParseFail;
+ }
+ }
+
+ Operands.push_back(
+ CSKYOperand::createImm(MCConstantExpr::create(Flag, getContext()), S, E));
+ return MatchOperand_Success;
+}
+
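+// Parse a register sequence of the form "ry-rz" and push it as a single
+// register-sequence operand.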
+OperandMatchResultTy CSKYAsmParser::parseRegSeq(OperandVector &Operands) {
+ SMLoc S = getLoc();
+
+ if (parseRegister(Operands) != MatchOperand_Success)
+ return MatchOperand_NoMatch;
+
+ auto Ry = Operands.back()->getReg();
+ Operands.pop_back();
+
+ if (getLexer().isNot(AsmToken::Minus)) {
+ Error(getLoc(), "expected '-'");
+ return MatchOperand_ParseFail;
+ }
+
+ getLexer().Lex(); // eat '-'
+
+ if (parseRegister(Operands) != MatchOperand_Success) {
+ Error(getLoc(), "invalid register");
+ return MatchOperand_ParseFail;
+ }
+
+ auto Rz = Operands.back()->getReg();
+ Operands.pop_back();
+
+ Operands.push_back(CSKYOperand::createRegSeq(Ry, Rz, S));
+ return MatchOperand_Success;
+}
+
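+// Parse a register list such as "r4-r11, r15". Each element is either a
+// range "ry-rz" or a single register, which is stored as the degenerate
+// range (ry, ry).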
+OperandMatchResultTy CSKYAsmParser::parseRegList(OperandVector &Operands) {
+ SMLoc S = getLoc();
+
+ SmallVector<unsigned, 4> reglist;
+
+ while (true) {
+
+ if (parseRegister(Operands) != MatchOperand_Success) {
+ Error(getLoc(), "invalid register");
+ return MatchOperand_ParseFail;
+ }
+
+ auto Ry = Operands.back()->getReg();
+ Operands.pop_back();
+
+ if (getLexer().is(AsmToken::Minus)) {
+ getLexer().Lex(); // eat '-'
+
+ if (parseRegister(Operands) != MatchOperand_Success) {
+ Error(getLoc(), "invalid register");
+ return MatchOperand_ParseFail;
+ }
+
+ auto Rz = Operands.back()->getReg();
+ Operands.pop_back();
+
+ reglist.push_back(Ry);
+ reglist.push_back(Rz);
+
+ if (getLexer().is(AsmToken::Comma))
+ getLexer().Lex(); // eat ','
+ else if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ } else if (getLexer().is(AsmToken::Comma)) {
+ reglist.push_back(Ry);
+ reglist.push_back(Ry);
+
+ getLexer().Lex(); // eat ','
+ } else if (getLexer().is(AsmToken::EndOfStatement)) {
+ reglist.push_back(Ry);
+ reglist.push_back(Ry);
+ break;
+ } else {
+ Error(getLoc(), "invalid register list");
+ return MatchOperand_ParseFail;
+ }
+ }
+
+ Operands.push_back(CSKYOperand::createRegList(reglist, S));
return MatchOperand_Success;
}
@@ -638,7 +1413,7 @@ OperandMatchResultTy CSKYAsmParser::tryParseRegister(unsigned &RegNo,
StringRef Name = getLexer().getTok().getIdentifier();
- if (matchRegisterNameHelper((MCRegister &)RegNo, Name))
+ if (matchRegisterNameHelper(getSTI(), (MCRegister &)RegNo, Name))
return MatchOperand_NoMatch;
getParser().Lex(); // Eat identifier token.
diff --git a/llvm/include/llvm/ExecutionEngine/OrcV1Deprecation.h b/llvm/lib/Target/CSKY/CSKY.h
index 7ed254b3ee04..357b1e96e606 100644
--- a/llvm/include/llvm/ExecutionEngine/OrcV1Deprecation.h
+++ b/llvm/lib/Target/CSKY/CSKY.h
@@ -1,4 +1,4 @@
-//===------ OrcV1Deprecation.h - Memory manager for MC-JIT ------*- C++ -*-===//
+//===-- CSKY.h - Top-level interface for CSKY -------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,17 +6,22 @@
//
//===----------------------------------------------------------------------===//
//
-// Tag for suppressing ORCv1 deprecation warnings.
+// This file contains the entry points for global functions defined in the LLVM
+// CSKY back-end.
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_EXECUTIONENGINE_ORCV1DEPRECATION_H
-#define LLVM_EXECUTIONENGINE_ORCV1DEPRECATION_H
+#ifndef LLVM_LIB_TARGET_CSKY_CSKY_H
+#define LLVM_LIB_TARGET_CSKY_CSKY_H
+
+#include "llvm/Target/TargetMachine.h"
namespace llvm {
+class CSKYTargetMachine;
+class FunctionPass;
-enum ORCv1DeprecationAcknowledgement { AcknowledgeORCv1Deprecation };
+FunctionPass *createCSKYISelDag(CSKYTargetMachine &TM);
} // namespace llvm
-#endif // LLVM_EXECUTIONENGINE_ORCV1DEPRECATION_H
+#endif // LLVM_LIB_TARGET_CSKY_CSKY_H
diff --git a/llvm/lib/Target/CSKY/CSKY.td b/llvm/lib/Target/CSKY/CSKY.td
index 854a8b5f22a2..e26781ca6aa1 100644
--- a/llvm/lib/Target/CSKY/CSKY.td
+++ b/llvm/lib/Target/CSKY/CSKY.td
@@ -9,10 +9,97 @@
include "llvm/Target/Target.td"
//===----------------------------------------------------------------------===//
+// CSKY subtarget features and instruction predicates.
+//===----------------------------------------------------------------------===//
+
+def FeatureBTST16 : SubtargetFeature<"btst16", "HasBTST16", "true",
+ "Use the 16-bit btsti instruction">;
+def HasBTST16 : Predicate<"Subtarget->hasBTST16()">,
+ AssemblerPredicate<(all_of FeatureBTST16),
+ "Use the 16-bit btsti instruction">;
+
+// Atomic Support
+def FeatureExtendLrw : SubtargetFeature<"elrw", "HasExtendLrw", "true",
+ "Use the extend LRW instruction">;
+def HasExtendLrw : Predicate<"Subtarget->hasExtendLrw()">,
+ AssemblerPredicate<(all_of FeatureExtendLrw),
+ "Use the extend LRW instruction">;
+
+def FeatureJAVA
+ : SubtargetFeature<"java", "HasJAVA", "true", "Enable java instructions">;
+def HasJAVA : Predicate<"Subtarget->hasJAVA()">,
+ AssemblerPredicate<(all_of FeatureJAVA),
+ "Enable java instructions">;
+
+def FeatureDoloop : SubtargetFeature<"doloop", "HasDoloop", "true",
+ "Enable doloop instructions">;
+def HasDoloop : Predicate<"Subtarget->hasDoloop()">,
+ AssemblerPredicate<(all_of FeatureDoloop),
+ "Enable doloop instructions">;
+
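+// The processor-level features below form an incremental hierarchy
+// (e1 -> e2 -> 2e3 -> 3e7 -> 7e10 -> 10e60); each level implies the previous
+// one through its feature dependencies.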
+def HasE1
+ : SubtargetFeature<"e1", "HasE1", "true", "Support CSKY e1 instructions",
+ [FeatureExtendLrw]>;
+def iHasE1 : Predicate<"Subtarget->hasE1()">,
+ AssemblerPredicate<(all_of HasE1),
+ "Support CSKY e1 instructions">;
+
+def HasE2
+ : SubtargetFeature<"e2", "HasE2", "true", "Support CSKY e2 instructions",
+ [HasE1]>;
+def iHasE2 : Predicate<"Subtarget->hasE2()">,
+ AssemblerPredicate<(all_of HasE2),
+ "Support CSKY e2 instructions">;
+
+def Has2E3 : SubtargetFeature<"2e3", "Has2E3", "true",
+ "Support CSKY 2e3 instructions", [HasE2]>;
+def iHas2E3 : Predicate<"Subtarget->has2E3()">,
+ AssemblerPredicate<(all_of Has2E3),
+ "Support CSKY 2e3 instructions">;
+
+def Has3E3r1 : SubtargetFeature<"3e3r1", "Has3E3r1", "true",
+ "Support CSKY 3e3r1 instructions">;
+def iHas3E3r1 : Predicate<"Subtarget->has3E3r1()">,
+ AssemblerPredicate<(all_of Has3E3r1),
+ "Support CSKY 3e3r1 instructions">;
+
+def Has3r2E3r3
+ : SubtargetFeature<"3e3r3", "Has3r2E3r3", "true",
+ "Support CSKY 3e3r3 instructions", [FeatureDoloop]>;
+def iHas3r2E3r3 : Predicate<"Subtarget->has3r2E3r3()">,
+ AssemblerPredicate<(all_of Has3r2E3r3),
+ "Support CSKY 3e3r3 instructions">;
+
+def Has3E7 : SubtargetFeature<"3e7", "Has3E7", "true",
+ "Support CSKY 3e7 instructions", [Has2E3]>;
+def iHas3E7 : Predicate<"Subtarget->has3E7()">,
+ AssemblerPredicate<(all_of Has3E7),
+ "Support CSKY 3e7 instructions">;
+
+def HasMP1E2 : SubtargetFeature<"mp1e2", "HasMP1E2", "true",
+ "Support CSKY mp1e2 instructions", [Has3E7]>;
+def iHasMP1E2 : Predicate<"Subtarget->hasMP1E2()">,
+ AssemblerPredicate<(all_of HasMP1E2),
+ "Support CSKY mp1e2 instructions">;
+
+def Has7E10 : SubtargetFeature<"7e10", "Has7E10", "true",
+ "Support CSKY 7e10 instructions", [Has3E7]>;
+def iHas7E10 : Predicate<"Subtarget->has7E10()">,
+ AssemblerPredicate<(all_of Has7E10),
+ "Support CSKY 7e10 instructions">;
+
+def Has10E60 : SubtargetFeature<"10e60", "Has10E60", "true",
+ "Support CSKY 10e60 instructions", [Has7E10]>;
+def iHas10E60 : Predicate<"Subtarget->has10E60()">,
+ AssemblerPredicate<(all_of Has10E60),
+ "Support CSKY 10e60 instructions">;
+
+//===----------------------------------------------------------------------===//
// Registers, calling conventions, instruction descriptions.
//===----------------------------------------------------------------------===//
include "CSKYRegisterInfo.td"
+include "CSKYCallingConv.td"
include "CSKYInstrInfo.td"
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp b/llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp
new file mode 100644
index 000000000000..1c38c5d1fde6
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp
@@ -0,0 +1,58 @@
+//===-- CSKYAsmPrinter.cpp - CSKY LLVM assembly writer --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to the CSKY assembly language.
+//
+//===----------------------------------------------------------------------===//
+#include "CSKYAsmPrinter.h"
+#include "CSKY.h"
+#include "CSKYTargetMachine.h"
+#include "MCTargetDesc/CSKYInstPrinter.h"
+#include "MCTargetDesc/CSKYMCExpr.h"
+#include "TargetInfo/CSKYTargetInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/TargetRegistry.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "csky-asm-printer"
+
+CSKYAsmPrinter::CSKYAsmPrinter(llvm::TargetMachine &TM,
+ std::unique_ptr<llvm::MCStreamer> Streamer)
+ : AsmPrinter(TM, std::move(Streamer)), MCInstLowering(OutContext, *this) {}
+
+bool CSKYAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ Subtarget = &MF.getSubtarget<CSKYSubtarget>();
+ return AsmPrinter::runOnMachineFunction(MF);
+}
+
+// Simple pseudo-instructions have their lowering (with expansion to real
+// instructions) auto-generated.
+#include "CSKYGenMCPseudoLowering.inc"
+
+void CSKYAsmPrinter::emitInstruction(const MachineInstr *MI) {
+ // Do any auto-generated pseudo lowerings.
+ if (emitPseudoExpansionLowering(*OutStreamer, MI))
+ return;
+
+ MCInst TmpInst;
+ MCInstLowering.Lower(MI, TmpInst);
+ EmitToStreamer(*OutStreamer, TmpInst);
+}
+
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeCSKYAsmPrinter() {
+ RegisterAsmPrinter<CSKYAsmPrinter> X(getTheCSKYTarget());
+}
diff --git a/llvm/lib/Target/CSKY/CSKYAsmPrinter.h b/llvm/lib/Target/CSKY/CSKYAsmPrinter.h
new file mode 100644
index 000000000000..f0f5d8657c04
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYAsmPrinter.h
@@ -0,0 +1,40 @@
+//===-- CSKYAsmPrinter.h - CSKY implementation of AsmPrinter ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_CSKY_CSKYASMPRINTER_H
+#define LLVM_LIB_TARGET_CSKY_CSKYASMPRINTER_H
+
+#include "CSKYMCInstLower.h"
+#include "CSKYSubtarget.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/MC/MCDirectives.h"
+
+namespace llvm {
+class LLVM_LIBRARY_VISIBILITY CSKYAsmPrinter : public AsmPrinter {
+ CSKYMCInstLower MCInstLowering;
+
+ const CSKYSubtarget *Subtarget;
+
+public:
+ explicit CSKYAsmPrinter(TargetMachine &TM,
+ std::unique_ptr<MCStreamer> Streamer);
+
+ StringRef getPassName() const override { return "CSKY Assembly Printer"; }
+
+ /// tblgen'erated driver function for lowering simple MI->MC
+ /// pseudo instructions.
+ bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
+ const MachineInstr *MI);
+
+ void emitInstruction(const MachineInstr *MI) override;
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_CSKY_CSKYASMPRINTER_H
diff --git a/llvm/lib/Target/CSKY/CSKYCallingConv.h b/llvm/lib/Target/CSKY/CSKYCallingConv.h
new file mode 100644
index 000000000000..f1048f86264b
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYCallingConv.h
@@ -0,0 +1,63 @@
+//=== CSKYCallingConv.h - CSKY Custom Calling Convention Routines -*-C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the custom routines for the CSKY Calling Convention that
+// aren't done by tablegen.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_CSKY_CSKYCALLINGCONV_H
+#define LLVM_LIB_TARGET_CSKY_CSKYCALLINGCONV_H
+
+#include "CSKY.h"
+#include "CSKYSubtarget.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/IR/CallingConv.h"
+
+namespace llvm {
+
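+// Custom assignment of an f64 argument under the soft-float ABI: the value is
+// split into two i32 halves carried in the argument GPRs R0-R3, falling back
+// to the stack when no registers remain.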
+static bool CC_CSKY_ABIV2_SOFT_64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+
+ static const MCPhysReg ArgGPRs[] = {CSKY::R0, CSKY::R1, CSKY::R2, CSKY::R3};
+ Register Reg = State.AllocateReg(ArgGPRs);
+ LocVT = MVT::i32;
+ if (!Reg) {
+ unsigned StackOffset = State.AllocateStack(8, Align(4));
+ State.addLoc(
+ CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
+ return true;
+ }
+ if (!State.AllocateReg(ArgGPRs))
+ State.AllocateStack(4, Align(4));
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return true;
+}
+
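+// Custom assignment of an f64 return value under the soft-float ABI: the two
+// i32 halves are returned in R0 and R1. Returning false means the rule does
+// not apply and the next calling-convention entry is tried.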
+static bool Ret_CSKY_ABIV2_SOFT_64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+
+ static const MCPhysReg ArgGPRs[] = {CSKY::R0, CSKY::R1};
+ Register Reg = State.AllocateReg(ArgGPRs);
+ LocVT = MVT::i32;
+ if (!Reg)
+ return false;
+
+ if (!State.AllocateReg(ArgGPRs))
+ return false;
+
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return true;
+}
+
+} // namespace llvm
+
+#endif
diff --git a/llvm/lib/Target/CSKY/CSKYCallingConv.td b/llvm/lib/Target/CSKY/CSKYCallingConv.td
new file mode 100644
index 000000000000..87e2e6b9dc31
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYCallingConv.td
@@ -0,0 +1,82 @@
+//===-- CSKYCallingConv.td - Calling Conventions CSKY ----*- tablegen -*---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the CSKY architecture.
+//
+//===----------------------------------------------------------------------===//
+
+def CSR_I32 : CalleeSavedRegs<(add R8, R15, (sequence "R%u", 4, 7),
+ (sequence "R%u", 9, 11), (sequence "R%u", 16, 17), R28)>;
+def CSR_GPR_FPR32 : CalleeSavedRegs<(add CSR_I32, (sequence "F%u_32", 8, 15))>;
+def CSR_GPR_FPR64 : CalleeSavedRegs<(add CSR_I32,
+ (sequence "F%u_64", 8, 15))>;
+
+// An interrupt handler needs to save/restore all registers that it uses,
+// both caller-saved and callee-saved registers.
+def CSR_GPR_ISR : CalleeSavedRegs<(add R8, R15,
+ (sequence "R%u", 0, 3),
+ (sequence "R%u", 4, 7),
+ (sequence "R%u", 9, 13),
+ (sequence "R%u", 16, 31))>;
+
+def CSR_GPR_FPR32_ISR: CalleeSavedRegs<(add CSR_GPR_ISR,
+ (sequence "F%u_32", 0, 15))>;
+def CSR_GPR_FPR64_ISR: CalleeSavedRegs<(add CSR_GPR_ISR,
+ (sequence "F%u_64", 0, 15))>;
+
+def CSR_GPR_FPR32v3_ISR: CalleeSavedRegs<(add CSR_GPR_FPR32_ISR,
+ (sequence "F%u_32", 16, 31))>;
+def CSR_GPR_FPR64v3_ISR: CalleeSavedRegs<(add CSR_GPR_FPR64_ISR,
+ (sequence "F%u_64", 16, 31))>;
+
+// Needed for implementation of CSKYRegisterInfo::getNoPreservedMask()
+def CSR_NoRegs : CalleeSavedRegs<(add)>;
+
+def CC_CSKY_ABIV2_SOFT : CallingConv<[
+ // DSP types
+ CCIfType<[v2i16, v4i8], CCAssignToReg<[R0, R1, R2, R3]>>,
+ CCIfType<[v2i16, v4i8], CCAssignToStack<4, 4>>,
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+ CCIfType<[f32], CCAssignToReg<[R0, R1, R2, R3]>>,
+ CCIfType<[f32], CCAssignToStack<4, 4>>,
+ CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
+ CCIfType<[i32], CCAssignToStack<4, 4>>,
+ CCIfType<[f64], CCCustom<"CC_CSKY_ABIV2_SOFT_64">>,
+ CCIfType<[f64], CCAssignToStack<8, 4>>
+]>;
+
+def RetCC_CSKY_ABIV2_SOFT : CallingConv<[
+ // DSP types
+ CCIfType<[v2i16, v4i8], CCAssignToReg<[R0, R1]>>,
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+ CCIfType<[f32], CCBitConvertToType<i32>>,
+ CCIfType<[i32], CCAssignToReg<[R0, R1]>>,
+ CCIfType<[f64], CCCustom<"Ret_CSKY_ABIV2_SOFT_64">>
+]>;
+
+def CC_CSKY_ABIV2_FP : CallingConv<[
+ // DSP types
+ CCIfType<[v2i16, v4i8], CCAssignToReg<[R0, R1, R2, R3]>>,
+ CCIfType<[v2i16, v4i8], CCAssignToStack<4, 4>>,
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+ CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
+ CCIfType<[i32], CCAssignToStack<4, 4>>,
+ CCIfType<[f32], CCAssignToReg<[F0_32, F1_32, F2_32, F3_32]>>,
+ CCIfType<[f32], CCAssignToStack<4, 4>>,
+ CCIfType<[f64], CCAssignToReg<[F0_64, F1_64, F2_64, F3_64]>>,
+ CCIfType<[f64], CCAssignToStack<8, 4>>
+]>;
+
+def RetCC_CSKY_ABIV2_FP : CallingConv<[
+ // DSP types
+ CCIfType<[v2i16, v4i8], CCAssignToReg<[R0, R1]>>,
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+ CCIfType<[i32], CCAssignToReg<[R0, R1]>>,
+ CCIfType<[f32], CCAssignToReg<[F0_32]>>,
+ CCIfType<[f64], CCAssignToReg<[F0_64]>>
+]>; \ No newline at end of file
diff --git a/llvm/lib/Target/CSKY/CSKYFrameLowering.cpp b/llvm/lib/Target/CSKY/CSKYFrameLowering.cpp
new file mode 100644
index 000000000000..9b22c95cfe21
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYFrameLowering.cpp
@@ -0,0 +1,57 @@
+//===-- CSKYFrameLowering.cpp - CSKY Frame Information ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the CSKY implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CSKYFrameLowering.h"
+#include "CSKYSubtarget.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/MC/MCDwarf.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "csky-frame-lowering"
+
+// Returns the register used to hold the frame pointer.
+static Register getFPReg(const CSKYSubtarget &STI) { return CSKY::R8; }
+
+// To avoid the BP value being clobbered by a function call, we need to choose
+// a callee-saved register to hold it.
+static Register getBPReg(const CSKYSubtarget &STI) { return CSKY::R7; }
+
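+// A frame pointer is required when frame-pointer elimination is disabled, the
+// stack needs realignment, or the frame has variable-sized objects or its
+// address is taken.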
+bool CSKYFrameLowering::hasFP(const MachineFunction &MF) const {
+ const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
+
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ return MF.getTarget().Options.DisableFramePointerElim(MF) ||
+ RegInfo->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
+ MFI.isFrameAddressTaken();
+}
+
+bool CSKYFrameLowering::hasBP(const MachineFunction &MF) const {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+
+ return MFI.hasVarSizedObjects();
+}
+
+void CSKYFrameLowering::emitPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ // FIXME: Implement this when we have function calls
+}
+
+void CSKYFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ // FIXME: Implement this when we have function calls
+} \ No newline at end of file
diff --git a/llvm/lib/Target/CSKY/CSKYFrameLowering.h b/llvm/lib/Target/CSKY/CSKYFrameLowering.h
new file mode 100644
index 000000000000..49921a1866bc
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYFrameLowering.h
@@ -0,0 +1,38 @@
+//===-- CSKYFrameLowering.h - Define frame lowering for CSKY -*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class implements CSKY-specific bits of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_CSKY_CSKYFRAMELOWERING_H
+#define LLVM_LIB_TARGET_CSKY_CSKYFRAMELOWERING_H
+
+#include "llvm/CodeGen/TargetFrameLowering.h"
+
+namespace llvm {
+class CSKYSubtarget;
+
+class CSKYFrameLowering : public TargetFrameLowering {
+ const CSKYSubtarget &STI;
+
+public:
+ explicit CSKYFrameLowering(const CSKYSubtarget &STI)
+ : TargetFrameLowering(StackGrowsDown,
+ /*StackAlignment=*/Align(4),
+ /*LocalAreaOffset=*/0),
+ STI(STI) {}
+
+ void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+
+ bool hasFP(const MachineFunction &MF) const override;
+ bool hasBP(const MachineFunction &MF) const;
+};
+} // namespace llvm
+#endif
diff --git a/llvm/lib/Target/CSKY/CSKYISelDAGToDAG.cpp b/llvm/lib/Target/CSKY/CSKYISelDAGToDAG.cpp
new file mode 100644
index 000000000000..fc9ef8bfd9d9
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYISelDAGToDAG.cpp
@@ -0,0 +1,75 @@
+//===-- CSKYISelDAGToDAG.cpp - A DAG to DAG inst selector for CSKY --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the CSKY target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CSKY.h"
+#include "CSKYSubtarget.h"
+#include "CSKYTargetMachine.h"
+#include "MCTargetDesc/CSKYMCTargetDesc.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "csky-isel"
+
+namespace {
+class CSKYDAGToDAGISel : public SelectionDAGISel {
+ const CSKYSubtarget *Subtarget;
+
+public:
+ explicit CSKYDAGToDAGISel(CSKYTargetMachine &TM) : SelectionDAGISel(TM) {}
+
+ StringRef getPassName() const override {
+ return "CSKY DAG->DAG Pattern Instruction Selection";
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ // Reset the subtarget each time through.
+ Subtarget = &MF.getSubtarget<CSKYSubtarget>();
+ SelectionDAGISel::runOnMachineFunction(MF);
+ return true;
+ }
+
+ void Select(SDNode *N) override;
+
+#include "CSKYGenDAGISel.inc"
+};
+} // namespace
+
+void CSKYDAGToDAGISel::Select(SDNode *N) {
+ // If we have a custom node, we have already selected it.
+ if (N->isMachineOpcode()) {
+ LLVM_DEBUG(dbgs() << "== "; N->dump(CurDAG); dbgs() << "\n");
+ N->setNodeId(-1);
+ return;
+ }
+
+ SDLoc Dl(N);
+ unsigned Opcode = N->getOpcode();
+ bool IsSelected = false;
+
+ switch (Opcode) {
+ default:
+ break;
+ // FIXME: Add selection nodes needed later.
+ }
+
+ if (IsSelected)
+ return;
+
+ // Select the default instruction.
+ SelectCode(N);
+}
+
+FunctionPass *llvm::createCSKYISelDag(CSKYTargetMachine &TM) {
+ return new CSKYDAGToDAGISel(TM);
+}
diff --git a/llvm/lib/Target/CSKY/CSKYISelLowering.cpp b/llvm/lib/Target/CSKY/CSKYISelLowering.cpp
new file mode 100644
index 000000000000..ac6d069e592c
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYISelLowering.cpp
@@ -0,0 +1,346 @@
+//===-- CSKYISelLowering.cpp - CSKY DAG Lowering Implementation ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that CSKY uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CSKYISelLowering.h"
+#include "CSKYCallingConv.h"
+#include "CSKYMachineFunctionInfo.h"
+#include "CSKYRegisterInfo.h"
+#include "CSKYSubtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "csky-isel-lowering"
+
+STATISTIC(NumTailCalls, "Number of tail calls");
+
+#include "CSKYGenCallingConv.inc"
+
+static const MCPhysReg GPRArgRegs[] = {CSKY::R0, CSKY::R1, CSKY::R2, CSKY::R3};
+
+CSKYTargetLowering::CSKYTargetLowering(const TargetMachine &TM,
+ const CSKYSubtarget &STI)
+ : TargetLowering(TM), Subtarget(STI) {
+ // Register Class
+ addRegisterClass(MVT::i32, &CSKY::GPRRegClass);
+
+ // Compute derived properties from the register classes.
+ computeRegisterProperties(STI.getRegisterInfo());
+
+ setBooleanContents(UndefinedBooleanContent);
+ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
+
+ // TODO: Add atomic support fully.
+ setMaxAtomicSizeInBitsSupported(0);
+
+ setStackPointerRegisterToSaveRestore(CSKY::R14);
+ const Align FunctionAlignment(2);
+ setMinFunctionAlignment(FunctionAlignment);
+ setSchedulingPreference(Sched::Source);
+}
+
+EVT CSKYTargetLowering::getSetCCResultType(const DataLayout &DL,
+ LLVMContext &Context, EVT VT) const {
+ if (!VT.isVector())
+ return MVT::i32;
+
+ return VT.changeVectorElementTypeToInteger();
+}
+
+static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
+ const CCValAssign &VA, const SDLoc &DL) {
+ EVT LocVT = VA.getLocVT();
+
+ switch (VA.getLocInfo()) {
+ default:
+ llvm_unreachable("Unexpected CCValAssign::LocInfo");
+ case CCValAssign::Full:
+ break;
+ case CCValAssign::BCvt:
+ Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
+ break;
+ }
+ return Val;
+}
+
+static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
+ const CCValAssign &VA, const SDLoc &DL) {
+ switch (VA.getLocInfo()) {
+ default:
+ llvm_unreachable("Unexpected CCValAssign::LocInfo");
+ case CCValAssign::Full:
+ break;
+ case CCValAssign::BCvt:
+ Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
+ break;
+ }
+ return Val;
+}
+
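+// Materialize an argument passed in a register: create a virtual register of
+// the matching class, mark the physical register live-in, and copy the
+// incoming value out of it.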
+static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget,
+ SelectionDAG &DAG, SDValue Chain,
+ const CCValAssign &VA, const SDLoc &DL) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ EVT LocVT = VA.getLocVT();
+ SDValue Val;
+ const TargetRegisterClass *RC;
+
+ switch (LocVT.getSimpleVT().SimpleTy) {
+ default:
+ llvm_unreachable("Unexpected register type");
+ case MVT::i32:
+ RC = &CSKY::GPRRegClass;
+ break;
+ }
+
+ Register VReg = RegInfo.createVirtualRegister(RC);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
+
+ return convertLocVTToValVT(DAG, Val, VA, DL);
+}
+
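+// Materialize an argument passed on the stack: create a fixed frame object at
+// the assigned offset and load the value from it.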
+static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
+ const CCValAssign &VA, const SDLoc &DL) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ EVT LocVT = VA.getLocVT();
+ EVT ValVT = VA.getValVT();
+ EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
+ int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
+ VA.getLocMemOffset(), /*Immutable=*/true);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ SDValue Val;
+
+ ISD::LoadExtType ExtType;
+ switch (VA.getLocInfo()) {
+ default:
+ llvm_unreachable("Unexpected CCValAssign::LocInfo");
+ case CCValAssign::Full:
+ case CCValAssign::BCvt:
+ ExtType = ISD::NON_EXTLOAD;
+ break;
+ }
+ Val = DAG.getExtLoad(
+ ExtType, DL, LocVT, Chain, FIN,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
+ return Val;
+}
+
+// Transform physical registers into virtual registers.
+SDValue CSKYTargetLowering::LowerFormalArguments(
+ SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
+ SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
+
+ switch (CallConv) {
+ default:
+ report_fatal_error("Unsupported calling convention");
+ case CallingConv::C:
+ case CallingConv::Fast:
+ break;
+ }
+
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // Used with varargs to accumulate store chains.
+ std::vector<SDValue> OutChains;
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, IsVarArg));
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ SDValue ArgValue;
+
+ if (VA.isRegLoc())
+ ArgValue = unpackFromRegLoc(Subtarget, DAG, Chain, VA, DL);
+ else
+ ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
+
+ InVals.push_back(ArgValue);
+ }
+
+ if (IsVarArg) {
+ const unsigned XLenInBytes = 4;
+ const MVT XLenVT = MVT::i32;
+
+ ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(GPRArgRegs);
+ unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
+ const TargetRegisterClass *RC = &CSKY::GPRRegClass;
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ CSKYMachineFunctionInfo *CSKYFI = MF.getInfo<CSKYMachineFunctionInfo>();
+
+ // Offset of the first variable argument from the stack pointer, and size
+ // of the vararg save area. For now, the varargs save area is either zero
+ // or large enough to hold R0-R3.
+ int VaArgOffset, VarArgsSaveSize;
+
+ // If all registers are allocated, then all varargs must be passed on the
+ // stack and we don't need to save any argregs.
+ if (ArgRegs.size() == Idx) {
+ VaArgOffset = CCInfo.getNextStackOffset();
+ VarArgsSaveSize = 0;
+ } else {
+ VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
+ VaArgOffset = -VarArgsSaveSize;
+ }
+
+ // Record the frame index of the first variable argument,
+ // which is a value needed by VASTART.
+ int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
+ CSKYFI->setVarArgsFrameIndex(FI);
+
+ // Copy the integer registers that may have been used for passing varargs
+ // to the vararg save area.
+ for (unsigned I = Idx; I < ArgRegs.size();
+ ++I, VaArgOffset += XLenInBytes) {
+ const Register Reg = RegInfo.createVirtualRegister(RC);
+ RegInfo.addLiveIn(ArgRegs[I], Reg);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
+ FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
+ SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
+ SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
+ MachinePointerInfo::getFixedStack(MF, FI));
+ cast<StoreSDNode>(Store.getNode())
+ ->getMemOperand()
+ ->setValue((Value *)nullptr);
+ OutChains.push_back(Store);
+ }
+ CSKYFI->setVarArgsSaveSize(VarArgsSaveSize);
+ }
+
+ // All stores are grouped in one node to allow the matching between
+ // the size of Ins and InVals. This only happens for vararg functions.
+ if (!OutChains.empty()) {
+ OutChains.push_back(Chain);
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
+ }
+
+ return Chain;
+}
+
+bool CSKYTargetLowering::CanLowerReturn(
+ CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
+ SmallVector<CCValAssign, 16> CSKYLocs;
+ CCState CCInfo(CallConv, IsVarArg, MF, CSKYLocs, Context);
+ return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, IsVarArg));
+}
+
+SDValue
+CSKYTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
+ bool IsVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SDLoc &DL, SelectionDAG &DAG) const {
+ // Stores the assignment of the return value to a location.
+ SmallVector<CCValAssign, 16> CSKYLocs;
+
+ // Info about the registers and stack slot.
+ CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), CSKYLocs,
+ *DAG.getContext());
+ CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, IsVarArg));
+
+ SDValue Glue;
+ SmallVector<SDValue, 4> RetOps(1, Chain);
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0, e = CSKYLocs.size(); i < e; ++i) {
+ SDValue Val = OutVals[i];
+ CCValAssign &VA = CSKYLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ bool IsF64OnCSKY = VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
+
+ if (IsF64OnCSKY) {
+
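+ // Split the f64 return value into two i32 halves and return them in a
+ // consecutive GPR pair.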
+ assert(VA.isRegLoc() && "Expected return via registers");
+ SDValue Split64 = DAG.getNode(CSKYISD::BITCAST_TO_LOHI, DL,
+ DAG.getVTList(MVT::i32, MVT::i32), Val);
+ SDValue Lo = Split64.getValue(0);
+ SDValue Hi = Split64.getValue(1);
+
+ Register RegLo = VA.getLocReg();
+ assert(RegLo < CSKY::R31 && "Invalid register pair");
+ Register RegHi = RegLo + 1;
+
+ Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
+ Glue = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
+ Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
+ Glue = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
+ } else {
+ // Handle a 'normal' return.
+ Val = convertValVTToLocVT(DAG, Val, VA, DL);
+ Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
+
+ // Guarantee that all emitted copies are stuck together.
+ Glue = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+ }
+ }
+
+ RetOps[0] = Chain; // Update chain.
+
+ // Add the glue node if we have it.
+ if (Glue.getNode()) {
+ RetOps.push_back(Glue);
+ }
+
+ // Interrupt service routines use different return instructions.
+ if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt"))
+ return DAG.getNode(CSKYISD::NIR, DL, MVT::Other, RetOps);
+
+ return DAG.getNode(CSKYISD::RET, DL, MVT::Other, RetOps);
+}
+
+CCAssignFn *CSKYTargetLowering::CCAssignFnForReturn(CallingConv::ID CC,
+ bool IsVarArg) const {
+ if (IsVarArg || !Subtarget.useHardFloatABI())
+ return RetCC_CSKY_ABIV2_SOFT;
+ else
+ return RetCC_CSKY_ABIV2_FP;
+}
+
+CCAssignFn *CSKYTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
+ bool IsVarArg) const {
+ if (IsVarArg || !Subtarget.useHardFloatABI())
+ return CC_CSKY_ABIV2_SOFT;
+ else
+ return CC_CSKY_ABIV2_FP;
+}
+
+const char *CSKYTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default:
+ llvm_unreachable("unknown CSKYISD node");
+ case CSKYISD::NIE:
+ return "CSKYISD::NIE";
+ case CSKYISD::NIR:
+ return "CSKYISD::NIR";
+ case CSKYISD::RET:
+ return "CSKYISD::RET";
+ case CSKYISD::BITCAST_TO_LOHI:
+ return "CSKYISD::BITCAST_TO_LOHI";
+ }
+}
diff --git a/llvm/lib/Target/CSKY/CSKYISelLowering.h b/llvm/lib/Target/CSKY/CSKYISelLowering.h
new file mode 100644
index 000000000000..7557c11f50a8
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYISelLowering.h
@@ -0,0 +1,69 @@
+//===-- CSKYISelLowering.h - CSKY DAG Lowering Interface ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that CSKY uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_CSKY_CSKYISELLOWERING_H
+#define LLVM_LIB_TARGET_CSKY_CSKYISELLOWERING_H
+
+#include "MCTargetDesc/CSKYBaseInfo.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/TargetLowering.h"
+
+namespace llvm {
+class CSKYSubtarget;
+
+namespace CSKYISD {
+enum NodeType : unsigned {
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+ NIE,
+ NIR,
+ RET,
+ BITCAST_TO_LOHI
+};
+}
+
+class CSKYTargetLowering : public TargetLowering {
+ const CSKYSubtarget &Subtarget;
+
+public:
+ explicit CSKYTargetLowering(const TargetMachine &TM,
+ const CSKYSubtarget &STI);
+
+ EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
+ EVT VT) const override;
+
+private:
+ SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
+ bool IsVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ const SDLoc &DL, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const override;
+
+ bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
+ bool IsVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const override;
+
+ SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
+ SelectionDAG &DAG) const override;
+
+ const char *getTargetNodeName(unsigned Opcode) const override;
+
+ CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
+ CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg) const;
+};
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_CSKY_CSKYISELLOWERING_H
diff --git a/llvm/lib/Target/CSKY/CSKYInstrFormats.td b/llvm/lib/Target/CSKY/CSKYInstrFormats.td
index dd71b693bbbb..9b6ef9ca23db 100644
--- a/llvm/lib/Target/CSKY/CSKYInstrFormats.td
+++ b/llvm/lib/Target/CSKY/CSKYInstrFormats.td
@@ -24,7 +24,7 @@ class CSKYInst<AddrMode am, int sz, dag outs, dag ins, string asmstr,
let Namespace = "CSKY";
int Size = sz;
AddrMode AM = am;
-
+ field bits<32> SoftFail = 0;
let OutOperandList = outs;
let InOperandList = ins;
let AsmString = asmstr;
@@ -46,6 +46,11 @@ class CSKY32Inst<AddrMode am, bits<6> opcode, dag outs, dag ins, string asmstr,
let Inst{31 - 26} = opcode;
}
+class CSKY16Inst<AddrMode am, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : CSKYInst<am, 2, outs, ins, asmstr, pattern> {
+ field bits<16> Inst;
+}
+
// CSKY 32-bit instruction
// Format< OP[6] | Offset[26] >
// Instruction(1): bsr32
@@ -157,19 +162,7 @@ class I_16_RET<bits<5> sop, bits<5> pcode, string op, list<dag> pattern>
let isTerminator = 1;
let isReturn = 1;
let isBarrier = 1;
-}
-
-// Instructions(1): rte32
-class I_16_RET_I<bits<5> sop, bits<5> pcode, string op, list<dag> pattern>
- : CSKY32Inst<AddrModeNone, 0x30, (outs), (ins), op, pattern> {
- let Inst{25 - 21} = sop;
- let Inst{20 - 16} = pcode;
- let Inst{15 - 10} = 0x10;
- let Inst{9 - 5} = 1;
- let Inst{4 - 0} = 0;
- let isTerminator = 1;
- let isReturn = 1;
- let isBarrier = 1;
+ let Uses = [ R15 ];
}
// Format< OP[6] | SOP[5] | RX[5] | IMM16[16] >
@@ -227,14 +220,27 @@ class I_LDST<AddrMode am, bits<6> opcode, bits<4> sop, dag outs, dag ins,
let Inst{11 - 0} = imm12;
}
+class I_PLDR<AddrMode am, bits<6> opcode, bits<4> sop, dag outs, dag ins,
+ string op, list<dag> pattern>
+ : CSKY32Inst<am, opcode, outs, ins, !strconcat(op, "\t($rx, ${imm12})"),
+ pattern> {
+ bits<5> rx;
+ bits<12> imm12;
+ let Inst{25 - 21} = 0;
+ let Inst{20 - 16} = rx;
+ let Inst{15 - 12} = sop;
+ let Inst{11 - 0} = imm12;
+}
+
// Format< OP[6] | RZ[5] | RX[5] | SOP[4] | OFFSET[12] >
-// Instructions(6): ld32.b, ld32.bs, ld32.h, ld32.hs, ld32.w, ld32.d
+// Instructions(6): ld32.b, ld32.bs, ld32.h, ld32.hs, ld32.w
class I_LD<AddrMode am, bits<4> sop, string op, Operand operand>
: I_LDST<am, 0x36, sop,
(outs GPR:$rz), (ins GPR:$rx, operand:$imm12), op, []>;
// Format< OP[6] | RZ[5] | RX[5] | SOP[4] | OFFSET[12] >
-// Instructions(4): st32.b, st32.h, st32.w, st32.d
+// Instructions(4): st32.b, st32.h, st32.w
class I_ST<AddrMode am, bits<4> sop, string op, Operand operand>
: I_LDST<am, 0x37, sop, (outs),
(ins GPR:$rz, GPR:$rx, operand:$imm12), op, []>;
@@ -249,6 +255,8 @@ class I_12_PP<bits<5> sop, bits<5> pcode, dag outs, dag ins, string op>
let Inst{20 - 16} = pcode;
let Inst{15 - 12} = 0;
let Inst{11 - 0} = regs;
+ let Uses = [R14];
+ let Defs = [R14];
}
// Format< OP[6] | RZ[5] | RX[5] | SOP[6] | PCODE[5] | IMM[5]>
@@ -256,7 +264,7 @@ class I_12_PP<bits<5> sop, bits<5> pcode, dag outs, dag ins, string op>
class I_5_ZX<bits<6> sop, bits<5> pcode, string op, ImmLeaf ImmType,
list<dag> pattern>
: CSKY32Inst<AddrModeNone, 0x31, (outs GPR:$rz),
- (ins GPR:$false, GPR:$rx, ImmType:$imm5),
+ (ins CARRY:$cond, GPR:$false, GPR:$rx, ImmType:$imm5),
!strconcat(op, "\t$rz, $rx, $imm5"), pattern> {
bits<5> rz;
bits<5> rx;
@@ -272,9 +280,9 @@ class I_5_ZX<bits<6> sop, bits<5> pcode, string op, ImmLeaf ImmType,
// Format< OP[6] | IMM[5] | RX[5] | SOP[6] | PCODE[5] | RZ[5]>
// Instructions(13): decgt32, declt32, decne32, lsli32, lslc32, lsri32
// lsrc32, asri32, asrc32, rotli32, xsr32, bclri32, bseti32
-class I_5_XZ<bits<6> sop, bits<5> pcode, string op, dag ins, dag outs,
+class I_5_XZ<bits<6> sop, bits<5> pcode, string op, dag outs, dag ins,
list<dag> pattern>
- : CSKY32Inst<AddrModeNone, 0x31, ins, outs,
+ : CSKY32Inst<AddrModeNone, 0x31, outs, ins,
!strconcat(op, "\t$rz, $rx, $imm5"), pattern> {
bits<5> imm5;
bits<5> rx;
@@ -286,19 +294,107 @@ class I_5_XZ<bits<6> sop, bits<5> pcode, string op, dag ins, dag outs,
let Inst{4 - 0} = rz;
}
+// mtcr32, mfcr32
+class I_5_XZ_CR<bits<6> sop, bits<5> pcode, string opStr, dag outs, dag ins,
+ list<dag> pattern>
+ : CSKY32Inst<AddrModeNone, 0x30, outs, ins, opStr, pattern> {
+ bits<5> sel;
+ bits<5> rx;
+ bits<5> cr;
+ let Inst{25 - 21} = sel;
+ let Inst{20 - 16} = rx;
+ let Inst{15 - 10} = sop;
+ let Inst{9 - 5} = pcode;
+ let Inst{4 - 0} = cr;
+}
+
+// sync
+class I_5_XZ_SYNC<bits<6> sop, bits<5> pcode, string opStr, bits<1> S, bits<1> I>
+ : CSKY32Inst<AddrModeNone, 0x30, (outs), (ins), opStr, []> {
+ let Inst{25 - 21} = 0;
+ let Inst{20 - 16} = 0;
+ let Inst{15 - 10} = sop;
+ let Inst{9 - 5} = pcode;
+ let Inst{4 - 0} = 0;
+ let Inst{25} = S;
+ let Inst{21} = I;
+
+}
+
+// Privileged Instructions
+class I_5_XZ_PRIVI<bits<6> sop, bits<5> pcode, string opStr>
+ : CSKY32Inst<AddrModeNone, 0x30, (outs), (ins), opStr, []> {
+ let Inst{25 - 21} = 0;
+ let Inst{20 - 16} = 0;
+ let Inst{15 - 10} = sop;
+ let Inst{9 - 5} = pcode;
+ let Inst{4 - 0} = 0;
+}
+
+class I_CP<bits<4> sop, dag outs, dag ins, string opStr>
+ : CSKY32Inst<AddrModeNone, 0x3f, outs, ins, opStr, []> {
+ bits<5> cpid;
+ bits<12> usdef;
+ let Inst{25 - 21} = cpid;
+ let Inst{20 - 16} = 0;
+ let Inst{15 - 12} = sop;
+ let Inst{11 - 0} = usdef;
+}
+
+class I_CPOP<dag outs, dag ins, string opStr>
+ : CSKY32Inst<AddrModeNone, 0x3f, outs, ins, opStr, []> {
+ bits<5> cpid;
+ bits<20> usdef;
+ let Inst{25 - 21} = cpid;
+ let Inst{20 - 16} = usdef{19-15};
+ let Inst{15} = 1;
+ let Inst{14 - 0} = usdef{14-0};
+}
+
+class I_CP_Z<bits<4> sop, dag outs, dag ins, string opStr>
+ : CSKY32Inst<AddrModeNone, 0x3f, outs, ins, opStr, []> {
+ bits<5> cpid;
+ bits<12> usdef;
+ bits<5> rz;
+
+ let Inst{25 - 21} = cpid;
+ let Inst{20 - 16} = rz;
+ let Inst{15 - 12} = sop;
+ let Inst{11 - 0} = usdef;
+}
+
+class I_5_CACHE<bits<6> sop, bits<5> pcode, string opStr>
+ : CSKY32Inst<AddrModeNone, 0x30, (outs), (ins), opStr, []> {
+ let Inst{25 - 21} = pcode;
+ let Inst{20 - 16} = 0;
+ let Inst{15 - 10} = sop;
+ let Inst{9 - 5} = 0b00001;
+ let Inst{4 - 0} = 0;
+}
+
+class I_5_X_CACHE<bits<6> sop, bits<5> pcode, string opStr>
+ : CSKY32Inst<AddrModeNone, 0x30, (outs), (ins GPR:$rx), opStr #"\t$rx", []> {
+ bits<5> rx;
+
+ let Inst{25 - 21} = pcode;
+ let Inst{20 - 16} = rx;
+ let Inst{15 - 10} = sop;
+ let Inst{9 - 5} = 0b00001;
+ let Inst{4 - 0} = 0;
+}
+
// Format< OP[6] | RY[5] | RX[5] | SOP[6] | PCODE[5] | IMM[5]>
// Instructions(2): ldm32, (ldq32), stm32, (stq32)
-class I_5_YX<bits<6> opcode, dag outs, dag ins, string op, list<dag> pattern,
- bits<5> imm5>
- : CSKY32Inst<AddrModeNone, opcode, outs, ins,
- op #"\t${ry}, (${rx}), " #!cast<int>(imm5), pattern> {
+class I_5_YX<bits<6> opcode, bits<6> sop, dag outs, dag ins, string opStr, list<dag> pattern>
+ : CSKY32Inst<AddrModeNone, opcode, outs, ins, opStr, pattern> {
+ bits<10> regs;
bits<5> rx;
- bits<5> ry;
- let Inst{25 - 21} = ry; // ry
+
+ let Inst{25 - 21} = regs{9 - 5}; // ry
let Inst{20 - 16} = rx;
- let Inst{15 - 10} = 0b000111;
+ let Inst{15 - 10} = sop;
let Inst{9 - 5} = 0b00001;
- let Inst{4 - 0} = imm5{4 - 0}; // imm5
+ let Inst{4 - 0} = regs{4 - 0}; // imm5
}
// Format< OP[6] | LSB[5] | RX[5] | SOP[6] | MSB[5] | RZ[5]>
@@ -317,14 +413,33 @@ class I_5_XZ_U<bits<6> sop, dag outs, dag ins, string op, list<dag> pattern>
let Inst{4 - 0} = rz;
}
-// sextb, sexth
-class I_5_XZ_US<bits<6> sop, string op, SDNode opnode,
- ValueType type> : I_5_XZ_U<sop, (outs GPR:$rz), (ins GPR:$rx, uimm5:$msb, uimm5:$lsb), op,
- [(set GPR:$rz, (opnode GPR:$rx, type))]>;
+class I_5_XZ_INS<bits<6> sop, dag outs, dag ins, string op, list<dag> pattern>
+ : CSKY32Inst<AddrModeNone, 0x31, outs, ins, op #"\t$rz, $rx, $msb, $lsb",
+ pattern> {
+ bits<5> rx;
+ bits<5> rz;
+ bits<5> msb;
+ bits<5> lsb;
+ let Inst{25 - 21} = rz;
+ let Inst{20 - 16} = rx;
+ let Inst{15 - 10} = sop;
+ let Inst{9 - 5} = msb;
+ let Inst{4 - 0} = lsb;
+}
-class I_5_XZ_UZ<bits<6> sop, string op, int v>
- : I_5_XZ_U<sop, (outs GPR:$rz), (ins GPR:$rx, uimm5:$msb, uimm5:$lsb), op,
- [(set GPR:$rz, (and GPR:$rx, (i32 v)))]>;
+// Format< OP[6] | LSB[5] | RX[5] | SOP[6] | MSB[5] | RZ[5]>
+// Instructions(6): zext32, zextb32, zexth32, sext32, sextb32, sexth32
+class I_5_XZ_U2<bits<6> sop, bits<5> lsb, bits<5> msb, dag outs, dag ins,
+ string op, list<dag> pattern>
+ : CSKY32Inst<AddrModeNone, 0x31, outs, ins, !strconcat(op, "\t$rz, $rx"), pattern> {
+ bits<5> rx;
+ bits<5> rz;
+ let Inst{25 - 21} = lsb; // lsb
+ let Inst{20 - 16} = rx;
+ let Inst{15 - 10} = sop;
+ let Inst{9 - 5} = msb; // msb
+ let Inst{4 - 0} = rz;
+}
// Format< OP[6] | RZ[5] | RX[5] | SOP[6] | SIZE[5] | LSB[5]>
// Instructions(1): ins32
@@ -341,6 +456,16 @@ class I_5_ZX_U<bits<6> sop, string op, Operand operand, list<dag> pattern>
let Inst{4 - 0} = size_lsb{4 - 0}; // lsb
}
+// sextb, sexth
+class I_5_XZ_US<bits<6> sop, bits<5> lsb, bits<5> msb, string op,
+ SDNode opnode, ValueType type>
+ : I_5_XZ_U2<sop, lsb, msb, (outs GPR:$rz), (ins GPR:$rx), op,
+ [(set GPR:$rz, (opnode GPR:$rx, type))]>;
+
+class I_5_XZ_UZ<bits<6> sop, bits<5> lsb, bits<5> msb, string op, int v>
+ : I_5_XZ_U2<sop, lsb, msb, (outs GPR:$rz), (ins GPR:$rx), op,
+ [(set GPR:$rz, (and GPR:$rx, (i32 v)))]>;
+
// Format< OP[6] | IMM[5] | RX[5] | SOP[6] | PCODE[5] | 00000 >
// Instructions(1): btsti32
class I_5_X<bits<6> sop, bits<5> pcode, string op, ImmLeaf ImmType,
@@ -373,6 +498,18 @@ class I_5_Z<bits<6> sop, bits<5> pcode, string op, ImmLeaf ImmType,
let Inst{4 - 0} = rz;
}
+class I_5_IMM5<bits<6> opcode, bits<6> sop, bits<5> pcode, string op, ImmLeaf ImmType,
+ list<dag> pattern>
+ : CSKY32Inst<AddrModeNone, opcode, (outs), (ins ImmType:$imm5),
+ !strconcat(op, "\t$imm5"), pattern> {
+ bits<5> imm5;
+ let Inst{25 - 21} = imm5;
+ let Inst{20 - 16} = 0;
+ let Inst{15 - 10} = sop;
+ let Inst{9 - 5} = pcode;
+ let Inst{4 - 0} = 0;
+}
+
// Format< OP[6] | RY[5] | RX[5] | SOP[6] | PCODE[5] | RZ[5] >
// Instructions(24): addu32, addc32, subu32, subc32, (rsub32), ixh32, ixw32,
// ixd32, and32, andn32, or32, xor32, nor32, lsl32, lsr32, asr32, rotl32
@@ -493,9 +630,8 @@ class R_ZX<bits<6> sop, bits<5> pcode, string op, list<dag> pattern>
// Format< OP[6] | 00000[5] | RX[5] | SOP[6] | PCODE[5] | 00000[5] >
// Instructions:(1) tstnbz32
-class R_X<bits<6> sop, bits<5> pcode, string op, list<dag> pattern>
- : CSKY32Inst<AddrModeNone, 0x31, (outs CARRY:$ca),(ins GPR:$rx),
- !strconcat(op, "\t$rx"), pattern> {
+class R_X<bits<6> sop, bits<5> pcode, dag outs, dag ins, string op, list<dag> pattern>
+ : CSKY32Inst<AddrModeNone, 0x31, outs, ins, !strconcat(op, "\t$rx"), pattern> {
bits<5> rx;
let Inst{25 - 21} = 0;
let Inst{20 - 16} = rx;
@@ -530,3 +666,14 @@ class R_Z_2<bits<6> sop, bits<5> pcode, string op, list<dag> pattern>
let Inst{4 - 0} = 0;
let Constraints = "$rz = $false";
}
+
+class BAR<bits<5> sop, string op, bits<1> signed>
+ : CSKY32Inst<AddrModeNone, 0x30, (outs), (ins), op, []> {
+ let Inst{25} = signed;
+ let Inst{24 - 16} = 0;
+ let Inst{15 - 5} = 0x421;
+ let Inst{4 - 0} = sop;
+ let hasSideEffects = 1;
+ let mayLoad = 0;
+ let mayStore = 0;
+}
diff --git a/llvm/lib/Target/CSKY/CSKYInstrFormats16Instr.td b/llvm/lib/Target/CSKY/CSKYInstrFormats16Instr.td
new file mode 100644
index 000000000000..6d42bddcdd78
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYInstrFormats16Instr.td
@@ -0,0 +1,219 @@
+//===- CSKYInstrFormats16Instr.td - 16-bit Instr. Formats -*- tablegen --*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+class J16<bits<5> sop, string opstr, dag ins>
+ : CSKY16Inst<AddrModeNone, (outs), ins,
+ !strconcat(opstr, "\t$offset"), []> {
+ bits<10> offset;
+ let Inst{15} = 0;
+ let Inst{14 - 10} = sop;
+ let Inst{9 - 0} = offset;
+}
+
+class J16_B<bits<5> sop, string opstr>
+ : CSKY16Inst<AddrModeNone, (outs), (ins CARRY:$ca, br_symbol_16bit:$offset),
+ !strconcat(opstr, "\t$offset"), []> {
+ bits<10> offset;
+ let Inst{15} = 0;
+ let Inst{14 - 10} = sop;
+ let Inst{9 - 0} = offset;
+}
+
+class R16_XYZ<bits<2> sop, string opstr, SDNode opnode> : CSKY16Inst<AddrModeNone,
+ (outs mGPR:$rz), (ins mGPR:$rx, mGPR:$ry), !strconcat(opstr, "\t$rz, $rx, $ry"),
+ [(set mGPR:$rz, (opnode mGPR:$rx, mGPR:$ry)) ]> {
+ bits<3> rz;
+ bits<3> rx;
+ bits<3> ry;
+ let Inst{15 - 11} = 0b01011;
+ let Inst{10 - 8} = rx;
+ let Inst{7 - 5} = rz;
+ let Inst{4 - 2} = ry;
+ let Inst{1, 0} = sop;
+}
+
+class R16_XZ_BINOP<bits<4> op, bits<2> sop, string opstr, PatFrag opnode> : CSKY16Inst<
+ AddrModeNone, (outs sGPR:$rz), (ins sGPR:$rZ, sGPR:$rx), !strconcat(opstr, "\t$rz, $rx"),
+ [(set sGPR:$rz, (opnode sGPR:$rZ, sGPR:$rx))]> {
+ bits<4> rz;
+ bits<4> rx;
+ let Inst{15, 14} = 0b01;
+ let Inst{13 - 10} = op;
+ let Inst{9 - 6} = rz;
+ let Inst{5 - 2} = rx;
+ let Inst{1, 0} = sop;
+ let Constraints = "$rz = $rZ";
+}
+
+class R16_XZ_BINOP_NOPat<bits<4> op, bits<2> sop, string opstr> : CSKY16Inst<
+ AddrModeNone, (outs sGPR:$rz), (ins sGPR:$rZ, sGPR:$rx), !strconcat(opstr, "\t$rz, $rx"),
+ []> {
+ bits<4> rz;
+ bits<4> rx;
+ let Inst{15, 14} = 0b01;
+ let Inst{13 - 10} = op;
+ let Inst{9 - 6} = rz;
+ let Inst{5 - 2} = rx;
+ let Inst{1, 0} = sop;
+ let Constraints = "$rz = $rZ";
+}
+
+class R16_XZ_BINOP_C<bits<4> op, bits<2> sop, string opstr> : CSKY16Inst<
+ AddrModeNone, (outs sGPR:$rz, CARRY:$cout),
+ (ins sGPR:$rZ, sGPR:$rx, CARRY:$cin), !strconcat(opstr, "\t$rz, $rx"), []> {
+ bits<4> rz;
+ bits<4> rx;
+ let Inst{15, 14} = 0b01;
+ let Inst{13 - 10} = op;
+ let Inst{9 - 6} = rz;
+ let Inst{5 - 2} = rx;
+ let Inst{1, 0} = sop;
+ let Constraints = "$rz = $rZ";
+}
+
+class R16_XZ_UNOP<bits<4> op, bits<2> sop, string opstr> : CSKY16Inst<
+ AddrModeNone, (outs sGPR:$rz), (ins sGPR:$rx), !strconcat(opstr, "\t$rz, $rx"),
+ []> {
+ bits<4> rz;
+ bits<4> rx;
+ let Inst{15, 14} = 0b01;
+ let Inst{13 - 10} = op;
+ let Inst{9 - 6} = rz;
+ let Inst{5 - 2} = rx;
+ let Inst{1, 0} = sop;
+}
+
+class R16_XY_CMP<bits<2> sop, string opstr> : CSKY16Inst<
+ AddrModeNone, (outs CARRY:$ca), (ins sGPR:$rx, sGPR:$ry), !strconcat(opstr, "\t$rx, $ry"),
+ []> {
+ bits<4> ry;
+ bits<4> rx;
+ let Inst{15, 14} = 0b01;
+ let Inst{13 - 10} = 0b1001;
+ let Inst{9 - 6} = ry;
+ let Inst{5 - 2} = rx;
+ let Inst{1, 0} = sop;
+ let isCompare = 1;
+}
+
+class R16_X_J<bits<8> op_rz, bits<2> sop, string opstr> : CSKY16Inst<
+ AddrModeNone, (outs), (ins sGPR:$rx), !strconcat(opstr, "\t$rx"), []> {
+ bits<4> rx;
+ let Inst{15, 14} = 0b01;
+ let Inst{13 - 6} = op_rz;
+ let Inst{5 - 2} = rx;
+ let Inst{1, 0} = sop;
+}
+
+class I16_Z_8<bits<3> op, dag ins, string asmstr>
+ : CSKY16Inst<AddrModeNone, (outs mGPR:$rz), ins, asmstr, []> {
+ bits<3> rz;
+ bits<8> imm8;
+ let Inst{15, 14} = 0b00;
+ let Inst{13 - 11} = op;
+ let Inst{10 - 8} = rz;
+ let Inst{7 - 0} = imm8;
+}
+
+class I16_Z_5<bits<3> sop, dag outs, dag ins,string opstr>
+ : CSKY16Inst<AddrModeNone, outs, ins,
+ !strconcat(opstr, "\t$rz, $imm5"), []> {
+ bits<3> rz;
+ bits<5> imm5;
+ let Inst{15, 14} = 0b00;
+ let Inst{13 - 11} = 0b111;
+ let Inst{10 - 8} = rz;
+ let Inst{7 - 5} = sop;
+ let Inst{4 - 0} = imm5;
+}
+
+class I16_X_CMP<bits<3> sop, string opstr, Operand Immoperand> : CSKY16Inst<
+ AddrModeNone, (outs CARRY:$ca), (ins mGPR:$rx, Immoperand:$imm5),
+ !strconcat(opstr, "\t$rx, $imm5"), []> {
+ bits<3> rx;
+ bits<5> imm5;
+ let Inst{15, 14} = 0b00;
+ let Inst{13 - 11} = 0b111;
+ let Inst{10 - 8} = rx;
+ let Inst{7 - 5} = sop;
+ let Inst{4 - 0} = imm5;
+ let isCompare = 1;
+}
+
+class I16_SP_IMM7<bits<3> sop, string opstr> : CSKY16Inst<
+ AddrModeNone, (outs SPOp:$sp2), (ins SPOp:$sp1, uimm7_2:$imm7),
+ !strconcat(opstr, "\t$sp2, $sp1, $imm7"), []> {
+ bits<7> imm7;
+ let Inst{15, 14} = 0b00;
+ let Inst{13 - 10} = 0b0101;
+ let Inst{9, 8} = imm7{6,5};
+ let Inst{7 - 5} = sop;
+ let Inst{4 - 0} = imm7{4 - 0};
+}
+
+class I16_XZ_IMM5<bits<3> sop, string opstr, SDNode opnode> : CSKY16Inst<
+ AddrModeNone, (outs mGPR:$rz), (ins mGPR:$rx, uimm5:$imm5),
+ !strconcat(opstr, "\t$rz, $rx, $imm5"), [(set mGPR:$rz, (opnode mGPR:$rx, uimm5:$imm5))]> {
+ bits<3> rx;
+ bits<3> rz;
+ bits<5> imm5;
+ let Inst{15, 14} = 0b01;
+ let Inst{13 - 11} = sop;
+ let Inst{10 - 8} = rx;
+ let Inst{7 - 5} = rz;
+ let Inst{4 - 0} = imm5;
+}
+
+class I16_XZ_LDST<AddrMode am, bits<3> sop, string opstr, dag outs, dag ins>
+ : CSKY16Inst<am, outs, ins, !strconcat(opstr, "\t$rz, ($rx, ${imm})"),
+ []> {
+ bits<3> rx;
+ bits<3> rz;
+ bits<5> imm;
+ let Inst{15, 14} = 0b10;
+ let Inst{13 - 11} = sop;
+ let Inst{10 - 8} = rx;
+ let Inst{7 - 5} = rz;
+ let Inst{4 - 0} = imm;
+}
+
+class I16_ZSP_LDST<AddrMode am, bits<3> sop, string opstr, dag outs, dag ins> : CSKY16Inst<
+ am, outs, ins, !strconcat(opstr, "\t$rz, ($sp, ${addr})"),
+ []> {
+ bits<3> rz;
+ bits<8> addr;
+ let Inst{15, 14} = 0b10;
+ let Inst{13 - 11} = sop;
+ let Inst{10 - 8} = addr{7 - 5};
+ let Inst{7 - 5} = rz;
+ let Inst{4 - 0} = addr{4 - 0};
+}
+
+class I16_XZ_IMM3<bits<2> sop, string opstr, SDNode opnode> : CSKY16Inst<
+ AddrModeNone, (outs mGPR:$rz), (ins mGPR:$rx, oimm3:$oimm3),
+ !strconcat(opstr, "\t$rz, $rx, $oimm3"), [(set mGPR:$rz, (opnode mGPR:$rx, oimm3:$oimm3))]> {
+ bits<3> rx;
+ bits<3> rz;
+ bits<3> oimm3;
+ let Inst{15, 14} = 0b01;
+ let Inst{13 - 11} = 0b011;
+ let Inst{10 - 8} = rx;
+ let Inst{7 - 5} = rz;
+ let Inst{4 - 2} = oimm3;
+ let Inst{1, 0} = sop;
+}
+
+class I16_BPushPop<bits<11> op, bits<2> uop, dag out, dag ins, string opstr> :
+ CSKY16Inst<AddrModeNone, out, ins, opstr, []>{
+ bits<3> rz;
+ let Inst{15- 5} = op;
+ let Inst{4 -2} = rz;
+ let Inst{1,0} = uop;
+ let Predicates = [HasJAVA];
+ let hasSideEffects = 1;
+}
diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp b/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp
new file mode 100644
index 000000000000..e12235cf9478
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp
@@ -0,0 +1,25 @@
+//===-- CSKYInstrInfo.cpp - CSKY Instruction Information ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the CSKY implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CSKYInstrInfo.h"
+#include "llvm/MC/MCContext.h"
+
+#define DEBUG_TYPE "csky-instr-info"
+
+using namespace llvm;
+
+#define GET_INSTRINFO_CTOR_DTOR
+#include "CSKYGenInstrInfo.inc"
+
+CSKYInstrInfo::CSKYInstrInfo(CSKYSubtarget &STI)
+ : CSKYGenInstrInfo(CSKY::ADJCALLSTACKDOWN, CSKY::ADJCALLSTACKUP), STI(STI) {
+}
diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo.h b/llvm/lib/Target/CSKY/CSKYInstrInfo.h
new file mode 100644
index 000000000000..04be9da27b57
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYInstrInfo.h
@@ -0,0 +1,36 @@
+//===-- CSKYInstrInfo.h - CSKY Instruction Information --------*- C++ -*---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the CSKY implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_CSKY_CSKYINSTRINFO_H
+#define LLVM_LIB_TARGET_CSKY_CSKYINSTRINFO_H
+
+#include "MCTargetDesc/CSKYMCTargetDesc.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "CSKYGenInstrInfo.inc"
+
+namespace llvm {
+
+class CSKYSubtarget;
+
+class CSKYInstrInfo : public CSKYGenInstrInfo {
+protected:
+ const CSKYSubtarget &STI;
+
+public:
+ explicit CSKYInstrInfo(CSKYSubtarget &STI);
+};
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_CSKY_CSKYINSTRINFO_H
diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo.td b/llvm/lib/Target/CSKY/CSKYInstrInfo.td
index 20adda4f9ca2..9dda3159e446 100644
--- a/llvm/lib/Target/CSKY/CSKYInstrInfo.td
+++ b/llvm/lib/Target/CSKY/CSKYInstrInfo.td
@@ -15,6 +15,18 @@
// CSKY specific DAG Nodes.
//===----------------------------------------------------------------------===//
+def SDT_CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>,
+ SDTCisVT<1, i32>]>;
+
+def SDT_CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>,
+ SDTCisVT<1, i32>]>;
+
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_CallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_CallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
// Target-dependent nodes.
def CSKY_RET : SDNode<"CSKYISD::RET", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
@@ -44,6 +56,7 @@ class oimm<int num> : Operand<i32>,
ImmLeaf<i32, "return isUInt<"#num#">(Imm - 1);"> {
let EncoderMethod = "getOImmOpValue";
let ParserMatchClass = OImmAsmOperand<num>;
+ let DecoderMethod = "decodeOImmOperand<"#num#">";
}
class uimm<int num, int shift = 0> : Operand<i32>,
@@ -53,12 +66,14 @@ class uimm<int num, int shift = 0> : Operand<i32>,
!if(!ne(shift, 0),
UImmAsmOperand<num, "Shift"#shift>,
UImmAsmOperand<num>);
+ let DecoderMethod = "decodeUImmOperand<"#num#", "#shift#">";
}
class simm<int num, int shift = 0> : Operand<i32>,
ImmLeaf<i32, "return isShiftedInt<"#num#", "#shift#">(Imm);"> {
let EncoderMethod = "getImmOpValue<"#shift#">";
let ParserMatchClass = SImmAsmOperand<num>;
+ let DecoderMethod = "decodeSImmOperand<"#num#", "#shift#">";
}
def nimm_XFORM : SDNodeXForm<imm, [{
@@ -73,14 +88,19 @@ def uimm32_hi16 : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant((N->getZExtValue() >> 16) & 0xFFFF,
SDLoc(N), MVT::i32);
}]>;
+def uimm32_lo16 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() & 0xFFFF, SDLoc(N), MVT::i32);
+}]>;
def uimm16_16_xform : Operand<i32>,
ImmLeaf<i32, "return isShiftedUInt<16, 16>(Imm);", uimm32_hi16> {
let ParserMatchClass = UImmAsmOperand<16>;
+ let EncoderMethod = "getImmOpValue";
}
def uimm_shift : Operand<i32>, ImmLeaf<i32, "return isUInt<2>(Imm);"> {
let EncoderMethod = "getImmShiftOpValue";
let ParserMatchClass = UImmAsmOperand<2>;
+ let DecoderMethod = "decodeImmShiftOpValue";
}
def CSKYSymbol : AsmOperandClass {
@@ -94,16 +114,22 @@ def br_symbol : Operand<iPTR> {
let EncoderMethod =
"getBranchSymbolOpValue<CSKY::fixup_csky_pcrel_imm16_scale2>";
let ParserMatchClass = CSKYSymbol;
+ let DecoderMethod = "decodeSImmOperand<16, 1>";
+ let PrintMethod = "printCSKYSymbolOperand";
+ let OperandType = "OPERAND_PCREL";
}
def call_symbol : Operand<iPTR> {
let ParserMatchClass = CSKYSymbol;
let EncoderMethod = "getCallSymbolOpValue";
+ let DecoderMethod = "decodeSImmOperand<26, 1>";
+ let PrintMethod = "printCSKYSymbolOperand";
+ let OperandType = "OPERAND_PCREL";
}
def Constpool : AsmOperandClass {
- let Name = "ConstpoolSymbol";
- let RenderMethod = "addImmOperands";
+ let Name = "Constpool";
+ let RenderMethod = "addConstpoolOperands";
let DiagnosticType = "InvalidConstpool";
let ParserMethod = "parseConstpoolSymbol";
}
@@ -112,24 +138,132 @@ def constpool_symbol : Operand<iPTR> {
let ParserMatchClass = Constpool;
let EncoderMethod =
"getConstpoolSymbolOpValue<CSKY::fixup_csky_pcrel_uimm16_scale4>";
+ let DecoderMethod = "decodeUImmOperand<16, 2>";
+ let PrintMethod = "printConstpool";
+ let OperandType = "OPERAND_PCREL";
+}
+
+def DataAsmClass : AsmOperandClass {
+ let Name = "DataSymbol";
+ let RenderMethod = "addConstpoolOperands";
+ let DiagnosticType = "InvalidConstpool";
+ let ParserMethod = "parseDataSymbol";
+}
+
+class data_symbol<string reloc, int shift> : Operand<iPTR> {
+ let ParserMatchClass = Constpool;
+ let EncoderMethod =
+ "getDataSymbolOpValue<"#reloc#">";
+ let DecoderMethod = "decodeUImmOperand<18, "#shift#">";
+ let PrintMethod = "printDataSymbol";
}
def bare_symbol : Operand<iPTR> {
let ParserMatchClass = CSKYSymbol;
let EncoderMethod = "getBareSymbolOpValue";
+ let PrintMethod = "printCSKYSymbolOperand";
+ let DecoderMethod = "decodeSImmOperand<18, 1>";
+ let OperandType = "OPERAND_PCREL";
+}
+
+def oimm3 : oimm<3>;
+def oimm4 : oimm<4>;
+def oimm5 : oimm<5>;
+def oimm6 : oimm<6>;
+
+def imm5_idly : Operand<i32>, ImmLeaf<i32,
+ "return Imm <= 32 && Imm >= 0;"> {
+ let EncoderMethod = "getImmOpValueIDLY";
+ let DecoderMethod = "decodeOImmOperand<5>";
}
+def oimm8 : oimm<8>;
def oimm12 : oimm<12>;
def oimm16 : oimm<16>;
def nimm12 : nimm<12>;
+def uimm1 : uimm<1>;
+def uimm2 : uimm<2>;
+
+
+def uimm2_jmpix : Operand<i32>,
+ ImmLeaf<i32, "return Imm == 16 || Imm == 24 || Imm == 32 || Imm == 40;"> {
+ let EncoderMethod = "getImmJMPIX";
+ let DecoderMethod = "decodeJMPIXImmOperand";
+}
+
+def uimm3 : uimm<3>;
+def uimm4 : uimm<4>;
def uimm5 : uimm<5>;
+def uimm5_msb_size : uimm<5> {
+ let EncoderMethod = "getImmOpValueMSBSize";
+}
+
+def uimm5_1 : uimm<5, 1>;
+def uimm5_2 : uimm<5, 2>;
+def uimm6 : uimm<6>;
+def uimm7 : uimm<7>;
+def uimm7_1 : uimm<7, 1>;
+def uimm7_2 : uimm<7, 2>;
+def uimm7_3 : uimm<7, 3>;
+def uimm8 : uimm<8>;
+def uimm8_2 : uimm<8, 2>;
+def uimm8_3 : uimm<8, 3>;
+def uimm8_8 : uimm<8, 8>;
+def uimm8_16 : uimm<8, 16>;
+def uimm8_24 : uimm<8, 24>;
def uimm12 : uimm<12>;
def uimm12_1 : uimm<12, 1>;
def uimm12_2 : uimm<12, 2>;
def uimm16 : uimm<16>;
+def uimm16_8 : uimm<16, 8>;
+def uimm16_16 : uimm<16, 16>;
+def uimm20 : uimm<20>;
+def uimm24 : uimm<24>;
+def uimm24_8 : uimm<24, 8>;
+
+def simm8_2 : simm<8, 2>;
+
+class RegSeqAsmOperand<string Suffix = ""> : AsmOperandClass {
+ let Name = "RegSeq"#Suffix;
+ let RenderMethod = "addRegSeqOperands";
+ let DiagnosticType = "InvalidRegSeq";
+ let ParserMethod = "parseRegSeq";
+}
+
+def regseq : Operand<iPTR> {
+ let EncoderMethod = "getRegisterSeqOpValue";
+ let ParserMatchClass = RegSeqAsmOperand<"">;
+ let PrintMethod = "printRegisterSeq";
+ let DecoderMethod = "DecodeRegSeqOperand";
+ let MIOperandInfo = (ops GPR, uimm5);
+}
+def RegListAsmOperand : AsmOperandClass {
+ let Name = "RegList";
+ let RenderMethod = "addRegListOperands";
+ let DiagnosticType = "InvalidRegList";
+ let ParserMethod = "parseRegList";
+}
+
+def reglist : Operand<iPTR> {
+ let ParserMatchClass = RegListAsmOperand;
+ let PrintMethod = "printRegisterList";
+}
+
+def PSRFlag : AsmOperandClass {
+ let Name = "PSRFlag";
+ let RenderMethod = "addImmOperands";
+ let DiagnosticType = "InvalidPSRFlag";
+ let ParserMethod = "parsePSRFlag";
+}
+
+def psrflag : Operand<i32>, ImmLeaf<i32, "return isShiftedUInt<5, 0>(Imm);"> {
+ let EncoderMethod = "getImmOpValue";
+ let ParserMatchClass = PSRFlag;
+ let PrintMethod = "printPSRFlag";
+}
//===----------------------------------------------------------------------===//
// Instruction Formats
@@ -145,12 +279,33 @@ class TriOpFrag<dag res> : PatFrag<(ops node: $LHS, node:$MHS, node:$RHS), res>;
class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
class UnOpFrag<dag res> : PatFrag<(ops node:$Src), res>;
+def eqToAdd : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs), [{
+ return isOrEquivalentToAdd(N);
+}]>;
+
+def BaseAddr : ComplexPattern<iPTR, 1, "SelectBaseAddr">;
+
+
+//===----------------------------------------------------------------------===//
+// CSKYPseudo
+//===----------------------------------------------------------------------===//
+
+// Pessimistically assume the stack pointer will be clobbered
+let Defs = [R14], Uses = [R14] in {
+def ADJCALLSTACKDOWN : CSKYPseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "!ADJCALLSTACKDOWN $amt1, $amt2", [(callseq_start timm:$amt1, timm:$amt2)]>;
+def ADJCALLSTACKUP : CSKYPseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "!ADJCALLSTACKUP $amt1, $amt2", [(callseq_end timm:$amt1, timm:$amt2)]>;
+} // Defs = [R14], Uses = [R14]
//===----------------------------------------------------------------------===//
// Basic ALU instructions.
//===----------------------------------------------------------------------===//
+let Predicates = [iHasE2] in {
+ let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+ let isAdd = 1 in
def ADDI32 : I_12<0x0, "addi32", add, oimm12>;
def SUBI32 : I_12<0x1, "subi32", sub, oimm12>;
def ORI32 : I_16_ZX<"ori32", uimm16,
@@ -171,11 +326,15 @@ class UnOpFrag<dag res> : PatFrag<(ops node:$Src), res>;
(outs GPR:$rz), (ins GPR:$rx, uimm5:$imm5),
[(set GPR:$rz, (rotl GPR:$rx, uimm5:$imm5))]>;
-
+ def ROTRI32 : CSKYPseudo<(outs GPR:$rz), (ins GPR:$rx, oimm5:$imm5),
+ "rotri32 $rz, $rx, $imm5", []>;
+ }
+ let isAdd = 1 in
def ADDU32 : R_YXZ_SP_F1<0x0, 0x1,
BinOpFrag<(add node:$LHS, node:$RHS)>, "addu32", 1>;
def SUBU32 : R_YXZ_SP_F1<0x0, 0x4,
BinOpFrag<(sub node:$LHS, node:$RHS)>, "subu32">;
+
def MULT32 : R_YXZ_SP_F1<0x21, 0x1,
BinOpFrag<(mul node:$LHS, node:$RHS)>, "mult32", 1>;
def AND32 : R_YXZ_SP_F1<0x8, 0x1,
@@ -188,8 +347,16 @@ class UnOpFrag<dag res> : PatFrag<(ops node:$Src), res>;
BinOpFrag<(xor node:$LHS, node:$RHS)>, "xor32", 1>;
def NOR32 : R_YXZ_SP_F1<0x9, 0x4,
BinOpFrag<(not (or node:$LHS, node:$RHS))>, "nor32", 1>;
+ let isCodeGenOnly = 1 in
def NOT32 : R_XXZ<0b001001, 0b00100, (outs GPR:$rz), (ins GPR:$rx),
"not32", [(set GPR:$rz, (not GPR:$rx))]>;
+
+ let Size = 8 in
+ def NEG32 : CSKYPseudo<(outs GPR:$rd), (ins GPR:$rx), "neg32 $rd, $rx", []>;
+
+ let Size = 8 in
+ def RSUBI32 : CSKYPseudo<(outs GPR:$rd), (ins GPR:$rx, uimm12:$imm12), "rsubi32 $rd, $rx, $imm12", []>;
+
def LSL32 : R_YXZ_SP_F1<0x10, 0x1,
BinOpFrag<(shl node:$LHS, node:$RHS)>, "lsl32">;
def LSR32 : R_YXZ_SP_F1<0x10, 0x2,
@@ -199,23 +366,37 @@ class UnOpFrag<dag res> : PatFrag<(ops node:$Src), res>;
def ROTL32 : R_YXZ_SP_F1<0x10, 0x8,
BinOpFrag<(rotl node:$LHS, (and node:$RHS, 0x1f))>, "rotl32">;
- // TODO: Shift series instr. with carry.
+ def BMASKI32 : I_5_Z<0b010100, 0x1, "bmaski32", oimm5, []>;
+ def LSLC32 : I_5_XZ<0x13, 0x1, "lslc32",
+ (outs GPR:$rz, CARRY:$cout), (ins GPR:$rx, oimm5:$imm5), []>;
+ def LSRC32 : I_5_XZ<0x13, 0x2, "lsrc32",
+ (outs GPR:$rz, CARRY:$cout), (ins GPR:$rx, oimm5:$imm5), []>;
+ def ASRC32 : I_5_XZ<0x13, 0x4, "asrc32",
+ (outs GPR:$rz, CARRY:$cout), (ins GPR:$rx, oimm5:$imm5), []>;
+ def XSR32 : I_5_XZ<0x13, 0x8, "xsr32",
+ (outs GPR:$rz, CARRY:$cout), (ins GPR:$rx, oimm5:$imm5, CARRY:$cin), []>;
def IXH32 : R_YXZ_SP_F1<0x2, 0x1,
BinOpFrag<(add node:$LHS, (shl node:$RHS, (i32 1)))>, "ixh32">;
def IXW32 : R_YXZ_SP_F1<0x2, 0x2,
BinOpFrag<(add node:$LHS, (shl node:$RHS, (i32 2)))>, "ixw32">;
-
+ let Predicates = [iHas2E3] in
def IXD32 : R_YXZ_SP_F1<0x2, 0x4,
BinOpFrag<(add node:$LHS, (shl node:$RHS, (i32 3)))>, "ixd32">;
- let isCommutable = 1 in
+ let isCommutable = 1, isAdd = 1 in
def ADDC32 : R_YXZ<0x31, 0x0, 0x2, (outs GPR:$rz, CARRY:$cout),
(ins GPR:$rx, GPR:$ry, CARRY:$cin), "addc32", []>;
def SUBC32 : R_YXZ<0x31, 0x0, 0x8, (outs GPR:$rz, CARRY:$cout),
(ins GPR:$rx, GPR:$ry, CARRY:$cin), "subc32", []>;
- // TODO: incf32.
+ def INCF32 : I_5_ZX<0x3, 0x1, "incf32", uimm5, []>;
+ def INCT32 : I_5_ZX<0x3, 0x2, "inct32", uimm5, []>;
+ def DECF32 : I_5_ZX<0x3, 0x4, "decf32", uimm5, []>;
+ def DECT32 : I_5_ZX<0x3, 0x8, "dect32", uimm5, []>;
+}
+
+let Predicates = [iHas2E3] in {
def DIVS32 : R_YXZ_SP_F1<0x20, 0x2,
BinOpFrag<(sdiv node:$LHS, node:$RHS)>, "divs32">;
def DIVU32 : R_YXZ_SP_F1<0x20, 0x1,
@@ -228,11 +409,35 @@ class UnOpFrag<dag res> : PatFrag<(ops node:$Src), res>;
def DECNE32 : I_5_XZ<0x4, 0x4, "decne32",
(outs GPR:$rz, CARRY:$cout), (ins GPR:$rx, uimm5:$imm5), []>;
- // TODO: s/zext.
- def ZEXT32 : I_5_XZ_U<0x15, (outs GPR:$rz),
- (ins GPR:$rx, uimm5:$msb, uimm5:$lsb), "zext32",[]>;
- def SEXT32 : I_5_XZ_U<0x16, (outs GPR:$rz),
- (ins GPR:$rx, uimm5:$msb, uimm5:$lsb), "sext32", []>;
+ def SEXT32 : I_5_XZ_U<0x16, (outs GPR:$rz), (ins GPR:$rx, uimm5:$msb, uimm5:$lsb), "sext32", []>;
+ let isCodeGenOnly = 1 in {
+ def SEXTB32 : I_5_XZ_US<0x16, 0, 7, "sextb32", sext_inreg, i8>;
+ def SEXTH32 : I_5_XZ_US<0x16, 0, 15, "sexth32", sext_inreg, i16>;
+ def ZEXTB32 : I_5_XZ_UZ<0x15, 0, 7, "zextb32", 255>;
+ def ZEXTH32 : I_5_XZ_UZ<0x15, 0, 15, "zexth32", 65535>;
+ }
+ def ZEXT32 : I_5_XZ_U<0x15, (outs GPR:$rz), (ins GPR:$rx, uimm5:$msb, uimm5:$lsb), "zext32",[]>;
+
+ let Constraints = "$rZ = $rz" in
+ def INS32 : I_5_XZ_INS<0b010111, (outs GPR:$rz), (ins GPR:$rZ, GPR:$rx, uimm5_msb_size:$msb, uimm5:$lsb), "ins32", []>;
+}
+
+let Predicates = [iHas3E3r1] in {
+def MULTS32 : R_YXZ<0x3e, 0x20, 0x10, (outs GPRPair:$rz),
+ (ins GPR:$rx, GPR:$ry), "mul.s32", []>;
+def MULTU32 : R_YXZ<0x3e, 0x20, 0x00, (outs GPRPair:$rz),
+ (ins GPR:$rx, GPR:$ry), "mul.u32", []>;
+
+let Constraints = "$rZ = $rz" in {
+def MULATS32 : R_YXZ<0x3e, 0x20, 0x14, (outs GPRPair:$rZ),
+ (ins GPRPair:$rz, GPR:$rx, GPR:$ry), "mula.s32", []>;
+def MULATU32 : R_YXZ<0x3e, 0x20, 0x04, (outs GPRPair:$rZ),
+ (ins GPRPair:$rz, GPR:$rx, GPR:$ry), "mula.u32", []>;
+}
+}
+
+def MULSH32 : R_YXZ<0x31, 0b100100, 0b00001, (outs GPR:$rz),
+ (ins GPR:$rx, GPR:$ry), "mulsh32", []>;
//===----------------------------------------------------------------------===//
// Load & Store instructions.
@@ -242,18 +447,35 @@ def LD32B : I_LD<AddrMode32B, 0x0, "ld32.b", uimm12>;
def LD32H : I_LD<AddrMode32H, 0x1, "ld32.h", uimm12_1>;
def LD32W : I_LD<AddrMode32WD, 0x2, "ld32.w", uimm12_2>;
+let OutOperandList = (outs GPRPair:$rz) in
+def LD32D : I_LD<AddrMode32WD, 0x3, "ld32.d", uimm12_2>;
+let Predicates = [iHasE2] in {
def LD32BS : I_LD<AddrMode32B, 0x4, "ld32.bs", uimm12>;
def LD32HS : I_LD<AddrMode32H, 0x5, "ld32.hs", uimm12_1>;
- // TODO: LDM and STM.
+ def LDM32 : I_5_YX<0b110100, 0b000111,
+ (outs), (ins GPR:$rx, regseq:$regs, variable_ops), "ldm32\t$regs, (${rx})", []>;
+ def STM32 : I_5_YX<0b110101, 0b000111,
+ (outs), (ins GPR:$rx, regseq:$regs, variable_ops), "stm32\t$regs, (${rx})", []>;
+ let Size = 4, isCodeGenOnly = 0 in {
+ def LDQ32 : CSKYPseudo<(outs), (ins GPR:$rx, regseq:$regs, variable_ops),
+ "ldq32\t$regs, (${rx})", []>;
+ def STQ32 : CSKYPseudo<(outs), (ins GPR:$rx, regseq:$regs, variable_ops),
+ "stq32\t$regs, (${rx})", []>;
+ }
+
+}
def ST32B : I_ST<AddrMode32B, 0x0, "st32.b", uimm12>;
def ST32H : I_ST<AddrMode32H, 0x1, "st32.h", uimm12_1>;
def ST32W : I_ST<AddrMode32WD, 0x2, "st32.w", uimm12_2>;
+let InOperandList = (ins GPRPair:$rz, GPR:$rx, uimm12_2:$imm12 ) in
+def ST32D : I_ST<AddrMode32WD, 0x3, "st32.d", uimm12_2>;
+let Predicates = [iHas2E3] in {
def LDR32B : I_LDR<0x0, "ldr32.b">;
def LDR32BS : I_LDR<0x4, "ldr32.bs">;
def LDR32H : I_LDR<0x1, "ldr32.h">;
@@ -262,42 +484,100 @@ def ST32W : I_ST<AddrMode32WD, 0x2, "st32.w", uimm12_2>;
def STR32B : I_STR<0x0, "str32.b">;
def STR32H : I_STR<0x1, "str32.h">;
def STR32W : I_STR<0x2, "str32.w">;
+}
+
+// Indicate that we're spilling the CR register, so we'll need to
+// scavenge a register for it.
+let mayStore = 1 in {
+def SPILL_CARRY : CSKYPseudo<(outs), (ins CARRY:$cond, GPR:$rx, uimm12_2:$imm),
+ "!SPILL_CARRY $cond, $rx, $imm", []>;
+}
+
+// Indicate that we're restoring the CR register (previously
+// spilled), so we'll need to scavenge a register for it.
+let mayLoad = 1 in {
+def RESTORE_CARRY : CSKYPseudo<(outs CARRY:$cond), (ins GPR:$rx, uimm12_2:$imm),
+ "!RESTORE_CARRY $cond, $rx, $imm", []>;
+}
- //TODO: SPILL_CARRY and RESTORE_CARRY.
+let mayStore = 1 in {
+def STORE_PAIR : CSKYPseudo<(outs), (ins GPRPair:$rz, GPR:$rx, uimm12_2:$imm),
+ "!STORE_PAIR $rz, $rx, $imm", []>;
+}
+
+let mayLoad = 1 in {
+def LOAD_PAIR : CSKYPseudo<(outs GPRPair:$rz), (ins GPR:$rx, uimm12_2:$imm),
+ "!LOAD_PAIR $rz, $rx, $imm", []>;
+}
//===----------------------------------------------------------------------===//
// Compare instructions.
//===----------------------------------------------------------------------===//
-
+let Predicates = [iHasE2] in {
def CMPNEI32 : I_16_X<0x1A, "cmpnei32", uimm16>;
def CMPHSI32 : I_16_X<0x18, "cmphsi32", oimm16>;
def CMPLTI32 : I_16_X<0x19, "cmplti32", oimm16>;
-
-
+ def CMPLEI32 : CSKYPseudo<(outs CARRY:$ca), (ins GPR:$rx, uimm16:$imm16),
+ "cmplei32\t$rx, $imm16", []>;
+}
+let Predicates = [iHas2E3] in {
def CMPNE32 : R_YX<0x1, 0x4, "cmpne32">;
def CMPHS32 : R_YX<0x1, 0x1, "cmphs32">;
def CMPLT32 : R_YX<0x1, 0x2, "cmplt32">;
- // TODO: setc and clrc.
- // TODO: test32 and tstnbz.
+ def SETC32 : CSKY32Inst<AddrModeNone, 0x31,
+ (outs CARRY:$ca), (ins), "setc32", []> {
+ let Inst{25 - 21} = 0; //rx
+ let Inst{20 - 16} = 0; //ry
+ let Inst{15 - 10} = 0x1;
+ let Inst{9 - 5} = 0x1;
+ let Inst{4 - 0} = 0;
+ let isCompare = 1;
+ }
+ def CLRC32 : CSKY32Inst<AddrModeNone, 0x31,
+ (outs CARRY:$ca), (ins), "clrc32", []> {
+ let Inst{25 - 21} = 0; //rx
+ let Inst{20 - 16} = 0; //ry
+ let Inst{15 - 10} = 0x1;
+ let Inst{9 - 5} = 0x4;
+ let Inst{4 - 0} = 0;
+ let isCompare = 1;
+ }
+
+ def TST32 : R_YX<0x8, 0x4, "tst32">;
+ def TSTNBZ32 : R_X<0x8, 0x8,
+ (outs CARRY:$ca), (ins GPR:$rx), "tstnbz32", []>;
+}
//===----------------------------------------------------------------------===//
// Data move instructions.
//===----------------------------------------------------------------------===//
+let Predicates= [iHasE2] in {
+ let isCodeGenOnly = 1 in {
def MOVT32 : R_ZX<0x3, 0x2, "movt32", []>;
def MOVF32 : R_ZX<0x3, 0x1, "movf32", []>;
+ }
def MOVI32 : I_16_MOV<0x10, "movi32", uimm16>;
+ let Size = 4, isCodeGenOnly = 0 in
+ def BGENI : CSKYPseudo<(outs GPR:$dst), (ins uimm5:$imm), "bgeni\t$dst, $imm", []>;
+ def : InstAlias<"bgeni16 $dst, $imm", (BGENI GPR:$dst, uimm5:$imm)>;
+ def : InstAlias<"bgeni32 $dst, $imm", (BGENI GPR:$dst, uimm5:$imm)>;
def MOVIH32 : I_16_MOV<0x11, "movih32", uimm16_16_xform>;
def MVC32 : R_Z_1<0x1, 0x8, "mvc32">;
+ let isCodeGenOnly = 1 in
def MOV32 : R_XZ<0x12, 0x1, "mov32">;
- // TODO: ISEL Pseudo.
+ let usesCustomInserter = 1 in
+ def ISEL32 : CSKYPseudo<(outs GPR:$dst), (ins CARRY:$cond, GPR:$src1, GPR:$src2),
+ "!isel32\t$dst, $src1, src2", [(set GPR:$dst, (select CARRY:$cond, GPR:$src1, GPR:$src2))]>;
+}
+let Predicates = [iHas2E3] in {
def MVCV32 : R_Z_1<0x1, 0x10, "mvcv32">;
- // TODO: clrf and clrt.
def CLRF32 : R_Z_2<0xB, 0x1, "clrf32", []>;
def CLRT32 : R_Z_2<0xB, 0x2, "clrt32", []>;
+}
//===----------------------------------------------------------------------===//
// Branch and call instructions.
@@ -309,12 +589,12 @@ let isBranch = 1, isTerminator = 1 in {
[(br bb:$imm16)]>;
def BT32 : I_16_L<0x3, (outs), (ins CARRY:$ca, br_symbol:$imm16),
- "bt32\t$imm16", [(brcond CARRY:$ca, bb:$imm16)]>;
+ "bt32\t$imm16", [(brcond CARRY:$ca, bb:$imm16)]>, Requires<[iHasE2]>;
def BF32 : I_16_L<0x2, (outs), (ins CARRY:$ca, br_symbol:$imm16),
- "bf32\t$imm16", []>;
+ "bf32\t$imm16", []>, Requires<[iHasE2]>;
}
-
+let Predicates = [iHas2E3] in {
def BEZ32 : I_16_X_L<0x8, "bez32", br_symbol>;
def BNEZ32 : I_16_X_L<0x9, "bnez32", br_symbol>;
def BHZ32 : I_16_X_L<0xA, "bhz32", br_symbol>;
@@ -334,10 +614,25 @@ let isBranch = 1, isTerminator = 1 in {
let isCall = 1, Defs = [ R15 ] , mayLoad = 1 in
def JSRI32: I_16_L<0x17, (outs),
(ins constpool_symbol:$imm16), "jsri32\t$imm16", []>;
+}
+def BNEZAD32 : CSKY32Inst<AddrModeNone, 0x3a,
+ (outs GPR:$rx_u), (ins GPR:$rx, br_symbol:$imm16), "bnezad32\t$rx, $imm16", []> {
+ bits<5> rx;
+ bits<16> imm16;
+ let Inst{25 - 21} = 0x1;
+ let Inst{20 - 16} = rx;
+ let Inst{15 - 0} = imm16;
+ let isBranch = 1;
+ let isTerminator = 1;
+ let Constraints = "$rx_u = $rx";
+ let Predicates = [iHas2E3, iHas10E60];
+}
def BSR32 : J<0x38, (outs), (ins call_symbol:$offset), "bsr32", []>;
+def : InstAlias<"bsr $dst", (BSR32 call_symbol:$dst)>;
+
def BSR32_BR : J<0x38, (outs), (ins call_symbol:$offset), "bsr32", []>{
let isCodeGenOnly = 1;
let isBranch = 1;
@@ -347,27 +642,310 @@ def BSR32_BR : J<0x38, (outs), (ins call_symbol:$offset), "bsr32", []>{
let Defs = [ R15 ];
}
-
+let Predicates = [iHasE2], isCodeGenOnly = 1 in {
def RTS32 : I_16_RET<0x6, 0xF, "rts32", [(CSKY_RET)]>;
+}
-def RTE32 : I_16_RET_I<0, 0, "rte32", []>;
-
//===----------------------------------------------------------------------===//
// Symbol address instructions.
//===----------------------------------------------------------------------===//
+def data_symbol_b : data_symbol<"CSKY::fixup_csky_doffset_imm18", 0>;
+def data_symbol_h : data_symbol<"CSKY::fixup_csky_doffset_imm18_scale2", 1>;
+def data_symbol_w : data_symbol<"CSKY::fixup_csky_doffset_imm18_scale4", 2> {
+ let ParserMatchClass = DataAsmClass;
+}
+
+let Predicates = [iHas2E3] in {
+
def GRS32 : I_18_Z_L<0x3, "grs32\t$rz, $offset",
(outs GPR:$rz), (ins bare_symbol:$offset), []>;
+def : InstAlias<"grs\t$rz, $offset", (GRS32 GPR:$rz, bare_symbol:$offset)>;
+
+let Uses = [R28] in {
+def LRS32B : I_18_Z_L<0x0, "lrs32.b\t$rz, $offset",
+ (outs GPR:$rz), (ins data_symbol_b:$offset), []>;
+def LRS32H : I_18_Z_L<0x1, "lrs32.h\t$rz, $offset",
+ (outs GPR:$rz), (ins data_symbol_h:$offset), []>;
+def LRS32W : I_18_Z_L<0x2, "lrs32.w\t$rz, $offset",
+ (outs GPR:$rz), (ins data_symbol_w:$offset), []>;
+def SRS32B : I_18_Z_L<0x4, "srs32.b\t$rz, $offset",
+ (outs), (ins GPR:$rz, data_symbol_b:$offset), []>;
+def SRS32H : I_18_Z_L<0x5, "srs32.h\t$rz, $offset",
+ (outs), (ins GPR:$rz, data_symbol_h:$offset), []>;
+def SRS32W : I_18_Z_L<0x6, "srs32.w\t$rz, $offset",
+ (outs), (ins GPR:$rz, data_symbol_w:$offset), []>;
+}
+
+def PUSH32 : I_12_PP<0b11111, 0b00000, (outs), (ins reglist:$regs, variable_ops), "push32 $regs">;
+
+let Uses = [R14, R15], isReturn = 1, isTerminator = 1, isBarrier = 1 in
+def POP32 : I_12_PP<0b11110, 0b00000, (outs), (ins reglist:$regs, variable_ops), "pop32 $regs">;
+
+}
let mayLoad = 1, mayStore = 0 in {
def LRW32 : I_16_Z_L<0x14, "lrw32", (ins constpool_symbol:$imm16), []>;
let isCodeGenOnly = 1 in
-def LRW32_Gen : I_16_Z_L<0x14, "lrw32",
- (ins bare_symbol:$src1, constpool_symbol:$imm16), []>;
+def LRW32_Gen : I_16_Z_L<0x14, "lrw32", (ins bare_symbol:$src1, constpool_symbol:$imm16), []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Atomic and fence instructions.
+//===----------------------------------------------------------------------===//
+
+let Predicates = [iHasMP1E2] in {
+ def BRWARW : BAR<0b01111, "bar.brwarw", 0>;
+ def BRWARWS : BAR<0b01111, "bar.brwarws", 1>;
+ def BRARW : BAR<0b00111, "bar.brarw", 0>;
+ def BRARWS : BAR<0b00111, "bar.brarws", 1>;
+ def BRWAW : BAR<0b01110, "bar.brwaw", 0>;
+ def BRWAWS : BAR<0b01110, "bar.brwaws", 1>;
+ def BRAR : BAR<0b00101, "bar.brar", 0>;
+ def BRARS : BAR<0b00101, "bar.brars", 1>;
+ def BWAW : BAR<0b01010, "bar.bwaw", 0>;
+ def BWAWS : BAR<0b01010, "bar.bwaws", 1>;
+
+ def LDEX32W : I_LD<AddrMode32WD, 0x7, "ldex32.w", uimm12_2>;
+ let Constraints = "$rd = $rz" in
+ def STEX32W : I_LDST<AddrMode32WD, 0x37, 7,
+ (outs GPR:$rd), (ins GPR:$rz, GPR:$rx, uimm12_2:$imm12), "stex32.w", []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Other operation instructions.
+//===----------------------------------------------------------------------===//
+
+let Predicates = [iHas2E3] in {
+ def BREV32 : R_XZ<0x18, 0x10, "brev32">;
+ def ABS32 : R_XZ<0x0, 0x10, "abs32">;
+ def BGENR32 : R_XZ<0x14, 0x2, "bgenr32">;
+}
+
+let Predicates = [iHasE2] in {
+ def REVB32 : R_XZ<0x18, 0x4, "revb32">;
+ def REVH32 : R_XZ<0x18, 0x8, "revh32">;
+ def FF0 : R_XZ<0x1F, 0x1, "ff0.32">;
+ def FF1 : R_XZ<0x1F, 0x2, "ff1.32">;
+ def XTRB0 : R_XZ<0x1C, 0x1, "xtrb0.32">;
+ def XTRB1 : R_XZ<0x1C, 0x2, "xtrb1.32">;
+ def XTRB2 : R_XZ<0x1C, 0x4, "xtrb2.32">;
+ def XTRB3 : R_XZ<0x1C, 0x8, "xtrb3.32">;
+ def BTSTI32 : I_5_X<0x0A, 0x4, "btsti32", uimm5, []>;
+ def BCLRI32 : I_5_XZ<0xA, 0x1, "bclri32",
+ (outs GPR:$rz), (ins GPR:$rx, uimm5:$imm5), []>;
+ def BSETI32 : I_5_XZ<0xA, 0x2, "bseti32",
+ (outs GPR:$rz), (ins GPR:$rx, uimm5:$imm5), []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Special instructions.
+//===----------------------------------------------------------------------===//
+
+def MFFCR : CSKY32Inst<AddrModeNone, 0x30,
+ (outs GPR:$rx), (ins), "mfcr\t$rx, fcr", []> {
+ bits<5> rx;
+
+ let Inst{25 - 21} = 0b00010;
+ let Inst{20 - 16} = 0b00001;
+ let Inst{15 - 10} = 0b011000;
+ let Inst{9 - 5} = 0b00001;
+ let Inst{4 - 0} = rx;
+ let hasSideEffects = 1;
+ let isCodeGenOnly = 1;
+}
+
+def MTFCR : CSKY32Inst<AddrModeNone, 0x30,
+ (outs), (ins GPR:$rx), "mtcr\t$rx, fcr", []> {
+ bits<5> rx;
+
+ let Inst{25 - 21} = 0b00010;
+ let Inst{20 - 16} = rx;
+ let Inst{15 - 10} = 0b011001;
+ let Inst{9 - 5} = 0b00001;
+ let Inst{4 - 0} = 0b00001;
+ let hasSideEffects = 1;
+ let isCodeGenOnly = 1;
+}
+
+def SYNC32 : I_5_IMM5<0x30, 0b000001, 0b00001, "sync32", uimm5, []>;
+
+def SYNC0_32 : CSKY32Inst<AddrModeNone, 0x30, (outs), (ins),
+ "sync32", []> {
+ let Inst{25 - 21} = 0;
+ let Inst{20 - 16} = 0;
+ let Inst{15 - 10} = 0b000001;
+ let Inst{9 - 5} = 0b00001;
+ let Inst{4 - 0} = 0;
+}
+
+def SYNC_32_I : CSKY32Inst<AddrModeNone, 0x30, (outs), (ins),
+ "sync32.i", []> {
+ let Inst{25 - 21} = 1;
+ let Inst{20 - 16} = 0;
+ let Inst{15 - 10} = 0b000001;
+ let Inst{9 - 5} = 0b00001;
+ let Inst{4 - 0} = 0;
+}
+
+def SYNC_32_S : CSKY32Inst<AddrModeNone, 0x30, (outs), (ins),
+ "sync32.s", []> {
+ let Inst{25 - 21} = 0b10000;
+ let Inst{20 - 16} = 0;
+ let Inst{15 - 10} = 0b000001;
+ let Inst{9 - 5} = 0b00001;
+ let Inst{4 - 0} = 0;
+}
+
+def SYNC_32_IS : CSKY32Inst<AddrModeNone, 0x30, (outs), (ins),
+ "sync32.is", []> {
+ let Inst{25 - 21} = 0b10001;
+ let Inst{20 - 16} = 0;
+ let Inst{15 - 10} = 0b000001;
+ let Inst{9 - 5} = 0b00001;
+ let Inst{4 - 0} = 0;
}
-// TODO: Atomic and fence instructions.
-// TODO: Other operations.
-// TODO: Special instructions.
-// TODO: Pseudo for assembly.
+let Predicates = [iHas2E3] in {
+ def RFI32 : I_5_XZ_PRIVI<0x11, 0x1, "rfi32">;
+ def SCE32 : I_5_IMM5<0x30, 0b000110, 0b00001, "sce32", uimm4, []>;
+}
+let Predicates = [HasExtendLrw] in
+def IDLY32 : I_5_IMM5<0x30, 0b000111, 0b00001, "idly32", imm5_idly, []>;
+def STOP32 : I_5_XZ_PRIVI<0x12, 0x1, "stop32">;
+def WAIT32 : I_5_XZ_PRIVI<0x13, 0x1, "wait32">;
+def DOZE32 : I_5_XZ_PRIVI<0x14, 0x1, "doze32">;
+def WE32 : I_5_XZ_PRIVI<0b010101, 0x1, "we32">;
+def SE32 : I_5_XZ_PRIVI<0b010110, 0x1, "se32">;
+def WSC32 : I_5_XZ_PRIVI<0b001111, 0x1, "wsc32">;
+
+def CPOP32 : I_CPOP<(outs), (ins uimm5:$cpid, uimm20:$usdef), "cpop32 <$cpid, ${usdef}>">;
+def CPRC32 : I_CP<0b0100, (outs CARRY:$ca), (ins uimm5:$cpid, uimm12:$usdef), "cprc32 <$cpid, ${usdef}>">;
+def CPRCR32 : I_CP_Z<0b0010, (outs GPR:$rz), (ins uimm5:$cpid, uimm12:$usdef), "cprcr32 $rz, <$cpid, ${usdef}>">;
+def CPRGR32 : I_CP_Z<0b0000, (outs GPR:$rz), (ins uimm5:$cpid, uimm12:$usdef), "cprgr32 $rz, <$cpid, ${usdef}>">;
+def CPWCR32 : I_CP_Z<0b0011, (outs), (ins GPR:$rz, uimm5:$cpid, uimm12:$usdef), "cpwcr32 $rz, <$cpid, ${usdef}>">;
+def CPWGR32 : I_CP_Z<0b0001, (outs), (ins GPR:$rz, uimm5:$cpid, uimm12:$usdef), "cpwgr32 $rz, <$cpid, ${usdef}>">;
+
+let Predicates = [iHas3r2E3r3] in {
+def DCACHE_IALL32 : I_5_CACHE<0b100101, 0b01000, "dcache32.iall">;
+def DCACHE_CALL32 : I_5_CACHE<0b100101, 0b00100, "dcache32.call">;
+def DCACHE_CIALL32 : I_5_CACHE<0b100101, 0b01100, "dcache32.ciall">;
+def DCACHE_IVA32 : I_5_X_CACHE<0b100101, 0b01011, "dcache32.iva">;
+def DCACHE_ISW32: I_5_X_CACHE<0b100101, 0b01010, "dcache32.isw">;
+def DCACHE_CVA32 : I_5_X_CACHE<0b100101, 0b00111, "dcache32.cva">;
+def DCACHE_CVAL32 : I_5_X_CACHE<0b100101, 0b10111, "dcache32.cval1">;
+def DCACHE_CSW32 : I_5_X_CACHE<0b100101, 0b00110, "dcache32.csw">;
+def DCACHE_CIVA32 : I_5_X_CACHE<0b100101, 0b01111, "dcache32.civa">;
+def DCACHE_CISW32 : I_5_X_CACHE<0b100101, 0b01110, "dcache32.cisw">;
+
+def ICACHE_IALL32 : I_5_CACHE<0b100100, 0b01000, "icache32.iall">;
+def ICACHE_IALLS32 : I_5_CACHE<0b100100, 0b11000, "icache32.ialls">;
+def ICACHE_IVA32 : I_5_X_CACHE<0b100100, 0b01011, "icache32.iva">;
+
+def TLBI_VAA32 : I_5_X_CACHE<0b100010, 0b00010, "tlbi32.vaa">;
+def TLBI_VAAS32 : I_5_X_CACHE<0b100010, 0b10010, "tlbi32.vaas">;
+def TLBI_ASID32 : I_5_X_CACHE<0b100010, 0b00001, "tlbi32.asid">;
+def TLBI_ASIDS32 : I_5_X_CACHE<0b100010, 0b10001, "tlbi32.asids">;
+def TLBI_VA32 : I_5_X_CACHE<0b100010, 0b00011, "tlbi32.va">;
+def TLBI_VAS32 : I_5_X_CACHE<0b100010, 0b10011, "tlbi32.vas">;
+def TLBI_ALL32 : I_5_CACHE<0b100010, 0b00000, "tlbi32.all">;
+def TLBI_ALLS32 : I_5_CACHE<0b100010, 0b10000, "tlbi32.alls">;
+
+def L2CACHE_IALL : I_5_CACHE<0b100110, 0b01000, "l2cache.iall">;
+def L2CACHE_CALL : I_5_CACHE<0b100110, 0b00100, "l2cache.call">;
+def L2CACHE_CIALL : I_5_CACHE<0b100110, 0b01100, "l2cache.ciall">;
+}
+
+def PLDR32 :I_PLDR<AddrMode32WD, 0x36, 0b0110, (outs), (ins GPR:$rx, uimm12_2:$imm12), "pldr32", []>;
+def PLDW32 :I_PLDR<AddrMode32WD, 0x37, 0b0110, (outs), (ins GPR:$rx, uimm12_2:$imm12), "pldw32", []>;
+
+def TRAP32 : CSKY32Inst<AddrModeNone, 0x30, (outs), (ins uimm2:$imm2), "trap32 ${imm2}", []> {
+ bits<2> imm2;
+
+ let Inst{25 - 21} = 0;
+ let Inst{20 - 16} = 0;
+ let Inst{15 - 12} = 0b0010;
+ let Inst{11 - 10} = imm2;
+ let Inst{9 - 5} = 0b00001;
+ let Inst{4 - 0} = 0;
+
+}
+
+
+//===----------------------------------------------------------------------===//
+// Pseudo for assembly
+//===----------------------------------------------------------------------===//
+
+let isCall = 1, Defs = [ R15 ], mayLoad = 1, Size = 4, isCodeGenOnly = 0 in
+def JBSR32 : CSKYPseudo<(outs), (ins call_symbol:$src1), "jbsr32\t$src1", []>;
+
+def : InstAlias<"jbsr\t$src1", (JBSR32 call_symbol:$src1)>;
+
+def JBR32 : CSKYPseudo<(outs), (ins br_symbol:$src1), "jbr32\t$src1", []> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let isBarrier = 1;
+ let isIndirectBranch = 1;
+ let mayLoad = 1;
+ let Size = 4;
+}
+
+def JBT32 : CSKYPseudo<(outs), (ins CARRY:$ca, br_symbol:$src1), "jbt32\t$src1", []> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let isIndirectBranch = 1;
+ let mayLoad = 1;
+ let Size = 4;
+}
+
+def JBF32 : CSKYPseudo<(outs), (ins CARRY:$ca, br_symbol:$src1), "jbf32\t$src1", []> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let isIndirectBranch = 1;
+ let mayLoad = 1;
+ let Size = 4;
+}
+
+def JBT_E : CSKYPseudo<(outs), (ins CARRY:$ca, br_symbol:$src1), "!jbt_e\t$src1", []> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let isIndirectBranch = 1;
+ let mayLoad = 1;
+ let Size = 6;
+}
+
+def JBF_E : CSKYPseudo<(outs), (ins CARRY:$ca, br_symbol:$src1), "!jbf_e\t$src1", []> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let isIndirectBranch = 1;
+ let mayLoad = 1;
+ let Size = 6;
+}
+
+let mayLoad = 1, Size = 2, isCodeGenOnly = 0 in
+def PseudoLRW32 : CSKYPseudo<(outs GPR:$rz), (ins bare_symbol:$src), "lrw32 $rz, $src", []>;
+
+
+def : InstAlias<"lrw $rz, $src", (PseudoLRW32 GPR:$rz, bare_symbol:$src)>;
+def : InstAlias<"lrw $rz, $src", (LRW32 GPR:$rz, constpool_symbol:$src)>;
+
+let mayLoad = 1, Size = 4, isCodeGenOnly = 0 in
+def PseudoJSRI32 : CSKYPseudo<(outs), (ins call_symbol:$src), "jsri32 $src", []>;
+def : InstAlias<"jsri $dst", (PseudoJSRI32 call_symbol:$dst)>;
+def : InstAlias<"jsri $dst", (JSRI32 constpool_symbol:$dst)>;
+
+let mayLoad = 1, Size = 4, isCodeGenOnly = 0 in
+def PseudoJMPI32 : CSKYPseudo<(outs), (ins br_symbol:$src), "jmpi32 $src", []>;
+def : InstAlias<"jmpi $dst", (PseudoJMPI32 br_symbol:$dst)>;
+def : InstAlias<"jmpi $dst", (JMPI32 constpool_symbol:$dst)>;
+
+let isNotDuplicable = 1, mayLoad = 1, mayStore = 0, Size = 8 in
+def PseudoTLSLA32 : CSKYPseudo<(outs GPR:$dst1, GPR:$dst2),
+ (ins constpool_symbol:$src, i32imm:$label), "!tlslrw32\t$dst1, $dst2, $src, $label", []>;
+
+let hasSideEffects = 0, isNotDuplicable = 1 in
+def CONSTPOOL_ENTRY : CSKYPseudo<(outs),
+ (ins i32imm:$instid, i32imm:$cpidx, i32imm:$size), "", []>;
+
+include "CSKYInstrInfo16Instr.td"
diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo16Instr.td b/llvm/lib/Target/CSKY/CSKYInstrInfo16Instr.td
new file mode 100644
index 000000000000..c98f43622155
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYInstrInfo16Instr.td
@@ -0,0 +1,452 @@
+//===-- CSKYInstrInfo16Instr.td - CSKY 16-bit Instruction --*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the CSKY 16-bit instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// CSKY specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+// Target-dependent nodes.
+def CSKY_NIE : SDNode<"CSKYISD::NIE", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue]>;
+def CSKY_NIR : SDNode<"CSKYISD::NIR", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue]>;
+
+//===----------------------------------------------------------------------===//
+// Operand and SDNode transformation definitions.
+//===----------------------------------------------------------------------===//
+
+def br_symbol_16bit : Operand<iPTR> {
+ let EncoderMethod =
+ "getBranchSymbolOpValue<CSKY::fixup_csky_pcrel_imm10_scale2>";
+ let ParserMatchClass = CSKYSymbol;
+ let DecoderMethod = "decodeSImmOperand<10, 1>";
+ let PrintMethod = "printCSKYSymbolOperand";
+ let OperandType = "OPERAND_PCREL";
+}
+
+def SPOperand : AsmOperandClass {
+ let Name = "SPOperand";
+ let RenderMethod = "addRegOperands";
+ let DiagnosticType = !strconcat("Invalid", Name);
+}
+
+def SPOp : RegisterOperand<GPR> {
+ let ParserMatchClass = SPOperand;
+}
+
+def constpool_symbol_16bit : Operand<iPTR> {
+ let ParserMatchClass = Constpool;
+ let EncoderMethod =
+ "getConstpoolSymbolOpValue<CSKY::fixup_csky_pcrel_uimm7_scale4>";
+ let DecoderMethod = "decodeLRW16Imm8";
+ let PrintMethod = "printConstpool";
+ let OperandType = "OPERAND_PCREL";
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction Formats
+//===----------------------------------------------------------------------===//
+
+include "CSKYInstrFormats16Instr.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction definitions.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Basic ALU instructions.
+//===----------------------------------------------------------------------===//
+
+let isCommutable = 1, isAdd = 1 in
+ def ADDU16 : R16_XYZ<0, "addu16", add>;
+let Pattern = [(set mGPR:$rz, (sub mGPR:$rx, mGPR:$ry))] in
+ def SUBU16 : R16_XYZ<1, "subu16", sub>;
+
+let isCommutable = 1, isAdd = 1 in
+ def ADDC16 : R16_XZ_BINOP_C<0b1000, 0b01, "addc16">;
+def SUBC16 : R16_XZ_BINOP_C<0b1000, 0b11, "subc16">;
+
+let isCommutable = 1 in {
+ let isAdd = 1 in
+ def ADDU16XZ : R16_XZ_BINOP<0b1000, 0b00, "addu16", BinOpFrag<(add node:$LHS, node:$RHS)>>;
+ def AND16 : R16_XZ_BINOP<0b1010, 0b00, "and16", BinOpFrag<(and node:$LHS, node:$RHS)>>;
+ def OR16 : R16_XZ_BINOP<0b1011, 0b00, "or16", BinOpFrag<(or node:$LHS, node:$RHS)>>;
+ def XOR16 : R16_XZ_BINOP<0b1011, 0b01, "xor16", BinOpFrag<(xor node:$LHS, node:$RHS)>>;
+ def NOR16 : R16_XZ_BINOP<0b1011, 0b10, "nor16", BinOpFrag<(not (or node:$LHS, node:$RHS))>>;
+ let isCodeGenOnly = 1 in
+ def NOT16 : R16_XZ_UNOP<0b1011, 0b10, "not16">;
+ def MULT16 : R16_XZ_BINOP<0b1111, 0b00, "mult16", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
+}
+def SUBU16XZ : R16_XZ_BINOP<0b1000, 0b10, "subu16", BinOpFrag<(sub node:$LHS, node:$RHS)>>;
+def ANDN16 : R16_XZ_BINOP<0b1010, 0b01, "andn16", BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
+def LSL16 : R16_XZ_BINOP<0b1100, 0b00, "lsl16", BinOpFrag<(shl node:$LHS, node:$RHS)>>;
+def LSR16 : R16_XZ_BINOP<0b1100, 0b01, "lsr16", BinOpFrag<(srl node:$LHS, node:$RHS)>>;
+def ASR16 : R16_XZ_BINOP<0b1100, 0b10, "asr16", BinOpFrag<(sra node:$LHS, node:$RHS)>>;
+def ROTL16 : R16_XZ_BINOP<0b1100, 0b11, "rotl16", BinOpFrag<(rotl node:$LHS, (and node:$RHS, 0x1f))>>;
+
+def MULSH16 : R16_XZ_BINOP_NOPat<0b1111, 0b01, "mulsh16">;
+
+def ZEXTB16 : R16_XZ_UNOP<0b1101, 0b00, "zextb16">;
+def ZEXTH16 : R16_XZ_UNOP<0b1101, 0b01, "zexth16">;
+def SEXTB16 : R16_XZ_UNOP<0b1101, 0b10, "sextb16">;
+def SEXTH16 : R16_XZ_UNOP<0b1101, 0b11, "sexth16">;
+
+let Constraints = "$rZ = $rz", isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+ let isAdd = 1, Pattern = [(set mGPR:$rz, (add mGPR:$rZ, oimm8:$imm8))] in
+ def ADDI16 : I16_Z_8<0b100, (ins mGPR:$rZ, oimm8:$imm8), "addi16\t$rz, $imm8">;
+ let Pattern = [(set mGPR:$rz, (sub mGPR:$rZ, oimm8:$imm8))] in
+ def SUBI16 : I16_Z_8<0b101, (ins mGPR:$rZ, oimm8:$imm8), "subi16\t$rz, $imm8">;
+}
+
+let isAdd = 1 in
+def ADDI16ZSP : I16_Z_8<0b011, (ins SPOp:$sp, uimm8_2:$imm8),
+ "addi16\t$rz, $sp, $imm8">;
+
+let isAdd = 1 in
+def ADDI16SPSP : I16_SP_IMM7<0b000,"addi16">;
+def SUBI16SPSP : I16_SP_IMM7<0b001,"subi16">;
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+ def LSLI16 : I16_XZ_IMM5<0, "lsli16", shl>;
+ def LSRI16 : I16_XZ_IMM5<1, "lsri16", srl>;
+ def ASRI16 : I16_XZ_IMM5<2, "asri16", sra>;
+}
+
+let isAdd = 1 in
+def ADDI16XZ : I16_XZ_IMM3<0b10, "addi16", add>;
+def SUBI16XZ : I16_XZ_IMM3<0b11, "subi16", sub>;
+
+let Size = 4 in
+def NEG16 : CSKYPseudo<(outs mGPR:$rd), (ins mGPR:$rx), "neg16 $rd, $rx", []>;
+
+let Size = 4 in
+def RSUBI16 : CSKYPseudo<(outs mGPR:$rd),
+ (ins mGPR:$rx, uimm8:$imm8), "rsubi16 $rd, $rx, $imm8", []>;
+
+//===----------------------------------------------------------------------===//
+// Load & Store instructions.
+//===----------------------------------------------------------------------===//
+
+def LD16B : I16_XZ_LDST<AddrMode16B, 0b000, "ld16.b",
+ (outs mGPR:$rz), (ins mGPR:$rx, uimm5:$imm)>;
+def LD16H : I16_XZ_LDST<AddrMode16H, 0b001, "ld16.h",
+ (outs mGPR:$rz), (ins mGPR:$rx, uimm5_1:$imm)>;
+def LD16W : I16_XZ_LDST<AddrMode16W, 0b010, "ld16.w",
+ (outs mGPR:$rz), (ins mGPR:$rx, uimm5_2:$imm)>;
+def ST16B : I16_XZ_LDST<AddrMode16B, 0b100, "st16.b",
+ (outs), (ins mGPR:$rz, mGPR:$rx, uimm5:$imm)>;
+def ST16H : I16_XZ_LDST<AddrMode16H, 0b101, "st16.h",
+ (outs), (ins mGPR:$rz, mGPR:$rx, uimm5_1:$imm)>;
+def ST16W : I16_XZ_LDST<AddrMode16W, 0b110, "st16.w",
+ (outs), (ins mGPR:$rz, mGPR:$rx, uimm5_2:$imm)>;
+
+def LD16WSP : I16_ZSP_LDST<AddrMode16W, 0b011, "ld16.w",
+ (outs mGPR:$rz), (ins SPOp:$sp, uimm8_2:$addr)>;
+def ST16WSP : I16_ZSP_LDST<AddrMode16W, 0b111, "st16.w",
+ (outs), (ins mGPR:$rz, SPOp:$sp, uimm8_2:$addr)>;
+
+//===----------------------------------------------------------------------===//
+// Compare instructions.
+//===----------------------------------------------------------------------===//
+
+def CMPHS16 : R16_XY_CMP<0, "cmphs16">;
+def CMPLT16 : R16_XY_CMP<1, "cmplt16">;
+let isCommutable = 1 in
+def CMPNE16 : R16_XY_CMP<2, "cmpne16">;
+
+
+def CMPHSI16 : I16_X_CMP<0, "cmphsi16", oimm5>;
+def CMPLTI16 : I16_X_CMP<1, "cmplti16", oimm5>;
+def CMPLEI16 : CSKYPseudo<(outs CARRY:$ca), (ins mGPR:$rx, uimm5:$imm5),
+ "cmplei16\t$rx, $imm5", []>;
+def CMPNEI16 : I16_X_CMP<2, "cmpnei16", uimm5>;
+
+//===----------------------------------------------------------------------===//
+// Data move instructions.
+//===----------------------------------------------------------------------===//
+
+
+def MOVI16 : I16_Z_8<0b110, (ins uimm8:$imm8), "movi16\t$rz, $imm8"> {
+ let isReMaterializable = 1;
+ let isAsCheapAsAMove = 1;
+ let isMoveImm = 1;
+ let Pattern = [(set mGPR:$rz, uimm8:$imm8)];
+}
+
+def MOV16 : CSKY16Inst<AddrModeNone, (outs sGPR:$rz), (ins sGPR:$rx),
+ "mov16\t$rz, $rx", []> {
+ bits<4> rz;
+ bits<4> rx;
+ let Inst{15,14} = 0b01;
+ let Inst{13 - 10} = 0b1011;
+ let Inst{9 - 6} = rz;
+ let Inst{5 - 2} = rx;
+ let Inst{1,0} = 0b11;
+}
+
+// MVC16 is not in the "cskyv2 instructions reference manual".
+def MVCV16 : CSKY16Inst<AddrModeNone,
+ (outs sGPR:$rz), (ins CARRY:$ca), "mvcv16\t$rz", []> {
+ bits<4> rz;
+ let Inst{15,14} = 0b01;
+ let Inst{13 - 10} = 0b1001;
+ let Inst{9 - 6} = rz;
+ let Inst{5 - 2} = 0;
+ let Inst{1,0} = 0b11;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Branch and call instructions.
+//===----------------------------------------------------------------------===//
+
+let isBranch = 1, isTerminator = 1 in {
+ let isBarrier = 1, isPredicable = 1 in
+ def BR16 : J16<1, "br16", (ins br_symbol_16bit:$offset)>;
+
+ def BT16 : J16_B<2, "bt16">;
+ def BF16 : J16_B<3, "bf16">;
+}
+
+def JMP16 : R16_X_J<0b11100000, 0b00, "jmp16"> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let isBarrier = 1;
+ let isIndirectBranch = 1;
+ let Pattern = [(brind sGPR:$rx)];
+}
+
+def JSR16 : R16_X_J<0b11101111, 0b01, "jsr16"> {
+ let isCall = 1;
+ let Defs = [ R15 ];
+}
+
+def RTS16 : CSKY16Inst<AddrModeNone, (outs), (ins), "rts16", [(CSKY_RET)]> {
+ let isTerminator = 1;
+ let isReturn = 1;
+ let isBarrier = 1;
+ let Inst = 0b0111100000111100;
+ let Uses = [R15];
+ let isCodeGenOnly = 1;
+}
+
+def JMPIX16 : CSKY16Inst<AddrModeNone, (outs),
+ (ins mGPR:$rx, uimm2_jmpix:$indeximm2), "jmpix16\t$rx, $indeximm2", []> {
+ bits<3> rx;
+ bits<2> indeximm2;
+ let Inst{15,14} = 0b00;
+ let Inst{13 - 11} = 0b111;
+ let Inst{10 - 8} = rx;
+ let Inst{7 - 2} = 0b111000;
+ let Inst{1,0} = indeximm2;
+ let Predicates = [HasJAVA];
+ let Uses = [R30];
+}
+
+//===----------------------------------------------------------------------===//
+// Symbol address instructions.
+//===----------------------------------------------------------------------===//
+
+def LRW16 : CSKY16Inst<AddrModeNone, (outs mGPR:$rz),
+ (ins constpool_symbol_16bit:$label), "lrw16\t$rz, $label", []> {
+ bits<3> rz;
+ bits<8> label;
+ let Inst{15 - 13} = 0b000;
+ let Inst{12} = label{7};
+ let Inst{11,10} = 0b00;
+ let Inst{9,8} = label{6,5};
+ let Inst{7 - 5} = rz;
+ let Inst{4 - 0} = label{4-0};
+ let mayLoad = 1;
+ let mayStore = 0;
+}
+
+def LRW16_Gen : CSKY16Inst<AddrModeNone, (outs mGPR:$rz),
+ (ins bare_symbol:$src, constpool_symbol_16bit:$label),
+ "lrw16\t$rz, $label", []> {
+ bits<3> rz;
+ bits<8> label;
+ let Inst{15 - 13} = 0b000;
+ let Inst{12} = label{7};
+ let Inst{11,10} = 0b00;
+ let Inst{9,8} = label{6,5};
+ let Inst{7 - 5} = rz;
+ let Inst{4 - 0} = label{4-0};
+ let mayLoad = 1;
+ let mayStore = 0;
+ let isCodeGenOnly = 1;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Other operation instructions.
+//===----------------------------------------------------------------------===//
+
+def REVB16 : R16_XZ_UNOP<0b1110, 0b10, "revb16">;
+def REVH16 : R16_XZ_UNOP<0b1110, 0b11, "revh16">;
+
+let isCodeGenOnly = 1 in
+def SETC16 : CSKY16Inst<AddrModeNone,
+ (outs CARRY:$ca), (ins), "setc16", []> {
+ let Inst{15, 14} = 0b01;
+ let Inst{13 - 10} = 0b1001;
+ let Inst{9 - 6} = 0;
+ let Inst{5 - 2} = 0;
+ let Inst{1, 0} = 0;
+ let isCompare = 1;
+}
+
+let isCodeGenOnly = 1 in
+def CLRC16 : CSKY16Inst<AddrModeNone,
+ (outs CARRY:$ca), (ins), "clrc16", []> {
+ let Inst{15, 14} = 0b01;
+ let Inst{13 - 10} = 0b1001;
+ let Inst{9 - 6} = 0;
+ let Inst{5 - 2} = 0;
+ let Inst{1, 0} = 2;
+ let isCompare = 1;
+}
+
+let Constraints = "$rZ = $rz" in {
+ def BCLRI16 : I16_Z_5<0b100, (outs mGPR:$rz), (ins mGPR:$rZ, uimm5:$imm5),
+ "bclri16">;
+ def BSETI16 : I16_Z_5<0b101, (outs mGPR:$rz), (ins mGPR:$rZ, uimm5:$imm5),
+ "bseti16">;
+}
+
+let Predicates = [HasBTST16] in
+ def BTSTI16 : I16_Z_5<0b110, (outs CARRY:$ca), (ins mGPR:$rz, uimm5:$imm5),
+ "btsti16">;
+
+def TST16 : CSKY16Inst<AddrModeNone, (outs CARRY:$ca), (ins sGPR:$rx, sGPR:$ry),
+ "tst16\t$rx, $ry", []> {
+ bits<4> ry;
+ bits<4> rx;
+ let Inst{15,14} = 0b01;
+ let Inst{13 - 10} = 0b1010;
+ let Inst{9 - 6} = ry;
+ let Inst{5 - 2} = rx;
+ let Inst{1,0} = 0b10;
+ let isCompare = 1;
+}
+
+def TSTNBZ16 : CSKY16Inst<AddrModeNone, (outs CARRY:$ca), (ins sGPR:$rx),
+ "tstnbz16\t$rx", []> {
+ bits<4> rx;
+ let Inst{15,14} = 0b01;
+ let Inst{13 - 10} = 0b1010;
+ let Inst{9 - 6} = 0b0000;
+ let Inst{5 - 2} = rx;
+ let Inst{1,0} = 0b11;
+ let isCompare = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Special instructions.
+//===----------------------------------------------------------------------===//
+
+def BKPT : CSKY16Inst<AddrModeNone, (outs), (ins), "bkpt", []> {
+ let Inst = 0;
+}
+
+let mayStore = 1 in {
+def BPUSHH : I16_BPushPop<0b00010100111, 0, (outs), (ins mGPR:$rz), "bpush.h $rz">;
+def BPUSHW : I16_BPushPop<0b00010100111, 0b10, (outs), (ins mGPR:$rz), "bpush.w $rz">;
+}
+
+let mayLoad = 1 in {
+def BPOPH : I16_BPushPop<0b00010100101, 0, (outs mGPR:$rz), (ins), "bpop.h $rz">;
+def BPOPW : I16_BPushPop<0b00010100101, 0b10, (outs mGPR:$rz), (ins), "bpop.w $rz">;
+}
+
+def NIE : CSKY16Inst<AddrModeNone, (outs), (ins), "nie", [(CSKY_NIE)]> {
+ let Inst = 0b0001010001100000;
+}
+
+let isBarrier = 1, isReturn = 1, isTerminator = 1 in
+def NIR : CSKY16Inst<AddrModeNone, (outs), (ins), "nir", [(CSKY_NIR)]> {
+ let Inst = 0b0001010001100001;
+}
+
+def IPUSH16 : CSKY16Inst<AddrModeNone, (outs), (ins), "ipush16", []> {
+ let Inst{15- 5} = 0b00010100011;
+ let Inst{4-0} = 0b00010;
+ let Predicates = [iHasE1];
+ let Defs = [R14];
+ let Uses = [R14, R0, R1, R2, R3, R12, R13];
+ let mayStore = 1;
+}
+
+def IPOP16 : CSKY16Inst<AddrModeNone, (outs), (ins), "ipop16", []> {
+ let Inst{15- 5} = 0b00010100011;
+ let Inst{4-0} = 0b00011;
+ let Predicates = [iHasE1];
+ let Defs = [R14, R0, R1, R2, R3, R12, R13];
+ let Uses = [R14];
+ let mayLoad = 1;
+}
+
+def PUSH16 : CSKY16Inst<AddrModeNone, (outs),
+ (ins reglist:$regs, variable_ops), "push16 $regs", []> {
+ bits<5> regs;
+
+ let Inst{15- 5} = 0b00010100110;
+ let Inst{4-0} = regs;
+ let Predicates = [iHasE1];
+ let Defs = [R14];
+ let Uses = [R14];
+ let mayStore = 1;
+}
+
+def POP16 : CSKY16Inst<AddrModeNone, (outs),
+ (ins reglist:$regs, variable_ops), "pop16 $regs", []> {
+ bits<5> regs;
+
+ let Inst{15- 5} = 0b00010100100;
+ let Inst{4-0} = regs;
+ let Predicates = [iHasE1];
+ let Defs = [R14];
+ let Uses = [R14];
+ let mayLoad = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// CSKYPseudo
+//===----------------------------------------------------------------------===//
+
+let usesCustomInserter = 1 in {
+ def ISEL16 : CSKYPseudo<(outs sGPR:$dst),
+ (ins CARRY:$cond, sGPR:$src1, sGPR:$src2),
+ "!isel16\t$dst, $src1, src2",
+ [(set sGPR:$dst, (select CARRY:$cond, sGPR:$src1, sGPR:$src2))]>;
+}
+
+class JBranchPseudo<dag out, dag ins, string opstr> :
+ CSKYPseudo<out, ins, opstr, []> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let isIndirectBranch = 1;
+ let mayLoad = 1;
+ let Size = 2;
+}
+
+let isBarrier = 1 in
+def JBR16 : JBranchPseudo<(outs),
+ (ins br_symbol_16bit:$src1), "jbr16\t$src1">;
+def JBT16 : JBranchPseudo<(outs),
+ (ins CARRY:$ca, br_symbol_16bit:$src1), "jbt16\t$src1">;
+def JBF16 : JBranchPseudo<(outs),
+ (ins CARRY:$ca, br_symbol_16bit:$src1), "jbf16\t$src1">;
+
+let mayLoad = 1, Size = 2, isCodeGenOnly = 0 in
+def PseudoLRW16 : CSKYPseudo<(outs mGPR:$rz),
+ (ins bare_symbol:$src), "lrw16 $rz, $src", []>;
diff --git a/llvm/lib/Target/CSKY/CSKYMCInstLower.cpp b/llvm/lib/Target/CSKY/CSKYMCInstLower.cpp
new file mode 100644
index 000000000000..c42a56bfb04e
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYMCInstLower.cpp
@@ -0,0 +1,117 @@
+//===-- CSKYMCInstLower.cpp - Convert CSKY MachineInstr to an MCInst --------=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower CSKY MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CSKYMCInstLower.h"
+#include "MCTargetDesc/CSKYBaseInfo.h"
+#include "MCTargetDesc/CSKYMCExpr.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/MC/MCExpr.h"
+
+#define DEBUG_TYPE "csky-mcinst-lower"
+
+using namespace llvm;
+
+CSKYMCInstLower::CSKYMCInstLower(MCContext &Ctx, AsmPrinter &Printer)
+ : Ctx(Ctx), Printer(Printer) {}
+
+void CSKYMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (const MachineOperand &MO : MI->operands()) {
+ MCOperand MCOp;
+ if (lowerOperand(MO, MCOp))
+ OutMI.addOperand(MCOp);
+ }
+}
+
+MCOperand CSKYMCInstLower::lowerSymbolOperand(const MachineOperand &MO,
+ MCSymbol *Sym) const {
+ CSKYMCExpr::VariantKind Kind;
+ MCContext &Ctx = Printer.OutContext;
+
+ switch (MO.getTargetFlags()) {
+ default:
+ llvm_unreachable("Unknown target flag.");
+ case CSKYII::MO_None:
+ Kind = CSKYMCExpr::VK_CSKY_None;
+ break;
+ case CSKYII::MO_GOT32:
+ Kind = CSKYMCExpr::VK_CSKY_GOT;
+ break;
+ case CSKYII::MO_GOTOFF:
+ Kind = CSKYMCExpr::VK_CSKY_GOTOFF;
+ break;
+ case CSKYII::MO_ADDR32:
+ Kind = CSKYMCExpr::VK_CSKY_ADDR;
+ break;
+ case CSKYII::MO_PLT32:
+ Kind = CSKYMCExpr::VK_CSKY_PLT;
+ break;
+ case CSKYII::MO_ADDR_HI16:
+ Kind = CSKYMCExpr::VK_CSKY_ADDR_HI16;
+ break;
+ case CSKYII::MO_ADDR_LO16:
+ Kind = CSKYMCExpr::VK_CSKY_ADDR_LO16;
+ break;
+ }
+ const MCExpr *ME =
+ MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, Ctx);
+
+ if (Kind != CSKYMCExpr::VK_CSKY_None)
+ ME = CSKYMCExpr::create(ME, Kind, Ctx);
+
+ return MCOperand::createExpr(ME);
+}
+
+bool CSKYMCInstLower::lowerOperand(const MachineOperand &MO,
+ MCOperand &MCOp) const {
+ switch (MO.getType()) {
+ default:
+ llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_RegisterMask:
+ break;
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::createImm(MO.getImm());
+ break;
+ case MachineOperand::MO_Register:
+ if (MO.isImplicit())
+ return false;
+ MCOp = MCOperand::createReg(MO.getReg());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::createExpr(
+ MCSymbolRefExpr::create(MO.getMBB()->getSymbol(), Ctx));
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ MCOp = lowerSymbolOperand(MO, Printer.getSymbol(MO.getGlobal()));
+ break;
+ case MachineOperand::MO_BlockAddress:
+ MCOp = lowerSymbolOperand(
+ MO, Printer.GetBlockAddressSymbol(MO.getBlockAddress()));
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ MCOp = lowerSymbolOperand(
+ MO, Printer.GetExternalSymbolSymbol(MO.getSymbolName()));
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ MCOp = lowerSymbolOperand(MO, Printer.GetCPISymbol(MO.getIndex()));
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ MCOp = lowerSymbolOperand(MO, Printer.GetJTISymbol(MO.getIndex()));
+ break;
+ case MachineOperand::MO_MCSymbol:
+ MCOp = lowerSymbolOperand(MO, MO.getMCSymbol());
+ break;
+ }
+ return true;
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/CSKY/CSKYMCInstLower.h b/llvm/lib/Target/CSKY/CSKYMCInstLower.h
new file mode 100644
index 000000000000..ea76bd129d30
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYMCInstLower.h
@@ -0,0 +1,35 @@
+//===-- CSKYMCInstLower.h - Convert CSKY MachineInstr to an MCInst ----------=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_CSKY_CSKYMCINSTLOWER_H
+#define LLVM_LIB_TARGET_CSKY_CSKYMCINSTLOWER_H
+
+namespace llvm {
+class AsmPrinter;
+class MCContext;
+class MachineInstr;
+class MCInst;
+class MachineOperand;
+class MCOperand;
+class MCSymbol;
+
+class CSKYMCInstLower {
+ MCContext &Ctx;
+ AsmPrinter &Printer;
+
+public:
+ CSKYMCInstLower(MCContext &Ctx, AsmPrinter &Printer);
+
+ void Lower(const MachineInstr *MI, MCInst &OutMI) const;
+ bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const;
+ MCOperand lowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
+};
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_CSKY_CSKYMCINSTLOWER_H
diff --git a/llvm/lib/Target/CSKY/CSKYMachineFunctionInfo.h b/llvm/lib/Target/CSKY/CSKYMachineFunctionInfo.h
new file mode 100644
index 000000000000..b6e303f8ccfb
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYMachineFunctionInfo.h
@@ -0,0 +1,62 @@
+//=- CSKYMachineFunctionInfo.h - CSKY machine function info -------*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares CSKY-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_CSKY_CSKYMACHINEFUNCTIONINFO_H
+#define LLVM_LIB_TARGET_CSKY_CSKYMACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+class CSKYMachineFunctionInfo : public MachineFunctionInfo {
+ MachineFunction &MF;
+
+ Register GlobalBaseReg = 0;
+ bool SpillsCR = false;
+
+ int VarArgsFrameIndex = 0;
+ unsigned VarArgsSaveSize = 0;
+
+ int spillAreaSize = 0;
+
+ bool LRSpilled = false;
+
+ unsigned PICLabelUId = 0;
+
+public:
+ CSKYMachineFunctionInfo(MachineFunction &MF) : MF(MF) {}
+
+ Register getGlobalBaseReg() const { return GlobalBaseReg; }
+ void setGlobalBaseReg(Register Reg) { GlobalBaseReg = Reg; }
+
+ void setSpillsCR() { SpillsCR = true; }
+ bool isCRSpilled() const { return SpillsCR; }
+
+ void setVarArgsFrameIndex(int v) { VarArgsFrameIndex = v; }
+ int getVarArgsFrameIndex() { return VarArgsFrameIndex; }
+
+ unsigned getVarArgsSaveSize() const { return VarArgsSaveSize; }
+ void setVarArgsSaveSize(int Size) { VarArgsSaveSize = Size; }
+
+ bool isLRSpilled() const { return LRSpilled; }
+ void setLRIsSpilled(bool s) { LRSpilled = s; }
+
+ void setCalleeSaveAreaSize(int v) { spillAreaSize = v; }
+ int getCalleeSaveAreaSize() const { return spillAreaSize; }
+
+ unsigned createPICLabelUId() { return ++PICLabelUId; }
+ void initPICLabelUId(unsigned UId) { PICLabelUId = UId; }
+};
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_CSKY_CSKYMACHINEFUNCTIONINFO_H
diff --git a/llvm/lib/Target/CSKY/CSKYRegisterInfo.cpp b/llvm/lib/Target/CSKY/CSKYRegisterInfo.cpp
new file mode 100644
index 000000000000..a1d45fea534b
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYRegisterInfo.cpp
@@ -0,0 +1,95 @@
+//===-- CSKYRegisterInfo.cpp - CSKY Register Information Impl -*- C++ -*---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the CSKY implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CSKYRegisterInfo.h"
+#include "CSKY.h"
+#include "CSKYSubtarget.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/MC/MCContext.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "CSKYGenRegisterInfo.inc"
+
+using namespace llvm;
+
+CSKYRegisterInfo::CSKYRegisterInfo()
+ : CSKYGenRegisterInfo(CSKY::R15, 0, 0, 0) {}
+
+const uint32_t *
+CSKYRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID Id) const {
+ const CSKYSubtarget &STI = MF.getSubtarget<CSKYSubtarget>();
+ return CSR_I32_RegMask;
+}
+
+Register CSKYRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = getFrameLowering(MF);
+ return TFI->hasFP(MF) ? CSKY::R8 : CSKY::R14;
+}
+
+BitVector CSKYRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ const CSKYFrameLowering *TFI = getFrameLowering(MF);
+ const CSKYSubtarget &STI = MF.getSubtarget<CSKYSubtarget>();
+ BitVector Reserved(getNumRegs());
+
+ // Reserve the base register if we need to allocate
+ // variable-sized objects at runtime.
+ if (TFI->hasBP(MF))
+ markSuperRegs(Reserved, CSKY::R7); // bp
+
+ if (TFI->hasFP(MF))
+ markSuperRegs(Reserved, CSKY::R8); // fp
+
+ if (!STI.hasE2()) {
+ for (unsigned i = 0; i < 6; i++)
+ markSuperRegs(Reserved, CSKY::R8 + i); // R8 - R13
+ }
+
+ markSuperRegs(Reserved, CSKY::R14); // sp
+ markSuperRegs(Reserved, CSKY::R15); // lr
+
+ if (!STI.hasHighRegisters()) {
+ for (unsigned i = 0; i < 10; i++)
+ markSuperRegs(Reserved, CSKY::R16 + i); // R16 - R25
+ }
+
+ markSuperRegs(Reserved, CSKY::R26);
+ markSuperRegs(Reserved, CSKY::R27);
+ markSuperRegs(Reserved, CSKY::R28); // gp
+ markSuperRegs(Reserved, CSKY::R29);
+ markSuperRegs(Reserved, CSKY::R30);
+ markSuperRegs(Reserved, CSKY::R31); // tp
+
+ assert(checkAllSuperRegsMarked(Reserved));
+ return Reserved;
+}
+
+const uint32_t *CSKYRegisterInfo::getNoPreservedMask() const {
+ return CSR_NoRegs_RegMask;
+}
+
+const MCPhysReg *
+CSKYRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ const CSKYSubtarget &STI = MF->getSubtarget<CSKYSubtarget>();
+ if (MF->getFunction().hasFnAttribute("interrupt")) {
+ return CSR_GPR_ISR_SaveList;
+ }
+
+ return CSR_I32_SaveList;
+}
+
+void CSKYRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Unexpected non-zero SPAdj value");
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/CSKY/CSKYRegisterInfo.h b/llvm/lib/Target/CSKY/CSKYRegisterInfo.h
new file mode 100644
index 000000000000..779ea6493c7e
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYRegisterInfo.h
@@ -0,0 +1,45 @@
+//===-- CSKYRegisterInfo.h - CSKY Register Information Impl ---*- C++ -*---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the CSKY implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_CSKY_CSKYREGISTERINFO_H
+#define LLVM_LIB_TARGET_CSKY_CSKYREGISTERINFO_H
+
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "CSKYGenRegisterInfo.inc"
+
+namespace llvm {
+class CSKYInstrInfo;
+
+class CSKYRegisterInfo : public CSKYGenRegisterInfo {
+public:
+ CSKYRegisterInfo();
+
+ const uint32_t *getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID id) const override;
+ const uint32_t *getNoPreservedMask() const override;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const override;
+
+ Register getFrameRegister(const MachineFunction &MF) const override;
+
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
+ unsigned FIOperandNum,
+ RegScavenger *RS) const override;
+};
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_CSKY_CSKYREGISTERINFO_H
diff --git a/llvm/lib/Target/CSKY/CSKYRegisterInfo.td b/llvm/lib/Target/CSKY/CSKYRegisterInfo.td
index aef4589a67f2..7548c22bb2c5 100644
--- a/llvm/lib/Target/CSKY/CSKYRegisterInfo.td
+++ b/llvm/lib/Target/CSKY/CSKYRegisterInfo.td
@@ -153,6 +153,21 @@ def GPR : RegisterClass<"CSKY", [i32], 32,
let Size = 32;
}
+// Register class for R0 - R15.
+// Some 16-bit integer instructions can only access R0 - R15.
+def sGPR : RegisterClass<"CSKY", [i32], 32,
+ (add (sequence "R%u", 0, 3), (sequence "R%u", 12, 13), R15,
+ (sequence "R%u", 4, 11), R14)> {
+ let Size = 32;
+}
+
+// Register class for R0 - R7.
+// Some 16-bit integer instructions can only access R0 - R7.
+def mGPR : RegisterClass<"CSKY", [i32], 32,
+ (add (sequence "R%u", 0, 7))> {
+ let Size = 32;
+}
+
def GPRPair : RegisterClass<"CSKY", [untyped], 32, (add GPRTuple)> {
let Size = 64;
}
diff --git a/llvm/lib/Target/CSKY/CSKYSubtarget.cpp b/llvm/lib/Target/CSKY/CSKYSubtarget.cpp
new file mode 100644
index 000000000000..963c2ede9c44
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYSubtarget.cpp
@@ -0,0 +1,74 @@
+//===-- CSKYSubtarget.cpp - Define Subtarget for the CSKY -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CSKY specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CSKYSubtarget.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "csky-subtarget"
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "CSKYGenSubtargetInfo.inc"
+
+void CSKYSubtarget::anchor() {}
+
+CSKYSubtarget &CSKYSubtarget::initializeSubtargetDependencies(
+ const Triple &TT, StringRef CPUName, StringRef TuneCPUName, StringRef FS) {
+
+ if (CPUName.empty())
+ CPUName = "generic";
+ if (TuneCPUName.empty())
+ TuneCPUName = CPUName;
+
+ UseHardFloat = false;
+ UseHardFloatABI = false;
+ HasFPUv2SingleFloat = false;
+ HasFPUv2DoubleFloat = false;
+ HasFPUv3SingleFloat = false;
+ HasFPUv3DoubleFloat = false;
+
+ HasBTST16 = false;
+ HasJAVA = false;
+ HasExtendLrw = false;
+ HasDoloop = false;
+ HasHighRegisters = false;
+
+ HasE1 = false;
+ HasE2 = false;
+ Has2E3 = false;
+ HasMP = false;
+ Has3E3r1 = false;
+ Has3r1E3r2 = false;
+ Has3r2E3r3 = false;
+ Has3E7 = false;
+ HasMP1E2 = false;
+ Has7E10 = false;
+ Has10E60 = false;
+
+ ParseSubtargetFeatures(CPUName, TuneCPUName, FS);
+ return *this;
+}
+
+CSKYSubtarget::CSKYSubtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU,
+ StringRef FS, const TargetMachine &TM)
+ : CSKYGenSubtargetInfo(TT, CPU, TuneCPU, FS),
+ FrameLowering(initializeSubtargetDependencies(TT, CPU, TuneCPU, FS)),
+ InstrInfo(*this), RegInfo(), TLInfo(TM, *this) {}
+
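+// Honour an explicit float-ABI choice from the target options; only fall back
+// to the UseHardFloatABI subtarget feature when the option is left at Default.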
+bool CSKYSubtarget::useHardFloatABI() const {
+ auto FloatABI = getTargetLowering()->getTargetMachine().Options.FloatABIType;
+
+ if (FloatABI == FloatABI::Default)
+ return UseHardFloatABI;
+ else
+ return FloatABI == FloatABI::Hard;
+}
diff --git a/llvm/lib/Target/CSKY/CSKYSubtarget.h b/llvm/lib/Target/CSKY/CSKYSubtarget.h
new file mode 100644
index 000000000000..4cd590e8e76e
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYSubtarget.h
@@ -0,0 +1,120 @@
+//===-- CSKYSubtarget.h - Define Subtarget for the CSKY ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the CSKY specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_CSKY_CSKYSUBTARGET_H
+#define LLVM_LIB_TARGET_CSKY_CSKYSUBTARGET_H
+
+#include "CSKYFrameLowering.h"
+#include "CSKYISelLowering.h"
+#include "CSKYInstrInfo.h"
+#include "CSKYRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+#define GET_SUBTARGETINFO_HEADER
+#include "CSKYGenSubtargetInfo.inc"
+
+namespace llvm {
+class StringRef;
+
+class CSKYSubtarget : public CSKYGenSubtargetInfo {
+ virtual void anchor();
+
+ CSKYFrameLowering FrameLowering;
+ CSKYInstrInfo InstrInfo;
+ CSKYRegisterInfo RegInfo;
+ CSKYTargetLowering TLInfo;
+ SelectionDAGTargetInfo TSInfo;
+
+ bool UseHardFloat;
+ bool UseHardFloatABI;
+ bool HasFPUv2SingleFloat;
+ bool HasFPUv2DoubleFloat;
+ bool HasFPUv3SingleFloat;
+ bool HasFPUv3DoubleFloat;
+
+ bool HasBTST16;
+ bool HasJAVA;
+ bool HasExtendLrw;
+ bool HasDoloop;
+ bool HasHighRegisters;
+
+ bool HasE1;
+ bool HasE2;
+ bool Has2E3;
+ bool HasMP;
+ bool Has3E3r1;
+ bool Has3r1E3r2;
+ bool Has3r2E3r3;
+ bool Has3E7;
+ bool HasMP1E2;
+ bool Has7E10;
+ bool Has10E60;
+
+public:
+ CSKYSubtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU,
+ StringRef FS, const TargetMachine &TM);
+
+ const CSKYFrameLowering *getFrameLowering() const override {
+ return &FrameLowering;
+ }
+ const CSKYInstrInfo *getInstrInfo() const override { return &InstrInfo; }
+ const CSKYRegisterInfo *getRegisterInfo() const override { return &RegInfo; }
+ const CSKYTargetLowering *getTargetLowering() const override {
+ return &TLInfo;
+ }
+ const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
+ return &TSInfo;
+ }
+
+ /// Initializes using the passed in CPU and feature strings so that we can
+ /// use initializer lists for subtarget initialization.
+ CSKYSubtarget &initializeSubtargetDependencies(const Triple &TT,
+ StringRef CPU,
+ StringRef TuneCPU,
+ StringRef FS);
+
+ // Generated by inc file
+ void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
+
+ bool useHardFloatABI() const;
+ bool useHardFloat() const { return UseHardFloat; }
+ bool hasFPUv2SingleFloat() const { return HasFPUv2SingleFloat; }
+ bool hasFPUv2DoubleFloat() const { return HasFPUv2DoubleFloat; }
+ bool hasFPUv2() const { return HasFPUv2SingleFloat || HasFPUv2DoubleFloat; }
+ bool hasFPUv3SingleFloat() const { return HasFPUv3SingleFloat; }
+ bool hasFPUv3DoubleFloat() const { return HasFPUv3DoubleFloat; }
+ bool hasFPUv3() const { return HasFPUv3SingleFloat || HasFPUv3DoubleFloat; }
+ bool hasAnyFloatExt() const { return hasFPUv2() || hasFPUv3(); }
+
+ bool hasBTST16() const { return HasBTST16; }
+ bool hasJAVA() const { return HasJAVA; }
+ bool hasExtendLrw() const { return HasExtendLrw; }
+ bool hasDoloop() const { return HasDoloop; }
+ bool hasHighRegisters() const { return HasHighRegisters; }
+
+ bool hasE1() const { return HasE1; }
+ bool hasE2() const { return HasE2; }
+ bool has2E3() const { return Has2E3; }
+ bool has3r1E3r2() const { return Has3r1E3r2; }
+ bool has3r2E3r3() const { return Has3r2E3r3; }
+ bool has3E3r1() const { return Has3E3r1; }
+ bool has3E7() const { return Has3E7; }
+ bool hasMP() const { return HasMP; }
+ bool hasMP1E2() const { return HasMP1E2; }
+ bool has7E10() const { return Has7E10; }
+ bool has10E60() const { return Has10E60; }
+};
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_CSKY_CSKYSUBTARGET_H
diff --git a/llvm/lib/Target/CSKY/CSKYTargetMachine.cpp b/llvm/lib/Target/CSKY/CSKYTargetMachine.cpp
index 1c13796e84b6..8f61feb6506d 100644
--- a/llvm/lib/Target/CSKY/CSKYTargetMachine.cpp
+++ b/llvm/lib/Target/CSKY/CSKYTargetMachine.cpp
@@ -11,10 +11,13 @@
//===----------------------------------------------------------------------===//
#include "CSKYTargetMachine.h"
+#include "CSKY.h"
+#include "CSKYSubtarget.h"
#include "TargetInfo/CSKYTargetInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
@@ -50,6 +53,34 @@ CSKYTargetMachine::CSKYTargetMachine(const Target &T, const Triple &TT,
initAsmInfo();
}
+const CSKYSubtarget *
+CSKYTargetMachine::getSubtargetImpl(const Function &F) const {
+ Attribute CPUAttr = F.getFnAttribute("target-cpu");
+ Attribute TuneAttr = F.getFnAttribute("tune-cpu");
+ Attribute FSAttr = F.getFnAttribute("target-features");
+
+ std::string CPU =
+ CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU;
+ std::string TuneCPU =
+ TuneAttr.isValid() ? TuneAttr.getValueAsString().str() : CPU;
+ std::string FS =
+ FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS;
+
+ std::string Key = CPU + TuneCPU + FS;
+ auto &I = SubtargetMap[Key];
+ if (!I) {
+ // This needs to be done before we create a new subtarget since any
+ // creation will depend on the TM and the code generation flags on the
+ // function that reside in TargetOptions.
+ resetTargetOptions(F);
+ I = std::make_unique<CSKYSubtarget>(TargetTriple, CPU, TuneCPU, FS, *this);
+ if (I->useHardFloat() && !I->hasAnyFloatExt())
+ errs() << "Hard-float can't be used with current CPU,"
+ " set to Soft-float\n";
+ }
+ return I.get();
+}
+
namespace {
class CSKYPassConfig : public TargetPassConfig {
public:
@@ -59,6 +90,8 @@ public:
CSKYTargetMachine &getCSKYTargetMachine() const {
return getTM<CSKYTargetMachine>();
}
+
+ bool addInstSelector() override;
};
} // namespace
@@ -66,3 +99,9 @@ public:
TargetPassConfig *CSKYTargetMachine::createPassConfig(PassManagerBase &PM) {
return new CSKYPassConfig(*this, PM);
}
+
+bool CSKYPassConfig::addInstSelector() {
+ addPass(createCSKYISelDag(getCSKYTargetMachine()));
+
+ return false;
+}
diff --git a/llvm/lib/Target/CSKY/CSKYTargetMachine.h b/llvm/lib/Target/CSKY/CSKYTargetMachine.h
index d50e3877b550..ecb9fe953077 100644
--- a/llvm/lib/Target/CSKY/CSKYTargetMachine.h
+++ b/llvm/lib/Target/CSKY/CSKYTargetMachine.h
@@ -13,6 +13,7 @@
#ifndef LLVM_LIB_TARGET_CSKY_CSKYTARGETMACHINE_H
#define LLVM_LIB_TARGET_CSKY_CSKYTARGETMACHINE_H
+#include "CSKYSubtarget.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
@@ -20,6 +21,7 @@ namespace llvm {
class CSKYTargetMachine : public LLVMTargetMachine {
std::unique_ptr<TargetLoweringObjectFile> TLOF;
+ mutable StringMap<std::unique_ptr<CSKYSubtarget>> SubtargetMap;
public:
CSKYTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
@@ -29,6 +31,12 @@ public:
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+ const CSKYSubtarget *getSubtargetImpl(const Function &F) const override;
+ // DO NOT IMPLEMENT: There is no such thing as a valid default subtarget;
+ // subtargets are per-function entities based on the target-specific
+ // attributes of each function.
+ const CSKYSubtarget *getSubtargetImpl() const = delete;
+
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
}
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp
index 7fb5f35548b4..daa655416c47 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp
@@ -30,25 +30,57 @@ CSKYAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
static llvm::DenseMap<unsigned, MCFixupKindInfo> Infos = {
{CSKY::Fixups::fixup_csky_addr32, {"fixup_csky_addr32", 0, 32, 0}},
+ {CSKY::Fixups::fixup_csky_addr_hi16, {"fixup_csky_addr_hi16", 0, 32, 0}},
+ {CSKY::Fixups::fixup_csky_addr_lo16, {"fixup_csky_addr_lo16", 0, 32, 0}},
{CSKY::Fixups::fixup_csky_pcrel_imm16_scale2,
{"fixup_csky_pcrel_imm16_scale2", 0, 32, MCFixupKindInfo::FKF_IsPCRel}},
{CSKY::Fixups::fixup_csky_pcrel_uimm16_scale4,
- {"fixup_csky_pcrel_uimm16_scale4", 0, 32, MCFixupKindInfo::FKF_IsPCRel}},
+ {"fixup_csky_pcrel_uimm16_scale4", 0, 32,
+ MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}},
+ {CSKY::Fixups::fixup_csky_pcrel_uimm8_scale4,
+ {"fixup_csky_pcrel_uimm8_scale4", 0, 32,
+ MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}},
{CSKY::Fixups::fixup_csky_pcrel_imm26_scale2,
{"fixup_csky_pcrel_imm26_scale2", 0, 32, MCFixupKindInfo::FKF_IsPCRel}},
{CSKY::Fixups::fixup_csky_pcrel_imm18_scale2,
- {"fixup_csky_pcrel_imm18_scale2", 0, 32, MCFixupKindInfo::FKF_IsPCRel}}};
+ {"fixup_csky_pcrel_imm18_scale2", 0, 32, MCFixupKindInfo::FKF_IsPCRel}},
+ {CSKY::Fixups::fixup_csky_got32, {"fixup_csky_got32", 0, 32, 0}},
+ {CSKY::Fixups::fixup_csky_got_imm18_scale4,
+ {"fixup_csky_got_imm18_scale4", 0, 32, 0}},
+ {CSKY::Fixups::fixup_csky_gotoff, {"fixup_csky_gotoff", 0, 32, 0}},
+ {CSKY::Fixups::fixup_csky_gotpc,
+ {"fixup_csky_gotpc", 0, 32, MCFixupKindInfo::FKF_IsPCRel}},
+ {CSKY::Fixups::fixup_csky_plt32, {"fixup_csky_plt32", 0, 32, 0}},
+ {CSKY::Fixups::fixup_csky_plt_imm18_scale4,
+ {"fixup_csky_plt_imm18_scale4", 0, 32, 0}},
+ {CSKY::Fixups::fixup_csky_pcrel_imm10_scale2,
+ {"fixup_csky_pcrel_imm10_scale2", 0, 16, MCFixupKindInfo::FKF_IsPCRel}},
+ {CSKY::Fixups::fixup_csky_pcrel_uimm7_scale4,
+ {"fixup_csky_pcrel_uimm7_scale4", 0, 16,
+ MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}},
+ {CSKY::Fixups::fixup_csky_doffset_imm18,
+ {"fixup_csky_doffset_imm18", 0, 18, 0}},
+ {CSKY::Fixups::fixup_csky_doffset_imm18_scale2,
+ {"fixup_csky_doffset_imm18_scale2", 0, 18, 0}},
+ {CSKY::Fixups::fixup_csky_doffset_imm18_scale4,
+ {"fixup_csky_doffset_imm18_scale4", 0, 18, 0}}};
+
assert(Infos.size() == CSKY::NumTargetFixupKinds &&
"Not all fixup kinds added to Infos array");
- assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
- "Invalid kind!");
- if (FirstTargetFixupKind <= Kind && Kind < FirstLiteralRelocationKind)
+ if (FirstTargetFixupKind <= Kind && Kind < FirstLiteralRelocationKind) {
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
+
return Infos[Kind];
- else if (Kind < FirstTargetFixupKind)
+ } else if (Kind < FirstTargetFixupKind) {
return MCAsmBackend::getFixupKindInfo(Kind);
- else
+ } else {
return MCAsmBackend::getFixupKindInfo(FK_NONE);
+ }
}
static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
@@ -145,7 +177,8 @@ void CSKYAsmBackend::relaxInstruction(MCInst &Inst,
llvm_unreachable("CSKYAsmBackend::relaxInstruction() unimplemented");
}
-bool CSKYAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
+bool CSKYAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const {
if (Count % 2)
return false;
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h
index cdf688e9032a..e710954e9df8 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h
@@ -39,7 +39,8 @@ public:
void relaxInstruction(MCInst &Inst,
const MCSubtargetInfo &STI) const override;
- bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
+ bool writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const override;
std::unique_ptr<MCObjectTargetWriter>
createObjectTargetWriter() const override;
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYBaseInfo.h b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYBaseInfo.h
new file mode 100644
index 000000000000..fbfca4b6b85f
--- /dev/null
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYBaseInfo.h
@@ -0,0 +1,70 @@
+//===-- CSKYBaseInfo.h - Top level definitions for CSKY ---*- C++ -*-------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions for
+// the CSKY target useful for the compiler back-end and the MC libraries.
+// As such, it deliberately does not include references to LLVM core
+// code gen types, passes, etc.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_CSKY_MCTARGETDESC_CSKYBASEINFO_H
+#define LLVM_LIB_TARGET_CSKY_MCTARGETDESC_CSKYBASEINFO_H
+
+#include "MCTargetDesc/CSKYMCTargetDesc.h"
+#include "llvm/MC/MCInstrDesc.h"
+
+namespace llvm {
+
+// CSKYII - This namespace holds all of the target specific flags that
+// instruction info tracks. All definitions must match CSKYInstrFormats.td.
+namespace CSKYII {
+
+enum AddrMode {
+ AddrModeNone = 0,
+ AddrMode32B = 1, // ld32.b, ld32.bs, st32.b, st32.bs, +4kb
+ AddrMode32H = 2, // ld32.h, ld32.hs, st32.h, st32.hs, +8kb
+ AddrMode32WD = 3, // ld32.w, st32.w, ld32.d, st32.d, +16kb
+ AddrMode16B = 4, // ld16.b, +32b
+ AddrMode16H = 5, // ld16.h, +64b
+ AddrMode16W = 6, // ld16.w, +128b or +1kb
+ AddrMode32SDF = 7, // flds, fldd, +1kb
+};
+
+// CSKY Specific MachineOperand Flags.
+enum TOF {
+ MO_None = 0,
+ MO_ADDR32,
+ MO_GOT32,
+ MO_GOTOFF,
+ MO_PLT32,
+ MO_ADDR_HI16,
+ MO_ADDR_LO16,
+
+ // Used to differentiate between target-specific "direct" flags and "bitmask"
+ // flags. A machine operand can only have one "direct" flag, but can have
+ // multiple "bitmask" flags.
+ MO_DIRECT_FLAG_MASK = 15
+};
+
+enum {
+ AddrModeMask = 0x1f,
+};
+
+} // namespace CSKYII
+
+namespace CSKYOp {
+enum OperandType : unsigned {
+ OPERAND_BARESYMBOL = MCOI::OPERAND_FIRST_TARGET,
+ OPERAND_CONSTPOOL
+};
+} // namespace CSKYOp
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_CSKY_MCTARGETDESC_CSKYBASEINFO_H
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYFixupKinds.h b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYFixupKinds.h
index 917f940fcad4..434fd5481626 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYFixupKinds.h
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYFixupKinds.h
@@ -16,6 +16,10 @@ namespace CSKY {
enum Fixups {
fixup_csky_addr32 = FirstTargetFixupKind,
+ fixup_csky_addr_hi16,
+
+ fixup_csky_addr_lo16,
+
fixup_csky_pcrel_imm16_scale2,
fixup_csky_pcrel_uimm16_scale4,
@@ -24,6 +28,29 @@ enum Fixups {
fixup_csky_pcrel_imm18_scale2,
+ fixup_csky_gotpc,
+
+ fixup_csky_gotoff,
+
+ fixup_csky_got32,
+
+ fixup_csky_got_imm18_scale4,
+
+ fixup_csky_plt32,
+
+ fixup_csky_plt_imm18_scale4,
+
+ fixup_csky_pcrel_imm10_scale2,
+
+ fixup_csky_pcrel_uimm7_scale4,
+
+ fixup_csky_pcrel_uimm8_scale4,
+
+ fixup_csky_doffset_imm18,
+
+ fixup_csky_doffset_imm18_scale2,
+
+ fixup_csky_doffset_imm18_scale4,
// Marker
fixup_csky_invalid,
NumTargetFixupKinds = fixup_csky_invalid - FirstTargetFixupKind
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.cpp
index c8920fbb4b4c..7001de999a51 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.cpp
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "CSKYInstPrinter.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
@@ -95,6 +96,107 @@ void CSKYInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
MO.getExpr()->print(O, &MAI);
}
+void CSKYInstPrinter::printDataSymbol(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNo);
+
+ O << "[";
+ if (MO.isImm())
+ O << MO.getImm();
+ else
+ MO.getExpr()->print(O, &MAI);
+ O << "]";
+}
+
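+// Print a constant-pool operand. Immediates are PC-relative offsets and are
+// shown as the word-aligned target address when branch targets are printed as
+// addresses; symbolic operands are printed as a bracketed expression.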
+void CSKYInstPrinter::printConstpool(const MCInst *MI, uint64_t Address,
+ unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNo);
+
+ if (MO.isImm()) {
+ if (PrintBranchImmAsAddress) {
+ uint64_t Target = Address + MO.getImm();
+ Target &= 0xfffffffc;
+ O << formatHex(Target);
+ } else {
+ O << MO.getImm();
+ }
+ return;
+ }
+
+ assert(MO.isExpr() && "Unknown operand kind in printConstpool");
+
+ O << "[";
+ MO.getExpr()->print(O, &MAI);
+ O << "]";
+}
+
+void CSKYInstPrinter::printCSKYSymbolOperand(const MCInst *MI, uint64_t Address,
+ unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNo);
+ if (!MO.isImm()) {
+ return printOperand(MI, OpNo, STI, O);
+ }
+
+ if (PrintBranchImmAsAddress) {
+ uint64_t Target = Address + MO.getImm();
+ Target &= 0xffffffff;
+ O << formatHex(Target);
+ } else {
+ O << MO.getImm();
+ }
+}
+
+void CSKYInstPrinter::printRegisterSeq(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ printRegName(O, MI->getOperand(OpNum).getReg());
+ O << "-";
+ printRegName(O, MI->getOperand(OpNum + 1).getReg());
+}
+
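+// Print a register-list immediate. As decoded below, bits [3:0] give the
+// number of registers starting at r4, bit 4 adds r15, bits [7:5] give the
+// number of registers starting at r16, and bit 8 adds r28.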
+void CSKYInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ auto V = MI->getOperand(OpNum).getImm();
+ ListSeparator LS;
+
+ if (V & 0xf) {
+ O << LS;
+ printRegName(O, CSKY::R4);
+ auto Offset = (V & 0xf) - 1;
+ if (Offset) {
+ O << "-";
+ printRegName(O, CSKY::R4 + Offset);
+ }
+ }
+
+ if ((V >> 4) & 0x1) {
+ O << LS;
+ printRegName(O, CSKY::R15);
+ }
+
+ if ((V >> 5) & 0x7) {
+ O << LS;
+ printRegName(O, CSKY::R16);
+
+ auto Offset = ((V >> 5) & 0x7) - 1;
+
+ if (Offset) {
+ O << "-";
+ printRegName(O, CSKY::R16 + Offset);
+ }
+ }
+
+ if ((V >> 8) & 0x1) {
+ O << LS;
+ printRegName(O, CSKY::R28);
+ }
+}
+
const char *CSKYInstPrinter::getRegisterName(unsigned RegNo) {
return getRegisterName(RegNo, ArchRegNames ? CSKY::NoRegAltName
: CSKY::ABIRegAltName);
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.h b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.h
index a28791a6d8e9..f93a342ec6a3 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.h
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.h
@@ -19,6 +19,9 @@
namespace llvm {
class CSKYInstPrinter : public MCInstPrinter {
+private:
+ bool ABIRegNames = false;
+
public:
CSKYInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
const MCRegisterInfo &MRI)
@@ -43,6 +46,20 @@ public:
unsigned OpIdx, unsigned PrintMethodIdx,
const MCSubtargetInfo &STI, raw_ostream &O);
+ void printDataSymbol(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printConstpool(const MCInst *MI, uint64_t Address, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printPSRFlag(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printRegisterSeq(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printRegisterList(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printCSKYSymbolOperand(const MCInst *MI, uint64_t Address, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printSPAddr(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
static const char *getRegisterName(unsigned RegNo, unsigned AltIdx);
};
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.cpp
index 1a5b0225e0b9..1d220b749cb1 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.cpp
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "CSKYMCCodeEmitter.h"
+#include "CSKYMCExpr.h"
#include "MCTargetDesc/CSKYMCTargetDesc.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCInstBuilder.h"
@@ -31,11 +32,46 @@ unsigned CSKYMCCodeEmitter::getOImmOpValue(const MCInst &MI, unsigned Idx,
return MO.getImm() - 1;
}
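+// Encode the idly operand: values of at most 3 are rounded up to 4, and the
+// field stores the value minus one.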
+unsigned
+CSKYMCCodeEmitter::getImmOpValueIDLY(const MCInst &MI, unsigned Idx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(Idx);
+ assert(MO.isImm() && "Unexpected MO type.");
+
+ auto V = (MO.getImm() <= 3) ? 4 : MO.getImm();
+ return V - 1;
+}
+
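+// Encode the width field of a bit-range operation as the difference between
+// the MSB and LSB immediate operands.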
+unsigned
+CSKYMCCodeEmitter::getImmOpValueMSBSize(const MCInst &MI, unsigned Idx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MSB = MI.getOperand(Idx);
+ const MCOperand &LSB = MI.getOperand(Idx + 1);
+ assert(MSB.isImm() && LSB.isImm() && "Unexpected MO type.");
+
+ return MSB.getImm() - LSB.getImm();
+}
+
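+// Write an encoded instruction as little-endian halfwords; 32-bit instructions
+// emit the high halfword first.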
+static void writeData(uint32_t Bin, unsigned Size, raw_ostream &OS) {
+ uint16_t LO16 = static_cast<uint16_t>(Bin);
+ uint16_t HI16 = static_cast<uint16_t>(Bin >> 16);
+
+ if (Size == 4)
+ support::endian::write<uint16_t>(OS, HI16, support::little);
+
+ support::endian::write<uint16_t>(OS, LO16, support::little);
+}
+
void CSKYMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
const MCInstrDesc &Desc = MII.get(MI.getOpcode());
unsigned Size = Desc.getSize();
+
+ ++MCNumEmitted;
+
uint32_t Bin = getBinaryCodeForInstr(MI, Fixups, STI);
uint16_t LO16 = static_cast<uint16_t>(Bin);
@@ -45,7 +81,6 @@ void CSKYMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
support::endian::write<uint16_t>(OS, HI16, support::little);
support::endian::write<uint16_t>(OS, LO16, support::little);
- ++MCNumEmitted; // Keep track of the # of mi's emitted.
}
unsigned
@@ -62,6 +97,51 @@ CSKYMCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO,
return 0;
}
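+// Encode a register sequence given as a start register plus an operand naming
+// the last register: the start register's encoding is placed in the upper
+// field and the distance to the last register in the low bits.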
+unsigned
+CSKYMCCodeEmitter::getRegSeqImmOpValue(const MCInst &MI, unsigned Idx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ assert(MI.getOperand(Idx).isReg() && "Unexpected MO type.");
+ assert(MI.getOperand(Idx + 1).isImm() && "Unexpected MO type.");
+
+ unsigned Ry = MI.getOperand(Idx).getReg();
+ unsigned Rz = MI.getOperand(Idx + 1).getImm();
+
+ unsigned Imm = Ctx.getRegisterInfo()->getEncodingValue(Rz) -
+ Ctx.getRegisterInfo()->getEncodingValue(Ry);
+
+ return ((Ctx.getRegisterInfo()->getEncodingValue(Ry) << 5) | Imm);
+}
+
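+// Encode a register range given by two register operands: the first register's
+// 5-bit encoding goes in the upper field, the distance to the second register
+// in the low bits.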
+unsigned
+CSKYMCCodeEmitter::getRegisterSeqOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ unsigned Reg1 =
+ Ctx.getRegisterInfo()->getEncodingValue(MI.getOperand(Op).getReg());
+ unsigned Reg2 =
+ Ctx.getRegisterInfo()->getEncodingValue(MI.getOperand(Op + 1).getReg());
+
+ unsigned Binary = ((Reg1 & 0x1f) << 5) | (Reg2 - Reg1);
+
+ return Binary;
+}
+
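+// Map the jmpix immediate values 16, 24, 32 and 40 to their 2-bit encodings
+// 0 through 3.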
+unsigned CSKYMCCodeEmitter::getImmJMPIX(const MCInst &MI, unsigned Idx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ if (MI.getOperand(Idx).getImm() == 16)
+ return 0;
+ else if (MI.getOperand(Idx).getImm() == 24)
+ return 1;
+ else if (MI.getOperand(Idx).getImm() == 32)
+ return 2;
+ else if (MI.getOperand(Idx).getImm() == 40)
+ return 3;
+ else
+   llvm_unreachable("Unexpected jmpix immediate value");
+}
+
MCFixupKind CSKYMCCodeEmitter::getTargetFixup(const MCExpr *Expr) const {
const CSKYMCExpr *CSKYExpr = cast<CSKYMCExpr>(Expr);
@@ -70,6 +150,22 @@ MCFixupKind CSKYMCCodeEmitter::getTargetFixup(const MCExpr *Expr) const {
llvm_unreachable("Unhandled fixup kind!");
case CSKYMCExpr::VK_CSKY_ADDR:
return MCFixupKind(CSKY::fixup_csky_addr32);
+ case CSKYMCExpr::VK_CSKY_ADDR_HI16:
+ return MCFixupKind(CSKY::fixup_csky_addr_hi16);
+ case CSKYMCExpr::VK_CSKY_ADDR_LO16:
+ return MCFixupKind(CSKY::fixup_csky_addr_lo16);
+ case CSKYMCExpr::VK_CSKY_GOT:
+ return MCFixupKind(CSKY::fixup_csky_got32);
+ case CSKYMCExpr::VK_CSKY_GOTPC:
+ return MCFixupKind(CSKY::fixup_csky_gotpc);
+ case CSKYMCExpr::VK_CSKY_GOTOFF:
+ return MCFixupKind(CSKY::fixup_csky_gotoff);
+ case CSKYMCExpr::VK_CSKY_PLT:
+ return MCFixupKind(CSKY::fixup_csky_plt32);
+ case CSKYMCExpr::VK_CSKY_PLT_IMM18_BY4:
+ return MCFixupKind(CSKY::fixup_csky_plt_imm18_scale4);
+ case CSKYMCExpr::VK_CSKY_GOT_IMM18_BY4:
+ return MCFixupKind(CSKY::fixup_csky_got_imm18_scale4);
}
}
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.h b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.h
index a4c50d992a07..bfba07bcb32a 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.h
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.h
@@ -13,8 +13,8 @@
#ifndef LLVM_LIB_TARGET_CSKY_MCTARGETDESC_CSKYMCCODEEMITTER_H
#define LLVM_LIB_TARGET_CSKY_MCTARGETDESC_CSKYMCCODEEMITTER_H
-#include "CSKYMCExpr.h"
#include "MCTargetDesc/CSKYFixupKinds.h"
+#include "MCTargetDesc/CSKYMCExpr.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
@@ -49,14 +49,40 @@ public:
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(Idx);
- assert(MO.isImm() && "Unexpected MO type.");
- return (MO.getImm() >> shift);
+ if (MO.isImm())
+ return (MO.getImm() >> shift);
+
+ assert(MO.isExpr() && "Unexpected MO type.");
+
+ MCFixupKind Kind = getTargetFixup(MO.getExpr());
+ Fixups.push_back(MCFixup::create(0, MO.getExpr(), Kind, MI.getLoc()));
+ return 0;
}
+ unsigned getRegSeqImmOpValue(const MCInst &MI, unsigned Idx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ unsigned getRegisterSeqOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
unsigned getOImmOpValue(const MCInst &MI, unsigned Idx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ unsigned getImmOpValueIDLY(const MCInst &MI, unsigned Idx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ unsigned getImmJMPIX(const MCInst &MI, unsigned Idx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ unsigned getImmOpValueMSBSize(const MCInst &MI, unsigned Idx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
unsigned getImmShiftOpValue(const MCInst &MI, unsigned Idx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
@@ -101,6 +127,21 @@ public:
return 0;
}
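+ // Record a fixup of kind FIXUP (or the CSKY-specific kind carried by the
+ // expression) against a data symbol operand; the encoded field itself is
+ // left as zero and resolved by the fixup.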
+ template <llvm::CSKY::Fixups FIXUP>
+ unsigned getDataSymbolOpValue(const MCInst &MI, unsigned Idx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(Idx);
+ assert(MO.isExpr() && "Unexpected MO type.");
+
+ MCFixupKind Kind = MCFixupKind(FIXUP);
+ if (MO.getExpr()->getKind() == MCExpr::Target)
+ Kind = getTargetFixup(MO.getExpr());
+
+ Fixups.push_back(MCFixup::create(0, MO.getExpr(), Kind, MI.getLoc()));
+ return 0;
+ }
+
unsigned getCallSymbolOpValue(const MCInst &MI, unsigned Idx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCExpr.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCExpr.cpp
index 59e630f43a42..7987613b0608 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCExpr.cpp
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCExpr.cpp
@@ -26,22 +26,33 @@ StringRef CSKYMCExpr::getVariantKindName(VariantKind Kind) {
switch (Kind) {
default:
llvm_unreachable("Invalid ELF symbol kind");
+ case VK_CSKY_None:
case VK_CSKY_ADDR:
return "";
- case VK_CSKY_PCREL:
- return "";
+ case VK_CSKY_ADDR_HI16:
+ return "@HI16";
+ case VK_CSKY_ADDR_LO16:
+ return "@LO16";
+ case VK_CSKY_GOT_IMM18_BY4:
case VK_CSKY_GOT:
return "@GOT";
case VK_CSKY_GOTPC:
return "@GOTPC";
case VK_CSKY_GOTOFF:
return "@GOTOFF";
+ case VK_CSKY_PLT_IMM18_BY4:
case VK_CSKY_PLT:
return "@PLT";
- case VK_CSKY_TPOFF:
+ case VK_CSKY_TLSLE:
return "@TPOFF";
+ case VK_CSKY_TLSIE:
+ return "@GOTTPOFF";
case VK_CSKY_TLSGD:
- return "@TLSGD";
+ return "@TLSGD32";
+ case VK_CSKY_TLSLDO:
+ return "@TLSLDO32";
+ case VK_CSKY_TLSLDM:
+ return "@TLSLDM32";
}
}
@@ -87,7 +98,8 @@ void CSKYMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {
switch (getKind()) {
default:
return;
- case VK_CSKY_TPOFF:
+ case VK_CSKY_TLSLE:
+ case VK_CSKY_TLSIE:
case VK_CSKY_TLSGD:
break;
}
@@ -106,17 +118,20 @@ bool CSKYMCExpr::evaluateAsRelocatableImpl(MCValue &Res,
switch (getKind()) {
default:
return true;
-
- case VK_CSKY_ADDR:
- case VK_CSKY_PCREL:
case VK_CSKY_GOT:
+ case VK_CSKY_GOT_IMM18_BY4:
case VK_CSKY_GOTPC:
case VK_CSKY_GOTOFF:
- case VK_CSKY_TPOFF:
+ case VK_CSKY_PLT:
+ case VK_CSKY_PLT_IMM18_BY4:
+ case VK_CSKY_TLSIE:
+ case VK_CSKY_TLSLE:
case VK_CSKY_TLSGD:
+ case VK_CSKY_TLSLDO:
+ case VK_CSKY_TLSLDM:
return false;
}
}
return true;
-}
\ No newline at end of file
+}
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCExpr.h b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCExpr.h
index 06fccada53ce..9e5b4ca7d9bb 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCExpr.h
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCExpr.h
@@ -19,13 +19,20 @@ public:
enum VariantKind {
VK_CSKY_None,
VK_CSKY_ADDR,
+ VK_CSKY_ADDR_HI16,
+ VK_CSKY_ADDR_LO16,
VK_CSKY_PCREL,
VK_CSKY_GOT,
+ VK_CSKY_GOT_IMM18_BY4,
VK_CSKY_GOTPC,
VK_CSKY_GOTOFF,
VK_CSKY_PLT,
- VK_CSKY_TPOFF,
+ VK_CSKY_PLT_IMM18_BY4,
+ VK_CSKY_TLSIE,
+ VK_CSKY_TLSLE,
VK_CSKY_TLSGD,
+ VK_CSKY_TLSLDO,
+ VK_CSKY_TLSLDM,
VK_CSKY_Invalid
};
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp
index 169e1e14eb0a..0901c0993607 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp
@@ -19,7 +19,7 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
#include "CSKYGenInstrInfo.inc"
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.h b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.h
index da8a3b63a2f9..25bbd635fc58 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.h
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.h
@@ -45,4 +45,7 @@ MCCodeEmitter *createCSKYMCCodeEmitter(const MCInstrInfo &MCII,
#define GET_INSTRINFO_ENUM
#include "CSKYGenInstrInfo.inc"
+#define GET_SUBTARGETINFO_ENUM
+#include "CSKYGenSubtargetInfo.inc"
+
#endif // LLVM_LIB_TARGET_CSKY_MCTARGETDESC_CSKYMCTARGETDESC_H
diff --git a/llvm/lib/Target/CSKY/TargetInfo/CSKYTargetInfo.cpp b/llvm/lib/Target/CSKY/TargetInfo/CSKYTargetInfo.cpp
index 1af2e672ff42..40b7d493652d 100644
--- a/llvm/lib/Target/CSKY/TargetInfo/CSKYTargetInfo.cpp
+++ b/llvm/lib/Target/CSKY/TargetInfo/CSKYTargetInfo.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#include "TargetInfo/CSKYTargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
Target &llvm::getTheCSKYTarget() {
diff --git a/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
index 7edc2a01eeb8..d131cf896834 100644
--- a/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
+++ b/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
@@ -37,6 +37,7 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -45,7 +46,6 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
@@ -141,12 +141,6 @@ class HexagonAsmParser : public MCTargetAsmParser {
int processInstruction(MCInst &Inst, OperandVector const &Operands,
SMLoc IDLoc);
- // Check if we have an assembler and, if so, set the ELF e_header flags.
- void chksetELFHeaderEFlags(unsigned flags) {
- if (getAssembler())
- getAssembler()->setELFHeaderEFlags(flags);
- }
-
unsigned matchRegister(StringRef Name);
/// @name Auto-generated Match Functions
@@ -211,10 +205,6 @@ struct HexagonOperand : public MCParsedAsmOperand {
const MCExpr *Val;
};
- struct InstTy {
- OperandVector *SubInsts;
- };
-
union {
struct TokTy Tok;
struct RegTy Reg;
@@ -1498,7 +1488,7 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
MES->SwitchSection(mySection);
unsigned byteSize = is32bit ? 4 : 8;
- getStreamer().emitCodeAlignment(byteSize, byteSize);
+ getStreamer().emitCodeAlignment(byteSize, &getSTI(), byteSize);
MCSymbol *Sym;
diff --git a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
index 80a987c3a549..3c742c98077b 100644
--- a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
+++ b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -21,9 +21,9 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstddef>
@@ -131,6 +131,9 @@ static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeGuestRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeSysRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
@@ -140,6 +143,10 @@ static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeGuestRegs64RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeSysRegs64RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
static DecodeStatus unsignedImmDecoder(MCInst &MI, unsigned tmp,
uint64_t Address, const void *Decoder);
@@ -760,6 +767,78 @@ static DecodeStatus brtargetDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
return MCDisassembler::Success;
}
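+// Table mapping encoded system-register numbers to Hexagon system registers.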
+static const uint16_t SysRegDecoderTable[] = {
+ Hexagon::SGP0, Hexagon::SGP1, Hexagon::STID,
+ Hexagon::ELR, Hexagon::BADVA0, Hexagon::BADVA1,
+ Hexagon::SSR, Hexagon::CCR, Hexagon::HTID,
+ Hexagon::BADVA, Hexagon::IMASK, Hexagon::S11,
+ Hexagon::S12, Hexagon::S13, Hexagon::S14,
+ Hexagon::S15, Hexagon::EVB, Hexagon::MODECTL,
+ Hexagon::SYSCFG, Hexagon::S19, Hexagon::S20,
+ Hexagon::VID, Hexagon::S22, Hexagon::S23,
+ Hexagon::S24, Hexagon::S25, Hexagon::S26,
+ Hexagon::CFGBASE, Hexagon::DIAG, Hexagon::REV,
+ Hexagon::PCYCLELO, Hexagon::PCYCLEHI, Hexagon::ISDBST,
+ Hexagon::ISDBCFG0, Hexagon::ISDBCFG1, Hexagon::S35,
+ Hexagon::BRKPTPC0, Hexagon::BRKPTCFG0, Hexagon::BRKPTPC1,
+ Hexagon::BRKPTCFG1, Hexagon::ISDBMBXIN, Hexagon::ISDBMBXOUT,
+ Hexagon::ISDBEN, Hexagon::ISDBGPR, Hexagon::S44,
+ Hexagon::S45, Hexagon::S46, Hexagon::S47,
+ Hexagon::PMUCNT0, Hexagon::PMUCNT1, Hexagon::PMUCNT2,
+ Hexagon::PMUCNT3, Hexagon::PMUEVTCFG, Hexagon::PMUCFG,
+ Hexagon::S54, Hexagon::S55, Hexagon::S56,
+ Hexagon::S57, Hexagon::S58, Hexagon::S59,
+ Hexagon::S60, Hexagon::S61, Hexagon::S62,
+ Hexagon::S63, Hexagon::S64, Hexagon::S65,
+ Hexagon::S66, Hexagon::S67, Hexagon::S68,
+ Hexagon::S69, Hexagon::S70, Hexagon::S71,
+ Hexagon::S72, Hexagon::S73, Hexagon::S74,
+ Hexagon::S75, Hexagon::S76, Hexagon::S77,
+ Hexagon::S78, Hexagon::S79, Hexagon::S80,
+};
+
+static DecodeStatus DecodeSysRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t /*Address*/,
+ const void *Decoder) {
+ if (RegNo >= sizeof(SysRegDecoderTable) / sizeof(SysRegDecoderTable[0]))
+ return MCDisassembler::Fail;
+
+ if (SysRegDecoderTable[RegNo] == Hexagon::NoRegister)
+ return MCDisassembler::Fail;
+
+ unsigned Register = SysRegDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::createReg(Register));
+ return MCDisassembler::Success;
+}
+
+static const uint16_t SysReg64DecoderTable[] = {
+ Hexagon::SGP1_0, Hexagon::S3_2, Hexagon::S5_4, Hexagon::S7_6,
+ Hexagon::S9_8, Hexagon::S11_10, Hexagon::S13_12, Hexagon::S15_14,
+ Hexagon::S17_16, Hexagon::S19_18, Hexagon::S21_20, Hexagon::S23_22,
+ Hexagon::S25_24, Hexagon::S27_26, Hexagon::S29_28, Hexagon::S31_30,
+ Hexagon::S33_32, Hexagon::S35_34, Hexagon::S37_36, Hexagon::S39_38,
+ Hexagon::S41_40, Hexagon::S43_42, Hexagon::S45_44, Hexagon::S47_46,
+ Hexagon::S49_48, Hexagon::S51_50, Hexagon::S53_52, Hexagon::S55_54,
+ Hexagon::S57_56, Hexagon::S59_58, Hexagon::S61_60, Hexagon::S63_62,
+ Hexagon::S65_64, Hexagon::S67_66, Hexagon::S69_68, Hexagon::S71_70,
+ Hexagon::S73_72, Hexagon::S75_74, Hexagon::S77_76, Hexagon::S79_78,
+};
+
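+// System-register pairs are encoded with the even register number, so halve
+// RegNo before indexing the pair table.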
+static DecodeStatus DecodeSysRegs64RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t /*Address*/,
+ const void *Decoder) {
+ RegNo = RegNo >> 1;
+ if (RegNo >= sizeof(SysReg64DecoderTable) / sizeof(SysReg64DecoderTable[0]))
+ return MCDisassembler::Fail;
+
+ if (SysReg64DecoderTable[RegNo] == Hexagon::NoRegister)
+ return MCDisassembler::Fail;
+
+ unsigned Register = SysReg64DecoderTable[RegNo];
+ Inst.addOperand(MCOperand::createReg(Register));
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeGuestRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t /*Address*/,
const void *Decoder) {
diff --git a/llvm/lib/Target/Hexagon/HexagonArch.h b/llvm/lib/Target/Hexagon/HexagonArch.h
index e5d528390c51..4a42ec98feb1 100644
--- a/llvm/lib/Target/Hexagon/HexagonArch.h
+++ b/llvm/lib/Target/Hexagon/HexagonArch.h
@@ -19,12 +19,6 @@ namespace llvm {
namespace Hexagon {
template <class ArchCont, typename Val>
-bool ValidArch(ArchCont const &ArchList, Val HexArch) {
- return std::any_of(std::begin(ArchList), std::end(ArchList),
- [HexArch](Val V) { return V == HexArch; });
-}
-
-template <class ArchCont, typename Val>
llvm::Optional<ArchEnum> GetCpu(ArchCont const &ArchList, Val CPUString) {
llvm::Optional<ArchEnum> Res;
auto Entry = ArchList.find(CPUString);
diff --git a/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
index f3017d02995e..8e6a01e3a186 100644
--- a/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -41,10 +41,10 @@
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
@@ -179,7 +179,7 @@ bool HexagonAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
static MCSymbol *smallData(AsmPrinter &AP, const MachineInstr &MI,
MCStreamer &OutStreamer, const MCOperand &Imm,
- int AlignSize) {
+ int AlignSize, const MCSubtargetInfo& STI) {
MCSymbol *Sym;
int64_t Value;
if (Imm.getExpr()->evaluateAsAbsolute(Value)) {
@@ -209,7 +209,7 @@ static MCSymbol *smallData(AsmPrinter &AP, const MachineInstr &MI,
OutStreamer.emitLabel(Sym);
OutStreamer.emitSymbolAttribute(Sym, MCSA_Global);
OutStreamer.emitIntValue(Value, AlignSize);
- OutStreamer.emitCodeAlignment(AlignSize);
+ OutStreamer.emitCodeAlignment(AlignSize, &STI);
}
} else {
assert(Imm.isExpr() && "Expected expression and found none");
@@ -237,7 +237,7 @@ static MCSymbol *smallData(AsmPrinter &AP, const MachineInstr &MI,
OutStreamer.emitLabel(Sym);
OutStreamer.emitSymbolAttribute(Sym, MCSA_Local);
OutStreamer.emitValue(Imm.getExpr(), AlignSize);
- OutStreamer.emitCodeAlignment(AlignSize);
+ OutStreamer.emitCodeAlignment(AlignSize, &STI);
}
}
return Sym;
@@ -328,7 +328,8 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst,
const MCOperand &Imm = MappedInst.getOperand(1);
MCSectionSubPair Current = OutStreamer->getCurrentSection();
- MCSymbol *Sym = smallData(*this, MI, *OutStreamer, Imm, 8);
+ MCSymbol *Sym =
+ smallData(*this, MI, *OutStreamer, Imm, 8, getSubtargetInfo());
OutStreamer->SwitchSection(Current.first, Current.second);
MCInst TmpInst;
@@ -345,7 +346,8 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst,
if (!OutStreamer->hasRawTextSupport()) {
MCOperand &Imm = MappedInst.getOperand(1);
MCSectionSubPair Current = OutStreamer->getCurrentSection();
- MCSymbol *Sym = smallData(*this, MI, *OutStreamer, Imm, 4);
+ MCSymbol *Sym =
+ smallData(*this, MI, *OutStreamer, Imm, 4, getSubtargetInfo());
OutStreamer->SwitchSection(Current.first, Current.second);
MCInst TmpInst;
MCOperand &Reg = MappedInst.getOperand(0);
diff --git a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
index 54aa14849dd9..2c5ad3b589d2 100644
--- a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
@@ -972,8 +972,8 @@ namespace {
} // end anonymous namespace
bool DeadCodeElimination::isDead(unsigned R) const {
- for (auto I = MRI.use_begin(R), E = MRI.use_end(); I != E; ++I) {
- MachineInstr *UseI = I->getParent();
+ for (const MachineOperand &MO : MRI.use_operands(R)) {
+ const MachineInstr *UseI = MO.getParent();
if (UseI->isDebugValue())
continue;
if (UseI->isPHI()) {
@@ -1305,8 +1305,7 @@ bool RedundantInstrElimination::processBlock(MachineBasicBlock &B,
return false;
bool Changed = false;
- for (auto I = B.begin(), E = B.end(), NextI = I; I != E; ++I) {
- NextI = std::next(I);
+ for (auto I = B.begin(), E = B.end(); I != E; ++I) {
MachineInstr *MI = &*I;
if (MI->getOpcode() == TargetOpcode::COPY)
@@ -1598,9 +1597,7 @@ bool CopyGeneration::processBlock(MachineBasicBlock &B,
bool Changed = false;
RegisterSet Defs;
- for (auto I = B.begin(), E = B.end(), NextI = I; I != E;
- ++I, AVB.insert(Defs)) {
- NextI = std::next(I);
+ for (auto I = B.begin(), E = B.end(); I != E; ++I, AVB.insert(Defs)) {
Defs.clear();
HBS::getInstrDefs(*I, Defs);
@@ -1726,8 +1723,8 @@ bool CopyPropagation::propagateRegCopy(MachineInstr &MI) {
bool CopyPropagation::processBlock(MachineBasicBlock &B, const RegisterSet&) {
std::vector<MachineInstr*> Instrs;
- for (auto I = B.rbegin(), E = B.rend(); I != E; ++I)
- Instrs.push_back(&*I);
+ for (MachineInstr &MI : llvm::reverse(B))
+ Instrs.push_back(&MI);
bool Changed = false;
for (auto I : Instrs) {
@@ -3123,8 +3120,8 @@ bool HexagonLoopRescheduling::processLoop(LoopCand &C) {
if (isConst(PR))
continue;
bool BadUse = false, GoodUse = false;
- for (auto UI = MRI->use_begin(PR), UE = MRI->use_end(); UI != UE; ++UI) {
- MachineInstr *UseI = UI->getParent();
+ for (const MachineOperand &MO : MRI->use_operands(PR)) {
+ const MachineInstr *UseI = MO.getParent();
if (UseI->getParent() != C.LB) {
BadUse = true;
break;
@@ -3252,7 +3249,7 @@ bool HexagonLoopRescheduling::processLoop(LoopCand &C) {
auto LoopInpEq = [G] (const PhiInfo &P) -> bool {
return G.Out.Reg == P.LR.Reg;
};
- if (llvm::find_if(Phis, LoopInpEq) == Phis.end())
+ if (llvm::none_of(Phis, LoopInpEq))
continue;
G.Inp.Reg = Inputs.find_first();
@@ -3338,9 +3335,9 @@ bool HexagonLoopRescheduling::runOnMachineFunction(MachineFunction &MF) {
continue;
MachineBasicBlock *PB = nullptr;
bool IsLoop = false;
- for (auto PI = B.pred_begin(), PE = B.pred_end(); PI != PE; ++PI) {
- if (*PI != &B)
- PB = *PI;
+ for (MachineBasicBlock *Pred : B.predecessors()) {
+ if (Pred != &B)
+ PB = Pred;
else
IsLoop = true;
}
@@ -3348,13 +3345,13 @@ bool HexagonLoopRescheduling::runOnMachineFunction(MachineFunction &MF) {
continue;
MachineBasicBlock *EB = nullptr;
- for (auto SI = B.succ_begin(), SE = B.succ_end(); SI != SE; ++SI) {
- if (*SI == &B)
+ for (MachineBasicBlock *Succ : B.successors()) {
+ if (Succ == &B)
continue;
// Set EP to the epilog block, if it has only 1 predecessor (i.e. the
// edge from B to EP is non-critical.
- if ((*SI)->pred_size() == 1)
- EB = *SI;
+ if (Succ->pred_size() == 1)
+ EB = Succ;
break;
}
diff --git a/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp b/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
index 9f18d0b3162c..43f0758f6598 100644
--- a/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
@@ -386,9 +386,8 @@ void HexagonCommonGEP::processGepInst(GetElementPtrInst *GepI,
// dereferences the pointer operand.
GepNode *PN = N;
Type *PtrTy = GepI->getSourceElementType();
- for (User::op_iterator OI = GepI->idx_begin()+1, OE = GepI->idx_end();
- OI != OE; ++OI) {
- Value *Op = *OI;
+ for (Use &U : llvm::drop_begin(GepI->indices())) {
+ Value *Op = U;
GepNode *Nx = new (*Mem) GepNode;
Nx->Parent = PN; // Link Nx to the previous node.
Nx->Flags |= GepNode::Internal | InBounds;
diff --git a/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp b/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp
index 954e61563697..daf311fc49d4 100644
--- a/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp
@@ -863,14 +863,13 @@ void MachineConstPropagator::removeCFGEdge(MachineBasicBlock *From,
// First, remove the CFG successor/predecessor information.
From->removeSuccessor(To);
// Remove all corresponding PHI operands in the To block.
- for (auto I = To->begin(), E = To->getFirstNonPHI(); I != E; ++I) {
- MachineInstr *PN = &*I;
+ for (MachineInstr &PN : To->phis()) {
// reg0 = PHI reg1, bb2, reg3, bb4, ...
- int N = PN->getNumOperands()-2;
+ int N = PN.getNumOperands() - 2;
while (N > 0) {
- if (PN->getOperand(N+1).getMBB() == From) {
- PN->RemoveOperand(N+1);
- PN->RemoveOperand(N);
+ if (PN.getOperand(N + 1).getMBB() == From) {
+ PN.RemoveOperand(N + 1);
+ PN.RemoveOperand(N);
}
N -= 2;
}
@@ -996,8 +995,7 @@ bool MachineConstPropagator::rewrite(MachineFunction &MF) {
bool HaveTargets = computeBlockSuccessors(B, Targets);
// Rewrite the executable instructions. Skip branches if we don't
// have block successor information.
- for (auto I = B->rbegin(), E = B->rend(); I != E; ++I) {
- MachineInstr &MI = *I;
+ for (MachineInstr &MI : llvm::reverse(*B)) {
if (InstrExec.count(&MI)) {
if (MI.isBranch() && !HaveTargets)
continue;
@@ -1046,13 +1044,9 @@ bool MachineConstPropagator::rewrite(MachineFunction &MF) {
// erase instructions during rewriting, so this needs to be delayed until
// now.
for (MachineBasicBlock &B : MF) {
- MachineBasicBlock::iterator I = B.begin(), E = B.end();
- while (I != E) {
- auto Next = std::next(I);
- if (I->isBranch() && !InstrExec.count(&*I))
- B.erase(I);
- I = Next;
- }
+ for (MachineInstr &MI : llvm::make_early_inc_range(B))
+ if (MI.isBranch() && !InstrExec.count(&MI))
+ B.erase(&MI);
}
return Changed;
}
@@ -3133,11 +3127,9 @@ void HexagonConstEvaluator::replaceAllRegUsesWith(Register FromReg,
Register ToReg) {
assert(FromReg.isVirtual());
assert(ToReg.isVirtual());
- for (auto I = MRI->use_begin(FromReg), E = MRI->use_end(); I != E;) {
- MachineOperand &O = *I;
- ++I;
+ for (MachineOperand &O :
+ llvm::make_early_inc_range(MRI->use_operands(FromReg)))
O.setReg(ToReg);
- }
}
bool HexagonConstEvaluator::rewriteHexBranch(MachineInstr &BrI,
diff --git a/llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td b/llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td
index 4dd0110c4fed..b3f1b6638193 100644
--- a/llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td
+++ b/llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td
@@ -1700,6 +1700,12 @@ class Enc_7b7ba8 : OpcodeHexagon {
bits <5> Vd32;
let Inst{4-0} = Vd32{4-0};
}
+class Enc_7d1542 : OpcodeHexagon {
+ bits <7> Ss128;
+ let Inst{22-16} = Ss128{6-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
class Enc_7e5a82 : OpcodeHexagon {
bits <5> Ii;
let Inst{12-8} = Ii{4-0};
@@ -2011,6 +2017,12 @@ class Enc_8e583a : OpcodeHexagon {
let Inst{25-23} = n1{3-1};
let Inst{13-13} = n1{0-0};
}
+class Enc_8f7633 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <7> Sd128;
+ let Inst{6-0} = Sd128{6-0};
+}
class Enc_90cd8b : OpcodeHexagon {
bits <5> Rss32;
let Inst{20-16} = Rss32{4-0};
@@ -2346,6 +2358,12 @@ class Enc_a6ce9c : OpcodeHexagon {
bits <4> Rs16;
let Inst{7-4} = Rs16{3-0};
}
+class Enc_a705fc : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <7> Sdd128;
+ let Inst{6-0} = Sdd128{6-0};
+}
class Enc_a7341a : OpcodeHexagon {
bits <5> Vu32;
let Inst{12-8} = Vu32{4-0};
@@ -3127,6 +3145,12 @@ class Enc_e26546 : OpcodeHexagon {
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
+class Enc_e32517 : OpcodeHexagon {
+ bits <7> Sss128;
+ let Inst{22-16} = Sss128{6-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
class Enc_e38e1f : OpcodeHexagon {
bits <8> Ii;
let Inst{12-5} = Ii{7-0};
diff --git a/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td b/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td
index bba36352815e..4f00409c336c 100644
--- a/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td
+++ b/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td
@@ -38870,6 +38870,26 @@ let Inst{13-0} = 0b00000000000000;
let Inst{31-16} = 0b1010100001000000;
let isSolo = 1;
}
+def Y2_tfrscrr : HInst<
+(outs IntRegs:$Rd32),
+(ins SysRegs:$Ss128),
+"$Rd32 = $Ss128",
+tc_fae9dfa5, TypeCR>, Enc_7d1542 {
+let Inst{13-5} = 0b000000000;
+let Inst{31-23} = 0b011011101;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def Y2_tfrsrcr : HInst<
+(outs SysRegs:$Sd128),
+(ins IntRegs:$Rs32),
+"$Sd128 = $Rs32",
+tc_6ae3426b, TypeCR>, Enc_8f7633 {
+let Inst{13-7} = 0b0000000;
+let Inst{31-21} = 0b01100111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
def Y2_wait : HInst<
(outs),
(ins IntRegs:$Rs32),
@@ -38891,6 +38911,24 @@ let isSoloAX = 1;
let hasSideEffects = 1;
let mayStore = 1;
}
+def Y4_tfrscpp : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins SysRegs64:$Sss128),
+"$Rdd32 = $Sss128",
+tc_fae9dfa5, TypeCR>, Enc_e32517 {
+let Inst{13-5} = 0b000000000;
+let Inst{31-23} = 0b011011110;
+}
+def Y4_tfrspcp : HInst<
+(outs SysRegs64:$Sdd128),
+(ins DoubleRegs:$Rss32),
+"$Sdd128 = $Rss32",
+tc_6ae3426b, TypeCR>, Enc_a705fc {
+let Inst{13-7} = 0b0000000;
+let Inst{31-21} = 0b01101101000;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
def Y4_trace : HInst<
(outs),
(ins IntRegs:$Rs32),
diff --git a/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp b/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
index d36ffc3da641..9a3feb5b6af1 100644
--- a/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
@@ -364,7 +364,7 @@ bool HexagonEarlyIfConversion::isValidCandidate(const MachineBasicBlock *B)
return true;
if (B->isEHPad() || B->hasAddressTaken())
return false;
- if (B->succ_size() == 0)
+ if (B->succ_empty())
return false;
for (auto &MI : *B) {
@@ -390,8 +390,8 @@ bool HexagonEarlyIfConversion::isValidCandidate(const MachineBasicBlock *B)
continue;
if (!isPredicate(R))
continue;
- for (auto U = MRI->use_begin(R); U != MRI->use_end(); ++U)
- if (U->getParent()->isPHI())
+ for (const MachineOperand &U : MRI->use_operands(R))
+ if (U.getParent()->isPHI())
return false;
}
}
@@ -570,12 +570,12 @@ bool HexagonEarlyIfConversion::isProfitable(const FlowPattern &FP) const {
TotalPh = computePhiCost(FP.JoinB, FP);
PredDefs += countPredicateDefs(FP.JoinB);
} else {
- if (FP.TrueB && FP.TrueB->succ_size() > 0) {
+ if (FP.TrueB && !FP.TrueB->succ_empty()) {
MachineBasicBlock *SB = *FP.TrueB->succ_begin();
TotalPh += computePhiCost(SB, FP);
PredDefs += countPredicateDefs(SB);
}
- if (FP.FalseB && FP.FalseB->succ_size() > 0) {
+ if (FP.FalseB && !FP.FalseB->succ_empty()) {
MachineBasicBlock *SB = *FP.FalseB->succ_begin();
TotalPh += computePhiCost(SB, FP);
PredDefs += countPredicateDefs(SB);
@@ -877,7 +877,7 @@ void HexagonEarlyIfConversion::convert(const FlowPattern &FP) {
// existing terminators/successors from the split block.
MachineBasicBlock *SSB = nullptr;
FP.SplitB->erase(OldTI, FP.SplitB->end());
- while (FP.SplitB->succ_size() > 0) {
+ while (!FP.SplitB->succ_empty()) {
MachineBasicBlock *T = *FP.SplitB->succ_begin();
// It's possible that the split block had a successor that is not a pre-
// dicated block. This could only happen if there was only one block to
@@ -970,7 +970,7 @@ void HexagonEarlyIfConversion::removeBlock(MachineBasicBlock *B) {
}
}
- while (B->succ_size() > 0)
+ while (!B->succ_empty())
B->removeSuccessor(B->succ_begin());
for (auto I = B->pred_begin(), E = B->pred_end(); I != E; ++I)
diff --git a/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp
index fcc880463925..c444cf557c21 100644
--- a/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp
@@ -1070,20 +1070,18 @@ bool HexagonExpandCondsets::predicate(MachineInstr &TfrI, bool Cond,
bool HexagonExpandCondsets::predicateInBlock(MachineBasicBlock &B,
std::set<Register> &UpdRegs) {
bool Changed = false;
- MachineBasicBlock::iterator I, E, NextI;
- for (I = B.begin(), E = B.end(); I != E; I = NextI) {
- NextI = std::next(I);
- unsigned Opc = I->getOpcode();
+ for (MachineInstr &MI : llvm::make_early_inc_range(B)) {
+ unsigned Opc = MI.getOpcode();
if (Opc == Hexagon::A2_tfrt || Opc == Hexagon::A2_tfrf) {
- bool Done = predicate(*I, (Opc == Hexagon::A2_tfrt), UpdRegs);
+ bool Done = predicate(MI, (Opc == Hexagon::A2_tfrt), UpdRegs);
if (!Done) {
// If we didn't predicate I, we may need to remove it in case it is
// an "identity" copy, e.g. %1 = A2_tfrt %2, %1.
- if (RegisterRef(I->getOperand(0)) == RegisterRef(I->getOperand(2))) {
- for (auto &Op : I->operands())
+ if (RegisterRef(MI.getOperand(0)) == RegisterRef(MI.getOperand(2))) {
+ for (auto &Op : MI.operands())
if (Op.isReg())
UpdRegs.insert(Op.getReg());
- removeInstr(*I);
+ removeInstr(MI);
}
}
Changed |= Done;
diff --git a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
index 5b782543b3b4..bff596e69efd 100644
--- a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -281,11 +281,10 @@ static unsigned getMaxCalleeSavedReg(ArrayRef<CalleeSavedInfo> CSI,
/// frame to be already in place.
static bool needsStackFrame(const MachineBasicBlock &MBB, const BitVector &CSR,
const HexagonRegisterInfo &HRI) {
- for (auto &I : MBB) {
- const MachineInstr *MI = &I;
- if (MI->isCall())
+ for (const MachineInstr &MI : MBB) {
+ if (MI.isCall())
return true;
- unsigned Opc = MI->getOpcode();
+ unsigned Opc = MI.getOpcode();
switch (Opc) {
case Hexagon::PS_alloca:
case Hexagon::PS_aligna:
@@ -294,7 +293,7 @@ static bool needsStackFrame(const MachineBasicBlock &MBB, const BitVector &CSR,
break;
}
// Check individual operands.
- for (const MachineOperand &MO : MI->operands()) {
+ for (const MachineOperand &MO : MI.operands()) {
// While the presence of a frame index does not prove that a stack
// frame will be required, all frame indexes should be within alloc-
// frame/deallocframe. Otherwise, the code that translates a frame
@@ -343,8 +342,8 @@ static bool hasTailCall(const MachineBasicBlock &MBB) {
/// Returns true if MBB contains an instruction that returns.
static bool hasReturn(const MachineBasicBlock &MBB) {
- for (auto I = MBB.getFirstTerminator(), E = MBB.end(); I != E; ++I)
- if (I->isReturn())
+ for (const MachineInstr &MI : MBB.terminators())
+ if (MI.isReturn())
return true;
return false;
}
@@ -425,11 +424,10 @@ void HexagonFrameLowering::findShrunkPrologEpilog(MachineFunction &MF,
// city don't do it right now.
for (auto &I : MF) {
unsigned BN = RPO[I.getNumber()];
- for (auto SI = I.succ_begin(), SE = I.succ_end(); SI != SE; ++SI) {
+ for (MachineBasicBlock *Succ : I.successors())
// If found a back-edge, return.
- if (RPO[(*SI)->getNumber()] <= BN)
+ if (RPO[Succ->getNumber()] <= BN)
return;
- }
}
// Collect the set of blocks that need a stack frame to execute. Scan
diff --git a/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp b/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
index f2026877b22c..02da2f29591a 100644
--- a/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
@@ -765,10 +765,7 @@ unsigned HexagonGenInsert::distance(const MachineBasicBlock *FromB,
unsigned MaxD = 0;
- using pred_iterator = MachineBasicBlock::const_pred_iterator;
-
- for (pred_iterator I = ToB->pred_begin(), E = ToB->pred_end(); I != E; ++I) {
- const MachineBasicBlock *PB = *I;
+ for (const MachineBasicBlock *PB : ToB->predecessors()) {
// Skip back edges. Also, if FromB is a predecessor of ToB, the distance
// along that path will be 0, and we don't need to do any calculations
// on it.
diff --git a/llvm/lib/Target/Hexagon/HexagonGenMux.cpp b/llvm/lib/Target/Hexagon/HexagonGenMux.cpp
index 07f85e69abba..cf4f13fb8c0d 100644
--- a/llvm/lib/Target/Hexagon/HexagonGenMux.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonGenMux.cpp
@@ -183,12 +183,11 @@ void HexagonGenMux::buildMaps(MachineBasicBlock &B, InstrIndexMap &I2X,
unsigned NR = HRI->getNumRegs();
BitVector Defs(NR), Uses(NR);
- for (MachineBasicBlock::iterator I = B.begin(), E = B.end(); I != E; ++I) {
- MachineInstr *MI = &*I;
- I2X.insert(std::make_pair(MI, Index));
+ for (MachineInstr &MI : B) {
+ I2X.insert(std::make_pair(&MI, Index));
Defs.reset();
Uses.reset();
- getDefsUses(MI, Defs, Uses);
+ getDefsUses(&MI, Defs, Uses);
DUM.insert(std::make_pair(Index, DefUseInfo(Defs, Uses)));
Index++;
}
@@ -232,22 +231,19 @@ bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) {
CondsetMap CM;
MuxInfoList ML;
- MachineBasicBlock::iterator NextI, End = B.end();
- for (MachineBasicBlock::iterator I = B.begin(); I != End; I = NextI) {
- MachineInstr *MI = &*I;
- NextI = std::next(I);
- unsigned Opc = MI->getOpcode();
+ for (MachineInstr &MI : llvm::make_early_inc_range(B)) {
+ unsigned Opc = MI.getOpcode();
if (!isCondTransfer(Opc))
continue;
- Register DR = MI->getOperand(0).getReg();
+ Register DR = MI.getOperand(0).getReg();
if (isRegPair(DR))
continue;
- MachineOperand &PredOp = MI->getOperand(1);
+ MachineOperand &PredOp = MI.getOperand(1);
if (PredOp.isUndef())
continue;
Register PR = PredOp.getReg();
- unsigned Idx = I2X.lookup(MI);
+ unsigned Idx = I2X.lookup(&MI);
CondsetMap::iterator F = CM.find(DR);
bool IfTrue = HII->isPredicatedTrue(Opc);
@@ -360,21 +356,21 @@ bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) {
return true;
return false;
};
- for (auto I = B.rbegin(), E = B.rend(); I != E; ++I) {
- if (I->isDebugInstr())
+ for (MachineInstr &I : llvm::reverse(B)) {
+ if (I.isDebugInstr())
continue;
// This isn't 100% accurate, but it's safe.
// It won't detect (as a kill) a case like this
// r0 = add r0, 1 <-- r0 should be "killed"
// ... = r0
- for (MachineOperand &Op : I->operands()) {
+ for (MachineOperand &Op : I.operands()) {
if (!Op.isReg() || !Op.isUse())
continue;
assert(Op.getSubReg() == 0 && "Should have physical registers only");
bool Live = IsLive(Op.getReg());
Op.setIsKill(!Live);
}
- LPR.stepBackward(*I);
+ LPR.stepBackward(I);
}
return Changed;
diff --git a/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index 58f3cd55ee9f..a4971ad712eb 100644
--- a/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -468,7 +468,7 @@ bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
return false;
Register CmpReg1, CmpReg2;
- int CmpImm = 0, CmpMask = 0;
+ int64_t CmpImm = 0, CmpMask = 0;
bool CmpAnalyzed =
TII->analyzeCompare(*PredI, CmpReg1, CmpReg2, CmpMask, CmpImm);
// Fail if the compare was not analyzed, or it's not comparing a register
@@ -652,7 +652,7 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
unsigned CondOpc = CondI->getOpcode();
Register CmpReg1, CmpReg2;
- int Mask = 0, ImmValue = 0;
+ int64_t Mask = 0, ImmValue = 0;
bool AnalyzedCmp =
TII->analyzeCompare(*CondI, CmpReg1, CmpReg2, Mask, ImmValue);
if (!AnalyzedCmp)
@@ -1094,15 +1094,15 @@ void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) {
if (!MO.isReg() || !MO.isDef())
continue;
Register Reg = MO.getReg();
- MachineRegisterInfo::use_iterator nextI;
- for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg),
- E = MRI->use_end(); I != E; I = nextI) {
- nextI = std::next(I); // I is invalidated by the setReg
- MachineInstr *UseMI = I->getParent();
+ // We use make_early_inc_range here because setReg below invalidates the
+ // iterator.
+ for (MachineOperand &MO :
+ llvm::make_early_inc_range(MRI->use_operands(Reg))) {
+ MachineInstr *UseMI = MO.getParent();
if (UseMI == MI)
continue;
- if (I->isDebug())
- I->setReg(0U);
+ if (MO.isDebug())
+ MO.setReg(0U);
}
}
@@ -1453,7 +1453,7 @@ bool HexagonHardwareLoops::loopCountMayWrapOrUnderFlow(
E = MRI->use_instr_nodbg_end(); I != E; ++I) {
MachineInstr *MI = &*I;
Register CmpReg1, CmpReg2;
- int CmpMask = 0, CmpValue = 0;
+ int64_t CmpMask = 0, CmpValue = 0;
if (!TII->analyzeCompare(*MI, CmpReg1, CmpReg2, CmpMask, CmpValue))
continue;
diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index fd404a156903..2679e399852f 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -990,7 +990,7 @@ void HexagonDAGToDAGISel::ppSimplifyOrSelect0(std::vector<SDNode*> &&Nodes) {
auto IsZero = [] (const SDValue &V) -> bool {
if (ConstantSDNode *SC = dyn_cast<ConstantSDNode>(V.getNode()))
- return SC->isNullValue();
+ return SC->isZero();
return false;
};
auto IsSelect0 = [IsZero] (const SDValue &Op) -> bool {
@@ -2247,8 +2247,8 @@ SDValue HexagonDAGToDAGISel::balanceSubTree(SDNode *N, bool TopLevel) {
}
void HexagonDAGToDAGISel::rebalanceAddressTrees() {
- for (auto I = CurDAG->allnodes_begin(), E = CurDAG->allnodes_end(); I != E;) {
- SDNode *N = &*I++;
+ for (SDNode &Node : llvm::make_early_inc_range(CurDAG->allnodes())) {
+ SDNode *N = &Node;
if (N->getOpcode() != ISD::LOAD && N->getOpcode() != ISD::STORE)
continue;
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 6ded323a34c3..29572e3106d1 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -1231,7 +1231,7 @@ HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const {
if (RM == Reloc::Static) {
SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset);
- const GlobalObject *GO = GV->getBaseObject();
+ const GlobalObject *GO = GV->getAliaseeObject();
if (GO && Subtarget.useSmallData() && HLOF.isGlobalInSmallSection(GO, HTM))
return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, GA);
return DAG.getNode(HexagonISD::CONST32, dl, PtrVT, GA);
@@ -2556,7 +2556,7 @@ HexagonTargetLowering::extractVector(SDValue VecV, SDValue IdxV,
// Extracting the lowest bit is a no-op, but it changes the type,
// so it must be kept as an operation to avoid errors related to
// type mismatches.
- if (IdxN->isNullValue() && ValTy.getSizeInBits() == 1)
+ if (IdxN->isZero() && ValTy.getSizeInBits() == 1)
return DAG.getNode(HexagonISD::TYPECAST, dl, MVT::i1, VecV);
}
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index e7d3c7c24f34..8900fca8bb78 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -525,7 +525,7 @@ HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,
if (IsSplat) {
assert(SplatV.getNode());
auto *IdxN = dyn_cast<ConstantSDNode>(SplatV.getNode());
- if (IdxN && IdxN->isNullValue())
+ if (IdxN && IdxN->isZero())
return getZero(dl, VecTy, DAG);
MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
SDValue S = DAG.getNode(ISD::SPLAT_VECTOR, dl, WordTy, SplatV);
@@ -743,12 +743,12 @@ HexagonTargetLowering::buildHvxVectorPred(ArrayRef<SDValue> Values,
auto IsTrue = [] (SDValue V) {
if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode()))
- return !N->isNullValue();
+ return !N->isZero();
return false;
};
auto IsFalse = [] (SDValue V) {
if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode()))
- return N->isNullValue();
+ return N->isZero();
return false;
};
@@ -1065,7 +1065,7 @@ HexagonTargetLowering::insertHvxSubvectorReg(SDValue VecV, SDValue SubV,
assert(SubTy.getSizeInBits() == 32 || SubTy.getSizeInBits() == 64);
// Convert IdxV to be index in bytes.
auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode());
- if (!IdxN || !IdxN->isNullValue()) {
+ if (!IdxN || !IdxN->isZero()) {
IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
DAG.getConstant(ElemWidth/8, dl, MVT::i32));
SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, IdxV);
@@ -1088,7 +1088,7 @@ HexagonTargetLowering::insertHvxSubvectorReg(SDValue VecV, SDValue SubV,
RolBase = HwLen-4;
}
// If the vector wasn't ror'ed, don't ror it back.
- if (RolBase != 4 || !IdxN || !IdxN->isNullValue()) {
+ if (RolBase != 4 || !IdxN || !IdxN->isZero()) {
SDValue RolV = DAG.getNode(ISD::SUB, dl, MVT::i32,
DAG.getConstant(RolBase, dl, MVT::i32), IdxV);
SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, RolV);
@@ -1125,7 +1125,7 @@ HexagonTargetLowering::insertHvxSubvectorPred(SDValue VecV, SDValue SubV,
SDValue ByteIdx;
auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode());
- if (!IdxN || !IdxN->isNullValue()) {
+ if (!IdxN || !IdxN->isZero()) {
ByteIdx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
DAG.getConstant(BitBytes, dl, MVT::i32));
ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteIdx);
@@ -1140,7 +1140,7 @@ HexagonTargetLowering::insertHvxSubvectorPred(SDValue VecV, SDValue SubV,
{DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
ByteVec = getInstr(Hexagon::V6_vmux, dl, ByteTy, {Q, ByteSub, ByteVec}, DAG);
// Rotate ByteVec back, and convert to a vector predicate.
- if (!IdxN || !IdxN->isNullValue()) {
+ if (!IdxN || !IdxN->isZero()) {
SDValue HwLenV = DAG.getConstant(HwLen, dl, MVT::i32);
SDValue ByteXdi = DAG.getNode(ISD::SUB, dl, MVT::i32, HwLenV, ByteIdx);
ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteXdi);
@@ -1594,15 +1594,15 @@ HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const {
SDValue
HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const {
- SDValue ValQ = Op.getOperand(0);
+ SDValue Val = Op.getOperand(0);
MVT ResTy = ty(Op);
- MVT VecTy = ty(ValQ);
+ MVT ValTy = ty(Val);
const SDLoc &dl(Op);
- if (isHvxBoolTy(VecTy) && ResTy.isScalarInteger()) {
+ if (isHvxBoolTy(ValTy) && ResTy.isScalarInteger()) {
unsigned HwLen = Subtarget.getVectorLength();
MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
- SDValue VQ = compressHvxPred(ValQ, dl, WordTy, DAG);
+ SDValue VQ = compressHvxPred(Val, dl, WordTy, DAG);
unsigned BitWidth = ResTy.getSizeInBits();
if (BitWidth < 64) {
@@ -1635,6 +1635,39 @@ HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::BUILD_PAIR, dl, ResTy, Combines);
}
+ if (isHvxBoolTy(ResTy) && ValTy.isScalarInteger()) {
+ // Handle bitcast from i128 -> v128i1 and i64 -> v64i1.
+ unsigned BitWidth = ValTy.getSizeInBits();
+ unsigned HwLen = Subtarget.getVectorLength();
+ assert(BitWidth == HwLen);
+
+ MVT ValAsVecTy = MVT::getVectorVT(MVT::i8, BitWidth / 8);
+ SDValue ValAsVec = DAG.getBitcast(ValAsVecTy, Val);
+ // Splat each byte of Val 8 times.
+ // Bytes = [(b0)x8, (b1)x8, ...., (b15)x8]
+ // where b0, b1,..., b15 are least to most significant bytes of I.
+ SmallVector<SDValue, 128> Bytes;
+ // Tmp: 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80, 0x01,0x02,0x04,0x08,...
+ // These are bytes with the LSB rotated left with respect to their index.
+ SmallVector<SDValue, 128> Tmp;
+ for (unsigned I = 0; I != HwLen / 8; ++I) {
+ SDValue Idx = DAG.getConstant(I, dl, MVT::i32);
+ SDValue Byte =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, ValAsVec, Idx);
+ for (unsigned J = 0; J != 8; ++J) {
+ Bytes.push_back(Byte);
+ Tmp.push_back(DAG.getConstant(1ull << J, dl, MVT::i8));
+ }
+ }
+
+ MVT ConstantVecTy = MVT::getVectorVT(MVT::i8, HwLen);
+ SDValue ConstantVec = DAG.getBuildVector(ConstantVecTy, dl, Tmp);
+ SDValue I2V = buildHvxVectorReg(Bytes, dl, ConstantVecTy, DAG);
+
+ // Each Byte in the I2V will be set iff corresponding bit is set in Val.
+ I2V = DAG.getNode(ISD::AND, dl, ConstantVecTy, {I2V, ConstantVec});
+ return DAG.getNode(HexagonISD::V2Q, dl, ResTy, I2V);
+ }
return Op;
}
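The new scalar-to-predicate path added above works by viewing the scalar as bytes, replicating each byte eight times, and masking the J-th copy with 1 << J, so a byte lane is nonzero exactly when the corresponding input bit is set; V2Q then turns the nonzero byte lanes into true predicate bits. A scalar model of that computation on plain integers rather than SDNodes, for illustration only:

#include <array>
#include <cstdint>

// Models the i64 -> v64i1 case: lane (8*I + J) is bit J of byte I of Val.
static std::array<bool, 64> bitsToPredicateLanes(uint64_t Val) {
  std::array<bool, 64> Lanes{};
  for (unsigned I = 0; I != 8; ++I) {
    uint8_t Byte = uint8_t(Val >> (8 * I)); // the byte that gets splatted 8x
    for (unsigned J = 0; J != 8; ++J)       // AND with the (1 << J) constant
      Lanes[8 * I + J] = (Byte & (1u << J)) != 0;
  }
  return Lanes;
}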
@@ -2255,8 +2288,8 @@ HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
case HexagonISD::V2Q:
if (Ops[0].getOpcode() == ISD::SPLAT_VECTOR) {
if (const auto *C = dyn_cast<ConstantSDNode>(Ops[0].getOperand(0)))
- return C->isNullValue() ? DAG.getNode(HexagonISD::QFALSE, dl, ty(Op))
- : DAG.getNode(HexagonISD::QTRUE, dl, ty(Op));
+ return C->isZero() ? DAG.getNode(HexagonISD::QFALSE, dl, ty(Op))
+ : DAG.getNode(HexagonISD::QTRUE, dl, ty(Op));
}
break;
case HexagonISD::Q2V:
diff --git a/llvm/lib/Target/Hexagon/HexagonInstrFormats.td b/llvm/lib/Target/Hexagon/HexagonInstrFormats.td
index ef2b3040931d..45adaf50774f 100644
--- a/llvm/lib/Target/Hexagon/HexagonInstrFormats.td
+++ b/llvm/lib/Target/Hexagon/HexagonInstrFormats.td
@@ -268,8 +268,7 @@ class OpcodeDuplex {
let Inst{12-0} = ISubLo;
}
-class InstDuplex<bits<4> iClass, list<dag> pattern = [],
- string cstr = "">
+class InstDuplex<bits<4> iClass, string cstr = "">
: Instruction, OpcodeDuplex {
let Namespace = "Hexagon";
IType Type = TypeDUPLEX; // uses slot 0,1
diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
index f14eaacbf071..76220eff4d51 100644
--- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -169,13 +169,13 @@ MachineInstr *HexagonInstrInfo::findLoopInstr(MachineBasicBlock *BB,
continue;
if (PB == BB)
continue;
- for (auto I = PB->instr_rbegin(), E = PB->instr_rend(); I != E; ++I) {
- unsigned Opc = I->getOpcode();
+ for (MachineInstr &I : llvm::reverse(PB->instrs())) {
+ unsigned Opc = I.getOpcode();
if (Opc == LOOPi || Opc == LOOPr)
- return &*I;
+ return &I;
// We've reached a different loop, which means the loop01 has been
// removed.
- if (Opc == EndLoopOp && I->getOperand(0).getMBB() != TargetBB)
+ if (Opc == EndLoopOp && I.getOperand(0).getMBB() != TargetBB)
return nullptr;
}
// Check the predecessors for the LOOP instruction.
@@ -1791,8 +1791,8 @@ HexagonInstrInfo::CreateTargetPostRAHazardRecognizer(
/// compares against in CmpValue. Return true if the comparison instruction
/// can be analyzed.
bool HexagonInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
- Register &SrcReg2, int &Mask,
- int &Value) const {
+ Register &SrcReg2, int64_t &Mask,
+ int64_t &Value) const {
unsigned Opc = MI.getOpcode();
// Set mask and the first source register.
@@ -3627,8 +3627,8 @@ int HexagonInstrInfo::getDotNewOp(const MachineInstr &MI) const {
switch (MI.getOpcode()) {
default:
- report_fatal_error(std::string("Unknown .new type: ") +
- std::to_string(MI.getOpcode()));
+ report_fatal_error(Twine("Unknown .new type: ") +
+ std::to_string(MI.getOpcode()));
case Hexagon::S4_storerb_ur:
return Hexagon::S4_storerbnew_ur;
diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
index 11717996935d..eaaf9f7046c7 100644
--- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -270,7 +270,8 @@ public:
/// compares against in CmpValue. Return true if the comparison instruction
/// can be analyzed.
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
- Register &SrcReg2, int &Mask, int &Value) const override;
+ Register &SrcReg2, int64_t &Mask,
+ int64_t &Value) const override;
/// Compute the instruction latency of a given instruction.
/// If the instruction has higher cost when predicated, it's returned via
diff --git a/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp b/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
index 76cc8f402c5a..ccaf1aac1ce0 100644
--- a/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
@@ -1351,8 +1351,8 @@ bool PolynomialMultiplyRecognize::convertShiftsToLeft(BasicBlock *LoopB,
// be unshifted.
if (!commutesWithShift(R))
return false;
- for (auto I = R->user_begin(), E = R->user_end(); I != E; ++I) {
- auto *T = cast<Instruction>(*I);
+ for (User *U : R->users()) {
+ auto *T = cast<Instruction>(U);
// Skip users from outside of the loop. They will be handled later.
// Also, skip the right-shifts and phi nodes, since they mix early
// and late values.
@@ -1490,10 +1490,8 @@ void PolynomialMultiplyRecognize::cleanupLoopBody(BasicBlock *LoopB) {
if (Value *SV = SimplifyInstruction(&I, {DL, &TLI, &DT}))
I.replaceAllUsesWith(SV);
- for (auto I = LoopB->begin(), N = I; I != LoopB->end(); I = N) {
- N = std::next(I);
- RecursivelyDeleteTriviallyDeadInstructions(&*I, &TLI);
- }
+ for (Instruction &I : llvm::make_early_inc_range(*LoopB))
+ RecursivelyDeleteTriviallyDeadInstructions(&I, &TLI);
}
unsigned PolynomialMultiplyRecognize::getInverseMxN(unsigned QP) {
@@ -2247,8 +2245,7 @@ CleanupAndExit:
DT->addNewBlock(MemmoveB, Preheader);
// Find the new immediate dominator of the exit block.
BasicBlock *ExitD = Preheader;
- for (auto PI = pred_begin(ExitB), PE = pred_end(ExitB); PI != PE; ++PI) {
- BasicBlock *PB = *PI;
+ for (BasicBlock *PB : predecessors(ExitB)) {
ExitD = DT->findNearestCommonDominator(ExitD, PB);
if (!ExitD)
break;
diff --git a/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp
index 0e6555024303..47bebf77b31b 100644
--- a/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp
@@ -286,9 +286,6 @@ void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) {
}
void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) {
- if (SU->isScheduled)
- return;
-
for (const SDep &PI : SU->Preds) {
unsigned PredReadyCycle = PI.getSUnit()->TopReadyCycle;
unsigned MinLatency = PI.getLatency();
@@ -298,13 +295,12 @@ void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) {
if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
SU->TopReadyCycle = PredReadyCycle + MinLatency;
}
- Top.releaseNode(SU, SU->TopReadyCycle);
+
+ if (!SU->isScheduled)
+ Top.releaseNode(SU, SU->TopReadyCycle);
}
void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) {
- if (SU->isScheduled)
- return;
-
assert(SU->getInstr() && "Scheduled SUnit must have instr");
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
@@ -317,7 +313,9 @@ void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) {
if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
SU->BotReadyCycle = SuccReadyCycle + MinLatency;
}
- Bot.releaseNode(SU, SU->BotReadyCycle);
+
+ if (!SU->isScheduled)
+ Bot.releaseNode(SU, SU->BotReadyCycle);
}
/// Does this SU have a hazard within the current instruction group.
diff --git a/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp b/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
index 8dc1113194a8..8edcb745d654 100644
--- a/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
@@ -535,13 +535,9 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
// I am doing this only because LLVM does not provide LiveOut
// at the BB level.
bool predLive = false;
- for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(),
- SIE = MBB->succ_end();
- SI != SIE; ++SI) {
- MachineBasicBlock *succMBB = *SI;
- if (succMBB->isLiveIn(predReg))
+ for (const MachineBasicBlock *SuccMBB : MBB->successors())
+ if (SuccMBB->isLiveIn(predReg))
predLive = true;
- }
if (predLive)
break;
diff --git a/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp b/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp
index e026bb6d601d..bfd02802b782 100644
--- a/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp
@@ -67,26 +67,23 @@ bool HexagonOptimizeSZextends::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
- unsigned Idx = 1;
+ unsigned Idx = 0;
// Try to optimize sign extends in formal parameters. It's relying on
// callee already sign extending the values. I'm not sure if our ABI
// requires callee to sign extend though.
for (auto &Arg : F.args()) {
- if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) {
+ if (F.getAttributes().hasParamAttr(Idx, Attribute::SExt)) {
if (!isa<PointerType>(Arg.getType())) {
- for (auto UI = Arg.use_begin(); UI != Arg.use_end();) {
- if (isa<SExtInst>(*UI)) {
- Instruction* Use = cast<Instruction>(*UI);
+ for (Use &U : llvm::make_early_inc_range(Arg.uses())) {
+ if (isa<SExtInst>(U)) {
+ Instruction* Use = cast<Instruction>(U);
SExtInst* SI = new SExtInst(&Arg, Use->getType());
assert (EVT::getEVT(SI->getType()) ==
(EVT::getEVT(Use->getType())));
- ++UI;
Use->replaceAllUsesWith(SI);
Instruction* First = &F.getEntryBlock().front();
SI->insertBefore(First);
Use->eraseFromParent();
- } else {
- ++UI;
}
}
}
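The index change above (Idx now starting at 0) follows from the API swap: AttributeList::hasAttribute took the legacy attribute index, where 1 denotes the first parameter, while hasParamAttr takes the 0-based parameter number. A small sketch of the new-style query; the function name is made up for illustration:

#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"

// Hypothetical query: is the first formal parameter marked signext?
static bool firstParamIsSExt(const llvm::Function &F) {
  return F.arg_size() > 0 &&
         F.getAttributes().hasParamAttr(0, llvm::Attribute::SExt);
}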
diff --git a/llvm/lib/Target/Hexagon/HexagonPseudo.td b/llvm/lib/Target/Hexagon/HexagonPseudo.td
index 20c939577586..11f8af7c41a0 100644
--- a/llvm/lib/Target/Hexagon/HexagonPseudo.td
+++ b/llvm/lib/Target/Hexagon/HexagonPseudo.td
@@ -198,7 +198,7 @@ def PS_callr_nr: InstHexagon<(outs), (ins IntRegs:$Rs),
let isCall = 1, hasSideEffects = 1,
isExtended = 0, isExtendable = 1, opExtendable = 0, isCodeGenOnly = 1,
BaseOpcode = "PS_call_nr", isExtentSigned = 1, opExtentAlign = 2 in
-class Call_nr<bits<5> nbits, bit isPred, bit isFalse, dag iops,
+class Call_nr<bits<5> nbits, bit isFalse, dag iops,
InstrItinClass itin>
: Pseudo<(outs), iops, "">, PredRel {
bits<2> Pu;
@@ -210,7 +210,7 @@ class Call_nr<bits<5> nbits, bit isPred, bit isFalse, dag iops,
let Itinerary = itin;
}
-def PS_call_nr : Call_nr<24, 0, 0, (ins s32_0Imm:$Ii), J2_call.Itinerary>;
+def PS_call_nr : Call_nr<24, 0, (ins s32_0Imm:$Ii), J2_call.Itinerary>;
//def PS_call_nrt: Call_nr<17, 1, 0, (ins PredRegs:$Pu, s32_0Imm:$dst),
// J2_callt.Itinerary>;
//def PS_call_nrf: Call_nr<17, 1, 1, (ins PredRegs:$Pu, s32_0Imm:$dst),
@@ -267,7 +267,7 @@ let isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1,
class CondStr<string CReg, bit True, bit New> {
string S = "if (" # !if(True,"","!") # CReg # !if(New,".new","") # ") ";
}
-class JumpOpcStr<string Mnemonic, bit New, bit Taken> {
+class JumpOpcStr<string Mnemonic, bit Taken> {
string S = Mnemonic # !if(Taken, ":t", ":nt");
}
let isBranch = 1, isIndirectBranch = 1, Defs = [PC], isPredicated = 1,
@@ -275,7 +275,7 @@ let isBranch = 1, isIndirectBranch = 1, Defs = [PC], isPredicated = 1,
class T_JMPr_c <bit PredNot, bit isPredNew, bit isTak, InstHexagon rootInst>
: InstHexagon<(outs), (ins PredRegs:$src, IntRegs:$dst),
CondStr<"$src", !if(PredNot,0,1), isPredNew>.S #
- JumpOpcStr<"jumpr", isPredNew, isTak>.S # " $dst",
+ JumpOpcStr<"jumpr", isTak>.S # " $dst",
[], "", rootInst.Itinerary, rootInst.Type>, OpcodeHexagon {
let isTaken = isTak;
diff --git a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
index 49428db223a1..8b7138d3c809 100644
--- a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
+++ b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
@@ -31,6 +31,19 @@ let Namespace = "Hexagon" in {
let HWEncoding{4-0} = num;
}
+ class HexagonSys<bits<7> num, string n, list<string> alt = [],
+ list<Register> alias = []> : Register<n, alt> {
+ let Aliases = alias;
+ let HWEncoding{6-0} = num;
+ }
+
+ class HexagonDoubleSys<bits<7> num, string n, list<Register> subregs,
+ list<string> alt = []> :
+ RegisterWithSubRegs<n, subregs> {
+ let AltNames = alt;
+ let HWEncoding{6-0} = num;
+ }
+
// Registers are identified with 5-bit ID numbers.
// Ri - 32-bit integer registers.
class Ri<bits<5> num, string n, list<string> alt = []> :
@@ -74,6 +87,18 @@ let Namespace = "Hexagon" in {
let SubRegs = subregs;
}
+ // Rs - system registers
+ class Rs<bits<7> num, string n,
+ list<string> alt = [], list<Register> alias = []> :
+ HexagonSys<num, n, alt, alias>;
+
+ // Rss - 64-bit system registers.
+ class Rss<bits<7> num, string n, list<Register> subregs,
+ list<string> alt = []> :
+ HexagonDoubleSys<num, n, subregs, alt> {
+ let SubRegs = subregs;
+ }
+
// Mx - address modifier registers
class Mx<bits<1> num, string n> : Register<n, []> {
let HWEncoding{0} = num;
@@ -260,6 +285,134 @@ let Namespace = "Hexagon" in {
def Q2 : Rq<2, "q2">, DwarfRegNum<[133]>;
def Q3 : Rq<3, "q3">, DwarfRegNum<[134]>;
+ // System registers.
+ def SGP0 : Rs<0, "sgp0", ["s0"]>, DwarfRegNum<[144]>;
+ def SGP1 : Rs<1, "sgp1", ["s1"]>, DwarfRegNum<[145]>;
+ def STID : Rs<2, "stid", ["s2"]>, DwarfRegNum<[146]>;
+ def ELR : Rs<3, "elr", ["s3"]>, DwarfRegNum<[147]>;
+ def BADVA0 : Rs<4, "badva0", ["s4"]>, DwarfRegNum<[148]>;
+ def BADVA1 : Rs<5, "badva1", ["s5"]>, DwarfRegNum<[149]>;
+ def SSR : Rs<6, "ssr", ["s6"]>, DwarfRegNum<[150]>;
+ def CCR : Rs<7, "ccr", ["s7"]>, DwarfRegNum<[151]>;
+ def HTID : Rs<8, "htid", ["s8"]>, DwarfRegNum<[152]>;
+ def BADVA : Rs<9, "badva", ["s9"]>, DwarfRegNum<[153]>;
+ def IMASK : Rs<10, "imask", ["s10"]>, DwarfRegNum<[154]>;
+ def S11 : Rs<11, "s11">, DwarfRegNum<[155]>;
+ def S12 : Rs<12, "s12">, DwarfRegNum<[156]>;
+ def S13 : Rs<13, "s13">, DwarfRegNum<[157]>;
+ def S14 : Rs<14, "s14">, DwarfRegNum<[158]>;
+ def S15 : Rs<15, "s15">, DwarfRegNum<[159]>;
+ def EVB : Rs<16, "evb", ["s16"]>, DwarfRegNum<[160]>;
+ def MODECTL : Rs<17, "modectl", ["s17"]>, DwarfRegNum<[161]>;
+ def SYSCFG : Rs<18, "syscfg", ["s18"]>, DwarfRegNum<[162]>;
+ def S19 : Rs<19, "s19", ["s19"]>, DwarfRegNum<[163]>;
+ def S20 : Rs<20, "s20", ["s20"]>, DwarfRegNum<[164]>;
+ def VID : Rs<21, "vid", ["s21"]>, DwarfRegNum<[165]>;
+ def S22 : Rs<22, "s22", ["s22"]>, DwarfRegNum<[166]>;
+ def S23 : Rs<23, "s23">, DwarfRegNum<[167]>;
+ def S24 : Rs<24, "s24">, DwarfRegNum<[168]>;
+ def S25 : Rs<25, "s25">, DwarfRegNum<[169]>;
+ def S26 : Rs<26, "s26">, DwarfRegNum<[170]>;
+ def CFGBASE : Rs<27, "cfgbase", ["s27"]>, DwarfRegNum<[171]>;
+ def DIAG : Rs<28, "diag", ["s28"]>, DwarfRegNum<[172]>;
+ def REV : Rs<29, "rev", ["s29"]>, DwarfRegNum<[173]>;
+ def PCYCLELO : Rs<30, "pcyclelo", ["s30"]>, DwarfRegNum<[174]>;
+ def PCYCLEHI : Rs<31, "pcyclehi", ["s31"]>, DwarfRegNum<[175]>;
+ def ISDBST : Rs<32, "isdbst", ["s32"]>, DwarfRegNum<[176]>;
+ def ISDBCFG0 : Rs<33, "isdbcfg0", ["s33"]>, DwarfRegNum<[177]>;
+ def ISDBCFG1 : Rs<34, "isdbcfg1", ["s34"]>, DwarfRegNum<[178]>;
+ def S35 : Rs<35, "s35">, DwarfRegNum<[179]>;
+ def BRKPTPC0 : Rs<36, "brkptpc0", ["s36"]>, DwarfRegNum<[180]>;
+ def BRKPTCFG0: Rs<37, "brkptcfg0", ["s37"]>, DwarfRegNum<[181]>;
+ def BRKPTPC1 : Rs<38, "brkptpc1", ["s38"]>, DwarfRegNum<[182]>;
+ def BRKPTCFG1: Rs<39, "brkptcfg1", ["s39"]>, DwarfRegNum<[183]>;
+ def ISDBMBXIN: Rs<40, "isdbmbxin", ["s40"]>, DwarfRegNum<[184]>;
+ def ISDBMBXOUT: Rs<41, "isdbmbxout", ["s41"]>, DwarfRegNum<[185]>;
+ def ISDBEN: Rs<42, "isdben", ["s42"]>, DwarfRegNum<[186]>;
+ def ISDBGPR: Rs<43, "isdbgpr", ["s43"]>, DwarfRegNum<[187]>;
+ def S44: Rs<44, "s44">, DwarfRegNum<[188]>;
+ def S45: Rs<45, "s45">, DwarfRegNum<[189]>;
+ def S46: Rs<46, "s46">, DwarfRegNum<[190]>;
+ def S47: Rs<47, "s47">, DwarfRegNum<[191]>;
+ def PMUCNT0: Rs<48, "pmucnt0", ["s48"]>, DwarfRegNum<[192]>;
+ def PMUCNT1: Rs<49, "pmucnt1", ["s49"]>, DwarfRegNum<[193]>;
+ def PMUCNT2: Rs<50, "pmucnt2", ["s50"]>, DwarfRegNum<[194]>;
+ def PMUCNT3: Rs<51, "pmucnt3", ["s51"]>, DwarfRegNum<[195]>;
+ def PMUEVTCFG: Rs<52, "pmuevtcfg", ["s52"]>, DwarfRegNum<[196]>;
+ def PMUCFG: Rs<53, "pmucfg", ["s53"]>, DwarfRegNum<[197]>;
+ def S54: Rs<54, "s54">, DwarfRegNum<[198]>;
+ def S55: Rs<55, "s55">, DwarfRegNum<[199]>;
+ def S56: Rs<56, "s56">, DwarfRegNum<[200]>;
+ def S57: Rs<57, "s57">, DwarfRegNum<[201]>;
+ def S58: Rs<58, "s58">, DwarfRegNum<[202]>;
+ def S59: Rs<59, "s59">, DwarfRegNum<[203]>;
+ def S60: Rs<60, "s60">, DwarfRegNum<[204]>;
+ def S61: Rs<61, "s61">, DwarfRegNum<[205]>;
+ def S62: Rs<62, "s62">, DwarfRegNum<[206]>;
+ def S63: Rs<63, "s63">, DwarfRegNum<[207]>;
+ def S64: Rs<64, "s64">, DwarfRegNum<[208]>;
+ def S65: Rs<65, "s65">, DwarfRegNum<[209]>;
+ def S66: Rs<66, "s66">, DwarfRegNum<[210]>;
+ def S67: Rs<67, "s67">, DwarfRegNum<[211]>;
+ def S68: Rs<68, "s68">, DwarfRegNum<[212]>;
+ def S69: Rs<69, "s69">, DwarfRegNum<[213]>;
+ def S70: Rs<70, "s70">, DwarfRegNum<[214]>;
+ def S71: Rs<71, "s71">, DwarfRegNum<[215]>;
+ def S72: Rs<72, "s72">, DwarfRegNum<[216]>;
+ def S73: Rs<73, "s73">, DwarfRegNum<[217]>;
+ def S74: Rs<74, "s74">, DwarfRegNum<[218]>;
+ def S75: Rs<75, "s75">, DwarfRegNum<[219]>;
+ def S76: Rs<76, "s76">, DwarfRegNum<[220]>;
+ def S77: Rs<77, "s77">, DwarfRegNum<[221]>;
+ def S78: Rs<78, "s78">, DwarfRegNum<[222]>;
+ def S79: Rs<79, "s79">, DwarfRegNum<[223]>;
+ def S80: Rs<80, "s80">, DwarfRegNum<[224]>;
+
+ // System Register Pair
+ let SubRegIndices = [isub_lo, isub_hi], CoveredBySubRegs = 1 in {
+ def SGP1_0 : Rss<0, "s1:0", [SGP0, SGP1], ["sgp1:0"]>, DwarfRegNum<[144]>;
+ def S3_2 : Rss<2, "s3:2", [STID, ELR]>, DwarfRegNum<[146]>;
+ def S5_4 : Rss<4, "s5:4", [BADVA0, BADVA1], ["badva1:0"]>,
+ DwarfRegNum<[148]>;
+ def S7_6 : Rss<6, "s7:6", [SSR, CCR], ["ccr:ssr"]>, DwarfRegNum<[150]>;
+ def S9_8 : Rss<8, "s9:8", [HTID, BADVA]>, DwarfRegNum<[152]>;
+ def S11_10 : Rss<10, "s11:10", [IMASK, S11]>, DwarfRegNum<[154]>;
+ def S13_12 : Rss<12, "s13:12", [S12, S13]>, DwarfRegNum<[156]>;
+ def S15_14 : Rss<14, "s15:14", [S14, S15]>, DwarfRegNum<[158]>;
+ def S17_16 : Rss<16, "s17:16", [EVB, MODECTL]>, DwarfRegNum<[160]>;
+ def S19_18 : Rss<18, "s19:18", [SYSCFG, S19]>, DwarfRegNum<[162]>;
+ def S21_20 : Rss<20, "s21:20", [S20, VID]>, DwarfRegNum<[164]>;
+ def S23_22 : Rss<22, "s23:22", [S22, S23]>, DwarfRegNum<[166]>;
+ def S25_24 : Rss<24, "s25:24", [S24, S25]>, DwarfRegNum<[168]>;
+ def S27_26 : Rss<26, "s27:26", [S26, CFGBASE]>, DwarfRegNum<[170]>;
+ def S29_28 : Rss<28, "s29:28", [DIAG, REV]>, DwarfRegNum<[172]>;
+ def S31_30 : Rss<30, "s31:30", [PCYCLELO, PCYCLEHI], ["pcycle"]>, DwarfRegNum<[174]>;
+ def S33_32 : Rss<32, "s33:32", [ISDBST, ISDBCFG0]>, DwarfRegNum<[176]>;
+ def S35_34 : Rss<34, "s35:34", [ISDBCFG1, S35]>, DwarfRegNum<[178]>;
+ def S37_36 : Rss<36, "s37:36", [BRKPTPC0, BRKPTCFG0]>, DwarfRegNum<[180]>;
+ def S39_38 : Rss<38, "s39:38", [BRKPTPC1, BRKPTCFG1]>, DwarfRegNum<[182]>;
+ def S41_40 : Rss<40, "s41:40", [ISDBMBXIN, ISDBMBXOUT]>, DwarfRegNum<[184]>;
+ def S43_42 : Rss<42, "s43:42", [ISDBEN, ISDBGPR]>, DwarfRegNum<[186]>;
+ def S45_44 : Rss<44, "s45:44", [S44, S45]>, DwarfRegNum<[188]>;
+ def S47_46 : Rss<46, "s47:46", [S46, S47]>, DwarfRegNum<[190]>;
+ def S49_48 : Rss<48, "s49:48", [PMUCNT0, PMUCNT1]>, DwarfRegNum<[192]>;
+ def S51_50 : Rss<50, "s51:50", [PMUCNT2, PMUCNT3]>, DwarfRegNum<[194]>;
+ def S53_52 : Rss<52, "s53:52", [PMUEVTCFG, PMUCFG]>, DwarfRegNum<[196]>;
+ def S55_54 : Rss<54, "s55:54", [S54, S55]>, DwarfRegNum<[198]>;
+ def S57_56 : Rss<56, "s57:56", [S56, S57]>, DwarfRegNum<[200]>;
+ def S59_58 : Rss<58, "s59:58", [S58, S59]>, DwarfRegNum<[202]>;
+ def S61_60 : Rss<60, "s61:60", [S60, S61]>, DwarfRegNum<[204]>;
+ def S63_62 : Rss<62, "s63:62", [S62, S63]>, DwarfRegNum<[206]>;
+ def S65_64 : Rss<64, "s65:64", [S64, S65]>, DwarfRegNum<[208]>;
+ def S67_66 : Rss<66, "s67:66", [S66, S67]>, DwarfRegNum<[210]>;
+ def S69_68 : Rss<68, "s69:68", [S68, S69]>, DwarfRegNum<[212]>;
+ def S71_70 : Rss<70, "s71:70", [S70, S71]>, DwarfRegNum<[214]>;
+ def S73_72 : Rss<72, "s73:72", [S72, S73]>, DwarfRegNum<[216]>;
+ def S75_74 : Rss<74, "s75:74", [S74, S75]>, DwarfRegNum<[218]>;
+ def S77_76 : Rss<76, "s77:76", [S77, S76]>, DwarfRegNum<[219]>;
+ def S79_78 : Rss<78, "s79:78", [S79, S78]>, DwarfRegNum<[220]>;
+ }
+
// Guest Registers
def GELR: Rg<0, "gelr", ["g0"]>, DwarfRegNum<[220]>;
def GSR: Rg<1, "gsr", ["g1"]>, DwarfRegNum<[221]>;
@@ -432,6 +585,40 @@ def GuestRegs64 : RegisterClass<"Hexagon", [i64], 64,
G25_24, G27_26, G29_28,
G31_30)>;
+let Size = 32, isAllocatable = 0 in
+def SysRegs : RegisterClass<"Hexagon", [i32], 32,
+ (add SGP0, SGP1, STID, ELR, BADVA0, BADVA1,
+ SSR, CCR, HTID, BADVA, IMASK,
+ S11, S12, S13, S14, S15,
+ S19, S23, S25,
+ EVB, MODECTL, SYSCFG, S20, VID, S22, S24,
+ S26, CFGBASE, DIAG, REV, PCYCLEHI,
+ PCYCLELO, ISDBST, ISDBCFG0, ISDBCFG1, S35,
+ BRKPTPC0, BRKPTCFG0, BRKPTPC1, BRKPTCFG1,
+ ISDBMBXIN, ISDBMBXOUT, ISDBEN, ISDBGPR,
+ S44, S45, S46, S47,
+ PMUCNT0, PMUCNT1, PMUCNT2, PMUCNT3,
+ PMUEVTCFG, PMUCFG, S54, S55, S56, S57,
+ S58, S59, S60, S61, S62, S63, S64, S65, S66, S67,
+ S68, S69, S70, S71, S72, S73, S74, S75, S76, S77,
+ S78, S79, S80
+ )>;
+
+let Size = 64, isAllocatable = 0 in
+def SysRegs64 : RegisterClass<"Hexagon", [i64], 64,
+ (add SGP1_0,
+ S3_2, S5_4, S7_6, S9_8,
+ S11_10, S13_12, S15_14,
+ S17_16, S19_18, S21_20,
+ S23_22, S25_24,
+ S27_26, S29_28, S31_30, S33_32, S35_34,
+ S37_36, S39_38, S41_40, S43_42, S45_44,
+ S47_46, S49_48, S51_50, S53_52,
+ S55_54, S57_56, S59_58,
+ S61_60, S63_62, S65_64, S67_66, S69_68,
+ S71_70, S73_72, S75_74, S77_76, S79_78
+ )>;
+
// These registers are new for v62 and onward.
// The function RegisterMatchesArch() uses this list for validation.
let isAllocatable = 0 in
diff --git a/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp b/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
index f9fb14c190ff..4890c3dbb7bc 100644
--- a/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
@@ -70,9 +70,7 @@ bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) {
// Loop over all of the basic blocks
for (MachineBasicBlock &B : Fn) {
- for (auto I = B.begin(), E = B.end(); I != E; ) {
- MachineInstr &MI = *I;
- ++I;
+ for (MachineInstr &MI : llvm::make_early_inc_range(B)) {
unsigned Opc = MI.getOpcode();
if (Opc == Hexagon::CONST32) {
diff --git a/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp b/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp
index 52452e9c6cd5..9a0f57fce97d 100644
--- a/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp
@@ -508,7 +508,7 @@ void HexagonSplitDoubleRegs::collectIndRegsForLoop(const MachineLoop *L,
while (CmpI->getOpcode() == Hexagon::C2_not)
CmpI = MRI->getVRegDef(CmpI->getOperand(1).getReg());
- int Mask = 0, Val = 0;
+ int64_t Mask = 0, Val = 0;
bool OkCI = TII->analyzeCompare(*CmpI, CmpR1, CmpR2, Mask, Val);
if (!OkCI)
return;
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 3cbb4b591f8c..66de698182d7 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -23,9 +23,9 @@
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/Scalar.h"
@@ -238,9 +238,9 @@ const HexagonSubtarget *
HexagonTargetMachine::getSubtargetImpl(const Function &F) const {
AttributeList FnAttrs = F.getAttributes();
Attribute CPUAttr =
- FnAttrs.getAttribute(AttributeList::FunctionIndex, "target-cpu");
+ FnAttrs.getFnAttr("target-cpu");
Attribute FSAttr =
- FnAttrs.getAttribute(AttributeList::FunctionIndex, "target-features");
+ FnAttrs.getFnAttr("target-features");
std::string CPU =
CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU;
@@ -280,11 +280,11 @@ void HexagonTargetMachine::adjustPassManager(PassManagerBuilder &PMB) {
void HexagonTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
PB.registerLateLoopOptimizationsEPCallback(
- [=](LoopPassManager &LPM, PassBuilder::OptimizationLevel Level) {
+ [=](LoopPassManager &LPM, OptimizationLevel Level) {
LPM.addPass(HexagonLoopIdiomRecognitionPass());
});
PB.registerLoopOptimizerEndEPCallback(
- [=](LoopPassManager &LPM, PassBuilder::OptimizationLevel Level) {
+ [=](LoopPassManager &LPM, OptimizationLevel Level) {
LPM.addPass(HexagonVectorLoopCarriedReusePass());
});
}
@@ -447,11 +447,11 @@ void HexagonPassConfig::addPreEmitPass() {
}
// Packetization is mandatory: it handles gather/scatter at all opt levels.
- addPass(createHexagonPacketizer(NoOpt), false);
+ addPass(createHexagonPacketizer(NoOpt));
if (EnableVectorPrint)
- addPass(createHexagonVectorPrint(), false);
+ addPass(createHexagonVectorPrint());
// Add CFI instructions if necessary.
- addPass(createHexagonCallFrameInformation(), false);
+ addPass(createHexagonCallFrameInformation());
}
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
index 25466786ee41..7df32e4072e3 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
@@ -90,9 +90,8 @@ static bool isSmallDataSection(StringRef Sec) {
return true;
// If either ".sdata." or ".sbss." is a substring of the section name
// then put the symbol in small data.
- return Sec.find(".sdata.") != StringRef::npos ||
- Sec.find(".sbss.") != StringRef::npos ||
- Sec.find(".scommon.") != StringRef::npos;
+ return Sec.contains(".sdata.") || Sec.contains(".sbss.") ||
+ Sec.contains(".scommon.");
}
static const char *getSectionSuffixForSize(unsigned Size) {
@@ -178,10 +177,10 @@ MCSection *HexagonTargetObjectFile::getExplicitSectionGlobal(
if (GO->hasSection()) {
StringRef Section = GO->getSection();
- if (Section.find(".access.text.group") != StringRef::npos)
+ if (Section.contains(".access.text.group"))
return getContext().getELFSection(GO->getSection(), ELF::SHT_PROGBITS,
ELF::SHF_ALLOC | ELF::SHF_EXECINSTR);
- if (Section.find(".access.data.group") != StringRef::npos)
+ if (Section.contains(".access.data.group"))
return getContext().getELFSection(GO->getSection(), ELF::SHT_PROGBITS,
ELF::SHF_WRITE | ELF::SHF_ALLOC);
}
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetStreamer.h b/llvm/lib/Target/Hexagon/HexagonTargetStreamer.h
index a5b14a7e0764..a99aa4f16a08 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetStreamer.h
+++ b/llvm/lib/Target/Hexagon/HexagonTargetStreamer.h
@@ -16,6 +16,7 @@ class HexagonTargetStreamer : public MCTargetStreamer {
public:
HexagonTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
virtual void emitCodeAlignment(unsigned ByteAlignment,
+ const MCSubtargetInfo *STI,
unsigned MaxBytesToEmit = 0){};
virtual void emitFAlign(unsigned Size, unsigned MaxBytesToEmit){};
virtual void emitCommonSymbolSorted(MCSymbol *Symbol, uint64_t Size,
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
index 108027d79754..1bdd8c3c513a 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@@ -64,7 +64,8 @@ HexagonTTIImpl::getPopcntSupport(unsigned IntTyWidthInBit) const {
// The Hexagon target can unroll loops with run-time trip counts.
void HexagonTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
- TTI::UnrollingPreferences &UP) {
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE) {
UP.Runtime = UP.Partial = true;
}
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
index 2144fb27eb67..9e637dfc3e16 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
@@ -61,7 +61,8 @@ public:
// The Hexagon target can unroll loops with run-time trip counts.
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
- TTI::UnrollingPreferences &UP);
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE);
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP);
@@ -120,10 +121,9 @@ public:
MaybeAlign Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
- InstructionCost
- getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
- unsigned AddressSpace,
- TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency);
+ InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
+ Align Alignment, unsigned AddressSpace,
+ TTI::TargetCostKind CostKind);
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
ArrayRef<int> Mask, int Index, Type *SubTp);
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
@@ -133,16 +133,14 @@ public:
const Instruction *I);
InstructionCost getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
- Align Alignment, unsigned AddressSpace,
- TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
+ Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond = false, bool UseMaskForGaps = false);
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
CmpInst::Predicate VecPred,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
InstructionCost getArithmeticInstrCost(
- unsigned Opcode, Type *Ty,
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+ unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
diff --git a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index fa1ba4f2e469..1d325553f45a 100644
--- a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -203,6 +203,10 @@ static MachineBasicBlock::iterator moveInstrOut(MachineInstr &MI,
}
bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) {
+ // FIXME: This pass causes verification failures.
+ MF.getProperties().set(
+ MachineFunctionProperties::Property::FailsVerification);
+
auto &HST = MF.getSubtarget<HexagonSubtarget>();
HII = HST.getInstrInfo();
HRI = HST.getRegisterInfo();
@@ -230,16 +234,9 @@ bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) {
// dependence between Insn 0 and Insn 2. This can lead to incorrect
// packetization
for (MachineBasicBlock &MB : MF) {
- auto End = MB.end();
- auto MI = MB.begin();
- while (MI != End) {
- auto NextI = std::next(MI);
- if (MI->isKill()) {
- MB.erase(MI);
- End = MB.end();
- }
- MI = NextI;
- }
+ for (MachineInstr &MI : llvm::make_early_inc_range(MB))
+ if (MI.isKill())
+ MB.erase(&MI);
}
// TinyCore with Duplexes: Translate to big-instructions.
@@ -1156,12 +1153,9 @@ bool HexagonPacketizerList::cannotCoexist(const MachineInstr &MI,
void HexagonPacketizerList::unpacketizeSoloInstrs(MachineFunction &MF) {
for (auto &B : MF) {
MachineBasicBlock::iterator BundleIt;
- MachineBasicBlock::instr_iterator NextI;
- for (auto I = B.instr_begin(), E = B.instr_end(); I != E; I = NextI) {
- NextI = std::next(I);
- MachineInstr &MI = *I;
+ for (MachineInstr &MI : llvm::make_early_inc_range(B.instrs())) {
if (MI.isBundle())
- BundleIt = I;
+ BundleIt = MI.getIterator();
if (!MI.isInsideBundle())
continue;
diff --git a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
index f949a9327f7a..897fb209a8bf 100644
--- a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
@@ -82,6 +82,7 @@ public:
int getSizeOf(const Value *Val) const;
int getSizeOf(const Type *Ty) const;
+ int getAllocSizeOf(const Type *Ty) const;
int getTypeAlignment(Type *Ty) const;
VectorType *getByteVectorTy(int ScLen) const;
@@ -443,8 +444,8 @@ auto AlignVectors::createAdjustedPointer(IRBuilder<> &Builder, Value *Ptr,
auto *PtrTy = cast<PointerType>(Ptr->getType());
if (!PtrTy->isOpaque()) {
Type *ElemTy = PtrTy->getElementType();
- int ElemSize = HVC.getSizeOf(ElemTy);
- if (Adjust % ElemSize == 0) {
+ int ElemSize = HVC.getAllocSizeOf(ElemTy);
+ if (Adjust % ElemSize == 0 && Adjust != 0) {
Value *Tmp0 =
Builder.CreateGEP(ElemTy, Ptr, HVC.getConstInt(Adjust / ElemSize));
return Builder.CreatePointerCast(Tmp0, ValTy->getPointerTo());
@@ -979,6 +980,10 @@ auto HexagonVectorCombine::getSizeOf(const Type *Ty) const -> int {
return DL.getTypeStoreSize(const_cast<Type *>(Ty)).getFixedValue();
}
+auto HexagonVectorCombine::getAllocSizeOf(const Type *Ty) const -> int {
+ return DL.getTypeAllocSize(const_cast<Type *>(Ty)).getFixedValue();
+}
+
auto HexagonVectorCombine::getTypeAlignment(Type *Ty) const -> int {
// The actual type may be shorter than the HVX vector, so determine
// the alignment based on subtarget info.
@@ -1326,7 +1331,7 @@ auto HexagonVectorCombine::calculatePointerDifference(Value *Ptr0,
return None;
Builder B(Gep0->getParent());
- int Scale = DL.getTypeStoreSize(Gep0->getSourceElementType());
+ int Scale = getAllocSizeOf(Gep0->getSourceElementType());
// FIXME: for now only check GEPs with a single index.
if (Gep0->getNumOperands() != 2 || Gep1->getNumOperands() != 2)
@@ -1343,7 +1348,7 @@ auto HexagonVectorCombine::calculatePointerDifference(Value *Ptr0,
KnownBits Known0 = computeKnownBits(Idx0, DL, 0, &AC, Gep0, &DT);
KnownBits Known1 = computeKnownBits(Idx1, DL, 0, &AC, Gep1, &DT);
APInt Unknown = ~(Known0.Zero | Known0.One) | ~(Known1.Zero | Known1.One);
- if (Unknown.isAllOnesValue())
+ if (Unknown.isAllOnes())
return None;
Value *MaskU = ConstantInt::get(Idx0->getType(), Unknown);
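The switch above from getTypeStoreSize to the new getAllocSizeOf wrapper matters for padded types: consecutive GEP elements are spaced by the alloc size (store size rounded up to the ABI alignment), so scaling an index by the store size can under-estimate the pointer difference. A stand-alone illustration using DataLayout; the layout string is hypothetical, and any layout with 4-byte-aligned i32 shows the same gap:

#include <cstdint>
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"

static void storeVsAllocSize() {
  llvm::LLVMContext Ctx;
  llvm::DataLayout DL("e-p:32:32-i64:64-a:0-n16:32"); // illustrative layout
  llvm::StructType *S = llvm::StructType::get(
      Ctx, {llvm::Type::getInt32Ty(Ctx), llvm::Type::getInt8Ty(Ctx)});
  uint64_t Store = DL.getTypeStoreSize(S); // 5: bytes actually written
  uint64_t Alloc = DL.getTypeAllocSize(S); // 8: spacing between array elements
  (void)Store;
  (void)Alloc;
}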
diff --git a/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp b/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp
index 310536458de9..f973862a0c9b 100644
--- a/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp
@@ -386,8 +386,7 @@ void HexagonVectorLoopCarriedReuse::findValueToReuse() {
<< " can be reused\n");
SmallVector<Instruction *, 4> PNUsers;
- for (auto UI = PN->use_begin(), E = PN->use_end(); UI != E; ++UI) {
- Use &U = *UI;
+ for (Use &U : PN->uses()) {
Instruction *User = cast<Instruction>(U.getUser());
if (User->getParent() != BB)
@@ -415,9 +414,7 @@ void HexagonVectorLoopCarriedReuse::findValueToReuse() {
// rematerialized in OtherBB, we may find more such "fixup" opportunities
// in this block. So, we'll start over again.
for (Instruction *I : PNUsers) {
- for (auto UI = BEInst->use_begin(), E = BEInst->use_end(); UI != E;
- ++UI) {
- Use &U = *UI;
+ for (Use &U : BEInst->uses()) {
Instruction *BEUser = cast<Instruction>(U.getUser());
if (BEUser->getParent() != BB)
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
index 627c53cadd84..5e5a26fea076 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
@@ -21,9 +21,9 @@
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/EndianStream.h"
-#include "llvm/Support/TargetRegistry.h"
#include <sstream>
@@ -686,10 +686,11 @@ public:
assert(Update && "Didn't find relaxation target");
}
- bool writeNopData(raw_ostream &OS, uint64_t Count) const override {
- static const uint32_t Nopcode = 0x7f000000, // Hard-coded NOP.
- ParseIn = 0x00004000, // In packet parse-bits.
- ParseEnd = 0x0000c000; // End of packet parse-bits.
+ bool writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const override {
+ static const uint32_t Nopcode = 0x7f000000, // Hard-coded NOP.
+ ParseIn = 0x00004000, // In packet parse-bits.
+ ParseEnd = 0x0000c000; // End of packet parse-bits.
while (Count % HEXAGON_INSTR_SIZE) {
LLVM_DEBUG(dbgs() << "Alignment not a multiple of the instruction size:"
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
index 24169c83bdb9..33b2e9a9e302 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
@@ -448,13 +448,12 @@ void HexagonMCCodeEmitter::EncodeSingleInstruction(const MCInst &MI,
++MCNumEmitted;
}
-LLVM_ATTRIBUTE_NORETURN
-static void raise_relocation_error(unsigned Width, unsigned Kind) {
+[[noreturn]] static void raise_relocation_error(unsigned Width, unsigned Kind) {
std::string Text;
raw_string_ostream Stream(Text);
Stream << "Unrecognized relocation combination: width=" << Width
<< " kind=" << Kind;
- report_fatal_error(Stream.str());
+ report_fatal_error(Twine(Stream.str()));
}
/// Some insns are not extended and thus have no bits. These cases require
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index 32b0c610d63d..d832a756cb92 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -10,13 +10,13 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "HexagonArch.h"
#include "HexagonTargetStreamer.h"
#include "MCTargetDesc/HexagonInstPrinter.h"
#include "MCTargetDesc/HexagonMCAsmInfo.h"
#include "MCTargetDesc/HexagonMCELFStreamer.h"
#include "MCTargetDesc/HexagonMCInstrInfo.h"
-#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "TargetInfo/HexagonTargetInfo.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
@@ -32,8 +32,8 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
diff --git a/llvm/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp b/llvm/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
index 48770be3e301..ef9f9fd337fa 100644
--- a/llvm/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
+++ b/llvm/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#include "TargetInfo/HexagonTargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
Target &llvm::getTheHexagonTarget() {
diff --git a/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp b/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
index e2642ddf722b..a994bd7e57a4 100644
--- a/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
+++ b/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
@@ -24,11 +24,11 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
diff --git a/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp b/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp
index b6f372657d59..57343784237d 100644
--- a/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp
+++ b/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp
@@ -19,17 +19,13 @@
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
typedef MCDisassembler::DecodeStatus DecodeStatus;
-namespace llvm {
-Target &getTheLanaiTarget();
-}
-
static MCDisassembler *createLanaiDisassembler(const Target & /*T*/,
const MCSubtargetInfo &STI,
MCContext &Ctx) {
diff --git a/llvm/lib/Target/Lanai/LanaiAluCode.h b/llvm/lib/Target/Lanai/LanaiAluCode.h
index 728332bff00b..69be05542723 100644
--- a/llvm/lib/Target/Lanai/LanaiAluCode.h
+++ b/llvm/lib/Target/Lanai/LanaiAluCode.h
@@ -70,7 +70,7 @@ inline static unsigned makePostOp(unsigned AluOp) {
}
inline static bool modifiesOp(unsigned AluOp) {
- return isPreOp(AluOp) | isPostOp(AluOp);
+ return isPreOp(AluOp) || isPostOp(AluOp);
}
inline static const char *lanaiAluCodeToString(unsigned AluOp) {
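A brief note on the one-character Lanai fix above: for bool operands '|' and '||' yield the same value, but '||' short-circuits and states the boolean intent explicitly, which also satisfies clang's newer bitwise-vs-logical diagnostics. A tiny sketch with placeholder predicates (the bit masks are invented for illustration):

// Placeholder predicates standing in for isPreOp/isPostOp; the masks are
// made up for this sketch.
static bool isPreOpSketch(unsigned AluOp) { return (AluOp & 0x20u) != 0; }
static bool isPostOpSketch(unsigned AluOp) { return (AluOp & 0x10u) != 0; }

// Logical-or keeps boolean intent explicit and short-circuits.
static bool modifiesOpSketch(unsigned AluOp) {
  return isPreOpSketch(AluOp) || isPostOpSketch(AluOp);
}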
diff --git a/llvm/lib/Target/Lanai/LanaiAsmPrinter.cpp b/llvm/lib/Target/Lanai/LanaiAsmPrinter.cpp
index 6bac7c75853d..c0b7fd3fdd5d 100644
--- a/llvm/lib/Target/Lanai/LanaiAsmPrinter.cpp
+++ b/llvm/lib/Target/Lanai/LanaiAsmPrinter.cpp
@@ -11,12 +11,12 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/LanaiInstPrinter.h"
#include "LanaiAluCode.h"
#include "LanaiCondCode.h"
#include "LanaiInstrInfo.h"
#include "LanaiMCInstLower.h"
#include "LanaiTargetMachine.h"
+#include "MCTargetDesc/LanaiInstPrinter.h"
#include "TargetInfo/LanaiTargetInfo.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineConstantPool.h"
@@ -32,7 +32,7 @@
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "asm-printer"
diff --git a/llvm/lib/Target/Lanai/LanaiISelDAGToDAG.cpp b/llvm/lib/Target/Lanai/LanaiISelDAGToDAG.cpp
index aadcdc43f560..45af250b1410 100644
--- a/llvm/lib/Target/Lanai/LanaiISelDAGToDAG.cpp
+++ b/llvm/lib/Target/Lanai/LanaiISelDAGToDAG.cpp
@@ -287,14 +287,14 @@ void LanaiDAGToDAGISel::Select(SDNode *Node) {
ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
// Materialize zero constants as copies from R0. This allows the coalescer
// to propagate these into other instructions.
- if (ConstNode->isNullValue()) {
+ if (ConstNode->isZero()) {
SDValue New = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
SDLoc(Node), Lanai::R0, MVT::i32);
return ReplaceNode(Node, New.getNode());
}
// Materialize all ones constants as copies from R1. This allows the
// coalescer to propagate these into other instructions.
- if (ConstNode->isAllOnesValue()) {
+ if (ConstNode->isAllOnes()) {
SDValue New = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
SDLoc(Node), Lanai::R1, MVT::i32);
return ReplaceNode(Node, New.getNode());
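The isNullValue()/isAllOnesValue() calls above move to the shorter isZero()/isAllOnes() spellings used throughout this import. A minimal sketch of the renamed predicates (classifyImm is illustrative only):

#include "llvm/ADT/APInt.h"

// Hedged sketch of the renamed predicates: isZero()/isAllOnes() replace
// isNullValue()/isAllOnesValue() on APInt and ConstantSDNode alike.
static int classifyImm(const llvm::APInt &Imm) {
  if (Imm.isZero())
    return 0;  // candidate for a copy from the zero register
  if (Imm.isAllOnes())
    return 1;  // candidate for a copy from the all-ones register
  return -1;
}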
diff --git a/llvm/lib/Target/Lanai/LanaiISelLowering.cpp b/llvm/lib/Target/Lanai/LanaiISelLowering.cpp
index b96e178109d0..0d9e63c112fb 100644
--- a/llvm/lib/Target/Lanai/LanaiISelLowering.cpp
+++ b/llvm/lib/Target/Lanai/LanaiISelLowering.cpp
@@ -486,7 +486,7 @@ SDValue LanaiTargetLowering::LowerCCCArguments(
llvm_unreachable("unhandled argument type");
}
} else {
- // Sanity check
+ // Only arguments passed on the stack should make it here.
assert(VA.isMemLoc());
// Load the argument to a virtual register
unsigned ObjSize = VA.getLocVT().getSizeInBits() / 8;
@@ -530,6 +530,15 @@ SDValue LanaiTargetLowering::LowerCCCArguments(
return Chain;
}
+bool LanaiTargetLowering::CanLowerReturn(
+ CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
+
+ return CCInfo.CheckReturn(Outs, RetCC_Lanai32);
+}
+
SDValue
LanaiTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
bool IsVarArg,
@@ -1167,7 +1176,7 @@ SDValue LanaiTargetLowering::LowerGlobalAddress(SDValue Op,
// If the code model is small or global variable will be placed in the small
// section, then assume address will fit in 21-bits.
- const GlobalObject *GO = GV->getBaseObject();
+ const GlobalObject *GO = GV->getAliaseeObject();
if (TLOF->isGlobalInSmallSection(GO, getTargetMachine())) {
SDValue Small = DAG.getTargetGlobalAddress(
GV, DL, getPointerTy(DAG.getDataLayout()), Offset, LanaiII::MO_NO_FLAG);
@@ -1391,8 +1400,7 @@ static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes, SDValue &CC,
// value is 0.
OtherOp = DAG.getConstant(0, dl, VT);
else
- OtherOp =
- DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), dl, VT);
+ OtherOp = DAG.getAllOnesConstant(dl, VT);
return true;
}
}
diff --git a/llvm/lib/Target/Lanai/LanaiISelLowering.h b/llvm/lib/Target/Lanai/LanaiISelLowering.h
index d29d69eaadb0..2f58560f4efe 100644
--- a/llvm/lib/Target/Lanai/LanaiISelLowering.h
+++ b/llvm/lib/Target/Lanai/LanaiISelLowering.h
@@ -90,6 +90,11 @@ public:
SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+ bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
+ bool IsVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const override;
+
Register getRegisterByName(const char *RegName, LLT VT,
const MachineFunction &MF) const override;
std::pair<unsigned, const TargetRegisterClass *>
diff --git a/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp b/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
index c82142970357..21d035c7ee9c 100644
--- a/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
+++ b/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
@@ -19,8 +19,8 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -175,8 +175,8 @@ LanaiInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
}
bool LanaiInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
- Register &SrcReg2, int &CmpMask,
- int &CmpValue) const {
+ Register &SrcReg2, int64_t &CmpMask,
+ int64_t &CmpValue) const {
switch (MI.getOpcode()) {
default:
break;
@@ -203,7 +203,7 @@ bool LanaiInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
// * SFSUB_F_RR can be made redundant by SUB_RI if the operands are the same.
// * SFSUB_F_RI can be made redundant by SUB_I if the operands are the same.
inline static bool isRedundantFlagInstr(MachineInstr *CmpI, unsigned SrcReg,
- unsigned SrcReg2, int ImmValue,
+ unsigned SrcReg2, int64_t ImmValue,
MachineInstr *OI) {
if (CmpI->getOpcode() == Lanai::SFSUB_F_RR &&
OI->getOpcode() == Lanai::SUB_R &&
@@ -281,8 +281,9 @@ inline static unsigned flagSettingOpcodeVariant(unsigned OldOpcode) {
}
bool LanaiInstrInfo::optimizeCompareInstr(
- MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int /*CmpMask*/,
- int CmpValue, const MachineRegisterInfo *MRI) const {
+ MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2,
+ int64_t /*CmpMask*/, int64_t CmpValue,
+ const MachineRegisterInfo *MRI) const {
// Get the unique definition of SrcReg.
MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
if (!MI)
@@ -418,10 +419,8 @@ bool LanaiInstrInfo::optimizeCompareInstr(
// live-out. If it is live-out, do not optimize.
if (!isSafe) {
MachineBasicBlock *MBB = CmpInstr.getParent();
- for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
- SE = MBB->succ_end();
- SI != SE; ++SI)
- if ((*SI)->isLiveIn(Lanai::SR))
+ for (const MachineBasicBlock *Succ : MBB->successors())
+ if (Succ->isLiveIn(Lanai::SR))
return false;
}
diff --git a/llvm/lib/Target/Lanai/LanaiInstrInfo.h b/llvm/lib/Target/Lanai/LanaiInstrInfo.h
index 44c1e629a8e6..5eef4474801d 100644
--- a/llvm/lib/Target/Lanai/LanaiInstrInfo.h
+++ b/llvm/lib/Target/Lanai/LanaiInstrInfo.h
@@ -96,14 +96,14 @@ public:
// SrcReg2 if having two register operands, and the value it compares against
// in CmpValue. Return true if the comparison instruction can be analyzed.
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
- Register &SrcReg2, int &CmpMask,
- int &CmpValue) const override;
+ Register &SrcReg2, int64_t &CmpMask,
+ int64_t &CmpValue) const override;
// See if the comparison instruction can be converted into something more
// efficient. E.g., on Lanai register-register instructions can set the flag
// register, obviating the need for a separate compare.
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
- Register SrcReg2, int CmpMask, int CmpValue,
+ Register SrcReg2, int64_t CmpMask, int64_t CmpValue,
const MachineRegisterInfo *MRI) const override;
// Analyze the given select instruction, returning true if it cannot be
diff --git a/llvm/lib/Target/Lanai/LanaiInstrInfo.td b/llvm/lib/Target/Lanai/LanaiInstrInfo.td
index fcf89a0b52f6..d1fd327722ef 100644
--- a/llvm/lib/Target/Lanai/LanaiInstrInfo.td
+++ b/llvm/lib/Target/Lanai/LanaiInstrInfo.td
@@ -269,7 +269,7 @@ def splsIdempotent : InstrMapping {
// -------------------------------------------------- //
// ALU instructions
// -------------------------------------------------- //
-multiclass ALUbase<bits<3> subOp, string AsmStr, SDNode OpNode,
+multiclass ALUbase<bits<3> subOp, string AsmStr,
PatLeaf LoExt, PatLeaf HiExt,
list<dag> loPattern, list<dag> hiPattern> {
// Register Immediate
@@ -286,7 +286,7 @@ multiclass ALUbase<bits<3> subOp, string AsmStr, SDNode OpNode,
multiclass ALUarith<bits<3> subOp, string AsmStr, SDNode OpNode,
PatLeaf LoExt, PatLeaf HiExt> {
- defm I_ : ALUbase<subOp, AsmStr, OpNode, LoExt, HiExt, [], []>;
+ defm I_ : ALUbase<subOp, AsmStr, LoExt, HiExt, [], []>;
// Register Register
let JJJJJ = 0 in
@@ -297,7 +297,7 @@ multiclass ALUarith<bits<3> subOp, string AsmStr, SDNode OpNode,
multiclass ALUlogic<bits<3> subOp, string AsmStr, SDNode OpNode,
PatLeaf LoExt, PatLeaf HiExt> {
- defm I_ : ALUbase<subOp, AsmStr, OpNode, LoExt, HiExt,
+ defm I_ : ALUbase<subOp, AsmStr, LoExt, HiExt,
[(set GPR:$Rd, (OpNode GPR:$Rs1, LoExt:$imm16))],
[(set GPR:$Rd, (OpNode GPR:$Rs1, HiExt:$imm16))]>;
diff --git a/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp b/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp
index a31f59214ec7..70b6fd2c185d 100644
--- a/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp
+++ b/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp
@@ -20,8 +20,8 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
diff --git a/llvm/lib/Target/Lanai/LanaiTargetTransformInfo.h b/llvm/lib/Target/Lanai/LanaiTargetTransformInfo.h
index f1fcbe4f418a..19a3bf4455ad 100644
--- a/llvm/lib/Target/Lanai/LanaiTargetTransformInfo.h
+++ b/llvm/lib/Target/Lanai/LanaiTargetTransformInfo.h
@@ -52,6 +52,16 @@ public:
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
TTI::TargetCostKind CostKind) {
assert(Ty->isIntegerTy());
+ unsigned BitSize = Ty->getPrimitiveSizeInBits();
+ // There is no cost model for constants with a bit size of 0. Return
+ // TCC_Free here, so that constant hoisting will ignore this constant.
+ if (BitSize == 0)
+ return TTI::TCC_Free;
+ // No cost model for operations on integers larger than 64 bit implemented
+ // yet.
+ if (BitSize > 64)
+ return TTI::TCC_Free;
+
if (Imm == 0)
return TTI::TCC_Free;
if (isInt<16>(Imm.getSExtValue()))
@@ -81,8 +91,7 @@ public:
}
InstructionCost getArithmeticInstrCost(
- unsigned Opcode, Type *Ty,
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+ unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
diff --git a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp
index a17afe5e62f6..3c2a3ac69224 100644
--- a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp
+++ b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp
@@ -69,10 +69,12 @@ public:
return Lanai::NumTargetFixupKinds;
}
- bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
+ bool writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const override;
};
-bool LanaiAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
+bool LanaiAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const {
if ((Count % 4) != 0)
return false;
diff --git a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp
index e850b98de806..eb6bf8d3836c 100644
--- a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp
+++ b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp
@@ -22,8 +22,8 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
#include <cstdint>
#include <string>
@@ -97,6 +97,9 @@ public:
uint64_t &Target) const override {
if (Inst.getNumOperands() == 0)
return false;
+ if (!isConditionalBranch(Inst) && !isUnconditionalBranch(Inst) &&
+ !isCall(Inst))
+ return false;
if (Info->get(Inst.getOpcode()).OpInfo[0].OperandType ==
MCOI::OPERAND_PCREL) {
diff --git a/llvm/lib/Target/Lanai/TargetInfo/LanaiTargetInfo.cpp b/llvm/lib/Target/Lanai/TargetInfo/LanaiTargetInfo.cpp
index 2bb9f6ed1e97..5c63df670938 100644
--- a/llvm/lib/Target/Lanai/TargetInfo/LanaiTargetInfo.cpp
+++ b/llvm/lib/Target/Lanai/TargetInfo/LanaiTargetInfo.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#include "TargetInfo/LanaiTargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp b/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp
index d8465f6d682b..4db879c34ad9 100644
--- a/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp
+++ b/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp
@@ -14,7 +14,7 @@
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
#include <sstream>
@@ -52,6 +52,7 @@ class M68kAsmParser : public MCTargetAsmParser {
bool isExpr();
OperandMatchResultTy parseImm(OperandVector &Operands);
OperandMatchResultTy parseMemOp(OperandVector &Operands);
+ OperandMatchResultTy parseRegOrMoveMask(OperandVector &Operands);
public:
M68kAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
@@ -80,6 +81,7 @@ public:
struct M68kMemOp {
enum class Kind {
Addr,
+ RegMask,
Reg,
RegIndirect,
RegPostIncrement,
@@ -90,6 +92,7 @@ struct M68kMemOp {
// These variables are used for the following forms:
// Addr: (OuterDisp)
+ // RegMask: RegMask (as register mask)
// Reg: %OuterReg
// RegIndirect: (%OuterReg)
// RegPostIncrement: (%OuterReg)+
@@ -106,6 +109,7 @@ struct M68kMemOp {
uint8_t Size : 4;
uint8_t Scale : 4;
const MCExpr *Expr;
+ uint16_t RegMask;
M68kMemOp() {}
M68kMemOp(Kind Op) : Op(Op) {}
@@ -117,14 +121,14 @@ struct M68kMemOp {
class M68kOperand : public MCParsedAsmOperand {
typedef MCParsedAsmOperand Base;
- enum class Kind {
+ enum class KindTy {
Invalid,
Token,
Imm,
MemOp,
};
- Kind Kind;
+ KindTy Kind;
SMLoc Start, End;
union {
StringRef Token;
@@ -133,8 +137,10 @@ class M68kOperand : public MCParsedAsmOperand {
M68kMemOp MemOp;
};
+ template <unsigned N> bool isAddrN() const;
+
public:
- M68kOperand(enum Kind Kind, SMLoc Start, SMLoc End)
+ M68kOperand(KindTy Kind, SMLoc Start, SMLoc End)
: Base(), Kind(Kind), Start(Start), End(End) {}
SMLoc getStartLoc() const override { return Start; }
@@ -143,12 +149,14 @@ public:
void print(raw_ostream &OS) const override;
bool isMem() const override { return false; }
- bool isMemOp() const { return Kind == Kind::MemOp; }
+ bool isMemOp() const { return Kind == KindTy::MemOp; }
static void addExpr(MCInst &Inst, const MCExpr *Expr);
// Reg
bool isReg() const override;
+ bool isAReg() const;
+ bool isDReg() const;
unsigned getReg() const override;
void addRegOperands(MCInst &Inst, unsigned N) const;
@@ -168,8 +176,15 @@ public:
static std::unique_ptr<M68kOperand> createImm(const MCExpr *Expr, SMLoc Start,
SMLoc End);
+ // MoveMask
+ bool isMoveMask() const;
+ void addMoveMaskOperands(MCInst &Inst, unsigned N) const;
+
// Addr
bool isAddr() const;
+ bool isAddr8() const { return isAddrN<8>(); }
+ bool isAddr16() const { return isAddrN<16>(); }
+ bool isAddr32() const { return isAddrN<32>(); }
void addAddrOperands(MCInst &Inst, unsigned N) const;
// ARI
@@ -210,11 +225,45 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeM68kAsmParser() {
#define GET_MATCHER_IMPLEMENTATION
#include "M68kGenAsmMatcher.inc"
+static inline unsigned getRegisterByIndex(unsigned RegisterIndex) {
+ static unsigned RegistersByIndex[] = {
+ M68k::D0, M68k::D1, M68k::D2, M68k::D3, M68k::D4, M68k::D5,
+ M68k::D6, M68k::D7, M68k::A0, M68k::A1, M68k::A2, M68k::A3,
+ M68k::A4, M68k::A5, M68k::A6, M68k::SP,
+ };
+ assert(RegisterIndex <=
+ sizeof(RegistersByIndex) / sizeof(RegistersByIndex[0]));
+ return RegistersByIndex[RegisterIndex];
+}
+
+static inline unsigned getRegisterIndex(unsigned Register) {
+ if (Register >= M68k::D0 && Register <= M68k::D7)
+ return Register - M68k::D0;
+ if (Register >= M68k::A0 && Register <= M68k::A6)
+ return Register - M68k::A0 + 8;
+
+ switch (Register) {
+ case M68k::SP:
+ // SP is sadly not contiguous with the rest of the An registers
+ return 15;
+
+ case M68k::PC:
+ case M68k::CCR:
+ return 16;
+
+ default:
+ llvm_unreachable("unexpected register number");
+ }
+}
+
void M68kMemOp::print(raw_ostream &OS) const {
switch (Op) {
case Kind::Addr:
OS << OuterDisp;
break;
+ case Kind::RegMask:
+ OS << "RegMask(" << format("%04x", RegMask) << ")";
+ break;
case Kind::Reg:
OS << '%' << OuterReg;
break;
@@ -248,7 +297,7 @@ void M68kOperand::addExpr(MCInst &Inst, const MCExpr *Expr) {
// Reg
bool M68kOperand::isReg() const {
- return Kind == Kind::MemOp && MemOp.Op == M68kMemOp::Kind::Reg;
+ return Kind == KindTy::MemOp && MemOp.Op == M68kMemOp::Kind::Reg;
}
unsigned M68kOperand::getReg() const {
@@ -265,13 +314,13 @@ void M68kOperand::addRegOperands(MCInst &Inst, unsigned N) const {
std::unique_ptr<M68kOperand> M68kOperand::createMemOp(M68kMemOp MemOp,
SMLoc Start, SMLoc End) {
- auto Op = std::make_unique<M68kOperand>(Kind::MemOp, Start, End);
+ auto Op = std::make_unique<M68kOperand>(KindTy::MemOp, Start, End);
Op->MemOp = MemOp;
return Op;
}
// Token
-bool M68kOperand::isToken() const { return Kind == Kind::Token; }
+bool M68kOperand::isToken() const { return Kind == KindTy::Token; }
StringRef M68kOperand::getToken() const {
assert(isToken());
return Token;
@@ -279,15 +328,15 @@ StringRef M68kOperand::getToken() const {
std::unique_ptr<M68kOperand> M68kOperand::createToken(StringRef Token,
SMLoc Start, SMLoc End) {
- auto Op = std::make_unique<M68kOperand>(Kind::Token, Start, End);
+ auto Op = std::make_unique<M68kOperand>(KindTy::Token, Start, End);
Op->Token = Token;
return Op;
}
// Imm
-bool M68kOperand::isImm() const { return Kind == Kind::Imm; }
+bool M68kOperand::isImm() const { return Kind == KindTy::Imm; }
void M68kOperand::addImmOperands(MCInst &Inst, unsigned N) const {
- assert(isImm() && "wrong oeprand kind");
+ assert(isImm() && "wrong operand kind");
assert((N == 1) && "can only handle one register operand");
M68kOperand::addExpr(Inst, Expr);
@@ -295,15 +344,53 @@ void M68kOperand::addImmOperands(MCInst &Inst, unsigned N) const {
std::unique_ptr<M68kOperand> M68kOperand::createImm(const MCExpr *Expr,
SMLoc Start, SMLoc End) {
- auto Op = std::make_unique<M68kOperand>(Kind::Imm, Start, End);
+ auto Op = std::make_unique<M68kOperand>(KindTy::Imm, Start, End);
Op->Expr = Expr;
return Op;
}
+// MoveMask
+bool M68kOperand::isMoveMask() const {
+ if (!isMemOp())
+ return false;
+
+ if (MemOp.Op == M68kMemOp::Kind::RegMask)
+ return true;
+
+ if (MemOp.Op != M68kMemOp::Kind::Reg)
+ return false;
+
+ // Only regular address / data registers are allowed to be used
+ // in register masks.
+ return getRegisterIndex(MemOp.OuterReg) < 16;
+}
+
+void M68kOperand::addMoveMaskOperands(MCInst &Inst, unsigned N) const {
+ assert(isMoveMask() && "wrong operand kind");
+ assert((N == 1) && "can only handle one immediate operand");
+
+ uint16_t MoveMask = MemOp.RegMask;
+ if (MemOp.Op == M68kMemOp::Kind::Reg)
+ MoveMask = 1 << getRegisterIndex(MemOp.OuterReg);
+
+ Inst.addOperand(MCOperand::createImm(MoveMask));
+}
+
// Addr
bool M68kOperand::isAddr() const {
return isMemOp() && MemOp.Op == M68kMemOp::Kind::Addr;
}
+// TODO: Maybe we can also store the size of OuterDisp
+// in Size?
+template <unsigned N> bool M68kOperand::isAddrN() const {
+ if (isAddr()) {
+ int64_t Res;
+ if (MemOp.OuterDisp->evaluateAsAbsolute(Res))
+ return isInt<N>(Res);
+ return true;
+ }
+ return false;
+}
void M68kOperand::addAddrOperands(MCInst &Inst, unsigned N) const {
M68kOperand::addExpr(Inst, MemOp.OuterDisp);
}
@@ -412,6 +499,18 @@ static inline bool checkRegisterClass(unsigned RegNo, bool Data, bool Address,
}
}
+bool M68kOperand::isAReg() const {
+ return isReg() && checkRegisterClass(getReg(),
+ /*Data=*/false,
+ /*Address=*/true, /*SP=*/true);
+}
+
+bool M68kOperand::isDReg() const {
+ return isReg() && checkRegisterClass(getReg(),
+ /*Data=*/true,
+ /*Address=*/false, /*SP=*/false);
+}
+
unsigned M68kAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
unsigned Kind) {
M68kOperand &Operand = (M68kOperand &)Op;
@@ -487,11 +586,6 @@ bool M68kAsmParser::parseRegisterName(unsigned &RegNo, SMLoc Loc,
// Parse simple general-purpose registers.
if (RegisterNameLower.size() == 2) {
- static unsigned RegistersByIndex[] = {
- M68k::D0, M68k::D1, M68k::D2, M68k::D3, M68k::D4, M68k::D5,
- M68k::D6, M68k::D7, M68k::A0, M68k::A1, M68k::A2, M68k::A3,
- M68k::A4, M68k::A5, M68k::A6, M68k::SP,
- };
switch (RegisterNameLower[0]) {
case 'd':
@@ -500,7 +594,7 @@ bool M68kAsmParser::parseRegisterName(unsigned &RegNo, SMLoc Loc,
unsigned IndexOffset = (RegisterNameLower[0] == 'a') ? 8 : 0;
unsigned RegIndex = (unsigned)(RegisterNameLower[1] - '0');
if (RegIndex < 8) {
- RegNo = RegistersByIndex[IndexOffset + RegIndex];
+ RegNo = getRegisterByIndex(IndexOffset + RegIndex);
return true;
}
}
@@ -616,16 +710,9 @@ OperandMatchResultTy M68kAsmParser::parseMemOp(OperandVector &Operands) {
bool IsPD = false;
M68kMemOp MemOp;
- // Check for a plain register.
- auto Result = parseRegister(MemOp.OuterReg);
- if (Result == MatchOperand_Success) {
- MemOp.Op = M68kMemOp::Kind::Reg;
- Operands.push_back(
- M68kOperand::createMemOp(MemOp, Start, getLexer().getLoc()));
- return MatchOperand_Success;
- }
-
- if (Result == MatchOperand_ParseFail) {
+ // Check for a plain register or register mask.
+ auto Result = parseRegOrMoveMask(Operands);
+ if (Result != llvm::MatchOperand_NoMatch) {
return Result;
}
@@ -743,6 +830,87 @@ OperandMatchResultTy M68kAsmParser::parseMemOp(OperandVector &Operands) {
return MatchOperand_Success;
}
+OperandMatchResultTy
+M68kAsmParser::parseRegOrMoveMask(OperandVector &Operands) {
+ SMLoc Start = getLexer().getLoc();
+ M68kMemOp MemOp(M68kMemOp::Kind::RegMask);
+ MemOp.RegMask = 0;
+
+ for (;;) {
+ bool IsFirstRegister =
+ (MemOp.Op == M68kMemOp::Kind::RegMask) && (MemOp.RegMask == 0);
+
+ unsigned FirstRegister;
+ auto Result = parseRegister(FirstRegister);
+ if (IsFirstRegister && (Result == llvm::MatchOperand_NoMatch)) {
+ return MatchOperand_NoMatch;
+ }
+ if (Result != llvm::MatchOperand_Success) {
+ Error(getLexer().getLoc(), "expected start register");
+ return MatchOperand_ParseFail;
+ }
+
+ unsigned LastRegister = FirstRegister;
+ if (getLexer().is(AsmToken::Minus)) {
+ getLexer().Lex();
+ Result = parseRegister(LastRegister);
+ if (Result != llvm::MatchOperand_Success) {
+ Error(getLexer().getLoc(), "expected end register");
+ return MatchOperand_ParseFail;
+ }
+ }
+
+ unsigned FirstRegisterIndex = getRegisterIndex(FirstRegister);
+ unsigned LastRegisterIndex = getRegisterIndex(LastRegister);
+
+ uint16_t NumNewBits = LastRegisterIndex - FirstRegisterIndex + 1;
+ uint16_t NewMaskBits = ((1 << NumNewBits) - 1) << FirstRegisterIndex;
+
+ if (IsFirstRegister && (FirstRegister == LastRegister)) {
+ // First register range is a single register, simplify to just Reg
+ // so that it matches more operands.
+ MemOp.Op = M68kMemOp::Kind::Reg;
+ MemOp.OuterReg = FirstRegister;
+ } else {
+ if (MemOp.Op == M68kMemOp::Kind::Reg) {
+ // This is the second register being specified - expand the Reg operand
+ // into a mask first.
+ MemOp.Op = M68kMemOp::Kind::RegMask;
+ MemOp.RegMask = 1 << getRegisterIndex(MemOp.OuterReg);
+
+ if (MemOp.RegMask == 0) {
+ Error(getLexer().getLoc(),
+ "special registers cannot be used in register masks");
+ return MatchOperand_ParseFail;
+ }
+ }
+
+ if ((FirstRegisterIndex >= 16) || (LastRegisterIndex >= 16)) {
+ Error(getLexer().getLoc(),
+ "special registers cannot be used in register masks");
+ return MatchOperand_ParseFail;
+ }
+
+ if (NewMaskBits & MemOp.RegMask) {
+ Error(getLexer().getLoc(), "conflicting masked registers");
+ return MatchOperand_ParseFail;
+ }
+
+ MemOp.RegMask |= NewMaskBits;
+ }
+
+ if (getLexer().isNot(AsmToken::Slash)) {
+ break;
+ }
+
+ getLexer().Lex();
+ }
+
+ Operands.push_back(
+ M68kOperand::createMemOp(MemOp, Start, getLexer().getLoc()));
+ return MatchOperand_Success;
+}
+
void M68kAsmParser::eatComma() {
if (Parser.getTok().is(AsmToken::Comma)) {
Parser.Lex();
@@ -842,19 +1010,19 @@ bool M68kAsmParser::MatchAndEmitInstruction(SMLoc Loc, unsigned &Opcode,
void M68kOperand::print(raw_ostream &OS) const {
switch (Kind) {
- case Kind::Invalid:
+ case KindTy::Invalid:
OS << "invalid";
break;
- case Kind::Token:
+ case KindTy::Token:
OS << "token '" << Token << "'";
break;
- case Kind::Imm:
+ case KindTy::Imm:
OS << "immediate " << Imm;
break;
- case Kind::MemOp:
+ case KindTy::MemOp:
MemOp.print(OS);
break;
}
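To make the register-mask grammar above concrete: parseRegOrMoveMask accepts MOVEM-style lists such as d0-d3/a0-a2, where '-' spans a range and '/' joins groups, and folds them into a 16-bit mask whose bit N is register index N (d0-d7 = 0-7, a0-a6 = 8-14, sp = 15). A hedged, stand-alone sketch of that mask layout (buildMoveMask and the example list are illustrative only):

#include <cstdint>
#include <initializer_list>
#include <utility>

// Build the parser's internal mask from closed index ranges; each range is
// assumed valid (first <= last, both below 16), as the parser enforces.
static uint16_t buildMoveMask(
    std::initializer_list<std::pair<unsigned, unsigned>> Ranges) {
  uint16_t Mask = 0;
  for (const auto &R : Ranges) {
    uint16_t Bits = static_cast<uint16_t>(
        ((1u << (R.second - R.first + 1)) - 1) << R.first);
    Mask |= Bits;
  }
  return Mask;
}

// "d0-d3/a0-a2" corresponds to buildMoveMask({{0, 3}, {8, 10}}) == 0x070F.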
diff --git a/llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp b/llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp
index a8453c838493..a08ffa787095 100644
--- a/llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp
+++ b/llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp
@@ -21,7 +21,7 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
@@ -451,7 +451,8 @@ void M68kDisassembler::decodeImm(MCInst &Instr, unsigned Bead,
llvm_unreachable("invalid imm");
}
- Scratch = (Scratch << NumToRead) | Reader.readBits(NumToRead);
+ Scratch = (NumToRead < 32) ? (Scratch << NumToRead) : 0;
+ Scratch |= Reader.readBits(NumToRead);
}
DecodeStatus M68kDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
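The decodeImm change above guards the accumulator against being shifted by 32 or more bits, which would be undefined behaviour for a 32-bit value; a 32-bit read must replace the accumulator rather than shift it. A minimal sketch of the guarded accumulate (accumulateBits is illustrative only):

#include <cstdint>

// When Width reaches 32 the old value is fully displaced, so start from 0
// instead of shifting a 32-bit accumulator by its full width.
static uint32_t accumulateBits(uint32_t Acc, uint32_t NewBits, unsigned Width) {
  Acc = (Width < 32) ? (Acc << Width) : 0;
  return Acc | NewBits;
}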
diff --git a/llvm/lib/Target/M68k/GlSel/M68kCallLowering.cpp b/llvm/lib/Target/M68k/GISel/M68kCallLowering.cpp
index c5931cbfe04f..9cd959012e6f 100644
--- a/llvm/lib/Target/M68k/GlSel/M68kCallLowering.cpp
+++ b/llvm/lib/Target/M68k/GISel/M68kCallLowering.cpp
@@ -33,7 +33,7 @@ struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler {
: OutgoingValueHandler(MIRBuilder, MRI), MIB(MIB) {}
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign &VA) override {
+ CCValAssign VA) override {
MIB.addUse(PhysReg, RegState::Implicit);
Register ExtReg = extendRegister(ValVReg, VA);
MIRBuilder.buildCopy(PhysReg, ExtReg);
@@ -110,7 +110,7 @@ bool M68kCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
void M68kIncomingValueHandler::assignValueToReg(Register ValVReg,
Register PhysReg,
- CCValAssign &VA) {
+ CCValAssign VA) {
MIRBuilder.getMRI()->addLiveIn(PhysReg);
MIRBuilder.getMBB().addLiveIn(PhysReg);
IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
diff --git a/llvm/lib/Target/M68k/GlSel/M68kCallLowering.h b/llvm/lib/Target/M68k/GISel/M68kCallLowering.h
index 9e0d462db677..47cdefdba100 100644
--- a/llvm/lib/Target/M68k/GlSel/M68kCallLowering.h
+++ b/llvm/lib/Target/M68k/GISel/M68kCallLowering.h
@@ -52,7 +52,7 @@ struct M68kIncomingValueHandler : public CallLowering::IncomingValueHandler {
private:
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign &VA) override;
+ CCValAssign VA) override;
void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
MachinePointerInfo &MPO, CCValAssign &VA) override;
diff --git a/llvm/lib/Target/M68k/GlSel/M68kInstructionSelector.cpp b/llvm/lib/Target/M68k/GISel/M68kInstructionSelector.cpp
index 9ac4ab9a5ba1..9ac4ab9a5ba1 100644
--- a/llvm/lib/Target/M68k/GlSel/M68kInstructionSelector.cpp
+++ b/llvm/lib/Target/M68k/GISel/M68kInstructionSelector.cpp
diff --git a/llvm/lib/Target/M68k/GlSel/M68kLegalizerInfo.cpp b/llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.cpp
index bcbe62816beb..bcbe62816beb 100644
--- a/llvm/lib/Target/M68k/GlSel/M68kLegalizerInfo.cpp
+++ b/llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.cpp
diff --git a/llvm/lib/Target/M68k/GlSel/M68kLegalizerInfo.h b/llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.h
index 205aa81aedcc..205aa81aedcc 100644
--- a/llvm/lib/Target/M68k/GlSel/M68kLegalizerInfo.h
+++ b/llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.h
diff --git a/llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.cpp b/llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.cpp
new file mode 100644
index 000000000000..5c0f5dae8e37
--- /dev/null
+++ b/llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.cpp
@@ -0,0 +1,105 @@
+//===-- M68kRegisterBankInfo.cpp -------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the targeting of the RegisterBankInfo class for M68k.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#include "M68kRegisterBankInfo.h"
+#include "M68kInstrInfo.h" // For the register classes
+#include "M68kSubtarget.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+
+#define GET_TARGET_REGBANK_IMPL
+#include "M68kGenRegisterBank.inc"
+
+using namespace llvm;
+
+// FIXME: TableGen this.
+// If it grows too much and TableGen still isn't ready to do the job, extract it
+// into an M68kGenRegisterBankInfo.def (similar to AArch64).
+namespace llvm {
+namespace M68k {
+enum PartialMappingIdx {
+ PMI_GPR,
+ PMI_Min = PMI_GPR,
+};
+
+RegisterBankInfo::PartialMapping PartMappings[]{
+ // GPR Partial Mapping
+ {0, 32, GPRRegBank},
+};
+
+enum ValueMappingIdx {
+ InvalidIdx = 0,
+ GPR3OpsIdx = 1,
+};
+
+RegisterBankInfo::ValueMapping ValueMappings[] = {
+ // invalid
+ {nullptr, 0},
+ // 3 operands in GPRs
+ {&PartMappings[PMI_GPR - PMI_Min], 1},
+ {&PartMappings[PMI_GPR - PMI_Min], 1},
+ {&PartMappings[PMI_GPR - PMI_Min], 1},
+
+};
+} // end namespace M68k
+} // end namespace llvm
+
+M68kRegisterBankInfo::M68kRegisterBankInfo(const TargetRegisterInfo &TRI)
+ : M68kGenRegisterBankInfo() {}
+
+const RegisterBank &
+M68kRegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
+ LLT) const {
+ return getRegBank(M68k::GPRRegBankID);
+}
+
+const RegisterBankInfo::InstructionMapping &
+M68kRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
+ auto Opc = MI.getOpcode();
+
+ if (!isPreISelGenericOpcode(Opc)) {
+ const InstructionMapping &Mapping = getInstrMappingImpl(MI);
+ if (Mapping.isValid())
+ return Mapping;
+ }
+
+ using namespace TargetOpcode;
+
+ unsigned NumOperands = MI.getNumOperands();
+ const ValueMapping *OperandsMapping = &M68k::ValueMappings[M68k::GPR3OpsIdx];
+
+ switch (Opc) {
+ case G_ADD:
+ case G_SUB:
+ case G_MUL:
+ case G_SDIV:
+ case G_UDIV:
+ case G_LOAD:
+ case G_STORE: {
+ OperandsMapping = &M68k::ValueMappings[M68k::GPR3OpsIdx];
+ break;
+ }
+
+ case G_CONSTANT:
+ case G_FRAME_INDEX:
+ OperandsMapping =
+ getOperandsMapping({&M68k::ValueMappings[M68k::GPR3OpsIdx], nullptr});
+ break;
+ default:
+ return getInvalidInstructionMapping();
+ }
+
+ return getInstructionMapping(DefaultMappingID, /*Cost=*/1, OperandsMapping,
+ NumOperands);
+}
diff --git a/llvm/lib/Target/M68k/GlSel/M68kRegisterBankInfo.h b/llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.h
index 9b97cc4a6dd4..853c75df2bb3 100644
--- a/llvm/lib/Target/M68k/GlSel/M68kRegisterBankInfo.h
+++ b/llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.h
@@ -34,6 +34,12 @@ protected:
class M68kRegisterBankInfo final : public M68kGenRegisterBankInfo {
public:
M68kRegisterBankInfo(const TargetRegisterInfo &TRI);
+
+ const RegisterBank &getRegBankFromRegClass(const TargetRegisterClass &RC,
+ LLT) const override;
+
+ const InstructionMapping &
+ getInstrMapping(const MachineInstr &MI) const override;
};
} // end namespace llvm
#endif
diff --git a/llvm/lib/Target/M68k/GlSel/M68kRegisterBanks.td b/llvm/lib/Target/M68k/GISel/M68kRegisterBanks.td
index 2d1e74f78480..942677a60e6c 100644
--- a/llvm/lib/Target/M68k/GlSel/M68kRegisterBanks.td
+++ b/llvm/lib/Target/M68k/GISel/M68kRegisterBanks.td
@@ -12,4 +12,4 @@
//===----------------------------------------------------------------------===//
/// General Purpose Registers. Here we define a register bank with name AnyGPR
-def GPRRegBank : RegisterBank<"AnyGPR", [DR8]>;
+def GPRRegBank : RegisterBank<"AnyGPR", [XR32]>;
diff --git a/llvm/lib/Target/M68k/GlSel/M68kRegisterBankInfo.cpp b/llvm/lib/Target/M68k/GlSel/M68kRegisterBankInfo.cpp
deleted file mode 100644
index d12478624655..000000000000
--- a/llvm/lib/Target/M68k/GlSel/M68kRegisterBankInfo.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-//===-- M68kRegisterBankInfo.cpp -------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-/// \file
-/// This file implements the targeting of the RegisterBankInfo class for M68k.
-/// \todo This should be generated by TableGen.
-//===----------------------------------------------------------------------===//
-
-#include "M68kRegisterBankInfo.h"
-#include "MCTargetDesc/M68kMCTargetDesc.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
-
-#define GET_TARGET_REGBANK_IMPL
-#include "M68kGenRegisterBank.inc"
-#undef GET_TARGET_REGBANK_IMPL
-
-using namespace llvm;
-
-M68kRegisterBankInfo::M68kRegisterBankInfo(const TargetRegisterInfo &TRI)
- : M68kGenRegisterBankInfo() {}
diff --git a/llvm/lib/Target/M68k/M68k.td b/llvm/lib/Target/M68k/M68k.td
index 669eb32f46f1..fde491e1b6d5 100644
--- a/llvm/lib/Target/M68k/M68k.td
+++ b/llvm/lib/Target/M68k/M68k.td
@@ -78,7 +78,7 @@ def : Proc<"M68060", [ FeatureISA60 ]>;
//===----------------------------------------------------------------------===//
include "M68kRegisterInfo.td"
-include "GlSel/M68kRegisterBanks.td"
+include "GISel/M68kRegisterBanks.td"
//===----------------------------------------------------------------------===//
// Instruction Descriptions
diff --git a/llvm/lib/Target/M68k/M68kAsmPrinter.cpp b/llvm/lib/Target/M68k/M68kAsmPrinter.cpp
index a6fc58b5a277..08b7153632b4 100644
--- a/llvm/lib/Target/M68k/M68kAsmPrinter.cpp
+++ b/llvm/lib/Target/M68k/M68kAsmPrinter.cpp
@@ -21,7 +21,7 @@
#include "MCTargetDesc/M68kInstPrinter.h"
#include "TargetInfo/M68kTargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/M68k/M68kCallingConv.h b/llvm/lib/Target/M68k/M68kCallingConv.h
index 18f72c95cedb..20ffa993897f 100644
--- a/llvm/lib/Target/M68k/M68kCallingConv.h
+++ b/llvm/lib/Target/M68k/M68kCallingConv.h
@@ -24,14 +24,13 @@
namespace llvm {
/// Custom state to propagate llvm type info to register CC assigner
-class M68kCCState : public CCState {
-public:
- const llvm::Function &F;
+struct M68kCCState : public CCState {
+ ArrayRef<Type *> ArgTypeList;
- M68kCCState(const llvm::Function &F, CallingConv::ID CC, bool IsVarArg,
+ M68kCCState(ArrayRef<Type *> ArgTypes, CallingConv::ID CC, bool IsVarArg,
MachineFunction &MF, SmallVectorImpl<CCValAssign> &Locs,
LLVMContext &C)
- : CCState(CC, IsVarArg, MF, Locs, C), F(F) {}
+ : CCState(CC, IsVarArg, MF, Locs, C), ArgTypeList(ArgTypes) {}
};
/// NOTE this function is used to select registers for formal arguments and call
@@ -39,7 +38,7 @@ public:
inline bool CC_M68k_Any_AssignToReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags, CCState &State) {
- M68kCCState CCInfo = static_cast<M68kCCState &>(State);
+ const M68kCCState &CCInfo = static_cast<M68kCCState &>(State);
static const MCPhysReg DataRegList[] = {M68k::D0, M68k::D1, M68k::A0,
M68k::A1};
@@ -52,14 +51,15 @@ inline bool CC_M68k_Any_AssignToReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
M68k::D1,
};
- auto I = CCInfo.F.arg_begin();
+ const auto &ArgTypes = CCInfo.ArgTypeList;
+ auto I = ArgTypes.begin(), End = ArgTypes.end();
int No = ValNo;
- while (No > 0) {
- No -= I->getType()->isIntegerTy(64) ? 2 : 1;
- I++;
+ while (No > 0 && I != End) {
+ No -= (*I)->isIntegerTy(64) ? 2 : 1;
+ ++I;
}
- bool IsPtr = I != CCInfo.F.arg_end() && I->getType()->isPointerTy();
+ bool IsPtr = I != End && (*I)->isPointerTy();
unsigned Reg =
IsPtr ? State.AllocateReg(AddrRegList) : State.AllocateReg(DataRegList);
diff --git a/llvm/lib/Target/M68k/M68kFrameLowering.cpp b/llvm/lib/Target/M68k/M68kFrameLowering.cpp
index 26262b9b573d..66ea6ae38f43 100644
--- a/llvm/lib/Target/M68k/M68kFrameLowering.cpp
+++ b/llvm/lib/Target/M68k/M68kFrameLowering.cpp
@@ -357,7 +357,7 @@ void M68kFrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
if (Reg) {
unsigned Opc = M68k::MOV32ri;
BuildMI(MBB, MBBI, DL, TII.get(Opc), Reg).addImm(Offset);
- Opc = IsSub ? M68k::SUB32rr : M68k::ADD32rr;
+ Opc = IsSub ? M68k::SUB32ar : M68k::ADD32ar;
MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
.addReg(StackPtr)
.addReg(Reg);
@@ -400,13 +400,13 @@ int M68kFrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
return Offset;
}
- if (Opc == M68k::ADD32ri && PI->getOperand(0).getReg() == StackPtr) {
+ if (Opc == M68k::ADD32ai && PI->getOperand(0).getReg() == StackPtr) {
assert(PI->getOperand(1).getReg() == StackPtr);
Offset += PI->getOperand(2).getImm();
MBB.erase(PI);
if (!MergeWithPrevious)
MBBI = NI;
- } else if (Opc == M68k::SUB32ri && PI->getOperand(0).getReg() == StackPtr) {
+ } else if (Opc == M68k::SUB32ai && PI->getOperand(0).getReg() == StackPtr) {
assert(PI->getOperand(1).getReg() == StackPtr);
Offset -= PI->getOperand(2).getImm();
MBB.erase(PI);
@@ -426,7 +426,7 @@ MachineInstrBuilder M68kFrameLowering::BuildStackAdjustment(
bool IsSub = Offset < 0;
uint64_t AbsOffset = IsSub ? -Offset : Offset;
- unsigned Opc = IsSub ? M68k::SUB32ri : M68k::ADD32ri;
+ unsigned Opc = IsSub ? M68k::SUB32ai : M68k::ADD32ai;
MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
.addReg(StackPtr)
diff --git a/llvm/lib/Target/M68k/M68kISelLowering.cpp b/llvm/lib/Target/M68k/M68kISelLowering.cpp
index 3e7cee9889d7..79b395f8f984 100644
--- a/llvm/lib/Target/M68k/M68kISelLowering.cpp
+++ b/llvm/lib/Target/M68k/M68kISelLowering.cpp
@@ -519,9 +519,10 @@ SDValue M68kTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- // It is empty for LibCall
- const Function *CalleeFunc = CLI.CB ? CLI.CB->getCalledFunction() : nullptr;
- M68kCCState CCInfo(*CalleeFunc, CallConv, IsVarArg, MF, ArgLocs,
+ SmallVector<Type *, 4> ArgTypes;
+ for (const auto &Arg : CLI.getArgs())
+ ArgTypes.emplace_back(Arg.Ty);
+ M68kCCState CCInfo(ArgTypes, CallConv, IsVarArg, MF, ArgLocs,
*DAG.getContext());
CCInfo.AnalyzeCallOperands(Outs, CC_M68k);
@@ -876,8 +877,10 @@ SDValue M68kTargetLowering::LowerFormalArguments(
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- M68kCCState CCInfo(MF.getFunction(), CCID, IsVarArg, MF, ArgLocs,
- *DAG.getContext());
+ SmallVector<Type *, 4> ArgTypes;
+ for (const Argument &Arg : MF.getFunction().args())
+ ArgTypes.emplace_back(Arg.getType());
+ M68kCCState CCInfo(ArgTypes, CCID, IsVarArg, MF, ArgLocs, *DAG.getContext());
CCInfo.AnalyzeFormalArguments(Ins, CC_M68k);
@@ -1975,7 +1978,7 @@ SDValue M68kTargetLowering::LowerSETCCCARRY(SDValue Op,
M68k::CondCode CC = TranslateIntegerM68kCC(cast<CondCodeSDNode>(Cond)->get());
EVT CarryVT = Carry.getValueType();
- APInt NegOne = APInt::getAllOnesValue(CarryVT.getScalarSizeInBits());
+ APInt NegOne = APInt::getAllOnes(CarryVT.getScalarSizeInBits());
Carry = DAG.getNode(M68kISD::ADD, DL, DAG.getVTList(CarryVT, MVT::i32), Carry,
DAG.getConstant(NegOne, DL, CarryVT));
@@ -2199,7 +2202,7 @@ SDValue M68kTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
Op2.getOpcode() == ISD::TRUNCATE) {
SDValue T1 = Op1.getOperand(0), T2 = Op2.getOperand(0);
if (T1.getValueType() == T2.getValueType() &&
- // Blacklist CopyFromReg to avoid partial register stalls.
+ // Block CopyFromReg so partial register stalls are avoided.
T1.getOpcode() != ISD::CopyFromReg &&
T2.getOpcode() != ISD::CopyFromReg) {
SDVTList VTs = DAG.getVTList(T1.getValueType(), MVT::Glue);
diff --git a/llvm/lib/Target/M68k/M68kInstrArithmetic.td b/llvm/lib/Target/M68k/M68kInstrArithmetic.td
index f65ad5729eb4..b2c05365d30b 100644
--- a/llvm/lib/Target/M68k/M68kInstrArithmetic.td
+++ b/llvm/lib/Target/M68k/M68kInstrArithmetic.td
@@ -88,14 +88,15 @@ let Defs = [CCR] in {
let Constraints = "$src = $dst" in {
// $reg, $ccr <- $reg op $reg
-class MxBiArOp_RFRR_xEA<string MN, SDNode NODE, MxType TYPE, bits<4> CMD, MxBead REG>
- : MxInst<(outs TYPE.ROp:$dst), (ins TYPE.ROp:$src, TYPE.ROp:$opd),
- MN#"."#TYPE.Prefix#"\t$opd, $dst",
- [(set TYPE.VT:$dst, CCR, (NODE TYPE.VT:$src, TYPE.VT:$opd))],
+class MxBiArOp_RFRR_xEA<string MN, SDNode NODE, MxType DST_TYPE, MxType SRC_TYPE,
+ bits<4> CMD, MxBead REG>
+ : MxInst<(outs DST_TYPE.ROp:$dst), (ins DST_TYPE.ROp:$src, SRC_TYPE.ROp:$opd),
+ MN#"."#DST_TYPE.Prefix#"\t$opd, $dst",
+ [(set DST_TYPE.VT:$dst, CCR, (NODE DST_TYPE.VT:$src, SRC_TYPE.VT:$opd))],
MxArithEncoding<MxBead4Bits<CMD>,
- !cast<MxEncOpMode>("MxOpMode"#TYPE.Size#TYPE.RLet#"EA"),
+ !cast<MxEncOpMode>("MxOpMode"#DST_TYPE.Size#DST_TYPE.RLet#"EA"),
REG,
- !cast<MxEncEA>("MxEncEA"#TYPE.RLet#"_2"),
+ !cast<MxEncEA>("MxEncEA"#SRC_TYPE.RLet#"_2"),
MxExtEmpty>>;
/// This Op is similar to the one above except it uses reversed opmode, some
@@ -260,11 +261,19 @@ multiclass MxBiArOp_DF<string MN, SDNode NODE, bit isComm,
def NAME#"32ji" : MxBiArOp_FMI<MN, NODE, MxType32, MxType32.JOp, MxType32.JPat,
CMDI, MxEncEAj_0, MxExtEmpty>;
+ def NAME#"16dr" : MxBiArOp_RFRR_xEA<MN, NODE, MxType16d, MxType16r,
+ CMD, MxBeadDReg<0>>;
+ def NAME#"32dr" : MxBiArOp_RFRR_xEA<MN, NODE, MxType32d, MxType32r,
+ CMD, MxBeadDReg<0>>;
+
let isCommutable = isComm in {
- def NAME#"8dd" : MxBiArOp_RFRR_xEA<MN, NODE, MxType8d, CMD, MxBeadDReg<0>>;
- def NAME#"16dd" : MxBiArOp_RFRR_xEA<MN, NODE, MxType16d, CMD, MxBeadDReg<0>>;
- def NAME#"32dd" : MxBiArOp_RFRR_xEA<MN, NODE, MxType32d, CMD, MxBeadDReg<0>>;
+ def NAME#"8dd" : MxBiArOp_RFRR_xEA<MN, NODE, MxType8d, MxType8d,
+ CMD, MxBeadDReg<0>>;
+ def NAME#"16dd" : MxBiArOp_RFRR_xEA<MN, NODE, MxType16d, MxType16d,
+ CMD, MxBeadDReg<0>>;
+ def NAME#"32dd" : MxBiArOp_RFRR_xEA<MN, NODE, MxType32d, MxType32d,
+ CMD, MxBeadDReg<0>>;
} // isComm
@@ -278,29 +287,29 @@ let Pattern = [(null_frag)] in
multiclass MxBiArOp_AF<string MN, SDNode NODE, bit isComm,
bits<4> CMD, bits<4> CMDI> {
- def NAME#"32rk" : MxBiArOp_RFRM<MN, NODE, MxType32r, MxType32.KOp, MxType32.KPat,
+ def NAME#"32ak" : MxBiArOp_RFRM<MN, NODE, MxType32a, MxType32.KOp, MxType32.KPat,
CMD, MxEncEAk, MxExtBrief_2>;
- def NAME#"32rq" : MxBiArOp_RFRM<MN, NODE, MxType32r, MxType32.QOp, MxType32.QPat,
+ def NAME#"32aq" : MxBiArOp_RFRM<MN, NODE, MxType32a, MxType32.QOp, MxType32.QPat,
CMD, MxEncEAq, MxExtI16_2>;
- def NAME#"32rf" : MxBiArOp_RFRM<MN, NODE, MxType32r, MxType32.FOp, MxType32.FPat,
+ def NAME#"32af" : MxBiArOp_RFRM<MN, NODE, MxType32a, MxType32.FOp, MxType32.FPat,
CMD, MxEncEAf_2, MxExtBrief_2>;
- def NAME#"32rp" : MxBiArOp_RFRM<MN, NODE, MxType32r, MxType32.POp, MxType32.PPat,
+ def NAME#"32ap" : MxBiArOp_RFRM<MN, NODE, MxType32a, MxType32.POp, MxType32.PPat,
CMD, MxEncEAp_2, MxExtI16_2>;
- def NAME#"32rj" : MxBiArOp_RFRM<MN, NODE, MxType32r, MxType32.JOp, MxType32.JPat,
+ def NAME#"32aj" : MxBiArOp_RFRM<MN, NODE, MxType32a, MxType32.JOp, MxType32.JPat,
CMD, MxEncEAj_2, MxExtEmpty>;
- def NAME#"32ri" : MxBiArOp_RFRI_xEA<MN, NODE, MxType32r, CMD>;
+ def NAME#"32ai" : MxBiArOp_RFRI_xEA<MN, NODE, MxType32a, CMD>;
- let isCommutable = isComm in
- def NAME#"32rr" : MxBiArOp_RFRR_xEA<MN, NODE, MxType32r, CMD, MxBeadReg<0>>;
+ def NAME#"32ar" : MxBiArOp_RFRR_xEA<MN, NODE, MxType32a, MxType32r,
+ CMD, MxBeadReg<0>>;
} // MxBiArOp_AF
// NOTE These naturally produce CCR
-defm ADD : MxBiArOp_DF<"add", MxAdd, 1, 0xD, 0x6>;
-defm ADD : MxBiArOp_AF<"add", MxAdd, 1, 0xD, 0x6>;
-defm SUB : MxBiArOp_DF<"sub", MxSub, 0, 0x9, 0x4>;
-defm SUB : MxBiArOp_AF<"sub", MxSub, 0, 0x9, 0x4>;
+defm ADD : MxBiArOp_DF<"add", MxAdd, 1, 0xD, 0x6>;
+defm ADD : MxBiArOp_AF<"adda", MxAdd, 1, 0xD, 0x6>;
+defm SUB : MxBiArOp_DF<"sub", MxSub, 0, 0x9, 0x4>;
+defm SUB : MxBiArOp_AF<"suba", MxSub, 0, 0x9, 0x4>;
let Uses = [CCR], Defs = [CCR] in {
@@ -366,13 +375,16 @@ defm XOR : MxBiArOp_DF_EAd<"eor", MxXor, 0xB, 0xA>;
//===----------------------------------------------------------------------===//
let Defs = [CCR] in {
-class MxCmp_RR<MxType TYPE>
- : MxInst<(outs), (ins TYPE.ROp:$lhs, TYPE.ROp:$rhs),
- "cmp."#TYPE.Prefix#"\t$lhs, $rhs",
- [(set CCR, (MxCmp TYPE.VT:$lhs, TYPE.VT:$rhs))],
+class MxCmp_RR<MxType LHS_TYPE, MxType RHS_TYPE = LHS_TYPE,
+ MxBead REG = MxBeadDReg<1>>
+ : MxInst<(outs), (ins LHS_TYPE.ROp:$lhs, RHS_TYPE.ROp:$rhs),
+ "cmp."#RHS_TYPE.Prefix#"\t$lhs, $rhs",
+ [(set CCR, (MxCmp LHS_TYPE.VT:$lhs, RHS_TYPE.VT:$rhs))],
MxArithEncoding<MxBead4Bits<0xB>,
- !cast<MxEncOpMode>("MxOpMode"#TYPE.Size#"dEA"),
- MxBeadDReg<1>, MxEncEAd_0, MxExtEmpty>>;
+ !cast<MxEncOpMode>("MxOpMode"#RHS_TYPE.Size#RHS_TYPE.RLet#"EA"),
+ REG,
+ !cast<MxEncEA>("MxEncEA"#LHS_TYPE.RLet#"_0"),
+ MxExtEmpty>>;
class MxCmp_RI<MxType TYPE>
: MxInst<(outs), (ins TYPE.IOp:$imm, TYPE.ROp:$reg),
@@ -444,11 +456,16 @@ multiclass MMxCmp_MI<MxType TYPE> {
}
foreach S = [8, 16, 32] in {
- def CMP#S#dd : MxCmp_RR<!cast<MxType>("MxType"#S#"d")>;
def CMP#S#di : MxCmp_RI<!cast<MxType>("MxType"#S#"d")>;
def CMP#S#bi : MxCmp_BI<!cast<MxType>("MxType"#S#"d")>;
} // foreach
+def CMP8dd : MxCmp_RR<MxType8d>;
+foreach S = [16, 32] in {
+ def CMP#S#dr : MxCmp_RR<!cast<MxType>("MxType"#S#"r"),
+ !cast<MxType>("MxType"#S#"d")>;
+}
+
// cmp mem, Dn
defm CMP8d : MMxCmp_RM<MxType8d>;
defm CMP16d : MMxCmp_RM<MxType16d>;
@@ -737,9 +754,9 @@ foreach N = ["add", "addc"] in {
def : Pat<(!cast<SDNode>(N) i8 :$src, i8 :$opd),
(ADD8dd MxDRD8 :$src, MxDRD8 :$opd)>;
def : Pat<(!cast<SDNode>(N) i16:$src, i16:$opd),
- (ADD16dd MxDRD16:$src, MxDRD16:$opd)>;
+ (ADD16dr MxXRD16:$src, MxDRD16:$opd)>;
def : Pat<(!cast<SDNode>(N) i32:$src, i32:$opd),
- (ADD32rr MxXRD32:$src, MxXRD32:$opd)>;
+ (ADD32dr MxXRD32:$src, MxDRD32:$opd)>;
// add (An), reg
def : Pat<(!cast<SDNode>(N) MxType8.VT:$src, (Mxloadi8 MxType8.JPat:$opd)),
@@ -747,7 +764,7 @@ foreach N = ["add", "addc"] in {
def : Pat<(!cast<SDNode>(N) MxType16.VT:$src, (Mxloadi16 MxType16.JPat:$opd)),
(ADD16dj MxDRD16:$src, MxType16.JOp:$opd)>;
def : Pat<(!cast<SDNode>(N) MxType32.VT:$src, (Mxloadi32 MxType32.JPat:$opd)),
- (ADD32rj MxXRD32:$src, MxType32.JOp:$opd)>;
+ (ADD32dj MxDRD32:$src, MxType32.JOp:$opd)>;
// add (i,An), reg
def : Pat<(!cast<SDNode>(N) MxType8.VT:$src, (Mxloadi8 MxType8.PPat:$opd)),
@@ -755,7 +772,7 @@ foreach N = ["add", "addc"] in {
def : Pat<(!cast<SDNode>(N) MxType16.VT:$src, (Mxloadi16 MxType16.PPat:$opd)),
(ADD16dp MxDRD16:$src, MxType16.POp:$opd)>;
def : Pat<(!cast<SDNode>(N) MxType32.VT:$src, (Mxloadi32 MxType32.PPat:$opd)),
- (ADD32rp MxXRD32:$src, MxType32.POp:$opd)>;
+ (ADD32dp MxDRD32:$src, MxType32.POp:$opd)>;
// add (i,An,Xn), reg
def : Pat<(!cast<SDNode>(N) MxType8.VT:$src, (Mxloadi8 MxType8.FPat:$opd)),
@@ -763,7 +780,7 @@ foreach N = ["add", "addc"] in {
def : Pat<(!cast<SDNode>(N) MxType16.VT:$src, (Mxloadi16 MxType16.FPat:$opd)),
(ADD16df MxDRD16:$src, MxType16.FOp:$opd)>;
def : Pat<(!cast<SDNode>(N) MxType32.VT:$src, (Mxloadi32 MxType32.FPat:$opd)),
- (ADD32rf MxXRD32:$src, MxType32.FOp:$opd)>;
+ (ADD32df MxDRD32:$src, MxType32.FOp:$opd)>;
// add reg, imm
def : Pat<(!cast<SDNode>(N) i8: $src, MximmSExt8:$opd),
@@ -776,7 +793,7 @@ foreach N = ["add", "addc"] in {
// we make sure it will be selected over LEAp
let AddedComplexity = 15 in {
def : Pat<(!cast<SDNode>(N) i32:$src, MximmSExt32:$opd),
- (ADD32ri MxXRD32:$src, imm:$opd)>;
+ (ADD32di MxDRD32:$src, imm:$opd)>;
} // AddedComplexity = 15
// add imm, (An)
@@ -806,7 +823,7 @@ foreach N = ["sub", "subc"] in {
def : Pat<(!cast<SDNode>(N) i16:$src, i16:$opd),
(SUB16dd MxDRD16:$src, MxDRD16:$opd)>;
def : Pat<(!cast<SDNode>(N) i32:$src, i32:$opd),
- (SUB32rr MxXRD32:$src, MxXRD32:$opd)>;
+ (SUB32dd MxDRD32:$src, MxDRD32:$opd)>;
// sub (An), reg
@@ -815,7 +832,7 @@ foreach N = ["sub", "subc"] in {
def : Pat<(!cast<SDNode>(N) MxType16.VT:$src, (Mxloadi16 MxType16.JPat:$opd)),
(SUB16dj MxDRD16:$src, MxType16.JOp:$opd)>;
def : Pat<(!cast<SDNode>(N) MxType32.VT:$src, (Mxloadi32 MxType32.JPat:$opd)),
- (SUB32rj MxXRD32:$src, MxType32.JOp:$opd)>;
+ (SUB32dj MxDRD32:$src, MxType32.JOp:$opd)>;
// sub (i,An), reg
def : Pat<(!cast<SDNode>(N) MxType8.VT:$src, (Mxloadi8 MxType8.PPat:$opd)),
@@ -823,7 +840,7 @@ foreach N = ["sub", "subc"] in {
def : Pat<(!cast<SDNode>(N) MxType16.VT:$src, (Mxloadi16 MxType16.PPat:$opd)),
(SUB16dp MxDRD16:$src, MxType16.POp:$opd)>;
def : Pat<(!cast<SDNode>(N) MxType32.VT:$src, (Mxloadi32 MxType32.PPat:$opd)),
- (SUB32rp MxXRD32:$src, MxType32.POp:$opd)>;
+ (SUB32dp MxDRD32:$src, MxType32.POp:$opd)>;
// sub (i,An,Xn), reg
def : Pat<(!cast<SDNode>(N) MxType8.VT:$src, (Mxloadi8 MxType8.FPat:$opd)),
@@ -831,7 +848,7 @@ foreach N = ["sub", "subc"] in {
def : Pat<(!cast<SDNode>(N) MxType16.VT:$src, (Mxloadi16 MxType16.FPat:$opd)),
(SUB16df MxDRD16:$src, MxType16.FOp:$opd)>;
def : Pat<(!cast<SDNode>(N) MxType32.VT:$src, (Mxloadi32 MxType32.FPat:$opd)),
- (SUB32rf MxXRD32:$src, MxType32.FOp:$opd)>;
+ (SUB32df MxDRD32:$src, MxType32.FOp:$opd)>;
// sub reg, imm
def : Pat<(!cast<SDNode>(N) i8 :$src, MximmSExt8 :$opd),
@@ -839,7 +856,7 @@ foreach N = ["sub", "subc"] in {
def : Pat<(!cast<SDNode>(N) i16:$src, MximmSExt16:$opd),
(SUB16di MxDRD16:$src, imm:$opd)>;
def : Pat<(!cast<SDNode>(N) i32:$src, MximmSExt32:$opd),
- (SUB32ri MxXRD32:$src, imm:$opd)>;
+ (SUB32di MxDRD32:$src, imm:$opd)>;
// sub imm, (An)
def : Pat<(store (!cast<SDNode>(N) (load MxType8.JPat:$dst), MxType8.IPat:$opd),
diff --git a/llvm/lib/Target/M68k/M68kInstrCompiler.td b/llvm/lib/Target/M68k/M68kInstrCompiler.td
index bcb815dbc4eb..8fb331dec0e9 100644
--- a/llvm/lib/Target/M68k/M68kInstrCompiler.td
+++ b/llvm/lib/Target/M68k/M68kInstrCompiler.td
@@ -23,15 +23,15 @@ def : Pat<(i32 (MxWrapper tjumptable :$src)), (MOV32ri tjumptable :$src)>;
def : Pat<(i32 (MxWrapper tblockaddress :$src)), (MOV32ri tblockaddress :$src)>;
def : Pat<(add MxDRD32:$src, (MxWrapper tconstpool:$opd)),
- (ADD32ri MxDRD32:$src, tconstpool:$opd)>;
+ (ADD32di MxDRD32:$src, tconstpool:$opd)>;
def : Pat<(add MxARD32:$src, (MxWrapper tjumptable:$opd)),
- (ADD32ri MxARD32:$src, tjumptable:$opd)>;
+ (ADD32ai MxARD32:$src, tjumptable:$opd)>;
def : Pat<(add MxARD32:$src, (MxWrapper tglobaladdr :$opd)),
- (ADD32ri MxARD32:$src, tglobaladdr:$opd)>;
+ (ADD32ai MxARD32:$src, tglobaladdr:$opd)>;
def : Pat<(add MxARD32:$src, (MxWrapper texternalsym:$opd)),
- (ADD32ri MxARD32:$src, texternalsym:$opd)>;
+ (ADD32ai MxARD32:$src, texternalsym:$opd)>;
def : Pat<(add MxARD32:$src, (MxWrapper tblockaddress:$opd)),
- (ADD32ri MxARD32:$src, tblockaddress:$opd)>;
+ (ADD32ai MxARD32:$src, tblockaddress:$opd)>;
def : Pat<(store (i32 (MxWrapper tglobaladdr:$src)), iPTR:$dst),
(MOV32ji MxARI32:$dst, tglobaladdr:$src)>;
diff --git a/llvm/lib/Target/M68k/M68kInstrFormats.td b/llvm/lib/Target/M68k/M68kInstrFormats.td
index 1d950bd0377a..99b7ffd17971 100644
--- a/llvm/lib/Target/M68k/M68kInstrFormats.td
+++ b/llvm/lib/Target/M68k/M68kInstrFormats.td
@@ -250,7 +250,7 @@ def MxOpMode16dEA : MxEncOpMode<MxBead3Bits<0b001>>;
def MxOpMode32dEA : MxEncOpMode<MxBead3Bits<0b010>>;
// op EA, An
-def MxOpMode16aEA : MxEncOpMode<MxBead3Bits<0b110>>;
+def MxOpMode16aEA : MxEncOpMode<MxBead3Bits<0b011>>;
def MxOpMode32aEA : MxEncOpMode<MxBead3Bits<0b111>>;
// op EA, Rn
diff --git a/llvm/lib/Target/M68k/M68kInstrInfo.cpp b/llvm/lib/Target/M68k/M68kInstrInfo.cpp
index 0eddd8ce5f4c..639bcd455687 100644
--- a/llvm/lib/Target/M68k/M68kInstrInfo.cpp
+++ b/llvm/lib/Target/M68k/M68kInstrInfo.cpp
@@ -24,8 +24,8 @@
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
#include <functional>
diff --git a/llvm/lib/Target/M68k/M68kInstrInfo.h b/llvm/lib/Target/M68k/M68kInstrInfo.h
index a503b02c5a82..6aced1487365 100644
--- a/llvm/lib/Target/M68k/M68kInstrInfo.h
+++ b/llvm/lib/Target/M68k/M68kInstrInfo.h
@@ -173,7 +173,7 @@ static inline unsigned IsCMP(unsigned Op) {
case M68k::CMP8di:
case M68k::CMP8dj:
case M68k::CMP8dp:
- case M68k::CMP16dd:
+ case M68k::CMP16dr:
case M68k::CMP16df:
case M68k::CMP16di:
case M68k::CMP16dj:
diff --git a/llvm/lib/Target/M68k/M68kInstrInfo.td b/llvm/lib/Target/M68k/M68kInstrInfo.td
index e743213830de..ed6cd9ecf442 100644
--- a/llvm/lib/Target/M68k/M68kInstrInfo.td
+++ b/llvm/lib/Target/M68k/M68kInstrInfo.td
@@ -165,12 +165,23 @@ def MxSize8 : MxSize<8, "b", "byte">;
def MxSize16 : MxSize<16, "w", "word">;
def MxSize32 : MxSize<32, "l", "long">;
-class MxOpClass<string name> : AsmOperandClass {
+class MxOpClass<string name,
+ list<AsmOperandClass> superClasses = []> : AsmOperandClass {
let Name = name;
let ParserMethod = "parseMemOp";
+ let SuperClasses = superClasses;
}
def MxRegClass : MxOpClass<"Reg">;
+// Split the asm register class to avoid ambiguity in the operands'
+// MatchClassKind. For instance, without this separation, both ADD32dd
+// and ADD32dr have {MCK_RegClass, MCK_RegClass} for their operands,
+// which makes the AsmParser unable to pick the correct one in a
+// deterministic way.
+let RenderMethod = "addRegOperands", SuperClasses = [MxRegClass] in {
+ def MxARegClass : MxOpClass<"AReg">;
+ def MxDRegClass : MxOpClass<"DReg">;
+}
class MxOperand<ValueType vt, MxSize size, string letter, RegisterClass rc, dag pat = (null_frag)> {
ValueType VT = vt;
@@ -200,20 +211,24 @@ def MxXRD32_TC : MxRegOp<i32, XR32_TC, MxSize32, "r">;
// DATA REGISTER DIRECT. The operand is in the data register specified by
// the effective address register field.
-def MxDRD8 : MxRegOp<i8, DR8, MxSize8, "d">;
-def MxDRD16 : MxRegOp<i16, DR16, MxSize16, "d">;
-def MxDRD32 : MxRegOp<i32, DR32, MxSize32, "d">;
+let ParserMatchClass = MxDRegClass in {
+ def MxDRD8 : MxRegOp<i8, DR8, MxSize8, "d">;
+ def MxDRD16 : MxRegOp<i16, DR16, MxSize16, "d">;
+ def MxDRD32 : MxRegOp<i32, DR32, MxSize32, "d">;
-def MxDRD16_TC : MxRegOp<i16, DR16_TC, MxSize16, "d">;
-def MxDRD32_TC : MxRegOp<i32, DR32_TC, MxSize32, "d">;
+ def MxDRD16_TC : MxRegOp<i16, DR16_TC, MxSize16, "d">;
+ def MxDRD32_TC : MxRegOp<i32, DR32_TC, MxSize32, "d">;
+}
// ADDRESS REGISTER DIRECT. The operand is in the address register specified by
// the effective address register field.
-def MxARD16 : MxRegOp<i16, AR16, MxSize16, "a">;
-def MxARD32 : MxRegOp<i32, AR32, MxSize32, "a">;
+let ParserMatchClass = MxARegClass in {
+ def MxARD16 : MxRegOp<i16, AR16, MxSize16, "a">;
+ def MxARD32 : MxRegOp<i32, AR32, MxSize32, "a">;
-def MxARD16_TC : MxRegOp<i16, AR16_TC, MxSize16, "a">;
-def MxARD32_TC : MxRegOp<i32, AR32_TC, MxSize32, "a">;
+ def MxARD16_TC : MxRegOp<i16, AR16_TC, MxSize16, "a">;
+ def MxARD32_TC : MxRegOp<i32, AR32_TC, MxSize32, "a">;
+}
class MxMemOp<dag ops, MxSize size, string letter,
string printMethod = "printOperand",
@@ -304,9 +319,17 @@ def MxARII32_TC : MxMemOp<(ops i8imm, AR32_TC, XR32_TC), MxSize32, "f", "printA
// extended before it is used. The reference is classified as a data reference
// with the exception of the jump and jump-to-subroutine instructions.
def MxAddr : MxOpClass<"Addr">;
-def MxAS8 : MxMemOp<(ops OtherVT), MxSize8, "B", "printAS8Mem", MxAddr>;
-def MxAS16 : MxMemOp<(ops OtherVT), MxSize16, "B", "printAS16Mem", MxAddr>;
-def MxAS32 : MxMemOp<(ops OtherVT), MxSize32, "B", "printAS32Mem", MxAddr>;
+let RenderMethod = "addAddrOperands" in {
+ // This hierarchy ensures Addr8 will always be parsed
+ // before other larger-width variants.
+ def MxAddr32 : MxOpClass<"Addr32", [MxAddr]>;
+ def MxAddr16 : MxOpClass<"Addr16", [MxAddr32]>;
+ def MxAddr8 : MxOpClass<"Addr8", [MxAddr16]>;
+}
+
+def MxAS8 : MxMemOp<(ops OtherVT), MxSize8, "B", "printAS8Mem", MxAddr8>;
+def MxAS16 : MxMemOp<(ops OtherVT), MxSize16, "B", "printAS16Mem", MxAddr16>;
+def MxAS32 : MxMemOp<(ops OtherVT), MxSize32, "B", "printAS32Mem", MxAddr32>;
// ABSOLUTE LONG ADDRESS. This addressing mode requires two words of extension.
// The address of the operand is developed by the concatenation of the extension
@@ -314,9 +337,9 @@ def MxAS32 : MxMemOp<(ops OtherVT), MxSize32, "B", "printAS32Mem", MxAddr>;
// order part of the address is the second extension word. The reference is
// classified as a data reference with the exception of the jump and jump
// to-subroutine instructions.
-def MxAL8 : MxMemOp<(ops OtherVT), MxSize8, "b", "printAL8Mem", MxAddr>;
-def MxAL16 : MxMemOp<(ops OtherVT), MxSize16, "b", "printAL16Mem", MxAddr>;
-def MxAL32 : MxMemOp<(ops OtherVT), MxSize32, "b", "printAL32Mem", MxAddr>;
+def MxAL8 : MxMemOp<(ops OtherVT), MxSize8, "b", "printAL8Mem", MxAddr8>;
+def MxAL16 : MxMemOp<(ops OtherVT), MxSize16, "b", "printAL16Mem", MxAddr16>;
+def MxAL32 : MxMemOp<(ops OtherVT), MxSize32, "b", "printAL32Mem", MxAddr32>;
def MxPCD : MxOpClass<"PCD">;
def MxPCI : MxOpClass<"PCI">;
@@ -370,21 +393,22 @@ def Mxi16imm : MxOp<i16, MxSize16, "i">;
def Mxi32imm : MxOp<i32, MxSize32, "i">;
} // OPERAND_IMMEDIATE
-let OperandType = "OPERAND_PCREL",
- ParserMatchClass = MxAddr,
- PrintMethod = "printPCRelImm" in {
-
+class MxBrTargetOperand<int N> : Operand<OtherVT> {
+ let OperandType = "OPERAND_PCREL";
+ let PrintMethod = "printPCRelImm";
+ let ParserMatchClass = !cast<AsmOperandClass>("MxAddr"#N);
+}
// Branch targets have OtherVT type and print as pc-relative values.
-def MxBrTarget8 : Operand<OtherVT>;
-def MxBrTarget16 : Operand<OtherVT>;
-def MxBrTarget32 : Operand<OtherVT>;
-
-} // OPERAND_PCREL
+def MxBrTarget8 : MxBrTargetOperand<8>;
+def MxBrTarget16 : MxBrTargetOperand<16>;
+def MxBrTarget32 : MxBrTargetOperand<32>;
// Used with MOVEM
+def MxMoveMaskClass : MxOpClass<"MoveMask">;
def MxMoveMask : MxOp<i16, MxSize16, "m"> {
let OperandType = "OPERAND_IMMEDIATE";
let PrintMethod = "printMoveMask";
+ let ParserMatchClass = MxMoveMaskClass;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/M68k/M68kSubtarget.cpp b/llvm/lib/Target/M68k/M68kSubtarget.cpp
index 963e83cfbb07..991889706e67 100644
--- a/llvm/lib/Target/M68k/M68kSubtarget.cpp
+++ b/llvm/lib/Target/M68k/M68kSubtarget.cpp
@@ -12,9 +12,9 @@
//===----------------------------------------------------------------------===//
#include "M68kSubtarget.h"
-#include "GlSel/M68kCallLowering.h"
-#include "GlSel/M68kLegalizerInfo.h"
-#include "GlSel/M68kRegisterBankInfo.h"
+#include "GISel/M68kCallLowering.h"
+#include "GISel/M68kLegalizerInfo.h"
+#include "GISel/M68kRegisterBankInfo.h"
#include "M68k.h"
#include "M68kMachineFunction.h"
@@ -24,9 +24,9 @@
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/M68k/M68kTargetMachine.cpp b/llvm/lib/Target/M68k/M68kTargetMachine.cpp
index 5b8fd3d41b14..e8126c6219e8 100644
--- a/llvm/lib/Target/M68k/M68kTargetMachine.cpp
+++ b/llvm/lib/Target/M68k/M68kTargetMachine.cpp
@@ -24,8 +24,8 @@
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/InitializePasses.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/PassRegistry.h"
-#include "llvm/Support/TargetRegistry.h"
#include <memory>
using namespace llvm;
@@ -49,10 +49,14 @@ std::string computeDataLayout(const Triple &TT, StringRef CPU,
// FIXME: how to wire this up with the object format in use?
Ret += "-m:e";
- // M68k pointers are always 32 bit wide even for 16 bit cpus
- Ret += "-p:32:32";
+ // M68k pointers are always 32 bit wide even for 16-bit CPUs.
+ // The ABI only specifies 16-bit alignment.
+ // On at least the 68020+ with a 32-bit bus, there is a performance benefit
+ // to having 32-bit alignment.
+ Ret += "-p:32:16:32";
- // M68k requires i8 to align on 2 byte boundry
+ // Bytes do not require special alignment, words are word aligned and
+ // long words are word aligned at minimum.
Ret += "-i8:8:8-i16:16:16-i32:16:32";
// FIXME no floats at the moment
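For context, the new "-p:32:16:32" component encodes three separate facts about pointers: 32-bit size, 16-bit ABI alignment, and 32-bit preferred alignment. Below is a minimal sketch (not part of the patch; the final layout string is assumed for illustration) of how those values read back through LLVM's DataLayout API:

#include "llvm/IR/DataLayout.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  // Assumed full layout string, pieced together from the code above.
  llvm::DataLayout DL("E-m:e-p:32:16:32-i8:8:8-i16:16:16-i32:16:32");
  llvm::outs() << DL.getPointerSizeInBits(0) << "\n";            // 32 bits
  llvm::outs() << DL.getPointerABIAlignment(0).value() << "\n";  // 2 bytes
  llvm::outs() << DL.getPointerPrefAlignment(0).value() << "\n"; // 4 bytes
  return 0;
}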
diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp b/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp
index 8a0f32b58da4..c1f88fb78ee1 100644
--- a/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp
+++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp
@@ -29,9 +29,9 @@
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -82,7 +82,8 @@ public:
/// Write a sequence of optimal nops to the output, covering \p Count bytes.
/// \return - true on success, false on failure
- bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
+ bool writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const override;
};
} // end anonymous namespace
@@ -200,7 +201,8 @@ void M68kAsmBackend::relaxInstruction(MCInst &Inst,
Inst.setOpcode(RelaxedOp);
}
-bool M68kAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
+bool M68kAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const {
// Cannot emit NOP padding whose size is not a multiple of 16 bits.
if (Count % 2 != 0)
return false;
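For context (a sketch, not part of the patch): the M68k nop is a single 16-bit word (0x4E71 is assumed here), so the backend can only pad regions with an even byte count, which is exactly the check above. The added STI parameter lets backends with several nop encodings choose per subtarget; M68k does not need it yet.

#include <cstdint>
#include <vector>

// Illustrative only: fill a pad region with 2-byte no-ops, refusing odd sizes.
static bool padWithNops(std::vector<uint8_t> &Out, uint64_t Count) {
  if (Count % 2 != 0)
    return false;              // cannot split a 16-bit nop
  for (uint64_t I = 0; I < Count; I += 2) {
    Out.push_back(0x4E);       // assumed M68k NOP encoding, big-endian
    Out.push_back(0x71);
  }
  return true;
}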
diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.cpp b/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.cpp
index e5f5909b5d79..a2e41437ee21 100644
--- a/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.cpp
+++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.cpp
@@ -109,7 +109,7 @@ void M68kInstPrinter::printMoveMask(const MCInst *MI, unsigned opNum,
// Print the '/' separator only if
// both data & register parts have bit(s) set
if (s != 0 && (Mask & 0xFF) && HalfMask)
- O << ',';
+ O << '/';
for (int i = 0; HalfMask; ++i) {
if ((HalfMask >> i) & 0b1) {
@@ -130,7 +130,7 @@ void M68kInstPrinter::printMoveMask(const MCInst *MI, unsigned opNum,
i = j;
if (HalfMask)
- O << ',';
+ O << '/';
}
}
}
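A contrived sketch of the output format this change targets (register names and grouping are illustrative, not derived from the printer above): MOVEM register lists conventionally separate groups with '/' rather than ',', e.g. "d0-d2/a0-a1".

#include <iostream>
#include <string>
#include <vector>

// Join already-formatted register ranges with '/' as printMoveMask now does.
static std::string joinMoveMask(const std::vector<std::string> &Ranges) {
  std::string Out;
  for (const std::string &R : Ranges) {
    if (!Out.empty())
      Out += '/';
    Out += R;
  }
  return Out;
}

int main() {
  std::cout << joinMoveMask({"d0-d2", "a0-a1"}) << "\n"; // d0-d2/a0-a1
  return 0;
}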
diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.cpp b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.cpp
index 0a438ea042be..9f4db895a821 100644
--- a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.cpp
+++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.cpp
@@ -23,10 +23,10 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MachineLocation.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/M68k/TargetInfo/M68kTargetInfo.cpp b/llvm/lib/Target/M68k/TargetInfo/M68kTargetInfo.cpp
index 5f08b9044b4e..2a225b8a43cd 100644
--- a/llvm/lib/Target/M68k/TargetInfo/M68kTargetInfo.cpp
+++ b/llvm/lib/Target/M68k/TargetInfo/M68kTargetInfo.cpp
@@ -10,7 +10,7 @@
/// This file contains M68k target initializer.
///
//===----------------------------------------------------------------------===//
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp b/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp
index 4bad0368505a..c1677baf52a7 100644
--- a/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp
+++ b/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp
@@ -23,9 +23,9 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/TargetRegistry.h"
#define DEBUG_TYPE "msp430-asm-parser"
diff --git a/llvm/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp b/llvm/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp
index d2902189ec40..9bbb2938ab75 100644
--- a/llvm/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp
+++ b/llvm/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp
@@ -10,8 +10,8 @@
//
//===----------------------------------------------------------------------===//
-#include "MSP430.h"
#include "MCTargetDesc/MSP430MCTargetDesc.h"
+#include "MSP430.h"
#include "TargetInfo/MSP430TargetInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
@@ -19,8 +19,8 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Endian.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp
index 071e1484196b..953916776c57 100644
--- a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp
+++ b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp
@@ -90,7 +90,8 @@ public:
return Infos[Kind - FirstTargetFixupKind];
}
- bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
+ bool writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const override;
};
uint64_t MSP430AsmBackend::adjustFixupValue(const MCFixup &Fixup,
@@ -147,7 +148,8 @@ void MSP430AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
}
}
-bool MSP430AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
+bool MSP430AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const {
if ((Count % 2) != 0)
return false;
diff --git a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFStreamer.cpp b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFStreamer.cpp
index 87ee312424c8..087045ccb1df 100644
--- a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFStreamer.cpp
+++ b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFStreamer.cpp
@@ -17,8 +17,10 @@
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/MSP430Attributes.h"
using namespace llvm;
+using namespace llvm::MSP430Attrs;
namespace llvm {
@@ -54,15 +56,14 @@ MSP430TargetELFStreamer::MSP430TargetELFStreamer(MCStreamer &S,
Streamer.emitInt8(1);
// Attribute vector length.
Streamer.emitInt32(11);
- // OFBA_MSPABI_Tag_ISA(4) = 1, MSP430
- Streamer.emitInt8(4);
- Streamer.emitInt8(1);
- // OFBA_MSPABI_Tag_Code_Model(6) = 1, Small
- Streamer.emitInt8(6);
- Streamer.emitInt8(1);
- // OFBA_MSPABI_Tag_Data_Model(8) = 1, Small
- Streamer.emitInt8(8);
- Streamer.emitInt8(1);
+
+ Streamer.emitInt8(TagISA);
+ Streamer.emitInt8(STI.hasFeature(MSP430::FeatureX) ? ISAMSP430X : ISAMSP430);
+ Streamer.emitInt8(TagCodeModel);
+ Streamer.emitInt8(CMSmall);
+ Streamer.emitInt8(TagDataModel);
+ Streamer.emitInt8(DMSmall);
+ // Don't emit TagEnumSize, for full GCC compatibility.
}
MCELFStreamer &MSP430TargetELFStreamer::getStreamer() {
diff --git a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
index c352ea563454..3f006056955d 100644
--- a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
+++ b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
@@ -17,7 +17,7 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp b/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp
index 459188434f2c..8eb3fbd58328 100644
--- a/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp
+++ b/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp
@@ -32,7 +32,7 @@
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp b/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp
index 4be8d0760e68..a83a5d2dfcc9 100644
--- a/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp
+++ b/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp
@@ -71,9 +71,8 @@ void MSP430FrameLowering::emitPrologue(MachineFunction &MF,
.addReg(MSP430::SP);
// Mark the FramePtr as live-in in every block except the entry.
- for (MachineFunction::iterator I = std::next(MF.begin()), E = MF.end();
- I != E; ++I)
- I->addLiveIn(MSP430::R4);
+ for (MachineBasicBlock &MBBJ : llvm::drop_begin(MF))
+ MBBJ.addLiveIn(MSP430::R4);
} else
NumBytes = StackSize - MSP430FI->getCalleeSavedFrameSize();
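The rewritten loop relies on llvm::drop_begin from llvm/ADT/STLExtras.h, which adapts a range so iteration skips its first element (here, the entry block). A standalone sketch of the idiom:

#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <vector>

int main() {
  std::vector<int> Blocks = {0, 1, 2, 3};
  // Visit everything except the first element, mirroring "every block
  // except the entry" in the prologue code above.
  for (int B : llvm::drop_begin(Blocks))
    llvm::outs() << B << " ";   // prints: 1 2 3
  llvm::outs() << "\n";
  return 0;
}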
diff --git a/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
index 7dabb9b4abae..abd48dfd5139 100644
--- a/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -304,13 +304,11 @@ static bool isValidIndexedLoad(const LoadSDNode *LD) {
switch (VT.getSimpleVT().SimpleTy) {
case MVT::i8:
- // Sanity check
if (cast<ConstantSDNode>(LD->getOffset())->getZExtValue() != 1)
return false;
break;
case MVT::i16:
- // Sanity check
if (cast<ConstantSDNode>(LD->getOffset())->getZExtValue() != 2)
return false;
diff --git a/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp b/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
index 9c6d44bf92de..c64a44a0ef95 100644
--- a/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -670,7 +670,7 @@ SDValue MSP430TargetLowering::LowerCCCArguments(
InVals.push_back(ArgValue);
}
} else {
- // Sanity check
+ // Only arguments passed on the stack should make it here.
assert(VA.isMemLoc());
SDValue InVal;
@@ -1150,7 +1150,7 @@ SDValue MSP430TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
// lowering & isel wouldn't diverge.
bool andCC = false;
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
- if (RHSC->isNullValue() && LHS.hasOneUse() &&
+ if (RHSC->isZero() && LHS.hasOneUse() &&
(LHS.getOpcode() == ISD::AND ||
(LHS.getOpcode() == ISD::TRUNCATE &&
LHS.getOperand(0).getOpcode() == ISD::AND))) {
diff --git a/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp b/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
index 130211878be1..e9e26e295fd5 100644
--- a/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -18,8 +18,8 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -116,6 +116,7 @@ unsigned MSP430InstrInfo::removeBranch(MachineBasicBlock &MBB,
continue;
if (I->getOpcode() != MSP430::JMP &&
I->getOpcode() != MSP430::JCC &&
+ I->getOpcode() != MSP430::Bi &&
I->getOpcode() != MSP430::Br &&
I->getOpcode() != MSP430::Bm)
break;
@@ -189,7 +190,7 @@ bool MSP430InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
return true;
// Handle unconditional branches.
- if (I->getOpcode() == MSP430::JMP) {
+ if (I->getOpcode() == MSP430::JMP || I->getOpcode() == MSP430::Bi) {
if (!AllowModify) {
TBB = I->getOperand(0).getMBB();
continue;
diff --git a/llvm/lib/Target/MSP430/MSP430Subtarget.cpp b/llvm/lib/Target/MSP430/MSP430Subtarget.cpp
index 5a117404d772..2fd58717c4db 100644
--- a/llvm/lib/Target/MSP430/MSP430Subtarget.cpp
+++ b/llvm/lib/Target/MSP430/MSP430Subtarget.cpp
@@ -12,7 +12,7 @@
#include "MSP430Subtarget.h"
#include "MSP430.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp b/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
index 827f24daad16..a33146ce2239 100644
--- a/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -18,7 +18,7 @@
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeMSP430Target() {
@@ -81,5 +81,5 @@ bool MSP430PassConfig::addInstSelector() {
void MSP430PassConfig::addPreEmitPass() {
// Must run branch selection immediately preceding the asm printer.
- addPass(createMSP430BranchSelectionPass(), false);
+ addPass(createMSP430BranchSelectionPass());
}
diff --git a/llvm/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp b/llvm/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp
index 9d4a8f141cc4..fc2b38f41c14 100644
--- a/llvm/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp
+++ b/llvm/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#include "TargetInfo/MSP430TargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
Target &llvm::getTheMSP430Target() {
diff --git a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index e4d61f8c210e..01b5dff2e448 100644
--- a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -39,6 +39,7 @@
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -48,7 +49,6 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
diff --git a/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index 6f197e424561..9a66dd77c0d3 100644
--- a/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -20,11 +20,11 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
@@ -455,14 +455,6 @@ static DecodeStatus DecodeDAHIDATI(MCInst &MI, InsnType insn, uint64_t Address,
const void *Decoder);
template <typename InsnType>
-static DecodeStatus DecodeDAHIDATIMMR6(MCInst &MI, InsnType insn,
- uint64_t Address, const void *Decoder);
-
-template <typename InsnType>
-static DecodeStatus DecodeDAHIDATI(MCInst &MI, InsnType insn, uint64_t Address,
- const void *Decoder);
-
-template <typename InsnType>
static DecodeStatus
DecodeAddiGroupBranch(MCInst &MI, InsnType insn, uint64_t Address,
const void *Decoder);
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index 94d338746a6c..bfe413a152b6 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -518,7 +518,8 @@ getFixupKindInfo(MCFixupKind Kind) const {
/// it should return an error.
///
/// \return - True on success.
-bool MipsAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
+bool MipsAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const {
// Check for a byte count smaller than the instruction size.
// FIXME: 16 bit instructions are not handled yet here.
// We shouldn't be using a hard coded number for instruction size.
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
index 16c7befb2670..5a0da3bc49bf 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
@@ -63,7 +63,8 @@ public:
return false;
}
- bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
+ bool writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const override;
bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target) override;
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
index 454f79926dd0..6fc8fcb482cd 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
@@ -29,9 +29,9 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MachineLocation.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
index 232d0eb33164..57cd016da4dc 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
@@ -902,7 +902,7 @@ void MipsTargetELFStreamer::finish() {
if (Alignment) {
OS.SwitchSection(&Section);
if (Section.UseCodeAlign())
- OS.emitCodeAlignment(Alignment, Alignment);
+ OS.emitCodeAlignment(Alignment, &STI, Alignment);
else
OS.emitValueToAlignment(Alignment, 0, 1, Alignment);
}
diff --git a/llvm/lib/Target/Mips/MicroMips32r6InstrFormats.td b/llvm/lib/Target/Mips/MicroMips32r6InstrFormats.td
index da8a06b0cff8..00ac9bf99c92 100644
--- a/llvm/lib/Target/Mips/MicroMips32r6InstrFormats.td
+++ b/llvm/lib/Target/Mips/MicroMips32r6InstrFormats.td
@@ -958,7 +958,7 @@ class POOL32A_DVPEVP_FM_MMR6<string instr_asm, bits<10> funct>
let Inst{5-0} = 0b111100;
}
-class CMP_BRANCH_OFF21_FM_MMR6<string opstr, bits<6> funct> : MipsR6Inst {
+class CMP_BRANCH_OFF21_FM_MMR6<bits<6> funct> : MipsR6Inst {
bits<5> rs;
bits<21> offset;
diff --git a/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td b/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td
index 832124cb3f57..b1a05388884b 100644
--- a/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td
+++ b/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td
@@ -62,8 +62,8 @@ class BEQZC16_MMR6_ENC : BEQZC_BNEZC_FM_MM16R6<0x23>;
class BNEZC16_MMR6_ENC : BEQZC_BNEZC_FM_MM16R6<0x2b>;
class BITSWAP_MMR6_ENC : POOL32A_BITSWAP_FM_MMR6<0b101100>;
class BRK_MMR6_ENC : BREAK_MMR6_ENC<"break">;
-class BEQZC_MMR6_ENC : CMP_BRANCH_OFF21_FM_MMR6<"beqzc", 0b100000>;
-class BNEZC_MMR6_ENC : CMP_BRANCH_OFF21_FM_MMR6<"bnezc", 0b101000>;
+class BEQZC_MMR6_ENC : CMP_BRANCH_OFF21_FM_MMR6<0b100000>;
+class BNEZC_MMR6_ENC : CMP_BRANCH_OFF21_FM_MMR6<0b101000>;
class BGEC_MMR6_ENC : CMP_BRANCH_2R_OFF16_FM_MMR6<"bgec", 0b111101>,
DecodeDisambiguates<"POP75GroupBranchMMR6">;
class BGEUC_MMR6_ENC : CMP_BRANCH_2R_OFF16_FM_MMR6<"bgeuc", 0b110000>,
@@ -406,7 +406,7 @@ class BITSWAP_MMR6_DESC : BITSWAP_MMR6_DESC_BASE<"bitswap", GPR32Opnd>;
class BRK_MMR6_DESC : BRK_FT<"break">;
class CACHE_HINT_MMR6_DESC<string instr_asm, Operand MemOpnd,
- RegisterOperand GPROpnd, InstrItinClass Itin>
+ InstrItinClass Itin>
: MMR6Arch<instr_asm> {
dag OutOperandList = (outs);
dag InOperandList = (ins MemOpnd:$addr, uimm5:$hint);
@@ -416,10 +416,8 @@ class CACHE_HINT_MMR6_DESC<string instr_asm, Operand MemOpnd,
InstrItinClass Itinerary = Itin;
}
-class CACHE_MMR6_DESC : CACHE_HINT_MMR6_DESC<"cache", mem_mm_12, GPR32Opnd,
- II_CACHE>;
-class PREF_MMR6_DESC : CACHE_HINT_MMR6_DESC<"pref", mem_mm_12, GPR32Opnd,
- II_PREF>;
+class CACHE_MMR6_DESC : CACHE_HINT_MMR6_DESC<"cache", mem_mm_12, II_CACHE>;
+class PREF_MMR6_DESC : CACHE_HINT_MMR6_DESC<"pref", mem_mm_12, II_PREF>;
class LB_LBU_MMR6_DESC_BASE<string instr_asm, Operand MemOpnd,
RegisterOperand GPROpnd, InstrItinClass Itin>
@@ -1197,21 +1195,21 @@ class SWM16_MMR6_DESC
ComplexPattern Addr = addr;
}
-class SB16_MMR6_DESC_BASE<string opstr, DAGOperand RTOpnd, DAGOperand RO,
- SDPatternOperator OpNode, InstrItinClass Itin,
- Operand MemOpnd>
+class SB16_MMR6_DESC_BASE<string opstr, DAGOperand RTOpnd,
+ InstrItinClass Itin, Operand MemOpnd>
: MicroMipsInst16<(outs), (ins RTOpnd:$rt, MemOpnd:$addr),
!strconcat(opstr, "\t$rt, $addr"), [], Itin, FrmI>,
MMR6Arch<opstr> {
let DecoderMethod = "DecodeMemMMImm4";
let mayStore = 1;
}
-class SB16_MMR6_DESC : SB16_MMR6_DESC_BASE<"sb16", GPRMM16OpndZero, GPRMM16Opnd,
- truncstorei8, II_SB, mem_mm_4>;
-class SH16_MMR6_DESC : SB16_MMR6_DESC_BASE<"sh16", GPRMM16OpndZero, GPRMM16Opnd,
- truncstorei16, II_SH, mem_mm_4_lsl1>;
-class SW16_MMR6_DESC : SB16_MMR6_DESC_BASE<"sw16", GPRMM16OpndZero, GPRMM16Opnd,
- store, II_SW, mem_mm_4_lsl2>;
+
+class SB16_MMR6_DESC
+ : SB16_MMR6_DESC_BASE<"sb16", GPRMM16OpndZero, II_SB, mem_mm_4>;
+class SH16_MMR6_DESC
+ : SB16_MMR6_DESC_BASE<"sh16", GPRMM16OpndZero, II_SH, mem_mm_4_lsl1>;
+class SW16_MMR6_DESC
+ : SB16_MMR6_DESC_BASE<"sw16", GPRMM16OpndZero, II_SW, mem_mm_4_lsl2>;
class SWSP_MMR6_DESC
: MicroMipsInst16<(outs), (ins GPR32Opnd:$rt, mem_mm_sp_imm5_lsl2:$offset),
diff --git a/llvm/lib/Target/Mips/MicroMipsDSPInstrInfo.td b/llvm/lib/Target/Mips/MicroMipsDSPInstrInfo.td
index 9a1e47e5ecca..8950de230a01 100644
--- a/llvm/lib/Target/Mips/MicroMipsDSPInstrInfo.td
+++ b/llvm/lib/Target/Mips/MicroMipsDSPInstrInfo.td
@@ -281,57 +281,46 @@ class SHRLV_PH_MMR2_DESC : SHLLV_R3_MM_DESC_BASE<
class SHRLV_QB_MM_DESC : SHLLV_R3_MM_DESC_BASE<
"shrlv.qb", int_mips_shrl_qb, NoItinerary, DSPROpnd>;
-class EXT_MM_2R_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
- InstrItinClass itin> {
+class EXT_MM_2R_DESC_BASE<string instr_asm> {
dag OutOperandList = (outs GPR32Opnd:$rt);
dag InOperandList = (ins ACC64DSPOpnd:$ac, GPR32Opnd:$rs);
string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $rs");
- InstrItinClass Itinerary = itin;
+ InstrItinClass Itinerary = NoItinerary;
}
-class EXT_MM_1R_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
- InstrItinClass itin> {
+class EXT_MM_1R_DESC_BASE<string instr_asm> {
dag OutOperandList = (outs GPR32Opnd:$rt);
dag InOperandList = (ins ACC64DSPOpnd:$ac, uimm5:$imm);
string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $imm");
- InstrItinClass Itinerary = itin;
+ InstrItinClass Itinerary = NoItinerary;
}
-class EXTP_MM_DESC
- : EXT_MM_1R_DESC_BASE<"extp", MipsEXTP, NoItinerary>,
- Uses<[DSPPos]>, Defs<[DSPEFI]>;
-class EXTPDP_MM_DESC
- : EXT_MM_1R_DESC_BASE<"extpdp", MipsEXTPDP, NoItinerary>,
- Uses<[DSPPos]>, Defs<[DSPPos, DSPEFI]>;
-class EXTPDPV_MM_DESC
- : EXT_MM_2R_DESC_BASE<"extpdpv", MipsEXTPDP, NoItinerary>,
- Uses<[DSPPos]>, Defs<[DSPPos, DSPEFI]>;
-class EXTPV_MM_DESC
- : EXT_MM_2R_DESC_BASE<"extpv", MipsEXTP, NoItinerary>,
- Uses<[DSPPos]>, Defs<[DSPEFI]>;
-class EXTR_W_MM_DESC
- : EXT_MM_1R_DESC_BASE<"extr.w", MipsEXTR_W, NoItinerary>,
- Defs<[DSPOutFlag23]>;
-class EXTR_R_W_MM_DESC
- : EXT_MM_1R_DESC_BASE<"extr_r.w", MipsEXTR_R_W, NoItinerary>,
- Defs<[DSPOutFlag23]>;
-class EXTR_RS_W_MM_DESC
- : EXT_MM_1R_DESC_BASE<"extr_rs.w", MipsEXTR_RS_W, NoItinerary>,
- Defs<[DSPOutFlag23]>;
-class EXTR_S_H_MM_DESC
- : EXT_MM_1R_DESC_BASE<"extr_s.h", MipsEXTR_S_H, NoItinerary>,
- Defs<[DSPOutFlag23]>;
-class EXTRV_W_MM_DESC
- : EXT_MM_2R_DESC_BASE<"extrv.w", MipsEXTR_W, NoItinerary>,
- Defs<[DSPOutFlag23]>;
-class EXTRV_R_W_MM_DESC
- : EXT_MM_2R_DESC_BASE<"extrv_r.w", MipsEXTR_R_W, NoItinerary>,
- Defs<[DSPOutFlag23]>;
-class EXTRV_RS_W_MM_DESC
- : EXT_MM_2R_DESC_BASE<"extrv_rs.w", MipsEXTR_RS_W, NoItinerary>,
- Defs<[DSPOutFlag23]>;
-class EXTRV_S_H_MM_DESC
- : EXT_MM_2R_DESC_BASE<"extrv_s.h", MipsEXTR_S_H, NoItinerary>,
- Defs<[DSPOutFlag23]>;
+class EXTP_MM_DESC : EXT_MM_1R_DESC_BASE<"extp">,
+ Uses<[DSPPos]>,
+ Defs<[DSPEFI]>;
+class EXTPDP_MM_DESC : EXT_MM_1R_DESC_BASE<"extpdp">,
+ Uses<[DSPPos]>,
+ Defs<[DSPPos, DSPEFI]>;
+class EXTPDPV_MM_DESC : EXT_MM_2R_DESC_BASE<"extpdpv">,
+ Uses<[DSPPos]>,
+ Defs<[DSPPos, DSPEFI]>;
+class EXTPV_MM_DESC : EXT_MM_2R_DESC_BASE<"extpv">,
+ Uses<[DSPPos]>,
+ Defs<[DSPEFI]>;
+class EXTR_W_MM_DESC : EXT_MM_1R_DESC_BASE<"extr.w">,
+ Defs<[DSPOutFlag23]>;
+class EXTR_R_W_MM_DESC : EXT_MM_1R_DESC_BASE<"extr_r.w">,
+ Defs<[DSPOutFlag23]>;
+class EXTR_RS_W_MM_DESC : EXT_MM_1R_DESC_BASE<"extr_rs.w">,
+ Defs<[DSPOutFlag23]>;
+class EXTR_S_H_MM_DESC : EXT_MM_1R_DESC_BASE<"extr_s.h">,
+ Defs<[DSPOutFlag23]>;
+class EXTRV_W_MM_DESC : EXT_MM_2R_DESC_BASE<"extrv.w">, Defs<[DSPOutFlag23]>;
+class EXTRV_R_W_MM_DESC : EXT_MM_2R_DESC_BASE<"extrv_r.w">,
+ Defs<[DSPOutFlag23]>;
+class EXTRV_RS_W_MM_DESC : EXT_MM_2R_DESC_BASE<"extrv_rs.w">,
+ Defs<[DSPOutFlag23]>;
+class EXTRV_S_H_MM_DESC : EXT_MM_2R_DESC_BASE<"extrv_s.h">,
+ Defs<[DSPOutFlag23]>;
class MFHI_MM_DESC_BASE<string instr_asm, RegisterOperand RO, SDNode OpNode,
InstrItinClass itin> {
diff --git a/llvm/lib/Target/Mips/MicroMipsInstrInfo.td b/llvm/lib/Target/Mips/MicroMipsInstrInfo.td
index 269ad8b548a4..5f6354e19ebc 100644
--- a/llvm/lib/Target/Mips/MicroMipsInstrInfo.td
+++ b/llvm/lib/Target/Mips/MicroMipsInstrInfo.td
@@ -195,8 +195,7 @@ def simm23_lsl2 : Operand<i32> {
let DecoderMethod = "DecodeSimm23Lsl2";
}
-class CompactBranchMM<string opstr, DAGOperand opnd, PatFrag cond_op,
- RegisterOperand RO> :
+class CompactBranchMM<string opstr, DAGOperand opnd, RegisterOperand RO> :
InstSE<(outs), (ins RO:$rs, opnd:$offset),
!strconcat(opstr, "\t$rs, $offset"), [], II_BCCZC, FrmI> {
let isBranch = 1;
@@ -240,7 +239,7 @@ MicroMipsInst16<(outs RO1:$rd1, RO2:$rd2), (ins RO3:$rs, RO3:$rt),
let DecoderMethod = "DecodeMovePOperands";
}
-class StorePairMM<string opstr, ComplexPattern Addr = addr>
+class StorePairMM<string opstr>
: InstSE<(outs), (ins GPR32Opnd:$rt, GPR32Opnd:$rt2, mem_simm12:$addr),
!strconcat(opstr, "\t$rt, $addr"), [], II_SWP, FrmI, opstr> {
let DecoderMethod = "DecodeMemMMImm12";
@@ -248,7 +247,7 @@ class StorePairMM<string opstr, ComplexPattern Addr = addr>
let AsmMatchConverter = "ConvertXWPOperands";
}
-class LoadPairMM<string opstr, ComplexPattern Addr = addr>
+class LoadPairMM<string opstr>
: InstSE<(outs GPR32Opnd:$rt, GPR32Opnd:$rt2), (ins mem_simm12:$addr),
!strconcat(opstr, "\t$rt, $addr"), [], II_LWP, FrmI, opstr> {
let DecoderMethod = "DecodeMemMMImm12";
@@ -332,7 +331,7 @@ class ShiftIMM16<string opstr, Operand ImmOpnd, RegisterOperand RO,
MicroMipsInst16<(outs RO:$rd), (ins RO:$rt, ImmOpnd:$shamt),
!strconcat(opstr, "\t$rd, $rt, $shamt"), [], Itin, FrmR>;
-class LoadMM16<string opstr, DAGOperand RO, SDPatternOperator OpNode,
+class LoadMM16<string opstr, DAGOperand RO,
InstrItinClass Itin, Operand MemOpnd> :
MicroMipsInst16<(outs RO:$rt), (ins MemOpnd:$addr),
!strconcat(opstr, "\t$rt, $addr"), [], Itin, FrmI> {
@@ -341,8 +340,7 @@ class LoadMM16<string opstr, DAGOperand RO, SDPatternOperator OpNode,
let mayLoad = 1;
}
-class StoreMM16<string opstr, DAGOperand RTOpnd, DAGOperand RO,
- SDPatternOperator OpNode, InstrItinClass Itin,
+class StoreMM16<string opstr, DAGOperand RTOpnd, InstrItinClass Itin,
Operand MemOpnd> :
MicroMipsInst16<(outs), (ins RTOpnd:$rt, MemOpnd:$addr),
!strconcat(opstr, "\t$rt, $addr"), [], Itin, FrmI> {
@@ -499,8 +497,7 @@ let isCall = 1, hasDelaySlot = 1, Defs = [RA] in {
!strconcat(opstr, "\t$rs, $offset"), [], II_BCCZALS, FrmI, opstr>;
}
-class LoadWordIndexedScaledMM<string opstr, RegisterOperand RO,
- SDPatternOperator OpNode = null_frag> :
+class LoadWordIndexedScaledMM<string opstr, RegisterOperand RO> :
InstSE<(outs RO:$rd), (ins PtrRC:$base, PtrRC:$index),
!strconcat(opstr, "\t$rd, ${index}(${base})"), [], II_LWXS, FrmFI>;
@@ -540,34 +537,28 @@ def reglist16 : Operand<i32> {
let ParserMatchClass = RegList16AsmOperand;
}
-class StoreMultMM<string opstr,
- InstrItinClass Itin = NoItinerary, ComplexPattern Addr = addr> :
+class StoreMultMM<string opstr, InstrItinClass Itin> :
InstSE<(outs), (ins reglist:$rt, mem_mm_12:$addr),
!strconcat(opstr, "\t$rt, $addr"), [], Itin, FrmI, opstr> {
let DecoderMethod = "DecodeMemMMImm12";
let mayStore = 1;
}
-class LoadMultMM<string opstr,
- InstrItinClass Itin = NoItinerary, ComplexPattern Addr = addr> :
+class LoadMultMM<string opstr, InstrItinClass Itin> :
InstSE<(outs reglist:$rt), (ins mem_mm_12:$addr),
!strconcat(opstr, "\t$rt, $addr"), [], Itin, FrmI, opstr> {
let DecoderMethod = "DecodeMemMMImm12";
let mayLoad = 1;
}
-class StoreMultMM16<string opstr,
- InstrItinClass Itin = NoItinerary,
- ComplexPattern Addr = addr> :
+class StoreMultMM16<string opstr, InstrItinClass Itin> :
MicroMipsInst16<(outs), (ins reglist16:$rt, mem_mm_4sp:$addr),
!strconcat(opstr, "\t$rt, $addr"), [], Itin, FrmI> {
let DecoderMethod = "DecodeMemMMReglistImm4Lsl2";
let mayStore = 1;
}
-class LoadMultMM16<string opstr,
- InstrItinClass Itin = NoItinerary,
- ComplexPattern Addr = addr> :
+class LoadMultMM16<string opstr, InstrItinClass Itin> :
MicroMipsInst16<(outs reglist16:$rt), (ins mem_mm_4sp:$addr),
!strconcat(opstr, "\t$rt, $addr"), [], Itin, FrmI> {
let DecoderMethod = "DecodeMemMMReglistImm4Lsl2";
@@ -636,21 +627,21 @@ let FastISelShouldIgnore = 1 in {
def XOR16_MM : LogicRMM16<"xor16", GPRMM16Opnd, II_XOR, xor>,
LOGIC_FM_MM16<0x1>, ISA_MICROMIPS32_NOT_MIPS32R6;
}
-def LBU16_MM : LoadMM16<"lbu16", GPRMM16Opnd, zextloadi8, II_LBU,
- mem_mm_4>, LOAD_STORE_FM_MM16<0x02>, ISA_MICROMIPS;
-def LHU16_MM : LoadMM16<"lhu16", GPRMM16Opnd, zextloadi16, II_LHU,
- mem_mm_4_lsl1>, LOAD_STORE_FM_MM16<0x0a>, ISA_MICROMIPS;
-def LW16_MM : LoadMM16<"lw16", GPRMM16Opnd, load, II_LW, mem_mm_4_lsl2>,
+def LBU16_MM : LoadMM16<"lbu16", GPRMM16Opnd, II_LBU, mem_mm_4>,
+ LOAD_STORE_FM_MM16<0x02>, ISA_MICROMIPS;
+def LHU16_MM : LoadMM16<"lhu16", GPRMM16Opnd, II_LHU, mem_mm_4_lsl1>,
+ LOAD_STORE_FM_MM16<0x0a>, ISA_MICROMIPS;
+def LW16_MM : LoadMM16<"lw16", GPRMM16Opnd, II_LW, mem_mm_4_lsl2>,
LOAD_STORE_FM_MM16<0x1a>, ISA_MICROMIPS;
-def SB16_MM : StoreMM16<"sb16", GPRMM16OpndZero, GPRMM16Opnd, truncstorei8,
- II_SB, mem_mm_4>, LOAD_STORE_FM_MM16<0x22>,
- ISA_MICROMIPS32_NOT_MIPS32R6;
-def SH16_MM : StoreMM16<"sh16", GPRMM16OpndZero, GPRMM16Opnd, truncstorei16,
- II_SH, mem_mm_4_lsl1>,
- LOAD_STORE_FM_MM16<0x2a>, ISA_MICROMIPS32_NOT_MIPS32R6;
-def SW16_MM : StoreMM16<"sw16", GPRMM16OpndZero, GPRMM16Opnd, store, II_SW,
- mem_mm_4_lsl2>, LOAD_STORE_FM_MM16<0x3a>,
- ISA_MICROMIPS32_NOT_MIPS32R6;
+def SB16_MM : StoreMM16<"sb16", GPRMM16OpndZero, II_SB, mem_mm_4>,
+ LOAD_STORE_FM_MM16<0x22>,
+ ISA_MICROMIPS32_NOT_MIPS32R6;
+def SH16_MM : StoreMM16<"sh16", GPRMM16OpndZero, II_SH, mem_mm_4_lsl1>,
+ LOAD_STORE_FM_MM16<0x2a>,
+ ISA_MICROMIPS32_NOT_MIPS32R6;
+def SW16_MM : StoreMM16<"sw16", GPRMM16OpndZero, II_SW, mem_mm_4_lsl2>,
+ LOAD_STORE_FM_MM16<0x3a>,
+ ISA_MICROMIPS32_NOT_MIPS32R6;
def LWGP_MM : LoadGPMM16<"lw", GPRMM16Opnd, II_LW, mem_mm_gp_simm7_lsl2>,
LOAD_GP_FM_MM16<0x19>, ISA_MICROMIPS;
def LWSP_MM : LoadSPMM16<"lw", GPR32Opnd, II_LW, mem_mm_sp_imm5_lsl2>,
@@ -713,9 +704,9 @@ let DecoderNamespace = "MicroMips" in {
POOL32A_CFTC2_FM_MM<0b1101110100>, ISA_MICROMIPS;
/// Compact Branch Instructions
- def BEQZC_MM : CompactBranchMM<"beqzc", brtarget_mm, seteq, GPR32Opnd>,
+ def BEQZC_MM : CompactBranchMM<"beqzc", brtarget_mm, GPR32Opnd>,
COMPACT_BRANCH_FM_MM<0x7>, ISA_MICROMIPS32_NOT_MIPS32R6;
- def BNEZC_MM : CompactBranchMM<"bnezc", brtarget_mm, setne, GPR32Opnd>,
+ def BNEZC_MM : CompactBranchMM<"bnezc", brtarget_mm, GPR32Opnd>,
COMPACT_BRANCH_FM_MM<0x5>, ISA_MICROMIPS32_NOT_MIPS32R6;
/// Arithmetic Instructions (ALU Immediate)
diff --git a/llvm/lib/Target/Mips/Mips16HardFloat.cpp b/llvm/lib/Target/Mips/Mips16HardFloat.cpp
index 6c5f63804d19..203e05dde7ad 100644
--- a/llvm/lib/Target/Mips/Mips16HardFloat.cpp
+++ b/llvm/lib/Target/Mips/Mips16HardFloat.cpp
@@ -408,12 +408,9 @@ static bool fixupFPReturnAndCall(Function &F, Module *M,
// during call setup, the proper call lowering to the helper
// functions will take place.
//
- A = A.addAttribute(C, AttributeList::FunctionIndex,
- "__Mips16RetHelper");
- A = A.addAttribute(C, AttributeList::FunctionIndex,
- Attribute::ReadNone);
- A = A.addAttribute(C, AttributeList::FunctionIndex,
- Attribute::NoInline);
+ A = A.addFnAttribute(C, "__Mips16RetHelper");
+ A = A.addFnAttribute(C, Attribute::ReadNone);
+ A = A.addFnAttribute(C, Attribute::NoInline);
FunctionCallee F = (M->getOrInsertFunction(Name, A, MyVoid, T));
CallInst::Create(F, Params, "", &I);
} else if (const CallInst *CI = dyn_cast<CallInst>(&I)) {
@@ -485,11 +482,11 @@ static void removeUseSoftFloat(Function &F) {
AttrBuilder B;
LLVM_DEBUG(errs() << "removing -use-soft-float\n");
B.addAttribute("use-soft-float", "false");
- F.removeAttributes(AttributeList::FunctionIndex, B);
+ F.removeFnAttrs(B);
if (F.hasFnAttribute("use-soft-float")) {
LLVM_DEBUG(errs() << "still has -use-soft-float\n");
}
- F.addAttributes(AttributeList::FunctionIndex, B);
+ F.addFnAttrs(B);
}
// This pass only makes sense when the underlying chip has floating point but
diff --git a/llvm/lib/Target/Mips/Mips16InstrInfo.td b/llvm/lib/Target/Mips/Mips16InstrInfo.td
index 990202b23bc0..3410fcd85fdc 100644
--- a/llvm/lib/Target/Mips/Mips16InstrInfo.td
+++ b/llvm/lib/Target/Mips/Mips16InstrInfo.td
@@ -304,14 +304,14 @@ class FI8_MOV32R16_ins<string asmstr, InstrItinClass itin>:
//
// MULT
//
-class FMULT16_ins<string asmstr, InstrItinClass itin> :
+class FMULT16_ins<string asmstr> :
MipsPseudo16<(outs), (ins CPU16Regs:$rx, CPU16Regs:$ry),
!strconcat(asmstr, "\t$rx, $ry"), []>;
//
// MULT-LO
//
-class FMULT16_LO_ins<string asmstr, InstrItinClass itin> :
+class FMULT16_LO_ins<string asmstr> :
MipsPseudo16<(outs CPU16Regs:$rz), (ins CPU16Regs:$rx, CPU16Regs:$ry),
!strconcat(asmstr, "\t$rx, $ry\n\tmflo\t$rz"), []> {
let isCodeGenOnly=1;
@@ -895,13 +895,13 @@ def Mflo16: FRR16_M_ins<0b10010, "mflo", IIM16Alu> {
//
// Pseudo Instruction for mult
//
-def MultRxRy16: FMULT16_ins<"mult", IIM16Alu> {
+def MultRxRy16: FMULT16_ins<"mult"> {
let isCommutable = 1;
let hasSideEffects = 0;
let Defs = [HI0, LO0];
}
-def MultuRxRy16: FMULT16_ins<"multu", IIM16Alu> {
+def MultuRxRy16: FMULT16_ins<"multu"> {
let isCommutable = 1;
let hasSideEffects = 0;
let Defs = [HI0, LO0];
@@ -912,7 +912,7 @@ def MultuRxRy16: FMULT16_ins<"multu", IIM16Alu> {
// Purpose: Multiply Word
// To multiply 32-bit signed integers.
//
-def MultRxRyRz16: FMULT16_LO_ins<"mult", IIM16Alu> {
+def MultRxRyRz16: FMULT16_LO_ins<"mult"> {
let isCommutable = 1;
let hasSideEffects = 0;
let Defs = [HI0, LO0];
@@ -923,7 +923,7 @@ def MultRxRyRz16: FMULT16_LO_ins<"mult", IIM16Alu> {
// Purpose: Multiply Unsigned Word
// To multiply 32-bit unsigned integers.
//
-def MultuRxRyRz16: FMULT16_LO_ins<"multu", IIM16Alu> {
+def MultuRxRyRz16: FMULT16_LO_ins<"multu"> {
let isCommutable = 1;
let hasSideEffects = 0;
let Defs = [HI0, LO0];
diff --git a/llvm/lib/Target/Mips/Mips32r6InstrInfo.td b/llvm/lib/Target/Mips/Mips32r6InstrInfo.td
index 9607d008bc97..192d0013d89c 100644
--- a/llvm/lib/Target/Mips/Mips32r6InstrInfo.td
+++ b/llvm/lib/Target/Mips/Mips32r6InstrInfo.td
@@ -700,8 +700,7 @@ class RINT_D_DESC : CLASS_RINT_DESC_BASE<"rint.d", FGR64Opnd, II_RINT_D>;
class CLASS_S_DESC : CLASS_RINT_DESC_BASE<"class.s", FGR32Opnd, II_CLASS_S>;
class CLASS_D_DESC : CLASS_RINT_DESC_BASE<"class.d", FGR64Opnd, II_CLASS_D>;
-class CACHE_HINT_DESC<string instr_asm, Operand MemOpnd,
- RegisterOperand GPROpnd, InstrItinClass itin>
+class CACHE_HINT_DESC<string instr_asm, Operand MemOpnd, InstrItinClass itin>
: MipsR6Arch<instr_asm> {
dag OutOperandList = (outs);
dag InOperandList = (ins MemOpnd:$addr, uimm5:$hint);
@@ -711,8 +710,8 @@ class CACHE_HINT_DESC<string instr_asm, Operand MemOpnd,
InstrItinClass Itinerary = itin;
}
-class CACHE_DESC : CACHE_HINT_DESC<"cache", mem_simm9, GPR32Opnd, II_CACHE>;
-class PREF_DESC : CACHE_HINT_DESC<"pref", mem_simm9, GPR32Opnd, II_PREF>;
+class CACHE_DESC : CACHE_HINT_DESC<"cache", mem_simm9, II_CACHE>;
+class PREF_DESC : CACHE_HINT_DESC<"pref", mem_simm9, II_PREF>;
class COP2LD_DESC_BASE<string instr_asm, RegisterOperand COPOpnd,
InstrItinClass itin> {
diff --git a/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
index b460bc71b11f..6d3f3adb2b7a 100644
--- a/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -52,9 +52,9 @@
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
@@ -1203,7 +1203,7 @@ void MipsAsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind) {
// LD RA, 8(SP)
// DADDIU SP, SP, 16
//
- OutStreamer->emitCodeAlignment(4);
+ OutStreamer->emitCodeAlignment(4, &getSubtargetInfo());
auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
OutStreamer->emitLabel(CurSled);
auto Target = OutContext.createTempSymbol();
diff --git a/llvm/lib/Target/Mips/MipsCallLowering.cpp b/llvm/lib/Target/Mips/MipsCallLowering.cpp
index 5c2549ee176b..f6ec34c7f403 100644
--- a/llvm/lib/Target/Mips/MipsCallLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsCallLowering.cpp
@@ -24,6 +24,7 @@ using namespace llvm;
MipsCallLowering::MipsCallLowering(const MipsTargetLowering &TLI)
: CallLowering(&TLI) {}
+namespace {
struct MipsOutgoingValueAssigner : public CallLowering::OutgoingValueAssigner {
/// This is the name of the function being called
/// FIXME: Relying on this is unsound
@@ -80,7 +81,6 @@ struct MipsIncomingValueAssigner : public CallLowering::IncomingValueAssigner {
}
};
-namespace {
class MipsIncomingValueHandler : public CallLowering::IncomingValueHandler {
const MipsSubtarget &STI;
@@ -92,7 +92,7 @@ public:
private:
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign &VA) override;
+ CCValAssign VA) override;
Register getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO,
@@ -101,7 +101,8 @@ private:
MachinePointerInfo &MPO, CCValAssign &VA) override;
unsigned assignCustomValue(CallLowering::ArgInfo &Arg,
- ArrayRef<CCValAssign> VAs) override;
+ ArrayRef<CCValAssign> VAs,
+ std::function<void()> *Thunk = nullptr) override;
virtual void markPhysRegUsed(unsigned PhysReg) {
MIRBuilder.getMRI()->addLiveIn(PhysReg);
@@ -127,7 +128,7 @@ private:
void MipsIncomingValueHandler::assignValueToReg(Register ValVReg,
Register PhysReg,
- CCValAssign &VA) {
+ CCValAssign VA) {
markPhysRegUsed(PhysReg);
IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
}
@@ -163,7 +164,8 @@ void MipsIncomingValueHandler::assignValueToAddress(Register ValVReg,
/// dependent on other arguments.
unsigned
MipsIncomingValueHandler::assignCustomValue(CallLowering::ArgInfo &Arg,
- ArrayRef<CCValAssign> VAs) {
+ ArrayRef<CCValAssign> VAs,
+ std::function<void()> *Thunk) {
const CCValAssign &VALo = VAs[0];
const CCValAssign &VAHi = VAs[1];
@@ -197,7 +199,7 @@ public:
private:
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign &VA) override;
+ CCValAssign VA) override;
Register getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO,
@@ -206,7 +208,8 @@ private:
void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
MachinePointerInfo &MPO, CCValAssign &VA) override;
unsigned assignCustomValue(CallLowering::ArgInfo &Arg,
- ArrayRef<CCValAssign> VAs) override;
+ ArrayRef<CCValAssign> VAs,
+ std::function<void()> *Thunk) override;
MachineInstrBuilder &MIB;
};
@@ -214,7 +217,7 @@ private:
void MipsOutgoingValueHandler::assignValueToReg(Register ValVReg,
Register PhysReg,
- CCValAssign &VA) {
+ CCValAssign VA) {
Register ExtReg = extendRegister(ValVReg, VA);
MIRBuilder.buildCopy(PhysReg, ExtReg);
MIB.addUse(PhysReg, RegState::Implicit);
@@ -253,7 +256,8 @@ void MipsOutgoingValueHandler::assignValueToAddress(Register ValVReg,
unsigned
MipsOutgoingValueHandler::assignCustomValue(CallLowering::ArgInfo &Arg,
- ArrayRef<CCValAssign> VAs) {
+ ArrayRef<CCValAssign> VAs,
+ std::function<void()> *Thunk) {
const CCValAssign &VALo = VAs[0];
const CCValAssign &VAHi = VAs[1];
@@ -271,6 +275,15 @@ MipsOutgoingValueHandler::assignCustomValue(CallLowering::ArgInfo &Arg,
if (!STI.isLittle())
std::swap(Lo, Hi);
+ // If we can return a thunk, just include the register copies. The unmerge can
+ // be emitted earlier.
+ if (Thunk) {
+ *Thunk = [=]() {
+ MIRBuilder.buildCopy(VALo.getLocReg(), Lo);
+ MIRBuilder.buildCopy(VAHi.getLocReg(), Hi);
+ };
+ return 2;
+ }
MIRBuilder.buildCopy(VALo.getLocReg(), Lo);
MIRBuilder.buildCopy(VAHi.getLocReg(), Hi);
return 2;
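The Thunk parameter lets assignCustomValue defer the physical-register copies instead of emitting them immediately; the caller runs the callable after the related unmerge has been built. A contrived, LLVM-free sketch of that deferral pattern (names are illustrative):

#include <functional>
#include <iostream>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> Emitted;

  // Instead of emitting the two copies now, package them up for later.
  std::function<void()> Thunk = [&Emitted]() {
    Emitted.push_back("copy lo -> $a2");
    Emitted.push_back("copy hi -> $a3");
  };

  Emitted.push_back("unmerge value into lo/hi"); // emitted first
  Thunk();                                       // copies follow the unmerge

  for (const std::string &S : Emitted)
    std::cout << S << "\n";
  return 0;
}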
diff --git a/llvm/lib/Target/Mips/MipsDSPInstrInfo.td b/llvm/lib/Target/Mips/MipsDSPInstrInfo.td
index 727d47d06ad4..dd0b48573ef6 100644
--- a/llvm/lib/Target/Mips/MipsDSPInstrInfo.td
+++ b/llvm/lib/Target/Mips/MipsDSPInstrInfo.td
@@ -398,8 +398,7 @@ class APPEND_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
string BaseOpcode = instr_asm;
}
-class EXTR_W_TY1_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
- InstrItinClass itin> {
+class EXTR_W_TY1_R2_DESC_BASE<string instr_asm, InstrItinClass itin> {
dag OutOperandList = (outs GPR32Opnd:$rt);
dag InOperandList = (ins ACC64DSPOpnd:$ac, GPR32Opnd:$shift_rs);
string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $shift_rs");
@@ -407,8 +406,7 @@ class EXTR_W_TY1_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
string BaseOpcode = instr_asm;
}
-class EXTR_W_TY1_R1_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
- InstrItinClass itin> {
+class EXTR_W_TY1_R1_DESC_BASE<string instr_asm, InstrItinClass itin> {
dag OutOperandList = (outs GPR32Opnd:$rt);
dag InOperandList = (ins ACC64DSPOpnd:$ac, uimm5:$shift_rs);
string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $shift_rs");
@@ -522,7 +520,7 @@ class MTHI_DESC_BASE<string instr_asm, RegisterOperand RO,
bit isMoveReg = 1;
}
-class BPOSGE32_PSEUDO_DESC_BASE<SDPatternOperator OpNode, InstrItinClass itin> :
+class BPOSGE32_PSEUDO_DESC_BASE<SDPatternOperator OpNode> :
MipsPseudo<(outs GPR32Opnd:$dst), (ins), [(set GPR32Opnd:$dst, (OpNode))]> {
bit hasNoSchedulingInfo = 1;
bit usesCustomInserter = 1;
@@ -891,47 +889,40 @@ class LBUX_DESC : LX_DESC_BASE<"lbux", int_mips_lbux, NoItinerary>;
class BPOSGE32_DESC : BPOSGE32_DESC_BASE<"bposge32", brtarget, NoItinerary>;
// Extr
-class EXTP_DESC : EXTR_W_TY1_R1_DESC_BASE<"extp", MipsEXTP, NoItinerary>,
+class EXTP_DESC : EXTR_W_TY1_R1_DESC_BASE<"extp", NoItinerary>,
Uses<[DSPPos]>, Defs<[DSPEFI]>;
-class EXTPV_DESC : EXTR_W_TY1_R2_DESC_BASE<"extpv", MipsEXTP, NoItinerary>,
+class EXTPV_DESC : EXTR_W_TY1_R2_DESC_BASE<"extpv", NoItinerary>,
Uses<[DSPPos]>, Defs<[DSPEFI]>;
-class EXTPDP_DESC : EXTR_W_TY1_R1_DESC_BASE<"extpdp", MipsEXTPDP, NoItinerary>,
+class EXTPDP_DESC : EXTR_W_TY1_R1_DESC_BASE<"extpdp", NoItinerary>,
Uses<[DSPPos]>, Defs<[DSPPos, DSPEFI]>;
-class EXTPDPV_DESC : EXTR_W_TY1_R2_DESC_BASE<"extpdpv", MipsEXTPDP,
- NoItinerary>,
+class EXTPDPV_DESC : EXTR_W_TY1_R2_DESC_BASE<"extpdpv", NoItinerary>,
Uses<[DSPPos]>, Defs<[DSPPos, DSPEFI]>;
-class EXTR_W_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr.w", MipsEXTR_W, NoItinerary>,
+class EXTR_W_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr.w", NoItinerary>,
Defs<[DSPOutFlag23]>;
-class EXTRV_W_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv.w", MipsEXTR_W,
- NoItinerary>, Defs<[DSPOutFlag23]>;
+class EXTRV_W_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv.w", NoItinerary>,
+ Defs<[DSPOutFlag23]>;
-class EXTR_R_W_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr_r.w", MipsEXTR_R_W,
- NoItinerary>,
+class EXTR_R_W_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr_r.w", NoItinerary>,
Defs<[DSPOutFlag23]>;
-class EXTRV_R_W_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv_r.w", MipsEXTR_R_W,
- NoItinerary>,
+class EXTRV_R_W_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv_r.w", NoItinerary>,
Defs<[DSPOutFlag23]>;
-class EXTR_RS_W_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr_rs.w", MipsEXTR_RS_W,
- NoItinerary>,
+class EXTR_RS_W_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr_rs.w", NoItinerary>,
Defs<[DSPOutFlag23]>;
-class EXTRV_RS_W_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv_rs.w", MipsEXTR_RS_W,
- NoItinerary>,
+class EXTRV_RS_W_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv_rs.w", NoItinerary>,
Defs<[DSPOutFlag23]>;
-class EXTR_S_H_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr_s.h", MipsEXTR_S_H,
- NoItinerary>,
+class EXTR_S_H_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr_s.h", NoItinerary>,
Defs<[DSPOutFlag23]>;
-class EXTRV_S_H_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv_s.h", MipsEXTR_S_H,
- NoItinerary>,
+class EXTRV_S_H_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv_s.h", NoItinerary>,
Defs<[DSPOutFlag23]>;
class SHILO_DESC : SHILO_R1_DESC_BASE<"shilo", MipsSHILO>;
@@ -1115,8 +1106,8 @@ class PREPEND_DESC : APPEND_DESC_BASE<"prepend", int_mips_prepend, uimm5,
timmZExt5, NoItinerary>;
// Pseudos.
-def BPOSGE32_PSEUDO : BPOSGE32_PSEUDO_DESC_BASE<int_mips_bposge32,
- NoItinerary>, Uses<[DSPPos]>;
+def BPOSGE32_PSEUDO : BPOSGE32_PSEUDO_DESC_BASE<int_mips_bposge32>,
+ Uses<[DSPPos]>;
// Instruction defs.
// MIPS DSP Rev 1
diff --git a/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp b/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
index 797d81204305..c2e3d7393a6d 100644
--- a/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -401,10 +401,9 @@ void RegDefsUses::setUnallocatableRegs(const MachineFunction &MF) {
void RegDefsUses::addLiveOut(const MachineBasicBlock &MBB,
const MachineBasicBlock &SuccBB) {
- for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(),
- SE = MBB.succ_end(); SI != SE; ++SI)
- if (*SI != &SuccBB)
- for (const auto &LI : (*SI)->liveins())
+ for (const MachineBasicBlock *S : MBB.successors())
+ if (S != &SuccBB)
+ for (const auto &LI : S->liveins())
Uses.set(LI.PhysReg);
}
@@ -839,9 +838,8 @@ bool MipsDelaySlotFiller::searchSuccBBs(MachineBasicBlock &MBB,
auto *Fn = MBB.getParent();
// Iterate over SuccBB's predecessor list.
- for (MachineBasicBlock::pred_iterator PI = SuccBB->pred_begin(),
- PE = SuccBB->pred_end(); PI != PE; ++PI)
- if (!examinePred(**PI, *SuccBB, RegDU, HasMultipleSuccs, BrMap))
+ for (MachineBasicBlock *Pred : SuccBB->predecessors())
+ if (!examinePred(*Pred, *SuccBB, RegDU, HasMultipleSuccs, BrMap))
return false;
// Do not allow moving instructions which have unallocatable register operands
diff --git a/llvm/lib/Target/Mips/MipsEVAInstrInfo.td b/llvm/lib/Target/Mips/MipsEVAInstrInfo.td
index 73cca8cfa5d9..c697dc90c14c 100644
--- a/llvm/lib/Target/Mips/MipsEVAInstrInfo.td
+++ b/llvm/lib/Target/Mips/MipsEVAInstrInfo.td
@@ -70,8 +70,7 @@ class LHuE_DESC : LOAD_EVA_DESC_BASE<"lhue", GPR32Opnd, II_LHUE>;
class LWE_DESC : LOAD_EVA_DESC_BASE<"lwe", GPR32Opnd, II_LWE>;
class STORE_EVA_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
- SDPatternOperator OpNode = null_frag,
- InstrItinClass itin = NoItinerary> {
+ InstrItinClass itin> {
dag OutOperandList = (outs);
dag InOperandList = (ins GPROpnd:$rt, mem_simm9:$addr);
string AsmString = !strconcat(instr_asm, "\t$rt, $addr");
@@ -82,9 +81,9 @@ class STORE_EVA_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
InstrItinClass Itinerary = itin;
}
-class SBE_DESC : STORE_EVA_DESC_BASE<"sbe", GPR32Opnd, null_frag, II_SBE>;
-class SHE_DESC : STORE_EVA_DESC_BASE<"she", GPR32Opnd, null_frag, II_SHE>;
-class SWE_DESC : STORE_EVA_DESC_BASE<"swe", GPR32Opnd, null_frag, II_SWE>;
+class SBE_DESC : STORE_EVA_DESC_BASE<"sbe", GPR32Opnd, II_SBE>;
+class SHE_DESC : STORE_EVA_DESC_BASE<"she", GPR32Opnd, II_SHE>;
+class SWE_DESC : STORE_EVA_DESC_BASE<"swe", GPR32Opnd, II_SWE>;
// Load/Store Left/Right EVA descriptions
class LOAD_LEFT_RIGHT_EVA_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
diff --git a/llvm/lib/Target/Mips/MipsFastISel.cpp b/llvm/lib/Target/Mips/MipsFastISel.cpp
index e963185eaeaa..05c1c06ffefe 100644
--- a/llvm/lib/Target/Mips/MipsFastISel.cpp
+++ b/llvm/lib/Target/Mips/MipsFastISel.cpp
@@ -1660,7 +1660,7 @@ bool MipsFastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
if (!MTI->getLength()->getType()->isIntegerTy(32))
return false;
const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
- return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 1);
+ return lowerCallTo(II, IntrMemName, II->arg_size() - 1);
}
case Intrinsic::memset: {
const MemSetInst *MSI = cast<MemSetInst>(II);
@@ -1669,7 +1669,7 @@ bool MipsFastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
return false;
if (!MSI->getLength()->getType()->isIntegerTy(32))
return false;
- return lowerCallTo(II, "memset", II->getNumArgOperands() - 1);
+ return lowerCallTo(II, "memset", II->arg_size() - 1);
}
}
return false;
diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp
index 9399c949a3f2..4f364ef6afc7 100644
--- a/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -509,6 +509,9 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM,
setLibcallName(RTLIB::SHL_I128, nullptr);
setLibcallName(RTLIB::SRL_I128, nullptr);
setLibcallName(RTLIB::SRA_I128, nullptr);
+ setLibcallName(RTLIB::MUL_I128, nullptr);
+ setLibcallName(RTLIB::MULO_I64, nullptr);
+ setLibcallName(RTLIB::MULO_I128, nullptr);
}
setMinFunctionAlignment(Subtarget.isGP64bit() ? Align(8) : Align(4));
@@ -2073,7 +2076,7 @@ SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op,
const MipsTargetObjectFile *TLOF =
static_cast<const MipsTargetObjectFile *>(
getTargetMachine().getObjFileLowering());
- const GlobalObject *GO = GV->getBaseObject();
+ const GlobalObject *GO = GV->getAliaseeObject();
if (GO && TLOF->IsGlobalInSmallSection(GO, getTargetMachine()))
// %gp_rel relocation
return getAddrGPRel(N, SDLoc(N), Ty, DAG, ABI.IsN64());
@@ -3714,7 +3717,7 @@ SDValue MipsTargetLowering::LowerFormalArguments(
LocVT = VA.getValVT();
}
- // sanity check
+ // Only arguments passed on the stack should make it here.
assert(VA.isMemLoc());
// The stack pointer offset is relative to the caller stack frame.
diff --git a/llvm/lib/Target/Mips/MipsInstructionSelector.cpp b/llvm/lib/Target/Mips/MipsInstructionSelector.cpp
index 256fb74c1d6c..6d44ce2ab563 100644
--- a/llvm/lib/Target/Mips/MipsInstructionSelector.cpp
+++ b/llvm/lib/Target/Mips/MipsInstructionSelector.cpp
@@ -145,14 +145,14 @@ bool MipsInstructionSelector::materialize32BitImm(Register DestReg, APInt Imm,
MachineIRBuilder &B) const {
assert(Imm.getBitWidth() == 32 && "Unsupported immediate size.");
// Ori zero extends immediate. Used for values with zeros in high 16 bits.
- if (Imm.getHiBits(16).isNullValue()) {
+ if (Imm.getHiBits(16).isZero()) {
MachineInstr *Inst =
B.buildInstr(Mips::ORi, {DestReg}, {Register(Mips::ZERO)})
.addImm(Imm.getLoBits(16).getLimitedValue());
return constrainSelectedInstRegOperands(*Inst, TII, TRI, RBI);
}
// Lui places immediate in high 16 bits and sets low 16 bits to zero.
- if (Imm.getLoBits(16).isNullValue()) {
+ if (Imm.getLoBits(16).isZero()) {
MachineInstr *Inst = B.buildInstr(Mips::LUi, {DestReg}, {})
.addImm(Imm.getHiBits(16).getLimitedValue());
return constrainSelectedInstRegOperands(*Inst, TII, TRI, RBI);
diff --git a/llvm/lib/Target/Mips/MipsMSAInstrInfo.td b/llvm/lib/Target/Mips/MipsMSAInstrInfo.td
index 301f1c158010..c4abccb24c6f 100644
--- a/llvm/lib/Target/Mips/MipsMSAInstrInfo.td
+++ b/llvm/lib/Target/Mips/MipsMSAInstrInfo.td
@@ -1308,8 +1308,8 @@ class MSA_2R_FILL_DESC_BASE<string instr_asm, ValueType VT,
InstrItinClass Itinerary = itin;
}
-class MSA_2R_FILL_PSEUDO_BASE<ValueType VT, SDPatternOperator OpNode,
- RegisterClass RCWD, RegisterClass RCWS = RCWD> :
+class MSA_2R_FILL_PSEUDO_BASE<SDPatternOperator OpNode,
+ RegisterClass RCWD, RegisterClass RCWS> :
MSAPseudo<(outs RCWD:$wd), (ins RCWS:$fs),
[(set RCWD:$wd, (OpNode RCWS:$fs))]> {
let usesCustomInserter = 1;
@@ -2091,10 +2091,8 @@ class FILL_W_DESC : MSA_2R_FILL_DESC_BASE<"fill.w", v4i32, vsplati32,
class FILL_D_DESC : MSA_2R_FILL_DESC_BASE<"fill.d", v2i64, vsplati64,
MSA128DOpnd, GPR64Opnd>;
-class FILL_FW_PSEUDO_DESC : MSA_2R_FILL_PSEUDO_BASE<v4f32, vsplatf32, MSA128W,
- FGR32>;
-class FILL_FD_PSEUDO_DESC : MSA_2R_FILL_PSEUDO_BASE<v2f64, vsplatf64, MSA128D,
- FGR64>;
+class FILL_FW_PSEUDO_DESC : MSA_2R_FILL_PSEUDO_BASE<vsplatf32, MSA128W, FGR32>;
+class FILL_FD_PSEUDO_DESC : MSA_2R_FILL_PSEUDO_BASE<vsplatf64, MSA128D, FGR64>;
class FLOG2_W_DESC : MSA_2RF_DESC_BASE<"flog2.w", flog2, MSA128WOpnd>;
class FLOG2_D_DESC : MSA_2RF_DESC_BASE<"flog2.d", flog2, MSA128DOpnd>;
@@ -3755,8 +3753,7 @@ def : MSABitconvertReverseWInDPat<v4f32, v2f64, MSA128W>;
// Pseudos used to implement BNZ.df, and BZ.df
class MSA_CBRANCH_PSEUDO_DESC_BASE<SDPatternOperator OpNode, ValueType TyNode,
- RegisterClass RCWS,
- InstrItinClass itin = NoItinerary> :
+ RegisterClass RCWS> :
MipsPseudo<(outs GPR32:$dst),
(ins RCWS:$ws),
[(set GPR32:$dst, (OpNode (TyNode RCWS:$ws)))]> {
@@ -3764,27 +3761,22 @@ class MSA_CBRANCH_PSEUDO_DESC_BASE<SDPatternOperator OpNode, ValueType TyNode,
bit hasNoSchedulingInfo = 1;
}
-def SNZ_B_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllNonZero, v16i8,
- MSA128B, NoItinerary>;
-def SNZ_H_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllNonZero, v8i16,
- MSA128H, NoItinerary>;
-def SNZ_W_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllNonZero, v4i32,
- MSA128W, NoItinerary>;
-def SNZ_D_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllNonZero, v2i64,
- MSA128D, NoItinerary>;
-def SNZ_V_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAnyNonZero, v16i8,
- MSA128B, NoItinerary>;
-
-def SZ_B_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllZero, v16i8,
- MSA128B, NoItinerary>;
-def SZ_H_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllZero, v8i16,
- MSA128H, NoItinerary>;
-def SZ_W_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllZero, v4i32,
- MSA128W, NoItinerary>;
-def SZ_D_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllZero, v2i64,
- MSA128D, NoItinerary>;
-def SZ_V_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAnyZero, v16i8,
- MSA128B, NoItinerary>;
+def SNZ_B_PSEUDO
+ : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllNonZero, v16i8, MSA128B>;
+def SNZ_H_PSEUDO
+ : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllNonZero, v8i16, MSA128H>;
+def SNZ_W_PSEUDO
+ : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllNonZero, v4i32, MSA128W>;
+def SNZ_D_PSEUDO
+ : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllNonZero, v2i64, MSA128D>;
+def SNZ_V_PSEUDO
+ : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAnyNonZero, v16i8, MSA128B>;
+
+def SZ_B_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllZero, v16i8, MSA128B>;
+def SZ_H_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllZero, v8i16, MSA128H>;
+def SZ_W_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllZero, v4i32, MSA128W>;
+def SZ_D_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllZero, v2i64, MSA128D>;
+def SZ_V_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAnyZero, v16i8, MSA128B>;
// Pseudos used to implement transparent fp16 support.
diff --git a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index 7be5fc33a0af..03a545605fe1 100644
--- a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -964,7 +964,7 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) {
// match the instruction.
case MipsISD::Ins: {
- // Sanity checking for the node operands.
+ // Validating the node operands.
if (Node->getValueType(0) != MVT::i32 && Node->getValueType(0) != MVT::i64)
return false;
@@ -1027,12 +1027,13 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) {
}
SDNode *Rdhwr =
- CurDAG->getMachineNode(RdhwrOpc, DL, Node->getValueType(0),
+ CurDAG->getMachineNode(RdhwrOpc, DL, Node->getValueType(0), MVT::Glue,
CurDAG->getRegister(Mips::HWR29, MVT::i32),
CurDAG->getTargetConstant(0, DL, MVT::i32));
SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, DestReg,
- SDValue(Rdhwr, 0));
- SDValue ResNode = CurDAG->getCopyFromReg(Chain, DL, DestReg, PtrVT);
+ SDValue(Rdhwr, 0), SDValue(Rdhwr, 1));
+ SDValue ResNode = CurDAG->getCopyFromReg(Chain, DL, DestReg, PtrVT,
+ Chain.getValue(1));
ReplaceNode(Node, ResNode.getNode());
return true;
}
diff --git a/llvm/lib/Target/Mips/MipsSEISelLowering.cpp b/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
index 37d4313cc506..1fe6ab09804b 100644
--- a/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -569,7 +569,7 @@ static bool isVectorAllOnes(SDValue N) {
// Endianness doesn't matter in this context because we are looking for
// an all-ones value.
if (BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs))
- return SplatValue.isAllOnesValue();
+ return SplatValue.isAllOnes();
return false;
}
@@ -701,7 +701,7 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
// Fold degenerate cases.
if (IsConstantMask) {
- if (Mask.isAllOnesValue())
+ if (Mask.isAllOnes())
return IfSet;
else if (Mask == 0)
return IfClr;
diff --git a/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp b/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
index 901a4fe4e2ac..26b31cfa9f2a 100644
--- a/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -18,9 +18,9 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/Mips/MipsSubtarget.cpp b/llvm/lib/Target/Mips/MipsSubtarget.cpp
index 8bb9d75e9173..c285385a19dd 100644
--- a/llvm/lib/Target/Mips/MipsSubtarget.cpp
+++ b/llvm/lib/Target/Mips/MipsSubtarget.cpp
@@ -12,17 +12,17 @@
#include "MipsSubtarget.h"
#include "Mips.h"
-#include "MipsMachineFunction.h"
-#include "MipsRegisterInfo.h"
-#include "MipsTargetMachine.h"
#include "MipsCallLowering.h"
#include "MipsLegalizerInfo.h"
+#include "MipsMachineFunction.h"
#include "MipsRegisterBankInfo.h"
+#include "MipsRegisterInfo.h"
+#include "MipsTargetMachine.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -78,7 +78,7 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS,
HasMips3_32(false), HasMips3_32r2(false), HasMips4_32(false),
HasMips4_32r2(false), HasMips5_32r2(false), InMips16Mode(false),
InMips16HardFloat(Mips16HardFloat), InMicroMipsMode(false), HasDSP(false),
- HasDSPR2(false), HasDSPR3(false), AllowMixed16_32(Mixed16_32 | Mips_Os16),
+ HasDSPR2(false), HasDSPR3(false), AllowMixed16_32(Mixed16_32 || Mips_Os16),
Os16(Mips_Os16), HasMSA(false), UseTCCInDIV(false), HasSym32(false),
HasEVA(false), DisableMadd4(false), HasMT(false), HasCRC(false),
HasVirt(false), HasGINV(false), UseIndirectJumpsHazard(false),
diff --git a/llvm/lib/Target/Mips/MipsTargetMachine.cpp b/llvm/lib/Target/Mips/MipsTargetMachine.cpp
index 7dd030f73d55..8de3c9fd25bd 100644
--- a/llvm/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/llvm/lib/Target/Mips/MipsTargetMachine.cpp
@@ -34,9 +34,9 @@
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/InitializePasses.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
#include <string>
diff --git a/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp b/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
index 44041987ec76..db5f607bbb4f 100644
--- a/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
+++ b/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#include "TargetInfo/MipsTargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
Target &llvm::getTheMipsTarget() {
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
index d69166feb042..856d03f0b210 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
@@ -10,15 +10,15 @@
//
//===----------------------------------------------------------------------===//
+#include "NVPTXMCTargetDesc.h"
#include "NVPTXInstPrinter.h"
#include "NVPTXMCAsmInfo.h"
-#include "NVPTXMCTargetDesc.h"
#include "NVPTXTargetStreamer.h"
#include "TargetInfo/NVPTXTargetInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp
index fe335f154703..1cbd650bdf06 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp
@@ -26,7 +26,7 @@ NVPTXTargetStreamer::~NVPTXTargetStreamer() = default;
void NVPTXTargetStreamer::outputDwarfFileDirectives() {
for (const std::string &S : DwarfFiles)
- getStreamer().emitRawText(S.data());
+ getStreamer().emitRawText(S);
DwarfFiles.clear();
}
diff --git a/llvm/lib/Target/NVPTX/NVPTX.td b/llvm/lib/Target/NVPTX/NVPTX.td
index 2b0972b8531e..7af927aba64e 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.td
+++ b/llvm/lib/Target/NVPTX/NVPTX.td
@@ -89,6 +89,12 @@ def PTX71 : SubtargetFeature<"ptx71", "PTXVersion", "71",
"Use PTX version 7.1">;
def PTX72 : SubtargetFeature<"ptx72", "PTXVersion", "72",
"Use PTX version 7.2">;
+def PTX73 : SubtargetFeature<"ptx73", "PTXVersion", "73",
+ "Use PTX version 7.3">;
+def PTX74 : SubtargetFeature<"ptx74", "PTXVersion", "74",
+ "Use PTX version 7.4">;
+def PTX75 : SubtargetFeature<"ptx75", "PTXVersion", "75",
+ "Use PTX version 7.5">;
//===----------------------------------------------------------------------===//
// NVPTX supported processors.
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 38844ff4ddf9..aab6d2034f11 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -71,12 +71,12 @@
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/Path.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
@@ -417,8 +417,7 @@ bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
// llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore,
// we iterate through each back edge of the loop with header MBB, and check
// whether its metadata contains llvm.loop.unroll.disable.
- for (auto I = MBB.pred_begin(); I != MBB.pred_end(); ++I) {
- const MachineBasicBlock *PMBB = *I;
+ for (const MachineBasicBlock *PMBB : MBB.predecessors()) {
if (LI.getLoopFor(PMBB) != LI.getLoopFor(&MBB)) {
// Edges from other loops to MBB are not back edges.
continue;
@@ -703,7 +702,7 @@ void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
const Function *F = &*FI;
- if (F->getAttributes().hasFnAttribute("nvptx-libcall-callee")) {
+ if (F->getAttributes().hasFnAttr("nvptx-libcall-callee")) {
emitDeclaration(F, O);
continue;
}
@@ -1457,7 +1456,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
}
}
- if (!PAL.hasParamAttribute(paramIndex, Attribute::ByVal)) {
+ if (!PAL.hasParamAttr(paramIndex, Attribute::ByVal)) {
if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
// Just print .param .align <a> .b8 .param[size];
// <a> = PAL.getparamalignment
@@ -1748,135 +1747,63 @@ void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
llvm_unreachable("Not scalar type found in printScalarConstant()");
}
-// These utility functions assure we get the right sequence of bytes for a given
-// type even for big-endian machines
-template <typename T> static void ConvertIntToBytes(unsigned char *p, T val) {
- int64_t vp = (int64_t)val;
- for (unsigned i = 0; i < sizeof(T); ++i) {
- p[i] = (unsigned char)vp;
- vp >>= 8;
- }
-}
-static void ConvertFloatToBytes(unsigned char *p, float val) {
- int32_t *vp = (int32_t *)&val;
- for (unsigned i = 0; i < sizeof(int32_t); ++i) {
- p[i] = (unsigned char)*vp;
- *vp >>= 8;
- }
-}
-static void ConvertDoubleToBytes(unsigned char *p, double val) {
- int64_t *vp = (int64_t *)&val;
- for (unsigned i = 0; i < sizeof(int64_t); ++i) {
- p[i] = (unsigned char)*vp;
- *vp >>= 8;
- }
-}
-
void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
- AggBuffer *aggBuffer) {
+ AggBuffer *AggBuffer) {
const DataLayout &DL = getDataLayout();
-
+ int AllocSize = DL.getTypeAllocSize(CPV->getType());
if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
- int s = DL.getTypeAllocSize(CPV->getType());
- if (s < Bytes)
- s = Bytes;
- aggBuffer->addZeros(s);
+ // Non-zero Bytes indicates that we need to zero-fill everything. Otherwise,
+ // only the space allocated by CPV.
+ AggBuffer->addZeros(Bytes ? Bytes : AllocSize);
return;
}
- unsigned char ptr[8];
- switch (CPV->getType()->getTypeID()) {
+ // Helper for filling AggBuffer with APInts.
+ auto AddIntToBuffer = [AggBuffer, Bytes](const APInt &Val) {
+ size_t NumBytes = (Val.getBitWidth() + 7) / 8;
+ SmallVector<unsigned char, 16> Buf(NumBytes);
+ for (unsigned I = 0; I < NumBytes; ++I) {
+ Buf[I] = Val.extractBitsAsZExtValue(8, I * 8);
+ }
+ AggBuffer->addBytes(Buf.data(), NumBytes, Bytes);
+ };
- case Type::IntegerTyID: {
- Type *ETy = CPV->getType();
- if (ETy == Type::getInt8Ty(CPV->getContext())) {
- unsigned char c = (unsigned char)cast<ConstantInt>(CPV)->getZExtValue();
- ConvertIntToBytes<>(ptr, c);
- aggBuffer->addBytes(ptr, 1, Bytes);
- } else if (ETy == Type::getInt16Ty(CPV->getContext())) {
- short int16 = (short)cast<ConstantInt>(CPV)->getZExtValue();
- ConvertIntToBytes<>(ptr, int16);
- aggBuffer->addBytes(ptr, 2, Bytes);
- } else if (ETy == Type::getInt32Ty(CPV->getContext())) {
- if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
- int int32 = (int)(constInt->getZExtValue());
- ConvertIntToBytes<>(ptr, int32);
- aggBuffer->addBytes(ptr, 4, Bytes);
+ switch (CPV->getType()->getTypeID()) {
+ case Type::IntegerTyID:
+ if (const auto CI = dyn_cast<ConstantInt>(CPV)) {
+ AddIntToBuffer(CI->getValue());
+ break;
+ }
+ if (const auto *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
+ if (const auto *CI =
+ dyn_cast<ConstantInt>(ConstantFoldConstant(Cexpr, DL))) {
+ AddIntToBuffer(CI->getValue());
break;
- } else if (const auto *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
- if (const auto *constInt = dyn_cast<ConstantInt>(
- ConstantFoldConstant(Cexpr, DL))) {
- int int32 = (int)(constInt->getZExtValue());
- ConvertIntToBytes<>(ptr, int32);
- aggBuffer->addBytes(ptr, 4, Bytes);
- break;
- }
- if (Cexpr->getOpcode() == Instruction::PtrToInt) {
- Value *v = Cexpr->getOperand(0)->stripPointerCasts();
- aggBuffer->addSymbol(v, Cexpr->getOperand(0));
- aggBuffer->addZeros(4);
- break;
- }
}
- llvm_unreachable("unsupported integer const type");
- } else if (ETy == Type::getInt64Ty(CPV->getContext())) {
- if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
- long long int64 = (long long)(constInt->getZExtValue());
- ConvertIntToBytes<>(ptr, int64);
- aggBuffer->addBytes(ptr, 8, Bytes);
+ if (Cexpr->getOpcode() == Instruction::PtrToInt) {
+ Value *V = Cexpr->getOperand(0)->stripPointerCasts();
+ AggBuffer->addSymbol(V, Cexpr->getOperand(0));
+ AggBuffer->addZeros(AllocSize);
break;
- } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
- if (const auto *constInt = dyn_cast<ConstantInt>(
- ConstantFoldConstant(Cexpr, DL))) {
- long long int64 = (long long)(constInt->getZExtValue());
- ConvertIntToBytes<>(ptr, int64);
- aggBuffer->addBytes(ptr, 8, Bytes);
- break;
- }
- if (Cexpr->getOpcode() == Instruction::PtrToInt) {
- Value *v = Cexpr->getOperand(0)->stripPointerCasts();
- aggBuffer->addSymbol(v, Cexpr->getOperand(0));
- aggBuffer->addZeros(8);
- break;
- }
}
- llvm_unreachable("unsupported integer const type");
- } else
- llvm_unreachable("unsupported integer const type");
+ }
+ llvm_unreachable("unsupported integer const type");
break;
- }
+
case Type::HalfTyID:
case Type::FloatTyID:
- case Type::DoubleTyID: {
- const auto *CFP = cast<ConstantFP>(CPV);
- Type *Ty = CFP->getType();
- if (Ty == Type::getHalfTy(CPV->getContext())) {
- APInt API = CFP->getValueAPF().bitcastToAPInt();
- uint16_t float16 = API.getLoBits(16).getZExtValue();
- ConvertIntToBytes<>(ptr, float16);
- aggBuffer->addBytes(ptr, 2, Bytes);
- } else if (Ty == Type::getFloatTy(CPV->getContext())) {
- float float32 = (float) CFP->getValueAPF().convertToFloat();
- ConvertFloatToBytes(ptr, float32);
- aggBuffer->addBytes(ptr, 4, Bytes);
- } else if (Ty == Type::getDoubleTy(CPV->getContext())) {
- double float64 = CFP->getValueAPF().convertToDouble();
- ConvertDoubleToBytes(ptr, float64);
- aggBuffer->addBytes(ptr, 8, Bytes);
- } else {
- llvm_unreachable("unsupported fp const type");
- }
+ case Type::DoubleTyID:
+ AddIntToBuffer(cast<ConstantFP>(CPV)->getValueAPF().bitcastToAPInt());
break;
- }
+
case Type::PointerTyID: {
if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
- aggBuffer->addSymbol(GVar, GVar);
+ AggBuffer->addSymbol(GVar, GVar);
} else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
const Value *v = Cexpr->stripPointerCasts();
- aggBuffer->addSymbol(v, Cexpr);
+ AggBuffer->addSymbol(v, Cexpr);
}
- unsigned int s = DL.getTypeAllocSize(CPV->getType());
- aggBuffer->addZeros(s);
+ AggBuffer->addZeros(AllocSize);
break;
}
@@ -1884,12 +1811,11 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
case Type::FixedVectorTyID:
case Type::StructTyID: {
if (isa<ConstantAggregate>(CPV) || isa<ConstantDataSequential>(CPV)) {
- int ElementSize = DL.getTypeAllocSize(CPV->getType());
- bufferAggregateConstant(CPV, aggBuffer);
- if (Bytes > ElementSize)
- aggBuffer->addZeros(Bytes - ElementSize);
+ bufferAggregateConstant(CPV, AggBuffer);
+ if (Bytes > AllocSize)
+ AggBuffer->addZeros(Bytes - AllocSize);
} else if (isa<ConstantAggregateZero>(CPV))
- aggBuffer->addZeros(Bytes);
+ AggBuffer->addZeros(Bytes);
else
llvm_unreachable("Unexpected Constant type");
break;
@@ -1996,7 +1922,7 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric)
OS << "Unsupported expression in static initializer: ";
CE->printAsOperand(OS, /*PrintType=*/false,
!MF ? nullptr : MF->getFunction().getParent());
- report_fatal_error(OS.str());
+ report_fatal_error(Twine(OS.str()));
}
case Instruction::AddrSpaceCast: {
@@ -2010,7 +1936,7 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric)
OS << "Unsupported expression in static initializer: ";
CE->printAsOperand(OS, /*PrintType=*/ false,
!MF ? nullptr : MF->getFunction().getParent());
- report_fatal_error(OS.str());
+ report_fatal_error(Twine(OS.str()));
}
case Instruction::GetElementPtr: {
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
index 5c3a4eb470c1..5d680e731e4a 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -106,6 +106,8 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
EmitGeneric = AP.EmitGeneric;
}
+ // Copy Num bytes from Ptr.
+ // if Bytes > Num, zero fill up to Bytes.
unsigned addBytes(unsigned char *Ptr, int Num, int Bytes) {
assert((curpos + Num) <= size);
assert((curpos + Bytes) <= size);
diff --git a/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
index 024e51e5f488..1e19ef4116c3 100644
--- a/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
@@ -36,6 +36,9 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF,
MachineInstr *MI = &MBB.front();
MachineRegisterInfo &MR = MF.getRegInfo();
+ const NVPTXRegisterInfo *NRI =
+ MF.getSubtarget<NVPTXSubtarget>().getRegisterInfo();
+
// This instruction really occurs before first instruction
// in the BB, so giving it no debug location.
DebugLoc dl = DebugLoc();
@@ -50,15 +53,15 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF,
(Is64Bit ? NVPTX::cvta_local_yes_64 : NVPTX::cvta_local_yes);
unsigned MovDepotOpcode =
(Is64Bit ? NVPTX::MOV_DEPOT_ADDR_64 : NVPTX::MOV_DEPOT_ADDR);
- if (!MR.use_empty(NVPTX::VRFrame)) {
+ if (!MR.use_empty(NRI->getFrameRegister(MF))) {
// If %SP is not used, do not bother emitting "cvta.local %SP, %SPL".
MI = BuildMI(MBB, MI, dl,
MF.getSubtarget().getInstrInfo()->get(CvtaLocalOpcode),
- NVPTX::VRFrame)
- .addReg(NVPTX::VRFrameLocal);
+ NRI->getFrameRegister(MF))
+ .addReg(NRI->getFrameLocalRegister(MF));
}
BuildMI(MBB, MI, dl, MF.getSubtarget().getInstrInfo()->get(MovDepotOpcode),
- NVPTX::VRFrameLocal)
+ NRI->getFrameLocalRegister(MF))
.addImm(MF.getFunctionNumber());
}
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
index 9078ff8cfb97..a9a5eae42c1d 100644
--- a/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
@@ -74,19 +74,16 @@ bool GenericToNVVM::runOnModule(Module &M) {
// of original global variable and its clone is placed in the GVMap for later
// use.
- for (Module::global_iterator I = M.global_begin(), E = M.global_end();
- I != E;) {
- GlobalVariable *GV = &*I++;
- if (GV->getType()->getAddressSpace() == llvm::ADDRESS_SPACE_GENERIC &&
- !llvm::isTexture(*GV) && !llvm::isSurface(*GV) &&
- !llvm::isSampler(*GV) && !GV->getName().startswith("llvm.")) {
+ for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals())) {
+ if (GV.getType()->getAddressSpace() == llvm::ADDRESS_SPACE_GENERIC &&
+ !llvm::isTexture(GV) && !llvm::isSurface(GV) && !llvm::isSampler(GV) &&
+ !GV.getName().startswith("llvm.")) {
GlobalVariable *NewGV = new GlobalVariable(
- M, GV->getValueType(), GV->isConstant(),
- GV->getLinkage(),
- GV->hasInitializer() ? GV->getInitializer() : nullptr,
- "", GV, GV->getThreadLocalMode(), llvm::ADDRESS_SPACE_GLOBAL);
- NewGV->copyAttributesFrom(GV);
- GVMap[GV] = NewGV;
+ M, GV.getValueType(), GV.isConstant(), GV.getLinkage(),
+ GV.hasInitializer() ? GV.getInitializer() : nullptr, "", &GV,
+ GV.getThreadLocalMode(), llvm::ADDRESS_SPACE_GLOBAL);
+ NewGV->copyAttributesFrom(&GV);
+ GVMap[&GV] = NewGV;
}
}
@@ -215,7 +212,7 @@ Value *GenericToNVVM::remapConstantVectorOrConstantAggregate(
// If any of the elements has been modified, construct the equivalent
// vector or aggregate value with a set instructions and the converted
// elements.
- Value *NewValue = UndefValue::get(C->getType());
+ Value *NewValue = PoisonValue::get(C->getType());
if (isa<ConstantVector>(C)) {
for (unsigned i = 0; i < NumOperands; ++i) {
Value *Idx = ConstantInt::get(Type::getInt32Ty(M->getContext()), i);
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 00913e93cfd3..dd4290a605a9 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -2348,508 +2348,508 @@ bool NVPTXDAGToDAGISel::tryTextureIntrinsic(SDNode *N) {
switch (N->getOpcode()) {
default: return false;
case NVPTXISD::Tex1DFloatS32:
- Opc = NVPTX::TEX_1D_F32_S32;
+ Opc = NVPTX::TEX_1D_F32_S32_RR;
break;
case NVPTXISD::Tex1DFloatFloat:
- Opc = NVPTX::TEX_1D_F32_F32;
+ Opc = NVPTX::TEX_1D_F32_F32_RR;
break;
case NVPTXISD::Tex1DFloatFloatLevel:
- Opc = NVPTX::TEX_1D_F32_F32_LEVEL;
+ Opc = NVPTX::TEX_1D_F32_F32_LEVEL_RR;
break;
case NVPTXISD::Tex1DFloatFloatGrad:
- Opc = NVPTX::TEX_1D_F32_F32_GRAD;
+ Opc = NVPTX::TEX_1D_F32_F32_GRAD_RR;
break;
case NVPTXISD::Tex1DS32S32:
- Opc = NVPTX::TEX_1D_S32_S32;
+ Opc = NVPTX::TEX_1D_S32_S32_RR;
break;
case NVPTXISD::Tex1DS32Float:
- Opc = NVPTX::TEX_1D_S32_F32;
+ Opc = NVPTX::TEX_1D_S32_F32_RR;
break;
case NVPTXISD::Tex1DS32FloatLevel:
- Opc = NVPTX::TEX_1D_S32_F32_LEVEL;
+ Opc = NVPTX::TEX_1D_S32_F32_LEVEL_RR;
break;
case NVPTXISD::Tex1DS32FloatGrad:
- Opc = NVPTX::TEX_1D_S32_F32_GRAD;
+ Opc = NVPTX::TEX_1D_S32_F32_GRAD_RR;
break;
case NVPTXISD::Tex1DU32S32:
- Opc = NVPTX::TEX_1D_U32_S32;
+ Opc = NVPTX::TEX_1D_U32_S32_RR;
break;
case NVPTXISD::Tex1DU32Float:
- Opc = NVPTX::TEX_1D_U32_F32;
+ Opc = NVPTX::TEX_1D_U32_F32_RR;
break;
case NVPTXISD::Tex1DU32FloatLevel:
- Opc = NVPTX::TEX_1D_U32_F32_LEVEL;
+ Opc = NVPTX::TEX_1D_U32_F32_LEVEL_RR;
break;
case NVPTXISD::Tex1DU32FloatGrad:
- Opc = NVPTX::TEX_1D_U32_F32_GRAD;
+ Opc = NVPTX::TEX_1D_U32_F32_GRAD_RR;
break;
case NVPTXISD::Tex1DArrayFloatS32:
- Opc = NVPTX::TEX_1D_ARRAY_F32_S32;
+ Opc = NVPTX::TEX_1D_ARRAY_F32_S32_RR;
break;
case NVPTXISD::Tex1DArrayFloatFloat:
- Opc = NVPTX::TEX_1D_ARRAY_F32_F32;
+ Opc = NVPTX::TEX_1D_ARRAY_F32_F32_RR;
break;
case NVPTXISD::Tex1DArrayFloatFloatLevel:
- Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL;
+ Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_RR;
break;
case NVPTXISD::Tex1DArrayFloatFloatGrad:
- Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD;
+ Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_RR;
break;
case NVPTXISD::Tex1DArrayS32S32:
- Opc = NVPTX::TEX_1D_ARRAY_S32_S32;
+ Opc = NVPTX::TEX_1D_ARRAY_S32_S32_RR;
break;
case NVPTXISD::Tex1DArrayS32Float:
- Opc = NVPTX::TEX_1D_ARRAY_S32_F32;
+ Opc = NVPTX::TEX_1D_ARRAY_S32_F32_RR;
break;
case NVPTXISD::Tex1DArrayS32FloatLevel:
- Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL;
+ Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_RR;
break;
case NVPTXISD::Tex1DArrayS32FloatGrad:
- Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD;
+ Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_RR;
break;
case NVPTXISD::Tex1DArrayU32S32:
- Opc = NVPTX::TEX_1D_ARRAY_U32_S32;
+ Opc = NVPTX::TEX_1D_ARRAY_U32_S32_RR;
break;
case NVPTXISD::Tex1DArrayU32Float:
- Opc = NVPTX::TEX_1D_ARRAY_U32_F32;
+ Opc = NVPTX::TEX_1D_ARRAY_U32_F32_RR;
break;
case NVPTXISD::Tex1DArrayU32FloatLevel:
- Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL;
+ Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_RR;
break;
case NVPTXISD::Tex1DArrayU32FloatGrad:
- Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD;
+ Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_RR;
break;
case NVPTXISD::Tex2DFloatS32:
- Opc = NVPTX::TEX_2D_F32_S32;
+ Opc = NVPTX::TEX_2D_F32_S32_RR;
break;
case NVPTXISD::Tex2DFloatFloat:
- Opc = NVPTX::TEX_2D_F32_F32;
+ Opc = NVPTX::TEX_2D_F32_F32_RR;
break;
case NVPTXISD::Tex2DFloatFloatLevel:
- Opc = NVPTX::TEX_2D_F32_F32_LEVEL;
+ Opc = NVPTX::TEX_2D_F32_F32_LEVEL_RR;
break;
case NVPTXISD::Tex2DFloatFloatGrad:
- Opc = NVPTX::TEX_2D_F32_F32_GRAD;
+ Opc = NVPTX::TEX_2D_F32_F32_GRAD_RR;
break;
case NVPTXISD::Tex2DS32S32:
- Opc = NVPTX::TEX_2D_S32_S32;
+ Opc = NVPTX::TEX_2D_S32_S32_RR;
break;
case NVPTXISD::Tex2DS32Float:
- Opc = NVPTX::TEX_2D_S32_F32;
+ Opc = NVPTX::TEX_2D_S32_F32_RR;
break;
case NVPTXISD::Tex2DS32FloatLevel:
- Opc = NVPTX::TEX_2D_S32_F32_LEVEL;
+ Opc = NVPTX::TEX_2D_S32_F32_LEVEL_RR;
break;
case NVPTXISD::Tex2DS32FloatGrad:
- Opc = NVPTX::TEX_2D_S32_F32_GRAD;
+ Opc = NVPTX::TEX_2D_S32_F32_GRAD_RR;
break;
case NVPTXISD::Tex2DU32S32:
- Opc = NVPTX::TEX_2D_U32_S32;
+ Opc = NVPTX::TEX_2D_U32_S32_RR;
break;
case NVPTXISD::Tex2DU32Float:
- Opc = NVPTX::TEX_2D_U32_F32;
+ Opc = NVPTX::TEX_2D_U32_F32_RR;
break;
case NVPTXISD::Tex2DU32FloatLevel:
- Opc = NVPTX::TEX_2D_U32_F32_LEVEL;
+ Opc = NVPTX::TEX_2D_U32_F32_LEVEL_RR;
break;
case NVPTXISD::Tex2DU32FloatGrad:
- Opc = NVPTX::TEX_2D_U32_F32_GRAD;
+ Opc = NVPTX::TEX_2D_U32_F32_GRAD_RR;
break;
case NVPTXISD::Tex2DArrayFloatS32:
- Opc = NVPTX::TEX_2D_ARRAY_F32_S32;
+ Opc = NVPTX::TEX_2D_ARRAY_F32_S32_RR;
break;
case NVPTXISD::Tex2DArrayFloatFloat:
- Opc = NVPTX::TEX_2D_ARRAY_F32_F32;
+ Opc = NVPTX::TEX_2D_ARRAY_F32_F32_RR;
break;
case NVPTXISD::Tex2DArrayFloatFloatLevel:
- Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL;
+ Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_RR;
break;
case NVPTXISD::Tex2DArrayFloatFloatGrad:
- Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD;
+ Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_RR;
break;
case NVPTXISD::Tex2DArrayS32S32:
- Opc = NVPTX::TEX_2D_ARRAY_S32_S32;
+ Opc = NVPTX::TEX_2D_ARRAY_S32_S32_RR;
break;
case NVPTXISD::Tex2DArrayS32Float:
- Opc = NVPTX::TEX_2D_ARRAY_S32_F32;
+ Opc = NVPTX::TEX_2D_ARRAY_S32_F32_RR;
break;
case NVPTXISD::Tex2DArrayS32FloatLevel:
- Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL;
+ Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_RR;
break;
case NVPTXISD::Tex2DArrayS32FloatGrad:
- Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD;
+ Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_RR;
break;
case NVPTXISD::Tex2DArrayU32S32:
- Opc = NVPTX::TEX_2D_ARRAY_U32_S32;
+ Opc = NVPTX::TEX_2D_ARRAY_U32_S32_RR;
break;
case NVPTXISD::Tex2DArrayU32Float:
- Opc = NVPTX::TEX_2D_ARRAY_U32_F32;
+ Opc = NVPTX::TEX_2D_ARRAY_U32_F32_RR;
break;
case NVPTXISD::Tex2DArrayU32FloatLevel:
- Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL;
+ Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_RR;
break;
case NVPTXISD::Tex2DArrayU32FloatGrad:
- Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD;
+ Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_RR;
break;
case NVPTXISD::Tex3DFloatS32:
- Opc = NVPTX::TEX_3D_F32_S32;
+ Opc = NVPTX::TEX_3D_F32_S32_RR;
break;
case NVPTXISD::Tex3DFloatFloat:
- Opc = NVPTX::TEX_3D_F32_F32;
+ Opc = NVPTX::TEX_3D_F32_F32_RR;
break;
case NVPTXISD::Tex3DFloatFloatLevel:
- Opc = NVPTX::TEX_3D_F32_F32_LEVEL;
+ Opc = NVPTX::TEX_3D_F32_F32_LEVEL_RR;
break;
case NVPTXISD::Tex3DFloatFloatGrad:
- Opc = NVPTX::TEX_3D_F32_F32_GRAD;
+ Opc = NVPTX::TEX_3D_F32_F32_GRAD_RR;
break;
case NVPTXISD::Tex3DS32S32:
- Opc = NVPTX::TEX_3D_S32_S32;
+ Opc = NVPTX::TEX_3D_S32_S32_RR;
break;
case NVPTXISD::Tex3DS32Float:
- Opc = NVPTX::TEX_3D_S32_F32;
+ Opc = NVPTX::TEX_3D_S32_F32_RR;
break;
case NVPTXISD::Tex3DS32FloatLevel:
- Opc = NVPTX::TEX_3D_S32_F32_LEVEL;
+ Opc = NVPTX::TEX_3D_S32_F32_LEVEL_RR;
break;
case NVPTXISD::Tex3DS32FloatGrad:
- Opc = NVPTX::TEX_3D_S32_F32_GRAD;
+ Opc = NVPTX::TEX_3D_S32_F32_GRAD_RR;
break;
case NVPTXISD::Tex3DU32S32:
- Opc = NVPTX::TEX_3D_U32_S32;
+ Opc = NVPTX::TEX_3D_U32_S32_RR;
break;
case NVPTXISD::Tex3DU32Float:
- Opc = NVPTX::TEX_3D_U32_F32;
+ Opc = NVPTX::TEX_3D_U32_F32_RR;
break;
case NVPTXISD::Tex3DU32FloatLevel:
- Opc = NVPTX::TEX_3D_U32_F32_LEVEL;
+ Opc = NVPTX::TEX_3D_U32_F32_LEVEL_RR;
break;
case NVPTXISD::Tex3DU32FloatGrad:
- Opc = NVPTX::TEX_3D_U32_F32_GRAD;
+ Opc = NVPTX::TEX_3D_U32_F32_GRAD_RR;
break;
case NVPTXISD::TexCubeFloatFloat:
- Opc = NVPTX::TEX_CUBE_F32_F32;
+ Opc = NVPTX::TEX_CUBE_F32_F32_RR;
break;
case NVPTXISD::TexCubeFloatFloatLevel:
- Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL;
+ Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL_RR;
break;
case NVPTXISD::TexCubeS32Float:
- Opc = NVPTX::TEX_CUBE_S32_F32;
+ Opc = NVPTX::TEX_CUBE_S32_F32_RR;
break;
case NVPTXISD::TexCubeS32FloatLevel:
- Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL;
+ Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL_RR;
break;
case NVPTXISD::TexCubeU32Float:
- Opc = NVPTX::TEX_CUBE_U32_F32;
+ Opc = NVPTX::TEX_CUBE_U32_F32_RR;
break;
case NVPTXISD::TexCubeU32FloatLevel:
- Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL;
+ Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL_RR;
break;
case NVPTXISD::TexCubeArrayFloatFloat:
- Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32;
+ Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_RR;
break;
case NVPTXISD::TexCubeArrayFloatFloatLevel:
- Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL;
+ Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_RR;
break;
case NVPTXISD::TexCubeArrayS32Float:
- Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32;
+ Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_RR;
break;
case NVPTXISD::TexCubeArrayS32FloatLevel:
- Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL;
+ Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_RR;
break;
case NVPTXISD::TexCubeArrayU32Float:
- Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32;
+ Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_RR;
break;
case NVPTXISD::TexCubeArrayU32FloatLevel:
- Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL;
+ Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_RR;
break;
case NVPTXISD::Tld4R2DFloatFloat:
- Opc = NVPTX::TLD4_R_2D_F32_F32;
+ Opc = NVPTX::TLD4_R_2D_F32_F32_RR;
break;
case NVPTXISD::Tld4G2DFloatFloat:
- Opc = NVPTX::TLD4_G_2D_F32_F32;
+ Opc = NVPTX::TLD4_G_2D_F32_F32_RR;
break;
case NVPTXISD::Tld4B2DFloatFloat:
- Opc = NVPTX::TLD4_B_2D_F32_F32;
+ Opc = NVPTX::TLD4_B_2D_F32_F32_RR;
break;
case NVPTXISD::Tld4A2DFloatFloat:
- Opc = NVPTX::TLD4_A_2D_F32_F32;
+ Opc = NVPTX::TLD4_A_2D_F32_F32_RR;
break;
case NVPTXISD::Tld4R2DS64Float:
- Opc = NVPTX::TLD4_R_2D_S32_F32;
+ Opc = NVPTX::TLD4_R_2D_S32_F32_RR;
break;
case NVPTXISD::Tld4G2DS64Float:
- Opc = NVPTX::TLD4_G_2D_S32_F32;
+ Opc = NVPTX::TLD4_G_2D_S32_F32_RR;
break;
case NVPTXISD::Tld4B2DS64Float:
- Opc = NVPTX::TLD4_B_2D_S32_F32;
+ Opc = NVPTX::TLD4_B_2D_S32_F32_RR;
break;
case NVPTXISD::Tld4A2DS64Float:
- Opc = NVPTX::TLD4_A_2D_S32_F32;
+ Opc = NVPTX::TLD4_A_2D_S32_F32_RR;
break;
case NVPTXISD::Tld4R2DU64Float:
- Opc = NVPTX::TLD4_R_2D_U32_F32;
+ Opc = NVPTX::TLD4_R_2D_U32_F32_RR;
break;
case NVPTXISD::Tld4G2DU64Float:
- Opc = NVPTX::TLD4_G_2D_U32_F32;
+ Opc = NVPTX::TLD4_G_2D_U32_F32_RR;
break;
case NVPTXISD::Tld4B2DU64Float:
- Opc = NVPTX::TLD4_B_2D_U32_F32;
+ Opc = NVPTX::TLD4_B_2D_U32_F32_RR;
break;
case NVPTXISD::Tld4A2DU64Float:
- Opc = NVPTX::TLD4_A_2D_U32_F32;
+ Opc = NVPTX::TLD4_A_2D_U32_F32_RR;
break;
case NVPTXISD::TexUnified1DFloatS32:
- Opc = NVPTX::TEX_UNIFIED_1D_F32_S32;
+ Opc = NVPTX::TEX_UNIFIED_1D_F32_S32_R;
break;
case NVPTXISD::TexUnified1DFloatFloat:
- Opc = NVPTX::TEX_UNIFIED_1D_F32_F32;
+ Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_R;
break;
case NVPTXISD::TexUnified1DFloatFloatLevel:
- Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL;
+ Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL_R;
break;
case NVPTXISD::TexUnified1DFloatFloatGrad:
- Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD;
+ Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD_R;
break;
case NVPTXISD::TexUnified1DS32S32:
- Opc = NVPTX::TEX_UNIFIED_1D_S32_S32;
+ Opc = NVPTX::TEX_UNIFIED_1D_S32_S32_R;
break;
case NVPTXISD::TexUnified1DS32Float:
- Opc = NVPTX::TEX_UNIFIED_1D_S32_F32;
+ Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_R;
break;
case NVPTXISD::TexUnified1DS32FloatLevel:
- Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL;
+ Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL_R;
break;
case NVPTXISD::TexUnified1DS32FloatGrad:
- Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD;
+ Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD_R;
break;
case NVPTXISD::TexUnified1DU32S32:
- Opc = NVPTX::TEX_UNIFIED_1D_U32_S32;
+ Opc = NVPTX::TEX_UNIFIED_1D_U32_S32_R;
break;
case NVPTXISD::TexUnified1DU32Float:
- Opc = NVPTX::TEX_UNIFIED_1D_U32_F32;
+ Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_R;
break;
case NVPTXISD::TexUnified1DU32FloatLevel:
- Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL;
+ Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL_R;
break;
case NVPTXISD::TexUnified1DU32FloatGrad:
- Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD;
+ Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD_R;
break;
case NVPTXISD::TexUnified1DArrayFloatS32:
- Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32;
+ Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32_R;
break;
case NVPTXISD::TexUnified1DArrayFloatFloat:
- Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32;
+ Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_R;
break;
case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
- Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL;
+ Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL_R;
break;
case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
- Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD;
+ Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD_R;
break;
case NVPTXISD::TexUnified1DArrayS32S32:
- Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32;
+ Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32_R;
break;
case NVPTXISD::TexUnified1DArrayS32Float:
- Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32;
+ Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_R;
break;
case NVPTXISD::TexUnified1DArrayS32FloatLevel:
- Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL;
+ Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL_R;
break;
case NVPTXISD::TexUnified1DArrayS32FloatGrad:
- Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD;
+ Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD_R;
break;
case NVPTXISD::TexUnified1DArrayU32S32:
- Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32;
+ Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32_R;
break;
case NVPTXISD::TexUnified1DArrayU32Float:
- Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32;
+ Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_R;
break;
case NVPTXISD::TexUnified1DArrayU32FloatLevel:
- Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL;
+ Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL_R;
break;
case NVPTXISD::TexUnified1DArrayU32FloatGrad:
- Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD;
+ Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD_R;
break;
case NVPTXISD::TexUnified2DFloatS32:
- Opc = NVPTX::TEX_UNIFIED_2D_F32_S32;
+ Opc = NVPTX::TEX_UNIFIED_2D_F32_S32_R;
break;
case NVPTXISD::TexUnified2DFloatFloat:
- Opc = NVPTX::TEX_UNIFIED_2D_F32_F32;
+ Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_R;
break;
case NVPTXISD::TexUnified2DFloatFloatLevel:
- Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL;
+ Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL_R;
break;
case NVPTXISD::TexUnified2DFloatFloatGrad:
- Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD;
+ Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD_R;
break;
case NVPTXISD::TexUnified2DS32S32:
- Opc = NVPTX::TEX_UNIFIED_2D_S32_S32;
+ Opc = NVPTX::TEX_UNIFIED_2D_S32_S32_R;
break;
case NVPTXISD::TexUnified2DS32Float:
- Opc = NVPTX::TEX_UNIFIED_2D_S32_F32;
+ Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_R;
break;
case NVPTXISD::TexUnified2DS32FloatLevel:
- Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL;
+ Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL_R;
break;
case NVPTXISD::TexUnified2DS32FloatGrad:
- Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD;
+ Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD_R;
break;
case NVPTXISD::TexUnified2DU32S32:
- Opc = NVPTX::TEX_UNIFIED_2D_U32_S32;
+ Opc = NVPTX::TEX_UNIFIED_2D_U32_S32_R;
break;
case NVPTXISD::TexUnified2DU32Float:
- Opc = NVPTX::TEX_UNIFIED_2D_U32_F32;
+ Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_R;
break;
case NVPTXISD::TexUnified2DU32FloatLevel:
- Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL;
+ Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL_R;
break;
case NVPTXISD::TexUnified2DU32FloatGrad:
- Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD;
+ Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD_R;
break;
case NVPTXISD::TexUnified2DArrayFloatS32:
- Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32;
+ Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32_R;
break;
case NVPTXISD::TexUnified2DArrayFloatFloat:
- Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32;
+ Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_R;
break;
case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
- Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL;
+ Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL_R;
break;
case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
- Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD;
+ Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD_R;
break;
case NVPTXISD::TexUnified2DArrayS32S32:
- Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32;
+ Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32_R;
break;
case NVPTXISD::TexUnified2DArrayS32Float:
- Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32;
+ Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_R;
break;
case NVPTXISD::TexUnified2DArrayS32FloatLevel:
- Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL;
+ Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL_R;
break;
case NVPTXISD::TexUnified2DArrayS32FloatGrad:
- Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD;
+ Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD_R;
break;
case NVPTXISD::TexUnified2DArrayU32S32:
- Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32;
+ Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32_R;
break;
case NVPTXISD::TexUnified2DArrayU32Float:
- Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32;
+ Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_R;
break;
case NVPTXISD::TexUnified2DArrayU32FloatLevel:
- Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL;
+ Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL_R;
break;
case NVPTXISD::TexUnified2DArrayU32FloatGrad:
- Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD;
+ Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD_R;
break;
case NVPTXISD::TexUnified3DFloatS32:
- Opc = NVPTX::TEX_UNIFIED_3D_F32_S32;
+ Opc = NVPTX::TEX_UNIFIED_3D_F32_S32_R;
break;
case NVPTXISD::TexUnified3DFloatFloat:
- Opc = NVPTX::TEX_UNIFIED_3D_F32_F32;
+ Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_R;
break;
case NVPTXISD::TexUnified3DFloatFloatLevel:
- Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL;
+ Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL_R;
break;
case NVPTXISD::TexUnified3DFloatFloatGrad:
- Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD;
+ Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD_R;
break;
case NVPTXISD::TexUnified3DS32S32:
- Opc = NVPTX::TEX_UNIFIED_3D_S32_S32;
+ Opc = NVPTX::TEX_UNIFIED_3D_S32_S32_R;
break;
case NVPTXISD::TexUnified3DS32Float:
- Opc = NVPTX::TEX_UNIFIED_3D_S32_F32;
+ Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_R;
break;
case NVPTXISD::TexUnified3DS32FloatLevel:
- Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL;
+ Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL_R;
break;
case NVPTXISD::TexUnified3DS32FloatGrad:
- Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD;
+ Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD_R;
break;
case NVPTXISD::TexUnified3DU32S32:
- Opc = NVPTX::TEX_UNIFIED_3D_U32_S32;
+ Opc = NVPTX::TEX_UNIFIED_3D_U32_S32_R;
break;
case NVPTXISD::TexUnified3DU32Float:
- Opc = NVPTX::TEX_UNIFIED_3D_U32_F32;
+ Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_R;
break;
case NVPTXISD::TexUnified3DU32FloatLevel:
- Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL;
+ Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL_R;
break;
case NVPTXISD::TexUnified3DU32FloatGrad:
- Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD;
+ Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD_R;
break;
case NVPTXISD::TexUnifiedCubeFloatFloat:
- Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32;
+ Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_R;
break;
case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
- Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL;
+ Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL_R;
break;
case NVPTXISD::TexUnifiedCubeS32Float:
- Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32;
+ Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_R;
break;
case NVPTXISD::TexUnifiedCubeS32FloatLevel:
- Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL;
+ Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL_R;
break;
case NVPTXISD::TexUnifiedCubeU32Float:
- Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32;
+ Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_R;
break;
case NVPTXISD::TexUnifiedCubeU32FloatLevel:
- Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL;
+ Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL_R;
break;
case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
- Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32;
+ Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_R;
break;
case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
- Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL;
+ Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL_R;
break;
case NVPTXISD::TexUnifiedCubeArrayS32Float:
- Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32;
+ Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_R;
break;
case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
- Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL;
+ Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL_R;
break;
case NVPTXISD::TexUnifiedCubeArrayU32Float:
- Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32;
+ Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_R;
break;
case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
- Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL;
+ Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL_R;
break;
case NVPTXISD::Tld4UnifiedR2DFloatFloat:
- Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32;
+ Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32_R;
break;
case NVPTXISD::Tld4UnifiedG2DFloatFloat:
- Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32;
+ Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32_R;
break;
case NVPTXISD::Tld4UnifiedB2DFloatFloat:
- Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32;
+ Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32_R;
break;
case NVPTXISD::Tld4UnifiedA2DFloatFloat:
- Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32;
+ Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32_R;
break;
case NVPTXISD::Tld4UnifiedR2DS64Float:
- Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32;
+ Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32_R;
break;
case NVPTXISD::Tld4UnifiedG2DS64Float:
- Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32;
+ Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32_R;
break;
case NVPTXISD::Tld4UnifiedB2DS64Float:
- Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32;
+ Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32_R;
break;
case NVPTXISD::Tld4UnifiedA2DS64Float:
- Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32;
+ Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32_R;
break;
case NVPTXISD::Tld4UnifiedR2DU64Float:
- Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32;
+ Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32_R;
break;
case NVPTXISD::Tld4UnifiedG2DU64Float:
- Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32;
+ Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32_R;
break;
case NVPTXISD::Tld4UnifiedB2DU64Float:
- Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32;
+ Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32_R;
break;
case NVPTXISD::Tld4UnifiedA2DU64Float:
- Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32;
+ Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32_R;
break;
}
@@ -2866,499 +2866,499 @@ bool NVPTXDAGToDAGISel::trySurfaceIntrinsic(SDNode *N) {
switch (N->getOpcode()) {
default: return false;
case NVPTXISD::Suld1DI8Clamp:
- Opc = NVPTX::SULD_1D_I8_CLAMP;
+ Opc = NVPTX::SULD_1D_I8_CLAMP_R;
break;
case NVPTXISD::Suld1DI16Clamp:
- Opc = NVPTX::SULD_1D_I16_CLAMP;
+ Opc = NVPTX::SULD_1D_I16_CLAMP_R;
break;
case NVPTXISD::Suld1DI32Clamp:
- Opc = NVPTX::SULD_1D_I32_CLAMP;
+ Opc = NVPTX::SULD_1D_I32_CLAMP_R;
break;
case NVPTXISD::Suld1DI64Clamp:
- Opc = NVPTX::SULD_1D_I64_CLAMP;
+ Opc = NVPTX::SULD_1D_I64_CLAMP_R;
break;
case NVPTXISD::Suld1DV2I8Clamp:
- Opc = NVPTX::SULD_1D_V2I8_CLAMP;
+ Opc = NVPTX::SULD_1D_V2I8_CLAMP_R;
break;
case NVPTXISD::Suld1DV2I16Clamp:
- Opc = NVPTX::SULD_1D_V2I16_CLAMP;
+ Opc = NVPTX::SULD_1D_V2I16_CLAMP_R;
break;
case NVPTXISD::Suld1DV2I32Clamp:
- Opc = NVPTX::SULD_1D_V2I32_CLAMP;
+ Opc = NVPTX::SULD_1D_V2I32_CLAMP_R;
break;
case NVPTXISD::Suld1DV2I64Clamp:
- Opc = NVPTX::SULD_1D_V2I64_CLAMP;
+ Opc = NVPTX::SULD_1D_V2I64_CLAMP_R;
break;
case NVPTXISD::Suld1DV4I8Clamp:
- Opc = NVPTX::SULD_1D_V4I8_CLAMP;
+ Opc = NVPTX::SULD_1D_V4I8_CLAMP_R;
break;
case NVPTXISD::Suld1DV4I16Clamp:
- Opc = NVPTX::SULD_1D_V4I16_CLAMP;
+ Opc = NVPTX::SULD_1D_V4I16_CLAMP_R;
break;
case NVPTXISD::Suld1DV4I32Clamp:
- Opc = NVPTX::SULD_1D_V4I32_CLAMP;
+ Opc = NVPTX::SULD_1D_V4I32_CLAMP_R;
break;
case NVPTXISD::Suld1DArrayI8Clamp:
- Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP;
+ Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP_R;
break;
case NVPTXISD::Suld1DArrayI16Clamp:
- Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP;
+ Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP_R;
break;
case NVPTXISD::Suld1DArrayI32Clamp:
- Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP;
+ Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP_R;
break;
case NVPTXISD::Suld1DArrayI64Clamp:
- Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP;
+ Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP_R;
break;
case NVPTXISD::Suld1DArrayV2I8Clamp:
- Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP;
+ Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP_R;
break;
case NVPTXISD::Suld1DArrayV2I16Clamp:
- Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP;
+ Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP_R;
break;
case NVPTXISD::Suld1DArrayV2I32Clamp:
- Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP;
+ Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP_R;
break;
case NVPTXISD::Suld1DArrayV2I64Clamp:
- Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP;
+ Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP_R;
break;
case NVPTXISD::Suld1DArrayV4I8Clamp:
- Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP;
+ Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP_R;
break;
case NVPTXISD::Suld1DArrayV4I16Clamp:
- Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP;
+ Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP_R;
break;
case NVPTXISD::Suld1DArrayV4I32Clamp:
- Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP;
+ Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP_R;
break;
case NVPTXISD::Suld2DI8Clamp:
- Opc = NVPTX::SULD_2D_I8_CLAMP;
+ Opc = NVPTX::SULD_2D_I8_CLAMP_R;
break;
case NVPTXISD::Suld2DI16Clamp:
- Opc = NVPTX::SULD_2D_I16_CLAMP;
+ Opc = NVPTX::SULD_2D_I16_CLAMP_R;
break;
case NVPTXISD::Suld2DI32Clamp:
- Opc = NVPTX::SULD_2D_I32_CLAMP;
+ Opc = NVPTX::SULD_2D_I32_CLAMP_R;
break;
case NVPTXISD::Suld2DI64Clamp:
- Opc = NVPTX::SULD_2D_I64_CLAMP;
+ Opc = NVPTX::SULD_2D_I64_CLAMP_R;
break;
case NVPTXISD::Suld2DV2I8Clamp:
- Opc = NVPTX::SULD_2D_V2I8_CLAMP;
+ Opc = NVPTX::SULD_2D_V2I8_CLAMP_R;
break;
case NVPTXISD::Suld2DV2I16Clamp:
- Opc = NVPTX::SULD_2D_V2I16_CLAMP;
+ Opc = NVPTX::SULD_2D_V2I16_CLAMP_R;
break;
case NVPTXISD::Suld2DV2I32Clamp:
- Opc = NVPTX::SULD_2D_V2I32_CLAMP;
+ Opc = NVPTX::SULD_2D_V2I32_CLAMP_R;
break;
case NVPTXISD::Suld2DV2I64Clamp:
- Opc = NVPTX::SULD_2D_V2I64_CLAMP;
+ Opc = NVPTX::SULD_2D_V2I64_CLAMP_R;
break;
case NVPTXISD::Suld2DV4I8Clamp:
- Opc = NVPTX::SULD_2D_V4I8_CLAMP;
+ Opc = NVPTX::SULD_2D_V4I8_CLAMP_R;
break;
case NVPTXISD::Suld2DV4I16Clamp:
- Opc = NVPTX::SULD_2D_V4I16_CLAMP;
+ Opc = NVPTX::SULD_2D_V4I16_CLAMP_R;
break;
case NVPTXISD::Suld2DV4I32Clamp:
- Opc = NVPTX::SULD_2D_V4I32_CLAMP;
+ Opc = NVPTX::SULD_2D_V4I32_CLAMP_R;
break;
case NVPTXISD::Suld2DArrayI8Clamp:
- Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP;
+ Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP_R;
break;
case NVPTXISD::Suld2DArrayI16Clamp:
- Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP;
+ Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP_R;
break;
case NVPTXISD::Suld2DArrayI32Clamp:
- Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP;
+ Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP_R;
break;
case NVPTXISD::Suld2DArrayI64Clamp:
- Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP;
+ Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP_R;
break;
case NVPTXISD::Suld2DArrayV2I8Clamp:
- Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP;
+ Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP_R;
break;
case NVPTXISD::Suld2DArrayV2I16Clamp:
- Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP;
+ Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP_R;
break;
case NVPTXISD::Suld2DArrayV2I32Clamp:
- Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP;
+ Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP_R;
break;
case NVPTXISD::Suld2DArrayV2I64Clamp:
- Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP;
+ Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP_R;
break;
case NVPTXISD::Suld2DArrayV4I8Clamp:
- Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP;
+ Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP_R;
break;
case NVPTXISD::Suld2DArrayV4I16Clamp:
- Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP;
+ Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP_R;
break;
case NVPTXISD::Suld2DArrayV4I32Clamp:
- Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP;
+ Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP_R;
break;
case NVPTXISD::Suld3DI8Clamp:
- Opc = NVPTX::SULD_3D_I8_CLAMP;
+ Opc = NVPTX::SULD_3D_I8_CLAMP_R;
break;
case NVPTXISD::Suld3DI16Clamp:
- Opc = NVPTX::SULD_3D_I16_CLAMP;
+ Opc = NVPTX::SULD_3D_I16_CLAMP_R;
break;
case NVPTXISD::Suld3DI32Clamp:
- Opc = NVPTX::SULD_3D_I32_CLAMP;
+ Opc = NVPTX::SULD_3D_I32_CLAMP_R;
break;
case NVPTXISD::Suld3DI64Clamp:
- Opc = NVPTX::SULD_3D_I64_CLAMP;
+ Opc = NVPTX::SULD_3D_I64_CLAMP_R;
break;
case NVPTXISD::Suld3DV2I8Clamp:
- Opc = NVPTX::SULD_3D_V2I8_CLAMP;
+ Opc = NVPTX::SULD_3D_V2I8_CLAMP_R;
break;
case NVPTXISD::Suld3DV2I16Clamp:
- Opc = NVPTX::SULD_3D_V2I16_CLAMP;
+ Opc = NVPTX::SULD_3D_V2I16_CLAMP_R;
break;
case NVPTXISD::Suld3DV2I32Clamp:
- Opc = NVPTX::SULD_3D_V2I32_CLAMP;
+ Opc = NVPTX::SULD_3D_V2I32_CLAMP_R;
break;
case NVPTXISD::Suld3DV2I64Clamp:
- Opc = NVPTX::SULD_3D_V2I64_CLAMP;
+ Opc = NVPTX::SULD_3D_V2I64_CLAMP_R;
break;
case NVPTXISD::Suld3DV4I8Clamp:
- Opc = NVPTX::SULD_3D_V4I8_CLAMP;
+ Opc = NVPTX::SULD_3D_V4I8_CLAMP_R;
break;
case NVPTXISD::Suld3DV4I16Clamp:
- Opc = NVPTX::SULD_3D_V4I16_CLAMP;
+ Opc = NVPTX::SULD_3D_V4I16_CLAMP_R;
break;
case NVPTXISD::Suld3DV4I32Clamp:
- Opc = NVPTX::SULD_3D_V4I32_CLAMP;
+ Opc = NVPTX::SULD_3D_V4I32_CLAMP_R;
break;
case NVPTXISD::Suld1DI8Trap:
- Opc = NVPTX::SULD_1D_I8_TRAP;
+ Opc = NVPTX::SULD_1D_I8_TRAP_R;
break;
case NVPTXISD::Suld1DI16Trap:
- Opc = NVPTX::SULD_1D_I16_TRAP;
+ Opc = NVPTX::SULD_1D_I16_TRAP_R;
break;
case NVPTXISD::Suld1DI32Trap:
- Opc = NVPTX::SULD_1D_I32_TRAP;
+ Opc = NVPTX::SULD_1D_I32_TRAP_R;
break;
case NVPTXISD::Suld1DI64Trap:
- Opc = NVPTX::SULD_1D_I64_TRAP;
+ Opc = NVPTX::SULD_1D_I64_TRAP_R;
break;
case NVPTXISD::Suld1DV2I8Trap:
- Opc = NVPTX::SULD_1D_V2I8_TRAP;
+ Opc = NVPTX::SULD_1D_V2I8_TRAP_R;
break;
case NVPTXISD::Suld1DV2I16Trap:
- Opc = NVPTX::SULD_1D_V2I16_TRAP;
+ Opc = NVPTX::SULD_1D_V2I16_TRAP_R;
break;
case NVPTXISD::Suld1DV2I32Trap:
- Opc = NVPTX::SULD_1D_V2I32_TRAP;
+ Opc = NVPTX::SULD_1D_V2I32_TRAP_R;
break;
case NVPTXISD::Suld1DV2I64Trap:
- Opc = NVPTX::SULD_1D_V2I64_TRAP;
+ Opc = NVPTX::SULD_1D_V2I64_TRAP_R;
break;
case NVPTXISD::Suld1DV4I8Trap:
- Opc = NVPTX::SULD_1D_V4I8_TRAP;
+ Opc = NVPTX::SULD_1D_V4I8_TRAP_R;
break;
case NVPTXISD::Suld1DV4I16Trap:
- Opc = NVPTX::SULD_1D_V4I16_TRAP;
+ Opc = NVPTX::SULD_1D_V4I16_TRAP_R;
break;
case NVPTXISD::Suld1DV4I32Trap:
- Opc = NVPTX::SULD_1D_V4I32_TRAP;
+ Opc = NVPTX::SULD_1D_V4I32_TRAP_R;
break;
case NVPTXISD::Suld1DArrayI8Trap:
- Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP;
+ Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP_R;
break;
case NVPTXISD::Suld1DArrayI16Trap:
- Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP;
+ Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP_R;
break;
case NVPTXISD::Suld1DArrayI32Trap:
- Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP;
+ Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP_R;
break;
case NVPTXISD::Suld1DArrayI64Trap:
- Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP;
+ Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP_R;
break;
case NVPTXISD::Suld1DArrayV2I8Trap:
- Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP;
+ Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP_R;
break;
case NVPTXISD::Suld1DArrayV2I16Trap:
- Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP;
+ Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP_R;
break;
case NVPTXISD::Suld1DArrayV2I32Trap:
- Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP;
+ Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP_R;
break;
case NVPTXISD::Suld1DArrayV2I64Trap:
- Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP;
+ Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP_R;
break;
case NVPTXISD::Suld1DArrayV4I8Trap:
- Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP;
+ Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP_R;
break;
case NVPTXISD::Suld1DArrayV4I16Trap:
- Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP;
+ Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP_R;
break;
case NVPTXISD::Suld1DArrayV4I32Trap:
- Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP;
+ Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP_R;
break;
case NVPTXISD::Suld2DI8Trap:
- Opc = NVPTX::SULD_2D_I8_TRAP;
+ Opc = NVPTX::SULD_2D_I8_TRAP_R;
break;
case NVPTXISD::Suld2DI16Trap:
- Opc = NVPTX::SULD_2D_I16_TRAP;
+ Opc = NVPTX::SULD_2D_I16_TRAP_R;
break;
case NVPTXISD::Suld2DI32Trap:
- Opc = NVPTX::SULD_2D_I32_TRAP;
+ Opc = NVPTX::SULD_2D_I32_TRAP_R;
break;
case NVPTXISD::Suld2DI64Trap:
- Opc = NVPTX::SULD_2D_I64_TRAP;
+ Opc = NVPTX::SULD_2D_I64_TRAP_R;
break;
case NVPTXISD::Suld2DV2I8Trap:
- Opc = NVPTX::SULD_2D_V2I8_TRAP;
+ Opc = NVPTX::SULD_2D_V2I8_TRAP_R;
break;
case NVPTXISD::Suld2DV2I16Trap:
- Opc = NVPTX::SULD_2D_V2I16_TRAP;
+ Opc = NVPTX::SULD_2D_V2I16_TRAP_R;
break;
case NVPTXISD::Suld2DV2I32Trap:
- Opc = NVPTX::SULD_2D_V2I32_TRAP;
+ Opc = NVPTX::SULD_2D_V2I32_TRAP_R;
break;
case NVPTXISD::Suld2DV2I64Trap:
- Opc = NVPTX::SULD_2D_V2I64_TRAP;
+ Opc = NVPTX::SULD_2D_V2I64_TRAP_R;
break;
case NVPTXISD::Suld2DV4I8Trap:
- Opc = NVPTX::SULD_2D_V4I8_TRAP;
+ Opc = NVPTX::SULD_2D_V4I8_TRAP_R;
break;
case NVPTXISD::Suld2DV4I16Trap:
- Opc = NVPTX::SULD_2D_V4I16_TRAP;
+ Opc = NVPTX::SULD_2D_V4I16_TRAP_R;
break;
case NVPTXISD::Suld2DV4I32Trap:
- Opc = NVPTX::SULD_2D_V4I32_TRAP;
+ Opc = NVPTX::SULD_2D_V4I32_TRAP_R;
break;
case NVPTXISD::Suld2DArrayI8Trap:
- Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP;
+ Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP_R;
break;
case NVPTXISD::Suld2DArrayI16Trap:
- Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP;
+ Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP_R;
break;
case NVPTXISD::Suld2DArrayI32Trap:
- Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP;
+ Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP_R;
break;
case NVPTXISD::Suld2DArrayI64Trap:
- Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP;
+ Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP_R;
break;
case NVPTXISD::Suld2DArrayV2I8Trap:
- Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP;
+ Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP_R;
break;
case NVPTXISD::Suld2DArrayV2I16Trap:
- Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP;
+ Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP_R;
break;
case NVPTXISD::Suld2DArrayV2I32Trap:
- Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP;
+ Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP_R;
break;
case NVPTXISD::Suld2DArrayV2I64Trap:
- Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP;
+ Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP_R;
break;
case NVPTXISD::Suld2DArrayV4I8Trap:
- Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP;
+ Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP_R;
break;
case NVPTXISD::Suld2DArrayV4I16Trap:
- Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP;
+ Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP_R;
break;
case NVPTXISD::Suld2DArrayV4I32Trap:
- Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP;
+ Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP_R;
break;
case NVPTXISD::Suld3DI8Trap:
- Opc = NVPTX::SULD_3D_I8_TRAP;
+ Opc = NVPTX::SULD_3D_I8_TRAP_R;
break;
case NVPTXISD::Suld3DI16Trap:
- Opc = NVPTX::SULD_3D_I16_TRAP;
+ Opc = NVPTX::SULD_3D_I16_TRAP_R;
break;
case NVPTXISD::Suld3DI32Trap:
- Opc = NVPTX::SULD_3D_I32_TRAP;
+ Opc = NVPTX::SULD_3D_I32_TRAP_R;
break;
case NVPTXISD::Suld3DI64Trap:
- Opc = NVPTX::SULD_3D_I64_TRAP;
+ Opc = NVPTX::SULD_3D_I64_TRAP_R;
break;
case NVPTXISD::Suld3DV2I8Trap:
- Opc = NVPTX::SULD_3D_V2I8_TRAP;
+ Opc = NVPTX::SULD_3D_V2I8_TRAP_R;
break;
case NVPTXISD::Suld3DV2I16Trap:
- Opc = NVPTX::SULD_3D_V2I16_TRAP;
+ Opc = NVPTX::SULD_3D_V2I16_TRAP_R;
break;
case NVPTXISD::Suld3DV2I32Trap:
- Opc = NVPTX::SULD_3D_V2I32_TRAP;
+ Opc = NVPTX::SULD_3D_V2I32_TRAP_R;
break;
case NVPTXISD::Suld3DV2I64Trap:
- Opc = NVPTX::SULD_3D_V2I64_TRAP;
+ Opc = NVPTX::SULD_3D_V2I64_TRAP_R;
break;
case NVPTXISD::Suld3DV4I8Trap:
- Opc = NVPTX::SULD_3D_V4I8_TRAP;
+ Opc = NVPTX::SULD_3D_V4I8_TRAP_R;
break;
case NVPTXISD::Suld3DV4I16Trap:
- Opc = NVPTX::SULD_3D_V4I16_TRAP;
+ Opc = NVPTX::SULD_3D_V4I16_TRAP_R;
break;
case NVPTXISD::Suld3DV4I32Trap:
- Opc = NVPTX::SULD_3D_V4I32_TRAP;
+ Opc = NVPTX::SULD_3D_V4I32_TRAP_R;
break;
case NVPTXISD::Suld1DI8Zero:
- Opc = NVPTX::SULD_1D_I8_ZERO;
+ Opc = NVPTX::SULD_1D_I8_ZERO_R;
break;
case NVPTXISD::Suld1DI16Zero:
- Opc = NVPTX::SULD_1D_I16_ZERO;
+ Opc = NVPTX::SULD_1D_I16_ZERO_R;
break;
case NVPTXISD::Suld1DI32Zero:
- Opc = NVPTX::SULD_1D_I32_ZERO;
+ Opc = NVPTX::SULD_1D_I32_ZERO_R;
break;
case NVPTXISD::Suld1DI64Zero:
- Opc = NVPTX::SULD_1D_I64_ZERO;
+ Opc = NVPTX::SULD_1D_I64_ZERO_R;
break;
case NVPTXISD::Suld1DV2I8Zero:
- Opc = NVPTX::SULD_1D_V2I8_ZERO;
+ Opc = NVPTX::SULD_1D_V2I8_ZERO_R;
break;
case NVPTXISD::Suld1DV2I16Zero:
- Opc = NVPTX::SULD_1D_V2I16_ZERO;
+ Opc = NVPTX::SULD_1D_V2I16_ZERO_R;
break;
case NVPTXISD::Suld1DV2I32Zero:
- Opc = NVPTX::SULD_1D_V2I32_ZERO;
+ Opc = NVPTX::SULD_1D_V2I32_ZERO_R;
break;
case NVPTXISD::Suld1DV2I64Zero:
- Opc = NVPTX::SULD_1D_V2I64_ZERO;
+ Opc = NVPTX::SULD_1D_V2I64_ZERO_R;
break;
case NVPTXISD::Suld1DV4I8Zero:
- Opc = NVPTX::SULD_1D_V4I8_ZERO;
+ Opc = NVPTX::SULD_1D_V4I8_ZERO_R;
break;
case NVPTXISD::Suld1DV4I16Zero:
- Opc = NVPTX::SULD_1D_V4I16_ZERO;
+ Opc = NVPTX::SULD_1D_V4I16_ZERO_R;
break;
case NVPTXISD::Suld1DV4I32Zero:
- Opc = NVPTX::SULD_1D_V4I32_ZERO;
+ Opc = NVPTX::SULD_1D_V4I32_ZERO_R;
break;
case NVPTXISD::Suld1DArrayI8Zero:
- Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO;
+ Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO_R;
break;
case NVPTXISD::Suld1DArrayI16Zero:
- Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO;
+ Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO_R;
break;
case NVPTXISD::Suld1DArrayI32Zero:
- Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO;
+ Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO_R;
break;
case NVPTXISD::Suld1DArrayI64Zero:
- Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO;
+ Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO_R;
break;
case NVPTXISD::Suld1DArrayV2I8Zero:
- Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO;
+ Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO_R;
break;
case NVPTXISD::Suld1DArrayV2I16Zero:
- Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO;
+ Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO_R;
break;
case NVPTXISD::Suld1DArrayV2I32Zero:
- Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO;
+ Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO_R;
break;
case NVPTXISD::Suld1DArrayV2I64Zero:
- Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO;
+ Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO_R;
break;
case NVPTXISD::Suld1DArrayV4I8Zero:
- Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO;
+ Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO_R;
break;
case NVPTXISD::Suld1DArrayV4I16Zero:
- Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO;
+ Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO_R;
break;
case NVPTXISD::Suld1DArrayV4I32Zero:
- Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO;
+ Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO_R;
break;
case NVPTXISD::Suld2DI8Zero:
- Opc = NVPTX::SULD_2D_I8_ZERO;
+ Opc = NVPTX::SULD_2D_I8_ZERO_R;
break;
case NVPTXISD::Suld2DI16Zero:
- Opc = NVPTX::SULD_2D_I16_ZERO;
+ Opc = NVPTX::SULD_2D_I16_ZERO_R;
break;
case NVPTXISD::Suld2DI32Zero:
- Opc = NVPTX::SULD_2D_I32_ZERO;
+ Opc = NVPTX::SULD_2D_I32_ZERO_R;
break;
case NVPTXISD::Suld2DI64Zero:
- Opc = NVPTX::SULD_2D_I64_ZERO;
+ Opc = NVPTX::SULD_2D_I64_ZERO_R;
break;
case NVPTXISD::Suld2DV2I8Zero:
- Opc = NVPTX::SULD_2D_V2I8_ZERO;
+ Opc = NVPTX::SULD_2D_V2I8_ZERO_R;
break;
case NVPTXISD::Suld2DV2I16Zero:
- Opc = NVPTX::SULD_2D_V2I16_ZERO;
+ Opc = NVPTX::SULD_2D_V2I16_ZERO_R;
break;
case NVPTXISD::Suld2DV2I32Zero:
- Opc = NVPTX::SULD_2D_V2I32_ZERO;
+ Opc = NVPTX::SULD_2D_V2I32_ZERO_R;
break;
case NVPTXISD::Suld2DV2I64Zero:
- Opc = NVPTX::SULD_2D_V2I64_ZERO;
+ Opc = NVPTX::SULD_2D_V2I64_ZERO_R;
break;
case NVPTXISD::Suld2DV4I8Zero:
- Opc = NVPTX::SULD_2D_V4I8_ZERO;
+ Opc = NVPTX::SULD_2D_V4I8_ZERO_R;
break;
case NVPTXISD::Suld2DV4I16Zero:
- Opc = NVPTX::SULD_2D_V4I16_ZERO;
+ Opc = NVPTX::SULD_2D_V4I16_ZERO_R;
break;
case NVPTXISD::Suld2DV4I32Zero:
- Opc = NVPTX::SULD_2D_V4I32_ZERO;
+ Opc = NVPTX::SULD_2D_V4I32_ZERO_R;
break;
case NVPTXISD::Suld2DArrayI8Zero:
- Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO;
+ Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO_R;
break;
case NVPTXISD::Suld2DArrayI16Zero:
- Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO;
+ Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO_R;
break;
case NVPTXISD::Suld2DArrayI32Zero:
- Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO;
+ Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO_R;
break;
case NVPTXISD::Suld2DArrayI64Zero:
- Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO;
+ Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO_R;
break;
case NVPTXISD::Suld2DArrayV2I8Zero:
- Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO;
+ Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO_R;
break;
case NVPTXISD::Suld2DArrayV2I16Zero:
- Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO;
+ Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO_R;
break;
case NVPTXISD::Suld2DArrayV2I32Zero:
- Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO;
+ Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO_R;
break;
case NVPTXISD::Suld2DArrayV2I64Zero:
- Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO;
+ Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO_R;
break;
case NVPTXISD::Suld2DArrayV4I8Zero:
- Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO;
+ Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO_R;
break;
case NVPTXISD::Suld2DArrayV4I16Zero:
- Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO;
+ Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO_R;
break;
case NVPTXISD::Suld2DArrayV4I32Zero:
- Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO;
+ Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO_R;
break;
case NVPTXISD::Suld3DI8Zero:
- Opc = NVPTX::SULD_3D_I8_ZERO;
+ Opc = NVPTX::SULD_3D_I8_ZERO_R;
break;
case NVPTXISD::Suld3DI16Zero:
- Opc = NVPTX::SULD_3D_I16_ZERO;
+ Opc = NVPTX::SULD_3D_I16_ZERO_R;
break;
case NVPTXISD::Suld3DI32Zero:
- Opc = NVPTX::SULD_3D_I32_ZERO;
+ Opc = NVPTX::SULD_3D_I32_ZERO_R;
break;
case NVPTXISD::Suld3DI64Zero:
- Opc = NVPTX::SULD_3D_I64_ZERO;
+ Opc = NVPTX::SULD_3D_I64_ZERO_R;
break;
case NVPTXISD::Suld3DV2I8Zero:
- Opc = NVPTX::SULD_3D_V2I8_ZERO;
+ Opc = NVPTX::SULD_3D_V2I8_ZERO_R;
break;
case NVPTXISD::Suld3DV2I16Zero:
- Opc = NVPTX::SULD_3D_V2I16_ZERO;
+ Opc = NVPTX::SULD_3D_V2I16_ZERO_R;
break;
case NVPTXISD::Suld3DV2I32Zero:
- Opc = NVPTX::SULD_3D_V2I32_ZERO;
+ Opc = NVPTX::SULD_3D_V2I32_ZERO_R;
break;
case NVPTXISD::Suld3DV2I64Zero:
- Opc = NVPTX::SULD_3D_V2I64_ZERO;
+ Opc = NVPTX::SULD_3D_V2I64_ZERO_R;
break;
case NVPTXISD::Suld3DV4I8Zero:
- Opc = NVPTX::SULD_3D_V4I8_ZERO;
+ Opc = NVPTX::SULD_3D_V4I8_ZERO_R;
break;
case NVPTXISD::Suld3DV4I16Zero:
- Opc = NVPTX::SULD_3D_V4I16_ZERO;
+ Opc = NVPTX::SULD_3D_V4I16_ZERO_R;
break;
case NVPTXISD::Suld3DV4I32Zero:
- Opc = NVPTX::SULD_3D_V4I32_ZERO;
+ Opc = NVPTX::SULD_3D_V4I32_ZERO_R;
break;
}
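
Note: the cases above only rename the selected opcodes, giving every SULD_* surface-load opcode an _R suffix. Read together with the TEX_* changes later in this patch, which split each texture fetch into _RR/_RI/_IR/_II register/immediate-handle variants, the _R forms are presumably the register-handle flavour of each surface load. A minimal C++ sketch of the kind of choice such variants enable during instruction selection (hypothetical helper, not code from this patch):

  #include "llvm/CodeGen/SelectionDAGNodes.h"
  using namespace llvm;

  // Pick the immediate form when the surface handle is a compile-time
  // constant, otherwise fall back to the register form (the _R opcodes).
  static unsigned pickSurfaceLoadOpcode(SDValue Handle, unsigned RegOpc,
                                        unsigned ImmOpc) {
    return isa<ConstantSDNode>(Handle) ? ImmOpc : RegOpc;
  }
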
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index d4842c953ce7..e2f6b69fc530 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -2530,7 +2530,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
// to newly created nodes. The SDNodes for params have to
// appear in the same order as their order of appearance
// in the original function. "idx+1" holds that order.
- if (!PAL.hasParamAttribute(i, Attribute::ByVal)) {
+ if (!PAL.hasParamAttr(i, Attribute::ByVal)) {
bool aggregateIsPacked = false;
if (StructType *STy = dyn_cast<StructType>(Ty))
aggregateIsPacked = STy->isPacked();
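
Note: the hunk above only switches to the renamed accessor; AttributeList::hasParamAttribute appears to have become hasParamAttr upstream as a pure rename, still taking a 0-based parameter index and an attribute kind. A self-contained sketch of the query as now spelled, assuming nothing beyond the public Attributes API:

  #include "llvm/IR/Attributes.h"
  #include "llvm/IR/Function.h"
  using namespace llvm;

  // True when the ArgNo-th parameter of F carries the byval attribute.
  static bool paramIsByVal(const Function &F, unsigned ArgNo) {
    return F.getAttributes().hasParamAttr(ArgNo, Attribute::ByVal);
  }
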
@@ -3547,7 +3547,9 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_col:
case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_col_stride:
case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_row:
- case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_row_stride: {
+ case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_row_stride:
+ case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x4_b16:
+ case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x4_trans_b16: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::v4i32;
Info.ptrVal = I.getArgOperand(0);
@@ -3585,7 +3587,9 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
case Intrinsic::nvvm_wmma_m8n8k32_load_b_s4_col:
case Intrinsic::nvvm_wmma_m8n8k32_load_b_s4_col_stride:
case Intrinsic::nvvm_wmma_m8n8k32_load_b_u4_col_stride:
- case Intrinsic::nvvm_wmma_m8n8k32_load_b_u4_col: {
+ case Intrinsic::nvvm_wmma_m8n8k32_load_b_u4_col:
+ case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x1_b16:
+ case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x1_trans_b16: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::i32;
Info.ptrVal = I.getArgOperand(0);
@@ -3679,7 +3683,9 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_col:
case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_col_stride:
case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_row:
- case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_row_stride: {
+ case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_row_stride:
+ case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x2_b16:
+ case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x2_trans_b16: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::v2i32;
Info.ptrVal = I.getArgOperand(0);
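
Note: the three hunks above extend getTgtMemIntrinsic so the new ldmatrix intrinsics are modelled as memory operations; returning true and filling IntrinsicInfo lets the SelectionDAG builder emit an INTRINSIC_W_CHAIN node with a proper memory operand. A reduced sketch of what one of the added cases communicates (the x4 fragment; the full case body presumably also sets alignment and access flags in lines not shown here):

  #include "llvm/CodeGen/ISDOpcodes.h"
  #include "llvm/CodeGen/TargetLowering.h"
  #include "llvm/IR/Instructions.h"
  using namespace llvm;

  static void describeLdmatrixX4(TargetLowering::IntrinsicInfo &Info,
                                 const CallInst &I) {
    Info.opc = ISD::INTRINSIC_W_CHAIN; // the intrinsic reads memory
    Info.memVT = MVT::v4i32;           // an .x4 fragment is four b32 values
    Info.ptrVal = I.getArgOperand(0);  // source address of the ldmatrix
  }
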
@@ -4441,11 +4447,8 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
//
int numUses = 0;
int nonAddCount = 0;
- for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
- UE = N0.getNode()->use_end();
- UI != UE; ++UI) {
+ for (const SDNode *User : N0.getNode()->uses()) {
numUses++;
- SDNode *User = *UI;
if (User->getOpcode() != ISD::FADD)
++nonAddCount;
}
@@ -4471,8 +4474,7 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
opIsLive = true;
if (!opIsLive)
- for (SDNode::use_iterator UI = left->use_begin(), UE = left->use_end();
- UI != UE; ++UI) {
- SDNode *User = *UI;
+ for (const SDNode *User : left->uses()) {
int orderNo3 = User->getIROrder();
if (orderNo3 > orderNo) {
opIsLive = true;
@@ -4481,8 +4483,7 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
}
if (!opIsLive)
- for (SDNode::use_iterator UI = right->use_begin(), UE = right->use_end();
- UI != UE; ++UI) {
- SDNode *User = *UI;
+ for (const SDNode *User : right->uses()) {
int orderNo3 = User->getIROrder();
if (orderNo3 > orderNo) {
opIsLive = true;
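
Note: the three loops rewritten above are a straight modernization. SDNode::uses() visits exactly the nodes the old use_iterator walk did, in the same order, so the separate SDNode *User = *UI dereference disappears. A minimal sketch of the new idiom, mirroring the non-FADD user count computed in the first hunk:

  #include "llvm/CodeGen/SelectionDAGNodes.h"
  using namespace llvm;

  // Count how many users of V are not floating-point adds.
  static unsigned countNonFAddUsers(SDValue V) {
    unsigned NonAdd = 0;
    for (const SDNode *User : V.getNode()->uses())
      if (User->getOpcode() != ISD::FADD)
        ++NonAdd;
    return NonAdd;
  }
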
diff --git a/llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp b/llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
index a846c2fada26..fc0d5cc6fbfa 100644
--- a/llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
@@ -148,9 +148,8 @@ void NVPTXImageOptimizer::replaceWith(Instruction *From, ConstantInt *To) {
// We implement "poor man's DCE" here to make sure any code that is no longer
// live is actually unreachable and can be trivially eliminated by the
// unreachable block elimination pass.
- for (CallInst::use_iterator UI = From->use_begin(), UE = From->use_end();
- UI != UE; ++UI) {
- if (BranchInst *BI = dyn_cast<BranchInst>(*UI)) {
+ for (Use &U : From->uses()) {
+ if (BranchInst *BI = dyn_cast<BranchInst>(U)) {
if (BI->isUnconditional()) continue;
BasicBlock *Dest;
if (To->isZero())
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
index ec0c92ccf5c5..953d95e55f65 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -195,13 +195,12 @@ unsigned NVPTXInstrInfo::insertBranch(MachineBasicBlock &MBB,
if (Cond.empty()) // Unconditional branch
BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(TBB);
else // Conditional branch
- BuildMI(&MBB, DL, get(NVPTX::CBranch)).addReg(Cond[0].getReg())
- .addMBB(TBB);
+ BuildMI(&MBB, DL, get(NVPTX::CBranch)).add(Cond[0]).addMBB(TBB);
return 1;
}
// Two-way Conditional Branch.
- BuildMI(&MBB, DL, get(NVPTX::CBranch)).addReg(Cond[0].getReg()).addMBB(TBB);
+ BuildMI(&MBB, DL, get(NVPTX::CBranch)).add(Cond[0]).addMBB(TBB);
BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(FBB);
return 2;
}
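
Note: in insertBranch the builder call now forwards the existing condition operand instead of re-creating it from its register number. MachineInstrBuilder::add() copies the MachineOperand as-is, so register flags (and any non-register condition encoding a target might use) survive, whereas .addReg(Cond[0].getReg()) rebuilds a bare register operand. A small sketch of the distinction:

  #include "llvm/CodeGen/MachineInstrBuilder.h"
  using namespace llvm;

  // Forward an existing operand verbatim onto an instruction being built.
  static void addCondition(MachineInstrBuilder &MIB, MachineOperand &Cond) {
    MIB.add(Cond);                 // keeps flags such as kill/undef
    // MIB.addReg(Cond.getReg()); // would keep only the register itself
  }
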
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 4834985b1019..96386af569de 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -2247,8 +2247,18 @@ class MoveParamInst<NVPTXRegClass regclass, string asmstr> :
!strconcat("mov", asmstr, " \t$dst, $src;"),
[(set regclass:$dst, (MoveParam regclass:$src))]>;
+class MoveParamSymbolInst<NVPTXRegClass regclass, Operand srcty,
+ string asmstr> :
+ NVPTXInst<(outs regclass:$dst), (ins srcty:$src),
+ !strconcat("mov", asmstr, " \t$dst, $src;"),
+ [(set regclass:$dst, (MoveParam texternalsym:$src))]>;
+
def MoveParamI64 : MoveParamInst<Int64Regs, ".b64">;
def MoveParamI32 : MoveParamInst<Int32Regs, ".b32">;
+
+def MoveParamSymbolI64 : MoveParamSymbolInst<Int64Regs, i64imm, ".b64">;
+def MoveParamSymbolI32 : MoveParamSymbolInst<Int32Regs, i32imm, ".b32">;
+
def MoveParamI16 :
NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
"cvt.u16.u32 \t$dst, $src;",
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index de4bf2ef3055..511cd875ac55 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -1232,7 +1232,7 @@ multiclass F_ATOMIC_2<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
// has 2 operands, neg the second one
multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
- Operand IMMType, list<Predicate> Pred> {
+ list<Predicate> Pred> {
def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
!strconcat(
"{{ \n\t",
@@ -1244,12 +1244,11 @@ multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
Requires<Pred>;
}
multiclass F_ATOMIC_2_NEG<NVPTXRegClass regclass, string SpaceStr,
- string TypeStr, string OpcStr, PatFrag IntOp, Operand IMMType,
- list<Predicate> Pred = []> {
+ string TypeStr, string OpcStr, PatFrag IntOp, list<Predicate> Pred = []> {
defm p32: F_ATOMIC_2_NEG_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
- IntOp, IMMType, Pred> ;
+ IntOp, Pred> ;
defm p64: F_ATOMIC_2_NEG_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
- IntOp, IMMType, Pred> ;
+ IntOp, Pred> ;
}
// has 3 operands
@@ -1357,21 +1356,21 @@ def atomic_load_sub_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(atomic_load_sub_64 node:$a, node:$b)>;
defm INT_PTX_ATOM_SUB_G_32 : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
- atomic_load_sub_32_g, i32imm>;
+ atomic_load_sub_32_g>;
defm INT_PTX_ATOM_SUB_G_64 : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
- atomic_load_sub_64_g, i64imm>;
+ atomic_load_sub_64_g>;
defm INT_PTX_ATOM_SUB_GEN_32 : F_ATOMIC_2_NEG<Int32Regs, "", "32", ".add",
- atomic_load_sub_32_gen, i32imm>;
+ atomic_load_sub_32_gen>;
defm INT_PTX_ATOM_SUB_GEN_32_USE_G : F_ATOMIC_2_NEG<Int32Regs, ".global", "32",
- ".add", atomic_load_sub_32_gen, i32imm>;
+ ".add", atomic_load_sub_32_gen>;
defm INT_PTX_ATOM_SUB_S_32 : F_ATOMIC_2_NEG<Int32Regs, ".shared", "32", ".add",
- atomic_load_sub_32_s, i32imm>;
+ atomic_load_sub_32_s>;
defm INT_PTX_ATOM_SUB_S_64 : F_ATOMIC_2_NEG<Int64Regs, ".shared", "64", ".add",
- atomic_load_sub_64_s, i64imm>;
+ atomic_load_sub_64_s>;
defm INT_PTX_ATOM_SUB_GEN_64 : F_ATOMIC_2_NEG<Int64Regs, "", "64", ".add",
- atomic_load_sub_64_gen, i64imm>;
+ atomic_load_sub_64_gen>;
defm INT_PTX_ATOM_SUB_GEN_64_USE_G : F_ATOMIC_2_NEG<Int64Regs, ".global", "64",
- ".add", atomic_load_sub_64_gen, i64imm>;
+ ".add", atomic_load_sub_64_gen>;
// atom_swap
@@ -2465,2303 +2464,1563 @@ def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
// texmode_independent
let IsTex = true, IsTexModeUnified = false in {
// Texture fetch instructions using handles
-def TEX_1D_F32_S32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
- "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
- []>;
-def TEX_1D_F32_F32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
- "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
- []>;
-def TEX_1D_F32_F32_LEVEL
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$lod),
- "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x\\}], $lod;",
- []>;
-def TEX_1D_F32_F32_GRAD
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
- Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
- []>;
-def TEX_1D_S32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
- "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
- []>;
-def TEX_1D_S32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
- "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
- []>;
-def TEX_1D_S32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
- Float32Regs:$lod),
- "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x\\}], $lod;",
- []>;
-def TEX_1D_S32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
- Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
- []>;
-def TEX_1D_U32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
- "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
- []>;
-def TEX_1D_U32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
- "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
- []>;
-def TEX_1D_U32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
- Float32Regs:$lod),
- "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x\\}], $lod;",
- []>;
-def TEX_1D_U32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
- Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
- []>;
-def TEX_1D_ARRAY_F32_S32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x\\}];",
- []>;
-def TEX_1D_ARRAY_F32_F32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
- "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x\\}];",
- []>;
-def TEX_1D_ARRAY_F32_F32_LEVEL
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$lod),
- "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x\\}], $lod;",
- []>;
-def TEX_1D_ARRAY_F32_F32_GRAD
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
- []>;
-def TEX_1D_ARRAY_S32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x\\}];",
- []>;
-def TEX_1D_ARRAY_S32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
- "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x\\}];",
- []>;
-def TEX_1D_ARRAY_S32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$lod),
- "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x\\}], $lod;",
- []>;
-def TEX_1D_ARRAY_S32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
- []>;
-def TEX_1D_ARRAY_U32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x\\}];",
- []>;
-def TEX_1D_ARRAY_U32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
- "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x\\}];",
- []>;
-def TEX_1D_ARRAY_U32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$lod),
- "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x\\}], $lod;",
- []>;
-def TEX_1D_ARRAY_U32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
- []>;
-
-def TEX_2D_F32_S32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
-def TEX_2D_F32_F32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
-def TEX_2D_F32_F32_LEVEL
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$lod),
- "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y\\}], $lod;",
- []>;
-def TEX_2D_F32_F32_GRAD
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
- "\\{$grady0, $grady1\\};",
- []>;
-def TEX_2D_S32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
-def TEX_2D_S32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
-def TEX_2D_S32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$lod),
- "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y\\}], $lod;",
- []>;
-def TEX_2D_S32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
- "\\{$grady0, $grady1\\};",
- []>;
-def TEX_2D_U32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
-def TEX_2D_U32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
-def TEX_2D_U32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$lod),
- "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y\\}], $lod;",
- []>;
-def TEX_2D_U32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
- "\\{$grady0, $grady1\\};",
- []>;
+class TEX_1D_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins intype:$x)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
+ []>;
+
+multiclass TEX_1D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
+ def _RR : TEX_1D_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_1D_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_1D_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_1D_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
+}
-def TEX_2D_ARRAY_F32_S32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
- Int32Regs:$y),
- "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $y\\}];",
- []>;
-def TEX_2D_ARRAY_F32_F32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y),
- "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $y\\}];",
- []>;
-def TEX_2D_ARRAY_F32_F32_LEVEL
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y, Float32Regs:$lod),
- "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
- []>;
-def TEX_2D_ARRAY_F32_F32_GRAD
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
- "\\{$grady0, $grady1\\};",
- []>;
-def TEX_2D_ARRAY_S32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
- Int32Regs:$y),
- "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $y\\}];",
- []>;
-def TEX_2D_ARRAY_S32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y),
- "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $y\\}];",
- []>;
-def TEX_2D_ARRAY_S32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y, Float32Regs:$lod),
- "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
- []>;
-def TEX_2D_ARRAY_S32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y,
- Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
- "\\{$grady0, $grady1\\};",
- []>;
-def TEX_2D_ARRAY_U32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
- Int32Regs:$y),
- "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $y\\}];",
- []>;
-def TEX_2D_ARRAY_U32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y),
- "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $y\\}];",
- []>;
-def TEX_2D_ARRAY_U32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y, Float32Regs:$lod),
- "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
- []>;
-def TEX_2D_ARRAY_U32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y,
- Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
- "\\{$grady0, $grady1\\};",
- []>;
+defm TEX_1D_F32_S32 : TEX_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>;
+defm TEX_1D_F32_F32 : TEX_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_1D_S32_S32 : TEX_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>;
+defm TEX_1D_S32_F32 : TEX_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_1D_U32_S32 : TEX_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>;
+defm TEX_1D_U32_F32 : TEX_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_1D_LEVEL_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins intype:$x, intype:$lod)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}], $lod;",
+ []>;
+
+multiclass TEX_1D_LEVEL<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _RR : TEX_1D_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_1D_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_1D_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_1D_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
+}
-def TEX_3D_F32_S32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$z),
- "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_3D_F32_F32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z),
- "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_3D_F32_F32_LEVEL
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z, Float32Regs:$lod),
- "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
- []>;
-def TEX_3D_F32_F32_GRAD
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z,
- Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$gradx2, Float32Regs:$grady0,
- Float32Regs:$grady1, Float32Regs:$grady2),
- "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}], "
- "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
- "\\{$grady0, $grady1, $grady2, $grady2\\};",
- []>;
-def TEX_3D_S32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$z),
- "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_3D_S32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z),
- "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_3D_S32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z, Float32Regs:$lod),
- "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
- []>;
-def TEX_3D_S32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z,
- Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$gradx2, Float32Regs:$grady0,
- Float32Regs:$grady1, Float32Regs:$grady2),
- "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}], "
- "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
- "\\{$grady0, $grady1, $grady2, $grady2\\};",
- []>;
-def TEX_3D_U32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$z),
- "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_3D_U32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z),
- "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_3D_U32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z, Float32Regs:$lod),
- "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
- []>;
-def TEX_3D_U32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z,
- Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$gradx2, Float32Regs:$grady0,
- Float32Regs:$grady1, Float32Regs:$grady2),
- "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}], "
- "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
- "\\{$grady0, $grady1, $grady2, $grady2\\};",
- []>;
+defm TEX_1D_F32_F32_LEVEL :
+ TEX_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_1D_S32_F32_LEVEL :
+ TEX_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_1D_U32_F32_LEVEL :
+ TEX_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_1D_GRAD_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins intype:$x, intype:$gradx, intype:$grady)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}],"
+ " \\{$gradx\\}, \\{$grady\\};",
+ []>;
+
+multiclass TEX_1D_GRAD<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _RR : TEX_1D_GRAD_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_1D_GRAD_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_1D_GRAD_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_1D_GRAD_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
+}
-def TEX_CUBE_F32_F32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_CUBE_F32_F32_LEVEL
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
- Float32Regs:$lod),
- "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
- []>;
-def TEX_CUBE_S32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_CUBE_S32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
- Float32Regs:$lod),
- "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
- []>;
-def TEX_CUBE_U32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_CUBE_U32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
- Float32Regs:$lod),
- "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
- []>;
+defm TEX_1D_F32_F32_GRAD
+ : TEX_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_1D_S32_F32_GRAD
+ : TEX_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_1D_U32_F32_GRAD
+ : TEX_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_1D_ARRAY_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins Int32Regs:$l, intype:$x)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}];",
+ []>;
+
+multiclass TEX_1D_ARRAY<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _RR : TEX_1D_ARRAY_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_1D_ARRAY_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_1D_ARRAY_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_1D_ARRAY_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
+}
-def TEX_CUBE_ARRAY_F32_F32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $z\\}];",
- []>;
-def TEX_CUBE_ARRAY_F32_F32_LEVEL
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
- Float32Regs:$lod),
- "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
- []>;
-def TEX_CUBE_ARRAY_S32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $z\\}];",
- []>;
-def TEX_CUBE_ARRAY_S32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
- Float32Regs:$lod),
- "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
- []>;
-def TEX_CUBE_ARRAY_U32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $z\\}];",
- []>;
-def TEX_CUBE_ARRAY_U32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
- Float32Regs:$lod),
- "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
- []>;
+defm TEX_1D_ARRAY_F32_F32
+ : TEX_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_1D_ARRAY_F32_S32
+ : TEX_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>;
+defm TEX_1D_ARRAY_S32_S32
+ : TEX_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>;
+defm TEX_1D_ARRAY_S32_F32
+ : TEX_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_1D_ARRAY_U32_S32
+ : TEX_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>;
+defm TEX_1D_ARRAY_U32_F32
+ : TEX_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$lod)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, $s, \\{$l, $x\\}], $lod;",
+ []>;
+
+multiclass TEX_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _RR : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
+}
-def TLD4_R_2D_F32_F32
- : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
- Float32Regs:$v2, Float32Regs:$v3),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
-def TLD4_G_2D_F32_F32
- : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
- Float32Regs:$v2, Float32Regs:$v3),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
-def TLD4_B_2D_F32_F32
- : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
- Float32Regs:$v2, Float32Regs:$v3),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
-def TLD4_A_2D_F32_F32
- : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
- Float32Regs:$v2, Float32Regs:$v3),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
-def TLD4_R_2D_S32_F32
- : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
- Int32Regs:$v2, Int32Regs:$v3),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
-def TLD4_G_2D_S32_F32
- : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
- Int32Regs:$v2, Int32Regs:$v3),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
-def TLD4_B_2D_S32_F32
- : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
- Int32Regs:$v2, Int32Regs:$v3),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
-def TLD4_A_2D_S32_F32
- : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
- Int32Regs:$v2, Int32Regs:$v3),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
-def TLD4_R_2D_U32_F32
- : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
- Int32Regs:$v2, Int32Regs:$v3),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
-def TLD4_G_2D_U32_F32
- : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
- Int32Regs:$v2, Int32Regs:$v3),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
-def TLD4_B_2D_U32_F32
- : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
- Int32Regs:$v2, Int32Regs:$v3),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
-def TLD4_A_2D_U32_F32
- : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
- Int32Regs:$v2, Int32Regs:$v3),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
+defm TEX_1D_ARRAY_F32_F32_LEVEL
+ : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_1D_ARRAY_S32_F32_LEVEL
+ : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_1D_ARRAY_U32_F32_LEVEL
+ : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins Int32Regs:$l, intype:$x,
+ intype:$gradx, intype:$grady)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}],"
+ " \\{$gradx\\}, \\{$grady\\};",
+ []>;
+
+multiclass TEX_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _RR : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
}
+defm TEX_1D_ARRAY_F32_F32_GRAD
+ : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_1D_ARRAY_S32_F32_GRAD
+ : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_1D_ARRAY_U32_F32_GRAD
+ : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_2D_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins intype:$x, intype:$y)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}];",
+ []>;
+
+multiclass TEX_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
+ def _RR : TEX_2D_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_2D_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_2D_base<inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_2D_base<inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
+}
-// texmode_unified
-let IsTex = true, IsTexModeUnified = true in {
-// Texture fetch instructions using handles
-def TEX_UNIFIED_1D_F32_S32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$x),
- "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
- []>;
-def TEX_UNIFIED_1D_F32_F32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x),
- "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
- []>;
-def TEX_UNIFIED_1D_F32_F32_LEVEL
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$lod),
- "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x\\}], $lod;",
- []>;
-def TEX_UNIFIED_1D_F32_F32_GRAD
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x,
- Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
- []>;
-def TEX_UNIFIED_1D_S32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$x),
- "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
- []>;
-def TEX_UNIFIED_1D_S32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x),
- "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
- []>;
-def TEX_UNIFIED_1D_S32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x,
- Float32Regs:$lod),
- "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x\\}], $lod;",
- []>;
-def TEX_UNIFIED_1D_S32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x,
- Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
- []>;
-def TEX_UNIFIED_1D_U32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$x),
- "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
- []>;
-def TEX_UNIFIED_1D_U32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x),
- "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
- []>;
-def TEX_UNIFIED_1D_U32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x,
- Float32Regs:$lod),
- "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x\\}], $lod;",
- []>;
-def TEX_UNIFIED_1D_U32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x,
- Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
- []>;
+defm TEX_2D_F32_F32 : TEX_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_2D_F32_S32 : TEX_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>;
+defm TEX_2D_S32_S32 : TEX_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>;
+defm TEX_2D_S32_F32 : TEX_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_2D_U32_S32 : TEX_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>;
+defm TEX_2D_U32_F32 : TEX_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_2D_LEVEL_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins intype:$x, intype:$y, intype:$lod)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, $s, \\{$x, $y\\}], $lod;",
+ []>;
+
+multiclass TEX_2D_LEVEL<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _RR : TEX_2D_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_2D_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_2D_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_2D_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
+}
-def TEX_UNIFIED_1D_ARRAY_F32_S32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
- "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x\\}];",
- []>;
-def TEX_UNIFIED_1D_ARRAY_F32_F32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
- "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x\\}];",
- []>;
-def TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$lod),
- "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x\\}], $lod;",
- []>;
-def TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
- []>;
-def TEX_UNIFIED_1D_ARRAY_S32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
- "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x\\}];",
- []>;
-def TEX_UNIFIED_1D_ARRAY_S32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
- "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x\\}];",
- []>;
-def TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$lod),
- "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x\\}], $lod;",
- []>;
-def TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
- []>;
-def TEX_UNIFIED_1D_ARRAY_U32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
- "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x\\}];",
- []>;
-def TEX_UNIFIED_1D_ARRAY_U32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
- "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x\\}];",
- []>;
-def TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$lod),
- "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x\\}], $lod;",
- []>;
-def TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
- []>;
+defm TEX_2D_F32_F32_LEVEL :
+ TEX_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_2D_S32_F32_LEVEL :
+ TEX_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_2D_U32_F32_LEVEL :
+ TEX_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_2D_GRAD_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins intype:$x, intype:$y,
+ intype:$gradx0, intype:$gradx1,
+ intype:$grady0, intype:$grady1)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}],"
+ " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
+ []>;
+
+multiclass TEX_2D_GRAD<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _RR : TEX_2D_GRAD_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_2D_GRAD_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_2D_GRAD_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_2D_GRAD_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
+}
-def TEX_UNIFIED_2D_F32_S32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
- "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
-def TEX_UNIFIED_2D_F32_F32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
-def TEX_UNIFIED_2D_F32_F32_LEVEL
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$lod),
- "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y\\}], $lod;",
- []>;
-def TEX_UNIFIED_2D_F32_F32_GRAD
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
- "\\{$grady0, $grady1\\};",
- []>;
-def TEX_UNIFIED_2D_S32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
- "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
-def TEX_UNIFIED_2D_S32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
-def TEX_UNIFIED_2D_S32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$lod),
- "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y\\}], $lod;",
- []>;
-def TEX_UNIFIED_2D_S32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
- "\\{$grady0, $grady1\\};",
- []>;
-def TEX_UNIFIED_2D_U32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
- "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
-def TEX_UNIFIED_2D_U32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
-def TEX_UNIFIED_2D_U32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$lod),
- "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y\\}], $lod;",
- []>;
-def TEX_UNIFIED_2D_U32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
- "\\{$grady0, $grady1\\};",
- []>;
+defm TEX_2D_F32_F32_GRAD :
+ TEX_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_2D_S32_F32_GRAD :
+ TEX_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_2D_U32_F32_GRAD :
+ TEX_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_2D_ARRAY_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, $s, \\{$l, $x, $y, $y\\}];",
+ []>;
+
+multiclass TEX_2D_ARRAY<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _RR : TEX_2D_ARRAY_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_2D_ARRAY_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_2D_ARRAY_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_2D_ARRAY_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
+}
-def TEX_UNIFIED_2D_ARRAY_F32_S32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
- Int32Regs:$y),
- "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $y\\}];",
- []>;
-def TEX_UNIFIED_2D_ARRAY_F32_F32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y),
- "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $y\\}];",
- []>;
-def TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y, Float32Regs:$lod),
- "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $y\\}], $lod;",
- []>;
-def TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
- "\\{$grady0, $grady1\\};",
- []>;
-def TEX_UNIFIED_2D_ARRAY_S32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
- Int32Regs:$y),
- "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $y\\}];",
- []>;
-def TEX_UNIFIED_2D_ARRAY_S32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y),
- "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $y\\}];",
- []>;
-def TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y, Float32Regs:$lod),
- "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $y\\}], $lod;",
- []>;
-def TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y,
- Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
- "\\{$grady0, $grady1\\};",
- []>;
-def TEX_UNIFIED_2D_ARRAY_U32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
- Int32Regs:$y),
- "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $y\\}];",
- []>;
-def TEX_UNIFIED_2D_ARRAY_U32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y),
- "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $y\\}];",
- []>;
-def TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y, Float32Regs:$lod),
- "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $y\\}], $lod;",
- []>;
-def TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y,
- Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
- "\\{$grady0, $grady1\\};",
- []>;
+defm TEX_2D_ARRAY_F32_F32
+ : TEX_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_2D_ARRAY_F32_S32
+ : TEX_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>;
+defm TEX_2D_ARRAY_S32_S32
+ : TEX_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>;
+defm TEX_2D_ARRAY_S32_F32
+ : TEX_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_2D_ARRAY_U32_S32
+ : TEX_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>;
+defm TEX_2D_ARRAY_U32_F32
+ : TEX_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
+ intype:$lod)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
+ []>;
+
+multiclass TEX_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _RR : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
+}
-def TEX_UNIFIED_3D_F32_S32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$z),
- "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_UNIFIED_3D_F32_F32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z),
- "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_UNIFIED_3D_F32_F32_LEVEL
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z, Float32Regs:$lod),
- "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}], $lod;",
- []>;
-def TEX_UNIFIED_3D_F32_F32_GRAD
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z,
- Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$gradx2, Float32Regs:$grady0,
- Float32Regs:$grady1, Float32Regs:$grady2),
- "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}], "
- "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
- "\\{$grady0, $grady1, $grady2, $grady2\\};",
- []>;
-def TEX_UNIFIED_3D_S32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$z),
- "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_UNIFIED_3D_S32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z),
- "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_UNIFIED_3D_S32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z, Float32Regs:$lod),
- "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}], $lod;",
- []>;
-def TEX_UNIFIED_3D_S32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z,
- Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$gradx2, Float32Regs:$grady0,
- Float32Regs:$grady1, Float32Regs:$grady2),
- "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}], "
- "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
- "\\{$grady0, $grady1, $grady2, $grady2\\};",
- []>;
-def TEX_UNIFIED_3D_U32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$z),
- "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_UNIFIED_3D_U32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z),
- "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_UNIFIED_3D_U32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z, Float32Regs:$lod),
- "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}], $lod;",
- []>;
-def TEX_UNIFIED_3D_U32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z,
- Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$gradx2, Float32Regs:$grady0,
- Float32Regs:$grady1, Float32Regs:$grady2),
- "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}], "
- "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
- "\\{$grady0, $grady1, $grady2, $grady2\\};",
- []>;
+defm TEX_2D_ARRAY_F32_F32_LEVEL
+ : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_2D_ARRAY_S32_F32_LEVEL
+ : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_2D_ARRAY_U32_F32_LEVEL
+ : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
+ intype:$gradx0, intype:$gradx1,
+ intype:$grady0, intype:$grady1)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, $s, \\{$l, $x, $y, $y\\}],"
+ " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
+ []>;
+
+multiclass TEX_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _RR : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
+}
-def TEX_UNIFIED_CUBE_F32_F32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_UNIFIED_CUBE_F32_F32_LEVEL
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
- Float32Regs:$lod),
- "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}], $lod;",
- []>;
-def TEX_UNIFIED_CUBE_S32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_UNIFIED_CUBE_S32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
- Float32Regs:$lod),
- "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}], $lod;",
- []>;
-def TEX_UNIFIED_CUBE_U32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_UNIFIED_CUBE_U32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
- Float32Regs:$lod),
- "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}], $lod;",
- []>;
+defm TEX_2D_ARRAY_F32_F32_GRAD
+ : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_2D_ARRAY_S32_F32_GRAD
+ : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_2D_ARRAY_U32_F32_GRAD
+ : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_3D_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins intype:$x, intype:$y, intype:$z)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, $s, \\{$x, $y, $z, $z\\}];",
+ []>;
+
+multiclass TEX_3D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
+ def _RR : TEX_3D_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_3D_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_3D_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_3D_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
+}
-def TEX_UNIFIED_CUBE_ARRAY_F32_F32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $z\\}];",
- []>;
-def TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
- Float32Regs:$lod),
- "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $z\\}], $lod;",
- []>;
-def TEX_UNIFIED_CUBE_ARRAY_S32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $z\\}];",
- []>;
-def TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
- Float32Regs:$lod),
- "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $z\\}], $lod;",
- []>;
-def TEX_UNIFIED_CUBE_ARRAY_U32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $z\\}];",
- []>;
-def TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
- Float32Regs:$lod),
- "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $z\\}], $lod;",
- []>;
+defm TEX_3D_F32_F32 : TEX_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_3D_F32_S32 : TEX_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>;
+defm TEX_3D_S32_S32 : TEX_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>;
+defm TEX_3D_S32_F32 : TEX_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_3D_U32_S32 : TEX_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>;
+defm TEX_3D_U32_F32 : TEX_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_3D_LEVEL_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins intype:$x, intype:$y, intype:$z,
+ intype:$lod)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
+ []>;
+
+multiclass TEX_3D_LEVEL<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _RR : TEX_3D_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_3D_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_3D_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_3D_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
+}
-def TLD4_UNIFIED_R_2D_F32_F32
- : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
- Float32Regs:$v2, Float32Regs:$v3),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
-def TLD4_UNIFIED_G_2D_F32_F32
- : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
- Float32Regs:$v2, Float32Regs:$v3),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
-def TLD4_UNIFIED_B_2D_F32_F32
- : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
- Float32Regs:$v2, Float32Regs:$v3),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
-def TLD4_UNIFIED_A_2D_F32_F32
- : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
- Float32Regs:$v2, Float32Regs:$v3),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
-def TLD4_UNIFIED_R_2D_S32_F32
- : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
- Int32Regs:$v2, Int32Regs:$v3),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
-def TLD4_UNIFIED_G_2D_S32_F32
- : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
- Int32Regs:$v2, Int32Regs:$v3),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
-def TLD4_UNIFIED_B_2D_S32_F32
- : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
- Int32Regs:$v2, Int32Regs:$v3),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
-def TLD4_UNIFIED_A_2D_S32_F32
- : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
- Int32Regs:$v2, Int32Regs:$v3),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
-def TLD4_UNIFIED_R_2D_U32_F32
- : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
- Int32Regs:$v2, Int32Regs:$v3),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
-def TLD4_UNIFIED_G_2D_U32_F32
- : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
- Int32Regs:$v2, Int32Regs:$v3),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
-def TLD4_UNIFIED_B_2D_U32_F32
- : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
- Int32Regs:$v2, Int32Regs:$v3),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
-def TLD4_UNIFIED_A_2D_U32_F32
- : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
- Int32Regs:$v2, Int32Regs:$v3),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
+defm TEX_3D_F32_F32_LEVEL
+ : TEX_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_3D_S32_F32_LEVEL
+ : TEX_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_3D_U32_F32_LEVEL
+ : TEX_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_3D_GRAD_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins intype:$x, intype:$y, intype:$z,
+ intype:$gradx0, intype:$gradx1,
+ intype:$gradx2, intype:$grady0,
+ intype:$grady1, intype:$grady2)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, $s, \\{$x, $y, $z, $z\\}],"
+ " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
+ " \\{$grady0, $grady1, $grady2, $grady2\\};",
+ []>;
+
+multiclass TEX_3D_GRAD<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _RR : TEX_3D_GRAD_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_3D_GRAD_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_3D_GRAD_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_3D_GRAD_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
}
+defm TEX_3D_F32_F32_GRAD
+ : TEX_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_3D_S32_F32_GRAD
+ : TEX_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_3D_U32_F32_GRAD
+ : TEX_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_CUBE_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins intype:$x, intype:$y, intype:$z)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, $s, \\{$x, $y, $z, $z\\}];",
+ []>;
+
+multiclass TEX_CUBE<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
+ def _RR : TEX_CUBE_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_CUBE_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_CUBE_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_CUBE_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
+}
+defm TEX_CUBE_F32_F32
+ : TEX_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_CUBE_S32_F32
+ : TEX_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_CUBE_U32_F32
+ : TEX_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins intype:$x, intype:$y, intype:$z,
+ intype:$lod)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
+ []>;
+
+multiclass TEX_CUBE_LEVEL<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _RR : TEX_CUBE_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_CUBE_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_CUBE_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_CUBE_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
+}
-//=== Surface load instructions
-// .clamp variant
-let IsSuld = true in {
-def SULD_1D_I8_CLAMP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.b8.clamp \\{$r\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_I16_CLAMP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.b16.clamp \\{$r\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_I32_CLAMP
- : NVPTXInst<(outs Int32Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.b32.clamp \\{$r\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_I64_CLAMP
- : NVPTXInst<(outs Int64Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.b64.clamp \\{$r\\}, [$s, \\{$x\\}];",
- []>;
+defm TEX_CUBE_F32_F32_LEVEL
+ : TEX_CUBE_LEVEL<"tex.level.cube.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_CUBE_S32_F32_LEVEL
+ : TEX_CUBE_LEVEL<"tex.level.cube.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_CUBE_U32_F32_LEVEL
+ : TEX_CUBE_LEVEL<"tex.level.cube.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_CUBE_ARRAY_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
+ intype:$z)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, $s, \\{$l, $x, $y, $z\\}];",
+ []>;
+
+multiclass TEX_CUBE_ARRAY<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _RR : TEX_CUBE_ARRAY_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_CUBE_ARRAY_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_CUBE_ARRAY_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_CUBE_ARRAY_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
+}
-def SULD_1D_ARRAY_I8_CLAMP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.b8.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_I16_CLAMP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.b16.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_I32_CLAMP
- : NVPTXInst<(outs Int32Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.b32.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_I64_CLAMP
- : NVPTXInst<(outs Int64Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.b64.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
- []>;
+defm TEX_CUBE_ARRAY_F32_F32
+ : TEX_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_CUBE_ARRAY_S32_F32
+ : TEX_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_CUBE_ARRAY_U32_F32
+ : TEX_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
+ intype:$z, intype:$lod)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
+ []>;
+
+multiclass TEX_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _RR : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
+}
-def SULD_2D_I8_CLAMP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.b8.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_I16_CLAMP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.b16.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_I32_CLAMP
- : NVPTXInst<(outs Int32Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.b32.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_I64_CLAMP
- : NVPTXInst<(outs Int64Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.b64.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
- []>;
+defm TEX_CUBE_ARRAY_F32_F32_LEVEL
+ : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32",
+ Float32Regs, Float32Regs>;
+defm TEX_CUBE_ARRAY_S32_F32_LEVEL
+ : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32",
+ Int32Regs, Float32Regs>;
+defm TEX_CUBE_ARRAY_U32_F32_LEVEL
+ : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32",
+ Int32Regs, Float32Regs>;
+
+class TLD4_2D_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$v0, outtype:$v1,
+ outtype:$v2, outtype:$v3),
+ !con(texsamp, (ins intype:$x, intype:$y)),
+ inst # " \t\\{$v0, $v1, $v2, $v3\\}, [$t, $s, \\{$x, $y\\}];",
+ []>;
+
+multiclass TLD4_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
+ def _RR : TLD4_2D_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TLD4_2D_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TLD4_2D_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TLD4_2D_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
+}
-def SULD_2D_ARRAY_I8_CLAMP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.b8.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_I16_CLAMP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.b16.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_I32_CLAMP
- : NVPTXInst<(outs Int32Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.b32.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_I64_CLAMP
- : NVPTXInst<(outs Int64Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.b64.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
- []>;
+defm TLD4_R_2D_F32_F32
+ : TLD4_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TLD4_G_2D_F32_F32
+ : TLD4_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TLD4_B_2D_F32_F32
+ : TLD4_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TLD4_A_2D_F32_F32
+ : TLD4_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+
+defm TLD4_R_2D_S32_F32
+ : TLD4_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TLD4_G_2D_S32_F32
+ : TLD4_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TLD4_B_2D_S32_F32
+ : TLD4_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TLD4_A_2D_S32_F32
+ : TLD4_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+
+defm TLD4_R_2D_U32_F32
+ : TLD4_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+defm TLD4_G_2D_U32_F32
+ : TLD4_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+defm TLD4_B_2D_U32_F32
+ : TLD4_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+defm TLD4_A_2D_U32_F32
+ : TLD4_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>;
-def SULD_3D_I8_CLAMP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.b8.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_I16_CLAMP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.b16.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_I32_CLAMP
- : NVPTXInst<(outs Int32Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.b32.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_I64_CLAMP
- : NVPTXInst<(outs Int64Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.b64.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
}
-let IsSuld = 2 in {
-def SULD_1D_V2I8_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_V2I16_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_V2I32_CLAMP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_V2I64_CLAMP
- : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_ARRAY_V2I8_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_V2I16_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_V2I32_CLAMP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_V2I64_CLAMP
- : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
- []>;
+// texmode_unified
+let IsTex = true, IsTexModeUnified = true in {
+// Texture fetch instructions using handles
-def SULD_2D_V2I8_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_V2I16_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_V2I32_CLAMP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_V2I64_CLAMP
- : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
- []>;
+class TEX_UNIFIED_1D_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins intype:$x)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
+ []>;
+
+multiclass TEX_UNIFIED_1D<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_1D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_1D_base<inst, outtype, intype, (ins i64imm:$t)>;
+}
-def SULD_2D_ARRAY_V2I8_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v2.b8.clamp \\{$r, $g\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_V2I16_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v2.b16.clamp \\{$r, $g\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_V2I32_CLAMP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v2.b32.clamp \\{$r, $g\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_V2I64_CLAMP
- : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v2.b64.clamp \\{$r, $g\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
+defm TEX_UNIFIED_1D_F32_S32
+ : TEX_UNIFIED_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>;
+defm TEX_UNIFIED_1D_F32_F32
+ : TEX_UNIFIED_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_1D_S32_S32
+ : TEX_UNIFIED_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>;
+defm TEX_UNIFIED_1D_S32_F32
+ : TEX_UNIFIED_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_1D_U32_S32
+ : TEX_UNIFIED_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>;
+defm TEX_UNIFIED_1D_U32_F32
+ : TEX_UNIFIED_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_1D_LEVEL_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins intype:$x, intype:$lod)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}], $lod;",
+ []>;
+
+multiclass TEX_UNIFIED_1D_LEVEL<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_1D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_1D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
+}
-def SULD_3D_V2I8_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_V2I16_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_V2I32_CLAMP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_V2I64_CLAMP
- : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
+defm TEX_UNIFIED_1D_F32_F32_LEVEL
+ : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_1D_S32_F32_LEVEL
+ : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_1D_U32_F32_LEVEL
+ : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_1D_GRAD_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins intype:$x, intype:$gradx, intype:$grady)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
+ []>;
+
+multiclass TEX_UNIFIED_1D_GRAD<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_1D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_1D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
}
-let IsSuld = 3 in {
-def SULD_1D_V4I8_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_V4I16_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_V4I32_CLAMP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
- []>;
+defm TEX_UNIFIED_1D_F32_F32_GRAD
+ : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_1D_S32_F32_GRAD
+ : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_1D_U32_F32_GRAD
+ : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_1D_ARRAY_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins Int32Regs:$l, intype:$x)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}];",
+ []>;
+
+multiclass TEX_UNIFIED_1D_ARRAY<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_1D_ARRAY_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_1D_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>;
+}
-def SULD_1D_ARRAY_V4I8_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_V4I16_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_V4I32_CLAMP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x\\}];",
- []>;
+defm TEX_UNIFIED_1D_ARRAY_F32_S32
+ : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>;
+defm TEX_UNIFIED_1D_ARRAY_F32_F32
+ : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_1D_ARRAY_S32_S32
+ : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>;
+defm TEX_UNIFIED_1D_ARRAY_S32_F32
+ : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_1D_ARRAY_U32_S32
+ : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>;
+defm TEX_UNIFIED_1D_ARRAY_U32_F32
+ : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins Int32Regs:$l, intype:$x, intype:$lod)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}], $lod;",
+ []>;
+
+multiclass TEX_UNIFIED_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_1D_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_1D_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t)>;
+}
-def SULD_2D_V4I8_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_V4I16_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_V4I32_CLAMP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
- []>;
+defm TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL
+ : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32",
+ Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL
+ : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32",
+ Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL
+ : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32",
+ Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins Int32Regs:$l, intype:$x,
+ intype:$gradx, intype:$grady)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
+ []>;
+
+multiclass TEX_UNIFIED_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, outtype, intype,
+ (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, outtype, intype,
+ (ins i64imm:$t)>;
+}
-def SULD_2D_ARRAY_V4I8_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_V4I16_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_V4I32_CLAMP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
+defm TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD
+ : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32",
+ Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD
+ : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32",
+ Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD
+ : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32",
+ Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_2D_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins intype:$x, intype:$y)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}];",
+ []>;
+
+multiclass TEX_UNIFIED_2D<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>;
+}
+defm TEX_UNIFIED_2D_F32_S32
+ : TEX_UNIFIED_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>;
+defm TEX_UNIFIED_2D_F32_F32
+ : TEX_UNIFIED_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_2D_S32_S32
+ : TEX_UNIFIED_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>;
+defm TEX_UNIFIED_2D_S32_F32
+ : TEX_UNIFIED_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_2D_U32_S32
+ : TEX_UNIFIED_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>;
+defm TEX_UNIFIED_2D_U32_F32
+ : TEX_UNIFIED_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_2D_LEVEL_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins intype:$x, intype:$y, intype:$lod)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}], $lod;",
+ []>;
+
+multiclass TEX_UNIFIED_2D_LEVEL<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
+}
-def SULD_3D_V4I8_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_V4I16_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_V4I32_CLAMP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$x, $y, $z, $z\\}];",
- []>;
+defm TEX_UNIFIED_2D_F32_F32_LEVEL
+ : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_2D_S32_F32_LEVEL
+ : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_2D_U32_F32_LEVEL
+ : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_2D_GRAD_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins intype:$x, intype:$y,
+ intype:$gradx0, intype:$gradx1,
+ intype:$grady0, intype:$grady1)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}],"
+ " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
+ []>;
+multiclass TEX_UNIFIED_2D_GRAD<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
}
+defm TEX_UNIFIED_2D_F32_F32_GRAD
+ : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_2D_S32_F32_GRAD
+ : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_2D_U32_F32_GRAD
+ : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_2D_ARRAY_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}];",
+ []>;
+multiclass TEX_UNIFIED_2D_ARRAY<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>;
+}
-// .trap variant
-let IsSuld = true in {
-def SULD_1D_I8_TRAP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.b8.trap \\{$r\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_I16_TRAP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.b16.trap \\{$r\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_I32_TRAP
- : NVPTXInst<(outs Int32Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.b32.trap \\{$r\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_I64_TRAP
- : NVPTXInst<(outs Int64Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.b64.trap \\{$r\\}, [$s, \\{$x\\}];",
- []>;
+defm TEX_UNIFIED_2D_ARRAY_F32_S32
+ : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>;
+defm TEX_UNIFIED_2D_ARRAY_F32_F32
+ : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_2D_ARRAY_S32_S32
+ : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>;
+defm TEX_UNIFIED_2D_ARRAY_S32_F32
+ : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_2D_ARRAY_U32_S32
+ : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>;
+defm TEX_UNIFIED_2D_ARRAY_U32_F32
+ : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y,
+ intype:$lod)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, \\{$l, $x, $y, $y\\}], $lod;",
+ []>;
+multiclass TEX_UNIFIED_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t)>;
+}
-def SULD_1D_ARRAY_I8_TRAP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.b8.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_I16_TRAP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.b16.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_I32_TRAP
- : NVPTXInst<(outs Int32Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.b32.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_I64_TRAP
- : NVPTXInst<(outs Int64Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.b64.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
- []>;
+defm TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL
+ : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32",
+ Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL
+ : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32",
+ Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL
+ : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32",
+ Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y,
+ intype:$gradx0, intype:$gradx1,
+ intype:$grady0, intype:$grady1)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}],"
+ " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
+ []>;
+multiclass TEX_UNIFIED_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype,
+ (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype,
+ (ins i64imm:$t)>;
+}
-def SULD_2D_I8_TRAP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.b8.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_I16_TRAP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.b16.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_I32_TRAP
- : NVPTXInst<(outs Int32Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.b32.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_I64_TRAP
- : NVPTXInst<(outs Int64Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.b64.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
- []>;
+defm TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD
+ : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32",
+ Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD
+ : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32",
+ Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD
+ : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32",
+ Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_3D_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins intype:$x, intype:$y, intype:$z)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];",
+ []>;
+multiclass TEX_UNIFIED_3D<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins i64imm:$t)>;
+}
-def SULD_2D_ARRAY_I8_TRAP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.b8.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_I16_TRAP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.b16.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_I32_TRAP
- : NVPTXInst<(outs Int32Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.b32.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_I64_TRAP
- : NVPTXInst<(outs Int64Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.b64.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
- []>;
+defm TEX_UNIFIED_3D_F32_S32
+ : TEX_UNIFIED_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>;
+defm TEX_UNIFIED_3D_F32_F32
+ : TEX_UNIFIED_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_3D_S32_S32
+ : TEX_UNIFIED_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>;
+defm TEX_UNIFIED_3D_S32_F32
+ : TEX_UNIFIED_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_3D_U32_S32
+ : TEX_UNIFIED_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>;
+defm TEX_UNIFIED_3D_U32_F32
+ : TEX_UNIFIED_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_3D_LEVEL_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, \\{$x, $y, $z, $z\\}], $lod;",
+ []>;
+multiclass TEX_UNIFIED_3D_LEVEL<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
+}
-def SULD_3D_I8_TRAP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.b8.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_I16_TRAP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.b16.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_I32_TRAP
- : NVPTXInst<(outs Int32Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.b32.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_I64_TRAP
- : NVPTXInst<(outs Int64Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.b64.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
+defm TEX_UNIFIED_3D_F32_F32_LEVEL
+ : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_3D_S32_F32_LEVEL
+ : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_3D_U32_F32_LEVEL
+ : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_3D_GRAD_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins intype:$x, intype:$y, intype:$z,
+ intype:$gradx0, intype:$gradx1,
+ intype:$gradx2, intype:$grady0,
+ intype:$grady1, intype:$grady2)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}],"
+ " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
+ " \\{$grady0, $grady1, $grady2, $grady2\\};",
+ []>;
+multiclass TEX_UNIFIED_3D_GRAD<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
}
-let IsSuld = 2 in {
-def SULD_1D_V2I8_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_V2I16_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_V2I32_TRAP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_V2I64_TRAP
- : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
- []>;
+defm TEX_UNIFIED_3D_F32_F32_GRAD
+ : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_3D_S32_F32_GRAD
+ : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_3D_U32_F32_GRAD
+ : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_CUBE_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins intype:$x, intype:$y, intype:$z)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];",
+ []>;
+multiclass TEX_UNIFIED_CUBE<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins i64imm:$t)>;
+}
-def SULD_1D_ARRAY_V2I8_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_V2I16_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_V2I32_TRAP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_V2I64_TRAP
- : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
- []>;
+defm TEX_UNIFIED_CUBE_F32_F32
+ : TEX_UNIFIED_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_CUBE_S32_F32
+ : TEX_UNIFIED_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_CUBE_U32_F32
+ : TEX_UNIFIED_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, \\{$x, $y, $z, $z\\}], $lod;",
+ []>;
+multiclass TEX_UNIFIED_CUBE_LEVEL<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t)>;
+}
-def SULD_2D_V2I8_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_V2I16_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_V2I32_TRAP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_V2I64_TRAP
- : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
- []>;
+defm TEX_UNIFIED_CUBE_F32_F32_LEVEL
+ : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.f32.f32",
+ Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_CUBE_S32_F32_LEVEL
+ : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.s32.f32",
+ Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_CUBE_U32_F32_LEVEL
+ : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.u32.f32",
+ Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_CUBE_ARRAY_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $z\\}];",
+ []>;
+multiclass TEX_UNIFIED_CUBE_ARRAY<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype,
+ (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype,
+ (ins i64imm:$t)>;
+}
-def SULD_2D_ARRAY_V2I8_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v2.b8.trap \\{$r, $g\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_V2I16_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v2.b16.trap \\{$r, $g\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_V2I32_TRAP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v2.b32.trap \\{$r, $g\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_V2I64_TRAP
- : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v2.b64.trap \\{$r, $g\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
+defm TEX_UNIFIED_CUBE_ARRAY_F32_F32
+ : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_CUBE_ARRAY_S32_F32
+ : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_CUBE_ARRAY_U32_F32
+ : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z,
+ intype:$lod)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, \\{$l, $x, $y, $z\\}], $lod;",
+ []>;
+multiclass TEX_UNIFIED_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t)>;
+}
-def SULD_3D_V2I8_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_V2I16_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_V2I32_TRAP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_V2I64_TRAP
- : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
+defm TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL
+ : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32",
+ Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL
+ : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32",
+ Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
+ : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32",
+ Int32Regs, Float32Regs>;
+
+class TLD4_UNIFIED_2D_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$v0, outtype:$v1,
+ outtype:$v2, outtype:$v3),
+ !con(tex, (ins intype:$x, intype:$y)),
+ inst # " \t\\{$v0, $v1, $v2, $v3\\}, [$t, \\{$x, $y\\}];",
+ []>;
+multiclass TLD4_UNIFIED_2D<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ def _I : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>;
}
-let IsSuld = 3 in {
-def SULD_1D_V4I8_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_V4I16_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_V4I32_TRAP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
- []>;
+defm TLD4_UNIFIED_R_2D_F32_F32
+ : TLD4_UNIFIED_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TLD4_UNIFIED_G_2D_F32_F32
+ : TLD4_UNIFIED_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TLD4_UNIFIED_B_2D_F32_F32
+ : TLD4_UNIFIED_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TLD4_UNIFIED_A_2D_F32_F32
+ : TLD4_UNIFIED_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+
+defm TLD4_UNIFIED_R_2D_S32_F32
+ : TLD4_UNIFIED_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TLD4_UNIFIED_G_2D_S32_F32
+ : TLD4_UNIFIED_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TLD4_UNIFIED_B_2D_S32_F32
+ : TLD4_UNIFIED_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TLD4_UNIFIED_A_2D_S32_F32
+ : TLD4_UNIFIED_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+
+defm TLD4_UNIFIED_R_2D_U32_F32
+ : TLD4_UNIFIED_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+defm TLD4_UNIFIED_G_2D_U32_F32
+ : TLD4_UNIFIED_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+defm TLD4_UNIFIED_B_2D_U32_F32
+ : TLD4_UNIFIED_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+defm TLD4_UNIFIED_A_2D_U32_F32
+ : TLD4_UNIFIED_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>;
-def SULD_1D_ARRAY_V4I8_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_V4I16_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_V4I32_TRAP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x\\}];",
- []>;
+}
-def SULD_2D_V4I8_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_V4I16_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_V4I32_TRAP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_ARRAY_V4I8_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_V4I16_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_V4I32_TRAP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
+//=== Surface load instructions
-def SULD_3D_V4I8_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_V4I16_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_V4I32_TRAP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$x, $y, $z, $z\\}];",
- []>;
+let IsSuld = true in {
+
+class SULD_1D_base<string inst, NVPTXRegClass outtype, dag surf>
+ : NVPTXInst<(outs outtype:$r),
+ !con(surf, (ins Int32Regs:$x)),
+ inst # " \\{$r\\}, [$s, \\{$x\\}];",
+ []>;
+multiclass SULD_1D<string inst, NVPTXRegClass outtype> {
+ def _R : SULD_1D_base<inst, outtype, (ins Int64Regs:$s)>;
+ def _I : SULD_1D_base<inst, outtype, (ins i64imm:$s)>;
}
-// .zero variant
-let IsSuld = true in {
-def SULD_1D_I8_ZERO
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.b8.zero \\{$r\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_I16_ZERO
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.b16.zero \\{$r\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_I32_ZERO
- : NVPTXInst<(outs Int32Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.b32.zero \\{$r\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_I64_ZERO
- : NVPTXInst<(outs Int64Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.b64.zero \\{$r\\}, [$s, \\{$x\\}];",
- []>;
+defm SULD_1D_I8_CLAMP : SULD_1D<"suld.b.1d.b8.clamp", Int16Regs>;
+defm SULD_1D_I16_CLAMP : SULD_1D<"suld.b.1d.b16.clamp", Int16Regs>;
+defm SULD_1D_I32_CLAMP : SULD_1D<"suld.b.1d.b32.clamp", Int32Regs>;
+defm SULD_1D_I64_CLAMP : SULD_1D<"suld.b.1d.b64.clamp", Int64Regs>;
+
+defm SULD_1D_I8_TRAP : SULD_1D<"suld.b.1d.b8.trap", Int16Regs>;
+defm SULD_1D_I16_TRAP : SULD_1D<"suld.b.1d.b16.trap", Int16Regs>;
+defm SULD_1D_I32_TRAP : SULD_1D<"suld.b.1d.b32.trap", Int32Regs>;
+defm SULD_1D_I64_TRAP : SULD_1D<"suld.b.1d.b64.trap", Int64Regs>;
+
+defm SULD_1D_I8_ZERO : SULD_1D<"suld.b.1d.b8.zero", Int16Regs>;
+defm SULD_1D_I16_ZERO : SULD_1D<"suld.b.1d.b16.zero", Int16Regs>;
+defm SULD_1D_I32_ZERO : SULD_1D<"suld.b.1d.b32.zero", Int32Regs>;
+defm SULD_1D_I64_ZERO : SULD_1D<"suld.b.1d.b64.zero", Int64Regs>;
+
+class SULD_1D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf>
+ : NVPTXInst<(outs outtype:$r),
+ !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
+ inst # " \\{$r\\}, [$s, \\{$l, $x\\}];",
+ []>;
+multiclass SULD_1D_ARRAY<string inst, NVPTXRegClass outtype> {
+ def _R : SULD_1D_ARRAY_base<inst, outtype, (ins Int64Regs:$s)>;
+ def _I : SULD_1D_ARRAY_base<inst, outtype, (ins i64imm:$s)>;
+}
-def SULD_1D_ARRAY_I8_ZERO
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.b8.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_I16_ZERO
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.b16.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_I32_ZERO
- : NVPTXInst<(outs Int32Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.b32.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_I64_ZERO
- : NVPTXInst<(outs Int64Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.b64.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
- []>;
+defm SULD_1D_ARRAY_I8_CLAMP
+ : SULD_1D_ARRAY<"suld.b.a1d.b8.clamp", Int16Regs>;
+defm SULD_1D_ARRAY_I16_CLAMP
+ : SULD_1D_ARRAY<"suld.b.a1d.b16.clamp", Int16Regs>;
+defm SULD_1D_ARRAY_I32_CLAMP
+ : SULD_1D_ARRAY<"suld.b.a1d.b32.clamp", Int32Regs>;
+defm SULD_1D_ARRAY_I64_CLAMP
+ : SULD_1D_ARRAY<"suld.b.a1d.b64.clamp", Int64Regs>;
+
+defm SULD_1D_ARRAY_I8_TRAP
+ : SULD_1D_ARRAY<"suld.b.a1d.b8.trap", Int16Regs>;
+defm SULD_1D_ARRAY_I16_TRAP
+ : SULD_1D_ARRAY<"suld.b.a1d.b16.trap", Int16Regs>;
+defm SULD_1D_ARRAY_I32_TRAP
+ : SULD_1D_ARRAY<"suld.b.a1d.b32.trap", Int32Regs>;
+defm SULD_1D_ARRAY_I64_TRAP
+ : SULD_1D_ARRAY<"suld.b.a1d.b64.trap", Int64Regs>;
+
+defm SULD_1D_ARRAY_I8_ZERO
+ : SULD_1D_ARRAY<"suld.b.a1d.b8.zero", Int16Regs>;
+defm SULD_1D_ARRAY_I16_ZERO
+ : SULD_1D_ARRAY<"suld.b.a1d.b16.zero", Int16Regs>;
+defm SULD_1D_ARRAY_I32_ZERO
+ : SULD_1D_ARRAY<"suld.b.a1d.b32.zero", Int32Regs>;
+defm SULD_1D_ARRAY_I64_ZERO
+ : SULD_1D_ARRAY<"suld.b.a1d.b64.zero", Int64Regs>;
+
+class SULD_2D_base<string inst, NVPTXRegClass outtype, dag surf>
+ : NVPTXInst<(outs outtype:$r),
+ !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
+ inst # " \\{$r\\}, [$s, \\{$x, $y\\}];",
+ []>;
+multiclass SULD_2D<string inst, NVPTXRegClass outtype> {
+ def _R : SULD_2D_base<inst, outtype, (ins Int64Regs:$s)>;
+ def _I : SULD_2D_base<inst, outtype, (ins i64imm:$s)>;
+}
-def SULD_2D_I8_ZERO
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.b8.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_I16_ZERO
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.b16.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_I32_ZERO
- : NVPTXInst<(outs Int32Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.b32.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_I64_ZERO
- : NVPTXInst<(outs Int64Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.b64.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
- []>;
+defm SULD_2D_I8_CLAMP : SULD_2D<"suld.b.2d.b8.clamp", Int16Regs>;
+defm SULD_2D_I16_CLAMP : SULD_2D<"suld.b.2d.b16.clamp", Int16Regs>;
+defm SULD_2D_I32_CLAMP : SULD_2D<"suld.b.2d.b32.clamp", Int32Regs>;
+defm SULD_2D_I64_CLAMP : SULD_2D<"suld.b.2d.b64.clamp", Int64Regs>;
+
+defm SULD_2D_I8_TRAP : SULD_2D<"suld.b.2d.b8.trap", Int16Regs>;
+defm SULD_2D_I16_TRAP : SULD_2D<"suld.b.2d.b16.trap", Int16Regs>;
+defm SULD_2D_I32_TRAP : SULD_2D<"suld.b.2d.b32.trap", Int32Regs>;
+defm SULD_2D_I64_TRAP : SULD_2D<"suld.b.2d.b64.trap", Int64Regs>;
+
+defm SULD_2D_I8_ZERO : SULD_2D<"suld.b.2d.b8.zero", Int16Regs>;
+defm SULD_2D_I16_ZERO : SULD_2D<"suld.b.2d.b16.zero", Int16Regs>;
+defm SULD_2D_I32_ZERO : SULD_2D<"suld.b.2d.b32.zero", Int32Regs>;
+defm SULD_2D_I64_ZERO : SULD_2D<"suld.b.2d.b64.zero", Int64Regs>;
+
+class SULD_2D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf>
+ : NVPTXInst<(outs outtype:$r),
+ !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
+ inst # " \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
+ []>;
+multiclass SULD_2D_ARRAY<string inst, NVPTXRegClass outtype> {
+ def _R : SULD_2D_ARRAY_base<inst, outtype, (ins Int64Regs:$s)>;
+ def _I : SULD_2D_ARRAY_base<inst, outtype, (ins i64imm:$s)>;
+}
-def SULD_2D_ARRAY_I8_ZERO
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.b8.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_I16_ZERO
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.b16.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_I32_ZERO
- : NVPTXInst<(outs Int32Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.b32.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_I64_ZERO
- : NVPTXInst<(outs Int64Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.b64.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
- []>;
+defm SULD_2D_ARRAY_I8_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b8.clamp", Int16Regs>;
+defm SULD_2D_ARRAY_I16_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b16.clamp", Int16Regs>;
+defm SULD_2D_ARRAY_I32_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b32.clamp", Int32Regs>;
+defm SULD_2D_ARRAY_I64_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b64.clamp", Int64Regs>;
+
+defm SULD_2D_ARRAY_I8_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b8.trap", Int16Regs>;
+defm SULD_2D_ARRAY_I16_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b16.trap", Int16Regs>;
+defm SULD_2D_ARRAY_I32_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b32.trap", Int32Regs>;
+defm SULD_2D_ARRAY_I64_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b64.trap", Int64Regs>;
+
+defm SULD_2D_ARRAY_I8_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b8.zero", Int16Regs>;
+defm SULD_2D_ARRAY_I16_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b16.zero", Int16Regs>;
+defm SULD_2D_ARRAY_I32_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b32.zero", Int32Regs>;
+defm SULD_2D_ARRAY_I64_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b64.zero", Int64Regs>;
+
+class SULD_3D_base<string inst, NVPTXRegClass outtype, dag surf>
+ : NVPTXInst<(outs outtype:$r),
+ !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
+ inst # " \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
+ []>;
+multiclass SULD_3D<string inst, NVPTXRegClass outtype> {
+ def _R : SULD_3D_base<inst, outtype, (ins Int64Regs:$s)>;
+ def _I : SULD_3D_base<inst, outtype, (ins i64imm:$s)>;
+}
-def SULD_3D_I8_ZERO
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.b8.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_I16_ZERO
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.b16.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_I32_ZERO
- : NVPTXInst<(outs Int32Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.b32.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_I64_ZERO
- : NVPTXInst<(outs Int64Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.b64.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
+defm SULD_3D_I8_CLAMP : SULD_3D<"suld.b.3d.b8.clamp", Int16Regs>;
+defm SULD_3D_I16_CLAMP : SULD_3D<"suld.b.3d.b16.clamp", Int16Regs>;
+defm SULD_3D_I32_CLAMP : SULD_3D<"suld.b.3d.b32.clamp", Int32Regs>;
+defm SULD_3D_I64_CLAMP : SULD_3D<"suld.b.3d.b64.clamp", Int64Regs>;
+
+defm SULD_3D_I8_TRAP : SULD_3D<"suld.b.3d.b8.trap", Int16Regs>;
+defm SULD_3D_I16_TRAP : SULD_3D<"suld.b.3d.b16.trap", Int16Regs>;
+defm SULD_3D_I32_TRAP : SULD_3D<"suld.b.3d.b32.trap", Int32Regs>;
+defm SULD_3D_I64_TRAP : SULD_3D<"suld.b.3d.b64.trap", Int64Regs>;
+
+defm SULD_3D_I8_ZERO : SULD_3D<"suld.b.3d.b8.zero", Int16Regs>;
+defm SULD_3D_I16_ZERO : SULD_3D<"suld.b.3d.b16.zero", Int16Regs>;
+defm SULD_3D_I32_ZERO : SULD_3D<"suld.b.3d.b32.zero", Int32Regs>;
+defm SULD_3D_I64_ZERO : SULD_3D<"suld.b.3d.b64.zero", Int64Regs>;
}
let IsSuld = 2 in {
-def SULD_1D_V2I8_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_V2I16_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_V2I32_ZERO
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_V2I64_ZERO
- : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_ARRAY_V2I8_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_V2I16_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_V2I32_ZERO
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_V2I64_ZERO
- : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
- []>;
+class SULD_1D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
+ : NVPTXInst<(outs outtype:$r, outtype:$g),
+ !con(surf, (ins Int32Regs:$x)),
+ inst # " \\{$r, $g\\}, [$s, \\{$x\\}];",
+ []>;
+multiclass SULD_1D_V2<string inst, NVPTXRegClass outtype> {
+ def _R : SULD_1D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
+ def _I : SULD_1D_V2_base<inst, outtype, (ins i64imm:$s)>;
+}
-def SULD_2D_V2I8_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_V2I16_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_V2I32_ZERO
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_V2I64_ZERO
- : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
- []>;
+defm SULD_1D_V2I8_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b8.clamp", Int16Regs>;
+defm SULD_1D_V2I16_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b16.clamp", Int16Regs>;
+defm SULD_1D_V2I32_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b32.clamp", Int32Regs>;
+defm SULD_1D_V2I64_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b64.clamp", Int64Regs>;
+
+defm SULD_1D_V2I8_TRAP : SULD_1D_V2<"suld.b.1d.v2.b8.trap", Int16Regs>;
+defm SULD_1D_V2I16_TRAP : SULD_1D_V2<"suld.b.1d.v2.b16.trap", Int16Regs>;
+defm SULD_1D_V2I32_TRAP : SULD_1D_V2<"suld.b.1d.v2.b32.trap", Int32Regs>;
+defm SULD_1D_V2I64_TRAP : SULD_1D_V2<"suld.b.1d.v2.b64.trap", Int64Regs>;
+
+defm SULD_1D_V2I8_ZERO : SULD_1D_V2<"suld.b.1d.v2.b8.zero", Int16Regs>;
+defm SULD_1D_V2I16_ZERO : SULD_1D_V2<"suld.b.1d.v2.b16.zero", Int16Regs>;
+defm SULD_1D_V2I32_ZERO : SULD_1D_V2<"suld.b.1d.v2.b32.zero", Int32Regs>;
+defm SULD_1D_V2I64_ZERO : SULD_1D_V2<"suld.b.1d.v2.b64.zero", Int64Regs>;
+
+class SULD_1D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf>
+ : NVPTXInst<(outs outtype:$r, outtype:$g),
+ !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
+ inst # " \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
+ []>;
+multiclass SULD_1D_ARRAY_V2<string inst, NVPTXRegClass outtype> {
+ def _R : SULD_1D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s)>;
+ def _I : SULD_1D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>;
+}
-def SULD_2D_ARRAY_V2I8_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v2.b8.zero \\{$r, $g\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_V2I16_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v2.b16.zero \\{$r, $g\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_V2I32_ZERO
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v2.b32.zero \\{$r, $g\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_V2I64_ZERO
- : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v2.b64.zero \\{$r, $g\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
+defm SULD_1D_ARRAY_V2I8_CLAMP
+ : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.clamp", Int16Regs>;
+defm SULD_1D_ARRAY_V2I16_CLAMP
+ : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.clamp", Int16Regs>;
+defm SULD_1D_ARRAY_V2I32_CLAMP
+ : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.clamp", Int32Regs>;
+defm SULD_1D_ARRAY_V2I64_CLAMP
+ : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.clamp", Int64Regs>;
+
+defm SULD_1D_ARRAY_V2I8_TRAP
+ : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.trap", Int16Regs>;
+defm SULD_1D_ARRAY_V2I16_TRAP
+ : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.trap", Int16Regs>;
+defm SULD_1D_ARRAY_V2I32_TRAP
+ : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.trap", Int32Regs>;
+defm SULD_1D_ARRAY_V2I64_TRAP
+ : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.trap", Int64Regs>;
+
+defm SULD_1D_ARRAY_V2I8_ZERO
+ : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.zero", Int16Regs>;
+defm SULD_1D_ARRAY_V2I16_ZERO
+ : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.zero", Int16Regs>;
+defm SULD_1D_ARRAY_V2I32_ZERO
+ : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.zero", Int32Regs>;
+defm SULD_1D_ARRAY_V2I64_ZERO
+ : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.zero", Int64Regs>;
+
+class SULD_2D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
+ : NVPTXInst<(outs outtype:$r, outtype:$g),
+ !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
+ inst # " \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
+ []>;
+multiclass SULD_2D_V2<string inst, NVPTXRegClass outtype> {
+ def _R : SULD_2D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
+ def _I : SULD_2D_V2_base<inst, outtype, (ins i64imm:$s)>;
+}
+
+defm SULD_2D_V2I8_CLAMP
+ : SULD_2D_V2<"suld.b.2d.v2.b8.clamp", Int16Regs>;
+defm SULD_2D_V2I16_CLAMP
+ : SULD_2D_V2<"suld.b.2d.v2.b16.clamp", Int16Regs>;
+defm SULD_2D_V2I32_CLAMP
+ : SULD_2D_V2<"suld.b.2d.v2.b32.clamp", Int32Regs>;
+defm SULD_2D_V2I64_CLAMP
+ : SULD_2D_V2<"suld.b.2d.v2.b64.clamp", Int64Regs>;
+
+defm SULD_2D_V2I8_TRAP
+ : SULD_2D_V2<"suld.b.2d.v2.b8.trap", Int16Regs>;
+defm SULD_2D_V2I16_TRAP
+ : SULD_2D_V2<"suld.b.2d.v2.b16.trap", Int16Regs>;
+defm SULD_2D_V2I32_TRAP
+ : SULD_2D_V2<"suld.b.2d.v2.b32.trap", Int32Regs>;
+defm SULD_2D_V2I64_TRAP
+ : SULD_2D_V2<"suld.b.2d.v2.b64.trap", Int64Regs>;
+
+defm SULD_2D_V2I8_ZERO
+ : SULD_2D_V2<"suld.b.2d.v2.b8.zero", Int16Regs>;
+defm SULD_2D_V2I16_ZERO
+ : SULD_2D_V2<"suld.b.2d.v2.b16.zero", Int16Regs>;
+defm SULD_2D_V2I32_ZERO
+ : SULD_2D_V2<"suld.b.2d.v2.b32.zero", Int32Regs>;
+defm SULD_2D_V2I64_ZERO
+ : SULD_2D_V2<"suld.b.2d.v2.b64.zero", Int64Regs>;
+
+class SULD_2D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf>
+ : NVPTXInst<(outs outtype:$r, outtype:$g),
+ !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
+ inst # " \\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];",
+ []>;
+multiclass SULD_2D_ARRAY_V2<string inst, NVPTXRegClass outtype> {
+ def _R : SULD_2D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s)>;
+ def _I : SULD_2D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>;
+}
+
+defm SULD_2D_ARRAY_V2I8_CLAMP
+ : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.clamp", Int16Regs>;
+defm SULD_2D_ARRAY_V2I16_CLAMP
+ : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.clamp", Int16Regs>;
+defm SULD_2D_ARRAY_V2I32_CLAMP
+ : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.clamp", Int32Regs>;
+defm SULD_2D_ARRAY_V2I64_CLAMP
+ : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.clamp", Int64Regs>;
+
+defm SULD_2D_ARRAY_V2I8_TRAP
+ : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.trap", Int16Regs>;
+defm SULD_2D_ARRAY_V2I16_TRAP
+ : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.trap", Int16Regs>;
+defm SULD_2D_ARRAY_V2I32_TRAP
+ : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.trap", Int32Regs>;
+defm SULD_2D_ARRAY_V2I64_TRAP
+ : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.trap", Int64Regs>;
+
+defm SULD_2D_ARRAY_V2I8_ZERO
+ : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.zero", Int16Regs>;
+defm SULD_2D_ARRAY_V2I16_ZERO
+ : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.zero", Int16Regs>;
+defm SULD_2D_ARRAY_V2I32_ZERO
+ : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.zero", Int32Regs>;
+defm SULD_2D_ARRAY_V2I64_ZERO
+ : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.zero", Int64Regs>;
+
+class SULD_3D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
+ : NVPTXInst<(outs outtype:$r, outtype:$g),
+ !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
+ inst # " \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
+ []>;
+multiclass SULD_3D_V2<string inst, NVPTXRegClass outtype> {
+ def _R : SULD_3D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
+ def _I : SULD_3D_V2_base<inst, outtype, (ins i64imm:$s)>;
+}
+
+defm SULD_3D_V2I8_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b8.clamp", Int16Regs>;
+defm SULD_3D_V2I16_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b16.clamp", Int16Regs>;
+defm SULD_3D_V2I32_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b32.clamp", Int32Regs>;
+defm SULD_3D_V2I64_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b64.clamp", Int64Regs>;
+
+defm SULD_3D_V2I8_TRAP : SULD_3D_V2<"suld.b.3d.v2.b8.trap", Int16Regs>;
+defm SULD_3D_V2I16_TRAP : SULD_3D_V2<"suld.b.3d.v2.b16.trap", Int16Regs>;
+defm SULD_3D_V2I32_TRAP : SULD_3D_V2<"suld.b.3d.v2.b32.trap", Int32Regs>;
+defm SULD_3D_V2I64_TRAP : SULD_3D_V2<"suld.b.3d.v2.b64.trap", Int64Regs>;
+
+defm SULD_3D_V2I8_ZERO : SULD_3D_V2<"suld.b.3d.v2.b8.zero", Int16Regs>;
+defm SULD_3D_V2I16_ZERO : SULD_3D_V2<"suld.b.3d.v2.b16.zero", Int16Regs>;
+defm SULD_3D_V2I32_ZERO : SULD_3D_V2<"suld.b.3d.v2.b32.zero", Int32Regs>;
+defm SULD_3D_V2I64_ZERO : SULD_3D_V2<"suld.b.3d.v2.b64.zero", Int64Regs>;
-def SULD_3D_V2I8_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_V2I16_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_V2I32_ZERO
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_V2I64_ZERO
- : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
}
let IsSuld = 3 in {
-def SULD_1D_V4I8_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_V4I16_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_V4I32_ZERO
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_ARRAY_V4I8_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_V4I16_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_V4I32_ZERO
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x\\}];",
- []>;
+class SULD_1D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
+ : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
+ !con(surf, (ins Int32Regs:$x)),
+ inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
+ []>;
+multiclass SULD_1D_V4<string inst, NVPTXRegClass outtype> {
+ def _R : SULD_1D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
+ def _I : SULD_1D_V4_base<inst, outtype, (ins i64imm:$s)>;
+}
-def SULD_2D_V4I8_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_V4I16_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_V4I32_ZERO
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
- []>;
+defm SULD_1D_V4I8_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b8.clamp", Int16Regs>;
+defm SULD_1D_V4I16_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b16.clamp", Int16Regs>;
+defm SULD_1D_V4I32_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b32.clamp", Int32Regs>;
+
+defm SULD_1D_V4I8_TRAP : SULD_1D_V4<"suld.b.1d.v4.b8.trap", Int16Regs>;
+defm SULD_1D_V4I16_TRAP : SULD_1D_V4<"suld.b.1d.v4.b16.trap", Int16Regs>;
+defm SULD_1D_V4I32_TRAP : SULD_1D_V4<"suld.b.1d.v4.b32.trap", Int32Regs>;
+
+defm SULD_1D_V4I8_ZERO : SULD_1D_V4<"suld.b.1d.v4.b8.zero", Int16Regs>;
+defm SULD_1D_V4I16_ZERO : SULD_1D_V4<"suld.b.1d.v4.b16.zero", Int16Regs>;
+defm SULD_1D_V4I32_ZERO : SULD_1D_V4<"suld.b.1d.v4.b32.zero", Int32Regs>;
+
+class SULD_1D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf>
+ : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
+ !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
+ inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];",
+ []>;
+multiclass SULD_1D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
+ def _R : SULD_1D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s)>;
+ def _I : SULD_1D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
+}
-def SULD_2D_ARRAY_V4I8_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_V4I16_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_V4I32_ZERO
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
+defm SULD_1D_ARRAY_V4I8_CLAMP
+ : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.clamp", Int16Regs>;
+defm SULD_1D_ARRAY_V4I16_CLAMP
+ : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.clamp", Int16Regs>;
+defm SULD_1D_ARRAY_V4I32_CLAMP
+ : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.clamp", Int32Regs>;
+
+defm SULD_1D_ARRAY_V4I8_TRAP
+ : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.trap", Int16Regs>;
+defm SULD_1D_ARRAY_V4I16_TRAP
+ : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.trap", Int16Regs>;
+defm SULD_1D_ARRAY_V4I32_TRAP
+ : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.trap", Int32Regs>;
+
+defm SULD_1D_ARRAY_V4I8_ZERO
+ : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.zero", Int16Regs>;
+defm SULD_1D_ARRAY_V4I16_ZERO
+ : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.zero", Int16Regs>;
+defm SULD_1D_ARRAY_V4I32_ZERO
+ : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.zero", Int32Regs>;
+
+class SULD_2D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
+ : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
+ !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
+ inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
+ []>;
+multiclass SULD_2D_V4<string inst, NVPTXRegClass outtype> {
+ def _R : SULD_2D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
+ def _I : SULD_2D_V4_base<inst, outtype, (ins i64imm:$s)>;
+}
+defm SULD_2D_V4I8_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b8.clamp", Int16Regs>;
+defm SULD_2D_V4I16_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b16.clamp", Int16Regs>;
+defm SULD_2D_V4I32_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b32.clamp", Int32Regs>;
+
+defm SULD_2D_V4I8_TRAP : SULD_2D_V4<"suld.b.2d.v4.b8.trap", Int16Regs>;
+defm SULD_2D_V4I16_TRAP : SULD_2D_V4<"suld.b.2d.v4.b16.trap", Int16Regs>;
+defm SULD_2D_V4I32_TRAP : SULD_2D_V4<"suld.b.2d.v4.b32.trap", Int32Regs>;
+
+defm SULD_2D_V4I8_ZERO : SULD_2D_V4<"suld.b.2d.v4.b8.zero", Int16Regs>;
+defm SULD_2D_V4I16_ZERO : SULD_2D_V4<"suld.b.2d.v4.b16.zero", Int16Regs>;
+defm SULD_2D_V4I32_ZERO : SULD_2D_V4<"suld.b.2d.v4.b32.zero", Int32Regs>;
+
+class SULD_2D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf>
+ : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
+ !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
+ inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];",
+ []>;
+multiclass SULD_2D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
+ def _R : SULD_2D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s)>;
+ def _I : SULD_2D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
+}
+
+defm SULD_2D_ARRAY_V4I8_CLAMP
+ : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.clamp", Int16Regs>;
+defm SULD_2D_ARRAY_V4I16_CLAMP
+ : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.clamp", Int16Regs>;
+defm SULD_2D_ARRAY_V4I32_CLAMP
+ : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.clamp", Int32Regs>;
+
+defm SULD_2D_ARRAY_V4I8_TRAP
+ : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.trap", Int16Regs>;
+defm SULD_2D_ARRAY_V4I16_TRAP
+ : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.trap", Int16Regs>;
+defm SULD_2D_ARRAY_V4I32_TRAP
+ : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.trap", Int32Regs>;
+
+defm SULD_2D_ARRAY_V4I8_ZERO
+ : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.zero", Int16Regs>;
+defm SULD_2D_ARRAY_V4I16_ZERO
+ : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.zero", Int16Regs>;
+defm SULD_2D_ARRAY_V4I32_ZERO
+ : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.zero", Int32Regs>;
+
+class SULD_3D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
+ : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
+ !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
+ inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];",
+ []>;
+multiclass SULD_3D_V4<string inst, NVPTXRegClass outtype> {
+ def _R : SULD_3D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
+ def _I : SULD_3D_V4_base<inst, outtype, (ins i64imm:$s)>;
+}
+
+defm SULD_3D_V4I8_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b8.clamp", Int16Regs>;
+defm SULD_3D_V4I16_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b16.clamp", Int16Regs>;
+defm SULD_3D_V4I32_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b32.clamp", Int32Regs>;
+
+defm SULD_3D_V4I8_TRAP : SULD_3D_V4<"suld.b.3d.v4.b8.trap", Int16Regs>;
+defm SULD_3D_V4I16_TRAP : SULD_3D_V4<"suld.b.3d.v4.b16.trap", Int16Regs>;
+defm SULD_3D_V4I32_TRAP : SULD_3D_V4<"suld.b.3d.v4.b32.trap", Int32Regs>;
+
+defm SULD_3D_V4I8_ZERO : SULD_3D_V4<"suld.b.3d.v4.b8.zero", Int16Regs>;
+defm SULD_3D_V4I16_ZERO : SULD_3D_V4<"suld.b.3d.v4.b16.zero", Int16Regs>;
+defm SULD_3D_V4I32_ZERO : SULD_3D_V4<"suld.b.3d.v4.b32.zero", Int32Regs>;
-def SULD_3D_V4I8_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_V4I16_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_V4I32_ZERO
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$x, $y, $z, $z\\}];",
- []>;
}
//-----------------------------------
@@ -4769,56 +4028,88 @@ def SULD_3D_V4I32_ZERO
//-----------------------------------
let IsSurfTexQuery = true in {
-def TXQ_CHANNEL_ORDER
+def TXQ_CHANNEL_ORDER_R
: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
"txq.channel_order.b32 \t$d, [$a];",
[]>;
-def TXQ_CHANNEL_DATA_TYPE
+def TXQ_CHANNEL_ORDER_I
+ : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+ "txq.channel_order.b32 \t$d, [$a];",
+ []>;
+def TXQ_CHANNEL_DATA_TYPE_R
: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
"txq.channel_data_type.b32 \t$d, [$a];",
[]>;
-def TXQ_WIDTH
+def TXQ_CHANNEL_DATA_TYPE_I
+ : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+ "txq.channel_data_type.b32 \t$d, [$a];",
+ []>;
+def TXQ_WIDTH_R
: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
"txq.width.b32 \t$d, [$a];",
[]>;
-def TXQ_HEIGHT
+def TXQ_WIDTH_I
+ : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+ "txq.width.b32 \t$d, [$a];",
+ []>;
+def TXQ_HEIGHT_R
: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
"txq.height.b32 \t$d, [$a];",
[]>;
-def TXQ_DEPTH
+def TXQ_HEIGHT_I
+ : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+ "txq.height.b32 \t$d, [$a];",
+ []>;
+def TXQ_DEPTH_R
: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
"txq.depth.b32 \t$d, [$a];",
[]>;
-def TXQ_ARRAY_SIZE
+def TXQ_DEPTH_I
+ : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+ "txq.depth.b32 \t$d, [$a];",
+ []>;
+def TXQ_ARRAY_SIZE_R
: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
"txq.array_size.b32 \t$d, [$a];",
[]>;
-def TXQ_NUM_SAMPLES
+def TXQ_ARRAY_SIZE_I
+ : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+ "txq.array_size.b32 \t$d, [$a];",
+ []>;
+def TXQ_NUM_SAMPLES_R
: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
"txq.num_samples.b32 \t$d, [$a];",
[]>;
-def TXQ_NUM_MIPMAP_LEVELS
+def TXQ_NUM_SAMPLES_I
+ : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+ "txq.num_samples.b32 \t$d, [$a];",
+ []>;
+def TXQ_NUM_MIPMAP_LEVELS_R
: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
"txq.num_mipmap_levels.b32 \t$d, [$a];",
[]>;
+def TXQ_NUM_MIPMAP_LEVELS_I
+ : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+ "txq.num_mipmap_levels.b32 \t$d, [$a];",
+ []>;
}
def : Pat<(int_nvvm_txq_channel_order Int64Regs:$a),
- (TXQ_CHANNEL_ORDER Int64Regs:$a)>;
+ (TXQ_CHANNEL_ORDER_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_channel_data_type Int64Regs:$a),
- (TXQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
+ (TXQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_width Int64Regs:$a),
- (TXQ_WIDTH Int64Regs:$a)>;
+ (TXQ_WIDTH_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_height Int64Regs:$a),
- (TXQ_HEIGHT Int64Regs:$a)>;
+ (TXQ_HEIGHT_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_depth Int64Regs:$a),
- (TXQ_DEPTH Int64Regs:$a)>;
+ (TXQ_DEPTH_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_array_size Int64Regs:$a),
- (TXQ_ARRAY_SIZE Int64Regs:$a)>;
+ (TXQ_ARRAY_SIZE_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_num_samples Int64Regs:$a),
- (TXQ_NUM_SAMPLES Int64Regs:$a)>;
+ (TXQ_NUM_SAMPLES_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
- (TXQ_NUM_MIPMAP_LEVELS Int64Regs:$a)>;
+ (TXQ_NUM_MIPMAP_LEVELS_R Int64Regs:$a)>;
//-----------------------------------
@@ -4826,44 +4117,68 @@ def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
//-----------------------------------
let IsSurfTexQuery = true in {
-def SUQ_CHANNEL_ORDER
+def SUQ_CHANNEL_ORDER_R
: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
"suq.channel_order.b32 \t$d, [$a];",
[]>;
-def SUQ_CHANNEL_DATA_TYPE
+def SUQ_CHANNEL_ORDER_I
+ : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+ "suq.channel_order.b32 \t$d, [$a];",
+ []>;
+def SUQ_CHANNEL_DATA_TYPE_R
: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
"suq.channel_data_type.b32 \t$d, [$a];",
[]>;
-def SUQ_WIDTH
+def SUQ_CHANNEL_DATA_TYPE_I
+ : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+ "suq.channel_data_type.b32 \t$d, [$a];",
+ []>;
+def SUQ_WIDTH_R
: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
"suq.width.b32 \t$d, [$a];",
[]>;
-def SUQ_HEIGHT
+def SUQ_WIDTH_I
+ : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+ "suq.width.b32 \t$d, [$a];",
+ []>;
+def SUQ_HEIGHT_R
: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
"suq.height.b32 \t$d, [$a];",
[]>;
-def SUQ_DEPTH
+def SUQ_HEIGHT_I
+ : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+ "suq.height.b32 \t$d, [$a];",
+ []>;
+def SUQ_DEPTH_R
: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
"suq.depth.b32 \t$d, [$a];",
[]>;
-def SUQ_ARRAY_SIZE
+def SUQ_DEPTH_I
+ : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+ "suq.depth.b32 \t$d, [$a];",
+ []>;
+def SUQ_ARRAY_SIZE_R
: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
"suq.array_size.b32 \t$d, [$a];",
[]>;
+def SUQ_ARRAY_SIZE_I
+ : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+ "suq.array_size.b32 \t$d, [$a];",
+ []>;
}
def : Pat<(int_nvvm_suq_channel_order Int64Regs:$a),
- (SUQ_CHANNEL_ORDER Int64Regs:$a)>;
+ (SUQ_CHANNEL_ORDER_R Int64Regs:$a)>;
def : Pat<(int_nvvm_suq_channel_data_type Int64Regs:$a),
- (SUQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
+ (SUQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>;
def : Pat<(int_nvvm_suq_width Int64Regs:$a),
- (SUQ_WIDTH Int64Regs:$a)>;
+ (SUQ_WIDTH_R Int64Regs:$a)>;
def : Pat<(int_nvvm_suq_height Int64Regs:$a),
- (SUQ_HEIGHT Int64Regs:$a)>;
+ (SUQ_HEIGHT_R Int64Regs:$a)>;
def : Pat<(int_nvvm_suq_depth Int64Regs:$a),
- (SUQ_DEPTH Int64Regs:$a)>;
+ (SUQ_DEPTH_R Int64Regs:$a)>;
def : Pat<(int_nvvm_suq_array_size Int64Regs:$a),
- (SUQ_ARRAY_SIZE Int64Regs:$a)>;
+ (SUQ_ARRAY_SIZE_R Int64Regs:$a)>;
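An illustrative note, not part of the patch itself: after this change each txq/suq query instruction comes in two flavours, a register-operand form (suffix _R, taking the texture or surface handle in Int64Regs) and an immediate-operand form (suffix _I, taking an i64imm handle), while the intrinsic selection patterns above continue to pick the _R variants. A minimal sketch of one such pair, mirroring the definitions in this hunk:

def TXQ_WIDTH_R : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                            "txq.width.b32 \t$d, [$a];", []>;
def TXQ_WIDTH_I : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                            "txq.width.b32 \t$d, [$a];", []>;
def : Pat<(int_nvvm_txq_width Int64Regs:$a), (TXQ_WIDTH_R Int64Regs:$a)>;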
//===- Handle Query -------------------------------------------------------===//
@@ -4885,1329 +4200,522 @@ def ISTYPEP_TEXTURE
//===- Surface Stores -----------------------------------------------------===//
let IsSust = true in {
-// Unformatted
-// .clamp variant
-def SUST_B_1D_B8_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
- "sust.b.1d.b8.clamp \t[$s, \\{$x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_B16_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
- "sust.b.1d.b16.clamp \t[$s, \\{$x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_B32_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
- "sust.b.1d.b32.clamp \t[$s, \\{$x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_B64_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
- "sust.b.1d.b64.clamp \t[$s, \\{$x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_V2B8_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- "sust.b.1d.v2.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_V2B16_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- "sust.b.1d.v2.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_V2B32_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
- "sust.b.1d.v2.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_V2B64_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
- "sust.b.1d.v2.b64.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_V4B8_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
- Int16Regs:$b, Int16Regs:$a),
- "sust.b.1d.v4.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_1D_V4B16_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
- Int16Regs:$b, Int16Regs:$a),
- "sust.b.1d.v4.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_1D_V4B32_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- "sust.b.1d.v4.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
- []>;
-
-
-def SUST_B_1D_ARRAY_B8_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
- "sust.b.a1d.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_ARRAY_B16_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
- "sust.b.a1d.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_ARRAY_B32_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
- "sust.b.a1d.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_ARRAY_B64_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
- "sust.b.a1d.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_ARRAY_V2B8_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
- Int16Regs:$g),
- "sust.b.a1d.v2.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_ARRAY_V2B16_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
- Int16Regs:$g),
- "sust.b.a1d.v2.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_ARRAY_V2B32_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
- Int32Regs:$g),
- "sust.b.a1d.v2.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_ARRAY_V2B64_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
- Int64Regs:$g),
- "sust.b.a1d.v2.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_ARRAY_V4B8_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
- Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.a1d.v4.b8.clamp \t[$s, \\{$idx, $x\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_1D_ARRAY_V4B16_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
- Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.a1d.v4.b16.clamp \t[$s, \\{$idx, $x\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_1D_ARRAY_V4B32_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
- Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- "sust.b.a1d.v4.b32.clamp \t[$s, \\{$idx, $x\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-
-
-def SUST_B_2D_B8_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- "sust.b.2d.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_B16_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- "sust.b.2d.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_B32_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
- "sust.b.2d.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_B64_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
- "sust.b.2d.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_V2B8_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
- Int16Regs:$g),
- "sust.b.2d.v2.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_2D_V2B16_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
- Int16Regs:$g),
- "sust.b.2d.v2.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_2D_V2B32_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
- Int32Regs:$g),
- "sust.b.2d.v2.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_2D_V2B64_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
- Int64Regs:$g),
- "sust.b.2d.v2.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_2D_V4B8_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
- Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.2d.v4.b8.clamp \t[$s, \\{$x, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_2D_V4B16_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
- Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.2d.v4.b16.clamp \t[$s, \\{$x, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_2D_V4B32_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
- Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- "sust.b.2d.v4.b32.clamp \t[$s, \\{$x, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-
-
-def SUST_B_2D_ARRAY_B8_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r),
- "sust.b.a2d.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_ARRAY_B16_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r),
- "sust.b.a2d.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_ARRAY_B32_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$r),
- "sust.b.a2d.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_ARRAY_B64_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int64Regs:$r),
- "sust.b.a2d.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_ARRAY_V2B8_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r, Int16Regs:$g),
- "sust.b.a2d.v2.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_2D_ARRAY_V2B16_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r, Int16Regs:$g),
- "sust.b.a2d.v2.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_2D_ARRAY_V2B32_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$r, Int32Regs:$g),
- "sust.b.a2d.v2.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_2D_ARRAY_V2B64_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int64Regs:$r, Int64Regs:$g),
- "sust.b.a2d.v2.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_2D_ARRAY_V4B8_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.a2d.v4.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_2D_ARRAY_V4B16_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.a2d.v4.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_2D_ARRAY_V4B32_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- "sust.b.a2d.v4.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-
-
-def SUST_B_3D_B8_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r),
- "sust.b.3d.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
- []>;
-def SUST_B_3D_B16_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r),
- "sust.b.3d.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
- []>;
-def SUST_B_3D_B32_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int32Regs:$r),
- "sust.b.3d.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
- []>;
-def SUST_B_3D_B64_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int64Regs:$r),
- "sust.b.3d.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
- []>;
-def SUST_B_3D_V2B8_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r, Int16Regs:$g),
- "sust.b.3d.v2.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_3D_V2B16_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r, Int16Regs:$g),
- "sust.b.3d.v2.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_3D_V2B32_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int32Regs:$r, Int32Regs:$g),
- "sust.b.3d.v2.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_3D_V2B64_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int64Regs:$r, Int64Regs:$g),
- "sust.b.3d.v2.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_3D_V4B8_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.3d.v4.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_3D_V4B16_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.3d.v4.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_3D_V4B32_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- "sust.b.3d.v4.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-
-
-// .trap variant
-def SUST_B_1D_B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
- "sust.b.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
- "sust.b.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
- "sust.b.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_B64_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
- "sust.b.1d.b64.trap \t[$s, \\{$x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_V2B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- "sust.b.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_V2B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- "sust.b.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_V2B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
- "sust.b.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_V2B64_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
- "sust.b.1d.v2.b64.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_V4B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
- Int16Regs:$b, Int16Regs:$a),
- "sust.b.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_1D_V4B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
- Int16Regs:$b, Int16Regs:$a),
- "sust.b.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_1D_V4B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- "sust.b.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
- []>;
-
-
-def SUST_B_1D_ARRAY_B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
- "sust.b.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_ARRAY_B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
- "sust.b.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_ARRAY_B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
- "sust.b.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_ARRAY_B64_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
- "sust.b.a1d.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_ARRAY_V2B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
- Int16Regs:$g),
- "sust.b.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_ARRAY_V2B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
- Int16Regs:$g),
- "sust.b.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_ARRAY_V2B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
- Int32Regs:$g),
- "sust.b.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_ARRAY_V2B64_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
- Int64Regs:$g),
- "sust.b.a1d.v2.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_ARRAY_V4B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
- Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_1D_ARRAY_V4B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
- Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_1D_ARRAY_V4B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
- Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- "sust.b.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-
-
-def SUST_B_2D_B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- "sust.b.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- "sust.b.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
- "sust.b.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_B64_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
- "sust.b.2d.b64.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_V2B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
- Int16Regs:$g),
- "sust.b.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_2D_V2B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
- Int16Regs:$g),
- "sust.b.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_2D_V2B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
- Int32Regs:$g),
- "sust.b.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_2D_V2B64_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
- Int64Regs:$g),
- "sust.b.2d.v2.b64.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_2D_V4B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
- Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_2D_V4B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
- Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_2D_V4B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
- Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- "sust.b.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-
-
-def SUST_B_2D_ARRAY_B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r),
- "sust.b.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_ARRAY_B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r),
- "sust.b.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_ARRAY_B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$r),
- "sust.b.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_ARRAY_B64_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int64Regs:$r),
- "sust.b.a2d.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_ARRAY_V2B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r, Int16Regs:$g),
- "sust.b.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_2D_ARRAY_V2B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r, Int16Regs:$g),
- "sust.b.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_2D_ARRAY_V2B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$r, Int32Regs:$g),
- "sust.b.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_2D_ARRAY_V2B64_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int64Regs:$r, Int64Regs:$g),
- "sust.b.a2d.v2.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_2D_ARRAY_V4B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_2D_ARRAY_V4B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_2D_ARRAY_V4B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- "sust.b.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-
-
-def SUST_B_3D_B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r),
- "sust.b.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
- []>;
-def SUST_B_3D_B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r),
- "sust.b.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
- []>;
-def SUST_B_3D_B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int32Regs:$r),
- "sust.b.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
- []>;
-def SUST_B_3D_B64_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int64Regs:$r),
- "sust.b.3d.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
- []>;
-def SUST_B_3D_V2B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r, Int16Regs:$g),
- "sust.b.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_3D_V2B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r, Int16Regs:$g),
- "sust.b.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_3D_V2B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int32Regs:$r, Int32Regs:$g),
- "sust.b.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_3D_V2B64_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int64Regs:$r, Int64Regs:$g),
- "sust.b.3d.v2.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_3D_V4B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_3D_V4B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_3D_V4B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- "sust.b.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-
-
-// .zero variant
-def SUST_B_1D_B8_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
- "sust.b.1d.b8.zero \t[$s, \\{$x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_B16_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
- "sust.b.1d.b16.zero \t[$s, \\{$x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_B32_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
- "sust.b.1d.b32.zero \t[$s, \\{$x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_B64_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
- "sust.b.1d.b64.zero \t[$s, \\{$x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_V2B8_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- "sust.b.1d.v2.b8.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_V2B16_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- "sust.b.1d.v2.b16.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_V2B32_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
- "sust.b.1d.v2.b32.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_V2B64_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
- "sust.b.1d.v2.b64.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_V4B8_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
- Int16Regs:$b, Int16Regs:$a),
- "sust.b.1d.v4.b8.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_1D_V4B16_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
- Int16Regs:$b, Int16Regs:$a),
- "sust.b.1d.v4.b16.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_1D_V4B32_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- "sust.b.1d.v4.b32.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
- []>;
+class SUST_1D_base<string inst, NVPTXRegClass intype, dag surf>
+ : NVPTXInst<(outs),
+ !con(surf, (ins Int32Regs:$x, intype:$r)),
+ inst # " \t[$s, \\{$x\\}], \\{$r\\};",
+ []>;
+multiclass SUST_1D<string inst, NVPTXRegClass intype> {
+ def _R : SUST_1D_base<inst, intype, (ins Int64Regs:$s)>;
+ def _I : SUST_1D_base<inst, intype, (ins i64imm:$s)>;
+}
-def SUST_B_1D_ARRAY_B8_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
- "sust.b.a1d.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_ARRAY_B16_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
- "sust.b.a1d.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_ARRAY_B32_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
- "sust.b.a1d.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_ARRAY_B64_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
- "sust.b.a1d.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_ARRAY_V2B8_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
- Int16Regs:$g),
- "sust.b.a1d.v2.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_ARRAY_V2B16_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
- Int16Regs:$g),
- "sust.b.a1d.v2.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_ARRAY_V2B32_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
- Int32Regs:$g),
- "sust.b.a1d.v2.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_ARRAY_V2B64_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
- Int64Regs:$g),
- "sust.b.a1d.v2.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_ARRAY_V4B8_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
- Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.a1d.v4.b8.zero \t[$s, \\{$idx, $x\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_1D_ARRAY_V4B16_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
- Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.a1d.v4.b16.zero \t[$s, \\{$idx, $x\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_1D_ARRAY_V4B32_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
- Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- "sust.b.a1d.v4.b32.zero \t[$s, \\{$idx, $x\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
+defm SUST_B_1D_B8_CLAMP : SUST_1D<"sust.b.1d.b8.clamp", Int16Regs>;
+defm SUST_B_1D_B16_CLAMP : SUST_1D<"sust.b.1d.b16.clamp", Int16Regs>;
+defm SUST_B_1D_B32_CLAMP : SUST_1D<"sust.b.1d.b32.clamp", Int32Regs>;
+defm SUST_B_1D_B64_CLAMP : SUST_1D<"sust.b.1d.b64.clamp", Int64Regs>;
+
+defm SUST_B_1D_B8_TRAP : SUST_1D<"sust.b.1d.b8.trap", Int16Regs>;
+defm SUST_B_1D_B16_TRAP : SUST_1D<"sust.b.1d.b16.trap", Int16Regs>;
+defm SUST_B_1D_B32_TRAP : SUST_1D<"sust.b.1d.b32.trap", Int32Regs>;
+defm SUST_B_1D_B64_TRAP : SUST_1D<"sust.b.1d.b64.trap", Int64Regs>;
+
+defm SUST_B_1D_B8_ZERO : SUST_1D<"sust.b.1d.b8.zero", Int16Regs>;
+defm SUST_B_1D_B16_ZERO : SUST_1D<"sust.b.1d.b16.zero", Int16Regs>;
+defm SUST_B_1D_B32_ZERO : SUST_1D<"sust.b.1d.b32.zero", Int32Regs>;
+defm SUST_B_1D_B64_ZERO : SUST_1D<"sust.b.1d.b64.zero", Int64Regs>;
+
+defm SUST_P_1D_B8_TRAP : SUST_1D<"sust.p.1d.b8.trap", Int16Regs>;
+defm SUST_P_1D_B16_TRAP : SUST_1D<"sust.p.1d.b16.trap", Int16Regs>;
+defm SUST_P_1D_B32_TRAP : SUST_1D<"sust.p.1d.b32.trap", Int32Regs>;
+
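For readers less familiar with TableGen multiclasses, a brief sketch (illustrative only, not part of the patch) of what one of the defm lines above expands to; the _R record is identical to the hand-written definition this hunk removes, and the _I record differs only in taking the surface handle as an i64imm:

defm SUST_B_1D_B32_TRAP : SUST_1D<"sust.b.1d.b32.trap", Int32Regs>;
// expands to the pair:
def SUST_B_1D_B32_TRAP_R
  : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
              "sust.b.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};", []>;
def SUST_B_1D_B32_TRAP_I
  : NVPTXInst<(outs), (ins i64imm:$s, Int32Regs:$x, Int32Regs:$r),
              "sust.b.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};", []>;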
+class SUST_1D_V2_base<string inst, NVPTXRegClass intype, dag surf>
+ : NVPTXInst<(outs),
+ !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g)),
+ inst # " \t[$s, \\{$x\\}], \\{$r, $g\\};",
+ []>;
+multiclass SUST_1D_V2<string inst, NVPTXRegClass intype> {
+ def _R : SUST_1D_V2_base<inst, intype, (ins Int64Regs:$s)>;
+ def _I : SUST_1D_V2_base<inst, intype, (ins i64imm:$s)>;
+}
+defm SUST_B_1D_V2B8_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b8.clamp", Int16Regs>;
+defm SUST_B_1D_V2B16_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b16.clamp", Int16Regs>;
+defm SUST_B_1D_V2B32_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b32.clamp", Int32Regs>;
+defm SUST_B_1D_V2B64_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b64.clamp", Int64Regs>;
+
+defm SUST_B_1D_V2B8_TRAP : SUST_1D_V2<"sust.b.1d.v2.b8.trap", Int16Regs>;
+defm SUST_B_1D_V2B16_TRAP : SUST_1D_V2<"sust.b.1d.v2.b16.trap", Int16Regs>;
+defm SUST_B_1D_V2B32_TRAP : SUST_1D_V2<"sust.b.1d.v2.b32.trap", Int32Regs>;
+defm SUST_B_1D_V2B64_TRAP : SUST_1D_V2<"sust.b.1d.v2.b64.trap", Int64Regs>;
+
+defm SUST_B_1D_V2B8_ZERO : SUST_1D_V2<"sust.b.1d.v2.b8.zero", Int16Regs>;
+defm SUST_B_1D_V2B16_ZERO : SUST_1D_V2<"sust.b.1d.v2.b16.zero", Int16Regs>;
+defm SUST_B_1D_V2B32_ZERO : SUST_1D_V2<"sust.b.1d.v2.b32.zero", Int32Regs>;
+defm SUST_B_1D_V2B64_ZERO : SUST_1D_V2<"sust.b.1d.v2.b64.zero", Int64Regs>;
+
+defm SUST_P_1D_V2B8_TRAP : SUST_1D_V2<"sust.p.1d.v2.b8.trap", Int16Regs>;
+defm SUST_P_1D_V2B16_TRAP : SUST_1D_V2<"sust.p.1d.v2.b16.trap", Int16Regs>;
+defm SUST_P_1D_V2B32_TRAP : SUST_1D_V2<"sust.p.1d.v2.b32.trap", Int32Regs>;
+
+class SUST_1D_V4_base<string inst, NVPTXRegClass intype, dag surf>
+ : NVPTXInst<(outs),
+ !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g,
+ intype:$b, intype:$a)),
+ inst # " \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
+ []>;
+multiclass SUST_1D_V4<string inst, NVPTXRegClass intype> {
+ def _R : SUST_1D_V4_base<inst, intype, (ins Int64Regs:$s)>;
+ def _I : SUST_1D_V4_base<inst, intype, (ins i64imm:$s)>;
+}
-def SUST_B_2D_B8_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- "sust.b.2d.b8.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_B16_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- "sust.b.2d.b16.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_B32_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
- "sust.b.2d.b32.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_B64_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
- "sust.b.2d.b64.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_V2B8_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
- Int16Regs:$g),
- "sust.b.2d.v2.b8.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_2D_V2B16_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
- Int16Regs:$g),
- "sust.b.2d.v2.b16.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_2D_V2B32_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
- Int32Regs:$g),
- "sust.b.2d.v2.b32.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_2D_V2B64_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
- Int64Regs:$g),
- "sust.b.2d.v2.b64.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_2D_V4B8_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
- Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.2d.v4.b8.zero \t[$s, \\{$x, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_2D_V4B16_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
- Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.2d.v4.b16.zero \t[$s, \\{$x, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_2D_V4B32_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
- Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- "sust.b.2d.v4.b32.zero \t[$s, \\{$x, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
+defm SUST_B_1D_V4B8_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b8.clamp", Int16Regs>;
+defm SUST_B_1D_V4B16_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b16.clamp", Int16Regs>;
+defm SUST_B_1D_V4B32_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b32.clamp", Int32Regs>;
+
+defm SUST_B_1D_V4B8_TRAP : SUST_1D_V4<"sust.b.1d.v4.b8.trap", Int16Regs>;
+defm SUST_B_1D_V4B16_TRAP : SUST_1D_V4<"sust.b.1d.v4.b16.trap", Int16Regs>;
+defm SUST_B_1D_V4B32_TRAP : SUST_1D_V4<"sust.b.1d.v4.b32.trap", Int32Regs>;
+
+defm SUST_B_1D_V4B8_ZERO : SUST_1D_V4<"sust.b.1d.v4.b8.zero", Int16Regs>;
+defm SUST_B_1D_V4B16_ZERO : SUST_1D_V4<"sust.b.1d.v4.b16.zero", Int16Regs>;
+defm SUST_B_1D_V4B32_ZERO : SUST_1D_V4<"sust.b.1d.v4.b32.zero", Int32Regs>;
+
+defm SUST_P_1D_V4B8_TRAP : SUST_1D_V4<"sust.p.1d.v4.b8.trap", Int16Regs>;
+defm SUST_P_1D_V4B16_TRAP : SUST_1D_V4<"sust.p.1d.v4.b16.trap", Int16Regs>;
+defm SUST_P_1D_V4B32_TRAP : SUST_1D_V4<"sust.p.1d.v4.b32.trap", Int32Regs>;
+
+class SUST_1D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf>
+ : NVPTXInst<(outs),
+ !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, intype:$r)),
+ inst # " \t[$s, \\{$idx, $x\\}], \\{$r\\};",
+ []>;
+multiclass SUST_1D_ARRAY<string inst, NVPTXRegClass intype> {
+ def _R : SUST_1D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>;
+ def _I : SUST_1D_ARRAY_base<inst, intype, (ins i64imm:$s)>;
+}
+defm SUST_B_1D_ARRAY_B8_CLAMP
+ : SUST_1D_ARRAY<"sust.b.a1d.b8.clamp", Int16Regs>;
+defm SUST_B_1D_ARRAY_B16_CLAMP
+ : SUST_1D_ARRAY<"sust.b.a1d.b16.clamp", Int16Regs>;
+defm SUST_B_1D_ARRAY_B32_CLAMP
+ : SUST_1D_ARRAY<"sust.b.a1d.b32.clamp", Int32Regs>;
+defm SUST_B_1D_ARRAY_B64_CLAMP
+ : SUST_1D_ARRAY<"sust.b.a1d.b64.clamp", Int64Regs>;
+
+defm SUST_B_1D_ARRAY_B8_TRAP
+ : SUST_1D_ARRAY<"sust.b.a1d.b8.trap", Int16Regs>;
+defm SUST_B_1D_ARRAY_B16_TRAP
+ : SUST_1D_ARRAY<"sust.b.a1d.b16.trap", Int16Regs>;
+defm SUST_B_1D_ARRAY_B32_TRAP
+ : SUST_1D_ARRAY<"sust.b.a1d.b32.trap", Int32Regs>;
+defm SUST_B_1D_ARRAY_B64_TRAP
+ : SUST_1D_ARRAY<"sust.b.a1d.b64.trap", Int64Regs>;
+
+defm SUST_B_1D_ARRAY_B8_ZERO
+ : SUST_1D_ARRAY<"sust.b.a1d.b8.zero", Int16Regs>;
+defm SUST_B_1D_ARRAY_B16_ZERO
+ : SUST_1D_ARRAY<"sust.b.a1d.b16.zero", Int16Regs>;
+defm SUST_B_1D_ARRAY_B32_ZERO
+ : SUST_1D_ARRAY<"sust.b.a1d.b32.zero", Int32Regs>;
+defm SUST_B_1D_ARRAY_B64_ZERO
+ : SUST_1D_ARRAY<"sust.b.a1d.b64.zero", Int64Regs>;
+
+defm SUST_P_1D_ARRAY_B8_TRAP
+ : SUST_1D_ARRAY<"sust.p.a1d.b8.trap", Int16Regs>;
+defm SUST_P_1D_ARRAY_B16_TRAP
+ : SUST_1D_ARRAY<"sust.p.a1d.b16.trap", Int16Regs>;
+defm SUST_P_1D_ARRAY_B32_TRAP
+ : SUST_1D_ARRAY<"sust.p.a1d.b32.trap", Int32Regs>;
+
+class SUST_1D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf>
+ : NVPTXInst<(outs),
+ !con(surf, (ins Int32Regs:$idx, Int32Regs:$x,
+ intype:$r, intype:$g)),
+ inst # " \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
+ []>;
+multiclass SUST_1D_ARRAY_V2<string inst, NVPTXRegClass intype> {
+ def _R : SUST_1D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>;
+ def _I : SUST_1D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>;
+}
-def SUST_B_2D_ARRAY_B8_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r),
- "sust.b.a2d.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_ARRAY_B16_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r),
- "sust.b.a2d.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_ARRAY_B32_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$r),
- "sust.b.a2d.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_ARRAY_B64_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int64Regs:$r),
- "sust.b.a2d.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_ARRAY_V2B8_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r, Int16Regs:$g),
- "sust.b.a2d.v2.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_2D_ARRAY_V2B16_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r, Int16Regs:$g),
- "sust.b.a2d.v2.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_2D_ARRAY_V2B32_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$r, Int32Regs:$g),
- "sust.b.a2d.v2.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_2D_ARRAY_V2B64_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int64Regs:$r, Int64Regs:$g),
- "sust.b.a2d.v2.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_2D_ARRAY_V4B8_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.a2d.v4.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_2D_ARRAY_V4B16_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.a2d.v4.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_2D_ARRAY_V4B32_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- "sust.b.a2d.v4.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
+defm SUST_B_1D_ARRAY_V2B8_CLAMP
+ : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.clamp", Int16Regs>;
+defm SUST_B_1D_ARRAY_V2B16_CLAMP
+ : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.clamp", Int16Regs>;
+defm SUST_B_1D_ARRAY_V2B32_CLAMP
+ : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.clamp", Int32Regs>;
+defm SUST_B_1D_ARRAY_V2B64_CLAMP
+ : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.clamp", Int64Regs>;
+
+defm SUST_B_1D_ARRAY_V2B8_TRAP
+ : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.trap", Int16Regs>;
+defm SUST_B_1D_ARRAY_V2B16_TRAP
+ : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.trap", Int16Regs>;
+defm SUST_B_1D_ARRAY_V2B32_TRAP
+ : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.trap", Int32Regs>;
+defm SUST_B_1D_ARRAY_V2B64_TRAP
+ : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.trap", Int64Regs>;
+
+defm SUST_B_1D_ARRAY_V2B8_ZERO
+ : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.zero", Int16Regs>;
+defm SUST_B_1D_ARRAY_V2B16_ZERO
+ : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.zero", Int16Regs>;
+defm SUST_B_1D_ARRAY_V2B32_ZERO
+ : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.zero", Int32Regs>;
+defm SUST_B_1D_ARRAY_V2B64_ZERO
+ : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.zero", Int64Regs>;
+
+defm SUST_P_1D_ARRAY_V2B8_TRAP
+ : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b8.trap", Int16Regs>;
+defm SUST_P_1D_ARRAY_V2B16_TRAP
+ : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b16.trap", Int16Regs>;
+defm SUST_P_1D_ARRAY_V2B32_TRAP
+ : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b32.trap", Int32Regs>;
+
+class SUST_1D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf>
+ : NVPTXInst<(outs),
+ !con(surf, (ins Int32Regs:$idx, Int32Regs:$x,
+ intype:$r, intype:$g, intype:$b, intype:$a)),
+ inst # " \t[$s, \\{$idx, $x\\}], \\{$r, $g, $b, $a\\};",
+ []>;
+multiclass SUST_1D_ARRAY_V4<string inst, NVPTXRegClass intype> {
+ def _R : SUST_1D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>;
+ def _I : SUST_1D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>;
+}
+defm SUST_B_1D_ARRAY_V4B8_CLAMP
+ : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.clamp", Int16Regs>;
+defm SUST_B_1D_ARRAY_V4B16_CLAMP
+ : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.clamp", Int16Regs>;
+defm SUST_B_1D_ARRAY_V4B32_CLAMP
+ : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.clamp", Int32Regs>;
+
+defm SUST_B_1D_ARRAY_V4B8_TRAP
+ : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.trap", Int16Regs>;
+defm SUST_B_1D_ARRAY_V4B16_TRAP
+ : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.trap", Int16Regs>;
+defm SUST_B_1D_ARRAY_V4B32_TRAP
+ : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.trap", Int32Regs>;
+
+defm SUST_B_1D_ARRAY_V4B8_ZERO
+ : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.zero", Int16Regs>;
+defm SUST_B_1D_ARRAY_V4B16_ZERO
+ : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.zero", Int16Regs>;
+defm SUST_B_1D_ARRAY_V4B32_ZERO
+ : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.zero", Int32Regs>;
+
+defm SUST_P_1D_ARRAY_V4B8_TRAP
+ : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b8.trap", Int16Regs>;
+defm SUST_P_1D_ARRAY_V4B16_TRAP
+ : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b16.trap", Int16Regs>;
+defm SUST_P_1D_ARRAY_V4B32_TRAP
+ : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b32.trap", Int32Regs>;
+
+class SUST_2D_base<string inst, NVPTXRegClass intype, dag surf>
+ : NVPTXInst<(outs),
+ !con(surf, (ins Int32Regs:$x, Int32Regs:$y, intype:$r)),
+ inst # " \t[$s, \\{$x, $y\\}], \\{$r\\};",
+ []>;
+multiclass SUST_2D<string inst, NVPTXRegClass intype> {
+ def _R : SUST_2D_base<inst, intype, (ins Int64Regs:$s)>;
+ def _I : SUST_2D_base<inst, intype, (ins i64imm:$s)>;
+}
-def SUST_B_3D_B8_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r),
- "sust.b.3d.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
- []>;
-def SUST_B_3D_B16_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r),
- "sust.b.3d.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
- []>;
-def SUST_B_3D_B32_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int32Regs:$r),
- "sust.b.3d.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
- []>;
-def SUST_B_3D_B64_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int64Regs:$r),
- "sust.b.3d.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
- []>;
-def SUST_B_3D_V2B8_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r, Int16Regs:$g),
- "sust.b.3d.v2.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_3D_V2B16_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r, Int16Regs:$g),
- "sust.b.3d.v2.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_3D_V2B32_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int32Regs:$r, Int32Regs:$g),
- "sust.b.3d.v2.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_3D_V2B64_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int64Regs:$r, Int64Regs:$g),
- "sust.b.3d.v2.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_3D_V4B8_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.3d.v4.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_3D_V4B16_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.3d.v4.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_3D_V4B32_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- "sust.b.3d.v4.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
+defm SUST_B_2D_B8_CLAMP : SUST_2D<"sust.b.2d.b8.clamp", Int16Regs>;
+defm SUST_B_2D_B16_CLAMP : SUST_2D<"sust.b.2d.b16.clamp", Int16Regs>;
+defm SUST_B_2D_B32_CLAMP : SUST_2D<"sust.b.2d.b32.clamp", Int32Regs>;
+defm SUST_B_2D_B64_CLAMP : SUST_2D<"sust.b.2d.b64.clamp", Int64Regs>;
+
+defm SUST_B_2D_B8_TRAP : SUST_2D<"sust.b.2d.b8.trap", Int16Regs>;
+defm SUST_B_2D_B16_TRAP : SUST_2D<"sust.b.2d.b16.trap", Int16Regs>;
+defm SUST_B_2D_B32_TRAP : SUST_2D<"sust.b.2d.b32.trap", Int32Regs>;
+defm SUST_B_2D_B64_TRAP : SUST_2D<"sust.b.2d.b64.trap", Int64Regs>;
+
+defm SUST_B_2D_B8_ZERO : SUST_2D<"sust.b.2d.b8.zero", Int16Regs>;
+defm SUST_B_2D_B16_ZERO : SUST_2D<"sust.b.2d.b16.zero", Int16Regs>;
+defm SUST_B_2D_B32_ZERO : SUST_2D<"sust.b.2d.b32.zero", Int32Regs>;
+defm SUST_B_2D_B64_ZERO : SUST_2D<"sust.b.2d.b64.zero", Int64Regs>;
+
+defm SUST_P_2D_B8_TRAP : SUST_2D<"sust.p.2d.b8.trap", Int16Regs>;
+defm SUST_P_2D_B16_TRAP : SUST_2D<"sust.p.2d.b16.trap", Int16Regs>;
+defm SUST_P_2D_B32_TRAP : SUST_2D<"sust.p.2d.b32.trap", Int32Regs>;
+
+class SUST_2D_V2_base<string inst, NVPTXRegClass intype, dag surf>
+ : NVPTXInst<(outs),
+ !con(surf, (ins Int32Regs:$x, Int32Regs:$y,
+ intype:$r, intype:$g)),
+ inst # " \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
+ []>;
+multiclass SUST_2D_V2<string inst, NVPTXRegClass intype> {
+ def _R : SUST_2D_V2_base<inst, intype, (ins Int64Regs:$s)>;
+ def _I : SUST_2D_V2_base<inst, intype, (ins i64imm:$s)>;
+}
+defm SUST_B_2D_V2B8_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b8.clamp", Int16Regs>;
+defm SUST_B_2D_V2B16_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b16.clamp", Int16Regs>;
+defm SUST_B_2D_V2B32_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b32.clamp", Int32Regs>;
+defm SUST_B_2D_V2B64_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b64.clamp", Int64Regs>;
+
+defm SUST_B_2D_V2B8_TRAP : SUST_2D_V2<"sust.b.2d.v2.b8.trap", Int16Regs>;
+defm SUST_B_2D_V2B16_TRAP : SUST_2D_V2<"sust.b.2d.v2.b16.trap", Int16Regs>;
+defm SUST_B_2D_V2B32_TRAP : SUST_2D_V2<"sust.b.2d.v2.b32.trap", Int32Regs>;
+defm SUST_B_2D_V2B64_TRAP : SUST_2D_V2<"sust.b.2d.v2.b64.trap", Int64Regs>;
+
+defm SUST_B_2D_V2B8_ZERO : SUST_2D_V2<"sust.b.2d.v2.b8.zero", Int16Regs>;
+defm SUST_B_2D_V2B16_ZERO : SUST_2D_V2<"sust.b.2d.v2.b16.zero", Int16Regs>;
+defm SUST_B_2D_V2B32_ZERO : SUST_2D_V2<"sust.b.2d.v2.b32.zero", Int32Regs>;
+defm SUST_B_2D_V2B64_ZERO : SUST_2D_V2<"sust.b.2d.v2.b64.zero", Int64Regs>;
+
+defm SUST_P_2D_V2B8_TRAP : SUST_2D_V2<"sust.p.2d.v2.b8.trap", Int16Regs>;
+defm SUST_P_2D_V2B16_TRAP : SUST_2D_V2<"sust.p.2d.v2.b16.trap", Int16Regs>;
+defm SUST_P_2D_V2B32_TRAP : SUST_2D_V2<"sust.p.2d.v2.b32.trap", Int32Regs>;
+
+class SUST_2D_V4_base<string inst, NVPTXRegClass intype, dag surf>
+ : NVPTXInst<(outs),
+ !con(surf, (ins Int32Regs:$x, Int32Regs:$y,
+ intype:$r, intype:$g, intype:$b, intype:$a)),
+ inst # " \t[$s, \\{$x, $y\\}], \\{$r, $g, $b, $a\\};",
+ []>;
+multiclass SUST_2D_V4<string inst, NVPTXRegClass intype> {
+ def _R : SUST_2D_V4_base<inst, intype, (ins Int64Regs:$s)>;
+ def _I : SUST_2D_V4_base<inst, intype, (ins i64imm:$s)>;
+}
+defm SUST_B_2D_V4B8_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b8.clamp", Int16Regs>;
+defm SUST_B_2D_V4B16_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b16.clamp", Int16Regs>;
+defm SUST_B_2D_V4B32_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b32.clamp", Int32Regs>;
+
+defm SUST_B_2D_V4B8_TRAP : SUST_2D_V4<"sust.b.2d.v4.b8.trap", Int16Regs>;
+defm SUST_B_2D_V4B16_TRAP : SUST_2D_V4<"sust.b.2d.v4.b16.trap", Int16Regs>;
+defm SUST_B_2D_V4B32_TRAP : SUST_2D_V4<"sust.b.2d.v4.b32.trap", Int32Regs>;
+
+defm SUST_B_2D_V4B8_ZERO : SUST_2D_V4<"sust.b.2d.v4.b8.zero", Int16Regs>;
+defm SUST_B_2D_V4B16_ZERO : SUST_2D_V4<"sust.b.2d.v4.b16.zero", Int16Regs>;
+defm SUST_B_2D_V4B32_ZERO : SUST_2D_V4<"sust.b.2d.v4.b32.zero", Int32Regs>;
+
+defm SUST_P_2D_V4B8_TRAP : SUST_2D_V4<"sust.p.2d.v4.b8.trap", Int16Regs>;
+defm SUST_P_2D_V4B16_TRAP : SUST_2D_V4<"sust.p.2d.v4.b16.trap", Int16Regs>;
+defm SUST_P_2D_V4B32_TRAP : SUST_2D_V4<"sust.p.2d.v4.b32.trap", Int32Regs>;
+
+class SUST_2D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf>
+ : NVPTXInst<(outs),
+ !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ intype:$r)),
+ inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
+ []>;
+multiclass SUST_2D_ARRAY<string inst, NVPTXRegClass intype> {
+ def _R : SUST_2D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>;
+ def _I : SUST_2D_ARRAY_base<inst, intype, (ins i64imm:$s)>;
+}
-// Formatted
+defm SUST_B_2D_ARRAY_B8_CLAMP
+ : SUST_2D_ARRAY<"sust.b.a2d.b8.clamp", Int16Regs>;
+defm SUST_B_2D_ARRAY_B16_CLAMP
+ : SUST_2D_ARRAY<"sust.b.a2d.b16.clamp", Int16Regs>;
+defm SUST_B_2D_ARRAY_B32_CLAMP
+ : SUST_2D_ARRAY<"sust.b.a2d.b32.clamp", Int32Regs>;
+defm SUST_B_2D_ARRAY_B64_CLAMP
+ : SUST_2D_ARRAY<"sust.b.a2d.b64.clamp", Int64Regs>;
+
+defm SUST_B_2D_ARRAY_B8_TRAP
+ : SUST_2D_ARRAY<"sust.b.a2d.b8.trap", Int16Regs>;
+defm SUST_B_2D_ARRAY_B16_TRAP
+ : SUST_2D_ARRAY<"sust.b.a2d.b16.trap", Int16Regs>;
+defm SUST_B_2D_ARRAY_B32_TRAP
+ : SUST_2D_ARRAY<"sust.b.a2d.b32.trap", Int32Regs>;
+defm SUST_B_2D_ARRAY_B64_TRAP
+ : SUST_2D_ARRAY<"sust.b.a2d.b64.trap", Int64Regs>;
+
+defm SUST_B_2D_ARRAY_B8_ZERO
+ : SUST_2D_ARRAY<"sust.b.a2d.b8.zero", Int16Regs>;
+defm SUST_B_2D_ARRAY_B16_ZERO
+ : SUST_2D_ARRAY<"sust.b.a2d.b16.zero", Int16Regs>;
+defm SUST_B_2D_ARRAY_B32_ZERO
+ : SUST_2D_ARRAY<"sust.b.a2d.b32.zero", Int32Regs>;
+defm SUST_B_2D_ARRAY_B64_ZERO
+ : SUST_2D_ARRAY<"sust.b.a2d.b64.zero", Int64Regs>;
+
+defm SUST_P_2D_ARRAY_B8_TRAP
+ : SUST_2D_ARRAY<"sust.p.a2d.b8.trap", Int16Regs>;
+defm SUST_P_2D_ARRAY_B16_TRAP
+ : SUST_2D_ARRAY<"sust.p.a2d.b16.trap", Int16Regs>;
+defm SUST_P_2D_ARRAY_B32_TRAP
+ : SUST_2D_ARRAY<"sust.p.a2d.b32.trap", Int32Regs>;
+
+class SUST_2D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf>
+ : NVPTXInst<(outs),
+ !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ intype:$r, intype:$g)),
+ inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};",
+ []>;
+multiclass SUST_2D_ARRAY_V2<string inst, NVPTXRegClass intype> {
+ def _R : SUST_2D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>;
+ def _I : SUST_2D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>;
+}
-def SUST_P_1D_B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
- "sust.p.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
- []>;
-def SUST_P_1D_B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
- "sust.p.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
- []>;
-def SUST_P_1D_B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
- "sust.p.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
- []>;
-def SUST_P_1D_V2B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- "sust.p.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
- []>;
-def SUST_P_1D_V2B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- "sust.p.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
- []>;
-def SUST_P_1D_V2B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
- "sust.p.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
- []>;
-def SUST_P_1D_V4B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
- Int16Regs:$b, Int16Regs:$a),
- "sust.p.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
- []>;
-def SUST_P_1D_V4B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
- Int16Regs:$b, Int16Regs:$a),
- "sust.p.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
- []>;
-def SUST_P_1D_V4B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- "sust.p.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
- []>;
+defm SUST_B_2D_ARRAY_V2B8_CLAMP
+ : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.clamp", Int16Regs>;
+defm SUST_B_2D_ARRAY_V2B16_CLAMP
+ : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.clamp", Int16Regs>;
+defm SUST_B_2D_ARRAY_V2B32_CLAMP
+ : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.clamp", Int32Regs>;
+defm SUST_B_2D_ARRAY_V2B64_CLAMP
+ : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.clamp", Int64Regs>;
+
+defm SUST_B_2D_ARRAY_V2B8_TRAP
+ : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.trap", Int16Regs>;
+defm SUST_B_2D_ARRAY_V2B16_TRAP
+ : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.trap", Int16Regs>;
+defm SUST_B_2D_ARRAY_V2B32_TRAP
+ : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.trap", Int32Regs>;
+defm SUST_B_2D_ARRAY_V2B64_TRAP
+ : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.trap", Int64Regs>;
+
+defm SUST_B_2D_ARRAY_V2B8_ZERO
+ : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.zero", Int16Regs>;
+defm SUST_B_2D_ARRAY_V2B16_ZERO
+ : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.zero", Int16Regs>;
+defm SUST_B_2D_ARRAY_V2B32_ZERO
+ : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.zero", Int32Regs>;
+defm SUST_B_2D_ARRAY_V2B64_ZERO
+ : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.zero", Int64Regs>;
+
+defm SUST_P_2D_ARRAY_V2B8_TRAP
+ : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b8.trap", Int16Regs>;
+defm SUST_P_2D_ARRAY_V2B16_TRAP
+ : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b16.trap", Int16Regs>;
+defm SUST_P_2D_ARRAY_V2B32_TRAP
+ : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b32.trap", Int32Regs>;
+
+class SUST_2D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf>
+ : NVPTXInst<(outs),
+ !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ intype:$r, intype:$g, intype:$b, intype:$a)),
+ inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g, $b, $a\\};",
+ []>;
+multiclass SUST_2D_ARRAY_V4<string inst, NVPTXRegClass intype> {
+ def _R : SUST_2D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>;
+ def _I : SUST_2D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>;
+}
+defm SUST_B_2D_ARRAY_V4B8_CLAMP
+ : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.clamp", Int16Regs>;
+defm SUST_B_2D_ARRAY_V4B16_CLAMP
+ : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.clamp", Int16Regs>;
+defm SUST_B_2D_ARRAY_V4B32_CLAMP
+ : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.clamp", Int32Regs>;
+
+defm SUST_B_2D_ARRAY_V4B8_TRAP
+ : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.trap", Int16Regs>;
+defm SUST_B_2D_ARRAY_V4B16_TRAP
+ : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.trap", Int16Regs>;
+defm SUST_B_2D_ARRAY_V4B32_TRAP
+ : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.trap", Int32Regs>;
+
+defm SUST_B_2D_ARRAY_V4B8_ZERO
+ : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.zero", Int16Regs>;
+defm SUST_B_2D_ARRAY_V4B16_ZERO
+ : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.zero", Int16Regs>;
+defm SUST_B_2D_ARRAY_V4B32_ZERO
+ : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.zero", Int32Regs>;
+
+defm SUST_P_2D_ARRAY_V4B8_TRAP
+ : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b8.trap", Int16Regs>;
+defm SUST_P_2D_ARRAY_V4B16_TRAP
+ : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b16.trap", Int16Regs>;
+defm SUST_P_2D_ARRAY_V4B32_TRAP
+ : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b32.trap", Int32Regs>;
+
+class SUST_3D_base<string inst, NVPTXRegClass intype, dag surf>
+ : NVPTXInst<(outs),
+ !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ intype:$r)),
+ inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
+ []>;
+multiclass SUST_3D<string inst, NVPTXRegClass intype> {
+ def _R : SUST_3D_base<inst, intype, (ins Int64Regs:$s)>;
+ def _I : SUST_3D_base<inst, intype, (ins i64imm:$s)>;
+}
-def SUST_P_1D_ARRAY_B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
- "sust.p.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
- []>;
-def SUST_P_1D_ARRAY_B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
- "sust.p.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
- []>;
-def SUST_P_1D_ARRAY_B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
- "sust.p.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
- []>;
-def SUST_P_1D_ARRAY_V2B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
- Int16Regs:$g),
- "sust.p.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
- []>;
-def SUST_P_1D_ARRAY_V2B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
- Int16Regs:$g),
- "sust.p.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
- []>;
-def SUST_P_1D_ARRAY_V2B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
- Int32Regs:$g),
- "sust.p.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
- []>;
-def SUST_P_1D_ARRAY_V4B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
- Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.p.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_P_1D_ARRAY_V4B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
- Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.p.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_P_1D_ARRAY_V4B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
- Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- "sust.p.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
+defm SUST_B_3D_B8_CLAMP : SUST_3D<"sust.b.3d.b8.clamp", Int16Regs>;
+defm SUST_B_3D_B16_CLAMP : SUST_3D<"sust.b.3d.b16.clamp", Int16Regs>;
+defm SUST_B_3D_B32_CLAMP : SUST_3D<"sust.b.3d.b32.clamp", Int32Regs>;
+defm SUST_B_3D_B64_CLAMP : SUST_3D<"sust.b.3d.b64.clamp", Int64Regs>;
+
+defm SUST_B_3D_B8_TRAP : SUST_3D<"sust.b.3d.b8.trap", Int16Regs>;
+defm SUST_B_3D_B16_TRAP : SUST_3D<"sust.b.3d.b16.trap", Int16Regs>;
+defm SUST_B_3D_B32_TRAP : SUST_3D<"sust.b.3d.b32.trap", Int32Regs>;
+defm SUST_B_3D_B64_TRAP : SUST_3D<"sust.b.3d.b64.trap", Int64Regs>;
+
+defm SUST_B_3D_B8_ZERO : SUST_3D<"sust.b.3d.b8.zero", Int16Regs>;
+defm SUST_B_3D_B16_ZERO : SUST_3D<"sust.b.3d.b16.zero", Int16Regs>;
+defm SUST_B_3D_B32_ZERO : SUST_3D<"sust.b.3d.b32.zero", Int32Regs>;
+defm SUST_B_3D_B64_ZERO : SUST_3D<"sust.b.3d.b64.zero", Int64Regs>;
+
+defm SUST_P_3D_B8_TRAP : SUST_3D<"sust.p.3d.b8.trap", Int16Regs>;
+defm SUST_P_3D_B16_TRAP : SUST_3D<"sust.p.3d.b16.trap", Int16Regs>;
+defm SUST_P_3D_B32_TRAP : SUST_3D<"sust.p.3d.b32.trap", Int32Regs>;
+
+class SUST_3D_V2_base<string inst, NVPTXRegClass intype, dag surf>
+ : NVPTXInst<(outs),
+ !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ intype:$r, intype:$g)),
+ inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};",
+ []>;
+multiclass SUST_3D_V2<string inst, NVPTXRegClass intype> {
+ def _R : SUST_3D_V2_base<inst, intype, (ins Int64Regs:$s)>;
+ def _I : SUST_3D_V2_base<inst, intype, (ins i64imm:$s)>;
+}
+defm SUST_B_3D_V2B8_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b8.clamp", Int16Regs>;
+defm SUST_B_3D_V2B16_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b16.clamp", Int16Regs>;
+defm SUST_B_3D_V2B32_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b32.clamp", Int32Regs>;
+defm SUST_B_3D_V2B64_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b64.clamp", Int64Regs>;
+
+defm SUST_B_3D_V2B8_TRAP : SUST_3D_V2<"sust.b.3d.v2.b8.trap", Int16Regs>;
+defm SUST_B_3D_V2B16_TRAP : SUST_3D_V2<"sust.b.3d.v2.b16.trap", Int16Regs>;
+defm SUST_B_3D_V2B32_TRAP : SUST_3D_V2<"sust.b.3d.v2.b32.trap", Int32Regs>;
+defm SUST_B_3D_V2B64_TRAP : SUST_3D_V2<"sust.b.3d.v2.b64.trap", Int64Regs>;
+
+defm SUST_B_3D_V2B8_ZERO : SUST_3D_V2<"sust.b.3d.v2.b8.zero", Int16Regs>;
+defm SUST_B_3D_V2B16_ZERO : SUST_3D_V2<"sust.b.3d.v2.b16.zero", Int16Regs>;
+defm SUST_B_3D_V2B32_ZERO : SUST_3D_V2<"sust.b.3d.v2.b32.zero", Int32Regs>;
+defm SUST_B_3D_V2B64_ZERO : SUST_3D_V2<"sust.b.3d.v2.b64.zero", Int64Regs>;
+
+defm SUST_P_3D_V2B8_TRAP : SUST_3D_V2<"sust.p.3d.v2.b8.trap", Int16Regs>;
+defm SUST_P_3D_V2B16_TRAP : SUST_3D_V2<"sust.p.3d.v2.b16.trap", Int16Regs>;
+defm SUST_P_3D_V2B32_TRAP : SUST_3D_V2<"sust.p.3d.v2.b32.trap", Int32Regs>;
+
+class SUST_3D_V4_base<string inst, NVPTXRegClass intype, dag surf>
+ : NVPTXInst<(outs),
+ !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ intype:$r, intype:$g, intype:$b, intype:$a)),
+ inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g, $b, $a\\};",
+ []>;
+multiclass SUST_3D_V4<string inst, NVPTXRegClass intype> {
+ def _R : SUST_3D_V4_base<inst, intype, (ins Int64Regs:$s)>;
+ def _I : SUST_3D_V4_base<inst, intype, (ins i64imm:$s)>;
+}
-def SUST_P_2D_B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- "sust.p.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
- []>;
-def SUST_P_2D_B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- "sust.p.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
- []>;
-def SUST_P_2D_B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
- "sust.p.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
- []>;
-def SUST_P_2D_V2B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
- Int16Regs:$g),
- "sust.p.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
- []>;
-def SUST_P_2D_V2B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
- Int16Regs:$g),
- "sust.p.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
- []>;
-def SUST_P_2D_V2B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
- Int32Regs:$g),
- "sust.p.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
- []>;
-def SUST_P_2D_V4B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
- Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.p.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_P_2D_V4B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
- Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.p.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_P_2D_V4B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
- Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- "sust.p.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
+defm SUST_B_3D_V4B8_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b8.clamp", Int16Regs>;
+defm SUST_B_3D_V4B16_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b16.clamp", Int16Regs>;
+defm SUST_B_3D_V4B32_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b32.clamp", Int32Regs>;
+defm SUST_B_3D_V4B8_TRAP : SUST_3D_V4<"sust.b.3d.v4.b8.trap", Int16Regs>;
+defm SUST_B_3D_V4B16_TRAP : SUST_3D_V4<"sust.b.3d.v4.b16.trap", Int16Regs>;
+defm SUST_B_3D_V4B32_TRAP : SUST_3D_V4<"sust.b.3d.v4.b32.trap", Int32Regs>;
-def SUST_P_2D_ARRAY_B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r),
- "sust.p.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
- []>;
-def SUST_P_2D_ARRAY_B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r),
- "sust.p.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
- []>;
-def SUST_P_2D_ARRAY_B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$r),
- "sust.p.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
- []>;
-def SUST_P_2D_ARRAY_V2B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r, Int16Regs:$g),
- "sust.p.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_P_2D_ARRAY_V2B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r, Int16Regs:$g),
- "sust.p.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_P_2D_ARRAY_V2B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$r, Int32Regs:$g),
- "sust.p.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_P_2D_ARRAY_V4B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.p.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_P_2D_ARRAY_V4B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.p.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_P_2D_ARRAY_V4B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- "sust.p.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
+defm SUST_B_3D_V4B8_ZERO : SUST_3D_V4<"sust.b.3d.v4.b8.zero", Int16Regs>;
+defm SUST_B_3D_V4B16_ZERO : SUST_3D_V4<"sust.b.3d.v4.b16.zero", Int16Regs>;
+defm SUST_B_3D_V4B32_ZERO : SUST_3D_V4<"sust.b.3d.v4.b32.zero", Int32Regs>;
+defm SUST_P_3D_V4B8_TRAP : SUST_3D_V4<"sust.p.3d.v4.b8.trap", Int16Regs>;
+defm SUST_P_3D_V4B16_TRAP : SUST_3D_V4<"sust.p.3d.v4.b16.trap", Int16Regs>;
+defm SUST_P_3D_V4B32_TRAP : SUST_3D_V4<"sust.p.3d.v4.b32.trap", Int32Regs>;
-def SUST_P_3D_B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r),
- "sust.p.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
- []>;
-def SUST_P_3D_B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r),
- "sust.p.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
- []>;
-def SUST_P_3D_B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int32Regs:$r),
- "sust.p.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
- []>;
-def SUST_P_3D_V2B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r, Int16Regs:$g),
- "sust.p.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_P_3D_V2B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r, Int16Regs:$g),
- "sust.p.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_P_3D_V2B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int32Regs:$r, Int32Regs:$g),
- "sust.p.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_P_3D_V4B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.p.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_P_3D_V4B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.p.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_P_3D_V4B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- "sust.p.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
}
// Surface store instruction patterns
@@ -6217,248 +4725,248 @@ def SUST_P_3D_V4B32_TRAP
// .clamp variant
def : Pat<(int_nvvm_sust_b_1d_i8_clamp
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
- (SUST_B_1D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
+ (SUST_B_1D_B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_i16_clamp
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
- (SUST_B_1D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
+ (SUST_B_1D_B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_i32_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
- (SUST_B_1D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
+ (SUST_B_1D_B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_i64_clamp
Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
- (SUST_B_1D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
+ (SUST_B_1D_B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_v2i8_clamp
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_1D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_v2i16_clamp
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_1D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_v2i32_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
- (SUST_B_1D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_v2i64_clamp
Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
- (SUST_B_1D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$x,
Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_v4i8_clamp
Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_1D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_v4i16_clamp
Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_1D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_v4i32_clamp
Int64Regs:$s, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_B_1D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_array_i8_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
- (SUST_B_1D_ARRAY_B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_array_i16_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
- (SUST_B_1D_ARRAY_B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_array_i32_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
- (SUST_B_1D_ARRAY_B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_array_i64_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
- (SUST_B_1D_ARRAY_B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_B64_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_array_v2i8_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_1D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_array_v2i16_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_1D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_array_v2i32_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
- (SUST_B_1D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_array_v2i64_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
- (SUST_B_1D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_array_v4i8_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_1D_ARRAY_V4B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_array_v4i16_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_1D_ARRAY_V4B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_array_v4i32_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_B_1D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_i8_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- (SUST_B_2D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_i16_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- (SUST_B_2D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_i32_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
- (SUST_B_2D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_i64_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
- (SUST_B_2D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_v2i8_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_2D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_v2i16_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_2D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_v2i32_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
- (SUST_B_2D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_v2i64_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
- (SUST_B_2D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_v4i8_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_2D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_v4i16_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_2D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_v4i32_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_B_2D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_array_i8_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- (SUST_B_2D_ARRAY_B8_CLAMP Int64Regs:$s,
+ (SUST_B_2D_ARRAY_B8_CLAMP_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_array_i16_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- (SUST_B_2D_ARRAY_B16_CLAMP Int64Regs:$s,
+ (SUST_B_2D_ARRAY_B16_CLAMP_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_array_i32_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
- (SUST_B_2D_ARRAY_B32_CLAMP Int64Regs:$s,
+ (SUST_B_2D_ARRAY_B32_CLAMP_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_array_i64_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
- (SUST_B_2D_ARRAY_B64_CLAMP Int64Regs:$s,
+ (SUST_B_2D_ARRAY_B64_CLAMP_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_array_v2i8_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g),
- (SUST_B_2D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l,
+ (SUST_B_2D_ARRAY_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_array_v2i16_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g),
- (SUST_B_2D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l,
+ (SUST_B_2D_ARRAY_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_array_v2i32_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
Int32Regs:$g),
- (SUST_B_2D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l,
+ (SUST_B_2D_ARRAY_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_array_v2i64_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
Int64Regs:$g),
- (SUST_B_2D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l,
+ (SUST_B_2D_ARRAY_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_array_v4i8_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_2D_ARRAY_V4B8_CLAMP Int64Regs:$s,
+ (SUST_B_2D_ARRAY_V4B8_CLAMP_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_array_v4i16_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_2D_ARRAY_V4B16_CLAMP Int64Regs:$s,
+ (SUST_B_2D_ARRAY_V4B16_CLAMP_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_B_2D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l,
+ (SUST_B_2D_ARRAY_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
@@ -6467,77 +4975,77 @@ def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp
def : Pat<(int_nvvm_sust_b_3d_i8_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r),
- (SUST_B_3D_B8_CLAMP Int64Regs:$s,
+ (SUST_B_3D_B8_CLAMP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_3d_i16_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r),
- (SUST_B_3D_B16_CLAMP Int64Regs:$s,
+ (SUST_B_3D_B16_CLAMP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_3d_i32_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r),
- (SUST_B_3D_B32_CLAMP Int64Regs:$s,
+ (SUST_B_3D_B32_CLAMP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_3d_i64_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int64Regs:$r),
- (SUST_B_3D_B64_CLAMP Int64Regs:$s,
+ (SUST_B_3D_B64_CLAMP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_3d_v2i8_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g),
- (SUST_B_3D_V2B8_CLAMP Int64Regs:$s,
+ (SUST_B_3D_V2B8_CLAMP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_3d_v2i16_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g),
- (SUST_B_3D_V2B16_CLAMP Int64Regs:$s,
+ (SUST_B_3D_V2B16_CLAMP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_3d_v2i32_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r, Int32Regs:$g),
- (SUST_B_3D_V2B32_CLAMP Int64Regs:$s,
+ (SUST_B_3D_V2B32_CLAMP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_3d_v2i64_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int64Regs:$r, Int64Regs:$g),
- (SUST_B_3D_V2B64_CLAMP Int64Regs:$s,
+ (SUST_B_3D_V2B64_CLAMP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_3d_v4i8_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_3D_V4B8_CLAMP Int64Regs:$s,
+ (SUST_B_3D_V4B8_CLAMP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_3d_v4i16_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_3D_V4B16_CLAMP Int64Regs:$s,
+ (SUST_B_3D_V4B16_CLAMP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_B_3D_V4B32_CLAMP Int64Regs:$s,
+ (SUST_B_3D_V4B32_CLAMP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
@@ -6545,248 +5053,248 @@ def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp
// .trap variant
def : Pat<(int_nvvm_sust_b_1d_i8_trap
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
- (SUST_B_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
+ (SUST_B_1D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_i16_trap
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
- (SUST_B_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
+ (SUST_B_1D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_i32_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
- (SUST_B_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
+ (SUST_B_1D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_i64_trap
Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
- (SUST_B_1D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
+ (SUST_B_1D_B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_v2i8_trap
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_v2i16_trap
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_v2i32_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
- (SUST_B_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_v2i64_trap
Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
- (SUST_B_1D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$x,
Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_v4i8_trap
Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_v4i16_trap
Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_v4i32_trap
Int64Regs:$s, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_B_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_array_i8_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
- (SUST_B_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_array_i16_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
- (SUST_B_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_array_i32_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
- (SUST_B_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_array_i64_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
- (SUST_B_1D_ARRAY_B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_B64_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_array_v2i16_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
- (SUST_B_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_array_v2i64_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
- (SUST_B_1D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_B_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_i8_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- (SUST_B_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_i16_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- (SUST_B_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_i32_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
- (SUST_B_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_i64_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
- (SUST_B_2D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_v2i8_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_v2i16_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_v2i32_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
- (SUST_B_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_v2i64_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
- (SUST_B_2D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_v4i8_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_v4i16_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_v4i32_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_B_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_array_i8_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- (SUST_B_2D_ARRAY_B8_TRAP Int64Regs:$s,
+ (SUST_B_2D_ARRAY_B8_TRAP_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_array_i16_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- (SUST_B_2D_ARRAY_B16_TRAP Int64Regs:$s,
+ (SUST_B_2D_ARRAY_B16_TRAP_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_array_i32_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
- (SUST_B_2D_ARRAY_B32_TRAP Int64Regs:$s,
+ (SUST_B_2D_ARRAY_B32_TRAP_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_array_i64_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
- (SUST_B_2D_ARRAY_B64_TRAP Int64Regs:$s,
+ (SUST_B_2D_ARRAY_B64_TRAP_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g),
- (SUST_B_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l,
+ (SUST_B_2D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g),
- (SUST_B_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l,
+ (SUST_B_2D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
Int32Regs:$g),
- (SUST_B_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l,
+ (SUST_B_2D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_array_v2i64_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
Int64Regs:$g),
- (SUST_B_2D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l,
+ (SUST_B_2D_ARRAY_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_2D_ARRAY_V4B8_TRAP Int64Regs:$s,
+ (SUST_B_2D_ARRAY_V4B8_TRAP_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_2D_ARRAY_V4B16_TRAP Int64Regs:$s,
+ (SUST_B_2D_ARRAY_V4B16_TRAP_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_B_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l,
+ (SUST_B_2D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
@@ -6795,77 +5303,77 @@ def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap
def : Pat<(int_nvvm_sust_b_3d_i8_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r),
- (SUST_B_3D_B8_TRAP Int64Regs:$s,
+ (SUST_B_3D_B8_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_3d_i16_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r),
- (SUST_B_3D_B16_TRAP Int64Regs:$s,
+ (SUST_B_3D_B16_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_3d_i32_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r),
- (SUST_B_3D_B32_TRAP Int64Regs:$s,
+ (SUST_B_3D_B32_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_3d_i64_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int64Regs:$r),
- (SUST_B_3D_B64_TRAP Int64Regs:$s,
+ (SUST_B_3D_B64_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_3d_v2i8_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g),
- (SUST_B_3D_V2B8_TRAP Int64Regs:$s,
+ (SUST_B_3D_V2B8_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_3d_v2i16_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g),
- (SUST_B_3D_V2B16_TRAP Int64Regs:$s,
+ (SUST_B_3D_V2B16_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_3d_v2i32_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r, Int32Regs:$g),
- (SUST_B_3D_V2B32_TRAP Int64Regs:$s,
+ (SUST_B_3D_V2B32_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_3d_v2i64_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int64Regs:$r, Int64Regs:$g),
- (SUST_B_3D_V2B64_TRAP Int64Regs:$s,
+ (SUST_B_3D_V2B64_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_3d_v4i8_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_3D_V4B8_TRAP Int64Regs:$s,
+ (SUST_B_3D_V4B8_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_3d_v4i16_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_3D_V4B16_TRAP Int64Regs:$s,
+ (SUST_B_3D_V4B16_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_3d_v4i32_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_B_3D_V4B32_TRAP Int64Regs:$s,
+ (SUST_B_3D_V4B32_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
@@ -6873,248 +5381,248 @@ def : Pat<(int_nvvm_sust_b_3d_v4i32_trap
// .zero variant
def : Pat<(int_nvvm_sust_b_1d_i8_zero
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
- (SUST_B_1D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
+ (SUST_B_1D_B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_i16_zero
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
- (SUST_B_1D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
+ (SUST_B_1D_B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_i32_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
- (SUST_B_1D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
+ (SUST_B_1D_B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_i64_zero
Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
- (SUST_B_1D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
+ (SUST_B_1D_B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_v2i8_zero
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_1D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_v2i16_zero
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_1D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_v2i32_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
- (SUST_B_1D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_v2i64_zero
Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
- (SUST_B_1D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$x,
Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_v4i8_zero
Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_1D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_v4i16_zero
Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_1D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_v4i32_zero
Int64Regs:$s, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_B_1D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_array_i8_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
- (SUST_B_1D_ARRAY_B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_array_i16_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
- (SUST_B_1D_ARRAY_B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_array_i32_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
- (SUST_B_1D_ARRAY_B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_array_i64_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
- (SUST_B_1D_ARRAY_B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_B64_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_array_v2i8_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_1D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_array_v2i16_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_1D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_array_v2i32_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
- (SUST_B_1D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_array_v2i64_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
- (SUST_B_1D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_array_v4i8_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_1D_ARRAY_V4B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_array_v4i16_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_1D_ARRAY_V4B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_array_v4i32_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_B_1D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_i8_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- (SUST_B_2D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_i16_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- (SUST_B_2D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_i32_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
- (SUST_B_2D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_i64_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
- (SUST_B_2D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_v2i8_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_2D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_v2i16_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_2D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_v2i32_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
- (SUST_B_2D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_v2i64_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
- (SUST_B_2D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_v4i8_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_2D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_v4i16_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_2D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_v4i32_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_B_2D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_array_i8_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- (SUST_B_2D_ARRAY_B8_ZERO Int64Regs:$s,
+ (SUST_B_2D_ARRAY_B8_ZERO_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_array_i16_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- (SUST_B_2D_ARRAY_B16_ZERO Int64Regs:$s,
+ (SUST_B_2D_ARRAY_B16_ZERO_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_array_i32_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
- (SUST_B_2D_ARRAY_B32_ZERO Int64Regs:$s,
+ (SUST_B_2D_ARRAY_B32_ZERO_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_array_i64_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
- (SUST_B_2D_ARRAY_B64_ZERO Int64Regs:$s,
+ (SUST_B_2D_ARRAY_B64_ZERO_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_array_v2i8_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g),
- (SUST_B_2D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l,
+ (SUST_B_2D_ARRAY_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_array_v2i16_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g),
- (SUST_B_2D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l,
+ (SUST_B_2D_ARRAY_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_array_v2i32_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
Int32Regs:$g),
- (SUST_B_2D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l,
+ (SUST_B_2D_ARRAY_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_array_v2i64_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
Int64Regs:$g),
- (SUST_B_2D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l,
+ (SUST_B_2D_ARRAY_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_array_v4i8_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_2D_ARRAY_V4B8_ZERO Int64Regs:$s,
+ (SUST_B_2D_ARRAY_V4B8_ZERO_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_array_v4i16_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_2D_ARRAY_V4B16_ZERO Int64Regs:$s,
+ (SUST_B_2D_ARRAY_V4B16_ZERO_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_B_2D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l,
+ (SUST_B_2D_ARRAY_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
@@ -7123,77 +5631,77 @@ def : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero
def : Pat<(int_nvvm_sust_b_3d_i8_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r),
- (SUST_B_3D_B8_ZERO Int64Regs:$s,
+ (SUST_B_3D_B8_ZERO_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_3d_i16_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r),
- (SUST_B_3D_B16_ZERO Int64Regs:$s,
+ (SUST_B_3D_B16_ZERO_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_3d_i32_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r),
- (SUST_B_3D_B32_ZERO Int64Regs:$s,
+ (SUST_B_3D_B32_ZERO_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_3d_i64_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int64Regs:$r),
- (SUST_B_3D_B64_ZERO Int64Regs:$s,
+ (SUST_B_3D_B64_ZERO_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_3d_v2i8_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g),
- (SUST_B_3D_V2B8_ZERO Int64Regs:$s,
+ (SUST_B_3D_V2B8_ZERO_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_3d_v2i16_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g),
- (SUST_B_3D_V2B16_ZERO Int64Regs:$s,
+ (SUST_B_3D_V2B16_ZERO_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_3d_v2i32_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r, Int32Regs:$g),
- (SUST_B_3D_V2B32_ZERO Int64Regs:$s,
+ (SUST_B_3D_V2B32_ZERO_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_3d_v2i64_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int64Regs:$r, Int64Regs:$g),
- (SUST_B_3D_V2B64_ZERO Int64Regs:$s,
+ (SUST_B_3D_V2B64_ZERO_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_3d_v4i8_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_3D_V4B8_ZERO Int64Regs:$s,
+ (SUST_B_3D_V4B8_ZERO_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_3d_v4i16_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_3D_V4B16_ZERO Int64Regs:$s,
+ (SUST_B_3D_V4B16_ZERO_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_3d_v4i32_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_B_3D_V4B32_ZERO Int64Regs:$s,
+ (SUST_B_3D_V4B32_ZERO_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
@@ -7202,207 +5710,207 @@ def : Pat<(int_nvvm_sust_b_3d_v4i32_zero
def : Pat<(int_nvvm_sust_p_1d_i8_trap
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
- (SUST_P_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
+ (SUST_P_1D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_p_1d_i16_trap
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
- (SUST_P_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
+ (SUST_P_1D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_p_1d_i32_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
- (SUST_P_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
+ (SUST_P_1D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_p_1d_v2i8_trap
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- (SUST_P_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x,
+ (SUST_P_1D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_p_1d_v2i16_trap
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- (SUST_P_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x,
+ (SUST_P_1D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_p_1d_v2i32_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
- (SUST_P_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x,
+ (SUST_P_1D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_p_1d_v4i8_trap
Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_P_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x,
+ (SUST_P_1D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_p_1d_v4i16_trap
Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_P_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x,
+ (SUST_P_1D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_p_1d_v4i32_trap
Int64Regs:$s, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_P_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x,
+ (SUST_P_1D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
def : Pat<(int_nvvm_sust_p_1d_array_i8_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
- (SUST_P_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_P_1D_ARRAY_B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_p_1d_array_i16_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
- (SUST_P_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_P_1D_ARRAY_B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_p_1d_array_i32_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
- (SUST_P_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_P_1D_ARRAY_B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_p_1d_array_v2i8_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- (SUST_P_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_P_1D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_p_1d_array_v2i16_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- (SUST_P_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_P_1D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_p_1d_array_v2i32_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
- (SUST_P_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_P_1D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_p_1d_array_v4i8_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_P_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_P_1D_ARRAY_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_p_1d_array_v4i16_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_P_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_P_1D_ARRAY_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_p_1d_array_v4i32_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_P_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_P_1D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
def : Pat<(int_nvvm_sust_p_2d_i8_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- (SUST_P_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_P_2D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_p_2d_i16_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- (SUST_P_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_P_2D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_p_2d_i32_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
- (SUST_P_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_P_2D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_p_2d_v2i8_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
- (SUST_P_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_P_2D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_p_2d_v2i16_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
- (SUST_P_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_P_2D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_p_2d_v2i32_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
- (SUST_P_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_P_2D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_p_2d_v4i8_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_P_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_P_2D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_p_2d_v4i16_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_P_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_P_2D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_p_2d_v4i32_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_P_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_P_2D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
def : Pat<(int_nvvm_sust_p_2d_array_i8_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- (SUST_P_2D_ARRAY_B8_TRAP Int64Regs:$s,
+ (SUST_P_2D_ARRAY_B8_TRAP_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_p_2d_array_i16_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- (SUST_P_2D_ARRAY_B16_TRAP Int64Regs:$s,
+ (SUST_P_2D_ARRAY_B16_TRAP_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_p_2d_array_i32_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
- (SUST_P_2D_ARRAY_B32_TRAP Int64Regs:$s,
+ (SUST_P_2D_ARRAY_B32_TRAP_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g),
- (SUST_P_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l,
+ (SUST_P_2D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_p_2d_array_v2i16_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g),
- (SUST_P_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l,
+ (SUST_P_2D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
Int32Regs:$g),
- (SUST_P_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l,
+ (SUST_P_2D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_P_2D_ARRAY_V4B8_TRAP Int64Regs:$s,
+ (SUST_P_2D_ARRAY_V4B8_TRAP_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_P_2D_ARRAY_V4B16_TRAP Int64Regs:$s,
+ (SUST_P_2D_ARRAY_V4B16_TRAP_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_P_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l,
+ (SUST_P_2D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
@@ -7411,63 +5919,63 @@ def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap
def : Pat<(int_nvvm_sust_p_3d_i8_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r),
- (SUST_P_3D_B8_TRAP Int64Regs:$s,
+ (SUST_P_3D_B8_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_p_3d_i16_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r),
- (SUST_P_3D_B16_TRAP Int64Regs:$s,
+ (SUST_P_3D_B16_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_p_3d_i32_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r),
- (SUST_P_3D_B32_TRAP Int64Regs:$s,
+ (SUST_P_3D_B32_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_p_3d_v2i8_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g),
- (SUST_P_3D_V2B8_TRAP Int64Regs:$s,
+ (SUST_P_3D_V2B8_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_p_3d_v2i16_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g),
- (SUST_P_3D_V2B16_TRAP Int64Regs:$s,
+ (SUST_P_3D_V2B16_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_p_3d_v2i32_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r, Int32Regs:$g),
- (SUST_P_3D_V2B32_TRAP Int64Regs:$s,
+ (SUST_P_3D_V2B32_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_p_3d_v4i8_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_P_3D_V4B8_TRAP Int64Regs:$s,
+ (SUST_P_3D_V4B8_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_p_3d_v4i16_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_P_3D_V4B16_TRAP Int64Regs:$s,
+ (SUST_P_3D_V4B16_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_P_3D_V4B32_TRAP Int64Regs:$s,
+ (SUST_P_3D_V4B32_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
@@ -7578,6 +6086,7 @@ class WMMA_REGINFO<WMMA_REGS r, string op>
!eq(ptx_elt_type, "bf16") : Int32Regs,
!eq(ptx_elt_type, "tf32") : Int32Regs,
!eq(ptx_elt_type, "s32") : Int32Regs,
+ !eq(ptx_elt_type, "b16") : Int32Regs,
!eq(ptx_elt_type, "s8") : Int32Regs,
!eq(ptx_elt_type, "u8") : Int32Regs,
!eq(ptx_elt_type, "s4") : Int32Regs,
@@ -7661,7 +6170,11 @@ class WMMA_REGINFO<WMMA_REGS r, string op>
!eq(geom, "m16n8k64"),
!eq(geom, "m8n8k128"),
!eq(geom, "m16n8k128"),
- !eq(geom, "m16n8k256"))) : [hasSM80, hasPTX70]);
+ !eq(geom, "m16n8k256"))) : [hasSM80, hasPTX70],
+
+ !and(!eq(op,"ldmatrix"),
+ !eq(ptx_elt_type,"b16"),
+ !eq(geom, "m8n8")) : [hasSM75, hasPTX65]);
// template DAGs for instruction inputs/output.
dag Outs = !dag(outs, ptx_regs, reg_names);
@@ -7910,6 +6423,44 @@ defset list<WMMA_INSTR> MMAs = {
} // layout_a
} // defset
+//
+// ldmatrix.sync.aligned.m8n8[|.trans][|.shared].b16
+//
+class LDMATRIX<WMMA_REGINFO Frag, bit Transposed, string Space,
+ DAGOperand SrcOp>
+ : WMMA_INSTR<LDMATRIX_NAME<Frag, Transposed>.record, [(ins SrcOp:$src)]>,
+ Requires<Frag.Predicates> {
+ // Build PatFrag that only matches particular address space.
+ PatFrag IntrFrag = PatFrag<(ops node:$src), (Intr node:$src),
+ !cond(!eq(Space, ".shared"): AS_match.shared,
+ true: AS_match.generic)>;
+ // Build AS-constrained pattern.
+ let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
+
+ let OutOperandList = Frag.Outs;
+ let InOperandList = !con(Args, (ins MmaCode:$ptx));
+ let AsmString = "ldmatrix.sync.aligned."
+ # Frag.geom
+ # "." # Frag.frag
+ # !if(Transposed, ".trans", "")
+ # Space
+ # "." # Frag.ptx_elt_type
+ # " " # Frag.regstring # ", [$src];";
+}
+
+// Create all ldmatrix variants
+defset list<WMMA_INSTR> LDMATRIXs = {
+ foreach transposed = [false, true] in {
+ foreach space = [".shared", ""] in {
+ foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
+ foreach frag = NVVM_MMA_OPS.all_ldmatrix_ops in
+ if NVVM_LDMATRIX_SUPPORTED<frag>.ret then
+ def : LDMATRIX<WMMA_REGINFO<frag, "ldmatrix">, transposed, space,
+ addr>;
+ } // addr
+ } // space
+ } // transposed
+} // defset
// Constructing non-flat DAGs is still a pain. I can't !subst a dag node with a
// dag, so the ptx.version must be appended *after* foreach replaces 'ins' with
@@ -7921,5 +6472,5 @@ class MMA_PAT<WMMA_INSTR wi>
Requires<wi.Predicates>;
// Build intrinsic->instruction patterns for all MMA instructions.
-foreach mma = !listconcat(MMAs, WMMAs, MMA_LDSTs) in
+foreach mma = !listconcat(MMAs, WMMAs, MMA_LDSTs, LDMATRIXs) in
def : MMA_PAT<mma>;
diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
index 1bd02552b666..369238436083 100644
--- a/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
@@ -78,15 +78,12 @@ bool NVPTXLowerAlloca::runOnFunction(Function &F) {
new AddrSpaceCastInst(NewASCToLocal, GenericAddrTy, "");
NewASCToLocal->insertAfter(allocaInst);
NewASCToGeneric->insertAfter(NewASCToLocal);
- for (Value::use_iterator UI = allocaInst->use_begin(),
- UE = allocaInst->use_end();
- UI != UE;) {
+ for (Use &AllocaUse : llvm::make_early_inc_range(allocaInst->uses())) {
// Check Load, Store, GEP, and BitCast Uses on alloca and make them
// use the converted generic address, in order to expose non-generic
// addrspacecast to NVPTXInferAddressSpaces. For other types
// of instructions this is unnecessary and may introduce redundant
// address cast.
- const auto &AllocaUse = *UI++;
auto LI = dyn_cast<LoadInst>(AllocaUse.getUser());
if (LI && LI->getPointerOperand() == allocaInst &&
!LI->isVolatile()) {
diff --git a/llvm/lib/Target/NVPTX/NVPTXPeephole.cpp b/llvm/lib/Target/NVPTX/NVPTXPeephole.cpp
index 5e6411c61eab..1f3b4c9440d8 100644
--- a/llvm/lib/Target/NVPTX/NVPTXPeephole.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXPeephole.cpp
@@ -21,17 +21,19 @@
// This peephole pass optimizes these cases, for example
//
// It will transform the following pattern
-// %0 = LEA_ADDRi64 %VRFrame, 4
+// %0 = LEA_ADDRi64 %VRFrame64, 4
// %1 = cvta_to_local_yes_64 %0
//
// into
-// %1 = LEA_ADDRi64 %VRFrameLocal, 4
+// %1 = LEA_ADDRi64 %VRFrameLocal64, 4
//
-// %VRFrameLocal is the virtual register name of %SPL
+// %VRFrameLocal64 is the virtual register name of %SPL
//
//===----------------------------------------------------------------------===//
#include "NVPTX.h"
+#include "NVPTXRegisterInfo.h"
+#include "NVPTXSubtarget.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -92,9 +94,12 @@ static bool isCVTAToLocalCombinationCandidate(MachineInstr &Root) {
return false;
}
+ const NVPTXRegisterInfo *NRI =
+ MF.getSubtarget<NVPTXSubtarget>().getRegisterInfo();
+
// Check the LEA_ADDRi operand is Frame index
auto &BaseAddrOp = GenericAddrDef->getOperand(1);
- if (BaseAddrOp.isReg() && BaseAddrOp.getReg() == NVPTX::VRFrame) {
+ if (BaseAddrOp.isReg() && BaseAddrOp.getReg() == NRI->getFrameRegister(MF)) {
return true;
}
@@ -108,10 +113,13 @@ static void CombineCVTAToLocal(MachineInstr &Root) {
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
auto &Prev = *MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
+ const NVPTXRegisterInfo *NRI =
+ MF.getSubtarget<NVPTXSubtarget>().getRegisterInfo();
+
MachineInstrBuilder MIB =
BuildMI(MF, Root.getDebugLoc(), TII->get(Prev.getOpcode()),
Root.getOperand(0).getReg())
- .addReg(NVPTX::VRFrameLocal)
+ .addReg(NRI->getFrameLocalRegister(MF))
.add(Prev.getOperand(2));
MBB.insert((MachineBasicBlock::iterator)&Root, MIB);
@@ -142,10 +150,13 @@ bool NVPTXPeephole::runOnMachineFunction(MachineFunction &MF) {
} // Instruction
} // Basic Block
+ const NVPTXRegisterInfo *NRI =
+ MF.getSubtarget<NVPTXSubtarget>().getRegisterInfo();
+
// Remove unnecessary %VRFrame = cvta.local %VRFrameLocal
const auto &MRI = MF.getRegInfo();
- if (MRI.use_empty(NVPTX::VRFrame)) {
- if (auto MI = MRI.getUniqueVRegDef(NVPTX::VRFrame)) {
+ if (MRI.use_empty(NRI->getFrameRegister(MF))) {
+ if (auto MI = MRI.getUniqueVRegDef(NRI->getFrameRegister(MF))) {
MI->eraseFromParentAndMarkDBGValuesForRemoval();
}
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
index 8e2299e65222..16fbe1a65562 100644
--- a/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
@@ -74,7 +74,6 @@ bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) {
auto Offset =
TFI.getFrameIndexReference(MF, Op.getIndex(), Reg);
Op.ChangeToRegister(Reg, /*isDef=*/false);
- Op.setIsDebug();
const DIExpression *DIExpr = MI.getDebugExpression();
if (MI.isNonListDebugValue()) {
DIExpr = TRI.prependOffsetExpression(MI.getDebugExpression(), DIExpression::ApplyOffset, Offset);
diff --git a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
index 5cdec0925b26..ec7307265bca 100644
--- a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
@@ -13,6 +13,7 @@
#include "NVPTXRegisterInfo.h"
#include "NVPTX.h"
#include "NVPTXSubtarget.h"
+#include "NVPTXTargetMachine.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -106,6 +107,14 @@ NVPTXRegisterInfo::getCalleeSavedRegs(const MachineFunction *) const {
BitVector NVPTXRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
+ for (unsigned Reg = NVPTX::ENVREG0; Reg <= NVPTX::ENVREG31; ++Reg) {
+ markSuperRegs(Reserved, Reg);
+ }
+ markSuperRegs(Reserved, NVPTX::VRFrame32);
+ markSuperRegs(Reserved, NVPTX::VRFrameLocal32);
+ markSuperRegs(Reserved, NVPTX::VRFrame64);
+ markSuperRegs(Reserved, NVPTX::VRFrameLocal64);
+ markSuperRegs(Reserved, NVPTX::VRDepot);
return Reserved;
}
@@ -122,10 +131,19 @@ void NVPTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.getOperand(FIOperandNum + 1).getImm();
// Using I0 as the frame pointer
- MI.getOperand(FIOperandNum).ChangeToRegister(NVPTX::VRFrame, false);
+ MI.getOperand(FIOperandNum).ChangeToRegister(getFrameRegister(MF), false);
MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
}
Register NVPTXRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- return NVPTX::VRFrame;
+ const NVPTXTargetMachine &TM =
+ static_cast<const NVPTXTargetMachine &>(MF.getTarget());
+ return TM.is64Bit() ? NVPTX::VRFrame64 : NVPTX::VRFrame32;
+}
+
+Register
+NVPTXRegisterInfo::getFrameLocalRegister(const MachineFunction &MF) const {
+ const NVPTXTargetMachine &TM =
+ static_cast<const NVPTXTargetMachine &>(MF.getTarget());
+ return TM.is64Bit() ? NVPTX::VRFrameLocal64 : NVPTX::VRFrameLocal32;
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.h b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.h
index 9ef6940daf86..c6dd647f4637 100644
--- a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.h
+++ b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.h
@@ -43,6 +43,7 @@ public:
RegScavenger *RS = nullptr) const override;
Register getFrameRegister(const MachineFunction &MF) const override;
+ Register getFrameLocalRegister(const MachineFunction &MF) const;
ManagedStringPool *getStrPool() const {
return const_cast<ManagedStringPool *>(&ManagedStrPool);
diff --git a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td
index 19895a20bacf..162167e8720d 100644
--- a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td
@@ -22,8 +22,10 @@ class NVPTXRegClass<list<ValueType> regTypes, int alignment, dag regList>
//===----------------------------------------------------------------------===//
// Special Registers used as stack pointer
-def VRFrame : NVPTXReg<"%SP">;
-def VRFrameLocal : NVPTXReg<"%SPL">;
+def VRFrame32 : NVPTXReg<"%SP">;
+def VRFrame64 : NVPTXReg<"%SP">;
+def VRFrameLocal32 : NVPTXReg<"%SPL">;
+def VRFrameLocal64 : NVPTXReg<"%SPL">;
// Special Registers used as the stack
def VRDepot : NVPTXReg<"%Depot">;
@@ -56,8 +58,8 @@ foreach i = 0...31 in {
//===----------------------------------------------------------------------===//
def Int1Regs : NVPTXRegClass<[i1], 8, (add (sequence "P%u", 0, 4))>;
def Int16Regs : NVPTXRegClass<[i16], 16, (add (sequence "RS%u", 0, 4))>;
-def Int32Regs : NVPTXRegClass<[i32], 32, (add (sequence "R%u", 0, 4))>;
-def Int64Regs : NVPTXRegClass<[i64], 64, (add (sequence "RL%u", 0, 4))>;
+def Int32Regs : NVPTXRegClass<[i32], 32, (add (sequence "R%u", 0, 4), VRFrame32, VRFrameLocal32)>;
+def Int64Regs : NVPTXRegClass<[i64], 64, (add (sequence "RL%u", 0, 4), VRFrame64, VRFrameLocal64)>;
def Float16Regs : NVPTXRegClass<[f16], 16, (add (sequence "H%u", 0, 4))>;
def Float16x2Regs : NVPTXRegClass<[v2f16], 32, (add (sequence "HH%u", 0, 4))>;
def Float32Regs : NVPTXRegClass<[f32], 32, (add (sequence "F%u", 0, 4))>;
@@ -68,5 +70,5 @@ def Float32ArgRegs : NVPTXRegClass<[f32], 32, (add (sequence "fa%u", 0, 4))>;
def Float64ArgRegs : NVPTXRegClass<[f64], 64, (add (sequence "da%u", 0, 4))>;
// Read NVPTXRegisterInfo.cpp to see how VRFrame and VRDepot are used.
-def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame, VRFrameLocal, VRDepot,
+def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame32, VRFrameLocal32, VRDepot,
(sequence "ENVREG%u", 0, 31))>;
diff --git a/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
index 8ae542130a14..e404cead344b 100644
--- a/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
@@ -41,7 +41,7 @@ public:
}
private:
bool processInstr(MachineInstr &MI);
- void replaceImageHandle(MachineOperand &Op, MachineFunction &MF);
+ bool replaceImageHandle(MachineOperand &Op, MachineFunction &MF);
bool findIndexForHandle(MachineOperand &Op, MachineFunction &MF,
unsigned &Idx);
};
@@ -76,19 +76,1675 @@ bool NVPTXReplaceImageHandles::runOnMachineFunction(MachineFunction &MF) {
return Changed;
}
+static unsigned suldRegisterToIndexOpcode(unsigned RegOC) {
+ switch (RegOC) {
+ case NVPTX::SULD_1D_I8_CLAMP_R:
+ return NVPTX::SULD_1D_I8_CLAMP_I;
+ case NVPTX::SULD_1D_I16_CLAMP_R:
+ return NVPTX::SULD_1D_I16_CLAMP_I;
+ case NVPTX::SULD_1D_I32_CLAMP_R:
+ return NVPTX::SULD_1D_I32_CLAMP_I;
+ case NVPTX::SULD_1D_I64_CLAMP_R:
+ return NVPTX::SULD_1D_I64_CLAMP_I;
+ case NVPTX::SULD_1D_ARRAY_I8_CLAMP_R:
+ return NVPTX::SULD_1D_ARRAY_I8_CLAMP_I;
+ case NVPTX::SULD_1D_ARRAY_I16_CLAMP_R:
+ return NVPTX::SULD_1D_ARRAY_I16_CLAMP_I;
+ case NVPTX::SULD_1D_ARRAY_I32_CLAMP_R:
+ return NVPTX::SULD_1D_ARRAY_I32_CLAMP_I;
+ case NVPTX::SULD_1D_ARRAY_I64_CLAMP_R:
+ return NVPTX::SULD_1D_ARRAY_I64_CLAMP_I;
+ case NVPTX::SULD_2D_I8_CLAMP_R:
+ return NVPTX::SULD_2D_I8_CLAMP_I;
+ case NVPTX::SULD_2D_I16_CLAMP_R:
+ return NVPTX::SULD_2D_I16_CLAMP_I;
+ case NVPTX::SULD_2D_I32_CLAMP_R:
+ return NVPTX::SULD_2D_I32_CLAMP_I;
+ case NVPTX::SULD_2D_I64_CLAMP_R:
+ return NVPTX::SULD_2D_I64_CLAMP_I;
+ case NVPTX::SULD_2D_ARRAY_I8_CLAMP_R:
+ return NVPTX::SULD_2D_ARRAY_I8_CLAMP_I;
+ case NVPTX::SULD_2D_ARRAY_I16_CLAMP_R:
+ return NVPTX::SULD_2D_ARRAY_I16_CLAMP_I;
+ case NVPTX::SULD_2D_ARRAY_I32_CLAMP_R:
+ return NVPTX::SULD_2D_ARRAY_I32_CLAMP_I;
+ case NVPTX::SULD_2D_ARRAY_I64_CLAMP_R:
+ return NVPTX::SULD_2D_ARRAY_I64_CLAMP_I;
+ case NVPTX::SULD_3D_I8_CLAMP_R:
+ return NVPTX::SULD_3D_I8_CLAMP_I;
+ case NVPTX::SULD_3D_I16_CLAMP_R:
+ return NVPTX::SULD_3D_I16_CLAMP_I;
+ case NVPTX::SULD_3D_I32_CLAMP_R:
+ return NVPTX::SULD_3D_I32_CLAMP_I;
+ case NVPTX::SULD_3D_I64_CLAMP_R:
+ return NVPTX::SULD_3D_I64_CLAMP_I;
+ case NVPTX::SULD_1D_V2I8_CLAMP_R:
+ return NVPTX::SULD_1D_V2I8_CLAMP_I;
+ case NVPTX::SULD_1D_V2I16_CLAMP_R:
+ return NVPTX::SULD_1D_V2I16_CLAMP_I;
+ case NVPTX::SULD_1D_V2I32_CLAMP_R:
+ return NVPTX::SULD_1D_V2I32_CLAMP_I;
+ case NVPTX::SULD_1D_V2I64_CLAMP_R:
+ return NVPTX::SULD_1D_V2I64_CLAMP_I;
+ case NVPTX::SULD_1D_ARRAY_V2I8_CLAMP_R:
+ return NVPTX::SULD_1D_ARRAY_V2I8_CLAMP_I;
+ case NVPTX::SULD_1D_ARRAY_V2I16_CLAMP_R:
+ return NVPTX::SULD_1D_ARRAY_V2I16_CLAMP_I;
+ case NVPTX::SULD_1D_ARRAY_V2I32_CLAMP_R:
+ return NVPTX::SULD_1D_ARRAY_V2I32_CLAMP_I;
+ case NVPTX::SULD_1D_ARRAY_V2I64_CLAMP_R:
+ return NVPTX::SULD_1D_ARRAY_V2I64_CLAMP_I;
+ case NVPTX::SULD_2D_V2I8_CLAMP_R:
+ return NVPTX::SULD_2D_V2I8_CLAMP_I;
+ case NVPTX::SULD_2D_V2I16_CLAMP_R:
+ return NVPTX::SULD_2D_V2I16_CLAMP_I;
+ case NVPTX::SULD_2D_V2I32_CLAMP_R:
+ return NVPTX::SULD_2D_V2I32_CLAMP_I;
+ case NVPTX::SULD_2D_V2I64_CLAMP_R:
+ return NVPTX::SULD_2D_V2I64_CLAMP_I;
+ case NVPTX::SULD_2D_ARRAY_V2I8_CLAMP_R:
+ return NVPTX::SULD_2D_ARRAY_V2I8_CLAMP_I;
+ case NVPTX::SULD_2D_ARRAY_V2I16_CLAMP_R:
+ return NVPTX::SULD_2D_ARRAY_V2I16_CLAMP_I;
+ case NVPTX::SULD_2D_ARRAY_V2I32_CLAMP_R:
+ return NVPTX::SULD_2D_ARRAY_V2I32_CLAMP_I;
+ case NVPTX::SULD_2D_ARRAY_V2I64_CLAMP_R:
+ return NVPTX::SULD_2D_ARRAY_V2I64_CLAMP_I;
+ case NVPTX::SULD_3D_V2I8_CLAMP_R:
+ return NVPTX::SULD_3D_V2I8_CLAMP_I;
+ case NVPTX::SULD_3D_V2I16_CLAMP_R:
+ return NVPTX::SULD_3D_V2I16_CLAMP_I;
+ case NVPTX::SULD_3D_V2I32_CLAMP_R:
+ return NVPTX::SULD_3D_V2I32_CLAMP_I;
+ case NVPTX::SULD_3D_V2I64_CLAMP_R:
+ return NVPTX::SULD_3D_V2I64_CLAMP_I;
+ case NVPTX::SULD_1D_V4I8_CLAMP_R:
+ return NVPTX::SULD_1D_V4I8_CLAMP_I;
+ case NVPTX::SULD_1D_V4I16_CLAMP_R:
+ return NVPTX::SULD_1D_V4I16_CLAMP_I;
+ case NVPTX::SULD_1D_V4I32_CLAMP_R:
+ return NVPTX::SULD_1D_V4I32_CLAMP_I;
+ case NVPTX::SULD_1D_ARRAY_V4I8_CLAMP_R:
+ return NVPTX::SULD_1D_ARRAY_V4I8_CLAMP_I;
+ case NVPTX::SULD_1D_ARRAY_V4I16_CLAMP_R:
+ return NVPTX::SULD_1D_ARRAY_V4I16_CLAMP_I;
+ case NVPTX::SULD_1D_ARRAY_V4I32_CLAMP_R:
+ return NVPTX::SULD_1D_ARRAY_V4I32_CLAMP_I;
+ case NVPTX::SULD_2D_V4I8_CLAMP_R:
+ return NVPTX::SULD_2D_V4I8_CLAMP_I;
+ case NVPTX::SULD_2D_V4I16_CLAMP_R:
+ return NVPTX::SULD_2D_V4I16_CLAMP_I;
+ case NVPTX::SULD_2D_V4I32_CLAMP_R:
+ return NVPTX::SULD_2D_V4I32_CLAMP_I;
+ case NVPTX::SULD_2D_ARRAY_V4I8_CLAMP_R:
+ return NVPTX::SULD_2D_ARRAY_V4I8_CLAMP_I;
+ case NVPTX::SULD_2D_ARRAY_V4I16_CLAMP_R:
+ return NVPTX::SULD_2D_ARRAY_V4I16_CLAMP_I;
+ case NVPTX::SULD_2D_ARRAY_V4I32_CLAMP_R:
+ return NVPTX::SULD_2D_ARRAY_V4I32_CLAMP_I;
+ case NVPTX::SULD_3D_V4I8_CLAMP_R:
+ return NVPTX::SULD_3D_V4I8_CLAMP_I;
+ case NVPTX::SULD_3D_V4I16_CLAMP_R:
+ return NVPTX::SULD_3D_V4I16_CLAMP_I;
+ case NVPTX::SULD_3D_V4I32_CLAMP_R:
+ return NVPTX::SULD_3D_V4I32_CLAMP_I;
+ case NVPTX::SULD_1D_I8_TRAP_R:
+ return NVPTX::SULD_1D_I8_TRAP_I;
+ case NVPTX::SULD_1D_I16_TRAP_R:
+ return NVPTX::SULD_1D_I16_TRAP_I;
+ case NVPTX::SULD_1D_I32_TRAP_R:
+ return NVPTX::SULD_1D_I32_TRAP_I;
+ case NVPTX::SULD_1D_I64_TRAP_R:
+ return NVPTX::SULD_1D_I64_TRAP_I;
+ case NVPTX::SULD_1D_ARRAY_I8_TRAP_R:
+ return NVPTX::SULD_1D_ARRAY_I8_TRAP_I;
+ case NVPTX::SULD_1D_ARRAY_I16_TRAP_R:
+ return NVPTX::SULD_1D_ARRAY_I16_TRAP_I;
+ case NVPTX::SULD_1D_ARRAY_I32_TRAP_R:
+ return NVPTX::SULD_1D_ARRAY_I32_TRAP_I;
+ case NVPTX::SULD_1D_ARRAY_I64_TRAP_R:
+ return NVPTX::SULD_1D_ARRAY_I64_TRAP_I;
+ case NVPTX::SULD_2D_I8_TRAP_R:
+ return NVPTX::SULD_2D_I8_TRAP_I;
+ case NVPTX::SULD_2D_I16_TRAP_R:
+ return NVPTX::SULD_2D_I16_TRAP_I;
+ case NVPTX::SULD_2D_I32_TRAP_R:
+ return NVPTX::SULD_2D_I32_TRAP_I;
+ case NVPTX::SULD_2D_I64_TRAP_R:
+ return NVPTX::SULD_2D_I64_TRAP_I;
+ case NVPTX::SULD_2D_ARRAY_I8_TRAP_R:
+ return NVPTX::SULD_2D_ARRAY_I8_TRAP_I;
+ case NVPTX::SULD_2D_ARRAY_I16_TRAP_R:
+ return NVPTX::SULD_2D_ARRAY_I16_TRAP_I;
+ case NVPTX::SULD_2D_ARRAY_I32_TRAP_R:
+ return NVPTX::SULD_2D_ARRAY_I32_TRAP_I;
+ case NVPTX::SULD_2D_ARRAY_I64_TRAP_R:
+ return NVPTX::SULD_2D_ARRAY_I64_TRAP_I;
+ case NVPTX::SULD_3D_I8_TRAP_R:
+ return NVPTX::SULD_3D_I8_TRAP_I;
+ case NVPTX::SULD_3D_I16_TRAP_R:
+ return NVPTX::SULD_3D_I16_TRAP_I;
+ case NVPTX::SULD_3D_I32_TRAP_R:
+ return NVPTX::SULD_3D_I32_TRAP_I;
+ case NVPTX::SULD_3D_I64_TRAP_R:
+ return NVPTX::SULD_3D_I64_TRAP_I;
+ case NVPTX::SULD_1D_V2I8_TRAP_R:
+ return NVPTX::SULD_1D_V2I8_TRAP_I;
+ case NVPTX::SULD_1D_V2I16_TRAP_R:
+ return NVPTX::SULD_1D_V2I16_TRAP_I;
+ case NVPTX::SULD_1D_V2I32_TRAP_R:
+ return NVPTX::SULD_1D_V2I32_TRAP_I;
+ case NVPTX::SULD_1D_V2I64_TRAP_R:
+ return NVPTX::SULD_1D_V2I64_TRAP_I;
+ case NVPTX::SULD_1D_ARRAY_V2I8_TRAP_R:
+ return NVPTX::SULD_1D_ARRAY_V2I8_TRAP_I;
+ case NVPTX::SULD_1D_ARRAY_V2I16_TRAP_R:
+ return NVPTX::SULD_1D_ARRAY_V2I16_TRAP_I;
+ case NVPTX::SULD_1D_ARRAY_V2I32_TRAP_R:
+ return NVPTX::SULD_1D_ARRAY_V2I32_TRAP_I;
+ case NVPTX::SULD_1D_ARRAY_V2I64_TRAP_R:
+ return NVPTX::SULD_1D_ARRAY_V2I64_TRAP_I;
+ case NVPTX::SULD_2D_V2I8_TRAP_R:
+ return NVPTX::SULD_2D_V2I8_TRAP_I;
+ case NVPTX::SULD_2D_V2I16_TRAP_R:
+ return NVPTX::SULD_2D_V2I16_TRAP_I;
+ case NVPTX::SULD_2D_V2I32_TRAP_R:
+ return NVPTX::SULD_2D_V2I32_TRAP_I;
+ case NVPTX::SULD_2D_V2I64_TRAP_R:
+ return NVPTX::SULD_2D_V2I64_TRAP_I;
+ case NVPTX::SULD_2D_ARRAY_V2I8_TRAP_R:
+ return NVPTX::SULD_2D_ARRAY_V2I8_TRAP_I;
+ case NVPTX::SULD_2D_ARRAY_V2I16_TRAP_R:
+ return NVPTX::SULD_2D_ARRAY_V2I16_TRAP_I;
+ case NVPTX::SULD_2D_ARRAY_V2I32_TRAP_R:
+ return NVPTX::SULD_2D_ARRAY_V2I32_TRAP_I;
+ case NVPTX::SULD_2D_ARRAY_V2I64_TRAP_R:
+ return NVPTX::SULD_2D_ARRAY_V2I64_TRAP_I;
+ case NVPTX::SULD_3D_V2I8_TRAP_R:
+ return NVPTX::SULD_3D_V2I8_TRAP_I;
+ case NVPTX::SULD_3D_V2I16_TRAP_R:
+ return NVPTX::SULD_3D_V2I16_TRAP_I;
+ case NVPTX::SULD_3D_V2I32_TRAP_R:
+ return NVPTX::SULD_3D_V2I32_TRAP_I;
+ case NVPTX::SULD_3D_V2I64_TRAP_R:
+ return NVPTX::SULD_3D_V2I64_TRAP_I;
+ case NVPTX::SULD_1D_V4I8_TRAP_R:
+ return NVPTX::SULD_1D_V4I8_TRAP_I;
+ case NVPTX::SULD_1D_V4I16_TRAP_R:
+ return NVPTX::SULD_1D_V4I16_TRAP_I;
+ case NVPTX::SULD_1D_V4I32_TRAP_R:
+ return NVPTX::SULD_1D_V4I32_TRAP_I;
+ case NVPTX::SULD_1D_ARRAY_V4I8_TRAP_R:
+ return NVPTX::SULD_1D_ARRAY_V4I8_TRAP_I;
+ case NVPTX::SULD_1D_ARRAY_V4I16_TRAP_R:
+ return NVPTX::SULD_1D_ARRAY_V4I16_TRAP_I;
+ case NVPTX::SULD_1D_ARRAY_V4I32_TRAP_R:
+ return NVPTX::SULD_1D_ARRAY_V4I32_TRAP_I;
+ case NVPTX::SULD_2D_V4I8_TRAP_R:
+ return NVPTX::SULD_2D_V4I8_TRAP_I;
+ case NVPTX::SULD_2D_V4I16_TRAP_R:
+ return NVPTX::SULD_2D_V4I16_TRAP_I;
+ case NVPTX::SULD_2D_V4I32_TRAP_R:
+ return NVPTX::SULD_2D_V4I32_TRAP_I;
+ case NVPTX::SULD_2D_ARRAY_V4I8_TRAP_R:
+ return NVPTX::SULD_2D_ARRAY_V4I8_TRAP_I;
+ case NVPTX::SULD_2D_ARRAY_V4I16_TRAP_R:
+ return NVPTX::SULD_2D_ARRAY_V4I16_TRAP_I;
+ case NVPTX::SULD_2D_ARRAY_V4I32_TRAP_R:
+ return NVPTX::SULD_2D_ARRAY_V4I32_TRAP_I;
+ case NVPTX::SULD_3D_V4I8_TRAP_R:
+ return NVPTX::SULD_3D_V4I8_TRAP_I;
+ case NVPTX::SULD_3D_V4I16_TRAP_R:
+ return NVPTX::SULD_3D_V4I16_TRAP_I;
+ case NVPTX::SULD_3D_V4I32_TRAP_R:
+ return NVPTX::SULD_3D_V4I32_TRAP_I;
+ case NVPTX::SULD_1D_I8_ZERO_R:
+ return NVPTX::SULD_1D_I8_ZERO_I;
+ case NVPTX::SULD_1D_I16_ZERO_R:
+ return NVPTX::SULD_1D_I16_ZERO_I;
+ case NVPTX::SULD_1D_I32_ZERO_R:
+ return NVPTX::SULD_1D_I32_ZERO_I;
+ case NVPTX::SULD_1D_I64_ZERO_R:
+ return NVPTX::SULD_1D_I64_ZERO_I;
+ case NVPTX::SULD_1D_ARRAY_I8_ZERO_R:
+ return NVPTX::SULD_1D_ARRAY_I8_ZERO_I;
+ case NVPTX::SULD_1D_ARRAY_I16_ZERO_R:
+ return NVPTX::SULD_1D_ARRAY_I16_ZERO_I;
+ case NVPTX::SULD_1D_ARRAY_I32_ZERO_R:
+ return NVPTX::SULD_1D_ARRAY_I32_ZERO_I;
+ case NVPTX::SULD_1D_ARRAY_I64_ZERO_R:
+ return NVPTX::SULD_1D_ARRAY_I64_ZERO_I;
+ case NVPTX::SULD_2D_I8_ZERO_R:
+ return NVPTX::SULD_2D_I8_ZERO_I;
+ case NVPTX::SULD_2D_I16_ZERO_R:
+ return NVPTX::SULD_2D_I16_ZERO_I;
+ case NVPTX::SULD_2D_I32_ZERO_R:
+ return NVPTX::SULD_2D_I32_ZERO_I;
+ case NVPTX::SULD_2D_I64_ZERO_R:
+ return NVPTX::SULD_2D_I64_ZERO_I;
+ case NVPTX::SULD_2D_ARRAY_I8_ZERO_R:
+ return NVPTX::SULD_2D_ARRAY_I8_ZERO_I;
+ case NVPTX::SULD_2D_ARRAY_I16_ZERO_R:
+ return NVPTX::SULD_2D_ARRAY_I16_ZERO_I;
+ case NVPTX::SULD_2D_ARRAY_I32_ZERO_R:
+ return NVPTX::SULD_2D_ARRAY_I32_ZERO_I;
+ case NVPTX::SULD_2D_ARRAY_I64_ZERO_R:
+ return NVPTX::SULD_2D_ARRAY_I64_ZERO_I;
+ case NVPTX::SULD_3D_I8_ZERO_R:
+ return NVPTX::SULD_3D_I8_ZERO_I;
+ case NVPTX::SULD_3D_I16_ZERO_R:
+ return NVPTX::SULD_3D_I16_ZERO_I;
+ case NVPTX::SULD_3D_I32_ZERO_R:
+ return NVPTX::SULD_3D_I32_ZERO_I;
+ case NVPTX::SULD_3D_I64_ZERO_R:
+ return NVPTX::SULD_3D_I64_ZERO_I;
+ case NVPTX::SULD_1D_V2I8_ZERO_R:
+ return NVPTX::SULD_1D_V2I8_ZERO_I;
+ case NVPTX::SULD_1D_V2I16_ZERO_R:
+ return NVPTX::SULD_1D_V2I16_ZERO_I;
+ case NVPTX::SULD_1D_V2I32_ZERO_R:
+ return NVPTX::SULD_1D_V2I32_ZERO_I;
+ case NVPTX::SULD_1D_V2I64_ZERO_R:
+ return NVPTX::SULD_1D_V2I64_ZERO_I;
+ case NVPTX::SULD_1D_ARRAY_V2I8_ZERO_R:
+ return NVPTX::SULD_1D_ARRAY_V2I8_ZERO_I;
+ case NVPTX::SULD_1D_ARRAY_V2I16_ZERO_R:
+ return NVPTX::SULD_1D_ARRAY_V2I16_ZERO_I;
+ case NVPTX::SULD_1D_ARRAY_V2I32_ZERO_R:
+ return NVPTX::SULD_1D_ARRAY_V2I32_ZERO_I;
+ case NVPTX::SULD_1D_ARRAY_V2I64_ZERO_R:
+ return NVPTX::SULD_1D_ARRAY_V2I64_ZERO_I;
+ case NVPTX::SULD_2D_V2I8_ZERO_R:
+ return NVPTX::SULD_2D_V2I8_ZERO_I;
+ case NVPTX::SULD_2D_V2I16_ZERO_R:
+ return NVPTX::SULD_2D_V2I16_ZERO_I;
+ case NVPTX::SULD_2D_V2I32_ZERO_R:
+ return NVPTX::SULD_2D_V2I32_ZERO_I;
+ case NVPTX::SULD_2D_V2I64_ZERO_R:
+ return NVPTX::SULD_2D_V2I64_ZERO_I;
+ case NVPTX::SULD_2D_ARRAY_V2I8_ZERO_R:
+ return NVPTX::SULD_2D_ARRAY_V2I8_ZERO_I;
+ case NVPTX::SULD_2D_ARRAY_V2I16_ZERO_R:
+ return NVPTX::SULD_2D_ARRAY_V2I16_ZERO_I;
+ case NVPTX::SULD_2D_ARRAY_V2I32_ZERO_R:
+ return NVPTX::SULD_2D_ARRAY_V2I32_ZERO_I;
+ case NVPTX::SULD_2D_ARRAY_V2I64_ZERO_R:
+ return NVPTX::SULD_2D_ARRAY_V2I64_ZERO_I;
+ case NVPTX::SULD_3D_V2I8_ZERO_R:
+ return NVPTX::SULD_3D_V2I8_ZERO_I;
+ case NVPTX::SULD_3D_V2I16_ZERO_R:
+ return NVPTX::SULD_3D_V2I16_ZERO_I;
+ case NVPTX::SULD_3D_V2I32_ZERO_R:
+ return NVPTX::SULD_3D_V2I32_ZERO_I;
+ case NVPTX::SULD_3D_V2I64_ZERO_R:
+ return NVPTX::SULD_3D_V2I64_ZERO_I;
+ case NVPTX::SULD_1D_V4I8_ZERO_R:
+ return NVPTX::SULD_1D_V4I8_ZERO_I;
+ case NVPTX::SULD_1D_V4I16_ZERO_R:
+ return NVPTX::SULD_1D_V4I16_ZERO_I;
+ case NVPTX::SULD_1D_V4I32_ZERO_R:
+ return NVPTX::SULD_1D_V4I32_ZERO_I;
+ case NVPTX::SULD_1D_ARRAY_V4I8_ZERO_R:
+ return NVPTX::SULD_1D_ARRAY_V4I8_ZERO_I;
+ case NVPTX::SULD_1D_ARRAY_V4I16_ZERO_R:
+ return NVPTX::SULD_1D_ARRAY_V4I16_ZERO_I;
+ case NVPTX::SULD_1D_ARRAY_V4I32_ZERO_R:
+ return NVPTX::SULD_1D_ARRAY_V4I32_ZERO_I;
+ case NVPTX::SULD_2D_V4I8_ZERO_R:
+ return NVPTX::SULD_2D_V4I8_ZERO_I;
+ case NVPTX::SULD_2D_V4I16_ZERO_R:
+ return NVPTX::SULD_2D_V4I16_ZERO_I;
+ case NVPTX::SULD_2D_V4I32_ZERO_R:
+ return NVPTX::SULD_2D_V4I32_ZERO_I;
+ case NVPTX::SULD_2D_ARRAY_V4I8_ZERO_R:
+ return NVPTX::SULD_2D_ARRAY_V4I8_ZERO_I;
+ case NVPTX::SULD_2D_ARRAY_V4I16_ZERO_R:
+ return NVPTX::SULD_2D_ARRAY_V4I16_ZERO_I;
+ case NVPTX::SULD_2D_ARRAY_V4I32_ZERO_R:
+ return NVPTX::SULD_2D_ARRAY_V4I32_ZERO_I;
+ case NVPTX::SULD_3D_V4I8_ZERO_R:
+ return NVPTX::SULD_3D_V4I8_ZERO_I;
+ case NVPTX::SULD_3D_V4I16_ZERO_R:
+ return NVPTX::SULD_3D_V4I16_ZERO_I;
+ case NVPTX::SULD_3D_V4I32_ZERO_R:
+ return NVPTX::SULD_3D_V4I32_ZERO_I;
+ default:
+ llvm_unreachable("Unhandled SULD opcode");
+ }
+}
+
+static unsigned sustRegisterToIndexOpcode(unsigned RegOC) {
+ switch (RegOC) {
+ case NVPTX::SUST_B_1D_B8_CLAMP_R:
+ return NVPTX::SUST_B_1D_B8_CLAMP_I;
+ case NVPTX::SUST_B_1D_B16_CLAMP_R:
+ return NVPTX::SUST_B_1D_B16_CLAMP_I;
+ case NVPTX::SUST_B_1D_B32_CLAMP_R:
+ return NVPTX::SUST_B_1D_B32_CLAMP_I;
+ case NVPTX::SUST_B_1D_B64_CLAMP_R:
+ return NVPTX::SUST_B_1D_B64_CLAMP_I;
+ case NVPTX::SUST_B_1D_V2B8_CLAMP_R:
+ return NVPTX::SUST_B_1D_V2B8_CLAMP_I;
+ case NVPTX::SUST_B_1D_V2B16_CLAMP_R:
+ return NVPTX::SUST_B_1D_V2B16_CLAMP_I;
+ case NVPTX::SUST_B_1D_V2B32_CLAMP_R:
+ return NVPTX::SUST_B_1D_V2B32_CLAMP_I;
+ case NVPTX::SUST_B_1D_V2B64_CLAMP_R:
+ return NVPTX::SUST_B_1D_V2B64_CLAMP_I;
+ case NVPTX::SUST_B_1D_V4B8_CLAMP_R:
+ return NVPTX::SUST_B_1D_V4B8_CLAMP_I;
+ case NVPTX::SUST_B_1D_V4B16_CLAMP_R:
+ return NVPTX::SUST_B_1D_V4B16_CLAMP_I;
+ case NVPTX::SUST_B_1D_V4B32_CLAMP_R:
+ return NVPTX::SUST_B_1D_V4B32_CLAMP_I;
+ case NVPTX::SUST_B_1D_ARRAY_B8_CLAMP_R:
+ return NVPTX::SUST_B_1D_ARRAY_B8_CLAMP_I;
+ case NVPTX::SUST_B_1D_ARRAY_B16_CLAMP_R:
+ return NVPTX::SUST_B_1D_ARRAY_B16_CLAMP_I;
+ case NVPTX::SUST_B_1D_ARRAY_B32_CLAMP_R:
+ return NVPTX::SUST_B_1D_ARRAY_B32_CLAMP_I;
+ case NVPTX::SUST_B_1D_ARRAY_B64_CLAMP_R:
+ return NVPTX::SUST_B_1D_ARRAY_B64_CLAMP_I;
+ case NVPTX::SUST_B_1D_ARRAY_V2B8_CLAMP_R:
+ return NVPTX::SUST_B_1D_ARRAY_V2B8_CLAMP_I;
+ case NVPTX::SUST_B_1D_ARRAY_V2B16_CLAMP_R:
+ return NVPTX::SUST_B_1D_ARRAY_V2B16_CLAMP_I;
+ case NVPTX::SUST_B_1D_ARRAY_V2B32_CLAMP_R:
+ return NVPTX::SUST_B_1D_ARRAY_V2B32_CLAMP_I;
+ case NVPTX::SUST_B_1D_ARRAY_V2B64_CLAMP_R:
+ return NVPTX::SUST_B_1D_ARRAY_V2B64_CLAMP_I;
+ case NVPTX::SUST_B_1D_ARRAY_V4B8_CLAMP_R:
+ return NVPTX::SUST_B_1D_ARRAY_V4B8_CLAMP_I;
+ case NVPTX::SUST_B_1D_ARRAY_V4B16_CLAMP_R:
+ return NVPTX::SUST_B_1D_ARRAY_V4B16_CLAMP_I;
+ case NVPTX::SUST_B_1D_ARRAY_V4B32_CLAMP_R:
+ return NVPTX::SUST_B_1D_ARRAY_V4B32_CLAMP_I;
+ case NVPTX::SUST_B_2D_B8_CLAMP_R:
+ return NVPTX::SUST_B_2D_B8_CLAMP_I;
+ case NVPTX::SUST_B_2D_B16_CLAMP_R:
+ return NVPTX::SUST_B_2D_B16_CLAMP_I;
+ case NVPTX::SUST_B_2D_B32_CLAMP_R:
+ return NVPTX::SUST_B_2D_B32_CLAMP_I;
+ case NVPTX::SUST_B_2D_B64_CLAMP_R:
+ return NVPTX::SUST_B_2D_B64_CLAMP_I;
+ case NVPTX::SUST_B_2D_V2B8_CLAMP_R:
+ return NVPTX::SUST_B_2D_V2B8_CLAMP_I;
+ case NVPTX::SUST_B_2D_V2B16_CLAMP_R:
+ return NVPTX::SUST_B_2D_V2B16_CLAMP_I;
+ case NVPTX::SUST_B_2D_V2B32_CLAMP_R:
+ return NVPTX::SUST_B_2D_V2B32_CLAMP_I;
+ case NVPTX::SUST_B_2D_V2B64_CLAMP_R:
+ return NVPTX::SUST_B_2D_V2B64_CLAMP_I;
+ case NVPTX::SUST_B_2D_V4B8_CLAMP_R:
+ return NVPTX::SUST_B_2D_V4B8_CLAMP_I;
+ case NVPTX::SUST_B_2D_V4B16_CLAMP_R:
+ return NVPTX::SUST_B_2D_V4B16_CLAMP_I;
+ case NVPTX::SUST_B_2D_V4B32_CLAMP_R:
+ return NVPTX::SUST_B_2D_V4B32_CLAMP_I;
+ case NVPTX::SUST_B_2D_ARRAY_B8_CLAMP_R:
+ return NVPTX::SUST_B_2D_ARRAY_B8_CLAMP_I;
+ case NVPTX::SUST_B_2D_ARRAY_B16_CLAMP_R:
+ return NVPTX::SUST_B_2D_ARRAY_B16_CLAMP_I;
+ case NVPTX::SUST_B_2D_ARRAY_B32_CLAMP_R:
+ return NVPTX::SUST_B_2D_ARRAY_B32_CLAMP_I;
+ case NVPTX::SUST_B_2D_ARRAY_B64_CLAMP_R:
+ return NVPTX::SUST_B_2D_ARRAY_B64_CLAMP_I;
+ case NVPTX::SUST_B_2D_ARRAY_V2B8_CLAMP_R:
+ return NVPTX::SUST_B_2D_ARRAY_V2B8_CLAMP_I;
+ case NVPTX::SUST_B_2D_ARRAY_V2B16_CLAMP_R:
+ return NVPTX::SUST_B_2D_ARRAY_V2B16_CLAMP_I;
+ case NVPTX::SUST_B_2D_ARRAY_V2B32_CLAMP_R:
+ return NVPTX::SUST_B_2D_ARRAY_V2B32_CLAMP_I;
+ case NVPTX::SUST_B_2D_ARRAY_V2B64_CLAMP_R:
+ return NVPTX::SUST_B_2D_ARRAY_V2B64_CLAMP_I;
+ case NVPTX::SUST_B_2D_ARRAY_V4B8_CLAMP_R:
+ return NVPTX::SUST_B_2D_ARRAY_V4B8_CLAMP_I;
+ case NVPTX::SUST_B_2D_ARRAY_V4B16_CLAMP_R:
+ return NVPTX::SUST_B_2D_ARRAY_V4B16_CLAMP_I;
+ case NVPTX::SUST_B_2D_ARRAY_V4B32_CLAMP_R:
+ return NVPTX::SUST_B_2D_ARRAY_V4B32_CLAMP_I;
+ case NVPTX::SUST_B_3D_B8_CLAMP_R:
+ return NVPTX::SUST_B_3D_B8_CLAMP_I;
+ case NVPTX::SUST_B_3D_B16_CLAMP_R:
+ return NVPTX::SUST_B_3D_B16_CLAMP_I;
+ case NVPTX::SUST_B_3D_B32_CLAMP_R:
+ return NVPTX::SUST_B_3D_B32_CLAMP_I;
+ case NVPTX::SUST_B_3D_B64_CLAMP_R:
+ return NVPTX::SUST_B_3D_B64_CLAMP_I;
+ case NVPTX::SUST_B_3D_V2B8_CLAMP_R:
+ return NVPTX::SUST_B_3D_V2B8_CLAMP_I;
+ case NVPTX::SUST_B_3D_V2B16_CLAMP_R:
+ return NVPTX::SUST_B_3D_V2B16_CLAMP_I;
+ case NVPTX::SUST_B_3D_V2B32_CLAMP_R:
+ return NVPTX::SUST_B_3D_V2B32_CLAMP_I;
+ case NVPTX::SUST_B_3D_V2B64_CLAMP_R:
+ return NVPTX::SUST_B_3D_V2B64_CLAMP_I;
+ case NVPTX::SUST_B_3D_V4B8_CLAMP_R:
+ return NVPTX::SUST_B_3D_V4B8_CLAMP_I;
+ case NVPTX::SUST_B_3D_V4B16_CLAMP_R:
+ return NVPTX::SUST_B_3D_V4B16_CLAMP_I;
+ case NVPTX::SUST_B_3D_V4B32_CLAMP_R:
+ return NVPTX::SUST_B_3D_V4B32_CLAMP_I;
+ case NVPTX::SUST_B_1D_B8_TRAP_R:
+ return NVPTX::SUST_B_1D_B8_TRAP_I;
+ case NVPTX::SUST_B_1D_B16_TRAP_R:
+ return NVPTX::SUST_B_1D_B16_TRAP_I;
+ case NVPTX::SUST_B_1D_B32_TRAP_R:
+ return NVPTX::SUST_B_1D_B32_TRAP_I;
+ case NVPTX::SUST_B_1D_B64_TRAP_R:
+ return NVPTX::SUST_B_1D_B64_TRAP_I;
+ case NVPTX::SUST_B_1D_V2B8_TRAP_R:
+ return NVPTX::SUST_B_1D_V2B8_TRAP_I;
+ case NVPTX::SUST_B_1D_V2B16_TRAP_R:
+ return NVPTX::SUST_B_1D_V2B16_TRAP_I;
+ case NVPTX::SUST_B_1D_V2B32_TRAP_R:
+ return NVPTX::SUST_B_1D_V2B32_TRAP_I;
+ case NVPTX::SUST_B_1D_V2B64_TRAP_R:
+ return NVPTX::SUST_B_1D_V2B64_TRAP_I;
+ case NVPTX::SUST_B_1D_V4B8_TRAP_R:
+ return NVPTX::SUST_B_1D_V4B8_TRAP_I;
+ case NVPTX::SUST_B_1D_V4B16_TRAP_R:
+ return NVPTX::SUST_B_1D_V4B16_TRAP_I;
+ case NVPTX::SUST_B_1D_V4B32_TRAP_R:
+ return NVPTX::SUST_B_1D_V4B32_TRAP_I;
+ case NVPTX::SUST_B_1D_ARRAY_B8_TRAP_R:
+ return NVPTX::SUST_B_1D_ARRAY_B8_TRAP_I;
+ case NVPTX::SUST_B_1D_ARRAY_B16_TRAP_R:
+ return NVPTX::SUST_B_1D_ARRAY_B16_TRAP_I;
+ case NVPTX::SUST_B_1D_ARRAY_B32_TRAP_R:
+ return NVPTX::SUST_B_1D_ARRAY_B32_TRAP_I;
+ case NVPTX::SUST_B_1D_ARRAY_B64_TRAP_R:
+ return NVPTX::SUST_B_1D_ARRAY_B64_TRAP_I;
+ case NVPTX::SUST_B_1D_ARRAY_V2B8_TRAP_R:
+ return NVPTX::SUST_B_1D_ARRAY_V2B8_TRAP_I;
+ case NVPTX::SUST_B_1D_ARRAY_V2B16_TRAP_R:
+ return NVPTX::SUST_B_1D_ARRAY_V2B16_TRAP_I;
+ case NVPTX::SUST_B_1D_ARRAY_V2B32_TRAP_R:
+ return NVPTX::SUST_B_1D_ARRAY_V2B32_TRAP_I;
+ case NVPTX::SUST_B_1D_ARRAY_V2B64_TRAP_R:
+ return NVPTX::SUST_B_1D_ARRAY_V2B64_TRAP_I;
+ case NVPTX::SUST_B_1D_ARRAY_V4B8_TRAP_R:
+ return NVPTX::SUST_B_1D_ARRAY_V4B8_TRAP_I;
+ case NVPTX::SUST_B_1D_ARRAY_V4B16_TRAP_R:
+ return NVPTX::SUST_B_1D_ARRAY_V4B16_TRAP_I;
+ case NVPTX::SUST_B_1D_ARRAY_V4B32_TRAP_R:
+ return NVPTX::SUST_B_1D_ARRAY_V4B32_TRAP_I;
+ case NVPTX::SUST_B_2D_B8_TRAP_R:
+ return NVPTX::SUST_B_2D_B8_TRAP_I;
+ case NVPTX::SUST_B_2D_B16_TRAP_R:
+ return NVPTX::SUST_B_2D_B16_TRAP_I;
+ case NVPTX::SUST_B_2D_B32_TRAP_R:
+ return NVPTX::SUST_B_2D_B32_TRAP_I;
+ case NVPTX::SUST_B_2D_B64_TRAP_R:
+ return NVPTX::SUST_B_2D_B64_TRAP_I;
+ case NVPTX::SUST_B_2D_V2B8_TRAP_R:
+ return NVPTX::SUST_B_2D_V2B8_TRAP_I;
+ case NVPTX::SUST_B_2D_V2B16_TRAP_R:
+ return NVPTX::SUST_B_2D_V2B16_TRAP_I;
+ case NVPTX::SUST_B_2D_V2B32_TRAP_R:
+ return NVPTX::SUST_B_2D_V2B32_TRAP_I;
+ case NVPTX::SUST_B_2D_V2B64_TRAP_R:
+ return NVPTX::SUST_B_2D_V2B64_TRAP_I;
+ case NVPTX::SUST_B_2D_V4B8_TRAP_R:
+ return NVPTX::SUST_B_2D_V4B8_TRAP_I;
+ case NVPTX::SUST_B_2D_V4B16_TRAP_R:
+ return NVPTX::SUST_B_2D_V4B16_TRAP_I;
+ case NVPTX::SUST_B_2D_V4B32_TRAP_R:
+ return NVPTX::SUST_B_2D_V4B32_TRAP_I;
+ case NVPTX::SUST_B_2D_ARRAY_B8_TRAP_R:
+ return NVPTX::SUST_B_2D_ARRAY_B8_TRAP_I;
+ case NVPTX::SUST_B_2D_ARRAY_B16_TRAP_R:
+ return NVPTX::SUST_B_2D_ARRAY_B16_TRAP_I;
+ case NVPTX::SUST_B_2D_ARRAY_B32_TRAP_R:
+ return NVPTX::SUST_B_2D_ARRAY_B32_TRAP_I;
+ case NVPTX::SUST_B_2D_ARRAY_B64_TRAP_R:
+ return NVPTX::SUST_B_2D_ARRAY_B64_TRAP_I;
+ case NVPTX::SUST_B_2D_ARRAY_V2B8_TRAP_R:
+ return NVPTX::SUST_B_2D_ARRAY_V2B8_TRAP_I;
+ case NVPTX::SUST_B_2D_ARRAY_V2B16_TRAP_R:
+ return NVPTX::SUST_B_2D_ARRAY_V2B16_TRAP_I;
+ case NVPTX::SUST_B_2D_ARRAY_V2B32_TRAP_R:
+ return NVPTX::SUST_B_2D_ARRAY_V2B32_TRAP_I;
+ case NVPTX::SUST_B_2D_ARRAY_V2B64_TRAP_R:
+ return NVPTX::SUST_B_2D_ARRAY_V2B64_TRAP_I;
+ case NVPTX::SUST_B_2D_ARRAY_V4B8_TRAP_R:
+ return NVPTX::SUST_B_2D_ARRAY_V4B8_TRAP_I;
+ case NVPTX::SUST_B_2D_ARRAY_V4B16_TRAP_R:
+ return NVPTX::SUST_B_2D_ARRAY_V4B16_TRAP_I;
+ case NVPTX::SUST_B_2D_ARRAY_V4B32_TRAP_R:
+ return NVPTX::SUST_B_2D_ARRAY_V4B32_TRAP_I;
+ case NVPTX::SUST_B_3D_B8_TRAP_R:
+ return NVPTX::SUST_B_3D_B8_TRAP_I;
+ case NVPTX::SUST_B_3D_B16_TRAP_R:
+ return NVPTX::SUST_B_3D_B16_TRAP_I;
+ case NVPTX::SUST_B_3D_B32_TRAP_R:
+ return NVPTX::SUST_B_3D_B32_TRAP_I;
+ case NVPTX::SUST_B_3D_B64_TRAP_R:
+ return NVPTX::SUST_B_3D_B64_TRAP_I;
+ case NVPTX::SUST_B_3D_V2B8_TRAP_R:
+ return NVPTX::SUST_B_3D_V2B8_TRAP_I;
+ case NVPTX::SUST_B_3D_V2B16_TRAP_R:
+ return NVPTX::SUST_B_3D_V2B16_TRAP_I;
+ case NVPTX::SUST_B_3D_V2B32_TRAP_R:
+ return NVPTX::SUST_B_3D_V2B32_TRAP_I;
+ case NVPTX::SUST_B_3D_V2B64_TRAP_R:
+ return NVPTX::SUST_B_3D_V2B64_TRAP_I;
+ case NVPTX::SUST_B_3D_V4B8_TRAP_R:
+ return NVPTX::SUST_B_3D_V4B8_TRAP_I;
+ case NVPTX::SUST_B_3D_V4B16_TRAP_R:
+ return NVPTX::SUST_B_3D_V4B16_TRAP_I;
+ case NVPTX::SUST_B_3D_V4B32_TRAP_R:
+ return NVPTX::SUST_B_3D_V4B32_TRAP_I;
+ case NVPTX::SUST_B_1D_B8_ZERO_R:
+ return NVPTX::SUST_B_1D_B8_ZERO_I;
+ case NVPTX::SUST_B_1D_B16_ZERO_R:
+ return NVPTX::SUST_B_1D_B16_ZERO_I;
+ case NVPTX::SUST_B_1D_B32_ZERO_R:
+ return NVPTX::SUST_B_1D_B32_ZERO_I;
+ case NVPTX::SUST_B_1D_B64_ZERO_R:
+ return NVPTX::SUST_B_1D_B64_ZERO_I;
+ case NVPTX::SUST_B_1D_V2B8_ZERO_R:
+ return NVPTX::SUST_B_1D_V2B8_ZERO_I;
+ case NVPTX::SUST_B_1D_V2B16_ZERO_R:
+ return NVPTX::SUST_B_1D_V2B16_ZERO_I;
+ case NVPTX::SUST_B_1D_V2B32_ZERO_R:
+ return NVPTX::SUST_B_1D_V2B32_ZERO_I;
+ case NVPTX::SUST_B_1D_V2B64_ZERO_R:
+ return NVPTX::SUST_B_1D_V2B64_ZERO_I;
+ case NVPTX::SUST_B_1D_V4B8_ZERO_R:
+ return NVPTX::SUST_B_1D_V4B8_ZERO_I;
+ case NVPTX::SUST_B_1D_V4B16_ZERO_R:
+ return NVPTX::SUST_B_1D_V4B16_ZERO_I;
+ case NVPTX::SUST_B_1D_V4B32_ZERO_R:
+ return NVPTX::SUST_B_1D_V4B32_ZERO_I;
+ case NVPTX::SUST_B_1D_ARRAY_B8_ZERO_R:
+ return NVPTX::SUST_B_1D_ARRAY_B8_ZERO_I;
+ case NVPTX::SUST_B_1D_ARRAY_B16_ZERO_R:
+ return NVPTX::SUST_B_1D_ARRAY_B16_ZERO_I;
+ case NVPTX::SUST_B_1D_ARRAY_B32_ZERO_R:
+ return NVPTX::SUST_B_1D_ARRAY_B32_ZERO_I;
+ case NVPTX::SUST_B_1D_ARRAY_B64_ZERO_R:
+ return NVPTX::SUST_B_1D_ARRAY_B64_ZERO_I;
+ case NVPTX::SUST_B_1D_ARRAY_V2B8_ZERO_R:
+ return NVPTX::SUST_B_1D_ARRAY_V2B8_ZERO_I;
+ case NVPTX::SUST_B_1D_ARRAY_V2B16_ZERO_R:
+ return NVPTX::SUST_B_1D_ARRAY_V2B16_ZERO_I;
+ case NVPTX::SUST_B_1D_ARRAY_V2B32_ZERO_R:
+ return NVPTX::SUST_B_1D_ARRAY_V2B32_ZERO_I;
+ case NVPTX::SUST_B_1D_ARRAY_V2B64_ZERO_R:
+ return NVPTX::SUST_B_1D_ARRAY_V2B64_ZERO_I;
+ case NVPTX::SUST_B_1D_ARRAY_V4B8_ZERO_R:
+ return NVPTX::SUST_B_1D_ARRAY_V4B8_ZERO_I;
+ case NVPTX::SUST_B_1D_ARRAY_V4B16_ZERO_R:
+ return NVPTX::SUST_B_1D_ARRAY_V4B16_ZERO_I;
+ case NVPTX::SUST_B_1D_ARRAY_V4B32_ZERO_R:
+ return NVPTX::SUST_B_1D_ARRAY_V4B32_ZERO_I;
+ case NVPTX::SUST_B_2D_B8_ZERO_R:
+ return NVPTX::SUST_B_2D_B8_ZERO_I;
+ case NVPTX::SUST_B_2D_B16_ZERO_R:
+ return NVPTX::SUST_B_2D_B16_ZERO_I;
+ case NVPTX::SUST_B_2D_B32_ZERO_R:
+ return NVPTX::SUST_B_2D_B32_ZERO_I;
+ case NVPTX::SUST_B_2D_B64_ZERO_R:
+ return NVPTX::SUST_B_2D_B64_ZERO_I;
+ case NVPTX::SUST_B_2D_V2B8_ZERO_R:
+ return NVPTX::SUST_B_2D_V2B8_ZERO_I;
+ case NVPTX::SUST_B_2D_V2B16_ZERO_R:
+ return NVPTX::SUST_B_2D_V2B16_ZERO_I;
+ case NVPTX::SUST_B_2D_V2B32_ZERO_R:
+ return NVPTX::SUST_B_2D_V2B32_ZERO_I;
+ case NVPTX::SUST_B_2D_V2B64_ZERO_R:
+ return NVPTX::SUST_B_2D_V2B64_ZERO_I;
+ case NVPTX::SUST_B_2D_V4B8_ZERO_R:
+ return NVPTX::SUST_B_2D_V4B8_ZERO_I;
+ case NVPTX::SUST_B_2D_V4B16_ZERO_R:
+ return NVPTX::SUST_B_2D_V4B16_ZERO_I;
+ case NVPTX::SUST_B_2D_V4B32_ZERO_R:
+ return NVPTX::SUST_B_2D_V4B32_ZERO_I;
+ case NVPTX::SUST_B_2D_ARRAY_B8_ZERO_R:
+ return NVPTX::SUST_B_2D_ARRAY_B8_ZERO_I;
+ case NVPTX::SUST_B_2D_ARRAY_B16_ZERO_R:
+ return NVPTX::SUST_B_2D_ARRAY_B16_ZERO_I;
+ case NVPTX::SUST_B_2D_ARRAY_B32_ZERO_R:
+ return NVPTX::SUST_B_2D_ARRAY_B32_ZERO_I;
+ case NVPTX::SUST_B_2D_ARRAY_B64_ZERO_R:
+ return NVPTX::SUST_B_2D_ARRAY_B64_ZERO_I;
+ case NVPTX::SUST_B_2D_ARRAY_V2B8_ZERO_R:
+ return NVPTX::SUST_B_2D_ARRAY_V2B8_ZERO_I;
+ case NVPTX::SUST_B_2D_ARRAY_V2B16_ZERO_R:
+ return NVPTX::SUST_B_2D_ARRAY_V2B16_ZERO_I;
+ case NVPTX::SUST_B_2D_ARRAY_V2B32_ZERO_R:
+ return NVPTX::SUST_B_2D_ARRAY_V2B32_ZERO_I;
+ case NVPTX::SUST_B_2D_ARRAY_V2B64_ZERO_R:
+ return NVPTX::SUST_B_2D_ARRAY_V2B64_ZERO_I;
+ case NVPTX::SUST_B_2D_ARRAY_V4B8_ZERO_R:
+ return NVPTX::SUST_B_2D_ARRAY_V4B8_ZERO_I;
+ case NVPTX::SUST_B_2D_ARRAY_V4B16_ZERO_R:
+ return NVPTX::SUST_B_2D_ARRAY_V4B16_ZERO_I;
+ case NVPTX::SUST_B_2D_ARRAY_V4B32_ZERO_R:
+ return NVPTX::SUST_B_2D_ARRAY_V4B32_ZERO_I;
+ case NVPTX::SUST_B_3D_B8_ZERO_R:
+ return NVPTX::SUST_B_3D_B8_ZERO_I;
+ case NVPTX::SUST_B_3D_B16_ZERO_R:
+ return NVPTX::SUST_B_3D_B16_ZERO_I;
+ case NVPTX::SUST_B_3D_B32_ZERO_R:
+ return NVPTX::SUST_B_3D_B32_ZERO_I;
+ case NVPTX::SUST_B_3D_B64_ZERO_R:
+ return NVPTX::SUST_B_3D_B64_ZERO_I;
+ case NVPTX::SUST_B_3D_V2B8_ZERO_R:
+ return NVPTX::SUST_B_3D_V2B8_ZERO_I;
+ case NVPTX::SUST_B_3D_V2B16_ZERO_R:
+ return NVPTX::SUST_B_3D_V2B16_ZERO_I;
+ case NVPTX::SUST_B_3D_V2B32_ZERO_R:
+ return NVPTX::SUST_B_3D_V2B32_ZERO_I;
+ case NVPTX::SUST_B_3D_V2B64_ZERO_R:
+ return NVPTX::SUST_B_3D_V2B64_ZERO_I;
+ case NVPTX::SUST_B_3D_V4B8_ZERO_R:
+ return NVPTX::SUST_B_3D_V4B8_ZERO_I;
+ case NVPTX::SUST_B_3D_V4B16_ZERO_R:
+ return NVPTX::SUST_B_3D_V4B16_ZERO_I;
+ case NVPTX::SUST_B_3D_V4B32_ZERO_R:
+ return NVPTX::SUST_B_3D_V4B32_ZERO_I;
+ case NVPTX::SUST_P_1D_B8_TRAP_R:
+ return NVPTX::SUST_P_1D_B8_TRAP_I;
+ case NVPTX::SUST_P_1D_B16_TRAP_R:
+ return NVPTX::SUST_P_1D_B16_TRAP_I;
+ case NVPTX::SUST_P_1D_B32_TRAP_R:
+ return NVPTX::SUST_P_1D_B32_TRAP_I;
+ case NVPTX::SUST_P_1D_V2B8_TRAP_R:
+ return NVPTX::SUST_P_1D_V2B8_TRAP_I;
+ case NVPTX::SUST_P_1D_V2B16_TRAP_R:
+ return NVPTX::SUST_P_1D_V2B16_TRAP_I;
+ case NVPTX::SUST_P_1D_V2B32_TRAP_R:
+ return NVPTX::SUST_P_1D_V2B32_TRAP_I;
+ case NVPTX::SUST_P_1D_V4B8_TRAP_R:
+ return NVPTX::SUST_P_1D_V4B8_TRAP_I;
+ case NVPTX::SUST_P_1D_V4B16_TRAP_R:
+ return NVPTX::SUST_P_1D_V4B16_TRAP_I;
+ case NVPTX::SUST_P_1D_V4B32_TRAP_R:
+ return NVPTX::SUST_P_1D_V4B32_TRAP_I;
+ case NVPTX::SUST_P_1D_ARRAY_B8_TRAP_R:
+ return NVPTX::SUST_P_1D_ARRAY_B8_TRAP_I;
+ case NVPTX::SUST_P_1D_ARRAY_B16_TRAP_R:
+ return NVPTX::SUST_P_1D_ARRAY_B16_TRAP_I;
+ case NVPTX::SUST_P_1D_ARRAY_B32_TRAP_R:
+ return NVPTX::SUST_P_1D_ARRAY_B32_TRAP_I;
+ case NVPTX::SUST_P_1D_ARRAY_V2B8_TRAP_R:
+ return NVPTX::SUST_P_1D_ARRAY_V2B8_TRAP_I;
+ case NVPTX::SUST_P_1D_ARRAY_V2B16_TRAP_R:
+ return NVPTX::SUST_P_1D_ARRAY_V2B16_TRAP_I;
+ case NVPTX::SUST_P_1D_ARRAY_V2B32_TRAP_R:
+ return NVPTX::SUST_P_1D_ARRAY_V2B32_TRAP_I;
+ case NVPTX::SUST_P_1D_ARRAY_V4B8_TRAP_R:
+ return NVPTX::SUST_P_1D_ARRAY_V4B8_TRAP_I;
+ case NVPTX::SUST_P_1D_ARRAY_V4B16_TRAP_R:
+ return NVPTX::SUST_P_1D_ARRAY_V4B16_TRAP_I;
+ case NVPTX::SUST_P_1D_ARRAY_V4B32_TRAP_R:
+ return NVPTX::SUST_P_1D_ARRAY_V4B32_TRAP_I;
+ case NVPTX::SUST_P_2D_B8_TRAP_R:
+ return NVPTX::SUST_P_2D_B8_TRAP_I;
+ case NVPTX::SUST_P_2D_B16_TRAP_R:
+ return NVPTX::SUST_P_2D_B16_TRAP_I;
+ case NVPTX::SUST_P_2D_B32_TRAP_R:
+ return NVPTX::SUST_P_2D_B32_TRAP_I;
+ case NVPTX::SUST_P_2D_V2B8_TRAP_R:
+ return NVPTX::SUST_P_2D_V2B8_TRAP_I;
+ case NVPTX::SUST_P_2D_V2B16_TRAP_R:
+ return NVPTX::SUST_P_2D_V2B16_TRAP_I;
+ case NVPTX::SUST_P_2D_V2B32_TRAP_R:
+ return NVPTX::SUST_P_2D_V2B32_TRAP_I;
+ case NVPTX::SUST_P_2D_V4B8_TRAP_R:
+ return NVPTX::SUST_P_2D_V4B8_TRAP_I;
+ case NVPTX::SUST_P_2D_V4B16_TRAP_R:
+ return NVPTX::SUST_P_2D_V4B16_TRAP_I;
+ case NVPTX::SUST_P_2D_V4B32_TRAP_R:
+ return NVPTX::SUST_P_2D_V4B32_TRAP_I;
+ case NVPTX::SUST_P_2D_ARRAY_B8_TRAP_R:
+ return NVPTX::SUST_P_2D_ARRAY_B8_TRAP_I;
+ case NVPTX::SUST_P_2D_ARRAY_B16_TRAP_R:
+ return NVPTX::SUST_P_2D_ARRAY_B16_TRAP_I;
+ case NVPTX::SUST_P_2D_ARRAY_B32_TRAP_R:
+ return NVPTX::SUST_P_2D_ARRAY_B32_TRAP_I;
+ case NVPTX::SUST_P_2D_ARRAY_V2B8_TRAP_R:
+ return NVPTX::SUST_P_2D_ARRAY_V2B8_TRAP_I;
+ case NVPTX::SUST_P_2D_ARRAY_V2B16_TRAP_R:
+ return NVPTX::SUST_P_2D_ARRAY_V2B16_TRAP_I;
+ case NVPTX::SUST_P_2D_ARRAY_V2B32_TRAP_R:
+ return NVPTX::SUST_P_2D_ARRAY_V2B32_TRAP_I;
+ case NVPTX::SUST_P_2D_ARRAY_V4B8_TRAP_R:
+ return NVPTX::SUST_P_2D_ARRAY_V4B8_TRAP_I;
+ case NVPTX::SUST_P_2D_ARRAY_V4B16_TRAP_R:
+ return NVPTX::SUST_P_2D_ARRAY_V4B16_TRAP_I;
+ case NVPTX::SUST_P_2D_ARRAY_V4B32_TRAP_R:
+ return NVPTX::SUST_P_2D_ARRAY_V4B32_TRAP_I;
+ case NVPTX::SUST_P_3D_B8_TRAP_R:
+ return NVPTX::SUST_P_3D_B8_TRAP_I;
+ case NVPTX::SUST_P_3D_B16_TRAP_R:
+ return NVPTX::SUST_P_3D_B16_TRAP_I;
+ case NVPTX::SUST_P_3D_B32_TRAP_R:
+ return NVPTX::SUST_P_3D_B32_TRAP_I;
+ case NVPTX::SUST_P_3D_V2B8_TRAP_R:
+ return NVPTX::SUST_P_3D_V2B8_TRAP_I;
+ case NVPTX::SUST_P_3D_V2B16_TRAP_R:
+ return NVPTX::SUST_P_3D_V2B16_TRAP_I;
+ case NVPTX::SUST_P_3D_V2B32_TRAP_R:
+ return NVPTX::SUST_P_3D_V2B32_TRAP_I;
+ case NVPTX::SUST_P_3D_V4B8_TRAP_R:
+ return NVPTX::SUST_P_3D_V4B8_TRAP_I;
+ case NVPTX::SUST_P_3D_V4B16_TRAP_R:
+ return NVPTX::SUST_P_3D_V4B16_TRAP_I;
+ case NVPTX::SUST_P_3D_V4B32_TRAP_R:
+ return NVPTX::SUST_P_3D_V4B32_TRAP_I;
+ default:
+ llvm_unreachable("Unhandled SUST opcode");
+ }
+}
+
+static unsigned texRegisterToIndexOpcode(unsigned RegOC) {
+ switch (RegOC) {
+ case NVPTX::TEX_1D_F32_S32_RR:
+ return NVPTX::TEX_1D_F32_S32_IR;
+ case NVPTX::TEX_1D_F32_S32_RI:
+ return NVPTX::TEX_1D_F32_S32_II;
+ case NVPTX::TEX_1D_F32_F32_RR:
+ return NVPTX::TEX_1D_F32_F32_IR;
+ case NVPTX::TEX_1D_F32_F32_RI:
+ return NVPTX::TEX_1D_F32_F32_II;
+ case NVPTX::TEX_1D_F32_F32_LEVEL_RR:
+ return NVPTX::TEX_1D_F32_F32_LEVEL_IR;
+ case NVPTX::TEX_1D_F32_F32_LEVEL_RI:
+ return NVPTX::TEX_1D_F32_F32_LEVEL_II;
+ case NVPTX::TEX_1D_F32_F32_GRAD_RR:
+ return NVPTX::TEX_1D_F32_F32_GRAD_IR;
+ case NVPTX::TEX_1D_F32_F32_GRAD_RI:
+ return NVPTX::TEX_1D_F32_F32_GRAD_II;
+ case NVPTX::TEX_1D_S32_S32_RR:
+ return NVPTX::TEX_1D_S32_S32_IR;
+ case NVPTX::TEX_1D_S32_S32_RI:
+ return NVPTX::TEX_1D_S32_S32_II;
+ case NVPTX::TEX_1D_S32_F32_RR:
+ return NVPTX::TEX_1D_S32_F32_IR;
+ case NVPTX::TEX_1D_S32_F32_RI:
+ return NVPTX::TEX_1D_S32_F32_II;
+ case NVPTX::TEX_1D_S32_F32_LEVEL_RR:
+ return NVPTX::TEX_1D_S32_F32_LEVEL_IR;
+ case NVPTX::TEX_1D_S32_F32_LEVEL_RI:
+ return NVPTX::TEX_1D_S32_F32_LEVEL_II;
+ case NVPTX::TEX_1D_S32_F32_GRAD_RR:
+ return NVPTX::TEX_1D_S32_F32_GRAD_IR;
+ case NVPTX::TEX_1D_S32_F32_GRAD_RI:
+ return NVPTX::TEX_1D_S32_F32_GRAD_II;
+ case NVPTX::TEX_1D_U32_S32_RR:
+ return NVPTX::TEX_1D_U32_S32_IR;
+ case NVPTX::TEX_1D_U32_S32_RI:
+ return NVPTX::TEX_1D_U32_S32_II;
+ case NVPTX::TEX_1D_U32_F32_RR:
+ return NVPTX::TEX_1D_U32_F32_IR;
+ case NVPTX::TEX_1D_U32_F32_RI:
+ return NVPTX::TEX_1D_U32_F32_II;
+ case NVPTX::TEX_1D_U32_F32_LEVEL_RR:
+ return NVPTX::TEX_1D_U32_F32_LEVEL_IR;
+ case NVPTX::TEX_1D_U32_F32_LEVEL_RI:
+ return NVPTX::TEX_1D_U32_F32_LEVEL_II;
+ case NVPTX::TEX_1D_U32_F32_GRAD_RR:
+ return NVPTX::TEX_1D_U32_F32_GRAD_IR;
+ case NVPTX::TEX_1D_U32_F32_GRAD_RI:
+ return NVPTX::TEX_1D_U32_F32_GRAD_II;
+ case NVPTX::TEX_1D_ARRAY_F32_S32_RR:
+ return NVPTX::TEX_1D_ARRAY_F32_S32_IR;
+ case NVPTX::TEX_1D_ARRAY_F32_S32_RI:
+ return NVPTX::TEX_1D_ARRAY_F32_S32_II;
+ case NVPTX::TEX_1D_ARRAY_F32_F32_RR:
+ return NVPTX::TEX_1D_ARRAY_F32_F32_IR;
+ case NVPTX::TEX_1D_ARRAY_F32_F32_RI:
+ return NVPTX::TEX_1D_ARRAY_F32_F32_II;
+ case NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_RR:
+ return NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_IR;
+ case NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_RI:
+ return NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_II;
+ case NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_RR:
+ return NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_IR;
+ case NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_RI:
+ return NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_II;
+ case NVPTX::TEX_1D_ARRAY_S32_S32_RR:
+ return NVPTX::TEX_1D_ARRAY_S32_S32_IR;
+ case NVPTX::TEX_1D_ARRAY_S32_S32_RI:
+ return NVPTX::TEX_1D_ARRAY_S32_S32_II;
+ case NVPTX::TEX_1D_ARRAY_S32_F32_RR:
+ return NVPTX::TEX_1D_ARRAY_S32_F32_IR;
+ case NVPTX::TEX_1D_ARRAY_S32_F32_RI:
+ return NVPTX::TEX_1D_ARRAY_S32_F32_II;
+ case NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_RR:
+ return NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_IR;
+ case NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_RI:
+ return NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_II;
+ case NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_RR:
+ return NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_IR;
+ case NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_RI:
+ return NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_II;
+ case NVPTX::TEX_1D_ARRAY_U32_S32_RR:
+ return NVPTX::TEX_1D_ARRAY_U32_S32_IR;
+ case NVPTX::TEX_1D_ARRAY_U32_S32_RI:
+ return NVPTX::TEX_1D_ARRAY_U32_S32_II;
+ case NVPTX::TEX_1D_ARRAY_U32_F32_RR:
+ return NVPTX::TEX_1D_ARRAY_U32_F32_IR;
+ case NVPTX::TEX_1D_ARRAY_U32_F32_RI:
+ return NVPTX::TEX_1D_ARRAY_U32_F32_II;
+ case NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_RR:
+ return NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_IR;
+ case NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_RI:
+ return NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_II;
+ case NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_RR:
+ return NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_IR;
+ case NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_RI:
+ return NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_II;
+ case NVPTX::TEX_2D_F32_S32_RR:
+ return NVPTX::TEX_2D_F32_S32_IR;
+ case NVPTX::TEX_2D_F32_S32_RI:
+ return NVPTX::TEX_2D_F32_S32_II;
+ case NVPTX::TEX_2D_F32_F32_RR:
+ return NVPTX::TEX_2D_F32_F32_IR;
+ case NVPTX::TEX_2D_F32_F32_RI:
+ return NVPTX::TEX_2D_F32_F32_II;
+ case NVPTX::TEX_2D_F32_F32_LEVEL_RR:
+ return NVPTX::TEX_2D_F32_F32_LEVEL_IR;
+ case NVPTX::TEX_2D_F32_F32_LEVEL_RI:
+ return NVPTX::TEX_2D_F32_F32_LEVEL_II;
+ case NVPTX::TEX_2D_F32_F32_GRAD_RR:
+ return NVPTX::TEX_2D_F32_F32_GRAD_IR;
+ case NVPTX::TEX_2D_F32_F32_GRAD_RI:
+ return NVPTX::TEX_2D_F32_F32_GRAD_II;
+ case NVPTX::TEX_2D_S32_S32_RR:
+ return NVPTX::TEX_2D_S32_S32_IR;
+ case NVPTX::TEX_2D_S32_S32_RI:
+ return NVPTX::TEX_2D_S32_S32_II;
+ case NVPTX::TEX_2D_S32_F32_RR:
+ return NVPTX::TEX_2D_S32_F32_IR;
+ case NVPTX::TEX_2D_S32_F32_RI:
+ return NVPTX::TEX_2D_S32_F32_II;
+ case NVPTX::TEX_2D_S32_F32_LEVEL_RR:
+ return NVPTX::TEX_2D_S32_F32_LEVEL_IR;
+ case NVPTX::TEX_2D_S32_F32_LEVEL_RI:
+ return NVPTX::TEX_2D_S32_F32_LEVEL_II;
+ case NVPTX::TEX_2D_S32_F32_GRAD_RR:
+ return NVPTX::TEX_2D_S32_F32_GRAD_IR;
+ case NVPTX::TEX_2D_S32_F32_GRAD_RI:
+ return NVPTX::TEX_2D_S32_F32_GRAD_II;
+ case NVPTX::TEX_2D_U32_S32_RR:
+ return NVPTX::TEX_2D_U32_S32_IR;
+ case NVPTX::TEX_2D_U32_S32_RI:
+ return NVPTX::TEX_2D_U32_S32_II;
+ case NVPTX::TEX_2D_U32_F32_RR:
+ return NVPTX::TEX_2D_U32_F32_IR;
+ case NVPTX::TEX_2D_U32_F32_RI:
+ return NVPTX::TEX_2D_U32_F32_II;
+ case NVPTX::TEX_2D_U32_F32_LEVEL_RR:
+ return NVPTX::TEX_2D_U32_F32_LEVEL_IR;
+ case NVPTX::TEX_2D_U32_F32_LEVEL_RI:
+ return NVPTX::TEX_2D_U32_F32_LEVEL_II;
+ case NVPTX::TEX_2D_U32_F32_GRAD_RR:
+ return NVPTX::TEX_2D_U32_F32_GRAD_IR;
+ case NVPTX::TEX_2D_U32_F32_GRAD_RI:
+ return NVPTX::TEX_2D_U32_F32_GRAD_II;
+ case NVPTX::TEX_2D_ARRAY_F32_S32_RR:
+ return NVPTX::TEX_2D_ARRAY_F32_S32_IR;
+ case NVPTX::TEX_2D_ARRAY_F32_S32_RI:
+ return NVPTX::TEX_2D_ARRAY_F32_S32_II;
+ case NVPTX::TEX_2D_ARRAY_F32_F32_RR:
+ return NVPTX::TEX_2D_ARRAY_F32_F32_IR;
+ case NVPTX::TEX_2D_ARRAY_F32_F32_RI:
+ return NVPTX::TEX_2D_ARRAY_F32_F32_II;
+ case NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_RR:
+ return NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_IR;
+ case NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_RI:
+ return NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_II;
+ case NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_RR:
+ return NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_IR;
+ case NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_RI:
+ return NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_II;
+ case NVPTX::TEX_2D_ARRAY_S32_S32_RR:
+ return NVPTX::TEX_2D_ARRAY_S32_S32_IR;
+ case NVPTX::TEX_2D_ARRAY_S32_S32_RI:
+ return NVPTX::TEX_2D_ARRAY_S32_S32_II;
+ case NVPTX::TEX_2D_ARRAY_S32_F32_RR:
+ return NVPTX::TEX_2D_ARRAY_S32_F32_IR;
+ case NVPTX::TEX_2D_ARRAY_S32_F32_RI:
+ return NVPTX::TEX_2D_ARRAY_S32_F32_II;
+ case NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_RR:
+ return NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_IR;
+ case NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_RI:
+ return NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_II;
+ case NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_RR:
+ return NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_IR;
+ case NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_RI:
+ return NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_II;
+ case NVPTX::TEX_2D_ARRAY_U32_S32_RR:
+ return NVPTX::TEX_2D_ARRAY_U32_S32_IR;
+ case NVPTX::TEX_2D_ARRAY_U32_S32_RI:
+ return NVPTX::TEX_2D_ARRAY_U32_S32_II;
+ case NVPTX::TEX_2D_ARRAY_U32_F32_RR:
+ return NVPTX::TEX_2D_ARRAY_U32_F32_IR;
+ case NVPTX::TEX_2D_ARRAY_U32_F32_RI:
+ return NVPTX::TEX_2D_ARRAY_U32_F32_II;
+ case NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_RR:
+ return NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_IR;
+ case NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_RI:
+ return NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_II;
+ case NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_RR:
+ return NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_IR;
+ case NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_RI:
+ return NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_II;
+ case NVPTX::TEX_3D_F32_S32_RR:
+ return NVPTX::TEX_3D_F32_S32_IR;
+ case NVPTX::TEX_3D_F32_S32_RI:
+ return NVPTX::TEX_3D_F32_S32_II;
+ case NVPTX::TEX_3D_F32_F32_RR:
+ return NVPTX::TEX_3D_F32_F32_IR;
+ case NVPTX::TEX_3D_F32_F32_RI:
+ return NVPTX::TEX_3D_F32_F32_II;
+ case NVPTX::TEX_3D_F32_F32_LEVEL_RR:
+ return NVPTX::TEX_3D_F32_F32_LEVEL_IR;
+ case NVPTX::TEX_3D_F32_F32_LEVEL_RI:
+ return NVPTX::TEX_3D_F32_F32_LEVEL_II;
+ case NVPTX::TEX_3D_F32_F32_GRAD_RR:
+ return NVPTX::TEX_3D_F32_F32_GRAD_IR;
+ case NVPTX::TEX_3D_F32_F32_GRAD_RI:
+ return NVPTX::TEX_3D_F32_F32_GRAD_II;
+ case NVPTX::TEX_3D_S32_S32_RR:
+ return NVPTX::TEX_3D_S32_S32_IR;
+ case NVPTX::TEX_3D_S32_S32_RI:
+ return NVPTX::TEX_3D_S32_S32_II;
+ case NVPTX::TEX_3D_S32_F32_RR:
+ return NVPTX::TEX_3D_S32_F32_IR;
+ case NVPTX::TEX_3D_S32_F32_RI:
+ return NVPTX::TEX_3D_S32_F32_II;
+ case NVPTX::TEX_3D_S32_F32_LEVEL_RR:
+ return NVPTX::TEX_3D_S32_F32_LEVEL_IR;
+ case NVPTX::TEX_3D_S32_F32_LEVEL_RI:
+ return NVPTX::TEX_3D_S32_F32_LEVEL_II;
+ case NVPTX::TEX_3D_S32_F32_GRAD_RR:
+ return NVPTX::TEX_3D_S32_F32_GRAD_IR;
+ case NVPTX::TEX_3D_S32_F32_GRAD_RI:
+ return NVPTX::TEX_3D_S32_F32_GRAD_II;
+ case NVPTX::TEX_3D_U32_S32_RR:
+ return NVPTX::TEX_3D_U32_S32_IR;
+ case NVPTX::TEX_3D_U32_S32_RI:
+ return NVPTX::TEX_3D_U32_S32_II;
+ case NVPTX::TEX_3D_U32_F32_RR:
+ return NVPTX::TEX_3D_U32_F32_IR;
+ case NVPTX::TEX_3D_U32_F32_RI:
+ return NVPTX::TEX_3D_U32_F32_II;
+ case NVPTX::TEX_3D_U32_F32_LEVEL_RR:
+ return NVPTX::TEX_3D_U32_F32_LEVEL_IR;
+ case NVPTX::TEX_3D_U32_F32_LEVEL_RI:
+ return NVPTX::TEX_3D_U32_F32_LEVEL_II;
+ case NVPTX::TEX_3D_U32_F32_GRAD_RR:
+ return NVPTX::TEX_3D_U32_F32_GRAD_IR;
+ case NVPTX::TEX_3D_U32_F32_GRAD_RI:
+ return NVPTX::TEX_3D_U32_F32_GRAD_II;
+ case NVPTX::TEX_CUBE_F32_F32_RR:
+ return NVPTX::TEX_CUBE_F32_F32_IR;
+ case NVPTX::TEX_CUBE_F32_F32_RI:
+ return NVPTX::TEX_CUBE_F32_F32_II;
+ case NVPTX::TEX_CUBE_F32_F32_LEVEL_RR:
+ return NVPTX::TEX_CUBE_F32_F32_LEVEL_IR;
+ case NVPTX::TEX_CUBE_F32_F32_LEVEL_RI:
+ return NVPTX::TEX_CUBE_F32_F32_LEVEL_II;
+ case NVPTX::TEX_CUBE_S32_F32_RR:
+ return NVPTX::TEX_CUBE_S32_F32_IR;
+ case NVPTX::TEX_CUBE_S32_F32_RI:
+ return NVPTX::TEX_CUBE_S32_F32_II;
+ case NVPTX::TEX_CUBE_S32_F32_LEVEL_RR:
+ return NVPTX::TEX_CUBE_S32_F32_LEVEL_IR;
+ case NVPTX::TEX_CUBE_S32_F32_LEVEL_RI:
+ return NVPTX::TEX_CUBE_S32_F32_LEVEL_II;
+ case NVPTX::TEX_CUBE_U32_F32_RR:
+ return NVPTX::TEX_CUBE_U32_F32_IR;
+ case NVPTX::TEX_CUBE_U32_F32_RI:
+ return NVPTX::TEX_CUBE_U32_F32_II;
+ case NVPTX::TEX_CUBE_U32_F32_LEVEL_RR:
+ return NVPTX::TEX_CUBE_U32_F32_LEVEL_IR;
+ case NVPTX::TEX_CUBE_U32_F32_LEVEL_RI:
+ return NVPTX::TEX_CUBE_U32_F32_LEVEL_II;
+ case NVPTX::TEX_CUBE_ARRAY_F32_F32_RR:
+ return NVPTX::TEX_CUBE_ARRAY_F32_F32_IR;
+ case NVPTX::TEX_CUBE_ARRAY_F32_F32_RI:
+ return NVPTX::TEX_CUBE_ARRAY_F32_F32_II;
+ case NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_RR:
+ return NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_IR;
+ case NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_RI:
+ return NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_II;
+ case NVPTX::TEX_CUBE_ARRAY_S32_F32_RR:
+ return NVPTX::TEX_CUBE_ARRAY_S32_F32_IR;
+ case NVPTX::TEX_CUBE_ARRAY_S32_F32_RI:
+ return NVPTX::TEX_CUBE_ARRAY_S32_F32_II;
+ case NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_RR:
+ return NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_IR;
+ case NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_RI:
+ return NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_II;
+ case NVPTX::TEX_CUBE_ARRAY_U32_F32_RR:
+ return NVPTX::TEX_CUBE_ARRAY_U32_F32_IR;
+ case NVPTX::TEX_CUBE_ARRAY_U32_F32_RI:
+ return NVPTX::TEX_CUBE_ARRAY_U32_F32_II;
+ case NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_RR:
+ return NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_IR;
+ case NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_RI:
+ return NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_II;
+ case NVPTX::TLD4_R_2D_F32_F32_RR:
+ return NVPTX::TLD4_R_2D_F32_F32_IR;
+ case NVPTX::TLD4_R_2D_F32_F32_RI:
+ return NVPTX::TLD4_R_2D_F32_F32_II;
+ case NVPTX::TLD4_G_2D_F32_F32_RR:
+ return NVPTX::TLD4_G_2D_F32_F32_IR;
+ case NVPTX::TLD4_G_2D_F32_F32_RI:
+ return NVPTX::TLD4_G_2D_F32_F32_II;
+ case NVPTX::TLD4_B_2D_F32_F32_RR:
+ return NVPTX::TLD4_B_2D_F32_F32_IR;
+ case NVPTX::TLD4_B_2D_F32_F32_RI:
+ return NVPTX::TLD4_B_2D_F32_F32_II;
+ case NVPTX::TLD4_A_2D_F32_F32_RR:
+ return NVPTX::TLD4_A_2D_F32_F32_IR;
+ case NVPTX::TLD4_A_2D_F32_F32_RI:
+ return NVPTX::TLD4_A_2D_F32_F32_II;
+ case NVPTX::TLD4_R_2D_S32_F32_RR:
+ return NVPTX::TLD4_R_2D_S32_F32_IR;
+ case NVPTX::TLD4_R_2D_S32_F32_RI:
+ return NVPTX::TLD4_R_2D_S32_F32_II;
+ case NVPTX::TLD4_G_2D_S32_F32_RR:
+ return NVPTX::TLD4_G_2D_S32_F32_IR;
+ case NVPTX::TLD4_G_2D_S32_F32_RI:
+ return NVPTX::TLD4_G_2D_S32_F32_II;
+ case NVPTX::TLD4_B_2D_S32_F32_RR:
+ return NVPTX::TLD4_B_2D_S32_F32_IR;
+ case NVPTX::TLD4_B_2D_S32_F32_RI:
+ return NVPTX::TLD4_B_2D_S32_F32_II;
+ case NVPTX::TLD4_A_2D_S32_F32_RR:
+ return NVPTX::TLD4_A_2D_S32_F32_IR;
+ case NVPTX::TLD4_A_2D_S32_F32_RI:
+ return NVPTX::TLD4_A_2D_S32_F32_II;
+ case NVPTX::TLD4_R_2D_U32_F32_RR:
+ return NVPTX::TLD4_R_2D_U32_F32_IR;
+ case NVPTX::TLD4_R_2D_U32_F32_RI:
+ return NVPTX::TLD4_R_2D_U32_F32_II;
+ case NVPTX::TLD4_G_2D_U32_F32_RR:
+ return NVPTX::TLD4_G_2D_U32_F32_IR;
+ case NVPTX::TLD4_G_2D_U32_F32_RI:
+ return NVPTX::TLD4_G_2D_U32_F32_II;
+ case NVPTX::TLD4_B_2D_U32_F32_RR:
+ return NVPTX::TLD4_B_2D_U32_F32_IR;
+ case NVPTX::TLD4_B_2D_U32_F32_RI:
+ return NVPTX::TLD4_B_2D_U32_F32_II;
+ case NVPTX::TLD4_A_2D_U32_F32_RR:
+ return NVPTX::TLD4_A_2D_U32_F32_IR;
+ case NVPTX::TLD4_A_2D_U32_F32_RI:
+ return NVPTX::TLD4_A_2D_U32_F32_II;
+ case NVPTX::TEX_UNIFIED_1D_F32_S32_R:
+ return NVPTX::TEX_UNIFIED_1D_F32_S32_I;
+ case NVPTX::TEX_UNIFIED_1D_F32_F32_R:
+ return NVPTX::TEX_UNIFIED_1D_F32_F32_I;
+ case NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL_R:
+ return NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL_I;
+ case NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD_R:
+ return NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD_I;
+ case NVPTX::TEX_UNIFIED_1D_S32_S32_R:
+ return NVPTX::TEX_UNIFIED_1D_S32_S32_I;
+ case NVPTX::TEX_UNIFIED_1D_S32_F32_R:
+ return NVPTX::TEX_UNIFIED_1D_S32_F32_I;
+ case NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL_R:
+ return NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL_I;
+ case NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD_R:
+ return NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD_I;
+ case NVPTX::TEX_UNIFIED_1D_U32_S32_R:
+ return NVPTX::TEX_UNIFIED_1D_U32_S32_I;
+ case NVPTX::TEX_UNIFIED_1D_U32_F32_R:
+ return NVPTX::TEX_UNIFIED_1D_U32_F32_I;
+ case NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL_R:
+ return NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL_I;
+ case NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD_R:
+ return NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD_I;
+ case NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32_R:
+ return NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32_I;
+ case NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_R:
+ return NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_I;
+ case NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL_R:
+ return NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL_I;
+ case NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD_R:
+ return NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD_I;
+ case NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32_R:
+ return NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32_I;
+ case NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_R:
+ return NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_I;
+ case NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL_R:
+ return NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL_I;
+ case NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD_R:
+ return NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD_I;
+ case NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32_R:
+ return NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32_I;
+ case NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_R:
+ return NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_I;
+ case NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL_R:
+ return NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL_I;
+ case NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD_R:
+ return NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD_I;
+ case NVPTX::TEX_UNIFIED_2D_F32_S32_R:
+ return NVPTX::TEX_UNIFIED_2D_F32_S32_I;
+ case NVPTX::TEX_UNIFIED_2D_F32_F32_R:
+ return NVPTX::TEX_UNIFIED_2D_F32_F32_I;
+ case NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL_R:
+ return NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL_I;
+ case NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD_R:
+ return NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD_I;
+ case NVPTX::TEX_UNIFIED_2D_S32_S32_R:
+ return NVPTX::TEX_UNIFIED_2D_S32_S32_I;
+ case NVPTX::TEX_UNIFIED_2D_S32_F32_R:
+ return NVPTX::TEX_UNIFIED_2D_S32_F32_I;
+ case NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL_R:
+ return NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL_I;
+ case NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD_R:
+ return NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD_I;
+ case NVPTX::TEX_UNIFIED_2D_U32_S32_R:
+ return NVPTX::TEX_UNIFIED_2D_U32_S32_I;
+ case NVPTX::TEX_UNIFIED_2D_U32_F32_R:
+ return NVPTX::TEX_UNIFIED_2D_U32_F32_I;
+ case NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL_R:
+ return NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL_I;
+ case NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD_R:
+ return NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD_I;
+ case NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32_R:
+ return NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32_I;
+ case NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_R:
+ return NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_I;
+ case NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL_R:
+ return NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL_I;
+ case NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD_R:
+ return NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD_I;
+ case NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32_R:
+ return NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32_I;
+ case NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_R:
+ return NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_I;
+ case NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL_R:
+ return NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL_I;
+ case NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD_R:
+ return NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD_I;
+ case NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32_R:
+ return NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32_I;
+ case NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_R:
+ return NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_I;
+ case NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL_R:
+ return NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL_I;
+ case NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD_R:
+ return NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD_I;
+ case NVPTX::TEX_UNIFIED_3D_F32_S32_R:
+ return NVPTX::TEX_UNIFIED_3D_F32_S32_I;
+ case NVPTX::TEX_UNIFIED_3D_F32_F32_R:
+ return NVPTX::TEX_UNIFIED_3D_F32_F32_I;
+ case NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL_R:
+ return NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL_I;
+ case NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD_R:
+ return NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD_I;
+ case NVPTX::TEX_UNIFIED_3D_S32_S32_R:
+ return NVPTX::TEX_UNIFIED_3D_S32_S32_I;
+ case NVPTX::TEX_UNIFIED_3D_S32_F32_R:
+ return NVPTX::TEX_UNIFIED_3D_S32_F32_I;
+ case NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL_R:
+ return NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL_I;
+ case NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD_R:
+ return NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD_I;
+ case NVPTX::TEX_UNIFIED_3D_U32_S32_R:
+ return NVPTX::TEX_UNIFIED_3D_U32_S32_I;
+ case NVPTX::TEX_UNIFIED_3D_U32_F32_R:
+ return NVPTX::TEX_UNIFIED_3D_U32_F32_I;
+ case NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL_R:
+ return NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL_I;
+ case NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD_R:
+ return NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD_I;
+ case NVPTX::TEX_UNIFIED_CUBE_F32_F32_R:
+ return NVPTX::TEX_UNIFIED_CUBE_F32_F32_I;
+ case NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL_R:
+ return NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL_I;
+ case NVPTX::TEX_UNIFIED_CUBE_S32_F32_R:
+ return NVPTX::TEX_UNIFIED_CUBE_S32_F32_I;
+ case NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL_R:
+ return NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL_I;
+ case NVPTX::TEX_UNIFIED_CUBE_U32_F32_R:
+ return NVPTX::TEX_UNIFIED_CUBE_U32_F32_I;
+ case NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL_R:
+ return NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL_I;
+ case NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_R:
+ return NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_I;
+ case NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL_R:
+ return NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL_I;
+ case NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_R:
+ return NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_I;
+ case NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL_R:
+ return NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL_I;
+ case NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_R:
+ return NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_I;
+ case NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL_R:
+ return NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL_I;
+ case NVPTX::TLD4_UNIFIED_R_2D_F32_F32_R:
+ return NVPTX::TLD4_UNIFIED_R_2D_F32_F32_I;
+ case NVPTX::TLD4_UNIFIED_G_2D_F32_F32_R:
+ return NVPTX::TLD4_UNIFIED_G_2D_F32_F32_I;
+ case NVPTX::TLD4_UNIFIED_B_2D_F32_F32_R:
+ return NVPTX::TLD4_UNIFIED_B_2D_F32_F32_I;
+ case NVPTX::TLD4_UNIFIED_A_2D_F32_F32_R:
+ return NVPTX::TLD4_UNIFIED_A_2D_F32_F32_I;
+ case NVPTX::TLD4_UNIFIED_R_2D_S32_F32_R:
+ return NVPTX::TLD4_UNIFIED_R_2D_S32_F32_I;
+ case NVPTX::TLD4_UNIFIED_G_2D_S32_F32_R:
+ return NVPTX::TLD4_UNIFIED_G_2D_S32_F32_I;
+ case NVPTX::TLD4_UNIFIED_B_2D_S32_F32_R:
+ return NVPTX::TLD4_UNIFIED_B_2D_S32_F32_I;
+ case NVPTX::TLD4_UNIFIED_A_2D_S32_F32_R:
+ return NVPTX::TLD4_UNIFIED_A_2D_S32_F32_I;
+ case NVPTX::TLD4_UNIFIED_R_2D_U32_F32_R:
+ return NVPTX::TLD4_UNIFIED_R_2D_U32_F32_I;
+ case NVPTX::TLD4_UNIFIED_G_2D_U32_F32_R:
+ return NVPTX::TLD4_UNIFIED_G_2D_U32_F32_I;
+ case NVPTX::TLD4_UNIFIED_B_2D_U32_F32_R:
+ return NVPTX::TLD4_UNIFIED_B_2D_U32_F32_I;
+ case NVPTX::TLD4_UNIFIED_A_2D_U32_F32_R:
+ return NVPTX::TLD4_UNIFIED_A_2D_U32_F32_I;
+ default:
+ llvm_unreachable("Unhandled TEX opcode");
+ }
+}
+
+static unsigned samplerRegisterToIndexOpcode(unsigned RegOC) {
+ switch (RegOC) {
+ case NVPTX::TEX_1D_F32_S32_RR:
+ return NVPTX::TEX_1D_F32_S32_RI;
+ case NVPTX::TEX_1D_F32_S32_IR:
+ return NVPTX::TEX_1D_F32_S32_II;
+ case NVPTX::TEX_1D_F32_F32_RR:
+ return NVPTX::TEX_1D_F32_F32_RI;
+ case NVPTX::TEX_1D_F32_F32_IR:
+ return NVPTX::TEX_1D_F32_F32_II;
+ case NVPTX::TEX_1D_F32_F32_LEVEL_RR:
+ return NVPTX::TEX_1D_F32_F32_LEVEL_RI;
+ case NVPTX::TEX_1D_F32_F32_LEVEL_IR:
+ return NVPTX::TEX_1D_F32_F32_LEVEL_II;
+ case NVPTX::TEX_1D_F32_F32_GRAD_RR:
+ return NVPTX::TEX_1D_F32_F32_GRAD_RI;
+ case NVPTX::TEX_1D_F32_F32_GRAD_IR:
+ return NVPTX::TEX_1D_F32_F32_GRAD_II;
+ case NVPTX::TEX_1D_S32_S32_RR:
+ return NVPTX::TEX_1D_S32_S32_RI;
+ case NVPTX::TEX_1D_S32_S32_IR:
+ return NVPTX::TEX_1D_S32_S32_II;
+ case NVPTX::TEX_1D_S32_F32_RR:
+ return NVPTX::TEX_1D_S32_F32_RI;
+ case NVPTX::TEX_1D_S32_F32_IR:
+ return NVPTX::TEX_1D_S32_F32_II;
+ case NVPTX::TEX_1D_S32_F32_LEVEL_RR:
+ return NVPTX::TEX_1D_S32_F32_LEVEL_RI;
+ case NVPTX::TEX_1D_S32_F32_LEVEL_IR:
+ return NVPTX::TEX_1D_S32_F32_LEVEL_II;
+ case NVPTX::TEX_1D_S32_F32_GRAD_RR:
+ return NVPTX::TEX_1D_S32_F32_GRAD_RI;
+ case NVPTX::TEX_1D_S32_F32_GRAD_IR:
+ return NVPTX::TEX_1D_S32_F32_GRAD_II;
+ case NVPTX::TEX_1D_U32_S32_RR:
+ return NVPTX::TEX_1D_U32_S32_RI;
+ case NVPTX::TEX_1D_U32_S32_IR:
+ return NVPTX::TEX_1D_U32_S32_II;
+ case NVPTX::TEX_1D_U32_F32_RR:
+ return NVPTX::TEX_1D_U32_F32_RI;
+ case NVPTX::TEX_1D_U32_F32_IR:
+ return NVPTX::TEX_1D_U32_F32_II;
+ case NVPTX::TEX_1D_U32_F32_LEVEL_RR:
+ return NVPTX::TEX_1D_U32_F32_LEVEL_RI;
+ case NVPTX::TEX_1D_U32_F32_LEVEL_IR:
+ return NVPTX::TEX_1D_U32_F32_LEVEL_II;
+ case NVPTX::TEX_1D_U32_F32_GRAD_RR:
+ return NVPTX::TEX_1D_U32_F32_GRAD_RI;
+ case NVPTX::TEX_1D_U32_F32_GRAD_IR:
+ return NVPTX::TEX_1D_U32_F32_GRAD_II;
+ case NVPTX::TEX_1D_ARRAY_F32_S32_RR:
+ return NVPTX::TEX_1D_ARRAY_F32_S32_RI;
+ case NVPTX::TEX_1D_ARRAY_F32_S32_IR:
+ return NVPTX::TEX_1D_ARRAY_F32_S32_II;
+ case NVPTX::TEX_1D_ARRAY_F32_F32_RR:
+ return NVPTX::TEX_1D_ARRAY_F32_F32_RI;
+ case NVPTX::TEX_1D_ARRAY_F32_F32_IR:
+ return NVPTX::TEX_1D_ARRAY_F32_F32_II;
+ case NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_RR:
+ return NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_RI;
+ case NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_IR:
+ return NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_II;
+ case NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_RR:
+ return NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_RI;
+ case NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_IR:
+ return NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_II;
+ case NVPTX::TEX_1D_ARRAY_S32_S32_RR:
+ return NVPTX::TEX_1D_ARRAY_S32_S32_RI;
+ case NVPTX::TEX_1D_ARRAY_S32_S32_IR:
+ return NVPTX::TEX_1D_ARRAY_S32_S32_II;
+ case NVPTX::TEX_1D_ARRAY_S32_F32_RR:
+ return NVPTX::TEX_1D_ARRAY_S32_F32_RI;
+ case NVPTX::TEX_1D_ARRAY_S32_F32_IR:
+ return NVPTX::TEX_1D_ARRAY_S32_F32_II;
+ case NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_RR:
+ return NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_RI;
+ case NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_IR:
+ return NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_II;
+ case NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_RR:
+ return NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_RI;
+ case NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_IR:
+ return NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_II;
+ case NVPTX::TEX_1D_ARRAY_U32_S32_RR:
+ return NVPTX::TEX_1D_ARRAY_U32_S32_RI;
+ case NVPTX::TEX_1D_ARRAY_U32_S32_IR:
+ return NVPTX::TEX_1D_ARRAY_U32_S32_II;
+ case NVPTX::TEX_1D_ARRAY_U32_F32_RR:
+ return NVPTX::TEX_1D_ARRAY_U32_F32_RI;
+ case NVPTX::TEX_1D_ARRAY_U32_F32_IR:
+ return NVPTX::TEX_1D_ARRAY_U32_F32_II;
+ case NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_RR:
+ return NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_RI;
+ case NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_IR:
+ return NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_II;
+ case NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_RR:
+ return NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_RI;
+ case NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_IR:
+ return NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_II;
+ case NVPTX::TEX_2D_F32_S32_RR:
+ return NVPTX::TEX_2D_F32_S32_RI;
+ case NVPTX::TEX_2D_F32_S32_IR:
+ return NVPTX::TEX_2D_F32_S32_II;
+ case NVPTX::TEX_2D_F32_F32_RR:
+ return NVPTX::TEX_2D_F32_F32_RI;
+ case NVPTX::TEX_2D_F32_F32_IR:
+ return NVPTX::TEX_2D_F32_F32_II;
+ case NVPTX::TEX_2D_F32_F32_LEVEL_RR:
+ return NVPTX::TEX_2D_F32_F32_LEVEL_RI;
+ case NVPTX::TEX_2D_F32_F32_LEVEL_IR:
+ return NVPTX::TEX_2D_F32_F32_LEVEL_II;
+ case NVPTX::TEX_2D_F32_F32_GRAD_RR:
+ return NVPTX::TEX_2D_F32_F32_GRAD_RI;
+ case NVPTX::TEX_2D_F32_F32_GRAD_IR:
+ return NVPTX::TEX_2D_F32_F32_GRAD_II;
+ case NVPTX::TEX_2D_S32_S32_RR:
+ return NVPTX::TEX_2D_S32_S32_RI;
+ case NVPTX::TEX_2D_S32_S32_IR:
+ return NVPTX::TEX_2D_S32_S32_II;
+ case NVPTX::TEX_2D_S32_F32_RR:
+ return NVPTX::TEX_2D_S32_F32_RI;
+ case NVPTX::TEX_2D_S32_F32_IR:
+ return NVPTX::TEX_2D_S32_F32_II;
+ case NVPTX::TEX_2D_S32_F32_LEVEL_RR:
+ return NVPTX::TEX_2D_S32_F32_LEVEL_RI;
+ case NVPTX::TEX_2D_S32_F32_LEVEL_IR:
+ return NVPTX::TEX_2D_S32_F32_LEVEL_II;
+ case NVPTX::TEX_2D_S32_F32_GRAD_RR:
+ return NVPTX::TEX_2D_S32_F32_GRAD_RI;
+ case NVPTX::TEX_2D_S32_F32_GRAD_IR:
+ return NVPTX::TEX_2D_S32_F32_GRAD_II;
+ case NVPTX::TEX_2D_U32_S32_RR:
+ return NVPTX::TEX_2D_U32_S32_RI;
+ case NVPTX::TEX_2D_U32_S32_IR:
+ return NVPTX::TEX_2D_U32_S32_II;
+ case NVPTX::TEX_2D_U32_F32_RR:
+ return NVPTX::TEX_2D_U32_F32_RI;
+ case NVPTX::TEX_2D_U32_F32_IR:
+ return NVPTX::TEX_2D_U32_F32_II;
+ case NVPTX::TEX_2D_U32_F32_LEVEL_RR:
+ return NVPTX::TEX_2D_U32_F32_LEVEL_RI;
+ case NVPTX::TEX_2D_U32_F32_LEVEL_IR:
+ return NVPTX::TEX_2D_U32_F32_LEVEL_II;
+ case NVPTX::TEX_2D_U32_F32_GRAD_RR:
+ return NVPTX::TEX_2D_U32_F32_GRAD_RI;
+ case NVPTX::TEX_2D_U32_F32_GRAD_IR:
+ return NVPTX::TEX_2D_U32_F32_GRAD_II;
+ case NVPTX::TEX_2D_ARRAY_F32_S32_RR:
+ return NVPTX::TEX_2D_ARRAY_F32_S32_RI;
+ case NVPTX::TEX_2D_ARRAY_F32_S32_IR:
+ return NVPTX::TEX_2D_ARRAY_F32_S32_II;
+ case NVPTX::TEX_2D_ARRAY_F32_F32_RR:
+ return NVPTX::TEX_2D_ARRAY_F32_F32_RI;
+ case NVPTX::TEX_2D_ARRAY_F32_F32_IR:
+ return NVPTX::TEX_2D_ARRAY_F32_F32_II;
+ case NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_RR:
+ return NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_RI;
+ case NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_IR:
+ return NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_II;
+ case NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_RR:
+ return NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_RI;
+ case NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_IR:
+ return NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_II;
+ case NVPTX::TEX_2D_ARRAY_S32_S32_RR:
+ return NVPTX::TEX_2D_ARRAY_S32_S32_RI;
+ case NVPTX::TEX_2D_ARRAY_S32_S32_IR:
+ return NVPTX::TEX_2D_ARRAY_S32_S32_II;
+ case NVPTX::TEX_2D_ARRAY_S32_F32_RR:
+ return NVPTX::TEX_2D_ARRAY_S32_F32_RI;
+ case NVPTX::TEX_2D_ARRAY_S32_F32_IR:
+ return NVPTX::TEX_2D_ARRAY_S32_F32_II;
+ case NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_RR:
+ return NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_RI;
+ case NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_IR:
+ return NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_II;
+ case NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_RR:
+ return NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_RI;
+ case NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_IR:
+ return NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_II;
+ case NVPTX::TEX_2D_ARRAY_U32_S32_RR:
+ return NVPTX::TEX_2D_ARRAY_U32_S32_RI;
+ case NVPTX::TEX_2D_ARRAY_U32_S32_IR:
+ return NVPTX::TEX_2D_ARRAY_U32_S32_II;
+ case NVPTX::TEX_2D_ARRAY_U32_F32_RR:
+ return NVPTX::TEX_2D_ARRAY_U32_F32_RI;
+ case NVPTX::TEX_2D_ARRAY_U32_F32_IR:
+ return NVPTX::TEX_2D_ARRAY_U32_F32_II;
+ case NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_RR:
+ return NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_RI;
+ case NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_IR:
+ return NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_II;
+ case NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_RR:
+ return NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_RI;
+ case NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_IR:
+ return NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_II;
+ case NVPTX::TEX_3D_F32_S32_RR:
+ return NVPTX::TEX_3D_F32_S32_RI;
+ case NVPTX::TEX_3D_F32_S32_IR:
+ return NVPTX::TEX_3D_F32_S32_II;
+ case NVPTX::TEX_3D_F32_F32_RR:
+ return NVPTX::TEX_3D_F32_F32_RI;
+ case NVPTX::TEX_3D_F32_F32_IR:
+ return NVPTX::TEX_3D_F32_F32_II;
+ case NVPTX::TEX_3D_F32_F32_LEVEL_RR:
+ return NVPTX::TEX_3D_F32_F32_LEVEL_RI;
+ case NVPTX::TEX_3D_F32_F32_LEVEL_IR:
+ return NVPTX::TEX_3D_F32_F32_LEVEL_II;
+ case NVPTX::TEX_3D_F32_F32_GRAD_RR:
+ return NVPTX::TEX_3D_F32_F32_GRAD_RI;
+ case NVPTX::TEX_3D_F32_F32_GRAD_IR:
+ return NVPTX::TEX_3D_F32_F32_GRAD_II;
+ case NVPTX::TEX_3D_S32_S32_RR:
+ return NVPTX::TEX_3D_S32_S32_RI;
+ case NVPTX::TEX_3D_S32_S32_IR:
+ return NVPTX::TEX_3D_S32_S32_II;
+ case NVPTX::TEX_3D_S32_F32_RR:
+ return NVPTX::TEX_3D_S32_F32_RI;
+ case NVPTX::TEX_3D_S32_F32_IR:
+ return NVPTX::TEX_3D_S32_F32_II;
+ case NVPTX::TEX_3D_S32_F32_LEVEL_RR:
+ return NVPTX::TEX_3D_S32_F32_LEVEL_RI;
+ case NVPTX::TEX_3D_S32_F32_LEVEL_IR:
+ return NVPTX::TEX_3D_S32_F32_LEVEL_II;
+ case NVPTX::TEX_3D_S32_F32_GRAD_RR:
+ return NVPTX::TEX_3D_S32_F32_GRAD_RI;
+ case NVPTX::TEX_3D_S32_F32_GRAD_IR:
+ return NVPTX::TEX_3D_S32_F32_GRAD_II;
+ case NVPTX::TEX_3D_U32_S32_RR:
+ return NVPTX::TEX_3D_U32_S32_RI;
+ case NVPTX::TEX_3D_U32_S32_IR:
+ return NVPTX::TEX_3D_U32_S32_II;
+ case NVPTX::TEX_3D_U32_F32_RR:
+ return NVPTX::TEX_3D_U32_F32_RI;
+ case NVPTX::TEX_3D_U32_F32_IR:
+ return NVPTX::TEX_3D_U32_F32_II;
+ case NVPTX::TEX_3D_U32_F32_LEVEL_RR:
+ return NVPTX::TEX_3D_U32_F32_LEVEL_RI;
+ case NVPTX::TEX_3D_U32_F32_LEVEL_IR:
+ return NVPTX::TEX_3D_U32_F32_LEVEL_II;
+ case NVPTX::TEX_3D_U32_F32_GRAD_RR:
+ return NVPTX::TEX_3D_U32_F32_GRAD_RI;
+ case NVPTX::TEX_3D_U32_F32_GRAD_IR:
+ return NVPTX::TEX_3D_U32_F32_GRAD_II;
+ case NVPTX::TEX_CUBE_F32_F32_RR:
+ return NVPTX::TEX_CUBE_F32_F32_RI;
+ case NVPTX::TEX_CUBE_F32_F32_IR:
+ return NVPTX::TEX_CUBE_F32_F32_II;
+ case NVPTX::TEX_CUBE_F32_F32_LEVEL_RR:
+ return NVPTX::TEX_CUBE_F32_F32_LEVEL_RI;
+ case NVPTX::TEX_CUBE_F32_F32_LEVEL_IR:
+ return NVPTX::TEX_CUBE_F32_F32_LEVEL_II;
+ case NVPTX::TEX_CUBE_S32_F32_RR:
+ return NVPTX::TEX_CUBE_S32_F32_RI;
+ case NVPTX::TEX_CUBE_S32_F32_IR:
+ return NVPTX::TEX_CUBE_S32_F32_II;
+ case NVPTX::TEX_CUBE_S32_F32_LEVEL_RR:
+ return NVPTX::TEX_CUBE_S32_F32_LEVEL_RI;
+ case NVPTX::TEX_CUBE_S32_F32_LEVEL_IR:
+ return NVPTX::TEX_CUBE_S32_F32_LEVEL_II;
+ case NVPTX::TEX_CUBE_U32_F32_RR:
+ return NVPTX::TEX_CUBE_U32_F32_RI;
+ case NVPTX::TEX_CUBE_U32_F32_IR:
+ return NVPTX::TEX_CUBE_U32_F32_II;
+ case NVPTX::TEX_CUBE_U32_F32_LEVEL_RR:
+ return NVPTX::TEX_CUBE_U32_F32_LEVEL_RI;
+ case NVPTX::TEX_CUBE_U32_F32_LEVEL_IR:
+ return NVPTX::TEX_CUBE_U32_F32_LEVEL_II;
+ case NVPTX::TEX_CUBE_ARRAY_F32_F32_RR:
+ return NVPTX::TEX_CUBE_ARRAY_F32_F32_RI;
+ case NVPTX::TEX_CUBE_ARRAY_F32_F32_IR:
+ return NVPTX::TEX_CUBE_ARRAY_F32_F32_II;
+ case NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_RR:
+ return NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_RI;
+ case NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_IR:
+ return NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_II;
+ case NVPTX::TEX_CUBE_ARRAY_S32_F32_RR:
+ return NVPTX::TEX_CUBE_ARRAY_S32_F32_RI;
+ case NVPTX::TEX_CUBE_ARRAY_S32_F32_IR:
+ return NVPTX::TEX_CUBE_ARRAY_S32_F32_II;
+ case NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_RR:
+ return NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_RI;
+ case NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_IR:
+ return NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_II;
+ case NVPTX::TEX_CUBE_ARRAY_U32_F32_RR:
+ return NVPTX::TEX_CUBE_ARRAY_U32_F32_RI;
+ case NVPTX::TEX_CUBE_ARRAY_U32_F32_IR:
+ return NVPTX::TEX_CUBE_ARRAY_U32_F32_II;
+ case NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_RR:
+ return NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_RI;
+ case NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_IR:
+ return NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_II;
+ case NVPTX::TLD4_R_2D_F32_F32_RR:
+ return NVPTX::TLD4_R_2D_F32_F32_RI;
+ case NVPTX::TLD4_R_2D_F32_F32_IR:
+ return NVPTX::TLD4_R_2D_F32_F32_II;
+ case NVPTX::TLD4_G_2D_F32_F32_RR:
+ return NVPTX::TLD4_G_2D_F32_F32_RI;
+ case NVPTX::TLD4_G_2D_F32_F32_IR:
+ return NVPTX::TLD4_G_2D_F32_F32_II;
+ case NVPTX::TLD4_B_2D_F32_F32_RR:
+ return NVPTX::TLD4_B_2D_F32_F32_RI;
+ case NVPTX::TLD4_B_2D_F32_F32_IR:
+ return NVPTX::TLD4_B_2D_F32_F32_II;
+ case NVPTX::TLD4_A_2D_F32_F32_RR:
+ return NVPTX::TLD4_A_2D_F32_F32_RI;
+ case NVPTX::TLD4_A_2D_F32_F32_IR:
+ return NVPTX::TLD4_A_2D_F32_F32_II;
+ case NVPTX::TLD4_R_2D_S32_F32_RR:
+ return NVPTX::TLD4_R_2D_S32_F32_RI;
+ case NVPTX::TLD4_R_2D_S32_F32_IR:
+ return NVPTX::TLD4_R_2D_S32_F32_II;
+ case NVPTX::TLD4_G_2D_S32_F32_RR:
+ return NVPTX::TLD4_G_2D_S32_F32_RI;
+ case NVPTX::TLD4_G_2D_S32_F32_IR:
+ return NVPTX::TLD4_G_2D_S32_F32_II;
+ case NVPTX::TLD4_B_2D_S32_F32_RR:
+ return NVPTX::TLD4_B_2D_S32_F32_RI;
+ case NVPTX::TLD4_B_2D_S32_F32_IR:
+ return NVPTX::TLD4_B_2D_S32_F32_II;
+ case NVPTX::TLD4_A_2D_S32_F32_RR:
+ return NVPTX::TLD4_A_2D_S32_F32_RI;
+ case NVPTX::TLD4_A_2D_S32_F32_IR:
+ return NVPTX::TLD4_A_2D_S32_F32_II;
+ case NVPTX::TLD4_R_2D_U32_F32_RR:
+ return NVPTX::TLD4_R_2D_U32_F32_RI;
+ case NVPTX::TLD4_R_2D_U32_F32_IR:
+ return NVPTX::TLD4_R_2D_U32_F32_II;
+ case NVPTX::TLD4_G_2D_U32_F32_RR:
+ return NVPTX::TLD4_G_2D_U32_F32_RI;
+ case NVPTX::TLD4_G_2D_U32_F32_IR:
+ return NVPTX::TLD4_G_2D_U32_F32_II;
+ case NVPTX::TLD4_B_2D_U32_F32_RR:
+ return NVPTX::TLD4_B_2D_U32_F32_RI;
+ case NVPTX::TLD4_B_2D_U32_F32_IR:
+ return NVPTX::TLD4_B_2D_U32_F32_II;
+ case NVPTX::TLD4_A_2D_U32_F32_RR:
+ return NVPTX::TLD4_A_2D_U32_F32_RI;
+ case NVPTX::TLD4_A_2D_U32_F32_IR:
+ return NVPTX::TLD4_A_2D_U32_F32_II;
+ default:
+ llvm_unreachable("Unhandled TEX opcode");
+ }
+}
+
+static unsigned queryRegisterToIndexOpcode(unsigned RegOC) {
+ switch (RegOC) {
+ case NVPTX::TXQ_CHANNEL_ORDER_R:
+ return NVPTX::TXQ_CHANNEL_ORDER_I;
+ case NVPTX::TXQ_CHANNEL_DATA_TYPE_R:
+ return NVPTX::TXQ_CHANNEL_DATA_TYPE_I;
+ case NVPTX::TXQ_WIDTH_R:
+ return NVPTX::TXQ_WIDTH_I;
+ case NVPTX::TXQ_HEIGHT_R:
+ return NVPTX::TXQ_HEIGHT_I;
+ case NVPTX::TXQ_DEPTH_R:
+ return NVPTX::TXQ_DEPTH_I;
+ case NVPTX::TXQ_ARRAY_SIZE_R:
+ return NVPTX::TXQ_ARRAY_SIZE_I;
+ case NVPTX::TXQ_NUM_SAMPLES_R:
+ return NVPTX::TXQ_NUM_SAMPLES_I;
+ case NVPTX::TXQ_NUM_MIPMAP_LEVELS_R:
+ return NVPTX::TXQ_NUM_MIPMAP_LEVELS_I;
+ case NVPTX::SUQ_CHANNEL_ORDER_R:
+ return NVPTX::SUQ_CHANNEL_ORDER_I;
+ case NVPTX::SUQ_CHANNEL_DATA_TYPE_R:
+ return NVPTX::SUQ_CHANNEL_DATA_TYPE_I;
+ case NVPTX::SUQ_WIDTH_R:
+ return NVPTX::SUQ_WIDTH_I;
+ case NVPTX::SUQ_HEIGHT_R:
+ return NVPTX::SUQ_HEIGHT_I;
+ case NVPTX::SUQ_DEPTH_R:
+ return NVPTX::SUQ_DEPTH_I;
+ case NVPTX::SUQ_ARRAY_SIZE_R:
+ return NVPTX::SUQ_ARRAY_SIZE_I;
+ default:
+ llvm_unreachable("Unhandled TXQ/SUQ opcode");
+ }
+}
+
bool NVPTXReplaceImageHandles::processInstr(MachineInstr &MI) {
MachineFunction &MF = *MI.getParent()->getParent();
const MCInstrDesc &MCID = MI.getDesc();
+ const NVPTXInstrInfo *TII = MF.getSubtarget<NVPTXSubtarget>().getInstrInfo();
if (MCID.TSFlags & NVPTXII::IsTexFlag) {
// This is a texture fetch, so operand 4 is a texref and operand 5 is
// a samplerref
MachineOperand &TexHandle = MI.getOperand(4);
- replaceImageHandle(TexHandle, MF);
+ if (replaceImageHandle(TexHandle, MF))
+ MI.setDesc(TII->get(texRegisterToIndexOpcode(MI.getOpcode())));
if (!(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) {
MachineOperand &SampHandle = MI.getOperand(5);
- replaceImageHandle(SampHandle, MF);
+ if (replaceImageHandle(SampHandle, MF))
+ MI.setDesc(TII->get(samplerRegisterToIndexOpcode(MI.getOpcode())));
}
return true;
@@ -99,21 +1755,24 @@ bool NVPTXReplaceImageHandles::processInstr(MachineInstr &MI) {
// For a surface load of vector size N, the Nth operand will be the surfref
MachineOperand &SurfHandle = MI.getOperand(VecSize);
- replaceImageHandle(SurfHandle, MF);
+ if (replaceImageHandle(SurfHandle, MF))
+ MI.setDesc(TII->get(suldRegisterToIndexOpcode(MI.getOpcode())));
return true;
} else if (MCID.TSFlags & NVPTXII::IsSustFlag) {
// This is a surface store, so operand 0 is a surfref
MachineOperand &SurfHandle = MI.getOperand(0);
- replaceImageHandle(SurfHandle, MF);
+ if (replaceImageHandle(SurfHandle, MF))
+ MI.setDesc(TII->get(sustRegisterToIndexOpcode(MI.getOpcode())));
return true;
} else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) {
// This is a query, so operand 1 is a surfref/texref
MachineOperand &Handle = MI.getOperand(1);
- replaceImageHandle(Handle, MF);
+ if (replaceImageHandle(Handle, MF))
+ MI.setDesc(TII->get(queryRegisterToIndexOpcode(MI.getOpcode())));
return true;
}
@@ -121,12 +1780,14 @@ bool NVPTXReplaceImageHandles::processInstr(MachineInstr &MI) {
return false;
}
-void NVPTXReplaceImageHandles::
-replaceImageHandle(MachineOperand &Op, MachineFunction &MF) {
+bool NVPTXReplaceImageHandles::replaceImageHandle(MachineOperand &Op,
+ MachineFunction &MF) {
unsigned Idx;
if (findIndexForHandle(Op, MF, Idx)) {
Op.ChangeToImmediate(Idx);
+ return true;
}
+ return false;
}
bool NVPTXReplaceImageHandles::
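
All of the *RegisterToIndexOpcode helpers above encode the same idea: once replaceImageHandle manages to turn a texref/samplerref/surfref register operand into an immediate index, processInstr swaps the instruction to the opcode variant whose suffix letter for that operand is I instead of R. A minimal standalone sketch of that remapping pattern, using made-up opcode names rather than the real NVPTX enums, might look like this:

#include <cassert>
#include <cstdio>

// Hypothetical opcodes standing in for the NVPTX::TEX_* enum values.
enum Opcode { TEX_1D_RR, TEX_1D_RI, TEX_1D_IR, TEX_1D_II };

// Same shape as texRegisterToIndexOpcode above: flip the texture operand
// from register form (_R*) to immediate form (_I*).
static Opcode texRegisterToIndex(Opcode Op) {
  switch (Op) {
  case TEX_1D_RR: return TEX_1D_IR;
  case TEX_1D_RI: return TEX_1D_II;
  default:
    assert(false && "unhandled opcode");
    return Op;
  }
}

int main() {
  Opcode Op = TEX_1D_RR;
  bool HandleBecameImmediate = true; // pretend replaceImageHandle succeeded
  if (HandleBecameImmediate)
    Op = texRegisterToIndex(Op);     // opcode is now the _IR form
  std::printf("remapped opcode: %d\n", static_cast<int>(Op));
  return 0;
}

In the pass itself the same lookup feeds MI.setDesc(TII->get(...)), as the processInstr hunk above shows.
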
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index e3515f35d022..0a1c61a35795 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -23,11 +23,12 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Pass.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
@@ -225,7 +226,7 @@ void NVPTXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
});
PB.registerPipelineStartEPCallback(
- [this](ModulePassManager &PM, PassBuilder::OptimizationLevel Level) {
+ [this](ModulePassManager &PM, OptimizationLevel Level) {
FunctionPassManager FPM;
FPM.addPass(NVVMReflectPass(Subtarget.getSmVersion()));
// FIXME: NVVMIntrRangePass is causing numerical discrepancies,
@@ -240,6 +241,25 @@ NVPTXTargetMachine::getTargetTransformInfo(const Function &F) {
return TargetTransformInfo(NVPTXTTIImpl(this, F));
}
+std::pair<const Value *, unsigned>
+NVPTXTargetMachine::getPredicatedAddrSpace(const Value *V) const {
+ if (auto *II = dyn_cast<IntrinsicInst>(V)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::nvvm_isspacep_const:
+ return std::make_pair(II->getArgOperand(0), llvm::ADDRESS_SPACE_CONST);
+ case Intrinsic::nvvm_isspacep_global:
+ return std::make_pair(II->getArgOperand(0), llvm::ADDRESS_SPACE_GLOBAL);
+ case Intrinsic::nvvm_isspacep_local:
+ return std::make_pair(II->getArgOperand(0), llvm::ADDRESS_SPACE_LOCAL);
+ case Intrinsic::nvvm_isspacep_shared:
+ return std::make_pair(II->getArgOperand(0), llvm::ADDRESS_SPACE_SHARED);
+ default:
+ break;
+ }
+ }
+ return std::make_pair(nullptr, -1);
+}
+
void NVPTXPassConfig::addEarlyCSEOrGVNPass() {
if (getOptLevel() == CodeGenOpt::Aggressive)
addPass(createGVNPass());
@@ -328,6 +348,7 @@ void NVPTXPassConfig::addIRPasses() {
addEarlyCSEOrGVNPass();
if (!DisableLoadStoreVectorizer)
addPass(createLoadStoreVectorizerPass());
+ addPass(createSROAPass());
}
}
@@ -350,7 +371,7 @@ void NVPTXPassConfig::addPreRegAlloc() {
}
void NVPTXPassConfig::addPostRegAlloc() {
- addPass(createNVPTXPrologEpilogPass(), false);
+ addPass(createNVPTXPrologEpilogPass());
if (getOptLevel() != CodeGenOpt::None) {
// NVPTXPrologEpilogPass calculates frame object offsets and replaces frame
// indices with the VRFrame register. NVPTXPeephole needs to be run after that and
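
The new getPredicatedAddrSpace hook gives generic address-space inference a way to learn from the llvm.nvvm.isspacep.* intrinsics: when a pointer has been tested by such a check, the returned address space can be assumed for that pointer wherever the check is known to hold. A rough standalone sketch of the mapping this hook returns, with hypothetical names in place of the LLVM types and intrinsic IDs, could be:

#include <cstdio>
#include <utility>

// Stand-ins mirroring the usual NVPTX address-space numbering
// (generic = 0, global = 1, shared = 3, const = 4, local = 5).
enum AddrSpace { Generic = 0, Global = 1, Shared = 3, Const = 4, Local = 5 };

// Hypothetical IDs standing in for Intrinsic::nvvm_isspacep_*.
enum SpaceCheck { IsGlobal, IsShared, IsConst, IsLocal, NotASpaceCheck };

// Mirrors the switch in NVPTXTargetMachine::getPredicatedAddrSpace: map a
// space-check intrinsic to the address space it proves for its argument.
static std::pair<bool, AddrSpace> predicatedAddrSpace(SpaceCheck ID) {
  switch (ID) {
  case IsGlobal: return {true, Global};
  case IsShared: return {true, Shared};
  case IsConst:  return {true, Const};
  case IsLocal:  return {true, Local};
  default:       return {false, Generic}; // no predicate information
  }
}

int main() {
  auto Result = predicatedAddrSpace(IsShared);
  if (Result.first)
    std::printf("pointer is in address space %d where the check holds\n",
                static_cast<int>(Result.second));
  return 0;
}

In the actual override the first element of the pair is the intrinsic's pointer argument (II->getArgOperand(0)) and the second is the llvm::ADDRESS_SPACE_* constant, with {nullptr, -1} meaning no predicate information is available.
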
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
index 39647eb65c0c..7a69197abcff 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -16,6 +16,7 @@
#include "ManagedStringPool.h"
#include "NVPTXSubtarget.h"
#include "llvm/Target/TargetMachine.h"
+#include <utility>
namespace llvm {
@@ -69,6 +70,9 @@ public:
bool isMachineVerifierClean() const override {
return false;
}
+
+ std::pair<const Value *, unsigned>
+ getPredicatedAddrSpace(const Value *V) const override;
}; // NVPTXTargetMachine.
class NVPTXTargetMachine32 : public NVPTXTargetMachine {
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index 20bd227b4b16..466aa7130216 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -328,7 +328,7 @@ static Instruction *simplifyNvvmIntrinsic(IntrinsicInst *II, InstCombiner &IC) {
// Simplify to target-generic intrinsic.
if (Action.IID) {
- SmallVector<Value *, 4> Args(II->arg_operands());
+ SmallVector<Value *, 4> Args(II->args());
// All the target-generic intrinsics currently of interest to us have one
// type argument, equal to that of the nvvm intrinsic's argument.
Type *Tys[] = {II->getArgOperand(0)->getType()};
@@ -402,8 +402,9 @@ InstructionCost NVPTXTTIImpl::getArithmeticInstrCost(
}
void NVPTXTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
- TTI::UnrollingPreferences &UP) {
- BaseT::getUnrollingPreferences(L, SE, UP);
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE) {
+ BaseT::getUnrollingPreferences(L, SE, UP, ORE);
// Enable partial unrolling and runtime unrolling, but reduce the
// threshold. This partially unrolls small loops which are often
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
index d5a52d42ca00..bf593af68f33 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -48,6 +48,11 @@ public:
return AddressSpace::ADDRESS_SPACE_GENERIC;
}
+ bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const {
+ return AS != AddressSpace::ADDRESS_SPACE_SHARED &&
+ AS != AddressSpace::ADDRESS_SPACE_LOCAL && AS != ADDRESS_SPACE_PARAM;
+ }
+
Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
IntrinsicInst &II) const;
@@ -89,8 +94,7 @@ public:
unsigned getInliningThresholdMultiplier() { return 5; }
InstructionCost getArithmeticInstrCost(
- unsigned Opcode, Type *Ty,
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+ unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
@@ -99,7 +103,8 @@ public:
const Instruction *CxtI = nullptr);
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
- TTI::UnrollingPreferences &UP);
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE);
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP);
diff --git a/llvm/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp b/llvm/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp
index 235be9c0dbbb..e4f0a517599f 100644
--- a/llvm/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp
+++ b/llvm/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#include "TargetInfo/NVPTXTargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
Target &llvm::getTheNVPTXTarget32() {
diff --git a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index 7631bb4bccfb..9e181d4052d6 100644
--- a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -23,8 +23,8 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
index 94416fc584b5..5a12c3f22dee 100644
--- a/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -12,8 +12,8 @@
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Endian.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.cpp b/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.cpp
index 22731bbd0f82..6b16af293244 100644
--- a/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.cpp
+++ b/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.cpp
@@ -80,7 +80,7 @@ bool PPCCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
void PPCIncomingValueHandler::assignValueToReg(Register ValVReg,
Register PhysReg,
- CCValAssign &VA) {
+ CCValAssign VA) {
markPhysRegUsed(PhysReg);
IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
}
diff --git a/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.h b/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.h
index b045032bec06..cc2cb7b26e84 100644
--- a/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.h
+++ b/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.h
@@ -46,7 +46,7 @@ public:
private:
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign &VA) override;
+ CCValAssign VA) override;
void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
MachinePointerInfo &MPO, CCValAssign &VA) override;
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index 28294b4c00ed..9df94edc8cdf 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -20,8 +20,8 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) {
@@ -196,7 +196,8 @@ public:
llvm_unreachable("relaxInstruction() unimplemented");
}
- bool writeNopData(raw_ostream &OS, uint64_t Count) const override {
+ bool writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const override {
uint64_t NumNops = Count / 4;
for (uint64_t i = 0; i != NumNops; ++i)
support::endian::write<uint32_t>(OS, 0x60000000, Endian);
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp
index 386d59266096..0ca8587ba483 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp
@@ -1,9 +1,8 @@
//===-------- PPCELFStreamer.cpp - ELF Object Output ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -57,7 +56,7 @@ void PPCELFStreamer::emitPrefixedInstruction(const MCInst &Inst,
// all of the nops required as part of the alignment operation. In the cases
// when no nops are added then the fragment is still created but it remains
// empty.
- emitCodeAlignment(64, 4);
+ emitCodeAlignment(64, &STI, 4);
// Emit the instruction.
// Since the previous emit created a new fragment then adding this instruction
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h
index f44200104f32..b3e12413eacf 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h
@@ -1,9 +1,8 @@
//===- PPCELFStreamer.h - ELF Object Output --------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
index 3f6497aa0e8f..67c02c17bc46 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
@@ -158,7 +158,10 @@ void PPCInstPrinter::printInst(const MCInst *MI, uint64_t Address,
// dcbt ra, rb, th [server]
// dcbt th, ra, rb [embedded]
// where th can be omitted when it is 0. dcbtst is the same.
- if (MI->getOpcode() == PPC::DCBT || MI->getOpcode() == PPC::DCBTST) {
+ // On AIX, only emit the extended mnemonics for dcbt and dcbtst if
+ // the "modern assembler" is available.
+ if ((MI->getOpcode() == PPC::DCBT || MI->getOpcode() == PPC::DCBTST) &&
+ (!TT.isOSAIX() || STI.getFeatureBits()[PPC::FeatureModernAIXAs])) {
unsigned char TH = MI->getOperand(0).getImm();
O << "\tdcbt";
if (MI->getOpcode() == PPC::DCBTST)
@@ -628,8 +631,6 @@ const char *PPCInstPrinter::getVerboseConditionRegName(unsigned RegNum,
// showRegistersWithPrefix - This method determines whether registers
// should be number-only or include the prefix.
bool PPCInstPrinter::showRegistersWithPrefix() const {
- if (TT.getOS() == Triple::AIX)
- return false;
return FullRegNamesWithPercent || FullRegNames;
}
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index e9fc056a08f0..22b948a83c34 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -37,11 +37,11 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCSymbolXCOFF.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.cpp
index e582ddfca323..79db03b0331b 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.cpp
@@ -1,9 +1,8 @@
//===-------- PPCXCOFFStreamer.cpp - XCOFF Object Output ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -27,7 +26,7 @@
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionXCOFF.h"
#include "llvm/MC/MCSymbolXCOFF.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
@@ -46,7 +45,7 @@ void PPCXCOFFStreamer::emitPrefixedInstruction(const MCInst &Inst,
// prefixed instruction. Align to 64 bytes if possible but add a maximum of 4
// bytes when trying to do that. If alignment requires adding more than 4
// bytes then the instruction won't be aligned.
- emitCodeAlignment(64, 4);
+ emitCodeAlignment(64, &STI, 4);
// Emit the instruction.
// Since the previous emit created a new fragment then adding this instruction
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.h
index f6eb5edfb7a7..5fa35127b70b 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.h
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.h
@@ -1,9 +1,8 @@
//===- PPCXCOFFStreamer.h - XCOFF Object Output -----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/llvm/lib/Target/PowerPC/P10InstrResources.td b/llvm/lib/Target/PowerPC/P10InstrResources.td
new file mode 100644
index 000000000000..f43ba00ec373
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/P10InstrResources.td
@@ -0,0 +1,2075 @@
+//===--- P10InstrResources.td - P10 Scheduling Definitions -*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Automatically generated file, do not edit!
+//
+// This file defines the itinerary class data for the POWER10 processor.
+//
+//===----------------------------------------------------------------------===//
+// 22 Cycles Binary Floating Point operations, 2 input operands
+def : InstRW<[P10W_BF_22C, P10W_DISP_ANY, P10BF_Read, P10BF_Read],
+ (instrs
+ FDIVS,
+ XSDIVSP
+)>;
+
+// 2-way crack instructions
+// 22 Cycles Binary Floating Point operations, and 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_BF_22C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ FDIVS_rec
+)>;
+
+// 24 Cycles Binary Floating Point operations, 2 input operands
+def : InstRW<[P10W_BF_24C, P10W_DISP_ANY, P10BF_Read, P10BF_Read],
+ (instrs
+ XVDIVSP
+)>;
+
+// 26 Cycles Binary Floating Point operations, 1 input operands
+def : InstRW<[P10W_BF_26C, P10W_DISP_ANY, P10BF_Read],
+ (instrs
+ FSQRTS,
+ XSSQRTSP
+)>;
+
+// 2-way crack instructions
+// 26 Cycles Binary Floating Point operations, and 3 Cycles ALU operations, 1 input operands
+def : InstRW<[P10W_BF_26C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ FSQRTS_rec
+)>;
+
+// 27 Cycles Binary Floating Point operations, 1 input operands
+def : InstRW<[P10W_BF_27C, P10W_DISP_ANY, P10BF_Read],
+ (instrs
+ XVSQRTSP
+)>;
+
+// 27 Cycles Binary Floating Point operations, 2 input operands
+def : InstRW<[P10W_BF_27C, P10W_DISP_ANY, P10BF_Read, P10BF_Read],
+ (instrs
+ FDIV,
+ XSDIVDP,
+ XVDIVDP
+)>;
+
+// 2-way crack instructions
+// 27 Cycles Binary Floating Point operations, and 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_BF_27C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ FDIV_rec
+)>;
+
+// 36 Cycles Binary Floating Point operations, 1 input operands
+def : InstRW<[P10W_BF_36C, P10W_DISP_ANY, P10BF_Read],
+ (instrs
+ FSQRT,
+ XSSQRTDP,
+ XVSQRTDP
+)>;
+
+// 2-way crack instructions
+// 36 Cycles Binary Floating Point operations, and 3 Cycles ALU operations, 1 input operands
+def : InstRW<[P10W_BF_36C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ FSQRT_rec
+)>;
+
+// 7 Cycles Binary Floating Point operations, 1 input operands
+def : InstRW<[P10W_BF_7C, P10W_DISP_ANY, P10BF_Read],
+ (instrs
+ FCFID,
+ FCFIDS,
+ FCFIDU,
+ FCFIDUS,
+ FCTID,
+ FCTIDU,
+ FCTIDUZ,
+ FCTIDZ,
+ FCTIW,
+ FCTIWU,
+ FCTIWUZ,
+ FCTIWZ,
+ FRE,
+ FRES,
+ FRIMD, FRIMS,
+ FRIND, FRINS,
+ FRIPD, FRIPS,
+ FRIZD, FRIZS,
+ FRSP,
+ FRSQRTE,
+ FRSQRTES,
+ VCFSX, VCFSX_0,
+ VCFUX, VCFUX_0,
+ VCTSXS, VCTSXS_0,
+ VCTUXS, VCTUXS_0,
+ VLOGEFP,
+ VREFP,
+ VRFIM,
+ VRFIN,
+ VRFIP,
+ VRFIZ,
+ VRSQRTEFP,
+ XSCVDPHP,
+ XSCVDPSP,
+ XSCVDPSPN,
+ XSCVDPSXDS, XSCVDPSXDSs,
+ XSCVDPSXWS, XSCVDPSXWSs,
+ XSCVDPUXDS, XSCVDPUXDSs,
+ XSCVDPUXWS, XSCVDPUXWSs,
+ XSCVSPDP,
+ XSCVSXDDP,
+ XSCVSXDSP,
+ XSCVUXDDP,
+ XSCVUXDSP,
+ XSRDPI,
+ XSRDPIC,
+ XSRDPIM,
+ XSRDPIP,
+ XSRDPIZ,
+ XSREDP,
+ XSRESP,
+ XSRSP,
+ XSRSQRTEDP,
+ XSRSQRTESP,
+ XVCVDPSP,
+ XVCVDPSXDS,
+ XVCVDPSXWS,
+ XVCVDPUXDS,
+ XVCVDPUXWS,
+ XVCVSPBF16,
+ XVCVSPDP,
+ XVCVSPHP,
+ XVCVSPSXDS,
+ XVCVSPSXWS,
+ XVCVSPUXDS,
+ XVCVSPUXWS,
+ XVCVSXDDP,
+ XVCVSXDSP,
+ XVCVSXWDP,
+ XVCVSXWSP,
+ XVCVUXDDP,
+ XVCVUXDSP,
+ XVCVUXWDP,
+ XVCVUXWSP,
+ XVRDPI,
+ XVRDPIC,
+ XVRDPIM,
+ XVRDPIP,
+ XVRDPIZ,
+ XVREDP,
+ XVRESP,
+ XVRSPI,
+ XVRSPIC,
+ XVRSPIM,
+ XVRSPIP,
+ XVRSPIZ,
+ XVRSQRTEDP,
+ XVRSQRTESP
+)>;
+
+// 7 Cycles Binary Floating Point operations, 2 input operands
+def : InstRW<[P10W_BF_7C, P10W_DISP_ANY, P10BF_Read, P10BF_Read],
+ (instrs
+ FADD,
+ FADDS,
+ FMUL,
+ FMULS,
+ FSUB,
+ FSUBS,
+ VADDFP,
+ VSUBFP,
+ XSADDDP,
+ XSADDSP,
+ XSMULDP,
+ XSMULSP,
+ XSSUBDP,
+ XSSUBSP,
+ XVADDDP,
+ XVADDSP,
+ XVMULDP,
+ XVMULSP,
+ XVSUBDP,
+ XVSUBSP
+)>;
+
+// 7 Cycles Binary Floating Point operations, 3 input operands
+def : InstRW<[P10W_BF_7C, P10W_DISP_ANY, P10BF_Read, P10BF_Read, P10BF_Read],
+ (instrs
+ FMADD,
+ FMADDS,
+ FMSUB,
+ FMSUBS,
+ FNMADD,
+ FNMADDS,
+ FNMSUB,
+ FNMSUBS,
+ FSELD, FSELS,
+ VMADDFP,
+ VNMSUBFP,
+ XSMADDADP,
+ XSMADDASP,
+ XSMADDMDP,
+ XSMADDMSP,
+ XSMSUBADP,
+ XSMSUBASP,
+ XSMSUBMDP,
+ XSMSUBMSP,
+ XSNMADDADP,
+ XSNMADDASP,
+ XSNMADDMDP,
+ XSNMADDMSP,
+ XSNMSUBADP,
+ XSNMSUBASP,
+ XSNMSUBMDP,
+ XSNMSUBMSP,
+ XVMADDADP,
+ XVMADDASP,
+ XVMADDMDP,
+ XVMADDMSP,
+ XVMSUBADP,
+ XVMSUBASP,
+ XVMSUBMDP,
+ XVMSUBMSP,
+ XVNMADDADP,
+ XVNMADDASP,
+ XVNMADDMDP,
+ XVNMADDMSP,
+ XVNMSUBADP,
+ XVNMSUBASP,
+ XVNMSUBMDP,
+ XVNMSUBMSP
+)>;
+
+// 2-way crack instructions
+// 7 Cycles Binary Floating Point operations, and 7 Cycles Binary Floating Point operations, 1 input operands
+def : InstRW<[P10W_BF_7C, P10W_DISP_EVEN, P10W_BF_7C, P10W_DISP_ANY, P10BF_Read],
+ (instrs
+ VEXPTEFP
+)>;
+
+// 2-way crack instructions
+// 7 Cycles Binary Floating Point operations, and 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_BF_7C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ FADD_rec,
+ FADDS_rec,
+ FMUL_rec,
+ FMULS_rec,
+ FSUB_rec,
+ FSUBS_rec
+)>;
+
+// 2-way crack instructions
+// 7 Cycles Binary Floating Point operations, and 3 Cycles ALU operations, 1 input operands
+def : InstRW<[P10W_BF_7C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ FCFID_rec,
+ FCFIDS_rec,
+ FCFIDU_rec,
+ FCFIDUS_rec,
+ FCTID_rec,
+ FCTIDU_rec,
+ FCTIDUZ_rec,
+ FCTIDZ_rec,
+ FCTIW_rec,
+ FCTIWU_rec,
+ FCTIWUZ_rec,
+ FCTIWZ_rec,
+ FRE_rec,
+ FRES_rec,
+ FRIMD_rec, FRIMS_rec,
+ FRIND_rec, FRINS_rec,
+ FRIPD_rec, FRIPS_rec,
+ FRIZD_rec, FRIZS_rec,
+ FRSP_rec,
+ FRSQRTE_rec,
+ FRSQRTES_rec
+)>;
+
+// 2-way crack instructions
+// 7 Cycles Binary Floating Point operations, and 3 Cycles ALU operations, 3 input operands
+def : InstRW<[P10W_BF_7C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ FMADD_rec,
+ FMADDS_rec,
+ FMSUB_rec,
+ FMSUBS_rec,
+ FNMADD_rec,
+ FNMADDS_rec,
+ FNMSUB_rec,
+ FNMSUBS_rec,
+ FSELD_rec, FSELS_rec
+)>;
+
+// 2 Cycles Branch operations, 0 input operands
+def : InstRW<[P10W_BR_2C, P10W_DISP_ANY],
+ (instrs
+ BCLR, BCLRn, BDNZLR, BDNZLR8, BDNZLRm, BDNZLRp, BDZLR, BDZLR8, BDZLRm, BDZLRp, gBCLR,
+ BCLRL, BCLRLn, BDNZLRL, BDNZLRLm, BDNZLRLp, BDZLRL, BDZLRLm, BDZLRLp, gBCLRL,
+ BL, BL8, BL8_NOP, BL8_NOP_RM, BL8_NOP_TLS, BL8_NOTOC, BL8_NOTOC_RM, BL8_NOTOC_TLS, BL8_RM, BL8_TLS, BL8_TLS_, BLR, BLR8, BLRL, BL_NOP, BL_NOP_RM, BL_RM, BL_TLS
+)>;
+
+// 2 Cycles Branch operations, 1 input operands
+def : InstRW<[P10W_BR_2C, P10W_DISP_ANY, P10BR_Read],
+ (instrs
+ B, BCC, BCCA, BCCCTR, BCCCTR8, BCCCTRL, BCCCTRL8, BCCL, BCCLA, BCCLR, BCCLRL, CTRL_DEP, TAILB, TAILB8,
+ BA, TAILBA, TAILBA8,
+ BC, BCTR, BCTR8, BCTRL, BCTRL8, BCTRL8_LDinto_toc, BCTRL8_LDinto_toc_RM, BCTRL8_RM, BCTRL_LWZinto_toc, BCTRL_LWZinto_toc_RM, BCTRL_RM, BCn, BDNZ, BDNZ8, BDNZm, BDNZp, BDZ, BDZ8, BDZm, BDZp, TAILBCTR, TAILBCTR8, gBC, gBCat,
+ BCL, BCLalways, BCLn, BDNZL, BDNZLm, BDNZLp, BDZL, BDZLm, BDZLp, gBCL, gBCLat,
+ BLA, BLA8, BLA8_NOP, BLA8_NOP_RM, BLA8_RM, BLA_RM
+)>;
+
+// 2 Cycles Branch operations, 3 input operands
+def : InstRW<[P10W_BR_2C, P10W_DISP_ANY, P10BR_Read, P10BR_Read, P10BR_Read],
+ (instrs
+ BCCTR, BCCTR8, BCCTR8n, BCCTRn, gBCCTR,
+ BCCTRL, BCCTRL8, BCCTRL8n, BCCTRLn, gBCCTRL
+)>;
+
+// 2 Cycles Branch operations, 4 input operands
+def : InstRW<[P10W_BR_2C, P10W_DISP_ANY, P10BR_Read, P10BR_Read, P10BR_Read, P10BR_Read],
+ (instrs
+ BDNZA, BDNZAm, BDNZAp, BDZA, BDZAm, BDZAp, gBCA, gBCAat,
+ BDNZLA, BDNZLAm, BDNZLAp, BDZLA, BDZLAm, BDZLAp, gBCLA, gBCLAat
+)>;
+
+// 7 Cycles Crypto operations, 1 input operands
+def : InstRW<[P10W_CY_7C, P10W_DISP_ANY, P10CY_Read],
+ (instrs
+ VSBOX
+)>;
+
+// 7 Cycles Crypto operations, 2 input operands
+def : InstRW<[P10W_CY_7C, P10W_DISP_ANY, P10CY_Read, P10CY_Read],
+ (instrs
+ CFUGED,
+ CNTLZDM,
+ CNTTZDM,
+ PDEPD,
+ PEXTD,
+ VCFUGED,
+ VCIPHER,
+ VCIPHERLAST,
+ VCLZDM,
+ VCTZDM,
+ VGNB,
+ VNCIPHER,
+ VNCIPHERLAST,
+ VPDEPD,
+ VPEXTD,
+ VPMSUMB,
+ VPMSUMD,
+ VPMSUMH,
+ VPMSUMW
+)>;
+
+// 13 Cycles Decimal Floating Point operations, 1 input operands
+def : InstRW<[P10W_DF_13C, P10W_DISP_ANY, P10DF_Read],
+ (instrs
+ XSCVDPQP,
+ XSCVQPDP,
+ XSCVQPDPO,
+ XSCVQPSDZ,
+ XSCVQPSQZ,
+ XSCVQPSWZ,
+ XSCVQPUDZ,
+ XSCVQPUQZ,
+ XSCVQPUWZ,
+ XSCVSDQP,
+ XSCVSQQP,
+ XSCVUDQP,
+ XSCVUQQP
+)>;
+
+// 13 Cycles Decimal Floating Point operations, 2 input operands
+def : InstRW<[P10W_DF_13C, P10W_DISP_ANY, P10DF_Read, P10DF_Read],
+ (instrs
+ XSADDQP,
+ XSADDQPO,
+ XSSUBQP,
+ XSSUBQPO
+)>;
+
+// 13 Cycles Decimal Floating Point operations, 3 input operands
+def : InstRW<[P10W_DF_13C, P10W_DISP_ANY, P10DF_Read, P10DF_Read, P10DF_Read],
+ (instrs
+ BCDSR_rec,
+ XSRQPI,
+ XSRQPIX,
+ XSRQPXP
+)>;
+
+// 2-way crack instructions
+// 13 Cycles Decimal Floating Point operations, and 3 Cycles Store operations, 2 input operands
+def : InstRW<[P10W_DF_13C, P10W_DISP_EVEN, P10W_ST_3C, P10W_DISP_ANY],
+ (instrs
+ HASHST,
+ HASHSTP
+)>;
+
+// 24 Cycles Decimal Floating Point operations, 1 input operands
+def : InstRW<[P10W_DF_24C, P10W_DISP_ANY, P10DF_Read],
+ (instrs
+ BCDCTSQ_rec
+)>;
+
+// 25 Cycles Decimal Floating Point operations, 2 input operands
+def : InstRW<[P10W_DF_25C, P10W_DISP_ANY, P10DF_Read, P10DF_Read],
+ (instrs
+ XSMULQP,
+ XSMULQPO
+)>;
+
+// 25 Cycles Decimal Floating Point operations, 3 input operands
+def : InstRW<[P10W_DF_25C, P10W_DISP_ANY, P10DF_Read, P10DF_Read, P10DF_Read],
+ (instrs
+ XSMADDQP,
+ XSMADDQPO,
+ XSMSUBQP,
+ XSMSUBQPO,
+ XSNMADDQP,
+ XSNMADDQPO,
+ XSNMSUBQP,
+ XSNMSUBQPO
+)>;
+
+// 38 Cycles Decimal Floating Point operations, 2 input operands
+def : InstRW<[P10W_DF_38C, P10W_DISP_ANY, P10DF_Read, P10DF_Read],
+ (instrs
+ BCDCFSQ_rec
+)>;
+
+// 59 Cycles Decimal Floating Point operations, 2 input operands
+def : InstRW<[P10W_DF_59C, P10W_DISP_ANY, P10DF_Read, P10DF_Read],
+ (instrs
+ XSDIVQP,
+ XSDIVQPO
+)>;
+
+// 61 Cycles Decimal Floating Point operations, 2 input operands
+def : InstRW<[P10W_DF_61C, P10W_DISP_ANY, P10DF_Read, P10DF_Read],
+ (instrs
+ VDIVESQ,
+ VDIVEUQ,
+ VDIVSQ,
+ VDIVUQ
+)>;
+
+// 68 Cycles Decimal Floating Point operations, 2 input operands
+def : InstRW<[P10W_DF_68C, P10W_DISP_ANY, P10DF_Read, P10DF_Read],
+ (instrs
+ VMODSQ,
+ VMODUQ
+)>;
+
+// 77 Cycles Decimal Floating Point operations, 1 input operands
+def : InstRW<[P10W_DF_77C, P10W_DISP_ANY, P10DF_Read],
+ (instrs
+ XSSQRTQP,
+ XSSQRTQPO
+)>;
+
+// 20 Cycles Scalar Fixed-Point Divide operations, 2 input operands
+def : InstRW<[P10W_DV_20C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
+ (instrs
+ DIVW,
+ DIVWO,
+ DIVWU,
+ DIVWUO,
+ MODSW
+)>;
+
+// 2-way crack instructions
+// 20 Cycles Scalar Fixed-Point Divide operations, and 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_DV_20C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ DIVW_rec,
+ DIVWO_rec,
+ DIVWU_rec,
+ DIVWUO_rec
+)>;
+
+// 25 Cycles Scalar Fixed-Point Divide operations, 2 input operands
+def : InstRW<[P10W_DV_25C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
+ (instrs
+ DIVD,
+ DIVDO,
+ DIVDU,
+ DIVDUO,
+ DIVWE,
+ DIVWEO,
+ DIVWEU,
+ DIVWEUO
+)>;
+
+// 2-way crack instructions
+// 25 Cycles Scalar Fixed-Point Divide operations, and 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_DV_25C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ DIVD_rec,
+ DIVDO_rec,
+ DIVDU_rec,
+ DIVDUO_rec,
+ DIVWE_rec,
+ DIVWEO_rec,
+ DIVWEU_rec,
+ DIVWEUO_rec
+)>;
+
+// 27 Cycles Scalar Fixed-Point Divide operations, 2 input operands
+def : InstRW<[P10W_DV_27C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
+ (instrs
+ MODSD,
+ MODUD,
+ MODUW
+)>;
+
+// 41 Cycles Scalar Fixed-Point Divide operations, 2 input operands
+def : InstRW<[P10W_DV_41C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
+ (instrs
+ DIVDE,
+ DIVDEO,
+ DIVDEU,
+ DIVDEUO
+)>;
+
+// 2-way crack instructions
+// 41 Cycles Scalar Fixed-Point Divide operations, and 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_DV_41C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ DIVDE_rec,
+ DIVDEO_rec,
+ DIVDEU_rec,
+ DIVDEUO_rec
+)>;
+
+// 43 Cycles Scalar Fixed-Point Divide operations, 2 input operands
+def : InstRW<[P10W_DV_43C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
+ (instrs
+ VDIVSD,
+ VDIVUD
+)>;
+
+// 47 Cycles Scalar Fixed-Point Divide operations, 2 input operands
+def : InstRW<[P10W_DV_47C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
+ (instrs
+ VMODSD,
+ VMODUD
+)>;
+
+// 54 Cycles Scalar Fixed-Point Divide operations, 2 input operands
+def : InstRW<[P10W_DV_54C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
+ (instrs
+ VDIVSW,
+ VDIVUW
+)>;
+
+// 60 Cycles Scalar Fixed-Point Divide operations, 2 input operands
+def : InstRW<[P10W_DV_60C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
+ (instrs
+ VMODSW,
+ VMODUW
+)>;
+
+// 75 Cycles Scalar Fixed-Point Divide operations, 2 input operands
+def : InstRW<[P10W_DV_75C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
+ (instrs
+ VDIVESD,
+ VDIVEUD
+)>;
+
+// 83 Cycles Scalar Fixed-Point Divide operations, 2 input operands
+def : InstRW<[P10W_DV_83C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
+ (instrs
+ VDIVESW,
+ VDIVEUW
+)>;
+
+// 5 Cycles Fixed-Point and BCD operations, 1 input operands
+def : InstRW<[P10W_DX_5C, P10W_DISP_ANY, P10DX_Read],
+ (instrs
+ BCDCTN_rec,
+ VMUL10CUQ,
+ VMUL10UQ,
+ XSXSIGQP
+)>;
+
+// 5 Cycles Fixed-Point and BCD operations, 2 input operands
+def : InstRW<[P10W_DX_5C, P10W_DISP_ANY, P10DX_Read, P10DX_Read],
+ (instrs
+ BCDCFN_rec,
+ BCDCFZ_rec,
+ BCDCPSGN_rec,
+ BCDCTZ_rec,
+ BCDSETSGN_rec,
+ BCDUS_rec,
+ BCDUTRUNC_rec,
+ VADDCUQ,
+ VADDUQM,
+ VMUL10ECUQ,
+ VMUL10EUQ,
+ VSUBCUQ,
+ VSUBUQM,
+ XSCMPEXPQP,
+ XSCMPOQP,
+ XSCMPUQP,
+ XSTSTDCQP,
+ XXGENPCVBM
+)>;
+
+// 5 Cycles Fixed-Point and BCD operations, 3 input operands
+def : InstRW<[P10W_DX_5C, P10W_DISP_ANY, P10DX_Read, P10DX_Read, P10DX_Read],
+ (instrs
+ BCDS_rec,
+ BCDTRUNC_rec,
+ VADDECUQ,
+ VADDEUQM,
+ VSUBECUQ,
+ VSUBEUQM
+)>;
+
+// 4 Cycles ALU2 operations, 0 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_ANY],
+ (instrs
+ TRAP, TW
+)>;
+
+// 4 Cycles ALU2 operations, 1 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read],
+ (instrs
+ CNTLZD,
+ CNTLZD_rec,
+ CNTLZW, CNTLZW8,
+ CNTLZW8_rec, CNTLZW_rec,
+ CNTTZD,
+ CNTTZD_rec,
+ CNTTZW, CNTTZW8,
+ CNTTZW8_rec, CNTTZW_rec,
+ FTSQRT,
+ MTVSRBM,
+ MTVSRBMI,
+ MTVSRDM,
+ MTVSRHM,
+ MTVSRQM,
+ MTVSRWM,
+ POPCNTB, POPCNTB8,
+ POPCNTD,
+ POPCNTW,
+ VCLZB,
+ VCLZD,
+ VCLZH,
+ VCLZW,
+ VCTZB,
+ VCTZD,
+ VCTZH,
+ VCTZW,
+ VEXPANDBM,
+ VEXPANDDM,
+ VEXPANDHM,
+ VEXPANDQM,
+ VEXPANDWM,
+ VEXTRACTBM,
+ VEXTRACTDM,
+ VEXTRACTHM,
+ VEXTRACTQM,
+ VEXTRACTWM,
+ VPOPCNTB,
+ VPOPCNTD,
+ VPOPCNTH,
+ VPOPCNTW,
+ VPRTYBD,
+ VPRTYBW,
+ XSCVHPDP,
+ XSCVSPDPN,
+ XSTSQRTDP,
+ XVCVHPSP,
+ XVTLSBB,
+ XVTSQRTDP,
+ XVTSQRTSP
+)>;
+
+// 4 Cycles ALU2 operations, 2 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read],
+ (instrs
+ CMPEQB,
+ EXTSWSLI_32_64_rec, EXTSWSLI_rec,
+ FCMPOD, FCMPOS,
+ FCMPUD, FCMPUS,
+ FTDIV,
+ SLD_rec,
+ SLW8_rec, SLW_rec,
+ SRD_rec,
+ SRW8_rec, SRW_rec,
+ VABSDUB,
+ VABSDUH,
+ VABSDUW,
+ VADDCUW,
+ VADDSBS,
+ VADDSHS,
+ VADDSWS,
+ VADDUBS,
+ VADDUHS,
+ VADDUWS,
+ VAVGSB,
+ VAVGSH,
+ VAVGSW,
+ VAVGUB,
+ VAVGUH,
+ VAVGUW,
+ VCMPBFP,
+ VCMPBFP_rec,
+ VCMPEQFP,
+ VCMPEQFP_rec,
+ VCMPEQUB_rec,
+ VCMPEQUD_rec,
+ VCMPEQUH_rec,
+ VCMPEQUQ,
+ VCMPEQUQ_rec,
+ VCMPEQUW_rec,
+ VCMPGEFP,
+ VCMPGEFP_rec,
+ VCMPGTFP,
+ VCMPGTFP_rec,
+ VCMPGTSB_rec,
+ VCMPGTSD_rec,
+ VCMPGTSH_rec,
+ VCMPGTSQ,
+ VCMPGTSQ_rec,
+ VCMPGTSW_rec,
+ VCMPGTUB_rec,
+ VCMPGTUD_rec,
+ VCMPGTUH_rec,
+ VCMPGTUQ,
+ VCMPGTUQ_rec,
+ VCMPGTUW_rec,
+ VCMPNEB_rec,
+ VCMPNEH_rec,
+ VCMPNEW_rec,
+ VCMPNEZB_rec,
+ VCMPNEZH_rec,
+ VCMPNEZW_rec,
+ VCMPSQ,
+ VCMPUQ,
+ VCNTMBB,
+ VCNTMBD,
+ VCNTMBH,
+ VCNTMBW,
+ VMAXFP,
+ VMINFP,
+ VSUBCUW,
+ VSUBSBS,
+ VSUBSHS,
+ VSUBSWS,
+ VSUBUBS,
+ VSUBUHS,
+ VSUBUWS,
+ XSCMPEQDP,
+ XSCMPEXPDP,
+ XSCMPGEDP,
+ XSCMPGTDP,
+ XSCMPODP,
+ XSCMPUDP,
+ XSMAXCDP,
+ XSMAXDP,
+ XSMAXJDP,
+ XSMINCDP,
+ XSMINDP,
+ XSMINJDP,
+ XSTDIVDP,
+ XSTSTDCDP,
+ XSTSTDCSP,
+ XVCMPEQDP,
+ XVCMPEQDP_rec,
+ XVCMPEQSP,
+ XVCMPEQSP_rec,
+ XVCMPGEDP,
+ XVCMPGEDP_rec,
+ XVCMPGESP,
+ XVCMPGESP_rec,
+ XVCMPGTDP,
+ XVCMPGTDP_rec,
+ XVCMPGTSP,
+ XVCMPGTSP_rec,
+ XVMAXDP,
+ XVMAXSP,
+ XVMINDP,
+ XVMINSP,
+ XVTDIVDP,
+ XVTDIVSP,
+ XVTSTDCDP,
+ XVTSTDCSP
+)>;
+
+// 4 Cycles ALU2 operations, 3 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read, P10F2_Read],
+ (instrs
+ CMPRB, CMPRB8,
+ RLDCL_rec,
+ RLDCR_rec,
+ RLDIC_rec,
+ RLDICL_32_rec, RLDICL_rec,
+ RLDICR_rec,
+ TD,
+ TDI,
+ TWI,
+ VSHASIGMAD,
+ VSHASIGMAW
+)>;
+
+// 4 Cycles ALU2 operations, 4 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read, P10F2_Read, P10F2_Read],
+ (instrs
+ RLDIMI_rec,
+ RLWINM8_rec, RLWINM_rec,
+ RLWNM8_rec, RLWNM_rec
+)>;
+
+// 4 Cycles ALU2 operations, 5 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read, P10F2_Read, P10F2_Read, P10F2_Read],
+ (instrs
+ RLWIMI8_rec, RLWIMI_rec
+)>;
+
+// Single crack instructions
+// 4 Cycles ALU2 operations, 2 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_DISP_ANY, P10F2_Read, P10F2_Read],
+ (instrs
+ SRAD_rec,
+ SRADI_rec,
+ SRAW_rec,
+ SRAWI_rec
+)>;
+
+// Single crack instructions
+// 4 Cycles ALU2 operations, 3 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_DISP_ANY, P10F2_Read, P10F2_Read, P10F2_Read],
+ (instrs
+ TABORTDC,
+ TABORTDCI,
+ TABORTWC,
+ TABORTWCI
+)>;
+
+// 2-way crack instructions
+// 4 Cycles ALU2 operations, and 4 Cycles Permute operations, 2 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_PM_4C, P10W_DISP_ANY],
+ (instrs
+ VRLQ,
+ VRLQNM,
+ VSLQ,
+ VSRAQ,
+ VSRQ
+)>;
+
+// 2-way crack instructions
+// 4 Cycles ALU2 operations, and 4 Cycles Permute operations, 3 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_PM_4C, P10W_DISP_ANY],
+ (instrs
+ VRLQMI
+)>;
+
+// 2-way crack instructions
+// 4 Cycles ALU2 operations, and 4 Cycles ALU2 operations, 0 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_PAIR, P10W_F2_4C],
+ (instrs
+ MFCR, MFCR8
+)>;
+
+// 2 Cycles ALU operations, 1 input operands
+def : InstRW<[P10W_FX_2C, P10W_DISP_ANY, P10FX_Read],
+ (instrs
+ MTCTR, MTCTR8, MTCTR8loop, MTCTRloop,
+ MTLR, MTLR8
+)>;
+
+// 3 Cycles ALU operations, 0 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ CR6SET, CREQV, CRSET,
+ DSS, DSSALL,
+ MCRXRX,
+ MFCTR, MFCTR8,
+ MFLR, MFLR8,
+ NOP, NOP_GT_PWR6, NOP_GT_PWR7, ORI, ORI8,
+ VXOR, V_SET0, V_SET0B, V_SET0H,
+ XXLEQV, XXLEQVOnes,
+ XXLXOR, XXLXORdpz, XXLXORspz, XXLXORz
+)>;
+
+// 3 Cycles ALU operations, 1 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read],
+ (instrs
+ ADDI, ADDI8, ADDIdtprelL32, ADDItlsldLADDR32, ADDItocL, LI, LI8,
+ ADDIS, ADDIS8, ADDISdtprelHA32, ADDIStocHA, ADDIStocHA8, LIS, LIS8,
+ ADDME, ADDME8,
+ ADDME8O, ADDMEO,
+ ADDZE, ADDZE8,
+ ADDZE8O, ADDZEO,
+ EXTSB, EXTSB8, EXTSB8_32_64,
+ EXTSB8_rec, EXTSB_rec,
+ EXTSH, EXTSH8, EXTSH8_32_64,
+ EXTSH8_rec, EXTSH_rec,
+ EXTSW, EXTSW_32, EXTSW_32_64,
+ EXTSW_32_64_rec, EXTSW_rec,
+ FABSD, FABSS,
+ FMR,
+ FNABSD, FNABSS,
+ FNEGD, FNEGS,
+ MCRF,
+ MFOCRF, MFOCRF8,
+ MFVRD, MFVSRD,
+ MFVRWZ, MFVSRWZ,
+ MTOCRF, MTOCRF8,
+ MTVRD, MTVSRD,
+ MTVRWA, MTVSRWA,
+ MTVRWZ, MTVSRWZ,
+ NEG, NEG8,
+ NEG8_rec, NEG_rec,
+ NEG8O, NEGO,
+ SETB, SETB8,
+ SETBC, SETBC8,
+ SETBCR, SETBCR8,
+ SETNBC, SETNBC8,
+ SETNBCR, SETNBCR8,
+ SUBFME, SUBFME8,
+ SUBFME8O, SUBFMEO,
+ SUBFZE, SUBFZE8,
+ SUBFZE8O, SUBFZEO,
+ VEXTSB2D, VEXTSB2Ds,
+ VEXTSB2W, VEXTSB2Ws,
+ VEXTSD2Q,
+ VEXTSH2D, VEXTSH2Ds,
+ VEXTSH2W, VEXTSH2Ws,
+ VEXTSW2D, VEXTSW2Ds,
+ VNEGD,
+ VNEGW,
+ WAIT,
+ XSABSDP,
+ XSABSQP,
+ XSNABSDP,
+ XSNABSQP,
+ XSNEGDP,
+ XSNEGQP,
+ XSXEXPDP,
+ XSXEXPQP,
+ XSXSIGDP,
+ XVABSDP,
+ XVABSSP,
+ XVNABSDP,
+ XVNABSSP,
+ XVNEGDP,
+ XVNEGSP,
+ XVXEXPDP,
+ XVXEXPSP,
+ XVXSIGDP,
+ XVXSIGSP
+)>;
+
+// 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read],
+ (instrs
+ ADD4, ADD4TLS, ADD8, ADD8TLS, ADD8TLS_,
+ ADD4_rec, ADD8_rec,
+ ADDE, ADDE8,
+ ADDE8O, ADDEO,
+ ADDIC, ADDIC8,
+ ADD4O, ADD8O,
+ AND, AND8,
+ AND8_rec, AND_rec,
+ ANDC, ANDC8,
+ ANDC8_rec, ANDC_rec,
+ ANDI8_rec, ANDI_rec,
+ ANDIS8_rec, ANDIS_rec,
+ CMPD, CMPW,
+ CMPB, CMPB8,
+ CMPDI, CMPWI,
+ CMPLD, CMPLW,
+ CMPLDI, CMPLWI,
+ CRAND,
+ CRANDC,
+ CRNAND,
+ CRNOR,
+ CROR,
+ CRORC,
+ CR6UNSET, CRUNSET, CRXOR,
+ EQV, EQV8,
+ EQV8_rec, EQV_rec,
+ EXTSWSLI, EXTSWSLI_32_64,
+ FCPSGND, FCPSGNS,
+ NAND, NAND8,
+ NAND8_rec, NAND_rec,
+ NOR, NOR8,
+ NOR8_rec, NOR_rec,
+ COPY, OR, OR8,
+ OR8_rec, OR_rec,
+ ORC, ORC8,
+ ORC8_rec, ORC_rec,
+ ORIS, ORIS8,
+ SLD,
+ SLW, SLW8,
+ SRAD,
+ SRADI, SRADI_32,
+ SRAW,
+ SRAWI,
+ SRD,
+ SRW, SRW8,
+ SUBF, SUBF8,
+ SUBF8_rec, SUBF_rec,
+ SUBFE, SUBFE8,
+ SUBFE8O, SUBFEO,
+ SUBFIC, SUBFIC8,
+ SUBF8O, SUBFO,
+ VADDUBM,
+ VADDUDM,
+ VADDUHM,
+ VADDUWM,
+ VAND,
+ VANDC,
+ VCMPEQUB,
+ VCMPEQUD,
+ VCMPEQUH,
+ VCMPEQUW,
+ VCMPGTSB,
+ VCMPGTSD,
+ VCMPGTSH,
+ VCMPGTSW,
+ VCMPGTUB,
+ VCMPGTUD,
+ VCMPGTUH,
+ VCMPGTUW,
+ VCMPNEB,
+ VCMPNEH,
+ VCMPNEW,
+ VCMPNEZB,
+ VCMPNEZH,
+ VCMPNEZW,
+ VEQV,
+ VMAXSB,
+ VMAXSD,
+ VMAXSH,
+ VMAXSW,
+ VMAXUB,
+ VMAXUD,
+ VMAXUH,
+ VMAXUW,
+ VMINSB,
+ VMINSD,
+ VMINSH,
+ VMINSW,
+ VMINUB,
+ VMINUD,
+ VMINUH,
+ VMINUW,
+ VMRGEW,
+ VMRGOW,
+ VNAND,
+ VNOR,
+ VOR,
+ VORC,
+ VRLB,
+ VRLD,
+ VRLDNM,
+ VRLH,
+ VRLW,
+ VRLWNM,
+ VSLB,
+ VSLD,
+ VSLH,
+ VSLW,
+ VSRAB,
+ VSRAD,
+ VSRAH,
+ VSRAW,
+ VSRB,
+ VSRD,
+ VSRH,
+ VSRW,
+ VSUBUBM,
+ VSUBUDM,
+ VSUBUHM,
+ VSUBUWM,
+ XOR, XOR8,
+ XOR8_rec, XOR_rec,
+ XORI, XORI8,
+ XORIS, XORIS8,
+ XSCPSGNDP,
+ XSCPSGNQP,
+ XSIEXPDP,
+ XSIEXPQP,
+ XVCPSGNDP,
+ XVCPSGNSP,
+ XVIEXPDP,
+ XVIEXPSP,
+ XXLAND,
+ XXLANDC,
+ XXLNAND,
+ XXLNOR,
+ XXLOR, XXLORf,
+ XXLORC
+)>;
+
+// 3 Cycles ALU operations, 3 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read, P10FX_Read],
+ (instrs
+ ADDEX, ADDEX8,
+ DST, DST64, DSTT, DSTT64,
+ DSTST, DSTST64, DSTSTT, DSTSTT64,
+ ISEL, ISEL8,
+ RLDCL,
+ RLDCR,
+ RLDIC,
+ RLDICL, RLDICL_32, RLDICL_32_64,
+ RLDICR, RLDICR_32,
+ VRLDMI,
+ VRLWMI,
+ VSEL,
+ XXSEL
+)>;
+
+// 3 Cycles ALU operations, 4 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read, P10FX_Read, P10FX_Read],
+ (instrs
+ RLDIMI,
+ RLWINM, RLWINM8,
+ RLWNM, RLWNM8
+)>;
+
+// 3 Cycles ALU operations, 5 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read, P10FX_Read, P10FX_Read, P10FX_Read],
+ (instrs
+ RLWIMI, RLWIMI8
+)>;
+
+// Single crack instructions
+// 3 Cycles ALU operations, 0 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_DISP_ANY],
+ (instrs
+ MFFS,
+ MFFS_rec,
+ MFFSL,
+ MFVSCR,
+ TRECHKPT
+)>;
+
+// Single crack instructions
+// 3 Cycles ALU operations, 1 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10FX_Read],
+ (instrs
+ ADDME8_rec, ADDME_rec,
+ ADDME8O_rec, ADDMEO_rec,
+ ADDZE8_rec, ADDZE_rec,
+ ADDZE8O_rec, ADDZEO_rec,
+ MCRFS,
+ MFFSCDRN,
+ MFFSCDRNI,
+ MFFSCRN,
+ MFFSCRNI,
+ MTFSB0,
+ MTVSCR,
+ NEG8O_rec, NEGO_rec,
+ SUBFME8_rec, SUBFME_rec,
+ SUBFME8O_rec, SUBFMEO_rec,
+ SUBFZE8_rec, SUBFZE_rec,
+ SUBFZE8O_rec, SUBFZEO_rec,
+ TABORT,
+ TBEGIN,
+ TRECLAIM,
+ TSR
+)>;
+
+// Single crack instructions
+// 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10FX_Read, P10FX_Read],
+ (instrs
+ ADDE8_rec, ADDE_rec,
+ ADDE8O_rec, ADDEO_rec,
+ ADDIC_rec,
+ ADD4O_rec, ADD8O_rec,
+ SUBFE8_rec, SUBFE_rec,
+ SUBFE8O_rec, SUBFEO_rec,
+ SUBF8O_rec, SUBFO_rec
+)>;
+
+// 2-way crack instructions
+// 3 Cycles ALU operations, and 3 Cycles ALU operations, 0 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ HRFID,
+ MFFSCE,
+ RFID,
+ STOP
+)>;
+
+// 2-way crack instructions
+// 3 Cycles ALU operations, and 3 Cycles ALU operations, 1 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10FX_Read],
+ (instrs
+ FABSD_rec, FABSS_rec,
+ FMR_rec,
+ FNABSD_rec, FNABSS_rec,
+ FNEGD_rec, FNEGS_rec,
+ MTFSB1,
+ RFEBB,
+ SC
+)>;
+
+// 2-way crack instructions
+// 3 Cycles ALU operations, and 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read],
+ (instrs
+ ADDC, ADDC8,
+ ADDC8_rec, ADDC_rec,
+ ADDC8O, ADDCO,
+ FCPSGND_rec, FCPSGNS_rec,
+ MTFSF, MTFSFb,
+ MTFSFI, MTFSFIb,
+ SUBFC, SUBFC8,
+ SUBFC8_rec, SUBFC_rec,
+ SUBFC8O, SUBFCO
+)>;
+
+// 2-way crack instructions
+// 3 Cycles ALU operations, and 3 Cycles ALU operations, 3 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read, P10FX_Read],
+ (instrs
+ MTFSFI_rec
+)>;
+
+// 2-way crack instructions
+// 3 Cycles ALU operations, and 3 Cycles ALU operations, 4 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read, P10FX_Read, P10FX_Read],
+ (instrs
+ MTFSF_rec
+)>;
+
+// 4-way crack instructions
+// 3 Cycles ALU operations, 3 Cycles ALU operations, 3 Cycles ALU operations, and 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read],
+ (instrs
+ ADDC8O_rec, ADDCO_rec,
+ SUBFC8O_rec, SUBFCO_rec
+)>;
+
+// 2-way crack instructions
+// 3 Cycles ALU operations, and 4 Cycles Permute operations, 1 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_PM_4C, P10W_DISP_ANY],
+ (instrs
+ VSTRIBL_rec,
+ VSTRIBR_rec,
+ VSTRIHL_rec,
+ VSTRIHR_rec
+)>;
+
+// 2-way crack instructions
+// 3 Cycles ALU operations, and 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_PAIR, P10W_FX_3C, P10FX_Read, P10FX_Read],
+ (instrs
+ MTCRF, MTCRF8
+)>;
+
+// 6 Cycles Load operations, 1 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_ANY, P10LD_Read],
+ (instrs
+ LBZ, LBZ8,
+ LD, LDtoc, LDtocBA, LDtocCPT, LDtocJTI, LDtocL, SPILLTOVSR_LD,
+ LDBRX,
+ DFLOADf32, DFLOADf64, LFD,
+ LFDX, XFLOADf32, XFLOADf64,
+ LFIWAX, LIWAX,
+ LFIWZX, LIWZX,
+ LHA, LHA8,
+ LHAX, LHAX8,
+ LHBRX, LHBRX8,
+ LHZ, LHZ8,
+ LVEBX,
+ LVEHX,
+ LVEWX,
+ LVX,
+ LVXL,
+ LWA, LWA_32,
+ LWAX, LWAX_32,
+ LWBRX, LWBRX8,
+ LWZ, LWZ8, LWZtoc, LWZtocL,
+ LXSD,
+ LXSDX,
+ LXSIBZX,
+ LXSIHZX,
+ LXSIWAX,
+ LXSIWZX,
+ LXV,
+ LXVB16X,
+ LXVD2X,
+ LXVDSX,
+ LXVH8X,
+ LXVRBX,
+ LXVRDX,
+ LXVRHX,
+ LXVRWX,
+ LXVW4X,
+ LXVWSX,
+ LXVX
+)>;
+
+// 6 Cycles Load operations, 2 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_ANY, P10LD_Read, P10LD_Read],
+ (instrs
+ DCBT,
+ DCBTST,
+ ICBT,
+ LBZX, LBZX8, LBZXTLS, LBZXTLS_, LBZXTLS_32,
+ LDX, LDXTLS, LDXTLS_, SPILLTOVSR_LDX,
+ LHZX, LHZX8, LHZXTLS, LHZXTLS_, LHZXTLS_32,
+ LWZX, LWZX8, LWZXTLS, LWZXTLS_, LWZXTLS_32,
+ LXVL,
+ LXVLL
+)>;
+
+// 2-way crack instructions
+// 6 Cycles Load operations, and 13 Cycles Decimal Floating Point operations, 2 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_DF_13C, P10W_DISP_ANY],
+ (instrs
+ HASHCHK,
+ HASHCHKP
+)>;
+
+// Single crack instructions
+// 6 Cycles Load operations, 0 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_DISP_ANY],
+ (instrs
+ SLBIA
+)>;
+
+// Single crack instructions
+// 6 Cycles Load operations, 1 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_DISP_ANY, P10LD_Read],
+ (instrs
+ DARN,
+ LBARX, LBARXL,
+ LDARX, LDARXL,
+ LHARX, LHARXL,
+ LWARX, LWARXL,
+ SLBFEE_rec,
+ SLBIE,
+ SLBMFEE,
+ SLBMFEV
+)>;
+
+// Single crack instructions
+// 6 Cycles Load operations, 2 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_DISP_ANY, P10LD_Read, P10LD_Read],
+ (instrs
+ LBZCIX,
+ LDCIX,
+ LHZCIX,
+ LWZCIX,
+ MTSPR, MTSPR8, MTSR, MTVRSAVE, MTVRSAVEv
+)>;
+
+// Expand instructions
+// 6 Cycles Load operations, 6 Cycles Load operations, 6 Cycles Load operations, and 6 Cycles Load operations, 1 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10LD_Read],
+ (instrs
+ LMW
+)>;
+
+// Expand instructions
+// 6 Cycles Load operations, 6 Cycles Load operations, 6 Cycles Load operations, and 6 Cycles Load operations, 2 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10LD_Read, P10LD_Read],
+ (instrs
+ LSWI
+)>;
+
+// 2-way crack instructions
+// 6 Cycles Load operations, and 3 Cycles Simple Fixed-point (SFX) operations, 1 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_SX_3C, P10W_DISP_ANY],
+ (instrs
+ LBZU, LBZU8,
+ LBZUX, LBZUX8,
+ LDU,
+ LDUX,
+ LFDU,
+ LFDUX,
+ LHAU, LHAU8,
+ LHAUX, LHAUX8,
+ LHZU, LHZU8,
+ LHZUX, LHZUX8,
+ LWAUX,
+ LWZU, LWZU8,
+ LWZUX, LWZUX8
+)>;
+
+// 6 Cycles Load operations, 1 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10LD_Read],
+ (instrs
+ PLBZ, PLBZ8, PLBZ8pc, PLBZpc,
+ PLD, PLDpc,
+ PLFD, PLFDpc,
+ PLFS, PLFSpc,
+ PLHA, PLHA8, PLHA8pc, PLHApc,
+ PLHZ, PLHZ8, PLHZ8pc, PLHZpc,
+ PLWA, PLWA8, PLWA8pc, PLWApc,
+ PLWZ, PLWZ8, PLWZ8pc, PLWZpc,
+ PLXSD, PLXSDpc,
+ PLXSSP, PLXSSPpc,
+ PLXV, PLXVpc,
+ PLXVP, PLXVPpc
+)>;
+
+// 2-way crack instructions
+// 6 Cycles Load operations, and 4 Cycles ALU2 operations, 1 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_F2_4C],
+ (instrs
+ LFS,
+ LFSX,
+ LXSSP,
+ LXSSPX
+)>;
+
+// 4-way crack instructions
+// 6 Cycles Load operations, 4 Cycles ALU2 operations, 3 Cycles Simple Fixed-point (SFX) operations, and 3 Cycles ALU operations, 1 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_F2_4C, P10W_SX_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ LFSU,
+ LFSUX
+)>;
+
+// 2-way crack instructions
+// 6 Cycles Load operations, and 6 Cycles Load operations, 1 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_LD_6C, P10W_DISP_PAIR, P10LD_Read],
+ (instrs
+ TLBIEL
+)>;
+
+// 2-way crack instructions
+// 6 Cycles Load operations, and 6 Cycles Load operations, 2 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_LD_6C, P10W_DISP_PAIR, P10LD_Read, P10LD_Read],
+ (instrs
+ SLBMTE
+)>;
+
+// 2-way crack instructions
+// 6 Cycles Load operations, and 3 Cycles Simple Fixed-point (SFX) operations, 1 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_SX_3C],
+ (instrs
+ LXVP,
+ LXVPX
+)>;
+
+// Single crack instructions
+// 13 Cycles Unknown operations, 1 input operands
+def : InstRW<[P10W_MFL_13C, P10W_DISP_EVEN, P10W_DISP_ANY],
+ (instrs
+ MFSPR, MFSPR8, MFSR, MFTB8, MFVRSAVE, MFVRSAVEv
+)>;
+
+// 10 Cycles SIMD Matrix Multiply Engine operations, 0 input operands
+def : InstRW<[P10W_MM_10C, P10W_DISP_ANY],
+ (instrs
+ XXSETACCZ
+)>;
+
+// 10 Cycles SIMD Matrix Multiply Engine operations, 2 input operands
+def : InstRW<[P10W_MM_10C, P10W_DISP_ANY, P10MM_Read, P10MM_Read],
+ (instrs
+ XVBF16GER2,
+ XVF16GER2,
+ XVF32GER,
+ XVF64GER,
+ XVI16GER2,
+ XVI16GER2S,
+ XVI4GER8,
+ XVI8GER4
+)>;
+
+// 10 Cycles SIMD Matrix Multiply Engine operations, 3 input operands
+def : InstRW<[P10W_MM_10C, P10W_DISP_ANY, P10MM_Read, P10MM_Read, P10MM_Read],
+ (instrs
+ XVBF16GER2NN,
+ XVBF16GER2NP,
+ XVBF16GER2PN,
+ XVBF16GER2PP,
+ XVF16GER2NN,
+ XVF16GER2NP,
+ XVF16GER2PN,
+ XVF16GER2PP,
+ XVF32GERNN,
+ XVF32GERNP,
+ XVF32GERPN,
+ XVF32GERPP,
+ XVF64GERNN,
+ XVF64GERNP,
+ XVF64GERPN,
+ XVF64GERPP,
+ XVI16GER2PP,
+ XVI16GER2SPP,
+ XVI4GER8PP,
+ XVI8GER4PP,
+ XVI8GER4SPP
+)>;
+
+// 10 Cycles SIMD Matrix Multiply Engine operations, 4 input operands
+def : InstRW<[P10W_MM_10C, P10W_DISP_PAIR, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read],
+ (instrs
+ PMXVF32GER,
+ PMXVF64GER
+)>;
+
+// 10 Cycles SIMD Matrix Multiply Engine operations, 5 input operands
+def : InstRW<[P10W_MM_10C, P10W_DISP_PAIR, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read],
+ (instrs
+ PMXVBF16GER2,
+ PMXVF16GER2,
+ PMXVF32GERNN,
+ PMXVF32GERNP,
+ PMXVF32GERPN,
+ PMXVF32GERPP,
+ PMXVF64GERNN,
+ PMXVF64GERNP,
+ PMXVF64GERPN,
+ PMXVF64GERPP,
+ PMXVI16GER2,
+ PMXVI16GER2S,
+ PMXVI4GER8,
+ PMXVI8GER4
+)>;
+
+// 10 Cycles SIMD Matrix Multiply Engine operations, 6 input operands
+def : InstRW<[P10W_MM_10C, P10W_DISP_PAIR, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read],
+ (instrs
+ PMXVBF16GER2NN,
+ PMXVBF16GER2NP,
+ PMXVBF16GER2PN,
+ PMXVBF16GER2PP,
+ PMXVF16GER2NN,
+ PMXVF16GER2NP,
+ PMXVF16GER2PN,
+ PMXVF16GER2PP,
+ PMXVI16GER2PP,
+ PMXVI16GER2SPP,
+ PMXVI4GER8PP,
+ PMXVI8GER4PP,
+ PMXVI8GER4SPP
+)>;
+
+// 2-way crack instructions
+// 10 Cycles SIMD Matrix Multiply Engine operations, and 3 Cycles ALU operations, 1 input operands
+def : InstRW<[P10W_MM_10C, P10W_DISP_PAIR, P10W_FX_3C],
+ (instrs
+ XXMTACC
+)>;
+
+// 4-way crack instructions
+// 10 Cycles SIMD Matrix Multiply Engine operations, 3 Cycles ALU operations, 10 Cycles SIMD Matrix Multiply Engine operations, and 3 Cycles ALU operations, 1 input operands
+def : InstRW<[P10W_MM_10C, P10W_DISP_PAIR, P10W_FX_3C, P10W_MM_10C, P10W_DISP_PAIR, P10W_FX_3C],
+ (instrs
+ XXMFACC
+)>;
+
+// 5 Cycles GPR Multiply operations, 2 input operands
+def : InstRW<[P10W_MU_5C, P10W_DISP_ANY, P10MU_Read, P10MU_Read],
+ (instrs
+ MULHD,
+ MULHDU,
+ MULHW,
+ MULHWU,
+ MULLD,
+ MULLDO,
+ MULLI, MULLI8,
+ MULLW,
+ MULLWO,
+ VMULHSD,
+ VMULHUD,
+ VMULLD
+)>;
+
+// 5 Cycles GPR Multiply operations, 3 input operands
+def : InstRW<[P10W_MU_5C, P10W_DISP_ANY, P10MU_Read, P10MU_Read, P10MU_Read],
+ (instrs
+ MADDHD,
+ MADDHDU,
+ MADDLD, MADDLD8
+)>;
+
+// 2-way crack instructions
+// 5 Cycles GPR Multiply operations, and 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_MU_5C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ MULHD_rec,
+ MULHDU_rec,
+ MULHW_rec,
+ MULHWU_rec,
+ MULLD_rec,
+ MULLDO_rec,
+ MULLW_rec,
+ MULLWO_rec
+)>;
+
+// 4 Cycles Permute operations, 0 input operands
+def : InstRW<[P10W_PM_4C, P10W_DISP_ANY],
+ (instrs
+ VSPLTISW, V_SETALLONES, V_SETALLONESB, V_SETALLONESH
+)>;
+
+// 4 Cycles Permute operations, 1 input operands
+def : InstRW<[P10W_PM_4C, P10W_DISP_ANY, P10PM_Read],
+ (instrs
+ LVSL,
+ LVSR,
+ MFVSRLD,
+ MTVSRWS,
+ VCLZLSBB,
+ VCTZLSBB,
+ VGBBD,
+ VPRTYBQ,
+ VSPLTISB,
+ VSPLTISH,
+ VSTRIBL,
+ VSTRIBR,
+ VSTRIHL,
+ VSTRIHR,
+ VUPKHPX,
+ VUPKHSB,
+ VUPKHSH,
+ VUPKHSW,
+ VUPKLPX,
+ VUPKLSB,
+ VUPKLSH,
+ VUPKLSW,
+ XVCVBF16SPN,
+ XXBRD,
+ XXBRH,
+ XXBRQ,
+ XXBRW,
+ XXSPLTIB
+)>;
+
+// 4 Cycles Permute operations, 2 input operands
+def : InstRW<[P10W_PM_4C, P10W_DISP_ANY, P10PM_Read, P10PM_Read],
+ (instrs
+ BPERMD,
+ MTVSRDD,
+ VBPERMD,
+ VBPERMQ,
+ VCLRLB,
+ VCLRRB,
+ VEXTRACTD,
+ VEXTRACTUB,
+ VEXTRACTUH,
+ VEXTRACTUW,
+ VEXTUBLX,
+ VEXTUBRX,
+ VEXTUHLX,
+ VEXTUHRX,
+ VEXTUWLX,
+ VEXTUWRX,
+ VINSERTD,
+ VINSERTW,
+ VMRGHB,
+ VMRGHH,
+ VMRGHW,
+ VMRGLB,
+ VMRGLH,
+ VMRGLW,
+ VPKPX,
+ VPKSDSS,
+ VPKSDUS,
+ VPKSHSS,
+ VPKSHUS,
+ VPKSWSS,
+ VPKSWUS,
+ VPKUDUM,
+ VPKUDUS,
+ VPKUHUM,
+ VPKUHUS,
+ VPKUWUM,
+ VPKUWUS,
+ VSL,
+ VSLO,
+ VSLV,
+ VSPLTB, VSPLTBs,
+ VSPLTH, VSPLTHs,
+ VSPLTW,
+ VSR,
+ VSRO,
+ VSRV,
+ XXEXTRACTUW,
+ XXGENPCVDM,
+ XXGENPCVHM,
+ XXGENPCVWM,
+ XXMRGHW,
+ XXMRGLW,
+ XXPERM,
+ XXPERMDI, XXPERMDIs,
+ XXPERMR,
+ XXSLDWI, XXSLDWIs,
+ XXSPLTW, XXSPLTWs
+)>;
+
+// 4 Cycles Permute operations, 3 input operands
+def : InstRW<[P10W_PM_4C, P10W_DISP_ANY, P10PM_Read, P10PM_Read, P10PM_Read],
+ (instrs
+ VEXTDDVLX,
+ VEXTDDVRX,
+ VEXTDUBVLX,
+ VEXTDUBVRX,
+ VEXTDUHVLX,
+ VEXTDUHVRX,
+ VEXTDUWVLX,
+ VEXTDUWVRX,
+ VINSBLX,
+ VINSBRX,
+ VINSBVLX,
+ VINSBVRX,
+ VINSD,
+ VINSDLX,
+ VINSDRX,
+ VINSERTB,
+ VINSERTH,
+ VINSHLX,
+ VINSHRX,
+ VINSHVLX,
+ VINSHVRX,
+ VINSW,
+ VINSWLX,
+ VINSWRX,
+ VINSWVLX,
+ VINSWVRX,
+ VPERM,
+ VPERMR,
+ VPERMXOR,
+ VSLDBI,
+ VSLDOI,
+ VSRDBI,
+ XXINSERTW
+)>;
+
+// 2-way crack instructions
+// 4 Cycles Permute operations, and 7 Cycles VMX Multiply operations, 2 input operands
+def : InstRW<[P10W_PM_4C, P10W_DISP_EVEN, P10W_vMU_7C, P10W_DISP_ANY],
+ (instrs
+ VSUMSWS
+)>;
+
+// 4 Cycles Permute operations, 1 input operands
+def : InstRW<[P10W_PM_4C, P10W_DISP_PAIR, P10PM_Read],
+ (instrs
+ XXSPLTIDP,
+ XXSPLTIW
+)>;
+
+// 4 Cycles Permute operations, 3 input operands
+def : InstRW<[P10W_PM_4C, P10W_DISP_PAIR, P10PM_Read, P10PM_Read, P10PM_Read],
+ (instrs
+ XXBLENDVB,
+ XXBLENDVD,
+ XXBLENDVH,
+ XXBLENDVW,
+ XXSPLTI32DX
+)>;
+
+// 4 Cycles Permute operations, 4 input operands
+def : InstRW<[P10W_PM_4C, P10W_DISP_PAIR, P10PM_Read, P10PM_Read, P10PM_Read, P10PM_Read],
+ (instrs
+ XXEVAL,
+ XXPERMX
+)>;
+
+// 3 Cycles Store operations, 1 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_ANY, P10ST_Read],
+ (instrs
+ DCBST,
+ DCBZ,
+ ICBI
+)>;
+
+// 3 Cycles Store operations, 2 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read],
+ (instrs
+ DCBF,
+ PSTXVP, PSTXVPpc,
+ STB, STB8,
+ STBU, STBU8,
+ STBUX, STBUX8,
+ SPILLTOVSR_ST, STD,
+ STDBRX,
+ STDU,
+ STDUX,
+ DFSTOREf32, DFSTOREf64, STFD,
+ STFDU,
+ STFDUX,
+ STFDX,
+ STFIWX, STIWX,
+ STFS,
+ STFSU,
+ STFSUX,
+ STFSX,
+ STH, STH8,
+ STHBRX,
+ STHU, STHU8,
+ STHUX, STHUX8,
+ STVEBX,
+ STVEHX,
+ STVEWX,
+ STVX,
+ STVXL,
+ STW, STW8,
+ STWBRX,
+ STWU, STWU8,
+ STWUX, STWUX8,
+ STXSD,
+ STXSDX,
+ STXSIBX, STXSIBXv,
+ STXSIHX, STXSIHXv,
+ STXSIWX,
+ STXSSP,
+ STXSSPX,
+ STXV,
+ STXVB16X,
+ STXVD2X,
+ STXVH8X,
+ STXVRBX,
+ STXVRDX,
+ STXVRHX,
+ STXVRWX,
+ STXVW4X,
+ STXVX
+)>;
+
+// 3 Cycles Store operations, 3 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read, P10ST_Read],
+ (instrs
+ CP_COPY, CP_COPY8,
+ STBX, STBX8, STBXTLS, STBXTLS_, STBXTLS_32,
+ SPILLTOVSR_STX, STDX, STDXTLS, STDXTLS_,
+ STHX, STHX8, STHXTLS, STHXTLS_, STHXTLS_32,
+ STWX, STWX8, STWXTLS, STWXTLS_, STWXTLS_32,
+ STXVL,
+ STXVLL
+)>;
+
+// Single crack instructions
+// 3 Cycles Store operations, 0 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_DISP_ANY],
+ (instrs
+ EnforceIEIO,
+ MSGSYNC,
+ SLBSYNC,
+ TCHECK,
+ TLBSYNC
+)>;
+
+// Single crack instructions
+// 3 Cycles Store operations, 1 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10ST_Read],
+ (instrs
+ TEND
+)>;
+
+// Single crack instructions
+// 3 Cycles Store operations, 2 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10ST_Read, P10ST_Read],
+ (instrs
+ SLBIEG,
+ STBCX,
+ STDCX,
+ STHCX,
+ STWCX,
+ TLBIE
+)>;
+
+// Single crack instructions
+// 3 Cycles Store operations, 3 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10ST_Read, P10ST_Read, P10ST_Read],
+ (instrs
+ CP_PASTE8_rec, CP_PASTE_rec,
+ STBCIX,
+ STDCIX,
+ STHCIX,
+ STWCIX
+)>;
+
+// 2-way crack instructions
+// 3 Cycles Store operations, and 3 Cycles ALU operations, 0 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ ISYNC
+)>;
+
+// 2-way crack instructions
+// 3 Cycles Store operations, and 3 Cycles ALU operations, 1 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ SYNC
+)>;
+
+// Expand instructions
+// 3 Cycles Store operations, 3 Cycles ALU operations, 3 Cycles Store operations, 3 Cycles ALU operations, 3 Cycles Store operations, 3 Cycles ALU operations, 6 Cycles Load operations, and 3 Cycles Store operations, 2 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY],
+ (instrs
+ LDAT,
+ LWAT
+)>;
+
+// 4-way crack instructions
+// 3 Cycles Store operations, 3 Cycles ALU operations, 3 Cycles Store operations, and 3 Cycles Store operations, 3 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY],
+ (instrs
+ STDAT,
+ STWAT
+)>;
+
+// Expand instructions
+// 3 Cycles Store operations, 3 Cycles Store operations, 3 Cycles Store operations, and 3 Cycles Store operations, 2 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read],
+ (instrs
+ STMW
+)>;
+
+// Expand instructions
+// 3 Cycles Store operations, 3 Cycles Store operations, 3 Cycles Store operations, and 3 Cycles Store operations, 3 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read, P10ST_Read],
+ (instrs
+ STSWI
+)>;
+
+// 3 Cycles Store operations, 2 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_PAIR, P10ST_Read, P10ST_Read],
+ (instrs
+ PSTB, PSTB8, PSTB8pc, PSTBpc,
+ PSTD, PSTDpc,
+ PSTFD, PSTFDpc,
+ PSTFS, PSTFSpc,
+ PSTH, PSTH8, PSTH8pc, PSTHpc,
+ PSTW, PSTW8, PSTW8pc, PSTWpc,
+ PSTXSD, PSTXSDpc,
+ PSTXSSP, PSTXSSPpc,
+ PSTXV, PSTXVpc
+)>;
+
+// 2-way crack instructions
+// 3 Cycles Store operations, and 3 Cycles Store operations, 2 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_PAIR, P10W_ST_3C, P10ST_Read, P10ST_Read],
+ (instrs
+ STXVP,
+ STXVPX
+)>;
+
+// FIXME - Missing scheduling information from the datasheet.
+// Temporarily modeled as 1 Cycle Simple Fixed-point (SFX) operations, 0 input operands
+def : InstRW<[P10W_SX, P10W_DISP_ANY],
+ (instrs
+ ATTN,
+ CP_ABORT,
+ DCBA,
+ DCBI,
+ DCBZL,
+ DCCCI,
+ ICBLC,
+ ICBLQ,
+ ICBTLS,
+ ICCCI,
+ LA,
+ LDMX,
+ MFDCR,
+ MFPMR,
+ MFSRIN,
+ MSYNC,
+ MTDCR,
+ MTPMR,
+ MTSRIN,
+ NAP,
+ TLBIA,
+ TLBLD,
+ TLBLI,
+ TLBRE2,
+ TLBSX2,
+ TLBSX2D,
+ TLBWE2
+)>;
+
+// Single crack instructions
+// 3 Cycles Simple Fixed-point (SFX) operations, 0 input operands
+def : InstRW<[P10W_SX_3C, P10W_DISP_EVEN, P10W_DISP_ANY],
+ (instrs
+ CLRBHRB,
+ MFMSR
+)>;
+
+// Single crack instructions
+// 3 Cycles Simple Fixed-point (SFX) operations, 1 input operands
+def : InstRW<[P10W_SX_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10SX_Read],
+ (instrs
+ MFTB
+)>;
+
+// Single crack instructions
+// 3 Cycles Simple Fixed-point (SFX) operations, 2 input operands
+def : InstRW<[P10W_SX_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10SX_Read, P10SX_Read],
+ (instrs
+ MFBHRBE,
+ MTMSR,
+ MTMSRD
+)>;
+
+// 2-way crack instructions
+// 3 Cycles Simple Fixed-point (SFX) operations, and 3 Cycles ALU operations, 1 input operands
+def : InstRW<[P10W_SX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ ADDPCIS
+)>;
+
+// 3 Cycles Simple Fixed-point (SFX) operations, 1 input operands
+def : InstRW<[P10W_SX_3C, P10W_DISP_PAIR, P10SX_Read],
+ (instrs
+ PADDI, PADDI8, PADDI8pc, PADDIpc, PLI, PLI8
+)>;
+
+// 7 Cycles VMX Multiply operations, 2 input operands
+def : InstRW<[P10W_vMU_7C, P10W_DISP_ANY, P10vMU_Read, P10vMU_Read],
+ (instrs
+ VMULESB,
+ VMULESD,
+ VMULESH,
+ VMULESW,
+ VMULEUB,
+ VMULEUD,
+ VMULEUH,
+ VMULEUW,
+ VMULHSW,
+ VMULHUW,
+ VMULOSB,
+ VMULOSD,
+ VMULOSH,
+ VMULOSW,
+ VMULOUB,
+ VMULOUD,
+ VMULOUH,
+ VMULOUW,
+ VMULUWM,
+ VSUM2SWS,
+ VSUM4SBS,
+ VSUM4SHS,
+ VSUM4UBS
+)>;
+
+// 7 Cycles VMX Multiply operations, 3 input operands
+def : InstRW<[P10W_vMU_7C, P10W_DISP_ANY, P10vMU_Read, P10vMU_Read, P10vMU_Read],
+ (instrs
+ VMHADDSHS,
+ VMHRADDSHS,
+ VMLADDUHM,
+ VMSUMCUD,
+ VMSUMMBM,
+ VMSUMSHM,
+ VMSUMSHS,
+ VMSUMUBM,
+ VMSUMUDM,
+ VMSUMUHM,
+ VMSUMUHS
+)>;
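Every entry in the new P10 model above follows the same shape: an InstRW record ties a dispatch resource, a functional-unit write with a fixed latency, and one read per input operand to a list of instructions. As a minimal, self-contained illustration of the latency lookup this produces (a toy C++ table using cycle counts quoted from the comments above, not the generated MCSchedModel):

#include <iostream>
#include <string>
#include <unordered_map>

int main() {
  // Toy latency table keyed by opcode name; the cycle counts are the ones
  // listed in the P10 model comments above (FDIVS 22, XVDIVSP 24, FSQRT 36,
  // FADD 7). Real lookups go through the generated scheduling model instead.
  std::unordered_map<std::string, unsigned> latency = {
      {"FDIVS", 22}, {"XVDIVSP", 24}, {"FSQRT", 36}, {"FADD", 7}};

  for (const auto &op : {"FADD", "FSQRT"}) {
    auto it = latency.find(op);
    std::cout << op << " -> "
              << (it != latency.end() ? std::to_string(it->second) : "unknown")
              << " cycles\n";
  }
  return 0;
}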
diff --git a/llvm/lib/Target/PowerPC/P9InstrResources.td b/llvm/lib/Target/PowerPC/P9InstrResources.td
index 76663acf4782..c4f4a2b3d796 100644
--- a/llvm/lib/Target/PowerPC/P9InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P9InstrResources.td
@@ -1302,15 +1302,15 @@ def : InstRW<[P9_BR_2C, DISP_BR_1C],
(instregex "BCCTR(L)?(8)?(n)?$"),
(instregex "BD(N)?Z(8|A|Am|Ap|m|p)?$"),
(instregex "BD(N)?ZL(A|Am|Ap|R|R8|RL|RLm|RLp|Rm|Rp|m|p)?$"),
- (instregex "BL(_TLS|_NOP)?$"),
- (instregex "BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?$"),
- (instregex "BLA(8|8_NOP)?$"),
+ (instregex "BL(_TLS|_NOP)?(_RM)?$"),
+ (instregex "BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?(_RM)?$"),
+ (instregex "BLA(8|8_NOP)?(_RM)?$"),
(instregex "BLR(8|L)?$"),
(instregex "TAILB(A)?(8)?$"),
(instregex "TAILBCTR(8)?$"),
(instregex "gBC(A|Aat|CTR|CTRL|L|LA|LAat|LR|LRL|Lat|at)?$"),
(instregex "BCLR(L)?(n)?$"),
- (instregex "BCTR(L)?(8)?$"),
+ (instregex "BCTR(L)?(8)?(_RM)?$"),
B,
BA,
BC,
@@ -1321,6 +1321,8 @@ def : InstRW<[P9_BR_2C, DISP_BR_1C],
BCLn,
BCTRL8_LDinto_toc,
BCTRL_LWZinto_toc,
+ BCTRL8_LDinto_toc_RM,
+ BCTRL_LWZinto_toc_RM,
BCn,
CTRL_DEP
)>;
@@ -1430,5 +1432,6 @@ def : InstRW<[],
DCBI,
DCCCI,
ICCCI,
- ADDEX
+ ADDEX,
+ ADDEX8
)> { let Unsupported = 1; }
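The P9 changes above mostly widen existing instregex patterns with an optional "(_RM)?" suffix so the new rounding-mode call variants land in the same scheduling class. A small, self-contained C++ sketch of what that optional group accepts (plain std::regex, which is close to but not identical to instregex's anchored matching):

#include <iostream>
#include <regex>
#include <string>

int main() {
  // The BL8 pattern from the hunk above, with the new optional "(_RM)?" group.
  std::regex blPattern("BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?(_RM)?$");

  for (const std::string name : {"BL8", "BL8_TLS", "BL8_NOP_RM", "BL8_RM", "BLR8"}) {
    std::cout << name
              << (std::regex_search(name, blPattern) ? " matches\n"
                                                     : " does not match\n");
  }
  return 0;
}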
diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td
index ce43ced57560..a1ff20bb3612 100644
--- a/llvm/lib/Target/PowerPC/PPC.td
+++ b/llvm/lib/Target/PowerPC/PPC.td
@@ -187,6 +187,22 @@ def FeatureAddisLoadFusion : SubtargetFeature<"fuse-addis-load",
def FeatureStoreFusion : SubtargetFeature<"fuse-store", "HasStoreFusion", "true",
"Target supports store clustering",
[FeatureFusion]>;
+def FeatureArithAddFusion :
+ SubtargetFeature<"fuse-arith-add", "HasArithAddFusion", "true",
+ "Target supports Arithmetic Operations with Add fusion",
+ [FeatureFusion]>;
+def FeatureAddLogicalFusion :
+ SubtargetFeature<"fuse-add-logical", "HasAddLogicalFusion", "true",
+ "Target supports Add with Logical Operations fusion",
+ [FeatureFusion]>;
+def FeatureLogicalAddFusion :
+ SubtargetFeature<"fuse-logical-add", "HasLogicalAddFusion", "true",
+ "Target supports Logical with Add Operations fusion",
+ [FeatureFusion]>;
+def FeatureLogicalFusion :
+ SubtargetFeature<"fuse-logical", "HasLogicalFusion", "true",
+ "Target supports Logical Operations fusion",
+ [FeatureFusion]>;
def FeatureUnalignedFloats :
SubtargetFeature<"allow-unaligned-fp-access", "AllowsUnalignedFPAccess",
"true", "CPU does not trap on unaligned FP access">;
@@ -213,6 +229,9 @@ def FeatureSlowPOPCNTD : SubtargetFeature<"slow-popcntd","HasPOPCNTD",
def DeprecatedDST : SubtargetFeature<"", "DeprecatedDST", "true",
"Treat vector data stream cache control instructions as deprecated">;
+def FeatureISA2_06 : SubtargetFeature<"isa-v206-instructions", "IsISA2_06",
+ "true",
+ "Enable instructions in ISA 2.06.">;
def FeatureISA2_07 : SubtargetFeature<"isa-v207-instructions", "IsISA2_07",
"true",
"Enable instructions in ISA 2.07.">;
@@ -319,7 +338,8 @@ def ProcessorFeatures {
FeatureMFTB,
DeprecatedDST,
FeatureTwoConstNR,
- FeatureUnalignedFloats];
+ FeatureUnalignedFloats,
+ FeatureISA2_06];
list<SubtargetFeature> P7SpecificFeatures = [];
list<SubtargetFeature> P7Features =
!listconcat(P7InheritableFeatures, P7SpecificFeatures);
@@ -371,7 +391,10 @@ def ProcessorFeatures {
// Power10
// For P10 CPU we assume that all of the existing features from Power9
// still exist with the exception of those we know are Power9 specific.
- list<SubtargetFeature> FusionFeatures = [FeatureStoreFusion];
+ list<SubtargetFeature> FusionFeatures = [
+ FeatureStoreFusion, FeatureAddLogicalFusion, FeatureLogicalAddFusion,
+ FeatureLogicalFusion, FeatureArithAddFusion
+ ];
list<SubtargetFeature> P10AdditionalFeatures =
!listconcat(FusionFeatures, [
DirectivePwr10, FeatureISA3_1, FeaturePrefixInstrs,
@@ -586,8 +609,7 @@ def : ProcessorModel<"pwr6x", G5Model,
def : ProcessorModel<"pwr7", P7Model, ProcessorFeatures.P7Features>;
def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.P8Features>;
def : ProcessorModel<"pwr9", P9Model, ProcessorFeatures.P9Features>;
-// No scheduler model yet.
-def : ProcessorModel<"pwr10", P9Model, ProcessorFeatures.P10Features>;
+def : ProcessorModel<"pwr10", P10Model, ProcessorFeatures.P10Features>;
// No scheduler model for future CPU.
def : ProcessorModel<"future", NoSchedModel,
ProcessorFeatures.FutureFeatures>;
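In PPC.td the Power10 feature list is now assembled by concatenating the new FusionFeatures list with the other P10 additions via !listconcat. A rough C++ analogue of that composition, using the fusion feature strings added in this hunk plus placeholder names for the remaining additions (illustrative only, not how TableGen evaluates the records):

#include <iostream>
#include <string>
#include <vector>

int main() {
  // Fusion features added for Power10 in this patch.
  std::vector<std::string> fusionFeatures = {
      "fuse-store", "fuse-add-logical", "fuse-logical-add",
      "fuse-logical", "fuse-arith-add"};
  // Stand-ins for the rest of the P10 additions (FeatureISA3_1,
  // FeaturePrefixInstrs, ... in the .td); names here are placeholders.
  std::vector<std::string> p10Additional = {"ISA3_1", "PrefixInstrs"};

  // The !listconcat step: one flat list that feeds the processor model.
  std::vector<std::string> p10Features = fusionFeatures;
  p10Features.insert(p10Features.end(), p10Additional.begin(),
                     p10Additional.end());

  for (const auto &f : p10Features)
    std::cout << f << '\n';
  return 0;
}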
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index d0109f968446..a76963abb8e4 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -58,13 +58,13 @@
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCSymbolXCOFF.h"
#include "llvm/MC/SectionKind.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Process.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -1494,7 +1494,7 @@ void PPCLinuxAsmPrinter::emitInstruction(const MachineInstr *MI) {
//
// Update compiler-rt/lib/xray/xray_powerpc64.cc accordingly when number
// of instructions change.
- OutStreamer->emitCodeAlignment(8);
+ OutStreamer->emitCodeAlignment(8, &getSubtargetInfo());
MCSymbol *BeginOfSled = OutContext.createTempSymbol();
OutStreamer->emitLabel(BeginOfSled);
EmitToStreamer(*OutStreamer, RetInst);
@@ -2023,9 +2023,10 @@ void PPCAIXAsmPrinter::emitTracebackTable() {
// Set the 4th byte of the mandatory field.
FirstHalfOfMandatoryField |= TracebackTable::IsFunctionNamePresentMask;
- static_assert(XCOFF::AllocRegNo == 31, "Unexpected register usage!");
- if (MRI.isPhysRegUsed(Subtarget->isPPC64() ? PPC::X31 : PPC::R31,
- /* SkipRegMaskTest */ true))
+ const PPCRegisterInfo *RegInfo =
+ static_cast<const PPCRegisterInfo *>(Subtarget->getRegisterInfo());
+ Register FrameReg = RegInfo->getFrameRegister(*MF);
+ if (FrameReg == (Subtarget->isPPC64() ? PPC::X31 : PPC::R31))
FirstHalfOfMandatoryField |= TracebackTable::IsAllocaUsedMask;
const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
@@ -2527,7 +2528,7 @@ bool PPCAIXAsmPrinter::doInitialization(Module &M) {
// Construct an aliasing list for each GlobalObject.
for (const auto &Alias : M.aliases()) {
- const GlobalObject *Base = Alias.getBaseObject();
+ const GlobalObject *Base = Alias.getAliaseeObject();
if (!Base)
report_fatal_error(
"alias without a base object is not yet supported on AIX");
diff --git a/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp b/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp
index 50ae4450a837..786a3e163540 100644
--- a/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp
+++ b/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp
@@ -291,7 +291,7 @@ bool PPCBranchCoalescing::canCoalesceBranch(CoalescingCandidateInfo &Cand) {
return false;
}
- // Sanity check - the block must be able to fall through
+ // The block must be able to fall through.
assert(Cand.BranchBlock->canFallThrough() &&
"Expecting the block to fall through!");
@@ -751,9 +751,8 @@ bool PPCBranchCoalescing::runOnMachineFunction(MachineFunction &MF) {
if (!canCoalesceBranch(Cand2))
break;
- // Sanity check
// The branch-taken block of the second candidate should post-dominate the
- // first candidate
+ // first candidate.
assert(MPDT->dominates(Cand2.BranchTargetBlock, Cand1.BranchBlock) &&
"Branch-taken block should post-dominate first candidate");
diff --git a/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp b/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp
index 08b7bdb3ac1e..ff3d36d39fb2 100644
--- a/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp
+++ b/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp
@@ -25,9 +25,9 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -62,15 +62,14 @@ protected:
return Changed;
SmallVector<MachineBasicBlock*, 8> PredToRemove;
- for (MachineBasicBlock::pred_iterator PI = ReturnMBB.pred_begin(),
- PIE = ReturnMBB.pred_end(); PI != PIE; ++PI) {
+ for (MachineBasicBlock *Pred : ReturnMBB.predecessors()) {
bool OtherReference = false, BlockChanged = false;
- if ((*PI)->empty())
+ if (Pred->empty())
continue;
- for (MachineBasicBlock::iterator J = (*PI)->getLastNonDebugInstr();;) {
- if (J == (*PI)->end())
+ for (MachineBasicBlock::iterator J = Pred->getLastNonDebugInstr();;) {
+ if (J == Pred->end())
break;
if (J->getOpcode() == PPC::B) {
@@ -78,7 +77,7 @@ protected:
// This is an unconditional branch to the return. Replace the
// branch with a blr.
MachineInstr *MI = ReturnMBB.getParent()->CloneMachineInstr(&*I);
- (*PI)->insert(J, MI);
+ Pred->insert(J, MI);
MachineBasicBlock::iterator K = J--;
K->eraseFromParent();
@@ -95,7 +94,7 @@ protected:
MachineInstrBuilder(*ReturnMBB.getParent(), MI)
.add(J->getOperand(0))
.add(J->getOperand(1));
- (*PI)->insert(J, MI);
+ Pred->insert(J, MI);
MachineBasicBlock::iterator K = J--;
K->eraseFromParent();
@@ -112,7 +111,7 @@ protected:
TII->get(J->getOpcode() == PPC::BC ? PPC::BCLR : PPC::BCLRn));
MachineInstrBuilder(*ReturnMBB.getParent(), MI)
.add(J->getOperand(0));
- (*PI)->insert(J, MI);
+ Pred->insert(J, MI);
MachineBasicBlock::iterator K = J--;
K->eraseFromParent();
@@ -132,18 +131,18 @@ protected:
} else if (!J->isTerminator() && !J->isDebugInstr())
break;
- if (J == (*PI)->begin())
+ if (J == Pred->begin())
break;
--J;
}
- if ((*PI)->canFallThrough() && (*PI)->isLayoutSuccessor(&ReturnMBB))
+ if (Pred->canFallThrough() && Pred->isLayoutSuccessor(&ReturnMBB))
OtherReference = true;
// Predecessors are stored in a vector and can't be removed here.
if (!OtherReference && BlockChanged) {
- PredToRemove.push_back(*PI);
+ PredToRemove.push_back(Pred);
}
if (BlockChanged)
@@ -185,12 +184,9 @@ public:
// nothing to do.
if (MF.size() < 2)
return Changed;
-
- // We can't use a range-based for loop due to clobbering the iterator.
- for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E;) {
- MachineBasicBlock &B = *I++;
+
+ for (MachineBasicBlock &B : llvm::make_early_inc_range(MF))
Changed |= processBlock(B);
- }
return Changed;
}
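The PPCEarlyReturn change above replaces the manual iterator bookkeeping with llvm::make_early_inc_range, which advances the iterator before the loop body runs so the current block can be modified or erased safely. A minimal, self-contained C++ sketch of the same early-increment idea over a std::list of ints (not LLVM code):

#include <iostream>
#include <list>

int main() {
  std::list<int> blocks{1, 2, 3, 4, 5};

  // Early-increment iteration: remember the current position, advance the
  // iterator, and only then let the "body" decide whether to erase it.
  for (auto it = blocks.begin(); it != blocks.end();) {
    auto cur = it++;       // advance before the body runs
    if (*cur % 2 == 0)
      blocks.erase(cur);   // erasing cur leaves it (the next node) valid
  }

  for (int b : blocks)
    std::cout << b << ' ';  // prints: 1 3 5
  std::cout << '\n';
  return 0;
}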
diff --git a/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp
index 9daef26ede47..be4c9dd60b00 100644
--- a/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp
+++ b/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp
@@ -102,6 +102,16 @@ bool PPCExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB, MachineInstr &MI,
return expandAtomicRMW128(MBB, MI, NMBBI);
case PPC::ATOMIC_CMP_SWAP_I128:
return expandAtomicCmpSwap128(MBB, MI, NMBBI);
+ case PPC::BUILD_QUADWORD: {
+ Register Dst = MI.getOperand(0).getReg();
+ Register DstHi = TRI->getSubReg(Dst, PPC::sub_gp8_x0);
+ Register DstLo = TRI->getSubReg(Dst, PPC::sub_gp8_x1);
+ Register Lo = MI.getOperand(1).getReg();
+ Register Hi = MI.getOperand(2).getReg();
+ PairedCopy(TII, MBB, MI, MI.getDebugLoc(), DstHi, DstLo, Hi, Lo);
+ MI.eraseFromParent();
+ return true;
+ }
default:
return false;
}
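The new BUILD_QUADWORD case copies the high and low 64-bit source registers into the two sub-registers of the 128-bit destination pair. As a rough, self-contained illustration of that hi/lo pairing (plain C++ using the Clang/GCC __int128 extension, not the LLVM expansion itself):

#include <cstdint>
#include <cstdio>

// Toy analogue: build one 128-bit quadword from a high and a low 64-bit half,
// mirroring how BUILD_QUADWORD pairs two GPRs into a quadword register.
static unsigned __int128 buildQuadword(uint64_t Hi, uint64_t Lo) {
  return (static_cast<unsigned __int128>(Hi) << 64) | Lo;
}

int main() {
  unsigned __int128 Q =
      buildQuadword(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  // Print the two halves back out to show the pairing round-trips.
  std::printf("hi=%016llx lo=%016llx\n",
              (unsigned long long)(Q >> 64),
              (unsigned long long)(uint64_t)Q);
  return 0;
}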
diff --git a/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/llvm/lib/Target/PowerPC/PPCFastISel.cpp
index dfb2c1e5c0f5..856569bc8a73 100644
--- a/llvm/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFastISel.cpp
@@ -987,15 +987,16 @@ bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
auto RC = MRI.getRegClass(SrcReg);
if (Subtarget->hasSPE()) {
DestReg = createResultReg(&PPC::GPRCRegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(PPC::EFSCFD), DestReg)
- .addReg(SrcReg);
- } else if (isVSFRCRegClass(RC)) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::EFSCFD),
+ DestReg)
+ .addReg(SrcReg);
+ } else if (Subtarget->hasP8Vector() && isVSFRCRegClass(RC)) {
DestReg = createResultReg(&PPC::VSSRCRegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(PPC::XSRSP), DestReg)
- .addReg(SrcReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::XSRSP),
+ DestReg)
+ .addReg(SrcReg);
} else {
+ SrcReg = copyRegToRegClass(&PPC::F8RCRegClass, SrcReg);
DestReg = createResultReg(&PPC::F4RCRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(PPC::FRSP), DestReg)
@@ -2467,9 +2468,9 @@ namespace llvm {
// Create the fast instruction selector for PowerPC64 ELF.
FastISel *PPC::createFastISel(FunctionLoweringInfo &FuncInfo,
const TargetLibraryInfo *LibInfo) {
- // Only available on 64-bit ELF for now.
+ // Only available on 64-bit for now.
const PPCSubtarget &Subtarget = FuncInfo.MF->getSubtarget<PPCSubtarget>();
- if (Subtarget.is64BitELFABI())
+ if (Subtarget.isPPC64())
return new PPCFastISel(FuncInfo, LibInfo);
return nullptr;
}
diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index 1de6b633d20a..fc3c7ec35b8d 100644
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -279,11 +279,11 @@ static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
/// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum
/// call frame size. Update the MachineFunction object with the stack size.
-unsigned
+uint64_t
PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF,
bool UseEstimate) const {
unsigned NewMaxCallFrameSize = 0;
- unsigned FrameSize = determineFrameLayout(MF, UseEstimate,
+ uint64_t FrameSize = determineFrameLayout(MF, UseEstimate,
&NewMaxCallFrameSize);
MF.getFrameInfo().setStackSize(FrameSize);
MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize);
@@ -292,7 +292,7 @@ PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF,
/// determineFrameLayout - Determine the size of the frame and maximum call
/// frame size.
-unsigned
+uint64_t
PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
bool UseEstimate,
unsigned *NewMaxCallFrameSize) const {
@@ -300,7 +300,7 @@ PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
// Get the number of bytes to allocate from the FrameInfo
- unsigned FrameSize =
+ uint64_t FrameSize =
UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize();
// Get stack alignments. The frame must be aligned to the greatest of these:
@@ -624,9 +624,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
assert((isSVR4ABI || Subtarget.isAIXABI()) && "Unsupported PPC ABI.");
// Work out frame sizes.
- unsigned FrameSize = determineFrameLayoutAndUpdate(MF);
- int NegFrameSize = -FrameSize;
- if (!isInt<32>(NegFrameSize))
+ uint64_t FrameSize = determineFrameLayoutAndUpdate(MF);
+ int64_t NegFrameSize = -FrameSize;
+ if (!isInt<32>(FrameSize) || !isInt<32>(NegFrameSize))
llvm_unreachable("Unhandled stack size!");
if (MFI.isFrameAddressTaken())
@@ -692,9 +692,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
SingleScratchReg = ScratchReg == TempReg;
- int LROffset = getReturnSaveOffset();
+ int64_t LROffset = getReturnSaveOffset();
- int FPOffset = 0;
+ int64_t FPOffset = 0;
if (HasFP) {
MachineFrameInfo &MFI = MF.getFrameInfo();
int FPIndex = FI->getFramePointerSaveIndex();
@@ -702,7 +702,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
FPOffset = MFI.getObjectOffset(FPIndex);
}
- int BPOffset = 0;
+ int64_t BPOffset = 0;
if (HasBP) {
MachineFrameInfo &MFI = MF.getFrameInfo();
int BPIndex = FI->getBasePointerSaveIndex();
@@ -710,7 +710,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
BPOffset = MFI.getObjectOffset(BPIndex);
}
- int PBPOffset = 0;
+ int64_t PBPOffset = 0;
if (FI->usesPICBase()) {
MachineFrameInfo &MFI = MF.getFrameInfo();
int PBPIndex = FI->getPICBasePointerSaveIndex();
@@ -854,7 +854,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
// ABI.
if (HasROPProtect) {
const int SaveIndex = FI->getROPProtectionHashSaveIndex();
- const int ImmOffset = MFI.getObjectOffset(SaveIndex);
+ const int64_t ImmOffset = MFI.getObjectOffset(SaveIndex);
assert((ImmOffset <= -8 && ImmOffset >= -512) &&
"ROP hash save offset out of range.");
assert(((ImmOffset & 0x7) == 0) &&
@@ -1212,7 +1212,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIRegister);
} else {
- int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
+ int64_t Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
// We have changed the object offset above but we do not want to change
// the actual offsets in the CFI instruction so we have to undo the
// offset change here.
@@ -1550,7 +1550,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
const MachineFrameInfo &MFI = MF.getFrameInfo();
// Get the number of bytes allocated from the FrameInfo.
- int FrameSize = MFI.getStackSize();
+ int64_t FrameSize = MFI.getStackSize();
// Get processor type.
bool isPPC64 = Subtarget.isPPC64();
@@ -1592,9 +1592,9 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
: PPC::MTOCRF);
const MCInstrDesc &HashChk =
TII.get(HasPrivileged ? PPC::HASHCHKP : PPC::HASHCHK);
- int LROffset = getReturnSaveOffset();
+ int64_t LROffset = getReturnSaveOffset();
- int FPOffset = 0;
+ int64_t FPOffset = 0;
// Using the same bool variable as below to suppress compiler warnings.
bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg,
@@ -1610,14 +1610,14 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
FPOffset = MFI.getObjectOffset(FPIndex);
}
- int BPOffset = 0;
+ int64_t BPOffset = 0;
if (HasBP) {
int BPIndex = FI->getBasePointerSaveIndex();
assert(BPIndex && "No Base Pointer Save Slot!");
BPOffset = MFI.getObjectOffset(BPIndex);
}
- int PBPOffset = 0;
+ int64_t PBPOffset = 0;
if (FI->usesPICBase()) {
int PBPIndex = FI->getPICBasePointerSaveIndex();
assert(PBPIndex && "No PIC Base Pointer Save Slot!");
@@ -1865,7 +1865,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
// hash and then compare it to the hash stored in the prologue.
if (HasROPProtect) {
const int SaveIndex = FI->getROPProtectionHashSaveIndex();
- const int ImmOffset = MFI.getObjectOffset(SaveIndex);
+ const int64_t ImmOffset = MFI.getObjectOffset(SaveIndex);
assert((ImmOffset <= -8 && ImmOffset >= -512) &&
"ROP hash check location offset out of range.");
assert(((ImmOffset & 0x7) == 0) &&
@@ -2680,15 +2680,15 @@ bool PPCFrameLowering::restoreCalleeSavedRegisters(
return true;
}
-unsigned PPCFrameLowering::getTOCSaveOffset() const {
+uint64_t PPCFrameLowering::getTOCSaveOffset() const {
return TOCSaveOffset;
}
-unsigned PPCFrameLowering::getFramePointerSaveOffset() const {
+uint64_t PPCFrameLowering::getFramePointerSaveOffset() const {
return FramePointerSaveOffset;
}
-unsigned PPCFrameLowering::getBasePointerSaveOffset() const {
+uint64_t PPCFrameLowering::getBasePointerSaveOffset() const {
return BasePointerSaveOffset;
}
diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.h b/llvm/lib/Target/PowerPC/PPCFrameLowering.h
index b378c2739925..21883b19a575 100644
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.h
@@ -21,12 +21,12 @@ class PPCSubtarget;
class PPCFrameLowering: public TargetFrameLowering {
const PPCSubtarget &Subtarget;
- const unsigned ReturnSaveOffset;
- const unsigned TOCSaveOffset;
- const unsigned FramePointerSaveOffset;
+ const uint64_t ReturnSaveOffset;
+ const uint64_t TOCSaveOffset;
+ const uint64_t FramePointerSaveOffset;
const unsigned LinkageSize;
- const unsigned BasePointerSaveOffset;
- const unsigned CRSaveOffset;
+ const uint64_t BasePointerSaveOffset;
+ const uint64_t CRSaveOffset;
// Map each group of one or two GPRs to corresponding VSR for spilling.
// TODO: Use local table in methods to avoid this mutable member.
@@ -88,7 +88,7 @@ public:
/**
* Determine the frame layout and update the machine function.
*/
- unsigned determineFrameLayoutAndUpdate(MachineFunction &MF,
+ uint64_t determineFrameLayoutAndUpdate(MachineFunction &MF,
bool UseEstimate = false) const;
/**
@@ -96,7 +96,7 @@ public:
* The MachineFunction object can be const in this case as it is not
* modified.
*/
- unsigned determineFrameLayout(const MachineFunction &MF,
+ uint64_t determineFrameLayout(const MachineFunction &MF,
bool UseEstimate = false,
unsigned *NewMaxCallFrameSize = nullptr) const;
@@ -146,19 +146,19 @@ public:
/// getReturnSaveOffset - Return the previous frame offset to save the
/// return address.
- unsigned getReturnSaveOffset() const { return ReturnSaveOffset; }
+ uint64_t getReturnSaveOffset() const { return ReturnSaveOffset; }
/// getTOCSaveOffset - Return the previous frame offset to save the
/// TOC register -- 64-bit SVR4 ABI only.
- unsigned getTOCSaveOffset() const;
+ uint64_t getTOCSaveOffset() const;
/// getFramePointerSaveOffset - Return the previous frame offset to save the
/// frame pointer.
- unsigned getFramePointerSaveOffset() const;
+ uint64_t getFramePointerSaveOffset() const;
/// getBasePointerSaveOffset - Return the previous frame offset to save the
/// base pointer.
- unsigned getBasePointerSaveOffset() const;
+ uint64_t getBasePointerSaveOffset() const;
/// getLinkageSize - Return the size of the PowerPC ABI linkage area.
///
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 606aae66196c..0abdf81d0908 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -262,6 +262,21 @@ namespace {
None) == PPC::AM_DForm;
}
+ /// SelectPCRelForm - Returns true if address N can be represented by
+ /// PC-Relative addressing mode.
+ bool SelectPCRelForm(SDNode *Parent, SDValue N, SDValue &Disp,
+ SDValue &Base) {
+ return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
+ None) == PPC::AM_PCRel;
+ }
+
+ /// SelectPDForm - Returns true if address N can be represented by Prefixed
+ /// DForm addressing mode (a base register, plus a signed 34-bit immediate).
+ bool SelectPDForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
+ return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
+ None) == PPC::AM_PrefixDForm;
+ }
+
/// SelectXForm - Returns true if address N can be represented by the
/// addressing mode of XForm instructions (an indexed [r+r] operation).
bool SelectXForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
@@ -3186,7 +3201,7 @@ IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS, SDValue RHS,
// by swapping inputs and falling through.
std::swap(LHS, RHS);
ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
- IsRHSZero = RHSConst && RHSConst->isNullValue();
+ IsRHSZero = RHSConst && RHSConst->isZero();
LLVM_FALLTHROUGH;
}
case ISD::SETLE: {
@@ -3236,7 +3251,7 @@ IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS, SDValue RHS,
// (%b < %a) by swapping inputs and falling through.
std::swap(LHS, RHS);
ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
- IsRHSZero = RHSConst && RHSConst->isNullValue();
+ IsRHSZero = RHSConst && RHSConst->isZero();
IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
LLVM_FALLTHROUGH;
}
@@ -3370,7 +3385,7 @@ IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS, SDValue RHS,
// by swapping inputs and falling through.
std::swap(LHS, RHS);
ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
- IsRHSZero = RHSConst && RHSConst->isNullValue();
+ IsRHSZero = RHSConst && RHSConst->isZero();
LLVM_FALLTHROUGH;
}
case ISD::SETLE: {
@@ -3415,7 +3430,7 @@ IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS, SDValue RHS,
// (%b < %a) by swapping inputs and falling through.
std::swap(LHS, RHS);
ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
- IsRHSZero = RHSConst && RHSConst->isNullValue();
+ IsRHSZero = RHSConst && RHSConst->isZero();
IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
LLVM_FALLTHROUGH;
}
@@ -3528,7 +3543,7 @@ IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS, SDValue RHS,
return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
std::swap(LHS, RHS);
ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
- IsRHSZero = RHSConst && RHSConst->isNullValue();
+ IsRHSZero = RHSConst && RHSConst->isZero();
LLVM_FALLTHROUGH;
}
case ISD::SETLE: {
@@ -3570,7 +3585,7 @@ IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS, SDValue RHS,
}
std::swap(LHS, RHS);
ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
- IsRHSZero = RHSConst && RHSConst->isNullValue();
+ IsRHSZero = RHSConst && RHSConst->isZero();
IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
LLVM_FALLTHROUGH;
}
@@ -3687,7 +3702,7 @@ IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS, SDValue RHS,
return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
std::swap(LHS, RHS);
ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
- IsRHSZero = RHSConst && RHSConst->isNullValue();
+ IsRHSZero = RHSConst && RHSConst->isZero();
LLVM_FALLTHROUGH;
}
case ISD::SETLE: {
@@ -3730,7 +3745,7 @@ IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS, SDValue RHS,
}
std::swap(LHS, RHS);
ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
- IsRHSZero = RHSConst && RHSConst->isNullValue();
+ IsRHSZero = RHSConst && RHSConst->isZero();
IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
LLVM_FALLTHROUGH;
}
@@ -4982,6 +4997,51 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
}
break;
+ case ISD::INTRINSIC_VOID: {
+ auto IntrinsicID = N->getConstantOperandVal(1);
+ if (IntrinsicID == Intrinsic::ppc_tdw || IntrinsicID == Intrinsic::ppc_tw) {
+ unsigned Opcode = IntrinsicID == Intrinsic::ppc_tdw ? PPC::TDI : PPC::TWI;
+ SDValue Ops[] = {N->getOperand(4), N->getOperand(2), N->getOperand(3)};
+ int16_t SImmOperand2;
+ int16_t SImmOperand3;
+ int16_t SImmOperand4;
+ bool isOperand2IntS16Immediate =
+ isIntS16Immediate(N->getOperand(2), SImmOperand2);
+ bool isOperand3IntS16Immediate =
+ isIntS16Immediate(N->getOperand(3), SImmOperand3);
+ // We will emit PPC::TD or PPC::TW if the 2nd and 3rd operands are reg +
+ // reg or imm + imm. The imm + imm form will be optimized to either an
+ // unconditional trap or a nop in a later pass.
+ if (isOperand2IntS16Immediate == isOperand3IntS16Immediate)
+ Opcode = IntrinsicID == Intrinsic::ppc_tdw ? PPC::TD : PPC::TW;
+ else if (isOperand3IntS16Immediate)
+ // The 2nd and 3rd operands are reg + imm.
+ Ops[2] = getI32Imm(int(SImmOperand3) & 0xFFFF, dl);
+ else {
+ // The 2nd and 3rd operands are imm + reg.
+ bool isOperand4IntS16Immediate =
+ isIntS16Immediate(N->getOperand(4), SImmOperand4);
+ (void)isOperand4IntS16Immediate;
+ assert(isOperand4IntS16Immediate &&
+ "The 4th operand is not an Immediate");
+ // We need to flip the condition immediate TO.
+ int16_t TO = int(SImmOperand4) & 0x1F;
+ // We swap the first and second bits of TO if they are not the same.
+ if ((TO & 0x1) != ((TO & 0x2) >> 1))
+ TO = (TO & 0x1) ? TO + 1 : TO - 1;
+ // We swap the fourth and fifth bits of TO if they are not the same.
+ if ((TO & 0x8) != ((TO & 0x10) >> 1))
+ TO = (TO & 0x8) ? TO + 8 : TO - 8;
+ Ops[0] = getI32Imm(TO, dl);
+ Ops[1] = N->getOperand(3);
+ Ops[2] = getI32Imm(int(SImmOperand2) & 0xFFFF, dl);
+ }
+ CurDAG->SelectNodeTo(N, Opcode, MVT::Other, Ops);
+ return;
+ }
+ break;
+ }
+
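// Editor's note: an illustrative sketch, not part of the patch. The +1/-1 and
// +8/-8 adjustments above are a compact way of swapping bit pairs (0,1) and
// (3,4) of the 5-bit TO field when the two trap operands are exchanged. The
// helper name below is hypothetical; an equivalent XOR-based bit swap:
#include <cstdint>
static int16_t swapTOBits(int16_t TO, unsigned A, unsigned B) {
  int16_t Diff = ((TO >> A) ^ (TO >> B)) & 1; // 1 iff bits A and B differ
  return TO ^ (Diff << A) ^ (Diff << B);      // flip both only when they differ
}
// Usage: TO = swapTOBits(swapTOBits(TO, 0, 1), 3, 4);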
case ISD::INTRINSIC_WO_CHAIN: {
// We emit the PPC::FSELS instruction here because of type conflicts with
// the comparison operand. The FSELS instruction is defined to use an 8-byte
@@ -5423,8 +5483,8 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
- if (N1C->isNullValue() && N3C->isNullValue() &&
- N2C->getZExtValue() == 1ULL && CC == ISD::SETNE &&
+ if (N1C->isZero() && N3C->isZero() && N2C->getZExtValue() == 1ULL &&
+ CC == ISD::SETNE &&
// FIXME: Implement this optzn for PPC64.
N->getValueType(0) == MVT::i32) {
SDNode *Tmp =
@@ -5810,6 +5870,69 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
return;
}
}
+ case PPCISD::LD_SPLAT: {
+ // Here we want to handle splat loads of type v16i8 and v8i16 when there is
+ // no direct move; in that case we do not need to go through the stack. If the
+ // target has direct move, we should be able to get the best selection in the
+ // .td file.
+ if (!Subtarget->hasAltivec() || Subtarget->hasDirectMove())
+ break;
+
+ EVT Type = N->getValueType(0);
+ if (Type != MVT::v16i8 && Type != MVT::v8i16)
+ break;
+
+ SDValue ZeroReg =
+ CurDAG->getRegister(Subtarget->isPPC64() ? PPC::ZERO8 : PPC::ZERO,
+ Subtarget->isPPC64() ? MVT::i64 : MVT::i32);
+ unsigned LIOpcode = Subtarget->isPPC64() ? PPC::LI8 : PPC::LI;
+ // v16i8 LD_SPLAT addr
+ // ======>
+ // Mask = LVSR/LVSL 0, addr
+ // LoadLow = LVX 0, addr
+ // Perm = VPERM LoadLow, LoadLow, Mask
+ // Splat = VSPLTB 15/0, Perm
+ //
+ // v8i16 LD_SPLAT addr
+ // ======>
+ // Mask = LVSR/LVSL 0, addr
+ // LoadLow = LVX 0, addr
+ // LoadHigh = LVX (LI, 1), addr
+ // Perm = VPERM LoadLow, LoadHigh, Mask
+ // Splat = VSPLTH 7/0, Perm
+ unsigned SplatOp = (Type == MVT::v16i8) ? PPC::VSPLTB : PPC::VSPLTH;
+ unsigned SplatElemIndex =
+ Subtarget->isLittleEndian() ? ((Type == MVT::v16i8) ? 15 : 7) : 0;
+
+ SDNode *Mask = CurDAG->getMachineNode(
+ Subtarget->isLittleEndian() ? PPC::LVSR : PPC::LVSL, dl, Type, ZeroReg,
+ N->getOperand(1));
+
+ SDNode *LoadLow =
+ CurDAG->getMachineNode(PPC::LVX, dl, MVT::v16i8, MVT::Other,
+ {ZeroReg, N->getOperand(1), N->getOperand(0)});
+
+ SDNode *LoadHigh = LoadLow;
+ if (Type == MVT::v8i16) {
+ LoadHigh = CurDAG->getMachineNode(
+ PPC::LVX, dl, MVT::v16i8, MVT::Other,
+ {SDValue(CurDAG->getMachineNode(
+ LIOpcode, dl, MVT::i32,
+ CurDAG->getTargetConstant(1, dl, MVT::i8)),
+ 0),
+ N->getOperand(1), SDValue(LoadLow, 1)});
+ }
+
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(LoadHigh, 1));
+ transferMemOperands(N, LoadHigh);
+
+ SDNode *Perm =
+ CurDAG->getMachineNode(PPC::VPERM, dl, Type, SDValue(LoadLow, 0),
+ SDValue(LoadHigh, 0), SDValue(Mask, 0));
+ CurDAG->SelectNodeTo(N, SplatOp, Type,
+ CurDAG->getTargetConstant(SplatElemIndex, dl, MVT::i8),
+ SDValue(Perm, 0));
+ return;
+ }
}
SelectCode(N);
@@ -6153,9 +6276,7 @@ void PPCDAGToDAGISel::PostprocessISelDAG() {
// be folded with the isel so that we don't need to materialize a register
// containing zero.
bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) {
- for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
- UI != UE; ++UI) {
- SDNode *User = *UI;
+ for (const SDNode *User : N->uses()) {
if (!User->isMachineOpcode())
return false;
if (User->getMachineOpcode() != PPC::SELECT_I4 &&
@@ -6180,7 +6301,7 @@ bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) {
if (!C)
return false;
- if (!C->isNullValue())
+ if (!C->isZero())
return false;
}
@@ -6189,18 +6310,14 @@ bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) {
void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode *N) {
SmallVector<SDNode *, 4> ToReplace;
- for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
- UI != UE; ++UI) {
- SDNode *User = *UI;
+ for (SDNode *User : N->uses()) {
assert((User->getMachineOpcode() == PPC::SELECT_I4 ||
User->getMachineOpcode() == PPC::SELECT_I8) &&
"Must have all select users");
ToReplace.push_back(User);
}
- for (SmallVector<SDNode *, 4>::iterator UI = ToReplace.begin(),
- UE = ToReplace.end(); UI != UE; ++UI) {
- SDNode *User = *UI;
+ for (SDNode *User : ToReplace) {
SDNode *ResNode =
CurDAG->getMachineNode(User->getMachineOpcode(), SDLoc(User),
User->getValueType(0), User->getOperand(0),
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 37358176f35e..ac952b240a48 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -601,6 +601,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f64, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::ppcf128, Custom);
// To handle counter-based loop conditions.
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
@@ -1245,9 +1247,16 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
}
if (Subtarget.hasP9Altivec()) {
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
-
+ if (Subtarget.isISA3_1()) {
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Legal);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Legal);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Legal);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Legal);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Legal);
+ } else {
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
+ }
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);
@@ -1256,9 +1265,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
}
-
- if (Subtarget.isISA3_1())
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom);
}
if (Subtarget.pairedVectorMemops()) {
@@ -1286,8 +1292,12 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
}
- if (EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics())
+ if (EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics()) {
setMaxAtomicSizeInBitsSupported(128);
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
+ setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::i128, Custom);
+ }
setBooleanContents(ZeroOrOneBooleanContent);
@@ -1301,6 +1311,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setLibcallName(RTLIB::SHL_I128, nullptr);
setLibcallName(RTLIB::SRL_I128, nullptr);
setLibcallName(RTLIB::SRA_I128, nullptr);
+ setLibcallName(RTLIB::MULO_I64, nullptr);
}
if (!isPPC64)
@@ -1513,10 +1524,10 @@ void PPCTargetLowering::initializeAddrModeMap() {
PPC::MOF_RPlusSImm16Mult16 | PPC::MOF_Vector | PPC::MOF_SubtargetP9,
PPC::MOF_NotAddNorCst | PPC::MOF_Vector | PPC::MOF_SubtargetP9,
PPC::MOF_AddrIsSImm32 | PPC::MOF_Vector | PPC::MOF_SubtargetP9,
- PPC::MOF_RPlusSImm16Mult16 | PPC::MOF_Vector256 | PPC::MOF_SubtargetP10,
- PPC::MOF_NotAddNorCst | PPC::MOF_Vector256 | PPC::MOF_SubtargetP10,
- PPC::MOF_AddrIsSImm32 | PPC::MOF_Vector256 | PPC::MOF_SubtargetP10,
};
+ AddrModesMap[PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 |
+ PPC::MOF_SubtargetP10};
+ // TODO: Add mapping for quadword load/store.
}
/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
@@ -1550,7 +1561,7 @@ static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
-unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
+uint64_t PPCTargetLowering::getByValTypeAlignment(Type *Ty,
const DataLayout &DL) const {
// 16byte and wider vectors are passed on 16byte boundary.
// The rest is 8 on PPC64 and 4 on PPC32 boundary.
@@ -1623,9 +1634,19 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::CALL: return "PPCISD::CALL";
case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC";
+ case PPCISD::CALL_RM:
+ return "PPCISD::CALL_RM";
+ case PPCISD::CALL_NOP_RM:
+ return "PPCISD::CALL_NOP_RM";
+ case PPCISD::CALL_NOTOC_RM:
+ return "PPCISD::CALL_NOTOC_RM";
case PPCISD::MTCTR: return "PPCISD::MTCTR";
case PPCISD::BCTRL: return "PPCISD::BCTRL";
case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
+ case PPCISD::BCTRL_RM:
+ return "PPCISD::BCTRL_RM";
+ case PPCISD::BCTRL_LOAD_TOC_RM:
+ return "PPCISD::BCTRL_LOAD_TOC_RM";
case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
@@ -1707,6 +1728,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
case PPCISD::XXMFACC: return "PPCISD::XXMFACC";
case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
+ case PPCISD::ZEXT_LD_SPLAT: return "PPCISD::ZEXT_LD_SPLAT";
+ case PPCISD::SEXT_LD_SPLAT: return "PPCISD::SEXT_LD_SPLAT";
case PPCISD::FNMSUB: return "PPCISD::FNMSUB";
case PPCISD::STRICT_FADDRTZ:
return "PPCISD::STRICT_FADDRTZ";
@@ -2551,9 +2574,8 @@ static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
SDValue &Index,
SelectionDAG &DAG) const {
- for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
- UI != E; ++UI) {
- if (MemSDNode *Memop = dyn_cast<MemSDNode>(*UI)) {
+ for (SDNode *U : N->uses()) {
+ if (MemSDNode *Memop = dyn_cast<MemSDNode>(U)) {
if (Memop->getMemoryVT() == MVT::f64) {
Base = N.getOperand(0);
Index = N.getOperand(1);
@@ -3503,7 +3525,7 @@ SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
// Leave comparisons against 0 and -1 alone for now, since they're usually
// optimized. FIXME: revisit this when we can custom lower all setcc
// optimizations.
- if (C->isAllOnesValue() || C->isNullValue())
+ if (C->isAllOnes() || C->isZero())
return SDValue();
}
@@ -4364,21 +4386,10 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
FuncInfo->addLiveInAttr(VReg, Flags);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
- SDValue Store;
-
- if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
- EVT ObjType = (ObjSize == 1 ? MVT::i8 :
- (ObjSize == 2 ? MVT::i16 : MVT::i32));
- Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
- MachinePointerInfo(&*FuncArg), ObjType);
- } else {
- // For sizes that don't fit a truncating store (3, 5, 6, 7),
- // store the whole register as-is to the parameter save area
- // slot.
- Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(&*FuncArg));
- }
-
+ EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), ObjSize * 8);
+ SDValue Store =
+ DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
+ MachinePointerInfo(&*FuncArg), ObjType);
MemOps.push_back(Store);
}
// Whether we copied from a register or not, advance the offset
@@ -4649,7 +4660,7 @@ static bool callsShareTOCBase(const Function *Caller, SDValue Callee,
// If we have an Alias we can try to get the function from there.
if (Alias) {
- const GlobalObject *GlobalObj = Alias->getBaseObject();
+ const GlobalObject *GlobalObj = Alias->getAliaseeObject();
F = dyn_cast<Function>(GlobalObj);
}
@@ -5174,13 +5185,14 @@ static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
}
static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
- const Function &Caller,
- const SDValue &Callee,
+ const Function &Caller, const SDValue &Callee,
const PPCSubtarget &Subtarget,
- const TargetMachine &TM) {
+ const TargetMachine &TM,
+ bool IsStrictFPCall = false) {
if (CFlags.IsTailCall)
return PPCISD::TC_RETURN;
+ unsigned RetOpc = 0;
// This is a call through a function pointer.
if (CFlags.IsIndirect) {
// AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
@@ -5191,28 +5203,46 @@ static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
// immediately followed by a load of the TOC pointer from the stack save
// slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
// as it is not saved or used.
- return isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
- : PPCISD::BCTRL;
- }
-
- if (Subtarget.isUsingPCRelativeCalls()) {
+ RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
+ : PPCISD::BCTRL;
+ } else if (Subtarget.isUsingPCRelativeCalls()) {
assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
- return PPCISD::CALL_NOTOC;
+ RetOpc = PPCISD::CALL_NOTOC;
+ } else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI())
+ // The ABIs that maintain a TOC pointer across calls need to have a nop
+ // immediately following the call instruction if the caller and callee may
+ // have different TOC bases. At link time if the linker determines the calls
+ // may not share a TOC base, the call is redirected to a trampoline inserted
+ // by the linker. The trampoline will (among other things) save the caller's
+ // TOC pointer at an ABI designated offset in the linkage area and the
+ // linker will rewrite the nop to be a load of the TOC pointer from the
+ // linkage area into gpr2.
+ RetOpc = callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL
+ : PPCISD::CALL_NOP;
+ else
+ RetOpc = PPCISD::CALL;
+ if (IsStrictFPCall) {
+ switch (RetOpc) {
+ default:
+ llvm_unreachable("Unknown call opcode");
+ case PPCISD::BCTRL_LOAD_TOC:
+ RetOpc = PPCISD::BCTRL_LOAD_TOC_RM;
+ break;
+ case PPCISD::BCTRL:
+ RetOpc = PPCISD::BCTRL_RM;
+ break;
+ case PPCISD::CALL_NOTOC:
+ RetOpc = PPCISD::CALL_NOTOC_RM;
+ break;
+ case PPCISD::CALL:
+ RetOpc = PPCISD::CALL_RM;
+ break;
+ case PPCISD::CALL_NOP:
+ RetOpc = PPCISD::CALL_NOP_RM;
+ break;
+ }
}
-
- // The ABIs that maintain a TOC pointer accross calls need to have a nop
- // immediately following the call instruction if the caller and callee may
- // have different TOC bases. At link time if the linker determines the calls
- // may not share a TOC base, the call is redirected to a trampoline inserted
- // by the linker. The trampoline will (among other things) save the callers
- // TOC pointer at an ABI designated offset in the linkage area and the linker
- // will rewrite the nop to be a load of the TOC pointer from the linkage area
- // into gpr2.
- if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI())
- return callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL
- : PPCISD::CALL_NOP;
-
- return PPCISD::CALL;
+ return RetOpc;
}
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
@@ -5228,7 +5258,7 @@ static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
const GlobalValue *GV = G ? G->getGlobal() : nullptr;
return DAG.getTarget().shouldAssumeDSOLocal(*Mod, GV) &&
- !dyn_cast_or_null<GlobalIFunc>(GV);
+ !isa_and_nonnull<GlobalIFunc>(GV);
};
// The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
@@ -5508,7 +5538,7 @@ SDValue PPCTargetLowering::FinishCall(
unsigned CallOpc =
getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
- Subtarget, DAG.getTarget());
+ Subtarget, DAG.getTarget(), CB ? CB->isStrictFP() : false);
if (!CFlags.IsIndirect)
Callee = transformCallee(Callee, DAG, dl, Subtarget);
@@ -9066,6 +9096,34 @@ bool llvm::checkConvertToNonDenormSingle(APFloat &ArgAPFloat) {
return (!LosesInfo && !APFloatToConvert.isDenormal());
}
+static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
+ unsigned &Opcode) {
+ const SDNode *InputNode = Op.getOperand(0).getNode();
+ if (!InputNode || !ISD::isUNINDEXEDLoad(InputNode))
+ return false;
+
+ if (!Subtarget.hasVSX())
+ return false;
+
+ EVT Ty = Op->getValueType(0);
+ if (Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32 ||
+ Ty == MVT::v8i16 || Ty == MVT::v16i8)
+ return true;
+
+ if (Ty == MVT::v2i64) {
+ // Check the extend type, when the input type is i32, and the output vector
+ // type is v2i64.
+ if (cast<LoadSDNode>(Op.getOperand(0))->getMemoryVT() == MVT::i32) {
+ if (ISD::isZEXTLoad(InputNode))
+ Opcode = PPCISD::ZEXT_LD_SPLAT;
+ if (ISD::isSEXTLoad(InputNode))
+ Opcode = PPCISD::SEXT_LD_SPLAT;
+ }
+ return true;
+ }
+ return false;
+}
+
// If this is a case we can't handle, return null and let the default
// expansion code take care of it. If we CAN select this case, and if it
// selects to a single instruction, return Op. Otherwise, if we can codegen
@@ -9129,17 +9187,26 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
}
if (!BVNIsConstantSplat || SplatBitSize > 32) {
+ unsigned NewOpcode = PPCISD::LD_SPLAT;
- bool IsPermutedLoad = false;
- const SDValue *InputLoad =
- getNormalLoadInput(Op.getOperand(0), IsPermutedLoad);
// Handle load-and-splat patterns as we have instructions that will do this
// in one go.
- if (InputLoad && DAG.isSplatValue(Op, true)) {
+ if (DAG.isSplatValue(Op, true) &&
+ isValidSplatLoad(Subtarget, Op, NewOpcode)) {
+ const SDValue *InputLoad = &Op.getOperand(0);
LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
- // We have handling for 4 and 8 byte elements.
- unsigned ElementSize = LD->getMemoryVT().getScalarSizeInBits();
+ // If the input load is an extending load, it will be an i32 -> i64
+ // extending load and isValidSplatLoad() will update NewOpcode.
+ unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
+ unsigned ElementSize =
+ MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);
+
+ assert(((ElementSize == 2 * MemorySize)
+ ? (NewOpcode == PPCISD::ZEXT_LD_SPLAT ||
+ NewOpcode == PPCISD::SEXT_LD_SPLAT)
+ : (NewOpcode == PPCISD::LD_SPLAT)) &&
+ "Unmatched element size and opcode!\n");
// Checking for a single use of this load, we have to check for vector
// width (128 bits) / ElementSize uses (since each operand of the
@@ -9148,18 +9215,45 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
for (SDValue BVInOp : Op->ops())
if (BVInOp.isUndef())
NumUsesOfInputLD--;
+
+ // Exclude some cases where LD_SPLAT is worse than scalar_to_vector:
+ // The cases below should also apply to "lfiwzx/lfiwax + LE target + index
+ // 1", "lxvrhx + BE target + index 7" and "lxvrbx + BE target + index
+ // 15", but function isValidSplatLoad() currently only returns true when
+ // the data at index 0 is not nullptr, so we will not get into trouble for
+ // these cases.
+ //
+ // case 1 - lfiwzx/lfiwax
+ // 1.1: load result is i32 and is sign/zero extend to i64;
+ // 1.2: build a v2i64 vector type with above loaded value;
+ // 1.3: the vector has only one value at index 0, others are all undef;
+ // 1.4: on BE target, so that lfiwzx/lfiwax does not need any permute.
+ if (NumUsesOfInputLD == 1 &&
+ (Op->getValueType(0) == MVT::v2i64 && NewOpcode != PPCISD::LD_SPLAT &&
+ !Subtarget.isLittleEndian() && Subtarget.hasVSX() &&
+ Subtarget.hasLFIWAX()))
+ return SDValue();
+
+ // case 2 - lxvr[hb]x
+ // 2.1: load result is at most i16;
+ // 2.2: build a vector with above loaded value;
+ // 2.3: the vector has only one value at index 0, others are all undef;
+ // 2.4: on LE target, so that lxvr[hb]x does not need any permute.
+ if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
+ Subtarget.isISA3_1() && ElementSize <= 16)
+ return SDValue();
+
assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
- ((Subtarget.hasVSX() && ElementSize == 64) ||
- (Subtarget.hasP9Vector() && ElementSize == 32))) {
+ Subtarget.hasVSX()) {
SDValue Ops[] = {
LD->getChain(), // Chain
LD->getBasePtr(), // Ptr
DAG.getValueType(Op.getValueType()) // VT
};
SDValue LdSplt = DAG.getMemIntrinsicNode(
- PPCISD::LD_SPLAT, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
- Ops, LD->getMemoryVT(), LD->getMemOperand());
+ NewOpcode, dl, DAG.getVTList(Op.getValueType(), MVT::Other), Ops,
+ LD->getMemoryVT(), LD->getMemOperand());
// Replace all uses of the output chain of the original load with the
// output chain of the new load.
DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
@@ -10368,6 +10462,71 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
}
return DAG.getMergeValues(RetOps, dl);
}
+
+ case Intrinsic::ppc_unpack_longdouble: {
+ auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+ assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
+ "Argument of long double unpack must be 0 or 1!");
+ return DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Op.getOperand(1),
+ DAG.getConstant(!!(Idx->getSExtValue()), dl,
+ Idx->getValueType(0)));
+ }
+
+ case Intrinsic::ppc_compare_exp_lt:
+ case Intrinsic::ppc_compare_exp_gt:
+ case Intrinsic::ppc_compare_exp_eq:
+ case Intrinsic::ppc_compare_exp_uo: {
+ unsigned Pred;
+ switch (IntrinsicID) {
+ case Intrinsic::ppc_compare_exp_lt:
+ Pred = PPC::PRED_LT;
+ break;
+ case Intrinsic::ppc_compare_exp_gt:
+ Pred = PPC::PRED_GT;
+ break;
+ case Intrinsic::ppc_compare_exp_eq:
+ Pred = PPC::PRED_EQ;
+ break;
+ case Intrinsic::ppc_compare_exp_uo:
+ Pred = PPC::PRED_UN;
+ break;
+ }
+ return SDValue(
+ DAG.getMachineNode(
+ PPC::SELECT_CC_I4, dl, MVT::i32,
+ {SDValue(DAG.getMachineNode(PPC::XSCMPEXPDP, dl, MVT::i32,
+ Op.getOperand(1), Op.getOperand(2)),
+ 0),
+ DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
+ DAG.getTargetConstant(Pred, dl, MVT::i32)}),
+ 0);
+ }
+ case Intrinsic::ppc_test_data_class_d:
+ case Intrinsic::ppc_test_data_class_f: {
+ unsigned CmprOpc = PPC::XSTSTDCDP;
+ if (IntrinsicID == Intrinsic::ppc_test_data_class_f)
+ CmprOpc = PPC::XSTSTDCSP;
+ return SDValue(
+ DAG.getMachineNode(
+ PPC::SELECT_CC_I4, dl, MVT::i32,
+ {SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32, Op.getOperand(2),
+ Op.getOperand(1)),
+ 0),
+ DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
+ DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
+ 0);
+ }
+ case Intrinsic::ppc_convert_f128_to_ppcf128:
+ case Intrinsic::ppc_convert_ppcf128_to_f128: {
+ RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
+ ? RTLIB::CONVERT_PPCF128_F128
+ : RTLIB::CONVERT_F128_PPCF128;
+ MakeLibCallOptions CallOptions;
+ std::pair<SDValue, SDValue> Result =
+ makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(1), CallOptions,
+ dl, SDValue());
+ return Result.first;
+ }
}
// If this is a lowered altivec predicate compare, CompareOpc is set to the
@@ -10443,11 +10602,18 @@ SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
case Intrinsic::ppc_cfence: {
assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
- return SDValue(DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
- Op.getOperand(ArgStart + 1)),
- Op.getOperand(0)),
- 0);
+ SDValue Val = Op.getOperand(ArgStart + 1);
+ EVT Ty = Val.getValueType();
+ if (Ty == MVT::i128) {
+ // FIXME: Testing one of two paired registers is sufficient to guarantee
+ // ordering?
+ Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, Val);
+ }
+ return SDValue(
+ DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Val),
+ Op.getOperand(0)),
+ 0);
}
default:
break;
@@ -10510,6 +10676,59 @@ SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
}
+SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
+ SelectionDAG &DAG) const {
+ AtomicSDNode *N = cast<AtomicSDNode>(Op.getNode());
+ EVT MemVT = N->getMemoryVT();
+ assert(MemVT.getSimpleVT() == MVT::i128 &&
+ "Expect quadword atomic operations");
+ SDLoc dl(N);
+ unsigned Opc = N->getOpcode();
+ switch (Opc) {
+ case ISD::ATOMIC_LOAD: {
+ // Lower quadword atomic load to int_ppc_atomic_load_i128 which will be
+ // lowered to ppc instructions by pattern matching instruction selector.
+ SDVTList Tys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other);
+ SmallVector<SDValue, 4> Ops{
+ N->getOperand(0),
+ DAG.getConstant(Intrinsic::ppc_atomic_load_i128, dl, MVT::i32)};
+ for (int I = 1, E = N->getNumOperands(); I < E; ++I)
+ Ops.push_back(N->getOperand(I));
+ SDValue LoadedVal = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys,
+ Ops, MemVT, N->getMemOperand());
+ SDValue ValLo = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal);
+ SDValue ValHi =
+ DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal.getValue(1));
+ ValHi = DAG.getNode(ISD::SHL, dl, MVT::i128, ValHi,
+ DAG.getConstant(64, dl, MVT::i32));
+ SDValue Val =
+ DAG.getNode(ISD::OR, dl, {MVT::i128, MVT::Other}, {ValLo, ValHi});
+ return DAG.getNode(ISD::MERGE_VALUES, dl, {MVT::i128, MVT::Other},
+ {Val, LoadedVal.getValue(2)});
+ }
+ case ISD::ATOMIC_STORE: {
+ // Lower quadword atomic store to int_ppc_atomic_store_i128 which will be
+ // lowered to ppc instructions by pattern matching instruction selector.
+ SDVTList Tys = DAG.getVTList(MVT::Other);
+ SmallVector<SDValue, 4> Ops{
+ N->getOperand(0),
+ DAG.getConstant(Intrinsic::ppc_atomic_store_i128, dl, MVT::i32)};
+ SDValue Val = N->getOperand(2);
+ SDValue ValLo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, Val);
+ SDValue ValHi = DAG.getNode(ISD::SRL, dl, MVT::i128, Val,
+ DAG.getConstant(64, dl, MVT::i32));
+ ValHi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, ValHi);
+ Ops.push_back(ValLo);
+ Ops.push_back(ValHi);
+ Ops.push_back(N->getOperand(1));
+ return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops, MemVT,
+ N->getMemOperand());
+ }
+ default:
+ llvm_unreachable("Unexpected atomic opcode");
+ }
+}
+
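// Editor's note: an illustrative sketch, not part of the patch. The DAG nodes
// above merely reassemble or split the quadword value from/into two 64-bit
// halves. In scalar terms (assuming a compiler that provides the
// unsigned __int128 extension, e.g. Clang/GCC on 64-bit targets):
#include <cstdint>
static unsigned __int128 composeQuadword(uint64_t Lo, uint64_t Hi) {
  return ((unsigned __int128)Hi << 64) | Lo;   // ZERO_EXTEND + SHL(64) + OR
}
static void splitQuadword(unsigned __int128 Val, uint64_t &Lo, uint64_t &Hi) {
  Lo = (uint64_t)Val;                          // TRUNCATE to i64
  Hi = (uint64_t)(Val >> 64);                  // SRL(64) + TRUNCATE to i64
}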
SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -10537,7 +10756,6 @@ SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
SDLoc dl(Op);
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
- SDValue V3 = Op.getOperand(2);
if (VT == MVT::v2f64 && C)
return Op;
@@ -10546,18 +10764,10 @@ SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
return SDValue();
// On P10, we have legal lowering for constant and variable indices for
- // integer vectors.
+ // all vectors.
if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
- VT == MVT::v2i64)
- return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, V2, V3);
- // For f32 and f64 vectors, we have legal lowering for variable indices.
- // For f32 we also have legal lowering when the element is loaded from
- // memory.
- if (VT == MVT::v4f32 || VT == MVT::v2f64) {
- if (!C || (VT == MVT::v4f32 && dyn_cast<LoadSDNode>(V2)))
- return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, V2, V3);
+ VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
return Op;
- }
}
// Before P10, we have legal lowering for constant indices but not for
@@ -10901,6 +11111,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return LowerBSWAP(Op, DAG);
case ISD::ATOMIC_CMP_SWAP:
return LowerATOMIC_CMP_SWAP(Op, DAG);
+ case ISD::ATOMIC_STORE:
+ return LowerATOMIC_LOAD_STORE(Op, DAG);
}
}
@@ -10911,6 +11123,12 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
switch (N->getOpcode()) {
default:
llvm_unreachable("Do not know how to custom type legalize this operation!");
+ case ISD::ATOMIC_LOAD: {
+ SDValue Res = LowerATOMIC_LOAD_STORE(SDValue(N, 0), DAG);
+ Results.push_back(Res);
+ Results.push_back(Res.getValue(1));
+ break;
+ }
case ISD::READCYCLECOUNTER: {
SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
@@ -10937,6 +11155,18 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(NewInt.getValue(1));
break;
}
+ case ISD::INTRINSIC_WO_CHAIN: {
+ switch (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue()) {
+ case Intrinsic::ppc_pack_longdouble:
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
+ N->getOperand(2), N->getOperand(1)));
+ break;
+ case Intrinsic::ppc_convert_f128_to_ppcf128:
+ Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
+ break;
+ }
+ break;
+ }
case ISD::VAARG: {
if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
return;
@@ -12647,6 +12877,24 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
.addDef(Hi)
.addUse(Src, 0, PPC::sub_gp8_x0);
+ } else if (MI.getOpcode() == PPC::LQX_PSEUDO ||
+ MI.getOpcode() == PPC::STQX_PSEUDO) {
+ DebugLoc DL = MI.getDebugLoc();
+ // Ptr is used as the ptr_rc_no_r0 part of LQ/STQ's memory operand and
+ // holds the sum of RA and RB, so it has to be g8rc_and_g8rc_nox0.
+ Register Ptr =
+ F->getRegInfo().createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
+ Register Val = MI.getOperand(0).getReg();
+ Register RA = MI.getOperand(1).getReg();
+ Register RB = MI.getOperand(2).getReg();
+ BuildMI(*BB, MI, DL, TII->get(PPC::ADD8), Ptr).addReg(RA).addReg(RB);
+ BuildMI(*BB, MI, DL,
+ MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(PPC::LQ)
+ : TII->get(PPC::STQ))
+ .addReg(Val, MI.getOpcode() == PPC::LQX_PSEUDO ? RegState::Define : 0)
+ .addImm(0)
+ .addReg(Ptr);
} else {
llvm_unreachable("Unexpected instr type to insert");
}
@@ -12951,12 +13199,12 @@ static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
return true;
- for (SDNode::use_iterator UI = LoadRoot->use_begin(),
- UE = LoadRoot->use_end(); UI != UE; ++UI)
- if (((isa<MemSDNode>(*UI) &&
- cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
- UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
- Queue.push_back(*UI);
+ for (SDNode *U : LoadRoot->uses())
+ if (((isa<MemSDNode>(U) &&
+ cast<MemSDNode>(U)->getChain().getNode() == LoadRoot) ||
+ U->getOpcode() == ISD::TokenFactor) &&
+ !Visited.count(U))
+ Queue.push_back(U);
}
}
@@ -13013,11 +13261,9 @@ SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
// If all users of SETCC extend its value to a legal integer type
// then we replace SETCC with a subtraction
- for (SDNode::use_iterator UI = N->use_begin(),
- UE = N->use_end(); UI != UE; ++UI) {
- if (UI->getOpcode() != ISD::ZERO_EXTEND)
+ for (const SDNode *U : N->uses())
+ if (U->getOpcode() != ISD::ZERO_EXTEND)
return SDValue();
- }
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
auto OpSize = N->getOperand(0).getValueSizeInBits();
@@ -13194,10 +13440,7 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
if (isa<ConstantSDNode>(Inputs[i]))
continue;
- for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
- UE = Inputs[i].getNode()->use_end();
- UI != UE; ++UI) {
- SDNode *User = *UI;
+ for (const SDNode *User : Inputs[i].getNode()->uses()) {
if (User != N && !Visited.count(User))
return SDValue();
@@ -13218,10 +13461,7 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
}
for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
- for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
- UE = PromOps[i].getNode()->use_end();
- UI != UE; ++UI) {
- SDNode *User = *UI;
+ for (const SDNode *User : PromOps[i].getNode()->uses()) {
if (User != N && !Visited.count(User))
return SDValue();
@@ -13406,10 +13646,7 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
if (isa<ConstantSDNode>(Inputs[i]))
continue;
- for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
- UE = Inputs[i].getNode()->use_end();
- UI != UE; ++UI) {
- SDNode *User = *UI;
+ for (SDNode *User : Inputs[i].getNode()->uses()) {
if (User != N && !Visited.count(User))
return SDValue();
@@ -13431,10 +13668,7 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
}
for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
- for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
- UE = PromOps[i].getNode()->use_end();
- UI != UE; ++UI) {
- SDNode *User = *UI;
+ for (SDNode *User : PromOps[i].getNode()->uses()) {
if (User != N && !Visited.count(User))
return SDValue();
@@ -14753,8 +14987,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
break;
case PPCISD::SRA:
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
- if (C->isNullValue() || // 0 >>s V -> 0.
- C->isAllOnesValue()) // -1 >>s V -> -1.
+ if (C->isZero() || // 0 >>s V -> 0.
+ C->isAllOnes()) // -1 >>s V -> -1.
return N->getOperand(0);
}
break;
@@ -15126,39 +15360,36 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
int Bits = 4 /* 16 byte alignment */;
if (DAG.MaskedValueIsZero(Add->getOperand(1),
- APInt::getAllOnesValue(Bits /* alignment */)
+ APInt::getAllOnes(Bits /* alignment */)
.zext(Add.getScalarValueSizeInBits()))) {
SDNode *BasePtr = Add->getOperand(0).getNode();
- for (SDNode::use_iterator UI = BasePtr->use_begin(),
- UE = BasePtr->use_end();
- UI != UE; ++UI) {
- if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
- cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() ==
- IID) {
+ for (SDNode *U : BasePtr->uses()) {
+ if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
+ cast<ConstantSDNode>(U->getOperand(0))->getZExtValue() == IID) {
// We've found another LVSL/LVSR, and this address is an aligned
// multiple of that one. The results will be the same, so use the
// one we've just found instead.
- return SDValue(*UI, 0);
+ return SDValue(U, 0);
}
}
}
if (isa<ConstantSDNode>(Add->getOperand(1))) {
SDNode *BasePtr = Add->getOperand(0).getNode();
- for (SDNode::use_iterator UI = BasePtr->use_begin(),
- UE = BasePtr->use_end(); UI != UE; ++UI) {
- if (UI->getOpcode() == ISD::ADD &&
- isa<ConstantSDNode>(UI->getOperand(1)) &&
+ for (SDNode *U : BasePtr->uses()) {
+ if (U->getOpcode() == ISD::ADD &&
+ isa<ConstantSDNode>(U->getOperand(1)) &&
(cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
- cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) %
- (1ULL << Bits) == 0) {
- SDNode *OtherAdd = *UI;
- for (SDNode::use_iterator VI = OtherAdd->use_begin(),
- VE = OtherAdd->use_end(); VI != VE; ++VI) {
- if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
- cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() == IID) {
- return SDValue(*VI, 0);
+ cast<ConstantSDNode>(U->getOperand(1))->getZExtValue()) %
+ (1ULL << Bits) ==
+ 0) {
+ SDNode *OtherAdd = U;
+ for (SDNode *V : OtherAdd->uses()) {
+ if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
+ cast<ConstantSDNode>(V->getOperand(0))->getZExtValue() ==
+ IID) {
+ return SDValue(V, 0);
}
}
}
@@ -15482,13 +15713,13 @@ PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
if (VT == MVT::i64 && !Subtarget.isPPC64())
return SDValue();
if ((VT != MVT::i32 && VT != MVT::i64) ||
- !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
+ !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
return SDValue();
SDLoc DL(N);
SDValue N0 = N->getOperand(0);
- bool IsNegPow2 = (-Divisor).isPowerOf2();
+ bool IsNegPow2 = Divisor.isNegatedPowerOf2();
unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
@@ -15546,6 +15777,18 @@ void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
Known.Zero = ~1U; // All bits but the low one are known to be zero.
break;
}
+ break;
+ }
+ case ISD::INTRINSIC_W_CHAIN: {
+ switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
+ default:
+ break;
+ case Intrinsic::ppc_load2r:
+ // Top bits are cleared for load2r (which is the same as lhbrx).
+ Known.Zero = 0xFFFF0000;
+ break;
+ }
+ break;
}
}
}
@@ -15960,7 +16203,12 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
auto PtrVT = getPointerTy(MF.getDataLayout());
if (Depth > 0) {
- SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
+ // The link register (return address) is saved in the caller's frame,
+ // not in the callee's stack frame, so we must get the caller's frame
+ // address and load the return address at the LR offset from there.
+ SDValue FrameAddr =
+ DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
+ LowerFRAMEADDR(Op, DAG), MachinePointerInfo());
SDValue Offset =
DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
isPPC64 ? MVT::i64 : MVT::i32);
@@ -16077,6 +16325,22 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
MachineMemOperand::MOVolatile;
return true;
+ case Intrinsic::ppc_atomic_load_i128:
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::i128;
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.align = Align(16);
+ Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
+ return true;
+ case Intrinsic::ppc_atomic_store_i128:
+ Info.opc = ISD::INTRINSIC_VOID;
+ Info.memVT = MVT::i128;
+ Info.ptrVal = I.getArgOperand(2);
+ Info.offset = 0;
+ Info.align = Align(16);
+ Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
+ return true;
case Intrinsic::ppc_altivec_lvx:
case Intrinsic::ppc_altivec_lvxl:
case Intrinsic::ppc_altivec_lvebx:
@@ -17146,6 +17410,9 @@ PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
for (auto FlagSet : AddrModesMap.at(PPC::AM_DQForm))
if ((Flags & FlagSet) == FlagSet)
return PPC::AM_DQForm;
+ for (auto FlagSet : AddrModesMap.at(PPC::AM_PrefixDForm))
+ if ((Flags & FlagSet) == FlagSet)
+ return PPC::AM_PrefixDForm;
// If no other forms are selected, return an X-Form as it is the most
// general addressing mode.
return PPC::AM_XForm;
@@ -17236,6 +17503,14 @@ static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
}
}
+static bool isPCRelNode(SDValue N) {
+ return (N.getOpcode() == PPCISD::MAT_PCREL_ADDR ||
+ isValidPCRelNode<ConstantPoolSDNode>(N) ||
+ isValidPCRelNode<GlobalAddressSDNode>(N) ||
+ isValidPCRelNode<JumpTableSDNode>(N) ||
+ isValidPCRelNode<BlockAddressSDNode>(N));
+}
+
/// computeMOFlags - Given a node N and it's Parent (a MemSDNode), compute
/// the address flags of the load/store instruction that is to be matched.
unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
@@ -17253,6 +17528,26 @@ unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
if (Subtarget.hasSPE())
FlagSet |= PPC::MOF_SubtargetSPE;
+ // Check if we have a PCRel node and return early.
+ if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N))
+ return FlagSet;
+
+ // If the parent node is one of the paired load/store intrinsics, compute
+ // flags for the address computation and return early.
+ unsigned ParentOp = Parent->getOpcode();
+ if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) ||
+ (ParentOp == ISD::INTRINSIC_VOID))) {
+ unsigned ID = cast<ConstantSDNode>(Parent->getOperand(1))->getZExtValue();
+ assert(
+ ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) &&
+ "Only the paired load and store (lxvp/stxvp) intrinsics are valid.");
+ SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp) ? Parent->getOperand(2)
+ : Parent->getOperand(3);
+ computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG);
+ FlagSet |= PPC::MOF_Vector;
+ return FlagSet;
+ }
+
// Mark this as something we don't want to handle here if it is atomic
// or pre-increment instruction.
if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
@@ -17266,7 +17561,8 @@ unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
EVT MemVT = MN->getMemoryVT();
unsigned Size = MemVT.getSizeInBits();
if (MemVT.isScalarInteger()) {
- assert(Size <= 64 && "Not expecting scalar integers larger than 8 bytes!");
+ assert(Size <= 128 &&
+ "Not expecting scalar integers larger than 16 bytes!");
if (Size < 32)
FlagSet |= PPC::MOF_SubWordInt;
else if (Size == 32)
@@ -17276,9 +17572,12 @@ unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
} else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors.
if (Size == 128)
FlagSet |= PPC::MOF_Vector;
- else if (Size == 256)
- FlagSet |= PPC::MOF_Vector256;
- else
+ else if (Size == 256) {
+ assert(Subtarget.pairedVectorMemops() &&
+ "256-bit vectors are only available when paired vector memops is "
+ "enabled!");
+ FlagSet |= PPC::MOF_Vector;
+ } else
llvm_unreachable("Not expecting illegal vectors!");
} else { // Floating point type: can be scalar, f128 or vector types.
if (Size == 32 || Size == 64)
@@ -17396,6 +17695,14 @@ PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode *Parent,
// Select an X-Form load if it is not.
setXFormForUnalignedFI(N, Flags, Mode);
+ // Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node.
+ if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
+ assert(Subtarget.isUsingPCRelativeCalls() &&
+ "Must be using PC-Relative calls when a valid PC-Relative node is "
+ "present!");
+ Mode = PPC::AM_PCRel;
+ }
+
// Set Base and Disp accordingly depending on the address mode.
switch (Mode) {
case PPC::AM_DForm:
@@ -17467,6 +17774,30 @@ PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode *Parent,
Base = N;
break;
}
+ case PPC::AM_PrefixDForm: {
+ int64_t Imm34 = 0;
+ unsigned Opcode = N.getOpcode();
+ if (((Opcode == ISD::ADD) || (Opcode == ISD::OR)) &&
+ (isIntS34Immediate(N.getOperand(1), Imm34))) {
+ // N is an ADD/OR node, and its operand is a 34-bit signed immediate.
+ Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType());
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
+ Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ else
+ Base = N.getOperand(0);
+ } else if (isIntS34Immediate(N, Imm34)) {
+ // The address is a 34-bit signed immediate.
+ Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType());
+ Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
+ }
+ break;
+ }
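// Editor's note: an illustrative sketch, not part of the patch. The prefixed
// D-form displacement accepted here is a signed 34-bit immediate, i.e. a value
// in [-2^33, 2^33 - 1]. The helper name is hypothetical; roughly what
// isIntS34Immediate checks for a constant operand:
#include <cstdint>
static bool fitsSImm34(int64_t Imm) {
  return Imm >= -(int64_t(1) << 33) && Imm < (int64_t(1) << 33);
}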
+ case PPC::AM_PCRel: {
+ // When selecting PC-Relative instructions, "Base" is not utilized as
+ // we select the address as [PC+imm].
+ Disp = N;
+ break;
+ }
case PPC::AM_None:
break;
default: { // By default, X-Form is always available to be selected.
@@ -17503,10 +17834,7 @@ PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
TargetLowering::AtomicExpansionKind
PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
- unsigned Size = AI->getPointerOperand()
- ->getType()
- ->getPointerElementType()
- ->getPrimitiveSizeInBits();
+ unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits();
if (EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics() && Size == 128)
return AtomicExpansionKind::MaskedIntrinsic;
return TargetLowering::shouldExpandAtomicCmpXchgInIR(AI);
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 87579bad118f..34dce2c3172d 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -200,6 +200,14 @@ namespace llvm {
/// and 64-bit AIX.
BCTRL_LOAD_TOC,
+ /// The variants that implicitly define rounding mode for calls with
+ /// strictfp semantics.
+ CALL_RM,
+ CALL_NOP_RM,
+ CALL_NOTOC_RM,
+ BCTRL_RM,
+ BCTRL_LOAD_TOC_RM,
+
/// Return with a flag operand, matched by 'blr'
RET_FLAG,
@@ -494,6 +502,11 @@ namespace llvm {
/// Constrained floating point add in round-to-zero mode.
STRICT_FADDRTZ,
+ // NOTE: The nodes below may require PC-Rel specific patterns if the
+ // address could be PC-Relative. When adding new nodes below, consider
+ // whether or not the address can be PC-Relative and add the corresponding
+ // PC-relative patterns and tests.
+
/// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
/// byte-swapping store instruction. It byte-swaps the low "Type" bits of
/// the GPRC input, then stores it through Ptr. Type can be either i16 or
@@ -554,6 +567,14 @@ namespace llvm {
/// instructions such as LXVDSX, LXVWSX.
LD_SPLAT,
+ /// VSRC, CHAIN = ZEXT_LD_SPLAT, CHAIN, Ptr - a splatting load memory
+ /// that zero-extends.
+ ZEXT_LD_SPLAT,
+
+ /// VSRC, CHAIN = SEXT_LD_SPLAT, CHAIN, Ptr - a splatting load memory
+ /// that sign-extends.
+ SEXT_LD_SPLAT,
+
/// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
/// Maps directly to an stxvd2x instruction that will be preceded by
/// an xxswapd.
@@ -712,7 +733,9 @@ namespace llvm {
AM_DForm,
AM_DSForm,
AM_DQForm,
+ AM_PrefixDForm,
AM_XForm,
+ AM_PCRel
};
} // end namespace PPC
@@ -936,7 +959,7 @@ namespace llvm {
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. This is the actual
/// alignment, not its logarithm.
- unsigned getByValTypeAlignment(Type *Ty,
+ uint64_t getByValTypeAlignment(Type *Ty,
const DataLayout &DL) const override;
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
@@ -1246,6 +1269,7 @@ namespace llvm {
SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBSWAP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerATOMIC_LOAD_STORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index 92712c5c072b..417a6ce7e522 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -73,7 +73,7 @@ def SRL64 : SDNodeXForm<imm, [{
//
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
-let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
+let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, hasSideEffects = 0 in {
let isReturn = 1, isPredicable = 1, Uses = [LR8, RM] in
def BLR8 : XLForm_2_ext<19, 16, 20, 0, 0, (outs), (ins), "blr", IIC_BrB,
[(retflag)]>, Requires<[In64BitMode]>;
@@ -100,7 +100,7 @@ let Defs = [LR8] in
def MovePCtoLR8 : PPCEmitTimePseudo<(outs), (ins), "#MovePCtoLR8", []>,
PPC970_Unit_BRU;
-let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7, hasSideEffects = 0 in {
let Defs = [CTR8], Uses = [CTR8] in {
def BDZ8 : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$dst),
"bdz $dst">;
@@ -118,7 +118,7 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
-let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
+let isCall = 1, PPC970_Unit = 7, Defs = [LR8], hasSideEffects = 0 in {
// Convenient aliases for call instructions
let Uses = [RM] in {
def BL8 : IForm<18, 0, 1, (outs), (ins calltarget:$func),
@@ -178,6 +178,39 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
}
}
+let isCall = 1, PPC970_Unit = 7, Defs = [LR8, RM], hasSideEffects = 0,
+ isCodeGenOnly = 1, Uses = [RM] in {
+ // Convenient aliases for call instructions
+ def BL8_RM : IForm<18, 0, 1, (outs), (ins calltarget:$func),
+ "bl $func", IIC_BrB, []>; // See Pat patterns below.
+
+ def BLA8_RM : IForm<18, 1, 1, (outs), (ins abscalltarget:$func),
+ "bla $func", IIC_BrB, [(PPCcall_rm (i64 imm:$func))]>;
+ def BL8_NOP_RM : IForm_and_DForm_4_zero<18, 0, 1, 24,
+ (outs), (ins calltarget:$func),
+ "bl $func\n\tnop", IIC_BrB, []>;
+
+ def BLA8_NOP_RM : IForm_and_DForm_4_zero<18, 1, 1, 24,
+ (outs), (ins abscalltarget:$func),
+ "bla $func\n\tnop", IIC_BrB,
+ [(PPCcall_nop_rm (i64 imm:$func))]>;
+ let Predicates = [PCRelativeMemops] in {
+    // BL8_NOTOC means that the caller does not use the TOC pointer, and if
+    // it does use R2 then it is just a caller-saved register. Therefore it is
+    // safe to emit only the bl and not the nop for this instruction. The
+    // linker will not try to restore R2 after the call.
+ def BL8_NOTOC_RM : IForm<18, 0, 1, (outs),
+ (ins calltarget:$func),
+ "bl $func", IIC_BrB, []>;
+ }
+ let Uses = [CTR8, RM] in {
+ let isPredicable = 1 in
+ def BCTRL8_RM : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins),
+ "bctrl", IIC_BrB, [(PPCbctrl_rm)]>,
+ Requires<[In64BitMode]>;
+ }
+}
+
let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
Defs = [LR8, X2], Uses = [CTR8, RM], RST = 2 in {
def BCTRL8_LDinto_toc :
@@ -188,12 +221,22 @@ let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
Requires<[In64BitMode]>;
}
+let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
+ Defs = [LR8, X2, RM], Uses = [CTR8, RM], RST = 2 in {
+ def BCTRL8_LDinto_toc_RM :
+ XLForm_2_ext_and_DSForm_1<19, 528, 20, 0, 1, 58, 0, (outs),
+ (ins memrix:$src),
+ "bctrl\n\tld 2, $src", IIC_BrB,
+ [(PPCbctrl_load_toc_rm iaddrX4:$src)]>,
+ Requires<[In64BitMode]>;
+}
+
} // Interpretation64Bit
// FIXME: Duplicating this for the asm parser should be unnecessary, but the
// previous definition must be marked as CodeGen only to prevent decoding
// conflicts.
-let Interpretation64Bit = 1, isAsmParserOnly = 1 in
+let Interpretation64Bit = 1, isAsmParserOnly = 1, hasSideEffects = 0 in
let isCall = 1, PPC970_Unit = 7, Defs = [LR8], Uses = [RM] in
def BL8_TLS_ : IForm<18, 0, 1, (outs), (ins tlscall:$func),
"bl $func", IIC_BrB, []>;
@@ -214,12 +257,32 @@ def : Pat<(PPCcall_notoc (i64 tglobaladdr:$dst)),
def : Pat<(PPCcall_notoc (i64 texternalsym:$dst)),
(BL8_NOTOC texternalsym:$dst)>;
+def : Pat<(PPCcall_rm (i64 tglobaladdr:$dst)),
+ (BL8_RM tglobaladdr:$dst)>;
+def : Pat<(PPCcall_nop_rm (i64 tglobaladdr:$dst)),
+ (BL8_NOP_RM tglobaladdr:$dst)>;
+
+def : Pat<(PPCcall_rm (i64 texternalsym:$dst)),
+ (BL8_RM texternalsym:$dst)>;
+def : Pat<(PPCcall_nop_rm (i64 texternalsym:$dst)),
+ (BL8_NOP_RM texternalsym:$dst)>;
+
+def : Pat<(PPCcall_notoc_rm (i64 tglobaladdr:$dst)),
+ (BL8_NOTOC_RM tglobaladdr:$dst)>;
+def : Pat<(PPCcall_notoc_rm (i64 texternalsym:$dst)),
+ (BL8_NOTOC_RM texternalsym:$dst)>;
+
// Calls for AIX
def : Pat<(PPCcall (i64 mcsym:$dst)),
(BL8 mcsym:$dst)>;
def : Pat<(PPCcall_nop (i64 mcsym:$dst)),
(BL8_NOP mcsym:$dst)>;
+def : Pat<(PPCcall_rm (i64 mcsym:$dst)),
+ (BL8_RM mcsym:$dst)>;
+def : Pat<(PPCcall_nop_rm (i64 mcsym:$dst)),
+ (BL8_NOP_RM mcsym:$dst)>;
+
// Atomic operations
// FIXME: some of these might be used with constant operands. This will result
// in constant materialization instructions that may be redundant. We currently
@@ -408,6 +471,7 @@ def TCRETURNri8 : PPCEmitTimePseudo<(outs), (ins CTRRC8:$dst, i32imm:$offset),
"#TC_RETURNr8 $dst $offset",
[]>;
+let hasSideEffects = 0 in {
let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1,
isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR8, RM] in
def TAILBCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB,
@@ -425,6 +489,7 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
def TAILBA8 : IForm<18, 0, 0, (outs), (ins abscalltarget:$dst),
"ba $dst", IIC_BrB,
[]>;
+}
} // Interpretation64Bit
def : Pat<(PPCtc_return (i64 tglobaladdr:$dst), imm:$imm),
@@ -638,7 +703,7 @@ def XORIS8 : DForm_4<27, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2),
[(set i64:$dst, (xor i64:$src1, imm16ShiftedZExt:$src2))]>;
let isCommutable = 1 in
-defm ADD8 : XOForm_1rx<31, 266, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+defm ADD8 : XOForm_1rx<31, 266, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
"add", "$rT, $rA, $rB", IIC_IntSimple,
[(set i64:$rT, (add i64:$rA, i64:$rB))]>;
// ADD8 has a special form: reg = ADD8(reg, sym@tls) for use by the
@@ -717,7 +782,7 @@ defm SUBFC8 : XOForm_1rc<31, 8, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
"subfc", "$rT, $rA, $rB", IIC_IntGeneral,
[(set i64:$rT, (subc i64:$rB, i64:$rA))]>,
PPC970_DGroup_Cracked;
-defm SUBF8 : XOForm_1rx<31, 40, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+defm SUBF8 : XOForm_1rx<31, 40, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
"subf", "$rT, $rA, $rB", IIC_IntGeneral,
[(set i64:$rT, (sub i64:$rB, i64:$rA))]>;
defm NEG8 : XOForm_3r<31, 104, 0, (outs g8rc:$rT), (ins g8rc:$rA),
@@ -961,7 +1026,7 @@ defm DIVDEU : XOForm_1rcr<31, 393, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
[(set i64:$rT, (int_ppc_divdeu g8rc:$rA, g8rc:$rB))]>,
isPPC64, Requires<[HasExtDiv]>;
let isCommutable = 1 in
-defm MULLD : XOForm_1rx<31, 233, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+defm MULLD : XOForm_1rx<31, 233, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
"mulld", "$rT, $rA, $rB", IIC_IntMulHD,
[(set i64:$rT, (mul i64:$rA, i64:$rB))]>, isPPC64;
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
@@ -1300,9 +1365,12 @@ def LDtocBA: PPCEmitTimePseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
def LDX : XForm_1_memOp<31, 21, (outs g8rc:$rD), (ins memrr:$src),
"ldx $rD, $src", IIC_LdStLD,
[(set i64:$rD, (load XForm:$src))]>, isPPC64;
+
+let Predicates = [IsISA2_06] in {
def LDBRX : XForm_1_memOp<31, 532, (outs g8rc:$rD), (ins memrr:$src),
"ldbrx $rD, $src", IIC_LdStLoad,
[(set i64:$rD, (PPClbrx ForceXForm:$src, i64))]>, isPPC64;
+}
let mayLoad = 1, hasSideEffects = 0, isCodeGenOnly = 1 in {
def LHBRX8 : XForm_1_memOp<31, 790, (outs g8rc:$rD), (ins memrr:$src),
@@ -1340,12 +1408,25 @@ def LQ : DQForm_RTp5_RA17_MEM<56, 0,
[]>,
RegConstraint<"@earlyclobber $RTp">,
isPPC64;
+// We don't really have an LQX instruction in the ISA, so make a pseudo so
+// that we can handle the X-Form during isel. Making it a pre-RA pseudo may
+// expose opportunities for optimizations (CSE, LICM, etc.) on the result of
+// adding RA and RB.
+def LQX_PSEUDO : PPCCustomInserterPseudo<(outs g8prc:$RTp),
+ (ins memrr:$src), "#LQX_PSEUDO", []>;
+
def RESTORE_QUADWORD : PPCEmitTimePseudo<(outs g8prc:$RTp), (ins memrix:$src),
"#RESTORE_QUADWORD", []>;
}
}
+def : Pat<(int_ppc_atomic_load_i128 iaddrX16:$src),
+ (SPLIT_QUADWORD (LQ memrix16:$src))>;
+
+def : Pat<(int_ppc_atomic_load_i128 ForceXForm:$src),
+ (SPLIT_QUADWORD (LQX_PSEUDO memrr:$src))>;
+
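For context (not part of the diff): the LQX_PSEUDO pseudo and the two patterns above back the int_ppc_atomic_load_i128 intrinsic used for 128-bit atomic loads. A hedged source-level example that can reach this path, assuming a subtarget with quadword atomics and the backend's quadword-atomics expansion enabled (it was still gated behind an option around this change):

    // Sketch only: whether this inlines to lq/lqx (rather than calling into
    // libatomic) depends on the target CPU and feature/option flags.
    unsigned __int128 load_u128(const unsigned __int128 *p) {
      return __atomic_load_n(p, __ATOMIC_RELAXED);
    }

The X-Form pseudo exists only so that instruction selection has a register+register addressing alternative; as the comment above its definition explains, expanding it pre-RA keeps the RA+RB add visible to later optimizations.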
// Support for medium and large code model.
let hasSideEffects = 0 in {
let isReMaterializable = 1 in {
@@ -1523,10 +1604,13 @@ def STDX : XForm_8_memOp<31, 149, (outs), (ins g8rc:$rS, memrr:$dst),
"stdx $rS, $dst", IIC_LdStSTD,
[(store i64:$rS, XForm:$dst)]>, isPPC64,
PPC970_DGroup_Cracked;
+
+let Predicates = [IsISA2_06] in {
def STDBRX: XForm_8_memOp<31, 660, (outs), (ins g8rc:$rS, memrr:$dst),
"stdbrx $rS, $dst", IIC_LdStStore,
[(PPCstbrx i64:$rS, ForceXForm:$dst, i64)]>, isPPC64,
PPC970_DGroup_Cracked;
+}
let mayStore = 1, hasNoSchedulingInfo = 1 in {
// Normal 16-byte stores.
@@ -1534,12 +1618,28 @@ let mayStore = 1, hasNoSchedulingInfo = 1 in {
def STQ : DSForm_1<62, 2, (outs), (ins g8prc:$RSp, memrix:$dst),
"stq $RSp, $dst", IIC_LdStSTQ,
[]>, isPPC64;
+
+def STQX_PSEUDO : PPCCustomInserterPseudo<(outs),
+ (ins g8prc:$RSp, memrr:$dst),
+ "#STQX_PSEUDO", []>;
+
def SPILL_QUADWORD : PPCEmitTimePseudo<(outs), (ins g8prc:$RSp, memrix:$dst),
"#SPILL_QUADWORD", []>;
}
}
+def BUILD_QUADWORD : PPCPostRAExpPseudo<
+ (outs g8prc:$RTp),
+ (ins g8rc:$lo, g8rc:$hi),
+ "#BUILD_QUADWORD", []>;
+
+def : Pat<(int_ppc_atomic_store_i128 i64:$lo, i64:$hi, DSForm:$dst),
+ (STQ (BUILD_QUADWORD g8rc:$lo, g8rc:$hi), memrix:$dst)>;
+
+def : Pat<(int_ppc_atomic_store_i128 i64:$lo, i64:$hi, ForceXForm:$dst),
+ (STQX_PSEUDO (BUILD_QUADWORD g8rc:$lo, g8rc:$hi), memrr:$dst)>;
+
// Stores with Update (pre-inc).
let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in {
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
@@ -1670,6 +1770,13 @@ def HASHCHKP : XForm_XD6_RA5_RB5<31, 690, (outs),
"hashchkp $RB, $D_RA_XD", IIC_IntGeneral, []>;
}
+let Interpretation64Bit = 1, isCodeGenOnly = 1, hasSideEffects = 1 in
+def ADDEX8 : Z23Form_RTAB5_CY2<31, 170, (outs g8rc:$rT),
+ (ins g8rc:$rA, g8rc:$rB, u2imm:$CY),
+ "addex $rT, $rA, $rB, $CY", IIC_IntGeneral,
+ [(set i64:$rT, (int_ppc_addex i64:$rA, i64:$rB,
+ timm:$CY))]>;
+
//===----------------------------------------------------------------------===//
// Instruction Patterns
//
@@ -1833,8 +1940,6 @@ def SLBSYNC : XForm_0<31, 338, (outs), (ins), "slbsync", IIC_SprSLBSYNC, []>;
def : Pat<(int_ppc_stdcx ForceXForm:$dst, g8rc:$A),
(STDCX g8rc:$A, ForceXForm:$dst)>;
-def : Pat<(int_ppc_tdw g8rc:$A, g8rc:$B, i32:$IMM),
- (TD $IMM, $A, $B)>;
// trapd
def : Pat<(int_ppc_trapd g8rc:$A),
diff --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
index 2bc7fb2a1a5f..1e0e2d88e54b 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -1518,8 +1518,8 @@ def VPRTYBQ : VX_VT5_EO5_VB5<1538, 10, "vprtybq", [(set v1i128:$vD,
(int_ppc_altivec_vprtybq v1i128:$vB))]>;
// Vector (Bit) Permute (Right-indexed)
-def VBPERMD : VXForm_1<1484, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vbpermd $vD, $vA, $vB", IIC_VecFP, []>;
+def VBPERMD : VX1_Int_Ty3<1484, "vbpermd", int_ppc_altivec_vbpermd,
+ v2i64, v2i64, v16i8>;
def VPERMR : VAForm_1a<59, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC),
"vpermr $vD, $vA, $vB, $vC", IIC_VecFP, []>;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/llvm/lib/Target/PowerPC/PPCInstrFormats.td
index 91b507ea6c4c..f7e4c0708d7d 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrFormats.td
@@ -889,7 +889,7 @@ class XForm_16b<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
}
class XForm_htm0<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
- string asmstr, InstrItinClass itin, list<dag> pattern>
+ string asmstr, InstrItinClass itin>
: I<opcode, OOL, IOL, asmstr, itin> {
bit R;
@@ -903,7 +903,7 @@ class XForm_htm0<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
}
class XForm_htm1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
- string asmstr, InstrItinClass itin, list<dag> pattern>
+ string asmstr, InstrItinClass itin>
: I<opcode, OOL, IOL, asmstr, itin> {
bit A;
@@ -916,7 +916,7 @@ class XForm_htm1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
}
class XForm_htm2<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
+ InstrItinClass itin>
: I<opcode, OOL, IOL, asmstr, itin> {
bit L;
@@ -930,7 +930,7 @@ class XForm_htm2<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
}
class XForm_htm3<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
+ InstrItinClass itin>
: I<opcode, OOL, IOL, asmstr, itin> {
bits<3> BF;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrHTM.td b/llvm/lib/Target/PowerPC/PPCInstrHTM.td
index e59a08774dc5..ec1c397ff57f 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrHTM.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrHTM.td
@@ -29,10 +29,10 @@ let Predicates = [HasHTM] in {
let Defs = [CR0] in {
def TBEGIN : XForm_htm0 <31, 654,
- (outs), (ins u1imm:$R), "tbegin. $R", IIC_SprMTSPR, []>;
+ (outs), (ins u1imm:$R), "tbegin. $R", IIC_SprMTSPR>;
def TEND : XForm_htm1 <31, 686,
- (outs), (ins u1imm:$A), "tend. $A", IIC_SprMTSPR, []>;
+ (outs), (ins u1imm:$A), "tend. $A", IIC_SprMTSPR>;
def TABORT : XForm_base_r3xo <31, 910,
(outs), (ins gprc:$A), "tabort. $A", IIC_SprMTSPR,
@@ -62,7 +62,7 @@ def TABORTDCI : XForm_base_r3xo <31, 878,
isRecordForm;
def TSR : XForm_htm2 <31, 750,
- (outs), (ins u1imm:$L), "tsr. $L", IIC_SprMTSPR, []>,
+ (outs), (ins u1imm:$L), "tsr. $L", IIC_SprMTSPR>,
isRecordForm;
def TRECLAIM : XForm_base_r3xo <31, 942,
@@ -84,7 +84,7 @@ def TRECHKPT : XForm_base_r3xo <31, 1006,
}
def TCHECK : XForm_htm3 <31, 718,
- (outs crrc:$BF), (ins), "tcheck $BF", IIC_SprMTSPR, []>;
+ (outs crrc:$BF), (ins), "tcheck $BF", IIC_SprMTSPR>;
// Builtins
// All HTM instructions, with the exception of tcheck, set CR0 with the
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 9dd35d5f44d1..649a150866b4 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -35,10 +35,10 @@
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -1109,6 +1109,8 @@ bool PPCInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
case PPC::XXLXORdpz:
case PPC::XXLEQVOnes:
case PPC::XXSPLTI32DX:
+ case PPC::XXSPLTIW:
+ case PPC::XXSPLTIDP:
case PPC::V_SET0B:
case PPC::V_SET0H:
case PPC::V_SET0:
@@ -1541,6 +1543,11 @@ bool PPCInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
return false;
+ // If the conditional branch uses a physical register, then it cannot be
+ // turned into a select.
+ if (Register::isPhysicalRegister(Cond[1].getReg()))
+ return false;
+
// Check register classes.
const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
const TargetRegisterClass *RC =
@@ -2239,11 +2246,13 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
return true;
} else if (OpC == PPC::BCTR || OpC == PPC::BCTR8 || OpC == PPC::BCTRL ||
- OpC == PPC::BCTRL8) {
+ OpC == PPC::BCTRL8 || OpC == PPC::BCTRL_RM ||
+ OpC == PPC::BCTRL8_RM) {
if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR)
llvm_unreachable("Cannot predicate bctr[l] on the ctr register");
- bool setLR = OpC == PPC::BCTRL || OpC == PPC::BCTRL8;
+ bool setLR = OpC == PPC::BCTRL || OpC == PPC::BCTRL8 ||
+ OpC == PPC::BCTRL_RM || OpC == PPC::BCTRL8_RM;
bool isPPC64 = Subtarget.isPPC64();
if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
@@ -2267,6 +2276,9 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
MachineInstrBuilder(*MI.getParent()->getParent(), MI)
.addReg(isPPC64 ? PPC::LR8 : PPC::LR, RegState::Implicit)
.addReg(isPPC64 ? PPC::LR8 : PPC::LR, RegState::ImplicitDefine);
+ if (OpC == PPC::BCTRL_RM || OpC == PPC::BCTRL8_RM)
+ MachineInstrBuilder(*MI.getParent()->getParent(), MI)
+ .addReg(PPC::RM, RegState::ImplicitDefine);
return true;
}
@@ -2343,8 +2355,8 @@ bool PPCInstrInfo::ClobbersPredicate(MachineInstr &MI,
}
bool PPCInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
- Register &SrcReg2, int &Mask,
- int &Value) const {
+ Register &SrcReg2, int64_t &Mask,
+ int64_t &Value) const {
unsigned Opc = MI.getOpcode();
switch (Opc) {
@@ -2373,7 +2385,8 @@ bool PPCInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
}
bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
- Register SrcReg2, int Mask, int Value,
+ Register SrcReg2, int64_t Mask,
+ int64_t Value,
const MachineRegisterInfo *MRI) const {
if (DisableCmpOpt)
return false;
@@ -3009,7 +3022,7 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
.addReg(SrcVSR + VecNo)
.addReg(SrcVSR + VecNo);
}
- // BUILD_UACC is expanded to 4 copies of the underlying vsx regisers.
+ // BUILD_UACC is expanded to 4 copies of the underlying vsx registers.
// So after building the 4 copies, we can replace the BUILD_UACC instruction
// with a NOP.
LLVM_FALLTHROUGH;
@@ -3103,6 +3116,7 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return true;
}
+  // FIXME: Maybe we can expand this in the 'PowerPC Expand Atomic' pass.
case PPC::CFENCE8: {
auto Val = MI.getOperand(0).getReg();
BuildMI(MBB, MI, DL, get(PPC::CMPD), PPC::CR7).addReg(Val).addReg(Val);
@@ -3770,7 +3784,7 @@ bool PPCInstrInfo::combineRLWINM(MachineInstr &MI,
bool Simplified = false;
// If final mask is 0, MI result should be 0 too.
- if (FinalMask.isNullValue()) {
+ if (FinalMask.isZero()) {
bool Is64Bit =
(MI.getOpcode() == PPC::RLWINM8 || MI.getOpcode() == PPC::RLWINM8_rec);
Simplified = true;
@@ -5241,8 +5255,7 @@ PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt,
return false;
const IntegerType *IntTy =
dyn_cast<IntegerType>(CalleeFn->getReturnType());
- const AttributeSet &Attrs =
- CalleeFn->getAttributes().getRetAttributes();
+ const AttributeSet &Attrs = CalleeFn->getAttributes().getRetAttrs();
if (IntTy && IntTy->getBitWidth() <= 32)
return Attrs.hasAttribute(SignExt ? Attribute::SExt :
Attribute::ZExt);
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 404156de232f..2cfd53de3290 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -524,10 +524,11 @@ public:
// Comparison optimization.
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
- Register &SrcReg2, int &Mask, int &Value) const override;
+ Register &SrcReg2, int64_t &Mask,
+ int64_t &Value) const override;
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
- Register SrcReg2, int Mask, int Value,
+ Register SrcReg2, int64_t Mask, int64_t Value,
const MachineRegisterInfo *MRI) const override;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index f53e1b89626f..d83ecc699b19 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -316,6 +316,24 @@ def PPCbctrl_load_toc : SDNode<"PPCISD::BCTRL_LOAD_TOC",
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
+// Call nodes for strictfp calls (that define RM).
+def PPCcall_rm : SDNode<"PPCISD::CALL_RM", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+def PPCcall_nop_rm : SDNode<"PPCISD::CALL_NOP_RM", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+def PPCcall_notoc_rm : SDNode<"PPCISD::CALL_NOTOC_RM", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+def PPCbctrl_rm : SDNode<"PPCISD::BCTRL_RM", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+def PPCbctrl_load_toc_rm : SDNode<"PPCISD::BCTRL_LOAD_TOC_RM",
+ SDTypeProfile<0, 1, []>,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+
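These _RM nodes exist because a call made under strictfp semantics may both depend on and change the dynamic floating-point rounding mode, so the matching call instructions (BL_RM, BL8_RM, BCTRL_RM, etc. earlier in this diff) are modelled as implicitly defining RM. A hedged C++ motivation, not taken from the diff; the fesetround usage is purely illustrative:

    #include <cfenv>

    double callee();

    // Sketch: under strict FP semantics (constrained FP / FENV_ACCESS), the
    // rounding state set before the call is live across it, and the callee
    // may itself change the mode, so the call cannot be assumed to leave the
    // RM register untouched.
    double caller(double x) {
      std::fesetround(FE_TOWARDZERO);
      double y = callee();
      return x + y;  // must round in whatever mode is current after the call
    }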
def retflag : SDNode<"PPCISD::RET_FLAG", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
@@ -1152,15 +1170,14 @@ def addr : ComplexPattern<iPTR, 1, "SelectAddr",[], []>;
/// This is just the offset part of iaddr, used for preinc.
def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>;
-// PC Relative Address
-def pcreladdr : ComplexPattern<iPTR, 1, "SelectAddrPCRel", [], []>;
-
// Load and Store Instruction Selection addressing modes.
def DForm : ComplexPattern<iPTR, 2, "SelectDForm", [], [SDNPWantParent]>;
def DSForm : ComplexPattern<iPTR, 2, "SelectDSForm", [], [SDNPWantParent]>;
def DQForm : ComplexPattern<iPTR, 2, "SelectDQForm", [], [SDNPWantParent]>;
def XForm : ComplexPattern<iPTR, 2, "SelectXForm", [], [SDNPWantParent]>;
def ForceXForm : ComplexPattern<iPTR, 2, "SelectForceXForm", [], [SDNPWantParent]>;
+def PCRelForm : ComplexPattern<iPTR, 2, "SelectPCRelForm", [], [SDNPWantParent]>;
+def PDForm : ComplexPattern<iPTR, 2, "SelectPDForm", [], [SDNPWantParent]>;
//===----------------------------------------------------------------------===//
// PowerPC Instruction Predicate Definitions.
@@ -1183,6 +1200,7 @@ def NaNsFPMath
: Predicate<"!Subtarget->getTargetMachine().Options.NoNaNsFPMath">;
def HasBPERMD : Predicate<"Subtarget->hasBPERMD()">;
def HasExtDiv : Predicate<"Subtarget->hasExtDiv()">;
+def IsISA2_06 : Predicate<"Subtarget->isISA2_06()">;
def IsISA2_07 : Predicate<"Subtarget->isISA2_07()">;
def IsISA3_0 : Predicate<"Subtarget->isISA3_0()">;
def HasFPU : Predicate<"Subtarget->hasFPU()">;
@@ -1272,7 +1290,7 @@ multiclass XOForm_1r<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
// Multiclass for instructions which have a record overflow form as well
// as a record form but no carry (i.e. mulld, mulldo, subf, subfo, etc.)
-multiclass XOForm_1rx<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
+multiclass XOForm_1rx<bits<6> opcode, bits<9> xo, dag OOL, dag IOL,
string asmbase, string asmstr, InstrItinClass itin,
list<dag> pattern> {
let BaseName = asmbase in {
@@ -1649,7 +1667,7 @@ def RESTORE_CRBIT : PPCEmitTimePseudo<(outs crbitrc:$cond), (ins memri:$F),
"#RESTORE_CRBIT", []>;
}
-let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
+let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, hasSideEffects = 0 in {
let isPredicable = 1, isReturn = 1, Uses = [LR, RM] in
def BLR : XLForm_2_ext<19, 16, 20, 0, 0, (outs), (ins), "blr", IIC_BrB,
[(retflag)]>, Requires<[In32BitMode]>;
@@ -1690,7 +1708,8 @@ let Defs = [LR] in
def MoveGOTtoLR : PPCEmitTimePseudo<(outs), (ins), "#MoveGOTtoLR", []>,
PPC970_Unit_BRU;
-let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
+ hasSideEffects = 0 in {
let isBarrier = 1 in {
let isPredicable = 1 in
def B : IForm<18, 0, 0, (outs), (ins directbrtarget:$dst),
@@ -1782,7 +1801,8 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
}
// The unconditional BCL used by the SjLj setjmp code.
-let isCall = 1, hasCtrlDep = 1, isCodeGenOnly = 1, PPC970_Unit = 7 in {
+let isCall = 1, hasCtrlDep = 1, isCodeGenOnly = 1, PPC970_Unit = 7,
+ hasSideEffects = 0 in {
let Defs = [LR], Uses = [RM] in {
def BCLalways : BForm_2<16, 20, 31, 0, 1, (outs), (ins condbrtarget:$dst),
"bcl 20, 31, $dst">;
@@ -1890,6 +1910,26 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
}
}
+let isCall = 1, PPC970_Unit = 7, Defs = [LR, RM], isCodeGenOnly = 1 in {
+ // Convenient aliases for call instructions
+ let Uses = [RM] in {
+ def BL_RM : IForm<18, 0, 1, (outs), (ins calltarget:$func),
+ "bl $func", IIC_BrB, []>; // See Pat patterns below.
+ def BLA_RM : IForm<18, 1, 1, (outs), (ins abscalltarget:$func),
+ "bla $func", IIC_BrB, [(PPCcall_rm (i32 imm:$func))]>;
+
+ def BL_NOP_RM : IForm_and_DForm_4_zero<18, 0, 1, 24,
+ (outs), (ins calltarget:$func),
+ "bl $func\n\tnop", IIC_BrB, []>;
+ }
+ let Uses = [CTR, RM] in {
+ let isPredicable = 1 in
+ def BCTRL_RM : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins),
+ "bctrl", IIC_BrB, [(PPCbctrl_rm)]>,
+ Requires<[In32BitMode]>;
+ }
+}
+
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
def TCRETURNdi :PPCEmitTimePseudo< (outs),
(ins calltarget:$dst, i32imm:$offset),
@@ -1916,8 +1956,16 @@ let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
}
+let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
+ Defs = [LR, R2, RM], Uses = [CTR, RM], RST = 2 in {
+ def BCTRL_LWZinto_toc_RM:
+ XLForm_2_ext_and_DForm_1<19, 528, 20, 0, 1, 32, (outs),
+ (ins memri:$src), "bctrl\n\tlwz 2, $src", IIC_BrB,
+ [(PPCbctrl_load_toc_rm iaddr:$src)]>, Requires<[In32BitMode]>;
-let isCodeGenOnly = 1 in {
+}
+
+let isCodeGenOnly = 1, hasSideEffects = 0 in {
let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1,
isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR, RM] in
@@ -3106,14 +3154,14 @@ def MTFSB1 : XForm_43<63, 38, (outs), (ins u5imm:$FM),
PPC970_DGroup_Single, PPC970_Unit_FPU;
}
-let Defs = [RM] in {
+let Defs = [RM], hasSideEffects = 1 in {
let isCodeGenOnly = 1 in
def MTFSFb : XFLForm<63, 711, (outs), (ins i32imm:$FM, f8rc:$rT),
"mtfsf $FM, $rT", IIC_IntMTFSB0,
[(int_ppc_mtfsf timm:$FM, f64:$rT)]>,
PPC970_DGroup_Single, PPC970_Unit_FPU;
}
-let Uses = [RM] in {
+let Uses = [RM], hasSideEffects = 1 in {
def MFFS : XForm_42<63, 583, (outs f8rc:$rT), (ins),
"mffs $rT", IIC_IntMFFS,
[(set f64:$rT, (PPCmffs))]>,
@@ -3170,7 +3218,7 @@ def ADDEX : Z23Form_RTAB5_CY2<31, 170, (outs gprc:$rT),
let PPC970_Unit = 1, hasSideEffects = 0 in { // FXU Operations.
// XO-Form instructions. Arithmetic instructions that can set overflow bit
let isCommutable = 1 in
-defm ADD4 : XOForm_1rx<31, 266, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+defm ADD4 : XOForm_1rx<31, 266, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
"add", "$rT, $rA, $rB", IIC_IntSimple,
[(set i32:$rT, (add i32:$rA, i32:$rB))]>;
let isCodeGenOnly = 1 in
@@ -3204,11 +3252,11 @@ defm MULHW : XOForm_1r<31, 75, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
defm MULHWU : XOForm_1r<31, 11, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
"mulhwu", "$rT, $rA, $rB", IIC_IntMulHWU,
[(set i32:$rT, (mulhu i32:$rA, i32:$rB))]>;
-defm MULLW : XOForm_1rx<31, 235, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+defm MULLW : XOForm_1rx<31, 235, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
"mullw", "$rT, $rA, $rB", IIC_IntMulHW,
[(set i32:$rT, (mul i32:$rA, i32:$rB))]>;
} // isCommutable
-defm SUBF : XOForm_1rx<31, 40, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+defm SUBF : XOForm_1rx<31, 40, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
"subf", "$rT, $rA, $rB", IIC_IntGeneral,
[(set i32:$rT, (sub i32:$rB, i32:$rA))]>;
defm SUBFC : XOForm_1rc<31, 8, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
@@ -3433,6 +3481,12 @@ def : Pat<(PPCcall (i32 tglobaladdr:$dst)),
def : Pat<(PPCcall (i32 texternalsym:$dst)),
(BL texternalsym:$dst)>;
+def : Pat<(PPCcall_rm (i32 tglobaladdr:$dst)),
+ (BL_RM tglobaladdr:$dst)>;
+
+def : Pat<(PPCcall_rm (i32 texternalsym:$dst)),
+ (BL_RM texternalsym:$dst)>;
+
// Calls for AIX only
def : Pat<(PPCcall (i32 mcsym:$dst)),
(BL mcsym:$dst)>;
@@ -3443,6 +3497,15 @@ def : Pat<(PPCcall_nop (i32 mcsym:$dst)),
def : Pat<(PPCcall_nop (i32 texternalsym:$dst)),
(BL_NOP texternalsym:$dst)>;
+def : Pat<(PPCcall_rm (i32 mcsym:$dst)),
+ (BL_RM mcsym:$dst)>;
+
+def : Pat<(PPCcall_nop_rm (i32 mcsym:$dst)),
+ (BL_NOP_RM mcsym:$dst)>;
+
+def : Pat<(PPCcall_nop_rm (i32 texternalsym:$dst)),
+ (BL_NOP_RM texternalsym:$dst)>;
+
def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm),
(TCRETURNdi tglobaladdr:$dst, imm:$imm)>;
@@ -4501,7 +4564,7 @@ def MCRFS : XLForm_3<63, 64, (outs crrc:$BF), (ins crrc:$BFA),
// All MTFSF variants may change the rounding mode so conservatively set it
// as an implicit def for all of them.
let Predicates = [HasFPU] in {
-let Defs = [RM] in {
+let Defs = [RM], hasSideEffects = 1 in {
let isCodeGenOnly = 1,
Pattern = [(int_ppc_mtfsfi timm:$BF, timm:$U)], W = 0 in
def MTFSFIb : XLForm_4<63, 134, (outs), (ins u3imm:$BF, u4imm:$U),
@@ -5059,7 +5122,7 @@ def RLWNMbm_rec : PPCAsmPseudo<"rlwnm. $rA, $rS, $n, $b",
// These generic branch instruction forms are used for the assembler parser only.
// Defs and Uses are conservative, since we don't know the BO value.
-let PPC970_Unit = 7, isBranch = 1 in {
+let PPC970_Unit = 7, isBranch = 1, hasSideEffects = 0 in {
let Defs = [CTR], Uses = [CTR, RM] in {
def gBC : BForm_3<16, 0, 0, (outs),
(ins u5imm:$bo, crbitrc:$bi, condbrtarget:$dst),
@@ -5475,8 +5538,6 @@ def : Pat<(int_ppc_stwcx ForceXForm:$dst, gprc:$A),
(STWCX gprc:$A, ForceXForm:$dst)>;
def : Pat<(int_ppc_stbcx ForceXForm:$dst, gprc:$A),
(STBCX gprc:$A, ForceXForm:$dst)>;
-def : Pat<(int_ppc_tw gprc:$A, gprc:$B, i32:$IMM),
- (TW $IMM, $A, $B)>;
def : Pat<(int_ppc_trap gprc:$A),
(TWI 24, $A, 0)>;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
index b183dbd4b3bb..a19289e96b3e 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -29,9 +29,6 @@ def SDT_PPCPairExtractVsx : SDTypeProfile<1, 2, [
def SDT_PPCxxmfacc : SDTypeProfile<1, 1, [
SDTCisVT<0, v512i1>, SDTCisVT<1, v512i1>
]>;
-def SDT_PPCVecInsertElt : SDTypeProfile<1, 3, [
- SDTCisVec<0>, SDTCisVec<1>, SDTCisInt<3>
-]>;
//===----------------------------------------------------------------------===//
// ISA 3.1 specific PPCISD nodes.
@@ -45,7 +42,6 @@ def PPCAccExtractVsx : SDNode<"PPCISD::EXTRACT_VSX_REG", SDT_PPCAccExtractVsx,
def PPCPairExtractVsx : SDNode<"PPCISD::EXTRACT_VSX_REG", SDT_PPCPairExtractVsx,
[]>;
def PPCxxmfacc : SDNode<"PPCISD::XXMFACC", SDT_PPCxxmfacc, []>;
-def PPCvecinsertelt : SDNode<"PPCISD::VECINSERT", SDT_PPCVecInsertElt, []>;
//===----------------------------------------------------------------------===//
@@ -621,7 +617,7 @@ class 8LS_DForm_R_XTp5_SI34_MEM<bits<6> opcode, dag OOL, dag IOL, string asmstr,
let Inst{48-63} = D_RA{15-0}; // D
}
-multiclass 8LS_DForm_R_XTp5_SI34_MEM_p<bits<6> pref, bits<6> opcode, dag OOL,
+multiclass 8LS_DForm_R_XTp5_SI34_MEM_p<bits<6> opcode, dag OOL,
dag IOL, dag PCRel_IOL,
string asmstr, InstrItinClass itin> {
def NAME : 8LS_DForm_R_XTp5_SI34_MEM<opcode, OOL, IOL,
@@ -1652,208 +1648,201 @@ let mayLoad = 0, mayStore = 1, Predicates = [PairedVectorMemops] in {
let mayLoad = 1, mayStore = 0, Predicates = [PairedVectorMemops, PrefixInstrs] in {
defm PLXVP :
- 8LS_DForm_R_XTp5_SI34_MEM_p<1, 58, (outs vsrprc:$XTp), (ins memri34:$D_RA),
+ 8LS_DForm_R_XTp5_SI34_MEM_p<58, (outs vsrprc:$XTp), (ins memri34:$D_RA),
(ins memri34_pcrel:$D_RA), "plxvp $XTp, $D_RA",
IIC_LdStLFD>;
}
let mayLoad = 0, mayStore = 1, Predicates = [PairedVectorMemops, PrefixInstrs] in {
defm PSTXVP :
- 8LS_DForm_R_XTp5_SI34_MEM_p<1, 62, (outs), (ins vsrprc:$XTp, memri34:$D_RA),
+ 8LS_DForm_R_XTp5_SI34_MEM_p<62, (outs), (ins vsrprc:$XTp, memri34:$D_RA),
(ins vsrprc:$XTp, memri34_pcrel:$D_RA),
"pstxvp $XTp, $D_RA", IIC_LdStLFD>;
}
let Predicates = [PairedVectorMemops] in {
// Intrinsics for Paired Vector Loads.
- def : Pat<(v256i1 (int_ppc_vsx_lxvp iaddrX16:$src)), (LXVP memrix16:$src)>;
- def : Pat<(v256i1 (int_ppc_vsx_lxvp xaddrX16:$src)), (LXVPX xaddrX16:$src)>;
+ def : Pat<(v256i1 (int_ppc_vsx_lxvp DQForm:$src)), (LXVP memrix16:$src)>;
+ def : Pat<(v256i1 (int_ppc_vsx_lxvp XForm:$src)), (LXVPX XForm:$src)>;
let Predicates = [PairedVectorMemops, PrefixInstrs] in {
- def : Pat<(v256i1 (int_ppc_vsx_lxvp iaddrX34:$src)), (PLXVP memri34:$src)>;
+ def : Pat<(v256i1 (int_ppc_vsx_lxvp PDForm:$src)), (PLXVP memri34:$src)>;
}
// Intrinsics for Paired Vector Stores.
- def : Pat<(int_ppc_vsx_stxvp v256i1:$XSp, iaddrX16:$dst),
+ def : Pat<(int_ppc_vsx_stxvp v256i1:$XSp, DQForm:$dst),
(STXVP $XSp, memrix16:$dst)>;
- def : Pat<(int_ppc_vsx_stxvp v256i1:$XSp, xaddrX16:$dst),
- (STXVPX $XSp, xaddrX16:$dst)>;
+ def : Pat<(int_ppc_vsx_stxvp v256i1:$XSp, XForm:$dst),
+ (STXVPX $XSp, XForm:$dst)>;
let Predicates = [PairedVectorMemops, PrefixInstrs] in {
- def : Pat<(int_ppc_vsx_stxvp v256i1:$XSp, iaddrX34:$dst),
+ def : Pat<(int_ppc_vsx_stxvp v256i1:$XSp, PDForm:$dst),
(PSTXVP $XSp, memri34:$dst)>;
}
}
-// TODO: We have an added complexity of 500 here. This is only a temporary
-// solution to have tablegen consider these patterns first. The way we do
-// addressing for PowerPC is complex depending on available D form, X form, or
-// aligned D form loads/stores like DS and DQ forms. The prefixed
-// instructions in this file also add additional PC Relative loads/stores
-// and D form loads/stores with 34 bit immediates. It is very difficult to force
-// instruction selection to consistently pick these first without the current
-// added complexity. Once pc-relative implementation is complete, a set of
-// follow-up patches will address this refactoring and the AddedComplexity will
-// be removed.
-let Predicates = [PCRelativeMemops], AddedComplexity = 500 in {
+let Predicates = [PCRelativeMemops] in {
// Load i32
- def : Pat<(i32 (zextloadi1 (PPCmatpcreladdr pcreladdr:$ga))),
+ def : Pat<(i32 (zextloadi1 (PPCmatpcreladdr PCRelForm:$ga))),
(PLBZpc $ga, 0)>;
- def : Pat<(i32 (extloadi1 (PPCmatpcreladdr pcreladdr:$ga))),
+ def : Pat<(i32 (extloadi1 (PPCmatpcreladdr PCRelForm:$ga))),
(PLBZpc $ga, 0)>;
- def : Pat<(i32 (zextloadi8 (PPCmatpcreladdr pcreladdr:$ga))),
+ def : Pat<(i32 (zextloadi8 (PPCmatpcreladdr PCRelForm:$ga))),
(PLBZpc $ga, 0)>;
- def : Pat<(i32 (extloadi8 (PPCmatpcreladdr pcreladdr:$ga))),
+ def : Pat<(i32 (extloadi8 (PPCmatpcreladdr PCRelForm:$ga))),
(PLBZpc $ga, 0)>;
- def : Pat<(i32 (sextloadi16 (PPCmatpcreladdr pcreladdr:$ga))),
+ def : Pat<(i32 (sextloadi16 (PPCmatpcreladdr PCRelForm:$ga))),
(PLHApc $ga, 0)>;
- def : Pat<(i32 (zextloadi16 (PPCmatpcreladdr pcreladdr:$ga))),
+ def : Pat<(i32 (zextloadi16 (PPCmatpcreladdr PCRelForm:$ga))),
(PLHZpc $ga, 0)>;
- def : Pat<(i32 (extloadi16 (PPCmatpcreladdr pcreladdr:$ga))),
+ def : Pat<(i32 (extloadi16 (PPCmatpcreladdr PCRelForm:$ga))),
(PLHZpc $ga, 0)>;
- def : Pat<(i32 (load (PPCmatpcreladdr pcreladdr:$ga))), (PLWZpc $ga, 0)>;
+ def : Pat<(i32 (load (PPCmatpcreladdr PCRelForm:$ga))), (PLWZpc $ga, 0)>;
// Store i32
- def : Pat<(truncstorei8 i32:$RS, (PPCmatpcreladdr pcreladdr:$ga)),
+ def : Pat<(truncstorei8 i32:$RS, (PPCmatpcreladdr PCRelForm:$ga)),
(PSTBpc $RS, $ga, 0)>;
- def : Pat<(truncstorei16 i32:$RS, (PPCmatpcreladdr pcreladdr:$ga)),
+ def : Pat<(truncstorei16 i32:$RS, (PPCmatpcreladdr PCRelForm:$ga)),
(PSTHpc $RS, $ga, 0)>;
- def : Pat<(store i32:$RS, (PPCmatpcreladdr pcreladdr:$ga)),
+ def : Pat<(store i32:$RS, (PPCmatpcreladdr PCRelForm:$ga)),
(PSTWpc $RS, $ga, 0)>;
// Load i64
- def : Pat<(i64 (zextloadi1 (PPCmatpcreladdr pcreladdr:$ga))),
+ def : Pat<(i64 (zextloadi1 (PPCmatpcreladdr PCRelForm:$ga))),
(PLBZ8pc $ga, 0)>;
- def : Pat<(i64 (extloadi1 (PPCmatpcreladdr pcreladdr:$ga))),
+ def : Pat<(i64 (extloadi1 (PPCmatpcreladdr PCRelForm:$ga))),
(PLBZ8pc $ga, 0)>;
- def : Pat<(i64 (zextloadi8 (PPCmatpcreladdr pcreladdr:$ga))),
+ def : Pat<(i64 (zextloadi8 (PPCmatpcreladdr PCRelForm:$ga))),
(PLBZ8pc $ga, 0)>;
- def : Pat<(i64 (extloadi8 (PPCmatpcreladdr pcreladdr:$ga))),
+ def : Pat<(i64 (extloadi8 (PPCmatpcreladdr PCRelForm:$ga))),
(PLBZ8pc $ga, 0)>;
- def : Pat<(i64 (sextloadi16 (PPCmatpcreladdr pcreladdr:$ga))),
+ def : Pat<(i64 (sextloadi16 (PPCmatpcreladdr PCRelForm:$ga))),
(PLHA8pc $ga, 0)>;
- def : Pat<(i64 (zextloadi16 (PPCmatpcreladdr pcreladdr:$ga))),
+ def : Pat<(i64 (zextloadi16 (PPCmatpcreladdr PCRelForm:$ga))),
(PLHZ8pc $ga, 0)>;
- def : Pat<(i64 (extloadi16 (PPCmatpcreladdr pcreladdr:$ga))),
+ def : Pat<(i64 (extloadi16 (PPCmatpcreladdr PCRelForm:$ga))),
(PLHZ8pc $ga, 0)>;
- def : Pat<(i64 (zextloadi32 (PPCmatpcreladdr pcreladdr:$ga))),
+ def : Pat<(i64 (zextloadi32 (PPCmatpcreladdr PCRelForm:$ga))),
(PLWZ8pc $ga, 0)>;
- def : Pat<(i64 (sextloadi32 (PPCmatpcreladdr pcreladdr:$ga))),
+ def : Pat<(i64 (sextloadi32 (PPCmatpcreladdr PCRelForm:$ga))),
(PLWA8pc $ga, 0)>;
- def : Pat<(i64 (extloadi32 (PPCmatpcreladdr pcreladdr:$ga))),
+ def : Pat<(i64 (extloadi32 (PPCmatpcreladdr PCRelForm:$ga))),
(PLWZ8pc $ga, 0)>;
- def : Pat<(i64 (load (PPCmatpcreladdr pcreladdr:$ga))), (PLDpc $ga, 0)>;
+ def : Pat<(i64 (load (PPCmatpcreladdr PCRelForm:$ga))), (PLDpc $ga, 0)>;
// Store i64
- def : Pat<(truncstorei8 i64:$RS, (PPCmatpcreladdr pcreladdr:$ga)),
+ def : Pat<(truncstorei8 i64:$RS, (PPCmatpcreladdr PCRelForm:$ga)),
(PSTB8pc $RS, $ga, 0)>;
- def : Pat<(truncstorei16 i64:$RS, (PPCmatpcreladdr pcreladdr:$ga)),
+ def : Pat<(truncstorei16 i64:$RS, (PPCmatpcreladdr PCRelForm:$ga)),
(PSTH8pc $RS, $ga, 0)>;
- def : Pat<(truncstorei32 i64:$RS, (PPCmatpcreladdr pcreladdr:$ga)),
+ def : Pat<(truncstorei32 i64:$RS, (PPCmatpcreladdr PCRelForm:$ga)),
(PSTW8pc $RS, $ga, 0)>;
- def : Pat<(store i64:$RS, (PPCmatpcreladdr pcreladdr:$ga)),
+ def : Pat<(store i64:$RS, (PPCmatpcreladdr PCRelForm:$ga)),
(PSTDpc $RS, $ga, 0)>;
// Load f32
- def : Pat<(f32 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLFSpc $addr, 0)>;
+ def : Pat<(f32 (load (PPCmatpcreladdr PCRelForm:$addr))), (PLFSpc $addr, 0)>;
// Store f32
- def : Pat<(store f32:$FRS, (PPCmatpcreladdr pcreladdr:$ga)),
+ def : Pat<(store f32:$FRS, (PPCmatpcreladdr PCRelForm:$ga)),
(PSTFSpc $FRS, $ga, 0)>;
// Load f64
- def : Pat<(f64 (extloadf32 (PPCmatpcreladdr pcreladdr:$addr))),
+ def : Pat<(f64 (extloadf32 (PPCmatpcreladdr PCRelForm:$addr))),
(COPY_TO_REGCLASS (PLFSpc $addr, 0), VSFRC)>;
- def : Pat<(f64 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLFDpc $addr, 0)>;
+ def : Pat<(f64 (load (PPCmatpcreladdr PCRelForm:$addr))), (PLFDpc $addr, 0)>;
// Store f64
- def : Pat<(store f64:$FRS, (PPCmatpcreladdr pcreladdr:$ga)),
+ def : Pat<(store f64:$FRS, (PPCmatpcreladdr PCRelForm:$ga)),
(PSTFDpc $FRS, $ga, 0)>;
// Load f128
- def : Pat<(f128 (load (PPCmatpcreladdr pcreladdr:$addr))),
+ def : Pat<(f128 (load (PPCmatpcreladdr PCRelForm:$addr))),
(COPY_TO_REGCLASS (PLXVpc $addr, 0), VRRC)>;
// Store f128
- def : Pat<(store f128:$XS, (PPCmatpcreladdr pcreladdr:$ga)),
+ def : Pat<(store f128:$XS, (PPCmatpcreladdr PCRelForm:$ga)),
(PSTXVpc (COPY_TO_REGCLASS $XS, VSRC), $ga, 0)>;
// Load v4i32
- def : Pat<(v4i32 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLXVpc $addr, 0)>;
+ def : Pat<(v4i32 (load (PPCmatpcreladdr PCRelForm:$addr))), (PLXVpc $addr, 0)>;
// Store v4i32
- def : Pat<(store v4i32:$XS, (PPCmatpcreladdr pcreladdr:$ga)),
+ def : Pat<(store v4i32:$XS, (PPCmatpcreladdr PCRelForm:$ga)),
(PSTXVpc $XS, $ga, 0)>;
// Load v2i64
- def : Pat<(v2i64 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLXVpc $addr, 0)>;
+ def : Pat<(v2i64 (load (PPCmatpcreladdr PCRelForm:$addr))), (PLXVpc $addr, 0)>;
// Store v2i64
- def : Pat<(store v2i64:$XS, (PPCmatpcreladdr pcreladdr:$ga)),
+ def : Pat<(store v2i64:$XS, (PPCmatpcreladdr PCRelForm:$ga)),
(PSTXVpc $XS, $ga, 0)>;
// Load v4f32
- def : Pat<(v4f32 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLXVpc $addr, 0)>;
+ def : Pat<(v4f32 (load (PPCmatpcreladdr PCRelForm:$addr))), (PLXVpc $addr, 0)>;
// Store v4f32
- def : Pat<(store v4f32:$XS, (PPCmatpcreladdr pcreladdr:$ga)),
+ def : Pat<(store v4f32:$XS, (PPCmatpcreladdr PCRelForm:$ga)),
(PSTXVpc $XS, $ga, 0)>;
// Load v2f64
- def : Pat<(v2f64 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLXVpc $addr, 0)>;
+ def : Pat<(v2f64 (load (PPCmatpcreladdr PCRelForm:$addr))), (PLXVpc $addr, 0)>;
// Store v2f64
- def : Pat<(store v2f64:$XS, (PPCmatpcreladdr pcreladdr:$ga)),
+ def : Pat<(store v2f64:$XS, (PPCmatpcreladdr PCRelForm:$ga)),
(PSTXVpc $XS, $ga, 0)>;
// Atomic Load
- def : Pat<(atomic_load_8 (PPCmatpcreladdr pcreladdr:$ga)),
+ def : Pat<(atomic_load_8 (PPCmatpcreladdr PCRelForm:$ga)),
(PLBZpc $ga, 0)>;
- def : Pat<(atomic_load_16 (PPCmatpcreladdr pcreladdr:$ga)),
+ def : Pat<(atomic_load_16 (PPCmatpcreladdr PCRelForm:$ga)),
(PLHZpc $ga, 0)>;
- def : Pat<(atomic_load_32 (PPCmatpcreladdr pcreladdr:$ga)),
+ def : Pat<(atomic_load_32 (PPCmatpcreladdr PCRelForm:$ga)),
(PLWZpc $ga, 0)>;
- def : Pat<(atomic_load_64 (PPCmatpcreladdr pcreladdr:$ga)),
+ def : Pat<(atomic_load_64 (PPCmatpcreladdr PCRelForm:$ga)),
(PLDpc $ga, 0)>;
// Atomic Store
- def : Pat<(atomic_store_8 (PPCmatpcreladdr pcreladdr:$ga), i32:$RS),
+ def : Pat<(atomic_store_8 (PPCmatpcreladdr PCRelForm:$ga), i32:$RS),
(PSTBpc $RS, $ga, 0)>;
- def : Pat<(atomic_store_16 (PPCmatpcreladdr pcreladdr:$ga), i32:$RS),
+ def : Pat<(atomic_store_16 (PPCmatpcreladdr PCRelForm:$ga), i32:$RS),
(PSTHpc $RS, $ga, 0)>;
- def : Pat<(atomic_store_32 (PPCmatpcreladdr pcreladdr:$ga), i32:$RS),
+ def : Pat<(atomic_store_32 (PPCmatpcreladdr PCRelForm:$ga), i32:$RS),
(PSTWpc $RS, $ga, 0)>;
- def : Pat<(atomic_store_8 (PPCmatpcreladdr pcreladdr:$ga), i64:$RS),
+ def : Pat<(atomic_store_8 (PPCmatpcreladdr PCRelForm:$ga), i64:$RS),
(PSTB8pc $RS, $ga, 0)>;
- def : Pat<(atomic_store_16 (PPCmatpcreladdr pcreladdr:$ga), i64:$RS),
+ def : Pat<(atomic_store_16 (PPCmatpcreladdr PCRelForm:$ga), i64:$RS),
(PSTH8pc $RS, $ga, 0)>;
- def : Pat<(atomic_store_32 (PPCmatpcreladdr pcreladdr:$ga), i64:$RS),
+ def : Pat<(atomic_store_32 (PPCmatpcreladdr PCRelForm:$ga), i64:$RS),
(PSTW8pc $RS, $ga, 0)>;
- def : Pat<(atomic_store_64 (PPCmatpcreladdr pcreladdr:$ga), i64:$RS),
+ def : Pat<(atomic_store_64 (PPCmatpcreladdr PCRelForm:$ga), i64:$RS),
(PSTDpc $RS, $ga, 0)>;
// Special Cases For PPCstore_scal_int_from_vsr
def : Pat<(PPCstore_scal_int_from_vsr
(f64 (PPCcv_fp_to_sint_in_vsr f64:$src)),
- (PPCmatpcreladdr pcreladdr:$dst), 8),
+ (PPCmatpcreladdr PCRelForm:$dst), 8),
(PSTXSDpc (XSCVDPSXDS f64:$src), $dst, 0)>;
def : Pat<(PPCstore_scal_int_from_vsr
(f64 (PPCcv_fp_to_sint_in_vsr f128:$src)),
- (PPCmatpcreladdr pcreladdr:$dst), 8),
+ (PPCmatpcreladdr PCRelForm:$dst), 8),
(PSTXSDpc (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC), $dst, 0)>;
def : Pat<(PPCstore_scal_int_from_vsr
(f64 (PPCcv_fp_to_uint_in_vsr f64:$src)),
- (PPCmatpcreladdr pcreladdr:$dst), 8),
+ (PPCmatpcreladdr PCRelForm:$dst), 8),
(PSTXSDpc (XSCVDPUXDS f64:$src), $dst, 0)>;
def : Pat<(PPCstore_scal_int_from_vsr
(f64 (PPCcv_fp_to_uint_in_vsr f128:$src)),
- (PPCmatpcreladdr pcreladdr:$dst), 8),
+ (PPCmatpcreladdr PCRelForm:$dst), 8),
(PSTXSDpc (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC), $dst, 0)>;
+ def : Pat<(v4f32 (PPCldvsxlh (PPCmatpcreladdr PCRelForm:$addr))),
+ (SUBREG_TO_REG (i64 1), (PLFDpc $addr, 0), sub_64)>;
+
// If the PPCmatpcreladdr node is not caught by any other pattern it should be
// caught here and turned into a paddi instruction to materialize the address.
- def : Pat<(PPCmatpcreladdr pcreladdr:$addr), (PADDI8pc 0, $addr)>;
+ def : Pat<(PPCmatpcreladdr PCRelForm:$addr), (PADDI8pc 0, $addr)>;
// PPCtlsdynamatpcreladdr node is used for TLS dynamic models to materialize
// tls global address with paddi instruction.
- def : Pat<(PPCtlsdynamatpcreladdr pcreladdr:$addr), (PADDI8pc 0, $addr)>;
+ def : Pat<(PPCtlsdynamatpcreladdr PCRelForm:$addr), (PADDI8pc 0, $addr)>;
// PPCtlslocalexecmataddr node is used for TLS local exec models to
// materialize tls global address with paddi instruction.
def : Pat<(PPCaddTls i64:$in, (PPCtlslocalexecmataddr tglobaltlsaddr:$addr)),
@@ -1861,15 +1850,6 @@ let Predicates = [PCRelativeMemops], AddedComplexity = 500 in {
}
let Predicates = [PrefixInstrs] in {
- def XXSPLTIW : 8RR_DForm_IMM32_XT6<32, 3, (outs vsrc:$XT),
- (ins i32imm:$IMM32),
- "xxspltiw $XT, $IMM32", IIC_VecGeneral,
- []>;
- def XXSPLTIDP : 8RR_DForm_IMM32_XT6<32, 2, (outs vsrc:$XT),
- (ins i32imm:$IMM32),
- "xxspltidp $XT, $IMM32", IIC_VecGeneral,
- [(set v2f64:$XT,
- (PPCxxspltidp i32:$IMM32))]>;
def XXPERMX :
8RR_XX4Form_IMM3_XTABC6<34, 0, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB,
vsrc:$XC, u3imm:$UIM),
@@ -1893,9 +1873,18 @@ let Predicates = [PrefixInstrs] in {
IIC_VecGeneral, []>;
}
-// XXSPLI32DX needs extra flags to make sure the compiler does not attempt
+// XXSPLTIW/DP/32DX need extra flags to make sure the compiler does not attempt
// to spill part of the instruction when the values are similar.
-let isReMaterializable = 1, isMoveImm = 1, Predicates = [PrefixInstrs] in {
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1, Predicates = [PrefixInstrs] in {
+ def XXSPLTIW : 8RR_DForm_IMM32_XT6<32, 3, (outs vsrc:$XT),
+ (ins i32imm:$IMM32),
+ "xxspltiw $XT, $IMM32", IIC_VecGeneral,
+ []>;
+ def XXSPLTIDP : 8RR_DForm_IMM32_XT6<32, 2, (outs vsrc:$XT),
+ (ins i32imm:$IMM32),
+ "xxspltidp $XT, $IMM32", IIC_VecGeneral,
+ [(set v2f64:$XT,
+ (PPCxxspltidp i32:$IMM32))]>;
def XXSPLTI32DX :
8RR_DForm_IMM32_XT6_IX<32, 0, (outs vsrc:$XT),
(ins vsrc:$XTi, u1imm:$IX, i32imm:$IMM32),
@@ -1934,7 +1923,7 @@ let Predicates = [IsISA3_1] in {
[(set v16i8:$VRT,
(int_ppc_altivec_vsldbi v16i8:$VRA,
v16i8:$VRB,
- i32:$SH))]>;
+ timm:$SH))]>;
def VSRDBI : VNForm_VTAB5_SD3<22, 1, (outs vrrc:$VRT),
(ins vrrc:$VRA, vrrc:$VRB, u3imm:$SH),
"vsrdbi $VRT, $VRA, $VRB, $SH",
@@ -1942,7 +1931,7 @@ let Predicates = [IsISA3_1] in {
[(set v16i8:$VRT,
(int_ppc_altivec_vsrdbi v16i8:$VRA,
v16i8:$VRB,
- i32:$SH))]>;
+ timm:$SH))]>;
defm VSTRIBR : VXForm_VTB5_RCr<13, 1, (outs vrrc:$vT), (ins vrrc:$vB),
"vstribr", "$vT, $vB", IIC_VecGeneral,
[(set v16i8:$vT,
@@ -2678,6 +2667,45 @@ def : Pat<(f64 nzFPImmAsi64:$A),
// nand(A, nand(B, C))
def : xxevalPattern<(or (vnot v4i32:$vA), (and v4i32:$vB, v4i32:$vC)),
!sub(255, 14)>;
+
+ // Anonymous patterns to select prefixed VSX loads and stores.
+ // Load / Store f128
+ def : Pat<(f128 (load PDForm:$src)),
+ (COPY_TO_REGCLASS (PLXV memri34:$src), VRRC)>;
+ def : Pat<(store f128:$XS, PDForm:$dst),
+ (PSTXV (COPY_TO_REGCLASS $XS, VSRC), memri34:$dst)>;
+
+ // Load / Store v4i32
+ def : Pat<(v4i32 (load PDForm:$src)), (PLXV memri34:$src)>;
+ def : Pat<(store v4i32:$XS, PDForm:$dst), (PSTXV $XS, memri34:$dst)>;
+
+ // Load / Store v2i64
+ def : Pat<(v2i64 (load PDForm:$src)), (PLXV memri34:$src)>;
+ def : Pat<(store v2i64:$XS, PDForm:$dst), (PSTXV $XS, memri34:$dst)>;
+
+ // Load / Store v4f32
+ def : Pat<(v4f32 (load PDForm:$src)), (PLXV memri34:$src)>;
+ def : Pat<(store v4f32:$XS, PDForm:$dst), (PSTXV $XS, memri34:$dst)>;
+
+ // Load / Store v2f64
+ def : Pat<(v2f64 (load PDForm:$src)), (PLXV memri34:$src)>;
+ def : Pat<(store v2f64:$XS, PDForm:$dst), (PSTXV $XS, memri34:$dst)>;
+
+ // Cases For PPCstore_scal_int_from_vsr
+ def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), PDForm:$dst, 8),
+ (PSTXSD (XSCVDPUXDS f64:$src), PDForm:$dst)>;
+ def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), PDForm:$dst, 8),
+ (PSTXSD (XSCVDPSXDS f64:$src), PDForm:$dst)>;
+ def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), PDForm:$dst, 8),
+ (PSTXSD (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC),
+ PDForm:$dst)>;
+ def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), PDForm:$dst, 8),
+ (PSTXSD (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC),
+ PDForm:$dst)>;
}
let Predicates = [PrefixInstrs] in {
@@ -2701,138 +2729,192 @@ let Predicates = [PrefixInstrs] in {
(XXBLENDVW $A, $B, $C)>;
def : Pat<(int_ppc_vsx_xxblendvd v2i64:$A, v2i64:$B, v2i64:$C),
(XXBLENDVD $A, $B, $C)>;
+
+ // Anonymous patterns to select prefixed loads and stores.
+ // Load i32
+ def : Pat<(i32 (extloadi1 PDForm:$src)), (PLBZ memri34:$src)>;
+ def : Pat<(i32 (zextloadi1 PDForm:$src)), (PLBZ memri34:$src)>;
+ def : Pat<(i32 (extloadi8 PDForm:$src)), (PLBZ memri34:$src)>;
+ def : Pat<(i32 (zextloadi8 PDForm:$src)), (PLBZ memri34:$src)>;
+ def : Pat<(i32 (extloadi16 PDForm:$src)), (PLHZ memri34:$src)>;
+ def : Pat<(i32 (zextloadi16 PDForm:$src)), (PLHZ memri34:$src)>;
+ def : Pat<(i32 (sextloadi16 PDForm:$src)), (PLHA memri34:$src)>;
+ def : Pat<(i32 (load PDForm:$src)), (PLWZ memri34:$src)>;
+
+ // Store i32
+ def : Pat<(truncstorei8 i32:$rS, PDForm:$dst), (PSTB gprc:$rS, memri34:$dst)>;
+ def : Pat<(truncstorei16 i32:$rS, PDForm:$dst), (PSTH gprc:$rS, memri34:$dst)>;
+ def : Pat<(store i32:$rS, PDForm:$dst), (PSTW gprc:$rS, memri34:$dst)>;
+
+ // Load i64
+ def : Pat<(i64 (extloadi1 PDForm:$src)), (PLBZ8 memri34:$src)>;
+ def : Pat<(i64 (zextloadi1 PDForm:$src)), (PLBZ8 memri34:$src)>;
+ def : Pat<(i64 (extloadi8 PDForm:$src)), (PLBZ8 memri34:$src)>;
+ def : Pat<(i64 (zextloadi8 PDForm:$src)), (PLBZ8 memri34:$src)>;
+ def : Pat<(i64 (extloadi16 PDForm:$src)), (PLHZ8 memri34:$src)>;
+ def : Pat<(i64 (zextloadi16 PDForm:$src)), (PLHZ8 memri34:$src)>;
+ def : Pat<(i64 (sextloadi16 PDForm:$src)), (PLHA8 memri34:$src)>;
+ def : Pat<(i64 (extloadi32 PDForm:$src)), (PLWZ8 memri34:$src)>;
+ def : Pat<(i64 (zextloadi32 PDForm:$src)), (PLWZ8 memri34:$src)>;
+ def : Pat<(i64 (sextloadi32 PDForm:$src)), (PLWA8 memri34:$src)>;
+ def : Pat<(i64 (load PDForm:$src)), (PLD memri34:$src)>;
+
+ // Store i64
+ def : Pat<(truncstorei8 i64:$rS, PDForm:$dst), (PSTB8 g8rc:$rS, memri34:$dst)>;
+ def : Pat<(truncstorei16 i64:$rS, PDForm:$dst), (PSTH8 g8rc:$rS, memri34:$dst)>;
+ def : Pat<(truncstorei32 i64:$rS, PDForm:$dst), (PSTW8 g8rc:$rS, memri34:$dst)>;
+ def : Pat<(store i64:$rS, PDForm:$dst), (PSTD g8rc:$rS, memri34:$dst)>;
+
+ // Load / Store f32
+ def : Pat<(f32 (load PDForm:$src)), (PLFS memri34:$src)>;
+ def : Pat<(store f32:$FRS, PDForm:$dst), (PSTFS $FRS, memri34:$dst)>;
+
+ // Load / Store f64
+ def : Pat<(f64 (extloadf32 PDForm:$src)),
+ (COPY_TO_REGCLASS (PLFS memri34:$src), VSFRC)>;
+ def : Pat<(f64 (load PDForm:$src)), (PLFD memri34:$src)>;
+ def : Pat<(store f64:$FRS, PDForm:$dst), (PSTFD $FRS, memri34:$dst)>;
+
+ // Atomic Load
+ def : Pat<(atomic_load_8 PDForm:$src), (PLBZ memri34:$src)>;
+ def : Pat<(atomic_load_16 PDForm:$src), (PLHZ memri34:$src)>;
+ def : Pat<(atomic_load_32 PDForm:$src), (PLWZ memri34:$src)>;
+ def : Pat<(atomic_load_64 PDForm:$src), (PLD memri34:$src)>;
+
+ // Atomic Store
+ def : Pat<(atomic_store_8 PDForm:$dst, i32:$RS), (PSTB $RS, memri34:$dst)>;
+ def : Pat<(atomic_store_16 PDForm:$dst, i32:$RS), (PSTH $RS, memri34:$dst)>;
+ def : Pat<(atomic_store_32 PDForm:$dst, i32:$RS), (PSTW $RS, memri34:$dst)>;
+ def : Pat<(atomic_store_64 PDForm:$dst, i64:$RS), (PSTD $RS, memri34:$dst)>;
+
+ // Prefixed fpext to v2f64
+ def : Pat<(v4f32 (PPCldvsxlh PDForm:$src)),
+ (SUBREG_TO_REG (i64 1), (PLFD PDForm:$src), sub_64)>;
}
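The PDForm patterns added above let plain loads and stores select the prefixed D-form instructions (pld, plwz, pstd, and friends), whose displacement field is 34 bits rather than 16. A hedged illustration of the kind of access that benefits, assuming a Power10-class target with prefixed instructions enabled; the struct layout is purely illustrative:

    // Sketch: 'tail' sits beyond the reach of a 16-bit signed displacement,
    // so a target with prefixed instructions can address it with a single
    // prefixed load instead of materializing the offset separately.
    struct Big {
      char pad[1 << 20];
      long tail;
    };

    long get_tail(Big *b) {
      return b->tail;
    }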
def InsertEltShift {
- dag Sub32Left0 = (EXTRACT_SUBREG $rB, sub_32);
+ dag Sub32 = (i32 (EXTRACT_SUBREG $rB, sub_32));
dag Sub32Left1 = (RLWINM (EXTRACT_SUBREG $rB, sub_32), 1, 0, 30);
dag Sub32Left2 = (RLWINM (EXTRACT_SUBREG $rB, sub_32), 2, 0, 29);
+ dag Left1 = (RLWINM $rB, 1, 0, 30);
+ dag Left2 = (RLWINM $rB, 2, 0, 29);
dag Left3 = (RLWINM8 $rB, 3, 0, 28);
}
let Predicates = [IsISA3_1, HasVSX, IsLittleEndian] in {
// Indexed vector insert element
- def : Pat<(v16i8 (PPCvecinsertelt v16i8:$vDi, i32:$rA, i64:$rB)),
- (VINSBRX $vDi, InsertEltShift.Sub32Left0, $rA)>;
- def : Pat<(v8i16 (PPCvecinsertelt v8i16:$vDi, i32:$rA, i64:$rB)),
+ def : Pat<(v16i8 (vector_insert v16i8:$vDi, i32:$rA, i64:$rB)),
+ (VINSBRX $vDi, InsertEltShift.Sub32, $rA)>;
+ def : Pat<(v8i16 (vector_insert v8i16:$vDi, i32:$rA, i64:$rB)),
(VINSHRX $vDi, InsertEltShift.Sub32Left1, $rA)>;
- def : Pat<(v4i32 (PPCvecinsertelt v4i32:$vDi, i32:$rA, i64:$rB)),
+ def : Pat<(v4i32 (insertelt v4i32:$vDi, i32:$rA, i64:$rB)),
(VINSWRX $vDi, InsertEltShift.Sub32Left2, $rA)>;
- def : Pat<(v2i64 (PPCvecinsertelt v2i64:$vDi, i64:$rA, i64:$rB)),
+ def : Pat<(v2i64 (insertelt v2i64:$vDi, i64:$rA, i64:$rB)),
(VINSDRX $vDi, InsertEltShift.Left3, $rA)>;
- def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, f32:$A, i64:$rB)),
- (VINSWRX $vDi, InsertEltShift.Sub32Left2, Bitcast.FltToInt)>;
- def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddr:$rA)), i64:$rB)),
+ def : Pat<(v4f32 (insertelt v4f32:$vDi, f32:$rA, i64:$rB)),
+ (VINSWVRX $vDi, InsertEltShift.Sub32Left2, (XSCVDPSPN $rA))>;
+ def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)), i64:$rB)),
(VINSWRX $vDi, InsertEltShift.Sub32Left2, (LWZ memri:$rA))>;
- def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), i64:$rB)),
+ def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), i64:$rB)),
(VINSWRX $vDi, InsertEltShift.Sub32Left2, (PLWZ memri34:$rA))>;
- def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load xaddr:$rA)), i64:$rB)),
+ def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load xaddr:$rA)), i64:$rB)),
(VINSWRX $vDi, InsertEltShift.Sub32Left2, (LWZX memrr:$rA))>;
- def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, f64:$A, i64:$rB)),
+ def : Pat<(v2f64 (insertelt v2f64:$vDi, f64:$A, i64:$rB)),
(VINSDRX $vDi, InsertEltShift.Left3, Bitcast.DblToLong)>;
- def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, (f64 (load iaddrX4:$rA)), i64:$rB)),
+ def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load iaddrX4:$rA)), i64:$rB)),
(VINSDRX $vDi, InsertEltShift.Left3, (LD memrix:$rA))>;
- def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, (f64 (load iaddrX34:$rA)), i64:$rB)),
+ def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load iaddrX34:$rA)), i64:$rB)),
(VINSDRX $vDi, InsertEltShift.Left3, (PLD memri34:$rA))>;
- def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, (f64 (load xaddrX4:$rA)), i64:$rB)),
+ def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load xaddrX4:$rA)), i64:$rB)),
(VINSDRX $vDi, InsertEltShift.Left3, (LDX memrr:$rA))>;
-
- // Immediate vector insert element
- foreach i = [0, 1, 2, 3] in {
- def : Pat<(v4i32 (PPCvecinsertelt v4i32:$vDi, i32:$rA, (i64 i))),
- (VINSW $vDi, !mul(!sub(3, i), 4), $rA)>;
- def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddr:$rA)), (i64 i))),
- (VINSW $vDi, !mul(!sub(3, i), 4), (LWZ memri:$rA))>;
- def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), (i64 i))),
- (VINSW $vDi, !mul(!sub(3, i), 4), (PLWZ memri34:$rA))>;
- def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load xaddr:$rA)), (i64 i))),
- (VINSW $vDi, !mul(!sub(3, i), 4), (LWZX memrr:$rA))>;
+ let AddedComplexity = 400 in {
+ // Immediate vector insert element
+ foreach Idx = [0, 1, 2, 3] in {
+ def : Pat<(v4i32 (insertelt v4i32:$vDi, i32:$rA, Idx)),
+ (VINSW $vDi, !mul(!sub(3, Idx), 4), $rA)>;
+ def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)), Idx)),
+ (VINSW $vDi, !mul(!sub(3, Idx), 4), (LWZ memri:$rA))>;
+ def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), Idx)),
+ (VINSW $vDi, !mul(!sub(3, Idx), 4), (PLWZ memri34:$rA))>;
+ def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load xaddr:$rA)), Idx)),
+ (VINSW $vDi, !mul(!sub(3, Idx), 4), (LWZX memrr:$rA))>;
+ }
+ foreach i = [0, 1] in
+ def : Pat<(v2i64 (insertelt v2i64:$vDi, i64:$rA, (i64 i))),
+ (VINSD $vDi, !mul(!sub(1, i), 8), $rA)>;
}
- foreach i = [0, 1] in
- def : Pat<(v2i64 (PPCvecinsertelt v2i64:$vDi, i64:$rA, (i64 i))),
- (VINSD $vDi, !mul(!sub(1, i), 8), $rA)>;
}
let Predicates = [IsISA3_1, HasVSX, IsBigEndian, IsPPC32] in {
// Indexed vector insert element
- def : Pat<(v16i8 (PPCvecinsertelt v16i8:$vDi, i32:$rA, i32:$rB)),
+ def : Pat<(v16i8 (vector_insert v16i8:$vDi, i32:$rA, i32:$rB)),
(VINSBLX $vDi, $rB, $rA)>;
- def : Pat<(v8i16 (PPCvecinsertelt v8i16:$vDi, i32:$rA, i32:$rB)),
- (VINSHLX $vDi, $rB, $rA)>;
- def : Pat<(v4i32 (PPCvecinsertelt v4i32:$vDi, i32:$rA, i32:$rB)),
- (VINSWLX $vDi, $rB, $rA)>;
-
- def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, f32:$A, i32:$rB)),
- (VINSWLX $vDi, $rB, Bitcast.FltToInt)>;
- def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddr:$rA)),
- i32:$rB)),
- (VINSWLX $vDi, $rB, (LWZ memri:$rA))>;
- def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)),
- i32:$rB)),
- (VINSWLX $vDi, $rB, (PLWZ memri34:$rA))>;
- def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load xaddr:$rA)),
- i32:$rB)),
- (VINSWLX $vDi, $rB, (LWZX memrr:$rA))>;
+ def : Pat<(v8i16 (vector_insert v8i16:$vDi, i32:$rA, i32:$rB)),
+ (VINSHLX $vDi, InsertEltShift.Left1, $rA)>;
+ def : Pat<(v4i32 (insertelt v4i32:$vDi, i32:$rA, i32:$rB)),
+ (VINSWLX $vDi, InsertEltShift.Left2, $rA)>;
- // Immediate vector insert element
- foreach i = [0, 1, 2, 3] in {
- def : Pat<(v4i32 (PPCvecinsertelt v4i32:$vDi, i32:$rA, (i32 i))),
- (VINSW $vDi, !mul(i, 4), $rA)>;
- def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddr:$rA)),
- (i32 i))),
- (VINSW $vDi, !mul(i, 4), (LWZ memri:$rA))>;
- def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)),
- (i32 i))),
- (VINSW $vDi, !mul(i, 4), (PLWZ memri34:$rA))>;
- def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load xaddr:$rA)),
- (i32 i))),
- (VINSW $vDi, !mul(i, 4), (LWZX memrr:$rA))>;
- }
+ def : Pat<(v4f32 (insertelt v4f32:$vDi, f32:$rA, i32:$rB)),
+ (VINSWVLX $vDi, InsertEltShift.Left2, (XSCVDPSPN $rA))>;
+ def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)), i32:$rB)),
+ (VINSWLX v4f32:$vDi, InsertEltShift.Left2, (LWZ memri:$rA))>;
+ def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), i32:$rB)),
+ (VINSWLX v4f32:$vDi, InsertEltShift.Left2, (PLWZ memri34:$rA))>;
+  def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load xaddr:$rA)), i32:$rB)),
+            (VINSWLX v4f32:$vDi, InsertEltShift.Left2, (LWZX memrr:$rA))>;
}
let Predicates = [IsISA3_1, HasVSX, IsBigEndian, IsPPC64] in {
// Indexed vector insert element
- def : Pat<(v16i8 (PPCvecinsertelt v16i8:$vDi, i32:$rA, i64:$rB)),
- (VINSBLX $vDi, InsertEltShift.Sub32Left0, $rA)>;
- def : Pat<(v8i16 (PPCvecinsertelt v8i16:$vDi, i32:$rA, i64:$rB)),
+ def : Pat<(v16i8 (vector_insert v16i8:$vDi, i32:$rA, i64:$rB)),
+ (VINSBLX $vDi, InsertEltShift.Sub32, $rA)>;
+ def : Pat<(v8i16 (vector_insert v8i16:$vDi, i32:$rA, i64:$rB)),
(VINSHLX $vDi, InsertEltShift.Sub32Left1, $rA)>;
- def : Pat<(v4i32 (PPCvecinsertelt v4i32:$vDi, i32:$rA, i64:$rB)),
+ def : Pat<(v4i32 (insertelt v4i32:$vDi, i32:$rA, i64:$rB)),
(VINSWLX $vDi, InsertEltShift.Sub32Left2, $rA)>;
- def : Pat<(v2i64 (PPCvecinsertelt v2i64:$vDi, i64:$rA, i64:$rB)),
+ def : Pat<(v2i64 (insertelt v2i64:$vDi, i64:$rA, i64:$rB)),
(VINSDLX $vDi, InsertEltShift.Left3, $rA)>;
- def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, f32:$A, i64:$rB)),
- (VINSWLX $vDi, InsertEltShift.Sub32Left2, Bitcast.FltToInt)>;
- def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddr:$rA)), i64:$rB)),
+ def : Pat<(v4f32 (insertelt v4f32:$vDi, f32:$rA, i64:$rB)),
+ (VINSWVLX $vDi, InsertEltShift.Sub32Left2, (XSCVDPSPN $rA))>;
+ def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)), i64:$rB)),
(VINSWLX $vDi, InsertEltShift.Sub32Left2, (LWZ memri:$rA))>;
- def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), i64:$rB)),
+ def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), i64:$rB)),
(VINSWLX $vDi, InsertEltShift.Sub32Left2, (PLWZ memri34:$rA))>;
- def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load xaddr:$rA)), i64:$rB)),
+ def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load xaddr:$rA)), i64:$rB)),
(VINSWLX $vDi, InsertEltShift.Sub32Left2, (LWZX memrr:$rA))>;
- def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, f64:$A, i64:$rB)),
+ def : Pat<(v2f64 (insertelt v2f64:$vDi, f64:$A, i64:$rB)),
(VINSDLX $vDi, InsertEltShift.Left3, Bitcast.DblToLong)>;
- def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, (f64 (load iaddrX4:$rA)), i64:$rB)),
+ def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load iaddrX4:$rA)), i64:$rB)),
(VINSDLX $vDi, InsertEltShift.Left3, (LD memrix:$rA))>;
- def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, (f64 (load iaddrX34:$rA)), i64:$rB)),
+ def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load iaddrX34:$rA)), i64:$rB)),
(VINSDLX $vDi, InsertEltShift.Left3, (PLD memri34:$rA))>;
- def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, (f64 (load xaddrX4:$rA)), i64:$rB)),
+ def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load xaddrX4:$rA)), i64:$rB)),
(VINSDLX $vDi, InsertEltShift.Left3, (LDX memrr:$rA))>;
+}
+let AddedComplexity = 400, Predicates = [IsISA3_1, HasVSX, IsBigEndian] in {
// Immediate vector insert element
- foreach i = [0, 1, 2, 3] in {
- def : Pat<(v4i32 (PPCvecinsertelt v4i32:$vDi, i32:$rA, (i64 i))),
- (VINSW $vDi, !mul(i, 4), $rA)>;
- def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddr:$rA)), (i64 i))),
- (VINSW $vDi, !mul(i, 4), (LWZ memri:$rA))>;
- def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), (i64 i))),
- (VINSW $vDi, !mul(i, 4), (PLWZ memri34:$rA))>;
- def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load xaddr:$rA)), (i64 i))),
- (VINSW $vDi, !mul(i, 4), (LWZX memrr:$rA))>;
+ foreach Ty = [i32, i64] in {
+ foreach Idx = [0, 1, 2, 3] in {
+ def : Pat<(v4i32 (insertelt v4i32:$vDi, i32:$rA, (Ty Idx))),
+ (VINSW $vDi, !mul(Idx, 4), $rA)>;
+ def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)),
+ (Ty Idx))),
+ (VINSW $vDi, !mul(Idx, 4), (LWZ memri:$rA))>;
+ def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)),
+ (Ty Idx))),
+ (VINSW $vDi, !mul(Idx, 4), (PLWZ memri34:$rA))>;
+ def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load xaddr:$rA)),
+ (Ty Idx))),
+ (VINSW $vDi, !mul(Idx, 4), (LWZX memrr:$rA))>;
+ }
}
- foreach i = [0, 1] in
- def : Pat<(v2i64 (PPCvecinsertelt v2i64:$vDi, i64:$rA, (i64 i))),
- (VINSD $vDi, !mul(i, 8), $rA)>;
+
+ foreach Idx = [0, 1] in
+ def : Pat<(v2i64 (insertelt v2i64:$vDi, i64:$rA, Idx)),
+ (VINSD $vDi, !mul(Idx, 8), $rA)>;
}
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index c0f2aed43a4d..d2d5ca92ca1c 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -138,6 +138,10 @@ def PPCldvsxlh : SDNode<"PPCISD::LD_VSX_LH", SDT_PPCldvsxlh,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def PPCldsplat : SDNode<"PPCISD::LD_SPLAT", SDT_PPCldsplat,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def PPCzextldsplat : SDNode<"PPCISD::ZEXT_LD_SPLAT", SDT_PPCldsplat,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def PPCsextldsplat : SDNode<"PPCISD::SEXT_LD_SPLAT", SDT_PPCldsplat,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def PPCSToV : SDNode<"PPCISD::SCALAR_TO_VECTOR_PERMUTED",
SDTypeProfile<1, 1, []>, []>;
@@ -902,16 +906,13 @@ let hasSideEffects = 0 in {
// Rounding Instructions respecting current rounding mode
def XSRDPIC : XX2Form<60, 107,
(outs vsfrc:$XT), (ins vsfrc:$XB),
- "xsrdpic $XT, $XB", IIC_VecFP,
- [(set f64:$XT, (fnearbyint f64:$XB))]>;
+ "xsrdpic $XT, $XB", IIC_VecFP, []>;
def XVRDPIC : XX2Form<60, 235,
(outs vsrc:$XT), (ins vsrc:$XB),
- "xvrdpic $XT, $XB", IIC_VecFP,
- [(set v2f64:$XT, (fnearbyint v2f64:$XB))]>;
+ "xvrdpic $XT, $XB", IIC_VecFP, []>;
def XVRSPIC : XX2Form<60, 171,
(outs vsrc:$XT), (ins vsrc:$XB),
- "xvrspic $XT, $XB", IIC_VecFP,
- [(set v4f32:$XT, (fnearbyint v4f32:$XB))]>;
+ "xvrspic $XT, $XB", IIC_VecFP, []>;
// Max/Min Instructions
let isCommutable = 1 in {
def XSMAXDP : XX3Form<60, 160,
@@ -1062,6 +1063,14 @@ let hasSideEffects = 0 in {
[(set v2i64:$XT, (PPCxxpermdi v2i64:$XA, v2i64:$XB,
imm32SExt16:$DM))]>;
let isCodeGenOnly = 1 in
+  // Note that the input register class for `$XA` of XXPERMDIs is `vsfrc`, which
+  // is not the same as the input register class (`vsrc`) of the XXPERMDI
+  // instruction. We did this on purpose because:
+  // 1: The input is primarily for loads that load a partial vector (LFIWZX,
+  //    etc.), so there is no need for SUBREG_TO_REG.
+  // 2: With the `vsfrc` register class, the final assembly uses float registers
+  //    like `f0` instead of vector scalar registers like `vs0`, which helps
+  //    readability.
def XXPERMDIs : XX3Form_2s<60, 10, (outs vsrc:$XT), (ins vsfrc:$XA, u2imm:$DM),
"xxpermdi $XT, $XA, $XA, $DM", IIC_VecPerm, []>;
def XXSEL : XX4Form<60, 3,
@@ -2771,9 +2780,6 @@ def : Pat<(v2f64 (int_ppc_vsx_lxvd2x_be ForceXForm:$src)), (LXVD2X ForceXForm:$s
def : Pat<(f32 (any_fround f32:$S)),
(f32 (COPY_TO_REGCLASS (XSRDPI
(COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
-def : Pat<(f32 (fnearbyint f32:$S)),
- (f32 (COPY_TO_REGCLASS (XSRDPIC
- (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
def : Pat<(f32 (any_ffloor f32:$S)),
(f32 (COPY_TO_REGCLASS (XSRDPIM
(COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
@@ -2792,6 +2798,19 @@ def : Pat<(v4f32 (any_frint v4f32:$S)), (v4f32 (XVRSPIC $S))>;
def : Pat<(f64 (any_frint f64:$S)), (f64 (XSRDPIC $S))>;
def : Pat<(v2f64 (any_frint v2f64:$S)), (v2f64 (XVRDPIC $S))>;
+// Rounding without exceptions (nearbyint). Due to strange tblgen behaviour,
+// these need to be defined after the any_frint versions so ISEL will correctly
+// add the chain to the strict versions.
+def : Pat<(f32 (fnearbyint f32:$S)),
+ (f32 (COPY_TO_REGCLASS (XSRDPIC
+ (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
+def : Pat<(f64 (fnearbyint f64:$S)),
+ (f64 (XSRDPIC $S))>;
+def : Pat<(v2f64 (fnearbyint v2f64:$S)),
+ (v2f64 (XVRDPIC $S))>;
+def : Pat<(v4f32 (fnearbyint v4f32:$S)),
+ (v4f32 (XVRSPIC $S))>;
+
// Materialize a zero-vector of long long
def : Pat<(v2i64 immAllZerosV),
(v2i64 (XXLXORz))>;
@@ -2809,6 +2828,10 @@ def : Pat<(v2i64 (build_vector DblToLong.A, DblToLong.A)),
def : Pat<(v2i64 (build_vector DblToULong.A, DblToULong.A)),
(v2i64 (XXPERMDI (SUBREG_TO_REG (i64 1), (XSCVDPUXDS $A), sub_64),
(SUBREG_TO_REG (i64 1), (XSCVDPUXDS $A), sub_64), 0))>;
+def : Pat<(v4i32 (PPCSToV DblToInt.A)),
+ (v4i32 (SUBREG_TO_REG (i64 1), (XSCVDPSXWS f64:$A), sub_64))>;
+def : Pat<(v4i32 (PPCSToV DblToUInt.A)),
+ (v4i32 (SUBREG_TO_REG (i64 1), (XSCVDPUXWS f64:$A), sub_64))>;
defm : ScalToVecWPermute<
v4i32, FltToIntLoad.A,
(XXSPLTW (SUBREG_TO_REG (i64 1), (XSCVDPSXWSs (XFLOADf32 ForceXForm:$A)), sub_64), 1),
@@ -2823,10 +2846,20 @@ def : Pat<(v4f32 (build_vector (f32 (fpround f64:$A)), (f32 (fpround f64:$A)),
def : Pat<(v4f32 (build_vector f32:$A, f32:$A, f32:$A, f32:$A)),
(v4f32 (XXSPLTW (v4f32 (XSCVDPSPN $A)), 0))>;
+
+// Splat loads.
def : Pat<(v2f64 (PPCldsplat ForceXForm:$A)),
(v2f64 (LXVDSX ForceXForm:$A))>;
+def : Pat<(v4f32 (PPCldsplat ForceXForm:$A)),
+ (v4f32 (XXSPLTW (SUBREG_TO_REG (i64 1), (LFIWZX ForceXForm:$A), sub_64), 1))>;
def : Pat<(v2i64 (PPCldsplat ForceXForm:$A)),
(v2i64 (LXVDSX ForceXForm:$A))>;
+def : Pat<(v4i32 (PPCldsplat ForceXForm:$A)),
+ (v4i32 (XXSPLTW (SUBREG_TO_REG (i64 1), (LFIWZX ForceXForm:$A), sub_64), 1))>;
+def : Pat<(v2i64 (PPCzextldsplat ForceXForm:$A)),
+ (v2i64 (XXPERMDIs (LFIWZX ForceXForm:$A), 0))>;
+def : Pat<(v2i64 (PPCsextldsplat ForceXForm:$A)),
+ (v2i64 (XXPERMDIs (LFIWAX ForceXForm:$A), 0))>;
// Build vectors of floating point converted to i64.
def : Pat<(v2i64 (build_vector FltToLong.A, FltToLong.A)),
@@ -2962,11 +2995,11 @@ def : Pat<(v2i64 (fp_to_uint
def : Pat<WToDPExtractConv.BV02S,
(v2f64 (XVCVSXWDP $A))>;
def : Pat<WToDPExtractConv.BV13S,
- (v2f64 (XVCVSXWDP (XXSLDWI $A, $A, 3)))>;
+ (v2f64 (XVCVSXWDP (XXSLDWI $A, $A, 1)))>;
def : Pat<WToDPExtractConv.BV02U,
(v2f64 (XVCVUXWDP $A))>;
def : Pat<WToDPExtractConv.BV13U,
- (v2f64 (XVCVUXWDP (XXSLDWI $A, $A, 3)))>;
+ (v2f64 (XVCVUXWDP (XXSLDWI $A, $A, 1)))>;
def : Pat<(v2f64 (insertelt v2f64:$A, f64:$B, 0)),
(v2f64 (XXPERMDI (SUBREG_TO_REG (i64 1), $B, sub_64), $A, 1))>;
def : Pat<(v2f64 (insertelt v2f64:$A, f64:$B, 1)),
@@ -3536,6 +3569,12 @@ def : Pat<(v16i8 (PPCmtvsrz i32:$A)),
def : Pat<(v4i32 (build_vector immSExt5NonZero:$A, immSExt5NonZero:$A,
immSExt5NonZero:$A, immSExt5NonZero:$A)),
(v4i32 (VSPLTISW imm:$A))>;
+
+// Splat loads.
+def : Pat<(v8i16 (PPCldsplat ForceXForm:$A)),
+ (v8i16 (VSPLTHs 3, (MTVSRWZ (LHZX ForceXForm:$A))))>;
+def : Pat<(v16i8 (PPCldsplat ForceXForm:$A)),
+ (v16i8 (VSPLTBs 7, (MTVSRWZ (LBZX ForceXForm:$A))))>;
} // HasVSX, HasDirectMove
// Big endian VSX subtarget with direct moves.
@@ -3547,7 +3586,7 @@ defm : ScalToVecWPermute<
(SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64)>;
defm : ScalToVecWPermute<
v8i16, (i32 i32:$A),
- (SUBREG_TO_REG (i64 1), MovesToVSR.BE_BYTE_0, sub_64),
+ (SUBREG_TO_REG (i64 1), MovesToVSR.BE_HALF_0, sub_64),
(SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64)>;
defm : ScalToVecWPermute<
v4i32, (i32 i32:$A),
@@ -4083,6 +4122,10 @@ def : Pat<(v4f32 (PPCldsplat ForceXForm:$A)),
(v4f32 (LXVWSX ForceXForm:$A))>;
def : Pat<(v4i32 (PPCldsplat ForceXForm:$A)),
(v4i32 (LXVWSX ForceXForm:$A))>;
+def : Pat<(v8i16 (PPCldsplat ForceXForm:$A)),
+ (v8i16 (VSPLTHs 3, (LXSIHZX ForceXForm:$A)))>;
+def : Pat<(v16i8 (PPCldsplat ForceXForm:$A)),
+ (v16i8 (VSPLTBs 7, (LXSIBZX ForceXForm:$A)))>;
} // HasVSX, HasP9Vector
// Any Power9 VSX subtarget with equivalent length but better Power10 VSX
@@ -4138,12 +4181,52 @@ def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))),
(f64 (XSCVUXDDP (XXEXTRACTUW $A, 12)))>;
def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 0)),
(v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 0))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToInt.B, 0)),
+ (v4i32 (XXINSERTW v4i32:$A,
+ (SUBREG_TO_REG (i64 1),
+ (XSCVDPSXWS f64:$B), sub_64),
+ 0))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToUInt.B, 0)),
+ (v4i32 (XXINSERTW v4i32:$A,
+ (SUBREG_TO_REG (i64 1),
+ (XSCVDPUXWS f64:$B), sub_64),
+ 0))>;
def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 1)),
(v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 4))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToInt.B, 1)),
+ (v4i32 (XXINSERTW v4i32:$A,
+ (SUBREG_TO_REG (i64 1),
+ (XSCVDPSXWS f64:$B), sub_64),
+ 4))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToUInt.B, 1)),
+ (v4i32 (XXINSERTW v4i32:$A,
+ (SUBREG_TO_REG (i64 1),
+ (XSCVDPUXWS f64:$B), sub_64),
+ 4))>;
def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 2)),
(v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 8))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToInt.B, 2)),
+ (v4i32 (XXINSERTW v4i32:$A,
+ (SUBREG_TO_REG (i64 1),
+ (XSCVDPSXWS f64:$B), sub_64),
+ 8))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToUInt.B, 2)),
+ (v4i32 (XXINSERTW v4i32:$A,
+ (SUBREG_TO_REG (i64 1),
+ (XSCVDPUXWS f64:$B), sub_64),
+ 8))>;
def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 3)),
(v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 12))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToInt.B, 3)),
+ (v4i32 (XXINSERTW v4i32:$A,
+ (SUBREG_TO_REG (i64 1),
+ (XSCVDPSXWS f64:$B), sub_64),
+ 12))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToUInt.B, 3)),
+ (v4i32 (XXINSERTW v4i32:$A,
+ (SUBREG_TO_REG (i64 1),
+ (XSCVDPUXWS f64:$B), sub_64),
+ 12))>;
def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 0)),
(v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 0))>;
def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 1)),
@@ -4382,12 +4465,52 @@ def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))),
(f64 (XSCVUXDDP (XXEXTRACTUW $A, 0)))>;
def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 0)),
(v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 12))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToInt.B, 0)),
+ (v4i32 (XXINSERTW v4i32:$A,
+ (SUBREG_TO_REG (i64 1),
+ (XSCVDPSXWS f64:$B), sub_64),
+ 12))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToUInt.B, 0)),
+ (v4i32 (XXINSERTW v4i32:$A,
+ (SUBREG_TO_REG (i64 1),
+ (XSCVDPUXWS f64:$B), sub_64),
+ 12))>;
def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 1)),
(v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 8))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToInt.B, 1)),
+ (v4i32 (XXINSERTW v4i32:$A,
+ (SUBREG_TO_REG (i64 1),
+ (XSCVDPSXWS f64:$B), sub_64),
+ 8))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToUInt.B, 1)),
+ (v4i32 (XXINSERTW v4i32:$A,
+ (SUBREG_TO_REG (i64 1),
+ (XSCVDPUXWS f64:$B), sub_64),
+ 8))>;
def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 2)),
(v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 4))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToInt.B, 2)),
+ (v4i32 (XXINSERTW v4i32:$A,
+ (SUBREG_TO_REG (i64 1),
+ (XSCVDPSXWS f64:$B), sub_64),
+ 4))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToUInt.B, 2)),
+ (v4i32 (XXINSERTW v4i32:$A,
+ (SUBREG_TO_REG (i64 1),
+ (XSCVDPUXWS f64:$B), sub_64),
+ 4))>;
def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 3)),
(v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 0))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToInt.B, 3)),
+ (v4i32 (XXINSERTW v4i32:$A,
+ (SUBREG_TO_REG (i64 1),
+ (XSCVDPSXWS f64:$B), sub_64),
+ 0))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToUInt.B, 3)),
+ (v4i32 (XXINSERTW v4i32:$A,
+ (SUBREG_TO_REG (i64 1),
+ (XSCVDPUXWS f64:$B), sub_64),
+ 0))>;
def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 0)),
(v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>;
def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 1)),
diff --git a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
index 1d2b1ed3f626..7f63827afbd6 100644
--- a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
+++ b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
@@ -39,6 +39,40 @@
// T *p = array[-1];
// for (int i = 0; i < n; ++i)
// *++p = c;
+//
+// 3: common multiple chains for loads/stores with the same offsets in the loop,
+//    so that we can reuse the offsets and reduce the register pressure in the
+//    loop. This transformation can also increase the loop ILP, as each chain
+//    now uses its own loop induction add/addi, but it will increase the number
+//    of add/addi instructions in the loop.
+//
+// Generically, this means transforming loops like this:
+//
+// char *p;
+// A1 = p + base1
+// A2 = p + base1 + offset
+// B1 = p + base2
+// B2 = p + base2 + offset
+//
+// for (int i = 0; i < n; i++) {
+//   unsigned long x1 = *(unsigned long *)(A1 + i);
+//   unsigned long x2 = *(unsigned long *)(A2 + i);
+// unsigned long x3 = *(unsigned long *)(B1 + i);
+// unsigned long x4 = *(unsigned long *)(B2 + i);
+// }
+//
+// to look like this:
+//
+// A1_new = p + base1 // chain 1
+// B1_new = p + base2            // chain 2; inside the loop, the common
+//                               // offset is reused.
+//
+// for (long long i = 0; i < n; i+=count) {
+// unsigned long x1 = *(unsigned long *)(A1_new + i);
+// unsigned long x2 = *(unsigned long *)((A1_new + i) + offset);
+// unsigned long x3 = *(unsigned long *)(B1_new + i);
+// unsigned long x4 = *(unsigned long *)((B1_new + i) + offset);
+// }
//===----------------------------------------------------------------------===//
#include "PPC.h"
@@ -81,15 +115,25 @@
using namespace llvm;
-static cl::opt<unsigned> MaxVarsPrep("ppc-formprep-max-vars",
- cl::Hidden, cl::init(24),
- cl::desc("Potential common base number threshold per function for PPC loop "
- "prep"));
+static cl::opt<unsigned>
+ MaxVarsPrep("ppc-formprep-max-vars", cl::Hidden, cl::init(24),
+ cl::ZeroOrMore,
+ cl::desc("Potential common base number threshold per function "
+ "for PPC loop prep"));
static cl::opt<bool> PreferUpdateForm("ppc-formprep-prefer-update",
cl::init(true), cl::Hidden,
cl::desc("prefer update form when ds form is also a update form"));
+static cl::opt<bool> EnableUpdateFormForNonConstInc(
+ "ppc-formprep-update-nonconst-inc", cl::init(false), cl::Hidden,
+ cl::desc("prepare update form when the load/store increment is a loop "
+ "invariant non-const value."));
+
+static cl::opt<bool> EnableChainCommoning(
+ "ppc-formprep-chain-commoning", cl::init(false), cl::Hidden,
+ cl::desc("Enable chain commoning in PPC loop prepare pass."));
+
// Sum of following 3 per loop thresholds for all loops can not be larger
// than MaxVarsPrep.
// now the thresholds for each kind prep are exterimental values on Power9.
@@ -106,6 +150,16 @@ static cl::opt<unsigned> MaxVarsDQForm("ppc-dqprep-max-vars",
cl::Hidden, cl::init(8),
cl::desc("Potential PHI threshold per loop for PPC loop prep of DQ form"));
+// Commoning chains will reduce the register pressure, so we don't take the
+// number of PHI nodes into account.
+// But commoning chains will increase the number of add/addi instructions in the
+// loop and also increase loop ILP. The maximum chain number should be the same
+// as the hardware IssueWidth, because we won't benefit from ILP if the number
+// of parallel chains is bigger than IssueWidth. We assume there are 2 chains in
+// one bucket, so there would be at most 4 buckets on P9 (IssueWidth is 8).
+static cl::opt<unsigned> MaxVarsChainCommon(
+ "ppc-chaincommon-max-vars", cl::Hidden, cl::init(4),
+ cl::desc("Bucket number per loop for PPC loop chain common"));
// If would not be profitable if the common base has only one load/store, ISEL
// should already be able to choose best load/store form based on offset for
@@ -116,35 +170,54 @@ static cl::opt<unsigned> DispFormPrepMinThreshold("ppc-dispprep-min-threshold",
cl::desc("Minimal common base load/store instructions triggering DS/DQ form "
"preparation"));
+static cl::opt<unsigned> ChainCommonPrepMinThreshold(
+ "ppc-chaincommon-min-threshold", cl::Hidden, cl::init(4),
+ cl::desc("Minimal common base load/store instructions triggering chain "
+                "commoning preparation. Must not be smaller than 4"));
+
STATISTIC(PHINodeAlreadyExistsUpdate, "PHI node already in pre-increment form");
STATISTIC(PHINodeAlreadyExistsDS, "PHI node already in DS form");
STATISTIC(PHINodeAlreadyExistsDQ, "PHI node already in DQ form");
STATISTIC(DSFormChainRewritten, "Num of DS form chain rewritten");
STATISTIC(DQFormChainRewritten, "Num of DQ form chain rewritten");
STATISTIC(UpdFormChainRewritten, "Num of update form chain rewritten");
+STATISTIC(ChainCommoningRewritten, "Num of commoning chains");
namespace {
struct BucketElement {
- BucketElement(const SCEVConstant *O, Instruction *I) : Offset(O), Instr(I) {}
+ BucketElement(const SCEV *O, Instruction *I) : Offset(O), Instr(I) {}
BucketElement(Instruction *I) : Offset(nullptr), Instr(I) {}
- const SCEVConstant *Offset;
+ const SCEV *Offset;
Instruction *Instr;
};
struct Bucket {
- Bucket(const SCEV *B, Instruction *I) : BaseSCEV(B),
- Elements(1, BucketElement(I)) {}
+ Bucket(const SCEV *B, Instruction *I)
+ : BaseSCEV(B), Elements(1, BucketElement(I)) {
+ ChainSize = 0;
+ }
+ // The base of the whole bucket.
const SCEV *BaseSCEV;
+
+ // All elements in the bucket. In the bucket, the element with the BaseSCEV
+ // has no offset and all other elements are stored as offsets to the
+ // BaseSCEV.
SmallVector<BucketElement, 16> Elements;
+
+  // The size of the potential chains. This is used for chain commoning only.
+ unsigned ChainSize;
+
+ // The base for each potential chain. This is used for chain commoning only.
+ SmallVector<BucketElement, 16> ChainBases;
};
// "UpdateForm" is not a real PPC instruction form, it stands for dform
// load/store with update like ldu/stdu, or Prefetch intrinsic.
// For DS form instructions, their displacements must be multiple of 4.
// For DQ form instructions, their displacements must be multiple of 16.
- enum InstrForm { UpdateForm = 1, DSForm = 4, DQForm = 16 };
+ enum PrepForm { UpdateForm = 1, DSForm = 4, DQForm = 16, ChainCommoning };
class PPCLoopInstrFormPrep : public FunctionPass {
public:
@@ -169,11 +242,12 @@ namespace {
private:
PPCTargetMachine *TM = nullptr;
- const PPCSubtarget *ST;
+ const PPCSubtarget *ST;
DominatorTree *DT;
LoopInfo *LI;
ScalarEvolution *SE;
bool PreserveLCSSA;
+ bool HasCandidateForPrepare;
/// Successful preparation number for Update/DS/DQ form in all inner most
/// loops. One successful preparation will put one common base out of loop,
@@ -184,22 +258,39 @@ namespace {
bool runOnLoop(Loop *L);
/// Check if required PHI node is already exist in Loop \p L.
- bool alreadyPrepared(Loop *L, Instruction* MemI,
+ bool alreadyPrepared(Loop *L, Instruction *MemI,
const SCEV *BasePtrStartSCEV,
- const SCEVConstant *BasePtrIncSCEV,
- InstrForm Form);
+ const SCEV *BasePtrIncSCEV, PrepForm Form);
+
+ /// Get the value which defines the increment SCEV \p BasePtrIncSCEV.
+ Value *getNodeForInc(Loop *L, Instruction *MemI,
+ const SCEV *BasePtrIncSCEV);
+
+ /// Common chains to reuse offsets for a loop to reduce register pressure.
+ bool chainCommoning(Loop *L, SmallVector<Bucket, 16> &Buckets);
+
+ /// Find out the potential commoning chains and their bases.
+ bool prepareBasesForCommoningChains(Bucket &BucketChain);
+
+ /// Rewrite load/store according to the common chains.
+ bool
+ rewriteLoadStoresForCommoningChains(Loop *L, Bucket &Bucket,
+ SmallSet<BasicBlock *, 16> &BBChanged);
/// Collect condition matched(\p isValidCandidate() returns true)
/// candidates in Loop \p L.
SmallVector<Bucket, 16> collectCandidates(
Loop *L,
- std::function<bool(const Instruction *, const Value *, const Type *)>
+ std::function<bool(const Instruction *, Value *, const Type *)>
isValidCandidate,
+ std::function<bool(const SCEV *)> isValidDiff,
unsigned MaxCandidateNum);
- /// Add a candidate to candidates \p Buckets.
+    /// Add a candidate to candidates \p Buckets if the diff between the
+    /// candidate and one base in \p Buckets matches \p isValidDiff.
void addOneCandidate(Instruction *MemI, const SCEV *LSCEV,
SmallVector<Bucket, 16> &Buckets,
+ std::function<bool(const SCEV *)> isValidDiff,
unsigned MaxCandidateNum);
/// Prepare all candidates in \p Buckets for update form.
@@ -207,8 +298,7 @@ namespace {
/// Prepare all candidates in \p Buckets for displacement form, now for
/// ds/dq.
- bool dispFormPrep(Loop *L, SmallVector<Bucket, 16> &Buckets,
- InstrForm Form);
+ bool dispFormPrep(Loop *L, SmallVector<Bucket, 16> &Buckets, PrepForm Form);
/// Prepare for one chain \p BucketChain, find the best base element and
/// update all other elements in \p BucketChain accordingly.
@@ -216,8 +306,7 @@ namespace {
/// If success, best base element must be stored as the first element of
/// \p BucketChain.
/// Return false if no base element found, otherwise return true.
- bool prepareBaseForDispFormChain(Bucket &BucketChain,
- InstrForm Form);
+ bool prepareBaseForDispFormChain(Bucket &BucketChain, PrepForm Form);
/// Prepare for one chain \p BucketChain, find the best base element and
/// update all other elements in \p BucketChain accordingly.
@@ -230,7 +319,20 @@ namespace {
/// preparation.
bool rewriteLoadStores(Loop *L, Bucket &BucketChain,
SmallSet<BasicBlock *, 16> &BBChanged,
- InstrForm Form);
+ PrepForm Form);
+
+ /// Rewrite for the base load/store of a chain.
+ std::pair<Instruction *, Instruction *>
+ rewriteForBase(Loop *L, const SCEVAddRecExpr *BasePtrSCEV,
+ Instruction *BaseMemI, bool CanPreInc, PrepForm Form,
+ SCEVExpander &SCEVE, SmallPtrSet<Value *, 16> &DeletedPtrs);
+
+ /// Rewrite for the other load/stores of a chain according to the new \p
+ /// Base.
+ Instruction *
+ rewriteForBucketElement(std::pair<Instruction *, Instruction *> Base,
+ const BucketElement &Element, Value *OffToBase,
+ SmallPtrSet<Value *, 16> &DeletedPtrs);
};
} // end anonymous namespace
@@ -266,23 +368,35 @@ static std::string getInstrName(const Value *I, StringRef Suffix) {
if (I->hasName())
return (I->getName() + Suffix).str();
else
- return "";
+ return "";
}
-static Value *GetPointerOperand(Value *MemI) {
+static Value *getPointerOperandAndType(Value *MemI,
+ Type **PtrElementType = nullptr) {
+
+ Value *PtrValue = nullptr;
+ Type *PointerElementType = nullptr;
+
if (LoadInst *LMemI = dyn_cast<LoadInst>(MemI)) {
- return LMemI->getPointerOperand();
+ PtrValue = LMemI->getPointerOperand();
+ PointerElementType = LMemI->getType();
} else if (StoreInst *SMemI = dyn_cast<StoreInst>(MemI)) {
- return SMemI->getPointerOperand();
+ PtrValue = SMemI->getPointerOperand();
+ PointerElementType = SMemI->getValueOperand()->getType();
} else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(MemI)) {
+ PointerElementType = Type::getInt8Ty(MemI->getContext());
if (IMemI->getIntrinsicID() == Intrinsic::prefetch ||
- IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp)
- return IMemI->getArgOperand(0);
- if (IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp)
- return IMemI->getArgOperand(1);
+ IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp) {
+ PtrValue = IMemI->getArgOperand(0);
+ } else if (IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp) {
+ PtrValue = IMemI->getArgOperand(1);
+ }
}
+  // Get ElementType if PtrElementType is not null.
+ if (PtrElementType)
+ *PtrElementType = PointerElementType;
- return nullptr;
+ return PtrValue;
}
bool PPCLoopInstrFormPrep::runOnFunction(Function &F) {
@@ -306,58 +420,460 @@ bool PPCLoopInstrFormPrep::runOnFunction(Function &F) {
return MadeChange;
}
-void PPCLoopInstrFormPrep::addOneCandidate(Instruction *MemI, const SCEV *LSCEV,
- SmallVector<Bucket, 16> &Buckets,
- unsigned MaxCandidateNum) {
- assert((MemI && GetPointerOperand(MemI)) &&
+// Finding the minimal (chain_number + reusable_offset_number) is a complicated
+// algorithmic problem.
+// For now, the algorithm used here is simply tuned to handle manually unrolled
+// cases.
+// FIXME: use a more powerful algorithm to find the minimal sum of chain_number
+// and reusable_offset_number for one base with multiple offsets.
+bool PPCLoopInstrFormPrep::prepareBasesForCommoningChains(Bucket &CBucket) {
+ // The minimal size for profitable chain commoning:
+ // A1 = base + offset1
+ // A2 = base + offset2 (offset2 - offset1 = X)
+ // A3 = base + offset3
+ // A4 = base + offset4 (offset4 - offset3 = X)
+ // ======>
+ // base1 = base + offset1
+ // base2 = base + offset3
+ // A1 = base1
+ // A2 = base1 + X
+ // A3 = base2
+ // A4 = base2 + X
+ //
+  // There is a benefit because of the reuse of offset 'X'.
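+  // Illustratively, the transformed form keeps two chain bases plus the one
+  // reused offset 'X' live in the loop, instead of four independent addresses
+  // A1..A4.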
+
+ assert(ChainCommonPrepMinThreshold >= 4 &&
+         "Threshold cannot be smaller than 4!\n");
+ if (CBucket.Elements.size() < ChainCommonPrepMinThreshold)
+ return false;
+
+  // We simply select FirstOffset as the first reusable offset, i.e. the offset
+  // between chain element 1 and element 0.
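+  // In the A1..A4 example above (assuming the elements were collected in that
+  // order), FirstOffset would be offset2 - offset1, i.e. 'X'.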
+ const SCEV *FirstOffset = CBucket.Elements[1].Offset;
+
+  // Figure out how many times the above FirstOffset is used in the bucket.
+  // For a successful chain commoning candidate, the offset difference between
+  // adjacent elements within a chain must also be FirstOffset.
+ unsigned FirstOffsetReusedCount = 1;
+
+  // Figure out how many times the above FirstOffset is used in the first
+  // chain. The chain number is then
+  // FirstOffsetReusedCount / FirstOffsetReusedCountInFirstChain.
+ unsigned FirstOffsetReusedCountInFirstChain = 1;
+
+ unsigned EleNum = CBucket.Elements.size();
+ bool SawChainSeparater = false;
+ for (unsigned j = 2; j != EleNum; ++j) {
+ if (SE->getMinusSCEV(CBucket.Elements[j].Offset,
+ CBucket.Elements[j - 1].Offset) == FirstOffset) {
+ if (!SawChainSeparater)
+ FirstOffsetReusedCountInFirstChain++;
+ FirstOffsetReusedCount++;
+ } else
+      // For now, if we meet any offset which is not FirstOffset, we assume we
+      // have found a new chain.
+      // This makes us miss some opportunities.
+      // For example, we can common:
+      //
+      // {OffsetA, OffsetA, OffsetB, OffsetA, OffsetA, OffsetB}
+      //
+      // as two chains:
+      // {{OffsetA, OffsetA, OffsetB}, {OffsetA, OffsetA, OffsetB}}
+ // FirstOffsetReusedCount = 4; FirstOffsetReusedCountInFirstChain = 2
+ //
+ // But we fail to common:
+ //
+ // {OffsetA, OffsetB, OffsetA, OffsetA, OffsetB, OffsetA}
+ // FirstOffsetReusedCount = 4; FirstOffsetReusedCountInFirstChain = 1
+
+ SawChainSeparater = true;
+ }
+
+ // FirstOffset is not reused, skip this bucket.
+ if (FirstOffsetReusedCount == 1)
+ return false;
+
+ unsigned ChainNum =
+ FirstOffsetReusedCount / FirstOffsetReusedCountInFirstChain;
+
+ // All elements are increased by FirstOffset.
+ // The number of chains should be sqrt(EleNum).
+ if (!SawChainSeparater)
+ ChainNum = (unsigned)sqrt((double)EleNum);
+
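+  // For example, with EleNum == 4 and every adjacent offset difference equal
+  // to FirstOffset, ChainNum is sqrt(4) == 2 and ChainSize below becomes 2,
+  // i.e. two chains of two elements each.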
+ CBucket.ChainSize = (unsigned)(EleNum / ChainNum);
+
+  // If this is not a perfect chain (e.g., not all elements can be put inside
+  // commoning chains), skip it for now.
+ if (CBucket.ChainSize * ChainNum != EleNum)
+ return false;
+
+ if (SawChainSeparater) {
+    // Check that the offset sequences are the same for all chains.
+ for (unsigned i = 1; i < CBucket.ChainSize; i++)
+ for (unsigned j = 1; j < ChainNum; j++)
+ if (CBucket.Elements[i].Offset !=
+ SE->getMinusSCEV(CBucket.Elements[i + j * CBucket.ChainSize].Offset,
+ CBucket.Elements[j * CBucket.ChainSize].Offset))
+ return false;
+ }
+
+ for (unsigned i = 0; i < ChainNum; i++)
+ CBucket.ChainBases.push_back(CBucket.Elements[i * CBucket.ChainSize]);
+
+ LLVM_DEBUG(dbgs() << "Bucket has " << ChainNum << " chains.\n");
+
+ return true;
+}
+
+bool PPCLoopInstrFormPrep::chainCommoning(Loop *L,
+ SmallVector<Bucket, 16> &Buckets) {
+ bool MadeChange = false;
+
+ if (Buckets.empty())
+ return MadeChange;
+
+ SmallSet<BasicBlock *, 16> BBChanged;
+
+ for (auto &Bucket : Buckets) {
+ if (prepareBasesForCommoningChains(Bucket))
+ MadeChange |= rewriteLoadStoresForCommoningChains(L, Bucket, BBChanged);
+ }
+
+ if (MadeChange)
+ for (auto *BB : BBChanged)
+ DeleteDeadPHIs(BB);
+ return MadeChange;
+}
+
+bool PPCLoopInstrFormPrep::rewriteLoadStoresForCommoningChains(
+ Loop *L, Bucket &Bucket, SmallSet<BasicBlock *, 16> &BBChanged) {
+ bool MadeChange = false;
+
+ assert(Bucket.Elements.size() ==
+ Bucket.ChainBases.size() * Bucket.ChainSize &&
+ "invalid bucket for chain commoning!\n");
+ SmallPtrSet<Value *, 16> DeletedPtrs;
+
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *LoopPredecessor = L->getLoopPredecessor();
+
+ SCEVExpander SCEVE(*SE, Header->getModule()->getDataLayout(),
+ "loopprepare-chaincommon");
+
+ for (unsigned ChainIdx = 0; ChainIdx < Bucket.ChainBases.size(); ++ChainIdx) {
+ unsigned BaseElemIdx = Bucket.ChainSize * ChainIdx;
+ const SCEV *BaseSCEV =
+ ChainIdx ? SE->getAddExpr(Bucket.BaseSCEV,
+ Bucket.Elements[BaseElemIdx].Offset)
+ : Bucket.BaseSCEV;
+ const SCEVAddRecExpr *BasePtrSCEV = cast<SCEVAddRecExpr>(BaseSCEV);
+
+ // Make sure the base is able to expand.
+ if (!isSafeToExpand(BasePtrSCEV->getStart(), *SE))
+ return MadeChange;
+
+ assert(BasePtrSCEV->isAffine() &&
+ "Invalid SCEV type for the base ptr for a candidate chain!\n");
+
+ std::pair<Instruction *, Instruction *> Base = rewriteForBase(
+ L, BasePtrSCEV, Bucket.Elements[BaseElemIdx].Instr,
+ false /* CanPreInc */, ChainCommoning, SCEVE, DeletedPtrs);
+
+ if (!Base.first || !Base.second)
+ return MadeChange;
+
+ // Keep track of the replacement pointer values we've inserted so that we
+ // don't generate more pointer values than necessary.
+ SmallPtrSet<Value *, 16> NewPtrs;
+ NewPtrs.insert(Base.first);
+
+ for (unsigned Idx = BaseElemIdx + 1; Idx < BaseElemIdx + Bucket.ChainSize;
+ ++Idx) {
+ BucketElement &I = Bucket.Elements[Idx];
+ Value *Ptr = getPointerOperandAndType(I.Instr);
+ assert(Ptr && "No pointer operand");
+ if (NewPtrs.count(Ptr))
+ continue;
+
+ const SCEV *OffsetSCEV =
+ BaseElemIdx ? SE->getMinusSCEV(Bucket.Elements[Idx].Offset,
+ Bucket.Elements[BaseElemIdx].Offset)
+ : Bucket.Elements[Idx].Offset;
+
+      // Make sure the offset is able to expand. We only need to check once,
+      // as the offsets are reused between different chains.
+ if (!BaseElemIdx)
+ if (!isSafeToExpand(OffsetSCEV, *SE))
+ return false;
+
+ Value *OffsetValue = SCEVE.expandCodeFor(
+ OffsetSCEV, OffsetSCEV->getType(), LoopPredecessor->getTerminator());
+
+ Instruction *NewPtr = rewriteForBucketElement(Base, Bucket.Elements[Idx],
+ OffsetValue, DeletedPtrs);
+
+ assert(NewPtr && "Wrong rewrite!\n");
+ NewPtrs.insert(NewPtr);
+ }
+
+ ++ChainCommoningRewritten;
+ }
+
+ // Clear the rewriter cache, because values that are in the rewriter's cache
+ // can be deleted below, causing the AssertingVH in the cache to trigger.
+ SCEVE.clear();
+
+ for (auto *Ptr : DeletedPtrs) {
+ if (Instruction *IDel = dyn_cast<Instruction>(Ptr))
+ BBChanged.insert(IDel->getParent());
+ RecursivelyDeleteTriviallyDeadInstructions(Ptr);
+ }
+
+ MadeChange = true;
+ return MadeChange;
+}
+
+// Rewrite the new base according to BasePtrSCEV.
+// bb.loop.preheader:
+// %newstart = ...
+// bb.loop.body:
+// %phinode = phi [ %newstart, %bb.loop.preheader ], [ %add, %bb.loop.body ]
+// ...
+// %add = getelementptr %phinode, %inc
+//
+// The first returned instruction is %phinode (or a type cast of %phinode); the
+// caller needs this value to rewrite other load/stores in the same chain.
+// The second returned instruction is %add; the caller needs this value to
+// rewrite other load/stores in the same chain.
+std::pair<Instruction *, Instruction *>
+PPCLoopInstrFormPrep::rewriteForBase(Loop *L, const SCEVAddRecExpr *BasePtrSCEV,
+ Instruction *BaseMemI, bool CanPreInc,
+ PrepForm Form, SCEVExpander &SCEVE,
+ SmallPtrSet<Value *, 16> &DeletedPtrs) {
+
+ LLVM_DEBUG(dbgs() << "PIP: Transforming: " << *BasePtrSCEV << "\n");
+
+ assert(BasePtrSCEV->getLoop() == L && "AddRec for the wrong loop?");
+
+ Value *BasePtr = getPointerOperandAndType(BaseMemI);
+ assert(BasePtr && "No pointer operand");
+
+ Type *I8Ty = Type::getInt8Ty(BaseMemI->getParent()->getContext());
+ Type *I8PtrTy =
+ Type::getInt8PtrTy(BaseMemI->getParent()->getContext(),
+ BasePtr->getType()->getPointerAddressSpace());
+
+ bool IsConstantInc = false;
+ const SCEV *BasePtrIncSCEV = BasePtrSCEV->getStepRecurrence(*SE);
+ Value *IncNode = getNodeForInc(L, BaseMemI, BasePtrIncSCEV);
+
+ const SCEVConstant *BasePtrIncConstantSCEV =
+ dyn_cast<SCEVConstant>(BasePtrIncSCEV);
+ if (BasePtrIncConstantSCEV)
+ IsConstantInc = true;
+
+ // No valid representation for the increment.
+ if (!IncNode) {
+ LLVM_DEBUG(dbgs() << "Loop Increasement can not be represented!\n");
+ return std::make_pair(nullptr, nullptr);
+ }
+
+ if (Form == UpdateForm && !IsConstantInc && !EnableUpdateFormForNonConstInc) {
+ LLVM_DEBUG(
+ dbgs()
+ << "Update form prepare for non-const increment is not enabled!\n");
+ return std::make_pair(nullptr, nullptr);
+ }
+
+ const SCEV *BasePtrStartSCEV = nullptr;
+ if (CanPreInc) {
+ assert(SE->isLoopInvariant(BasePtrIncSCEV, L) &&
+ "Increment is not loop invariant!\n");
+ BasePtrStartSCEV = SE->getMinusSCEV(BasePtrSCEV->getStart(),
+ IsConstantInc ? BasePtrIncConstantSCEV
+ : BasePtrIncSCEV);
+ } else
+ BasePtrStartSCEV = BasePtrSCEV->getStart();
+
+ if (alreadyPrepared(L, BaseMemI, BasePtrStartSCEV, BasePtrIncSCEV, Form)) {
+ LLVM_DEBUG(dbgs() << "Instruction form is already prepared!\n");
+ return std::make_pair(nullptr, nullptr);
+ }
+
+ LLVM_DEBUG(dbgs() << "PIP: New start is: " << *BasePtrStartSCEV << "\n");
+
+ BasicBlock *Header = L->getHeader();
+ unsigned HeaderLoopPredCount = pred_size(Header);
+ BasicBlock *LoopPredecessor = L->getLoopPredecessor();
+
+ PHINode *NewPHI = PHINode::Create(I8PtrTy, HeaderLoopPredCount,
+ getInstrName(BaseMemI, PHINodeNameSuffix),
+ Header->getFirstNonPHI());
+
+ Value *BasePtrStart = SCEVE.expandCodeFor(BasePtrStartSCEV, I8PtrTy,
+ LoopPredecessor->getTerminator());
+
+ // Note that LoopPredecessor might occur in the predecessor list multiple
+ // times, and we need to add it the right number of times.
+ for (auto PI : predecessors(Header)) {
+ if (PI != LoopPredecessor)
+ continue;
+
+ NewPHI->addIncoming(BasePtrStart, LoopPredecessor);
+ }
+
+ Instruction *PtrInc = nullptr;
+ Instruction *NewBasePtr = nullptr;
+ if (CanPreInc) {
+ Instruction *InsPoint = &*Header->getFirstInsertionPt();
+ PtrInc = GetElementPtrInst::Create(
+ I8Ty, NewPHI, IncNode, getInstrName(BaseMemI, GEPNodeIncNameSuffix),
+ InsPoint);
+ cast<GetElementPtrInst>(PtrInc)->setIsInBounds(IsPtrInBounds(BasePtr));
+ for (auto PI : predecessors(Header)) {
+ if (PI == LoopPredecessor)
+ continue;
+
+ NewPHI->addIncoming(PtrInc, PI);
+ }
+ if (PtrInc->getType() != BasePtr->getType())
+ NewBasePtr =
+ new BitCastInst(PtrInc, BasePtr->getType(),
+ getInstrName(PtrInc, CastNodeNameSuffix), InsPoint);
+ else
+ NewBasePtr = PtrInc;
+ } else {
+ // Note that LoopPredecessor might occur in the predecessor list multiple
+ // times, and we need to make sure no more incoming value for them in PHI.
+ for (auto PI : predecessors(Header)) {
+ if (PI == LoopPredecessor)
+ continue;
+
+ // For the latch predecessor, we need to insert a GEP just before the
+ // terminator to increase the address.
+ BasicBlock *BB = PI;
+ Instruction *InsPoint = BB->getTerminator();
+ PtrInc = GetElementPtrInst::Create(
+ I8Ty, NewPHI, IncNode, getInstrName(BaseMemI, GEPNodeIncNameSuffix),
+ InsPoint);
+ cast<GetElementPtrInst>(PtrInc)->setIsInBounds(IsPtrInBounds(BasePtr));
+
+ NewPHI->addIncoming(PtrInc, PI);
+ }
+ PtrInc = NewPHI;
+ if (NewPHI->getType() != BasePtr->getType())
+ NewBasePtr = new BitCastInst(NewPHI, BasePtr->getType(),
+ getInstrName(NewPHI, CastNodeNameSuffix),
+ &*Header->getFirstInsertionPt());
+ else
+ NewBasePtr = NewPHI;
+ }
+
+ BasePtr->replaceAllUsesWith(NewBasePtr);
+
+ DeletedPtrs.insert(BasePtr);
+
+ return std::make_pair(NewBasePtr, PtrInc);
+}
+
+Instruction *PPCLoopInstrFormPrep::rewriteForBucketElement(
+ std::pair<Instruction *, Instruction *> Base, const BucketElement &Element,
+ Value *OffToBase, SmallPtrSet<Value *, 16> &DeletedPtrs) {
+ Instruction *NewBasePtr = Base.first;
+ Instruction *PtrInc = Base.second;
+ assert((NewBasePtr && PtrInc) && "base does not exist!\n");
+
+ Type *I8Ty = Type::getInt8Ty(PtrInc->getParent()->getContext());
+
+ Value *Ptr = getPointerOperandAndType(Element.Instr);
+ assert(Ptr && "No pointer operand");
+
+ Instruction *RealNewPtr;
+ if (!Element.Offset ||
+ (isa<SCEVConstant>(Element.Offset) &&
+ cast<SCEVConstant>(Element.Offset)->getValue()->isZero())) {
+ RealNewPtr = NewBasePtr;
+ } else {
+ Instruction *PtrIP = dyn_cast<Instruction>(Ptr);
+ if (PtrIP && isa<Instruction>(NewBasePtr) &&
+ cast<Instruction>(NewBasePtr)->getParent() == PtrIP->getParent())
+ PtrIP = nullptr;
+ else if (PtrIP && isa<PHINode>(PtrIP))
+ PtrIP = &*PtrIP->getParent()->getFirstInsertionPt();
+ else if (!PtrIP)
+ PtrIP = Element.Instr;
+
+    assert(OffToBase && "There should be an offset for a non-base element!\n");
+ GetElementPtrInst *NewPtr = GetElementPtrInst::Create(
+ I8Ty, PtrInc, OffToBase,
+ getInstrName(Element.Instr, GEPNodeOffNameSuffix), PtrIP);
+ if (!PtrIP)
+ NewPtr->insertAfter(cast<Instruction>(PtrInc));
+ NewPtr->setIsInBounds(IsPtrInBounds(Ptr));
+ RealNewPtr = NewPtr;
+ }
+
+ Instruction *ReplNewPtr;
+ if (Ptr->getType() != RealNewPtr->getType()) {
+ ReplNewPtr = new BitCastInst(RealNewPtr, Ptr->getType(),
+ getInstrName(Ptr, CastNodeNameSuffix));
+ ReplNewPtr->insertAfter(RealNewPtr);
+ } else
+ ReplNewPtr = RealNewPtr;
+
+ Ptr->replaceAllUsesWith(ReplNewPtr);
+ DeletedPtrs.insert(Ptr);
+
+ return ReplNewPtr;
+}
+
+void PPCLoopInstrFormPrep::addOneCandidate(
+ Instruction *MemI, const SCEV *LSCEV, SmallVector<Bucket, 16> &Buckets,
+ std::function<bool(const SCEV *)> isValidDiff, unsigned MaxCandidateNum) {
+ assert((MemI && getPointerOperandAndType(MemI)) &&
"Candidate should be a memory instruction.");
assert(LSCEV && "Invalid SCEV for Ptr value.");
+
bool FoundBucket = false;
for (auto &B : Buckets) {
+ if (cast<SCEVAddRecExpr>(B.BaseSCEV)->getStepRecurrence(*SE) !=
+ cast<SCEVAddRecExpr>(LSCEV)->getStepRecurrence(*SE))
+ continue;
const SCEV *Diff = SE->getMinusSCEV(LSCEV, B.BaseSCEV);
- if (const auto *CDiff = dyn_cast<SCEVConstant>(Diff)) {
- B.Elements.push_back(BucketElement(CDiff, MemI));
+ if (isValidDiff(Diff)) {
+ B.Elements.push_back(BucketElement(Diff, MemI));
FoundBucket = true;
break;
}
}
if (!FoundBucket) {
- if (Buckets.size() == MaxCandidateNum)
+ if (Buckets.size() == MaxCandidateNum) {
+      LLVM_DEBUG(dbgs() << "Cannot prepare more chains, reached maximum limit "
+ << MaxCandidateNum << "\n");
return;
+ }
Buckets.push_back(Bucket(LSCEV, MemI));
}
}
SmallVector<Bucket, 16> PPCLoopInstrFormPrep::collectCandidates(
Loop *L,
- std::function<bool(const Instruction *, const Value *, const Type *)>
+ std::function<bool(const Instruction *, Value *, const Type *)>
isValidCandidate,
- unsigned MaxCandidateNum) {
+ std::function<bool(const SCEV *)> isValidDiff, unsigned MaxCandidateNum) {
SmallVector<Bucket, 16> Buckets;
+
for (const auto &BB : L->blocks())
for (auto &J : *BB) {
- Value *PtrValue;
- Type *PointerElementType;
-
- if (LoadInst *LMemI = dyn_cast<LoadInst>(&J)) {
- PtrValue = LMemI->getPointerOperand();
- PointerElementType = LMemI->getType();
- } else if (StoreInst *SMemI = dyn_cast<StoreInst>(&J)) {
- PtrValue = SMemI->getPointerOperand();
- PointerElementType = SMemI->getValueOperand()->getType();
- } else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(&J)) {
- PointerElementType = Type::getInt8Ty(J.getContext());
- if (IMemI->getIntrinsicID() == Intrinsic::prefetch ||
- IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp) {
- PtrValue = IMemI->getArgOperand(0);
- } else if (IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp) {
- PtrValue = IMemI->getArgOperand(1);
- } else continue;
- } else continue;
-
- unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace();
- if (PtrAddrSpace)
+ Value *PtrValue = nullptr;
+ Type *PointerElementType = nullptr;
+ PtrValue = getPointerOperandAndType(&J, &PointerElementType);
+
+ if (!PtrValue)
+ continue;
+
+ if (PtrValue->getType()->getPointerAddressSpace())
continue;
if (L->isLoopInvariant(PtrValue))
@@ -368,14 +884,17 @@ SmallVector<Bucket, 16> PPCLoopInstrFormPrep::collectCandidates(
if (!LARSCEV || LARSCEV->getLoop() != L)
continue;
+ // Mark that we have candidates for preparing.
+ HasCandidateForPrepare = true;
+
if (isValidCandidate(&J, PtrValue, PointerElementType))
- addOneCandidate(&J, LSCEV, Buckets, MaxCandidateNum);
+ addOneCandidate(&J, LSCEV, Buckets, isValidDiff, MaxCandidateNum);
}
return Buckets;
}
bool PPCLoopInstrFormPrep::prepareBaseForDispFormChain(Bucket &BucketChain,
- InstrForm Form) {
+ PrepForm Form) {
// RemainderOffsetInfo details:
// key: value of (Offset urem DispConstraint). For DSForm, it can
// be [0, 4).
@@ -388,8 +907,9 @@ bool PPCLoopInstrFormPrep::prepareBaseForDispFormChain(Bucket &BucketChain,
if (!BucketChain.Elements[j].Offset)
RemainderOffsetInfo[0] = std::make_pair(0, 1);
else {
- unsigned Remainder =
- BucketChain.Elements[j].Offset->getAPInt().urem(Form);
+ unsigned Remainder = cast<SCEVConstant>(BucketChain.Elements[j].Offset)
+ ->getAPInt()
+ .urem(Form);
if (RemainderOffsetInfo.find(Remainder) == RemainderOffsetInfo.end())
RemainderOffsetInfo[Remainder] = std::make_pair(j, 1);
else
@@ -404,13 +924,13 @@ bool PPCLoopInstrFormPrep::prepareBaseForDispFormChain(Bucket &BucketChain,
// contains following load/stores with different remainders:
// 1: 10 load/store whose remainder is 1;
// 2: 9 load/store whose remainder is 2;
- // 3: 1 for remainder 3 and 0 for remainder 0;
+ // 3: 1 for remainder 3 and 0 for remainder 0;
// Now we will choose the first load/store whose remainder is 1 as base and
// adjust all other load/stores according to new base, so we will get 10 DS
// form and 10 X form.
// But we should be more clever, for this case we could use two bases, one for
- // remainder 1 and the other for remainder 2, thus we could get 19 DS form and 1
- // X form.
+ // remainder 1 and the other for remainder 2, thus we could get 19 DS form and
+ // 1 X form.
unsigned MaxCountRemainder = 0;
for (unsigned j = 0; j < (unsigned)Form; j++)
if ((RemainderOffsetInfo.find(j) != RemainderOffsetInfo.end()) &&
@@ -471,7 +991,7 @@ bool PPCLoopInstrFormPrep::prepareBaseForUpdateFormChain(Bucket &BucketChain) {
// If our chosen element has no offset from the base pointer, there's
// nothing to do.
if (!BucketChain.Elements[j].Offset ||
- BucketChain.Elements[j].Offset->isZero())
+ cast<SCEVConstant>(BucketChain.Elements[j].Offset)->isZero())
break;
const SCEV *Offset = BucketChain.Elements[j].Offset;
@@ -489,191 +1009,76 @@ bool PPCLoopInstrFormPrep::prepareBaseForUpdateFormChain(Bucket &BucketChain) {
return true;
}
-bool PPCLoopInstrFormPrep::rewriteLoadStores(Loop *L, Bucket &BucketChain,
- SmallSet<BasicBlock *, 16> &BBChanged,
- InstrForm Form) {
+bool PPCLoopInstrFormPrep::rewriteLoadStores(
+ Loop *L, Bucket &BucketChain, SmallSet<BasicBlock *, 16> &BBChanged,
+ PrepForm Form) {
bool MadeChange = false;
+
const SCEVAddRecExpr *BasePtrSCEV =
cast<SCEVAddRecExpr>(BucketChain.BaseSCEV);
if (!BasePtrSCEV->isAffine())
return MadeChange;
- LLVM_DEBUG(dbgs() << "PIP: Transforming: " << *BasePtrSCEV << "\n");
-
- assert(BasePtrSCEV->getLoop() == L && "AddRec for the wrong loop?");
-
- // The instruction corresponding to the Bucket's BaseSCEV must be the first
- // in the vector of elements.
- Instruction *MemI = BucketChain.Elements.begin()->Instr;
- Value *BasePtr = GetPointerOperand(MemI);
- assert(BasePtr && "No pointer operand");
-
- Type *I8Ty = Type::getInt8Ty(MemI->getParent()->getContext());
- Type *I8PtrTy = Type::getInt8PtrTy(MemI->getParent()->getContext(),
- BasePtr->getType()->getPointerAddressSpace());
-
- if (!SE->isLoopInvariant(BasePtrSCEV->getStart(), L))
+ if (!isSafeToExpand(BasePtrSCEV->getStart(), *SE))
return MadeChange;
- const SCEVConstant *BasePtrIncSCEV =
- dyn_cast<SCEVConstant>(BasePtrSCEV->getStepRecurrence(*SE));
- if (!BasePtrIncSCEV)
- return MadeChange;
+ SmallPtrSet<Value *, 16> DeletedPtrs;
+
+ BasicBlock *Header = L->getHeader();
+ SCEVExpander SCEVE(*SE, Header->getModule()->getDataLayout(),
+ "loopprepare-formrewrite");
// For some DS form load/store instructions, it can also be an update form,
- // if the stride is a multipler of 4. Use update form if prefer it.
+  // if the stride is constant and is a multiple of 4. Use the update form if
+  // it is preferred.
bool CanPreInc = (Form == UpdateForm ||
- ((Form == DSForm) && !BasePtrIncSCEV->getAPInt().urem(4) &&
+ ((Form == DSForm) &&
+ isa<SCEVConstant>(BasePtrSCEV->getStepRecurrence(*SE)) &&
+ !cast<SCEVConstant>(BasePtrSCEV->getStepRecurrence(*SE))
+ ->getAPInt()
+ .urem(4) &&
PreferUpdateForm));
- const SCEV *BasePtrStartSCEV = nullptr;
- if (CanPreInc)
- BasePtrStartSCEV =
- SE->getMinusSCEV(BasePtrSCEV->getStart(), BasePtrIncSCEV);
- else
- BasePtrStartSCEV = BasePtrSCEV->getStart();
- if (!isSafeToExpand(BasePtrStartSCEV, *SE))
- return MadeChange;
+ std::pair<Instruction *, Instruction *> Base =
+ rewriteForBase(L, BasePtrSCEV, BucketChain.Elements.begin()->Instr,
+ CanPreInc, Form, SCEVE, DeletedPtrs);
- if (alreadyPrepared(L, MemI, BasePtrStartSCEV, BasePtrIncSCEV, Form))
+ if (!Base.first || !Base.second)
return MadeChange;
- LLVM_DEBUG(dbgs() << "PIP: New start is: " << *BasePtrStartSCEV << "\n");
-
- BasicBlock *Header = L->getHeader();
- unsigned HeaderLoopPredCount = pred_size(Header);
- BasicBlock *LoopPredecessor = L->getLoopPredecessor();
-
- PHINode *NewPHI =
- PHINode::Create(I8PtrTy, HeaderLoopPredCount,
- getInstrName(MemI, PHINodeNameSuffix),
- Header->getFirstNonPHI());
-
- SCEVExpander SCEVE(*SE, Header->getModule()->getDataLayout(), "pistart");
- Value *BasePtrStart = SCEVE.expandCodeFor(BasePtrStartSCEV, I8PtrTy,
- LoopPredecessor->getTerminator());
-
- // Note that LoopPredecessor might occur in the predecessor list multiple
- // times, and we need to add it the right number of times.
- for (auto PI : predecessors(Header)) {
- if (PI != LoopPredecessor)
- continue;
-
- NewPHI->addIncoming(BasePtrStart, LoopPredecessor);
- }
-
- Instruction *PtrInc = nullptr;
- Instruction *NewBasePtr = nullptr;
- if (CanPreInc) {
- Instruction *InsPoint = &*Header->getFirstInsertionPt();
- PtrInc = GetElementPtrInst::Create(
- I8Ty, NewPHI, BasePtrIncSCEV->getValue(),
- getInstrName(MemI, GEPNodeIncNameSuffix), InsPoint);
- cast<GetElementPtrInst>(PtrInc)->setIsInBounds(IsPtrInBounds(BasePtr));
- for (auto PI : predecessors(Header)) {
- if (PI == LoopPredecessor)
- continue;
-
- NewPHI->addIncoming(PtrInc, PI);
- }
- if (PtrInc->getType() != BasePtr->getType())
- NewBasePtr = new BitCastInst(
- PtrInc, BasePtr->getType(),
- getInstrName(PtrInc, CastNodeNameSuffix), InsPoint);
- else
- NewBasePtr = PtrInc;
- } else {
- // Note that LoopPredecessor might occur in the predecessor list multiple
- // times, and we need to make sure no more incoming value for them in PHI.
- for (auto PI : predecessors(Header)) {
- if (PI == LoopPredecessor)
- continue;
-
- // For the latch predecessor, we need to insert a GEP just before the
- // terminator to increase the address.
- BasicBlock *BB = PI;
- Instruction *InsPoint = BB->getTerminator();
- PtrInc = GetElementPtrInst::Create(
- I8Ty, NewPHI, BasePtrIncSCEV->getValue(),
- getInstrName(MemI, GEPNodeIncNameSuffix), InsPoint);
-
- cast<GetElementPtrInst>(PtrInc)->setIsInBounds(IsPtrInBounds(BasePtr));
-
- NewPHI->addIncoming(PtrInc, PI);
- }
- PtrInc = NewPHI;
- if (NewPHI->getType() != BasePtr->getType())
- NewBasePtr =
- new BitCastInst(NewPHI, BasePtr->getType(),
- getInstrName(NewPHI, CastNodeNameSuffix),
- &*Header->getFirstInsertionPt());
- else
- NewBasePtr = NewPHI;
- }
-
- // Clear the rewriter cache, because values that are in the rewriter's cache
- // can be deleted below, causing the AssertingVH in the cache to trigger.
- SCEVE.clear();
-
- if (Instruction *IDel = dyn_cast<Instruction>(BasePtr))
- BBChanged.insert(IDel->getParent());
- BasePtr->replaceAllUsesWith(NewBasePtr);
- RecursivelyDeleteTriviallyDeadInstructions(BasePtr);
-
// Keep track of the replacement pointer values we've inserted so that we
// don't generate more pointer values than necessary.
SmallPtrSet<Value *, 16> NewPtrs;
- NewPtrs.insert(NewBasePtr);
+ NewPtrs.insert(Base.first);
for (auto I = std::next(BucketChain.Elements.begin()),
IE = BucketChain.Elements.end(); I != IE; ++I) {
- Value *Ptr = GetPointerOperand(I->Instr);
+ Value *Ptr = getPointerOperandAndType(I->Instr);
assert(Ptr && "No pointer operand");
if (NewPtrs.count(Ptr))
continue;
- Instruction *RealNewPtr;
- if (!I->Offset || I->Offset->getValue()->isZero()) {
- RealNewPtr = NewBasePtr;
- } else {
- Instruction *PtrIP = dyn_cast<Instruction>(Ptr);
- if (PtrIP && isa<Instruction>(NewBasePtr) &&
- cast<Instruction>(NewBasePtr)->getParent() == PtrIP->getParent())
- PtrIP = nullptr;
- else if (PtrIP && isa<PHINode>(PtrIP))
- PtrIP = &*PtrIP->getParent()->getFirstInsertionPt();
- else if (!PtrIP)
- PtrIP = I->Instr;
-
- GetElementPtrInst *NewPtr = GetElementPtrInst::Create(
- I8Ty, PtrInc, I->Offset->getValue(),
- getInstrName(I->Instr, GEPNodeOffNameSuffix), PtrIP);
- if (!PtrIP)
- NewPtr->insertAfter(cast<Instruction>(PtrInc));
- NewPtr->setIsInBounds(IsPtrInBounds(Ptr));
- RealNewPtr = NewPtr;
- }
+ Instruction *NewPtr = rewriteForBucketElement(
+ Base, *I,
+ I->Offset ? cast<SCEVConstant>(I->Offset)->getValue() : nullptr,
+ DeletedPtrs);
+ assert(NewPtr && "wrong rewrite!\n");
+ NewPtrs.insert(NewPtr);
+ }
+
+ // Clear the rewriter cache, because values that are in the rewriter's cache
+ // can be deleted below, causing the AssertingVH in the cache to trigger.
+ SCEVE.clear();
+ for (auto *Ptr : DeletedPtrs) {
if (Instruction *IDel = dyn_cast<Instruction>(Ptr))
BBChanged.insert(IDel->getParent());
-
- Instruction *ReplNewPtr;
- if (Ptr->getType() != RealNewPtr->getType()) {
- ReplNewPtr = new BitCastInst(RealNewPtr, Ptr->getType(),
- getInstrName(Ptr, CastNodeNameSuffix));
- ReplNewPtr->insertAfter(RealNewPtr);
- } else
- ReplNewPtr = RealNewPtr;
-
- Ptr->replaceAllUsesWith(ReplNewPtr);
RecursivelyDeleteTriviallyDeadInstructions(Ptr);
-
- NewPtrs.insert(RealNewPtr);
}
MadeChange = true;
- SuccPrepCount++;
+ SuccPrepCount++;
if (Form == DSForm && !CanPreInc)
DSFormChainRewritten++;
@@ -698,14 +1103,14 @@ bool PPCLoopInstrFormPrep::updateFormPrep(Loop *L,
MadeChange |= rewriteLoadStores(L, Bucket, BBChanged, UpdateForm);
if (MadeChange)
- for (auto &BB : L->blocks())
- if (BBChanged.count(BB))
- DeleteDeadPHIs(BB);
+ for (auto *BB : BBChanged)
+ DeleteDeadPHIs(BB);
return MadeChange;
}
-bool PPCLoopInstrFormPrep::dispFormPrep(Loop *L, SmallVector<Bucket, 16> &Buckets,
- InstrForm Form) {
+bool PPCLoopInstrFormPrep::dispFormPrep(Loop *L,
+ SmallVector<Bucket, 16> &Buckets,
+ PrepForm Form) {
bool MadeChange = false;
if (Buckets.empty())
@@ -720,20 +1125,95 @@ bool PPCLoopInstrFormPrep::dispFormPrep(Loop *L, SmallVector<Bucket, 16> &Bucket
}
if (MadeChange)
- for (auto &BB : L->blocks())
- if (BBChanged.count(BB))
- DeleteDeadPHIs(BB);
+ for (auto *BB : BBChanged)
+ DeleteDeadPHIs(BB);
return MadeChange;
}
+// Find the loop invariant increment node for SCEV BasePtrIncSCEV.
+// bb.loop.preheader:
+// %start = ...
+// bb.loop.body:
+// %phinode = phi [ %start, %bb.loop.preheader ], [ %add, %bb.loop.body ]
+// ...
+// %add = add %phinode, %inc ; %inc is what we want to get.
+//
+Value *PPCLoopInstrFormPrep::getNodeForInc(Loop *L, Instruction *MemI,
+ const SCEV *BasePtrIncSCEV) {
+ // If the increment is a constant, no definition is needed.
+ // Return the value directly.
+ if (isa<SCEVConstant>(BasePtrIncSCEV))
+ return cast<SCEVConstant>(BasePtrIncSCEV)->getValue();
+
+ if (!SE->isLoopInvariant(BasePtrIncSCEV, L))
+ return nullptr;
+
+ BasicBlock *BB = MemI->getParent();
+ if (!BB)
+ return nullptr;
+
+ BasicBlock *LatchBB = L->getLoopLatch();
+
+ if (!LatchBB)
+ return nullptr;
+
+  // Run through the PHIs and check their operands to find a valid
+  // representation for the increment SCEV.
+ iterator_range<BasicBlock::phi_iterator> PHIIter = BB->phis();
+ for (auto &CurrentPHI : PHIIter) {
+ PHINode *CurrentPHINode = dyn_cast<PHINode>(&CurrentPHI);
+ if (!CurrentPHINode)
+ continue;
+
+ if (!SE->isSCEVable(CurrentPHINode->getType()))
+ continue;
+
+ const SCEV *PHISCEV = SE->getSCEVAtScope(CurrentPHINode, L);
+
+ const SCEVAddRecExpr *PHIBasePtrSCEV = dyn_cast<SCEVAddRecExpr>(PHISCEV);
+ if (!PHIBasePtrSCEV)
+ continue;
+
+ const SCEV *PHIBasePtrIncSCEV = PHIBasePtrSCEV->getStepRecurrence(*SE);
+
+ if (!PHIBasePtrIncSCEV || (PHIBasePtrIncSCEV != BasePtrIncSCEV))
+ continue;
+
+ // Get the incoming value from the loop latch and check if the value has
+ // the add form with the required increment.
+ if (Instruction *I = dyn_cast<Instruction>(
+ CurrentPHINode->getIncomingValueForBlock(LatchBB))) {
+ Value *StrippedBaseI = I;
+ while (BitCastInst *BC = dyn_cast<BitCastInst>(StrippedBaseI))
+ StrippedBaseI = BC->getOperand(0);
+
+ Instruction *StrippedI = dyn_cast<Instruction>(StrippedBaseI);
+ if (!StrippedI)
+ continue;
+
+      // The LSR pass may add a getelementptr instruction to do the loop
+      // increment; also search in that getelementptr instruction.
+ if (StrippedI->getOpcode() == Instruction::Add ||
+ (StrippedI->getOpcode() == Instruction::GetElementPtr &&
+ StrippedI->getNumOperands() == 2)) {
+ if (SE->getSCEVAtScope(StrippedI->getOperand(0), L) == BasePtrIncSCEV)
+ return StrippedI->getOperand(0);
+ if (SE->getSCEVAtScope(StrippedI->getOperand(1), L) == BasePtrIncSCEV)
+ return StrippedI->getOperand(1);
+ }
+ }
+ }
+ return nullptr;
+}
+
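For orientation, here is a minimal source-level sketch (hypothetical names, not code from the pass) of a loop whose IR produces the preheader/PHI/add pattern described in the comment above; for such a loop, getNodeForInc would typically return the loop-invariant increment 'inc':

  long sumStrided(const char *p, long n, long inc) { // 'inc' is loop invariant
    long sum = 0;
    const char *cur = p;        // %start, defined in the loop preheader
    for (long i = 0; i < n; ++i) {
      sum += *cur;              // the memory access whose pointer is the PHI
      cur += inc;               // %add = add %phinode, %inc
    }
    return sum;
  }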
// In order to prepare for the preferred instruction form, a PHI is added.
// This function will check to see if that PHI already exists and will return
// true if it found an existing PHI with the same start and increment as the
// one we wanted to create.
-bool PPCLoopInstrFormPrep::alreadyPrepared(Loop *L, Instruction* MemI,
- const SCEV *BasePtrStartSCEV,
- const SCEVConstant *BasePtrIncSCEV,
- InstrForm Form) {
+bool PPCLoopInstrFormPrep::alreadyPrepared(Loop *L, Instruction *MemI,
+ const SCEV *BasePtrStartSCEV,
+ const SCEV *BasePtrIncSCEV,
+ PrepForm Form) {
BasicBlock *BB = MemI->getParent();
if (!BB)
return false;
@@ -773,11 +1253,11 @@ bool PPCLoopInstrFormPrep::alreadyPrepared(Loop *L, Instruction* MemI,
if (PHIBasePtrIncSCEV == BasePtrIncSCEV) {
// The existing PHI (CurrentPHINode) has the same start and increment
// as the PHI that we wanted to create.
- if (Form == UpdateForm &&
+ if ((Form == UpdateForm || Form == ChainCommoning) &&
PHIBasePtrSCEV->getStart() == BasePtrStartSCEV) {
++PHINodeAlreadyExistsUpdate;
return true;
- }
+ }
if (Form == DSForm || Form == DQForm) {
const SCEVConstant *Diff = dyn_cast<SCEVConstant>(
SE->getMinusSCEV(PHIBasePtrSCEV->getStart(), BasePtrStartSCEV));
@@ -788,7 +1268,7 @@ bool PPCLoopInstrFormPrep::alreadyPrepared(Loop *L, Instruction* MemI,
++PHINodeAlreadyExistsDQ;
return true;
}
- }
+ }
}
}
}
@@ -825,7 +1305,7 @@ bool PPCLoopInstrFormPrep::runOnLoop(Loop *L) {
}
// Check if a load/store has update form. This lambda is used by the function
// collectCandidates, which can collect candidates for types defined by the lambda.
- auto isUpdateFormCandidate = [&](const Instruction *I, const Value *PtrValue,
+ auto isUpdateFormCandidate = [&](const Instruction *I, Value *PtrValue,
const Type *PointerElementType) {
assert((PtrValue && I) && "Invalid parameter!");
// There are no update forms for Altivec vector load/stores.
@@ -857,7 +1337,7 @@ bool PPCLoopInstrFormPrep::runOnLoop(Loop *L) {
};
// Check if a load/store has DS form.
- auto isDSFormCandidate = [](const Instruction *I, const Value *PtrValue,
+ auto isDSFormCandidate = [](const Instruction *I, Value *PtrValue,
const Type *PointerElementType) {
assert((PtrValue && I) && "Invalid parameter!");
if (isa<IntrinsicInst>(I))
@@ -871,7 +1351,7 @@ bool PPCLoopInstrFormPrep::runOnLoop(Loop *L) {
};
// Check if a load/store has DQ form.
- auto isDQFormCandidate = [&](const Instruction *I, const Value *PtrValue,
+ auto isDQFormCandidate = [&](const Instruction *I, Value *PtrValue,
const Type *PointerElementType) {
assert((PtrValue && I) && "Invalid parameter!");
// Check if it is a P10 lxvp/stxvp intrinsic.
@@ -883,31 +1363,131 @@ bool PPCLoopInstrFormPrep::runOnLoop(Loop *L) {
return ST && ST->hasP9Vector() && (PointerElementType->isVectorTy());
};
- // intrinsic for update form.
- SmallVector<Bucket, 16> UpdateFormBuckets =
- collectCandidates(L, isUpdateFormCandidate, MaxVarsUpdateForm);
+ // Check if a load/store is candidate for chain commoning.
+ // If the SCEV's start contains only one pointer operand, we can use that
+ // start as a chain separator. Mark this load/store as a candidate.
+ auto isChainCommoningCandidate = [&](const Instruction *I, Value *PtrValue,
+ const Type *PointerElementType) {
+ const SCEVAddRecExpr *ARSCEV =
+ cast<SCEVAddRecExpr>(SE->getSCEVAtScope(PtrValue, L));
+ if (!ARSCEV)
+ return false;
+
+ if (!ARSCEV->isAffine())
+ return false;
+
+ const SCEV *Start = ARSCEV->getStart();
+
+ // A single pointer. We can treat it as offset 0.
+ if (isa<SCEVUnknown>(Start) && Start->getType()->isPointerTy())
+ return true;
+
+ const SCEVAddExpr *ASCEV = dyn_cast<SCEVAddExpr>(Start);
+
+ // We need a SCEVAddExpr to include both base and offset.
+ if (!ASCEV)
+ return false;
+
+ // Make sure there is only one pointer operand (the base) and that all other
+ // operands are of integer type.
+ bool SawPointer = false;
+ for (const SCEV *Op : ASCEV->operands()) {
+ if (Op->getType()->isPointerTy()) {
+ if (SawPointer)
+ return false;
+ SawPointer = true;
+ } else if (!Op->getType()->isIntegerTy())
+ return false;
+ }
+
+ return SawPointer;
+ };
+
+ // Check if the diff is a constant. This is used for update/DS/DQ form
+ // preparation.
+ auto isValidConstantDiff = [](const SCEV *Diff) {
+ return dyn_cast<SCEVConstant>(Diff) != nullptr;
+ };
+
+ // Make sure the diff between the base and the new candidate is of the
+ // required type. This is used for chain commoning preparation.
+ auto isValidChainCommoningDiff = [](const SCEV *Diff) {
+ assert(Diff && "Invalid Diff!\n");
+
+ // Don't interfere with the earlier D-form preparation.
+ if (isa<SCEVConstant>(Diff))
+ return false;
+
+ // A single integer type offset.
+ if (isa<SCEVUnknown>(Diff) && Diff->getType()->isIntegerTy())
+ return true;
+
+ const SCEVNAryExpr *ADiff = dyn_cast<SCEVNAryExpr>(Diff);
+ if (!ADiff)
+ return false;
+
+ for (const SCEV *Op : ADiff->operands())
+ if (!Op->getType()->isIntegerTy())
+ return false;
+
+ return true;
+ };
+
+ HasCandidateForPrepare = false;
+
+ LLVM_DEBUG(dbgs() << "Start to prepare for update form.\n");
+ // Collect buckets of comparable addresses used by loads and stores for update
+ // form.
+ SmallVector<Bucket, 16> UpdateFormBuckets = collectCandidates(
+ L, isUpdateFormCandidate, isValidConstantDiff, MaxVarsUpdateForm);
// Prepare for update form.
if (!UpdateFormBuckets.empty())
MadeChange |= updateFormPrep(L, UpdateFormBuckets);
+ else if (!HasCandidateForPrepare) {
+ LLVM_DEBUG(
+ dbgs()
+ << "No prepare candidates found, stop praparation for current loop!\n");
+ // If no candidate for preparing, return early.
+ return MadeChange;
+ }
+ LLVM_DEBUG(dbgs() << "Start to prepare for DS form.\n");
// Collect buckets of comparable addresses used by loads and stores for DS
// form.
- SmallVector<Bucket, 16> DSFormBuckets =
- collectCandidates(L, isDSFormCandidate, MaxVarsDSForm);
+ SmallVector<Bucket, 16> DSFormBuckets = collectCandidates(
+ L, isDSFormCandidate, isValidConstantDiff, MaxVarsDSForm);
// Prepare for DS form.
if (!DSFormBuckets.empty())
MadeChange |= dispFormPrep(L, DSFormBuckets, DSForm);
+ LLVM_DEBUG(dbgs() << "Start to prepare for DQ form.\n");
// Collect buckets of comparable addresses used by loads and stores for DQ
// form.
- SmallVector<Bucket, 16> DQFormBuckets =
- collectCandidates(L, isDQFormCandidate, MaxVarsDQForm);
+ SmallVector<Bucket, 16> DQFormBuckets = collectCandidates(
+ L, isDQFormCandidate, isValidConstantDiff, MaxVarsDQForm);
// Prepare for DQ form.
if (!DQFormBuckets.empty())
MadeChange |= dispFormPrep(L, DQFormBuckets, DQForm);
+ // Collect buckets of comparable addresses used by loads and stores for chain
+ // commoning. With chain commoning, we reuse offsets between the chains, so
+ // the register pressure will be reduced.
+ if (!EnableChainCommoning) {
+ LLVM_DEBUG(dbgs() << "Chain commoning is not enabled.\n");
+ return MadeChange;
+ }
+
+ LLVM_DEBUG(dbgs() << "Start to prepare for chain commoning.\n");
+ SmallVector<Bucket, 16> Buckets =
+ collectCandidates(L, isChainCommoningCandidate, isValidChainCommoningDiff,
+ MaxVarsChainCommon);
+
+ // Prepare for chain commoning.
+ if (!Buckets.empty())
+ MadeChange |= chainCommoning(L, Buckets);
+
return MadeChange;
}
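As a hedged illustration of what the chain-commoning preparation above targets (the function below is hypothetical, not part of the pass): several access chains each share one base plus a common set of loop-invariant offsets, so after commoning the offsets are added to each base once instead of being folded into every address computation, reducing register pressure.

  double dotWithOffsets(const double *a, const double *b,
                        long n, long off1, long off2) {
    double acc = 0.0;
    for (long i = 0; i < n; ++i) {
      // Two chains (a and b) reuse the same loop-invariant offsets off1/off2.
      acc += a[i]        * b[i];
      acc += a[i + off1] * b[i + off1];
      acc += a[i + off2] * b[i + off2];
    }
    return acc;
  }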
diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
index 4bbb6ed85a6c..d12a9b806fd0 100644
--- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -79,6 +79,11 @@ static cl::opt<bool>
cl::desc("enable elimination of zero-extensions"),
cl::init(false), cl::Hidden);
+static cl::opt<bool>
+ EnableTrapOptimization("ppc-opt-conditional-trap",
+ cl::desc("enable optimization of conditional traps"),
+ cl::init(false), cl::Hidden);
+
namespace {
struct PPCMIPeephole : public MachineFunctionPass {
@@ -322,8 +327,7 @@ static void convertUnprimedAccPHIs(const PPCInstrInfo *TII,
SmallVectorImpl<MachineInstr *> &PHIs,
Register Dst) {
DenseMap<MachineInstr *, MachineInstr *> ChangedPHIMap;
- for (auto It = PHIs.rbegin(), End = PHIs.rend(); It != End; ++It) {
- MachineInstr *PHI = *It;
+ for (MachineInstr *PHI : llvm::reverse(PHIs)) {
SmallVector<std::pair<MachineOperand, MachineOperand>, 4> PHIOps;
// We check if the current PHI node can be changed by looking at its
// operands. If all the operands are either copies from primed
@@ -379,6 +383,7 @@ static void convertUnprimedAccPHIs(const PPCInstrInfo *TII,
// Perform peephole optimizations.
bool PPCMIPeephole::simplifyCode(void) {
bool Simplified = false;
+ bool TrapOpt = false;
MachineInstr* ToErase = nullptr;
std::map<MachineInstr *, bool> TOCSaves;
const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
@@ -420,6 +425,13 @@ bool PPCMIPeephole::simplifyCode(void) {
ToErase->eraseFromParent();
ToErase = nullptr;
}
+ // If a conditional trap instruction got optimized to an
+ // unconditional trap, eliminate all the instructions after
+ // the trap.
+ if (EnableTrapOptimization && TrapOpt) {
+ ToErase = &MI;
+ continue;
+ }
// Ignore debug instructions.
if (MI.isDebugInstr())
@@ -603,14 +615,24 @@ bool PPCMIPeephole::simplifyCode(void) {
ToErase = &MI;
Simplified = true;
}
- } else if ((Immed == 0 || Immed == 3) && DefOpc == PPC::XXPERMDIs &&
+ } else if ((Immed == 0 || Immed == 3 || Immed == 2) &&
+ DefOpc == PPC::XXPERMDIs &&
(DefMI->getOperand(2).getImm() == 0 ||
DefMI->getOperand(2).getImm() == 3)) {
+ ToErase = &MI;
+ Simplified = true;
+ // Swap of a splat, convert to copy.
+ if (Immed == 2) {
+ LLVM_DEBUG(dbgs() << "Optimizing swap(splat) => copy(splat): ");
+ LLVM_DEBUG(MI.dump());
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
+ MI.getOperand(0).getReg())
+ .add(MI.getOperand(1));
+ break;
+ }
// Splat fed by another splat - switch the output of the first
// and remove the second.
DefMI->getOperand(0).setReg(MI.getOperand(0).getReg());
- ToErase = &MI;
- Simplified = true;
LLVM_DEBUG(dbgs() << "Removing redundant splat: ");
LLVM_DEBUG(MI.dump());
}
@@ -997,6 +1019,51 @@ bool PPCMIPeephole::simplifyCode(void) {
++NumRotatesCollapsed;
break;
}
+ // We will replace TD/TW/TDI/TWI with an unconditional trap if it will
+ // always trap, and we will delete it if it will never trap.
+ case PPC::TDI:
+ case PPC::TWI:
+ case PPC::TD:
+ case PPC::TW: {
+ if (!EnableTrapOptimization) break;
+ MachineInstr *LiMI1 = getVRegDefOrNull(&MI.getOperand(1), MRI);
+ MachineInstr *LiMI2 = getVRegDefOrNull(&MI.getOperand(2), MRI);
+ bool IsOperand2Immediate = MI.getOperand(2).isImm();
+ // We can only do the optimization if we can get immediates
+ // from both operands
+ if (!(LiMI1 && (LiMI1->getOpcode() == PPC::LI ||
+ LiMI1->getOpcode() == PPC::LI8)))
+ break;
+ if (!IsOperand2Immediate &&
+ !(LiMI2 && (LiMI2->getOpcode() == PPC::LI ||
+ LiMI2->getOpcode() == PPC::LI8)))
+ break;
+
+ auto ImmOperand0 = MI.getOperand(0).getImm();
+ auto ImmOperand1 = LiMI1->getOperand(1).getImm();
+ auto ImmOperand2 = IsOperand2Immediate ? MI.getOperand(2).getImm()
+ : LiMI2->getOperand(1).getImm();
+
+ // We will replace the MI with an unconditional trap if it will always
+ // trap.
+ if ((ImmOperand0 == 31) ||
+ ((ImmOperand0 & 0x10) &&
+ ((int64_t)ImmOperand1 < (int64_t)ImmOperand2)) ||
+ ((ImmOperand0 & 0x8) &&
+ ((int64_t)ImmOperand1 > (int64_t)ImmOperand2)) ||
+ ((ImmOperand0 & 0x2) &&
+ ((uint64_t)ImmOperand1 < (uint64_t)ImmOperand2)) ||
+ ((ImmOperand0 & 0x1) &&
+ ((uint64_t)ImmOperand1 > (uint64_t)ImmOperand2)) ||
+ ((ImmOperand0 & 0x4) && (ImmOperand1 == ImmOperand2))) {
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::TRAP));
+ TrapOpt = true;
+ }
+ // We will delete the MI if it will never trap.
+ ToErase = &MI;
+ Simplified = true;
+ break;
+ }
}
}
@@ -1006,6 +1073,9 @@ bool PPCMIPeephole::simplifyCode(void) {
ToErase->eraseFromParent();
ToErase = nullptr;
}
+ // Reset TrapOpt to false at the end of the basic block.
+ if (EnableTrapOptimization)
+ TrapOpt = false;
}
// Eliminate all the TOC save instructions which are redundant.
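The conditional-trap peephole above only fires when both compared values are known constants; the following standalone sketch (a hypothetical helper, not the pass code) restates the "always traps" test it applies, using the TO-field bit meanings 0x10 signed <, 0x8 signed >, 0x4 ==, 0x2 unsigned <, 0x1 unsigned >:

  #include <cstdint>

  bool alwaysTraps(unsigned TO, int64_t A, int64_t B) {
    if (TO == 31)                // all condition bits set: unconditional trap
      return true;
    return ((TO & 0x10) && A < B) ||
           ((TO & 0x8)  && A > B) ||
           ((TO & 0x2)  && (uint64_t)A < (uint64_t)B) ||
           ((TO & 0x1)  && (uint64_t)A > (uint64_t)B) ||
           ((TO & 0x4)  && A == B);
  }

For example, alwaysTraps(31, 0, 0) and alwaysTraps(0x8, 5, 3) hold, so those traps become unconditional, while alwaysTraps(0x10, 5, 3) does not, so that instruction would simply be deleted.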
diff --git a/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp b/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp
index d12c6d9cd406..bdff5109c1e1 100644
--- a/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp
@@ -75,6 +75,19 @@ static bool matchingRegOps(const MachineInstr &FirstMI,
return Op1.getReg() == Op2.getReg();
}
+static bool matchingImmOps(const MachineInstr &MI,
+ int MIOpIndex,
+ int64_t Expect,
+ unsigned ExtendFrom = 64) {
+ const MachineOperand &Op = MI.getOperand(MIOpIndex);
+ if (!Op.isImm())
+ return false;
+ int64_t Imm = Op.getImm();
+ if (ExtendFrom < 64)
+ Imm = SignExtend64(Imm, ExtendFrom);
+ return Imm == Expect;
+}
+
// Return true if the FirstMI meets the constraints of SecondMI according to
// fusion specification.
static bool checkOpConstraints(FusionFeature::FusionKind Kd,
@@ -116,7 +129,7 @@ static bool checkOpConstraints(FusionFeature::FusionKind Kd,
if (((Imm & 0xFFF0) != 0) && ((Imm & 0xFFF0) != 0xFFF0))
return false;
- // If si = 1111111111110000 and the msb of the d/ds field of the load equals
+ // If si = 1111111111110000 and the msb of the d/ds field of the load equals
// 1, then fusion does not occur.
if ((Imm & 0xFFF0) == 0xFFF0) {
const MachineOperand &D = SecondMI.getOperand(1);
@@ -132,6 +145,10 @@ static bool checkOpConstraints(FusionFeature::FusionKind Kd,
}
return true;
}
+
+ case FusionFeature::FK_SldiAdd:
+ return (matchingImmOps(FirstMI, 2, 3) && matchingImmOps(FirstMI, 3, 60)) ||
+ (matchingImmOps(FirstMI, 2, 6) && matchingImmOps(FirstMI, 3, 57));
}
llvm_unreachable("All the cases should have been handled");
diff --git a/llvm/lib/Target/PowerPC/PPCMacroFusion.def b/llvm/lib/Target/PowerPC/PPCMacroFusion.def
index c7e4e7c22e0a..469a24800423 100644
--- a/llvm/lib/Target/PowerPC/PPCMacroFusion.def
+++ b/llvm/lib/Target/PowerPC/PPCMacroFusion.def
@@ -41,5 +41,42 @@ FUSION_FEATURE(AddisLoad, hasAddisLoadFusion, 2, \
FUSION_OP_SET(ADDIS, ADDIS8, ADDIStocHA8), \
FUSION_OP_SET(LD, LBZ, LBZ8, LHZ, LHZ8, LWZ, LWZ8))
+// Power10 User Manual Section 19.1.5.4, Fusion
+// {add, mulld} - add
+FUSION_FEATURE(ArithAdd, hasArithAddFusion, -1,
+ FUSION_OP_SET(ADD4, ADD8, MULLD), FUSION_OP_SET(ADD4, ADD8))
+
+// {add, subf} - {and, nand, nor, or}
+FUSION_FEATURE(ArithLogical, hasAddLogicalFusion, -1,
+ FUSION_OP_SET(ADD4, ADD8, SUBF, SUBF8),
+ FUSION_OP_SET(AND, AND8, OR, OR8, NAND, NAND8, NOR, NOR8))
+
+// {and, andc, eqv, nand, nor, or, orc, xor} - {add, subf}
+FUSION_FEATURE(LogicalArith, hasLogicalAddFusion, -1,
+ FUSION_OP_SET(AND, ANDC, EQV, NAND, NOR, OR, ORC, XOR, AND8,
+ ANDC8, EQV8, NAND8, NOR8, OR8, ORC8, XOR8),
+ FUSION_OP_SET(ADD4, ADD8, SUBF, SUBF8))
+
+// Either of {and, andc, eqv, nand, nor, or, orc, xor}
+FUSION_FEATURE(Logical, hasLogicalFusion, -1,
+ FUSION_OP_SET(AND, ANDC, EQV, NAND, NOR, OR, ORC, XOR, AND8,
+ ANDC8, EQV8, NAND8, NOR8, OR8, ORC8, XOR8),
+ FUSION_OP_SET(AND, ANDC, EQV, NAND, NOR, OR, ORC, XOR, AND8,
+ ANDC8, EQV8, NAND8, NOR8, OR8, ORC8, XOR8))
+
+// vaddudm - vaddudm
+FUSION_FEATURE(VecAdd, hasArithAddFusion, -1, FUSION_OP_SET(VADDUDM),
+ FUSION_OP_SET(VADDUDM))
+
+// Either of {vand, vandc, veqv, vnand, vnor, vor, vorc, vxor}
+FUSION_FEATURE(VecLogical, hasLogicalFusion, -1,
+ FUSION_OP_SET(VAND, VANDC, VEQV, VNAND, VNOR, VOR, VORC, VXOR),
+ FUSION_OP_SET(VAND, VANDC, VEQV, VNAND, VNOR, VOR, VORC, VXOR))
+
+// sldi rx, ra, {3, 6} - {add, subf}
+// sldi rx, ra, n is an alias of rldicr rx, ra, n, 63-n
+FUSION_FEATURE(SldiAdd, hasArithAddFusion, -1, FUSION_OP_SET(RLDICR, RLDICR_32),
+ FUSION_OP_SET(ADD4, ADD8, SUBF, SUBF8))
+
#undef FUSION_FEATURE
#undef FUSION_OP_SET
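A small self-contained sketch of the two ingredients the SldiAdd fusion check relies on (the helper names below are made up): sign-extending a narrow immediate field, as matchingImmOps does via SignExtend64, and the fact noted above that sldi rx, ra, n encodes as rldicr rx, ra, SH=n, ME=63-n, which is why operand 2 (SH) and operand 3 (ME) are matched against (3, 60) or (6, 57):

  #include <cstdint>

  // Sign-extend the low 'bits' bits of 'imm' (what SignExtend64(Imm, bits) does).
  int64_t signExtendFrom(uint64_t imm, unsigned bits) {
    uint64_t sign = 1ull << (bits - 1);
    return (int64_t)(((imm & ((sign << 1) - 1)) ^ sign) - sign);
  }

  // True when an rldicr SH/ME pair is really "sldi rx, ra, n".
  bool isSldiOf(unsigned sh, unsigned me, unsigned n) {
    return sh == n && me == 63 - n;
  }
  // isSldiOf(3, 60, 3) and isSldiOf(6, 57, 6) are exactly the shapes that may
  // fuse with a following add/subf on Power10.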
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 4f16c7f5ff17..4bccc5596d2b 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -135,6 +135,23 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCTargetMachine &TM)
ImmToIdxMap[PPC::SPELWZ] = PPC::SPELWZX;
// Power10
+ ImmToIdxMap[PPC::PLBZ] = PPC::LBZX; ImmToIdxMap[PPC::PLBZ8] = PPC::LBZX8;
+ ImmToIdxMap[PPC::PLHZ] = PPC::LHZX; ImmToIdxMap[PPC::PLHZ8] = PPC::LHZX8;
+ ImmToIdxMap[PPC::PLHA] = PPC::LHAX; ImmToIdxMap[PPC::PLHA8] = PPC::LHAX8;
+ ImmToIdxMap[PPC::PLWZ] = PPC::LWZX; ImmToIdxMap[PPC::PLWZ8] = PPC::LWZX8;
+ ImmToIdxMap[PPC::PLWA] = PPC::LWAX; ImmToIdxMap[PPC::PLWA8] = PPC::LWAX;
+ ImmToIdxMap[PPC::PLD] = PPC::LDX; ImmToIdxMap[PPC::PSTD] = PPC::STDX;
+
+ ImmToIdxMap[PPC::PSTB] = PPC::STBX; ImmToIdxMap[PPC::PSTB8] = PPC::STBX8;
+ ImmToIdxMap[PPC::PSTH] = PPC::STHX; ImmToIdxMap[PPC::PSTH8] = PPC::STHX8;
+ ImmToIdxMap[PPC::PSTW] = PPC::STWX; ImmToIdxMap[PPC::PSTW8] = PPC::STWX8;
+
+ ImmToIdxMap[PPC::PLFS] = PPC::LFSX; ImmToIdxMap[PPC::PSTFS] = PPC::STFSX;
+ ImmToIdxMap[PPC::PLFD] = PPC::LFDX; ImmToIdxMap[PPC::PSTFD] = PPC::STFDX;
+ ImmToIdxMap[PPC::PLXSSP] = PPC::LXSSPX; ImmToIdxMap[PPC::PSTXSSP] = PPC::STXSSPX;
+ ImmToIdxMap[PPC::PLXSD] = PPC::LXSDX; ImmToIdxMap[PPC::PSTXSD] = PPC::STXSDX;
+ ImmToIdxMap[PPC::PLXV] = PPC::LXVX; ImmToIdxMap[PPC::PSTXV] = PPC::STXVX;
+
ImmToIdxMap[PPC::LXVP] = PPC::LXVPX;
ImmToIdxMap[PPC::STXVP] = PPC::STXVPX;
ImmToIdxMap[PPC::PLXVP] = PPC::LXVPX;
@@ -506,7 +523,9 @@ bool PPCRegisterInfo::getRegAllocationHints(Register VirtReg,
VRM->hasPhys(ResultReg)) {
Register UACCPhys = VRM->getPhys(ResultReg);
Register HintReg = getSubReg(UACCPhys, ResultOp->getSubReg());
- Hints.push_back(HintReg);
+ // Ensure that the hint is a VSRp register.
+ if (HintReg >= PPC::VSRp0 && HintReg <= PPC::VSRp31)
+ Hints.push_back(HintReg);
}
break;
}
@@ -1345,7 +1364,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineFunction &MF = *MBB.getParent();
const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
// Get the instruction info.
- const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+ const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
// Get the frame info.
MachineFrameInfo &MFI = MF.getFrameInfo();
DebugLoc dl = MI.getDebugLoc();
@@ -1457,7 +1476,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
bool OffsetFitsMnemonic = (OpC == PPC::EVSTDD || OpC == PPC::EVLDD) ?
isUInt<8>(Offset) :
isInt<16>(Offset);
- if (OpC == PPC::PLXVP || OpC == PPC::PSTXVP)
+ if (TII.isPrefixed(MI.getOpcode()))
OffsetFitsMnemonic = isInt<34>(Offset);
if (!noImmForm && ((OffsetFitsMnemonic &&
((Offset % offsetMinAlign(MI)) == 0)) ||
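Related to the isPrefixed check above, a quick standalone sketch (hypothetical helper) of the widened displacement range: prefixed loads and stores carry a 34-bit signed displacement, so isInt<34>(Offset) accepts [-2^33, 2^33 - 1] instead of the usual 16-bit D-form range:

  #include <cstdint>

  bool fitsPrefixedDisp(int64_t offset) {          // same test as isInt<34>
    const int64_t bound = int64_t(1) << 33;
    return offset >= -bound && offset <= bound - 1;
  }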
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
index c22a5826337b..2e534dd1bcd5 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -147,8 +147,6 @@ public:
unsigned FIOperandNum,
RegScavenger *RS = nullptr) const override;
- bool addAllocPriorityToGlobalRanges() const override { return true; }
-
// Support for virtual base registers.
bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override;
Register materializeFrameBaseRegister(MachineBasicBlock *MBB, int FrameIdx,
diff --git a/llvm/lib/Target/PowerPC/PPCSchedPredicates.td b/llvm/lib/Target/PowerPC/PPCSchedPredicates.td
new file mode 100644
index 000000000000..18f325e99a60
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/PPCSchedPredicates.td
@@ -0,0 +1,294 @@
+//===--- PPCSchedPredicates.td - PowerPC Scheduling Preds -*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Automatically generated file, do not edit!
+//
+// This file defines scheduling predicate definitions that are used by the
+// PowerPC subtargets.
+//===----------------------------------------------------------------------===//
+// Identify instructions that write BF pipelines with 7 cycles.
+def P10W_BF_7C_Pred : MCSchedPredicate<
+ CheckOpcode<[FADD,
+ FADDS,
+ FADDS_rec,
+ FADD_rec,
+ FCFID,
+ FCFIDS,
+ FCFIDS_rec,
+ FCFIDU,
+ FCFIDUS,
+ FCFIDUS_rec,
+ FCFIDU_rec,
+ FCFID_rec,
+ FCTID,
+ FCTIDU,
+ FCTIDUZ,
+ FCTIDUZ_rec,
+ FCTIDU_rec,
+ FCTIDZ,
+ FCTIDZ_rec,
+ FCTID_rec,
+ FCTIW,
+ FCTIWU,
+ FCTIWUZ,
+ FCTIWUZ_rec,
+ FCTIWU_rec,
+ FCTIWZ,
+ FCTIWZ_rec,
+ FCTIW_rec,
+ FMADD,
+ FMADDS,
+ FMADDS_rec,
+ FMADD_rec,
+ FMSUB,
+ FMSUBS,
+ FMSUBS_rec,
+ FMSUB_rec,
+ FMUL,
+ FMULS,
+ FMULS_rec,
+ FMUL_rec,
+ FNMADD,
+ FNMADDS,
+ FNMADDS_rec,
+ FNMADD_rec,
+ FNMSUB,
+ FNMSUBS,
+ FNMSUBS_rec,
+ FNMSUB_rec,
+ FRE,
+ FRES,
+ FRES_rec,
+ FRE_rec,
+ FRIMD, FRIMS,
+ FRIMD_rec, FRIMS_rec,
+ FRIND, FRINS,
+ FRIND_rec, FRINS_rec,
+ FRIPD, FRIPS,
+ FRIPD_rec, FRIPS_rec,
+ FRIZD, FRIZS,
+ FRIZD_rec, FRIZS_rec,
+ FRSP,
+ FRSP_rec,
+ FRSQRTE,
+ FRSQRTES,
+ FRSQRTES_rec,
+ FRSQRTE_rec,
+ FSELD, FSELS,
+ FSELD_rec, FSELS_rec,
+ FSUB,
+ FSUBS,
+ FSUBS_rec,
+ FSUB_rec,
+ VADDFP,
+ VCFSX, VCFSX_0,
+ VCFUX, VCFUX_0,
+ VCTSXS, VCTSXS_0,
+ VCTUXS, VCTUXS_0,
+ VEXPTEFP,
+ VEXPTEFP,
+ VLOGEFP,
+ VMADDFP,
+ VNMSUBFP,
+ VREFP,
+ VRFIM,
+ VRFIN,
+ VRFIP,
+ VRFIZ,
+ VRSQRTEFP,
+ VSUBFP,
+ XSADDDP,
+ XSADDSP,
+ XSCVDPHP,
+ XSCVDPSP,
+ XSCVDPSPN,
+ XSCVDPSXDS, XSCVDPSXDSs,
+ XSCVDPSXWS, XSCVDPSXWSs,
+ XSCVDPUXDS, XSCVDPUXDSs,
+ XSCVDPUXWS, XSCVDPUXWSs,
+ XSCVSPDP,
+ XSCVSXDDP,
+ XSCVSXDSP,
+ XSCVUXDDP,
+ XSCVUXDSP,
+ XSMADDADP,
+ XSMADDASP,
+ XSMADDMDP,
+ XSMADDMSP,
+ XSMSUBADP,
+ XSMSUBASP,
+ XSMSUBMDP,
+ XSMSUBMSP,
+ XSMULDP,
+ XSMULSP,
+ XSNMADDADP,
+ XSNMADDASP,
+ XSNMADDMDP,
+ XSNMADDMSP,
+ XSNMSUBADP,
+ XSNMSUBASP,
+ XSNMSUBMDP,
+ XSNMSUBMSP,
+ XSRDPI,
+ XSRDPIC,
+ XSRDPIM,
+ XSRDPIP,
+ XSRDPIZ,
+ XSREDP,
+ XSRESP,
+ XSRSP,
+ XSRSQRTEDP,
+ XSRSQRTESP,
+ XSSUBDP,
+ XSSUBSP,
+ XVADDDP,
+ XVADDSP,
+ XVCVDPSP,
+ XVCVDPSXDS,
+ XVCVDPSXWS,
+ XVCVDPUXDS,
+ XVCVDPUXWS,
+ XVCVSPBF16,
+ XVCVSPDP,
+ XVCVSPHP,
+ XVCVSPSXDS,
+ XVCVSPSXWS,
+ XVCVSPUXDS,
+ XVCVSPUXWS,
+ XVCVSXDDP,
+ XVCVSXDSP,
+ XVCVSXWDP,
+ XVCVSXWSP,
+ XVCVUXDDP,
+ XVCVUXDSP,
+ XVCVUXWDP,
+ XVCVUXWSP,
+ XVMADDADP,
+ XVMADDASP,
+ XVMADDMDP,
+ XVMADDMSP,
+ XVMSUBADP,
+ XVMSUBASP,
+ XVMSUBMDP,
+ XVMSUBMSP,
+ XVMULDP,
+ XVMULSP,
+ XVNMADDADP,
+ XVNMADDASP,
+ XVNMADDMDP,
+ XVNMADDMSP,
+ XVNMSUBADP,
+ XVNMSUBASP,
+ XVNMSUBMDP,
+ XVNMSUBMSP,
+ XVRDPI,
+ XVRDPIC,
+ XVRDPIM,
+ XVRDPIP,
+ XVRDPIZ,
+ XVREDP,
+ XVRESP,
+ XVRSPI,
+ XVRSPIC,
+ XVRSPIM,
+ XVRSPIP,
+ XVRSPIZ,
+ XVRSQRTEDP,
+ XVRSQRTESP,
+ XVSUBDP,
+ XVSUBSP]>
+>;
+
+// Identify instructions that write CY pipelines with 7 cycles.
+def P10W_CY_7C_Pred : MCSchedPredicate<
+ CheckOpcode<[CFUGED,
+ CNTLZDM,
+ CNTTZDM,
+ PDEPD,
+ PEXTD,
+ VCFUGED,
+ VCIPHER,
+ VCIPHERLAST,
+ VCLZDM,
+ VCTZDM,
+ VGNB,
+ VNCIPHER,
+ VNCIPHERLAST,
+ VPDEPD,
+ VPEXTD,
+ VPMSUMB,
+ VPMSUMD,
+ VPMSUMH,
+ VPMSUMW,
+ VSBOX]>
+>;
+
+// Identify instructions that write MM pipelines with 10 cycles.
+def P10W_MM_10C_Pred : MCSchedPredicate<
+ CheckOpcode<[PMXVBF16GER2,
+ PMXVBF16GER2NN,
+ PMXVBF16GER2NP,
+ PMXVBF16GER2PN,
+ PMXVBF16GER2PP,
+ PMXVF16GER2,
+ PMXVF16GER2NN,
+ PMXVF16GER2NP,
+ PMXVF16GER2PN,
+ PMXVF16GER2PP,
+ PMXVF32GER,
+ PMXVF32GERNN,
+ PMXVF32GERNP,
+ PMXVF32GERPN,
+ PMXVF32GERPP,
+ PMXVF64GER,
+ PMXVF64GERNN,
+ PMXVF64GERNP,
+ PMXVF64GERPN,
+ PMXVF64GERPP,
+ PMXVI16GER2,
+ PMXVI16GER2PP,
+ PMXVI16GER2S,
+ PMXVI16GER2SPP,
+ PMXVI4GER8,
+ PMXVI4GER8PP,
+ PMXVI8GER4,
+ PMXVI8GER4PP,
+ PMXVI8GER4SPP,
+ XVBF16GER2,
+ XVBF16GER2NN,
+ XVBF16GER2NP,
+ XVBF16GER2PN,
+ XVBF16GER2PP,
+ XVF16GER2,
+ XVF16GER2NN,
+ XVF16GER2NP,
+ XVF16GER2PN,
+ XVF16GER2PP,
+ XVF32GER,
+ XVF32GERNN,
+ XVF32GERNP,
+ XVF32GERPN,
+ XVF32GERPP,
+ XVF64GER,
+ XVF64GERNN,
+ XVF64GERNP,
+ XVF64GERPN,
+ XVF64GERPP,
+ XVI16GER2,
+ XVI16GER2PP,
+ XVI16GER2S,
+ XVI16GER2SPP,
+ XVI4GER8,
+ XVI4GER8PP,
+ XVI8GER4,
+ XVI8GER4PP,
+ XVI8GER4SPP,
+ XXMFACC,
+ XXMFACC,
+ XXMTACC,
+ XXSETACCZ]>
+>;
diff --git a/llvm/lib/Target/PowerPC/PPCSchedule.td b/llvm/lib/Target/PowerPC/PPCSchedule.td
index e378d57d325e..f65dbae16d3a 100644
--- a/llvm/lib/Target/PowerPC/PPCSchedule.td
+++ b/llvm/lib/Target/PowerPC/PPCSchedule.td
@@ -128,7 +128,9 @@ def IIC_SprMTPMR : InstrItinClass;
//===----------------------------------------------------------------------===//
// Processor instruction itineraries.
+include "PPCInstrInfo.td"
+include "PPCSchedPredicates.td"
include "PPCScheduleG3.td"
include "PPCSchedule440.td"
include "PPCScheduleG4.td"
@@ -137,6 +139,7 @@ include "PPCScheduleG5.td"
include "PPCScheduleP7.td"
include "PPCScheduleP8.td"
include "PPCScheduleP9.td"
+include "PPCScheduleP10.td"
include "PPCScheduleA2.td"
include "PPCScheduleE500.td"
include "PPCScheduleE500mc.td"
diff --git a/llvm/lib/Target/PowerPC/PPCScheduleP10.td b/llvm/lib/Target/PowerPC/PPCScheduleP10.td
new file mode 100644
index 000000000000..bf56491f373a
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/PPCScheduleP10.td
@@ -0,0 +1,416 @@
+//===--- PPCScheduleP10.td - P10 Scheduling Definitions -*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Automatically generated file, do not edit!
+//
+// This file defines the resources required by P10 instructions.
+//===----------------------------------------------------------------------===//
+// Modeling pipeline forwarding logic.
+def P10BR_Read : SchedRead;
+def P10DF_Read : SchedRead;
+def P10DV_Read : SchedRead;
+def P10DX_Read : SchedRead;
+def P10F2_Read : SchedRead;
+def P10FX_Read : SchedRead;
+def P10LD_Read : SchedRead;
+def P10MU_Read : SchedRead;
+def P10PM_Read : SchedRead;
+def P10ST_Read : SchedRead;
+def P10SX_Read : SchedRead;
+def P10vMU_Read : SchedRead;
+
+def P10Model : SchedMachineModel {
+ let IssueWidth = 8;
+
+ // TODO - Needs to be updated according to the P10 UM.
+ let MicroOpBufferSize = 44;
+
+ // TODO - tune this on real HW once it arrives. For now, we will use the same
+ // value as we do on P9.
+ let LoopMicroOpBufferSize = 60;
+
+ let CompleteModel = 1;
+
+ // Do not support SPE (Signal Processing Engine) on Power 10.
+ let UnsupportedFeatures = [HasSPE, IsE500, IsBookE];
+}
+
+let SchedModel = P10Model in {
+
+ // ***************** Processor Resources *****************
+
+ // Pipeline Groups
+
+ def P10_BF : ProcResource<4>; // Four Binary Floating Point pipelines.
+ def P10_BR : ProcResource<2>; // Two Branch pipelines.
+ def P10_CY : ProcResource<4>; // Four Crypto pipelines.
+ def P10_DF : ProcResource<1>; // One Decimal Floating Point pipeline.
+ def P10_DV : ProcResource<2>; // Two Fixed-point divide (DIV) pipelines.
+ def P10_DX : ProcResource<2>; // Two 128-bit fixed-point and BCD pipelines.
+ def P10_FX : ProcResource<4>; // Four ALU pipelines.
+ def P10_LD : ProcResource<2>; // Two Load pipelines.
+ def P10_MM : ProcResource<2>; // Two 512-bit SIMD matrix multiply engine pipelines.
+ def P10_PM : ProcResource<4>; // Four 128-bit permute (PM) pipelines.
+ def P10_ST : ProcResource<2>; // Two ST-D pipelines.
+ def P10_SX : ProcResource<2>; // Two Simple Fixed-point (SFX) pipelines.
+
+ // Dispatch Groups
+
+ // Dispatch to any slots
+ def P10_ANY_SLOT : ProcResource<8>;
+
+ let Super = P10_ANY_SLOT in {
+
+ // Dispatch to even slots
+ def P10_EVEN_SLOT : ProcResource<4>;
+
+ // Dispatch to odd slots
+ def P10_ODD_SLOT : ProcResource<4>;
+ }
+
+ // Dispatch Rules
+ let NumMicroOps = 0, Latency = 1 in {
+ // Dispatch Rule '-'
+ def P10W_DISP_ANY : SchedWriteRes<[P10_ANY_SLOT]>;
+
+ // Dispatch Rule '-', even slot
+ def P10W_DISP_EVEN : SchedWriteRes<[P10_EVEN_SLOT]>;
+
+ // Dispatch Rule 'P'
+ def P10W_DISP_PAIR : SchedWriteRes<[P10_EVEN_SLOT, P10_ODD_SLOT]>;
+ }
+
+ // ***************** SchedWriteRes Definitions *****************
+
+ // A BF pipeline may take from 7 to 36 cycles to complete.
+ // Some BF operations may keep the pipeline busy for up to 10 cycles.
+ def P10W_BF_7C : SchedWriteRes<[P10_BF]> {
+ let Latency = 7;
+ }
+
+ def P10W_BF_22C : SchedWriteRes<[P10_BF]> {
+ let ResourceCycles = [ 5 ];
+ let Latency = 22;
+ }
+
+ def P10W_BF_24C : SchedWriteRes<[P10_BF]> {
+ let ResourceCycles = [ 8 ];
+ let Latency = 24;
+ }
+
+ def P10W_BF_26C : SchedWriteRes<[P10_BF]> {
+ let ResourceCycles = [ 5 ];
+ let Latency = 26;
+ }
+
+ def P10W_BF_27C : SchedWriteRes<[P10_BF]> {
+ let ResourceCycles = [ 7 ];
+ let Latency = 27;
+ }
+
+ def P10W_BF_36C : SchedWriteRes<[P10_BF]> {
+ let ResourceCycles = [ 10 ];
+ let Latency = 36;
+ }
+
+ // A BR pipeline may take 2 cycles to complete.
+ def P10W_BR_2C : SchedWriteRes<[P10_BR]> {
+ let Latency = 2;
+ }
+
+ // A CY pipeline may take 7 cycles to complete.
+ def P10W_CY_7C : SchedWriteRes<[P10_CY]> {
+ let Latency = 7;
+ }
+
+ // A DF pipeline may take from 13 to 174 cycles to complete.
+ // Some DF operations may keep the pipeline busy for up to 67 cycles.
+ def P10W_DF_13C : SchedWriteRes<[P10_DF]> {
+ let Latency = 13;
+ }
+
+ def P10W_DF_24C : SchedWriteRes<[P10_DF]> {
+ let ResourceCycles = [ 16 ];
+ let Latency = 24;
+ }
+
+ def P10W_DF_25C : SchedWriteRes<[P10_DF]> {
+ let ResourceCycles = [ 17 ];
+ let Latency = 25;
+ }
+
+ def P10W_DF_26C : SchedWriteRes<[P10_DF]> {
+ let ResourceCycles = [ 18 ];
+ let Latency = 26;
+ }
+
+ def P10W_DF_32C : SchedWriteRes<[P10_DF]> {
+ let ResourceCycles = [ 22 ];
+ let Latency = 32;
+ }
+
+ def P10W_DF_33C : SchedWriteRes<[P10_DF]> {
+ let ResourceCycles = [ 25 ];
+ let Latency = 33;
+ }
+
+ def P10W_DF_34C : SchedWriteRes<[P10_DF]> {
+ let ResourceCycles = [ 25 ];
+ let Latency = 34;
+ }
+
+ def P10W_DF_38C : SchedWriteRes<[P10_DF]> {
+ let ResourceCycles = [ 30 ];
+ let Latency = 38;
+ }
+
+ def P10W_DF_40C : SchedWriteRes<[P10_DF]> {
+ let ResourceCycles = [ 17 ];
+ let Latency = 40;
+ }
+
+ def P10W_DF_43C : SchedWriteRes<[P10_DF]> {
+ let ResourceCycles = [ 34 ];
+ let Latency = 43;
+ }
+
+ def P10W_DF_59C : SchedWriteRes<[P10_DF]> {
+ let ResourceCycles = [ 49 ];
+ let Latency = 59;
+ }
+
+ def P10W_DF_61C : SchedWriteRes<[P10_DF]> {
+ let ResourceCycles = [ 12 ];
+ let Latency = 61;
+ }
+
+ def P10W_DF_68C : SchedWriteRes<[P10_DF]> {
+ let ResourceCycles = [ 15 ];
+ let Latency = 68;
+ }
+
+ def P10W_DF_77C : SchedWriteRes<[P10_DF]> {
+ let ResourceCycles = [ 67 ];
+ let Latency = 77;
+ }
+
+ def P10W_DF_87C : SchedWriteRes<[P10_DF]> {
+ let ResourceCycles = [ 12 ];
+ let Latency = 87;
+ }
+
+ def P10W_DF_100C : SchedWriteRes<[P10_DF]> {
+ let ResourceCycles = [ 32 ];
+ let Latency = 100;
+ }
+
+ def P10W_DF_174C : SchedWriteRes<[P10_DF]> {
+ let ResourceCycles = [ 33 ];
+ let Latency = 174;
+ }
+
+ // A DV pipeline may take from 20 to 83 cycles to complete.
+ // Some DV operations may keep the pipeline busy for up to 33 cycles.
+ def P10W_DV_20C : SchedWriteRes<[P10_DV]> {
+ let ResourceCycles = [ 10 ];
+ let Latency = 20;
+ }
+
+ def P10W_DV_25C : SchedWriteRes<[P10_DV]> {
+ let ResourceCycles = [ 10 ];
+ let Latency = 25;
+ }
+
+ def P10W_DV_27C : SchedWriteRes<[P10_DV]> {
+ let ResourceCycles = [ 10 ];
+ let Latency = 27;
+ }
+
+ def P10W_DV_41C : SchedWriteRes<[P10_DV]> {
+ let ResourceCycles = [ 10 ];
+ let Latency = 41;
+ }
+
+ def P10W_DV_43C : SchedWriteRes<[P10_DV]> {
+ let ResourceCycles = [ 21 ];
+ let Latency = 43;
+ }
+
+ def P10W_DV_47C : SchedWriteRes<[P10_DV]> {
+ let ResourceCycles = [ 21 ];
+ let Latency = 47;
+ }
+
+ def P10W_DV_54C : SchedWriteRes<[P10_DV]> {
+ let ResourceCycles = [ 33 ];
+ let Latency = 54;
+ }
+
+ def P10W_DV_60C : SchedWriteRes<[P10_DV]> {
+ let ResourceCycles = [ 33 ];
+ let Latency = 60;
+ }
+
+ def P10W_DV_75C : SchedWriteRes<[P10_DV]> {
+ let ResourceCycles = [ 21 ];
+ let Latency = 75;
+ }
+
+ def P10W_DV_83C : SchedWriteRes<[P10_DV]> {
+ let ResourceCycles = [ 33 ];
+ let Latency = 83;
+ }
+
+ // A DX pipeline may take 5 cycles to complete.
+ def P10W_DX_5C : SchedWriteRes<[P10_DX]> {
+ let Latency = 5;
+ }
+
+ // A F2 pipeline may take 4 cycles to complete.
+ def P10W_F2_4C : SchedWriteRes<[P10_FX]> {
+ let Latency = 4;
+ }
+
+ // A FX pipeline may take from 2 to 3 cycles to complete.
+ def P10W_FX_2C : SchedWriteRes<[P10_FX]> {
+ let Latency = 2;
+ }
+
+ def P10W_FX_3C : SchedWriteRes<[P10_FX]> {
+ let Latency = 3;
+ }
+
+ // A LD pipeline may take 6 cycles to complete.
+ def P10W_LD_6C : SchedWriteRes<[P10_LD]> {
+ let Latency = 6;
+ }
+
+ // A MF pipeline may take 13 cycles to complete.
+ def P10W_MF_13C : SchedWriteRes<[P10_SX]> {
+ let Latency = 13;
+ }
+
+ // A MFL pipeline may take 13 cycles to complete.
+ def P10W_MFL_13C : SchedWriteRes<[P10_SX]> {
+ let Latency = 13;
+ }
+
+ // A MM pipeline may take 10 cycles to complete.
+ def P10W_MM_10C : SchedWriteRes<[P10_MM]> {
+ let Latency = 10;
+ }
+
+ // A MU pipeline may take 5 cycles to complete.
+ def P10W_MU_5C : SchedWriteRes<[P10_BF]> {
+ let Latency = 5;
+ }
+
+ // A PM pipeline may take 4 cycles to complete.
+ def P10W_PM_4C : SchedWriteRes<[P10_PM]> {
+ let Latency = 4;
+ }
+
+ // A ST pipeline may take 3 cycles to complete.
+ def P10W_ST_3C : SchedWriteRes<[P10_ST]> {
+ let Latency = 3;
+ }
+
+ // A SX pipeline may take from 0 to 3 cycles to complete.
+ def P10W_SX : SchedWriteRes<[P10_SX]> {
+ let Latency = 0;
+ }
+
+ def P10W_SX_3C : SchedWriteRes<[P10_SX]> {
+ let Latency = 3;
+ }
+
+ // A vMU pipeline may take 7 cycles to complete.
+ def P10W_vMU_7C : SchedWriteRes<[P10_BF]> {
+ let Latency = 7;
+ }
+
+ // ***************** Read Advance Definitions *****************
+
+ // Modeling pipeline forwarding logic.
+ def P10BF_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_CY_7C, P10W_DF_13C, P10W_MM_10C]>;
+ def P10BF_Read_2C : SchedReadAdvance<2, [P10W_BF_7C]>;
+ def P10BR_Read_1C : SchedReadAdvance<1, [P10W_FX_3C, P10W_F2_4C]>;
+ def P10CY_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_BF_7C, P10W_DF_13C, P10W_MM_10C]>;
+ def P10CY_Read_3C : SchedReadAdvance<3, [P10W_CY_7C]>;
+ def P10DF_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_BF_7C, P10W_CY_7C, P10W_DF_13C, P10W_MM_10C]>;
+ def P10DV_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_BF_7C, P10W_CY_7C, P10W_DF_13C, P10W_MM_10C]>;
+ def P10DX_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_BF_7C, P10W_CY_7C, P10W_DF_13C, P10W_MM_10C]>;
+ def P10F2_Read_1C : SchedReadAdvance<1, [P10W_ST_3C, P10W_SX_3C, P10W_FX_3C, P10W_F2_4C, P10W_PM_4C]>;
+ def P10FX_Read_1C : SchedReadAdvance<1, [P10W_ST_3C, P10W_SX_3C, P10W_FX_3C, P10W_F2_4C, P10W_PM_4C]>;
+ def P10LD_Read_1C : SchedReadAdvance<1, [P10W_ST_3C, P10W_SX_3C, P10W_FX_3C, P10W_F2_4C]>;
+ def P10MM_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_BF_7C, P10W_CY_7C, P10W_DF_13C]>;
+ def P10MM_Read_6C : SchedReadAdvance<6, [P10W_MM_10C]>;
+ def P10MU_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_DF_13C]>;
+ def P10PM_Read_1C : SchedReadAdvance<1, [P10W_ST_3C, P10W_SX_3C, P10W_FX_3C, P10W_F2_4C, P10W_PM_4C]>;
+ def P10ST_Read_1C : SchedReadAdvance<1, [P10W_ST_3C, P10W_SX_3C, P10W_FX_3C, P10W_F2_4C, P10W_PM_4C]>;
+ def P10SX_Read_1C : SchedReadAdvance<1, [P10W_ST_3C, P10W_SX_3C, P10W_FX_3C, P10W_F2_4C, P10W_PM_4C, P10W_MM_10C]>;
+ def P10vMU_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_BF_7C, P10W_CY_7C, P10W_DF_13C, P10W_MM_10C]>;
+
+ // Save 1 cycle if pipeline BF reads the data from pipelines DX, MU, vMU, CY, DF, MM.
+ // Save 2 cycles if pipeline BF reads the data from pipelines BF.
+ def P10BF_Read : SchedReadVariant<[
+ SchedVar<P10W_BF_7C_Pred, [P10BF_Read_2C]>,
+ SchedVar<NoSchedPred, [P10BF_Read_1C]>
+ ]>;
+
+ // Save 1 cycle if pipeline CY reads the data from pipelines DX, MU, vMU, BF, DF, MM.
+ // Save 3 cycles if pipeline CY reads the data from pipelines CY.
+ def P10CY_Read : SchedReadVariant<[
+ SchedVar<P10W_CY_7C_Pred, [P10CY_Read_3C]>,
+ SchedVar<NoSchedPred, [P10CY_Read_1C]>
+ ]>;
+
+ // Save 1 cycle if pipeline MM reads the data from pipelines DX, MU, vMU, BF, CY, DF.
+ // Save 6 cycles if pipeline MM reads the data from pipelines MM.
+ def P10MM_Read : SchedReadVariant<[
+ SchedVar<P10W_MM_10C_Pred, [P10MM_Read_6C]>,
+ SchedVar<NoSchedPred, [P10MM_Read_1C]>
+ ]>;
+
+ // Save 1 cycle if pipeline BR reads the data from pipelines FX, F2.
+ def : SchedAlias<P10BR_Read, P10BR_Read_1C>;
+
+ // Save 1 cycle if pipeline DF reads the data from pipelines DX, MU, vMU, BF, CY, DF, MM.
+ def : SchedAlias<P10DF_Read, P10DF_Read_1C>;
+
+ // Save 1 cycle if pipeline DV reads the data from pipelines DX, MU, vMU, BF, CY, DF, MM.
+ def : SchedAlias<P10DV_Read, P10DV_Read_1C>;
+
+ // Save 1 cycle if pipeline DX reads the data from pipelines DX, MU, vMU, BF, CY, DF, MM.
+ def : SchedAlias<P10DX_Read, P10DX_Read_1C>;
+
+ // Save 1 cycle if pipeline F2 reads the data from pipelines ST, SX, FX, F2, PM.
+ def : SchedAlias<P10F2_Read, P10F2_Read_1C>;
+
+ // Save 1 cycle if pipeline FX reads the data from pipelines ST, SX, FX, F2, PM.
+ def : SchedAlias<P10FX_Read, P10FX_Read_1C>;
+
+ // Save 1 cycle if pipeline LD reads the data from pipelines ST, SX, FX, F2.
+ def : SchedAlias<P10LD_Read, P10LD_Read_1C>;
+
+ // Save 1 cycle if pipeline MU reads the data from pipelines DX, MU, DF.
+ def : SchedAlias<P10MU_Read, P10MU_Read_1C>;
+
+ // Save 1 cycle if pipeline PM reads the data from pipelines ST, SX, FX, F2, PM.
+ def : SchedAlias<P10PM_Read, P10PM_Read_1C>;
+
+ // Save 1 cycle if pipeline ST reads the data from pipelines ST, SX, FX, F2, PM.
+ def : SchedAlias<P10ST_Read, P10ST_Read_1C>;
+
+ // Save 1 cycle if pipeline SX reads the data from pipelines ST, SX, FX, F2, PM, MM.
+ def : SchedAlias<P10SX_Read, P10SX_Read_1C>;
+
+ // Save 1 cycle if pipeline vMU reads the data from pipelines DX, MU, vMU, BF, CY, DF, MM.
+ def : SchedAlias<P10vMU_Read, P10vMU_Read_1C>;
+
+ include "P10InstrResources.td"
+}
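To make the forwarding comments above concrete, the effective producer-to-consumer latency that a SchedReadAdvance models can be sketched as simple arithmetic (illustrative only, not how the scheduler is implemented):

  #include <algorithm>

  int effectiveLatency(int producerLatency, int readAdvance) {
    // The consumer reads the value 'readAdvance' cycles early, never below 0.
    return std::max(producerLatency - readAdvance, 0);
  }
  // e.g. a P10W_BF_7C producer feeding a BF consumer (P10BF_Read_2C) behaves
  // like a 5-cycle dependence: effectiveLatency(7, 2) == 5.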
diff --git a/llvm/lib/Target/PowerPC/PPCScheduleP9.td b/llvm/lib/Target/PowerPC/PPCScheduleP9.td
index 571cc219ff2b..3dc069ecad8a 100644
--- a/llvm/lib/Target/PowerPC/PPCScheduleP9.td
+++ b/llvm/lib/Target/PowerPC/PPCScheduleP9.td
@@ -9,8 +9,6 @@
// This file defines the itinerary class data for the POWER9 processor.
//
//===----------------------------------------------------------------------===//
-include "PPCInstrInfo.td"
-
def P9Model : SchedMachineModel {
// The maximum number of instructions to be issued at the same time.
// While a value of 8 is technically correct since 8 instructions can be
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
index 87ce32f027ab..dfc29dbb10f1 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -23,8 +23,8 @@
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"
#include <cstdlib>
@@ -127,6 +127,11 @@ void PPCSubtarget::initializeEnvironment() {
HasStoreFusion = false;
HasAddiLoadFusion = false;
HasAddisLoadFusion = false;
+ HasArithAddFusion = false;
+ HasAddLogicalFusion = false;
+ HasLogicalAddFusion = false;
+ HasLogicalFusion = false;
+ IsISA2_06 = false;
IsISA2_07 = false;
IsISA3_0 = false;
IsISA3_1 = false;
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h
index e916b0c02000..783ea121ccb8 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -147,6 +147,11 @@ protected:
bool HasStoreFusion;
bool HasAddiLoadFusion;
bool HasAddisLoadFusion;
+ bool HasArithAddFusion;
+ bool HasAddLogicalFusion;
+ bool HasLogicalAddFusion;
+ bool HasLogicalFusion;
+ bool IsISA2_06;
bool IsISA2_07;
bool IsISA3_0;
bool IsISA3_1;
@@ -322,6 +327,7 @@ public:
bool hasHTM() const { return HasHTM; }
bool hasFloat128() const { return HasFloat128; }
+ bool isISA2_06() const { return IsISA2_06; }
bool isISA2_07() const { return IsISA2_07; }
bool isISA3_0() const { return IsISA3_0; }
bool isISA3_1() const { return IsISA3_1; }
@@ -330,6 +336,10 @@ public:
bool hasStoreFusion() const { return HasStoreFusion; }
bool hasAddiLoadFusion() const { return HasAddiLoadFusion; }
bool hasAddisLoadFusion() const { return HasAddisLoadFusion; }
+ bool hasArithAddFusion() const { return HasArithAddFusion; }
+ bool hasAddLogicalFusion() const { return HasAddLogicalFusion; }
+ bool hasLogicalAddFusion() const { return HasLogicalAddFusion; }
+ bool hasLogicalFusion() const { return HasLogicalFusion; }
bool needsSwapsForVSXMemOps() const {
return hasVSX() && isLittleEndian() && !hasP9Vector();
}
diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index 3186d197931d..fbd487fbcfd5 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -208,11 +208,9 @@ public:
bool Changed = false;
- for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
- MachineBasicBlock &B = *I++;
+ for (MachineBasicBlock &B : llvm::make_early_inc_range(MF))
if (processBlock(B))
Changed = true;
- }
return Changed;
}
diff --git a/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp b/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp
index 895ae6744421..8120975c4fb2 100644
--- a/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp
@@ -73,9 +73,9 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -131,11 +131,9 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override {
bool Changed = false;
- for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
- MachineBasicBlock &B = *I++;
+ for (MachineBasicBlock &B : llvm::make_early_inc_range(MF))
if (processBlock(B))
Changed = true;
- }
return Changed;
}
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index 82717300a480..3eff00fc3c05 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -36,10 +36,10 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/InitializePasses.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Scalar.h"
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.h b/llvm/lib/Target/PowerPC/PPCTargetMachine.h
index ed9e74b72d1e..d3fe5362ccdc 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.h
@@ -68,6 +68,8 @@ public:
}
bool isLittleEndian() const;
+
+ int unqualifiedInlineAsmVariant() const override { return 1; }
};
} // end namespace llvm
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index d5a7873bd056..77d5a2668b60 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -485,6 +485,9 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
case Intrinsic::experimental_constrained_sin:
case Intrinsic::experimental_constrained_cos:
return true;
+ // There is no corresponding FMA instruction for PPC double double.
+ // Thus, we need to disable CTR loop generation for this type.
+ case Intrinsic::fmuladd:
case Intrinsic::copysign:
if (CI->getArgOperand(0)->getType()->getScalarType()->
isPPC_FP128Ty())
@@ -787,7 +790,8 @@ bool PPCTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
}
void PPCTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
- TTI::UnrollingPreferences &UP) {
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE) {
if (ST->getCPUDirective() == PPC::DIR_A2) {
// The A2 is in-order with a deep pipeline, and concatenation unrolling
// helps expose latency-hiding opportunities to the instruction scheduler.
@@ -798,7 +802,7 @@ void PPCTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
UP.AllowExpensiveTripCount = true;
}
- BaseT::getUnrollingPreferences(L, SE, UP);
+ BaseT::getUnrollingPreferences(L, SE, UP, ORE);
}
void PPCTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 8ac3038d51d6..aa84013803af 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -72,7 +72,8 @@ public:
TargetLibraryInfo *LibInfo);
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info);
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
- TTI::UnrollingPreferences &UP);
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE);
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP);
bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
@@ -102,8 +103,7 @@ public:
InstructionCost vectorCostAdjustment(InstructionCost Cost, unsigned Opcode,
Type *Ty1, Type *Ty2);
InstructionCost getArithmeticInstrCost(
- unsigned Opcode, Type *Ty,
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+ unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
@@ -130,8 +130,7 @@ public:
const Instruction *I = nullptr);
InstructionCost getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
- Align Alignment, unsigned AddressSpace,
- TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
+ Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond = false, bool UseMaskForGaps = false);
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind);
diff --git a/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp b/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp
index 3463bbbdc5f0..7272e6edefc5 100644
--- a/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp
+++ b/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp
@@ -27,9 +27,9 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -148,11 +148,9 @@ public:
bool Changed = false;
- for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
- MachineBasicBlock &B = *I++;
+ for (MachineBasicBlock &B : llvm::make_early_inc_range(MF))
if (processBlock(B))
Changed = true;
- }
return Changed;
}
@@ -169,4 +167,3 @@ INITIALIZE_PASS(PPCVSXCopy, DEBUG_TYPE,
char PPCVSXCopy::ID = 0;
FunctionPass*
llvm::createPPCVSXCopyPass() { return new PPCVSXCopy(); }
-
diff --git a/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
index e72e29112da7..0be35adc35c7 100644
--- a/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
+++ b/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
@@ -31,10 +31,10 @@
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -361,11 +361,9 @@ public:
if (DisableVSXFMAMutate)
return Changed;
- for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
- MachineBasicBlock &B = *I++;
+ for (MachineBasicBlock &B : llvm::make_early_inc_range(MF))
if (processBlock(B))
Changed = true;
- }
return Changed;
}
diff --git a/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp b/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
index 6bb952f27fee..0bfa0bd5ec0e 100644
--- a/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
+++ b/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#include "TargetInfo/PowerPCTargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
Target &llvm::getThePPC32Target() {
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index 87496e0b9330..f00813f1301a 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -32,10 +32,11 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/RISCVAttributes.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/RISCVISAInfo.h"
#include <limits>
@@ -50,6 +51,10 @@ using namespace llvm;
STATISTIC(RISCVNumInstrsCompressed,
"Number of RISC-V Compressed instructions emitted");
+namespace llvm {
+extern const SubtargetFeatureKV RISCVFeatureKV[RISCV::NumSubtargetFeatures];
+} // namespace llvm
+
namespace {
struct RISCVOperand;
@@ -169,6 +174,7 @@ class RISCVAsmParser : public MCTargetAsmParser {
bool parseDirectiveOption();
bool parseDirectiveAttribute();
+ bool parseDirectiveInsn(SMLoc L);
void setFeatureBits(uint64_t Feature, StringRef FeatureString) {
if (!(getSTI().getFeatureBits()[Feature])) {
@@ -504,6 +510,24 @@ public:
return (isRV64() && isUInt<5>(Imm)) || isUInt<4>(Imm);
}
+ bool isUImm2() const {
+ int64_t Imm;
+ RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
+ if (!isImm())
+ return false;
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
+ return IsConstantImm && isUInt<2>(Imm) && VK == RISCVMCExpr::VK_RISCV_None;
+ }
+
+ bool isUImm3() const {
+ int64_t Imm;
+ RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
+ if (!isImm())
+ return false;
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
+ return IsConstantImm && isUInt<3>(Imm) && VK == RISCVMCExpr::VK_RISCV_None;
+ }
+
bool isUImm5() const {
int64_t Imm;
RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
@@ -513,6 +537,15 @@ public:
return IsConstantImm && isUInt<5>(Imm) && VK == RISCVMCExpr::VK_RISCV_None;
}
+ bool isUImm7() const {
+ int64_t Imm;
+ RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
+ if (!isImm())
+ return false;
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
+ return IsConstantImm && isUInt<7>(Imm) && VK == RISCVMCExpr::VK_RISCV_None;
+ }
+
bool isSImm5() const {
if (!isImm())
return false;
@@ -960,10 +993,6 @@ bool RISCVAsmParser::generateImmOutOfRangeError(
return Error(ErrorLoc, Msg + " [" + Twine(Lower) + ", " + Twine(Upper) + "]");
}
-static std::string RISCVMnemonicSpellCheck(StringRef S,
- const FeatureBitset &FBS,
- unsigned VariantID = 0);
-
bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out,
@@ -996,13 +1025,13 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
}
case Match_MnemonicFail: {
FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
- std::string Suggestion =
- RISCVMnemonicSpellCheck(((RISCVOperand &)*Operands[0]).getToken(), FBS);
+ std::string Suggestion = RISCVMnemonicSpellCheck(
+ ((RISCVOperand &)*Operands[0]).getToken(), FBS, 0);
return Error(IDLoc, "unrecognized instruction mnemonic" + Suggestion);
}
case Match_InvalidOperand: {
SMLoc ErrorLoc = IDLoc;
- if (ErrorInfo != ~0U) {
+ if (ErrorInfo != ~0ULL) {
if (ErrorInfo >= Operands.size())
return Error(ErrorLoc, "too few operands for instruction");
@@ -1019,7 +1048,7 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// corresponding operand is missing.
if (Result > FIRST_TARGET_MATCH_RESULT_TY) {
SMLoc ErrorLoc = IDLoc;
- if (ErrorInfo != ~0U && ErrorInfo >= Operands.size())
+ if (ErrorInfo != ~0ULL && ErrorInfo >= Operands.size())
return Error(ErrorLoc, "too few operands for instruction");
}
@@ -1050,8 +1079,14 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
if (isRV64())
return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 5) - 1);
return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 4) - 1);
+ case Match_InvalidUImm2:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 2) - 1);
+ case Match_InvalidUImm3:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 3) - 1);
case Match_InvalidUImm5:
return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 5) - 1);
+ case Match_InvalidUImm7:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 7) - 1);
case Match_InvalidSImm5:
return generateImmOutOfRangeError(Operands, ErrorInfo, -(1 << 4),
(1 << 4) - 1);
@@ -1835,8 +1870,10 @@ bool RISCVAsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".option")
return parseDirectiveOption();
- else if (IDVal == ".attribute")
+ if (IDVal == ".attribute")
return parseDirectiveAttribute();
+ if (IDVal == ".insn")
+ return parseDirectiveInsn(DirectiveID.getLoc());
return true;
}
@@ -2027,113 +2064,35 @@ bool RISCVAsmParser::parseDirectiveAttribute() {
if (Tag == RISCVAttrs::ARCH) {
StringRef Arch = StringValue;
- if (Arch.consume_front("rv32"))
+ for (auto Feature : RISCVFeatureKV)
+ if (llvm::RISCVISAInfo::isSupportedExtensionFeature(Feature.Key))
+ clearFeatureBits(Feature.Value, Feature.Key);
+
+ auto ParseResult = llvm::RISCVISAInfo::parseArchString(
+ StringValue, /*EnableExperimentalExtension=*/true,
+ /*ExperimentalExtensionVersionCheck=*/false);
+ if (!ParseResult) {
+ std::string Buffer;
+ raw_string_ostream OutputErrMsg(Buffer);
+ handleAllErrors(ParseResult.takeError(), [&](llvm::StringError &ErrMsg) {
+ OutputErrMsg << "invalid arch name '" << Arch << "', "
+ << ErrMsg.getMessage();
+ });
+
+ return Error(ValueExprLoc, OutputErrMsg.str());
+ }
+ auto &ISAInfo = *ParseResult;
+
+ for (auto Feature : RISCVFeatureKV)
+ if (ISAInfo->hasExtension(Feature.Key))
+ setFeatureBits(Feature.Value, Feature.Key);
+
+ if (ISAInfo->getXLen() == 32)
clearFeatureBits(RISCV::Feature64Bit, "64bit");
- else if (Arch.consume_front("rv64"))
+ else if (ISAInfo->getXLen() == 64)
setFeatureBits(RISCV::Feature64Bit, "64bit");
else
return Error(ValueExprLoc, "bad arch string " + Arch);
-
- // .attribute arch overrides the current architecture, so unset all
- // currently enabled extensions
- clearFeatureBits(RISCV::FeatureRV32E, "e");
- clearFeatureBits(RISCV::FeatureStdExtM, "m");
- clearFeatureBits(RISCV::FeatureStdExtA, "a");
- clearFeatureBits(RISCV::FeatureStdExtF, "f");
- clearFeatureBits(RISCV::FeatureStdExtD, "d");
- clearFeatureBits(RISCV::FeatureStdExtC, "c");
- clearFeatureBits(RISCV::FeatureStdExtB, "experimental-b");
- clearFeatureBits(RISCV::FeatureStdExtV, "experimental-v");
- clearFeatureBits(RISCV::FeatureExtZfh, "experimental-zfh");
- clearFeatureBits(RISCV::FeatureExtZba, "experimental-zba");
- clearFeatureBits(RISCV::FeatureExtZbb, "experimental-zbb");
- clearFeatureBits(RISCV::FeatureExtZbc, "experimental-zbc");
- clearFeatureBits(RISCV::FeatureExtZbe, "experimental-zbe");
- clearFeatureBits(RISCV::FeatureExtZbf, "experimental-zbf");
- clearFeatureBits(RISCV::FeatureExtZbm, "experimental-zbm");
- clearFeatureBits(RISCV::FeatureExtZbp, "experimental-zbp");
- clearFeatureBits(RISCV::FeatureExtZbproposedc, "experimental-zbproposedc");
- clearFeatureBits(RISCV::FeatureExtZbr, "experimental-zbr");
- clearFeatureBits(RISCV::FeatureExtZbs, "experimental-zbs");
- clearFeatureBits(RISCV::FeatureExtZbt, "experimental-zbt");
- clearFeatureBits(RISCV::FeatureExtZvamo, "experimental-zvamo");
- clearFeatureBits(RISCV::FeatureStdExtZvlsseg, "experimental-zvlsseg");
-
- while (!Arch.empty()) {
- bool DropFirst = true;
- if (Arch[0] == 'i')
- clearFeatureBits(RISCV::FeatureRV32E, "e");
- else if (Arch[0] == 'e')
- setFeatureBits(RISCV::FeatureRV32E, "e");
- else if (Arch[0] == 'g') {
- clearFeatureBits(RISCV::FeatureRV32E, "e");
- setFeatureBits(RISCV::FeatureStdExtM, "m");
- setFeatureBits(RISCV::FeatureStdExtA, "a");
- setFeatureBits(RISCV::FeatureStdExtF, "f");
- setFeatureBits(RISCV::FeatureStdExtD, "d");
- } else if (Arch[0] == 'm')
- setFeatureBits(RISCV::FeatureStdExtM, "m");
- else if (Arch[0] == 'a')
- setFeatureBits(RISCV::FeatureStdExtA, "a");
- else if (Arch[0] == 'f')
- setFeatureBits(RISCV::FeatureStdExtF, "f");
- else if (Arch[0] == 'd') {
- setFeatureBits(RISCV::FeatureStdExtF, "f");
- setFeatureBits(RISCV::FeatureStdExtD, "d");
- } else if (Arch[0] == 'c') {
- setFeatureBits(RISCV::FeatureStdExtC, "c");
- } else if (Arch[0] == 'b') {
- setFeatureBits(RISCV::FeatureStdExtB, "experimental-b");
- } else if (Arch[0] == 'v') {
- setFeatureBits(RISCV::FeatureStdExtV, "experimental-v");
- } else if (Arch[0] == 's' || Arch[0] == 'x' || Arch[0] == 'z') {
- StringRef Ext =
- Arch.take_until([](char c) { return ::isdigit(c) || c == '_'; });
- if (Ext == "zba")
- setFeatureBits(RISCV::FeatureExtZba, "experimental-zba");
- else if (Ext == "zbb")
- setFeatureBits(RISCV::FeatureExtZbb, "experimental-zbb");
- else if (Ext == "zbc")
- setFeatureBits(RISCV::FeatureExtZbc, "experimental-zbc");
- else if (Ext == "zbe")
- setFeatureBits(RISCV::FeatureExtZbe, "experimental-zbe");
- else if (Ext == "zbf")
- setFeatureBits(RISCV::FeatureExtZbf, "experimental-zbf");
- else if (Ext == "zbm")
- setFeatureBits(RISCV::FeatureExtZbm, "experimental-zbm");
- else if (Ext == "zbp")
- setFeatureBits(RISCV::FeatureExtZbp, "experimental-zbp");
- else if (Ext == "zbproposedc")
- setFeatureBits(RISCV::FeatureExtZbproposedc,
- "experimental-zbproposedc");
- else if (Ext == "zbr")
- setFeatureBits(RISCV::FeatureExtZbr, "experimental-zbr");
- else if (Ext == "zbs")
- setFeatureBits(RISCV::FeatureExtZbs, "experimental-zbs");
- else if (Ext == "zbt")
- setFeatureBits(RISCV::FeatureExtZbt, "experimental-zbt");
- else if (Ext == "zfh")
- setFeatureBits(RISCV::FeatureExtZfh, "experimental-zfh");
- else if (Ext == "zvamo")
- setFeatureBits(RISCV::FeatureExtZvamo, "experimental-zvamo");
- else if (Ext == "zvlsseg")
- setFeatureBits(RISCV::FeatureStdExtZvlsseg, "experimental-zvlsseg");
- else
- return Error(ValueExprLoc, "bad arch string " + Ext);
- Arch = Arch.drop_until([](char c) { return ::isdigit(c) || c == '_'; });
- DropFirst = false;
- } else
- return Error(ValueExprLoc, "bad arch string " + Arch);
-
- if (DropFirst)
- Arch = Arch.drop_front(1);
- int major = 0;
- int minor = 0;
- Arch.consumeInteger(10, major);
- Arch.consume_front("p");
- Arch.consumeInteger(10, minor);
- Arch = Arch.drop_while([](char c) { return c == '_'; });
- }
}
if (IsIntegerValue)
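Note on the rewritten handling above: a directive such as .attribute arch, "rv64imafdc" is now validated by RISCVISAInfo::parseArchString (experimental extensions accepted, version checks relaxed), and each recognized extension toggles the corresponding subtarget feature, replacing the hand-rolled rv32/rv64 prefix walker deleted above.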
@@ -2142,64 +2101,63 @@ bool RISCVAsmParser::parseDirectiveAttribute() {
if (Tag != RISCVAttrs::ARCH) {
getTargetStreamer().emitTextAttribute(Tag, StringValue);
} else {
- std::string formalArchStr = "rv32";
- if (getFeatureBits(RISCV::Feature64Bit))
- formalArchStr = "rv64";
- if (getFeatureBits(RISCV::FeatureRV32E))
- formalArchStr = (Twine(formalArchStr) + "e1p9").str();
- else
- formalArchStr = (Twine(formalArchStr) + "i2p0").str();
-
- if (getFeatureBits(RISCV::FeatureStdExtM))
- formalArchStr = (Twine(formalArchStr) + "_m2p0").str();
- if (getFeatureBits(RISCV::FeatureStdExtA))
- formalArchStr = (Twine(formalArchStr) + "_a2p0").str();
- if (getFeatureBits(RISCV::FeatureStdExtF))
- formalArchStr = (Twine(formalArchStr) + "_f2p0").str();
- if (getFeatureBits(RISCV::FeatureStdExtD))
- formalArchStr = (Twine(formalArchStr) + "_d2p0").str();
- if (getFeatureBits(RISCV::FeatureStdExtC))
- formalArchStr = (Twine(formalArchStr) + "_c2p0").str();
- if (getFeatureBits(RISCV::FeatureStdExtB))
- formalArchStr = (Twine(formalArchStr) + "_b0p93").str();
- if (getFeatureBits(RISCV::FeatureStdExtV))
- formalArchStr = (Twine(formalArchStr) + "_v0p10").str();
- if (getFeatureBits(RISCV::FeatureExtZfh))
- formalArchStr = (Twine(formalArchStr) + "_zfh0p1").str();
- if (getFeatureBits(RISCV::FeatureExtZba))
- formalArchStr = (Twine(formalArchStr) + "_zba0p93").str();
- if (getFeatureBits(RISCV::FeatureExtZbb))
- formalArchStr = (Twine(formalArchStr) + "_zbb0p93").str();
- if (getFeatureBits(RISCV::FeatureExtZbc))
- formalArchStr = (Twine(formalArchStr) + "_zbc0p93").str();
- if (getFeatureBits(RISCV::FeatureExtZbe))
- formalArchStr = (Twine(formalArchStr) + "_zbe0p93").str();
- if (getFeatureBits(RISCV::FeatureExtZbf))
- formalArchStr = (Twine(formalArchStr) + "_zbf0p93").str();
- if (getFeatureBits(RISCV::FeatureExtZbm))
- formalArchStr = (Twine(formalArchStr) + "_zbm0p93").str();
- if (getFeatureBits(RISCV::FeatureExtZbp))
- formalArchStr = (Twine(formalArchStr) + "_zbp0p93").str();
- if (getFeatureBits(RISCV::FeatureExtZbproposedc))
- formalArchStr = (Twine(formalArchStr) + "_zbproposedc0p93").str();
- if (getFeatureBits(RISCV::FeatureExtZbr))
- formalArchStr = (Twine(formalArchStr) + "_zbr0p93").str();
- if (getFeatureBits(RISCV::FeatureExtZbs))
- formalArchStr = (Twine(formalArchStr) + "_zbs0p93").str();
- if (getFeatureBits(RISCV::FeatureExtZbt))
- formalArchStr = (Twine(formalArchStr) + "_zbt0p93").str();
- if (getFeatureBits(RISCV::FeatureExtZvamo))
- formalArchStr = (Twine(formalArchStr) + "_zvamo0p10").str();
- if (getFeatureBits(RISCV::FeatureStdExtZvlsseg))
- formalArchStr = (Twine(formalArchStr) + "_zvlsseg0p10").str();
-
- getTargetStreamer().emitTextAttribute(Tag, formalArchStr);
+ std::vector<std::string> FeatureVector;
+ RISCVFeatures::toFeatureVector(FeatureVector, getSTI().getFeatureBits());
+
+ // Parse the accumulated feature vector with RISCVISAInfo.
+ unsigned XLen = getFeatureBits(RISCV::Feature64Bit) ? 64 : 32;
+ auto ParseResult = llvm::RISCVISAInfo::parseFeatures(XLen, FeatureVector);
+ if (!ParseResult) {
+ std::string Buffer;
+ raw_string_ostream OutputErrMsg(Buffer);
+ handleAllErrors(ParseResult.takeError(),
+ [&](llvm::StringError &ErrMsg) {
+ OutputErrMsg << ErrMsg.getMessage();
+ });
+
+ return Error(ValueExprLoc, OutputErrMsg.str());
+ }
+ auto &ISAInfo = *ParseResult;
+
+ // Then emit the arch string.
+ getTargetStreamer().emitTextAttribute(Tag, ISAInfo->toString());
}
}
return false;
}
+/// parseDirectiveInsn
+/// ::= .insn [ format encoding, (operands (, operands)*) ]
+bool RISCVAsmParser::parseDirectiveInsn(SMLoc L) {
+ MCAsmParser &Parser = getParser();
+
+ // Expect instruction format as identifier.
+ StringRef Format;
+ SMLoc ErrorLoc = Parser.getTok().getLoc();
+ if (Parser.parseIdentifier(Format))
+ return Error(ErrorLoc, "expected instruction format");
+
+ if (Format != "r" && Format != "r4" && Format != "i" && Format != "b" &&
+ Format != "sb" && Format != "u" && Format != "j" && Format != "uj" &&
+ Format != "s")
+ return Error(ErrorLoc, "invalid instruction format");
+
+ std::string FormatName = (".insn_" + Format).str();
+
+ ParseInstructionInfo Info;
+ SmallVector<std::unique_ptr<MCParsedAsmOperand>, 8> Operands;
+
+ if (ParseInstruction(Info, FormatName, L, Operands))
+ return true;
+
+ unsigned Opcode;
+ uint64_t ErrorInfo;
+ return MatchAndEmitInstruction(L, Opcode, Operands, Parser.getStreamer(),
+ ErrorInfo,
+ /*MatchingInlineAsm=*/false);
+}
+
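For illustration: the directive is re-parsed as a pseudo-mnemonic, so ".insn r ..." becomes an ".insn_r" instruction. A plausible use is .insn r 0x33, 0, 0, a0, a1, a2 to hand-encode an R-type add, assuming the binutils-style operand order (opcode, func3, func7, rd, rs1, rs2) for the .insn_r alias defined elsewhere in this patch.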
void RISCVAsmParser::emitToStreamer(MCStreamer &S, const MCInst &Inst) {
MCInst CInst;
bool Res = compressInst(CInst, Inst, getSTI(), S.getContext());
@@ -2223,6 +2181,11 @@ void RISCVAsmParser::emitLoadImm(MCRegister DestReg, int64_t Value,
.addReg(DestReg)
.addReg(SrcReg)
.addReg(RISCV::X0));
+ } else if (Inst.Opc == RISCV::SH1ADD || Inst.Opc == RISCV::SH2ADD ||
+ Inst.Opc == RISCV::SH3ADD) {
+ emitToStreamer(
+ Out, MCInstBuilder(Inst.Opc).addReg(DestReg).addReg(SrcReg).addReg(
+ SrcReg));
} else {
emitToStreamer(
Out, MCInstBuilder(Inst.Opc).addReg(DestReg).addReg(SrcReg).addImm(
@@ -2339,10 +2302,10 @@ void RISCVAsmParser::emitLoadStoreSymbol(MCInst &Inst, unsigned Opcode,
//
// TmpLabel: AUIPC tmp, %pcrel_hi(symbol)
// [S|L]X rd, %pcrel_lo(TmpLabel)(tmp)
- MCOperand DestReg = Inst.getOperand(0);
+ unsigned DestRegOpIdx = HasTmpReg ? 1 : 0;
+ MCOperand DestReg = Inst.getOperand(DestRegOpIdx);
unsigned SymbolOpIdx = HasTmpReg ? 2 : 1;
- unsigned TmpRegOpIdx = HasTmpReg ? 1 : 0;
- MCOperand TmpReg = Inst.getOperand(TmpRegOpIdx);
+ MCOperand TmpReg = Inst.getOperand(0);
const MCExpr *Symbol = Inst.getOperand(SymbolOpIdx).getExpr();
emitAuipcInstPair(DestReg, TmpReg, Symbol, RISCVMCExpr::VK_RISCV_PCREL_HI,
Opcode, IDLoc, Out);
@@ -2414,7 +2377,7 @@ void RISCVAsmParser::emitVMSGE(MCInst &Inst, unsigned Opcode, SMLoc IDLoc,
// masked va >= x, vd == v0
//
// pseudoinstruction: vmsge{u}.vx vd, va, x, v0.t, vt
- // expansion: vmslt{u}.vx vt, va, x; vmandnot.mm vd, vd, vt
+ // expansion: vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt
assert(Inst.getOperand(0).getReg() == RISCV::V0 &&
"The destination register should be V0.");
assert(Inst.getOperand(1).getReg() != RISCV::V0 &&
@@ -2424,7 +2387,7 @@ void RISCVAsmParser::emitVMSGE(MCInst &Inst, unsigned Opcode, SMLoc IDLoc,
.addOperand(Inst.getOperand(2))
.addOperand(Inst.getOperand(3))
.addOperand(Inst.getOperand(4)));
- emitToStreamer(Out, MCInstBuilder(RISCV::VMANDNOT_MM)
+ emitToStreamer(Out, MCInstBuilder(RISCV::VMANDN_MM)
.addOperand(Inst.getOperand(0))
.addOperand(Inst.getOperand(0))
.addOperand(Inst.getOperand(1)));
@@ -2432,7 +2395,7 @@ void RISCVAsmParser::emitVMSGE(MCInst &Inst, unsigned Opcode, SMLoc IDLoc,
// masked va >= x, any vd
//
// pseudoinstruction: vmsge{u}.vx vd, va, x, v0.t, vt
- // expansion: vmslt{u}.vx vt, va, x; vmandnot.mm vt, v0, vt; vmandnot.mm vd,
+ // expansion: vmslt{u}.vx vt, va, x; vmandn.mm vt, v0, vt; vmandn.mm vd,
// vd, v0; vmor.mm vd, vt, vd
assert(Inst.getOperand(1).getReg() != RISCV::V0 &&
"The temporary vector register should not be V0.");
@@ -2441,11 +2404,11 @@ void RISCVAsmParser::emitVMSGE(MCInst &Inst, unsigned Opcode, SMLoc IDLoc,
.addOperand(Inst.getOperand(2))
.addOperand(Inst.getOperand(3))
.addReg(RISCV::NoRegister));
- emitToStreamer(Out, MCInstBuilder(RISCV::VMANDNOT_MM)
+ emitToStreamer(Out, MCInstBuilder(RISCV::VMANDN_MM)
.addOperand(Inst.getOperand(1))
.addReg(RISCV::V0)
.addOperand(Inst.getOperand(1)));
- emitToStreamer(Out, MCInstBuilder(RISCV::VMANDNOT_MM)
+ emitToStreamer(Out, MCInstBuilder(RISCV::VMANDN_MM)
.addOperand(Inst.getOperand(0))
.addOperand(Inst.getOperand(0))
.addReg(RISCV::V0));
diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
index 504a78d91f32..ff96b2b254ca 100644
--- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
+++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
@@ -20,8 +20,8 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Endian.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -449,19 +449,6 @@ DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
}
- if (STI.getFeatureBits()[RISCV::FeatureExtZbproposedc] &&
- STI.getFeatureBits()[RISCV::FeatureStdExtC]) {
- LLVM_DEBUG(
- dbgs() << "Trying RVBC32 table (BitManip 16-bit Instruction):\n");
- // Calling the auto-generated decoder function.
- Result = decodeInstruction(DecoderTableRVBC16, MI, Insn, Address,
- this, STI);
- if (Result != MCDisassembler::Fail) {
- Size = 2;
- return Result;
- }
- }
-
LLVM_DEBUG(dbgs() << "Trying RISCV_C table (16-bit Instruction):\n");
// Calling the auto-generated decoder function.
Result = decodeInstruction(DecoderTable16, MI, Insn, Address, this, STI);
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
index b93197e713e5..514789b3f645 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
@@ -352,8 +352,9 @@ bool RISCVAsmBackend::mayNeedRelaxation(const MCInst &Inst,
return getRelaxedOpcode(Inst.getOpcode()) != Inst.getOpcode();
}
-bool RISCVAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
- bool HasStdExtC = STI.getFeatureBits()[RISCV::FeatureStdExtC];
+bool RISCVAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const {
+ bool HasStdExtC = STI->getFeatureBits()[RISCV::FeatureStdExtC];
unsigned MinNopLen = HasStdExtC ? 2 : 4;
if ((Count % MinNopLen) != 0)
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
index e1628673419a..f04d2912f09d 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
@@ -99,7 +99,8 @@ public:
bool relaxDwarfCFA(MCDwarfCallFrameFragment &DF, MCAsmLayout &Layout,
bool &WasRelaxed) const override;
- bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
+ bool writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const override;
const MCTargetOptions &getTargetOptions() const { return TargetOptions; }
RISCVABI::ABI getTargetABI() const { return TargetABI; }
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp
index 60e86093d9f4..0aba18b20f0d 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp
@@ -14,9 +14,14 @@
#include "RISCVBaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/RISCVISAInfo.h"
#include "llvm/Support/raw_ostream.h"
namespace llvm {
+
+extern const SubtargetFeatureKV RISCVFeatureKV[RISCV::NumSubtargetFeatures];
+
namespace RISCVSysReg {
#define GET_SysRegsList_IMPL
#include "RISCVGenSearchableTables.inc"
@@ -96,6 +101,15 @@ void validate(const Triple &TT, const FeatureBitset &FeatureBits) {
report_fatal_error("RV32E can't be enabled for an RV64 target");
}
+void toFeatureVector(std::vector<std::string> &FeatureVector,
+ const FeatureBitset &FeatureBits) {
+ for (auto Feature : RISCVFeatureKV) {
+ if (FeatureBits[Feature.Value] &&
+ llvm::RISCVISAInfo::isSupportedExtensionFeature(Feature.Key))
+ FeatureVector.push_back(std::string("+") + Feature.Key);
+ }
+}
+
} // namespace RISCVFeatures
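For an rv64gc-style subtarget, toFeatureVector would typically produce {"+m", "+a", "+f", "+d", "+c"}, which RISCVISAInfo::parseFeatures can turn back into a canonical arch string along the lines of "rv64i2p0_m2p0_a2p0_f2p0_d2p0_c2p0".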
// Encode VTYPE into the binary format used by the VSETVLI instruction which
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
index 9bdd2003cb15..d8f4403c824f 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
@@ -68,14 +68,25 @@ enum {
HasMergeOpMask = 1 << HasMergeOpShift,
// Does this instruction have a SEW operand. It will be the last explicit
- // operand. Used by RVV Pseudos.
+ // operand unless there is a vector policy operand. Used by RVV Pseudos.
HasSEWOpShift = HasMergeOpShift + 1,
HasSEWOpMask = 1 << HasSEWOpShift,
// Does this instruction have a VL operand. It will be the second to last
- // explicit operand. Used by RVV Pseudos.
+ // explicit operand unless there is a vector policy operand. Used by RVV
+ // Pseudos.
HasVLOpShift = HasSEWOpShift + 1,
HasVLOpMask = 1 << HasVLOpShift,
+
+ // Does this instruction have a vector policy operand. It will be the last
+ // explicit operand. Used by RVV Pseudos.
+ HasVecPolicyOpShift = HasVLOpShift + 1,
+ HasVecPolicyOpMask = 1 << HasVecPolicyOpShift,
+
+ // Is this instruction a vector widening reduction instruction. Used by RVV
+ // Pseudos.
+ IsRVVWideningReductionShift = HasVecPolicyOpShift + 1,
+ IsRVVWideningReductionMask = 1 << IsRVVWideningReductionShift,
};
// Match with the definitions in RISCVInstrFormatsV.td
@@ -97,6 +108,11 @@ enum VLMUL : uint8_t {
LMUL_F2
};
+enum {
+ TAIL_UNDISTURBED = 0,
+ TAIL_AGNOSTIC = 1,
+};
+
// Helper functions to read TSFlags.
/// \returns the format of the instruction.
static inline unsigned getFormat(uint64_t TSFlags) {
@@ -131,6 +147,14 @@ static inline bool hasSEWOp(uint64_t TSFlags) {
static inline bool hasVLOp(uint64_t TSFlags) {
return TSFlags & HasVLOpMask;
}
+/// \returns true if there is a vector policy operand for this instruction.
+static inline bool hasVecPolicyOp(uint64_t TSFlags) {
+ return TSFlags & HasVecPolicyOpMask;
+}
+/// \returns true if it is a vector widening reduction instruction.
+static inline bool isRVVWideningReduction(uint64_t TSFlags) {
+ return TSFlags & IsRVVWideningReductionMask;
+}
// RISC-V Specific Machine Operand Flags
enum {
@@ -158,8 +182,11 @@ enum {
namespace RISCVOp {
enum OperandType : unsigned {
OPERAND_FIRST_RISCV_IMM = MCOI::OPERAND_FIRST_TARGET,
- OPERAND_UIMM4 = OPERAND_FIRST_RISCV_IMM,
+ OPERAND_UIMM2 = OPERAND_FIRST_RISCV_IMM,
+ OPERAND_UIMM3,
+ OPERAND_UIMM4,
OPERAND_UIMM5,
+ OPERAND_UIMM7,
OPERAND_UIMM12,
OPERAND_SIMM12,
OPERAND_UIMM20,
@@ -306,6 +333,10 @@ namespace RISCVFeatures {
// triple. Exits with report_fatal_error if not.
void validate(const Triple &TT, const FeatureBitset &FeatureBits);
+// Convert FeatureBitset to FeatureVector.
+void toFeatureVector(std::vector<std::string> &FeatureVector,
+ const FeatureBitset &FeatureBits);
+
} // namespace RISCVFeatures
namespace RISCVVType {
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
index 1ef276b10100..14d0191a505f 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
@@ -358,7 +358,7 @@ unsigned RISCVMCCodeEmitter::getImmOpValue(const MCInst &MI, unsigned OpNo,
}
} else if (Kind == MCExpr::SymbolRef &&
cast<MCSymbolRefExpr>(Expr)->getKind() == MCSymbolRefExpr::VK_None) {
- if (Desc.getOpcode() == RISCV::JAL) {
+ if (MIFrm == RISCVII::InstFormatJ) {
FixupKind = RISCV::fixup_riscv_jal;
} else if (MIFrm == RISCVII::InstFormatB) {
FixupKind = RISCV::fixup_riscv_branch;
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCObjectFileInfo.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCObjectFileInfo.cpp
new file mode 100644
index 000000000000..9c9d9221578c
--- /dev/null
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCObjectFileInfo.cpp
@@ -0,0 +1,22 @@
+//===-- RISCVMCObjectFileInfo.cpp - RISCV object file properties ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the RISCVMCObjectFileInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCVMCObjectFileInfo.h"
+#include "RISCVMCTargetDesc.h"
+#include "llvm/MC/MCContext.h"
+
+using namespace llvm;
+
+unsigned RISCVMCObjectFileInfo::getTextSectionAlignment() const {
+ const MCSubtargetInfo *STI = getContext().getSubtargetInfo();
+ return STI->hasFeature(RISCV::FeatureStdExtC) ? 2 : 4;
+}
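In effect, subtargets with the C (compressed) extension get 2-byte text section alignment, since every instruction is then at least 2-byte aligned, while other subtargets keep the default 4-byte alignment.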
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCObjectFileInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCObjectFileInfo.h
new file mode 100644
index 000000000000..2f6b10229864
--- /dev/null
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCObjectFileInfo.h
@@ -0,0 +1,27 @@
+//===-- RISCVMCObjectFileInfo.h - RISCV object file Info -------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the RISCVMCObjectFileInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVMCOBJECTFILEINFO_H
+#define LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVMCOBJECTFILEINFO_H
+
+#include "llvm/MC/MCObjectFileInfo.h"
+
+namespace llvm {
+
+class RISCVMCObjectFileInfo : public MCObjectFileInfo {
+public:
+ unsigned getTextSectionAlignment() const override;
+};
+
+} // namespace llvm
+
+#endif
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
index 38c32539833c..07c2be624932 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
@@ -15,6 +15,7 @@
#include "RISCVELFStreamer.h"
#include "RISCVInstPrinter.h"
#include "RISCVMCAsmInfo.h"
+#include "RISCVMCObjectFileInfo.h"
#include "RISCVTargetStreamer.h"
#include "TargetInfo/RISCVTargetInfo.h"
#include "llvm/ADT/STLExtras.h"
@@ -23,12 +24,13 @@
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
#include "RISCVGenInstrInfo.inc"
@@ -65,6 +67,14 @@ static MCAsmInfo *createRISCVMCAsmInfo(const MCRegisterInfo &MRI,
return MAI;
}
+static MCObjectFileInfo *
+createRISCVMCObjectFileInfo(MCContext &Ctx, bool PIC,
+ bool LargeCodeModel = false) {
+ MCObjectFileInfo *MOFI = new RISCVMCObjectFileInfo();
+ MOFI->initMCObjectFileInfo(Ctx, PIC, LargeCodeModel);
+ return MOFI;
+}
+
static MCSubtargetInfo *createRISCVMCSubtargetInfo(const Triple &TT,
StringRef CPU, StringRef FS) {
if (CPU.empty())
@@ -155,6 +165,7 @@ MCStreamer *createRISCVELFStreamer(const Triple &T, MCContext &Context,
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTargetMC() {
for (Target *T : {&getTheRISCV32Target(), &getTheRISCV64Target()}) {
TargetRegistry::RegisterMCAsmInfo(*T, createRISCVMCAsmInfo);
+ TargetRegistry::RegisterMCObjectFileInfo(*T, createRISCVMCObjectFileInfo);
TargetRegistry::RegisterMCInstrInfo(*T, createRISCVMCInstrInfo);
TargetRegistry::RegisterMCRegInfo(*T, createRISCVMCRegisterInfo);
TargetRegistry::RegisterMCAsmBackend(*T, createRISCVAsmBackend);
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
index 2ca5eeb8392e..0ee6d8de78c9 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
@@ -20,7 +20,8 @@ static int getInstSeqCost(RISCVMatInt::InstSeq &Res, bool HasRVC) {
for (auto Instr : Res) {
bool Compressed;
switch (Instr.Opc) {
- default: llvm_unreachable("Unexpected opcode");
+ default:
+ llvm_unreachable("Unexpected opcode");
case RISCV::SLLI:
case RISCV::SRLI:
Compressed = true;
@@ -77,7 +78,7 @@ static void generateInstSeqImpl(int64_t Val,
assert(IsRV64 && "Can't emit >32-bit imm for non-RV64 target");
// In the worst case, for a full 64-bit constant, a sequence of 8 instructions
- // (i.e., LUI+ADDIW+SLLI+ADDI+SLLI+ADDI+SLLI+ADDI) has to be emmitted. Note
+ // (i.e., LUI+ADDIW+SLLI+ADDI+SLLI+ADDI+SLLI+ADDI) has to be emitted. Note
// that the first two instructions (LUI+ADDIW) can contribute up to 32 bits
// while the following ADDI instructions contribute up to 12 bits each.
//
@@ -106,15 +107,36 @@ static void generateInstSeqImpl(int64_t Val,
// If the remaining bits don't fit in 12 bits, we might be able to reduce the
// shift amount in order to use LUI which will zero the lower 12 bits.
- if (ShiftAmount > 12 && !isInt<12>(Hi52) && isInt<32>((uint64_t)Hi52 << 12)) {
- // Reduce the shift amount and add zeros to the LSBs so it will match LUI.
- ShiftAmount -= 12;
- Hi52 = (uint64_t)Hi52 << 12;
+ bool Unsigned = false;
+ if (ShiftAmount > 12 && !isInt<12>(Hi52)) {
+ if (isInt<32>((uint64_t)Hi52 << 12)) {
+ // Reduce the shift amount and add zeros to the LSBs so it will match LUI.
+ ShiftAmount -= 12;
+ Hi52 = (uint64_t)Hi52 << 12;
+ } else if (isUInt<32>((uint64_t)Hi52 << 12) &&
+ ActiveFeatures[RISCV::FeatureStdExtZba]) {
+ // Reduce the shift amount and add zeros to the LSBs so it will match
+ // LUI, then shift left with SLLI.UW to clear the upper 32 set bits.
+ ShiftAmount -= 12;
+ Hi52 = ((uint64_t)Hi52 << 12) | (0xffffffffull << 32);
+ Unsigned = true;
+ }
+ }
+
+ // Try to use SLLIUW for Hi52 when it is uint32 but not int32.
+ if (isUInt<32>((uint64_t)Hi52) && !isInt<32>((uint64_t)Hi52) &&
+ ActiveFeatures[RISCV::FeatureStdExtZba]) {
+ // Use LUI+ADDI or LUI to compose, then clear the upper 32 bits with SLLIUW.
+ Hi52 = ((uint64_t)Hi52) | (0xffffffffull << 32);
+ Unsigned = true;
}
generateInstSeqImpl(Hi52, ActiveFeatures, Res);
- Res.push_back(RISCVMatInt::Inst(RISCV::SLLI, ShiftAmount));
+ if (Unsigned)
+ Res.push_back(RISCVMatInt::Inst(RISCV::SLLIUW, ShiftAmount));
+ else
+ Res.push_back(RISCVMatInt::Inst(RISCV::SLLI, ShiftAmount));
if (Lo12)
Res.push_back(RISCVMatInt::Inst(RISCV::ADDI, Lo12));
}
@@ -165,7 +187,7 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
// If we have exactly 32 leading zeros and Zba, we can try using zext.w at
// the end of the sequence.
- if (LeadingZeros == 32 && ActiveFeatures[RISCV::FeatureExtZba]) {
+ if (LeadingZeros == 32 && ActiveFeatures[RISCV::FeatureStdExtZba]) {
// Try replacing upper bits with 1.
uint64_t LeadingOnesVal = Val | maskLeadingOnes<uint64_t>(LeadingZeros);
TmpSeq.clear();
@@ -182,12 +204,119 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
}
}
+ // Perform optimization with BCLRI/BSETI in the Zbs extension.
+ if (Res.size() > 2 && ActiveFeatures[RISCV::FeatureStdExtZbs]) {
+ assert(ActiveFeatures[RISCV::Feature64Bit] &&
+ "Expected RV32 to only need 2 instructions");
+
+ // 1. For values in range 0xffffffff 7fffffff ~ 0xffffffff 00000000,
+ // call generateInstSeqImpl with Val|0x80000000 (which is expected to be
+ // an int32), then emit (BCLRI r, 31).
+ // 2. For values in range 0x80000000 ~ 0xffffffff, call generateInstSeqImpl
+ // with Val&~0x80000000 (which is expected to be an int32), then
+ // emit (BSETI r, 31).
+ int64_t NewVal;
+ unsigned Opc;
+ if (Val < 0) {
+ Opc = RISCV::BCLRI;
+ NewVal = Val | 0x80000000ll;
+ } else {
+ Opc = RISCV::BSETI;
+ NewVal = Val & ~0x80000000ll;
+ }
+ if (isInt<32>(NewVal)) {
+ RISCVMatInt::InstSeq TmpSeq;
+ generateInstSeqImpl(NewVal, ActiveFeatures, TmpSeq);
+ TmpSeq.push_back(RISCVMatInt::Inst(Opc, 31));
+ if (TmpSeq.size() < Res.size())
+ Res = TmpSeq;
+ }
+
+ // Try to use BCLRI for upper 32 bits if the original lower 32 bits are
+ // negative int32, or use BSETI for upper 32 bits if the original lower
+ // 32 bits are positive int32.
+ int32_t Lo = Val;
+ uint32_t Hi = Val >> 32;
+ Opc = 0;
+ RISCVMatInt::InstSeq TmpSeq;
+ generateInstSeqImpl(Lo, ActiveFeatures, TmpSeq);
+ // Check if it is profitable to use BCLRI/BSETI.
+ if (Lo > 0 && TmpSeq.size() + countPopulation(Hi) < Res.size()) {
+ Opc = RISCV::BSETI;
+ } else if (Lo < 0 && TmpSeq.size() + countPopulation(~Hi) < Res.size()) {
+ Opc = RISCV::BCLRI;
+ Hi = ~Hi;
+ }
+ // Search for each bit and build corresponding BCLRI/BSETI.
+ if (Opc > 0) {
+ while (Hi != 0) {
+ unsigned Bit = countTrailingZeros(Hi);
+ TmpSeq.push_back(RISCVMatInt::Inst(Opc, Bit + 32));
+ Hi &= ~(1 << Bit);
+ }
+ if (TmpSeq.size() < Res.size())
+ Res = TmpSeq;
+ }
+ }
+
+ // Perform optimization with SH*ADD in the Zba extension.
+ if (Res.size() > 2 && ActiveFeatures[RISCV::FeatureStdExtZba]) {
+ assert(ActiveFeatures[RISCV::Feature64Bit] &&
+ "Expected RV32 to only need 2 instructions");
+ int64_t Div = 0;
+ unsigned Opc = 0;
+ RISCVMatInt::InstSeq TmpSeq;
+ // Select the opcode and divisor.
+ if ((Val % 3) == 0 && isInt<32>(Val / 3)) {
+ Div = 3;
+ Opc = RISCV::SH1ADD;
+ } else if ((Val % 5) == 0 && isInt<32>(Val / 5)) {
+ Div = 5;
+ Opc = RISCV::SH2ADD;
+ } else if ((Val % 9) == 0 && isInt<32>(Val / 9)) {
+ Div = 9;
+ Opc = RISCV::SH3ADD;
+ }
+ // Build the new instruction sequence.
+ if (Div > 0) {
+ generateInstSeqImpl(Val / Div, ActiveFeatures, TmpSeq);
+ TmpSeq.push_back(RISCVMatInt::Inst(Opc, 0));
+ if (TmpSeq.size() < Res.size())
+ Res = TmpSeq;
+ }
+ // Try to use LUI+SH*ADD+ADDI.
+ int64_t Hi52 = ((uint64_t)Val + 0x800ull) & ~0xfffull;
+ int64_t Lo12 = SignExtend64<12>(Val);
+ Div = 0;
+ if (isInt<32>(Hi52 / 3) && (Hi52 % 3) == 0) {
+ Div = 3;
+ Opc = RISCV::SH1ADD;
+ } else if (isInt<32>(Hi52 / 5) && (Hi52 % 5) == 0) {
+ Div = 5;
+ Opc = RISCV::SH2ADD;
+ } else if (isInt<32>(Hi52 / 9) && (Hi52 % 9) == 0) {
+ Div = 9;
+ Opc = RISCV::SH3ADD;
+ }
+ // Build the new instruction sequence.
+ if (Div > 0) {
+ // A Val with zero Lo12 (i.e. Val equals Hi52) should already have been
+ // turned into LUI+SH*ADD by the previous optimization.
+ assert(Lo12 != 0 &&
+ "unexpected instruction sequence for immediate materialisation");
+ generateInstSeqImpl(Hi52 / Div, ActiveFeatures, TmpSeq);
+ TmpSeq.push_back(RISCVMatInt::Inst(Opc, 0));
+ TmpSeq.push_back(RISCVMatInt::Inst(RISCV::ADDI, Lo12));
+ if (TmpSeq.size() < Res.size())
+ Res = TmpSeq;
+ }
+ }
+
return Res;
}
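As a concrete effect of the new Zbs path above, a constant such as 0x100001234 can now be materialized as LUI+ADDIW+BSETI (setting bit 32) instead of the four-instruction LUI+ADDIW+SLLI+ADDI sequence. Below is a minimal sketch of driving the materializer; the generateInstSeq/InstSeq/Inst names follow the signatures visible in this diff, while the include paths and the helper name are illustrative assumptions.

  #include "MCTargetDesc/RISCVMatInt.h"
  #include "llvm/MC/MCSubtargetInfo.h"

  // Returns how many instructions the materializer would emit for Val. With
  // Zba/Zbs enabled in STI, the SLLI.UW/BSETI/BCLRI/SH*ADD paths above can
  // shorten the returned sequence.
  static unsigned countLoadImmInsts(int64_t Val,
                                    const llvm::MCSubtargetInfo &STI) {
    llvm::RISCVMatInt::InstSeq Seq =
        llvm::RISCVMatInt::generateInstSeq(Val, STI.getFeatureBits());
    return Seq.size();
  }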
int getIntMatCost(const APInt &Val, unsigned Size,
- const FeatureBitset &ActiveFeatures,
- bool CompressionCost) {
+ const FeatureBitset &ActiveFeatures, bool CompressionCost) {
bool IsRV64 = ActiveFeatures[RISCV::Feature64Bit];
bool HasRVC = CompressionCost && ActiveFeatures[RISCV::FeatureStdExtC];
int PlatRegSize = IsRV64 ? 64 : 32;
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
index 13c4b84aa300..2f016374e6a2 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
@@ -11,9 +11,11 @@
//===----------------------------------------------------------------------===//
#include "RISCVTargetStreamer.h"
+#include "RISCVBaseInfo.h"
#include "RISCVMCTargetDesc.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/RISCVAttributes.h"
+#include "llvm/Support/RISCVISAInfo.h"
using namespace llvm;
@@ -43,57 +45,19 @@ void RISCVTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) {
else
emitAttribute(RISCVAttrs::STACK_ALIGN, RISCVAttrs::ALIGN_16);
- std::string Arch = "rv32";
- if (STI.hasFeature(RISCV::Feature64Bit))
- Arch = "rv64";
- if (STI.hasFeature(RISCV::FeatureRV32E))
- Arch += "e1p9";
- else
- Arch += "i2p0";
- if (STI.hasFeature(RISCV::FeatureStdExtM))
- Arch += "_m2p0";
- if (STI.hasFeature(RISCV::FeatureStdExtA))
- Arch += "_a2p0";
- if (STI.hasFeature(RISCV::FeatureStdExtF))
- Arch += "_f2p0";
- if (STI.hasFeature(RISCV::FeatureStdExtD))
- Arch += "_d2p0";
- if (STI.hasFeature(RISCV::FeatureStdExtC))
- Arch += "_c2p0";
- if (STI.hasFeature(RISCV::FeatureStdExtB))
- Arch += "_b0p93";
- if (STI.hasFeature(RISCV::FeatureStdExtV))
- Arch += "_v0p10";
- if (STI.hasFeature(RISCV::FeatureExtZfh))
- Arch += "_zfh0p1";
- if (STI.hasFeature(RISCV::FeatureExtZba))
- Arch += "_zba0p93";
- if (STI.hasFeature(RISCV::FeatureExtZbb))
- Arch += "_zbb0p93";
- if (STI.hasFeature(RISCV::FeatureExtZbc))
- Arch += "_zbc0p93";
- if (STI.hasFeature(RISCV::FeatureExtZbe))
- Arch += "_zbe0p93";
- if (STI.hasFeature(RISCV::FeatureExtZbf))
- Arch += "_zbf0p93";
- if (STI.hasFeature(RISCV::FeatureExtZbm))
- Arch += "_zbm0p93";
- if (STI.hasFeature(RISCV::FeatureExtZbp))
- Arch += "_zbp0p93";
- if (STI.hasFeature(RISCV::FeatureExtZbproposedc))
- Arch += "_zbproposedc0p93";
- if (STI.hasFeature(RISCV::FeatureExtZbr))
- Arch += "_zbr0p93";
- if (STI.hasFeature(RISCV::FeatureExtZbs))
- Arch += "_zbs0p93";
- if (STI.hasFeature(RISCV::FeatureExtZbt))
- Arch += "_zbt0p93";
- if (STI.hasFeature(RISCV::FeatureExtZvamo))
- Arch += "_zvamo0p10";
- if (STI.hasFeature(RISCV::FeatureStdExtZvlsseg))
- Arch += "_zvlsseg0p10";
-
- emitTextAttribute(RISCVAttrs::ARCH, Arch);
+ unsigned XLen = STI.hasFeature(RISCV::Feature64Bit) ? 64 : 32;
+ std::vector<std::string> FeatureVector;
+ RISCVFeatures::toFeatureVector(FeatureVector, STI.getFeatureBits());
+
+ auto ParseResult = llvm::RISCVISAInfo::parseFeatures(XLen, FeatureVector);
+ if (!ParseResult) {
+ /* Any error in the features should have been handled earlier. */
+ consumeError(ParseResult.takeError());
+ llvm_unreachable("Parsing feature error when emitTargetAttributes?");
+ } else {
+ auto &ISAInfo = *ParseResult;
+ emitTextAttribute(RISCVAttrs::ARCH, ISAInfo->toString());
+ }
}
// This part is for ascii assembly output
diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h
index ef1f97067e12..b415c9f35e7f 100644
--- a/llvm/lib/Target/RISCV/RISCV.h
+++ b/llvm/lib/Target/RISCV/RISCV.h
@@ -37,6 +37,9 @@ bool LowerRISCVMachineOperandToMCOperand(const MachineOperand &MO,
FunctionPass *createRISCVISelDag(RISCVTargetMachine &TM);
+FunctionPass *createRISCVGatherScatterLoweringPass();
+void initializeRISCVGatherScatterLoweringPass(PassRegistry &);
+
FunctionPass *createRISCVMergeBaseOffsetOptPass();
void initializeRISCVMergeBaseOffsetOptPass(PassRegistry &);
diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td
index 52e8d8cdc774..772a4f8ecd53 100644
--- a/llvm/lib/Target/RISCV/RISCV.td
+++ b/llvm/lib/Target/RISCV/RISCV.td
@@ -41,12 +41,20 @@ def HasStdExtD : Predicate<"Subtarget->hasStdExtD()">,
AssemblerPredicate<(all_of FeatureStdExtD),
"'D' (Double-Precision Floating-Point)">;
-def FeatureExtZfh
+def FeatureStdExtZfhmin
+ : SubtargetFeature<"experimental-zfhmin", "HasStdExtZfhmin", "true",
+ "'Zfhmin' (Half-Precision Floating-Point Minimal)",
+ [FeatureStdExtF]>;
+def HasStdExtZfhmin : Predicate<"Subtarget->hasStdExtZfhmin()">,
+ AssemblerPredicate<(all_of FeatureStdExtZfhmin),
+ "'Zfhmin' (Half-Precision Floating-Point Minimal)">;
+
+def FeatureStdExtZfh
: SubtargetFeature<"experimental-zfh", "HasStdExtZfh", "true",
"'Zfh' (Half-Precision Floating-Point)",
- [FeatureStdExtF]>;
+ [FeatureStdExtZfhmin, FeatureStdExtF]>;
def HasStdExtZfh : Predicate<"Subtarget->hasStdExtZfh()">,
- AssemblerPredicate<(all_of FeatureExtZfh),
+ AssemblerPredicate<(all_of FeatureStdExtZfh),
"'Zfh' (Half-Precision Floating-Point)">;
def FeatureStdExtC
@@ -56,109 +64,85 @@ def HasStdExtC : Predicate<"Subtarget->hasStdExtC()">,
AssemblerPredicate<(all_of FeatureStdExtC),
"'C' (Compressed Instructions)">;
-def FeatureExtZba
+def FeatureStdExtZba
: SubtargetFeature<"experimental-zba", "HasStdExtZba", "true",
"'Zba' (Address calculation 'B' Instructions)">;
def HasStdExtZba : Predicate<"Subtarget->hasStdExtZba()">,
- AssemblerPredicate<(all_of FeatureExtZba),
+ AssemblerPredicate<(all_of FeatureStdExtZba),
"'Zba' (Address calculation 'B' Instructions)">;
def NotHasStdExtZba : Predicate<"!Subtarget->hasStdExtZba()">;
-def FeatureExtZbb
+def FeatureStdExtZbb
: SubtargetFeature<"experimental-zbb", "HasStdExtZbb", "true",
"'Zbb' (Base 'B' Instructions)">;
def HasStdExtZbb : Predicate<"Subtarget->hasStdExtZbb()">,
- AssemblerPredicate<(all_of FeatureExtZbb),
+ AssemblerPredicate<(all_of FeatureStdExtZbb),
"'Zbb' (Base 'B' Instructions)">;
-def FeatureExtZbc
+def FeatureStdExtZbc
: SubtargetFeature<"experimental-zbc", "HasStdExtZbc", "true",
"'Zbc' (Carry-Less 'B' Instructions)">;
def HasStdExtZbc : Predicate<"Subtarget->hasStdExtZbc()">,
- AssemblerPredicate<(all_of FeatureExtZbc),
+ AssemblerPredicate<(all_of FeatureStdExtZbc),
"'Zbc' (Carry-Less 'B' Instructions)">;
-def FeatureExtZbe
+def FeatureStdExtZbe
: SubtargetFeature<"experimental-zbe", "HasStdExtZbe", "true",
"'Zbe' (Extract-Deposit 'B' Instructions)">;
def HasStdExtZbe : Predicate<"Subtarget->hasStdExtZbe()">,
- AssemblerPredicate<(all_of FeatureExtZbe),
+ AssemblerPredicate<(all_of FeatureStdExtZbe),
"'Zbe' (Extract-Deposit 'B' Instructions)">;
-def FeatureExtZbf
+def FeatureStdExtZbf
: SubtargetFeature<"experimental-zbf", "HasStdExtZbf", "true",
"'Zbf' (Bit-Field 'B' Instructions)">;
def HasStdExtZbf : Predicate<"Subtarget->hasStdExtZbf()">,
- AssemblerPredicate<(all_of FeatureExtZbf),
+ AssemblerPredicate<(all_of FeatureStdExtZbf),
"'Zbf' (Bit-Field 'B' Instructions)">;
-def FeatureExtZbm
+def FeatureStdExtZbm
: SubtargetFeature<"experimental-zbm", "HasStdExtZbm", "true",
"'Zbm' (Matrix 'B' Instructions)">;
def HasStdExtZbm : Predicate<"Subtarget->hasStdExtZbm()">,
- AssemblerPredicate<(all_of FeatureExtZbm),
+ AssemblerPredicate<(all_of FeatureStdExtZbm),
"'Zbm' (Matrix 'B' Instructions)">;
-def FeatureExtZbp
+def FeatureStdExtZbp
: SubtargetFeature<"experimental-zbp", "HasStdExtZbp", "true",
"'Zbp' (Permutation 'B' Instructions)">;
def HasStdExtZbp : Predicate<"Subtarget->hasStdExtZbp()">,
- AssemblerPredicate<(all_of FeatureExtZbp),
+ AssemblerPredicate<(all_of FeatureStdExtZbp),
"'Zbp' (Permutation 'B' Instructions)">;
-def FeatureExtZbr
+def FeatureStdExtZbr
: SubtargetFeature<"experimental-zbr", "HasStdExtZbr", "true",
"'Zbr' (Polynomial Reduction 'B' Instructions)">;
def HasStdExtZbr : Predicate<"Subtarget->hasStdExtZbr()">,
- AssemblerPredicate<(all_of FeatureExtZbr),
+ AssemblerPredicate<(all_of FeatureStdExtZbr),
"'Zbr' (Polynomial Reduction 'B' Instructions)">;
-def FeatureExtZbs
+def FeatureStdExtZbs
: SubtargetFeature<"experimental-zbs", "HasStdExtZbs", "true",
"'Zbs' (Single-Bit 'B' Instructions)">;
def HasStdExtZbs : Predicate<"Subtarget->hasStdExtZbs()">,
- AssemblerPredicate<(all_of FeatureExtZbs),
+ AssemblerPredicate<(all_of FeatureStdExtZbs),
"'Zbs' (Single-Bit 'B' Instructions)">;
-def FeatureExtZbt
+def FeatureStdExtZbt
: SubtargetFeature<"experimental-zbt", "HasStdExtZbt", "true",
"'Zbt' (Ternary 'B' Instructions)">;
def HasStdExtZbt : Predicate<"Subtarget->hasStdExtZbt()">,
- AssemblerPredicate<(all_of FeatureExtZbt),
+ AssemblerPredicate<(all_of FeatureStdExtZbt),
"'Zbt' (Ternary 'B' Instructions)">;
// Some instructions belong to both the basic and the permutation
// subextensions. They should be enabled if either has been specified.
def HasStdExtZbbOrZbp
: Predicate<"Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbp()">,
- AssemblerPredicate<(any_of FeatureExtZbb, FeatureExtZbp),
+ AssemblerPredicate<(any_of FeatureStdExtZbb, FeatureStdExtZbp),
"'Zbb' (Base 'B' Instructions) or "
"'Zbp' (Permutation 'B' Instructions)">;
-def FeatureExtZbproposedc
- : SubtargetFeature<"experimental-zbproposedc", "HasStdExtZbproposedc", "true",
- "'Zbproposedc' (Proposed Compressed 'B' Instructions)">;
-def HasStdExtZbproposedc : Predicate<"Subtarget->hasStdExtZbproposedc()">,
- AssemblerPredicate<(all_of FeatureExtZbproposedc),
- "'Zbproposedc' (Proposed Compressed 'B' Instructions)">;
-
-def FeatureStdExtB
- : SubtargetFeature<"experimental-b", "HasStdExtB", "true",
- "'B' (Bit Manipulation Instructions)",
- [FeatureExtZba,
- FeatureExtZbb,
- FeatureExtZbc,
- FeatureExtZbe,
- FeatureExtZbf,
- FeatureExtZbm,
- FeatureExtZbp,
- FeatureExtZbr,
- FeatureExtZbs,
- FeatureExtZbt]>;
-def HasStdExtB : Predicate<"Subtarget->hasStdExtB()">,
- AssemblerPredicate<(all_of FeatureStdExtB),
- "'B' (Bit Manipulation Instructions)">;
-
def FeatureNoRVCHints
: SubtargetFeature<"no-rvc-hints", "EnableRVCHintInstrs", "false",
"Disable RVC Hint Instructions.">;
@@ -173,6 +157,9 @@ def HasStdExtV : Predicate<"Subtarget->hasStdExtV()">,
AssemblerPredicate<(all_of FeatureStdExtV),
"'V' (Vector Instructions)">;
+def HasVInstructions : Predicate<"Subtarget->hasVInstructions()">;
+def HasVInstructionsAnyF : Predicate<"Subtarget->hasVInstructionsAnyF()">;
+
def FeatureStdExtZvlsseg
: SubtargetFeature<"experimental-zvlsseg", "HasStdExtZvlsseg", "true",
"'Zvlsseg' (Vector segment load/store instructions)",
@@ -181,12 +168,12 @@ def HasStdExtZvlsseg : Predicate<"Subtarget->hasStdExtZvlsseg()">,
AssemblerPredicate<(all_of FeatureStdExtZvlsseg),
"'Zvlsseg' (Vector segment load/store instructions)">;
-def FeatureExtZvamo
+def FeatureStdExtZvamo
: SubtargetFeature<"experimental-zvamo", "HasStdExtZvamo", "true",
"'Zvamo' (Vector AMO Operations)",
[FeatureStdExtV]>;
def HasStdExtZvamo : Predicate<"Subtarget->hasStdExtZvamo()">,
- AssemblerPredicate<(all_of FeatureExtZvamo),
+ AssemblerPredicate<(all_of FeatureStdExtZvamo),
"'Zvamo' (Vector AMO Operations)">;
def Feature64Bit
@@ -250,22 +237,63 @@ def : ProcessorModel<"rocket-rv64", RocketModel, [Feature64Bit]>;
def : ProcessorModel<"sifive-7-rv32", SiFive7Model, []>;
def : ProcessorModel<"sifive-7-rv64", SiFive7Model, [Feature64Bit]>;
+def : ProcessorModel<"sifive-e20", RocketModel, [FeatureStdExtM,
+ FeatureStdExtC]>;
+
+def : ProcessorModel<"sifive-e21", RocketModel, [FeatureStdExtM,
+ FeatureStdExtA,
+ FeatureStdExtC]>;
+
+def : ProcessorModel<"sifive-e24", RocketModel, [FeatureStdExtM,
+ FeatureStdExtA,
+ FeatureStdExtF,
+ FeatureStdExtC]>;
+
def : ProcessorModel<"sifive-e31", RocketModel, [FeatureStdExtM,
FeatureStdExtA,
FeatureStdExtC]>;
-def : ProcessorModel<"sifive-u54", RocketModel, [Feature64Bit,
+def : ProcessorModel<"sifive-e34", RocketModel, [FeatureStdExtM,
+ FeatureStdExtA,
+ FeatureStdExtF,
+ FeatureStdExtC]>;
+
+def : ProcessorModel<"sifive-e76", SiFive7Model, [FeatureStdExtM,
+ FeatureStdExtA,
+ FeatureStdExtF,
+ FeatureStdExtC]>;
+
+def : ProcessorModel<"sifive-s21", RocketModel, [Feature64Bit,
+ FeatureStdExtM,
+ FeatureStdExtA,
+ FeatureStdExtC]>;
+
+def : ProcessorModel<"sifive-s51", RocketModel, [Feature64Bit,
+ FeatureStdExtM,
+ FeatureStdExtA,
+ FeatureStdExtC]>;
+
+def : ProcessorModel<"sifive-s54", RocketModel, [Feature64Bit,
FeatureStdExtM,
FeatureStdExtA,
FeatureStdExtF,
FeatureStdExtD,
FeatureStdExtC]>;
-def : ProcessorModel<"sifive-e76", SiFive7Model, [FeatureStdExtM,
+def : ProcessorModel<"sifive-s76", SiFive7Model, [Feature64Bit,
+ FeatureStdExtM,
FeatureStdExtA,
FeatureStdExtF,
+ FeatureStdExtD,
FeatureStdExtC]>;
+def : ProcessorModel<"sifive-u54", RocketModel, [Feature64Bit,
+ FeatureStdExtM,
+ FeatureStdExtA,
+ FeatureStdExtF,
+ FeatureStdExtD,
+ FeatureStdExtC]>;
+
def : ProcessorModel<"sifive-u74", SiFive7Model, [Feature64Bit,
FeatureStdExtM,
FeatureStdExtA,
diff --git a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
index bdf30f8eb1b3..9fed6e7baadc 100644
--- a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
+++ b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
@@ -27,7 +27,7 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index 31ef752967cc..80340ee81509 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -105,6 +105,7 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
case RISCV::PseudoLA_TLS_GD:
return expandLoadTLSGDAddress(MBB, MBBI, NextMBBI);
case RISCV::PseudoVSETVLI:
+ case RISCV::PseudoVSETVLIX0:
case RISCV::PseudoVSETIVLI:
return expandVSetVL(MBB, MBBI);
case RISCV::PseudoVMCLR_M_B1:
@@ -246,13 +247,14 @@ bool RISCVExpandPseudo::expandVSetVL(MachineBasicBlock &MBB,
DebugLoc DL = MBBI->getDebugLoc();
assert((MBBI->getOpcode() == RISCV::PseudoVSETVLI ||
+ MBBI->getOpcode() == RISCV::PseudoVSETVLIX0 ||
MBBI->getOpcode() == RISCV::PseudoVSETIVLI) &&
"Unexpected pseudo instruction");
unsigned Opcode;
- if (MBBI->getOpcode() == RISCV::PseudoVSETVLI)
- Opcode = RISCV::VSETVLI;
- else
+ if (MBBI->getOpcode() == RISCV::PseudoVSETIVLI)
Opcode = RISCV::VSETIVLI;
+ else
+ Opcode = RISCV::VSETVLI;
const MCInstrDesc &Desc = TII->get(Opcode);
assert(Desc.getNumOperands() == 3 && "Unexpected instruction format");
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index 188bd49595a5..595c3cdfbb1d 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -220,6 +220,10 @@ getRestoreLibCallName(const MachineFunction &MF,
return RestoreLibCalls[LibCallID];
}
+// Return true if the specified function should have a dedicated frame
+// pointer register. This is true if frame pointer elimination is
+// disabled, if it needs dynamic stack realignment, if the function has
+// variable sized allocas, or if the frame address is taken.
bool RISCVFrameLowering::hasFP(const MachineFunction &MF) const {
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
@@ -671,15 +675,15 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
// |--------------------------| -- |
// | Padding after RVV | | |
// | (not counted in | | |
- // | MFI.getStackSize() | | |
+ // | MFI.getStackSize()) | | |
// |--------------------------| -- |-- MFI.getStackSize()
// | RVV objects | | |
// | (not counted in | | |
- // | MFI.getStackSize() | | |
+ // | MFI.getStackSize()) | | |
// |--------------------------| -- |
// | Padding before RVV | | |
// | (not counted in | | |
- // | MFI.getStackSize() | | |
+ // | MFI.getStackSize()) | | |
// |--------------------------| -- |
// | scalar local variables | | <----'
// |--------------------------| -- <-- BP
@@ -696,15 +700,15 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
// |--------------------------| -- |
// | Padding after RVV | | |
// | (not counted in | | |
- // | MFI.getStackSize() | | |
+ // | MFI.getStackSize()) | | |
// |--------------------------| -- |-- MFI.getStackSize()
// | RVV objects | | |
// | (not counted in | | |
- // | MFI.getStackSize() | | |
+ // | MFI.getStackSize()) | | |
// |--------------------------| -- |
// | Padding before RVV | | |
// | (not counted in | | |
- // | MFI.getStackSize() | | |
+ // | MFI.getStackSize()) | | |
// |--------------------------| -- |
// | scalar local variables | | <----'
// |--------------------------| -- <-- SP
@@ -749,15 +753,15 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
// |--------------------------| -- |
// | Padding after RVV | | |
// | (not counted in | | |
- // | MFI.getStackSize() | | |
+ // | MFI.getStackSize()) | | |
// |--------------------------| -- |
// | RVV objects | | |-- MFI.getStackSize()
// | (not counted in | | |
- // | MFI.getStackSize() | | |
+ // | MFI.getStackSize()) | | |
// |--------------------------| -- |
// | Padding before RVV | | |
// | (not counted in | | |
- // | MFI.getStackSize() | | |
+ // | MFI.getStackSize()) | | |
// |--------------------------| -- |
// | scalar local variables | | <----'
// |--------------------------| -- <-- SP
@@ -767,8 +771,10 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
// objects to 8 bytes.
if (MFI.getStackID(FI) == TargetStackID::Default) {
if (MFI.isFixedObjectIndex(FI)) {
- Offset += StackOffset::get(MFI.getStackSize() + RVFI->getRVVPadding()
- + RVFI->getLibCallStackSize(), RVFI->getRVVStackSize());
+ Offset +=
+ StackOffset::get(MFI.getStackSize() + RVFI->getRVVPadding() +
+ RVFI->getLibCallStackSize(),
+ RVFI->getRVVStackSize());
} else {
Offset += StackOffset::getFixed(MFI.getStackSize());
}
@@ -860,7 +866,7 @@ RISCVFrameLowering::assignRVVStackObjectOffsets(MachineFrameInfo &MFI) const {
}
static bool hasRVVSpillWithFIs(MachineFunction &MF, const RISCVInstrInfo &TII) {
- if (!MF.getSubtarget<RISCVSubtarget>().hasStdExtV())
+ if (!MF.getSubtarget<RISCVSubtarget>().hasVInstructions())
return false;
return any_of(MF, [&TII](const MachineBasicBlock &MBB) {
return any_of(MBB, [&TII](const MachineInstr &MI) {
@@ -1040,7 +1046,8 @@ bool RISCVFrameLowering::spillCalleeSavedRegisters(
// Insert the spill to the stack frame.
Register Reg = CS.getReg();
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- TII.storeRegToStackSlot(MBB, MI, Reg, true, CS.getFrameIdx(), RC, TRI);
+ TII.storeRegToStackSlot(MBB, MI, Reg, !MBB.isLiveIn(Reg), CS.getFrameIdx(),
+ RC, TRI);
}
return true;
@@ -1087,6 +1094,14 @@ bool RISCVFrameLowering::restoreCalleeSavedRegisters(
return true;
}
+bool RISCVFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
+ // Keep the conventional code flow when not optimizing.
+ if (MF.getFunction().hasOptNone())
+ return false;
+
+ return true;
+}
+
bool RISCVFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
const MachineFunction *MF = MBB.getParent();
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
index bc3ace786272..1e94e34acf2f 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
@@ -65,6 +65,8 @@ public:
bool canUseAsPrologue(const MachineBasicBlock &MBB) const override;
bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override;
+ bool enableShrinkWrapping(const MachineFunction &MF) const override;
+
bool isSupportedStackID(TargetStackID::Value ID) const override;
TargetStackID::Value getStackIDForScalableVectors() const override;
diff --git a/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp b/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
new file mode 100644
index 000000000000..d47bd739235f
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
@@ -0,0 +1,475 @@
+//===- RISCVGatherScatterLowering.cpp - Gather/Scatter lowering -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass custom-lowers llvm.masked.gather and llvm.masked.scatter
+// intrinsics to RISCV intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVTargetMachine.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IntrinsicsRISCV.h"
+#include "llvm/Transforms/Utils/Local.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-gather-scatter-lowering"
+
+namespace {
+
+class RISCVGatherScatterLowering : public FunctionPass {
+ const RISCVSubtarget *ST = nullptr;
+ const RISCVTargetLowering *TLI = nullptr;
+ LoopInfo *LI = nullptr;
+ const DataLayout *DL = nullptr;
+
+ SmallVector<WeakTrackingVH> MaybeDeadPHIs;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+
+ RISCVGatherScatterLowering() : FunctionPass(ID) {}
+
+ bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<TargetPassConfig>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ }
+
+ StringRef getPassName() const override {
+ return "RISCV gather/scatter lowering";
+ }
+
+private:
+ bool isLegalTypeAndAlignment(Type *DataType, Value *AlignOp);
+
+ bool tryCreateStridedLoadStore(IntrinsicInst *II, Type *DataType, Value *Ptr,
+ Value *AlignOp);
+
+ std::pair<Value *, Value *> determineBaseAndStride(GetElementPtrInst *GEP,
+ IRBuilder<> &Builder);
+
+ bool matchStridedRecurrence(Value *Index, Loop *L, Value *&Stride,
+ PHINode *&BasePtr, BinaryOperator *&Inc,
+ IRBuilder<> &Builder);
+};
+
+} // end anonymous namespace
+
+char RISCVGatherScatterLowering::ID = 0;
+
+INITIALIZE_PASS(RISCVGatherScatterLowering, DEBUG_TYPE,
+ "RISCV gather/scatter lowering pass", false, false)
+
+FunctionPass *llvm::createRISCVGatherScatterLoweringPass() {
+ return new RISCVGatherScatterLowering();
+}
+
+bool RISCVGatherScatterLowering::isLegalTypeAndAlignment(Type *DataType,
+ Value *AlignOp) {
+ Type *ScalarType = DataType->getScalarType();
+ if (!TLI->isLegalElementTypeForRVV(ScalarType))
+ return false;
+
+ MaybeAlign MA = cast<ConstantInt>(AlignOp)->getMaybeAlignValue();
+ if (MA && MA->value() < DL->getTypeStoreSize(ScalarType).getFixedSize())
+ return false;
+
+ // FIXME: Let the backend type legalize by splitting/widening?
+ EVT DataVT = TLI->getValueType(*DL, DataType);
+ if (!TLI->isTypeLegal(DataVT))
+ return false;
+
+ return true;
+}
+
+// TODO: Should we consider the mask when looking for a stride?
+static std::pair<Value *, Value *> matchStridedConstant(Constant *StartC) {
+ unsigned NumElts = cast<FixedVectorType>(StartC->getType())->getNumElements();
+
+ // Check that the start value is a strided constant.
+ auto *StartVal =
+ dyn_cast_or_null<ConstantInt>(StartC->getAggregateElement((unsigned)0));
+ if (!StartVal)
+ return std::make_pair(nullptr, nullptr);
+ APInt StrideVal(StartVal->getValue().getBitWidth(), 0);
+ ConstantInt *Prev = StartVal;
+ for (unsigned i = 1; i != NumElts; ++i) {
+ auto *C = dyn_cast_or_null<ConstantInt>(StartC->getAggregateElement(i));
+ if (!C)
+ return std::make_pair(nullptr, nullptr);
+
+ APInt LocalStride = C->getValue() - Prev->getValue();
+ if (i == 1)
+ StrideVal = LocalStride;
+ else if (StrideVal != LocalStride)
+ return std::make_pair(nullptr, nullptr);
+
+ Prev = C;
+ }
+
+ Value *Stride = ConstantInt::get(StartVal->getType(), StrideVal);
+
+ return std::make_pair(StartVal, Stride);
+}
+
+// Recursively walk up the use-def chain until we find a Phi with a strided
+// start value. Build and update a scalar recurrence as we unwind the recursion.
+// We also update the Stride as we unwind. Our goal is to move all of the
+// arithmetic out of the loop.
+bool RISCVGatherScatterLowering::matchStridedRecurrence(Value *Index, Loop *L,
+ Value *&Stride,
+ PHINode *&BasePtr,
+ BinaryOperator *&Inc,
+ IRBuilder<> &Builder) {
+ // Our base case is a Phi.
+ if (auto *Phi = dyn_cast<PHINode>(Index)) {
+ // A phi node we want to perform this function on should be from the
+ // loop header.
+ if (Phi->getParent() != L->getHeader())
+ return false;
+
+ Value *Step, *Start;
+ if (!matchSimpleRecurrence(Phi, Inc, Start, Step) ||
+ Inc->getOpcode() != Instruction::Add)
+ return false;
+ assert(Phi->getNumIncomingValues() == 2 && "Expected 2 operand phi.");
+ unsigned IncrementingBlock = Phi->getIncomingValue(0) == Inc ? 0 : 1;
+ assert(Phi->getIncomingValue(IncrementingBlock) == Inc &&
+ "Expected one operand of phi to be Inc");
+
+ // Only proceed if the step is loop invariant.
+ if (!L->isLoopInvariant(Step))
+ return false;
+
+ // Step should be a splat.
+ Step = getSplatValue(Step);
+ if (!Step)
+ return false;
+
+ // Start should be a strided constant.
+ auto *StartC = dyn_cast<Constant>(Start);
+ if (!StartC)
+ return false;
+
+ std::tie(Start, Stride) = matchStridedConstant(StartC);
+ if (!Start)
+ return false;
+ assert(Stride != nullptr);
+
+ // Build scalar phi and increment.
+ BasePtr =
+ PHINode::Create(Start->getType(), 2, Phi->getName() + ".scalar", Phi);
+ Inc = BinaryOperator::CreateAdd(BasePtr, Step, Inc->getName() + ".scalar",
+ Inc);
+ BasePtr->addIncoming(Start, Phi->getIncomingBlock(1 - IncrementingBlock));
+ BasePtr->addIncoming(Inc, Phi->getIncomingBlock(IncrementingBlock));
+
+ // Note that this Phi might be eligible for removal.
+ MaybeDeadPHIs.push_back(Phi);
+ return true;
+ }
+
+ // Otherwise look for binary operator.
+ auto *BO = dyn_cast<BinaryOperator>(Index);
+ if (!BO)
+ return false;
+
+ if (BO->getOpcode() != Instruction::Add &&
+ BO->getOpcode() != Instruction::Or &&
+ BO->getOpcode() != Instruction::Mul &&
+ BO->getOpcode() != Instruction::Shl)
+ return false;
+
+ // Only support shift by constant.
+ if (BO->getOpcode() == Instruction::Shl && !isa<Constant>(BO->getOperand(1)))
+ return false;
+
+ // We need to be able to treat Or as Add.
+ if (BO->getOpcode() == Instruction::Or &&
+ !haveNoCommonBitsSet(BO->getOperand(0), BO->getOperand(1), *DL))
+ return false;
+
+ // We should have one operand in the loop and one splat.
+ Value *OtherOp;
+ if (isa<Instruction>(BO->getOperand(0)) &&
+ L->contains(cast<Instruction>(BO->getOperand(0)))) {
+ Index = cast<Instruction>(BO->getOperand(0));
+ OtherOp = BO->getOperand(1);
+ } else if (isa<Instruction>(BO->getOperand(1)) &&
+ L->contains(cast<Instruction>(BO->getOperand(1)))) {
+ Index = cast<Instruction>(BO->getOperand(1));
+ OtherOp = BO->getOperand(0);
+ } else {
+ return false;
+ }
+
+ // Make sure other op is loop invariant.
+ if (!L->isLoopInvariant(OtherOp))
+ return false;
+
+ // Make sure we have a splat.
+ Value *SplatOp = getSplatValue(OtherOp);
+ if (!SplatOp)
+ return false;
+
+ // Recurse up the use-def chain.
+ if (!matchStridedRecurrence(Index, L, Stride, BasePtr, Inc, Builder))
+ return false;
+
+ // Locate the Step and Start values from the recurrence.
+ unsigned StepIndex = Inc->getOperand(0) == BasePtr ? 1 : 0;
+ unsigned StartBlock = BasePtr->getOperand(0) == Inc ? 1 : 0;
+ Value *Step = Inc->getOperand(StepIndex);
+ Value *Start = BasePtr->getOperand(StartBlock);
+
+ // We need to adjust the start value in the preheader.
+ Builder.SetInsertPoint(
+ BasePtr->getIncomingBlock(StartBlock)->getTerminator());
+ Builder.SetCurrentDebugLocation(DebugLoc());
+
+ switch (BO->getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected opcode!");
+ case Instruction::Add:
+ case Instruction::Or: {
+ // An add only affects the start value. It's ok to do this for Or because
+ // we already checked that there are no common set bits.
+
+ // If the start value is Zero, just take the SplatOp.
+ if (isa<ConstantInt>(Start) && cast<ConstantInt>(Start)->isZero())
+ Start = SplatOp;
+ else
+ Start = Builder.CreateAdd(Start, SplatOp, "start");
+ BasePtr->setIncomingValue(StartBlock, Start);
+ break;
+ }
+ case Instruction::Mul: {
+ // If the start is zero we don't need to multiply.
+ if (!isa<ConstantInt>(Start) || !cast<ConstantInt>(Start)->isZero())
+ Start = Builder.CreateMul(Start, SplatOp, "start");
+
+ Step = Builder.CreateMul(Step, SplatOp, "step");
+
+    // If the Stride is 1, just take the SplatOp.
+ if (isa<ConstantInt>(Stride) && cast<ConstantInt>(Stride)->isOne())
+ Stride = SplatOp;
+ else
+ Stride = Builder.CreateMul(Stride, SplatOp, "stride");
+ Inc->setOperand(StepIndex, Step);
+ BasePtr->setIncomingValue(StartBlock, Start);
+ break;
+ }
+ case Instruction::Shl: {
+ // If the start is zero we don't need to shift.
+ if (!isa<ConstantInt>(Start) || !cast<ConstantInt>(Start)->isZero())
+ Start = Builder.CreateShl(Start, SplatOp, "start");
+ Step = Builder.CreateShl(Step, SplatOp, "step");
+ Stride = Builder.CreateShl(Stride, SplatOp, "stride");
+ Inc->setOperand(StepIndex, Step);
+ BasePtr->setIncomingValue(StartBlock, Start);
+ break;
+ }
+ }
+
+ return true;
+}
+
+std::pair<Value *, Value *>
+RISCVGatherScatterLowering::determineBaseAndStride(GetElementPtrInst *GEP,
+ IRBuilder<> &Builder) {
+
+ SmallVector<Value *, 2> Ops(GEP->operands());
+
+ // Base pointer needs to be a scalar.
+ if (Ops[0]->getType()->isVectorTy())
+ return std::make_pair(nullptr, nullptr);
+
+ // Make sure we're in a loop and it is in loop simplify form.
+ Loop *L = LI->getLoopFor(GEP->getParent());
+ if (!L || !L->isLoopSimplifyForm())
+ return std::make_pair(nullptr, nullptr);
+
+ Optional<unsigned> VecOperand;
+ unsigned TypeScale = 0;
+
+ // Look for a vector operand and scale.
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (unsigned i = 1, e = GEP->getNumOperands(); i != e; ++i, ++GTI) {
+ if (!Ops[i]->getType()->isVectorTy())
+ continue;
+
+ if (VecOperand)
+ return std::make_pair(nullptr, nullptr);
+
+ VecOperand = i;
+
+ TypeSize TS = DL->getTypeAllocSize(GTI.getIndexedType());
+ if (TS.isScalable())
+ return std::make_pair(nullptr, nullptr);
+
+ TypeScale = TS.getFixedSize();
+ }
+
+ // We need to find a vector index to simplify.
+ if (!VecOperand)
+ return std::make_pair(nullptr, nullptr);
+
+ // We can't extract the stride if the arithmetic is done at a different size
+ // than the pointer type. Adding the stride later may not wrap correctly.
+ // Technically we could handle wider indices, but I don't expect that in
+ // practice.
+ Value *VecIndex = Ops[*VecOperand];
+ Type *VecIntPtrTy = DL->getIntPtrType(GEP->getType());
+ if (VecIndex->getType() != VecIntPtrTy)
+ return std::make_pair(nullptr, nullptr);
+
+ Value *Stride;
+ BinaryOperator *Inc;
+ PHINode *BasePhi;
+ if (!matchStridedRecurrence(VecIndex, L, Stride, BasePhi, Inc, Builder))
+ return std::make_pair(nullptr, nullptr);
+
+ assert(BasePhi->getNumIncomingValues() == 2 && "Expected 2 operand phi.");
+ unsigned IncrementingBlock = BasePhi->getOperand(0) == Inc ? 0 : 1;
+ assert(BasePhi->getIncomingValue(IncrementingBlock) == Inc &&
+ "Expected one operand of phi to be Inc");
+
+ Builder.SetInsertPoint(GEP);
+
+ // Replace the vector index with the scalar phi and build a scalar GEP.
+ Ops[*VecOperand] = BasePhi;
+ Type *SourceTy = GEP->getSourceElementType();
+ Value *BasePtr =
+ Builder.CreateGEP(SourceTy, Ops[0], makeArrayRef(Ops).drop_front());
+
+ // Cast the GEP to an i8*.
+ LLVMContext &Ctx = GEP->getContext();
+ Type *I8PtrTy =
+ Type::getInt8PtrTy(Ctx, GEP->getType()->getPointerAddressSpace());
+ if (BasePtr->getType() != I8PtrTy)
+ BasePtr = Builder.CreatePointerCast(BasePtr, I8PtrTy);
+
+ // Final adjustments to stride should go in the start block.
+ Builder.SetInsertPoint(
+ BasePhi->getIncomingBlock(1 - IncrementingBlock)->getTerminator());
+
+ // Convert stride to pointer size if needed.
+ Type *IntPtrTy = DL->getIntPtrType(BasePtr->getType());
+ assert(Stride->getType() == IntPtrTy && "Unexpected type");
+
+ // Scale the stride by the size of the indexed type.
+ if (TypeScale != 1)
+ Stride = Builder.CreateMul(Stride, ConstantInt::get(IntPtrTy, TypeScale));
+
+ return std::make_pair(BasePtr, Stride);
+}
+
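+// Rewrite a fixed-length masked.gather/masked.scatter whose pointer operand is
+// a GEP with a strided vector index into riscv.masked.strided.load/store on a
+// scalar base pointer and scalar stride.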
+bool RISCVGatherScatterLowering::tryCreateStridedLoadStore(IntrinsicInst *II,
+ Type *DataType,
+ Value *Ptr,
+ Value *AlignOp) {
+ // Make sure the operation will be supported by the backend.
+ if (!isLegalTypeAndAlignment(DataType, AlignOp))
+ return false;
+
+ // Pointer should be a GEP.
+ auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
+ if (!GEP)
+ return false;
+
+ IRBuilder<> Builder(GEP);
+
+ Value *BasePtr, *Stride;
+ std::tie(BasePtr, Stride) = determineBaseAndStride(GEP, Builder);
+ if (!BasePtr)
+ return false;
+ assert(Stride != nullptr);
+
+ Builder.SetInsertPoint(II);
+
+ CallInst *Call;
+ if (II->getIntrinsicID() == Intrinsic::masked_gather)
+ Call = Builder.CreateIntrinsic(
+ Intrinsic::riscv_masked_strided_load,
+ {DataType, BasePtr->getType(), Stride->getType()},
+ {II->getArgOperand(3), BasePtr, Stride, II->getArgOperand(2)});
+ else
+ Call = Builder.CreateIntrinsic(
+ Intrinsic::riscv_masked_strided_store,
+ {DataType, BasePtr->getType(), Stride->getType()},
+ {II->getArgOperand(0), BasePtr, Stride, II->getArgOperand(3)});
+
+ Call->takeName(II);
+ II->replaceAllUsesWith(Call);
+ II->eraseFromParent();
+
+ if (GEP->use_empty())
+ RecursivelyDeleteTriviallyDeadInstructions(GEP);
+
+ return true;
+}
+
+bool RISCVGatherScatterLowering::runOnFunction(Function &F) {
+ if (skipFunction(F))
+ return false;
+
+ auto &TPC = getAnalysis<TargetPassConfig>();
+ auto &TM = TPC.getTM<RISCVTargetMachine>();
+ ST = &TM.getSubtarget<RISCVSubtarget>(F);
+ if (!ST->hasVInstructions() || !ST->useRVVForFixedLengthVectors())
+ return false;
+
+ TLI = ST->getTargetLowering();
+ DL = &F.getParent()->getDataLayout();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+
+ SmallVector<IntrinsicInst *, 4> Gathers;
+ SmallVector<IntrinsicInst *, 4> Scatters;
+
+ bool Changed = false;
+
+ for (BasicBlock &BB : F) {
+ for (Instruction &I : BB) {
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I);
+ if (II && II->getIntrinsicID() == Intrinsic::masked_gather &&
+ isa<FixedVectorType>(II->getType())) {
+ Gathers.push_back(II);
+ } else if (II && II->getIntrinsicID() == Intrinsic::masked_scatter &&
+ isa<FixedVectorType>(II->getArgOperand(0)->getType())) {
+ Scatters.push_back(II);
+ }
+ }
+ }
+
+ // Rewrite gather/scatter to form strided load/store if possible.
+ for (auto *II : Gathers)
+ Changed |= tryCreateStridedLoadStore(
+ II, II->getType(), II->getArgOperand(0), II->getArgOperand(1));
+ for (auto *II : Scatters)
+ Changed |=
+ tryCreateStridedLoadStore(II, II->getArgOperand(0)->getType(),
+ II->getArgOperand(1), II->getArgOperand(2));
+
+ // Remove any dead phis.
+ while (!MaybeDeadPHIs.empty()) {
+ if (auto *Phi = dyn_cast_or_null<PHINode>(MaybeDeadPHIs.pop_back_val()))
+ RecursivelyDeleteDeadPHINode(Phi);
+ }
+
+ return Changed;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 9866567ac1ee..66a34d73dd37 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -108,7 +108,21 @@ void RISCVDAGToDAGISel::PreprocessISelDAG() {
}
void RISCVDAGToDAGISel::PostprocessISelDAG() {
- doPeepholeLoadStoreADDI();
+ SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
+
+ bool MadeChange = false;
+ while (Position != CurDAG->allnodes_begin()) {
+ SDNode *N = &*--Position;
+ // Skip dead nodes and any non-machine opcodes.
+ if (N->use_empty() || !N->isMachineOpcode())
+ continue;
+
+ MadeChange |= doPeepholeSExtW(N);
+ MadeChange |= doPeepholeLoadStoreADDI(N);
+ }
+
+ if (MadeChange)
+ CurDAG->RemoveDeadNodes();
}
static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, int64_t Imm,
@@ -126,6 +140,9 @@ static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, int64_t Imm,
else if (Inst.Opc == RISCV::ADDUW)
Result = CurDAG->getMachineNode(RISCV::ADDUW, DL, XLenVT, SrcReg,
CurDAG->getRegister(RISCV::X0, XLenVT));
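+    // With both source operands equal, shNadd computes SrcReg * 3, 5, or 9,
+    // which the materialization sequence uses as a single-instruction multiply.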
+ else if (Inst.Opc == RISCV::SH1ADD || Inst.Opc == RISCV::SH2ADD ||
+ Inst.Opc == RISCV::SH3ADD)
+ Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SrcReg, SrcReg);
else
Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SrcReg, SDImm);
@@ -199,7 +216,7 @@ static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
- MVT *IndexVT) {
+ bool IsLoad, MVT *IndexVT) {
SDValue Chain = Node->getOperand(0);
SDValue Glue;
@@ -228,6 +245,14 @@ void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
Operands.push_back(SEWOp);
+ // Masked load has the tail policy argument.
+ if (IsMasked && IsLoad) {
+ // Policy must be a constant.
+ uint64_t Policy = Node->getConstantOperandVal(CurOp++);
+ SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
+ Operands.push_back(PolicyOp);
+ }
+
Operands.push_back(Chain); // Chain.
if (Glue)
Operands.push_back(Glue);
@@ -252,7 +277,7 @@ void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked,
}
addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
- Operands);
+ Operands, /*IsLoad=*/true);
const RISCV::VLSEGPseudo *P =
RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW,
@@ -293,7 +318,8 @@ void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) {
}
addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
- /*IsStridedOrIndexed*/ false, Operands);
+ /*IsStridedOrIndexed*/ false, Operands,
+ /*IsLoad=*/true);
const RISCV::VLSEGPseudo *P =
RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true,
@@ -338,7 +364,8 @@ void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked,
MVT IndexVT;
addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
- /*IsStridedOrIndexed*/ true, Operands, &IndexVT);
+ /*IsStridedOrIndexed*/ true, Operands,
+ /*IsLoad=*/true, &IndexVT);
assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
"Element count mismatch");
@@ -415,7 +442,8 @@ void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, bool IsMasked,
MVT IndexVT;
addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
- /*IsStridedOrIndexed*/ true, Operands, &IndexVT);
+ /*IsStridedOrIndexed*/ true, Operands,
+ /*IsLoad=*/false, &IndexVT);
assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
"Element count mismatch");
@@ -453,14 +481,24 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
switch (Opcode) {
case ISD::Constant: {
auto *ConstNode = cast<ConstantSDNode>(Node);
- if (VT == XLenVT && ConstNode->isNullValue()) {
+ if (VT == XLenVT && ConstNode->isZero()) {
SDValue New =
CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, XLenVT);
ReplaceNode(Node, New.getNode());
return;
}
- ReplaceNode(Node,
- selectImm(CurDAG, DL, ConstNode->getSExtValue(), *Subtarget));
+ int64_t Imm = ConstNode->getSExtValue();
+ // If the upper XLen-16 bits are not used, try to convert this to a simm12
+ // by sign extending bit 15.
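+      // e.g. Imm = 0xFFFF with only 16-bit users becomes -1, which
+      // materializes as a single ADDI instead of an LUI-based pair.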
+ if (isUInt<16>(Imm) && isInt<12>(SignExtend64(Imm, 16)) &&
+ hasAllHUsers(Node))
+ Imm = SignExtend64(Imm, 16);
+ // If the upper 32-bits are not used try to convert this into a simm32 by
+ // sign extending bit 32.
+ if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
+ Imm = SignExtend64(Imm, 32);
+
+ ReplaceNode(Node, selectImm(CurDAG, DL, Imm, *Subtarget));
return;
}
case ISD::FrameIndex: {
@@ -591,7 +629,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
}
}
- // Turn (and (shl x, c2) c1) -> (srli (slli c2+c3), c3) if c1 is a mask
+ // Turn (and (shl x, c2), c1) -> (srli (slli c2+c3), c3) if c1 is a mask
// shifted by c2 bits with c3 leading zeros.
if (LeftShift && isShiftedMask_64(C1)) {
uint64_t C3 = XLen - (64 - countLeadingZeros(C1));
@@ -621,6 +659,63 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
}
}
+ // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
+ // shifted mask with c2 leading zeros and c3 trailing zeros.
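+    // e.g. on RV64, (and (srl x, 8), 0x00FFFFFFFFFFFFF0) has 8 leading and 4
+    // trailing zeros, so it becomes (slli (srli x, 12), 4).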
+ if (!LeftShift && isShiftedMask_64(C1)) {
+ uint64_t Leading = XLen - (64 - countLeadingZeros(C1));
+ uint64_t C3 = countTrailingZeros(C1);
+ if (Leading == C2 && C2 + C3 < XLen && OneUseOrZExtW && !ZExtOrANDI) {
+ SDNode *SRLI = CurDAG->getMachineNode(
+ RISCV::SRLI, DL, XLenVT, X,
+ CurDAG->getTargetConstant(C2 + C3, DL, XLenVT));
+ SDNode *SLLI =
+ CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLI, 0),
+ CurDAG->getTargetConstant(C3, DL, XLenVT));
+ ReplaceNode(Node, SLLI);
+ return;
+ }
+ // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
+ if (Leading > 32 && (Leading - 32) == C2 && C2 + C3 < 32 &&
+ OneUseOrZExtW && !ZExtOrANDI) {
+ SDNode *SRLIW = CurDAG->getMachineNode(
+ RISCV::SRLIW, DL, XLenVT, X,
+ CurDAG->getTargetConstant(C2 + C3, DL, XLenVT));
+ SDNode *SLLI =
+ CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLIW, 0),
+ CurDAG->getTargetConstant(C3, DL, XLenVT));
+ ReplaceNode(Node, SLLI);
+ return;
+ }
+ }
+
+ // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
+ // shifted mask with no leading zeros and c3 trailing zeros.
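+    // e.g. on RV64, (and (shl x, 2), 0xFFFFFFFFFFFFFF00) has no leading and 8
+    // trailing zeros, so it becomes (slli (srli x, 6), 8).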
+ if (LeftShift && isShiftedMask_64(C1)) {
+ uint64_t Leading = XLen - (64 - countLeadingZeros(C1));
+ uint64_t C3 = countTrailingZeros(C1);
+ if (Leading == 0 && C2 < C3 && OneUseOrZExtW && !ZExtOrANDI) {
+ SDNode *SRLI = CurDAG->getMachineNode(
+ RISCV::SRLI, DL, XLenVT, X,
+ CurDAG->getTargetConstant(C3 - C2, DL, XLenVT));
+ SDNode *SLLI =
+ CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLI, 0),
+ CurDAG->getTargetConstant(C3, DL, XLenVT));
+ ReplaceNode(Node, SLLI);
+ return;
+ }
+ // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
+ if (C2 < C3 && Leading + C2 == 32 && OneUseOrZExtW && !ZExtOrANDI) {
+ SDNode *SRLIW = CurDAG->getMachineNode(
+ RISCV::SRLIW, DL, XLenVT, X,
+ CurDAG->getTargetConstant(C3 - C2, DL, XLenVT));
+ SDNode *SLLI =
+ CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLIW, 0),
+ CurDAG->getTargetConstant(C3, DL, XLenVT));
+ ReplaceNode(Node, SLLI);
+ return;
+ }
+ }
+
break;
}
case ISD::INTRINSIC_WO_CHAIN: {
@@ -713,7 +808,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
}
bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
MVT Src1VT = Src1.getSimpleValueType();
- unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOTOpcode;
+ unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode;
switch (RISCVTargetLowering::getLMUL(Src1VT)) {
default:
llvm_unreachable("Unexpected LMUL!");
@@ -766,31 +861,31 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
llvm_unreachable("Unexpected LMUL!");
case RISCVII::VLMUL::LMUL_F8:
VMXOROpcode = RISCV::PseudoVMXOR_MM_MF8;
- VMANDNOTOpcode = RISCV::PseudoVMANDNOT_MM_MF8;
+ VMANDNOpcode = RISCV::PseudoVMANDN_MM_MF8;
break;
case RISCVII::VLMUL::LMUL_F4:
VMXOROpcode = RISCV::PseudoVMXOR_MM_MF4;
- VMANDNOTOpcode = RISCV::PseudoVMANDNOT_MM_MF4;
+ VMANDNOpcode = RISCV::PseudoVMANDN_MM_MF4;
break;
case RISCVII::VLMUL::LMUL_F2:
VMXOROpcode = RISCV::PseudoVMXOR_MM_MF2;
- VMANDNOTOpcode = RISCV::PseudoVMANDNOT_MM_MF2;
+ VMANDNOpcode = RISCV::PseudoVMANDN_MM_MF2;
break;
case RISCVII::VLMUL::LMUL_1:
VMXOROpcode = RISCV::PseudoVMXOR_MM_M1;
- VMANDNOTOpcode = RISCV::PseudoVMANDNOT_MM_M1;
+ VMANDNOpcode = RISCV::PseudoVMANDN_MM_M1;
break;
case RISCVII::VLMUL::LMUL_2:
VMXOROpcode = RISCV::PseudoVMXOR_MM_M2;
- VMANDNOTOpcode = RISCV::PseudoVMANDNOT_MM_M2;
+ VMANDNOpcode = RISCV::PseudoVMANDN_MM_M2;
break;
case RISCVII::VLMUL::LMUL_4:
VMXOROpcode = RISCV::PseudoVMXOR_MM_M4;
- VMANDNOTOpcode = RISCV::PseudoVMANDNOT_MM_M4;
+ VMANDNOpcode = RISCV::PseudoVMANDN_MM_M4;
break;
case RISCVII::VLMUL::LMUL_8:
VMXOROpcode = RISCV::PseudoVMXOR_MM_M8;
- VMANDNOTOpcode = RISCV::PseudoVMANDNOT_MM_M8;
+ VMANDNOpcode = RISCV::PseudoVMANDN_MM_M8;
break;
}
SDValue SEW = CurDAG->getTargetConstant(
@@ -801,13 +896,13 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
SDValue MaskedOff = Node->getOperand(1);
SDValue Mask = Node->getOperand(4);
// If the MaskedOff value and the Mask are the same value use
- // vmslt{u}.vx vt, va, x; vmandnot.mm vd, vd, vt
+ // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt
// This avoids needing to copy v0 to vd before starting the next sequence.
if (Mask == MaskedOff) {
SDValue Cmp = SDValue(
CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
0);
- ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOTOpcode, DL, VT,
+ ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT,
{Mask, Cmp, VL, MaskSEW}));
return;
}
@@ -840,7 +935,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
case Intrinsic::riscv_vsetvli:
case Intrinsic::riscv_vsetvlimax: {
- if (!Subtarget->hasStdExtV())
+ if (!Subtarget->hasVInstructions())
break;
bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
@@ -859,8 +954,10 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
SDValue VLOperand;
+ unsigned Opcode = RISCV::PseudoVSETVLI;
if (VLMax) {
VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
+ Opcode = RISCV::PseudoVSETVLIX0;
} else {
VLOperand = Node->getOperand(2);
@@ -878,7 +975,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
}
ReplaceNode(Node,
- CurDAG->getMachineNode(RISCV::PseudoVSETVLI, DL, XLenVT,
+ CurDAG->getMachineNode(Opcode, DL, XLenVT,
MVT::Other, VLOperand, VTypeIOp,
/* Chain */ Node->getOperand(0)));
return;
@@ -999,7 +1096,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
MVT IndexVT;
addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
/*IsStridedOrIndexed*/ true, Operands,
- &IndexVT);
+ /*IsLoad=*/true, &IndexVT);
assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
"Element count mismatch");
@@ -1019,7 +1116,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
ReplaceNode(Node, Load);
return;
}
- case Intrinsic::riscv_vle1:
+ case Intrinsic::riscv_vlm:
case Intrinsic::riscv_vle:
case Intrinsic::riscv_vle_mask:
case Intrinsic::riscv_vlse:
@@ -1038,7 +1135,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
Operands.push_back(Node->getOperand(CurOp++));
addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
- Operands);
+ Operands, /*IsLoad=*/true);
RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
const RISCV::VLEPseudo *P =
@@ -1066,7 +1163,8 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
Operands.push_back(Node->getOperand(CurOp++));
addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
- /*IsStridedOrIndexed*/ false, Operands);
+ /*IsStridedOrIndexed*/ false, Operands,
+ /*IsLoad=*/true);
RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
const RISCV::VLEPseudo *P =
@@ -1188,7 +1286,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
MVT IndexVT;
addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
/*IsStridedOrIndexed*/ true, Operands,
- &IndexVT);
+ /*IsLoad=*/false, &IndexVT);
assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
"Element count mismatch");
@@ -1208,7 +1306,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
ReplaceNode(Node, Store);
return;
}
- case Intrinsic::riscv_vse1:
+ case Intrinsic::riscv_vsm:
case Intrinsic::riscv_vse:
case Intrinsic::riscv_vse_mask:
case Intrinsic::riscv_vsse:
@@ -1496,6 +1594,97 @@ bool RISCVDAGToDAGISel::selectZExti32(SDValue N, SDValue &Val) {
return false;
}
+// Return true if all users of this SDNode* only consume the lower \p Bits.
+// This can be used to form W instructions for add/sub/mul/shl even when the
+// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
+// SimplifyDemandedBits has made it so some users see a sext_inreg and some
+// don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave
+// the add/sub/mul/shl to become non-W instructions. By checking the users we
+// may be able to use a W instruction and CSE with the other instruction if
+// this has happened. We could try to detect that the CSE opportunity exists
+// before doing this, but that would be more complicated.
+// TODO: Does this need to look through AND/OR/XOR to their users to find more
+// opportunities?
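+// For example, an i64 add whose users are all ADDW instructions only needs its
+// low 32 bits, so hasAllNBitUsers(Add, 32) returns true and the add can itself
+// be selected as ADDW (and then CSE with an equivalent ADDW).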
+bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const {
+ assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
+ Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
+ Node->getOpcode() == ISD::SRL ||
+ Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
+ isa<ConstantSDNode>(Node)) &&
+ "Unexpected opcode");
+
+ for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ // Users of this node should have already been instruction selected
+ if (!User->isMachineOpcode())
+ return false;
+
+ // TODO: Add more opcodes?
+ switch (User->getMachineOpcode()) {
+ default:
+ return false;
+ case RISCV::ADDW:
+ case RISCV::ADDIW:
+ case RISCV::SUBW:
+ case RISCV::MULW:
+ case RISCV::SLLW:
+ case RISCV::SLLIW:
+ case RISCV::SRAW:
+ case RISCV::SRAIW:
+ case RISCV::SRLW:
+ case RISCV::SRLIW:
+ case RISCV::DIVW:
+ case RISCV::DIVUW:
+ case RISCV::REMW:
+ case RISCV::REMUW:
+ case RISCV::ROLW:
+ case RISCV::RORW:
+ case RISCV::RORIW:
+ case RISCV::CLZW:
+ case RISCV::CTZW:
+ case RISCV::CPOPW:
+ case RISCV::SLLIUW:
+ case RISCV::FCVT_H_W:
+ case RISCV::FCVT_H_WU:
+ case RISCV::FCVT_S_W:
+ case RISCV::FCVT_S_WU:
+ case RISCV::FCVT_D_W:
+ case RISCV::FCVT_D_WU:
+ if (Bits < 32)
+ return false;
+ break;
+ case RISCV::SLLI:
+ // SLLI only uses the lower (XLen - ShAmt) bits.
+ if (Bits < Subtarget->getXLen() - User->getConstantOperandVal(1))
+ return false;
+ break;
+ case RISCV::ADDUW:
+ case RISCV::SH1ADDUW:
+ case RISCV::SH2ADDUW:
+ case RISCV::SH3ADDUW:
+ // The first operand to add.uw/shXadd.uw is implicitly zero extended from
+ // 32 bits.
+ if (UI.getOperandNo() != 0 || Bits < 32)
+ return false;
+ break;
+ case RISCV::SB:
+ if (UI.getOperandNo() != 0 || Bits < 8)
+ return false;
+ break;
+ case RISCV::SH:
+ if (UI.getOperandNo() != 0 || Bits < 16)
+ return false;
+ break;
+ case RISCV::SW:
+ if (UI.getOperandNo() != 0 || Bits < 32)
+ return false;
+ break;
+ }
+ }
+
+ return true;
+}
+
// Select VL as a 5 bit immediate or a value that will become a register. This
// allows us to choose between VSETIVLI or VSETVLI later.
bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
@@ -1609,113 +1798,162 @@ bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
// (load (addi base, off1), off2) -> (load base, off1+off2)
// (store val, (addi base, off1), off2) -> (store val, base, off1+off2)
// This is possible when off1+off2 fits a 12-bit immediate.
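+// For example, (LW (ADDI s0, 16), 4) folds to (LW s0, 20), provided the
+// combined offset still fits in the signed 12-bit immediate field.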
-void RISCVDAGToDAGISel::doPeepholeLoadStoreADDI() {
- SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
- ++Position;
+bool RISCVDAGToDAGISel::doPeepholeLoadStoreADDI(SDNode *N) {
+ int OffsetOpIdx;
+ int BaseOpIdx;
- while (Position != CurDAG->allnodes_begin()) {
- SDNode *N = &*--Position;
- // Skip dead nodes and any non-machine opcodes.
- if (N->use_empty() || !N->isMachineOpcode())
- continue;
+ // Only attempt this optimisation for I-type loads and S-type stores.
+ switch (N->getMachineOpcode()) {
+ default:
+ return false;
+ case RISCV::LB:
+ case RISCV::LH:
+ case RISCV::LW:
+ case RISCV::LBU:
+ case RISCV::LHU:
+ case RISCV::LWU:
+ case RISCV::LD:
+ case RISCV::FLH:
+ case RISCV::FLW:
+ case RISCV::FLD:
+ BaseOpIdx = 0;
+ OffsetOpIdx = 1;
+ break;
+ case RISCV::SB:
+ case RISCV::SH:
+ case RISCV::SW:
+ case RISCV::SD:
+ case RISCV::FSH:
+ case RISCV::FSW:
+ case RISCV::FSD:
+ BaseOpIdx = 1;
+ OffsetOpIdx = 2;
+ break;
+ }
- int OffsetOpIdx;
- int BaseOpIdx;
+ if (!isa<ConstantSDNode>(N->getOperand(OffsetOpIdx)))
+ return false;
- // Only attempt this optimisation for I-type loads and S-type stores.
- switch (N->getMachineOpcode()) {
- default:
- continue;
- case RISCV::LB:
- case RISCV::LH:
- case RISCV::LW:
- case RISCV::LBU:
- case RISCV::LHU:
- case RISCV::LWU:
- case RISCV::LD:
- case RISCV::FLH:
- case RISCV::FLW:
- case RISCV::FLD:
- BaseOpIdx = 0;
- OffsetOpIdx = 1;
- break;
- case RISCV::SB:
- case RISCV::SH:
- case RISCV::SW:
- case RISCV::SD:
- case RISCV::FSH:
- case RISCV::FSW:
- case RISCV::FSD:
- BaseOpIdx = 1;
- OffsetOpIdx = 2;
- break;
- }
+ SDValue Base = N->getOperand(BaseOpIdx);
- if (!isa<ConstantSDNode>(N->getOperand(OffsetOpIdx)))
- continue;
+ // If the base is an ADDI, we can merge it in to the load/store.
+ if (!Base.isMachineOpcode() || Base.getMachineOpcode() != RISCV::ADDI)
+ return false;
- SDValue Base = N->getOperand(BaseOpIdx);
+ SDValue ImmOperand = Base.getOperand(1);
+ uint64_t Offset2 = N->getConstantOperandVal(OffsetOpIdx);
- // If the base is an ADDI, we can merge it in to the load/store.
- if (!Base.isMachineOpcode() || Base.getMachineOpcode() != RISCV::ADDI)
- continue;
+ if (auto *Const = dyn_cast<ConstantSDNode>(ImmOperand)) {
+ int64_t Offset1 = Const->getSExtValue();
+ int64_t CombinedOffset = Offset1 + Offset2;
+ if (!isInt<12>(CombinedOffset))
+ return false;
+ ImmOperand = CurDAG->getTargetConstant(CombinedOffset, SDLoc(ImmOperand),
+ ImmOperand.getValueType());
+ } else if (auto *GA = dyn_cast<GlobalAddressSDNode>(ImmOperand)) {
+ // If the off1 in (addi base, off1) is a global variable's address (its
+ // low part, really), then we can rely on the alignment of that variable
+ // to provide a margin of safety before off1 can overflow the 12 bits.
+ // Check if off2 falls within that margin; if so off1+off2 can't overflow.
+ const DataLayout &DL = CurDAG->getDataLayout();
+ Align Alignment = GA->getGlobal()->getPointerAlignment(DL);
+ if (Offset2 != 0 && Alignment <= Offset2)
+ return false;
+ int64_t Offset1 = GA->getOffset();
+ int64_t CombinedOffset = Offset1 + Offset2;
+ ImmOperand = CurDAG->getTargetGlobalAddress(
+ GA->getGlobal(), SDLoc(ImmOperand), ImmOperand.getValueType(),
+ CombinedOffset, GA->getTargetFlags());
+ } else if (auto *CP = dyn_cast<ConstantPoolSDNode>(ImmOperand)) {
+ // Ditto.
+ Align Alignment = CP->getAlign();
+ if (Offset2 != 0 && Alignment <= Offset2)
+ return false;
+ int64_t Offset1 = CP->getOffset();
+ int64_t CombinedOffset = Offset1 + Offset2;
+ ImmOperand = CurDAG->getTargetConstantPool(
+ CP->getConstVal(), ImmOperand.getValueType(), CP->getAlign(),
+ CombinedOffset, CP->getTargetFlags());
+ } else {
+ return false;
+ }
- SDValue ImmOperand = Base.getOperand(1);
- uint64_t Offset2 = N->getConstantOperandVal(OffsetOpIdx);
-
- if (auto *Const = dyn_cast<ConstantSDNode>(ImmOperand)) {
- int64_t Offset1 = Const->getSExtValue();
- int64_t CombinedOffset = Offset1 + Offset2;
- if (!isInt<12>(CombinedOffset))
- continue;
- ImmOperand = CurDAG->getTargetConstant(CombinedOffset, SDLoc(ImmOperand),
- ImmOperand.getValueType());
- } else if (auto *GA = dyn_cast<GlobalAddressSDNode>(ImmOperand)) {
- // If the off1 in (addi base, off1) is a global variable's address (its
- // low part, really), then we can rely on the alignment of that variable
- // to provide a margin of safety before off1 can overflow the 12 bits.
- // Check if off2 falls within that margin; if so off1+off2 can't overflow.
- const DataLayout &DL = CurDAG->getDataLayout();
- Align Alignment = GA->getGlobal()->getPointerAlignment(DL);
- if (Offset2 != 0 && Alignment <= Offset2)
- continue;
- int64_t Offset1 = GA->getOffset();
- int64_t CombinedOffset = Offset1 + Offset2;
- ImmOperand = CurDAG->getTargetGlobalAddress(
- GA->getGlobal(), SDLoc(ImmOperand), ImmOperand.getValueType(),
- CombinedOffset, GA->getTargetFlags());
- } else if (auto *CP = dyn_cast<ConstantPoolSDNode>(ImmOperand)) {
- // Ditto.
- Align Alignment = CP->getAlign();
- if (Offset2 != 0 && Alignment <= Offset2)
- continue;
- int64_t Offset1 = CP->getOffset();
- int64_t CombinedOffset = Offset1 + Offset2;
- ImmOperand = CurDAG->getTargetConstantPool(
- CP->getConstVal(), ImmOperand.getValueType(), CP->getAlign(),
- CombinedOffset, CP->getTargetFlags());
- } else {
- continue;
+ LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
+ LLVM_DEBUG(Base->dump(CurDAG));
+ LLVM_DEBUG(dbgs() << "\nN: ");
+ LLVM_DEBUG(N->dump(CurDAG));
+ LLVM_DEBUG(dbgs() << "\n");
+
+ // Modify the offset operand of the load/store.
+ if (BaseOpIdx == 0) // Load
+ CurDAG->UpdateNodeOperands(N, Base.getOperand(0), ImmOperand,
+ N->getOperand(2));
+ else // Store
+ CurDAG->UpdateNodeOperands(N, N->getOperand(0), Base.getOperand(0),
+ ImmOperand, N->getOperand(3));
+
+ return true;
+}
+
+// Try to remove sext.w if the input is a W instruction or can be made into
+// a W instruction cheaply.
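+// For example, (ADDIW (ADDW a, b), 0) is redundant because ADDW already sign
+// extends its 32-bit result, so uses of the ADDIW are replaced with the ADDW.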
+bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
+ // Look for the sext.w pattern, addiw rd, rs1, 0.
+ if (N->getMachineOpcode() != RISCV::ADDIW ||
+ !isNullConstant(N->getOperand(1)))
+ return false;
+
+ SDValue N0 = N->getOperand(0);
+ if (!N0.isMachineOpcode())
+ return false;
+
+ switch (N0.getMachineOpcode()) {
+ default:
+ break;
+ case RISCV::ADD:
+ case RISCV::ADDI:
+ case RISCV::SUB:
+ case RISCV::MUL:
+ case RISCV::SLLI: {
+ // Convert sext.w+add/sub/mul to their W instructions. This will create
+ // a new independent instruction. This improves latency.
+ unsigned Opc;
+ switch (N0.getMachineOpcode()) {
+ default:
+ llvm_unreachable("Unexpected opcode!");
+ case RISCV::ADD: Opc = RISCV::ADDW; break;
+ case RISCV::ADDI: Opc = RISCV::ADDIW; break;
+ case RISCV::SUB: Opc = RISCV::SUBW; break;
+ case RISCV::MUL: Opc = RISCV::MULW; break;
+ case RISCV::SLLI: Opc = RISCV::SLLIW; break;
}
- LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
- LLVM_DEBUG(Base->dump(CurDAG));
- LLVM_DEBUG(dbgs() << "\nN: ");
- LLVM_DEBUG(N->dump(CurDAG));
- LLVM_DEBUG(dbgs() << "\n");
-
- // Modify the offset operand of the load/store.
- if (BaseOpIdx == 0) // Load
- CurDAG->UpdateNodeOperands(N, Base.getOperand(0), ImmOperand,
- N->getOperand(2));
- else // Store
- CurDAG->UpdateNodeOperands(N, N->getOperand(0), Base.getOperand(0),
- ImmOperand, N->getOperand(3));
-
- // The add-immediate may now be dead, in which case remove it.
- if (Base.getNode()->use_empty())
- CurDAG->RemoveDeadNode(Base.getNode());
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+
+ // Shift amount needs to be uimm5.
+ if (N0.getMachineOpcode() == RISCV::SLLI &&
+ !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
+ break;
+
+ SDNode *Result =
+ CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
+ N00, N01);
+ ReplaceUses(N, Result);
+ return true;
+ }
+ case RISCV::ADDW:
+ case RISCV::ADDIW:
+ case RISCV::SUBW:
+ case RISCV::MULW:
+ case RISCV::SLLIW:
+    // Result is already sign extended; just remove the sext.w.
+ // NOTE: We only handle the nodes that are selected with hasAllWUsers.
+ ReplaceUses(N, N0.getNode());
+ return true;
}
+
+ return false;
}
// This pass converts a legalized DAG into a RISCV-specific DAG, ready
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
index 56d072206316..a2770089995d 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -58,6 +58,10 @@ public:
bool selectSExti32(SDValue N, SDValue &Val);
bool selectZExti32(SDValue N, SDValue &Val);
+ bool hasAllNBitUsers(SDNode *Node, unsigned Bits) const;
+ bool hasAllHUsers(SDNode *Node) const { return hasAllNBitUsers(Node, 16); }
+ bool hasAllWUsers(SDNode *Node) const { return hasAllNBitUsers(Node, 32); }
+
bool selectVLOp(SDValue N, SDValue &VL);
bool selectVSplat(SDValue N, SDValue &SplatVal);
@@ -75,7 +79,7 @@ public:
const SDLoc &DL, unsigned CurOp,
bool IsMasked, bool IsStridedOrIndexed,
SmallVectorImpl<SDValue> &Operands,
- MVT *IndexVT = nullptr);
+ bool IsLoad = false, MVT *IndexVT = nullptr);
void selectVLSEG(SDNode *Node, bool IsMasked, bool IsStrided);
void selectVLSEGFF(SDNode *Node, bool IsMasked);
@@ -83,11 +87,34 @@ public:
void selectVSSEG(SDNode *Node, bool IsMasked, bool IsStrided);
void selectVSXSEG(SDNode *Node, bool IsMasked, bool IsOrdered);
+ // Return the RISC-V condition code that matches the given DAG integer
+ // condition code. The CondCode must be one of those supported by the RISC-V
+ // ISA (see translateSetCCForBranch).
+ static RISCVCC::CondCode getRISCVCCForIntCC(ISD::CondCode CC) {
+ switch (CC) {
+ default:
+ llvm_unreachable("Unsupported CondCode");
+ case ISD::SETEQ:
+ return RISCVCC::COND_EQ;
+ case ISD::SETNE:
+ return RISCVCC::COND_NE;
+ case ISD::SETLT:
+ return RISCVCC::COND_LT;
+ case ISD::SETGE:
+ return RISCVCC::COND_GE;
+ case ISD::SETULT:
+ return RISCVCC::COND_LTU;
+ case ISD::SETUGE:
+ return RISCVCC::COND_GEU;
+ }
+ }
+
// Include the pieces autogenerated from the target description.
#include "RISCVGenDAGISel.inc"
private:
- void doPeepholeLoadStoreADDI();
+ bool doPeepholeLoadStoreADDI(SDNode *Node);
+ bool doPeepholeSExtW(SDNode *Node);
};
namespace RISCV {
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index d37ed584d9d2..0f1a6e5f9154 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -20,6 +20,7 @@
#include "RISCVTargetMachine.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -28,8 +29,9 @@
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
-#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicsRISCV.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
@@ -107,7 +109,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
static const MVT::SimpleValueType F64VecVTs[] = {
MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
- if (Subtarget.hasStdExtV()) {
+ if (Subtarget.hasVInstructions()) {
auto addRegClassForRVV = [this](MVT VT) {
unsigned Size = VT.getSizeInBits().getKnownMinValue();
assert(Size <= 512 && isPowerOf2_32(Size));
@@ -126,18 +128,22 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
for (MVT VT : BoolVecVTs)
addRegClassForRVV(VT);
- for (MVT VT : IntVecVTs)
+ for (MVT VT : IntVecVTs) {
+ if (VT.getVectorElementType() == MVT::i64 &&
+ !Subtarget.hasVInstructionsI64())
+ continue;
addRegClassForRVV(VT);
+ }
- if (Subtarget.hasStdExtZfh())
+ if (Subtarget.hasVInstructionsF16())
for (MVT VT : F16VecVTs)
addRegClassForRVV(VT);
- if (Subtarget.hasStdExtF())
+ if (Subtarget.hasVInstructionsF32())
for (MVT VT : F32VecVTs)
addRegClassForRVV(VT);
- if (Subtarget.hasStdExtD())
+ if (Subtarget.hasVInstructionsF64())
for (MVT VT : F64VecVTs)
addRegClassForRVV(VT);
@@ -199,6 +205,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::USUBO, MVT::i32, Custom);
setOperationAction(ISD::UADDSAT, MVT::i32, Custom);
setOperationAction(ISD::USUBSAT, MVT::i32, Custom);
+ } else {
+ setLibcallName(RTLIB::SHL_I128, nullptr);
+ setLibcallName(RTLIB::SRL_I128, nullptr);
+ setLibcallName(RTLIB::SRA_I128, nullptr);
+ setLibcallName(RTLIB::MUL_I128, nullptr);
+ setLibcallName(RTLIB::MULO_I64, nullptr);
}
if (!Subtarget.hasStdExtM()) {
@@ -299,14 +311,14 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SELECT, XLenVT, Custom);
}
- ISD::CondCode FPCCToExpand[] = {
+ static const ISD::CondCode FPCCToExpand[] = {
ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
ISD::SETGE, ISD::SETNE, ISD::SETO, ISD::SETUO};
- ISD::NodeType FPOpToExpand[] = {
- ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FP16_TO_FP,
- ISD::FP_TO_FP16};
+ static const ISD::NodeType FPOpToExpand[] = {
+ ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW,
+ ISD::FREM, ISD::FP16_TO_FP, ISD::FP_TO_FP16};
if (Subtarget.hasStdExtZfh())
setOperationAction(ISD::BITCAST, MVT::i16, Custom);
@@ -325,6 +337,15 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BR_CC, MVT::f16, Expand);
for (auto Op : FPOpToExpand)
setOperationAction(Op, MVT::f16, Expand);
+
+ setOperationAction(ISD::FREM, MVT::f16, Promote);
+ setOperationAction(ISD::FCEIL, MVT::f16, Promote);
+ setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
+ setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
+ setOperationAction(ISD::FRINT, MVT::f16, Promote);
+ setOperationAction(ISD::FROUND, MVT::f16, Promote);
+ setOperationAction(ISD::FROUNDEVEN, MVT::f16, Promote);
+ setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
}
if (Subtarget.hasStdExtF()) {
@@ -376,6 +397,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
if (Subtarget.hasStdExtF()) {
+ setOperationAction(ISD::FP_TO_UINT_SAT, XLenVT, Custom);
+ setOperationAction(ISD::FP_TO_SINT_SAT, XLenVT, Custom);
+
setOperationAction(ISD::FLT_ROUNDS_, XLenVT, Custom);
setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
}
@@ -407,7 +431,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setBooleanContents(ZeroOrOneBooleanContent);
- if (Subtarget.hasStdExtV()) {
+ if (Subtarget.hasVInstructions()) {
setBooleanVectorContents(ZeroOrOneBooleanContent);
setOperationAction(ISD::VSCALE, XLenVT, Custom);
@@ -426,14 +450,21 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
-
- static unsigned IntegerVPOps[] = {
- ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL, ISD::VP_SDIV, ISD::VP_UDIV,
- ISD::VP_SREM, ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR,
- ISD::VP_ASHR, ISD::VP_LSHR, ISD::VP_SHL};
-
- static unsigned FloatingPointVPOps[] = {ISD::VP_FADD, ISD::VP_FSUB,
- ISD::VP_FMUL, ISD::VP_FDIV};
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
+
+ static const unsigned IntegerVPOps[] = {
+ ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
+ ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
+ ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
+ ISD::VP_XOR, ISD::VP_ASHR, ISD::VP_LSHR,
+ ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
+ ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
+ ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN};
+
+ static const unsigned FloatingPointVPOps[] = {
+ ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
+ ISD::VP_FDIV, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
+ ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX};
if (!Subtarget.is64Bit()) {
// We must custom-lower certain vXi64 operations on RV32 due to the vector
@@ -449,6 +480,15 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECREDUCE_SMIN, MVT::i64, Custom);
setOperationAction(ISD::VECREDUCE_UMAX, MVT::i64, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, MVT::i64, Custom);
+
+ setOperationAction(ISD::VP_REDUCE_ADD, MVT::i64, Custom);
+ setOperationAction(ISD::VP_REDUCE_AND, MVT::i64, Custom);
+ setOperationAction(ISD::VP_REDUCE_OR, MVT::i64, Custom);
+ setOperationAction(ISD::VP_REDUCE_XOR, MVT::i64, Custom);
+ setOperationAction(ISD::VP_REDUCE_SMAX, MVT::i64, Custom);
+ setOperationAction(ISD::VP_REDUCE_SMIN, MVT::i64, Custom);
+ setOperationAction(ISD::VP_REDUCE_UMAX, MVT::i64, Custom);
+ setOperationAction(ISD::VP_REDUCE_UMIN, MVT::i64, Custom);
}
for (MVT VT : BoolVecVTs) {
@@ -471,6 +511,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
+ setOperationAction(ISD::VP_REDUCE_AND, VT, Custom);
+ setOperationAction(ISD::VP_REDUCE_OR, VT, Custom);
+ setOperationAction(ISD::VP_REDUCE_XOR, VT, Custom);
+
// RVV has native int->float & float->int conversions where the
// element type sizes are within one power-of-two of each other. Any
// wider distances between type sizes have to be lowered as sequences
@@ -491,9 +535,17 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
for (MVT VT : IntVecVTs) {
+ if (VT.getVectorElementType() == MVT::i64 &&
+ !Subtarget.hasVInstructionsI64())
+ continue;
+
setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
+ // Vectors implement MULHS/MULHU.
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+
setOperationAction(ISD::SMIN, VT, Legal);
setOperationAction(ISD::SMAX, VT, Legal);
setOperationAction(ISD::UMIN, VT, Legal);
@@ -502,6 +554,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::ROTL, VT, Expand);
setOperationAction(ISD::ROTR, VT, Expand);
+ setOperationAction(ISD::CTTZ, VT, Expand);
+ setOperationAction(ISD::CTLZ, VT, Expand);
+ setOperationAction(ISD::CTPOP, VT, Expand);
+
+ setOperationAction(ISD::BSWAP, VT, Expand);
+
// Custom-lower extensions and truncations from/to mask types.
setOperationAction(ISD::ANY_EXTEND, VT, Custom);
setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
@@ -551,6 +609,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
+ setOperationAction(ISD::VP_LOAD, VT, Custom);
+ setOperationAction(ISD::VP_STORE, VT, Custom);
+ setOperationAction(ISD::VP_GATHER, VT, Custom);
+ setOperationAction(ISD::VP_SCATTER, VT, Custom);
+
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
@@ -567,6 +630,18 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
}
+
+ // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if we have a floating point
+ // type that can represent the value exactly.
+ if (VT.getVectorElementType() != MVT::i64) {
+ MVT FloatEltVT =
+ VT.getVectorElementType() == MVT::i32 ? MVT::f64 : MVT::f32;
+ EVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
+ if (isTypeLegal(FloatVT)) {
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
+ }
+ }
}
// Expand various CCs to best match the RVV ISA, which natively supports UNE
@@ -576,7 +651,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// and we pattern-match those back to the "original", swapping operands once
// more. This way we catch both operations and both "vf" and "fv" forms with
// fewer patterns.
- ISD::CondCode VFPCCToExpand[] = {
+ static const ISD::CondCode VFPCCToExpand[] = {
ISD::SETO, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
ISD::SETGT, ISD::SETOGT, ISD::SETGE, ISD::SETOGE,
@@ -605,6 +680,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
+
setOperationAction(ISD::FCOPYSIGN, VT, Legal);
setOperationAction(ISD::LOAD, VT, Custom);
@@ -615,6 +691,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
+ setOperationAction(ISD::VP_LOAD, VT, Custom);
+ setOperationAction(ISD::VP_STORE, VT, Custom);
+ setOperationAction(ISD::VP_GATHER, VT, Custom);
+ setOperationAction(ISD::VP_SCATTER, VT, Custom);
+
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
@@ -638,18 +719,18 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
};
- if (Subtarget.hasStdExtZfh())
+ if (Subtarget.hasVInstructionsF16())
for (MVT VT : F16VecVTs)
SetCommonVFPActions(VT);
for (MVT VT : F32VecVTs) {
- if (Subtarget.hasStdExtF())
+ if (Subtarget.hasVInstructionsF32())
SetCommonVFPActions(VT);
SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
}
for (MVT VT : F64VecVTs) {
- if (Subtarget.hasStdExtD())
+ if (Subtarget.hasVInstructionsF64())
SetCommonVFPActions(VT);
SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
@@ -695,6 +776,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
+ setOperationAction(ISD::VP_REDUCE_AND, VT, Custom);
+ setOperationAction(ISD::VP_REDUCE_OR, VT, Custom);
+ setOperationAction(ISD::VP_REDUCE_XOR, VT, Custom);
+
setOperationAction(ISD::SINT_TO_FP, VT, Custom);
setOperationAction(ISD::UINT_TO_FP, VT, Custom);
setOperationAction(ISD::FP_TO_SINT, VT, Custom);
@@ -724,6 +809,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::MSTORE, VT, Custom);
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
+
+ setOperationAction(ISD::VP_LOAD, VT, Custom);
+ setOperationAction(ISD::VP_STORE, VT, Custom);
+ setOperationAction(ISD::VP_GATHER, VT, Custom);
+ setOperationAction(ISD::VP_SCATTER, VT, Custom);
+
setOperationAction(ISD::ADD, VT, Custom);
setOperationAction(ISD::MUL, VT, Custom);
setOperationAction(ISD::SUB, VT, Custom);
@@ -769,6 +860,19 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
for (unsigned VPOpc : IntegerVPOps)
setOperationAction(VPOpc, VT, Custom);
+
+ // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if we have a floating point
+ // type that can represent the value exactly.
+ if (VT.getVectorElementType() != MVT::i64) {
+ MVT FloatEltVT =
+ VT.getVectorElementType() == MVT::i32 ? MVT::f64 : MVT::f32;
+ EVT FloatVT =
+ MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
+ if (isTypeLegal(FloatVT)) {
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
+ }
+ }
}
for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
@@ -788,6 +892,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
@@ -798,6 +903,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::MSTORE, VT, Custom);
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
+
+ setOperationAction(ISD::VP_LOAD, VT, Custom);
+ setOperationAction(ISD::VP_STORE, VT, Custom);
+ setOperationAction(ISD::VP_GATHER, VT, Custom);
+ setOperationAction(ISD::VP_SCATTER, VT, Custom);
+
setOperationAction(ISD::FADD, VT, Custom);
setOperationAction(ISD::FSUB, VT, Custom);
setOperationAction(ISD::FMUL, VT, Custom);
@@ -852,21 +963,23 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// Jumps are expensive, compared to logic
setJumpIsExpensive();
- // We can use any register for comparisons
- setHasMultipleConditionRegisters();
-
+ setTargetDAGCombine(ISD::ADD);
+ setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::OR);
setTargetDAGCombine(ISD::XOR);
setTargetDAGCombine(ISD::ANY_EXTEND);
setTargetDAGCombine(ISD::ZERO_EXTEND);
- if (Subtarget.hasStdExtV()) {
+ if (Subtarget.hasVInstructions()) {
setTargetDAGCombine(ISD::FCOPYSIGN);
setTargetDAGCombine(ISD::MGATHER);
setTargetDAGCombine(ISD::MSCATTER);
+ setTargetDAGCombine(ISD::VP_GATHER);
+ setTargetDAGCombine(ISD::VP_SCATTER);
setTargetDAGCombine(ISD::SRA);
setTargetDAGCombine(ISD::SRL);
setTargetDAGCombine(ISD::SHL);
+ setTargetDAGCombine(ISD::STORE);
}
}
@@ -875,7 +988,7 @@ EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
EVT VT) const {
if (!VT.isVector())
return getPointerTy(DL);
- if (Subtarget.hasStdExtV() &&
+ if (Subtarget.hasVInstructions() &&
(VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
return VT.changeVectorElementTypeToInteger();
@@ -889,6 +1002,7 @@ bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I,
MachineFunction &MF,
unsigned Intrinsic) const {
+ auto &DL = I.getModule()->getDataLayout();
switch (Intrinsic) {
default:
return false;
@@ -911,6 +1025,25 @@ bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
MachineMemOperand::MOVolatile;
return true;
}
+ case Intrinsic::riscv_masked_strided_load:
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.ptrVal = I.getArgOperand(1);
+ Info.memVT = getValueType(DL, I.getType()->getScalarType());
+ Info.align = Align(DL.getTypeSizeInBits(I.getType()->getScalarType()) / 8);
+ Info.size = MemoryLocation::UnknownSize;
+ Info.flags |= MachineMemOperand::MOLoad;
+ return true;
+ case Intrinsic::riscv_masked_strided_store:
+ Info.opc = ISD::INTRINSIC_VOID;
+ Info.ptrVal = I.getArgOperand(1);
+ Info.memVT =
+ getValueType(DL, I.getArgOperand(0)->getType()->getScalarType());
+ Info.align = Align(
+ DL.getTypeSizeInBits(I.getArgOperand(0)->getType()->getScalarType()) /
+ 8);
+ Info.size = MemoryLocation::UnknownSize;
+ Info.flags |= MachineMemOperand::MOStore;
+ return true;
}
}
@@ -994,9 +1127,91 @@ bool RISCVTargetLowering::isCheapToSpeculateCtlz() const {
return Subtarget.hasStdExtZbb();
}
+bool RISCVTargetLowering::hasAndNot(SDValue Y) const {
+ EVT VT = Y.getValueType();
+
+ // FIXME: Support vectors once we have tests.
+ if (VT.isVector())
+ return false;
+
+ return Subtarget.hasStdExtZbb() && !isa<ConstantSDNode>(Y);
+}
+
+/// Check if sinking \p I's operands to I's basic block is profitable, because
+/// the operands can be folded into a target instruction, e.g.
+/// splats of scalars can fold into vector instructions.
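+/// For example, a splat built as an insertelement plus a zero-mask
+/// shufflevector in a preheader can be sunk next to a vector add in the loop,
+/// allowing the add to be selected as a .vx instruction instead of keeping the
+/// splat live in a vector register across the loop.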
+bool RISCVTargetLowering::shouldSinkOperands(
+ Instruction *I, SmallVectorImpl<Use *> &Ops) const {
+ using namespace llvm::PatternMatch;
+
+ if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
+ return false;
+
+ auto IsSinker = [&](Instruction *I, int Operand) {
+ switch (I->getOpcode()) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul:
+ case Instruction::FDiv:
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ return true;
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ return Operand == 1;
+ case Instruction::Call:
+ if (auto *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::fma:
+ return Operand == 0 || Operand == 1;
+ default:
+ return false;
+ }
+ }
+ return false;
+ default:
+ return false;
+ }
+ };
+
+ for (auto OpIdx : enumerate(I->operands())) {
+ if (!IsSinker(I, OpIdx.index()))
+ continue;
+
+ Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
+ // Make sure we are not already sinking this operand
+ if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
+ continue;
+
+ // We are looking for a splat that can be sunk.
+ if (!match(Op, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
+ m_Undef(), m_ZeroMask())))
+ continue;
+
+    // All uses of the shuffle should be sunk to avoid duplicating it across
+    // GPR and vector registers.
+ for (Use &U : Op->uses()) {
+ Instruction *Insn = cast<Instruction>(U.getUser());
+ if (!IsSinker(Insn, U.getOperandNo()))
+ return false;
+ }
+
+ Ops.push_back(&Op->getOperandUse(0));
+ Ops.push_back(&OpIdx.value());
+ }
+ return true;
+}
+
bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const {
- if (VT == MVT::f16 && !Subtarget.hasStdExtZfh())
+ if (VT == MVT::f16 && !Subtarget.hasStdExtZfhmin())
return false;
if (VT == MVT::f32 && !Subtarget.hasStdExtF())
return false;
@@ -1016,9 +1231,9 @@ bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
CallingConv::ID CC,
EVT VT) const {
- // Use f32 to pass f16 if it is legal and Zfh is not enabled. We might still
- // end up using a GPR but that will be decided based on ABI.
- if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh())
+ // Use f32 to pass f16 if it is legal and Zfhmin/Zfh is not enabled.
+ // We might still end up using a GPR but that will be decided based on ABI.
+ if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfhmin())
return MVT::f32;
return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
@@ -1027,9 +1242,9 @@ MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
CallingConv::ID CC,
EVT VT) const {
- // Use f32 to pass f16 if it is legal and Zfh is not enabled. We might still
- // end up using a GPR but that will be decided based on ABI.
- if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh())
+ // Use f32 to pass f16 if it is legal and Zfhmin/Zfh is not enabled.
+ // We might still end up using a GPR but that will be decided based on ABI.
+ if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfhmin())
return 1;
return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
@@ -1068,28 +1283,6 @@ static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
}
}
-// Return the RISC-V branch opcode that matches the given DAG integer
-// condition code. The CondCode must be one of those supported by the RISC-V
-// ISA (see translateSetCCForBranch).
-static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) {
- switch (CC) {
- default:
- llvm_unreachable("Unsupported CondCode");
- case ISD::SETEQ:
- return RISCV::BEQ;
- case ISD::SETNE:
- return RISCV::BNE;
- case ISD::SETLT:
- return RISCV::BLT;
- case ISD::SETGE:
- return RISCV::BGE;
- case ISD::SETULT:
- return RISCV::BLTU;
- case ISD::SETUGE:
- return RISCV::BGEU;
- }
-}
-
RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) {
assert(VT.isScalableVector() && "Expecting a scalable vector type");
unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
@@ -1206,6 +1399,27 @@ bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
(VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
}
+bool RISCVTargetLowering::isLegalElementTypeForRVV(Type *ScalarTy) const {
+ if (ScalarTy->isPointerTy())
+ return true;
+
+ if (ScalarTy->isIntegerTy(8) || ScalarTy->isIntegerTy(16) ||
+ ScalarTy->isIntegerTy(32))
+ return true;
+
+ if (ScalarTy->isIntegerTy(64))
+ return Subtarget.hasVInstructionsI64();
+
+ if (ScalarTy->isHalfTy())
+ return Subtarget.hasVInstructionsF16();
+ if (ScalarTy->isFloatTy())
+ return Subtarget.hasVInstructionsF32();
+ if (ScalarTy->isDoubleTy())
+ return Subtarget.hasVInstructionsF64();
+
+ return false;
+}
+
static bool useRVVForFixedLengthVectorVT(MVT VT,
const RISCVSubtarget &Subtarget) {
assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
@@ -1221,8 +1435,10 @@ static bool useRVVForFixedLengthVectorVT(MVT VT,
unsigned MinVLen = Subtarget.getMinRVVVectorSizeInBits();
+ MVT EltVT = VT.getVectorElementType();
+
// Don't use RVV for vectors we cannot scalarize if required.
- switch (VT.getVectorElementType().SimpleTy) {
+ switch (EltVT.SimpleTy) {
// i1 is supported but has different rules.
default:
return false;
@@ -1235,22 +1451,29 @@ static bool useRVVForFixedLengthVectorVT(MVT VT,
case MVT::i8:
case MVT::i16:
case MVT::i32:
+ break;
case MVT::i64:
+ if (!Subtarget.hasVInstructionsI64())
+ return false;
break;
case MVT::f16:
- if (!Subtarget.hasStdExtZfh())
+ if (!Subtarget.hasVInstructionsF16())
return false;
break;
case MVT::f32:
- if (!Subtarget.hasStdExtF())
+ if (!Subtarget.hasVInstructionsF32())
return false;
break;
case MVT::f64:
- if (!Subtarget.hasStdExtD())
+ if (!Subtarget.hasVInstructionsF64())
return false;
break;
}
+ // Reject elements larger than ELEN.
+ if (EltVT.getSizeInBits() > Subtarget.getMaxELENForFixedLengthVectors())
+ return false;
+
unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
// Don't use RVV for types that don't fit.
if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
@@ -1277,6 +1500,7 @@ static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
"Expected legal fixed length vector!");
unsigned MinVLen = Subtarget.getMinRVVVectorSizeInBits();
+ unsigned MaxELen = Subtarget.getMaxELENForFixedLengthVectors();
MVT EltVT = VT.getVectorElementType();
switch (EltVT.SimpleTy) {
@@ -1291,10 +1515,12 @@ static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
case MVT::f32:
case MVT::f64: {
// We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
- // narrower types, but we can't have a fractional LMUL with demoninator less
- // than 64/SEW.
+ // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
+ // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
unsigned NumElts =
- divideCeil(VT.getVectorNumElements(), MinVLen / RISCV::RVVBitsPerBlock);
+ (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
+ NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
+ assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
return MVT::getScalableVectorVT(EltVT, NumElts);
}
}
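As a worked illustration of the container sizing above: with MinVLen = 128, MaxELen = 64 and a fixed <2 x float> input, NumElts = (2 * 64) / 128 = 1, the 8/ELEN clamp leaves it at 1, and the container becomes nxv1f32 (LMUL = 1/2 at SEW = 32). A minimal standalone sketch of the same arithmetic follows; the helper name and the hard-coded RVVBitsPerBlock = 64 are assumptions for illustration only.

#include <algorithm>
#include <cassert>

// Sketch of the container element-count computation above. With
// MinVLen = 128, MaxELen = 64 and FixedNumElts = 2 this returns 1.
static unsigned getContainerNumElts(unsigned FixedNumElts, unsigned MinVLen,
                                    unsigned MaxELen) {
  const unsigned RVVBitsPerBlock = 64; // assumed value of RISCV::RVVBitsPerBlock
  unsigned NumElts = (FixedNumElts * RVVBitsPerBlock) / MinVLen;
  // Never go below the smallest supported fractional LMUL of 8/ELEN.
  NumElts = std::max(NumElts, RVVBitsPerBlock / MaxELen);
  assert((NumElts & (NumElts - 1)) == 0 && "Expected power of 2 NumElts");
  return NumElts;
}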
@@ -1344,7 +1570,7 @@ getDefaultVLOps(MVT VecVT, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
MVT XLenVT = Subtarget.getXLenVT();
SDValue VL = VecVT.isFixedLengthVector()
? DAG.getConstant(VecVT.getVectorNumElements(), DL, XLenVT)
- : DAG.getRegister(RISCV::X0, XLenVT);
+ : DAG.getTargetConstant(RISCV::VLMaxSentinel, DL, XLenVT);
MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
return {Mask, VL};
@@ -1379,6 +1605,32 @@ bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
return false;
}
+static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) {
+ // RISCV FP-to-int conversions saturate to the destination register size, but
+ // don't produce 0 for NaN. We can use a conversion instruction and fix the
+ // NaN case with a compare and a select.
+ SDValue Src = Op.getOperand(0);
+
+ EVT DstVT = Op.getValueType();
+ EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+
+ bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
+ unsigned Opc;
+ if (SatVT == DstVT)
+ Opc = IsSigned ? RISCVISD::FCVT_X_RTZ : RISCVISD::FCVT_XU_RTZ;
+ else if (DstVT == MVT::i64 && SatVT == MVT::i32)
+ Opc = IsSigned ? RISCVISD::FCVT_W_RTZ_RV64 : RISCVISD::FCVT_WU_RTZ_RV64;
+ else
+ return SDValue();
+ // FIXME: Support other SatVTs by clamping before or after the conversion.
+
+ SDLoc DL(Op);
+ SDValue FpToInt = DAG.getNode(Opc, DL, DstVT, Src);
+
+ SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
+ return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
+}
+
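The saturating conversion above leans on the RISC-V fcvt instructions already clamping out-of-range inputs; only NaN, which fcvt maps to the maximum value rather than 0, needs the extra unordered compare and select. A minimal scalar sketch of the semantics the emitted nodes compute for f32 -> i32; fcvt_w_s_rtz is a hypothetical stand-in for the hardware conversion.

#include <cmath>
#include <cstdint>
#include <limits>

// Hypothetical stand-in for fcvt.w.s with RTZ rounding: saturates on
// overflow but returns the maximum value (not 0) for NaN.
static int32_t fcvt_w_s_rtz(float X) {
  if (std::isnan(X) || X >= 2147483648.0f)
    return std::numeric_limits<int32_t>::max();
  if (X < -2147483648.0f)
    return std::numeric_limits<int32_t>::min();
  return static_cast<int32_t>(X); // truncates toward zero
}

// What the lowered fp_to_sint_sat computes: the converted value, with the
// NaN case patched to 0 by an unordered compare (X != X) and a select.
static int32_t fp_to_sint_sat_i32(float X) {
  int32_t Conv = fcvt_w_s_rtz(X);
  return (X != X) ? 0 : Conv;
}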
static SDValue lowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
MVT VT = Op.getSimpleValueType();
@@ -1397,13 +1649,18 @@ static SDValue lowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG,
}
struct VIDSequence {
- int64_t Step;
+ int64_t StepNumerator;
+ unsigned StepDenominator;
int64_t Addend;
};
// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
// to the (non-zero) step S and start value X. This can be then lowered as the
// RVV sequence (VID * S) + X, for example.
+// The step S is represented as an integer numerator divided by a positive
+// denominator. Note that the implementation currently only identifies
+// sequences in which either the numerator is +/- 1 or the denominator is 1. It
+// cannot detect 2/3, for example.
// Note that this method will also match potentially unappealing index
// sequences, like <i32 0, i32 50939494>, however it is left to the caller to
// determine whether this is worth generating code for.
@@ -1413,7 +1670,8 @@ static Optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
if (!Op.getValueType().isInteger())
return None;
- Optional<int64_t> SeqStep, SeqAddend;
+ Optional<unsigned> SeqStepDenom;
+ Optional<int64_t> SeqStepNum, SeqAddend;
Optional<std::pair<uint64_t, unsigned>> PrevElt;
unsigned EltSizeInBits = Op.getValueType().getScalarSizeInBits();
for (unsigned Idx = 0; Idx < NumElts; Idx++) {
@@ -1431,26 +1689,40 @@ static Optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
if (PrevElt) {
// Calculate the step since the last non-undef element, and ensure
// it's consistent across the entire sequence.
- int64_t Diff = SignExtend64(Val - PrevElt->first, EltSizeInBits);
- // The difference must cleanly divide the element span.
- if (Diff % (Idx - PrevElt->second) != 0)
- return None;
- int64_t Step = Diff / (Idx - PrevElt->second);
- // A zero step indicates we're either a not an index sequence, or we
- // have a fractional step. This must be handled by a more complex
- // pattern recognition (undefs complicate things here).
- if (Step == 0)
- return None;
- if (!SeqStep)
- SeqStep = Step;
- else if (Step != SeqStep)
- return None;
+ unsigned IdxDiff = Idx - PrevElt->second;
+ int64_t ValDiff = SignExtend64(Val - PrevElt->first, EltSizeInBits);
+
+ // A zero value difference means that we're somewhere in the middle
+ // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
+ // step change before evaluating the sequence.
+ if (ValDiff != 0) {
+ int64_t Remainder = ValDiff % IdxDiff;
+ // Normalize the step if it's greater than 1.
+ if (Remainder != ValDiff) {
+ // The difference must cleanly divide the element span.
+ if (Remainder != 0)
+ return None;
+ ValDiff /= IdxDiff;
+ IdxDiff = 1;
+ }
+
+ if (!SeqStepNum)
+ SeqStepNum = ValDiff;
+ else if (ValDiff != SeqStepNum)
+ return None;
+
+ if (!SeqStepDenom)
+ SeqStepDenom = IdxDiff;
+ else if (IdxDiff != *SeqStepDenom)
+ return None;
+ }
}
// Record and/or check any addend.
- if (SeqStep) {
- int64_t Addend =
- SignExtend64(Val - (Idx * (uint64_t)*SeqStep), EltSizeInBits);
+ if (SeqStepNum && SeqStepDenom) {
+ uint64_t ExpectedVal =
+ (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
+ int64_t Addend = SignExtend64(Val - ExpectedVal, EltSizeInBits);
if (!SeqAddend)
SeqAddend = Addend;
else if (SeqAddend != Addend)
@@ -1458,14 +1730,15 @@ static Optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
}
// Record this non-undef element for later.
- PrevElt = std::make_pair(Val, Idx);
+ if (!PrevElt || PrevElt->first != Val)
+ PrevElt = std::make_pair(Val, Idx);
}
// We need to have logged both a step and an addend for this to count as
// a legal index sequence.
- if (!SeqStep || !SeqAddend)
+ if (!SeqStepNum || !SeqStepDenom || !SeqAddend)
return None;
- return VIDSequence{*SeqStep, *SeqAddend};
+ return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
}
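With the fractional step support, a sequence such as <0,0,1,1,2,2> now matches with StepNumerator = 1 and StepDenominator = 2. The following is a small self-contained model of the matching loop over plain integers (undef handling and the sign-extension details are omitted), intended only to illustrate the algorithm.

#include <cstdint>
#include <optional>
#include <utility>
#include <vector>

struct Seq { int64_t Num; unsigned Denom; int64_t Addend; };

// Simplified model of isSimpleVIDSequence: succeeds when
// Vals[I] == (I * Num) / Denom + Addend for all I. For example,
// {0,0,1,1,2,2} gives {1, 2, 0} and {5, 3, 1} gives {-2, 1, 5}.
static std::optional<Seq> matchVID(const std::vector<int64_t> &Vals) {
  std::optional<int64_t> SeqStepNum, SeqAddend;
  std::optional<unsigned> SeqStepDenom;
  std::optional<std::pair<int64_t, unsigned>> PrevElt; // (value, index)
  for (unsigned Idx = 0; Idx < Vals.size(); ++Idx) {
    int64_t Val = Vals[Idx];
    if (PrevElt) {
      unsigned IdxDiff = Idx - PrevElt->second;
      int64_t ValDiff = Val - PrevElt->first;
      // A zero difference means we are inside a fractional step; wait for
      // the next value change before committing to a step.
      if (ValDiff != 0) {
        int64_t Remainder = ValDiff % (int64_t)IdxDiff;
        if (Remainder != ValDiff) { // normalize steps larger than one
          if (Remainder != 0)
            return std::nullopt;
          ValDiff /= (int64_t)IdxDiff;
          IdxDiff = 1;
        }
        if (!SeqStepNum)
          SeqStepNum = ValDiff;
        else if (ValDiff != *SeqStepNum)
          return std::nullopt;
        if (!SeqStepDenom)
          SeqStepDenom = IdxDiff;
        else if (IdxDiff != *SeqStepDenom)
          return std::nullopt;
      }
    }
    if (SeqStepNum && SeqStepDenom) {
      int64_t Expected = ((int64_t)Idx * *SeqStepNum) / (int64_t)*SeqStepDenom;
      int64_t Addend = Val - Expected;
      if (!SeqAddend)
        SeqAddend = Addend;
      else if (Addend != *SeqAddend)
        return std::nullopt;
    }
    // Only remember the last distinct value, mirroring the PrevElt update.
    if (!PrevElt || PrevElt->first != Val)
      PrevElt = std::make_pair(Val, Idx);
  }
  if (!SeqStepNum || !SeqStepDenom || !SeqAddend)
    return std::nullopt;
  return Seq{*SeqStepNum, *SeqStepDenom, *SeqAddend};
}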
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
@@ -1599,31 +1872,38 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
// with optional modifications. An all-undef vector is matched by
// getSplatValue, above.
if (auto SimpleVID = isSimpleVIDSequence(Op)) {
- int64_t Step = SimpleVID->Step;
+ int64_t StepNumerator = SimpleVID->StepNumerator;
+ unsigned StepDenominator = SimpleVID->StepDenominator;
int64_t Addend = SimpleVID->Addend;
// Only emit VIDs with suitably-small steps/addends. We use imm5 as a
// threshold since it's the immediate value many RVV instructions accept.
- if (isInt<5>(Step) && isInt<5>(Addend)) {
+ if (isInt<5>(StepNumerator) && isPowerOf2_32(StepDenominator) &&
+ isInt<5>(Addend)) {
SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL);
// Convert right out of the scalable type so we can use standard ISD
// nodes for the rest of the computation. If we used scalable types with
// these, we'd lose the fixed-length vector info and generate worse
// vsetvli code.
VID = convertFromScalableVector(VT, VID, DAG, Subtarget);
- assert(Step != 0 && "Invalid step");
+ assert(StepNumerator != 0 && "Invalid step");
bool Negate = false;
- if (Step != 1) {
- int64_t SplatStepVal = Step;
+ if (StepNumerator != 1) {
+ int64_t SplatStepVal = StepNumerator;
unsigned Opcode = ISD::MUL;
- if (isPowerOf2_64(std::abs(Step))) {
- Negate = Step < 0;
+ if (isPowerOf2_64(std::abs(StepNumerator))) {
+ Negate = StepNumerator < 0;
Opcode = ISD::SHL;
- SplatStepVal = Log2_64(std::abs(Step));
+ SplatStepVal = Log2_64(std::abs(StepNumerator));
}
SDValue SplatStep = DAG.getSplatVector(
VT, DL, DAG.getConstant(SplatStepVal, DL, XLenVT));
VID = DAG.getNode(Opcode, DL, VT, VID, SplatStep);
}
+ if (StepDenominator != 1) {
+ SDValue SplatStep = DAG.getSplatVector(
+ VT, DL, DAG.getConstant(Log2_64(StepDenominator), DL, XLenVT));
+ VID = DAG.getNode(ISD::SRL, DL, VT, VID, SplatStep);
+ }
if (Addend != 0 || Negate) {
SDValue SplatAddend =
DAG.getSplatVector(VT, DL, DAG.getConstant(Addend, DL, XLenVT));
@@ -1704,6 +1984,13 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
unsigned NumUndefElts =
count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
+ // Track the number of scalar loads we know we'd be inserting, estimated as
+ // any non-zero floating-point constant. Other kinds of elements are either
+ // already in registers or are materialized on demand. The threshold at which
+ // a vector load is more desirable than several scalar materialization and
+ // vector-insertion instructions is not known.
+ unsigned NumScalarLoads = 0;
+
for (SDValue V : Op->op_values()) {
if (V.isUndef())
continue;
@@ -1711,6 +1998,9 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
ValueCounts.insert(std::make_pair(V, 0));
unsigned &Count = ValueCounts[V];
+ if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
+ NumScalarLoads += !CFP->isExactlyValue(+0.0);
+
// Is this value dominant? In case of a tie, prefer the highest element as
// it's cheaper to insert near the beginning of a vector than it is at the
// end.
@@ -1726,7 +2016,7 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
// Don't perform this optimization when optimizing for size, since
// materializing elements and inserting them tends to cause code bloat.
- if (!DAG.shouldOptForSize() &&
+ if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
((MostCommonCount > DominantValueCountThreshold) ||
(ValueCounts.size() <= Log2_32(NumDefElts)))) {
// Start by splatting the most common element.
@@ -1926,6 +2216,10 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
bool InvertMask = IsSelect == SwapOps;
+ // Keep track of which non-undef indices are used by each LHS/RHS shuffle
+ // half.
+ DenseMap<int, unsigned> LHSIndexCounts, RHSIndexCounts;
+
// Now construct the mask that will be used by the vselect or blended
// vrgather operation. For vrgathers, construct the appropriate indices into
// each vector.
@@ -1940,6 +2234,10 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
GatherIndicesRHS.push_back(
IsLHSOrUndefIndex ? DAG.getUNDEF(XLenVT)
: DAG.getConstant(MaskIndex - NumElts, DL, XLenVT));
+ if (IsLHSOrUndefIndex && MaskIndex >= 0)
+ ++LHSIndexCounts[MaskIndex];
+ if (!IsLHSOrUndefIndex)
+ ++RHSIndexCounts[MaskIndex - NumElts];
}
}
@@ -1963,13 +2261,14 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
return SDValue();
}
- unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
+ unsigned GatherVXOpc = RISCVISD::VRGATHER_VX_VL;
+ unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
MVT IndexVT = VT.changeTypeToInteger();
// Since we can't introduce illegal index types at this stage, use i16 and
// vrgatherei16 if the corresponding index type for plain vrgather is greater
// than XLenVT.
if (IndexVT.getScalarType().bitsGT(XLenVT)) {
- GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
+ GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
IndexVT = IndexVT.changeVectorElementType(MVT::i16);
}
@@ -1982,28 +2281,48 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
if (SDValue SplatValue = DAG.getSplatValue(V1, /*LegalTypes*/ true)) {
Gather = lowerScalarSplat(SplatValue, VL, ContainerVT, DL, DAG, Subtarget);
} else {
- SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
- LHSIndices =
- convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);
-
V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
- Gather =
- DAG.getNode(GatherOpc, DL, ContainerVT, V1, LHSIndices, TrueMask, VL);
+ // If only one index is used, we can use a "splat" vrgather.
+ // TODO: We can splat the most-common index and fix-up any stragglers, if
+ // that's beneficial.
+ if (LHSIndexCounts.size() == 1) {
+ int SplatIndex = LHSIndexCounts.begin()->getFirst();
+ Gather =
+ DAG.getNode(GatherVXOpc, DL, ContainerVT, V1,
+ DAG.getConstant(SplatIndex, DL, XLenVT), TrueMask, VL);
+ } else {
+ SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
+ LHSIndices =
+ convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);
+
+ Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
+ TrueMask, VL);
+ }
}
// If a second vector operand is used by this shuffle, blend it in with an
// additional vrgather.
if (!V2.isUndef()) {
+ V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
+ // If only one index is used, we can use a "splat" vrgather.
+ // TODO: We can splat the most-common index and fix-up any stragglers, if
+ // that's beneficial.
+ if (RHSIndexCounts.size() == 1) {
+ int SplatIndex = RHSIndexCounts.begin()->getFirst();
+ V2 = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2,
+ DAG.getConstant(SplatIndex, DL, XLenVT), TrueMask, VL);
+ } else {
+ SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS);
+ RHSIndices =
+ convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget);
+ V2 = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices, TrueMask,
+ VL);
+ }
+
MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
SelectMask =
convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget);
- SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS);
- RHSIndices =
- convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget);
-
- V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
- V2 = DAG.getNode(GatherOpc, DL, ContainerVT, V2, RHSIndices, TrueMask, VL);
Gather = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, SelectMask, V2,
Gather, VL);
}
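The LHSIndexCounts/RHSIndexCounts bookkeeping above exists so that a shuffle half which only ever reads one source element, e.g. mask <2,2,2,2>, can be emitted as a vrgather.vx with a scalar index instead of building a full index vector for vrgather.vv. A sketch of how the counts classify a mask (std::map stands in for DenseMap; purely illustrative):

#include <map>

// Sketch: count which source elements each half of a shuffle mask reads.
// If a half's map ends up with exactly one key, that half can be lowered
// as a "splat" vrgather.vx of that single index.
static void countShuffleIndices(const int *Mask, int NumElts,
                                std::map<int, unsigned> &LHSIndexCounts,
                                std::map<int, unsigned> &RHSIndexCounts) {
  for (int I = 0; I < NumElts; ++I) {
    int M = Mask[I];
    if (M < 0)
      continue;                       // undef lane, reads nothing
    if (M < NumElts)
      ++LHSIndexCounts[M];            // element M of the first source
    else
      ++RHSIndexCounts[M - NumElts];  // element M-NumElts of the second source
  }
}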
@@ -2026,6 +2345,57 @@ static SDValue getRVVFPExtendOrRound(SDValue Op, MVT VT, MVT ContainerVT,
return DAG.getNode(RVVOpc, DL, ContainerVT, Op, Mask, VL);
}
+// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
+// the exponent.
+static SDValue lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op.getSimpleValueType();
+ unsigned EltSize = VT.getScalarSizeInBits();
+ SDValue Src = Op.getOperand(0);
+ SDLoc DL(Op);
+
+ // We need a FP type that can represent the value.
+ // TODO: Use f16 for i8 when possible?
+ MVT FloatEltVT = EltSize == 32 ? MVT::f64 : MVT::f32;
+ MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
+
+ // Legal types should have been checked in the RISCVTargetLowering
+ // constructor.
+ // TODO: Splitting may make sense in some cases.
+ assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
+ "Expected legal float type!");
+
+ // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
+ // The trailing zero count is equal to log2 of this single bit value.
+ if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
+ SDValue Neg =
+ DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Src);
+ Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
+ }
+
+ // We have a legal FP type, convert to it.
+ SDValue FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
+ // Bitcast to integer and shift the exponent to the LSB.
+ EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
+ SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
+ unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
+ SDValue Shift = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
+ DAG.getConstant(ShiftAmt, DL, IntVT));
+ // Truncate back to original type to allow vnsrl.
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, Shift);
+ // The exponent contains log2 of the value in biased form.
+ unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
+
+ // For trailing zeros, we just need to subtract the bias.
+ if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
+ return DAG.getNode(ISD::SUB, DL, VT, Trunc,
+ DAG.getConstant(ExponentBias, DL, VT));
+
+ // For leading zeros, we need to remove the bias and convert from log2 to
+ // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
+ unsigned Adjust = ExponentBias + (EltSize - 1);
+ return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Trunc);
+}
+
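The exponent trick above can be checked against a scalar model: converting a power of two to floating point places its log2 in the biased exponent field, which the lowering then shifts down and un-biases. A minimal sketch for 32-bit elements, which (as in the code) go through f64 with a 52-bit mantissa and exponent bias 1023:

#include <cstdint>
#include <cstring>

// Scalar model of the lowering for 32-bit elements, which use f64 so the
// uint -> FP conversion is exact.
static unsigned biasedExponent(double F) {
  uint64_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits)); // the bitcast step
  return unsigned(Bits >> 52);          // shift the exponent to the LSB
}

// cttz_zero_undef: isolate the lowest set bit with x & -x; its log2, read
// straight from the exponent, is the trailing zero count.
static unsigned cttzViaFP(uint32_t X) { // X must be non-zero
  uint32_t LowBit = X & (0u - X);
  return biasedExponent(static_cast<double>(LowBit)) - 1023;
}

// ctlz_zero_undef: floor(log2(x)) from the exponent, then subtract it from
// Bias + (EltSize - 1) to turn log2 into a leading zero count.
static unsigned ctlzViaFP(uint32_t X) { // X must be non-zero
  return (1023 + 31) - biasedExponent(static_cast<double>(X));
}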
// While RVV has alignment restrictions, we should always be able to load as a
// legal equivalently-sized byte-typed vector instead. This method is
// responsible for re-expressing an ISD::LOAD via a correctly-aligned type. If
@@ -2132,7 +2502,9 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
// into a one-element vector of the result type, and perform a vector
// bitcast.
if (!Op0VT.isVector()) {
- auto BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
+ EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
+ if (!isTypeLegal(BVT))
+ return SDValue();
return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
DAG.getUNDEF(BVT), Op0,
DAG.getConstant(0, DL, XLenVT)));
@@ -2143,8 +2515,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
// thus: bitcast the vector to a one-element vector type whose element type
// is the same as the result type, and extract the first element.
if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
- LLVMContext &Context = *DAG.getContext();
- SDValue BVec = DAG.getBitcast(EVT::getVectorVT(Context, VT, 1), Op0);
+ EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
+ if (!isTypeLegal(BVT))
+ return SDValue();
+ SDValue BVec = DAG.getBitcast(BVT, Op0);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
DAG.getConstant(0, DL, XLenVT));
}
@@ -2166,6 +2540,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::INTRINSIC_W_CHAIN:
return LowerINTRINSIC_W_CHAIN(Op, DAG);
+ case ISD::INTRINSIC_VOID:
+ return LowerINTRINSIC_VOID(Op, DAG);
case ISD::BSWAP:
case ISD::BITREVERSE: {
// Convert BSWAP/BITREVERSE to GREVI to enable GREVI combinining.
@@ -2479,6 +2855,9 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
return convertFromScalableVector(VT, Src, DAG, Subtarget);
}
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT:
+ return lowerFP_TO_INT_SAT(Op, DAG);
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_SMAX:
@@ -2489,13 +2868,29 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::VECREDUCE_OR:
case ISD::VECREDUCE_XOR:
if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
- return lowerVectorMaskVECREDUCE(Op, DAG);
+ return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
return lowerVECREDUCE(Op, DAG);
case ISD::VECREDUCE_FADD:
case ISD::VECREDUCE_SEQ_FADD:
case ISD::VECREDUCE_FMIN:
case ISD::VECREDUCE_FMAX:
return lowerFPVECREDUCE(Op, DAG);
+ case ISD::VP_REDUCE_ADD:
+ case ISD::VP_REDUCE_UMAX:
+ case ISD::VP_REDUCE_SMAX:
+ case ISD::VP_REDUCE_UMIN:
+ case ISD::VP_REDUCE_SMIN:
+ case ISD::VP_REDUCE_FADD:
+ case ISD::VP_REDUCE_SEQ_FADD:
+ case ISD::VP_REDUCE_FMIN:
+ case ISD::VP_REDUCE_FMAX:
+ return lowerVPREDUCE(Op, DAG);
+ case ISD::VP_REDUCE_AND:
+ case ISD::VP_REDUCE_OR:
+ case ISD::VP_REDUCE_XOR:
+ if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
+ return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
+ return lowerVPREDUCE(Op, DAG);
case ISD::INSERT_SUBVECTOR:
return lowerINSERT_SUBVECTOR(Op, DAG);
case ISD::EXTRACT_SUBVECTOR:
@@ -2538,9 +2933,11 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return lowerFixedLengthVectorStoreToRVV(Op, DAG);
return Op;
case ISD::MLOAD:
- return lowerMLOAD(Op, DAG);
+ case ISD::VP_LOAD:
+ return lowerMaskedLoad(Op, DAG);
case ISD::MSTORE:
- return lowerMSTORE(Op, DAG);
+ case ISD::VP_STORE:
+ return lowerMaskedStore(Op, DAG);
case ISD::SETCC:
return lowerFixedLengthVectorSetccToRVV(Op, DAG);
case ISD::ADD:
@@ -2617,14 +3014,19 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return lowerToScalableOp(Op, DAG, RISCVISD::FMAXNUM_VL);
case ISD::ABS:
return lowerABS(Op, DAG);
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTTZ_ZERO_UNDEF:
+ return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
case ISD::VSELECT:
return lowerFixedLengthVectorSelectToRVV(Op, DAG);
case ISD::FCOPYSIGN:
return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
case ISD::MGATHER:
- return lowerMGATHER(Op, DAG);
+ case ISD::VP_GATHER:
+ return lowerMaskedGather(Op, DAG);
case ISD::MSCATTER:
- return lowerMSCATTER(Op, DAG);
+ case ISD::VP_SCATTER:
+ return lowerMaskedScatter(Op, DAG);
case ISD::FLT_ROUNDS_:
return lowerGET_ROUNDING(Op, DAG);
case ISD::SET_ROUNDING:
@@ -2932,7 +3334,7 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
- SDValue TargetCC = DAG.getTargetConstant(CCVal, DL, XLenVT);
+ SDValue TargetCC = DAG.getCondCode(CCVal);
SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
}
@@ -2941,7 +3343,7 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// (select condv, truev, falsev)
// -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
SDValue Zero = DAG.getConstant(0, DL, XLenVT);
- SDValue SetNE = DAG.getTargetConstant(ISD::SETNE, DL, XLenVT);
+ SDValue SetNE = DAG.getCondCode(ISD::SETNE);
SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
@@ -3200,7 +3602,7 @@ SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
// Fall back to use a stack store and stride x0 vector load. Use X0 as VL.
return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VecVT, Lo, Hi,
- DAG.getRegister(RISCV::X0, MVT::i64));
+ DAG.getTargetConstant(RISCV::VLMaxSentinel, DL, MVT::i64));
}
// Custom-lower extensions from mask vectors by using a vselect either with 1
@@ -3483,7 +3885,7 @@ static SDValue lowerVectorIntrinsicSplats(SDValue Op, SelectionDAG &DAG,
Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
"Unexpected opcode");
- if (!Subtarget.hasStdExtV())
+ if (!Subtarget.hasVInstructions())
return SDValue();
bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
@@ -3645,7 +4047,7 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::riscv_vslide1down_mask: {
// We need to special case these when the scalar is larger than XLen.
unsigned NumOps = Op.getNumOperands();
- bool IsMasked = NumOps == 6;
+ bool IsMasked = NumOps == 7;
unsigned OpOffset = IsMasked ? 1 : 0;
SDValue Scalar = Op.getOperand(2 + OpOffset);
if (Scalar.getValueType().bitsLE(XLenVT))
@@ -3670,7 +4072,7 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
DAG.getConstant(1, DL, XLenVT));
// Double the VL since we halved SEW.
- SDValue VL = Op.getOperand(NumOps - 1);
+ SDValue VL = Op.getOperand(NumOps - (1 + OpOffset));
SDValue I32VL =
DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
@@ -3699,7 +4101,7 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return Vec;
// Apply mask after the operation.
- SDValue Mask = Op.getOperand(NumOps - 2);
+ SDValue Mask = Op.getOperand(NumOps - 3);
SDValue MaskedOff = Op.getOperand(1);
return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, Mask, Vec, MaskedOff, VL);
}
@@ -3710,9 +4112,113 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
+ unsigned IntNo = Op.getConstantOperandVal(1);
+ switch (IntNo) {
+ default:
+ break;
+ case Intrinsic::riscv_masked_strided_load: {
+ SDLoc DL(Op);
+ MVT XLenVT = Subtarget.getXLenVT();
+
+ // If the mask is known to be all ones, optimize to an unmasked intrinsic;
+ // the selection of the masked intrinsics doesn't do this for us.
+ SDValue Mask = Op.getOperand(5);
+ bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
+
+ MVT VT = Op->getSimpleValueType(0);
+ MVT ContainerVT = getContainerForFixedLengthVector(VT);
+
+ SDValue PassThru = Op.getOperand(2);
+ if (!IsUnmasked) {
+ MVT MaskVT =
+ MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
+ Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
+ PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
+ }
+
+ SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
+
+ SDValue IntID = DAG.getTargetConstant(
+ IsUnmasked ? Intrinsic::riscv_vlse : Intrinsic::riscv_vlse_mask, DL,
+ XLenVT);
+
+ auto *Load = cast<MemIntrinsicSDNode>(Op);
+ SmallVector<SDValue, 8> Ops{Load->getChain(), IntID};
+ if (!IsUnmasked)
+ Ops.push_back(PassThru);
+ Ops.push_back(Op.getOperand(3)); // Ptr
+ Ops.push_back(Op.getOperand(4)); // Stride
+ if (!IsUnmasked)
+ Ops.push_back(Mask);
+ Ops.push_back(VL);
+ if (!IsUnmasked) {
+ SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
+ Ops.push_back(Policy);
+ }
+
+ SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
+ SDValue Result =
+ DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
+ Load->getMemoryVT(), Load->getMemOperand());
+ SDValue Chain = Result.getValue(1);
+ Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
+ return DAG.getMergeValues({Result, Chain}, DL);
+ }
+ }
+
return lowerVectorIntrinsicSplats(Op, DAG, Subtarget);
}
+SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
+ SelectionDAG &DAG) const {
+ unsigned IntNo = Op.getConstantOperandVal(1);
+ switch (IntNo) {
+ default:
+ break;
+ case Intrinsic::riscv_masked_strided_store: {
+ SDLoc DL(Op);
+ MVT XLenVT = Subtarget.getXLenVT();
+
+ // If the mask is known to be all ones, optimize to an unmasked intrinsic;
+ // the selection of the masked intrinsics doesn't do this for us.
+ SDValue Mask = Op.getOperand(5);
+ bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
+
+ SDValue Val = Op.getOperand(2);
+ MVT VT = Val.getSimpleValueType();
+ MVT ContainerVT = getContainerForFixedLengthVector(VT);
+
+ Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
+ if (!IsUnmasked) {
+ MVT MaskVT =
+ MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
+ Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
+ }
+
+ SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
+
+ SDValue IntID = DAG.getTargetConstant(
+ IsUnmasked ? Intrinsic::riscv_vsse : Intrinsic::riscv_vsse_mask, DL,
+ XLenVT);
+
+ auto *Store = cast<MemIntrinsicSDNode>(Op);
+ SmallVector<SDValue, 8> Ops{Store->getChain(), IntID};
+ Ops.push_back(Val);
+ Ops.push_back(Op.getOperand(3)); // Ptr
+ Ops.push_back(Op.getOperand(4)); // Stride
+ if (!IsUnmasked)
+ Ops.push_back(Mask);
+ Ops.push_back(VL);
+
+ return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, Store->getVTList(),
+ Ops, Store->getMemoryVT(),
+ Store->getMemOperand());
+ }
+ }
+
+ return SDValue();
+}
+
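For context, riscv_masked_strided_load as rewritten above reads element I from Ptr + I*Stride when its mask bit is set and otherwise keeps the pass-through value; the store is the obvious dual. A scalar reference model of the load semantics (a sketch of the contract, not the selection code; the helper name is made up):

#include <cstdint>
#include <cstring>
#include <vector>

// Reference semantics of a masked strided load of N 32-bit elements:
// element I is read from Ptr + I * Stride when Mask[I] is set, otherwise
// the pass-through value is kept.
static std::vector<uint32_t>
maskedStridedLoad(const uint8_t *Ptr, int64_t Stride, unsigned N,
                  const std::vector<bool> &Mask,
                  const std::vector<uint32_t> &PassThru) {
  std::vector<uint32_t> Result(PassThru);
  for (unsigned I = 0; I < N; ++I) {
    if (!Mask[I])
      continue;
    uint32_t V;
    std::memcpy(&V, Ptr + (int64_t)I * Stride, sizeof(V));
    Result[I] = V;
  }
  return Result;
}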
static MVT getLMUL1VT(MVT VT) {
assert(VT.getVectorElementType().getSizeInBits() <= 64 &&
"Unexpected vector MVT");
@@ -3744,14 +4250,18 @@ static unsigned getRVVReductionOp(unsigned ISDOpcode) {
}
}
-SDValue RISCVTargetLowering::lowerVectorMaskVECREDUCE(SDValue Op,
- SelectionDAG &DAG) const {
+SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
+ SelectionDAG &DAG,
+ bool IsVP) const {
SDLoc DL(Op);
- SDValue Vec = Op.getOperand(0);
+ SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
MVT VecVT = Vec.getSimpleValueType();
assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
Op.getOpcode() == ISD::VECREDUCE_OR ||
- Op.getOpcode() == ISD::VECREDUCE_XOR) &&
+ Op.getOpcode() == ISD::VECREDUCE_XOR ||
+ Op.getOpcode() == ISD::VP_REDUCE_AND ||
+ Op.getOpcode() == ISD::VP_REDUCE_OR ||
+ Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
"Unexpected reduction lowering");
MVT XLenVT = Subtarget.getXLenVT();
@@ -3765,29 +4275,62 @@ SDValue RISCVTargetLowering::lowerVectorMaskVECREDUCE(SDValue Op,
}
SDValue Mask, VL;
- std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
+ if (IsVP) {
+ Mask = Op.getOperand(2);
+ VL = Op.getOperand(3);
+ } else {
+ std::tie(Mask, VL) =
+ getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
+ }
+
+ unsigned BaseOpc;
+ ISD::CondCode CC;
SDValue Zero = DAG.getConstant(0, DL, XLenVT);
switch (Op.getOpcode()) {
default:
llvm_unreachable("Unhandled reduction");
case ISD::VECREDUCE_AND:
- // vpopc ~x == 0
- Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, Mask, VL);
- Vec = DAG.getNode(RISCVISD::VPOPC_VL, DL, XLenVT, Vec, Mask, VL);
- return DAG.getSetCC(DL, XLenVT, Vec, Zero, ISD::SETEQ);
+ case ISD::VP_REDUCE_AND: {
+ // vcpop ~x == 0
+ SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
+ Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
+ Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
+ CC = ISD::SETEQ;
+ BaseOpc = ISD::AND;
+ break;
+ }
case ISD::VECREDUCE_OR:
- // vpopc x != 0
- Vec = DAG.getNode(RISCVISD::VPOPC_VL, DL, XLenVT, Vec, Mask, VL);
- return DAG.getSetCC(DL, XLenVT, Vec, Zero, ISD::SETNE);
- case ISD::VECREDUCE_XOR: {
- // ((vpopc x) & 1) != 0
+ case ISD::VP_REDUCE_OR:
+ // vcpop x != 0
+ Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
+ CC = ISD::SETNE;
+ BaseOpc = ISD::OR;
+ break;
+ case ISD::VECREDUCE_XOR:
+ case ISD::VP_REDUCE_XOR: {
+ // ((vcpop x) & 1) != 0
SDValue One = DAG.getConstant(1, DL, XLenVT);
- Vec = DAG.getNode(RISCVISD::VPOPC_VL, DL, XLenVT, Vec, Mask, VL);
+ Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
- return DAG.getSetCC(DL, XLenVT, Vec, Zero, ISD::SETNE);
+ CC = ISD::SETNE;
+ BaseOpc = ISD::XOR;
+ break;
}
}
+
+ SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
+
+ if (!IsVP)
+ return SetCC;
+
+ // Now include the start value in the operation.
+ // Note that we must return the start value when no elements are operated
+ // upon. The vcpop instructions we've emitted in each case above will return
+ // 0 for an inactive vector, and so we've already received the neutral value:
+ // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
+ // can simply include the start value.
+ return DAG.getNode(BaseOpc, DL, XLenVT, SetCC, Op.getOperand(0));
}
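The neutral-value argument in the comment above is easy to sanity-check with a scalar model in which vcpop is a popcount over the active lanes. A sketch, assuming C++20 std::popcount; names are illustrative.

#include <bit>
#include <cstdint>

// Scalar model of the vcpop-based mask reductions: Vec holds one bit per
// lane, Active marks the lanes selected by the mask and VL.
static bool reduceOr(uint64_t Vec, uint64_t Active) {
  return std::popcount(Vec & Active) != 0;        // vcpop x != 0
}
static bool reduceAnd(uint64_t Vec, uint64_t Active) {
  return std::popcount(~Vec & Active) == 0;       // vcpop ~x == 0
}
static bool reduceXor(uint64_t Vec, uint64_t Active) {
  return (std::popcount(Vec & Active) & 1) != 0;  // ((vcpop x) & 1) != 0
}

// VP forms fold in the start value. With no active lanes the popcounts are
// 0, so reduceAnd yields true and reduceOr/reduceXor yield false (the
// neutral elements), and the result is simply the start value.
static bool vpReduceAnd(bool Start, uint64_t Vec, uint64_t Active) {
  return Start && reduceAnd(Vec, Active);
}
static bool vpReduceOr(bool Start, uint64_t Vec, uint64_t Active) {
  return Start || reduceOr(Vec, Active);
}
static bool vpReduceXor(bool Start, uint64_t Vec, uint64_t Active) {
  return Start != reduceXor(Vec, Active);         // xor of two bools
}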
SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
@@ -3833,8 +4376,8 @@ SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
SDValue NeutralElem =
DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
SDValue IdentitySplat = DAG.getSplatVector(M1VT, DL, NeutralElem);
- SDValue Reduction =
- DAG.getNode(RVVOpcode, DL, M1VT, Vec, IdentitySplat, Mask, VL);
+ SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, DAG.getUNDEF(M1VT), Vec,
+ IdentitySplat, Mask, VL);
SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
DAG.getConstant(0, DL, Subtarget.getXLenVT()));
return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType());
@@ -3892,12 +4435,83 @@ SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
// FIXME: This is a VLMAX splat which might be too large and can prevent
// vsetvli removal.
SDValue ScalarSplat = DAG.getSplatVector(M1VT, DL, ScalarVal);
- SDValue Reduction =
- DAG.getNode(RVVOpcode, DL, M1VT, VectorVal, ScalarSplat, Mask, VL);
+ SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, DAG.getUNDEF(M1VT),
+ VectorVal, ScalarSplat, Mask, VL);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
DAG.getConstant(0, DL, Subtarget.getXLenVT()));
}
+static unsigned getRVVVPReductionOp(unsigned ISDOpcode) {
+ switch (ISDOpcode) {
+ default:
+ llvm_unreachable("Unhandled reduction");
+ case ISD::VP_REDUCE_ADD:
+ return RISCVISD::VECREDUCE_ADD_VL;
+ case ISD::VP_REDUCE_UMAX:
+ return RISCVISD::VECREDUCE_UMAX_VL;
+ case ISD::VP_REDUCE_SMAX:
+ return RISCVISD::VECREDUCE_SMAX_VL;
+ case ISD::VP_REDUCE_UMIN:
+ return RISCVISD::VECREDUCE_UMIN_VL;
+ case ISD::VP_REDUCE_SMIN:
+ return RISCVISD::VECREDUCE_SMIN_VL;
+ case ISD::VP_REDUCE_AND:
+ return RISCVISD::VECREDUCE_AND_VL;
+ case ISD::VP_REDUCE_OR:
+ return RISCVISD::VECREDUCE_OR_VL;
+ case ISD::VP_REDUCE_XOR:
+ return RISCVISD::VECREDUCE_XOR_VL;
+ case ISD::VP_REDUCE_FADD:
+ return RISCVISD::VECREDUCE_FADD_VL;
+ case ISD::VP_REDUCE_SEQ_FADD:
+ return RISCVISD::VECREDUCE_SEQ_FADD_VL;
+ case ISD::VP_REDUCE_FMAX:
+ return RISCVISD::VECREDUCE_FMAX_VL;
+ case ISD::VP_REDUCE_FMIN:
+ return RISCVISD::VECREDUCE_FMIN_VL;
+ }
+}
+
+SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ SDValue Vec = Op.getOperand(1);
+ EVT VecEVT = Vec.getValueType();
+
+ // TODO: The type may need to be widened rather than split. Or widened before
+ // it can be split.
+ if (!isTypeLegal(VecEVT))
+ return SDValue();
+
+ MVT VecVT = VecEVT.getSimpleVT();
+ MVT VecEltVT = VecVT.getVectorElementType();
+ unsigned RVVOpcode = getRVVVPReductionOp(Op.getOpcode());
+
+ MVT ContainerVT = VecVT;
+ if (VecVT.isFixedLengthVector()) {
+ ContainerVT = getContainerForFixedLengthVector(VecVT);
+ Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
+ }
+
+ SDValue VL = Op.getOperand(3);
+ SDValue Mask = Op.getOperand(2);
+
+ MVT M1VT = getLMUL1VT(ContainerVT);
+ MVT XLenVT = Subtarget.getXLenVT();
+ MVT ResVT = !VecVT.isInteger() || VecEltVT.bitsGE(XLenVT) ? VecEltVT : XLenVT;
+
+ // FIXME: This is a VLMAX splat which might be too large and can prevent
+ // vsetvli removal.
+ SDValue StartSplat = DAG.getSplatVector(M1VT, DL, Op.getOperand(0));
+ SDValue Reduction =
+ DAG.getNode(RVVOpcode, DL, M1VT, StartSplat, Vec, StartSplat, Mask, VL);
+ SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
+ DAG.getConstant(0, DL, Subtarget.getXLenVT()));
+ if (!VecVT.isInteger())
+ return Elt0;
+ return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType());
+}
+
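As a reference for what the VP reduction nodes must compute: only lanes below the explicit vector length with their mask bit set participate, and the scalar start operand seeds the accumulation, which the lowering above achieves by splatting it into the LMUL=1 start vector. A scalar sketch of vp.reduce.add semantics (not the lowering itself):

#include <cstdint>
#include <vector>

// Reference semantics of llvm.vp.reduce.add: accumulate the first EVL
// elements whose mask bit is set, starting from the scalar Start operand.
static int64_t vpReduceAdd(int64_t Start, const std::vector<int64_t> &Vec,
                           const std::vector<bool> &Mask, unsigned EVL) {
  int64_t Acc = Start;
  for (unsigned I = 0; I < EVL && I < Vec.size(); ++I)
    if (Mask[I])
      Acc += Vec[I];
  return Acc;
}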
SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
SelectionDAG &DAG) const {
SDValue Vec = Op.getOperand(0);
@@ -4338,36 +4952,63 @@ RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
Store->getMemoryVT(), Store->getMemOperand());
}
-SDValue RISCVTargetLowering::lowerMLOAD(SDValue Op, SelectionDAG &DAG) const {
- auto *Load = cast<MaskedLoadSDNode>(Op);
-
+SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
+ SelectionDAG &DAG) const {
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
- MVT XLenVT = Subtarget.getXLenVT();
- SDValue Mask = Load->getMask();
- SDValue PassThru = Load->getPassThru();
- SDValue VL;
+ const auto *MemSD = cast<MemSDNode>(Op);
+ EVT MemVT = MemSD->getMemoryVT();
+ MachineMemOperand *MMO = MemSD->getMemOperand();
+ SDValue Chain = MemSD->getChain();
+ SDValue BasePtr = MemSD->getBasePtr();
+
+ SDValue Mask, PassThru, VL;
+ if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
+ Mask = VPLoad->getMask();
+ PassThru = DAG.getUNDEF(VT);
+ VL = VPLoad->getVectorLength();
+ } else {
+ const auto *MLoad = cast<MaskedLoadSDNode>(Op);
+ Mask = MLoad->getMask();
+ PassThru = MLoad->getPassThru();
+ }
+
+ bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
+
+ MVT XLenVT = Subtarget.getXLenVT();
MVT ContainerVT = VT;
if (VT.isFixedLengthVector()) {
ContainerVT = getContainerForFixedLengthVector(VT);
- MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
-
- Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
- VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
- } else
- VL = DAG.getRegister(RISCV::X0, XLenVT);
+ if (!IsUnmasked) {
+ MVT MaskVT =
+ MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
+ Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
+ }
+ }
+
+ if (!VL)
+ VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
+
+ unsigned IntID =
+ IsUnmasked ? Intrinsic::riscv_vle : Intrinsic::riscv_vle_mask;
+ SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
+ if (!IsUnmasked)
+ Ops.push_back(PassThru);
+ Ops.push_back(BasePtr);
+ if (!IsUnmasked)
+ Ops.push_back(Mask);
+ Ops.push_back(VL);
+ if (!IsUnmasked)
+ Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
- SDValue IntID = DAG.getTargetConstant(Intrinsic::riscv_vle_mask, DL, XLenVT);
- SDValue Ops[] = {Load->getChain(), IntID, PassThru,
- Load->getBasePtr(), Mask, VL};
+
SDValue Result =
- DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
- Load->getMemoryVT(), Load->getMemOperand());
- SDValue Chain = Result.getValue(1);
+ DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
+ Chain = Result.getValue(1);
if (VT.isFixedLengthVector())
Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
@@ -4375,32 +5016,58 @@ SDValue RISCVTargetLowering::lowerMLOAD(SDValue Op, SelectionDAG &DAG) const {
return DAG.getMergeValues({Result, Chain}, DL);
}
-SDValue RISCVTargetLowering::lowerMSTORE(SDValue Op, SelectionDAG &DAG) const {
- auto *Store = cast<MaskedStoreSDNode>(Op);
-
+SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
+ SelectionDAG &DAG) const {
SDLoc DL(Op);
- SDValue Val = Store->getValue();
- SDValue Mask = Store->getMask();
+
+ const auto *MemSD = cast<MemSDNode>(Op);
+ EVT MemVT = MemSD->getMemoryVT();
+ MachineMemOperand *MMO = MemSD->getMemOperand();
+ SDValue Chain = MemSD->getChain();
+ SDValue BasePtr = MemSD->getBasePtr();
+ SDValue Val, Mask, VL;
+
+ if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
+ Val = VPStore->getValue();
+ Mask = VPStore->getMask();
+ VL = VPStore->getVectorLength();
+ } else {
+ const auto *MStore = cast<MaskedStoreSDNode>(Op);
+ Val = MStore->getValue();
+ Mask = MStore->getMask();
+ }
+
+ bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
+
MVT VT = Val.getSimpleValueType();
MVT XLenVT = Subtarget.getXLenVT();
- SDValue VL;
MVT ContainerVT = VT;
if (VT.isFixedLengthVector()) {
ContainerVT = getContainerForFixedLengthVector(VT);
- MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
- Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
- VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
- } else
- VL = DAG.getRegister(RISCV::X0, XLenVT);
+ if (!IsUnmasked) {
+ MVT MaskVT =
+ MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
+ Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
+ }
+ }
- SDValue IntID = DAG.getTargetConstant(Intrinsic::riscv_vse_mask, DL, XLenVT);
- return DAG.getMemIntrinsicNode(
- ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
- {Store->getChain(), IntID, Val, Store->getBasePtr(), Mask, VL},
- Store->getMemoryVT(), Store->getMemOperand());
+ if (!VL)
+ VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
+
+ unsigned IntID =
+ IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
+ SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
+ Ops.push_back(Val);
+ Ops.push_back(BasePtr);
+ if (!IsUnmasked)
+ Ops.push_back(Mask);
+ Ops.push_back(VL);
+
+ return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
+ DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
}
SDValue
@@ -4596,36 +5263,57 @@ SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG,
return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
}
-// Custom lower MGATHER to a legalized form for RVV. It will then be matched to
-// a RVV indexed load. The RVV indexed load instructions only support the
-// "unsigned unscaled" addressing mode; indices are implicitly zero-extended or
-// truncated to XLEN and are treated as byte offsets. Any signed or scaled
-// indexing is extended to the XLEN value type and scaled accordingly.
-SDValue RISCVTargetLowering::lowerMGATHER(SDValue Op, SelectionDAG &DAG) const {
- auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
+// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
+// matched to a RVV indexed load. The RVV indexed load instructions only
+// support the "unsigned unscaled" addressing mode; indices are implicitly
+// zero-extended or truncated to XLEN and are treated as byte offsets. Any
+// signed or scaled indexing is extended to the XLEN value type and scaled
+// accordingly.
+SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
+ SelectionDAG &DAG) const {
SDLoc DL(Op);
+ MVT VT = Op.getSimpleValueType();
- SDValue Index = MGN->getIndex();
- SDValue Mask = MGN->getMask();
- SDValue PassThru = MGN->getPassThru();
+ const auto *MemSD = cast<MemSDNode>(Op.getNode());
+ EVT MemVT = MemSD->getMemoryVT();
+ MachineMemOperand *MMO = MemSD->getMemOperand();
+ SDValue Chain = MemSD->getChain();
+ SDValue BasePtr = MemSD->getBasePtr();
+
+ ISD::LoadExtType LoadExtType;
+ SDValue Index, Mask, PassThru, VL;
+
+ if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
+ Index = VPGN->getIndex();
+ Mask = VPGN->getMask();
+ PassThru = DAG.getUNDEF(VT);
+ VL = VPGN->getVectorLength();
+ // VP doesn't support extending loads.
+ LoadExtType = ISD::NON_EXTLOAD;
+ } else {
+ // Else it must be a MGATHER.
+ auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
+ Index = MGN->getIndex();
+ Mask = MGN->getMask();
+ PassThru = MGN->getPassThru();
+ LoadExtType = MGN->getExtensionType();
+ }
- MVT VT = Op.getSimpleValueType();
MVT IndexVT = Index.getSimpleValueType();
MVT XLenVT = Subtarget.getXLenVT();
assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
"Unexpected VTs!");
- assert(MGN->getBasePtr().getSimpleValueType() == XLenVT &&
- "Unexpected pointer type");
+ assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
// Targets have to explicitly opt-in for extending vector loads.
- assert(MGN->getExtensionType() == ISD::NON_EXTLOAD &&
- "Unexpected extending MGATHER");
+ assert(LoadExtType == ISD::NON_EXTLOAD &&
+ "Unexpected extending MGATHER/VP_GATHER");
+ (void)LoadExtType;
// If the mask is known to be all ones, optimize to an unmasked intrinsic;
// the selection of the masked intrinsics doesn't do this for us.
bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
- SDValue VL;
MVT ContainerVT = VT;
if (VT.isFixedLengthVector()) {
// We need to use the larger of the result and index type to determine the
@@ -4648,28 +5336,28 @@ SDValue RISCVTargetLowering::lowerMGATHER(SDValue Op, SelectionDAG &DAG) const {
Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
}
+ }
- VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
- } else
- VL = DAG.getRegister(RISCV::X0, XLenVT);
+ if (!VL)
+ VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
unsigned IntID =
IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
- SmallVector<SDValue, 8> Ops{MGN->getChain(),
- DAG.getTargetConstant(IntID, DL, XLenVT)};
+ SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
if (!IsUnmasked)
Ops.push_back(PassThru);
- Ops.push_back(MGN->getBasePtr());
+ Ops.push_back(BasePtr);
Ops.push_back(Index);
if (!IsUnmasked)
Ops.push_back(Mask);
Ops.push_back(VL);
+ if (!IsUnmasked)
+ Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
SDValue Result =
- DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
- MGN->getMemoryVT(), MGN->getMemOperand());
- SDValue Chain = Result.getValue(1);
+ DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
+ Chain = Result.getValue(1);
if (VT.isFixedLengthVector())
Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
@@ -4677,18 +5365,39 @@ SDValue RISCVTargetLowering::lowerMGATHER(SDValue Op, SelectionDAG &DAG) const {
return DAG.getMergeValues({Result, Chain}, DL);
}
-// Custom lower MSCATTER to a legalized form for RVV. It will then be matched to
-// a RVV indexed store. The RVV indexed store instructions only support the
-// "unsigned unscaled" addressing mode; indices are implicitly zero-extended or
-// truncated to XLEN and are treated as byte offsets. Any signed or scaled
-// indexing is extended to the XLEN value type and scaled accordingly.
-SDValue RISCVTargetLowering::lowerMSCATTER(SDValue Op,
- SelectionDAG &DAG) const {
- auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
+// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
+// matched to a RVV indexed store. The RVV indexed store instructions only
+// support the "unsigned unscaled" addressing mode; indices are implicitly
+// zero-extended or truncated to XLEN and are treated as byte offsets. Any
+// signed or scaled indexing is extended to the XLEN value type and scaled
+// accordingly.
+SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
+ SelectionDAG &DAG) const {
SDLoc DL(Op);
- SDValue Index = MSN->getIndex();
- SDValue Mask = MSN->getMask();
- SDValue Val = MSN->getValue();
+ const auto *MemSD = cast<MemSDNode>(Op.getNode());
+ EVT MemVT = MemSD->getMemoryVT();
+ MachineMemOperand *MMO = MemSD->getMemOperand();
+ SDValue Chain = MemSD->getChain();
+ SDValue BasePtr = MemSD->getBasePtr();
+
+ bool IsTruncatingStore = false;
+ SDValue Index, Mask, Val, VL;
+
+ if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
+ Index = VPSN->getIndex();
+ Mask = VPSN->getMask();
+ Val = VPSN->getValue();
+ VL = VPSN->getVectorLength();
+ // VP doesn't support truncating stores.
+ IsTruncatingStore = false;
+ } else {
+ // Else it must be a MSCATTER.
+ auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
+ Index = MSN->getIndex();
+ Mask = MSN->getMask();
+ Val = MSN->getValue();
+ IsTruncatingStore = MSN->isTruncatingStore();
+ }
MVT VT = Val.getSimpleValueType();
MVT IndexVT = Index.getSimpleValueType();
@@ -4696,21 +5405,20 @@ SDValue RISCVTargetLowering::lowerMSCATTER(SDValue Op,
assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
"Unexpected VTs!");
- assert(MSN->getBasePtr().getSimpleValueType() == XLenVT &&
- "Unexpected pointer type");
+ assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
// Targets have to explicitly opt-in for extending vector loads and
// truncating vector stores.
- assert(!MSN->isTruncatingStore() && "Unexpected extending MSCATTER");
+ assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
+ (void)IsTruncatingStore;
// If the mask is known to be all ones, optimize to an unmasked intrinsic;
// the selection of the masked intrinsics doesn't do this for us.
bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
- SDValue VL;
+ MVT ContainerVT = VT;
if (VT.isFixedLengthVector()) {
// We need to use the larger of the value and index type to determine the
// scalable type to use so we don't increase LMUL for any operand/result.
- MVT ContainerVT;
if (VT.bitsGE(IndexVT)) {
ContainerVT = getContainerForFixedLengthVector(VT);
IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
@@ -4729,24 +5437,23 @@ SDValue RISCVTargetLowering::lowerMSCATTER(SDValue Op,
MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
}
+ }
- VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
- } else
- VL = DAG.getRegister(RISCV::X0, XLenVT);
+ if (!VL)
+ VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
unsigned IntID =
IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
- SmallVector<SDValue, 8> Ops{MSN->getChain(),
- DAG.getTargetConstant(IntID, DL, XLenVT)};
+ SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
Ops.push_back(Val);
- Ops.push_back(MSN->getBasePtr());
+ Ops.push_back(BasePtr);
Ops.push_back(Index);
if (!IsUnmasked)
Ops.push_back(Mask);
Ops.push_back(VL);
- return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, MSN->getVTList(), Ops,
- MSN->getMemoryVT(), MSN->getMemOperand());
+ return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
+ DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
}
SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
@@ -4754,7 +5461,7 @@ SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
const MVT XLenVT = Subtarget.getXLenVT();
SDLoc DL(Op);
SDValue Chain = Op->getOperand(0);
- SDValue SysRegNo = DAG.getConstant(
+ SDValue SysRegNo = DAG.getTargetConstant(
RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
@@ -4786,7 +5493,7 @@ SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
SDLoc DL(Op);
SDValue Chain = Op->getOperand(0);
SDValue RMValue = Op->getOperand(1);
- SDValue SysRegNo = DAG.getConstant(
+ SDValue SysRegNo = DAG.getTargetConstant(
RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
// Encoding used for rounding mode in RISCV differs from that used in
@@ -4891,7 +5598,8 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
return;
if (!isTypeLegal(Op0.getValueType()))
return;
- unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
+ unsigned Opc =
+ IsSigned ? RISCVISD::FCVT_W_RTZ_RV64 : RISCVISD::FCVT_WU_RTZ_RV64;
SDValue Res = DAG.getNode(Opc, DL, MVT::i64, Op0);
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
return;
@@ -4973,8 +5681,6 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::SUB:
assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");
- if (N->getOperand(1).getOpcode() == ISD::Constant)
- return;
Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
break;
case ISD::SHL:
@@ -4982,9 +5688,26 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::SRL:
assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");
- if (N->getOperand(1).getOpcode() == ISD::Constant)
- return;
- Results.push_back(customLegalizeToWOp(N, DAG));
+ if (N->getOperand(1).getOpcode() != ISD::Constant) {
+ Results.push_back(customLegalizeToWOp(N, DAG));
+ break;
+ }
+
+ // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
+ // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
+ // shift amount.
+ if (N->getOpcode() == ISD::SHL) {
+ SDLoc DL(N);
+ SDValue NewOp0 =
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
+ SDValue NewOp1 =
+ DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
+ SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
+ SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
+ DAG.getValueType(MVT::i32));
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
+ }
+
break;
case ISD::ROTL:
case ISD::ROTR:
@@ -5098,10 +5821,12 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
// scalar types in order to improve codegen. Bitcast the vector to a
// one-element vector type whose element type is the same as the result
// type, and extract the first element.
- LLVMContext &Context = *DAG.getContext();
- SDValue BVec = DAG.getBitcast(EVT::getVectorVT(Context, VT, 1), Op0);
- Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
- DAG.getConstant(0, DL, XLenVT)));
+ EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
+ if (isTypeLegal(BVT)) {
+ SDValue BVec = DAG.getBitcast(BVT, Op0);
+ Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
+ DAG.getConstant(0, DL, XLenVT)));
+ }
}
break;
}
@@ -5211,7 +5936,7 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
MVT XLenVT = Subtarget.getXLenVT();
// Use a VL of 1 to avoid processing more elements than we need.
- MVT MaskVT = MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount());
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
SDValue VL = DAG.getConstant(1, DL, XLenVT);
SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
@@ -5354,6 +6079,17 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
Results.push_back(V);
break;
+ case ISD::VP_REDUCE_ADD:
+ case ISD::VP_REDUCE_AND:
+ case ISD::VP_REDUCE_OR:
+ case ISD::VP_REDUCE_XOR:
+ case ISD::VP_REDUCE_SMAX:
+ case ISD::VP_REDUCE_UMAX:
+ case ISD::VP_REDUCE_SMIN:
+ case ISD::VP_REDUCE_UMIN:
+ if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
+ Results.push_back(V);
+ break;
case ISD::FLT_ROUNDS_: {
SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
SDValue Res = DAG.getNode(ISD::FLT_ROUNDS_, DL, VTs, N->getOperand(0));
@@ -5656,6 +6392,52 @@ static SDValue combineORToSHFL(SDValue Op, SelectionDAG &DAG,
DAG.getConstant(Match1->ShAmt, DL, VT));
}
+// Optimize (add (shl x, c0), (shl y, c1)) ->
+// (SLLI (SH*ADD x, y), c0), if c1-c0 is 1, 2, or 3.
+static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ // Perform this optimization only in the zba extension.
+ if (!Subtarget.hasStdExtZba())
+ return SDValue();
+
+ // Skip for vector types and larger types.
+ EVT VT = N->getValueType(0);
+ if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
+ return SDValue();
+
+ // The two operand nodes must be SHL and have no other use.
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
+ !N0->hasOneUse() || !N1->hasOneUse())
+ return SDValue();
+
+ // Check c0 and c1.
+ auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
+ auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
+ if (!N0C || !N1C)
+ return SDValue();
+ int64_t C0 = N0C->getSExtValue();
+ int64_t C1 = N1C->getSExtValue();
+ if (C0 <= 0 || C1 <= 0)
+ return SDValue();
+
+ // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
+ int64_t Bits = std::min(C0, C1);
+ int64_t Diff = std::abs(C0 - C1);
+ if (Diff != 1 && Diff != 2 && Diff != 3)
+ return SDValue();
+
+ // Build nodes.
+ SDLoc DL(N);
+ SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
+ SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
+ SDValue NA0 =
+ DAG.getNode(ISD::SHL, DL, VT, NL, DAG.getConstant(Diff, DL, VT));
+ SDValue NA1 = DAG.getNode(ISD::ADD, DL, VT, NA0, NS);
+ return DAG.getNode(ISD::SHL, DL, VT, NA1, DAG.getConstant(Bits, DL, VT));
+}
+
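A concrete instance of the transform above: with c0 = 5 and c1 = 7 the difference is 2, so (x << 5) + (y << 7) becomes ((y << 2) + x) << 5, i.e. an sh2add followed by an slli. A quick check of that identity, purely illustrative:

#include <cassert>
#include <cstdint>

// Checks the rewrite for c0 = 5, c1 = 7 (difference 2):
//   (x << 5) + (y << 7)  ==  ((y << 2) + x) << 5
// The inner shift-by-2-and-add maps onto sh2add from Zba and the outer
// shift onto slli, saving an instruction over two shifts plus an add.
static uint64_t addShlBefore(uint64_t X, uint64_t Y) {
  return (X << 5) + (Y << 7);
}
static uint64_t addShlAfter(uint64_t X, uint64_t Y) {
  return ((Y << 2) + X) << 5;
}

int main() {
  for (uint64_t X = 0; X < 64; ++X)
    for (uint64_t Y = 0; Y < 64; ++Y)
      assert(addShlBefore(X, Y) == addShlAfter(X, Y));
  return 0;
}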
// Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is
// non-zero, and to x when it is. Any repeated GREVI stage undoes itself.
// Combine (GORCI (GORCI x, C2), C1) -> (GORCI x, C1|C2). Repeated stage does
@@ -5691,17 +6473,27 @@ static SDValue combineGREVI_GORCI(SDNode *N, SelectionDAG &DAG) {
// Combine a constant select operand into its use:
//
-// (and (select_cc lhs, rhs, cc, -1, c), x)
-// -> (select_cc lhs, rhs, cc, x, (and, x, c)) [AllOnes=1]
-// (or (select_cc lhs, rhs, cc, 0, c), x)
-// -> (select_cc lhs, rhs, cc, x, (or, x, c)) [AllOnes=0]
-// (xor (select_cc lhs, rhs, cc, 0, c), x)
-// -> (select_cc lhs, rhs, cc, x, (xor, x, c)) [AllOnes=0]
-static SDValue combineSelectCCAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
- SelectionDAG &DAG, bool AllOnes) {
+// (and (select cond, -1, c), x)
+// -> (select cond, x, (and x, c)) [AllOnes=1]
+// (or (select cond, 0, c), x)
+// -> (select cond, x, (or x, c)) [AllOnes=0]
+// (xor (select cond, 0, c), x)
+// -> (select cond, x, (xor x, c)) [AllOnes=0]
+// (add (select cond, 0, c), x)
+// -> (select cond, x, (add x, c)) [AllOnes=0]
+// (sub x, (select cond, 0, c))
+// -> (select cond, x, (sub x, c)) [AllOnes=0]
+static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
+ SelectionDAG &DAG, bool AllOnes) {
EVT VT = N->getValueType(0);
- if (Slct.getOpcode() != RISCVISD::SELECT_CC || !Slct.hasOneUse())
+ // Skip vectors.
+ if (VT.isVector())
+ return SDValue();
+
+ if ((Slct.getOpcode() != ISD::SELECT &&
+ Slct.getOpcode() != RISCVISD::SELECT_CC) ||
+ !Slct.hasOneUse())
return SDValue();
auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
@@ -5709,8 +6501,9 @@ static SDValue combineSelectCCAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
};
bool SwapSelectOps;
- SDValue TrueVal = Slct.getOperand(3);
- SDValue FalseVal = Slct.getOperand(4);
+ unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
+ SDValue TrueVal = Slct.getOperand(1 + OpOffset);
+ SDValue FalseVal = Slct.getOperand(2 + OpOffset);
SDValue NonConstantVal;
if (isZeroOrAllOnes(TrueVal, AllOnes)) {
SwapSelectOps = false;
@@ -5724,40 +6517,120 @@ static SDValue combineSelectCCAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
 // Slct is now known to be the desired identity constant when CC is true.
TrueVal = OtherOp;
FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
- // Unless SwapSelectOps says CC should be false.
+ // Unless SwapSelectOps says the condition should be false.
if (SwapSelectOps)
std::swap(TrueVal, FalseVal);
- return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
- {Slct.getOperand(0), Slct.getOperand(1),
- Slct.getOperand(2), TrueVal, FalseVal});
+ if (Slct.getOpcode() == RISCVISD::SELECT_CC)
+ return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
+ {Slct.getOperand(0), Slct.getOperand(1),
+ Slct.getOperand(2), TrueVal, FalseVal});
+
+ return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
+ {Slct.getOperand(0), TrueVal, FalseVal});
}
// Attempt combineSelectAndUse on each operand of a commutative operator N.
-static SDValue combineSelectCCAndUseCommutative(SDNode *N, SelectionDAG &DAG,
- bool AllOnes) {
+static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
+ bool AllOnes) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- if (SDValue Result = combineSelectCCAndUse(N, N0, N1, DAG, AllOnes))
+ if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes))
return Result;
- if (SDValue Result = combineSelectCCAndUse(N, N1, N0, DAG, AllOnes))
+ if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes))
return Result;
return SDValue();
}
-static SDValue performANDCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI,
+// Transform (add (mul x, c0), c1) ->
+// (add (mul (add x, c1/c0), c0), c1%c0),
+// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
+// that should be excluded is when c0*(c1/c0) is simm12, which will lead
+// to an infinite loop in DAGCombine if transformed.
+// Or transform (add (mul x, c0), c1) ->
+// (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
+// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
+// case that should be excluded is when c0*(c1/c0+1) is simm12, which will
+// lead to an infinite loop in DAGCombine if transformed.
+// Or transform (add (mul x, c0), c1) ->
+// (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
+// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
+// case that should be excluded is when c0*(c1/c0-1) is simm12, which will
+// lead to an infinite loop in DAGCombine if transformed.
+// Or transform (add (mul x, c0), c1) ->
+// (mul (add x, c1/c0), c0),
+// if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
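+// e.g. (add (mul x, 100), 4096) -> (add (mul (add x, 40), 100), 96): 4096 is
+// not simm12, but 40 and 96 are, and 100*40 = 4000 is still not simm12.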
+static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ // Skip for vector types and larger types.
+ EVT VT = N->getValueType(0);
+ if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
+ return SDValue();
+ // The first operand node must be a MUL and have no other use.
+ SDValue N0 = N->getOperand(0);
+ if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
+ return SDValue();
+ // Check if c0 and c1 match the conditions above.
+ auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
+ auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!N0C || !N1C)
+ return SDValue();
+ int64_t C0 = N0C->getSExtValue();
+ int64_t C1 = N1C->getSExtValue();
+ int64_t CA, CB;
+ if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
+ return SDValue();
+ // Search for a proper CA (non-zero) and CB such that both are simm12.
+ if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
+ !isInt<12>(C0 * (C1 / C0))) {
+ CA = C1 / C0;
+ CB = C1 % C0;
+ } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
+ isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
+ CA = C1 / C0 + 1;
+ CB = C1 % C0 - C0;
+ } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
+ isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
+ CA = C1 / C0 - 1;
+ CB = C1 % C0 + C0;
+ } else
+ return SDValue();
+ // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
+ SDLoc DL(N);
+ SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
+ DAG.getConstant(CA, DL, VT));
+ SDValue New1 =
+ DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getConstant(C0, DL, VT));
+ return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getConstant(CB, DL, VT));
+}
+
+static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
- SelectionDAG &DAG = DCI.DAG;
+ if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
+ return V;
+ if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
+ return V;
+ // fold (add (select lhs, rhs, cc, 0, y), x) ->
+ // (select lhs, rhs, cc, x, (add x, y))
+ return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false);
+}
+
+static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG) {
+ // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
+ // (select lhs, rhs, cc, x, (sub x, y))
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false);
+}
- // fold (and (select_cc lhs, rhs, cc, -1, y), x) ->
+static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG) {
+ // fold (and (select lhs, rhs, cc, -1, y), x) ->
// (select lhs, rhs, cc, x, (and x, y))
- return combineSelectCCAndUseCommutative(N, DAG, true);
+ return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true);
}
-static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
+static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
- SelectionDAG &DAG = DCI.DAG;
if (Subtarget.hasStdExtZbp()) {
if (auto GREV = combineORToGREV(SDValue(N, 0), DAG, Subtarget))
return GREV;
@@ -5767,19 +6640,15 @@ static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
return SHFL;
}
- // fold (or (select_cc lhs, rhs, cc, 0, y), x) ->
- // (select lhs, rhs, cc, x, (or x, y))
- return combineSelectCCAndUseCommutative(N, DAG, false);
+ // fold (or (select cond, 0, y), x) ->
+ // (select cond, x, (or x, y))
+ return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false);
}
-static SDValue performXORCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI,
- const RISCVSubtarget &Subtarget) {
- SelectionDAG &DAG = DCI.DAG;
-
- // fold (xor (select_cc lhs, rhs, cc, 0, y), x) ->
- // (select lhs, rhs, cc, x, (xor x, y))
- return combineSelectCCAndUseCommutative(N, DAG, false);
+static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG) {
+ // fold (xor (select cond, 0, y), x) ->
+ // (select cond, x, (xor x, y))
+ return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false);
}
// Attempt to turn ANY_EXTEND into SIGN_EXTEND if the input to the ANY_EXTEND
@@ -5814,6 +6683,13 @@ static SDValue performANY_EXTENDCombine(SDNode *N,
break;
}
+ // Only handle cases where the result is used by a CopyToReg. That likely
+ // means the value is a liveout of the basic block. This helps prevent
+ // infinite combine loops like PR51206.
+ if (none_of(N->uses(),
+ [](SDNode *User) { return User->getOpcode() == ISD::CopyToReg; }))
+ return SDValue();
+
SmallVector<SDNode *, 4> SetCCs;
for (SDNode::use_iterator UI = Src.getNode()->use_begin(),
UE = Src.getNode()->use_end();
@@ -5859,10 +6735,105 @@ static SDValue performANY_EXTENDCombine(SDNode *N,
return SDValue(N, 0);
}
+// Try to form VWMUL or VWMULU.
+// FIXME: Support VWMULSU.
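+// e.g. (MUL_VL (VSEXT_VL x, mask, vl), (VSEXT_VL y, mask, vl)) with an
+// nxv4i32 result and nxv4i16 sources x/y becomes (VWMUL_VL x, y, mask, vl).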
+static SDValue combineMUL_VLToVWMUL(SDNode *N, SDValue Op0, SDValue Op1,
+ SelectionDAG &DAG) {
+ assert(N->getOpcode() == RISCVISD::MUL_VL && "Unexpected opcode");
+ bool IsSignExt = Op0.getOpcode() == RISCVISD::VSEXT_VL;
+ bool IsZeroExt = Op0.getOpcode() == RISCVISD::VZEXT_VL;
+ if ((!IsSignExt && !IsZeroExt) || !Op0.hasOneUse())
+ return SDValue();
+
+ SDValue Mask = N->getOperand(2);
+ SDValue VL = N->getOperand(3);
+
+ // Make sure the mask and VL match.
+ if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL)
+ return SDValue();
+
+ MVT VT = N->getSimpleValueType(0);
+
+ // Determine the narrow size for a widening multiply.
+ unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
+ MVT NarrowVT = MVT::getVectorVT(MVT::getIntegerVT(NarrowSize),
+ VT.getVectorElementCount());
+
+ SDLoc DL(N);
+
+ // See if the other operand is the same opcode.
+ if (Op0.getOpcode() == Op1.getOpcode()) {
+ if (!Op1.hasOneUse())
+ return SDValue();
+
+ // Make sure the mask and VL match.
+ if (Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL)
+ return SDValue();
+
+ Op1 = Op1.getOperand(0);
+ } else if (Op1.getOpcode() == RISCVISD::VMV_V_X_VL) {
+ // The operand is a splat of a scalar.
+
+ // The VL must be the same.
+ if (Op1.getOperand(1) != VL)
+ return SDValue();
+
+ // Get the scalar value.
+ Op1 = Op1.getOperand(0);
+
+ // See if we have enough sign bits or zero bits in the scalar to use a
+ // widening multiply by splatting to a smaller element size.
+ unsigned EltBits = VT.getScalarSizeInBits();
+ unsigned ScalarBits = Op1.getValueSizeInBits();
+ // Make sure we're getting all element bits from the scalar register.
+ // FIXME: Support implicit sign extension of vmv.v.x?
+ if (ScalarBits < EltBits)
+ return SDValue();
+
+ if (IsSignExt) {
+ if (DAG.ComputeNumSignBits(Op1) <= (ScalarBits - NarrowSize))
+ return SDValue();
+ } else {
+ APInt Mask = APInt::getBitsSetFrom(ScalarBits, NarrowSize);
+ if (!DAG.MaskedValueIsZero(Op1, Mask))
+ return SDValue();
+ }
+
+ Op1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT, Op1, VL);
+ } else
+ return SDValue();
+
+ Op0 = Op0.getOperand(0);
+
+ // Re-introduce narrower extends if needed.
+ unsigned ExtOpc = IsSignExt ? RISCVISD::VSEXT_VL : RISCVISD::VZEXT_VL;
+ if (Op0.getValueType() != NarrowVT)
+ Op0 = DAG.getNode(ExtOpc, DL, NarrowVT, Op0, Mask, VL);
+ if (Op1.getValueType() != NarrowVT)
+ Op1 = DAG.getNode(ExtOpc, DL, NarrowVT, Op1, Mask, VL);
+
+ unsigned WMulOpc = IsSignExt ? RISCVISD::VWMUL_VL : RISCVISD::VWMULU_VL;
+ return DAG.getNode(WMulOpc, DL, VT, Op0, Op1, Mask, VL);
+}
+
SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
+ // Helper to call SimplifyDemandedBits on an operand of N where only some low
+ // bits are demanded. N will be added to the Worklist if it was not deleted.
+ // Caller should return SDValue(N, 0) if this returns true.
+ auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
+ SDValue Op = N->getOperand(OpNo);
+ APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
+ if (!SimplifyDemandedBits(Op, Mask, DCI))
+ return false;
+
+ if (N->getOpcode() != ISD::DELETED_NODE)
+ DCI.AddToWorklist(N);
+ return true;
+ };
+
switch (N->getOpcode()) {
default:
break;
@@ -5914,147 +6885,101 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
case RISCVISD::ROLW:
case RISCVISD::RORW: {
// Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
- APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
- APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5);
- if (SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI) ||
- SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI)) {
- if (N->getOpcode() != ISD::DELETED_NODE)
- DCI.AddToWorklist(N);
+ if (SimplifyDemandedLowBitsHelper(0, 32) ||
+ SimplifyDemandedLowBitsHelper(1, 5))
return SDValue(N, 0);
- }
break;
}
case RISCVISD::CLZW:
case RISCVISD::CTZW: {
// Only the lower 32 bits of the first operand are read
- SDValue Op0 = N->getOperand(0);
- APInt Mask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32);
- if (SimplifyDemandedBits(Op0, Mask, DCI)) {
- if (N->getOpcode() != ISD::DELETED_NODE)
- DCI.AddToWorklist(N);
+ if (SimplifyDemandedLowBitsHelper(0, 32))
return SDValue(N, 0);
- }
break;
}
case RISCVISD::FSL:
case RISCVISD::FSR: {
 // Only the lower log2(Bitwidth)+1 bits of the shift amount are read.
- SDValue ShAmt = N->getOperand(2);
- unsigned BitWidth = ShAmt.getValueSizeInBits();
+ unsigned BitWidth = N->getOperand(2).getValueSizeInBits();
assert(isPowerOf2_32(BitWidth) && "Unexpected bit width");
- APInt ShAmtMask(BitWidth, (BitWidth * 2) - 1);
- if (SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
- if (N->getOpcode() != ISD::DELETED_NODE)
- DCI.AddToWorklist(N);
+ if (SimplifyDemandedLowBitsHelper(2, Log2_32(BitWidth) + 1))
return SDValue(N, 0);
- }
break;
}
case RISCVISD::FSLW:
case RISCVISD::FSRW: {
// Only the lower 32 bits of Values and lower 6 bits of shift amount are
// read.
- SDValue Op0 = N->getOperand(0);
- SDValue Op1 = N->getOperand(1);
- SDValue ShAmt = N->getOperand(2);
- APInt OpMask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32);
- APInt ShAmtMask = APInt::getLowBitsSet(ShAmt.getValueSizeInBits(), 6);
- if (SimplifyDemandedBits(Op0, OpMask, DCI) ||
- SimplifyDemandedBits(Op1, OpMask, DCI) ||
- SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
- if (N->getOpcode() != ISD::DELETED_NODE)
- DCI.AddToWorklist(N);
+ if (SimplifyDemandedLowBitsHelper(0, 32) ||
+ SimplifyDemandedLowBitsHelper(1, 32) ||
+ SimplifyDemandedLowBitsHelper(2, 6))
return SDValue(N, 0);
- }
break;
}
case RISCVISD::GREV:
case RISCVISD::GORC: {
 // Only the lower log2(Bitwidth) bits of the shift amount are read.
- SDValue ShAmt = N->getOperand(1);
- unsigned BitWidth = ShAmt.getValueSizeInBits();
+ unsigned BitWidth = N->getOperand(1).getValueSizeInBits();
assert(isPowerOf2_32(BitWidth) && "Unexpected bit width");
- APInt ShAmtMask(BitWidth, BitWidth - 1);
- if (SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
- if (N->getOpcode() != ISD::DELETED_NODE)
- DCI.AddToWorklist(N);
+ if (SimplifyDemandedLowBitsHelper(1, Log2_32(BitWidth)))
return SDValue(N, 0);
- }
return combineGREVI_GORCI(N, DCI.DAG);
}
case RISCVISD::GREVW:
case RISCVISD::GORCW: {
// Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
- APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
- APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5);
- if (SimplifyDemandedBits(LHS, LHSMask, DCI) ||
- SimplifyDemandedBits(RHS, RHSMask, DCI)) {
- if (N->getOpcode() != ISD::DELETED_NODE)
- DCI.AddToWorklist(N);
+ if (SimplifyDemandedLowBitsHelper(0, 32) ||
+ SimplifyDemandedLowBitsHelper(1, 5))
return SDValue(N, 0);
- }
return combineGREVI_GORCI(N, DCI.DAG);
}
case RISCVISD::SHFL:
case RISCVISD::UNSHFL: {
- // Only the lower log2(Bitwidth) bits of the the shift amount are read.
- SDValue ShAmt = N->getOperand(1);
- unsigned BitWidth = ShAmt.getValueSizeInBits();
+ // Only the lower log2(Bitwidth)-1 bits of the shift amount are read.
+ unsigned BitWidth = N->getOperand(1).getValueSizeInBits();
assert(isPowerOf2_32(BitWidth) && "Unexpected bit width");
- APInt ShAmtMask(BitWidth, (BitWidth / 2) - 1);
- if (SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
- if (N->getOpcode() != ISD::DELETED_NODE)
- DCI.AddToWorklist(N);
+ if (SimplifyDemandedLowBitsHelper(1, Log2_32(BitWidth) - 1))
return SDValue(N, 0);
- }
break;
}
case RISCVISD::SHFLW:
case RISCVISD::UNSHFLW: {
- // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
+ // Only the lower 32 bits of LHS and lower 4 bits of RHS are read.
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 4);
- if (SimplifyDemandedBits(LHS, LHSMask, DCI) ||
- SimplifyDemandedBits(RHS, RHSMask, DCI)) {
- if (N->getOpcode() != ISD::DELETED_NODE)
- DCI.AddToWorklist(N);
+ if (SimplifyDemandedLowBitsHelper(0, 32) ||
+ SimplifyDemandedLowBitsHelper(1, 4))
return SDValue(N, 0);
- }
break;
}
case RISCVISD::BCOMPRESSW:
case RISCVISD::BDECOMPRESSW: {
// Only the lower 32 bits of LHS and RHS are read.
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
- APInt Mask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
- if (SimplifyDemandedBits(LHS, Mask, DCI) ||
- SimplifyDemandedBits(RHS, Mask, DCI)) {
- if (N->getOpcode() != ISD::DELETED_NODE)
- DCI.AddToWorklist(N);
+ if (SimplifyDemandedLowBitsHelper(0, 32) ||
+ SimplifyDemandedLowBitsHelper(1, 32))
return SDValue(N, 0);
- }
break;
}
+ case RISCVISD::FMV_X_ANYEXTH:
case RISCVISD::FMV_X_ANYEXTW_RV64: {
SDLoc DL(N);
SDValue Op0 = N->getOperand(0);
+ MVT VT = N->getSimpleValueType(0);
// If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
- // conversion is unnecessary and can be replaced with an ANY_EXTEND
- // of the FMV_W_X_RV64 operand.
- if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) {
- assert(Op0.getOperand(0).getValueType() == MVT::i64 &&
+ // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
+ // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
+ if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
+ Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
+ (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
+ Op0->getOpcode() == RISCVISD::FMV_H_X)) {
+ assert(Op0.getOperand(0).getValueType() == VT &&
"Unexpected value type!");
return Op0.getOperand(0);
}
@@ -6066,23 +6991,27 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
!Op0.getNode()->hasOneUse())
break;
- SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64,
- Op0.getOperand(0));
- APInt SignBit = APInt::getSignMask(32).sext(64);
+ SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
+ unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
+ APInt SignBit = APInt::getSignMask(FPBits).sextOrSelf(VT.getSizeInBits());
if (Op0.getOpcode() == ISD::FNEG)
- return DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV,
- DAG.getConstant(SignBit, DL, MVT::i64));
+ return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
+ DAG.getConstant(SignBit, DL, VT));
assert(Op0.getOpcode() == ISD::FABS);
- return DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV,
- DAG.getConstant(~SignBit, DL, MVT::i64));
+ return DAG.getNode(ISD::AND, DL, VT, NewFMV,
+ DAG.getConstant(~SignBit, DL, VT));
}
+ case ISD::ADD:
+ return performADDCombine(N, DAG, Subtarget);
+ case ISD::SUB:
+ return performSUBCombine(N, DAG);
case ISD::AND:
- return performANDCombine(N, DCI, Subtarget);
+ return performANDCombine(N, DAG);
case ISD::OR:
- return performORCombine(N, DCI, Subtarget);
+ return performORCombine(N, DAG, Subtarget);
case ISD::XOR:
- return performXORCombine(N, DCI, Subtarget);
+ return performXORCombine(N, DAG);
case ISD::ANY_EXTEND:
return performANY_EXTENDCombine(N, DCI, Subtarget);
case ISD::ZERO_EXTEND:
@@ -6099,7 +7028,14 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
// Transform
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
- auto CCVal = static_cast<ISD::CondCode>(N->getConstantOperandVal(2));
+ SDValue TrueV = N->getOperand(3);
+ SDValue FalseV = N->getOperand(4);
+
+ // If the True and False values are the same, we don't need a select_cc.
+ if (TrueV == FalseV)
+ return TrueV;
+
+ ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
if (!ISD::isIntEqualitySetCC(CCVal))
break;
@@ -6120,11 +7056,9 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
LHS = LHS.getOperand(0);
translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
- SDValue TargetCC =
- DAG.getTargetConstant(CCVal, DL, Subtarget.getXLenVT());
- return DAG.getNode(
- RISCVISD::SELECT_CC, DL, N->getValueType(0),
- {LHS, RHS, TargetCC, N->getOperand(3), N->getOperand(4)});
+ SDValue TargetCC = DAG.getCondCode(CCVal);
+ return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
+ {LHS, RHS, TargetCC, TrueV, FalseV});
}
// Fold (select_cc (xor X, Y), 0, eq/ne, trueV, falseV) ->
@@ -6132,8 +7066,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS))
return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), N->getValueType(0),
{LHS.getOperand(0), LHS.getOperand(1),
- N->getOperand(2), N->getOperand(3),
- N->getOperand(4)});
+ N->getOperand(2), TrueV, FalseV});
// (select_cc X, 1, setne, trueV, falseV) ->
// (select_cc X, 0, seteq, trueV, falseV) if we can prove X is 0/1.
// This can occur when legalizing some floating point comparisons.
@@ -6141,12 +7074,10 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
SDLoc DL(N);
CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
- SDValue TargetCC =
- DAG.getTargetConstant(CCVal, DL, Subtarget.getXLenVT());
+ SDValue TargetCC = DAG.getCondCode(CCVal);
RHS = DAG.getConstant(0, DL, LHS.getValueType());
- return DAG.getNode(
- RISCVISD::SELECT_CC, DL, N->getValueType(0),
- {LHS, RHS, TargetCC, N->getOperand(3), N->getOperand(4)});
+ return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
+ {LHS, RHS, TargetCC, TrueV, FalseV});
}
break;
@@ -6227,18 +7158,33 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
}
case ISD::MGATHER:
- case ISD::MSCATTER: {
+ case ISD::MSCATTER:
+ case ISD::VP_GATHER:
+ case ISD::VP_SCATTER: {
if (!DCI.isBeforeLegalize())
break;
- MaskedGatherScatterSDNode *MGSN = cast<MaskedGatherScatterSDNode>(N);
- SDValue Index = MGSN->getIndex();
+ SDValue Index, ScaleOp;
+ bool IsIndexScaled = false;
+ bool IsIndexSigned = false;
+ if (const auto *VPGSN = dyn_cast<VPGatherScatterSDNode>(N)) {
+ Index = VPGSN->getIndex();
+ ScaleOp = VPGSN->getScale();
+ IsIndexScaled = VPGSN->isIndexScaled();
+ IsIndexSigned = VPGSN->isIndexSigned();
+ } else {
+ const auto *MGSN = cast<MaskedGatherScatterSDNode>(N);
+ Index = MGSN->getIndex();
+ ScaleOp = MGSN->getScale();
+ IsIndexScaled = MGSN->isIndexScaled();
+ IsIndexSigned = MGSN->isIndexSigned();
+ }
EVT IndexVT = Index.getValueType();
MVT XLenVT = Subtarget.getXLenVT();
// RISCV indexed loads only support the "unsigned unscaled" addressing
// mode, so anything else must be manually legalized.
- bool NeedsIdxLegalization = MGSN->isIndexScaled() ||
- (MGSN->isIndexSigned() &&
- IndexVT.getVectorElementType().bitsLT(XLenVT));
+ bool NeedsIdxLegalization =
+ IsIndexScaled ||
+ (IsIndexSigned && IndexVT.getVectorElementType().bitsLT(XLenVT));
if (!NeedsIdxLegalization)
break;
@@ -6247,36 +7193,48 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
// Any index legalization should first promote to XLenVT, so we don't lose
// bits when scaling. This may create an illegal index type so we let
// LLVM's legalization take care of the splitting.
+ // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
IndexVT = IndexVT.changeVectorElementType(XLenVT);
- Index = DAG.getNode(MGSN->isIndexSigned() ? ISD::SIGN_EXTEND
- : ISD::ZERO_EXTEND,
+ Index = DAG.getNode(IsIndexSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
DL, IndexVT, Index);
}
- unsigned Scale = N->getConstantOperandVal(5);
- if (MGSN->isIndexScaled() && Scale != 1) {
+ unsigned Scale = cast<ConstantSDNode>(ScaleOp)->getZExtValue();
+ if (IsIndexScaled && Scale != 1) {
// Manually scale the indices by the element size.
// TODO: Sanitize the scale operand here?
+ // TODO: For VP nodes, should we use VP_SHL here?
assert(isPowerOf2_32(Scale) && "Expecting power-of-two types");
SDValue SplatScale = DAG.getConstant(Log2_32(Scale), DL, IndexVT);
Index = DAG.getNode(ISD::SHL, DL, IndexVT, Index, SplatScale);
}
ISD::MemIndexType NewIndexTy = ISD::UNSIGNED_UNSCALED;
- if (const auto *MGN = dyn_cast<MaskedGatherSDNode>(N)) {
+ if (const auto *VPGN = dyn_cast<VPGatherSDNode>(N))
+ return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
+ {VPGN->getChain(), VPGN->getBasePtr(), Index,
+ VPGN->getScale(), VPGN->getMask(),
+ VPGN->getVectorLength()},
+ VPGN->getMemOperand(), NewIndexTy);
+ if (const auto *VPSN = dyn_cast<VPScatterSDNode>(N))
+ return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
+ {VPSN->getChain(), VPSN->getValue(),
+ VPSN->getBasePtr(), Index, VPSN->getScale(),
+ VPSN->getMask(), VPSN->getVectorLength()},
+ VPSN->getMemOperand(), NewIndexTy);
+ if (const auto *MGN = dyn_cast<MaskedGatherSDNode>(N))
return DAG.getMaskedGather(
- N->getVTList(), MGSN->getMemoryVT(), DL,
- {MGSN->getChain(), MGN->getPassThru(), MGSN->getMask(),
- MGSN->getBasePtr(), Index, MGN->getScale()},
+ N->getVTList(), MGN->getMemoryVT(), DL,
+ {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
+ MGN->getBasePtr(), Index, MGN->getScale()},
MGN->getMemOperand(), NewIndexTy, MGN->getExtensionType());
- }
const auto *MSN = cast<MaskedScatterSDNode>(N);
return DAG.getMaskedScatter(
- N->getVTList(), MGSN->getMemoryVT(), DL,
- {MGSN->getChain(), MSN->getValue(), MGSN->getMask(), MGSN->getBasePtr(),
- Index, MGSN->getScale()},
- MGSN->getMemOperand(), NewIndexTy, MSN->isTruncatingStore());
+ N->getVTList(), MSN->getMemoryVT(), DL,
+ {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
+ Index, MSN->getScale()},
+ MSN->getMemOperand(), NewIndexTy, MSN->isTruncatingStore());
}
case RISCVISD::SRA_VL:
case RISCVISD::SRL_VL:
@@ -6309,45 +7267,37 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
break;
}
case RISCVISD::MUL_VL: {
- // Try to form VWMUL or VWMULU.
- // FIXME: Look for splat of extended scalar as well.
- // FIXME: Support VWMULSU.
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
- bool IsSignExt = Op0.getOpcode() == RISCVISD::VSEXT_VL;
- bool IsZeroExt = Op0.getOpcode() == RISCVISD::VZEXT_VL;
- if ((!IsSignExt && !IsZeroExt) || Op0.getOpcode() != Op1.getOpcode())
- return SDValue();
-
- // Make sure the extends have a single use.
- if (!Op0.hasOneUse() || !Op1.hasOneUse())
- return SDValue();
-
- SDValue Mask = N->getOperand(2);
- SDValue VL = N->getOperand(3);
- if (Op0.getOperand(1) != Mask || Op1.getOperand(1) != Mask ||
- Op0.getOperand(2) != VL || Op1.getOperand(2) != VL)
- return SDValue();
-
- Op0 = Op0.getOperand(0);
- Op1 = Op1.getOperand(0);
-
- MVT VT = N->getSimpleValueType(0);
- MVT NarrowVT =
- MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits() / 2),
- VT.getVectorElementCount());
-
- SDLoc DL(N);
-
- // Re-introduce narrower extends if needed.
- unsigned ExtOpc = IsSignExt ? RISCVISD::VSEXT_VL : RISCVISD::VZEXT_VL;
- if (Op0.getValueType() != NarrowVT)
- Op0 = DAG.getNode(ExtOpc, DL, NarrowVT, Op0, Mask, VL);
- if (Op1.getValueType() != NarrowVT)
- Op1 = DAG.getNode(ExtOpc, DL, NarrowVT, Op1, Mask, VL);
+ if (SDValue V = combineMUL_VLToVWMUL(N, Op0, Op1, DAG))
+ return V;
+ if (SDValue V = combineMUL_VLToVWMUL(N, Op1, Op0, DAG))
+ return V;
+ return SDValue();
+ }
+ case ISD::STORE: {
+ auto *Store = cast<StoreSDNode>(N);
+ SDValue Val = Store->getValue();
+ // Combine store of vmv.x.s to vse with VL of 1.
+ // FIXME: Support FP.
+ if (Val.getOpcode() == RISCVISD::VMV_X_S) {
+ SDValue Src = Val.getOperand(0);
+ EVT VecVT = Src.getValueType();
+ EVT MemVT = Store->getMemoryVT();
+ // The memory VT and the element type must match.
+ if (VecVT.getVectorElementType() == MemVT) {
+ SDLoc DL(N);
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount());
+ return DAG.getStoreVP(Store->getChain(), DL, Src, Store->getBasePtr(),
+ DAG.getConstant(1, DL, MaskVT),
+ DAG.getConstant(1, DL, Subtarget.getXLenVT()),
+ Store->getPointerInfo(),
+ Store->getOriginalAlign(),
+ Store->getMemOperand()->getFlags());
+ }
+ }
- unsigned WMulOpc = IsSignExt ? RISCVISD::VWMUL_VL : RISCVISD::VWMULU_VL;
- return DAG.getNode(WMulOpc, DL, VT, Op0, Op1, Mask, VL);
+ break;
}
}
@@ -6479,7 +7429,7 @@ bool RISCVTargetLowering::targetShrinkDemandedConstant(
else
return false;
- // Sanity check that our new mask is a subset of the demanded mask.
+ // Check that our new mask is a subset of the demanded mask.
assert(IsLegalMask(NewMask));
return UseMask(NewMask);
}
@@ -6609,6 +7559,12 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
switch (Op.getOpcode()) {
default:
break;
+ case RISCVISD::SELECT_CC: {
+ unsigned Tmp = DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
+ if (Tmp == 1) return 1; // Early out.
+ unsigned Tmp2 = DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
+ return std::min(Tmp, Tmp2);
+ }
case RISCVISD::SLLW:
case RISCVISD::SRAW:
case RISCVISD::SRLW:
@@ -6625,8 +7581,8 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
case RISCVISD::UNSHFLW:
case RISCVISD::BCOMPRESSW:
case RISCVISD::BDECOMPRESSW:
- case RISCVISD::FCVT_W_RV64:
- case RISCVISD::FCVT_WU_RV64:
+ case RISCVISD::FCVT_W_RTZ_RV64:
+ case RISCVISD::FCVT_WU_RTZ_RV64:
// TODO: As the result is sign-extended, this is conservatively correct. A
// more precise answer could be calculated for SRAW depending on known
// bits in the shift amount.
@@ -6803,7 +7759,8 @@ static bool isSelectPseudo(MachineInstr &MI) {
}
static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
- MachineBasicBlock *BB) {
+ MachineBasicBlock *BB,
+ const RISCVSubtarget &Subtarget) {
// To "insert" Select_* instructions, we actually have to insert the triangle
// control-flow pattern. The incoming instructions know the destination vreg
// to set, the condition code register to branch on, the true/false values to
@@ -6830,7 +7787,7 @@ static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
// related approach and more information.
Register LHS = MI.getOperand(1).getReg();
Register RHS = MI.getOperand(2).getReg();
- auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm());
+ auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
SmallVector<MachineInstr *, 4> SelectDebugValues;
SmallSet<Register, 4> SelectDests;
@@ -6863,7 +7820,7 @@ static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
}
}
- const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
+ const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
DebugLoc DL = MI.getDebugLoc();
MachineFunction::iterator I = ++BB->getIterator();
@@ -6892,9 +7849,7 @@ static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
HeadMBB->addSuccessor(TailMBB);
// Insert appropriate branch.
- unsigned Opcode = getBranchOpcodeForIntCondCode(CC);
-
- BuildMI(HeadMBB, DL, TII.get(Opcode))
+ BuildMI(HeadMBB, DL, TII.getBrCond(CC))
.addReg(LHS)
.addReg(RHS)
.addMBB(TailMBB);
@@ -6939,7 +7894,7 @@ RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case RISCV::Select_FPR16_Using_CC_GPR:
case RISCV::Select_FPR32_Using_CC_GPR:
case RISCV::Select_FPR64_Using_CC_GPR:
- return emitSelectPseudo(MI, BB);
+ return emitSelectPseudo(MI, BB, Subtarget);
case RISCV::BuildPairF64Pseudo:
return emitBuildPairF64Pseudo(MI, BB);
case RISCV::SplitF64Pseudo:
@@ -7258,7 +8213,7 @@ static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
}
assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
- (TLI.getSubtarget().hasStdExtV() && ValVT.isVector())) &&
+ (TLI.getSubtarget().hasVInstructions() && ValVT.isVector())) &&
"Expected an XLenVT or vector types at this stage");
if (Reg) {
@@ -7294,7 +8249,7 @@ void RISCVTargetLowering::analyzeInputArgs(
FunctionType *FType = MF.getFunction().getFunctionType();
Optional<unsigned> FirstMaskArgument;
- if (Subtarget.hasStdExtV())
+ if (Subtarget.hasVInstructions())
FirstMaskArgument = preAssignMask(Ins);
for (unsigned i = 0; i != NumArgs; ++i) {
@@ -7325,7 +8280,7 @@ void RISCVTargetLowering::analyzeOutputArgs(
unsigned NumArgs = Outs.size();
Optional<unsigned> FirstMaskArgument;
- if (Subtarget.hasStdExtV())
+ if (Subtarget.hasVInstructions())
FirstMaskArgument = preAssignMask(Outs);
for (unsigned i = 0; i != NumArgs; i++) {
@@ -8170,7 +9125,7 @@ bool RISCVTargetLowering::CanLowerReturn(
CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
Optional<unsigned> FirstMaskArgument;
- if (Subtarget.hasStdExtV())
+ if (Subtarget.hasVInstructions())
FirstMaskArgument = preAssignMask(Outs);
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
@@ -8339,8 +9294,10 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(FMV_X_ANYEXTH)
NODE_NAME_CASE(FMV_W_X_RV64)
NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
- NODE_NAME_CASE(FCVT_W_RV64)
- NODE_NAME_CASE(FCVT_WU_RV64)
+ NODE_NAME_CASE(FCVT_X_RTZ)
+ NODE_NAME_CASE(FCVT_XU_RTZ)
+ NODE_NAME_CASE(FCVT_W_RTZ_RV64)
+ NODE_NAME_CASE(FCVT_WU_RTZ_RV64)
NODE_NAME_CASE(READ_CYCLE_WIDE)
NODE_NAME_CASE(GREV)
NODE_NAME_CASE(GREVW)
@@ -8435,7 +9392,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(VRGATHEREI16_VV_VL)
NODE_NAME_CASE(VSEXT_VL)
NODE_NAME_CASE(VZEXT_VL)
- NODE_NAME_CASE(VPOPC_VL)
+ NODE_NAME_CASE(VCPOP_VL)
NODE_NAME_CASE(VLE_VL)
NODE_NAME_CASE(VSE_VL)
NODE_NAME_CASE(READ_CSR)
@@ -8456,7 +9413,6 @@ RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
default:
break;
case 'f':
- case 'v':
return C_RegisterClass;
case 'I':
case 'J':
@@ -8467,6 +9423,9 @@ RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
case 'S': // A symbolic address
return C_Other;
}
+ } else {
+ if (Constraint == "vr" || Constraint == "vm")
+ return C_RegisterClass;
}
return TargetLowering::getConstraintType(Constraint);
}
@@ -8489,16 +9448,19 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
if (Subtarget.hasStdExtD() && VT == MVT::f64)
return std::make_pair(0U, &RISCV::FPR64RegClass);
break;
- case 'v':
- for (const auto *RC :
- {&RISCV::VMRegClass, &RISCV::VRRegClass, &RISCV::VRM2RegClass,
- &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
+ default:
+ break;
+ }
+ } else {
+ if (Constraint == "vr") {
+ for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass,
+ &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
return std::make_pair(0U, RC);
}
- break;
- default:
- break;
+ } else if (Constraint == "vm") {
+ if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
+ return std::make_pair(0U, &RISCV::VMRegClass);
}
}
@@ -8596,7 +9558,7 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
}
}
- if (Subtarget.hasStdExtV()) {
+ if (Subtarget.hasVInstructions()) {
Register VReg = StringSwitch<Register>(Constraint.lower())
.Case("{v0}", RISCV::V0)
.Case("{v1}", RISCV::V1)
@@ -8934,6 +9896,11 @@ bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
(1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
return true;
+ // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
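+ // e.g. x * 2050 = (x << 11) + (x << 1), i.e. (SH1ADD x, (SLLI x, 11)),
+ // since 2050 is not simm12 and 2050 - 2 is a power of 2.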
+ if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
+ ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
+ (Imm - 8).isPowerOf2()))
+ return true;
 // Omit the following optimization if the subtarget has the M extension
// and the data size >= XLen.
if (Subtarget.hasStdExtM() && VT.getSizeInBits() >= Subtarget.getXLen())
@@ -8952,6 +9919,29 @@ bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
return false;
}
+bool RISCVTargetLowering::isMulAddWithConstProfitable(
+ const SDValue &AddNode, const SDValue &ConstNode) const {
+ // Let the DAGCombiner decide for vectors.
+ EVT VT = AddNode.getValueType();
+ if (VT.isVector())
+ return true;
+
+ // Let the DAGCombiner decide for larger types.
+ if (VT.getScalarSizeInBits() > Subtarget.getXLen())
+ return true;
+
+ // It is worse if c1 is simm12 while c1*c2 is not.
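+ // e.g. with c1 = 5 and c2 = 1000, c1 is simm12 but c1 * c2 = 5000 is not,
+ // so return false to keep the add-with-simm12 form.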
+ ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
+ ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
+ const APInt &C1 = C1Node->getAPIntValue();
+ const APInt &C2 = C2Node->getAPIntValue();
+ if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
+ return false;
+
+ // Default to true and let the DAGCombiner decide.
+ return true;
+}
+
bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
bool *Fast) const {
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 0e71220da3b3..8e3d716ae919 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -84,10 +84,16 @@ enum NodeType : unsigned {
FMV_X_ANYEXTH,
FMV_W_X_RV64,
FMV_X_ANYEXTW_RV64,
+ // FP to XLen int conversions. Corresponds to fcvt.l(u).s/d/h on RV64 and
+ // fcvt.w(u).s/d/h on RV32. Unlike FP_TO_S/UINT, these saturate out-of-range
+ // inputs. These are used for FP_TO_S/UINT_SAT lowering.
+ FCVT_X_RTZ,
+ FCVT_XU_RTZ,
// FP to 32 bit int conversions for RV64. These are used to keep track of the
- // result being sign extended to 64 bit.
- FCVT_W_RV64,
- FCVT_WU_RV64,
+ // result being sign extended to 64 bit. These saturate out of range inputs.
+ // Used for FP_TO_S/UINT and FP_TO_S/UINT_SAT lowering.
+ FCVT_W_RTZ_RV64,
+ FCVT_WU_RTZ_RV64,
// READ_CYCLE_WIDE - A read of the 64-bit cycle CSR on a 32-bit target
// (returns (Lo, Hi)). It takes a chain operand.
READ_CYCLE_WIDE,
@@ -158,12 +164,13 @@ enum NodeType : unsigned {
VFNCVT_ROD_VL,
// These nodes match the semantics of the corresponding RVV vector reduction
// instructions. They produce a vector result which is the reduction
- // performed over the first vector operand plus the first element of the
- // second vector operand. The first operand is an unconstrained vector type,
- // and the result and second operand's types are expected to be the
- // corresponding full-width LMUL=1 type for the first operand:
- // nxv8i8 = vecreduce_add nxv32i8, nxv8i8
- // nxv2i32 = vecreduce_add nxv8i32, nxv2i32
+ // performed over the second vector operand plus the first element of the
+ // third vector operand. The first operand is the pass-thru operand. The
+ // second operand is an unconstrained vector type, and the result, first, and
+ // third operand's types are expected to be the corresponding full-width
+ // LMUL=1 type for the second operand:
+ // nxv8i8 = vecreduce_add nxv8i8, nxv32i8, nxv8i8
+ // nxv2i32 = vecreduce_add nxv2i32, nxv8i32, nxv2i32
 // The difference in types does introduce extra vsetvli instructions, but
 // it similarly reduces the number of registers consumed per reduction.
// Also has a mask and VL operand.
@@ -256,8 +263,8 @@ enum NodeType : unsigned {
VSEXT_VL,
VZEXT_VL,
- // vpopc.m with additional mask and VL operands.
- VPOPC_VL,
+ // vcpop.m with additional mask and VL operands.
+ VCPOP_VL,
// Reads value of CSR.
// The first operand is a chain pointer. The second specifies address of the
@@ -308,6 +315,9 @@ public:
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override;
bool isCheapToSpeculateCttz() const override;
bool isCheapToSpeculateCtlz() const override;
+ bool hasAndNot(SDValue Y) const override;
+ bool shouldSinkOperands(Instruction *I,
+ SmallVectorImpl<Use *> &Ops) const override;
bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const override;
@@ -455,6 +465,9 @@ public:
bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
SDValue C) const override;
+ bool isMulAddWithConstProfitable(const SDValue &AddNode,
+ const SDValue &ConstNode) const override;
+
TargetLowering::AtomicExpansionKind
shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
Value *emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI,
@@ -499,6 +512,8 @@ public:
bool shouldRemoveExtendFromGSIndex(EVT VT) const override;
+ bool isLegalElementTypeForRVV(Type *ScalarTy) const;
+
private:
/// RISCVCCAssignFn - This target-specific function extends the default
/// CCValAssign with additional information used to lower RISC-V calling
@@ -547,20 +562,23 @@ private:
SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVPREDUCE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerVectorMaskVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVectorMaskVecReduction(SDValue Op, SelectionDAG &DAG,
+ bool IsVP) const;
SDValue lowerFPVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSTEP_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVECTOR_REVERSE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerABS(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerMLOAD(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerMSTORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerMaskedLoad(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerMaskedStore(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFixedLengthVectorFCOPYSIGNToRVV(SDValue Op,
SelectionDAG &DAG) const;
- SDValue lowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerMSCATTER(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerMaskedGather(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerMaskedScatter(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFixedLengthVectorLoadToRVV(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFixedLengthVectorStoreToRVV(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFixedLengthVectorSetccToRVV(SDValue Op, SelectionDAG &DAG) const;
@@ -602,6 +620,14 @@ private:
/// NOTE: Once BUILD_VECTOR can be custom lowered for all legal vector types,
/// this override can be removed.
bool mergeStoresAfterLegalization(EVT VT) const override;
+
+ /// Disable normalizing
+ /// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and
+ /// select(N0|N1, X, Y) => select(N0, X, select(N1, X, Y)).
+ /// RISCV doesn't have flags, so it's better to perform the and/or in a GPR.
+ bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override {
+ return false;
+ };
};
namespace RISCV {
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index fb7cb408cade..dbfc90f36f80 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -58,12 +58,13 @@ class VSETVLIInfo {
uint8_t TailAgnostic : 1;
uint8_t MaskAgnostic : 1;
uint8_t MaskRegOp : 1;
+ uint8_t StoreOp : 1;
uint8_t SEWLMULRatioOnly : 1;
public:
VSETVLIInfo()
: AVLImm(0), TailAgnostic(false), MaskAgnostic(false), MaskRegOp(false),
- SEWLMULRatioOnly(false) {}
+ StoreOp(false), SEWLMULRatioOnly(false) {}
static VSETVLIInfo getUnknown() {
VSETVLIInfo Info;
@@ -118,7 +119,8 @@ public:
TailAgnostic = RISCVVType::isTailAgnostic(VType);
MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
}
- void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA, bool MRO) {
+ void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA, bool MRO,
+ bool IsStore) {
assert(isValid() && !isUnknown() &&
"Can't set VTYPE for uninitialized or unknown");
VLMul = L;
@@ -126,6 +128,7 @@ public:
TailAgnostic = TA;
MaskAgnostic = MA;
MaskRegOp = MRO;
+ StoreOp = IsStore;
}
unsigned encodeVTYPE() const {
@@ -148,10 +151,7 @@ public:
Other.MaskAgnostic);
}
- // Convert VLMUL to a fixed point value with 3 bits of fraction.
- unsigned getSEWLMULRatio() const {
- assert(isValid() && !isUnknown() &&
- "Can't use VTYPE for uninitialized or unknown");
+ static unsigned getSEWLMULRatio(unsigned SEW, RISCVII::VLMUL VLMul) {
unsigned LMul;
bool Fractional;
std::tie(LMul, Fractional) = RISCVVType::decodeVLMUL(VLMul);
@@ -163,6 +163,12 @@ public:
return (SEW * 8) / LMul;
}
+ unsigned getSEWLMULRatio() const {
+ assert(isValid() && !isUnknown() &&
+ "Can't use VTYPE for uninitialized or unknown");
+ return getSEWLMULRatio(SEW, VLMul);
+ }
+
// Check if the VTYPE for these two VSETVLIInfos produce the same VLMAX.
bool hasSameVLMAX(const VSETVLIInfo &Other) const {
assert(isValid() && Other.isValid() &&
@@ -172,10 +178,30 @@ public:
return getSEWLMULRatio() == Other.getSEWLMULRatio();
}
+ bool hasCompatibleVTYPE(const VSETVLIInfo &InstrInfo, bool Strict) const {
+ // Simple case, see if full VTYPE matches.
+ if (hasSameVTYPE(InstrInfo))
+ return true;
+
+ if (Strict)
+ return false;
+
+ // If this is a mask reg operation, it only cares about VLMAX.
+ // FIXME: Mask reg operations are probably ok if "this" VLMAX is larger
+ // than "InstrInfo".
+ // FIXME: The policy bits can probably be ignored for mask reg operations.
+ if (InstrInfo.MaskRegOp && hasSameVLMAX(InstrInfo) &&
+ TailAgnostic == InstrInfo.TailAgnostic &&
+ MaskAgnostic == InstrInfo.MaskAgnostic)
+ return true;
+
+ return false;
+ }
+
// Determine whether the vector instructions requirements represented by
// InstrInfo are compatible with the previous vsetvli instruction represented
// by this.
- bool isCompatible(const VSETVLIInfo &InstrInfo) const {
+ bool isCompatible(const VSETVLIInfo &InstrInfo, bool Strict) const {
assert(isValid() && InstrInfo.isValid() &&
"Can't compare invalid VSETVLIInfos");
assert(!InstrInfo.SEWLMULRatioOnly &&
@@ -190,22 +216,52 @@ public:
// If the instruction doesn't need an AVLReg and the SEW matches, consider
// it compatible.
- if (InstrInfo.hasAVLReg() && InstrInfo.AVLReg == RISCV::NoRegister) {
+ if (!Strict && InstrInfo.hasAVLReg() &&
+ InstrInfo.AVLReg == RISCV::NoRegister) {
if (SEW == InstrInfo.SEW)
return true;
}
- // VTypes must match unless the instruction is a mask reg operation, then it
- // only care about VLMAX.
- // FIXME: Mask reg operations are probably ok if "this" VLMAX is larger
- // than "InstrInfo".
- if (!hasSameVTYPE(InstrInfo) &&
- !(InstrInfo.MaskRegOp && hasSameVLMAX(InstrInfo) &&
- TailAgnostic == InstrInfo.TailAgnostic &&
- MaskAgnostic == InstrInfo.MaskAgnostic))
+ // The AVL must match.
+ if (!hasSameAVL(InstrInfo))
return false;
- return hasSameAVL(InstrInfo);
+ if (hasCompatibleVTYPE(InstrInfo, Strict))
+ return true;
+
+ // Strict matches must ensure a full VTYPE match.
+ if (Strict)
+ return false;
+
+ // Store instructions don't use the policy fields.
+ // TODO: Move into hasCompatibleVTYPE?
+ if (InstrInfo.StoreOp && VLMul == InstrInfo.VLMul && SEW == InstrInfo.SEW)
+ return true;
+
+ // Anything else is not compatible.
+ return false;
+ }
+
+ bool isCompatibleWithLoadStoreEEW(unsigned EEW,
+ const VSETVLIInfo &InstrInfo) const {
+ assert(isValid() && InstrInfo.isValid() &&
+ "Can't compare invalid VSETVLIInfos");
+ assert(!InstrInfo.SEWLMULRatioOnly &&
+ "Expected a valid VTYPE for instruction!");
+ assert(EEW == InstrInfo.SEW && "Mismatched EEW/SEW for store");
+
+ if (isUnknown() || hasSEWLMULRatioOnly())
+ return false;
+
+ if (!hasSameAVL(InstrInfo))
+ return false;
+
+ // Stores can ignore the tail and mask policies.
+ if (!InstrInfo.StoreOp && (TailAgnostic != InstrInfo.TailAgnostic ||
+ MaskAgnostic != InstrInfo.MaskAgnostic))
+ return false;
+
+ return getSEWLMULRatio() == getSEWLMULRatio(EEW, InstrInfo.VLMul);
}
bool operator==(const VSETVLIInfo &Other) const {
@@ -278,7 +334,7 @@ public:
// If the change is compatible with the input, we won't create a VSETVLI
// and should keep the predecessor.
- if (isCompatible(Other))
+ if (isCompatible(Other, /*Strict*/ true))
return *this;
// Otherwise just use whatever is in this block.
@@ -362,14 +418,7 @@ static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
const MachineRegisterInfo *MRI) {
VSETVLIInfo InstrInfo;
unsigned NumOperands = MI.getNumExplicitOperands();
-
- RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);
-
- unsigned Log2SEW = MI.getOperand(NumOperands - 1).getImm();
- // A Log2SEW of 0 is an operation on mask registers only.
- bool MaskRegOp = Log2SEW == 0;
- unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
- assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
+ bool HasPolicy = RISCVII::hasVecPolicyOp(TSFlags);
// Default to tail agnostic unless the destination is tied to a source.
// Unless the source is undef. In that case the user would have some control
@@ -377,8 +426,15 @@ static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
// despite having a tied def.
bool ForceTailAgnostic = RISCVII::doesForceTailAgnostic(TSFlags);
bool TailAgnostic = true;
+ // If the instruction has policy argument, use the argument.
+ if (HasPolicy) {
+ const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1);
+ TailAgnostic = Op.getImm() & 0x1;
+ }
+
unsigned UseOpIdx;
- if (!ForceTailAgnostic && MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
+ if (!(ForceTailAgnostic || (HasPolicy && TailAgnostic)) &&
+ MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
TailAgnostic = false;
// If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic.
const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
@@ -390,16 +446,38 @@ static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
}
}
+ // Remove the tail policy so we can find the SEW and VL.
+ if (HasPolicy)
+ --NumOperands;
+
+ RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);
+
+ unsigned Log2SEW = MI.getOperand(NumOperands - 1).getImm();
+ // A Log2SEW of 0 is an operation on mask registers only.
+ bool MaskRegOp = Log2SEW == 0;
+ unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
+ assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
+
+ // If there are no explicit defs, this is a store instruction which can
+ // ignore the tail and mask policies.
+ bool StoreOp = MI.getNumExplicitDefs() == 0;
+
if (RISCVII::hasVLOp(TSFlags)) {
- const MachineOperand &VLOp = MI.getOperand(MI.getNumExplicitOperands() - 2);
- if (VLOp.isImm())
- InstrInfo.setAVLImm(VLOp.getImm());
- else
+ const MachineOperand &VLOp = MI.getOperand(NumOperands - 2);
+ if (VLOp.isImm()) {
+ int64_t Imm = VLOp.getImm();
+ // Convert the VLMax sentinel to the X0 register.
+ if (Imm == RISCV::VLMaxSentinel)
+ InstrInfo.setAVLReg(RISCV::X0);
+ else
+ InstrInfo.setAVLImm(Imm);
+ } else {
InstrInfo.setAVLReg(VLOp.getReg());
+ }
} else
InstrInfo.setAVLReg(RISCV::NoRegister);
InstrInfo.setVTYPE(VLMul, SEW, /*TailAgnostic*/ TailAgnostic,
- /*MaskAgnostic*/ false, MaskRegOp);
+ /*MaskAgnostic*/ false, MaskRegOp, StoreOp);
return InstrInfo;
}
@@ -413,7 +491,7 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
// VLMAX.
if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
- BuildMI(MBB, MI, DL, TII->get(RISCV::PseudoVSETVLI))
+ BuildMI(MBB, MI, DL, TII->get(RISCV::PseudoVSETVLIX0))
.addReg(RISCV::X0, RegState::Define | RegState::Dead)
.addReg(RISCV::X0, RegState::Kill)
.addImm(Info.encodeVTYPE())
@@ -435,7 +513,7 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
// the previous vl to become invalid.
if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
Info.hasSameVLMAX(PrevInfo)) {
- BuildMI(MBB, MI, DL, TII->get(RISCV::PseudoVSETVLI))
+ BuildMI(MBB, MI, DL, TII->get(RISCV::PseudoVSETVLIX0))
.addReg(RISCV::X0, RegState::Define | RegState::Dead)
.addReg(RISCV::X0, RegState::Kill)
.addImm(Info.encodeVTYPE())
@@ -450,11 +528,19 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
return;
}
- // Use X0 as the DestReg unless AVLReg is X0.
+ if (AVLReg.isVirtual())
+ MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass);
+
+ // Use X0 as the DestReg unless AVLReg is X0. We also need to change the
+ // opcode if the AVLReg is X0 as they have different register classes for
+ // the AVL operand.
Register DestReg = RISCV::X0;
- if (AVLReg == RISCV::X0)
+ unsigned Opcode = RISCV::PseudoVSETVLI;
+ if (AVLReg == RISCV::X0) {
DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
- BuildMI(MBB, MI, DL, TII->get(RISCV::PseudoVSETVLI))
+ Opcode = RISCV::PseudoVSETVLIX0;
+ }
+ BuildMI(MBB, MI, DL, TII->get(Opcode))
.addReg(DestReg, RegState::Define | RegState::Dead)
.addReg(AVLReg)
.addImm(Info.encodeVTYPE());
@@ -464,14 +550,15 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
// VSETIVLI instruction.
static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
VSETVLIInfo NewInfo;
- if (MI.getOpcode() == RISCV::PseudoVSETVLI) {
+ if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
+ NewInfo.setAVLImm(MI.getOperand(1).getImm());
+ } else {
+ assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
+ MI.getOpcode() == RISCV::PseudoVSETVLIX0);
Register AVLReg = MI.getOperand(1).getReg();
assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
"Can't handle X0, X0 vsetvli yet");
NewInfo.setAVLReg(AVLReg);
- } else {
- assert(MI.getOpcode() == RISCV::PseudoVSETIVLI);
- NewInfo.setAVLImm(MI.getOperand(1).getImm());
}
NewInfo.setVTYPE(MI.getOperand(2).getImm());
@@ -480,7 +567,7 @@ static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
bool RISCVInsertVSETVLI::needVSETVLI(const VSETVLIInfo &Require,
const VSETVLIInfo &CurInfo) {
- if (CurInfo.isCompatible(Require))
+ if (CurInfo.isCompatible(Require, /*Strict*/ false))
return false;
// We didn't find a compatible value. If our AVL is a virtual register,
@@ -489,9 +576,10 @@ bool RISCVInsertVSETVLI::needVSETVLI(const VSETVLIInfo &Require,
// VSETVLI here.
if (!CurInfo.isUnknown() && Require.hasAVLReg() &&
Require.getAVLReg().isVirtual() && !CurInfo.hasSEWLMULRatioOnly() &&
- Require.hasSameVTYPE(CurInfo)) {
+ CurInfo.hasCompatibleVTYPE(Require, /*Strict*/ false)) {
if (MachineInstr *DefMI = MRI->getVRegDef(Require.getAVLReg())) {
if (DefMI->getOpcode() == RISCV::PseudoVSETVLI ||
+ DefMI->getOpcode() == RISCV::PseudoVSETVLIX0 ||
DefMI->getOpcode() == RISCV::PseudoVSETIVLI) {
VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVTYPE(CurInfo))
@@ -503,6 +591,202 @@ bool RISCVInsertVSETVLI::needVSETVLI(const VSETVLIInfo &Require,
return true;
}
+bool canSkipVSETVLIForLoadStore(const MachineInstr &MI,
+ const VSETVLIInfo &Require,
+ const VSETVLIInfo &CurInfo) {
+ unsigned EEW;
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+ case RISCV::PseudoVLE8_V_M1:
+ case RISCV::PseudoVLE8_V_M1_MASK:
+ case RISCV::PseudoVLE8_V_M2:
+ case RISCV::PseudoVLE8_V_M2_MASK:
+ case RISCV::PseudoVLE8_V_M4:
+ case RISCV::PseudoVLE8_V_M4_MASK:
+ case RISCV::PseudoVLE8_V_M8:
+ case RISCV::PseudoVLE8_V_M8_MASK:
+ case RISCV::PseudoVLE8_V_MF2:
+ case RISCV::PseudoVLE8_V_MF2_MASK:
+ case RISCV::PseudoVLE8_V_MF4:
+ case RISCV::PseudoVLE8_V_MF4_MASK:
+ case RISCV::PseudoVLE8_V_MF8:
+ case RISCV::PseudoVLE8_V_MF8_MASK:
+ case RISCV::PseudoVLSE8_V_M1:
+ case RISCV::PseudoVLSE8_V_M1_MASK:
+ case RISCV::PseudoVLSE8_V_M2:
+ case RISCV::PseudoVLSE8_V_M2_MASK:
+ case RISCV::PseudoVLSE8_V_M4:
+ case RISCV::PseudoVLSE8_V_M4_MASK:
+ case RISCV::PseudoVLSE8_V_M8:
+ case RISCV::PseudoVLSE8_V_M8_MASK:
+ case RISCV::PseudoVLSE8_V_MF2:
+ case RISCV::PseudoVLSE8_V_MF2_MASK:
+ case RISCV::PseudoVLSE8_V_MF4:
+ case RISCV::PseudoVLSE8_V_MF4_MASK:
+ case RISCV::PseudoVLSE8_V_MF8:
+ case RISCV::PseudoVLSE8_V_MF8_MASK:
+ case RISCV::PseudoVSE8_V_M1:
+ case RISCV::PseudoVSE8_V_M1_MASK:
+ case RISCV::PseudoVSE8_V_M2:
+ case RISCV::PseudoVSE8_V_M2_MASK:
+ case RISCV::PseudoVSE8_V_M4:
+ case RISCV::PseudoVSE8_V_M4_MASK:
+ case RISCV::PseudoVSE8_V_M8:
+ case RISCV::PseudoVSE8_V_M8_MASK:
+ case RISCV::PseudoVSE8_V_MF2:
+ case RISCV::PseudoVSE8_V_MF2_MASK:
+ case RISCV::PseudoVSE8_V_MF4:
+ case RISCV::PseudoVSE8_V_MF4_MASK:
+ case RISCV::PseudoVSE8_V_MF8:
+ case RISCV::PseudoVSE8_V_MF8_MASK:
+ case RISCV::PseudoVSSE8_V_M1:
+ case RISCV::PseudoVSSE8_V_M1_MASK:
+ case RISCV::PseudoVSSE8_V_M2:
+ case RISCV::PseudoVSSE8_V_M2_MASK:
+ case RISCV::PseudoVSSE8_V_M4:
+ case RISCV::PseudoVSSE8_V_M4_MASK:
+ case RISCV::PseudoVSSE8_V_M8:
+ case RISCV::PseudoVSSE8_V_M8_MASK:
+ case RISCV::PseudoVSSE8_V_MF2:
+ case RISCV::PseudoVSSE8_V_MF2_MASK:
+ case RISCV::PseudoVSSE8_V_MF4:
+ case RISCV::PseudoVSSE8_V_MF4_MASK:
+ case RISCV::PseudoVSSE8_V_MF8:
+ case RISCV::PseudoVSSE8_V_MF8_MASK:
+ EEW = 8;
+ break;
+ case RISCV::PseudoVLE16_V_M1:
+ case RISCV::PseudoVLE16_V_M1_MASK:
+ case RISCV::PseudoVLE16_V_M2:
+ case RISCV::PseudoVLE16_V_M2_MASK:
+ case RISCV::PseudoVLE16_V_M4:
+ case RISCV::PseudoVLE16_V_M4_MASK:
+ case RISCV::PseudoVLE16_V_M8:
+ case RISCV::PseudoVLE16_V_M8_MASK:
+ case RISCV::PseudoVLE16_V_MF2:
+ case RISCV::PseudoVLE16_V_MF2_MASK:
+ case RISCV::PseudoVLE16_V_MF4:
+ case RISCV::PseudoVLE16_V_MF4_MASK:
+ case RISCV::PseudoVLSE16_V_M1:
+ case RISCV::PseudoVLSE16_V_M1_MASK:
+ case RISCV::PseudoVLSE16_V_M2:
+ case RISCV::PseudoVLSE16_V_M2_MASK:
+ case RISCV::PseudoVLSE16_V_M4:
+ case RISCV::PseudoVLSE16_V_M4_MASK:
+ case RISCV::PseudoVLSE16_V_M8:
+ case RISCV::PseudoVLSE16_V_M8_MASK:
+ case RISCV::PseudoVLSE16_V_MF2:
+ case RISCV::PseudoVLSE16_V_MF2_MASK:
+ case RISCV::PseudoVLSE16_V_MF4:
+ case RISCV::PseudoVLSE16_V_MF4_MASK:
+ case RISCV::PseudoVSE16_V_M1:
+ case RISCV::PseudoVSE16_V_M1_MASK:
+ case RISCV::PseudoVSE16_V_M2:
+ case RISCV::PseudoVSE16_V_M2_MASK:
+ case RISCV::PseudoVSE16_V_M4:
+ case RISCV::PseudoVSE16_V_M4_MASK:
+ case RISCV::PseudoVSE16_V_M8:
+ case RISCV::PseudoVSE16_V_M8_MASK:
+ case RISCV::PseudoVSE16_V_MF2:
+ case RISCV::PseudoVSE16_V_MF2_MASK:
+ case RISCV::PseudoVSE16_V_MF4:
+ case RISCV::PseudoVSE16_V_MF4_MASK:
+ case RISCV::PseudoVSSE16_V_M1:
+ case RISCV::PseudoVSSE16_V_M1_MASK:
+ case RISCV::PseudoVSSE16_V_M2:
+ case RISCV::PseudoVSSE16_V_M2_MASK:
+ case RISCV::PseudoVSSE16_V_M4:
+ case RISCV::PseudoVSSE16_V_M4_MASK:
+ case RISCV::PseudoVSSE16_V_M8:
+ case RISCV::PseudoVSSE16_V_M8_MASK:
+ case RISCV::PseudoVSSE16_V_MF2:
+ case RISCV::PseudoVSSE16_V_MF2_MASK:
+ case RISCV::PseudoVSSE16_V_MF4:
+ case RISCV::PseudoVSSE16_V_MF4_MASK:
+ EEW = 16;
+ break;
+ case RISCV::PseudoVLE32_V_M1:
+ case RISCV::PseudoVLE32_V_M1_MASK:
+ case RISCV::PseudoVLE32_V_M2:
+ case RISCV::PseudoVLE32_V_M2_MASK:
+ case RISCV::PseudoVLE32_V_M4:
+ case RISCV::PseudoVLE32_V_M4_MASK:
+ case RISCV::PseudoVLE32_V_M8:
+ case RISCV::PseudoVLE32_V_M8_MASK:
+ case RISCV::PseudoVLE32_V_MF2:
+ case RISCV::PseudoVLE32_V_MF2_MASK:
+ case RISCV::PseudoVLSE32_V_M1:
+ case RISCV::PseudoVLSE32_V_M1_MASK:
+ case RISCV::PseudoVLSE32_V_M2:
+ case RISCV::PseudoVLSE32_V_M2_MASK:
+ case RISCV::PseudoVLSE32_V_M4:
+ case RISCV::PseudoVLSE32_V_M4_MASK:
+ case RISCV::PseudoVLSE32_V_M8:
+ case RISCV::PseudoVLSE32_V_M8_MASK:
+ case RISCV::PseudoVLSE32_V_MF2:
+ case RISCV::PseudoVLSE32_V_MF2_MASK:
+ case RISCV::PseudoVSE32_V_M1:
+ case RISCV::PseudoVSE32_V_M1_MASK:
+ case RISCV::PseudoVSE32_V_M2:
+ case RISCV::PseudoVSE32_V_M2_MASK:
+ case RISCV::PseudoVSE32_V_M4:
+ case RISCV::PseudoVSE32_V_M4_MASK:
+ case RISCV::PseudoVSE32_V_M8:
+ case RISCV::PseudoVSE32_V_M8_MASK:
+ case RISCV::PseudoVSE32_V_MF2:
+ case RISCV::PseudoVSE32_V_MF2_MASK:
+ case RISCV::PseudoVSSE32_V_M1:
+ case RISCV::PseudoVSSE32_V_M1_MASK:
+ case RISCV::PseudoVSSE32_V_M2:
+ case RISCV::PseudoVSSE32_V_M2_MASK:
+ case RISCV::PseudoVSSE32_V_M4:
+ case RISCV::PseudoVSSE32_V_M4_MASK:
+ case RISCV::PseudoVSSE32_V_M8:
+ case RISCV::PseudoVSSE32_V_M8_MASK:
+ case RISCV::PseudoVSSE32_V_MF2:
+ case RISCV::PseudoVSSE32_V_MF2_MASK:
+ EEW = 32;
+ break;
+ case RISCV::PseudoVLE64_V_M1:
+ case RISCV::PseudoVLE64_V_M1_MASK:
+ case RISCV::PseudoVLE64_V_M2:
+ case RISCV::PseudoVLE64_V_M2_MASK:
+ case RISCV::PseudoVLE64_V_M4:
+ case RISCV::PseudoVLE64_V_M4_MASK:
+ case RISCV::PseudoVLE64_V_M8:
+ case RISCV::PseudoVLE64_V_M8_MASK:
+ case RISCV::PseudoVLSE64_V_M1:
+ case RISCV::PseudoVLSE64_V_M1_MASK:
+ case RISCV::PseudoVLSE64_V_M2:
+ case RISCV::PseudoVLSE64_V_M2_MASK:
+ case RISCV::PseudoVLSE64_V_M4:
+ case RISCV::PseudoVLSE64_V_M4_MASK:
+ case RISCV::PseudoVLSE64_V_M8:
+ case RISCV::PseudoVLSE64_V_M8_MASK:
+ case RISCV::PseudoVSE64_V_M1:
+ case RISCV::PseudoVSE64_V_M1_MASK:
+ case RISCV::PseudoVSE64_V_M2:
+ case RISCV::PseudoVSE64_V_M2_MASK:
+ case RISCV::PseudoVSE64_V_M4:
+ case RISCV::PseudoVSE64_V_M4_MASK:
+ case RISCV::PseudoVSE64_V_M8:
+ case RISCV::PseudoVSE64_V_M8_MASK:
+ case RISCV::PseudoVSSE64_V_M1:
+ case RISCV::PseudoVSSE64_V_M1_MASK:
+ case RISCV::PseudoVSSE64_V_M2:
+ case RISCV::PseudoVSSE64_V_M2_MASK:
+ case RISCV::PseudoVSSE64_V_M4:
+ case RISCV::PseudoVSSE64_V_M4_MASK:
+ case RISCV::PseudoVSSE64_V_M8:
+ case RISCV::PseudoVSSE64_V_M8_MASK:
+ EEW = 64;
+ break;
+ }
+
+ return CurInfo.isCompatibleWithLoadStoreEEW(EEW, Require);
+}
+
bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB) {
bool HadVectorOp = false;
@@ -510,6 +794,7 @@ bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB) {
for (const MachineInstr &MI : MBB) {
// If this is an explicit VSETVLI or VSETIVLI, update our state.
if (MI.getOpcode() == RISCV::PseudoVSETVLI ||
+ MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
MI.getOpcode() == RISCV::PseudoVSETIVLI) {
HadVectorOp = true;
BBInfo.Change = getInfoForVSETVLI(MI);
@@ -527,7 +812,13 @@ bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB) {
} else {
// If this instruction isn't compatible with the previous VL/VTYPE
// we need to insert a VSETVLI.
- if (needVSETVLI(NewInfo, BBInfo.Change))
+ // If this is a unit-stride or strided load/store, we may be able to use
+ // the EMUL=(EEW/SEW)*LMUL relationship to avoid changing vtype.
+ // NOTE: We only do this if the vtype we're comparing against was
+ // created in this block. We need the first and third phases to treat
+ // the store the same way.
+ if (!canSkipVSETVLIForLoadStore(MI, NewInfo, BBInfo.Change) &&
+ needVSETVLI(NewInfo, BBInfo.Change))
BBInfo.Change = NewInfo;
}
}
@@ -609,12 +900,14 @@ bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
const BlockData &PBBInfo = BlockInfo[PBB->getNumber()];
// If the exit from the predecessor has the VTYPE we are looking for
// we might be able to avoid a VSETVLI.
- if (PBBInfo.Exit.isUnknown() || !PBBInfo.Exit.hasSameVTYPE(Require))
+ if (PBBInfo.Exit.isUnknown() ||
+ !PBBInfo.Exit.hasCompatibleVTYPE(Require, /*Strict*/ false))
return true;
// We need the PHI input to be the output of a VSET(I)VLI.
MachineInstr *DefMI = MRI->getVRegDef(InReg);
if (!DefMI || (DefMI->getOpcode() != RISCV::PseudoVSETVLI &&
+ DefMI->getOpcode() != RISCV::PseudoVSETVLIX0 &&
DefMI->getOpcode() != RISCV::PseudoVSETIVLI))
return true;
@@ -633,10 +926,13 @@ bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
VSETVLIInfo CurInfo;
+ // Only set if the current VSETVLIInfo comes from an explicit VSET(I)VLI.
+ MachineInstr *PrevVSETVLIMI = nullptr;
for (MachineInstr &MI : MBB) {
// If this is an explicit VSETVLI or VSETIVLI, update our state.
if (MI.getOpcode() == RISCV::PseudoVSETVLI ||
+ MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
MI.getOpcode() == RISCV::PseudoVSETIVLI) {
// Conservatively, mark the VL and VTYPE as live.
assert(MI.getOperand(3).getReg() == RISCV::VL &&
@@ -645,6 +941,7 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
MI.getOperand(3).setIsDead(false);
MI.getOperand(4).setIsDead(false);
CurInfo = getInfoForVSETVLI(MI);
+ PrevVSETVLIMI = &MI;
continue;
}
@@ -652,7 +949,11 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
if (RISCVII::hasSEWOp(TSFlags)) {
VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI);
if (RISCVII::hasVLOp(TSFlags)) {
- MachineOperand &VLOp = MI.getOperand(MI.getNumExplicitOperands() - 2);
+ unsigned Offset = 2;
+ if (RISCVII::hasVecPolicyOp(TSFlags))
+ Offset = 3;
+ MachineOperand &VLOp =
+ MI.getOperand(MI.getNumExplicitOperands() - Offset);
if (VLOp.isReg()) {
// Erase the AVL operand from the instruction.
VLOp.setReg(RISCV::NoRegister);
@@ -677,11 +978,35 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
} else {
// If this instruction isn't compatible with the previous VL/VTYPE
// we need to insert a VSETVLI.
- if (needVSETVLI(NewInfo, CurInfo)) {
- insertVSETVLI(MBB, MI, NewInfo, CurInfo);
+ // If this is a unit-stride or strided load/store, we may be able to use
+ // the EMUL=(EEW/SEW)*LMUL relationship to avoid changing vtype.
+ // NOTE: We can't use predecessor information for the store. We must
+ // treat it the same as the first phase so that we produce the correct
+ // vl/vtype for successor blocks.
+ if (!canSkipVSETVLIForLoadStore(MI, NewInfo, CurInfo) &&
+ needVSETVLI(NewInfo, CurInfo)) {
+ // If the previous VL/VTYPE was set by a VSETVLI that no instruction has
+ // used yet, merge it with the current VL/VTYPE.
+ bool NeedInsertVSETVLI = true;
+ if (PrevVSETVLIMI) {
+ bool HasSameAVL =
+ CurInfo.hasSameAVL(NewInfo) ||
+ (NewInfo.hasAVLReg() && NewInfo.getAVLReg().isVirtual() &&
+ NewInfo.getAVLReg() == PrevVSETVLIMI->getOperand(0).getReg());
+ // If the two VSETVLIs have the same AVL and the same VLMAX,
+ // we can merge them.
+ if (HasSameAVL &&
+ CurInfo.getSEWLMULRatio() == NewInfo.getSEWLMULRatio()) {
+ PrevVSETVLIMI->getOperand(2).setImm(NewInfo.encodeVTYPE());
+ NeedInsertVSETVLI = false;
+ }
+ }
+ if (NeedInsertVSETVLI)
+ insertVSETVLI(MBB, MI, NewInfo, CurInfo);
CurInfo = NewInfo;
}
}
+ PrevVSETVLIMI = nullptr;
}
// If this is something updates VL/VTYPE that we don't know about, set
@@ -689,6 +1014,7 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
MI.modifiesRegister(RISCV::VTYPE)) {
CurInfo = VSETVLIInfo::getUnknown();
+ PrevVSETVLIMI = nullptr;
}
}
}
@@ -696,7 +1022,7 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
// Skip if the vector extension is not enabled.
const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
- if (!ST.hasStdExtV())
+ if (!ST.hasVInstructions())
return false;
TII = ST.getInstrInfo();
diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/llvm/lib/Target/RISCV/RISCVInstrFormats.td
index 8e9d245f13eb..cfad4cdb9364 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrFormats.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrFormats.td
@@ -178,6 +178,12 @@ class RVInst<dag outs, dag ins, string opcodestr, string argstr,
bit HasVLOp = 0;
let TSFlags{15} = HasVLOp;
+
+ bit HasVecPolicyOp = 0;
+ let TSFlags{16} = HasVecPolicyOp;
+
+ bit IsRVVWideningReduction = 0;
+ let TSFlags{17} = IsRVVWideningReduction;
}
// Pseudo instructions
@@ -199,7 +205,7 @@ class PseudoLoad<string opcodestr, RegisterClass rdty = GPR>
}
class PseudoFloatLoad<string opcodestr, RegisterClass rdty = GPR>
- : Pseudo<(outs rdty:$rd, GPR:$tmp), (ins bare_symbol:$addr), [], opcodestr, "$rd, $addr, $tmp"> {
+ : Pseudo<(outs GPR:$tmp, rdty:$rd), (ins bare_symbol:$addr), [], opcodestr, "$rd, $addr, $tmp"> {
let hasSideEffects = 0;
let mayLoad = 1;
let mayStore = 0;
@@ -209,7 +215,7 @@ class PseudoFloatLoad<string opcodestr, RegisterClass rdty = GPR>
// Pseudo store instructions.
class PseudoStore<string opcodestr, RegisterClass rsty = GPR>
- : Pseudo<(outs rsty:$rs, GPR:$tmp), (ins bare_symbol:$addr), [], opcodestr, "$rs, $addr, $tmp"> {
+ : Pseudo<(outs GPR:$tmp), (ins rsty:$rs, bare_symbol:$addr), [], opcodestr, "$rs, $addr, $tmp"> {
let hasSideEffects = 0;
let mayLoad = 0;
let mayStore = 1;
@@ -406,3 +412,135 @@ class RVInstJ<RISCVOpcode opcode, dag outs, dag ins, string opcodestr,
let Inst{11-7} = rd;
let Opcode = opcode.Value;
}
+
+//===----------------------------------------------------------------------===//
+// Instruction classes for .insn directives
+//===----------------------------------------------------------------------===//
+
+class DirectiveInsnR<dag outs, dag ins, string argstr>
+ : RVInst<outs, ins, "", "", [], InstFormatR> {
+ bits<7> opcode;
+ bits<7> funct7;
+ bits<3> funct3;
+
+ bits<5> rs2;
+ bits<5> rs1;
+ bits<5> rd;
+
+ let Inst{31-25} = funct7;
+ let Inst{24-20} = rs2;
+ let Inst{19-15} = rs1;
+ let Inst{14-12} = funct3;
+ let Inst{11-7} = rd;
+ let Opcode = opcode;
+
+ let AsmString = ".insn r " # argstr;
+}
+
+class DirectiveInsnR4<dag outs, dag ins, string argstr>
+ : RVInst<outs, ins, "", "", [], InstFormatR4> {
+ bits<7> opcode;
+ bits<2> funct2;
+ bits<3> funct3;
+
+ bits<5> rs3;
+ bits<5> rs2;
+ bits<5> rs1;
+ bits<5> rd;
+
+ let Inst{31-27} = rs3;
+ let Inst{26-25} = funct2;
+ let Inst{24-20} = rs2;
+ let Inst{19-15} = rs1;
+ let Inst{14-12} = funct3;
+ let Inst{11-7} = rd;
+ let Opcode = opcode;
+
+ let AsmString = ".insn r4 " # argstr;
+}
+
+class DirectiveInsnI<dag outs, dag ins, string argstr>
+ : RVInst<outs, ins, "", "", [], InstFormatI> {
+ bits<7> opcode;
+ bits<3> funct3;
+
+ bits<12> imm12;
+ bits<5> rs1;
+ bits<5> rd;
+
+ let Inst{31-20} = imm12;
+ let Inst{19-15} = rs1;
+ let Inst{14-12} = funct3;
+ let Inst{11-7} = rd;
+ let Opcode = opcode;
+
+ let AsmString = ".insn i " # argstr;
+}
+
+class DirectiveInsnS<dag outs, dag ins, string argstr>
+ : RVInst<outs, ins, "", "", [], InstFormatS> {
+ bits<7> opcode;
+ bits<3> funct3;
+
+ bits<12> imm12;
+ bits<5> rs2;
+ bits<5> rs1;
+
+ let Inst{31-25} = imm12{11-5};
+ let Inst{24-20} = rs2;
+ let Inst{19-15} = rs1;
+ let Inst{14-12} = funct3;
+ let Inst{11-7} = imm12{4-0};
+ let Opcode = opcode;
+
+ let AsmString = ".insn s " # argstr;
+}
+
+class DirectiveInsnB<dag outs, dag ins, string argstr>
+ : RVInst<outs, ins, "", "", [], InstFormatB> {
+ bits<7> opcode;
+ bits<3> funct3;
+
+ bits<12> imm12;
+ bits<5> rs2;
+ bits<5> rs1;
+
+ let Inst{31} = imm12{11};
+ let Inst{30-25} = imm12{9-4};
+ let Inst{24-20} = rs2;
+ let Inst{19-15} = rs1;
+ let Inst{14-12} = funct3;
+ let Inst{11-8} = imm12{3-0};
+ let Inst{7} = imm12{10};
+ let Opcode = opcode;
+
+ let AsmString = ".insn b " # argstr;
+}
+
+class DirectiveInsnU<dag outs, dag ins, string argstr>
+ : RVInst<outs, ins, "", "", [], InstFormatU> {
+ bits<7> opcode;
+
+ bits<20> imm20;
+ bits<5> rd;
+
+ let Inst{31-12} = imm20;
+ let Inst{11-7} = rd;
+ let Opcode = opcode;
+
+ let AsmString = ".insn u " # argstr;
+}
+
+class DirectiveInsnJ<dag outs, dag ins, string argstr>
+ : RVInst<outs, ins, "", "", [], InstFormatJ> {
+ bits<7> opcode;
+
+ bits<20> imm20;
+ bits<5> rd;
+
+ let Inst{31-12} = imm20;
+ let Inst{11-7} = rd;
+ let Opcode = opcode;
+
+ let AsmString = ".insn j " # argstr;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index a541daaff9f4..547d82550cac 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -19,14 +19,15 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -36,6 +37,10 @@ using namespace llvm;
#define GET_INSTRINFO_CTOR_DTOR
#include "RISCVGenInstrInfo.inc"
+static cl::opt<bool> PreferWholeRegisterMove(
+ "riscv-prefer-whole-register-move", cl::init(false), cl::Hidden,
+ cl::desc("Prefer whole register move for vector registers."));
+
namespace llvm {
namespace RISCVVPseudosTable {
@@ -113,9 +118,137 @@ unsigned RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
static bool forwardCopyWillClobberTuple(unsigned DstReg, unsigned SrcReg,
unsigned NumRegs) {
- // We really want the positive remainder mod 32 here, that happens to be
- // easily obtainable with a mask.
- return ((DstReg - SrcReg) & 0x1f) < NumRegs;
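+ // Copying forward clobbers part of the source tuple only when the
+ // destination register lies within [SrcReg + 1, SrcReg + NumRegs - 1].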
+ return DstReg > SrcReg && (DstReg - SrcReg) < NumRegs;
+}
+
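+// Walk backwards from the COPY at MBBI to find the instruction defining its
+// source register. Return true (and set DefMBBI to that definition) if the
+// VL/VTYPE in effect for the definition is still valid at the COPY, so the
+// whole-register move can be converted to vmv.v.v (or vmv.v.i).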
+static bool isConvertibleToVMV_V_V(const RISCVSubtarget &STI,
+ const MachineBasicBlock &MBB,
+ MachineBasicBlock::const_iterator MBBI,
+ MachineBasicBlock::const_iterator &DefMBBI,
+ RISCVII::VLMUL LMul) {
+ if (PreferWholeRegisterMove)
+ return false;
+
+ assert(MBBI->getOpcode() == TargetOpcode::COPY &&
+ "Unexpected COPY instruction.");
+ Register SrcReg = MBBI->getOperand(1).getReg();
+ const TargetRegisterInfo *TRI = STI.getRegisterInfo();
+
+ bool FoundDef = false;
+ bool FirstVSetVLI = false;
+ unsigned FirstSEW = 0;
+ while (MBBI != MBB.begin()) {
+ --MBBI;
+ if (MBBI->isMetaInstruction())
+ continue;
+
+ if (MBBI->getOpcode() == RISCV::PseudoVSETVLI ||
+ MBBI->getOpcode() == RISCV::PseudoVSETVLIX0 ||
+ MBBI->getOpcode() == RISCV::PseudoVSETIVLI) {
+ // There is a vsetvli between the COPY and the instruction that defines its source:
+ // vy = def_vop ... (producing instruction)
+ // ...
+ // vsetvli
+ // ...
+ // vx = COPY vy
+ if (!FoundDef) {
+ if (!FirstVSetVLI) {
+ FirstVSetVLI = true;
+ unsigned FirstVType = MBBI->getOperand(2).getImm();
+ RISCVII::VLMUL FirstLMul = RISCVVType::getVLMUL(FirstVType);
+ FirstSEW = RISCVVType::getSEW(FirstVType);
+ // The first encountered vsetvli must have the same lmul as the
+ // register class of COPY.
+ if (FirstLMul != LMul)
+ return false;
+ }
+ // Only permit `vsetvli x0, x0, vtype` between the COPY and the
+ // instruction that defines its source.
+ if (MBBI->getOperand(0).getReg() != RISCV::X0)
+ return false;
+ if (MBBI->getOperand(1).isImm())
+ return false;
+ if (MBBI->getOperand(1).getReg() != RISCV::X0)
+ return false;
+ continue;
+ }
+
+ // MBBI is the first vsetvli before the producing instruction.
+ unsigned VType = MBBI->getOperand(2).getImm();
+ // If there is a vsetvli between COPY and the producing instruction.
+ if (FirstVSetVLI) {
+ // If SEW is different, return false.
+ if (RISCVVType::getSEW(VType) != FirstSEW)
+ return false;
+ }
+
+ // If the vsetvli is tail undisturbed, keep the whole register move.
+ if (!RISCVVType::isTailAgnostic(VType))
+ return false;
+
+ // This check is conservative. We only have register classes for
+ // LMUL = 1/2/4/8, and we should be able to convert vmv1r.v to vmv.v.v
+ // for fractional LMUL operations as well. However, we cannot use the
+ // vsetvli LMUL for widening operations, because the result of a
+ // widening operation is 2 x LMUL.
+ return LMul == RISCVVType::getVLMUL(VType);
+ } else if (MBBI->isInlineAsm() || MBBI->isCall()) {
+ return false;
+ } else if (MBBI->getNumDefs()) {
+ // Check all the instructions which will change VL.
+ // For example, vleff has implicit def VL.
+ if (MBBI->modifiesRegister(RISCV::VL))
+ return false;
+
+ // Go through all defined operands, including implicit defines.
+ for (const MachineOperand &MO : MBBI->operands()) {
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ if (!FoundDef && TRI->isSubRegisterEq(MO.getReg(), SrcReg)) {
+ // We only permit the source of the COPY to have the same LMUL as the
+ // defined operand.
+ // There are cases where we need to keep the whole-register copy if the
+ // LMULs are different.
+ // For example,
+ // $x0 = PseudoVSETIVLI 4, 73 // vsetivli zero, 4, e16,m2,ta,m
+ // $v28m4 = PseudoVWADD_VV_M2 $v26m2, $v8m2
+ // # The COPY may be created by vlmul_trunc intrinsic.
+ // $v26m2 = COPY renamable $v28m2, implicit killed $v28m4
+ //
+ // After widening, the valid value will be 4 x e32 elements. If we
+ // convert the COPY to vmv.v.v, it will only copy 4 x e16 elements.
+ // FIXME: The COPY of subregister of Zvlsseg register will not be able
+ // to convert to vmv.v.[v|i] under the constraint.
+ if (MO.getReg() != SrcReg)
+ return false;
+
+ // For widening reduction instructions with an LMUL_1 input vector,
+ // checking the LMUL alone is insufficient because the reduction result
+ // is always LMUL_1.
+ // For example,
+ // $x11 = PseudoVSETIVLI 1, 64 // vsetivli a1, 1, e8, m1, ta, mu
+ // $v8m1 = PseudoVWREDSUM_VS_M1 $v26, $v27
+ // $v26 = COPY killed renamable $v8
+ // After widening, the valid value will be 1 x e16 elements. If we
+ // convert the COPY to vmv.v.v, it will only copy 1 x e8 elements.
+ uint64_t TSFlags = MBBI->getDesc().TSFlags;
+ if (RISCVII::isRVVWideningReduction(TSFlags))
+ return false;
+
+ // Found the definition.
+ FoundDef = true;
+ DefMBBI = MBBI;
+ // If the producing instruction does not depend on vsetvli, do not
+ // convert COPY to vmv.v.v. For example, VL1R_V or PseudoVRELOAD.
+ if (!RISCVII::hasSEWOp(TSFlags))
+ return false;
+ break;
+ }
+ }
+ }
+ }
+
+ return false;
}
void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
@@ -133,7 +266,7 @@ void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
unsigned Opc;
bool IsScalableVector = true;
unsigned NF = 1;
- unsigned LMul = 1;
+ RISCVII::VLMUL LMul = RISCVII::LMUL_1;
unsigned SubRegIdx = RISCV::sub_vrm1_0;
if (RISCV::FPR16RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::FSGNJ_H;
@@ -146,91 +279,157 @@ void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
IsScalableVector = false;
} else if (RISCV::VRRegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV1R_V;
+ LMul = RISCVII::LMUL_1;
} else if (RISCV::VRM2RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV2R_V;
+ LMul = RISCVII::LMUL_2;
} else if (RISCV::VRM4RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV4R_V;
+ LMul = RISCVII::LMUL_4;
} else if (RISCV::VRM8RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV8R_V;
+ LMul = RISCVII::LMUL_8;
} else if (RISCV::VRN2M1RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV1R_V;
SubRegIdx = RISCV::sub_vrm1_0;
NF = 2;
- LMul = 1;
+ LMul = RISCVII::LMUL_1;
} else if (RISCV::VRN2M2RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV2R_V;
SubRegIdx = RISCV::sub_vrm2_0;
NF = 2;
- LMul = 2;
+ LMul = RISCVII::LMUL_2;
} else if (RISCV::VRN2M4RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV4R_V;
SubRegIdx = RISCV::sub_vrm4_0;
NF = 2;
- LMul = 4;
+ LMul = RISCVII::LMUL_4;
} else if (RISCV::VRN3M1RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV1R_V;
SubRegIdx = RISCV::sub_vrm1_0;
NF = 3;
- LMul = 1;
+ LMul = RISCVII::LMUL_1;
} else if (RISCV::VRN3M2RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV2R_V;
SubRegIdx = RISCV::sub_vrm2_0;
NF = 3;
- LMul = 2;
+ LMul = RISCVII::LMUL_2;
} else if (RISCV::VRN4M1RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV1R_V;
SubRegIdx = RISCV::sub_vrm1_0;
NF = 4;
- LMul = 1;
+ LMul = RISCVII::LMUL_1;
} else if (RISCV::VRN4M2RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV2R_V;
SubRegIdx = RISCV::sub_vrm2_0;
NF = 4;
- LMul = 2;
+ LMul = RISCVII::LMUL_2;
} else if (RISCV::VRN5M1RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV1R_V;
SubRegIdx = RISCV::sub_vrm1_0;
NF = 5;
- LMul = 1;
+ LMul = RISCVII::LMUL_1;
} else if (RISCV::VRN6M1RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV1R_V;
SubRegIdx = RISCV::sub_vrm1_0;
NF = 6;
- LMul = 1;
+ LMul = RISCVII::LMUL_1;
} else if (RISCV::VRN7M1RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV1R_V;
SubRegIdx = RISCV::sub_vrm1_0;
NF = 7;
- LMul = 1;
+ LMul = RISCVII::LMUL_1;
} else if (RISCV::VRN8M1RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV1R_V;
SubRegIdx = RISCV::sub_vrm1_0;
NF = 8;
- LMul = 1;
+ LMul = RISCVII::LMUL_1;
} else {
llvm_unreachable("Impossible reg-to-reg copy");
}
if (IsScalableVector) {
+ bool UseVMV_V_V = false;
+ MachineBasicBlock::const_iterator DefMBBI;
+ unsigned DefExplicitOpNum;
+ unsigned VIOpc;
+ if (isConvertibleToVMV_V_V(STI, MBB, MBBI, DefMBBI, LMul)) {
+ UseVMV_V_V = true;
+ DefExplicitOpNum = DefMBBI->getNumExplicitOperands();
+ // We only need to handle LMUL = 1/2/4/8 here because we only define
+ // vector register classes for LMUL = 1/2/4/8.
+ switch (LMul) {
+ default:
+ llvm_unreachable("Impossible LMUL for vector register copy.");
+ case RISCVII::LMUL_1:
+ Opc = RISCV::PseudoVMV_V_V_M1;
+ VIOpc = RISCV::PseudoVMV_V_I_M1;
+ break;
+ case RISCVII::LMUL_2:
+ Opc = RISCV::PseudoVMV_V_V_M2;
+ VIOpc = RISCV::PseudoVMV_V_I_M2;
+ break;
+ case RISCVII::LMUL_4:
+ Opc = RISCV::PseudoVMV_V_V_M4;
+ VIOpc = RISCV::PseudoVMV_V_I_M4;
+ break;
+ case RISCVII::LMUL_8:
+ Opc = RISCV::PseudoVMV_V_V_M8;
+ VIOpc = RISCV::PseudoVMV_V_I_M8;
+ break;
+ }
+ }
+
+ bool UseVMV_V_I = false;
+ if (UseVMV_V_V && (DefMBBI->getOpcode() == VIOpc)) {
+ UseVMV_V_I = true;
+ Opc = VIOpc;
+ }
+
if (NF == 1) {
- BuildMI(MBB, MBBI, DL, get(Opc), DstReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ auto MIB = BuildMI(MBB, MBBI, DL, get(Opc), DstReg);
+ if (UseVMV_V_I)
+ MIB = MIB.add(DefMBBI->getOperand(1));
+ else
+ MIB = MIB.addReg(SrcReg, getKillRegState(KillSrc));
+ if (UseVMV_V_V) {
+ // The last two arguments of vector instructions are
+ // AVL, SEW. We also need to append the implicit-use vl and vtype.
+ MIB.add(DefMBBI->getOperand(DefExplicitOpNum - 2)); // AVL
+ MIB.add(DefMBBI->getOperand(DefExplicitOpNum - 1)); // SEW
+ MIB.addReg(RISCV::VL, RegState::Implicit);
+ MIB.addReg(RISCV::VTYPE, RegState::Implicit);
+ }
} else {
const TargetRegisterInfo *TRI = STI.getRegisterInfo();
int I = 0, End = NF, Incr = 1;
unsigned SrcEncoding = TRI->getEncodingValue(SrcReg);
unsigned DstEncoding = TRI->getEncodingValue(DstReg);
- if (forwardCopyWillClobberTuple(DstEncoding, SrcEncoding, NF * LMul)) {
+ unsigned LMulVal;
+ bool Fractional;
+ std::tie(LMulVal, Fractional) = RISCVVType::decodeVLMUL(LMul);
+ assert(!Fractional && "It is impossible to have a fractional LMUL here.");
+ if (forwardCopyWillClobberTuple(DstEncoding, SrcEncoding, NF * LMulVal)) {
I = NF - 1;
End = -1;
Incr = -1;
}
for (; I != End; I += Incr) {
- BuildMI(MBB, MBBI, DL, get(Opc), TRI->getSubReg(DstReg, SubRegIdx + I))
- .addReg(TRI->getSubReg(SrcReg, SubRegIdx + I),
- getKillRegState(KillSrc));
+ auto MIB = BuildMI(MBB, MBBI, DL, get(Opc),
+ TRI->getSubReg(DstReg, SubRegIdx + I));
+ if (UseVMV_V_I)
+ MIB = MIB.add(DefMBBI->getOperand(1));
+ else
+ MIB = MIB.addReg(TRI->getSubReg(SrcReg, SubRegIdx + I),
+ getKillRegState(KillSrc));
+ if (UseVMV_V_V) {
+ MIB.add(DefMBBI->getOperand(DefExplicitOpNum - 2)); // AVL
+ MIB.add(DefMBBI->getOperand(DefExplicitOpNum - 1)); // SEW
+ MIB.addReg(RISCV::VL, RegState::Implicit);
+ MIB.addReg(RISCV::VTYPE, RegState::Implicit);
+ }
}
}
} else {
@@ -458,6 +657,12 @@ void RISCVInstrInfo::movImm(MachineBasicBlock &MBB,
.addReg(SrcReg, RegState::Kill)
.addReg(RISCV::X0)
.setMIFlag(Flag);
+ } else if (Inst.Opc == RISCV::SH1ADD || Inst.Opc == RISCV::SH2ADD ||
+ Inst.Opc == RISCV::SH3ADD) {
+ BuildMI(MBB, MBBI, DL, get(Inst.Opc), Result)
+ .addReg(SrcReg, RegState::Kill)
+ .addReg(SrcReg, RegState::Kill)
+ .setMIFlag(Flag);
} else {
BuildMI(MBB, MBBI, DL, get(Inst.Opc), Result)
.addReg(SrcReg, RegState::Kill)
@@ -469,6 +674,25 @@ void RISCVInstrInfo::movImm(MachineBasicBlock &MBB,
}
}
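+// Map a conditional branch opcode to the corresponding RISCVCC condition
+// code, or COND_INVALID if the opcode is not a conditional branch.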
+static RISCVCC::CondCode getCondFromBranchOpc(unsigned Opc) {
+ switch (Opc) {
+ default:
+ return RISCVCC::COND_INVALID;
+ case RISCV::BEQ:
+ return RISCVCC::COND_EQ;
+ case RISCV::BNE:
+ return RISCVCC::COND_NE;
+ case RISCV::BLT:
+ return RISCVCC::COND_LT;
+ case RISCV::BGE:
+ return RISCVCC::COND_GE;
+ case RISCV::BLTU:
+ return RISCVCC::COND_LTU;
+ case RISCV::BGEU:
+ return RISCVCC::COND_GEU;
+ }
+}
+
// The contents of values added to Cond are not examined outside of
// RISCVInstrInfo, giving us flexibility in what to push to it. For RISCV, we
// push BranchOpcode, Reg1, Reg2.
@@ -478,27 +702,47 @@ static void parseCondBranch(MachineInstr &LastInst, MachineBasicBlock *&Target,
assert(LastInst.getDesc().isConditionalBranch() &&
"Unknown conditional branch");
Target = LastInst.getOperand(2).getMBB();
- Cond.push_back(MachineOperand::CreateImm(LastInst.getOpcode()));
+ unsigned CC = getCondFromBranchOpc(LastInst.getOpcode());
+ Cond.push_back(MachineOperand::CreateImm(CC));
Cond.push_back(LastInst.getOperand(0));
Cond.push_back(LastInst.getOperand(1));
}
-static unsigned getOppositeBranchOpcode(int Opc) {
- switch (Opc) {
+const MCInstrDesc &RISCVInstrInfo::getBrCond(RISCVCC::CondCode CC) const {
+ switch (CC) {
+ default:
+ llvm_unreachable("Unknown condition code!");
+ case RISCVCC::COND_EQ:
+ return get(RISCV::BEQ);
+ case RISCVCC::COND_NE:
+ return get(RISCV::BNE);
+ case RISCVCC::COND_LT:
+ return get(RISCV::BLT);
+ case RISCVCC::COND_GE:
+ return get(RISCV::BGE);
+ case RISCVCC::COND_LTU:
+ return get(RISCV::BLTU);
+ case RISCVCC::COND_GEU:
+ return get(RISCV::BGEU);
+ }
+}
+
+RISCVCC::CondCode RISCVCC::getOppositeBranchCondition(RISCVCC::CondCode CC) {
+ switch (CC) {
default:
llvm_unreachable("Unrecognized conditional branch");
- case RISCV::BEQ:
- return RISCV::BNE;
- case RISCV::BNE:
- return RISCV::BEQ;
- case RISCV::BLT:
- return RISCV::BGE;
- case RISCV::BGE:
- return RISCV::BLT;
- case RISCV::BLTU:
- return RISCV::BGEU;
- case RISCV::BGEU:
- return RISCV::BLTU;
+ case RISCVCC::COND_EQ:
+ return RISCVCC::COND_NE;
+ case RISCVCC::COND_NE:
+ return RISCVCC::COND_EQ;
+ case RISCVCC::COND_LT:
+ return RISCVCC::COND_GE;
+ case RISCVCC::COND_GE:
+ return RISCVCC::COND_LT;
+ case RISCVCC::COND_LTU:
+ return RISCVCC::COND_GEU;
+ case RISCVCC::COND_GEU:
+ return RISCVCC::COND_LTU;
}
}
@@ -624,9 +868,9 @@ unsigned RISCVInstrInfo::insertBranch(
}
// Either a one or two-way conditional branch.
- unsigned Opc = Cond[0].getImm();
+ auto CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
MachineInstr &CondMI =
- *BuildMI(&MBB, DL, get(Opc)).add(Cond[1]).add(Cond[2]).addMBB(TBB);
+ *BuildMI(&MBB, DL, getBrCond(CC)).add(Cond[1]).add(Cond[2]).addMBB(TBB);
if (BytesAdded)
*BytesAdded += getInstSizeInBytes(CondMI);
@@ -641,11 +885,11 @@ unsigned RISCVInstrInfo::insertBranch(
return 2;
}
-unsigned RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
- MachineBasicBlock &DestBB,
- const DebugLoc &DL,
- int64_t BrOffset,
- RegScavenger *RS) const {
+void RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock &DestBB,
+ MachineBasicBlock &RestoreBB,
+ const DebugLoc &DL, int64_t BrOffset,
+ RegScavenger *RS) const {
assert(RS && "RegScavenger required for long branching");
assert(MBB.empty() &&
"new block should be inserted for expanding unconditional branch");
@@ -671,16 +915,18 @@ unsigned RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
RS->enterBasicBlockEnd(MBB);
unsigned Scav = RS->scavengeRegisterBackwards(RISCV::GPRRegClass,
MI.getIterator(), false, 0);
+ // TODO: The case when there is no scavenged register needs special handling.
+ assert(Scav != RISCV::NoRegister && "No register is scavenged!");
MRI.replaceRegWith(ScratchReg, Scav);
MRI.clearVirtRegs();
RS->setRegUsed(Scav);
- return 8;
}
bool RISCVInstrInfo::reverseBranchCondition(
SmallVectorImpl<MachineOperand> &Cond) const {
assert((Cond.size() == 3) && "Invalid branch condition!");
- Cond[0].setImm(getOppositeBranchOpcode(Cond[0].getImm()));
+ auto CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
+ Cond[0].setImm(getOppositeBranchCondition(CC));
return false;
}
@@ -866,12 +1112,21 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
switch (OpType) {
default:
llvm_unreachable("Unexpected operand type");
+ case RISCVOp::OPERAND_UIMM2:
+ Ok = isUInt<2>(Imm);
+ break;
+ case RISCVOp::OPERAND_UIMM3:
+ Ok = isUInt<3>(Imm);
+ break;
case RISCVOp::OPERAND_UIMM4:
Ok = isUInt<4>(Imm);
break;
case RISCVOp::OPERAND_UIMM5:
Ok = isUInt<5>(Imm);
break;
+ case RISCVOp::OPERAND_UIMM7:
+ Ok = isUInt<7>(Imm);
+ break;
case RISCVOp::OPERAND_UIMM12:
Ok = isUInt<12>(Imm);
break;
@@ -1086,7 +1341,7 @@ RISCVInstrInfo::getOutliningType(MachineBasicBlock::iterator &MBBI,
// Make sure the operands don't reference something unsafe.
for (const auto &MO : MI.operands())
- if (MO.isMBB() || MO.isBlockAddress() || MO.isCPI())
+ if (MO.isMBB() || MO.isBlockAddress() || MO.isCPI() || MO.isJTI())
return outliner::InstrType::Illegal;
// Don't allow instructions which won't be materialized to impact outlining
@@ -1139,7 +1394,7 @@ MachineBasicBlock::iterator RISCVInstrInfo::insertOutlinedCall(
// clang-format off
#define CASE_VFMA_OPCODE_COMMON(OP, TYPE, LMUL) \
- RISCV::PseudoV##OP##_##TYPE##_##LMUL##_COMMUTABLE
+ RISCV::PseudoV##OP##_##TYPE##_##LMUL
#define CASE_VFMA_OPCODE_LMULS(OP, TYPE) \
CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF8): \
@@ -1182,6 +1437,11 @@ bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
case CASE_VFMA_OPCODE_LMULS(NMSAC, VX):
case CASE_VFMA_OPCODE_LMULS(MACC, VV):
case CASE_VFMA_OPCODE_LMULS(NMSAC, VV): {
+ // If the tail policy is undisturbed we can't commute.
+ assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags));
+ if ((MI.getOperand(MI.getNumExplicitOperands() - 1).getImm() & 1) == 0)
+ return false;
+
// For these instructions we can only swap operand 1 and operand 3 by
// changing the opcode.
unsigned CommutableOpIdx1 = 1;
@@ -1197,6 +1457,11 @@ bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
case CASE_VFMA_OPCODE_LMULS(FNMSUB, VV):
case CASE_VFMA_OPCODE_LMULS(MADD, VV):
case CASE_VFMA_OPCODE_LMULS(NMSUB, VV): {
+ // If the tail policy is undisturbed we can't commute.
+ assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags));
+ if ((MI.getOperand(MI.getNumExplicitOperands() - 1).getImm() & 1) == 0)
+ return false;
+
// For these instructions we have more freedom. We can commute with the
// other multiplicand or with the addend/subtrahend/minuend.
@@ -1223,7 +1488,7 @@ bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
// Both of operands are not fixed. Set one of commutable
// operands to the tied source.
CommutableOpIdx1 = 1;
- } else if (SrcOpIdx1 == CommutableOpIdx1) {
+ } else if (SrcOpIdx1 == CommuteAnyOperandIndex) {
// Only one of the operands is not fixed.
CommutableOpIdx1 = SrcOpIdx2;
}
@@ -1261,8 +1526,8 @@ bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
}
#define CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, LMUL) \
- case RISCV::PseudoV##OLDOP##_##TYPE##_##LMUL##_COMMUTABLE: \
- Opc = RISCV::PseudoV##NEWOP##_##TYPE##_##LMUL##_COMMUTABLE; \
+ case RISCV::PseudoV##OLDOP##_##TYPE##_##LMUL: \
+ Opc = RISCV::PseudoV##NEWOP##_##TYPE##_##LMUL; \
break;
#define CASE_VFMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, TYPE) \
@@ -1409,8 +1674,9 @@ MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI,
CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2) \
CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4)
-MachineInstr *RISCVInstrInfo::convertToThreeAddress(
- MachineFunction::iterator &MBB, MachineInstr &MI, LiveVariables *LV) const {
+MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI,
+ LiveVariables *LV,
+ LiveIntervals *LIS) const {
switch (MI.getOpcode()) {
default:
break;
@@ -1434,7 +1700,8 @@ MachineInstr *RISCVInstrInfo::convertToThreeAddress(
}
//clang-format on
- MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc))
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))
.add(MI.getOperand(0))
.add(MI.getOperand(1))
.add(MI.getOperand(2))
@@ -1451,6 +1718,20 @@ MachineInstr *RISCVInstrInfo::convertToThreeAddress(
}
}
+ if (LIS) {
+ SlotIndex Idx = LIS->ReplaceMachineInstrInMaps(MI, *MIB);
+
+ if (MI.getOperand(0).isEarlyClobber()) {
+ // The use operand 1 was tied to the early-clobber def operand 0, so its
+ // live interval could have ended at an early-clobber slot. Now that they
+ // are no longer tied, update it to the normal register slot.
+ LiveInterval &LI = LIS->getInterval(MI.getOperand(1).getReg());
+ LiveRange::Segment *S = LI.getSegmentContaining(Idx);
+ if (S->end == Idx.getRegSlot(true))
+ S->end = Idx.getRegSlot();
+ }
+ }
+
return MIB;
}
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index d80fc483826f..2bfad7844c43 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -24,12 +24,29 @@ namespace llvm {
class RISCVSubtarget;
+namespace RISCVCC {
+
+enum CondCode {
+ COND_EQ,
+ COND_NE,
+ COND_LT,
+ COND_GE,
+ COND_LTU,
+ COND_GEU,
+ COND_INVALID
+};
+
+CondCode getOppositeBranchCondition(CondCode);
+
+} // end of namespace RISCVCC
+
class RISCVInstrInfo : public RISCVGenInstrInfo {
public:
explicit RISCVInstrInfo(RISCVSubtarget &STI);
MCInst getNop() const override;
+ const MCInstrDesc &getBrCond(RISCVCC::CondCode CC) const;
unsigned isLoadFromStackSlot(const MachineInstr &MI,
int &FrameIndex) const override;
@@ -68,10 +85,10 @@ public:
const DebugLoc &dl,
int *BytesAdded = nullptr) const override;
- unsigned insertIndirectBranch(MachineBasicBlock &MBB,
- MachineBasicBlock &NewDestBB,
- const DebugLoc &DL, int64_t BrOffset,
- RegScavenger *RS = nullptr) const override;
+ void insertIndirectBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock &NewDestBB,
+ MachineBasicBlock &RestoreBB, const DebugLoc &DL,
+ int64_t BrOffset, RegScavenger *RS) const override;
unsigned removeBranch(MachineBasicBlock &MBB,
int *BytesRemoved = nullptr) const override;
@@ -143,9 +160,8 @@ public:
unsigned OpIdx1,
unsigned OpIdx2) const override;
- MachineInstr *convertToThreeAddress(MachineFunction::iterator &MBB,
- MachineInstr &MI,
- LiveVariables *LV) const override;
+ MachineInstr *convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
+ LiveIntervals *LIS) const override;
Register getVLENFactoredAmount(
MachineFunction &MF, MachineBasicBlock &MBB,
@@ -164,6 +180,11 @@ protected:
const RISCVSubtarget &STI;
};
+namespace RISCV {
+// Special immediate for AVL operand of V pseudo instructions to indicate VLMax.
+static constexpr int64_t VLMaxSentinel = -1LL;
+} // namespace RISCV
+
namespace RISCVVPseudosTable {
struct PseudoInfo {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 949fff25e9e0..b653928ccea9 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -23,6 +23,7 @@ def SDT_CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>,
// Target-dependent type requirements.
def SDT_RISCVCall : SDTypeProfile<0, -1, [SDTCisVT<0, XLenVT>]>;
def SDT_RISCVSelectCC : SDTypeProfile<1, 5, [SDTCisSameAs<1, 2>,
+ SDTCisVT<3, OtherVT>,
SDTCisSameAs<0, 4>,
SDTCisSameAs<4, 5>]>;
def SDT_RISCVBrCC : SDTypeProfile<0, 4, [SDTCisSameAs<0, 1>,
@@ -152,6 +153,20 @@ def uimmlog2xlen : Operand<XLenVT>, ImmLeaf<XLenVT, [{
let OperandNamespace = "RISCVOp";
}
+def uimm2 : Operand<XLenVT> {
+ let ParserMatchClass = UImmAsmOperand<2>;
+ let DecoderMethod = "decodeUImmOperand<2>";
+ let OperandType = "OPERAND_UIMM2";
+ let OperandNamespace = "RISCVOp";
+}
+
+def uimm3 : Operand<XLenVT> {
+ let ParserMatchClass = UImmAsmOperand<3>;
+ let DecoderMethod = "decodeUImmOperand<3>";
+ let OperandType = "OPERAND_UIMM3";
+ let OperandNamespace = "RISCVOp";
+}
+
def uimm5 : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isUInt<5>(Imm);}]> {
let ParserMatchClass = UImmAsmOperand<5>;
let DecoderMethod = "decodeUImmOperand<5>";
@@ -159,6 +174,13 @@ def uimm5 : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isUInt<5>(Imm);}]> {
let OperandNamespace = "RISCVOp";
}
+def uimm7 : Operand<XLenVT> {
+ let ParserMatchClass = UImmAsmOperand<7>;
+ let DecoderMethod = "decodeUImmOperand<7>";
+ let OperandType = "OPERAND_UIMM7";
+ let OperandNamespace = "RISCVOp";
+}
+
def simm12 : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isInt<12>(Imm);}]> {
let ParserMatchClass = SImmAsmOperand<12>;
let EncoderMethod = "getImmOpValue";
@@ -849,6 +871,87 @@ def : MnemonicAlias<"sbreak", "ebreak">;
def : InstAlias<"zext.b $rd, $rs", (ANDI GPR:$rd, GPR:$rs, 0xFF), 0>;
//===----------------------------------------------------------------------===//
+// .insn directive instructions
+//===----------------------------------------------------------------------===//
+
+// isCodeGenOnly = 1 to hide them from the tablegened assembly parser.
+let isCodeGenOnly = 1, hasSideEffects = 1, mayLoad = 1, mayStore = 1,
+ hasNoSchedulingInfo = 1 in {
+def InsnR : DirectiveInsnR<(outs AnyReg:$rd), (ins uimm7:$opcode, uimm3:$funct3,
+ uimm7:$funct7, AnyReg:$rs1,
+ AnyReg:$rs2),
+ "$opcode, $funct3, $funct7, $rd, $rs1, $rs2">;
+def InsnR4 : DirectiveInsnR4<(outs AnyReg:$rd), (ins uimm7:$opcode,
+ uimm3:$funct3,
+ uimm2:$funct2,
+ AnyReg:$rs1, AnyReg:$rs2,
+ AnyReg:$rs3),
+ "$opcode, $funct3, $funct2, $rd, $rs1, $rs2, $rs3">;
+def InsnI : DirectiveInsnI<(outs AnyReg:$rd), (ins uimm7:$opcode, uimm3:$funct3,
+ AnyReg:$rs1, simm12:$imm12),
+ "$opcode, $funct3, $rd, $rs1, $imm12">;
+def InsnI_Mem : DirectiveInsnI<(outs AnyReg:$rd), (ins uimm7:$opcode,
+ uimm3:$funct3,
+ AnyReg:$rs1,
+ simm12:$imm12),
+ "$opcode, $funct3, $rd, ${imm12}(${rs1})">;
+def InsnB : DirectiveInsnB<(outs), (ins uimm7:$opcode, uimm3:$funct3,
+ AnyReg:$rs1, AnyReg:$rs2,
+ simm13_lsb0:$imm12),
+ "$opcode, $funct3, $rs1, $rs2, $imm12">;
+def InsnU : DirectiveInsnU<(outs AnyReg:$rd), (ins uimm7:$opcode,
+ uimm20_lui:$imm20),
+ "$opcode, $rd, $imm20">;
+def InsnJ : DirectiveInsnJ<(outs AnyReg:$rd), (ins uimm7:$opcode,
+ simm21_lsb0_jal:$imm20),
+ "$opcode, $rd, $imm20">;
+def InsnS : DirectiveInsnS<(outs), (ins uimm7:$opcode, uimm3:$funct3,
+ AnyReg:$rs2, AnyReg:$rs1,
+ simm12:$imm12),
+ "$opcode, $funct3, $rs2, ${imm12}(${rs1})">;
+}
+
+// Use InstAliases to match these so that we can combine the insn and format
+// into a mnemonic to use as the key for the tablegened asm matcher table. The
+// parser will take care of creating these fake mnemonics and will only do it
+// for known formats.
+let EmitPriority = 0 in {
+def : InstAlias<".insn_r $opcode, $funct3, $funct7, $rd, $rs1, $rs2",
+ (InsnR AnyReg:$rd, uimm7:$opcode, uimm3:$funct3, uimm7:$funct7,
+ AnyReg:$rs1, AnyReg:$rs2)>;
+// Accept 4 register form of ".insn r" as alias for ".insn r4".
+def : InstAlias<".insn_r $opcode, $funct3, $funct2, $rd, $rs1, $rs2, $rs3",
+ (InsnR4 AnyReg:$rd, uimm7:$opcode, uimm3:$funct3, uimm2:$funct2,
+ AnyReg:$rs1, AnyReg:$rs2, AnyReg:$rs3)>;
+def : InstAlias<".insn_r4 $opcode, $funct3, $funct2, $rd, $rs1, $rs2, $rs3",
+ (InsnR4 AnyReg:$rd, uimm7:$opcode, uimm3:$funct3, uimm2:$funct2,
+ AnyReg:$rs1, AnyReg:$rs2, AnyReg:$rs3)>;
+def : InstAlias<".insn_i $opcode, $funct3, $rd, $rs1, $imm12",
+ (InsnI AnyReg:$rd, uimm7:$opcode, uimm3:$funct3, AnyReg:$rs1,
+ simm12:$imm12)>;
+def : InstAlias<".insn_i $opcode, $funct3, $rd, ${imm12}(${rs1})",
+ (InsnI_Mem AnyReg:$rd, uimm7:$opcode, uimm3:$funct3,
+ AnyReg:$rs1, simm12:$imm12)>;
+def : InstAlias<".insn_b $opcode, $funct3, $rs1, $rs2, $imm12",
+ (InsnB uimm7:$opcode, uimm3:$funct3, AnyReg:$rs1,
+ AnyReg:$rs2, simm13_lsb0:$imm12)>;
+// Accept sb as an alias for b.
+def : InstAlias<".insn_sb $opcode, $funct3, $rs1, $rs2, $imm12",
+ (InsnB uimm7:$opcode, uimm3:$funct3, AnyReg:$rs1,
+ AnyReg:$rs2, simm13_lsb0:$imm12)>;
+def : InstAlias<".insn_u $opcode, $rd, $imm20",
+ (InsnU AnyReg:$rd, uimm7:$opcode, uimm20_lui:$imm20)>;
+def : InstAlias<".insn_j $opcode, $rd, $imm20",
+ (InsnJ AnyReg:$rd, uimm7:$opcode, simm21_lsb0_jal:$imm20)>;
+// Accept uj as an alias for j.
+def : InstAlias<".insn_uj $opcode, $rd, $imm20",
+ (InsnJ AnyReg:$rd, uimm7:$opcode, simm21_lsb0_jal:$imm20)>;
+def : InstAlias<".insn_s $opcode, $funct3, $rs2, ${imm12}(${rs1})",
+ (InsnS uimm7:$opcode, uimm3:$funct3, AnyReg:$rs2,
+ AnyReg:$rs1, simm12:$imm12)>;
+}
+
+//===----------------------------------------------------------------------===//
// Pseudo-instructions and codegen patterns
//
// Naming convention: For 'generic' pattern classes, we use the naming
@@ -893,6 +996,14 @@ def mul_oneuse : PatFrag<(ops node:$A, node:$B), (mul node:$A, node:$B), [{
return N->hasOneUse();
}]>;
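+// A multiply whose second operand is a constant with a single use.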
+def mul_const_oneuse : PatFrag<(ops node:$A, node:$B),
+ (mul node:$A, node:$B), [{
+ if (auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+ if (N1C->hasOneUse())
+ return true;
+ return false;
+}]>;
+
/// Simple arithmetic operations
def : PatGprGpr<add, ADD>;
@@ -966,13 +1077,27 @@ def : Pat<(setgt GPR:$rs1, GPR:$rs2), (SLT GPR:$rs2, GPR:$rs1)>;
def : Pat<(setge GPR:$rs1, GPR:$rs2), (XORI (SLT GPR:$rs1, GPR:$rs2), 1)>;
def : Pat<(setle GPR:$rs1, GPR:$rs2), (XORI (SLT GPR:$rs2, GPR:$rs1), 1)>;
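+// Convert the ISD::CondCode operand of a riscv_selectcc node into the
+// corresponding RISCVCC::CondCode target constant.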
+def IntCCtoRISCVCC : SDNodeXForm<riscv_selectcc, [{
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ RISCVCC::CondCode BrCC = getRISCVCCForIntCC(CC);
+ return CurDAG->getTargetConstant(BrCC, SDLoc(N), Subtarget->getXLenVT());
+}]>;
+
+def riscv_selectcc_frag : PatFrag<(ops node:$lhs, node:$rhs, node:$cc,
+ node:$truev, node:$falsev),
+ (riscv_selectcc node:$lhs, node:$rhs,
+ node:$cc, node:$truev,
+ node:$falsev), [{}],
+ IntCCtoRISCVCC>;
+
let usesCustomInserter = 1 in
class SelectCC_rrirr<RegisterClass valty, RegisterClass cmpty>
: Pseudo<(outs valty:$dst),
(ins cmpty:$lhs, cmpty:$rhs, ixlenimm:$imm,
valty:$truev, valty:$falsev),
- [(set valty:$dst, (riscv_selectcc cmpty:$lhs, cmpty:$rhs,
- (XLenVT timm:$imm), valty:$truev, valty:$falsev))]>;
+ [(set valty:$dst,
+ (riscv_selectcc_frag:$imm cmpty:$lhs, cmpty:$rhs, cond,
+ valty:$truev, valty:$falsev))]>;
def Select_GPR_Using_CC_GPR : SelectCC_rrirr<GPR, GPR>;
@@ -1231,22 +1356,30 @@ def : Pat<(i64 (shl (and GPR:$rs1, 0xffffffff), uimm5:$shamt)),
(SRLI (SLLI GPR:$rs1, 32), (ImmSubFrom32 uimm5:$shamt))>;
}
+// PatFrag to allow ADDW/SUBW/MULW/SLLW to be selected from i64 add/sub/mul/shl
+// if only the lower 32 bits of their result are used.
+class binop_allwusers<SDPatternOperator operator>
+ : PatFrag<(ops node:$lhs, node:$rhs),
+ (operator node:$lhs, node:$rhs), [{
+ return hasAllWUsers(Node);
+}]>;
+
+def sexti32_allwusers : PatFrag<(ops node:$src),
+ (sext_inreg node:$src, i32), [{
+ return hasAllWUsers(Node);
+}]>;
+
let Predicates = [IsRV64] in {
/// sext and zext
+// Sign extend is not needed if all users are W instructions.
+def : Pat<(sexti32_allwusers GPR:$rs1), (XLenVT GPR:$rs1)>;
+
def : Pat<(sext_inreg GPR:$rs1, i32), (ADDIW GPR:$rs1, 0)>;
/// ALU operations
-def : Pat<(sext_inreg (add GPR:$rs1, GPR:$rs2), i32),
- (ADDW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(sext_inreg (add GPR:$rs1, simm12:$imm12), i32),
- (ADDIW GPR:$rs1, simm12:$imm12)>;
-def : Pat<(sext_inreg (sub GPR:$rs1, GPR:$rs2), i32),
- (SUBW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(sext_inreg (shl GPR:$rs1, uimm5:$shamt), i32),
- (SLLIW GPR:$rs1, uimm5:$shamt)>;
def : Pat<(i64 (srl (and GPR:$rs1, 0xffffffff), uimm5:$shamt)),
(SRLIW GPR:$rs1, uimm5:$shamt)>;
def : Pat<(i64 (srl (shl GPR:$rs1, (i64 32)), uimm6gt32:$shamt)),
@@ -1260,6 +1393,18 @@ def : PatGprGpr<shiftopw<riscv_sllw>, SLLW>;
def : PatGprGpr<shiftopw<riscv_srlw>, SRLW>;
def : PatGprGpr<shiftopw<riscv_sraw>, SRAW>;
+// Select W instructions if only the lower 32 bits of the result are used.
+def : PatGprGpr<binop_allwusers<add>, ADDW>;
+def : PatGprSimm12<binop_allwusers<add>, ADDIW>;
+def : PatGprGpr<binop_allwusers<sub>, SUBW>;
+def : PatGprImm<binop_allwusers<shl>, SLLIW, uimm5>;
+
+// If this is a shr of a value sign extended from i32, and all the users only
+// use the lower 32 bits, we can use an sraiw to remove the sext_inreg. This
+// occurs because SimplifyDemandedBits prefers srl over sra.
+def : Pat<(binop_allwusers<srl> (sext_inreg GPR:$rs1, i32), uimm5:$shamt),
+ (SRAIW GPR:$rs1, uimm5:$shamt)>;
+
/// Loads
defm : LdPat<sextloadi32, LW, i64>;
@@ -1300,7 +1445,8 @@ def : Pat<(add GPR:$rs1, (AddiPair:$rs2)),
(AddiPairImmA GPR:$rs2))>;
let Predicates = [IsRV64] in {
-def : Pat<(sext_inreg (add_oneuse GPR:$rs1, (AddiPair:$rs2)), i32),
+// Select W instructions if only the lower 32-bits of the result are used.
+def : Pat<(binop_allwusers<add> GPR:$rs1, (AddiPair:$rs2)),
(ADDIW (ADDIW GPR:$rs1, (AddiPairImmB AddiPair:$rs2)),
(AddiPairImmA AddiPair:$rs2))>;
}
@@ -1314,6 +1460,6 @@ include "RISCVInstrInfoA.td"
include "RISCVInstrInfoF.td"
include "RISCVInstrInfoD.td"
include "RISCVInstrInfoC.td"
-include "RISCVInstrInfoB.td"
+include "RISCVInstrInfoZb.td"
include "RISCVInstrInfoV.td"
include "RISCVInstrInfoZfh.td"
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
index 86f96c1529b1..d204c85d6179 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
@@ -745,13 +745,6 @@ def : InstAlias<"c.sdsp $rs2, (${rs1})", (C_SDSP GPRC:$rs2, SP:$rs1, 0)>;
// Compress Instruction tablegen backend.
//===----------------------------------------------------------------------===//
-class CompressPat<dag input, dag output> {
- dag Input = input;
- dag Output = output;
- list<Predicate> Predicates = [];
- bit isCompressOnly = false;
-}
-
// Patterns are defined in the same order the compressed instructions appear
// on page 82 of the ISA manual.
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
index 41eff2ef7607..2cd011a02345 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
@@ -331,6 +331,10 @@ def : Pat<(f64 (fpimm0)), (FCVT_D_W (i32 X0))>;
def : Pat<(i32 (fp_to_sint FPR64:$rs1)), (FCVT_W_D FPR64:$rs1, 0b001)>;
def : Pat<(i32 (fp_to_uint FPR64:$rs1)), (FCVT_WU_D FPR64:$rs1, 0b001)>;
+// Saturating double->[u]int32.
+def : Pat<(i32 (riscv_fcvt_x_rtz FPR64:$rs1)), (FCVT_W_D $rs1, 0b001)>;
+def : Pat<(i32 (riscv_fcvt_xu_rtz FPR64:$rs1)), (FCVT_WU_D $rs1, 0b001)>;
+
// float->int32 with current rounding mode.
def : Pat<(i32 (lrint FPR64:$rs1)), (FCVT_W_D $rs1, 0b111)>;
@@ -354,13 +358,17 @@ def : Pat<(i64 (bitconvert FPR64:$rs1)), (FMV_X_D FPR64:$rs1)>;
// Use target specific isd nodes to help us remember the result is sign
// extended. Matching sext_inreg+fptoui/fptosi may cause the conversion to be
// duplicated if it has another user that didn't need the sign_extend.
-def : Pat<(riscv_fcvt_w_rv64 FPR64:$rs1), (FCVT_W_D $rs1, 0b001)>;
-def : Pat<(riscv_fcvt_wu_rv64 FPR64:$rs1), (FCVT_WU_D $rs1, 0b001)>;
+def : Pat<(riscv_fcvt_w_rtz_rv64 FPR64:$rs1), (FCVT_W_D $rs1, 0b001)>;
+def : Pat<(riscv_fcvt_wu_rtz_rv64 FPR64:$rs1), (FCVT_WU_D $rs1, 0b001)>;
// [u]int32->fp
def : Pat<(sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_D_W $rs1)>;
def : Pat<(uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_D_WU $rs1)>;
+// Saturating double->[u]int64.
+def : Pat<(i64 (riscv_fcvt_x_rtz FPR64:$rs1)), (FCVT_L_D $rs1, 0b001)>;
+def : Pat<(i64 (riscv_fcvt_xu_rtz FPR64:$rs1)), (FCVT_LU_D $rs1, 0b001)>;
+
// double->[u]int64. Round-to-zero must be used.
def : Pat<(i64 (fp_to_sint FPR64:$rs1)), (FCVT_L_D FPR64:$rs1, 0b001)>;
def : Pat<(i64 (fp_to_uint FPR64:$rs1)), (FCVT_LU_D FPR64:$rs1, 0b001)>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
index 6b5c9617426a..3400c3be52bf 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
@@ -21,15 +21,21 @@ def SDT_RISCVFMV_X_ANYEXTW_RV64
: SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, f32>]>;
def STD_RISCVFCVT_W_RV64
: SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisFP<1>]>;
+def STD_RISCVFCVT_X
+ : SDTypeProfile<1, 1, [SDTCisVT<0, XLenVT>, SDTCisFP<1>]>;
def riscv_fmv_w_x_rv64
: SDNode<"RISCVISD::FMV_W_X_RV64", SDT_RISCVFMV_W_X_RV64>;
def riscv_fmv_x_anyextw_rv64
: SDNode<"RISCVISD::FMV_X_ANYEXTW_RV64", SDT_RISCVFMV_X_ANYEXTW_RV64>;
-def riscv_fcvt_w_rv64
- : SDNode<"RISCVISD::FCVT_W_RV64", STD_RISCVFCVT_W_RV64>;
-def riscv_fcvt_wu_rv64
- : SDNode<"RISCVISD::FCVT_WU_RV64", STD_RISCVFCVT_W_RV64>;
+def riscv_fcvt_w_rtz_rv64
+ : SDNode<"RISCVISD::FCVT_W_RTZ_RV64", STD_RISCVFCVT_W_RV64>;
+def riscv_fcvt_wu_rtz_rv64
+ : SDNode<"RISCVISD::FCVT_WU_RTZ_RV64", STD_RISCVFCVT_W_RV64>;
+def riscv_fcvt_x_rtz
+ : SDNode<"RISCVISD::FCVT_X_RTZ", STD_RISCVFCVT_X>;
+def riscv_fcvt_xu_rtz
+ : SDNode<"RISCVISD::FCVT_XU_RTZ", STD_RISCVFCVT_X>;
//===----------------------------------------------------------------------===//
// Operand and SDNode transformation definitions.
@@ -379,6 +385,10 @@ def : Pat<(i32 (bitconvert FPR32:$rs1)), (FMV_X_W FPR32:$rs1)>;
def : Pat<(i32 (fp_to_sint FPR32:$rs1)), (FCVT_W_S $rs1, 0b001)>;
def : Pat<(i32 (fp_to_uint FPR32:$rs1)), (FCVT_WU_S $rs1, 0b001)>;
+// Saturating float->[u]int32.
+def : Pat<(i32 (riscv_fcvt_x_rtz FPR32:$rs1)), (FCVT_W_S $rs1, 0b001)>;
+def : Pat<(i32 (riscv_fcvt_xu_rtz FPR32:$rs1)), (FCVT_WU_S $rs1, 0b001)>;
+
// float->int32 with current rounding mode.
def : Pat<(i32 (lrint FPR32:$rs1)), (FCVT_W_S $rs1, 0b111)>;
@@ -400,13 +410,17 @@ def : Pat<(sext_inreg (riscv_fmv_x_anyextw_rv64 FPR32:$src), i32),
// Use target specific isd nodes to help us remember the result is sign
// extended. Matching sext_inreg+fptoui/fptosi may cause the conversion to be
// duplicated if it has another user that didn't need the sign_extend.
-def : Pat<(riscv_fcvt_w_rv64 FPR32:$rs1), (FCVT_W_S $rs1, 0b001)>;
-def : Pat<(riscv_fcvt_wu_rv64 FPR32:$rs1), (FCVT_WU_S $rs1, 0b001)>;
+def : Pat<(riscv_fcvt_w_rtz_rv64 FPR32:$rs1), (FCVT_W_S $rs1, 0b001)>;
+def : Pat<(riscv_fcvt_wu_rtz_rv64 FPR32:$rs1), (FCVT_WU_S $rs1, 0b001)>;
// float->[u]int64. Round-to-zero must be used.
def : Pat<(i64 (fp_to_sint FPR32:$rs1)), (FCVT_L_S $rs1, 0b001)>;
def : Pat<(i64 (fp_to_uint FPR32:$rs1)), (FCVT_LU_S $rs1, 0b001)>;
+// Saturating float->[u]int64.
+def : Pat<(i64 (riscv_fcvt_x_rtz FPR32:$rs1)), (FCVT_L_S $rs1, 0b001)>;
+def : Pat<(i64 (riscv_fcvt_xu_rtz FPR32:$rs1)), (FCVT_LU_S $rs1, 0b001)>;
+
// float->int64 with current rounding mode.
def : Pat<(i64 (lrint FPR32:$rs1)), (FCVT_L_S $rs1, 0b111)>;
def : Pat<(i64 (llrint FPR32:$rs1)), (FCVT_L_S $rs1, 0b111)>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoM.td b/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
index f654ed1949a4..a037dbf585ce 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
@@ -72,8 +72,8 @@ def : PatGprGpr<urem, REMU>;
} // Predicates = [HasStdExtM]
let Predicates = [HasStdExtM, IsRV64] in {
-def : Pat<(sext_inreg (mul GPR:$rs1, GPR:$rs2), i32),
- (MULW GPR:$rs1, GPR:$rs2)>;
+// Select W instructions if only the lower 32-bits of the result are used.
+def : PatGprGpr<binop_allwusers<mul>, MULW>;
def : PatGprGpr<riscv_divw, DIVW>;
def : PatGprGpr<riscv_divuw, DIVUW>;
@@ -96,20 +96,24 @@ def : Pat<(srem (sexti32 (i64 GPR:$rs1)), (sexti32 (i64 GPR:$rs2))),
(REMW GPR:$rs1, GPR:$rs2)>;
} // Predicates = [HasStdExtM, IsRV64]
+// Pattern to detect constants with no more than 32 active bits that can't
+// be materialized with lui+addiw.
+def uimm32_not_simm32 : PatLeaf<(XLenVT GPR:$a), [{
+ auto *C = dyn_cast<ConstantSDNode>(N);
+ return C && C->hasOneUse() && isUInt<32>(C->getZExtValue()) &&
+ !isInt<32>(C->getSExtValue());
+}]>;
+
let Predicates = [HasStdExtM, IsRV64, NotHasStdExtZba] in {
// Special case for calculating the full 64-bit product of a 32x32 unsigned
// multiply where the inputs aren't known to be zero extended. We can shift the
// inputs left by 32 and use a MULHU. This saves two SRLIs needed to finish
// zeroing the upper 32 bits.
-// TODO: If one of the operands is zero extended and the other isn't, we might
-// still be better off shifting both left by 32.
def : Pat<(i64 (mul (and GPR:$rs1, 0xffffffff), (and GPR:$rs2, 0xffffffff))),
(MULHU (SLLI GPR:$rs1, 32), (SLLI GPR:$rs2, 32))>;
-// Prevent matching the first part of this pattern to mulw. The mul here has
-// additionals users or the ANDs would have been removed. The above pattern
-// will be used for the other users. If we form a mulw we'll keep the ANDs alive
-// and they'll still become SLLI+SRLI.
-def : Pat<(sext_inreg (mul (and GPR:$rs1, 0xffffffff),
- (and GPR:$rs2, 0xffffffff)), i32),
- (ADDIW (MULHU (SLLI GPR:$rs1, 32), (SLLI GPR:$rs2, 32)), 0)>;
+// The RHS could also be a constant that is hard to materialize. By shifting
+// left we can allow constant materialization to use LUI+ADDIW via
+// hasAllWUsers.
+def : Pat<(i64 (mul (and GPR:$rs1, 0xffffffff), uimm32_not_simm32:$rs2)),
+ (MULHU (SLLI GPR:$rs1, 32), (SLLI GPR:$rs2, 32))>;
} // Predicates = [HasStdExtM, IsRV64, NotHasStdExtZba]
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
index 342497150d49..3d5f9bc54731 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
@@ -78,65 +78,105 @@ def simm5_plus1 : Operand<XLenVT>, ImmLeaf<XLenVT,
}
//===----------------------------------------------------------------------===//
+// Scheduling definitions.
+//===----------------------------------------------------------------------===//
+
+class VMVRSched<int n>: Sched <[!cast<SchedReadWrite>("WriteVMov" # n # "V"),
+ !cast<SchedReadWrite>("ReadVMov" # n # "V")]>;
+
+class VLESched<int n> : Sched <[!cast<SchedReadWrite>("WriteVLDE" # n),
+ ReadVLDX, ReadVMask]>;
+
+class VSESched<int n> : Sched <[!cast<SchedReadWrite>("WriteVSTE" # n),
+ !cast<SchedReadWrite>("ReadVSTE" # n # "V"),
+ ReadVSTX, ReadVMask]>;
+
+class VLSSched<int n> : Sched <[!cast<SchedReadWrite>("WriteVLDS" # n),
+ ReadVLDX, ReadVLDSX, ReadVMask]>;
+
+class VSSSched<int n> : Sched <[!cast<SchedReadWrite>("WriteVSTS" # n),
+ !cast<SchedReadWrite>("ReadVSTS" # n # "V"),
+ ReadVSTX, ReadVSTSX, ReadVMask]>;
+
+class VLXSched<int n, string o> :
+ Sched <[!cast<SchedReadWrite>("WriteVLD" # o # "X" # n),
+ ReadVLDX, !cast<SchedReadWrite>("ReadVLD" # o # "XV"), ReadVMask]>;
+
+class VSXSched<int n, string o> :
+ Sched <[!cast<SchedReadWrite>("WriteVST" # o # "X" # n),
+ !cast<SchedReadWrite>("ReadVST" # o # "X" # n),
+ ReadVSTX, !cast<SchedReadWrite>("ReadVST" # o # "XV"), ReadVMask]>;
+
+class VLFSched<int n> : Sched <[!cast<SchedReadWrite>("WriteVLDFF" # n),
+ ReadVLDX, ReadVMask]>;
+
+//===----------------------------------------------------------------------===//
// Instruction class templates
//===----------------------------------------------------------------------===//
let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
-// load vd, (rs1)
+// unit-stride load vd, (rs1), vm
+class VUnitStrideLoad<RISCVWidth width, string opcodestr>
+ : RVInstVLU<0b000, width.Value{3}, LUMOPUnitStride, width.Value{2-0},
+ (outs VR:$vd),
+ (ins GPR:$rs1, VMaskOp:$vm), opcodestr, "$vd, (${rs1})$vm">;
+
+let vm = 1, RVVConstraint = NoConstraint in {
+// unit-stride whole register load vl<nf>r.v vd, (rs1)
+class VWholeLoad<bits<3> nf, RISCVWidth width, string opcodestr, RegisterClass VRC>
+ : RVInstVLU<nf, width.Value{3}, LUMOPUnitStrideWholeReg,
+ width.Value{2-0}, (outs VRC:$vd), (ins GPR:$rs1),
+ opcodestr, "$vd, (${rs1})"> {
+ let Uses = [];
+}
+
+// unit-stride mask load vd, (rs1)
class VUnitStrideLoadMask<string opcodestr>
: RVInstVLU<0b000, LSWidth8.Value{3}, LUMOPUnitStrideMask, LSWidth8.Value{2-0},
(outs VR:$vd),
- (ins GPR:$rs1), opcodestr, "$vd, (${rs1})"> {
- let vm = 1;
- let RVVConstraint = NoConstraint;
-}
+ (ins GPR:$rs1), opcodestr, "$vd, (${rs1})">;
+} // vm = 1, RVVConstraint = NoConstraint
-// load vd, (rs1), vm
-class VUnitStrideLoad<RISCVLSUMOP lumop, RISCVWidth width,
- string opcodestr>
- : RVInstVLU<0b000, width.Value{3}, lumop, width.Value{2-0},
+// unit-stride fault-only-first load vd, (rs1), vm
+class VUnitStrideLoadFF<RISCVWidth width, string opcodestr>
+ : RVInstVLU<0b000, width.Value{3}, LUMOPUnitStrideFF, width.Value{2-0},
(outs VR:$vd),
(ins GPR:$rs1, VMaskOp:$vm), opcodestr, "$vd, (${rs1})$vm">;
-// load vd, (rs1), rs2, vm
+// strided load vd, (rs1), rs2, vm
class VStridedLoad<RISCVWidth width, string opcodestr>
: RVInstVLS<0b000, width.Value{3}, width.Value{2-0},
(outs VR:$vd),
(ins GPR:$rs1, GPR:$rs2, VMaskOp:$vm), opcodestr,
"$vd, (${rs1}), $rs2$vm">;
-// load vd, (rs1), vs2, vm
+// indexed load vd, (rs1), vs2, vm
class VIndexedLoad<RISCVMOP mop, RISCVWidth width, string opcodestr>
: RVInstVLX<0b000, width.Value{3}, mop, width.Value{2-0},
(outs VR:$vd),
(ins GPR:$rs1, VR:$vs2, VMaskOp:$vm), opcodestr,
"$vd, (${rs1}), $vs2$vm">;
-// vl<nf>r.v vd, (rs1)
-class VWholeLoad<bits<3> nf, RISCVWidth width, string opcodestr, RegisterClass VRC>
- : RVInstVLU<nf, width.Value{3}, LUMOPUnitStrideWholeReg,
- width.Value{2-0}, (outs VRC:$vd), (ins GPR:$rs1),
- opcodestr, "$vd, (${rs1})"> {
- let vm = 1;
- let Uses = [];
- let RVVConstraint = NoConstraint;
-}
+// unit-stride segment load vd, (rs1), vm
+class VUnitStrideSegmentLoad<bits<3> nf, RISCVWidth width, string opcodestr>
+ : RVInstVLU<nf, width.Value{3}, LUMOPUnitStride, width.Value{2-0},
+ (outs VR:$vd),
+ (ins GPR:$rs1, VMaskOp:$vm), opcodestr, "$vd, (${rs1})$vm">;
-// segment load vd, (rs1), vm
-class VUnitStrideSegmentLoad<bits<3> nf, RISCVLSUMOP lumop,
- RISCVWidth width, string opcodestr>
- : RVInstVLU<nf, width.Value{3}, lumop, width.Value{2-0},
+// segment fault-only-first load vd, (rs1), vm
+class VUnitStrideSegmentLoadFF<bits<3> nf, RISCVWidth width, string opcodestr>
+ : RVInstVLU<nf, width.Value{3}, LUMOPUnitStrideFF, width.Value{2-0},
(outs VR:$vd),
(ins GPR:$rs1, VMaskOp:$vm), opcodestr, "$vd, (${rs1})$vm">;
-// segment load vd, (rs1), rs2, vm
+// strided segment load vd, (rs1), rs2, vm
class VStridedSegmentLoad<bits<3> nf, RISCVWidth width, string opcodestr>
: RVInstVLS<nf, width.Value{3}, width.Value{2-0},
(outs VR:$vd),
(ins GPR:$rs1, GPR:$rs2, VMaskOp:$vm), opcodestr,
"$vd, (${rs1}), $rs2$vm">;
-// segment load vd, (rs1), vs2, vm
+// indexed segment load vd, (rs1), vs2, vm
class VIndexedSegmentLoad<bits<3> nf, RISCVMOP mop, RISCVWidth width,
string opcodestr>
: RVInstVLX<nf, width.Value{3}, mop, width.Value{2-0},
@@ -146,42 +186,40 @@ class VIndexedSegmentLoad<bits<3> nf, RISCVMOP mop, RISCVWidth width,
} // hasSideEffects = 0, mayLoad = 1, mayStore = 0
let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in {
-// store vd, vs3, (rs1)
+// unit-stride store vd, vs3, (rs1), vm
+class VUnitStrideStore<RISCVWidth width, string opcodestr>
+ : RVInstVSU<0b000, width.Value{3}, SUMOPUnitStride, width.Value{2-0},
+ (outs), (ins VR:$vs3, GPR:$rs1, VMaskOp:$vm), opcodestr,
+ "$vs3, (${rs1})$vm">;
+
+let vm = 1 in {
+// vs<nf>r.v vd, (rs1)
+class VWholeStore<bits<3> nf, string opcodestr, RegisterClass VRC>
+ : RVInstVSU<nf, 0, SUMOPUnitStrideWholeReg,
+ 0b000, (outs), (ins VRC:$vs3, GPR:$rs1),
+ opcodestr, "$vs3, (${rs1})"> {
+ let Uses = [];
+}
+
+// unit-stride mask store vd, vs3, (rs1)
class VUnitStrideStoreMask<string opcodestr>
: RVInstVSU<0b000, LSWidth8.Value{3}, SUMOPUnitStrideMask, LSWidth8.Value{2-0},
(outs), (ins VR:$vs3, GPR:$rs1), opcodestr,
- "$vs3, (${rs1})"> {
- let vm = 1;
-}
-
-// store vd, vs3, (rs1), vm
-class VUnitStrideStore<RISCVLSUMOP sumop, RISCVWidth width,
- string opcodestr>
- : RVInstVSU<0b000, width.Value{3}, sumop, width.Value{2-0},
- (outs), (ins VR:$vs3, GPR:$rs1, VMaskOp:$vm), opcodestr,
- "$vs3, (${rs1})$vm">;
+ "$vs3, (${rs1})">;
+} // vm = 1
-// store vd, vs3, (rs1), rs2, vm
+// strided store vd, vs3, (rs1), rs2, vm
class VStridedStore<RISCVWidth width, string opcodestr>
: RVInstVSS<0b000, width.Value{3}, width.Value{2-0}, (outs),
(ins VR:$vs3, GPR:$rs1, GPR:$rs2, VMaskOp:$vm),
opcodestr, "$vs3, (${rs1}), $rs2$vm">;
-// store vd, vs3, (rs1), vs2, vm
+// indexed store vd, vs3, (rs1), vs2, vm
class VIndexedStore<RISCVMOP mop, RISCVWidth width, string opcodestr>
: RVInstVSX<0b000, width.Value{3}, mop, width.Value{2-0}, (outs),
(ins VR:$vs3, GPR:$rs1, VR:$vs2, VMaskOp:$vm),
opcodestr, "$vs3, (${rs1}), $vs2$vm">;
-// vs<nf>r.v vd, (rs1)
-class VWholeStore<bits<3> nf, string opcodestr, RegisterClass VRC>
- : RVInstVSU<nf, 0, SUMOPUnitStrideWholeReg,
- 0b000, (outs), (ins VRC:$vs3, GPR:$rs1),
- opcodestr, "$vs3, (${rs1})"> {
- let vm = 1;
- let Uses = [];
-}
-
// segment store vd, vs3, (rs1), vm
class VUnitStrideSegmentStore<bits<3> nf, RISCVWidth width, string opcodestr>
: RVInstVSU<nf, width.Value{3}, SUMOPUnitStride, width.Value{2-0},
@@ -328,106 +366,417 @@ class VAMONoWd<RISCVAMOOP amoop, RISCVWidth width, string opcodestr>
// Use these multiclasses to define instructions more easily.
//===----------------------------------------------------------------------===//
multiclass VALU_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> {
- def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">;
- def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">;
- def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>;
+ def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVIALUV, ReadVIALUV, ReadVIALUV, ReadVMask]>;
+ def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVIALUX, ReadVIALUV, ReadVIALUX, ReadVMask]>;
+ def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>,
+ Sched<[WriteVIALUI, ReadVIALUV, ReadVMask]>;
}
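// With the Sched additions, an existing instantiation such as the one later in
// this file,
//   defm VADD_V : VALU_IV_V_X_I<"vadd", 0b000000>;
// still produces VADD_VV, VADD_VX and VADD_VI ("vadd.vv"/"vadd.vx"/"vadd.vi"),
// but each record now also carries the corresponding WriteVIALU*/ReadVIALU*
// scheduling information.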
multiclass VALU_IV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
- def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">;
- def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">;
+ def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVIALUV, ReadVIALUV, ReadVIALUV, ReadVMask]>;
+ def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVIALUX, ReadVIALUV, ReadVIALUX, ReadVMask]>;
}
-multiclass VALUr_IV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
- def V : VALUrVV<funct6, OPIVV, opcodestr # "." # vw # "v">;
- def X : VALUrVX<funct6, OPIVX, opcodestr # "." # vw # "x">;
+multiclass VALU_IV_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> {
+ def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVIALUV, ReadVIALUV, ReadVIALUX, ReadVMask]>;
+ def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>,
+ Sched<[WriteVIALUI, ReadVIALUV, ReadVMask]>;
}
-multiclass VALU_IV_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> {
- def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">;
- def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>;
+multiclass VALU_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUVV<funct6, OPMVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVIWALUV, ReadVIWALUV, ReadVIWALUV, ReadVMask]>;
+ def X : VALUVX<funct6, OPMVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVIWALUX, ReadVIWALUV, ReadVIWALUX, ReadVMask]>;
}
-multiclass VALU_IV_V<string opcodestr, bits<6> funct6> {
- def _VS : VALUVV<funct6, OPIVV, opcodestr # ".vs">;
+multiclass VMAC_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUrVV<funct6, OPMVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVIMulAddV, ReadVIMulAddV, ReadVIMulAddV, ReadVMask]>;
+ def X : VALUrVX<funct6, OPMVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVIMulAddX, ReadVIMulAddV, ReadVIMulAddX, ReadVMask]>;
}
-multiclass VALUr_IV_X<string opcodestr, bits<6> funct6, string vw = "v"> {
- def X : VALUrVX<funct6, OPIVX, opcodestr # "." # vw # "x">;
+multiclass VWMAC_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUrVV<funct6, OPMVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVIWMulAddV, ReadVIWMulAddV, ReadVIWMulAddV, ReadVMask]>;
+ def X : VALUrVX<funct6, OPMVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVIWMulAddX, ReadVIWMulAddV, ReadVIWMulAddX, ReadVMask]>;
}
-multiclass VALU_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
- def V : VALUVV<funct6, OPMVV, opcodestr # "." # vw # "v">;
- def X : VALUVX<funct6, OPMVX, opcodestr # "." # vw # "x">;
+multiclass VWMAC_MV_X<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def X : VALUrVX<funct6, OPMVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVIWMulAddX, ReadVIWMulAddV, ReadVIWMulAddX, ReadVMask]>;
}
-multiclass VALU_MV_V<string opcodestr, bits<6> funct6> {
- def _VS : VALUVV<funct6, OPMVV, opcodestr # ".vs">;
+multiclass VALU_MV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
+ def "" : VALUVs2<funct6, vs1, OPMVV, opcodestr>,
+ Sched<[WriteVExtV, ReadVExtV, ReadVMask]>;
}
-multiclass VALU_MV_Mask<string opcodestr, bits<6> funct6, string vm = "v"> {
- def M : VALUVVNoVm<funct6, OPMVV, opcodestr # "." # vm # "m">;
+multiclass VALUm_IV_V_X_I<string opcodestr, bits<6> funct6> {
+ def VM : VALUmVV<funct6, OPIVV, opcodestr # ".vvm">,
+ Sched<[WriteVICALUV, ReadVIALUCV, ReadVIALUCV, ReadVMask]>;
+ def XM : VALUmVX<funct6, OPIVX, opcodestr # ".vxm">,
+ Sched<[WriteVICALUX, ReadVIALUCV, ReadVIALUCX, ReadVMask]>;
+ def IM : VALUmVI<funct6, opcodestr # ".vim">,
+ Sched<[WriteVICALUI, ReadVIALUCV, ReadVMask]>;
}
-multiclass VALU_MV_X<string opcodestr, bits<6> funct6, string vw = "v"> {
- def X : VALUVX<funct6, OPMVX, opcodestr # "." # vw # "x">;
+multiclass VMRG_IV_V_X_I<string opcodestr, bits<6> funct6> {
+ def VM : VALUmVV<funct6, OPIVV, opcodestr # ".vvm">,
+ Sched<[WriteVIMergeV, ReadVIMergeV, ReadVIMergeV, ReadVMask]>;
+ def XM : VALUmVX<funct6, OPIVX, opcodestr # ".vxm">,
+ Sched<[WriteVIMergeX, ReadVIMergeV, ReadVIMergeX, ReadVMask]>;
+ def IM : VALUmVI<funct6, opcodestr # ".vim">,
+ Sched<[WriteVIMergeI, ReadVIMergeV, ReadVMask]>;
}
-multiclass VALUr_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
- def V : VALUrVV<funct6, OPMVV, opcodestr # "." # vw # "v">;
- def X : VALUrVX<funct6, OPMVX, opcodestr # "." # vw # "x">;
+multiclass VALUm_IV_V_X<string opcodestr, bits<6> funct6> {
+ def VM : VALUmVV<funct6, OPIVV, opcodestr # ".vvm">,
+ Sched<[WriteVICALUV, ReadVIALUCV, ReadVIALUCV, ReadVMask]>;
+ def XM : VALUmVX<funct6, OPIVX, opcodestr # ".vxm">,
+ Sched<[WriteVICALUX, ReadVIALUCV, ReadVIALUCX, ReadVMask]>;
}
-multiclass VALUr_MV_X<string opcodestr, bits<6> funct6, string vw = "v"> {
- def X : VALUrVX<funct6, OPMVX, opcodestr # "." # vw # "x">;
+multiclass VALUNoVm_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5> {
+ def V : VALUVVNoVm<funct6, OPIVV, opcodestr # ".vv">,
+ Sched<[WriteVICALUV, ReadVIALUCV, ReadVIALUCV]>;
+ def X : VALUVXNoVm<funct6, OPIVX, opcodestr # ".vx">,
+ Sched<[WriteVICALUX, ReadVIALUCV, ReadVIALUCX]>;
+ def I : VALUVINoVm<funct6, opcodestr # ".vi", optype>,
+ Sched<[WriteVICALUI, ReadVIALUCV]>;
}
-multiclass VALU_MV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
- def "" : VALUVs2<funct6, vs1, OPMVV, opcodestr>;
+multiclass VALUNoVm_IV_V_X<string opcodestr, bits<6> funct6> {
+ def V : VALUVVNoVm<funct6, OPIVV, opcodestr # ".vv">,
+ Sched<[WriteVICALUV, ReadVIALUCV, ReadVIALUCV]>;
+ def X : VALUVXNoVm<funct6, OPIVX, opcodestr # ".vx">,
+ Sched<[WriteVICALUX, ReadVIALUCV, ReadVIALUCX]>;
}
-multiclass VALUm_IV_V_X_I<string opcodestr, bits<6> funct6> {
- def VM : VALUmVV<funct6, OPIVV, opcodestr # ".vvm">;
- def XM : VALUmVX<funct6, OPIVX, opcodestr # ".vxm">;
- def IM : VALUmVI<funct6, opcodestr # ".vim">;
+multiclass VALU_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUVV<funct6, OPFVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVFALUV, ReadVFALUV, ReadVFALUV, ReadVMask]>;
+ def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
+ Sched<[WriteVFALUF, ReadVFALUV, ReadVFALUF, ReadVMask]>;
}
-multiclass VALUm_IV_V_X<string opcodestr, bits<6> funct6> {
- def VM : VALUmVV<funct6, OPIVV, opcodestr # ".vvm">;
- def XM : VALUmVX<funct6, OPIVX, opcodestr # ".vxm">;
+multiclass VALU_FV_F<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
+ Sched<[WriteVFALUF, ReadVFALUV, ReadVFALUF, ReadVMask]>;
}
-multiclass VALUNoVm_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5> {
- def V : VALUVVNoVm<funct6, OPIVV, opcodestr # ".vv">;
- def X : VALUVXNoVm<funct6, OPIVX, opcodestr # ".vx">;
- def I : VALUVINoVm<funct6, opcodestr # ".vi", optype>;
+multiclass VWALU_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUVV<funct6, OPFVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVFWALUV, ReadVFWALUV, ReadVFWALUV, ReadVMask]>;
+ def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
+ Sched<[WriteVFWALUF, ReadVFWALUV, ReadVFWALUF, ReadVMask]>;
}
-multiclass VALUNoVm_IV_V_X<string opcodestr, bits<6> funct6> {
- def V : VALUVVNoVm<funct6, OPIVV, opcodestr # ".vv">;
- def X : VALUVXNoVm<funct6, OPIVX, opcodestr # ".vx">;
+multiclass VMUL_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUVV<funct6, OPFVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVFMulV, ReadVFMulV, ReadVFMulV, ReadVMask]>;
+ def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
+ Sched<[WriteVFMulF, ReadVFMulV, ReadVFMulF, ReadVMask]>;
}
-multiclass VALU_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> {
- def V : VALUVV<funct6, OPFVV, opcodestr # "." # vw # "v">;
- def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">;
+multiclass VDIV_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUVV<funct6, OPFVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVFDivV, ReadVFDivV, ReadVFDivV, ReadVMask]>;
+ def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
+ Sched<[WriteVFDivF, ReadVFDivV, ReadVFDivF, ReadVMask]>;
}
-multiclass VALU_FV_F<string opcodestr, bits<6> funct6, string vw = "v"> {
- def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">;
+multiclass VRDIV_FV_F<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
+ Sched<[WriteVFDivF, ReadVFDivV, ReadVFDivF, ReadVMask]>;
}
-multiclass VALUr_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> {
- def V : VALUrVV<funct6, OPFVV, opcodestr # "." # vw # "v">;
- def F : VALUrVF<funct6, OPFVF, opcodestr # "." # vw # "f">;
+multiclass VWMUL_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUVV<funct6, OPFVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVFWMulV, ReadVFWMulV, ReadVFWMulV, ReadVMask]>;
+ def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
+ Sched<[WriteVFWMulF, ReadVFWMulV, ReadVFWMulF, ReadVMask]>;
}
-multiclass VALU_FV_V<string opcodestr, bits<6> funct6> {
- def _VS : VALUVV<funct6, OPFVV, opcodestr # ".vs">;
+multiclass VMAC_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUrVV<funct6, OPFVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVFMulAddV, ReadVFMulAddV, ReadVFMulAddV, ReadVMask]>;
+ def F : VALUrVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
+ Sched<[WriteVFMulAddF, ReadVFMulAddV, ReadVFMulAddF, ReadVMask]>;
}
-multiclass VALU_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
- def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>;
+multiclass VWMAC_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUrVV<funct6, OPFVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVFWMulAddV, ReadVFWMulAddV, ReadVFWMulAddV, ReadVMask]>;
+ def F : VALUrVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
+ Sched<[WriteVFWMulAddF, ReadVFWMulAddV, ReadVFWMulAddF, ReadVMask]>;
+}
+
+multiclass VSQR_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
+ def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
+ Sched<[WriteVFSqrtV, ReadVFSqrtV, ReadVMask]>;
+}
+
+multiclass VRCP_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
+ def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
+ Sched<[WriteVFRecpV, ReadVFRecpV, ReadVMask]>;
+}
+
+multiclass VCMP_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUVV<funct6, OPFVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVFCmpV, ReadVFCmpV, ReadVFCmpV, ReadVMask]>;
+ def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
+ Sched<[WriteVFCmpF, ReadVFCmpV, ReadVFCmpF, ReadVMask]>;
+}
+
+multiclass VCMP_FV_F<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
+ Sched<[WriteVFCmpF, ReadVFCmpV, ReadVFCmpF, ReadVMask]>;
+}
+
+multiclass VSGNJ_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUVV<funct6, OPFVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVFSgnjV, ReadVFSgnjV, ReadVFSgnjV, ReadVMask]>;
+ def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
+ Sched<[WriteVFSgnjF, ReadVFSgnjV, ReadVFSgnjF, ReadVMask]>;
+}
+
+multiclass VCLS_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
+ def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
+ Sched<[WriteVFClassV, ReadVFClassV, ReadVMask]>;
+}
+
+multiclass VCVTF_IV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
+ def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
+ Sched<[WriteVFCvtIToFV, ReadVFCvtIToFV, ReadVMask]>;
+}
+
+multiclass VCVTI_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
+ def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
+ Sched<[WriteVFCvtFToIV, ReadVFCvtFToIV, ReadVMask]>;
+}
+
+multiclass VWCVTF_IV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
+ def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
+ Sched<[WriteVFWCvtIToFV, ReadVFWCvtIToFV, ReadVMask]>;
+}
+
+multiclass VWCVTI_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
+ def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
+ Sched<[WriteVFWCvtFToIV, ReadVFWCvtFToIV, ReadVMask]>;
+}
+
+multiclass VWCVTF_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
+ def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
+ Sched<[WriteVFWCvtFToFV, ReadVFWCvtFToFV, ReadVMask]>;
+}
+
+multiclass VNCVTF_IV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
+ def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
+ Sched<[WriteVFNCvtIToFV, ReadVFNCvtIToFV, ReadVMask]>;
+}
+
+multiclass VNCVTI_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
+ def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
+ Sched<[WriteVFNCvtFToIV, ReadVFNCvtFToIV, ReadVMask]>;
+}
+
+multiclass VNCVTF_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
+ def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
+ Sched<[WriteVFNCvtFToFV, ReadVFNCvtFToFV, ReadVMask]>;
+}
+
+multiclass VRED_MV_V<string opcodestr, bits<6> funct6> {
+ def _VS : VALUVV<funct6, OPMVV, opcodestr # ".vs">,
+ Sched<[WriteVIRedV, ReadVIRedV, ReadVIRedV0, ReadVMask]>;
+}
+
+multiclass VWRED_IV_V<string opcodestr, bits<6> funct6> {
+ def _VS : VALUVV<funct6, OPIVV, opcodestr # ".vs">,
+ Sched<[WriteVIWRedV, ReadVIWRedV, ReadVIWRedV0, ReadVMask]>;
+}
+
+multiclass VRED_FV_V<string opcodestr, bits<6> funct6> {
+ def _VS : VALUVV<funct6, OPFVV, opcodestr # ".vs">,
+ Sched<[WriteVFRedV, ReadVFRedV, ReadVFRedV0, ReadVMask]>;
+}
+
+multiclass VREDO_FV_V<string opcodestr, bits<6> funct6> {
+ def _VS : VALUVV<funct6, OPFVV, opcodestr # ".vs">,
+ Sched<[WriteVFRedOV, ReadVFRedOV, ReadVFRedOV0, ReadVMask]>;
+}
+
+multiclass VWRED_FV_V<string opcodestr, bits<6> funct6> {
+ def _VS : VALUVV<funct6, OPFVV, opcodestr # ".vs">,
+ Sched<[WriteVFWRedV, ReadVFWRedV, ReadVFWRedV0, ReadVMask]>;
+}
+
+multiclass VWREDO_FV_V<string opcodestr, bits<6> funct6> {
+ def _VS : VALUVV<funct6, OPFVV, opcodestr # ".vs">,
+ Sched<[WriteVFWRedOV, ReadVFWRedOV, ReadVFWRedOV0, ReadVMask]>;
+}
+
+multiclass VMALU_MV_Mask<string opcodestr, bits<6> funct6, string vm = "v"> {
+ def M : VALUVVNoVm<funct6, OPMVV, opcodestr # "." # vm # "m">,
+ Sched<[WriteVMALUV, ReadVMALUV, ReadVMALUV]>;
+}
+
+multiclass VMSFS_MV_V<string opcodestr, bits<6> funct6, bits<5> vs1> {
+ def "" : VALUVs2<funct6, vs1, OPMVV, opcodestr>,
+ Sched<[WriteVMSFSV, ReadVMSFSV, ReadVMask]>;
+}
+
+multiclass VMIOT_MV_V<string opcodestr, bits<6> funct6, bits<5> vs1> {
+ def "" : VALUVs2<funct6, vs1, OPMVV, opcodestr>,
+ Sched<[WriteVMIotV, ReadVMIotV, ReadVMask]>;
+}
+
+multiclass VSHT_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> {
+ def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVShiftV, ReadVShiftV, ReadVShiftV, ReadVMask]>;
+ def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVShiftX, ReadVShiftV, ReadVShiftX, ReadVMask]>;
+ def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>,
+ Sched<[WriteVShiftI, ReadVShiftV, ReadVMask]>;
+}
+
+multiclass VNSHT_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> {
+ def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVNShiftV, ReadVNShiftV, ReadVNShiftV, ReadVMask]>;
+ def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVNShiftX, ReadVNShiftV, ReadVNShiftX, ReadVMask]>;
+ def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>,
+ Sched<[WriteVNShiftI, ReadVNShiftV, ReadVMask]>;
+}
+
+multiclass VCMP_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> {
+ def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVICmpV, ReadVICmpV, ReadVICmpV, ReadVMask]>;
+ def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVICmpX, ReadVICmpV, ReadVICmpX, ReadVMask]>;
+ def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>,
+ Sched<[WriteVICmpI, ReadVICmpV, ReadVMask]>;
+}
+
+multiclass VCMP_IV_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> {
+ def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVICmpV, ReadVICmpV, ReadVICmpX, ReadVMask]>;
+ def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>,
+ Sched<[WriteVICmpI, ReadVICmpV, ReadVMask]>;
+}
+
+multiclass VCMP_IV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVICmpV, ReadVICmpV, ReadVICmpV, ReadVMask]>;
+ def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVICmpX, ReadVICmpV, ReadVICmpX, ReadVMask]>;
+}
+
+multiclass VMUL_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUVV<funct6, OPMVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVIMulV, ReadVIMulV, ReadVIMulV, ReadVMask]>;
+ def X : VALUVX<funct6, OPMVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVIMulX, ReadVIMulV, ReadVIMulX, ReadVMask]>;
+}
+
+multiclass VWMUL_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUVV<funct6, OPMVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVIWMulV, ReadVIWMulV, ReadVIWMulV, ReadVMask]>;
+ def X : VALUVX<funct6, OPMVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVIWMulX, ReadVIWMulV, ReadVIWMulX, ReadVMask]>;
+}
+
+multiclass VDIV_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUVV<funct6, OPMVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVIDivV, ReadVIDivV, ReadVIDivV, ReadVMask]>;
+ def X : VALUVX<funct6, OPMVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVIDivX, ReadVIDivV, ReadVIDivX, ReadVMask]>;
+}
+
+multiclass VSALU_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> {
+ def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVSALUV, ReadVSALUV, ReadVSALUV, ReadVMask]>;
+ def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVSALUX, ReadVSALUV, ReadVSALUX, ReadVMask]>;
+ def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>,
+ Sched<[WriteVSALUI, ReadVSALUV, ReadVMask]>;
+}
+
+multiclass VSALU_IV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVSALUV, ReadVSALUV, ReadVSALUV, ReadVMask]>;
+ def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVSALUX, ReadVSALUV, ReadVSALUX, ReadVMask]>;
+}
+
+multiclass VAALU_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUVV<funct6, OPMVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVAALUV, ReadVAALUV, ReadVAALUV, ReadVMask]>;
+ def X : VALUVX<funct6, OPMVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVAALUX, ReadVAALUV, ReadVAALUX, ReadVMask]>;
+}
+
+multiclass VSMUL_IV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVSMulV, ReadVSMulV, ReadVSMulV, ReadVMask]>;
+ def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVSMulX, ReadVSMulV, ReadVSMulX, ReadVMask]>;
+}
+
+multiclass VSSHF_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> {
+ def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVSShiftV, ReadVSShiftV, ReadVSShiftV, ReadVMask]>;
+ def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVSShiftX, ReadVSShiftV, ReadVSShiftX, ReadVMask]>;
+ def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>,
+ Sched<[WriteVSShiftI, ReadVSShiftV, ReadVMask]>;
+}
+
+multiclass VNCLP_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> {
+ def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVNClipV, ReadVNClipV, ReadVNClipV, ReadVMask]>;
+ def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVNClipX, ReadVNClipV, ReadVNClipX, ReadVMask]>;
+ def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>,
+ Sched<[WriteVNClipI, ReadVNClipV, ReadVMask]>;
+}
+
+multiclass VSLD_IV_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> {
+ def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVISlideX, ReadVISlideV, ReadVISlideX, ReadVMask]>;
+ def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>,
+ Sched<[WriteVISlideI, ReadVISlideV, ReadVMask]>;
+}
+
+multiclass VSLD1_MV_X<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def X : VALUVX<funct6, OPMVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVISlide1X, ReadVISlideV, ReadVISlideX, ReadVMask]>;
+}
+
+multiclass VSLD1_FV_F<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
+ Sched<[WriteVFSlide1F, ReadVFSlideV, ReadVFSlideF, ReadVMask]>;
+}
+
+multiclass VGTR_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> {
+ def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVGatherV, ReadVGatherV, ReadVGatherV, ReadVMask]>;
+ def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVGatherX, ReadVGatherV, ReadVGatherX, ReadVMask]>;
+ def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>,
+ Sched<[WriteVGatherI, ReadVGatherV, ReadVMask]>;
+}
+
+multiclass VCPR_MV_Mask<string opcodestr, bits<6> funct6, string vm = "v"> {
+ def M : VALUVVNoVm<funct6, OPMVV, opcodestr # "." # vm # "m">,
+ Sched<[WriteVCompressV, ReadVCompressV, ReadVCompressV]>;
}
multiclass VAMO<RISCVAMOOP amoop, RISCVWidth width, string opcodestr> {
@@ -435,11 +784,14 @@ multiclass VAMO<RISCVAMOOP amoop, RISCVWidth width, string opcodestr> {
def _UNWD : VAMONoWd<amoop, width, opcodestr>;
}
-multiclass VWholeLoad<bits<3> nf, string opcodestr, RegisterClass VRC> {
- def E8_V : VWholeLoad<nf, LSWidth8, opcodestr # "e8.v", VRC>;
- def E16_V : VWholeLoad<nf, LSWidth16, opcodestr # "e16.v", VRC>;
- def E32_V : VWholeLoad<nf, LSWidth32, opcodestr # "e32.v", VRC>;
- def E64_V : VWholeLoad<nf, LSWidth64, opcodestr # "e64.v", VRC>;
+multiclass VWholeLoadN<bits<3> nf, string opcodestr, RegisterClass VRC> {
+ foreach l = [8, 16, 32, 64] in {
+ defvar w = !cast<RISCVWidth>("LSWidth" # l);
+ defvar s = !cast<SchedWrite>("WriteVLD" # !add(nf, 1) # "R" # l);
+
+ def E # l # _V : VWholeLoad<nf, w, opcodestr # "e" # l # ".v", VRC>,
+ Sched<[s, ReadVLDX]>;
+ }
}
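// Roughly, the foreach above expands each instantiation into one whole-register
// load per element width; e.g. the VL1R instantiation below
// (defm VL1R : VWholeLoadN<0, "vl1r", VR>) yields
//   def VL1RE8_V : VWholeLoad<0, LSWidth8, "vl1re8.v", VR>,
//                  Sched<[WriteVLD1R8, ReadVLDX]>;
// together with the analogous VL1RE16_V, VL1RE32_V and VL1RE64_V records.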
//===----------------------------------------------------------------------===//
@@ -457,71 +809,58 @@ def VSETIVLI : RVInstSetiVLi<(outs GPR:$rd), (ins uimm5:$uimm, VTypeIOp:$vtypei)
def VSETVL : RVInstSetVL<(outs GPR:$rd), (ins GPR:$rs1, GPR:$rs2),
"vsetvl", "$rd, $rs1, $rs2">;
} // hasSideEffects = 1, mayLoad = 0, mayStore = 0
+foreach eew = [8, 16, 32, 64] in {
+ defvar w = !cast<RISCVWidth>("LSWidth" # eew);
+
+ // Vector Unit-Stride Instructions
+ def VLE#eew#_V : VUnitStrideLoad<w, "vle"#eew#".v">, VLESched<eew>;
+ def VSE#eew#_V : VUnitStrideStore<w, "vse"#eew#".v">, VSESched<eew>;
+
+ // Vector Unit-Stride Fault-only-First Loads
+ def VLE#eew#FF_V : VUnitStrideLoadFF<w, "vle"#eew#"ff.v">, VLFSched<eew>;
+
+ // Vector Strided Instructions
+ def VLSE#eew#_V : VStridedLoad<w, "vlse"#eew#".v">, VLSSched<eew>;
+ def VSSE#eew#_V : VStridedStore<w, "vsse"#eew#".v">, VSSSched<eew>;
+
+ // Vector Indexed Instructions
+ def VLUXEI#eew#_V :
+ VIndexedLoad<MOPLDIndexedUnord, w, "vluxei"#eew#".v">, VLXSched<eew, "U">;
+ def VLOXEI#eew#_V :
+ VIndexedLoad<MOPLDIndexedOrder, w, "vloxei"#eew#".v">, VLXSched<eew, "O">;
+ def VSUXEI#eew#_V :
+ VIndexedStore<MOPSTIndexedUnord, w, "vsuxei"#eew#".v">, VSXSched<eew, "U">;
+ def VSOXEI#eew#_V :
+ VIndexedStore<MOPSTIndexedOrder, w, "vsoxei"#eew#".v">, VSXSched<eew, "O">;
+}
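// For eew = 32, for instance, this loop defines VLE32_V/VSE32_V ("vle32.v" and
// "vse32.v"), VLE32FF_V ("vle32ff.v"), VLSE32_V/VSSE32_V, and the four indexed
// forms VLUXEI32_V, VLOXEI32_V, VSUXEI32_V and VSOXEI32_V; these are the same
// records that the removed per-width definitions below spelled out by hand,
// now with the per-width scheduling classes attached.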
+
+def VLM_V : VUnitStrideLoadMask<"vlm.v">,
+ Sched<[WriteVLDM, ReadVLDX]>;
+def VSM_V : VUnitStrideStoreMask<"vsm.v">,
+ Sched<[WriteVSTM, ReadVSTM, ReadVSTX]>;
+def : InstAlias<"vle1.v $vd, (${rs1})",
+ (VLM_V VR:$vd, GPR:$rs1), 0>;
+def : InstAlias<"vse1.v $vs3, (${rs1})",
+ (VSM_V VR:$vs3, GPR:$rs1), 0>;
+
+defm VL1R : VWholeLoadN<0, "vl1r", VR>;
+defm VL2R : VWholeLoadN<1, "vl2r", VRM2>;
+defm VL4R : VWholeLoadN<3, "vl4r", VRM4>;
+defm VL8R : VWholeLoadN<7, "vl8r", VRM8>;
-// Vector Unit-Stride Instructions
-def VLE8_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth8, "vle8.v">;
-def VLE16_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth16, "vle16.v">;
-def VLE32_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth32, "vle32.v">;
-def VLE64_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth64, "vle64.v">;
-
-def VLE8FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth8, "vle8ff.v">;
-def VLE16FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth16, "vle16ff.v">;
-def VLE32FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth32, "vle32ff.v">;
-def VLE64FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth64, "vle64ff.v">;
-
-def VLE1_V : VUnitStrideLoadMask<"vle1.v">;
-def VSE1_V : VUnitStrideStoreMask<"vse1.v">;
-
-def VSE8_V : VUnitStrideStore<SUMOPUnitStride, LSWidth8, "vse8.v">;
-def VSE16_V : VUnitStrideStore<SUMOPUnitStride, LSWidth16, "vse16.v">;
-def VSE32_V : VUnitStrideStore<SUMOPUnitStride, LSWidth32, "vse32.v">;
-def VSE64_V : VUnitStrideStore<SUMOPUnitStride, LSWidth64, "vse64.v">;
-
-// Vector Strided Instructions
-def VLSE8_V : VStridedLoad<LSWidth8, "vlse8.v">;
-def VLSE16_V : VStridedLoad<LSWidth16, "vlse16.v">;
-def VLSE32_V : VStridedLoad<LSWidth32, "vlse32.v">;
-def VLSE64_V : VStridedLoad<LSWidth64, "vlse64.v">;
-
-def VSSE8_V : VStridedStore<LSWidth8, "vsse8.v">;
-def VSSE16_V : VStridedStore<LSWidth16, "vsse16.v">;
-def VSSE32_V : VStridedStore<LSWidth32, "vsse32.v">;
-def VSSE64_V : VStridedStore<LSWidth64, "vsse64.v">;
-
-// Vector Indexed Instructions
-def VLUXEI8_V : VIndexedLoad<MOPLDIndexedUnord, LSWidth8, "vluxei8.v">;
-def VLUXEI16_V : VIndexedLoad<MOPLDIndexedUnord, LSWidth16, "vluxei16.v">;
-def VLUXEI32_V : VIndexedLoad<MOPLDIndexedUnord, LSWidth32, "vluxei32.v">;
-def VLUXEI64_V : VIndexedLoad<MOPLDIndexedUnord, LSWidth64, "vluxei64.v">;
-
-def VLOXEI8_V : VIndexedLoad<MOPLDIndexedOrder, LSWidth8, "vloxei8.v">;
-def VLOXEI16_V : VIndexedLoad<MOPLDIndexedOrder, LSWidth16, "vloxei16.v">;
-def VLOXEI32_V : VIndexedLoad<MOPLDIndexedOrder, LSWidth32, "vloxei32.v">;
-def VLOXEI64_V : VIndexedLoad<MOPLDIndexedOrder, LSWidth64, "vloxei64.v">;
-
-def VSUXEI8_V : VIndexedStore<MOPSTIndexedUnord, LSWidth8, "vsuxei8.v">;
-def VSUXEI16_V : VIndexedStore<MOPSTIndexedUnord, LSWidth16, "vsuxei16.v">;
-def VSUXEI32_V : VIndexedStore<MOPSTIndexedUnord, LSWidth32, "vsuxei32.v">;
-def VSUXEI64_V : VIndexedStore<MOPSTIndexedUnord, LSWidth64, "vsuxei64.v">;
-
-def VSOXEI8_V : VIndexedStore<MOPSTIndexedOrder, LSWidth8, "vsoxei8.v">;
-def VSOXEI16_V : VIndexedStore<MOPSTIndexedOrder, LSWidth16, "vsoxei16.v">;
-def VSOXEI32_V : VIndexedStore<MOPSTIndexedOrder, LSWidth32, "vsoxei32.v">;
-def VSOXEI64_V : VIndexedStore<MOPSTIndexedOrder, LSWidth64, "vsoxei64.v">;
-
-defm VL1R : VWholeLoad<0, "vl1r", VR>;
-defm VL2R : VWholeLoad<1, "vl2r", VRM2>;
-defm VL4R : VWholeLoad<3, "vl4r", VRM4>;
-defm VL8R : VWholeLoad<7, "vl8r", VRM8>;
def : InstAlias<"vl1r.v $vd, (${rs1})", (VL1RE8_V VR:$vd, GPR:$rs1)>;
def : InstAlias<"vl2r.v $vd, (${rs1})", (VL2RE8_V VRM2:$vd, GPR:$rs1)>;
def : InstAlias<"vl4r.v $vd, (${rs1})", (VL4RE8_V VRM4:$vd, GPR:$rs1)>;
def : InstAlias<"vl8r.v $vd, (${rs1})", (VL8RE8_V VRM8:$vd, GPR:$rs1)>;
-def VS1R_V : VWholeStore<0, "vs1r.v", VR>;
-def VS2R_V : VWholeStore<1, "vs2r.v", VRM2>;
-def VS4R_V : VWholeStore<3, "vs4r.v", VRM4>;
-def VS8R_V : VWholeStore<7, "vs8r.v", VRM8>;
+def VS1R_V : VWholeStore<0, "vs1r.v", VR>,
+ Sched<[WriteVST1R, ReadVST1R, ReadVSTX]>;
+def VS2R_V : VWholeStore<1, "vs2r.v", VRM2>,
+ Sched<[WriteVST2R, ReadVST2R, ReadVSTX]>;
+def VS4R_V : VWholeStore<3, "vs4r.v", VRM4>,
+ Sched<[WriteVST4R, ReadVST4R, ReadVSTX]>;
+def VS8R_V : VWholeStore<7, "vs8r.v", VRM8>,
+ Sched<[WriteVST8R, ReadVST8R, ReadVSTX]>;
// Vector Single-Width Integer Add and Subtract
defm VADD_V : VALU_IV_V_X_I<"vadd", 0b000000>;
@@ -588,9 +927,9 @@ def : InstAlias<"vnot.v $vd, $vs$vm",
(VXOR_VI VR:$vd, VR:$vs, -1, VMaskOp:$vm)>;
// Vector Single-Width Bit Shift Instructions
-defm VSLL_V : VALU_IV_V_X_I<"vsll", 0b100101, uimm5>;
-defm VSRL_V : VALU_IV_V_X_I<"vsrl", 0b101000, uimm5>;
-defm VSRA_V : VALU_IV_V_X_I<"vsra", 0b101001, uimm5>;
+defm VSLL_V : VSHT_IV_V_X_I<"vsll", 0b100101, uimm5>;
+defm VSRL_V : VSHT_IV_V_X_I<"vsrl", 0b101000, uimm5>;
+defm VSRA_V : VSHT_IV_V_X_I<"vsra", 0b101001, uimm5>;
// Vector Narrowing Integer Right Shift Instructions
// Refer to 11.3. Narrowing Vector Arithmetic Instructions
@@ -598,8 +937,8 @@ defm VSRA_V : VALU_IV_V_X_I<"vsra", 0b101001, uimm5>;
// vector register group (specified by vs2). The destination vector register
// group cannot overlap the mask register if used, unless LMUL=1.
let Constraints = "@earlyclobber $vd" in {
-defm VNSRL_W : VALU_IV_V_X_I<"vnsrl", 0b101100, uimm5, "w">;
-defm VNSRA_W : VALU_IV_V_X_I<"vnsra", 0b101101, uimm5, "w">;
+defm VNSRL_W : VNSHT_IV_V_X_I<"vnsrl", 0b101100, uimm5, "w">;
+defm VNSRA_W : VNSHT_IV_V_X_I<"vnsra", 0b101101, uimm5, "w">;
} // Constraints = "@earlyclobber $vd"
def : InstAlias<"vncvt.x.x.w $vd, $vs$vm",
@@ -607,14 +946,14 @@ def : InstAlias<"vncvt.x.x.w $vd, $vs$vm",
// Vector Integer Comparison Instructions
let RVVConstraint = NoConstraint in {
-defm VMSEQ_V : VALU_IV_V_X_I<"vmseq", 0b011000>;
-defm VMSNE_V : VALU_IV_V_X_I<"vmsne", 0b011001>;
-defm VMSLTU_V : VALU_IV_V_X<"vmsltu", 0b011010>;
-defm VMSLT_V : VALU_IV_V_X<"vmslt", 0b011011>;
-defm VMSLEU_V : VALU_IV_V_X_I<"vmsleu", 0b011100>;
-defm VMSLE_V : VALU_IV_V_X_I<"vmsle", 0b011101>;
-defm VMSGTU_V : VALU_IV_X_I<"vmsgtu", 0b011110>;
-defm VMSGT_V : VALU_IV_X_I<"vmsgt", 0b011111>;
+defm VMSEQ_V : VCMP_IV_V_X_I<"vmseq", 0b011000>;
+defm VMSNE_V : VCMP_IV_V_X_I<"vmsne", 0b011001>;
+defm VMSLTU_V : VCMP_IV_V_X<"vmsltu", 0b011010>;
+defm VMSLT_V : VCMP_IV_V_X<"vmslt", 0b011011>;
+defm VMSLEU_V : VCMP_IV_V_X_I<"vmsleu", 0b011100>;
+defm VMSLE_V : VCMP_IV_V_X_I<"vmsle", 0b011101>;
+defm VMSGTU_V : VCMP_IV_X_I<"vmsgtu", 0b011110>;
+defm VMSGT_V : VCMP_IV_X_I<"vmsgt", 0b011111>;
} // RVVConstraint = NoConstraint
def : InstAlias<"vmsgtu.vv $vd, $va, $vb$vm",
@@ -672,84 +1011,87 @@ def PseudoVMSGE_VX_M_T : Pseudo<(outs VR:$vd, VRNoV0:$scratch),
}
// Vector Integer Min/Max Instructions
-defm VMINU_V : VALU_IV_V_X<"vminu", 0b000100>;
-defm VMIN_V : VALU_IV_V_X<"vmin", 0b000101>;
-defm VMAXU_V : VALU_IV_V_X<"vmaxu", 0b000110>;
-defm VMAX_V : VALU_IV_V_X<"vmax", 0b000111>;
+defm VMINU_V : VCMP_IV_V_X<"vminu", 0b000100>;
+defm VMIN_V : VCMP_IV_V_X<"vmin", 0b000101>;
+defm VMAXU_V : VCMP_IV_V_X<"vmaxu", 0b000110>;
+defm VMAX_V : VCMP_IV_V_X<"vmax", 0b000111>;
// Vector Single-Width Integer Multiply Instructions
-defm VMUL_V : VALU_MV_V_X<"vmul", 0b100101>;
-defm VMULH_V : VALU_MV_V_X<"vmulh", 0b100111>;
-defm VMULHU_V : VALU_MV_V_X<"vmulhu", 0b100100>;
-defm VMULHSU_V : VALU_MV_V_X<"vmulhsu", 0b100110>;
+defm VMUL_V : VMUL_MV_V_X<"vmul", 0b100101>;
+defm VMULH_V : VMUL_MV_V_X<"vmulh", 0b100111>;
+defm VMULHU_V : VMUL_MV_V_X<"vmulhu", 0b100100>;
+defm VMULHSU_V : VMUL_MV_V_X<"vmulhsu", 0b100110>;
// Vector Integer Divide Instructions
-defm VDIVU_V : VALU_MV_V_X<"vdivu", 0b100000>;
-defm VDIV_V : VALU_MV_V_X<"vdiv", 0b100001>;
-defm VREMU_V : VALU_MV_V_X<"vremu", 0b100010>;
-defm VREM_V : VALU_MV_V_X<"vrem", 0b100011>;
+defm VDIVU_V : VDIV_MV_V_X<"vdivu", 0b100000>;
+defm VDIV_V : VDIV_MV_V_X<"vdiv", 0b100001>;
+defm VREMU_V : VDIV_MV_V_X<"vremu", 0b100010>;
+defm VREM_V : VDIV_MV_V_X<"vrem", 0b100011>;
// Vector Widening Integer Multiply Instructions
let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV in {
-defm VWMUL_V : VALU_MV_V_X<"vwmul", 0b111011>;
-defm VWMULU_V : VALU_MV_V_X<"vwmulu", 0b111000>;
-defm VWMULSU_V : VALU_MV_V_X<"vwmulsu", 0b111010>;
+defm VWMUL_V : VWMUL_MV_V_X<"vwmul", 0b111011>;
+defm VWMULU_V : VWMUL_MV_V_X<"vwmulu", 0b111000>;
+defm VWMULSU_V : VWMUL_MV_V_X<"vwmulsu", 0b111010>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV
// Vector Single-Width Integer Multiply-Add Instructions
-defm VMACC_V : VALUr_MV_V_X<"vmacc", 0b101101>;
-defm VNMSAC_V : VALUr_MV_V_X<"vnmsac", 0b101111>;
-defm VMADD_V : VALUr_MV_V_X<"vmadd", 0b101001>;
-defm VNMSUB_V : VALUr_MV_V_X<"vnmsub", 0b101011>;
+defm VMACC_V : VMAC_MV_V_X<"vmacc", 0b101101>;
+defm VNMSAC_V : VMAC_MV_V_X<"vnmsac", 0b101111>;
+defm VMADD_V : VMAC_MV_V_X<"vmadd", 0b101001>;
+defm VNMSUB_V : VMAC_MV_V_X<"vnmsub", 0b101011>;
// Vector Widening Integer Multiply-Add Instructions
let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV in {
-defm VWMACCU_V : VALUr_MV_V_X<"vwmaccu", 0b111100>;
-defm VWMACC_V : VALUr_MV_V_X<"vwmacc", 0b111101>;
-defm VWMACCSU_V : VALUr_MV_V_X<"vwmaccsu", 0b111111>;
-defm VWMACCUS_V : VALUr_MV_X<"vwmaccus", 0b111110>;
+defm VWMACCU_V : VWMAC_MV_V_X<"vwmaccu", 0b111100>;
+defm VWMACC_V : VWMAC_MV_V_X<"vwmacc", 0b111101>;
+defm VWMACCSU_V : VWMAC_MV_V_X<"vwmaccsu", 0b111111>;
+defm VWMACCUS_V : VWMAC_MV_X<"vwmaccus", 0b111110>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV
// Vector Integer Merge Instructions
-defm VMERGE_V : VALUm_IV_V_X_I<"vmerge", 0b010111>;
+defm VMERGE_V : VMRG_IV_V_X_I<"vmerge", 0b010111>;
// Vector Integer Move Instructions
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, vs2 = 0, vm = 1,
RVVConstraint = NoConstraint in {
// op vd, vs1
def VMV_V_V : RVInstVV<0b010111, OPIVV, (outs VR:$vd),
- (ins VR:$vs1), "vmv.v.v", "$vd, $vs1">;
+ (ins VR:$vs1), "vmv.v.v", "$vd, $vs1">,
+ Sched<[WriteVIMovV, ReadVIMovV]>;
// op vd, rs1
def VMV_V_X : RVInstVX<0b010111, OPIVX, (outs VR:$vd),
- (ins GPR:$rs1), "vmv.v.x", "$vd, $rs1">;
+ (ins GPR:$rs1), "vmv.v.x", "$vd, $rs1">,
+ Sched<[WriteVIMovX, ReadVIMovX]>;
// op vd, imm
def VMV_V_I : RVInstIVI<0b010111, (outs VR:$vd),
- (ins simm5:$imm), "vmv.v.i", "$vd, $imm">;
+ (ins simm5:$imm), "vmv.v.i", "$vd, $imm">,
+ Sched<[WriteVIMovI]>;
} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
// Vector Fixed-Point Arithmetic Instructions
-defm VSADDU_V : VALU_IV_V_X_I<"vsaddu", 0b100000>;
-defm VSADD_V : VALU_IV_V_X_I<"vsadd", 0b100001>;
-defm VSSUBU_V : VALU_IV_V_X<"vssubu", 0b100010>;
-defm VSSUB_V : VALU_IV_V_X<"vssub", 0b100011>;
+defm VSADDU_V : VSALU_IV_V_X_I<"vsaddu", 0b100000>;
+defm VSADD_V : VSALU_IV_V_X_I<"vsadd", 0b100001>;
+defm VSSUBU_V : VSALU_IV_V_X<"vssubu", 0b100010>;
+defm VSSUB_V : VSALU_IV_V_X<"vssub", 0b100011>;
// Vector Single-Width Averaging Add and Subtract
-defm VAADDU_V : VALU_MV_V_X<"vaaddu", 0b001000>;
-defm VAADD_V : VALU_MV_V_X<"vaadd", 0b001001>;
-defm VASUBU_V : VALU_MV_V_X<"vasubu", 0b001010>;
-defm VASUB_V : VALU_MV_V_X<"vasub", 0b001011>;
+defm VAADDU_V : VAALU_MV_V_X<"vaaddu", 0b001000>;
+defm VAADD_V : VAALU_MV_V_X<"vaadd", 0b001001>;
+defm VASUBU_V : VAALU_MV_V_X<"vasubu", 0b001010>;
+defm VASUB_V : VAALU_MV_V_X<"vasub", 0b001011>;
// Vector Single-Width Fractional Multiply with Rounding and Saturation
-defm VSMUL_V : VALU_IV_V_X<"vsmul", 0b100111>;
+defm VSMUL_V : VSMUL_IV_V_X<"vsmul", 0b100111>;
// Vector Single-Width Scaling Shift Instructions
-defm VSSRL_V : VALU_IV_V_X_I<"vssrl", 0b101010, uimm5>;
-defm VSSRA_V : VALU_IV_V_X_I<"vssra", 0b101011, uimm5>;
+defm VSSRL_V : VSSHF_IV_V_X_I<"vssrl", 0b101010, uimm5>;
+defm VSSRA_V : VSSHF_IV_V_X_I<"vssra", 0b101011, uimm5>;
// Vector Narrowing Fixed-Point Clip Instructions
let Constraints = "@earlyclobber $vd" in {
-defm VNCLIPU_W : VALU_IV_V_X_I<"vnclipu", 0b101110, uimm5, "w">;
-defm VNCLIP_W : VALU_IV_V_X_I<"vnclip", 0b101111, uimm5, "w">;
+defm VNCLIPU_W : VNCLP_IV_V_X_I<"vnclipu", 0b101110, uimm5, "w">;
+defm VNCLIP_W : VNCLP_IV_V_X_I<"vnclip", 0b101111, uimm5, "w">;
} // Constraints = "@earlyclobber $vd"
} // Predicates = [HasStdExtV]
@@ -762,60 +1104,60 @@ defm VFRSUB_V : VALU_FV_F<"vfrsub", 0b100111>;
// Vector Widening Floating-Point Add/Subtract Instructions
let Constraints = "@earlyclobber $vd" in {
let RVVConstraint = WidenV in {
-defm VFWADD_V : VALU_FV_V_F<"vfwadd", 0b110000>;
-defm VFWSUB_V : VALU_FV_V_F<"vfwsub", 0b110010>;
+defm VFWADD_V : VWALU_FV_V_F<"vfwadd", 0b110000>;
+defm VFWSUB_V : VWALU_FV_V_F<"vfwsub", 0b110010>;
} // RVVConstraint = WidenV
// Set earlyclobber for following instructions for second and mask operands.
// This has the downside that the earlyclobber constraint is too coarse and
// will impose unnecessary restrictions by not allowing the destination to
// overlap with the first (wide) operand.
let RVVConstraint = WidenW in {
-defm VFWADD_W : VALU_FV_V_F<"vfwadd", 0b110100, "w">;
-defm VFWSUB_W : VALU_FV_V_F<"vfwsub", 0b110110, "w">;
+defm VFWADD_W : VWALU_FV_V_F<"vfwadd", 0b110100, "w">;
+defm VFWSUB_W : VWALU_FV_V_F<"vfwsub", 0b110110, "w">;
} // RVVConstraint = WidenW
} // Constraints = "@earlyclobber $vd"
// Vector Single-Width Floating-Point Multiply/Divide Instructions
-defm VFMUL_V : VALU_FV_V_F<"vfmul", 0b100100>;
-defm VFDIV_V : VALU_FV_V_F<"vfdiv", 0b100000>;
-defm VFRDIV_V : VALU_FV_F<"vfrdiv", 0b100001>;
+defm VFMUL_V : VMUL_FV_V_F<"vfmul", 0b100100>;
+defm VFDIV_V : VDIV_FV_V_F<"vfdiv", 0b100000>;
+defm VFRDIV_V : VRDIV_FV_F<"vfrdiv", 0b100001>;
// Vector Widening Floating-Point Multiply
let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV in {
-defm VFWMUL_V : VALU_FV_V_F<"vfwmul", 0b111000>;
+defm VFWMUL_V : VWMUL_FV_V_F<"vfwmul", 0b111000>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV
// Vector Single-Width Floating-Point Fused Multiply-Add Instructions
-defm VFMACC_V : VALUr_FV_V_F<"vfmacc", 0b101100>;
-defm VFNMACC_V : VALUr_FV_V_F<"vfnmacc", 0b101101>;
-defm VFMSAC_V : VALUr_FV_V_F<"vfmsac", 0b101110>;
-defm VFNMSAC_V : VALUr_FV_V_F<"vfnmsac", 0b101111>;
-defm VFMADD_V : VALUr_FV_V_F<"vfmadd", 0b101000>;
-defm VFNMADD_V : VALUr_FV_V_F<"vfnmadd", 0b101001>;
-defm VFMSUB_V : VALUr_FV_V_F<"vfmsub", 0b101010>;
-defm VFNMSUB_V : VALUr_FV_V_F<"vfnmsub", 0b101011>;
+defm VFMACC_V : VMAC_FV_V_F<"vfmacc", 0b101100>;
+defm VFNMACC_V : VMAC_FV_V_F<"vfnmacc", 0b101101>;
+defm VFMSAC_V : VMAC_FV_V_F<"vfmsac", 0b101110>;
+defm VFNMSAC_V : VMAC_FV_V_F<"vfnmsac", 0b101111>;
+defm VFMADD_V : VMAC_FV_V_F<"vfmadd", 0b101000>;
+defm VFNMADD_V : VMAC_FV_V_F<"vfnmadd", 0b101001>;
+defm VFMSUB_V : VMAC_FV_V_F<"vfmsub", 0b101010>;
+defm VFNMSUB_V : VMAC_FV_V_F<"vfnmsub", 0b101011>;
// Vector Widening Floating-Point Fused Multiply-Add Instructions
let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV in {
-defm VFWMACC_V : VALUr_FV_V_F<"vfwmacc", 0b111100>;
-defm VFWNMACC_V : VALUr_FV_V_F<"vfwnmacc", 0b111101>;
-defm VFWMSAC_V : VALUr_FV_V_F<"vfwmsac", 0b111110>;
-defm VFWNMSAC_V : VALUr_FV_V_F<"vfwnmsac", 0b111111>;
+defm VFWMACC_V : VWMAC_FV_V_F<"vfwmacc", 0b111100>;
+defm VFWNMACC_V : VWMAC_FV_V_F<"vfwnmacc", 0b111101>;
+defm VFWMSAC_V : VWMAC_FV_V_F<"vfwmsac", 0b111110>;
+defm VFWNMSAC_V : VWMAC_FV_V_F<"vfwnmsac", 0b111111>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV
// Vector Floating-Point Square-Root Instruction
-defm VFSQRT_V : VALU_FV_VS2<"vfsqrt.v", 0b010011, 0b00000>;
-defm VFRSQRT7_V : VALU_FV_VS2<"vfrsqrt7.v", 0b010011, 0b00100>;
-defm VFREC7_V : VALU_FV_VS2<"vfrec7.v", 0b010011, 0b00101>;
+defm VFSQRT_V : VSQR_FV_VS2<"vfsqrt.v", 0b010011, 0b00000>;
+defm VFRSQRT7_V : VRCP_FV_VS2<"vfrsqrt7.v", 0b010011, 0b00100>;
+defm VFREC7_V : VRCP_FV_VS2<"vfrec7.v", 0b010011, 0b00101>;
// Vector Floating-Point MIN/MAX Instructions
-defm VFMIN_V : VALU_FV_V_F<"vfmin", 0b000100>;
-defm VFMAX_V : VALU_FV_V_F<"vfmax", 0b000110>;
+defm VFMIN_V : VCMP_FV_V_F<"vfmin", 0b000100>;
+defm VFMAX_V : VCMP_FV_V_F<"vfmax", 0b000110>;
// Vector Floating-Point Sign-Injection Instructions
-defm VFSGNJ_V : VALU_FV_V_F<"vfsgnj", 0b001000>;
-defm VFSGNJN_V : VALU_FV_V_F<"vfsgnjn", 0b001001>;
-defm VFSGNJX_V : VALU_FV_V_F<"vfsgnjx", 0b001010>;
+defm VFSGNJ_V : VSGNJ_FV_V_F<"vfsgnj", 0b001000>;
+defm VFSGNJN_V : VSGNJ_FV_V_F<"vfsgnjn", 0b001001>;
+defm VFSGNJX_V : VSGNJ_FV_V_F<"vfsgnjx", 0b001010>;
def : InstAlias<"vfneg.v $vd, $vs$vm",
(VFSGNJN_VV VR:$vd, VR:$vs, VR:$vs, VMaskOp:$vm)>;
@@ -824,12 +1166,12 @@ def : InstAlias<"vfabs.v $vd, $vs$vm",
// Vector Floating-Point Compare Instructions
let RVVConstraint = NoConstraint in {
-defm VMFEQ_V : VALU_FV_V_F<"vmfeq", 0b011000>;
-defm VMFNE_V : VALU_FV_V_F<"vmfne", 0b011100>;
-defm VMFLT_V : VALU_FV_V_F<"vmflt", 0b011011>;
-defm VMFLE_V : VALU_FV_V_F<"vmfle", 0b011001>;
-defm VMFGT_V : VALU_FV_F<"vmfgt", 0b011101>;
-defm VMFGE_V : VALU_FV_F<"vmfge", 0b011111>;
+defm VMFEQ_V : VCMP_FV_V_F<"vmfeq", 0b011000>;
+defm VMFNE_V : VCMP_FV_V_F<"vmfne", 0b011100>;
+defm VMFLT_V : VCMP_FV_V_F<"vmflt", 0b011011>;
+defm VMFLE_V : VCMP_FV_V_F<"vmfle", 0b011001>;
+defm VMFGT_V : VCMP_FV_F<"vmfgt", 0b011101>;
+defm VMFGE_V : VCMP_FV_F<"vmfge", 0b011111>;
} // RVVConstraint = NoConstraint
def : InstAlias<"vmfgt.vv $vd, $va, $vb$vm",
@@ -838,68 +1180,70 @@ def : InstAlias<"vmfge.vv $vd, $va, $vb$vm",
(VMFLE_VV VR:$vd, VR:$vb, VR:$va, VMaskOp:$vm), 0>;
// Vector Floating-Point Classify Instruction
-defm VFCLASS_V : VALU_FV_VS2<"vfclass.v", 0b010011, 0b10000>;
+defm VFCLASS_V : VCLS_FV_VS2<"vfclass.v", 0b010011, 0b10000>;
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+
// Vector Floating-Point Merge Instruction
+let vm = 0 in
def VFMERGE_VFM : RVInstVX<0b010111, OPFVF, (outs VR:$vd),
(ins VR:$vs2, FPR32:$rs1, VMV0:$v0),
- "vfmerge.vfm", "$vd, $vs2, $rs1, v0"> {
- let vm = 0;
-}
+ "vfmerge.vfm", "$vd, $vs2, $rs1, v0">,
+ Sched<[WriteVFMergeV, ReadVFMergeV, ReadVFMergeF, ReadVMask]>;
// Vector Floating-Point Move Instruction
let RVVConstraint = NoConstraint in
+let vm = 1, vs2 = 0 in
def VFMV_V_F : RVInstVX<0b010111, OPFVF, (outs VR:$vd),
- (ins FPR32:$rs1), "vfmv.v.f", "$vd, $rs1"> {
- let vs2 = 0;
- let vm = 1;
-}
+ (ins FPR32:$rs1), "vfmv.v.f", "$vd, $rs1">,
+ Sched<[WriteVFMovV, ReadVFMovF]>;
+
} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
// Single-Width Floating-Point/Integer Type-Convert Instructions
-defm VFCVT_XU_F_V : VALU_FV_VS2<"vfcvt.xu.f.v", 0b010010, 0b00000>;
-defm VFCVT_X_F_V : VALU_FV_VS2<"vfcvt.x.f.v", 0b010010, 0b00001>;
-defm VFCVT_RTZ_XU_F_V : VALU_FV_VS2<"vfcvt.rtz.xu.f.v", 0b010010, 0b00110>;
-defm VFCVT_RTZ_X_F_V : VALU_FV_VS2<"vfcvt.rtz.x.f.v", 0b010010, 0b00111>;
-defm VFCVT_F_XU_V : VALU_FV_VS2<"vfcvt.f.xu.v", 0b010010, 0b00010>;
-defm VFCVT_F_X_V : VALU_FV_VS2<"vfcvt.f.x.v", 0b010010, 0b00011>;
+defm VFCVT_XU_F_V : VCVTI_FV_VS2<"vfcvt.xu.f.v", 0b010010, 0b00000>;
+defm VFCVT_X_F_V : VCVTI_FV_VS2<"vfcvt.x.f.v", 0b010010, 0b00001>;
+defm VFCVT_RTZ_XU_F_V : VCVTI_FV_VS2<"vfcvt.rtz.xu.f.v", 0b010010, 0b00110>;
+defm VFCVT_RTZ_X_F_V : VCVTI_FV_VS2<"vfcvt.rtz.x.f.v", 0b010010, 0b00111>;
+defm VFCVT_F_XU_V : VCVTF_IV_VS2<"vfcvt.f.xu.v", 0b010010, 0b00010>;
+defm VFCVT_F_X_V : VCVTF_IV_VS2<"vfcvt.f.x.v", 0b010010, 0b00011>;
// Widening Floating-Point/Integer Type-Convert Instructions
let Constraints = "@earlyclobber $vd", RVVConstraint = WidenCvt in {
-defm VFWCVT_XU_F_V : VALU_FV_VS2<"vfwcvt.xu.f.v", 0b010010, 0b01000>;
-defm VFWCVT_X_F_V : VALU_FV_VS2<"vfwcvt.x.f.v", 0b010010, 0b01001>;
-defm VFWCVT_RTZ_XU_F_V : VALU_FV_VS2<"vfwcvt.rtz.xu.f.v", 0b010010, 0b01110>;
-defm VFWCVT_RTZ_X_F_V : VALU_FV_VS2<"vfwcvt.rtz.x.f.v", 0b010010, 0b01111>;
-defm VFWCVT_F_XU_V : VALU_FV_VS2<"vfwcvt.f.xu.v", 0b010010, 0b01010>;
-defm VFWCVT_F_X_V : VALU_FV_VS2<"vfwcvt.f.x.v", 0b010010, 0b01011>;
-defm VFWCVT_F_F_V : VALU_FV_VS2<"vfwcvt.f.f.v", 0b010010, 0b01100>;
+defm VFWCVT_XU_F_V : VWCVTI_FV_VS2<"vfwcvt.xu.f.v", 0b010010, 0b01000>;
+defm VFWCVT_X_F_V : VWCVTI_FV_VS2<"vfwcvt.x.f.v", 0b010010, 0b01001>;
+defm VFWCVT_RTZ_XU_F_V : VWCVTI_FV_VS2<"vfwcvt.rtz.xu.f.v", 0b010010, 0b01110>;
+defm VFWCVT_RTZ_X_F_V : VWCVTI_FV_VS2<"vfwcvt.rtz.x.f.v", 0b010010, 0b01111>;
+defm VFWCVT_F_XU_V : VWCVTF_IV_VS2<"vfwcvt.f.xu.v", 0b010010, 0b01010>;
+defm VFWCVT_F_X_V : VWCVTF_IV_VS2<"vfwcvt.f.x.v", 0b010010, 0b01011>;
+defm VFWCVT_F_F_V : VWCVTF_FV_VS2<"vfwcvt.f.f.v", 0b010010, 0b01100>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenCvt
// Narrowing Floating-Point/Integer Type-Convert Instructions
let Constraints = "@earlyclobber $vd" in {
-defm VFNCVT_XU_F_W : VALU_FV_VS2<"vfncvt.xu.f.w", 0b010010, 0b10000>;
-defm VFNCVT_X_F_W : VALU_FV_VS2<"vfncvt.x.f.w", 0b010010, 0b10001>;
-defm VFNCVT_RTZ_XU_F_W : VALU_FV_VS2<"vfncvt.rtz.xu.f.w", 0b010010, 0b10110>;
-defm VFNCVT_RTZ_X_F_W : VALU_FV_VS2<"vfncvt.rtz.x.f.w", 0b010010, 0b10111>;
-defm VFNCVT_F_XU_W : VALU_FV_VS2<"vfncvt.f.xu.w", 0b010010, 0b10010>;
-defm VFNCVT_F_X_W : VALU_FV_VS2<"vfncvt.f.x.w", 0b010010, 0b10011>;
-defm VFNCVT_F_F_W : VALU_FV_VS2<"vfncvt.f.f.w", 0b010010, 0b10100>;
-defm VFNCVT_ROD_F_F_W : VALU_FV_VS2<"vfncvt.rod.f.f.w", 0b010010, 0b10101>;
+defm VFNCVT_XU_F_W : VNCVTI_FV_VS2<"vfncvt.xu.f.w", 0b010010, 0b10000>;
+defm VFNCVT_X_F_W : VNCVTI_FV_VS2<"vfncvt.x.f.w", 0b010010, 0b10001>;
+defm VFNCVT_RTZ_XU_F_W : VNCVTI_FV_VS2<"vfncvt.rtz.xu.f.w", 0b010010, 0b10110>;
+defm VFNCVT_RTZ_X_F_W : VNCVTI_FV_VS2<"vfncvt.rtz.x.f.w", 0b010010, 0b10111>;
+defm VFNCVT_F_XU_W : VNCVTF_IV_VS2<"vfncvt.f.xu.w", 0b010010, 0b10010>;
+defm VFNCVT_F_X_W : VNCVTF_IV_VS2<"vfncvt.f.x.w", 0b010010, 0b10011>;
+defm VFNCVT_F_F_W : VNCVTF_FV_VS2<"vfncvt.f.f.w", 0b010010, 0b10100>;
+defm VFNCVT_ROD_F_F_W : VNCVTF_FV_VS2<"vfncvt.rod.f.f.w", 0b010010, 0b10101>;
} // Constraints = "@earlyclobber $vd"
} // Predicates = [HasStdExtV, HasStdExtF]
let Predicates = [HasStdExtV] in {
+
// Vector Single-Width Integer Reduction Instructions
let RVVConstraint = NoConstraint in {
-defm VREDSUM : VALU_MV_V<"vredsum", 0b000000>;
-defm VREDMAXU : VALU_MV_V<"vredmaxu", 0b000110>;
-defm VREDMAX : VALU_MV_V<"vredmax", 0b000111>;
-defm VREDMINU : VALU_MV_V<"vredminu", 0b000100>;
-defm VREDMIN : VALU_MV_V<"vredmin", 0b000101>;
-defm VREDAND : VALU_MV_V<"vredand", 0b000001>;
-defm VREDOR : VALU_MV_V<"vredor", 0b000010>;
-defm VREDXOR : VALU_MV_V<"vredxor", 0b000011>;
+defm VREDSUM : VRED_MV_V<"vredsum", 0b000000>;
+defm VREDMAXU : VRED_MV_V<"vredmaxu", 0b000110>;
+defm VREDMAX : VRED_MV_V<"vredmax", 0b000111>;
+defm VREDMINU : VRED_MV_V<"vredminu", 0b000100>;
+defm VREDMIN : VRED_MV_V<"vredmin", 0b000101>;
+defm VREDAND : VRED_MV_V<"vredand", 0b000001>;
+defm VREDOR : VRED_MV_V<"vredor", 0b000010>;
+defm VREDXOR : VRED_MV_V<"vredxor", 0b000011>;
} // RVVConstraint = NoConstraint
// Vector Widening Integer Reduction Instructions
@@ -908,42 +1252,49 @@ let Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint in {
// This has the downside that the earlyclobber constraint is too coarse and
// will impose unnecessary restrictions by not allowing the destination to
// overlap with the first (wide) operand.
-defm VWREDSUMU : VALU_IV_V<"vwredsumu", 0b110000>;
-defm VWREDSUM : VALU_IV_V<"vwredsum", 0b110001>;
+defm VWREDSUMU : VWRED_IV_V<"vwredsumu", 0b110000>;
+defm VWREDSUM : VWRED_IV_V<"vwredsum", 0b110001>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint
+
} // Predicates = [HasStdExtV]
let Predicates = [HasStdExtV, HasStdExtF] in {
// Vector Single-Width Floating-Point Reduction Instructions
let RVVConstraint = NoConstraint in {
-defm VFREDOSUM : VALU_FV_V<"vfredosum", 0b000011>;
-defm VFREDSUM : VALU_FV_V<"vfredsum", 0b000001>;
-defm VFREDMAX : VALU_FV_V<"vfredmax", 0b000111>;
-defm VFREDMIN : VALU_FV_V<"vfredmin", 0b000101>;
+defm VFREDOSUM : VREDO_FV_V<"vfredosum", 0b000011>;
+defm VFREDUSUM : VRED_FV_V<"vfredusum", 0b000001>;
+defm VFREDMAX : VRED_FV_V<"vfredmax", 0b000111>;
+defm VFREDMIN : VRED_FV_V<"vfredmin", 0b000101>;
} // RVVConstraint = NoConstraint
+def : InstAlias<"vfredsum.vs $vd, $vs2, $vs1$vm",
+ (VFREDUSUM_VS VR:$vd, VR:$vs2, VR:$vs1, VMaskOp:$vm), 0>;
+
// Vector Widening Floating-Point Reduction Instructions
let Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint in {
// Set earlyclobber for following instructions for second and mask operands.
// This has the downside that the earlyclobber constraint is too coarse and
// will impose unnecessary restrictions by not allowing the destination to
// overlap with the first (wide) operand.
-defm VFWREDOSUM : VALU_FV_V<"vfwredosum", 0b110011>;
-defm VFWREDSUM : VALU_FV_V<"vfwredsum", 0b110001>;
+defm VFWREDOSUM : VWREDO_FV_V<"vfwredosum", 0b110011>;
+defm VFWREDUSUM : VWRED_FV_V<"vfwredusum", 0b110001>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint
+
+def : InstAlias<"vfwredsum.vs $vd, $vs2, $vs1$vm",
+ (VFWREDUSUM_VS VR:$vd, VR:$vs2, VR:$vs1, VMaskOp:$vm), 0>;
} // Predicates = [HasStdExtV, HasStdExtF]
let Predicates = [HasStdExtV] in {
// Vector Mask-Register Logical Instructions
let RVVConstraint = NoConstraint in {
-defm VMAND_M : VALU_MV_Mask<"vmand", 0b011001, "m">;
-defm VMNAND_M : VALU_MV_Mask<"vmnand", 0b011101, "m">;
-defm VMANDNOT_M : VALU_MV_Mask<"vmandnot", 0b011000, "m">;
-defm VMXOR_M : VALU_MV_Mask<"vmxor", 0b011011, "m">;
-defm VMOR_M : VALU_MV_Mask<"vmor", 0b011010, "m">;
-defm VMNOR_M : VALU_MV_Mask<"vmnor", 0b011110, "m">;
-defm VMORNOT_M : VALU_MV_Mask<"vmornot", 0b011100, "m">;
-defm VMXNOR_M : VALU_MV_Mask<"vmxnor", 0b011111, "m">;
+defm VMAND_M : VMALU_MV_Mask<"vmand", 0b011001, "m">;
+defm VMNAND_M : VMALU_MV_Mask<"vmnand", 0b011101, "m">;
+defm VMANDN_M : VMALU_MV_Mask<"vmandn", 0b011000, "m">;
+defm VMXOR_M : VMALU_MV_Mask<"vmxor", 0b011011, "m">;
+defm VMOR_M : VMALU_MV_Mask<"vmor", 0b011010, "m">;
+defm VMNOR_M : VMALU_MV_Mask<"vmnor", 0b011110, "m">;
+defm VMORN_M : VMALU_MV_Mask<"vmorn", 0b011100, "m">;
+defm VMXNOR_M : VMALU_MV_Mask<"vmxnor", 0b011111, "m">;
}
def : InstAlias<"vmmv.m $vd, $vs",
@@ -955,207 +1306,175 @@ def : InstAlias<"vmset.m $vd",
def : InstAlias<"vmnot.m $vd, $vs",
(VMNAND_MM VR:$vd, VR:$vs, VR:$vs)>;
+def : InstAlias<"vmandnot.mm $vd, $vs2, $vs1",
+ (VMANDN_MM VR:$vd, VR:$vs2, VR:$vs1), 0>;
+def : InstAlias<"vmornot.mm $vd, $vs2, $vs1",
+ (VMORN_MM VR:$vd, VR:$vs2, VR:$vs1), 0>;
+
let hasSideEffects = 0, mayLoad = 0, mayStore = 0,
RVVConstraint = NoConstraint in {
-// Vector mask population count vpopc
-def VPOPC_M : RVInstV<0b010000, 0b10000, OPMVV, (outs GPR:$vd),
- (ins VR:$vs2, VMaskOp:$vm),
- "vpopc.m", "$vd, $vs2$vm">;
+
+// Vector mask population count vcpop
+def VCPOP_M : RVInstV<0b010000, 0b10000, OPMVV, (outs GPR:$vd),
+ (ins VR:$vs2, VMaskOp:$vm),
+ "vcpop.m", "$vd, $vs2$vm">,
+ Sched<[WriteVMPopV, ReadVMPopV, ReadVMask]>;
// vfirst find-first-set mask bit
def VFIRST_M : RVInstV<0b010000, 0b10001, OPMVV, (outs GPR:$vd),
- (ins VR:$vs2, VMaskOp:$vm),
- "vfirst.m", "$vd, $vs2$vm">;
+ (ins VR:$vs2, VMaskOp:$vm),
+ "vfirst.m", "$vd, $vs2$vm">,
+ Sched<[WriteVMFFSV, ReadVMFFSV, ReadVMask]>;
+
} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
+def : InstAlias<"vpopc.m $vd, $vs2$vm",
+ (VCPOP_M GPR:$vd, VR:$vs2, VMaskOp:$vm), 0>;
+
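The vpopc.m alias plays the same role for the renamed mask population-count instruction. Sketch with illustrative registers:

    vpopc.m a0, v4, v0.t   # accepted, same encoding as vcpop.m a0, v4, v0.t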
let Constraints = "@earlyclobber $vd", RVVConstraint = Iota in {
+
// vmsbf.m set-before-first mask bit
-defm VMSBF_M : VALU_MV_VS2<"vmsbf.m", 0b010100, 0b00001>;
+defm VMSBF_M : VMSFS_MV_V<"vmsbf.m", 0b010100, 0b00001>;
// vmsif.m set-including-first mask bit
-defm VMSIF_M : VALU_MV_VS2<"vmsif.m", 0b010100, 0b00011>;
+defm VMSIF_M : VMSFS_MV_V<"vmsif.m", 0b010100, 0b00011>;
// vmsof.m set-only-first mask bit
-defm VMSOF_M : VALU_MV_VS2<"vmsof.m", 0b010100, 0b00010>;
+defm VMSOF_M : VMSFS_MV_V<"vmsof.m", 0b010100, 0b00010>;
// Vector Iota Instruction
-defm VIOTA_M : VALU_MV_VS2<"viota.m", 0b010100, 0b10000>;
+defm VIOTA_M : VMIOT_MV_V<"viota.m", 0b010100, 0b10000>;
+
} // Constraints = "@earlyclobber $vd", RVVConstraint = Iota
// Vector Element Index Instruction
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+
+let vs2 = 0 in
def VID_V : RVInstV<0b010100, 0b10001, OPMVV, (outs VR:$vd),
- (ins VMaskOp:$vm), "vid.v", "$vd$vm"> {
- let vs2 = 0;
-}
+ (ins VMaskOp:$vm), "vid.v", "$vd$vm">,
+ Sched<[WriteVMIdxV, ReadVMask]>;
// Integer Scalar Move Instructions
let vm = 1, RVVConstraint = NoConstraint in {
def VMV_X_S : RVInstV<0b010000, 0b00000, OPMVV, (outs GPR:$vd),
- (ins VR:$vs2), "vmv.x.s", "$vd, $vs2">;
+ (ins VR:$vs2), "vmv.x.s", "$vd, $vs2">,
+ Sched<[WriteVIMovVX, ReadVIMovVX]>;
let Constraints = "$vd = $vd_wb" in
def VMV_S_X : RVInstV2<0b010000, 0b00000, OPMVX, (outs VR:$vd_wb),
- (ins VR:$vd, GPR:$rs1), "vmv.s.x", "$vd, $rs1">;
-
+ (ins VR:$vd, GPR:$rs1), "vmv.s.x", "$vd, $rs1">,
+ Sched<[WriteVIMovXV, ReadVIMovXV, ReadVIMovXX]>;
}
+
} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
+
} // Predicates = [HasStdExtV]
let Predicates = [HasStdExtV, HasStdExtF] in {
+
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, vm = 1,
RVVConstraint = NoConstraint in {
// Floating-Point Scalar Move Instructions
def VFMV_F_S : RVInstV<0b010000, 0b00000, OPFVV, (outs FPR32:$vd),
- (ins VR:$vs2), "vfmv.f.s", "$vd, $vs2">;
+ (ins VR:$vs2), "vfmv.f.s", "$vd, $vs2">,
+ Sched<[WriteVFMovVF, ReadVFMovVF]>;
let Constraints = "$vd = $vd_wb" in
def VFMV_S_F : RVInstV2<0b010000, 0b00000, OPFVF, (outs VR:$vd_wb),
- (ins VR:$vd, FPR32:$rs1), "vfmv.s.f", "$vd, $rs1">;
+ (ins VR:$vd, FPR32:$rs1), "vfmv.s.f", "$vd, $rs1">,
+ Sched<[WriteVFMovFV, ReadVFMovFV, ReadVFMovFX]>;
} // hasSideEffects = 0, mayLoad = 0, mayStore = 0, vm = 1
+
} // Predicates = [HasStdExtV, HasStdExtF]
let Predicates = [HasStdExtV] in {
// Vector Slide Instructions
let Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp in {
-defm VSLIDEUP_V : VALU_IV_X_I<"vslideup", 0b001110, uimm5>;
-defm VSLIDE1UP_V : VALU_MV_X<"vslide1up", 0b001110>;
+defm VSLIDEUP_V : VSLD_IV_X_I<"vslideup", 0b001110, uimm5>;
+defm VSLIDE1UP_V : VSLD1_MV_X<"vslide1up", 0b001110>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp
-defm VSLIDEDOWN_V : VALU_IV_X_I<"vslidedown", 0b001111, uimm5>;
-defm VSLIDE1DOWN_V : VALU_MV_X<"vslide1down", 0b001111>;
+defm VSLIDEDOWN_V : VSLD_IV_X_I<"vslidedown", 0b001111, uimm5>;
+defm VSLIDE1DOWN_V : VSLD1_MV_X<"vslide1down", 0b001111>;
} // Predicates = [HasStdExtV]
let Predicates = [HasStdExtV, HasStdExtF] in {
let Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp in {
-defm VFSLIDE1UP_V : VALU_FV_F<"vfslide1up", 0b001110>;
+defm VFSLIDE1UP_V : VSLD1_FV_F<"vfslide1up", 0b001110>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp
-defm VFSLIDE1DOWN_V : VALU_FV_F<"vfslide1down", 0b001111>;
+defm VFSLIDE1DOWN_V : VSLD1_FV_F<"vfslide1down", 0b001111>;
} // Predicates = [HasStdExtV, HasStdExtF]
let Predicates = [HasStdExtV] in {
// Vector Register Gather Instruction
let Constraints = "@earlyclobber $vd", RVVConstraint = Vrgather in {
-defm VRGATHER_V : VALU_IV_V_X_I<"vrgather", 0b001100, uimm5>;
-def VRGATHEREI16_VV : VALUVV<0b001110, OPIVV, "vrgatherei16.vv">;
+defm VRGATHER_V : VGTR_IV_V_X_I<"vrgather", 0b001100, uimm5>;
+def VRGATHEREI16_VV : VALUVV<0b001110, OPIVV, "vrgatherei16.vv">,
+ Sched<[WriteVGatherV, ReadVGatherV, ReadVGatherV]>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = Vrgather
// Vector Compress Instruction
let Constraints = "@earlyclobber $vd", RVVConstraint = Vcompress in {
-defm VCOMPRESS_V : VALU_MV_Mask<"vcompress", 0b010111>;
+defm VCOMPRESS_V : VCPR_MV_Mask<"vcompress", 0b010111>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = Vcompress
let hasSideEffects = 0, mayLoad = 0, mayStore = 0,
RVVConstraint = NoConstraint in {
-foreach nf = [1, 2, 4, 8] in {
- def VMV#nf#R_V : RVInstV<0b100111, !add(nf, -1), OPIVI, (outs VR:$vd),
- (ins VR:$vs2), "vmv" # nf # "r.v",
- "$vd, $vs2"> {
- let Uses = [];
- let vm = 1;
- }
+foreach n = [1, 2, 4, 8] in {
+ def VMV#n#R_V : RVInstV<0b100111, !add(n, -1), OPIVI, (outs VR:$vd),
+ (ins VR:$vs2), "vmv" # n # "r.v", "$vd, $vs2">,
+ VMVRSched<n> {
+ let Uses = [];
+ let vm = 1;
+}
}
} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
} // Predicates = [HasStdExtV]
let Predicates = [HasStdExtZvlsseg] in {
foreach nf=2-8 in {
- def VLSEG#nf#E8_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStride, LSWidth8, "vlseg"#nf#"e8.v">;
- def VLSEG#nf#E16_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStride, LSWidth16, "vlseg"#nf#"e16.v">;
- def VLSEG#nf#E32_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStride, LSWidth32, "vlseg"#nf#"e32.v">;
- def VLSEG#nf#E64_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStride, LSWidth64, "vlseg"#nf#"e64.v">;
-
- def VLSEG#nf#E8FF_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStrideFF, LSWidth8, "vlseg"#nf#"e8ff.v">;
- def VLSEG#nf#E16FF_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStrideFF, LSWidth16, "vlseg"#nf#"e16ff.v">;
- def VLSEG#nf#E32FF_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStrideFF, LSWidth32, "vlseg"#nf#"e32ff.v">;
- def VLSEG#nf#E64FF_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStrideFF, LSWidth64, "vlseg"#nf#"e64ff.v">;
-
- def VSSEG#nf#E8_V : VUnitStrideSegmentStore<!add(nf, -1), LSWidth8, "vsseg"#nf#"e8.v">;
- def VSSEG#nf#E16_V : VUnitStrideSegmentStore<!add(nf, -1), LSWidth16, "vsseg"#nf#"e16.v">;
- def VSSEG#nf#E32_V : VUnitStrideSegmentStore<!add(nf, -1), LSWidth32, "vsseg"#nf#"e32.v">;
- def VSSEG#nf#E64_V : VUnitStrideSegmentStore<!add(nf, -1), LSWidth64, "vsseg"#nf#"e64.v">;
-
- // Vector Strided Instructions
- def VLSSEG#nf#E8_V : VStridedSegmentLoad<!add(nf, -1), LSWidth8, "vlsseg"#nf#"e8.v">;
- def VLSSEG#nf#E16_V : VStridedSegmentLoad<!add(nf, -1), LSWidth16, "vlsseg"#nf#"e16.v">;
- def VLSSEG#nf#E32_V : VStridedSegmentLoad<!add(nf, -1), LSWidth32, "vlsseg"#nf#"e32.v">;
- def VLSSEG#nf#E64_V : VStridedSegmentLoad<!add(nf, -1), LSWidth64, "vlsseg"#nf#"e64.v">;
-
- def VSSSEG#nf#E8_V : VStridedSegmentStore<!add(nf, -1), LSWidth8, "vssseg"#nf#"e8.v">;
- def VSSSEG#nf#E16_V : VStridedSegmentStore<!add(nf, -1), LSWidth16, "vssseg"#nf#"e16.v">;
- def VSSSEG#nf#E32_V : VStridedSegmentStore<!add(nf, -1), LSWidth32, "vssseg"#nf#"e32.v">;
- def VSSSEG#nf#E64_V : VStridedSegmentStore<!add(nf, -1), LSWidth64, "vssseg"#nf#"e64.v">;
-
- // Vector Indexed Instructions
- def VLUXSEG#nf#EI8_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord,
- LSWidth8, "vluxseg"#nf#"ei8.v">;
- def VLUXSEG#nf#EI16_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord,
- LSWidth16, "vluxseg"#nf#"ei16.v">;
- def VLUXSEG#nf#EI32_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord,
- LSWidth32, "vluxseg"#nf#"ei32.v">;
- def VLUXSEG#nf#EI64_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord,
- LSWidth64, "vluxseg"#nf#"ei64.v">;
-
- def VLOXSEG#nf#EI8_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder,
- LSWidth8, "vloxseg"#nf#"ei8.v">;
- def VLOXSEG#nf#EI16_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder,
- LSWidth16, "vloxseg"#nf#"ei16.v">;
- def VLOXSEG#nf#EI32_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder,
- LSWidth32, "vloxseg"#nf#"ei32.v">;
- def VLOXSEG#nf#EI64_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder,
- LSWidth64, "vloxseg"#nf#"ei64.v">;
-
- def VSUXSEG#nf#EI8_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord,
- LSWidth8, "vsuxseg"#nf#"ei8.v">;
- def VSUXSEG#nf#EI16_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord,
- LSWidth16, "vsuxseg"#nf#"ei16.v">;
- def VSUXSEG#nf#EI32_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord,
- LSWidth32, "vsuxseg"#nf#"ei32.v">;
- def VSUXSEG#nf#EI64_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord,
- LSWidth64, "vsuxseg"#nf#"ei64.v">;
-
- def VSOXSEG#nf#EI8_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder,
- LSWidth8, "vsoxseg"#nf#"ei8.v">;
- def VSOXSEG#nf#EI16_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder,
- LSWidth16, "vsoxseg"#nf#"ei16.v">;
- def VSOXSEG#nf#EI32_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder,
- LSWidth32, "vsoxseg"#nf#"ei32.v">;
- def VSOXSEG#nf#EI64_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder,
- LSWidth64, "vsoxseg"#nf#"ei64.v">;
+ foreach eew = [8, 16, 32, 64] in {
+ defvar w = !cast<RISCVWidth>("LSWidth"#eew);
+
+ def VLSEG#nf#E#eew#_V :
+ VUnitStrideSegmentLoad<!add(nf, -1), w, "vlseg"#nf#"e"#eew#".v">;
+ def VLSEG#nf#E#eew#FF_V :
+ VUnitStrideSegmentLoadFF<!add(nf, -1), w, "vlseg"#nf#"e"#eew#"ff.v">;
+ def VSSEG#nf#E#eew#_V :
+ VUnitStrideSegmentStore<!add(nf, -1), w, "vsseg"#nf#"e"#eew#".v">;
+
+ // Vector Strided Instructions
+ def VLSSEG#nf#E#eew#_V :
+ VStridedSegmentLoad<!add(nf, -1), w, "vlsseg"#nf#"e"#eew#".v">;
+ def VSSSEG#nf#E#eew#_V :
+ VStridedSegmentStore<!add(nf, -1), w, "vssseg"#nf#"e"#eew#".v">;
+
+ // Vector Indexed Instructions
+ def VLUXSEG#nf#EI#eew#_V :
+ VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord, w,
+ "vluxseg"#nf#"ei"#eew#".v">;
+ def VLOXSEG#nf#EI#eew#_V :
+ VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder, w,
+ "vloxseg"#nf#"ei"#eew#".v">;
+ def VSUXSEG#nf#EI#eew#_V :
+ VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord, w,
+ "vsuxseg"#nf#"ei"#eew#".v">;
+ def VSOXSEG#nf#EI#eew#_V :
+ VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder, w,
+ "vsoxseg"#nf#"ei"#eew#".v">;
+ }
}
} // Predicates = [HasStdExtZvlsseg]
let Predicates = [HasStdExtZvamo, HasStdExtA] in {
- defm VAMOSWAPEI8 : VAMO<AMOOPVamoSwap, LSWidth8, "vamoswapei8.v">;
- defm VAMOSWAPEI16 : VAMO<AMOOPVamoSwap, LSWidth16, "vamoswapei16.v">;
- defm VAMOSWAPEI32 : VAMO<AMOOPVamoSwap, LSWidth32, "vamoswapei32.v">;
-
- defm VAMOADDEI8 : VAMO<AMOOPVamoAdd, LSWidth8, "vamoaddei8.v">;
- defm VAMOADDEI16 : VAMO<AMOOPVamoAdd, LSWidth16, "vamoaddei16.v">;
- defm VAMOADDEI32 : VAMO<AMOOPVamoAdd, LSWidth32, "vamoaddei32.v">;
-
- defm VAMOXOREI8 : VAMO<AMOOPVamoXor, LSWidth8, "vamoxorei8.v">;
- defm VAMOXOREI16 : VAMO<AMOOPVamoXor, LSWidth16, "vamoxorei16.v">;
- defm VAMOXOREI32 : VAMO<AMOOPVamoXor, LSWidth32, "vamoxorei32.v">;
-
- defm VAMOANDEI8 : VAMO<AMOOPVamoAnd, LSWidth8, "vamoandei8.v">;
- defm VAMOANDEI16 : VAMO<AMOOPVamoAnd, LSWidth16, "vamoandei16.v">;
- defm VAMOANDEI32 : VAMO<AMOOPVamoAnd, LSWidth32, "vamoandei32.v">;
-
- defm VAMOOREI8 : VAMO<AMOOPVamoOr, LSWidth8, "vamoorei8.v">;
- defm VAMOOREI16 : VAMO<AMOOPVamoOr, LSWidth16, "vamoorei16.v">;
- defm VAMOOREI32 : VAMO<AMOOPVamoOr, LSWidth32, "vamoorei32.v">;
-
- defm VAMOMINEI8 : VAMO<AMOOPVamoMin, LSWidth8, "vamominei8.v">;
- defm VAMOMINEI16 : VAMO<AMOOPVamoMin, LSWidth16, "vamominei16.v">;
- defm VAMOMINEI32 : VAMO<AMOOPVamoMin, LSWidth32, "vamominei32.v">;
-
- defm VAMOMAXEI8 : VAMO<AMOOPVamoMax, LSWidth8, "vamomaxei8.v">;
- defm VAMOMAXEI16 : VAMO<AMOOPVamoMax, LSWidth16, "vamomaxei16.v">;
- defm VAMOMAXEI32 : VAMO<AMOOPVamoMax, LSWidth32, "vamomaxei32.v">;
-
- defm VAMOMINUEI8 : VAMO<AMOOPVamoMinu, LSWidth8, "vamominuei8.v">;
- defm VAMOMINUEI16 : VAMO<AMOOPVamoMinu, LSWidth16, "vamominuei16.v">;
- defm VAMOMINUEI32 : VAMO<AMOOPVamoMinu, LSWidth32, "vamominuei32.v">;
-
- defm VAMOMAXUEI8 : VAMO<AMOOPVamoMaxu, LSWidth8, "vamomaxuei8.v">;
- defm VAMOMAXUEI16 : VAMO<AMOOPVamoMaxu, LSWidth16, "vamomaxuei16.v">;
- defm VAMOMAXUEI32 : VAMO<AMOOPVamoMaxu, LSWidth32, "vamomaxuei32.v">;
+ foreach eew = [8, 16, 32] in {
+ defvar w = !cast<RISCVWidth>("LSWidth"#eew);
+ defm VAMOSWAPEI#eew : VAMO<AMOOPVamoSwap, w, "vamoswapei"#eew#".v">;
+ defm VAMOADDEI#eew : VAMO<AMOOPVamoAdd, w, "vamoaddei"#eew#".v">;
+ defm VAMOXOREI#eew : VAMO<AMOOPVamoXor, w, "vamoxorei"#eew#".v">;
+ defm VAMOANDEI#eew : VAMO<AMOOPVamoAnd, w, "vamoandei"#eew#".v">;
+ defm VAMOOREI#eew : VAMO<AMOOPVamoOr, w, "vamoorei"#eew#".v">;
+ defm VAMOMINEI#eew : VAMO<AMOOPVamoMin, w, "vamominei"#eew#".v">;
+ defm VAMOMAXEI#eew : VAMO<AMOOPVamoMax, w, "vamomaxei"#eew#".v">;
+ defm VAMOMINUEI#eew : VAMO<AMOOPVamoMinu, w, "vamominuei"#eew#".v">;
+ defm VAMOMAXUEI#eew : VAMO<AMOOPVamoMaxu, w, "vamomaxuei"#eew#".v">;
+ }
} // Predicates = [HasStdExtZvamo, HasStdExtA]
let Predicates = [HasStdExtZvamo, HasStdExtA, IsRV64] in {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 0284ff6d1c6b..a82e333e6bab 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -23,7 +23,7 @@ def riscv_read_vlenb : SDNode<"RISCVISD::READ_VLENB",
// Operand that is allowed to be a register or a 5 bit immediate.
// This allows us to pick between VSETIVLI and VSETVLI opcodes using the same
// pseudo instructions.
-def AVL : RegisterOperand<GPR> {
+def AVL : RegisterOperand<GPRNoX0> {
let OperandNamespace = "RISCVOp";
let OperandType = "OPERAND_AVL";
}
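In other words, one pseudo carries the AVL either as a (non-X0) GPR or as a 5-bit immediate, and the vsetvli/vsetivli choice falls out of that. A sketch of the two emitted forms, with illustrative registers and vtype:

    vsetvli  a0, a1, e32, m1, ta, mu   # AVL held in a register
    vsetivli a0, 8,  e32, m1, ta, mu   # AVL is a small immediate (uimm5)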
@@ -40,6 +40,9 @@ def DecImm : SDNodeXForm<imm, [{
N->getValueType(0));
}]>;
+defvar TAIL_UNDISTURBED = 0;
+defvar TAIL_AGNOSTIC = 1;
+
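These defvars name the two values of the new vector policy operand; they appear to mirror the vta bit in vtype (0 = tail-undisturbed, 1 = tail-agnostic). Illustrative assembly showing the corresponding vtype settings:

    vsetvli a0, a1, e32, m1, tu, mu   # vta = 0, tail-undisturbed
    vsetvli a0, a1, e32, m1, ta, mu   # vta = 1, tail-agnostic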
//===----------------------------------------------------------------------===//
// Utilities.
//===----------------------------------------------------------------------===//
@@ -137,7 +140,9 @@ class octuple_to_str<int octuple> {
def VLOpFrag : PatFrag<(ops), (XLenVT (VLOp (XLenVT AVL:$vl)))>;
// Output pattern for X0 used to represent VLMAX in the pseudo instructions.
-def VLMax : OutPatFrag<(ops), (XLenVT X0)>;
+// We can't use the X0 register because the AVL operands use GPRNoX0.
+// This must be kept in sync with RISCV::VLMaxSentinel.
+def VLMax : OutPatFrag<(ops), (XLenVT -1)>;
// List of EEW.
defvar EEWList = [8, 16, 32, 64];
@@ -577,13 +582,11 @@ class PseudoToVInst<string PseudoInst> {
!subst("_B32", "",
!subst("_B64", "",
!subst("_MASK", "",
- !subst("_COMMUTABLE", "",
- !subst("_TA", "",
!subst("_TIED", "",
!subst("F16", "F",
!subst("F32", "F",
!subst("F64", "F",
- !subst("Pseudo", "", PseudoInst))))))))))))))))))))));
+ !subst("Pseudo", "", PseudoInst))))))))))))))))))));
}
// The destination vector register group for a masked vector instruction cannot
@@ -643,7 +646,7 @@ class VPseudoUSLoadMask<VReg RetClass, int EEW, bit isFF> :
Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
(ins GetVRegNoV0<RetClass>.R:$merge,
GPR:$rs1,
- VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),[]>,
RISCVVPseudo,
RISCVVLE</*Masked*/1, /*Strided*/0, /*FF*/isFF, log2<EEW>.val, VLMul> {
let mayLoad = 1;
@@ -653,6 +656,7 @@ class VPseudoUSLoadMask<VReg RetClass, int EEW, bit isFF> :
let HasVLOp = 1;
let HasSEWOp = 1;
let HasMergeOp = 1;
+ let HasVecPolicyOp = 1;
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
@@ -674,7 +678,7 @@ class VPseudoSLoadMask<VReg RetClass, int EEW>:
Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
(ins GetVRegNoV0<RetClass>.R:$merge,
GPR:$rs1, GPR:$rs2,
- VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),[]>,
RISCVVPseudo,
RISCVVLE</*Masked*/1, /*Strided*/1, /*FF*/0, log2<EEW>.val, VLMul> {
let mayLoad = 1;
@@ -684,6 +688,7 @@ class VPseudoSLoadMask<VReg RetClass, int EEW>:
let HasVLOp = 1;
let HasSEWOp = 1;
let HasMergeOp = 1;
+ let HasVecPolicyOp = 1;
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
@@ -708,7 +713,7 @@ class VPseudoILoadMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
(ins GetVRegNoV0<RetClass>.R:$merge,
GPR:$rs1, IdxClass:$rs2,
- VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),[]>,
RISCVVPseudo,
RISCVVLX</*Masked*/1, Ordered, log2<EEW>.val, VLMul, LMUL> {
let mayLoad = 1;
@@ -718,6 +723,7 @@ class VPseudoILoadMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
let HasVLOp = 1;
let HasSEWOp = 1;
let HasMergeOp = 1;
+ let HasVecPolicyOp = 1;
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
@@ -861,6 +867,22 @@ class VPseudoUnaryMask<VReg RetClass, VReg OpClass, string Constraint = ""> :
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
+class VPseudoUnaryMaskTA<VReg RetClass, VReg OpClass, string Constraint = ""> :
+ Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
+ (ins GetVRegNoV0<RetClass>.R:$merge, OpClass:$rs2,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let Constraints = Join<[Constraint, "$rd = $merge"], ",">.ret;
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasMergeOp = 1;
+ let HasVecPolicyOp = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
// mask unary operation without maskedoff
class VPseudoMaskUnarySOutMask:
Pseudo<(outs GPR:$rd),
@@ -976,6 +998,26 @@ class VPseudoBinaryMask<VReg RetClass,
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
+class VPseudoBinaryMaskTA<VReg RetClass,
+ RegisterClass Op1Class,
+ DAGOperand Op2Class,
+ string Constraint> :
+ Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
+ (ins GetVRegNoV0<RetClass>.R:$merge,
+ Op1Class:$rs2, Op2Class:$rs1,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let Constraints = Join<[Constraint, "$rd = $merge"], ",">.ret;
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasMergeOp = 1;
+ let HasVecPolicyOp = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
// Like VPseudoBinaryMask, but output can be V0.
class VPseudoBinaryMOutMask<VReg RetClass,
RegisterClass Op1Class,
@@ -1005,7 +1047,7 @@ class VPseudoTiedBinaryMask<VReg RetClass,
Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
(ins GetVRegNoV0<RetClass>.R:$merge,
Op2Class:$rs1,
- VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
@@ -1014,6 +1056,7 @@ class VPseudoTiedBinaryMask<VReg RetClass,
let HasVLOp = 1;
let HasSEWOp = 1;
let HasMergeOp = 0; // Merge is also rs2.
+ let HasVecPolicyOp = 1;
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
@@ -1060,6 +1103,27 @@ class VPseudoTernaryNoMask<VReg RetClass,
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
+class VPseudoTernaryNoMaskWithPolicy<VReg RetClass,
+ RegisterClass Op1Class,
+ DAGOperand Op2Class,
+ string Constraint> :
+ Pseudo<(outs RetClass:$rd),
+ (ins RetClass:$rs3, Op1Class:$rs1, Op2Class:$rs2,
+ AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),
+ []>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let Constraints = Join<[Constraint, "$rd = $rs3"], ",">.ret;
+ let HasVecPolicyOp = 1;
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasMergeOp = 1;
+ let HasDummyMask = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
class VPseudoAMOWDNoMask<VReg RetClass,
VReg Op1Class> :
Pseudo<(outs GetVRegNoV0<RetClass>.R:$vd_wd),
@@ -1139,7 +1203,7 @@ class VPseudoUSSegLoadNoMask<VReg RetClass, int EEW, bits<4> NF, bit isFF>:
class VPseudoUSSegLoadMask<VReg RetClass, int EEW, bits<4> NF, bit isFF>:
Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
(ins GetVRegNoV0<RetClass>.R:$merge, GPR:$rs1,
- VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),[]>,
RISCVVPseudo,
RISCVVLSEG<NF, /*Masked*/1, /*Strided*/0, /*FF*/isFF, log2<EEW>.val, VLMul> {
let mayLoad = 1;
@@ -1149,6 +1213,7 @@ class VPseudoUSSegLoadMask<VReg RetClass, int EEW, bits<4> NF, bit isFF>:
let HasVLOp = 1;
let HasSEWOp = 1;
let HasMergeOp = 1;
+ let HasVecPolicyOp = 1;
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
@@ -1170,7 +1235,8 @@ class VPseudoSSegLoadNoMask<VReg RetClass, int EEW, bits<4> NF>:
class VPseudoSSegLoadMask<VReg RetClass, int EEW, bits<4> NF>:
Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
(ins GetVRegNoV0<RetClass>.R:$merge, GPR:$rs1,
- GPR:$offset, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
+ GPR:$offset, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew,
+ ixlenimm:$policy),[]>,
RISCVVPseudo,
RISCVVLSEG<NF, /*Masked*/1, /*Strided*/1, /*FF*/0, log2<EEW>.val, VLMul> {
let mayLoad = 1;
@@ -1180,6 +1246,7 @@ class VPseudoSSegLoadMask<VReg RetClass, int EEW, bits<4> NF>:
let HasVLOp = 1;
let HasSEWOp = 1;
let HasMergeOp = 1;
+ let HasVecPolicyOp = 1;
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
@@ -1205,7 +1272,8 @@ class VPseudoISegLoadMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
bits<4> NF, bit Ordered>:
Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
(ins GetVRegNoV0<RetClass>.R:$merge, GPR:$rs1,
- IdxClass:$offset, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
+ IdxClass:$offset, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew,
+ ixlenimm:$policy),[]>,
RISCVVPseudo,
RISCVVLXSEG<NF, /*Masked*/1, Ordered, log2<EEW>.val, VLMul, LMUL> {
let mayLoad = 1;
@@ -1217,6 +1285,7 @@ class VPseudoISegLoadMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
let HasVLOp = 1;
let HasSEWOp = 1;
let HasMergeOp = 1;
+ let HasVecPolicyOp = 1;
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
@@ -1492,8 +1561,8 @@ multiclass VPseudoBinary<VReg RetClass,
let VLMul = MInfo.value in {
def "_" # MInfo.MX : VPseudoBinaryNoMask<RetClass, Op1Class, Op2Class,
Constraint>;
- def "_" # MInfo.MX # "_MASK" : VPseudoBinaryMask<RetClass, Op1Class, Op2Class,
- Constraint>;
+ def "_" # MInfo.MX # "_MASK" : VPseudoBinaryMaskTA<RetClass, Op1Class, Op2Class,
+ Constraint>;
}
}
@@ -1520,8 +1589,8 @@ multiclass VPseudoBinaryEmul<VReg RetClass,
let VLMul = lmul.value in {
def "_" # lmul.MX # "_" # emul.MX : VPseudoBinaryNoMask<RetClass, Op1Class, Op2Class,
Constraint>;
- def "_" # lmul.MX # "_" # emul.MX # "_MASK" : VPseudoBinaryMask<RetClass, Op1Class, Op2Class,
- Constraint>;
+ def "_" # lmul.MX # "_" # emul.MX # "_MASK" : VPseudoBinaryMaskTA<RetClass, Op1Class, Op2Class,
+ Constraint>;
}
}
@@ -1713,6 +1782,15 @@ multiclass VPseudoUnaryV_F_NoDummyMask {
}
}
+multiclass VPseudoUnaryTAV_V {
+ foreach m = MxList.m in {
+ let VLMul = m.value in {
+ def "_V_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.vrclass>;
+ def "_V_" # m.MX # "_MASK" : VPseudoUnaryMaskTA<m.vrclass, m.vrclass>;
+ }
+ }
+}
+
multiclass VPseudoUnaryV_V {
foreach m = MxList.m in {
let VLMul = m.value in {
@@ -1728,8 +1806,8 @@ multiclass PseudoUnaryV_VF2 {
{
let VLMul = m.value in {
def "_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.f2vrclass, constraints>;
- def "_" # m.MX # "_MASK" : VPseudoUnaryMask<m.vrclass, m.f2vrclass,
- constraints>;
+ def "_" # m.MX # "_MASK" : VPseudoUnaryMaskTA<m.vrclass, m.f2vrclass,
+ constraints>;
}
}
}
@@ -1740,8 +1818,8 @@ multiclass PseudoUnaryV_VF4 {
{
let VLMul = m.value in {
def "_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.f4vrclass, constraints>;
- def "_" # m.MX # "_MASK" : VPseudoUnaryMask<m.vrclass, m.f4vrclass,
- constraints>;
+ def "_" # m.MX # "_MASK" : VPseudoUnaryMaskTA<m.vrclass, m.f4vrclass,
+ constraints>;
}
}
}
@@ -1752,8 +1830,8 @@ multiclass PseudoUnaryV_VF8 {
{
let VLMul = m.value in {
def "_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.f8vrclass, constraints>;
- def "_" # m.MX # "_MASK" : VPseudoUnaryMask<m.vrclass, m.f8vrclass,
- constraints>;
+ def "_" # m.MX # "_MASK" : VPseudoUnaryMaskTA<m.vrclass, m.f8vrclass,
+ constraints>;
}
}
}
@@ -1887,16 +1965,23 @@ multiclass VPseudoTernary<VReg RetClass,
}
}
-multiclass VPseudoTernaryV_VV<string Constraint = ""> {
+multiclass VPseudoTernaryWithPolicy<VReg RetClass,
+ RegisterClass Op1Class,
+ DAGOperand Op2Class,
+ LMULInfo MInfo,
+ string Constraint = "",
+ bit Commutable = 0> {
+ let VLMul = MInfo.value in {
+ let isCommutable = Commutable in
+ def "_" # MInfo.MX : VPseudoTernaryNoMaskWithPolicy<RetClass, Op1Class, Op2Class, Constraint>;
+ def "_" # MInfo.MX # "_MASK" : VPseudoBinaryMask<RetClass, Op1Class, Op2Class, Constraint>;
+ }
+}
+
+multiclass VPseudoTernaryV_VV_AAXA<string Constraint = ""> {
foreach m = MxList.m in {
- defm _VV : VPseudoTernary<m.vrclass, m.vrclass, m.vrclass, m, Constraint>;
-
- // Add a commutable version for use by IR mul+add.
- let isCommutable = 1, ForceTailAgnostic = true, VLMul = m.value in
- def "_VV_" # m.MX # "_COMMUTABLE" : VPseudoTernaryNoMask<m.vrclass,
- m.vrclass,
- m.vrclass,
- Constraint>;
+ defm _VV : VPseudoTernaryWithPolicy<m.vrclass, m.vrclass, m.vrclass, m,
+ Constraint, /*Commutable*/1>;
}
}
@@ -1906,68 +1991,39 @@ multiclass VPseudoTernaryV_VX<string Constraint = ""> {
}
multiclass VPseudoTernaryV_VX_AAXA<string Constraint = ""> {
- foreach m = MxList.m in {
- defm "_VX" : VPseudoTernary<m.vrclass, GPR, m.vrclass, m, Constraint>;
-
- // Add a commutable version for use by IR mul+add.
- let isCommutable = 1, ForceTailAgnostic = true, VLMul = m.value in
- def "_VX_" # m.MX # "_COMMUTABLE" :
- VPseudoTernaryNoMask<m.vrclass, GPR, m.vrclass, Constraint>;
- }
+ foreach m = MxList.m in
+ defm "_VX" : VPseudoTernaryWithPolicy<m.vrclass, GPR, m.vrclass, m,
+ Constraint, /*Commutable*/1>;
}
multiclass VPseudoTernaryV_VF_AAXA<string Constraint = ""> {
- foreach m = MxList.m in {
- foreach f = FPList.fpinfo in {
- defm "_V" # f.FX : VPseudoTernary<m.vrclass, f.fprclass, m.vrclass,
- m, Constraint>;
-
- // Add a commutable version for use by IR mul+add.
- let isCommutable = 1, ForceTailAgnostic = true, VLMul = m.value in
- def "_V" # f.FX # "_" # m.MX # "_COMMUTABLE" :
- VPseudoTernaryNoMask<m.vrclass, f.fprclass, m.vrclass, Constraint>;
- }
- }
+ foreach m = MxList.m in
+ foreach f = FPList.fpinfo in
+ defm "_V" # f.FX : VPseudoTernaryWithPolicy<m.vrclass, f.fprclass,
+ m.vrclass, m, Constraint,
+ /*Commutable*/1>;
}
multiclass VPseudoTernaryW_VV {
defvar constraint = "@earlyclobber $rd";
- foreach m = MxListW.m in {
- defm _VV : VPseudoTernary<m.wvrclass, m.vrclass, m.vrclass, m, constraint>;
-
- // Add a tail agnostic version for us by IR mul+add.
- let ForceTailAgnostic = true, VLMul = m.value in
- def "_VV_" # m.MX # "_TA" : VPseudoTernaryNoMask<m.wvrclass,
- m.vrclass,
- m.vrclass,
- constraint>;
- }
+ foreach m = MxListW.m in
+ defm _VV : VPseudoTernaryWithPolicy<m.wvrclass, m.vrclass, m.vrclass, m,
+ constraint>;
}
multiclass VPseudoTernaryW_VX {
defvar constraint = "@earlyclobber $rd";
- foreach m = MxListW.m in {
- defm "_VX" : VPseudoTernary<m.wvrclass, GPR, m.vrclass, m, constraint>;
-
- // Add a tail agnostic version for use by IR mul+add.
- let ForceTailAgnostic = true, VLMul = m.value in
- def "_VX_" # m.MX # "_TA" :
- VPseudoTernaryNoMask<m.wvrclass, GPR, m.vrclass, constraint>;
- }
+ foreach m = MxListW.m in
+ defm "_VX" : VPseudoTernaryWithPolicy<m.wvrclass, GPR, m.vrclass, m,
+ constraint>;
}
multiclass VPseudoTernaryW_VF {
defvar constraint = "@earlyclobber $rd";
foreach m = MxListW.m in
- foreach f = FPListW.fpinfo in {
- defm "_V" # f.FX : VPseudoTernary<m.wvrclass, f.fprclass, m.vrclass, m,
- constraint>;
-
- // Add a tail agnostic version for use by IR mul+add.
- let ForceTailAgnostic = true, VLMul = m.value in
- def "_V" # f.FX # "_" # m.MX # "_TA" :
- VPseudoTernaryNoMask<m.vrclass, f.fprclass, m.vrclass, constraint>;
- }
+ foreach f = FPListW.fpinfo in
+ defm "_V" # f.FX : VPseudoTernaryWithPolicy<m.wvrclass, f.fprclass,
+ m.vrclass, m, constraint>;
}
multiclass VPseudoTernaryV_VI<Operand ImmType = simm5, string Constraint = ""> {
@@ -1976,12 +2032,12 @@ multiclass VPseudoTernaryV_VI<Operand ImmType = simm5, string Constraint = ""> {
}
multiclass VPseudoTernaryV_VV_VX_AAXA<string Constraint = ""> {
- defm "" : VPseudoTernaryV_VV<Constraint>;
+ defm "" : VPseudoTernaryV_VV_AAXA<Constraint>;
defm "" : VPseudoTernaryV_VX_AAXA<Constraint>;
}
multiclass VPseudoTernaryV_VV_VF_AAXA<string Constraint = ""> {
- defm "" : VPseudoTernaryV_VV<Constraint>;
+ defm "" : VPseudoTernaryV_VV_AAXA<Constraint>;
defm "" : VPseudoTernaryV_VF_AAXA<Constraint>;
}
@@ -2033,8 +2089,8 @@ multiclass VPseudoConversion<VReg RetClass,
string Constraint = ""> {
let VLMul = MInfo.value in {
def "_" # MInfo.MX : VPseudoUnaryNoMask<RetClass, Op1Class, Constraint>;
- def "_" # MInfo.MX # "_MASK" : VPseudoUnaryMask<RetClass, Op1Class,
- Constraint>;
+ def "_" # MInfo.MX # "_MASK" : VPseudoUnaryMaskTA<RetClass, Op1Class,
+ Constraint>;
}
}
@@ -2217,6 +2273,26 @@ class VPatUnaryMask<string intrinsic_name,
(op2_type op2_reg_class:$rs2),
(mask_type V0), GPR:$vl, sew)>;
+class VPatUnaryMaskTA<string intrinsic_name,
+ string inst,
+ string kind,
+ ValueType result_type,
+ ValueType op2_type,
+ ValueType mask_type,
+ int sew,
+ LMULInfo vlmul,
+ VReg result_reg_class,
+ VReg op2_reg_class> :
+ Pat<(result_type (!cast<Intrinsic>(intrinsic_name#"_mask")
+ (result_type result_reg_class:$merge),
+ (op2_type op2_reg_class:$rs2),
+ (mask_type V0),
+ VLOpFrag, (XLenVT timm:$policy))),
+ (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX#"_MASK")
+ (result_type result_reg_class:$merge),
+ (op2_type op2_reg_class:$rs2),
+ (mask_type V0), GPR:$vl, sew, (XLenVT timm:$policy))>;
+
class VPatMaskUnaryNoMask<string intrinsic_name,
string inst,
MTypeInfo mti> :
@@ -2318,6 +2394,28 @@ class VPatBinaryMask<string intrinsic_name,
(op2_type op2_kind:$rs2),
(mask_type V0), GPR:$vl, sew)>;
+class VPatBinaryMaskTA<string intrinsic_name,
+ string inst,
+ ValueType result_type,
+ ValueType op1_type,
+ ValueType op2_type,
+ ValueType mask_type,
+ int sew,
+ VReg result_reg_class,
+ VReg op1_reg_class,
+ DAGOperand op2_kind> :
+ Pat<(result_type (!cast<Intrinsic>(intrinsic_name#"_mask")
+ (result_type result_reg_class:$merge),
+ (op1_type op1_reg_class:$rs1),
+ (op2_type op2_kind:$rs2),
+ (mask_type V0),
+ VLOpFrag, (XLenVT timm:$policy))),
+ (!cast<Instruction>(inst#"_MASK")
+ (result_type result_reg_class:$merge),
+ (op1_type op1_reg_class:$rs1),
+ (op2_type op2_kind:$rs2),
+ (mask_type V0), GPR:$vl, sew, (XLenVT timm:$policy))>;
+
// Same as above but source operands are swapped.
class VPatBinaryMaskSwapped<string intrinsic_name,
string inst,
@@ -2370,11 +2468,11 @@ class VPatTiedBinaryMask<string intrinsic_name,
(result_type result_reg_class:$merge),
(op2_type op2_kind:$rs2),
(mask_type V0),
- VLOpFrag)),
+ VLOpFrag, (XLenVT timm:$policy))),
(!cast<Instruction>(inst#"_MASK_TIED")
(result_type result_reg_class:$merge),
(op2_type op2_kind:$rs2),
- (mask_type V0), GPR:$vl, sew)>;
+ (mask_type V0), GPR:$vl, sew, (XLenVT timm:$policy))>;
class VPatTernaryNoMask<string intrinsic,
string inst,
@@ -2382,7 +2480,6 @@ class VPatTernaryNoMask<string intrinsic,
ValueType result_type,
ValueType op1_type,
ValueType op2_type,
- ValueType mask_type,
int sew,
LMULInfo vlmul,
VReg result_reg_class,
@@ -2399,6 +2496,28 @@ class VPatTernaryNoMask<string intrinsic,
op2_kind:$rs2,
GPR:$vl, sew)>;
+class VPatTernaryNoMaskWithPolicy<string intrinsic,
+ string inst,
+ string kind,
+ ValueType result_type,
+ ValueType op1_type,
+ ValueType op2_type,
+ int sew,
+ LMULInfo vlmul,
+ VReg result_reg_class,
+ RegisterClass op1_reg_class,
+ DAGOperand op2_kind> :
+ Pat<(result_type (!cast<Intrinsic>(intrinsic)
+ (result_type result_reg_class:$rs3),
+ (op1_type op1_reg_class:$rs1),
+ (op2_type op2_kind:$rs2),
+ VLOpFrag)),
+ (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX)
+ result_reg_class:$rs3,
+ (op1_type op1_reg_class:$rs1),
+ op2_kind:$rs2,
+ GPR:$vl, sew, TAIL_UNDISTURBED)>;
+
class VPatTernaryMask<string intrinsic,
string inst,
string kind,
@@ -2514,9 +2633,9 @@ multiclass VPatUnaryV_VF<string intrinsic, string instruction, string suffix,
def : VPatUnaryNoMask<intrinsic, instruction, suffix,
vti.Vector, fti.Vector,
vti.Log2SEW, vti.LMul, fti.RegClass>;
- def : VPatUnaryMask<intrinsic, instruction, suffix,
- vti.Vector, fti.Vector, vti.Mask,
- vti.Log2SEW, vti.LMul, vti.RegClass, fti.RegClass>;
+ def : VPatUnaryMaskTA<intrinsic, instruction, suffix,
+ vti.Vector, fti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, vti.RegClass, fti.RegClass>;
}
}
@@ -2526,9 +2645,9 @@ multiclass VPatUnaryV_V<string intrinsic, string instruction,
def : VPatUnaryNoMask<intrinsic, instruction, "V",
vti.Vector, vti.Vector,
vti.Log2SEW, vti.LMul, vti.RegClass>;
- def : VPatUnaryMask<intrinsic, instruction, "V",
- vti.Vector, vti.Vector, vti.Mask,
- vti.Log2SEW, vti.LMul, vti.RegClass, vti.RegClass>;
+ def : VPatUnaryMaskTA<intrinsic, instruction, "V",
+ vti.Vector, vti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, vti.RegClass, vti.RegClass>;
}
}
@@ -2574,6 +2693,24 @@ multiclass VPatBinary<string intrinsic,
op2_kind>;
}
+multiclass VPatBinaryTA<string intrinsic,
+ string inst,
+ ValueType result_type,
+ ValueType op1_type,
+ ValueType op2_type,
+ ValueType mask_type,
+ int sew,
+ VReg result_reg_class,
+ VReg op1_reg_class,
+ DAGOperand op2_kind>
+{
+ def : VPatBinaryNoMask<intrinsic, inst, result_type, op1_type, op2_type,
+ sew, op1_reg_class, op2_kind>;
+ def : VPatBinaryMaskTA<intrinsic, inst, result_type, op1_type, op2_type,
+ mask_type, sew, result_reg_class, op1_reg_class,
+ op2_kind>;
+}
+
multiclass VPatBinarySwapped<string intrinsic,
string inst,
ValueType result_type,
@@ -2653,23 +2790,40 @@ multiclass VPatConversion<string intrinsic,
mask_type, sew, vlmul, result_reg_class, op1_reg_class>;
}
+multiclass VPatConversionTA<string intrinsic,
+ string inst,
+ string kind,
+ ValueType result_type,
+ ValueType op1_type,
+ ValueType mask_type,
+ int sew,
+ LMULInfo vlmul,
+ VReg result_reg_class,
+ VReg op1_reg_class>
+{
+ def : VPatUnaryNoMask<intrinsic, inst, kind, result_type, op1_type,
+ sew, vlmul, op1_reg_class>;
+ def : VPatUnaryMaskTA<intrinsic, inst, kind, result_type, op1_type,
+ mask_type, sew, vlmul, result_reg_class, op1_reg_class>;
+}
+
multiclass VPatBinaryV_VV<string intrinsic, string instruction,
list<VTypeInfo> vtilist> {
foreach vti = vtilist in
- defm : VPatBinary<intrinsic, instruction # "_VV_" # vti.LMul.MX,
- vti.Vector, vti.Vector, vti.Vector,vti.Mask,
- vti.Log2SEW, vti.RegClass,
- vti.RegClass, vti.RegClass>;
+ defm : VPatBinaryTA<intrinsic, instruction # "_VV_" # vti.LMul.MX,
+ vti.Vector, vti.Vector, vti.Vector,vti.Mask,
+ vti.Log2SEW, vti.RegClass,
+ vti.RegClass, vti.RegClass>;
}
multiclass VPatBinaryV_VV_INT<string intrinsic, string instruction,
list<VTypeInfo> vtilist> {
foreach vti = vtilist in {
defvar ivti = GetIntVTypeInfo<vti>.Vti;
- defm : VPatBinary<intrinsic, instruction # "_VV_" # vti.LMul.MX,
- vti.Vector, vti.Vector, ivti.Vector, vti.Mask,
- vti.Log2SEW, vti.RegClass,
- vti.RegClass, vti.RegClass>;
+ defm : VPatBinaryTA<intrinsic, instruction # "_VV_" # vti.LMul.MX,
+ vti.Vector, vti.Vector, ivti.Vector, vti.Mask,
+ vti.Log2SEW, vti.RegClass,
+ vti.RegClass, vti.RegClass>;
}
}
@@ -2684,10 +2838,10 @@ multiclass VPatBinaryV_VV_INT_EEW<string intrinsic, string instruction,
defvar emul_str = octuple_to_str<octuple_emul>.ret;
defvar ivti = !cast<VTypeInfo>("VI" # eew # emul_str);
defvar inst = instruction # "_VV_" # vti.LMul.MX # "_" # emul_str;
- defm : VPatBinary<intrinsic, inst,
- vti.Vector, vti.Vector, ivti.Vector, vti.Mask,
- vti.Log2SEW, vti.RegClass,
- vti.RegClass, ivti.RegClass>;
+ defm : VPatBinaryTA<intrinsic, inst,
+ vti.Vector, vti.Vector, ivti.Vector, vti.Mask,
+ vti.Log2SEW, vti.RegClass,
+ vti.RegClass, ivti.RegClass>;
}
}
}
@@ -2696,29 +2850,29 @@ multiclass VPatBinaryV_VX<string intrinsic, string instruction,
list<VTypeInfo> vtilist> {
foreach vti = vtilist in {
defvar kind = "V"#vti.ScalarSuffix;
- defm : VPatBinary<intrinsic, instruction#"_"#kind#"_"#vti.LMul.MX,
- vti.Vector, vti.Vector, vti.Scalar, vti.Mask,
- vti.Log2SEW, vti.RegClass,
- vti.RegClass, vti.ScalarRegClass>;
+ defm : VPatBinaryTA<intrinsic, instruction#"_"#kind#"_"#vti.LMul.MX,
+ vti.Vector, vti.Vector, vti.Scalar, vti.Mask,
+ vti.Log2SEW, vti.RegClass,
+ vti.RegClass, vti.ScalarRegClass>;
}
}
multiclass VPatBinaryV_VX_INT<string intrinsic, string instruction,
list<VTypeInfo> vtilist> {
foreach vti = vtilist in
- defm : VPatBinary<intrinsic, instruction # "_VX_" # vti.LMul.MX,
- vti.Vector, vti.Vector, XLenVT, vti.Mask,
- vti.Log2SEW, vti.RegClass,
- vti.RegClass, GPR>;
+ defm : VPatBinaryTA<intrinsic, instruction # "_VX_" # vti.LMul.MX,
+ vti.Vector, vti.Vector, XLenVT, vti.Mask,
+ vti.Log2SEW, vti.RegClass,
+ vti.RegClass, GPR>;
}
multiclass VPatBinaryV_VI<string intrinsic, string instruction,
list<VTypeInfo> vtilist, Operand imm_type> {
foreach vti = vtilist in
- defm : VPatBinary<intrinsic, instruction # "_VI_" # vti.LMul.MX,
- vti.Vector, vti.Vector, XLenVT, vti.Mask,
- vti.Log2SEW, vti.RegClass,
- vti.RegClass, imm_type>;
+ defm : VPatBinaryTA<intrinsic, instruction # "_VI_" # vti.LMul.MX,
+ vti.Vector, vti.Vector, XLenVT, vti.Mask,
+ vti.Log2SEW, vti.RegClass,
+ vti.RegClass, imm_type>;
}
multiclass VPatBinaryM_MM<string intrinsic, string instruction> {
@@ -2733,10 +2887,10 @@ multiclass VPatBinaryW_VV<string intrinsic, string instruction,
foreach VtiToWti = vtilist in {
defvar Vti = VtiToWti.Vti;
defvar Wti = VtiToWti.Wti;
- defm : VPatBinary<intrinsic, instruction # "_VV_" # Vti.LMul.MX,
- Wti.Vector, Vti.Vector, Vti.Vector, Vti.Mask,
- Vti.Log2SEW, Wti.RegClass,
- Vti.RegClass, Vti.RegClass>;
+ defm : VPatBinaryTA<intrinsic, instruction # "_VV_" # Vti.LMul.MX,
+ Wti.Vector, Vti.Vector, Vti.Vector, Vti.Mask,
+ Vti.Log2SEW, Wti.RegClass,
+ Vti.RegClass, Vti.RegClass>;
}
}
@@ -2746,10 +2900,10 @@ multiclass VPatBinaryW_VX<string intrinsic, string instruction,
defvar Vti = VtiToWti.Vti;
defvar Wti = VtiToWti.Wti;
defvar kind = "V"#Vti.ScalarSuffix;
- defm : VPatBinary<intrinsic, instruction#"_"#kind#"_"#Vti.LMul.MX,
- Wti.Vector, Vti.Vector, Vti.Scalar, Vti.Mask,
- Vti.Log2SEW, Wti.RegClass,
- Vti.RegClass, Vti.ScalarRegClass>;
+ defm : VPatBinaryTA<intrinsic, instruction#"_"#kind#"_"#Vti.LMul.MX,
+ Wti.Vector, Vti.Vector, Vti.Scalar, Vti.Mask,
+ Vti.Log2SEW, Wti.RegClass,
+ Vti.RegClass, Vti.ScalarRegClass>;
}
}
@@ -2765,10 +2919,10 @@ multiclass VPatBinaryW_WV<string intrinsic, string instruction,
def : VPatTiedBinaryMask<intrinsic, instruction # "_WV_" # Vti.LMul.MX,
Wti.Vector, Vti.Vector, Vti.Mask,
Vti.Log2SEW, Wti.RegClass, Vti.RegClass>;
- def : VPatBinaryMask<intrinsic, instruction # "_WV_" # Vti.LMul.MX,
- Wti.Vector, Wti.Vector, Vti.Vector, Vti.Mask,
- Vti.Log2SEW, Wti.RegClass,
- Wti.RegClass, Vti.RegClass>;
+ def : VPatBinaryMaskTA<intrinsic, instruction # "_WV_" # Vti.LMul.MX,
+ Wti.Vector, Wti.Vector, Vti.Vector, Vti.Mask,
+ Vti.Log2SEW, Wti.RegClass,
+ Wti.RegClass, Vti.RegClass>;
}
}
@@ -2778,10 +2932,10 @@ multiclass VPatBinaryW_WX<string intrinsic, string instruction,
defvar Vti = VtiToWti.Vti;
defvar Wti = VtiToWti.Wti;
defvar kind = "W"#Vti.ScalarSuffix;
- defm : VPatBinary<intrinsic, instruction#"_"#kind#"_"#Vti.LMul.MX,
- Wti.Vector, Wti.Vector, Vti.Scalar, Vti.Mask,
- Vti.Log2SEW, Wti.RegClass,
- Wti.RegClass, Vti.ScalarRegClass>;
+ defm : VPatBinaryTA<intrinsic, instruction#"_"#kind#"_"#Vti.LMul.MX,
+ Wti.Vector, Wti.Vector, Vti.Scalar, Vti.Mask,
+ Vti.Log2SEW, Wti.RegClass,
+ Wti.RegClass, Vti.ScalarRegClass>;
}
}
@@ -2790,10 +2944,10 @@ multiclass VPatBinaryV_WV<string intrinsic, string instruction,
foreach VtiToWti = vtilist in {
defvar Vti = VtiToWti.Vti;
defvar Wti = VtiToWti.Wti;
- defm : VPatBinary<intrinsic, instruction # "_WV_" # Vti.LMul.MX,
- Vti.Vector, Wti.Vector, Vti.Vector, Vti.Mask,
- Vti.Log2SEW, Vti.RegClass,
- Wti.RegClass, Vti.RegClass>;
+ defm : VPatBinaryTA<intrinsic, instruction # "_WV_" # Vti.LMul.MX,
+ Vti.Vector, Wti.Vector, Vti.Vector, Vti.Mask,
+ Vti.Log2SEW, Vti.RegClass,
+ Wti.RegClass, Vti.RegClass>;
}
}
@@ -2803,10 +2957,10 @@ multiclass VPatBinaryV_WX<string intrinsic, string instruction,
defvar Vti = VtiToWti.Vti;
defvar Wti = VtiToWti.Wti;
defvar kind = "W"#Vti.ScalarSuffix;
- defm : VPatBinary<intrinsic, instruction#"_"#kind#"_"#Vti.LMul.MX,
- Vti.Vector, Wti.Vector, Vti.Scalar, Vti.Mask,
- Vti.Log2SEW, Vti.RegClass,
- Wti.RegClass, Vti.ScalarRegClass>;
+ defm : VPatBinaryTA<intrinsic, instruction#"_"#kind#"_"#Vti.LMul.MX,
+ Vti.Vector, Wti.Vector, Vti.Scalar, Vti.Mask,
+ Vti.Log2SEW, Vti.RegClass,
+ Wti.RegClass, Vti.ScalarRegClass>;
}
}
@@ -2815,10 +2969,10 @@ multiclass VPatBinaryV_WI<string intrinsic, string instruction,
foreach VtiToWti = vtilist in {
defvar Vti = VtiToWti.Vti;
defvar Wti = VtiToWti.Wti;
- defm : VPatBinary<intrinsic, instruction # "_WI_" # Vti.LMul.MX,
- Vti.Vector, Wti.Vector, XLenVT, Vti.Mask,
- Vti.Log2SEW, Vti.RegClass,
- Wti.RegClass, uimm5>;
+ defm : VPatBinaryTA<intrinsic, instruction # "_WI_" # Vti.LMul.MX,
+ Vti.Vector, Wti.Vector, XLenVT, Vti.Mask,
+ Vti.Log2SEW, Vti.RegClass,
+ Wti.RegClass, uimm5>;
}
}
@@ -2989,20 +3143,40 @@ multiclass VPatTernary<string intrinsic,
RegisterClass op1_reg_class,
DAGOperand op2_kind> {
def : VPatTernaryNoMask<intrinsic, inst, kind, result_type, op1_type, op2_type,
- mask_type, sew, vlmul, result_reg_class, op1_reg_class,
- op2_kind>;
+ sew, vlmul, result_reg_class, op1_reg_class,
+ op2_kind>;
def : VPatTernaryMask<intrinsic, inst, kind, result_type, op1_type, op2_type,
mask_type, sew, vlmul, result_reg_class, op1_reg_class,
op2_kind>;
}
-multiclass VPatTernaryV_VV<string intrinsic, string instruction,
- list<VTypeInfo> vtilist> {
+multiclass VPatTernaryWithPolicy<string intrinsic,
+ string inst,
+ string kind,
+ ValueType result_type,
+ ValueType op1_type,
+ ValueType op2_type,
+ ValueType mask_type,
+ int sew,
+ LMULInfo vlmul,
+ VReg result_reg_class,
+ RegisterClass op1_reg_class,
+ DAGOperand op2_kind> {
+ def : VPatTernaryNoMaskWithPolicy<intrinsic, inst, kind, result_type, op1_type,
+ op2_type, sew, vlmul, result_reg_class,
+ op1_reg_class, op2_kind>;
+ def : VPatTernaryMask<intrinsic, inst, kind, result_type, op1_type, op2_type,
+ mask_type, sew, vlmul, result_reg_class, op1_reg_class,
+ op2_kind>;
+}
+
+multiclass VPatTernaryV_VV_AAXA<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist> {
foreach vti = vtilist in
- defm : VPatTernary<intrinsic, instruction, "VV",
- vti.Vector, vti.Vector, vti.Vector, vti.Mask,
- vti.Log2SEW, vti.LMul, vti.RegClass,
- vti.RegClass, vti.RegClass>;
+ defm : VPatTernaryWithPolicy<intrinsic, instruction, "VV",
+ vti.Vector, vti.Vector, vti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, vti.RegClass,
+ vti.RegClass, vti.RegClass>;
}
multiclass VPatTernaryV_VX<string intrinsic, string instruction,
@@ -3017,11 +3191,11 @@ multiclass VPatTernaryV_VX<string intrinsic, string instruction,
multiclass VPatTernaryV_VX_AAXA<string intrinsic, string instruction,
list<VTypeInfo> vtilist> {
foreach vti = vtilist in
- defm : VPatTernary<intrinsic, instruction,
- "V"#vti.ScalarSuffix,
- vti.Vector, vti.Scalar, vti.Vector, vti.Mask,
- vti.Log2SEW, vti.LMul, vti.RegClass,
- vti.ScalarRegClass, vti.RegClass>;
+ defm : VPatTernaryWithPolicy<intrinsic, instruction,
+ "V"#vti.ScalarSuffix,
+ vti.Vector, vti.Scalar, vti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, vti.RegClass,
+ vti.ScalarRegClass, vti.RegClass>;
}
multiclass VPatTernaryV_VI<string intrinsic, string instruction,
@@ -3038,10 +3212,10 @@ multiclass VPatTernaryW_VV<string intrinsic, string instruction,
foreach vtiToWti = vtilist in {
defvar vti = vtiToWti.Vti;
defvar wti = vtiToWti.Wti;
- defm : VPatTernary<intrinsic, instruction, "VV",
- wti.Vector, vti.Vector, vti.Vector,
- vti.Mask, vti.Log2SEW, vti.LMul,
- wti.RegClass, vti.RegClass, vti.RegClass>;
+ defm : VPatTernaryWithPolicy<intrinsic, instruction, "VV",
+ wti.Vector, vti.Vector, vti.Vector,
+ vti.Mask, vti.Log2SEW, vti.LMul,
+ wti.RegClass, vti.RegClass, vti.RegClass>;
}
}
@@ -3050,17 +3224,17 @@ multiclass VPatTernaryW_VX<string intrinsic, string instruction,
foreach vtiToWti = vtilist in {
defvar vti = vtiToWti.Vti;
defvar wti = vtiToWti.Wti;
- defm : VPatTernary<intrinsic, instruction,
- "V"#vti.ScalarSuffix,
- wti.Vector, vti.Scalar, vti.Vector,
- vti.Mask, vti.Log2SEW, vti.LMul,
- wti.RegClass, vti.ScalarRegClass, vti.RegClass>;
+ defm : VPatTernaryWithPolicy<intrinsic, instruction,
+ "V"#vti.ScalarSuffix,
+ wti.Vector, vti.Scalar, vti.Vector,
+ vti.Mask, vti.Log2SEW, vti.LMul,
+ wti.RegClass, vti.ScalarRegClass, vti.RegClass>;
}
}
multiclass VPatTernaryV_VV_VX_AAXA<string intrinsic, string instruction,
list<VTypeInfo> vtilist>
- : VPatTernaryV_VV<intrinsic, instruction, vtilist>,
+ : VPatTernaryV_VV_AAXA<intrinsic, instruction, vtilist>,
VPatTernaryV_VX_AAXA<intrinsic, instruction, vtilist>;
multiclass VPatTernaryV_VX_VI<string intrinsic, string instruction,
@@ -3131,8 +3305,8 @@ multiclass VPatReductionW_VS<string intrinsic, string instruction, bit IsFloat =
}
}
-multiclass VPatConversionVI_VF<string intrinsic,
- string instruction>
+multiclass VPatClassifyVI_VF<string intrinsic,
+ string instruction>
{
foreach fvti = AllFloatVectors in
{
@@ -3144,6 +3318,19 @@ multiclass VPatConversionVI_VF<string intrinsic,
}
}
+multiclass VPatConversionVI_VF<string intrinsic,
+ string instruction>
+{
+ foreach fvti = AllFloatVectors in
+ {
+ defvar ivti = GetIntVTypeInfo<fvti>.Vti;
+
+ defm : VPatConversionTA<intrinsic, instruction, "V",
+ ivti.Vector, fvti.Vector, ivti.Mask, fvti.Log2SEW,
+ fvti.LMul, ivti.RegClass, fvti.RegClass>;
+ }
+}
+
multiclass VPatConversionVF_VI<string intrinsic,
string instruction>
{
@@ -3151,9 +3338,9 @@ multiclass VPatConversionVF_VI<string intrinsic,
{
defvar ivti = GetIntVTypeInfo<fvti>.Vti;
- defm : VPatConversion<intrinsic, instruction, "V",
- fvti.Vector, ivti.Vector, fvti.Mask, ivti.Log2SEW,
- ivti.LMul, fvti.RegClass, ivti.RegClass>;
+ defm : VPatConversionTA<intrinsic, instruction, "V",
+ fvti.Vector, ivti.Vector, fvti.Mask, ivti.Log2SEW,
+ ivti.LMul, fvti.RegClass, ivti.RegClass>;
}
}
@@ -3163,9 +3350,9 @@ multiclass VPatConversionWI_VF<string intrinsic, string instruction> {
defvar fvti = fvtiToFWti.Vti;
defvar iwti = GetIntVTypeInfo<fvtiToFWti.Wti>.Vti;
- defm : VPatConversion<intrinsic, instruction, "V",
- iwti.Vector, fvti.Vector, iwti.Mask, fvti.Log2SEW,
- fvti.LMul, iwti.RegClass, fvti.RegClass>;
+ defm : VPatConversionTA<intrinsic, instruction, "V",
+ iwti.Vector, fvti.Vector, iwti.Mask, fvti.Log2SEW,
+ fvti.LMul, iwti.RegClass, fvti.RegClass>;
}
}
@@ -3175,9 +3362,9 @@ multiclass VPatConversionWF_VI<string intrinsic, string instruction> {
defvar vti = vtiToWti.Vti;
defvar fwti = vtiToWti.Wti;
- defm : VPatConversion<intrinsic, instruction, "V",
- fwti.Vector, vti.Vector, fwti.Mask, vti.Log2SEW,
- vti.LMul, fwti.RegClass, vti.RegClass>;
+ defm : VPatConversionTA<intrinsic, instruction, "V",
+ fwti.Vector, vti.Vector, fwti.Mask, vti.Log2SEW,
+ vti.LMul, fwti.RegClass, vti.RegClass>;
}
}
@@ -3187,9 +3374,9 @@ multiclass VPatConversionWF_VF <string intrinsic, string instruction> {
defvar fvti = fvtiToFWti.Vti;
defvar fwti = fvtiToFWti.Wti;
- defm : VPatConversion<intrinsic, instruction, "V",
- fwti.Vector, fvti.Vector, fwti.Mask, fvti.Log2SEW,
- fvti.LMul, fwti.RegClass, fvti.RegClass>;
+ defm : VPatConversionTA<intrinsic, instruction, "V",
+ fwti.Vector, fvti.Vector, fwti.Mask, fvti.Log2SEW,
+ fvti.LMul, fwti.RegClass, fvti.RegClass>;
}
}
@@ -3199,9 +3386,9 @@ multiclass VPatConversionVI_WF <string intrinsic, string instruction> {
defvar vti = vtiToWti.Vti;
defvar fwti = vtiToWti.Wti;
- defm : VPatConversion<intrinsic, instruction, "W",
- vti.Vector, fwti.Vector, vti.Mask, vti.Log2SEW,
- vti.LMul, vti.RegClass, fwti.RegClass>;
+ defm : VPatConversionTA<intrinsic, instruction, "W",
+ vti.Vector, fwti.Vector, vti.Mask, vti.Log2SEW,
+ vti.LMul, vti.RegClass, fwti.RegClass>;
}
}
@@ -3211,9 +3398,9 @@ multiclass VPatConversionVF_WI <string intrinsic, string instruction> {
defvar fvti = fvtiToFWti.Vti;
defvar iwti = GetIntVTypeInfo<fvtiToFWti.Wti>.Vti;
- defm : VPatConversion<intrinsic, instruction, "W",
- fvti.Vector, iwti.Vector, fvti.Mask, fvti.Log2SEW,
- fvti.LMul, fvti.RegClass, iwti.RegClass>;
+ defm : VPatConversionTA<intrinsic, instruction, "W",
+ fvti.Vector, iwti.Vector, fvti.Mask, fvti.Log2SEW,
+ fvti.LMul, fvti.RegClass, iwti.RegClass>;
}
}
@@ -3223,9 +3410,9 @@ multiclass VPatConversionVF_WF <string intrinsic, string instruction> {
defvar fvti = fvtiToFWti.Vti;
defvar fwti = fvtiToFWti.Wti;
- defm : VPatConversion<intrinsic, instruction, "W",
- fvti.Vector, fwti.Vector, fvti.Mask, fvti.Log2SEW,
- fvti.LMul, fvti.RegClass, fwti.RegClass>;
+ defm : VPatConversionTA<intrinsic, instruction, "W",
+ fvti.Vector, fwti.Vector, fvti.Mask, fvti.Log2SEW,
+ fvti.LMul, fvti.RegClass, fwti.RegClass>;
}
}
@@ -3271,7 +3458,7 @@ multiclass VPatAMOV_WD<string intrinsic,
// Pseudo instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtV] in {
+let Predicates = [HasVInstructions] in {
//===----------------------------------------------------------------------===//
// Pseudo Instructions for CodeGen
@@ -3326,7 +3513,12 @@ foreach lmul = MxList.m in {
// Pseudos.
let hasSideEffects = 1, mayLoad = 0, mayStore = 0, Defs = [VL, VTYPE] in {
-def PseudoVSETVLI : Pseudo<(outs GPR:$rd), (ins GPR:$rs1, VTypeIOp:$vtypei), []>;
+// Due to rs1=X0 having special meaning, we need a GPRNoX0 register class for
+// when we aren't using one of the special X0 encodings. Otherwise it could
+// accidentally be made X0 by MachineIR optimizations. To satisfy the
+// verifier, we also need a GPRX0 instruction for the special encodings.
+def PseudoVSETVLI : Pseudo<(outs GPR:$rd), (ins GPRNoX0:$rs1, VTypeIOp:$vtypei), []>;
+def PseudoVSETVLIX0 : Pseudo<(outs GPR:$rd), (ins GPRX0:$rs1, VTypeIOp:$vtypei), []>;
def PseudoVSETIVLI : Pseudo<(outs GPR:$rd), (ins uimm5:$rs1, VTypeIOp:$vtypei), []>;
}
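The split matters because rs1 = x0 is not just another register for vsetvli; per the V spec it selects special behaviour, which is why the plain pseudo is restricted to GPRNoX0. Illustrative forms of the real instruction:

    vsetvli a0, a1, e32, m1, ta, mu   # normal form: vl computed from the AVL in a1
    vsetvli a0, x0, e32, m1, ta, mu   # rs1 = x0, rd != x0: vl = VLMAX
    vsetvli x0, x0, e32, m1, ta, mu   # rs1 = x0, rd = x0: change vtype, keep existing vl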
@@ -3342,8 +3534,8 @@ def PseudoVSETIVLI : Pseudo<(outs GPR:$rd), (ins uimm5:$rs1, VTypeIOp:$vtypei),
defm PseudoVL : VPseudoUSLoad</*isFF=*/false>;
defm PseudoVS : VPseudoUSStore;
-defm PseudoVLE1 : VPseudoLoadMask;
-defm PseudoVSE1 : VPseudoStoreMask;
+defm PseudoVLM : VPseudoLoadMask;
+defm PseudoVSM : VPseudoStoreMask;
//===----------------------------------------------------------------------===//
// 7.5 Vector Strided Instructions
@@ -3427,14 +3619,16 @@ foreach vti = AllIntegerVectors in {
(vti.Vector vti.RegClass:$rs2),
(vti.Vector vti.RegClass:$rs1),
(vti.Mask V0),
- VLOpFrag)),
+ VLOpFrag,
+ (XLenVT timm:$policy))),
(!cast<Instruction>("PseudoVSUB_VV_"#vti.LMul.MX#"_MASK")
vti.RegClass:$merge,
vti.RegClass:$rs1,
vti.RegClass:$rs2,
(vti.Mask V0),
GPR:$vl,
- vti.Log2SEW)>;
+ vti.Log2SEW,
+ (XLenVT timm:$policy))>;
// Match VSUB with a small immediate to vadd.vi by negating the immediate.
def : Pat<(vti.Vector (int_riscv_vsub (vti.Vector vti.RegClass:$rs1),
@@ -3448,14 +3642,16 @@ foreach vti = AllIntegerVectors in {
(vti.Vector vti.RegClass:$rs1),
(vti.Scalar simm5_plus1:$rs2),
(vti.Mask V0),
- VLOpFrag)),
+ VLOpFrag,
+ (XLenVT timm:$policy))),
(!cast<Instruction>("PseudoVADD_VI_"#vti.LMul.MX#"_MASK")
vti.RegClass:$merge,
vti.RegClass:$rs1,
(NegImm simm5_plus1:$rs2),
(vti.Mask V0),
GPR:$vl,
- vti.Log2SEW)>;
+ vti.Log2SEW,
+ (XLenVT timm:$policy))>;
}
//===----------------------------------------------------------------------===//
@@ -3623,9 +3819,9 @@ let Uses = [VXRM], Defs = [VXSAT], hasSideEffects = 1 in {
defm PseudoVNCLIPU : VPseudoBinaryV_WV_WX_WI;
}
-} // Predicates = [HasStdExtV]
+} // Predicates = [HasVInstructions]
-let Predicates = [HasStdExtV, HasStdExtF] in {
+let Predicates = [HasVInstructionsAnyF] in {
//===----------------------------------------------------------------------===//
// 14.2. Vector Single-Width Floating-Point Add/Subtract Instructions
//===----------------------------------------------------------------------===//
@@ -3676,17 +3872,17 @@ defm PseudoVFWNMSAC : VPseudoTernaryW_VV_VF;
//===----------------------------------------------------------------------===//
// 14.8. Vector Floating-Point Square-Root Instruction
//===----------------------------------------------------------------------===//
-defm PseudoVFSQRT : VPseudoUnaryV_V;
+defm PseudoVFSQRT : VPseudoUnaryTAV_V;
//===----------------------------------------------------------------------===//
// 14.9. Vector Floating-Point Reciprocal Square-Root Estimate Instruction
//===----------------------------------------------------------------------===//
-defm PseudoVFRSQRT7 : VPseudoUnaryV_V;
+defm PseudoVFRSQRT7 : VPseudoUnaryTAV_V;
//===----------------------------------------------------------------------===//
// 14.10. Vector Floating-Point Reciprocal Estimate Instruction
//===----------------------------------------------------------------------===//
-defm PseudoVFREC7 : VPseudoUnaryV_V;
+defm PseudoVFREC7 : VPseudoUnaryTAV_V;
//===----------------------------------------------------------------------===//
// 14.11. Vector Floating-Point Min/Max Instructions
@@ -3758,9 +3954,9 @@ defm PseudoVFNCVT_F_XU : VPseudoConversionV_W;
defm PseudoVFNCVT_F_X : VPseudoConversionV_W;
defm PseudoVFNCVT_F_F : VPseudoConversionV_W;
defm PseudoVFNCVT_ROD_F_F : VPseudoConversionV_W;
-} // Predicates = [HasStdExtV, HasStdExtF]
+} // Predicates = [HasVInstructionsAnyF]
-let Predicates = [HasStdExtV] in {
+let Predicates = [HasVInstructions] in {
//===----------------------------------------------------------------------===//
// 15.1. Vector Single-Width Integer Reduction Instructions
//===----------------------------------------------------------------------===//
@@ -3776,26 +3972,30 @@ defm PseudoVREDMAX : VPseudoReductionV_VS;
//===----------------------------------------------------------------------===//
// 15.2. Vector Widening Integer Reduction Instructions
//===----------------------------------------------------------------------===//
+let IsRVVWideningReduction = 1 in {
defm PseudoVWREDSUMU : VPseudoReductionV_VS;
defm PseudoVWREDSUM : VPseudoReductionV_VS;
-} // Predicates = [HasStdExtV]
+}
+} // Predicates = [HasVInstructions]
-let Predicates = [HasStdExtV, HasStdExtF] in {
+let Predicates = [HasVInstructionsAnyF] in {
//===----------------------------------------------------------------------===//
// 15.3. Vector Single-Width Floating-Point Reduction Instructions
//===----------------------------------------------------------------------===//
defm PseudoVFREDOSUM : VPseudoReductionV_VS;
-defm PseudoVFREDSUM : VPseudoReductionV_VS;
+defm PseudoVFREDUSUM : VPseudoReductionV_VS;
defm PseudoVFREDMIN : VPseudoReductionV_VS;
defm PseudoVFREDMAX : VPseudoReductionV_VS;
//===----------------------------------------------------------------------===//
// 15.4. Vector Widening Floating-Point Reduction Instructions
//===----------------------------------------------------------------------===//
-defm PseudoVFWREDSUM : VPseudoReductionV_VS;
+let IsRVVWideningReduction = 1 in {
+defm PseudoVFWREDUSUM : VPseudoReductionV_VS;
defm PseudoVFWREDOSUM : VPseudoReductionV_VS;
+}
-} // Predicates = [HasStdExtV, HasStdExtF]
+} // Predicates = [HasVInstructionsAnyF]
//===----------------------------------------------------------------------===//
// 16. Vector Mask Instructions
@@ -3807,11 +4007,11 @@ defm PseudoVFWREDOSUM : VPseudoReductionV_VS;
defm PseudoVMAND: VPseudoBinaryM_MM;
defm PseudoVMNAND: VPseudoBinaryM_MM;
-defm PseudoVMANDNOT: VPseudoBinaryM_MM;
+defm PseudoVMANDN: VPseudoBinaryM_MM;
defm PseudoVMXOR: VPseudoBinaryM_MM;
defm PseudoVMOR: VPseudoBinaryM_MM;
defm PseudoVMNOR: VPseudoBinaryM_MM;
-defm PseudoVMORNOT: VPseudoBinaryM_MM;
+defm PseudoVMORN: VPseudoBinaryM_MM;
defm PseudoVMXNOR: VPseudoBinaryM_MM;
// Pseudo instructions
@@ -3819,10 +4019,10 @@ defm PseudoVMCLR : VPseudoNullaryPseudoM<"VMXOR">;
defm PseudoVMSET : VPseudoNullaryPseudoM<"VMXNOR">;
//===----------------------------------------------------------------------===//
-// 16.2. Vector mask population count vpopc
+// 16.2. Vector mask population count vcpop
//===----------------------------------------------------------------------===//
-defm PseudoVPOPC: VPseudoUnaryS_M;
+defm PseudoVCPOP: VPseudoUnaryS_M;
//===----------------------------------------------------------------------===//
// 16.3. vfirst find-first-set mask bit
@@ -3863,7 +4063,7 @@ defm PseudoVID : VPseudoMaskNullaryV;
// 17.1. Integer Scalar Move Instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtV] in {
+let Predicates = [HasVInstructions] in {
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
foreach m = MxList.m in {
let VLMul = m.value in {
@@ -3880,13 +4080,13 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
}
}
}
-} // Predicates = [HasStdExtV]
+} // Predicates = [HasVInstructions]
//===----------------------------------------------------------------------===//
// 17.2. Floating-Point Scalar Move Instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtV, HasStdExtF] in {
+let Predicates = [HasVInstructionsAnyF] in {
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
foreach m = MxList.m in {
foreach f = FPList.fpinfo in {
@@ -3908,22 +4108,22 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
}
}
}
-} // Predicates = [HasStdExtV, HasStdExtF]
+} // Predicates = [HasVInstructionsAnyF]
//===----------------------------------------------------------------------===//
// 17.3. Vector Slide Instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtV] in {
+let Predicates = [HasVInstructions] in {
defm PseudoVSLIDEUP : VPseudoTernaryV_VX_VI<uimm5, "@earlyclobber $rd">;
defm PseudoVSLIDEDOWN : VPseudoTernaryV_VX_VI<uimm5>;
defm PseudoVSLIDE1UP : VPseudoBinaryV_VX<"@earlyclobber $rd">;
defm PseudoVSLIDE1DOWN : VPseudoBinaryV_VX;
-} // Predicates = [HasStdExtV]
+} // Predicates = [HasVInstructions]
-let Predicates = [HasStdExtV, HasStdExtF] in {
+let Predicates = [HasVInstructionsAnyF] in {
defm PseudoVFSLIDE1UP : VPseudoBinaryV_VF<"@earlyclobber $rd">;
defm PseudoVFSLIDE1DOWN : VPseudoBinaryV_VF;
-} // Predicates = [HasStdExtV, HasStdExtF]
+} // Predicates = [HasVInstructionsAnyF]
//===----------------------------------------------------------------------===//
// 17.4. Vector Register Gather Instructions
@@ -3955,15 +4155,15 @@ let Predicates = [HasStdExtZvamo] in {
defm : VPatAMOV_WD<"int_riscv_vamomaxu", "PseudoVAMOMAXU", AllIntegerVectors>;
} // Predicates = [HasStdExtZvamo]
-let Predicates = [HasStdExtZvamo, HasStdExtF] in {
+let Predicates = [HasStdExtZvamo, HasVInstructionsAnyF] in {
defm : VPatAMOV_WD<"int_riscv_vamoswap", "PseudoVAMOSWAP", AllFloatVectors>;
-} // Predicates = [HasStdExtZvamo, HasStdExtF]
+} // Predicates = [HasStdExtZvamo, HasVInstructionsAnyF]
//===----------------------------------------------------------------------===//
// 12. Vector Integer Arithmetic Instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtV] in {
+let Predicates = [HasVInstructions] in {
//===----------------------------------------------------------------------===//
// 12.1. Vector Single-Width Integer Add and Subtract
//===----------------------------------------------------------------------===//
@@ -4279,9 +4479,9 @@ defm : VPatBinaryV_VV_VX_VI<"int_riscv_vssra", "PseudoVSSRA", AllIntegerVectors,
defm : VPatBinaryV_WV_WX_WI<"int_riscv_vnclipu", "PseudoVNCLIPU", AllWidenableIntVectors>;
defm : VPatBinaryV_WV_WX_WI<"int_riscv_vnclip", "PseudoVNCLIP", AllWidenableIntVectors>;
-} // Predicates = [HasStdExtV]
+} // Predicates = [HasVInstructions]
-let Predicates = [HasStdExtV, HasStdExtF] in {
+let Predicates = [HasVInstructionsAnyF] in {
//===----------------------------------------------------------------------===//
// 14.2. Vector Single-Width Floating-Point Add/Subtract Instructions
//===----------------------------------------------------------------------===//
@@ -4372,12 +4572,16 @@ defm : VPatBinarySwappedM_VV<"int_riscv_vmfge", "PseudoVMFLE", AllFloatVectors>;
//===----------------------------------------------------------------------===//
// 14.14. Vector Floating-Point Classify Instruction
//===----------------------------------------------------------------------===//
-defm : VPatConversionVI_VF<"int_riscv_vfclass", "PseudoVFCLASS">;
+defm : VPatClassifyVI_VF<"int_riscv_vfclass", "PseudoVFCLASS">;
//===----------------------------------------------------------------------===//
// 14.15. Vector Floating-Point Merge Instruction
//===----------------------------------------------------------------------===//
// We can use vmerge.vvm to support vector-vector vfmerge.
+// NOTE: Clang previously used int_riscv_vfmerge for vector-vector, but now uses
+// int_riscv_vmerge. Support both for compatibility.
+defm : VPatBinaryV_VM<"int_riscv_vmerge", "PseudoVMERGE",
+ /*CarryOut = */0, /*vtilist=*/AllFloatVectors>;
defm : VPatBinaryV_VM<"int_riscv_vfmerge", "PseudoVMERGE",
/*CarryOut = */0, /*vtilist=*/AllFloatVectors>;
defm : VPatBinaryV_XM<"int_riscv_vfmerge", "PseudoVFMERGE",
@@ -4423,9 +4627,9 @@ defm : VPatConversionVF_WI <"int_riscv_vfncvt_f_xu_w", "PseudoVFNCVT_F_XU">;
defm : VPatConversionVF_WI <"int_riscv_vfncvt_f_x_w", "PseudoVFNCVT_F_X">;
defm : VPatConversionVF_WF<"int_riscv_vfncvt_f_f_w", "PseudoVFNCVT_F_F">;
defm : VPatConversionVF_WF<"int_riscv_vfncvt_rod_f_f_w", "PseudoVFNCVT_ROD_F_F">;
-} // Predicates = [HasStdExtV, HasStdExtF]
+} // Predicates = [HasVInstructionsAnyF]
-let Predicates = [HasStdExtV] in {
+let Predicates = [HasVInstructions] in {
//===----------------------------------------------------------------------===//
// 15.1. Vector Single-Width Integer Reduction Instructions
//===----------------------------------------------------------------------===//
@@ -4443,40 +4647,40 @@ defm : VPatReductionV_VS<"int_riscv_vredmax", "PseudoVREDMAX">;
//===----------------------------------------------------------------------===//
defm : VPatReductionW_VS<"int_riscv_vwredsumu", "PseudoVWREDSUMU">;
defm : VPatReductionW_VS<"int_riscv_vwredsum", "PseudoVWREDSUM">;
-} // Predicates = [HasStdExtV]
+} // Predicates = [HasVInstructions]
-let Predicates = [HasStdExtV, HasStdExtF] in {
+let Predicates = [HasVInstructionsAnyF] in {
//===----------------------------------------------------------------------===//
// 15.3. Vector Single-Width Floating-Point Reduction Instructions
//===----------------------------------------------------------------------===//
defm : VPatReductionV_VS<"int_riscv_vfredosum", "PseudoVFREDOSUM", /*IsFloat=*/1>;
-defm : VPatReductionV_VS<"int_riscv_vfredsum", "PseudoVFREDSUM", /*IsFloat=*/1>;
+defm : VPatReductionV_VS<"int_riscv_vfredusum", "PseudoVFREDUSUM", /*IsFloat=*/1>;
defm : VPatReductionV_VS<"int_riscv_vfredmin", "PseudoVFREDMIN", /*IsFloat=*/1>;
defm : VPatReductionV_VS<"int_riscv_vfredmax", "PseudoVFREDMAX", /*IsFloat=*/1>;
//===----------------------------------------------------------------------===//
// 15.4. Vector Widening Floating-Point Reduction Instructions
//===----------------------------------------------------------------------===//
-defm : VPatReductionW_VS<"int_riscv_vfwredsum", "PseudoVFWREDSUM", /*IsFloat=*/1>;
+defm : VPatReductionW_VS<"int_riscv_vfwredusum", "PseudoVFWREDUSUM", /*IsFloat=*/1>;
defm : VPatReductionW_VS<"int_riscv_vfwredosum", "PseudoVFWREDOSUM", /*IsFloat=*/1>;
-} // Predicates = [HasStdExtV, HasStdExtF]
+} // Predicates = [HasVInstructionsAnyF]
//===----------------------------------------------------------------------===//
// 16. Vector Mask Instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtV] in {
+let Predicates = [HasVInstructions] in {
//===----------------------------------------------------------------------===//
// 16.1 Vector Mask-Register Logical Instructions
//===----------------------------------------------------------------------===//
defm : VPatBinaryM_MM<"int_riscv_vmand", "PseudoVMAND">;
defm : VPatBinaryM_MM<"int_riscv_vmnand", "PseudoVMNAND">;
-defm : VPatBinaryM_MM<"int_riscv_vmandnot", "PseudoVMANDNOT">;
+defm : VPatBinaryM_MM<"int_riscv_vmandn", "PseudoVMANDN">;
defm : VPatBinaryM_MM<"int_riscv_vmxor", "PseudoVMXOR">;
defm : VPatBinaryM_MM<"int_riscv_vmor", "PseudoVMOR">;
defm : VPatBinaryM_MM<"int_riscv_vmnor", "PseudoVMNOR">;
-defm : VPatBinaryM_MM<"int_riscv_vmornot", "PseudoVMORNOT">;
+defm : VPatBinaryM_MM<"int_riscv_vmorn", "PseudoVMORN">;
defm : VPatBinaryM_MM<"int_riscv_vmxnor", "PseudoVMXNOR">;
// pseudo instructions
@@ -4484,9 +4688,9 @@ defm : VPatNullaryM<"int_riscv_vmclr", "PseudoVMCLR">;
defm : VPatNullaryM<"int_riscv_vmset", "PseudoVMSET">;
//===----------------------------------------------------------------------===//
-// 16.2. Vector mask population count vpopc
+// 16.2. Vector count population in mask vcpop.m
//===----------------------------------------------------------------------===//
-defm : VPatUnaryS_M<"int_riscv_vpopc", "PseudoVPOPC">;
+defm : VPatUnaryS_M<"int_riscv_vcpop", "PseudoVCPOP">;
//===----------------------------------------------------------------------===//
// 16.3. vfirst find-first-set mask bit
@@ -4518,7 +4722,7 @@ defm : VPatUnaryV_M<"int_riscv_viota", "PseudoVIOTA">;
//===----------------------------------------------------------------------===//
defm : VPatNullaryV<"int_riscv_vid", "PseudoVID">;
-} // Predicates = [HasStdExtV]
+} // Predicates = [HasVInstructions]
//===----------------------------------------------------------------------===//
// 17. Vector Permutation Instructions
@@ -4528,19 +4732,19 @@ defm : VPatNullaryV<"int_riscv_vid", "PseudoVID">;
// 17.1. Integer Scalar Move Instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtV] in {
+let Predicates = [HasVInstructions] in {
foreach vti = AllIntegerVectors in {
def : Pat<(riscv_vmv_x_s (vti.Vector vti.RegClass:$rs2)),
(!cast<Instruction>("PseudoVMV_X_S_" # vti.LMul.MX) $rs2, vti.Log2SEW)>;
// vmv.s.x is handled with a custom node in RISCVInstrInfoVVLPatterns.td
}
-} // Predicates = [HasStdExtV]
+} // Predicates = [HasVInstructions]
//===----------------------------------------------------------------------===//
// 17.2. Floating-Point Scalar Move Instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtV, HasStdExtF] in {
+let Predicates = [HasVInstructionsAnyF] in {
foreach fvti = AllFloatVectors in {
defvar instr = !cast<Instruction>("PseudoVFMV_"#fvti.ScalarSuffix#"_S_" #
fvti.LMul.MX);
@@ -4555,52 +4759,52 @@ foreach fvti = AllFloatVectors in {
(fvti.Scalar fvti.ScalarRegClass:$rs2),
GPR:$vl, fvti.Log2SEW)>;
}
-} // Predicates = [HasStdExtV, HasStdExtF]
+} // Predicates = [HasVInstructionsAnyF]
//===----------------------------------------------------------------------===//
// 17.3. Vector Slide Instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtV] in {
+let Predicates = [HasVInstructions] in {
defm : VPatTernaryV_VX_VI<"int_riscv_vslideup", "PseudoVSLIDEUP", AllIntegerVectors, uimm5>;
defm : VPatTernaryV_VX_VI<"int_riscv_vslidedown", "PseudoVSLIDEDOWN", AllIntegerVectors, uimm5>;
defm : VPatBinaryV_VX<"int_riscv_vslide1up", "PseudoVSLIDE1UP", AllIntegerVectors>;
defm : VPatBinaryV_VX<"int_riscv_vslide1down", "PseudoVSLIDE1DOWN", AllIntegerVectors>;
-} // Predicates = [HasStdExtV]
+} // Predicates = [HasVInstructions]
-let Predicates = [HasStdExtV, HasStdExtF] in {
+let Predicates = [HasVInstructionsAnyF] in {
defm : VPatTernaryV_VX_VI<"int_riscv_vslideup", "PseudoVSLIDEUP", AllFloatVectors, uimm5>;
defm : VPatTernaryV_VX_VI<"int_riscv_vslidedown", "PseudoVSLIDEDOWN", AllFloatVectors, uimm5>;
defm : VPatBinaryV_VX<"int_riscv_vfslide1up", "PseudoVFSLIDE1UP", AllFloatVectors>;
defm : VPatBinaryV_VX<"int_riscv_vfslide1down", "PseudoVFSLIDE1DOWN", AllFloatVectors>;
-} // Predicates = [HasStdExtV, HasStdExtF]
+} // Predicates = [HasVInstructionsAnyF]
//===----------------------------------------------------------------------===//
// 17.4. Vector Register Gather Instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtV] in {
+let Predicates = [HasVInstructions] in {
defm : VPatBinaryV_VV_VX_VI_INT<"int_riscv_vrgather", "PseudoVRGATHER",
AllIntegerVectors, uimm5>;
defm : VPatBinaryV_VV_INT_EEW<"int_riscv_vrgatherei16_vv", "PseudoVRGATHEREI16",
/* eew */ 16, AllIntegerVectors>;
-} // Predicates = [HasStdExtV]
+} // Predicates = [HasVInstructions]
-let Predicates = [HasStdExtV, HasStdExtF] in {
+let Predicates = [HasVInstructionsAnyF] in {
defm : VPatBinaryV_VV_VX_VI_INT<"int_riscv_vrgather", "PseudoVRGATHER",
AllFloatVectors, uimm5>;
defm : VPatBinaryV_VV_INT_EEW<"int_riscv_vrgatherei16_vv", "PseudoVRGATHEREI16",
/* eew */ 16, AllFloatVectors>;
-} // Predicates = [HasStdExtV, HasStdExtF]
+} // Predicates = [HasVInstructionsAnyF]
//===----------------------------------------------------------------------===//
// 17.5. Vector Compress Instruction
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtV] in {
+let Predicates = [HasVInstructions] in {
defm : VPatUnaryV_V_AnyMask<"int_riscv_vcompress", "PseudoVCOMPRESS", AllIntegerVectors>;
-} // Predicates = [HasStdExtV]
+} // Predicates = [HasVInstructions]
-let Predicates = [HasStdExtV, HasStdExtF] in {
+let Predicates = [HasVInstructionsAnyF] in {
defm : VPatUnaryV_V_AnyMask<"int_riscv_vcompress", "PseudoVCOMPRESS", AllFloatVectors>;
-} // Predicates = [HasStdExtV, HasStdExtF]
+} // Predicates = [HasVInstructionsAnyF]
// Include the non-intrinsic ISel patterns
include "RISCVInstrInfoVSDPatterns.td"
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index 483fc8bfecda..711ad4335ece 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -89,8 +89,8 @@ multiclass VPatUSLoadStoreWholeVRSDNode<ValueType type,
multiclass VPatUSLoadStoreMaskSDNode<MTypeInfo m>
{
- defvar load_instr = !cast<Instruction>("PseudoVLE1_V_"#m.BX);
- defvar store_instr = !cast<Instruction>("PseudoVSE1_V_"#m.BX);
+ defvar load_instr = !cast<Instruction>("PseudoVLM_V_"#m.BX);
+ defvar store_instr = !cast<Instruction>("PseudoVSM_V_"#m.BX);
// Load
def : Pat<(m.Mask (load BaseAddr:$rs1)),
(load_instr BaseAddr:$rs1, m.AVL, m.Log2SEW)>;
@@ -103,11 +103,9 @@ class VPatBinarySDNode_VV<SDNode vop,
string instruction_name,
ValueType result_type,
ValueType op_type,
- ValueType mask_type,
int sew,
LMULInfo vlmul,
OutPatFrag avl,
- VReg RetClass,
VReg op_reg_class> :
Pat<(result_type (vop
(op_type op_reg_class:$rs1),
@@ -122,11 +120,9 @@ class VPatBinarySDNode_XI<SDNode vop,
string suffix,
ValueType result_type,
ValueType vop_type,
- ValueType mask_type,
int sew,
LMULInfo vlmul,
OutPatFrag avl,
- VReg RetClass,
VReg vop_reg_class,
ComplexPattern SplatPatKind,
DAGOperand xop_kind> :
@@ -141,11 +137,11 @@ class VPatBinarySDNode_XI<SDNode vop,
multiclass VPatBinarySDNode_VV_VX<SDNode vop, string instruction_name> {
foreach vti = AllIntegerVectors in {
def : VPatBinarySDNode_VV<vop, instruction_name,
- vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
- vti.LMul, vti.AVL, vti.RegClass, vti.RegClass>;
+ vti.Vector, vti.Vector, vti.Log2SEW,
+ vti.LMul, vti.AVL, vti.RegClass>;
def : VPatBinarySDNode_XI<vop, instruction_name, "VX",
- vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
- vti.LMul, vti.AVL, vti.RegClass, vti.RegClass,
+ vti.Vector, vti.Vector, vti.Log2SEW,
+ vti.LMul, vti.AVL, vti.RegClass,
SplatPat, GPR>;
}
}
@@ -155,8 +151,8 @@ multiclass VPatBinarySDNode_VV_VX_VI<SDNode vop, string instruction_name,
: VPatBinarySDNode_VV_VX<vop, instruction_name> {
foreach vti = AllIntegerVectors in {
def : VPatBinarySDNode_XI<vop, instruction_name, "VI",
- vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
- vti.LMul, vti.AVL, vti.RegClass, vti.RegClass,
+ vti.Vector, vti.Vector, vti.Log2SEW,
+ vti.LMul, vti.AVL, vti.RegClass,
!cast<ComplexPattern>(SplatPat#_#ImmType),
ImmType>;
}
@@ -167,11 +163,9 @@ class VPatBinarySDNode_VF<SDNode vop,
ValueType result_type,
ValueType vop_type,
ValueType xop_type,
- ValueType mask_type,
int sew,
LMULInfo vlmul,
OutPatFrag avl,
- VReg RetClass,
VReg vop_reg_class,
DAGOperand xop_kind> :
Pat<(result_type (vop (vop_type vop_reg_class:$rs1),
@@ -184,11 +178,11 @@ class VPatBinarySDNode_VF<SDNode vop,
multiclass VPatBinaryFPSDNode_VV_VF<SDNode vop, string instruction_name> {
foreach vti = AllFloatVectors in {
def : VPatBinarySDNode_VV<vop, instruction_name,
- vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
- vti.LMul, vti.AVL, vti.RegClass, vti.RegClass>;
+ vti.Vector, vti.Vector, vti.Log2SEW,
+ vti.LMul, vti.AVL, vti.RegClass>;
def : VPatBinarySDNode_VF<vop, instruction_name#"_V"#vti.ScalarSuffix,
- vti.Vector, vti.Vector, vti.Scalar, vti.Mask,
- vti.Log2SEW, vti.LMul, vti.AVL, vti.RegClass, vti.RegClass,
+ vti.Vector, vti.Vector, vti.Scalar,
+ vti.Log2SEW, vti.LMul, vti.AVL, vti.RegClass,
vti.ScalarRegClass>;
}
}
@@ -373,7 +367,7 @@ multiclass VPatNConvertFP2ISDNode_V<SDNode vop, string instruction_name> {
// Patterns.
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtV] in {
+let Predicates = [HasVInstructions] in {
// 7.4. Vector Unit-Stride Instructions
foreach vti = !listconcat(FractionalGroupIntegerVectors,
@@ -491,17 +485,17 @@ defm : VPatBinarySDNode_VV_VX<srem, "PseudoVREM">;
foreach vti = AllIntegerVectors in {
// NOTE: We choose VMADD because it has the most commuting freedom. So it
// works best with how TwoAddressInstructionPass tries commuting.
- defvar suffix = vti.LMul.MX # "_COMMUTABLE";
+ defvar suffix = vti.LMul.MX;
def : Pat<(vti.Vector (add vti.RegClass:$rs2,
(mul_oneuse vti.RegClass:$rs1, vti.RegClass:$rd))),
(!cast<Instruction>("PseudoVMADD_VV_"# suffix)
vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
- vti.AVL, vti.Log2SEW)>;
+ vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>;
def : Pat<(vti.Vector (sub vti.RegClass:$rs2,
(mul_oneuse vti.RegClass:$rs1, vti.RegClass:$rd))),
(!cast<Instruction>("PseudoVNMSUB_VV_"# suffix)
vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
- vti.AVL, vti.Log2SEW)>;
+ vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>;
// The choice of VMADD here is arbitrary, vmadd.vx and vmacc.vx are equally
// commutable.
@@ -510,32 +504,32 @@ foreach vti = AllIntegerVectors in {
vti.RegClass:$rd))),
(!cast<Instruction>("PseudoVMADD_VX_" # suffix)
vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- vti.AVL, vti.Log2SEW)>;
+ vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>;
def : Pat<(vti.Vector (sub vti.RegClass:$rs2,
(mul_oneuse (SplatPat XLenVT:$rs1),
vti.RegClass:$rd))),
(!cast<Instruction>("PseudoVNMSUB_VX_" # suffix)
vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- vti.AVL, vti.Log2SEW)>;
+ vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>;
}
// 12.15. Vector Integer Merge Instructions
foreach vti = AllIntegerVectors in {
- def : Pat<(vti.Vector (vselect (vti.Mask VMV0:$vm), vti.RegClass:$rs1,
+ def : Pat<(vti.Vector (vselect (vti.Mask V0), vti.RegClass:$rs1,
vti.RegClass:$rs2)),
(!cast<Instruction>("PseudoVMERGE_VVM_"#vti.LMul.MX)
- vti.RegClass:$rs2, vti.RegClass:$rs1, VMV0:$vm,
+ vti.RegClass:$rs2, vti.RegClass:$rs1, (vti.Mask V0),
vti.AVL, vti.Log2SEW)>;
- def : Pat<(vti.Vector (vselect (vti.Mask VMV0:$vm), (SplatPat XLenVT:$rs1),
+ def : Pat<(vti.Vector (vselect (vti.Mask V0), (SplatPat XLenVT:$rs1),
vti.RegClass:$rs2)),
(!cast<Instruction>("PseudoVMERGE_VXM_"#vti.LMul.MX)
- vti.RegClass:$rs2, GPR:$rs1, VMV0:$vm, vti.AVL, vti.Log2SEW)>;
+ vti.RegClass:$rs2, GPR:$rs1, (vti.Mask V0), vti.AVL, vti.Log2SEW)>;
- def : Pat<(vti.Vector (vselect (vti.Mask VMV0:$vm), (SplatPat_simm5 simm5:$rs1),
+ def : Pat<(vti.Vector (vselect (vti.Mask V0), (SplatPat_simm5 simm5:$rs1),
vti.RegClass:$rs2)),
(!cast<Instruction>("PseudoVMERGE_VIM_"#vti.LMul.MX)
- vti.RegClass:$rs2, simm5:$rs1, VMV0:$vm, vti.AVL, vti.Log2SEW)>;
+ vti.RegClass:$rs2, simm5:$rs1, (vti.Mask V0), vti.AVL, vti.Log2SEW)>;
}
// 12.1. Vector Single-Width Saturating Add and Subtract
@@ -567,10 +561,10 @@ foreach mti = AllMasks in {
VR:$rs1, VR:$rs2, mti.AVL, mti.Log2SEW)>;
def : Pat<(mti.Mask (and VR:$rs1, (rvv_vnot VR:$rs2))),
- (!cast<Instruction>("PseudoVMANDNOT_MM_"#mti.LMul.MX)
+ (!cast<Instruction>("PseudoVMANDN_MM_"#mti.LMul.MX)
VR:$rs1, VR:$rs2, mti.AVL, mti.Log2SEW)>;
def : Pat<(mti.Mask (or VR:$rs1, (rvv_vnot VR:$rs2))),
- (!cast<Instruction>("PseudoVMORNOT_MM_"#mti.LMul.MX)
+ (!cast<Instruction>("PseudoVMORN_MM_"#mti.LMul.MX)
VR:$rs1, VR:$rs2, mti.AVL, mti.Log2SEW)>;
// Handle rvv_vnot the same as the vmnot.m pseudoinstruction.
@@ -579,9 +573,9 @@ foreach mti = AllMasks in {
VR:$rs, VR:$rs, mti.AVL, mti.Log2SEW)>;
}
-} // Predicates = [HasStdExtV]
+} // Predicates = [HasVInstructions]
-let Predicates = [HasStdExtV, HasStdExtF] in {
+let Predicates = [HasVInstructionsAnyF] in {
// 14.2. Vector Single-Width Floating-Point Add/Subtract Instructions
defm : VPatBinaryFPSDNode_VV_VF<fadd, "PseudoVFADD">;
@@ -597,27 +591,27 @@ defm : VPatBinaryFPSDNode_R_VF<fdiv, "PseudoVFRDIV">;
foreach fvti = AllFloatVectors in {
// NOTE: We choose VFMADD because it has the most commuting freedom. So it
// works best with how TwoAddressInstructionPass tries commuting.
- defvar suffix = fvti.LMul.MX # "_COMMUTABLE";
+ defvar suffix = fvti.LMul.MX;
def : Pat<(fvti.Vector (fma fvti.RegClass:$rs1, fvti.RegClass:$rd,
fvti.RegClass:$rs2)),
(!cast<Instruction>("PseudoVFMADD_VV_"# suffix)
fvti.RegClass:$rd, fvti.RegClass:$rs1, fvti.RegClass:$rs2,
- fvti.AVL, fvti.Log2SEW)>;
+ fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>;
def : Pat<(fvti.Vector (fma fvti.RegClass:$rs1, fvti.RegClass:$rd,
(fneg fvti.RegClass:$rs2))),
(!cast<Instruction>("PseudoVFMSUB_VV_"# suffix)
fvti.RegClass:$rd, fvti.RegClass:$rs1, fvti.RegClass:$rs2,
- fvti.AVL, fvti.Log2SEW)>;
+ fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>;
def : Pat<(fvti.Vector (fma (fneg fvti.RegClass:$rs1), fvti.RegClass:$rd,
(fneg fvti.RegClass:$rs2))),
(!cast<Instruction>("PseudoVFNMADD_VV_"# suffix)
fvti.RegClass:$rd, fvti.RegClass:$rs1, fvti.RegClass:$rs2,
- fvti.AVL, fvti.Log2SEW)>;
+ fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>;
def : Pat<(fvti.Vector (fma (fneg fvti.RegClass:$rs1), fvti.RegClass:$rd,
fvti.RegClass:$rs2)),
(!cast<Instruction>("PseudoVFNMSUB_VV_"# suffix)
fvti.RegClass:$rd, fvti.RegClass:$rs1, fvti.RegClass:$rs2,
- fvti.AVL, fvti.Log2SEW)>;
+ fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>;
// The choice of VFMADD here is arbitrary, vfmadd.vf and vfmacc.vf are equally
// commutable.
@@ -625,35 +619,35 @@ foreach fvti = AllFloatVectors in {
fvti.RegClass:$rd, fvti.RegClass:$rs2)),
(!cast<Instruction>("PseudoVFMADD_V" # fvti.ScalarSuffix # "_" # suffix)
fvti.RegClass:$rd, fvti.ScalarRegClass:$rs1, fvti.RegClass:$rs2,
- fvti.AVL, fvti.Log2SEW)>;
+ fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>;
def : Pat<(fvti.Vector (fma (splat_vector fvti.ScalarRegClass:$rs1),
fvti.RegClass:$rd, (fneg fvti.RegClass:$rs2))),
(!cast<Instruction>("PseudoVFMSUB_V" # fvti.ScalarSuffix # "_" # suffix)
fvti.RegClass:$rd, fvti.ScalarRegClass:$rs1, fvti.RegClass:$rs2,
- fvti.AVL, fvti.Log2SEW)>;
+ fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>;
def : Pat<(fvti.Vector (fma (splat_vector fvti.ScalarRegClass:$rs1),
(fneg fvti.RegClass:$rd), (fneg fvti.RegClass:$rs2))),
(!cast<Instruction>("PseudoVFNMADD_V" # fvti.ScalarSuffix # "_" # suffix)
fvti.RegClass:$rd, fvti.ScalarRegClass:$rs1, fvti.RegClass:$rs2,
- fvti.AVL, fvti.Log2SEW)>;
+ fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>;
def : Pat<(fvti.Vector (fma (splat_vector fvti.ScalarRegClass:$rs1),
(fneg fvti.RegClass:$rd), fvti.RegClass:$rs2)),
(!cast<Instruction>("PseudoVFNMSUB_V" # fvti.ScalarSuffix # "_" # suffix)
fvti.RegClass:$rd, fvti.ScalarRegClass:$rs1, fvti.RegClass:$rs2,
- fvti.AVL, fvti.Log2SEW)>;
+ fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>;
// The splat might be negated.
def : Pat<(fvti.Vector (fma (fneg (splat_vector fvti.ScalarRegClass:$rs1)),
fvti.RegClass:$rd, (fneg fvti.RegClass:$rs2))),
(!cast<Instruction>("PseudoVFNMADD_V" # fvti.ScalarSuffix # "_" # suffix)
fvti.RegClass:$rd, fvti.ScalarRegClass:$rs1, fvti.RegClass:$rs2,
- fvti.AVL, fvti.Log2SEW)>;
+ fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>;
def : Pat<(fvti.Vector (fma (fneg (splat_vector fvti.ScalarRegClass:$rs1)),
fvti.RegClass:$rd, fvti.RegClass:$rs2)),
(!cast<Instruction>("PseudoVFNMSUB_V" # fvti.ScalarSuffix # "_" # suffix)
fvti.RegClass:$rd, fvti.ScalarRegClass:$rs1, fvti.RegClass:$rs2,
- fvti.AVL, fvti.Log2SEW)>;
+ fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>;
}
foreach vti = AllFloatVectors in {
@@ -711,25 +705,25 @@ defm : VPatFPSetCCSDNode_VV_VF_FV<SETOLE, "PseudoVMFLE", "PseudoVMFGE">;
// 12.15. Vector Integer Merge Instructions
// 14.15. Vector Floating-Point Merge Instruction
foreach fvti = AllFloatVectors in {
- def : Pat<(fvti.Vector (vselect (fvti.Mask VMV0:$vm), fvti.RegClass:$rs1,
+ def : Pat<(fvti.Vector (vselect (fvti.Mask V0), fvti.RegClass:$rs1,
fvti.RegClass:$rs2)),
(!cast<Instruction>("PseudoVMERGE_VVM_"#fvti.LMul.MX)
- fvti.RegClass:$rs2, fvti.RegClass:$rs1, VMV0:$vm,
+ fvti.RegClass:$rs2, fvti.RegClass:$rs1, (fvti.Mask V0),
fvti.AVL, fvti.Log2SEW)>;
- def : Pat<(fvti.Vector (vselect (fvti.Mask VMV0:$vm),
+ def : Pat<(fvti.Vector (vselect (fvti.Mask V0),
(splat_vector fvti.ScalarRegClass:$rs1),
fvti.RegClass:$rs2)),
(!cast<Instruction>("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX)
fvti.RegClass:$rs2,
(fvti.Scalar fvti.ScalarRegClass:$rs1),
- VMV0:$vm, fvti.AVL, fvti.Log2SEW)>;
+ (fvti.Mask V0), fvti.AVL, fvti.Log2SEW)>;
- def : Pat<(fvti.Vector (vselect (fvti.Mask VMV0:$vm),
+ def : Pat<(fvti.Vector (vselect (fvti.Mask V0),
(splat_vector (fvti.Scalar fpimm0)),
fvti.RegClass:$rs2)),
(!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX)
- fvti.RegClass:$rs2, 0, VMV0:$vm, fvti.AVL, fvti.Log2SEW)>;
+ fvti.RegClass:$rs2, 0, (fvti.Mask V0), fvti.AVL, fvti.Log2SEW)>;
}
// 14.17. Vector Single-Width Floating-Point/Integer Type-Convert Instructions
@@ -763,13 +757,13 @@ foreach fvtiToFWti = AllWidenableFloatVectors in {
(!cast<Instruction>("PseudoVFNCVT_F_F_W_"#fvti.LMul.MX)
fwti.RegClass:$rs1, fvti.AVL, fvti.Log2SEW)>;
}
-} // Predicates = [HasStdExtV, HasStdExtF]
+} // Predicates = [HasVInstructionsAnyF]
//===----------------------------------------------------------------------===//
// Vector Splats
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtV] in {
+let Predicates = [HasVInstructions] in {
foreach vti = AllIntegerVectors in {
def : Pat<(vti.Vector (SplatPat GPR:$rs1)),
(!cast<Instruction>("PseudoVMV_V_X_" # vti.LMul.MX)
@@ -778,9 +772,9 @@ foreach vti = AllIntegerVectors in {
(!cast<Instruction>("PseudoVMV_V_I_" # vti.LMul.MX)
simm5:$rs1, vti.AVL, vti.Log2SEW)>;
}
-} // Predicates = [HasStdExtV]
+} // Predicates = [HasVInstructions]
-let Predicates = [HasStdExtV, HasStdExtF] in {
+let Predicates = [HasVInstructionsAnyF] in {
foreach fvti = AllFloatVectors in {
def : Pat<(fvti.Vector (splat_vector fvti.ScalarRegClass:$rs1)),
(!cast<Instruction>("PseudoVFMV_V_"#fvti.ScalarSuffix#"_"#fvti.LMul.MX)
@@ -791,12 +785,12 @@ foreach fvti = AllFloatVectors in {
(!cast<Instruction>("PseudoVMV_V_I_"#fvti.LMul.MX)
0, fvti.AVL, fvti.Log2SEW)>;
}
-} // Predicates = [HasStdExtV, HasStdExtF]
+} // Predicates = [HasVInstructionsAnyF]
//===----------------------------------------------------------------------===//
// Vector Element Extracts
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtV, HasStdExtF] in
+let Predicates = [HasVInstructionsAnyF] in
foreach vti = AllFloatVectors in {
defvar vmv_f_s_inst = !cast<Instruction>(!strconcat("PseudoVFMV_",
vti.ScalarSuffix,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index c9c42152c47b..73b97e1c3675 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -199,7 +199,7 @@ def true_mask : PatLeaf<(riscv_vmset_vl (XLenVT srcvalue))>;
def riscv_vmnot_vl : PatFrag<(ops node:$rs, node:$vl),
(riscv_vmxor_vl node:$rs, true_mask, node:$vl)>;
-def riscv_vpopc_vl : SDNode<"RISCVISD::VPOPC_VL",
+def riscv_vcpop_vl : SDNode<"RISCVISD::VCPOP_VL",
SDTypeProfile<1, 3, [SDTCisVT<0, XLenVT>,
SDTCisVec<1>, SDTCisInt<1>,
SDTCVecEltisVT<2, i1>,
@@ -230,9 +230,9 @@ def SDT_RISCVVWMUL_VL : SDTypeProfile<1, 4, [SDTCisVec<0>,
def riscv_vwmul_vl : SDNode<"RISCVISD::VWMUL_VL", SDT_RISCVVWMUL_VL, [SDNPCommutative]>;
def riscv_vwmulu_vl : SDNode<"RISCVISD::VWMULU_VL", SDT_RISCVVWMUL_VL, [SDNPCommutative]>;
-def SDTRVVVecReduce : SDTypeProfile<1, 4, [
- SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<0, 2>, SDTCVecEltisVT<3, i1>,
- SDTCisSameNumEltsAs<1, 3>, SDTCisVT<4, XLenVT>
+def SDTRVVVecReduce : SDTypeProfile<1, 5, [
+ SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisSameAs<0, 3>,
+ SDTCVecEltisVT<4, i1>, SDTCisSameNumEltsAs<2, 4>, SDTCisVT<5, XLenVT>
]>;
def riscv_mul_vl_oneuse : PatFrag<(ops node:$A, node:$B, node:$C, node:$D),
@@ -273,7 +273,6 @@ multiclass VPatBinaryVL_VV<SDNode vop,
ValueType mask_type,
int sew,
LMULInfo vlmul,
- VReg RetClass,
VReg op_reg_class> {
def : Pat<(result_type (vop
(op_type op_reg_class:$rs1),
@@ -287,13 +286,13 @@ multiclass VPatBinaryVL_VV<SDNode vop,
def : Pat<(result_type (vop
(op_type op_reg_class:$rs1),
(op_type op_reg_class:$rs2),
- (mask_type VMV0:$vm),
+ (mask_type V0),
VLOpFrag)),
(!cast<Instruction>(instruction_name#"_VV_"# vlmul.MX#"_MASK")
(result_type (IMPLICIT_DEF)),
op_reg_class:$rs1,
op_reg_class:$rs2,
- VMV0:$vm, GPR:$vl, sew)>;
+ (mask_type V0), GPR:$vl, sew, TAIL_AGNOSTIC)>;
}
multiclass VPatBinaryVL_XI<SDNode vop,
@@ -304,7 +303,6 @@ multiclass VPatBinaryVL_XI<SDNode vop,
ValueType mask_type,
int sew,
LMULInfo vlmul,
- VReg RetClass,
VReg vop_reg_class,
ComplexPattern SplatPatKind,
DAGOperand xop_kind> {
@@ -320,24 +318,23 @@ multiclass VPatBinaryVL_XI<SDNode vop,
def : Pat<(result_type (vop
(vop_type vop_reg_class:$rs1),
(vop_type (SplatPatKind (XLenVT xop_kind:$rs2))),
- (mask_type VMV0:$vm),
+ (mask_type V0),
VLOpFrag)),
(!cast<Instruction>(instruction_name#_#suffix#_# vlmul.MX#"_MASK")
(result_type (IMPLICIT_DEF)),
vop_reg_class:$rs1,
xop_kind:$rs2,
- VMV0:$vm, GPR:$vl, sew)>;
+ (mask_type V0), GPR:$vl, sew, TAIL_AGNOSTIC)>;
}
multiclass VPatBinaryVL_VV_VX<SDNode vop, string instruction_name> {
foreach vti = AllIntegerVectors in {
defm : VPatBinaryVL_VV<vop, instruction_name,
vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
- vti.LMul, vti.RegClass, vti.RegClass>;
+ vti.LMul, vti.RegClass>;
defm : VPatBinaryVL_XI<vop, instruction_name, "VX",
vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
- vti.LMul, vti.RegClass, vti.RegClass,
- SplatPat, GPR>;
+ vti.LMul, vti.RegClass, SplatPat, GPR>;
}
}
@@ -347,7 +344,7 @@ multiclass VPatBinaryVL_VV_VX_VI<SDNode vop, string instruction_name,
foreach vti = AllIntegerVectors in {
defm : VPatBinaryVL_XI<vop, instruction_name, "VI",
vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
- vti.LMul, vti.RegClass, vti.RegClass,
+ vti.LMul, vti.RegClass,
!cast<ComplexPattern>(SplatPat#_#ImmType),
ImmType>;
}
@@ -359,11 +356,10 @@ multiclass VPatBinaryWVL_VV_VX<SDNode vop, string instruction_name> {
defvar wti = VtiToWti.Wti;
defm : VPatBinaryVL_VV<vop, instruction_name,
wti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
- vti.LMul, wti.RegClass, vti.RegClass>;
+ vti.LMul, vti.RegClass>;
defm : VPatBinaryVL_XI<vop, instruction_name, "VX",
wti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
- vti.LMul, wti.RegClass, vti.RegClass,
- SplatPat, GPR>;
+ vti.LMul, vti.RegClass, SplatPat, GPR>;
}
}
@@ -374,7 +370,6 @@ class VPatBinaryVL_VF<SDNode vop,
ValueType mask_type,
int sew,
LMULInfo vlmul,
- VReg RetClass,
VReg vop_reg_class,
RegisterClass scalar_reg_class> :
Pat<(result_type (vop (vop_type vop_reg_class:$rs1),
@@ -390,11 +385,10 @@ multiclass VPatBinaryFPVL_VV_VF<SDNode vop, string instruction_name> {
foreach vti = AllFloatVectors in {
defm : VPatBinaryVL_VV<vop, instruction_name,
vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
- vti.LMul, vti.RegClass, vti.RegClass>;
+ vti.LMul, vti.RegClass>;
def : VPatBinaryVL_VF<vop, instruction_name#"_V"#vti.ScalarSuffix,
vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
- vti.LMul, vti.RegClass, vti.RegClass,
- vti.ScalarRegClass>;
+ vti.LMul, vti.RegClass, vti.ScalarRegClass>;
}
}
@@ -589,14 +583,22 @@ multiclass VPatNConvertI2FPSDNode_V_VL<SDNode vop, string instruction_name> {
multiclass VPatReductionVL<SDNode vop, string instruction_name, bit is_float> {
foreach vti = !if(is_float, AllFloatVectors, AllIntegerVectors) in {
defvar vti_m1 = !cast<VTypeInfo>(!if(is_float, "VF", "VI") # vti.SEW # "M1");
- def: Pat<(vti_m1.Vector (vop (vti.Vector vti.RegClass:$rs1), VR:$rs2,
+ def: Pat<(vti_m1.Vector (vop (vti_m1.Vector VR:$merge), (vti.Vector vti.RegClass:$rs1), VR:$rs2,
(vti.Mask true_mask),
VLOpFrag)),
(!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX)
- (vti_m1.Vector (IMPLICIT_DEF)),
+ (vti_m1.Vector VR:$merge),
(vti.Vector vti.RegClass:$rs1),
(vti_m1.Vector VR:$rs2),
GPR:$vl, vti.Log2SEW)>;
+
+ def: Pat<(vti_m1.Vector (vop (vti_m1.Vector VR:$merge), (vti.Vector vti.RegClass:$rs1), VR:$rs2,
+ (vti.Mask V0), VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_MASK")
+ (vti_m1.Vector VR:$merge),
+ (vti.Vector vti.RegClass:$rs1),
+ (vti_m1.Vector VR:$rs2),
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
}
}
@@ -604,7 +606,7 @@ multiclass VPatReductionVL<SDNode vop, string instruction_name, bit is_float> {
// Patterns.
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtV] in {
+let Predicates = [HasVInstructions] in {
// 7.4. Vector Unit-Stride Instructions
foreach vti = AllVectors in {
@@ -620,8 +622,8 @@ foreach vti = AllVectors in {
}
foreach mti = AllMasks in {
- defvar load_instr = !cast<Instruction>("PseudoVLE1_V_"#mti.BX);
- defvar store_instr = !cast<Instruction>("PseudoVSE1_V_"#mti.BX);
+ defvar load_instr = !cast<Instruction>("PseudoVLM_V_"#mti.BX);
+ defvar store_instr = !cast<Instruction>("PseudoVSM_V_"#mti.BX);
def : Pat<(mti.Mask (riscv_vle_vl BaseAddr:$rs1, VLOpFrag)),
(load_instr BaseAddr:$rs1, GPR:$vl, mti.Log2SEW)>;
def : Pat<(riscv_vse_vl (mti.Mask VR:$rs2), BaseAddr:$rs1,
@@ -641,22 +643,22 @@ foreach vti = AllIntegerVectors in {
(!cast<Instruction>("PseudoVRSUB_VX_"# vti.LMul.MX)
vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW)>;
def : Pat<(riscv_sub_vl (vti.Vector (SplatPat (XLenVT GPR:$rs2))),
- (vti.Vector vti.RegClass:$rs1), (vti.Mask VMV0:$vm),
+ (vti.Vector vti.RegClass:$rs1), (vti.Mask V0),
VLOpFrag),
(!cast<Instruction>("PseudoVRSUB_VX_"# vti.LMul.MX#"_MASK")
(vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, GPR:$rs2,
- VMV0:$vm, GPR:$vl, vti.Log2SEW)>;
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
def : Pat<(riscv_sub_vl (vti.Vector (SplatPat_simm5 simm5:$rs2)),
(vti.Vector vti.RegClass:$rs1), (vti.Mask true_mask),
VLOpFrag),
(!cast<Instruction>("PseudoVRSUB_VI_"# vti.LMul.MX)
vti.RegClass:$rs1, simm5:$rs2, GPR:$vl, vti.Log2SEW)>;
def : Pat<(riscv_sub_vl (vti.Vector (SplatPat_simm5 simm5:$rs2)),
- (vti.Vector vti.RegClass:$rs1), (vti.Mask VMV0:$vm),
+ (vti.Vector vti.RegClass:$rs1), (vti.Mask V0),
VLOpFrag),
(!cast<Instruction>("PseudoVRSUB_VI_"# vti.LMul.MX#"_MASK")
(vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, simm5:$rs2,
- VMV0:$vm, GPR:$vl, vti.Log2SEW)>;
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
}
// 12.3. Vector Integer Extension
@@ -794,7 +796,7 @@ defm : VPatBinaryWVL_VV_VX<riscv_vwmulu_vl, "PseudoVWMULU">;
foreach vti = AllIntegerVectors in {
// NOTE: We choose VMADD because it has the most commuting freedom. So it
// works best with how TwoAddressInstructionPass tries commuting.
- defvar suffix = vti.LMul.MX # "_COMMUTABLE";
+ defvar suffix = vti.LMul.MX;
def : Pat<(vti.Vector
(riscv_add_vl vti.RegClass:$rs2,
(riscv_mul_vl_oneuse vti.RegClass:$rs1,
@@ -803,7 +805,7 @@ foreach vti = AllIntegerVectors in {
(vti.Mask true_mask), VLOpFrag)),
(!cast<Instruction>("PseudoVMADD_VV_"# suffix)
vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW)>;
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
def : Pat<(vti.Vector
(riscv_sub_vl vti.RegClass:$rs2,
(riscv_mul_vl_oneuse vti.RegClass:$rs1,
@@ -812,7 +814,7 @@ foreach vti = AllIntegerVectors in {
(vti.Mask true_mask), VLOpFrag)),
(!cast<Instruction>("PseudoVNMSUB_VV_"# suffix)
vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW)>;
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
// The choice of VMADD here is arbitrary, vmadd.vx and vmacc.vx are equally
// commutable.
@@ -824,7 +826,7 @@ foreach vti = AllIntegerVectors in {
(vti.Mask true_mask), VLOpFrag)),
(!cast<Instruction>("PseudoVMADD_VX_" # suffix)
vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW)>;
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
def : Pat<(vti.Vector
(riscv_sub_vl vti.RegClass:$rs2,
(riscv_mul_vl_oneuse (SplatPat XLenVT:$rs1),
@@ -834,7 +836,7 @@ foreach vti = AllIntegerVectors in {
(vti.Mask true_mask), VLOpFrag)),
(!cast<Instruction>("PseudoVNMSUB_VX_" # suffix)
vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW)>;
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
}
// 12.14. Vector Widening Integer Multiply-Add Instructions
@@ -847,18 +849,18 @@ foreach vtiTowti = AllWidenableIntVectors in {
(vti.Vector vti.RegClass:$rs2),
(vti.Mask true_mask), VLOpFrag),
(vti.Mask true_mask), VLOpFrag)),
- (!cast<Instruction>("PseudoVWMACC_VV_" # vti.LMul.MX # "_TA")
+ (!cast<Instruction>("PseudoVWMACC_VV_" # vti.LMul.MX)
wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW)>;
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
def : Pat<(wti.Vector
(riscv_add_vl wti.RegClass:$rd,
(riscv_vwmulu_vl_oneuse vti.RegClass:$rs1,
(vti.Vector vti.RegClass:$rs2),
(vti.Mask true_mask), VLOpFrag),
(vti.Mask true_mask), VLOpFrag)),
- (!cast<Instruction>("PseudoVWMACCU_VV_" # vti.LMul.MX # "_TA")
+ (!cast<Instruction>("PseudoVWMACCU_VV_" # vti.LMul.MX)
wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW)>;
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
def : Pat<(wti.Vector
(riscv_add_vl wti.RegClass:$rd,
@@ -866,43 +868,43 @@ foreach vtiTowti = AllWidenableIntVectors in {
(vti.Vector vti.RegClass:$rs2),
(vti.Mask true_mask), VLOpFrag),
(vti.Mask true_mask), VLOpFrag)),
- (!cast<Instruction>("PseudoVWMACC_VX_" # vti.LMul.MX # "_TA")
+ (!cast<Instruction>("PseudoVWMACC_VX_" # vti.LMul.MX)
wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW)>;
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
def : Pat<(wti.Vector
(riscv_add_vl wti.RegClass:$rd,
(riscv_vwmulu_vl_oneuse (SplatPat XLenVT:$rs1),
(vti.Vector vti.RegClass:$rs2),
(vti.Mask true_mask), VLOpFrag),
(vti.Mask true_mask), VLOpFrag)),
- (!cast<Instruction>("PseudoVWMACCU_VX_" # vti.LMul.MX # "_TA")
+ (!cast<Instruction>("PseudoVWMACCU_VX_" # vti.LMul.MX)
wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW)>;
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
}
// 12.15. Vector Integer Merge Instructions
foreach vti = AllIntegerVectors in {
- def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask VMV0:$vm),
+ def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask V0),
vti.RegClass:$rs1,
vti.RegClass:$rs2,
VLOpFrag)),
(!cast<Instruction>("PseudoVMERGE_VVM_"#vti.LMul.MX)
- vti.RegClass:$rs2, vti.RegClass:$rs1, VMV0:$vm,
+ vti.RegClass:$rs2, vti.RegClass:$rs1, (vti.Mask V0),
GPR:$vl, vti.Log2SEW)>;
- def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask VMV0:$vm),
+ def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask V0),
(SplatPat XLenVT:$rs1),
vti.RegClass:$rs2,
VLOpFrag)),
(!cast<Instruction>("PseudoVMERGE_VXM_"#vti.LMul.MX)
- vti.RegClass:$rs2, GPR:$rs1, VMV0:$vm, GPR:$vl, vti.Log2SEW)>;
+ vti.RegClass:$rs2, GPR:$rs1, (vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
- def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask VMV0:$vm),
+ def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask V0),
(SplatPat_simm5 simm5:$rs1),
vti.RegClass:$rs2,
VLOpFrag)),
(!cast<Instruction>("PseudoVMERGE_VIM_"#vti.LMul.MX)
- vti.RegClass:$rs2, simm5:$rs1, VMV0:$vm, GPR:$vl, vti.Log2SEW)>;
+ vti.RegClass:$rs2, simm5:$rs1, (vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
}
// 12.16. Vector Integer Move Instructions
@@ -923,10 +925,10 @@ defm : VPatBinaryVL_VV_VX_VI<riscv_uaddsat_vl, "PseudoVSADDU">;
defm : VPatBinaryVL_VV_VX<riscv_ssubsat_vl, "PseudoVSSUB">;
defm : VPatBinaryVL_VV_VX<riscv_usubsat_vl, "PseudoVSSUBU">;
-} // Predicates = [HasStdExtV]
+} // Predicates = [HasVInstructions]
// 15.1. Vector Single-Width Integer Reduction Instructions
-let Predicates = [HasStdExtV] in {
+let Predicates = [HasVInstructions] in {
defm : VPatReductionVL<rvv_vecreduce_ADD_vl, "PseudoVREDSUM", /*is_float*/0>;
defm : VPatReductionVL<rvv_vecreduce_UMAX_vl, "PseudoVREDMAXU", /*is_float*/0>;
defm : VPatReductionVL<rvv_vecreduce_SMAX_vl, "PseudoVREDMAX", /*is_float*/0>;
@@ -935,17 +937,17 @@ defm : VPatReductionVL<rvv_vecreduce_SMIN_vl, "PseudoVREDMIN", /*is_float*/0>;
defm : VPatReductionVL<rvv_vecreduce_AND_vl, "PseudoVREDAND", /*is_float*/0>;
defm : VPatReductionVL<rvv_vecreduce_OR_vl, "PseudoVREDOR", /*is_float*/0>;
defm : VPatReductionVL<rvv_vecreduce_XOR_vl, "PseudoVREDXOR", /*is_float*/0>;
-} // Predicates = [HasStdExtV]
+} // Predicates = [HasVInstructions]
// 15.3. Vector Single-Width Floating-Point Reduction Instructions
-let Predicates = [HasStdExtV, HasStdExtF] in {
+let Predicates = [HasVInstructionsAnyF] in {
defm : VPatReductionVL<rvv_vecreduce_SEQ_FADD_vl, "PseudoVFREDOSUM", /*is_float*/1>;
-defm : VPatReductionVL<rvv_vecreduce_FADD_vl, "PseudoVFREDSUM", /*is_float*/1>;
+defm : VPatReductionVL<rvv_vecreduce_FADD_vl, "PseudoVFREDUSUM", /*is_float*/1>;
defm : VPatReductionVL<rvv_vecreduce_FMIN_vl, "PseudoVFREDMIN", /*is_float*/1>;
defm : VPatReductionVL<rvv_vecreduce_FMAX_vl, "PseudoVFREDMAX", /*is_float*/1>;
-} // Predicates = [HasStdExtV, HasStdExtF]
+} // Predicates = [HasVInstructionsAnyF]
-let Predicates = [HasStdExtV, HasStdExtF] in {
+let Predicates = [HasVInstructionsAnyF] in {
// 14.2. Vector Single-Width Floating-Point Add/Subtract Instructions
defm : VPatBinaryFPVL_VV_VF<riscv_fadd_vl, "PseudoVFADD">;
@@ -961,13 +963,13 @@ defm : VPatBinaryFPVL_R_VF<riscv_fdiv_vl, "PseudoVFRDIV">;
foreach vti = AllFloatVectors in {
// NOTE: We choose VFMADD because it has the most commuting freedom. So it
// works best with how TwoAddressInstructionPass tries commuting.
- defvar suffix = vti.LMul.MX # "_COMMUTABLE";
+ defvar suffix = vti.LMul.MX;
def : Pat<(vti.Vector (riscv_fma_vl vti.RegClass:$rs1, vti.RegClass:$rd,
vti.RegClass:$rs2, (vti.Mask true_mask),
VLOpFrag)),
(!cast<Instruction>("PseudoVFMADD_VV_"# suffix)
vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW)>;
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
def : Pat<(vti.Vector (riscv_fma_vl vti.RegClass:$rs1, vti.RegClass:$rd,
(riscv_fneg_vl vti.RegClass:$rs2,
(vti.Mask true_mask),
@@ -976,7 +978,7 @@ foreach vti = AllFloatVectors in {
VLOpFrag)),
(!cast<Instruction>("PseudoVFMSUB_VV_"# suffix)
vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW)>;
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
def : Pat<(vti.Vector (riscv_fma_vl (riscv_fneg_vl vti.RegClass:$rs1,
(vti.Mask true_mask),
VLOpFrag),
@@ -988,7 +990,7 @@ foreach vti = AllFloatVectors in {
VLOpFrag)),
(!cast<Instruction>("PseudoVFNMADD_VV_"# suffix)
vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW)>;
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
def : Pat<(vti.Vector (riscv_fma_vl (riscv_fneg_vl vti.RegClass:$rs1,
(vti.Mask true_mask),
VLOpFrag),
@@ -997,7 +999,7 @@ foreach vti = AllFloatVectors in {
VLOpFrag)),
(!cast<Instruction>("PseudoVFNMSUB_VV_"# suffix)
vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW)>;
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
// The choice of VFMADD here is arbitrary, vfmadd.vf and vfmacc.vf are equally
// commutable.
@@ -1007,7 +1009,7 @@ foreach vti = AllFloatVectors in {
VLOpFrag)),
(!cast<Instruction>("PseudoVFMADD_V" # vti.ScalarSuffix # "_" # suffix)
vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW)>;
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
def : Pat<(vti.Vector (riscv_fma_vl (SplatFPOp vti.ScalarRegClass:$rs1),
vti.RegClass:$rd,
(riscv_fneg_vl vti.RegClass:$rs2,
@@ -1017,7 +1019,7 @@ foreach vti = AllFloatVectors in {
VLOpFrag)),
(!cast<Instruction>("PseudoVFMSUB_V" # vti.ScalarSuffix # "_" # suffix)
vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW)>;
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
def : Pat<(vti.Vector (riscv_fma_vl (SplatFPOp vti.ScalarRegClass:$rs1),
(riscv_fneg_vl vti.RegClass:$rd,
(vti.Mask true_mask),
@@ -1029,7 +1031,7 @@ foreach vti = AllFloatVectors in {
VLOpFrag)),
(!cast<Instruction>("PseudoVFNMADD_V" # vti.ScalarSuffix # "_" # suffix)
vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW)>;
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
def : Pat<(vti.Vector (riscv_fma_vl (SplatFPOp vti.ScalarRegClass:$rs1),
(riscv_fneg_vl vti.RegClass:$rd,
(vti.Mask true_mask),
@@ -1039,7 +1041,7 @@ foreach vti = AllFloatVectors in {
VLOpFrag)),
(!cast<Instruction>("PseudoVFNMSUB_V" # vti.ScalarSuffix # "_" # suffix)
vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW)>;
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
// The splat might be negated.
def : Pat<(vti.Vector (riscv_fma_vl (riscv_fneg_vl (SplatFPOp vti.ScalarRegClass:$rs1),
@@ -1053,7 +1055,7 @@ foreach vti = AllFloatVectors in {
VLOpFrag)),
(!cast<Instruction>("PseudoVFNMADD_V" # vti.ScalarSuffix # "_" # suffix)
vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW)>;
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
def : Pat<(vti.Vector (riscv_fma_vl (riscv_fneg_vl (SplatFPOp vti.ScalarRegClass:$rs1),
(vti.Mask true_mask),
VLOpFrag),
@@ -1062,7 +1064,7 @@ foreach vti = AllFloatVectors in {
VLOpFrag)),
(!cast<Instruction>("PseudoVFNMSUB_V" # vti.ScalarSuffix # "_" # suffix)
vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW)>;
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
}
// 14.11. Vector Floating-Point MIN/MAX Instructions
@@ -1126,29 +1128,29 @@ foreach fvti = AllFloatVectors in {
// Floating-point vselects:
// 12.15. Vector Integer Merge Instructions
// 14.15. Vector Floating-Point Merge Instruction
- def : Pat<(fvti.Vector (riscv_vselect_vl (fvti.Mask VMV0:$vm),
+ def : Pat<(fvti.Vector (riscv_vselect_vl (fvti.Mask V0),
fvti.RegClass:$rs1,
fvti.RegClass:$rs2,
VLOpFrag)),
(!cast<Instruction>("PseudoVMERGE_VVM_"#fvti.LMul.MX)
- fvti.RegClass:$rs2, fvti.RegClass:$rs1, VMV0:$vm,
+ fvti.RegClass:$rs2, fvti.RegClass:$rs1, (fvti.Mask V0),
GPR:$vl, fvti.Log2SEW)>;
- def : Pat<(fvti.Vector (riscv_vselect_vl (fvti.Mask VMV0:$vm),
+ def : Pat<(fvti.Vector (riscv_vselect_vl (fvti.Mask V0),
(SplatFPOp fvti.ScalarRegClass:$rs1),
fvti.RegClass:$rs2,
VLOpFrag)),
(!cast<Instruction>("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX)
fvti.RegClass:$rs2,
(fvti.Scalar fvti.ScalarRegClass:$rs1),
- VMV0:$vm, GPR:$vl, fvti.Log2SEW)>;
+ (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>;
- def : Pat<(fvti.Vector (riscv_vselect_vl (fvti.Mask VMV0:$vm),
+ def : Pat<(fvti.Vector (riscv_vselect_vl (fvti.Mask V0),
(SplatFPOp (fvti.Scalar fpimm0)),
fvti.RegClass:$rs2,
VLOpFrag)),
(!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX)
- fvti.RegClass:$rs2, 0, VMV0:$vm, GPR:$vl, fvti.Log2SEW)>;
+ fvti.RegClass:$rs2, 0, (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>;
// 14.16. Vector Floating-Point Move Instruction
// If we're splatting fpimm0, use vmv.v.x vd, x0.
@@ -1207,9 +1209,9 @@ foreach fvti = AllFloatVectors in {
}
}
-} // Predicates = [HasStdExtV, HasStdExtF]
+} // Predicates = [HasVInstructionsAnyF]
-let Predicates = [HasStdExtV] in {
+let Predicates = [HasVInstructions] in {
foreach mti = AllMasks in {
// 16.1 Vector Mask-Register Logical Instructions
@@ -1231,12 +1233,12 @@ foreach mti = AllMasks in {
def : Pat<(mti.Mask (riscv_vmand_vl VR:$rs1,
(riscv_vmnot_vl VR:$rs2, VLOpFrag),
VLOpFrag)),
- (!cast<Instruction>("PseudoVMANDNOT_MM_" # mti.LMul.MX)
+ (!cast<Instruction>("PseudoVMANDN_MM_" # mti.LMul.MX)
VR:$rs1, VR:$rs2, GPR:$vl, mti.Log2SEW)>;
def : Pat<(mti.Mask (riscv_vmor_vl VR:$rs1,
(riscv_vmnot_vl VR:$rs2, VLOpFrag),
VLOpFrag)),
- (!cast<Instruction>("PseudoVMORNOT_MM_" # mti.LMul.MX)
+ (!cast<Instruction>("PseudoVMORN_MM_" # mti.LMul.MX)
VR:$rs1, VR:$rs2, GPR:$vl, mti.Log2SEW)>;
// XOR is associative so we need 2 patterns for VMXNOR.
def : Pat<(mti.Mask (riscv_vmxor_vl (riscv_vmnot_vl VR:$rs1,
@@ -1266,16 +1268,20 @@ foreach mti = AllMasks in {
(!cast<Instruction>("PseudoVMNAND_MM_" # mti.LMul.MX)
VR:$rs, VR:$rs, GPR:$vl, mti.Log2SEW)>;
- // 16.2 Vector Mask Population Count vpopc
- def : Pat<(XLenVT (riscv_vpopc_vl (mti.Mask VR:$rs2), (mti.Mask true_mask),
+ // 16.2 Vector count population in mask vcpop.m
+ def : Pat<(XLenVT (riscv_vcpop_vl (mti.Mask VR:$rs2), (mti.Mask true_mask),
VLOpFrag)),
- (!cast<Instruction>("PseudoVPOPC_M_" # mti.BX)
+ (!cast<Instruction>("PseudoVCPOP_M_" # mti.BX)
VR:$rs2, GPR:$vl, mti.Log2SEW)>;
+ def : Pat<(XLenVT (riscv_vcpop_vl (mti.Mask VR:$rs2), (mti.Mask V0),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVCPOP_M_" # mti.BX # "_MASK")
+ VR:$rs2, (mti.Mask V0), GPR:$vl, mti.Log2SEW)>;
}
-} // Predicates = [HasStdExtV]
+} // Predicates = [HasVInstructions]
-let Predicates = [HasStdExtV] in {
+let Predicates = [HasVInstructions] in {
// 17.1. Integer Scalar Move Instructions
// 17.4. Vector Register Gather Instruction
foreach vti = AllIntegerVectors in {
@@ -1302,7 +1308,7 @@ foreach vti = AllIntegerVectors in {
(!cast<Instruction>("PseudoVRGATHER_VI_"# vti.LMul.MX)
vti.RegClass:$rs2, uimm5:$imm, GPR:$vl, vti.Log2SEW)>;
- def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask VMV0:$vm),
+ def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask V0),
(riscv_vrgather_vv_vl
vti.RegClass:$rs2,
vti.RegClass:$rs1,
@@ -1312,7 +1318,19 @@ foreach vti = AllIntegerVectors in {
VLOpFrag)),
(!cast<Instruction>("PseudoVRGATHER_VV_"# vti.LMul.MX#"_MASK")
vti.RegClass:$merge, vti.RegClass:$rs2, vti.RegClass:$rs1,
- vti.Mask:$vm, GPR:$vl, vti.Log2SEW)>;
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+
+ def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask V0),
+ (riscv_vrgather_vx_vl
+ vti.RegClass:$rs2,
+ uimm5:$imm,
+ (vti.Mask true_mask),
+ VLOpFrag),
+ vti.RegClass:$merge,
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVRGATHER_VI_"# vti.LMul.MX#"_MASK")
+ vti.RegClass:$merge, vti.RegClass:$rs2, uimm5:$imm,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
// emul = lmul * 16 / sew
defvar vlmul = vti.LMul;
@@ -1329,7 +1347,7 @@ foreach vti = AllIntegerVectors in {
(!cast<Instruction>(inst)
vti.RegClass:$rs2, ivti.RegClass:$rs1, GPR:$vl, vti.Log2SEW)>;
- def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask VMV0:$vm),
+ def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask V0),
(riscv_vrgatherei16_vv_vl
vti.RegClass:$rs2,
(ivti.Vector ivti.RegClass:$rs1),
@@ -1339,13 +1357,13 @@ foreach vti = AllIntegerVectors in {
VLOpFrag)),
(!cast<Instruction>(inst#"_MASK")
vti.RegClass:$merge, vti.RegClass:$rs2, ivti.RegClass:$rs1,
- vti.Mask:$vm, GPR:$vl, vti.Log2SEW)>;
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
}
}
-} // Predicates = [HasStdExtV]
+} // Predicates = [HasVInstructions]
-let Predicates = [HasStdExtV, HasStdExtF] in {
+let Predicates = [HasVInstructionsAnyF] in {
// 17.2. Floating-Point Scalar Move Instructions
foreach vti = AllFloatVectors in {
@@ -1373,7 +1391,7 @@ foreach vti = AllFloatVectors in {
(!cast<Instruction>("PseudoVRGATHER_VI_"# vti.LMul.MX)
vti.RegClass:$rs2, uimm5:$imm, GPR:$vl, vti.Log2SEW)>;
- def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask VMV0:$vm),
+ def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask V0),
(riscv_vrgather_vv_vl
vti.RegClass:$rs2,
(ivti.Vector vti.RegClass:$rs1),
@@ -1383,7 +1401,19 @@ foreach vti = AllFloatVectors in {
VLOpFrag)),
(!cast<Instruction>("PseudoVRGATHER_VV_"# vti.LMul.MX#"_MASK")
vti.RegClass:$merge, vti.RegClass:$rs2, vti.RegClass:$rs1,
- vti.Mask:$vm, GPR:$vl, vti.Log2SEW)>;
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+
+ def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask V0),
+ (riscv_vrgather_vx_vl
+ vti.RegClass:$rs2,
+ uimm5:$imm,
+ (vti.Mask true_mask),
+ VLOpFrag),
+ vti.RegClass:$merge,
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVRGATHER_VI_"# vti.LMul.MX#"_MASK")
+ vti.RegClass:$merge, vti.RegClass:$rs2, uimm5:$imm,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
defvar vlmul = vti.LMul;
defvar octuple_lmul = vlmul.octuple;
@@ -1399,7 +1429,7 @@ foreach vti = AllFloatVectors in {
(!cast<Instruction>(inst)
vti.RegClass:$rs2, ivti.RegClass:$rs1, GPR:$vl, vti.Log2SEW)>;
- def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask VMV0:$vm),
+ def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask V0),
(riscv_vrgatherei16_vv_vl
vti.RegClass:$rs2,
(ivti.Vector ivti.RegClass:$rs1),
@@ -1409,11 +1439,11 @@ foreach vti = AllFloatVectors in {
VLOpFrag)),
(!cast<Instruction>(inst#"_MASK")
vti.RegClass:$merge, vti.RegClass:$rs2, ivti.RegClass:$rs1,
- vti.Mask:$vm, GPR:$vl, vti.Log2SEW)>;
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
}
}
-} // Predicates = [HasStdExtV, HasStdExtF]
+} // Predicates = [HasVInstructionsAnyF]
//===----------------------------------------------------------------------===//
// Miscellaneous RISCVISD SDNodes
@@ -1437,7 +1467,7 @@ def riscv_slide1up_vl : SDNode<"RISCVISD::VSLIDE1UP_VL", SDTRVVSlide1, []>;
def riscv_slidedown_vl : SDNode<"RISCVISD::VSLIDEDOWN_VL", SDTRVVSlide, []>;
def riscv_slide1down_vl : SDNode<"RISCVISD::VSLIDE1DOWN_VL", SDTRVVSlide1, []>;
-let Predicates = [HasStdExtV] in {
+let Predicates = [HasVInstructions] in {
foreach vti = AllIntegerVectors in {
def : Pat<(vti.Vector (riscv_vid_vl (vti.Mask true_mask),
@@ -1490,4 +1520,4 @@ foreach vti = !listconcat(AllIntegerVectors, AllFloatVectors) in {
GPR:$vl, vti.Log2SEW)>;
}
-} // Predicates = [HasStdExtV]
+} // Predicates = [HasVInstructions]
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index 7359e567a58d..461bdd348934 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -1,4 +1,4 @@
-//===-- RISCVInstrInfoB.td - RISC-V 'B' instructions -------*- tablegen -*-===//
+//===-- RISCVInstrInfoZb.td - RISC-V Bitmanip instructions -*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,9 +6,19 @@
//
//===----------------------------------------------------------------------===//
//
-// This file describes the RISC-V instructions from the standard 'B' Bitmanip
-// extension, version 0.93.
-// This version is still experimental as the 'B' extension hasn't been
+// This file describes the RISC-V instructions from the standard Bitmanip
+// extensions, versions:
+// Zba - 1.0
+// Zbb - 1.0
+// Zbc - 1.0
+// Zbs - 1.0
+// Zbe - 0.93
+// Zbf - 0.93
+// Zbm - 0.93
+// Zbp - 0.93
+// Zbr - 0.93
+// Zbt - 0.93
+// This version is still experimental as the Bitmanip extensions haven't been
// ratified yet.
//
//===----------------------------------------------------------------------===//
@@ -186,6 +196,32 @@ def C9LeftShift : PatLeaf<(imm), [{
return C > 9 && ((C % 9) == 0) && isPowerOf2_64(C / 9);
}]>;
+def CSImm12MulBy4 : PatLeaf<(imm), [{
+ if (!N->hasOneUse())
+ return false;
+ int64_t C = N->getSExtValue();
+ // Skip if C is simm12 or can be optimized by the PatLeaf AddiPair.
+ return !isInt<13>(C) && isInt<14>(C) && (C & 3) == 0;
+}]>;
+
+def CSImm12MulBy8 : PatLeaf<(imm), [{
+ if (!N->hasOneUse())
+ return false;
+ int64_t C = N->getSExtValue();
+ // Skip if C is simm12 or can be optimized by the PatLeaf AddiPair.
+ return !isInt<13>(C) && isInt<15>(C) && (C & 7) == 0;
+}]>;
+
+def SimmShiftRightBy2XForm : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getSExtValue() >> 2, SDLoc(N),
+ N->getValueType(0));
+}]>;
+
+def SimmShiftRightBy3XForm : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getSExtValue() >> 3, SDLoc(N),
+ N->getValueType(0));
+}]>;
+
//===----------------------------------------------------------------------===//
// Instruction class templates
//===----------------------------------------------------------------------===//
@@ -459,15 +495,6 @@ def RORW : ALUW_rr<0b0110000, 0b101, "rorw">,
Sched<[WriteRotateReg32, ReadRotateReg32, ReadRotateReg32]>;
} // Predicates = [HasStdExtZbbOrZbp, IsRV64]
-let Predicates = [HasStdExtZbs, IsRV64] in {
-// NOTE: These instructions have been removed from the 0.94 spec. As a result
-// we have no isel patterns for them.
-def BCLRW : ALUW_rr<0b0100100, 0b001, "bclrw">, Sched<[]>;
-def BSETW : ALUW_rr<0b0010100, 0b001, "bsetw">, Sched<[]>;
-def BINVW : ALUW_rr<0b0110100, 0b001, "binvw">, Sched<[]>;
-def BEXTW : ALUW_rr<0b0100100, 0b101, "bextw">, Sched<[]>;
-} // Predicates = [HasStdExtZbs, IsRV64]
-
let Predicates = [HasStdExtZbp, IsRV64] in {
def GORCW : ALUW_rr<0b0010100, 0b101, "gorcw">, Sched<[]>;
def GREVW : ALUW_rr<0b0110100, 0b101, "grevw">, Sched<[]>;
@@ -481,17 +508,6 @@ let Predicates = [HasStdExtZbbOrZbp, IsRV64] in
def RORIW : RVBShiftW_ri<0b0110000, 0b101, OPC_OP_IMM_32, "roriw">,
Sched<[WriteRotateImm32, ReadRotateImm32]>;
-let Predicates = [HasStdExtZbs, IsRV64] in {
-// NOTE: These instructions have been removed from the 0.94 spec. As a result
-// we have no isel patterns for them.
-def BCLRIW : RVBShiftW_ri<0b0100100, 0b001, OPC_OP_IMM_32, "bclriw">,
- Sched<[]>;
-def BSETIW : RVBShiftW_ri<0b0010100, 0b001, OPC_OP_IMM_32, "bsetiw">,
- Sched<[]>;
-def BINVIW : RVBShiftW_ri<0b0110100, 0b001, OPC_OP_IMM_32, "binviw">,
- Sched<[]>;
-} // Predicates = [HasStdExtZbs, IsRV64]
-
let Predicates = [HasStdExtZbp, IsRV64] in {
def GORCIW : RVBShiftW_ri<0b0010100, 0b101, OPC_OP_IMM_32, "gorciw">, Sched<[]>;
def GREVIW : RVBShiftW_ri<0b0110100, 0b101, OPC_OP_IMM_32, "greviw">, Sched<[]>;
@@ -585,43 +601,10 @@ def ORCB : RVInstI<0b101, OPC_OP_IMM, (outs GPR:$rd), (ins GPR:$rs1),
} // Predicates = [HasStdExtZbbOrZbp]
//===----------------------------------------------------------------------===//
-// Future compressed instructions
-//===----------------------------------------------------------------------===//
-
-// The presence of these instructions in the B extension is purely experimental
-// and they should be moved to the C extension as soon as they are ratified.
-
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-class RVBInstC<bits<2> funct2, string opcodestr>
- : RVInst16<(outs GPRC:$rs_wb), (ins GPRC:$rs), opcodestr, "$rs", [],
- InstFormatCR> {
- bits<3> rs;
- let Constraints = "$rs = $rs_wb";
-
- let Inst{15-12} = 0b0110;
- let Inst{11-10} = funct2;
- let Inst{9-7} = rs;
- let Inst{6-0} = 0b0000001;
-}
-
-// The namespace RVBC exists to avoid encoding conflicts with the compressed
-// instructions c.addi16sp and c.lui already implemented in the C extension.
-
-let DecoderNamespace = "RVBC", Predicates = [HasStdExtZbproposedc, HasStdExtC] in {
-def C_NOT : RVBInstC<0b00, "c.not">, Sched<[]>;
-def C_NEG : RVBInstC<0b01, "c.neg">, Sched<[]>;
-} // DecoderNamespace = "RVBC", Predicates = [HasStdExtZbproposedc, HasStdExtC]
-
-let DecoderNamespace = "RVBC", Predicates = [HasStdExtZbproposedc, HasStdExtZba, HasStdExtC, IsRV64] in
-def C_ZEXTW : RVBInstC<0b10, "c.zext.w">, Sched<[]>;
-
-//===----------------------------------------------------------------------===//
// Pseudo Instructions
//===----------------------------------------------------------------------===//
let Predicates = [HasStdExtZba, IsRV64] in {
-// NOTE: The 0.93 spec shows zext.w as an alias of pack/packw. It has been
-// changed to add.uw in a draft after 0.94.
def : InstAlias<"zext.w $rd, $rs", (ADDUW GPR:$rd, GPR:$rs, X0)>;
}
@@ -770,21 +753,6 @@ def : InstAlias<"bext $rd, $rs1, $shamt",
} // Predicates = [HasStdExtZbs]
//===----------------------------------------------------------------------===//
-// Compressed Instruction patterns
-//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtZbproposedc, HasStdExtC] in {
-def : CompressPat<(XORI GPRC:$rs1, GPRC:$rs1, -1),
- (C_NOT GPRC:$rs1)>;
-def : CompressPat<(SUB GPRC:$rs1, X0, GPRC:$rs1),
- (C_NEG GPRC:$rs1)>;
-} // Predicates = [HasStdExtZbproposedc, HasStdExtC]
-
-let Predicates = [HasStdExtZbproposedc, HasStdExtZba, HasStdExtC, IsRV64] in {
-def : CompressPat<(ADDUW GPRC:$rs1, GPRC:$rs1, X0),
- (C_ZEXTW GPRC:$rs1)>;
-} // Predicates = [HasStdExtZbproposedc, HasStdExtC, IsRV64]
-
-//===----------------------------------------------------------------------===//
// Codegen patterns
//===----------------------------------------------------------------------===//
@@ -1011,6 +979,13 @@ def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 40)), GPR:$rs2),
def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 72)), GPR:$rs2),
(SH3ADD (SH3ADD GPR:$rs1, GPR:$rs1), GPR:$rs2)>;
+def : Pat<(add GPR:$r, CSImm12MulBy4:$i),
+ (SH2ADD (ADDI X0, (SimmShiftRightBy2XForm CSImm12MulBy4:$i)),
+ GPR:$r)>;
+def : Pat<(add GPR:$r, CSImm12MulBy8:$i),
+ (SH3ADD (ADDI X0, (SimmShiftRightBy3XForm CSImm12MulBy8:$i)),
+ GPR:$r)>;
+
def : Pat<(mul GPR:$r, C3LeftShift:$i),
(SLLI (SH1ADD GPR:$r, GPR:$r),
(TrailingZerosXForm C3LeftShift:$i))>;
@@ -1020,6 +995,29 @@ def : Pat<(mul GPR:$r, C5LeftShift:$i),
def : Pat<(mul GPR:$r, C9LeftShift:$i),
(SLLI (SH3ADD GPR:$r, GPR:$r),
(TrailingZerosXForm C9LeftShift:$i))>;
+
+def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 11)),
+ (SH1ADD (SH2ADD GPR:$r, GPR:$r), GPR:$r)>;
+def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 19)),
+ (SH1ADD (SH3ADD GPR:$r, GPR:$r), GPR:$r)>;
+def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 13)),
+ (SH2ADD (SH1ADD GPR:$r, GPR:$r), GPR:$r)>;
+def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 21)),
+ (SH2ADD (SH2ADD GPR:$r, GPR:$r), GPR:$r)>;
+def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 37)),
+ (SH2ADD (SH3ADD GPR:$r, GPR:$r), GPR:$r)>;
+def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 25)),
+ (SH3ADD (SH1ADD GPR:$r, GPR:$r), GPR:$r)>;
+def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 41)),
+ (SH3ADD (SH2ADD GPR:$r, GPR:$r), GPR:$r)>;
+def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 73)),
+ (SH3ADD (SH3ADD GPR:$r, GPR:$r), GPR:$r)>;
+def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 27)),
+ (SH1ADD (SH3ADD GPR:$r, GPR:$r), (SH3ADD GPR:$r, GPR:$r))>;
+def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 45)),
+ (SH2ADD (SH3ADD GPR:$r, GPR:$r), (SH3ADD GPR:$r, GPR:$r))>;
+def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 81)),
+ (SH3ADD (SH3ADD GPR:$r, GPR:$r), (SH3ADD GPR:$r, GPR:$r))>;
} // Predicates = [HasStdExtZba]
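Editorial note, not part of the vendor-imported patch: shNadd computes (rs1 << N) + rs2, so the constants matched above decompose as, for example, 11*r = ((4r + r) << 1) + r and 81*r = (9r << 3) + 9r. A throwaway C++ check of a few of those identities, included only as an illustration of why the patterns are correct:

#include <cassert>
#include <cstdint>

// shNadd rd, rs1, rs2 on RISC-V Zba computes (rs1 << N) + rs2.
static uint64_t sh1add(uint64_t A, uint64_t B) { return (A << 1) + B; }
static uint64_t sh2add(uint64_t A, uint64_t B) { return (A << 2) + B; }
static uint64_t sh3add(uint64_t A, uint64_t B) { return (A << 3) + B; }

int main() {
  for (uint64_t R = 0; R < 1000; ++R) {
    assert(sh1add(sh2add(R, R), R) == 11 * R);              // (SH1ADD (SH2ADD r, r), r)
    assert(sh2add(sh3add(R, R), R) == 37 * R);              // (SH2ADD (SH3ADD r, r), r)
    assert(sh3add(sh3add(R, R), sh3add(R, R)) == 81 * R);   // 9*r, then 9*(9*r)
  }
  return 0;
}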
let Predicates = [HasStdExtZba, IsRV64] in {
@@ -1085,6 +1083,9 @@ def : Pat<(i64 (sext_inreg (or (shl GPR:$rs2, (i64 16)),
(and GPR:$rs1, 0x000000000000FFFF)),
i32)),
(PACKW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(i64 (or (sext_inreg (shl GPR:$rs2, (i64 16)), i32),
+ (and GPR:$rs1, 0x000000000000FFFF))),
+ (PACKW GPR:$rs1, GPR:$rs2)>;
def : Pat<(i64 (or (and (assertsexti32 GPR:$rs2), 0xFFFFFFFFFFFF0000),
(srl (and GPR:$rs1, 0xFFFFFFFF), (i64 16)))),
(PACKUW GPR:$rs1, GPR:$rs2)>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
index 7316b7ad7674..a33494461869 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
@@ -66,7 +66,7 @@ class FPCmpH_rr<bits<3> funct3, string opcodestr>
// Instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtZfh] in {
+let Predicates = [HasStdExtZfhmin] in {
let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
def FLH : RVInstI<0b001, OPC_LOAD_FP, (outs FPR16:$rd),
(ins GPR:$rs1, simm12:$imm12),
@@ -81,7 +81,9 @@ def FSH : RVInstS<0b001, OPC_STORE_FP, (outs),
(ins FPR16:$rs2, GPR:$rs1, simm12:$imm12),
"fsh", "$rs2, ${imm12}(${rs1})">,
Sched<[WriteFST16, ReadStoreData, ReadFMemBase]>;
+} // Predicates = [HasStdExtZfhmin]
+let Predicates = [HasStdExtZfh] in {
def FMADD_H : FPFMAH_rrr_frm<OPC_MADD, "fmadd.h">,
Sched<[WriteFMA16, ReadFMA16, ReadFMA16, ReadFMA16]>;
def : FPFMAHDynFrmAlias<FMADD_H, "fmadd.h">;
@@ -148,7 +150,9 @@ def FCVT_H_WU : FPUnaryOp_r_frm<0b1101010, FPR16, GPR, "fcvt.h.wu">,
let rs2 = 0b00001;
}
def : FPUnaryOpDynFrmAlias<FCVT_H_WU, "fcvt.h.wu", FPR16, GPR>;
+} // Predicates = [HasStdExtZfh]
+let Predicates = [HasStdExtZfhmin] in {
def FCVT_H_S : FPUnaryOp_r_frm<0b0100010, FPR16, FPR32, "fcvt.h.s">,
Sched<[WriteFCvtF32ToF16, ReadFCvtF32ToF16]> {
let rs2 = 0b00000;
@@ -169,7 +173,9 @@ def FMV_H_X : FPUnaryOp_r<0b1111010, 0b000, FPR16, GPR, "fmv.h.x">,
Sched<[WriteFMovI16ToF16, ReadFMovI16ToF16]> {
let rs2 = 0b00000;
}
+} // Predicates = [HasStdExtZfhmin]
+let Predicates = [HasStdExtZfh] in {
def FEQ_H : FPCmpH_rr<0b010, "feq.h">;
def FLT_H : FPCmpH_rr<0b001, "flt.h">;
def FLE_H : FPCmpH_rr<0b000, "fle.h">;
@@ -206,7 +212,7 @@ def FCVT_H_LU : FPUnaryOp_r_frm<0b1101010, FPR16, GPR, "fcvt.h.lu">,
def : FPUnaryOpDynFrmAlias<FCVT_H_LU, "fcvt.h.lu", FPR16, GPR>;
} // Predicates = [HasStdExtZfh, IsRV64]
-let Predicates = [HasStdExtZfh, HasStdExtD] in {
+let Predicates = [HasStdExtZfhmin, HasStdExtD] in {
def FCVT_H_D : FPUnaryOp_r_frm<0b0100010, FPR16, FPR64, "fcvt.h.d">,
Sched<[WriteFCvtF64ToF16, ReadFCvtF64ToF16]> {
let rs2 = 0b00001;
@@ -217,16 +223,18 @@ def FCVT_D_H : FPUnaryOp_r<0b0100001, 0b000, FPR64, FPR16, "fcvt.d.h">,
Sched<[WriteFCvtF16ToF64, ReadFCvtF16ToF64]> {
let rs2 = 0b00010;
}
-} // Predicates = [HasStdExtZfh, HasStdExtD]
+} // Predicates = [HasStdExtZfhmin, HasStdExtD]
//===----------------------------------------------------------------------===//
// Assembler Pseudo Instructions (User-Level ISA, Version 2.2, Chapter 20)
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtZfh] in {
+let Predicates = [HasStdExtZfhmin] in {
def : InstAlias<"flh $rd, (${rs1})", (FLH FPR16:$rd, GPR:$rs1, 0), 0>;
def : InstAlias<"fsh $rs2, (${rs1})", (FSH FPR16:$rs2, GPR:$rs1, 0), 0>;
+} // Predicates = [HasStdExtZfhmin]
+let Predicates = [HasStdExtZfh] in {
def : InstAlias<"fmv.h $rd, $rs", (FSGNJ_H FPR16:$rd, FPR16:$rs, FPR16:$rs)>;
def : InstAlias<"fabs.h $rd, $rs", (FSGNJX_H FPR16:$rd, FPR16:$rs, FPR16:$rs)>;
def : InstAlias<"fneg.h $rd, $rs", (FSGNJN_H FPR16:$rd, FPR16:$rs, FPR16:$rs)>;
@@ -237,10 +245,12 @@ def : InstAlias<"fgt.h $rd, $rs, $rt",
(FLT_H GPR:$rd, FPR16:$rt, FPR16:$rs), 0>;
def : InstAlias<"fge.h $rd, $rs, $rt",
(FLE_H GPR:$rd, FPR16:$rt, FPR16:$rs), 0>;
+} // Predicates = [HasStdExtZfh]
+let Predicates = [HasStdExtZfhmin] in {
def PseudoFLH : PseudoFloatLoad<"flh", FPR16>;
def PseudoFSH : PseudoStore<"fsh", FPR16>;
-} // Predicates = [HasStdExtZfh]
+} // Predicates = [HasStdExtZfhmin]
//===----------------------------------------------------------------------===//
// Pseudo-instructions and codegen patterns
@@ -313,7 +323,9 @@ def : PatFpr16Fpr16<setle, FLE_H>;
def : PatFpr16Fpr16<setole, FLE_H>;
def Select_FPR16_Using_CC_GPR : SelectCC_rrirr<FPR16, GPR>;
+} // Predicates = [HasStdExtZfh]
+let Predicates = [HasStdExtZfhmin] in {
/// Loads
defm : LdPat<load, FLH, f16>;
@@ -331,13 +343,17 @@ def : Pat<(fpextend FPR16:$rs1), (FCVT_S_H FPR16:$rs1)>;
// Moves (no conversion)
def : Pat<(riscv_fmv_h_x GPR:$src), (FMV_H_X GPR:$src)>;
def : Pat<(riscv_fmv_x_anyexth FPR16:$src), (FMV_X_H FPR16:$src)>;
-} // Predicates = [HasStdExtZfh]
+} // Predicates = [HasStdExtZfhmin]
let Predicates = [HasStdExtZfh, IsRV32] in {
// half->[u]int. Round-to-zero must be used.
def : Pat<(i32 (fp_to_sint FPR16:$rs1)), (FCVT_W_H $rs1, 0b001)>;
def : Pat<(i32 (fp_to_uint FPR16:$rs1)), (FCVT_WU_H $rs1, 0b001)>;
+// Saturating float->[u]int32.
+def : Pat<(i32 (riscv_fcvt_x_rtz FPR16:$rs1)), (FCVT_W_H $rs1, 0b001)>;
+def : Pat<(i32 (riscv_fcvt_xu_rtz FPR16:$rs1)), (FCVT_WU_H $rs1, 0b001)>;
+
// half->int32 with current rounding mode.
def : Pat<(i32 (lrint FPR16:$rs1)), (FCVT_W_H $rs1, 0b111)>;
@@ -353,13 +369,17 @@ let Predicates = [HasStdExtZfh, IsRV64] in {
// Use target specific isd nodes to help us remember the result is sign
// extended. Matching sext_inreg+fptoui/fptosi may cause the conversion to be
// duplicated if it has another user that didn't need the sign_extend.
-def : Pat<(riscv_fcvt_w_rv64 FPR16:$rs1), (FCVT_W_H $rs1, 0b001)>;
-def : Pat<(riscv_fcvt_wu_rv64 FPR16:$rs1), (FCVT_WU_H $rs1, 0b001)>;
+def : Pat<(riscv_fcvt_w_rtz_rv64 FPR16:$rs1), (FCVT_W_H $rs1, 0b001)>;
+def : Pat<(riscv_fcvt_wu_rtz_rv64 FPR16:$rs1), (FCVT_WU_H $rs1, 0b001)>;
// half->[u]int64. Round-to-zero must be used.
def : Pat<(i64 (fp_to_sint FPR16:$rs1)), (FCVT_L_H $rs1, 0b001)>;
def : Pat<(i64 (fp_to_uint FPR16:$rs1)), (FCVT_LU_H $rs1, 0b001)>;
+// Saturating float->[u]int64.
+def : Pat<(i64 (riscv_fcvt_x_rtz FPR16:$rs1)), (FCVT_L_H $rs1, 0b001)>;
+def : Pat<(i64 (riscv_fcvt_xu_rtz FPR16:$rs1)), (FCVT_LU_H $rs1, 0b001)>;
+
// half->int64 with current rounding mode.
def : Pat<(i64 (lrint FPR16:$rs1)), (FCVT_L_H $rs1, 0b111)>;
def : Pat<(i64 (llrint FPR16:$rs1)), (FCVT_L_H $rs1, 0b111)>;
@@ -375,7 +395,7 @@ def : Pat<(sint_to_fp (i64 GPR:$rs1)), (FCVT_H_L $rs1, 0b111)>;
def : Pat<(uint_to_fp (i64 GPR:$rs1)), (FCVT_H_LU $rs1, 0b111)>;
} // Predicates = [HasStdExtZfh, IsRV64]
-let Predicates = [HasStdExtZfh, HasStdExtD] in {
+let Predicates = [HasStdExtZfhmin, HasStdExtD] in {
/// Float conversion operations
// f64 -> f16, f16 -> f64
def : Pat<(fpround FPR64:$rs1), (FCVT_H_D FPR64:$rs1, 0b111)>;
@@ -385,4 +405,4 @@ def : Pat<(fpextend FPR16:$rs1), (FCVT_D_H FPR16:$rs1)>;
def : Pat<(fcopysign FPR16:$rs1, FPR64:$rs2),
(FSGNJ_H $rs1, (FCVT_H_D $rs2, 0b111))>;
def : Pat<(fcopysign FPR64:$rs1, FPR16:$rs2), (FSGNJ_D $rs1, (FCVT_D_H $rs2))>;
-}
+} // Predicates = [HasStdExtZfhmin, HasStdExtD]
diff --git a/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp b/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp
index 74d92468b9b9..dd084f53e511 100644
--- a/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp
@@ -148,17 +148,18 @@ static bool lowerRISCVVMachineInstrToMCInst(const MachineInstr *MI,
assert(TRI && "TargetRegisterInfo expected");
uint64_t TSFlags = MI->getDesc().TSFlags;
- int NumOps = MI->getNumExplicitOperands();
-
- for (const MachineOperand &MO : MI->explicit_operands()) {
- int OpNo = (int)MI->getOperandNo(&MO);
- assert(OpNo >= 0 && "Operand number doesn't fit in an 'int' type");
-
- // Skip VL and SEW operands which are the last two operands if present.
- if (RISCVII::hasVLOp(TSFlags) && OpNo == (NumOps - 2))
- continue;
- if (RISCVII::hasSEWOp(TSFlags) && OpNo == (NumOps - 1))
- continue;
+ unsigned NumOps = MI->getNumExplicitOperands();
+
+ // Skip policy, VL and SEW operands which are the last operands if present.
+ if (RISCVII::hasVecPolicyOp(TSFlags))
+ --NumOps;
+ if (RISCVII::hasVLOp(TSFlags))
+ --NumOps;
+ if (RISCVII::hasSEWOp(TSFlags))
+ --NumOps;
+
+ for (unsigned OpNo = 0; OpNo != NumOps; ++OpNo) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
// Skip merge op. It should be the first operand after the result.
if (RISCVII::hasMergeOp(TSFlags) && OpNo == 1) {
diff --git a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
index 87586023caa4..5f4022439abb 100644
--- a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
@@ -26,8 +26,8 @@
#include "RISCV.h"
#include "RISCVTargetMachine.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetOptions.h"
#include <set>
using namespace llvm;
@@ -38,7 +38,6 @@ namespace {
struct RISCVMergeBaseOffsetOpt : public MachineFunctionPass {
static char ID;
- const MachineFunction *MF;
bool runOnMachineFunction(MachineFunction &Fn) override;
bool detectLuiAddiGlobal(MachineInstr &LUI, MachineInstr *&ADDI);
@@ -53,6 +52,11 @@ struct RISCVMergeBaseOffsetOpt : public MachineFunctionPass {
MachineFunctionProperties::Property::IsSSA);
}
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
StringRef getPassName() const override {
return RISCV_MERGE_BASE_OFFSET_NAME;
}
@@ -193,7 +197,7 @@ bool RISCVMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &HiLUI,
LLVM_DEBUG(dbgs() << " Offset Instr: " << Tail);
foldOffset(HiLUI, LoADDI, Tail, Offset);
return true;
- } break;
+ }
case RISCV::ADD: {
// The offset is too large to fit in the immediate field of ADDI.
// This can be in two forms:
@@ -208,7 +212,7 @@ bool RISCVMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &HiLUI,
return false;
foldOffset(HiLUI, LoADDI, Tail, Offset);
return true;
- } break;
+ }
case RISCV::LB:
case RISCV::LH:
case RISCV::LW:
@@ -252,7 +256,7 @@ bool RISCVMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &HiLUI,
Tail.getOperand(1).setReg(HiLUI.getOperand(0).getReg());
DeadInstrs.insert(&LoADDI);
return true;
- } break;
+ }
}
return false;
}
@@ -261,6 +265,7 @@ bool RISCVMergeBaseOffsetOpt::runOnMachineFunction(MachineFunction &Fn) {
if (skipFunction(Fn.getFunction()))
return false;
+ bool MadeChange = false;
DeadInstrs.clear();
MRI = &Fn.getRegInfo();
for (MachineBasicBlock &MBB : Fn) {
@@ -272,13 +277,13 @@ bool RISCVMergeBaseOffsetOpt::runOnMachineFunction(MachineFunction &Fn) {
LLVM_DEBUG(dbgs() << " Found lowered global address with one use: "
<< *LoADDI->getOperand(2).getGlobal() << "\n");
// If the use count is only one, merge the offset
- detectAndFoldOffset(HiLUI, *LoADDI);
+ MadeChange |= detectAndFoldOffset(HiLUI, *LoADDI);
}
}
// Delete dead instructions.
for (auto *MI : DeadInstrs)
MI->eraseFromParent();
- return true;
+ return MadeChange;
}
/// Returns an instance of the Merge Base Offset Optimization pass.
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
index fde75206889c..a915a572f3b7 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -372,7 +372,7 @@ class NFList<int lmul> {
}
// Generate [start, end) SubRegIndex list.
-class SubRegSet<list<SubRegIndex> LIn, int start, int nf, int lmul> {
+class SubRegSet<int nf, int lmul> {
list<SubRegIndex> L = !foldl([]<SubRegIndex>,
[0, 1, 2, 3, 4, 5, 6, 7],
AccList, i,
@@ -382,39 +382,61 @@ class SubRegSet<list<SubRegIndex> LIn, int start, int nf, int lmul> {
[])));
}
-class IndexSet<int index, int nf, int lmul> {
+// Collect into 'R' the indexes that are valid under the given NF and LMUL,
+// starting at TUPLE_INDEX. When NF = 2, valid TUPLE_INDEX values are 0 and 1.
+// For example, when LMUL = 4, the potentially valid indexes are
+// [8, 12, 16, 20, 24, 28, 4]. However, not all of these indexes are valid under
+// NF = 2. For example, 28 is not valid under LMUL = 4, NF = 2 and TUPLE_INDEX = 0.
+// The filter is
+// (tuple_index + i) x lmul <= (tuple_index x lmul) + 32 - (nf x lmul)
+//
+// Using TUPLE_INDEX = 0, LMUL = 4 and NF = 2 as an example,
+// i x 4 <= 24
+// The class will return [8, 12, 16, 20, 24, 4].
+// Using TUPLE_INDEX = 1, LMUL = 4 and NF = 2 as an example,
+// (1 + i) x 4 <= 28
+// The class will return [12, 16, 20, 24, 28, 8].
+//
+class IndexSet<int tuple_index, int nf, int lmul, bit isV0 = false> {
list<int> R =
!foldl([]<int>,
- [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
- 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
- 23, 24, 25, 26, 27, 28, 29, 30, 31],
+ !if(isV0, [0],
+ !cond(
+ !eq(lmul, 1):
+ [8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31,
+ 1, 2, 3, 4, 5, 6, 7],
+ !eq(lmul, 2):
+ [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3],
+ !eq(lmul, 4):
+ [2, 3, 4, 5, 6, 7, 1])),
L, i,
!listconcat(L,
- !if(!and(
- !le(!mul(index, lmul), !mul(i, lmul)),
- !le(!mul(i, lmul),
- !sub(!add(32, !mul(index, lmul)), !mul(nf, lmul)))
- ), [!mul(i, lmul)], [])));
+ !if(!le(!mul(!add(i, tuple_index), lmul),
+ !sub(!add(32, !mul(tuple_index, lmul)), !mul(nf, lmul))),
+ [!mul(!add(i, tuple_index), lmul)], [])));
}
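Editorial note, not part of the vendor-imported patch: a standalone C++ sketch of the IndexSet filter above for LMUL = 4 and NF = 2, reproducing the two lists quoted in the comment ([8, 12, 16, 20, 24, 4] and [12, 16, 20, 24, 28, 8]); the hard-coded candidate list corresponds to the LMUL = 4 case only.

#include <cstdio>
#include <vector>

static std::vector<int> indexSet(int TupleIndex, int NF, int LMUL) {
  // Candidate multipliers in the rotated order used above, LMUL = 4 case.
  std::vector<int> Candidates = {2, 3, 4, 5, 6, 7, 1};
  std::vector<int> R;
  for (int I : Candidates)
    // Same filter as the TableGen comment:
    // (tuple_index + i) * lmul <= tuple_index * lmul + 32 - nf * lmul
    if ((I + TupleIndex) * LMUL <= TupleIndex * LMUL + 32 - NF * LMUL)
      R.push_back((I + TupleIndex) * LMUL);
  return R;
}

int main() {
  for (int TupleIndex : {0, 1}) {
    std::printf("TUPLE_INDEX=%d:", TupleIndex);
    for (int Idx : indexSet(TupleIndex, /*NF=*/2, /*LMUL=*/4))
      std::printf(" V%dM4", Idx);
    std::printf("\n");
  }
  return 0;
}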
-class VRegList<list<dag> LIn, int start, int nf, int lmul, bit NoV0> {
+// This class returns a list of vector register collections.
+// For example, for NF = 2 and LMUL = 4,
+// it will return
+// ([ V8M4, V12M4, V16M4, V20M4, V24M4, V4M4],
+// [V12M4, V16M4, V20M4, V24M4, V28M4, V8M4])
+//
+class VRegList<list<dag> LIn, int start, int nf, int lmul, bit isV0> {
list<dag> L =
!if(!ge(start, nf),
LIn,
!listconcat(
[!dag(add,
- !foreach(i,
- !if(NoV0,
- !tail(IndexSet<start, nf, lmul>.R),
- [!head(IndexSet<start, nf, lmul>.R)]),
+ !foreach(i, IndexSet<start, nf, lmul, isV0>.R,
!cast<Register>("V" # i # !cond(!eq(lmul, 2): "M2",
!eq(lmul, 4): "M4",
true: ""))),
!listsplat("",
- !if(NoV0,
- !size(!tail(IndexSet<start, nf, lmul>.R)),
- !size([!head(IndexSet<start, nf, lmul>.R)]))))],
- VRegList<LIn, !add(start, 1), nf, lmul, NoV0>.L));
+ !size(IndexSet<start, nf, lmul, isV0>.R)))],
+ VRegList<LIn, !add(start, 1), nf, lmul, isV0>.L));
}
// Vector registers
@@ -463,11 +485,11 @@ let RegAltNameIndices = [ABIRegAltName] in {
foreach m = [1, 2, 4] in {
foreach n = NFList<m>.L in {
def "VN" # n # "M" # m # "NoV0": RegisterTuples<
- SubRegSet<[], 0, n, m>.L,
- VRegList<[], 0, n, m, 1>.L>;
+ SubRegSet<n, m>.L,
+ VRegList<[], 0, n, m, false>.L>;
def "VN" # n # "M" # m # "V0" : RegisterTuples<
- SubRegSet<[], 0, n, m>.L,
- VRegList<[], 0, n, m, 0>.L>;
+ SubRegSet<n, m>.L,
+ VRegList<[], 0, n, m, true>.L>;
}
}
@@ -487,8 +509,7 @@ def VR : VReg<[vint8m1_t, vint16m1_t, vint32m1_t, vint64m1_t,
vfloat16mf4_t, vfloat16mf2_t, vfloat32mf2_t,
vbool64_t, vbool32_t, vbool16_t, vbool8_t, vbool4_t,
vbool2_t, vbool1_t],
- (add (sequence "V%u", 25, 31),
- (sequence "V%u", 8, 24),
+ (add (sequence "V%u", 8, 31),
(sequence "V%u", 0, 7)), 1>;
def VRNoV0 : VReg<[vint8m1_t, vint16m1_t, vint32m1_t, vint64m1_t,
@@ -498,27 +519,26 @@ def VRNoV0 : VReg<[vint8m1_t, vint16m1_t, vint32m1_t, vint64m1_t,
vfloat16mf4_t, vfloat16mf2_t, vfloat32mf2_t,
vbool64_t, vbool32_t, vbool16_t, vbool8_t, vbool4_t,
vbool2_t, vbool1_t],
- (add (sequence "V%u", 25, 31),
- (sequence "V%u", 8, 24),
+ (add (sequence "V%u", 8, 31),
(sequence "V%u", 1, 7)), 1>;
def VRM2 : VReg<[vint8m2_t, vint16m2_t, vint32m2_t, vint64m2_t,
vfloat16m2_t, vfloat32m2_t, vfloat64m2_t],
- (add V26M2, V28M2, V30M2, V8M2, V10M2, V12M2, V14M2, V16M2,
- V18M2, V20M2, V22M2, V24M2, V0M2, V2M2, V4M2, V6M2), 2>;
+ (add (sequence "V%uM2", 8, 31, 2),
+ (sequence "V%uM2", 0, 7, 2)), 2>;
def VRM2NoV0 : VReg<[vint8m2_t, vint16m2_t, vint32m2_t, vint64m2_t,
vfloat16m2_t, vfloat32m2_t, vfloat64m2_t],
- (add V26M2, V28M2, V30M2, V8M2, V10M2, V12M2, V14M2, V16M2,
- V18M2, V20M2, V22M2, V24M2, V2M2, V4M2, V6M2), 2>;
+ (add (sequence "V%uM2", 8, 31, 2),
+ (sequence "V%uM2", 2, 7, 2)), 2>;
def VRM4 : VReg<[vint8m4_t, vint16m4_t, vint32m4_t, vint64m4_t,
vfloat16m4_t, vfloat32m4_t, vfloat64m4_t],
- (add V28M4, V8M4, V12M4, V16M4, V20M4, V24M4, V0M4, V4M4), 4>;
+ (add V8M4, V12M4, V16M4, V20M4, V24M4, V28M4, V0M4, V4M4), 4>;
def VRM4NoV0 : VReg<[vint8m4_t, vint16m4_t, vint32m4_t, vint64m4_t,
vfloat16m4_t, vfloat32m4_t, vfloat64m4_t],
- (add V28M4, V8M4, V12M4, V16M4, V20M4, V24M4, V4M4), 4>;
+ (add V8M4, V12M4, V16M4, V20M4, V24M4, V28M4, V4M4), 4>;
def VRM8 : VReg<[vint8m8_t, vint16m8_t, vint32m8_t, vint64m8_t,
vfloat16m8_t, vfloat32m8_t, vfloat64m8_t],
@@ -526,7 +546,7 @@ def VRM8 : VReg<[vint8m8_t, vint16m8_t, vint32m8_t, vint64m8_t,
def VRM8NoV0 : VReg<[vint8m8_t, vint16m8_t, vint32m8_t, vint64m8_t,
vfloat16m8_t, vfloat32m8_t, vfloat64m8_t],
- (add V8M8, V16M8, V24M8), 8>;
+ (add V8M8, V16M8, V24M8), 8>;
defvar VMaskVTs = [vbool64_t, vbool32_t, vbool16_t, vbool8_t,
vbool4_t, vbool2_t, vbool1_t];
@@ -538,18 +558,18 @@ def VMV0 : RegisterClass<"RISCV", VMaskVTs, 64, (add V0)> {
// The register class is added for inline assembly for vector mask types.
def VM : VReg<[vbool1_t, vbool2_t, vbool4_t, vbool8_t, vbool16_t,
vbool32_t, vbool64_t],
- (add (sequence "V%u", 25, 31),
- (sequence "V%u", 8, 24),
+ (add (sequence "V%u", 8, 31),
(sequence "V%u", 0, 7)), 1>;
foreach m = LMULList.m in {
foreach nf = NFList<m>.L in {
- def "VRN" # nf # "M" # m: VReg<[untyped],
- (add !cast<RegisterTuples>("VN" # nf # "M" # m # "V0"), !cast<RegisterTuples>("VN" # nf # "M" # m # "NoV0")),
- !mul(nf, m)>;
def "VRN" # nf # "M" # m # "NoV0": VReg<[untyped],
(add !cast<RegisterTuples>("VN" # nf # "M" # m # "NoV0")),
!mul(nf, m)>;
+ def "VRN" # nf # "M" # m: VReg<[untyped],
+ (add !cast<RegisterTuples>("VN" # nf # "M" # m # "NoV0"),
+ !cast<RegisterTuples>("VN" # nf # "M" # m # "V0")),
+ !mul(nf, m)>;
}
}
@@ -557,3 +577,15 @@ foreach m = LMULList.m in {
def FFLAGS : RISCVReg<0, "fflags">;
def FRM : RISCVReg<0, "frm">;
def FCSR : RISCVReg<0, "fcsr">;
+
+// Any type register. Used for .insn directives when we don't know what the
+// register types could be.
+// NOTE: The alignment and size are bogus values. The Size needs to be non-zero
+// or tablegen will use "untyped" to determine the size which will assert.
+let isAllocatable = 0 in
+def AnyReg : RegisterClass<"RISCV", [untyped], 32,
+ (add (sequence "X%u", 0, 31),
+ (sequence "F%u_D", 0, 31),
+ (sequence "V%u", 0, 31))> {
+ let Size = 32;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVSchedRocket.td b/llvm/lib/Target/RISCV/RISCVSchedRocket.td
index ed26a5026114..14f59152ed42 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedRocket.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedRocket.td
@@ -231,6 +231,9 @@ def : ReadAdvance<ReadFMovI64ToF64, 0>;
def : ReadAdvance<ReadFClass32, 0>;
def : ReadAdvance<ReadFClass64, 0>;
+//===----------------------------------------------------------------------===//
+// Unsupported extensions
+defm : UnsupportedSchedV;
defm : UnsupportedSchedZba;
defm : UnsupportedSchedZbb;
defm : UnsupportedSchedZfh;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index 314af180aca1..5b435fcb16a2 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -18,7 +18,7 @@ def SiFive7Model : SchedMachineModel {
let UnsupportedFeatures = [HasStdExtV, HasStdExtZvamo, HasStdExtZvlsseg];
}
-// The SiFive7 microarchitecure has two pipelines: A and B.
+// The SiFive7 microarchitecture has two pipelines: A and B.
// Pipe A can handle memory, integer alu and vector operations.
// Pipe B can handle integer alu, control flow, integer multiply and divide,
// and floating point computation.
@@ -219,6 +219,9 @@ def : ReadAdvance<ReadFMovI64ToF64, 0>;
def : ReadAdvance<ReadFClass32, 0>;
def : ReadAdvance<ReadFClass64, 0>;
+//===----------------------------------------------------------------------===//
+// Unsupported extensions
+defm : UnsupportedSchedV;
defm : UnsupportedSchedZba;
defm : UnsupportedSchedZbb;
defm : UnsupportedSchedZfh;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedule.td b/llvm/lib/Target/RISCV/RISCVSchedule.td
index f31e4af46c1b..4971ca1d4e3e 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedule.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedule.td
@@ -230,3 +230,4 @@ def : ReadAdvance<ReadFSqrt16, 0>;
// Include the scheduler resources for other instruction extensions.
include "RISCVScheduleB.td"
+include "RISCVScheduleV.td"
diff --git a/llvm/lib/Target/RISCV/RISCVScheduleV.td b/llvm/lib/Target/RISCV/RISCVScheduleV.td
new file mode 100644
index 000000000000..43af1802d706
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVScheduleV.td
@@ -0,0 +1,820 @@
+//===-- RISCVScheduleV.td - RISCV Scheduling Definitions V -*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+/// Define scheduler resources associated with def operands.
+
+// 7. Vector Loads and Stores
+// 7.4. Vector Unit-Stride Instructions
+def WriteVLDE8 : SchedWrite;
+def WriteVLDE16 : SchedWrite;
+def WriteVLDE32 : SchedWrite;
+def WriteVLDE64 : SchedWrite;
+def WriteVSTE8 : SchedWrite;
+def WriteVSTE16 : SchedWrite;
+def WriteVSTE32 : SchedWrite;
+def WriteVSTE64 : SchedWrite;
+// 7.4.1. Vector Unit-Strided Mask
+def WriteVLDM : SchedWrite;
+def WriteVSTM : SchedWrite;
+// 7.5. Vector Strided Instructions
+def WriteVLDS8 : SchedWrite;
+def WriteVLDS16 : SchedWrite;
+def WriteVLDS32 : SchedWrite;
+def WriteVLDS64 : SchedWrite;
+def WriteVSTS8 : SchedWrite;
+def WriteVSTS16 : SchedWrite;
+def WriteVSTS32 : SchedWrite;
+def WriteVSTS64 : SchedWrite;
+// 7.6. Vector Indexed Instructions
+def WriteVLDUX8 : SchedWrite;
+def WriteVLDUX16 : SchedWrite;
+def WriteVLDUX32 : SchedWrite;
+def WriteVLDUX64 : SchedWrite;
+def WriteVLDOX8 : SchedWrite;
+def WriteVLDOX16 : SchedWrite;
+def WriteVLDOX32 : SchedWrite;
+def WriteVLDOX64 : SchedWrite;
+def WriteVSTUX8 : SchedWrite;
+def WriteVSTUX16 : SchedWrite;
+def WriteVSTUX32 : SchedWrite;
+def WriteVSTUX64 : SchedWrite;
+def WriteVSTOX8 : SchedWrite;
+def WriteVSTOX16 : SchedWrite;
+def WriteVSTOX32 : SchedWrite;
+def WriteVSTOX64 : SchedWrite;
+// 7.7. Vector Unit-stride Fault-Only-First Loads
+def WriteVLDFF8 : SchedWrite;
+def WriteVLDFF16 : SchedWrite;
+def WriteVLDFF32 : SchedWrite;
+def WriteVLDFF64 : SchedWrite;
+// 7.9. Vector Whole Register Instructions
+def WriteVLD1R8 : SchedWrite;
+def WriteVLD1R16 : SchedWrite;
+def WriteVLD1R32 : SchedWrite;
+def WriteVLD1R64 : SchedWrite;
+def WriteVLD2R8 : SchedWrite;
+def WriteVLD2R16 : SchedWrite;
+def WriteVLD2R32 : SchedWrite;
+def WriteVLD2R64 : SchedWrite;
+def WriteVLD4R8 : SchedWrite;
+def WriteVLD4R16 : SchedWrite;
+def WriteVLD4R32 : SchedWrite;
+def WriteVLD4R64 : SchedWrite;
+def WriteVLD8R8 : SchedWrite;
+def WriteVLD8R16 : SchedWrite;
+def WriteVLD8R32 : SchedWrite;
+def WriteVLD8R64 : SchedWrite;
+def WriteVST1R : SchedWrite;
+def WriteVST2R : SchedWrite;
+def WriteVST4R : SchedWrite;
+def WriteVST8R : SchedWrite;
+
+// 11. Vector Integer Arithmetic Instructions
+// 11.1. Vector Single-Width Integer Add and Subtract
+// 11.5. Vector Bitwise Logical Instructions
+def WriteVIALUV : SchedWrite;
+def WriteVIALUX : SchedWrite;
+def WriteVIALUI : SchedWrite;
+// 11.2. Vector Widening Integer Add/Subtract
+def WriteVIWALUV : SchedWrite;
+def WriteVIWALUX : SchedWrite;
+def WriteVIWALUI : SchedWrite;
+// 11.3. Vector Integer Extension
+def WriteVExtV : SchedWrite;
+// 11.4. Vector Integer Arithmetic with Carry or Borrow Instructions
+def WriteVICALUV : SchedWrite;
+def WriteVICALUX : SchedWrite;
+def WriteVICALUI : SchedWrite;
+// 11.6. Vector Single-Width Bit Shift Instructions
+def WriteVShiftV : SchedWrite;
+def WriteVShiftX : SchedWrite;
+def WriteVShiftI : SchedWrite;
+// 11.7. Vector Narrowing Integer Right Shift Instructions
+def WriteVNShiftV : SchedWrite;
+def WriteVNShiftX : SchedWrite;
+def WriteVNShiftI : SchedWrite;
+// 11.8. Vector Integer Comparison Instructions
+// 11.9. Vector Integer Min/Max Instructions
+def WriteVICmpV : SchedWrite;
+def WriteVICmpX : SchedWrite;
+def WriteVICmpI : SchedWrite;
+// 11.10. Vector Single-Width Integer Multiply Instructions
+def WriteVIMulV : SchedWrite;
+def WriteVIMulX : SchedWrite;
+// 11.11. Vector Integer Divide Instructions
+def WriteVIDivV : SchedWrite;
+def WriteVIDivX : SchedWrite;
+// 11.12. Vector Widening Integer Multiply Instructions
+def WriteVIWMulV : SchedWrite;
+def WriteVIWMulX : SchedWrite;
+// 11.13. Vector Single-Width Integer Multiply-Add Instructions
+def WriteVIMulAddV : SchedWrite;
+def WriteVIMulAddX : SchedWrite;
+// 11.14. Vector Widening Integer Multiply-Add Instructions
+def WriteVIWMulAddV : SchedWrite;
+def WriteVIWMulAddX : SchedWrite;
+// 11.15. Vector Integer Merge Instructions
+def WriteVIMergeV : SchedWrite;
+def WriteVIMergeX : SchedWrite;
+def WriteVIMergeI : SchedWrite;
+// 11.16. Vector Integer Move Instructions
+def WriteVIMovV : SchedWrite;
+def WriteVIMovX : SchedWrite;
+def WriteVIMovI : SchedWrite;
+
+// 12. Vector Fixed-Point Arithmetic Instructions
+// 12.1. Vector Single-Width Saturating Add and Subtract
+def WriteVSALUV : SchedWrite;
+def WriteVSALUX : SchedWrite;
+def WriteVSALUI : SchedWrite;
+// 12.2. Vector Single-Width Averaging Add and Subtract
+def WriteVAALUV : SchedWrite;
+def WriteVAALUX : SchedWrite;
+// 12.3. Vector Single-Width Fractional Multiply with Rounding and Saturation
+def WriteVSMulV : SchedWrite;
+def WriteVSMulX : SchedWrite;
+// 12.4. Vector Single-Width Scaling Shift Instructions
+def WriteVSShiftV : SchedWrite;
+def WriteVSShiftX : SchedWrite;
+def WriteVSShiftI : SchedWrite;
+// 12.5. Vector Narrowing Fixed-Point Clip Instructions
+def WriteVNClipV : SchedWrite;
+def WriteVNClipX : SchedWrite;
+def WriteVNClipI : SchedWrite;
+
+// 13. Vector Floating-Point Instructions
+// 13.2. Vector Single-Width Floating-Point Add/Subtract Instructions
+def WriteVFALUV : SchedWrite;
+def WriteVFALUF : SchedWrite;
+// 13.3. Vector Widening Floating-Point Add/Subtract Instructions
+def WriteVFWALUV : SchedWrite;
+def WriteVFWALUF : SchedWrite;
+// 13.4. Vector Single-Width Floating-Point Multiply/Divide Instructions
+def WriteVFMulV : SchedWrite;
+def WriteVFMulF : SchedWrite;
+def WriteVFDivV : SchedWrite;
+def WriteVFDivF : SchedWrite;
+// 13.5. Vector Widening Floating-Point Multiply
+def WriteVFWMulV : SchedWrite;
+def WriteVFWMulF : SchedWrite;
+// 13.6. Vector Single-Width Floating-Point Fused Multiply-Add Instructions
+def WriteVFMulAddV : SchedWrite;
+def WriteVFMulAddF : SchedWrite;
+// 13.7. Vector Widening Floating-Point Fused Multiply-Add Instructions
+def WriteVFWMulAddV : SchedWrite;
+def WriteVFWMulAddF : SchedWrite;
+// 13.8. Vector Floating-Point Square-Root Instruction
+def WriteVFSqrtV : SchedWrite;
+// 13.9. Vector Floating-Point Reciprocal Square-Root Estimate Instruction
+// 13.10. Vector Floating-Point Reciprocal Estimate Instruction
+def WriteVFRecpV : SchedWrite;
+// 13.11. Vector Floating-Point MIN/MAX Instructions
+// 13.13. Vector Floating-Point Compare Instructions
+def WriteVFCmpV : SchedWrite;
+def WriteVFCmpF : SchedWrite;
+// 13.12. Vector Floating-Point Sign-Injection Instructions
+def WriteVFSgnjV : SchedWrite;
+def WriteVFSgnjF : SchedWrite;
+// 13.14. Vector Floating-Point Classify Instruction
+def WriteVFClassV : SchedWrite;
+// 13.15. Vector Floating-Point Merge Instruction
+def WriteVFMergeV : SchedWrite;
+// 13.16. Vector Floating-Point Move Instruction
+def WriteVFMovV : SchedWrite;
+// 13.17. Single-Width Floating-Point/Integer Type-Convert Instructions
+def WriteVFCvtIToFV : SchedWrite;
+def WriteVFCvtFToIV : SchedWrite;
+def WriteVFCvtFToFV : SchedWrite;
+// 13.18. Widening Floating-Point/Integer Type-Convert Instructions
+def WriteVFWCvtIToFV : SchedWrite;
+def WriteVFWCvtFToIV : SchedWrite;
+def WriteVFWCvtFToFV : SchedWrite;
+// 13.19. Narrowing Floating-Point/Integer Type-Convert Instructions
+def WriteVFNCvtIToFV : SchedWrite;
+def WriteVFNCvtFToIV : SchedWrite;
+def WriteVFNCvtFToFV : SchedWrite;
+
+// 14. Vector Reduction Operations
+// 14.1. Vector Single-Width Integer Reduction Instructions
+def WriteVIRedV : SchedWrite;
+// 14.2. Vector Widening Integer Reduction Instructions
+def WriteVIWRedV : SchedWrite;
+// 14.3. Vector Single-Width Floating-Point Reduction Instructions
+def WriteVFRedV : SchedWrite;
+def WriteVFRedOV : SchedWrite;
+// 14.4. Vector Widening Floating-Point Reduction Instructions
+def WriteVFWRedV : SchedWrite;
+def WriteVFWRedOV : SchedWrite;
+
+// 15. Vector Mask Instructions
+// 15.1. Vector Mask-Register Logical Instructions
+def WriteVMALUV : SchedWrite;
+// 15.2. Vector Mask Population Count
+def WriteVMPopV : SchedWrite;
+// 15.3. Vector Find-First-Set Mask Bit
+def WriteVMFFSV : SchedWrite;
+// 15.4. Vector Set-Before-First Mask Bit
+// 15.5. Vector Set-Including-First Mask Bit
+// 15.6. Vector Set-only-First Mask Bit
+def WriteVMSFSV : SchedWrite;
+// 15.8. Vector Iota Instruction
+def WriteVMIotV : SchedWrite;
+// 15.9. Vector Element Index Instruction
+def WriteVMIdxV : SchedWrite;
+
+// 16. Vector Permutation Instructions
+// 16.1. Integer Scalar Move Instructions
+def WriteVIMovVX : SchedWrite;
+def WriteVIMovXV : SchedWrite;
+// 16.2. Floating-Point Scalar Move Instructions
+def WriteVFMovVF : SchedWrite;
+def WriteVFMovFV : SchedWrite;
+// 16.3. Vector Slide Instructions
+def WriteVISlideX : SchedWrite;
+def WriteVISlideI : SchedWrite;
+def WriteVISlide1X : SchedWrite;
+def WriteVFSlide1F : SchedWrite;
+// 16.4. Vector Register Gather Instructions
+def WriteVGatherV : SchedWrite;
+def WriteVGatherX : SchedWrite;
+def WriteVGatherI : SchedWrite;
+// 16.5. Vector Compress Instruction
+def WriteVCompressV : SchedWrite;
+// 16.6. Whole Vector Register Move
+def WriteVMov1V : SchedWrite;
+def WriteVMov2V : SchedWrite;
+def WriteVMov4V : SchedWrite;
+def WriteVMov8V : SchedWrite;
+
+//===----------------------------------------------------------------------===//
+/// Define scheduler resources associated with use operands.
+
+// 7. Vector Loads and Stores
+def ReadVLDX : SchedRead;
+def ReadVSTX : SchedRead;
+// 7.4. Vector Unit-Stride Instructions
+def ReadVSTE8V : SchedRead;
+def ReadVSTE16V : SchedRead;
+def ReadVSTE32V : SchedRead;
+def ReadVSTE64V : SchedRead;
+// 7.4.1. Vector Unit-Strided Mask
+def ReadVSTM : SchedRead;
+// 7.5. Vector Strided Instructions
+def ReadVLDSX : SchedRead;
+def ReadVSTSX : SchedRead;
+def ReadVSTS8V : SchedRead;
+def ReadVSTS16V : SchedRead;
+def ReadVSTS32V : SchedRead;
+def ReadVSTS64V : SchedRead;
+// 7.6. Vector Indexed Instructions
+def ReadVLDUXV : SchedRead;
+def ReadVLDOXV : SchedRead;
+def ReadVSTUX8 : SchedRead;
+def ReadVSTUX16 : SchedRead;
+def ReadVSTUX32 : SchedRead;
+def ReadVSTUX64 : SchedRead;
+def ReadVSTUXV : SchedRead;
+def ReadVSTUX8V : SchedRead;
+def ReadVSTUX16V : SchedRead;
+def ReadVSTUX32V : SchedRead;
+def ReadVSTUX64V : SchedRead;
+def ReadVSTOX8 : SchedRead;
+def ReadVSTOX16 : SchedRead;
+def ReadVSTOX32 : SchedRead;
+def ReadVSTOX64 : SchedRead;
+def ReadVSTOXV : SchedRead;
+def ReadVSTOX8V : SchedRead;
+def ReadVSTOX16V : SchedRead;
+def ReadVSTOX32V : SchedRead;
+def ReadVSTOX64V : SchedRead;
+// 7.9. Vector Whole Register Instructions
+def ReadVST1R : SchedRead;
+def ReadVST2R : SchedRead;
+def ReadVST4R : SchedRead;
+def ReadVST8R : SchedRead;
+
+// 11. Vector Integer Arithmetic Instructions
+// 11.1. Vector Single-Width Integer Add and Subtract
+// 11.5. Vector Bitwise Logical Instructions
+def ReadVIALUV : SchedRead;
+def ReadVIALUX : SchedRead;
+// 11.2. Vector Widening Integer Add/Subtract
+def ReadVIWALUV : SchedRead;
+def ReadVIWALUX : SchedRead;
+// 11.3. Vector Integer Extension
+def ReadVExtV : SchedRead;
+// 11.4. Vector Integer Arithmetic with Carry or Borrow Instructions
+def ReadVIALUCV : SchedRead;
+def ReadVIALUCX : SchedRead;
+// 11.6. Vector Single-Width Bit Shift Instructions
+def ReadVShiftV : SchedRead;
+def ReadVShiftX : SchedRead;
+// 11.7. Vector Narrowing Integer Right Shift Instructions
+def ReadVNShiftV : SchedRead;
+def ReadVNShiftX : SchedRead;
+// 11.8. Vector Integer Comparison Instructions
+// 11.9. Vector Integer Min/Max Instructions
+def ReadVICmpV : SchedRead;
+def ReadVICmpX : SchedRead;
+// 11.10. Vector Single-Width Integer Multiply Instructions
+def ReadVIMulV : SchedRead;
+def ReadVIMulX : SchedRead;
+// 11.11. Vector Integer Divide Instructions
+def ReadVIDivV : SchedRead;
+def ReadVIDivX : SchedRead;
+// 11.12. Vector Widening Integer Multiply Instructions
+def ReadVIWMulV : SchedRead;
+def ReadVIWMulX : SchedRead;
+// 11.13. Vector Single-Width Integer Multiply-Add Instructions
+def ReadVIMulAddV : SchedRead;
+def ReadVIMulAddX : SchedRead;
+// 11.14. Vector Widening Integer Multiply-Add Instructions
+def ReadVIWMulAddV : SchedRead;
+def ReadVIWMulAddX : SchedRead;
+// 11.15. Vector Integer Merge Instructions
+def ReadVIMergeV : SchedRead;
+def ReadVIMergeX : SchedRead;
+// 11.16. Vector Integer Move Instructions
+def ReadVIMovV : SchedRead;
+def ReadVIMovX : SchedRead;
+
+// 12. Vector Fixed-Point Arithmetic Instructions
+// 12.1. Vector Single-Width Saturating Add and Subtract
+def ReadVSALUV : SchedRead;
+def ReadVSALUX : SchedRead;
+// 12.2. Vector Single-Width Averaging Add and Subtract
+def ReadVAALUV : SchedRead;
+def ReadVAALUX : SchedRead;
+// 12.3. Vector Single-Width Fractional Multiply with Rounding and Saturation
+def ReadVSMulV : SchedRead;
+def ReadVSMulX : SchedRead;
+// 12.4. Vector Single-Width Scaling Shift Instructions
+def ReadVSShiftV : SchedRead;
+def ReadVSShiftX : SchedRead;
+// 12.5. Vector Narrowing Fixed-Point Clip Instructions
+def ReadVNClipV : SchedRead;
+def ReadVNClipX : SchedRead;
+
+// 13. Vector Floating-Point Instructions
+// 13.2. Vector Single-Width Floating-Point Add/Subtract Instructions
+def ReadVFALUV : SchedRead;
+def ReadVFALUF : SchedRead;
+// 13.3. Vector Widening Floating-Point Add/Subtract Instructions
+def ReadVFWALUV : SchedRead;
+def ReadVFWALUF : SchedRead;
+// 13.4. Vector Single-Width Floating-Point Multiply/Divide Instructions
+def ReadVFMulV : SchedRead;
+def ReadVFMulF : SchedRead;
+def ReadVFDivV : SchedRead;
+def ReadVFDivF : SchedRead;
+// 13.5. Vector Widening Floating-Point Multiply
+def ReadVFWMulV : SchedRead;
+def ReadVFWMulF : SchedRead;
+// 13.6. Vector Single-Width Floating-Point Fused Multiply-Add Instructions
+def ReadVFMulAddV : SchedRead;
+def ReadVFMulAddF : SchedRead;
+// 13.7. Vector Widening Floating-Point Fused Multiply-Add Instructions
+def ReadVFWMulAddV : SchedRead;
+def ReadVFWMulAddF : SchedRead;
+// 13.8. Vector Floating-Point Square-Root Instruction
+def ReadVFSqrtV : SchedRead;
+// 13.9. Vector Floating-Point Reciprocal Square-Root Estimate Instruction
+// 13.10. Vector Floating-Point Reciprocal Estimate Instruction
+def ReadVFRecpV : SchedRead;
+// 13.11. Vector Floating-Point MIN/MAX Instructions
+// 13.13. Vector Floating-Point Compare Instructions
+def ReadVFCmpV : SchedRead;
+def ReadVFCmpF : SchedRead;
+// 13.12. Vector Floating-Point Sign-Injection Instructions
+def ReadVFSgnjV : SchedRead;
+def ReadVFSgnjF : SchedRead;
+// 13.14. Vector Floating-Point Classify Instruction
+def ReadVFClassV : SchedRead;
+// 13.15. Vector Floating-Point Merge Instruction
+def ReadVFMergeV : SchedRead;
+def ReadVFMergeF : SchedRead;
+// 13.16. Vector Floating-Point Move Instruction
+def ReadVFMovF : SchedRead;
+// 13.17. Single-Width Floating-Point/Integer Type-Convert Instructions
+def ReadVFCvtIToFV : SchedRead;
+def ReadVFCvtFToIV : SchedRead;
+// 13.18. Widening Floating-Point/Integer Type-Convert Instructions
+def ReadVFWCvtIToFV : SchedRead;
+def ReadVFWCvtFToIV : SchedRead;
+def ReadVFWCvtFToFV : SchedRead;
+// 13.19. Narrowing Floating-Point/Integer Type-Convert Instructions
+def ReadVFNCvtIToFV : SchedRead;
+def ReadVFNCvtFToIV : SchedRead;
+def ReadVFNCvtFToFV : SchedRead;
+
+// 14. Vector Reduction Operations
+// 14.1. Vector Single-Width Integer Reduction Instructions
+def ReadVIRedV : SchedRead;
+def ReadVIRedV0 : SchedRead;
+// 14.2. Vector Widening Integer Reduction Instructions
+def ReadVIWRedV : SchedRead;
+def ReadVIWRedV0 : SchedRead;
+// 14.3. Vector Single-Width Floating-Point Reduction Instructions
+def ReadVFRedV : SchedRead;
+def ReadVFRedV0 : SchedRead;
+def ReadVFRedOV : SchedRead;
+def ReadVFRedOV0 : SchedRead;
+// 14.4. Vector Widening Floating-Point Reduction Instructions
+def ReadVFWRedV : SchedRead;
+def ReadVFWRedV0 : SchedRead;
+def ReadVFWRedOV : SchedRead;
+def ReadVFWRedOV0 : SchedRead;
+
+// 15. Vector Mask Instructions
+// 15.1. Vector Mask-Register Logical Instructions
+def ReadVMALUV : SchedRead;
+// 15.2. Vector Mask Population Count
+def ReadVMPopV : SchedRead;
+// 15.3. Vector Find-First-Set Mask Bit
+def ReadVMFFSV : SchedRead;
+// 15.4. Vector Set-Before-First Mask Bit
+// 15.5. Vector Set-Including-First Mask Bit
+// 15.6. Vector Set-only-First Mask Bit
+def ReadVMSFSV : SchedRead;
+// 15.8. Vector Iota Instruction
+def ReadVMIotV : SchedRead;
+
+// 16. Vector Permutation Instructions
+// 16.1. Integer Scalar Move Instructions
+def ReadVIMovVX : SchedRead;
+def ReadVIMovXV : SchedRead;
+def ReadVIMovXX : SchedRead;
+// 16.2. Floating-Point Scalar Move Instructions
+def ReadVFMovVF : SchedRead;
+def ReadVFMovFV : SchedRead;
+def ReadVFMovFX : SchedRead;
+// 16.3. Vector Slide Instructions
+def ReadVISlideV : SchedRead;
+def ReadVISlideX : SchedRead;
+def ReadVFSlideV : SchedRead;
+def ReadVFSlideF : SchedRead;
+// 16.4. Vector Register Gather Instructions
+def ReadVGatherV : SchedRead;
+def ReadVGatherX : SchedRead;
+// 16.5. Vector Compress Instruction
+def ReadVCompressV : SchedRead;
+// 16.6. Whole Vector Register Move
+def ReadVMov1V : SchedRead;
+def ReadVMov2V : SchedRead;
+def ReadVMov4V : SchedRead;
+def ReadVMov8V : SchedRead;
+
+// Others
+def ReadVMask : SchedRead;
+
+//===----------------------------------------------------------------------===//
+/// Define default scheduler resources for V.
+
+multiclass UnsupportedSchedV {
+let Unsupported = true in {
+
+// 7. Vector Loads and Stores
+def : WriteRes<WriteVLDE8, []>;
+def : WriteRes<WriteVLDE16, []>;
+def : WriteRes<WriteVLDE32, []>;
+def : WriteRes<WriteVLDE64, []>;
+def : WriteRes<WriteVSTE8, []>;
+def : WriteRes<WriteVSTE16, []>;
+def : WriteRes<WriteVSTE32, []>;
+def : WriteRes<WriteVSTE64, []>;
+def : WriteRes<WriteVLDM, []>;
+def : WriteRes<WriteVSTM, []>;
+def : WriteRes<WriteVLDS8, []>;
+def : WriteRes<WriteVLDS16, []>;
+def : WriteRes<WriteVLDS32, []>;
+def : WriteRes<WriteVLDS64, []>;
+def : WriteRes<WriteVSTS8, []>;
+def : WriteRes<WriteVSTS16, []>;
+def : WriteRes<WriteVSTS32, []>;
+def : WriteRes<WriteVSTS64, []>;
+def : WriteRes<WriteVLDUX8, []>;
+def : WriteRes<WriteVLDUX16, []>;
+def : WriteRes<WriteVLDUX32, []>;
+def : WriteRes<WriteVLDUX64, []>;
+def : WriteRes<WriteVLDOX8, []>;
+def : WriteRes<WriteVLDOX16, []>;
+def : WriteRes<WriteVLDOX32, []>;
+def : WriteRes<WriteVLDOX64, []>;
+def : WriteRes<WriteVSTUX8, []>;
+def : WriteRes<WriteVSTUX16, []>;
+def : WriteRes<WriteVSTUX32, []>;
+def : WriteRes<WriteVSTUX64, []>;
+def : WriteRes<WriteVSTOX8, []>;
+def : WriteRes<WriteVSTOX16, []>;
+def : WriteRes<WriteVSTOX32, []>;
+def : WriteRes<WriteVSTOX64, []>;
+def : WriteRes<WriteVLDFF8, []>;
+def : WriteRes<WriteVLDFF16, []>;
+def : WriteRes<WriteVLDFF32, []>;
+def : WriteRes<WriteVLDFF64, []>;
+def : WriteRes<WriteVLD1R8, []>;
+def : WriteRes<WriteVLD1R16, []>;
+def : WriteRes<WriteVLD1R32, []>;
+def : WriteRes<WriteVLD1R64, []>;
+def : WriteRes<WriteVLD2R8, []>;
+def : WriteRes<WriteVLD2R16, []>;
+def : WriteRes<WriteVLD2R32, []>;
+def : WriteRes<WriteVLD2R64, []>;
+def : WriteRes<WriteVLD4R8, []>;
+def : WriteRes<WriteVLD4R16, []>;
+def : WriteRes<WriteVLD4R32, []>;
+def : WriteRes<WriteVLD4R64, []>;
+def : WriteRes<WriteVLD8R8, []>;
+def : WriteRes<WriteVLD8R16, []>;
+def : WriteRes<WriteVLD8R32, []>;
+def : WriteRes<WriteVLD8R64, []>;
+def : WriteRes<WriteVST1R, []>;
+def : WriteRes<WriteVST2R, []>;
+def : WriteRes<WriteVST4R, []>;
+def : WriteRes<WriteVST8R, []>;
+
+// 12. Vector Integer Arithmetic Instructions
+def : WriteRes<WriteVIALUV, []>;
+def : WriteRes<WriteVIALUX, []>;
+def : WriteRes<WriteVIALUI, []>;
+def : WriteRes<WriteVIWALUV, []>;
+def : WriteRes<WriteVIWALUX, []>;
+def : WriteRes<WriteVIWALUI, []>;
+def : WriteRes<WriteVExtV, []>;
+def : WriteRes<WriteVICALUV, []>;
+def : WriteRes<WriteVICALUX, []>;
+def : WriteRes<WriteVICALUI, []>;
+def : WriteRes<WriteVShiftV, []>;
+def : WriteRes<WriteVShiftX, []>;
+def : WriteRes<WriteVShiftI, []>;
+def : WriteRes<WriteVNShiftV, []>;
+def : WriteRes<WriteVNShiftX, []>;
+def : WriteRes<WriteVNShiftI, []>;
+def : WriteRes<WriteVICmpV, []>;
+def : WriteRes<WriteVICmpX, []>;
+def : WriteRes<WriteVICmpI, []>;
+def : WriteRes<WriteVIMulV, []>;
+def : WriteRes<WriteVIMulX, []>;
+def : WriteRes<WriteVIDivV, []>;
+def : WriteRes<WriteVIDivX, []>;
+def : WriteRes<WriteVIWMulV, []>;
+def : WriteRes<WriteVIWMulX, []>;
+def : WriteRes<WriteVIMulAddV, []>;
+def : WriteRes<WriteVIMulAddX, []>;
+def : WriteRes<WriteVIWMulAddV, []>;
+def : WriteRes<WriteVIWMulAddX, []>;
+def : WriteRes<WriteVIMergeV, []>;
+def : WriteRes<WriteVIMergeX, []>;
+def : WriteRes<WriteVIMergeI, []>;
+def : WriteRes<WriteVIMovV, []>;
+def : WriteRes<WriteVIMovX, []>;
+def : WriteRes<WriteVIMovI, []>;
+
+// 13. Vector Fixed-Point Arithmetic Instructions
+def : WriteRes<WriteVSALUV, []>;
+def : WriteRes<WriteVSALUX, []>;
+def : WriteRes<WriteVSALUI, []>;
+def : WriteRes<WriteVAALUV, []>;
+def : WriteRes<WriteVAALUX, []>;
+def : WriteRes<WriteVSMulV, []>;
+def : WriteRes<WriteVSMulX, []>;
+def : WriteRes<WriteVSShiftV, []>;
+def : WriteRes<WriteVSShiftX, []>;
+def : WriteRes<WriteVSShiftI, []>;
+def : WriteRes<WriteVNClipV, []>;
+def : WriteRes<WriteVNClipX, []>;
+def : WriteRes<WriteVNClipI, []>;
+
+// 14. Vector Floating-Point Instructions
+def : WriteRes<WriteVFALUV, []>;
+def : WriteRes<WriteVFALUF, []>;
+def : WriteRes<WriteVFWALUV, []>;
+def : WriteRes<WriteVFWALUF, []>;
+def : WriteRes<WriteVFMulV, []>;
+def : WriteRes<WriteVFMulF, []>;
+def : WriteRes<WriteVFDivV, []>;
+def : WriteRes<WriteVFDivF, []>;
+def : WriteRes<WriteVFWMulV, []>;
+def : WriteRes<WriteVFWMulF, []>;
+def : WriteRes<WriteVFMulAddV, []>;
+def : WriteRes<WriteVFMulAddF, []>;
+def : WriteRes<WriteVFWMulAddV, []>;
+def : WriteRes<WriteVFWMulAddF, []>;
+def : WriteRes<WriteVFSqrtV, []>;
+def : WriteRes<WriteVFRecpV, []>;
+def : WriteRes<WriteVFCmpV, []>;
+def : WriteRes<WriteVFCmpF, []>;
+def : WriteRes<WriteVFSgnjV, []>;
+def : WriteRes<WriteVFSgnjF, []>;
+def : WriteRes<WriteVFClassV, []>;
+def : WriteRes<WriteVFMergeV, []>;
+def : WriteRes<WriteVFMovV, []>;
+def : WriteRes<WriteVFCvtIToFV, []>;
+def : WriteRes<WriteVFCvtFToIV, []>;
+def : WriteRes<WriteVFCvtFToFV, []>;
+def : WriteRes<WriteVFWCvtIToFV, []>;
+def : WriteRes<WriteVFWCvtFToIV, []>;
+def : WriteRes<WriteVFWCvtFToFV, []>;
+def : WriteRes<WriteVFNCvtIToFV, []>;
+def : WriteRes<WriteVFNCvtFToIV, []>;
+def : WriteRes<WriteVFNCvtFToFV, []>;
+
+// 15. Vector Reduction Operations
+def : WriteRes<WriteVIRedV, []>;
+def : WriteRes<WriteVIWRedV, []>;
+def : WriteRes<WriteVFRedV, []>;
+def : WriteRes<WriteVFRedOV, []>;
+def : WriteRes<WriteVFWRedV, []>;
+def : WriteRes<WriteVFWRedOV, []>;
+
+// 16. Vector Mask Instructions
+def : WriteRes<WriteVMALUV, []>;
+def : WriteRes<WriteVMPopV, []>;
+def : WriteRes<WriteVMFFSV, []>;
+def : WriteRes<WriteVMSFSV, []>;
+def : WriteRes<WriteVMIotV, []>;
+def : WriteRes<WriteVMIdxV, []>;
+
+// 17. Vector Permutation Instructions
+def : WriteRes<WriteVIMovVX, []>;
+def : WriteRes<WriteVIMovXV, []>;
+def : WriteRes<WriteVFMovVF, []>;
+def : WriteRes<WriteVFMovFV, []>;
+def : WriteRes<WriteVISlideX, []>;
+def : WriteRes<WriteVISlideI, []>;
+def : WriteRes<WriteVISlide1X, []>;
+def : WriteRes<WriteVFSlide1F, []>;
+def : WriteRes<WriteVGatherV, []>;
+def : WriteRes<WriteVGatherX, []>;
+def : WriteRes<WriteVGatherI, []>;
+def : WriteRes<WriteVCompressV, []>;
+def : WriteRes<WriteVMov1V, []>;
+def : WriteRes<WriteVMov2V, []>;
+def : WriteRes<WriteVMov4V, []>;
+def : WriteRes<WriteVMov8V, []>;
+
+// 7. Vector Loads and Stores
+def : ReadAdvance<ReadVLDX, 0>;
+def : ReadAdvance<ReadVSTX, 0>;
+def : ReadAdvance<ReadVSTE8V, 0>;
+def : ReadAdvance<ReadVSTE16V, 0>;
+def : ReadAdvance<ReadVSTE32V, 0>;
+def : ReadAdvance<ReadVSTE64V, 0>;
+def : ReadAdvance<ReadVSTM, 0>;
+def : ReadAdvance<ReadVLDSX, 0>;
+def : ReadAdvance<ReadVSTSX, 0>;
+def : ReadAdvance<ReadVSTS8V, 0>;
+def : ReadAdvance<ReadVSTS16V, 0>;
+def : ReadAdvance<ReadVSTS32V, 0>;
+def : ReadAdvance<ReadVSTS64V, 0>;
+def : ReadAdvance<ReadVLDUXV, 0>;
+def : ReadAdvance<ReadVLDOXV, 0>;
+def : ReadAdvance<ReadVSTUXV, 0>;
+def : ReadAdvance<ReadVSTUX8, 0>;
+def : ReadAdvance<ReadVSTUX16, 0>;
+def : ReadAdvance<ReadVSTUX32, 0>;
+def : ReadAdvance<ReadVSTUX64, 0>;
+def : ReadAdvance<ReadVSTUX8V, 0>;
+def : ReadAdvance<ReadVSTUX16V, 0>;
+def : ReadAdvance<ReadVSTUX32V, 0>;
+def : ReadAdvance<ReadVSTUX64V, 0>;
+def : ReadAdvance<ReadVSTOX8, 0>;
+def : ReadAdvance<ReadVSTOX16, 0>;
+def : ReadAdvance<ReadVSTOX32, 0>;
+def : ReadAdvance<ReadVSTOX64, 0>;
+def : ReadAdvance<ReadVSTOXV, 0>;
+def : ReadAdvance<ReadVSTOX8V, 0>;
+def : ReadAdvance<ReadVSTOX16V, 0>;
+def : ReadAdvance<ReadVSTOX32V, 0>;
+def : ReadAdvance<ReadVSTOX64V, 0>;
+def : ReadAdvance<ReadVST1R, 0>;
+def : ReadAdvance<ReadVST2R, 0>;
+def : ReadAdvance<ReadVST4R, 0>;
+def : ReadAdvance<ReadVST8R, 0>;
+
+// 12. Vector Integer Arithmetic Instructions
+def : ReadAdvance<ReadVIALUV, 0>;
+def : ReadAdvance<ReadVIALUX, 0>;
+def : ReadAdvance<ReadVIWALUV, 0>;
+def : ReadAdvance<ReadVIWALUX, 0>;
+def : ReadAdvance<ReadVExtV, 0>;
+def : ReadAdvance<ReadVIALUCV, 0>;
+def : ReadAdvance<ReadVIALUCX, 0>;
+def : ReadAdvance<ReadVShiftV, 0>;
+def : ReadAdvance<ReadVShiftX, 0>;
+def : ReadAdvance<ReadVNShiftV, 0>;
+def : ReadAdvance<ReadVNShiftX, 0>;
+def : ReadAdvance<ReadVICmpV, 0>;
+def : ReadAdvance<ReadVICmpX, 0>;
+def : ReadAdvance<ReadVIMulV, 0>;
+def : ReadAdvance<ReadVIMulX, 0>;
+def : ReadAdvance<ReadVIDivV, 0>;
+def : ReadAdvance<ReadVIDivX, 0>;
+def : ReadAdvance<ReadVIWMulV, 0>;
+def : ReadAdvance<ReadVIWMulX, 0>;
+def : ReadAdvance<ReadVIMulAddV, 0>;
+def : ReadAdvance<ReadVIMulAddX, 0>;
+def : ReadAdvance<ReadVIWMulAddV, 0>;
+def : ReadAdvance<ReadVIWMulAddX, 0>;
+def : ReadAdvance<ReadVIMergeV, 0>;
+def : ReadAdvance<ReadVIMergeX, 0>;
+def : ReadAdvance<ReadVIMovV, 0>;
+def : ReadAdvance<ReadVIMovX, 0>;
+
+// 13. Vector Fixed-Point Arithmetic Instructions
+def : ReadAdvance<ReadVSALUV, 0>;
+def : ReadAdvance<ReadVSALUX, 0>;
+def : ReadAdvance<ReadVAALUV, 0>;
+def : ReadAdvance<ReadVAALUX, 0>;
+def : ReadAdvance<ReadVSMulV, 0>;
+def : ReadAdvance<ReadVSMulX, 0>;
+def : ReadAdvance<ReadVSShiftV, 0>;
+def : ReadAdvance<ReadVSShiftX, 0>;
+def : ReadAdvance<ReadVNClipV, 0>;
+def : ReadAdvance<ReadVNClipX, 0>;
+
+// 14. Vector Floating-Point Instructions
+def : ReadAdvance<ReadVFALUV, 0>;
+def : ReadAdvance<ReadVFALUF, 0>;
+def : ReadAdvance<ReadVFWALUV, 0>;
+def : ReadAdvance<ReadVFWALUF, 0>;
+def : ReadAdvance<ReadVFMulV, 0>;
+def : ReadAdvance<ReadVFMulF, 0>;
+def : ReadAdvance<ReadVFDivV, 0>;
+def : ReadAdvance<ReadVFDivF, 0>;
+def : ReadAdvance<ReadVFWMulV, 0>;
+def : ReadAdvance<ReadVFWMulF, 0>;
+def : ReadAdvance<ReadVFMulAddV, 0>;
+def : ReadAdvance<ReadVFMulAddF, 0>;
+def : ReadAdvance<ReadVFWMulAddV, 0>;
+def : ReadAdvance<ReadVFWMulAddF, 0>;
+def : ReadAdvance<ReadVFSqrtV, 0>;
+def : ReadAdvance<ReadVFRecpV, 0>;
+def : ReadAdvance<ReadVFCmpV, 0>;
+def : ReadAdvance<ReadVFCmpF, 0>;
+def : ReadAdvance<ReadVFSgnjV, 0>;
+def : ReadAdvance<ReadVFSgnjF, 0>;
+def : ReadAdvance<ReadVFClassV, 0>;
+def : ReadAdvance<ReadVFMergeV, 0>;
+def : ReadAdvance<ReadVFMergeF, 0>;
+def : ReadAdvance<ReadVFMovF, 0>;
+def : ReadAdvance<ReadVFCvtIToFV, 0>;
+def : ReadAdvance<ReadVFCvtFToIV, 0>;
+def : ReadAdvance<ReadVFWCvtIToFV, 0>;
+def : ReadAdvance<ReadVFWCvtFToIV, 0>;
+def : ReadAdvance<ReadVFWCvtFToFV, 0>;
+def : ReadAdvance<ReadVFNCvtIToFV, 0>;
+def : ReadAdvance<ReadVFNCvtFToIV, 0>;
+def : ReadAdvance<ReadVFNCvtFToFV, 0>;
+
+// 15. Vector Reduction Operations
+def : ReadAdvance<ReadVIRedV, 0>;
+def : ReadAdvance<ReadVIRedV0, 0>;
+def : ReadAdvance<ReadVIWRedV, 0>;
+def : ReadAdvance<ReadVIWRedV0, 0>;
+def : ReadAdvance<ReadVFRedV, 0>;
+def : ReadAdvance<ReadVFRedV0, 0>;
+def : ReadAdvance<ReadVFRedOV, 0>;
+def : ReadAdvance<ReadVFRedOV0, 0>;
+def : ReadAdvance<ReadVFWRedV, 0>;
+def : ReadAdvance<ReadVFWRedV0, 0>;
+def : ReadAdvance<ReadVFWRedOV, 0>;
+def : ReadAdvance<ReadVFWRedOV0, 0>;
+
+// 16. Vector Mask Instructions
+def : ReadAdvance<ReadVMALUV, 0>;
+def : ReadAdvance<ReadVMPopV, 0>;
+def : ReadAdvance<ReadVMFFSV, 0>;
+def : ReadAdvance<ReadVMSFSV, 0>;
+def : ReadAdvance<ReadVMIotV, 0>;
+
+// 17. Vector Permutation Instructions
+def : ReadAdvance<ReadVIMovVX, 0>;
+def : ReadAdvance<ReadVIMovXV, 0>;
+def : ReadAdvance<ReadVIMovXX, 0>;
+def : ReadAdvance<ReadVFMovVF, 0>;
+def : ReadAdvance<ReadVFMovFV, 0>;
+def : ReadAdvance<ReadVFMovFX, 0>;
+def : ReadAdvance<ReadVISlideV, 0>;
+def : ReadAdvance<ReadVISlideX, 0>;
+def : ReadAdvance<ReadVFSlideV, 0>;
+def : ReadAdvance<ReadVFSlideF, 0>;
+def : ReadAdvance<ReadVGatherV, 0>;
+def : ReadAdvance<ReadVGatherX, 0>;
+def : ReadAdvance<ReadVCompressV, 0>;
+def : ReadAdvance<ReadVMov1V, 0>;
+def : ReadAdvance<ReadVMov2V, 0>;
+def : ReadAdvance<ReadVMov4V, 0>;
+def : ReadAdvance<ReadVMov8V, 0>;
+
+// Others
+def : ReadAdvance<ReadVMask, 0>;
+
+} // Unsupported
+} // UnsupportedSchedV
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
index b19fdcb0082b..1063134b8a6c 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -17,7 +17,7 @@
#include "RISCVLegalizerInfo.h"
#include "RISCVRegisterBankInfo.h"
#include "RISCVTargetMachine.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
@@ -45,6 +45,11 @@ static cl::opt<unsigned> RVVVectorLMULMax(
"Fractional LMUL values are not supported."),
cl::init(8), cl::Hidden);
+static cl::opt<unsigned> RVVVectorELENMax(
+ "riscv-v-fixed-length-vector-elen-max",
+ cl::desc("The maximum ELEN value to use for fixed length vectors."),
+ cl::init(64), cl::Hidden);
+
void RISCVSubtarget::anchor() {}
RISCVSubtarget &
@@ -106,7 +111,8 @@ const RegisterBankInfo *RISCVSubtarget::getRegBankInfo() const {
}
unsigned RISCVSubtarget::getMaxRVVVectorSizeInBits() const {
- assert(hasStdExtV() && "Tried to get vector length without V support!");
+ assert(hasVInstructions() &&
+ "Tried to get vector length without Zve or V extension support!");
if (RVVVectorBitsMax == 0)
return 0;
assert(RVVVectorBitsMax >= 128 && RVVVectorBitsMax <= 65536 &&
@@ -121,8 +127,8 @@ unsigned RISCVSubtarget::getMaxRVVVectorSizeInBits() const {
}
unsigned RISCVSubtarget::getMinRVVVectorSizeInBits() const {
- assert(hasStdExtV() &&
- "Tried to get vector length without V extension support!");
+ assert(hasVInstructions() &&
+ "Tried to get vector length without Zve or V extension support!");
assert((RVVVectorBitsMin == 0 ||
(RVVVectorBitsMin >= 128 && RVVVectorBitsMax <= 65536 &&
isPowerOf2_32(RVVVectorBitsMin))) &&
@@ -138,13 +144,24 @@ unsigned RISCVSubtarget::getMinRVVVectorSizeInBits() const {
}
unsigned RISCVSubtarget::getMaxLMULForFixedLengthVectors() const {
- assert(hasStdExtV() &&
- "Tried to get maximum LMUL without V extension support!");
+ assert(hasVInstructions() &&
+ "Tried to get vector length without Zve or V extension support!");
assert(RVVVectorLMULMax <= 8 && isPowerOf2_32(RVVVectorLMULMax) &&
"V extension requires a LMUL to be at most 8 and a power of 2!");
- return PowerOf2Floor(std::max<unsigned>(RVVVectorLMULMax, 1));
+ return PowerOf2Floor(
+ std::max<unsigned>(std::min<unsigned>(RVVVectorLMULMax, 8), 1));
+}
+
+unsigned RISCVSubtarget::getMaxELENForFixedLengthVectors() const {
+ assert(hasVInstructions() &&
+ "Tried to get maximum ELEN without Zve or V extension support!");
+ assert(RVVVectorELENMax <= 64 && RVVVectorELENMax >= 8 &&
+ isPowerOf2_32(RVVVectorELENMax) &&
+         "V extension requires an ELEN to be a power of 2 between 8 and 64!");
+ return PowerOf2Floor(
+ std::max<unsigned>(std::min<unsigned>(RVVVectorELENMax, 64), 8));
}
bool RISCVSubtarget::useRVVForFixedLengthVectors() const {
- return hasStdExtV() && getMinRVVVectorSizeInBits() != 0;
+ return hasVInstructions() && getMinRVVVectorSizeInBits() != 0;
}
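
A minimal standalone sketch (not taken from the patch) of how the new riscv-v-fixed-length-vector-elen-max clamp behaves; powerOf2Floor below is a local stand-in for llvm::PowerOf2Floor and the helper name is assumed for illustration only.

#include <algorithm>
#include <cstdint>
#include <iostream>

// Largest power of two <= V (0 for 0), mirroring llvm::PowerOf2Floor.
static uint64_t powerOf2Floor(uint64_t V) {
  uint64_t P = 1;
  while (P * 2 <= V)
    P *= 2;
  return V ? P : 0;
}

// Clamp the requested ELEN into [8, 64], then round down to a power of two,
// as the new getMaxELENForFixedLengthVectors() does.
static unsigned maxELENForFixedLengthVectors(unsigned RequestedELEN) {
  return static_cast<unsigned>(powerOf2Floor(
      std::max<unsigned>(std::min<unsigned>(RequestedELEN, 64), 8)));
}

int main() {
  std::cout << maxELENForFixedLengthVectors(64) << "\n"; // 64
  std::cout << maxELENForFixedLengthVectors(48) << "\n"; // 32
  std::cout << maxELENForFixedLengthVectors(8) << "\n";  // 8
}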
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index ce36331e044d..deb2a11f98f1 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -39,7 +39,6 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
bool HasStdExtF = false;
bool HasStdExtD = false;
bool HasStdExtC = false;
- bool HasStdExtB = false;
bool HasStdExtZba = false;
bool HasStdExtZbb = false;
bool HasStdExtZbc = false;
@@ -50,10 +49,10 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
bool HasStdExtZbr = false;
bool HasStdExtZbs = false;
bool HasStdExtZbt = false;
- bool HasStdExtZbproposedc = false;
bool HasStdExtV = false;
bool HasStdExtZvlsseg = false;
bool HasStdExtZvamo = false;
+ bool HasStdExtZfhmin = false;
bool HasStdExtZfh = false;
bool HasRV64 = false;
bool IsRV32E = false;
@@ -107,7 +106,6 @@ public:
bool hasStdExtF() const { return HasStdExtF; }
bool hasStdExtD() const { return HasStdExtD; }
bool hasStdExtC() const { return HasStdExtC; }
- bool hasStdExtB() const { return HasStdExtB; }
bool hasStdExtZba() const { return HasStdExtZba; }
bool hasStdExtZbb() const { return HasStdExtZbb; }
bool hasStdExtZbc() const { return HasStdExtZbc; }
@@ -118,10 +116,10 @@ public:
bool hasStdExtZbr() const { return HasStdExtZbr; }
bool hasStdExtZbs() const { return HasStdExtZbs; }
bool hasStdExtZbt() const { return HasStdExtZbt; }
- bool hasStdExtZbproposedc() const { return HasStdExtZbproposedc; }
bool hasStdExtV() const { return HasStdExtV; }
bool hasStdExtZvlsseg() const { return HasStdExtZvlsseg; }
bool hasStdExtZvamo() const { return HasStdExtZvamo; }
+ bool hasStdExtZfhmin() const { return HasStdExtZfhmin; }
bool hasStdExtZfh() const { return HasStdExtZfh; }
bool is64Bit() const { return HasRV64; }
bool isRV32E() const { return IsRV32E; }
@@ -135,8 +133,17 @@ public:
assert(i < RISCV::NUM_TARGET_REGS && "Register out of range");
return UserReservedRegister[i];
}
+
+ // Vector codegen related methods.
+ bool hasVInstructions() const { return HasStdExtV; }
+ bool hasVInstructionsI64() const { return HasStdExtV; }
+ bool hasVInstructionsF16() const { return HasStdExtV && hasStdExtZfh(); }
+ bool hasVInstructionsF32() const { return HasStdExtV && hasStdExtF(); }
+ bool hasVInstructionsF64() const { return HasStdExtV && hasStdExtD(); }
+ // F16 and F64 both require F32.
+ bool hasVInstructionsAnyF() const { return hasVInstructionsF32(); }
unsigned getMaxInterleaveFactor() const {
- return hasStdExtV() ? MaxInterleaveFactor : 1;
+ return hasVInstructions() ? MaxInterleaveFactor : 1;
}
protected:
@@ -158,6 +165,7 @@ public:
unsigned getMaxRVVVectorSizeInBits() const;
unsigned getMinRVVVectorSizeInBits() const;
unsigned getMaxLMULForFixedLengthVectors() const;
+ unsigned getMaxELENForFixedLengthVectors() const;
bool useRVVForFixedLengthVectors() const;
};
} // End llvm namespace
diff --git a/llvm/lib/Target/RISCV/RISCVSystemOperands.td b/llvm/lib/Target/RISCV/RISCVSystemOperands.td
index a561772b650b..41599dd8bb3f 100644
--- a/llvm/lib/Target/RISCV/RISCVSystemOperands.td
+++ b/llvm/lib/Target/RISCV/RISCVSystemOperands.td
@@ -385,6 +385,7 @@ def : SysReg<"dscratch1", 0x7B3>;
def : SysReg<"vstart", 0x008>;
def : SysReg<"vxsat", 0x009>;
def : SysReg<"vxrm", 0x00A>;
+def : SysReg<"vcsr", 0x00F>;
def : SysReg<"vl", 0xC20>;
def : SysReg<"vtype", 0xC21>;
def : SysReg<"vlenb", 0xC22>;
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index b18ee6009217..b421eba8d442 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -27,8 +27,8 @@
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/InitializePasses.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
@@ -37,6 +37,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target());
auto *PR = PassRegistry::getPassRegistry();
initializeGlobalISel(*PR);
+ initializeRISCVGatherScatterLoweringPass(*PR);
initializeRISCVMergeBaseOffsetOptPass(*PR);
initializeRISCVExpandPseudoPass(*PR);
initializeRISCVInsertVSETVLIPass(*PR);
@@ -149,6 +150,9 @@ TargetPassConfig *RISCVTargetMachine::createPassConfig(PassManagerBase &PM) {
void RISCVPassConfig::addIRPasses() {
addPass(createAtomicExpandPass());
+
+ addPass(createRISCVGatherScatterLoweringPass());
+
TargetPassConfig::addIRPasses();
}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index fd110db1064b..56f0952fafc9 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -52,8 +52,15 @@ InstructionCost RISCVTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
// split up large offsets in GEP into better parts than ConstantHoisting
// can.
return TTI::TCC_Free;
- case Instruction::Add:
case Instruction::And:
+ // zext.h
+ if (Imm == UINT64_C(0xffff) && ST->hasStdExtZbb())
+ return TTI::TCC_Free;
+ // zext.w
+ if (Imm == UINT64_C(0xffffffff) && ST->hasStdExtZbb())
+ return TTI::TCC_Free;
+ LLVM_FALLTHROUGH;
+ case Instruction::Add:
case Instruction::Or:
case Instruction::Xor:
case Instruction::Mul:
@@ -125,7 +132,7 @@ Optional<unsigned> RISCVTTIImpl::getMaxVScale() const {
// know whether the LoopVectorizer is safe to do or not.
// We only consider to use single vector register (LMUL = 1) to vectorize.
unsigned MaxVectorSizeInBits = ST->getMaxRVVVectorSizeInBits();
- if (ST->hasStdExtV() && MaxVectorSizeInBits != 0)
+ if (ST->hasVInstructions() && MaxVectorSizeInBits != 0)
return MaxVectorSizeInBits / RISCV::RVVBitsPerBlock;
return BaseT::getMaxVScale();
}
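
A hedged sketch, mirroring only the conditions in the getIntImmCostInst hunk above, of which AND immediates are treated as free when Zbb is available; the helper name is invented for this example.

#include <cstdint>
#include <iostream>

// An AND with these masks lowers to zext.h / zext.w and is costed as free,
// matching the checks added in getIntImmCostInst above.
static bool isFreeAndImmediate(uint64_t Imm, bool HasZbb) {
  return HasZbb && (Imm == UINT64_C(0xffff) || Imm == UINT64_C(0xffffffff));
}

int main() {
  std::cout << isFreeAndImmediate(0xffff, true) << "\n";      // 1 (zext.h)
  std::cout << isFreeAndImmediate(0xffffffff, true) << "\n";  // 1 (zext.w)
  std::cout << isFreeAndImmediate(0xff, true) << "\n";        // 0
}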
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 95dacb1e6285..675681616d6e 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -18,6 +18,7 @@
#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
+#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Function.h"
@@ -54,7 +55,7 @@ public:
TargetTransformInfo::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
bool shouldExpandReduction(const IntrinsicInst *II) const;
- bool supportsScalableVectors() const { return ST->hasStdExtV(); }
+ bool supportsScalableVectors() const { return ST->hasVInstructions(); }
Optional<unsigned> getMaxVScale() const;
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
@@ -63,52 +64,44 @@ public:
return TypeSize::getFixed(ST->getXLen());
case TargetTransformInfo::RGK_FixedWidthVector:
return TypeSize::getFixed(
- ST->hasStdExtV() ? ST->getMinRVVVectorSizeInBits() : 0);
+ ST->hasVInstructions() ? ST->getMinRVVVectorSizeInBits() : 0);
case TargetTransformInfo::RGK_ScalableVector:
return TypeSize::getScalable(
- ST->hasStdExtV() ? ST->getMinRVVVectorSizeInBits() : 0);
+ ST->hasVInstructions() ? RISCV::RVVBitsPerBlock : 0);
}
llvm_unreachable("Unsupported register kind");
}
+ unsigned getMinVectorRegisterBitWidth() const {
+ return ST->hasVInstructions() ? ST->getMinRVVVectorSizeInBits() : 0;
+ }
+
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
const Value *Ptr, bool VariableMask,
Align Alignment,
TTI::TargetCostKind CostKind,
const Instruction *I);
- bool isLegalElementTypeForRVV(Type *ScalarTy) const {
- if (ScalarTy->isPointerTy())
- return true;
-
- if (ScalarTy->isIntegerTy(8) || ScalarTy->isIntegerTy(16) ||
- ScalarTy->isIntegerTy(32) || ScalarTy->isIntegerTy(64))
- return true;
-
- if (ScalarTy->isHalfTy())
- return ST->hasStdExtZfh();
- if (ScalarTy->isFloatTy())
- return ST->hasStdExtF();
- if (ScalarTy->isDoubleTy())
- return ST->hasStdExtD();
-
- return false;
- }
-
bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) {
- if (!ST->hasStdExtV())
+ if (!ST->hasVInstructions())
return false;
// Only support fixed vectors if we know the minimum vector size.
if (isa<FixedVectorType>(DataType) && ST->getMinRVVVectorSizeInBits() == 0)
return false;
+ // Don't allow elements larger than the ELEN.
+ // FIXME: How to limit for scalable vectors?
+ if (isa<FixedVectorType>(DataType) &&
+ DataType->getScalarSizeInBits() > ST->getMaxELENForFixedLengthVectors())
+ return false;
+
if (Alignment <
DL.getTypeStoreSize(DataType->getScalarType()).getFixedSize())
return false;
- return isLegalElementTypeForRVV(DataType->getScalarType());
+ return TLI->isLegalElementTypeForRVV(DataType->getScalarType());
}
bool isLegalMaskedLoad(Type *DataType, Align Alignment) {
@@ -119,18 +112,24 @@ public:
}
bool isLegalMaskedGatherScatter(Type *DataType, Align Alignment) {
- if (!ST->hasStdExtV())
+ if (!ST->hasVInstructions())
return false;
// Only support fixed vectors if we know the minimum vector size.
if (isa<FixedVectorType>(DataType) && ST->getMinRVVVectorSizeInBits() == 0)
return false;
+ // Don't allow elements larger than the ELEN.
+ // FIXME: How to limit for scalable vectors?
+ if (isa<FixedVectorType>(DataType) &&
+ DataType->getScalarSizeInBits() > ST->getMaxELENForFixedLengthVectors())
+ return false;
+
if (Alignment <
DL.getTypeStoreSize(DataType->getScalarType()).getFixedSize())
return false;
- return isLegalElementTypeForRVV(DataType->getScalarType());
+ return TLI->isLegalElementTypeForRVV(DataType->getScalarType());
}
bool isLegalMaskedGather(Type *DataType, Align Alignment) {
@@ -150,14 +149,14 @@ public:
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
ElementCount VF) const {
- if (!ST->hasStdExtV())
+ if (!ST->hasVInstructions())
return false;
if (!VF.isScalable())
return true;
Type *Ty = RdxDesc.getRecurrenceType();
- if (!isLegalElementTypeForRVV(Ty))
+ if (!TLI->isLegalElementTypeForRVV(Ty))
return false;
switch (RdxDesc.getRecurrenceKind()) {
diff --git a/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.cpp b/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.cpp
index 4f265d556380..27d1326d5f6c 100644
--- a/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.cpp
+++ b/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#include "TargetInfo/RISCVTargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
Target &llvm::getTheRISCV32Target() {
diff --git a/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
index 9a2df8ca7fe9..48e6903bd1b1 100644
--- a/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
+++ b/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
@@ -25,10 +25,10 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SMLoc.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
diff --git a/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp b/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
index 5c4419c108c0..142124a8e0d9 100644
--- a/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
+++ b/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
@@ -17,7 +17,7 @@
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
index 51eccfa52359..e950f9582f09 100644
--- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
+++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
@@ -15,8 +15,8 @@
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/EndianStream.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -274,7 +274,8 @@ namespace {
llvm_unreachable("relaxInstruction() unimplemented");
}
- bool writeNopData(raw_ostream &OS, uint64_t Count) const override {
+ bool writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const override {
// Cannot emit NOP with size not multiple of 32 bits.
if (Count % 4 != 0)
return false;
diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
index 9531e3105fe2..49b75b7e0bd1 100644
--- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
+++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
@@ -18,8 +18,8 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp b/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
index 2006c9bede34..f6f9c0a1de81 100644
--- a/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -29,7 +29,7 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index 2007303d9903..ed1faf6b1fe8 100644
--- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -1614,11 +1614,14 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
if (!Subtarget->is64Bit()) {
// These libcalls are not available in 32-bit.
+ setLibcallName(RTLIB::MULO_I64, nullptr);
setLibcallName(RTLIB::SHL_I128, nullptr);
setLibcallName(RTLIB::SRL_I128, nullptr);
setLibcallName(RTLIB::SRA_I128, nullptr);
}
+ setLibcallName(RTLIB::MULO_I128, nullptr);
+
if (!Subtarget->isV9()) {
// SparcV8 does not have FNEGD and FABSD.
setOperationAction(ISD::FNEG, MVT::f64, Custom);
@@ -2957,8 +2960,15 @@ static SDValue LowerUMULO_SMULO(SDValue Op, SelectionDAG &DAG,
SDValue ShiftAmt = DAG.getConstant(63, dl, VT);
SDValue RHS = Op.getOperand(1);
- SDValue HiLHS = DAG.getNode(ISD::SRA, dl, VT, LHS, ShiftAmt);
- SDValue HiRHS = DAG.getNode(ISD::SRA, dl, MVT::i64, RHS, ShiftAmt);
+ SDValue HiLHS, HiRHS;
+ if (isSigned) {
+ HiLHS = DAG.getNode(ISD::SRA, dl, VT, LHS, ShiftAmt);
+ HiRHS = DAG.getNode(ISD::SRA, dl, MVT::i64, RHS, ShiftAmt);
+ } else {
+ HiLHS = DAG.getConstant(0, dl, VT);
+ HiRHS = DAG.getConstant(0, dl, MVT::i64);
+ }
+
SDValue Args[] = { HiLHS, LHS, HiRHS, RHS };
TargetLowering::MakeLibCallOptions CallOptions;
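
A small sketch, assuming nothing beyond the hunk above, of the high-word operands now passed to the 128-bit multiply-with-overflow libcall: sign-replicated for the signed case, zero for the unsigned case; the helper is illustrative only.

#include <cstdint>
#include <iostream>

// High 64 bits of the sign- or zero-extended 128-bit operand built from Lo.
static uint64_t highWord(uint64_t Lo, bool IsSigned) {
  // Arithmetic shift by 63 replicates the sign bit; unsigned inputs get zero.
  return IsSigned ? static_cast<uint64_t>(static_cast<int64_t>(Lo) >> 63) : 0;
}

int main() {
  std::cout << std::hex << highWord(0xffffffffffffffffULL, true) << "\n";  // ffffffffffffffff
  std::cout << std::hex << highWord(0xffffffffffffffffULL, false) << "\n"; // 0
}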
diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.cpp b/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
index dc3a41c63098..a8a0b2cc9e67 100644
--- a/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -20,8 +20,8 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.td b/llvm/lib/Target/Sparc/SparcInstrInfo.td
index b161e2a9d087..5e305fc9df71 100644
--- a/llvm/lib/Target/Sparc/SparcInstrInfo.td
+++ b/llvm/lib/Target/Sparc/SparcInstrInfo.td
@@ -369,8 +369,7 @@ multiclass Load<string OpcStr, bits<6> Op3Val, SDPatternOperator OpNode,
// TODO: Instructions of the LoadASI class are currently asm only; hooking up
// CodeGen's address spaces to use these is a future task.
-class LoadASI<string OpcStr, bits<6> Op3Val, SDPatternOperator OpNode,
- RegisterClass RC, ValueType Ty, InstrItinClass itin = NoItinerary> :
+class LoadASI<string OpcStr, bits<6> Op3Val, RegisterClass RC> :
F3_1_asi<3, Op3Val, (outs RC:$dst), (ins MEMrr:$addr, i8imm:$asi),
!strconcat(OpcStr, "a [$addr] $asi, $dst"),
[]>;
@@ -380,7 +379,7 @@ multiclass LoadA<string OpcStr, bits<6> Op3Val, bits<6> LoadAOp3Val,
SDPatternOperator OpNode, RegisterClass RC, ValueType Ty,
InstrItinClass itin = NoItinerary> :
Load<OpcStr, Op3Val, OpNode, RC, Ty, itin> {
- def Arr : LoadASI<OpcStr, LoadAOp3Val, OpNode, RC, Ty>;
+ def Arr : LoadASI<OpcStr, LoadAOp3Val, RC>;
}
// The LDSTUB instruction is supported for asm only.
@@ -411,8 +410,7 @@ multiclass Store<string OpcStr, bits<6> Op3Val, SDPatternOperator OpNode,
// TODO: Instructions of the StoreASI class are currently asm only; hooking up
// CodeGen's address spaces to use these is a future task.
-class StoreASI<string OpcStr, bits<6> Op3Val,
- SDPatternOperator OpNode, RegisterClass RC, ValueType Ty,
+class StoreASI<string OpcStr, bits<6> Op3Val, RegisterClass RC,
InstrItinClass itin = IIC_st> :
F3_1_asi<3, Op3Val, (outs), (ins MEMrr:$addr, RC:$rd, i8imm:$asi),
!strconcat(OpcStr, "a $rd, [$addr] $asi"),
@@ -420,10 +418,9 @@ class StoreASI<string OpcStr, bits<6> Op3Val,
itin>;
multiclass StoreA<string OpcStr, bits<6> Op3Val, bits<6> StoreAOp3Val,
- SDPatternOperator OpNode, RegisterClass RC, ValueType Ty,
- InstrItinClass itin = IIC_st> :
+ SDPatternOperator OpNode, RegisterClass RC, ValueType Ty> :
Store<OpcStr, Op3Val, OpNode, RC, Ty> {
- def Arr : StoreASI<OpcStr, StoreAOp3Val, OpNode, RC, Ty, itin>;
+ def Arr : StoreASI<OpcStr, StoreAOp3Val, RC>;
}
//===----------------------------------------------------------------------===//
@@ -523,12 +520,12 @@ let DecoderMethod = "DecodeLoadIntPair" in
// Section B.2 - Load Floating-point Instructions, p. 92
let DecoderMethod = "DecodeLoadFP" in {
defm LDF : Load<"ld", 0b100000, load, FPRegs, f32, IIC_iu_or_fpu_instr>;
- def LDFArr : LoadASI<"ld", 0b110000, load, FPRegs, f32, IIC_iu_or_fpu_instr>,
+ def LDFArr : LoadASI<"ld", 0b110000, FPRegs>,
Requires<[HasV9]>;
}
let DecoderMethod = "DecodeLoadDFP" in {
defm LDDF : Load<"ldd", 0b100011, load, DFPRegs, f64, IIC_ldd>;
- def LDDFArr : LoadASI<"ldd", 0b110011, load, DFPRegs, f64>,
+ def LDDFArr : LoadASI<"ldd", 0b110011, DFPRegs>,
Requires<[HasV9]>;
}
let DecoderMethod = "DecodeLoadQFP" in
@@ -573,17 +570,17 @@ let DecoderMethod = "DecodeStoreInt" in {
}
let DecoderMethod = "DecodeStoreIntPair" in
- defm STD : StoreA<"std", 0b000111, 0b010111, store, IntPair, v2i32, IIC_std>;
+ defm STD : StoreA<"std", 0b000111, 0b010111, store, IntPair, v2i32>;
// Section B.5 - Store Floating-point Instructions, p. 97
let DecoderMethod = "DecodeStoreFP" in {
defm STF : Store<"st", 0b100100, store, FPRegs, f32>;
- def STFArr : StoreASI<"st", 0b110100, store, FPRegs, f32>,
+ def STFArr : StoreASI<"st", 0b110100, FPRegs>,
Requires<[HasV9]>;
}
let DecoderMethod = "DecodeStoreDFP" in {
defm STDF : Store<"std", 0b100111, store, DFPRegs, f64, IIC_std>;
- def STDFArr : StoreASI<"std", 0b110111, store, DFPRegs, f64>,
+ def STDFArr : StoreASI<"std", 0b110111, DFPRegs>,
Requires<[HasV9]>;
}
let DecoderMethod = "DecodeStoreQFP" in
@@ -1623,6 +1620,17 @@ let hasSideEffects = 1 in {
}
}
+// Section A.42 - Prefetch Data
+let Predicates = [HasV9] in {
+ def PREFETCHr : F3_1<3, 0b101101,
+ (outs), (ins MEMrr:$addr, shift_imm5:$rd),
+ "prefetch [$addr], $rd", []>;
+ def PREFETCHi : F3_2<3, 0b101101,
+ (outs), (ins MEMri:$addr, shift_imm5:$rd),
+ "prefetch [$addr], $rd", []>;
+}
+
+
// Section A.43 - Read Privileged Register Instructions
let Predicates = [HasV9] in {
diff --git a/llvm/lib/Target/Sparc/SparcSubtarget.cpp b/llvm/lib/Target/Sparc/SparcSubtarget.cpp
index abc47ef51563..618a8633f0a9 100644
--- a/llvm/lib/Target/Sparc/SparcSubtarget.cpp
+++ b/llvm/lib/Target/Sparc/SparcSubtarget.cpp
@@ -12,8 +12,8 @@
#include "SparcSubtarget.h"
#include "Sparc.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/Sparc/SparcTargetMachine.cpp b/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
index 083339bc157c..27c49a408a02 100644
--- a/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -17,7 +17,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/LegacyPassManager.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSparcTarget() {
diff --git a/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp b/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
index 3bf5907012da..1138788ac7fa 100644
--- a/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
+++ b/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#include "TargetInfo/SparcTargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
Target &llvm::getTheSparcTarget() {
diff --git a/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
index 0de24245cfcc..40ed417d0817 100644
--- a/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
+++ b/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
@@ -9,6 +9,7 @@
#include "MCTargetDesc/SystemZInstPrinter.h"
#include "MCTargetDesc/SystemZMCAsmInfo.h"
#include "MCTargetDesc/SystemZMCTargetDesc.h"
+#include "SystemZTargetStreamer.h"
#include "TargetInfo/SystemZTargetInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
@@ -25,10 +26,10 @@
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SMLoc.h"
-#include "llvm/Support/TargetRegistry.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
@@ -39,13 +40,15 @@
using namespace llvm;
-// Return true if Expr is in the range [MinValue, MaxValue].
-static bool inRange(const MCExpr *Expr, int64_t MinValue, int64_t MaxValue) {
+// Return true if Expr is in the range [MinValue, MaxValue]. If AllowSymbol
+// is true, any MCExpr is accepted (address displacement).
+static bool inRange(const MCExpr *Expr, int64_t MinValue, int64_t MaxValue,
+ bool AllowSymbol = false) {
if (auto *CE = dyn_cast<MCConstantExpr>(Expr)) {
int64_t Value = CE->getValue();
return Value >= MinValue && Value <= MaxValue;
}
- return false;
+ return AllowSymbol;
}
namespace {
@@ -264,10 +267,10 @@ public:
return isMem(MemKind) && Mem.RegKind == RegKind;
}
bool isMemDisp12(MemoryKind MemKind, RegisterKind RegKind) const {
- return isMem(MemKind, RegKind) && inRange(Mem.Disp, 0, 0xfff);
+ return isMem(MemKind, RegKind) && inRange(Mem.Disp, 0, 0xfff, true);
}
bool isMemDisp20(MemoryKind MemKind, RegisterKind RegKind) const {
- return isMem(MemKind, RegKind) && inRange(Mem.Disp, -524288, 524287);
+ return isMem(MemKind, RegKind) && inRange(Mem.Disp, -524288, 524287, true);
}
bool isMemDisp12Len4(RegisterKind RegKind) const {
return isMemDisp12(BDLMem, RegKind) && inRange(Mem.Length.Imm, 1, 0x10);
@@ -405,6 +408,13 @@ private:
SMLoc StartLoc, EndLoc;
};
+ SystemZTargetStreamer &getTargetStreamer() {
+ assert(getParser().getStreamer().getTargetStreamer() &&
+ "do not have a target streamer");
+ MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
+ return static_cast<SystemZTargetStreamer &>(TS);
+ }
+
bool parseRegister(Register &Reg, bool RestoreOnFailure = false);
bool parseIntegerRegister(Register &Reg, RegisterGroup Group);
@@ -420,6 +430,7 @@ private:
bool parseAddressRegister(Register &Reg);
bool ParseDirectiveInsn(SMLoc L);
+ bool ParseDirectiveMachine(SMLoc L);
OperandMatchResultTy parseAddress(OperandVector &Operands,
MemoryKind MemKind,
@@ -1210,6 +1221,8 @@ bool SystemZAsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".insn")
return ParseDirectiveInsn(DirectiveID.getLoc());
+ if (IDVal == ".machine")
+ return ParseDirectiveMachine(DirectiveID.getLoc());
return true;
}
@@ -1322,6 +1335,28 @@ bool SystemZAsmParser::ParseDirectiveInsn(SMLoc L) {
return false;
}
+/// ParseDirectiveMachine
+/// ::= .machine [ mcpu ]
+bool SystemZAsmParser::ParseDirectiveMachine(SMLoc L) {
+ MCAsmParser &Parser = getParser();
+ if (Parser.getTok().isNot(AsmToken::Identifier) &&
+ Parser.getTok().isNot(AsmToken::String))
+ return Error(L, "unexpected token in '.machine' directive");
+
+ StringRef CPU = Parser.getTok().getIdentifier();
+ Parser.Lex();
+ if (parseToken(AsmToken::EndOfStatement))
+ return addErrorSuffix(" in '.machine' directive");
+
+ MCSubtargetInfo &STI = copySTI();
+ STI.setDefaultFeatures(CPU, /*TuneCPU*/ CPU, "");
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+
+ getTargetStreamer().emitMachine(CPU);
+
+ return false;
+}
+
bool SystemZAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
SMLoc &EndLoc, bool RestoreOnFailure) {
Register Reg;
@@ -1486,10 +1521,6 @@ bool SystemZAsmParser::parseOperand(OperandVector &Operands,
return false;
}
-static std::string SystemZMnemonicSpellCheck(StringRef S,
- const FeatureBitset &FBS,
- unsigned VariantID = 0);
-
bool SystemZAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out,
diff --git a/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
index e81db1030c01..5eba150dadc3 100644
--- a/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
+++ b/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
@@ -13,8 +13,8 @@
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/TargetRegistry.h"
#include <cassert>
#include <cstdint>
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp
index f3f3f096da33..0cb6bfaaebfb 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp
@@ -24,9 +24,9 @@ using namespace llvm;
#include "SystemZGenAsmWriter.inc"
void SystemZInstPrinter::printAddress(const MCAsmInfo *MAI, unsigned Base,
- int64_t Disp, unsigned Index,
+ const MCOperand &DispMO, unsigned Index,
raw_ostream &O) {
- O << Disp;
+ printOperand(DispMO, MAI, O);
if (Base || Index) {
O << '(';
if (Index) {
@@ -194,23 +194,23 @@ void SystemZInstPrinter::printOperand(const MCInst *MI, int OpNum,
void SystemZInstPrinter::printBDAddrOperand(const MCInst *MI, int OpNum,
raw_ostream &O) {
- printAddress(&MAI, MI->getOperand(OpNum).getReg(),
- MI->getOperand(OpNum + 1).getImm(), 0, O);
+ printAddress(&MAI, MI->getOperand(OpNum).getReg(), MI->getOperand(OpNum + 1),
+ 0, O);
}
void SystemZInstPrinter::printBDXAddrOperand(const MCInst *MI, int OpNum,
raw_ostream &O) {
- printAddress(&MAI, MI->getOperand(OpNum).getReg(),
- MI->getOperand(OpNum + 1).getImm(),
+ printAddress(&MAI, MI->getOperand(OpNum).getReg(), MI->getOperand(OpNum + 1),
MI->getOperand(OpNum + 2).getReg(), O);
}
void SystemZInstPrinter::printBDLAddrOperand(const MCInst *MI, int OpNum,
raw_ostream &O) {
unsigned Base = MI->getOperand(OpNum).getReg();
- uint64_t Disp = MI->getOperand(OpNum + 1).getImm();
+ const MCOperand &DispMO = MI->getOperand(OpNum + 1);
uint64_t Length = MI->getOperand(OpNum + 2).getImm();
- O << Disp << '(' << Length;
+ printOperand(DispMO, &MAI, O);
+ O << '(' << Length;
if (Base) {
O << ",";
printRegName(O, Base);
@@ -221,9 +221,10 @@ void SystemZInstPrinter::printBDLAddrOperand(const MCInst *MI, int OpNum,
void SystemZInstPrinter::printBDRAddrOperand(const MCInst *MI, int OpNum,
raw_ostream &O) {
unsigned Base = MI->getOperand(OpNum).getReg();
- uint64_t Disp = MI->getOperand(OpNum + 1).getImm();
+ const MCOperand &DispMO = MI->getOperand(OpNum + 1);
unsigned Length = MI->getOperand(OpNum + 2).getReg();
- O << Disp << "(";
+ printOperand(DispMO, &MAI, O);
+ O << "(";
printRegName(O, Length);
if (Base) {
O << ",";
@@ -234,8 +235,7 @@ void SystemZInstPrinter::printBDRAddrOperand(const MCInst *MI, int OpNum,
void SystemZInstPrinter::printBDVAddrOperand(const MCInst *MI, int OpNum,
raw_ostream &O) {
- printAddress(&MAI, MI->getOperand(OpNum).getReg(),
- MI->getOperand(OpNum + 1).getImm(),
+ printAddress(&MAI, MI->getOperand(OpNum).getReg(), MI->getOperand(OpNum + 1),
MI->getOperand(OpNum + 2).getReg(), O);
}
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h
index 0a57ca0082e6..008bf747e5a1 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h
@@ -33,8 +33,9 @@ public:
static const char *getRegisterName(unsigned RegNo);
// Print an address with the given base, displacement and index.
- static void printAddress(const MCAsmInfo *MAI, unsigned Base, int64_t Disp,
- unsigned Index, raw_ostream &O);
+ static void printAddress(const MCAsmInfo *MAI, unsigned Base,
+ const MCOperand &DispMO, unsigned Index,
+ raw_ostream &O);
// Print the given operand.
static void printOperand(const MCOperand &MO, const MCAsmInfo *MAI,
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
index 134c85e822be..0f5e0b9672a9 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
@@ -10,6 +10,8 @@
#include "MCTargetDesc/SystemZMCTargetDesc.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCInst.h"
@@ -21,7 +23,8 @@ using namespace llvm;
// Value is a fully-resolved relocation value: Symbol + Addend [- Pivot].
// Return the bits that should be installed in a relocation field for
// fixup kind Kind.
-static uint64_t extractBitsForFixup(MCFixupKind Kind, uint64_t Value) {
+static uint64_t extractBitsForFixup(MCFixupKind Kind, uint64_t Value,
+ const MCFixup &Fixup, MCContext &Ctx) {
if (Kind < FirstTargetFixupKind)
return Value;
@@ -32,6 +35,24 @@ static uint64_t extractBitsForFixup(MCFixupKind Kind, uint64_t Value) {
case SystemZ::FK_390_PC32DBL:
return (int64_t)Value / 2;
+ case SystemZ::FK_390_12:
+ if (!isUInt<12>(Value)) {
+ Ctx.reportError(Fixup.getLoc(), "displacement exceeds uint12");
+ return 0;
+ }
+ return Value;
+
+ case SystemZ::FK_390_20: {
+ if (!isInt<20>(Value)) {
+ Ctx.reportError(Fixup.getLoc(), "displacement exceeds int20");
+ return 0;
+ }
+    // The high byte of a 20-bit displacement value comes first.
+ uint64_t DLo = Value & 0xfff;
+ uint64_t DHi = (Value >> 12) & 0xff;
+ return (DLo << 8) | DHi;
+ }
+
case SystemZ::FK_390_TLS_CALL:
return 0;
}
@@ -63,7 +84,8 @@ public:
const MCAsmLayout &Layout) const override {
return false;
}
- bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
+ bool writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const override;
std::unique_ptr<MCObjectTargetWriter>
createObjectTargetWriter() const override {
return createSystemZObjectWriter(OSABI);
@@ -94,7 +116,9 @@ SystemZMCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
{ "FK_390_PC16DBL", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
{ "FK_390_PC24DBL", 0, 24, MCFixupKindInfo::FKF_IsPCRel },
{ "FK_390_PC32DBL", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
- { "FK_390_TLS_CALL", 0, 0, 0 }
+ { "FK_390_TLS_CALL", 0, 0, 0 },
+ { "FK_390_12", 4, 12, 0 },
+ { "FK_390_20", 4, 20, 0 }
};
// Fixup kinds from .reloc directive are like R_390_NONE. They
@@ -132,7 +156,7 @@ void SystemZMCAsmBackend::applyFixup(const MCAssembler &Asm,
assert(Offset + Size <= Data.size() && "Invalid fixup offset!");
// Big-endian insertion of Size bytes.
- Value = extractBitsForFixup(Kind, Value);
+ Value = extractBitsForFixup(Kind, Value, Fixup, Asm.getContext());
if (BitSize < 64)
Value &= ((uint64_t)1 << BitSize) - 1;
unsigned ShiftValue = (Size * 8) - 8;
@@ -142,7 +166,8 @@ void SystemZMCAsmBackend::applyFixup(const MCAssembler &Asm,
}
}
-bool SystemZMCAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
+bool SystemZMCAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const {
for (uint64_t I = 0; I != Count; ++I)
OS << '\x7';
return true;
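
A minimal worked example of the FK_390_20 repacking added above: the 20-bit displacement is split into a 12-bit low part and an 8-bit high part and recombined as (DLo << 8) | DHi before the big-endian insertion; the function name is assumed for illustration.

#include <cassert>
#include <cstdint>
#include <iostream>

// Repack a signed 20-bit displacement the way extractBitsForFixup does for
// SystemZ::FK_390_20.
static uint64_t encodeDisp20(int64_t Value) {
  assert(Value >= -(1 << 19) && Value < (1 << 19) && "displacement exceeds int20");
  uint64_t DLo = Value & 0xfff;        // low 12 bits
  uint64_t DHi = (Value >> 12) & 0xff; // high 8 bits
  return (DLo << 8) | DHi;
}

int main() {
  std::cout << std::hex << encodeDisp20(0x12345) << "\n"; // 34512
}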
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
index fa4864299586..e61b07e973e9 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
@@ -12,37 +12,39 @@
using namespace llvm;
-SystemZMCAsmInfo::SystemZMCAsmInfo(const Triple &TT) {
- CodePointerSize = 8;
+SystemZMCAsmInfoELF::SystemZMCAsmInfoELF(const Triple &TT) {
+ AssemblerDialect = AD_ATT;
CalleeSaveStackSlotSize = 8;
+ CodePointerSize = 8;
+ Data64bitsDirective = "\t.quad\t";
+ ExceptionsType = ExceptionHandling::DwarfCFI;
IsLittleEndian = false;
-
- AssemblerDialect = TT.isOSzOS() ? AD_HLASM : AD_ATT;
-
MaxInstLength = 6;
-
- CommentString = AssemblerDialect == AD_HLASM ? "*" : "#";
- RestrictCommentStringToStartOfStatement = (AssemblerDialect == AD_HLASM);
- AllowAdditionalComments = (AssemblerDialect == AD_ATT);
- AllowAtAtStartOfIdentifier = (AssemblerDialect == AD_HLASM);
- AllowDollarAtStartOfIdentifier = (AssemblerDialect == AD_HLASM);
- AllowHashAtStartOfIdentifier = (AssemblerDialect == AD_HLASM);
- DotIsPC = (AssemblerDialect == AD_ATT);
- StarIsPC = (AssemblerDialect == AD_HLASM);
- EmitGNUAsmStartIndentationMarker = (AssemblerDialect == AD_ATT);
- AllowAtInName = (AssemblerDialect == AD_HLASM);
- EmitLabelsInUpperCase = (AssemblerDialect == AD_HLASM);
-
- ZeroDirective = "\t.space\t";
- Data64bitsDirective = "\t.quad\t";
- UsesELFSectionDirectiveForBSS = true;
SupportsDebugInformation = true;
- ExceptionsType = ExceptionHandling::DwarfCFI;
+ UsesELFSectionDirectiveForBSS = true;
+ ZeroDirective = "\t.space\t";
}
-bool SystemZMCAsmInfo::isAcceptableChar(char C) const {
- if (AssemblerDialect == AD_ATT)
- return MCAsmInfo::isAcceptableChar(C);
+SystemZMCAsmInfoGOFF::SystemZMCAsmInfoGOFF(const Triple &TT) {
+ AllowAdditionalComments = false;
+ AllowAtInName = true;
+ AllowAtAtStartOfIdentifier = true;
+ AllowDollarAtStartOfIdentifier = true;
+ AllowHashAtStartOfIdentifier = true;
+ AssemblerDialect = AD_HLASM;
+ CalleeSaveStackSlotSize = 8;
+ CodePointerSize = 8;
+ CommentString = "*";
+ DotIsPC = false;
+ EmitGNUAsmStartIndentationMarker = false;
+ EmitLabelsInUpperCase = true;
+ IsLittleEndian = false;
+ MaxInstLength = 6;
+ RestrictCommentStringToStartOfStatement = true;
+ StarIsPC = true;
+ SupportsDebugInformation = true;
+}
+bool SystemZMCAsmInfoGOFF::isAcceptableChar(char C) const {
return MCAsmInfo::isAcceptableChar(C) || C == '#';
}
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h
index 389575d14679..b2f191424d01 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h
@@ -10,15 +10,21 @@
#define LLVM_LIB_TARGET_SYSTEMZ_MCTARGETDESC_SYSTEMZMCASMINFO_H
#include "llvm/MC/MCAsmInfoELF.h"
+#include "llvm/MC/MCAsmInfoGOFF.h"
#include "llvm/Support/Compiler.h"
namespace llvm {
class Triple;
enum SystemZAsmDialect { AD_ATT = 0, AD_HLASM = 1 };
-class SystemZMCAsmInfo : public MCAsmInfoELF {
+class SystemZMCAsmInfoELF : public MCAsmInfoELF {
public:
- explicit SystemZMCAsmInfo(const Triple &TT);
+ explicit SystemZMCAsmInfoELF(const Triple &TT);
+};
+
+class SystemZMCAsmInfoGOFF : public MCAsmInfoGOFF {
+public:
+ explicit SystemZMCAsmInfoGOFF(const Triple &TT);
bool isAcceptableChar(char C) const override;
};
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
index a5ccf4f68ffd..e280e4aaf3d8 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
@@ -12,6 +12,7 @@
#include "MCTargetDesc/SystemZMCFixups.h"
#include "MCTargetDesc/SystemZMCTargetDesc.h"
+#include "SystemZInstrInfo.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
@@ -60,6 +61,12 @@ private:
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ // Return the displacement value for the OpNum operand. If it is a symbol,
+ // add a fixup for it and return 0.
+ uint64_t getDispOpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ SystemZ::FixupKind Kind) const;
+
// Called by the TableGen code to get the binary encoding of an address.
// The index or length, if any, is encoded first, followed by the base,
// followed by the displacement. In a 20-bit displacement,
@@ -180,11 +187,29 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
}
uint64_t SystemZMCCodeEmitter::
+getDispOpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ SystemZ::FixupKind Kind) const {
+ const MCOperand &MO = MI.getOperand(OpNum);
+ if (MO.isImm())
+ return static_cast<uint64_t>(MO.getImm());
+ if (MO.isExpr()) {
+ // All instructions follow the pattern where the first displacement has a
+    // 2-byte offset, and the second one a 4-byte offset.
+ unsigned ByteOffs = Fixups.size() == 0 ? 2 : 4;
+ Fixups.push_back(MCFixup::create(ByteOffs, MO.getExpr(), (MCFixupKind)Kind));
+ assert(Fixups.size() <= 2 && "More than two memory operands in MI?");
+ return 0;
+ }
+ llvm_unreachable("Unexpected operand type!");
+}
+
+uint64_t SystemZMCCodeEmitter::
getBDAddr12Encoding(const MCInst &MI, unsigned OpNum,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups, STI);
- uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups, STI);
+ uint64_t Disp = getDispOpValue(MI, OpNum + 1, Fixups, SystemZ::FK_390_12);
assert(isUInt<4>(Base) && isUInt<12>(Disp));
return (Base << 12) | Disp;
}
@@ -194,7 +219,7 @@ getBDAddr20Encoding(const MCInst &MI, unsigned OpNum,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups, STI);
- uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups, STI);
+ uint64_t Disp = getDispOpValue(MI, OpNum + 1, Fixups, SystemZ::FK_390_20);
assert(isUInt<4>(Base) && isInt<20>(Disp));
return (Base << 20) | ((Disp & 0xfff) << 8) | ((Disp & 0xff000) >> 12);
}
@@ -204,7 +229,7 @@ getBDXAddr12Encoding(const MCInst &MI, unsigned OpNum,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups, STI);
- uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups, STI);
+ uint64_t Disp = getDispOpValue(MI, OpNum + 1, Fixups, SystemZ::FK_390_12);
uint64_t Index = getMachineOpValue(MI, MI.getOperand(OpNum + 2), Fixups, STI);
assert(isUInt<4>(Base) && isUInt<12>(Disp) && isUInt<4>(Index));
return (Index << 16) | (Base << 12) | Disp;
@@ -215,7 +240,7 @@ getBDXAddr20Encoding(const MCInst &MI, unsigned OpNum,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups, STI);
- uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups, STI);
+ uint64_t Disp = getDispOpValue(MI, OpNum + 1, Fixups, SystemZ::FK_390_20);
uint64_t Index = getMachineOpValue(MI, MI.getOperand(OpNum + 2), Fixups, STI);
assert(isUInt<4>(Base) && isInt<20>(Disp) && isUInt<4>(Index));
return (Index << 24) | (Base << 20) | ((Disp & 0xfff) << 8)
@@ -227,7 +252,7 @@ getBDLAddr12Len4Encoding(const MCInst &MI, unsigned OpNum,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups, STI);
- uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups, STI);
+ uint64_t Disp = getDispOpValue(MI, OpNum + 1, Fixups, SystemZ::FK_390_12);
uint64_t Len = getMachineOpValue(MI, MI.getOperand(OpNum + 2), Fixups, STI) - 1;
assert(isUInt<4>(Base) && isUInt<12>(Disp) && isUInt<4>(Len));
return (Len << 16) | (Base << 12) | Disp;
@@ -238,7 +263,7 @@ getBDLAddr12Len8Encoding(const MCInst &MI, unsigned OpNum,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups, STI);
- uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups, STI);
+ uint64_t Disp = getDispOpValue(MI, OpNum + 1, Fixups, SystemZ::FK_390_12);
uint64_t Len = getMachineOpValue(MI, MI.getOperand(OpNum + 2), Fixups, STI) - 1;
assert(isUInt<4>(Base) && isUInt<12>(Disp) && isUInt<8>(Len));
return (Len << 16) | (Base << 12) | Disp;
@@ -249,7 +274,7 @@ getBDRAddr12Encoding(const MCInst &MI, unsigned OpNum,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups, STI);
- uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups, STI);
+ uint64_t Disp = getDispOpValue(MI, OpNum + 1, Fixups, SystemZ::FK_390_12);
uint64_t Len = getMachineOpValue(MI, MI.getOperand(OpNum + 2), Fixups, STI);
assert(isUInt<4>(Base) && isUInt<12>(Disp) && isUInt<4>(Len));
return (Len << 16) | (Base << 12) | Disp;
@@ -260,7 +285,7 @@ getBDVAddr12Encoding(const MCInst &MI, unsigned OpNum,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups, STI);
- uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups, STI);
+ uint64_t Disp = getDispOpValue(MI, OpNum + 1, Fixups, SystemZ::FK_390_12);
uint64_t Index = getMachineOpValue(MI, MI.getOperand(OpNum + 2), Fixups, STI);
assert(isUInt<4>(Base) && isUInt<12>(Disp) && isUInt<5>(Index));
return (Index << 16) | (Base << 12) | Disp;
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h
index 14f6198183b9..1f62baabb9e7 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h
@@ -20,6 +20,8 @@ enum FixupKind {
FK_390_PC24DBL,
FK_390_PC32DBL,
FK_390_TLS_CALL,
+ FK_390_12,
+ FK_390_20,
// Marker
LastTargetFixupKind,
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
index 0b3e7b15df13..c23463ab9bde 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
@@ -46,6 +46,8 @@ static unsigned getAbsoluteReloc(unsigned Kind) {
case FK_Data_2: return ELF::R_390_16;
case FK_Data_4: return ELF::R_390_32;
case FK_Data_8: return ELF::R_390_64;
+ case SystemZ::FK_390_12: return ELF::R_390_12;
+ case SystemZ::FK_390_20: return ELF::R_390_20;
}
llvm_unreachable("Unsupported absolute address");
}
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
index 2a53dda84144..c7b73fd3b805 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
@@ -9,13 +9,15 @@
#include "SystemZMCTargetDesc.h"
#include "SystemZInstPrinter.h"
#include "SystemZMCAsmInfo.h"
+#include "SystemZTargetStreamer.h"
#include "TargetInfo/SystemZTargetInfo.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
@@ -149,7 +151,10 @@ unsigned SystemZMC::getFirstReg(unsigned Reg) {
static MCAsmInfo *createSystemZMCAsmInfo(const MCRegisterInfo &MRI,
const Triple &TT,
const MCTargetOptions &Options) {
- MCAsmInfo *MAI = new SystemZMCAsmInfo(TT);
+ if (TT.isOSzOS())
+ return new SystemZMCAsmInfoGOFF(TT);
+
+ MCAsmInfo *MAI = new SystemZMCAsmInfoELF(TT);
MCCFIInstruction Inst = MCCFIInstruction::cfiDefCfa(
nullptr, MRI.getDwarfRegNum(SystemZ::R15D, true),
SystemZMC::ELFCFAOffsetFromInitialSP);
@@ -182,6 +187,53 @@ static MCInstPrinter *createSystemZMCInstPrinter(const Triple &T,
return new SystemZInstPrinter(MAI, MII, MRI);
}
+void SystemZTargetStreamer::emitConstantPools() {
+ // Emit EXRL target instructions.
+ if (EXRLTargets2Sym.empty())
+ return;
+ // Switch to the .text section.
+ const MCObjectFileInfo &OFI = *Streamer.getContext().getObjectFileInfo();
+ Streamer.SwitchSection(OFI.getTextSection());
+ for (auto &I : EXRLTargets2Sym) {
+ Streamer.emitLabel(I.second);
+ const MCInstSTIPair &MCI_STI = I.first;
+ Streamer.emitInstruction(MCI_STI.first, *MCI_STI.second);
+ }
+ EXRLTargets2Sym.clear();
+}
+
+namespace {
+class SystemZTargetAsmStreamer : public SystemZTargetStreamer {
+ formatted_raw_ostream &OS;
+
+public:
+ SystemZTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS)
+ : SystemZTargetStreamer(S), OS(OS) {}
+ void emitMachine(StringRef CPU) override {
+ OS << "\t.machine " << CPU << "\n";
+ }
+};
+
+class SystemZTargetELFStreamer : public SystemZTargetStreamer {
+public:
+ SystemZTargetELFStreamer(MCStreamer &S) : SystemZTargetStreamer(S) {}
+ void emitMachine(StringRef CPU) override {}
+};
+} // end namespace
+
+static MCTargetStreamer *
+createAsmTargetStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS,
+ MCInstPrinter *InstPrint,
+ bool isVerboseAsm) {
+ return new SystemZTargetAsmStreamer(S, OS);
+}
+
+static MCTargetStreamer *
+createObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
+ return new SystemZTargetELFStreamer(S);
+}
+
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSystemZTargetMC() {
// Register the MCAsmInfo.
TargetRegistry::RegisterMCAsmInfo(getTheSystemZTarget(),
@@ -210,4 +262,12 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSystemZTargetMC() {
// Register the MCInstPrinter.
TargetRegistry::RegisterMCInstPrinter(getTheSystemZTarget(),
createSystemZMCInstPrinter);
+
+ // Register the asm streamer.
+ TargetRegistry::RegisterAsmTargetStreamer(getTheSystemZTarget(),
+ createAsmTargetStreamer);
+
+  // Register the object streamer.
+ TargetRegistry::RegisterObjectTargetStreamer(getTheSystemZTarget(),
+ createObjectTargetStreamer);
}
diff --git a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
index 46ccd2129969..defab665f924 100644
--- a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -24,7 +24,7 @@
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
@@ -549,15 +549,17 @@ void SystemZAsmPrinter::emitInstruction(const MachineInstr *MI) {
Register SrcReg = MI->getOperand(4).getReg();
int64_t SrcDisp = MI->getOperand(5).getImm();
+ SystemZTargetStreamer *TS = getTargetStreamer();
MCSymbol *DotSym = nullptr;
MCInst ET = MCInstBuilder(TargetInsOpc).addReg(DestReg)
.addImm(DestDisp).addImm(1).addReg(SrcReg).addImm(SrcDisp);
- MCInstSTIPair ET_STI(ET, &MF->getSubtarget());
- EXRLT2SymMap::iterator I = EXRLTargets2Sym.find(ET_STI);
- if (I != EXRLTargets2Sym.end())
+ SystemZTargetStreamer::MCInstSTIPair ET_STI(ET, &MF->getSubtarget());
+ SystemZTargetStreamer::EXRLT2SymMap::iterator I =
+ TS->EXRLTargets2Sym.find(ET_STI);
+ if (I != TS->EXRLTargets2Sym.end())
DotSym = I->second;
else
- EXRLTargets2Sym[ET_STI] = DotSym = OutContext.createTempSymbol();
+ TS->EXRLTargets2Sym[ET_STI] = DotSym = OutContext.createTempSymbol();
const MCSymbolRefExpr *Dot = MCSymbolRefExpr::create(DotSym, OutContext);
EmitToStreamer(
*OutStreamer,
@@ -722,19 +724,6 @@ void SystemZAsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
getSubtargetInfo());
}
-void SystemZAsmPrinter::emitEXRLTargetInstructions() {
- if (EXRLTargets2Sym.empty())
- return;
- // Switch to the .text section.
- OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
- for (auto &I : EXRLTargets2Sym) {
- OutStreamer->emitLabel(I.second);
- const MCInstSTIPair &MCI_STI = I.first;
- OutStreamer->emitInstruction(MCI_STI.first, *MCI_STI.second);
- }
- EXRLTargets2Sym.clear();
-}
-
// Convert a SystemZ-specific constant pool modifier into the associated
// MCSymbolRefExpr variant kind.
static MCSymbolRefExpr::VariantKind
@@ -786,14 +775,14 @@ bool SystemZAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
unsigned OpNo,
const char *ExtraCode,
raw_ostream &OS) {
- SystemZInstPrinter::printAddress(MAI, MI->getOperand(OpNo).getReg(),
- MI->getOperand(OpNo + 1).getImm(),
- MI->getOperand(OpNo + 2).getReg(), OS);
+ SystemZInstPrinter::
+ printAddress(MAI, MI->getOperand(OpNo).getReg(),
+ MCOperand::createImm(MI->getOperand(OpNo + 1).getImm()),
+ MI->getOperand(OpNo + 2).getReg(), OS);
return false;
}
void SystemZAsmPrinter::emitEndOfAsmFile(Module &M) {
- emitEXRLTargetInstructions();
emitStackMaps(SM);
}
diff --git a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h
index 11b731103c17..6cfd7bd4c486 100644
--- a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h
+++ b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h
@@ -11,6 +11,7 @@
#include "SystemZMCInstLower.h"
#include "SystemZTargetMachine.h"
+#include "SystemZTargetStreamer.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/MC/MCInstBuilder.h"
@@ -27,32 +28,11 @@ class LLVM_LIBRARY_VISIBILITY SystemZAsmPrinter : public AsmPrinter {
private:
StackMaps SM;
- typedef std::pair<MCInst, const MCSubtargetInfo *> MCInstSTIPair;
- struct CmpMCInst {
- bool operator()(const MCInstSTIPair &MCI_STI_A,
- const MCInstSTIPair &MCI_STI_B) const {
- if (MCI_STI_A.second != MCI_STI_B.second)
- return uintptr_t(MCI_STI_A.second) < uintptr_t(MCI_STI_B.second);
- const MCInst &A = MCI_STI_A.first;
- const MCInst &B = MCI_STI_B.first;
- assert(A.getNumOperands() == B.getNumOperands() &&
- A.getNumOperands() == 5 && A.getOperand(2).getImm() == 1 &&
- B.getOperand(2).getImm() == 1 && "Unexpected EXRL target MCInst");
- if (A.getOpcode() != B.getOpcode())
- return A.getOpcode() < B.getOpcode();
- if (A.getOperand(0).getReg() != B.getOperand(0).getReg())
- return A.getOperand(0).getReg() < B.getOperand(0).getReg();
- if (A.getOperand(1).getImm() != B.getOperand(1).getImm())
- return A.getOperand(1).getImm() < B.getOperand(1).getImm();
- if (A.getOperand(3).getReg() != B.getOperand(3).getReg())
- return A.getOperand(3).getReg() < B.getOperand(3).getReg();
- if (A.getOperand(4).getImm() != B.getOperand(4).getImm())
- return A.getOperand(4).getImm() < B.getOperand(4).getImm();
- return false;
- }
- };
- typedef std::map<MCInstSTIPair, MCSymbol *, CmpMCInst> EXRLT2SymMap;
- EXRLT2SymMap EXRLTargets2Sym;
+ SystemZTargetStreamer *getTargetStreamer() {
+ MCTargetStreamer *TS = OutStreamer->getTargetStreamer();
+ assert(TS && "do not have a target streamer");
+ return static_cast<SystemZTargetStreamer *>(TS);
+ }
public:
SystemZAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
@@ -77,7 +57,6 @@ private:
void LowerFENTRY_CALL(const MachineInstr &MI, SystemZMCInstLower &MCIL);
void LowerSTACKMAP(const MachineInstr &MI);
void LowerPATCHPOINT(const MachineInstr &MI, SystemZMCInstLower &Lower);
- void emitEXRLTargetInstructions();
};
} // end namespace llvm
diff --git a/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp b/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp
index 86eb8365d527..9c73757d7f5c 100644
--- a/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp
@@ -28,3 +28,7 @@ const MCPhysReg SystemZ::XPLINK64ArgGPRs[SystemZ::XPLINK64NumArgGPRs] = {
const MCPhysReg SystemZ::XPLINK64ArgFPRs[SystemZ::XPLINK64NumArgFPRs] = {
SystemZ::F0D, SystemZ::F2D, SystemZ::F4D, SystemZ::F6D
};
+
+const MCPhysReg SystemZ::XPLINK64ArgVRs[SystemZ::XPLINK64NumArgVRs] = {
+ SystemZ::V24, SystemZ::V25, SystemZ::V26, SystemZ::V27,
+ SystemZ::V28, SystemZ::V29, SystemZ::V30, SystemZ::V31};
diff --git a/llvm/lib/Target/SystemZ/SystemZCallingConv.h b/llvm/lib/Target/SystemZ/SystemZCallingConv.h
index 96c1080d5237..f82c61c0f344 100644
--- a/llvm/lib/Target/SystemZ/SystemZCallingConv.h
+++ b/llvm/lib/Target/SystemZ/SystemZCallingConv.h
@@ -27,6 +27,9 @@ namespace SystemZ {
const unsigned XPLINK64NumArgFPRs = 4;
extern const MCPhysReg XPLINK64ArgFPRs[XPLINK64NumArgFPRs];
+
+ const unsigned XPLINK64NumArgVRs = 8;
+ extern const MCPhysReg XPLINK64ArgVRs[XPLINK64NumArgVRs];
} // end namespace SystemZ
class SystemZCCState : public CCState {
@@ -124,7 +127,9 @@ inline bool CC_SystemZ_I128Indirect(unsigned &ValNo, MVT &ValVT,
else
llvm_unreachable("Unknown Calling Convention!");
- unsigned Offset = Reg ? 0 : State.AllocateStack(8, Align(8));
+ unsigned Offset = Reg && !Subtarget.isTargetXPLINK64()
+ ? 0
+ : State.AllocateStack(8, Align(8));
// Use that same location for all the pending parts.
for (auto &It : PendingMembers) {
@@ -167,12 +172,6 @@ inline bool CC_XPLINK64_Allocate128BitVararg(unsigned &ValNo, MVT &ValVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State) {
- if (LocVT.getSizeInBits() < 128)
- return false;
-
- if (static_cast<SystemZCCState *>(&State)->IsFixed(ValNo))
- return false;
-
// For any C or C++ program, this should always be
// false, since it is illegal to have a function
// where the first argument is variadic. Therefore
@@ -185,21 +184,59 @@ inline bool CC_XPLINK64_Allocate128BitVararg(unsigned &ValNo, MVT &ValVT,
bool AllocGPR3 = State.AllocateReg(SystemZ::R3D);
// If GPR2 and GPR3 are available, then we may pass vararg in R2Q.
- if (AllocGPR2 && AllocGPR3) {
- State.addLoc(
- CCValAssign::getReg(ValNo, ValVT, SystemZ::R2Q, LocVT, LocInfo));
+ // If only GPR3 is available, we need to set custom handling to copy
+ // hi bits into GPR3.
+ // Either way, we allocate on the stack.
+ if (AllocGPR3) {
+    // For the f128 and vector vararg cases, set the bitcast flag to bitcast to
+ // i128.
+ LocVT = MVT::i128;
+ LocInfo = CCValAssign::BCvt;
+ auto Offset = State.AllocateStack(16, Align(8));
+ if (AllocGPR2)
+ State.addLoc(
+ CCValAssign::getReg(ValNo, ValVT, SystemZ::R2Q, LocVT, LocInfo));
+ else
+ State.addLoc(
+ CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
return true;
}
- // If only GPR3 is available, we allocate on stack but need to
- // set custom handling to copy hi bits into GPR3.
- if (!AllocGPR2 && AllocGPR3) {
- auto Offset = State.AllocateStack(16, Align(8));
- State.addLoc(
- CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
- return true;
+ return false;
+}
+
+inline bool CC_XPLINK64_Shadow_Stack(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ ArrayRef<MCPhysReg> RegList;
+
+ switch (LocVT.SimpleTy) {
+ case MVT::i64:
+ RegList = SystemZ::XPLINK64ArgGPRs;
+ break;
+ case MVT::v16i8:
+ case MVT::v8i16:
+ case MVT::v4i32:
+ case MVT::v2i64:
+ case MVT::v4f32:
+ case MVT::v2f64:
+ RegList = SystemZ::XPLINK64ArgVRs;
+ break;
+ case MVT::f32:
+ case MVT::f64:
+ case MVT::f128:
+ RegList = SystemZ::XPLINK64ArgFPRs;
+ break;
+ default:
+ return false;
}
+ unsigned UnallocatedRegisterIndex = State.getFirstUnallocated(RegList);
+ // Each time a register could still be allocated, also reserve a matching
+ // shadow slot on the stack.
+ if (UnallocatedRegisterIndex < RegList.size())
+ State.AllocateStack(LocVT.getSizeInBits() / 8, Align(8));
+
return false;
}
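
The Shadow_Stack handler above never assigns a location itself: whenever one of the listed registers is still free, it merely reserves matching space in the outgoing argument area and returns false, so later stack assignments land at offsets that already account for the register-passed parameters. A rough C++-level illustration of the effect (hypothetical callee; slot numbers are relative positions in the argument area, not absolute ABI offsets):

// Sketch only: how the i64 rules in the calling-convention table below play
// out once CC_XPLINK64_Shadow_Stack has reserved a shadow slot per register
// argument.
void callee(long a,   // assigned to R1D; argument-area slot 0 kept as a shadow
            long b,   // assigned to R2D; slot 1 shadowed
            long c,   // assigned to R3D; slot 2 shadowed
            long d);  // no GPR left, so CCAssignToStack places it in slot 3
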
diff --git a/llvm/lib/Target/SystemZ/SystemZCallingConv.td b/llvm/lib/Target/SystemZ/SystemZCallingConv.td
index 45e22b07be30..373023effb4a 100644
--- a/llvm/lib/Target/SystemZ/SystemZCallingConv.td
+++ b/llvm/lib/Target/SystemZ/SystemZCallingConv.td
@@ -162,12 +162,14 @@ def CSR_SystemZ_NoRegs : CalleeSavedRegs<(add)>;
//===----------------------------------------------------------------------===//
// z/OS XPLINK64 callee-saved registers
//===----------------------------------------------------------------------===//
-def CSR_SystemZ_XPLINK64 : CalleeSavedRegs<(add (sequence "R%dD", 8, 15),
- (sequence "F%dD", 8, 15))>;
+// %R7D is volatile by the spec, but it must be saved in the prologue by
+// any non-leaf function and restored in the epilogue for use by the
+// return instruction, so it functions exactly like a callee-saved register.
+def CSR_SystemZ_XPLINK64 : CalleeSavedRegs<(add (sequence "R%dD", 7, 15),
+ (sequence "F%dD", 15, 8))>;
-def CSR_SystemZ_XPLINK64_Vector : CalleeSavedRegs<(add (sequence "R%dD", 8, 15),
- (sequence "F%dD", 15, 8),
- (sequence "V%d", 23, 16))>;
+def CSR_SystemZ_XPLINK64_Vector : CalleeSavedRegs<(add CSR_SystemZ_XPLINK64,
+ (sequence "V%d", 23, 16))>;
//===----------------------------------------------------------------------===//
// z/OS XPLINK64 return value calling convention
@@ -222,6 +224,17 @@ def CC_SystemZ_XPLINK64 : CallingConv<[
// XPLINK64 ABI compliant code widens integral types smaller than i64
// to i64 before placing the parameters either on the stack or in registers.
CCIfType<[i32], CCIfExtend<CCPromoteToType<i64>>>,
+ // Promote f32 to f64 and bitcast to i64, if it needs to be passed in GPRs.
+ CCIfType<[f32], CCIfNotFixed<CCPromoteToType<f64>>>,
+ CCIfType<[f64], CCIfNotFixed<CCBitConvertToType<i64>>>,
+ // long double, can only be passed in GPR2 and GPR3, if available,
+ // hence R2Q
+ CCIfType<[f128], CCIfNotFixed<CCCustom<"CC_XPLINK64_Allocate128BitVararg">>>,
+ // Non-fixed vector arguments are treated in the same way as long
+ // doubles.
+ CCIfSubtarget<"hasVector()",
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCIfNotFixed<CCCustom<"CC_XPLINK64_Allocate128BitVararg">>>>,
// A SwiftSelf is passed in callee-saved R10.
CCIfSwiftSelf<CCIfType<[i64], CCAssignToReg<[R10D]>>>,
@@ -236,7 +249,7 @@ def CC_SystemZ_XPLINK64 : CallingConv<[
// The first 3 integer arguments are passed in registers R1D-R3D.
// The rest will be passed in the user area. The address offset of the user
// area can be found in register R4D.
- CCIfType<[i32], CCAssignToReg<[R1L, R2L, R3L]>>,
+ CCIfType<[i64], CCCustom<"CC_XPLINK64_Shadow_Stack">>,
CCIfType<[i64], CCAssignToReg<[R1D, R2D, R3D]>>,
// The first 8 named vector arguments are passed in V24-V31. Sub-128 vectors
@@ -247,34 +260,24 @@ def CC_SystemZ_XPLINK64 : CallingConv<[
CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Reg">>>>,
CCIfSubtarget<"hasVector()",
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Stack">>>>,
+ CCIfSubtarget<"hasVector()",
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
CCIfFixed<CCAssignToReg<[V24, V25, V26, V27,
V28, V29, V30, V31]>>>>,
// The first 4 named float and double arguments are passed in registers FPR0-FPR6.
// The rest will be passed in the user area.
CCIfType<[f32, f64], CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Reg">>>,
+ CCIfType<[f32, f64], CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Stack">>>,
CCIfType<[f32], CCIfFixed<CCAssignToReg<[F0S, F2S, F4S, F6S]>>>,
CCIfType<[f64], CCIfFixed<CCAssignToReg<[F0D, F2D, F4D, F6D]>>>,
// The first 2 long double arguments are passed in register FPR0/FPR2
// and FPR4/FPR6. The rest will be passed in the user area.
CCIfType<[f128], CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Reg">>>,
+ CCIfType<[f128], CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Stack">>>,
CCIfType<[f128], CCIfFixed<CCAssignToReg<[F0Q, F4Q]>>>,
- // Non fixed floats are passed in GPRs
- // Promote f32 to f64, if it needs to be passed in GPRs.
- CCIfType<[f32], CCIfNotFixed<CCPromoteToType<f64>>>,
- // Assign f64 varargs to their proper GPRs.
- CCIfType<[f64], CCIfNotFixed<CCAssignToReg<[R1D, R2D, R3D]>>>,
- // long double, can only be passed in GPR2 and GPR3, if available,
- // hence R2Q
- CCIfType<[f128], CCIfNotFixed<CCCustom<"CC_XPLINK64_Allocate128BitVararg">>>,
-
- // Non fixed vector arguments are treated in the same way as long
- // doubles.
- CCIfSubtarget<"hasVector()",
- CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
- CCIfNotFixed<CCCustom<"CC_XPLINK64_Allocate128BitVararg">>>>,
-
// Other arguments are passed in 8-byte-aligned 8-byte stack slots.
CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
// Other f128 arguments are passed in 8-byte-aligned 16-byte stack slots.
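
For the non-fixed (vararg) cases that moved to the top of the table, a 16-byte value is bitcast to i128 and then either travels in the R2Q pair or in a 16-byte stack slot, as CC_XPLINK64_Allocate128BitVararg decides. A hedged C++-level illustration (hypothetical variadic function; the register outcome assumes R2D and R3D are still unallocated at that point):

// Sketch: a long double passed as a vararg under the rules above.
extern "C" void trace(const char *fmt, ...); // hypothetical variadic callee

void example() {
  long double ld = 1.0L;
  // fmt lands in R1D; ld is bitcast to i128 and, with R2D and R3D free,
  // is carried in the R2Q register pair. If only R3D were free, it would
  // instead occupy a 16-byte stack slot with R3D shadowing the high half.
  trace("%Lf", ld);
}
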
diff --git a/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp b/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
index 19b703bbb226..ac94570e568f 100644
--- a/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
@@ -571,10 +571,9 @@ bool SystemZElimCompare::optimizeCompareZero(
// Also do a forward search to handle cases where an instruction after the
// compare can be converted, like
// LTEBRCompare %f0s, %f0s; %f2s = LER %f0s => LTEBRCompare %f2s, %f0s
- for (MachineBasicBlock::iterator MBBI =
- std::next(MachineBasicBlock::iterator(&Compare)), MBBE = MBB.end();
- MBBI != MBBE;) {
- MachineInstr &MI = *MBBI++;
+ auto MIRange = llvm::make_range(
+ std::next(MachineBasicBlock::iterator(&Compare)), MBB.end());
+ for (MachineInstr &MI : llvm::make_early_inc_range(MIRange)) {
if (preservesValueOf(MI, SrcReg)) {
// Try to eliminate Compare by reusing a CC result from MI.
if (convertToLoadAndTest(MI, Compare, CCUsers)) {
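
The ElimCompare change above replaces a manual save-then-increment iterator loop with llvm::make_early_inc_range, which steps the underlying iterator past the current element before the loop body runs, so the body may erase or replace that element safely. A small self-contained sketch of the same idiom, assuming only llvm/ADT/STLExtras.h (the container and predicate are made up for illustration):

#include "llvm/ADT/STLExtras.h"
#include <map>
#include <string>

// Erase entries while iterating: the early-inc range has already advanced
// past the current entry when the body executes, so erasing it is safe.
void dropEmptyValues(std::map<std::string, std::string> &M) {
  for (auto &Entry : llvm::make_early_inc_range(M)) {
    if (Entry.second.empty())
      M.erase(Entry.first);
  }
}
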
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index d2f6ff96158d..d11d118fb8ee 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -24,7 +24,7 @@ using namespace llvm;
namespace {
// The ABI-defined register save slots, relative to the CFA (i.e.
// incoming stack pointer + SystemZMC::ELFCallFrameSize).
-static const TargetFrameLowering::SpillSlot SpillOffsetTable[] = {
+static const TargetFrameLowering::SpillSlot ELFSpillOffsetTable[] = {
{ SystemZ::R2D, 0x10 },
{ SystemZ::R3D, 0x18 },
{ SystemZ::R4D, 0x20 },
@@ -44,29 +44,55 @@ static const TargetFrameLowering::SpillSlot SpillOffsetTable[] = {
{ SystemZ::F4D, 0x90 },
{ SystemZ::F6D, 0x98 }
};
+
+static const TargetFrameLowering::SpillSlot XPLINKSpillOffsetTable[] = {
+ {SystemZ::R4D, 0x00}, {SystemZ::R5D, 0x08}, {SystemZ::R6D, 0x10},
+ {SystemZ::R7D, 0x18}, {SystemZ::R8D, 0x20}, {SystemZ::R9D, 0x28},
+ {SystemZ::R10D, 0x30}, {SystemZ::R11D, 0x38}, {SystemZ::R12D, 0x40},
+ {SystemZ::R13D, 0x48}, {SystemZ::R14D, 0x50}, {SystemZ::R15D, 0x58}};
} // end anonymous namespace
-SystemZFrameLowering::SystemZFrameLowering()
- : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align(8),
- 0, Align(8), false /* StackRealignable */),
- RegSpillOffsets(0) {
- // Due to the SystemZ ABI, the DWARF CFA (Canonical Frame Address) is not
- // equal to the incoming stack pointer, but to incoming stack pointer plus
- // 160. Instead of using a Local Area Offset, the Register save area will
- // be occupied by fixed frame objects, and all offsets are actually
- // relative to CFA.
+SystemZFrameLowering::SystemZFrameLowering(StackDirection D, Align StackAl,
+ int LAO, Align TransAl,
+ bool StackReal)
+ : TargetFrameLowering(D, StackAl, LAO, TransAl, StackReal) {}
- // Create a mapping from register number to save slot offset.
- // These offsets are relative to the start of the register save area.
- RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS);
- for (unsigned I = 0, E = array_lengthof(SpillOffsetTable); I != E; ++I)
- RegSpillOffsets[SpillOffsetTable[I].Reg] = SpillOffsetTable[I].Offset;
+std::unique_ptr<SystemZFrameLowering>
+SystemZFrameLowering::create(const SystemZSubtarget &STI) {
+ if (STI.isTargetXPLINK64())
+ return std::make_unique<SystemZXPLINKFrameLowering>();
+ return std::make_unique<SystemZELFFrameLowering>();
}
-bool SystemZFrameLowering::
-assignCalleeSavedSpillSlots(MachineFunction &MF,
- const TargetRegisterInfo *TRI,
- std::vector<CalleeSavedInfo> &CSI) const {
+MachineBasicBlock::iterator SystemZFrameLowering::eliminateCallFramePseudoInstr(
+ MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ switch (MI->getOpcode()) {
+ case SystemZ::ADJCALLSTACKDOWN:
+ case SystemZ::ADJCALLSTACKUP:
+ assert(hasReservedCallFrame(MF) &&
+ "ADJSTACKDOWN and ADJSTACKUP should be no-ops");
+ return MBB.erase(MI);
+ break;
+
+ default:
+ llvm_unreachable("Unexpected call frame instruction");
+ }
+}
+
+bool SystemZFrameLowering::hasReservedCallFrame(
+ const MachineFunction &MF) const {
+ // The ELF ABI requires us to allocate 160 bytes of stack space for the
+ // callee, with any outgoing stack arguments being placed above that. It
+ // seems better to make that area a permanent feature of the frame even if
+ // we're using a frame pointer. Similarly, 64-bit XPLINK requires 96 bytes
+ // of stack space for the register save area.
+ return true;
+}
+
+bool SystemZELFFrameLowering::assignCalleeSavedSpillSlots(
+ MachineFunction &MF, const TargetRegisterInfo *TRI,
+ std::vector<CalleeSavedInfo> &CSI) const {
SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
MachineFrameInfo &MFFrame = MF.getFrameInfo();
bool IsVarArg = MF.getFunction().isVarArg();
@@ -130,9 +156,9 @@ assignCalleeSavedSpillSlots(MachineFunction &MF,
return true;
}
-void SystemZFrameLowering::determineCalleeSaves(MachineFunction &MF,
- BitVector &SavedRegs,
- RegScavenger *RS) const {
+void SystemZELFFrameLowering::determineCalleeSaves(MachineFunction &MF,
+ BitVector &SavedRegs,
+ RegScavenger *RS) const {
TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
MachineFrameInfo &MFFrame = MF.getFrameInfo();
@@ -179,6 +205,24 @@ void SystemZFrameLowering::determineCalleeSaves(MachineFunction &MF,
}
}
+SystemZELFFrameLowering::SystemZELFFrameLowering()
+ : SystemZFrameLowering(TargetFrameLowering::StackGrowsDown, Align(8), 0,
+ Align(8), /* StackRealignable */ false),
+ RegSpillOffsets(0) {
+
+ // Due to the SystemZ ABI, the DWARF CFA (Canonical Frame Address) is not
+ // equal to the incoming stack pointer, but to incoming stack pointer plus
+ // 160. Instead of using a Local Area Offset, the Register save area will
+ // be occupied by fixed frame objects, and all offsets are actually
+ // relative to CFA.
+
+ // Create a mapping from register number to save slot offset.
+ // These offsets are relative to the start of the register save area.
+ RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS);
+ for (unsigned I = 0, E = array_lengthof(ELFSpillOffsetTable); I != E; ++I)
+ RegSpillOffsets[ELFSpillOffsetTable[I].Reg] = ELFSpillOffsetTable[I].Offset;
+}
+
// Add GPR64 to the save instruction being built by MIB, which is in basic
// block MBB. IsImplicit says whether this is an explicit operand to the
// instruction, or an implicit one that comes between the explicit start
@@ -196,7 +240,7 @@ static void addSavedGPR(MachineBasicBlock &MBB, MachineInstrBuilder &MIB,
}
}
-bool SystemZFrameLowering::spillCalleeSavedRegisters(
+bool SystemZELFFrameLowering::spillCalleeSavedRegisters(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
if (CSI.empty())
@@ -256,7 +300,7 @@ bool SystemZFrameLowering::spillCalleeSavedRegisters(
return true;
}
-bool SystemZFrameLowering::restoreCalleeSavedRegisters(
+bool SystemZELFFrameLowering::restoreCalleeSavedRegisters(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
if (CSI.empty())
@@ -312,9 +356,8 @@ bool SystemZFrameLowering::restoreCalleeSavedRegisters(
return true;
}
-void SystemZFrameLowering::
-processFunctionBeforeFrameFinalized(MachineFunction &MF,
- RegScavenger *RS) const {
+void SystemZELFFrameLowering::processFunctionBeforeFrameFinalized(
+ MachineFunction &MF, RegScavenger *RS) const {
MachineFrameInfo &MFFrame = MF.getFrameInfo();
SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
MachineRegisterInfo *MRI = &MF.getRegInfo();
@@ -410,8 +453,8 @@ static void buildDefCFAReg(MachineBasicBlock &MBB,
.addCFIIndex(CFIIndex);
}
-void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
+void SystemZELFFrameLowering::emitPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
const SystemZSubtarget &STI = MF.getSubtarget<SystemZSubtarget>();
const SystemZTargetLowering &TLI = *STI.getTargetLowering();
@@ -530,8 +573,8 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
// Mark the FramePtr as live at the beginning of every block except
// the entry block. (We'll have marked R11 as live on entry when
// saving the GPRs.)
- for (auto I = std::next(MF.begin()), E = MF.end(); I != E; ++I)
- I->addLiveIn(SystemZ::R11D);
+ for (MachineBasicBlock &MBBJ : llvm::drop_begin(MF))
+ MBBJ.addLiveIn(SystemZ::R11D);
}
// Skip over the FPR/VR saves.
@@ -573,15 +616,15 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
}
}
-void SystemZFrameLowering::emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
+void SystemZELFFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
auto *ZII =
static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
MachineFrameInfo &MFFrame = MF.getFrameInfo();
- // See SystemZFrameLowering::emitPrologue
+ // See SystemZELFFrameLowering::emitPrologue
if (MF.getFunction().getCallingConv() == CallingConv::GHC)
return;
@@ -619,8 +662,8 @@ void SystemZFrameLowering::emitEpilogue(MachineFunction &MF,
}
}
-void SystemZFrameLowering::inlineStackProbe(MachineFunction &MF,
- MachineBasicBlock &PrologMBB) const {
+void SystemZELFFrameLowering::inlineStackProbe(
+ MachineFunction &MF, MachineBasicBlock &PrologMBB) const {
auto *ZII =
static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
const SystemZSubtarget &STI = MF.getSubtarget<SystemZSubtarget>();
@@ -719,24 +762,14 @@ void SystemZFrameLowering::inlineStackProbe(MachineFunction &MF,
}
}
-bool SystemZFrameLowering::hasFP(const MachineFunction &MF) const {
+bool SystemZELFFrameLowering::hasFP(const MachineFunction &MF) const {
return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
MF.getFrameInfo().hasVarSizedObjects() ||
MF.getInfo<SystemZMachineFunctionInfo>()->getManipulatesSP());
}
-bool
-SystemZFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
- // The ABI requires us to allocate 160 bytes of stack space for the callee,
- // with any outgoing stack arguments being placed above that. It seems
- // better to make that area a permanent feature of the frame even if
- // we're using a frame pointer.
- return true;
-}
-
-StackOffset
-SystemZFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
- Register &FrameReg) const {
+StackOffset SystemZELFFrameLowering::getFrameIndexReference(
+ const MachineFunction &MF, int FI, Register &FrameReg) const {
// Our incoming SP is actually SystemZMC::ELFCallFrameSize below the CFA, so
// add that difference here.
StackOffset Offset =
@@ -744,25 +777,8 @@ SystemZFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
return Offset + StackOffset::getFixed(SystemZMC::ELFCallFrameSize);
}
-MachineBasicBlock::iterator SystemZFrameLowering::
-eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI) const {
- switch (MI->getOpcode()) {
- case SystemZ::ADJCALLSTACKDOWN:
- case SystemZ::ADJCALLSTACKUP:
- assert(hasReservedCallFrame(MF) &&
- "ADJSTACKDOWN and ADJSTACKUP should be no-ops");
- return MBB.erase(MI);
- break;
-
- default:
- llvm_unreachable("Unexpected call frame instruction");
- }
-}
-
-unsigned SystemZFrameLowering::getRegSpillOffset(MachineFunction &MF,
- Register Reg) const {
+unsigned SystemZELFFrameLowering::getRegSpillOffset(MachineFunction &MF,
+ Register Reg) const {
bool IsVarArg = MF.getFunction().isVarArg();
bool BackChain = MF.getFunction().hasFnAttribute("backchain");
bool SoftFloat = MF.getSubtarget<SystemZSubtarget>().hasSoftFloat();
@@ -778,8 +794,8 @@ unsigned SystemZFrameLowering::getRegSpillOffset(MachineFunction &MF,
return Offset;
}
-int SystemZFrameLowering::
-getOrCreateFramePointerSaveIndex(MachineFunction &MF) const {
+int SystemZELFFrameLowering::getOrCreateFramePointerSaveIndex(
+ MachineFunction &MF) const {
SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
int FI = ZFI->getFramePointerSaveIndex();
if (!FI) {
@@ -791,7 +807,7 @@ getOrCreateFramePointerSaveIndex(MachineFunction &MF) const {
return FI;
}
-bool SystemZFrameLowering::usePackedStack(MachineFunction &MF) const {
+bool SystemZELFFrameLowering::usePackedStack(MachineFunction &MF) const {
bool HasPackedStackAttr = MF.getFunction().hasFnAttribute("packed-stack");
bool BackChain = MF.getFunction().hasFnAttribute("backchain");
bool SoftFloat = MF.getSubtarget<SystemZSubtarget>().hasSoftFloat();
@@ -800,3 +816,186 @@ bool SystemZFrameLowering::usePackedStack(MachineFunction &MF) const {
bool CallConv = MF.getFunction().getCallingConv() != CallingConv::GHC;
return HasPackedStackAttr && CallConv;
}
+
+SystemZXPLINKFrameLowering::SystemZXPLINKFrameLowering()
+ : SystemZFrameLowering(TargetFrameLowering::StackGrowsUp, Align(32), 128,
+ Align(32), /* StackRealignable */ false),
+ RegSpillOffsets(-1) {
+
+ // Create a mapping from register number to save slot offset.
+ // These offsets are relative to the start of the local area.
+ RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS);
+ for (unsigned I = 0, E = array_lengthof(XPLINKSpillOffsetTable); I != E; ++I)
+ RegSpillOffsets[XPLINKSpillOffsetTable[I].Reg] =
+ XPLINKSpillOffsetTable[I].Offset;
+}
+
+bool SystemZXPLINKFrameLowering::assignCalleeSavedSpillSlots(
+ MachineFunction &MF, const TargetRegisterInfo *TRI,
+ std::vector<CalleeSavedInfo> &CSI) const {
+ MachineFrameInfo &MFFrame = MF.getFrameInfo();
+ SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>();
+ const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
+ auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
+
+ // Scan the call-saved GPRs and find the bounds of the register spill area.
+ unsigned LowGPR = 0;
+ int LowOffset = INT32_MAX;
+ unsigned HighGPR = LowGPR;
+ int HighOffset = -1;
+
+ unsigned RegSP = Regs.getStackPointerRegister();
+ auto &GRRegClass = SystemZ::GR64BitRegClass;
+ const unsigned RegSize = 8;
+
+ auto ProcessCSI = [&](std::vector<CalleeSavedInfo> &CSIList) {
+ for (auto &CS : CSIList) {
+ unsigned Reg = CS.getReg();
+ int Offset = RegSpillOffsets[Reg];
+ if (Offset >= 0) {
+ if (GRRegClass.contains(Reg)) {
+ if (LowOffset > Offset) {
+ LowOffset = Offset;
+ LowGPR = Reg;
+ }
+
+ if (Offset > HighOffset) {
+ HighOffset = Offset;
+ HighGPR = Reg;
+ }
+ }
+ int FrameIdx = MFFrame.CreateFixedSpillStackObject(RegSize, Offset);
+ CS.setFrameIdx(FrameIdx);
+ } else
+ CS.setFrameIdx(INT32_MAX);
+ }
+ };
+
+ std::vector<CalleeSavedInfo> Spills;
+
+ // For non-leaf functions:
+ // - the address of callee (entry point) register R6 must be saved
+ Spills.push_back(CalleeSavedInfo(Regs.getAddressOfCalleeRegister()));
+
+ // If the function needs a frame pointer, or if the backchain pointer should
+ // be stored, then save the stack pointer register R4.
+ if (hasFP(MF) || MF.getFunction().hasFnAttribute("backchain"))
+ Spills.push_back(CalleeSavedInfo(RegSP));
+
+ // Save the range of call-saved registers, for use by the
+ // prologue/epilogue inserters.
+ ProcessCSI(CSI);
+ MFI->setRestoreGPRRegs(LowGPR, HighGPR, LowOffset);
+
+ // Save the range of call-saved registers, for use by the epilogue inserter.
+ ProcessCSI(Spills);
+ MFI->setSpillGPRRegs(LowGPR, HighGPR, LowOffset);
+
+ // Create spill slots for the remaining registers.
+ for (auto &CS : CSI) {
+ if (CS.getFrameIdx() != INT32_MAX)
+ continue;
+ unsigned Reg = CS.getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ Align Alignment = TRI->getSpillAlign(*RC);
+ unsigned Size = TRI->getSpillSize(*RC);
+ Alignment = std::min(Alignment, getStackAlign());
+ int FrameIdx = MFFrame.CreateStackObject(Size, Alignment, true);
+ CS.setFrameIdx(FrameIdx);
+ }
+
+ return true;
+}
+
+void SystemZXPLINKFrameLowering::determineCalleeSaves(MachineFunction &MF,
+ BitVector &SavedRegs,
+ RegScavenger *RS) const {
+ TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
+
+ bool HasFP = hasFP(MF);
+ const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
+ auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
+
+ // If the function requires a frame pointer, record that the hard
+ // frame pointer will be clobbered.
+ if (HasFP)
+ SavedRegs.set(Regs.getFramePointerRegister());
+
+ // If the function is not an XPLeaf function, we need to save the
+ // return address register. We also always use that register for
+ // the return instruction, so it needs to be restored in the
+ // epilogue even though that register is considered to be volatile.
+ // #TODO: Implement leaf detection.
+ SavedRegs.set(Regs.getReturnFunctionAddressRegister());
+}
+
+bool SystemZXPLINKFrameLowering::spillCalleeSavedRegisters(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return true;
+
+ MachineFunction &MF = *MBB.getParent();
+ SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
+ const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
+ SystemZ::GPRRegs SpillGPRs = ZFI->getSpillGPRRegs();
+ DebugLoc DL;
+
+ // Save GPRs
+ if (SpillGPRs.LowGPR) {
+ assert(SpillGPRs.LowGPR != SpillGPRs.HighGPR &&
+ "Should be saving multiple registers");
+
+ // Build an STM/STMG instruction.
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(SystemZ::STMG));
+
+ // Add the explicit register operands.
+ addSavedGPR(MBB, MIB, SpillGPRs.LowGPR, false);
+ addSavedGPR(MBB, MIB, SpillGPRs.HighGPR, false);
+
+ // Add the address r4
+ MIB.addReg(Regs.getStackPointerRegister());
+
+ // Add the partial offset
+ // We cannot add the actual offset as, at this point, the stack is not finalized.
+ MIB.addImm(SpillGPRs.GPROffset);
+
+ // Make sure all call-saved GPRs are included as operands and are
+ // marked as live on entry.
+ auto &GRRegClass = SystemZ::GR64BitRegClass;
+ for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+ unsigned Reg = CSI[I].getReg();
+ if (GRRegClass.contains(Reg))
+ addSavedGPR(MBB, MIB, Reg, true);
+ }
+ }
+
+ // Spill FPRs to the stack in the normal TargetInstrInfo way
+ for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+ unsigned Reg = CSI[I].getReg();
+ if (SystemZ::FP64BitRegClass.contains(Reg)) {
+ MBB.addLiveIn(Reg);
+ TII->storeRegToStackSlot(MBB, MBBI, Reg, true, CSI[I].getFrameIdx(),
+ &SystemZ::FP64BitRegClass, TRI);
+ }
+ if (SystemZ::VR128BitRegClass.contains(Reg)) {
+ MBB.addLiveIn(Reg);
+ TII->storeRegToStackSlot(MBB, MBBI, Reg, true, CSI[I].getFrameIdx(),
+ &SystemZ::VR128BitRegClass, TRI);
+ }
+ }
+
+ return true;
+}
+
+void SystemZXPLINKFrameLowering::emitPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {}
+
+void SystemZXPLINKFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {}
+
+bool SystemZXPLINKFrameLowering::hasFP(const MachineFunction &MF) const {
+ return false;
+}
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
index c8312b836e57..6fddb4f81c41 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -10,6 +10,8 @@
#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZFRAMELOWERING_H
#include "MCTargetDesc/SystemZMCTargetDesc.h"
+#include "SystemZInstrBuilder.h"
+#include "SystemZMachineFunctionInfo.h"
#include "llvm/ADT/IndexedMap.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/Support/TypeSize.h"
@@ -19,10 +21,26 @@ class SystemZTargetMachine;
class SystemZSubtarget;
class SystemZFrameLowering : public TargetFrameLowering {
+public:
+ SystemZFrameLowering(StackDirection D, Align StackAl, int LAO, Align TransAl,
+ bool StackReal);
+
+ static std::unique_ptr<SystemZFrameLowering>
+ create(const SystemZSubtarget &STI);
+
+ // Override TargetFrameLowering.
+ bool isFPCloseToIncomingSP() const override { return false; }
+ bool hasReservedCallFrame(const MachineFunction &MF) const override;
+ MachineBasicBlock::iterator
+ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const override;
+};
+
+class SystemZELFFrameLowering : public SystemZFrameLowering {
IndexedMap<unsigned> RegSpillOffsets;
public:
- SystemZFrameLowering();
+ SystemZELFFrameLowering();
// Override TargetFrameLowering.
bool isFPCloseToIncomingSP() const override { return false; }
@@ -48,21 +66,14 @@ public:
void inlineStackProbe(MachineFunction &MF,
MachineBasicBlock &PrologMBB) const override;
bool hasFP(const MachineFunction &MF) const override;
- bool hasReservedCallFrame(const MachineFunction &MF) const override;
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI,
Register &FrameReg) const override;
- MachineBasicBlock::iterator
- eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI) const override;
// Return the byte offset from the incoming stack pointer of Reg's
// ABI-defined save slot. Return 0 if no slot is defined for Reg. Adjust
// the offset in case MF has packed-stack.
unsigned getRegSpillOffset(MachineFunction &MF, Register Reg) const;
- // Get or create the frame index of where the old frame pointer is stored.
- int getOrCreateFramePointerSaveIndex(MachineFunction &MF) const;
-
bool usePackedStack(MachineFunction &MF) const;
// Return the offset of the backchain.
@@ -70,6 +81,35 @@ public:
// The back chain is stored topmost with packed-stack.
return usePackedStack(MF) ? SystemZMC::ELFCallFrameSize - 8 : 0;
}
+
+ // Get or create the frame index of where the old frame pointer is stored.
+ int getOrCreateFramePointerSaveIndex(MachineFunction &MF) const;
+};
+
+class SystemZXPLINKFrameLowering : public SystemZFrameLowering {
+ IndexedMap<unsigned> RegSpillOffsets;
+
+public:
+ SystemZXPLINKFrameLowering();
+
+ bool
+ assignCalleeSavedSpillSlots(MachineFunction &MF,
+ const TargetRegisterInfo *TRI,
+ std::vector<CalleeSavedInfo> &CSI) const override;
+
+ void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
+ RegScavenger *RS) const override;
+
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ ArrayRef<CalleeSavedInfo> CSI,
+ const TargetRegisterInfo *TRI) const override;
+
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+
+ void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+
+ bool hasFP(const MachineFunction &MF) const override;
};
} // end namespace llvm
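
With two concrete frame-lowering implementations, call sites elsewhere in this import (for example in SystemZISelLowering.cpp below) fetch the object produced by SystemZFrameLowering::create() back through a typed getFrameLowering<T>() accessor on the subtarget. The subtarget change itself is outside this hunk, so the following is only a sketch of the assumed shape of that accessor, with placeholder class names:

#include <memory>

struct FrameLoweringBaseSketch { virtual ~FrameLoweringBaseSketch() = default; };
struct ELFFrameLoweringSketch : FrameLoweringBaseSketch {};

class SubtargetSketch {
  std::unique_ptr<FrameLoweringBaseSketch> FrameLowering; // what create() returned
public:
  // Typed accessor: callers that know which ABI is in use downcast here
  // instead of sprinkling static_casts at every call site.
  template <typename TFL = FrameLoweringBaseSketch>
  const TFL *getFrameLowering() const {
    return static_cast<const TFL *>(FrameLowering.get());
  }
};
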
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index d70d48638b14..71432218068e 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -82,6 +82,8 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
: TargetLowering(TM), Subtarget(STI) {
MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize(0));
+ auto *Regs = STI.getSpecialRegisters();
+
// Set up the register classes.
if (Subtarget.hasHighWord())
addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
@@ -115,7 +117,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
computeRegisterProperties(Subtarget.getRegisterInfo());
// Set up special registers.
- setStackPointerRegisterToSaveRestore(SystemZ::R15D);
+ setStackPointerRegisterToSaveRestore(Regs->getStackPointerRegister());
// TODO: It may be better to default to latency-oriented scheduling, however
// LLVM's current latency-oriented scheduler can't handle physreg definitions
@@ -293,6 +295,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setLibcallName(RTLIB::SHL_I128, nullptr);
setLibcallName(RTLIB::SRA_I128, nullptr);
+ // Handle bitcast from fp128 to i128.
+ setOperationAction(ISD::BITCAST, MVT::i128, Custom);
+
// We have native instructions for i8, i16 and i32 extensions, but not i1.
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
for (MVT VT : MVT::integer_valuetypes()) {
@@ -1353,14 +1358,21 @@ static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL,
return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
case CCValAssign::AExt:
return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
- case CCValAssign::BCvt:
- // If this is a short vector argument to be stored to the stack,
+ case CCValAssign::BCvt: {
+ assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128);
+ assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f64 ||
+ VA.getValVT() == MVT::f128);
+ MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64
+ ? MVT::v2i64
+ : VA.getLocVT();
+ Value = DAG.getNode(ISD::BITCAST, DL, BitCastToType, Value);
+ // For ELF, this is a short vector argument to be stored to the stack,
// bitcast to v2i64 and then extract first element.
- assert(VA.getLocVT() == MVT::i64);
- assert(VA.getValVT().isVector());
- Value = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Value);
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
- DAG.getConstant(0, DL, MVT::i32));
+ if (BitCastToType == MVT::v2i64)
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
+ DAG.getConstant(0, DL, MVT::i32));
+ return Value;
+ }
case CCValAssign::Full:
return Value;
default:
@@ -1426,8 +1438,7 @@ SDValue SystemZTargetLowering::LowerFormalArguments(
MachineRegisterInfo &MRI = MF.getRegInfo();
SystemZMachineFunctionInfo *FuncInfo =
MF.getInfo<SystemZMachineFunctionInfo>();
- auto *TFL =
- static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering());
+ auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
// Detect unsupported vector argument types.
@@ -1468,6 +1479,10 @@ SDValue SystemZTargetLowering::LowerFormalArguments(
NumFixedFPRs += 1;
RC = &SystemZ::FP64BitRegClass;
break;
+ case MVT::f128:
+ NumFixedFPRs += 2;
+ RC = &SystemZ::FP128BitRegClass;
+ break;
case MVT::v16i8:
case MVT::v8i16:
case MVT::v4i32:
@@ -1521,7 +1536,8 @@ SDValue SystemZTargetLowering::LowerFormalArguments(
InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
}
- if (IsVarArg) {
+ // FIXME: Add support for lowering varargs for XPLINK64 in a later patch.
+ if (IsVarArg && Subtarget.isTargetELF()) {
// Save the number of non-varargs registers for later use by va_start, etc.
FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
@@ -1560,6 +1576,8 @@ SDValue SystemZTargetLowering::LowerFormalArguments(
}
}
+ // FIXME: For XPLINK64, Add in support for handling incoming "ADA" special
+ // register (R5)
return Chain;
}
@@ -1600,6 +1618,11 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
MachineFunction &MF = DAG.getMachineFunction();
EVT PtrVT = getPointerTy(MF.getDataLayout());
LLVMContext &Ctx = *DAG.getContext();
+ SystemZCallingConventionRegisters *Regs = Subtarget.getSpecialRegisters();
+
+ // FIXME: z/OS support to be added in later.
+ if (Subtarget.isTargetXPLINK64())
+ IsTailCall = false;
// Detect unsupported vector argument and return types.
if (Subtarget.hasVector()) {
@@ -1620,6 +1643,13 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = ArgCCInfo.getNextStackOffset();
+ if (Subtarget.isTargetXPLINK64())
+ // Although the XPLINK specifications for AMODE64 state that the minimum
+ // size of the param area is 32 bytes and no rounding is otherwise
+ // specified, we round this area up in 64-byte increments to be compatible
+ // with existing compilers.
+ NumBytes = std::max(64U, (unsigned)alignTo(NumBytes, 64));
+
// Mark the start of the call.
if (!IsTailCall)
Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
@@ -1670,17 +1700,24 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
} else
ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
- if (VA.isRegLoc())
+ if (VA.isRegLoc()) {
+ // In XPLINK64, for the 128-bit vararg case, ArgValue is bitcasted to a
+ // MVT::i128 type. We decompose the 128-bit type to a pair of its high
+ // and low values.
+ if (VA.getLocVT() == MVT::i128)
+ ArgValue = lowerI128ToGR128(DAG, ArgValue);
// Queue up the argument copies and emit them at the end.
RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
- else {
+ } else {
assert(VA.isMemLoc() && "Argument not register or memory");
// Work out the address of the stack slot. Unpromoted ints and
// floats are passed as right-justified 8-byte values.
if (!StackPtr.getNode())
- StackPtr = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, PtrVT);
- unsigned Offset = SystemZMC::ELFCallFrameSize + VA.getLocMemOffset();
+ StackPtr = DAG.getCopyFromReg(Chain, DL,
+ Regs->getStackPointerRegister(), PtrVT);
+ unsigned Offset = Regs->getStackPointerBias() + Regs->getCallFrameSize() +
+ VA.getLocMemOffset();
if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
Offset += 4;
SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
@@ -1689,6 +1726,17 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Emit the store.
MemOpChains.push_back(
DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
+
+ // Although long doubles or vectors are passed through the stack when
+ // they are vararg (non-fixed arguments), if a long double or vector
+ // occupies the third and fourth slot of the argument list GPR3 should
+ // still shadow the third slot of the argument list.
+ if (Subtarget.isTargetXPLINK64() && VA.needsCustom()) {
+ SDValue ShadowArgValue =
+ DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, ArgValue,
+ DAG.getIntPtrConstant(1, DL));
+ RegsToPass.push_back(std::make_pair(SystemZ::R3D, ShadowArgValue));
+ }
}
}
@@ -1700,6 +1748,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
// associated Target* opcodes. Force %r1 to be used for indirect
// tail calls.
SDValue Glue;
+ // FIXME: Add support for XPLINK using the ADA register.
if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
@@ -2282,8 +2331,7 @@ static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
Comparison &C) {
if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
C.CCMask == SystemZ::CCMASK_CMP_NE) {
- for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
- SDNode *N = *I;
+ for (SDNode *N : C.Op0->uses()) {
if (N->getOpcode() == ISD::SUB &&
((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
(N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
@@ -2306,8 +2354,7 @@ static void adjustForFNeg(Comparison &C) {
return;
auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
if (C1 && C1->isZero()) {
- for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
- SDNode *N = *I;
+ for (SDNode *N : C.Op0->uses()) {
if (N->getOpcode() == ISD::FNEG) {
C.Op0 = SDValue(N, 0);
C.CCMask = SystemZ::reverseCCMask(C.CCMask);
@@ -2333,8 +2380,7 @@ static void adjustForLTGFR(Comparison &C) {
if (C1 && C1->getZExtValue() == 32) {
SDValue ShlOp0 = C.Op0.getOperand(0);
// See whether X has any SIGN_EXTEND_INREG uses.
- for (auto I = ShlOp0->use_begin(), E = ShlOp0->use_end(); I != E; ++I) {
- SDNode *N = *I;
+ for (SDNode *N : ShlOp0->uses()) {
if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
C.Op0 = SDValue(N, 0);
@@ -3320,8 +3366,7 @@ SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
SelectionDAG &DAG) const {
- auto *TFL =
- static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering());
+ auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
MFI.setFrameAddressIsTaken(true);
@@ -4139,17 +4184,21 @@ SystemZTargetLowering::getTargetMMOFlags(const Instruction &I) const {
SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
+ const SystemZSubtarget *Subtarget = &MF.getSubtarget<SystemZSubtarget>();
+ auto *Regs = Subtarget->getSpecialRegisters();
MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
if (MF.getFunction().getCallingConv() == CallingConv::GHC)
report_fatal_error("Variable-sized stack allocations are not supported "
"in GHC calling convention");
return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
- SystemZ::R15D, Op.getValueType());
+ Regs->getStackPointerRegister(), Op.getValueType());
}
SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
+ const SystemZSubtarget *Subtarget = &MF.getSubtarget<SystemZSubtarget>();
+ auto *Regs = Subtarget->getSpecialRegisters();
MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
@@ -4163,12 +4212,13 @@ SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
SDLoc DL(Op);
if (StoreBackchain) {
- SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, MVT::i64);
+ SDValue OldSP = DAG.getCopyFromReg(
+ Chain, DL, Regs->getStackPointerRegister(), MVT::i64);
Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
MachinePointerInfo());
}
- Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R15D, NewSP);
+ Chain = DAG.getCopyToReg(Chain, DL, Regs->getStackPointerRegister(), NewSP);
if (StoreBackchain)
Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
@@ -5589,6 +5639,32 @@ SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
Results.push_back(Res.getValue(2));
break;
}
+ case ISD::BITCAST: {
+ SDValue Src = N->getOperand(0);
+ if (N->getValueType(0) == MVT::i128 && Src.getValueType() == MVT::f128 &&
+ !useSoftFloat()) {
+ SDLoc DL(N);
+ SDValue Lo, Hi;
+ if (getRepRegClassFor(MVT::f128) == &SystemZ::VR128BitRegClass) {
+ SDValue VecBC = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Src);
+ Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, VecBC,
+ DAG.getConstant(1, DL, MVT::i32));
+ Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, VecBC,
+ DAG.getConstant(0, DL, MVT::i32));
+ } else {
+ assert(getRepRegClassFor(MVT::f128) == &SystemZ::FP128BitRegClass &&
+ "Unrecognized register class for f128.");
+ SDValue LoFP = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
+ DL, MVT::f64, Src);
+ SDValue HiFP = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
+ DL, MVT::f64, Src);
+ Lo = DAG.getNode(ISD::BITCAST, DL, MVT::i64, LoFP);
+ Hi = DAG.getNode(ISD::BITCAST, DL, MVT::i64, HiFP);
+ }
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi));
+ }
+ break;
+ }
default:
llvm_unreachable("Unexpected node to lower");
}
@@ -5634,15 +5710,10 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(SUBCARRY);
OPCODE(GET_CCMASK);
OPCODE(MVC);
- OPCODE(MVC_LOOP);
OPCODE(NC);
- OPCODE(NC_LOOP);
OPCODE(OC);
- OPCODE(OC_LOOP);
OPCODE(XC);
- OPCODE(XC_LOOP);
OPCODE(CLC);
- OPCODE(CLC_LOOP);
OPCODE(STPCPY);
OPCODE(STRCMP);
OPCODE(SEARCH_STRING);
@@ -7071,13 +7142,19 @@ SystemZTargetLowering::getStackProbeSize(MachineFunction &MF) const {
// Force base value Base into a register before MI. Return the register.
static Register forceReg(MachineInstr &MI, MachineOperand &Base,
const SystemZInstrInfo *TII) {
- if (Base.isReg())
- return Base.getReg();
-
MachineBasicBlock *MBB = MI.getParent();
MachineFunction &MF = *MBB->getParent();
MachineRegisterInfo &MRI = MF.getRegInfo();
+ if (Base.isReg()) {
+ // Copy Base into a new virtual register to help register coalescing in
+ // cases with multiple uses.
+ Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
+ BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::COPY), Reg)
+ .add(Base);
+ return Reg;
+ }
+
Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg)
.add(Base)
@@ -7103,8 +7180,8 @@ static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB) {
// If we hit the end of the block, check whether CC is live into a
// successor.
if (miI == MBB->end()) {
- for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI)
- if ((*SI)->isLiveIn(SystemZ::CC))
+ for (const MachineBasicBlock *Succ : MBB->successors())
+ if (Succ->isLiveIn(SystemZ::CC))
return false;
}
@@ -7796,26 +7873,67 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
MachineOperand SrcBase = earlyUseOperand(MI.getOperand(2));
uint64_t SrcDisp = MI.getOperand(3).getImm();
MachineOperand &LengthMO = MI.getOperand(4);
- uint64_t ImmLength = LengthMO.isImm() ? LengthMO.getImm() : 0;
- Register LenMinus1Reg =
- LengthMO.isReg() ? LengthMO.getReg() : SystemZ::NoRegister;
+ bool IsImmForm = LengthMO.isImm();
+ bool IsRegForm = !IsImmForm;
+
+ bool NeedsLoop = false;
+ uint64_t ImmLength = 0;
+ Register LenMinus1Reg = SystemZ::NoRegister;
+ if (IsImmForm) {
+ ImmLength = LengthMO.getImm();
+ ImmLength++; // Add back the '1' subtracted originally.
+ if (ImmLength == 0) {
+ MI.eraseFromParent();
+ return MBB;
+ }
+ if (Opcode == SystemZ::CLC) {
+ if (ImmLength > 3 * 256)
+ // A two-CLC sequence is a clear win over a loop, not least because
+ // it needs only one branch. A three-CLC sequence needs the same
+ // number of branches as a loop (i.e. 2), but is shorter. That
+ // brings us to lengths greater than 768 bytes. It seems relatively
+ // likely that a difference will be found within the first 768 bytes,
+ // so we just optimize for the smallest number of branch
+ // instructions, in order to avoid polluting the prediction buffer
+ // too much.
+ NeedsLoop = true;
+ } else if (ImmLength > 6 * 256)
+ // The heuristic we use is to prefer loops for anything that would
+ // require 7 or more MVCs. With these kinds of sizes there isn't much
+ // to choose between straight-line code and looping code, since the
+ // time will be dominated by the MVCs themselves.
+ NeedsLoop = true;
+ } else {
+ NeedsLoop = true;
+ LenMinus1Reg = LengthMO.getReg();
+ }
// When generating more than one CLC, all but the last will need to
// branch to the end when a difference is found.
- MachineBasicBlock *EndMBB = (ImmLength > 256 && Opcode == SystemZ::CLC
- ? SystemZ::splitBlockAfter(MI, MBB)
- : nullptr);
-
- // Check for the loop form, in which operand 5 is the trip count.
- if (MI.getNumExplicitOperands() > 5) {
- Register StartCountReg = MI.getOperand(5).getReg();
- bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
+ MachineBasicBlock *EndMBB =
+ (Opcode == SystemZ::CLC && (ImmLength > 256 || NeedsLoop)
+ ? SystemZ::splitBlockAfter(MI, MBB)
+ : nullptr);
+
+ if (NeedsLoop) {
+ Register StartCountReg =
+ MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
+ if (IsImmForm) {
+ TII->loadImmediate(*MBB, MI, StartCountReg, ImmLength / 256);
+ ImmLength &= 255;
+ } else {
+ BuildMI(*MBB, MI, DL, TII->get(SystemZ::SRLG), StartCountReg)
+ .addReg(LenMinus1Reg)
+ .addReg(0)
+ .addImm(8);
+ }
auto loadZeroAddress = [&]() -> MachineOperand {
Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
BuildMI(*MBB, MI, DL, TII->get(SystemZ::LGHI), Reg).addImm(0);
return MachineOperand::CreateReg(Reg, false);
};
+ bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
if (DestBase.isReg() && DestBase.getReg() == SystemZ::NoRegister)
DestBase = loadZeroAddress();
if (SrcBase.isReg() && SrcBase.getReg() == SystemZ::NoRegister)
@@ -7842,12 +7960,12 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
Register ThisCountReg = MRI.createVirtualRegister(RC);
Register NextCountReg = MRI.createVirtualRegister(RC);
- if (LengthMO.isReg()) {
+ if (IsRegForm) {
AllDoneMBB = SystemZ::splitBlockBefore(MI, MBB);
StartMBB = SystemZ::emitBlockAfter(MBB);
LoopMBB = SystemZ::emitBlockAfter(StartMBB);
- NextMBB = LoopMBB;
- DoneMBB = SystemZ::emitBlockAfter(LoopMBB);
+ NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
+ DoneMBB = SystemZ::emitBlockAfter(NextMBB);
// MBB:
// # Jump to AllDoneMBB if LenMinus1Reg is -1, or fall thru to StartMBB.
@@ -7882,7 +8000,6 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
DestBase = MachineOperand::CreateReg(NextDestReg, false);
SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
- ImmLength &= 255;
if (EndMBB && !ImmLength)
// If the loop handled the whole CLC range, DoneMBB will be empty with
// CC live-through into EndMBB, so add it as live-in.
@@ -7953,7 +8070,7 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
MBB->addSuccessor(DoneMBB);
MBB = DoneMBB;
- if (LengthMO.isReg()) {
+ if (IsRegForm) {
// DoneMBB:
// # Make PHIs for RemDestReg/RemSrcReg as the loop may or may not run.
// # Use EXecute Relative Long for the remainder of the bytes. The target
@@ -7966,19 +8083,23 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
: MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemDestReg)
.addReg(StartDestReg).addMBB(StartMBB)
- .addReg(NextDestReg).addMBB(LoopMBB);
+ .addReg(NextDestReg).addMBB(NextMBB);
if (!HaveSingleBase)
BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemSrcReg)
.addReg(StartSrcReg).addMBB(StartMBB)
- .addReg(NextSrcReg).addMBB(LoopMBB);
- MRI.constrainRegClass(LenMinus1Reg, &SystemZ::ADDR64BitRegClass);
- BuildMI(MBB, DL, TII->get(SystemZ::EXRL_Pseudo))
- .addImm(Opcode)
- .addReg(LenMinus1Reg)
- .addReg(RemDestReg).addImm(DestDisp)
- .addReg(RemSrcReg).addImm(SrcDisp);
+ .addReg(NextSrcReg).addMBB(NextMBB);
+ MachineInstrBuilder EXRL_MIB =
+ BuildMI(MBB, DL, TII->get(SystemZ::EXRL_Pseudo))
+ .addImm(Opcode)
+ .addReg(LenMinus1Reg)
+ .addReg(RemDestReg).addImm(DestDisp)
+ .addReg(RemSrcReg).addImm(SrcDisp);
MBB->addSuccessor(AllDoneMBB);
MBB = AllDoneMBB;
+ if (EndMBB) {
+ EXRL_MIB.addReg(SystemZ::CC, RegState::ImplicitDefine);
+ MBB->addLiveIn(SystemZ::CC);
+ }
}
}
@@ -8264,8 +8385,7 @@ MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca(
SDValue SystemZTargetLowering::
getBackchainAddress(SDValue SP, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
- auto *TFL =
- static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering());
+ auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
SDLoc DL(SP);
return DAG.getNode(ISD::ADD, DL, MVT::i64, SP,
DAG.getIntPtrConstant(TFL->getBackchainOffset(MF), DL));
@@ -8497,21 +8617,18 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
case SystemZ::ATOMIC_CMP_SWAPW:
return emitAtomicCmpSwapW(MI, MBB);
- case SystemZ::MVCSequence:
- case SystemZ::MVCLoop:
+ case SystemZ::MVCImm:
+ case SystemZ::MVCReg:
return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
- case SystemZ::NCSequence:
- case SystemZ::NCLoop:
+ case SystemZ::NCImm:
return emitMemMemWrapper(MI, MBB, SystemZ::NC);
- case SystemZ::OCSequence:
- case SystemZ::OCLoop:
+ case SystemZ::OCImm:
return emitMemMemWrapper(MI, MBB, SystemZ::OC);
- case SystemZ::XCSequence:
- case SystemZ::XCLoop:
- case SystemZ::XCLoopVarLen:
+ case SystemZ::XCImm:
+ case SystemZ::XCReg:
return emitMemMemWrapper(MI, MBB, SystemZ::XC);
- case SystemZ::CLCSequence:
- case SystemZ::CLCLoop:
+ case SystemZ::CLCImm:
+ case SystemZ::CLCReg:
return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
case SystemZ::CLSTLoop:
return emitStringWrapper(MI, MBB, SystemZ::CLST);
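
The XPLINK outgoing-argument sizing added to LowerCall earlier in this file diff relies on llvm::alignTo plus a 64-byte floor; a few worked values (illustrative only, mirroring the expression used above) make the rounding concrete:

#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>

void checkParamAreaRounding() {
  auto roundParamArea = [](unsigned NumBytes) {
    return std::max(64U, (unsigned)llvm::alignTo(NumBytes, 64));
  };
  assert(roundParamArea(0) == 64);   // even an empty area reserves 64 bytes
  assert(roundParamArea(40) == 64);  // above the 32-byte minimum, still one block
  assert(roundParamArea(72) == 128); // next 64-byte increment
}
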
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index 248efc11b87f..461f804ca55e 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -117,23 +117,14 @@ enum NodeType : unsigned {
// MachineMemOperands rather than one.
MVC,
- // Like MVC, but implemented as a loop that handles X*256 bytes
- // followed by straight-line code to handle the rest (if any).
- // The value of X is passed as an additional operand.
- MVC_LOOP,
-
- // Similar to MVC and MVC_LOOP, but for logic operations (AND, OR, XOR).
+ // Similar to MVC, but for logic operations (AND, OR, XOR).
NC,
- NC_LOOP,
OC,
- OC_LOOP,
XC,
- XC_LOOP,
// Use CLC to compare two blocks of memory, with the same comments
- // as for MVC and MVC_LOOP.
+ // as for MVC.
CLC,
- CLC_LOOP,
// Use an MVST-based sequence to implement stpcpy().
STPCPY,
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/llvm/lib/Target/SystemZ/SystemZInstrFP.td
index 337164d55e5f..7cbe125533d3 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrFP.td
@@ -128,9 +128,10 @@ let Predicates = [FeatureNoVectorEnhancements1] in {
(EXTRACT_SUBREG FP128:$src2, subreg_h64))>;
}
-defm LoadStoreF32 : MVCLoadStore<load, f32, MVCSequence, 4>;
-defm LoadStoreF64 : MVCLoadStore<load, f64, MVCSequence, 8>;
-defm LoadStoreF128 : MVCLoadStore<load, f128, MVCSequence, 16>;
+// The length is given as one less for MVCImm.
+defm LoadStoreF32 : MVCLoadStore<load, f32, MVCImm, 3>;
+defm LoadStoreF64 : MVCLoadStore<load, f64, MVCImm, 7>;
+defm LoadStoreF128 : MVCLoadStore<load, f128, MVCImm, 15>;
//===----------------------------------------------------------------------===//
// Load instructions
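
The updated FP load/store patterns pass the block length minus one, matching the new MemorySS convention (explained in the SystemZInstrFormats.td hunk below) where the pseudo's length operand is one less than the intended byte count. A trivial standalone check of that relationship, using only the constants visible above:

#include <cassert>

void checkMVCImmLengths() {
  constexpr unsigned F32Bytes = 4, F64Bytes = 8, F128Bytes = 16;
  assert(F32Bytes - 1 == 3);   // MVCImm length used by LoadStoreF32
  assert(F64Bytes - 1 == 7);   // MVCImm length used by LoadStoreF64
  assert(F128Bytes - 1 == 15); // MVCImm length used by LoadStoreF128
}
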
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
index 5cb46cdb36a6..cd60fff1ab11 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -5329,42 +5329,37 @@ multiclass CondUnaryRSYPseudoAndMemFold<string mnemonic,
// Define an instruction that operates on two fixed-length blocks of memory,
// and associated pseudo instructions for operating on blocks of any size.
-// The Sequence form uses a straight-line sequence of instructions and
-// the Loop form uses a loop of length-256 instructions followed by
-// another instruction to handle the excess.
-// The LoopVarLen form is for a loop with a non-constant length parameter.
-multiclass MemorySS<string mnemonic, bits<8> opcode,
- SDPatternOperator sequence, SDPatternOperator loop> {
+// There are two pseudos for the different cases of when the length is
+// constant or variable. The length operand of a pseudo is actually one less
+// than the intended number of bytes, since the register case needs to use an
+// EXRL with a target instruction that adds one to the length always.
+multiclass MemorySS<string mnemonic, bits<8> opcode, SDPatternOperator memop> {
def "" : SideEffectBinarySSa<mnemonic, opcode>;
let usesCustomInserter = 1, hasNoSchedulingInfo = 1, Defs = [CC] in {
- def Sequence : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
- imm64:$length),
- [(sequence bdaddr12only:$dest, bdaddr12only:$src,
- imm64:$length)]>;
- def Loop : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
- imm64:$length, GR64:$count256),
- [(loop bdaddr12only:$dest, bdaddr12only:$src,
- imm64:$length, GR64:$count256)]>;
- def LoopVarLen : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
- GR64:$length, GR64:$count256),
- [(loop bdaddr12only:$dest, bdaddr12only:$src,
- GR64:$length, GR64:$count256)]>;
+ def Imm : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
+ imm64:$length),
+ [(memop bdaddr12only:$dest, bdaddr12only:$src,
+ imm64:$length)]>;
+ def Reg : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
+ ADDR64:$length),
+ [(memop bdaddr12only:$dest, bdaddr12only:$src,
+ ADDR64:$length)]>;
}
}
// The same, but setting a CC result as comparison operator.
multiclass CompareMemorySS<string mnemonic, bits<8> opcode,
- SDPatternOperator sequence, SDPatternOperator loop> {
+ SDPatternOperator memop> {
def "" : SideEffectBinarySSa<mnemonic, opcode>;
let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
- def Sequence : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
- imm64:$length),
- [(set CC, (sequence bdaddr12only:$dest, bdaddr12only:$src,
- imm64:$length))]>;
- def Loop : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
- imm64:$length, GR64:$count256),
- [(set CC, (loop bdaddr12only:$dest, bdaddr12only:$src,
- imm64:$length, GR64:$count256))]>;
+ def Imm : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
+ imm64:$length),
+ [(set CC, (memop bdaddr12only:$dest, bdaddr12only:$src,
+ imm64:$length))]>;
+ def Reg : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
+ ADDR64:$length),
+ [(set CC, (memop bdaddr12only:$dest, bdaddr12only:$src,
+ ADDR64:$length))]>;
}
}
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index b9f64198f4e5..2bf80882fa61 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -27,6 +27,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/MC/MCInstrDesc.h"
@@ -514,8 +515,8 @@ unsigned SystemZInstrInfo::insertBranch(MachineBasicBlock &MBB,
}
bool SystemZInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
- Register &SrcReg2, int &Mask,
- int &Value) const {
+ Register &SrcReg2, int64_t &Mask,
+ int64_t &Value) const {
assert(MI.isCompare() && "Caller should have checked for a comparison");
if (MI.getNumExplicitOperands() == 2 && MI.getOperand(0).isReg() &&
@@ -942,8 +943,9 @@ static void transferMIFlag(MachineInstr *OldMI, MachineInstr *NewMI,
NewMI->setFlag(Flag);
}
-MachineInstr *SystemZInstrInfo::convertToThreeAddress(
- MachineFunction::iterator &MFI, MachineInstr &MI, LiveVariables *LV) const {
+MachineInstr *
+SystemZInstrInfo::convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
+ LiveIntervals *LIS) const {
MachineBasicBlock *MBB = MI.getParent();
// Try to convert an AND into an RISBG-type instruction.
@@ -984,6 +986,8 @@ MachineInstr *SystemZInstrInfo::convertToThreeAddress(
LV->replaceKillInstruction(Op.getReg(), MI, *MIB);
}
}
+ if (LIS)
+ LIS->ReplaceMachineInstrInMaps(MI, *MIB);
transferDeadCC(&MI, MIB);
return MIB;
}
@@ -1515,6 +1519,13 @@ unsigned SystemZInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
const char *AsmStr = MI.getOperand(0).getSymbolName();
return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
}
+ else if (MI.getOpcode() == SystemZ::PATCHPOINT)
+ return PatchPointOpers(&MI).getNumPatchBytes();
+ else if (MI.getOpcode() == SystemZ::STACKMAP)
+ return MI.getOperand(1).getImm();
+ else if (MI.getOpcode() == SystemZ::FENTRY_CALL)
+ return 6;
+
return MI.getDesc().getSize();
}
@@ -1923,7 +1934,7 @@ void SystemZInstrInfo::loadImmediate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned Reg, uint64_t Value) const {
DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
- unsigned Opcode;
+ unsigned Opcode = 0;
if (isInt<16>(Value))
Opcode = SystemZ::LGHI;
else if (SystemZ::isImmLL(Value))
@@ -1931,11 +1942,23 @@ void SystemZInstrInfo::loadImmediate(MachineBasicBlock &MBB,
else if (SystemZ::isImmLH(Value)) {
Opcode = SystemZ::LLILH;
Value >>= 16;
- } else {
- assert(isInt<32>(Value) && "Huge values not handled yet");
+ }
+ else if (isInt<32>(Value))
Opcode = SystemZ::LGFI;
+ if (Opcode) {
+ BuildMI(MBB, MBBI, DL, get(Opcode), Reg).addImm(Value);
+ return;
}
- BuildMI(MBB, MBBI, DL, get(Opcode), Reg).addImm(Value);
+
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ assert (MRI.isSSA() && "Huge values only handled before reg-alloc.");
+ Register Reg0 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
+ Register Reg1 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
+ BuildMI(MBB, MBBI, DL, get(SystemZ::IMPLICIT_DEF), Reg0);
+ BuildMI(MBB, MBBI, DL, get(SystemZ::IIHF64), Reg1)
+ .addReg(Reg0).addImm(Value >> 32);
+ BuildMI(MBB, MBBI, DL, get(SystemZ::IILF64), Reg)
+ .addReg(Reg1).addImm(Value & ((uint64_t(1) << 32) - 1));
}
bool SystemZInstrInfo::verifyInstruction(const MachineInstr &MI,
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
index 72dafc3c93c2..396f56c7f59c 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -47,7 +47,8 @@ enum {
CCMaskFirst = (1 << 18),
CCMaskLast = (1 << 19),
IsLogical = (1 << 20),
- CCIfNoSignedWrap = (1 << 21)
+ CCIfNoSignedWrap = (1 << 21),
+ MemMemOp = (1 << 22)
};
static inline unsigned getAccessSize(unsigned int Flags) {
@@ -234,7 +235,8 @@ public:
const DebugLoc &DL,
int *BytesAdded = nullptr) const override;
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
- Register &SrcReg2, int &Mask, int &Value) const override;
+ Register &SrcReg2, int64_t &Mask,
+ int64_t &Value) const override;
bool canInsertSelect(const MachineBasicBlock &, ArrayRef<MachineOperand> Cond,
Register, Register, Register, int &, int &,
int &) const override;
@@ -270,9 +272,8 @@ public:
Register DestReg, int FrameIdx,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
- MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI,
- MachineInstr &MI,
- LiveVariables *LV) const override;
+ MachineInstr *convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
+ LiveIntervals *LIS) const override;
MachineInstr *
foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
ArrayRef<unsigned> Ops,
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index 7df7cc93d6eb..e4760229fd6b 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -503,7 +503,7 @@ def MVGHI : StoreSIL<"mvghi", 0xE548, store, imm64sx16>;
// Memory-to-memory moves.
let mayLoad = 1, mayStore = 1 in
- defm MVC : MemorySS<"mvc", 0xD2, z_mvc, z_mvc_loop>;
+ defm MVC : MemorySS<"mvc", 0xD2, z_mvc>;
let mayLoad = 1, mayStore = 1, Defs = [CC] in {
def MVCL : SideEffectBinaryMemMemRR<"mvcl", 0x0E, GR128, GR128>;
def MVCLE : SideEffectTernaryMemMemRS<"mvcle", 0xA8, GR128, GR128>;
@@ -1200,7 +1200,7 @@ let Defs = [CC] in {
// Block AND.
let mayLoad = 1, mayStore = 1 in
- defm NC : MemorySS<"nc", 0xD4, z_nc, z_nc_loop>;
+ defm NC : MemorySS<"nc", 0xD4, z_nc>;
}
defm : RMWIByte<and, bdaddr12pair, NI>;
defm : RMWIByte<and, bdaddr20pair, NIY>;
@@ -1257,7 +1257,7 @@ let Defs = [CC] in {
// Block OR.
let mayLoad = 1, mayStore = 1 in
- defm OC : MemorySS<"oc", 0xD6, z_oc, z_oc_loop>;
+ defm OC : MemorySS<"oc", 0xD6, z_oc>;
}
defm : RMWIByte<or, bdaddr12pair, OI>;
defm : RMWIByte<or, bdaddr20pair, OIY>;
@@ -1297,7 +1297,7 @@ let Defs = [CC] in {
// Block XOR.
let mayLoad = 1, mayStore = 1 in
- defm XC : MemorySS<"xc", 0xD7, z_xc, z_xc_loop>;
+ defm XC : MemorySS<"xc", 0xD7, z_xc>;
}
defm : RMWIByte<xor, bdaddr12pair, XI>;
defm : RMWIByte<xor, bdaddr20pair, XIY>;
@@ -1624,7 +1624,7 @@ defm : ZXB<z_ucmp, GR64, CLGFR>;
// Memory-to-memory comparison.
let mayLoad = 1, Defs = [CC] in {
- defm CLC : CompareMemorySS<"clc", 0xD5, z_clc, z_clc_loop>;
+ defm CLC : CompareMemorySS<"clc", 0xD5, z_clc>;
def CLCL : SideEffectBinaryMemMemRR<"clcl", 0x0F, GR128, GR128>;
def CLCLE : SideEffectTernaryMemMemRS<"clcle", 0xA9, GR128, GR128>;
def CLCLU : SideEffectTernaryMemMemRSY<"clclu", 0xEB8F, GR128, GR128>;
@@ -2173,7 +2173,7 @@ let hasSideEffects = 1 in {
def EX : SideEffectBinaryRX<"ex", 0x44, ADDR64>;
def EXRL : SideEffectBinaryRILPC<"exrl", 0xC60, ADDR64>;
let hasNoSchedulingInfo = 1 in
- def EXRL_Pseudo : Pseudo<(outs), (ins i64imm:$TargetOpc, ADDR64:$lenMinus1,
+ def EXRL_Pseudo : Alias<6, (outs), (ins i64imm:$TargetOpc, ADDR64:$lenMinus1,
bdaddr12only:$bdl1, bdaddr12only:$bd2),
[]>;
}
@@ -2355,21 +2355,15 @@ let AddedComplexity = 4 in {
(RLLG GR64:$val, (LCR GR32:$shift), 0)>;
}
-// Peepholes for turning scalar operations into block operations.
-defm : BlockLoadStore<anyextloadi8, i32, MVCSequence, NCSequence, OCSequence,
- XCSequence, 1>;
-defm : BlockLoadStore<anyextloadi16, i32, MVCSequence, NCSequence, OCSequence,
- XCSequence, 2>;
-defm : BlockLoadStore<load, i32, MVCSequence, NCSequence, OCSequence,
- XCSequence, 4>;
-defm : BlockLoadStore<anyextloadi8, i64, MVCSequence, NCSequence,
- OCSequence, XCSequence, 1>;
-defm : BlockLoadStore<anyextloadi16, i64, MVCSequence, NCSequence, OCSequence,
- XCSequence, 2>;
-defm : BlockLoadStore<anyextloadi32, i64, MVCSequence, NCSequence, OCSequence,
- XCSequence, 4>;
-defm : BlockLoadStore<load, i64, MVCSequence, NCSequence, OCSequence,
- XCSequence, 8>;
+// Peepholes for turning scalar operations into block operations. The length
+// is given as one less for these pseudos.
+defm : BlockLoadStore<anyextloadi8, i32, MVCImm, NCImm, OCImm, XCImm, 0>;
+defm : BlockLoadStore<anyextloadi16, i32, MVCImm, NCImm, OCImm, XCImm, 1>;
+defm : BlockLoadStore<load, i32, MVCImm, NCImm, OCImm, XCImm, 3>;
+defm : BlockLoadStore<anyextloadi8, i64, MVCImm, NCImm, OCImm, XCImm, 0>;
+defm : BlockLoadStore<anyextloadi16, i64, MVCImm, NCImm, OCImm, XCImm, 1>;
+defm : BlockLoadStore<anyextloadi32, i64, MVCImm, NCImm, OCImm, XCImm, 3>;
+defm : BlockLoadStore<load, i64, MVCImm, NCImm, OCImm, XCImm, 7>;
//===----------------------------------------------------------------------===//
// Mnemonic Aliases
diff --git a/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp b/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp
index b1964321c78a..9c985c16f082 100644
--- a/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp
@@ -209,10 +209,24 @@ void SystemZLongBranch::skipTerminator(BlockPosition &Position,
Position.Address += Terminator.ExtraRelaxSize;
}
+static unsigned getInstSizeInBytes(const MachineInstr &MI,
+ const SystemZInstrInfo *TII) {
+ unsigned Size = TII->getInstSizeInBytes(MI);
+ assert((Size ||
+ // These do not have a size:
+ MI.isDebugOrPseudoInstr() || MI.isPosition() || MI.isKill() ||
+ MI.isImplicitDef() || MI.getOpcode() == SystemZ::MemBarrier ||
+ // These have a size that may be zero:
+ MI.isInlineAsm() || MI.getOpcode() == SystemZ::STACKMAP ||
+ MI.getOpcode() == SystemZ::PATCHPOINT) &&
+ "Missing size value for instruction.");
+ return Size;
+}
+
// Return a description of terminator instruction MI.
TerminatorInfo SystemZLongBranch::describeTerminator(MachineInstr &MI) {
TerminatorInfo Terminator;
- Terminator.Size = TII->getInstSizeInBytes(MI);
+ Terminator.Size = getInstSizeInBytes(MI, TII);
if (MI.isConditionalBranch() || MI.isUnconditionalBranch()) {
switch (MI.getOpcode()) {
case SystemZ::J:
@@ -287,7 +301,7 @@ uint64_t SystemZLongBranch::initMBBInfo() {
MachineBasicBlock::iterator MI = MBB->begin();
MachineBasicBlock::iterator End = MBB->end();
while (MI != End && !MI->isTerminator()) {
- Block.Size += TII->getInstSizeInBytes(*MI);
+ Block.Size += getInstSizeInBytes(*MI, TII);
++MI;
}
skipNonTerminators(Position, Block);
diff --git a/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp b/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp
index 9bee5e8d1864..4bc979de795d 100644
--- a/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp
@@ -46,9 +46,9 @@ static MachineBasicBlock *getSingleSchedPred(MachineBasicBlock *MBB,
// The loop header has two predecessors, return the latch, but not for a
// single block loop.
if (MBB->pred_size() == 2 && Loop != nullptr && Loop->getHeader() == MBB) {
- for (auto I = MBB->pred_begin(); I != MBB->pred_end(); ++I)
- if (Loop->contains(*I))
- PredMBB = (*I == MBB ? nullptr : *I);
+ for (MachineBasicBlock *Pred : MBB->predecessors())
+ if (Loop->contains(Pred))
+ PredMBB = (Pred == MBB ? nullptr : Pred);
}
assert ((PredMBB == nullptr || !Loop || Loop->contains(PredMBB))
@@ -106,13 +106,12 @@ void SystemZPostRASchedStrategy::enterMBB(MachineBasicBlock *NextMBB) {
// Emit incoming terminator(s). Be optimistic and assume that branch
// prediction will generally do "the right thing".
- for (MachineBasicBlock::iterator I = SinglePredMBB->getFirstTerminator();
- I != SinglePredMBB->end(); I++) {
- LLVM_DEBUG(dbgs() << "** Emitting incoming branch: "; I->dump(););
- bool TakenBranch = (I->isBranch() &&
- (TII->getBranchInfo(*I).isIndirect() ||
- TII->getBranchInfo(*I).getMBBTarget() == MBB));
- HazardRec->emitInstruction(&*I, TakenBranch);
+ for (MachineInstr &MI : SinglePredMBB->terminators()) {
+ LLVM_DEBUG(dbgs() << "** Emitting incoming branch: "; MI.dump(););
+ bool TakenBranch = (MI.isBranch() &&
+ (TII->getBranchInfo(MI).isIndirect() ||
+ TII->getBranchInfo(MI).getMBBTarget() == MBB));
+ HazardRec->emitInstruction(&MI, TakenBranch);
if (TakenBranch)
break;
}
diff --git a/llvm/lib/Target/SystemZ/SystemZOperators.td b/llvm/lib/Target/SystemZ/SystemZOperators.td
index 992b1512a077..927d97233286 100644
--- a/llvm/lib/Target/SystemZ/SystemZOperators.td
+++ b/llvm/lib/Target/SystemZ/SystemZOperators.td
@@ -102,17 +102,6 @@ def SDT_ZMemMemLengthCC : SDTypeProfile<1, 3,
SDTCisPtrTy<1>,
SDTCisPtrTy<2>,
SDTCisVT<3, i64>]>;
-def SDT_ZMemMemLoop : SDTypeProfile<0, 4,
- [SDTCisPtrTy<0>,
- SDTCisPtrTy<1>,
- SDTCisVT<2, i64>,
- SDTCisVT<3, i64>]>;
-def SDT_ZMemMemLoopCC : SDTypeProfile<1, 4,
- [SDTCisVT<0, i32>,
- SDTCisPtrTy<1>,
- SDTCisPtrTy<2>,
- SDTCisVT<3, i64>,
- SDTCisVT<4, i64>]>;
def SDT_ZString : SDTypeProfile<1, 3,
[SDTCisPtrTy<0>,
SDTCisPtrTy<1>,
@@ -416,24 +405,14 @@ def z_atomic_cmp_swap_128 : SDNode<"SystemZISD::ATOMIC_CMP_SWAP_128",
def z_mvc : SDNode<"SystemZISD::MVC", SDT_ZMemMemLength,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
-def z_mvc_loop : SDNode<"SystemZISD::MVC_LOOP", SDT_ZMemMemLoop,
- [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
def z_nc : SDNode<"SystemZISD::NC", SDT_ZMemMemLength,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
-def z_nc_loop : SDNode<"SystemZISD::NC_LOOP", SDT_ZMemMemLoop,
- [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
def z_oc : SDNode<"SystemZISD::OC", SDT_ZMemMemLength,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
-def z_oc_loop : SDNode<"SystemZISD::OC_LOOP", SDT_ZMemMemLoop,
- [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
def z_xc : SDNode<"SystemZISD::XC", SDT_ZMemMemLength,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
-def z_xc_loop : SDNode<"SystemZISD::XC_LOOP", SDT_ZMemMemLoop,
- [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
def z_clc : SDNode<"SystemZISD::CLC", SDT_ZMemMemLengthCC,
[SDNPHasChain, SDNPMayLoad]>;
-def z_clc_loop : SDNode<"SystemZISD::CLC_LOOP", SDT_ZMemMemLoopCC,
- [SDNPHasChain, SDNPMayLoad]>;
def z_strcmp : SDNode<"SystemZISD::STRCMP", SDT_ZStringCC,
[SDNPHasChain, SDNPMayLoad]>;
def z_stpcpy : SDNode<"SystemZISD::STPCPY", SDT_ZString,
diff --git a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index 0062e39602f5..48cec176b006 100644
--- a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -190,7 +190,9 @@ bool SystemZRegisterInfo::getRegAllocationHints(
const MCPhysReg *
SystemZXPLINK64Registers::getCalleeSavedRegs(const MachineFunction *MF) const {
- return CSR_SystemZ_XPLINK64_SaveList;
+ const SystemZSubtarget &Subtarget = MF->getSubtarget<SystemZSubtarget>();
+ return Subtarget.hasVector() ? CSR_SystemZ_XPLINK64_Vector_SaveList
+ : CSR_SystemZ_XPLINK64_SaveList;
}
const MCPhysReg *
@@ -211,7 +213,9 @@ SystemZELFRegisters::getCalleeSavedRegs(const MachineFunction *MF) const {
const uint32_t *
SystemZXPLINK64Registers::getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const {
- return CSR_SystemZ_XPLINK64_RegMask;
+ const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
+ return Subtarget.hasVector() ? CSR_SystemZ_XPLINK64_Vector_RegMask
+ : CSR_SystemZ_XPLINK64_RegMask;
}
const uint32_t *
diff --git a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
index 122504d4b44b..8ce01074873a 100644
--- a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -10,6 +10,7 @@
#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZREGISTERINFO_H
#include "SystemZ.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#define GET_REGINFO_HEADER
@@ -44,9 +45,9 @@ inline bool isHighReg(unsigned int Reg) {
/// It is abstract, all calling conventions must override and
/// define the pure virtual member function defined in this class.
class SystemZCallingConventionRegisters {
+
public:
- /// \returns the register that keeps the
- /// return function address.
+ /// \returns the register that keeps the return function address.
virtual int getReturnFunctionAddressRegister() = 0;
/// \returns the register that keeps the
@@ -65,6 +66,12 @@ public:
virtual const uint32_t *getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const = 0;
+ /// \returns the offset to the locals area.
+ virtual int getCallFrameSize() = 0;
+
+ /// \returns the stack pointer bias.
+ virtual int getStackPointerBias() = 0;
+
/// Destroys the object. Bogus destructor allowing derived classes
/// to override it.
virtual ~SystemZCallingConventionRegisters(){};
@@ -82,12 +89,18 @@ public:
int getFramePointerRegister() override final { return SystemZ::R8D; };
+ int getAddressOfCalleeRegister() { return SystemZ::R6D; };
+
const MCPhysReg *
getCalleeSavedRegs(const MachineFunction *MF) const override final;
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const override final;
+ int getCallFrameSize() override final { return 128; }
+
+ int getStackPointerBias() override final { return 2048; }
+
/// Destroys the object. Bogus destructor overriding base class destructor
~SystemZXPLINK64Registers(){};
};
@@ -110,6 +123,10 @@ public:
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const override final;
+ int getCallFrameSize() override final { return SystemZMC::ELFCallFrameSize; }
+
+ int getStackPointerBias() override final { return 0; }
+
/// Destroys the object. Bogus destructor overriding base class destructor
~SystemZELFRegisters(){};
};
diff --git a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
index 4a9ea69d101c..f38e93109967 100644
--- a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
@@ -17,32 +17,29 @@ using namespace llvm;
#define DEBUG_TYPE "systemz-selectiondag-info"
-// Decide whether it is best to use a loop or straight-line code for
-// a block operation of Size bytes with source address Src and destination
-// address Dest. Sequence is the opcode to use for straight-line code
-// (such as MVC) and Loop is the opcode to use for loops (such as MVC_LOOP).
-// Return the chain for the completed operation.
-static SDValue emitMemMem(SelectionDAG &DAG, const SDLoc &DL, unsigned Sequence,
- unsigned Loop, SDValue Chain, SDValue Dst,
- SDValue Src, uint64_t Size) {
- EVT PtrVT = Src.getValueType();
- // The heuristic we use is to prefer loops for anything that would
- // require 7 or more MVCs. With these kinds of sizes there isn't
- // much to choose between straight-line code and looping code,
- // since the time will be dominated by the MVCs themselves.
- // However, the loop has 4 or 5 instructions (depending on whether
- // the base addresses can be proved equal), so there doesn't seem
- // much point using a loop for 5 * 256 bytes or fewer. Anything in
- // the range (5 * 256, 6 * 256) will need another instruction after
- // the loop, so it doesn't seem worth using a loop then either.
- // The next value up, 6 * 256, can be implemented in the same
- // number of straight-line MVCs as 6 * 256 - 1.
- if (Size > 6 * 256)
- return DAG.getNode(Loop, DL, MVT::Other, Chain, Dst, Src,
- DAG.getConstant(Size, DL, PtrVT),
- DAG.getConstant(Size / 256, DL, PtrVT));
- return DAG.getNode(Sequence, DL, MVT::Other, Chain, Dst, Src,
- DAG.getConstant(Size, DL, PtrVT));
+static SDVTList getMemMemVTs(unsigned Op, SelectionDAG &DAG) {
+ return Op == SystemZISD::CLC ? DAG.getVTList(MVT::i32, MVT::Other)
+ : DAG.getVTList(MVT::Other);
+}
+
+// Emit a mem-mem operation after subtracting one from size, which will be
+// added back during pseudo expansion. As the Reg case emitted here may be
+// converted by DAGCombiner into having an Imm length, both cases are
+// emitted the same way.
+static SDValue emitMemMemImm(SelectionDAG &DAG, const SDLoc &DL, unsigned Op,
+ SDValue Chain, SDValue Dst, SDValue Src,
+ uint64_t Size) {
+ return DAG.getNode(Op, DL, getMemMemVTs(Op, DAG), Chain, Dst, Src,
+ DAG.getConstant(Size - 1, DL, Src.getValueType()));
+}
+
+static SDValue emitMemMemReg(SelectionDAG &DAG, const SDLoc &DL, unsigned Op,
+ SDValue Chain, SDValue Dst, SDValue Src,
+ SDValue Size) {
+ SDValue LenMinus1 = DAG.getNode(ISD::ADD, DL, MVT::i64,
+ DAG.getZExtOrTrunc(Size, DL, MVT::i64),
+ DAG.getConstant(-1, DL, MVT::i64));
+ return DAG.getNode(Op, DL, getMemMemVTs(Op, DAG), Chain, Dst, Src, LenMinus1);
}
SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemcpy(
@@ -53,9 +50,10 @@ SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemcpy(
return SDValue();
if (auto *CSize = dyn_cast<ConstantSDNode>(Size))
- return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP,
- Chain, Dst, Src, CSize->getZExtValue());
- return SDValue();
+ return emitMemMemImm(DAG, DL, SystemZISD::MVC, Chain, Dst, Src,
+ CSize->getZExtValue());
+
+ return emitMemMemReg(DAG, DL, SystemZISD::MVC, Chain, Dst, Src, Size);
}
// Handle a memset of 1, 2, 4 or 8 bytes with the operands given by
@@ -127,52 +125,23 @@ SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemset(
// Handle the special case of a memset of 0, which can use XC.
if (CByte && CByte->getZExtValue() == 0)
- return emitMemMem(DAG, DL, SystemZISD::XC, SystemZISD::XC_LOOP,
- Chain, Dst, Dst, Bytes);
+ return emitMemMemImm(DAG, DL, SystemZISD::XC, Chain, Dst, Dst, Bytes);
// Copy the byte to the first location and then use MVC to copy
// it to the rest.
Chain = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, Alignment);
SDValue DstPlus1 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst,
DAG.getConstant(1, DL, PtrVT));
- return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP,
- Chain, DstPlus1, Dst, Bytes - 1);
+ return emitMemMemImm(DAG, DL, SystemZISD::MVC, Chain, DstPlus1, Dst,
+ Bytes - 1);
}
// Variable length
- if (CByte && CByte->getZExtValue() == 0) {
+ if (CByte && CByte->getZExtValue() == 0)
// Handle the special case of a variable length memset of 0 with XC.
- SDValue LenMinus1 = DAG.getNode(ISD::ADD, DL, MVT::i64,
- DAG.getZExtOrTrunc(Size, DL, MVT::i64),
- DAG.getConstant(-1, DL, MVT::i64));
- SDValue TripC = DAG.getNode(ISD::SRL, DL, MVT::i64, LenMinus1,
- DAG.getConstant(8, DL, MVT::i64));
- return DAG.getNode(SystemZISD::XC_LOOP, DL, MVT::Other, Chain, Dst, Dst,
- LenMinus1, TripC);
- }
- return SDValue();
-}
+ return emitMemMemReg(DAG, DL, SystemZISD::XC, Chain, Dst, Dst, Size);
-// Use CLC to compare [Src1, Src1 + Size) with [Src2, Src2 + Size),
-// deciding whether to use a loop or straight-line code.
-static SDValue emitCLC(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
- SDValue Src1, SDValue Src2, uint64_t Size) {
- SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
- EVT PtrVT = Src1.getValueType();
- // A two-CLC sequence is a clear win over a loop, not least because it
- // needs only one branch. A three-CLC sequence needs the same number
- // of branches as a loop (i.e. 2), but is shorter. That brings us to
- // lengths greater than 768 bytes. It seems relatively likely that
- // a difference will be found within the first 768 bytes, so we just
- // optimize for the smallest number of branch instructions, in order
- // to avoid polluting the prediction buffer too much. A loop only ever
- // needs 2 branches, whereas a straight-line sequence would need 3 or more.
- if (Size > 3 * 256)
- return DAG.getNode(SystemZISD::CLC_LOOP, DL, VTs, Chain, Src1, Src2,
- DAG.getConstant(Size, DL, PtrVT),
- DAG.getConstant(Size / 256, DL, PtrVT));
- return DAG.getNode(SystemZISD::CLC, DL, VTs, Chain, Src1, Src2,
- DAG.getConstant(Size, DL, PtrVT));
+ return SDValue();
}
// Convert the current CC value into an integer that is 0 if CC == 0,
@@ -193,15 +162,16 @@ std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForMemcmp(
SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src1,
SDValue Src2, SDValue Size, MachinePointerInfo Op1PtrInfo,
MachinePointerInfo Op2PtrInfo) const {
+ SDValue CCReg;
+ // Swap operands to invert CC == 1 vs. CC == 2 cases.
if (auto *CSize = dyn_cast<ConstantSDNode>(Size)) {
uint64_t Bytes = CSize->getZExtValue();
assert(Bytes > 0 && "Caller should have handled 0-size case");
- // Swap operands to invert CC == 1 vs. CC == 2 cases.
- SDValue CCReg = emitCLC(DAG, DL, Chain, Src2, Src1, Bytes);
- Chain = CCReg.getValue(1);
- return std::make_pair(addIPMSequence(DL, CCReg, DAG), Chain);
- }
- return std::make_pair(SDValue(), SDValue());
+ CCReg = emitMemMemImm(DAG, DL, SystemZISD::CLC, Chain, Src2, Src1, Bytes);
+ } else
+ CCReg = emitMemMemReg(DAG, DL, SystemZISD::CLC, Chain, Src2, Src1, Size);
+ Chain = CCReg.getValue(1);
+ return std::make_pair(addIPMSequence(DL, CCReg, DAG), Chain);
}
std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForMemchr(
diff --git a/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp b/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp
index 3d27b70d6ef9..254e5e92449b 100644
--- a/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp
@@ -211,8 +211,7 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
LiveRegs.addLiveOuts(MBB);
// Iterate backwards through the block looking for instructions to change.
- for (auto MBBI = MBB.rbegin(), MBBE = MBB.rend(); MBBI != MBBE; ++MBBI) {
- MachineInstr &MI = *MBBI;
+ for (MachineInstr &MI : llvm::reverse(MBB)) {
switch (MI.getOpcode()) {
case SystemZ::IILF:
Changed |= shortenIIF(MI, SystemZ::LLILL, SystemZ::LLILH);
diff --git a/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp b/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
index bfcdee270f29..0f03d96655bf 100644
--- a/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -89,9 +89,7 @@ SystemZSubtarget::SystemZSubtarget(const Triple &TT, const std::string &CPU,
HasSoftFloat(false), TargetTriple(TT),
SpecialRegisters(initializeSpecialRegisters()),
InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this),
- TSInfo(), FrameLowering() {}
-
-SystemZSubtarget::~SystemZSubtarget() { delete getSpecialRegisters(); }
+ TSInfo(), FrameLowering(SystemZFrameLowering::create(*this)) {}
bool SystemZSubtarget::enableSubRegLiveness() const {
return UseSubRegLiveness;
diff --git a/llvm/lib/Target/SystemZ/SystemZSubtarget.h b/llvm/lib/Target/SystemZ/SystemZSubtarget.h
index f6c155de44a0..67c5b8eb09b6 100644
--- a/llvm/lib/Target/SystemZ/SystemZSubtarget.h
+++ b/llvm/lib/Target/SystemZ/SystemZSubtarget.h
@@ -77,11 +77,11 @@ protected:
private:
Triple TargetTriple;
- SystemZCallingConventionRegisters *SpecialRegisters;
+ std::unique_ptr<SystemZCallingConventionRegisters> SpecialRegisters;
SystemZInstrInfo InstrInfo;
SystemZTargetLowering TLInfo;
SystemZSelectionDAGInfo TSInfo;
- SystemZFrameLowering FrameLowering;
+ std::unique_ptr<const SystemZFrameLowering> FrameLowering;
SystemZSubtarget &initializeSubtargetDependencies(StringRef CPU,
StringRef FS);
@@ -91,16 +91,23 @@ public:
SystemZSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS, const TargetMachine &TM);
- ~SystemZSubtarget();
-
SystemZCallingConventionRegisters *getSpecialRegisters() const {
assert(SpecialRegisters && "Unsupported SystemZ calling convention");
- return SpecialRegisters;
+ return SpecialRegisters.get();
+ }
+
+ template <class SR> SR &getSpecialRegisters() const {
+ return *static_cast<SR *>(getSpecialRegisters());
}
const TargetFrameLowering *getFrameLowering() const override {
- return &FrameLowering;
+ return FrameLowering.get();
}
+
+ template <class TFL> const TFL *getFrameLowering() const {
+ return static_cast<const TFL *>(getFrameLowering());
+ }
+
const SystemZInstrInfo *getInstrInfo() const override { return &InstrInfo; }
const SystemZRegisterInfo *getRegisterInfo() const override {
return &InstrInfo.getRegisterInfo();
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
index a886f9b9d814..deb3358102ed 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -21,8 +21,8 @@
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CodeGen.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Transforms/Scalar.h"
#include <string>
@@ -84,8 +84,9 @@ static std::string computeDataLayout(const Triple &TT, StringRef CPU,
// 128-bit floats are aligned only to 64 bits.
Ret += "-f128:64";
- // When using the vector ABI, 128-bit vectors are also aligned to 64 bits.
- if (VectorABI)
+ // When using the vector ABI on Linux, 128-bit vectors are also aligned to 64
+ // bits. On z/OS, vector types are always aligned to 64 bits.
+ if (VectorABI || TT.isOSzOS())
Ret += "-v128:64";
// We prefer 16 bits of aligned for all globals; see above.
@@ -284,7 +285,7 @@ void SystemZPassConfig::addPreEmitPass() {
// vector instructions will be shortened into opcodes that compare
// elimination recognizes.
if (getOptLevel() != CodeGenOpt::None)
- addPass(createSystemZShortenInstPass(getSystemZTargetMachine()), false);
+ addPass(createSystemZShortenInstPass(getSystemZTargetMachine()));
// We eliminate comparisons here rather than earlier because some
// transformations can change the set of available CC values and we
@@ -310,7 +311,7 @@ void SystemZPassConfig::addPreEmitPass() {
// between the comparison and the branch, but it isn't clear whether
// preventing that would be a win or not.
if (getOptLevel() != CodeGenOpt::None)
- addPass(createSystemZElimComparePass(getSystemZTargetMachine()), false);
+ addPass(createSystemZElimComparePass(getSystemZTargetMachine()));
addPass(createSystemZLongBranchPass(getSystemZTargetMachine()));
// Do final scheduling after all other optimizations, to get an
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetStreamer.h b/llvm/lib/Target/SystemZ/SystemZTargetStreamer.h
new file mode 100644
index 000000000000..a610a90d2069
--- /dev/null
+++ b/llvm/lib/Target/SystemZ/SystemZTargetStreamer.h
@@ -0,0 +1,55 @@
+//=- SystemZTargetStreamer.h - SystemZ Target Streamer ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETSTREAMER_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETSTREAMER_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCStreamer.h"
+
+namespace llvm {
+
+class SystemZTargetStreamer : public MCTargetStreamer {
+public:
+ SystemZTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
+
+ typedef std::pair<MCInst, const MCSubtargetInfo *> MCInstSTIPair;
+ struct CmpMCInst {
+ bool operator()(const MCInstSTIPair &MCI_STI_A,
+ const MCInstSTIPair &MCI_STI_B) const {
+ if (MCI_STI_A.second != MCI_STI_B.second)
+ return uintptr_t(MCI_STI_A.second) < uintptr_t(MCI_STI_B.second);
+ const MCInst &A = MCI_STI_A.first;
+ const MCInst &B = MCI_STI_B.first;
+ assert(A.getNumOperands() == B.getNumOperands() &&
+ A.getNumOperands() == 5 && A.getOperand(2).getImm() == 1 &&
+ B.getOperand(2).getImm() == 1 && "Unexpected EXRL target MCInst");
+ if (A.getOpcode() != B.getOpcode())
+ return A.getOpcode() < B.getOpcode();
+ if (A.getOperand(0).getReg() != B.getOperand(0).getReg())
+ return A.getOperand(0).getReg() < B.getOperand(0).getReg();
+ if (A.getOperand(1).getImm() != B.getOperand(1).getImm())
+ return A.getOperand(1).getImm() < B.getOperand(1).getImm();
+ if (A.getOperand(3).getReg() != B.getOperand(3).getReg())
+ return A.getOperand(3).getReg() < B.getOperand(3).getReg();
+ if (A.getOperand(4).getImm() != B.getOperand(4).getImm())
+ return A.getOperand(4).getImm() < B.getOperand(4).getImm();
+ return false;
+ }
+ };
+ typedef std::map<MCInstSTIPair, MCSymbol *, CmpMCInst> EXRLT2SymMap;
+ EXRLT2SymMap EXRLTargets2Sym;
+
+ void emitConstantPools() override;
+
+ virtual void emitMachine(StringRef CPU) = 0;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETSTREAMER_H
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index 03c4da8495ab..6d66ebfced05 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -243,7 +243,8 @@ SystemZTTIImpl::getPopcntSupport(unsigned TyWidth) {
}
void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
- TTI::UnrollingPreferences &UP) {
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE) {
// Find out if L contains a call, what the machine instruction count
// estimate is, and how many stores there are.
bool HasCall = false;
@@ -423,8 +424,8 @@ InstructionCost SystemZTTIImpl::getArithmeticInstrCost(
(C->getType()->isVectorTy()
? dyn_cast_or_null<const ConstantInt>(C->getSplatValue())
: dyn_cast<const ConstantInt>(C));
- if (CVal != nullptr &&
- (CVal->getValue().isPowerOf2() || (-CVal->getValue()).isPowerOf2()))
+ if (CVal && (CVal->getValue().isPowerOf2() ||
+ CVal->getValue().isNegatedPowerOf2()))
DivRemConstPow2 = true;
else
DivRemConst = true;
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
index 51cf557ae99b..db4ec794b3e4 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -52,7 +52,8 @@ public:
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
- TTI::UnrollingPreferences &UP);
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE);
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP);
@@ -82,8 +83,7 @@ public:
bool enableInterleavedAccessVectorization() { return true; }
InstructionCost getArithmeticInstrCost(
- unsigned Opcode, Type *Ty,
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+ unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
@@ -115,8 +115,7 @@ public:
InstructionCost getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
- Align Alignment, unsigned AddressSpace,
- TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
+ Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond = false, bool UseMaskForGaps = false);
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
diff --git a/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp b/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp
index 36291e079882..acfafd91bc17 100644
--- a/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp
+++ b/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#include "TargetInfo/SystemZTargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/TargetMachine.cpp b/llvm/lib/Target/TargetMachine.cpp
index 0a655a82b889..390457dbb2bc 100644
--- a/llvm/lib/Target/TargetMachine.cpp
+++ b/llvm/lib/Target/TargetMachine.cpp
@@ -101,46 +101,41 @@ bool TargetMachine::shouldAssumeDSOLocal(const Module &M,
// dso_preemptable. At this point in time, the various IR producers
// have not been transitioned to always produce a dso_local when it
// is possible to do so.
- // In the case of ExternalSymbolSDNode, GV is null and we should just return
- // false. However, COFF currently relies on this to be true
//
// As a result we still have some logic in here to improve the quality of the
// generated code.
- // FIXME: Add a module level metadata for whether intrinsics should be assumed
- // local.
if (!GV)
- return TT.isOSBinFormatCOFF();
+ return false;
// If the IR producer requested that this GV be treated as dso local, obey.
if (GV->isDSOLocal())
return true;
- // DLLImport explicitly marks the GV as external.
- if (GV->hasDLLImportStorageClass())
- return false;
-
- // On MinGW, variables that haven't been declared with DLLImport may still
- // end up automatically imported by the linker. To make this feasible,
- // don't assume the variables to be DSO local unless we actually know
- // that for sure. This only has to be done for variables; for functions
- // the linker can insert thunks for calling functions from another DLL.
- if (TT.isWindowsGNUEnvironment() && TT.isOSBinFormatCOFF() &&
- GV->isDeclarationForLinker() && isa<GlobalVariable>(GV))
- return false;
-
- // On COFF, don't mark 'extern_weak' symbols as DSO local. If these symbols
- // remain unresolved in the link, they can be resolved to zero, which is
- // outside the current DSO.
- if (TT.isOSBinFormatCOFF() && GV->hasExternalWeakLinkage())
- return false;
+ if (TT.isOSBinFormatCOFF()) {
+ // DLLImport explicitly marks the GV as external.
+ if (GV->hasDLLImportStorageClass())
+ return false;
+
+ // On MinGW, variables that haven't been declared with DLLImport may still
+ // end up automatically imported by the linker. To make this feasible,
+ // don't assume the variables to be DSO local unless we actually know
+ // that for sure. This only has to be done for variables; for functions
+ // the linker can insert thunks for calling functions from another DLL.
+ if (TT.isWindowsGNUEnvironment() && GV->isDeclarationForLinker() &&
+ isa<GlobalVariable>(GV))
+ return false;
+
+ // Don't mark 'extern_weak' symbols as DSO local. If these symbols remain
+ // unresolved in the link, they can be resolved to zero, which is outside
+ // the current DSO.
+ if (GV->hasExternalWeakLinkage())
+ return false;
+
+ // Every other GV is local on COFF.
+ return true;
+ }
- // Every other GV is local on COFF.
- // Make an exception for windows OS in the triple: Some firmware builds use
- // *-win32-macho triples. This (accidentally?) produced windows relocations
- // without GOT tables in older clang versions; Keep this behaviour.
- // Some JIT users use *-win32-elf triples; these shouldn't use GOT tables
- // either.
- if (TT.isOSBinFormatCOFF() || TT.isOSWindows())
+ if (TT.isOSBinFormatGOFF())
return true;
if (TT.isOSBinFormatMachO()) {
@@ -149,13 +144,8 @@ bool TargetMachine::shouldAssumeDSOLocal(const Module &M,
return GV->isStrongDefinitionForLinker();
}
- // Due to the AIX linkage model, any global with default visibility is
- // considered non-local.
- if (TT.isOSBinFormatXCOFF())
- return false;
-
- assert(TT.isOSBinFormatELF() || TT.isOSBinFormatWasm());
- assert(RM != Reloc::DynamicNoPIC);
+ assert(TT.isOSBinFormatELF() || TT.isOSBinFormatWasm() ||
+ TT.isOSBinFormatXCOFF());
return false;
}
diff --git a/llvm/lib/Target/TargetMachineC.cpp b/llvm/lib/Target/TargetMachineC.cpp
index 60fe84cadacc..55047a1bb3cd 100644
--- a/llvm/lib/Target/TargetMachineC.cpp
+++ b/llvm/lib/Target/TargetMachineC.cpp
@@ -18,10 +18,10 @@
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/Host.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/CodeGenCWrappers.h"
#include "llvm/Target/TargetMachine.h"
diff --git a/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp b/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp
index a3309a68c76d..7e92e4b33812 100644
--- a/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp
+++ b/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp
@@ -25,7 +25,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <memory>
diff --git a/llvm/lib/Target/VE/Disassembler/VEDisassembler.cpp b/llvm/lib/Target/VE/Disassembler/VEDisassembler.cpp
index 20d609bc6b32..72c40cbe78c4 100644
--- a/llvm/lib/Target/VE/Disassembler/VEDisassembler.cpp
+++ b/llvm/lib/Target/VE/Disassembler/VEDisassembler.cpp
@@ -18,7 +18,7 @@
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEAsmBackend.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEAsmBackend.cpp
index 9a6ae90b5c73..29c209934680 100644
--- a/llvm/lib/Target/VE/MCTargetDesc/VEAsmBackend.cpp
+++ b/llvm/lib/Target/VE/MCTargetDesc/VEAsmBackend.cpp
@@ -15,8 +15,8 @@
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/EndianStream.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -164,7 +164,8 @@ public:
llvm_unreachable("relaxInstruction() should not be called");
}
- bool writeNopData(raw_ostream &OS, uint64_t Count) const override {
+ bool writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const override {
if ((Count % 8) != 0)
return false;
diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.cpp
index 76824335239b..9f29fc092c69 100644
--- a/llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.cpp
+++ b/llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.cpp
@@ -37,5 +37,4 @@ VEELFMCAsmInfo::VEELFMCAsmInfo(const Triple &TheTriple) {
UsesELFSectionDirectiveForBSS = true;
SupportsDebugInformation = true;
- UseIntegratedAssembler = false;
}
diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp
index 4c480c050274..f4fbf763e59c 100644
--- a/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp
+++ b/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp
@@ -18,8 +18,8 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/VE/TargetInfo/VETargetInfo.cpp b/llvm/lib/Target/VE/TargetInfo/VETargetInfo.cpp
index a95a299def88..7c4bf1cfd672 100644
--- a/llvm/lib/Target/VE/TargetInfo/VETargetInfo.cpp
+++ b/llvm/lib/Target/VE/TargetInfo/VETargetInfo.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#include "TargetInfo/VETargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/VE/VEAsmPrinter.cpp b/llvm/lib/Target/VE/VEAsmPrinter.cpp
index 08a75b6b8c55..af69d04a17ca 100644
--- a/llvm/lib/Target/VE/VEAsmPrinter.cpp
+++ b/llvm/lib/Target/VE/VEAsmPrinter.cpp
@@ -30,7 +30,7 @@
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
index b297e0fcd1a2..32315543826a 100644
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -2508,13 +2508,12 @@ static bool isI32Insn(const SDNode *User, const SDNode *N) {
case ISD::CopyToReg:
// Check all use of selections, bit operations, and copies. If all of them
// are safe, optimize truncate to extract_subreg.
- for (SDNode::use_iterator UI = User->use_begin(), UE = User->use_end();
- UI != UE; ++UI) {
- switch ((*UI)->getOpcode()) {
+ for (const SDNode *U : User->uses()) {
+ switch (U->getOpcode()) {
default:
// If the use is an instruction which treats the source operand as i32,
// it is safe to avoid truncate here.
- if (isI32Insn(*UI, N))
+ if (isI32Insn(U, N))
continue;
break;
case ISD::ANY_EXTEND:
@@ -2561,10 +2560,7 @@ SDValue VETargetLowering::combineTRUNCATE(SDNode *N,
return SDValue();
// Check all use of this TRUNCATE.
- for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); UI != UE;
- ++UI) {
- SDNode *User = *UI;
-
+ for (const SDNode *User : N->uses()) {
// Make sure that we're not going to replace TRUNCATE for non i32
// instructions.
//
diff --git a/llvm/lib/Target/VE/VEInstrInfo.cpp b/llvm/lib/Target/VE/VEInstrInfo.cpp
index 9770052ff913..ddcfb9da8249 100644
--- a/llvm/lib/Target/VE/VEInstrInfo.cpp
+++ b/llvm/lib/Target/VE/VEInstrInfo.cpp
@@ -20,10 +20,10 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
#define DEBUG_TYPE "ve-instr-info"
@@ -99,7 +99,7 @@ static bool isUncondBranchOpcode(int Opc) {
#define BRKIND(NAME) (Opc == NAME##a || Opc == NAME##a_nt || Opc == NAME##a_t)
// VE has other branch relative always instructions for word/double/float,
- // but we use only long branches in our lower. So, sanity check it here.
+ // but we use only long branches in our lower. So, check it here.
assert(!BRKIND(BRCFW) && !BRKIND(BRCFD) && !BRKIND(BRCFS) &&
"Branch relative word/double/float always instructions should not be "
"used!");
@@ -127,7 +127,7 @@ static bool isIndirectBranchOpcode(int Opc) {
#define BRKIND(NAME) \
(Opc == NAME##ari || Opc == NAME##ari_nt || Opc == NAME##ari_t)
// VE has other branch always instructions for word/double/float, but
- // we use only long branches in our lower. So, sanity check it here.
+ // we use only long branches in our lower. So, check it here.
assert(!BRKIND(BCFW) && !BRKIND(BCFD) && !BRKIND(BCFS) &&
"Branch word/double/float always instructions should not be used!");
return BRKIND(BCFL);
diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td
index 2f77daae7130..c3abbe2cafab 100644
--- a/llvm/lib/Target/VE/VEInstrInfo.td
+++ b/llvm/lib/Target/VE/VEInstrInfo.td
@@ -634,9 +634,7 @@ multiclass RRIm<string opcStr, bits<8>opc,
// Special RR multiclass for 128 bits shift left instruction.
// e.g. SLD
let Constraints = "$hi = $sx", DisableEncoding = "$hi", hasSideEffects = 0 in
-multiclass RRILDm<string opcStr, bits<8>opc,
- RegisterClass RC, ValueType Ty,
- SDPatternOperator OpNode = null_frag> {
+multiclass RRILDm<string opcStr, bits<8>opc, RegisterClass RC> {
def rrr : RR<opc, (outs RC:$sx), (ins RC:$hi, RC:$sz, I32:$sy),
!strconcat(opcStr, " $sx, $sz, $sy")>;
let cz = 0 in
@@ -653,9 +651,7 @@ multiclass RRILDm<string opcStr, bits<8>opc,
// Special RR multiclass for 128 bits shift right instruction.
// e.g. SRD
let Constraints = "$low = $sx", DisableEncoding = "$low", hasSideEffects = 0 in
-multiclass RRIRDm<string opcStr, bits<8>opc,
- RegisterClass RC, ValueType Ty,
- SDPatternOperator OpNode = null_frag> {
+multiclass RRIRDm<string opcStr, bits<8>opc, RegisterClass RC> {
def rrr : RR<opc, (outs RC:$sx), (ins RC:$sz, RC:$low, I32:$sy),
!strconcat(opcStr, " $sx, $sz, $sy")>;
let cz = 0 in
@@ -685,7 +681,7 @@ multiclass RRI1m<string opcStr, bits<8>opc, RegisterClass RC, ValueType Ty,
// Special RR multiclass for MRG instruction.
// e.g. MRG
let Constraints = "$sx = $sd", DisableEncoding = "$sd", hasSideEffects = 0 in
-multiclass RRMRGm<string opcStr, bits<8>opc, RegisterClass RC, ValueType Ty> {
+multiclass RRMRGm<string opcStr, bits<8>opc, RegisterClass RC> {
def rr : RR<opc, (outs RC:$sx), (ins RC:$sy, RC:$sz, RC:$sd),
!strconcat(opcStr, " $sx, $sy, $sz")>;
let cy = 0 in
@@ -719,7 +715,7 @@ multiclass RRSWPm<string opcStr, bits<8>opc,
// e.g. CMOVL, CMOVW, CMOVD, and etc.
let Constraints = "$sx = $sd", DisableEncoding = "$sd", hasSideEffects = 0,
cfw = ? in
-multiclass RRCMOVm<string opcStr, bits<8>opc, RegisterClass RC, ValueType Ty> {
+multiclass RRCMOVm<string opcStr, bits<8>opc, RegisterClass RC> {
def rr : RR<opc, (outs I64:$sx), (ins CCOp:$cfw, RC:$sy, I64:$sz, I64:$sd),
!strconcat(opcStr, " $sx, $sz, $sy")>;
let cy = 0 in
@@ -740,8 +736,8 @@ multiclass RRCMOVm<string opcStr, bits<8>opc, RegisterClass RC, ValueType Ty> {
// e.g. CVTWDSX, CVTWDZX, CVTWSSX, and etc.
// sz{3-0} = rounding mode
let cz = 0, hasSideEffects = 0 in
-multiclass CVTRDm<string opcStr, bits<8> opc, RegisterClass RCo, ValueType Tyo,
- RegisterClass RCi, ValueType Tyi> {
+multiclass CVTRDm<string opcStr, bits<8> opc, RegisterClass RCo,
+ RegisterClass RCi> {
def r : RR<opc, (outs RCo:$sx), (ins RDOp:$rd, RCi:$sy),
!strconcat(opcStr, "${rd} $sx, $sy")> {
bits<4> rd;
@@ -1265,7 +1261,7 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1 in
defm NND : RRNCm<"nnd", 0x54, I64, i64, and_not>;
// Section 8.5.6 - MRG (Merge)
-defm MRG : RRMRGm<"mrg", 0x56, I64, i64>;
+defm MRG : RRMRGm<"mrg", 0x56, I64>;
// Section 8.5.7 - LDZ (Leading Zero Count)
def ctlz_pat : PatFrags<(ops node:$src),
@@ -1297,10 +1293,10 @@ def : Pat<(i32 (bswap (i32 mimm:$src))),
(EXTRACT_SUBREG (BSWPmi (MIMM $src), 1), sub_i32)>;
// Section 8.5.11 - CMOV (Conditional Move)
-let cw = 0, cw2 = 0 in defm CMOVL : RRCMOVm<"cmov.l.${cfw}", 0x3B, I64, i64>;
-let cw = 1, cw2 = 0 in defm CMOVW : RRCMOVm<"cmov.w.${cfw}", 0x3B, I32, i32>;
-let cw = 0, cw2 = 1 in defm CMOVD : RRCMOVm<"cmov.d.${cfw}", 0x3B, I64, f64>;
-let cw = 1, cw2 = 1 in defm CMOVS : RRCMOVm<"cmov.s.${cfw}", 0x3B, F32, f32>;
+let cw = 0, cw2 = 0 in defm CMOVL : RRCMOVm<"cmov.l.${cfw}", 0x3B, I64>;
+let cw = 1, cw2 = 0 in defm CMOVW : RRCMOVm<"cmov.w.${cfw}", 0x3B, I32>;
+let cw = 0, cw2 = 1 in defm CMOVD : RRCMOVm<"cmov.d.${cfw}", 0x3B, I64>;
+let cw = 1, cw2 = 1 in defm CMOVS : RRCMOVm<"cmov.s.${cfw}", 0x3B, F32>;
def : MnemonicAlias<"cmov.l", "cmov.l.at">;
def : MnemonicAlias<"cmov.w", "cmov.w.at">;
def : MnemonicAlias<"cmov.d", "cmov.d.at">;
@@ -1315,14 +1311,14 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1 in
defm SLL : RRIm<"sll", 0x65, I64, i64, shl>;
// Section 8.6.2 - SLD (Shift Left Double)
-defm SLD : RRILDm<"sld", 0x64, I64, i64>;
+defm SLD : RRILDm<"sld", 0x64, I64>;
// Section 8.6.3 - SRL (Shift Right Logical)
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
defm SRL : RRIm<"srl", 0x75, I64, i64, srl>;
// Section 8.6.4 - SRD (Shift Right Double)
-defm SRD : RRIRDm<"srd", 0x74, I64, i64>;
+defm SRD : RRIRDm<"srd", 0x74, I64>;
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
@@ -1405,16 +1401,16 @@ defm FCMPQ : RRNCbm<"fcmp.q", 0x7D, I64, f64, F128, f128, null_frag, simm7fp,
// Section 8.7.11 - FIX (Convert to Fixed Point)
// cx: double/float, cw: sx/zx, sz{0-3} = round
let cx = 0, cw = 0 /* sign extend */ in
-defm CVTWDSX : CVTRDm<"cvt.w.d.sx", 0x4E, I32, i32, I64, f64>;
+defm CVTWDSX : CVTRDm<"cvt.w.d.sx", 0x4E, I32, I64>;
let cx = 0, cw = 1 /* zero extend */ in
-defm CVTWDZX : CVTRDm<"cvt.w.d.zx", 0x4E, I32, i32, I64, f64>;
+defm CVTWDZX : CVTRDm<"cvt.w.d.zx", 0x4E, I32, I64>;
let cx = 1, cw = 0 /* sign extend */ in
-defm CVTWSSX : CVTRDm<"cvt.w.s.sx", 0x4E, I32, i32, F32, f32>;
+defm CVTWSSX : CVTRDm<"cvt.w.s.sx", 0x4E, I32, F32>;
let cx = 1, cw = 1 /* zero extend */ in
-defm CVTWSZX : CVTRDm<"cvt.w.s.zx", 0x4E, I32, i32, F32, f32>;
+defm CVTWSZX : CVTRDm<"cvt.w.s.zx", 0x4E, I32, F32>;
// Section 8.7.12 - FIXX (Convert to Fixed Point)
-defm CVTLD : CVTRDm<"cvt.l.d", 0x4F, I64, i64, I64, f64>;
+defm CVTLD : CVTRDm<"cvt.l.d", 0x4F, I64, I64>;
// Section 8.7.13 - FLT (Convert to Floating Point)
defm CVTDW : CVTm<"cvt.d.w", 0x5E, I64, f64, I32, i32, sint_to_fp>;
@@ -1836,7 +1832,7 @@ multiclass ZXATMLDm<SDPatternOperator from, int VAL,
def : Pat<(i64 (and (anyext (from ADDRzii:$addr)), VAL)),
(i2l (tozii MEMzii:$addr))>;
}
-multiclass ZXATMLD32m<SDPatternOperator from, int VAL,
+multiclass ZXATMLD32m<SDPatternOperator from,
RM torri, RM torii,
RM tozri, RM tozii> {
def : Pat<(i64 (zext (from ADDRrri:$addr))),
@@ -1852,8 +1848,7 @@ defm : ZXATMLDm<atomic_load_8, 0xFF, LD1BZXrri, LD1BZXrii, LD1BZXzri,
LD1BZXzii>;
defm : ZXATMLDm<atomic_load_16, 0xFFFF, LD2BZXrri, LD2BZXrii, LD2BZXzri,
LD2BZXzii>;
-defm : ZXATMLD32m<atomic_load_32, 0xFFFFFFFF, LDLZXrri, LDLZXrii, LDLZXzri,
- LDLZXzii>;
+defm : ZXATMLD32m<atomic_load_32, LDLZXrri, LDLZXrii, LDLZXzri, LDLZXzii>;
// Atomic stores
multiclass ATMSTm<SDPatternOperator from, ValueType ty,
@@ -1871,7 +1866,6 @@ defm : ATMSTm<atomic_store_64, i64, STrri, STrii, STzri, STzii>;
// Optimized atomic stores with truncate
multiclass TRATMSTm<SDPatternOperator from,
- ValueType ty,
RM torri,
RM torii,
RM tozri,
@@ -1885,9 +1879,9 @@ multiclass TRATMSTm<SDPatternOperator from,
def : Pat<(from ADDRzii:$addr, (i32 (trunc i64:$src))),
(tozii MEMzii:$addr, (EXTRACT_SUBREG $src, sub_i32))>;
}
-defm : TRATMSTm<atomic_store_8, i32, ST1Brri, ST1Brii, ST1Bzri, ST1Bzii>;
-defm : TRATMSTm<atomic_store_16, i32, ST2Brri, ST2Brii, ST2Bzri, ST2Bzii>;
-defm : TRATMSTm<atomic_store_32, i32, STLrri, STLrii, STLzri, STLzii>;
+defm : TRATMSTm<atomic_store_8, ST1Brri, ST1Brii, ST1Bzri, ST1Bzii>;
+defm : TRATMSTm<atomic_store_16, ST2Brri, ST2Brii, ST2Bzri, ST2Bzii>;
+defm : TRATMSTm<atomic_store_32, STLrri, STLrii, STLzri, STLzii>;
// Atomic swaps
def : Pat<(i32 (ts1am i64:$src, i32:$flag, i32:$new)),
diff --git a/llvm/lib/Target/VE/VESubtarget.cpp b/llvm/lib/Target/VE/VESubtarget.cpp
index daa6cfb8aa84..78ac742ebf52 100644
--- a/llvm/lib/Target/VE/VESubtarget.cpp
+++ b/llvm/lib/Target/VE/VESubtarget.cpp
@@ -12,8 +12,8 @@
#include "VESubtarget.h"
#include "VE.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/VE/VETargetMachine.cpp b/llvm/lib/Target/VE/VETargetMachine.cpp
index 414ae09431c0..9f294f15da91 100644
--- a/llvm/lib/Target/VE/VETargetMachine.cpp
+++ b/llvm/lib/Target/VE/VETargetMachine.cpp
@@ -17,7 +17,7 @@
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/LegacyPassManager.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/VE/VVPInstrPatternsVec.td b/llvm/lib/Target/VE/VVPInstrPatternsVec.td
index 7003fb387670..ac03e0bf627e 100644
--- a/llvm/lib/Target/VE/VVPInstrPatternsVec.td
+++ b/llvm/lib/Target/VE/VVPInstrPatternsVec.td
@@ -20,8 +20,7 @@ include "VVPInstrInfo.td"
multiclass VectorBinaryArith<
SDPatternOperator OpNode,
ValueType ScalarVT, ValueType DataVT, ValueType MaskVT,
- string OpBaseName,
- SDPatternOperator ImmOp, SDNodeXForm ImmCast> {
+ string OpBaseName> {
// No mask.
def : Pat<(OpNode
(any_broadcast ScalarVT:$sx),
@@ -56,10 +55,10 @@ multiclass VectorBinaryArith_ShortLong<
ValueType ShortScalarVT, ValueType ShortDataVT, string ShortOpBaseName> {
defm : VectorBinaryArith<OpNode,
LongScalarVT, LongDataVT, v256i1,
- LongOpBaseName, simm7, LO7>;
+ LongOpBaseName>;
defm : VectorBinaryArith<OpNode,
ShortScalarVT, ShortDataVT, v256i1,
- ShortOpBaseName, simm7, LO7>;
+ ShortOpBaseName>;
}
diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
index eb1dd879941a..7d1e6c553f81 100644
--- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
+++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
@@ -31,9 +31,9 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolWasm.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -431,10 +431,10 @@ public:
bool checkForP2AlignIfLoadStore(OperandVector &Operands, StringRef InstName) {
// FIXME: there is probably a cleaner way to do this.
- auto IsLoadStore = InstName.find(".load") != StringRef::npos ||
- InstName.find(".store") != StringRef::npos ||
- InstName.find("prefetch") != StringRef::npos;
- auto IsAtomic = InstName.find("atomic.") != StringRef::npos;
+ auto IsLoadStore = InstName.contains(".load") ||
+ InstName.contains(".store") ||
+ InstName.contains("prefetch");
+ auto IsAtomic = InstName.contains("atomic.");
if (IsLoadStore || IsAtomic) {
// Parse load/store operands of the form: offset:p2align=align
if (IsLoadStore && isNext(AsmToken::Colon)) {
@@ -450,7 +450,7 @@ public:
// v128.{load,store}{8,16,32,64}_lane has both a memarg and a lane
// index. We need to avoid parsing an extra alignment operand for the
// lane index.
- auto IsLoadStoreLane = InstName.find("_lane") != StringRef::npos;
+ auto IsLoadStoreLane = InstName.contains("_lane");
if (IsLoadStoreLane && Operands.size() == 4)
return false;
// Alignment not specified (or atomics, must use default alignment).
@@ -1114,6 +1114,8 @@ public:
void onEndOfFunction(SMLoc ErrorLoc) {
TC.endOfFunction(ErrorLoc);
+ // Reset the type checker state.
+ TC.Clear();
// Automatically output a .size directive, so it becomes optional for the
// user.
diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp
index 2f9245a7c66c..a6b5d4252f2f 100644
--- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp
+++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp
@@ -31,10 +31,10 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolWasm.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -74,6 +74,9 @@ bool WebAssemblyAsmTypeCheck::typeError(SMLoc ErrorLoc, const Twine &Msg) {
// which are mostly not helpful.
if (TypeErrorThisFunction)
return true;
+ // If we're currently in unreachable code, we suppress errors as well.
+ if (Unreachable)
+ return true;
TypeErrorThisFunction = true;
dumpTypeStack("current stack: ");
return Parser.Error(ErrorLoc, Msg);
@@ -89,8 +92,7 @@ bool WebAssemblyAsmTypeCheck::popType(SMLoc ErrorLoc,
: StringRef(
"empty stack while popping value"));
}
- auto PVT = Stack.back();
- Stack.pop_back();
+ auto PVT = Stack.pop_back_val();
if (EVT.hasValue() && EVT.getValue() != PVT) {
return typeError(
ErrorLoc, StringRef("popped ") + WebAssembly::typeToString(PVT) +
@@ -155,8 +157,12 @@ bool WebAssemblyAsmTypeCheck::getGlobal(SMLoc ErrorLoc, const MCInst &Inst,
break;
case wasm::WASM_SYMBOL_TYPE_FUNCTION:
case wasm::WASM_SYMBOL_TYPE_DATA:
- if (SymRef->getKind() == MCSymbolRefExpr::VK_GOT) {
+ switch (SymRef->getKind()) {
+ case MCSymbolRefExpr::VK_GOT:
+ case MCSymbolRefExpr::VK_WASM_GOT_TLS:
Type = is64 ? wasm::ValType::I64 : wasm::ValType::I32;
+ return false;
+ default:
break;
}
LLVM_FALLTHROUGH;
@@ -167,17 +173,18 @@ bool WebAssemblyAsmTypeCheck::getGlobal(SMLoc ErrorLoc, const MCInst &Inst,
return false;
}
-void WebAssemblyAsmTypeCheck::endOfFunction(SMLoc ErrorLoc) {
+bool WebAssemblyAsmTypeCheck::endOfFunction(SMLoc ErrorLoc) {
// Check the return types.
for (auto RVT : llvm::reverse(ReturnTypes)) {
- popType(ErrorLoc, RVT);
+ if (popType(ErrorLoc, RVT))
+ return true;
}
if (!Stack.empty()) {
- typeError(ErrorLoc,
- std::to_string(Stack.size()) + " superfluous return values");
+ return typeError(ErrorLoc, std::to_string(Stack.size()) +
+ " superfluous return values");
}
- // Reset the type checker state.
- Clear();
+ Unreachable = true;
+ return false;
}
bool WebAssemblyAsmTypeCheck::typeCheck(SMLoc ErrorLoc, const MCInst &Inst) {
@@ -213,13 +220,20 @@ bool WebAssemblyAsmTypeCheck::typeCheck(SMLoc ErrorLoc, const MCInst &Inst) {
if (popType(ErrorLoc, {}))
return true;
} else if (Name == "end_block" || Name == "end_loop" || Name == "end_if" ||
- Name == "else") {
+ Name == "else" || Name == "end_try") {
if (checkEnd(ErrorLoc))
return true;
+ if (Name == "end_block")
+ Unreachable = false;
+ } else if (Name == "return") {
+ if (endOfFunction(ErrorLoc))
+ return true;
} else if (Name == "call_indirect" || Name == "return_call_indirect") {
// Function value.
if (popType(ErrorLoc, wasm::ValType::I32)) return true;
if (checkSig(ErrorLoc, LastSig)) return true;
+ if (Name == "return_call_indirect" && endOfFunction(ErrorLoc))
+ return true;
} else if (Name == "call" || Name == "return_call") {
const MCSymbolRefExpr *SymRef;
if (getSymRef(ErrorLoc, Inst, SymRef))
@@ -230,9 +244,25 @@ bool WebAssemblyAsmTypeCheck::typeCheck(SMLoc ErrorLoc, const MCInst &Inst) {
return typeError(ErrorLoc, StringRef("symbol ") + WasmSym->getName() +
" missing .functype");
if (checkSig(ErrorLoc, *Sig)) return true;
+ if (Name == "return_call" && endOfFunction(ErrorLoc))
+ return true;
+ } else if (Name == "catch") {
+ const MCSymbolRefExpr *SymRef;
+ if (getSymRef(ErrorLoc, Inst, SymRef))
+ return true;
+ const auto *WasmSym = cast<MCSymbolWasm>(&SymRef->getSymbol());
+ const auto *Sig = WasmSym->getSignature();
+ if (!Sig || WasmSym->getType() != wasm::WASM_SYMBOL_TYPE_TAG)
+ return typeError(ErrorLoc, StringRef("symbol ") + WasmSym->getName() +
+ " missing .tagtype");
+ // catch instruction pushes values whose types are specified in the tag's
+ // "params" part
+ Stack.insert(Stack.end(), Sig->Params.begin(), Sig->Params.end());
} else if (Name == "ref.null") {
auto VT = static_cast<wasm::ValType>(Inst.getOperand(0).getImm());
Stack.push_back(VT);
+ } else if (Name == "unreachable") {
+ Unreachable = true;
} else {
// The current instruction is a stack instruction which doesn't have
// explicit operands that indicate push/pop types, so we get those from
diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.h b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.h
index a15a69b50418..aa35213ccca3 100644
--- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.h
+++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.h
@@ -32,15 +32,9 @@ class WebAssemblyAsmTypeCheck final {
SmallVector<wasm::ValType, 4> ReturnTypes;
wasm::WasmSignature LastSig;
bool TypeErrorThisFunction = false;
+ bool Unreachable = false;
bool is64;
- void Clear() {
- Stack.clear();
- LocalTypes.clear();
- ReturnTypes.clear();
- TypeErrorThisFunction = false;
- }
-
void dumpTypeStack(Twine Msg);
bool typeError(SMLoc ErrorLoc, const Twine &Msg);
bool popType(SMLoc ErrorLoc, Optional<wasm::ValType> EVT);
@@ -57,8 +51,16 @@ public:
void funcDecl(const wasm::WasmSignature &Sig);
void localDecl(const SmallVector<wasm::ValType, 4> &Locals);
void setLastSig(const wasm::WasmSignature &Sig) { LastSig = Sig; }
- void endOfFunction(SMLoc ErrorLoc);
+ bool endOfFunction(SMLoc ErrorLoc);
bool typeCheck(SMLoc ErrorLoc, const MCInst &Inst);
+
+ void Clear() {
+ Stack.clear();
+ LocalTypes.clear();
+ ReturnTypes.clear();
+ TypeErrorThisFunction = false;
+ Unreachable = false;
+ }
};
} // end namespace llvm
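The type-checker changes above add an Unreachable flag (set by unreachable, return, and the return-call forms, cleared at end_block) that silences further diagnostics, and move the per-function reset into a public Clear() that the parser calls after endOfFunction(). A standalone sketch of that suppression logic, not the LLVM class itself:

#include <string>
#include <vector>

struct MiniTypeChecker {
  bool Unreachable = false;        // set after unreachable/return
  bool ErrorThisFunction = false;  // only report the first error per function
  std::vector<std::string> Diags;

  bool typeError(const std::string &Msg) {
    if (ErrorThisFunction || Unreachable)
      return true;                 // swallow follow-on or unreachable errors
    ErrorThisFunction = true;
    Diags.push_back(Msg);
    return true;
  }

  void clear() {                   // invoked once per function by the driver
    Unreachable = false;
    ErrorThisFunction = false;
  }
};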
diff --git a/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp b/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
index 6770ccc9df6a..2e1e4f061219 100644
--- a/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
+++ b/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
@@ -24,9 +24,9 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolWasm.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/LEB128.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
index d88311197c1a..85bb52c03e80 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
@@ -59,7 +59,8 @@ public:
return false;
}
- bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
+ bool writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const override;
};
const MCFixupKindInfo &
@@ -83,8 +84,8 @@ WebAssemblyAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
return Infos[Kind - FirstTargetFixupKind];
}
-bool WebAssemblyAsmBackend::writeNopData(raw_ostream &OS,
- uint64_t Count) const {
+bool WebAssemblyAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const {
for (uint64_t I = 0; I < Count; ++I)
OS << char(WebAssembly::Nop);
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
index 31cccb24d798..8f670ec88897 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
@@ -19,8 +19,8 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
#define DEBUG_TYPE "wasm-mc-target-desc"
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
index 99defb42e380..d07bfce9abc1 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
@@ -95,6 +95,9 @@ enum TOF {
// platforms.
MO_GOT,
+ // Same as MO_GOT but the address stored in the global is a TLS address.
+ MO_GOT_TLS,
+
// On a symbol operand this indicates that the immediate is the symbol
// address relative the __memory_base wasm global.
// Only applicable to data symbols.
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
index f67fab946746..405712906c40 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
@@ -74,6 +74,7 @@ unsigned WebAssemblyWasmObjectWriter::getRelocType(
switch (Modifier) {
case MCSymbolRefExpr::VK_GOT:
+ case MCSymbolRefExpr::VK_WASM_GOT_TLS:
return wasm::R_WASM_GLOBAL_INDEX_LEB;
case MCSymbolRefExpr::VK_WASM_TBREL:
assert(SymA.isFunction());
@@ -88,7 +89,10 @@ unsigned WebAssemblyWasmObjectWriter::getRelocType(
: wasm::R_WASM_MEMORY_ADDR_REL_SLEB;
case MCSymbolRefExpr::VK_WASM_TYPEINDEX:
return wasm::R_WASM_TYPE_INDEX_LEB;
+ case MCSymbolRefExpr::VK_None:
+ break;
default:
+ report_fatal_error("unknown VariantKind");
break;
}
diff --git a/llvm/lib/Target/WebAssembly/README.txt b/llvm/lib/Target/WebAssembly/README.txt
index 934a3ba3bc4a..ab1cd8f0f84a 100644
--- a/llvm/lib/Target/WebAssembly/README.txt
+++ b/llvm/lib/Target/WebAssembly/README.txt
@@ -2,11 +2,11 @@
The object format emitted by the WebAssembly backend is documented in:
- * https://github.com/WebAssembly/tool-conventions/blob/master/Linking.md
+ * https://github.com/WebAssembly/tool-conventions/blob/main/Linking.md
The C ABI is described in:
- * https://github.com/WebAssembly/tool-conventions/blob/master/BasicCABI.md
+ * https://github.com/WebAssembly/tool-conventions/blob/main/BasicCABI.md
For more information on WebAssembly itself, see the home page:
@@ -31,8 +31,8 @@ For more information, see:
The following documents contain some information on the semantics and binary
encoding of WebAssembly itself:
- * https://github.com/WebAssembly/design/blob/master/Semantics.md
- * https://github.com/WebAssembly/design/blob/master/BinaryEncoding.md
+ * https://github.com/WebAssembly/design/blob/main/Semantics.md
+ * https://github.com/WebAssembly/design/blob/main/BinaryEncoding.md
Some notes on ways that the generated code could be improved follow:
diff --git a/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp b/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp
index f9a96819905f..e3daf6bfa72e 100644
--- a/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp
+++ b/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp
@@ -12,7 +12,7 @@
//===----------------------------------------------------------------------===//
#include "TargetInfo/WebAssemblyTargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
#define DEBUG_TYPE "wasm-target-info"
diff --git a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h
index 673dc9521ced..f6e96d9b2877 100644
--- a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h
+++ b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h
@@ -15,6 +15,8 @@
#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_UTILS_WEBASSEMBLYUTILITIES_H
#define LLVM_LIB_TARGET_WEBASSEMBLY_UTILS_WEBASSEMBLYUTILITIES_H
+#include "llvm/IR/DerivedTypes.h"
+
namespace llvm {
class MachineBasicBlock;
@@ -35,18 +37,35 @@ enum WasmAddressSpace : unsigned {
// linear memory: WebAssembly globals or WebAssembly locals. Loads and stores
// to these pointers are lowered to global.get / global.set or local.get /
// local.set, as appropriate.
- WASM_ADDRESS_SPACE_WASM_VAR = 1
+ WASM_ADDRESS_SPACE_VAR = 1,
+ // A non-integral address space for externref values
+ WASM_ADDRESS_SPACE_EXTERNREF = 10,
+ // A non-integral address space for funcref values
+ WASM_ADDRESS_SPACE_FUNCREF = 20,
};
inline bool isDefaultAddressSpace(unsigned AS) {
return AS == WASM_ADDRESS_SPACE_DEFAULT;
}
inline bool isWasmVarAddressSpace(unsigned AS) {
- return AS == WASM_ADDRESS_SPACE_WASM_VAR;
+ return AS == WASM_ADDRESS_SPACE_VAR;
}
inline bool isValidAddressSpace(unsigned AS) {
return isDefaultAddressSpace(AS) || isWasmVarAddressSpace(AS);
}
+inline bool isFuncrefType(const Type *Ty) {
+ return isa<PointerType>(Ty) &&
+ Ty->getPointerAddressSpace() ==
+ WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF;
+}
+inline bool isExternrefType(const Type *Ty) {
+ return isa<PointerType>(Ty) &&
+ Ty->getPointerAddressSpace() ==
+ WasmAddressSpace::WASM_ADDRESS_SPACE_EXTERNREF;
+}
+inline bool isRefType(const Type *Ty) {
+ return isFuncrefType(Ty) || isExternrefType(Ty);
+}
bool isChild(const MachineInstr &MI, const WebAssemblyFunctionInfo &MFI);
bool mayThrow(const MachineInstr &MI);
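The helpers added above classify reference types purely by address space: funcref and externref values are modelled as pointers in the dedicated non-integral address spaces 20 and 10. A small standalone illustration using the public LLVM IR API (the numeric values mirror the enum above; the function name is made up for the example):

#include "llvm/IR/DerivedTypes.h"

static bool looksLikeWasmRef(const llvm::Type *Ty) {
  if (!llvm::isa<llvm::PointerType>(Ty))
    return false;
  unsigned AS = Ty->getPointerAddressSpace();
  return AS == 10 /* externref */ || AS == 20 /* funcref */;
}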
diff --git a/llvm/lib/Target/WebAssembly/WebAssembly.h b/llvm/lib/Target/WebAssembly/WebAssembly.h
index 9eb960d018d3..803786e0c9c2 100644
--- a/llvm/lib/Target/WebAssembly/WebAssembly.h
+++ b/llvm/lib/Target/WebAssembly/WebAssembly.h
@@ -25,12 +25,12 @@ class ModulePass;
class FunctionPass;
// LLVM IR passes.
-ModulePass *createWebAssemblyLowerEmscriptenEHSjLj(bool EnableEH,
- bool EnableSjLj);
+ModulePass *createWebAssemblyLowerEmscriptenEHSjLj();
ModulePass *createWebAssemblyLowerGlobalDtors();
ModulePass *createWebAssemblyAddMissingPrototypes();
ModulePass *createWebAssemblyFixFunctionBitcasts();
FunctionPass *createWebAssemblyOptimizeReturned();
+FunctionPass *createWebAssemblyLowerRefTypesIntPtrConv();
// ISel and immediate followup passes.
FunctionPass *createWebAssemblyISelDag(WebAssemblyTargetMachine &TM,
@@ -56,7 +56,7 @@ FunctionPass *createWebAssemblyLowerBrUnless();
FunctionPass *createWebAssemblyRegNumbering();
FunctionPass *createWebAssemblyDebugFixup();
FunctionPass *createWebAssemblyPeephole();
-FunctionPass *createWebAssemblyMCLowerPrePass();
+ModulePass *createWebAssemblyMCLowerPrePass();
// PassRegistry initialization declarations.
void initializeWebAssemblyAddMissingPrototypesPass(PassRegistry &);
@@ -85,6 +85,7 @@ void initializeWebAssemblyRegNumberingPass(PassRegistry &);
void initializeWebAssemblyDebugFixupPass(PassRegistry &);
void initializeWebAssemblyPeepholePass(PassRegistry &);
void initializeWebAssemblyMCLowerPrePassPass(PassRegistry &);
+void initializeWebAssemblyLowerRefTypesIntPtrConvPass(PassRegistry &);
namespace WebAssembly {
enum TargetIndex {
diff --git a/llvm/lib/Target/WebAssembly/WebAssembly.td b/llvm/lib/Target/WebAssembly/WebAssembly.td
index c1872dd91c58..a529c6217189 100644
--- a/llvm/lib/Target/WebAssembly/WebAssembly.td
+++ b/llvm/lib/Target/WebAssembly/WebAssembly.td
@@ -25,6 +25,9 @@ include "llvm/Target/Target.td"
def FeatureSIMD128 : SubtargetFeature<"simd128", "SIMDLevel", "SIMD128",
"Enable 128-bit SIMD">;
+def FeatureRelaxedSIMD : SubtargetFeature<"relaxed-simd", "SIMDLevel", "RelaxedSIMD",
+ "Enable relaxed-simd instructions">;
+
def FeatureAtomics : SubtargetFeature<"atomics", "HasAtomics", "true",
"Enable Atomics">;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp
index 530a55cda0e5..90e819912847 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp
@@ -86,27 +86,37 @@ bool WebAssemblyAddMissingPrototypes::runOnModule(Module &M) {
F.getName());
}
- // Create a function prototype based on the first call site (first bitcast)
- // that we find.
+ // Find calls of this function, looking through bitcasts.
+ SmallVector<CallBase *> Calls;
+ SmallVector<Value *> Worklist;
+ Worklist.push_back(&F);
+ while (!Worklist.empty()) {
+ Value *V = Worklist.pop_back_val();
+ for (User *U : V->users()) {
+ if (auto *BC = dyn_cast<BitCastOperator>(U))
+ Worklist.push_back(BC);
+ else if (auto *CB = dyn_cast<CallBase>(U))
+ if (CB->getCalledOperand() == V)
+ Calls.push_back(CB);
+ }
+ }
+
+ // Create a function prototype based on the first call site that we find.
FunctionType *NewType = nullptr;
- for (Use &U : F.uses()) {
- LLVM_DEBUG(dbgs() << "prototype-less use: " << F.getName() << "\n");
- LLVM_DEBUG(dbgs() << *U.getUser() << "\n");
- if (auto *BC = dyn_cast<BitCastOperator>(U.getUser())) {
- if (auto *DestType = dyn_cast<FunctionType>(
- BC->getDestTy()->getPointerElementType())) {
- if (!NewType) {
- // Create a new function with the correct type
- NewType = DestType;
- LLVM_DEBUG(dbgs() << "found function type: " << *NewType << "\n");
- } else if (NewType != DestType) {
- errs() << "warning: prototype-less function used with "
- "conflicting signatures: "
- << F.getName() << "\n";
- LLVM_DEBUG(dbgs() << " " << *DestType << "\n");
- LLVM_DEBUG(dbgs() << " "<< *NewType << "\n");
- }
- }
+ for (CallBase *CB : Calls) {
+ LLVM_DEBUG(dbgs() << "prototype-less call of " << F.getName() << ":\n");
+ LLVM_DEBUG(dbgs() << *CB << "\n");
+ FunctionType *DestType = CB->getFunctionType();
+ if (!NewType) {
+ // Create a new function with the correct type
+ NewType = DestType;
+ LLVM_DEBUG(dbgs() << "found function type: " << *NewType << "\n");
+ } else if (NewType != DestType) {
+ errs() << "warning: prototype-less function used with "
+ "conflicting signatures: "
+ << F.getName() << "\n";
+ LLVM_DEBUG(dbgs() << " " << *DestType << "\n");
+ LLVM_DEBUG(dbgs() << " " << *NewType << "\n");
}
}
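The rewrite above stops peeking through the bitcast's pointee type (getPointerElementType(), which is on its way out with opaque pointers) and instead takes the prototype from the call site itself. A minimal sketch of that idea, assuming an arbitrary call site CB:

#include "llvm/IR/InstrTypes.h"

static llvm::FunctionType *prototypeFromCallSite(llvm::CallBase &CB) {
  // The signature the caller actually used at this call, independent of how
  // the callee pointer was cast.
  return CB.getFunctionType();
}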
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
index 56829eb45e21..0d3f51693261 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
@@ -42,8 +42,8 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolWasm.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -51,8 +51,8 @@ using namespace llvm;
#define DEBUG_TYPE "asm-printer"
extern cl::opt<bool> WasmKeepRegisters;
-extern cl::opt<bool> EnableEmException;
-extern cl::opt<bool> EnableEmSjLj;
+extern cl::opt<bool> WasmEnableEmEH;
+extern cl::opt<bool> WasmEnableEmSjLj;
//===----------------------------------------------------------------------===//
// Helpers.
@@ -161,7 +161,7 @@ MCSymbolWasm *WebAssemblyAsmPrinter::getMCSymbolForFunction(
"Emscripten EH/SjLj does not support multivalue returns: " +
std::string(F->getName()) + ": " +
WebAssembly::signatureToString(Sig);
- report_fatal_error(Msg);
+ report_fatal_error(Twine(Msg));
}
WasmSym = cast<MCSymbolWasm>(
GetExternalSymbolSymbol(getEmscriptenInvokeSymbolName(Sig)));
@@ -234,26 +234,32 @@ MCSymbol *WebAssemblyAsmPrinter::getOrCreateWasmSymbol(StringRef Name) {
return WasmSym;
}
+ if (Name.startswith("GCC_except_table")) {
+ WasmSym->setType(wasm::WASM_SYMBOL_TYPE_DATA);
+ return WasmSym;
+ }
+
SmallVector<wasm::ValType, 4> Returns;
SmallVector<wasm::ValType, 4> Params;
- if (Name == "__cpp_exception") {
+ if (Name == "__cpp_exception" || Name == "__c_longjmp") {
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_TAG);
- // We can't confirm its signature index for now because there can be
- // imported exceptions. Set it to be 0 for now.
- WasmSym->setTagType(
- {wasm::WASM_TAG_ATTRIBUTE_EXCEPTION, /* SigIndex */ 0});
- // We may have multiple C++ compilation units to be linked together, each of
- // which defines the exception symbol. To resolve them, we declare them as
- // weak.
- WasmSym->setWeak(true);
+ // In static linking we define tag symbols in WasmException::endModule().
+ // But we may have multiple objects to be linked together, each of which
+ // defines the tag symbols. To resolve them, we declare them as weak. In
+ // dynamic linking we make tag symbols undefined in the backend, define
+ // them in JS, and feed them to each importing module.
+ if (!isPositionIndependent())
+ WasmSym->setWeak(true);
WasmSym->setExternal(true);
- // All C++ exceptions are assumed to have a single i32 (for wasm32) or i64
- // (for wasm64) param type and void return type. The reaon is, all C++
- // exception values are pointers, and to share the type section with
- // functions, exceptions are assumed to have void return type.
- Params.push_back(Subtarget.hasAddr64() ? wasm::ValType::I64
- : wasm::ValType::I32);
+ // Currently both C++ exceptions and C longjmps have a single pointer type
+ // param. For C++ exceptions it is a pointer to an exception object, and for
+ // C longjmps it is a pointer to a struct that contains a setjmp buffer and a
+ // longjmp return value. We may consider using multiple value parameters for
+ // longjmps later when multivalue support is ready.
+ wasm::ValType AddrType =
+ Subtarget.hasAddr64() ? wasm::ValType::I64 : wasm::ValType::I32;
+ Params.push_back(AddrType);
} else { // Function symbols
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
getLibcallSignature(Subtarget, Name, Returns, Params);
@@ -309,7 +315,7 @@ void WebAssemblyAsmPrinter::emitExternalDecls(const Module &M) {
// will discard it later if it turns out not to be necessary.
auto Signature = signatureFromMVTs(Results, Params);
bool InvokeDetected = false;
- auto *Sym = getMCSymbolForFunction(&F, EnableEmException || EnableEmSjLj,
+ auto *Sym = getMCSymbolForFunction(&F, WasmEnableEmEH || WasmEnableEmSjLj,
Signature.get(), InvokeDetected);
// Multiple functions can be mapped to the same invoke symbol. For
@@ -497,6 +503,15 @@ void WebAssemblyAsmPrinter::EmitTargetFeatures(Module &M) {
// This pseudo-feature tells the linker whether shared memory would be safe
EmitFeature("shared-mem");
+ // This is an "architecture", not a "feature", but we emit it as such for
+ // the benefit of tools like Binaryen and consistency with other producers.
+ // FIXME: Subtarget is null here, so we can't use Subtarget->hasAddr64()?
+ if (M.getDataLayout().getPointerSize() == 8) {
+ // Can't use EmitFeature since "wasm-feature-memory64" is not a module
+ // flag.
+ EmittedFeatures.push_back({wasm::WASM_FEATURE_PREFIX_USED, "memory64"});
+ }
+
if (EmittedFeatures.size() == 0)
return;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
index 59d69e48b775..7832f199a2cc 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
@@ -173,7 +173,7 @@ static bool explicitlyBranchesTo(MachineBasicBlock *Pred,
// satisfying the restrictions given by BeforeSet and AfterSet. BeforeSet
// contains instructions that should go before the marker, and AfterSet contains
// ones that should go after the marker. In this function, AfterSet is only
-// used for sanity checking.
+// used for validation checking.
template <typename Container>
static MachineBasicBlock::iterator
getEarliestInsertPos(MachineBasicBlock *MBB, const Container &BeforeSet,
@@ -182,7 +182,7 @@ getEarliestInsertPos(MachineBasicBlock *MBB, const Container &BeforeSet,
while (InsertPos != MBB->begin()) {
if (BeforeSet.count(&*std::prev(InsertPos))) {
#ifndef NDEBUG
- // Sanity check
+ // Validation check
for (auto Pos = InsertPos, E = MBB->begin(); Pos != E; --Pos)
assert(!AfterSet.count(&*std::prev(Pos)));
#endif
@@ -197,7 +197,7 @@ getEarliestInsertPos(MachineBasicBlock *MBB, const Container &BeforeSet,
// satisfying the restrictions given by BeforeSet and AfterSet. BeforeSet
// contains instructions that should go before the marker, and AfterSet contains
// ones that should go after the marker. In this function, BeforeSet is only
-// used for sanity checking.
+// used for validation checking.
template <typename Container>
static MachineBasicBlock::iterator
getLatestInsertPos(MachineBasicBlock *MBB, const Container &BeforeSet,
@@ -206,7 +206,7 @@ getLatestInsertPos(MachineBasicBlock *MBB, const Container &BeforeSet,
while (InsertPos != MBB->end()) {
if (AfterSet.count(&*InsertPos)) {
#ifndef NDEBUG
- // Sanity check
+ // Validation check
for (auto Pos = InsertPos, E = MBB->end(); Pos != E; ++Pos)
assert(!BeforeSet.count(&*Pos));
#endif
@@ -842,8 +842,7 @@ static void unstackifyVRegsUsedInSplitBB(MachineBasicBlock &MBB,
// INST ..., TeeReg, ...
// INST ..., Reg, ...
// INST ..., Reg, ...
- for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
- MachineInstr &MI = *I++;
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
if (!WebAssembly::isTee(MI.getOpcode()))
continue;
Register TeeReg = MI.getOperand(0).getReg();
@@ -1671,8 +1670,7 @@ void WebAssemblyCFGStackify::rewriteDepthImmediates(MachineFunction &MF) {
SmallVector<EndMarkerInfo, 8> Stack;
SmallVector<const MachineBasicBlock *, 8> EHPadStack;
for (auto &MBB : reverse(MF)) {
- for (auto I = MBB.rbegin(), E = MBB.rend(); I != E; ++I) {
- MachineInstr &MI = *I;
+ for (MachineInstr &MI : llvm::reverse(MBB)) {
switch (MI.getOpcode()) {
case WebAssembly::BLOCK:
case WebAssembly::TRY:
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
index 4a0738dc3b7a..910a4e5e0d1a 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
@@ -252,8 +252,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
// Visit each instruction in the function.
for (MachineBasicBlock &MBB : MF) {
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E;) {
- MachineInstr &MI = *I++;
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
assert(!WebAssembly::isArgument(MI.getOpcode()));
if (MI.isDebugInstr() || MI.isLabel())
@@ -380,9 +379,14 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
const TargetRegisterClass *RC = MRI.getRegClass(OldReg);
Register NewReg = MRI.createVirtualRegister(RC);
unsigned Opc = getLocalGetOpcode(RC);
- InsertPt =
- BuildMI(MBB, InsertPt, MI.getDebugLoc(), TII->get(Opc), NewReg)
- .addImm(LocalId);
+ // Use InsertPt's DebugLoc rather than MI's, since MI may be discontinuous
+ // from where this local is being inserted, causing non-linear stepping in
+ // the debugger or at function entry points where variables aren't live yet.
+ // The alternative is the previous instruction, but that is strictly worse
+ // since it can point at the previous statement.
+ // See crbug.com/1251909, crbug.com/1249745
+ InsertPt = BuildMI(MBB, InsertPt, InsertPt->getDebugLoc(),
+ TII->get(Opc), NewReg).addImm(LocalId);
MO.setReg(NewReg);
MFI.stackifyVReg(MRI, NewReg);
Changed = true;
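The loop rewrites in the last two files replace manual iterator pre-increment with llvm::make_early_inc_range, whose iterator advances past the current element before the body runs, so the body may safely erase that element. A self-contained sketch of the same pattern on an ordinary node-based container:

#include "llvm/ADT/STLExtras.h"
#include <map>
#include <string>

void dropEmptyValues(std::map<int, std::string> &M) {
  for (auto &KV : llvm::make_early_inc_range(M)) {
    if (KV.second.empty())
      M.erase(KV.first);  // safe: the range iterator has already moved on
  }
}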
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
index 171d59ae4c6b..642aa6b4028a 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
@@ -157,7 +157,7 @@ private:
void addLoadStoreOperands(const Address &Addr, const MachineInstrBuilder &MIB,
MachineMemOperand *MMO);
unsigned maskI1Value(unsigned Reg, const Value *V);
- unsigned getRegForI1Value(const Value *V, bool &Not);
+ unsigned getRegForI1Value(const Value *V, const BasicBlock *BB, bool &Not);
unsigned zeroExtendToI32(unsigned Reg, const Value *V,
MVT::SimpleValueType From);
unsigned signExtendToI32(unsigned Reg, const Value *V,
@@ -418,20 +418,17 @@ unsigned WebAssemblyFastISel::maskI1Value(unsigned Reg, const Value *V) {
return zeroExtendToI32(Reg, V, MVT::i1);
}
-unsigned WebAssemblyFastISel::getRegForI1Value(const Value *V, bool &Not) {
+unsigned WebAssemblyFastISel::getRegForI1Value(const Value *V,
+ const BasicBlock *BB,
+ bool &Not) {
if (const auto *ICmp = dyn_cast<ICmpInst>(V))
if (const ConstantInt *C = dyn_cast<ConstantInt>(ICmp->getOperand(1)))
- if (ICmp->isEquality() && C->isZero() && C->getType()->isIntegerTy(32)) {
+ if (ICmp->isEquality() && C->isZero() && C->getType()->isIntegerTy(32) &&
+ ICmp->getParent() == BB) {
Not = ICmp->isTrueWhenEqual();
return getRegForValue(ICmp->getOperand(0));
}
- Value *NotV;
- if (match(V, m_Not(m_Value(NotV))) && V->getType()->isIntegerTy(32)) {
- Not = true;
- return getRegForValue(NotV);
- }
-
Not = false;
unsigned Reg = getRegForValue(V);
if (Reg == 0)
@@ -648,11 +645,11 @@ bool WebAssemblyFastISel::fastLowerArguments() {
unsigned I = 0;
for (auto const &Arg : F->args()) {
const AttributeList &Attrs = F->getAttributes();
- if (Attrs.hasParamAttribute(I, Attribute::ByVal) ||
- Attrs.hasParamAttribute(I, Attribute::SwiftSelf) ||
- Attrs.hasParamAttribute(I, Attribute::SwiftError) ||
- Attrs.hasParamAttribute(I, Attribute::InAlloca) ||
- Attrs.hasParamAttribute(I, Attribute::Nest))
+ if (Attrs.hasParamAttr(I, Attribute::ByVal) ||
+ Attrs.hasParamAttr(I, Attribute::SwiftSelf) ||
+ Attrs.hasParamAttr(I, Attribute::SwiftError) ||
+ Attrs.hasParamAttr(I, Attribute::InAlloca) ||
+ Attrs.hasParamAttr(I, Attribute::Nest))
return false;
Type *ArgTy = Arg.getType();
@@ -825,25 +822,25 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) {
}
SmallVector<unsigned, 8> Args;
- for (unsigned I = 0, E = Call->getNumArgOperands(); I < E; ++I) {
+ for (unsigned I = 0, E = Call->arg_size(); I < E; ++I) {
Value *V = Call->getArgOperand(I);
MVT::SimpleValueType ArgTy = getSimpleType(V->getType());
if (ArgTy == MVT::INVALID_SIMPLE_VALUE_TYPE)
return false;
const AttributeList &Attrs = Call->getAttributes();
- if (Attrs.hasParamAttribute(I, Attribute::ByVal) ||
- Attrs.hasParamAttribute(I, Attribute::SwiftSelf) ||
- Attrs.hasParamAttribute(I, Attribute::SwiftError) ||
- Attrs.hasParamAttribute(I, Attribute::InAlloca) ||
- Attrs.hasParamAttribute(I, Attribute::Nest))
+ if (Attrs.hasParamAttr(I, Attribute::ByVal) ||
+ Attrs.hasParamAttr(I, Attribute::SwiftSelf) ||
+ Attrs.hasParamAttr(I, Attribute::SwiftError) ||
+ Attrs.hasParamAttr(I, Attribute::InAlloca) ||
+ Attrs.hasParamAttr(I, Attribute::Nest))
return false;
unsigned Reg;
- if (Attrs.hasParamAttribute(I, Attribute::SExt))
+ if (Attrs.hasParamAttr(I, Attribute::SExt))
Reg = getRegForSignedValue(V);
- else if (Attrs.hasParamAttribute(I, Attribute::ZExt))
+ else if (Attrs.hasParamAttr(I, Attribute::ZExt))
Reg = getRegForUnsignedValue(V);
else
Reg = getRegForValue(V);
@@ -912,7 +909,8 @@ bool WebAssemblyFastISel::selectSelect(const Instruction *I) {
const auto *Select = cast<SelectInst>(I);
bool Not;
- unsigned CondReg = getRegForI1Value(Select->getCondition(), Not);
+ unsigned CondReg =
+ getRegForI1Value(Select->getCondition(), I->getParent(), Not);
if (CondReg == 0)
return false;
@@ -1312,7 +1310,7 @@ bool WebAssemblyFastISel::selectBr(const Instruction *I) {
MachineBasicBlock *FBB = FuncInfo.MBBMap[Br->getSuccessor(1)];
bool Not;
- unsigned CondReg = getRegForI1Value(Br->getCondition(), Not);
+ unsigned CondReg = getRegForI1Value(Br->getCondition(), Br->getParent(), Not);
if (CondReg == 0)
return false;
@@ -1370,9 +1368,9 @@ bool WebAssemblyFastISel::selectRet(const Instruction *I) {
}
unsigned Reg;
- if (FuncInfo.Fn->getAttributes().hasAttribute(0, Attribute::SExt))
+ if (FuncInfo.Fn->getAttributes().hasRetAttr(Attribute::SExt))
Reg = getRegForSignedValue(RV);
- else if (FuncInfo.Fn->getAttributes().hasAttribute(0, Attribute::ZExt))
+ else if (FuncInfo.Fn->getAttributes().hasRetAttr(Attribute::ZExt))
Reg = getRegForUnsignedValue(RV);
else
Reg = getRegForValue(RV);
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFixBrTableDefaults.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFixBrTableDefaults.cpp
index 52aa3534c78e..5bdec89f1125 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyFixBrTableDefaults.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyFixBrTableDefaults.cpp
@@ -61,9 +61,13 @@ void fixBrTableIndex(MachineInstr &MI, MachineBasicBlock *MBB,
auto ExtMI = MF.getRegInfo().getVRegDef(MI.getOperand(0).getReg());
if (ExtMI->getOpcode() == WebAssembly::I64_EXTEND_U_I32) {
// Unnecessarily extending a 32-bit value to 64, remove it.
- assert(MI.getOperand(0).getReg() == ExtMI->getOperand(0).getReg());
+ auto ExtDefReg = ExtMI->getOperand(0).getReg();
+ assert(MI.getOperand(0).getReg() == ExtDefReg);
MI.getOperand(0).setReg(ExtMI->getOperand(1).getReg());
- ExtMI->eraseFromParent();
+ if (MF.getRegInfo().use_nodbg_empty(ExtDefReg)) {
+ // No more users of extend, delete it.
+ ExtMI->eraseFromParent();
+ }
} else {
// Incoming 64-bit value that needs to be truncated.
Register Reg32 =
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp
index 7abb6fa8905c..2a4349e02f1b 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp
@@ -64,29 +64,21 @@ ModulePass *llvm::createWebAssemblyFixFunctionBitcasts() {
// Recursively descend the def-use lists from V to find non-bitcast users of
// bitcasts of V.
static void findUses(Value *V, Function &F,
- SmallVectorImpl<std::pair<Use *, Function *>> &Uses,
- SmallPtrSetImpl<Constant *> &ConstantBCs) {
- for (Use &U : V->uses()) {
- if (auto *BC = dyn_cast<BitCastOperator>(U.getUser()))
- findUses(BC, F, Uses, ConstantBCs);
- else if (auto *A = dyn_cast<GlobalAlias>(U.getUser()))
- findUses(A, F, Uses, ConstantBCs);
- else if (U.get()->getType() != F.getType()) {
- CallBase *CB = dyn_cast<CallBase>(U.getUser());
- if (!CB)
- // Skip uses that aren't immediately called
- continue;
+ SmallVectorImpl<std::pair<CallBase *, Function *>> &Uses) {
+ for (User *U : V->users()) {
+ if (auto *BC = dyn_cast<BitCastOperator>(U))
+ findUses(BC, F, Uses);
+ else if (auto *A = dyn_cast<GlobalAlias>(U))
+ findUses(A, F, Uses);
+ else if (auto *CB = dyn_cast<CallBase>(U)) {
Value *Callee = CB->getCalledOperand();
if (Callee != V)
// Skip calls where the function isn't the callee
continue;
- if (isa<Constant>(U.get())) {
- // Only add constant bitcasts to the list once; they get RAUW'd
- auto C = ConstantBCs.insert(cast<Constant>(U.get()));
- if (!C.second)
- continue;
- }
- Uses.push_back(std::make_pair(&U, &F));
+ if (CB->getFunctionType() == F.getValueType())
+ // Skip uses that are immediately called
+ continue;
+ Uses.push_back(std::make_pair(CB, &F));
}
}
}
@@ -238,8 +230,7 @@ bool FixFunctionBitcasts::runOnModule(Module &M) {
Function *Main = nullptr;
CallInst *CallMain = nullptr;
- SmallVector<std::pair<Use *, Function *>, 0> Uses;
- SmallPtrSet<Constant *, 2> ConstantBCs;
+ SmallVector<std::pair<CallBase *, Function *>, 0> Uses;
// Collect all the places that need wrappers.
for (Function &F : M) {
@@ -247,7 +238,7 @@ bool FixFunctionBitcasts::runOnModule(Module &M) {
// bitcast type difference for swiftself and swifterror.
if (F.getCallingConv() == CallingConv::Swift)
continue;
- findUses(&F, F, Uses, ConstantBCs);
+ findUses(&F, F, Uses);
// If we have a "main" function, and its type isn't
// "int main(int argc, char *argv[])", create an artificial call with it
@@ -268,8 +259,7 @@ bool FixFunctionBitcasts::runOnModule(Module &M) {
Value *Casted =
ConstantExpr::getBitCast(Main, PointerType::get(MainTy, 0));
CallMain = CallInst::Create(MainTy, Casted, Args, "call_main");
- Use *UseMain = &CallMain->getOperandUse(2);
- Uses.push_back(std::make_pair(UseMain, &F));
+ Uses.push_back(std::make_pair(CallMain, &F));
}
}
}
@@ -277,16 +267,9 @@ bool FixFunctionBitcasts::runOnModule(Module &M) {
DenseMap<std::pair<Function *, FunctionType *>, Function *> Wrappers;
for (auto &UseFunc : Uses) {
- Use *U = UseFunc.first;
+ CallBase *CB = UseFunc.first;
Function *F = UseFunc.second;
- auto *PTy = cast<PointerType>(U->get()->getType());
- auto *Ty = dyn_cast<FunctionType>(PTy->getElementType());
-
- // If the function is casted to something like i8* as a "generic pointer"
- // to be later casted to something else, we can't generate a wrapper for it.
- // Just ignore such casts for now.
- if (!Ty)
- continue;
+ FunctionType *Ty = CB->getFunctionType();
auto Pair = Wrappers.insert(std::make_pair(std::make_pair(F, Ty), nullptr));
if (Pair.second)
@@ -296,10 +279,7 @@ bool FixFunctionBitcasts::runOnModule(Module &M) {
if (!Wrapper)
continue;
- if (isa<Constant>(U->get()))
- U->get()->replaceAllUsesWith(Wrapper);
- else
- U->set(Wrapper);
+ CB->setCalledOperand(Wrapper);
}
// If we created a wrapper for main, rename the wrapper so that it's the
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
index 21519d6135b7..1fa0ea3867c7 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
@@ -21,9 +21,9 @@ HANDLE_NODETYPE(LOCAL_GET)
HANDLE_NODETYPE(LOCAL_SET)
// A wrapper node for TargetExternalSymbol, TargetGlobalAddress, and MCSymbol
HANDLE_NODETYPE(Wrapper)
-// A special wapper used in PIC code for __memory_base/__table_base relative
-// access.
-HANDLE_NODETYPE(WrapperPIC)
+// A special node for TargetGlobalAddress used in PIC code for
+// __memory_base/__table_base relative access.
+HANDLE_NODETYPE(WrapperREL)
HANDLE_NODETYPE(BR_IF)
HANDLE_NODETYPE(BR_TABLE)
HANDLE_NODETYPE(SHUFFLE)
@@ -41,8 +41,6 @@ HANDLE_NODETYPE(PROMOTE_LOW)
HANDLE_NODETYPE(TRUNC_SAT_ZERO_S)
HANDLE_NODETYPE(TRUNC_SAT_ZERO_U)
HANDLE_NODETYPE(DEMOTE_ZERO)
-HANDLE_NODETYPE(THROW)
-HANDLE_NODETYPE(CATCH)
HANDLE_NODETYPE(MEMORY_COPY)
HANDLE_NODETYPE(MEMORY_FILL)
@@ -50,4 +48,5 @@ HANDLE_NODETYPE(MEMORY_FILL)
HANDLE_MEM_NODETYPE(LOAD_SPLAT)
HANDLE_MEM_NODETYPE(GLOBAL_GET)
HANDLE_MEM_NODETYPE(GLOBAL_SET)
+HANDLE_MEM_NODETYPE(TABLE_GET)
HANDLE_MEM_NODETYPE(TABLE_SET)
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
index f4bae59132e6..7e75989d3def 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
@@ -17,6 +17,7 @@
#include "WebAssemblyTargetMachine.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/WasmEHFuncInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h" // To access function attributes.
#include "llvm/IR/IntrinsicsWebAssembly.h"
@@ -24,6 +25,7 @@
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+
using namespace llvm;
#define DEBUG_TYPE "wasm-isel"
@@ -48,32 +50,11 @@ public:
return "WebAssembly Instruction Selection";
}
- void checkForInvalidNodes(const Function &F) {
- // This function will check for uses of ptrtoint on reference types and
- // report a fatal error if these are found.
- for (const BasicBlock &BB : F) {
- for (const Instruction &I : BB) {
- if (const PtrToIntInst *PTI = dyn_cast<const PtrToIntInst>(&I)) {
- const Value *V = PTI->getPointerOperand();
- if (WebAssemblyTargetLowering::isFuncrefType(V->getType()) ||
- WebAssemblyTargetLowering::isExternrefType(V->getType()))
- report_fatal_error("ptrtoint not allowed on reference types");
- } else if (const IntToPtrInst *ITP = dyn_cast<const IntToPtrInst>(&I)) {
- if (WebAssemblyTargetLowering::isFuncrefType(ITP->getDestTy()) ||
- WebAssemblyTargetLowering::isExternrefType(ITP->getDestTy()))
- report_fatal_error("inttoptr not allowed on reference types");
- }
- }
- }
- }
-
bool runOnMachineFunction(MachineFunction &MF) override {
LLVM_DEBUG(dbgs() << "********** ISelDAGToDAG **********\n"
"********** Function: "
<< MF.getName() << '\n');
- checkForInvalidNodes(MF.getFunction());
-
Subtarget = &MF.getSubtarget<WebAssemblySubtarget>();
return SelectionDAGISel::runOnMachineFunction(MF);
@@ -85,7 +66,6 @@ public:
bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
- bool SelectExternRefAddr(const SDValue &Addr, const SDValue &Base);
// Include the pieces autogenerated from the target description.
#include "WebAssemblyGenDAGISel.inc"
@@ -107,6 +87,17 @@ void WebAssemblyDAGToDAGISel::PreprocessISelDAG() {
SelectionDAGISel::PreprocessISelDAG();
}
+static SDValue getTagSymNode(int Tag, SelectionDAG *DAG) {
+ assert(Tag == WebAssembly::CPP_EXCEPTION || Tag == WebAssembly::C_LONGJMP);
+ auto &MF = DAG->getMachineFunction();
+ const auto &TLI = DAG->getTargetLoweringInfo();
+ MVT PtrVT = TLI.getPointerTy(DAG->getDataLayout());
+ const char *SymName = Tag == WebAssembly::CPP_EXCEPTION
+ ? MF.createExternalSymbolName("__cpp_exception")
+ : MF.createExternalSymbolName("__c_longjmp");
+ return DAG->getTargetExternalSymbol(SymName, PtrVT);
+}
+
void WebAssemblyDAGToDAGISel::Select(SDNode *Node) {
// If we have a custom node, we already have selected!
if (Node->isMachineOpcode()) {
@@ -127,8 +118,7 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) {
if (!MF.getSubtarget<WebAssemblySubtarget>().hasAtomics())
break;
- uint64_t SyncScopeID =
- cast<ConstantSDNode>(Node->getOperand(2).getNode())->getZExtValue();
+ uint64_t SyncScopeID = Node->getConstantOperandVal(2);
MachineSDNode *Fence = nullptr;
switch (SyncScopeID) {
case SyncScope::SingleThread:
@@ -162,7 +152,7 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) {
}
case ISD::INTRINSIC_WO_CHAIN: {
- unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
+ unsigned IntNo = Node->getConstantOperandVal(0);
switch (IntNo) {
case Intrinsic::wasm_tls_size: {
MachineSDNode *TLSSize = CurDAG->getMachineNode(
@@ -171,6 +161,7 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) {
ReplaceNode(Node, TLSSize);
return;
}
+
case Intrinsic::wasm_tls_align: {
MachineSDNode *TLSAlign = CurDAG->getMachineNode(
GlobalGetIns, DL, PtrVT,
@@ -181,8 +172,11 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) {
}
break;
}
+
case ISD::INTRINSIC_W_CHAIN: {
- unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ unsigned IntNo = Node->getConstantOperandVal(1);
+ const auto &TLI = CurDAG->getTargetLoweringInfo();
+ MVT PtrVT = TLI.getPointerTy(CurDAG->getDataLayout());
switch (IntNo) {
case Intrinsic::wasm_tls_base: {
MachineSDNode *TLSBase = CurDAG->getMachineNode(
@@ -192,9 +186,48 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) {
ReplaceNode(Node, TLSBase);
return;
}
+
+ case Intrinsic::wasm_catch: {
+ int Tag = Node->getConstantOperandVal(2);
+ SDValue SymNode = getTagSymNode(Tag, CurDAG);
+ MachineSDNode *Catch =
+ CurDAG->getMachineNode(WebAssembly::CATCH, DL,
+ {
+ PtrVT, // exception pointer
+ MVT::Other // outchain type
+ },
+ {
+ SymNode, // exception symbol
+ Node->getOperand(0) // inchain
+ });
+ ReplaceNode(Node, Catch);
+ return;
+ }
+ }
+ break;
+ }
+
+ case ISD::INTRINSIC_VOID: {
+ unsigned IntNo = Node->getConstantOperandVal(1);
+ switch (IntNo) {
+ case Intrinsic::wasm_throw: {
+ int Tag = Node->getConstantOperandVal(2);
+ SDValue SymNode = getTagSymNode(Tag, CurDAG);
+ MachineSDNode *Throw =
+ CurDAG->getMachineNode(WebAssembly::THROW, DL,
+ MVT::Other, // outchain type
+ {
+ SymNode, // exception symbol
+ Node->getOperand(3), // thrown value
+ Node->getOperand(0) // inchain
+ });
+ ReplaceNode(Node, Throw);
+ return;
+ }
}
break;
}
+
case WebAssemblyISD::CALL:
case WebAssemblyISD::RET_CALL: {
// CALL has both variable operands and variable results, but ISel only
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 62c53c0051ae..0df8f3e0e09c 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -25,7 +25,6 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/CodeGen/WasmEHFuncInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/Function.h"
@@ -33,6 +32,7 @@
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
@@ -88,7 +88,9 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
}
}
if (Subtarget->hasReferenceTypes()) {
- for (auto T : {MVT::externref, MVT::funcref}) {
+ // We need custom load and store lowering for externref, funcref, and
+ // Other. The MVT::Other here represents tables of reference types.
+ for (auto T : {MVT::externref, MVT::funcref, MVT::Other}) {
setOperationAction(ISD::LOAD, T, Custom);
setOperationAction(ISD::STORE, T, Custom);
}
@@ -213,8 +215,8 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
setOperationAction(ISD::SELECT_CC, T, Expand);
// Expand integer operations supported for scalars but not SIMD
- for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP, ISD::SDIV, ISD::UDIV,
- ISD::SREM, ISD::UREM, ISD::ROTL, ISD::ROTR})
+ for (auto Op :
+ {ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM, ISD::ROTL, ISD::ROTR})
for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
setOperationAction(Op, T, Expand);
@@ -223,8 +225,15 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
setOperationAction(Op, T, Legal);
- // And we have popcnt for i8x16
+ // And we have popcnt for i8x16. It can be used to expand ctlz/cttz.
setOperationAction(ISD::CTPOP, MVT::v16i8, Legal);
+ setOperationAction(ISD::CTLZ, MVT::v16i8, Expand);
+ setOperationAction(ISD::CTTZ, MVT::v16i8, Expand);
+
+ // Custom lower bit counting operations for other types to scalarize them.
+ for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP})
+ for (auto T : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
+ setOperationAction(Op, T, Custom);
// Expand float operations supported for scalars but not SIMD
for (auto Op : {ISD::FCOPYSIGN, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
@@ -303,9 +312,7 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
setLoadExtAction(Ext, MVT::v4i32, MVT::v4i16, Legal);
setLoadExtAction(Ext, MVT::v2i64, MVT::v2i32, Legal);
}
- // And some truncating stores are legal as well
- setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
- setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Legal);
}
// Don't do anything clever with build_pairs
@@ -338,6 +345,24 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
setMinimumJumpTableEntries(2);
}
+MVT WebAssemblyTargetLowering::getPointerTy(const DataLayout &DL,
+ uint32_t AS) const {
+ if (AS == WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_EXTERNREF)
+ return MVT::externref;
+ if (AS == WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF)
+ return MVT::funcref;
+ return TargetLowering::getPointerTy(DL, AS);
+}
+
+MVT WebAssemblyTargetLowering::getPointerMemTy(const DataLayout &DL,
+ uint32_t AS) const {
+ if (AS == WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_EXTERNREF)
+ return MVT::externref;
+ if (AS == WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF)
+ return MVT::funcref;
+ return TargetLowering::getPointerMemTy(DL, AS);
+}
+
TargetLowering::AtomicExpansionKind
WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
// We have wasm instructions for these
@@ -551,7 +576,21 @@ LowerCallResults(MachineInstr &CallResults, DebugLoc DL, MachineBasicBlock *BB,
if (IsIndirect) {
auto FnPtr = CallParams.getOperand(0);
CallParams.RemoveOperand(0);
- CallParams.addOperand(FnPtr);
+
+ // For funcrefs, call_indirect is done through __funcref_call_table and
+ // the funcref is always installed in slot 0 of the table; therefore,
+ // instead of adding the function pointer at the end of the params list,
+ // a zero (the index into __funcref_call_table) is added.
+ if (IsFuncrefCall) {
+ Register RegZero =
+ MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
+ MachineInstrBuilder MIBC0 =
+ BuildMI(MF, DL, TII.get(WebAssembly::CONST_I32), RegZero).addImm(0);
+
+ BB->insert(CallResults.getIterator(), MIBC0);
+ MachineInstrBuilder(MF, CallParams).addReg(RegZero);
+ } else
+ CallParams.addOperand(FnPtr);
}
for (auto Def : CallResults.defs())
@@ -770,6 +809,13 @@ bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
(ExtT == MVT::v2i64 && MemT == MVT::v2i32);
}
+bool WebAssemblyTargetLowering::isOffsetFoldingLegal(
+ const GlobalAddressSDNode *GA) const {
+ // Wasm doesn't support function addresses with offsets
+ const GlobalValue *GV = GA->getGlobal();
+ return isa<Function>(GV) ? false : TargetLowering::isOffsetFoldingLegal(GA);
+}
+
EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
LLVMContext &C,
EVT VT) const {
@@ -823,6 +869,45 @@ bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
}
}
+void WebAssemblyTargetLowering::computeKnownBitsForTargetNode(
+ const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
+ const SelectionDAG &DAG, unsigned Depth) const {
+ switch (Op.getOpcode()) {
+ default:
+ break;
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IntNo = Op.getConstantOperandVal(0);
+ switch (IntNo) {
+ default:
+ break;
+ case Intrinsic::wasm_bitmask: {
+ unsigned BitWidth = Known.getBitWidth();
+ EVT VT = Op.getOperand(1).getSimpleValueType();
+ unsigned PossibleBits = VT.getVectorNumElements();
+ APInt ZeroMask = APInt::getHighBitsSet(BitWidth, BitWidth - PossibleBits);
+ Known.Zero |= ZeroMask;
+ break;
+ }
+ }
+ }
+ }
+}
+
+TargetLoweringBase::LegalizeTypeAction
+WebAssemblyTargetLowering::getPreferredVectorAction(MVT VT) const {
+ if (VT.isFixedLengthVector()) {
+ MVT EltVT = VT.getVectorElementType();
+ // We have legal vector types with these lane types, so widening the
+ // vector would let us use some of the lanes directly without having to
+ // extend or truncate values.
+ if (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
+ EltVT == MVT::i64 || EltVT == MVT::f32 || EltVT == MVT::f64)
+ return TypeWidenVector;
+ }
+
+ return TargetLoweringBase::getPreferredVectorAction(VT);
+}
+
//===----------------------------------------------------------------------===//
// WebAssembly Lowering private implementation.
//===----------------------------------------------------------------------===//
@@ -1088,7 +1173,8 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Lastly, if this is a call to a funcref we need to add an instruction
// table.set to the chain and transform the call.
- if (CLI.CB && isFuncrefType(CLI.CB->getCalledOperand()->getType())) {
+ if (CLI.CB &&
+ WebAssembly::isFuncrefType(CLI.CB->getCalledOperand()->getType())) {
// In the absence of function references proposal where a funcref call is
// lowered to call_ref, using reference types we generate a table.set to set
// the funcref to a special table used solely for this purpose, followed by
@@ -1106,7 +1192,8 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
WebAssemblyISD::TABLE_SET, DL, DAG.getVTList(MVT::Other), TableSetOps,
MVT::funcref,
// Machine Mem Operand args
- MachinePointerInfo(WasmAddressSpace::FUNCREF),
+ MachinePointerInfo(
+ WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF),
CLI.CB->getCalledOperand()->getPointerAlignment(DAG.getDataLayout()),
MachineMemOperand::MOStore);
@@ -1325,6 +1412,10 @@ SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
return LowerLoad(Op, DAG);
case ISD::STORE:
return LowerStore(Op, DAG);
+ case ISD::CTPOP:
+ case ISD::CTLZ:
+ case ISD::CTTZ:
+ return DAG.UnrollVectorOp(Op.getNode());
}
}
@@ -1344,14 +1435,78 @@ static Optional<unsigned> IsWebAssemblyLocal(SDValue Op, SelectionDAG &DAG) {
return WebAssemblyFrameLowering::getLocalForStackObject(MF, FI->getIndex());
}
-bool WebAssemblyTargetLowering::isFuncrefType(const Type *Ty) {
- return isa<PointerType>(Ty) &&
- Ty->getPointerAddressSpace() == WasmAddressSpace::FUNCREF;
+static bool IsWebAssemblyTable(SDValue Op) {
+ const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
+ if (GA && WebAssembly::isWasmVarAddressSpace(GA->getAddressSpace())) {
+ const GlobalValue *Value = GA->getGlobal();
+ const Type *Ty = Value->getValueType();
+
+ if (Ty->isArrayTy() && WebAssembly::isRefType(Ty->getArrayElementType()))
+ return true;
+ }
+ return false;
+}
+
+// This function will accept as Op any access to a table, so Op can
+// be the actual table or an offset into the table.
+static bool IsWebAssemblyTableWithOffset(SDValue Op) {
+ if (Op->getOpcode() == ISD::ADD && Op->getNumOperands() == 2)
+ return (Op->getOperand(1).getSimpleValueType() == MVT::i32 &&
+ IsWebAssemblyTableWithOffset(Op->getOperand(0))) ||
+ (Op->getOperand(0).getSimpleValueType() == MVT::i32 &&
+ IsWebAssemblyTableWithOffset(Op->getOperand(1)));
+
+ return IsWebAssemblyTable(Op);
}
-bool WebAssemblyTargetLowering::isExternrefType(const Type *Ty) {
- return isa<PointerType>(Ty) &&
- Ty->getPointerAddressSpace() == WasmAddressSpace::EXTERNREF;
+// Helper for table pattern matching used in LowerStore and LowerLoad
+bool WebAssemblyTargetLowering::MatchTableForLowering(SelectionDAG &DAG,
+ const SDLoc &DL,
+ const SDValue &Base,
+ GlobalAddressSDNode *&GA,
+ SDValue &Idx) const {
+ // We expect the following graph for a load of the form:
+ // table[<var> + <constant offset>]
+ //
+ // Case 1:
+ // externref = load t1
+ // t1: i32 = add t2, i32:<constant offset>
+ // t2: i32 = add tX, table
+ //
+ // This is in some cases simplified to just:
+ // Case 2:
+ // externref = load t1
+ // t1: i32 = add t2, i32:tX
+ //
+ // So, unfortunately we need to check for both cases and if we are in the
+ // first case extract the table GlobalAddressNode and build a new node tY
+ // that's tY: i32 = add i32:<constant offset>, i32:tX
+ //
+ if (IsWebAssemblyTable(Base)) {
+ GA = cast<GlobalAddressSDNode>(Base);
+ Idx = DAG.getConstant(0, DL, MVT::i32);
+ } else {
+ GA = dyn_cast<GlobalAddressSDNode>(Base->getOperand(0));
+ if (GA) {
+ // We are in Case 2 above.
+ Idx = Base->getOperand(1);
+ if (!Idx || GA->getNumValues() != 1 || Idx->getNumValues() != 1)
+ return false;
+ } else {
+ // This might be Case 1 above (or an error)
+ SDValue V = Base->getOperand(0);
+ GA = dyn_cast<GlobalAddressSDNode>(V->getOperand(1));
+
+ if (V->getOpcode() != ISD::ADD || V->getNumOperands() != 2 || !GA)
+ return false;
+
+ SDValue IdxV = DAG.getNode(ISD::ADD, DL, MVT::i32, Base->getOperand(1),
+ V->getOperand(0));
+ Idx = IdxV;
+ }
+ }
+
+ return true;
}
SDValue WebAssemblyTargetLowering::LowerStore(SDValue Op,
@@ -1362,6 +1517,26 @@ SDValue WebAssemblyTargetLowering::LowerStore(SDValue Op,
const SDValue &Base = SN->getBasePtr();
const SDValue &Offset = SN->getOffset();
+ if (IsWebAssemblyTableWithOffset(Base)) {
+ if (!Offset->isUndef())
+ report_fatal_error(
+ "unexpected offset when storing to webassembly table", false);
+
+ SDValue Idx;
+ GlobalAddressSDNode *GA;
+
+ if (!MatchTableForLowering(DAG, DL, Base, GA, Idx))
+ report_fatal_error("failed pattern matching for lowering table store",
+ false);
+
+ SDVTList Tys = DAG.getVTList(MVT::Other);
+ SDValue TableSetOps[] = {SN->getChain(), SDValue(GA, 0), Idx, Value};
+ SDValue TableSet =
+ DAG.getMemIntrinsicNode(WebAssemblyISD::TABLE_SET, DL, Tys, TableSetOps,
+ SN->getMemoryVT(), SN->getMemOperand());
+ return TableSet;
+ }
+
if (IsWebAssemblyGlobal(Base)) {
if (!Offset->isUndef())
report_fatal_error("unexpected offset when storing to webassembly global",
@@ -1394,6 +1569,26 @@ SDValue WebAssemblyTargetLowering::LowerLoad(SDValue Op,
const SDValue &Base = LN->getBasePtr();
const SDValue &Offset = LN->getOffset();
+ if (IsWebAssemblyTableWithOffset(Base)) {
+ if (!Offset->isUndef())
+ report_fatal_error(
+ "unexpected offset when loading from webassembly table", false);
+
+ GlobalAddressSDNode *GA;
+ SDValue Idx;
+
+ if (!MatchTableForLowering(DAG, DL, Base, GA, Idx))
+ report_fatal_error("failed pattern matching for lowering table load",
+ false);
+
+ SDVTList Tys = DAG.getVTList(LN->getValueType(0), MVT::Other);
+ SDValue TableGetOps[] = {LN->getChain(), SDValue(GA, 0), Idx};
+ SDValue TableGet =
+ DAG.getMemIntrinsicNode(WebAssemblyISD::TABLE_GET, DL, Tys, TableGetOps,
+ LN->getMemoryVT(), LN->getMemOperand());
+ return TableGet;
+ }
+
if (IsWebAssemblyGlobal(Base)) {
if (!Offset->isUndef())
report_fatal_error(
@@ -1468,7 +1663,7 @@ SDValue WebAssemblyTargetLowering::LowerRETURNADDR(SDValue Op,
if (verifyReturnAddressArgumentIsConstant(Op, DAG))
return SDValue();
- unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned Depth = Op.getConstantOperandVal(0);
MakeLibCallOptions CallOptions;
return makeLibCall(DAG, RTLIB::RETURN_ADDRESS, Op.getValueType(),
{DAG.getConstant(Depth, DL, MVT::i32)}, CallOptions, DL)
@@ -1495,7 +1690,6 @@ WebAssemblyTargetLowering::LowerGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
const auto *GA = cast<GlobalAddressSDNode>(Op);
- MVT PtrVT = getPointerTy(DAG.getDataLayout());
MachineFunction &MF = DAG.getMachineFunction();
if (!MF.getSubtarget<WebAssemblySubtarget>().hasBulkMemory())
@@ -1517,20 +1711,43 @@ WebAssemblyTargetLowering::LowerGlobalTLSAddress(SDValue Op,
false);
}
- auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
- : WebAssembly::GLOBAL_GET_I32;
- const char *BaseName = MF.createExternalSymbolName("__tls_base");
+ auto model = GV->getThreadLocalMode();
- SDValue BaseAddr(
- DAG.getMachineNode(GlobalGet, DL, PtrVT,
- DAG.getTargetExternalSymbol(BaseName, PtrVT)),
- 0);
+ // Unsupported TLS modes
+ assert(model != GlobalValue::NotThreadLocal);
+ assert(model != GlobalValue::InitialExecTLSModel);
+
+ if (model == GlobalValue::LocalExecTLSModel ||
+ model == GlobalValue::LocalDynamicTLSModel ||
+ (model == GlobalValue::GeneralDynamicTLSModel &&
+ getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV))) {
+ // For DSO-local TLS variables we use offset from __tls_base
+
+ MVT PtrVT = getPointerTy(DAG.getDataLayout());
+ auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
+ : WebAssembly::GLOBAL_GET_I32;
+ const char *BaseName = MF.createExternalSymbolName("__tls_base");
+
+ SDValue BaseAddr(
+ DAG.getMachineNode(GlobalGet, DL, PtrVT,
+ DAG.getTargetExternalSymbol(BaseName, PtrVT)),
+ 0);
+
+ SDValue TLSOffset = DAG.getTargetGlobalAddress(
+ GV, DL, PtrVT, GA->getOffset(), WebAssemblyII::MO_TLS_BASE_REL);
+ SDValue SymOffset =
+ DAG.getNode(WebAssemblyISD::WrapperREL, DL, PtrVT, TLSOffset);
+
+ return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymOffset);
+ }
- SDValue TLSOffset = DAG.getTargetGlobalAddress(
- GV, DL, PtrVT, GA->getOffset(), WebAssemblyII::MO_TLS_BASE_REL);
- SDValue SymAddr = DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT, TLSOffset);
+ assert(model == GlobalValue::GeneralDynamicTLSModel);
- return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymAddr);
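+  // Otherwise the address is read indirectly through a GOT-provided global
+  // (MO_GOT_TLS).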
+ EVT VT = Op.getValueType();
+ return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
+ DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
+ GA->getOffset(),
+ WebAssemblyII::MO_GOT_TLS));
}
SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
@@ -1563,14 +1780,13 @@ SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
DAG.getTargetExternalSymbol(BaseName, PtrVT));
SDValue SymAddr = DAG.getNode(
- WebAssemblyISD::WrapperPIC, DL, VT,
+ WebAssemblyISD::WrapperREL, DL, VT,
DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset(),
OperandFlags));
return DAG.getNode(ISD::ADD, DL, VT, BaseAddr, SymAddr);
- } else {
- OperandFlags = WebAssemblyII::MO_GOT;
}
+ OperandFlags = WebAssemblyII::MO_GOT;
}
return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
@@ -1640,21 +1856,6 @@ SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
MachinePointerInfo(SV));
}
-static SDValue getCppExceptionSymNode(SDValue Op, unsigned TagIndex,
- SelectionDAG &DAG) {
- // We only support C++ exceptions for now
- int Tag =
- cast<ConstantSDNode>(Op.getOperand(TagIndex).getNode())->getZExtValue();
- if (Tag != WebAssembly::CPP_EXCEPTION)
- llvm_unreachable("Invalid tag: We only support C++ exceptions for now");
- auto &MF = DAG.getMachineFunction();
- const auto &TLI = DAG.getTargetLoweringInfo();
- MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
- const char *SymName = MF.createExternalSymbolName("__cpp_exception");
- return DAG.getNode(WebAssemblyISD::Wrapper, SDLoc(Op), PtrVT,
- DAG.getTargetExternalSymbol(SymName, PtrVT));
-}
-
SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
@@ -1662,10 +1863,10 @@ SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
switch (Op.getOpcode()) {
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN:
- IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ IntNo = Op.getConstantOperandVal(1);
break;
case ISD::INTRINSIC_WO_CHAIN:
- IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ IntNo = Op.getConstantOperandVal(0);
break;
default:
llvm_unreachable("Invalid intrinsic");
@@ -1677,38 +1878,22 @@ SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
return SDValue(); // Don't custom lower most intrinsics.
case Intrinsic::wasm_lsda: {
- EVT VT = Op.getValueType();
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
- auto &Context = MF.getMMI().getContext();
- MCSymbol *S = Context.getOrCreateSymbol(Twine("GCC_except_table") +
- Twine(MF.getFunctionNumber()));
- return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
- DAG.getMCSymbol(S, PtrVT));
- }
-
- case Intrinsic::wasm_throw: {
- SDValue SymNode = getCppExceptionSymNode(Op, 2, DAG);
- return DAG.getNode(WebAssemblyISD::THROW, DL,
- MVT::Other, // outchain type
- {
- Op.getOperand(0), // inchain
- SymNode, // exception symbol
- Op.getOperand(3) // thrown value
- });
- }
-
- case Intrinsic::wasm_catch: {
- SDValue SymNode = getCppExceptionSymNode(Op, 2, DAG);
- return DAG.getNode(WebAssemblyISD::CATCH, DL,
- {
- MVT::i32, // outchain type
- MVT::Other // return value
- },
- {
- Op.getOperand(0), // inchain
- SymNode // exception symbol
- });
+ auto PtrVT = getPointerTy(MF.getDataLayout());
+ const char *SymName = MF.createExternalSymbolName(
+ "GCC_except_table" + std::to_string(MF.getFunctionNumber()));
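+    // In PIC code, compute the LSDA address as __memory_base plus a
+    // memory-base-relative offset to the per-function GCC_except_table symbol;
+    // otherwise reference the symbol address directly.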
+ if (isPositionIndependent()) {
+ SDValue Node = DAG.getTargetExternalSymbol(
+ SymName, PtrVT, WebAssemblyII::MO_MEMORY_BASE_REL);
+ const char *BaseName = MF.createExternalSymbolName("__memory_base");
+ SDValue BaseAddr =
+ DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
+ DAG.getTargetExternalSymbol(BaseName, PtrVT));
+ SDValue SymAddr =
+ DAG.getNode(WebAssemblyISD::WrapperREL, DL, PtrVT, Node);
+ return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymAddr);
+ }
+ SDValue Node = DAG.getTargetExternalSymbol(SymName, PtrVT);
+ return DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT, Node);
}
case Intrinsic::wasm_shuffle: {
@@ -1774,8 +1959,76 @@ WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
Op.getOperand(1));
}
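+// Lower a v2f64 BUILD_VECTOR whose two lanes were each produced by converting
+// ({s,u}int_to_fp) or promoting (fp_extend) a lane extracted from a vector
+// into a single CONVERT_LOW_S/U or PROMOTE_LOW node, shuffling the converted
+// lanes into the low lanes of the source first when necessary.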
+static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ if (Op.getValueType() != MVT::v2f64)
+ return SDValue();
+
+ auto GetConvertedLane = [](SDValue Op, unsigned &Opcode, SDValue &SrcVec,
+ unsigned &Index) -> bool {
+ switch (Op.getOpcode()) {
+ case ISD::SINT_TO_FP:
+ Opcode = WebAssemblyISD::CONVERT_LOW_S;
+ break;
+ case ISD::UINT_TO_FP:
+ Opcode = WebAssemblyISD::CONVERT_LOW_U;
+ break;
+ case ISD::FP_EXTEND:
+ Opcode = WebAssemblyISD::PROMOTE_LOW;
+ break;
+ default:
+ return false;
+ }
+
+ auto ExtractVector = Op.getOperand(0);
+ if (ExtractVector.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return false;
+
+ if (!isa<ConstantSDNode>(ExtractVector.getOperand(1).getNode()))
+ return false;
+
+ SrcVec = ExtractVector.getOperand(0);
+ Index = ExtractVector.getConstantOperandVal(1);
+ return true;
+ };
+
+ unsigned LHSOpcode, RHSOpcode, LHSIndex, RHSIndex;
+ SDValue LHSSrcVec, RHSSrcVec;
+ if (!GetConvertedLane(Op.getOperand(0), LHSOpcode, LHSSrcVec, LHSIndex) ||
+ !GetConvertedLane(Op.getOperand(1), RHSOpcode, RHSSrcVec, RHSIndex))
+ return SDValue();
+
+ if (LHSOpcode != RHSOpcode)
+ return SDValue();
+
+ MVT ExpectedSrcVT;
+ switch (LHSOpcode) {
+ case WebAssemblyISD::CONVERT_LOW_S:
+ case WebAssemblyISD::CONVERT_LOW_U:
+ ExpectedSrcVT = MVT::v4i32;
+ break;
+ case WebAssemblyISD::PROMOTE_LOW:
+ ExpectedSrcVT = MVT::v4f32;
+ break;
+ }
+ if (LHSSrcVec.getValueType() != ExpectedSrcVT)
+ return SDValue();
+
+ auto Src = LHSSrcVec;
+ if (LHSIndex != 0 || RHSIndex != 1 || LHSSrcVec != RHSSrcVec) {
+ // Shuffle the source vector so that the converted lanes are the low lanes.
+ Src = DAG.getVectorShuffle(
+ ExpectedSrcVT, DL, LHSSrcVec, RHSSrcVec,
+ {static_cast<int>(LHSIndex), static_cast<int>(RHSIndex) + 4, -1, -1});
+ }
+ return DAG.getNode(LHSOpcode, DL, MVT::v2f64, Src);
+}
+
SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
+ if (auto ConvertLow = LowerConvertLow(Op, DAG))
+ return ConvertLow;
+
SDLoc DL(Op);
const EVT VecT = Op.getValueType();
const EVT LaneT = Op.getOperand(0).getValueType();
@@ -1901,12 +2154,8 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
size_t NumShuffleLanes = 0;
if (ShuffleCounts.size()) {
std::tie(ShuffleSrc1, NumShuffleLanes) = GetMostCommon(ShuffleCounts);
- ShuffleCounts.erase(std::remove_if(ShuffleCounts.begin(),
- ShuffleCounts.end(),
- [&](const auto &Pair) {
- return Pair.first == ShuffleSrc1;
- }),
- ShuffleCounts.end());
+ llvm::erase_if(ShuffleCounts,
+ [&](const auto &Pair) { return Pair.first == ShuffleSrc1; });
}
if (ShuffleCounts.size()) {
size_t AdditionalShuffleLanes;
@@ -1974,7 +2223,23 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
SmallVector<SDValue, 16> ConstLanes;
for (const SDValue &Lane : Op->op_values()) {
if (IsConstant(Lane)) {
- ConstLanes.push_back(Lane);
+ // Values may need to be fixed so that they will sign extend to be
+ // within the expected range during ISel. Check whether the value is in
+ // bounds based on the lane bit width and if it is out of bounds, lop
+ // off the extra bits and subtract 2^n to reflect giving the high bit
+ // value -2^(n-1) rather than +2^(n-1). Skip the i64 case because it
+ // cannot possibly be out of range.
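+      // For example, an i8 lane value of 200 is rewritten as 200 - 256 = -56,
+      // which keeps the same low 8 bits (0xC8) but sign-extends within range.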
+ auto *Const = dyn_cast<ConstantSDNode>(Lane.getNode());
+ int64_t Val = Const ? Const->getSExtValue() : 0;
+ uint64_t LaneBits = 128 / Lanes;
+ assert((LaneBits == 64 || Val >= -(1ll << (LaneBits - 1))) &&
+ "Unexpected out of bounds negative value");
+ if (Const && LaneBits != 64 && Val > (1ll << (LaneBits - 1)) - 1) {
+ auto NewVal = ((uint64_t)Val % (1ll << LaneBits)) - (1ll << LaneBits);
+ ConstLanes.push_back(DAG.getConstant(NewVal, SDLoc(Lane), LaneT));
+ } else {
+ ConstLanes.push_back(Lane);
+ }
} else if (LaneT.isFloatingPoint()) {
ConstLanes.push_back(DAG.getConstantFP(0, DL, LaneT));
} else {
@@ -2227,120 +2492,6 @@ performVectorExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
}
static SDValue
-performVectorConvertLowCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI) {
- auto &DAG = DCI.DAG;
-
- EVT ResVT = N->getValueType(0);
- if (ResVT != MVT::v2f64)
- return SDValue();
-
- auto GetWasmConversionOp = [](unsigned Op) {
- switch (Op) {
- case ISD::SINT_TO_FP:
- return WebAssemblyISD::CONVERT_LOW_S;
- case ISD::UINT_TO_FP:
- return WebAssemblyISD::CONVERT_LOW_U;
- case ISD::FP_EXTEND:
- return WebAssemblyISD::PROMOTE_LOW;
- }
- llvm_unreachable("unexpected op");
- };
-
- if (N->getOpcode() == ISD::EXTRACT_SUBVECTOR) {
- // Combine this:
- //
- // (v2f64 (extract_subvector
- // (v4f64 ({s,u}int_to_fp (v4i32 $x))), 0))
- //
- // into (f64x2.convert_low_i32x4_{s,u} $x).
- //
- // Or this:
- //
- // (v2f64 (extract_subvector
- // (v4f64 (fp_extend (v4f32 $x))), 0))
- //
- // into (f64x2.promote_low_f32x4 $x).
- auto Conversion = N->getOperand(0);
- auto ConversionOp = Conversion.getOpcode();
- MVT ExpectedSourceType;
- switch (ConversionOp) {
- case ISD::SINT_TO_FP:
- case ISD::UINT_TO_FP:
- ExpectedSourceType = MVT::v4i32;
- break;
- case ISD::FP_EXTEND:
- ExpectedSourceType = MVT::v4f32;
- break;
- default:
- return SDValue();
- }
-
- if (Conversion.getValueType() != MVT::v4f64)
- return SDValue();
-
- auto Source = Conversion.getOperand(0);
- if (Source.getValueType() != ExpectedSourceType)
- return SDValue();
-
- auto IndexNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
- if (IndexNode == nullptr || IndexNode->getZExtValue() != 0)
- return SDValue();
-
- auto Op = GetWasmConversionOp(ConversionOp);
- return DAG.getNode(Op, SDLoc(N), ResVT, Source);
- }
-
- // Combine this:
- //
- // (v2f64 ({s,u}int_to_fp
- // (v2i32 (extract_subvector (v4i32 $x), 0))))
- //
- // into (f64x2.convert_low_i32x4_{s,u} $x).
- //
- // Or this:
- //
- // (v2f64 (fp_extend
- // (v2f32 (extract_subvector (v4f32 $x), 0))))
- //
- // into (f64x2.promote_low_f32x4 $x).
- auto ConversionOp = N->getOpcode();
- MVT ExpectedExtractType;
- MVT ExpectedSourceType;
- switch (ConversionOp) {
- case ISD::SINT_TO_FP:
- case ISD::UINT_TO_FP:
- ExpectedExtractType = MVT::v2i32;
- ExpectedSourceType = MVT::v4i32;
- break;
- case ISD::FP_EXTEND:
- ExpectedExtractType = MVT::v2f32;
- ExpectedSourceType = MVT::v4f32;
- break;
- default:
- llvm_unreachable("unexpected opcode");
- }
-
- auto Extract = N->getOperand(0);
- if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
- return SDValue();
-
- if (Extract.getValueType() != ExpectedExtractType)
- return SDValue();
-
- auto Source = Extract.getOperand(0);
- if (Source.getValueType() != ExpectedSourceType)
- return SDValue();
-
- auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
- if (IndexNode == nullptr || IndexNode->getZExtValue() != 0)
- return SDValue();
-
- unsigned Op = GetWasmConversionOp(ConversionOp);
- return DAG.getNode(Op, SDLoc(N), ResVT, Source);
-}
-
-static SDValue
performVectorTruncZeroCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
auto &DAG = DCI.DAG;
@@ -2470,11 +2621,6 @@ WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
return performVectorExtendCombine(N, DCI);
- case ISD::SINT_TO_FP:
- case ISD::UINT_TO_FP:
- case ISD::FP_EXTEND:
- case ISD::EXTRACT_SUBVECTOR:
- return performVectorConvertLowCombine(N, DCI);
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT:
case ISD::FP_ROUND:
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
index 5d813fefb96b..f7b460f61dbb 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
@@ -45,35 +45,8 @@ public:
WebAssemblyTargetLowering(const TargetMachine &TM,
const WebAssemblySubtarget &STI);
- enum WasmAddressSpace : unsigned {
- // WebAssembly uses the following address spaces:
- // AS 0 : is the default address space for values in linear memory
- DEFAULT = 0,
- // AS 1 : is a non-integral address space for global variables
- GLOBAL = 1,
- // AS 10 : is a non-integral address space for externref values
- EXTERNREF = 10,
- // AS 20 : is a non-integral address space for funcref values
- FUNCREF = 20,
- };
-
- MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
- if (AS == WasmAddressSpace::EXTERNREF)
- return MVT::externref;
- if (AS == WasmAddressSpace::FUNCREF)
- return MVT::funcref;
- return TargetLowering::getPointerTy(DL, AS);
- }
- MVT getPointerMemTy(const DataLayout &DL, uint32_t AS = 0) const override {
- if (AS == WasmAddressSpace::EXTERNREF)
- return MVT::externref;
- if (AS == WasmAddressSpace::FUNCREF)
- return MVT::funcref;
- return TargetLowering::getPointerMemTy(DL, AS);
- }
-
- static bool isFuncrefType(const Type *Ty);
- static bool isExternrefType(const Type *Ty);
+ MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override;
+ MVT getPointerMemTy(const DataLayout &DL, uint32_t AS = 0) const override;
private:
/// Keep a pointer to the WebAssemblySubtarget around so that we can make the
@@ -102,12 +75,21 @@ private:
bool *Fast) const override;
bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
+ bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
EVT VT) const override;
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
MachineFunction &MF,
unsigned Intrinsic) const override;
+ void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
+ const APInt &DemandedElts,
+ const SelectionDAG &DAG,
+ unsigned Depth) const override;
+
+ TargetLoweringBase::LegalizeTypeAction
+ getPreferredVectorAction(MVT VT) const override;
+
SDValue LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const override;
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
@@ -154,6 +136,11 @@ private:
SDValue LowerLoad(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerStore(SDValue Op, SelectionDAG &DAG) const;
+  // Helper for LowerLoad and LowerStore
+ bool MatchTableForLowering(SelectionDAG &DAG, const SDLoc &DL,
+ const SDValue &Base, GlobalAddressSDNode *&GA,
+ SDValue &Idx) const;
+
// Custom DAG combine hooks
SDValue
PerformDAGCombine(SDNode *N,
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
index 1ee6ae196d02..42183d1645e1 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
@@ -114,13 +114,13 @@ def NotifyPatOffsetOnly_A64 :
Requires<[HasAddr64, HasAtomics]>;
def NotifyPatGlobalAddrOffOnly_A32 :
- Pat<(i32 (int_wasm_memory_atomic_notify (WebAssemblywrapper tglobaladdr:$off),
+ Pat<(i32 (int_wasm_memory_atomic_notify (WebAssemblyWrapper tglobaladdr:$off),
I32:$count)),
(MEMORY_ATOMIC_NOTIFY_A32 0, tglobaladdr:$off, (CONST_I32 0), I32:$count)
>,
Requires<[HasAddr32, HasAtomics, IsNotPIC]>;
def NotifyPatGlobalAddrOffOnly_A64 :
- Pat<(i32 (int_wasm_memory_atomic_notify (WebAssemblywrapper tglobaladdr:$off),
+ Pat<(i32 (int_wasm_memory_atomic_notify (WebAssemblyWrapper tglobaladdr:$off),
I32:$count)),
(MEMORY_ATOMIC_NOTIFY_A64 0, tglobaladdr:$off, (CONST_I64 0), I32:$count)
>,
@@ -185,12 +185,12 @@ defm : WaitPatOffsetOnly<i64, int_wasm_memory_atomic_wait64,
"MEMORY_ATOMIC_WAIT64">;
multiclass WaitPatGlobalAddrOffOnly<ValueType ty, Intrinsic kind, string inst> {
- def : Pat<(i32 (kind (WebAssemblywrapper tglobaladdr:$off), ty:$exp,
+ def : Pat<(i32 (kind (WebAssemblyWrapper tglobaladdr:$off), ty:$exp,
I64:$timeout)),
(!cast<NI>(inst#_A32) 0, tglobaladdr:$off, (CONST_I32 0), ty:$exp,
I64:$timeout)>,
Requires<[HasAddr32, HasAtomics, IsNotPIC]>;
- def : Pat<(i32 (kind (WebAssemblywrapper tglobaladdr:$off), ty:$exp,
+ def : Pat<(i32 (kind (WebAssemblyWrapper tglobaladdr:$off), ty:$exp,
I64:$timeout)),
(!cast<NI>(inst#_A64) 0, tglobaladdr:$off, (CONST_I64 0), ty:$exp,
I64:$timeout)>,
@@ -390,10 +390,10 @@ defm : AStorePatOffsetOnly<i32, atomic_store_32, "ATOMIC_STORE_I32">;
defm : AStorePatOffsetOnly<i64, atomic_store_64, "ATOMIC_STORE_I64">;
multiclass AStorePatGlobalAddrOffOnly<ValueType ty, PatFrag kind, string inst> {
- def : Pat<(kind (WebAssemblywrapper tglobaladdr:$off), ty:$val),
+ def : Pat<(kind (WebAssemblyWrapper tglobaladdr:$off), ty:$val),
(!cast<NI>(inst#_A32) 0, tglobaladdr:$off, (CONST_I32 0), ty:$val)>,
Requires<[HasAddr32, HasAtomics, IsNotPIC]>;
- def : Pat<(kind (WebAssemblywrapper tglobaladdr:$off), ty:$val),
+ def : Pat<(kind (WebAssemblyWrapper tglobaladdr:$off), ty:$val),
(!cast<NI>(inst#_A64) 0, tglobaladdr:$off, (CONST_I64 0), ty:$val)>,
Requires<[HasAddr64, HasAtomics, IsNotPIC]>;
}
@@ -592,10 +592,10 @@ multiclass BinRMWPatOffsetOnly<ValueType ty, PatFrag kind, string inst> {
}
multiclass BinRMWPatGlobalAddrOffOnly<ValueType ty, PatFrag kind, string inst> {
- def : Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off), ty:$val)),
+ def : Pat<(ty (kind (WebAssemblyWrapper tglobaladdr:$off), ty:$val)),
(!cast<NI>(inst#_A32) 0, tglobaladdr:$off, (CONST_I32 0), ty:$val)>,
Requires<[HasAddr32, HasAtomics, IsNotPIC]>;
- def : Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off), ty:$val)),
+ def : Pat<(ty (kind (WebAssemblyWrapper tglobaladdr:$off), ty:$val)),
(!cast<NI>(inst#_A64) 0, tglobaladdr:$off, (CONST_I64 0), ty:$val)>,
Requires<[HasAddr64, HasAtomics, IsNotPIC]>;
}
@@ -659,7 +659,7 @@ class sext_bin_rmw_16_64<PatFrag kind> : sext_bin_rmw_8_64<kind>;
// Patterns for various addressing modes for truncating-extending binary RMWs.
multiclass BinRMWTruncExtPattern<
- PatFrag rmw_8, PatFrag rmw_16, PatFrag rmw_32, PatFrag rmw_64,
+ PatFrag rmw_8, PatFrag rmw_16, PatFrag rmw_32,
string inst8_32, string inst16_32, string inst8_64, string inst16_64, string inst32_64> {
// Truncating-extending binary RMWs with no constant offset
defm : BinRMWPatNoOffset<i32, zext_bin_rmw_8_32<rmw_8>, inst8_32>;
@@ -724,27 +724,27 @@ multiclass BinRMWTruncExtPattern<
}
defm : BinRMWTruncExtPattern<
- atomic_load_add_8, atomic_load_add_16, atomic_load_add_32, atomic_load_add_64,
+ atomic_load_add_8, atomic_load_add_16, atomic_load_add_32,
"ATOMIC_RMW8_U_ADD_I32", "ATOMIC_RMW16_U_ADD_I32",
"ATOMIC_RMW8_U_ADD_I64", "ATOMIC_RMW16_U_ADD_I64", "ATOMIC_RMW32_U_ADD_I64">;
defm : BinRMWTruncExtPattern<
- atomic_load_sub_8, atomic_load_sub_16, atomic_load_sub_32, atomic_load_sub_64,
+ atomic_load_sub_8, atomic_load_sub_16, atomic_load_sub_32,
"ATOMIC_RMW8_U_SUB_I32", "ATOMIC_RMW16_U_SUB_I32",
"ATOMIC_RMW8_U_SUB_I64", "ATOMIC_RMW16_U_SUB_I64", "ATOMIC_RMW32_U_SUB_I64">;
defm : BinRMWTruncExtPattern<
- atomic_load_and_8, atomic_load_and_16, atomic_load_and_32, atomic_load_and_64,
+ atomic_load_and_8, atomic_load_and_16, atomic_load_and_32,
"ATOMIC_RMW8_U_AND_I32", "ATOMIC_RMW16_U_AND_I32",
"ATOMIC_RMW8_U_AND_I64", "ATOMIC_RMW16_U_AND_I64", "ATOMIC_RMW32_U_AND_I64">;
defm : BinRMWTruncExtPattern<
- atomic_load_or_8, atomic_load_or_16, atomic_load_or_32, atomic_load_or_64,
+ atomic_load_or_8, atomic_load_or_16, atomic_load_or_32,
"ATOMIC_RMW8_U_OR_I32", "ATOMIC_RMW16_U_OR_I32",
"ATOMIC_RMW8_U_OR_I64", "ATOMIC_RMW16_U_OR_I64", "ATOMIC_RMW32_U_OR_I64">;
defm : BinRMWTruncExtPattern<
- atomic_load_xor_8, atomic_load_xor_16, atomic_load_xor_32, atomic_load_xor_64,
+ atomic_load_xor_8, atomic_load_xor_16, atomic_load_xor_32,
"ATOMIC_RMW8_U_XOR_I32", "ATOMIC_RMW16_U_XOR_I32",
"ATOMIC_RMW8_U_XOR_I64", "ATOMIC_RMW16_U_XOR_I64", "ATOMIC_RMW32_U_XOR_I64">;
defm : BinRMWTruncExtPattern<
- atomic_swap_8, atomic_swap_16, atomic_swap_32, atomic_swap_64,
+ atomic_swap_8, atomic_swap_16, atomic_swap_32,
"ATOMIC_RMW8_U_XCHG_I32", "ATOMIC_RMW16_U_XCHG_I32",
"ATOMIC_RMW8_U_XCHG_I64", "ATOMIC_RMW16_U_XCHG_I64",
"ATOMIC_RMW32_U_XCHG_I64">;
@@ -826,11 +826,11 @@ multiclass TerRMWPatOffsetOnly<ValueType ty, PatFrag kind, string inst> {
}
multiclass TerRMWPatGlobalAddrOffOnly<ValueType ty, PatFrag kind, string inst> {
- def : Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off), ty:$exp, ty:$new)),
+ def : Pat<(ty (kind (WebAssemblyWrapper tglobaladdr:$off), ty:$exp, ty:$new)),
(!cast<NI>(inst#_A32) 0, tglobaladdr:$off, (CONST_I32 0), ty:$exp,
ty:$new)>,
Requires<[HasAddr32, HasAtomics, IsNotPIC]>;
- def : Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off), ty:$exp, ty:$new)),
+ def : Pat<(ty (kind (WebAssemblyWrapper tglobaladdr:$off), ty:$exp, ty:$new)),
(!cast<NI>(inst#_A64) 0, tglobaladdr:$off, (CONST_I64 0), ty:$exp,
ty:$new)>,
Requires<[HasAddr64, HasAtomics, IsNotPIC]>;
@@ -895,7 +895,7 @@ class sext_ter_rmw_16_64<PatFrag kind> : sext_ter_rmw_8_64<kind>;
// Patterns for various addressing modes for truncating-extending ternary RMWs.
multiclass TerRMWTruncExtPattern<
- PatFrag rmw_8, PatFrag rmw_16, PatFrag rmw_32, PatFrag rmw_64,
+ PatFrag rmw_8, PatFrag rmw_16, PatFrag rmw_32,
string inst8_32, string inst16_32, string inst8_64, string inst16_64,
string inst32_64> {
// Truncating-extending ternary RMWs with no constant offset
@@ -961,7 +961,7 @@ multiclass TerRMWTruncExtPattern<
}
defm : TerRMWTruncExtPattern<
- atomic_cmp_swap_8, atomic_cmp_swap_16, atomic_cmp_swap_32, atomic_cmp_swap_64,
+ atomic_cmp_swap_8, atomic_cmp_swap_16, atomic_cmp_swap_32,
"ATOMIC_RMW8_U_CMPXCHG_I32", "ATOMIC_RMW16_U_CMPXCHG_I32",
"ATOMIC_RMW8_U_CMPXCHG_I64", "ATOMIC_RMW16_U_CMPXCHG_I64",
"ATOMIC_RMW32_U_CMPXCHG_I64">;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td
index 437b07bf8baf..be6547007aaf 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td
@@ -130,8 +130,7 @@ let Predicates = [HasExceptionHandling] in {
// Throwing an exception: throw / rethrow
let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
defm THROW : I<(outs), (ins tag_op:$tag, variable_ops),
- (outs), (ins tag_op:$tag),
- [(WebAssemblythrow (WebAssemblywrapper texternalsym:$tag))],
+ (outs), (ins tag_op:$tag), [],
"throw \t$tag", "throw \t$tag", 0x08>;
defm RETHROW : NRI<(outs), (ins i32imm:$depth), [], "rethrow \t$depth", 0x09>;
} // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1
@@ -147,14 +146,10 @@ defm END_TRY : NRI<(outs), (ins), [], "end_try", 0x0b>;
// Catching an exception: catch / catch_all
let hasCtrlDep = 1, hasSideEffects = 1 in {
-// Currently 'catch' can only extract an i32, which is sufficient for C++
-// support, but according to the spec 'catch' can extract any number of values
-// based on the tag type.
-defm CATCH : I<(outs I32:$dst), (ins tag_op:$tag),
- (outs), (ins tag_op:$tag),
- [(set I32:$dst,
- (WebAssemblycatch (WebAssemblywrapper texternalsym:$tag)))],
- "catch \t$dst, $tag", "catch \t$tag", 0x07>;
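+// 'catch' can extract any number of values based on the tag type, so its
+// results are modeled as variadic defs.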
+let variadicOpsAreDefs = 1 in
+defm CATCH : I<(outs), (ins tag_op:$tag, variable_ops),
+ (outs), (ins tag_op:$tag), [],
+ "catch", "catch \t$tag", 0x07>;
defm CATCH_ALL : NRI<(outs), (ins), [], "catch_all", 0x19>;
}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
index 15748067f123..ee9247a8bef9 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
@@ -26,6 +26,10 @@ def HasSIMD128 :
Predicate<"Subtarget->hasSIMD128()">,
AssemblerPredicate<(all_of FeatureSIMD128), "simd128">;
+def HasRelaxedSIMD :
+ Predicate<"Subtarget->hasRelaxedSIMD()">,
+ AssemblerPredicate<(all_of FeatureRelaxedSIMD), "relaxed-simd">;
+
def HasAtomics :
Predicate<"Subtarget->hasAtomics()">,
AssemblerPredicate<(all_of FeatureAtomics), "atomics">;
@@ -77,10 +81,6 @@ def SDT_WebAssemblyLocalSet : SDTypeProfile<0, 2, [SDTCisVT<0, i32>]>;
def SDT_WebAssemblyReturn : SDTypeProfile<0, -1, []>;
def SDT_WebAssemblyWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
SDTCisPtrTy<0>]>;
-def SDT_WebAssemblyWrapperPIC : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
- SDTCisPtrTy<0>]>;
-def SDT_WebAssemblyThrow : SDTypeProfile<0, -1, []>;
-def SDT_WebAssemblyCatch : SDTypeProfile<1, 1, [SDTCisPtrTy<0>]>;
def SDT_WebAssemblyGlobalGet : SDTypeProfile<1, 1, [SDTCisPtrTy<1>]>;
def SDT_WebAssemblyGlobalSet : SDTypeProfile<0, 2, [SDTCisPtrTy<1>]>;
@@ -102,14 +102,10 @@ def WebAssemblyargument : SDNode<"WebAssemblyISD::ARGUMENT",
def WebAssemblyreturn : SDNode<"WebAssemblyISD::RETURN",
SDT_WebAssemblyReturn,
[SDNPHasChain, SDNPVariadic]>;
-def WebAssemblywrapper : SDNode<"WebAssemblyISD::Wrapper",
+def WebAssemblyWrapper : SDNode<"WebAssemblyISD::Wrapper",
SDT_WebAssemblyWrapper>;
-def WebAssemblywrapperPIC : SDNode<"WebAssemblyISD::WrapperPIC",
- SDT_WebAssemblyWrapperPIC>;
-def WebAssemblythrow : SDNode<"WebAssemblyISD::THROW", SDT_WebAssemblyThrow,
- [SDNPHasChain, SDNPVariadic]>;
-def WebAssemblycatch : SDNode<"WebAssemblyISD::CATCH", SDT_WebAssemblyCatch,
- [SDNPHasChain, SDNPSideEffect]>;
+def WebAssemblyWrapperREL : SDNode<"WebAssemblyISD::WrapperREL",
+ SDT_WebAssemblyWrapper>;
def WebAssemblyglobal_get :
SDNode<"WebAssemblyISD::GLOBAL_GET", SDT_WebAssemblyGlobalGet,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
@@ -348,10 +344,10 @@ multiclass LOCAL<WebAssemblyRegClass rc, Operand global_op> {
} // hasSideEffects = 0
foreach vt = rc.RegTypes in {
def : Pat<(vt (WebAssemblyglobal_get
- (WebAssemblywrapper tglobaladdr:$addr))),
+ (WebAssemblyWrapper tglobaladdr:$addr))),
(!cast<NI>("GLOBAL_GET_" # rc) tglobaladdr:$addr)>;
def : Pat<(WebAssemblyglobal_set
- vt:$src, (WebAssemblywrapper tglobaladdr:$addr)),
+ vt:$src, (WebAssemblyWrapper tglobaladdr:$addr)),
(!cast<NI>("GLOBAL_SET_" # rc) tglobaladdr:$addr, vt:$src)>;
def : Pat<(vt (WebAssemblylocal_get (i32 timm:$local))),
(!cast<NI>("LOCAL_GET_" # rc) timm:$local)>;
@@ -386,38 +382,45 @@ defm CONST_F64 : I<(outs F64:$res), (ins f64imm_op:$imm),
"f64.const\t$res, $imm", "f64.const\t$imm", 0x44>;
} // isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1
-def : Pat<(i32 (WebAssemblywrapper tglobaladdr:$addr)),
+def : Pat<(i32 (WebAssemblyWrapper tglobaladdr:$addr)),
(CONST_I32 tglobaladdr:$addr)>, Requires<[IsNotPIC, HasAddr32]>;
-def : Pat<(i64 (WebAssemblywrapper tglobaladdr:$addr)),
+def : Pat<(i64 (WebAssemblyWrapper tglobaladdr:$addr)),
(CONST_I64 tglobaladdr:$addr)>, Requires<[IsNotPIC, HasAddr64]>;
-def : Pat<(i32 (WebAssemblywrapper tglobaladdr:$addr)),
+def : Pat<(i32 (WebAssemblyWrapper tglobaladdr:$addr)),
(GLOBAL_GET_I32 tglobaladdr:$addr)>, Requires<[IsPIC, HasAddr32]>;
-def : Pat<(i64 (WebAssemblywrapper tglobaladdr:$addr)),
+def : Pat<(i64 (WebAssemblyWrapper tglobaladdr:$addr)),
(GLOBAL_GET_I64 tglobaladdr:$addr)>, Requires<[IsPIC, HasAddr64]>;
-def : Pat<(i32 (WebAssemblywrapperPIC tglobaladdr:$addr)),
+def : Pat<(i32 (WebAssemblyWrapperREL tglobaladdr:$addr)),
(CONST_I32 tglobaladdr:$addr)>, Requires<[IsPIC, HasAddr32]>;
-def : Pat<(i64 (WebAssemblywrapperPIC tglobaladdr:$addr)),
+def : Pat<(i64 (WebAssemblyWrapperREL tglobaladdr:$addr)),
(CONST_I64 tglobaladdr:$addr)>, Requires<[IsPIC, HasAddr64]>;
-def : Pat<(i32 (WebAssemblywrapper tglobaltlsaddr:$addr)),
+def : Pat<(i32 (WebAssemblyWrapperREL tglobaltlsaddr:$addr)),
(CONST_I32 tglobaltlsaddr:$addr)>, Requires<[HasAddr32]>;
-def : Pat<(i64 (WebAssemblywrapper tglobaltlsaddr:$addr)),
+def : Pat<(i64 (WebAssemblyWrapperREL tglobaltlsaddr:$addr)),
(CONST_I64 tglobaltlsaddr:$addr)>, Requires<[HasAddr64]>;
-def : Pat<(i32 (WebAssemblywrapper texternalsym:$addr)),
+def : Pat<(i32 (WebAssemblyWrapper tglobaltlsaddr:$addr)),
+ (GLOBAL_GET_I32 tglobaltlsaddr:$addr)>, Requires<[HasAddr32]>;
+def : Pat<(i64 (WebAssemblyWrapper tglobaltlsaddr:$addr)),
+ (GLOBAL_GET_I64 tglobaltlsaddr:$addr)>, Requires<[HasAddr64]>;
+
+def : Pat<(i32 (WebAssemblyWrapper texternalsym:$addr)),
(GLOBAL_GET_I32 texternalsym:$addr)>, Requires<[IsPIC, HasAddr32]>;
-def : Pat<(i64 (WebAssemblywrapper texternalsym:$addr)),
+def : Pat<(i64 (WebAssemblyWrapper texternalsym:$addr)),
(GLOBAL_GET_I64 texternalsym:$addr)>, Requires<[IsPIC, HasAddr64]>;
-def : Pat<(i32 (WebAssemblywrapper texternalsym:$addr)),
+def : Pat<(i32 (WebAssemblyWrapper texternalsym:$addr)),
(CONST_I32 texternalsym:$addr)>, Requires<[IsNotPIC, HasAddr32]>;
-def : Pat<(i64 (WebAssemblywrapper texternalsym:$addr)),
+def : Pat<(i64 (WebAssemblyWrapper texternalsym:$addr)),
(CONST_I64 texternalsym:$addr)>, Requires<[IsNotPIC, HasAddr64]>;
-def : Pat<(i32 (WebAssemblywrapper mcsym:$sym)), (CONST_I32 mcsym:$sym)>;
-def : Pat<(i64 (WebAssemblywrapper mcsym:$sym)), (CONST_I64 mcsym:$sym)>;
+def : Pat<(i32 (WebAssemblyWrapperREL texternalsym:$addr)),
+ (CONST_I32 texternalsym:$addr)>, Requires<[IsPIC, HasAddr32]>;
+def : Pat<(i64 (WebAssemblyWrapperREL texternalsym:$addr)),
+ (CONST_I64 texternalsym:$addr)>, Requires<[IsPIC, HasAddr64]>;
//===----------------------------------------------------------------------===//
// Additional sets of instructions.
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
index 82f5e985c558..a70f62dde845 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
@@ -117,10 +117,10 @@ defm : LoadPatOffsetOnly<f32, load, "LOAD_F32">;
defm : LoadPatOffsetOnly<f64, load, "LOAD_F64">;
multiclass LoadPatGlobalAddrOffOnly<ValueType ty, SDPatternOperator kind, string inst> {
- def : Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off))),
+ def : Pat<(ty (kind (WebAssemblyWrapper tglobaladdr:$off))),
(!cast<NI>(inst # "_A32") 0, tglobaladdr:$off, (CONST_I32 0))>,
Requires<[IsNotPIC, HasAddr32]>;
- def : Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off))),
+ def : Pat<(ty (kind (WebAssemblyWrapper tglobaladdr:$off))),
(!cast<NI>(inst # "_A64") 0, tglobaladdr:$off, (CONST_I64 0))>,
Requires<[IsNotPIC, HasAddr64]>;
}
@@ -313,11 +313,11 @@ defm : StorePatOffsetOnly<f32, store, "STORE_F32">;
defm : StorePatOffsetOnly<f64, store, "STORE_F64">;
multiclass StorePatGlobalAddrOffOnly<ValueType ty, PatFrag kind, string inst> {
- def : Pat<(kind ty:$val, (WebAssemblywrapper tglobaladdr:$off)),
+ def : Pat<(kind ty:$val, (WebAssemblyWrapper tglobaladdr:$off)),
(!cast<NI>(inst # "_A32") 0, tglobaladdr:$off, (CONST_I32 0),
ty:$val)>,
Requires<[IsNotPIC, HasAddr32]>;
- def : Pat<(kind ty:$val, (WebAssemblywrapper tglobaladdr:$off)),
+ def : Pat<(kind ty:$val, (WebAssemblyWrapper tglobaladdr:$off)),
(!cast<NI>(inst # "_A64") 0, tglobaladdr:$off, (CONST_I64 0),
ty:$val)>,
Requires<[IsNotPIC, HasAddr64]>;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 6429b46673a6..30b99c3a69a9 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -11,17 +11,34 @@
///
//===----------------------------------------------------------------------===//
-// Instructions requiring HasSIMD128 and the simd128 prefix byte
-multiclass SIMD_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
- list<dag> pattern_r, string asmstr_r = "",
- string asmstr_s = "", bits<32> simdop = -1> {
+// Instructions using the SIMD opcode prefix and requiring one of the SIMD
+// feature predicates.
+multiclass ABSTRACT_SIMD_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
+ list<dag> pattern_r, string asmstr_r,
+ string asmstr_s, bits<32> simdop,
+ Predicate simd_level> {
defm "" : I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r, asmstr_s,
!if(!ge(simdop, 0x100),
!or(0xfd0000, !and(0xffff, simdop)),
!or(0xfd00, !and(0xff, simdop)))>,
- Requires<[HasSIMD128]>;
+ Requires<[simd_level]>;
+}
+
+multiclass SIMD_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
+ list<dag> pattern_r, string asmstr_r = "",
+ string asmstr_s = "", bits<32> simdop = -1> {
+ defm "" : ABSTRACT_SIMD_I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r,
+ asmstr_s, simdop, HasSIMD128>;
}
+multiclass RELAXED_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
+ list<dag> pattern_r, string asmstr_r = "",
+ string asmstr_s = "", bits<32> simdop = -1> {
+ defm "" : ABSTRACT_SIMD_I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r,
+ asmstr_s, simdop, HasRelaxedSIMD>;
+}
+
+
defm "" : ARGUMENT<V128, v16i8>;
defm "" : ARGUMENT<V128, v8i16>;
defm "" : ARGUMENT<V128, v4i32>;
@@ -267,6 +284,16 @@ multiclass SIMDLoadZero<Vec vec, bits<32> simdop> {
defm "" : SIMDLoadZero<I32x4, 0x5c>;
defm "" : SIMDLoadZero<I64x2, 0x5d>;
+// Use load_zero to load scalars into vectors as well where possible.
+// TODO: i32, i16, and i8 scalars
+def load_scalar :
+ PatFrag<(ops node:$addr), (scalar_to_vector (i64 (load $addr)))>;
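+// (A scalar i64 load inserted into lane 0 of a v2i64 selects to
+// v128.load64_zero via the LOAD_ZERO_I64x2 patterns below.)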
+defm : LoadPatNoOffset<v2i64, load_scalar, "LOAD_ZERO_I64x2">;
+defm : LoadPatImmOff<v2i64, load_scalar, regPlusImm, "LOAD_ZERO_I64x2">;
+defm : LoadPatImmOff<v2i64, load_scalar, or_is_add, "LOAD_ZERO_I64x2">;
+defm : LoadPatOffsetOnly<v2i64, load_scalar, "LOAD_ZERO_I64x2">;
+defm : LoadPatGlobalAddrOffOnly<v2i64, load_scalar, "LOAD_ZERO_I64x2">;
+
// TODO: f32x4 and f64x2 as well
foreach vec = [I32x4, I64x2] in {
defvar inst = "LOAD_ZERO_"#vec;
@@ -1165,6 +1192,16 @@ def : Pat<(vec.int_vt (vselect
(pmax $lhs, $rhs)>;
}
+// And match the pmin/pmax LLVM intrinsics as well
+def : Pat<(v4f32 (int_wasm_pmin (v4f32 V128:$lhs), (v4f32 V128:$rhs))),
+ (PMIN_F32x4 V128:$lhs, V128:$rhs)>;
+def : Pat<(v4f32 (int_wasm_pmax (v4f32 V128:$lhs), (v4f32 V128:$rhs))),
+ (PMAX_F32x4 V128:$lhs, V128:$rhs)>;
+def : Pat<(v2f64 (int_wasm_pmin (v2f64 V128:$lhs), (v2f64 V128:$rhs))),
+ (PMIN_F64x2 V128:$lhs, V128:$rhs)>;
+def : Pat<(v2f64 (int_wasm_pmax (v2f64 V128:$lhs), (v2f64 V128:$rhs))),
+ (PMAX_F64x2 V128:$lhs, V128:$rhs)>;
+
//===----------------------------------------------------------------------===//
// Conversions
//===----------------------------------------------------------------------===//
@@ -1241,87 +1278,6 @@ multiclass SIMDNarrow<Vec vec, bits<32> baseInst> {
defm "" : SIMDNarrow<I16x8, 101>;
defm "" : SIMDNarrow<I32x4, 133>;
-// Use narrowing operations for truncating stores. Since the narrowing
-// operations are saturating instead of truncating, we need to mask
-// the stored values first.
-def store_v8i8_trunc_v8i16 :
- OutPatFrag<(ops node:$val),
- (EXTRACT_LANE_I64x2
- (NARROW_U_I8x16
- (AND
- (CONST_V128_I16x8
- 0x00ff, 0x00ff, 0x00ff, 0x00ff,
- 0x00ff, 0x00ff, 0x00ff, 0x00ff),
- node:$val),
- $val), // Unused input
- 0)>;
-
-def store_v4i16_trunc_v4i32 :
- OutPatFrag<(ops node:$val),
- (EXTRACT_LANE_I64x2
- (NARROW_U_I16x8
- (AND
- (CONST_V128_I32x4
- 0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff),
- node:$val),
- $val), // Unused input
- 0)>;
-
-// Store patterns adapted from WebAssemblyInstrMemory.td
-multiclass NarrowingStorePatNoOffset<Vec vec, OutPatFrag out> {
- defvar node = !cast<PatFrag>("truncstorevi"#vec.split.lane_bits);
- def : Pat<(node vec.vt:$val, I32:$addr),
- (STORE_I64_A32 0, 0, $addr, (out $val))>,
- Requires<[HasAddr32]>;
- def : Pat<(node vec.vt:$val, I64:$addr),
- (STORE_I64_A64 0, 0, $addr, (out $val))>,
- Requires<[HasAddr64]>;
-}
-
-defm : NarrowingStorePatNoOffset<I16x8, store_v8i8_trunc_v8i16>;
-defm : NarrowingStorePatNoOffset<I32x4, store_v4i16_trunc_v4i32>;
-
-multiclass NarrowingStorePatImmOff<Vec vec, PatFrag operand, OutPatFrag out> {
- defvar node = !cast<PatFrag>("truncstorevi"#vec.split.lane_bits);
- def : Pat<(node vec.vt:$val, (operand I32:$addr, imm:$off)),
- (STORE_I64_A32 0, imm:$off, $addr, (out $val))>,
- Requires<[HasAddr32]>;
- def : Pat<(node vec.vt:$val, (operand I64:$addr, imm:$off)),
- (STORE_I64_A64 0, imm:$off, $addr, (out $val))>,
- Requires<[HasAddr64]>;
-}
-
-defm : NarrowingStorePatImmOff<I16x8, regPlusImm, store_v8i8_trunc_v8i16>;
-defm : NarrowingStorePatImmOff<I32x4, regPlusImm, store_v4i16_trunc_v4i32>;
-defm : NarrowingStorePatImmOff<I16x8, or_is_add, store_v8i8_trunc_v8i16>;
-defm : NarrowingStorePatImmOff<I32x4, or_is_add, store_v4i16_trunc_v4i32>;
-
-multiclass NarrowingStorePatOffsetOnly<Vec vec, OutPatFrag out> {
- defvar node = !cast<PatFrag>("truncstorevi"#vec.split.lane_bits);
- def : Pat<(node vec.vt:$val, imm:$off),
- (STORE_I64_A32 0, imm:$off, (CONST_I32 0), (out $val))>,
- Requires<[HasAddr32]>;
- def : Pat<(node vec.vt:$val, imm:$off),
- (STORE_I64_A64 0, imm:$off, (CONST_I64 0), (out $val))>,
- Requires<[HasAddr64]>;
-}
-
-defm : NarrowingStorePatOffsetOnly<I16x8, store_v8i8_trunc_v8i16>;
-defm : NarrowingStorePatOffsetOnly<I32x4, store_v4i16_trunc_v4i32>;
-
-multiclass NarrowingStorePatGlobalAddrOffOnly<Vec vec, OutPatFrag out> {
- defvar node = !cast<PatFrag>("truncstorevi"#vec.split.lane_bits);
- def : Pat<(node vec.vt:$val, (WebAssemblywrapper tglobaladdr:$off)),
- (STORE_I64_A32 0, tglobaladdr:$off, (CONST_I32 0), (out $val))>,
- Requires<[IsNotPIC, HasAddr32]>;
- def : Pat<(node vec.vt:$val, (WebAssemblywrapper tglobaladdr:$off)),
- (STORE_I64_A64 0, tglobaladdr:$off, (CONST_I64 0), (out $val))>,
- Requires<[IsNotPIC, HasAddr64]>;
-}
-
-defm : NarrowingStorePatGlobalAddrOffOnly<I16x8, store_v8i8_trunc_v8i16>;
-defm : NarrowingStorePatGlobalAddrOffOnly<I32x4, store_v4i16_trunc_v4i32>;
-
// Bitcasts are nops
// Matching bitcast t1 to t1 causes strange errors, so avoid repeating types
foreach t1 = AllVecs in
@@ -1349,9 +1305,107 @@ def promote_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
def promote_low : SDNode<"WebAssemblyISD::PROMOTE_LOW", promote_t>;
defm "" : SIMDConvert<F64x2, F32x4, promote_low, "promote_low_f32x4", 0x5f>;
+// Lower extending loads to load64_zero + promote_low
+def extloadv2f32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
+ let MemoryVT = v2f32;
+}
+// Adapted from the body of LoadPatNoOffset
+// TODO: other addressing patterns
+def : Pat<(v2f64 (extloadv2f32 (i32 I32:$addr))),
+ (promote_low_F64x2 (LOAD_ZERO_I64x2_A32 0, 0, I32:$addr))>,
+ Requires<[HasAddr32]>;
+def : Pat<(v2f64 (extloadv2f32 (i64 I64:$addr))),
+ (promote_low_F64x2 (LOAD_ZERO_I64x2_A64 0, 0, I64:$addr))>,
+ Requires<[HasAddr64]>;
+
//===----------------------------------------------------------------------===//
// Saturating Rounding Q-Format Multiplication
//===----------------------------------------------------------------------===//
defm Q15MULR_SAT_S :
SIMDBinary<I16x8, int_wasm_q15mulr_sat_signed, "q15mulr_sat_s", 0x82>;
+
+//===----------------------------------------------------------------------===//
+// Fused Multiply-Add and Subtract (FMA/FMS)
+//===----------------------------------------------------------------------===//
+
+multiclass SIMDFM<Vec vec, bits<32> simdopA, bits<32> simdopS> {
+ defm FMA_#vec :
+ RELAXED_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins),
+ [(set (vec.vt V128:$dst), (int_wasm_fma
+ (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)))],
+ vec.prefix#".fma\t$dst, $a, $b, $c", vec.prefix#".fma", simdopA>;
+ defm FMS_#vec :
+ RELAXED_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins),
+ [(set (vec.vt V128:$dst), (int_wasm_fms
+ (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)))],
+ vec.prefix#".fms\t$dst, $a, $b, $c", vec.prefix#".fms", simdopS>;
+}
+
+defm "" : SIMDFM<F32x4, 0xaf, 0xb0>;
+defm "" : SIMDFM<F64x2, 0xcf, 0xd0>;
+
+//===----------------------------------------------------------------------===//
+// Laneselect
+//===----------------------------------------------------------------------===//
+
+multiclass SIMDLANESELECT<Vec vec, bits<32> op> {
+ defm LANESELECT_#vec :
+ RELAXED_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins),
+ [(set (vec.vt V128:$dst), (int_wasm_laneselect
+ (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)))],
+ vec.prefix#".laneselect\t$dst, $a, $b, $c", vec.prefix#".laneselect", op>;
+}
+
+defm "" : SIMDLANESELECT<I8x16, 0xb2>;
+defm "" : SIMDLANESELECT<I16x8, 0xb3>;
+defm "" : SIMDLANESELECT<I32x4, 0xd2>;
+defm "" : SIMDLANESELECT<I64x2, 0xd3>;
+
+
+//===----------------------------------------------------------------------===//
+// Relaxed swizzle
+//===----------------------------------------------------------------------===//
+
+defm RELAXED_SWIZZLE :
+ RELAXED_I<(outs V128:$dst), (ins V128:$src, V128:$mask), (outs), (ins),
+ [(set (v16i8 V128:$dst),
+ (int_wasm_relaxed_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)))],
+ "i8x16.relaxed_swizzle\t$dst, $src, $mask", "i8x16.relaxed_swizzle", 162>;
+
+//===----------------------------------------------------------------------===//
+// Relaxed floating-point min and max.
+//===----------------------------------------------------------------------===//
+
+multiclass SIMD_RELAXED_FMINMAX<Vec vec, bits<32> simdopMin, bits<32> simdopMax> {
+ defm RELAXED_FMIN_#vec :
+ RELAXED_I<(outs V128:$dst), (ins V128:$a, V128:$b), (outs), (ins),
+ [(set (vec.vt V128:$dst), (int_wasm_relaxed_min
+ (vec.vt V128:$a), (vec.vt V128:$b)))],
+ vec.prefix#".relaxed_min\t$dst, $a, $b", vec.prefix#".relaxed_min", simdopMin>;
+ defm RELAXED_FMAX_#vec :
+ RELAXED_I<(outs V128:$dst), (ins V128:$a, V128:$b), (outs), (ins),
+ [(set (vec.vt V128:$dst), (int_wasm_relaxed_max
+ (vec.vt V128:$a), (vec.vt V128:$b)))],
+ vec.prefix#".relaxed_max\t$dst, $a, $b", vec.prefix#".relaxed_max", simdopMax>;
+}
+
+defm "" : SIMD_RELAXED_FMINMAX<F32x4, 0xb4, 0xe2>;
+defm "" : SIMD_RELAXED_FMINMAX<F64x2, 0xd4, 0xee>;
+
+//===----------------------------------------------------------------------===//
+// Relaxed floating-point to int conversions
+//===----------------------------------------------------------------------===//
+
+multiclass SIMD_RELAXED_CONVERT<Vec vec, Vec arg, SDPatternOperator op, string name, bits<32> simdop> {
+ defm op#_#vec :
+ RELAXED_I<(outs V128:$dst), (ins V128:$vec), (outs), (ins),
+ [(set (vec.vt V128:$dst), (vec.vt (op (arg.vt V128:$vec))))],
+ vec.prefix#"."#name#"\t$dst, $vec", vec.prefix#"."#name, simdop>;
+}
+
+defm "" : SIMD_RELAXED_CONVERT<I32x4, F32x4, int_wasm_relaxed_trunc_signed, "relaxed_trunc_f32x4_s", 0xa5>;
+defm "" : SIMD_RELAXED_CONVERT<I32x4, F32x4, int_wasm_relaxed_trunc_unsigned, "relaxed_trunc_f32x4_u", 0xa6>;
+
+defm "" : SIMD_RELAXED_CONVERT<I32x4, F64x2, int_wasm_relaxed_trunc_zero_signed, "relaxed_trunc_f64x2_s_zero", 0xc5>;
+defm "" : SIMD_RELAXED_CONVERT<I32x4, F64x2, int_wasm_relaxed_trunc_zero_unsigned, "relaxed_trunc_f64x2_u_zero", 0xc6>;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrTable.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrTable.td
index 2348bb165daf..e44c2073eaeb 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrTable.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrTable.td
@@ -11,9 +11,18 @@
/// Instructions that handle tables
//===----------------------------------------------------------------------===//
-multiclass TABLE<WebAssemblyRegClass rt> {
+def WebAssemblyTableSet_t : SDTypeProfile<0, 3, [SDTCisPtrTy<1>]>;
+def WebAssemblyTableSet : SDNode<"WebAssemblyISD::TABLE_SET", WebAssemblyTableSet_t,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
+def WebAssemblyTableGet_t : SDTypeProfile<1, 2, [SDTCisPtrTy<1>]>;
+def WebAssemblyTableGet : SDNode<"WebAssemblyISD::TABLE_GET", WebAssemblyTableGet_t,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+
+
+multiclass TABLE<WebAssemblyRegClass rc> {
let mayLoad = 1 in
- defm TABLE_GET_#rt : I<(outs rt:$res), (ins table32_op:$table, I32:$i),
+ defm TABLE_GET_#rc : I<(outs rc:$res), (ins table32_op:$table, I32:$i),
(outs), (ins table32_op:$table),
[],
"table.get\t$res, $table, $i",
@@ -21,41 +30,43 @@ multiclass TABLE<WebAssemblyRegClass rt> {
0x25>;
let mayStore = 1 in
- defm TABLE_SET_#rt : I<(outs), (ins table32_op:$table, I32:$i, rt:$val),
+ defm TABLE_SET_#rc : I<(outs), (ins table32_op:$table, I32:$i, rc:$val),
(outs), (ins table32_op:$table),
[],
"table.set\t$table, $i, $val",
"table.set\t$table",
0x26>;
- defm TABLE_GROW_#rt : I<(outs I32:$sz), (ins table32_op:$table, rt:$val, I32:$n),
+ defm TABLE_GROW_#rc : I<(outs I32:$sz), (ins table32_op:$table, rc:$val, I32:$n),
(outs), (ins table32_op:$table),
[],
"table.grow\t$sz, $table, $val, $n",
"table.grow\t$table",
0xfc0f>;
- defm TABLE_FILL_#rt : I<(outs), (ins table32_op:$table, I32:$i, rt:$val, I32:$n),
+ defm TABLE_FILL_#rc : I<(outs), (ins table32_op:$table, I32:$i, rc:$val, I32:$n),
(outs), (ins table32_op:$table),
[],
"table.fill\t$table, $i, $val, $n",
"table.fill\t$table",
0xfc11>;
+ foreach vt = rc.RegTypes in {
+ def : Pat<(vt (WebAssemblyTableGet (WebAssemblyWrapper tglobaladdr:$table), i32:$idx)),
+ (!cast<NI>("TABLE_GET_" # rc) tglobaladdr:$table, i32:$idx)>;
+ def : Pat<(WebAssemblyTableSet
+ (WebAssemblyWrapper tglobaladdr:$table),
+ i32:$idx,
+ vt:$src),
+ (!cast<NI>("TABLE_SET_" # rc) tglobaladdr:$table, i32:$idx, vt:$src)>;
+ }
}
defm "" : TABLE<FUNCREF>, Requires<[HasReferenceTypes]>;
defm "" : TABLE<EXTERNREF>, Requires<[HasReferenceTypes]>;
-def wasm_table_set_t : SDTypeProfile<0, 3, []>;
-def wasm_table_set : SDNode<"WebAssemblyISD::TABLE_SET", wasm_table_set_t,
- [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
-
-def : Pat<(wasm_table_set i32:$table, i32:$idx, funcref:$r),
- (TABLE_SET_FUNCREF i32:$table, i32:$idx, funcref:$r)>,
- Requires<[HasReferenceTypes]>;
-def : Pat<(wasm_table_set i32:$table, i32:$idx, externref:$r),
- (TABLE_SET_EXTERNREF i32:$table, i32:$idx, externref:$r)>,
+def : Pat<(WebAssemblyTableSet mcsym:$table, i32:$idx, funcref:$r),
+ (TABLE_SET_FUNCREF mcsym:$table, i32:$idx, funcref:$r)>,
Requires<[HasReferenceTypes]>;
defm TABLE_SIZE : I<(outs I32:$sz), (ins table32_op:$table),
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp
index 01b3aa887738..52226206eb32 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp
@@ -63,12 +63,11 @@ bool WebAssemblyLowerBrUnless::runOnMachineFunction(MachineFunction &MF) {
auto &MRI = MF.getRegInfo();
for (auto &MBB : MF) {
- for (auto MII = MBB.begin(); MII != MBB.end();) {
- MachineInstr *MI = &*MII++;
- if (MI->getOpcode() != WebAssembly::BR_UNLESS)
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
+ if (MI.getOpcode() != WebAssembly::BR_UNLESS)
continue;
- Register Cond = MI->getOperand(1).getReg();
+ Register Cond = MI.getOperand(1).getReg();
bool Inverted = false;
// Attempt to invert the condition in place.
@@ -189,7 +188,7 @@ bool WebAssemblyLowerBrUnless::runOnMachineFunction(MachineFunction &MF) {
// instruction to invert it.
if (!Inverted) {
Register Tmp = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
- BuildMI(MBB, MI, MI->getDebugLoc(), TII.get(WebAssembly::EQZ_I32), Tmp)
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII.get(WebAssembly::EQZ_I32), Tmp)
.addReg(Cond);
MFI.stackifyVReg(MRI, Tmp);
Cond = Tmp;
@@ -199,10 +198,10 @@ bool WebAssemblyLowerBrUnless::runOnMachineFunction(MachineFunction &MF) {
// The br_unless condition has now been inverted. Insert a br_if and
// delete the br_unless.
assert(Inverted);
- BuildMI(MBB, MI, MI->getDebugLoc(), TII.get(WebAssembly::BR_IF))
- .add(MI->getOperand(0))
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII.get(WebAssembly::BR_IF))
+ .add(MI.getOperand(0))
.addReg(Cond);
- MBB.erase(MI);
+ MBB.erase(&MI);
}
}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
index 599829a9e474..4eacc921b6cd 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
@@ -7,15 +7,12 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// This file lowers exception-related instructions and setjmp/longjmp
-/// function calls in order to use Emscripten's JavaScript try and catch
-/// mechanism.
+/// This file lowers exception-related instructions and setjmp/longjmp function
+/// calls to use Emscripten's library functions. The pass uses JavaScript's try
+/// and catch mechanism in case of Emscripten EH/SjLj and Wasm EH intrinsics in
+/// case of Wasm SjLj.
///
-/// To handle exceptions and setjmp/longjmps, this scheme relies on JavaScript's
-/// try and catch syntax and relevant exception-related libraries implemented
-/// in JavaScript glue code that will be produced by Emscripten.
-///
-/// * Exception handling
+/// * Emscripten exception handling
/// This pass lowers invokes and landingpads into library functions in JS glue
/// code. Invokes are lowered into function wrappers called invoke wrappers that
/// exist in JS side, which wraps the original function call with JS try-catch.
@@ -23,7 +20,7 @@
/// variables (see below) so we can check whether an exception occurred from
/// wasm code and handle it appropriately.
///
-/// * Setjmp-longjmp handling
+/// * Emscripten setjmp-longjmp handling
/// This pass lowers setjmp to a reasonably-performant approach for emscripten.
/// The idea is that each block with a setjmp is broken up into two parts: the
/// part containing setjmp and the part right after the setjmp. The latter part
@@ -52,7 +49,7 @@
/// __threwValue is 0 for exceptions, and the argument to longjmp in case of
/// longjmp.
///
-/// * Exception handling
+/// * Emscripten exception handling
///
/// 2) We assume the existence of setThrew and setTempRet0/getTempRet0 functions
/// at link time. setThrew exists in Emscripten's compiler-rt:
@@ -121,16 +118,16 @@
/// call @llvm_eh_typeid_for(type)
/// llvm_eh_typeid_for function will be generated in JS glue code.
///
-/// * Setjmp / Longjmp handling
+/// * Emscripten setjmp / longjmp handling
///
-/// In case calls to longjmp() exists
+/// If there are calls to longjmp()
///
/// 1) Lower
-/// longjmp(buf, value)
+/// longjmp(env, val)
/// into
-/// emscripten_longjmp(buf, value)
+/// emscripten_longjmp(env, val)
///
-/// In case calls to setjmp() exists
+/// If there are calls to setjmp()
///
/// 2) In the function entry that calls setjmp, initialize setjmpTable and
/// setjmpTableSize as follows:
@@ -141,9 +138,9 @@
/// Emscripten compiler-rt.
///
/// 3) Lower
-/// setjmp(buf)
+/// setjmp(env)
/// into
-/// setjmpTable = saveSetjmp(buf, label, setjmpTable, setjmpTableSize);
+/// setjmpTable = saveSetjmp(env, label, setjmpTable, setjmpTableSize);
/// setjmpTableSize = getTempRet0();
/// For each dynamic setjmp call, setjmpTable stores its ID (a number which
/// is incrementally assigned from 0) and its label (a unique number that
@@ -151,10 +148,9 @@
/// setjmpTable, it is reallocated in saveSetjmp() in Emscripten's
/// compiler-rt and it will return the new table address, and assign the new
/// table size in setTempRet0(). saveSetjmp also stores the setjmp's ID into
-/// the buffer buf. A BB with setjmp is split into two after setjmp call in
+/// the buffer 'env'. A BB with setjmp is split into two after setjmp call in
/// order to make the post-setjmp BB the possible destination of longjmp BB.
///
-///
/// 4) Lower every call that might longjmp into
/// __THREW__ = 0;
/// call @__invoke_SIG(func, arg1, arg2)
@@ -171,7 +167,7 @@
/// %label = -1;
/// }
/// longjmp_result = getTempRet0();
-/// switch label {
+/// switch %label {
/// label 1: goto post-setjmp BB 1
/// label 2: goto post-setjmp BB 2
/// ...
@@ -188,23 +184,114 @@
/// occurred. Otherwise we jump to the right post-setjmp BB based on the
/// label.
///
+/// * Wasm setjmp / longjmp handling
+/// This mode still uses some Emscripten library functions but not JavaScript's
+/// try-catch mechanism. It instead uses Wasm exception handling intrinsics,
+/// which will be lowered to exception handling instructions.
+///
+/// If there are calls to longjmp()
+///
+/// 1) Lower
+/// longjmp(env, val)
+/// into
+/// __wasm_longjmp(env, val)
+///
+/// If there are calls to setjmp()
+///
+/// 2) and 3): The same as 2) and 3) in Emscripten SjLj.
+/// (setjmpTable/setjmpTableSize initialization + setjmp callsite
+/// transformation)
+///
+/// 4) Create a catchpad with a wasm.catch() intrinsic, which returns the value
+///    thrown by the __wasm_longjmp function. In the Emscripten library, we
+///    have this struct:
+///
+/// struct __WasmLongjmpArgs {
+/// void *env;
+/// int val;
+/// };
+/// struct __WasmLongjmpArgs __wasm_longjmp_args;
+///
+/// The thrown value here is a pointer to the __wasm_longjmp_args struct
+/// object. We use this struct to transfer two values by throwing a single
+/// value. Wasm throw and catch instructions are capable of throwing and
+/// catching multiple values, but that requires multivalue support, which is
+/// currently not very reliable.
+/// TODO Switch to throwing and catching two values without using the struct
+///
+/// All longjmpable function calls will be converted to an invoke that will
+/// unwind to this catchpad in case a longjmp occurs. Within the catchpad, we
+/// test the thrown values using the testSetjmp function, as we do for
+/// Emscripten SjLj. The main difference is that in Emscripten SjLj we need to
+/// transform every longjmpable callsite into a sequence of code that includes
+/// a testSetjmp() call, whereas in Wasm SjLj the testing happens in only one
+/// place: this catchpad.
+///
+/// After calling testSetjmp(), if the longjmp does not correspond to one of
+/// the setjmps within the current function, we rethrow the longjmp by calling
+/// __wasm_longjmp(). If it corresponds to one of the setjmps in the function,
+/// we jump to the beginning of the function, which contains a switch to each
+/// post-setjmp BB. Again, in Emscripten SjLj this switch is added for every
+/// longjmpable callsite; in Wasm SjLj we do this only once, at the top of the
+/// function (after the setjmpTable/setjmpTableSize initialization).
+///
+/// The below is the pseudocode for what we have described
+///
+/// entry:
+/// Initialize setjmpTable and setjmpTableSize
+///
+/// setjmp.dispatch:
+/// switch %label {
+/// label 1: goto post-setjmp BB 1
+/// label 2: goto post-setjmp BB 2
+/// ...
+///     default: goto split next BB
+/// }
+/// ...
+///
+/// bb:
+/// invoke void @foo() ;; foo is a longjmpable function
+/// to label %next unwind label %catch.dispatch.longjmp
+/// ...
+///
+/// catch.dispatch.longjmp:
+/// %0 = catchswitch within none [label %catch.longjmp] unwind to caller
+///
+/// catch.longjmp:
+/// %longjmp.args = wasm.catch() ;; struct __WasmLongjmpArgs
+/// %env = load 'env' field from __WasmLongjmpArgs
+/// %val = load 'val' field from __WasmLongjmpArgs
+/// %label = testSetjmp(mem[%env], setjmpTable, setjmpTableSize);
+/// if (%label == 0)
+/// __wasm_longjmp(%env, %val)
+/// catchret to %setjmp.dispatch
+///
///===----------------------------------------------------------------------===//
#include "WebAssembly.h"
#include "WebAssemblyTargetMachine.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/WasmEHFuncInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Transforms/Utils/SSAUpdaterBulk.h"
using namespace llvm;
#define DEBUG_TYPE "wasm-lower-em-ehsjlj"
+// Emscripten's asm.js-style exception handling
+extern cl::opt<bool> WasmEnableEmEH;
+// Emscripten's asm.js-style setjmp/longjmp handling
+extern cl::opt<bool> WasmEnableEmSjLj;
+// Wasm setjmp/longjmp handling using wasm EH instructions
+extern cl::opt<bool> WasmEnableSjLj;
+
static cl::list<std::string>
EHAllowlist("emscripten-cxx-exceptions-allowed",
cl::desc("The list of function names in which Emscripten-style "
@@ -214,19 +301,25 @@ static cl::list<std::string>
namespace {
class WebAssemblyLowerEmscriptenEHSjLj final : public ModulePass {
- bool EnableEH; // Enable exception handling
- bool EnableSjLj; // Enable setjmp/longjmp handling
- bool DoSjLj; // Whether we actually perform setjmp/longjmp handling
-
- GlobalVariable *ThrewGV = nullptr;
- GlobalVariable *ThrewValueGV = nullptr;
- Function *GetTempRet0Func = nullptr;
- Function *SetTempRet0Func = nullptr;
- Function *ResumeF = nullptr;
- Function *EHTypeIDF = nullptr;
- Function *EmLongjmpF = nullptr;
- Function *SaveSetjmpF = nullptr;
- Function *TestSetjmpF = nullptr;
+ bool EnableEmEH; // Enable Emscripten exception handling
+ bool EnableEmSjLj; // Enable Emscripten setjmp/longjmp handling
+ bool EnableWasmSjLj; // Enable Wasm setjmp/longjmp handling
+ bool DoSjLj; // Whether we actually perform setjmp/longjmp handling
+
+ GlobalVariable *ThrewGV = nullptr; // __THREW__ (Emscripten)
+ GlobalVariable *ThrewValueGV = nullptr; // __threwValue (Emscripten)
+ Function *GetTempRet0F = nullptr; // getTempRet0() (Emscripten)
+ Function *SetTempRet0F = nullptr; // setTempRet0() (Emscripten)
+ Function *ResumeF = nullptr; // __resumeException() (Emscripten)
+ Function *EHTypeIDF = nullptr; // llvm.eh.typeid.for() (intrinsic)
+ Function *EmLongjmpF = nullptr; // emscripten_longjmp() (Emscripten)
+ Function *SaveSetjmpF = nullptr; // saveSetjmp() (Emscripten)
+ Function *TestSetjmpF = nullptr; // testSetjmp() (Emscripten)
+ Function *WasmLongjmpF = nullptr; // __wasm_longjmp() (Emscripten)
+ Function *CatchF = nullptr; // wasm.catch() (intrinsic)
+
+ // type of 'struct __WasmLongjmpArgs' defined in emscripten
+ Type *LongjmpArgsTy = nullptr;
// __cxa_find_matching_catch_N functions.
// Indexed by the number of clauses in an original landingpad instruction.
@@ -242,31 +335,47 @@ class WebAssemblyLowerEmscriptenEHSjLj final : public ModulePass {
return "WebAssembly Lower Emscripten Exceptions";
}
+ using InstVector = SmallVectorImpl<Instruction *>;
bool runEHOnFunction(Function &F);
bool runSjLjOnFunction(Function &F);
+ void handleLongjmpableCallsForEmscriptenSjLj(
+ Function &F, InstVector &SetjmpTableInsts,
+ InstVector &SetjmpTableSizeInsts,
+ SmallVectorImpl<PHINode *> &SetjmpRetPHIs);
+ void
+ handleLongjmpableCallsForWasmSjLj(Function &F, InstVector &SetjmpTableInsts,
+ InstVector &SetjmpTableSizeInsts,
+ SmallVectorImpl<PHINode *> &SetjmpRetPHIs);
Function *getFindMatchingCatch(Module &M, unsigned NumClauses);
Value *wrapInvoke(CallBase *CI);
void wrapTestSetjmp(BasicBlock *BB, DebugLoc DL, Value *Threw,
Value *SetjmpTable, Value *SetjmpTableSize, Value *&Label,
- Value *&LongjmpResult, BasicBlock *&EndBB);
+ Value *&LongjmpResult, BasicBlock *&CallEmLongjmpBB,
+ PHINode *&CallEmLongjmpBBThrewPHI,
+ PHINode *&CallEmLongjmpBBThrewValuePHI,
+ BasicBlock *&EndBB);
Function *getInvokeWrapper(CallBase *CI);
bool areAllExceptionsAllowed() const { return EHAllowlistSet.empty(); }
- bool canLongjmp(Module &M, const Value *Callee) const;
- bool isEmAsmCall(Module &M, const Value *Callee) const;
bool supportsException(const Function *F) const {
- return EnableEH && (areAllExceptionsAllowed() ||
- EHAllowlistSet.count(std::string(F->getName())));
+ return EnableEmEH && (areAllExceptionsAllowed() ||
+ EHAllowlistSet.count(std::string(F->getName())));
}
+ void replaceLongjmpWith(Function *LongjmpF, Function *NewF);
void rebuildSSA(Function &F);
public:
static char ID;
- WebAssemblyLowerEmscriptenEHSjLj(bool EnableEH = true, bool EnableSjLj = true)
- : ModulePass(ID), EnableEH(EnableEH), EnableSjLj(EnableSjLj) {
+ WebAssemblyLowerEmscriptenEHSjLj()
+ : ModulePass(ID), EnableEmEH(WasmEnableEmEH),
+ EnableEmSjLj(WasmEnableEmSjLj), EnableWasmSjLj(WasmEnableSjLj) {
+ assert(!(EnableEmSjLj && EnableWasmSjLj) &&
+ "Two SjLj modes cannot be turned on at the same time");
+ assert(!(EnableEmEH && EnableWasmSjLj) &&
+ "Wasm SjLj should be only used with Wasm EH");
EHAllowlistSet.insert(EHAllowlist.begin(), EHAllowlist.end());
}
bool runOnModule(Module &M) override;
@@ -282,9 +391,8 @@ INITIALIZE_PASS(WebAssemblyLowerEmscriptenEHSjLj, DEBUG_TYPE,
"WebAssembly Lower Emscripten Exceptions / Setjmp / Longjmp",
false, false)
-ModulePass *llvm::createWebAssemblyLowerEmscriptenEHSjLj(bool EnableEH,
- bool EnableSjLj) {
- return new WebAssemblyLowerEmscriptenEHSjLj(EnableEH, EnableSjLj);
+ModulePass *llvm::createWebAssemblyLowerEmscriptenEHSjLj() {
+ return new WebAssemblyLowerEmscriptenEHSjLj();
}
static bool canThrow(const Value *V) {
@@ -353,12 +461,12 @@ static Function *getEmscriptenFunction(FunctionType *Ty, const Twine &Name,
if (!F->hasFnAttribute("wasm-import-module")) {
llvm::AttrBuilder B;
B.addAttribute("wasm-import-module", "env");
- F->addAttributes(llvm::AttributeList::FunctionIndex, B);
+ F->addFnAttrs(B);
}
if (!F->hasFnAttribute("wasm-import-name")) {
llvm::AttrBuilder B;
B.addAttribute("wasm-import-name", F->getName());
- F->addAttributes(llvm::AttributeList::FunctionIndex, B);
+ F->addFnAttrs(B);
}
return F;
}
@@ -415,15 +523,6 @@ Value *WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallBase *CI) {
Module *M = CI->getModule();
LLVMContext &C = M->getContext();
- // If we are calling a function that is noreturn, we must remove that
- // attribute. The code we insert here does expect it to return, after we
- // catch the exception.
- if (CI->doesNotReturn()) {
- if (auto *F = CI->getCalledFunction())
- F->removeFnAttr(Attribute::NoReturn);
- CI->removeAttribute(AttributeList::FunctionIndex, Attribute::NoReturn);
- }
-
IRBuilder<> IRB(C);
IRB.SetInsertPoint(CI);
@@ -450,10 +549,10 @@ Value *WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallBase *CI) {
// No attributes for the callee pointer.
ArgAttributes.push_back(AttributeSet());
// Copy the argument attributes from the original
- for (unsigned I = 0, E = CI->getNumArgOperands(); I < E; ++I)
- ArgAttributes.push_back(InvokeAL.getParamAttributes(I));
+ for (unsigned I = 0, E = CI->arg_size(); I < E; ++I)
+ ArgAttributes.push_back(InvokeAL.getParamAttrs(I));
- AttrBuilder FnAttrs(InvokeAL.getFnAttributes());
+ AttrBuilder FnAttrs(InvokeAL.getFnAttrs());
if (FnAttrs.contains(Attribute::AllocSize)) {
// The allocsize attribute (if any) refers to parameters by index and needs
// to be adjusted.
@@ -467,9 +566,8 @@ Value *WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallBase *CI) {
}
// Reconstruct the AttributesList based on the vector we constructed.
- AttributeList NewCallAL =
- AttributeList::get(C, AttributeSet::get(C, FnAttrs),
- InvokeAL.getRetAttributes(), ArgAttributes);
+ AttributeList NewCallAL = AttributeList::get(
+ C, AttributeSet::get(C, FnAttrs), InvokeAL.getRetAttrs(), ArgAttributes);
NewCall->setAttributes(NewCallAL);
CI->replaceAllUsesWith(NewCall);
@@ -504,8 +602,7 @@ Function *WebAssemblyLowerEmscriptenEHSjLj::getInvokeWrapper(CallBase *CI) {
return F;
}
-bool WebAssemblyLowerEmscriptenEHSjLj::canLongjmp(Module &M,
- const Value *Callee) const {
+static bool canLongjmp(const Value *Callee) {
if (auto *CalleeF = dyn_cast<Function>(Callee))
if (CalleeF->isIntrinsic())
return false;
@@ -543,8 +640,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::canLongjmp(Module &M,
return true;
}
-bool WebAssemblyLowerEmscriptenEHSjLj::isEmAsmCall(Module &M,
- const Value *Callee) const {
+static bool isEmAsmCall(const Value *Callee) {
StringRef CalleeName = Callee->getName();
// This is an exhaustive list from Emscripten's <emscripten/em_asm.h>.
return CalleeName == "emscripten_asm_const_int" ||
@@ -558,7 +654,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::isEmAsmCall(Module &M,
// The code this generates is equivalent to the following JavaScript code:
// %__threwValue.val = __threwValue;
// if (%__THREW__.val != 0 & %__threwValue.val != 0) {
-// %label = _testSetjmp(mem[%__THREW__.val], setjmpTable, setjmpTableSize);
+// %label = testSetjmp(mem[%__THREW__.val], setjmpTable, setjmpTableSize);
// if (%label == 0)
// emscripten_longjmp(%__THREW__.val, %__threwValue.val);
// setTempRet0(%__threwValue.val);
@@ -572,7 +668,8 @@ bool WebAssemblyLowerEmscriptenEHSjLj::isEmAsmCall(Module &M,
void WebAssemblyLowerEmscriptenEHSjLj::wrapTestSetjmp(
BasicBlock *BB, DebugLoc DL, Value *Threw, Value *SetjmpTable,
Value *SetjmpTableSize, Value *&Label, Value *&LongjmpResult,
- BasicBlock *&EndBB) {
+ BasicBlock *&CallEmLongjmpBB, PHINode *&CallEmLongjmpBBThrewPHI,
+ PHINode *&CallEmLongjmpBBThrewValuePHI, BasicBlock *&EndBB) {
Function *F = BB->getParent();
Module *M = F->getParent();
LLVMContext &C = M->getContext();
@@ -591,10 +688,27 @@ void WebAssemblyLowerEmscriptenEHSjLj::wrapTestSetjmp(
Value *Cmp1 = IRB.CreateAnd(ThrewCmp, ThrewValueCmp, "cmp1");
IRB.CreateCondBr(Cmp1, ThenBB1, ElseBB1);
- // %label = _testSetjmp(mem[%__THREW__.val], _setjmpTable, _setjmpTableSize);
+ // Generate call.em.longjmp BB once and share it within the function
+ if (!CallEmLongjmpBB) {
+ // emscripten_longjmp(%__THREW__.val, %__threwValue.val);
+ CallEmLongjmpBB = BasicBlock::Create(C, "call.em.longjmp", F);
+ IRB.SetInsertPoint(CallEmLongjmpBB);
+ CallEmLongjmpBBThrewPHI = IRB.CreatePHI(getAddrIntType(M), 4, "threw.phi");
+ CallEmLongjmpBBThrewValuePHI =
+ IRB.CreatePHI(IRB.getInt32Ty(), 4, "threwvalue.phi");
+ CallEmLongjmpBBThrewPHI->addIncoming(Threw, ThenBB1);
+ CallEmLongjmpBBThrewValuePHI->addIncoming(ThrewValue, ThenBB1);
+ IRB.CreateCall(EmLongjmpF,
+ {CallEmLongjmpBBThrewPHI, CallEmLongjmpBBThrewValuePHI});
+ IRB.CreateUnreachable();
+ } else {
+ CallEmLongjmpBBThrewPHI->addIncoming(Threw, ThenBB1);
+ CallEmLongjmpBBThrewValuePHI->addIncoming(ThrewValue, ThenBB1);
+ }
+
+ // %label = testSetjmp(mem[%__THREW__.val], setjmpTable, setjmpTableSize);
// if (%label == 0)
IRB.SetInsertPoint(ThenBB1);
- BasicBlock *ThenBB2 = BasicBlock::Create(C, "if.then2", F);
BasicBlock *EndBB2 = BasicBlock::Create(C, "if.end2", F);
Value *ThrewPtr =
IRB.CreateIntToPtr(Threw, getAddrPtrType(M), Threw->getName() + ".p");
@@ -603,16 +717,11 @@ void WebAssemblyLowerEmscriptenEHSjLj::wrapTestSetjmp(
Value *ThenLabel = IRB.CreateCall(
TestSetjmpF, {LoadedThrew, SetjmpTable, SetjmpTableSize}, "label");
Value *Cmp2 = IRB.CreateICmpEQ(ThenLabel, IRB.getInt32(0));
- IRB.CreateCondBr(Cmp2, ThenBB2, EndBB2);
-
- // emscripten_longjmp(%__THREW__.val, %__threwValue.val);
- IRB.SetInsertPoint(ThenBB2);
- IRB.CreateCall(EmLongjmpF, {Threw, ThrewValue});
- IRB.CreateUnreachable();
+ IRB.CreateCondBr(Cmp2, CallEmLongjmpBB, EndBB2);
// setTempRet0(%__threwValue.val);
IRB.SetInsertPoint(EndBB2);
- IRB.CreateCall(SetTempRet0Func, ThrewValue);
+ IRB.CreateCall(SetTempRet0F, ThrewValue);
IRB.CreateBr(EndBB1);
IRB.SetInsertPoint(ElseBB1);
@@ -628,53 +737,67 @@ void WebAssemblyLowerEmscriptenEHSjLj::wrapTestSetjmp(
// Output parameter assignment
Label = LabelPHI;
EndBB = EndBB1;
- LongjmpResult = IRB.CreateCall(GetTempRet0Func, None, "longjmp_result");
+ LongjmpResult = IRB.CreateCall(GetTempRet0F, None, "longjmp_result");
}
void WebAssemblyLowerEmscriptenEHSjLj::rebuildSSA(Function &F) {
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
DT.recalculate(F); // CFG has been changed
- SSAUpdater SSA;
+
+ SSAUpdaterBulk SSA;
for (BasicBlock &BB : F) {
for (Instruction &I : BB) {
- SSA.Initialize(I.getType(), I.getName());
- SSA.AddAvailableValue(&BB, &I);
- for (auto UI = I.use_begin(), UE = I.use_end(); UI != UE;) {
- Use &U = *UI;
- ++UI;
+ unsigned VarID = SSA.AddVariable(I.getName(), I.getType());
+ // If a value is defined by an invoke instruction, it is only available in
+ // its normal destination and not in its unwind destination.
+ if (auto *II = dyn_cast<InvokeInst>(&I))
+ SSA.AddAvailableValue(VarID, II->getNormalDest(), II);
+ else
+ SSA.AddAvailableValue(VarID, &BB, &I);
+ for (auto &U : I.uses()) {
auto *User = cast<Instruction>(U.getUser());
if (auto *UserPN = dyn_cast<PHINode>(User))
if (UserPN->getIncomingBlock(U) == &BB)
continue;
-
if (DT.dominates(&I, User))
continue;
- SSA.RewriteUseAfterInsertions(U);
+ SSA.AddUse(VarID, &U);
}
}
}
+ SSA.RewriteAllUses(&DT);
}
-// Replace uses of longjmp with emscripten_longjmp. emscripten_longjmp takes
-// arguments of type {i32, i32} (wasm32) / {i64, i32} (wasm64) and longjmp takes
-// {jmp_buf*, i32}, so we need a ptrtoint instruction here to make the type
-// match. jmp_buf* will eventually be lowered to i32 in the wasm backend.
-static void replaceLongjmpWithEmscriptenLongjmp(Function *LongjmpF,
- Function *EmLongjmpF) {
+// Replace uses of longjmp with a new longjmp function in Emscripten library.
+// In Emscripten SjLj, the new function is
+// void emscripten_longjmp(uintptr_t, i32)
+// In Wasm SjLj, the new function is
+// void __wasm_longjmp(i8*, i32)
+// Because the original libc longjmp function takes (jmp_buf*, i32), we need a
+// ptrtoint/bitcast instruction here to make the type match. jmp_buf* will
+// eventually be lowered to i32/i64 in the wasm backend.
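+//
+// For illustration (assumed IR shapes, not emitted verbatim by this patch), a
+// call such as
+//   call void @longjmp(%struct.__jmp_buf_tag* %buf, i32 1)
+// becomes, under Emscripten SjLj,
+//   %env = ptrtoint %struct.__jmp_buf_tag* %buf to i32 ; i64 on wasm64
+//   call void @emscripten_longjmp(i32 %env, i32 1)
+// and under Wasm SjLj the first argument is instead bitcast to i8* and passed
+// to @__wasm_longjmp.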
+void WebAssemblyLowerEmscriptenEHSjLj::replaceLongjmpWith(Function *LongjmpF,
+ Function *NewF) {
+ assert(NewF == EmLongjmpF || NewF == WasmLongjmpF);
Module *M = LongjmpF->getParent();
SmallVector<CallInst *, 8> ToErase;
LLVMContext &C = LongjmpF->getParent()->getContext();
IRBuilder<> IRB(C);
- // For calls to longjmp, replace it with emscripten_longjmp and cast its first
- // argument (jmp_buf*) to int
+ // For calls to longjmp, replace it with emscripten_longjmp/__wasm_longjmp and
+ // cast its first argument (jmp_buf*) appropriately
for (User *U : LongjmpF->users()) {
auto *CI = dyn_cast<CallInst>(U);
if (CI && CI->getCalledFunction() == LongjmpF) {
IRB.SetInsertPoint(CI);
- Value *Jmpbuf =
- IRB.CreatePtrToInt(CI->getArgOperand(0), getAddrIntType(M), "jmpbuf");
- IRB.CreateCall(EmLongjmpF, {Jmpbuf, CI->getArgOperand(1)});
+ Value *Env = nullptr;
+ if (NewF == EmLongjmpF)
+ Env =
+ IRB.CreatePtrToInt(CI->getArgOperand(0), getAddrIntType(M), "env");
+ else // WasmLongjmpF
+ Env =
+ IRB.CreateBitCast(CI->getArgOperand(0), IRB.getInt8PtrTy(), "env");
+ IRB.CreateCall(NewF, {Env, CI->getArgOperand(1)});
ToErase.push_back(CI);
}
}
@@ -682,14 +805,23 @@ static void replaceLongjmpWithEmscriptenLongjmp(Function *LongjmpF,
I->eraseFromParent();
// If we have any remaining uses of longjmp's function pointer, replace it
- // with (int(*)(jmp_buf*, int))emscripten_longjmp.
+ // with (void(*)(jmp_buf*, int))emscripten_longjmp / __wasm_longjmp.
if (!LongjmpF->uses().empty()) {
- Value *EmLongjmp =
- IRB.CreateBitCast(EmLongjmpF, LongjmpF->getType(), "em_longjmp");
- LongjmpF->replaceAllUsesWith(EmLongjmp);
+ Value *NewLongjmp =
+ IRB.CreateBitCast(NewF, LongjmpF->getType(), "longjmp.cast");
+ LongjmpF->replaceAllUsesWith(NewLongjmp);
}
}
+static bool containsLongjmpableCalls(const Function *F) {
+ for (const auto &BB : *F)
+ for (const auto &I : BB)
+ if (const auto *CB = dyn_cast<CallBase>(&I))
+ if (canLongjmp(CB->getCalledOperand()))
+ return true;
+ return false;
+}
+
bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) {
LLVM_DEBUG(dbgs() << "********** Lower Emscripten EH & SjLj **********\n");
@@ -698,39 +830,60 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) {
Function *SetjmpF = M.getFunction("setjmp");
Function *LongjmpF = M.getFunction("longjmp");
- bool SetjmpUsed = SetjmpF && !SetjmpF->use_empty();
- bool LongjmpUsed = LongjmpF && !LongjmpF->use_empty();
- DoSjLj = EnableSjLj && (SetjmpUsed || LongjmpUsed);
+
+ // On some platforms _setjmp and _longjmp are used instead. Change these to
+ // use setjmp/longjmp, because we later detect these functions by their
+ // names.
+ Function *SetjmpF2 = M.getFunction("_setjmp");
+ Function *LongjmpF2 = M.getFunction("_longjmp");
+ if (SetjmpF2) {
+ if (SetjmpF) {
+ if (SetjmpF->getFunctionType() != SetjmpF2->getFunctionType())
+ report_fatal_error("setjmp and _setjmp have different function types");
+ } else {
+ SetjmpF = Function::Create(SetjmpF2->getFunctionType(),
+ GlobalValue::ExternalLinkage, "setjmp", M);
+ }
+ SetjmpF2->replaceAllUsesWith(SetjmpF);
+ }
+ if (LongjmpF2) {
+ if (LongjmpF) {
+ if (LongjmpF->getFunctionType() != LongjmpF2->getFunctionType())
+ report_fatal_error(
+ "longjmp and _longjmp have different function types");
+ } else {
+ LongjmpF = Function::Create(LongjmpF2->getFunctionType(),
+ GlobalValue::ExternalLinkage, "longjmp", M);
+ }
+ LongjmpF2->replaceAllUsesWith(LongjmpF);
+ }
auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
assert(TPC && "Expected a TargetPassConfig");
auto &TM = TPC->getTM<WebAssemblyTargetMachine>();
- if (EnableEH && TM.Options.ExceptionModel == ExceptionHandling::Wasm)
- report_fatal_error("-exception-model=wasm not allowed with "
- "-enable-emscripten-cxx-exceptions");
-
// Declare (or get) global variables __THREW__, __threwValue, and
// getTempRet0/setTempRet0 function which are used in common for both
// exception handling and setjmp/longjmp handling
ThrewGV = getGlobalVariable(M, getAddrIntType(&M), TM, "__THREW__");
ThrewValueGV = getGlobalVariable(M, IRB.getInt32Ty(), TM, "__threwValue");
- GetTempRet0Func = getEmscriptenFunction(
+ GetTempRet0F = getEmscriptenFunction(
FunctionType::get(IRB.getInt32Ty(), false), "getTempRet0", &M);
- SetTempRet0Func = getEmscriptenFunction(
+ SetTempRet0F = getEmscriptenFunction(
FunctionType::get(IRB.getVoidTy(), IRB.getInt32Ty(), false),
"setTempRet0", &M);
- GetTempRet0Func->setDoesNotThrow();
- SetTempRet0Func->setDoesNotThrow();
+ GetTempRet0F->setDoesNotThrow();
+ SetTempRet0F->setDoesNotThrow();
bool Changed = false;
// Function registration for exception handling
- if (EnableEH) {
+ if (EnableEmEH) {
// Register __resumeException function
FunctionType *ResumeFTy =
FunctionType::get(IRB.getVoidTy(), IRB.getInt8PtrTy(), false);
ResumeF = getEmscriptenFunction(ResumeFTy, "__resumeException", &M);
+ ResumeF->addFnAttr(Attribute::NoReturn);
// Register llvm_eh_typeid_for function
FunctionType *EHTypeIDTy =
@@ -738,20 +891,55 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) {
EHTypeIDF = getEmscriptenFunction(EHTypeIDTy, "llvm_eh_typeid_for", &M);
}
+ if ((EnableEmSjLj || EnableWasmSjLj) && SetjmpF) {
+ // Precompute setjmp users
+ for (User *U : SetjmpF->users()) {
+ if (auto *CB = dyn_cast<CallBase>(U)) {
+ auto *UserF = CB->getFunction();
+ // If a function that calls setjmp does not contain any other calls that
+ // can longjmp, we don't need to do any transformation on that function,
+ // so we can ignore it.
+ if (containsLongjmpableCalls(UserF))
+ SetjmpUsers.insert(UserF);
+ } else {
+ std::string S;
+ raw_string_ostream SS(S);
+ SS << *U;
+ report_fatal_error(Twine("Indirect use of setjmp is not supported: ") +
+ SS.str());
+ }
+ }
+ }
+
+ bool SetjmpUsed = SetjmpF && !SetjmpUsers.empty();
+ bool LongjmpUsed = LongjmpF && !LongjmpF->use_empty();
+ DoSjLj = (EnableEmSjLj | EnableWasmSjLj) && (SetjmpUsed || LongjmpUsed);
+
// Function registration and data pre-gathering for setjmp/longjmp handling
if (DoSjLj) {
- // Register emscripten_longjmp function
- FunctionType *FTy = FunctionType::get(
- IRB.getVoidTy(), {getAddrIntType(&M), IRB.getInt32Ty()}, false);
- EmLongjmpF = getEmscriptenFunction(FTy, "emscripten_longjmp", &M);
+ assert(EnableEmSjLj || EnableWasmSjLj);
+ if (EnableEmSjLj) {
+ // Register emscripten_longjmp function
+ FunctionType *FTy = FunctionType::get(
+ IRB.getVoidTy(), {getAddrIntType(&M), IRB.getInt32Ty()}, false);
+ EmLongjmpF = getEmscriptenFunction(FTy, "emscripten_longjmp", &M);
+ EmLongjmpF->addFnAttr(Attribute::NoReturn);
+ } else { // EnableWasmSjLj
+ // Register __wasm_longjmp function, which calls __builtin_wasm_longjmp.
+ FunctionType *FTy = FunctionType::get(
+ IRB.getVoidTy(), {IRB.getInt8PtrTy(), IRB.getInt32Ty()}, false);
+ WasmLongjmpF = getEmscriptenFunction(FTy, "__wasm_longjmp", &M);
+ WasmLongjmpF->addFnAttr(Attribute::NoReturn);
+ }
if (SetjmpF) {
// Register saveSetjmp function
FunctionType *SetjmpFTy = SetjmpF->getFunctionType();
- FTy = FunctionType::get(Type::getInt32PtrTy(C),
- {SetjmpFTy->getParamType(0), IRB.getInt32Ty(),
- Type::getInt32PtrTy(C), IRB.getInt32Ty()},
- false);
+ FunctionType *FTy =
+ FunctionType::get(Type::getInt32PtrTy(C),
+ {SetjmpFTy->getParamType(0), IRB.getInt32Ty(),
+ Type::getInt32PtrTy(C), IRB.getInt32Ty()},
+ false);
SaveSetjmpF = getEmscriptenFunction(FTy, "saveSetjmp", &M);
// Register testSetjmp function
@@ -761,16 +949,18 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) {
false);
TestSetjmpF = getEmscriptenFunction(FTy, "testSetjmp", &M);
- // Precompute setjmp users
- for (User *U : SetjmpF->users()) {
- auto *UI = cast<Instruction>(U);
- SetjmpUsers.insert(UI->getFunction());
- }
+ // wasm.catch() will be lowered to the wasm 'catch' instruction during
+ // instruction selection.
+ CatchF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_catch);
+ // Type for struct __WasmLongjmpArgs
+ LongjmpArgsTy = StructType::get(IRB.getInt8PtrTy(), // env
+ IRB.getInt32Ty() // val
+ );
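+ // For reference, the matching C definition in Emscripten is assumed to be
+ // roughly:
+ //   struct __WasmLongjmpArgs { void *env; int val; };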
}
}
// Exception handling transformation
- if (EnableEH) {
+ if (EnableEmEH) {
for (Function &F : M) {
if (F.isDeclaration())
continue;
@@ -782,7 +972,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) {
if (DoSjLj) {
Changed = true; // We have setjmp or longjmp somewhere
if (LongjmpF)
- replaceLongjmpWithEmscriptenLongjmp(LongjmpF, EmLongjmpF);
+ replaceLongjmpWith(LongjmpF, EnableEmSjLj ? EmLongjmpF : WasmLongjmpF);
// Only traverse functions that uses setjmp in order not to insert
// unnecessary prep / cleanup code in every function
if (SetjmpF)
@@ -816,6 +1006,12 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) {
SmallVector<Instruction *, 64> ToErase;
SmallPtrSet<LandingPadInst *, 32> LandingPads;
+ // rethrow.longjmp BB that will be shared within the function.
+ BasicBlock *RethrowLongjmpBB = nullptr;
+ // PHI node for the loaded value of __THREW__ global variable in
+ // rethrow.longjmp BB
+ PHINode *RethrowLongjmpBBThrewPHI = nullptr;
+
for (BasicBlock &BB : F) {
auto *II = dyn_cast<InvokeInst>(BB.getTerminator());
if (!II)
@@ -836,37 +1032,48 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) {
// setjmp, it will be appropriately handled in runSjLjOnFunction. But even
// if the function does not contain setjmp calls, we shouldn't silently
// ignore longjmps; we should rethrow them so they can be correctly
- // handled in somewhere up the call chain where setjmp is.
- // __THREW__'s value is 0 when nothing happened, 1 when an exception is
- // thrown, other values when longjmp is thrown.
+ // handled somewhere up the call chain where setjmp is. __THREW__'s
+ // value is 0 when nothing happened, 1 when an exception is thrown, and
+ // other values when longjmp is thrown.
//
// if (%__THREW__.val == 0 || %__THREW__.val == 1)
// goto %tail
// else
// goto %rethrow.longjmp
//
- // longjmp.rethrow: ;; This is longjmp. Rethrow it
+ // rethrow.longjmp: ;; This is longjmp. Rethrow it
// %__threwValue.val = __threwValue
// emscripten_longjmp(%__THREW__.val, %__threwValue.val);
//
// tail: ;; Nothing happened or an exception is thrown
// ... Continue exception handling ...
- if (DoSjLj && !SetjmpUsers.count(&F) && canLongjmp(M, Callee)) {
+ if (DoSjLj && EnableEmSjLj && !SetjmpUsers.count(&F) &&
+ canLongjmp(Callee)) {
+ // Create rethrow.longjmp BB once and share it within the function
+ if (!RethrowLongjmpBB) {
+ RethrowLongjmpBB = BasicBlock::Create(C, "rethrow.longjmp", &F);
+ IRB.SetInsertPoint(RethrowLongjmpBB);
+ RethrowLongjmpBBThrewPHI =
+ IRB.CreatePHI(getAddrIntType(&M), 4, "threw.phi");
+ RethrowLongjmpBBThrewPHI->addIncoming(Threw, &BB);
+ Value *ThrewValue = IRB.CreateLoad(IRB.getInt32Ty(), ThrewValueGV,
+ ThrewValueGV->getName() + ".val");
+ IRB.CreateCall(EmLongjmpF, {RethrowLongjmpBBThrewPHI, ThrewValue});
+ IRB.CreateUnreachable();
+ } else {
+ RethrowLongjmpBBThrewPHI->addIncoming(Threw, &BB);
+ }
+
+ IRB.SetInsertPoint(II); // Restore the insert point back
BasicBlock *Tail = BasicBlock::Create(C, "tail", &F);
- BasicBlock *RethrowBB = BasicBlock::Create(C, "longjmp.rethrow", &F);
Value *CmpEqOne =
IRB.CreateICmpEQ(Threw, getAddrSizeInt(&M, 1), "cmp.eq.one");
Value *CmpEqZero =
IRB.CreateICmpEQ(Threw, getAddrSizeInt(&M, 0), "cmp.eq.zero");
Value *Or = IRB.CreateOr(CmpEqZero, CmpEqOne, "or");
- IRB.CreateCondBr(Or, Tail, RethrowBB);
- IRB.SetInsertPoint(RethrowBB);
- Value *ThrewValue = IRB.CreateLoad(IRB.getInt32Ty(), ThrewValueGV,
- ThrewValueGV->getName() + ".val");
- IRB.CreateCall(EmLongjmpF, {Threw, ThrewValue});
-
- IRB.CreateUnreachable();
+ IRB.CreateCondBr(Or, Tail, RethrowLongjmpBB);
IRB.SetInsertPoint(Tail);
+ BB.replaceSuccessorsPhiUsesWith(&BB, Tail);
}
// Insert a branch based on __THREW__ variable
@@ -961,7 +1168,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) {
CallInst *FMCI = IRB.CreateCall(FMCF, FMCArgs, "fmc");
Value *Undef = UndefValue::get(LPI->getType());
Value *Pair0 = IRB.CreateInsertValue(Undef, FMCI, 0, "pair0");
- Value *TempRet0 = IRB.CreateCall(GetTempRet0Func, None, "tempret0");
+ Value *TempRet0 = IRB.CreateCall(GetTempRet0F, None, "tempret0");
Value *Pair1 = IRB.CreateInsertValue(Pair0, TempRet0, 1, "pair1");
LPI->replaceAllUsesWith(Pair1);
@@ -997,14 +1204,15 @@ static DebugLoc getOrCreateDebugLoc(const Instruction *InsertBefore,
}
bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
+ assert(EnableEmSjLj || EnableWasmSjLj);
Module &M = *F.getParent();
LLVMContext &C = F.getContext();
IRBuilder<> IRB(C);
SmallVector<Instruction *, 64> ToErase;
// Vector of %setjmpTable values
- std::vector<Instruction *> SetjmpTableInsts;
+ SmallVector<Instruction *, 4> SetjmpTableInsts;
// Vector of %setjmpTableSize values
- std::vector<Instruction *> SetjmpTableSizeInsts;
+ SmallVector<Instruction *, 4> SetjmpTableSizeInsts;
// Setjmp preparation
@@ -1012,11 +1220,13 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
// We create this as an instruction intentionally, and we don't want to fold
// this instruction to a constant 4, because this value will be used in
// SSAUpdater.AddAvailableValue(...) later.
- BasicBlock &EntryBB = F.getEntryBlock();
- DebugLoc FirstDL = getOrCreateDebugLoc(&*EntryBB.begin(), F.getSubprogram());
- BinaryOperator *SetjmpTableSize = BinaryOperator::Create(
- Instruction::Add, IRB.getInt32(4), IRB.getInt32(0), "setjmpTableSize",
- &*EntryBB.getFirstInsertionPt());
+ BasicBlock *Entry = &F.getEntryBlock();
+ DebugLoc FirstDL = getOrCreateDebugLoc(&*Entry->begin(), F.getSubprogram());
+ SplitBlock(Entry, &*Entry->getFirstInsertionPt());
+
+ BinaryOperator *SetjmpTableSize =
+ BinaryOperator::Create(Instruction::Add, IRB.getInt32(4), IRB.getInt32(0),
+ "setjmpTableSize", Entry->getTerminator());
SetjmpTableSize->setDebugLoc(FirstDL);
// setjmpTable = (int *) malloc(40);
Instruction *SetjmpTable = CallInst::CreateMalloc(
@@ -1036,13 +1246,14 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
SetjmpTableSizeInsts.push_back(SetjmpTableSize);
// Setjmp transformation
- std::vector<PHINode *> SetjmpRetPHIs;
+ SmallVector<PHINode *, 4> SetjmpRetPHIs;
Function *SetjmpF = M.getFunction("setjmp");
for (User *U : SetjmpF->users()) {
auto *CI = dyn_cast<CallInst>(U);
+ // FIXME 'invoke' to setjmp can happen when we use Wasm EH + Wasm SjLj, but
+ // we don't support the two being used together yet.
if (!CI)
- report_fatal_error("Does not support indirect calls to setjmp");
-
+ report_fatal_error("Wasm EH + Wasm SjLj is not fully supported yet");
BasicBlock *BB = CI->getParent();
if (BB->getParent() != &F) // in other function
continue;
@@ -1072,14 +1283,136 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
Instruction *NewSetjmpTable =
IRB.CreateCall(SaveSetjmpF, Args, "setjmpTable");
Instruction *NewSetjmpTableSize =
- IRB.CreateCall(GetTempRet0Func, None, "setjmpTableSize");
+ IRB.CreateCall(GetTempRet0F, None, "setjmpTableSize");
SetjmpTableInsts.push_back(NewSetjmpTable);
SetjmpTableSizeInsts.push_back(NewSetjmpTableSize);
ToErase.push_back(CI);
}
- // Update each call that can longjmp so it can return to a setjmp where
- // relevant.
+ // Handle longjmpable calls.
+ if (EnableEmSjLj)
+ handleLongjmpableCallsForEmscriptenSjLj(
+ F, SetjmpTableInsts, SetjmpTableSizeInsts, SetjmpRetPHIs);
+ else // EnableWasmSjLj
+ handleLongjmpableCallsForWasmSjLj(F, SetjmpTableInsts, SetjmpTableSizeInsts,
+ SetjmpRetPHIs);
+
+ // Erase everything we no longer need in this function
+ for (Instruction *I : ToErase)
+ I->eraseFromParent();
+
+ // Free setjmpTable buffer before each return instruction + function-exiting
+ // call
+ SmallVector<Instruction *, 16> ExitingInsts;
+ for (BasicBlock &BB : F) {
+ Instruction *TI = BB.getTerminator();
+ if (isa<ReturnInst>(TI))
+ ExitingInsts.push_back(TI);
+ // Any 'call' instruction with 'noreturn' attribute exits the function at
+ // this point. If it could throw but unwind to another EH pad within this
+ // function instead of exiting, it would have been an 'invoke', which
+ // happens if we use Wasm EH or Wasm SjLj.
+ for (auto &I : BB) {
+ if (auto *CI = dyn_cast<CallInst>(&I)) {
+ bool IsNoReturn = CI->hasFnAttr(Attribute::NoReturn);
+ if (Function *CalleeF = CI->getCalledFunction())
+ IsNoReturn |= CalleeF->hasFnAttribute(Attribute::NoReturn);
+ if (IsNoReturn)
+ ExitingInsts.push_back(&I);
+ }
+ }
+ }
+ for (auto *I : ExitingInsts) {
+ DebugLoc DL = getOrCreateDebugLoc(I, F.getSubprogram());
+ // If this exiting instruction is a call within a catchpad, we should add
+ // it as a "funclet" operand bundle to the 'free' call
+ SmallVector<OperandBundleDef, 1> Bundles;
+ if (auto *CB = dyn_cast<CallBase>(I))
+ if (auto Bundle = CB->getOperandBundle(LLVMContext::OB_funclet))
+ Bundles.push_back(OperandBundleDef(*Bundle));
+ auto *Free = CallInst::CreateFree(SetjmpTable, Bundles, I);
+ Free->setDebugLoc(DL);
+ // CallInst::CreateFree may create a bitcast instruction if its argument
+ // types mismatch. We need to set the debug loc for the bitcast too.
+ if (auto *FreeCallI = dyn_cast<CallInst>(Free)) {
+ if (auto *BitCastI = dyn_cast<BitCastInst>(FreeCallI->getArgOperand(0)))
+ BitCastI->setDebugLoc(DL);
+ }
+ }
+
+ // Every call to saveSetjmp can change setjmpTable and setjmpTableSize
+ // (when buffer reallocation occurs)
+ // entry:
+ // setjmpTableSize = 4;
+ // setjmpTable = (int *) malloc(40);
+ // setjmpTable[0] = 0;
+ // ...
+ // somebb:
+ // setjmpTable = saveSetjmp(env, label, setjmpTable, setjmpTableSize);
+ // setjmpTableSize = getTempRet0();
+ // So we need to make sure the SSA for these variables is valid so that every
+ // saveSetjmp and testSetjmp call has the correct arguments.
+ SSAUpdater SetjmpTableSSA;
+ SSAUpdater SetjmpTableSizeSSA;
+ SetjmpTableSSA.Initialize(Type::getInt32PtrTy(C), "setjmpTable");
+ SetjmpTableSizeSSA.Initialize(Type::getInt32Ty(C), "setjmpTableSize");
+ for (Instruction *I : SetjmpTableInsts)
+ SetjmpTableSSA.AddAvailableValue(I->getParent(), I);
+ for (Instruction *I : SetjmpTableSizeInsts)
+ SetjmpTableSizeSSA.AddAvailableValue(I->getParent(), I);
+
+ for (auto &U : make_early_inc_range(SetjmpTable->uses()))
+ if (auto *I = dyn_cast<Instruction>(U.getUser()))
+ if (I->getParent() != Entry)
+ SetjmpTableSSA.RewriteUse(U);
+ for (auto &U : make_early_inc_range(SetjmpTableSize->uses()))
+ if (auto *I = dyn_cast<Instruction>(U.getUser()))
+ if (I->getParent() != Entry)
+ SetjmpTableSizeSSA.RewriteUse(U);
+
+ // Finally, our modifications to the CFG can break dominance of SSA variables.
+ // For example, in this code,
+ // if (x()) { .. setjmp() .. }
+ // if (y()) { .. longjmp() .. }
+ // We must split the longjmp block, and it can jump into the block split off
+ // from the setjmp one. But that means that when we split the setjmp block,
+ // its first part no longer dominates its second part - there is a theoretically
+ // possible control flow path where x() is false, then y() is true and we
+ // reach the second part of the setjmp block, without ever reaching the first
+ // part. So, we rebuild SSA form here.
+ rebuildSSA(F);
+ return true;
+}
+
+// Update each call that can longjmp so it can return to the corresponding
+// setjmp. Refer to 4) of the "Emscripten setjmp/longjmp handling" section in
+// the comments at the top of the file for details.
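+//
+// As a rough sketch (not the exact IR this pass emits), a longjmpable call
+//   call void @foo()
+// in a setjmp-calling function ends up along the lines of
+//   store i32 0, i32* @__THREW__
+//   call void @__invoke_void(void ()* @foo) ; invoke wrapper; a longjmp in
+//                                           ; foo() sets __THREW__/__threwValue
+//   %threw = load i32, i32* @__THREW__
+// followed by the testSetjmp()/emscripten_longjmp() sequence and the switch
+// to the post-setjmp BBs that wrapTestSetjmp() and this function build.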
+void WebAssemblyLowerEmscriptenEHSjLj::handleLongjmpableCallsForEmscriptenSjLj(
+ Function &F, InstVector &SetjmpTableInsts, InstVector &SetjmpTableSizeInsts,
+ SmallVectorImpl<PHINode *> &SetjmpRetPHIs) {
+ Module &M = *F.getParent();
+ LLVMContext &C = F.getContext();
+ IRBuilder<> IRB(C);
+ SmallVector<Instruction *, 64> ToErase;
+
+ // We need to pass setjmpTable and setjmpTableSize to testSetjmp function.
+ // These values are defined in the beginning of the function and also in each
+ // setjmp callsite, but we don't know which values we should use at this
+ // point. So here we arbitrarily use the ones defined in the beginning of the
+ // function, and SSAUpdater will later update them to the correct values.
+ Instruction *SetjmpTable = *SetjmpTableInsts.begin();
+ Instruction *SetjmpTableSize = *SetjmpTableSizeInsts.begin();
+
+ // call.em.longjmp BB that will be shared within the function.
+ BasicBlock *CallEmLongjmpBB = nullptr;
+ // PHI node for the loaded value of __THREW__ global variable in
+ // call.em.longjmp BB
+ PHINode *CallEmLongjmpBBThrewPHI = nullptr;
+ // PHI node for the loaded value of __threwValue global variable in
+ // call.em.longjmp BB
+ PHINode *CallEmLongjmpBBThrewValuePHI = nullptr;
+ // rethrow.exn BB that will be shared within the function.
+ BasicBlock *RethrowExnBB = nullptr;
// Because we are creating new BBs while processing and don't want to make
// all these newly created BBs candidates again for longjmp processing, we
@@ -1092,15 +1425,18 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
for (unsigned I = 0; I < BBs.size(); I++) {
BasicBlock *BB = BBs[I];
for (Instruction &I : *BB) {
- assert(!isa<InvokeInst>(&I));
+ if (isa<InvokeInst>(&I))
+ report_fatal_error("When using Wasm EH with Emscripten SjLj, there is "
+ "a restriction that `setjmp` function call and "
+ "exception cannot be used within the same function");
auto *CI = dyn_cast<CallInst>(&I);
if (!CI)
continue;
const Value *Callee = CI->getCalledOperand();
- if (!canLongjmp(M, Callee))
+ if (!canLongjmp(Callee))
continue;
- if (isEmAsmCall(M, Callee))
+ if (isEmAsmCall(Callee))
report_fatal_error("Cannot use EM_ASM* alongside setjmp/longjmp in " +
F.getName() +
". Please consider using EM_JS, or move the "
@@ -1171,19 +1507,26 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
// tail:
// ...
if (supportsException(&F) && canThrow(Callee)) {
- IRB.SetInsertPoint(CI);
// We will add a new conditional branch. So remove the branch created
// when we split the BB
ToErase.push_back(BB->getTerminator());
+
+ // Generate rethrow.exn BB once and share it within the function
+ if (!RethrowExnBB) {
+ RethrowExnBB = BasicBlock::Create(C, "rethrow.exn", &F);
+ IRB.SetInsertPoint(RethrowExnBB);
+ CallInst *Exn =
+ IRB.CreateCall(getFindMatchingCatch(M, 0), {}, "exn");
+ IRB.CreateCall(ResumeF, {Exn});
+ IRB.CreateUnreachable();
+ }
+
+ IRB.SetInsertPoint(CI);
BasicBlock *NormalBB = BasicBlock::Create(C, "normal", &F);
- BasicBlock *RethrowBB = BasicBlock::Create(C, "eh.rethrow", &F);
Value *CmpEqOne =
IRB.CreateICmpEQ(Threw, getAddrSizeInt(&M, 1), "cmp.eq.one");
- IRB.CreateCondBr(CmpEqOne, RethrowBB, NormalBB);
- IRB.SetInsertPoint(RethrowBB);
- CallInst *Exn = IRB.CreateCall(getFindMatchingCatch(M, 0), {}, "exn");
- IRB.CreateCall(ResumeF, {Exn});
- IRB.CreateUnreachable();
+ IRB.CreateCondBr(CmpEqOne, RethrowExnBB, NormalBB);
+
IRB.SetInsertPoint(NormalBB);
IRB.CreateBr(Tail);
BB = NormalBB; // New insertion point to insert testSetjmp()
@@ -1202,7 +1545,9 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
Value *LongjmpResult = nullptr;
BasicBlock *EndBB = nullptr;
wrapTestSetjmp(BB, CI->getDebugLoc(), Threw, SetjmpTable, SetjmpTableSize,
- Label, LongjmpResult, EndBB);
+ Label, LongjmpResult, CallEmLongjmpBB,
+ CallEmLongjmpBBThrewPHI, CallEmLongjmpBBThrewValuePHI,
+ EndBB);
assert(Label && LongjmpResult && EndBB);
// Create switch instruction
@@ -1224,76 +1569,184 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
}
}
- // Erase everything we no longer need in this function
for (Instruction *I : ToErase)
I->eraseFromParent();
+}
- // Free setjmpTable buffer before each return instruction
- for (BasicBlock &BB : F) {
- Instruction *TI = BB.getTerminator();
- if (isa<ReturnInst>(TI)) {
- DebugLoc DL = getOrCreateDebugLoc(TI, F.getSubprogram());
- auto *Free = CallInst::CreateFree(SetjmpTable, TI);
- Free->setDebugLoc(DL);
- // CallInst::CreateFree may create a bitcast instruction if its argument
- // types mismatch. We need to set the debug loc for the bitcast too.
- if (auto *FreeCallI = dyn_cast<CallInst>(Free)) {
- if (auto *BitCastI = dyn_cast<BitCastInst>(FreeCallI->getArgOperand(0)))
- BitCastI->setDebugLoc(DL);
- }
- }
+// Create a catchpad in which we catch a longjmp's env and val arguments, test
+// if the longjmp corresponds to one of the setjmps in the current function,
+// and if so, jump to the setjmp dispatch BB from which we go to one of the
+// post-setjmp BBs. Refer to 4) of the "Wasm setjmp/longjmp handling" section
+// in the comments at the top of the file for details.
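+//
+// As a sketch (illustrative IR only), each longjmpable call in such a
+// function, e.g.
+//   call void @foo()
+// is turned into
+//   invoke void @foo()
+//           to label %next unwind label %catch.dispatch.longjmp
+// and the shared %catch.longjmp funclet then tests the caught
+// __WasmLongjmpArgs as shown in the pseudocode at the top of the file.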
+void WebAssemblyLowerEmscriptenEHSjLj::handleLongjmpableCallsForWasmSjLj(
+ Function &F, InstVector &SetjmpTableInsts, InstVector &SetjmpTableSizeInsts,
+ SmallVectorImpl<PHINode *> &SetjmpRetPHIs) {
+ Module &M = *F.getParent();
+ LLVMContext &C = F.getContext();
+ IRBuilder<> IRB(C);
+
+ // A function with catchswitch/catchpad instruction should have a personality
+ // function attached to it. Search for the wasm personality function, and if
+ // it exists, use it, and if it doesn't, create a dummy personality function.
+ // (SjLj is not going to call it anyway.)
+ if (!F.hasPersonalityFn()) {
+ StringRef PersName = getEHPersonalityName(EHPersonality::Wasm_CXX);
+ FunctionType *PersType =
+ FunctionType::get(IRB.getInt32Ty(), /* isVarArg */ true);
+ Value *PersF = M.getOrInsertFunction(PersName, PersType).getCallee();
+ F.setPersonalityFn(
+ cast<Constant>(IRB.CreateBitCast(PersF, IRB.getInt8PtrTy())));
}
- // Every call to saveSetjmp can change setjmpTable and setjmpTableSize
- // (when buffer reallocation occurs)
+ // Use the entry BB's debugloc as a fallback
+ BasicBlock *Entry = &F.getEntryBlock();
+ DebugLoc FirstDL = getOrCreateDebugLoc(&*Entry->begin(), F.getSubprogram());
+ IRB.SetCurrentDebugLocation(FirstDL);
+
+ // Arbitrarily use the ones defined in the beginning of the function.
+ // SSAUpdater will later update them to the correct values.
+ Instruction *SetjmpTable = *SetjmpTableInsts.begin();
+ Instruction *SetjmpTableSize = *SetjmpTableSizeInsts.begin();
+
+ // Add setjmp.dispatch BB right after the entry block. Because we have
+ // initialized setjmpTable/setjmpTableSize in the entry block and split the
+ // rest into another BB, here 'OrigEntry' is the function's original entry
+ // block before the transformation.
+ //
// entry:
- // setjmpTableSize = 4;
- // setjmpTable = (int *) malloc(40);
- // setjmpTable[0] = 0;
- // ...
- // somebb:
- // setjmpTable = saveSetjmp(buf, label, setjmpTable, setjmpTableSize);
- // setjmpTableSize = getTempRet0();
- // So we need to make sure the SSA for these variables is valid so that every
- // saveSetjmp and testSetjmp calls have the correct arguments.
- SSAUpdater SetjmpTableSSA;
- SSAUpdater SetjmpTableSizeSSA;
- SetjmpTableSSA.Initialize(Type::getInt32PtrTy(C), "setjmpTable");
- SetjmpTableSizeSSA.Initialize(Type::getInt32Ty(C), "setjmpTableSize");
- for (Instruction *I : SetjmpTableInsts)
- SetjmpTableSSA.AddAvailableValue(I->getParent(), I);
- for (Instruction *I : SetjmpTableSizeInsts)
- SetjmpTableSizeSSA.AddAvailableValue(I->getParent(), I);
+ // setjmpTable / setjmpTableSize initialization
+ // setjmp.dispatch:
+ // switch will be inserted here later
+ // entry.split: (OrigEntry)
+ // the original function starts here
+ BasicBlock *OrigEntry = Entry->getNextNode();
+ BasicBlock *SetjmpDispatchBB =
+ BasicBlock::Create(C, "setjmp.dispatch", &F, OrigEntry);
+ cast<BranchInst>(Entry->getTerminator())->setSuccessor(0, SetjmpDispatchBB);
+
+ // Create catch.dispatch.longjmp BB and a catchswitch instruction
+ BasicBlock *CatchSwitchBB =
+ BasicBlock::Create(C, "catch.dispatch.longjmp", &F);
+ IRB.SetInsertPoint(CatchSwitchBB);
+ CatchSwitchInst *CatchSwitch =
+ IRB.CreateCatchSwitch(ConstantTokenNone::get(C), nullptr, 1);
+
+ // Create catch.longjmp BB and a catchpad instruction
+ BasicBlock *CatchLongjmpBB = BasicBlock::Create(C, "catch.longjmp", &F);
+ CatchSwitch->addHandler(CatchLongjmpBB);
+ IRB.SetInsertPoint(CatchLongjmpBB);
+ CatchPadInst *CatchPad = IRB.CreateCatchPad(CatchSwitch, {});
+
+ // Wasm throw and catch instructions can throw and catch multiple values, but
+ // that requires multivalue support in the toolchain, which is currently not
+ // very reliable. We instead throw and catch a pointer to a struct value of
+ // type 'struct __WasmLongjmpArgs', which is defined in Emscripten.
+ Instruction *CatchCI =
+ IRB.CreateCall(CatchF, {IRB.getInt32(WebAssembly::C_LONGJMP)}, "thrown");
+ Value *LongjmpArgs =
+ IRB.CreateBitCast(CatchCI, LongjmpArgsTy->getPointerTo(), "longjmp.args");
+ Value *EnvField =
+ IRB.CreateConstGEP2_32(LongjmpArgsTy, LongjmpArgs, 0, 0, "env_gep");
+ Value *ValField =
+ IRB.CreateConstGEP2_32(LongjmpArgsTy, LongjmpArgs, 0, 1, "val_gep");
+ // void *env = __wasm_longjmp_args.env;
+ Instruction *Env = IRB.CreateLoad(IRB.getInt8PtrTy(), EnvField, "env");
+ // int val = __wasm_longjmp_args.val;
+ Instruction *Val = IRB.CreateLoad(IRB.getInt32Ty(), ValField, "val");
+
+ // %label = testSetjmp(mem[%env], setjmpTable, setjmpTableSize);
+ // if (%label == 0)
+ // __wasm_longjmp(%env, %val)
+ // catchret to %setjmp.dispatch
+ BasicBlock *ThenBB = BasicBlock::Create(C, "if.then", &F);
+ BasicBlock *EndBB = BasicBlock::Create(C, "if.end", &F);
+ Value *EnvP = IRB.CreateBitCast(Env, getAddrPtrType(&M), "env.p");
+ Value *SetjmpID = IRB.CreateLoad(getAddrIntType(&M), EnvP, "setjmp.id");
+ Value *Label =
+ IRB.CreateCall(TestSetjmpF, {SetjmpID, SetjmpTable, SetjmpTableSize},
+ OperandBundleDef("funclet", CatchPad), "label");
+ Value *Cmp = IRB.CreateICmpEQ(Label, IRB.getInt32(0));
+ IRB.CreateCondBr(Cmp, ThenBB, EndBB);
+
+ IRB.SetInsertPoint(ThenBB);
+ CallInst *WasmLongjmpCI = IRB.CreateCall(
+ WasmLongjmpF, {Env, Val}, OperandBundleDef("funclet", CatchPad));
+ IRB.CreateUnreachable();
- for (auto UI = SetjmpTable->use_begin(), UE = SetjmpTable->use_end();
- UI != UE;) {
- // Grab the use before incrementing the iterator.
- Use &U = *UI;
- // Increment the iterator before removing the use from the list.
- ++UI;
- if (auto *I = dyn_cast<Instruction>(U.getUser()))
- if (I->getParent() != &EntryBB)
- SetjmpTableSSA.RewriteUse(U);
+ IRB.SetInsertPoint(EndBB);
+ // Jump to setjmp.dispatch block
+ IRB.CreateCatchRet(CatchPad, SetjmpDispatchBB);
+
+ // Go back to setjmp.dispatch BB
+ // setjmp.dispatch:
+ // switch %label {
+ // label 1: goto post-setjmp BB 1
+ // label 2: goto post-setjmp BB 2
+ // ...
+ // default: goto split next BB
+ // }
+ IRB.SetInsertPoint(SetjmpDispatchBB);
+ PHINode *LabelPHI = IRB.CreatePHI(IRB.getInt32Ty(), 2, "label.phi");
+ LabelPHI->addIncoming(Label, EndBB);
+ LabelPHI->addIncoming(IRB.getInt32(-1), Entry);
+ SwitchInst *SI = IRB.CreateSwitch(LabelPHI, OrigEntry, SetjmpRetPHIs.size());
+ // -1 means no longjmp happened, continue normally (will hit the default
+ // switch case). 0 means a longjmp that is not ours to handle, needs a
+ // rethrow. Otherwise the label is the corresponding index in SetjmpRetPHIs
+ // plus 1 (to avoid 0).
+ for (unsigned I = 0; I < SetjmpRetPHIs.size(); I++) {
+ SI->addCase(IRB.getInt32(I + 1), SetjmpRetPHIs[I]->getParent());
+ SetjmpRetPHIs[I]->addIncoming(Val, SetjmpDispatchBB);
}
- for (auto UI = SetjmpTableSize->use_begin(), UE = SetjmpTableSize->use_end();
- UI != UE;) {
- Use &U = *UI;
- ++UI;
- if (auto *I = dyn_cast<Instruction>(U.getUser()))
- if (I->getParent() != &EntryBB)
- SetjmpTableSizeSSA.RewriteUse(U);
+
+ // Convert all longjmpable call instructions to invokes that unwind to the
+ // newly created catch.dispatch.longjmp BB.
+ SmallVector<Instruction *, 64> ToErase;
+ for (auto *BB = &*F.begin(); BB; BB = BB->getNextNode()) {
+ for (Instruction &I : *BB) {
+ auto *CI = dyn_cast<CallInst>(&I);
+ if (!CI)
+ continue;
+ const Value *Callee = CI->getCalledOperand();
+ if (!canLongjmp(Callee))
+ continue;
+ if (isEmAsmCall(Callee))
+ report_fatal_error("Cannot use EM_ASM* alongside setjmp/longjmp in " +
+ F.getName() +
+ ". Please consider using EM_JS, or move the "
+ "EM_ASM into another function.",
+ false);
+ // This is the __wasm_longjmp() call we inserted in this function, which
+ // rethrows the longjmp when the longjmp does not correspond to one of the
+ // setjmps in this function. We should not convert this call to an invoke.
+ if (CI == WasmLongjmpCI)
+ continue;
+ ToErase.push_back(CI);
+
+ // Even if the callee function has attribute 'nounwind', which is true for
+ // all C functions, it can longjmp, which means it can throw a Wasm
+ // exception now.
+ CI->removeFnAttr(Attribute::NoUnwind);
+ if (Function *CalleeF = CI->getCalledFunction()) {
+ CalleeF->removeFnAttr(Attribute::NoUnwind);
+ }
+
+ IRB.SetInsertPoint(CI);
+ BasicBlock *Tail = SplitBlock(BB, CI->getNextNode());
+ // We will add a new invoke. So remove the branch created when we split
+ // the BB
+ ToErase.push_back(BB->getTerminator());
+ SmallVector<Value *, 8> Args(CI->args());
+ InvokeInst *II =
+ IRB.CreateInvoke(CI->getFunctionType(), CI->getCalledOperand(), Tail,
+ CatchSwitchBB, Args);
+ II->takeName(CI);
+ II->setDebugLoc(CI->getDebugLoc());
+ II->setAttributes(CI->getAttributes());
+ CI->replaceAllUsesWith(II);
+ }
}
- // Finally, our modifications to the cfg can break dominance of SSA variables.
- // For example, in this code,
- // if (x()) { .. setjmp() .. }
- // if (y()) { .. longjmp() .. }
- // We must split the longjmp block, and it can jump into the block splitted
- // from setjmp one. But that means that when we split the setjmp block, it's
- // first part no longer dominates its second part - there is a theoretically
- // possible control flow path where x() is false, then y() is true and we
- // reach the second part of the setjmp block, without ever reaching the first
- // part. So, we rebuild SSA form here.
- rebuildSSA(F);
- return true;
+ for (Instruction *I : ToErase)
+ I->eraseFromParent();
}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp
index 9ccbee819c35..3a0bef8c765c 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp
@@ -68,7 +68,7 @@ bool LowerGlobalDtors::runOnModule(Module &M) {
if (!InitList)
return false;
- // Sanity-check @llvm.global_dtor's type.
+ // Validate @llvm.global_dtors' type.
auto *ETy = dyn_cast<StructType>(InitList->getType()->getElementType());
if (!ETy || ETy->getNumElements() != 3 ||
!ETy->getTypeAtIndex(0U)->isIntegerTy() ||
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLowerRefTypesIntPtrConv.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLowerRefTypesIntPtrConv.cpp
new file mode 100644
index 000000000000..8ff916c28c4e
--- /dev/null
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyLowerRefTypesIntPtrConv.cpp
@@ -0,0 +1,84 @@
+//===- WebAssemblyLowerRefTypesIntPtrConv.cpp - Lower IntToPtr/PtrToInt --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Lowers IntToPtr and PtrToInt instructions on reference types to
+/// trap instructions, since these conversions are allowed to appear in IR
+/// for non-integral pointers but cannot be lowered meaningfully.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Utils/WebAssemblyUtilities.h"
+#include "WebAssembly.h"
+#include "WebAssemblySubtarget.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-lower-reftypes-intptr-conv"
+
+namespace {
+class WebAssemblyLowerRefTypesIntPtrConv final : public FunctionPass {
+ StringRef getPassName() const override {
+ return "WebAssembly Lower RefTypes Int-Ptr Conversions";
+ }
+
+ bool runOnFunction(Function &MF) override;
+
+public:
+ static char ID; // Pass identification
+ WebAssemblyLowerRefTypesIntPtrConv() : FunctionPass(ID) {}
+};
+} // end anonymous namespace
+
+char WebAssemblyLowerRefTypesIntPtrConv::ID = 0;
+INITIALIZE_PASS(WebAssemblyLowerRefTypesIntPtrConv, DEBUG_TYPE,
+ "WebAssembly Lower RefTypes Int-Ptr Conversions", false, false)
+
+FunctionPass *llvm::createWebAssemblyLowerRefTypesIntPtrConv() {
+ return new WebAssemblyLowerRefTypesIntPtrConv();
+}
+
+bool WebAssemblyLowerRefTypesIntPtrConv::runOnFunction(Function &F) {
+ LLVM_DEBUG(dbgs() << "********** Lower RefTypes IntPtr Convs **********\n"
+ "********** Function: "
+ << F.getName() << '\n');
+
+ // This function checks for uses of ptrtoint and inttoptr on reference
+ // types and replaces them with a trap instruction.
+ //
+ // We insert a debugtrap call before the offending instruction and replace
+ // its uses with undef, for both inttoptr and ptrtoint.
+ std::set<Instruction *> worklist;
+
+ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
+ PtrToIntInst *PTI = dyn_cast<PtrToIntInst>(&*I);
+ IntToPtrInst *ITP = dyn_cast<IntToPtrInst>(&*I);
+ if (!(PTI && WebAssembly::isRefType(PTI->getPointerOperand()->getType())) &&
+ !(ITP && WebAssembly::isRefType(ITP->getDestTy())))
+ continue;
+
+ UndefValue *U = UndefValue::get(I->getType());
+ I->replaceAllUsesWith(U);
+
+ Function *TrapIntrin =
+ Intrinsic::getDeclaration(F.getParent(), Intrinsic::debugtrap);
+ CallInst::Create(TrapIntrin, {}, "", &*I);
+
+ worklist.insert(&*I);
+ }
+
+ // erase each instruction replaced by trap
+ for (Instruction *I : worklist)
+ I->eraseFromParent();
+
+ return !worklist.empty();
+}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
index ec2380a501ab..0b953a90aeab 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
@@ -17,6 +17,7 @@
#include "Utils/WebAssemblyTypeUtilities.h"
#include "Utils/WebAssemblyUtilities.h"
#include "WebAssemblyAsmPrinter.h"
+#include "WebAssemblyISelLowering.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -28,6 +29,7 @@
#include "llvm/MC/MCSymbolWasm.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+
using namespace llvm;
// This disables the removal of registers when lowering into MC, as required
@@ -38,8 +40,8 @@ cl::opt<bool>
" instruction output for test purposes only."),
cl::init(false));
-extern cl::opt<bool> EnableEmException;
-extern cl::opt<bool> EnableEmSjLj;
+extern cl::opt<bool> WasmEnableEmEH;
+extern cl::opt<bool> WasmEnableEmSjLj;
static void removeRegisterOperands(const MachineInstr *MI, MCInst &OutMI);
@@ -56,15 +58,36 @@ WebAssemblyMCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const {
const MachineFunction &MF = *MO.getParent()->getParent()->getParent();
const TargetMachine &TM = MF.getTarget();
const Function &CurrentFunc = MF.getFunction();
+ Type *GlobalVT = Global->getValueType();
SmallVector<MVT, 1> VTs;
- computeLegalValueVTs(CurrentFunc, TM, Global->getValueType(), VTs);
- if (VTs.size() != 1)
+ computeLegalValueVTs(CurrentFunc, TM, GlobalVT, VTs);
+
+ // Tables are represented as arrays in LLVM IR, so they reach this point as
+ // aggregate array types with an element type that is a reference type.
+ wasm::ValType Type;
+ if (GlobalVT->isArrayTy() &&
+ WebAssembly::isRefType(GlobalVT->getArrayElementType())) {
+ MVT VT;
+ switch (GlobalVT->getArrayElementType()->getPointerAddressSpace()) {
+ case WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF:
+ VT = MVT::funcref;
+ break;
+ case WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_EXTERNREF:
+ VT = MVT::externref;
+ break;
+ default:
+ report_fatal_error("unhandled address space type");
+ }
+ Type = WebAssembly::toValType(VT);
+ } else if (VTs.size() == 1) {
+ Type = WebAssembly::toValType(VTs[0]);
+ } else
report_fatal_error("Aggregate globals not yet implemented");
- bool Mutable = true;
- wasm::ValType Type = WebAssembly::toValType(VTs[0]);
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
- WasmSym->setGlobalType(wasm::WasmGlobalType{uint8_t(Type), Mutable});
+ WasmSym->setGlobalType(
+ wasm::WasmGlobalType{uint8_t(Type), /*Mutable=*/true});
}
return WasmSym;
}
@@ -82,7 +105,7 @@ WebAssemblyMCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const {
bool InvokeDetected = false;
auto *WasmSym = Printer.getMCSymbolForFunction(
- F, EnableEmException || EnableEmSjLj, Signature.get(), InvokeDetected);
+ F, WasmEnableEmEH || WasmEnableEmSjLj, Signature.get(), InvokeDetected);
WasmSym->setSignature(Signature.get());
Printer.addSignature(std::move(Signature));
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
@@ -102,6 +125,9 @@ MCOperand WebAssemblyMCInstLower::lowerSymbolOperand(const MachineOperand &MO,
switch (TargetFlags) {
case WebAssemblyII::MO_NO_FLAG:
break;
+ case WebAssemblyII::MO_GOT_TLS:
+ Kind = MCSymbolRefExpr::VK_WASM_GOT_TLS;
+ break;
case WebAssemblyII::MO_GOT:
Kind = MCSymbolRefExpr::VK_GOT;
break;
@@ -275,15 +301,9 @@ void WebAssemblyMCInstLower::lower(const MachineInstr *MI,
MCOp = lowerSymbolOperand(MO, GetGlobalAddressSymbol(MO));
break;
case MachineOperand::MO_ExternalSymbol:
- // The target flag indicates whether this is a symbol for a
- // variable or a function.
- assert(MO.getTargetFlags() == 0 &&
- "WebAssembly uses only symbol flags on ExternalSymbols");
MCOp = lowerSymbolOperand(MO, GetExternalSymbolSymbol(MO));
break;
case MachineOperand::MO_MCSymbol:
- // This is currently used only for LSDA symbols (GCC_except_table),
- // because global addresses or other external symbols are handled above.
assert(MO.getTargetFlags() == 0 &&
"WebAssembly does not use target flags on MCSymbol");
MCOp = lowerSymbolOperand(MO, MO.getMCSymbol());
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMCLowerPrePass.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyMCLowerPrePass.cpp
index 3daffd1c23a2..37ac8e75f4b7 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyMCLowerPrePass.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyMCLowerPrePass.cpp
@@ -33,21 +33,21 @@ using namespace llvm;
#define DEBUG_TYPE "wasm-mclower-prepass"
namespace {
-class WebAssemblyMCLowerPrePass final : public MachineFunctionPass {
+class WebAssemblyMCLowerPrePass final : public ModulePass {
StringRef getPassName() const override {
return "WebAssembly MC Lower Pre Pass";
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
+ ModulePass::getAnalysisUsage(AU);
}
- bool runOnMachineFunction(MachineFunction &MF) override;
+ bool runOnModule(Module &M) override;
public:
static char ID; // Pass identification, replacement for typeid
- WebAssemblyMCLowerPrePass() : MachineFunctionPass(ID) {}
+ WebAssemblyMCLowerPrePass() : ModulePass(ID) {}
};
} // end anonymous namespace
@@ -57,30 +57,43 @@ INITIALIZE_PASS(
"Collects information ahead of time for MC lowering",
false, false)
-FunctionPass *llvm::createWebAssemblyMCLowerPrePass() {
+ModulePass *llvm::createWebAssemblyMCLowerPrePass() {
return new WebAssemblyMCLowerPrePass();
}
-bool WebAssemblyMCLowerPrePass::runOnMachineFunction(MachineFunction &MF) {
- LLVM_DEBUG(dbgs() << "********** MC Lower Pre Pass **********\n"
- "********** Function: "
- << MF.getName() << '\n');
+// NOTE: this is a ModulePass since we need to enforce that this code has run
+// for all functions before AsmPrinter. If this way of doing things is ever
+// suboptimal, we could opt to make it a MachineFunctionPass and instead use
+// something like createBarrierNoopPass() to enforce ordering.
+bool WebAssemblyMCLowerPrePass::runOnModule(Module &M) {
+ auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>();
+ if (!MMIWP)
+ return true;
- MachineModuleInfo &MMI = MF.getMMI();
+ MachineModuleInfo &MMI = MMIWP->getMMI();
MachineModuleInfoWasm &MMIW = MMI.getObjFileInfo<MachineModuleInfoWasm>();
- for (MachineBasicBlock &MBB : MF) {
- for (auto &MI : MBB) {
- // FIXME: what should all be filtered out beyond these?
- if (MI.isDebugInstr() || MI.isInlineAsm())
- continue;
- for (MachineOperand &MO : MI.uses()) {
- if (MO.isSymbol()) {
- MMIW.MachineSymbolsUsed.insert(MO.getSymbolName());
+ for (Function &F : M) {
+ MachineFunction *MF = MMI.getMachineFunction(F);
+ if (!MF)
+ continue;
+
+ LLVM_DEBUG(dbgs() << "********** MC Lower Pre Pass **********\n"
+ "********** Function: "
+ << MF->getName() << '\n');
+
+ for (MachineBasicBlock &MBB : *MF) {
+ for (auto &MI : MBB) {
+ // FIXME: what should all be filtered out beyond these?
+ if (MI.isDebugInstr() || MI.isInlineAsm())
+ continue;
+ for (MachineOperand &MO : MI.uses()) {
+ if (MO.isSymbol()) {
+ MMIW.MachineSymbolsUsed.insert(MO.getSymbolName());
+ }
}
}
}
}
-
return true;
}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp
index 9aea65cba280..2180f57c106a 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp
@@ -96,9 +96,8 @@ static bool replaceDominatedUses(MachineBasicBlock &MBB, MachineInstr &MI,
SmallVector<SlotIndex, 4> Indices;
- for (auto I = MRI.use_nodbg_begin(FromReg), E = MRI.use_nodbg_end();
- I != E;) {
- MachineOperand &O = *I++;
+ for (MachineOperand &O :
+ llvm::make_early_inc_range(MRI.use_nodbg_operands(FromReg))) {
MachineInstr *Where = O.getParent();
// Check that MI dominates the instruction in the normal way.
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp
index 6bfed1a7195c..9d83a75a8247 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp
@@ -106,13 +106,12 @@ bool WebAssemblyOptimizeLiveIntervals::runOnMachineFunction(
// instructions to satisfy LiveIntervals' requirement that all uses be
// dominated by defs. Now that LiveIntervals has computed which of these
// defs are actually needed and which are dead, remove the dead ones.
- for (auto MII = MF.begin()->begin(), MIE = MF.begin()->end(); MII != MIE;) {
- MachineInstr *MI = &*MII++;
- if (MI->isImplicitDef() && MI->getOperand(0).isDead()) {
- LiveInterval &LI = LIS.getInterval(MI->getOperand(0).getReg());
- LIS.removeVRegDefAt(LI, LIS.getInstructionIndex(*MI).getRegSlot());
- LIS.RemoveMachineInstrFromMaps(*MI);
- MI->eraseFromParent();
+ for (MachineInstr &MI : llvm::make_early_inc_range(MF.front())) {
+ if (MI.isImplicitDef() && MI.getOperand(0).isDead()) {
+ LiveInterval &LI = LIS.getInterval(MI.getOperand(0).getReg());
+ LIS.removeVRegDefAt(LI, LIS.getInstructionIndex(MI).getRegSlot());
+ LIS.RemoveMachineInstrFromMaps(MI);
+ MI.eraseFromParent();
}
}
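Several hunks in this import replace manual "MachineOperand &O = *I++" style loops with llvm::make_early_inc_range. A minimal sketch of the idiom, illustrative only and not part of the patch: the adapter advances the wrapped iterator before the loop body runs, so the element the body sees can be erased safely.
// Sketch only (assumes an LLVM build tree); mirrors the loops rewritten above.
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
// Erase every IMPLICIT_DEF in a block; safe because make_early_inc_range has
// already advanced past MI when the loop body executes.
static void eraseImplicitDefs(llvm::MachineBasicBlock &MBB) {
  for (llvm::MachineInstr &MI : llvm::make_early_inc_range(MBB))
    if (MI.isImplicitDef())
      MI.eraseFromParent();
}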
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
index 96390de8f5e7..7912aeb4f502 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
@@ -56,7 +56,7 @@ FunctionPass *llvm::createWebAssemblyOptimizeReturned() {
}
void OptimizeReturned::visitCallBase(CallBase &CB) {
- for (unsigned I = 0, E = CB.getNumArgOperands(); I < E; ++I)
+ for (unsigned I = 0, E = CB.arg_size(); I < E; ++I)
if (CB.paramHasAttr(I, Attribute::Returned)) {
Value *Arg = CB.getArgOperand(I);
// Ignore constants, globals, undef, etc.
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp
index ed5f7ccc854f..8b8593ddcbdd 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp
@@ -112,8 +112,7 @@ bool WebAssemblyPrepareForLiveIntervals::runOnMachineFunction(
// Move ARGUMENT_* instructions to the top of the entry block, so that their
// liveness reflects the fact that these really are live-in values.
- for (auto MII = Entry.begin(), MIE = Entry.end(); MII != MIE;) {
- MachineInstr &MI = *MII++;
+ for (MachineInstr &MI : llvm::make_early_inc_range(Entry)) {
if (WebAssembly::isArgument(MI.getOpcode())) {
MI.removeFromParent();
Entry.insert(Entry.begin(), &MI);
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
index d6adc2fd155c..42419259802e 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
@@ -497,6 +497,10 @@ static unsigned getTeeOpcode(const TargetRegisterClass *RC) {
return WebAssembly::TEE_F64;
if (RC == &WebAssembly::V128RegClass)
return WebAssembly::TEE_V128;
+ if (RC == &WebAssembly::EXTERNREFRegClass)
+ return WebAssembly::TEE_EXTERNREF;
+ if (RC == &WebAssembly::FUNCREFRegClass)
+ return WebAssembly::TEE_FUNCREF;
llvm_unreachable("Unexpected register class");
}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp
index 9f5d6b2a9a47..71f0bd28e1be 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp
@@ -85,8 +85,8 @@ bool WebAssemblyReplacePhysRegs::runOnMachineFunction(MachineFunction &MF) {
// Replace explicit uses of the physical register with a virtual register.
const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(PReg);
unsigned VReg = WebAssembly::NoRegister;
- for (auto I = MRI.reg_begin(PReg), E = MRI.reg_end(); I != E;) {
- MachineOperand &MO = *I++;
+ for (MachineOperand &MO :
+ llvm::make_early_inc_range(MRI.reg_operands(PReg))) {
if (!MO.isImplicit()) {
if (VReg == WebAssembly::NoRegister) {
VReg = MRI.createVirtualRegister(RC);
@@ -101,8 +101,6 @@ bool WebAssemblyReplacePhysRegs::runOnMachineFunction(MachineFunction &MF) {
}
}
MO.setReg(VReg);
- if (MO.getParent()->isDebugValue())
- MO.setIsDebug();
Changed = true;
}
}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
index 7943e1ecc8e1..add3c799f4aa 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
@@ -15,7 +15,7 @@
#include "WebAssemblySubtarget.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "WebAssemblyInstrInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
#define DEBUG_TYPE "wasm-subtarget"
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
index 43d5871f0aa0..b553c8150652 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
@@ -36,6 +36,7 @@ class WebAssemblySubtarget final : public WebAssemblyGenSubtargetInfo {
enum SIMDEnum {
NoSIMD,
SIMD128,
+ RelaxedSIMD,
} SIMDLevel = NoSIMD;
bool HasAtomics = false;
@@ -89,6 +90,7 @@ public:
// Predicates used by WebAssemblyInstrInfo.td.
bool hasAddr64() const { return TargetTriple.isArch64Bit(); }
bool hasSIMD128() const { return SIMDLevel >= SIMD128; }
+ bool hasRelaxedSIMD() const { return SIMDLevel >= RelaxedSIMD; }
bool hasAtomics() const { return HasAtomics; }
bool hasNontrappingFPToInt() const { return HasNontrappingFPToInt; }
bool hasSignExt() const { return HasSignExt; }
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
index 746a7599c58c..80abccd74782 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
@@ -24,7 +24,7 @@
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Function.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LowerAtomic.h"
@@ -34,17 +34,27 @@ using namespace llvm;
#define DEBUG_TYPE "wasm"
// Emscripten's asm.js-style exception handling
-cl::opt<bool> EnableEmException(
- "enable-emscripten-cxx-exceptions",
- cl::desc("WebAssembly Emscripten-style exception handling"),
- cl::init(false));
+cl::opt<bool>
+ WasmEnableEmEH("enable-emscripten-cxx-exceptions",
+ cl::desc("WebAssembly Emscripten-style exception handling"),
+ cl::init(false));
// Emscripten's asm.js-style setjmp/longjmp handling
-cl::opt<bool> EnableEmSjLj(
+cl::opt<bool> WasmEnableEmSjLj(
"enable-emscripten-sjlj",
cl::desc("WebAssembly Emscripten-style setjmp/longjmp handling"),
cl::init(false));
+// Exception handling using wasm EH instructions
+cl::opt<bool> WasmEnableEH("wasm-enable-eh",
+ cl::desc("WebAssembly exception handling"),
+ cl::init(false));
+
+// setjmp/longjmp handling using wasm EH instructions
+cl::opt<bool> WasmEnableSjLj("wasm-enable-sjlj",
+ cl::desc("WebAssembly setjmp/longjmp handling"),
+ cl::init(false));
+
// A command-line option to keep implicit locals
// for the purpose of testing with lit/llc ONLY.
// This produces output which is not valid WebAssembly, and is not supported
@@ -123,12 +133,14 @@ WebAssemblyTargetMachine::WebAssemblyTargetMachine(
: LLVMTargetMachine(
T,
TT.isArch64Bit()
- ? (TT.isOSEmscripten()
- ? "e-m:e-p:64:64-i64:64-f128:64-n32:64-S128-ni:1:10:20"
- : "e-m:e-p:64:64-i64:64-n32:64-S128-ni:1:10:20")
- : (TT.isOSEmscripten()
- ? "e-m:e-p:32:32-i64:64-f128:64-n32:64-S128-ni:1:10:20"
- : "e-m:e-p:32:32-i64:64-n32:64-S128-ni:1:10:20"),
+ ? (TT.isOSEmscripten() ? "e-m:e-p:64:64-p10:8:8-p20:8:8-i64:64-"
+ "f128:64-n32:64-S128-ni:1:10:20"
+ : "e-m:e-p:64:64-p10:8:8-p20:8:8-i64:64-"
+ "n32:64-S128-ni:1:10:20")
+ : (TT.isOSEmscripten() ? "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-"
+ "f128:64-n32:64-S128-ni:1:10:20"
+ : "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-"
+ "n32:64-S128-ni:1:10:20"),
TT, CPU, FS, Options, getEffectiveRelocModel(RM, TT),
getEffectiveCodeModel(CM, CodeModel::Large), OL),
TLOF(new WebAssemblyTargetObjectFile()) {
@@ -332,6 +344,7 @@ public:
void addPostRegAlloc() override;
bool addGCPasses() override { return false; }
void addPreEmitPass() override;
+ bool addPreISel() override;
// No reg alloc
bool addRegAssignAndRewriteFast() override { return false; }
@@ -355,6 +368,43 @@ FunctionPass *WebAssemblyPassConfig::createTargetRegisterAllocator(bool) {
return nullptr; // No reg alloc
}
+static void checkSanityForEHAndSjLj(const TargetMachine *TM) {
+ // Sanity checking related to -exception-model
+ if (TM->Options.ExceptionModel != ExceptionHandling::None &&
+ TM->Options.ExceptionModel != ExceptionHandling::Wasm)
+ report_fatal_error("-exception-model should be either 'none' or 'wasm'");
+ if (WasmEnableEmEH && TM->Options.ExceptionModel == ExceptionHandling::Wasm)
+ report_fatal_error("-exception-model=wasm not allowed with "
+ "-enable-emscripten-cxx-exceptions");
+ if (WasmEnableEH && TM->Options.ExceptionModel != ExceptionHandling::Wasm)
+ report_fatal_error(
+ "-wasm-enable-eh only allowed with -exception-model=wasm");
+ if (WasmEnableSjLj && TM->Options.ExceptionModel != ExceptionHandling::Wasm)
+ report_fatal_error(
+ "-wasm-enable-sjlj only allowed with -exception-model=wasm");
+ if ((!WasmEnableEH && !WasmEnableSjLj) &&
+ TM->Options.ExceptionModel == ExceptionHandling::Wasm)
+ report_fatal_error(
+ "-exception-model=wasm only allowed with at least one of "
+ "-wasm-enable-eh or -wasm-enable-sjj");
+
+ // You can't enable two modes of EH at the same time
+ if (WasmEnableEmEH && WasmEnableEH)
+ report_fatal_error(
+ "-enable-emscripten-cxx-exceptions not allowed with -wasm-enable-eh");
+ // You can't enable two modes of SjLj at the same time
+ if (WasmEnableEmSjLj && WasmEnableSjLj)
+ report_fatal_error(
+ "-enable-emscripten-sjlj not allowed with -wasm-enable-sjlj");
+ // You can't mix Emscripten EH with Wasm SjLj.
+ if (WasmEnableEmEH && WasmEnableSjLj)
+ report_fatal_error(
+ "-enable-emscripten-cxx-exceptions not allowed with -wasm-enable-sjlj");
+ // Currently it is allowed to mix Wasm EH with Emscripten SjLj as an interim
+ // measure, but some code will error out at compile time in this combination.
+ // See WebAssemblyLowerEmscriptenEHSjLj pass for details.
+}
+
//===----------------------------------------------------------------------===//
// The following functions are called from lib/CodeGen/Passes.cpp to modify
// the CodeGen pass sequence.
@@ -381,23 +431,27 @@ void WebAssemblyPassConfig::addIRPasses() {
if (getOptLevel() != CodeGenOpt::None)
addPass(createWebAssemblyOptimizeReturned());
+ checkSanityForEHAndSjLj(TM);
+
// If exception handling is not enabled and setjmp/longjmp handling is
// enabled, we lower invokes into calls and delete unreachable landingpad
// blocks. Lowering invokes when there is no EH support is done in
- // TargetPassConfig::addPassesToHandleExceptions, but this runs after this
- // function and SjLj handling expects all invokes to be lowered before.
- if (!EnableEmException &&
- TM->Options.ExceptionModel == ExceptionHandling::None) {
+ // TargetPassConfig::addPassesToHandleExceptions, but that runs after these IR
+ // passes and Emscripten SjLj handling expects all invokes to be lowered
+ // before.
+ if (!WasmEnableEmEH && !WasmEnableEH) {
addPass(createLowerInvokePass());
// The lower invoke pass may create unreachable code. Remove it in order not
// to process dead blocks in setjmp/longjmp handling.
addPass(createUnreachableBlockEliminationPass());
}
- // Handle exceptions and setjmp/longjmp if enabled.
- if (EnableEmException || EnableEmSjLj)
- addPass(createWebAssemblyLowerEmscriptenEHSjLj(EnableEmException,
- EnableEmSjLj));
+ // Handle exceptions and setjmp/longjmp if enabled. Unlike Wasm EH preparation
+ // done in WasmEHPrepare pass, Wasm SjLj preparation shares libraries and
+ // transformation algorithms with Emscripten SjLj, so we run
+ // LowerEmscriptenEHSjLj pass also when Wasm SjLj is enabled.
+ if (WasmEnableEmEH || WasmEnableEmSjLj || WasmEnableSjLj)
+ addPass(createWebAssemblyLowerEmscriptenEHSjLj());
// Expand indirectbr instructions to switches.
addPass(createIndirectBrExpandPass());
@@ -518,6 +572,12 @@ void WebAssemblyPassConfig::addPreEmitPass() {
addPass(createWebAssemblyMCLowerPrePass());
}
+bool WebAssemblyPassConfig::addPreISel() {
+ TargetPassConfig::addPreISel();
+ addPass(createWebAssemblyLowerRefTypesIntPtrConv());
+ return false;
+}
+
yaml::MachineFunctionInfo *
WebAssemblyTargetMachine::createDefaultFuncInfoYAML() const {
return new yaml::WebAssemblyFunctionInfo();
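The checkSanityForEHAndSjLj hook added above accepts only a handful of flag combinations. The sketch below restates those rules with a hypothetical struct and helper name, purely for illustration; it is not part of the patch.
// Hypothetical restatement of the option checks in checkSanityForEHAndSjLj.
struct WasmEHOptions {
  bool EmEH = false;      // -enable-emscripten-cxx-exceptions
  bool EmSjLj = false;    // -enable-emscripten-sjlj
  bool WasmEH = false;    // -wasm-enable-eh
  bool WasmSjLj = false;  // -wasm-enable-sjlj
  bool ModelWasm = false; // -exception-model=wasm (otherwise 'none')
};
static bool isAcceptedCombination(const WasmEHOptions &O) {
  // Wasm EH/SjLj require -exception-model=wasm, and vice versa.
  if ((O.WasmEH || O.WasmSjLj) != O.ModelWasm)
    return false;
  // Emscripten EH cannot be combined with -exception-model=wasm.
  if (O.EmEH && O.ModelWasm)
    return false;
  // No mixing two EH modes, two SjLj modes, or Emscripten EH with Wasm SjLj.
  // Wasm EH plus Emscripten SjLj remains allowed as an interim measure.
  if ((O.EmEH && O.WasmEH) || (O.EmSjLj && O.WasmSjLj) || (O.EmEH && O.WasmSjLj))
    return false;
  return true;
}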
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
index d9bc7c6d2c3f..f1ebcbc6fc51 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
@@ -114,7 +114,8 @@ bool WebAssemblyTTIImpl::areInlineCompatible(const Function *Caller,
}
void WebAssemblyTTIImpl::getUnrollingPreferences(
- Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP) const {
+ Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE) const {
// Scan the loop: don't unroll loops with calls. This is a standard approach
// for most (all?) targets.
for (BasicBlock *BB : L->blocks())
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
index 1a33bd20d027..50036f7f7e98 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
@@ -49,7 +49,8 @@ public:
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const;
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
- TTI::UnrollingPreferences &UP) const;
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE) const;
/// @}
@@ -59,8 +60,7 @@ public:
unsigned getNumberOfRegisters(unsigned ClassID) const;
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const;
InstructionCost getArithmeticInstrCost(
- unsigned Opcode, Type *Ty,
- TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
+ unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 67ca67d6cee6..8ce6b47d10e8 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -31,10 +31,10 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <memory>
@@ -1758,8 +1758,8 @@ bool X86AsmParser::CreateMemForMSInlineAsm(
// It is very common for MS InlineAsm to use a global variable together with
// one or two registers in a memory expression, even though it is then
// inaccessible via rip/eip.
if (IsGlobalLV && (BaseReg || IndexReg)) {
- Operands.push_back(
- X86Operand::CreateMem(getPointerWidth(), Disp, Start, End));
+ Operands.push_back(X86Operand::CreateMem(getPointerWidth(), Disp, Start,
+ End, Size, Identifier, Decl));
return false;
}
// Otherwise, we set the base register to a non-zero value
@@ -2551,6 +2551,8 @@ bool X86AsmParser::ParseIntelOperand(OperandVector &Operands) {
StringRef ErrMsg;
unsigned BaseReg = SM.getBaseReg();
unsigned IndexReg = SM.getIndexReg();
+ if (IndexReg && BaseReg == X86::RIP)
+ BaseReg = 0;
unsigned Scale = SM.getScale();
if (!PtrInOperand)
Size = SM.getElementSize() << 3;
@@ -2655,7 +2657,7 @@ bool X86AsmParser::ParseATTOperand(OperandVector &Operands) {
Expr = nullptr;
Reg = RE->getRegNo();
- // Sanity check register.
+ // Check the register.
if (Reg == X86::EIZ || Reg == X86::RIZ)
return Error(
Loc, "%eiz and %riz can only be used as index registers",
@@ -2753,6 +2755,7 @@ bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands) {
.Case("1to4", "{1to4}")
.Case("1to8", "{1to8}")
.Case("1to16", "{1to16}")
+ .Case("1to32", "{1to32}")
.Default(nullptr);
if (!BroadcastPrimitive)
return TokError("Invalid memory broadcast primitive.");
@@ -2914,7 +2917,7 @@ bool X86AsmParser::ParseMemOperand(unsigned SegReg, const MCExpr *Disp,
check(!isa<X86MCExpr>(E), BaseLoc, "expected register here"))
return true;
- // Sanity check register.
+ // Check the register.
BaseReg = cast<X86MCExpr>(E)->getRegNo();
if (BaseReg == X86::EIZ || BaseReg == X86::RIZ)
return Error(BaseLoc, "eiz and riz can only be used as index registers",
@@ -3126,9 +3129,10 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
unsigned ComparisonPredicate = ~0U;
- // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
+ // FIXME: Hack to recognize cmp<comparison code>{sh,ss,sd,ph,ps,pd}.
if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
(PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
+ PatchedName.endswith("sh") || PatchedName.endswith("ph") ||
PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
bool IsVCMP = PatchedName[0] == 'v';
unsigned CCIdx = IsVCMP ? 4 : 3;
@@ -3182,7 +3186,8 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
.Case("gt_oq", 0x1E)
.Case("true_us", 0x1F)
.Default(~0U);
- if (CC != ~0U && (IsVCMP || CC < 8)) {
+ if (CC != ~0U && (IsVCMP || CC < 8) &&
+ (IsVCMP || PatchedName.back() != 'h')) {
if (PatchedName.endswith("ss"))
PatchedName = IsVCMP ? "vcmpss" : "cmpss";
else if (PatchedName.endswith("sd"))
@@ -3191,6 +3196,10 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
PatchedName = IsVCMP ? "vcmpps" : "cmpps";
else if (PatchedName.endswith("pd"))
PatchedName = IsVCMP ? "vcmppd" : "cmppd";
+ else if (PatchedName.endswith("sh"))
+ PatchedName = "vcmpsh";
+ else if (PatchedName.endswith("ph"))
+ PatchedName = "vcmpph";
else
llvm_unreachable("Unexpected suffix!");
@@ -3859,6 +3868,176 @@ bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
}
break;
}
+ case X86::VFCMADDCPHZ128m:
+ case X86::VFCMADDCPHZ256m:
+ case X86::VFCMADDCPHZm:
+ case X86::VFCMADDCPHZ128mb:
+ case X86::VFCMADDCPHZ256mb:
+ case X86::VFCMADDCPHZmb:
+ case X86::VFCMADDCPHZ128mbk:
+ case X86::VFCMADDCPHZ256mbk:
+ case X86::VFCMADDCPHZmbk:
+ case X86::VFCMADDCPHZ128mbkz:
+ case X86::VFCMADDCPHZ256mbkz:
+ case X86::VFCMADDCPHZmbkz:
+ case X86::VFCMADDCPHZ128mk:
+ case X86::VFCMADDCPHZ256mk:
+ case X86::VFCMADDCPHZmk:
+ case X86::VFCMADDCPHZ128mkz:
+ case X86::VFCMADDCPHZ256mkz:
+ case X86::VFCMADDCPHZmkz:
+ case X86::VFCMADDCPHZ128r:
+ case X86::VFCMADDCPHZ256r:
+ case X86::VFCMADDCPHZr:
+ case X86::VFCMADDCPHZ128rk:
+ case X86::VFCMADDCPHZ256rk:
+ case X86::VFCMADDCPHZrk:
+ case X86::VFCMADDCPHZ128rkz:
+ case X86::VFCMADDCPHZ256rkz:
+ case X86::VFCMADDCPHZrkz:
+ case X86::VFCMADDCPHZrb:
+ case X86::VFCMADDCPHZrbk:
+ case X86::VFCMADDCPHZrbkz:
+ case X86::VFCMADDCSHZm:
+ case X86::VFCMADDCSHZmk:
+ case X86::VFCMADDCSHZmkz:
+ case X86::VFCMADDCSHZr:
+ case X86::VFCMADDCSHZrb:
+ case X86::VFCMADDCSHZrbk:
+ case X86::VFCMADDCSHZrbkz:
+ case X86::VFCMADDCSHZrk:
+ case X86::VFCMADDCSHZrkz:
+ case X86::VFMADDCPHZ128m:
+ case X86::VFMADDCPHZ256m:
+ case X86::VFMADDCPHZm:
+ case X86::VFMADDCPHZ128mb:
+ case X86::VFMADDCPHZ256mb:
+ case X86::VFMADDCPHZmb:
+ case X86::VFMADDCPHZ128mbk:
+ case X86::VFMADDCPHZ256mbk:
+ case X86::VFMADDCPHZmbk:
+ case X86::VFMADDCPHZ128mbkz:
+ case X86::VFMADDCPHZ256mbkz:
+ case X86::VFMADDCPHZmbkz:
+ case X86::VFMADDCPHZ128mk:
+ case X86::VFMADDCPHZ256mk:
+ case X86::VFMADDCPHZmk:
+ case X86::VFMADDCPHZ128mkz:
+ case X86::VFMADDCPHZ256mkz:
+ case X86::VFMADDCPHZmkz:
+ case X86::VFMADDCPHZ128r:
+ case X86::VFMADDCPHZ256r:
+ case X86::VFMADDCPHZr:
+ case X86::VFMADDCPHZ128rk:
+ case X86::VFMADDCPHZ256rk:
+ case X86::VFMADDCPHZrk:
+ case X86::VFMADDCPHZ128rkz:
+ case X86::VFMADDCPHZ256rkz:
+ case X86::VFMADDCPHZrkz:
+ case X86::VFMADDCPHZrb:
+ case X86::VFMADDCPHZrbk:
+ case X86::VFMADDCPHZrbkz:
+ case X86::VFMADDCSHZm:
+ case X86::VFMADDCSHZmk:
+ case X86::VFMADDCSHZmkz:
+ case X86::VFMADDCSHZr:
+ case X86::VFMADDCSHZrb:
+ case X86::VFMADDCSHZrbk:
+ case X86::VFMADDCSHZrbkz:
+ case X86::VFMADDCSHZrk:
+ case X86::VFMADDCSHZrkz: {
+ unsigned Dest = Inst.getOperand(0).getReg();
+ for (unsigned i = 2; i < Inst.getNumOperands(); i++)
+ if (Inst.getOperand(i).isReg() && Dest == Inst.getOperand(i).getReg())
+ return Warning(Ops[0]->getStartLoc(), "Destination register should be "
+ "distinct from source registers");
+ break;
+ }
+ case X86::VFCMULCPHZ128rm:
+ case X86::VFCMULCPHZ256rm:
+ case X86::VFCMULCPHZrm:
+ case X86::VFCMULCPHZ128rmb:
+ case X86::VFCMULCPHZ256rmb:
+ case X86::VFCMULCPHZrmb:
+ case X86::VFCMULCPHZ128rmbk:
+ case X86::VFCMULCPHZ256rmbk:
+ case X86::VFCMULCPHZrmbk:
+ case X86::VFCMULCPHZ128rmbkz:
+ case X86::VFCMULCPHZ256rmbkz:
+ case X86::VFCMULCPHZrmbkz:
+ case X86::VFCMULCPHZ128rmk:
+ case X86::VFCMULCPHZ256rmk:
+ case X86::VFCMULCPHZrmk:
+ case X86::VFCMULCPHZ128rmkz:
+ case X86::VFCMULCPHZ256rmkz:
+ case X86::VFCMULCPHZrmkz:
+ case X86::VFCMULCPHZ128rr:
+ case X86::VFCMULCPHZ256rr:
+ case X86::VFCMULCPHZrr:
+ case X86::VFCMULCPHZ128rrk:
+ case X86::VFCMULCPHZ256rrk:
+ case X86::VFCMULCPHZrrk:
+ case X86::VFCMULCPHZ128rrkz:
+ case X86::VFCMULCPHZ256rrkz:
+ case X86::VFCMULCPHZrrkz:
+ case X86::VFCMULCPHZrrb:
+ case X86::VFCMULCPHZrrbk:
+ case X86::VFCMULCPHZrrbkz:
+ case X86::VFCMULCSHZrm:
+ case X86::VFCMULCSHZrmk:
+ case X86::VFCMULCSHZrmkz:
+ case X86::VFCMULCSHZrr:
+ case X86::VFCMULCSHZrrb:
+ case X86::VFCMULCSHZrrbk:
+ case X86::VFCMULCSHZrrbkz:
+ case X86::VFCMULCSHZrrk:
+ case X86::VFCMULCSHZrrkz:
+ case X86::VFMULCPHZ128rm:
+ case X86::VFMULCPHZ256rm:
+ case X86::VFMULCPHZrm:
+ case X86::VFMULCPHZ128rmb:
+ case X86::VFMULCPHZ256rmb:
+ case X86::VFMULCPHZrmb:
+ case X86::VFMULCPHZ128rmbk:
+ case X86::VFMULCPHZ256rmbk:
+ case X86::VFMULCPHZrmbk:
+ case X86::VFMULCPHZ128rmbkz:
+ case X86::VFMULCPHZ256rmbkz:
+ case X86::VFMULCPHZrmbkz:
+ case X86::VFMULCPHZ128rmk:
+ case X86::VFMULCPHZ256rmk:
+ case X86::VFMULCPHZrmk:
+ case X86::VFMULCPHZ128rmkz:
+ case X86::VFMULCPHZ256rmkz:
+ case X86::VFMULCPHZrmkz:
+ case X86::VFMULCPHZ128rr:
+ case X86::VFMULCPHZ256rr:
+ case X86::VFMULCPHZrr:
+ case X86::VFMULCPHZ128rrk:
+ case X86::VFMULCPHZ256rrk:
+ case X86::VFMULCPHZrrk:
+ case X86::VFMULCPHZ128rrkz:
+ case X86::VFMULCPHZ256rrkz:
+ case X86::VFMULCPHZrrkz:
+ case X86::VFMULCPHZrrb:
+ case X86::VFMULCPHZrrbk:
+ case X86::VFMULCPHZrrbkz:
+ case X86::VFMULCSHZrm:
+ case X86::VFMULCSHZrmk:
+ case X86::VFMULCSHZrmkz:
+ case X86::VFMULCSHZrr:
+ case X86::VFMULCSHZrrb:
+ case X86::VFMULCSHZrrbk:
+ case X86::VFMULCSHZrrbkz:
+ case X86::VFMULCSHZrrk:
+ case X86::VFMULCSHZrrkz: {
+ unsigned Dest = Inst.getOperand(0).getReg();
+ for (unsigned i = 1; i < Inst.getNumOperands(); i++)
+ if (Inst.getOperand(i).isReg() && Dest == Inst.getOperand(i).getReg())
+ return Warning(Ops[0]->getStartLoc(), "Destination register should be "
+ "distinct from source registers");
+ break;
+ }
}
const MCInstrDesc &MCID = MII.get(Inst.getOpcode());
@@ -3916,12 +4095,12 @@ void X86AsmParser::applyLVICFIMitigation(MCInst &Inst, MCStreamer &Out) {
// be found here:
// https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
switch (Inst.getOpcode()) {
- case X86::RETW:
- case X86::RETL:
- case X86::RETQ:
- case X86::RETIL:
- case X86::RETIQ:
- case X86::RETIW: {
+ case X86::RET16:
+ case X86::RET32:
+ case X86::RET64:
+ case X86::RETI16:
+ case X86::RETI32:
+ case X86::RETI64: {
MCInst ShlInst, FenceInst;
bool Parse32 = is32BitMode() || Code16GCC;
unsigned Basereg =
@@ -4093,24 +4272,6 @@ unsigned X86AsmParser::checkTargetMatchPredicate(MCInst &Inst) {
ForcedVEXEncoding != VEXEncoding_VEX3))
return Match_Unsupported;
- // These instructions match ambiguously with their VEX encoded counterparts
- // and appear first in the matching table. Reject them unless we're forcing
- // EVEX encoding.
- // FIXME: We really need a way to break the ambiguity.
- switch (Opc) {
- case X86::VCVTSD2SIZrm_Int:
- case X86::VCVTSD2SI64Zrm_Int:
- case X86::VCVTSS2SIZrm_Int:
- case X86::VCVTSS2SI64Zrm_Int:
- case X86::VCVTTSD2SIZrm: case X86::VCVTTSD2SIZrm_Int:
- case X86::VCVTTSD2SI64Zrm: case X86::VCVTTSD2SI64Zrm_Int:
- case X86::VCVTTSS2SIZrm: case X86::VCVTTSS2SIZrm_Int:
- case X86::VCVTTSS2SI64Zrm: case X86::VCVTTSS2SI64Zrm_Int:
- if (ForcedVEXEncoding != VEXEncoding_EVEX)
- return Match_Unsupported;
- break;
- }
-
return Match_Success;
}
@@ -4678,7 +4839,7 @@ bool X86AsmParser::parseDirectiveArch() {
bool X86AsmParser::parseDirectiveNops(SMLoc L) {
int64_t NumBytes = 0, Control = 0;
SMLoc NumBytesLoc, ControlLoc;
- const MCSubtargetInfo STI = getSTI();
+ const MCSubtargetInfo& STI = getSTI();
NumBytesLoc = getTok().getLoc();
if (getParser().checkForValidSection() ||
getParser().parseAbsoluteExpression(NumBytes))
@@ -4704,7 +4865,7 @@ bool X86AsmParser::parseDirectiveNops(SMLoc L) {
}
/// Emit nops
- getParser().getStreamer().emitNops(NumBytes, Control, L);
+ getParser().getStreamer().emitNops(NumBytes, Control, L, STI);
return false;
}
@@ -4717,11 +4878,11 @@ bool X86AsmParser::parseDirectiveEven(SMLoc L) {
const MCSection *Section = getStreamer().getCurrentSectionOnly();
if (!Section) {
- getStreamer().InitSections(false);
+ getStreamer().initSections(false, getSTI());
Section = getStreamer().getCurrentSectionOnly();
}
if (Section->UseCodeAlign())
- getStreamer().emitCodeAlignment(2, 0);
+ getStreamer().emitCodeAlignment(2, &getSTI(), 0);
else
getStreamer().emitValueToAlignment(2, 0, 1, 0);
return false;
diff --git a/llvm/lib/Target/X86/AsmParser/X86Operand.h b/llvm/lib/Target/X86/AsmParser/X86Operand.h
index 2bc6492483c0..9164c699b569 100644
--- a/llvm/lib/Target/X86/AsmParser/X86Operand.h
+++ b/llvm/lib/Target/X86/AsmParser/X86Operand.h
@@ -34,7 +34,6 @@ struct X86Operand final : public MCParsedAsmOperand {
StringRef SymName;
void *OpDecl;
bool AddressOf;
- bool CallOperand;
struct TokOp {
const char *Data;
@@ -79,7 +78,7 @@ struct X86Operand final : public MCParsedAsmOperand {
X86Operand(KindTy K, SMLoc Start, SMLoc End)
: Kind(K), StartLoc(Start), EndLoc(End), OpDecl(nullptr),
- AddressOf(false), CallOperand(false) {}
+ AddressOf(false) {}
StringRef getSymName() override { return SymName; }
void *getOpDecl() override { return OpDecl; }
diff --git a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 82581eb3c30a..908eb6d1fab1 100644
--- a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -83,9 +83,9 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -150,6 +150,12 @@ static InstrUID decode(OpcodeType type, InstructionContext insnContext,
dec =
&THREEDNOW_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
break;
+ case MAP5:
+ dec = &MAP5_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
+ case MAP6:
+ dec = &MAP6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
}
switch (dec->modrm_type) {
@@ -332,7 +338,7 @@ static int readPrefixes(struct InternalInstruction *insn) {
}
if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) &&
- ((~byte1 & 0xc) == 0xc) && ((byte2 & 0x4) == 0x4)) {
+ ((~byte1 & 0x8) == 0x8) && ((byte2 & 0x4) == 0x4)) {
insn->vectorExtensionType = TYPE_EVEX;
} else {
--insn->readerCursor; // unconsume byte1
@@ -800,10 +806,6 @@ static int readModRM(struct InternalInstruction *insn) {
return prefix##_DR0 + index; \
case TYPE_CONTROLREG: \
return prefix##_CR0 + index; \
- case TYPE_BNDR: \
- if (index > 3) \
- *valid = 0; \
- return prefix##_BND0 + index; \
case TYPE_MVSIBX: \
return prefix##_XMM0 + index; \
case TYPE_MVSIBY: \
@@ -876,11 +878,11 @@ static bool readOpcode(struct InternalInstruction *insn) {
insn->opcodeType = ONEBYTE;
if (insn->vectorExtensionType == TYPE_EVEX) {
- switch (mmFromEVEX2of4(insn->vectorExtensionPrefix[1])) {
+ switch (mmmFromEVEX2of4(insn->vectorExtensionPrefix[1])) {
default:
LLVM_DEBUG(
- dbgs() << format("Unhandled mm field for instruction (0x%hhx)",
- mmFromEVEX2of4(insn->vectorExtensionPrefix[1])));
+ dbgs() << format("Unhandled mmm field for instruction (0x%hhx)",
+ mmmFromEVEX2of4(insn->vectorExtensionPrefix[1])));
return true;
case VEX_LOB_0F:
insn->opcodeType = TWOBYTE;
@@ -891,6 +893,12 @@ static bool readOpcode(struct InternalInstruction *insn) {
case VEX_LOB_0F3A:
insn->opcodeType = THREEBYTE_3A;
return consume(insn, insn->opcode);
+ case VEX_LOB_MAP5:
+ insn->opcodeType = MAP5;
+ return consume(insn, insn->opcode);
+ case VEX_LOB_MAP6:
+ insn->opcodeType = MAP6;
+ return consume(insn, insn->opcode);
}
} else if (insn->vectorExtensionType == TYPE_VEX_3B) {
switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) {
@@ -908,6 +916,12 @@ static bool readOpcode(struct InternalInstruction *insn) {
case VEX_LOB_0F3A:
insn->opcodeType = THREEBYTE_3A;
return consume(insn, insn->opcode);
+ case VEX_LOB_MAP5:
+ insn->opcodeType = MAP5;
+ return consume(insn, insn->opcode);
+ case VEX_LOB_MAP6:
+ insn->opcodeType = MAP6;
+ return consume(insn, insn->opcode);
}
} else if (insn->vectorExtensionType == TYPE_VEX_2B) {
insn->opcodeType = TWOBYTE;
@@ -1043,6 +1057,12 @@ static int getInstructionIDWithAttrMask(uint16_t *instructionID,
case THREEDNOW_MAP:
decision = &THREEDNOW_MAP_SYM;
break;
+ case MAP5:
+ decision = &MAP5_SYM;
+ break;
+ case MAP6:
+ decision = &MAP6_SYM;
+ break;
}
if (decision->opcodeDecisions[insnCtx]
diff --git a/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
index 4318c17f03a0..95d3c8ede366 100644
--- a/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
+++ b/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
@@ -37,7 +37,7 @@ namespace X86Disassembler {
#define xFromEVEX2of4(evex) (((~(evex)) & 0x40) >> 6)
#define bFromEVEX2of4(evex) (((~(evex)) & 0x20) >> 5)
#define r2FromEVEX2of4(evex) (((~(evex)) & 0x10) >> 4)
-#define mmFromEVEX2of4(evex) ((evex) & 0x3)
+#define mmmFromEVEX2of4(evex) ((evex) & 0x7)
#define wFromEVEX3of4(evex) (((evex) & 0x80) >> 7)
#define vvvvFromEVEX3of4(evex) (((~(evex)) & 0x78) >> 3)
#define ppFromEVEX3of4(evex) ((evex) & 0x3)
@@ -374,12 +374,6 @@ namespace X86Disassembler {
ENTRY(CR14) \
ENTRY(CR15)
-#define REGS_BOUND \
- ENTRY(BND0) \
- ENTRY(BND1) \
- ENTRY(BND2) \
- ENTRY(BND3)
-
#undef REGS_TMM
#define REGS_TMM \
ENTRY(TMM0) \
@@ -414,7 +408,6 @@ namespace X86Disassembler {
REGS_SEGMENT \
REGS_DEBUG \
REGS_CONTROL \
- REGS_BOUND \
REGS_TMM \
ENTRY(RIP)
@@ -489,7 +482,9 @@ enum SegmentOverride {
enum VEXLeadingOpcodeByte {
VEX_LOB_0F = 0x1,
VEX_LOB_0F38 = 0x2,
- VEX_LOB_0F3A = 0x3
+ VEX_LOB_0F3A = 0x3,
+ VEX_LOB_MAP5 = 0x5,
+ VEX_LOB_MAP6 = 0x6
};
enum XOPMapSelect {
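With the map field widened from two bits (mm) to three (mmm), the EVEX decoder can reach the new MAP5 and MAP6 opcode maps used by AVX512-FP16. A tiny self-contained illustration, not from the patch, that mirrors the mmmFromEVEX2of4 macro and the VEX_LOB_MAP5/VEX_LOB_MAP6 values above:
#include <cstdint>
#include <cstdio>
// The low three bits of the byte after the 0x62 escape select the opcode map:
// 1 = 0F, 2 = 0F38, 3 = 0F3A, 5 = MAP5, 6 = MAP6.
static unsigned evexOpcodeMap(uint8_t PayloadByte) { return PayloadByte & 0x7; }
int main() {
  std::printf("%u\n", evexOpcodeMap(0x05)); // 5 -> MAP5 (many AVX512-FP16 opcodes)
  std::printf("%u\n", evexOpcodeMap(0x06)); // 6 -> MAP6
  return 0;
}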
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
index c685d7e0db81..baacf2f46183 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
@@ -153,6 +153,20 @@ bool X86ATTInstPrinter::printVecCompareInstr(const MCInst *MI,
case X86::VCMPPSZrrib: case X86::VCMPPSZrribk:
case X86::VCMPSDZrrb_Int: case X86::VCMPSDZrrb_Intk:
case X86::VCMPSSZrrb_Int: case X86::VCMPSSZrrb_Intk:
+ case X86::VCMPPHZ128rmi: case X86::VCMPPHZ128rri:
+ case X86::VCMPPHZ256rmi: case X86::VCMPPHZ256rri:
+ case X86::VCMPPHZrmi: case X86::VCMPPHZrri:
+ case X86::VCMPSHZrm: case X86::VCMPSHZrr:
+ case X86::VCMPSHZrm_Int: case X86::VCMPSHZrr_Int:
+ case X86::VCMPPHZ128rmik: case X86::VCMPPHZ128rrik:
+ case X86::VCMPPHZ256rmik: case X86::VCMPPHZ256rrik:
+ case X86::VCMPPHZrmik: case X86::VCMPPHZrrik:
+ case X86::VCMPSHZrm_Intk: case X86::VCMPSHZrr_Intk:
+ case X86::VCMPPHZ128rmbi: case X86::VCMPPHZ128rmbik:
+ case X86::VCMPPHZ256rmbi: case X86::VCMPPHZ256rmbik:
+ case X86::VCMPPHZrmbi: case X86::VCMPPHZrmbik:
+ case X86::VCMPPHZrrib: case X86::VCMPPHZrribk:
+ case X86::VCMPSHZrrb_Int: case X86::VCMPSHZrrb_Intk:
if (Imm >= 0 && Imm <= 31) {
OS << '\t';
printCMPMnemonic(MI, /*IsVCMP*/true, OS);
@@ -162,11 +176,15 @@ bool X86ATTInstPrinter::printVecCompareInstr(const MCInst *MI,
if ((Desc.TSFlags & X86II::FormMask) == X86II::MRMSrcMem) {
if (Desc.TSFlags & X86II::EVEX_B) {
// Broadcast form.
- // Load size is based on W-bit.
- if (Desc.TSFlags & X86II::VEX_W)
+ // Load size is word for TA map. Otherwise it is based on W-bit.
+ if ((Desc.TSFlags & X86II::OpMapMask) == X86II::TA) {
+ assert(!(Desc.TSFlags & X86II::VEX_W) && "Unknown W-bit value!");
+ printwordmem(MI, CurOp--, OS);
+ } else if (Desc.TSFlags & X86II::VEX_W) {
printqwordmem(MI, CurOp--, OS);
- else
+ } else {
printdwordmem(MI, CurOp--, OS);
+ }
// Print the number of elements broadcasted.
unsigned NumElts;
@@ -176,18 +194,28 @@ bool X86ATTInstPrinter::printVecCompareInstr(const MCInst *MI,
NumElts = (Desc.TSFlags & X86II::VEX_W) ? 4 : 8;
else
NumElts = (Desc.TSFlags & X86II::VEX_W) ? 2 : 4;
+ if ((Desc.TSFlags & X86II::OpMapMask) == X86II::TA) {
+ assert(!(Desc.TSFlags & X86II::VEX_W) && "Unknown W-bit value!");
+ NumElts *= 2;
+ }
OS << "{1to" << NumElts << "}";
} else {
- if ((Desc.TSFlags & X86II::OpPrefixMask) == X86II::XS)
- printdwordmem(MI, CurOp--, OS);
- else if ((Desc.TSFlags & X86II::OpPrefixMask) == X86II::XD)
+ if ((Desc.TSFlags & X86II::OpPrefixMask) == X86II::XS) {
+ if ((Desc.TSFlags & X86II::OpMapMask) == X86II::TA)
+ printwordmem(MI, CurOp--, OS);
+ else
+ printdwordmem(MI, CurOp--, OS);
+ } else if ((Desc.TSFlags & X86II::OpPrefixMask) == X86II::XD) {
+ assert((Desc.TSFlags & X86II::OpMapMask) != X86II::TA &&
+ "Unexpected op map!");
printqwordmem(MI, CurOp--, OS);
- else if (Desc.TSFlags & X86II::EVEX_L2)
+ } else if (Desc.TSFlags & X86II::EVEX_L2) {
printzmmwordmem(MI, CurOp--, OS);
- else if (Desc.TSFlags & X86II::VEX_L)
+ } else if (Desc.TSFlags & X86II::VEX_L) {
printymmwordmem(MI, CurOp--, OS);
- else
+ } else {
printxmmwordmem(MI, CurOp--, OS);
+ }
}
} else {
if (Desc.TSFlags & X86II::EVEX_B)
@@ -391,7 +419,7 @@ void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
uint64_t Target;
if (MIA->evaluateBranch(*MI, 0, 0, Target))
return;
- if (MIA->evaluateMemoryOperandAddress(*MI, 0, 0))
+ if (MIA->evaluateMemoryOperandAddress(*MI, /*STI=*/nullptr, 0, 0))
return;
}
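The printer changes above double the broadcast element count for TA-map (FP16) compares because the broadcast load is a 16-bit word rather than a dword or qword. A back-of-the-envelope sketch, not from the patch, of where counts such as the {1to32} case added to HandleAVX512Operand come from:
#include <cstdio>
// Broadcast count is simply vector width divided by element width.
static unsigned broadcastCount(unsigned VectorBits, unsigned ElementBits) {
  return VectorBits / ElementBits;
}
int main() {
  std::printf("{1to%u}\n", broadcastCount(128, 16)); // {1to8}  - vcmpph on xmm
  std::printf("{1to%u}\n", broadcastCount(512, 32)); // {1to16} - dword elements in zmm
  std::printf("{1to%u}\n", broadcastCount(512, 16)); // {1to32} - word elements in zmm
  return 0;
}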
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 83f3614ded1a..d4f39b571394 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -29,9 +29,9 @@
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -116,13 +116,6 @@ cl::opt<bool> X86PadForBranchAlign(
"x86-pad-for-branch-align", cl::init(true), cl::Hidden,
cl::desc("Pad previous instructions to implement branch alignment"));
-class X86ELFObjectWriter : public MCELFObjectTargetWriter {
-public:
- X86ELFObjectWriter(bool is64Bit, uint8_t OSABI, uint16_t EMachine,
- bool HasRelocationAddend, bool foobar)
- : MCELFObjectTargetWriter(is64Bit, OSABI, EMachine, HasRelocationAddend) {}
-};
-
class X86AsmBackend : public MCAsmBackend {
const MCSubtargetInfo &STI;
std::unique_ptr<const MCInstrInfo> MCII;
@@ -166,7 +159,8 @@ public:
bool allowAutoPadding() const override;
bool allowEnhancedRelaxation() const override;
- void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst) override;
+ void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst,
+ const MCSubtargetInfo &STI) override;
void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) override;
unsigned getNumFixupKinds() const override {
@@ -207,9 +201,10 @@ public:
void finishLayout(MCAssembler const &Asm, MCAsmLayout &Layout) const override;
- unsigned getMaximumNopSize() const override;
+ unsigned getMaximumNopSize(const MCSubtargetInfo &STI) const override;
- bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
+ bool writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const override;
};
} // end anonymous namespace
@@ -598,7 +593,7 @@ bool X86AsmBackend::needAlign(const MCInst &Inst) const {
/// Insert BoundaryAlignFragment before instructions to align branches.
void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
- const MCInst &Inst) {
+ const MCInst &Inst, const MCSubtargetInfo &STI) {
CanPadInst = canPadInst(Inst, OS);
if (!canPadBranches(OS))
@@ -637,7 +632,7 @@ void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
isFirstMacroFusibleInst(Inst, *MCII))) {
// If we meet an unfused branch or the first instruction in a fusible pair,
// insert a BoundaryAlign fragment.
- OS.insert(PendingBA = new MCBoundaryAlignFragment(AlignBoundary));
+ OS.insert(PendingBA = new MCBoundaryAlignFragment(AlignBoundary, STI));
}
}
@@ -1081,16 +1076,16 @@ void X86AsmBackend::finishLayout(MCAssembler const &Asm,
}
}
-unsigned X86AsmBackend::getMaximumNopSize() const {
+unsigned X86AsmBackend::getMaximumNopSize(const MCSubtargetInfo &STI) const {
if (STI.hasFeature(X86::Mode16Bit))
return 4;
if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Mode64Bit))
return 1;
- if (STI.getFeatureBits()[X86::FeatureFast7ByteNOP])
+ if (STI.getFeatureBits()[X86::TuningFast7ByteNOP])
return 7;
- if (STI.getFeatureBits()[X86::FeatureFast15ByteNOP])
+ if (STI.getFeatureBits()[X86::TuningFast15ByteNOP])
return 15;
- if (STI.getFeatureBits()[X86::FeatureFast11ByteNOP])
+ if (STI.getFeatureBits()[X86::TuningFast11ByteNOP])
return 11;
// FIXME: handle 32-bit mode
// 15-bytes is the longest single NOP instruction, but 10-bytes is
@@ -1101,7 +1096,8 @@ unsigned X86AsmBackend::getMaximumNopSize() const {
/// Write a sequence of optimal nops to the output, covering \p Count
/// bytes.
/// \return - true on success, false on failure
-bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
+bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const {
static const char Nops32Bit[10][11] = {
// nop
"\x90",
@@ -1138,9 +1134,9 @@ bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
};
const char(*Nops)[11] =
- STI.getFeatureBits()[X86::Mode16Bit] ? Nops16Bit : Nops32Bit;
+ STI->getFeatureBits()[X86::Mode16Bit] ? Nops16Bit : Nops32Bit;
- uint64_t MaxNopLength = (uint64_t)getMaximumNopSize();
+ uint64_t MaxNopLength = (uint64_t)getMaximumNopSize(*STI);
// Emit as many MaxNopLength NOPs as needed, then emit a NOP of the remaining
// length.
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index 58e233d86da1..4161765fc1ae 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -441,6 +441,11 @@ namespace X86II {
/// SYMBOL_LABEL @GOTPCREL
MO_GOTPCREL,
+ /// MO_GOTPCREL_NORELAX - Same as MO_GOTPCREL except that R_X86_64_GOTPCREL
+ /// relocations are guaranteed to be emitted by the integrated assembler
+ /// instead of the relaxable R_X86_64[_REX]_GOTPCRELX relocations.
+ MO_GOTPCREL_NORELAX,
+
/// MO_PLT - On a symbol operand this indicates that the immediate is
/// offset to the PLT entry of symbol name from the current code location.
///
@@ -790,7 +795,7 @@ namespace X86II {
// belongs to. i.e. one-byte, two-byte, 0x0f 0x38, 0x0f 0x3a, etc.
//
OpMapShift = OpPrefixShift + 2,
- OpMapMask = 0x7 << OpMapShift,
+ OpMapMask = 0xF << OpMapShift,
// OB - OneByte - Set if this instruction has a one byte opcode.
OB = 0 << OpMapShift,
@@ -819,13 +824,17 @@ namespace X86II {
/// this flag to indicate that the encoder should do the wacky 3DNow! thing.
ThreeDNow = 7 << OpMapShift,
+ // MAP5, MAP6 - Prefix after the 0x0F prefix.
+ T_MAP5 = 8 << OpMapShift,
+ T_MAP6 = 9 << OpMapShift,
+
//===------------------------------------------------------------------===//
// REX_W - REX prefixes are instruction prefixes used in 64-bit mode.
// They are used to specify GPRs and SSE registers, 64-bit operand size,
// etc. We only care about REX.W and REX.R bits, and only the former is
// statically determined.
//
- REXShift = OpMapShift + 3,
+ REXShift = OpMapShift + 4,
REX_W = 1 << REXShift,
//===------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
index fa937d381613..8ab86f46ffe6 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
@@ -218,6 +218,9 @@ static unsigned getRelocType64(MCContext &Ctx, SMLoc Loc,
return ELF::R_X86_64_REX_GOTPCRELX;
}
llvm_unreachable("unexpected relocation type!");
+ case MCSymbolRefExpr::VK_GOTPCREL_NORELAX:
+ checkIs32(Ctx, Loc, Type);
+ return ELF::R_X86_64_GOTPCREL;
case MCSymbolRefExpr::VK_X86_PLTOFF:
checkIs64(Ctx, Loc, Type);
return ELF::R_X86_64_PLTOFF64;
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
index d8dbbbbf2779..167580ec1ed0 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
@@ -264,6 +264,24 @@ void X86InstPrinterCommon::printCMPMnemonic(const MCInst *MI, bool IsVCmp,
case X86::VCMPSSZrrb_Int: case X86::VCMPSSZrrb_Intk:
OS << "ss\t";
break;
+ case X86::VCMPPHZ128rmi: case X86::VCMPPHZ128rri:
+ case X86::VCMPPHZ256rmi: case X86::VCMPPHZ256rri:
+ case X86::VCMPPHZrmi: case X86::VCMPPHZrri:
+ case X86::VCMPPHZ128rmik: case X86::VCMPPHZ128rrik:
+ case X86::VCMPPHZ256rmik: case X86::VCMPPHZ256rrik:
+ case X86::VCMPPHZrmik: case X86::VCMPPHZrrik:
+ case X86::VCMPPHZ128rmbi: case X86::VCMPPHZ128rmbik:
+ case X86::VCMPPHZ256rmbi: case X86::VCMPPHZ256rmbik:
+ case X86::VCMPPHZrmbi: case X86::VCMPPHZrmbik:
+ case X86::VCMPPHZrrib: case X86::VCMPPHZrribk:
+ OS << "ph\t";
+ break;
+ case X86::VCMPSHZrm: case X86::VCMPSHZrr:
+ case X86::VCMPSHZrm_Int: case X86::VCMPSHZrr_Int:
+ case X86::VCMPSHZrrb_Int: case X86::VCMPSHZrrb_Intk:
+ case X86::VCMPSHZrm_Intk: case X86::VCMPSHZrr_Intk:
+ OS << "sh\t";
+ break;
}
}
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
index d5b205ad9a63..48c335f9a777 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
@@ -132,6 +132,20 @@ bool X86IntelInstPrinter::printVecCompareInstr(const MCInst *MI, raw_ostream &OS
case X86::VCMPPSZrrib: case X86::VCMPPSZrribk:
case X86::VCMPSDZrrb_Int: case X86::VCMPSDZrrb_Intk:
case X86::VCMPSSZrrb_Int: case X86::VCMPSSZrrb_Intk:
+ case X86::VCMPPHZ128rmi: case X86::VCMPPHZ128rri:
+ case X86::VCMPPHZ256rmi: case X86::VCMPPHZ256rri:
+ case X86::VCMPPHZrmi: case X86::VCMPPHZrri:
+ case X86::VCMPSHZrm: case X86::VCMPSHZrr:
+ case X86::VCMPSHZrm_Int: case X86::VCMPSHZrr_Int:
+ case X86::VCMPPHZ128rmik: case X86::VCMPPHZ128rrik:
+ case X86::VCMPPHZ256rmik: case X86::VCMPPHZ256rrik:
+ case X86::VCMPPHZrmik: case X86::VCMPPHZrrik:
+ case X86::VCMPSHZrm_Intk: case X86::VCMPSHZrr_Intk:
+ case X86::VCMPPHZ128rmbi: case X86::VCMPPHZ128rmbik:
+ case X86::VCMPPHZ256rmbi: case X86::VCMPPHZ256rmbik:
+ case X86::VCMPPHZrmbi: case X86::VCMPPHZrmbik:
+ case X86::VCMPPHZrrib: case X86::VCMPPHZrribk:
+ case X86::VCMPSHZrrb_Int: case X86::VCMPSHZrrb_Intk:
if (Imm >= 0 && Imm <= 31) {
OS << '\t';
printCMPMnemonic(MI, /*IsVCMP*/true, OS);
@@ -152,11 +166,15 @@ bool X86IntelInstPrinter::printVecCompareInstr(const MCInst *MI, raw_ostream &OS
if ((Desc.TSFlags & X86II::FormMask) == X86II::MRMSrcMem) {
if (Desc.TSFlags & X86II::EVEX_B) {
// Broadcast form.
- // Load size is based on W-bit.
- if (Desc.TSFlags & X86II::VEX_W)
+ // Load size is word for TA map. Otherwise it is based on W-bit.
+ if ((Desc.TSFlags & X86II::OpMapMask) == X86II::TA) {
+ assert(!(Desc.TSFlags & X86II::VEX_W) && "Unknown W-bit value!");
+ printwordmem(MI, CurOp++, OS);
+ } else if (Desc.TSFlags & X86II::VEX_W) {
printqwordmem(MI, CurOp++, OS);
- else
+ } else {
printdwordmem(MI, CurOp++, OS);
+ }
// Print the number of elements broadcasted.
unsigned NumElts;
@@ -166,18 +184,28 @@ bool X86IntelInstPrinter::printVecCompareInstr(const MCInst *MI, raw_ostream &OS
NumElts = (Desc.TSFlags & X86II::VEX_W) ? 4 : 8;
else
NumElts = (Desc.TSFlags & X86II::VEX_W) ? 2 : 4;
+ if ((Desc.TSFlags & X86II::OpMapMask) == X86II::TA) {
+ assert(!(Desc.TSFlags & X86II::VEX_W) && "Unknown W-bit value!");
+ NumElts *= 2;
+ }
OS << "{1to" << NumElts << "}";
} else {
- if ((Desc.TSFlags & X86II::OpPrefixMask) == X86II::XS)
- printdwordmem(MI, CurOp++, OS);
- else if ((Desc.TSFlags & X86II::OpPrefixMask) == X86II::XD)
+ if ((Desc.TSFlags & X86II::OpPrefixMask) == X86II::XS) {
+ if ((Desc.TSFlags & X86II::OpMapMask) == X86II::TA)
+ printwordmem(MI, CurOp++, OS);
+ else
+ printdwordmem(MI, CurOp++, OS);
+ } else if ((Desc.TSFlags & X86II::OpPrefixMask) == X86II::XD) {
+ assert((Desc.TSFlags & X86II::OpMapMask) != X86II::TA &&
+ "Unexpected op map!");
printqwordmem(MI, CurOp++, OS);
- else if (Desc.TSFlags & X86II::EVEX_L2)
+ } else if (Desc.TSFlags & X86II::EVEX_L2) {
printzmmwordmem(MI, CurOp++, OS);
- else if (Desc.TSFlags & X86II::VEX_L)
+ } else if (Desc.TSFlags & X86II::VEX_L) {
printymmwordmem(MI, CurOp++, OS);
- else
+ } else {
printxmmwordmem(MI, CurOp++, OS);
+ }
}
} else {
printOperand(MI, CurOp++, OS);
@@ -349,7 +377,7 @@ void X86IntelInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
uint64_t Target;
if (MIA->evaluateBranch(*MI, 0, 0, Target))
return;
- if (MIA->evaluateMemoryOperandAddress(*MI, 0, 0))
+ if (MIA->evaluateMemoryOperandAddress(*MI, /*STI=*/nullptr, 0, 0))
return;
}
const MCOperand &BaseReg = MI->getOperand(Op+X86::AddrBaseReg);
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 714d2d839054..4fa8bc64b245 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -799,7 +799,10 @@ void X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
// 0b00001: implied 0F leading opcode
// 0b00010: implied 0F 38 leading opcode bytes
// 0b00011: implied 0F 3A leading opcode bytes
- // 0b00100-0b11111: Reserved for future use
+ // 0b00100: Reserved for future use
+ // 0b00101: VEX MAP5
+ // 0b00110: VEX MAP6
+ // 0b00111-0b11111: Reserved for future use
// 0b01000: XOP map select - 08h instructions with imm byte
// 0b01001: XOP map select - 09h instructions with no imm byte
// 0b01010: XOP map select - 0Ah instructions with imm dword
@@ -825,6 +828,12 @@ void X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
case X86II::XOPA:
VEX_5M = 0xA;
break;
+ case X86II::T_MAP5:
+ VEX_5M = 0x5;
+ break;
+ case X86II::T_MAP6:
+ VEX_5M = 0x6;
+ break;
}
// VEX_4V (VEX vvvv field): a register specifier
@@ -1173,10 +1182,10 @@ void X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
// EVEX opcode prefix can have 4 bytes
//
// +-----+ +--------------+ +-------------------+ +------------------------+
- // | 62h | | RXBR' | 00mm | | W | vvvv | U | pp | | z | L'L | b | v' | aaa |
+ // | 62h | | RXBR' | 0mmm | | W | vvvv | U | pp | | z | L'L | b | v' | aaa |
// +-----+ +--------------+ +-------------------+ +------------------------+
- assert((VEX_5M & 0x3) == VEX_5M &&
- "More than 2 significant bits in VEX.m-mmmm fields for EVEX!");
+ assert((VEX_5M & 0x7) == VEX_5M &&
+ "More than 3 significant bits in VEX.m-mmmm fields for EVEX!");
emitByte(0x62, OS);
emitByte((VEX_R << 7) | (VEX_X << 6) | (VEX_B << 5) | (EVEX_R2 << 4) |
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index 12dc053cd970..9da0a8129f23 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -26,9 +26,9 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MachineLocation.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Host.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -405,8 +405,12 @@ public:
bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size,
uint64_t &Target) const override;
Optional<uint64_t> evaluateMemoryOperandAddress(const MCInst &Inst,
+ const MCSubtargetInfo *STI,
uint64_t Addr,
uint64_t Size) const override;
+ Optional<uint64_t>
+ getMemoryOperandRelocationOffset(const MCInst &Inst,
+ uint64_t Size) const override;
};
#define GET_STIPREDICATE_DEFS_FOR_MC_ANALYSIS
@@ -532,7 +536,8 @@ bool X86MCInstrAnalysis::evaluateBranch(const MCInst &Inst, uint64_t Addr,
}
Optional<uint64_t> X86MCInstrAnalysis::evaluateMemoryOperandAddress(
- const MCInst &Inst, uint64_t Addr, uint64_t Size) const {
+ const MCInst &Inst, const MCSubtargetInfo *STI, uint64_t Addr,
+ uint64_t Size) const {
const MCInstrDesc &MCID = Info->get(Inst.getOpcode());
int MemOpStart = X86II::getMemoryOperandNo(MCID.TSFlags);
if (MemOpStart == -1)
@@ -555,6 +560,30 @@ Optional<uint64_t> X86MCInstrAnalysis::evaluateMemoryOperandAddress(
return None;
}
+Optional<uint64_t>
+X86MCInstrAnalysis::getMemoryOperandRelocationOffset(const MCInst &Inst,
+ uint64_t Size) const {
+ if (Inst.getOpcode() != X86::LEA64r)
+ return None;
+ const MCInstrDesc &MCID = Info->get(Inst.getOpcode());
+ int MemOpStart = X86II::getMemoryOperandNo(MCID.TSFlags);
+ if (MemOpStart == -1)
+ return None;
+ MemOpStart += X86II::getOperandBias(MCID);
+ const MCOperand &SegReg = Inst.getOperand(MemOpStart + X86::AddrSegmentReg);
+ const MCOperand &BaseReg = Inst.getOperand(MemOpStart + X86::AddrBaseReg);
+ const MCOperand &IndexReg = Inst.getOperand(MemOpStart + X86::AddrIndexReg);
+ const MCOperand &ScaleAmt = Inst.getOperand(MemOpStart + X86::AddrScaleAmt);
+ const MCOperand &Disp = Inst.getOperand(MemOpStart + X86::AddrDisp);
+ // Must be a simple rip-relative address.
+ if (BaseReg.getReg() != X86::RIP || SegReg.getReg() != 0 ||
+ IndexReg.getReg() != 0 || ScaleAmt.getImm() != 1 || !Disp.isImm())
+ return None;
+ // rip-relative ModR/M immediate is 32 bits.
+ assert(Size > 4 && "invalid instruction size for rip-relative lea");
+ return Size - 4;
+}
+
} // end of namespace X86_MC
} // end of namespace llvm
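getMemoryOperandRelocationOffset above relies on a rip-relative LEA always ending with its 32-bit displacement, hence the "Size - 4" result. An illustrative byte-level example, not from the patch:
#include <cstdint>
#include <cstdio>
// "48 8d 05 d6 00 00 00" encodes "lea 0xd6(%rip), %rax": REX.W, opcode, ModRM,
// then a trailing disp32, so the relocatable field starts at Size - 4.
static uint64_t ripRelativeDispOffset(uint64_t InstrSize) {
  return InstrSize - 4;
}
int main() {
  std::printf("%llu\n", (unsigned long long)ripRelativeDispOffset(7)); // 3
  return 0;
}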
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.cpp
index 201b22d6232d..82f4460a42e7 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/MathExtras.h"
//===----------------------------------------------------------------------===//
// Vector Mask Decoding
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
index 075e85f4e243..10fc176b59d8 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
@@ -100,7 +100,7 @@ unsigned X86WinCOFFObjectWriter::getRelocType(MCContext &Ctx,
if (Modifier == MCSymbolRefExpr::VK_COFF_IMGREL32)
return COFF::IMAGE_REL_I386_DIR32NB;
if (Modifier == MCSymbolRefExpr::VK_SECREL)
- return COFF::IMAGE_REL_AMD64_SECREL;
+ return COFF::IMAGE_REL_I386_SECREL;
return COFF::IMAGE_REL_I386_DIR32;
case FK_SecRel_2:
return COFF::IMAGE_REL_I386_SECTION;
diff --git a/llvm/lib/Target/X86/TargetInfo/X86TargetInfo.cpp b/llvm/lib/Target/X86/TargetInfo/X86TargetInfo.cpp
index 18cda8f591c3..7490703251e9 100644
--- a/llvm/lib/Target/X86/TargetInfo/X86TargetInfo.cpp
+++ b/llvm/lib/Target/X86/TargetInfo/X86TargetInfo.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#include "TargetInfo/X86TargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
Target &llvm::getTheX86_32Target() {
diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h
index eba5b6ce7836..10e1c5d6ed38 100644
--- a/llvm/lib/Target/X86/X86.h
+++ b/llvm/lib/Target/X86/X86.h
@@ -73,8 +73,8 @@ FunctionPass *createX86AvoidStoreForwardingBlocks();
/// Return a pass that lowers EFLAGS copy pseudo instructions.
FunctionPass *createX86FlagsCopyLoweringPass();
-/// Return a pass that expands WinAlloca pseudo-instructions.
-FunctionPass *createX86WinAllocaExpander();
+/// Return a pass that expands DynAlloca pseudo-instructions.
+FunctionPass *createX86DynAllocaExpander();
/// Return a pass that config the tile registers.
FunctionPass *createX86TileConfigPass();
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 53bbd93798ac..380507308c3d 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -27,7 +27,7 @@ def Mode16Bit : SubtargetFeature<"16bit-mode", "In16BitMode", "true",
"16-bit mode (i8086)">;
//===----------------------------------------------------------------------===//
-// X86 Subtarget features
+// X86 Subtarget ISA features
//===----------------------------------------------------------------------===//
def FeatureX87 : SubtargetFeature<"x87","HasX87", "true",
@@ -42,6 +42,9 @@ def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true",
def FeatureCMPXCHG8B : SubtargetFeature<"cx8", "HasCmpxchg8b", "true",
"Support CMPXCHG8B instructions">;
+def FeatureCRC32 : SubtargetFeature<"crc32", "HasCRC32", "true",
+ "Enable SSE 4.2 CRC32 instruction">;
+
def FeaturePOPCNT : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
"Support POPCNT instruction">;
@@ -100,20 +103,6 @@ def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true",
def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true",
"64-bit with cmpxchg16b",
[FeatureCMPXCHG8B]>;
-def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
- "SHLD instruction is slow">;
-def FeatureSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
- "PMULLD instruction is slow">;
-def FeatureSlowPMADDWD : SubtargetFeature<"slow-pmaddwd", "IsPMADDWDSlow",
- "true",
- "PMADDWD is slower than PMULLD">;
-// FIXME: This should not apply to CPUs that do not have SSE.
-def FeatureSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16",
- "IsUAMem16Slow", "true",
- "Slow unaligned 16-byte memory access">;
-def FeatureSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32",
- "IsUAMem32Slow", "true",
- "Slow unaligned 32-byte memory access">;
def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true",
"Support SSE 4a instructions",
[FeatureSSE3]>;
@@ -184,6 +173,14 @@ def FeatureVP2INTERSECT : SubtargetFeature<"avx512vp2intersect",
"HasVP2INTERSECT", "true",
"Enable AVX-512 vp2intersect",
[FeatureAVX512]>;
+// FIXME: FP16 scalar intrinsics use the type v8f16, which is supposed to be
+// guarded under condition hasVLX. So we imply it in FeatureFP16 currently.
+// FIXME: FP16 conversion between f16 and i64 customizes type v8i64, which is
+// supposed to be guarded under condition hasDQI. So we imply it in FeatureFP16
+// currently.
+def FeatureFP16 : SubtargetFeature<"avx512fp16", "HasFP16", "true",
+ "Support 16-bit floating point",
+ [FeatureBWI, FeatureVLX, FeatureDQI]>;
def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
"Enable packed carry-less multiplication instructions",
[FeatureSSE2]>;
@@ -255,17 +252,6 @@ def FeatureAMXINT8 : SubtargetFeature<"amx-int8", "HasAMXINT8", "true",
def FeatureAMXBF16 : SubtargetFeature<"amx-bf16", "HasAMXBF16", "true",
"Support AMX-BF16 instructions",
[FeatureAMXTILE]>;
-def FeatureLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
- "Use LEA for adjusting the stack pointer">;
-def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb",
- "HasSlowDivide32", "true",
- "Use 8-bit divide for positive values less than 256">;
-def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divl",
- "HasSlowDivide64", "true",
- "Use 32-bit divide for positive values less than 2^32">;
-def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
- "PadShortFunctions", "true",
- "Pad short functions">;
def FeatureINVPCID : SubtargetFeature<"invpcid", "HasINVPCID", "true",
"Invalidate Process-Context Identifier">;
def FeatureSGX : SubtargetFeature<"sgx", "HasSGX", "true",
@@ -296,116 +282,244 @@ def FeatureTSXLDTRK : SubtargetFeature<"tsxldtrk", "HasTSXLDTRK", "true",
"Support TSXLDTRK instructions">;
def FeatureUINTR : SubtargetFeature<"uintr", "HasUINTR", "true",
"Has UINTR Instructions">;
+def FeaturePCONFIG : SubtargetFeature<"pconfig", "HasPCONFIG", "true",
+ "platform configuration instruction">;
+def FeatureMOVDIRI : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
+ "Support movdiri instruction">;
+def FeatureMOVDIR64B : SubtargetFeature<"movdir64b", "HasMOVDIR64B", "true",
+ "Support movdir64b instruction">;
+
+// Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
+// "string operations"). See "REP String Enhancement" in the Intel Software
+// Development Manual. This feature essentially means that REP MOVSB will copy
+// using the largest available size instead of copying bytes one by one, making
+// it at least as fast as REPMOVS{W,D,Q}.
+def FeatureERMSB
+ : SubtargetFeature<
+ "ermsb", "HasERMSB", "true",
+ "REP MOVS/STOS are fast">;
+
+// Icelake and newer processors have Fast Short REP MOV.
+def FeatureFSRM
+ : SubtargetFeature<
+ "fsrm", "HasFSRM", "true",
+ "REP MOVSB of short lengths is faster">;
+
+def FeatureSoftFloat
+ : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
+ "Use software floating point features">;
+
+//===----------------------------------------------------------------------===//
+// X86 Subtarget Security Mitigation features
+//===----------------------------------------------------------------------===//
+
+// Lower indirect calls using a special construct called a `retpoline` to
+// mitigate potential Spectre v2 attacks against them.
+def FeatureRetpolineIndirectCalls
+ : SubtargetFeature<
+ "retpoline-indirect-calls", "UseRetpolineIndirectCalls", "true",
+ "Remove speculation of indirect calls from the generated code">;
+
+// Lower indirect branches and switches either using conditional branch trees
+// or using a special construct called a `retpoline` to mitigate potential
+// Spectre v2 attacks against them.
+def FeatureRetpolineIndirectBranches
+ : SubtargetFeature<
+ "retpoline-indirect-branches", "UseRetpolineIndirectBranches", "true",
+ "Remove speculation of indirect branches from the generated code">;
+
+// Deprecated umbrella feature for enabling both `retpoline-indirect-calls` and
+// `retpoline-indirect-branches` above.
+def FeatureRetpoline
+ : SubtargetFeature<"retpoline", "DeprecatedUseRetpoline", "true",
+ "Remove speculation of indirect branches from the "
+ "generated code, either by avoiding them entirely or "
+ "lowering them with a speculation blocking construct",
+ [FeatureRetpolineIndirectCalls,
+ FeatureRetpolineIndirectBranches]>;
+
+// Rely on external thunks for the emitted retpoline calls. This allows users
+// to provide their own custom thunk definitions in highly specialized
+// environments such as a kernel that does boot-time hot patching.
+def FeatureRetpolineExternalThunk
+ : SubtargetFeature<
+ "retpoline-external-thunk", "UseRetpolineExternalThunk", "true",
+ "When lowering an indirect call or branch using a `retpoline`, rely "
+ "on the specified user provided thunk rather than emitting one "
+ "ourselves. Only has effect when combined with some other retpoline "
+ "feature", [FeatureRetpolineIndirectCalls]>;
+
+// Mitigate LVI attacks against indirect calls/branches and call returns
+def FeatureLVIControlFlowIntegrity
+ : SubtargetFeature<
+ "lvi-cfi", "UseLVIControlFlowIntegrity", "true",
+ "Prevent indirect calls/branches from using a memory operand, and "
+ "precede all indirect calls/branches from a register with an "
+ "LFENCE instruction to serialize control flow. Also decompose RET "
+ "instructions into a POP+LFENCE+JMP sequence.">;
+
+// Enable SESES to mitigate speculative execution attacks
+def FeatureSpeculativeExecutionSideEffectSuppression
+ : SubtargetFeature<
+ "seses", "UseSpeculativeExecutionSideEffectSuppression", "true",
+ "Prevent speculative execution side channel timing attacks by "
+ "inserting a speculation barrier before memory reads, memory writes, "
+ "and conditional branches. Implies LVI Control Flow integrity.",
+ [FeatureLVIControlFlowIntegrity]>;
+
+// Mitigate LVI attacks against data loads
+def FeatureLVILoadHardening
+ : SubtargetFeature<
+ "lvi-load-hardening", "UseLVILoadHardening", "true",
+ "Insert LFENCE instructions to prevent data speculatively injected "
+ "into loads from being used maliciously.">;
+
+def FeatureTaggedGlobals
+ : SubtargetFeature<
+ "tagged-globals", "AllowTaggedGlobals", "true",
+ "Use an instruction sequence for taking the address of a global "
+ "that allows a memory tag in the upper address bits.">;
+
+//===----------------------------------------------------------------------===//
+// X86 Subtarget Tuning features
+//===----------------------------------------------------------------------===//
+
+def TuningSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
+ "SHLD instruction is slow">;
+
+def TuningSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
+ "PMULLD instruction is slow">;
+
+def TuningSlowPMADDWD : SubtargetFeature<"slow-pmaddwd", "IsPMADDWDSlow",
+ "true",
+ "PMADDWD is slower than PMULLD">;
+
+// FIXME: This should not apply to CPUs that do not have SSE.
+def TuningSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16",
+ "IsUAMem16Slow", "true",
+ "Slow unaligned 16-byte memory access">;
+
+def TuningSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32",
+ "IsUAMem32Slow", "true",
+ "Slow unaligned 32-byte memory access">;
+
+def TuningLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
+ "Use LEA for adjusting the stack pointer">;
+
+def TuningSlowDivide32 : SubtargetFeature<"idivl-to-divb",
+ "HasSlowDivide32", "true",
+ "Use 8-bit divide for positive values less than 256">;
+
+def TuningSlowDivide64 : SubtargetFeature<"idivq-to-divl",
+ "HasSlowDivide64", "true",
+ "Use 32-bit divide for positive values less than 2^32">;
+
+def TuningPadShortFunctions : SubtargetFeature<"pad-short-functions",
+ "PadShortFunctions", "true",
+ "Pad short functions">;
+
// On some processors, instructions that implicitly take two memory operands are
// slow. In practice, this means that CALL, PUSH, and POP with memory operands
// should be avoided in favor of a MOV + register CALL/PUSH/POP.
-def FeatureSlowTwoMemOps : SubtargetFeature<"slow-two-mem-ops",
+def TuningSlowTwoMemOps : SubtargetFeature<"slow-two-mem-ops",
"SlowTwoMemOps", "true",
"Two memory operand instructions are slow">;
-def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true",
+
+def TuningLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true",
"LEA instruction needs inputs at AG stage">;
-def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
+
+def TuningSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
"LEA instruction with certain arguments is slow">;
-def FeatureSlow3OpsLEA : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true",
+
+def TuningSlow3OpsLEA : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true",
"LEA instruction with 3 ops or certain registers is slow">;
-def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
+
+def TuningSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
"INC and DEC instructions are slower than ADD and SUB">;
-def FeatureSoftFloat
- : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
- "Use software floating point features">;
-def FeaturePOPCNTFalseDeps : SubtargetFeature<"false-deps-popcnt",
+
+def TuningPOPCNTFalseDeps : SubtargetFeature<"false-deps-popcnt",
"HasPOPCNTFalseDeps", "true",
"POPCNT has a false dependency on dest register">;
-def FeatureLZCNTFalseDeps : SubtargetFeature<"false-deps-lzcnt-tzcnt",
+
+def TuningLZCNTFalseDeps : SubtargetFeature<"false-deps-lzcnt-tzcnt",
"HasLZCNTFalseDeps", "true",
"LZCNT/TZCNT have a false dependency on dest register">;
-def FeaturePCONFIG : SubtargetFeature<"pconfig", "HasPCONFIG", "true",
- "platform configuration instruction">;
+
// On recent X86 (port bound) processors, it's preferable to combine to a single shuffle
// using a variable mask over multiple fixed shuffles.
-def FeatureFastVariableCrossLaneShuffle
+def TuningFastVariableCrossLaneShuffle
: SubtargetFeature<"fast-variable-crosslane-shuffle",
"HasFastVariableCrossLaneShuffle",
"true", "Cross-lane shuffles with variable masks are fast">;
-def FeatureFastVariablePerLaneShuffle
+def TuningFastVariablePerLaneShuffle
: SubtargetFeature<"fast-variable-perlane-shuffle",
"HasFastVariablePerLaneShuffle",
"true", "Per-lane shuffles with variable masks are fast">;
// On some X86 processors, a vzeroupper instruction should be inserted after
// using ymm/zmm registers before executing code that may use SSE instructions.
-def FeatureInsertVZEROUPPER
+def TuningInsertVZEROUPPER
: SubtargetFeature<"vzeroupper",
"InsertVZEROUPPER",
"true", "Should insert vzeroupper instructions">;
-// FeatureFastScalarFSQRT should be enabled if scalar FSQRT has shorter latency
-// than the corresponding NR code. FeatureFastVectorFSQRT should be enabled if
+
+// TuningFastScalarFSQRT should be enabled if scalar FSQRT has shorter latency
+// than the corresponding NR code. TuningFastVectorFSQRT should be enabled if
// vector FSQRT has higher throughput than the corresponding NR code.
// The idea is that throughput bound code is likely to be vectorized, so for
// vectorized code we should care about the throughput of SQRT operations.
// But if the code is scalar that probably means that the code has some kind of
// dependency and we should care more about reducing the latency.
-def FeatureFastScalarFSQRT
+def TuningFastScalarFSQRT
: SubtargetFeature<"fast-scalar-fsqrt", "HasFastScalarFSQRT",
"true", "Scalar SQRT is fast (disable Newton-Raphson)">;
-def FeatureFastVectorFSQRT
+def TuningFastVectorFSQRT
: SubtargetFeature<"fast-vector-fsqrt", "HasFastVectorFSQRT",
"true", "Vector SQRT is fast (disable Newton-Raphson)">;
+
// If lzcnt has equivalent latency/throughput to most simple integer ops, it can
// be used to replace test/set sequences.
-def FeatureFastLZCNT
+def TuningFastLZCNT
: SubtargetFeature<
"fast-lzcnt", "HasFastLZCNT", "true",
"LZCNT instructions are as fast as most simple integer ops">;
+
// If the target can efficiently decode NOPs up to 7 bytes in length.
-def FeatureFast7ByteNOP
+def TuningFast7ByteNOP
: SubtargetFeature<
"fast-7bytenop", "HasFast7ByteNOP", "true",
"Target can quickly decode up to 7 byte NOPs">;
+
// If the target can efficiently decode NOPs up to 11 bytes in length.
-def FeatureFast11ByteNOP
+def TuningFast11ByteNOP
: SubtargetFeature<
"fast-11bytenop", "HasFast11ByteNOP", "true",
"Target can quickly decode up to 11 byte NOPs">;
+
// If the target can efficiently decode NOPs up to 15 bytes in length.
-def FeatureFast15ByteNOP
+def TuningFast15ByteNOP
: SubtargetFeature<
"fast-15bytenop", "HasFast15ByteNOP", "true",
"Target can quickly decode up to 15 byte NOPs">;
+
// Sandy Bridge and newer processors can use SHLD with the same source on both
// inputs to implement rotate to avoid the partial flag update of the normal
// rotate instructions.
-def FeatureFastSHLDRotate
+def TuningFastSHLDRotate
: SubtargetFeature<
"fast-shld-rotate", "HasFastSHLDRotate", "true",
"SHLD can be used as a faster rotate">;
-// Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
-// "string operations"). See "REP String Enhancement" in the Intel Software
-// Development Manual. This feature essentially means that REP MOVSB will copy
-// using the largest available size instead of copying bytes one by one, making
-// it at least as fast as REPMOVS{W,D,Q}.
-def FeatureERMSB
- : SubtargetFeature<
- "ermsb", "HasERMSB", "true",
- "REP MOVS/STOS are fast">;
-
-// Icelake and newer processors have Fast Short REP MOV.
-def FeatureFSRM
- : SubtargetFeature<
- "fsrm", "HasFSRM", "true",
- "REP MOVSB of short lengths is faster">;
-
// Bulldozer and newer processors can merge CMP/TEST (but not other
// instructions) with conditional branches.
-def FeatureBranchFusion
+def TuningBranchFusion
: SubtargetFeature<"branchfusion", "HasBranchFusion", "true",
"CMP/TEST can be fused with conditional branches">;
// Sandy Bridge and newer processors have many instructions that can be
// fused with conditional branches and pass through the CPU as a single
// operation.
-def FeatureMacroFusion
+def TuningMacroFusion
: SubtargetFeature<"macrofusion", "HasMacroFusion", "true",
"Various instructions can be fused with conditional branches">;
@@ -413,117 +527,54 @@ def FeatureMacroFusion
// generate Gathers on all AVX2 processors. But the overhead on HSW is high.
// Skylake Client processor has faster Gathers than HSW and performance is
// similar to Skylake Server (AVX-512).
-def FeatureHasFastGather
+def TuningFastGather
: SubtargetFeature<"fast-gather", "HasFastGather", "true",
"Indicates if gather is reasonably fast">;
-def FeaturePrefer128Bit
+def TuningPrefer128Bit
: SubtargetFeature<"prefer-128-bit", "Prefer128Bit", "true",
"Prefer 128-bit AVX instructions">;
-def FeaturePrefer256Bit
+def TuningPrefer256Bit
: SubtargetFeature<"prefer-256-bit", "Prefer256Bit", "true",
"Prefer 256-bit AVX instructions">;
-def FeaturePreferMaskRegisters
+def TuningPreferMaskRegisters
: SubtargetFeature<"prefer-mask-registers", "PreferMaskRegisters", "true",
"Prefer AVX512 mask registers over PTEST/MOVMSK">;
-// Lower indirect calls using a special construct called a `retpoline` to
-// mitigate potential Spectre v2 attacks against them.
-def FeatureRetpolineIndirectCalls
- : SubtargetFeature<
- "retpoline-indirect-calls", "UseRetpolineIndirectCalls", "true",
- "Remove speculation of indirect calls from the generated code">;
-
-// Lower indirect branches and switches either using conditional branch trees
-// or using a special construct called a `retpoline` to mitigate potential
-// Spectre v2 attacks against them.
-def FeatureRetpolineIndirectBranches
- : SubtargetFeature<
- "retpoline-indirect-branches", "UseRetpolineIndirectBranches", "true",
- "Remove speculation of indirect branches from the generated code">;
-
-// Deprecated umbrella feature for enabling both `retpoline-indirect-calls` and
-// `retpoline-indirect-branches` above.
-def FeatureRetpoline
- : SubtargetFeature<"retpoline", "DeprecatedUseRetpoline", "true",
- "Remove speculation of indirect branches from the "
- "generated code, either by avoiding them entirely or "
- "lowering them with a speculation blocking construct",
- [FeatureRetpolineIndirectCalls,
- FeatureRetpolineIndirectBranches]>;
-
-// Rely on external thunks for the emitted retpoline calls. This allows users
-// to provide their own custom thunk definitions in highly specialized
-// environments such as a kernel that does boot-time hot patching.
-def FeatureRetpolineExternalThunk
- : SubtargetFeature<
- "retpoline-external-thunk", "UseRetpolineExternalThunk", "true",
- "When lowering an indirect call or branch using a `retpoline`, rely "
- "on the specified user provided thunk rather than emitting one "
- "ourselves. Only has effect when combined with some other retpoline "
- "feature", [FeatureRetpolineIndirectCalls]>;
-
-// Mitigate LVI attacks against indirect calls/branches and call returns
-def FeatureLVIControlFlowIntegrity
- : SubtargetFeature<
- "lvi-cfi", "UseLVIControlFlowIntegrity", "true",
- "Prevent indirect calls/branches from using a memory operand, and "
- "precede all indirect calls/branches from a register with an "
- "LFENCE instruction to serialize control flow. Also decompose RET "
- "instructions into a POP+LFENCE+JMP sequence.">;
-
-// Enable SESES to mitigate speculative execution attacks
-def FeatureSpeculativeExecutionSideEffectSuppression
- : SubtargetFeature<
- "seses", "UseSpeculativeExecutionSideEffectSuppression", "true",
- "Prevent speculative execution side channel timing attacks by "
- "inserting a speculation barrier before memory reads, memory writes, "
- "and conditional branches. Implies LVI Control Flow integrity.",
- [FeatureLVIControlFlowIntegrity]>;
-
-// Mitigate LVI attacks against data loads
-def FeatureLVILoadHardening
- : SubtargetFeature<
- "lvi-load-hardening", "UseLVILoadHardening", "true",
- "Insert LFENCE instructions to prevent data speculatively injected "
- "into loads from being used maliciously.">;
-
-// Direct Move instructions.
-def FeatureMOVDIRI : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
- "Support movdiri instruction">;
-def FeatureMOVDIR64B : SubtargetFeature<"movdir64b", "HasMOVDIR64B", "true",
- "Support movdir64b instruction">;
-
-def FeatureFastBEXTR : SubtargetFeature<"fast-bextr", "HasFastBEXTR", "true",
+def TuningFastBEXTR : SubtargetFeature<"fast-bextr", "HasFastBEXTR", "true",
"Indicates that the BEXTR instruction is implemented as a single uop "
"with good throughput">;
// Combine vector math operations with shuffles into horizontal math
// instructions if a CPU implements horizontal operations (introduced with
// SSE3) with better latency/throughput than the alternative sequence.
-def FeatureFastHorizontalOps
+def TuningFastHorizontalOps
: SubtargetFeature<
"fast-hops", "HasFastHorizontalOps", "true",
"Prefer horizontal vector math instructions (haddp, phsub, etc.) over "
"normal vector instructions with shuffles">;
-def FeatureFastScalarShiftMasks
+def TuningFastScalarShiftMasks
: SubtargetFeature<
"fast-scalar-shift-masks", "HasFastScalarShiftMasks", "true",
"Prefer a left/right scalar logical shift pair over a shift+and pair">;
-def FeatureFastVectorShiftMasks
+def TuningFastVectorShiftMasks
: SubtargetFeature<
"fast-vector-shift-masks", "HasFastVectorShiftMasks", "true",
"Prefer a left/right vector logical shift pair over a shift+and pair">;
-def FeatureFastMOVBE
+def TuningFastMOVBE
: SubtargetFeature<"fast-movbe", "HasFastMOVBE", "true",
"Prefer a movbe over a single-use load + bswap / single-use bswap + store">;
-def FeatureUseGLMDivSqrtCosts
+def TuningUseSLMArithCosts
+ : SubtargetFeature<"use-slm-arith-costs", "UseSLMArithCosts", "true",
+ "Use Silvermont specific arithmetic costs">;
+
+def TuningUseGLMDivSqrtCosts
: SubtargetFeature<"use-glm-div-sqrt-costs", "UseGLMDivSqrtCosts", "true",
"Use Goldmont specific floating point div/sqrt costs">;
@@ -531,10 +582,13 @@ def FeatureUseGLMDivSqrtCosts
def FeatureUseAA : SubtargetFeature<"use-aa", "UseAA", "true",
"Use alias analysis during codegen">;
+//===----------------------------------------------------------------------===//
+// X86 CPU Families
+// TODO: Remove these - use general tuning features to determine codegen.
+//===----------------------------------------------------------------------===//
+
// Bonnell
def ProcIntelAtom : SubtargetFeature<"", "X86ProcFamily", "IntelAtom", "">;
-// Silvermont
-def ProcIntelSLM : SubtargetFeature<"", "X86ProcFamily", "IntelSLM", "">;
//===----------------------------------------------------------------------===//
// Register File Description
@@ -569,6 +623,7 @@ include "X86ScheduleBdVer2.td"
include "X86ScheduleBtVer2.td"
include "X86SchedSkylakeClient.td"
include "X86SchedSkylakeServer.td"
+include "X86SchedIceLake.td"
//===----------------------------------------------------------------------===//
// X86 Processor Feature Lists
@@ -580,9 +635,10 @@ def ProcessorFeatures {
FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX, FeatureSSE2,
FeatureFXSR, FeatureNOPL, Feature64Bit
];
- list<SubtargetFeature> X86_64V2Features = !listconcat(
- X86_64V1Features,
- [FeatureCMPXCHG16B, FeatureLAHFSAHF, FeaturePOPCNT, FeatureSSE42]);
+ list<SubtargetFeature> X86_64V2Features = !listconcat(X86_64V1Features, [
+ FeatureCMPXCHG16B, FeatureLAHFSAHF, FeatureCRC32, FeaturePOPCNT,
+ FeatureSSE42
+ ]);
list<SubtargetFeature> X86_64V3Features = !listconcat(X86_64V2Features, [
FeatureAVX2, FeatureBMI, FeatureBMI2, FeatureF16C, FeatureFMA, FeatureLZCNT,
FeatureMOVBE, FeatureXSAVE
@@ -596,8 +652,8 @@ def ProcessorFeatures {
// Nehalem
list<SubtargetFeature> NHMFeatures = X86_64V2Features;
- list<SubtargetFeature> NHMTuning = [FeatureMacroFusion,
- FeatureInsertVZEROUPPER];
+ list<SubtargetFeature> NHMTuning = [TuningMacroFusion,
+ TuningInsertVZEROUPPER];
// Westmere
list<SubtargetFeature> WSMAdditionalFeatures = [FeaturePCLMUL];
@@ -609,15 +665,15 @@ def ProcessorFeatures {
list<SubtargetFeature> SNBAdditionalFeatures = [FeatureAVX,
FeatureXSAVE,
FeatureXSAVEOPT];
- list<SubtargetFeature> SNBTuning = [FeatureMacroFusion,
- FeatureSlow3OpsLEA,
- FeatureSlowDivide64,
- FeatureSlowUAMem32,
- FeatureFastScalarFSQRT,
- FeatureFastSHLDRotate,
- FeatureFast15ByteNOP,
- FeaturePOPCNTFalseDeps,
- FeatureInsertVZEROUPPER];
+ list<SubtargetFeature> SNBTuning = [TuningMacroFusion,
+ TuningSlow3OpsLEA,
+ TuningSlowDivide64,
+ TuningSlowUAMem32,
+ TuningFastScalarFSQRT,
+ TuningFastSHLDRotate,
+ TuningFast15ByteNOP,
+ TuningPOPCNTFalseDeps,
+ TuningInsertVZEROUPPER];
list<SubtargetFeature> SNBFeatures =
!listconcat(WSMFeatures, SNBAdditionalFeatures);
@@ -638,17 +694,17 @@ def ProcessorFeatures {
FeatureINVPCID,
FeatureLZCNT,
FeatureMOVBE];
- list<SubtargetFeature> HSWTuning = [FeatureMacroFusion,
- FeatureSlow3OpsLEA,
- FeatureSlowDivide64,
- FeatureFastScalarFSQRT,
- FeatureFastSHLDRotate,
- FeatureFast15ByteNOP,
- FeatureFastVariableCrossLaneShuffle,
- FeatureFastVariablePerLaneShuffle,
- FeaturePOPCNTFalseDeps,
- FeatureLZCNTFalseDeps,
- FeatureInsertVZEROUPPER];
+ list<SubtargetFeature> HSWTuning = [TuningMacroFusion,
+ TuningSlow3OpsLEA,
+ TuningSlowDivide64,
+ TuningFastScalarFSQRT,
+ TuningFastSHLDRotate,
+ TuningFast15ByteNOP,
+ TuningFastVariableCrossLaneShuffle,
+ TuningFastVariablePerLaneShuffle,
+ TuningPOPCNTFalseDeps,
+ TuningLZCNTFalseDeps,
+ TuningInsertVZEROUPPER];
list<SubtargetFeature> HSWFeatures =
!listconcat(IVBFeatures, HSWAdditionalFeatures);
@@ -665,18 +721,18 @@ def ProcessorFeatures {
FeatureXSAVEC,
FeatureXSAVES,
FeatureCLFLUSHOPT];
- list<SubtargetFeature> SKLTuning = [FeatureHasFastGather,
- FeatureMacroFusion,
- FeatureSlow3OpsLEA,
- FeatureSlowDivide64,
- FeatureFastScalarFSQRT,
- FeatureFastVectorFSQRT,
- FeatureFastSHLDRotate,
- FeatureFast15ByteNOP,
- FeatureFastVariableCrossLaneShuffle,
- FeatureFastVariablePerLaneShuffle,
- FeaturePOPCNTFalseDeps,
- FeatureInsertVZEROUPPER];
+ list<SubtargetFeature> SKLTuning = [TuningFastGather,
+ TuningMacroFusion,
+ TuningSlow3OpsLEA,
+ TuningSlowDivide64,
+ TuningFastScalarFSQRT,
+ TuningFastVectorFSQRT,
+ TuningFastSHLDRotate,
+ TuningFast15ByteNOP,
+ TuningFastVariableCrossLaneShuffle,
+ TuningFastVariablePerLaneShuffle,
+ TuningPOPCNTFalseDeps,
+ TuningInsertVZEROUPPER];
list<SubtargetFeature> SKLFeatures =
!listconcat(BDWFeatures, SKLAdditionalFeatures);
@@ -692,19 +748,19 @@ def ProcessorFeatures {
FeatureVLX,
FeaturePKU,
FeatureCLWB];
- list<SubtargetFeature> SKXTuning = [FeatureHasFastGather,
- FeatureMacroFusion,
- FeatureSlow3OpsLEA,
- FeatureSlowDivide64,
- FeatureFastScalarFSQRT,
- FeatureFastVectorFSQRT,
- FeatureFastSHLDRotate,
- FeatureFast15ByteNOP,
- FeatureFastVariableCrossLaneShuffle,
- FeatureFastVariablePerLaneShuffle,
- FeaturePrefer256Bit,
- FeaturePOPCNTFalseDeps,
- FeatureInsertVZEROUPPER];
+ list<SubtargetFeature> SKXTuning = [TuningFastGather,
+ TuningMacroFusion,
+ TuningSlow3OpsLEA,
+ TuningSlowDivide64,
+ TuningFastScalarFSQRT,
+ TuningFastVectorFSQRT,
+ TuningFastSHLDRotate,
+ TuningFast15ByteNOP,
+ TuningFastVariableCrossLaneShuffle,
+ TuningFastVariablePerLaneShuffle,
+ TuningPrefer256Bit,
+ TuningPOPCNTFalseDeps,
+ TuningInsertVZEROUPPER];
list<SubtargetFeature> SKXFeatures =
!listconcat(BDWFeatures, SKXAdditionalFeatures);
@@ -730,18 +786,18 @@ def ProcessorFeatures {
FeatureVBMI,
FeatureIFMA,
FeatureSHA];
- list<SubtargetFeature> CNLTuning = [FeatureHasFastGather,
- FeatureMacroFusion,
- FeatureSlow3OpsLEA,
- FeatureSlowDivide64,
- FeatureFastScalarFSQRT,
- FeatureFastVectorFSQRT,
- FeatureFastSHLDRotate,
- FeatureFast15ByteNOP,
- FeatureFastVariableCrossLaneShuffle,
- FeatureFastVariablePerLaneShuffle,
- FeaturePrefer256Bit,
- FeatureInsertVZEROUPPER];
+ list<SubtargetFeature> CNLTuning = [TuningFastGather,
+ TuningMacroFusion,
+ TuningSlow3OpsLEA,
+ TuningSlowDivide64,
+ TuningFastScalarFSQRT,
+ TuningFastVectorFSQRT,
+ TuningFastSHLDRotate,
+ TuningFast15ByteNOP,
+ TuningFastVariableCrossLaneShuffle,
+ TuningFastVariablePerLaneShuffle,
+ TuningPrefer256Bit,
+ TuningInsertVZEROUPPER];
list<SubtargetFeature> CNLFeatures =
!listconcat(SKLFeatures, CNLAdditionalFeatures);
@@ -755,7 +811,18 @@ def ProcessorFeatures {
FeatureGFNI,
FeatureRDPID,
FeatureFSRM];
- list<SubtargetFeature> ICLTuning = CNLTuning;
+ list<SubtargetFeature> ICLTuning = [TuningFastGather,
+ TuningMacroFusion,
+ TuningSlow3OpsLEA,
+ TuningSlowDivide64,
+ TuningFastScalarFSQRT,
+ TuningFastVectorFSQRT,
+ TuningFastSHLDRotate,
+ TuningFast15ByteNOP,
+ TuningFastVariableCrossLaneShuffle,
+ TuningFastVariablePerLaneShuffle,
+ TuningPrefer256Bit,
+ TuningInsertVZEROUPPER];
list<SubtargetFeature> ICLFeatures =
!listconcat(CNLFeatures, ICLAdditionalFeatures);
@@ -763,7 +830,7 @@ def ProcessorFeatures {
list<SubtargetFeature> ICXAdditionalFeatures = [FeaturePCONFIG,
FeatureCLWB,
FeatureWBNOINVD];
- list<SubtargetFeature> ICXTuning = CNLTuning;
+ list<SubtargetFeature> ICXTuning = ICLTuning;
list<SubtargetFeature> ICXFeatures =
!listconcat(ICLFeatures, ICXAdditionalFeatures);
@@ -773,7 +840,7 @@ def ProcessorFeatures {
FeatureMOVDIRI,
FeatureMOVDIR64B,
FeatureSHSTK];
- list<SubtargetFeature> TGLTuning = CNLTuning;
+ list<SubtargetFeature> TGLTuning = ICLTuning;
list<SubtargetFeature> TGLFeatures =
!listconcat(ICLFeatures, TGLAdditionalFeatures );
@@ -786,6 +853,7 @@ def ProcessorFeatures {
FeatureCLDEMOTE,
FeatureWAITPKG,
FeaturePTWRITE,
+ FeatureFP16,
FeatureAVXVNNI,
FeatureTSXLDTRK,
FeatureENQCMD,
@@ -811,31 +879,32 @@ def ProcessorFeatures {
FeatureMOVBE,
FeatureLAHFSAHF];
list<SubtargetFeature> AtomTuning = [ProcIntelAtom,
- FeatureSlowUAMem16,
- FeatureLEAForSP,
- FeatureSlowDivide32,
- FeatureSlowDivide64,
- FeatureSlowTwoMemOps,
- FeatureLEAUsesAG,
- FeaturePadShortFunctions,
- FeatureInsertVZEROUPPER];
+ TuningSlowUAMem16,
+ TuningLEAForSP,
+ TuningSlowDivide32,
+ TuningSlowDivide64,
+ TuningSlowTwoMemOps,
+ TuningLEAUsesAG,
+ TuningPadShortFunctions,
+ TuningInsertVZEROUPPER];
// Silvermont
list<SubtargetFeature> SLMAdditionalFeatures = [FeatureSSE42,
+ FeatureCRC32,
FeaturePOPCNT,
FeaturePCLMUL,
FeaturePRFCHW,
FeatureRDRAND];
- list<SubtargetFeature> SLMTuning = [ProcIntelSLM,
- FeatureSlowTwoMemOps,
- FeatureSlowLEA,
- FeatureSlowIncDec,
- FeatureSlowDivide64,
- FeatureSlowPMULLD,
- FeatureFast7ByteNOP,
- FeatureFastMOVBE,
- FeaturePOPCNTFalseDeps,
- FeatureInsertVZEROUPPER];
+ list<SubtargetFeature> SLMTuning = [TuningUseSLMArithCosts,
+ TuningSlowTwoMemOps,
+ TuningSlowLEA,
+ TuningSlowIncDec,
+ TuningSlowDivide64,
+ TuningSlowPMULLD,
+ TuningFast7ByteNOP,
+ TuningFastMOVBE,
+ TuningPOPCNTFalseDeps,
+ TuningInsertVZEROUPPER];
list<SubtargetFeature> SLMFeatures =
!listconcat(AtomFeatures, SLMAdditionalFeatures);
@@ -849,25 +918,25 @@ def ProcessorFeatures {
FeatureXSAVES,
FeatureCLFLUSHOPT,
FeatureFSGSBase];
- list<SubtargetFeature> GLMTuning = [FeatureUseGLMDivSqrtCosts,
- FeatureSlowTwoMemOps,
- FeatureSlowLEA,
- FeatureSlowIncDec,
- FeatureFastMOVBE,
- FeaturePOPCNTFalseDeps,
- FeatureInsertVZEROUPPER];
+ list<SubtargetFeature> GLMTuning = [TuningUseGLMDivSqrtCosts,
+ TuningSlowTwoMemOps,
+ TuningSlowLEA,
+ TuningSlowIncDec,
+ TuningFastMOVBE,
+ TuningPOPCNTFalseDeps,
+ TuningInsertVZEROUPPER];
list<SubtargetFeature> GLMFeatures =
!listconcat(SLMFeatures, GLMAdditionalFeatures);
// Goldmont Plus
list<SubtargetFeature> GLPAdditionalFeatures = [FeaturePTWRITE,
FeatureRDPID];
- list<SubtargetFeature> GLPTuning = [FeatureUseGLMDivSqrtCosts,
- FeatureSlowTwoMemOps,
- FeatureSlowLEA,
- FeatureSlowIncDec,
- FeatureFastMOVBE,
- FeatureInsertVZEROUPPER];
+ list<SubtargetFeature> GLPTuning = [TuningUseGLMDivSqrtCosts,
+ TuningSlowTwoMemOps,
+ TuningSlowLEA,
+ TuningSlowIncDec,
+ TuningFastMOVBE,
+ TuningInsertVZEROUPPER];
list<SubtargetFeature> GLPFeatures =
!listconcat(GLMFeatures, GLPAdditionalFeatures);
@@ -912,6 +981,7 @@ def ProcessorFeatures {
FeatureNOPL,
Feature64Bit,
FeatureCMPXCHG16B,
+ FeatureCRC32,
FeaturePOPCNT,
FeaturePCLMUL,
FeatureXSAVE,
@@ -934,14 +1004,14 @@ def ProcessorFeatures {
FeatureBMI2,
FeatureFMA,
FeaturePRFCHW];
- list<SubtargetFeature> KNLTuning = [FeatureSlowDivide64,
- FeatureSlow3OpsLEA,
- FeatureSlowIncDec,
- FeatureSlowTwoMemOps,
- FeaturePreferMaskRegisters,
- FeatureHasFastGather,
- FeatureFastMOVBE,
- FeatureSlowPMADDWD];
+ list<SubtargetFeature> KNLTuning = [TuningSlowDivide64,
+ TuningSlow3OpsLEA,
+ TuningSlowIncDec,
+ TuningSlowTwoMemOps,
+ TuningPreferMaskRegisters,
+ TuningFastGather,
+ TuningFastMOVBE,
+ TuningSlowPMADDWD];
// TODO Add AVX5124FMAPS/AVX5124VNNIW features
list<SubtargetFeature> KNMFeatures =
!listconcat(KNLFeatures, [FeatureVPOPCNTDQ]);
@@ -960,9 +1030,9 @@ def ProcessorFeatures {
FeatureLAHFSAHF,
FeatureCMOV,
Feature64Bit];
- list<SubtargetFeature> BarcelonaTuning = [FeatureFastScalarShiftMasks,
- FeatureSlowSHLD,
- FeatureInsertVZEROUPPER];
+ list<SubtargetFeature> BarcelonaTuning = [TuningFastScalarShiftMasks,
+ TuningSlowSHLD,
+ TuningInsertVZEROUPPER];
// Bobcat
list<SubtargetFeature> BtVer1Features = [FeatureX87,
@@ -979,29 +1049,30 @@ def ProcessorFeatures {
FeatureLZCNT,
FeaturePOPCNT,
FeatureLAHFSAHF];
- list<SubtargetFeature> BtVer1Tuning = [FeatureFast15ByteNOP,
- FeatureFastScalarShiftMasks,
- FeatureFastVectorShiftMasks,
- FeatureSlowSHLD,
- FeatureInsertVZEROUPPER];
+ list<SubtargetFeature> BtVer1Tuning = [TuningFast15ByteNOP,
+ TuningFastScalarShiftMasks,
+ TuningFastVectorShiftMasks,
+ TuningSlowSHLD,
+ TuningInsertVZEROUPPER];
// Jaguar
list<SubtargetFeature> BtVer2AdditionalFeatures = [FeatureAVX,
FeatureAES,
+ FeatureCRC32,
FeaturePCLMUL,
FeatureBMI,
FeatureF16C,
FeatureMOVBE,
FeatureXSAVE,
FeatureXSAVEOPT];
- list<SubtargetFeature> BtVer2Tuning = [FeatureFastLZCNT,
- FeatureFastBEXTR,
- FeatureFastHorizontalOps,
- FeatureFast15ByteNOP,
- FeatureFastScalarShiftMasks,
- FeatureFastVectorShiftMasks,
- FeatureFastMOVBE,
- FeatureSlowSHLD];
+ list<SubtargetFeature> BtVer2Tuning = [TuningFastLZCNT,
+ TuningFastBEXTR,
+ TuningFastHorizontalOps,
+ TuningFast15ByteNOP,
+ TuningFastScalarShiftMasks,
+ TuningFastVectorShiftMasks,
+ TuningFastMOVBE,
+ TuningSlowSHLD];
list<SubtargetFeature> BtVer2Features =
!listconcat(BtVer1Features, BtVer2AdditionalFeatures);
@@ -1013,6 +1084,7 @@ def ProcessorFeatures {
Feature64Bit,
FeatureCMPXCHG16B,
FeatureAES,
+ FeatureCRC32,
FeaturePRFCHW,
FeaturePCLMUL,
FeatureMMX,
@@ -1023,19 +1095,19 @@ def ProcessorFeatures {
FeatureXSAVE,
FeatureLWP,
FeatureLAHFSAHF];
- list<SubtargetFeature> BdVer1Tuning = [FeatureSlowSHLD,
- FeatureFast11ByteNOP,
- FeatureFastScalarShiftMasks,
- FeatureBranchFusion,
- FeatureInsertVZEROUPPER];
+ list<SubtargetFeature> BdVer1Tuning = [TuningSlowSHLD,
+ TuningFast11ByteNOP,
+ TuningFastScalarShiftMasks,
+ TuningBranchFusion,
+ TuningInsertVZEROUPPER];
// PileDriver
list<SubtargetFeature> BdVer2AdditionalFeatures = [FeatureF16C,
FeatureBMI,
FeatureTBM,
- FeatureFMA,
- FeatureFastBEXTR];
- list<SubtargetFeature> BdVer2AdditionalTuning = [FeatureFastMOVBE];
+ FeatureFMA];
+ list<SubtargetFeature> BdVer2AdditionalTuning = [TuningFastBEXTR,
+ TuningFastMOVBE];
list<SubtargetFeature> BdVer2Tuning =
!listconcat(BdVer1Tuning, BdVer2AdditionalTuning);
list<SubtargetFeature> BdVer2Features =
@@ -1070,6 +1142,7 @@ def ProcessorFeatures {
FeatureCMOV,
Feature64Bit,
FeatureCMPXCHG16B,
+ FeatureCRC32,
FeatureF16C,
FeatureFMA,
FeatureFSGSBase,
@@ -1092,14 +1165,14 @@ def ProcessorFeatures {
FeatureXSAVEC,
FeatureXSAVEOPT,
FeatureXSAVES];
- list<SubtargetFeature> ZNTuning = [FeatureFastLZCNT,
- FeatureFastBEXTR,
- FeatureFast15ByteNOP,
- FeatureBranchFusion,
- FeatureFastScalarShiftMasks,
- FeatureFastMOVBE,
- FeatureSlowSHLD,
- FeatureInsertVZEROUPPER];
+ list<SubtargetFeature> ZNTuning = [TuningFastLZCNT,
+ TuningFastBEXTR,
+ TuningFast15ByteNOP,
+ TuningBranchFusion,
+ TuningFastScalarShiftMasks,
+ TuningFastMOVBE,
+ TuningSlowSHLD,
+ TuningInsertVZEROUPPER];
list<SubtargetFeature> ZN2AdditionalFeatures = [FeatureCLWB,
FeatureRDPID,
FeatureWBNOINVD];
@@ -1112,8 +1185,8 @@ def ProcessorFeatures {
FeatureVAES,
FeatureVPCLMULQDQ];
list<SubtargetFeature> ZN3AdditionalTuning =
- [FeatureMacroFusion,
- FeatureFastVariablePerLaneShuffle];
+ [TuningMacroFusion,
+ TuningFastVariablePerLaneShuffle];
list<SubtargetFeature> ZN3Tuning =
!listconcat(ZNTuning, ZN3AdditionalTuning);
list<SubtargetFeature> ZN3Features =
@@ -1140,37 +1213,37 @@ class ProcModel<string Name, SchedMachineModel Model,
// It has no effect on code generation.
def : ProcModel<"generic", SandyBridgeModel,
[FeatureX87, FeatureCMPXCHG8B, Feature64Bit],
- [FeatureSlow3OpsLEA,
- FeatureSlowDivide64,
- FeatureSlowIncDec,
- FeatureMacroFusion,
- FeatureInsertVZEROUPPER]>;
+ [TuningSlow3OpsLEA,
+ TuningSlowDivide64,
+ TuningSlowIncDec,
+ TuningMacroFusion,
+ TuningInsertVZEROUPPER]>;
def : Proc<"i386", [FeatureX87],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"i486", [FeatureX87],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"i586", [FeatureX87, FeatureCMPXCHG8B],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"pentium", [FeatureX87, FeatureCMPXCHG8B],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"pentium-mmx", [FeatureX87, FeatureCMPXCHG8B, FeatureMMX],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"i686", [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"pentiumpro", [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV,
FeatureNOPL],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"pentium2", [FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureCMOV,
FeatureFXSR, FeatureNOPL],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
foreach P = ["pentium3", "pentium3m"] in {
def : Proc<P, [FeatureX87, FeatureCMPXCHG8B, FeatureMMX,
FeatureSSE1, FeatureFXSR, FeatureNOPL, FeatureCMOV],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
// Enable the PostRAScheduler for SSE2 and SSE3 class cpus.
@@ -1186,30 +1259,30 @@ foreach P = ["pentium3", "pentium3m"] in {
def : ProcModel<"pentium-m", GenericPostRAModel,
[FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE2,
FeatureFXSR, FeatureNOPL, FeatureCMOV],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
foreach P = ["pentium4", "pentium4m"] in {
def : ProcModel<P, GenericPostRAModel,
[FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE2,
FeatureFXSR, FeatureNOPL, FeatureCMOV],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
// Intel Quark.
def : Proc<"lakemont", [FeatureCMPXCHG8B],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
// Intel Core Duo.
def : ProcModel<"yonah", SandyBridgeModel,
[FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE3,
FeatureFXSR, FeatureNOPL, FeatureCMOV],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
// NetBurst.
def : ProcModel<"prescott", GenericPostRAModel,
[FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE3,
FeatureFXSR, FeatureNOPL, FeatureCMOV],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : ProcModel<"nocona", GenericPostRAModel, [
FeatureX87,
FeatureCMPXCHG8B,
@@ -1222,8 +1295,8 @@ def : ProcModel<"nocona", GenericPostRAModel, [
FeatureCMPXCHG16B,
],
[
- FeatureSlowUAMem16,
- FeatureInsertVZEROUPPER
+ TuningSlowUAMem16,
+ TuningInsertVZEROUPPER
]>;
// Intel Core 2 Solo/Duo.
@@ -1240,9 +1313,9 @@ def : ProcModel<"core2", SandyBridgeModel, [
FeatureLAHFSAHF
],
[
- FeatureMacroFusion,
- FeatureSlowUAMem16,
- FeatureInsertVZEROUPPER
+ TuningMacroFusion,
+ TuningSlowUAMem16,
+ TuningInsertVZEROUPPER
]>;
def : ProcModel<"penryn", SandyBridgeModel, [
FeatureX87,
@@ -1257,9 +1330,9 @@ def : ProcModel<"penryn", SandyBridgeModel, [
FeatureLAHFSAHF
],
[
- FeatureMacroFusion,
- FeatureSlowUAMem16,
- FeatureInsertVZEROUPPER
+ TuningMacroFusion,
+ TuningSlowUAMem16,
+ TuningInsertVZEROUPPER
]>;
// Atom CPUs.
@@ -1328,13 +1401,13 @@ def : ProcModel<"cooperlake", SkylakeServerModel,
ProcessorFeatures.CPXFeatures, ProcessorFeatures.CPXTuning>;
def : ProcModel<"cannonlake", SkylakeServerModel,
ProcessorFeatures.CNLFeatures, ProcessorFeatures.CNLTuning>;
-def : ProcModel<"icelake-client", SkylakeServerModel,
+def : ProcModel<"icelake-client", IceLakeModel,
ProcessorFeatures.ICLFeatures, ProcessorFeatures.ICLTuning>;
-def : ProcModel<"rocketlake", SkylakeServerModel,
+def : ProcModel<"rocketlake", IceLakeModel,
ProcessorFeatures.ICLFeatures, ProcessorFeatures.ICLTuning>;
-def : ProcModel<"icelake-server", SkylakeServerModel,
+def : ProcModel<"icelake-server", IceLakeModel,
ProcessorFeatures.ICXFeatures, ProcessorFeatures.ICXTuning>;
-def : ProcModel<"tigerlake", SkylakeServerModel,
+def : ProcModel<"tigerlake", IceLakeModel,
ProcessorFeatures.TGLFeatures, ProcessorFeatures.TGLTuning>;
def : ProcModel<"sapphirerapids", SkylakeServerModel,
ProcessorFeatures.SPRFeatures, ProcessorFeatures.SPRTuning>;
@@ -1344,37 +1417,37 @@ def : ProcModel<"alderlake", SkylakeClientModel,
// AMD CPUs.
def : Proc<"k6", [FeatureX87, FeatureCMPXCHG8B, FeatureMMX],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"k6-2", [FeatureX87, FeatureCMPXCHG8B, Feature3DNow],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"k6-3", [FeatureX87, FeatureCMPXCHG8B, Feature3DNow],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
foreach P = ["athlon", "athlon-tbird"] in {
def : Proc<P, [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, Feature3DNowA,
FeatureNOPL],
- [FeatureSlowSHLD, FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowSHLD, TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in {
def : Proc<P, [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV,
FeatureSSE1, Feature3DNowA, FeatureFXSR, FeatureNOPL],
- [FeatureSlowSHLD, FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowSHLD, TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in {
def : Proc<P, [FeatureX87, FeatureCMPXCHG8B, FeatureSSE2, Feature3DNowA,
FeatureFXSR, FeatureNOPL, Feature64Bit, FeatureCMOV],
- [FeatureFastScalarShiftMasks, FeatureSlowSHLD, FeatureSlowUAMem16,
- FeatureInsertVZEROUPPER]>;
+ [TuningFastScalarShiftMasks, TuningSlowSHLD, TuningSlowUAMem16,
+ TuningInsertVZEROUPPER]>;
}
foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in {
def : Proc<P, [FeatureX87, FeatureCMPXCHG8B, FeatureSSE3, Feature3DNowA,
FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B, FeatureCMOV,
Feature64Bit],
- [FeatureFastScalarShiftMasks, FeatureSlowSHLD, FeatureSlowUAMem16,
- FeatureInsertVZEROUPPER]>;
+ [TuningFastScalarShiftMasks, TuningSlowSHLD, TuningSlowUAMem16,
+ TuningInsertVZEROUPPER]>;
}
foreach P = ["amdfam10", "barcelona"] in {
@@ -1410,17 +1483,17 @@ def : ProcModel<"znver3", Znver3Model, ProcessorFeatures.ZN3Features,
ProcessorFeatures.ZN3Tuning>;
def : Proc<"geode", [FeatureX87, FeatureCMPXCHG8B, Feature3DNowA],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"winchip-c6", [FeatureX87, FeatureMMX],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"winchip2", [FeatureX87, Feature3DNow],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"c3", [FeatureX87, Feature3DNow],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"c3-2", [FeatureX87, FeatureCMPXCHG8B, FeatureMMX,
FeatureSSE1, FeatureFXSR, FeatureCMOV],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
// We also provide a generic 64-bit specific x86 processor model which tries to
// be good for modern chips without enabling instruction set encodings past the
@@ -1434,11 +1507,11 @@ def : Proc<"c3-2", [FeatureX87, FeatureCMPXCHG8B, FeatureMMX,
// forming a common base for them.
def : ProcModel<"x86-64", SandyBridgeModel, ProcessorFeatures.X86_64V1Features,
[
- FeatureSlow3OpsLEA,
- FeatureSlowDivide64,
- FeatureSlowIncDec,
- FeatureMacroFusion,
- FeatureInsertVZEROUPPER
+ TuningSlow3OpsLEA,
+ TuningSlowDivide64,
+ TuningSlowIncDec,
+ TuningMacroFusion,
+ TuningInsertVZEROUPPER
]>;
// x86-64 micro-architecture levels.
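
The X86.td diff above splits the subtarget flags into ISA features (Feature*, including the new FeatureCRC32 and FeatureFP16) and tuning hints (Tuning*, such as TuningSlowIncDec and TuningUseSLMArithCosts), and rewires every CPU's tuning list accordingly. A toy sketch of why the distinction matters at the C++ level follows; it does not use the real X86Subtarget, and the accessor names mentioned in the comments (hasCRC32(), slowIncDec()) are assumptions inferred from the .td field names, not verified signatures.

#include <iostream>

// Toy stand-in for the generated subtarget: one ISA feature, one tuning flag.
// In the real backend these booleans come from the Feature*/Tuning* defs above
// and are read through X86Subtarget accessors (names assumed, see lead-in).
struct ToySubtarget {
  bool HasCRC32 = false;   // ISA feature: is the CRC32 instruction available?
  bool SlowIncDec = false; // tuning: INC/DEC are legal but slower than ADD/SUB
};

// ISA features gate what may be selected at all ...
bool canSelectCrc32(const ToySubtarget &ST) { return ST.HasCRC32; }

// ... while tuning flags only steer a choice between legal alternatives.
const char *chooseIncrement(const ToySubtarget &ST) {
  return ST.SlowIncDec ? "add reg, 1" : "inc reg";
}

int main() {
  ToySubtarget Silvermont{/*HasCRC32=*/true, /*SlowIncDec=*/true};
  std::cout << canSelectCrc32(Silvermont) << ' '
            << chooseIncrement(Silvermont) << '\n'; // prints: 1 add reg, 1
}
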
diff --git a/llvm/lib/Target/X86/X86AsmPrinter.cpp b/llvm/lib/Target/X86/X86AsmPrinter.cpp
index a27645389dd4..2e08482e4ff6 100644
--- a/llvm/lib/Target/X86/X86AsmPrinter.cpp
+++ b/llvm/lib/Target/X86/X86AsmPrinter.cpp
@@ -37,10 +37,10 @@
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -190,6 +190,7 @@ void X86AsmPrinter::PrintSymbolOperand(const MachineOperand &MO,
case X86II::MO_NTPOFF: O << "@NTPOFF"; break;
case X86II::MO_GOTNTPOFF: O << "@GOTNTPOFF"; break;
case X86II::MO_GOTPCREL: O << "@GOTPCREL"; break;
+ case X86II::MO_GOTPCREL_NORELAX: O << "@GOTPCREL_NORELAX"; break;
case X86II::MO_GOT: O << "@GOT"; break;
case X86II::MO_GOTOFF: O << "@GOTOFF"; break;
case X86II::MO_PLT: O << "@PLT"; break;
@@ -753,6 +754,8 @@ static void emitNonLazyStubs(MachineModuleInfo *MMI, MCStreamer &OutStreamer) {
void X86AsmPrinter::emitEndOfAsmFile(Module &M) {
const Triple &TT = TM.getTargetTriple();
+ emitAsanMemaccessSymbols(M);
+
if (TT.isOSBinFormatMachO()) {
// Mach-O uses non-lazy symbol stubs to encode per-TU information into
// global table for symbol lookup.
diff --git a/llvm/lib/Target/X86/X86AsmPrinter.h b/llvm/lib/Target/X86/X86AsmPrinter.h
index a3b74c8ee387..3b0983a7d935 100644
--- a/llvm/lib/Target/X86/X86AsmPrinter.h
+++ b/llvm/lib/Target/X86/X86AsmPrinter.h
@@ -23,6 +23,7 @@ class MCCodeEmitter;
class MCStreamer;
class X86Subtarget;
class TargetMachine;
+struct ASanAccessInfo;
class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
const X86Subtarget *Subtarget = nullptr;
@@ -30,7 +31,6 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
FaultMaps FM;
std::unique_ptr<MCCodeEmitter> CodeEmitter;
bool EmitFPOData = false;
- bool NeedsRetpoline = false;
// This utility class tracks the length of a stackmap instruction's 'shadow'.
// It is used by the X86AsmPrinter to ensure that the stackmap shadow
@@ -98,6 +98,23 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
void LowerFENTRY_CALL(const MachineInstr &MI, X86MCInstLower &MCIL);
+ // Address sanitizer specific lowering for X86.
+ void LowerASAN_CHECK_MEMACCESS(const MachineInstr &MI);
+ void emitAsanMemaccessSymbols(Module &M);
+ void emitAsanMemaccessPartial(Module &M, unsigned Reg,
+ const ASanAccessInfo &AccessInfo,
+ MCSubtargetInfo &STI);
+ void emitAsanMemaccessFull(Module &M, unsigned Reg,
+ const ASanAccessInfo &AccessInfo,
+ MCSubtargetInfo &STI);
+ void emitAsanReportError(Module &M, unsigned Reg,
+ const ASanAccessInfo &AccessInfo,
+ MCSubtargetInfo &STI);
+
+ typedef std::tuple<unsigned /*Reg*/, uint32_t /*AccessInfo*/>
+ AsanMemaccessTuple;
+ std::map<AsanMemaccessTuple, MCSymbol *> AsanMemaccessSymbols;
+
// Choose between emitting .seh_ directives and .cv_fpo_ directives.
void EmitSEHInstruction(const MachineInstr *MI);
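
The header hunk above adds a map keyed on (register, access-info) tuples so that X86AsmPrinter can emit one ASan check-memaccess stub per unique combination and reuse it at every call site. A small self-contained sketch of that create-or-reuse pattern follows; the key layout matches the AsanMemaccessTuple typedef, but the stub-naming scheme is purely illustrative and a std::string stands in for the MCSymbol the real code stores.

#include <cstdint>
#include <iostream>
#include <map>
#include <string>
#include <tuple>

// One entry per unique (Reg, AccessInfo) pair, mirroring AsanMemaccessSymbols.
using StubKey = std::tuple<unsigned /*Reg*/, uint32_t /*AccessInfo*/>;

// Create the stub name on first use, hand back the cached one afterwards.
const std::string &getOrCreateStub(std::map<StubKey, std::string> &Stubs,
                                   unsigned Reg, uint32_t AccessInfo) {
  std::string &Name = Stubs[{Reg, AccessInfo}]; // default-constructed on miss
  if (Name.empty())
    Name = "asan_check_reg" + std::to_string(Reg) + "_info" +
           std::to_string(AccessInfo); // illustrative naming only
  return Name;
}

int main() {
  std::map<StubKey, std::string> Stubs;
  getOrCreateStub(Stubs, 0, 32);
  getOrCreateStub(Stubs, 0, 32); // reuses the first entry
  getOrCreateStub(Stubs, 5, 33);
  std::cout << Stubs.size() << '\n'; // prints: 2
}
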
diff --git a/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp b/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
index b6a37f08d7e9..04931afdec51 100644
--- a/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
+++ b/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
@@ -360,22 +360,17 @@ findPotentialBlockers(MachineInstr *LoadInst) {
if (BlockCount < InspectionLimit) {
MachineBasicBlock *MBB = LoadInst->getParent();
int LimitLeft = InspectionLimit - BlockCount;
- for (MachineBasicBlock::pred_iterator PB = MBB->pred_begin(),
- PE = MBB->pred_end();
- PB != PE; ++PB) {
- MachineBasicBlock *PMBB = *PB;
+ for (MachineBasicBlock *PMBB : MBB->predecessors()) {
int PredCount = 0;
- for (MachineBasicBlock::reverse_iterator PBInst = PMBB->rbegin(),
- PME = PMBB->rend();
- PBInst != PME; ++PBInst) {
- if (PBInst->isMetaInstruction())
+ for (MachineInstr &PBInst : llvm::reverse(*PMBB)) {
+ if (PBInst.isMetaInstruction())
continue;
PredCount++;
if (PredCount >= LimitLeft)
break;
- if (PBInst->getDesc().isCall())
+ if (PBInst.getDesc().isCall())
break;
- PotentialBlockers.push_back(&*PBInst);
+ PotentialBlockers.push_back(&PBInst);
}
}
}
@@ -542,9 +537,8 @@ void X86AvoidSFBPass::findPotentiallylBlockedCopies(MachineFunction &MF) {
int DefVR = MI.getOperand(0).getReg();
if (!MRI->hasOneNonDBGUse(DefVR))
continue;
- for (auto UI = MRI->use_nodbg_begin(DefVR), UE = MRI->use_nodbg_end();
- UI != UE;) {
- MachineOperand &StoreMO = *UI++;
+ for (MachineOperand &StoreMO :
+ llvm::make_early_inc_range(MRI->use_nodbg_operands(DefVR))) {
MachineInstr &StoreMI = *StoreMO.getParent();
// Skip cases where the memcpy may overlap.
if (StoreMI.getParent() == MI.getParent() &&
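
The X86AvoidStoreForwardingBlocks hunk above replaces manual iterator loops with range-based loops over MBB->predecessors(), llvm::reverse(*PMBB) and llvm::make_early_inc_range(...), the last of which exists so the loop body may remove the current element without invalidating the loop. A self-contained sketch of that early-increment idea, written without LLVM so it can be compiled directly, is below; forEachEarlyInc is an invented helper, not the LLVM utility itself.

#include <iostream>
#include <list>

// Minimal re-implementation of the early-increment idea behind
// llvm::make_early_inc_range (ADT/STLExtras.h): grab the next iterator before
// handing the current element to the loop body, so the body may erase it.
template <typename Container, typename Fn>
void forEachEarlyInc(Container &C, Fn Body) {
  for (auto It = C.begin(), End = C.end(); It != End;) {
    auto Cur = It++; // advance first ...
    Body(C, Cur);    // ... then let the body possibly erase *Cur
  }
}

int main() {
  std::list<int> Values{1, 2, 3, 4, 5};
  forEachEarlyInc(Values, [](std::list<int> &L, std::list<int>::iterator I) {
    if (*I % 2 == 0)
      L.erase(I); // safe: the loop iterator already points past *I
  });
  for (int V : Values)
    std::cout << V << ' '; // prints: 1 3 5
  std::cout << '\n';
}
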
diff --git a/llvm/lib/Target/X86/X86CallLowering.cpp b/llvm/lib/Target/X86/X86CallLowering.cpp
index c8bffb4d4d37..a14ce82313cb 100644
--- a/llvm/lib/Target/X86/X86CallLowering.cpp
+++ b/llvm/lib/Target/X86/X86CallLowering.cpp
@@ -105,7 +105,7 @@ struct X86OutgoingValueHandler : public CallLowering::OutgoingValueHandler {
}
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign &VA) override {
+ CCValAssign VA) override {
MIB.addUse(PhysReg, RegState::Implicit);
Register ExtReg = extendRegister(ValVReg, VA);
MIRBuilder.buildCopy(PhysReg, ExtReg);
@@ -195,7 +195,7 @@ struct X86IncomingValueHandler : public CallLowering::IncomingValueHandler {
}
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign &VA) override {
+ CCValAssign VA) override {
markPhysRegUsed(PhysReg);
IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
}
diff --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td
index 98883bbf59a8..4dd8a6cdd898 100644
--- a/llvm/lib/Target/X86/X86CallingConv.td
+++ b/llvm/lib/Target/X86/X86CallingConv.td
@@ -23,6 +23,13 @@ class CCIfNotSubtarget<string F, CCAction A>
"(State.getMachineFunction().getSubtarget()).", F),
A>;
+/// CCIfIsVarArgOnWin - Match if isVarArg on 32-bit Windows.
+class CCIfIsVarArgOnWin<CCAction A>
+ : CCIf<"State.isVarArg() && "
+ "State.getMachineFunction().getSubtarget().getTargetTriple()."
+ "isWindowsMSVCEnvironment()",
+ A>;
+
// Register classes for RegCall
class RC_X86_RegCall {
list<Register> GPR_8 = [];
@@ -233,19 +240,19 @@ def RetCC_X86Common : CallingConv<[
// Vector types are returned in XMM0 and XMM1, when they fit. XMM2 and XMM3
// can only be used by ABI non-compliant code. If the target doesn't have XMM
// registers, it won't have vector types.
- CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v8f16, v4f32, v2f64],
CCAssignToReg<[XMM0,XMM1,XMM2,XMM3]>>,
// 256-bit vectors are returned in YMM0 and XMM1, when they fit. YMM2 and YMM3
// can only be used by ABI non-compliant code. This vector type is only
// supported while using the AVX target feature.
- CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v16f16, v8f32, v4f64],
CCAssignToReg<[YMM0,YMM1,YMM2,YMM3]>>,
// 512-bit vectors are returned in ZMM0 and ZMM1, when they fit. ZMM2 and ZMM3
// can only be used by ABI non-compliant code. This vector type is only
// supported while using the AVX-512 target feature.
- CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
+ CCIfType<[v64i8, v32i16, v16i32, v8i64, v32f16, v16f32, v8f64],
CCAssignToReg<[ZMM0,ZMM1,ZMM2,ZMM3]>>,
// MMX vector types are always returned in MM0. If the target doesn't have
@@ -266,7 +273,11 @@ def RetCC_X86_32_C : CallingConv<[
// conv.
CCIfInReg<CCIfSubtarget<"hasSSE2()",
CCIfType<[f32, f64], CCAssignToReg<[XMM0,XMM1,XMM2]>>>>,
- CCIfType<[f32,f64], CCAssignToReg<[FP0, FP1]>>,
+ CCIfSubtarget<"hasX87()",
+ CCIfType<[f32, f64], CCAssignToReg<[FP0, FP1]>>>,
+ CCIfNotSubtarget<"hasX87()",
+ CCIfType<[f32], CCAssignToReg<[EAX, EDX, ECX]>>>,
+ CCIfType<[f16], CCAssignToReg<[XMM0,XMM1,XMM2]>>,
CCDelegateTo<RetCC_X86Common>
]>;
@@ -329,6 +340,7 @@ def RetCC_X86_32_VectorCall : CallingConv<[
// X86-64 C return-value convention.
def RetCC_X86_64_C : CallingConv<[
// The X86-64 calling convention always returns FP values in XMM0.
+ CCIfType<[f16], CCAssignToReg<[XMM0, XMM1]>>,
CCIfType<[f32], CCAssignToReg<[XMM0, XMM1]>>,
CCIfType<[f64], CCAssignToReg<[XMM0, XMM1]>>,
CCIfType<[f128], CCAssignToReg<[XMM0, XMM1]>>,
@@ -552,7 +564,7 @@ def CC_X86_64_C : CallingConv<[
CCIfType<[v64i1], CCPromoteToType<v64i8>>,
// The first 8 FP/Vector arguments are passed in XMM registers.
- CCIfType<[f32, f64, f128, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCIfType<[f16, f32, f64, f128, v16i8, v8i16, v4i32, v2i64, v8f16, v4f32, v2f64],
CCIfSubtarget<"hasSSE1()",
CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
@@ -561,33 +573,33 @@ def CC_X86_64_C : CallingConv<[
// FIXME: This isn't precisely correct; the x86-64 ABI document says that
// fixed arguments to vararg functions are supposed to be passed in
// registers. Actually modeling that would be a lot of work, though.
- CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v16f16, v8f32, v4f64],
CCIfSubtarget<"hasAVX()",
CCAssignToReg<[YMM0, YMM1, YMM2, YMM3,
YMM4, YMM5, YMM6, YMM7]>>>>,
// The first 8 512-bit vector arguments are passed in ZMM registers.
- CCIfNotVarArg<CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
+ CCIfNotVarArg<CCIfType<[v64i8, v32i16, v16i32, v8i64, v32f16, v16f32, v8f64],
CCIfSubtarget<"hasAVX512()",
CCAssignToReg<[ZMM0, ZMM1, ZMM2, ZMM3, ZMM4, ZMM5, ZMM6, ZMM7]>>>>,
// Integer/FP values get stored in stack slots that are 8 bytes in size and
// 8-byte aligned if there are no more registers to hold them.
- CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
+ CCIfType<[i32, i64, f16, f32, f64], CCAssignToStack<8, 8>>,
// Long doubles get stack slots whose size and alignment depends on the
// subtarget.
CCIfType<[f80, f128], CCAssignToStack<0, 0>>,
// Vectors get 16-byte stack slots that are 16-byte aligned.
- CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v8f16, v4f32, v2f64], CCAssignToStack<16, 16>>,
// 256-bit vectors get 32-byte stack slots that are 32-byte aligned.
- CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v16f16, v8f32, v4f64],
CCAssignToStack<32, 32>>,
// 512-bit vectors get 64-byte stack slots that are 64-byte aligned.
- CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
+ CCIfType<[v64i8, v32i16, v16i32, v8i64, v32f16, v16f32, v8f64],
CCAssignToStack<64, 64>>
]>;
@@ -635,13 +647,13 @@ def CC_X86_Win64_C : CallingConv<[
CCIfCFGuardTarget<CCAssignToReg<[RAX]>>,
// 128 bit vectors are passed by pointer
- CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCPassIndirect<i64>>,
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v8f16, v4f32, v2f64], CCPassIndirect<i64>>,
// 256 bit vectors are passed by pointer
- CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], CCPassIndirect<i64>>,
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v16f16, v8f32, v4f64], CCPassIndirect<i64>>,
// 512 bit vectors are passed by pointer
- CCIfType<[v64i8, v32i16, v16i32, v16f32, v8f64, v8i64], CCPassIndirect<i64>>,
+ CCIfType<[v64i8, v32i16, v16i32, v32f16, v16f32, v8f64, v8i64], CCPassIndirect<i64>>,
// Long doubles are passed by pointer
CCIfType<[f80], CCPassIndirect<i64>>,
@@ -655,7 +667,7 @@ def CC_X86_Win64_C : CallingConv<[
CCIfType<[f64], CCIfNotSubtarget<"hasSSE1()", CCBitConvertToType<i64>>>,
// The first 4 FP/Vector arguments are passed in XMM registers.
- CCIfType<[f32, f64],
+ CCIfType<[f16, f32, f64],
CCAssignToRegWithShadow<[XMM0, XMM1, XMM2, XMM3],
[RCX , RDX , R8 , R9 ]>>,
@@ -678,7 +690,7 @@ def CC_X86_Win64_C : CallingConv<[
// Integer/FP values get stored in stack slots that are 8 bytes in size and
// 8-byte aligned if there are no more registers to hold them.
- CCIfType<[i8, i16, i32, i64, f32, f64], CCAssignToStack<8, 8>>
+ CCIfType<[i8, i16, i32, i64, f16, f32, f64], CCAssignToStack<8, 8>>
]>;
def CC_X86_Win64_VectorCall : CallingConv<[
@@ -757,33 +769,51 @@ def CC_X86_64_AnyReg : CallingConv<[
/// values are spilled on the stack.
def CC_X86_32_Vector_Common : CallingConv<[
// Other SSE vectors get 16-byte stack slots that are 16-byte aligned.
- CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v8f16, v4f32, v2f64],
+ CCAssignToStack<16, 16>>,
// 256-bit AVX vectors get 32-byte stack slots that are 32-byte aligned.
- CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v16f16, v8f32, v4f64],
CCAssignToStack<32, 32>>,
// AVX 512-bit vectors get 64-byte stack slots that are 64-byte aligned.
- CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
+ CCIfType<[v64i8, v32i16, v16i32, v8i64, v32f16, v16f32, v8f64],
CCAssignToStack<64, 64>>
]>;
+/// CC_X86_Win32_Vector - In X86 Win32 calling conventions, extra vector
+/// values are spilled on the stack.
+def CC_X86_Win32_Vector : CallingConv<[
+ // Other SSE vectors get 16-byte stack slots that are 4-byte aligned.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v8f16, v4f32, v2f64],
+ CCAssignToStack<16, 4>>,
+
+ // 256-bit AVX vectors get 32-byte stack slots that are 4-byte aligned.
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v16f16, v8f32, v4f64],
+ CCAssignToStack<32, 4>>,
+
+  // 512-bit AVX-512 vectors get 64-byte stack slots that are 4-byte aligned.
+ CCIfType<[v64i8, v32i16, v16i32, v8i64, v32f16, v16f32, v8f64],
+ CCAssignToStack<64, 4>>
+]>;
+
// CC_X86_32_Vector_Standard - The first 3 vector arguments are passed in
// vector registers
def CC_X86_32_Vector_Standard : CallingConv<[
// SSE vector arguments are passed in XMM registers.
- CCIfNotVarArg<CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCIfNotVarArg<CCIfType<[v16i8, v8i16, v4i32, v2i64, v8f16, v4f32, v2f64],
CCAssignToReg<[XMM0, XMM1, XMM2]>>>,
// AVX 256-bit vector arguments are passed in YMM registers.
- CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v16f16, v8f32, v4f64],
CCIfSubtarget<"hasAVX()",
CCAssignToReg<[YMM0, YMM1, YMM2]>>>>,
// AVX 512-bit vector arguments are passed in ZMM registers.
- CCIfNotVarArg<CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
+ CCIfNotVarArg<CCIfType<[v64i8, v32i16, v16i32, v8i64, v32f16, v16f32, v8f64],
CCAssignToReg<[ZMM0, ZMM1, ZMM2]>>>,
+ CCIfIsVarArgOnWin<CCDelegateTo<CC_X86_Win32_Vector>>,
CCDelegateTo<CC_X86_32_Vector_Common>
]>;
@@ -791,16 +821,16 @@ def CC_X86_32_Vector_Standard : CallingConv<[
// vector registers.
def CC_X86_32_Vector_Darwin : CallingConv<[
// SSE vector arguments are passed in XMM registers.
- CCIfNotVarArg<CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCIfNotVarArg<CCIfType<[v16i8, v8i16, v4i32, v2i64, v8f16, v4f32, v2f64],
CCAssignToReg<[XMM0, XMM1, XMM2, XMM3]>>>,
// AVX 256-bit vector arguments are passed in YMM registers.
- CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v16f16, v8f32, v4f64],
CCIfSubtarget<"hasAVX()",
CCAssignToReg<[YMM0, YMM1, YMM2, YMM3]>>>>,
// AVX 512-bit vector arguments are passed in ZMM registers.
- CCIfNotVarArg<CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
+ CCIfNotVarArg<CCIfType<[v64i8, v32i16, v16i32, v8i64, v32f16, v16f32, v8f64],
CCAssignToReg<[ZMM0, ZMM1, ZMM2, ZMM3]>>>,
CCDelegateTo<CC_X86_32_Vector_Common>
@@ -819,11 +849,15 @@ def CC_X86_32_Common : CallingConv<[
CCIfSubtarget<"hasSSE2()",
CCAssignToReg<[XMM0,XMM1,XMM2]>>>>>,
+ CCIfNotVarArg<CCIfInReg<CCIfType<[f16], CCAssignToReg<[XMM0,XMM1,XMM2]>>>>,
+
// The first 3 __m64 vector arguments are passed in mmx registers if the
// call is not a vararg call.
CCIfNotVarArg<CCIfType<[x86mmx],
CCAssignToReg<[MM0, MM1, MM2]>>>,
+ CCIfType<[f16], CCAssignToStack<4, 4>>,
+
// Integer/Float values get stored in stack slots that are 4 bytes in
// size and 4-byte aligned.
CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
@@ -831,8 +865,8 @@ def CC_X86_32_Common : CallingConv<[
// Doubles get 8-byte slots that are 4-byte aligned.
CCIfType<[f64], CCAssignToStack<8, 4>>,
- // Long doubles get slots whose size depends on the subtarget.
- CCIfType<[f80], CCAssignToStack<0, 4>>,
+  // Long doubles get slots whose size and alignment depend on the subtarget.
+ CCIfType<[f80], CCAssignToStack<0, 0>>,
// Boolean vectors of AVX-512 are passed in SIMD registers.
// The call from AVX to AVX-512 function should work,
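The TableGen hunks above fold the AVX512-FP16 types into the existing rules: scalar f16 is grouped with f32/f64, and v8f16/v16f16/v32f16 are grouped with the other 128/256/512-bit vectors for register, indirect, and stack assignment. A minimal source-level sketch of what the amended CC_X86_Win64_C register rule means; the build flags and target spelling below are assumptions, not part of this patch:

    // Illustrative only: under the updated Win64 rule, the first four
    // half/float/double arguments share XMM0..XMM3 (shadowing RCX..R9).
    // Assumed build line:
    //   clang++ -O2 -mavx512fp16 --target=x86_64-pc-windows-msvc -S half_args.cpp
    _Float16 scale(_Float16 x, _Float16 y) { // x in XMM0, y in XMM1
      return x * y;                          // FP16 multiply stays in XMM registers
    }
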
diff --git a/llvm/lib/Target/X86/X86CmovConversion.cpp b/llvm/lib/Target/X86/X86CmovConversion.cpp
index 05349a7c01f8..863438793acf 100644
--- a/llvm/lib/Target/X86/X86CmovConversion.cpp
+++ b/llvm/lib/Target/X86/X86CmovConversion.cpp
@@ -582,10 +582,9 @@ static bool checkEFLAGSLive(MachineInstr *MI) {
}
// We hit the end of the block, check whether EFLAGS is live into a successor.
- for (auto I = BB->succ_begin(), E = BB->succ_end(); I != E; ++I) {
- if ((*I)->isLiveIn(X86::EFLAGS))
+ for (MachineBasicBlock *Succ : BB->successors())
+ if (Succ->isLiveIn(X86::EFLAGS))
return true;
- }
return false;
}
@@ -797,8 +796,7 @@ void X86CmovConverterPass::convertCmovInstsToBranches(
MOp.setIsKill(false);
}
}
- MBB->erase(MachineBasicBlock::iterator(MI),
- std::next(MachineBasicBlock::iterator(MI)));
+ MBB->erase(&MI);
// Add this PHI to the rewrite table.
FalseBBRegRewriteTable[NewCMOV->getOperand(0).getReg()] = TmpReg;
diff --git a/llvm/lib/Target/X86/X86WinAllocaExpander.cpp b/llvm/lib/Target/X86/X86DynAllocaExpander.cpp
index 9ada0a8dd412..df8df1e3a65d 100644
--- a/llvm/lib/Target/X86/X86WinAllocaExpander.cpp
+++ b/llvm/lib/Target/X86/X86DynAllocaExpander.cpp
@@ -1,4 +1,4 @@
-//===----- X86WinAllocaExpander.cpp - Expand WinAlloca pseudo instruction -===//
+//===----- X86DynAllocaExpander.cpp - Expand DynAlloca pseudo instruction -===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines a pass that expands WinAlloca pseudo-instructions.
+// This file defines a pass that expands DynAlloca pseudo-instructions.
//
// It performs a conservative analysis to determine whether each allocation
// falls within a region of the stack that is safe to use, or whether stack
@@ -33,26 +33,26 @@ using namespace llvm;
namespace {
-class X86WinAllocaExpander : public MachineFunctionPass {
+class X86DynAllocaExpander : public MachineFunctionPass {
public:
- X86WinAllocaExpander() : MachineFunctionPass(ID) {}
+ X86DynAllocaExpander() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;
private:
- /// Strategies for lowering a WinAlloca.
+ /// Strategies for lowering a DynAlloca.
enum Lowering { TouchAndSub, Sub, Probe };
- /// Deterministic-order map from WinAlloca instruction to desired lowering.
+ /// Deterministic-order map from DynAlloca instruction to desired lowering.
typedef MapVector<MachineInstr*, Lowering> LoweringMap;
- /// Compute which lowering to use for each WinAlloca instruction.
+ /// Compute which lowering to use for each DynAlloca instruction.
void computeLowerings(MachineFunction &MF, LoweringMap& Lowerings);
/// Get the appropriate lowering based on current offset and amount.
Lowering getLowering(int64_t CurrentOffset, int64_t AllocaAmount);
- /// Lower a WinAlloca instruction.
+ /// Lower a DynAlloca instruction.
void lower(MachineInstr* MI, Lowering L);
MachineRegisterInfo *MRI = nullptr;
@@ -64,22 +64,22 @@ private:
int64_t StackProbeSize = 0;
bool NoStackArgProbe = false;
- StringRef getPassName() const override { return "X86 WinAlloca Expander"; }
+ StringRef getPassName() const override { return "X86 DynAlloca Expander"; }
static char ID;
};
-char X86WinAllocaExpander::ID = 0;
+char X86DynAllocaExpander::ID = 0;
} // end anonymous namespace
-FunctionPass *llvm::createX86WinAllocaExpander() {
- return new X86WinAllocaExpander();
+FunctionPass *llvm::createX86DynAllocaExpander() {
+ return new X86DynAllocaExpander();
}
-/// Return the allocation amount for a WinAlloca instruction, or -1 if unknown.
-static int64_t getWinAllocaAmount(MachineInstr *MI, MachineRegisterInfo *MRI) {
- assert(MI->getOpcode() == X86::WIN_ALLOCA_32 ||
- MI->getOpcode() == X86::WIN_ALLOCA_64);
+/// Return the allocation amount for a DynAlloca instruction, or -1 if unknown.
+static int64_t getDynAllocaAmount(MachineInstr *MI, MachineRegisterInfo *MRI) {
+ assert(MI->getOpcode() == X86::DYN_ALLOCA_32 ||
+ MI->getOpcode() == X86::DYN_ALLOCA_64);
assert(MI->getOperand(0).isReg());
Register AmountReg = MI->getOperand(0).getReg();
@@ -93,8 +93,8 @@ static int64_t getWinAllocaAmount(MachineInstr *MI, MachineRegisterInfo *MRI) {
return Def->getOperand(1).getImm();
}
-X86WinAllocaExpander::Lowering
-X86WinAllocaExpander::getLowering(int64_t CurrentOffset,
+X86DynAllocaExpander::Lowering
+X86DynAllocaExpander::getLowering(int64_t CurrentOffset,
int64_t AllocaAmount) {
// For a non-constant amount or a large amount, we have to probe.
if (AllocaAmount < 0 || AllocaAmount > StackProbeSize)
@@ -128,11 +128,11 @@ static bool isPushPop(const MachineInstr &MI) {
}
}
-void X86WinAllocaExpander::computeLowerings(MachineFunction &MF,
+void X86DynAllocaExpander::computeLowerings(MachineFunction &MF,
LoweringMap &Lowerings) {
// Do a one-pass reverse post-order walk of the CFG to conservatively estimate
// the offset between the stack pointer and the lowest touched part of the
- // stack, and use that to decide how to lower each WinAlloca instruction.
+ // stack, and use that to decide how to lower each DynAlloca instruction.
// Initialize OutOffset[B], the stack offset at exit from B, to something big.
DenseMap<MachineBasicBlock *, int64_t> OutOffset;
@@ -153,10 +153,10 @@ void X86WinAllocaExpander::computeLowerings(MachineFunction &MF,
if (Offset == -1) Offset = INT32_MAX;
for (MachineInstr &MI : *MBB) {
- if (MI.getOpcode() == X86::WIN_ALLOCA_32 ||
- MI.getOpcode() == X86::WIN_ALLOCA_64) {
- // A WinAlloca moves StackPtr, and potentially touches it.
- int64_t Amount = getWinAllocaAmount(&MI, MRI);
+ if (MI.getOpcode() == X86::DYN_ALLOCA_32 ||
+ MI.getOpcode() == X86::DYN_ALLOCA_64) {
+ // A DynAlloca moves StackPtr, and potentially touches it.
+ int64_t Amount = getDynAllocaAmount(&MI, MRI);
Lowering L = getLowering(Offset, Amount);
Lowerings[&MI] = L;
switch (L) {
@@ -195,12 +195,12 @@ static unsigned getSubOpcode(bool Is64Bit, int64_t Amount) {
return isInt<8>(Amount) ? X86::SUB32ri8 : X86::SUB32ri;
}
-void X86WinAllocaExpander::lower(MachineInstr* MI, Lowering L) {
+void X86DynAllocaExpander::lower(MachineInstr *MI, Lowering L) {
const DebugLoc &DL = MI->getDebugLoc();
MachineBasicBlock *MBB = MI->getParent();
MachineBasicBlock::iterator I = *MI;
- int64_t Amount = getWinAllocaAmount(MI, MRI);
+ int64_t Amount = getDynAllocaAmount(MI, MRI);
if (Amount == 0) {
MI->eraseFromParent();
return;
@@ -209,7 +209,7 @@ void X86WinAllocaExpander::lower(MachineInstr* MI, Lowering L) {
// These two variables differ on x32, which is a 64-bit target with a
// 32-bit alloca.
bool Is64Bit = STI->is64Bit();
- bool Is64BitAlloca = MI->getOpcode() == X86::WIN_ALLOCA_64;
+ bool Is64BitAlloca = MI->getOpcode() == X86::DYN_ALLOCA_64;
assert(SlotSize == 4 || SlotSize == 8);
switch (L) {
@@ -271,8 +271,8 @@ void X86WinAllocaExpander::lower(MachineInstr* MI, Lowering L) {
AmountDef->eraseFromParent();
}
-bool X86WinAllocaExpander::runOnMachineFunction(MachineFunction &MF) {
- if (!MF.getInfo<X86MachineFunctionInfo>()->hasWinAlloca())
+bool X86DynAllocaExpander::runOnMachineFunction(MachineFunction &MF) {
+ if (!MF.getInfo<X86MachineFunctionInfo>()->hasDynAlloca())
return false;
MRI = &MF.getRegInfo();
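The rename from WinAlloca to DynAlloca above is mechanical, but the lowering choice the pass makes is easy to restate. Only the first Probe check is visible in the hunk; the remaining two cases below are inferred from the strategy names and the surrounding comments, so treat this as an assumption-laden sketch rather than a quote of the pass:

    #include <cassert>
    #include <cstdint>

    enum class Lowering { TouchAndSub, Sub, Probe };

    // Sketch: unknown (-1) or large amounts must be probed; amounts that stay
    // inside the already-touched region only need a stack-pointer subtract;
    // otherwise touch the current tip of the stack first, then subtract.
    Lowering getLowering(int64_t CurrentOffset, int64_t AllocaAmount,
                         int64_t StackProbeSize) {
      if (AllocaAmount < 0 || AllocaAmount > StackProbeSize)
        return Lowering::Probe;
      if (CurrentOffset + AllocaAmount <= StackProbeSize)
        return Lowering::Sub;
      return Lowering::TouchAndSub;
    }

    int main() {
      assert(getLowering(0, -1, 4096) == Lowering::Probe);     // unknown amount
      assert(getLowering(128, 512, 4096) == Lowering::Sub);    // fits in probed region
      assert(getLowering(4000, 512, 4096) == Lowering::TouchAndSub);
      return 0;
    }
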
diff --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
index 4add8d30e010..01dc509df795 100644
--- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
@@ -236,19 +236,10 @@ void X86ExpandPseudo::expandCALL_RVMARKER(MachineBasicBlock &MBB,
MBB.getParent()->moveCallSiteInfo(&MI, Marker);
// Emit call to ObjC runtime.
- unsigned RuntimeCallType = MI.getOperand(0).getImm();
- assert(RuntimeCallType <= 1 && "objc runtime call type must be 0 or 1");
- Module *M = MBB.getParent()->getFunction().getParent();
- auto &Context = M->getContext();
- auto *I8PtrTy = PointerType::get(IntegerType::get(Context, 8), 0);
- FunctionCallee Fn = M->getOrInsertFunction(
- RuntimeCallType == 0 ? "objc_retainAutoreleasedReturnValue"
- : "objc_unsafeClaimAutoreleasedReturnValue",
- FunctionType::get(I8PtrTy, {I8PtrTy}, false));
const uint32_t *RegMask =
TRI->getCallPreservedMask(*MBB.getParent(), CallingConv::C);
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(X86::CALL64pcrel32))
- .addGlobalAddress(cast<GlobalValue>(Fn.getCallee()), 0, 0)
+ .addGlobalAddress(MI.getOperand(0).getGlobal(), 0, 0)
.addRegMask(RegMask)
.addReg(X86::RAX,
RegState::Implicit |
@@ -403,10 +394,10 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MachineInstrBuilder MIB;
if (StackAdj == 0) {
MIB = BuildMI(MBB, MBBI, DL,
- TII->get(STI->is64Bit() ? X86::RETQ : X86::RETL));
+ TII->get(STI->is64Bit() ? X86::RET64 : X86::RET32));
} else if (isUInt<16>(StackAdj)) {
MIB = BuildMI(MBB, MBBI, DL,
- TII->get(STI->is64Bit() ? X86::RETIQ : X86::RETIL))
+ TII->get(STI->is64Bit() ? X86::RETI64 : X86::RETI32))
.addImm(StackAdj);
} else {
assert(!STI->is64Bit() &&
@@ -416,7 +407,7 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
BuildMI(MBB, MBBI, DL, TII->get(X86::POP32r)).addReg(X86::ECX, RegState::Define);
X86FL->emitSPUpdate(MBB, MBBI, DL, StackAdj, /*InEpilogue=*/true);
BuildMI(MBB, MBBI, DL, TII->get(X86::PUSH32r)).addReg(X86::ECX);
- MIB = BuildMI(MBB, MBBI, DL, TII->get(X86::RETL));
+ MIB = BuildMI(MBB, MBBI, DL, TII->get(X86::RET32));
}
for (unsigned I = 1, E = MBBI->getNumOperands(); I != E; ++I)
MIB.add(MBBI->getOperand(I));
@@ -657,35 +648,24 @@ void X86ExpandPseudo::ExpandVastartSaveXmmRegs(
EntryBlk->end());
TailBlk->transferSuccessorsAndUpdatePHIs(EntryBlk);
- int64_t FrameIndex = VAStartPseudoInstr->getOperand(1).getImm();
- Register BaseReg;
- uint64_t FrameOffset =
- X86FL->getFrameIndexReference(*Func, FrameIndex, BaseReg).getFixed();
- uint64_t VarArgsRegsOffset = VAStartPseudoInstr->getOperand(2).getImm();
+ uint64_t FrameOffset = VAStartPseudoInstr->getOperand(4).getImm();
+ uint64_t VarArgsRegsOffset = VAStartPseudoInstr->getOperand(6).getImm();
// TODO: add support for YMM and ZMM here.
unsigned MOVOpc = STI->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr;
// In the XMM save block, save all the XMM argument registers.
- for (int64_t OpndIdx = 3, RegIdx = 0;
+ for (int64_t OpndIdx = 7, RegIdx = 0;
OpndIdx < VAStartPseudoInstr->getNumOperands() - 1;
OpndIdx++, RegIdx++) {
-
- int64_t Offset = FrameOffset + VarArgsRegsOffset + RegIdx * 16;
-
- MachineMemOperand *MMO = Func->getMachineMemOperand(
- MachinePointerInfo::getFixedStack(*Func, FrameIndex, Offset),
- MachineMemOperand::MOStore,
- /*Size=*/16, Align(16));
-
- BuildMI(GuardedRegsBlk, DL, TII->get(MOVOpc))
- .addReg(BaseReg)
- .addImm(/*Scale=*/1)
- .addReg(/*IndexReg=*/0)
- .addImm(/*Disp=*/Offset)
- .addReg(/*Segment=*/0)
- .addReg(VAStartPseudoInstr->getOperand(OpndIdx).getReg())
- .addMemOperand(MMO);
+ auto NewMI = BuildMI(GuardedRegsBlk, DL, TII->get(MOVOpc));
+ for (int i = 0; i < X86::AddrNumOperands; ++i) {
+ if (i == X86::AddrDisp)
+ NewMI.addImm(FrameOffset + VarArgsRegsOffset + RegIdx * 16);
+ else
+ NewMI.add(VAStartPseudoInstr->getOperand(i + 1));
+ }
+ NewMI.addReg(VAStartPseudoInstr->getOperand(OpndIdx).getReg());
assert(Register::isPhysicalRegister(
VAStartPseudoInstr->getOperand(OpndIdx).getReg()));
}
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index bb95ed3ccdc5..1ac998b7ff7e 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -55,6 +55,7 @@ class X86FastISel final : public FastISel {
/// When SSE2 is available, use it for f64 operations.
bool X86ScalarSSEf64;
bool X86ScalarSSEf32;
+ bool X86ScalarSSEf16;
public:
explicit X86FastISel(FunctionLoweringInfo &funcInfo,
@@ -63,6 +64,7 @@ public:
Subtarget = &funcInfo.MF->getSubtarget<X86Subtarget>();
X86ScalarSSEf64 = Subtarget->hasSSE2();
X86ScalarSSEf32 = Subtarget->hasSSE1();
+ X86ScalarSSEf16 = Subtarget->hasFP16();
}
bool fastSelectInstruction(const Instruction *I) override;
@@ -157,7 +159,8 @@ private:
/// computed in an SSE register, not on the X87 floating point stack.
bool isScalarFPTypeInSSEReg(EVT VT) const {
return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
- (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1
+ (VT == MVT::f32 && X86ScalarSSEf32) || // f32 is when SSE1
+ (VT == MVT::f16 && X86ScalarSSEf16); // f16 is when AVX512FP16
}
bool isTypeLegal(Type *Ty, MVT &VT, bool AllowI1 = false);
@@ -786,7 +789,8 @@ bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) {
RC = &X86::GR32RegClass;
}
- if (Subtarget->isPICStyleRIPRel() || GVFlags == X86II::MO_GOTPCREL)
+ if (Subtarget->isPICStyleRIPRel() || GVFlags == X86II::MO_GOTPCREL ||
+ GVFlags == X86II::MO_GOTPCREL_NORELAX)
StubAM.Base.Reg = X86::RIP;
LoadReg = createResultReg(RC);
@@ -1301,11 +1305,11 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
MachineInstrBuilder MIB;
if (X86MFInfo->getBytesToPopOnReturn()) {
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(Subtarget->is64Bit() ? X86::RETIQ : X86::RETIL))
+ TII.get(Subtarget->is64Bit() ? X86::RETI64 : X86::RETI32))
.addImm(X86MFInfo->getBytesToPopOnReturn());
} else {
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(Subtarget->is64Bit() ? X86::RETQ : X86::RETL));
+ TII.get(Subtarget->is64Bit() ? X86::RET64 : X86::RET32));
}
for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
MIB.addReg(RetRegs[i], RegState::Implicit);
@@ -2283,9 +2287,10 @@ bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
unsigned Opc;
switch (RetVT.SimpleTy) {
default: return false;
- case MVT::i8: Opc = X86::CMOV_GR8; break;
- case MVT::i16: Opc = X86::CMOV_GR16; break;
- case MVT::i32: Opc = X86::CMOV_GR32; break;
+ case MVT::i8: Opc = X86::CMOV_GR8; break;
+ case MVT::i16: Opc = X86::CMOV_GR16; break;
+ case MVT::f16: Opc = X86::CMOV_FR16X; break;
+ case MVT::i32: Opc = X86::CMOV_GR32; break;
case MVT::f32: Opc = Subtarget->hasAVX512() ? X86::CMOV_FR32X
: X86::CMOV_FR32; break;
case MVT::f64: Opc = Subtarget->hasAVX512() ? X86::CMOV_FR64X
@@ -2741,7 +2746,7 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
if (MCI->getSourceAddressSpace() > 255 || MCI->getDestAddressSpace() > 255)
return false;
- return lowerCallTo(II, "memcpy", II->getNumArgOperands() - 1);
+ return lowerCallTo(II, "memcpy", II->arg_size() - 1);
}
case Intrinsic::memset: {
const MemSetInst *MSI = cast<MemSetInst>(II);
@@ -2756,7 +2761,7 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
if (MSI->getDestAddressSpace() > 255)
return false;
- return lowerCallTo(II, "memset", II->getNumArgOperands() - 1);
+ return lowerCallTo(II, "memset", II->arg_size() - 1);
}
case Intrinsic::stackprotector: {
// Emit code to store the stack guard onto the stack.
@@ -2780,8 +2785,6 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
if (!X86SelectAddress(DI->getAddress(), AM))
return false;
const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
- // FIXME may need to add RegState::Debug to any registers produced,
- // although ESP/EBP should be the only ones at the moment.
assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) &&
"Expected inlined-at fields to agree");
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II), AM)
@@ -3484,6 +3487,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
// NonLazyBind calls or dllimport calls.
bool NeedLoad = OpFlags == X86II::MO_DLLIMPORT ||
OpFlags == X86II::MO_GOTPCREL ||
+ OpFlags == X86II::MO_GOTPCREL_NORELAX ||
OpFlags == X86II::MO_COFFSTUB;
unsigned CallOpc = NeedLoad
? (Is64Bit ? X86::CALL64m : X86::CALL32m)
@@ -3838,11 +3842,11 @@ unsigned X86FastISel::fastMaterializeConstant(const Constant *C) {
if (const auto *CI = dyn_cast<ConstantInt>(C))
return X86MaterializeInt(CI, VT);
- else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
+ if (const auto *CFP = dyn_cast<ConstantFP>(C))
return X86MaterializeFP(CFP, VT);
- else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ if (const auto *GV = dyn_cast<GlobalValue>(C))
return X86MaterializeGV(GV, VT);
- else if (isa<UndefValue>(C)) {
+ if (isa<UndefValue>(C)) {
unsigned Opc = 0;
switch (VT.SimpleTy) {
default:
diff --git a/llvm/lib/Target/X86/X86FastTileConfig.cpp b/llvm/lib/Target/X86/X86FastTileConfig.cpp
index 7031bd40215d..87c04a07cd13 100644
--- a/llvm/lib/Target/X86/X86FastTileConfig.cpp
+++ b/llvm/lib/Target/X86/X86FastTileConfig.cpp
@@ -44,6 +44,7 @@ class X86FastTileConfig : public MachineFunctionPass {
const TargetRegisterInfo *TRI = nullptr;
const TargetInstrInfo *TII = nullptr;
MachineRegisterInfo *MRI = nullptr;
+ X86MachineFunctionInfo *X86FI = nullptr;
MachineInstr *getTileConfigPoint();
void tileConfig();
@@ -289,6 +290,8 @@ bool X86FastTileConfig::fastTileConfig() {
if (!CFGs.empty())
Changed = true;
}
+ if (Changed)
+ X86FI->setHasVirtualTileReg(true);
return Changed;
}
@@ -298,6 +301,7 @@ bool X86FastTileConfig::runOnMachineFunction(MachineFunction &MFunc) {
ST = &MFunc.getSubtarget<X86Subtarget>();
TRI = ST->getRegisterInfo();
TII = MFunc.getSubtarget().getInstrInfo();
+ X86FI = MFunc.getInfo<X86MachineFunctionInfo>();
return fastTileConfig();
}
diff --git a/llvm/lib/Target/X86/X86FixupLEAs.cpp b/llvm/lib/Target/X86/X86FixupLEAs.cpp
index 05cab776e0b7..9a63cffe0a09 100644
--- a/llvm/lib/Target/X86/X86FixupLEAs.cpp
+++ b/llvm/lib/Target/X86/X86FixupLEAs.cpp
@@ -212,8 +212,7 @@ FixupLEAPass::postRAConvertToLEA(MachineBasicBlock &MBB,
// These instructions are all fine to convert.
break;
}
- MachineFunction::iterator MFI = MBB.getIterator();
- return TII->convertToThreeAddress(MFI, MI, nullptr);
+ return TII->convertToThreeAddress(MI, nullptr, nullptr);
}
FunctionPass *llvm::createX86FixupLEAs() { return new FixupLEAPass(); }
diff --git a/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp b/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
index 2d9886e3f238..f24dbcfe972d 100644
--- a/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
+++ b/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
@@ -964,7 +964,11 @@ void X86FlagsCopyLoweringPass::rewriteSetCC(MachineBasicBlock &TestMBB,
if (!SetCCI.mayStore()) {
assert(SetCCI.getOperand(0).isReg() &&
"Cannot have a non-register defined operand to SETcc!");
- MRI->replaceRegWith(SetCCI.getOperand(0).getReg(), CondReg);
+ Register OldReg = SetCCI.getOperand(0).getReg();
+ // Drop Kill flags on the old register before replacing. CondReg may have
+ // a longer live range.
+ MRI->clearKillFlags(OldReg);
+ MRI->replaceRegWith(OldReg, CondReg);
SetCCI.eraseFromParent();
return;
}
diff --git a/llvm/lib/Target/X86/X86FloatingPoint.cpp b/llvm/lib/Target/X86/X86FloatingPoint.cpp
index e0f30f090171..60e1b37ed61c 100644
--- a/llvm/lib/Target/X86/X86FloatingPoint.cpp
+++ b/llvm/lib/Target/X86/X86FloatingPoint.cpp
@@ -832,6 +832,24 @@ static const TableEntry PopTable[] = {
{ X86::UCOM_Fr , X86::UCOM_FPr },
};
+static bool doesInstructionSetFPSW(MachineInstr &MI) {
+ if (const MachineOperand *MO = MI.findRegisterDefOperand(X86::FPSW))
+ if (!MO->isDead())
+ return true;
+ return false;
+}
+
+static MachineBasicBlock::iterator
+getNextFPInstruction(MachineBasicBlock::iterator I) {
+ MachineBasicBlock &MBB = *I->getParent();
+ while (++I != MBB.end()) {
+ MachineInstr &MI = *I;
+ if (X86::isX87Instruction(MI))
+ return I;
+ }
+ return MBB.end();
+}
+
/// popStackAfter - Pop the current value off of the top of the FP stack after
/// the specified instruction. This attempts to be sneaky and combine the pop
/// into the instruction itself if possible. The iterator is left pointing to
@@ -853,6 +871,14 @@ void FPS::popStackAfter(MachineBasicBlock::iterator &I) {
I->RemoveOperand(0);
MI.dropDebugNumber();
} else { // Insert an explicit pop
+    // If this instruction sets FPSW, which is read by the following
+    // instruction, insert the pop after that reader.
+ if (doesInstructionSetFPSW(MI)) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineBasicBlock::iterator Next = getNextFPInstruction(I);
+ if (Next != MBB.end() && Next->readsRegister(X86::FPSW))
+ I = Next;
+ }
I = BuildMI(*MBB, ++I, dl, TII->get(X86::ST_FPrr)).addReg(X86::ST0);
}
}
@@ -1038,9 +1064,10 @@ void FPS::handleCall(MachineBasicBlock::iterator &I) {
for (unsigned I = 0; I < N; ++I)
pushReg(N - I - 1);
- // Drop all variable values defined by this call -- we can't track them
- // once they've been stackified.
- I->dropDebugNumber();
+ // If this call has been modified, drop all variable values defined by it.
+ // We can't track them once they've been stackified.
+ if (STReturns)
+ I->dropDebugNumber();
}
/// If RET has an FP register use operand, pass the first one in ST(0) and
@@ -1732,16 +1759,14 @@ void FPS::setKillFlags(MachineBasicBlock &MBB) const {
LPR.addLiveOuts(MBB);
- for (MachineBasicBlock::reverse_iterator I = MBB.rbegin(), E = MBB.rend();
- I != E; ++I) {
- if (I->isDebugInstr())
+ for (MachineInstr &MI : llvm::reverse(MBB)) {
+ if (MI.isDebugInstr())
continue;
std::bitset<8> Defs;
SmallVector<MachineOperand *, 2> Uses;
- MachineInstr &MI = *I;
- for (auto &MO : I->operands()) {
+ for (auto &MO : MI.operands()) {
if (!MO.isReg())
continue;
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index 4cde7971e597..bd780273509f 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -671,7 +671,9 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
MF.insert(MBBIter, testMBB);
MF.insert(MBBIter, tailMBB);
- Register FinalStackProbed = Uses64BitFramePtr ? X86::R11 : X86::R11D;
+ Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
+ : Is64Bit ? X86::R11D
+ : X86::EAX;
BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
.addReg(StackPtr)
.setMIFlag(MachineInstr::FrameSetup);
@@ -1092,7 +1094,9 @@ void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
MF.insert(MBBIter, bodyMBB);
MF.insert(MBBIter, footMBB);
const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
- Register FinalStackProbed = Uses64BitFramePtr ? X86::R11 : X86::R11D;
+ Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
+ : Is64Bit ? X86::R11D
+ : X86::EAX;
// Setup entry block
{
@@ -1349,25 +1353,44 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// to determine the end of the prologue.
DebugLoc DL;
- // Add RETADDR move area to callee saved frame size.
- int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
- if (TailCallReturnAddrDelta && IsWin64Prologue)
+ // Space reserved for stack-based arguments when making a (ABI-guaranteed)
+ // tail call.
+ unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
+ if (TailCallArgReserveSize && IsWin64Prologue)
report_fatal_error("Can't handle guaranteed tail call under win64 yet");
- if (TailCallReturnAddrDelta < 0)
- X86FI->setCalleeSavedFrameSize(
- X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);
-
const bool EmitStackProbeCall =
STI.getTargetLowering()->hasStackProbeSymbol(MF);
unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF);
if (HasFP && X86FI->hasSwiftAsyncContext()) {
- BuildMI(MBB, MBBI, DL, TII.get(X86::BTS64ri8),
- MachineFramePtr)
- .addUse(MachineFramePtr)
- .addImm(60)
- .setMIFlag(MachineInstr::FrameSetup);
+ switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
+ case SwiftAsyncFramePointerMode::DeploymentBased:
+ if (STI.swiftAsyncContextIsDynamicallySet()) {
+ // The special symbol below is absolute and has a *value* suitable to be
+ // combined with the frame pointer directly.
+ BuildMI(MBB, MBBI, DL, TII.get(X86::OR64rm), MachineFramePtr)
+ .addUse(MachineFramePtr)
+ .addUse(X86::RIP)
+ .addImm(1)
+ .addUse(X86::NoRegister)
+ .addExternalSymbol("swift_async_extendedFramePointerFlags",
+ X86II::MO_GOTPCREL)
+ .addUse(X86::NoRegister);
+ break;
+ }
+ LLVM_FALLTHROUGH;
+
+ case SwiftAsyncFramePointerMode::Always:
+ BuildMI(MBB, MBBI, DL, TII.get(X86::BTS64ri8), MachineFramePtr)
+ .addUse(MachineFramePtr)
+ .addImm(60)
+ .setMIFlag(MachineInstr::FrameSetup);
+ break;
+
+ case SwiftAsyncFramePointerMode::Never:
+ break;
+ }
}
// Re-align the stack on 64-bit if the x86-interrupt calling convention is
@@ -1391,7 +1414,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
!EmitStackProbeCall && // No stack probes.
!MFI.hasCopyImplyingStackAdjustment() && // Don't push and pop.
!MF.shouldSplitStack()) { // Regular stack
- uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
+ uint64_t MinSize =
+ X86FI->getCalleeSavedFrameSize() - X86FI->getTCReturnAddrDelta();
if (HasFP) MinSize += SlotSize;
X86FI->setUsesRedZone(MinSize > 0 || StackSize > 0);
StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
@@ -1401,8 +1425,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// Insert stack pointer adjustment for later moving of return addr. Only
// applies to tail call optimized functions where the callee argument stack
// size is bigger than the callers.
- if (TailCallReturnAddrDelta < 0) {
- BuildStackAdjustment(MBB, MBBI, DL, TailCallReturnAddrDelta,
+ if (TailCallArgReserveSize != 0) {
+ BuildStackAdjustment(MBB, MBBI, DL, -(int)TailCallArgReserveSize,
/*InEpilogue=*/false)
.setMIFlag(MachineInstr::FrameSetup);
}
@@ -1451,7 +1475,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
if (X86FI->getRestoreBasePointer())
FrameSize += SlotSize;
- NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();
+ NumBytes = FrameSize -
+ (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
// Callee-saved registers are pushed on stack before the stack is realigned.
if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
@@ -1554,7 +1579,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
}
} else {
assert(!IsFunclet && "funclets without FPs not yet implemented");
- NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
+ NumBytes = StackSize -
+ (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
}
// Update the offset adjustment, which is mainly used by codeview to translate
@@ -2011,6 +2037,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
uint64_t StackSize = MFI.getStackSize();
uint64_t MaxAlign = calculateMaxStackAlign(MF);
unsigned CSSize = X86FI->getCalleeSavedFrameSize();
+ unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
bool HasFP = hasFP(MF);
uint64_t NumBytes = 0;
@@ -2024,14 +2051,14 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
} else if (HasFP) {
// Calculate required stack adjustment.
uint64_t FrameSize = StackSize - SlotSize;
- NumBytes = FrameSize - CSSize;
+ NumBytes = FrameSize - CSSize - TailCallArgReserveSize;
// Callee-saved registers were pushed on stack before the stack was
// realigned.
if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
NumBytes = alignTo(FrameSize, MaxAlign);
} else {
- NumBytes = StackSize - CSSize;
+ NumBytes = StackSize - CSSize - TailCallArgReserveSize;
}
uint64_t SEHStackAllocAmt = NumBytes;
@@ -2098,7 +2125,6 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
if (MBBI != MBB.end())
DL = MBBI->getDebugLoc();
-
// If there is an ADD32ri or SUB32ri of ESP immediately before this
// instruction, merge the two instructions.
if (NumBytes || MFI.hasVarSizedObjects())
@@ -2140,10 +2166,11 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
} else if (NumBytes) {
// Adjust stack pointer back: ESP += numbytes.
emitSPUpdate(MBB, MBBI, DL, NumBytes, /*InEpilogue=*/true);
- if (!hasFP(MF) && NeedsDwarfCFI) {
+ if (!HasFP && NeedsDwarfCFI) {
// Define the current CFA rule to use the provided offset.
BuildCFI(MBB, MBBI, DL,
- MCCFIInstruction::cfiDefCfaOffset(nullptr, CSSize + SlotSize));
+ MCCFIInstruction::cfiDefCfaOffset(
+ nullptr, CSSize + TailCallArgReserveSize + SlotSize));
}
--MBBI;
}
@@ -2157,7 +2184,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
if (NeedsWin64CFI && MF.hasWinCFI())
BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue));
- if (!hasFP(MF) && NeedsDwarfCFI) {
+ if (!HasFP && NeedsDwarfCFI) {
MBBI = FirstCSPop;
int64_t Offset = -CSSize - SlotSize;
// Mark callee-saved pop instruction.
@@ -2177,9 +2204,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
// Emit DWARF info specifying the restores of the callee-saved registers.
// For epilogue with return inside or being other block without successor,
// no need to generate .cfi_restore for callee-saved registers.
- if (NeedsDwarfCFI && !MBB.succ_empty() && !MBB.isReturnBlock()) {
+ if (NeedsDwarfCFI && !MBB.succ_empty())
emitCalleeSavedFrameMoves(MBB, AfterPop, DL, false);
- }
if (Terminator == MBB.end() || !isTailCallOpcode(Terminator->getOpcode())) {
// Add the return addr area delta back since we are not tail calling.
@@ -2193,13 +2219,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
}
// Emit tilerelease for AMX kernel.
- const MachineRegisterInfo &MRI = MF.getRegInfo();
- const TargetRegisterClass *RC = TRI->getRegClass(X86::TILERegClassID);
- for (unsigned I = 0; I < RC->getNumRegs(); I++)
- if (!MRI.reg_nodbg_empty(X86::TMM0 + I)) {
- BuildMI(MBB, Terminator, DL, TII.get(X86::TILERELEASE));
- break;
- }
+ if (X86FI->hasVirtualTileReg())
+ BuildMI(MBB, Terminator, DL, TII.get(X86::TILERELEASE));
}
StackOffset X86FrameLowering::getFrameIndexReference(const MachineFunction &MF,
@@ -2226,7 +2247,6 @@ StackOffset X86FrameLowering::getFrameIndexReference(const MachineFunction &MF,
const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
unsigned CSSize = X86FI->getCalleeSavedFrameSize();
uint64_t StackSize = MFI.getStackSize();
- bool HasFP = hasFP(MF);
bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
int64_t FPDelta = 0;
@@ -2262,39 +2282,27 @@ StackOffset X86FrameLowering::getFrameIndexReference(const MachineFunction &MF,
"FPDelta isn't aligned per the Win64 ABI!");
}
-
- if (TRI->hasBasePointer(MF)) {
- assert(HasFP && "VLAs and dynamic stack realign, but no FP?!");
- if (FI < 0) {
- // Skip the saved EBP.
- return StackOffset::getFixed(Offset + SlotSize + FPDelta);
- } else {
- assert(isAligned(MFI.getObjectAlign(FI), -(Offset + StackSize)));
- return StackOffset::getFixed(Offset + StackSize);
- }
- } else if (TRI->hasStackRealignment(MF)) {
- if (FI < 0) {
- // Skip the saved EBP.
- return StackOffset::getFixed(Offset + SlotSize + FPDelta);
- } else {
- assert(isAligned(MFI.getObjectAlign(FI), -(Offset + StackSize)));
- return StackOffset::getFixed(Offset + StackSize);
- }
- // FIXME: Support tail calls
- } else {
- if (!HasFP)
- return StackOffset::getFixed(Offset + StackSize);
-
- // Skip the saved EBP.
+ if (FrameReg == TRI->getFramePtr()) {
+ // Skip saved EBP/RBP
Offset += SlotSize;
+ // Account for restricted Windows prologue.
+ Offset += FPDelta;
+
// Skip the RETADDR move area
int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
if (TailCallReturnAddrDelta < 0)
Offset -= TailCallReturnAddrDelta;
+
+ return StackOffset::getFixed(Offset);
}
- return StackOffset::getFixed(Offset + FPDelta);
+ // FrameReg is either the stack pointer or a base pointer. But the base is
+ // located at the end of the statically known StackSize so the distinction
+ // doesn't really matter.
+ if (TRI->hasStackRealignment(MF) || TRI->hasBasePointer(MF))
+ assert(isAligned(MFI.getObjectAlign(FI), -(Offset + StackSize)));
+ return StackOffset::getFixed(Offset + StackSize);
}
int X86FrameLowering::getWin64EHFrameIndexRef(const MachineFunction &MF, int FI,
@@ -3091,8 +3099,7 @@ void X86FrameLowering::adjustForHiPEPrologue(
// having a ".", such as a simple <Module>.<Function>.<Arity>, or an
// "_", such as the BIF "suspend_0") as they are executed on another
// stack.
- if (F->getName().find("erlang.") != StringRef::npos ||
- F->getName().find("bif_") != StringRef::npos ||
+ if (F->getName().contains("erlang.") || F->getName().contains("bif_") ||
F->getName().find_first_of("._") == StringRef::npos)
continue;
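The prologue/epilogue hunks above replace the signed TCReturnAddrDelta bookkeeping with an explicit TailCallArgReserveSize and subtract it wherever the callee-saved size was already subtracted. A toy numeric check of that arithmetic (all values are made up for illustration):

    #include <cassert>
    #include <cstdint>

    int main() {
      // A guaranteed tail call that needs 16 bytes of extra argument space is
      // recorded as a TCReturnAddrDelta of -16; the reserve size is its negation.
      int64_t TCReturnAddrDelta = -16;                      // assumed demo value
      uint64_t TailCallArgReserveSize = -TCReturnAddrDelta; // == 16
      // Epilogue without a frame pointer: pop everything above the CSRs and
      // the reserved tail-call argument area.
      uint64_t StackSize = 128, CSSize = 24;                // assumed demo values
      uint64_t NumBytes = StackSize - CSSize - TailCallArgReserveSize;
      assert(TailCallArgReserveSize == 16 && NumBytes == 88);
      return 0;
    }
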
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index e9c7ba44b524..7ed05fd0331d 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -216,6 +216,8 @@ namespace {
bool matchAdd(SDValue &N, X86ISelAddressMode &AM, unsigned Depth);
bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
unsigned Depth);
+ bool matchVectorAddressRecursively(SDValue N, X86ISelAddressMode &AM,
+ unsigned Depth);
bool matchAddressBase(SDValue N, X86ISelAddressMode &AM);
bool selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index, SDValue &Disp,
@@ -336,10 +338,9 @@ namespace {
return false;
// Walk all the users of the immediate.
- for (SDNode::use_iterator UI = N->use_begin(),
- UE = N->use_end(); (UI != UE) && (UseCount < 2); ++UI) {
-
- SDNode *User = *UI;
+ for (const SDNode *User : N->uses()) {
+ if (UseCount >= 2)
+ break;
// This user is already selected. Count it as a legitimate use and
// move on.
@@ -433,6 +434,18 @@ namespace {
return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL);
}
+ SDValue getPermuteVINSERTCommutedImmediate(SDNode *N, unsigned VecWidth,
+ const SDLoc &DL) {
+ assert(VecWidth == 128 && "Unexpected vector width");
+ uint64_t Index = N->getConstantOperandVal(2);
+ MVT VecVT = N->getSimpleValueType(0);
+ uint64_t InsertIdx = (Index * VecVT.getScalarSizeInBits()) / VecWidth;
+ assert((InsertIdx == 0 || InsertIdx == 1) && "Bad insertf128 index");
+ // vinsert(0,sub,vec) -> [sub0][vec1] -> vperm2x128(0x30,vec,sub)
+ // vinsert(1,sub,vec) -> [vec0][sub0] -> vperm2x128(0x02,vec,sub)
+ return getI8Imm(InsertIdx ? 0x02 : 0x30, DL);
+ }
+
// Helper to detect unneeded and instructions on shift amounts. Called
// from PatFrags in tablegen.
bool isUnneededShiftMask(SDNode *N, unsigned Width) const {
@@ -504,8 +517,9 @@ namespace {
bool tryShiftAmountMod(SDNode *N);
bool tryShrinkShlLogicImm(SDNode *N);
bool tryVPTERNLOG(SDNode *N);
- bool matchVPTERNLOG(SDNode *Root, SDNode *ParentA, SDNode *ParentBC,
- SDValue A, SDValue B, SDValue C, uint8_t Imm);
+ bool matchVPTERNLOG(SDNode *Root, SDNode *ParentA, SDNode *ParentB,
+ SDNode *ParentC, SDValue A, SDValue B, SDValue C,
+ uint8_t Imm);
bool tryVPTESTM(SDNode *Root, SDValue Setcc, SDValue Mask);
bool tryMatchBitSelect(SDNode *N);
@@ -877,19 +891,34 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
continue;
}
- /// Convert vector increment or decrement to sub/add with an all-ones
- /// constant:
- /// add X, <1, 1...> --> sub X, <-1, -1...>
- /// sub X, <1, 1...> --> add X, <-1, -1...>
- /// The all-ones vector constant can be materialized using a pcmpeq
- /// instruction that is commonly recognized as an idiom (has no register
- /// dependency), so that's better/smaller than loading a splat 1 constant.
+ // Convert vector increment or decrement to sub/add with an all-ones
+ // constant:
+ // add X, <1, 1...> --> sub X, <-1, -1...>
+ // sub X, <1, 1...> --> add X, <-1, -1...>
+ // The all-ones vector constant can be materialized using a pcmpeq
+ // instruction that is commonly recognized as an idiom (has no register
+ // dependency), so that's better/smaller than loading a splat 1 constant.
+ //
+ // But don't do this if it would inhibit a potentially profitable load
+ // folding opportunity for the other operand. That only occurs with the
+ // intersection of:
+ // (1) The other operand (op0) is load foldable.
+ // (2) The op is an add (otherwise, we are *creating* an add and can still
+ // load fold the other op).
+ // (3) The target has AVX (otherwise, we have a destructive add and can't
+ // load fold the other op without killing the constant op).
+ // (4) The constant 1 vector has multiple uses (so it is profitable to load
+ // into a register anyway).
+ auto mayPreventLoadFold = [&]() {
+ return X86::mayFoldLoad(N->getOperand(0), *Subtarget) &&
+ N->getOpcode() == ISD::ADD && Subtarget->hasAVX() &&
+ !N->getOperand(1).hasOneUse();
+ };
if ((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
- N->getSimpleValueType(0).isVector()) {
-
+ N->getSimpleValueType(0).isVector() && !mayPreventLoadFold()) {
APInt SplatVal;
if (X86::isConstantSplat(N->getOperand(1), SplatVal) &&
- SplatVal.isOneValue()) {
+ SplatVal.isOne()) {
SDLoc DL(N);
MVT VT = N->getSimpleValueType(0);
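A standalone intrinsics sketch of the identity the comment above relies on: a lane-wise add of splat(1) equals a subtract of splat(-1), and the all-ones operand can come from the register-only PCMPEQ idiom instead of a constant-pool load. Illustrative only; SSE2 is assumed:

    #include <emmintrin.h>
    #include <cassert>
    #include <cstdint>

    int main() {
      __m128i x = _mm_set_epi32(40, -3, 0, 123);
      __m128i ones = _mm_set1_epi32(1);           // splat(1): normally a memory constant
      __m128i allOnes = _mm_cmpeq_epi32(x, x);    // PCMPEQD reg,reg: all bits set, no load
      __m128i viaAdd = _mm_add_epi32(x, ones);    // add X, <1,1,1,1>
      __m128i viaSub = _mm_sub_epi32(x, allOnes); // sub X, <-1,-1,-1,-1>
      alignas(16) int32_t a[4], b[4];
      _mm_store_si128(reinterpret_cast<__m128i *>(a), viaAdd);
      _mm_store_si128(reinterpret_cast<__m128i *>(b), viaSub);
      for (int i = 0; i < 4; ++i)
        assert(a[i] == b[i]);
      return 0;
    }
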
@@ -1121,7 +1150,10 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
if (VT.isVector() || VT == MVT::f128)
break;
- MVT VecVT = VT == MVT::f64 ? MVT::v2f64 : MVT::v4f32;
+ MVT VecVT = VT == MVT::f64 ? MVT::v2f64
+ : VT == MVT::f32 ? MVT::v4f32
+ : MVT::v8f16;
+
SDLoc dl(N);
SDValue Op0 = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT,
N->getOperand(0));
@@ -2464,10 +2496,18 @@ bool X86DAGToDAGISel::matchAddressBase(SDValue N, X86ISelAddressMode &AM) {
return false;
}
-/// Helper for selectVectorAddr. Handles things that can be folded into a
-/// gather scatter address. The index register and scale should have already
-/// been handled.
-bool X86DAGToDAGISel::matchVectorAddress(SDValue N, X86ISelAddressMode &AM) {
+bool X86DAGToDAGISel::matchVectorAddressRecursively(SDValue N,
+ X86ISelAddressMode &AM,
+ unsigned Depth) {
+ SDLoc dl(N);
+ LLVM_DEBUG({
+ dbgs() << "MatchVectorAddress: ";
+ AM.dump(CurDAG);
+ });
+ // Limit recursion.
+ if (Depth > 5)
+ return matchAddressBase(N, AM);
+
// TODO: Support other operations.
switch (N.getOpcode()) {
case ISD::Constant: {
@@ -2480,11 +2520,41 @@ bool X86DAGToDAGISel::matchVectorAddress(SDValue N, X86ISelAddressMode &AM) {
if (!matchWrapper(N, AM))
return false;
break;
+ case ISD::ADD: {
+ // Add an artificial use to this node so that we can keep track of
+ // it if it gets CSE'd with a different node.
+ HandleSDNode Handle(N);
+
+ X86ISelAddressMode Backup = AM;
+ if (!matchVectorAddressRecursively(N.getOperand(0), AM, Depth + 1) &&
+ !matchVectorAddressRecursively(Handle.getValue().getOperand(1), AM,
+ Depth + 1))
+ return false;
+ AM = Backup;
+
+ // Try again after commuting the operands.
+ if (!matchVectorAddressRecursively(Handle.getValue().getOperand(1), AM,
+ Depth + 1) &&
+ !matchVectorAddressRecursively(Handle.getValue().getOperand(0), AM,
+ Depth + 1))
+ return false;
+ AM = Backup;
+
+ N = Handle.getValue();
+ break;
+ }
}
return matchAddressBase(N, AM);
}
+/// Helper for selectVectorAddr. Handles things that can be folded into a
+/// gather/scatter address. The index register and scale should have already
+/// been handled.
+bool X86DAGToDAGISel::matchVectorAddress(SDValue N, X86ISelAddressMode &AM) {
+ return matchVectorAddressRecursively(N, AM, 0);
+}
+
bool X86DAGToDAGISel::selectVectorAddr(MemSDNode *Parent, SDValue BasePtr,
SDValue IndexOp, SDValue ScaleOp,
SDValue &Base, SDValue &Scale,
@@ -3387,16 +3457,24 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
return false;
SDValue NBits;
+ bool NegateNBits;
// If we have BMI2's BZHI, we are ok with muti-use patterns.
// Else, if we only have BMI1's BEXTR, we require one-use.
- const bool CanHaveExtraUses = Subtarget->hasBMI2();
- auto checkUses = [CanHaveExtraUses](SDValue Op, unsigned NUses) {
- return CanHaveExtraUses ||
+ const bool AllowExtraUsesByDefault = Subtarget->hasBMI2();
+ auto checkUses = [AllowExtraUsesByDefault](SDValue Op, unsigned NUses,
+ Optional<bool> AllowExtraUses) {
+ return AllowExtraUses.getValueOr(AllowExtraUsesByDefault) ||
Op.getNode()->hasNUsesOfValue(NUses, Op.getResNo());
};
- auto checkOneUse = [checkUses](SDValue Op) { return checkUses(Op, 1); };
- auto checkTwoUse = [checkUses](SDValue Op) { return checkUses(Op, 2); };
+ auto checkOneUse = [checkUses](SDValue Op,
+ Optional<bool> AllowExtraUses = None) {
+ return checkUses(Op, 1, AllowExtraUses);
+ };
+ auto checkTwoUse = [checkUses](SDValue Op,
+ Optional<bool> AllowExtraUses = None) {
+ return checkUses(Op, 2, AllowExtraUses);
+ };
auto peekThroughOneUseTruncation = [checkOneUse](SDValue V) {
if (V->getOpcode() == ISD::TRUNCATE && checkOneUse(V)) {
@@ -3409,8 +3487,8 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
};
// a) x & ((1 << nbits) + (-1))
- auto matchPatternA = [checkOneUse, peekThroughOneUseTruncation,
- &NBits](SDValue Mask) -> bool {
+ auto matchPatternA = [checkOneUse, peekThroughOneUseTruncation, &NBits,
+ &NegateNBits](SDValue Mask) -> bool {
// Match `add`. Must only have one use!
if (Mask->getOpcode() != ISD::ADD || !checkOneUse(Mask))
return false;
@@ -3424,6 +3502,7 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
if (!isOneConstant(M0->getOperand(0)))
return false;
NBits = M0->getOperand(1);
+ NegateNBits = false;
return true;
};
@@ -3436,7 +3515,7 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
// b) x & ~(-1 << nbits)
auto matchPatternB = [checkOneUse, isAllOnes, peekThroughOneUseTruncation,
- &NBits](SDValue Mask) -> bool {
+ &NBits, &NegateNBits](SDValue Mask) -> bool {
// Match `~()`. Must only have one use!
if (Mask.getOpcode() != ISD::XOR || !checkOneUse(Mask))
return false;
@@ -3451,32 +3530,35 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
if (!isAllOnes(M0->getOperand(0)))
return false;
NBits = M0->getOperand(1);
+ NegateNBits = false;
return true;
};
- // Match potentially-truncated (bitwidth - y)
- auto matchShiftAmt = [checkOneUse, &NBits](SDValue ShiftAmt,
- unsigned Bitwidth) {
- // Skip over a truncate of the shift amount.
- if (ShiftAmt.getOpcode() == ISD::TRUNCATE) {
- ShiftAmt = ShiftAmt.getOperand(0);
- // The trunc should have been the only user of the real shift amount.
- if (!checkOneUse(ShiftAmt))
- return false;
- }
- // Match the shift amount as: (bitwidth - y). It should go away, too.
- if (ShiftAmt.getOpcode() != ISD::SUB)
- return false;
- auto *V0 = dyn_cast<ConstantSDNode>(ShiftAmt.getOperand(0));
+ // Try to match potentially-truncated shift amount as `(bitwidth - y)`,
+ // or leave the shift amount as-is, but then we'll have to negate it.
+ auto canonicalizeShiftAmt = [&NBits, &NegateNBits](SDValue ShiftAmt,
+ unsigned Bitwidth) {
+ NBits = ShiftAmt;
+ NegateNBits = true;
+ // Skip over a truncate of the shift amount, if any.
+ if (NBits.getOpcode() == ISD::TRUNCATE)
+ NBits = NBits.getOperand(0);
+ // Try to match the shift amount as (bitwidth - y). It should go away, too.
+ // If it doesn't match, that's fine, we'll just negate it ourselves.
+ if (NBits.getOpcode() != ISD::SUB)
+ return;
+ auto *V0 = dyn_cast<ConstantSDNode>(NBits.getOperand(0));
if (!V0 || V0->getZExtValue() != Bitwidth)
- return false;
- NBits = ShiftAmt.getOperand(1);
- return true;
+ return;
+ NBits = NBits.getOperand(1);
+ NegateNBits = false;
};
+ // c) x & (-1 >> z) but then we'll have to subtract z from bitwidth
+ // or
// c) x & (-1 >> (32 - y))
- auto matchPatternC = [checkOneUse, peekThroughOneUseTruncation,
- matchShiftAmt](SDValue Mask) -> bool {
+ auto matchPatternC = [checkOneUse, peekThroughOneUseTruncation, &NegateNBits,
+ canonicalizeShiftAmt](SDValue Mask) -> bool {
// The mask itself may be truncated.
Mask = peekThroughOneUseTruncation(Mask);
unsigned Bitwidth = Mask.getSimpleValueType().getSizeInBits();
@@ -3490,27 +3572,39 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
// The shift amount should not be used externally.
if (!checkOneUse(M1))
return false;
- return matchShiftAmt(M1, Bitwidth);
+ canonicalizeShiftAmt(M1, Bitwidth);
+ // Pattern c. is non-canonical, and is expanded into pattern d. iff there
+ // is no extra use of the mask. Clearly, there was one since we are here.
+ // But at the same time, if we need to negate the shift amount,
+ // then we don't want the mask to stick around, else it's unprofitable.
+ return !NegateNBits;
};
SDValue X;
+ // d) x << z >> z but then we'll have to subtract z from bitwidth
+ // or
// d) x << (32 - y) >> (32 - y)
- auto matchPatternD = [checkOneUse, checkTwoUse, matchShiftAmt,
+ auto matchPatternD = [checkOneUse, checkTwoUse, canonicalizeShiftAmt,
+ AllowExtraUsesByDefault, &NegateNBits,
&X](SDNode *Node) -> bool {
if (Node->getOpcode() != ISD::SRL)
return false;
SDValue N0 = Node->getOperand(0);
- if (N0->getOpcode() != ISD::SHL || !checkOneUse(N0))
+ if (N0->getOpcode() != ISD::SHL)
return false;
unsigned Bitwidth = N0.getSimpleValueType().getSizeInBits();
SDValue N1 = Node->getOperand(1);
SDValue N01 = N0->getOperand(1);
// Both of the shifts must be by the exact same value.
- // There should not be any uses of the shift amount outside of the pattern.
- if (N1 != N01 || !checkTwoUse(N1))
+ if (N1 != N01)
return false;
- if (!matchShiftAmt(N1, Bitwidth))
+ canonicalizeShiftAmt(N1, Bitwidth);
+ // There should not be any external uses of the inner shift / shift amount.
+ // Note that while we are generally okay with external uses given BMI2,
+ // iff we need to negate the shift amount, we are not okay with extra uses.
+ const bool AllowExtraUses = AllowExtraUsesByDefault && !NegateNBits;
+ if (!checkOneUse(N0, AllowExtraUses) || !checkTwoUse(N1, AllowExtraUses))
return false;
X = N0->getOperand(0);
return true;
@@ -3535,6 +3629,11 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
} else if (!matchPatternD(Node))
return false;
+ // If we need to negate the shift amount, require BMI2 BZHI support.
+ // It's just too unprofitable for BMI1 BEXTR.
+ if (NegateNBits && !Subtarget->hasBMI2())
+ return false;
+
SDLoc DL(Node);
// Truncate the shift amount.
@@ -3549,11 +3648,21 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
SDValue SRIdxVal = CurDAG->getTargetConstant(X86::sub_8bit, DL, MVT::i32);
insertDAGNode(*CurDAG, SDValue(Node, 0), SRIdxVal);
- NBits = SDValue(
- CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::i32, ImplDef,
- NBits, SRIdxVal), 0);
+ NBits = SDValue(CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
+ MVT::i32, ImplDef, NBits, SRIdxVal),
+ 0);
insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
+ // We might have matched the amount of high bits to be cleared,
+ // but we want the amount of low bits to be kept, so negate it then.
+ if (NegateNBits) {
+ SDValue BitWidthC = CurDAG->getConstant(NVT.getSizeInBits(), DL, MVT::i32);
+ insertDAGNode(*CurDAG, SDValue(Node, 0), BitWidthC);
+
+ NBits = CurDAG->getNode(ISD::SUB, DL, MVT::i32, BitWidthC, NBits);
+ insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
+ }
+
if (Subtarget->hasBMI2()) {
      // Great, just emit the BZHI.
if (NVT != MVT::i32) {
@@ -4040,11 +4149,11 @@ bool X86DAGToDAGISel::tryShrinkShlLogicImm(SDNode *N) {
}
bool X86DAGToDAGISel::matchVPTERNLOG(SDNode *Root, SDNode *ParentA,
- SDNode *ParentBC, SDValue A, SDValue B,
- SDValue C, uint8_t Imm) {
- assert(A.isOperandOf(ParentA));
- assert(B.isOperandOf(ParentBC));
- assert(C.isOperandOf(ParentBC));
+ SDNode *ParentB, SDNode *ParentC,
+ SDValue A, SDValue B, SDValue C,
+ uint8_t Imm) {
+ assert(A.isOperandOf(ParentA) && B.isOperandOf(ParentB) &&
+ C.isOperandOf(ParentC) && "Incorrect parent node");
auto tryFoldLoadOrBCast =
[this](SDNode *Root, SDNode *P, SDValue &L, SDValue &Base, SDValue &Scale,
@@ -4072,7 +4181,7 @@ bool X86DAGToDAGISel::matchVPTERNLOG(SDNode *Root, SDNode *ParentA,
bool FoldedLoad = false;
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
- if (tryFoldLoadOrBCast(Root, ParentBC, C, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
+ if (tryFoldLoadOrBCast(Root, ParentC, C, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
FoldedLoad = true;
} else if (tryFoldLoadOrBCast(Root, ParentA, A, Tmp0, Tmp1, Tmp2, Tmp3,
Tmp4)) {
@@ -4085,7 +4194,7 @@ bool X86DAGToDAGISel::matchVPTERNLOG(SDNode *Root, SDNode *ParentA,
if (OldImm & 0x10) Imm |= 0x02;
if (OldImm & 0x08) Imm |= 0x40;
if (OldImm & 0x40) Imm |= 0x08;
- } else if (tryFoldLoadOrBCast(Root, ParentBC, B, Tmp0, Tmp1, Tmp2, Tmp3,
+ } else if (tryFoldLoadOrBCast(Root, ParentB, B, Tmp0, Tmp1, Tmp2, Tmp3,
Tmp4)) {
FoldedLoad = true;
std::swap(B, C);
@@ -4163,7 +4272,6 @@ bool X86DAGToDAGISel::matchVPTERNLOG(SDNode *Root, SDNode *ParentA,
}
// Try to match two logic ops to a VPTERNLOG.
-// FIXME: Handle inverted inputs?
// FIXME: Handle more complex patterns that use an operand more than once?
bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) {
MVT NVT = N->getSimpleValueType(0);
@@ -4206,12 +4314,31 @@ bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) {
SDValue B = FoldableOp.getOperand(0);
SDValue C = FoldableOp.getOperand(1);
+ SDNode *ParentA = N;
+ SDNode *ParentB = FoldableOp.getNode();
+ SDNode *ParentC = FoldableOp.getNode();
// We can build the appropriate control immediate by performing the logic
// operation we're matching using these constants for A, B, and C.
- const uint8_t TernlogMagicA = 0xf0;
- const uint8_t TernlogMagicB = 0xcc;
- const uint8_t TernlogMagicC = 0xaa;
+ uint8_t TernlogMagicA = 0xf0;
+ uint8_t TernlogMagicB = 0xcc;
+ uint8_t TernlogMagicC = 0xaa;
+
+ // Some of the inputs may be inverted, peek through them and invert the
+ // magic values accordingly.
+ // TODO: There may be a bitcast before the xor that we should peek through.
+ auto PeekThroughNot = [](SDValue &Op, SDNode *&Parent, uint8_t &Magic) {
+ if (Op.getOpcode() == ISD::XOR && Op.hasOneUse() &&
+ ISD::isBuildVectorAllOnes(Op.getOperand(1).getNode())) {
+ Magic = ~Magic;
+ Parent = Op.getNode();
+ Op = Op.getOperand(0);
+ }
+ };
+
+ PeekThroughNot(A, ParentA, TernlogMagicA);
+ PeekThroughNot(B, ParentB, TernlogMagicB);
+ PeekThroughNot(C, ParentC, TernlogMagicC);
uint8_t Imm;
switch (FoldableOp.getOpcode()) {
@@ -4235,7 +4362,7 @@ bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) {
case ISD::XOR: Imm ^= TernlogMagicA; break;
}
- return matchVPTERNLOG(N, N, FoldableOp.getNode(), A, B, C, Imm);
+ return matchVPTERNLOG(N, ParentA, ParentB, ParentC, A, B, C, Imm);
}
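The control byte for VPTERNLOG is just the truth table of the matched expression over three inputs, so the code above evaluates the logic op on the magic constants 0xf0/0xcc/0xaa, and the new PeekThroughNot hook handles an inverted input by inverting its magic value. A standalone check of that construction:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint8_t A = 0xf0, B = 0xcc, C = 0xaa; // truth tables of a, b, c
      // Immediate for a & (b | c): evaluate the expression on the magics.
      uint8_t Imm = uint8_t(A & (B | C));
      assert(Imm == 0xe0);
      // Immediate for a & (~b | c): peek through the NOT by inverting B's magic.
      uint8_t ImmNot = uint8_t(A & (uint8_t(~B) | C));
      assert(ImmNot == 0xb0);
      return 0;
    }
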
/// If the high bits of an 'and' operand are known zero, try setting the
@@ -4295,7 +4422,7 @@ bool X86DAGToDAGISel::shrinkAndImmediate(SDNode *And) {
// Check if the mask is -1. In that case, this is an unnecessary instruction
// that escaped earlier analysis.
- if (NegMaskVal.isAllOnesValue()) {
+ if (NegMaskVal.isAllOnes()) {
ReplaceNode(And, And0.getNode());
return true;
}
@@ -4572,7 +4699,7 @@ bool X86DAGToDAGISel::tryMatchBitSelect(SDNode *N) {
ReplaceNode(N, Ternlog.getNode());
return matchVPTERNLOG(Ternlog.getNode(), Ternlog.getNode(), Ternlog.getNode(),
- A, B, C, 0xCA);
+ Ternlog.getNode(), A, B, C, 0xCA);
}
void X86DAGToDAGISel::Select(SDNode *Node) {
@@ -4807,7 +4934,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
case X86ISD::VPTERNLOG: {
uint8_t Imm = cast<ConstantSDNode>(Node->getOperand(3))->getZExtValue();
- if (matchVPTERNLOG(Node, Node, Node, Node->getOperand(0),
+ if (matchVPTERNLOG(Node, Node, Node, Node, Node->getOperand(0),
Node->getOperand(1), Node->getOperand(2), Imm))
return;
break;
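Earlier in this file, matchBitExtract learns to accept patterns c) and d) with a plain shift amount z by negating it to nbits = bitwidth - z when BMI2's BZHI is available. The underlying identity is easy to verify standalone (BZHI keeps the low n bits of its source, and n >= 32 keeps everything):

    #include <cassert>
    #include <cstdint>

    // Pattern c): mask the value with (-1 >> z).
    uint32_t keepLowBitsViaMask(uint32_t x, unsigned z) {
      return x & (~0u >> z);
    }

    // What the selector now emits instead: a BZHI-style "keep low nbits"
    // with nbits = 32 - z.
    uint32_t keepLowBitsViaNBits(uint32_t x, unsigned z) {
      unsigned nbits = 32 - z;
      return nbits >= 32 ? x : (x & ((1u << nbits) - 1));
    }

    int main() {
      for (unsigned z = 0; z < 32; ++z)
        for (uint32_t x : {0u, 1u, 0xdeadbeefu, ~0u})
          assert(keepLowBitsViaMask(x, z) == keepLowBitsViaNBits(x, z));
      return 0;
    }
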
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 3a64b3460030..dba0321d9431 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -48,9 +48,10 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -71,14 +72,6 @@ using namespace llvm;
STATISTIC(NumTailCalls, "Number of tail calls");
-static cl::opt<int> ExperimentalPrefLoopAlignment(
- "x86-experimental-pref-loop-alignment", cl::init(4),
- cl::desc(
- "Sets the preferable loop alignment for experiments (as log2 bytes)"
- "(the last x86-experimental-pref-loop-alignment bits"
- " of the loop header PC will be 0)."),
- cl::Hidden);
-
static cl::opt<int> ExperimentalPrefInnermostLoopAlignment(
"x86-experimental-pref-innermost-loop-alignment", cl::init(4),
cl::desc(
@@ -117,6 +110,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
X86ScalarSSEf64 = Subtarget.hasSSE2();
X86ScalarSSEf32 = Subtarget.hasSSE1();
+ X86ScalarSSEf16 = Subtarget.hasFP16();
MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
// Set up the TargetLowering object.
@@ -213,6 +207,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::ABS , MVT::i64 , Custom);
}
+ // Signed saturation subtraction.
+ setOperationAction(ISD::SSUBSAT , MVT::i8 , Custom);
+ setOperationAction(ISD::SSUBSAT , MVT::i16 , Custom);
+ setOperationAction(ISD::SSUBSAT , MVT::i32 , Custom);
+ if (Subtarget.is64Bit())
+ setOperationAction(ISD::SSUBSAT , MVT::i64 , Custom);
+
// Funnel shifts.
for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) {
// For slow shld targets we only lower for code size.
@@ -424,8 +425,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setTruncStoreAction(MVT::f128, MVT::f16, Expand);
setOperationAction(ISD::PARITY, MVT::i8, Custom);
+ setOperationAction(ISD::PARITY, MVT::i16, Custom);
+ setOperationAction(ISD::PARITY, MVT::i32, Custom);
+ if (Subtarget.is64Bit())
+ setOperationAction(ISD::PARITY, MVT::i64, Custom);
if (Subtarget.hasPOPCNT()) {
setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32);
+ // popcntw is longer to encode than popcntl and also has a false dependency
+ // on the dest that popcntl hasn't had since Cannon Lake.
+ setOperationPromotedToType(ISD::CTPOP, MVT::i16, MVT::i32);
} else {
setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
@@ -434,11 +442,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
else
setOperationAction(ISD::CTPOP , MVT::i64 , Custom);
-
- setOperationAction(ISD::PARITY, MVT::i16, Custom);
- setOperationAction(ISD::PARITY, MVT::i32, Custom);
- if (Subtarget.is64Bit())
- setOperationAction(ISD::PARITY, MVT::i64, Custom);
}
setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
@@ -532,7 +535,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::TRAP, MVT::Other, Legal);
setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
- setOperationAction(ISD::UBSANTRAP, MVT::Other, Legal);
+ if (Subtarget.getTargetTriple().isPS4CPU())
+ setOperationAction(ISD::UBSANTRAP, MVT::Other, Expand);
+ else
+ setOperationAction(ISD::UBSANTRAP, MVT::Other, Legal);
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
setOperationAction(ISD::VASTART , MVT::Other, Custom);
@@ -968,6 +974,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::USUBSAT, MVT::v4i32, Custom);
setOperationAction(ISD::USUBSAT, MVT::v2i64, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
@@ -1147,6 +1154,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
setOperationAction(ISD::UADDSAT, MVT::v4i32, Custom);
+ setOperationAction(ISD::SADDSAT, MVT::v2i64, Custom);
+ setOperationAction(ISD::SSUBSAT, MVT::v2i64, Custom);
// FIXME: Do we need to handle scalar-to-vector here?
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
@@ -1172,10 +1181,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i32, Legal);
}
- // i8 vectors are custom because the source register and source
- // source memory operand types are not the same width.
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
-
if (Subtarget.is64Bit() && !Subtarget.hasAVX512()) {
// We need to scalarize v4i64->v432 uint_to_fp using cvtsi2ss, but we can
// do the pre and post work in the vector domain.
@@ -1677,6 +1682,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
}
+    // With BWI, expanding (and promoting the shifts) is the better option.
+ if (!Subtarget.useBWIRegs())
+ setOperationAction(ISD::ROTL, MVT::v32i16, Custom);
+
for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
setOperationAction(ISD::ABS, VT, HasBWI ? Legal : Custom);
setOperationAction(ISD::CTPOP, VT, Subtarget.hasBITALG() ? Legal : Custom);
@@ -1903,6 +1912,155 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
}
+ if (!Subtarget.useSoftFloat() && Subtarget.hasFP16()) {
+ auto setGroup = [&] (MVT VT) {
+ setOperationAction(ISD::FADD, VT, Legal);
+ setOperationAction(ISD::STRICT_FADD, VT, Legal);
+ setOperationAction(ISD::FSUB, VT, Legal);
+ setOperationAction(ISD::STRICT_FSUB, VT, Legal);
+ setOperationAction(ISD::FMUL, VT, Legal);
+ setOperationAction(ISD::STRICT_FMUL, VT, Legal);
+ setOperationAction(ISD::FDIV, VT, Legal);
+ setOperationAction(ISD::STRICT_FDIV, VT, Legal);
+ setOperationAction(ISD::FSQRT, VT, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, VT, Legal);
+
+ setOperationAction(ISD::FFLOOR, VT, Legal);
+ setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
+ setOperationAction(ISD::FCEIL, VT, Legal);
+ setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
+ setOperationAction(ISD::FTRUNC, VT, Legal);
+ setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
+ setOperationAction(ISD::FRINT, VT, Legal);
+ setOperationAction(ISD::STRICT_FRINT, VT, Legal);
+ setOperationAction(ISD::FNEARBYINT, VT, Legal);
+ setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
+
+ setOperationAction(ISD::LOAD, VT, Legal);
+ setOperationAction(ISD::STORE, VT, Legal);
+
+ setOperationAction(ISD::FMA, VT, Legal);
+ setOperationAction(ISD::STRICT_FMA, VT, Legal);
+ setOperationAction(ISD::VSELECT, VT, Legal);
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::SELECT, VT, Custom);
+
+ setOperationAction(ISD::FNEG, VT, Custom);
+ setOperationAction(ISD::FABS, VT, Custom);
+ setOperationAction(ISD::FCOPYSIGN, VT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ };
+
+ // AVX512_FP16 scalar operations
+ setGroup(MVT::f16);
+ addRegisterClass(MVT::f16, &X86::FR16XRegClass);
+ setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
+ setOperationAction(ISD::BR_CC, MVT::f16, Expand);
+ setOperationAction(ISD::SETCC, MVT::f16, Custom);
+ setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Custom);
+ setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom);
+ setOperationAction(ISD::FROUND, MVT::f16, Custom);
+ setOperationAction(ISD::STRICT_FROUND, MVT::f16, Promote);
+ setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal);
+ setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::f16, Legal);
+ setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
+ if (isTypeLegal(MVT::f80)) {
+ setOperationAction(ISD::FP_EXTEND, MVT::f80, Custom);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Custom);
+ }
+
+ setCondCodeAction(ISD::SETOEQ, MVT::f16, Expand);
+ setCondCodeAction(ISD::SETUNE, MVT::f16, Expand);
+
+ if (Subtarget.useAVX512Regs()) {
+ setGroup(MVT::v32f16);
+ addRegisterClass(MVT::v32f16, &X86::VR512RegClass);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32f16, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v32i16, Legal);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v32i16, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v32i16, Legal);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v32i16, Legal);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::v16f16, Legal);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v16f32, Legal);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32f16, Custom);
+
+ setOperationAction(ISD::FP_TO_SINT, MVT::v32i16, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v32i16, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v32i16, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v32i16, Custom);
+ setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v32i8, MVT::v32i16);
+ setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v32i8,
+ MVT::v32i16);
+ setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v32i8, MVT::v32i16);
+ setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v32i8,
+ MVT::v32i16);
+ setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v32i1, MVT::v32i16);
+ setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v32i1,
+ MVT::v32i16);
+ setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v32i1, MVT::v32i16);
+ setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v32i1,
+ MVT::v32i16);
+
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16f16, Legal);
+ setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32f16, Legal);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v32f16, Custom);
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Legal);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v16f32, MVT::v16f16, Legal);
+
+ setOperationAction(ISD::STRICT_FSETCC, MVT::v32i1, Custom);
+ setOperationAction(ISD::STRICT_FSETCCS, MVT::v32i1, Custom);
+ }
+
+ if (Subtarget.hasVLX()) {
+ addRegisterClass(MVT::v8f16, &X86::VR128XRegClass);
+ addRegisterClass(MVT::v16f16, &X86::VR256XRegClass);
+ setGroup(MVT::v8f16);
+ setGroup(MVT::v16f16);
+
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8f16, Legal);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16f16, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v16i16, Legal);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i16, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Legal);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i16, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v16i16, Legal);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v16i16, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Legal);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i16, Legal);
+
+ setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i16, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i16, Custom);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f16, Legal);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f32, Legal);
+
+ // INSERT_VECTOR_ELT v8f16 extended to VECTOR_SHUFFLE
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8f16, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16f16, Custom);
+
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f16, Legal);
+ setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16f16, Legal);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f16, Custom);
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Legal);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Legal);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Legal);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Legal);
+
+ // Need to custom widen these to prevent scalarization.
+ setOperationAction(ISD::LOAD, MVT::v4f16, Custom);
+ setOperationAction(ISD::STORE, MVT::v4f16, Custom);
+ }
+
+ // Support fp16 0 immediate
+ addLegalFPImmediate(APFloat::getZero(APFloat::IEEEhalf()));
+ }
+
if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
@@ -1921,6 +2079,37 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
}
+ if (Subtarget.hasFP16()) {
+ // vcvttph2[u]dq v4f16 -> v4i32/64, v2f16 -> v2i32/64
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2f16, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f16, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2f16, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f16, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v4f16, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4f16, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v4f16, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4f16, Custom);
+ // vcvt[u]dq2ph v4i32/64 -> v4f16, v2i32/64 -> v2f16
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2f16, Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f16, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2f16, Custom);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f16, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4f16, Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4f16, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4f16, Custom);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4f16, Custom);
+ // vcvtps2phx v4f32 -> v4f16, v2f32 -> v2f16
+ setOperationAction(ISD::FP_ROUND, MVT::v2f16, Custom);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::v2f16, Custom);
+ setOperationAction(ISD::FP_ROUND, MVT::v4f16, Custom);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f16, Custom);
+ // vcvtph2psx v4f16 -> v4f32, v2f16 -> v2f32
+ setOperationAction(ISD::FP_EXTEND, MVT::v2f16, Custom);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v2f16, Custom);
+ setOperationAction(ISD::FP_EXTEND, MVT::v4f16, Custom);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f16, Custom);
+ }
+
setOperationAction(ISD::TRUNCATE, MVT::v16i32, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v8i64, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom);
@@ -1969,7 +2158,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setLibcallName(RTLIB::SRL_I128, nullptr);
setLibcallName(RTLIB::SRA_I128, nullptr);
setLibcallName(RTLIB::MUL_I128, nullptr);
+ // The MULO libcall is not part of libgcc, only compiler-rt.
+ setLibcallName(RTLIB::MULO_I64, nullptr);
}
+ // The MULO libcall is not part of libgcc, only compiler-rt.
+ setLibcallName(RTLIB::MULO_I128, nullptr);
// Combine sin / cos into _sincos_stret if it is available.
if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
@@ -1983,6 +2176,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UDIV, MVT::i128, Custom);
setOperationAction(ISD::SREM, MVT::i128, Custom);
setOperationAction(ISD::UREM, MVT::i128, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i128, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i128, Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i128, Custom);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, Custom);
}
// On 32 bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
@@ -2070,8 +2271,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
MaxLoadsPerMemcmp = 2;
MaxLoadsPerMemcmpOptSize = 2;
- // Set loop alignment to 2^ExperimentalPrefLoopAlignment bytes (default: 2^4).
- setPrefLoopAlignment(Align(1ULL << ExperimentalPrefLoopAlignment));
+ // Default loop alignment, which can be overridden by -align-loops.
+ setPrefLoopAlignment(Align(16));
// An out-of-order CPU can speculatively execute past a predictable branch,
// but a conditional move could be stalled by an expensive earlier operation.
@@ -2165,6 +2366,16 @@ MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
return RegisterVT;
}
+  // v3f16 will be widened to v4f16, but we don't assign a register class for v4f16.
+  // So its default register type is f16. We override the type to v8f16 here.
+ if (VT == MVT::v3f16 && Subtarget.hasFP16())
+ return MVT::v8f16;
+
+  // We will use more GPRs for f64 and f80 in 32-bit mode when x87 is disabled.
+ if ((VT == MVT::f64 || VT == MVT::f80) && !Subtarget.is64Bit() &&
+ !Subtarget.hasX87())
+ return MVT::i32;
+
return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
}
@@ -2183,6 +2394,20 @@ unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
return NumRegisters;
}
+  // v3f16 will be widened to v4f16, but we don't assign a register class for v4f16.
+  // So its default register number is 3. We override the number to 1 here.
+ if (VT == MVT::v3f16 && Subtarget.hasFP16())
+ return 1;
+
+  // We have to split f64 into 2 registers and f80 into 3 registers in 32-bit
+  // mode if x87 is disabled.
+ if (!Subtarget.is64Bit() && !Subtarget.hasX87()) {
+ if (VT == MVT::f64)
+ return 2;
+ if (VT == MVT::f80)
+ return 3;
+ }
+
return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
}
@@ -2272,7 +2497,7 @@ static void getMaxByValAlign(Type *Ty, Align &MaxAlign) {
/// function arguments in the caller parameter area. For X86, aggregates
/// that contain SSE vectors are placed at 16-byte boundaries while the rest
/// are at 4-byte boundaries.
-unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty,
+uint64_t X86TargetLowering::getByValTypeAlignment(Type *Ty,
const DataLayout &DL) const {
if (Subtarget.is64Bit()) {
// Max of 8 and alignment of type.
@@ -2294,7 +2519,7 @@ unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty,
/// preferred vector width.
EVT X86TargetLowering::getOptimalMemOpType(
const MemOp &Op, const AttributeList &FuncAttributes) const {
- if (!FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
+ if (!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
if (Op.size() >= 16 &&
(!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
// FIXME: Check if unaligned 64-byte accesses are slow.
@@ -2547,7 +2772,7 @@ void X86TargetLowering::insertSSPDeclarations(Module &M) const {
Type::getInt8PtrTy(M.getContext()));
if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
F->setCallingConv(CallingConv::X86_FastCall);
- F->addAttribute(1, Attribute::AttrKind::InReg);
+ F->addParamAttr(0, Attribute::AttrKind::InReg);
}
return;
}
@@ -2898,16 +3123,15 @@ bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
return false;
bool HasRet = false;
- for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
- UI != UE; ++UI) {
- if (UI->getOpcode() != X86ISD::RET_FLAG)
+ for (const SDNode *U : Copy->uses()) {
+ if (U->getOpcode() != X86ISD::RET_FLAG)
return false;
// If we are returning more than one value, we can definitely
// not make a tail call see PR19530
- if (UI->getNumOperands() > 4)
+ if (U->getNumOperands() > 4)
return false;
- if (UI->getNumOperands() == 4 &&
- UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue)
+ if (U->getNumOperands() == 4 &&
+ U->getOperand(U->getNumOperands() - 1).getValueType() != MVT::Glue)
return false;
HasRet = true;
}
@@ -3137,38 +3361,40 @@ SDValue X86TargetLowering::LowerCallResult(
// For info on fast calling convention see Fast Calling Convention (tail call)
// implementation LowerX86_32FastCCCallTo.
-/// CallIsStructReturn - Determines whether a call uses struct return
-/// semantics.
-enum StructReturnType {
- NotStructReturn,
- RegStructReturn,
- StackStructReturn
-};
-static StructReturnType
-callIsStructReturn(ArrayRef<ISD::OutputArg> Outs, bool IsMCU) {
- if (Outs.empty())
- return NotStructReturn;
+/// Determines whether Args, either a set of outgoing arguments to a call, or a
+/// set of incoming args of a call, contains an sret pointer that the callee
+/// pops.
+template <typename T>
+static bool hasCalleePopSRet(const SmallVectorImpl<T> &Args,
+ const X86Subtarget &Subtarget) {
+ // Not C++20 (yet), so no concepts available.
+ static_assert(std::is_same<T, ISD::OutputArg>::value ||
+ std::is_same<T, ISD::InputArg>::value,
+ "requires ISD::OutputArg or ISD::InputArg");
- const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
- if (!Flags.isSRet())
- return NotStructReturn;
- if (Flags.isInReg() || IsMCU)
- return RegStructReturn;
- return StackStructReturn;
-}
+  // Only 32-bit targets pop the sret. It's a 64-bit world these days, so
+  // early-out for most compilations.
+ if (!Subtarget.is32Bit())
+ return false;
+
+ if (Args.empty())
+ return false;
-/// Determines whether a function uses struct return semantics.
-static StructReturnType
-argsAreStructReturn(ArrayRef<ISD::InputArg> Ins, bool IsMCU) {
- if (Ins.empty())
- return NotStructReturn;
+  // Most calls do not have an sret argument; check the arg next.
+ const ISD::ArgFlagsTy &Flags = Args[0].Flags;
+ if (!Flags.isSRet() || Flags.isInReg())
+ return false;
+
+  // The MSVC ABI does not pop the sret.
+ if (Subtarget.getTargetTriple().isOSMSVCRT())
+ return false;
+
+  // MCUs don't pop the sret.
+ if (Subtarget.isTargetMCU())
+ return false;
- const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
- if (!Flags.isSRet())
- return NotStructReturn;
- if (Flags.isInReg() || IsMCU)
- return RegStructReturn;
- return StackStructReturn;
+  // Otherwise, the callee pops the sret argument.
+ return true;
}
/// Make a copy of an aggregate at address specified by "Src" to address
@@ -3533,13 +3759,19 @@ void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters(
SmallVector<SDValue, 12> SaveXMMOps;
SaveXMMOps.push_back(Chain);
SaveXMMOps.push_back(ALVal);
- SaveXMMOps.push_back(
- DAG.getTargetConstant(FuncInfo->getRegSaveFrameIndex(), DL, MVT::i32));
+ SaveXMMOps.push_back(RSFIN);
SaveXMMOps.push_back(
DAG.getTargetConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32));
llvm::append_range(SaveXMMOps, LiveXMMRegs);
- MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, DL,
- MVT::Other, SaveXMMOps));
+ MachineMemOperand *StoreMMO =
+ DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(
+ DAG.getMachineFunction(), FuncInfo->getRegSaveFrameIndex(),
+ Offset),
+ MachineMemOperand::MOStore, 128, Align(16));
+ MemOps.push_back(DAG.getMemIntrinsicNode(X86ISD::VASTART_SAVE_XMM_REGS,
+ DL, DAG.getVTList(MVT::Other),
+ SaveXMMOps, MVT::i8, StoreMMO));
}
if (!MemOps.empty())
@@ -3670,6 +3902,8 @@ SDValue X86TargetLowering::LowerFormalArguments(
RC = &X86::GR32RegClass;
else if (Is64Bit && RegVT == MVT::i64)
RC = &X86::GR64RegClass;
+ else if (RegVT == MVT::f16)
+ RC = &X86::FR16XRegClass;
else if (RegVT == MVT::f32)
RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
else if (RegVT == MVT::f64)
@@ -3767,12 +4001,12 @@ SDValue X86TargetLowering::LowerFormalArguments(
// the argument into a virtual register so that we can access it from the
// return points.
if (Ins[I].Flags.isSRet()) {
- Register Reg = FuncInfo->getSRetReturnReg();
- if (!Reg) {
- MVT PtrTy = getPointerTy(DAG.getDataLayout());
- Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
- FuncInfo->setSRetReturnReg(Reg);
- }
+ assert(!FuncInfo->getSRetReturnReg() &&
+ "SRet return has already been set");
+ MVT PtrTy = getPointerTy(DAG.getDataLayout());
+ Register Reg =
+ MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
+ FuncInfo->setSRetReturnReg(Reg);
SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
break;
@@ -3800,9 +4034,7 @@ SDValue X86TargetLowering::LowerFormalArguments(
} else {
FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
// If this is an sret function, the return should pop the hidden pointer.
- if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
- !Subtarget.getTargetTriple().isOSMSVCRT() &&
- argsAreStructReturn(Ins, Subtarget.isTargetMCU()) == StackStructReturn)
+ if (!canGuaranteeTCO(CallConv) && hasCalleePopSRet(Ins, Subtarget))
FuncInfo->setBytesToPopOnReturn(4);
}
@@ -3921,10 +4153,10 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
MachineFunction &MF = DAG.getMachineFunction();
bool Is64Bit = Subtarget.is64Bit();
bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
- StructReturnType SR = callIsStructReturn(Outs, Subtarget.isTargetMCU());
bool IsSibcall = false;
bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt ||
CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
+ bool IsCalleePopSRet = !IsGuaranteeTCO && hasCalleePopSRet(Outs, Subtarget);
X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
bool HasNCSR = (CB && isa<CallInst>(CB) &&
CB->hasFnAttr("no_caller_saved_registers"));
@@ -3950,13 +4182,11 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
isTailCall = false;
}
-
if (isTailCall && !IsMustTail) {
// Check if it's really possible to do a tail call.
- isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
- isVarArg, SR != NotStructReturn,
- MF.getFunction().hasStructRetAttr(), CLI.RetTy,
- Outs, OutVals, Ins, DAG);
+ isTailCall = IsEligibleForTailCallOptimization(
+ Callee, CallConv, IsCalleePopSRet, isVarArg, CLI.RetTy, Outs, OutVals,
+ Ins, DAG);
// Sibcalls are automatically detected tailcalls which do not require
// ABI changes.
@@ -4199,7 +4429,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
}
- if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail) {
+ if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail &&
+ (Subtarget.hasSSE1() || !M->getModuleFlag("SkipRaxSetup"))) {
// From AMD64 ABI document:
// For calls that may call functions that use varargs or stdargs
// (prototype-less calls or calls to functions containing ellipsis (...) in
@@ -4324,7 +4555,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// address into a register.
Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true);
} else if (Subtarget.isTarget64BitILP32() &&
- Callee->getValueType(0) == MVT::i32) {
+ Callee.getValueType() == MVT::i32) {
// Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
}
@@ -4436,14 +4667,12 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
"tail calls cannot be marked with clang.arc.attachedcall");
assert(Is64Bit && "clang.arc.attachedcall is only supported in 64bit mode");
- // Add target constant to select ObjC runtime call just before the call
- // target. RuntimeCallType == 0 selects objc_retainAutoreleasedReturnValue,
- // RuntimeCallType == 0 selects objc_unsafeClaimAutoreleasedReturnValue when
- // epxanding the pseudo.
- unsigned RuntimeCallType =
- objcarc::hasAttachedCallOpBundle(CLI.CB, true) ? 0 : 1;
- Ops.insert(Ops.begin() + 1,
- DAG.getTargetConstant(RuntimeCallType, dl, MVT::i32));
+ // Add a target global address for the retainRV/claimRV runtime function
+ // just before the call target.
+ Function *ARCFn = *objcarc::getAttachedARCFunction(CLI.CB);
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
+ auto GA = DAG.getTargetGlobalAddress(ARCFn, dl, PtrVT);
+ Ops.insert(Ops.begin() + 1, GA);
Chain = DAG.getNode(X86ISD::CALL_RVMARKER, dl, NodeTys, Ops);
} else {
Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
@@ -4459,20 +4688,14 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc);
// Create the CALLSEQ_END node.
- unsigned NumBytesForCalleeToPop;
+ unsigned NumBytesForCalleeToPop = 0; // Callee pops nothing.
if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
DAG.getTarget().Options.GuaranteedTailCallOpt))
NumBytesForCalleeToPop = NumBytes; // Callee pops everything
- else if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
- !Subtarget.getTargetTriple().isOSMSVCRT() &&
- SR == StackStructReturn)
- // If this is a call to a struct-return function, the callee
- // pops the hidden struct pointer, so we have to push it back.
- // This is common for Darwin/X86, Linux & Mingw32 targets.
- // For MSVC Win32 targets, the caller pops the hidden struct pointer.
+ else if (!canGuaranteeTCO(CallConv) && IsCalleePopSRet)
+ // If this call passes a struct-return pointer, the callee
+ // pops that struct pointer.
NumBytesForCalleeToPop = 4;
- else
- NumBytesForCalleeToPop = 0; // Callee pops nothing.
// Returns a flag for retval copy to use.
if (!IsSibcall) {
@@ -4631,9 +4854,8 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
/// Check whether the call is eligible for tail call optimization. Targets
/// that want to do tail call optimization should implement this function.
bool X86TargetLowering::IsEligibleForTailCallOptimization(
- SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
- bool isCalleeStructRet, bool isCallerStructRet, Type *RetTy,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
+ SDValue Callee, CallingConv::ID CalleeCC, bool IsCalleePopSRet,
+ bool isVarArg, Type *RetTy, const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
if (!mayTailCallThisCC(CalleeCC))
@@ -4677,9 +4899,17 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(
if (RegInfo->hasStackRealignment(MF))
return false;
- // Also avoid sibcall optimization if either caller or callee uses struct
- // return semantics.
- if (isCalleeStructRet || isCallerStructRet)
+ // Also avoid sibcall optimization if we're an sret return fn and the callee
+ // is incompatible. See comment in LowerReturn about why hasStructRetAttr is
+ // insufficient.
+ if (MF.getInfo<X86MachineFunctionInfo>()->getSRetReturnReg()) {
+    // For a compatible tail call the callee must return our sret pointer. So it
+    // needs to (a) be an sret function itself and (b) be passed our sret as its
+    // sret. Condition (b) is harder to determine.
+ return false;
+ } else if (IsCalleePopSRet)
+ // The callee pops an sret, so we cannot tail-call, as our caller doesn't
+ // expect that.
return false;
// Do not sibcall optimize vararg calls unless all arguments are passed via
@@ -4833,15 +5063,44 @@ X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
// Other Lowering Hooks
//===----------------------------------------------------------------------===//
-static bool MayFoldLoad(SDValue Op) {
- return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode());
+bool X86::mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget,
+ bool AssumeSingleUse) {
+ if (!AssumeSingleUse && !Op.hasOneUse())
+ return false;
+ if (!ISD::isNormalLoad(Op.getNode()))
+ return false;
+
+  // If this is an unaligned vector load, make sure the target supports folding it.
+ auto *Ld = cast<LoadSDNode>(Op.getNode());
+ if (!Subtarget.hasAVX() && !Subtarget.hasSSEUnalignedMem() &&
+ Ld->getValueSizeInBits(0) == 128 && Ld->getAlignment() < 16)
+ return false;
+
+ // TODO: If this is a non-temporal load and the target has an instruction
+ // for it, it should not be folded. See "useNonTemporalLoad()".
+
+ return true;
+}
+
+bool X86::mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT,
+ const X86Subtarget &Subtarget,
+ bool AssumeSingleUse) {
+ assert(Subtarget.hasAVX() && "Expected AVX for broadcast from memory");
+ if (!X86::mayFoldLoad(Op, Subtarget, AssumeSingleUse))
+ return false;
+
+  // We cannot replace a wide volatile load with a broadcast-from-memory,
+ // because that would narrow the load, which isn't legal for volatiles.
+ auto *Ld = cast<LoadSDNode>(Op.getNode());
+ return !Ld->isVolatile() ||
+ Ld->getValueSizeInBits(0) == EltVT.getScalarSizeInBits();
}
-static bool MayFoldIntoStore(SDValue Op) {
+bool X86::mayFoldIntoStore(SDValue Op) {
return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
}
-static bool MayFoldIntoZeroExtend(SDValue Op) {
+bool X86::mayFoldIntoZeroExtend(SDValue Op) {
if (Op.hasOneUse()) {
unsigned Opcode = Op.getNode()->use_begin()->getOpcode();
return (ISD::ZERO_EXTEND == Opcode);
@@ -4872,6 +5131,7 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::MOVDDUP:
case X86ISD::MOVSS:
case X86ISD::MOVSD:
+ case X86ISD::MOVSH:
case X86ISD::UNPCKL:
case X86ISD::UNPCKH:
case X86ISD::VBROADCAST:
@@ -5023,20 +5283,20 @@ static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {
/// condition code, returning the condition code and the LHS/RHS of the
/// comparison to make.
static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
- bool isFP, SDValue &LHS, SDValue &RHS,
- SelectionDAG &DAG) {
+ bool isFP, SDValue &LHS, SDValue &RHS,
+ SelectionDAG &DAG) {
if (!isFP) {
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
- if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
+ if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnes()) {
// X > -1 -> X == 0, jump !sign.
RHS = DAG.getConstant(0, DL, RHS.getValueType());
return X86::COND_NS;
}
- if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
+ if (SetCCOpcode == ISD::SETLT && RHSC->isZero()) {
// X < 0 -> X == 0, jump on sign.
return X86::COND_S;
}
- if (SetCCOpcode == ISD::SETGE && RHSC->isNullValue()) {
+ if (SetCCOpcode == ISD::SETGE && RHSC->isZero()) {
// X >= 0 -> X == 0, jump on !sign.
return X86::COND_NS;
}
@@ -5119,6 +5379,10 @@ static bool hasFPCMov(unsigned X86CC) {
}
}
+static bool useVPTERNLOG(const X86Subtarget &Subtarget, MVT VT) {
+ return Subtarget.hasVLX() || Subtarget.canExtendTo512DQ() ||
+ VT.is512BitVector();
+}
bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I,
@@ -5312,10 +5576,13 @@ bool X86TargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
VT = getTypeToTransformTo(Context, VT);
// If vector multiply is legal, assume that's faster than shl + add/sub.
- // TODO: Multiply is a complex op with higher latency and lower throughput in
- // most implementations, so this check could be loosened based on type
- // and/or a CPU attribute.
- if (isOperationLegal(ISD::MUL, VT))
+  // Multiply is a complex op with higher latency and lower throughput in
+  // most implementations; sub-vXi32 vector multiplies are always fast,
+  // vXi32 must not have a SlowMULLD implementation, and anything larger
+  // (vXi64) is always going to be slow.
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ if (isOperationLegal(ISD::MUL, VT) && EltSizeInBits <= 32 &&
+ (EltSizeInBits != 32 || !Subtarget.isPMULLDSlow()))
return false;
// shl+add, shl+sub, shl+add+neg
@@ -5393,11 +5660,10 @@ bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
}
bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
- const SelectionDAG &DAG) const {
+ const MachineFunction &MF) const {
// Do not merge to float value size (128 bytes) if no implicit
// float attribute is set.
- bool NoFloat = DAG.getMachineFunction().getFunction().hasFnAttribute(
- Attribute::NoImplicitFloat);
+ bool NoFloat = MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat);
if (NoFloat) {
unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32;
@@ -5731,7 +5997,7 @@ static bool canWidenShuffleElements(ArrayRef<int> Mask,
// Here we do not set undef elements as zeroable.
SmallVector<int, 64> ZeroableMask(Mask.begin(), Mask.end());
if (V2IsZero) {
- assert(!Zeroable.isNullValue() && "V2's non-undef elements are used?!");
+ assert(!Zeroable.isZero() && "V2's non-undef elements are used?!");
for (int i = 0, Size = Mask.size(); i != Size; ++i)
if (Mask[i] != SM_SentinelUndef && Zeroable[i])
ZeroableMask[i] = SM_SentinelZero;
@@ -6037,62 +6303,67 @@ static std::pair<SDValue, SDValue> splitVector(SDValue Op, SelectionDAG &DAG,
assert((NumElems % 2) == 0 && (SizeInBits % 2) == 0 &&
"Can't split odd sized vector");
+  // If this is a splat value (with no undefs) then use the lower subvector,
+ // which should be a free extraction.
SDValue Lo = extractSubVector(Op, 0, DAG, dl, SizeInBits / 2);
+ if (DAG.isSplatValue(Op, /*AllowUndefs*/ false))
+ return std::make_pair(Lo, Lo);
+
SDValue Hi = extractSubVector(Op, NumElems / 2, DAG, dl, SizeInBits / 2);
return std::make_pair(Lo, Hi);
}
-// Split an unary integer op into 2 half sized ops.
-static SDValue splitVectorIntUnary(SDValue Op, SelectionDAG &DAG) {
+/// Break an operation into 2 half sized ops and then concatenate the results.
+static SDValue splitVectorOp(SDValue Op, SelectionDAG &DAG) {
+ unsigned NumOps = Op.getNumOperands();
EVT VT = Op.getValueType();
+ SDLoc dl(Op);
+  // Extract the Lo/Hi vectors from each operand.
+ SmallVector<SDValue> LoOps(NumOps, SDValue());
+ SmallVector<SDValue> HiOps(NumOps, SDValue());
+ for (unsigned I = 0; I != NumOps; ++I) {
+ SDValue SrcOp = Op.getOperand(I);
+ if (!SrcOp.getValueType().isVector()) {
+ LoOps[I] = HiOps[I] = SrcOp;
+ continue;
+ }
+ std::tie(LoOps[I], HiOps[I]) = splitVector(SrcOp, DAG, dl);
+ }
+
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
+ DAG.getNode(Op.getOpcode(), dl, LoVT, LoOps),
+ DAG.getNode(Op.getOpcode(), dl, HiVT, HiOps));
+}
+
+/// Break an unary integer operation into 2 half sized ops and then
+/// concatenate the result back.
+static SDValue splitVectorIntUnary(SDValue Op, SelectionDAG &DAG) {
// Make sure we only try to split 256/512-bit types to avoid creating
// narrow vectors.
+ EVT VT = Op.getValueType();
+ (void)VT;
assert((Op.getOperand(0).getValueType().is256BitVector() ||
Op.getOperand(0).getValueType().is512BitVector()) &&
(VT.is256BitVector() || VT.is512BitVector()) && "Unsupported VT!");
assert(Op.getOperand(0).getValueType().getVectorNumElements() ==
VT.getVectorNumElements() &&
"Unexpected VTs!");
-
- SDLoc dl(Op);
-
- // Extract the Lo/Hi vectors
- SDValue Lo, Hi;
- std::tie(Lo, Hi) = splitVector(Op.getOperand(0), DAG, dl);
-
- EVT LoVT, HiVT;
- std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
- DAG.getNode(Op.getOpcode(), dl, LoVT, Lo),
- DAG.getNode(Op.getOpcode(), dl, HiVT, Hi));
+ return splitVectorOp(Op, DAG);
}
/// Break a binary integer operation into 2 half sized ops and then
/// concatenate the result back.
static SDValue splitVectorIntBinary(SDValue Op, SelectionDAG &DAG) {
+ // Assert that all the types match.
EVT VT = Op.getValueType();
-
- // Sanity check that all the types match.
+ (void)VT;
assert(Op.getOperand(0).getValueType() == VT &&
Op.getOperand(1).getValueType() == VT && "Unexpected VTs!");
assert((VT.is256BitVector() || VT.is512BitVector()) && "Unsupported VT!");
-
- SDLoc dl(Op);
-
- // Extract the LHS Lo/Hi vectors
- SDValue LHS1, LHS2;
- std::tie(LHS1, LHS2) = splitVector(Op.getOperand(0), DAG, dl);
-
- // Extract the RHS Lo/Hi vectors
- SDValue RHS1, RHS2;
- std::tie(RHS1, RHS2) = splitVector(Op.getOperand(1), DAG, dl);
-
- EVT LoVT, HiVT;
- std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
- DAG.getNode(Op.getOpcode(), dl, LoVT, LHS1, RHS1),
- DAG.getNode(Op.getOpcode(), dl, HiVT, LHS2, RHS2));
+ return splitVectorOp(Op, DAG);
}
// Helper for splitting operands of an operation to legal target size and
@@ -6143,6 +6414,71 @@ SDValue SplitOpsAndApply(SelectionDAG &DAG, const X86Subtarget &Subtarget,
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Subs);
}
+// Helper function that extends a non-512-bit vector op to 512 bits on non-VLX
+// targets.
+static SDValue getAVX512Node(unsigned Opcode, const SDLoc &DL, MVT VT,
+ ArrayRef<SDValue> Ops, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ assert(Subtarget.hasAVX512() && "AVX512 target expected");
+ MVT SVT = VT.getScalarType();
+
+  // If we have a 32/64-bit splatted constant, splat it to DstTy to
+  // encourage a foldable broadcasted operand.
+ auto MakeBroadcastOp = [&](SDValue Op, MVT OpVT, MVT DstVT) {
+ unsigned OpEltSizeInBits = OpVT.getScalarSizeInBits();
+ // AVX512 broadcasts 32/64-bit operands.
+ // TODO: Support float once getAVX512Node is used by fp-ops.
+ if (!OpVT.isInteger() || OpEltSizeInBits < 32 ||
+ !DAG.getTargetLoweringInfo().isTypeLegal(SVT))
+ return SDValue();
+ // If we're not widening, don't bother if we're not bitcasting.
+ if (OpVT == DstVT && Op.getOpcode() != ISD::BITCAST)
+ return SDValue();
+ if (auto *BV = dyn_cast<BuildVectorSDNode>(peekThroughBitcasts(Op))) {
+ APInt SplatValue, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
+ HasAnyUndefs, OpEltSizeInBits) &&
+ !HasAnyUndefs && SplatValue.getBitWidth() == OpEltSizeInBits)
+ return DAG.getConstant(SplatValue, DL, DstVT);
+ }
+ return SDValue();
+ };
+
+ bool Widen = !(Subtarget.hasVLX() || VT.is512BitVector());
+
+ MVT DstVT = VT;
+ if (Widen)
+ DstVT = MVT::getVectorVT(SVT, 512 / SVT.getSizeInBits());
+
+ // Canonicalize src operands.
+ SmallVector<SDValue> SrcOps(Ops.begin(), Ops.end());
+ for (SDValue &Op : SrcOps) {
+ MVT OpVT = Op.getSimpleValueType();
+ // Just pass through scalar operands.
+ if (!OpVT.isVector())
+ continue;
+ assert(OpVT == VT && "Vector type mismatch");
+
+ if (SDValue BroadcastOp = MakeBroadcastOp(Op, OpVT, DstVT)) {
+ Op = BroadcastOp;
+ continue;
+ }
+
+ // Just widen the subvector by inserting into an undef wide vector.
+ if (Widen)
+ Op = widenSubVector(Op, false, Subtarget, DAG, DL, 512);
+ }
+
+ SDValue Res = DAG.getNode(Opcode, DL, DstVT, SrcOps);
+
+ // Perform the 512-bit op then extract the bottom subvector.
+ if (Widen)
+ Res = extractSubVector(Res, 0, DAG, DL, VT.getSizeInBits());
+ return Res;
+}
+
/// Insert i1-subvector to i1-vector.
static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
@@ -6214,14 +6550,21 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
if (ISD::isBuildVectorAllZeros(Vec.getNode())) {
assert(IdxVal != 0 && "Unexpected index");
- NumElems = WideOpVT.getVectorNumElements();
- unsigned ShiftLeft = NumElems - SubVecNumElems;
- unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
- SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
- DAG.getTargetConstant(ShiftLeft, dl, MVT::i8));
- if (ShiftRight != 0)
- SubVec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, SubVec,
- DAG.getTargetConstant(ShiftRight, dl, MVT::i8));
+ // If upper elements of Vec are known undef, then just shift into place.
+ if (llvm::all_of(Vec->ops().slice(IdxVal + SubVecNumElems),
+ [](SDValue V) { return V.isUndef(); })) {
+ SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
+ DAG.getTargetConstant(IdxVal, dl, MVT::i8));
+ } else {
+ NumElems = WideOpVT.getVectorNumElements();
+ unsigned ShiftLeft = NumElems - SubVecNumElems;
+ unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
+ SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
+ DAG.getTargetConstant(ShiftLeft, dl, MVT::i8));
+ if (ShiftRight != 0)
+ SubVec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, SubVec,
+ DAG.getTargetConstant(ShiftRight, dl, MVT::i8));
+ }
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx);
}
@@ -6323,7 +6666,7 @@ static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&
"Expected a 128/256/512-bit vector type");
- APInt Ones = APInt::getAllOnesValue(32);
+ APInt Ones = APInt::getAllOnes(32);
unsigned NumElts = VT.getSizeInBits() / 32;
SDValue Vec = DAG.getConstant(Ones, dl, MVT::getVectorVT(MVT::i32, NumElts));
return DAG.getBitcast(VT, Vec);
@@ -6461,6 +6804,58 @@ static SDValue getUnpackh(SelectionDAG &DAG, const SDLoc &dl, EVT VT,
return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
}
+/// Returns a node that packs the LHS + RHS nodes together at half width.
+/// May return X86ISD::PACKSS/PACKUS, packing the top/bottom half.
+/// TODO: Add vXi64 -> vXi32 pack support with vector_shuffle node.
+/// TODO: Add subvector splitting if/when we have a need for it.
+static SDValue getPack(SelectionDAG &DAG, const X86Subtarget &Subtarget,
+ const SDLoc &dl, MVT VT, SDValue LHS, SDValue RHS,
+ bool PackHiHalf = false) {
+ MVT OpVT = LHS.getSimpleValueType();
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ bool UsePackUS = Subtarget.hasSSE41() || EltSizeInBits == 8;
+ assert(OpVT == RHS.getSimpleValueType() &&
+ VT.getSizeInBits() == OpVT.getSizeInBits() &&
+ (EltSizeInBits * 2) == OpVT.getScalarSizeInBits() &&
+ "Unexpected PACK operand types");
+ assert((EltSizeInBits == 8 || EltSizeInBits == 16) &&
+ "Unexpected PACK result type");
+
+ // See if we already have sufficient leading bits for PACKSS/PACKUS.
+ if (!PackHiHalf) {
+ if (UsePackUS &&
+ DAG.computeKnownBits(LHS).countMaxActiveBits() <= EltSizeInBits &&
+ DAG.computeKnownBits(RHS).countMaxActiveBits() <= EltSizeInBits)
+ return DAG.getNode(X86ISD::PACKUS, dl, VT, LHS, RHS);
+
+ if (DAG.ComputeMinSignedBits(LHS) <= EltSizeInBits &&
+ DAG.ComputeMinSignedBits(RHS) <= EltSizeInBits)
+ return DAG.getNode(X86ISD::PACKSS, dl, VT, LHS, RHS);
+ }
+
+  // Fall back to sign/zero extending the requested half and pack.
+ SDValue Amt = DAG.getTargetConstant(EltSizeInBits, dl, MVT::i8);
+ if (UsePackUS) {
+ if (PackHiHalf) {
+ LHS = DAG.getNode(X86ISD::VSRLI, dl, OpVT, LHS, Amt);
+ RHS = DAG.getNode(X86ISD::VSRLI, dl, OpVT, RHS, Amt);
+ } else {
+ SDValue Mask = DAG.getConstant((1ULL << EltSizeInBits) - 1, dl, OpVT);
+ LHS = DAG.getNode(ISD::AND, dl, OpVT, LHS, Mask);
+ RHS = DAG.getNode(ISD::AND, dl, OpVT, RHS, Mask);
+ };
+ return DAG.getNode(X86ISD::PACKUS, dl, VT, LHS, RHS);
+ };
+
+ if (!PackHiHalf) {
+ LHS = DAG.getNode(X86ISD::VSHLI, dl, OpVT, LHS, Amt);
+ RHS = DAG.getNode(X86ISD::VSHLI, dl, OpVT, RHS, Amt);
+ }
+ LHS = DAG.getNode(X86ISD::VSRAI, dl, OpVT, LHS, Amt);
+ RHS = DAG.getNode(X86ISD::VSRAI, dl, OpVT, RHS, Amt);
+ return DAG.getNode(X86ISD::PACKSS, dl, VT, LHS, RHS);
+}
+
/// Return a vector_shuffle of the specified vector of zero or undef vector.
/// This produces a shuffle where the low element of V2 is swizzled into the
/// zero/undef vector, landing at element Idx.
@@ -6563,7 +6958,7 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
APInt UndefEltBits = UndefBits.extractBits(EltSizeInBits, BitOffset);
// Only treat an element as UNDEF if all bits are UNDEF.
- if (UndefEltBits.isAllOnesValue()) {
+ if (UndefEltBits.isAllOnes()) {
if (!AllowWholeUndefs)
return false;
UndefElts.setBit(i);
@@ -6602,59 +6997,36 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
// Handle UNDEFs.
if (Op.isUndef()) {
- APInt UndefSrcElts = APInt::getAllOnesValue(NumElts);
+ APInt UndefSrcElts = APInt::getAllOnes(NumElts);
SmallVector<APInt, 64> SrcEltBits(NumElts, APInt(EltSizeInBits, 0));
return CastBitData(UndefSrcElts, SrcEltBits);
}
// Extract scalar constant bits.
if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) {
- APInt UndefSrcElts = APInt::getNullValue(1);
+ APInt UndefSrcElts = APInt::getZero(1);
SmallVector<APInt, 64> SrcEltBits(1, Cst->getAPIntValue());
return CastBitData(UndefSrcElts, SrcEltBits);
}
if (auto *Cst = dyn_cast<ConstantFPSDNode>(Op)) {
- APInt UndefSrcElts = APInt::getNullValue(1);
+ APInt UndefSrcElts = APInt::getZero(1);
APInt RawBits = Cst->getValueAPF().bitcastToAPInt();
SmallVector<APInt, 64> SrcEltBits(1, RawBits);
return CastBitData(UndefSrcElts, SrcEltBits);
}
// Extract constant bits from build vector.
- if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
+ if (auto *BV = dyn_cast<BuildVectorSDNode>(Op)) {
+ BitVector Undefs;
+ SmallVector<APInt> SrcEltBits;
unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
- unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
-
- APInt UndefSrcElts(NumSrcElts, 0);
- SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0));
- for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
- const SDValue &Src = Op.getOperand(i);
- if (Src.isUndef()) {
- UndefSrcElts.setBit(i);
- continue;
- }
- auto *Cst = cast<ConstantSDNode>(Src);
- SrcEltBits[i] = Cst->getAPIntValue().zextOrTrunc(SrcEltSizeInBits);
+ if (BV->getConstantRawBits(true, SrcEltSizeInBits, SrcEltBits, Undefs)) {
+ APInt UndefSrcElts = APInt::getNullValue(SrcEltBits.size());
+ for (unsigned I = 0, E = SrcEltBits.size(); I != E; ++I)
+ if (Undefs[I])
+ UndefSrcElts.setBit(I);
+ return CastBitData(UndefSrcElts, SrcEltBits);
}
- return CastBitData(UndefSrcElts, SrcEltBits);
- }
- if (ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode())) {
- unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
- unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
-
- APInt UndefSrcElts(NumSrcElts, 0);
- SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0));
- for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
- const SDValue &Src = Op.getOperand(i);
- if (Src.isUndef()) {
- UndefSrcElts.setBit(i);
- continue;
- }
- auto *Cst = cast<ConstantFPSDNode>(Src);
- APInt RawBits = Cst->getValueAPF().bitcastToAPInt();
- SrcEltBits[i] = RawBits.zextOrTrunc(SrcEltSizeInBits);
- }
- return CastBitData(UndefSrcElts, SrcEltBits);
}
// Extract constant bits from constant pool vector.
@@ -6704,17 +7076,21 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
if (Op.getOpcode() == X86ISD::SUBV_BROADCAST_LOAD) {
auto *MemIntr = cast<MemIntrinsicSDNode>(Op);
SDValue Ptr = MemIntr->getBasePtr();
+    // The source constant may be larger than the subvector broadcast, so
+    // ensure we extract the correct subvector constants.
if (const Constant *Cst = getTargetConstantFromBasePtr(Ptr)) {
Type *CstTy = Cst->getType();
unsigned CstSizeInBits = CstTy->getPrimitiveSizeInBits();
- if (!CstTy->isVectorTy() || (SizeInBits % CstSizeInBits) != 0)
+ unsigned SubVecSizeInBits = MemIntr->getMemoryVT().getStoreSizeInBits();
+ if (!CstTy->isVectorTy() || (CstSizeInBits % SubVecSizeInBits) != 0 ||
+ (SizeInBits % SubVecSizeInBits) != 0)
return false;
- unsigned SubEltSizeInBits = CstTy->getScalarSizeInBits();
- unsigned NumSubElts = CstSizeInBits / SubEltSizeInBits;
- unsigned NumSubVecs = SizeInBits / CstSizeInBits;
+ unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits();
+ unsigned NumSubElts = SubVecSizeInBits / CstEltSizeInBits;
+ unsigned NumSubVecs = SizeInBits / SubVecSizeInBits;
APInt UndefSubElts(NumSubElts, 0);
SmallVector<APInt, 64> SubEltBits(NumSubElts * NumSubVecs,
- APInt(SubEltSizeInBits, 0));
+ APInt(CstEltSizeInBits, 0));
for (unsigned i = 0; i != NumSubElts; ++i) {
if (!CollectConstantBits(Cst->getAggregateElement(i), SubEltBits[i],
UndefSubElts, i))
@@ -6814,12 +7190,12 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
AllowPartialUndefs))
return false;
- UndefElts = APInt::getNullValue(NumElts);
+ UndefElts = APInt::getZero(NumElts);
for (int i = 0; i != (int)NumElts; ++i) {
int M = Mask[i];
if (M < 0) {
UndefElts.setBit(i);
- EltBits.push_back(APInt::getNullValue(EltSizeInBits));
+ EltBits.push_back(APInt::getZero(EltSizeInBits));
} else if (M < (int)NumElts) {
if (UndefElts0[M])
UndefElts.setBit(i);
@@ -6916,8 +7292,8 @@ static void getPackDemandedElts(EVT VT, const APInt &DemandedElts,
int NumEltsPerLane = NumElts / NumLanes;
int NumInnerEltsPerLane = NumInnerElts / NumLanes;
- DemandedLHS = APInt::getNullValue(NumInnerElts);
- DemandedRHS = APInt::getNullValue(NumInnerElts);
+ DemandedLHS = APInt::getZero(NumInnerElts);
+ DemandedRHS = APInt::getZero(NumInnerElts);
// Map DemandedElts to the packed operands.
for (int Lane = 0; Lane != NumLanes; ++Lane) {
@@ -6940,8 +7316,8 @@ static void getHorizDemandedElts(EVT VT, const APInt &DemandedElts,
int NumEltsPerLane = NumElts / NumLanes;
int HalfEltsPerLane = NumEltsPerLane / 2;
- DemandedLHS = APInt::getNullValue(NumElts);
- DemandedRHS = APInt::getNullValue(NumElts);
+ DemandedLHS = APInt::getZero(NumElts);
+ DemandedRHS = APInt::getZero(NumElts);
// Map DemandedElts to the horizontal operands.
for (int Idx = 0; Idx != NumElts; ++Idx) {
@@ -7148,6 +7524,7 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
break;
case X86ISD::MOVSS:
case X86ISD::MOVSD:
+ case X86ISD::MOVSH:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
DecodeScalarMoveMask(NumElems, /* IsLoad */ false, Mask);
@@ -7287,7 +7664,7 @@ static void computeZeroableShuffleElements(ArrayRef<int> Mask,
SDValue V1, SDValue V2,
APInt &KnownUndef, APInt &KnownZero) {
int Size = Mask.size();
- KnownUndef = KnownZero = APInt::getNullValue(Size);
+ KnownUndef = KnownZero = APInt::getZero(Size);
V1 = peekThroughBitcasts(V1);
V2 = peekThroughBitcasts(V2);
@@ -7380,7 +7757,7 @@ static bool getTargetShuffleAndZeroables(SDValue N, SmallVectorImpl<int> &Mask,
int Size = Mask.size();
SDValue V1 = Ops[0];
SDValue V2 = IsUnary ? V1 : Ops[1];
- KnownUndef = KnownZero = APInt::getNullValue(Size);
+ KnownUndef = KnownZero = APInt::getZero(Size);
V1 = peekThroughBitcasts(V1);
V2 = peekThroughBitcasts(V2);
@@ -7487,7 +7864,7 @@ static void resolveZeroablesFromTargetShuffle(const SmallVectorImpl<int> &Mask,
APInt &KnownUndef,
APInt &KnownZero) {
unsigned NumElts = Mask.size();
- KnownUndef = KnownZero = APInt::getNullValue(NumElts);
+ KnownUndef = KnownZero = APInt::getZero(NumElts);
for (unsigned i = 0; i != NumElts; ++i) {
int M = Mask[i];
@@ -7760,9 +8137,9 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
// lanes), we can treat this as a truncation shuffle.
bool Offset0 = false, Offset1 = false;
if (Opcode == X86ISD::PACKSS) {
- if ((!(N0.isUndef() || EltsLHS.isNullValue()) &&
+ if ((!(N0.isUndef() || EltsLHS.isZero()) &&
DAG.ComputeNumSignBits(N0, EltsLHS, Depth + 1) <= NumBitsPerElt) ||
- (!(N1.isUndef() || EltsRHS.isNullValue()) &&
+ (!(N1.isUndef() || EltsRHS.isZero()) &&
DAG.ComputeNumSignBits(N1, EltsRHS, Depth + 1) <= NumBitsPerElt))
return false;
// We can't easily fold ASHR into a shuffle, but if it was feeding a
@@ -7780,9 +8157,9 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
}
} else {
APInt ZeroMask = APInt::getHighBitsSet(2 * NumBitsPerElt, NumBitsPerElt);
- if ((!(N0.isUndef() || EltsLHS.isNullValue()) &&
+ if ((!(N0.isUndef() || EltsLHS.isZero()) &&
!DAG.MaskedValueIsZero(N0, ZeroMask, EltsLHS, Depth + 1)) ||
- (!(N1.isUndef() || EltsRHS.isNullValue()) &&
+ (!(N1.isUndef() || EltsRHS.isZero()) &&
!DAG.MaskedValueIsZero(N1, ZeroMask, EltsRHS, Depth + 1)))
return false;
}
@@ -7983,7 +8360,7 @@ static bool getTargetShuffleInputs(SDValue Op, SmallVectorImpl<SDValue> &Inputs,
APInt KnownUndef, KnownZero;
unsigned NumElts = Op.getValueType().getVectorNumElements();
- APInt DemandedElts = APInt::getAllOnesValue(NumElts);
+ APInt DemandedElts = APInt::getAllOnes(NumElts);
return getTargetShuffleInputs(Op, DemandedElts, Inputs, Mask, KnownUndef,
KnownZero, DAG, Depth, ResolveKnownElts);
}
@@ -8467,10 +8844,10 @@ static bool findEltLoadSrc(SDValue Elt, LoadSDNode *&Ld, int64_t &ByteOffset) {
case ISD::SCALAR_TO_VECTOR:
return findEltLoadSrc(Elt.getOperand(0), Ld, ByteOffset);
case ISD::SRL:
- if (auto *IdxC = dyn_cast<ConstantSDNode>(Elt.getOperand(1))) {
- uint64_t Idx = IdxC->getZExtValue();
- if ((Idx % 8) == 0 && findEltLoadSrc(Elt.getOperand(0), Ld, ByteOffset)) {
- ByteOffset += Idx / 8;
+ if (auto *AmtC = dyn_cast<ConstantSDNode>(Elt.getOperand(1))) {
+ uint64_t Amt = AmtC->getZExtValue();
+ if ((Amt % 8) == 0 && findEltLoadSrc(Elt.getOperand(0), Ld, ByteOffset)) {
+ ByteOffset += Amt / 8;
return true;
}
}
@@ -8508,9 +8885,9 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
unsigned NumElems = Elts.size();
int LastLoadedElt = -1;
- APInt LoadMask = APInt::getNullValue(NumElems);
- APInt ZeroMask = APInt::getNullValue(NumElems);
- APInt UndefMask = APInt::getNullValue(NumElems);
+ APInt LoadMask = APInt::getZero(NumElems);
+ APInt ZeroMask = APInt::getZero(NumElems);
+ APInt UndefMask = APInt::getZero(NumElems);
SmallVector<LoadSDNode*, 8> Loads(NumElems, nullptr);
SmallVector<int64_t, 8> ByteOffsets(NumElems, 0);
@@ -8671,7 +9048,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
// If the upper half of a ymm/zmm load is undef then just load the lower half.
if (VT.is256BitVector() || VT.is512BitVector()) {
unsigned HalfNumElems = NumElems / 2;
- if (UndefMask.extractBits(HalfNumElems, HalfNumElems).isAllOnesValue()) {
+ if (UndefMask.extractBits(HalfNumElems, HalfNumElems).isAllOnes()) {
EVT HalfVT =
EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), HalfNumElems);
SDValue HalfLD =
@@ -8685,7 +9062,8 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
// VZEXT_LOAD - consecutive 32/64-bit load/undefs followed by zeros/undefs.
if (IsConsecutiveLoad && FirstLoadedElt == 0 &&
- (LoadSizeInBits == 32 || LoadSizeInBits == 64) &&
+ ((LoadSizeInBits == 16 && Subtarget.hasFP16()) || LoadSizeInBits == 32 ||
+ LoadSizeInBits == 64) &&
((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()))) {
MVT VecSVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(LoadSizeInBits)
: MVT::getIntegerVT(LoadSizeInBits);
@@ -8709,7 +9087,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
// BROADCAST - match the smallest possible repetition pattern, load that
// scalar/subvector element and then broadcast to the entire vector.
- if (ZeroMask.isNullValue() && isPowerOf2_32(NumElems) && Subtarget.hasAVX() &&
+ if (ZeroMask.isZero() && isPowerOf2_32(NumElems) && Subtarget.hasAVX() &&
(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector())) {
for (unsigned SubElems = 1; SubElems < NumElems; SubElems *= 2) {
unsigned RepeatSize = SubElems * BaseSizeInBits;
@@ -8758,6 +9136,12 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
while (Broadcast.getValueSizeInBits() < VT.getSizeInBits())
Broadcast = concatSubVectors(Broadcast, Broadcast, DAG, DL);
} else {
+ if (!Subtarget.hasAVX2() &&
+ !X86::mayFoldLoadIntoBroadcastFromMem(
+ RepeatLoad, RepeatVT.getScalarType().getSimpleVT(),
+ Subtarget,
+ /*AssumeSingleUse=*/true))
+ return SDValue();
Broadcast =
DAG.getNode(X86ISD::VBROADCAST, DL, BroadcastVT, RepeatLoad);
}
@@ -8800,7 +9184,9 @@ static Constant *getConstantVector(MVT VT, const APInt &SplatValue,
APInt Val = SplatValue.extractBits(ScalarSize, ScalarSize * i);
Constant *Const;
if (VT.isFloatingPoint()) {
- if (ScalarSize == 32) {
+ if (ScalarSize == 16) {
+ Const = ConstantFP::get(C, APFloat(APFloat::IEEEhalf(), Val));
+ } else if (ScalarSize == 32) {
Const = ConstantFP::get(C, APFloat(APFloat::IEEEsingle(), Val));
} else {
assert(ScalarSize == 64 && "Unsupported floating point scalar size");
@@ -9009,6 +9395,7 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
// with AVX2, also splat i8 and i16.
// With pattern matching, the VBROADCAST node may become a VMOVDDUP.
if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) ||
+ (ScalarSize == 16 && Subtarget.hasFP16() && CVT.isFloatingPoint()) ||
(OptForSize && (ScalarSize == 64 || Subtarget.hasAVX2()))) {
const Constant *C = nullptr;
if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Ld))
@@ -9071,6 +9458,9 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
return BCast;
}
+ if (ScalarSize == 16 && Subtarget.hasFP16() && IsGE256)
+ return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
+
// Unsupported broadcast.
return SDValue();
}
@@ -9760,7 +10150,7 @@ static SDValue getHopForBuildVector(const BuildVectorSDNode *BV,
V1 = insertSubVector(DAG.getUNDEF(VT), V1, 0, DAG, SDLoc(BV), Width);
unsigned NumElts = VT.getVectorNumElements();
- APInt DemandedElts = APInt::getAllOnesValue(NumElts);
+ APInt DemandedElts = APInt::getAllOnes(NumElts);
for (unsigned i = 0; i != NumElts; ++i)
if (BV->getOperand(i).isUndef())
DemandedElts.clearBit(i);
@@ -10335,9 +10725,9 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
return VectorConstant;
unsigned EVTBits = EltVT.getSizeInBits();
- APInt UndefMask = APInt::getNullValue(NumElems);
- APInt ZeroMask = APInt::getNullValue(NumElems);
- APInt NonZeroMask = APInt::getNullValue(NumElems);
+ APInt UndefMask = APInt::getZero(NumElems);
+ APInt ZeroMask = APInt::getZero(NumElems);
+ APInt NonZeroMask = APInt::getZero(NumElems);
bool IsAllConstants = true;
SmallSet<SDValue, 8> Values;
unsigned NumConstants = NumElems;
@@ -10361,7 +10751,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// All undef vector. Return an UNDEF. All zero vectors were handled above.
if (NonZeroMask == 0) {
- assert(UndefMask.isAllOnesValue() && "Fully undef mask expected");
+ assert(UndefMask.isAllOnes() && "Fully undef mask expected");
return DAG.getUNDEF(VT);
}
@@ -10471,13 +10861,15 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (NumZero == 0)
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
- if (EltVT == MVT::i32 || EltVT == MVT::f32 || EltVT == MVT::f64 ||
- (EltVT == MVT::i64 && Subtarget.is64Bit())) {
+ if (EltVT == MVT::i32 || EltVT == MVT::f16 || EltVT == MVT::f32 ||
+ EltVT == MVT::f64 || (EltVT == MVT::i64 && Subtarget.is64Bit()) ||
+ (EltVT == MVT::i16 && Subtarget.hasFP16())) {
assert((VT.is128BitVector() || VT.is256BitVector() ||
VT.is512BitVector()) &&
"Expected an SSE value type!");
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
- // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
+ // Turn it into a MOVL (i.e. movsh, movss, movsd, movw or movd) to a
+ // zero vector.
return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
}
@@ -10607,7 +10999,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
DAG, Subtarget))
return V;
- if (EVTBits == 16 && NumElems == 8)
+ if (EltVT == MVT::i16 && NumElems == 8)
if (SDValue V = LowerBuildVectorv8i16(Op, NonZeroMask, NumNonZero, NumZero,
DAG, Subtarget))
return V;
@@ -10664,7 +11056,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
return Sh;
// For SSE 4.1, use insertps to put the high elements into the low element.
- if (Subtarget.hasSSE41()) {
+ if (Subtarget.hasSSE41() && EltVT != MVT::f16) {
SDValue Result;
if (!Op.getOperand(0).isUndef())
Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(0));
@@ -11206,7 +11598,7 @@ static bool createShuffleMaskFromVSELECT(SmallVectorImpl<int> &Mask,
// Arbitrarily choose from the 2nd operand if the select condition element
// is undef.
// TODO: Can we do better by matching patterns such as even/odd?
- if (UndefElts[i] || EltBits[i].isNullValue())
+ if (UndefElts[i] || EltBits[i].isZero())
Mask[i] += NumElts;
}
@@ -11575,7 +11967,7 @@ static bool matchShuffleAsVTRUNC(MVT &SrcVT, MVT &DstVT, MVT VT,
if (!isSequentialOrUndefInRange(Mask, 0, NumSrcElts, 0, Scale))
continue;
unsigned UpperElts = NumElts - NumSrcElts;
- if (!Zeroable.extractBits(UpperElts, NumSrcElts).isAllOnesValue())
+ if (!Zeroable.extractBits(UpperElts, NumSrcElts).isAllOnes())
continue;
SrcVT = MVT::getIntegerVT(EltSizeInBits * Scale);
SrcVT = MVT::getVectorVT(SrcVT, NumSrcElts);
@@ -11672,7 +12064,7 @@ static SDValue lowerShuffleWithVPMOV(const SDLoc &DL, MVT VT, SDValue V1,
unsigned NumSrcElts = NumElts / Scale;
unsigned UpperElts = NumElts - NumSrcElts;
if (!isSequentialOrUndefInRange(Mask, 0, NumSrcElts, 0, Scale) ||
- !Zeroable.extractBits(UpperElts, NumSrcElts).isAllOnesValue())
+ !Zeroable.extractBits(UpperElts, NumSrcElts).isAllOnes())
continue;
SDValue Src = V1;
@@ -11729,7 +12121,7 @@ static SDValue lowerShuffleAsVTRUNC(const SDLoc &DL, MVT VT, SDValue V1,
// The elements beyond the truncation must be undef/zero.
unsigned UpperElts = NumElts - NumSrcElts;
if (UpperElts > 0 &&
- !Zeroable.extractBits(UpperElts, NumSrcElts).isAllOnesValue())
+ !Zeroable.extractBits(UpperElts, NumSrcElts).isAllOnes())
continue;
bool UndefUppers =
UpperElts > 0 && isUndefInRange(Mask, NumSrcElts, UpperElts);
@@ -11955,8 +12347,8 @@ static SDValue lowerShuffleAsBitMask(const SDLoc &DL, MVT VT, SDValue V1,
MVT LogicVT = VT;
if (EltVT == MVT::f32 || EltVT == MVT::f64) {
Zero = DAG.getConstantFP(0.0, DL, EltVT);
- APFloat AllOnesValue = APFloat::getAllOnesValue(
- SelectionDAG::EVTToAPFloatSemantics(EltVT), EltVT.getSizeInBits());
+ APFloat AllOnesValue =
+ APFloat::getAllOnesValue(SelectionDAG::EVTToAPFloatSemantics(EltVT));
AllOnes = DAG.getConstantFP(AllOnesValue, DL, EltVT);
LogicVT =
MVT::getVectorVT(EltVT == MVT::f64 ? MVT::i64 : MVT::i32, Mask.size());
@@ -12038,10 +12430,15 @@ static bool matchShuffleAsBlend(SDValue V1, SDValue V2,
int M = Mask[i];
if (M == SM_SentinelUndef)
continue;
- if (M == i)
+ if (M == i ||
+ (0 <= M && M < Size && IsElementEquivalent(Size, V1, V1, M, i))) {
+ Mask[i] = i;
continue;
- if (M == i + Size) {
+ }
+ if (M == (i + Size) ||
+ (Size <= M && IsElementEquivalent(Size, V2, V2, M - Size, i))) {
BlendMask |= 1ull << i;
+ Mask[i] = i + Size;
continue;
}
if (Zeroable[i]) {
@@ -12424,6 +12821,14 @@ static SDValue lowerShuffleAsByteRotateAndPermute(
return SDValue();
}
+static bool isBroadcastShuffleMask(ArrayRef<int> Mask) {
+ return isUndefOrEqual(Mask, 0);
+}
+
+static bool isNoopOrBroadcastShuffleMask(ArrayRef<int> Mask) {
+ return isNoopShuffleMask(Mask) || isBroadcastShuffleMask(Mask);
+}
+
/// Generic routine to decompose a shuffle and blend into independent
/// blends and permutes.
///
@@ -12457,6 +12862,38 @@ static SDValue lowerShuffleAsDecomposedShuffleMerge(
}
}
+ // If we effectively demand only the 0'th element of \p Input, and not
+ // merely in the 0'th output position, then broadcast said input,
+ // and change \p InputMask to be a no-op (identity) mask.
+ auto canonicalizeBroadcastableInput = [DL, VT, &Subtarget,
+ &DAG](SDValue &Input,
+ MutableArrayRef<int> InputMask) {
+ unsigned EltSizeInBits = Input.getScalarValueSizeInBits();
+ if (!Subtarget.hasAVX2() && (!Subtarget.hasAVX() || EltSizeInBits < 32 ||
+ !X86::mayFoldLoad(Input, Subtarget)))
+ return;
+ if (isNoopShuffleMask(InputMask))
+ return;
+ assert(isBroadcastShuffleMask(InputMask) &&
+ "Expected to demand only the 0'th element.");
+ Input = DAG.getNode(X86ISD::VBROADCAST, DL, VT, Input);
+ for (auto I : enumerate(InputMask)) {
+ int &InputMaskElt = I.value();
+ if (InputMaskElt >= 0)
+ InputMaskElt = I.index();
+ }
+ };
+
+ // Currently, we may need to produce one shuffle per input, and blend results.
+ // It is possible that the shuffle for one of the inputs is already a no-op.
+ // See if we can simplify non-no-op shuffles into broadcasts,
+ // which we consider to be strictly better than an arbitrary shuffle.
+ if (isNoopOrBroadcastShuffleMask(V1Mask) &&
+ isNoopOrBroadcastShuffleMask(V2Mask)) {
+ canonicalizeBroadcastableInput(V1, V1Mask);
+ canonicalizeBroadcastableInput(V2, V2Mask);
+ }
+
// Try to lower with the simpler initial blend/unpack/rotate strategies unless
// one of the input shuffles would be a no-op. We prefer to shuffle inputs as
// the shuffle may be able to fold with a load or other benefit. However, when
@@ -12974,7 +13411,7 @@ static bool matchShuffleAsEXTRQ(MVT VT, SDValue &V1, SDValue &V2,
int Size = Mask.size();
int HalfSize = Size / 2;
assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
- assert(!Zeroable.isAllOnesValue() && "Fully zeroable shuffle mask");
+ assert(!Zeroable.isAllOnes() && "Fully zeroable shuffle mask");
// Upper half must be undefined.
if (!isUndefUpperHalf(Mask))
@@ -13462,7 +13899,7 @@ static SDValue lowerShuffleAsElementInsertion(
if (V2S && DAG.getTargetLoweringInfo().isTypeLegal(V2S.getValueType())) {
// We need to zext the scalar if it is smaller than an i32.
V2S = DAG.getBitcast(EltVT, V2S);
- if (EltVT == MVT::i8 || EltVT == MVT::i16) {
+ if (EltVT == MVT::i8 || (EltVT == MVT::i16 && !Subtarget.hasFP16())) {
// Using zext to expand a narrow element won't work for non-zero
// insertions.
if (!IsV1Zeroable)
@@ -13494,11 +13931,17 @@ static SDValue lowerShuffleAsElementInsertion(
if (!VT.is128BitVector())
return SDValue();
- // Otherwise, use MOVSD or MOVSS.
- assert((EltVT == MVT::f32 || EltVT == MVT::f64) &&
- "Only two types of floating point element types to handle!");
- return DAG.getNode(EltVT == MVT::f32 ? X86ISD::MOVSS : X86ISD::MOVSD, DL,
- ExtVT, V1, V2);
+ // Otherwise, use MOVSD, MOVSS or MOVSH.
+ unsigned MovOpc = 0;
+ if (EltVT == MVT::f16)
+ MovOpc = X86ISD::MOVSH;
+ else if (EltVT == MVT::f32)
+ MovOpc = X86ISD::MOVSS;
+ else if (EltVT == MVT::f64)
+ MovOpc = X86ISD::MOVSD;
+ else
+ llvm_unreachable("Unsupported floating point element type to handle!");
+ return DAG.getNode(MovOpc, DL, ExtVT, V1, V2);
}
// This lowering only works for the low element with floating point vectors.
@@ -15264,14 +15707,28 @@ static SDValue lowerV8I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
int NumEvenDrops = canLowerByDroppingEvenElements(Mask, false);
if ((NumEvenDrops == 1 || NumEvenDrops == 2) && Subtarget.hasSSE41() &&
!Subtarget.hasVLX()) {
- SmallVector<SDValue, 8> DWordClearOps(4, DAG.getConstant(0, DL, MVT::i32));
- for (unsigned i = 0; i != 4; i += 1 << (NumEvenDrops - 1))
- DWordClearOps[i] = DAG.getConstant(0xFFFF, DL, MVT::i32);
- SDValue DWordClearMask = DAG.getBuildVector(MVT::v4i32, DL, DWordClearOps);
- V1 = DAG.getNode(ISD::AND, DL, MVT::v4i32, DAG.getBitcast(MVT::v4i32, V1),
- DWordClearMask);
- V2 = DAG.getNode(ISD::AND, DL, MVT::v4i32, DAG.getBitcast(MVT::v4i32, V2),
- DWordClearMask);
+ // Check if this is part of a 256-bit vector truncation.
+ if (NumEvenDrops == 2 && Subtarget.hasAVX2() &&
+ peekThroughBitcasts(V1).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ peekThroughBitcasts(V2).getOpcode() == ISD::EXTRACT_SUBVECTOR) {
+ SDValue V1V2 = concatSubVectors(V1, V2, DAG, DL);
+ V1V2 = DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1V2,
+ getZeroVector(MVT::v16i16, Subtarget, DAG, DL),
+ DAG.getTargetConstant(0xEE, DL, MVT::i8));
+ V1V2 = DAG.getBitcast(MVT::v8i32, V1V2);
+ V1 = extract128BitVector(V1V2, 0, DAG, DL);
+ V2 = extract128BitVector(V1V2, 4, DAG, DL);
+ } else {
+ SmallVector<SDValue> DWordClearOps(4, DAG.getConstant(0, DL, MVT::i32));
+ for (unsigned i = 0; i != 4; i += 1 << (NumEvenDrops - 1))
+ DWordClearOps[i] = DAG.getConstant(0xFFFF, DL, MVT::i32);
+ SDValue DWordClearMask =
+ DAG.getBuildVector(MVT::v4i32, DL, DWordClearOps);
+ V1 = DAG.getNode(ISD::AND, DL, MVT::v4i32, DAG.getBitcast(MVT::v4i32, V1),
+ DWordClearMask);
+ V2 = DAG.getNode(ISD::AND, DL, MVT::v4i32, DAG.getBitcast(MVT::v4i32, V2),
+ DWordClearMask);
+ }
// Now pack things back together.
SDValue Result = DAG.getNode(X86ISD::PACKUS, DL, MVT::v8i16, V1, V2);
if (NumEvenDrops == 2) {
@@ -15300,6 +15757,33 @@ static SDValue lowerV8I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
Mask, Subtarget, DAG);
}
+/// Lower 8-lane 16-bit floating point shuffles.
+static SDValue lowerV8F16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
+ const APInt &Zeroable, SDValue V1, SDValue V2,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
+ assert(V1.getSimpleValueType() == MVT::v8f16 && "Bad operand type!");
+ assert(V2.getSimpleValueType() == MVT::v8f16 && "Bad operand type!");
+ assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
+ int NumV2Elements = count_if(Mask, [](int M) { return M >= 8; });
+
+ if (NumV2Elements == 0) {
+ // Check for being able to broadcast a single element.
+ if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v8f16, V1, V2,
+ Mask, Subtarget, DAG))
+ return Broadcast;
+ }
+ if (NumV2Elements == 1 && Mask[0] >= 8)
+ if (SDValue V = lowerShuffleAsElementInsertion(DL, MVT::v8f16, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
+ return V;
+
+ V1 = DAG.getBitcast(MVT::v8i16, V1);
+ V2 = DAG.getBitcast(MVT::v8i16, V2);
+ return DAG.getBitcast(MVT::v8f16,
+ DAG.getVectorShuffle(MVT::v8i16, DL, V1, V2, Mask));
+}
+
// Lowers unary/binary shuffle as VPERMV/VPERMV3, for non-VLX targets,
// sub-512-bit shuffles are padded to 512-bits for the shuffle and then
// the active subvector is extracted.
@@ -15705,6 +16189,8 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
return lowerV4F32Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v8i16:
return lowerV8I16Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
+ case MVT::v8f16:
+ return lowerV8F16Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v16i8:
return lowerV16I8Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
@@ -16083,22 +16569,13 @@ static SDValue lowerV2X128Shuffle(const SDLoc &DL, MVT VT, SDValue V1,
bool SplatLo = isShuffleEquivalent(Mask, {0, 1, 0, 1}, V1);
bool SplatHi = isShuffleEquivalent(Mask, {2, 3, 2, 3}, V1);
if ((SplatLo || SplatHi) && !Subtarget.hasAVX512() && V1.hasOneUse() &&
- MayFoldLoad(peekThroughOneUseBitcasts(V1))) {
+ X86::mayFoldLoad(peekThroughOneUseBitcasts(V1), Subtarget)) {
+ MVT MemVT = VT.getHalfNumVectorElementsVT();
+ unsigned Ofs = SplatLo ? 0 : MemVT.getStoreSize();
auto *Ld = cast<LoadSDNode>(peekThroughOneUseBitcasts(V1));
- if (!Ld->isNonTemporal()) {
- MVT MemVT = VT.getHalfNumVectorElementsVT();
- unsigned Ofs = SplatLo ? 0 : MemVT.getStoreSize();
- SDVTList Tys = DAG.getVTList(VT, MVT::Other);
- SDValue Ptr = DAG.getMemBasePlusOffset(Ld->getBasePtr(),
- TypeSize::Fixed(Ofs), DL);
- SDValue Ops[] = {Ld->getChain(), Ptr};
- SDValue BcastLd = DAG.getMemIntrinsicNode(
- X86ISD::SUBV_BROADCAST_LOAD, DL, Tys, Ops, MemVT,
- DAG.getMachineFunction().getMachineMemOperand(
- Ld->getMemOperand(), Ofs, MemVT.getStoreSize()));
- DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), BcastLd.getValue(1));
- return BcastLd;
- }
+ if (SDValue BcstLd = getBROADCAST_LOAD(X86ISD::SUBV_BROADCAST_LOAD, DL,
+ VT, MemVT, Ld, Ofs, DAG))
+ return BcstLd;
}
// With AVX2, use VPERMQ/VPERMPD for unary shuffles to allow memory folding.
@@ -17569,6 +18046,13 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
return DAG.getBitcast(VT, DAG.getVectorShuffle(FpVT, DL, V1, V2, Mask));
}
+ if (VT == MVT::v16f16) {
+ V1 = DAG.getBitcast(MVT::v16i16, V1);
+ V2 = DAG.getBitcast(MVT::v16i16, V2);
+ return DAG.getBitcast(MVT::v16f16,
+ DAG.getVectorShuffle(MVT::v16i16, DL, V1, V2, Mask));
+ }
+
switch (VT.SimpleTy) {
case MVT::v4f64:
return lowerV4F64Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
@@ -18135,6 +18619,13 @@ static SDValue lower512BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG);
}
+ if (VT == MVT::v32f16) {
+ V1 = DAG.getBitcast(MVT::v32i16, V1);
+ V2 = DAG.getBitcast(MVT::v32i16, V2);
+ return DAG.getBitcast(MVT::v32f16,
+ DAG.getVectorShuffle(MVT::v32i16, DL, V1, V2, Mask));
+ }
+
// Dispatch to each element type for lowering. If we don't have support for
// specific element type shuffles at 512 bits, immediately split them and
// lower them. Each lowering routine of a given type is allowed to assume that
@@ -18431,7 +18922,13 @@ static bool canonicalizeShuffleMaskWithCommute(ArrayRef<int> Mask) {
return false;
}
-/// Top-level lowering for x86 vector shuffles.
+// Forward declaration.
+static SDValue canonicalizeShuffleMaskWithHorizOp(
+ MutableArrayRef<SDValue> Ops, MutableArrayRef<int> Mask,
+ unsigned RootSizeInBits, const SDLoc &DL, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget);
+
+ /// Top-level lowering for x86 vector shuffles.
///
/// This handles decomposition, canonicalization, and lowering of all x86
/// vector shuffles. Most of the specific lowering strategies are encapsulated
@@ -18489,7 +18986,7 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, const X86Subtarget &Subtarget,
computeZeroableShuffleElements(OrigMask, V1, V2, KnownUndef, KnownZero);
APInt Zeroable = KnownUndef | KnownZero;
- if (Zeroable.isAllOnesValue())
+ if (Zeroable.isAllOnes())
return getZeroVector(VT, Subtarget, DAG, DL);
bool V2IsZero = !V2IsUndef && ISD::isBuildVectorAllZeros(V2.getNode());
@@ -18540,8 +19037,22 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, const X86Subtarget &Subtarget,
}
}
+ SmallVector<SDValue> Ops = {V1, V2};
+ SmallVector<int> Mask(OrigMask.begin(), OrigMask.end());
+
+ // Canonicalize the shuffle with any horizontal ops inputs.
+ // NOTE: This may update Ops and Mask.
+ if (SDValue HOp = canonicalizeShuffleMaskWithHorizOp(
+ Ops, Mask, VT.getSizeInBits(), DL, DAG, Subtarget))
+ return DAG.getBitcast(VT, HOp);
+
+ V1 = DAG.getBitcast(VT, Ops[0]);
+ V2 = DAG.getBitcast(VT, Ops[1]);
+ assert(NumElements == (int)Mask.size() &&
+ "canonicalizeShuffleMaskWithHorizOp "
+ "shouldn't alter the shuffle mask size");
+
// Commute the shuffle if it will improve canonicalization.
- SmallVector<int, 64> Mask(OrigMask.begin(), OrigMask.end());
if (canonicalizeShuffleMaskWithCommute(Mask)) {
ShuffleVectorSDNode::commuteMask(Mask);
std::swap(V1, V2);
@@ -18686,8 +19197,8 @@ static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
if (VT.getSizeInBits() == 8) {
// If IdxVal is 0, it's cheaper to do a move instead of a pextrb, unless
// we're going to zero extend the register or fold the store.
- if (llvm::isNullConstant(Idx) && !MayFoldIntoZeroExtend(Op) &&
- !MayFoldIntoStore(Op))
+ if (llvm::isNullConstant(Idx) && !X86::mayFoldIntoZeroExtend(Op) &&
+ !X86::mayFoldIntoStore(Op))
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
DAG.getBitcast(MVT::v4i32, Vec), Idx));
@@ -18840,14 +19351,18 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
MVT VT = Op.getSimpleValueType();
- if (VT.getSizeInBits() == 16) {
+ if (VT == MVT::i16) {
// If IdxVal is 0, it's cheaper to do a move instead of a pextrw, unless
// we're going to zero extend the register or fold the store (SSE41 only).
- if (IdxVal == 0 && !MayFoldIntoZeroExtend(Op) &&
- !(Subtarget.hasSSE41() && MayFoldIntoStore(Op)))
+ if (IdxVal == 0 && !X86::mayFoldIntoZeroExtend(Op) &&
+ !(Subtarget.hasSSE41() && X86::mayFoldIntoStore(Op))) {
+ if (Subtarget.hasFP16())
+ return Op;
+
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
DAG.getBitcast(MVT::v4i32, Vec), Idx));
+ }
SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, MVT::i32, Vec,
DAG.getTargetConstant(IdxVal, dl, MVT::i8));
@@ -18886,12 +19401,13 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
return DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
}
- if (VT.getSizeInBits() == 32) {
+ if (VT == MVT::f16 || VT.getSizeInBits() == 32) {
if (IdxVal == 0)
return Op;
- // SHUFPS the element to the lowest double word, then movss.
- int Mask[4] = { static_cast<int>(IdxVal), -1, -1, -1 };
+ // Shuffle the element to the lowest element, then movss or movsh.
+ SmallVector<int, 8> Mask(VecVT.getVectorNumElements(), -1);
+ Mask[0] = static_cast<int>(IdxVal);
Vec = DAG.getVectorShuffle(VecVT, dl, Vec, DAG.getUNDEF(VecVT), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
DAG.getIntPtrConstant(0, dl));
@@ -18994,17 +19510,28 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
bool IsZeroElt = X86::isZeroNode(N1);
bool IsAllOnesElt = VT.isInteger() && llvm::isAllOnesConstant(N1);
- // If we are inserting a element, see if we can do this more efficiently with
- // a blend shuffle with a rematerializable vector than a costly integer
- // insertion.
- if ((IsZeroElt || IsAllOnesElt) && Subtarget.hasSSE41() &&
- (16 <= EltSizeInBits || (IsZeroElt && !VT.is128BitVector()))) {
- SmallVector<int, 8> BlendMask;
- for (unsigned i = 0; i != NumElts; ++i)
- BlendMask.push_back(i == IdxVal ? i + NumElts : i);
- SDValue CstVector = IsZeroElt ? getZeroVector(VT, Subtarget, DAG, dl)
- : getOnesVector(VT, DAG, dl);
- return DAG.getVectorShuffle(VT, dl, N0, CstVector, BlendMask);
+ if (IsZeroElt || IsAllOnesElt) {
+ // Lower insertion of i8 -1 as an 'OR' blend.
+ // We don't deal with i8 0 since it appears to be handled elsewhere.
+ if (IsAllOnesElt && EltSizeInBits == 8 && !Subtarget.hasSSE41()) {
+ SDValue ZeroCst = DAG.getConstant(0, dl, VT.getScalarType());
+ SDValue OnesCst = DAG.getAllOnesConstant(dl, VT.getScalarType());
+ SmallVector<SDValue, 8> CstVectorElts(NumElts, ZeroCst);
+ CstVectorElts[IdxVal] = OnesCst;
+ SDValue CstVector = DAG.getBuildVector(VT, dl, CstVectorElts);
+ return DAG.getNode(ISD::OR, dl, VT, N0, CstVector);
+ }
+ // See if we can do this more efficiently with a blend shuffle with a
+ // rematerializable vector.
+ if (Subtarget.hasSSE41() &&
+ (EltSizeInBits >= 16 || (IsZeroElt && !VT.is128BitVector()))) {
+ SmallVector<int, 8> BlendMask;
+ for (unsigned i = 0; i != NumElts; ++i)
+ BlendMask.push_back(i == IdxVal ? i + NumElts : i);
+ SDValue CstVector = IsZeroElt ? getZeroVector(VT, Subtarget, DAG, dl)
+ : getOnesVector(VT, DAG, dl);
+ return DAG.getVectorShuffle(VT, dl, N0, CstVector, BlendMask);
+ }
}
// If the vector is wider than 128 bits, extract the 128-bit subvector, insert
@@ -19024,12 +19551,28 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
}
}
+ unsigned NumEltsIn128 = 128 / EltSizeInBits;
+ assert(isPowerOf2_32(NumEltsIn128) &&
+ "Vectors will always have power-of-two number of elements.");
+
+ // If we are not inserting into the low 128-bit vector chunk,
+ // then prefer the broadcast+blend sequence.
+ // FIXME: relax the profitability check iff all N1 uses are insertions.
+ if (!VT.is128BitVector() && IdxVal >= NumEltsIn128 &&
+ ((Subtarget.hasAVX2() && EltSizeInBits != 8) ||
+ (Subtarget.hasAVX() && (EltSizeInBits >= 32) &&
+ X86::mayFoldLoad(N1, Subtarget)))) {
+ SDValue N1SplatVec = DAG.getSplatBuildVector(VT, dl, N1);
+ SmallVector<int, 8> BlendMask;
+ for (unsigned i = 0; i != NumElts; ++i)
+ BlendMask.push_back(i == IdxVal ? i + NumElts : i);
+ return DAG.getVectorShuffle(VT, dl, N0, N1SplatVec, BlendMask);
+ }
+
// Get the desired 128-bit vector chunk.
SDValue V = extract128BitVector(N0, IdxVal, DAG, dl);
// Insert the element into the desired chunk.
- unsigned NumEltsIn128 = 128 / EltSizeInBits;
- assert(isPowerOf2_32(NumEltsIn128));
// Since NumEltsIn128 is a power of 2 we can use mask instead of modulo.
unsigned IdxIn128 = IdxVal & (NumEltsIn128 - 1);
@@ -19041,10 +19584,10 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
}
assert(VT.is128BitVector() && "Only 128-bit vector types should be left!");
- // This will be just movd/movq/movss/movsd.
+ // This will be just movw/movd/movq/movsh/movss/movsd.
if (IdxVal == 0 && ISD::isBuildVectorAllZeros(N0.getNode())) {
if (EltVT == MVT::i32 || EltVT == MVT::f32 || EltVT == MVT::f64 ||
- EltVT == MVT::i64) {
+ EltVT == MVT::f16 || EltVT == MVT::i64) {
N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, N1);
return getShuffleVectorZeroOrUndef(N1, 0, true, Subtarget, DAG);
}
@@ -19091,7 +19634,7 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
// combine either bitwise AND or insert of float 0.0 to set these bits.
bool MinSize = DAG.getMachineFunction().getFunction().hasMinSize();
- if (IdxVal == 0 && (!MinSize || !MayFoldLoad(N1))) {
+ if (IdxVal == 0 && (!MinSize || !X86::mayFoldLoad(N1, Subtarget))) {
// If this is an insertion of 32-bits into the low 32-bits of
// a vector, we prefer to generate a blend with immediate rather
// than an insertps. Blends are simpler operations in hardware and so
@@ -19143,8 +19686,9 @@ static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, const X86Subtarget &Subtarget,
assert(OpVT.is128BitVector() && OpVT.isInteger() && OpVT != MVT::v2i64 &&
"Expected an SSE type!");
- // Pass through a v4i32 SCALAR_TO_VECTOR as that's what we use in tblgen.
- if (OpVT == MVT::v4i32)
+ // Pass through a v4i32 or v8i16 SCALAR_TO_VECTOR as that's what we use in
+ // tblgen.
+ if (OpVT == MVT::v4i32 || (OpVT == MVT::v8i16 && Subtarget.hasFP16()))
return Op;
SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0));
@@ -19207,7 +19751,7 @@ unsigned X86TargetLowering::getGlobalWrapperKind(
return X86ISD::WrapperRIP;
// GOTPCREL references must always use RIP.
- if (OpFlags == X86II::MO_GOTPCREL)
+ if (OpFlags == X86II::MO_GOTPCREL || OpFlags == X86II::MO_GOTPCREL_NORELAX)
return X86ISD::WrapperRIP;
return X86ISD::Wrapper;
@@ -19682,92 +20226,6 @@ static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) {
return DAG.getMergeValues({Lo, Hi}, SDLoc(Op));
}
-static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
- MVT VT = Op.getSimpleValueType();
- assert((Op.getOpcode() == ISD::FSHL || Op.getOpcode() == ISD::FSHR) &&
- "Unexpected funnel shift opcode!");
-
- SDLoc DL(Op);
- SDValue Op0 = Op.getOperand(0);
- SDValue Op1 = Op.getOperand(1);
- SDValue Amt = Op.getOperand(2);
-
- bool IsFSHR = Op.getOpcode() == ISD::FSHR;
-
- if (VT.isVector()) {
- assert(Subtarget.hasVBMI2() && "Expected VBMI2");
-
- if (IsFSHR)
- std::swap(Op0, Op1);
-
- // With AVX512, but not VLX we need to widen to get a 512-bit result type.
- if (!Subtarget.hasVLX() && !VT.is512BitVector()) {
- Op0 = widenSubVector(Op0, false, Subtarget, DAG, DL, 512);
- Op1 = widenSubVector(Op1, false, Subtarget, DAG, DL, 512);
- }
-
- SDValue Funnel;
- APInt APIntShiftAmt;
- MVT ResultVT = Op0.getSimpleValueType();
- if (X86::isConstantSplat(Amt, APIntShiftAmt)) {
- uint64_t ShiftAmt = APIntShiftAmt.urem(VT.getScalarSizeInBits());
- Funnel =
- DAG.getNode(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, ResultVT, Op0,
- Op1, DAG.getTargetConstant(ShiftAmt, DL, MVT::i8));
- } else {
- if (!Subtarget.hasVLX() && !VT.is512BitVector())
- Amt = widenSubVector(Amt, false, Subtarget, DAG, DL, 512);
- Funnel = DAG.getNode(IsFSHR ? X86ISD::VSHRDV : X86ISD::VSHLDV, DL,
- ResultVT, Op0, Op1, Amt);
- }
- if (!Subtarget.hasVLX() && !VT.is512BitVector())
- Funnel = extractSubVector(Funnel, 0, DAG, DL, VT.getSizeInBits());
- return Funnel;
- }
- assert(
- (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) &&
- "Unexpected funnel shift type!");
-
- // Expand slow SHLD/SHRD cases if we are not optimizing for size.
- bool OptForSize = DAG.shouldOptForSize();
- bool ExpandFunnel = !OptForSize && Subtarget.isSHLDSlow();
-
- // fshl(x,y,z) -> (((aext(x) << bw) | zext(y)) << (z & (bw-1))) >> bw.
- // fshr(x,y,z) -> (((aext(x) << bw) | zext(y)) >> (z & (bw-1))).
- if ((VT == MVT::i8 || (ExpandFunnel && VT == MVT::i16)) &&
- !isa<ConstantSDNode>(Amt)) {
- unsigned EltSizeInBits = VT.getScalarSizeInBits();
- SDValue Mask = DAG.getConstant(EltSizeInBits - 1, DL, Amt.getValueType());
- SDValue HiShift = DAG.getConstant(EltSizeInBits, DL, Amt.getValueType());
- Op0 = DAG.getAnyExtOrTrunc(Op0, DL, MVT::i32);
- Op1 = DAG.getZExtOrTrunc(Op1, DL, MVT::i32);
- Amt = DAG.getNode(ISD::AND, DL, Amt.getValueType(), Amt, Mask);
- SDValue Res = DAG.getNode(ISD::SHL, DL, MVT::i32, Op0, HiShift);
- Res = DAG.getNode(ISD::OR, DL, MVT::i32, Res, Op1);
- if (IsFSHR) {
- Res = DAG.getNode(ISD::SRL, DL, MVT::i32, Res, Amt);
- } else {
- Res = DAG.getNode(ISD::SHL, DL, MVT::i32, Res, Amt);
- Res = DAG.getNode(ISD::SRL, DL, MVT::i32, Res, HiShift);
- }
- return DAG.getZExtOrTrunc(Res, DL, VT);
- }
-
- if (VT == MVT::i8 || ExpandFunnel)
- return SDValue();
-
- // i16 needs to modulo the shift amount, but i32/i64 have implicit modulo.
- if (VT == MVT::i16) {
- Amt = DAG.getNode(ISD::AND, DL, Amt.getValueType(), Amt,
- DAG.getConstant(15, DL, Amt.getValueType()));
- unsigned FSHOp = (IsFSHR ? X86ISD::FSHR : X86ISD::FSHL);
- return DAG.getNode(FSHOp, DL, VT, Op0, Op1, Amt);
- }
-
- return Op;
-}
-
// Try to use a packed vector operation to handle i64 on 32-bit targets when
// AVX512DQ is enabled.
static SDValue LowerI64IntToFP_AVX512DQ(SDValue Op, SelectionDAG &DAG,
@@ -19811,6 +20269,43 @@ static SDValue LowerI64IntToFP_AVX512DQ(SDValue Op, SelectionDAG &DAG,
DAG.getIntPtrConstant(0, dl));
}
+// Try to use a packed vector operation to handle i64 on 32-bit targets.
+static SDValue LowerI64IntToFP16(SDValue Op, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ assert((Op.getOpcode() == ISD::SINT_TO_FP ||
+ Op.getOpcode() == ISD::STRICT_SINT_TO_FP ||
+ Op.getOpcode() == ISD::STRICT_UINT_TO_FP ||
+ Op.getOpcode() == ISD::UINT_TO_FP) &&
+ "Unexpected opcode!");
+ bool IsStrict = Op->isStrictFPOpcode();
+ SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
+ MVT SrcVT = Src.getSimpleValueType();
+ MVT VT = Op.getSimpleValueType();
+
+ if (SrcVT != MVT::i64 || Subtarget.is64Bit() || VT != MVT::f16)
+ return SDValue();
+
+ // Pack the i64 into a vector, do the operation and extract.
+
+ assert(Subtarget.hasFP16() && "Expected FP16");
+
+ SDLoc dl(Op);
+ SDValue InVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Src);
+ if (IsStrict) {
+ SDValue CvtVec = DAG.getNode(Op.getOpcode(), dl, {MVT::v2f16, MVT::Other},
+ {Op.getOperand(0), InVec});
+ SDValue Chain = CvtVec.getValue(1);
+ SDValue Value = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, CvtVec,
+ DAG.getIntPtrConstant(0, dl));
+ return DAG.getMergeValues({Value, Chain}, dl);
+ }
+
+ SDValue CvtVec = DAG.getNode(Op.getOpcode(), dl, MVT::v2f16, InVec);
+
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, CvtVec,
+ DAG.getIntPtrConstant(0, dl));
+}
+
static bool useVectorCast(unsigned Opcode, MVT FromVT, MVT ToVT,
const X86Subtarget &Subtarget) {
switch (Opcode) {
@@ -20024,6 +20519,9 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
+ if (Subtarget.isTargetWin64() && SrcVT == MVT::i128)
+ return LowerWin64_INT128_TO_FP(Op, DAG);
+
if (SDValue Extract = vectorizeExtractedCast(Op, DAG, Subtarget))
return Extract;
@@ -20063,6 +20561,8 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
if (SDValue V = LowerI64IntToFP_AVX512DQ(Op, DAG, Subtarget))
return V;
+ if (SDValue V = LowerI64IntToFP16(Op, DAG, Subtarget))
+ return V;
// SSE doesn't have an i16 conversion so we need to promote.
if (SrcVT == MVT::i16 && (UseSSEReg || VT == MVT::f128)) {
@@ -20521,6 +21021,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
if (DstVT.isVector())
return lowerUINT_TO_FP_vec(Op, DAG, Subtarget);
+ if (Subtarget.isTargetWin64() && SrcVT == MVT::i128)
+ return LowerWin64_INT128_TO_FP(Op, DAG);
+
if (SDValue Extract = vectorizeExtractedCast(Op, DAG, Subtarget))
return Extract;
@@ -20542,6 +21045,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
if (SDValue V = LowerI64IntToFP_AVX512DQ(Op, DAG, Subtarget))
return V;
+ if (SDValue V = LowerI64IntToFP16(Op, DAG, Subtarget))
+ return V;
// The transform for i64->f64 isn't correct for 0 when rounding to negative
// infinity. It produces -0.0, so disable under strictfp.
@@ -21323,9 +21828,11 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
MVT VT = Op->getSimpleValueType(0);
SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
+ SDValue Chain = IsStrict ? Op->getOperand(0) : SDValue();
MVT SrcVT = Src.getSimpleValueType();
SDLoc dl(Op);
+ SDValue Res;
if (VT.isVector()) {
if (VT == MVT::v2i1 && SrcVT == MVT::v2f64) {
MVT ResVT = MVT::v4i32;
@@ -21350,10 +21857,8 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8f64, Tmp, Src,
DAG.getIntPtrConstant(0, dl));
}
- SDValue Res, Chain;
if (IsStrict) {
- Res =
- DAG.getNode(Opc, dl, {ResVT, MVT::Other}, {Op->getOperand(0), Src});
+ Res = DAG.getNode(Opc, dl, {ResVT, MVT::Other}, {Chain, Src});
Chain = Res.getValue(1);
} else {
Res = DAG.getNode(Opc, dl, ResVT, Src);
@@ -21367,6 +21872,67 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
return Res;
}
+ if (Subtarget.hasFP16() && SrcVT.getVectorElementType() == MVT::f16) {
+ if (VT == MVT::v8i16 || VT == MVT::v16i16 || VT == MVT::v32i16)
+ return Op;
+
+ MVT ResVT = VT;
+ MVT EleVT = VT.getVectorElementType();
+ if (EleVT != MVT::i64)
+ ResVT = EleVT == MVT::i32 ? MVT::v4i32 : MVT::v8i16;
+
+ if (SrcVT != MVT::v8f16) {
+ SDValue Tmp =
+ IsStrict ? DAG.getConstantFP(0.0, dl, SrcVT) : DAG.getUNDEF(SrcVT);
+ SmallVector<SDValue, 4> Ops(SrcVT == MVT::v2f16 ? 4 : 2, Tmp);
+ Ops[0] = Src;
+ Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8f16, Ops);
+ }
+
+ if (IsStrict) {
+ Res = DAG.getNode(IsSigned ? X86ISD::STRICT_CVTTP2SI
+ : X86ISD::STRICT_CVTTP2UI,
+ dl, {ResVT, MVT::Other}, {Chain, Src});
+ Chain = Res.getValue(1);
+ } else {
+ Res = DAG.getNode(IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI, dl,
+ ResVT, Src);
+ }
+
+ // TODO: Need to add exception check code for strict FP.
+ if (EleVT.getSizeInBits() < 16) {
+ ResVT = MVT::getVectorVT(EleVT, 8);
+ Res = DAG.getNode(ISD::TRUNCATE, dl, ResVT, Res);
+ }
+
+ if (ResVT != VT)
+ Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res,
+ DAG.getIntPtrConstant(0, dl));
+
+ if (IsStrict)
+ return DAG.getMergeValues({Res, Chain}, dl);
+ return Res;
+ }
+
+ if (VT == MVT::v8i16 && (SrcVT == MVT::v8f32 || SrcVT == MVT::v8f64)) {
+ if (IsStrict) {
+ Res = DAG.getNode(IsSigned ? ISD::STRICT_FP_TO_SINT
+ : ISD::STRICT_FP_TO_UINT,
+ dl, {MVT::v8i32, MVT::Other}, {Chain, Src});
+ Chain = Res.getValue(1);
+ } else {
+ Res = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl,
+ MVT::v8i32, Src);
+ }
+
+ // TODO: Need to add exception check code for strict FP.
+ Res = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i16, Res);
+
+ if (IsStrict)
+ return DAG.getMergeValues({Res, Chain}, dl);
+ return Res;
+ }
+
// v8f64->v8i32 is legal, but we need v8i32 to be custom for v8f32.
if (VT == MVT::v8i32 && SrcVT == MVT::v8f64) {
assert(!IsSigned && "Expected unsigned conversion!");
@@ -21390,10 +21956,9 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVT, Tmp, Src,
DAG.getIntPtrConstant(0, dl));
- SDValue Res, Chain;
if (IsStrict) {
Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, dl, {ResVT, MVT::Other},
- {Op->getOperand(0), Src});
+ {Chain, Src});
Chain = Res.getValue(1);
} else {
Res = DAG.getNode(ISD::FP_TO_UINT, dl, ResVT, Src);
@@ -21421,10 +21986,9 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVT, Tmp, Src,
DAG.getIntPtrConstant(0, dl));
- SDValue Res, Chain;
if (IsStrict) {
Res = DAG.getNode(Op.getOpcode(), dl, {MVT::v8i64, MVT::Other},
- {Op->getOperand(0), Src});
+ {Chain, Src});
Chain = Res.getValue(1);
} else {
Res = DAG.getNode(Op.getOpcode(), dl, MVT::v8i64, Src);
@@ -21449,7 +22013,7 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
SDValue Tmp = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8f32,
{Src, Zero, Zero, Zero});
Tmp = DAG.getNode(Op.getOpcode(), dl, {MVT::v8i64, MVT::Other},
- {Op->getOperand(0), Tmp});
+ {Chain, Tmp});
SDValue Chain = Tmp.getValue(1);
Tmp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i64, Tmp,
DAG.getIntPtrConstant(0, dl));
@@ -21532,17 +22096,16 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
// FIXME: This does not generate an invalid exception if the input does not
// fit in i32. PR44019
if (Subtarget.is64Bit()) {
- SDValue Res, Chain;
if (IsStrict) {
- Res = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { MVT::i64, MVT::Other},
- { Op.getOperand(0), Src });
+ Res = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, {MVT::i64, MVT::Other},
+ {Chain, Src});
Chain = Res.getValue(1);
} else
Res = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i64, Src);
Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
if (IsStrict)
- return DAG.getMergeValues({ Res, Chain }, dl);
+ return DAG.getMergeValues({Res, Chain}, dl);
return Res;
}
@@ -21557,17 +22120,16 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
// fit in i16. PR44019
if (VT == MVT::i16 && (UseSSEReg || SrcVT == MVT::f128)) {
assert(IsSigned && "Expected i16 FP_TO_UINT to have been promoted!");
- SDValue Res, Chain;
if (IsStrict) {
- Res = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { MVT::i32, MVT::Other},
- { Op.getOperand(0), Src });
+ Res = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, {MVT::i32, MVT::Other},
+ {Chain, Src});
Chain = Res.getValue(1);
} else
Res = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
if (IsStrict)
- return DAG.getMergeValues({ Res, Chain }, dl);
+ return DAG.getMergeValues({Res, Chain}, dl);
return Res;
}
@@ -21583,7 +22145,6 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
else
LC = RTLIB::getFPTOUINT(SrcVT, VT);
- SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
MakeLibCallOptions CallOptions;
std::pair<SDValue, SDValue> Tmp = makeLibCall(DAG, LC, VT, Src, CallOptions,
SDLoc(Op), Chain);
@@ -21595,7 +22156,6 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
}
// Fall back to X87.
- SDValue Chain;
if (SDValue V = FP_TO_INTHelper(Op, DAG, IsSigned, Chain)) {
if (IsStrict)
return DAG.getMergeValues({V, Chain}, dl);
@@ -21822,6 +22382,35 @@ SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
if (VT == MVT::f128)
return SDValue();
+ if (VT == MVT::f80) {
+ if (SVT == MVT::f16) {
+ assert(Subtarget.hasFP16() && "Unexpected features!");
+ RTLIB::Libcall LC = RTLIB::getFPEXT(SVT, VT);
+ MakeLibCallOptions CallOptions;
+ std::pair<SDValue, SDValue> Tmp =
+ makeLibCall(DAG, LC, VT, In, CallOptions, DL,
+ IsStrict ? Op.getOperand(0) : SDValue());
+ if (IsStrict)
+ return DAG.getMergeValues({Tmp.first, Tmp.second}, DL);
+ else
+ return Tmp.first;
+ }
+ return Op;
+ }
+
+ if (SVT.getVectorElementType() == MVT::f16) {
+ assert(Subtarget.hasFP16() && Subtarget.hasVLX() && "Unexpected features!");
+ if (SVT == MVT::v2f16)
+ In = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f16, In,
+ DAG.getUNDEF(MVT::v2f16));
+ SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8f16, In,
+ DAG.getUNDEF(MVT::v4f16));
+ if (IsStrict)
+ return DAG.getNode(X86ISD::STRICT_VFPEXT, DL, {VT, MVT::Other},
+ {Op->getOperand(0), Res});
+ return DAG.getNode(X86ISD::VFPEXT, DL, VT, Res);
+ }
+
assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!");
SDValue Res =
@@ -21835,8 +22424,11 @@ SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
SDValue X86TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
bool IsStrict = Op->isStrictFPOpcode();
SDValue In = Op.getOperand(IsStrict ? 1 : 0);
- // It's legal except when f128 is involved
- if (In.getSimpleValueType() != MVT::f128)
+ MVT VT = Op.getSimpleValueType();
+ MVT SVT = In.getSimpleValueType();
+
+ // It's legal except when f128 is involved or we're converting f80->f16.
+ if (SVT != MVT::f128 && !(VT == MVT::f16 && SVT == MVT::f80))
return Op;
return SDValue();
@@ -22026,9 +22618,8 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
bool IsF128 = (VT == MVT::f128);
- assert((VT == MVT::f64 || VT == MVT::f32 || VT == MVT::f128 ||
- VT == MVT::v2f64 || VT == MVT::v4f64 || VT == MVT::v4f32 ||
- VT == MVT::v8f32 || VT == MVT::v8f64 || VT == MVT::v16f32) &&
+ assert(VT.isFloatingPoint() && VT != MVT::f80 &&
+ DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
"Unexpected type in LowerFABSorFNEG");
// FIXME: Use function attribute "OptimizeForSize" and/or CodeGenOpt::Level to
@@ -22042,7 +22633,9 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
bool IsFakeVector = !VT.isVector() && !IsF128;
MVT LogicVT = VT;
if (IsFakeVector)
- LogicVT = (VT == MVT::f64) ? MVT::v2f64 : MVT::v4f32;
+ LogicVT = (VT == MVT::f64) ? MVT::v2f64
+ : (VT == MVT::f32) ? MVT::v4f32
+ : MVT::v8f16;
unsigned EltBits = VT.getScalarSizeInBits();
// For FABS, mask is 0x7f...; for FNEG, mask is 0x80...
@@ -22087,9 +22680,8 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
// At this point the operands and the result should have the same
// type, and that won't be f80 since that is not custom lowered.
bool IsF128 = (VT == MVT::f128);
- assert((VT == MVT::f64 || VT == MVT::f32 || VT == MVT::f128 ||
- VT == MVT::v2f64 || VT == MVT::v4f64 || VT == MVT::v4f32 ||
- VT == MVT::v8f32 || VT == MVT::v8f64 || VT == MVT::v16f32) &&
+ assert(VT.isFloatingPoint() && VT != MVT::f80 &&
+ DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
"Unexpected type in LowerFCOPYSIGN");
const fltSemantics &Sem = SelectionDAG::EVTToAPFloatSemantics(VT);
@@ -22102,7 +22694,9 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
bool IsFakeVector = !VT.isVector() && !IsF128;
MVT LogicVT = VT;
if (IsFakeVector)
- LogicVT = (VT == MVT::f64) ? MVT::v2f64 : MVT::v4f32;
+ LogicVT = (VT == MVT::f64) ? MVT::v2f64
+ : (VT == MVT::f32) ? MVT::v4f32
+ : MVT::v8f16;
// The mask constants are automatically splatted for vector types.
unsigned EltSizeInBits = VT.getScalarSizeInBits();
@@ -22208,7 +22802,7 @@ static bool matchScalarReduction(SDValue Op, ISD::NodeType BinOp,
if (!SrcOpMap.empty() && VT != SrcOpMap.begin()->first.getValueType())
return false;
unsigned NumElts = VT.getVectorNumElements();
- APInt EltCount = APInt::getNullValue(NumElts);
+ APInt EltCount = APInt::getZero(NumElts);
M = SrcOpMap.insert(std::make_pair(Src, EltCount)).first;
SrcOps.push_back(Src);
}
@@ -22227,7 +22821,7 @@ static bool matchScalarReduction(SDValue Op, ISD::NodeType BinOp,
} else {
// Quit if not all elements are used.
for (const auto &I : SrcOpMap)
- if (!I.second.isAllOnesValue())
+ if (!I.second.isAllOnes())
return false;
}
@@ -22250,7 +22844,7 @@ static SDValue LowerVectorAllZero(const SDLoc &DL, SDValue V, ISD::CondCode CC,
X86CC = (CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE);
auto MaskBits = [&](SDValue Src) {
- if (Mask.isAllOnesValue())
+ if (Mask.isAllOnes())
return Src;
EVT SrcVT = Src.getValueType();
SDValue MaskValue = DAG.getConstant(Mask, DL, SrcVT);
@@ -22288,8 +22882,8 @@ static SDValue LowerVectorAllZero(const SDLoc &DL, SDValue V, ISD::CondCode CC,
// Without PTEST, a masked v2i64 or-reduction is not faster than
// scalarization.
- if (!Mask.isAllOnesValue() && VT.getScalarSizeInBits() > 32)
- return SDValue();
+ if (!Mask.isAllOnes() && VT.getScalarSizeInBits() > 32)
+ return SDValue();
V = DAG.getBitcast(MVT::v16i8, MaskBits(V));
V = DAG.getNode(X86ISD::PCMPEQ, DL, MVT::v16i8, V,
@@ -22312,7 +22906,7 @@ static SDValue MatchVectorAllZeroTest(SDValue Op, ISD::CondCode CC,
// Check whether we're masking/truncating an OR-reduction result, in which
// case track the masked bits.
- APInt Mask = APInt::getAllOnesValue(Op.getScalarValueSizeInBits());
+ APInt Mask = APInt::getAllOnes(Op.getScalarValueSizeInBits());
switch (Op.getOpcode()) {
case ISD::TRUNCATE: {
SDValue Src = Op.getOperand(0);
@@ -22543,16 +23137,10 @@ static SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
// For equality comparisons try to use SIGN_EXTEND if the input was
// truncate from something with enough sign bits.
if (Op0.getOpcode() == ISD::TRUNCATE) {
- SDValue In = Op0.getOperand(0);
- unsigned EffBits =
- In.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(In) + 1;
- if (EffBits <= 16)
+ if (DAG.ComputeMinSignedBits(Op0.getOperand(0)) <= 16)
ExtendOp = ISD::SIGN_EXTEND;
} else if (Op1.getOpcode() == ISD::TRUNCATE) {
- SDValue In = Op1.getOperand(0);
- unsigned EffBits =
- In.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(In) + 1;
- if (EffBits <= 16)
+ if (DAG.ComputeMinSignedBits(Op1.getOperand(0)) <= 16)
ExtendOp = ISD::SIGN_EXTEND;
}
}
@@ -22618,6 +23206,7 @@ SDValue X86TargetLowering::getSqrtEstimate(SDValue Op,
int &RefinementSteps,
bool &UseOneConstNR,
bool Reciprocal) const {
+ SDLoc DL(Op);
EVT VT = Op.getValueType();
// SSE1 has rsqrtss and rsqrtps. AVX adds a 256-bit variant for rsqrtps.
@@ -22639,7 +23228,23 @@ SDValue X86TargetLowering::getSqrtEstimate(SDValue Op,
UseOneConstNR = false;
// There is no FSQRT for 512-bits, but there is RSQRT14.
unsigned Opcode = VT == MVT::v16f32 ? X86ISD::RSQRT14 : X86ISD::FRSQRT;
- return DAG.getNode(Opcode, SDLoc(Op), VT, Op);
+ return DAG.getNode(Opcode, DL, VT, Op);
+ }
+
+ if (VT.getScalarType() == MVT::f16 && isTypeLegal(VT) &&
+ Subtarget.hasFP16()) {
+ if (RefinementSteps == ReciprocalEstimate::Unspecified)
+ RefinementSteps = 0;
+
+ if (VT == MVT::f16) {
+ SDValue Zero = DAG.getIntPtrConstant(0, DL);
+ SDValue Undef = DAG.getUNDEF(MVT::v8f16);
+ Op = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v8f16, Op);
+ Op = DAG.getNode(X86ISD::RSQRT14S, DL, MVT::v8f16, Undef, Op);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f16, Op, Zero);
+ }
+
+ return DAG.getNode(X86ISD::RSQRT14, DL, VT, Op);
}
return SDValue();
}
@@ -22649,6 +23254,7 @@ SDValue X86TargetLowering::getSqrtEstimate(SDValue Op,
SDValue X86TargetLowering::getRecipEstimate(SDValue Op, SelectionDAG &DAG,
int Enabled,
int &RefinementSteps) const {
+ SDLoc DL(Op);
EVT VT = Op.getValueType();
// SSE1 has rcpss and rcpps. AVX adds a 256-bit variant for rcpps.
@@ -22673,7 +23279,23 @@ SDValue X86TargetLowering::getRecipEstimate(SDValue Op, SelectionDAG &DAG,
// There is no FSQRT for 512-bits, but there is RCP14.
unsigned Opcode = VT == MVT::v16f32 ? X86ISD::RCP14 : X86ISD::FRCP;
- return DAG.getNode(Opcode, SDLoc(Op), VT, Op);
+ return DAG.getNode(Opcode, DL, VT, Op);
+ }
+
+ if (VT.getScalarType() == MVT::f16 && isTypeLegal(VT) &&
+ Subtarget.hasFP16()) {
+ if (RefinementSteps == ReciprocalEstimate::Unspecified)
+ RefinementSteps = 0;
+
+ if (VT == MVT::f16) {
+ SDValue Zero = DAG.getIntPtrConstant(0, DL);
+ SDValue Undef = DAG.getUNDEF(MVT::v8f16);
+ Op = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v8f16, Op);
+ Op = DAG.getNode(X86ISD::RCP14S, DL, MVT::v8f16, Undef, Op);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f16, Op, Zero);
+ }
+
+ return DAG.getNode(X86ISD::RCP14, DL, VT, Op);
}
return SDValue();
}
@@ -22696,7 +23318,7 @@ X86TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
if (isIntDivCheap(N->getValueType(0), Attr))
return SDValue(N,0); // Lower SDIV as SDIV
- assert((Divisor.isPowerOf2() || (-Divisor).isPowerOf2()) &&
+ assert((Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()) &&
"Unexpected divisor!");
// Only perform this transform if CMOV is supported otherwise the select
@@ -22956,7 +23578,7 @@ static SDValue incDecVectorConstant(SDValue V, SelectionDAG &DAG, bool IsInc) {
// Avoid overflow/underflow.
const APInt &EltC = Elt->getAPIntValue();
- if ((IsInc && EltC.isMaxValue()) || (!IsInc && EltC.isNullValue()))
+ if ((IsInc && EltC.isMaxValue()) || (!IsInc && EltC.isZero()))
return SDValue();
NewVecC.push_back(DAG.getConstant(EltC + (IsInc ? 1 : -1), DL, EltVT));
@@ -23037,7 +23659,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
if (isFP) {
#ifndef NDEBUG
MVT EltVT = Op0.getSimpleValueType().getVectorElementType();
- assert(EltVT == MVT::f32 || EltVT == MVT::f64);
+ assert(EltVT == MVT::f16 || EltVT == MVT::f32 || EltVT == MVT::f64);
#endif
bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS;
@@ -23051,7 +23673,10 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
if (Subtarget.hasAVX512() && VT.getVectorElementType() == MVT::i1 &&
(!IsStrict || Subtarget.hasVLX() ||
Op0.getSimpleValueType().is512BitVector())) {
- assert(VT.getVectorNumElements() <= 16);
+#ifndef NDEBUG
+ unsigned Num = VT.getVectorNumElements();
+ assert(Num <= 16 || (Num == 32 && EltVT == MVT::f16));
+#endif
Opc = IsStrict ? X86ISD::STRICT_CMPM : X86ISD::CMPM;
} else {
Opc = IsStrict ? X86ISD::STRICT_CMPP : X86ISD::CMPP;
@@ -23272,7 +23897,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
Cond = ISD::SETGT;
else if (ConstValue.isMaxSignedValue())
Cond = ISD::SETLT;
- else if (ConstValue.isNullValue() && DAG.SignBitIsZero(Op0))
+ else if (ConstValue.isZero() && DAG.SignBitIsZero(Op0))
Cond = ISD::SETGT;
}
@@ -23625,7 +24250,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
// TODO: Can we move this to TranslateX86CC to handle jumps/branches too?
if (auto *Op1C = dyn_cast<ConstantSDNode>(Op1)) {
const APInt &Op1Val = Op1C->getAPIntValue();
- if (!Op1Val.isNullValue()) {
+ if (!Op1Val.isZero()) {
// Ensure the constant+1 doesn't overflow.
if ((CC == ISD::CondCode::SETGT && !Op1Val.isMaxSignedValue()) ||
(CC == ISD::CondCode::SETUGT && !Op1Val.isMaxValue())) {
@@ -24053,8 +24678,8 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// being inserted between two CMOV's. (in i16 case too TBN)
// https://bugs.llvm.org/show_bug.cgi?id=40974
if ((Op.getValueType() == MVT::i8 && Subtarget.hasCMov()) ||
- (Op.getValueType() == MVT::i16 && !MayFoldLoad(Op1) &&
- !MayFoldLoad(Op2))) {
+ (Op.getValueType() == MVT::i16 && !X86::mayFoldLoad(Op1, Subtarget) &&
+ !X86::mayFoldLoad(Op2, Subtarget))) {
Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1);
Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2);
SDValue Ops[] = { Op2, Op1, CC, Cond };
@@ -24699,8 +25324,8 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
DAG.getRegister(Vreg, SPTy));
} else {
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
- Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Size);
- MF.getInfo<X86MachineFunctionInfo>()->setHasWinAlloca(true);
+ Chain = DAG.getNode(X86ISD::DYN_ALLOCA, dl, NodeTys, Chain, Size);
+ MF.getInfo<X86MachineFunctionInfo>()->setHasDynAlloca(true);
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
Register SPReg = RegInfo->getStackRegister();
@@ -24814,7 +25439,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
}
if (ArgMode == 2) {
- // Sanity Check: Make sure using fp_offset makes sense.
+ // Make sure using fp_offset makes sense.
assert(!Subtarget.useSoftFloat() &&
!(MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) &&
Subtarget.hasSSE1());
@@ -25554,6 +26179,35 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
// Swap Src1 and Src2 in the node creation
return DAG.getNode(IntrData->Opc0, dl, VT,Src2, Src1);
}
+ case CFMA_OP_MASKZ:
+ case CFMA_OP_MASK: {
+ SDValue Src1 = Op.getOperand(1);
+ SDValue Src2 = Op.getOperand(2);
+ SDValue Src3 = Op.getOperand(3);
+ SDValue Mask = Op.getOperand(4);
+ MVT VT = Op.getSimpleValueType();
+
+ SDValue PassThru = Src3;
+ if (IntrData->Type == CFMA_OP_MASKZ)
+ PassThru = getZeroVector(VT, Subtarget, DAG, dl);
+
+ // We add rounding mode to the Node when
+ // - RC Opcode is specified and
+ // - RC is not "current direction".
+ SDValue NewOp;
+ if (IntrData->Opc1 != 0) {
+ SDValue Rnd = Op.getOperand(5);
+ unsigned RC = 0;
+ if (isRoundModeSAEToX(Rnd, RC))
+ NewOp = DAG.getNode(IntrData->Opc1, dl, VT, Src1, Src2, Src3,
+ DAG.getTargetConstant(RC, dl, MVT::i32));
+ else if (!isRoundModeCurDirection(Rnd))
+ return SDValue();
+ }
+ if (!NewOp)
+ NewOp = DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2, Src3);
+ return getVectorMaskingNode(NewOp, Mask, PassThru, Subtarget, DAG);
+ }
case IFMA_OP:
// NOTE: We need to swizzle the operands to pass the multiply operands
// first.
@@ -26165,6 +26819,19 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
getPointerTy(DAG.getDataLayout())),
Op.getOperand(1), ShAmt);
}
+ case Intrinsic::thread_pointer: {
+ if (Subtarget.isTargetELF()) {
+ SDLoc dl(Op);
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ // Get the Thread Pointer, which is %gs:0 (32-bit) or %fs:0 (64-bit).
+ Value *Ptr = Constant::getNullValue(Type::getInt8PtrTy(
+ *DAG.getContext(), Subtarget.is64Bit() ? X86AS::FS : X86AS::GS));
+ return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
+ DAG.getIntPtrConstant(0, dl), MachinePointerInfo(Ptr));
+ }
+ report_fatal_error(
+ "Target OS doesn't support __builtin_thread_pointer() yet.");
+ }
}
}
@@ -26469,6 +27136,12 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
DAG.getConstant(0, dl, MVT::i32),
DAG.getConstant(0, dl, MVT::i32));
}
+ case llvm::Intrinsic::asan_check_memaccess: {
+ // Mark this as adjustsStack because it will be lowered to a call.
+ DAG.getMachineFunction().getFrameInfo().setAdjustsStack(true);
+ // Don't do anything here, we will expand these intrinsics out later.
+ return Op;
+ }
case llvm::Intrinsic::x86_flags_read_u32:
case llvm::Intrinsic::x86_flags_read_u64:
case llvm::Intrinsic::x86_flags_write_u32:
@@ -27044,11 +27717,11 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
if (!Attrs.isEmpty() && !Func->isVarArg()) {
unsigned InRegCount = 0;
- unsigned Idx = 1;
+ unsigned Idx = 0;
for (FunctionType::param_iterator I = FTy->param_begin(),
E = FTy->param_end(); I != E; ++I, ++Idx)
- if (Attrs.hasAttribute(Idx, Attribute::InReg)) {
+ if (Attrs.hasParamAttr(Idx, Attribute::InReg)) {
const DataLayout &DL = DAG.getDataLayout();
// FIXME: should only count parameters that are lowered to integers.
InRegCount += (DL.getTypeSizeInBits(*I) + 31) / 32;
@@ -27517,15 +28190,51 @@ static SDValue LowerADDSAT_SUBSAT(SDValue Op, SelectionDAG &DAG,
EVT SetCCResultType =
TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
- if (Opcode == ISD::USUBSAT && !TLI.isOperationLegal(ISD::UMAX, VT)) {
- // usubsat X, Y --> (X >u Y) ? X - Y : 0
- SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, X, Y);
- SDValue Cmp = DAG.getSetCC(DL, SetCCResultType, X, Y, ISD::SETUGT);
- // TODO: Move this to DAGCombiner?
- if (SetCCResultType == VT &&
- DAG.ComputeNumSignBits(Cmp) == VT.getScalarSizeInBits())
- return DAG.getNode(ISD::AND, DL, VT, Cmp, Sub);
- return DAG.getSelect(DL, VT, Cmp, Sub, DAG.getConstant(0, DL, VT));
+ unsigned BitWidth = VT.getScalarSizeInBits();
+ if (Opcode == ISD::USUBSAT) {
+ if (!TLI.isOperationLegal(ISD::UMAX, VT) || useVPTERNLOG(Subtarget, VT)) {
+ // Handle a special-case with a bit-hack instead of cmp+select:
+ // usubsat X, SMIN --> (X ^ SMIN) & (X s>> BW-1)
+ // If the target can use VPTERNLOG, DAGToDAG will match this as
+ // "vpsra + vpternlog" which is better than "vpmax + vpsub" with a
+ // "broadcast" constant load.
+ ConstantSDNode *C = isConstOrConstSplat(Y, true);
+ if (C && C->getAPIntValue().isSignMask()) {
+ SDValue SignMask = DAG.getConstant(C->getAPIntValue(), DL, VT);
+ SDValue ShiftAmt = DAG.getConstant(BitWidth - 1, DL, VT);
+ SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, X, SignMask);
+ SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShiftAmt);
+ return DAG.getNode(ISD::AND, DL, VT, Xor, Sra);
+ }
+ }
+ if (!TLI.isOperationLegal(ISD::UMAX, VT)) {
+ // usubsat X, Y --> (X >u Y) ? X - Y : 0
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, X, Y);
+ SDValue Cmp = DAG.getSetCC(DL, SetCCResultType, X, Y, ISD::SETUGT);
+ // TODO: Move this to DAGCombiner?
+ if (SetCCResultType == VT &&
+ DAG.ComputeNumSignBits(Cmp) == VT.getScalarSizeInBits())
+ return DAG.getNode(ISD::AND, DL, VT, Cmp, Sub);
+ return DAG.getSelect(DL, VT, Cmp, Sub, DAG.getConstant(0, DL, VT));
+ }
+ }
+
+ if ((Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) &&
+ (!VT.isVector() || VT == MVT::v2i64)) {
+ APInt MinVal = APInt::getSignedMinValue(BitWidth);
+ APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ SDValue Result =
+ DAG.getNode(Opcode == ISD::SADDSAT ? ISD::SADDO : ISD::SSUBO, DL,
+ DAG.getVTList(VT, SetCCResultType), X, Y);
+ SDValue SumDiff = Result.getValue(0);
+ SDValue Overflow = Result.getValue(1);
+ SDValue SatMin = DAG.getConstant(MinVal, DL, VT);
+ SDValue SatMax = DAG.getConstant(MaxVal, DL, VT);
+ SDValue SumNeg =
+ DAG.getSetCC(DL, SetCCResultType, SumDiff, Zero, ISD::SETLT);
+ Result = DAG.getSelect(DL, VT, SumNeg, SatMax, SatMin);
+ return DAG.getSelect(DL, VT, Overflow, Result, SumDiff);
}
// Use default expansion.
@@ -27542,7 +28251,7 @@ static SDValue LowerABS(SDValue Op, const X86Subtarget &Subtarget,
SDValue N0 = Op.getOperand(0);
SDValue Neg = DAG.getNode(X86ISD::SUB, DL, DAG.getVTList(VT, MVT::i32),
DAG.getConstant(0, DL, VT), N0);
- SDValue Ops[] = {N0, Neg, DAG.getTargetConstant(X86::COND_GE, DL, MVT::i8),
+ SDValue Ops[] = {N0, Neg, DAG.getTargetConstant(X86::COND_NS, DL, MVT::i8),
SDValue(Neg.getNode(), 1)};
return DAG.getNode(X86ISD::CMOV, DL, VT, Ops);
}
@@ -27646,9 +28355,7 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
// Multiply, mask the lower 8bits of the lo/hi results and pack.
SDValue RLo = DAG.getNode(ISD::MUL, dl, ExVT, ALo, BLo);
SDValue RHi = DAG.getNode(ISD::MUL, dl, ExVT, AHi, BHi);
- RLo = DAG.getNode(ISD::AND, dl, ExVT, RLo, DAG.getConstant(255, dl, ExVT));
- RHi = DAG.getNode(ISD::AND, dl, ExVT, RHi, DAG.getConstant(255, dl, ExVT));
- return DAG.getNode(X86ISD::PACKUS, dl, VT, RLo, RHi);
+ return getPack(DAG, Subtarget, dl, VT, RLo, RHi);
}
// Lower v4i32 mul as 2x shuffle, 2x pmuludq, 2x shuffle.
@@ -27801,19 +28508,10 @@ static SDValue LowervXi8MulWithUNPCK(SDValue A, SDValue B, const SDLoc &dl,
SDValue RLo = DAG.getNode(MulOpc, dl, ExVT, ALo, BLo);
SDValue RHi = DAG.getNode(MulOpc, dl, ExVT, AHi, BHi);
- if (Low) {
- // Mask the lower bits and pack the results to rejoin the halves.
- SDValue Mask = DAG.getConstant(255, dl, ExVT);
- SDValue LLo = DAG.getNode(ISD::AND, dl, ExVT, RLo, Mask);
- SDValue LHi = DAG.getNode(ISD::AND, dl, ExVT, RHi, Mask);
- *Low = DAG.getNode(X86ISD::PACKUS, dl, VT, LLo, LHi);
- }
-
- RLo = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, RLo, 8, DAG);
- RHi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, RHi, 8, DAG);
+ if (Low)
+ *Low = getPack(DAG, Subtarget, dl, VT, RLo, RHi);
- // Bitcast back to VT and then pack all the even elements from Lo and Hi.
- return DAG.getNode(X86ISD::PACKUS, dl, VT, RLo, RHi);
+ return getPack(DAG, Subtarget, dl, VT, RLo, RHi, /*PackHiHalf*/ true);
}
static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
@@ -28111,9 +28809,80 @@ SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) cons
return DAG.getBitcast(VT, CallInfo.first);
}
+SDValue X86TargetLowering::LowerWin64_FP_TO_INT128(SDValue Op,
+ SelectionDAG &DAG,
+ SDValue &Chain) const {
+ assert(Subtarget.isTargetWin64() && "Unexpected target");
+ EVT VT = Op.getValueType();
+ bool IsStrict = Op->isStrictFPOpcode();
+
+ SDValue Arg = Op.getOperand(IsStrict ? 1 : 0);
+ EVT ArgVT = Arg.getValueType();
+
+ assert(VT.isInteger() && VT.getSizeInBits() == 128 &&
+ "Unexpected return type for lowering");
+
+ RTLIB::Libcall LC;
+ if (Op->getOpcode() == ISD::FP_TO_SINT ||
+ Op->getOpcode() == ISD::STRICT_FP_TO_SINT)
+ LC = RTLIB::getFPTOSINT(ArgVT, VT);
+ else
+ LC = RTLIB::getFPTOUINT(ArgVT, VT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected request for libcall!");
+
+ SDLoc dl(Op);
+ MakeLibCallOptions CallOptions;
+ Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
+
+ SDValue Result;
+  // The i128 result is returned as a v2i64 in xmm0; cast it back to the
+  // expected VT (i128).
+ std::tie(Result, Chain) =
+ makeLibCall(DAG, LC, MVT::v2i64, Arg, CallOptions, dl, Chain);
+ Result = DAG.getBitcast(VT, Result);
+ return Result;
+}
+
+SDValue X86TargetLowering::LowerWin64_INT128_TO_FP(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Subtarget.isTargetWin64() && "Unexpected target");
+ EVT VT = Op.getValueType();
+ bool IsStrict = Op->isStrictFPOpcode();
+
+ SDValue Arg = Op.getOperand(IsStrict ? 1 : 0);
+ EVT ArgVT = Arg.getValueType();
+
+ assert(ArgVT.isInteger() && ArgVT.getSizeInBits() == 128 &&
+ "Unexpected argument type for lowering");
+
+ RTLIB::Libcall LC;
+ if (Op->getOpcode() == ISD::SINT_TO_FP ||
+ Op->getOpcode() == ISD::STRICT_SINT_TO_FP)
+ LC = RTLIB::getSINTTOFP(ArgVT, VT);
+ else
+ LC = RTLIB::getUINTTOFP(ArgVT, VT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected request for libcall!");
+
+ SDLoc dl(Op);
+ MakeLibCallOptions CallOptions;
+ SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
+
+ // Pass the i128 argument as an indirect argument on the stack.
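+  // (Win64 passes arguments wider than 64 bits by reference, so spill the
+  //  value to a 16-byte aligned stack slot and pass its address.)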
+ SDValue StackPtr = DAG.CreateStackTemporary(ArgVT, 16);
+ int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ MachinePointerInfo MPI =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
+ Chain = DAG.getStore(Chain, dl, Arg, StackPtr, MPI, Align(16));
+
+ SDValue Result;
+ std::tie(Result, Chain) =
+ makeLibCall(DAG, LC, VT, StackPtr, CallOptions, dl, Chain);
+ return IsStrict ? DAG.getMergeValues({Result, Chain}, dl) : Result;
+}
+
// Return true if the required (according to Opcode) shift-imm form is natively
// supported by the Subtarget
-static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget &Subtarget,
+static bool supportedVectorShiftWithImm(MVT VT, const X86Subtarget &Subtarget,
unsigned Opcode) {
if (VT.getScalarSizeInBits() < 16)
return false;
@@ -28133,14 +28902,14 @@ static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget &Subtarget,
// The shift amount is a variable, but it is the same for all vector lanes.
// These instructions are defined together with shift-immediate.
static
-bool SupportedVectorShiftWithBaseAmnt(MVT VT, const X86Subtarget &Subtarget,
+bool supportedVectorShiftWithBaseAmnt(MVT VT, const X86Subtarget &Subtarget,
unsigned Opcode) {
- return SupportedVectorShiftWithImm(VT, Subtarget, Opcode);
+ return supportedVectorShiftWithImm(VT, Subtarget, Opcode);
}
// Return true if the required (according to Opcode) variable-shift form is
// natively supported by the Subtarget
-static bool SupportedVectorVarShift(MVT VT, const X86Subtarget &Subtarget,
+static bool supportedVectorVarShift(MVT VT, const X86Subtarget &Subtarget,
unsigned Opcode) {
if (!Subtarget.hasInt256() || VT.getScalarSizeInBits() < 16)
@@ -28216,7 +28985,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
uint64_t ShiftAmt = APIntShiftAmt.getZExtValue();
- if (SupportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode()))
+ if (supportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode()))
return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG);
// i64 SRA needs to be performed as partial shifts.
@@ -28231,8 +29000,15 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
MVT ShiftVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
// Simple i8 add case
- if (Op.getOpcode() == ISD::SHL && ShiftAmt == 1)
+ if (Op.getOpcode() == ISD::SHL && ShiftAmt == 1) {
+ // R may be undef at run-time, but (shl R, 1) must be an even number (LSB
+ // must be 0). (add undef, undef) however can be any value. To make this
+ // safe, we must freeze R to ensure that register allocation uses the same
+ // register for an undefined value. This ensures that the result will
+ // still be even and preserves the original semantics.
+ R = DAG.getNode(ISD::FREEZE, dl, VT, R);
return DAG.getNode(ISD::ADD, dl, VT, R, R);
+ }
// ashr(R, 7) === cmp_slt(R, 0)
if (Op.getOpcode() == ISD::SRA && ShiftAmt == 7) {
@@ -28293,7 +29069,7 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
unsigned X86OpcV = getTargetVShiftUniformOpcode(Opcode, true);
if (SDValue BaseShAmt = DAG.getSplatValue(Amt)) {
- if (SupportedVectorShiftWithBaseAmnt(VT, Subtarget, Opcode)) {
+ if (supportedVectorShiftWithBaseAmnt(VT, Subtarget, Opcode)) {
MVT EltVT = VT.getVectorElementType();
assert(EltVT.bitsLE(MVT::i64) && "Unexpected element type!");
if (EltVT != MVT::i64 && EltVT.bitsGT(MVT::i32))
@@ -28311,7 +29087,7 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
!Subtarget.hasXOP()) {
unsigned NumElts = VT.getVectorNumElements();
MVT ExtVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
- if (SupportedVectorShiftWithBaseAmnt(ExtVT, Subtarget, Opcode)) {
+ if (supportedVectorShiftWithBaseAmnt(ExtVT, Subtarget, Opcode)) {
unsigned LogicalOp = (Opcode == ISD::SHL ? ISD::SHL : ISD::SRL);
unsigned LogicalX86Op = getTargetVShiftUniformOpcode(LogicalOp, false);
BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt);
@@ -28363,7 +29139,7 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
return SDValue();
}
- if (SupportedVectorShiftWithBaseAmnt(VT, Subtarget, Op.getOpcode()))
+ if (supportedVectorShiftWithBaseAmnt(VT, Subtarget, Op.getOpcode()))
return DAG.getNode(X86OpcV, dl, VT, R, Op.getOperand(1));
}
return SDValue();
@@ -28376,8 +29152,10 @@ static SDValue convertShiftLeftToScale(SDValue Amt, const SDLoc &dl,
MVT VT = Amt.getSimpleValueType();
if (!(VT == MVT::v8i16 || VT == MVT::v4i32 ||
(Subtarget.hasInt256() && VT == MVT::v16i16) ||
- (Subtarget.hasVBMI2() && VT == MVT::v32i16) ||
- (!Subtarget.hasAVX512() && VT == MVT::v16i8)))
+ (Subtarget.hasAVX512() && VT == MVT::v32i16) ||
+ (!Subtarget.hasAVX512() && VT == MVT::v16i8) ||
+ (Subtarget.hasInt256() && VT == MVT::v32i8) ||
+ (Subtarget.hasBWI() && VT == MVT::v64i8)))
return SDValue();
if (ISD::isBuildVectorOfConstantSDNodes(Amt.getNode())) {
@@ -28425,10 +29203,7 @@ static SDValue convertShiftLeftToScale(SDValue Amt, const SDLoc &dl,
Hi = convertShiftLeftToScale(Hi, dl, Subtarget, DAG);
if (Subtarget.hasSSE41())
return DAG.getNode(X86ISD::PACKUS, dl, VT, Lo, Hi);
-
- return DAG.getVectorShuffle(VT, dl, DAG.getBitcast(VT, Lo),
- DAG.getBitcast(VT, Hi),
- {0, 2, 4, 6, 8, 10, 12, 14});
+ return getPack(DAG, Subtarget, dl, VT, Lo, Hi);
}
return SDValue();
@@ -28456,9 +29231,23 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
if (SDValue V = LowerScalarVariableShift(Op, DAG, Subtarget))
return V;
- if (SupportedVectorVarShift(VT, Subtarget, Opc))
+ if (supportedVectorVarShift(VT, Subtarget, Opc))
return Op;
+ // i64 vector arithmetic shift can be emulated with the transform:
+ // M = lshr(SIGN_MASK, Amt)
+ // ashr(R, Amt) === sub(xor(lshr(R, Amt), M), M)
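+  // e.g. scaled down to i8 for brevity: R = 0xF0 (-16), Amt = 4:
+  //   M = 0x80>>4 = 0x08, lshr(R,4) = 0x0F, xor = 0x07, sub = 0xFF = -1.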
+ if (((VT == MVT::v2i64 && !Subtarget.hasXOP()) ||
+ (VT == MVT::v4i64 && Subtarget.hasInt256())) &&
+ Opc == ISD::SRA) {
+ SDValue S = DAG.getConstant(APInt::getSignMask(64), dl, VT);
+ SDValue M = DAG.getNode(ISD::SRL, dl, VT, S, Amt);
+ R = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
+ R = DAG.getNode(ISD::XOR, dl, VT, R, M);
+ R = DAG.getNode(ISD::SUB, dl, VT, R, M);
+ return R;
+ }
+
// XOP has 128-bit variable logical/arithmetic shifts.
// +ve/-ve Amt = shift left/right.
if (Subtarget.hasXOP() && (VT == MVT::v2i64 || VT == MVT::v4i32 ||
@@ -28484,19 +29273,6 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
return DAG.getVectorShuffle(VT, dl, R0, R1, {0, 3});
}
- // i64 vector arithmetic shift can be emulated with the transform:
- // M = lshr(SIGN_MASK, Amt)
- // ashr(R, Amt) === sub(xor(lshr(R, Amt), M), M)
- if ((VT == MVT::v2i64 || (VT == MVT::v4i64 && Subtarget.hasInt256())) &&
- Opc == ISD::SRA) {
- SDValue S = DAG.getConstant(APInt::getSignMask(64), dl, VT);
- SDValue M = DAG.getNode(ISD::SRL, dl, VT, S, Amt);
- R = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
- R = DAG.getNode(ISD::XOR, dl, VT, R, M);
- R = DAG.getNode(ISD::SUB, dl, VT, R, M);
- return R;
- }
-
// If possible, lower this shift as a sequence of two shifts by
// constant plus a BLENDing shuffle instead of scalarizing it.
// Example:
@@ -28552,7 +29328,9 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
// If possible, lower this packed shift into a vector multiply instead of
// expanding it into a sequence of scalar shifts.
- if (Opc == ISD::SHL)
+ // For v32i8 cases, it might be quicker to split/extend to vXi16 shifts.
+ if (Opc == ISD::SHL && !(VT == MVT::v32i8 && (Subtarget.hasXOP() ||
+ Subtarget.canExtendTo512BW())))
if (SDValue Scale = convertShiftLeftToScale(Amt, dl, Subtarget, DAG))
return DAG.getNode(ISD::MUL, dl, VT, R, Scale);
@@ -28920,6 +29698,77 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
return SDValue();
}
+static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
+ MVT VT = Op.getSimpleValueType();
+ assert((Op.getOpcode() == ISD::FSHL || Op.getOpcode() == ISD::FSHR) &&
+ "Unexpected funnel shift opcode!");
+
+ SDLoc DL(Op);
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue Amt = Op.getOperand(2);
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ bool IsFSHR = Op.getOpcode() == ISD::FSHR;
+
+ if (VT.isVector()) {
+ assert(Subtarget.hasVBMI2() && "Expected VBMI2");
+
+ if (IsFSHR)
+ std::swap(Op0, Op1);
+
+ APInt APIntShiftAmt;
+ if (X86::isConstantSplat(Amt, APIntShiftAmt)) {
+ uint64_t ShiftAmt = APIntShiftAmt.urem(EltSizeInBits);
+ SDValue Imm = DAG.getTargetConstant(ShiftAmt, DL, MVT::i8);
+ return getAVX512Node(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT,
+ {Op0, Op1, Imm}, DAG, Subtarget);
+ }
+ return getAVX512Node(IsFSHR ? X86ISD::VSHRDV : X86ISD::VSHLDV, DL, VT,
+ {Op0, Op1, Amt}, DAG, Subtarget);
+ }
+ assert(
+ (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) &&
+ "Unexpected funnel shift type!");
+
+ // Expand slow SHLD/SHRD cases if we are not optimizing for size.
+ bool OptForSize = DAG.shouldOptForSize();
+ bool ExpandFunnel = !OptForSize && Subtarget.isSHLDSlow();
+
+ // fshl(x,y,z) -> (((aext(x) << bw) | zext(y)) << (z & (bw-1))) >> bw.
+ // fshr(x,y,z) -> (((aext(x) << bw) | zext(y)) >> (z & (bw-1))).
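+  // e.g. for i8: x = 0x12, y = 0x34, z = 3 concatenate to 0x1234, so
+  // fshl = ((0x1234 << 3) >> 8) & 0xFF = 0x91 and
+  // fshr = (0x1234 >> 3) & 0xFF = 0x46.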
+ if ((VT == MVT::i8 || (ExpandFunnel && VT == MVT::i16)) &&
+ !isa<ConstantSDNode>(Amt)) {
+ SDValue Mask = DAG.getConstant(EltSizeInBits - 1, DL, Amt.getValueType());
+ SDValue HiShift = DAG.getConstant(EltSizeInBits, DL, Amt.getValueType());
+ Op0 = DAG.getAnyExtOrTrunc(Op0, DL, MVT::i32);
+ Op1 = DAG.getZExtOrTrunc(Op1, DL, MVT::i32);
+ Amt = DAG.getNode(ISD::AND, DL, Amt.getValueType(), Amt, Mask);
+ SDValue Res = DAG.getNode(ISD::SHL, DL, MVT::i32, Op0, HiShift);
+ Res = DAG.getNode(ISD::OR, DL, MVT::i32, Res, Op1);
+ if (IsFSHR) {
+ Res = DAG.getNode(ISD::SRL, DL, MVT::i32, Res, Amt);
+ } else {
+ Res = DAG.getNode(ISD::SHL, DL, MVT::i32, Res, Amt);
+ Res = DAG.getNode(ISD::SRL, DL, MVT::i32, Res, HiShift);
+ }
+ return DAG.getZExtOrTrunc(Res, DL, VT);
+ }
+
+ if (VT == MVT::i8 || ExpandFunnel)
+ return SDValue();
+
+ // i16 needs to modulo the shift amount, but i32/i64 have implicit modulo.
+ if (VT == MVT::i16) {
+ Amt = DAG.getNode(ISD::AND, DL, Amt.getValueType(), Amt,
+ DAG.getConstant(15, DL, Amt.getValueType()));
+ unsigned FSHOp = (IsFSHR ? X86ISD::FSHR : X86ISD::FSHL);
+ return DAG.getNode(FSHOp, DL, VT, Op0, Op1, Amt);
+ }
+
+ return Op;
+}
+
static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
@@ -28931,6 +29780,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
unsigned Opcode = Op.getOpcode();
unsigned EltSizeInBits = VT.getScalarSizeInBits();
int NumElts = VT.getVectorNumElements();
+ bool IsROTL = Opcode == ISD::ROTL;
// Check for constant splat rotation amount.
APInt CstSplatValue;
@@ -28944,7 +29794,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
if (Subtarget.hasAVX512() && 32 <= EltSizeInBits) {
// Attempt to rotate by immediate.
if (IsCstSplat) {
- unsigned RotOpc = (Opcode == ISD::ROTL ? X86ISD::VROTLI : X86ISD::VROTRI);
+ unsigned RotOpc = IsROTL ? X86ISD::VROTLI : X86ISD::VROTRI;
uint64_t RotAmt = CstSplatValue.urem(EltSizeInBits);
return DAG.getNode(RotOpc, DL, VT, R,
DAG.getTargetConstant(RotAmt, DL, MVT::i8));
@@ -28956,11 +29806,11 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
// AVX512 VBMI2 vXi16 - lower to funnel shifts.
if (Subtarget.hasVBMI2() && 16 == EltSizeInBits) {
- unsigned FunnelOpc = (Opcode == ISD::ROTL ? ISD::FSHL : ISD::FSHR);
+ unsigned FunnelOpc = IsROTL ? ISD::FSHL : ISD::FSHR;
return DAG.getNode(FunnelOpc, DL, VT, R, R, Amt);
}
- assert((Opcode == ISD::ROTL) && "Only ROTL supported");
+ assert(IsROTL && "Only ROTL supported");
// XOP has 128-bit vector variable + immediate rotates.
// +ve/-ve Amt = rotate left/right - just need to handle ISD::ROTL.
@@ -28996,16 +29846,41 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
return SDValue();
bool IsSplatAmt = DAG.isSplatValue(Amt);
+ SDValue AmtMask = DAG.getConstant(EltSizeInBits - 1, DL, VT);
// v16i8/v32i8: Split rotation into rot4/rot2/rot1 stages and select by
// the amount bit.
- if (EltSizeInBits == 8 && !IsSplatAmt) {
+ if (EltSizeInBits == 8) {
if (ISD::isBuildVectorOfConstantSDNodes(Amt.getNode()))
return SDValue();
- // We don't need ModuloAmt here as we just peek at individual bits.
+    // Check for a hidden ISD::ROTR; vXi8 lowering can handle both, but we
+ // currently hit infinite loops in legalization if we allow ISD::ROTR.
+ // FIXME: Infinite ROTL<->ROTR legalization in TargetLowering::expandROT.
+ SDValue HiddenROTRAmt;
+ if (Amt.getOpcode() == ISD::SUB &&
+ ISD::isBuildVectorAllZeros(Amt.getOperand(0).getNode()))
+ HiddenROTRAmt = Amt.getOperand(1);
+
MVT ExtVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
+ // If the amount is a splat, attempt to fold as unpack(x,x) << zext(y):
+ // rotl(x,y) -> (((aext(x) << bw) | zext(x)) << (y & (bw-1))) >> bw.
+ // rotr(x,y) -> (((aext(x) << bw) | zext(x)) >> (y & (bw-1))).
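+    // e.g. for i8, rotl(0xB4, 3): concat(0xB4,0xB4) = 0xB4B4, then
+    // (0xB4B4 << 3) >> 8 truncates to 0xA5, i.e. 1011'0100 rotated left by 3.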
+ if (SDValue BaseRotAmt = DAG.getSplatValue(DAG.getNode(
+ ISD::AND, DL, VT, HiddenROTRAmt ? HiddenROTRAmt : Amt, AmtMask))) {
+ unsigned ShiftX86Opc = HiddenROTRAmt ? X86ISD::VSRLI : X86ISD::VSHLI;
+ BaseRotAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, BaseRotAmt);
+ SDValue Lo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, R, R));
+ SDValue Hi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, R, R));
+ Lo = getTargetVShiftNode(ShiftX86Opc, DL, ExtVT, Lo, BaseRotAmt,
+ Subtarget, DAG);
+ Hi = getTargetVShiftNode(ShiftX86Opc, DL, ExtVT, Hi, BaseRotAmt,
+ Subtarget, DAG);
+ return getPack(DAG, Subtarget, DL, VT, Lo, Hi, !HiddenROTRAmt);
+ }
+
+ // We don't need ModuloAmt here as we just peek at individual bits.
auto SignBitSelect = [&](MVT SelVT, SDValue Sel, SDValue V0, SDValue V1) {
if (Subtarget.hasSSE41()) {
// On SSE41 targets we can use PBLENDVB which selects bytes based just
@@ -29024,6 +29899,15 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
return DAG.getSelect(DL, SelVT, C, V0, V1);
};
+ // 'Hidden' ROTR is currently only profitable on AVX512 targets where we
+ // have VPTERNLOG.
+ unsigned ShiftLHS = ISD::SHL;
+ unsigned ShiftRHS = ISD::SRL;
+ if (HiddenROTRAmt && useVPTERNLOG(Subtarget, VT)) {
+ std::swap(ShiftLHS, ShiftRHS);
+ Amt = HiddenROTRAmt;
+ }
+
// Turn 'a' into a mask suitable for VSELECT: a = a << 5;
// We can safely do this using i16 shifts as we're only interested in
// the 3 lower bits of each byte.
@@ -29035,8 +29919,8 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
SDValue M;
M = DAG.getNode(
ISD::OR, DL, VT,
- DAG.getNode(ISD::SHL, DL, VT, R, DAG.getConstant(4, DL, VT)),
- DAG.getNode(ISD::SRL, DL, VT, R, DAG.getConstant(4, DL, VT)));
+ DAG.getNode(ShiftLHS, DL, VT, R, DAG.getConstant(4, DL, VT)),
+ DAG.getNode(ShiftRHS, DL, VT, R, DAG.getConstant(4, DL, VT)));
R = SignBitSelect(VT, Amt, M, R);
// a += a
@@ -29045,8 +29929,8 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
// r = VSELECT(r, rot(r, 2), a);
M = DAG.getNode(
ISD::OR, DL, VT,
- DAG.getNode(ISD::SHL, DL, VT, R, DAG.getConstant(2, DL, VT)),
- DAG.getNode(ISD::SRL, DL, VT, R, DAG.getConstant(6, DL, VT)));
+ DAG.getNode(ShiftLHS, DL, VT, R, DAG.getConstant(2, DL, VT)),
+ DAG.getNode(ShiftRHS, DL, VT, R, DAG.getConstant(6, DL, VT)));
R = SignBitSelect(VT, Amt, M, R);
// a += a
@@ -29055,8 +29939,8 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
// return VSELECT(r, rot(r, 1), a);
M = DAG.getNode(
ISD::OR, DL, VT,
- DAG.getNode(ISD::SHL, DL, VT, R, DAG.getConstant(1, DL, VT)),
- DAG.getNode(ISD::SRL, DL, VT, R, DAG.getConstant(7, DL, VT)));
+ DAG.getNode(ShiftLHS, DL, VT, R, DAG.getConstant(1, DL, VT)),
+ DAG.getNode(ShiftRHS, DL, VT, R, DAG.getConstant(7, DL, VT)));
return SignBitSelect(VT, Amt, M, R);
}
@@ -29065,18 +29949,16 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
// If the amount is a splat, perform the modulo BEFORE the splat,
// this helps LowerScalarVariableShift to remove the splat later.
Amt = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, BaseRotAmt);
- Amt = DAG.getNode(ISD::AND, DL, VT, Amt,
- DAG.getConstant(EltSizeInBits - 1, DL, VT));
+ Amt = DAG.getNode(ISD::AND, DL, VT, Amt, AmtMask);
Amt = DAG.getVectorShuffle(VT, DL, Amt, DAG.getUNDEF(VT),
SmallVector<int>(NumElts, 0));
} else {
- Amt = DAG.getNode(ISD::AND, DL, VT, Amt,
- DAG.getConstant(EltSizeInBits - 1, DL, VT));
+ Amt = DAG.getNode(ISD::AND, DL, VT, Amt, AmtMask);
}
bool ConstantAmt = ISD::isBuildVectorOfConstantSDNodes(Amt.getNode());
- bool LegalVarShifts = SupportedVectorVarShift(VT, Subtarget, ISD::SHL) &&
- SupportedVectorVarShift(VT, Subtarget, ISD::SRL);
+ bool LegalVarShifts = supportedVectorVarShift(VT, Subtarget, ISD::SHL) &&
+ supportedVectorVarShift(VT, Subtarget, ISD::SRL);
// Fallback for splats + all supported variable shifts.
// Fallback for non-constants AVX2 vXi16 as well.
@@ -29088,9 +29970,11 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
return DAG.getNode(ISD::OR, DL, VT, SHL, SRL);
}
- // As with shifts, convert the rotation amount to a multiplication factor.
+ // As with shifts, attempt to convert the rotation amount to a multiplication
+ // factor, fallback to general expansion.
SDValue Scale = convertShiftLeftToScale(Amt, DL, Subtarget, DAG);
- assert(Scale && "Failed to convert ROTL amount to scale");
+ if (!Scale)
+ return SDValue();
// v8i16/v16i16: perform unsigned multiply hi/lo and OR the results.
if (EltSizeInBits == 16) {
@@ -29803,6 +30687,10 @@ static SDValue LowerPARITY(SDValue Op, const X86Subtarget &Subtarget,
return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Setnp);
}
+ // If we have POPCNT, use the default expansion.
+ if (Subtarget.hasPOPCNT())
+ return SDValue();
+
if (VT == MVT::i64) {
// Xor the high and low 16-bits together using a 32-bit operation.
SDValue Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
@@ -30358,6 +31246,10 @@ static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget,
Mask = ExtendToType(Mask, MaskVT, DAG, true);
}
+ // Break dependency on the data register.
+ if (PassThru.isUndef())
+ PassThru = getZeroVector(VT, Subtarget, DAG, dl);
+
SDValue Ops[] = { N->getChain(), PassThru, Mask, N->getBasePtr(), Index,
N->getScale() };
SDValue NewGather = DAG.getMemIntrinsicNode(
@@ -30886,6 +31778,51 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
SDValue Src = N->getOperand(IsStrict ? 1 : 0);
EVT SrcVT = Src.getValueType();
+ if (VT.isVector() && Subtarget.hasFP16() &&
+ SrcVT.getVectorElementType() == MVT::f16) {
+ EVT EleVT = VT.getVectorElementType();
+ EVT ResVT = EleVT == MVT::i32 ? MVT::v4i32 : MVT::v8i16;
+
+ if (SrcVT != MVT::v8f16) {
+ SDValue Tmp =
+ IsStrict ? DAG.getConstantFP(0.0, dl, SrcVT) : DAG.getUNDEF(SrcVT);
+ SmallVector<SDValue, 4> Ops(SrcVT == MVT::v2f16 ? 4 : 2, Tmp);
+ Ops[0] = Src;
+ Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8f16, Ops);
+ }
+
+ SDValue Res, Chain;
+ if (IsStrict) {
+ unsigned Opc =
+ IsSigned ? X86ISD::STRICT_CVTTP2SI : X86ISD::STRICT_CVTTP2UI;
+ Res =
+ DAG.getNode(Opc, dl, {ResVT, MVT::Other}, {N->getOperand(0), Src});
+ Chain = Res.getValue(1);
+ } else {
+ unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;
+ Res = DAG.getNode(Opc, dl, ResVT, Src);
+ }
+
+ // TODO: Need to add exception check code for strict FP.
+ if (EleVT.getSizeInBits() < 16) {
+ MVT TmpVT = MVT::getVectorVT(EleVT.getSimpleVT(), 8);
+ Res = DAG.getNode(ISD::TRUNCATE, dl, TmpVT, Res);
+
+ // Now widen to 128 bits.
+ unsigned NumConcats = 128 / TmpVT.getSizeInBits();
+ MVT ConcatVT = MVT::getVectorVT(EleVT.getSimpleVT(), 8 * NumConcats);
+ SmallVector<SDValue, 8> ConcatOps(NumConcats, DAG.getUNDEF(TmpVT));
+ ConcatOps[0] = Res;
+ Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatVT, ConcatOps);
+ }
+
+ Results.push_back(Res);
+ if (IsStrict)
+ Results.push_back(Chain);
+
+ return;
+ }
+
if (VT.isVector() && VT.getScalarSizeInBits() < 32) {
assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&
"Unexpected type action!");
@@ -31001,8 +31938,9 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
assert(!VT.isVector() && "Vectors should have been handled above!");
- if (Subtarget.hasDQI() && VT == MVT::i64 &&
- (SrcVT == MVT::f32 || SrcVT == MVT::f64)) {
+ if ((Subtarget.hasDQI() && VT == MVT::i64 &&
+ (SrcVT == MVT::f32 || SrcVT == MVT::f64)) ||
+ (Subtarget.hasFP16() && SrcVT == MVT::f16)) {
assert(!Subtarget.is64Bit() && "i64 should be legal");
unsigned NumElts = Subtarget.hasVLX() ? 2 : 8;
// If we use a 128-bit result we might need to use a target specific node.
@@ -31036,6 +31974,15 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
+ if (VT == MVT::i128 && Subtarget.isTargetWin64()) {
+ SDValue Chain;
+ SDValue V = LowerWin64_FP_TO_INT128(SDValue(N, 0), DAG, Chain);
+ Results.push_back(V);
+ if (IsStrict)
+ Results.push_back(Chain);
+ return;
+ }
+
SDValue Chain;
if (SDValue V = FP_TO_INTHelper(SDValue(N, 0), DAG, IsSigned, Chain)) {
Results.push_back(V);
@@ -31059,9 +32006,31 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
bool IsSigned = N->getOpcode() == ISD::SINT_TO_FP ||
N->getOpcode() == ISD::STRICT_SINT_TO_FP;
EVT VT = N->getValueType(0);
+ SDValue Src = N->getOperand(IsStrict ? 1 : 0);
+ if (VT.getVectorElementType() == MVT::f16 && Subtarget.hasFP16() &&
+ Subtarget.hasVLX()) {
+ if (Src.getValueType().getVectorElementType() == MVT::i16)
+ return;
+
+ if (VT == MVT::v2f16 && Src.getValueType() == MVT::v2i32)
+ Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src,
+ IsStrict ? DAG.getConstant(0, dl, MVT::v2i32)
+ : DAG.getUNDEF(MVT::v2i32));
+ if (IsStrict) {
+ unsigned Opc =
+ IsSigned ? X86ISD::STRICT_CVTSI2P : X86ISD::STRICT_CVTUI2P;
+ SDValue Res = DAG.getNode(Opc, dl, {MVT::v8f16, MVT::Other},
+ {N->getOperand(0), Src});
+ Results.push_back(Res);
+ Results.push_back(Res.getValue(1));
+ } else {
+ unsigned Opc = IsSigned ? X86ISD::CVTSI2P : X86ISD::CVTUI2P;
+ Results.push_back(DAG.getNode(Opc, dl, MVT::v8f16, Src));
+ }
+ return;
+ }
if (VT != MVT::v2f32)
return;
- SDValue Src = N->getOperand(IsStrict ? 1 : 0);
EVT SrcVT = Src.getValueType();
if (Subtarget.hasDQI() && Subtarget.hasVLX() && SrcVT == MVT::v2i64) {
if (IsStrict) {
@@ -31162,14 +32131,21 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::FP_ROUND: {
bool IsStrict = N->isStrictFPOpcode();
SDValue Src = N->getOperand(IsStrict ? 1 : 0);
+ EVT VT = N->getValueType(0);
+ EVT NewVT = VT.getVectorElementType() == MVT::f16 ? MVT::v8f16 : MVT::v4f32;
+ if (VT == MVT::v2f16 && Src.getValueType() == MVT::v2f32) {
+ SDValue Ext = IsStrict ? DAG.getConstantFP(0.0, dl, MVT::v2f32)
+ : DAG.getUNDEF(MVT::v2f32);
+ Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src, Ext);
+ }
if (!isTypeLegal(Src.getValueType()))
return;
SDValue V;
if (IsStrict)
- V = DAG.getNode(X86ISD::STRICT_VFPROUND, dl, {MVT::v4f32, MVT::Other},
- {N->getOperand(0), N->getOperand(1)});
+ V = DAG.getNode(X86ISD::STRICT_VFPROUND, dl, {NewVT, MVT::Other},
+ {N->getOperand(0), Src});
else
- V = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, N->getOperand(0));
+ V = DAG.getNode(X86ISD::VFPROUND, dl, NewVT, Src);
Results.push_back(V);
if (IsStrict)
Results.push_back(V.getValue(1));
@@ -31181,6 +32157,21 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
// No other ValueType for FP_EXTEND should reach this point.
assert(N->getValueType(0) == MVT::v2f32 &&
"Do not know how to legalize this Node");
+ if (!Subtarget.hasFP16() || !Subtarget.hasVLX())
+ return;
+ bool IsStrict = N->isStrictFPOpcode();
+ SDValue Src = N->getOperand(IsStrict ? 1 : 0);
+ SDValue Ext = IsStrict ? DAG.getConstantFP(0.0, dl, MVT::v2f16)
+ : DAG.getUNDEF(MVT::v2f16);
+ SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f16, Src, Ext);
+ if (IsStrict)
+ V = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {MVT::v4f32, MVT::Other},
+ {N->getOperand(0), V});
+ else
+ V = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v4f32, V);
+ Results.push_back(V);
+ if (IsStrict)
+ Results.push_back(V.getValue(1));
return;
}
case ISD::INTRINSIC_W_CHAIN: {
@@ -31656,6 +32647,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(MOVSLDUP)
NODE_NAME_CASE(MOVSD)
NODE_NAME_CASE(MOVSS)
+ NODE_NAME_CASE(MOVSH)
NODE_NAME_CASE(UNPCKL)
NODE_NAME_CASE(UNPCKH)
NODE_NAME_CASE(VBROADCAST)
@@ -31684,7 +32676,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(VASTART_SAVE_XMM_REGS)
NODE_NAME_CASE(VAARG_64)
NODE_NAME_CASE(VAARG_X32)
- NODE_NAME_CASE(WIN_ALLOCA)
+ NODE_NAME_CASE(DYN_ALLOCA)
NODE_NAME_CASE(MEMBARRIER)
NODE_NAME_CASE(MFENCE)
NODE_NAME_CASE(SEG_ALLOCA)
@@ -31714,6 +32706,22 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(FNMSUB_RND)
NODE_NAME_CASE(FMADDSUB_RND)
NODE_NAME_CASE(FMSUBADD_RND)
+ NODE_NAME_CASE(VFMADDC)
+ NODE_NAME_CASE(VFMADDC_RND)
+ NODE_NAME_CASE(VFCMADDC)
+ NODE_NAME_CASE(VFCMADDC_RND)
+ NODE_NAME_CASE(VFMULC)
+ NODE_NAME_CASE(VFMULC_RND)
+ NODE_NAME_CASE(VFCMULC)
+ NODE_NAME_CASE(VFCMULC_RND)
+ NODE_NAME_CASE(VFMULCSH)
+ NODE_NAME_CASE(VFMULCSH_RND)
+ NODE_NAME_CASE(VFCMULCSH)
+ NODE_NAME_CASE(VFCMULCSH_RND)
+ NODE_NAME_CASE(VFMADDCSH)
+ NODE_NAME_CASE(VFMADDCSH_RND)
+ NODE_NAME_CASE(VFCMADDCSH)
+ NODE_NAME_CASE(VFCMADDCSH_RND)
NODE_NAME_CASE(VPMADD52H)
NODE_NAME_CASE(VPMADD52L)
NODE_NAME_CASE(VRNDSCALE)
@@ -31954,6 +32962,7 @@ bool X86TargetLowering::isBinOp(unsigned Opcode) const {
bool X86TargetLowering::isCommutativeBinOp(unsigned Opcode) const {
switch (Opcode) {
// TODO: Add more X86ISD opcodes once we have test coverage.
+ case X86ISD::AVG:
case X86ISD::PCMPEQ:
case X86ISD::PMULDQ:
case X86ISD::PMULUDQ:
@@ -32047,6 +33056,36 @@ bool X86TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
bool X86TargetLowering::shouldSinkOperands(Instruction *I,
SmallVectorImpl<Use *> &Ops) const {
+ using namespace llvm::PatternMatch;
+
+ FixedVectorType *VTy = dyn_cast<FixedVectorType>(I->getType());
+ if (!VTy)
+ return false;
+
+ if (I->getOpcode() == Instruction::Mul &&
+ VTy->getElementType()->isIntegerTy(64)) {
+ for (auto &Op : I->operands()) {
+ // Make sure we are not already sinking this operand
+ if (any_of(Ops, [&](Use *U) { return U->get() == Op; }))
+ continue;
+
+ // Look for PMULDQ pattern where the input is a sext_inreg from vXi32 or
+ // the PMULUDQ pattern where the input is a zext_inreg from vXi32.
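+      // e.g. in IR, "ashr (shl %x, 32), 32" (sext_inreg) feeds PMULDQ, while
+      // "and %x, 4294967295" (zext_inreg) feeds PMULUDQ.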
+ if (Subtarget.hasSSE41() &&
+ match(Op.get(), m_AShr(m_Shl(m_Value(), m_SpecificInt(32)),
+ m_SpecificInt(32)))) {
+ Ops.push_back(&cast<Instruction>(Op)->getOperandUse(0));
+ Ops.push_back(&Op);
+ } else if (Subtarget.hasSSE2() &&
+ match(Op.get(),
+ m_And(m_Value(), m_SpecificInt(UINT64_C(0xffffffff))))) {
+ Ops.push_back(&Op);
+ }
+ }
+
+ return !Ops.empty();
+ }
+
// A uniform shift amount in a vector shift or funnel shift may be much
// cheaper than a generic variable vector shift, so make that pattern visible
// to SDAG by sinking the shuffle instruction next to the shift.
@@ -32102,6 +33141,8 @@ bool X86TargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
return false;
switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::f16:
+ return Subtarget.hasFP16();
case MVT::f32:
case MVT::f64:
return true;
@@ -32180,13 +33221,9 @@ static bool isEFLAGSLiveAfter(MachineBasicBlock::iterator Itr,
// If we hit the end of the block, check whether EFLAGS is live into a
// successor.
- for (MachineBasicBlock::succ_iterator sItr = BB->succ_begin(),
- sEnd = BB->succ_end();
- sItr != sEnd; ++sItr) {
- MachineBasicBlock* succ = *sItr;
- if (succ->isLiveIn(X86::EFLAGS))
+ for (MachineBasicBlock *Succ : BB->successors())
+ if (Succ->isLiveIn(X86::EFLAGS))
return true;
- }
return false;
}
@@ -32576,6 +33613,7 @@ static bool checkAndUpdateEFLAGSKill(MachineBasicBlock::iterator SelectItr,
// conditional jump around it.
static bool isCMOVPseudo(MachineInstr &MI) {
switch (MI.getOpcode()) {
+ case X86::CMOV_FR16X:
case X86::CMOV_FR32:
case X86::CMOV_FR32X:
case X86::CMOV_FR64:
@@ -32922,14 +33960,11 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr &MI,
}
// Transfer any debug instructions inside the CMOV sequence to the sunk block.
- auto DbgEnd = MachineBasicBlock::iterator(LastCMOV);
- auto DbgIt = MachineBasicBlock::iterator(MI);
- while (DbgIt != DbgEnd) {
- auto Next = std::next(DbgIt);
- if (DbgIt->isDebugInstr())
- SinkMBB->push_back(DbgIt->removeFromParent());
- DbgIt = Next;
- }
+ auto DbgRange = llvm::make_range(MachineBasicBlock::iterator(MI),
+ MachineBasicBlock::iterator(LastCMOV));
+ for (MachineInstr &MI : llvm::make_early_inc_range(DbgRange))
+ if (MI.isDebugInstr())
+ SinkMBB->push_back(MI.removeFromParent());
// Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
SinkMBB->splice(SinkMBB->end(), ThisMBB,
@@ -34576,6 +35611,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case X86::PTDPBF16PS: {
unsigned Opc;
switch (MI.getOpcode()) {
+ default: llvm_unreachable("illegal opcode!");
case X86::PTDPBSSD: Opc = X86::TDPBSSD; break;
case X86::PTDPBSUD: Opc = X86::TDPBSUD; break;
case X86::PTDPBUSD: Opc = X86::TDPBUSD; break;
@@ -34603,6 +35639,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case X86::PTILESTORED: {
unsigned Opc;
switch (MI.getOpcode()) {
+ default: llvm_unreachable("illegal opcode!");
case X86::PTILELOADD: Opc = X86::TILELOADD; break;
case X86::PTILELOADDT1: Opc = X86::TILELOADDT1; break;
case X86::PTILESTORED: Opc = X86::TILESTORED; break;
@@ -34795,8 +35832,8 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
APInt DemandedLHS, DemandedRHS;
getPackDemandedElts(VT, DemandedElts, DemandedLHS, DemandedRHS);
- Known.One = APInt::getAllOnesValue(BitWidth * 2);
- Known.Zero = APInt::getAllOnesValue(BitWidth * 2);
+ Known.One = APInt::getAllOnes(BitWidth * 2);
+ Known.Zero = APInt::getAllOnes(BitWidth * 2);
KnownBits Known2;
if (!!DemandedLHS) {
@@ -35197,17 +36234,16 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
unsigned NumMaskElts = Mask.size();
unsigned MaskEltSize = MaskVT.getScalarSizeInBits();
- // Match against a VZEXT_MOVL vXi32 zero-extending instruction.
- if (MaskEltSize == 32 && Mask[0] == 0) {
- if (isUndefOrZero(Mask[1]) && isUndefInRange(Mask, 2, NumMaskElts - 2)) {
+ // Match against a VZEXT_MOVL vXi32 and vXi16 zero-extending instruction.
+ if (Mask[0] == 0 &&
+ (MaskEltSize == 32 || (MaskEltSize == 16 && Subtarget.hasFP16()))) {
+ if ((isUndefOrZero(Mask[1]) && isUndefInRange(Mask, 2, NumMaskElts - 2)) ||
+ (V1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1))) {
Shuffle = X86ISD::VZEXT_MOVL;
- SrcVT = DstVT = !Subtarget.hasSSE2() ? MVT::v4f32 : MaskVT;
- return true;
- }
- if (V1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
- isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1)) {
- Shuffle = X86ISD::VZEXT_MOVL;
- SrcVT = DstVT = !Subtarget.hasSSE2() ? MVT::v4f32 : MaskVT;
+ SrcVT = DstVT = MaskEltSize == 16 ? MVT::v8f16
+ : !Subtarget.hasSSE2() ? MVT::v4f32
+ : MaskVT;
return true;
}
}
@@ -35251,11 +36287,14 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
}
// Match against a VZEXT_MOVL instruction, SSE1 only supports 32-bits (MOVSS).
- if (((MaskEltSize == 32) || (MaskEltSize == 64 && Subtarget.hasSSE2())) &&
+ if (((MaskEltSize == 32) || (MaskEltSize == 64 && Subtarget.hasSSE2()) ||
+ (MaskEltSize == 16 && Subtarget.hasFP16())) &&
isUndefOrEqual(Mask[0], 0) &&
isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1)) {
Shuffle = X86ISD::VZEXT_MOVL;
- SrcVT = DstVT = !Subtarget.hasSSE2() ? MVT::v4f32 : MaskVT;
+ SrcVT = DstVT = MaskEltSize == 16 ? MVT::v8f16
+ : !Subtarget.hasSSE2() ? MVT::v4f32
+ : MaskVT;
return true;
}
@@ -35501,6 +36540,12 @@ static bool matchBinaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
SrcVT = DstVT = MVT::v4f32;
return true;
}
+ if (isTargetShuffleEquivalent(MaskVT, Mask, {8, 1, 2, 3, 4, 5, 6, 7}) &&
+ Subtarget.hasFP16()) {
+ Shuffle = X86ISD::MOVSH;
+ SrcVT = DstVT = MVT::v8f16;
+ return true;
+ }
}
// Attempt to match against either an unary or binary PACKSS/PACKUS shuffle.
@@ -35538,8 +36583,8 @@ static bool matchBinaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
unsigned NumV2Elts = V2.getValueType().getVectorNumElements();
unsigned Scale1 = NumV1Elts / NumMaskElts;
unsigned Scale2 = NumV2Elts / NumMaskElts;
- APInt DemandedZeroV1 = APInt::getNullValue(NumV1Elts);
- APInt DemandedZeroV2 = APInt::getNullValue(NumV2Elts);
+ APInt DemandedZeroV1 = APInt::getZero(NumV1Elts);
+ APInt DemandedZeroV2 = APInt::getZero(NumV2Elts);
for (unsigned i = 0; i != NumMaskElts; ++i) {
int M = Mask[i];
if (M == SM_SentinelUndef)
@@ -35560,12 +36605,58 @@ static bool matchBinaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
IsBlend = false;
break;
}
- if (IsBlend &&
- DAG.computeKnownBits(V1, DemandedZeroV1).isZero() &&
- DAG.computeKnownBits(V2, DemandedZeroV2).isZero()) {
- Shuffle = ISD::OR;
- SrcVT = DstVT = MaskVT.changeTypeToInteger();
- return true;
+ if (IsBlend) {
+ if (DAG.computeKnownBits(V1, DemandedZeroV1).isZero() &&
+ DAG.computeKnownBits(V2, DemandedZeroV2).isZero()) {
+ Shuffle = ISD::OR;
+ SrcVT = DstVT = MaskVT.changeTypeToInteger();
+ return true;
+ }
+ if (NumV1Elts == NumV2Elts && NumV1Elts == NumMaskElts) {
+ // FIXME: handle mismatched sizes?
+ // TODO: investigate if `ISD::OR` handling in
+ // `TargetLowering::SimplifyDemandedVectorElts` can be improved instead.
+ auto computeKnownBitsElementWise = [&DAG](SDValue V) {
+ unsigned NumElts = V.getValueType().getVectorNumElements();
+ KnownBits Known(NumElts);
+ for (unsigned EltIdx = 0; EltIdx != NumElts; ++EltIdx) {
+ APInt Mask = APInt::getOneBitSet(NumElts, EltIdx);
+ KnownBits PeepholeKnown = DAG.computeKnownBits(V, Mask);
+ if (PeepholeKnown.isZero())
+ Known.Zero.setBit(EltIdx);
+ if (PeepholeKnown.isAllOnes())
+ Known.One.setBit(EltIdx);
+ }
+ return Known;
+ };
+
+ KnownBits V1Known = computeKnownBitsElementWise(V1);
+ KnownBits V2Known = computeKnownBitsElementWise(V2);
+
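+      // For a lane that picks one input, OR behaves as a blend only if the
+      // other input is known zero or the picked input is known all-ones;
+      // lanes that must be zero need both inputs known zero.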
+ for (unsigned i = 0; i != NumMaskElts && IsBlend; ++i) {
+ int M = Mask[i];
+ if (M == SM_SentinelUndef)
+ continue;
+ if (M == SM_SentinelZero) {
+ IsBlend &= V1Known.Zero[i] && V2Known.Zero[i];
+ continue;
+ }
+ if (M == (int)i) {
+ IsBlend &= V2Known.Zero[i] || V1Known.One[i];
+ continue;
+ }
+ if (M == (int)(i + NumMaskElts)) {
+ IsBlend &= V1Known.Zero[i] || V2Known.One[i];
+ continue;
+ }
+ llvm_unreachable("will not get here.");
+ }
+ if (IsBlend) {
+ Shuffle = ISD::OR;
+ SrcVT = DstVT = MaskVT.changeTypeToInteger();
+ return true;
+ }
+ }
}
}
@@ -35817,13 +36908,15 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
return CanonicalizeShuffleInput(RootVT, V1);
}
+ SmallVector<int, 64> Mask(BaseMask.begin(), BaseMask.end());
+
// See if the shuffle is a hidden identity shuffle - repeated args in HOPs
// etc. can be simplified.
- if (VT1 == VT2 && VT1.getSizeInBits() == RootSizeInBits) {
+ if (VT1 == VT2 && VT1.getSizeInBits() == RootSizeInBits && VT1.isVector()) {
SmallVector<int> ScaledMask, IdentityMask;
unsigned NumElts = VT1.getVectorNumElements();
- if (BaseMask.size() <= NumElts &&
- scaleShuffleElements(BaseMask, NumElts, ScaledMask)) {
+ if (Mask.size() <= NumElts &&
+ scaleShuffleElements(Mask, NumElts, ScaledMask)) {
for (unsigned i = 0; i != NumElts; ++i)
IdentityMask.push_back(i);
if (isTargetShuffleEquivalent(RootVT, ScaledMask, IdentityMask, V1, V2))
@@ -35837,35 +36930,36 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
// If the upper subvectors are zeroable, then an extract+insert is more
// optimal than using X86ISD::SHUF128. The insertion is free, even if it has
// to zero the upper subvectors.
- if (isUndefOrZeroInRange(BaseMask, 1, NumBaseMaskElts - 1)) {
+ if (isUndefOrZeroInRange(Mask, 1, NumBaseMaskElts - 1)) {
if (Depth == 0 && Root.getOpcode() == ISD::INSERT_SUBVECTOR)
return SDValue(); // Nothing to do!
- assert(isInRange(BaseMask[0], 0, NumBaseMaskElts) &&
+ assert(isInRange(Mask[0], 0, NumBaseMaskElts) &&
"Unexpected lane shuffle");
Res = CanonicalizeShuffleInput(RootVT, V1);
- unsigned SubIdx = BaseMask[0] * (NumRootElts / NumBaseMaskElts);
- bool UseZero = isAnyZero(BaseMask);
+ unsigned SubIdx = Mask[0] * (NumRootElts / NumBaseMaskElts);
+ bool UseZero = isAnyZero(Mask);
Res = extractSubVector(Res, SubIdx, DAG, DL, BaseMaskEltSizeInBits);
return widenSubVector(Res, UseZero, Subtarget, DAG, DL, RootSizeInBits);
}
// Narrow shuffle mask to v4x128.
- SmallVector<int, 4> Mask;
+ SmallVector<int, 4> ScaledMask;
assert((BaseMaskEltSizeInBits % 128) == 0 && "Illegal mask size");
- narrowShuffleMaskElts(BaseMaskEltSizeInBits / 128, BaseMask, Mask);
+ narrowShuffleMaskElts(BaseMaskEltSizeInBits / 128, Mask, ScaledMask);
// Try to lower to vshuf64x2/vshuf32x4.
- auto MatchSHUF128 = [&](MVT ShuffleVT, const SDLoc &DL, ArrayRef<int> Mask,
- SDValue V1, SDValue V2, SelectionDAG &DAG) {
+ auto MatchSHUF128 = [&](MVT ShuffleVT, const SDLoc &DL,
+ ArrayRef<int> ScaledMask, SDValue V1, SDValue V2,
+ SelectionDAG &DAG) {
unsigned PermMask = 0;
// Ensure elements came from the same Op.
SDValue Ops[2] = {DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT)};
for (int i = 0; i < 4; ++i) {
- assert(Mask[i] >= -1 && "Illegal shuffle sentinel value");
- if (Mask[i] < 0)
+ assert(ScaledMask[i] >= -1 && "Illegal shuffle sentinel value");
+ if (ScaledMask[i] < 0)
continue;
- SDValue Op = Mask[i] >= 4 ? V2 : V1;
+ SDValue Op = ScaledMask[i] >= 4 ? V2 : V1;
unsigned OpIndex = i / 2;
if (Ops[OpIndex].isUndef())
Ops[OpIndex] = Op;
@@ -35875,7 +36969,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
// Convert the 128-bit shuffle mask selection values into 128-bit
// selection bits defined by a vshuf64x2 instruction's immediate control
// byte.
- PermMask |= (Mask[i] % 4) << (i * 2);
+ PermMask |= (ScaledMask[i] % 4) << (i * 2);
}
return DAG.getNode(X86ISD::SHUF128, DL, ShuffleVT,
@@ -35887,18 +36981,20 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
// FIXME: Is there a better way to do this? is256BitLaneRepeatedShuffleMask
// doesn't work because our mask is for 128 bits and we don't have an MVT
// to match that.
- bool PreferPERMQ =
- UnaryShuffle && isUndefOrInRange(Mask[0], 0, 2) &&
- isUndefOrInRange(Mask[1], 0, 2) && isUndefOrInRange(Mask[2], 2, 4) &&
- isUndefOrInRange(Mask[3], 2, 4) &&
- (Mask[0] < 0 || Mask[2] < 0 || Mask[0] == (Mask[2] % 2)) &&
- (Mask[1] < 0 || Mask[3] < 0 || Mask[1] == (Mask[3] % 2));
-
- if (!isAnyZero(Mask) && !PreferPERMQ) {
+ bool PreferPERMQ = UnaryShuffle && isUndefOrInRange(ScaledMask[0], 0, 2) &&
+ isUndefOrInRange(ScaledMask[1], 0, 2) &&
+ isUndefOrInRange(ScaledMask[2], 2, 4) &&
+ isUndefOrInRange(ScaledMask[3], 2, 4) &&
+ (ScaledMask[0] < 0 || ScaledMask[2] < 0 ||
+ ScaledMask[0] == (ScaledMask[2] % 2)) &&
+ (ScaledMask[1] < 0 || ScaledMask[3] < 0 ||
+ ScaledMask[1] == (ScaledMask[3] % 2));
+
+ if (!isAnyZero(ScaledMask) && !PreferPERMQ) {
if (Depth == 0 && Root.getOpcode() == X86ISD::SHUF128)
return SDValue(); // Nothing to do!
MVT ShuffleVT = (FloatDomain ? MVT::v8f64 : MVT::v8i64);
- if (SDValue V = MatchSHUF128(ShuffleVT, DL, Mask, V1, V2, DAG))
+ if (SDValue V = MatchSHUF128(ShuffleVT, DL, ScaledMask, V1, V2, DAG))
return DAG.getBitcast(RootVT, V);
}
}
@@ -35908,25 +37004,27 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
// If the upper half is zeroable, then an extract+insert is more optimal
// than using X86ISD::VPERM2X128. The insertion is free, even if it has to
// zero the upper half.
- if (isUndefOrZero(BaseMask[1])) {
+ if (isUndefOrZero(Mask[1])) {
if (Depth == 0 && Root.getOpcode() == ISD::INSERT_SUBVECTOR)
return SDValue(); // Nothing to do!
- assert(isInRange(BaseMask[0], 0, 2) && "Unexpected lane shuffle");
+ assert(isInRange(Mask[0], 0, 2) && "Unexpected lane shuffle");
Res = CanonicalizeShuffleInput(RootVT, V1);
- Res = extract128BitVector(Res, BaseMask[0] * (NumRootElts / 2), DAG, DL);
- return widenSubVector(Res, BaseMask[1] == SM_SentinelZero, Subtarget, DAG,
- DL, 256);
+ Res = extract128BitVector(Res, Mask[0] * (NumRootElts / 2), DAG, DL);
+ return widenSubVector(Res, Mask[1] == SM_SentinelZero, Subtarget, DAG, DL,
+ 256);
}
- // If we're splatting the low subvector, an insert-subvector 'concat'
+ // If we're inserting the low subvector, an insert-subvector 'concat'
// pattern is quicker than VPERM2X128.
// TODO: Add AVX2 support instead of VPERMQ/VPERMPD.
- if (BaseMask[0] == 0 && BaseMask[1] == 0 && !Subtarget.hasAVX2()) {
+ if (BaseMask[0] == 0 && (BaseMask[1] == 0 || BaseMask[1] == 2) &&
+ !Subtarget.hasAVX2()) {
if (Depth == 0 && Root.getOpcode() == ISD::INSERT_SUBVECTOR)
return SDValue(); // Nothing to do!
- Res = CanonicalizeShuffleInput(RootVT, V1);
- Res = extractSubVector(Res, 0, DAG, DL, 128);
- return concatSubVectors(Res, Res, DAG, DL);
+ SDValue Lo = CanonicalizeShuffleInput(RootVT, V1);
+ SDValue Hi = CanonicalizeShuffleInput(RootVT, BaseMask[1] == 0 ? V1 : V2);
+ Hi = extractSubVector(Hi, 0, DAG, DL, 128);
+ return insertSubVector(Lo, Hi, NumRootElts / 2, DAG, DL, 128);
}
if (Depth == 0 && Root.getOpcode() == X86ISD::VPERM2X128)
@@ -35936,11 +37034,11 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
// we need to use the zeroing feature.
// Prefer blends for sequential shuffles unless we are optimizing for size.
if (UnaryShuffle &&
- !(Subtarget.hasAVX2() && isUndefOrInRange(BaseMask, 0, 2)) &&
- (OptForSize || !isSequentialOrUndefOrZeroInRange(BaseMask, 0, 2, 0))) {
+ !(Subtarget.hasAVX2() && isUndefOrInRange(Mask, 0, 2)) &&
+ (OptForSize || !isSequentialOrUndefOrZeroInRange(Mask, 0, 2, 0))) {
unsigned PermMask = 0;
- PermMask |= ((BaseMask[0] < 0 ? 0x8 : (BaseMask[0] & 1)) << 0);
- PermMask |= ((BaseMask[1] < 0 ? 0x8 : (BaseMask[1] & 1)) << 4);
+ PermMask |= ((Mask[0] < 0 ? 0x8 : (Mask[0] & 1)) << 0);
+ PermMask |= ((Mask[1] < 0 ? 0x8 : (Mask[1] & 1)) << 4);
return DAG.getNode(
X86ISD::VPERM2X128, DL, RootVT, CanonicalizeShuffleInput(RootVT, V1),
DAG.getUNDEF(RootVT), DAG.getTargetConstant(PermMask, DL, MVT::i8));
@@ -35951,16 +37049,15 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
// TODO - handle AVX512VL cases with X86ISD::SHUF128.
if (!UnaryShuffle && !IsMaskedShuffle) {
- assert(llvm::all_of(BaseMask, [](int M) { return 0 <= M && M < 4; }) &&
+ assert(llvm::all_of(Mask, [](int M) { return 0 <= M && M < 4; }) &&
"Unexpected shuffle sentinel value");
// Prefer blends to X86ISD::VPERM2X128.
- if (!((BaseMask[0] == 0 && BaseMask[1] == 3) ||
- (BaseMask[0] == 2 && BaseMask[1] == 1))) {
+ if (!((Mask[0] == 0 && Mask[1] == 3) || (Mask[0] == 2 && Mask[1] == 1))) {
unsigned PermMask = 0;
- PermMask |= ((BaseMask[0] & 3) << 0);
- PermMask |= ((BaseMask[1] & 3) << 4);
- SDValue LHS = isInRange(BaseMask[0], 0, 2) ? V1 : V2;
- SDValue RHS = isInRange(BaseMask[1], 0, 2) ? V1 : V2;
+ PermMask |= ((Mask[0] & 3) << 0);
+ PermMask |= ((Mask[1] & 3) << 4);
+ SDValue LHS = isInRange(Mask[0], 0, 2) ? V1 : V2;
+ SDValue RHS = isInRange(Mask[1], 0, 2) ? V1 : V2;
return DAG.getNode(X86ISD::VPERM2X128, DL, RootVT,
CanonicalizeShuffleInput(RootVT, LHS),
CanonicalizeShuffleInput(RootVT, RHS),
@@ -35971,13 +37068,12 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
// For masks that have been widened to 128-bit elements or more,
// narrow back down to 64-bit elements.
- SmallVector<int, 64> Mask;
if (BaseMaskEltSizeInBits > 64) {
assert((BaseMaskEltSizeInBits % 64) == 0 && "Illegal mask size");
int MaskScale = BaseMaskEltSizeInBits / 64;
- narrowShuffleMaskElts(MaskScale, BaseMask, Mask);
- } else {
- Mask.assign(BaseMask.begin(), BaseMask.end());
+ SmallVector<int, 64> ScaledMask;
+ narrowShuffleMaskElts(MaskScale, Mask, ScaledMask);
+ Mask = std::move(ScaledMask);
}
// For masked shuffles, we're trying to match the root width for better
@@ -36029,7 +37125,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
if (isUndefOrEqual(Mask, 0)) {
if (V1.getValueType() == MaskVT &&
V1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
- MayFoldLoad(V1.getOperand(0))) {
+ X86::mayFoldLoad(V1.getOperand(0), Subtarget)) {
if (Depth == 0 && Root.getOpcode() == X86ISD::VBROADCAST)
return SDValue(); // Nothing to do!
Res = V1.getOperand(0);
@@ -36306,8 +37402,8 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
if (UnaryShuffle && MaskContainsZeros && AllowVariablePerLaneMask &&
isSequentialOrUndefOrZeroInRange(Mask, 0, NumMaskElts, 0) &&
DAG.getTargetLoweringInfo().isTypeLegal(MaskVT)) {
- APInt Zero = APInt::getNullValue(MaskEltSizeInBits);
- APInt AllOnes = APInt::getAllOnesValue(MaskEltSizeInBits);
+ APInt Zero = APInt::getZero(MaskEltSizeInBits);
+ APInt AllOnes = APInt::getAllOnes(MaskEltSizeInBits);
APInt UndefElts(NumMaskElts, 0);
SmallVector<APInt, 64> EltBits(NumMaskElts, Zero);
for (unsigned i = 0; i != NumMaskElts; ++i) {
@@ -36804,10 +37900,11 @@ static SDValue combineX86ShufflesConstants(ArrayRef<SDValue> Ops,
return SDValue();
}
- // Only fold if at least one of the constants is only used once or
- // the combined shuffle has included a variable mask shuffle, this
- // is to avoid constant pool bloat.
- if (!OneUseConstantOp && !HasVariableMask)
+  // If we're optimizing for size, only fold if at least one of the constants
+  // is only used once or the combined shuffle has included a variable mask
+  // shuffle; this avoids constant pool bloat.
+ bool IsOptimizingSize = DAG.shouldOptForSize();
+ if (IsOptimizingSize && !OneUseConstantOp && !HasVariableMask)
return SDValue();
// Shuffle the constant bits according to the mask.
@@ -36816,7 +37913,7 @@ static SDValue combineX86ShufflesConstants(ArrayRef<SDValue> Ops,
APInt ZeroElts(NumMaskElts, 0);
APInt ConstantElts(NumMaskElts, 0);
SmallVector<APInt, 8> ConstantBitData(NumMaskElts,
- APInt::getNullValue(MaskSizeInBits));
+ APInt::getZero(MaskSizeInBits));
for (unsigned i = 0; i != NumMaskElts; ++i) {
int M = Mask[i];
if (M == SM_SentinelUndef) {
@@ -36847,10 +37944,10 @@ static SDValue combineX86ShufflesConstants(ArrayRef<SDValue> Ops,
ConstantElts.setBit(i);
ConstantBitData[i] = Bits;
}
- assert((UndefElts | ZeroElts | ConstantElts).isAllOnesValue());
+ assert((UndefElts | ZeroElts | ConstantElts).isAllOnes());
// Attempt to create a zero vector.
- if ((UndefElts | ZeroElts).isAllOnesValue())
+ if ((UndefElts | ZeroElts).isAllOnes())
return getZeroVector(Root.getSimpleValueType(), Subtarget, DAG, DL);
// Create the constant data.
@@ -36931,6 +38028,10 @@ static SDValue combineX86ShufflesRecursively(
if (!VT.isVector() || !VT.isSimple())
return SDValue(); // Bail if we hit a non-simple non-vector.
+ // FIXME: Just bail on f16 for now.
+ if (VT.getVectorElementType() == MVT::f16)
+ return SDValue();
+
assert((RootSizeInBits % VT.getSizeInBits()) == 0 &&
"Can only combine shuffles upto size of the root op.");
@@ -36939,7 +38040,7 @@ static SDValue combineX86ShufflesRecursively(
SmallVector<int, 64> OpMask;
SmallVector<SDValue, 2> OpInputs;
APInt OpUndef, OpZero;
- APInt OpDemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
+ APInt OpDemandedElts = APInt::getAllOnes(VT.getVectorNumElements());
bool IsOpVariableMask = isTargetShuffleVariableMask(Op.getOpcode());
if (!getTargetShuffleInputs(Op, OpDemandedElts, OpInputs, OpMask, OpUndef,
OpZero, DAG, Depth, false))
@@ -36981,14 +38082,14 @@ static SDValue combineX86ShufflesRecursively(
// Only resolve zeros if it will remove an input, otherwise we might end
// up in an infinite loop.
bool ResolveKnownZeros = true;
- if (!OpZero.isNullValue()) {
- APInt UsedInputs = APInt::getNullValue(OpInputs.size());
+ if (!OpZero.isZero()) {
+ APInt UsedInputs = APInt::getZero(OpInputs.size());
for (int i = 0, e = OpMask.size(); i != e; ++i) {
int M = OpMask[i];
if (OpUndef[i] || OpZero[i] || isUndefOrZero(M))
continue;
UsedInputs.setBit(M / OpMask.size());
- if (UsedInputs.isAllOnesValue()) {
+ if (UsedInputs.isAllOnes()) {
ResolveKnownZeros = false;
break;
}
@@ -37178,6 +38279,48 @@ static SDValue combineX86ShufflesRecursively(
Ops, Mask, RootSizeInBits, SDLoc(Root), DAG, Subtarget))
return DAG.getBitcast(Root.getValueType(), HOp);
+ // Try to refine our inputs given our knowledge of target shuffle mask.
+ for (auto I : enumerate(Ops)) {
+ int OpIdx = I.index();
+ SDValue &Op = I.value();
+
+ // What range of shuffle mask element values results in picking from Op?
+ int Lo = OpIdx * Mask.size();
+ int Hi = Lo + Mask.size();
+
+ // Which elements of Op do we demand, given the mask's granularity?
+ APInt OpDemandedElts(Mask.size(), 0);
+ for (int MaskElt : Mask) {
+ if (isInRange(MaskElt, Lo, Hi)) { // Picks from Op?
+ int OpEltIdx = MaskElt - Lo;
+ OpDemandedElts.setBit(OpEltIdx);
+ }
+ }
+
+ // Is the shuffle result smaller than the root?
+ if (Op.getValueSizeInBits() < RootSizeInBits) {
+ // We padded the mask with undefs. But we now need to undo that.
+ unsigned NumExpectedVectorElts = Mask.size();
+ unsigned EltSizeInBits = RootSizeInBits / NumExpectedVectorElts;
+ unsigned NumOpVectorElts = Op.getValueSizeInBits() / EltSizeInBits;
+ assert(!OpDemandedElts.extractBits(
+ NumExpectedVectorElts - NumOpVectorElts, NumOpVectorElts) &&
+ "Demanding the virtual undef widening padding?");
+ OpDemandedElts = OpDemandedElts.trunc(NumOpVectorElts); // NUW
+ }
+
+ // The Op itself may be of different VT, so we need to scale the mask.
+ unsigned NumOpElts = Op.getValueType().getVectorNumElements();
+    APInt OpScaledDemandedElts =
+        APIntOps::ScaleBitMask(OpDemandedElts, NumOpElts);
+
+    // Can this operand be simplified any further, given its demanded elements?
+ if (SDValue NewOp =
+ DAG.getTargetLoweringInfo().SimplifyMultipleUseDemandedVectorElts(
+ Op, OpScaledDemandedElts, DAG))
+ Op = NewOp;
+ }
+ // FIXME: should we rerun resolveTargetShuffleInputsAndMask() now?
+
// Widen any subvector shuffle inputs we've collected.
if (any_of(Ops, [RootSizeInBits](SDValue Op) {
return Op.getValueSizeInBits() < RootSizeInBits;
@@ -37424,8 +38567,10 @@ static SDValue combineCommutableSHUFP(SDValue N, MVT VT, const SDLoc &DL,
SDValue N0 = V.getOperand(0);
SDValue N1 = V.getOperand(1);
unsigned Imm = V.getConstantOperandVal(2);
- if (!MayFoldLoad(peekThroughOneUseBitcasts(N0)) ||
- MayFoldLoad(peekThroughOneUseBitcasts(N1)))
+ const X86Subtarget &Subtarget =
+ static_cast<const X86Subtarget &>(DAG.getSubtarget());
+ if (!X86::mayFoldLoad(peekThroughOneUseBitcasts(N0), Subtarget) ||
+ X86::mayFoldLoad(peekThroughOneUseBitcasts(N1), Subtarget))
return SDValue();
Imm = ((Imm & 0x0F) << 4) | ((Imm & 0xF0) >> 4);
return DAG.getNode(X86ISD::SHUFP, DL, VT, N1, N0,
@@ -37721,6 +38866,13 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
if (Src.getOpcode() == ISD::SCALAR_TO_VECTOR)
return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Src.getOperand(0));
+ // broadcast(extract_vector_elt(x, 0)) -> broadcast(x).
+ if (Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ isNullConstant(Src.getOperand(1)) &&
+ DAG.getTargetLoweringInfo().isTypeLegal(
+ Src.getOperand(0).getValueType()))
+ return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Src.getOperand(0));
+
// Share broadcast with the longest vector and extract low subvector (free).
// Ensure the same SDValue from the SDNode use is being used.
for (SDNode *User : Src->uses())
@@ -37988,6 +39140,41 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
}
return SDValue();
}
+ case X86ISD::SHUFP: {
+ // Fold shufps(shuffle(x),shuffle(y)) -> shufps(x,y).
+ // This is a more relaxed shuffle combiner that can ignore oneuse limits.
+ // TODO: Support types other than v4f32.
+ if (VT == MVT::v4f32) {
+ bool Updated = false;
+ SmallVector<int> Mask;
+ SmallVector<SDValue> Ops;
+ if (getTargetShuffleMask(N.getNode(), VT, false, Ops, Mask) &&
+ Ops.size() == 2) {
+ for (int i = 0; i != 2; ++i) {
+ SmallVector<SDValue> SubOps;
+ SmallVector<int> SubMask, SubScaledMask;
+ SDValue Sub = peekThroughBitcasts(Ops[i]);
+ // TODO: Scaling might be easier if we specify the demanded elts.
+ if (getTargetShuffleInputs(Sub, SubOps, SubMask, DAG, 0, false) &&
+ scaleShuffleElements(SubMask, 4, SubScaledMask) &&
+ SubOps.size() == 1 && isUndefOrInRange(SubScaledMask, 0, 4)) {
+ int Ofs = i * 2;
+ Mask[Ofs + 0] = SubScaledMask[Mask[Ofs + 0] % 4] + (i * 4);
+ Mask[Ofs + 1] = SubScaledMask[Mask[Ofs + 1] % 4] + (i * 4);
+ Ops[i] = DAG.getBitcast(VT, SubOps[0]);
+ Updated = true;
+ }
+ }
+ }
+ if (Updated) {
+ for (int &M : Mask)
+ M %= 4;
+ Ops.push_back(getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
+ return DAG.getNode(X86ISD::SHUFP, DL, VT, Ops);
+ }
+ }
+ return SDValue();
+ }
case X86ISD::VPERMI: {
// vpermi(bitcast(x)) -> bitcast(vpermi(x)) for same number of elements.
// TODO: Remove when we have preferred domains in combineX86ShuffleChain.
@@ -38057,6 +39244,7 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
assert(Mask.size() == 4);
break;
case X86ISD::MOVSD:
+ case X86ISD::MOVSH:
case X86ISD::MOVSS: {
SDValue N0 = N.getOperand(0);
SDValue N1 = N.getOperand(1);
@@ -38441,6 +39629,12 @@ static SDValue combineShuffleToAddSubOrFMAddSub(SDNode *N,
if (VT.is512BitVector())
return SDValue();
+ // Do not generate X86ISD::ADDSUB node for FP16's vector types even though
+ // the ADDSUB idiom has been successfully recognized. There are no known
+ // X86 targets with FP16 ADDSUB instructions!
+ if (VT.getVectorElementType() == MVT::f16)
+ return SDValue();
+
return DAG.getNode(X86ISD::ADDSUB, DL, VT, Opnd0, Opnd1);
}
@@ -38568,7 +39762,7 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
// Simplify source operands based on shuffle mask.
// TODO - merge this into combineX86ShufflesRecursively.
APInt KnownUndef, KnownZero;
- APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
+ APInt DemandedElts = APInt::getAllOnes(VT.getVectorNumElements());
if (TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
DCI))
return SDValue(N, 0);
@@ -38584,7 +39778,7 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetShuffle(
TargetLowering::TargetLoweringOpt &TLO, unsigned Depth) const {
// If we're demanding all elements don't bother trying to simplify the mask.
unsigned NumElts = DemandedElts.getBitWidth();
- if (DemandedElts.isAllOnesValue())
+ if (DemandedElts.isAllOnes())
return false;
SDValue Mask = Op.getOperand(MaskIndex);
@@ -38671,6 +39865,58 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
KnownZero = LHSZero | RHSZero;
break;
}
+ case X86ISD::VPMADDWD: {
+ APInt LHSUndef, LHSZero;
+ APInt RHSUndef, RHSZero;
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedElts, 2 * NumElts);
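+    // Worked example (shapes assumed for illustration): for a v8i32 VPMADDWD
+    // with DemandedElts = {0}, ScaleBitMask widens the mask to the v16i16
+    // sources as {0,1}, since each i32 result lane sums one pair of adjacent
+    // i16 products.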
+
+ if (SimplifyDemandedVectorElts(LHS, DemandedSrcElts, LHSUndef, LHSZero, TLO,
+ Depth + 1))
+ return true;
+ if (SimplifyDemandedVectorElts(RHS, DemandedSrcElts, RHSUndef, RHSZero, TLO,
+ Depth + 1))
+ return true;
+
+ // TODO: Multiply by zero.
+
+ // If RHS/LHS elements are known zero then we don't need the LHS/RHS equivalent.
+ APInt DemandedLHSElts = DemandedSrcElts & ~RHSZero;
+ if (SimplifyDemandedVectorElts(LHS, DemandedLHSElts, LHSUndef, LHSZero, TLO,
+ Depth + 1))
+ return true;
+ APInt DemandedRHSElts = DemandedSrcElts & ~LHSZero;
+ if (SimplifyDemandedVectorElts(RHS, DemandedRHSElts, RHSUndef, RHSZero, TLO,
+ Depth + 1))
+ return true;
+ break;
+ }
+ case X86ISD::PSADBW: {
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ assert(VT.getScalarType() == MVT::i64 &&
+ LHS.getValueType() == RHS.getValueType() &&
+ LHS.getValueType().getScalarType() == MVT::i8 &&
+ "Unexpected PSADBW types");
+
+ // Aggressively peek through ops to get at the demanded elts.
+ if (!DemandedElts.isAllOnes()) {
+ unsigned NumSrcElts = LHS.getValueType().getVectorNumElements();
+ APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
+ SDValue NewLHS = SimplifyMultipleUseDemandedVectorElts(
+ LHS, DemandedSrcElts, TLO.DAG, Depth + 1);
+ SDValue NewRHS = SimplifyMultipleUseDemandedVectorElts(
+ RHS, DemandedSrcElts, TLO.DAG, Depth + 1);
+ if (NewLHS || NewRHS) {
+ NewLHS = NewLHS ? NewLHS : LHS;
+ NewRHS = NewRHS ? NewRHS : RHS;
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewLHS, NewRHS));
+ }
+ }
+ break;
+ }
case X86ISD::VSHL:
case X86ISD::VSRL:
case X86ISD::VSRA: {
@@ -38706,7 +39952,7 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
return true;
// Aggressively peek through ops to get at the demanded elts.
- if (!DemandedElts.isAllOnesValue())
+ if (!DemandedElts.isAllOnes())
if (SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
Src, DemandedElts, TLO.DAG, Depth + 1))
return TLO.CombineTo(
@@ -38823,7 +40069,7 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
// Aggressively peek through ops to get at the demanded elts.
// TODO - we should do this for all target/faux shuffles ops.
- if (!DemandedElts.isAllOnesValue()) {
+ if (!DemandedElts.isAllOnes()) {
SDValue NewN0 = SimplifyMultipleUseDemandedVectorElts(N0, DemandedLHS,
TLO.DAG, Depth + 1);
SDValue NewN1 = SimplifyMultipleUseDemandedVectorElts(N1, DemandedRHS,
@@ -38860,7 +40106,7 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
// Aggressively peek through ops to get at the demanded elts.
// TODO: Handle repeated operands.
- if (N0 != N1 && !DemandedElts.isAllOnesValue()) {
+ if (N0 != N1 && !DemandedElts.isAllOnes()) {
SDValue NewN0 = SimplifyMultipleUseDemandedVectorElts(N0, DemandedLHS,
TLO.DAG, Depth + 1);
SDValue NewN1 = SimplifyMultipleUseDemandedVectorElts(N1, DemandedRHS,
@@ -39019,15 +40265,11 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
SDLoc DL(Op);
EVT BcstVT = EVT::getVectorVT(*TLO.DAG.getContext(), VT.getScalarType(),
ExtSizeInBits / VT.getScalarSizeInBits());
- SDVTList Tys = TLO.DAG.getVTList(BcstVT, MVT::Other);
- SDValue Ops[] = {MemIntr->getOperand(0), MemIntr->getOperand(1)};
- SDValue Bcst =
- TLO.DAG.getMemIntrinsicNode(X86ISD::SUBV_BROADCAST_LOAD, DL, Tys,
- Ops, MemVT, MemIntr->getMemOperand());
- TLO.DAG.makeEquivalentMemoryOrdering(SDValue(MemIntr, 1),
- Bcst.getValue(1));
- return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Bcst, 0,
- TLO.DAG, DL, ExtSizeInBits));
+ if (SDValue BcstLd =
+ getBROADCAST_LOAD(Opc, DL, BcstVT, MemVT, MemIntr, 0, TLO.DAG))
+ return TLO.CombineTo(Op,
+ insertSubVector(TLO.DAG.getUNDEF(VT), BcstLd, 0,
+ TLO.DAG, DL, ExtSizeInBits));
}
break;
}
@@ -39130,6 +40372,12 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
}
}
+  // For broadcasts, unless we *only* demand the 0'th element, stop attempts
+  // at simplification here; we aren't going to improve things, and this is
+  // better than any potential shuffle.
+ if (isTargetShuffleSplat(Op) && !DemandedElts.isOne())
+ return false;
+
// Get target/faux shuffle mask.
APInt OpUndef, OpZero;
SmallVector<int, 64> OpMask;
@@ -39175,7 +40423,7 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
continue;
int Lo = Src * NumElts;
- APInt SrcElts = APInt::getNullValue(NumElts);
+ APInt SrcElts = APInt::getZero(NumElts);
for (int i = 0; i != NumElts; ++i)
if (DemandedElts[i]) {
int M = OpMask[i] - Lo;
@@ -39197,7 +40445,7 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
// to match. This prevents combineX86ShuffleChain from returning a
// combined shuffle that's the same as the original root, causing an
// infinite loop.
- if (!DemandedElts.isAllOnesValue()) {
+ if (!DemandedElts.isAllOnes()) {
assert(Depth < X86::MaxShuffleCombineDepth && "Depth out of range");
SmallVector<int, 64> DemandedMask(NumElts, SM_SentinelUndef);
@@ -39492,7 +40740,8 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
// Don't attempt this on AVX512 as it might affect broadcast folding.
// TODO: Should we attempt this for i32/i16 splats? They tend to be slower.
if ((BitWidth == 64) && SrcVT.isScalarInteger() && !Subtarget.hasAVX512() &&
- OriginalDemandedBits.countLeadingZeros() >= (BitWidth / 2)) {
+ OriginalDemandedBits.countLeadingZeros() >= (BitWidth / 2) &&
+ Src->hasOneUse()) {
MVT NewSrcVT = MVT::getIntegerVT(BitWidth / 2);
SDValue NewSrc =
TLO.DAG.getNode(ISD::TRUNCATE, SDLoc(Src), NewSrcVT, Src);
@@ -39697,7 +40946,7 @@ SDValue X86TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, SDLoc(Op));
// Bitmask that indicates which ops have only been accessed 'inline'.
- APInt IdentityOp = APInt::getAllOnesValue(NumOps);
+ APInt IdentityOp = APInt::getAllOnes(NumOps);
for (int i = 0; i != NumElts; ++i) {
int M = ShuffleMask[i];
if (!DemandedElts[i] || ShuffleUndef[i])
@@ -40351,9 +41600,9 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
isa<ConstantSDNode>(N0)) {
auto *C = cast<ConstantSDNode>(N0);
- if (C->isAllOnesValue())
+ if (C->isAllOnes())
return DAG.getConstant(1, SDLoc(N0), VT);
- if (C->isNullValue())
+ if (C->isZero())
return DAG.getConstant(0, SDLoc(N0), VT);
}
@@ -40419,6 +41668,7 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
// Check if we have a bitcast from another integer type as well.
if (!((Subtarget.hasSSE1() && VT == MVT::f32) ||
(Subtarget.hasSSE2() && VT == MVT::f64) ||
+ (Subtarget.hasFP16() && VT == MVT::f16) ||
(Subtarget.hasSSE2() && VT.isInteger() && VT.isVector() &&
TLI.isTypeLegal(VT))))
return SDValue();
@@ -40547,7 +41797,7 @@ static SDValue combineMinMaxReduction(SDNode *Extract, SelectionDAG &DAG,
else if (BinOp == ISD::SMIN)
Mask = DAG.getConstant(APInt::getSignedMinValue(MaskEltsBits), DL, SrcVT);
else if (BinOp == ISD::UMAX)
- Mask = DAG.getConstant(APInt::getAllOnesValue(MaskEltsBits), DL, SrcVT);
+ Mask = DAG.getAllOnesConstant(DL, SrcVT);
if (Mask)
MinPos = DAG.getNode(ISD::XOR, DL, SrcVT, Mask, MinPos);
@@ -40994,7 +42244,8 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
/// Extracting a scalar FP value from vector element 0 is free, so extract each
/// operand first, then perform the math as a scalar op.
-static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG) {
+static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
assert(ExtElt->getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Expected extract");
SDValue Vec = ExtElt->getOperand(0);
SDValue Index = ExtElt->getOperand(1);
@@ -41022,7 +42273,8 @@ static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG) {
return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1, Vec.getOperand(2));
}
- if (VT != MVT::f32 && VT != MVT::f64)
+ if (!(VT == MVT::f16 && Subtarget.hasFP16()) && VT != MVT::f32 &&
+ VT != MVT::f64)
return SDValue();
// Vector FP selects don't fit the pattern of FP math ops (because the
@@ -41277,8 +42529,8 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
if (IsPextr) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (TLI.SimplifyDemandedBits(
- SDValue(N, 0), APInt::getAllOnesValue(VT.getSizeInBits()), DCI))
+ if (TLI.SimplifyDemandedBits(SDValue(N, 0),
+ APInt::getAllOnes(VT.getSizeInBits()), DCI))
return SDValue(N, 0);
// PEXTR*(PINSR*(v, s, c), c) -> s (with implicit zext handling).
@@ -41336,7 +42588,7 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
if (SDValue V = combineArithReduction(N, DAG, Subtarget))
return V;
- if (SDValue V = scalarizeExtEltFP(N, DAG))
+ if (SDValue V = scalarizeExtEltFP(N, DAG, Subtarget))
return V;
// Attempt to extract a i1 element by using MOVMSK to extract the signbits
@@ -41573,11 +42825,11 @@ static SDValue combineSelectOfTwoConstants(SDNode *N, SelectionDAG &DAG) {
SDValue R = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
// Multiply condition by the difference if non-one.
- if (!AbsDiff.isOneValue())
+ if (!AbsDiff.isOne())
R = DAG.getNode(ISD::MUL, DL, VT, R, DAG.getConstant(AbsDiff, DL, VT));
// Add the base if non-zero.
- if (!FalseC->isNullValue())
+ if (!FalseC->isZero())
R = DAG.getNode(ISD::ADD, DL, VT, R, SDValue(FalseC, 0));
return R;
@@ -41794,10 +43046,15 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
getTargetShuffleMask(RHS.getNode(), SimpleVT, true, RHSOps, RHSMask)) {
int NumElts = VT.getVectorNumElements();
for (int i = 0; i != NumElts; ++i) {
- if (CondMask[i] < NumElts)
+ // getConstVector sets negative shuffle mask values as undef, so ensure
+ // we hardcode SM_SentinelZero values to zero (0x80).
+ if (CondMask[i] < NumElts) {
+ LHSMask[i] = (LHSMask[i] == SM_SentinelZero) ? 0x80 : LHSMask[i];
RHSMask[i] = 0x80;
- else
+ } else {
LHSMask[i] = 0x80;
+ RHSMask[i] = (RHSMask[i] == SM_SentinelZero) ? 0x80 : RHSMask[i];
+ }
}
LHS = DAG.getNode(X86ISD::PSHUFB, DL, VT, LHS.getOperand(0),
getConstVector(LHSMask, SimpleVT, DAG, DL, true));
@@ -42331,7 +43588,7 @@ static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
// We can handle comparisons with zero in a number of cases by manipulating
// the CC used.
- if (!Comparison.isNullValue())
+ if (!Comparison.isZero())
return SDValue();
if (CC == X86::COND_S && Addend == 1)
@@ -42737,7 +43994,7 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC,
unsigned NumElts = VecVT.getVectorNumElements();
unsigned NumEltBits = VecVT.getScalarSizeInBits();
- bool IsAnyOf = CmpOpcode == X86ISD::CMP && CmpVal.isNullValue();
+ bool IsAnyOf = CmpOpcode == X86ISD::CMP && CmpVal.isZero();
bool IsAllOf = CmpOpcode == X86ISD::SUB && NumElts <= CmpBits &&
CmpVal.isMask(NumElts);
if (!IsAnyOf && !IsAllOf)
@@ -42830,12 +44087,12 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC,
ShuffleInputs.size() == 1 && !isAnyZeroOrUndef(ShuffleMask) &&
ShuffleInputs[0].getValueSizeInBits() == VecVT.getSizeInBits()) {
unsigned NumShuffleElts = ShuffleMask.size();
- APInt DemandedElts = APInt::getNullValue(NumShuffleElts);
+ APInt DemandedElts = APInt::getZero(NumShuffleElts);
for (int M : ShuffleMask) {
assert(0 <= M && M < (int)NumShuffleElts && "Bad unary shuffle index");
DemandedElts.setBit(M);
}
- if (DemandedElts.isAllOnesValue()) {
+ if (DemandedElts.isAllOnes()) {
SDLoc DL(EFLAGS);
SDValue Result = DAG.getBitcast(VecVT, ShuffleInputs[0]);
Result = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Result);
@@ -43316,8 +44573,9 @@ static SDValue combineMulSpecial(uint64_t MulAmt, SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-// If the upper 17 bits of each element are zero then we can use PMADDWD,
-// which is always at least as quick as PMULLD, except on KNL.
+// If the upper 17 bits of either operand are zero and the other operand's
+// upper bits are all sign/zero bits, then we can use PMADDWD, which is always
+// at least as quick as PMULLD, except on KNL.
static SDValue combineMulToPMADDWD(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
if (!Subtarget.hasSSE2())
@@ -43332,33 +44590,92 @@ static SDValue combineMulToPMADDWD(SDNode *N, SelectionDAG &DAG,
if (!VT.isVector() || VT.getVectorElementType() != MVT::i32)
return SDValue();
- // Make sure the type is legal or will be widened to a legal type.
- if (VT != MVT::v2i32 && !DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ // Make sure the type is legal or can split/widen to a legal type.
+ // With AVX512 but without BWI, we would need to split v32i16.
+ unsigned NumElts = VT.getVectorNumElements();
+ if (NumElts == 1 || !isPowerOf2_32(NumElts))
return SDValue();
- MVT WVT = MVT::getVectorVT(MVT::i16, 2 * VT.getVectorNumElements());
+ EVT WVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, 2 * NumElts);
- // Without BWI, we would need to split v32i16.
- if (WVT == MVT::v32i16 && !Subtarget.hasBWI())
+ // With AVX512 but without BWI, we would need to split v32i16.
+ if (32 <= (2 * NumElts) && Subtarget.hasAVX512() && !Subtarget.hasBWI())
return SDValue();
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- // If we are zero extending two steps without SSE4.1, its better to reduce
+  // If we are zero/sign extending two steps without SSE4.1, it's better to
+ // reduce the vmul width instead.
+ if (!Subtarget.hasSSE41() &&
+ (((N0.getOpcode() == ISD::ZERO_EXTEND &&
+ N0.getOperand(0).getScalarValueSizeInBits() <= 8) &&
+ (N1.getOpcode() == ISD::ZERO_EXTEND &&
+ N1.getOperand(0).getScalarValueSizeInBits() <= 8)) ||
+ ((N0.getOpcode() == ISD::SIGN_EXTEND &&
+ N0.getOperand(0).getScalarValueSizeInBits() <= 8) &&
+ (N1.getOpcode() == ISD::SIGN_EXTEND &&
+ N1.getOperand(0).getScalarValueSizeInBits() <= 8))))
+ return SDValue();
+
+  // If we are sign extending a wide vector without SSE4.1, it's better to reduce
// the vmul width instead.
if (!Subtarget.hasSSE41() &&
- (N0.getOpcode() == ISD::ZERO_EXTEND &&
- N0.getOperand(0).getScalarValueSizeInBits() <= 8) &&
- (N1.getOpcode() == ISD::ZERO_EXTEND &&
- N1.getOperand(0).getScalarValueSizeInBits() <= 8))
+ (N0.getOpcode() == ISD::SIGN_EXTEND &&
+ N0.getOperand(0).getValueSizeInBits() > 128) &&
+ (N1.getOpcode() == ISD::SIGN_EXTEND &&
+ N1.getOperand(0).getValueSizeInBits() > 128))
return SDValue();
- APInt Mask17 = APInt::getHighBitsSet(32, 17);
- if (!DAG.MaskedValueIsZero(N1, Mask17) ||
- !DAG.MaskedValueIsZero(N0, Mask17))
+ // Sign bits must extend down to the lowest i16.
+ if (DAG.ComputeMinSignedBits(N1) > 16 || DAG.ComputeMinSignedBits(N0) > 16)
return SDValue();
+ // At least one of the elements must be zero in the upper 17 bits, or can be
+ // safely made zero without altering the final result.
+ auto GetZeroableOp = [&](SDValue Op) {
+ APInt Mask17 = APInt::getHighBitsSet(32, 17);
+ if (DAG.MaskedValueIsZero(Op, Mask17))
+ return Op;
+ // Mask off upper 16-bits of sign-extended constants.
+ if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()))
+ return DAG.getNode(ISD::AND, SDLoc(N), VT, Op,
+ DAG.getConstant(0xFFFF, SDLoc(N), VT));
+ if (Op.getOpcode() == ISD::SIGN_EXTEND && N->isOnlyUserOf(Op.getNode())) {
+ SDValue Src = Op.getOperand(0);
+ // Convert sext(vXi16) to zext(vXi16).
+ if (Src.getScalarValueSizeInBits() == 16 && VT.getSizeInBits() <= 128)
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, Src);
+ // Convert sext(vXi8) to zext(vXi16 sext(vXi8)) on pre-SSE41 targets
+ // which will expand the extension.
+ if (Src.getScalarValueSizeInBits() < 16 && !Subtarget.hasSSE41()) {
+ EVT ExtVT = VT.changeVectorElementType(MVT::i16);
+ Src = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), ExtVT, Src);
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, Src);
+ }
+ }
+    // Convert SIGN_EXTEND_VECTOR_INREG to ZERO_EXTEND_VECTOR_INREG.
+ if (Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG &&
+ N->isOnlyUserOf(Op.getNode())) {
+ SDValue Src = Op.getOperand(0);
+ if (Src.getScalarValueSizeInBits() == 16)
+ return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(N), VT, Src);
+ }
+ // Convert VSRAI(Op, 16) to VSRLI(Op, 16).
+ if (Op.getOpcode() == X86ISD::VSRAI && Op.getConstantOperandVal(1) == 16 &&
+ N->isOnlyUserOf(Op.getNode())) {
+ return DAG.getNode(X86ISD::VSRLI, SDLoc(N), VT, Op.getOperand(0),
+ Op.getOperand(1));
+ }
+ return SDValue();
+ };
+ SDValue ZeroN0 = GetZeroableOp(N0);
+ SDValue ZeroN1 = GetZeroableOp(N1);
+ if (!ZeroN0 && !ZeroN1)
+ return SDValue();
+ N0 = ZeroN0 ? ZeroN0 : N0;
+ N1 = ZeroN1 ? ZeroN1 : N1;
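+  // Worked example (values assumed for illustration): for a constant splat N1
+  // of -3, the 0xFFFF mask above turns each lane into 0x0000FFFD, i.e. the i16
+  // pair (-3, 0); PMADDWD then computes a0 * -3 + 0 * 0 per lane, which
+  // matches the original i32 multiply whenever N0's upper 17 bits are zero.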
+
// Use SplitOpsAndApply to handle AVX splitting.
auto PMADDWDBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
ArrayRef<SDValue> Ops) {
@@ -43412,8 +44729,6 @@ static SDValue combineMulToPMULDQ(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-/// Optimize a single multiply with constant into two operations in order to
-/// implement it with two cheaper instructions, e.g. LEA + SHL, LEA + LEA.
static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
@@ -43428,8 +44743,11 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
if (DCI.isBeforeLegalize() && VT.isVector())
return reduceVMULWidth(N, DAG, Subtarget);
+ // Optimize a single multiply with constant into two operations in order to
+ // implement it with two cheaper instructions, e.g. LEA + SHL, LEA + LEA.
if (!MulConstantOptimization)
return SDValue();
+
// An imul is usually smaller than the alternative sequence.
if (DAG.getMachineFunction().getFunction().hasMinSize())
return SDValue();
@@ -43569,9 +44887,7 @@ static SDValue combineShiftToPMULH(SDNode *N, SelectionDAG &DAG,
"SRL or SRA node is required here!");
SDLoc DL(N);
- // Only do this with SSE4.1. On earlier targets reduceVMULWidth will expand
- // the multiply.
- if (!Subtarget.hasSSE41())
+ if (!Subtarget.hasSSE2())
return SDValue();
// The operation feeding into the shift must be a multiply.
@@ -43964,7 +45280,7 @@ static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;
APInt Undefs(NumDstElts, 0);
- SmallVector<APInt, 32> Bits(NumDstElts, APInt::getNullValue(DstBitsPerElt));
+ SmallVector<APInt, 32> Bits(NumDstElts, APInt::getZero(DstBitsPerElt));
for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
for (unsigned Elt = 0; Elt != NumDstEltsPerLane; ++Elt) {
unsigned SrcIdx = Lane * NumSrcEltsPerLane + Elt % NumSrcEltsPerLane;
@@ -43994,9 +45310,9 @@ static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
if (Val.isIntN(DstBitsPerElt))
Val = Val.trunc(DstBitsPerElt);
else if (Val.isNegative())
- Val = APInt::getNullValue(DstBitsPerElt);
+ Val = APInt::getZero(DstBitsPerElt);
else
- Val = APInt::getAllOnesValue(DstBitsPerElt);
+ Val = APInt::getAllOnes(DstBitsPerElt);
}
Bits[Lane * NumDstEltsPerLane + Elt] = Val;
}
@@ -44048,6 +45364,14 @@ static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
Src1 = Src1 ? Src1 : DAG.getUNDEF(Src0.getValueType());
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Src0, Src1);
}
+
+ // Try again with pack(*_extend_vector_inreg, undef).
+ unsigned VecInRegOpc = IsSigned ? ISD::SIGN_EXTEND_VECTOR_INREG
+ : ISD::ZERO_EXTEND_VECTOR_INREG;
+ if (N0.getOpcode() == VecInRegOpc && N1.isUndef() &&
+ N0.getOperand(0).getScalarValueSizeInBits() < DstBitsPerElt)
+ return getEXTEND_VECTOR_INREG(ExtOpc, SDLoc(N), VT, N0.getOperand(0),
+ DAG);
}
// Attempt to combine as shuffle.
@@ -44066,47 +45390,25 @@ static SDValue combineVectorHADDSUB(SDNode *N, SelectionDAG &DAG,
"Unexpected horizontal add/sub opcode");
if (!shouldUseHorizontalOp(true, DAG, Subtarget)) {
- // For slow-hop targets, if we have a hop with a single op, see if we already
- // have another user that we can reuse and shuffle the result.
MVT VT = N->getSimpleValueType(0);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
- if (VT.is128BitVector() && LHS == RHS) {
- for (SDNode *User : LHS->uses()) {
- if (User != N && User->getOpcode() == N->getOpcode()) {
- MVT ShufVT = VT.isFloatingPoint() ? MVT::v4f32 : MVT::v4i32;
- if (User->getOperand(0) == LHS && !User->getOperand(1).isUndef()) {
- return DAG.getBitcast(
- VT,
- DAG.getVectorShuffle(ShufVT, SDLoc(N),
- DAG.getBitcast(ShufVT, SDValue(User, 0)),
- DAG.getUNDEF(ShufVT), {0, 1, 0, 1}));
- }
- if (User->getOperand(1) == LHS && !User->getOperand(0).isUndef()) {
- return DAG.getBitcast(
- VT,
- DAG.getVectorShuffle(ShufVT, SDLoc(N),
- DAG.getBitcast(ShufVT, SDValue(User, 0)),
- DAG.getUNDEF(ShufVT), {2, 3, 2, 3}));
- }
- }
- }
- }
// HOP(HOP'(X,X),HOP'(Y,Y)) -> HOP(PERMUTE(HOP'(X,Y)),PERMUTE(HOP'(X,Y)).
if (LHS != RHS && LHS.getOpcode() == N->getOpcode() &&
LHS.getOpcode() == RHS.getOpcode() &&
- LHS.getValueType() == RHS.getValueType()) {
+ LHS.getValueType() == RHS.getValueType() &&
+ N->isOnlyUserOf(LHS.getNode()) && N->isOnlyUserOf(RHS.getNode())) {
SDValue LHS0 = LHS.getOperand(0);
- SDValue RHS0 = LHS.getOperand(1);
- SDValue LHS1 = RHS.getOperand(0);
+ SDValue LHS1 = LHS.getOperand(1);
+ SDValue RHS0 = RHS.getOperand(0);
SDValue RHS1 = RHS.getOperand(1);
- if ((LHS0 == RHS0 || LHS0.isUndef() || RHS0.isUndef()) &&
- (LHS1 == RHS1 || LHS1.isUndef() || RHS1.isUndef())) {
+ if ((LHS0 == LHS1 || LHS0.isUndef() || LHS1.isUndef()) &&
+ (RHS0 == RHS1 || RHS0.isUndef() || RHS1.isUndef())) {
SDLoc DL(N);
SDValue Res = DAG.getNode(LHS.getOpcode(), DL, LHS.getValueType(),
- LHS0.isUndef() ? RHS0 : LHS0,
- LHS1.isUndef() ? RHS1 : LHS1);
+ LHS0.isUndef() ? LHS1 : LHS0,
+ RHS0.isUndef() ? RHS1 : RHS0);
MVT ShufVT = MVT::getVectorVT(MVT::i32, VT.getSizeInBits() / 32);
Res = DAG.getBitcast(ShufVT, Res);
SDValue NewLHS =
@@ -44115,9 +45417,8 @@ static SDValue combineVectorHADDSUB(SDNode *N, SelectionDAG &DAG,
SDValue NewRHS =
DAG.getNode(X86ISD::PSHUFD, DL, ShufVT, Res,
getV4X86ShuffleImm8ForMask({2, 3, 2, 3}, DL, DAG));
- DAG.ReplaceAllUsesOfValueWith(LHS, DAG.getBitcast(VT, NewLHS));
- DAG.ReplaceAllUsesOfValueWith(RHS, DAG.getBitcast(VT, NewRHS));
- return SDValue(N, 0);
+ return DAG.getNode(N->getOpcode(), DL, VT, DAG.getBitcast(VT, NewLHS),
+ DAG.getBitcast(VT, NewRHS));
}
}
}
@@ -44154,7 +45455,7 @@ static SDValue combineVectorShiftVar(SDNode *N, SelectionDAG &DAG,
APInt KnownUndef, KnownZero;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
+ APInt DemandedElts = APInt::getAllOnes(VT.getVectorNumElements());
if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, KnownUndef,
KnownZero, DCI))
return SDValue(N, 0);
@@ -44256,8 +45557,8 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
}
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (TLI.SimplifyDemandedBits(SDValue(N, 0),
- APInt::getAllOnesValue(NumBitsPerElt), DCI))
+ if (TLI.SimplifyDemandedBits(SDValue(N, 0), APInt::getAllOnes(NumBitsPerElt),
+ DCI))
return SDValue(N, 0);
return SDValue();
@@ -44276,7 +45577,7 @@ static SDValue combineVectorInsert(SDNode *N, SelectionDAG &DAG,
unsigned NumBitsPerElt = VT.getScalarSizeInBits();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.SimplifyDemandedBits(SDValue(N, 0),
- APInt::getAllOnesValue(NumBitsPerElt), DCI))
+ APInt::getAllOnes(NumBitsPerElt), DCI))
return SDValue(N, 0);
}
@@ -44315,12 +45616,15 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG,
SDValue CMP01 = CMP0->getOperand(1);
EVT VT = CMP00.getValueType();
- if (VT == MVT::f32 || VT == MVT::f64) {
+ if (VT == MVT::f32 || VT == MVT::f64 ||
+ (VT == MVT::f16 && Subtarget.hasFP16())) {
bool ExpectingFlags = false;
// Check for any users that want flags:
- for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
- !ExpectingFlags && UI != UE; ++UI)
- switch (UI->getOpcode()) {
+ for (const SDNode *U : N->uses()) {
+ if (ExpectingFlags)
+ break;
+
+ switch (U->getOpcode()) {
default:
case ISD::BR_CC:
case ISD::BRCOND:
@@ -44333,6 +45637,7 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG,
case ISD::ANY_EXTEND:
break;
}
+ }
if (!ExpectingFlags) {
enum X86::CondCode cc0 = (enum X86::CondCode)N0.getConstantOperandVal(0);
@@ -44396,7 +45701,7 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG,
}
/// Try to fold: (and (xor X, -1), Y) -> (andnp X, Y).
-static SDValue combineANDXORWithAllOnesIntoANDNP(SDNode *N, SelectionDAG &DAG) {
+static SDValue combineAndNotIntoANDNP(SDNode *N, SelectionDAG &DAG) {
assert(N->getOpcode() == ISD::AND);
MVT VT = N->getSimpleValueType(0);
@@ -44543,17 +45848,19 @@ static unsigned convertIntLogicToFPLogicOpcode(unsigned Opcode) {
return FPOpcode;
}
-/// If both input operands of a logic op are being cast from floating point
-/// types, try to convert this into a floating point logic node to avoid
-/// unnecessary moves from SSE to integer registers.
+/// If both input operands of a logic op are being cast from floating-point
+/// types or FP compares, try to convert this into a floating-point logic node
+/// to avoid unnecessary moves from SSE to integer registers.
static SDValue convertIntLogicToFPLogic(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDLoc DL(N);
- if (N0.getOpcode() != ISD::BITCAST || N1.getOpcode() != ISD::BITCAST)
+ if (!((N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) ||
+ (N0.getOpcode() == ISD::SETCC && N1.getOpcode() == ISD::SETCC)))
return SDValue();
SDValue N00 = N0.getOperand(0);
@@ -44562,14 +45869,44 @@ static SDValue convertIntLogicToFPLogic(SDNode *N, SelectionDAG &DAG,
EVT N10Type = N10.getValueType();
// Ensure that both types are the same and are legal scalar fp types.
- if (N00Type != N10Type ||
- !((Subtarget.hasSSE1() && N00Type == MVT::f32) ||
- (Subtarget.hasSSE2() && N00Type == MVT::f64)))
+ if (N00Type != N10Type || !((Subtarget.hasSSE1() && N00Type == MVT::f32) ||
+ (Subtarget.hasSSE2() && N00Type == MVT::f64) ||
+ (Subtarget.hasFP16() && N00Type == MVT::f16)))
return SDValue();
- unsigned FPOpcode = convertIntLogicToFPLogicOpcode(N->getOpcode());
- SDValue FPLogic = DAG.getNode(FPOpcode, DL, N00Type, N00, N10);
- return DAG.getBitcast(VT, FPLogic);
+ if (N0.getOpcode() == ISD::BITCAST && !DCI.isBeforeLegalizeOps()) {
+ unsigned FPOpcode = convertIntLogicToFPLogicOpcode(N->getOpcode());
+ SDValue FPLogic = DAG.getNode(FPOpcode, DL, N00Type, N00, N10);
+ return DAG.getBitcast(VT, FPLogic);
+ }
+
+ // The vector ISA for FP predicates is incomplete before AVX, so converting
+ // COMIS* to CMPS* may not be a win before AVX.
+ // TODO: Check types/predicates to see if they are available with SSE/SSE2.
+ if (!Subtarget.hasAVX() || VT != MVT::i1 || N0.getOpcode() != ISD::SETCC ||
+ !N0.hasOneUse() || !N1.hasOneUse())
+ return SDValue();
+
+ // Convert scalar FP compares and logic to vector compares (COMIS* to CMPS*)
+ // and vector logic:
+ // logic (setcc N00, N01), (setcc N10, N11) -->
+  //   extelt (logic (setcc (s2v N00), (s2v N01)), setcc (s2v N10), (s2v N11)), 0
+ unsigned NumElts = 128 / N00Type.getSizeInBits();
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), N00Type, NumElts);
+ EVT BoolVecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElts);
+ SDValue ZeroIndex = DAG.getVectorIdxConstant(0, DL);
+ SDValue N01 = N0.getOperand(1);
+ SDValue N11 = N1.getOperand(1);
+ SDValue Vec00 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, N00);
+ SDValue Vec01 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, N01);
+ SDValue Vec10 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, N10);
+ SDValue Vec11 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, N11);
+ SDValue Setcc0 = DAG.getSetCC(DL, BoolVecVT, Vec00, Vec01,
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ SDValue Setcc1 = DAG.getSetCC(DL, BoolVecVT, Vec10, Vec11,
+ cast<CondCodeSDNode>(N1.getOperand(2))->get());
+ SDValue Logic = DAG.getNode(N->getOpcode(), DL, BoolVecVT, Setcc0, Setcc1);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Logic, ZeroIndex);
}
// Attempt to fold BITOP(MOVMSK(X),MOVMSK(Y)) -> MOVMSK(BITOP(X,Y))
@@ -44613,12 +45950,40 @@ static SDValue combineAndMaskToShift(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDValue Op0 = peekThroughBitcasts(N->getOperand(0));
SDValue Op1 = peekThroughBitcasts(N->getOperand(1));
- EVT VT0 = Op0.getValueType();
- EVT VT1 = Op1.getValueType();
-
- if (VT0 != VT1 || !VT0.isSimple() || !VT0.isInteger())
+ EVT VT = Op0.getValueType();
+ if (VT != Op1.getValueType() || !VT.isSimple() || !VT.isInteger())
return SDValue();
+ // Try to convert an "is positive" signbit masking operation into arithmetic
+ // shift and "andn". This saves a materialization of a -1 vector constant.
+ // The "is negative" variant should be handled more generally because it only
+ // requires "and" rather than "andn":
+ // and (pcmpgt X, -1), Y --> pandn (vsrai X, BitWidth - 1), Y
+ //
+ // This is limited to the original type to avoid producing even more bitcasts.
+ // If the bitcasts can't be eliminated, then it is unlikely that this fold
+ // will be profitable.
+ if (N->getValueType(0) == VT &&
+ supportedVectorShiftWithImm(VT.getSimpleVT(), Subtarget, ISD::SRA)) {
+ SDValue X, Y;
+ if (Op1.hasOneUse() && Op1.getOpcode() == X86ISD::PCMPGT &&
+ isAllOnesOrAllOnesSplat(Op1.getOperand(1))) {
+ X = Op1.getOperand(0);
+ Y = Op0;
+ } else if (Op0.hasOneUse() && Op0.getOpcode() == X86ISD::PCMPGT &&
+ isAllOnesOrAllOnesSplat(Op0.getOperand(1))) {
+ X = Op0.getOperand(0);
+ Y = Op1;
+ }
+ if (X && Y) {
+ SDLoc DL(N);
+ SDValue Sra =
+ getTargetVShiftByConstNode(X86ISD::VSRAI, DL, VT.getSimpleVT(), X,
+ VT.getScalarSizeInBits() - 1, DAG);
+ return DAG.getNode(X86ISD::ANDNP, DL, VT, Sra, Y);
+ }
+ }
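+  // Worked example (type assumed for illustration): for v4i32,
+  //   and (pcmpgt X, -1), Y --> pandn (vsrai X, 31), Y
+  // vsrai broadcasts each lane's sign bit, so lanes where X >= 0 become
+  // all-zeros and andn passes Y through, exactly like the pcmpgt mask.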
+
APInt SplatVal;
if (!ISD::isConstantSplatVector(Op1.getNode(), SplatVal) ||
!SplatVal.isMask())
@@ -44628,17 +45993,17 @@ static SDValue combineAndMaskToShift(SDNode *N, SelectionDAG &DAG,
if (isBitwiseNot(Op0))
return SDValue();
- if (!SupportedVectorShiftWithImm(VT0.getSimpleVT(), Subtarget, ISD::SRL))
+ if (!supportedVectorShiftWithImm(VT.getSimpleVT(), Subtarget, ISD::SRL))
return SDValue();
- unsigned EltBitWidth = VT0.getScalarSizeInBits();
+ unsigned EltBitWidth = VT.getScalarSizeInBits();
if (EltBitWidth != DAG.ComputeNumSignBits(Op0))
return SDValue();
SDLoc DL(N);
unsigned ShiftVal = SplatVal.countTrailingOnes();
SDValue ShAmt = DAG.getTargetConstant(EltBitWidth - ShiftVal, DL, MVT::i8);
- SDValue Shift = DAG.getNode(X86ISD::VSRLI, DL, VT0, Op0, ShAmt);
+ SDValue Shift = DAG.getNode(X86ISD::VSRLI, DL, VT, Op0, ShAmt);
return DAG.getBitcast(N->getValueType(0), Shift);
}
@@ -44881,16 +46246,16 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
if (SDValue R = combineBitOpWithMOVMSK(N, DAG))
return R;
+ if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, DCI, Subtarget))
+ return FPLogic;
+
if (DCI.isBeforeLegalizeOps())
return SDValue();
if (SDValue R = combineCompareEqual(N, DAG, DCI, Subtarget))
return R;
- if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget))
- return FPLogic;
-
- if (SDValue R = combineANDXORWithAllOnesIntoANDNP(N, DAG))
+ if (SDValue R = combineAndNotIntoANDNP(N, DAG))
return R;
if (SDValue ShiftRight = combineAndMaskToShift(N, DAG, Subtarget))
@@ -44921,7 +46286,7 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
N->getOperand(0)->isOnlyUserOf(SrcVec.getNode()) &&
getTargetConstantBitsFromNode(BitMask, 8, UndefElts, EltBits) &&
llvm::all_of(EltBits, [](const APInt &M) {
- return M.isNullValue() || M.isAllOnesValue();
+ return M.isZero() || M.isAllOnes();
})) {
unsigned NumElts = SrcVecVT.getVectorNumElements();
unsigned Scale = SrcVecVT.getScalarSizeInBits() / 8;
@@ -44933,8 +46298,7 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
if (UndefElts[i])
continue;
int VecIdx = Scale * Idx + i;
- ShuffleMask[VecIdx] =
- EltBits[i].isNullValue() ? SM_SentinelZero : VecIdx;
+ ShuffleMask[VecIdx] = EltBits[i].isZero() ? SM_SentinelZero : VecIdx;
}
if (SDValue Shuffle = combineX86ShufflesRecursively(
@@ -44956,7 +46320,8 @@ static SDValue canonicalizeBitSelect(SDNode *N, SelectionDAG &DAG,
assert(N->getOpcode() == ISD::OR && "Unexpected Opcode");
MVT VT = N->getSimpleValueType(0);
- if (!VT.isVector() || (VT.getScalarSizeInBits() % 8) != 0)
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ if (!VT.isVector() || (EltSizeInBits % 8) != 0)
return SDValue();
SDValue N0 = peekThroughBitcasts(N->getOperand(0));
@@ -44966,9 +46331,7 @@ static SDValue canonicalizeBitSelect(SDNode *N, SelectionDAG &DAG,
// On XOP we'll lower to PCMOV so accept one use. With AVX512, we can use
// VPTERNLOG. Otherwise only do this if either mask has multiple uses already.
- bool UseVPTERNLOG = (Subtarget.hasAVX512() && VT.is512BitVector()) ||
- Subtarget.hasVLX();
- if (!(Subtarget.hasXOP() || UseVPTERNLOG ||
+ if (!(Subtarget.hasXOP() || useVPTERNLOG(Subtarget, VT) ||
!N0.getOperand(1).hasOneUse() || !N1.getOperand(1).hasOneUse()))
return SDValue();
@@ -44992,13 +46355,19 @@ static SDValue canonicalizeBitSelect(SDNode *N, SelectionDAG &DAG,
SDLoc DL(N);
- if (UseVPTERNLOG) {
- // Emit a VPTERNLOG node directly.
- SDValue A = DAG.getBitcast(VT, N0.getOperand(1));
- SDValue B = DAG.getBitcast(VT, N0.getOperand(0));
- SDValue C = DAG.getBitcast(VT, N1.getOperand(0));
+ if (useVPTERNLOG(Subtarget, VT)) {
+ // Emit a VPTERNLOG node directly - 0xCA is the imm code for A?B:C.
+ // VPTERNLOG is only available as vXi32/64-bit types.
+ MVT OpSVT = EltSizeInBits == 32 ? MVT::i32 : MVT::i64;
+ MVT OpVT =
+ MVT::getVectorVT(OpSVT, VT.getSizeInBits() / OpSVT.getSizeInBits());
+ SDValue A = DAG.getBitcast(OpVT, N0.getOperand(1));
+ SDValue B = DAG.getBitcast(OpVT, N0.getOperand(0));
+ SDValue C = DAG.getBitcast(OpVT, N1.getOperand(0));
SDValue Imm = DAG.getTargetConstant(0xCA, DL, MVT::i8);
- return DAG.getNode(X86ISD::VPTERNLOG, DL, VT, A, B, C, Imm);
+ SDValue Res = getAVX512Node(X86ISD::VPTERNLOG, DL, OpVT, {A, B, C, Imm},
+ DAG, Subtarget);
+ return DAG.getBitcast(VT, Res);
}
SDValue X = N->getOperand(0);
@@ -45247,15 +46616,15 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
if (SDValue R = combineBitOpWithMOVMSK(N, DAG))
return R;
+ if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, DCI, Subtarget))
+ return FPLogic;
+
if (DCI.isBeforeLegalizeOps())
return SDValue();
if (SDValue R = combineCompareEqual(N, DAG, DCI, Subtarget))
return R;
- if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget))
- return FPLogic;
-
if (SDValue R = canonicalizeBitSelect(N, DAG, Subtarget))
return R;
@@ -45476,7 +46845,7 @@ static SDValue detectSSatPattern(SDValue In, EVT VT, bool MatchPackUS = false) {
APInt SignedMax, SignedMin;
if (MatchPackUS) {
- SignedMax = APInt::getAllOnesValue(NumDstBits).zext(NumSrcBits);
+ SignedMax = APInt::getAllOnes(NumDstBits).zext(NumSrcBits);
SignedMin = APInt(NumSrcBits, 0);
} else {
SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
@@ -45641,6 +47010,11 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
});
};
+ auto IsZExtLike = [DAG = &DAG, ScalarVT](SDValue V) {
+ unsigned MaxActiveBits = DAG->computeKnownBits(V).countMaxActiveBits();
+ return MaxActiveBits <= ScalarVT.getSizeInBits();
+ };
+
// Check if each element of the vector is right-shifted by one.
SDValue LHS = In.getOperand(0);
SDValue RHS = In.getOperand(1);
@@ -45659,23 +47033,25 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
return DAG.getNode(X86ISD::AVG, DL, Ops[0].getValueType(), Ops);
};
- auto AVGSplitter = [&](SDValue Op0, SDValue Op1) {
+ auto AVGSplitter = [&](std::array<SDValue, 2> Ops) {
+ for (SDValue &Op : Ops)
+ if (Op.getValueType() != VT)
+ Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
// Pad to a power-of-2 vector, split+apply and extract the original vector.
unsigned NumElemsPow2 = PowerOf2Ceil(NumElems);
EVT Pow2VT = EVT::getVectorVT(*DAG.getContext(), ScalarVT, NumElemsPow2);
if (NumElemsPow2 != NumElems) {
- SmallVector<SDValue, 32> Ops0(NumElemsPow2, DAG.getUNDEF(ScalarVT));
- SmallVector<SDValue, 32> Ops1(NumElemsPow2, DAG.getUNDEF(ScalarVT));
- for (unsigned i = 0; i != NumElems; ++i) {
- SDValue Idx = DAG.getIntPtrConstant(i, DL);
- Ops0[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Op0, Idx);
- Ops1[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Op1, Idx);
+ for (SDValue &Op : Ops) {
+ SmallVector<SDValue, 32> EltsOfOp(NumElemsPow2, DAG.getUNDEF(ScalarVT));
+ for (unsigned i = 0; i != NumElems; ++i) {
+ SDValue Idx = DAG.getIntPtrConstant(i, DL);
+ EltsOfOp[i] =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Op, Idx);
+ }
+ Op = DAG.getBuildVector(Pow2VT, DL, EltsOfOp);
}
- Op0 = DAG.getBuildVector(Pow2VT, DL, Ops0);
- Op1 = DAG.getBuildVector(Pow2VT, DL, Ops1);
}
- SDValue Res =
- SplitOpsAndApply(DAG, Subtarget, DL, Pow2VT, {Op0, Op1}, AVGBuilder);
+ SDValue Res = SplitOpsAndApply(DAG, Subtarget, DL, Pow2VT, Ops, AVGBuilder);
if (NumElemsPow2 == NumElems)
return Res;
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
@@ -45685,14 +47061,12 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
// Take care of the case when one of the operands is a constant vector whose
// element is in the range [1, 256].
if (IsConstVectorInRange(Operands[1], 1, ScalarVT == MVT::i8 ? 256 : 65536) &&
- Operands[0].getOpcode() == ISD::ZERO_EXTEND &&
- Operands[0].getOperand(0).getValueType() == VT) {
+ IsZExtLike(Operands[0])) {
// The pattern is detected. Subtract one from the constant vector, then
// demote it and emit X86ISD::AVG instruction.
SDValue VecOnes = DAG.getConstant(1, DL, InVT);
Operands[1] = DAG.getNode(ISD::SUB, DL, InVT, Operands[1], VecOnes);
- Operands[1] = DAG.getNode(ISD::TRUNCATE, DL, VT, Operands[1]);
- return AVGSplitter(Operands[0].getOperand(0), Operands[1]);
+ return AVGSplitter({Operands[0], Operands[1]});
}
// Matches 'add like' patterns: add(Op0,Op1) + zext(or(Op0,Op1)).
@@ -45731,15 +47105,12 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
// Check if Operands[0] and Operands[1] are results of type promotion.
for (int j = 0; j < 2; ++j)
- if (Operands[j].getValueType() != VT) {
- if (Operands[j].getOpcode() != ISD::ZERO_EXTEND ||
- Operands[j].getOperand(0).getValueType() != VT)
+ if (Operands[j].getValueType() != VT)
+ if (!IsZExtLike(Operands[j]))
return SDValue();
- Operands[j] = Operands[j].getOperand(0);
- }
// The pattern is detected, emit X86ISD::AVG instruction(s).
- return AVGSplitter(Operands[0], Operands[1]);
+ return AVGSplitter({Operands[0], Operands[1]});
}
return SDValue();
@@ -46685,11 +48056,171 @@ static SDValue combineToHorizontalAddSub(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+// Try to combine the following nodes
+// t29: i64 = X86ISD::Wrapper TargetConstantPool:i64
+// <i32 -2147483648[float -0.000000e+00]> 0
+// t27: v16i32[v16f32],ch = X86ISD::VBROADCAST_LOAD
+// <(load 4 from constant-pool)> t0, t29
+// [t30: v16i32 = bitcast t27]
+// t6: v16i32 = xor t7, t27[t30]
+// t11: v16f32 = bitcast t6
+// t21: v16f32 = X86ISD::VFMULC[X86ISD::VCFMULC] t11, t8
+// into X86ISD::VFCMULC[X86ISD::VFMULC] if possible:
+// t22: v16f32 = bitcast t7
+// t23: v16f32 = X86ISD::VFCMULC[X86ISD::VFMULC] t8, t22
+// t24: v32f16 = bitcast t23
+static SDValue combineFMulcFCMulc(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ EVT VT = N->getValueType(0);
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ int CombineOpcode =
+ N->getOpcode() == X86ISD::VFCMULC ? X86ISD::VFMULC : X86ISD::VFCMULC;
+ auto isConjugationConstant = [](const Constant *c) {
+ if (const auto *CI = dyn_cast<ConstantInt>(c)) {
+ APInt ConjugationInt32 = APInt(32, 0x80000000, true);
+ APInt ConjugationInt64 = APInt(64, 0x8000000080000000ULL, true);
+ switch (CI->getBitWidth()) {
+ case 16:
+ return false;
+ case 32:
+ return CI->getValue() == ConjugationInt32;
+ case 64:
+ return CI->getValue() == ConjugationInt64;
+ default:
+ llvm_unreachable("Unexpected bit width");
+ }
+ }
+ if (const auto *CF = dyn_cast<ConstantFP>(c))
+ return CF->isNegativeZeroValue();
+ return false;
+ };
+ auto combineConjugation = [&](SDValue &r) {
+ if (LHS->getOpcode() == ISD::BITCAST && RHS.hasOneUse()) {
+ SDValue XOR = LHS.getOperand(0);
+ if (XOR->getOpcode() == ISD::XOR && XOR.hasOneUse()) {
+ SDValue XORRHS = XOR.getOperand(1);
+ if (XORRHS.getOpcode() == ISD::BITCAST && XORRHS.hasOneUse())
+ XORRHS = XORRHS.getOperand(0);
+ if (XORRHS.getOpcode() == X86ISD::VBROADCAST_LOAD &&
+ XORRHS.getOperand(1).getNumOperands()) {
+ ConstantPoolSDNode *CP =
+ dyn_cast<ConstantPoolSDNode>(XORRHS.getOperand(1).getOperand(0));
+ if (CP && isConjugationConstant(CP->getConstVal())) {
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N);
+ SDValue I2F = DAG.getBitcast(VT, LHS.getOperand(0).getOperand(0));
+ SDValue FCMulC = DAG.getNode(CombineOpcode, SDLoc(N), VT, RHS, I2F);
+ r = DAG.getBitcast(VT, FCMulC);
+ return true;
+ }
+ }
+ }
+ }
+ return false;
+ };
+ SDValue Res;
+ if (combineConjugation(Res))
+ return Res;
+ std::swap(LHS, RHS);
+ if (combineConjugation(Res))
+ return Res;
+ return Res;
+}
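+// Worked example (lane layout assumed for illustration): xor'ing each 32-bit
+// complex lane with the broadcast constant 0x80000000 flips only the sign bit
+// of the upper (imaginary) f16 half, i.e. it conjugates the value, so the fold
+// above rewrites VFMULC(bitcast(xor(t7, splat)), t8) into
+// VFCMULC(t8, bitcast(t7)), as in the node diagram above.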
+
+// Try to combine the following nodes:
+// FADD(A, FMA(B, C, 0)) and FADD(A, FMUL(B, C)) to FMA(B, C, A)
+static SDValue combineFaddCFmul(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ auto AllowContract = [&DAG](const SDNodeFlags &Flags) {
+ return DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ Flags.hasAllowContract();
+ };
+
+ auto HasNoSignedZero = [&DAG](const SDNodeFlags &Flags) {
+ return DAG.getTarget().Options.NoSignedZerosFPMath ||
+ Flags.hasNoSignedZeros();
+ };
+ auto IsVectorAllNegativeZero = [](const SDNode *N) {
+ if (N->getOpcode() != X86ISD::VBROADCAST_LOAD)
+ return false;
+ assert(N->getSimpleValueType(0).getScalarType() == MVT::f32 &&
+ "Unexpected vector type!");
+ if (ConstantPoolSDNode *CP =
+ dyn_cast<ConstantPoolSDNode>(N->getOperand(1)->getOperand(0))) {
+ APInt AI = APInt(32, 0x80008000, true);
+ if (const auto *CI = dyn_cast<ConstantInt>(CP->getConstVal()))
+ return CI->getValue() == AI;
+ if (const auto *CF = dyn_cast<ConstantFP>(CP->getConstVal()))
+ return CF->getValue() == APFloat(APFloat::IEEEsingle(), AI);
+ }
+ return false;
+ };
+
+ if (N->getOpcode() != ISD::FADD || !Subtarget.hasFP16() ||
+ !AllowContract(N->getFlags()))
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::v8f16 && VT != MVT::v16f16 && VT != MVT::v32f16)
+ return SDValue();
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ bool IsConj;
+ SDValue FAddOp1, MulOp0, MulOp1;
+ auto GetCFmulFrom = [&MulOp0, &MulOp1, &IsConj, &AllowContract,
+ &IsVectorAllNegativeZero,
+ &HasNoSignedZero](SDValue N) -> bool {
+ if (!N.hasOneUse() || N.getOpcode() != ISD::BITCAST)
+ return false;
+ SDValue Op0 = N.getOperand(0);
+ unsigned Opcode = Op0.getOpcode();
+ if (Op0.hasOneUse() && AllowContract(Op0->getFlags())) {
+ if ((Opcode == X86ISD::VFMULC || Opcode == X86ISD::VFCMULC)) {
+ MulOp0 = Op0.getOperand(0);
+ MulOp1 = Op0.getOperand(1);
+ IsConj = Opcode == X86ISD::VFCMULC;
+ return true;
+ }
+ if ((Opcode == X86ISD::VFMADDC || Opcode == X86ISD::VFCMADDC) &&
+ ((ISD::isBuildVectorAllZeros(Op0->getOperand(2).getNode()) &&
+ HasNoSignedZero(Op0->getFlags())) ||
+ IsVectorAllNegativeZero(Op0->getOperand(2).getNode()))) {
+ MulOp0 = Op0.getOperand(0);
+ MulOp1 = Op0.getOperand(1);
+ IsConj = Opcode == X86ISD::VFCMADDC;
+ return true;
+ }
+ }
+ return false;
+ };
+
+ if (GetCFmulFrom(LHS))
+ FAddOp1 = RHS;
+ else if (GetCFmulFrom(RHS))
+ FAddOp1 = LHS;
+ else
+ return SDValue();
+
+ MVT CVT = MVT::getVectorVT(MVT::f32, VT.getVectorNumElements() / 2);
+ FAddOp1 = DAG.getBitcast(CVT, FAddOp1);
+ unsigned NewOp = IsConj ? X86ISD::VFCMADDC : X86ISD::VFMADDC;
+ // FIXME: How do we handle when fast math flags of FADD are different from
+ // CFMUL's?
+ SDValue CFmul =
+ DAG.getNode(NewOp, SDLoc(N), CVT, MulOp0, MulOp1, FAddOp1, N->getFlags());
+ return DAG.getBitcast(VT, CFmul);
+}
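+// Worked example (vector type assumed for illustration): with contraction
+// allowed, FADD(A, bitcast(VFMULC(B, C))) on v8f16 is rebuilt by the combine
+// above as bitcast(VFMADDC(B, C, bitcast(A))), fusing the complex multiply and
+// the add into a single FMA-style node.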
+
/// Do target-specific dag combines on floating-point adds/subs.
static SDValue combineFaddFsub(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
if (SDValue HOp = combineToHorizontalAddSub(N, DAG, Subtarget))
return HOp;
+
+ if (SDValue COp = combineFaddCFmul(N, DAG, Subtarget))
+ return COp;
+
return SDValue();
}
@@ -46922,7 +48453,7 @@ static SDValue combineVectorSignBitsTruncation(SDNode *N, const SDLoc &DL,
// SimplifyDemandedBits often relaxes sra to srl so we need to reverse it.
if (In.getOpcode() == ISD::SRL && N->isOnlyUserOf(In.getNode()))
if (const APInt *ShAmt = DAG.getValidShiftAmountConstant(
- In, APInt::getAllOnesValue(VT.getVectorNumElements()))) {
+ In, APInt::getAllOnes(VT.getVectorNumElements()))) {
if (*ShAmt == MinSignBits) {
SDValue NewIn = DAG.getNode(ISD::SRA, DL, InVT, In->ops());
return truncateVectorWithPACK(X86ISD::PACKSS, VT, NewIn, DL, DAG,
@@ -47178,7 +48709,7 @@ static SDValue combineVTRUNC(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- APInt DemandedMask(APInt::getAllOnesValue(VT.getScalarSizeInBits()));
+ APInt DemandedMask(APInt::getAllOnes(VT.getScalarSizeInBits()));
if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
return SDValue(N, 0);
@@ -47498,6 +49029,9 @@ static SDValue combineXor(SDNode *N, SelectionDAG &DAG,
if (SDValue R = combineBitOpWithMOVMSK(N, DAG))
return R;
+ if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, DCI, Subtarget))
+ return FPLogic;
+
if (DCI.isBeforeLegalizeOps())
return SDValue();
@@ -47546,9 +49080,6 @@ static SDValue combineXor(SDNode *N, SelectionDAG &DAG,
}
}
- if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget))
- return FPLogic;
-
return combineFneg(N, DAG, DCI, Subtarget);
}
@@ -47562,7 +49093,7 @@ static SDValue combineBEXTR(SDNode *N, SelectionDAG &DAG,
// Simplify the inputs.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- APInt DemandedMask(APInt::getAllOnesValue(NumBits));
+ APInt DemandedMask(APInt::getAllOnes(NumBits));
if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
return SDValue(N, 0);
@@ -47704,6 +49235,7 @@ static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG,
EVT VT = N->getValueType(0);
if (!((Subtarget.hasSSE1() && VT == MVT::f32) ||
(Subtarget.hasSSE2() && VT == MVT::f64) ||
+ (Subtarget.hasFP16() && VT == MVT::f16) ||
(VT.isVector() && TLI.isTypeLegal(VT))))
return SDValue();
@@ -47765,7 +49297,7 @@ static SDValue combineX86INT_TO_FP(SDNode *N, SelectionDAG &DAG,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
APInt KnownUndef, KnownZero;
- APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
+ APInt DemandedElts = APInt::getAllOnes(VT.getVectorNumElements());
if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, KnownUndef,
KnownZero, DCI))
return SDValue(N, 0);
@@ -48265,6 +49797,9 @@ static SDValue combineExtSetcc(SDNode *N, SelectionDAG &DAG,
SVT != MVT::i64 && SVT != MVT::f32 && SVT != MVT::f64)
return SDValue();
+  // We don't have a CMPP instruction for vXf16.
+ if (N0.getOperand(0).getValueType().getVectorElementType() == MVT::f16)
+ return SDValue();
// We can only do this if the vector size is 256 bits or less.
unsigned Size = VT.getSizeInBits();
if (Size > 256 && Subtarget.useAVX512Regs())
@@ -48366,7 +49901,9 @@ static SDValue combineFMA(SDNode *N, SelectionDAG &DAG,
}
EVT ScalarVT = VT.getScalarType();
- if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) || !Subtarget.hasAnyFMA())
+ if (((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) ||
+ !Subtarget.hasAnyFMA()) &&
+ !(ScalarVT == MVT::f16 && Subtarget.hasFP16()))
return SDValue();
auto invertIfNegative = [&DAG, &TLI, &DCI](SDValue &V) {
@@ -48873,7 +50410,7 @@ static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG,
// Simplify the inputs.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- APInt DemandedMask(APInt::getAllOnesValue(NumBits));
+ APInt DemandedMask(APInt::getAllOnes(NumBits));
if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
return SDValue(N, 0);
@@ -48881,9 +50418,44 @@ static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG,
}
static SDValue combineX86GatherScatter(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI) {
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
+ auto *MemOp = cast<X86MaskedGatherScatterSDNode>(N);
+ SDValue BasePtr = MemOp->getBasePtr();
+ SDValue Index = MemOp->getIndex();
+ SDValue Scale = MemOp->getScale();
+ SDValue Mask = MemOp->getMask();
+
+ // Attempt to fold an index scale into the scale value directly.
+ // For smaller indices, implicit sext is performed BEFORE scale, preventing
+ // this fold under most circumstances.
+ // TODO: Move this into X86DAGToDAGISel::matchVectorAddressRecursively?
+ if ((Index.getOpcode() == X86ISD::VSHLI ||
+ (Index.getOpcode() == ISD::ADD &&
+ Index.getOperand(0) == Index.getOperand(1))) &&
+ isa<ConstantSDNode>(Scale) &&
+ BasePtr.getScalarValueSizeInBits() == Index.getScalarValueSizeInBits()) {
+ unsigned ShiftAmt =
+ Index.getOpcode() == ISD::ADD ? 1 : Index.getConstantOperandVal(1);
+ uint64_t ScaleAmt = cast<ConstantSDNode>(Scale)->getZExtValue();
+ uint64_t NewScaleAmt = ScaleAmt * (1ULL << ShiftAmt);
+ if (isPowerOf2_64(NewScaleAmt) && NewScaleAmt <= 8) {
+ SDValue NewIndex = Index.getOperand(0);
+ SDValue NewScale =
+ DAG.getTargetConstant(NewScaleAmt, SDLoc(N), Scale.getValueType());
+ if (N->getOpcode() == X86ISD::MGATHER)
+ return getAVX2GatherNode(N->getOpcode(), SDValue(N, 0), DAG,
+ MemOp->getOperand(1), Mask,
+ MemOp->getBasePtr(), NewIndex, NewScale,
+ MemOp->getChain(), Subtarget);
+ if (N->getOpcode() == X86ISD::MSCATTER)
+ return getScatterNode(N->getOpcode(), SDValue(N, 0), DAG,
+ MemOp->getOperand(1), Mask, MemOp->getBasePtr(),
+ NewIndex, NewScale, MemOp->getChain(), Subtarget);
+ }
+ }
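+  // Worked example (values assumed for illustration): a gather whose index is
+  // vshli(I, 1) with Scale == 2 is rewritten above to use index I with
+  // Scale == 4, since (I << 1) * 2 == I * 4 and 4 is still a legal scale
+  // (<= 8).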
+
// With vector masks we only demand the upper bit of the mask.
- SDValue Mask = cast<X86MaskedGatherScatterSDNode>(N)->getMask();
if (Mask.getScalarValueSizeInBits() != 1) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
APInt DemandedMask(APInt::getSignMask(Mask.getScalarValueSizeInBits()));
@@ -48962,6 +50534,48 @@ static SDValue combineGatherScatter(SDNode *N, SelectionDAG &DAG,
}
}
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
+ // Try to move splat constant adders from the index operand to the base
+ // pointer operand. Taking care to multiply by the scale. We can only do
+ // this when index element type is the same as the pointer type.
+ // Otherwise we need to be sure the math doesn't wrap before the scale.
+ if (Index.getOpcode() == ISD::ADD &&
+ Index.getValueType().getVectorElementType() == PtrVT &&
+ isa<ConstantSDNode>(Scale)) {
+ uint64_t ScaleAmt = cast<ConstantSDNode>(Scale)->getZExtValue();
+ if (auto *BV = dyn_cast<BuildVectorSDNode>(Index.getOperand(1))) {
+ BitVector UndefElts;
+ if (ConstantSDNode *C = BV->getConstantSplatNode(&UndefElts)) {
+ // FIXME: Allow non-constant?
+ if (UndefElts.none()) {
+ // Apply the scale.
+ APInt Adder = C->getAPIntValue() * ScaleAmt;
+ // Add it to the existing base.
+ Base = DAG.getNode(ISD::ADD, DL, PtrVT, Base,
+ DAG.getConstant(Adder, DL, PtrVT));
+ Index = Index.getOperand(0);
+ return rebuildGatherScatter(GorS, Index, Base, Scale, DAG);
+ }
+ }
+
+      // It's also possible the base is just a constant. In that case, just
+ // replace it with 0 and move the displacement into the index.
+ if (BV->isConstant() && isa<ConstantSDNode>(Base) &&
+ isOneConstant(Scale)) {
+ SDValue Splat = DAG.getSplatBuildVector(Index.getValueType(), DL, Base);
+ // Combine the constant build_vector and the constant base.
+ Splat = DAG.getNode(ISD::ADD, DL, Index.getValueType(),
+ Index.getOperand(1), Splat);
+ // Add to the LHS of the original Index add.
+ Index = DAG.getNode(ISD::ADD, DL, Index.getValueType(),
+ Index.getOperand(0), Splat);
+ Base = DAG.getConstant(0, DL, Base.getValueType());
+ return rebuildGatherScatter(GorS, Index, Base, Scale, DAG);
+ }
+ }
+ }
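+  // Worked example (values assumed for illustration): for Index = add(I,
+  // splat(5)) with Scale == 4, the splat is folded into the base pointer as
+  // Base + 20 and the index becomes plain I, provided the index element type
+  // matches the pointer type so the add cannot wrap before scaling.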
+
if (DCI.isBeforeLegalizeOps()) {
unsigned IndexWidth = Index.getScalarValueSizeInBits();
@@ -49120,10 +50734,31 @@ static SDValue combineUIntToFP(SDNode *N, SelectionDAG &DAG,
EVT VT = N->getValueType(0);
EVT InVT = Op0.getValueType();
+ // UINT_TO_FP(vXi1~15) -> UINT_TO_FP(ZEXT(vXi1~15 to vXi16))
+ // UINT_TO_FP(vXi17~31) -> UINT_TO_FP(ZEXT(vXi17~31 to vXi32))
+ // UINT_TO_FP(vXi33~63) -> UINT_TO_FP(ZEXT(vXi33~63 to vXi64))
+ if (InVT.isVector() && VT.getVectorElementType() == MVT::f16) {
+ unsigned ScalarSize = InVT.getScalarSizeInBits();
+ if (ScalarSize == 16 || ScalarSize == 32 || ScalarSize >= 64)
+ return SDValue();
+ SDLoc dl(N);
+ EVT DstVT = EVT::getVectorVT(*DAG.getContext(),
+ ScalarSize < 16 ? MVT::i16
+ : ScalarSize < 32 ? MVT::i32
+ : MVT::i64,
+ InVT.getVectorNumElements());
+ SDValue P = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Op0);
+ if (IsStrict)
+ return DAG.getNode(ISD::STRICT_UINT_TO_FP, dl, {VT, MVT::Other},
+ {N->getOperand(0), P});
+ return DAG.getNode(ISD::UINT_TO_FP, dl, VT, P);
+ }
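+  // Worked example (types assumed for illustration): UINT_TO_FP(v8i8 -> v8f16)
+  // is rewritten above as UINT_TO_FP(ZEXT(v8i8 to v8i16) -> v8f16), keeping
+  // the integer width at the narrowest legal type for the f16 conversion.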
+
// UINT_TO_FP(vXi1) -> SINT_TO_FP(ZEXT(vXi1 to vXi32))
// UINT_TO_FP(vXi8) -> SINT_TO_FP(ZEXT(vXi8 to vXi32))
// UINT_TO_FP(vXi16) -> SINT_TO_FP(ZEXT(vXi16 to vXi32))
- if (InVT.isVector() && InVT.getScalarSizeInBits() < 32) {
+ if (InVT.isVector() && InVT.getScalarSizeInBits() < 32 &&
+ VT.getScalarType() != MVT::f16) {
SDLoc dl(N);
EVT DstVT = InVT.changeVectorElementType(MVT::i32);
SDValue P = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Op0);
@@ -49162,10 +50797,31 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
EVT VT = N->getValueType(0);
EVT InVT = Op0.getValueType();
+ // SINT_TO_FP(vXi1~15) -> SINT_TO_FP(SEXT(vXi1~15 to vXi16))
+ // SINT_TO_FP(vXi17~31) -> SINT_TO_FP(SEXT(vXi17~31 to vXi32))
+ // SINT_TO_FP(vXi33~63) -> SINT_TO_FP(SEXT(vXi33~63 to vXi64))
+ if (InVT.isVector() && VT.getVectorElementType() == MVT::f16) {
+ unsigned ScalarSize = InVT.getScalarSizeInBits();
+ if (ScalarSize == 16 || ScalarSize == 32 || ScalarSize >= 64)
+ return SDValue();
+ SDLoc dl(N);
+ EVT DstVT = EVT::getVectorVT(*DAG.getContext(),
+ ScalarSize < 16 ? MVT::i16
+ : ScalarSize < 32 ? MVT::i32
+ : MVT::i64,
+ InVT.getVectorNumElements());
+ SDValue P = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Op0);
+ if (IsStrict)
+ return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other},
+ {N->getOperand(0), P});
+ return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P);
+ }
+
// SINT_TO_FP(vXi1) -> SINT_TO_FP(SEXT(vXi1 to vXi32))
// SINT_TO_FP(vXi8) -> SINT_TO_FP(SEXT(vXi8 to vXi32))
// SINT_TO_FP(vXi16) -> SINT_TO_FP(SEXT(vXi16 to vXi32))
- if (InVT.isVector() && InVT.getScalarSizeInBits() < 32) {
+ if (InVT.isVector() && InVT.getScalarSizeInBits() < 32 &&
+ VT.getScalarType() != MVT::f16) {
SDLoc dl(N);
EVT DstVT = InVT.changeVectorElementType(MVT::i32);
SDValue P = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Op0);
@@ -49244,10 +50900,7 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
static bool needCarryOrOverflowFlag(SDValue Flags) {
assert(Flags.getValueType() == MVT::i32 && "Unexpected VT!");
- for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
- UI != UE; ++UI) {
- SDNode *User = *UI;
-
+ for (const SDNode *User : Flags->uses()) {
X86::CondCode CC;
switch (User->getOpcode()) {
default:
@@ -49282,10 +50935,7 @@ static bool needCarryOrOverflowFlag(SDValue Flags) {
static bool onlyZeroFlagUsed(SDValue Flags) {
assert(Flags.getValueType() == MVT::i32 && "Unexpected VT!");
- for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
- UI != UE; ++UI) {
- SDNode *User = *UI;
-
+ for (const SDNode *User : Flags->uses()) {
unsigned CCOpNo;
switch (User->getOpcode()) {
default:
@@ -49534,8 +51184,8 @@ static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) {
// the general case below.
auto *ConstantX = dyn_cast<ConstantSDNode>(X);
if (ConstantX) {
- if ((!IsSub && CC == X86::COND_AE && ConstantX->isAllOnesValue()) ||
- (IsSub && CC == X86::COND_B && ConstantX->isNullValue())) {
+ if ((!IsSub && CC == X86::COND_AE && ConstantX->isAllOnes()) ||
+ (IsSub && CC == X86::COND_B && ConstantX->isZero())) {
// This is a complicated way to get -1 or 0 from the carry flag:
// -1 + SETAE --> -1 + (!CF) --> CF ? -1 : 0 --> SBB %eax, %eax
// 0 - SETB --> 0 - (CF) --> CF ? -1 : 0 --> SBB %eax, %eax
@@ -49544,8 +51194,8 @@ static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) {
Y.getOperand(1));
}
- if ((!IsSub && CC == X86::COND_BE && ConstantX->isAllOnesValue()) ||
- (IsSub && CC == X86::COND_A && ConstantX->isNullValue())) {
+ if ((!IsSub && CC == X86::COND_BE && ConstantX->isAllOnes()) ||
+ (IsSub && CC == X86::COND_A && ConstantX->isZero())) {
SDValue EFLAGS = Y->getOperand(1);
if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() &&
EFLAGS.getValueType().isInteger() &&
@@ -49643,8 +51293,8 @@ static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) {
// fake operands:
// 0 - (Z != 0) --> sbb %eax, %eax, (neg Z)
// -1 + (Z == 0) --> sbb %eax, %eax, (neg Z)
- if ((IsSub && CC == X86::COND_NE && ConstantX->isNullValue()) ||
- (!IsSub && CC == X86::COND_E && ConstantX->isAllOnesValue())) {
+ if ((IsSub && CC == X86::COND_NE && ConstantX->isZero()) ||
+ (!IsSub && CC == X86::COND_E && ConstantX->isAllOnes())) {
SDValue Zero = DAG.getConstant(0, DL, ZVT);
SDVTList X86SubVTs = DAG.getVTList(ZVT, MVT::i32);
SDValue Neg = DAG.getNode(X86ISD::SUB, DL, X86SubVTs, Zero, Z);
@@ -49657,8 +51307,8 @@ static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) {
// with fake operands:
// 0 - (Z == 0) --> sbb %eax, %eax, (cmp Z, 1)
// -1 + (Z != 0) --> sbb %eax, %eax, (cmp Z, 1)
- if ((IsSub && CC == X86::COND_E && ConstantX->isNullValue()) ||
- (!IsSub && CC == X86::COND_NE && ConstantX->isAllOnesValue())) {
+ if ((IsSub && CC == X86::COND_E && ConstantX->isZero()) ||
+ (!IsSub && CC == X86::COND_NE && ConstantX->isAllOnes())) {
SDValue One = DAG.getConstant(1, DL, ZVT);
SDVTList X86SubVTs = DAG.getVTList(ZVT, MVT::i32);
SDValue Cmp1 = DAG.getNode(X86ISD::SUB, DL, X86SubVTs, Z, One);
@@ -49932,6 +51582,50 @@ static SDValue matchPMADDWD_2(SelectionDAG &DAG, SDValue N0, SDValue N1,
PMADDBuilder);
}
+// ADD(VPMADDWD(X,Y),VPMADDWD(Z,W)) -> VPMADDWD(SHUFFLE(X,Z), SHUFFLE(Y,W))
+// If the upper element in each pair of both VPMADDWD operands is zero, then we
+// can merge the operand elements and use the implicit add of VPMADDWD.
+// TODO: Add support for VPMADDUBSW (which isn't commutable).
+static SDValue combineAddOfPMADDWD(SelectionDAG &DAG, SDValue N0, SDValue N1,
+ const SDLoc &DL, EVT VT) {
+ if (N0.getOpcode() != N1.getOpcode() || N0.getOpcode() != X86ISD::VPMADDWD)
+ return SDValue();
+
+ // TODO: Add 256/512-bit support once VPMADDWD combines with shuffles.
+ if (VT.getSizeInBits() > 128)
+ return SDValue();
+
+ unsigned NumElts = VT.getVectorNumElements();
+ MVT OpVT = N0.getOperand(0).getSimpleValueType();
+ APInt DemandedBits = APInt::getAllOnes(OpVT.getScalarSizeInBits());
+ APInt DemandedHiElts = APInt::getSplat(2 * NumElts, APInt(2, 2));
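+ // APInt(2, 2) is the bit pattern 0b10; splatting it selects only the odd
+ // (upper) i16 element of each pair, whose known-zero state is checked below.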
+
+ bool Op0HiZero =
+ DAG.MaskedValueIsZero(N0.getOperand(0), DemandedBits, DemandedHiElts) ||
+ DAG.MaskedValueIsZero(N0.getOperand(1), DemandedBits, DemandedHiElts);
+ bool Op1HiZero =
+ DAG.MaskedValueIsZero(N1.getOperand(0), DemandedBits, DemandedHiElts) ||
+ DAG.MaskedValueIsZero(N1.getOperand(1), DemandedBits, DemandedHiElts);
+
+ // TODO: Check for zero lower elements once we have actual codegen that
+ // creates them.
+ if (!Op0HiZero || !Op1HiZero)
+ return SDValue();
+
+ // Create a shuffle mask packing the lower elements from each VPMADDWD.
+ SmallVector<int> Mask;
+ for (int i = 0; i != (int)NumElts; ++i) {
+ Mask.push_back(2 * i);
+ Mask.push_back(2 * (i + NumElts));
+ }
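+ // e.g. for a v4i32 result (OpVT v8i16) the mask is {0,8,2,10,4,12,6,14},
+ // interleaving the low element of each pair from the two sources.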
+
+ SDValue LHS =
+ DAG.getVectorShuffle(OpVT, DL, N0.getOperand(0), N1.getOperand(0), Mask);
+ SDValue RHS =
+ DAG.getVectorShuffle(OpVT, DL, N0.getOperand(1), N1.getOperand(1), Mask);
+ return DAG.getNode(X86ISD::VPMADDWD, DL, VT, LHS, RHS);
+}
+
/// CMOV of constants requires materializing constant operands in registers.
/// Try to fold those constants into an 'add' instruction to reduce instruction
/// count. We do this with CMOV rather the generic 'select' because there are
@@ -49961,11 +51655,34 @@ static SDValue pushAddIntoCmovOfConsts(SDNode *N, SelectionDAG &DAG) {
if (!isSuitableCmov(Cmov))
return SDValue();
- // add (cmov C1, C2), OtherOp --> cmov (add OtherOp, C1), (add OtherOp, C2)
EVT VT = N->getValueType(0);
SDLoc DL(N);
SDValue FalseOp = Cmov.getOperand(0);
SDValue TrueOp = Cmov.getOperand(1);
+
+ // We will push the add through the select, but we can potentially do better
+ // if we know there is another add in the sequence and this is pointer math.
+ // In that case, we can absorb an add into the trailing memory op and avoid
+ // a 3-operand LEA which is likely slower than a 2-operand LEA.
+ // TODO: If target has "slow3OpsLEA", do this even without the trailing memop?
+ if (OtherOp.getOpcode() == ISD::ADD && OtherOp.hasOneUse() &&
+ !isa<ConstantSDNode>(OtherOp.getOperand(0)) &&
+ all_of(N->uses(), [&](SDNode *Use) {
+ auto *MemNode = dyn_cast<MemSDNode>(Use);
+ return MemNode && MemNode->getBasePtr().getNode() == N;
+ })) {
+ // add (cmov C1, C2), add (X, Y) --> add (cmov (add X, C1), (add X, C2)), Y
+ // TODO: We are arbitrarily choosing op0 as the 1st piece of the sum, but
+ // it is possible that choosing op1 might be better.
+ SDValue X = OtherOp.getOperand(0), Y = OtherOp.getOperand(1);
+ FalseOp = DAG.getNode(ISD::ADD, DL, VT, X, FalseOp);
+ TrueOp = DAG.getNode(ISD::ADD, DL, VT, X, TrueOp);
+ Cmov = DAG.getNode(X86ISD::CMOV, DL, VT, FalseOp, TrueOp,
+ Cmov.getOperand(2), Cmov.getOperand(3));
+ return DAG.getNode(ISD::ADD, DL, VT, Cmov, Y);
+ }
+
+ // add (cmov C1, C2), OtherOp --> cmov (add OtherOp, C1), (add OtherOp, C2)
FalseOp = DAG.getNode(ISD::ADD, DL, VT, OtherOp, FalseOp);
TrueOp = DAG.getNode(ISD::ADD, DL, VT, OtherOp, TrueOp);
return DAG.getNode(X86ISD::CMOV, DL, VT, FalseOp, TrueOp, Cmov.getOperand(2),
@@ -49978,13 +51695,16 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
EVT VT = N->getValueType(0);
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
+ SDLoc DL(N);
if (SDValue Select = pushAddIntoCmovOfConsts(N, DAG))
return Select;
- if (SDValue MAdd = matchPMADDWD(DAG, Op0, Op1, SDLoc(N), VT, Subtarget))
+ if (SDValue MAdd = matchPMADDWD(DAG, Op0, Op1, DL, VT, Subtarget))
+ return MAdd;
+ if (SDValue MAdd = matchPMADDWD_2(DAG, Op0, Op1, DL, VT, Subtarget))
return MAdd;
- if (SDValue MAdd = matchPMADDWD_2(DAG, Op0, Op1, SDLoc(N), VT, Subtarget))
+ if (SDValue MAdd = combineAddOfPMADDWD(DAG, Op0, Op1, DL, VT))
return MAdd;
// Try to synthesize horizontal adds from adds of shuffles.
@@ -50001,7 +51721,6 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
if (Op0.getOpcode() == ISD::ZERO_EXTEND &&
Op0.getOperand(0).getValueType().getVectorElementType() == MVT::i1 &&
TLI.isTypeLegal(Op0.getOperand(0).getValueType())) {
- SDLoc DL(N);
SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op0.getOperand(0));
return DAG.getNode(ISD::SUB, DL, VT, Op1, SExt);
}
@@ -50009,7 +51728,6 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
if (Op1.getOpcode() == ISD::ZERO_EXTEND &&
Op1.getOperand(0).getValueType().getVectorElementType() == MVT::i1 &&
TLI.isTypeLegal(Op1.getOperand(0).getValueType())) {
- SDLoc DL(N);
SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op1.getOperand(0));
return DAG.getNode(ISD::SUB, DL, VT, Op0, SExt);
}
@@ -50018,6 +51736,47 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
return combineAddOrSubToADCOrSBB(N, DAG);
}
+// Try to fold (sub Y, cmovns X, -X) -> (add Y, cmovns -X, X) if the cmov
+// condition comes from the subtract node that produced -X. This matches the
+// cmov expansion for absolute value. By swapping the operands we convert abs
+// to nabs.
+static SDValue combineSubABS(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ if (N1.getOpcode() != X86ISD::CMOV || !N1.hasOneUse())
+ return SDValue();
+
+ X86::CondCode CC = (X86::CondCode)N1.getConstantOperandVal(2);
+ if (CC != X86::COND_S && CC != X86::COND_NS)
+ return SDValue();
+
+ // Condition should come from a negate operation.
+ SDValue Cond = N1.getOperand(3);
+ if (Cond.getOpcode() != X86ISD::SUB || !isNullConstant(Cond.getOperand(0)))
+ return SDValue();
+ assert(Cond.getResNo() == 1 && "Unexpected result number");
+
+ // Get the X and -X from the negate.
+ SDValue NegX = Cond.getValue(0);
+ SDValue X = Cond.getOperand(1);
+
+ SDValue FalseOp = N1.getOperand(0);
+ SDValue TrueOp = N1.getOperand(1);
+
+ // Cmov operands should be X and NegX. Order doesn't matter.
+ if (!(TrueOp == X && FalseOp == NegX) && !(TrueOp == NegX && FalseOp == X))
+ return SDValue();
+
+ // Build a new CMOV with the operands swapped.
+ SDLoc DL(N);
+ MVT VT = N->getSimpleValueType(0);
+ SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, VT, TrueOp, FalseOp,
+ N1.getOperand(2), Cond);
+ // Convert sub to add.
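+ // With the operands swapped the CMOV yields the negated value, so
+ // N0 - OldCmov is equal to N0 + NewCmov.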
+ return DAG.getNode(ISD::ADD, DL, VT, N0, Cmov);
+}
+
static SDValue combineSub(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
@@ -50049,6 +51808,9 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::ADD, DL, VT, NewXor, NewAdd);
}
+ if (SDValue V = combineSubABS(N, DAG))
+ return V;
+
// Try to synthesize horizontal subs from subs of shuffles.
if (SDValue V = combineToHorizontalAddSub(N, DAG, Subtarget))
return V;
@@ -50099,43 +51861,30 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
if (Op0.getOpcode() == X86ISD::VBROADCAST)
return DAG.getNode(Op0.getOpcode(), DL, VT, Op0.getOperand(0));
- // If this scalar/subvector broadcast_load is inserted into both halves, use
- // a larger broadcast_load. Update other uses to use an extracted subvector.
- if (Op0.getOpcode() == X86ISD::VBROADCAST_LOAD ||
+ // If this simple subvector load or scalar/subvector broadcast_load is inserted
+ // into both halves, use a larger broadcast_load. Update other uses to use
+ // an extracted subvector.
+ if (ISD::isNormalLoad(Op0.getNode()) ||
+ Op0.getOpcode() == X86ISD::VBROADCAST_LOAD ||
Op0.getOpcode() == X86ISD::SUBV_BROADCAST_LOAD) {
- auto *MemIntr = cast<MemIntrinsicSDNode>(Op0);
- SDVTList Tys = DAG.getVTList(VT, MVT::Other);
- SDValue Ops[] = {MemIntr->getChain(), MemIntr->getBasePtr()};
- SDValue BcastLd = DAG.getMemIntrinsicNode(Op0.getOpcode(), DL, Tys, Ops,
- MemIntr->getMemoryVT(),
- MemIntr->getMemOperand());
- DAG.ReplaceAllUsesOfValueWith(
- Op0, extractSubVector(BcastLd, 0, DAG, DL, Op0.getValueSizeInBits()));
- DAG.ReplaceAllUsesOfValueWith(SDValue(MemIntr, 1), BcastLd.getValue(1));
- return BcastLd;
- }
-
- // If this is a simple subvector load repeated across multiple lanes, then
- // broadcast the load. Update other uses to use an extracted subvector.
- if (auto *Ld = dyn_cast<LoadSDNode>(Op0)) {
- if (Ld->isSimple() && !Ld->isNonTemporal() &&
- Ld->getExtensionType() == ISD::NON_EXTLOAD) {
- SDVTList Tys = DAG.getVTList(VT, MVT::Other);
- SDValue Ops[] = {Ld->getChain(), Ld->getBasePtr()};
- SDValue BcastLd =
- DAG.getMemIntrinsicNode(X86ISD::SUBV_BROADCAST_LOAD, DL, Tys, Ops,
- Ld->getMemoryVT(), Ld->getMemOperand());
- DAG.ReplaceAllUsesOfValueWith(
- Op0,
- extractSubVector(BcastLd, 0, DAG, DL, Op0.getValueSizeInBits()));
- DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), BcastLd.getValue(1));
+ auto *Mem = cast<MemSDNode>(Op0);
+ unsigned Opc = Op0.getOpcode() == X86ISD::VBROADCAST_LOAD
+ ? X86ISD::VBROADCAST_LOAD
+ : X86ISD::SUBV_BROADCAST_LOAD;
+ if (SDValue BcastLd =
+ getBROADCAST_LOAD(Opc, DL, VT, Mem->getMemoryVT(), Mem, 0, DAG)) {
+ SDValue BcastSrc =
+ extractSubVector(BcastLd, 0, DAG, DL, Op0.getValueSizeInBits());
+ DAG.ReplaceAllUsesOfValueWith(Op0, BcastSrc);
return BcastLd;
}
}
// concat_vectors(movddup(x),movddup(x)) -> broadcast(x)
if (Op0.getOpcode() == X86ISD::MOVDDUP && VT == MVT::v4f64 &&
- (Subtarget.hasAVX2() || MayFoldLoad(Op0.getOperand(0))))
+ (Subtarget.hasAVX2() ||
+ X86::mayFoldLoadIntoBroadcastFromMem(Op0.getOperand(0),
+ VT.getScalarType(), Subtarget)))
return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f64,
Op0.getOperand(0),
@@ -50144,7 +51893,8 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
// concat_vectors(scalar_to_vector(x),scalar_to_vector(x)) -> broadcast(x)
if (Op0.getOpcode() == ISD::SCALAR_TO_VECTOR &&
(Subtarget.hasAVX2() ||
- (EltSizeInBits >= 32 && MayFoldLoad(Op0.getOperand(0)))) &&
+ (EltSizeInBits >= 32 &&
+ X86::mayFoldLoad(Op0.getOperand(0), Subtarget))) &&
Op0.getOperand(0).getValueType() == VT.getScalarType())
return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Op0.getOperand(0));
@@ -50773,7 +52523,7 @@ static SDValue combineScalarToVector(SDNode *N, SelectionDAG &DAG) {
// TODO: SimplifyDemandedBits instead?
if (VT == MVT::v1i1 && Src.getOpcode() == ISD::AND && Src.hasOneUse())
if (auto *C = dyn_cast<ConstantSDNode>(Src.getOperand(1)))
- if (C->getAPIntValue().isOneValue())
+ if (C->getAPIntValue().isOne())
return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1,
Src.getOperand(0));
@@ -50782,7 +52532,7 @@ static SDValue combineScalarToVector(SDNode *N, SelectionDAG &DAG) {
Src.hasOneUse() && Src.getOperand(0).getValueType().isVector() &&
Src.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
if (auto *C = dyn_cast<ConstantSDNode>(Src.getOperand(1)))
- if (C->isNullValue())
+ if (C->isZero())
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Src.getOperand(0),
Src.getOperand(1));
@@ -50851,7 +52601,7 @@ static SDValue combinePMULDQ(SDNode *N, SelectionDAG &DAG,
// PMULDQ/PMULUDQ only uses lower 32 bits from each vector element.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (TLI.SimplifyDemandedBits(SDValue(N, 0), APInt::getAllOnesValue(64), DCI))
+ if (TLI.SimplifyDemandedBits(SDValue(N, 0), APInt::getAllOnes(64), DCI))
return SDValue(N, 0);
// If the input is an extend_invec and the SimplifyDemandedBits call didn't
@@ -50885,6 +52635,29 @@ static SDValue combinePMULDQ(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+// Simplify VPMADDUBSW/VPMADDWD operations.
+static SDValue combineVPMADD(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ EVT VT = N->getValueType(0);
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ // Multiply by zero.
+ // Don't return LHS/RHS as it may contain UNDEFs.
+ if (ISD::isBuildVectorAllZeros(LHS.getNode()) ||
+ ISD::isBuildVectorAllZeros(RHS.getNode()))
+ return DAG.getConstant(0, SDLoc(N), VT);
+
+ APInt KnownUndef, KnownZero;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ APInt DemandedElts = APInt::getAllOnes(VT.getVectorNumElements());
+ if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, KnownUndef,
+ KnownZero, DCI))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
static SDValue combineEXTEND_VECTOR_INREG(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
@@ -50950,7 +52723,7 @@ static SDValue combineKSHIFT(SDNode *N, SelectionDAG &DAG,
APInt KnownUndef, KnownZero;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
+ APInt DemandedElts = APInt::getAllOnes(VT.getVectorNumElements());
if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, KnownUndef,
KnownZero, DCI))
return SDValue(N, 0);
@@ -50988,6 +52761,9 @@ static SDValue combineFP_EXTEND(SDNode *N, SelectionDAG &DAG,
if (!Subtarget.hasF16C() || Subtarget.useSoftFloat())
return SDValue();
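+ // AVX512-FP16 provides native half-precision conversions, so this
+ // F16C-oriented combine is not needed.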
+ if (Subtarget.hasFP16())
+ return SDValue();
+
bool IsStrict = N->isStrictFPOpcode();
EVT VT = N->getValueType(0);
SDValue Src = N->getOperand(IsStrict ? 1 : 0);
@@ -51096,6 +52872,9 @@ static SDValue combineFP_ROUND(SDNode *N, SelectionDAG &DAG,
if (!Subtarget.hasF16C() || Subtarget.useSoftFloat())
return SDValue();
+ if (Subtarget.hasFP16())
+ return SDValue();
+
EVT VT = N->getValueType(0);
SDValue Src = N->getOperand(0);
EVT SrcVT = Src.getValueType();
@@ -51156,8 +52935,7 @@ static SDValue combinePDEP(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
unsigned NumBits = N->getSimpleValueType(0).getSizeInBits();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (TLI.SimplifyDemandedBits(SDValue(N, 0),
- APInt::getAllOnesValue(NumBits), DCI))
+ if (TLI.SimplifyDemandedBits(SDValue(N, 0), APInt::getAllOnes(NumBits), DCI))
return SDValue(N, 0);
return SDValue();
@@ -51215,6 +52993,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
return combineUIntToFP(N, DAG, Subtarget);
case ISD::FADD:
case ISD::FSUB: return combineFaddFsub(N, DAG, Subtarget);
+ case X86ISD::VFCMULC:
+ case X86ISD::VFMULC: return combineFMulcFCMulc(N, DAG, Subtarget);
case ISD::FNEG: return combineFneg(N, DAG, DCI, Subtarget);
case ISD::TRUNCATE: return combineTruncate(N, DAG, Subtarget);
case X86ISD::VTRUNC: return combineVTRUNC(N, DAG, DCI);
@@ -51289,6 +53069,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::MOVDDUP:
case X86ISD::MOVSS:
case X86ISD::MOVSD:
+ case X86ISD::MOVSH:
case X86ISD::VBROADCAST:
case X86ISD::VPPERM:
case X86ISD::VPERMI:
@@ -51319,13 +53100,16 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::FMSUBADD: return combineFMADDSUB(N, DAG, DCI);
case X86ISD::MOVMSK: return combineMOVMSK(N, DAG, DCI, Subtarget);
case X86ISD::MGATHER:
- case X86ISD::MSCATTER: return combineX86GatherScatter(N, DAG, DCI);
+ case X86ISD::MSCATTER:
+ return combineX86GatherScatter(N, DAG, DCI, Subtarget);
case ISD::MGATHER:
case ISD::MSCATTER: return combineGatherScatter(N, DAG, DCI);
case X86ISD::PCMPEQ:
case X86ISD::PCMPGT: return combineVectorCompare(N, DAG, Subtarget);
case X86ISD::PMULDQ:
case X86ISD::PMULUDQ: return combinePMULDQ(N, DAG, DCI, Subtarget);
+ case X86ISD::VPMADDUBSW:
+ case X86ISD::VPMADDWD: return combineVPMADD(N, DAG, DCI);
case X86ISD::KSHIFTL:
case X86ISD::KSHIFTR: return combineKSHIFT(N, DAG, DCI);
case ISD::FP16_TO_FP: return combineFP16_TO_FP(N, DAG, Subtarget);
@@ -51451,7 +53235,7 @@ bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
case ISD::SRL: {
SDValue N0 = Op.getOperand(0);
// Look out for (store (shl (load), x)).
- if (MayFoldLoad(N0) && IsFoldableRMW(N0, Op))
+ if (X86::mayFoldLoad(N0, Subtarget) && IsFoldableRMW(N0, Op))
return false;
break;
}
@@ -51466,11 +53250,11 @@ bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
SDValue N0 = Op.getOperand(0);
SDValue N1 = Op.getOperand(1);
// Avoid disabling potential load folding opportunities.
- if (MayFoldLoad(N1) &&
+ if (X86::mayFoldLoad(N1, Subtarget) &&
(!Commute || !isa<ConstantSDNode>(N0) ||
(Op.getOpcode() != ISD::MUL && IsFoldableRMW(N1, Op))))
return false;
- if (MayFoldLoad(N0) &&
+ if (X86::mayFoldLoad(N0, Subtarget) &&
((Commute && !isa<ConstantSDNode>(N1)) ||
(Op.getOpcode() != ISD::MUL && IsFoldableRMW(N0, Op))))
return false;
@@ -51510,13 +53294,13 @@ static bool matchAsm(StringRef S, ArrayRef<const char *> Pieces) {
static bool clobbersFlagRegisters(const SmallVector<StringRef, 4> &AsmPieces) {
if (AsmPieces.size() == 3 || AsmPieces.size() == 4) {
- if (std::count(AsmPieces.begin(), AsmPieces.end(), "~{cc}") &&
- std::count(AsmPieces.begin(), AsmPieces.end(), "~{flags}") &&
- std::count(AsmPieces.begin(), AsmPieces.end(), "~{fpsr}")) {
+ if (llvm::is_contained(AsmPieces, "~{cc}") &&
+ llvm::is_contained(AsmPieces, "~{flags}") &&
+ llvm::is_contained(AsmPieces, "~{fpsr}")) {
if (AsmPieces.size() == 3)
return true;
- else if (std::count(AsmPieces.begin(), AsmPieces.end(), "~{dirflag}"))
+ else if (llvm::is_contained(AsmPieces, "~{dirflag}"))
return true;
}
}
@@ -52041,7 +53825,8 @@ static bool isGRClass(const TargetRegisterClass &RC) {
/// Check if \p RC is a vector register class.
/// I.e., FR* / VR* or one of their variant.
static bool isFRClass(const TargetRegisterClass &RC) {
- return RC.hasSuperClassEq(&X86::FR32XRegClass) ||
+ return RC.hasSuperClassEq(&X86::FR16XRegClass) ||
+ RC.hasSuperClassEq(&X86::FR32XRegClass) ||
RC.hasSuperClassEq(&X86::FR64XRegClass) ||
RC.hasSuperClassEq(&X86::VR128XRegClass) ||
RC.hasSuperClassEq(&X86::VR256XRegClass) ||
@@ -52166,6 +53951,10 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
switch (VT.SimpleTy) {
default: break;
// Scalar SSE types.
+ case MVT::f16:
+ if (VConstraint && Subtarget.hasFP16())
+ return std::make_pair(0U, &X86::FR16XRegClass);
+ break;
case MVT::f32:
case MVT::i32:
if (VConstraint && Subtarget.hasVLX())
@@ -52184,6 +53973,10 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
}
break;
// Vector types and fp128.
+ case MVT::v8f16:
+ if (!Subtarget.hasFP16())
+ break;
+ LLVM_FALLTHROUGH;
case MVT::f128:
case MVT::v16i8:
case MVT::v8i16:
@@ -52195,6 +53988,10 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
return std::make_pair(0U, &X86::VR128XRegClass);
return std::make_pair(0U, &X86::VR128RegClass);
// AVX types.
+ case MVT::v16f16:
+ if (!Subtarget.hasFP16())
+ break;
+ LLVM_FALLTHROUGH;
case MVT::v32i8:
case MVT::v16i16:
case MVT::v8i32:
@@ -52206,6 +54003,10 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
if (Subtarget.hasAVX())
return std::make_pair(0U, &X86::VR256RegClass);
break;
+ case MVT::v32f16:
+ if (!Subtarget.hasFP16())
+ break;
+ LLVM_FALLTHROUGH;
case MVT::v64i8:
case MVT::v32i16:
case MVT::v8f64:
@@ -52235,12 +54036,20 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
switch (VT.SimpleTy) {
default: break;
// Scalar SSE types.
+ case MVT::f16:
+ if (!Subtarget.hasFP16())
+ break;
+ return std::make_pair(X86::XMM0, &X86::FR16XRegClass);
case MVT::f32:
case MVT::i32:
return std::make_pair(X86::XMM0, &X86::FR32RegClass);
case MVT::f64:
case MVT::i64:
return std::make_pair(X86::XMM0, &X86::FR64RegClass);
+ case MVT::v8f16:
+ if (!Subtarget.hasFP16())
+ break;
+ LLVM_FALLTHROUGH;
case MVT::f128:
case MVT::v16i8:
case MVT::v8i16:
@@ -52250,6 +54059,10 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
case MVT::v2f64:
return std::make_pair(X86::XMM0, &X86::VR128RegClass);
// AVX types.
+ case MVT::v16f16:
+ if (!Subtarget.hasFP16())
+ break;
+ LLVM_FALLTHROUGH;
case MVT::v32i8:
case MVT::v16i16:
case MVT::v8i32:
@@ -52259,6 +54072,10 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
if (Subtarget.hasAVX())
return std::make_pair(X86::YMM0, &X86::VR256RegClass);
break;
+ case MVT::v32f16:
+ if (!Subtarget.hasFP16())
+ break;
+ LLVM_FALLTHROUGH;
case MVT::v64i8:
case MVT::v32i16:
case MVT::v8f64:
@@ -52416,7 +54233,9 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
// find, ignoring the required type.
// TODO: Handle f128 and i128 in FR128RegClass after it is tested well.
- if (VT == MVT::f32 || VT == MVT::i32)
+ if (VT == MVT::f16)
+ Res.second = &X86::FR16XRegClass;
+ else if (VT == MVT::f32 || VT == MVT::i32)
Res.second = &X86::FR32XRegClass;
else if (VT == MVT::f64 || VT == MVT::i64)
Res.second = &X86::FR64XRegClass;
@@ -52489,7 +54308,7 @@ bool X86TargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
// integer division, leaving the division as-is is a loss even in terms of
// size, because it will have to be scalarized, while the alternative code
// sequence can be performed in vector form.
- bool OptSize = Attr.hasFnAttribute(Attribute::MinSize);
+ bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
return OptSize && !VT.isVector();
}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 869857bcc0d6..6805cb75f0f2 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -14,6 +14,7 @@
#ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
#define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/TargetLowering.h"
namespace llvm {
@@ -460,6 +461,7 @@ namespace llvm {
MOVHLPS,
MOVSD,
MOVSS,
+ MOVSH,
UNPCKL,
UNPCKH,
VPERMILPV,
@@ -564,6 +566,27 @@ namespace llvm {
FMADDSUB_RND,
FMSUBADD_RND,
+ // AVX512-FP16 complex addition and multiplication.
+ VFMADDC,
+ VFMADDC_RND,
+ VFCMADDC,
+ VFCMADDC_RND,
+
+ VFMULC,
+ VFMULC_RND,
+ VFCMULC,
+ VFCMULC_RND,
+
+ VFMADDCSH,
+ VFMADDCSH_RND,
+ VFCMADDCSH,
+ VFCMADDCSH_RND,
+
+ VFMULCSH,
+ VFMULCSH_RND,
+ VFCMULCSH,
+ VFCMULCSH_RND,
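+ // The VFC* nodes are the conjugating variants, the *CSH nodes the scalar
+ // fp16 variants, and *_RND the variants taking an explicit rounding mode.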
+
// Compress and expand.
COMPRESS,
EXPAND,
@@ -627,12 +650,8 @@ namespace llvm {
// packed single precision.
DPBF16PS,
- // Save xmm argument registers to the stack, according to %al. An operator
- // is needed so that this can be expanded with control flow.
- VASTART_SAVE_XMM_REGS,
-
- // Windows's _chkstk call to do stack probing.
- WIN_ALLOCA,
+ // A stack-checking function call. On Windows it's the _chkstk call.
+ DYN_ALLOCA,
// For allocating variable amounts of stack space when using
// segmented stacks. Check if the current stacklet has enough space, and
@@ -848,6 +867,10 @@ namespace llvm {
AESENCWIDE256KL,
AESDECWIDE256KL,
+ // Save xmm argument registers to the stack, according to %al. An operator
+ // is needed so that this can be expanded with control flow.
+ VASTART_SAVE_XMM_REGS,
+
// WARNING: Do not add anything in the end unless you want the node to
// have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
// opcodes will be thought as target memory ops!
@@ -888,6 +911,25 @@ namespace llvm {
/// as zero if AllowPartialUndefs is set, else we fail and return false.
bool isConstantSplat(SDValue Op, APInt &SplatVal,
bool AllowPartialUndefs = true);
+
+ /// Check if Op is a load operation that could be folded into some other x86
+ /// instruction as a memory operand. Example: vpaddd (%rdi), %xmm0, %xmm0.
+ bool mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget,
+ bool AssumeSingleUse = false);
+
+ /// Check if Op is a load operation that could be folded into a vector splat
+ /// instruction as a memory operand. Example: vbroadcastss 16(%rdi), %xmm2.
+ bool mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT,
+ const X86Subtarget &Subtarget,
+ bool AssumeSingleUse = false);
+
+ /// Check if Op is a value that could be used to fold a store into some
+ /// other x86 instruction as a memory operand. Ex: pextrb $0, %xmm0, (%rdi).
+ bool mayFoldIntoStore(SDValue Op);
+
+ /// Check if Op is an operation that could be folded into a zero extend x86
+ /// instruction.
+ bool mayFoldIntoZeroExtend(SDValue Op);
} // end namespace X86
//===--------------------------------------------------------------------===//
@@ -923,7 +965,7 @@ namespace llvm {
/// function arguments in the caller parameter area. For X86, aggregates
/// that contain SSE vectors are placed at 16-byte boundaries while the rest are at
/// 4-byte boundaries.
- unsigned getByValTypeAlignment(Type *Ty,
+ uint64_t getByValTypeAlignment(Type *Ty,
const DataLayout &DL) const override;
EVT getOptimalMemOpType(const MemOp &Op,
@@ -989,7 +1031,7 @@ namespace llvm {
}
bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
- const SelectionDAG &DAG) const override;
+ const MachineFunction &MF) const override;
bool isCheapToSpeculateCttz() const override;
@@ -998,7 +1040,8 @@ namespace llvm {
bool isCtlzFast() const override;
bool hasBitPreservingFPLogic(EVT VT) const override {
- return VT == MVT::f32 || VT == MVT::f64 || VT.isVector();
+ return VT == MVT::f32 || VT == MVT::f64 || VT.isVector() ||
+ (VT == MVT::f16 && X86ScalarSSEf16);
}
bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
@@ -1282,7 +1325,8 @@ namespace llvm {
/// register, not on the X87 floating point stack.
bool isScalarFPTypeInSSEReg(EVT VT) const {
return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
- (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1
+ (VT == MVT::f32 && X86ScalarSSEf32) || // f32 is when SSE1
+ (VT == MVT::f16 && X86ScalarSSEf16); // f16 is when AVX512FP16
}
/// Returns true if it is beneficial to convert a load of a constant
@@ -1442,6 +1486,7 @@ namespace llvm {
/// When SSE2 is available, use it for f64 operations.
bool X86ScalarSSEf32;
bool X86ScalarSSEf64;
+ bool X86ScalarSSEf16;
/// A list of legal FP immediates.
std::vector<APFloat> LegalFPImmediates;
@@ -1472,16 +1517,11 @@ namespace llvm {
/// Check whether the call is eligible for tail call optimization. Targets
/// that want to do tail call optimization should implement this function.
- bool IsEligibleForTailCallOptimization(SDValue Callee,
- CallingConv::ID CalleeCC,
- bool isVarArg,
- bool isCalleeStructRet,
- bool isCallerStructRet,
- Type *RetTy,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- SelectionDAG& DAG) const;
+ bool IsEligibleForTailCallOptimization(
+ SDValue Callee, CallingConv::ID CalleeCC, bool IsCalleeStackStructRet,
+ bool isVarArg, Type *RetTy, const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;
SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
SDValue Chain, bool IsTailCall,
bool Is64Bit, int FPDiff,
@@ -1540,6 +1580,9 @@ namespace llvm {
SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerWin64_FP_TO_INT128(SDValue Op, SelectionDAG &DAG,
+ SDValue &Chain) const;
+ SDValue LowerWin64_INT128_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGC_TRANSITION(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp b/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp
index 85410c54a4d2..732b2b1a5ada 100644
--- a/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp
+++ b/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp
@@ -92,7 +92,7 @@ static bool IsCallReturnTwice(llvm::MachineOperand &MOp) {
if (!CalleeFn)
return false;
AttributeList Attrs = CalleeFn->getAttributes();
- return Attrs.hasFnAttribute(Attribute::ReturnsTwice);
+ return Attrs.hasFnAttr(Attribute::ReturnsTwice);
}
bool X86IndirectBranchTrackingPass::runOnMachineFunction(MachineFunction &MF) {
diff --git a/llvm/lib/Target/X86/X86IndirectThunks.cpp b/llvm/lib/Target/X86/X86IndirectThunks.cpp
index 3d96d198b409..e08b4b7c03c6 100644
--- a/llvm/lib/Target/X86/X86IndirectThunks.cpp
+++ b/llvm/lib/Target/X86/X86IndirectThunks.cpp
@@ -212,7 +212,7 @@ void RetpolineThunkInserter::populateThunk(MachineFunction &MF) {
MF.push_back(CallTarget);
const unsigned CallOpc = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32;
- const unsigned RetOpc = Is64Bit ? X86::RETQ : X86::RETL;
+ const unsigned RetOpc = Is64Bit ? X86::RET64 : X86::RET32;
Entry->addLiveIn(ThunkReg);
BuildMI(Entry, DebugLoc(), TII->get(CallOpc)).addSym(TargetSym);
diff --git a/llvm/lib/Target/X86/X86InsertWait.cpp b/llvm/lib/Target/X86/X86InsertWait.cpp
index 56d2709f5937..69a3d32a9314 100644
--- a/llvm/lib/Target/X86/X86InsertWait.cpp
+++ b/llvm/lib/Target/X86/X86InsertWait.cpp
@@ -55,23 +55,6 @@ char WaitInsert::ID = 0;
FunctionPass *llvm::createX86InsertX87waitPass() { return new WaitInsert(); }
-/// Return true if the Reg is X87 register.
-static bool isX87Reg(unsigned Reg) {
- return (Reg == X86::FPCW || Reg == X86::FPSW ||
- (Reg >= X86::ST0 && Reg <= X86::ST7));
-}
-
-/// check if the instruction is X87 instruction
-static bool isX87Instruction(MachineInstr &MI) {
- for (const MachineOperand &MO : MI.operands()) {
- if (!MO.isReg())
- continue;
- if (isX87Reg(MO.getReg()))
- return true;
- }
- return false;
-}
-
static bool isX87ControlInstruction(MachineInstr &MI) {
switch (MI.getOpcode()) {
case X86::FNINIT:
@@ -121,7 +104,7 @@ bool WaitInsert::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock &MBB : MF) {
for (MachineBasicBlock::iterator MI = MBB.begin(); MI != MBB.end(); ++MI) {
// Skip non-X87 instructions.
- if (!isX87Instruction(*MI))
+ if (!X86::isX87Instruction(*MI))
continue;
// If the instruction neither has a float exception nor is
// a load/store instruction, or the instruction is x87 control
@@ -132,7 +115,7 @@ bool WaitInsert::runOnMachineFunction(MachineFunction &MF) {
// If the following instruction is an X87 instruction and isn't an X87
// non-waiting control instruction, we can omit insert wait instruction.
MachineBasicBlock::iterator AfterMI = std::next(MI);
- if (AfterMI != MBB.end() && isX87Instruction(*AfterMI) &&
+ if (AfterMI != MBB.end() && X86::isX87Instruction(*AfterMI) &&
!isX87NonWaitingControlInstruction(*AfterMI))
continue;
diff --git a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
index 7d9466f0d181..ff8710634e89 100644
--- a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
@@ -239,7 +239,7 @@ static Value *simplifyX86immShift(const IntrinsicInst &II,
KnownBits KnownUpperBits = llvm::computeKnownBits(
Amt, DemandedUpper, II.getModule()->getDataLayout());
if (KnownLowerBits.getMaxValue().ult(BitWidth) &&
- (DemandedUpper.isNullValue() || KnownUpperBits.isZero())) {
+ (DemandedUpper.isZero() || KnownUpperBits.isZero())) {
SmallVector<int, 16> ZeroSplat(VWidth, 0);
Amt = Builder.CreateShuffleVector(Amt, ZeroSplat);
return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
@@ -269,7 +269,7 @@ static Value *simplifyX86immShift(const IntrinsicInst &II,
}
// If shift-by-zero then just return the original value.
- if (Count.isNullValue())
+ if (Count.isZero())
return Vec;
// Handle cases when Shift >= BitWidth.
@@ -476,7 +476,7 @@ static Value *simplifyX86pack(IntrinsicInst &II,
// PACKUS: Truncate signed value with unsigned saturation.
// Source values less than zero are saturated to zero.
// Source values greater than dst maxuint are saturated to maxuint.
- MinValue = APInt::getNullValue(SrcScalarSizeInBits);
+ MinValue = APInt::getZero(SrcScalarSizeInBits);
MaxValue = APInt::getLowBitsSet(SrcScalarSizeInBits, DstScalarSizeInBits);
}
@@ -1764,7 +1764,7 @@ Optional<Value *> X86TTIImpl::simplifyDemandedUseBitsIntrinsic(
// we know that DemandedMask is non-zero already.
APInt DemandedElts = DemandedMask.zextOrTrunc(ArgWidth);
Type *VTy = II.getType();
- if (DemandedElts.isNullValue()) {
+ if (DemandedElts.isZero()) {
return ConstantInt::getNullValue(VTy);
}
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index dd61d91c3a62..8aee96e1c504 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -44,8 +44,9 @@ class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
// It is a little bit complex for scalar types, where NumElts = 1.
// In this case we build v4f32 or v2f64
string VTName = "v" # !if (!eq (NumElts, 1),
+ !if (!eq (EltVT.Size, 16), 8,
!if (!eq (EltVT.Size, 32), 4,
- !if (!eq (EltVT.Size, 64), 2, NumElts)), NumElts) # EltVT;
+ !if (!eq (EltVT.Size, 64), 2, NumElts))), NumElts) # EltVT;
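+ // With FP16 a scalar f16 element likewise builds v8f16.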
// The vector VT.
ValueType VT = !cast<ValueType>(VTName);
@@ -65,8 +66,9 @@ class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
// FP scalar memory operand for intrinsics - ssmem/sdmem.
- Operand IntScalarMemOp = !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
- !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?));
+ Operand IntScalarMemOp = !if (!eq (EltTypeName, "f16"), !cast<Operand>("shmem"),
+ !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
+ !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?)));
// Load patterns
PatFrag LdFrag = !cast<PatFrag>("load" # VTName);
@@ -76,11 +78,9 @@ class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
PatFrag BroadcastLdFrag = !cast<PatFrag>("X86VBroadcastld" # EltSizeName);
- PatFrags ScalarIntMemFrags = !if (!eq (EltTypeName, "f32"),
- !cast<PatFrags>("sse_load_f32"),
- !if (!eq (EltTypeName, "f64"),
- !cast<PatFrags>("sse_load_f64"),
- ?));
+ PatFrags ScalarIntMemFrags = !if (!eq (EltTypeName, "f16"), !cast<PatFrags>("sse_load_f16"),
+ !if (!eq (EltTypeName, "f32"), !cast<PatFrags>("sse_load_f32"),
+ !if (!eq (EltTypeName, "f64"), !cast<PatFrags>("sse_load_f64"), ?)));
// The string to specify embedded broadcast in assembly.
string BroadcastStr = "{1to" # NumElts # "}";
@@ -95,9 +95,12 @@ class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
!if (!eq (EltTypeName, "f64"), SSEPackedDouble,
- SSEPackedInt));
+ !if (!eq (EltTypeName, "f16"), SSEPackedSingle, // FIXME?
+ SSEPackedInt)));
- RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);
+ RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X,
+ !if (!eq (EltTypeName, "f16"), FR16X,
+ FR64X));
dag ImmAllZerosV = (VT immAllZerosV);
@@ -109,6 +112,7 @@ def v64i8_info : X86VectorVTInfo<64, i8, VR512, "b">;
def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
def v8i64_info : X86VectorVTInfo<8, i64, VR512, "q">;
+def v32f16_info : X86VectorVTInfo<32, f16, VR512, "ph">;
def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
def v8f64_info : X86VectorVTInfo<8, f64, VR512, "pd">;
@@ -117,6 +121,7 @@ def v32i8x_info : X86VectorVTInfo<32, i8, VR256X, "b">;
def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
def v8i32x_info : X86VectorVTInfo<8, i32, VR256X, "d">;
def v4i64x_info : X86VectorVTInfo<4, i64, VR256X, "q">;
+def v16f16x_info : X86VectorVTInfo<16, f16, VR256X, "ph">;
def v8f32x_info : X86VectorVTInfo<8, f32, VR256X, "ps">;
def v4f64x_info : X86VectorVTInfo<4, f64, VR256X, "pd">;
@@ -124,6 +129,7 @@ def v16i8x_info : X86VectorVTInfo<16, i8, VR128X, "b">;
def v8i16x_info : X86VectorVTInfo<8, i16, VR128X, "w">;
def v4i32x_info : X86VectorVTInfo<4, i32, VR128X, "d">;
def v2i64x_info : X86VectorVTInfo<2, i64, VR128X, "q">;
+def v8f16x_info : X86VectorVTInfo<8, f16, VR128X, "ph">;
def v4f32x_info : X86VectorVTInfo<4, f32, VR128X, "ps">;
def v2f64x_info : X86VectorVTInfo<2, f64, VR128X, "pd">;
@@ -131,6 +137,7 @@ def v2f64x_info : X86VectorVTInfo<2, f64, VR128X, "pd">;
// with the appropriate element type. This allows to use the same masking logic.
def i32x_info : X86VectorVTInfo<1, i32, GR32, "si">;
def i64x_info : X86VectorVTInfo<1, i64, GR64, "sq">;
+def f16x_info : X86VectorVTInfo<1, f16, VR128X, "sh">;
def f32x_info : X86VectorVTInfo<1, f32, VR128X, "ss">;
def f64x_info : X86VectorVTInfo<1, f64, VR128X, "sd">;
@@ -149,6 +156,8 @@ def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
v4i32x_info>;
def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
v2i64x_info>;
+def avx512vl_f16_info : AVX512VLVectorVTInfo<v32f16_info, v16f16x_info,
+ v8f16x_info>;
def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
v4f32x_info>;
def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
@@ -196,8 +205,9 @@ multiclass AVX512_maskable_custom<bits<8> O, Format F,
string MaskingConstraint = "",
bit IsCommutable = 0,
bit IsKCommutable = 0,
- bit IsKZCommutable = IsCommutable> {
- let isCommutable = IsCommutable in
+ bit IsKZCommutable = IsCommutable,
+ string ClobberConstraint = ""> {
+ let isCommutable = IsCommutable, Constraints = ClobberConstraint in
def NAME: AVX512<O, F, Outs, Ins,
OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
"$dst, "#IntelSrcAsm#"}",
@@ -211,12 +221,15 @@ multiclass AVX512_maskable_custom<bits<8> O, Format F,
MaskingPattern>,
EVEX_K {
// In case of the 3src subclass this is overridden with a let.
- string Constraints = MaskingConstraint;
+ string Constraints = !if(!eq(ClobberConstraint, ""), MaskingConstraint,
+ !if(!eq(MaskingConstraint, ""), ClobberConstraint,
+ !strconcat(ClobberConstraint, ", ", MaskingConstraint)));
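+ // Merge the clobber and masking constraints, comma-separated when both
+ // are present.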
}
// Zero mask does not add any restrictions to commute operands transformation.
// So, it is Ok to use IsCommutable instead of IsKCommutable.
- let isCommutable = IsKZCommutable in // Prefer over VMOV*rrkz Pat<>
+ let isCommutable = IsKZCommutable, // Prefer over VMOV*rrkz Pat<>
+ Constraints = ClobberConstraint in
def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
"$dst {${mask}} {z}, "#IntelSrcAsm#"}",
@@ -236,7 +249,8 @@ multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
string MaskingConstraint = "",
bit IsCommutable = 0,
bit IsKCommutable = 0,
- bit IsKZCommutable = IsCommutable> :
+ bit IsKZCommutable = IsCommutable,
+ string ClobberConstraint = ""> :
AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
AttSrcAsm, IntelSrcAsm,
[(set _.RC:$dst, RHS)],
@@ -244,7 +258,7 @@ multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
[(set _.RC:$dst,
(Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
MaskingConstraint, IsCommutable,
- IsKCommutable, IsKZCommutable>;
+ IsKCommutable, IsKZCommutable, ClobberConstraint>;
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
@@ -254,6 +268,7 @@ multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag Ins, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
dag RHS, dag MaskRHS,
+ string ClobberConstraint = "",
bit IsCommutable = 0, bit IsKCommutable = 0,
bit IsKZCommutable = IsCommutable> :
AVX512_maskable_custom<O, F, Outs, Ins,
@@ -266,7 +281,7 @@ multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
[(set _.RC:$dst,
(vselect_mask _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
"$src0 = $dst", IsCommutable, IsKCommutable,
- IsKZCommutable>;
+ IsKZCommutable, ClobberConstraint>;
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
@@ -277,14 +292,15 @@ multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
dag RHS,
bit IsCommutable = 0, bit IsKCommutable = 0,
bit IsKZCommutable = IsCommutable,
- SDPatternOperator Select = vselect_mask> :
+ SDPatternOperator Select = vselect_mask,
+ string ClobberConstraint = ""> :
AVX512_maskable_common<O, F, _, Outs, Ins,
!con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
!con((ins _.KRCWM:$mask), Ins),
OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
(Select _.KRCWM:$mask, RHS, _.RC:$src0),
Select, "$src0 = $dst", IsCommutable, IsKCommutable,
- IsKZCommutable>;
+ IsKZCommutable, ClobberConstraint>;
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the scalar instruction.
@@ -501,6 +517,12 @@ def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>;
}
+let Predicates = [HasFP16] in {
+def : Pat<(v8f16 immAllZerosV), (AVX512_128_SET0)>;
+def : Pat<(v16f16 immAllZerosV), (AVX512_256_SET0)>;
+def : Pat<(v32f16 immAllZerosV), (AVX512_512_SET0)>;
+}
+
// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
@@ -513,6 +535,12 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
[(set VR128X:$dst, fp128imm0)]>;
}
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+ isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasFP16] in {
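+ // fp16 counterpart of the FLD0 pseudos above, expanded the same way post-RA.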
+ def AVX512_FsFLD0SH : I<0, Pseudo, (outs FR16X:$dst), (ins), "",
+ [(set FR16X:$dst, fp16imm0)]>;
+}
+
//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
//
@@ -649,16 +677,22 @@ defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
+defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v8f16x_info, v16f16x_info,
+ vinsert128_insert, INSERT_get_vinsert128_imm, [HasFP16, HasVLX]>;
// Codegen pattern with the alternative types insert VEC128 into VEC512
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
+defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v8f16x_info, v32f16_info,
+ vinsert128_insert, INSERT_get_vinsert128_imm, [HasFP16]>;
// Codegen pattern with the alternative types insert VEC256 into VEC512
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
+defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v16f16x_info, v32f16_info,
+ vinsert256_insert, INSERT_get_vinsert256_imm, [HasFP16]>;
multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
@@ -944,17 +978,23 @@ defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info
vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
+defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v16f16x_info, v8f16x_info,
+ vextract128_extract, EXTRACT_get_vextract128_imm, [HasFP16, HasVLX]>;
// Codegen pattern with the alternative types extract VEC128 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
+defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v32f16_info, v8f16x_info,
+ vextract128_extract, EXTRACT_get_vextract128_imm, [HasFP16]>;
// Codegen pattern with the alternative types extract VEC256 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
+defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v32f16_info, v16f16x_info,
+ vextract256_extract, EXTRACT_get_vextract256_imm, [HasFP16]>;
// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
@@ -1015,6 +1055,12 @@ def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
(iPTR 1)))>;
}
+let Predicates = [HasFP16, HasVLX] in
+def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
+ (v8f16 (VEXTRACTF32x4Z256rr
+ (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
+ (iPTR 1)))>;
+
// Additional patterns for handling a bitcast between the vselect and the
// extract_subvector.
@@ -1140,9 +1186,8 @@ def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
// AVX-512 BROADCAST
//---
// broadcast with a scalar argument.
-multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
- string Name,
- X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
+multiclass avx512_broadcast_scalar<string Name, X86VectorVTInfo DestInfo,
+ X86VectorVTInfo SrcInfo> {
def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
(!cast<Instruction>(Name#DestInfo.ZSuffix#rr)
(SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
@@ -1162,7 +1207,6 @@ multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
// Split version to allow mask and broadcast node to be different types. This
// helps support the 32x2 broadcasts.
multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
- string Name,
SchedWrite SchedRR, SchedWrite SchedRM,
X86VectorVTInfo MaskInfo,
X86VectorVTInfo DestInfo,
@@ -1251,54 +1295,49 @@ multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
}
// Helper class to force mask and broadcast result to same type.
-multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr, string Name,
+multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
SchedWrite SchedRR, SchedWrite SchedRM,
X86VectorVTInfo DestInfo,
X86VectorVTInfo SrcInfo,
bit IsConvertibleToThreeAddress> :
- avx512_broadcast_rm_split<opc, OpcodeStr, Name, SchedRR, SchedRM,
+ avx512_broadcast_rm_split<opc, OpcodeStr, SchedRR, SchedRM,
DestInfo, DestInfo, SrcInfo,
IsConvertibleToThreeAddress>;
multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
- AVX512VLVectorVTInfo _> {
+ AVX512VLVectorVTInfo _> {
let Predicates = [HasAVX512] in {
- defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
+ defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
WriteFShuffle256Ld, _.info512, _.info128, 1>,
- avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
- _.info128>,
+ avx512_broadcast_scalar<NAME, _.info512, _.info128>,
EVEX_V512;
}
let Predicates = [HasVLX] in {
- defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
+ defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
WriteFShuffle256Ld, _.info256, _.info128, 1>,
- avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
- _.info128>,
+ avx512_broadcast_scalar<NAME, _.info256, _.info128>,
EVEX_V256;
}
}
multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
- AVX512VLVectorVTInfo _> {
+ AVX512VLVectorVTInfo _> {
let Predicates = [HasAVX512] in {
- defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
+ defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
WriteFShuffle256Ld, _.info512, _.info128, 1>,
- avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
- _.info128>,
+ avx512_broadcast_scalar<NAME, _.info512, _.info128>,
EVEX_V512;
}
let Predicates = [HasVLX] in {
- defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
+ defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
WriteFShuffle256Ld, _.info256, _.info128, 1>,
- avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
- _.info128>,
+ avx512_broadcast_scalar<NAME, _.info256, _.info128>,
EVEX_V256;
- defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
+ defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
WriteFShuffle256Ld, _.info128, _.info128, 1>,
- avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info128,
- _.info128>,
+ avx512_broadcast_scalar<NAME, _.info128, _.info128>,
EVEX_V128;
}
}
@@ -1384,20 +1423,20 @@ defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
X86VBroadcast, GR64, HasAVX512>, VEX_W;
multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
- AVX512VLVectorVTInfo _, Predicate prd,
- bit IsConvertibleToThreeAddress> {
+ AVX512VLVectorVTInfo _, Predicate prd,
+ bit IsConvertibleToThreeAddress> {
let Predicates = [prd] in {
- defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
+ defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
WriteShuffle256Ld, _.info512, _.info128,
IsConvertibleToThreeAddress>,
EVEX_V512;
}
let Predicates = [prd, HasVLX] in {
- defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
+ defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
WriteShuffle256Ld, _.info256, _.info128,
IsConvertibleToThreeAddress>,
EVEX_V256;
- defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle,
+ defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle,
WriteShuffleXLd, _.info128, _.info128,
IsConvertibleToThreeAddress>,
EVEX_V128;
@@ -1439,6 +1478,31 @@ multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
Sched<[SchedWriteShuffle.YMM.Folded]>,
AVX5128IBase, EVEX;
}
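+// The f16 broadcast patterns below reuse the VPBROADCASTW instructions;
+// scalar FR16X sources are first copied into a VR128X register.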
+let Predicates = [HasFP16] in {
+ def : Pat<(v32f16 (X86VBroadcastld16 addr:$src)),
+ (VPBROADCASTWZrm addr:$src)>;
+
+ def : Pat<(v32f16 (X86VBroadcast (v8f16 VR128X:$src))),
+ (VPBROADCASTWZrr VR128X:$src)>;
+ def : Pat<(v32f16 (X86VBroadcast (f16 FR16X:$src))),
+ (VPBROADCASTWZrr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
+}
+let Predicates = [HasVLX, HasFP16] in {
+ def : Pat<(v8f16 (X86VBroadcastld16 addr:$src)),
+ (VPBROADCASTWZ128rm addr:$src)>;
+ def : Pat<(v16f16 (X86VBroadcastld16 addr:$src)),
+ (VPBROADCASTWZ256rm addr:$src)>;
+
+ def : Pat<(v8f16 (X86VBroadcast (v8f16 VR128X:$src))),
+ (VPBROADCASTWZ128rr VR128X:$src)>;
+ def : Pat<(v16f16 (X86VBroadcast (v8f16 VR128X:$src))),
+ (VPBROADCASTWZ256rr VR128X:$src)>;
+
+ def : Pat<(v8f16 (X86VBroadcast (f16 FR16X:$src))),
+ (VPBROADCASTWZ128rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
+ def : Pat<(v16f16 (X86VBroadcast (f16 FR16X:$src))),
+ (VPBROADCASTWZ256rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
+}
//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST SUBVECTORS
@@ -1462,6 +1526,8 @@ def : Pat<(v8f64 (X86SubVBroadcastld256 addr:$src)),
(VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v16f32 (X86SubVBroadcastld256 addr:$src)),
(VBROADCASTF64X4rm addr:$src)>;
+def : Pat<(v32f16 (X86SubVBroadcastld256 addr:$src)),
+ (VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v8i64 (X86SubVBroadcastld256 addr:$src)),
(VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v16i32 (X86SubVBroadcastld256 addr:$src)),
@@ -1475,6 +1541,8 @@ def : Pat<(v8f64 (X86SubVBroadcastld128 addr:$src)),
(VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v16f32 (X86SubVBroadcastld128 addr:$src)),
(VBROADCASTF32X4rm addr:$src)>;
+def : Pat<(v32f16 (X86SubVBroadcastld128 addr:$src)),
+ (VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v8i64 (X86SubVBroadcastld128 addr:$src)),
(VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v16i32 (X86SubVBroadcastld128 addr:$src)),
@@ -1532,6 +1600,8 @@ def : Pat<(v4f64 (X86SubVBroadcastld128 addr:$src)),
(VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v8f32 (X86SubVBroadcastld128 addr:$src)),
(VBROADCASTF32X4Z256rm addr:$src)>;
+def : Pat<(v16f16 (X86SubVBroadcastld128 addr:$src)),
+ (VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v4i64 (X86SubVBroadcastld128 addr:$src)),
(VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)),
@@ -1638,25 +1708,27 @@ def : Pat<(vselect_mask VK8WM:$mask,
}
multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
- AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> {
+ AVX512VLVectorVTInfo _Dst,
+ AVX512VLVectorVTInfo _Src> {
let Predicates = [HasDQI] in
- defm Z : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
+ defm Z : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
WriteShuffle256Ld, _Dst.info512,
_Src.info512, _Src.info128, 0, null_frag, null_frag>,
EVEX_V512;
let Predicates = [HasDQI, HasVLX] in
- defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
+ defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
WriteShuffle256Ld, _Dst.info256,
_Src.info256, _Src.info128, 0, null_frag, null_frag>,
EVEX_V256;
}
multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
- AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> :
+ AVX512VLVectorVTInfo _Dst,
+ AVX512VLVectorVTInfo _Src> :
avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {
let Predicates = [HasDQI, HasVLX] in
- defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle,
+ defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle,
WriteShuffleXLd, _Dst.info128,
_Src.info128, _Src.info128, 0, null_frag, null_frag>,
EVEX_V128;
@@ -2099,6 +2171,10 @@ let Predicates = [HasAVX512] in {
X86cmpms_su, X86cmpmsSAE_su,
SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W;
}
+let Predicates = [HasFP16], ExeDomain = SSEPackedSingle in
+ defm VCMPSHZ : avx512_cmp_scalar<f16x_info, X86cmpms, X86cmpmsSAE,
+ X86cmpms_su, X86cmpmsSAE_su,
+ SchedWriteFCmp.Scl>, AVX512XSIi8Base, TA;
multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched,
@@ -2561,13 +2637,14 @@ multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
EVEX_B, Sched<[sched]>;
}
-multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
- let Predicates = [HasAVX512] in {
+multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
+ Predicate Pred = HasAVX512> {
+ let Predicates = [Pred] in {
defm Z : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;
}
- let Predicates = [HasAVX512,HasVLX] in {
+ let Predicates = [Pred,HasVLX] in {
defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
}
@@ -2577,18 +2654,23 @@ defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
+defm VCMPPH : avx512_vcmp<SchedWriteFCmp, avx512vl_f16_info, HasFP16>,
+ AVX512PSIi8Base, EVEX_4V, EVEX_CD8<16, CD8VF>, TA;
// Patterns to select fp compares with load as first operand.
let Predicates = [HasAVX512] in {
- def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1,
- timm:$cc)),
+ def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1, timm:$cc)),
(VCMPSDZrm FR64X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
- def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1,
- timm:$cc)),
+ def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1, timm:$cc)),
(VCMPSSZrm FR32X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
}
+let Predicates = [HasFP16] in {
+ def : Pat<(v1i1 (X86cmpms (loadf16 addr:$src2), FR16X:$src1, timm:$cc)),
+ (VCMPSHZrm FR16X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
+}
+
// ----------------------------------------------------------------
// FPClass
@@ -2736,24 +2818,28 @@ multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
}
multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
- bits<8> opcScalar, X86SchedWriteWidths sched,
- Predicate prd> {
+ bits<8> opcScalar, X86SchedWriteWidths sched> {
+ defm PH : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f16_info, opcVec,
+ sched, HasFP16>,
+ EVEX_CD8<16, CD8VF>, AVX512PSIi8Base, TA;
+ defm SHZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
+ sched.Scl, f16x_info, HasFP16>,
+ EVEX_CD8<16, CD8VT1>, AVX512PSIi8Base, TA;
defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec,
- sched, prd>,
- EVEX_CD8<32, CD8VF>;
+ sched, HasDQI>,
+ EVEX_CD8<32, CD8VF>, AVX512AIi8Base;
defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec,
- sched, prd>,
- EVEX_CD8<64, CD8VF> , VEX_W;
+ sched, HasDQI>,
+ EVEX_CD8<64, CD8VF>, AVX512AIi8Base, VEX_W;
defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
- sched.Scl, f32x_info, prd>, VEX_LIG,
- EVEX_CD8<32, CD8VT1>;
+ sched.Scl, f32x_info, HasDQI>, VEX_LIG,
+ EVEX_CD8<32, CD8VT1>, AVX512AIi8Base;
defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
- sched.Scl, f64x_info, prd>, VEX_LIG,
- EVEX_CD8<64, CD8VT1>, VEX_W;
+ sched.Scl, f64x_info, HasDQI>, VEX_LIG,
+ EVEX_CD8<64, CD8VT1>, AVX512AIi8Base, VEX_W;
}
-defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp,
- HasDQI>, AVX512AIi8Base, EVEX;
+defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp>, EVEX;
//-----------------------------------------------------------------
// Mask register copy, including
@@ -3766,6 +3852,110 @@ let Predicates = [HasVLX] in {
def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
(VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
}
+let Predicates = [HasFP16] in {
+ def : Pat<(v32f16 (vselect VK32WM:$mask, (v32f16 VR512:$src1), (v32f16 VR512:$src0))),
+ (VMOVDQU16Zrrk VR512:$src0, VK32WM:$mask, VR512:$src1)>;
+ def : Pat<(v32f16 (vselect VK32WM:$mask, (v32f16 VR512:$src1), v32f16_info.ImmAllZerosV)),
+ (VMOVDQU16Zrrkz VK32WM:$mask, VR512:$src1)>;
+ def : Pat<(v32f16 (alignedloadv32f16 addr:$src)),
+ (VMOVAPSZrm addr:$src)>;
+ def : Pat<(v32f16 (vselect VK32WM:$mask,
+ (v32f16 (alignedloadv32f16 addr:$src)), (v32f16 VR512:$src0))),
+ (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
+ def : Pat<(v32f16 (vselect VK32WM:$mask,
+ (v32f16 (alignedloadv32f16 addr:$src)), v32f16_info.ImmAllZerosV)),
+ (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
+ def : Pat<(v32f16 (loadv32f16 addr:$src)),
+ (VMOVUPSZrm addr:$src)>;
+ def : Pat<(v32f16 (vselect VK32WM:$mask,
+ (v32f16 (loadv32f16 addr:$src)), (v32f16 VR512:$src0))),
+ (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
+ def : Pat<(v32f16 (vselect VK32WM:$mask,
+ (v32f16 (loadv32f16 addr:$src)), v32f16_info.ImmAllZerosV)),
+ (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
+ def : Pat<(v32f16 (masked_load addr:$src, VK32WM:$mask, (v32f16 VR512:$src0))),
+ (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
+ def : Pat<(v32f16 (masked_load addr:$src, VK32WM:$mask, undef)),
+ (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
+ def : Pat<(v32f16 (masked_load addr:$src, VK32WM:$mask, v32f16_info.ImmAllZerosV)),
+ (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
+
+ def : Pat<(alignedstore (v32f16 VR512:$src), addr:$dst),
+ (VMOVAPSZmr addr:$dst, VR512:$src)>;
+ def : Pat<(store (v32f16 VR512:$src), addr:$dst),
+ (VMOVUPSZmr addr:$dst, VR512:$src)>;
+ def : Pat<(masked_store (v32f16 VR512:$src), addr:$dst, VK32WM:$mask),
+ (VMOVDQU16Zmrk addr:$dst, VK32WM:$mask, VR512:$src)>;
+}
+let Predicates = [HasFP16, HasVLX] in {
+ def : Pat<(v16f16 (vselect VK16WM:$mask, (v16f16 VR256X:$src1), (v16f16 VR256X:$src0))),
+ (VMOVDQU16Z256rrk VR256X:$src0, VK16WM:$mask, VR256X:$src1)>;
+ def : Pat<(v16f16 (vselect VK16WM:$mask, (v16f16 VR256X:$src1), v16f16x_info.ImmAllZerosV)),
+ (VMOVDQU16Z256rrkz VK16WM:$mask, VR256X:$src1)>;
+ def : Pat<(v16f16 (alignedloadv16f16 addr:$src)),
+ (VMOVAPSZ256rm addr:$src)>;
+ def : Pat<(v16f16 (vselect VK16WM:$mask,
+ (v16f16 (alignedloadv16f16 addr:$src)), (v16f16 VR256X:$src0))),
+ (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
+ def : Pat<(v16f16 (vselect VK16WM:$mask,
+ (v16f16 (alignedloadv16f16 addr:$src)), v16f16x_info.ImmAllZerosV)),
+ (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
+ def : Pat<(v16f16 (loadv16f16 addr:$src)),
+ (VMOVUPSZ256rm addr:$src)>;
+ def : Pat<(v16f16 (vselect VK16WM:$mask,
+ (v16f16 (loadv16f16 addr:$src)), (v16f16 VR256X:$src0))),
+ (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
+ def : Pat<(v16f16 (vselect VK16WM:$mask,
+ (v16f16 (loadv16f16 addr:$src)), v16f16x_info.ImmAllZerosV)),
+ (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
+ def : Pat<(v16f16 (masked_load addr:$src, VK16WM:$mask, (v16f16 VR256X:$src0))),
+ (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
+ def : Pat<(v16f16 (masked_load addr:$src, VK16WM:$mask, undef)),
+ (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
+ def : Pat<(v16f16 (masked_load addr:$src, VK16WM:$mask, v16f16x_info.ImmAllZerosV)),
+ (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
+
+ def : Pat<(alignedstore (v16f16 VR256X:$src), addr:$dst),
+ (VMOVAPSZ256mr addr:$dst, VR256X:$src)>;
+ def : Pat<(store (v16f16 VR256X:$src), addr:$dst),
+ (VMOVUPSZ256mr addr:$dst, VR256X:$src)>;
+ def : Pat<(masked_store (v16f16 VR256X:$src), addr:$dst, VK16WM:$mask),
+ (VMOVDQU16Z256mrk addr:$dst, VK16WM:$mask, VR256X:$src)>;
+
+ def : Pat<(v8f16 (vselect VK8WM:$mask, (v8f16 VR128X:$src1), (v8f16 VR128X:$src0))),
+ (VMOVDQU16Z128rrk VR128X:$src0, VK8WM:$mask, VR128X:$src1)>;
+ def : Pat<(v8f16 (vselect VK8WM:$mask, (v8f16 VR128X:$src1), v8f16x_info.ImmAllZerosV)),
+ (VMOVDQU16Z128rrkz VK8WM:$mask, VR128X:$src1)>;
+ def : Pat<(v8f16 (alignedloadv8f16 addr:$src)),
+ (VMOVAPSZ128rm addr:$src)>;
+ def : Pat<(v8f16 (vselect VK8WM:$mask,
+ (v8f16 (alignedloadv8f16 addr:$src)), (v8f16 VR128X:$src0))),
+ (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
+ def : Pat<(v8f16 (vselect VK8WM:$mask,
+ (v8f16 (alignedloadv8f16 addr:$src)), v8f16x_info.ImmAllZerosV)),
+ (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
+ def : Pat<(v8f16 (loadv8f16 addr:$src)),
+ (VMOVUPSZ128rm addr:$src)>;
+ def : Pat<(v8f16 (vselect VK8WM:$mask,
+ (v8f16 (loadv8f16 addr:$src)), (v8f16 VR128X:$src0))),
+ (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
+ def : Pat<(v8f16 (vselect VK8WM:$mask,
+ (v8f16 (loadv8f16 addr:$src)), v8f16x_info.ImmAllZerosV)),
+ (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
+ def : Pat<(v8f16 (masked_load addr:$src, VK8WM:$mask, (v8f16 VR128X:$src0))),
+ (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
+ def : Pat<(v8f16 (masked_load addr:$src, VK8WM:$mask, undef)),
+ (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
+ def : Pat<(v8f16 (masked_load addr:$src, VK8WM:$mask, v8f16x_info.ImmAllZerosV)),
+ (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
+
+ def : Pat<(alignedstore (v8f16 VR128X:$src), addr:$dst),
+ (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
+ def : Pat<(store (v8f16 VR128X:$src), addr:$dst),
+ (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
+ def : Pat<(masked_store (v8f16 VR128X:$src), addr:$dst, VK8WM:$mask),
+ (VMOVDQU16Z128mrk addr:$dst, VK8WM:$mask, VR128X:$src)>;
+}
// Move Int Doubleword to Packed Double Int
//
@@ -3905,12 +4095,13 @@ def : Pat<(f64 (bitconvert VK64:$src)),
(VMOV64toSDZrr (KMOVQrk VK64:$src))>;
//===----------------------------------------------------------------------===//
-// AVX-512 MOVSS, MOVSD
+// AVX-512 MOVSH, MOVSS, MOVSD
//===----------------------------------------------------------------------===//
multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
- X86VectorVTInfo _> {
- let Predicates = [HasAVX512, OptForSize] in
+ X86VectorVTInfo _,
+ list<Predicate> prd = [HasAVX512, OptForSize]> {
+ let Predicates = prd in
def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -3976,6 +4167,9 @@ defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VMOVSHZ : avx512_move_scalar<"vmovsh", X86Movsh, X86vzload16, f16x_info,
+ [HasFP16]>,
+ VEX_LIG, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
PatLeaf ZeroFP, X86VectorVTInfo _> {
@@ -4144,9 +4338,14 @@ def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
addr:$srcAddr)>;
}
+defm : avx512_move_scalar_lowering<"VMOVSHZ", X86Movsh, fp16imm0, v8f16x_info>;
defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
+defm : avx512_store_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
+ (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
+defm : avx512_store_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
+ (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
(v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
@@ -4154,6 +4353,13 @@ defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
(v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
+defm : avx512_store_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
+ (v32i1 (insert_subvector
+ (v32i1 immAllZerosV),
+ (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
+ (iPTR 0))),
+ (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
+ GR8, sub_8bit>;
defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
(v16i1 (insert_subvector
(v16i1 immAllZerosV),
@@ -4179,6 +4385,10 @@ defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
(v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
(iPTR 0))), GR8, sub_8bit>;
+defm : avx512_load_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
+ (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
+defm : avx512_load_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
+ (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
(v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
@@ -4186,6 +4396,13 @@ defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
(v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
+defm : avx512_load_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
+ (v32i1 (insert_subvector
+ (v32i1 immAllZerosV),
+ (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
+ (iPTR 0))),
+ (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
+ GR8, sub_8bit>;
defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
(v16i1 (insert_subvector
(v16i1 immAllZerosV),
@@ -4211,6 +4428,16 @@ defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
(v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
(iPTR 0))), GR8, sub_8bit>;
+def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), (f16 FR16X:$src2))),
+ (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrk
+ (v8f16 (COPY_TO_REGCLASS FR16X:$src2, VR128X)),
+ VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
+ (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;
+
+def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), fp16imm0)),
+ (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrkz VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
+ (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;
+
def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
(COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
(v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
@@ -4259,6 +4486,32 @@ def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 immAllZer
(VMOVSDZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
+ let Predicates = [HasFP16] in {
+ def VMOVSHZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
+ (ins VR128X:$src1, VR128X:$src2),
+ "vmovsh\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, T_MAP5XS, EVEX_4V, VEX_LIG,
+ FoldGenData<"VMOVSHZrr">,
+ Sched<[SchedWriteFShuffle.XMM]>;
+
+ let Constraints = "$src0 = $dst" in
+ def VMOVSHZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
+ (ins f16x_info.RC:$src0, f16x_info.KRCWM:$mask,
+ VR128X:$src1, VR128X:$src2),
+ "vmovsh\t{$src2, $src1, $dst {${mask}}|"#
+ "$dst {${mask}}, $src1, $src2}",
+ []>, T_MAP5XS, EVEX_K, EVEX_4V, VEX_LIG,
+ FoldGenData<"VMOVSHZrrk">,
+ Sched<[SchedWriteFShuffle.XMM]>;
+
+ def VMOVSHZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
+ (ins f16x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
+ "vmovsh\t{$src2, $src1, $dst {${mask}} {z}|"#
+ "$dst {${mask}} {z}, $src1, $src2}",
+ []>, EVEX_KZ, T_MAP5XS, EVEX_4V, VEX_LIG,
+ FoldGenData<"VMOVSHZrrkz">,
+ Sched<[SchedWriteFShuffle.XMM]>;
+ }
def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
(ins VR128X:$src1, VR128X:$src2),
"vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -4311,6 +4564,16 @@ let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
Sched<[SchedWriteFShuffle.XMM]>;
}
+def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ (VMOVSHZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
+def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}}|"#
+ "$dst {${mask}}, $src1, $src2}",
+ (VMOVSHZrrk_REV VR128X:$dst, VK1WM:$mask,
+ VR128X:$src1, VR128X:$src2), 0>;
+def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}} {z}|"#
+ "$dst {${mask}} {z}, $src1, $src2}",
+ (VMOVSHZrrkz_REV VR128X:$dst, VK1WM:$mask,
+ VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
(VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
@@ -4393,6 +4656,29 @@ let Predicates = [HasAVX512] in {
def : Pat<(v8f64 (X86vzload64 addr:$src)),
(SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
}
+let Predicates = [HasFP16] in {
+ def : Pat<(v8f16 (X86vzmovl (v8f16 VR128X:$src))),
+ (VMOVSHZrr (v8f16 (AVX512_128_SET0)), VR128X:$src)>;
+
+ // FIXME we need better canonicalization in dag combine
+ def : Pat<(v16f16 (X86vzmovl (v16f16 VR256X:$src))),
+ (SUBREG_TO_REG (i32 0),
+ (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
+ (v8f16 (EXTRACT_SUBREG (v16f16 VR256X:$src), sub_xmm)))), sub_xmm)>;
+ def : Pat<(v32f16 (X86vzmovl (v32f16 VR512:$src))),
+ (SUBREG_TO_REG (i32 0),
+ (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
+ (v8f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_xmm)))), sub_xmm)>;
+
+ def : Pat<(v8f16 (X86vzload16 addr:$src)),
+ (VMOVSHZrm addr:$src)>;
+
+ def : Pat<(v16f16 (X86vzload16 addr:$src)),
+ (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;
+
+ def : Pat<(v32f16 (X86vzload16 addr:$src)),
+ (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;
+}
let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
@@ -5295,8 +5581,7 @@ multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
}
multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
- SDNode VecNode, X86FoldableSchedWrite sched,
- bit IsCommutable = 0> {
+ SDNode VecNode, X86FoldableSchedWrite sched> {
let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
@@ -5357,13 +5642,19 @@ multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDPatternOperator
defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
sched.PS.Scl, IsCommutable>,
avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode,
- sched.PS.Scl, IsCommutable>,
+ sched.PS.Scl>,
XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
sched.PD.Scl, IsCommutable>,
avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode,
- sched.PD.Scl, IsCommutable>,
+ sched.PD.Scl>,
XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
+ let Predicates = [HasFP16] in
+ defm SHZ : avx512_fp_scalar<opc, OpcodeStr#"sh", f16x_info, OpNode,
+ VecNode, sched.PH.Scl, IsCommutable>,
+ avx512_fp_scalar_round<opc, OpcodeStr#"sh", f16x_info, RndNode,
+ sched.PH.Scl>,
+ T_MAP5XS, EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>;
}
multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -5377,6 +5668,13 @@ multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
VecNode, SaeNode, sched.PD.Scl, IsCommutable,
NAME#"SD">,
XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
+ let Predicates = [HasFP16] in {
+ defm SHZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sh", f16x_info, OpNode,
+ VecNode, SaeNode, sched.PH.Scl, IsCommutable,
+ NAME#"SH">,
+ T_MAP5XS, EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>,
+ NotEVEX2VEXConvertible;
+ }
}
defm VADD : avx512_binop_s_round<0x58, "vadd", any_fadd, X86fadds, X86faddRnds,
SchedWriteFAddSizes, 1>;
@@ -5432,47 +5730,60 @@ defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
VEX_W, EVEX_4V, VEX_LIG,
EVEX_CD8<64, CD8VT1>, SIMD_EXC;
+defm VMINCSHZ : avx512_comutable_binop_s<0x5D, "vminsh", f16x_info, X86fminc,
+ SchedWriteFCmp.Scl, "VMINCSH">, T_MAP5XS,
+ EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC,
+ NotEVEX2VEXConvertible;
+defm VMAXCSHZ : avx512_comutable_binop_s<0x5F, "vmaxsh", f16x_info, X86fmaxc,
+ SchedWriteFCmp.Scl, "VMAXCSH">, T_MAP5XS,
+ EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC,
+ NotEVEX2VEXConvertible;
+
multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
SDPatternOperator MaskOpNode,
X86VectorVTInfo _, X86FoldableSchedWrite sched,
bit IsCommutable,
- bit IsKCommutable = IsCommutable> {
+ bit IsKCommutable = IsCommutable,
+ string suffix = _.Suffix,
+ string ClobberConstraint = "",
+ bit MayRaiseFPException = 1> {
let ExeDomain = _.ExeDomain, hasSideEffects = 0,
- Uses = [MXCSR], mayRaiseFPException = 1 in {
+ Uses = [MXCSR], mayRaiseFPException = MayRaiseFPException in {
defm rr: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
- "$src2, $src1", "$src1, $src2",
- (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
- (_.VT (MaskOpNode _.RC:$src1, _.RC:$src2)), IsCommutable,
- IsKCommutable, IsKCommutable>,
- EVEX_4V, Sched<[sched]>;
+ (ins _.RC:$src1, _.RC:$src2), OpcodeStr#suffix,
+ "$src2, $src1", "$src1, $src2",
+ (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
+ (_.VT (MaskOpNode _.RC:$src1, _.RC:$src2)), ClobberConstraint,
+ IsCommutable, IsKCommutable, IsKCommutable>, EVEX_4V, Sched<[sched]>;
let mayLoad = 1 in {
defm rm: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix,
- "$src2, $src1", "$src1, $src2",
- (OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
- (MaskOpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
- EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#suffix,
+ "$src2, $src1", "$src1, $src2",
+ (OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
+ (MaskOpNode _.RC:$src1, (_.LdFrag addr:$src2)),
+ ClobberConstraint>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix,
- "${src2}"#_.BroadcastStr#", $src1",
- "$src1, ${src2}"#_.BroadcastStr,
- (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
- (MaskOpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
- EVEX_4V, EVEX_B,
- Sched<[sched.Folded, sched.ReadAfterFold]>;
+ (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#suffix,
+ "${src2}"#_.BroadcastStr#", $src1",
+ "$src1, ${src2}"#_.BroadcastStr,
+ (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
+ (MaskOpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
+ ClobberConstraint>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
}
multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
SDPatternOperator OpNodeRnd,
- X86FoldableSchedWrite sched, X86VectorVTInfo _> {
+ X86FoldableSchedWrite sched, X86VectorVTInfo _,
+ string suffix = _.Suffix,
+ string ClobberConstraint = ""> {
let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr#_.Suffix,
+ (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr#suffix,
"$rc, $src2, $src1", "$src1, $src2, $rc",
- (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc)))>,
+ (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc))),
+ 0, 0, 0, vselect_mask, ClobberConstraint>,
EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
}
@@ -5519,9 +5830,32 @@ multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator Op
}
}
+multiclass avx512_fp_binop_ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
+ SDPatternOperator MaskOpNode,
+ X86SchedWriteSizes sched, bit IsCommutable = 0> {
+ let Predicates = [HasFP16] in {
+ defm PHZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v32f16_info,
+ sched.PH.ZMM, IsCommutable>, EVEX_V512, T_MAP5PS,
+ EVEX_CD8<16, CD8VF>;
+ }
+ let Predicates = [HasVLX, HasFP16] in {
+ defm PHZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f16x_info,
+ sched.PH.XMM, IsCommutable>, EVEX_V128, T_MAP5PS,
+ EVEX_CD8<16, CD8VF>;
+ defm PHZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f16x_info,
+ sched.PH.YMM, IsCommutable>, EVEX_V256, T_MAP5PS,
+ EVEX_CD8<16, CD8VF>;
+ }
+}
+
let Uses = [MXCSR] in
multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
X86SchedWriteSizes sched> {
+ let Predicates = [HasFP16] in {
+ defm PHZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
+ v32f16_info>,
+ EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
+ }
defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
v16f32_info>,
EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
@@ -5533,6 +5867,11 @@ multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeR
let Uses = [MXCSR] in
multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
X86SchedWriteSizes sched> {
+ let Predicates = [HasFP16] in {
+ defm PHZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
+ v32f16_info>,
+ EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
+ }
defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
v16f32_info>,
EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
@@ -5543,26 +5882,36 @@ multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd
defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, fadd, HasAVX512,
SchedWriteFAddSizes, 1>,
+ avx512_fp_binop_ph<0x58, "vadd", any_fadd, fadd, SchedWriteFAddSizes, 1>,
avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, fmul, HasAVX512,
SchedWriteFMulSizes, 1>,
+ avx512_fp_binop_ph<0x59, "vmul", any_fmul, fmul, SchedWriteFMulSizes, 1>,
avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, fsub, HasAVX512,
SchedWriteFAddSizes>,
+ avx512_fp_binop_ph<0x5C, "vsub", any_fsub, fsub, SchedWriteFAddSizes>,
avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, fdiv, HasAVX512,
SchedWriteFDivSizes>,
+ avx512_fp_binop_ph<0x5E, "vdiv", any_fdiv, fdiv, SchedWriteFDivSizes>,
avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, X86fmin, HasAVX512,
SchedWriteFCmpSizes, 0>,
+ avx512_fp_binop_ph<0x5D, "vmin", X86fmin, X86fmin, SchedWriteFCmpSizes, 0>,
avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, X86fmax, HasAVX512,
SchedWriteFCmpSizes, 0>,
+ avx512_fp_binop_ph<0x5F, "vmax", X86fmax, X86fmax, SchedWriteFCmpSizes, 0>,
avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
let isCodeGenOnly = 1 in {
defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, X86fminc, HasAVX512,
+ SchedWriteFCmpSizes, 1>,
+ avx512_fp_binop_ph<0x5D, "vmin", X86fminc, X86fminc,
SchedWriteFCmpSizes, 1>;
defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, X86fmaxc, HasAVX512,
+ SchedWriteFCmpSizes, 1>,
+ avx512_fp_binop_ph<0x5F, "vmax", X86fmaxc, X86fmaxc,
SchedWriteFCmpSizes, 1>;
}
let Uses = []<Register>, mayRaiseFPException = 0 in {
@@ -5616,43 +5965,57 @@ multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
X86SchedWriteWidths sched> {
+ let Predicates = [HasFP16] in {
+ defm PHZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v32f16_info>,
+ avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v32f16_info>,
+ EVEX_V512, T_MAP6PD, EVEX_CD8<16, CD8VF>;
+ defm SHZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f16x_info>,
+ avx512_fp_scalar_round<opcScaler, OpcodeStr#"sh", f16x_info, X86scalefsRnd, sched.Scl>,
+ EVEX_4V, T_MAP6PD, EVEX_CD8<16, CD8VT1>;
+ }
defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>,
avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
+ EVEX_V512, EVEX_CD8<32, CD8VF>, T8PD;
defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>,
avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
- EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>, T8PD;
defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
avx512_fp_scalar_round<opcScaler, OpcodeStr#"ss", f32x_info,
X86scalefsRnd, sched.Scl>,
- EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+ EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, T8PD;
defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
avx512_fp_scalar_round<opcScaler, OpcodeStr#"sd", f64x_info,
X86scalefsRnd, sched.Scl>,
- EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>, VEX_W;
+ EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>, VEX_W, T8PD;
// Define only if AVX512VL feature is present.
let Predicates = [HasVLX] in {
defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>,
- EVEX_V128, EVEX_CD8<32, CD8VF>;
+ EVEX_V128, EVEX_CD8<32, CD8VF>, T8PD;
defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>,
- EVEX_V256, EVEX_CD8<32, CD8VF>;
+ EVEX_V256, EVEX_CD8<32, CD8VF>, T8PD;
defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>,
- EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
+ EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>, T8PD;
defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>,
- EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
+ EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>, T8PD;
+ }
+
+ let Predicates = [HasFP16, HasVLX] in {
+ defm PHZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v8f16x_info>,
+ EVEX_V128, EVEX_CD8<16, CD8VF>, T_MAP6PD;
+ defm PHZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v16f16x_info>,
+ EVEX_V256, EVEX_CD8<16, CD8VF>, T_MAP6PD;
}
}
defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef",
- SchedWriteFAdd>, T8PD, NotEVEX2VEXConvertible;
+ SchedWriteFAdd>, NotEVEX2VEXConvertible;
//===----------------------------------------------------------------------===//
// AVX-512 VPTESTM instructions
//===----------------------------------------------------------------------===//
multiclass avx512_vptest<bits<8> opc, string OpcodeStr,
- X86FoldableSchedWrite sched, X86VectorVTInfo _,
- string Name> {
+ X86FoldableSchedWrite sched, X86VectorVTInfo _> {
// NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG.
// There are just too many permutations due to commutability and bitcasts.
let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
@@ -5687,13 +6050,13 @@ multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
X86SchedWriteWidths sched,
AVX512VLVectorVTInfo _> {
let Predicates = [HasAVX512] in
- defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512, NAME>,
+ defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512>,
avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512;
let Predicates = [HasAVX512, HasVLX] in {
- defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256, NAME>,
+ defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256>,
avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256;
- defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128, NAME>,
+ defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128>,
avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128;
}
}
@@ -5710,20 +6073,20 @@ multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
X86SchedWriteWidths sched> {
let Predicates = [HasBWI] in {
defm WZ: avx512_vptest<opc, OpcodeStr#"w", sched.ZMM,
- v32i16_info, NAME#"W">, EVEX_V512, VEX_W;
+ v32i16_info>, EVEX_V512, VEX_W;
defm BZ: avx512_vptest<opc, OpcodeStr#"b", sched.ZMM,
- v64i8_info, NAME#"B">, EVEX_V512;
+ v64i8_info>, EVEX_V512;
}
- let Predicates = [HasVLX, HasBWI] in {
+ let Predicates = [HasVLX, HasBWI] in {
defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM,
- v16i16x_info, NAME#"W">, EVEX_V256, VEX_W;
+ v16i16x_info>, EVEX_V256, VEX_W;
defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM,
- v8i16x_info, NAME#"W">, EVEX_V128, VEX_W;
+ v8i16x_info>, EVEX_V128, VEX_W;
defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM,
- v32i8x_info, NAME#"B">, EVEX_V256;
+ v32i8x_info>, EVEX_V256;
defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM,
- v16i8x_info, NAME#"B">, EVEX_V128;
+ v16i8x_info>, EVEX_V128;
}
}
@@ -6392,7 +6755,7 @@ let Predicates = [HasAVX512] in {
multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
SDNode MaskOpNode, X86FoldableSchedWrite sched,
- X86VectorVTInfo _, string Suff> {
+ X86VectorVTInfo _> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
Uses = [MXCSR], mayRaiseFPException = 1 in {
defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
@@ -6400,14 +6763,14 @@ multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDPatternOperator
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)),
(_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
- AVX512FMA3Base, Sched<[sched]>;
+ EVEX_4V, Sched<[sched]>;
defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))),
(_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
- AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
@@ -6417,13 +6780,13 @@ multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDPatternOperator
_.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))),
(MaskOpNode _.RC:$src2,
_.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>,
- AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched,
- X86VectorVTInfo _, string Suff> {
+ X86VectorVTInfo _> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
Uses = [MXCSR] in
defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
@@ -6431,38 +6794,42 @@ multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
(_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))),
(_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>,
- AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
+ EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
}
multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
SDNode MaskOpNode, SDNode OpNodeRnd,
X86SchedWriteWidths sched,
- AVX512VLVectorVTInfo _, string Suff> {
- let Predicates = [HasAVX512] in {
+ AVX512VLVectorVTInfo _,
+ Predicate prd = HasAVX512> {
+ let Predicates = [prd] in {
defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
- sched.ZMM, _.info512, Suff>,
+ sched.ZMM, _.info512>,
avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
- _.info512, Suff>,
+ _.info512>,
EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
}
- let Predicates = [HasVLX, HasAVX512] in {
+ let Predicates = [HasVLX, prd] in {
defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
- sched.YMM, _.info256, Suff>,
+ sched.YMM, _.info256>,
EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
- sched.XMM, _.info128, Suff>,
+ sched.XMM, _.info128>,
EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
}
multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
SDNode MaskOpNode, SDNode OpNodeRnd> {
+ defm PH : avx512_fma3p_213_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
+ OpNodeRnd, SchedWriteFMA,
+ avx512vl_f16_info, HasFP16>, T_MAP6PD;
defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
OpNodeRnd, SchedWriteFMA,
- avx512vl_f32_info, "PS">;
+ avx512vl_f32_info>, T8PD;
defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
OpNodeRnd, SchedWriteFMA,
- avx512vl_f64_info, "PD">, VEX_W;
+ avx512vl_f64_info>, T8PD, VEX_W;
}
defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", any_fma,
@@ -6481,7 +6848,7 @@ defm VFNMSUB213 : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86any_Fnmsub,
multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
SDNode MaskOpNode, X86FoldableSchedWrite sched,
- X86VectorVTInfo _, string Suff> {
+ X86VectorVTInfo _> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
Uses = [MXCSR], mayRaiseFPException = 1 in {
defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
@@ -6489,14 +6856,14 @@ multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDPatternOperator
OpcodeStr, "$src3, $src2", "$src2, $src3",
(null_frag),
(_.VT (MaskOpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
- AVX512FMA3Base, Sched<[sched]>;
+ EVEX_4V, Sched<[sched]>;
defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
(_.VT (MaskOpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
- AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
@@ -6507,14 +6874,14 @@ multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDPatternOperator
_.RC:$src1)),
(_.VT (MaskOpNode _.RC:$src2,
(_.VT (_.BroadcastLdFrag addr:$src3)),
- _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B,
+ _.RC:$src1)), 1, 0>, EVEX_4V, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched,
- X86VectorVTInfo _, string Suff> {
+ X86VectorVTInfo _> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
Uses = [MXCSR] in
defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
@@ -6522,38 +6889,42 @@ multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
(null_frag),
(_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))),
- 1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
+ 1, 1>, EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
}
multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
SDNode MaskOpNode, SDNode OpNodeRnd,
X86SchedWriteWidths sched,
- AVX512VLVectorVTInfo _, string Suff> {
- let Predicates = [HasAVX512] in {
+ AVX512VLVectorVTInfo _,
+ Predicate prd = HasAVX512> {
+ let Predicates = [prd] in {
defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
- sched.ZMM, _.info512, Suff>,
+ sched.ZMM, _.info512>,
avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
- _.info512, Suff>,
+ _.info512>,
EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
}
- let Predicates = [HasVLX, HasAVX512] in {
+ let Predicates = [HasVLX, prd] in {
defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
- sched.YMM, _.info256, Suff>,
+ sched.YMM, _.info256>,
EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
- sched.XMM, _.info128, Suff>,
+ sched.XMM, _.info128>,
EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
}
multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
SDNode MaskOpNode, SDNode OpNodeRnd > {
+ defm PH : avx512_fma3p_231_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
+ OpNodeRnd, SchedWriteFMA,
+ avx512vl_f16_info, HasFP16>, T_MAP6PD;
defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
OpNodeRnd, SchedWriteFMA,
- avx512vl_f32_info, "PS">;
+ avx512vl_f32_info>, T8PD;
defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
OpNodeRnd, SchedWriteFMA,
- avx512vl_f64_info, "PD">, VEX_W;
+ avx512vl_f64_info>, T8PD, VEX_W;
}
defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", any_fma,
@@ -6571,7 +6942,7 @@ defm VFNMSUB231 : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86any_Fnmsub,
multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
SDNode MaskOpNode, X86FoldableSchedWrite sched,
- X86VectorVTInfo _, string Suff> {
+ X86VectorVTInfo _> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
Uses = [MXCSR], mayRaiseFPException = 1 in {
defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
@@ -6579,7 +6950,7 @@ multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator
OpcodeStr, "$src3, $src2", "$src2, $src3",
(null_frag),
(_.VT (MaskOpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1>,
- AVX512FMA3Base, Sched<[sched]>;
+ EVEX_4V, Sched<[sched]>;
// Pattern is in 312 order so that the load is in a different place from the
// 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
@@ -6588,7 +6959,7 @@ multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)),
(_.VT (MaskOpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
- AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Pattern is in 312 order so that the load is in a different place from the
// 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
@@ -6600,13 +6971,13 @@ multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator
_.RC:$src1, _.RC:$src2)),
(_.VT (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
_.RC:$src1, _.RC:$src2)), 1, 0>,
- AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched,
- X86VectorVTInfo _, string Suff> {
+ X86VectorVTInfo _> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
Uses = [MXCSR] in
defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
@@ -6614,38 +6985,42 @@ multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
(null_frag),
(_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
- 1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
+ 1, 1>, EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
}
multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
SDNode MaskOpNode, SDNode OpNodeRnd,
X86SchedWriteWidths sched,
- AVX512VLVectorVTInfo _, string Suff> {
- let Predicates = [HasAVX512] in {
+ AVX512VLVectorVTInfo _,
+ Predicate prd = HasAVX512> {
+ let Predicates = [prd] in {
defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
- sched.ZMM, _.info512, Suff>,
+ sched.ZMM, _.info512>,
avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
- _.info512, Suff>,
+ _.info512>,
EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
}
- let Predicates = [HasVLX, HasAVX512] in {
+ let Predicates = [HasVLX, prd] in {
defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
- sched.YMM, _.info256, Suff>,
+ sched.YMM, _.info256>,
EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
- sched.XMM, _.info128, Suff>,
+ sched.XMM, _.info128>,
EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
}
multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
SDNode MaskOpNode, SDNode OpNodeRnd > {
+ defm PH : avx512_fma3p_132_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
+ OpNodeRnd, SchedWriteFMA,
+ avx512vl_f16_info, HasFP16>, T_MAP6PD;
defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
OpNodeRnd, SchedWriteFMA,
- avx512vl_f32_info, "PS">;
+ avx512vl_f32_info>, T8PD;
defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
OpNodeRnd, SchedWriteFMA,
- avx512vl_f64_info, "PD">, VEX_W;
+ avx512vl_f64_info>, T8PD, VEX_W;
}
defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", any_fma,
@@ -6668,39 +7043,39 @@ let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3), OpcodeStr,
"$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
- AVX512FMA3Base, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
+ EVEX_4V, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
let mayLoad = 1 in
defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
"$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
- AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
+ EVEX_4V, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
let Uses = [MXCSR] in
defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
- AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;
+ EVEX_4V, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;
let isCodeGenOnly = 1, isCommutable = 1 in {
- def r : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
+ def r : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
- def m : AVX512FMA3S<opc, MRMSrcMem, (outs _.FRC:$dst),
+ !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, EVEX_4V, SIMD_EXC;
+ def m : AVX512<opc, MRMSrcMem, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
+ [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, EVEX_4V, SIMD_EXC;
let Uses = [MXCSR] in
- def rb : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
+ def rb : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
!strconcat(OpcodeStr,
"\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
!if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
- Sched<[SchedWriteFMA.Scl]>;
+ Sched<[SchedWriteFMA.Scl]>, EVEX_4V;
}// isCodeGenOnly = 1
}// Constraints = "$src1 = $dst"
}
@@ -6744,10 +7119,15 @@ multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
let Predicates = [HasAVX512] in {
defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
OpNodeRnd, f32x_info, "SS">,
- EVEX_CD8<32, CD8VT1>, VEX_LIG;
+ EVEX_CD8<32, CD8VT1>, VEX_LIG, T8PD;
defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
OpNodeRnd, f64x_info, "SD">,
- EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
+ EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W, T8PD;
+ }
+ let Predicates = [HasFP16] in {
+ defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
+ OpNodeRnd, f16x_info, "SH">,
+ EVEX_CD8<16, CD8VT1>, VEX_LIG, T_MAP6PD;
}
}
@@ -6759,8 +7139,9 @@ defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86any_Fnmsub, X86Fnmsu
multiclass avx512_scalar_fma_patterns<SDPatternOperator Op, SDNode MaskedOp,
SDNode RndOp, string Prefix,
string Suffix, SDNode Move,
- X86VectorVTInfo _, PatLeaf ZeroFP> {
- let Predicates = [HasAVX512] in {
+ X86VectorVTInfo _, PatLeaf ZeroFP,
+ Predicate prd = HasAVX512> {
+ let Predicates = [prd] in {
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
(Op _.FRC:$src2,
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
@@ -6958,6 +7339,14 @@ multiclass avx512_scalar_fma_patterns<SDPatternOperator Op, SDNode MaskedOp,
(_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
}
}
+defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD", "SH",
+ X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
+defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB", "SH",
+ X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
+defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SH",
+ X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
+defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SH",
+ X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
"SS", X86Movss, v4f32x_info, fp32imm0>;
@@ -6990,13 +7379,13 @@ multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
- AVX512FMA3Base, Sched<[sched]>;
+ T8PD, EVEX_4V, Sched<[sched]>;
defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
- AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ T8PD, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
@@ -7005,7 +7394,7 @@ multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(OpNode _.RC:$src2,
(_.VT (_.BroadcastLdFrag addr:$src3)),
_.RC:$src1)>,
- AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ T8PD, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
} // Constraints = "$src1 = $dst"
@@ -7190,8 +7579,8 @@ multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
X86VectorVTInfo DstVT, SDNode OpNode,
SDNode OpNodeRnd,
X86FoldableSchedWrite sched, string asm,
- string aliasStr> {
- let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
+ string aliasStr, Predicate prd = HasAVX512> {
+ let Predicates = [prd], ExeDomain = SrcVT.ExeDomain in {
def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
@@ -7207,7 +7596,7 @@ multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
[(set DstVT.RC:$dst, (OpNode
(SrcVT.ScalarIntMemFrags addr:$src)))]>,
EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
- } // Predicates = [HasAVX512]
+ } // Predicates = [prd]
def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
@@ -7246,8 +7635,7 @@ defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2u
multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT,
X86VectorVTInfo DstVT, SDNode OpNode,
- X86FoldableSchedWrite sched,
- string aliasStr> {
+ X86FoldableSchedWrite sched> {
let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
let isCodeGenOnly = 1 in {
def rr : AVX512<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.FRC:$src),
@@ -7263,17 +7651,13 @@ multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT,
}
defm VCVTSS2SIZ: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i32x_info,
- lrint, WriteCvtSS2I,
- "{l}">, XS, EVEX_CD8<32, CD8VT1>;
+ lrint, WriteCvtSS2I>, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2SI64Z: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i64x_info,
- llrint, WriteCvtSS2I,
- "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
+ llrint, WriteCvtSS2I>, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSD2SIZ: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i32x_info,
- lrint, WriteCvtSD2I,
- "{l}">, XD, EVEX_CD8<64, CD8VT1>;
+ lrint, WriteCvtSD2I>, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2SI64Z: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i64x_info,
- llrint, WriteCvtSD2I,
- "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
+ llrint, WriteCvtSD2I>, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
let Predicates = [HasAVX512] in {
def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64Zrr FR32:$src)>;
@@ -7371,8 +7755,9 @@ def : Pat<(v2f64 (X86Movsd
multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
X86VectorVTInfo _DstRC, SDPatternOperator OpNode,
SDNode OpNodeInt, SDNode OpNodeSAE,
- X86FoldableSchedWrite sched, string aliasStr>{
-let Predicates = [HasAVX512], ExeDomain = _SrcRC.ExeDomain in {
+ X86FoldableSchedWrite sched, string aliasStr,
+ Predicate prd = HasAVX512> {
+let Predicates = [prd], ExeDomain = _SrcRC.ExeDomain in {
let isCodeGenOnly = 1 in {
def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
@@ -7399,7 +7784,7 @@ let Predicates = [HasAVX512], ExeDomain = _SrcRC.ExeDomain in {
[(set _DstRC.RC:$dst,
(OpNodeInt (_SrcRC.ScalarIntMemFrags addr:$src)))]>,
EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
-} //HasAVX512
+} // Predicates = [prd]
def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
@@ -7497,33 +7882,47 @@ multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInf
EVEX_4V, VEX_LIG, Sched<[sched]>,
EVEX_B, EVEX_RC;
}
-multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
+multiclass avx512_cvt_fp_scalar_trunc<bits<8> opc, string OpcodeStr,
SDNode OpNode, SDNode OpNodeRnd,
X86FoldableSchedWrite sched,
- X86VectorVTInfo _src, X86VectorVTInfo _dst> {
- let Predicates = [HasAVX512], ExeDomain = SSEPackedSingle in {
+ X86VectorVTInfo _src, X86VectorVTInfo _dst,
+ Predicate prd = HasAVX512> {
+ let Predicates = [prd], ExeDomain = SSEPackedSingle in {
defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
- OpNodeRnd, sched>, VEX_W, EVEX_CD8<64, CD8VT1>, XD;
+ OpNodeRnd, sched>, EVEX_CD8<_src.EltSize, CD8VT1>;
}
}
-multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr,
- SDNode OpNode, SDNode OpNodeSAE,
- X86FoldableSchedWrite sched,
- X86VectorVTInfo _src, X86VectorVTInfo _dst> {
- let Predicates = [HasAVX512], ExeDomain = SSEPackedSingle in {
+multiclass avx512_cvt_fp_scalar_extend<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, SDNode OpNodeSAE,
+ X86FoldableSchedWrite sched,
+ X86VectorVTInfo _src, X86VectorVTInfo _dst,
+ Predicate prd = HasAVX512> {
+ let Predicates = [prd], ExeDomain = SSEPackedSingle in {
defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>,
- EVEX_CD8<32, CD8VT1>, XS;
+ EVEX_CD8<_src.EltSize, CD8VT1>;
}
}
-defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss", X86frounds,
+defm VCVTSD2SS : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2ss", X86frounds,
X86froundsRnd, WriteCvtSD2SS, f64x_info,
- f32x_info>;
-defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", X86fpexts,
+ f32x_info>, XD, VEX_W;
+defm VCVTSS2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtss2sd", X86fpexts,
X86fpextsSAE, WriteCvtSS2SD, f32x_info,
- f64x_info>;
+ f64x_info>, XS;
+defm VCVTSD2SH : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2sh", X86frounds,
+ X86froundsRnd, WriteCvtSD2SS, f64x_info,
+ f16x_info, HasFP16>, T_MAP5XD, VEX_W;
+defm VCVTSH2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtsh2sd", X86fpexts,
+ X86fpextsSAE, WriteCvtSS2SD, f16x_info,
+ f64x_info, HasFP16>, T_MAP5XS;
+defm VCVTSS2SH : avx512_cvt_fp_scalar_trunc<0x1D, "vcvtss2sh", X86frounds,
+ X86froundsRnd, WriteCvtSD2SS, f32x_info,
+ f16x_info, HasFP16>, T_MAP5PS;
+defm VCVTSH2SS : avx512_cvt_fp_scalar_extend<0x13, "vcvtsh2ss", X86fpexts,
+ X86fpextsSAE, WriteCvtSS2SD, f16x_info,
+ f32x_info, HasFP16>, T_MAP6PS;
def : Pat<(f64 (any_fpextend FR32X:$src)),
(VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
@@ -7536,6 +7935,27 @@ def : Pat<(f32 (any_fpround FR64X:$src)),
(VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
Requires<[HasAVX512]>;
+def : Pat<(f32 (any_fpextend FR16X:$src)),
+ (VCVTSH2SSZrr (f32 (IMPLICIT_DEF)), FR16X:$src)>,
+ Requires<[HasFP16]>;
+def : Pat<(f32 (any_fpextend (loadf16 addr:$src))),
+ (VCVTSH2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[HasFP16, OptForSize]>;
+
+def : Pat<(f64 (any_fpextend FR16X:$src)),
+ (VCVTSH2SDZrr (f64 (IMPLICIT_DEF)), FR16X:$src)>,
+ Requires<[HasFP16]>;
+def : Pat<(f64 (any_fpextend (loadf16 addr:$src))),
+ (VCVTSH2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[HasFP16, OptForSize]>;
+
+def : Pat<(f16 (any_fpround FR32X:$src)),
+ (VCVTSS2SHZrr (f16 (IMPLICIT_DEF)), FR32X:$src)>,
+ Requires<[HasFP16]>;
+def : Pat<(f16 (any_fpround FR64X:$src)),
+ (VCVTSD2SHZrr (f16 (IMPLICIT_DEF)), FR64X:$src)>,
+ Requires<[HasFP16]>;
+
def : Pat<(v4f32 (X86Movss
(v4f32 VR128X:$dst),
(v4f32 (scalar_to_vector
@@ -7649,39 +8069,76 @@ multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _
(_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src)),
(_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;
-// Extend Float to Double
-multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
- X86SchedWriteWidths sched> {
- let Predicates = [HasAVX512] in {
- defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f32x_info,
+// Extend [Float to Double, Half to Float]
+multiclass avx512_cvt_extend<bits<8> opc, string OpcodeStr,
+ AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
+ X86SchedWriteWidths sched, Predicate prd = HasAVX512> {
+ let Predicates = [prd] in {
+ defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info512, _src.info256,
any_fpextend, fpextend, sched.ZMM>,
- avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
+ avx512_vcvt_fp_sae<opc, OpcodeStr, _dst.info512, _src.info256,
X86vfpextSAE, sched.ZMM>, EVEX_V512;
}
- let Predicates = [HasVLX] in {
- defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v4f32x_info,
- X86any_vfpext, X86vfpext, sched.XMM, "{1to2}",
+ let Predicates = [prd, HasVLX] in {
+ defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info128, _src.info128,
+ X86any_vfpext, X86vfpext, sched.XMM,
+ _dst.info128.BroadcastStr,
"", f64mem>, EVEX_V128;
- defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v4f32x_info,
- any_fpextend, fpextend, sched.YMM>, EVEX_V256;
+ defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info256, _src.info128,
+ any_fpextend, fpextend, sched.YMM>, EVEX_V256;
}
}
-// Truncate Double to Float
-multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
- let Predicates = [HasAVX512] in {
- defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info,
+// Truncate [Double to Float, Float to Half]
+multiclass avx512_cvt_trunc<bits<8> opc, string OpcodeStr,
+ AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
+ X86SchedWriteWidths sched, Predicate prd = HasAVX512,
+ PatFrag bcast128 = _src.info128.BroadcastLdFrag,
+ PatFrag loadVT128 = _src.info128.LdFrag,
+ RegisterClass maskRC128 = _src.info128.KRCWM> {
+ let Predicates = [prd] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512,
X86any_vfpround, X86vfpround, sched.ZMM>,
- avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
+ avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
X86vfproundRnd, sched.ZMM>, EVEX_V512;
}
- let Predicates = [HasVLX] in {
- defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
- null_frag, null_frag, sched.XMM, "{1to2}", "{x}",
- f128mem, VK2WM>, EVEX_V128;
- defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info,
+ let Predicates = [prd, HasVLX] in {
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128,
+ null_frag, null_frag, sched.XMM,
+ _src.info128.BroadcastStr, "{x}",
+ f128mem, maskRC128>, EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256,
X86any_vfpround, X86vfpround,
- sched.YMM, "{1to4}", "{y}">, EVEX_V256;
+ sched.YMM, _src.info256.BroadcastStr, "{y}">, EVEX_V256;
+
+ // Special patterns to allow use of X86vmfpround for masking. Instruction
+ // patterns have been disabled with null_frag.
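+ // The Pat records below cover the unmasked, merge-masked ($src0 pass-through)
+ // and zero-masked forms for register, load and broadcast-load sources.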
+ def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT VR128X:$src))),
+ (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
+ def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
+ maskRC128:$mask),
+ (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, maskRC128:$mask, VR128X:$src)>;
+ def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
+ maskRC128:$mask),
+ (!cast<Instruction>(NAME # "Z128rrkz") maskRC128:$mask, VR128X:$src)>;
+
+ def : Pat<(_dst.info128.VT (X86any_vfpround (loadVT128 addr:$src))),
+ (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
+ def : Pat<(X86vmfpround (loadVT128 addr:$src), (_dst.info128.VT VR128X:$src0),
+ maskRC128:$mask),
+ (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
+ def : Pat<(X86vmfpround (loadVT128 addr:$src), _dst.info128.ImmAllZerosV,
+ maskRC128:$mask),
+ (!cast<Instruction>(NAME # "Z128rmkz") maskRC128:$mask, addr:$src)>;
+
+ def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT (bcast128 addr:$src)))),
+ (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
+ def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
+ (_dst.info128.VT VR128X:$src0), maskRC128:$mask),
+ (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
+ def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
+ _dst.info128.ImmAllZerosV, maskRC128:$mask),
+ (!cast<Instruction>(NAME # "Z128rmbkz") maskRC128:$mask, addr:$src)>;
}
def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
@@ -7725,40 +8182,185 @@ multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sc
VK4WM:$mask, f64mem:$src), 0, "att">;
}
-defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SchedWriteCvtPD2PS>,
+defm VCVTPD2PS : avx512_cvt_trunc<0x5A, "vcvtpd2ps",
+ avx512vl_f32_info, avx512vl_f64_info, SchedWriteCvtPD2PS>,
VEX_W, PD, EVEX_CD8<64, CD8VF>;
-defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SchedWriteCvtPS2PD>,
- PS, EVEX_CD8<32, CD8VH>;
+defm VCVTPS2PD : avx512_cvt_extend<0x5A, "vcvtps2pd",
+ avx512vl_f64_info, avx512vl_f32_info, SchedWriteCvtPS2PD>,
+ PS, EVEX_CD8<32, CD8VH>;
-let Predicates = [HasVLX] in {
+// Extend Half to Double
+multiclass avx512_cvtph2pd<bits<8> opc, string OpcodeStr,
+ X86SchedWriteWidths sched> {
+ let Predicates = [HasFP16] in {
+ defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f16x_info,
+ any_fpextend, fpextend, sched.ZMM>,
+ avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f16x_info,
+ X86vfpextSAE, sched.ZMM>, EVEX_V512;
+ def : Pat<(v8f64 (extloadv8f16 addr:$src)),
+ (!cast<Instruction>(NAME # "Zrm") addr:$src)>;
+ }
+ let Predicates = [HasFP16, HasVLX] in {
+ defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v8f16x_info,
+ X86any_vfpext, X86vfpext, sched.XMM, "{1to2}", "",
+ f32mem>, EVEX_V128;
+ defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v8f16x_info,
+ X86any_vfpext, X86vfpext, sched.YMM, "{1to4}", "",
+ f64mem>, EVEX_V256;
+ }
+}
+
+// Truncate Double to Half
+multiclass avx512_cvtpd2ph<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
+ let Predicates = [HasFP16] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8f64_info,
+ X86any_vfpround, X86vfpround, sched.ZMM, "{1to8}", "{z}">,
+ avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8f64_info,
+ X86vfproundRnd, sched.ZMM>, EVEX_V512;
+ }
+ let Predicates = [HasFP16, HasVLX] in {
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2f64x_info, null_frag,
+ null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
+ VK2WM>, EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4f64x_info, null_frag,
+ null_frag, sched.YMM, "{1to4}", "{y}", f256mem,
+ VK4WM>, EVEX_V256;
+ }
+ def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
+ VR128X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
+ (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
+ VK2WM:$mask, VR128X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
+ (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
+ VK2WM:$mask, VR128X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
+ (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
+ i64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
+ "$dst {${mask}}, ${src}{1to2}}",
+ (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
+ VK2WM:$mask, i64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
+ "$dst {${mask}} {z}, ${src}{1to2}}",
+ (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
+ VK2WM:$mask, i64mem:$src), 0, "att">;
+
+ def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
+ VR256X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
+ "$dst {${mask}}, $src}",
+ (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
+ VK4WM:$mask, VR256X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
+ "$dst {${mask}} {z}, $src}",
+ (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
+ VK4WM:$mask, VR256X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
+ (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
+ i64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
+ "$dst {${mask}}, ${src}{1to4}}",
+ (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
+ VK4WM:$mask, i64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
+ "$dst {${mask}} {z}, ${src}{1to4}}",
+ (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
+ VK4WM:$mask, i64mem:$src), 0, "att">;
+
+ def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
+ VR512:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
+ "$dst {${mask}}, $src}",
+ (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
+ VK8WM:$mask, VR512:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
+ "$dst {${mask}} {z}, $src}",
+ (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
+ VK8WM:$mask, VR512:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
+ (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
+ i64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
+ "$dst {${mask}}, ${src}{1to8}}",
+ (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
+ VK8WM:$mask, i64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
+ "$dst {${mask}} {z}, ${src}{1to8}}",
+ (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
+ VK8WM:$mask, i64mem:$src), 0, "att">;
+}
+
+defm VCVTPS2PHX : avx512_cvt_trunc<0x1D, "vcvtps2phx", avx512vl_f16_info,
+ avx512vl_f32_info, SchedWriteCvtPD2PS,
+ HasFP16>, T_MAP5PD, EVEX_CD8<32, CD8VF>;
+defm VCVTPH2PSX : avx512_cvt_extend<0x13, "vcvtph2psx", avx512vl_f32_info,
+ avx512vl_f16_info, SchedWriteCvtPS2PD,
+ HasFP16>, T_MAP6PD, EVEX_CD8<16, CD8VH>;
+defm VCVTPD2PH : avx512_cvtpd2ph<0x5A, "vcvtpd2ph", SchedWriteCvtPD2PS>,
+ VEX_W, T_MAP5PD, EVEX_CD8<64, CD8VF>;
+defm VCVTPH2PD : avx512_cvtph2pd<0x5A, "vcvtph2pd", SchedWriteCvtPS2PD>,
+ T_MAP5PS, EVEX_CD8<16, CD8VQ>;
+
+let Predicates = [HasFP16, HasVLX] in {
// Special patterns to allow use of X86vmfpround for masking. Instruction
// patterns have been disabled with null_frag.
- def : Pat<(X86any_vfpround (v2f64 VR128X:$src)),
- (VCVTPD2PSZ128rr VR128X:$src)>;
- def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v4f32 VR128X:$src0),
+ def : Pat<(v8f16 (X86any_vfpround (v4f64 VR256X:$src))),
+ (VCVTPD2PHZ256rr VR256X:$src)>;
+ def : Pat<(v8f16 (X86vmfpround (v4f64 VR256X:$src), (v8f16 VR128X:$src0),
+ VK4WM:$mask)),
+ (VCVTPD2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
+ def : Pat<(X86vmfpround (v4f64 VR256X:$src), v8f16x_info.ImmAllZerosV,
+ VK4WM:$mask),
+ (VCVTPD2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
+
+ def : Pat<(v8f16 (X86any_vfpround (loadv4f64 addr:$src))),
+ (VCVTPD2PHZ256rm addr:$src)>;
+ def : Pat<(X86vmfpround (loadv4f64 addr:$src), (v8f16 VR128X:$src0),
+ VK4WM:$mask),
+ (VCVTPD2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
+ def : Pat<(X86vmfpround (loadv4f64 addr:$src), v8f16x_info.ImmAllZerosV,
+ VK4WM:$mask),
+ (VCVTPD2PHZ256rmkz VK4WM:$mask, addr:$src)>;
+
+ def : Pat<(v8f16 (X86any_vfpround (v4f64 (X86VBroadcastld64 addr:$src)))),
+ (VCVTPD2PHZ256rmb addr:$src)>;
+ def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
+ (v8f16 VR128X:$src0), VK4WM:$mask),
+ (VCVTPD2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
+ def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
+ v8f16x_info.ImmAllZerosV, VK4WM:$mask),
+ (VCVTPD2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
+
+ def : Pat<(v8f16 (X86any_vfpround (v2f64 VR128X:$src))),
+ (VCVTPD2PHZ128rr VR128X:$src)>;
+ def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v8f16 VR128X:$src0),
VK2WM:$mask),
- (VCVTPD2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
- def : Pat<(X86vmfpround (v2f64 VR128X:$src), v4f32x_info.ImmAllZerosV,
+ (VCVTPD2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
+ def : Pat<(X86vmfpround (v2f64 VR128X:$src), v8f16x_info.ImmAllZerosV,
VK2WM:$mask),
- (VCVTPD2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
+ (VCVTPD2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
- def : Pat<(X86any_vfpround (loadv2f64 addr:$src)),
- (VCVTPD2PSZ128rm addr:$src)>;
- def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v4f32 VR128X:$src0),
+ def : Pat<(v8f16 (X86any_vfpround (loadv2f64 addr:$src))),
+ (VCVTPD2PHZ128rm addr:$src)>;
+ def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v8f16 VR128X:$src0),
VK2WM:$mask),
- (VCVTPD2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
- def : Pat<(X86vmfpround (loadv2f64 addr:$src), v4f32x_info.ImmAllZerosV,
+ (VCVTPD2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(X86vmfpround (loadv2f64 addr:$src), v8f16x_info.ImmAllZerosV,
VK2WM:$mask),
- (VCVTPD2PSZ128rmkz VK2WM:$mask, addr:$src)>;
+ (VCVTPD2PHZ128rmkz VK2WM:$mask, addr:$src)>;
- def : Pat<(X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src))),
- (VCVTPD2PSZ128rmb addr:$src)>;
+ def : Pat<(v8f16 (X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src)))),
+ (VCVTPD2PHZ128rmb addr:$src)>;
def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
- (v4f32 VR128X:$src0), VK2WM:$mask),
- (VCVTPD2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ (v8f16 VR128X:$src0), VK2WM:$mask),
+ (VCVTPD2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
- v4f32x_info.ImmAllZerosV, VK2WM:$mask),
- (VCVTPD2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
+ v8f16x_info.ImmAllZerosV, VK2WM:$mask),
+ (VCVTPD2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
}
// Convert Signed/Unsigned Doubleword to Double
@@ -8079,26 +8681,60 @@ multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpN
}
// Convert Signed/Unsigned Quadword to Float
-multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
- SDNode MaskOpNode, SDNode OpNodeRnd,
- X86SchedWriteWidths sched> {
- let Predicates = [HasDQI] in {
- defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode,
+// Also Convert Signed/Unsigned Doubleword to Half
+multiclass avx512_cvtqq2ps_dq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
+ SDPatternOperator MaskOpNode, SDPatternOperator OpNode128,
+ SDPatternOperator OpNode128M, SDPatternOperator OpNodeRnd,
+ AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
+ X86SchedWriteWidths sched, Predicate prd = HasDQI> {
+ let Predicates = [prd] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512, OpNode,
MaskOpNode, sched.ZMM>,
- avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
+ avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
OpNodeRnd, sched.ZMM>, EVEX_V512;
}
- let Predicates = [HasDQI, HasVLX] in {
+ let Predicates = [prd, HasVLX] in {
// we need "x"/"y" suffixes in order to distinguish between 128 and 256
// memory forms of these instructions in Asm Parser. They have the same
// dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
// for the same reason.
- defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, null_frag,
- null_frag, sched.XMM, "{1to2}", "{x}", i128mem, VK2WM>,
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128, null_frag,
+ null_frag, sched.XMM, _src.info128.BroadcastStr,
+ "{x}", i128mem, _src.info128.KRCWM>,
EVEX_V128, NotEVEX2VEXConvertible;
- defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode,
- MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256,
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256, OpNode,
+ MaskOpNode, sched.YMM, _src.info256.BroadcastStr,
+ "{y}">, EVEX_V256,
NotEVEX2VEXConvertible;
+
+ // Special patterns to allow use of X86VM[SU]intToFP for masking. Instruction
+ // patterns have been disabled with null_frag.
+ def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT VR128X:$src))),
+ (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
+ def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
+ _src.info128.KRCWM:$mask),
+ (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, _src.info128.KRCWM:$mask, VR128X:$src)>;
+ def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
+ _src.info128.KRCWM:$mask),
+ (!cast<Instruction>(NAME # "Z128rrkz") _src.info128.KRCWM:$mask, VR128X:$src)>;
+
+ def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.LdFrag addr:$src))),
+ (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
+ def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), (_dst.info128.VT VR128X:$src0),
+ _src.info128.KRCWM:$mask),
+ (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
+ def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), _dst.info128.ImmAllZerosV,
+ _src.info128.KRCWM:$mask),
+ (!cast<Instruction>(NAME # "Z128rmkz") _src.info128.KRCWM:$mask, addr:$src)>;
+
+ def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT (X86VBroadcastld64 addr:$src)))),
+ (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
+ def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
+ (_dst.info128.VT VR128X:$src0), _src.info128.KRCWM:$mask),
+ (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
+ def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
+ _dst.info128.ImmAllZerosV, _src.info128.KRCWM:$mask),
+ (!cast<Instruction>(NAME # "Z128rmbkz") _src.info128.KRCWM:$mask, addr:$src)>;
}
def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
@@ -8240,13 +8876,29 @@ defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp,
uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PD>,
VEX_W, XS, EVEX_CD8<64, CD8VF>;
-defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", any_sint_to_fp,
- sint_to_fp, X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
- VEX_W, PS, EVEX_CD8<64, CD8VF>;
+defm VCVTDQ2PH : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtdq2ph", any_sint_to_fp, sint_to_fp,
+ X86any_VSintToFP, X86VMSintToFP,
+ X86VSintToFpRnd, avx512vl_f16_info, avx512vl_i32_info,
+ SchedWriteCvtDQ2PS, HasFP16>,
+ T_MAP5PS, EVEX_CD8<32, CD8VF>;
-defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", any_uint_to_fp,
- uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PS>,
- VEX_W, XD, EVEX_CD8<64, CD8VF>;
+defm VCVTUDQ2PH : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtudq2ph", any_uint_to_fp, uint_to_fp,
+ X86any_VUintToFP, X86VMUintToFP,
+ X86VUintToFpRnd, avx512vl_f16_info, avx512vl_i32_info,
+ SchedWriteCvtDQ2PS, HasFP16>, T_MAP5XD,
+ EVEX_CD8<32, CD8VF>;
+
+defm VCVTQQ2PS : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtqq2ps", any_sint_to_fp, sint_to_fp,
+ X86any_VSintToFP, X86VMSintToFP,
+ X86VSintToFpRnd, avx512vl_f32_info, avx512vl_i64_info,
+ SchedWriteCvtDQ2PS>, VEX_W, PS,
+ EVEX_CD8<64, CD8VF>;
+
+defm VCVTUQQ2PS : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtuqq2ps", any_uint_to_fp, uint_to_fp,
+ X86any_VUintToFP, X86VMUintToFP,
+ X86VUintToFpRnd, avx512vl_f32_info, avx512vl_i64_info,
+ SchedWriteCvtDQ2PS>, VEX_W, XD,
+ EVEX_CD8<64, CD8VF>;
let Predicates = [HasVLX] in {
// Special patterns to allow use of X86mcvtp2Int for masking. Instruction
@@ -8436,66 +9088,6 @@ let Predicates = [HasVLX] in {
(VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
}
-let Predicates = [HasDQI, HasVLX] in {
- // Special patterns to allow use of X86VMSintToFP for masking. Instruction
- // patterns have been disabled with null_frag.
- def : Pat<(v4f32 (X86any_VSintToFP (v2i64 VR128X:$src))),
- (VCVTQQ2PSZ128rr VR128X:$src)>;
- def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
- VK2WM:$mask),
- (VCVTQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
- def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
- VK2WM:$mask),
- (VCVTQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
-
- def : Pat<(v4f32 (X86any_VSintToFP (loadv2i64 addr:$src))),
- (VCVTQQ2PSZ128rm addr:$src)>;
- def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
- VK2WM:$mask),
- (VCVTQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
- def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
- VK2WM:$mask),
- (VCVTQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;
-
- def : Pat<(v4f32 (X86any_VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
- (VCVTQQ2PSZ128rmb addr:$src)>;
- def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
- (v4f32 VR128X:$src0), VK2WM:$mask),
- (VCVTQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
- def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
- v4f32x_info.ImmAllZerosV, VK2WM:$mask),
- (VCVTQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
-
- // Special patterns to allow use of X86VMUintToFP for masking. Instruction
- // patterns have been disabled with null_frag.
- def : Pat<(v4f32 (X86any_VUintToFP (v2i64 VR128X:$src))),
- (VCVTUQQ2PSZ128rr VR128X:$src)>;
- def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
- VK2WM:$mask),
- (VCVTUQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
- def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
- VK2WM:$mask),
- (VCVTUQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
-
- def : Pat<(v4f32 (X86any_VUintToFP (loadv2i64 addr:$src))),
- (VCVTUQQ2PSZ128rm addr:$src)>;
- def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
- VK2WM:$mask),
- (VCVTUQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
- def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
- VK2WM:$mask),
- (VCVTUQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;
-
- def : Pat<(v4f32 (X86any_VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
- (VCVTUQQ2PSZ128rmb addr:$src)>;
- def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
- (v4f32 VR128X:$src0), VK2WM:$mask),
- (VCVTUQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
- def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
- v4f32x_info.ImmAllZerosV, VK2WM:$mask),
- (VCVTUQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
-}
-
//===----------------------------------------------------------------------===//
// Half precision conversion instructions
//===----------------------------------------------------------------------===//
@@ -8626,9 +9218,9 @@ let Predicates = [HasVLX] in {
// Unordered/Ordered scalar fp compare with Sae and set EFLAGS
multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
- string OpcodeStr, Domain d,
- X86FoldableSchedWrite sched = WriteFComX> {
- let hasSideEffects = 0, Uses = [MXCSR] in
+ string OpcodeStr, Domain d,
+ X86FoldableSchedWrite sched = WriteFComX> {
+ let ExeDomain = d, hasSideEffects = 0, Uses = [MXCSR] in
def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
!strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
@@ -8675,10 +9267,35 @@ let Defs = [EFLAGS], Predicates = [HasAVX512] in {
}
}
-/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
+let Defs = [EFLAGS], Predicates = [HasFP16] in {
+ defm VUCOMISHZ : avx512_ord_cmp_sae<0x2E, v8f16x_info, "vucomish",
+ SSEPackedSingle>, AVX512PSIi8Base, T_MAP5PS,
+ EVEX_CD8<16, CD8VT1>;
+ defm VCOMISHZ : avx512_ord_cmp_sae<0x2F, v8f16x_info, "vcomish",
+ SSEPackedSingle>, AVX512PSIi8Base, T_MAP5PS,
+ EVEX_CD8<16, CD8VT1>;
+ defm VUCOMISHZ : sse12_ord_cmp<0x2E, FR16X, X86any_fcmp, f16, f16mem, loadf16,
+ "ucomish", SSEPackedSingle>, T_MAP5PS, EVEX,
+ VEX_LIG, EVEX_CD8<16, CD8VT1>;
+ defm VCOMISHZ : sse12_ord_cmp<0x2F, FR16X, X86strict_fcmps, f16, f16mem, loadf16,
+ "comish", SSEPackedSingle>, T_MAP5PS, EVEX,
+ VEX_LIG, EVEX_CD8<16, CD8VT1>;
+ let isCodeGenOnly = 1 in {
+ defm VUCOMISHZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v8f16, shmem,
+ sse_load_f16, "ucomish", SSEPackedSingle>,
+ T_MAP5PS, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
+
+ defm VCOMISHZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v8f16, shmem,
+ sse_load_f16, "comish", SSEPackedSingle>,
+ T_MAP5PS, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
+ }
+}
+
+/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd, rcpsh, rsqrtsh
multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86FoldableSchedWrite sched, X86VectorVTInfo _> {
- let Predicates = [HasAVX512], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
+ X86FoldableSchedWrite sched, X86VectorVTInfo _,
+ Predicate prd = HasAVX512> {
+ let Predicates = [prd], ExeDomain = _.ExeDomain in {
defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
@@ -8693,6 +9310,13 @@ multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
}
+defm VRCPSHZ : avx512_fp14_s<0x4D, "vrcpsh", X86rcp14s, SchedWriteFRcp.Scl,
+ f16x_info, HasFP16>, EVEX_CD8<16, CD8VT1>,
+ T_MAP6PD;
+defm VRSQRTSHZ : avx512_fp14_s<0x4F, "vrsqrtsh", X86rsqrt14s,
+ SchedWriteFRsqrt.Scl, f16x_info, HasFP16>,
+ EVEX_CD8<16, CD8VT1>, T_MAP6PD;
+let Uses = [MXCSR] in {
defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
f32x_info>, EVEX_CD8<32, CD8VT1>,
T8PD;
@@ -8705,6 +9329,7 @@ defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
SchedWriteFRsqrt.Scl, f64x_info>, VEX_W,
EVEX_CD8<64, CD8VT1>, T8PD;
+}
/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -8728,33 +9353,45 @@ multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
}
-let Uses = [MXCSR] in
multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86SchedWriteWidths sched> {
- defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, sched.ZMM,
- v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
- defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, sched.ZMM,
- v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-
- // Define only if AVX512VL feature is present.
- let Predicates = [HasVLX] in {
- defm PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
- OpNode, sched.XMM, v4f32x_info>,
- EVEX_V128, EVEX_CD8<32, CD8VF>;
- defm PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
- OpNode, sched.YMM, v8f32x_info>,
- EVEX_V256, EVEX_CD8<32, CD8VF>;
- defm PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
- OpNode, sched.XMM, v2f64x_info>,
- EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
- defm PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
- OpNode, sched.YMM, v4f64x_info>,
- EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
+ let Uses = [MXCSR] in {
+ defm 14PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"), OpNode, sched.ZMM,
+ v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+ defm 14PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"), OpNode, sched.ZMM,
+ v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
-}
+ let Predicates = [HasFP16] in
+ defm PHZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"), OpNode, sched.ZMM,
+ v32f16_info>, EVEX_V512, T_MAP6PD, EVEX_CD8<16, CD8VF>;
-defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, SchedWriteFRsqrt>;
-defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SchedWriteFRcp>;
+ // Define only if AVX512VL feature is present.
+ let Predicates = [HasVLX], Uses = [MXCSR] in {
+ defm 14PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
+ OpNode, sched.XMM, v4f32x_info>,
+ EVEX_V128, EVEX_CD8<32, CD8VF>;
+ defm 14PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
+ OpNode, sched.YMM, v8f32x_info>,
+ EVEX_V256, EVEX_CD8<32, CD8VF>;
+ defm 14PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
+ OpNode, sched.XMM, v2f64x_info>,
+ EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
+ defm 14PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
+ OpNode, sched.YMM, v4f64x_info>,
+ EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
+ }
+ let Predicates = [HasFP16, HasVLX] in {
+ defm PHZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
+ OpNode, sched.XMM, v8f16x_info>,
+ EVEX_V128, T_MAP6PD, EVEX_CD8<16, CD8VF>;
+ defm PHZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
+ OpNode, sched.YMM, v16f16x_info>,
+ EVEX_V256, T_MAP6PD, EVEX_CD8<16, CD8VF>;
+ }
+}
+
+defm VRSQRT : avx512_fp14_p_vl_all<0x4E, "vrsqrt", X86rsqrt14, SchedWriteFRsqrt>;
+defm VRCP : avx512_fp14_p_vl_all<0x4C, "vrcp", X86rcp14, SchedWriteFRcp>;
/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
@@ -8784,20 +9421,29 @@ multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE,
- sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG;
+ sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG, T8PD, EVEX_4V;
defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE,
- sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
+ sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W, T8PD, EVEX_4V;
+}
+
+multiclass avx512_vgetexpsh<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
+ let Predicates = [HasFP16] in
+ defm SHZ : avx512_fp28_s<opc, OpcodeStr#"sh", f16x_info, OpNode, OpNodeSAE, sched>,
+ EVEX_CD8<16, CD8VT1>, T_MAP6PD, EVEX_4V;
}
let Predicates = [HasERI] in {
defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs,
- SchedWriteFRcp.Scl>, T8PD, EVEX_4V;
+ SchedWriteFRcp.Scl>;
defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs,
- SchedWriteFRsqrt.Scl>, T8PD, EVEX_4V;
+ SchedWriteFRsqrt.Scl>;
}
defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
- SchedWriteFRnd.Scl>, T8PD, EVEX_4V;
+ SchedWriteFRnd.Scl>,
+ avx512_vgetexpsh<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
+ SchedWriteFRnd.Scl>;
/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
@@ -8861,6 +9507,19 @@ multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
}
}
+multiclass avx512_vgetexp_fp16<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDNode OpNodeSAE, X86SchedWriteWidths sched> {
+ let Predicates = [HasFP16] in
+ defm PHZ : avx512_fp28_p<opc, OpcodeStr#"ph", v32f16_info, OpNode, sched.ZMM>,
+ avx512_fp28_p_sae<opc, OpcodeStr#"ph", v32f16_info, OpNodeSAE, sched.ZMM>,
+ T_MAP6PD, EVEX_V512, EVEX_CD8<16, CD8VF>;
+ let Predicates = [HasFP16, HasVLX] in {
+ defm PHZ128 : avx512_fp28_p<opc, OpcodeStr#"ph", v8f16x_info, OpNode, sched.XMM>,
+ EVEX_V128, T_MAP6PD, EVEX_CD8<16, CD8VF>;
+ defm PHZ256 : avx512_fp28_p<opc, OpcodeStr#"ph", v16f16x_info, OpNode, sched.YMM>,
+ EVEX_V256, T_MAP6PD, EVEX_CD8<16, CD8VF>;
+ }
+}
let Predicates = [HasERI] in {
defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE,
SchedWriteFRsqrt>, EVEX;
@@ -8871,6 +9530,8 @@ let Predicates = [HasERI] in {
}
defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
SchedWriteFRnd>,
+ avx512_vgetexp_fp16<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
+ SchedWriteFRnd>,
avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp,
SchedWriteFRnd>, EVEX;
@@ -8908,6 +9569,18 @@ multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
let Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
X86SchedWriteSizes sched> {
+ let Predicates = [HasFP16] in
+ defm PHZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
+ sched.PH.ZMM, v32f16_info>,
+ EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
+ let Predicates = [HasFP16, HasVLX] in {
+ defm PHZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
+ sched.PH.XMM, v8f16x_info>,
+ EVEX_V128, T_MAP5PS, EVEX_CD8<16, CD8VF>;
+ defm PHZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
+ sched.PH.YMM, v16f16x_info>,
+ EVEX_V256, T_MAP5PS, EVEX_CD8<16, CD8VF>;
+ }
defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
sched.PS.ZMM, v16f32_info>,
EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
@@ -8934,6 +9607,10 @@ multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
let Uses = [MXCSR] in
multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
X86SchedWriteSizes sched> {
+ let Predicates = [HasFP16] in
+ defm PHZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ph"),
+ sched.PH.ZMM, v32f16_info>,
+ EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
sched.PS.ZMM, v16f32_info>,
EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
@@ -8943,8 +9620,8 @@ multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
}
multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
- X86VectorVTInfo _, string Name> {
- let ExeDomain = _.ExeDomain in {
+ X86VectorVTInfo _, string Name, Predicate prd = HasAVX512> {
+ let ExeDomain = _.ExeDomain, Predicates = [prd] in {
defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
@@ -8966,7 +9643,7 @@ multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWri
(i32 timm:$rc))>,
EVEX_B, EVEX_RC, Sched<[sched]>;
- let isCodeGenOnly = 1, hasSideEffects = 0, Predicates=[HasAVX512] in {
+ let isCodeGenOnly = 1, hasSideEffects = 0 in {
def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
@@ -8979,13 +9656,13 @@ multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWri
}
}
- let Predicates = [HasAVX512] in {
+ let Predicates = [prd] in {
def : Pat<(_.EltVT (any_fsqrt _.FRC:$src)),
(!cast<Instruction>(Name#Zr)
(_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
}
- let Predicates = [HasAVX512, OptForSize] in {
+ let Predicates = [prd, OptForSize] in {
def : Pat<(_.EltVT (any_fsqrt (load addr:$src))),
(!cast<Instruction>(Name#Zm)
(_.EltVT (IMPLICIT_DEF)), addr:$src)>;
@@ -8994,6 +9671,8 @@ multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWri
multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
X86SchedWriteSizes sched> {
+ defm SHZ : avx512_sqrt_scalar<opc, OpcodeStr#"sh", sched.PH.Scl, f16x_info, NAME#"SH", HasFP16>,
+ EVEX_CD8<16, CD8VT1>, EVEX_4V, T_MAP5XS;
defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
@@ -9058,6 +9737,12 @@ multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
}
}
+let Predicates = [HasFP16] in
+defm VRNDSCALESHZ : avx512_rndscale_scalar<0x0A, "vrndscalesh",
+ SchedWriteFRnd.Scl, f16x_info>,
+ AVX512PSIi8Base, TA, EVEX_4V,
+ EVEX_CD8<16, CD8VT1>;
+
defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
SchedWriteFRnd.Scl, f32x_info>,
AVX512AIi8Base, EVEX_4V, VEX_LIG,
@@ -9086,6 +9771,9 @@ multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
}
}
+defm : avx512_masked_scalar<fsqrt, "SQRTSHZ", X86Movsh,
+ (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v8f16x_info,
+ fp16imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasFP16>;
defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
(v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
fp32imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
@@ -9154,7 +9842,6 @@ multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
- X86VectorVTInfo DestInfo,
PatFrag truncFrag, PatFrag mtruncFrag,
string Name> {
@@ -9184,23 +9871,22 @@ multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
let Predicates = [HasVLX, prd] in {
defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched,
VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
- avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
- truncFrag, mtruncFrag, NAME>, EVEX_V128;
+ avx512_trunc_mr_lowering<VTSrcInfo.info128, truncFrag,
+ mtruncFrag, NAME>, EVEX_V128;
defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched,
VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
- avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
- truncFrag, mtruncFrag, NAME>, EVEX_V256;
+ avx512_trunc_mr_lowering<VTSrcInfo.info256, truncFrag,
+ mtruncFrag, NAME>, EVEX_V256;
}
let Predicates = [prd] in
defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched,
VTSrcInfo.info512, DestInfoZ, x86memopZ>,
- avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
- truncFrag, mtruncFrag, NAME>, EVEX_V512;
+ avx512_trunc_mr_lowering<VTSrcInfo.info512, truncFrag,
+ mtruncFrag, NAME>, EVEX_V512;
}
-multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDPatternOperator MaskNode,
+multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, PatFrag StoreNode,
PatFrag MaskedStoreNode, SDNode InVecNode,
SDPatternOperator InVecMaskNode> {
@@ -9271,17 +9957,16 @@ multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
}
-defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", trunc, select_trunc,
+defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb",
WriteShuffle256, truncstorevi8,
masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
-defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb", X86vtruncs, select_truncs,
+defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb",
WriteShuffle256, truncstore_s_vi8,
masked_truncstore_s_vi8, X86vtruncs,
X86vmtruncs>;
-defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus,
- select_truncus, WriteShuffle256,
- truncstore_us_vi8, masked_truncstore_us_vi8,
- X86vtruncus, X86vmtruncus>;
+defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb",
+ WriteShuffle256, truncstore_us_vi8,
+ masked_truncstore_us_vi8, X86vtruncus, X86vmtruncus>;
defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc,
WriteShuffle256, truncstorevi16,
@@ -9454,8 +10139,9 @@ multiclass WriteShuffle256_BD<bits<8> opc, string OpcodeStr,
}
multiclass WriteShuffle256_BQ<bits<8> opc, string OpcodeStr,
- SDNode OpNode, SDNode InVecNode, string ExtTy,
- X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
+ SDNode InVecNode, string ExtTy,
+ X86FoldableSchedWrite sched,
+ PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
let Predicates = [HasVLX, HasAVX512] in {
defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
v16i8x_info, i16mem, LdFrag, InVecNode>,
@@ -9532,14 +10218,14 @@ multiclass WriteShuffle256_DQ<bits<8> opc, string OpcodeStr,
defm VPMOVZXBW : WriteShuffle256_BW<0x30, "vpmovzxbw", zext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXBD : WriteShuffle256_BD<0x31, "vpmovzxbd", zext, zext_invec, "z", WriteShuffle256>;
-defm VPMOVZXBQ : WriteShuffle256_BQ<0x32, "vpmovzxbq", zext, zext_invec, "z", WriteShuffle256>;
+defm VPMOVZXBQ : WriteShuffle256_BQ<0x32, "vpmovzxbq", zext_invec, "z", WriteShuffle256>;
defm VPMOVZXWD : WriteShuffle256_WD<0x33, "vpmovzxwd", zext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXWQ : WriteShuffle256_WQ<0x34, "vpmovzxwq", zext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXDQ : WriteShuffle256_DQ<0x35, "vpmovzxdq", zext, zext_invec, "z", WriteShuffle256>;
defm VPMOVSXBW: WriteShuffle256_BW<0x20, "vpmovsxbw", sext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXBD: WriteShuffle256_BD<0x21, "vpmovsxbd", sext, sext_invec, "s", WriteShuffle256>;
-defm VPMOVSXBQ: WriteShuffle256_BQ<0x22, "vpmovsxbq", sext, sext_invec, "s", WriteShuffle256>;
+defm VPMOVSXBQ: WriteShuffle256_BQ<0x22, "vpmovsxbq", sext_invec, "s", WriteShuffle256>;
defm VPMOVSXWD: WriteShuffle256_WD<0x23, "vpmovsxwd", sext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXWQ: WriteShuffle256_WQ<0x24, "vpmovsxwq", sext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXDQ: WriteShuffle256_DQ<0x25, "vpmovsxdq", sext, sext_invec, "s", WriteShuffle256>;
@@ -10304,24 +10990,26 @@ multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
bits<8> opcPs, bits<8> opcPd, SDPatternOperator OpNode,
SDPatternOperator MaskOpNode, SDNode OpNodeSAE,
X86SchedWriteWidths sched, Predicate prd>{
+ defm PH : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f16_info,
+ opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, HasFP16>,
+ AVX512PSIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
- EVEX_CD8<32, CD8VF>;
+ AVX512AIi8Base, EVEX, EVEX_CD8<32, CD8VF>;
defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
opcPd, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
- EVEX_CD8<64, CD8VF>, VEX_W;
+ AVX512AIi8Base, EVEX, EVEX_CD8<64, CD8VF>, VEX_W;
}
defm VREDUCE : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
X86VReduce, X86VReduce, X86VReduceSAE,
- SchedWriteFRnd, HasDQI>, AVX512AIi8Base, EVEX;
+ SchedWriteFRnd, HasDQI>;
defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
X86any_VRndScale, X86VRndScale, X86VRndScaleSAE,
- SchedWriteFRnd, HasAVX512>,
- AVX512AIi8Base, EVEX;
+ SchedWriteFRnd, HasAVX512>;
defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
X86VGetMant, X86VGetMant, X86VGetMantSAE,
- SchedWriteFRnd, HasAVX512>, AVX512AIi8Base, EVEX;
+ SchedWriteFRnd, HasAVX512>;
defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
0x50, X86VRange, X86VRangeSAE,
@@ -10345,6 +11033,9 @@ defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
+defm VREDUCESH: avx512_common_fp_sae_scalar_imm<"vreducesh", f16x_info,
+ 0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasFP16>,
+ AVX512PSIi8Base, TA, VEX_LIG, EVEX_4V, EVEX_CD8<16, CD8VT1>;
defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
@@ -10352,6 +11043,9 @@ defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
+defm VGETMANTSH: avx512_common_fp_sae_scalar_imm<"vgetmantsh", f16x_info,
+ 0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasFP16>,
+ AVX512PSIi8Base, TA, VEX_LIG, EVEX_4V, EVEX_CD8<16, CD8VT1>;
multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched,
@@ -10770,7 +11464,7 @@ multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr,
}
}
-multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
+multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr,
X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
VTInfo.info512>, EVEX_V512;
@@ -10783,13 +11477,13 @@ multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
}
-multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode,
+multiclass avx512_movddup<bits<8> opc, string OpcodeStr,
X86SchedWriteWidths sched> {
- defm NAME: avx512_movddup_common<opc, OpcodeStr, OpNode, sched,
+ defm NAME: avx512_movddup_common<opc, OpcodeStr, sched,
avx512vl_f64_info>, XD, VEX_W;
}
-defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SchedWriteFShuffle>;
+defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", SchedWriteFShuffle>;
let Predicates = [HasVLX] in {
def : Pat<(v2f64 (X86VBroadcast f64:$src)),
@@ -10956,16 +11650,15 @@ defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;
// VSHUFPS - VSHUFPD Operations
//===----------------------------------------------------------------------===//
-multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
- AVX512VLVectorVTInfo VTInfo_FP>{
+multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_FP>{
defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
SchedWriteFShuffle>,
EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
AVX512AIi8Base, EVEX_4V;
}
-defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS;
-defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W;
+defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_f32_info>, PS;
+defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_f64_info>, PD, VEX_W;
//===----------------------------------------------------------------------===//
// AVX-512 - Byte shift Left/Right
@@ -11598,6 +12291,11 @@ defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSD", X86Movsd, v2f64x_
defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
+defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSH", X86Movsh, v8f16x_info, fp16imm0>;
+defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSH", X86Movsh, v8f16x_info, fp16imm0>;
+defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSH", X86Movsh, v8f16x_info, fp16imm0>;
+defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSH", X86Movsh, v8f16x_info, fp16imm0>;
+
multiclass AVX512_scalar_unary_math_patterns<SDPatternOperator OpNode, string OpcPrefix,
SDNode Move, X86VectorVTInfo _> {
let Predicates = [HasAVX512] in {
@@ -11609,6 +12307,7 @@ multiclass AVX512_scalar_unary_math_patterns<SDPatternOperator OpNode, string Op
defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
+defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSH", X86Movsh, v8f16x_info>;
//===----------------------------------------------------------------------===//
// AES instructions
@@ -11671,13 +12370,13 @@ multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
(ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
"$src3, $src2", "$src2, $src3",
(VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
- AVX512FMA3Base, Sched<[sched]>;
+ T8PD, EVEX_4V, Sched<[sched]>;
defm m: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
(ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
"$src3, $src2", "$src2, $src3",
(VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
(VTI.VT (VTI.LdFrag addr:$src3))))>,
- AVX512FMA3Base,
+ T8PD, EVEX_4V,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -11693,7 +12392,7 @@ multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
"$src2, ${src3}"#VTI.BroadcastStr,
(OpNode VTI.RC:$src1, VTI.RC:$src2,
(VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
- AVX512FMA3Base, EVEX_B,
+ T8PD, EVEX_4V, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -12200,3 +12899,732 @@ let ExeDomain = SSEPackedSingle in
defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, SchedWriteFMA,
avx512vl_f32_info, avx512vl_i32_info,
HasBF16>, T8XS, EVEX_CD8<32, CD8VF>;
+
+//===----------------------------------------------------------------------===//
+// AVX512FP16
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasFP16] in {
+// Move word (r/m16) to Packed word
+def VMOVW2SHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
+ "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, Sched<[WriteVecMoveFromGpr]>;
+def VMOVWrm : AVX512<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i16mem:$src),
+ "vmovw\t{$src, $dst|$dst, $src}",
+ [(set VR128X:$dst,
+ (v8i16 (scalar_to_vector (loadi16 addr:$src))))]>,
+ T_MAP5PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFLoad]>;
+
+def : Pat<(f16 (bitconvert GR16:$src)),
+ (f16 (COPY_TO_REGCLASS
+ (VMOVW2SHrr
+ (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)),
+ FR16X))>;
+def : Pat<(v8i16 (scalar_to_vector (i16 GR16:$src))),
+ (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>;
+def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (and GR32:$src, 0xffff)))),
+ (VMOVW2SHrr GR32:$src)>;
+// FIXME: We should really find a way to improve these patterns.
+def : Pat<(v8i32 (X86vzmovl
+ (insert_subvector undef,
+ (v4i32 (scalar_to_vector
+ (and GR32:$src, 0xffff))),
+ (iPTR 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;
+def : Pat<(v16i32 (X86vzmovl
+ (insert_subvector undef,
+ (v4i32 (scalar_to_vector
+ (and GR32:$src, 0xffff))),
+ (iPTR 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;
+
+def : Pat<(v8i16 (X86vzmovl (v8i16 (scalar_to_vector (i16 (trunc GR32:$src)))))),
+ (VMOVW2SHrr GR32:$src)>;
+
+// AVX 128-bit movw instruction writes zeros in the high 128-bit part.
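+// A plain VMOVWrm load therefore implements the zero-extending vector-load
+// patterns below; SUBREG_TO_REG widens the result for the 256/512-bit types.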
+def : Pat<(v8i16 (X86vzload16 addr:$src)),
+ (VMOVWrm addr:$src)>;
+def : Pat<(v16i16 (X86vzload16 addr:$src)),
+ (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>;
+
+// Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
+def : Pat<(v32i16 (X86vzload16 addr:$src)),
+ (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>;
+
+def : Pat<(v4i32 (scalar_to_vector (i32 (extloadi16 addr:$src)))),
+ (VMOVWrm addr:$src)>;
+def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (i32 (zextloadi16 addr:$src))))),
+ (VMOVWrm addr:$src)>;
+def : Pat<(v8i32 (X86vzmovl
+ (insert_subvector undef,
+ (v4i32 (scalar_to_vector
+ (i32 (zextloadi16 addr:$src)))),
+ (iPTR 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>;
+def : Pat<(v16i32 (X86vzmovl
+ (insert_subvector undef,
+ (v4i32 (scalar_to_vector
+ (i32 (zextloadi16 addr:$src)))),
+ (iPTR 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>;
+
+// Move word from xmm register to r/m16
+def VMOVSH2Wrr : AVX512<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
+ "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, Sched<[WriteVecMoveToGpr]>;
+def VMOVWmr : AVX512<0x7E, MRMDestMem, (outs),
+ (ins i16mem:$dst, VR128X:$src),
+ "vmovw\t{$src, $dst|$dst, $src}",
+ [(store (i16 (extractelt (v8i16 VR128X:$src),
+ (iPTR 0))), addr:$dst)]>,
+ T_MAP5PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFStore]>;
+
+def : Pat<(i16 (bitconvert FR16X:$src)),
+ (i16 (EXTRACT_SUBREG
+ (VMOVSH2Wrr (COPY_TO_REGCLASS FR16X:$src, VR128X)),
+ sub_16bit))>;
+def : Pat<(i16 (extractelt (v8i16 VR128X:$src), (iPTR 0))),
+ (i16 (EXTRACT_SUBREG (VMOVSH2Wrr VR128X:$src), sub_16bit))>;
+}
+
+// Allow "vmovw" to use GR64
+let hasSideEffects = 0 in {
+ def VMOVW64toSHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
+ "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
+ def VMOVSHtoW64rr : AVX512<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
+ "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>;
+}
+
+// Convert 16-bit float to i16/u16
+multiclass avx512_cvtph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
+ SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
+ AVX512VLVectorVTInfo _Dst,
+ AVX512VLVectorVTInfo _Src,
+ X86SchedWriteWidths sched> {
+ let Predicates = [HasFP16] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
+ OpNode, MaskOpNode, sched.ZMM>,
+ avx512_vcvt_fp_rc<opc, OpcodeStr, _Dst.info512, _Src.info512,
+ OpNodeRnd, sched.ZMM>, EVEX_V512;
+ }
+ let Predicates = [HasFP16, HasVLX] in {
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
+ OpNode, MaskOpNode, sched.XMM>, EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
+ OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
+ }
+}
+
+// Convert 16-bit float to i16/u16 with truncation
+multiclass avx512_cvttph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
+ SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
+ AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src,
+ X86SchedWriteWidths sched> {
+ let Predicates = [HasFP16] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
+ OpNode, MaskOpNode, sched.ZMM>,
+ avx512_vcvt_fp_sae<opc, OpcodeStr, _Dst.info512, _Src.info512,
+ OpNodeRnd, sched.ZMM>, EVEX_V512;
+ }
+ let Predicates = [HasFP16, HasVLX] in {
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
+ OpNode, MaskOpNode, sched.XMM>, EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
+ OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
+ }
+}
+
+defm VCVTPH2UW : avx512_cvtph2w<0x7D, "vcvtph2uw", X86cvtp2UInt, X86cvtp2UInt,
+ X86cvtp2UIntRnd, avx512vl_i16_info,
+ avx512vl_f16_info, SchedWriteCvtPD2DQ>,
+ T_MAP5PS, EVEX_CD8<16, CD8VF>;
+defm VCVTUW2PH : avx512_cvtph2w<0x7D, "vcvtuw2ph", any_uint_to_fp, uint_to_fp,
+ X86VUintToFpRnd, avx512vl_f16_info,
+ avx512vl_i16_info, SchedWriteCvtPD2DQ>,
+ T_MAP5XD, EVEX_CD8<16, CD8VF>;
+defm VCVTTPH2W : avx512_cvttph2w<0x7C, "vcvttph2w", X86any_cvttp2si,
+ X86cvttp2si, X86cvttp2siSAE,
+ avx512vl_i16_info, avx512vl_f16_info,
+ SchedWriteCvtPD2DQ>, T_MAP5PD, EVEX_CD8<16, CD8VF>;
+defm VCVTTPH2UW : avx512_cvttph2w<0x7C, "vcvttph2uw", X86any_cvttp2ui,
+ X86cvttp2ui, X86cvttp2uiSAE,
+ avx512vl_i16_info, avx512vl_f16_info,
+ SchedWriteCvtPD2DQ>, T_MAP5PS, EVEX_CD8<16, CD8VF>;
+defm VCVTPH2W : avx512_cvtph2w<0x7D, "vcvtph2w", X86cvtp2Int, X86cvtp2Int,
+ X86cvtp2IntRnd, avx512vl_i16_info,
+ avx512vl_f16_info, SchedWriteCvtPD2DQ>,
+ T_MAP5PD, EVEX_CD8<16, CD8VF>;
+defm VCVTW2PH : avx512_cvtph2w<0x7D, "vcvtw2ph", any_sint_to_fp, sint_to_fp,
+ X86VSintToFpRnd, avx512vl_f16_info,
+ avx512vl_i16_info, SchedWriteCvtPD2DQ>,
+ T_MAP5XS, EVEX_CD8<16, CD8VF>;
+
+// Convert Half to Signed/Unsigned Doubleword
+multiclass avx512_cvtph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
+ SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
+ X86SchedWriteWidths sched> {
+ let Predicates = [HasFP16] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode,
+ MaskOpNode, sched.ZMM>,
+ avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f16x_info,
+ OpNodeRnd, sched.ZMM>, EVEX_V512;
+ }
+ let Predicates = [HasFP16, HasVLX] in {
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode,
+ MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode,
+ MaskOpNode, sched.YMM>, EVEX_V256;
+ }
+}
+
+// Convert Half to Signed/Unsigned Doubleword with truncation
+multiclass avx512_cvttph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
+ SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
+ X86SchedWriteWidths sched> {
+ let Predicates = [HasFP16] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode,
+ MaskOpNode, sched.ZMM>,
+ avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f16x_info,
+ OpNodeRnd, sched.ZMM>, EVEX_V512;
+ }
+ let Predicates = [HasFP16, HasVLX] in {
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode,
+ MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode,
+ MaskOpNode, sched.YMM>, EVEX_V256;
+ }
+}
+
+
+defm VCVTPH2DQ : avx512_cvtph2dq<0x5B, "vcvtph2dq", X86cvtp2Int, X86cvtp2Int,
+ X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
+ EVEX_CD8<16, CD8VH>;
+defm VCVTPH2UDQ : avx512_cvtph2dq<0x79, "vcvtph2udq", X86cvtp2UInt, X86cvtp2UInt,
+ X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5PS,
+ EVEX_CD8<16, CD8VH>;
+
+defm VCVTTPH2DQ : avx512_cvttph2dq<0x5B, "vcvttph2dq", X86any_cvttp2si,
+ X86cvttp2si, X86cvttp2siSAE,
+ SchedWriteCvtPS2DQ>, T_MAP5XS,
+ EVEX_CD8<16, CD8VH>;
+
+defm VCVTTPH2UDQ : avx512_cvttph2dq<0x78, "vcvttph2udq", X86any_cvttp2ui,
+ X86cvttp2ui, X86cvttp2uiSAE,
+ SchedWriteCvtPS2DQ>, T_MAP5PS,
+ EVEX_CD8<16, CD8VH>;
+
+// Convert Half to Signed/Unsigned Quadword
+multiclass avx512_cvtph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
+ SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
+ X86SchedWriteWidths sched> {
+ let Predicates = [HasFP16] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
+ MaskOpNode, sched.ZMM>,
+ avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f16x_info,
+ OpNodeRnd, sched.ZMM>, EVEX_V512;
+ }
+ let Predicates = [HasFP16, HasVLX] in {
+ // Explicitly specified broadcast string, since we take only 2 elements
+ // from v8f16x_info source
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
+ MaskOpNode, sched.XMM, "{1to2}", "", f32mem>,
+ EVEX_V128;
+ // Explicitly specified broadcast string, since we take only 4 elements
+ // from v8f16x_info source
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
+ MaskOpNode, sched.YMM, "{1to4}", "", f64mem>,
+ EVEX_V256;
+ }
+}
+
+// Convert Half to Signed/Unsigned Quadword with truncation
+multiclass avx512_cvttph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
+ SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
+ X86SchedWriteWidths sched> {
+ let Predicates = [HasFP16] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
+ MaskOpNode, sched.ZMM>,
+ avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f16x_info,
+ OpNodeRnd, sched.ZMM>, EVEX_V512;
+ }
+ let Predicates = [HasFP16, HasVLX] in {
+ // Explicitly specified broadcast string, since we take only 2 elements
+ // from v8f16x_info source
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
+ MaskOpNode, sched.XMM, "{1to2}", "", f32mem>, EVEX_V128;
+ // Explicitly specified broadcast string, since we take only 4 elements
+ // from v8f16x_info source
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
+ MaskOpNode, sched.YMM, "{1to4}", "", f64mem>, EVEX_V256;
+ }
+}
+
+defm VCVTPH2QQ : avx512_cvtph2qq<0x7B, "vcvtph2qq", X86cvtp2Int, X86cvtp2Int,
+ X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
+ EVEX_CD8<16, CD8VQ>;
+
+defm VCVTPH2UQQ : avx512_cvtph2qq<0x79, "vcvtph2uqq", X86cvtp2UInt, X86cvtp2UInt,
+ X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
+ EVEX_CD8<16, CD8VQ>;
+
+defm VCVTTPH2QQ : avx512_cvttph2qq<0x7A, "vcvttph2qq", X86any_cvttp2si,
+ X86cvttp2si, X86cvttp2siSAE,
+ SchedWriteCvtPS2DQ>, T_MAP5PD,
+ EVEX_CD8<16, CD8VQ>;
+
+defm VCVTTPH2UQQ : avx512_cvttph2qq<0x78, "vcvttph2uqq", X86any_cvttp2ui,
+ X86cvttp2ui, X86cvttp2uiSAE,
+ SchedWriteCvtPS2DQ>, T_MAP5PD,
+ EVEX_CD8<16, CD8VQ>;
+
+// Convert Signed/Unsigned Quadword to Half
+multiclass avx512_cvtqq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
+ SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
+ X86SchedWriteWidths sched> {
+ // We need "x"/"y"/"z" suffixes in order to distinguish between the 128, 256
+ // and 512 memory forms of these instructions in the Asm Parser. They have the
+ // same dest type - 'v8f16x_info'. We also specify the broadcast string
+ // explicitly for the same reason.
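+ // For example (illustrative, matching the "att" aliases defined below): both
+ // the 128-bit and 256-bit memory forms write an XMM destination, so AT&T
+ // syntax relies on the suffix to select the source width:
+ //   vcvtqq2phx (%rax), %xmm0   // 128-bit (v2i64) memory source
+ //   vcvtqq2phy (%rax), %xmm0   // 256-bit (v4i64) memory source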
+ let Predicates = [HasFP16] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8i64_info, OpNode,
+ MaskOpNode, sched.ZMM, "{1to8}", "{z}">,
+ avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8i64_info,
+ OpNodeRnd, sched.ZMM>, EVEX_V512;
+ }
+ let Predicates = [HasFP16, HasVLX] in {
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2i64x_info,
+ null_frag, null_frag, sched.XMM, "{1to2}", "{x}",
+ i128mem, VK2WM>,
+ EVEX_V128, NotEVEX2VEXConvertible;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4i64x_info,
+ null_frag, null_frag, sched.YMM, "{1to4}", "{y}",
+ i256mem, VK4WM>,
+ EVEX_V256, NotEVEX2VEXConvertible;
+ }
+
+ def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
+ VR128X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
+ (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
+ VK2WM:$mask, VR128X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
+ (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
+ VK2WM:$mask, VR128X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
+ (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
+ i64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
+ "$dst {${mask}}, ${src}{1to2}}",
+ (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
+ VK2WM:$mask, i64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
+ "$dst {${mask}} {z}, ${src}{1to2}}",
+ (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
+ VK2WM:$mask, i64mem:$src), 0, "att">;
+
+ def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
+ VR256X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
+ "$dst {${mask}}, $src}",
+ (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
+ VK4WM:$mask, VR256X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
+ "$dst {${mask}} {z}, $src}",
+ (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
+ VK4WM:$mask, VR256X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
+ (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
+ i64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
+ "$dst {${mask}}, ${src}{1to4}}",
+ (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
+ VK4WM:$mask, i64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
+ "$dst {${mask}} {z}, ${src}{1to4}}",
+ (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
+ VK4WM:$mask, i64mem:$src), 0, "att">;
+
+ def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
+ VR512:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
+ "$dst {${mask}}, $src}",
+ (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
+ VK8WM:$mask, VR512:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
+ "$dst {${mask}} {z}, $src}",
+ (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
+ VK8WM:$mask, VR512:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
+ (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
+ i64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
+ "$dst {${mask}}, ${src}{1to8}}",
+ (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
+ VK8WM:$mask, i64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
+ "$dst {${mask}} {z}, ${src}{1to8}}",
+ (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
+ VK8WM:$mask, i64mem:$src), 0, "att">;
+}
+
+defm VCVTQQ2PH : avx512_cvtqq2ph<0x5B, "vcvtqq2ph", any_sint_to_fp, sint_to_fp,
+ X86VSintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, T_MAP5PS,
+ EVEX_CD8<64, CD8VF>;
+
+defm VCVTUQQ2PH : avx512_cvtqq2ph<0x7A, "vcvtuqq2ph", any_uint_to_fp, uint_to_fp,
+ X86VUintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, T_MAP5XD,
+ EVEX_CD8<64, CD8VF>;
+
+// Convert half to signed/unsigned int 32/64
+defm VCVTSH2SIZ: avx512_cvt_s_int_round<0x2D, f16x_info, i32x_info, X86cvts2si,
+ X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{l}", HasFP16>,
+ T_MAP5XS, EVEX_CD8<16, CD8VT1>;
+defm VCVTSH2SI64Z: avx512_cvt_s_int_round<0x2D, f16x_info, i64x_info, X86cvts2si,
+ X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{q}", HasFP16>,
+ T_MAP5XS, VEX_W, EVEX_CD8<16, CD8VT1>;
+defm VCVTSH2USIZ: avx512_cvt_s_int_round<0x79, f16x_info, i32x_info, X86cvts2usi,
+ X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{l}", HasFP16>,
+ T_MAP5XS, EVEX_CD8<16, CD8VT1>;
+defm VCVTSH2USI64Z: avx512_cvt_s_int_round<0x79, f16x_info, i64x_info, X86cvts2usi,
+ X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{q}", HasFP16>,
+ T_MAP5XS, VEX_W, EVEX_CD8<16, CD8VT1>;
+
+defm VCVTTSH2SIZ: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i32x_info,
+ any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
+ "{l}", HasFP16>, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
+defm VCVTTSH2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i64x_info,
+ any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
+ "{q}", HasFP16>, VEX_W, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
+defm VCVTTSH2USIZ: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i32x_info,
+ any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
+ "{l}", HasFP16>, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
+defm VCVTTSH2USI64Z: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i64x_info,
+ any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
+ "{q}", HasFP16>, T_MAP5XS, VEX_W, EVEX_CD8<16, CD8VT1>;
+
+let Predicates = [HasFP16] in {
+ defm VCVTSI2SHZ : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR32,
+ v8f16x_info, i32mem, loadi32, "cvtsi2sh", "l">,
+ T_MAP5XS, EVEX_CD8<32, CD8VT1>;
+ defm VCVTSI642SHZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR64,
+ v8f16x_info, i64mem, loadi64, "cvtsi2sh","q">,
+ T_MAP5XS, VEX_W, EVEX_CD8<64, CD8VT1>;
+ defm VCVTUSI2SHZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR32,
+ v8f16x_info, i32mem, loadi32,
+ "cvtusi2sh","l">, T_MAP5XS, EVEX_CD8<32, CD8VT1>;
+ defm VCVTUSI642SHZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR64,
+ v8f16x_info, i64mem, loadi64, "cvtusi2sh", "q">,
+ T_MAP5XS, VEX_W, EVEX_CD8<64, CD8VT1>;
+ def : InstAlias<"vcvtsi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
+ (VCVTSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
+
+ def : InstAlias<"vcvtusi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
+ (VCVTUSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
+
+
+ def : Pat<(f16 (any_sint_to_fp (loadi32 addr:$src))),
+ (VCVTSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
+ def : Pat<(f16 (any_sint_to_fp (loadi64 addr:$src))),
+ (VCVTSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
+
+ def : Pat<(f16 (any_sint_to_fp GR32:$src)),
+ (VCVTSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
+ def : Pat<(f16 (any_sint_to_fp GR64:$src)),
+ (VCVTSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;
+
+ def : Pat<(f16 (any_uint_to_fp (loadi32 addr:$src))),
+ (VCVTUSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
+ def : Pat<(f16 (any_uint_to_fp (loadi64 addr:$src))),
+ (VCVTUSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
+
+ def : Pat<(f16 (any_uint_to_fp GR32:$src)),
+ (VCVTUSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
+ def : Pat<(f16 (any_uint_to_fp GR64:$src)),
+ (VCVTUSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;
+
+  // Patterns used for matching vcvtsi2sh intrinsic sequences from clang,
+  // which produce unnecessary vmovsh instructions.
+ def : Pat<(v8f16 (X86Movsh
+ (v8f16 VR128X:$dst),
+ (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR64:$src)))))),
+ (VCVTSI642SHZrr_Int VR128X:$dst, GR64:$src)>;
+
+ def : Pat<(v8f16 (X86Movsh
+ (v8f16 VR128X:$dst),
+ (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi64 addr:$src))))))),
+ (VCVTSI642SHZrm_Int VR128X:$dst, addr:$src)>;
+
+ def : Pat<(v8f16 (X86Movsh
+ (v8f16 VR128X:$dst),
+ (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR32:$src)))))),
+ (VCVTSI2SHZrr_Int VR128X:$dst, GR32:$src)>;
+
+ def : Pat<(v8f16 (X86Movsh
+ (v8f16 VR128X:$dst),
+ (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi32 addr:$src))))))),
+ (VCVTSI2SHZrm_Int VR128X:$dst, addr:$src)>;
+
+ def : Pat<(v8f16 (X86Movsh
+ (v8f16 VR128X:$dst),
+ (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR64:$src)))))),
+ (VCVTUSI642SHZrr_Int VR128X:$dst, GR64:$src)>;
+
+ def : Pat<(v8f16 (X86Movsh
+ (v8f16 VR128X:$dst),
+ (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi64 addr:$src))))))),
+ (VCVTUSI642SHZrm_Int VR128X:$dst, addr:$src)>;
+
+ def : Pat<(v8f16 (X86Movsh
+ (v8f16 VR128X:$dst),
+ (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR32:$src)))))),
+ (VCVTUSI2SHZrr_Int VR128X:$dst, GR32:$src)>;
+
+ def : Pat<(v8f16 (X86Movsh
+ (v8f16 VR128X:$dst),
+ (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi32 addr:$src))))))),
+ (VCVTUSI2SHZrm_Int VR128X:$dst, addr:$src)>;
+} // Predicates = [HasFP16]
+
+let Predicates = [HasFP16, HasVLX] in {
+ // Special patterns to allow use of X86VMSintToFP for masking. Instruction
+ // patterns have been disabled with null_frag.
+ def : Pat<(v8f16 (X86any_VSintToFP (v4i64 VR256X:$src))),
+ (VCVTQQ2PHZ256rr VR256X:$src)>;
+ def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
+ VK4WM:$mask),
+ (VCVTQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
+ def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
+ VK4WM:$mask),
+ (VCVTQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
+
+ def : Pat<(v8f16 (X86any_VSintToFP (loadv4i64 addr:$src))),
+ (VCVTQQ2PHZ256rm addr:$src)>;
+ def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
+ VK4WM:$mask),
+ (VCVTQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
+ def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
+ VK4WM:$mask),
+ (VCVTQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;
+
+ def : Pat<(v8f16 (X86any_VSintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
+ (VCVTQQ2PHZ256rmb addr:$src)>;
+ def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
+ (v8f16 VR128X:$src0), VK4WM:$mask),
+ (VCVTQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
+ def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
+ v8f16x_info.ImmAllZerosV, VK4WM:$mask),
+ (VCVTQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
+
+ def : Pat<(v8f16 (X86any_VSintToFP (v2i64 VR128X:$src))),
+ (VCVTQQ2PHZ128rr VR128X:$src)>;
+ def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
+ def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
+
+ def : Pat<(v8f16 (X86any_VSintToFP (loadv2i64 addr:$src))),
+ (VCVTQQ2PHZ128rm addr:$src)>;
+ def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;
+
+ def : Pat<(v8f16 (X86any_VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
+ (VCVTQQ2PHZ128rmb addr:$src)>;
+ def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
+ (v8f16 VR128X:$src0), VK2WM:$mask),
+ (VCVTQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
+ v8f16x_info.ImmAllZerosV, VK2WM:$mask),
+ (VCVTQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
+
+ // Special patterns to allow use of X86VMUintToFP for masking. Instruction
+ // patterns have been disabled with null_frag.
+ def : Pat<(v8f16 (X86any_VUintToFP (v4i64 VR256X:$src))),
+ (VCVTUQQ2PHZ256rr VR256X:$src)>;
+ def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
+ VK4WM:$mask),
+ (VCVTUQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
+ def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
+ VK4WM:$mask),
+ (VCVTUQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
+
+ def : Pat<(v8f16 (X86any_VUintToFP (loadv4i64 addr:$src))),
+ (VCVTUQQ2PHZ256rm addr:$src)>;
+ def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
+ VK4WM:$mask),
+ (VCVTUQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
+ def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
+ VK4WM:$mask),
+ (VCVTUQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;
+
+ def : Pat<(v8f16 (X86any_VUintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
+ (VCVTUQQ2PHZ256rmb addr:$src)>;
+ def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
+ (v8f16 VR128X:$src0), VK4WM:$mask),
+ (VCVTUQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
+ def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
+ v8f16x_info.ImmAllZerosV, VK4WM:$mask),
+ (VCVTUQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
+
+ def : Pat<(v8f16 (X86any_VUintToFP (v2i64 VR128X:$src))),
+ (VCVTUQQ2PHZ128rr VR128X:$src)>;
+ def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTUQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
+ def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTUQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
+
+ def : Pat<(v8f16 (X86any_VUintToFP (loadv2i64 addr:$src))),
+ (VCVTUQQ2PHZ128rm addr:$src)>;
+ def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTUQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTUQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;
+
+ def : Pat<(v8f16 (X86any_VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
+ (VCVTUQQ2PHZ128rmb addr:$src)>;
+ def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
+ (v8f16 VR128X:$src0), VK2WM:$mask),
+ (VCVTUQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
+ v8f16x_info.ImmAllZerosV, VK2WM:$mask),
+ (VCVTUQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
+}
+
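+// Complex FP16 multiply-add. Note (our reading of the AVX512-FP16 spec, not
+// spelled out in this patch): the destination must not overlap the untied
+// source operands, which is why these classes combine the tied "$src1 = $dst"
+// constraint with @earlyclobber on $dst.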
+let Constraints = "@earlyclobber $dst, $src1 = $dst" in {
+ multiclass avx512_cfmaop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _, bit IsCommutable> {
+ defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.RC:$src3),
+ OpcodeStr, "$src3, $src2", "$src2, $src3",
+ (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), IsCommutable>, EVEX_4V;
+
+ defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.MemOp:$src3),
+ OpcodeStr, "$src3, $src2", "$src2, $src3",
+ (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>, EVEX_4V;
+
+ defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.ScalarMemOp:$src3),
+ OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr),
+ (_.VT (OpNode _.RC:$src2, (_.VT (_.BroadcastLdFrag addr:$src3)), _.RC:$src1))>, EVEX_B, EVEX_4V;
+ }
+} // Constraints = "@earlyclobber $dst, $src1 = $dst"
+
+multiclass avx512_cfmaop_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> {
+ let Constraints = "@earlyclobber $dst, $src1 = $dst" in
+ defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
+ OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
+ (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc)))>,
+ EVEX_4V, EVEX_B, EVEX_RC;
+}
+
+
+multiclass avx512_cfmaop_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, bit IsCommutable> {
+ let Predicates = [HasFP16] in {
+ defm Z : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v16f32_info, IsCommutable>,
+ avx512_cfmaop_round<opc, OpcodeStr, OpNodeRnd, v16f32_info>,
+ EVEX_V512, Sched<[WriteFMAZ]>;
+ }
+ let Predicates = [HasVLX, HasFP16] in {
+ defm Z256 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v8f32x_info, IsCommutable>, EVEX_V256, Sched<[WriteFMAY]>;
+ defm Z128 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v4f32x_info, IsCommutable>, EVEX_V128, Sched<[WriteFMAX]>;
+ }
+}
+
+multiclass avx512_cfmulop_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDNode MaskOpNode, SDNode OpNodeRnd, bit IsCommutable> {
+ let Predicates = [HasFP16] in {
+ defm Z : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
+ WriteFMAZ, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>,
+ avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, WriteFMAZ, v16f32_info,
+ "", "@earlyclobber $dst">, EVEX_V512;
+ }
+ let Predicates = [HasVLX, HasFP16] in {
+ defm Z256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
+ WriteFMAY, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V256;
+ defm Z128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
+ WriteFMAX, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V128;
+ }
+}
+
+
+let Uses = [MXCSR] in {
+ defm VFMADDCPH : avx512_cfmaop_common<0x56, "vfmaddcph", x86vfmaddc, x86vfmaddcRnd, 1>,
+ T_MAP6XS, EVEX_CD8<32, CD8VF>;
+ defm VFCMADDCPH : avx512_cfmaop_common<0x56, "vfcmaddcph", x86vfcmaddc, x86vfcmaddcRnd, 0>,
+ T_MAP6XD, EVEX_CD8<32, CD8VF>;
+
+ defm VFMULCPH : avx512_cfmulop_common<0xD6, "vfmulcph", x86vfmulc, x86vfmulc,
+ x86vfmulcRnd, 1>, T_MAP6XS, EVEX_CD8<32, CD8VF>;
+ defm VFCMULCPH : avx512_cfmulop_common<0xD6, "vfcmulcph", x86vfcmulc,
+ x86vfcmulc, x86vfcmulcRnd, 0>, T_MAP6XD, EVEX_CD8<32, CD8VF>;
+}
+
+
+multiclass avx512_cfmaop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
+ bit IsCommutable> {
+ let Predicates = [HasFP16], Constraints = "@earlyclobber $dst, $src1 = $dst" in {
+ defm r : AVX512_maskable_3src<opc, MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
+ (ins VR128X:$src2, VR128X:$src3), OpcodeStr,
+ "$src3, $src2", "$src2, $src3",
+ (v4f32 (OpNode VR128X:$src2, VR128X:$src3, VR128X:$src1)), IsCommutable>,
+ Sched<[WriteFMAX]>;
+ defm m : AVX512_maskable_3src<opc, MRMSrcMem, v4f32x_info, (outs VR128X:$dst),
+ (ins VR128X:$src2, ssmem:$src3), OpcodeStr,
+ "$src3, $src2", "$src2, $src3",
+ (v4f32 (OpNode VR128X:$src2, (sse_load_f32 addr:$src3), VR128X:$src1))>,
+ Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
+ defm rb : AVX512_maskable_3src<opc, MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
+ (ins VR128X:$src2, VR128X:$src3, AVX512RC:$rc), OpcodeStr,
+ "$rc, $src3, $src2", "$src2, $src3, $rc",
+ (v4f32 (OpNodeRnd VR128X:$src2, VR128X:$src3, VR128X:$src1, (i32 timm:$rc)))>,
+ EVEX_B, EVEX_RC, Sched<[WriteFMAX]>;
+ }
+}
+
+multiclass avx512_cfmbinop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDNode OpNodeRnd, bit IsCommutable> {
+ let Predicates = [HasFP16] in {
+ defm rr : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
+ (ins VR128X:$src1, VR128X:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (v4f32 (OpNode VR128X:$src1, VR128X:$src2)),
+ IsCommutable, IsCommutable, IsCommutable,
+ X86selects, "@earlyclobber $dst">, Sched<[WriteFMAX]>;
+ defm rm : AVX512_maskable<opc, MRMSrcMem, f32x_info, (outs VR128X:$dst),
+ (ins VR128X:$src1, ssmem:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (v4f32 (OpNode VR128X:$src1, (sse_load_f32 addr:$src2))),
+ 0, 0, 0, X86selects, "@earlyclobber $dst">,
+ Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
+ defm rrb : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
+ (ins VR128X:$src1, VR128X:$src2, AVX512RC:$rc), OpcodeStr,
+ "$rc, $src2, $src1", "$src1, $src2, $rc",
+ (OpNodeRnd (v4f32 VR128X:$src1), (v4f32 VR128X:$src2), (i32 timm:$rc)),
+ 0, 0, 0, X86selects, "@earlyclobber $dst">,
+ EVEX_B, EVEX_RC, Sched<[WriteFMAX]>;
+ }
+}
+
+let Uses = [MXCSR] in {
+ defm VFMADDCSHZ : avx512_cfmaop_sh_common<0x57, "vfmaddcsh", x86vfmaddcSh, x86vfmaddcShRnd, 1>,
+ T_MAP6XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX_4V;
+ defm VFCMADDCSHZ : avx512_cfmaop_sh_common<0x57, "vfcmaddcsh", x86vfcmaddcSh, x86vfcmaddcShRnd, 0>,
+ T_MAP6XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX_4V;
+
+ defm VFMULCSHZ : avx512_cfmbinop_sh_common<0xD7, "vfmulcsh", x86vfmulcSh, x86vfmulcShRnd, 1>,
+ T_MAP6XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX_4V;
+ defm VFCMULCSHZ : avx512_cfmbinop_sh_common<0xD7, "vfcmulcsh", x86vfcmulcSh, x86vfcmulcShRnd, 0>,
+ T_MAP6XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX_4V;
+}
diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td
index e83e1e74ff52..8337d2b37383 100644
--- a/llvm/lib/Target/X86/X86InstrArithmetic.td
+++ b/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -708,6 +708,19 @@ class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
mnemonic, "{$src2, $src1|$src1, $src2}", pattern>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
+// BinOpRM_ImplicitUse - Instructions like "adc reg, reg, [mem]".
+// There is an implicit register read at the end of the operand sequence.
+class BinOpRM_ImplicitUse<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ dag outlist, X86FoldableSchedWrite sched, list<dag> pattern>
+ : ITy<opcode, MRMSrcMem, typeinfo, outlist,
+ (ins typeinfo.RegClass:$src1, typeinfo.MemOperand:$src2),
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern>,
+ Sched<[sched.Folded, sched.ReadAfterFold,
+ // base, scale, index, offset, segment.
+ ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
+ // implicit register read.
+ sched.ReadAfterFold]>;
+
// BinOpRM_F - Instructions like "cmp reg, [mem]".
class BinOpRM_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
SDNode opnode>
@@ -725,7 +738,7 @@ class BinOpRM_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
// BinOpRM_RFF - Instructions like "adc reg, reg, [mem]".
class BinOpRM_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
SDNode opnode>
- : BinOpRM<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst), WriteADC,
+ : BinOpRM_ImplicitUse<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst), WriteADC,
[(set typeinfo.RegClass:$dst, EFLAGS,
(opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2),
EFLAGS))]>;
@@ -805,7 +818,11 @@ class BinOpMR_RMW<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
SDNode opnode>
: BinOpMR<opcode, mnemonic, typeinfo,
[(store (opnode (load addr:$dst), typeinfo.RegClass:$src), addr:$dst),
- (implicit EFLAGS)]>, Sched<[WriteALURMW]>;
+ (implicit EFLAGS)]>, Sched<[WriteALURMW,
+ // base, scale, index, offset, segment
+ ReadDefault, ReadDefault, ReadDefault,
+ ReadDefault, ReadDefault,
+ WriteALU.ReadAfterFold]>; // reg
// BinOpMR_RMW_FF - Instructions like "adc [mem], reg".
class BinOpMR_RMW_FF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
@@ -813,7 +830,12 @@ class BinOpMR_RMW_FF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
: BinOpMR<opcode, mnemonic, typeinfo,
[(store (opnode (load addr:$dst), typeinfo.RegClass:$src, EFLAGS),
addr:$dst),
- (implicit EFLAGS)]>, Sched<[WriteADCRMW]>;
+ (implicit EFLAGS)]>, Sched<[WriteADCRMW,
+ // base, scale, index, offset, segment
+ ReadDefault, ReadDefault, ReadDefault,
+ ReadDefault, ReadDefault,
+ WriteALU.ReadAfterFold, // reg
+ WriteALU.ReadAfterFold]>; // EFLAGS
// BinOpMR_F - Instructions like "cmp [mem], reg".
class BinOpMR_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
@@ -1475,13 +1497,17 @@ multiclass bmi_mulx<string mnemonic, RegisterClass RC, X86MemOperand x86memop,
let hasSideEffects = 0 in {
def rr : I<0xF6, MRMSrcReg, (outs RC:$dst1, RC:$dst2), (ins RC:$src),
!strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
- []>, T8XD, VEX_4V, Sched<[sched, WriteIMulH]>;
+ []>, T8XD, VEX_4V, Sched<[WriteIMulH, sched]>;
let mayLoad = 1 in
def rm : I<0xF6, MRMSrcMem, (outs RC:$dst1, RC:$dst2), (ins x86memop:$src),
!strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
-
- []>, T8XD, VEX_4V, Sched<[sched.Folded, WriteIMulH]>;
+ []>, T8XD, VEX_4V,
+ Sched<[WriteIMulHLd, sched.Folded,
+ // Memory operand.
+ ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
+ // Implicit read of EDX/RDX
+ sched.ReadAfterFold]>;
// Pseudo instructions to be used when the low result isn't used. The
// instruction is defined to keep the high if both destinations are the same.
@@ -1496,9 +1522,9 @@ let hasSideEffects = 0 in {
let Predicates = [HasBMI2] in {
let Uses = [EDX] in
- defm MULX32 : bmi_mulx<"mulx{l}", GR32, i32mem, WriteIMul32>;
+ defm MULX32 : bmi_mulx<"mulx{l}", GR32, i32mem, WriteMULX32>;
let Uses = [RDX] in
- defm MULX64 : bmi_mulx<"mulx{q}", GR64, i64mem, WriteIMul64>, VEX_W;
+ defm MULX64 : bmi_mulx<"mulx{q}", GR64, i64mem, WriteMULX64>, VEX_W;
}
//===----------------------------------------------------------------------===//
@@ -1525,7 +1551,12 @@ let Predicates = [HasADX], Defs = [EFLAGS], Uses = [EFLAGS],
"adox{q}\t{$src2, $dst|$dst, $src2}", []>, T8XS;
} // SchedRW
- let mayLoad = 1, SchedRW = [WriteADC.Folded, WriteADC.ReadAfterFold] in {
+ let mayLoad = 1,
+ SchedRW = [WriteADC.Folded, WriteADC.ReadAfterFold,
+ // Memory operand.
+ ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
+ // Implicit read of EFLAGS
+ WriteADC.ReadAfterFold] in {
def ADCX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst),
(ins GR32:$src1, i32mem:$src2),
"adcx{l}\t{$src2, $dst|$dst, $src2}", []>, T8PD;
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index 202d320cd731..ba52283b570d 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -69,16 +69,12 @@ def : Pat<(X86callseq_start timm:$amt1, timm:$amt2),
let SchedRW = [WriteSystem] in {
// x86-64 va_start lowering magic.
-let hasSideEffects = 1, Defs = [EFLAGS] in {
+let hasSideEffects = 1, mayStore = 1, Defs = [EFLAGS] in {
def VASTART_SAVE_XMM_REGS : I<0, Pseudo,
(outs),
- (ins GR8:$al,
- i32imm:$regsavefi, i32imm:$offset,
- variable_ops),
- "#VASTART_SAVE_XMM_REGS $al, $regsavefi, $offset",
- [(X86vastart_save_xmm_regs GR8:$al,
- timm:$regsavefi,
- timm:$offset),
+ (ins GR8:$al, i8mem:$regsavefi, variable_ops),
+ "#VASTART_SAVE_XMM_REGS $al, $regsavefi",
+ [(X86vastart_save_xmm_regs GR8:$al, addr:$regsavefi),
(implicit EFLAGS)]>;
}
@@ -153,15 +149,15 @@ def STACKALLOC_W_PROBING : I<0, Pseudo, (outs), (ins i64imm:$stacksize),
// (compared to ordinary calls) like stack pointer change.
let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
-def WIN_ALLOCA_32 : I<0, Pseudo, (outs), (ins GR32:$size),
+def DYN_ALLOCA_32 : I<0, Pseudo, (outs), (ins GR32:$size),
"# dynamic stack allocation",
- [(X86WinAlloca GR32:$size)]>,
+ [(X86DynAlloca GR32:$size)]>,
Requires<[NotLP64]>;
let Defs = [RAX, RSP, EFLAGS], Uses = [RSP] in
-def WIN_ALLOCA_64 : I<0, Pseudo, (outs), (ins GR64:$size),
+def DYN_ALLOCA_64 : I<0, Pseudo, (outs), (ins GR64:$size),
"# dynamic stack allocation",
- [(X86WinAlloca GR64:$size)]>,
+ [(X86DynAlloca GR64:$size)]>,
Requires<[In64BitMode]>;
} // SchedRW
@@ -261,6 +257,17 @@ let isPseudo = 1, SchedRW = [WriteSystem] in {
}
//===----------------------------------------------------------------------===//
+// Pseudo instructions used by address sanitizer.
+//===----------------------------------------------------------------------===//
+let
+ Defs = [R8, EFLAGS] in {
+def ASAN_CHECK_MEMACCESS : PseudoI<
+ (outs), (ins GR64NoR8:$addr, i32imm:$accessinfo),
+ [(int_asan_check_memaccess GR64NoR8:$addr, (i32 timm:$accessinfo))]>,
+ Sched<[]>;
+}
+
+//===----------------------------------------------------------------------===//
// Pseudo instructions used by segmented stacks.
//
@@ -555,6 +562,7 @@ let usesCustomInserter = 1, hasNoSchedulingInfo = 1, Uses = [EFLAGS] in {
let Predicates = [HasMMX] in
defm _VR64 : CMOVrr_PSEUDO<VR64, x86mmx>;
+ defm _FR16X : CMOVrr_PSEUDO<FR16X, f16>;
let Predicates = [HasSSE1,NoAVX512] in
defm _FR32 : CMOVrr_PSEUDO<FR32, f32>;
let Predicates = [HasSSE2,NoAVX512] in
@@ -612,6 +620,8 @@ let Predicates = [HasVLX] in {
(CMOV_VR128X VR128X:$t, VR128X:$f, timm:$cond)>;
def : Pat<(v8i16 (X86cmov VR128X:$t, VR128X:$f, timm:$cond, EFLAGS)),
(CMOV_VR128X VR128X:$t, VR128X:$f, timm:$cond)>;
+ def : Pat<(v8f16 (X86cmov VR128X:$t, VR128X:$f, timm:$cond, EFLAGS)),
+ (CMOV_VR128X VR128X:$t, VR128X:$f, timm:$cond)>;
def : Pat<(v4i32 (X86cmov VR128X:$t, VR128X:$f, timm:$cond, EFLAGS)),
(CMOV_VR128X VR128X:$t, VR128X:$f, timm:$cond)>;
def : Pat<(v4f32 (X86cmov VR128X:$t, VR128X:$f, timm:$cond, EFLAGS)),
@@ -623,6 +633,8 @@ let Predicates = [HasVLX] in {
(CMOV_VR256X VR256X:$t, VR256X:$f, timm:$cond)>;
def : Pat<(v16i16 (X86cmov VR256X:$t, VR256X:$f, timm:$cond, EFLAGS)),
(CMOV_VR256X VR256X:$t, VR256X:$f, timm:$cond)>;
+ def : Pat<(v16f16 (X86cmov VR256X:$t, VR256X:$f, timm:$cond, EFLAGS)),
+ (CMOV_VR256X VR256X:$t, VR256X:$f, timm:$cond)>;
def : Pat<(v8i32 (X86cmov VR256X:$t, VR256X:$f, timm:$cond, EFLAGS)),
(CMOV_VR256X VR256X:$t, VR256X:$f, timm:$cond)>;
def : Pat<(v8f32 (X86cmov VR256X:$t, VR256X:$f, timm:$cond, EFLAGS)),
@@ -635,6 +647,8 @@ def : Pat<(v64i8 (X86cmov VR512:$t, VR512:$f, timm:$cond, EFLAGS)),
(CMOV_VR512 VR512:$t, VR512:$f, timm:$cond)>;
def : Pat<(v32i16 (X86cmov VR512:$t, VR512:$f, timm:$cond, EFLAGS)),
(CMOV_VR512 VR512:$t, VR512:$f, timm:$cond)>;
+def : Pat<(v32f16 (X86cmov VR512:$t, VR512:$f, timm:$cond, EFLAGS)),
+ (CMOV_VR512 VR512:$t, VR512:$f, timm:$cond)>;
def : Pat<(v16i32 (X86cmov VR512:$t, VR512:$f, timm:$cond, EFLAGS)),
(CMOV_VR512 VR512:$t, VR512:$f, timm:$cond)>;
def : Pat<(v16f32 (X86cmov VR512:$t, VR512:$f, timm:$cond, EFLAGS)),
@@ -953,7 +967,7 @@ multiclass ATOMIC_RMW_BINOP<bits<8> opc8, bits<8> opc, string mnemonic,
!strconcat(mnemonic, "{l}\t{$val, $ptr|$ptr, $val}"),
[(set
GR32:$dst,
- (!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val))]>,
+ (!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val))]>,
OpSize32;
def NAME#64 : RI<opc, MRMSrcMem, (outs GR64:$dst),
(ins GR64:$val, i64mem:$ptr),
@@ -1197,10 +1211,10 @@ def : Pat<(X86call (i64 tglobaladdr:$dst)),
def : Pat<(X86call (i64 texternalsym:$dst)),
(CALL64pcrel32 texternalsym:$dst)>;
-def : Pat<(X86call_rvmarker (timm:$sel), (i64 texternalsym:$dst)),
- (CALL64pcrel32_RVMARKER timm:$sel, texternalsym:$dst)>;
-def : Pat<(X86call_rvmarker (timm:$sel), (i64 tglobaladdr:$dst)),
- (CALL64pcrel32_RVMARKER timm:$sel, tglobaladdr:$dst)>;
+def : Pat<(X86call_rvmarker (i64 tglobaladdr:$rvfunc), (i64 texternalsym:$dst)),
+ (CALL64pcrel32_RVMARKER tglobaladdr:$rvfunc, texternalsym:$dst)>;
+def : Pat<(X86call_rvmarker (i64 tglobaladdr:$rvfunc), (i64 tglobaladdr:$dst)),
+ (CALL64pcrel32_RVMARKER tglobaladdr:$rvfunc, tglobaladdr:$dst)>;
// Tailcall stuff. The TCRETURN instructions execute after the epilog, so they
diff --git a/llvm/lib/Target/X86/X86InstrControl.td b/llvm/lib/Target/X86/X86InstrControl.td
index a6cb17f17a17..6d969962afff 100644
--- a/llvm/lib/Target/X86/X86InstrControl.td
+++ b/llvm/lib/Target/X86/X86InstrControl.td
@@ -20,30 +20,30 @@
// ST1 arguments when returning values on the x87 stack.
let isTerminator = 1, isReturn = 1, isBarrier = 1,
hasCtrlDep = 1, FPForm = SpecialFP, SchedRW = [WriteJumpLd] in {
- def RETL : I <0xC3, RawFrm, (outs), (ins variable_ops),
+ def RET32 : I <0xC3, RawFrm, (outs), (ins variable_ops),
"ret{l}", []>, OpSize32, Requires<[Not64BitMode]>;
- def RETQ : I <0xC3, RawFrm, (outs), (ins variable_ops),
+ def RET64 : I <0xC3, RawFrm, (outs), (ins variable_ops),
"ret{q}", []>, OpSize32, Requires<[In64BitMode]>;
- def RETW : I <0xC3, RawFrm, (outs), (ins),
+ def RET16 : I <0xC3, RawFrm, (outs), (ins),
"ret{w}", []>, OpSize16;
- def RETIL : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
+ def RETI32 : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
"ret{l}\t$amt", []>, OpSize32, Requires<[Not64BitMode]>;
- def RETIQ : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
+ def RETI64 : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
"ret{q}\t$amt", []>, OpSize32, Requires<[In64BitMode]>;
- def RETIW : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt),
+ def RETI16 : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt),
"ret{w}\t$amt", []>, OpSize16;
- def LRETL : I <0xCB, RawFrm, (outs), (ins),
+ def LRET32 : I <0xCB, RawFrm, (outs), (ins),
"{l}ret{l|f}", []>, OpSize32;
- def LRETQ : RI <0xCB, RawFrm, (outs), (ins),
+ def LRET64 : RI <0xCB, RawFrm, (outs), (ins),
"{l}ret{|f}q", []>, Requires<[In64BitMode]>;
- def LRETW : I <0xCB, RawFrm, (outs), (ins),
+ def LRET16 : I <0xCB, RawFrm, (outs), (ins),
"{l}ret{w|f}", []>, OpSize16;
- def LRETIL : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
- "{l}ret{l|f}\t$amt", []>, OpSize32;
- def LRETIQ : RIi16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
- "{l}ret{|f}q\t$amt", []>, Requires<[In64BitMode]>;
- def LRETIW : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
- "{l}ret{w|f}\t$amt", []>, OpSize16;
+ def LRETI32 : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
+ "{l}ret{l|f}\t$amt", []>, OpSize32;
+ def LRETI64 : RIi16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
+ "{l}ret{|f}q\t$amt", []>, Requires<[In64BitMode]>;
+ def LRETI16 : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
+ "{l}ret{w|f}\t$amt", []>, OpSize16;
// The machine return from interrupt instruction, but sometimes we need to
// perform a post-epilogue stack adjustment. Codegen emits the pseudo form
@@ -419,15 +419,15 @@ let isPseudo = 1, isCall = 1, isCodeGenOnly = 1,
Uses = [RSP, SSP],
SchedRW = [WriteJump] in {
def CALL64m_RVMARKER :
- PseudoI<(outs), (ins i32imm:$sel, i64mem:$dst), [(X86call_rvmarker timm:$sel, (loadi64 addr:$dst))]>,
+ PseudoI<(outs), (ins i64imm:$rvfunc, i64mem:$dst), [(X86call_rvmarker tglobaladdr:$rvfunc, (loadi64 addr:$dst))]>,
Requires<[In64BitMode]>;
def CALL64r_RVMARKER :
- PseudoI<(outs), (ins i32imm:$sel, GR64:$dst), [(X86call_rvmarker timm:$sel, GR64:$dst)]>,
+ PseudoI<(outs), (ins i64imm:$rvfunc, GR64:$dst), [(X86call_rvmarker tglobaladdr:$rvfunc, GR64:$dst)]>,
Requires<[In64BitMode]>;
def CALL64pcrel32_RVMARKER :
- PseudoI<(outs), (ins i32imm:$sel, i64i32imm_brtarget:$dst), []>,
+ PseudoI<(outs), (ins i64imm:$rvfunc, i64i32imm_brtarget:$dst), []>,
Requires<[In64BitMode]>;
}
diff --git a/llvm/lib/Target/X86/X86InstrFMA.td b/llvm/lib/Target/X86/X86InstrFMA.td
index 27328fe42c44..1f92293fa73f 100644
--- a/llvm/lib/Target/X86/X86InstrFMA.td
+++ b/llvm/lib/Target/X86/X86InstrFMA.td
@@ -427,7 +427,7 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
}
multiclass fma4s_int<bits<8> opc, string OpcodeStr, Operand memop,
- ValueType VT, X86FoldableSchedWrite sched> {
+ X86FoldableSchedWrite sched> {
let isCodeGenOnly = 1, hasSideEffects = 0,
Uses = [MXCSR], mayRaiseFPException = 1 in {
def rr_Int : FMA4S_Int<opc, MRMSrcRegOp4, (outs VR128:$dst),
@@ -540,20 +540,16 @@ let ExeDomain = SSEPackedSingle in {
// Scalar Instructions
defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, any_fma, loadf32,
SchedWriteFMA.Scl>,
- fma4s_int<0x6A, "vfmaddss", ssmem, v4f32,
- SchedWriteFMA.Scl>;
+ fma4s_int<0x6A, "vfmaddss", ssmem, SchedWriteFMA.Scl>;
defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", FR32, f32mem, f32, X86any_Fmsub, loadf32,
SchedWriteFMA.Scl>,
- fma4s_int<0x6E, "vfmsubss", ssmem, v4f32,
- SchedWriteFMA.Scl>;
+ fma4s_int<0x6E, "vfmsubss", ssmem, SchedWriteFMA.Scl>;
defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", FR32, f32mem, f32,
X86any_Fnmadd, loadf32, SchedWriteFMA.Scl>,
- fma4s_int<0x7A, "vfnmaddss", ssmem, v4f32,
- SchedWriteFMA.Scl>;
+ fma4s_int<0x7A, "vfnmaddss", ssmem, SchedWriteFMA.Scl>;
defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", FR32, f32mem, f32,
X86any_Fnmsub, loadf32, SchedWriteFMA.Scl>,
- fma4s_int<0x7E, "vfnmsubss", ssmem, v4f32,
- SchedWriteFMA.Scl>;
+ fma4s_int<0x7E, "vfnmsubss", ssmem, SchedWriteFMA.Scl>;
// Packed Instructions
defm VFMADDPS4 : fma4p<0x68, "vfmaddps", any_fma, v4f32, v8f32,
loadv4f32, loadv8f32, SchedWriteFMA>;
@@ -573,20 +569,16 @@ let ExeDomain = SSEPackedDouble in {
// Scalar Instructions
defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, any_fma, loadf64,
SchedWriteFMA.Scl>,
- fma4s_int<0x6B, "vfmaddsd", sdmem, v2f64,
- SchedWriteFMA.Scl>;
+ fma4s_int<0x6B, "vfmaddsd", sdmem, SchedWriteFMA.Scl>;
defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", FR64, f64mem, f64, X86any_Fmsub, loadf64,
SchedWriteFMA.Scl>,
- fma4s_int<0x6F, "vfmsubsd", sdmem, v2f64,
- SchedWriteFMA.Scl>;
+ fma4s_int<0x6F, "vfmsubsd", sdmem, SchedWriteFMA.Scl>;
defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", FR64, f64mem, f64,
X86any_Fnmadd, loadf64, SchedWriteFMA.Scl>,
- fma4s_int<0x7B, "vfnmaddsd", sdmem, v2f64,
- SchedWriteFMA.Scl>;
+ fma4s_int<0x7B, "vfnmaddsd", sdmem, SchedWriteFMA.Scl>;
defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", FR64, f64mem, f64,
X86any_Fnmsub, loadf64, SchedWriteFMA.Scl>,
- fma4s_int<0x7F, "vfnmsubsd", sdmem, v2f64,
- SchedWriteFMA.Scl>;
+ fma4s_int<0x7F, "vfnmsubsd", sdmem, SchedWriteFMA.Scl>;
// Packed Instructions
defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", any_fma, v2f64, v4f64,
loadv2f64, loadv4f64, SchedWriteFMA>;
@@ -603,8 +595,8 @@ let ExeDomain = SSEPackedDouble in {
}
multiclass scalar_fma4_patterns<SDPatternOperator Op, string Name,
- ValueType VT, ValueType EltVT,
- RegisterClass RC, PatFrag mem_frag> {
+ ValueType VT, RegisterClass RC,
+ PatFrag mem_frag> {
let Predicates = [HasFMA4] in {
def : Pat<(VT (X86vzmovl (VT (scalar_to_vector
(Op RC:$src1, RC:$src2, RC:$src3))))),
@@ -629,12 +621,12 @@ multiclass scalar_fma4_patterns<SDPatternOperator Op, string Name,
}
}
-defm : scalar_fma4_patterns<any_fma, "VFMADDSS4", v4f32, f32, FR32, loadf32>;
-defm : scalar_fma4_patterns<X86any_Fmsub, "VFMSUBSS4", v4f32, f32, FR32, loadf32>;
-defm : scalar_fma4_patterns<X86any_Fnmadd, "VFNMADDSS4", v4f32, f32, FR32, loadf32>;
-defm : scalar_fma4_patterns<X86any_Fnmsub, "VFNMSUBSS4", v4f32, f32, FR32, loadf32>;
+defm : scalar_fma4_patterns<any_fma, "VFMADDSS4", v4f32, FR32, loadf32>;
+defm : scalar_fma4_patterns<X86any_Fmsub, "VFMSUBSS4", v4f32, FR32, loadf32>;
+defm : scalar_fma4_patterns<X86any_Fnmadd, "VFNMADDSS4", v4f32, FR32, loadf32>;
+defm : scalar_fma4_patterns<X86any_Fnmsub, "VFNMSUBSS4", v4f32, FR32, loadf32>;
-defm : scalar_fma4_patterns<any_fma, "VFMADDSD4", v2f64, f64, FR64, loadf64>;
-defm : scalar_fma4_patterns<X86any_Fmsub, "VFMSUBSD4", v2f64, f64, FR64, loadf64>;
-defm : scalar_fma4_patterns<X86any_Fnmadd, "VFNMADDSD4", v2f64, f64, FR64, loadf64>;
-defm : scalar_fma4_patterns<X86any_Fnmsub, "VFNMSUBSD4", v2f64, f64, FR64, loadf64>;
+defm : scalar_fma4_patterns<any_fma, "VFMADDSD4", v2f64, FR64, loadf64>;
+defm : scalar_fma4_patterns<X86any_Fmsub, "VFMSUBSD4", v2f64, FR64, loadf64>;
+defm : scalar_fma4_patterns<X86any_Fnmadd, "VFNMADDSD4", v2f64, FR64, loadf64>;
+defm : scalar_fma4_patterns<X86any_Fnmsub, "VFNMSUBSD4", v2f64, FR64, loadf64>;
diff --git a/llvm/lib/Target/X86/X86InstrFMA3Info.cpp b/llvm/lib/Target/X86/X86InstrFMA3Info.cpp
index 6d803e931b68..52b2a62316cd 100644
--- a/llvm/lib/Target/X86/X86InstrFMA3Info.cpp
+++ b/llvm/lib/Target/X86/X86InstrFMA3Info.cpp
@@ -28,35 +28,43 @@ using namespace llvm;
FMA3GROUP(Name, Suf##k, Attrs | X86InstrFMA3Group::KMergeMasked) \
FMA3GROUP(Name, Suf##kz, Attrs | X86InstrFMA3Group::KZeroMasked)
-#define FMA3GROUP_PACKED_WIDTHS(Name, Suf, Attrs) \
- FMA3GROUP(Name, Suf##Ym, Attrs) \
- FMA3GROUP(Name, Suf##Yr, Attrs) \
+#define FMA3GROUP_PACKED_WIDTHS_Z(Name, Suf, Attrs) \
FMA3GROUP_MASKED(Name, Suf##Z128m, Attrs) \
FMA3GROUP_MASKED(Name, Suf##Z128r, Attrs) \
FMA3GROUP_MASKED(Name, Suf##Z256m, Attrs) \
FMA3GROUP_MASKED(Name, Suf##Z256r, Attrs) \
FMA3GROUP_MASKED(Name, Suf##Zm, Attrs) \
FMA3GROUP_MASKED(Name, Suf##Zr, Attrs) \
+
+#define FMA3GROUP_PACKED_WIDTHS_ALL(Name, Suf, Attrs) \
+ FMA3GROUP(Name, Suf##Ym, Attrs) \
+ FMA3GROUP(Name, Suf##Yr, Attrs) \
+ FMA3GROUP_PACKED_WIDTHS_Z(Name, Suf, Attrs) \
FMA3GROUP(Name, Suf##m, Attrs) \
FMA3GROUP(Name, Suf##r, Attrs)
#define FMA3GROUP_PACKED(Name, Attrs) \
- FMA3GROUP_PACKED_WIDTHS(Name, PD, Attrs) \
- FMA3GROUP_PACKED_WIDTHS(Name, PS, Attrs)
+ FMA3GROUP_PACKED_WIDTHS_ALL(Name, PD, Attrs) \
+ FMA3GROUP_PACKED_WIDTHS_Z(Name, PH, Attrs) \
+ FMA3GROUP_PACKED_WIDTHS_ALL(Name, PS, Attrs)
-#define FMA3GROUP_SCALAR_WIDTHS(Name, Suf, Attrs) \
+#define FMA3GROUP_SCALAR_WIDTHS_Z(Name, Suf, Attrs) \
FMA3GROUP(Name, Suf##Zm, Attrs) \
FMA3GROUP_MASKED(Name, Suf##Zm_Int, Attrs | X86InstrFMA3Group::Intrinsic) \
FMA3GROUP(Name, Suf##Zr, Attrs) \
FMA3GROUP_MASKED(Name, Suf##Zr_Int, Attrs | X86InstrFMA3Group::Intrinsic) \
+
+#define FMA3GROUP_SCALAR_WIDTHS_ALL(Name, Suf, Attrs) \
+ FMA3GROUP_SCALAR_WIDTHS_Z(Name, Suf, Attrs) \
FMA3GROUP(Name, Suf##m, Attrs) \
FMA3GROUP(Name, Suf##m_Int, Attrs | X86InstrFMA3Group::Intrinsic) \
FMA3GROUP(Name, Suf##r, Attrs) \
FMA3GROUP(Name, Suf##r_Int, Attrs | X86InstrFMA3Group::Intrinsic)
#define FMA3GROUP_SCALAR(Name, Attrs) \
- FMA3GROUP_SCALAR_WIDTHS(Name, SD, Attrs) \
- FMA3GROUP_SCALAR_WIDTHS(Name, SS, Attrs)
+ FMA3GROUP_SCALAR_WIDTHS_ALL(Name, SD, Attrs) \
+ FMA3GROUP_SCALAR_WIDTHS_Z(Name, SH, Attrs) \
+ FMA3GROUP_SCALAR_WIDTHS_ALL(Name, SS, Attrs)
#define FMA3GROUP_FULL(Name, Attrs) \
FMA3GROUP_PACKED(Name, Attrs) \
@@ -78,15 +86,19 @@ static const X86InstrFMA3Group Groups[] = {
#define FMA3GROUP_PACKED_AVX512(Name, Suf, Attrs) \
FMA3GROUP_PACKED_AVX512_WIDTHS(Name, PD, Suf, Attrs) \
+ FMA3GROUP_PACKED_AVX512_WIDTHS(Name, PH, Suf, Attrs) \
FMA3GROUP_PACKED_AVX512_WIDTHS(Name, PS, Suf, Attrs)
#define FMA3GROUP_PACKED_AVX512_ROUND(Name, Suf, Attrs) \
FMA3GROUP_MASKED(Name, PDZ##Suf, Attrs) \
+ FMA3GROUP_MASKED(Name, PHZ##Suf, Attrs) \
FMA3GROUP_MASKED(Name, PSZ##Suf, Attrs)
#define FMA3GROUP_SCALAR_AVX512_ROUND(Name, Suf, Attrs) \
FMA3GROUP(Name, SDZ##Suf, Attrs) \
FMA3GROUP_MASKED(Name, SDZ##Suf##_Int, Attrs) \
+ FMA3GROUP(Name, SHZ##Suf, Attrs) \
+ FMA3GROUP_MASKED(Name, SHZ##Suf##_Int, Attrs) \
FMA3GROUP(Name, SSZ##Suf, Attrs) \
FMA3GROUP_MASKED(Name, SSZ##Suf##_Int, Attrs)
@@ -130,14 +142,16 @@ const X86InstrFMA3Group *llvm::getFMA3Group(unsigned Opcode, uint64_t TSFlags) {
// FMA3 instructions have a well defined encoding pattern we can exploit.
uint8_t BaseOpcode = X86II::getBaseOpcodeFor(TSFlags);
- bool IsFMA3 = ((TSFlags & X86II::EncodingMask) == X86II::VEX ||
- (TSFlags & X86II::EncodingMask) == X86II::EVEX) &&
- (TSFlags & X86II::OpMapMask) == X86II::T8 &&
- (TSFlags & X86II::OpPrefixMask) == X86II::PD &&
- ((BaseOpcode >= 0x96 && BaseOpcode <= 0x9F) ||
- (BaseOpcode >= 0xA6 && BaseOpcode <= 0xAF) ||
- (BaseOpcode >= 0xB6 && BaseOpcode <= 0xBF));
- if (!IsFMA3)
+ bool IsFMA3Opcode = ((BaseOpcode >= 0x96 && BaseOpcode <= 0x9F) ||
+ (BaseOpcode >= 0xA6 && BaseOpcode <= 0xAF) ||
+ (BaseOpcode >= 0xB6 && BaseOpcode <= 0xBF));
+ bool IsFMA3Encoding = ((TSFlags & X86II::EncodingMask) == X86II::VEX &&
+ (TSFlags & X86II::OpMapMask) == X86II::T8) ||
+ ((TSFlags & X86II::EncodingMask) == X86II::EVEX &&
+ ((TSFlags & X86II::OpMapMask) == X86II::T8 ||
+ (TSFlags & X86II::OpMapMask) == X86II::T_MAP6));
+ bool IsFMA3Prefix = (TSFlags & X86II::OpPrefixMask) == X86II::PD;
+ if (!IsFMA3Opcode || !IsFMA3Encoding || !IsFMA3Prefix)
return nullptr;
verifyTables();
diff --git a/llvm/lib/Target/X86/X86InstrFPStack.td b/llvm/lib/Target/X86/X86InstrFPStack.td
index cda28d18f4aa..e310f369be08 100644
--- a/llvm/lib/Target/X86/X86InstrFPStack.td
+++ b/llvm/lib/Target/X86/X86InstrFPStack.td
@@ -377,7 +377,7 @@ def TST_F : FPI<0xD9, MRM_E4, (outs), (ins), "ftst">;
} // SchedRW
} // Uses = [FPCW], mayRaiseFPException = 1
-let SchedRW = [WriteFTest] in {
+let SchedRW = [WriteFTest], Defs = [FPSW] in {
def XAM_Fp32 : FpIf32<(outs), (ins RFP32:$src), OneArgFP, []>;
def XAM_Fp64 : FpIf64<(outs), (ins RFP64:$src), OneArgFP, []>;
def XAM_Fp80 : FpI_<(outs), (ins RFP80:$src), OneArgFP, []>;
diff --git a/llvm/lib/Target/X86/X86InstrFoldTables.cpp b/llvm/lib/Target/X86/X86InstrFoldTables.cpp
index 17fe7f0bd310..6d4ad08842c7 100644
--- a/llvm/lib/Target/X86/X86InstrFoldTables.cpp
+++ b/llvm/lib/Target/X86/X86InstrFoldTables.cpp
@@ -815,10 +815,14 @@ static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
{ X86::VFPCLASSPDZ128rr, X86::VFPCLASSPDZ128rm, 0 },
{ X86::VFPCLASSPDZ256rr, X86::VFPCLASSPDZ256rm, 0 },
{ X86::VFPCLASSPDZrr, X86::VFPCLASSPDZrm, 0 },
+ { X86::VFPCLASSPHZ128rr, X86::VFPCLASSPHZ128rm, 0 },
+ { X86::VFPCLASSPHZ256rr, X86::VFPCLASSPHZ256rm, 0 },
+ { X86::VFPCLASSPHZrr, X86::VFPCLASSPHZrm, 0 },
{ X86::VFPCLASSPSZ128rr, X86::VFPCLASSPSZ128rm, 0 },
{ X86::VFPCLASSPSZ256rr, X86::VFPCLASSPSZ256rm, 0 },
{ X86::VFPCLASSPSZrr, X86::VFPCLASSPSZrm, 0 },
{ X86::VFPCLASSSDZrr, X86::VFPCLASSSDZrm, TB_NO_REVERSE },
+ { X86::VFPCLASSSHZrr, X86::VFPCLASSSHZrm, TB_NO_REVERSE },
{ X86::VFPCLASSSSZrr, X86::VFPCLASSSSZrm, TB_NO_REVERSE },
{ X86::VFRCZPDYrr, X86::VFRCZPDYrm, 0 },
{ X86::VFRCZPDrr, X86::VFRCZPDrm, 0 },
@@ -829,12 +833,18 @@ static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
{ X86::VGETEXPPDZ128r, X86::VGETEXPPDZ128m, 0 },
{ X86::VGETEXPPDZ256r, X86::VGETEXPPDZ256m, 0 },
{ X86::VGETEXPPDZr, X86::VGETEXPPDZm, 0 },
+ { X86::VGETEXPPHZ128r, X86::VGETEXPPHZ128m, 0 },
+ { X86::VGETEXPPHZ256r, X86::VGETEXPPHZ256m, 0 },
+ { X86::VGETEXPPHZr, X86::VGETEXPPHZm, 0 },
{ X86::VGETEXPPSZ128r, X86::VGETEXPPSZ128m, 0 },
{ X86::VGETEXPPSZ256r, X86::VGETEXPPSZ256m, 0 },
{ X86::VGETEXPPSZr, X86::VGETEXPPSZm, 0 },
{ X86::VGETMANTPDZ128rri, X86::VGETMANTPDZ128rmi, 0 },
{ X86::VGETMANTPDZ256rri, X86::VGETMANTPDZ256rmi, 0 },
{ X86::VGETMANTPDZrri, X86::VGETMANTPDZrmi, 0 },
+ { X86::VGETMANTPHZ128rri, X86::VGETMANTPHZ128rmi, 0 },
+ { X86::VGETMANTPHZ256rri, X86::VGETMANTPHZ256rmi, 0 },
+ { X86::VGETMANTPHZrri, X86::VGETMANTPHZrmi, 0 },
{ X86::VGETMANTPSZ128rri, X86::VGETMANTPSZ128rmi, 0 },
{ X86::VGETMANTPSZ256rri, X86::VGETMANTPSZ256rmi, 0 },
{ X86::VGETMANTPSZrri, X86::VGETMANTPSZrmi, 0 },
@@ -1161,17 +1171,26 @@ static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
{ X86::VRCP14PSZr, X86::VRCP14PSZm, 0 },
{ X86::VRCP28PDZr, X86::VRCP28PDZm, 0 },
{ X86::VRCP28PSZr, X86::VRCP28PSZm, 0 },
+ { X86::VRCPPHZ128r, X86::VRCPPHZ128m, 0 },
+ { X86::VRCPPHZ256r, X86::VRCPPHZ256m, 0 },
+ { X86::VRCPPHZr, X86::VRCPPHZm, 0 },
{ X86::VRCPPSYr, X86::VRCPPSYm, 0 },
{ X86::VRCPPSr, X86::VRCPPSm, 0 },
{ X86::VREDUCEPDZ128rri, X86::VREDUCEPDZ128rmi, 0 },
{ X86::VREDUCEPDZ256rri, X86::VREDUCEPDZ256rmi, 0 },
{ X86::VREDUCEPDZrri, X86::VREDUCEPDZrmi, 0 },
+ { X86::VREDUCEPHZ128rri, X86::VREDUCEPHZ128rmi, 0 },
+ { X86::VREDUCEPHZ256rri, X86::VREDUCEPHZ256rmi, 0 },
+ { X86::VREDUCEPHZrri, X86::VREDUCEPHZrmi, 0 },
{ X86::VREDUCEPSZ128rri, X86::VREDUCEPSZ128rmi, 0 },
{ X86::VREDUCEPSZ256rri, X86::VREDUCEPSZ256rmi, 0 },
{ X86::VREDUCEPSZrri, X86::VREDUCEPSZrmi, 0 },
{ X86::VRNDSCALEPDZ128rri, X86::VRNDSCALEPDZ128rmi, 0 },
{ X86::VRNDSCALEPDZ256rri, X86::VRNDSCALEPDZ256rmi, 0 },
{ X86::VRNDSCALEPDZrri, X86::VRNDSCALEPDZrmi, 0 },
+ { X86::VRNDSCALEPHZ128rri, X86::VRNDSCALEPHZ128rmi, 0 },
+ { X86::VRNDSCALEPHZ256rri, X86::VRNDSCALEPHZ256rmi, 0 },
+ { X86::VRNDSCALEPHZrri, X86::VRNDSCALEPHZrmi, 0 },
{ X86::VRNDSCALEPSZ128rri, X86::VRNDSCALEPSZ128rmi, 0 },
{ X86::VRNDSCALEPSZ256rri, X86::VRNDSCALEPSZ256rmi, 0 },
{ X86::VRNDSCALEPSZrri, X86::VRNDSCALEPSZrmi, 0 },
@@ -1187,6 +1206,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
{ X86::VRSQRT14PSZr, X86::VRSQRT14PSZm, 0 },
{ X86::VRSQRT28PDZr, X86::VRSQRT28PDZm, 0 },
{ X86::VRSQRT28PSZr, X86::VRSQRT28PSZm, 0 },
+ { X86::VRSQRTPHZ128r, X86::VRSQRTPHZ128m, 0 },
+ { X86::VRSQRTPHZ256r, X86::VRSQRTPHZ256m, 0 },
+ { X86::VRSQRTPHZr, X86::VRSQRTPHZm, 0 },
{ X86::VRSQRTPSYr, X86::VRSQRTPSYm, 0 },
{ X86::VRSQRTPSr, X86::VRSQRTPSm, 0 },
{ X86::VSQRTPDYr, X86::VSQRTPDYm, 0 },
@@ -1194,6 +1216,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
{ X86::VSQRTPDZ256r, X86::VSQRTPDZ256m, 0 },
{ X86::VSQRTPDZr, X86::VSQRTPDZm, 0 },
{ X86::VSQRTPDr, X86::VSQRTPDm, 0 },
+ { X86::VSQRTPHZ128r, X86::VSQRTPHZ128m, 0 },
+ { X86::VSQRTPHZ256r, X86::VSQRTPHZ256m, 0 },
+ { X86::VSQRTPHZr, X86::VSQRTPHZm, 0 },
{ X86::VSQRTPSYr, X86::VSQRTPSYm, 0 },
{ X86::VSQRTPSZ128r, X86::VSQRTPSZ128m, 0 },
{ X86::VSQRTPSZ256r, X86::VSQRTPSZ256m, 0 },
@@ -1550,6 +1575,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VADDPDZ256rr, X86::VADDPDZ256rm, 0 },
{ X86::VADDPDZrr, X86::VADDPDZrm, 0 },
{ X86::VADDPDrr, X86::VADDPDrm, 0 },
+ { X86::VADDPHZ128rr, X86::VADDPHZ128rm, 0 },
+ { X86::VADDPHZ256rr, X86::VADDPHZ256rm, 0 },
+ { X86::VADDPHZrr, X86::VADDPHZrm, 0 },
{ X86::VADDPSYrr, X86::VADDPSYrm, 0 },
{ X86::VADDPSZ128rr, X86::VADDPSZ128rm, 0 },
{ X86::VADDPSZ256rr, X86::VADDPSZ256rm, 0 },
@@ -1559,6 +1587,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VADDSDZrr_Int, X86::VADDSDZrm_Int, TB_NO_REVERSE },
{ X86::VADDSDrr, X86::VADDSDrm, 0 },
{ X86::VADDSDrr_Int, X86::VADDSDrm_Int, TB_NO_REVERSE },
+ { X86::VADDSHZrr, X86::VADDSHZrm, 0 },
+ { X86::VADDSHZrr_Int, X86::VADDSHZrm_Int, TB_NO_REVERSE },
{ X86::VADDSSZrr, X86::VADDSSZrm, 0 },
{ X86::VADDSSZrr_Int, X86::VADDSSZrm_Int, TB_NO_REVERSE },
{ X86::VADDSSrr, X86::VADDSSrm, 0 },
@@ -1642,6 +1672,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VCMPPDZ256rri, X86::VCMPPDZ256rmi, 0 },
{ X86::VCMPPDZrri, X86::VCMPPDZrmi, 0 },
{ X86::VCMPPDrri, X86::VCMPPDrmi, 0 },
+ { X86::VCMPPHZ128rri, X86::VCMPPHZ128rmi, 0 },
+ { X86::VCMPPHZ256rri, X86::VCMPPHZ256rmi, 0 },
+ { X86::VCMPPHZrri, X86::VCMPPHZrmi, 0 },
{ X86::VCMPPSYrri, X86::VCMPPSYrmi, 0 },
{ X86::VCMPPSZ128rri, X86::VCMPPSZ128rmi, 0 },
{ X86::VCMPPSZ256rri, X86::VCMPPSZ256rmi, 0 },
@@ -1651,6 +1684,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VCMPSDZrr_Int, X86::VCMPSDZrm_Int, TB_NO_REVERSE },
{ X86::VCMPSDrr, X86::VCMPSDrm, 0 },
{ X86::VCMPSDrr_Int, X86::VCMPSDrm_Int, TB_NO_REVERSE },
+ { X86::VCMPSHZrr, X86::VCMPSHZrm, 0 },
+ { X86::VCMPSHZrr_Int, X86::VCMPSHZrm_Int, TB_NO_REVERSE },
{ X86::VCMPSSZrr, X86::VCMPSSZrm, 0 },
{ X86::VCMPSSZrr_Int, X86::VCMPSSZrm_Int, TB_NO_REVERSE },
{ X86::VCMPSSrr, X86::VCMPSSrm, 0 },
@@ -1782,6 +1817,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VDIVPDZ256rr, X86::VDIVPDZ256rm, 0 },
{ X86::VDIVPDZrr, X86::VDIVPDZrm, 0 },
{ X86::VDIVPDrr, X86::VDIVPDrm, 0 },
+ { X86::VDIVPHZ128rr, X86::VDIVPHZ128rm, 0 },
+ { X86::VDIVPHZ256rr, X86::VDIVPHZ256rm, 0 },
+ { X86::VDIVPHZrr, X86::VDIVPHZrm, 0 },
{ X86::VDIVPSYrr, X86::VDIVPSYrm, 0 },
{ X86::VDIVPSZ128rr, X86::VDIVPSZ128rm, 0 },
{ X86::VDIVPSZ256rr, X86::VDIVPSZ256rm, 0 },
@@ -1791,6 +1829,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VDIVSDZrr_Int, X86::VDIVSDZrm_Int, TB_NO_REVERSE },
{ X86::VDIVSDrr, X86::VDIVSDrm, 0 },
{ X86::VDIVSDrr_Int, X86::VDIVSDrm_Int, TB_NO_REVERSE },
+ { X86::VDIVSHZrr, X86::VDIVSHZrm, 0 },
+ { X86::VDIVSHZrr_Int, X86::VDIVSHZrm_Int, TB_NO_REVERSE },
{ X86::VDIVSSZrr, X86::VDIVSSZrm, 0 },
{ X86::VDIVSSZrr_Int, X86::VDIVSSZrm_Int, TB_NO_REVERSE },
{ X86::VDIVSSrr, X86::VDIVSSrm, 0 },
@@ -1806,6 +1846,10 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VEXPANDPSZ128rrkz, X86::VEXPANDPSZ128rmkz, TB_NO_REVERSE },
{ X86::VEXPANDPSZ256rrkz, X86::VEXPANDPSZ256rmkz, TB_NO_REVERSE },
{ X86::VEXPANDPSZrrkz, X86::VEXPANDPSZrmkz, TB_NO_REVERSE },
+ { X86::VFCMULCPHZ128rr, X86::VFCMULCPHZ128rm, 0 },
+ { X86::VFCMULCPHZ256rr, X86::VFCMULCPHZ256rm, 0 },
+ { X86::VFCMULCPHZrr, X86::VFCMULCPHZrm, 0 },
+ { X86::VFCMULCSHZrr, X86::VFCMULCSHZrm, TB_NO_REVERSE },
{ X86::VFMADDPD4Yrr, X86::VFMADDPD4Ymr, 0 },
{ X86::VFMADDPD4rr, X86::VFMADDPD4mr, 0 },
{ X86::VFMADDPS4Yrr, X86::VFMADDPS4Ymr, 0 },
@@ -1830,6 +1874,10 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VFMSUBSD4rr_Int, X86::VFMSUBSD4mr_Int, TB_NO_REVERSE },
{ X86::VFMSUBSS4rr, X86::VFMSUBSS4mr, 0 },
{ X86::VFMSUBSS4rr_Int, X86::VFMSUBSS4mr_Int, TB_NO_REVERSE },
+ { X86::VFMULCPHZ128rr, X86::VFMULCPHZ128rm, 0 },
+ { X86::VFMULCPHZ256rr, X86::VFMULCPHZ256rm, 0 },
+ { X86::VFMULCPHZrr, X86::VFMULCPHZrm, 0 },
+ { X86::VFMULCSHZrr, X86::VFMULCSHZrm, TB_NO_REVERSE },
{ X86::VFNMADDPD4Yrr, X86::VFNMADDPD4Ymr, 0 },
{ X86::VFNMADDPD4rr, X86::VFNMADDPD4mr, 0 },
{ X86::VFNMADDPS4Yrr, X86::VFNMADDPS4Ymr, 0 },
@@ -1849,26 +1897,38 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VFPCLASSPDZ128rrk, X86::VFPCLASSPDZ128rmk, 0 },
{ X86::VFPCLASSPDZ256rrk, X86::VFPCLASSPDZ256rmk, 0 },
{ X86::VFPCLASSPDZrrk, X86::VFPCLASSPDZrmk, 0 },
+ { X86::VFPCLASSPHZ128rrk, X86::VFPCLASSPHZ128rmk, 0 },
+ { X86::VFPCLASSPHZ256rrk, X86::VFPCLASSPHZ256rmk, 0 },
+ { X86::VFPCLASSPHZrrk, X86::VFPCLASSPHZrmk, 0 },
{ X86::VFPCLASSPSZ128rrk, X86::VFPCLASSPSZ128rmk, 0 },
{ X86::VFPCLASSPSZ256rrk, X86::VFPCLASSPSZ256rmk, 0 },
{ X86::VFPCLASSPSZrrk, X86::VFPCLASSPSZrmk, 0 },
{ X86::VFPCLASSSDZrrk, X86::VFPCLASSSDZrmk, TB_NO_REVERSE },
+ { X86::VFPCLASSSHZrrk, X86::VFPCLASSSHZrmk, TB_NO_REVERSE },
{ X86::VFPCLASSSSZrrk, X86::VFPCLASSSSZrmk, TB_NO_REVERSE },
{ X86::VGETEXPPDZ128rkz, X86::VGETEXPPDZ128mkz, 0 },
{ X86::VGETEXPPDZ256rkz, X86::VGETEXPPDZ256mkz, 0 },
{ X86::VGETEXPPDZrkz, X86::VGETEXPPDZmkz, 0 },
+ { X86::VGETEXPPHZ128rkz, X86::VGETEXPPHZ128mkz, 0 },
+ { X86::VGETEXPPHZ256rkz, X86::VGETEXPPHZ256mkz, 0 },
+ { X86::VGETEXPPHZrkz, X86::VGETEXPPHZmkz, 0 },
{ X86::VGETEXPPSZ128rkz, X86::VGETEXPPSZ128mkz, 0 },
{ X86::VGETEXPPSZ256rkz, X86::VGETEXPPSZ256mkz, 0 },
{ X86::VGETEXPPSZrkz, X86::VGETEXPPSZmkz, 0 },
{ X86::VGETEXPSDZr, X86::VGETEXPSDZm, TB_NO_REVERSE },
+ { X86::VGETEXPSHZr, X86::VGETEXPSHZm, TB_NO_REVERSE },
{ X86::VGETEXPSSZr, X86::VGETEXPSSZm, TB_NO_REVERSE },
{ X86::VGETMANTPDZ128rrikz, X86::VGETMANTPDZ128rmikz, 0 },
{ X86::VGETMANTPDZ256rrikz, X86::VGETMANTPDZ256rmikz, 0 },
{ X86::VGETMANTPDZrrikz, X86::VGETMANTPDZrmikz, 0 },
+ { X86::VGETMANTPHZ128rrikz, X86::VGETMANTPHZ128rmikz, 0 },
+ { X86::VGETMANTPHZ256rrikz, X86::VGETMANTPHZ256rmikz, 0 },
+ { X86::VGETMANTPHZrrikz, X86::VGETMANTPHZrmikz, 0 },
{ X86::VGETMANTPSZ128rrikz, X86::VGETMANTPSZ128rmikz, 0 },
{ X86::VGETMANTPSZ256rrikz, X86::VGETMANTPSZ256rmikz, 0 },
{ X86::VGETMANTPSZrrikz, X86::VGETMANTPSZrmikz, 0 },
{ X86::VGETMANTSDZrri, X86::VGETMANTSDZrmi, TB_NO_REVERSE },
+ { X86::VGETMANTSHZrri, X86::VGETMANTSHZrmi, TB_NO_REVERSE },
{ X86::VGETMANTSSZrri, X86::VGETMANTSSZrmi, TB_NO_REVERSE },
{ X86::VGF2P8AFFINEINVQBYrri, X86::VGF2P8AFFINEINVQBYrmi, 0 },
{ X86::VGF2P8AFFINEINVQBZ128rri, X86::VGF2P8AFFINEINVQBZ128rmi, 0 },
@@ -1912,6 +1972,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VMAXCPDZ256rr, X86::VMAXCPDZ256rm, 0 },
{ X86::VMAXCPDZrr, X86::VMAXCPDZrm, 0 },
{ X86::VMAXCPDrr, X86::VMAXCPDrm, 0 },
+ { X86::VMAXCPHZ128rr, X86::VMAXCPHZ128rm, 0 },
+ { X86::VMAXCPHZ256rr, X86::VMAXCPHZ256rm, 0 },
+ { X86::VMAXCPHZrr, X86::VMAXCPHZrm, 0 },
{ X86::VMAXCPSYrr, X86::VMAXCPSYrm, 0 },
{ X86::VMAXCPSZ128rr, X86::VMAXCPSZ128rm, 0 },
{ X86::VMAXCPSZ256rr, X86::VMAXCPSZ256rm, 0 },
@@ -1919,6 +1982,7 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VMAXCPSrr, X86::VMAXCPSrm, 0 },
{ X86::VMAXCSDZrr, X86::VMAXCSDZrm, 0 },
{ X86::VMAXCSDrr, X86::VMAXCSDrm, 0 },
+ { X86::VMAXCSHZrr, X86::VMAXCSHZrm, 0 },
{ X86::VMAXCSSZrr, X86::VMAXCSSZrm, 0 },
{ X86::VMAXCSSrr, X86::VMAXCSSrm, 0 },
{ X86::VMAXPDYrr, X86::VMAXPDYrm, 0 },
@@ -1926,6 +1990,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VMAXPDZ256rr, X86::VMAXPDZ256rm, 0 },
{ X86::VMAXPDZrr, X86::VMAXPDZrm, 0 },
{ X86::VMAXPDrr, X86::VMAXPDrm, 0 },
+ { X86::VMAXPHZ128rr, X86::VMAXPHZ128rm, 0 },
+ { X86::VMAXPHZ256rr, X86::VMAXPHZ256rm, 0 },
+ { X86::VMAXPHZrr, X86::VMAXPHZrm, 0 },
{ X86::VMAXPSYrr, X86::VMAXPSYrm, 0 },
{ X86::VMAXPSZ128rr, X86::VMAXPSZ128rm, 0 },
{ X86::VMAXPSZ256rr, X86::VMAXPSZ256rm, 0 },
@@ -1935,6 +2002,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VMAXSDZrr_Int, X86::VMAXSDZrm_Int, TB_NO_REVERSE },
{ X86::VMAXSDrr, X86::VMAXSDrm, 0 },
{ X86::VMAXSDrr_Int, X86::VMAXSDrm_Int, TB_NO_REVERSE },
+ { X86::VMAXSHZrr, X86::VMAXSHZrm, 0 },
+ { X86::VMAXSHZrr_Int, X86::VMAXSHZrm_Int, TB_NO_REVERSE },
{ X86::VMAXSSZrr, X86::VMAXSSZrm, 0 },
{ X86::VMAXSSZrr_Int, X86::VMAXSSZrm_Int, TB_NO_REVERSE },
{ X86::VMAXSSrr, X86::VMAXSSrm, 0 },
@@ -1944,6 +2013,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VMINCPDZ256rr, X86::VMINCPDZ256rm, 0 },
{ X86::VMINCPDZrr, X86::VMINCPDZrm, 0 },
{ X86::VMINCPDrr, X86::VMINCPDrm, 0 },
+ { X86::VMINCPHZ128rr, X86::VMINCPHZ128rm, 0 },
+ { X86::VMINCPHZ256rr, X86::VMINCPHZ256rm, 0 },
+ { X86::VMINCPHZrr, X86::VMINCPHZrm, 0 },
{ X86::VMINCPSYrr, X86::VMINCPSYrm, 0 },
{ X86::VMINCPSZ128rr, X86::VMINCPSZ128rm, 0 },
{ X86::VMINCPSZ256rr, X86::VMINCPSZ256rm, 0 },
@@ -1951,6 +2023,7 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VMINCPSrr, X86::VMINCPSrm, 0 },
{ X86::VMINCSDZrr, X86::VMINCSDZrm, 0 },
{ X86::VMINCSDrr, X86::VMINCSDrm, 0 },
+ { X86::VMINCSHZrr, X86::VMINCSHZrm, 0 },
{ X86::VMINCSSZrr, X86::VMINCSSZrm, 0 },
{ X86::VMINCSSrr, X86::VMINCSSrm, 0 },
{ X86::VMINPDYrr, X86::VMINPDYrm, 0 },
@@ -1958,6 +2031,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VMINPDZ256rr, X86::VMINPDZ256rm, 0 },
{ X86::VMINPDZrr, X86::VMINPDZrm, 0 },
{ X86::VMINPDrr, X86::VMINPDrm, 0 },
+ { X86::VMINPHZ128rr, X86::VMINPHZ128rm, 0 },
+ { X86::VMINPHZ256rr, X86::VMINPHZ256rm, 0 },
+ { X86::VMINPHZrr, X86::VMINPHZrm, 0 },
{ X86::VMINPSYrr, X86::VMINPSYrm, 0 },
{ X86::VMINPSZ128rr, X86::VMINPSZ128rm, 0 },
{ X86::VMINPSZ256rr, X86::VMINPSZ256rm, 0 },
@@ -1967,6 +2043,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VMINSDZrr_Int, X86::VMINSDZrm_Int, TB_NO_REVERSE },
{ X86::VMINSDrr, X86::VMINSDrm, 0 },
{ X86::VMINSDrr_Int, X86::VMINSDrm_Int, TB_NO_REVERSE },
+ { X86::VMINSHZrr, X86::VMINSHZrm, 0 },
+ { X86::VMINSHZrr_Int, X86::VMINSHZrm_Int, TB_NO_REVERSE },
{ X86::VMINSSZrr, X86::VMINSSZrm, 0 },
{ X86::VMINSSZrr_Int, X86::VMINSSZrm_Int, TB_NO_REVERSE },
{ X86::VMINSSrr, X86::VMINSSrm, 0 },
@@ -2021,6 +2099,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VMULPDZ256rr, X86::VMULPDZ256rm, 0 },
{ X86::VMULPDZrr, X86::VMULPDZrm, 0 },
{ X86::VMULPDrr, X86::VMULPDrm, 0 },
+ { X86::VMULPHZ128rr, X86::VMULPHZ128rm, 0 },
+ { X86::VMULPHZ256rr, X86::VMULPHZ256rm, 0 },
+ { X86::VMULPHZrr, X86::VMULPHZrm, 0 },
{ X86::VMULPSYrr, X86::VMULPSYrm, 0 },
{ X86::VMULPSZ128rr, X86::VMULPSZ128rm, 0 },
{ X86::VMULPSZ256rr, X86::VMULPSZ256rm, 0 },
@@ -2030,6 +2111,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VMULSDZrr_Int, X86::VMULSDZrm_Int, TB_NO_REVERSE },
{ X86::VMULSDrr, X86::VMULSDrm, 0 },
{ X86::VMULSDrr_Int, X86::VMULSDrm_Int, TB_NO_REVERSE },
+ { X86::VMULSHZrr, X86::VMULSHZrm, 0 },
+ { X86::VMULSHZrr_Int, X86::VMULSHZrm_Int, TB_NO_REVERSE },
{ X86::VMULSSZrr, X86::VMULSSZrm, 0 },
{ X86::VMULSSZrr_Int, X86::VMULSSZrm_Int, TB_NO_REVERSE },
{ X86::VMULSSrr, X86::VMULSSrm, 0 },
@@ -2861,24 +2944,37 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VRCP28PSZrkz, X86::VRCP28PSZmkz, 0 },
{ X86::VRCP28SDZr, X86::VRCP28SDZm, TB_NO_REVERSE },
{ X86::VRCP28SSZr, X86::VRCP28SSZm, TB_NO_REVERSE },
+ { X86::VRCPPHZ128rkz, X86::VRCPPHZ128mkz, 0 },
+ { X86::VRCPPHZ256rkz, X86::VRCPPHZ256mkz, 0 },
+ { X86::VRCPPHZrkz, X86::VRCPPHZmkz, 0 },
+ { X86::VRCPSHZrr, X86::VRCPSHZrm, TB_NO_REVERSE },
{ X86::VRCPSSr, X86::VRCPSSm, 0 },
{ X86::VRCPSSr_Int, X86::VRCPSSm_Int, TB_NO_REVERSE },
{ X86::VREDUCEPDZ128rrikz, X86::VREDUCEPDZ128rmikz, 0 },
{ X86::VREDUCEPDZ256rrikz, X86::VREDUCEPDZ256rmikz, 0 },
{ X86::VREDUCEPDZrrikz, X86::VREDUCEPDZrmikz, 0 },
+ { X86::VREDUCEPHZ128rrikz, X86::VREDUCEPHZ128rmikz, 0 },
+ { X86::VREDUCEPHZ256rrikz, X86::VREDUCEPHZ256rmikz, 0 },
+ { X86::VREDUCEPHZrrikz, X86::VREDUCEPHZrmikz, 0 },
{ X86::VREDUCEPSZ128rrikz, X86::VREDUCEPSZ128rmikz, 0 },
{ X86::VREDUCEPSZ256rrikz, X86::VREDUCEPSZ256rmikz, 0 },
{ X86::VREDUCEPSZrrikz, X86::VREDUCEPSZrmikz, 0 },
{ X86::VREDUCESDZrri, X86::VREDUCESDZrmi, TB_NO_REVERSE },
+ { X86::VREDUCESHZrri, X86::VREDUCESHZrmi, TB_NO_REVERSE },
{ X86::VREDUCESSZrri, X86::VREDUCESSZrmi, TB_NO_REVERSE },
{ X86::VRNDSCALEPDZ128rrikz, X86::VRNDSCALEPDZ128rmikz, 0 },
{ X86::VRNDSCALEPDZ256rrikz, X86::VRNDSCALEPDZ256rmikz, 0 },
{ X86::VRNDSCALEPDZrrikz, X86::VRNDSCALEPDZrmikz, 0 },
+ { X86::VRNDSCALEPHZ128rrikz, X86::VRNDSCALEPHZ128rmikz, 0 },
+ { X86::VRNDSCALEPHZ256rrikz, X86::VRNDSCALEPHZ256rmikz, 0 },
+ { X86::VRNDSCALEPHZrrikz, X86::VRNDSCALEPHZrmikz, 0 },
{ X86::VRNDSCALEPSZ128rrikz, X86::VRNDSCALEPSZ128rmikz, 0 },
{ X86::VRNDSCALEPSZ256rrikz, X86::VRNDSCALEPSZ256rmikz, 0 },
{ X86::VRNDSCALEPSZrrikz, X86::VRNDSCALEPSZrmikz, 0 },
{ X86::VRNDSCALESDZr, X86::VRNDSCALESDZm, 0 },
{ X86::VRNDSCALESDZr_Int, X86::VRNDSCALESDZm_Int, TB_NO_REVERSE },
+ { X86::VRNDSCALESHZr, X86::VRNDSCALESHZm, 0 },
+ { X86::VRNDSCALESHZr_Int, X86::VRNDSCALESHZm_Int, TB_NO_REVERSE },
{ X86::VRNDSCALESSZr, X86::VRNDSCALESSZm, 0 },
{ X86::VRNDSCALESSZr_Int, X86::VRNDSCALESSZm_Int, TB_NO_REVERSE },
{ X86::VROUNDSDr, X86::VROUNDSDm, 0 },
@@ -2897,15 +2993,23 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VRSQRT28PSZrkz, X86::VRSQRT28PSZmkz, 0 },
{ X86::VRSQRT28SDZr, X86::VRSQRT28SDZm, TB_NO_REVERSE },
{ X86::VRSQRT28SSZr, X86::VRSQRT28SSZm, TB_NO_REVERSE },
+ { X86::VRSQRTPHZ128rkz, X86::VRSQRTPHZ128mkz, 0 },
+ { X86::VRSQRTPHZ256rkz, X86::VRSQRTPHZ256mkz, 0 },
+ { X86::VRSQRTPHZrkz, X86::VRSQRTPHZmkz, 0 },
+ { X86::VRSQRTSHZrr, X86::VRSQRTSHZrm, TB_NO_REVERSE },
{ X86::VRSQRTSSr, X86::VRSQRTSSm, 0 },
{ X86::VRSQRTSSr_Int, X86::VRSQRTSSm_Int, TB_NO_REVERSE },
{ X86::VSCALEFPDZ128rr, X86::VSCALEFPDZ128rm, 0 },
{ X86::VSCALEFPDZ256rr, X86::VSCALEFPDZ256rm, 0 },
{ X86::VSCALEFPDZrr, X86::VSCALEFPDZrm, 0 },
+ { X86::VSCALEFPHZ128rr, X86::VSCALEFPHZ128rm, 0 },
+ { X86::VSCALEFPHZ256rr, X86::VSCALEFPHZ256rm, 0 },
+ { X86::VSCALEFPHZrr, X86::VSCALEFPHZrm, 0 },
{ X86::VSCALEFPSZ128rr, X86::VSCALEFPSZ128rm, 0 },
{ X86::VSCALEFPSZ256rr, X86::VSCALEFPSZ256rm, 0 },
{ X86::VSCALEFPSZrr, X86::VSCALEFPSZrm, 0 },
{ X86::VSCALEFSDZrr, X86::VSCALEFSDZrm, TB_NO_REVERSE },
+ { X86::VSCALEFSHZrr, X86::VSCALEFSHZrm, TB_NO_REVERSE },
{ X86::VSCALEFSSZrr, X86::VSCALEFSSZrm, TB_NO_REVERSE },
{ X86::VSHUFF32X4Z256rri, X86::VSHUFF32X4Z256rmi, 0 },
{ X86::VSHUFF32X4Zrri, X86::VSHUFF32X4Zrmi, 0 },
@@ -2928,6 +3032,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VSQRTPDZ128rkz, X86::VSQRTPDZ128mkz, 0 },
{ X86::VSQRTPDZ256rkz, X86::VSQRTPDZ256mkz, 0 },
{ X86::VSQRTPDZrkz, X86::VSQRTPDZmkz, 0 },
+ { X86::VSQRTPHZ128rkz, X86::VSQRTPHZ128mkz, 0 },
+ { X86::VSQRTPHZ256rkz, X86::VSQRTPHZ256mkz, 0 },
+ { X86::VSQRTPHZrkz, X86::VSQRTPHZmkz, 0 },
{ X86::VSQRTPSZ128rkz, X86::VSQRTPSZ128mkz, 0 },
{ X86::VSQRTPSZ256rkz, X86::VSQRTPSZ256mkz, 0 },
{ X86::VSQRTPSZrkz, X86::VSQRTPSZmkz, 0 },
@@ -2935,6 +3042,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VSQRTSDZr_Int, X86::VSQRTSDZm_Int, TB_NO_REVERSE },
{ X86::VSQRTSDr, X86::VSQRTSDm, 0 },
{ X86::VSQRTSDr_Int, X86::VSQRTSDm_Int, TB_NO_REVERSE },
+ { X86::VSQRTSHZr, X86::VSQRTSHZm, 0 },
+ { X86::VSQRTSHZr_Int, X86::VSQRTSHZm_Int, TB_NO_REVERSE },
{ X86::VSQRTSSZr, X86::VSQRTSSZm, 0 },
{ X86::VSQRTSSZr_Int, X86::VSQRTSSZm_Int, TB_NO_REVERSE },
{ X86::VSQRTSSr, X86::VSQRTSSm, 0 },
@@ -2944,6 +3053,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VSUBPDZ256rr, X86::VSUBPDZ256rm, 0 },
{ X86::VSUBPDZrr, X86::VSUBPDZrm, 0 },
{ X86::VSUBPDrr, X86::VSUBPDrm, 0 },
+ { X86::VSUBPHZ128rr, X86::VSUBPHZ128rm, 0 },
+ { X86::VSUBPHZ256rr, X86::VSUBPHZ256rm, 0 },
+ { X86::VSUBPHZrr, X86::VSUBPHZrm, 0 },
{ X86::VSUBPSYrr, X86::VSUBPSYrm, 0 },
{ X86::VSUBPSZ128rr, X86::VSUBPSZ128rm, 0 },
{ X86::VSUBPSZ256rr, X86::VSUBPSZ256rm, 0 },
@@ -2953,6 +3065,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VSUBSDZrr_Int, X86::VSUBSDZrm_Int, TB_NO_REVERSE },
{ X86::VSUBSDrr, X86::VSUBSDrm, 0 },
{ X86::VSUBSDrr_Int, X86::VSUBSDrm_Int, TB_NO_REVERSE },
+ { X86::VSUBSHZrr, X86::VSUBSHZrm, 0 },
+ { X86::VSUBSHZrr_Int, X86::VSUBSHZrm_Int, TB_NO_REVERSE },
{ X86::VSUBSSZrr, X86::VSUBSSZrm, 0 },
{ X86::VSUBSSZrr_Int, X86::VSUBSSZrm_Int, TB_NO_REVERSE },
{ X86::VSUBSSrr, X86::VSUBSSrm, 0 },
@@ -2999,10 +3113,14 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VADDPDZ128rrkz, X86::VADDPDZ128rmkz, 0 },
{ X86::VADDPDZ256rrkz, X86::VADDPDZ256rmkz, 0 },
{ X86::VADDPDZrrkz, X86::VADDPDZrmkz, 0 },
+ { X86::VADDPHZ128rrkz, X86::VADDPHZ128rmkz, 0 },
+ { X86::VADDPHZ256rrkz, X86::VADDPHZ256rmkz, 0 },
+ { X86::VADDPHZrrkz, X86::VADDPHZrmkz, 0 },
{ X86::VADDPSZ128rrkz, X86::VADDPSZ128rmkz, 0 },
{ X86::VADDPSZ256rrkz, X86::VADDPSZ256rmkz, 0 },
{ X86::VADDPSZrrkz, X86::VADDPSZrmkz, 0 },
{ X86::VADDSDZrr_Intkz, X86::VADDSDZrm_Intkz, TB_NO_REVERSE },
+ { X86::VADDSHZrr_Intkz, X86::VADDSHZrm_Intkz, TB_NO_REVERSE },
{ X86::VADDSSZrr_Intkz, X86::VADDSSZrm_Intkz, TB_NO_REVERSE },
{ X86::VALIGNDZ128rrikz, X86::VALIGNDZ128rmikz, 0 },
{ X86::VALIGNDZ256rrikz, X86::VALIGNDZ256rmikz, 0 },
@@ -3041,10 +3159,14 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VCMPPDZ128rrik, X86::VCMPPDZ128rmik, 0 },
{ X86::VCMPPDZ256rrik, X86::VCMPPDZ256rmik, 0 },
{ X86::VCMPPDZrrik, X86::VCMPPDZrmik, 0 },
+ { X86::VCMPPHZ128rrik, X86::VCMPPHZ128rmik, 0 },
+ { X86::VCMPPHZ256rrik, X86::VCMPPHZ256rmik, 0 },
+ { X86::VCMPPHZrrik, X86::VCMPPHZrmik, 0 },
{ X86::VCMPPSZ128rrik, X86::VCMPPSZ128rmik, 0 },
{ X86::VCMPPSZ256rrik, X86::VCMPPSZ256rmik, 0 },
{ X86::VCMPPSZrrik, X86::VCMPPSZrmik, 0 },
{ X86::VCMPSDZrr_Intk, X86::VCMPSDZrm_Intk, TB_NO_REVERSE },
+ { X86::VCMPSHZrr_Intk, X86::VCMPSHZrm_Intk, TB_NO_REVERSE },
{ X86::VCMPSSZrr_Intk, X86::VCMPSSZrm_Intk, TB_NO_REVERSE },
{ X86::VCVTDQ2PDZ128rrk, X86::VCVTDQ2PDZ128rmk, TB_NO_REVERSE },
{ X86::VCVTDQ2PDZ256rrk, X86::VCVTDQ2PDZ256rmk, 0 },
@@ -3141,10 +3263,14 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VDIVPDZ128rrkz, X86::VDIVPDZ128rmkz, 0 },
{ X86::VDIVPDZ256rrkz, X86::VDIVPDZ256rmkz, 0 },
{ X86::VDIVPDZrrkz, X86::VDIVPDZrmkz, 0 },
+ { X86::VDIVPHZ128rrkz, X86::VDIVPHZ128rmkz, 0 },
+ { X86::VDIVPHZ256rrkz, X86::VDIVPHZ256rmkz, 0 },
+ { X86::VDIVPHZrrkz, X86::VDIVPHZrmkz, 0 },
{ X86::VDIVPSZ128rrkz, X86::VDIVPSZ128rmkz, 0 },
{ X86::VDIVPSZ256rrkz, X86::VDIVPSZ256rmkz, 0 },
{ X86::VDIVPSZrrkz, X86::VDIVPSZrmkz, 0 },
{ X86::VDIVSDZrr_Intkz, X86::VDIVSDZrm_Intkz, TB_NO_REVERSE },
+ { X86::VDIVSHZrr_Intkz, X86::VDIVSHZrm_Intkz, TB_NO_REVERSE },
{ X86::VDIVSSZrr_Intkz, X86::VDIVSSZrm_Intkz, TB_NO_REVERSE },
{ X86::VDPBF16PSZ128r, X86::VDPBF16PSZ128m, 0 },
{ X86::VDPBF16PSZ256r, X86::VDPBF16PSZ256m, 0 },
@@ -3157,6 +3283,14 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VEXPANDPSZ128rrk, X86::VEXPANDPSZ128rmk, TB_NO_REVERSE },
{ X86::VEXPANDPSZ256rrk, X86::VEXPANDPSZ256rmk, TB_NO_REVERSE },
{ X86::VEXPANDPSZrrk, X86::VEXPANDPSZrmk, TB_NO_REVERSE },
+ { X86::VFCMADDCPHZ128r, X86::VFCMADDCPHZ128m, 0 },
+ { X86::VFCMADDCPHZ256r, X86::VFCMADDCPHZ256m, 0 },
+ { X86::VFCMADDCPHZr, X86::VFCMADDCPHZm, 0 },
+ { X86::VFCMADDCSHZr, X86::VFCMADDCSHZm, TB_NO_REVERSE },
+ { X86::VFCMULCPHZ128rrkz, X86::VFCMULCPHZ128rmkz, 0 },
+ { X86::VFCMULCPHZ256rrkz, X86::VFCMULCPHZ256rmkz, 0 },
+ { X86::VFCMULCPHZrrkz, X86::VFCMULCPHZrmkz, 0 },
+ { X86::VFCMULCSHZrrkz, X86::VFCMULCSHZrmkz, TB_NO_REVERSE },
{ X86::VFIXUPIMMPDZ128rri, X86::VFIXUPIMMPDZ128rmi, 0 },
{ X86::VFIXUPIMMPDZ256rri, X86::VFIXUPIMMPDZ256rmi, 0 },
{ X86::VFIXUPIMMPDZrri, X86::VFIXUPIMMPDZrmi, 0 },
@@ -3170,6 +3304,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFMADD132PDZ256r, X86::VFMADD132PDZ256m, 0 },
{ X86::VFMADD132PDZr, X86::VFMADD132PDZm, 0 },
{ X86::VFMADD132PDr, X86::VFMADD132PDm, 0 },
+ { X86::VFMADD132PHZ128r, X86::VFMADD132PHZ128m, 0 },
+ { X86::VFMADD132PHZ256r, X86::VFMADD132PHZ256m, 0 },
+ { X86::VFMADD132PHZr, X86::VFMADD132PHZm, 0 },
{ X86::VFMADD132PSYr, X86::VFMADD132PSYm, 0 },
{ X86::VFMADD132PSZ128r, X86::VFMADD132PSZ128m, 0 },
{ X86::VFMADD132PSZ256r, X86::VFMADD132PSZ256m, 0 },
@@ -3179,6 +3316,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFMADD132SDZr_Int, X86::VFMADD132SDZm_Int, TB_NO_REVERSE },
{ X86::VFMADD132SDr, X86::VFMADD132SDm, 0 },
{ X86::VFMADD132SDr_Int, X86::VFMADD132SDm_Int, TB_NO_REVERSE },
+ { X86::VFMADD132SHZr, X86::VFMADD132SHZm, 0 },
+ { X86::VFMADD132SHZr_Int, X86::VFMADD132SHZm_Int, TB_NO_REVERSE },
{ X86::VFMADD132SSZr, X86::VFMADD132SSZm, 0 },
{ X86::VFMADD132SSZr_Int, X86::VFMADD132SSZm_Int, TB_NO_REVERSE },
{ X86::VFMADD132SSr, X86::VFMADD132SSm, 0 },
@@ -3188,6 +3327,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFMADD213PDZ256r, X86::VFMADD213PDZ256m, 0 },
{ X86::VFMADD213PDZr, X86::VFMADD213PDZm, 0 },
{ X86::VFMADD213PDr, X86::VFMADD213PDm, 0 },
+ { X86::VFMADD213PHZ128r, X86::VFMADD213PHZ128m, 0 },
+ { X86::VFMADD213PHZ256r, X86::VFMADD213PHZ256m, 0 },
+ { X86::VFMADD213PHZr, X86::VFMADD213PHZm, 0 },
{ X86::VFMADD213PSYr, X86::VFMADD213PSYm, 0 },
{ X86::VFMADD213PSZ128r, X86::VFMADD213PSZ128m, 0 },
{ X86::VFMADD213PSZ256r, X86::VFMADD213PSZ256m, 0 },
@@ -3197,6 +3339,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFMADD213SDZr_Int, X86::VFMADD213SDZm_Int, TB_NO_REVERSE },
{ X86::VFMADD213SDr, X86::VFMADD213SDm, 0 },
{ X86::VFMADD213SDr_Int, X86::VFMADD213SDm_Int, TB_NO_REVERSE },
+ { X86::VFMADD213SHZr, X86::VFMADD213SHZm, 0 },
+ { X86::VFMADD213SHZr_Int, X86::VFMADD213SHZm_Int, TB_NO_REVERSE },
{ X86::VFMADD213SSZr, X86::VFMADD213SSZm, 0 },
{ X86::VFMADD213SSZr_Int, X86::VFMADD213SSZm_Int, TB_NO_REVERSE },
{ X86::VFMADD213SSr, X86::VFMADD213SSm, 0 },
@@ -3206,6 +3350,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFMADD231PDZ256r, X86::VFMADD231PDZ256m, 0 },
{ X86::VFMADD231PDZr, X86::VFMADD231PDZm, 0 },
{ X86::VFMADD231PDr, X86::VFMADD231PDm, 0 },
+ { X86::VFMADD231PHZ128r, X86::VFMADD231PHZ128m, 0 },
+ { X86::VFMADD231PHZ256r, X86::VFMADD231PHZ256m, 0 },
+ { X86::VFMADD231PHZr, X86::VFMADD231PHZm, 0 },
{ X86::VFMADD231PSYr, X86::VFMADD231PSYm, 0 },
{ X86::VFMADD231PSZ128r, X86::VFMADD231PSZ128m, 0 },
{ X86::VFMADD231PSZ256r, X86::VFMADD231PSZ256m, 0 },
@@ -3215,10 +3362,16 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFMADD231SDZr_Int, X86::VFMADD231SDZm_Int, TB_NO_REVERSE },
{ X86::VFMADD231SDr, X86::VFMADD231SDm, 0 },
{ X86::VFMADD231SDr_Int, X86::VFMADD231SDm_Int, TB_NO_REVERSE },
+ { X86::VFMADD231SHZr, X86::VFMADD231SHZm, 0 },
+ { X86::VFMADD231SHZr_Int, X86::VFMADD231SHZm_Int, TB_NO_REVERSE },
{ X86::VFMADD231SSZr, X86::VFMADD231SSZm, 0 },
{ X86::VFMADD231SSZr_Int, X86::VFMADD231SSZm_Int, TB_NO_REVERSE },
{ X86::VFMADD231SSr, X86::VFMADD231SSm, 0 },
{ X86::VFMADD231SSr_Int, X86::VFMADD231SSm_Int, TB_NO_REVERSE },
+ { X86::VFMADDCPHZ128r, X86::VFMADDCPHZ128m, 0 },
+ { X86::VFMADDCPHZ256r, X86::VFMADDCPHZ256m, 0 },
+ { X86::VFMADDCPHZr, X86::VFMADDCPHZm, 0 },
+ { X86::VFMADDCSHZr, X86::VFMADDCSHZm, TB_NO_REVERSE },
{ X86::VFMADDPD4Yrr, X86::VFMADDPD4Yrm, 0 },
{ X86::VFMADDPD4rr, X86::VFMADDPD4rm, 0 },
{ X86::VFMADDPS4Yrr, X86::VFMADDPS4Yrm, 0 },
@@ -3232,6 +3385,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFMADDSUB132PDZ256r, X86::VFMADDSUB132PDZ256m, 0 },
{ X86::VFMADDSUB132PDZr, X86::VFMADDSUB132PDZm, 0 },
{ X86::VFMADDSUB132PDr, X86::VFMADDSUB132PDm, 0 },
+ { X86::VFMADDSUB132PHZ128r, X86::VFMADDSUB132PHZ128m, 0 },
+ { X86::VFMADDSUB132PHZ256r, X86::VFMADDSUB132PHZ256m, 0 },
+ { X86::VFMADDSUB132PHZr, X86::VFMADDSUB132PHZm, 0 },
{ X86::VFMADDSUB132PSYr, X86::VFMADDSUB132PSYm, 0 },
{ X86::VFMADDSUB132PSZ128r, X86::VFMADDSUB132PSZ128m, 0 },
{ X86::VFMADDSUB132PSZ256r, X86::VFMADDSUB132PSZ256m, 0 },
@@ -3242,6 +3398,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFMADDSUB213PDZ256r, X86::VFMADDSUB213PDZ256m, 0 },
{ X86::VFMADDSUB213PDZr, X86::VFMADDSUB213PDZm, 0 },
{ X86::VFMADDSUB213PDr, X86::VFMADDSUB213PDm, 0 },
+ { X86::VFMADDSUB213PHZ128r, X86::VFMADDSUB213PHZ128m, 0 },
+ { X86::VFMADDSUB213PHZ256r, X86::VFMADDSUB213PHZ256m, 0 },
+ { X86::VFMADDSUB213PHZr, X86::VFMADDSUB213PHZm, 0 },
{ X86::VFMADDSUB213PSYr, X86::VFMADDSUB213PSYm, 0 },
{ X86::VFMADDSUB213PSZ128r, X86::VFMADDSUB213PSZ128m, 0 },
{ X86::VFMADDSUB213PSZ256r, X86::VFMADDSUB213PSZ256m, 0 },
@@ -3252,6 +3411,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFMADDSUB231PDZ256r, X86::VFMADDSUB231PDZ256m, 0 },
{ X86::VFMADDSUB231PDZr, X86::VFMADDSUB231PDZm, 0 },
{ X86::VFMADDSUB231PDr, X86::VFMADDSUB231PDm, 0 },
+ { X86::VFMADDSUB231PHZ128r, X86::VFMADDSUB231PHZ128m, 0 },
+ { X86::VFMADDSUB231PHZ256r, X86::VFMADDSUB231PHZ256m, 0 },
+ { X86::VFMADDSUB231PHZr, X86::VFMADDSUB231PHZm, 0 },
{ X86::VFMADDSUB231PSYr, X86::VFMADDSUB231PSYm, 0 },
{ X86::VFMADDSUB231PSZ128r, X86::VFMADDSUB231PSZ128m, 0 },
{ X86::VFMADDSUB231PSZ256r, X86::VFMADDSUB231PSZ256m, 0 },
@@ -3266,6 +3428,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFMSUB132PDZ256r, X86::VFMSUB132PDZ256m, 0 },
{ X86::VFMSUB132PDZr, X86::VFMSUB132PDZm, 0 },
{ X86::VFMSUB132PDr, X86::VFMSUB132PDm, 0 },
+ { X86::VFMSUB132PHZ128r, X86::VFMSUB132PHZ128m, 0 },
+ { X86::VFMSUB132PHZ256r, X86::VFMSUB132PHZ256m, 0 },
+ { X86::VFMSUB132PHZr, X86::VFMSUB132PHZm, 0 },
{ X86::VFMSUB132PSYr, X86::VFMSUB132PSYm, 0 },
{ X86::VFMSUB132PSZ128r, X86::VFMSUB132PSZ128m, 0 },
{ X86::VFMSUB132PSZ256r, X86::VFMSUB132PSZ256m, 0 },
@@ -3275,6 +3440,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFMSUB132SDZr_Int, X86::VFMSUB132SDZm_Int, TB_NO_REVERSE },
{ X86::VFMSUB132SDr, X86::VFMSUB132SDm, 0 },
{ X86::VFMSUB132SDr_Int, X86::VFMSUB132SDm_Int, TB_NO_REVERSE },
+ { X86::VFMSUB132SHZr, X86::VFMSUB132SHZm, 0 },
+ { X86::VFMSUB132SHZr_Int, X86::VFMSUB132SHZm_Int, TB_NO_REVERSE },
{ X86::VFMSUB132SSZr, X86::VFMSUB132SSZm, 0 },
{ X86::VFMSUB132SSZr_Int, X86::VFMSUB132SSZm_Int, TB_NO_REVERSE },
{ X86::VFMSUB132SSr, X86::VFMSUB132SSm, 0 },
@@ -3284,6 +3451,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFMSUB213PDZ256r, X86::VFMSUB213PDZ256m, 0 },
{ X86::VFMSUB213PDZr, X86::VFMSUB213PDZm, 0 },
{ X86::VFMSUB213PDr, X86::VFMSUB213PDm, 0 },
+ { X86::VFMSUB213PHZ128r, X86::VFMSUB213PHZ128m, 0 },
+ { X86::VFMSUB213PHZ256r, X86::VFMSUB213PHZ256m, 0 },
+ { X86::VFMSUB213PHZr, X86::VFMSUB213PHZm, 0 },
{ X86::VFMSUB213PSYr, X86::VFMSUB213PSYm, 0 },
{ X86::VFMSUB213PSZ128r, X86::VFMSUB213PSZ128m, 0 },
{ X86::VFMSUB213PSZ256r, X86::VFMSUB213PSZ256m, 0 },
@@ -3293,6 +3463,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFMSUB213SDZr_Int, X86::VFMSUB213SDZm_Int, TB_NO_REVERSE },
{ X86::VFMSUB213SDr, X86::VFMSUB213SDm, 0 },
{ X86::VFMSUB213SDr_Int, X86::VFMSUB213SDm_Int, TB_NO_REVERSE },
+ { X86::VFMSUB213SHZr, X86::VFMSUB213SHZm, 0 },
+ { X86::VFMSUB213SHZr_Int, X86::VFMSUB213SHZm_Int, TB_NO_REVERSE },
{ X86::VFMSUB213SSZr, X86::VFMSUB213SSZm, 0 },
{ X86::VFMSUB213SSZr_Int, X86::VFMSUB213SSZm_Int, TB_NO_REVERSE },
{ X86::VFMSUB213SSr, X86::VFMSUB213SSm, 0 },
@@ -3302,6 +3474,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFMSUB231PDZ256r, X86::VFMSUB231PDZ256m, 0 },
{ X86::VFMSUB231PDZr, X86::VFMSUB231PDZm, 0 },
{ X86::VFMSUB231PDr, X86::VFMSUB231PDm, 0 },
+ { X86::VFMSUB231PHZ128r, X86::VFMSUB231PHZ128m, 0 },
+ { X86::VFMSUB231PHZ256r, X86::VFMSUB231PHZ256m, 0 },
+ { X86::VFMSUB231PHZr, X86::VFMSUB231PHZm, 0 },
{ X86::VFMSUB231PSYr, X86::VFMSUB231PSYm, 0 },
{ X86::VFMSUB231PSZ128r, X86::VFMSUB231PSZ128m, 0 },
{ X86::VFMSUB231PSZ256r, X86::VFMSUB231PSZ256m, 0 },
@@ -3311,6 +3486,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFMSUB231SDZr_Int, X86::VFMSUB231SDZm_Int, TB_NO_REVERSE },
{ X86::VFMSUB231SDr, X86::VFMSUB231SDm, 0 },
{ X86::VFMSUB231SDr_Int, X86::VFMSUB231SDm_Int, TB_NO_REVERSE },
+ { X86::VFMSUB231SHZr, X86::VFMSUB231SHZm, 0 },
+ { X86::VFMSUB231SHZr_Int, X86::VFMSUB231SHZm_Int, TB_NO_REVERSE },
{ X86::VFMSUB231SSZr, X86::VFMSUB231SSZm, 0 },
{ X86::VFMSUB231SSZr_Int, X86::VFMSUB231SSZm_Int, TB_NO_REVERSE },
{ X86::VFMSUB231SSr, X86::VFMSUB231SSm, 0 },
@@ -3320,6 +3497,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFMSUBADD132PDZ256r, X86::VFMSUBADD132PDZ256m, 0 },
{ X86::VFMSUBADD132PDZr, X86::VFMSUBADD132PDZm, 0 },
{ X86::VFMSUBADD132PDr, X86::VFMSUBADD132PDm, 0 },
+ { X86::VFMSUBADD132PHZ128r, X86::VFMSUBADD132PHZ128m, 0 },
+ { X86::VFMSUBADD132PHZ256r, X86::VFMSUBADD132PHZ256m, 0 },
+ { X86::VFMSUBADD132PHZr, X86::VFMSUBADD132PHZm, 0 },
{ X86::VFMSUBADD132PSYr, X86::VFMSUBADD132PSYm, 0 },
{ X86::VFMSUBADD132PSZ128r, X86::VFMSUBADD132PSZ128m, 0 },
{ X86::VFMSUBADD132PSZ256r, X86::VFMSUBADD132PSZ256m, 0 },
@@ -3330,6 +3510,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFMSUBADD213PDZ256r, X86::VFMSUBADD213PDZ256m, 0 },
{ X86::VFMSUBADD213PDZr, X86::VFMSUBADD213PDZm, 0 },
{ X86::VFMSUBADD213PDr, X86::VFMSUBADD213PDm, 0 },
+ { X86::VFMSUBADD213PHZ128r, X86::VFMSUBADD213PHZ128m, 0 },
+ { X86::VFMSUBADD213PHZ256r, X86::VFMSUBADD213PHZ256m, 0 },
+ { X86::VFMSUBADD213PHZr, X86::VFMSUBADD213PHZm, 0 },
{ X86::VFMSUBADD213PSYr, X86::VFMSUBADD213PSYm, 0 },
{ X86::VFMSUBADD213PSZ128r, X86::VFMSUBADD213PSZ128m, 0 },
{ X86::VFMSUBADD213PSZ256r, X86::VFMSUBADD213PSZ256m, 0 },
@@ -3340,6 +3523,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFMSUBADD231PDZ256r, X86::VFMSUBADD231PDZ256m, 0 },
{ X86::VFMSUBADD231PDZr, X86::VFMSUBADD231PDZm, 0 },
{ X86::VFMSUBADD231PDr, X86::VFMSUBADD231PDm, 0 },
+ { X86::VFMSUBADD231PHZ128r, X86::VFMSUBADD231PHZ128m, 0 },
+ { X86::VFMSUBADD231PHZ256r, X86::VFMSUBADD231PHZ256m, 0 },
+ { X86::VFMSUBADD231PHZr, X86::VFMSUBADD231PHZm, 0 },
{ X86::VFMSUBADD231PSYr, X86::VFMSUBADD231PSYm, 0 },
{ X86::VFMSUBADD231PSZ128r, X86::VFMSUBADD231PSZ128m, 0 },
{ X86::VFMSUBADD231PSZ256r, X86::VFMSUBADD231PSZ256m, 0 },
@@ -3357,11 +3543,18 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFMSUBSD4rr_Int, X86::VFMSUBSD4rm_Int, TB_NO_REVERSE },
{ X86::VFMSUBSS4rr, X86::VFMSUBSS4rm, 0 },
{ X86::VFMSUBSS4rr_Int, X86::VFMSUBSS4rm_Int, TB_NO_REVERSE },
+ { X86::VFMULCPHZ128rrkz, X86::VFMULCPHZ128rmkz, 0 },
+ { X86::VFMULCPHZ256rrkz, X86::VFMULCPHZ256rmkz, 0 },
+ { X86::VFMULCPHZrrkz, X86::VFMULCPHZrmkz, 0 },
+ { X86::VFMULCSHZrrkz, X86::VFMULCSHZrmkz, TB_NO_REVERSE },
{ X86::VFNMADD132PDYr, X86::VFNMADD132PDYm, 0 },
{ X86::VFNMADD132PDZ128r, X86::VFNMADD132PDZ128m, 0 },
{ X86::VFNMADD132PDZ256r, X86::VFNMADD132PDZ256m, 0 },
{ X86::VFNMADD132PDZr, X86::VFNMADD132PDZm, 0 },
{ X86::VFNMADD132PDr, X86::VFNMADD132PDm, 0 },
+ { X86::VFNMADD132PHZ128r, X86::VFNMADD132PHZ128m, 0 },
+ { X86::VFNMADD132PHZ256r, X86::VFNMADD132PHZ256m, 0 },
+ { X86::VFNMADD132PHZr, X86::VFNMADD132PHZm, 0 },
{ X86::VFNMADD132PSYr, X86::VFNMADD132PSYm, 0 },
{ X86::VFNMADD132PSZ128r, X86::VFNMADD132PSZ128m, 0 },
{ X86::VFNMADD132PSZ256r, X86::VFNMADD132PSZ256m, 0 },
@@ -3371,6 +3564,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFNMADD132SDZr_Int, X86::VFNMADD132SDZm_Int, TB_NO_REVERSE },
{ X86::VFNMADD132SDr, X86::VFNMADD132SDm, 0 },
{ X86::VFNMADD132SDr_Int, X86::VFNMADD132SDm_Int, TB_NO_REVERSE },
+ { X86::VFNMADD132SHZr, X86::VFNMADD132SHZm, 0 },
+ { X86::VFNMADD132SHZr_Int, X86::VFNMADD132SHZm_Int, TB_NO_REVERSE },
{ X86::VFNMADD132SSZr, X86::VFNMADD132SSZm, 0 },
{ X86::VFNMADD132SSZr_Int, X86::VFNMADD132SSZm_Int, TB_NO_REVERSE },
{ X86::VFNMADD132SSr, X86::VFNMADD132SSm, 0 },
@@ -3380,6 +3575,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFNMADD213PDZ256r, X86::VFNMADD213PDZ256m, 0 },
{ X86::VFNMADD213PDZr, X86::VFNMADD213PDZm, 0 },
{ X86::VFNMADD213PDr, X86::VFNMADD213PDm, 0 },
+ { X86::VFNMADD213PHZ128r, X86::VFNMADD213PHZ128m, 0 },
+ { X86::VFNMADD213PHZ256r, X86::VFNMADD213PHZ256m, 0 },
+ { X86::VFNMADD213PHZr, X86::VFNMADD213PHZm, 0 },
{ X86::VFNMADD213PSYr, X86::VFNMADD213PSYm, 0 },
{ X86::VFNMADD213PSZ128r, X86::VFNMADD213PSZ128m, 0 },
{ X86::VFNMADD213PSZ256r, X86::VFNMADD213PSZ256m, 0 },
@@ -3389,6 +3587,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFNMADD213SDZr_Int, X86::VFNMADD213SDZm_Int, TB_NO_REVERSE },
{ X86::VFNMADD213SDr, X86::VFNMADD213SDm, 0 },
{ X86::VFNMADD213SDr_Int, X86::VFNMADD213SDm_Int, TB_NO_REVERSE },
+ { X86::VFNMADD213SHZr, X86::VFNMADD213SHZm, 0 },
+ { X86::VFNMADD213SHZr_Int, X86::VFNMADD213SHZm_Int, TB_NO_REVERSE },
{ X86::VFNMADD213SSZr, X86::VFNMADD213SSZm, 0 },
{ X86::VFNMADD213SSZr_Int, X86::VFNMADD213SSZm_Int, TB_NO_REVERSE },
{ X86::VFNMADD213SSr, X86::VFNMADD213SSm, 0 },
@@ -3398,6 +3598,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFNMADD231PDZ256r, X86::VFNMADD231PDZ256m, 0 },
{ X86::VFNMADD231PDZr, X86::VFNMADD231PDZm, 0 },
{ X86::VFNMADD231PDr, X86::VFNMADD231PDm, 0 },
+ { X86::VFNMADD231PHZ128r, X86::VFNMADD231PHZ128m, 0 },
+ { X86::VFNMADD231PHZ256r, X86::VFNMADD231PHZ256m, 0 },
+ { X86::VFNMADD231PHZr, X86::VFNMADD231PHZm, 0 },
{ X86::VFNMADD231PSYr, X86::VFNMADD231PSYm, 0 },
{ X86::VFNMADD231PSZ128r, X86::VFNMADD231PSZ128m, 0 },
{ X86::VFNMADD231PSZ256r, X86::VFNMADD231PSZ256m, 0 },
@@ -3407,6 +3610,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFNMADD231SDZr_Int, X86::VFNMADD231SDZm_Int, TB_NO_REVERSE },
{ X86::VFNMADD231SDr, X86::VFNMADD231SDm, 0 },
{ X86::VFNMADD231SDr_Int, X86::VFNMADD231SDm_Int, TB_NO_REVERSE },
+ { X86::VFNMADD231SHZr, X86::VFNMADD231SHZm, 0 },
+ { X86::VFNMADD231SHZr_Int, X86::VFNMADD231SHZm_Int, TB_NO_REVERSE },
{ X86::VFNMADD231SSZr, X86::VFNMADD231SSZm, 0 },
{ X86::VFNMADD231SSZr_Int, X86::VFNMADD231SSZm_Int, TB_NO_REVERSE },
{ X86::VFNMADD231SSr, X86::VFNMADD231SSm, 0 },
@@ -3424,6 +3629,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFNMSUB132PDZ256r, X86::VFNMSUB132PDZ256m, 0 },
{ X86::VFNMSUB132PDZr, X86::VFNMSUB132PDZm, 0 },
{ X86::VFNMSUB132PDr, X86::VFNMSUB132PDm, 0 },
+ { X86::VFNMSUB132PHZ128r, X86::VFNMSUB132PHZ128m, 0 },
+ { X86::VFNMSUB132PHZ256r, X86::VFNMSUB132PHZ256m, 0 },
+ { X86::VFNMSUB132PHZr, X86::VFNMSUB132PHZm, 0 },
{ X86::VFNMSUB132PSYr, X86::VFNMSUB132PSYm, 0 },
{ X86::VFNMSUB132PSZ128r, X86::VFNMSUB132PSZ128m, 0 },
{ X86::VFNMSUB132PSZ256r, X86::VFNMSUB132PSZ256m, 0 },
@@ -3433,6 +3641,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFNMSUB132SDZr_Int, X86::VFNMSUB132SDZm_Int, TB_NO_REVERSE },
{ X86::VFNMSUB132SDr, X86::VFNMSUB132SDm, 0 },
{ X86::VFNMSUB132SDr_Int, X86::VFNMSUB132SDm_Int, TB_NO_REVERSE },
+ { X86::VFNMSUB132SHZr, X86::VFNMSUB132SHZm, 0 },
+ { X86::VFNMSUB132SHZr_Int, X86::VFNMSUB132SHZm_Int, TB_NO_REVERSE },
{ X86::VFNMSUB132SSZr, X86::VFNMSUB132SSZm, 0 },
{ X86::VFNMSUB132SSZr_Int, X86::VFNMSUB132SSZm_Int, TB_NO_REVERSE },
{ X86::VFNMSUB132SSr, X86::VFNMSUB132SSm, 0 },
@@ -3442,6 +3652,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFNMSUB213PDZ256r, X86::VFNMSUB213PDZ256m, 0 },
{ X86::VFNMSUB213PDZr, X86::VFNMSUB213PDZm, 0 },
{ X86::VFNMSUB213PDr, X86::VFNMSUB213PDm, 0 },
+ { X86::VFNMSUB213PHZ128r, X86::VFNMSUB213PHZ128m, 0 },
+ { X86::VFNMSUB213PHZ256r, X86::VFNMSUB213PHZ256m, 0 },
+ { X86::VFNMSUB213PHZr, X86::VFNMSUB213PHZm, 0 },
{ X86::VFNMSUB213PSYr, X86::VFNMSUB213PSYm, 0 },
{ X86::VFNMSUB213PSZ128r, X86::VFNMSUB213PSZ128m, 0 },
{ X86::VFNMSUB213PSZ256r, X86::VFNMSUB213PSZ256m, 0 },
@@ -3451,6 +3664,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFNMSUB213SDZr_Int, X86::VFNMSUB213SDZm_Int, TB_NO_REVERSE },
{ X86::VFNMSUB213SDr, X86::VFNMSUB213SDm, 0 },
{ X86::VFNMSUB213SDr_Int, X86::VFNMSUB213SDm_Int, TB_NO_REVERSE },
+ { X86::VFNMSUB213SHZr, X86::VFNMSUB213SHZm, 0 },
+ { X86::VFNMSUB213SHZr_Int, X86::VFNMSUB213SHZm_Int, TB_NO_REVERSE },
{ X86::VFNMSUB213SSZr, X86::VFNMSUB213SSZm, 0 },
{ X86::VFNMSUB213SSZr_Int, X86::VFNMSUB213SSZm_Int, TB_NO_REVERSE },
{ X86::VFNMSUB213SSr, X86::VFNMSUB213SSm, 0 },
@@ -3460,6 +3675,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFNMSUB231PDZ256r, X86::VFNMSUB231PDZ256m, 0 },
{ X86::VFNMSUB231PDZr, X86::VFNMSUB231PDZm, 0 },
{ X86::VFNMSUB231PDr, X86::VFNMSUB231PDm, 0 },
+ { X86::VFNMSUB231PHZ128r, X86::VFNMSUB231PHZ128m, 0 },
+ { X86::VFNMSUB231PHZ256r, X86::VFNMSUB231PHZ256m, 0 },
+ { X86::VFNMSUB231PHZr, X86::VFNMSUB231PHZm, 0 },
{ X86::VFNMSUB231PSYr, X86::VFNMSUB231PSYm, 0 },
{ X86::VFNMSUB231PSZ128r, X86::VFNMSUB231PSZ128m, 0 },
{ X86::VFNMSUB231PSZ256r, X86::VFNMSUB231PSZ256m, 0 },
@@ -3469,6 +3687,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFNMSUB231SDZr_Int, X86::VFNMSUB231SDZm_Int, TB_NO_REVERSE },
{ X86::VFNMSUB231SDr, X86::VFNMSUB231SDm, 0 },
{ X86::VFNMSUB231SDr_Int, X86::VFNMSUB231SDm_Int, TB_NO_REVERSE },
+ { X86::VFNMSUB231SHZr, X86::VFNMSUB231SHZm, 0 },
+ { X86::VFNMSUB231SHZr_Int, X86::VFNMSUB231SHZm_Int, TB_NO_REVERSE },
{ X86::VFNMSUB231SSZr, X86::VFNMSUB231SSZm, 0 },
{ X86::VFNMSUB231SSZr_Int, X86::VFNMSUB231SSZm_Int, TB_NO_REVERSE },
{ X86::VFNMSUB231SSr, X86::VFNMSUB231SSm, 0 },
@@ -3484,18 +3704,26 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VGETEXPPDZ128rk, X86::VGETEXPPDZ128mk, 0 },
{ X86::VGETEXPPDZ256rk, X86::VGETEXPPDZ256mk, 0 },
{ X86::VGETEXPPDZrk, X86::VGETEXPPDZmk, 0 },
+ { X86::VGETEXPPHZ128rk, X86::VGETEXPPHZ128mk, 0 },
+ { X86::VGETEXPPHZ256rk, X86::VGETEXPPHZ256mk, 0 },
+ { X86::VGETEXPPHZrk, X86::VGETEXPPHZmk, 0 },
{ X86::VGETEXPPSZ128rk, X86::VGETEXPPSZ128mk, 0 },
{ X86::VGETEXPPSZ256rk, X86::VGETEXPPSZ256mk, 0 },
{ X86::VGETEXPPSZrk, X86::VGETEXPPSZmk, 0 },
{ X86::VGETEXPSDZrkz, X86::VGETEXPSDZmkz, TB_NO_REVERSE },
+ { X86::VGETEXPSHZrkz, X86::VGETEXPSHZmkz, TB_NO_REVERSE },
{ X86::VGETEXPSSZrkz, X86::VGETEXPSSZmkz, TB_NO_REVERSE },
{ X86::VGETMANTPDZ128rrik, X86::VGETMANTPDZ128rmik, 0 },
{ X86::VGETMANTPDZ256rrik, X86::VGETMANTPDZ256rmik, 0 },
{ X86::VGETMANTPDZrrik, X86::VGETMANTPDZrmik, 0 },
+ { X86::VGETMANTPHZ128rrik, X86::VGETMANTPHZ128rmik, 0 },
+ { X86::VGETMANTPHZ256rrik, X86::VGETMANTPHZ256rmik, 0 },
+ { X86::VGETMANTPHZrrik, X86::VGETMANTPHZrmik, 0 },
{ X86::VGETMANTPSZ128rrik, X86::VGETMANTPSZ128rmik, 0 },
{ X86::VGETMANTPSZ256rrik, X86::VGETMANTPSZ256rmik, 0 },
{ X86::VGETMANTPSZrrik, X86::VGETMANTPSZrmik, 0 },
{ X86::VGETMANTSDZrrikz, X86::VGETMANTSDZrmikz, TB_NO_REVERSE },
+ { X86::VGETMANTSHZrrikz, X86::VGETMANTSHZrmikz, TB_NO_REVERSE },
{ X86::VGETMANTSSZrrikz, X86::VGETMANTSSZrmikz, TB_NO_REVERSE },
{ X86::VGF2P8AFFINEINVQBZ128rrikz, X86::VGF2P8AFFINEINVQBZ128rmikz, 0 },
{ X86::VGF2P8AFFINEINVQBZ256rrikz, X86::VGF2P8AFFINEINVQBZ256rmikz, 0 },
@@ -3521,30 +3749,44 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VMAXCPDZ128rrkz, X86::VMAXCPDZ128rmkz, 0 },
{ X86::VMAXCPDZ256rrkz, X86::VMAXCPDZ256rmkz, 0 },
{ X86::VMAXCPDZrrkz, X86::VMAXCPDZrmkz, 0 },
+ { X86::VMAXCPHZ128rrkz, X86::VMAXCPHZ128rmkz, 0 },
+ { X86::VMAXCPHZ256rrkz, X86::VMAXCPHZ256rmkz, 0 },
+ { X86::VMAXCPHZrrkz, X86::VMAXCPHZrmkz, 0 },
{ X86::VMAXCPSZ128rrkz, X86::VMAXCPSZ128rmkz, 0 },
{ X86::VMAXCPSZ256rrkz, X86::VMAXCPSZ256rmkz, 0 },
{ X86::VMAXCPSZrrkz, X86::VMAXCPSZrmkz, 0 },
{ X86::VMAXPDZ128rrkz, X86::VMAXPDZ128rmkz, 0 },
{ X86::VMAXPDZ256rrkz, X86::VMAXPDZ256rmkz, 0 },
{ X86::VMAXPDZrrkz, X86::VMAXPDZrmkz, 0 },
+ { X86::VMAXPHZ128rrkz, X86::VMAXPHZ128rmkz, 0 },
+ { X86::VMAXPHZ256rrkz, X86::VMAXPHZ256rmkz, 0 },
+ { X86::VMAXPHZrrkz, X86::VMAXPHZrmkz, 0 },
{ X86::VMAXPSZ128rrkz, X86::VMAXPSZ128rmkz, 0 },
{ X86::VMAXPSZ256rrkz, X86::VMAXPSZ256rmkz, 0 },
{ X86::VMAXPSZrrkz, X86::VMAXPSZrmkz, 0 },
{ X86::VMAXSDZrr_Intkz, X86::VMAXSDZrm_Intkz, TB_NO_REVERSE },
+ { X86::VMAXSHZrr_Intkz, X86::VMAXSHZrm_Intkz, TB_NO_REVERSE },
{ X86::VMAXSSZrr_Intkz, X86::VMAXSSZrm_Intkz, TB_NO_REVERSE },
{ X86::VMINCPDZ128rrkz, X86::VMINCPDZ128rmkz, 0 },
{ X86::VMINCPDZ256rrkz, X86::VMINCPDZ256rmkz, 0 },
{ X86::VMINCPDZrrkz, X86::VMINCPDZrmkz, 0 },
+ { X86::VMINCPHZ128rrkz, X86::VMINCPHZ128rmkz, 0 },
+ { X86::VMINCPHZ256rrkz, X86::VMINCPHZ256rmkz, 0 },
+ { X86::VMINCPHZrrkz, X86::VMINCPHZrmkz, 0 },
{ X86::VMINCPSZ128rrkz, X86::VMINCPSZ128rmkz, 0 },
{ X86::VMINCPSZ256rrkz, X86::VMINCPSZ256rmkz, 0 },
{ X86::VMINCPSZrrkz, X86::VMINCPSZrmkz, 0 },
{ X86::VMINPDZ128rrkz, X86::VMINPDZ128rmkz, 0 },
{ X86::VMINPDZ256rrkz, X86::VMINPDZ256rmkz, 0 },
{ X86::VMINPDZrrkz, X86::VMINPDZrmkz, 0 },
+ { X86::VMINPHZ128rrkz, X86::VMINPHZ128rmkz, 0 },
+ { X86::VMINPHZ256rrkz, X86::VMINPHZ256rmkz, 0 },
+ { X86::VMINPHZrrkz, X86::VMINPHZrmkz, 0 },
{ X86::VMINPSZ128rrkz, X86::VMINPSZ128rmkz, 0 },
{ X86::VMINPSZ256rrkz, X86::VMINPSZ256rmkz, 0 },
{ X86::VMINPSZrrkz, X86::VMINPSZrmkz, 0 },
{ X86::VMINSDZrr_Intkz, X86::VMINSDZrm_Intkz, TB_NO_REVERSE },
+ { X86::VMINSHZrr_Intkz, X86::VMINSHZrm_Intkz, TB_NO_REVERSE },
{ X86::VMINSSZrr_Intkz, X86::VMINSSZrm_Intkz, TB_NO_REVERSE },
{ X86::VMOVAPDZ128rrk, X86::VMOVAPDZ128rmk, TB_NO_REVERSE | TB_ALIGN_16 },
{ X86::VMOVAPDZ256rrk, X86::VMOVAPDZ256rmk, TB_NO_REVERSE | TB_ALIGN_32 },
@@ -3588,10 +3830,14 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VMULPDZ128rrkz, X86::VMULPDZ128rmkz, 0 },
{ X86::VMULPDZ256rrkz, X86::VMULPDZ256rmkz, 0 },
{ X86::VMULPDZrrkz, X86::VMULPDZrmkz, 0 },
+ { X86::VMULPHZ128rrkz, X86::VMULPHZ128rmkz, 0 },
+ { X86::VMULPHZ256rrkz, X86::VMULPHZ256rmkz, 0 },
+ { X86::VMULPHZrrkz, X86::VMULPHZrmkz, 0 },
{ X86::VMULPSZ128rrkz, X86::VMULPSZ128rmkz, 0 },
{ X86::VMULPSZ256rrkz, X86::VMULPSZ256rmkz, 0 },
{ X86::VMULPSZrrkz, X86::VMULPSZrmkz, 0 },
{ X86::VMULSDZrr_Intkz, X86::VMULSDZrm_Intkz, TB_NO_REVERSE },
+ { X86::VMULSHZrr_Intkz, X86::VMULSHZrm_Intkz, TB_NO_REVERSE },
{ X86::VMULSSZrr_Intkz, X86::VMULSSZrm_Intkz, TB_NO_REVERSE },
{ X86::VORPDZ128rrkz, X86::VORPDZ128rmkz, 0 },
{ X86::VORPDZ256rrkz, X86::VORPDZ256rmkz, 0 },
@@ -4258,21 +4504,33 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VRCP28PSZrk, X86::VRCP28PSZmk, 0 },
{ X86::VRCP28SDZrkz, X86::VRCP28SDZmkz, TB_NO_REVERSE },
{ X86::VRCP28SSZrkz, X86::VRCP28SSZmkz, TB_NO_REVERSE },
+ { X86::VRCPPHZ128rk, X86::VRCPPHZ128mk, 0 },
+ { X86::VRCPPHZ256rk, X86::VRCPPHZ256mk, 0 },
+ { X86::VRCPPHZrk, X86::VRCPPHZmk, 0 },
+ { X86::VRCPSHZrrkz, X86::VRCPSHZrmkz, TB_NO_REVERSE },
{ X86::VREDUCEPDZ128rrik, X86::VREDUCEPDZ128rmik, 0 },
{ X86::VREDUCEPDZ256rrik, X86::VREDUCEPDZ256rmik, 0 },
{ X86::VREDUCEPDZrrik, X86::VREDUCEPDZrmik, 0 },
+ { X86::VREDUCEPHZ128rrik, X86::VREDUCEPHZ128rmik, 0 },
+ { X86::VREDUCEPHZ256rrik, X86::VREDUCEPHZ256rmik, 0 },
+ { X86::VREDUCEPHZrrik, X86::VREDUCEPHZrmik, 0 },
{ X86::VREDUCEPSZ128rrik, X86::VREDUCEPSZ128rmik, 0 },
{ X86::VREDUCEPSZ256rrik, X86::VREDUCEPSZ256rmik, 0 },
{ X86::VREDUCEPSZrrik, X86::VREDUCEPSZrmik, 0 },
{ X86::VREDUCESDZrrikz, X86::VREDUCESDZrmikz, TB_NO_REVERSE },
+ { X86::VREDUCESHZrrikz, X86::VREDUCESHZrmikz, TB_NO_REVERSE },
{ X86::VREDUCESSZrrikz, X86::VREDUCESSZrmikz, TB_NO_REVERSE },
{ X86::VRNDSCALEPDZ128rrik, X86::VRNDSCALEPDZ128rmik, 0 },
{ X86::VRNDSCALEPDZ256rrik, X86::VRNDSCALEPDZ256rmik, 0 },
{ X86::VRNDSCALEPDZrrik, X86::VRNDSCALEPDZrmik, 0 },
+ { X86::VRNDSCALEPHZ128rrik, X86::VRNDSCALEPHZ128rmik, 0 },
+ { X86::VRNDSCALEPHZ256rrik, X86::VRNDSCALEPHZ256rmik, 0 },
+ { X86::VRNDSCALEPHZrrik, X86::VRNDSCALEPHZrmik, 0 },
{ X86::VRNDSCALEPSZ128rrik, X86::VRNDSCALEPSZ128rmik, 0 },
{ X86::VRNDSCALEPSZ256rrik, X86::VRNDSCALEPSZ256rmik, 0 },
{ X86::VRNDSCALEPSZrrik, X86::VRNDSCALEPSZrmik, 0 },
{ X86::VRNDSCALESDZr_Intkz, X86::VRNDSCALESDZm_Intkz, TB_NO_REVERSE },
+ { X86::VRNDSCALESHZr_Intkz, X86::VRNDSCALESHZm_Intkz, TB_NO_REVERSE },
{ X86::VRNDSCALESSZr_Intkz, X86::VRNDSCALESSZm_Intkz, TB_NO_REVERSE },
{ X86::VRSQRT14PDZ128rk, X86::VRSQRT14PDZ128mk, 0 },
{ X86::VRSQRT14PDZ256rk, X86::VRSQRT14PDZ256mk, 0 },
@@ -4286,13 +4544,21 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VRSQRT28PSZrk, X86::VRSQRT28PSZmk, 0 },
{ X86::VRSQRT28SDZrkz, X86::VRSQRT28SDZmkz, TB_NO_REVERSE },
{ X86::VRSQRT28SSZrkz, X86::VRSQRT28SSZmkz, TB_NO_REVERSE },
+ { X86::VRSQRTPHZ128rk, X86::VRSQRTPHZ128mk, 0 },
+ { X86::VRSQRTPHZ256rk, X86::VRSQRTPHZ256mk, 0 },
+ { X86::VRSQRTPHZrk, X86::VRSQRTPHZmk, 0 },
+ { X86::VRSQRTSHZrrkz, X86::VRSQRTSHZrmkz, TB_NO_REVERSE },
{ X86::VSCALEFPDZ128rrkz, X86::VSCALEFPDZ128rmkz, 0 },
{ X86::VSCALEFPDZ256rrkz, X86::VSCALEFPDZ256rmkz, 0 },
{ X86::VSCALEFPDZrrkz, X86::VSCALEFPDZrmkz, 0 },
+ { X86::VSCALEFPHZ128rrkz, X86::VSCALEFPHZ128rmkz, 0 },
+ { X86::VSCALEFPHZ256rrkz, X86::VSCALEFPHZ256rmkz, 0 },
+ { X86::VSCALEFPHZrrkz, X86::VSCALEFPHZrmkz, 0 },
{ X86::VSCALEFPSZ128rrkz, X86::VSCALEFPSZ128rmkz, 0 },
{ X86::VSCALEFPSZ256rrkz, X86::VSCALEFPSZ256rmkz, 0 },
{ X86::VSCALEFPSZrrkz, X86::VSCALEFPSZrmkz, 0 },
{ X86::VSCALEFSDZrrkz, X86::VSCALEFSDZrmkz, TB_NO_REVERSE },
+ { X86::VSCALEFSHZrrkz, X86::VSCALEFSHZrmkz, TB_NO_REVERSE },
{ X86::VSCALEFSSZrrkz, X86::VSCALEFSSZrmkz, TB_NO_REVERSE },
{ X86::VSHUFF32X4Z256rrikz, X86::VSHUFF32X4Z256rmikz, 0 },
{ X86::VSHUFF32X4Zrrikz, X86::VSHUFF32X4Zrmikz, 0 },
@@ -4311,18 +4577,26 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VSQRTPDZ128rk, X86::VSQRTPDZ128mk, 0 },
{ X86::VSQRTPDZ256rk, X86::VSQRTPDZ256mk, 0 },
{ X86::VSQRTPDZrk, X86::VSQRTPDZmk, 0 },
+ { X86::VSQRTPHZ128rk, X86::VSQRTPHZ128mk, 0 },
+ { X86::VSQRTPHZ256rk, X86::VSQRTPHZ256mk, 0 },
+ { X86::VSQRTPHZrk, X86::VSQRTPHZmk, 0 },
{ X86::VSQRTPSZ128rk, X86::VSQRTPSZ128mk, 0 },
{ X86::VSQRTPSZ256rk, X86::VSQRTPSZ256mk, 0 },
{ X86::VSQRTPSZrk, X86::VSQRTPSZmk, 0 },
{ X86::VSQRTSDZr_Intkz, X86::VSQRTSDZm_Intkz, TB_NO_REVERSE },
+ { X86::VSQRTSHZr_Intkz, X86::VSQRTSHZm_Intkz, TB_NO_REVERSE },
{ X86::VSQRTSSZr_Intkz, X86::VSQRTSSZm_Intkz, TB_NO_REVERSE },
{ X86::VSUBPDZ128rrkz, X86::VSUBPDZ128rmkz, 0 },
{ X86::VSUBPDZ256rrkz, X86::VSUBPDZ256rmkz, 0 },
{ X86::VSUBPDZrrkz, X86::VSUBPDZrmkz, 0 },
+ { X86::VSUBPHZ128rrkz, X86::VSUBPHZ128rmkz, 0 },
+ { X86::VSUBPHZ256rrkz, X86::VSUBPHZ256rmkz, 0 },
+ { X86::VSUBPHZrrkz, X86::VSUBPHZrmkz, 0 },
{ X86::VSUBPSZ128rrkz, X86::VSUBPSZ128rmkz, 0 },
{ X86::VSUBPSZ256rrkz, X86::VSUBPSZ256rmkz, 0 },
{ X86::VSUBPSZrrkz, X86::VSUBPSZrmkz, 0 },
{ X86::VSUBSDZrr_Intkz, X86::VSUBSDZrm_Intkz, TB_NO_REVERSE },
+ { X86::VSUBSHZrr_Intkz, X86::VSUBSHZrm_Intkz, TB_NO_REVERSE },
{ X86::VSUBSSZrr_Intkz, X86::VSUBSSZrm_Intkz, TB_NO_REVERSE },
{ X86::VUNPCKHPDZ128rrkz, X86::VUNPCKHPDZ128rmkz, 0 },
{ X86::VUNPCKHPDZ256rrkz, X86::VUNPCKHPDZ256rmkz, 0 },
@@ -4348,10 +4622,14 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VADDPDZ128rrk, X86::VADDPDZ128rmk, 0 },
{ X86::VADDPDZ256rrk, X86::VADDPDZ256rmk, 0 },
{ X86::VADDPDZrrk, X86::VADDPDZrmk, 0 },
+ { X86::VADDPHZ128rrk, X86::VADDPHZ128rmk, 0 },
+ { X86::VADDPHZ256rrk, X86::VADDPHZ256rmk, 0 },
+ { X86::VADDPHZrrk, X86::VADDPHZrmk, 0 },
{ X86::VADDPSZ128rrk, X86::VADDPSZ128rmk, 0 },
{ X86::VADDPSZ256rrk, X86::VADDPSZ256rmk, 0 },
{ X86::VADDPSZrrk, X86::VADDPSZrmk, 0 },
{ X86::VADDSDZrr_Intk, X86::VADDSDZrm_Intk, TB_NO_REVERSE },
+ { X86::VADDSHZrr_Intk, X86::VADDSHZrm_Intk, TB_NO_REVERSE },
{ X86::VADDSSZrr_Intk, X86::VADDSSZrm_Intk, TB_NO_REVERSE },
{ X86::VALIGNDZ128rrik, X86::VALIGNDZ128rmik, 0 },
{ X86::VALIGNDZ256rrik, X86::VALIGNDZ256rmik, 0 },
@@ -4374,18 +4652,26 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VCVTNE2PS2BF16Z128rrk, X86::VCVTNE2PS2BF16Z128rmk, 0 },
{ X86::VCVTNE2PS2BF16Z256rrk, X86::VCVTNE2PS2BF16Z256rmk, 0 },
{ X86::VCVTNE2PS2BF16Zrrk, X86::VCVTNE2PS2BF16Zrmk, 0 },
+ { X86::VCVTSD2SHZrr_Intk, X86::VCVTSD2SHZrm_Intk, TB_NO_REVERSE },
{ X86::VCVTSD2SSZrr_Intk, X86::VCVTSD2SSZrm_Intk, TB_NO_REVERSE },
+ { X86::VCVTSH2SDZrr_Intk, X86::VCVTSH2SDZrm_Intk, TB_NO_REVERSE },
+ { X86::VCVTSH2SSZrr_Intk, X86::VCVTSH2SSZrm_Intk, TB_NO_REVERSE },
{ X86::VCVTSS2SDZrr_Intk, X86::VCVTSS2SDZrm_Intk, TB_NO_REVERSE },
+ { X86::VCVTSS2SHZrr_Intk, X86::VCVTSS2SHZrm_Intk, TB_NO_REVERSE },
{ X86::VDBPSADBWZ128rrik, X86::VDBPSADBWZ128rmik, 0 },
{ X86::VDBPSADBWZ256rrik, X86::VDBPSADBWZ256rmik, 0 },
{ X86::VDBPSADBWZrrik, X86::VDBPSADBWZrmik, 0 },
{ X86::VDIVPDZ128rrk, X86::VDIVPDZ128rmk, 0 },
{ X86::VDIVPDZ256rrk, X86::VDIVPDZ256rmk, 0 },
{ X86::VDIVPDZrrk, X86::VDIVPDZrmk, 0 },
+ { X86::VDIVPHZ128rrk, X86::VDIVPHZ128rmk, 0 },
+ { X86::VDIVPHZ256rrk, X86::VDIVPHZ256rmk, 0 },
+ { X86::VDIVPHZrrk, X86::VDIVPHZrmk, 0 },
{ X86::VDIVPSZ128rrk, X86::VDIVPSZ128rmk, 0 },
{ X86::VDIVPSZ256rrk, X86::VDIVPSZ256rmk, 0 },
{ X86::VDIVPSZrrk, X86::VDIVPSZrmk, 0 },
{ X86::VDIVSDZrr_Intk, X86::VDIVSDZrm_Intk, TB_NO_REVERSE },
+ { X86::VDIVSHZrr_Intk, X86::VDIVSHZrm_Intk, TB_NO_REVERSE },
{ X86::VDIVSSZrr_Intk, X86::VDIVSSZrm_Intk, TB_NO_REVERSE },
{ X86::VDPBF16PSZ128rk, X86::VDPBF16PSZ128mk, 0 },
{ X86::VDPBF16PSZ128rkz, X86::VDPBF16PSZ128mkz, 0 },
@@ -4393,6 +4679,18 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VDPBF16PSZ256rkz, X86::VDPBF16PSZ256mkz, 0 },
{ X86::VDPBF16PSZrk, X86::VDPBF16PSZmk, 0 },
{ X86::VDPBF16PSZrkz, X86::VDPBF16PSZmkz, 0 },
+ { X86::VFCMADDCPHZ128rk, X86::VFCMADDCPHZ128mk, 0 },
+ { X86::VFCMADDCPHZ128rkz, X86::VFCMADDCPHZ128mkz, 0 },
+ { X86::VFCMADDCPHZ256rk, X86::VFCMADDCPHZ256mk, 0 },
+ { X86::VFCMADDCPHZ256rkz, X86::VFCMADDCPHZ256mkz, 0 },
+ { X86::VFCMADDCPHZrk, X86::VFCMADDCPHZmk, 0 },
+ { X86::VFCMADDCPHZrkz, X86::VFCMADDCPHZmkz, 0 },
+ { X86::VFCMADDCSHZrk, X86::VFCMADDCSHZmk, TB_NO_REVERSE },
+ { X86::VFCMADDCSHZrkz, X86::VFCMADDCSHZmkz, TB_NO_REVERSE },
+ { X86::VFCMULCPHZ128rrk, X86::VFCMULCPHZ128rmk, 0 },
+ { X86::VFCMULCPHZ256rrk, X86::VFCMULCPHZ256rmk, 0 },
+ { X86::VFCMULCPHZrrk, X86::VFCMULCPHZrmk, 0 },
+ { X86::VFCMULCSHZrrk, X86::VFCMULCSHZrmk, TB_NO_REVERSE },
{ X86::VFIXUPIMMPDZ128rrik, X86::VFIXUPIMMPDZ128rmik, 0 },
{ X86::VFIXUPIMMPDZ128rrikz, X86::VFIXUPIMMPDZ128rmikz, 0 },
{ X86::VFIXUPIMMPDZ256rrik, X86::VFIXUPIMMPDZ256rmik, 0 },
@@ -4415,6 +4713,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFMADD132PDZ256rkz, X86::VFMADD132PDZ256mkz, 0 },
{ X86::VFMADD132PDZrk, X86::VFMADD132PDZmk, 0 },
{ X86::VFMADD132PDZrkz, X86::VFMADD132PDZmkz, 0 },
+ { X86::VFMADD132PHZ128rk, X86::VFMADD132PHZ128mk, 0 },
+ { X86::VFMADD132PHZ128rkz, X86::VFMADD132PHZ128mkz, 0 },
+ { X86::VFMADD132PHZ256rk, X86::VFMADD132PHZ256mk, 0 },
+ { X86::VFMADD132PHZ256rkz, X86::VFMADD132PHZ256mkz, 0 },
+ { X86::VFMADD132PHZrk, X86::VFMADD132PHZmk, 0 },
+ { X86::VFMADD132PHZrkz, X86::VFMADD132PHZmkz, 0 },
{ X86::VFMADD132PSZ128rk, X86::VFMADD132PSZ128mk, 0 },
{ X86::VFMADD132PSZ128rkz, X86::VFMADD132PSZ128mkz, 0 },
{ X86::VFMADD132PSZ256rk, X86::VFMADD132PSZ256mk, 0 },
@@ -4423,6 +4727,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFMADD132PSZrkz, X86::VFMADD132PSZmkz, 0 },
{ X86::VFMADD132SDZr_Intk, X86::VFMADD132SDZm_Intk, TB_NO_REVERSE },
{ X86::VFMADD132SDZr_Intkz, X86::VFMADD132SDZm_Intkz, TB_NO_REVERSE },
+ { X86::VFMADD132SHZr_Intk, X86::VFMADD132SHZm_Intk, TB_NO_REVERSE },
+ { X86::VFMADD132SHZr_Intkz, X86::VFMADD132SHZm_Intkz, TB_NO_REVERSE },
{ X86::VFMADD132SSZr_Intk, X86::VFMADD132SSZm_Intk, TB_NO_REVERSE },
{ X86::VFMADD132SSZr_Intkz, X86::VFMADD132SSZm_Intkz, TB_NO_REVERSE },
{ X86::VFMADD213PDZ128rk, X86::VFMADD213PDZ128mk, 0 },
@@ -4431,6 +4737,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFMADD213PDZ256rkz, X86::VFMADD213PDZ256mkz, 0 },
{ X86::VFMADD213PDZrk, X86::VFMADD213PDZmk, 0 },
{ X86::VFMADD213PDZrkz, X86::VFMADD213PDZmkz, 0 },
+ { X86::VFMADD213PHZ128rk, X86::VFMADD213PHZ128mk, 0 },
+ { X86::VFMADD213PHZ128rkz, X86::VFMADD213PHZ128mkz, 0 },
+ { X86::VFMADD213PHZ256rk, X86::VFMADD213PHZ256mk, 0 },
+ { X86::VFMADD213PHZ256rkz, X86::VFMADD213PHZ256mkz, 0 },
+ { X86::VFMADD213PHZrk, X86::VFMADD213PHZmk, 0 },
+ { X86::VFMADD213PHZrkz, X86::VFMADD213PHZmkz, 0 },
{ X86::VFMADD213PSZ128rk, X86::VFMADD213PSZ128mk, 0 },
{ X86::VFMADD213PSZ128rkz, X86::VFMADD213PSZ128mkz, 0 },
{ X86::VFMADD213PSZ256rk, X86::VFMADD213PSZ256mk, 0 },
@@ -4439,6 +4751,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFMADD213PSZrkz, X86::VFMADD213PSZmkz, 0 },
{ X86::VFMADD213SDZr_Intk, X86::VFMADD213SDZm_Intk, TB_NO_REVERSE },
{ X86::VFMADD213SDZr_Intkz, X86::VFMADD213SDZm_Intkz, TB_NO_REVERSE },
+ { X86::VFMADD213SHZr_Intk, X86::VFMADD213SHZm_Intk, TB_NO_REVERSE },
+ { X86::VFMADD213SHZr_Intkz, X86::VFMADD213SHZm_Intkz, TB_NO_REVERSE },
{ X86::VFMADD213SSZr_Intk, X86::VFMADD213SSZm_Intk, TB_NO_REVERSE },
{ X86::VFMADD213SSZr_Intkz, X86::VFMADD213SSZm_Intkz, TB_NO_REVERSE },
{ X86::VFMADD231PDZ128rk, X86::VFMADD231PDZ128mk, 0 },
@@ -4447,6 +4761,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFMADD231PDZ256rkz, X86::VFMADD231PDZ256mkz, 0 },
{ X86::VFMADD231PDZrk, X86::VFMADD231PDZmk, 0 },
{ X86::VFMADD231PDZrkz, X86::VFMADD231PDZmkz, 0 },
+ { X86::VFMADD231PHZ128rk, X86::VFMADD231PHZ128mk, 0 },
+ { X86::VFMADD231PHZ128rkz, X86::VFMADD231PHZ128mkz, 0 },
+ { X86::VFMADD231PHZ256rk, X86::VFMADD231PHZ256mk, 0 },
+ { X86::VFMADD231PHZ256rkz, X86::VFMADD231PHZ256mkz, 0 },
+ { X86::VFMADD231PHZrk, X86::VFMADD231PHZmk, 0 },
+ { X86::VFMADD231PHZrkz, X86::VFMADD231PHZmkz, 0 },
{ X86::VFMADD231PSZ128rk, X86::VFMADD231PSZ128mk, 0 },
{ X86::VFMADD231PSZ128rkz, X86::VFMADD231PSZ128mkz, 0 },
{ X86::VFMADD231PSZ256rk, X86::VFMADD231PSZ256mk, 0 },
@@ -4455,14 +4775,30 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFMADD231PSZrkz, X86::VFMADD231PSZmkz, 0 },
{ X86::VFMADD231SDZr_Intk, X86::VFMADD231SDZm_Intk, TB_NO_REVERSE },
{ X86::VFMADD231SDZr_Intkz, X86::VFMADD231SDZm_Intkz, TB_NO_REVERSE },
+ { X86::VFMADD231SHZr_Intk, X86::VFMADD231SHZm_Intk, TB_NO_REVERSE },
+ { X86::VFMADD231SHZr_Intkz, X86::VFMADD231SHZm_Intkz, TB_NO_REVERSE },
{ X86::VFMADD231SSZr_Intk, X86::VFMADD231SSZm_Intk, TB_NO_REVERSE },
{ X86::VFMADD231SSZr_Intkz, X86::VFMADD231SSZm_Intkz, TB_NO_REVERSE },
+ { X86::VFMADDCPHZ128rk, X86::VFMADDCPHZ128mk, 0 },
+ { X86::VFMADDCPHZ128rkz, X86::VFMADDCPHZ128mkz, 0 },
+ { X86::VFMADDCPHZ256rk, X86::VFMADDCPHZ256mk, 0 },
+ { X86::VFMADDCPHZ256rkz, X86::VFMADDCPHZ256mkz, 0 },
+ { X86::VFMADDCPHZrk, X86::VFMADDCPHZmk, 0 },
+ { X86::VFMADDCPHZrkz, X86::VFMADDCPHZmkz, 0 },
+ { X86::VFMADDCSHZrk, X86::VFMADDCSHZmk, TB_NO_REVERSE },
+ { X86::VFMADDCSHZrkz, X86::VFMADDCSHZmkz, TB_NO_REVERSE },
{ X86::VFMADDSUB132PDZ128rk, X86::VFMADDSUB132PDZ128mk, 0 },
{ X86::VFMADDSUB132PDZ128rkz, X86::VFMADDSUB132PDZ128mkz, 0 },
{ X86::VFMADDSUB132PDZ256rk, X86::VFMADDSUB132PDZ256mk, 0 },
{ X86::VFMADDSUB132PDZ256rkz, X86::VFMADDSUB132PDZ256mkz, 0 },
{ X86::VFMADDSUB132PDZrk, X86::VFMADDSUB132PDZmk, 0 },
{ X86::VFMADDSUB132PDZrkz, X86::VFMADDSUB132PDZmkz, 0 },
+ { X86::VFMADDSUB132PHZ128rk, X86::VFMADDSUB132PHZ128mk, 0 },
+ { X86::VFMADDSUB132PHZ128rkz, X86::VFMADDSUB132PHZ128mkz, 0 },
+ { X86::VFMADDSUB132PHZ256rk, X86::VFMADDSUB132PHZ256mk, 0 },
+ { X86::VFMADDSUB132PHZ256rkz, X86::VFMADDSUB132PHZ256mkz, 0 },
+ { X86::VFMADDSUB132PHZrk, X86::VFMADDSUB132PHZmk, 0 },
+ { X86::VFMADDSUB132PHZrkz, X86::VFMADDSUB132PHZmkz, 0 },
{ X86::VFMADDSUB132PSZ128rk, X86::VFMADDSUB132PSZ128mk, 0 },
{ X86::VFMADDSUB132PSZ128rkz, X86::VFMADDSUB132PSZ128mkz, 0 },
{ X86::VFMADDSUB132PSZ256rk, X86::VFMADDSUB132PSZ256mk, 0 },
@@ -4475,6 +4811,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFMADDSUB213PDZ256rkz, X86::VFMADDSUB213PDZ256mkz, 0 },
{ X86::VFMADDSUB213PDZrk, X86::VFMADDSUB213PDZmk, 0 },
{ X86::VFMADDSUB213PDZrkz, X86::VFMADDSUB213PDZmkz, 0 },
+ { X86::VFMADDSUB213PHZ128rk, X86::VFMADDSUB213PHZ128mk, 0 },
+ { X86::VFMADDSUB213PHZ128rkz, X86::VFMADDSUB213PHZ128mkz, 0 },
+ { X86::VFMADDSUB213PHZ256rk, X86::VFMADDSUB213PHZ256mk, 0 },
+ { X86::VFMADDSUB213PHZ256rkz, X86::VFMADDSUB213PHZ256mkz, 0 },
+ { X86::VFMADDSUB213PHZrk, X86::VFMADDSUB213PHZmk, 0 },
+ { X86::VFMADDSUB213PHZrkz, X86::VFMADDSUB213PHZmkz, 0 },
{ X86::VFMADDSUB213PSZ128rk, X86::VFMADDSUB213PSZ128mk, 0 },
{ X86::VFMADDSUB213PSZ128rkz, X86::VFMADDSUB213PSZ128mkz, 0 },
{ X86::VFMADDSUB213PSZ256rk, X86::VFMADDSUB213PSZ256mk, 0 },
@@ -4487,6 +4829,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFMADDSUB231PDZ256rkz, X86::VFMADDSUB231PDZ256mkz, 0 },
{ X86::VFMADDSUB231PDZrk, X86::VFMADDSUB231PDZmk, 0 },
{ X86::VFMADDSUB231PDZrkz, X86::VFMADDSUB231PDZmkz, 0 },
+ { X86::VFMADDSUB231PHZ128rk, X86::VFMADDSUB231PHZ128mk, 0 },
+ { X86::VFMADDSUB231PHZ128rkz, X86::VFMADDSUB231PHZ128mkz, 0 },
+ { X86::VFMADDSUB231PHZ256rk, X86::VFMADDSUB231PHZ256mk, 0 },
+ { X86::VFMADDSUB231PHZ256rkz, X86::VFMADDSUB231PHZ256mkz, 0 },
+ { X86::VFMADDSUB231PHZrk, X86::VFMADDSUB231PHZmk, 0 },
+ { X86::VFMADDSUB231PHZrkz, X86::VFMADDSUB231PHZmkz, 0 },
{ X86::VFMADDSUB231PSZ128rk, X86::VFMADDSUB231PSZ128mk, 0 },
{ X86::VFMADDSUB231PSZ128rkz, X86::VFMADDSUB231PSZ128mkz, 0 },
{ X86::VFMADDSUB231PSZ256rk, X86::VFMADDSUB231PSZ256mk, 0 },
@@ -4499,6 +4847,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFMSUB132PDZ256rkz, X86::VFMSUB132PDZ256mkz, 0 },
{ X86::VFMSUB132PDZrk, X86::VFMSUB132PDZmk, 0 },
{ X86::VFMSUB132PDZrkz, X86::VFMSUB132PDZmkz, 0 },
+ { X86::VFMSUB132PHZ128rk, X86::VFMSUB132PHZ128mk, 0 },
+ { X86::VFMSUB132PHZ128rkz, X86::VFMSUB132PHZ128mkz, 0 },
+ { X86::VFMSUB132PHZ256rk, X86::VFMSUB132PHZ256mk, 0 },
+ { X86::VFMSUB132PHZ256rkz, X86::VFMSUB132PHZ256mkz, 0 },
+ { X86::VFMSUB132PHZrk, X86::VFMSUB132PHZmk, 0 },
+ { X86::VFMSUB132PHZrkz, X86::VFMSUB132PHZmkz, 0 },
{ X86::VFMSUB132PSZ128rk, X86::VFMSUB132PSZ128mk, 0 },
{ X86::VFMSUB132PSZ128rkz, X86::VFMSUB132PSZ128mkz, 0 },
{ X86::VFMSUB132PSZ256rk, X86::VFMSUB132PSZ256mk, 0 },
@@ -4507,6 +4861,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFMSUB132PSZrkz, X86::VFMSUB132PSZmkz, 0 },
{ X86::VFMSUB132SDZr_Intk, X86::VFMSUB132SDZm_Intk, TB_NO_REVERSE },
{ X86::VFMSUB132SDZr_Intkz, X86::VFMSUB132SDZm_Intkz, TB_NO_REVERSE },
+ { X86::VFMSUB132SHZr_Intk, X86::VFMSUB132SHZm_Intk, TB_NO_REVERSE },
+ { X86::VFMSUB132SHZr_Intkz, X86::VFMSUB132SHZm_Intkz, TB_NO_REVERSE },
{ X86::VFMSUB132SSZr_Intk, X86::VFMSUB132SSZm_Intk, TB_NO_REVERSE },
{ X86::VFMSUB132SSZr_Intkz, X86::VFMSUB132SSZm_Intkz, TB_NO_REVERSE },
{ X86::VFMSUB213PDZ128rk, X86::VFMSUB213PDZ128mk, 0 },
@@ -4515,6 +4871,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFMSUB213PDZ256rkz, X86::VFMSUB213PDZ256mkz, 0 },
{ X86::VFMSUB213PDZrk, X86::VFMSUB213PDZmk, 0 },
{ X86::VFMSUB213PDZrkz, X86::VFMSUB213PDZmkz, 0 },
+ { X86::VFMSUB213PHZ128rk, X86::VFMSUB213PHZ128mk, 0 },
+ { X86::VFMSUB213PHZ128rkz, X86::VFMSUB213PHZ128mkz, 0 },
+ { X86::VFMSUB213PHZ256rk, X86::VFMSUB213PHZ256mk, 0 },
+ { X86::VFMSUB213PHZ256rkz, X86::VFMSUB213PHZ256mkz, 0 },
+ { X86::VFMSUB213PHZrk, X86::VFMSUB213PHZmk, 0 },
+ { X86::VFMSUB213PHZrkz, X86::VFMSUB213PHZmkz, 0 },
{ X86::VFMSUB213PSZ128rk, X86::VFMSUB213PSZ128mk, 0 },
{ X86::VFMSUB213PSZ128rkz, X86::VFMSUB213PSZ128mkz, 0 },
{ X86::VFMSUB213PSZ256rk, X86::VFMSUB213PSZ256mk, 0 },
@@ -4523,6 +4885,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFMSUB213PSZrkz, X86::VFMSUB213PSZmkz, 0 },
{ X86::VFMSUB213SDZr_Intk, X86::VFMSUB213SDZm_Intk, TB_NO_REVERSE },
{ X86::VFMSUB213SDZr_Intkz, X86::VFMSUB213SDZm_Intkz, TB_NO_REVERSE },
+ { X86::VFMSUB213SHZr_Intk, X86::VFMSUB213SHZm_Intk, TB_NO_REVERSE },
+ { X86::VFMSUB213SHZr_Intkz, X86::VFMSUB213SHZm_Intkz, TB_NO_REVERSE },
{ X86::VFMSUB213SSZr_Intk, X86::VFMSUB213SSZm_Intk, TB_NO_REVERSE },
{ X86::VFMSUB213SSZr_Intkz, X86::VFMSUB213SSZm_Intkz, TB_NO_REVERSE },
{ X86::VFMSUB231PDZ128rk, X86::VFMSUB231PDZ128mk, 0 },
@@ -4531,6 +4895,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFMSUB231PDZ256rkz, X86::VFMSUB231PDZ256mkz, 0 },
{ X86::VFMSUB231PDZrk, X86::VFMSUB231PDZmk, 0 },
{ X86::VFMSUB231PDZrkz, X86::VFMSUB231PDZmkz, 0 },
+ { X86::VFMSUB231PHZ128rk, X86::VFMSUB231PHZ128mk, 0 },
+ { X86::VFMSUB231PHZ128rkz, X86::VFMSUB231PHZ128mkz, 0 },
+ { X86::VFMSUB231PHZ256rk, X86::VFMSUB231PHZ256mk, 0 },
+ { X86::VFMSUB231PHZ256rkz, X86::VFMSUB231PHZ256mkz, 0 },
+ { X86::VFMSUB231PHZrk, X86::VFMSUB231PHZmk, 0 },
+ { X86::VFMSUB231PHZrkz, X86::VFMSUB231PHZmkz, 0 },
{ X86::VFMSUB231PSZ128rk, X86::VFMSUB231PSZ128mk, 0 },
{ X86::VFMSUB231PSZ128rkz, X86::VFMSUB231PSZ128mkz, 0 },
{ X86::VFMSUB231PSZ256rk, X86::VFMSUB231PSZ256mk, 0 },
@@ -4539,6 +4909,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFMSUB231PSZrkz, X86::VFMSUB231PSZmkz, 0 },
{ X86::VFMSUB231SDZr_Intk, X86::VFMSUB231SDZm_Intk, TB_NO_REVERSE },
{ X86::VFMSUB231SDZr_Intkz, X86::VFMSUB231SDZm_Intkz, TB_NO_REVERSE },
+ { X86::VFMSUB231SHZr_Intk, X86::VFMSUB231SHZm_Intk, TB_NO_REVERSE },
+ { X86::VFMSUB231SHZr_Intkz, X86::VFMSUB231SHZm_Intkz, TB_NO_REVERSE },
{ X86::VFMSUB231SSZr_Intk, X86::VFMSUB231SSZm_Intk, TB_NO_REVERSE },
{ X86::VFMSUB231SSZr_Intkz, X86::VFMSUB231SSZm_Intkz, TB_NO_REVERSE },
{ X86::VFMSUBADD132PDZ128rk, X86::VFMSUBADD132PDZ128mk, 0 },
@@ -4547,6 +4919,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFMSUBADD132PDZ256rkz, X86::VFMSUBADD132PDZ256mkz, 0 },
{ X86::VFMSUBADD132PDZrk, X86::VFMSUBADD132PDZmk, 0 },
{ X86::VFMSUBADD132PDZrkz, X86::VFMSUBADD132PDZmkz, 0 },
+ { X86::VFMSUBADD132PHZ128rk, X86::VFMSUBADD132PHZ128mk, 0 },
+ { X86::VFMSUBADD132PHZ128rkz, X86::VFMSUBADD132PHZ128mkz, 0 },
+ { X86::VFMSUBADD132PHZ256rk, X86::VFMSUBADD132PHZ256mk, 0 },
+ { X86::VFMSUBADD132PHZ256rkz, X86::VFMSUBADD132PHZ256mkz, 0 },
+ { X86::VFMSUBADD132PHZrk, X86::VFMSUBADD132PHZmk, 0 },
+ { X86::VFMSUBADD132PHZrkz, X86::VFMSUBADD132PHZmkz, 0 },
{ X86::VFMSUBADD132PSZ128rk, X86::VFMSUBADD132PSZ128mk, 0 },
{ X86::VFMSUBADD132PSZ128rkz, X86::VFMSUBADD132PSZ128mkz, 0 },
{ X86::VFMSUBADD132PSZ256rk, X86::VFMSUBADD132PSZ256mk, 0 },
@@ -4559,6 +4937,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFMSUBADD213PDZ256rkz, X86::VFMSUBADD213PDZ256mkz, 0 },
{ X86::VFMSUBADD213PDZrk, X86::VFMSUBADD213PDZmk, 0 },
{ X86::VFMSUBADD213PDZrkz, X86::VFMSUBADD213PDZmkz, 0 },
+ { X86::VFMSUBADD213PHZ128rk, X86::VFMSUBADD213PHZ128mk, 0 },
+ { X86::VFMSUBADD213PHZ128rkz, X86::VFMSUBADD213PHZ128mkz, 0 },
+ { X86::VFMSUBADD213PHZ256rk, X86::VFMSUBADD213PHZ256mk, 0 },
+ { X86::VFMSUBADD213PHZ256rkz, X86::VFMSUBADD213PHZ256mkz, 0 },
+ { X86::VFMSUBADD213PHZrk, X86::VFMSUBADD213PHZmk, 0 },
+ { X86::VFMSUBADD213PHZrkz, X86::VFMSUBADD213PHZmkz, 0 },
{ X86::VFMSUBADD213PSZ128rk, X86::VFMSUBADD213PSZ128mk, 0 },
{ X86::VFMSUBADD213PSZ128rkz, X86::VFMSUBADD213PSZ128mkz, 0 },
{ X86::VFMSUBADD213PSZ256rk, X86::VFMSUBADD213PSZ256mk, 0 },
@@ -4571,18 +4955,34 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFMSUBADD231PDZ256rkz, X86::VFMSUBADD231PDZ256mkz, 0 },
{ X86::VFMSUBADD231PDZrk, X86::VFMSUBADD231PDZmk, 0 },
{ X86::VFMSUBADD231PDZrkz, X86::VFMSUBADD231PDZmkz, 0 },
+ { X86::VFMSUBADD231PHZ128rk, X86::VFMSUBADD231PHZ128mk, 0 },
+ { X86::VFMSUBADD231PHZ128rkz, X86::VFMSUBADD231PHZ128mkz, 0 },
+ { X86::VFMSUBADD231PHZ256rk, X86::VFMSUBADD231PHZ256mk, 0 },
+ { X86::VFMSUBADD231PHZ256rkz, X86::VFMSUBADD231PHZ256mkz, 0 },
+ { X86::VFMSUBADD231PHZrk, X86::VFMSUBADD231PHZmk, 0 },
+ { X86::VFMSUBADD231PHZrkz, X86::VFMSUBADD231PHZmkz, 0 },
{ X86::VFMSUBADD231PSZ128rk, X86::VFMSUBADD231PSZ128mk, 0 },
{ X86::VFMSUBADD231PSZ128rkz, X86::VFMSUBADD231PSZ128mkz, 0 },
{ X86::VFMSUBADD231PSZ256rk, X86::VFMSUBADD231PSZ256mk, 0 },
{ X86::VFMSUBADD231PSZ256rkz, X86::VFMSUBADD231PSZ256mkz, 0 },
{ X86::VFMSUBADD231PSZrk, X86::VFMSUBADD231PSZmk, 0 },
{ X86::VFMSUBADD231PSZrkz, X86::VFMSUBADD231PSZmkz, 0 },
+ { X86::VFMULCPHZ128rrk, X86::VFMULCPHZ128rmk, 0 },
+ { X86::VFMULCPHZ256rrk, X86::VFMULCPHZ256rmk, 0 },
+ { X86::VFMULCPHZrrk, X86::VFMULCPHZrmk, 0 },
+ { X86::VFMULCSHZrrk, X86::VFMULCSHZrmk, TB_NO_REVERSE },
{ X86::VFNMADD132PDZ128rk, X86::VFNMADD132PDZ128mk, 0 },
{ X86::VFNMADD132PDZ128rkz, X86::VFNMADD132PDZ128mkz, 0 },
{ X86::VFNMADD132PDZ256rk, X86::VFNMADD132PDZ256mk, 0 },
{ X86::VFNMADD132PDZ256rkz, X86::VFNMADD132PDZ256mkz, 0 },
{ X86::VFNMADD132PDZrk, X86::VFNMADD132PDZmk, 0 },
{ X86::VFNMADD132PDZrkz, X86::VFNMADD132PDZmkz, 0 },
+ { X86::VFNMADD132PHZ128rk, X86::VFNMADD132PHZ128mk, 0 },
+ { X86::VFNMADD132PHZ128rkz, X86::VFNMADD132PHZ128mkz, 0 },
+ { X86::VFNMADD132PHZ256rk, X86::VFNMADD132PHZ256mk, 0 },
+ { X86::VFNMADD132PHZ256rkz, X86::VFNMADD132PHZ256mkz, 0 },
+ { X86::VFNMADD132PHZrk, X86::VFNMADD132PHZmk, 0 },
+ { X86::VFNMADD132PHZrkz, X86::VFNMADD132PHZmkz, 0 },
{ X86::VFNMADD132PSZ128rk, X86::VFNMADD132PSZ128mk, 0 },
{ X86::VFNMADD132PSZ128rkz, X86::VFNMADD132PSZ128mkz, 0 },
{ X86::VFNMADD132PSZ256rk, X86::VFNMADD132PSZ256mk, 0 },
@@ -4591,6 +4991,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFNMADD132PSZrkz, X86::VFNMADD132PSZmkz, 0 },
{ X86::VFNMADD132SDZr_Intk, X86::VFNMADD132SDZm_Intk, TB_NO_REVERSE },
{ X86::VFNMADD132SDZr_Intkz, X86::VFNMADD132SDZm_Intkz, TB_NO_REVERSE },
+ { X86::VFNMADD132SHZr_Intk, X86::VFNMADD132SHZm_Intk, TB_NO_REVERSE },
+ { X86::VFNMADD132SHZr_Intkz, X86::VFNMADD132SHZm_Intkz, TB_NO_REVERSE },
{ X86::VFNMADD132SSZr_Intk, X86::VFNMADD132SSZm_Intk, TB_NO_REVERSE },
{ X86::VFNMADD132SSZr_Intkz, X86::VFNMADD132SSZm_Intkz, TB_NO_REVERSE },
{ X86::VFNMADD213PDZ128rk, X86::VFNMADD213PDZ128mk, 0 },
@@ -4599,6 +5001,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFNMADD213PDZ256rkz, X86::VFNMADD213PDZ256mkz, 0 },
{ X86::VFNMADD213PDZrk, X86::VFNMADD213PDZmk, 0 },
{ X86::VFNMADD213PDZrkz, X86::VFNMADD213PDZmkz, 0 },
+ { X86::VFNMADD213PHZ128rk, X86::VFNMADD213PHZ128mk, 0 },
+ { X86::VFNMADD213PHZ128rkz, X86::VFNMADD213PHZ128mkz, 0 },
+ { X86::VFNMADD213PHZ256rk, X86::VFNMADD213PHZ256mk, 0 },
+ { X86::VFNMADD213PHZ256rkz, X86::VFNMADD213PHZ256mkz, 0 },
+ { X86::VFNMADD213PHZrk, X86::VFNMADD213PHZmk, 0 },
+ { X86::VFNMADD213PHZrkz, X86::VFNMADD213PHZmkz, 0 },
{ X86::VFNMADD213PSZ128rk, X86::VFNMADD213PSZ128mk, 0 },
{ X86::VFNMADD213PSZ128rkz, X86::VFNMADD213PSZ128mkz, 0 },
{ X86::VFNMADD213PSZ256rk, X86::VFNMADD213PSZ256mk, 0 },
@@ -4607,6 +5015,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFNMADD213PSZrkz, X86::VFNMADD213PSZmkz, 0 },
{ X86::VFNMADD213SDZr_Intk, X86::VFNMADD213SDZm_Intk, TB_NO_REVERSE },
{ X86::VFNMADD213SDZr_Intkz, X86::VFNMADD213SDZm_Intkz, TB_NO_REVERSE },
+ { X86::VFNMADD213SHZr_Intk, X86::VFNMADD213SHZm_Intk, TB_NO_REVERSE },
+ { X86::VFNMADD213SHZr_Intkz, X86::VFNMADD213SHZm_Intkz, TB_NO_REVERSE },
{ X86::VFNMADD213SSZr_Intk, X86::VFNMADD213SSZm_Intk, TB_NO_REVERSE },
{ X86::VFNMADD213SSZr_Intkz, X86::VFNMADD213SSZm_Intkz, TB_NO_REVERSE },
{ X86::VFNMADD231PDZ128rk, X86::VFNMADD231PDZ128mk, 0 },
@@ -4615,6 +5025,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFNMADD231PDZ256rkz, X86::VFNMADD231PDZ256mkz, 0 },
{ X86::VFNMADD231PDZrk, X86::VFNMADD231PDZmk, 0 },
{ X86::VFNMADD231PDZrkz, X86::VFNMADD231PDZmkz, 0 },
+ { X86::VFNMADD231PHZ128rk, X86::VFNMADD231PHZ128mk, 0 },
+ { X86::VFNMADD231PHZ128rkz, X86::VFNMADD231PHZ128mkz, 0 },
+ { X86::VFNMADD231PHZ256rk, X86::VFNMADD231PHZ256mk, 0 },
+ { X86::VFNMADD231PHZ256rkz, X86::VFNMADD231PHZ256mkz, 0 },
+ { X86::VFNMADD231PHZrk, X86::VFNMADD231PHZmk, 0 },
+ { X86::VFNMADD231PHZrkz, X86::VFNMADD231PHZmkz, 0 },
{ X86::VFNMADD231PSZ128rk, X86::VFNMADD231PSZ128mk, 0 },
{ X86::VFNMADD231PSZ128rkz, X86::VFNMADD231PSZ128mkz, 0 },
{ X86::VFNMADD231PSZ256rk, X86::VFNMADD231PSZ256mk, 0 },
@@ -4623,6 +5039,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFNMADD231PSZrkz, X86::VFNMADD231PSZmkz, 0 },
{ X86::VFNMADD231SDZr_Intk, X86::VFNMADD231SDZm_Intk, TB_NO_REVERSE },
{ X86::VFNMADD231SDZr_Intkz, X86::VFNMADD231SDZm_Intkz, TB_NO_REVERSE },
+ { X86::VFNMADD231SHZr_Intk, X86::VFNMADD231SHZm_Intk, TB_NO_REVERSE },
+ { X86::VFNMADD231SHZr_Intkz, X86::VFNMADD231SHZm_Intkz, TB_NO_REVERSE },
{ X86::VFNMADD231SSZr_Intk, X86::VFNMADD231SSZm_Intk, TB_NO_REVERSE },
{ X86::VFNMADD231SSZr_Intkz, X86::VFNMADD231SSZm_Intkz, TB_NO_REVERSE },
{ X86::VFNMSUB132PDZ128rk, X86::VFNMSUB132PDZ128mk, 0 },
@@ -4631,6 +5049,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFNMSUB132PDZ256rkz, X86::VFNMSUB132PDZ256mkz, 0 },
{ X86::VFNMSUB132PDZrk, X86::VFNMSUB132PDZmk, 0 },
{ X86::VFNMSUB132PDZrkz, X86::VFNMSUB132PDZmkz, 0 },
+ { X86::VFNMSUB132PHZ128rk, X86::VFNMSUB132PHZ128mk, 0 },
+ { X86::VFNMSUB132PHZ128rkz, X86::VFNMSUB132PHZ128mkz, 0 },
+ { X86::VFNMSUB132PHZ256rk, X86::VFNMSUB132PHZ256mk, 0 },
+ { X86::VFNMSUB132PHZ256rkz, X86::VFNMSUB132PHZ256mkz, 0 },
+ { X86::VFNMSUB132PHZrk, X86::VFNMSUB132PHZmk, 0 },
+ { X86::VFNMSUB132PHZrkz, X86::VFNMSUB132PHZmkz, 0 },
{ X86::VFNMSUB132PSZ128rk, X86::VFNMSUB132PSZ128mk, 0 },
{ X86::VFNMSUB132PSZ128rkz, X86::VFNMSUB132PSZ128mkz, 0 },
{ X86::VFNMSUB132PSZ256rk, X86::VFNMSUB132PSZ256mk, 0 },
@@ -4639,6 +5063,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFNMSUB132PSZrkz, X86::VFNMSUB132PSZmkz, 0 },
{ X86::VFNMSUB132SDZr_Intk, X86::VFNMSUB132SDZm_Intk, TB_NO_REVERSE },
{ X86::VFNMSUB132SDZr_Intkz, X86::VFNMSUB132SDZm_Intkz, TB_NO_REVERSE },
+ { X86::VFNMSUB132SHZr_Intk, X86::VFNMSUB132SHZm_Intk, TB_NO_REVERSE },
+ { X86::VFNMSUB132SHZr_Intkz, X86::VFNMSUB132SHZm_Intkz, TB_NO_REVERSE },
{ X86::VFNMSUB132SSZr_Intk, X86::VFNMSUB132SSZm_Intk, TB_NO_REVERSE },
{ X86::VFNMSUB132SSZr_Intkz, X86::VFNMSUB132SSZm_Intkz, TB_NO_REVERSE },
{ X86::VFNMSUB213PDZ128rk, X86::VFNMSUB213PDZ128mk, 0 },
@@ -4647,6 +5073,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFNMSUB213PDZ256rkz, X86::VFNMSUB213PDZ256mkz, 0 },
{ X86::VFNMSUB213PDZrk, X86::VFNMSUB213PDZmk, 0 },
{ X86::VFNMSUB213PDZrkz, X86::VFNMSUB213PDZmkz, 0 },
+ { X86::VFNMSUB213PHZ128rk, X86::VFNMSUB213PHZ128mk, 0 },
+ { X86::VFNMSUB213PHZ128rkz, X86::VFNMSUB213PHZ128mkz, 0 },
+ { X86::VFNMSUB213PHZ256rk, X86::VFNMSUB213PHZ256mk, 0 },
+ { X86::VFNMSUB213PHZ256rkz, X86::VFNMSUB213PHZ256mkz, 0 },
+ { X86::VFNMSUB213PHZrk, X86::VFNMSUB213PHZmk, 0 },
+ { X86::VFNMSUB213PHZrkz, X86::VFNMSUB213PHZmkz, 0 },
{ X86::VFNMSUB213PSZ128rk, X86::VFNMSUB213PSZ128mk, 0 },
{ X86::VFNMSUB213PSZ128rkz, X86::VFNMSUB213PSZ128mkz, 0 },
{ X86::VFNMSUB213PSZ256rk, X86::VFNMSUB213PSZ256mk, 0 },
@@ -4655,6 +5087,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFNMSUB213PSZrkz, X86::VFNMSUB213PSZmkz, 0 },
{ X86::VFNMSUB213SDZr_Intk, X86::VFNMSUB213SDZm_Intk, TB_NO_REVERSE },
{ X86::VFNMSUB213SDZr_Intkz, X86::VFNMSUB213SDZm_Intkz, TB_NO_REVERSE },
+ { X86::VFNMSUB213SHZr_Intk, X86::VFNMSUB213SHZm_Intk, TB_NO_REVERSE },
+ { X86::VFNMSUB213SHZr_Intkz, X86::VFNMSUB213SHZm_Intkz, TB_NO_REVERSE },
{ X86::VFNMSUB213SSZr_Intk, X86::VFNMSUB213SSZm_Intk, TB_NO_REVERSE },
{ X86::VFNMSUB213SSZr_Intkz, X86::VFNMSUB213SSZm_Intkz, TB_NO_REVERSE },
{ X86::VFNMSUB231PDZ128rk, X86::VFNMSUB231PDZ128mk, 0 },
@@ -4663,6 +5097,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFNMSUB231PDZ256rkz, X86::VFNMSUB231PDZ256mkz, 0 },
{ X86::VFNMSUB231PDZrk, X86::VFNMSUB231PDZmk, 0 },
{ X86::VFNMSUB231PDZrkz, X86::VFNMSUB231PDZmkz, 0 },
+ { X86::VFNMSUB231PHZ128rk, X86::VFNMSUB231PHZ128mk, 0 },
+ { X86::VFNMSUB231PHZ128rkz, X86::VFNMSUB231PHZ128mkz, 0 },
+ { X86::VFNMSUB231PHZ256rk, X86::VFNMSUB231PHZ256mk, 0 },
+ { X86::VFNMSUB231PHZ256rkz, X86::VFNMSUB231PHZ256mkz, 0 },
+ { X86::VFNMSUB231PHZrk, X86::VFNMSUB231PHZmk, 0 },
+ { X86::VFNMSUB231PHZrkz, X86::VFNMSUB231PHZmkz, 0 },
{ X86::VFNMSUB231PSZ128rk, X86::VFNMSUB231PSZ128mk, 0 },
{ X86::VFNMSUB231PSZ128rkz, X86::VFNMSUB231PSZ128mkz, 0 },
{ X86::VFNMSUB231PSZ256rk, X86::VFNMSUB231PSZ256mk, 0 },
@@ -4671,11 +5111,15 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFNMSUB231PSZrkz, X86::VFNMSUB231PSZmkz, 0 },
{ X86::VFNMSUB231SDZr_Intk, X86::VFNMSUB231SDZm_Intk, TB_NO_REVERSE },
{ X86::VFNMSUB231SDZr_Intkz, X86::VFNMSUB231SDZm_Intkz, TB_NO_REVERSE },
+ { X86::VFNMSUB231SHZr_Intk, X86::VFNMSUB231SHZm_Intk, TB_NO_REVERSE },
+ { X86::VFNMSUB231SHZr_Intkz, X86::VFNMSUB231SHZm_Intkz, TB_NO_REVERSE },
{ X86::VFNMSUB231SSZr_Intk, X86::VFNMSUB231SSZm_Intk, TB_NO_REVERSE },
{ X86::VFNMSUB231SSZr_Intkz, X86::VFNMSUB231SSZm_Intkz, TB_NO_REVERSE },
{ X86::VGETEXPSDZrk, X86::VGETEXPSDZmk, TB_NO_REVERSE },
+ { X86::VGETEXPSHZrk, X86::VGETEXPSHZmk, TB_NO_REVERSE },
{ X86::VGETEXPSSZrk, X86::VGETEXPSSZmk, TB_NO_REVERSE },
{ X86::VGETMANTSDZrrik, X86::VGETMANTSDZrmik, TB_NO_REVERSE },
+ { X86::VGETMANTSHZrrik, X86::VGETMANTSHZrmik, TB_NO_REVERSE },
{ X86::VGETMANTSSZrrik, X86::VGETMANTSSZrmik, TB_NO_REVERSE },
{ X86::VGF2P8AFFINEINVQBZ128rrik, X86::VGF2P8AFFINEINVQBZ128rmik, 0 },
{ X86::VGF2P8AFFINEINVQBZ256rrik, X86::VGF2P8AFFINEINVQBZ256rmik, 0 },
@@ -4701,38 +5145,56 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VMAXCPDZ128rrk, X86::VMAXCPDZ128rmk, 0 },
{ X86::VMAXCPDZ256rrk, X86::VMAXCPDZ256rmk, 0 },
{ X86::VMAXCPDZrrk, X86::VMAXCPDZrmk, 0 },
+ { X86::VMAXCPHZ128rrk, X86::VMAXCPHZ128rmk, 0 },
+ { X86::VMAXCPHZ256rrk, X86::VMAXCPHZ256rmk, 0 },
+ { X86::VMAXCPHZrrk, X86::VMAXCPHZrmk, 0 },
{ X86::VMAXCPSZ128rrk, X86::VMAXCPSZ128rmk, 0 },
{ X86::VMAXCPSZ256rrk, X86::VMAXCPSZ256rmk, 0 },
{ X86::VMAXCPSZrrk, X86::VMAXCPSZrmk, 0 },
{ X86::VMAXPDZ128rrk, X86::VMAXPDZ128rmk, 0 },
{ X86::VMAXPDZ256rrk, X86::VMAXPDZ256rmk, 0 },
{ X86::VMAXPDZrrk, X86::VMAXPDZrmk, 0 },
+ { X86::VMAXPHZ128rrk, X86::VMAXPHZ128rmk, 0 },
+ { X86::VMAXPHZ256rrk, X86::VMAXPHZ256rmk, 0 },
+ { X86::VMAXPHZrrk, X86::VMAXPHZrmk, 0 },
{ X86::VMAXPSZ128rrk, X86::VMAXPSZ128rmk, 0 },
{ X86::VMAXPSZ256rrk, X86::VMAXPSZ256rmk, 0 },
{ X86::VMAXPSZrrk, X86::VMAXPSZrmk, 0 },
{ X86::VMAXSDZrr_Intk, X86::VMAXSDZrm_Intk, TB_NO_REVERSE },
+ { X86::VMAXSHZrr_Intk, X86::VMAXSHZrm_Intk, TB_NO_REVERSE },
{ X86::VMAXSSZrr_Intk, X86::VMAXSSZrm_Intk, TB_NO_REVERSE },
{ X86::VMINCPDZ128rrk, X86::VMINCPDZ128rmk, 0 },
{ X86::VMINCPDZ256rrk, X86::VMINCPDZ256rmk, 0 },
{ X86::VMINCPDZrrk, X86::VMINCPDZrmk, 0 },
+ { X86::VMINCPHZ128rrk, X86::VMINCPHZ128rmk, 0 },
+ { X86::VMINCPHZ256rrk, X86::VMINCPHZ256rmk, 0 },
+ { X86::VMINCPHZrrk, X86::VMINCPHZrmk, 0 },
{ X86::VMINCPSZ128rrk, X86::VMINCPSZ128rmk, 0 },
{ X86::VMINCPSZ256rrk, X86::VMINCPSZ256rmk, 0 },
{ X86::VMINCPSZrrk, X86::VMINCPSZrmk, 0 },
{ X86::VMINPDZ128rrk, X86::VMINPDZ128rmk, 0 },
{ X86::VMINPDZ256rrk, X86::VMINPDZ256rmk, 0 },
{ X86::VMINPDZrrk, X86::VMINPDZrmk, 0 },
+ { X86::VMINPHZ128rrk, X86::VMINPHZ128rmk, 0 },
+ { X86::VMINPHZ256rrk, X86::VMINPHZ256rmk, 0 },
+ { X86::VMINPHZrrk, X86::VMINPHZrmk, 0 },
{ X86::VMINPSZ128rrk, X86::VMINPSZ128rmk, 0 },
{ X86::VMINPSZ256rrk, X86::VMINPSZ256rmk, 0 },
{ X86::VMINPSZrrk, X86::VMINPSZrmk, 0 },
{ X86::VMINSDZrr_Intk, X86::VMINSDZrm_Intk, TB_NO_REVERSE },
+ { X86::VMINSHZrr_Intk, X86::VMINSHZrm_Intk, TB_NO_REVERSE },
{ X86::VMINSSZrr_Intk, X86::VMINSSZrm_Intk, TB_NO_REVERSE },
{ X86::VMULPDZ128rrk, X86::VMULPDZ128rmk, 0 },
{ X86::VMULPDZ256rrk, X86::VMULPDZ256rmk, 0 },
{ X86::VMULPDZrrk, X86::VMULPDZrmk, 0 },
+ { X86::VMULPHZ128rrk, X86::VMULPHZ128rmk, 0 },
+ { X86::VMULPHZ256rrk, X86::VMULPHZ256rmk, 0 },
+ { X86::VMULPHZrrk, X86::VMULPHZrmk, 0 },
{ X86::VMULPSZ128rrk, X86::VMULPSZ128rmk, 0 },
{ X86::VMULPSZ256rrk, X86::VMULPSZ256rmk, 0 },
{ X86::VMULPSZrrk, X86::VMULPSZrmk, 0 },
{ X86::VMULSDZrr_Intk, X86::VMULSDZrm_Intk, TB_NO_REVERSE },
+ { X86::VMULSHZrr_Intk, X86::VMULSHZrm_Intk, TB_NO_REVERSE },
{ X86::VMULSSZrr_Intk, X86::VMULSSZrm_Intk, TB_NO_REVERSE },
{ X86::VORPDZ128rrk, X86::VORPDZ128rmk, 0 },
{ X86::VORPDZ256rrk, X86::VORPDZ256rmk, 0 },
@@ -5213,21 +5675,29 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VRCP14SSZrrk, X86::VRCP14SSZrmk, TB_NO_REVERSE },
{ X86::VRCP28SDZrk, X86::VRCP28SDZmk, TB_NO_REVERSE },
{ X86::VRCP28SSZrk, X86::VRCP28SSZmk, TB_NO_REVERSE },
+ { X86::VRCPSHZrrk, X86::VRCPSHZrmk, TB_NO_REVERSE },
{ X86::VREDUCESDZrrik, X86::VREDUCESDZrmik, TB_NO_REVERSE },
+ { X86::VREDUCESHZrrik, X86::VREDUCESHZrmik, TB_NO_REVERSE },
{ X86::VREDUCESSZrrik, X86::VREDUCESSZrmik, TB_NO_REVERSE },
{ X86::VRNDSCALESDZr_Intk, X86::VRNDSCALESDZm_Intk, TB_NO_REVERSE },
+ { X86::VRNDSCALESHZr_Intk, X86::VRNDSCALESHZm_Intk, TB_NO_REVERSE },
{ X86::VRNDSCALESSZr_Intk, X86::VRNDSCALESSZm_Intk, TB_NO_REVERSE },
{ X86::VRSQRT14SDZrrk, X86::VRSQRT14SDZrmk, TB_NO_REVERSE },
{ X86::VRSQRT14SSZrrk, X86::VRSQRT14SSZrmk, TB_NO_REVERSE },
{ X86::VRSQRT28SDZrk, X86::VRSQRT28SDZmk, TB_NO_REVERSE },
{ X86::VRSQRT28SSZrk, X86::VRSQRT28SSZmk, TB_NO_REVERSE },
+ { X86::VRSQRTSHZrrk, X86::VRSQRTSHZrmk, TB_NO_REVERSE },
{ X86::VSCALEFPDZ128rrk, X86::VSCALEFPDZ128rmk, 0 },
{ X86::VSCALEFPDZ256rrk, X86::VSCALEFPDZ256rmk, 0 },
{ X86::VSCALEFPDZrrk, X86::VSCALEFPDZrmk, 0 },
+ { X86::VSCALEFPHZ128rrk, X86::VSCALEFPHZ128rmk, 0 },
+ { X86::VSCALEFPHZ256rrk, X86::VSCALEFPHZ256rmk, 0 },
+ { X86::VSCALEFPHZrrk, X86::VSCALEFPHZrmk, 0 },
{ X86::VSCALEFPSZ128rrk, X86::VSCALEFPSZ128rmk, 0 },
{ X86::VSCALEFPSZ256rrk, X86::VSCALEFPSZ256rmk, 0 },
{ X86::VSCALEFPSZrrk, X86::VSCALEFPSZrmk, 0 },
{ X86::VSCALEFSDZrrk, X86::VSCALEFSDZrmk, TB_NO_REVERSE },
+ { X86::VSCALEFSHZrrk, X86::VSCALEFSHZrmk, TB_NO_REVERSE },
{ X86::VSCALEFSSZrrk, X86::VSCALEFSSZrmk, TB_NO_REVERSE },
{ X86::VSHUFF32X4Z256rrik, X86::VSHUFF32X4Z256rmik, 0 },
{ X86::VSHUFF32X4Zrrik, X86::VSHUFF32X4Zrmik, 0 },
@@ -5244,14 +5714,19 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VSHUFPSZ256rrik, X86::VSHUFPSZ256rmik, 0 },
{ X86::VSHUFPSZrrik, X86::VSHUFPSZrmik, 0 },
{ X86::VSQRTSDZr_Intk, X86::VSQRTSDZm_Intk, TB_NO_REVERSE },
+ { X86::VSQRTSHZr_Intk, X86::VSQRTSHZm_Intk, TB_NO_REVERSE },
{ X86::VSQRTSSZr_Intk, X86::VSQRTSSZm_Intk, TB_NO_REVERSE },
{ X86::VSUBPDZ128rrk, X86::VSUBPDZ128rmk, 0 },
{ X86::VSUBPDZ256rrk, X86::VSUBPDZ256rmk, 0 },
{ X86::VSUBPDZrrk, X86::VSUBPDZrmk, 0 },
+ { X86::VSUBPHZ128rrk, X86::VSUBPHZ128rmk, 0 },
+ { X86::VSUBPHZ256rrk, X86::VSUBPHZ256rmk, 0 },
+ { X86::VSUBPHZrrk, X86::VSUBPHZrmk, 0 },
{ X86::VSUBPSZ128rrk, X86::VSUBPSZ128rmk, 0 },
{ X86::VSUBPSZ256rrk, X86::VSUBPSZ256rmk, 0 },
{ X86::VSUBPSZrrk, X86::VSUBPSZrmk, 0 },
{ X86::VSUBSDZrr_Intk, X86::VSUBSDZrm_Intk, TB_NO_REVERSE },
+ { X86::VSUBSHZrr_Intk, X86::VSUBSHZrm_Intk, TB_NO_REVERSE },
{ X86::VSUBSSZrr_Intk, X86::VSUBSSZrm_Intk, TB_NO_REVERSE },
{ X86::VUNPCKHPDZ128rrk, X86::VUNPCKHPDZ128rmk, 0 },
{ X86::VUNPCKHPDZ256rrk, X86::VUNPCKHPDZ256rmk, 0 },
diff --git a/llvm/lib/Target/X86/X86InstrFormats.td b/llvm/lib/Target/X86/X86InstrFormats.td
index dba13720cbd2..0e7033fc233a 100644
--- a/llvm/lib/Target/X86/X86InstrFormats.td
+++ b/llvm/lib/Target/X86/X86InstrFormats.td
@@ -149,8 +149,8 @@ def PS : Prefix<4>; // Similar to NoPrfx, but disassembler uses this to know
// disable to ANDPS.
// Class specifying the opcode map.
-class Map<bits<3> val> {
- bits<3> Value = val;
+class Map<bits<4> val> {
+ bits<4> Value = val;
}
def OB : Map<0>;
def TB : Map<1>;
@@ -160,6 +160,8 @@ def XOP8 : Map<4>;
def XOP9 : Map<5>;
def XOPA : Map<6>;
def ThreeDNow : Map<7>;
+def T_MAP5 : Map<8>;
+def T_MAP6 : Map<9>;
// Class specifying the encoding
class Encoding<bits<2> val> {
@@ -204,6 +206,16 @@ class XOP8 { Map OpMap = XOP8; Prefix OpPrefix = PS; }
class XOP9 { Map OpMap = XOP9; Prefix OpPrefix = PS; }
class XOPA { Map OpMap = XOPA; Prefix OpPrefix = PS; }
class ThreeDNow { Map OpMap = ThreeDNow; }
+class T_MAP5 { Map OpMap = T_MAP5; }
+class T_MAP5PS : T_MAP5 { Prefix OpPrefix = PS; } // none
+class T_MAP5PD : T_MAP5 { Prefix OpPrefix = PD; } // 0x66
+class T_MAP5XS : T_MAP5 { Prefix OpPrefix = XS; } // 0xF3
+class T_MAP5XD : T_MAP5 { Prefix OpPrefix = XD; } // 0xF2
+class T_MAP6 { Map OpMap = T_MAP6; }
+class T_MAP6PS : T_MAP6 { Prefix OpPrefix = PS; }
+class T_MAP6PD : T_MAP6 { Prefix OpPrefix = PD; }
+class T_MAP6XS : T_MAP6 { Prefix OpPrefix = XS; }
+class T_MAP6XD : T_MAP6 { Prefix OpPrefix = XD; }
class OBXS { Prefix OpPrefix = XS; }
class PS : TB { Prefix OpPrefix = PS; }
class PD : TB { Prefix OpPrefix = PD; }
@@ -284,6 +296,8 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
// If this is a pseudo instruction, mark it isCodeGenOnly.
let isCodeGenOnly = !eq(!cast<string>(f), "Pseudo");
+ let HasPositionOrder = 1;
+
//
// Attributes specific to X86 instructions...
//
@@ -301,7 +315,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
Prefix OpPrefix = NoPrfx; // Which prefix byte does this inst have?
bits<3> OpPrefixBits = OpPrefix.Value;
Map OpMap = OB; // Which opcode map does this inst have?
- bits<3> OpMapBits = OpMap.Value;
+ bits<4> OpMapBits = OpMap.Value;
bit hasREX_WPrefix = 0; // Does this inst require the REX.W prefix?
FPFormat FPForm = NotFP; // What flavor of FP instruction is this?
bit hasLockPrefix = 0; // Does this inst have a 0xF0 prefix?
@@ -360,28 +374,28 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
let TSFlags{10-9} = AdSizeBits;
// No need for 3rd bit, we don't need to distinguish NoPrfx from PS.
let TSFlags{12-11} = OpPrefixBits{1-0};
- let TSFlags{15-13} = OpMapBits;
- let TSFlags{16} = hasREX_WPrefix;
- let TSFlags{20-17} = ImmT.Value;
- let TSFlags{23-21} = FPForm.Value;
- let TSFlags{24} = hasLockPrefix;
- let TSFlags{25} = hasREPPrefix;
- let TSFlags{27-26} = ExeDomain.Value;
- let TSFlags{29-28} = OpEncBits;
- let TSFlags{37-30} = Opcode;
+ let TSFlags{16-13} = OpMapBits;
+ let TSFlags{17} = hasREX_WPrefix;
+ let TSFlags{21-18} = ImmT.Value;
+ let TSFlags{24-22} = FPForm.Value;
+ let TSFlags{25} = hasLockPrefix;
+ let TSFlags{26} = hasREPPrefix;
+ let TSFlags{28-27} = ExeDomain.Value;
+ let TSFlags{30-29} = OpEncBits;
+ let TSFlags{38-31} = Opcode;
// Currently no need for second bit in TSFlags - W Ignore is equivalent to 0.
- let TSFlags{38} = HasVEX_W;
- let TSFlags{39} = hasVEX_4V;
- let TSFlags{40} = hasVEX_L;
- let TSFlags{41} = hasEVEX_K;
- let TSFlags{42} = hasEVEX_Z;
- let TSFlags{43} = hasEVEX_L2;
- let TSFlags{44} = hasEVEX_B;
+ let TSFlags{39} = HasVEX_W;
+ let TSFlags{40} = hasVEX_4V;
+ let TSFlags{41} = hasVEX_L;
+ let TSFlags{42} = hasEVEX_K;
+ let TSFlags{43} = hasEVEX_Z;
+ let TSFlags{44} = hasEVEX_L2;
+ let TSFlags{45} = hasEVEX_B;
// If we run out of TSFlags bits, it's possible to encode this in 3 bits.
- let TSFlags{51-45} = CD8_Scale;
- let TSFlags{52} = hasEVEX_RC;
- let TSFlags{53} = hasNoTrackPrefix;
- let TSFlags{54} = ExplicitVEXPrefix;
+ let TSFlags{52-46} = CD8_Scale;
+ let TSFlags{53} = hasEVEX_RC;
+ let TSFlags{54} = hasNoTrackPrefix;
+ let TSFlags{55} = ExplicitVEXPrefix;
}
class PseudoI<dag oops, dag iops, list<dag> pattern>
@@ -738,18 +752,19 @@ class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm,
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
Requires<[UseSSE42]>;
-// SS42FI - SSE 4.2 instructions with T8XD prefix.
-// NOTE: 'HasSSE42' is used as SS42FI is only used for CRC32 insns.
-class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, T8XD, Requires<[HasSSE42]>;
-
// SS42AI = SSE 4.2 instructions with TA prefix
class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
Requires<[UseSSE42]>;
+// CRC32I - SSE 4.2 CRC32 instructions.
+// NOTE: 'HasCRC32' is used as CRC32 instructions are GPR only and not directly
+// controlled by the SSE42 flag.
+class CRC32I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, T8XD, Requires<[HasCRC32]>;
+
// AVX Instruction Templates:
// Instructions introduced in AVX (no SSE equivalent forms)
//
@@ -870,7 +885,6 @@ class AVX512FMA3S<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
: I<o, F, outs, ins, asm, pattern>, T8PD,
EVEX_4V, Requires<[HasAVX512]>;
-class AVX512FMA3Base : T8PD, EVEX_4V;
class AVX512<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 777c5a158b4c..166f1f8c3251 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -130,14 +130,12 @@ def X86vmtruncs : SDNode<"X86ISD::VMTRUNCS", SDTVmtrunc>;
def X86vmtruncus : SDNode<"X86ISD::VMTRUNCUS", SDTVmtrunc>;
def X86vfpext : SDNode<"X86ISD::VFPEXT",
- SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f64>,
- SDTCVecEltisVT<1, f32>,
- SDTCisSameSizeAs<0, 1>]>>;
+ SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>,
+ SDTCisFP<1>, SDTCisVec<1>]>>;
def X86strict_vfpext : SDNode<"X86ISD::STRICT_VFPEXT",
- SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f64>,
- SDTCVecEltisVT<1, f32>,
- SDTCisSameSizeAs<0, 1>]>,
+ SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>,
+ SDTCisFP<1>, SDTCisVec<1>]>,
[SDNPHasChain]>;
def X86any_vfpext : PatFrags<(ops node:$src),
@@ -145,13 +143,13 @@ def X86any_vfpext : PatFrags<(ops node:$src),
(X86vfpext node:$src)]>;
def X86vfpround: SDNode<"X86ISD::VFPROUND",
- SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>,
- SDTCVecEltisVT<1, f64>,
+ SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>,
+ SDTCisFP<1>, SDTCisVec<1>,
SDTCisOpSmallerThanOp<0, 1>]>>;
def X86strict_vfpround: SDNode<"X86ISD::STRICT_VFPROUND",
- SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>,
- SDTCVecEltisVT<1, f64>,
+ SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>,
+ SDTCisFP<1>, SDTCisVec<1>,
SDTCisOpSmallerThanOp<0, 1>]>,
[SDNPHasChain]>;
@@ -160,33 +158,32 @@ def X86any_vfpround : PatFrags<(ops node:$src),
(X86vfpround node:$src)]>;
def X86frounds : SDNode<"X86ISD::VFPROUNDS",
- SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f32>,
+ SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisVec<0>,
SDTCisSameAs<0, 1>,
- SDTCVecEltisVT<2, f64>,
+ SDTCisFP<2>, SDTCisVec<2>,
SDTCisSameSizeAs<0, 2>]>>;
def X86froundsRnd: SDNode<"X86ISD::VFPROUNDS_RND",
- SDTypeProfile<1, 3, [SDTCVecEltisVT<0, f32>,
+ SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisVec<0>,
SDTCisSameAs<0, 1>,
- SDTCVecEltisVT<2, f64>,
+ SDTCisFP<2>, SDTCisVec<2>,
SDTCisSameSizeAs<0, 2>,
SDTCisVT<3, i32>]>>;
def X86fpexts : SDNode<"X86ISD::VFPEXTS",
- SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f64>,
+ SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisVec<0>,
SDTCisSameAs<0, 1>,
- SDTCVecEltisVT<2, f32>,
+ SDTCisFP<2>, SDTCisVec<2>,
SDTCisSameSizeAs<0, 2>]>>;
def X86fpextsSAE : SDNode<"X86ISD::VFPEXTS_SAE",
- SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f64>,
+ SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisVec<0>,
SDTCisSameAs<0, 1>,
- SDTCVecEltisVT<2, f32>,
+ SDTCisFP<2>, SDTCisVec<2>,
SDTCisSameSizeAs<0, 2>]>>;
def X86vmfpround: SDNode<"X86ISD::VMFPROUND",
- SDTypeProfile<1, 3, [SDTCVecEltisVT<0, f32>,
- SDTCVecEltisVT<1, f64>,
- SDTCisSameSizeAs<0, 1>,
+ SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisVec<0>,
+ SDTCisFP<1>, SDTCisVec<1>,
SDTCisSameAs<0, 2>,
SDTCVecEltisVT<3, i1>,
SDTCisSameNumEltsAs<1, 3>]>>;
@@ -417,6 +414,11 @@ def X86Movss : SDNode<"X86ISD::MOVSS",
SDTCisVT<1, v4f32>,
SDTCisVT<2, v4f32>]>>;
+def X86Movsh : SDNode<"X86ISD::MOVSH",
+ SDTypeProfile<1, 2, [SDTCisVT<0, v8f16>,
+ SDTCisVT<1, v8f16>,
+ SDTCisVT<2, v8f16>]>>;
+
def X86Movlhps : SDNode<"X86ISD::MOVLHPS",
SDTypeProfile<1, 2, [SDTCisVT<0, v4f32>,
SDTCisVT<1, v4f32>,
@@ -570,6 +572,24 @@ def SDTIFma : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0,1>,
def x86vpmadd52l : SDNode<"X86ISD::VPMADD52L", SDTIFma, [SDNPCommutative]>;
def x86vpmadd52h : SDNode<"X86ISD::VPMADD52H", SDTIFma, [SDNPCommutative]>;
+def x86vfmaddc : SDNode<"X86ISD::VFMADDC", SDTFPTernaryOp, [SDNPCommutative]>;
+def x86vfmaddcRnd : SDNode<"X86ISD::VFMADDC_RND", SDTFmaRound, [SDNPCommutative]>;
+def x86vfcmaddc : SDNode<"X86ISD::VFCMADDC", SDTFPTernaryOp>;
+def x86vfcmaddcRnd : SDNode<"X86ISD::VFCMADDC_RND", SDTFmaRound>;
+def x86vfmulc : SDNode<"X86ISD::VFMULC", SDTFPBinOp, [SDNPCommutative]>;
+def x86vfmulcRnd : SDNode<"X86ISD::VFMULC_RND", SDTFPBinOpRound, [SDNPCommutative]>;
+def x86vfcmulc : SDNode<"X86ISD::VFCMULC", SDTFPBinOp>;
+def x86vfcmulcRnd : SDNode<"X86ISD::VFCMULC_RND", SDTFPBinOpRound>;
+
+def x86vfmaddcSh : SDNode<"X86ISD::VFMADDCSH", SDTFPTernaryOp, [SDNPCommutative]>;
+def x86vfcmaddcSh : SDNode<"X86ISD::VFCMADDCSH", SDTFPTernaryOp>;
+def x86vfmulcSh : SDNode<"X86ISD::VFMULCSH", SDTFPBinOp, [SDNPCommutative]>;
+def x86vfcmulcSh : SDNode<"X86ISD::VFCMULCSH", SDTFPBinOp>;
+def x86vfmaddcShRnd : SDNode<"X86ISD::VFMADDCSH_RND", SDTFmaRound, [SDNPCommutative]>;
+def x86vfcmaddcShRnd : SDNode<"X86ISD::VFCMADDCSH_RND",SDTFmaRound>;
+def x86vfmulcShRnd : SDNode<"X86ISD::VFMULCSH_RND", SDTFPBinOpRound, [SDNPCommutative]>;
+def x86vfcmulcShRnd : SDNode<"X86ISD::VFCMULCSH_RND", SDTFPBinOpRound>;
+
def X86rsqrt14 : SDNode<"X86ISD::RSQRT14", SDTFPUnaryOp>;
def X86rcp14 : SDNode<"X86ISD::RCP14", SDTFPUnaryOp>;
@@ -704,7 +724,6 @@ def X86cvtp2UInt : SDNode<"X86ISD::CVTP2UI", SDTFloatToInt>;
// Masked versions of above
def SDTMVintToFP: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisFP<0>, SDTCisInt<1>,
- SDTCisSameSizeAs<0, 1>,
SDTCisSameAs<0, 2>,
SDTCVecEltisVT<3, i1>,
SDTCisSameNumEltsAs<1, 3>]>;
@@ -752,12 +771,12 @@ def X86mcvtps2ph : SDNode<"X86ISD::MCVTPS2PH",
SDTCVecEltisVT<4, i1>,
SDTCisSameNumEltsAs<1, 4>]> >;
def X86vfpextSAE : SDNode<"X86ISD::VFPEXT_SAE",
- SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f64>,
- SDTCVecEltisVT<1, f32>,
+ SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>,
+ SDTCisFP<1>, SDTCisVec<1>,
SDTCisOpSmallerThanOp<1, 0>]>>;
def X86vfproundRnd: SDNode<"X86ISD::VFPROUND_RND",
- SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f32>,
- SDTCVecEltisVT<1, f64>,
+ SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisVec<0>,
+ SDTCisFP<1>, SDTCisVec<1>,
SDTCisOpSmallerThanOp<0, 1>,
SDTCisVT<2, i32>]>>;
@@ -796,6 +815,7 @@ def SDTX86MaskedStore: SDTypeProfile<0, 3, [ // masked store
//===----------------------------------------------------------------------===//
// 128-bit load pattern fragments
+def loadv8f16 : PatFrag<(ops node:$ptr), (v8f16 (load node:$ptr))>;
def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;
@@ -804,6 +824,7 @@ def loadv8i16 : PatFrag<(ops node:$ptr), (v8i16 (load node:$ptr))>;
def loadv16i8 : PatFrag<(ops node:$ptr), (v16i8 (load node:$ptr))>;
// 256-bit load pattern fragments
+def loadv16f16 : PatFrag<(ops node:$ptr), (v16f16 (load node:$ptr))>;
def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (load node:$ptr))>;
def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>;
def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>;
@@ -812,6 +833,7 @@ def loadv16i16 : PatFrag<(ops node:$ptr), (v16i16 (load node:$ptr))>;
def loadv32i8 : PatFrag<(ops node:$ptr), (v32i8 (load node:$ptr))>;
// 512-bit load pattern fragments
+def loadv32f16 : PatFrag<(ops node:$ptr), (v32f16 (load node:$ptr))>;
def loadv16f32 : PatFrag<(ops node:$ptr), (v16f32 (load node:$ptr))>;
def loadv8f64 : PatFrag<(ops node:$ptr), (v8f64 (load node:$ptr))>;
def loadv8i64 : PatFrag<(ops node:$ptr), (v8i64 (load node:$ptr))>;
@@ -823,6 +845,10 @@ def loadv64i8 : PatFrag<(ops node:$ptr), (v64i8 (load node:$ptr))>;
def extloadv2f32 : PatFrag<(ops node:$ptr), (extloadvf32 node:$ptr)>;
def extloadv4f32 : PatFrag<(ops node:$ptr), (extloadvf32 node:$ptr)>;
def extloadv8f32 : PatFrag<(ops node:$ptr), (extloadvf32 node:$ptr)>;
+def extloadv2f16 : PatFrag<(ops node:$ptr), (extloadvf16 node:$ptr)>;
+def extloadv4f16 : PatFrag<(ops node:$ptr), (extloadvf16 node:$ptr)>;
+def extloadv8f16 : PatFrag<(ops node:$ptr), (extloadvf16 node:$ptr)>;
+def extloadv16f16 : PatFrag<(ops node:$ptr), (extloadvf16 node:$ptr)>;
// Like 'store', but always requires vector size alignment.
def alignedstore : PatFrag<(ops node:$val, node:$ptr),
@@ -839,6 +865,8 @@ def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
// 128-bit aligned load pattern fragments
// NOTE: all 128-bit integer vector loads are promoted to v2i64
+def alignedloadv8f16 : PatFrag<(ops node:$ptr),
+ (v8f16 (alignedload node:$ptr))>;
def alignedloadv4f32 : PatFrag<(ops node:$ptr),
(v4f32 (alignedload node:$ptr))>;
def alignedloadv2f64 : PatFrag<(ops node:$ptr),
@@ -854,6 +882,8 @@ def alignedloadv16i8 : PatFrag<(ops node:$ptr),
// 256-bit aligned load pattern fragments
// NOTE: all 256-bit integer vector loads are promoted to v4i64
+def alignedloadv16f16 : PatFrag<(ops node:$ptr),
+ (v16f16 (alignedload node:$ptr))>;
def alignedloadv8f32 : PatFrag<(ops node:$ptr),
(v8f32 (alignedload node:$ptr))>;
def alignedloadv4f64 : PatFrag<(ops node:$ptr),
@@ -868,6 +898,8 @@ def alignedloadv32i8 : PatFrag<(ops node:$ptr),
(v32i8 (alignedload node:$ptr))>;
// 512-bit aligned load pattern fragments
+def alignedloadv32f16 : PatFrag<(ops node:$ptr),
+ (v32f16 (alignedload node:$ptr))>;
def alignedloadv16f32 : PatFrag<(ops node:$ptr),
(v16f32 (alignedload node:$ptr))>;
def alignedloadv8f64 : PatFrag<(ops node:$ptr),
@@ -926,6 +958,11 @@ def bc_v8i64 : PatFrag<(ops node:$in), (v8i64 (bitconvert node:$in))>;
def bc_v8f64 : PatFrag<(ops node:$in), (v8f64 (bitconvert node:$in))>;
def bc_v16f32 : PatFrag<(ops node:$in), (v16f32 (bitconvert node:$in))>;
+def X86vzload16 : PatFrag<(ops node:$src),
+ (X86vzld node:$src), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getStoreSize() == 2;
+}]>;
+
def X86vzload32 : PatFrag<(ops node:$src),
(X86vzld node:$src), [{
return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getStoreSize() == 4;
@@ -976,6 +1013,10 @@ def X86SubVBroadcastld256 : PatFrag<(ops node:$src),
// only load a single element.
// FIXME: We should add more canolicalizing in DAGCombine. Particulary removing
// the simple_load case.
+def sse_load_f16 : PatFrags<(ops node:$ptr),
+ [(v8f16 (simple_load node:$ptr)),
+ (v8f16 (X86vzload16 node:$ptr)),
+ (v8f16 (scalar_to_vector (loadf16 node:$ptr)))]>;
def sse_load_f32 : PatFrags<(ops node:$ptr),
[(v4f32 (simple_load node:$ptr)),
(v4f32 (X86vzload32 node:$ptr)),
@@ -985,9 +1026,13 @@ def sse_load_f64 : PatFrags<(ops node:$ptr),
(v2f64 (X86vzload64 node:$ptr)),
(v2f64 (scalar_to_vector (loadf64 node:$ptr)))]>;
+def shmem : X86MemOperand<"printwordmem", X86Mem16AsmOperand>;
def ssmem : X86MemOperand<"printdwordmem", X86Mem32AsmOperand>;
def sdmem : X86MemOperand<"printqwordmem", X86Mem64AsmOperand>;
+def fp16imm0 : PatLeaf<(f16 fpimm), [{
+ return N->isExactlyValue(+0.0);
+}]>;
def fp32imm0 : PatLeaf<(f32 fpimm), [{
return N->isExactlyValue(+0.0);
@@ -1013,6 +1058,12 @@ def INSERT_get_vinsert128_imm : SDNodeXForm<insert_subvector, [{
return getInsertVINSERTImmediate(N, 128, SDLoc(N));
}]>;
+// INSERT_get_vperm2x128_imm xform function: convert insert_subvector index to
+// commuted VPERM2F128/VPERM2I128 imm.
+def INSERT_get_vperm2x128_commutedimm : SDNodeXForm<insert_subvector, [{
+ return getPermuteVINSERTCommutedImmediate(N, 128, SDLoc(N));
+}]>;
+
// EXTRACT_get_vextract256_imm xform function: convert extract_subvector index
// to VEXTRACTF64x4 imm.
def EXTRACT_get_vextract256_imm : SDNodeXForm<extract_subvector, [{
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 12a2d92fd888..639aa5199ea5 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -19,6 +19,7 @@
#include "X86TargetMachine.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Sequence.h"
+#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineConstantPool.h"
@@ -82,7 +83,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
(STI.isTarget64BitLP64() ? X86::ADJCALLSTACKUP64
: X86::ADJCALLSTACKUP32),
X86::CATCHRET,
- (STI.is64Bit() ? X86::RETQ : X86::RETL)),
+ (STI.is64Bit() ? X86::RET64 : X86::RET32)),
Subtarget(STI), RI(STI.getTargetTriple()) {
}
@@ -699,6 +700,8 @@ static bool isFrameLoadOpcode(int Opcode, unsigned &MemBytes) {
return true;
case X86::MOV16rm:
case X86::KMOVWkm:
+ case X86::VMOVSHZrm:
+ case X86::VMOVSHZrm_alt:
MemBytes = 2;
return true;
case X86::MOV32rm:
@@ -795,6 +798,7 @@ static bool isFrameStoreOpcode(int Opcode, unsigned &MemBytes) {
return true;
case X86::MOV16mr:
case X86::KMOVWmk:
+ case X86::VMOVSHZmr:
MemBytes = 2;
return true;
case X86::MOV32mr:
@@ -980,6 +984,7 @@ bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
case X86::AVX512_512_SET0:
case X86::AVX512_512_SETALLONES:
case X86::AVX512_FsFLD0SD:
+ case X86::AVX512_FsFLD0SH:
case X86::AVX512_FsFLD0SS:
case X86::AVX512_FsFLD0F128:
case X86::AVX_SET0:
@@ -1047,6 +1052,8 @@ bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
case X86::VMOVSSZrm_alt:
case X86::VMOVSDZrm:
case X86::VMOVSDZrm_alt:
+ case X86::VMOVSHZrm:
+ case X86::VMOVSHZrm_alt:
case X86::VMOVAPDZ128rm:
case X86::VMOVAPDZ256rm:
case X86::VMOVAPDZrm:
@@ -1189,7 +1196,7 @@ inline static bool isTruncatedShiftCountForLEA(unsigned ShAmt) {
bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
unsigned Opc, bool AllowSP, Register &NewSrc,
bool &isKill, MachineOperand &ImplicitOp,
- LiveVariables *LV) const {
+ LiveVariables *LV, LiveIntervals *LIS) const {
MachineFunction &MF = *MI.getParent()->getParent();
const TargetRegisterClass *RC;
if (AllowSP) {
@@ -1199,12 +1206,12 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
&X86::GR64_NOSPRegClass : &X86::GR32_NOSPRegClass;
}
Register SrcReg = Src.getReg();
+ isKill = MI.killsRegister(SrcReg);
// For both LEA64 and LEA32 the register already has essentially the right
// type (32-bit or 64-bit) we may just need to forbid SP.
if (Opc != X86::LEA64_32r) {
NewSrc = SrcReg;
- isKill = Src.isKill();
assert(!Src.isUndef() && "Undef op doesn't need optimization");
if (NewSrc.isVirtual() && !MF.getRegInfo().constrainRegClass(NewSrc, RC))
@@ -1219,8 +1226,7 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
ImplicitOp = Src;
ImplicitOp.setImplicit();
- NewSrc = getX86SubSuperRegister(Src.getReg(), 64);
- isKill = Src.isKill();
+ NewSrc = getX86SubSuperRegister(SrcReg, 64);
assert(!Src.isUndef() && "Undef op doesn't need optimization");
} else {
// Virtual register of the wrong class, we have to create a temporary 64-bit
@@ -1229,24 +1235,36 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
MachineInstr *Copy =
BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(TargetOpcode::COPY))
.addReg(NewSrc, RegState::Define | RegState::Undef, X86::sub_32bit)
- .add(Src);
+ .addReg(SrcReg, getKillRegState(isKill));
// Which is obviously going to be dead after we're done with it.
isKill = true;
if (LV)
LV->replaceKillInstruction(SrcReg, MI, *Copy);
+
+ if (LIS) {
+ SlotIndex CopyIdx = LIS->InsertMachineInstrInMaps(*Copy);
+ SlotIndex Idx = LIS->getInstructionIndex(MI);
+ LiveInterval &LI = LIS->getInterval(SrcReg);
+ LiveRange::Segment *S = LI.getSegmentContaining(Idx);
+ if (S->end.getBaseIndex() == Idx)
+ S->end = CopyIdx.getRegSlot();
+ }
}
// We've set all the parameters without issue.
return true;
}
-MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
- unsigned MIOpc, MachineFunction::iterator &MFI, MachineInstr &MI,
- LiveVariables *LV, bool Is8BitOp) const {
+MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
+ MachineInstr &MI,
+ LiveVariables *LV,
+ LiveIntervals *LIS,
+ bool Is8BitOp) const {
// We handle 8-bit adds and various 16-bit opcodes in the switch below.
- MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
assert((Is8BitOp || RegInfo.getTargetRegisterInfo()->getRegSizeInBits(
*RegInfo.getRegClass(MI.getOperand(0).getReg())) == 16) &&
"Unexpected type for LEA transform");
@@ -1264,6 +1282,7 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
unsigned Opcode = X86::LEA64_32r;
Register InRegLEA = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
Register OutRegLEA = RegInfo.createVirtualRegister(&X86::GR32RegClass);
+ Register InRegLEA2;
// Build and insert into an implicit UNDEF value. This is OK because
// we will be shifting and then extracting the lower 8/16-bits.
@@ -1275,18 +1294,22 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
MachineBasicBlock::iterator MBBI = MI.getIterator();
Register Dest = MI.getOperand(0).getReg();
Register Src = MI.getOperand(1).getReg();
+ Register Src2;
bool IsDead = MI.getOperand(0).isDead();
bool IsKill = MI.getOperand(1).isKill();
unsigned SubReg = Is8BitOp ? X86::sub_8bit : X86::sub_16bit;
assert(!MI.getOperand(1).isUndef() && "Undef op doesn't need optimization");
- BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), InRegLEA);
+ MachineInstr *ImpDef =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), InRegLEA);
MachineInstr *InsMI =
- BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(TargetOpcode::COPY))
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), get(TargetOpcode::COPY))
.addReg(InRegLEA, RegState::Define, SubReg)
.addReg(Src, getKillRegState(IsKill));
+ MachineInstr *ImpDef2 = nullptr;
+ MachineInstr *InsMI2 = nullptr;
MachineInstrBuilder MIB =
- BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(Opcode), OutRegLEA);
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), get(Opcode), OutRegLEA);
switch (MIOpc) {
default: llvm_unreachable("Unreachable!");
case X86::SHL8ri:
@@ -1316,11 +1339,9 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
case X86::ADD8rr_DB:
case X86::ADD16rr:
case X86::ADD16rr_DB: {
- Register Src2 = MI.getOperand(2).getReg();
+ Src2 = MI.getOperand(2).getReg();
bool IsKill2 = MI.getOperand(2).isKill();
assert(!MI.getOperand(2).isUndef() && "Undef op doesn't need optimization");
- unsigned InRegLEA2 = 0;
- MachineInstr *InsMI2 = nullptr;
if (Src == Src2) {
// ADD8rr/ADD16rr killed %reg1028, %reg1028
// just a single insert_subreg.
@@ -1332,8 +1353,9 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
InRegLEA2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
// Build and insert into an implicit UNDEF value. This is OK because
// we will be shifting and then extracting the lower 8/16-bits.
- BuildMI(*MFI, &*MIB, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), InRegLEA2);
- InsMI2 = BuildMI(*MFI, &*MIB, MI.getDebugLoc(), get(TargetOpcode::COPY))
+ ImpDef2 = BuildMI(MBB, &*MIB, MI.getDebugLoc(), get(X86::IMPLICIT_DEF),
+ InRegLEA2);
+ InsMI2 = BuildMI(MBB, &*MIB, MI.getDebugLoc(), get(TargetOpcode::COPY))
.addReg(InRegLEA2, RegState::Define, SubReg)
.addReg(Src2, getKillRegState(IsKill2));
addRegReg(MIB, InRegLEA, true, InRegLEA2, true);
@@ -1346,7 +1368,7 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
MachineInstr *NewMI = MIB;
MachineInstr *ExtMI =
- BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(TargetOpcode::COPY))
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), get(TargetOpcode::COPY))
.addReg(Dest, RegState::Define | getDeadRegState(IsDead))
.addReg(OutRegLEA, RegState::Kill, SubReg);
@@ -1360,6 +1382,45 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
LV->replaceKillInstruction(Dest, MI, *ExtMI);
}
+ if (LIS) {
+ LIS->InsertMachineInstrInMaps(*ImpDef);
+ SlotIndex InsIdx = LIS->InsertMachineInstrInMaps(*InsMI);
+ if (ImpDef2)
+ LIS->InsertMachineInstrInMaps(*ImpDef2);
+ SlotIndex Ins2Idx;
+ if (InsMI2)
+ Ins2Idx = LIS->InsertMachineInstrInMaps(*InsMI2);
+ SlotIndex NewIdx = LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
+ SlotIndex ExtIdx = LIS->InsertMachineInstrInMaps(*ExtMI);
+ LIS->getInterval(InRegLEA);
+ LIS->getInterval(OutRegLEA);
+ if (InRegLEA2)
+ LIS->getInterval(InRegLEA2);
+
+ // Move the use of Src up to InsMI.
+ LiveInterval &SrcLI = LIS->getInterval(Src);
+ LiveRange::Segment *SrcSeg = SrcLI.getSegmentContaining(NewIdx);
+ if (SrcSeg->end == NewIdx.getRegSlot())
+ SrcSeg->end = InsIdx.getRegSlot();
+
+ if (InsMI2) {
+ // Move the use of Src2 up to InsMI2.
+ LiveInterval &Src2LI = LIS->getInterval(Src2);
+ LiveRange::Segment *Src2Seg = Src2LI.getSegmentContaining(NewIdx);
+ if (Src2Seg->end == NewIdx.getRegSlot())
+ Src2Seg->end = Ins2Idx.getRegSlot();
+ }
+
+ // Move the definition of Dest down to ExtMI.
+ LiveInterval &DestLI = LIS->getInterval(Dest);
+ LiveRange::Segment *DestSeg =
+ DestLI.getSegmentContaining(NewIdx.getRegSlot());
+ assert(DestSeg->start == NewIdx.getRegSlot() &&
+ DestSeg->valno->def == NewIdx.getRegSlot());
+ DestSeg->start = ExtIdx.getRegSlot();
+ DestSeg->valno->def = ExtIdx.getRegSlot();
+ }
+
return ExtMI;
}
@@ -1373,9 +1434,9 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
/// This method returns a null pointer if the transformation cannot be
/// performed, otherwise it returns the new instruction.
///
-MachineInstr *
-X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
- MachineInstr &MI, LiveVariables *LV) const {
+MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr &MI,
+ LiveVariables *LV,
+ LiveIntervals *LIS) const {
// The following opcodes also sets the condition code register(s). Only
// convert them to equivalent lea if the condition code register def's
// are dead!
@@ -1398,6 +1459,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
return nullptr;
MachineInstr *NewMI = nullptr;
+ Register SrcReg, SrcReg2;
bool Is64Bit = Subtarget.is64Bit();
bool Is8BitOp = false;
@@ -1432,10 +1494,9 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
// LEA can't handle ESP.
bool isKill;
- Register SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
- if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false,
- SrcReg, isKill, ImplicitOp, LV))
+ if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/false, SrcReg, isKill,
+ ImplicitOp, LV, LIS))
return nullptr;
MachineInstrBuilder MIB =
@@ -1460,7 +1521,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned ShAmt = getTruncatedShiftCount(MI, 2);
if (!isTruncatedShiftCountForLEA(ShAmt))
return nullptr;
- return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV, Is8BitOp);
+ return convertToThreeAddressWithLEA(MIOpc, MI, LV, LIS, Is8BitOp);
}
case X86::INC64r:
case X86::INC32r: {
@@ -1468,10 +1529,9 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r :
(Is64Bit ? X86::LEA64_32r : X86::LEA32r);
bool isKill;
- Register SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
- if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false, SrcReg, isKill,
- ImplicitOp, LV))
+ if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/false, SrcReg, isKill,
+ ImplicitOp, LV, LIS))
return nullptr;
MachineInstrBuilder MIB =
@@ -1491,10 +1551,9 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
: (Is64Bit ? X86::LEA64_32r : X86::LEA32r);
bool isKill;
- Register SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
- if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false, SrcReg, isKill,
- ImplicitOp, LV))
+ if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/false, SrcReg, isKill,
+ ImplicitOp, LV, LIS))
return nullptr;
MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
@@ -1513,7 +1572,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
LLVM_FALLTHROUGH;
case X86::DEC16r:
case X86::INC16r:
- return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV, Is8BitOp);
+ return convertToThreeAddressWithLEA(MIOpc, MI, LV, LIS, Is8BitOp);
case X86::ADD64rr:
case X86::ADD64rr_DB:
case X86::ADD32rr:
@@ -1525,21 +1584,26 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
else
Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
- bool isKill;
- Register SrcReg;
- MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
- if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true,
- SrcReg, isKill, ImplicitOp, LV))
- return nullptr;
-
const MachineOperand &Src2 = MI.getOperand(2);
bool isKill2;
- Register SrcReg2;
MachineOperand ImplicitOp2 = MachineOperand::CreateReg(0, false);
- if (!classifyLEAReg(MI, Src2, Opc, /*AllowSP=*/ false,
- SrcReg2, isKill2, ImplicitOp2, LV))
+ if (!classifyLEAReg(MI, Src2, Opc, /*AllowSP=*/false, SrcReg2, isKill2,
+ ImplicitOp2, LV, LIS))
return nullptr;
+ bool isKill;
+ MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
+ if (Src.getReg() == Src2.getReg()) {
+ // Don't call classify LEAReg a second time on the same register, in case
+ // the first call inserted a COPY from Src2 and marked it as killed.
+ isKill = isKill2;
+ SrcReg = SrcReg2;
+ } else {
+ if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/true, SrcReg, isKill,
+ ImplicitOp, LV, LIS))
+ return nullptr;
+ }
+
MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc)).add(Dest);
if (ImplicitOp.getReg() != 0)
MIB.add(ImplicitOp);
@@ -1557,7 +1621,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
LLVM_FALLTHROUGH;
case X86::ADD16rr:
case X86::ADD16rr_DB:
- return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV, Is8BitOp);
+ return convertToThreeAddressWithLEA(MIOpc, MI, LV, LIS, Is8BitOp);
case X86::ADD64ri32:
case X86::ADD64ri8:
case X86::ADD64ri32_DB:
@@ -1575,10 +1639,9 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
bool isKill;
- Register SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
- if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true,
- SrcReg, isKill, ImplicitOp, LV))
+ if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/true, SrcReg, isKill,
+ ImplicitOp, LV, LIS))
return nullptr;
MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
@@ -1598,7 +1661,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
case X86::ADD16ri8:
case X86::ADD16ri_DB:
case X86::ADD16ri8_DB:
- return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV, Is8BitOp);
+ return convertToThreeAddressWithLEA(MIOpc, MI, LV, LIS, Is8BitOp);
case X86::SUB8ri:
case X86::SUB16ri8:
case X86::SUB16ri:
@@ -1616,10 +1679,9 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
bool isKill;
- Register SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
- if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true,
- SrcReg, isKill, ImplicitOp, LV))
+ if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/true, SrcReg, isKill,
+ ImplicitOp, LV, LIS))
return nullptr;
MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
@@ -1806,7 +1868,17 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
LV->replaceKillInstruction(Dest.getReg(), MI, *NewMI);
}
- MFI->insert(MI.getIterator(), NewMI); // Insert the new inst
+ MachineBasicBlock &MBB = *MI.getParent();
+ MBB.insert(MI.getIterator(), NewMI); // Insert the new inst
+
+ if (LIS) {
+ LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
+ if (SrcReg)
+ LIS->getInterval(SrcReg);
+ if (SrcReg2)
+ LIS->getInterval(SrcReg2);
+ }
+
return NewMI;
}
@@ -2235,6 +2307,10 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
case X86::VCMPSSZrr:
case X86::VCMPPDZrri:
case X86::VCMPPSZrri:
+ case X86::VCMPSHZrr:
+ case X86::VCMPPHZrri:
+ case X86::VCMPPHZ128rri:
+ case X86::VCMPPHZ256rri:
case X86::VCMPPDZ128rri:
case X86::VCMPPSZ128rri:
case X86::VCMPPDZ256rri:
@@ -2481,6 +2557,10 @@ bool X86InstrInfo::findCommutedOpIndices(const MachineInstr &MI,
case X86::VCMPSSZrr:
case X86::VCMPPDZrri:
case X86::VCMPPSZrri:
+ case X86::VCMPSHZrr:
+ case X86::VCMPPHZrri:
+ case X86::VCMPPHZ128rri:
+ case X86::VCMPPHZ256rri:
case X86::VCMPPDZ128rri:
case X86::VCMPPSZ128rri:
case X86::VCMPPDZ256rri:
@@ -2606,7 +2686,19 @@ bool X86InstrInfo::findCommutedOpIndices(const MachineInstr &MI,
case X86::VPMADD52LUQZ256rkz:
case X86::VPMADD52LUQZr:
case X86::VPMADD52LUQZrk:
- case X86::VPMADD52LUQZrkz: {
+ case X86::VPMADD52LUQZrkz:
+ case X86::VFMADDCPHZr:
+ case X86::VFMADDCPHZrk:
+ case X86::VFMADDCPHZrkz:
+ case X86::VFMADDCPHZ128r:
+ case X86::VFMADDCPHZ128rk:
+ case X86::VFMADDCPHZ128rkz:
+ case X86::VFMADDCPHZ256r:
+ case X86::VFMADDCPHZ256rk:
+ case X86::VFMADDCPHZ256rkz:
+ case X86::VFMADDCSHZr:
+ case X86::VFMADDCSHZrk:
+ case X86::VFMADDCSHZrkz: {
unsigned CommutableOpIdx1 = 2;
unsigned CommutableOpIdx2 = 3;
if (X86II::isKMasked(Desc.TSFlags)) {
@@ -2834,11 +2926,6 @@ X86::getX86ConditionCode(CmpInst::Predicate Predicate) {
return std::make_pair(CC, NeedSwap);
}
-/// Return a setcc opcode based on whether it has memory operand.
-unsigned X86::getSETOpc(bool HasMemoryOperand) {
- return HasMemoryOperand ? X86::SETCCr : X86::SETCCm;
-}
-
/// Return a cmov opcode for the given register size in bytes, and operand type.
unsigned X86::getCMovOpcode(unsigned RegBytes, bool HasMemoryOperand) {
switch(RegBytes) {
@@ -2919,6 +3006,23 @@ unsigned X86::getSwappedVCMPImm(unsigned Imm) {
return Imm;
}
+/// Return true if the Reg is X87 register.
+static bool isX87Reg(unsigned Reg) {
+ return (Reg == X86::FPCW || Reg == X86::FPSW ||
+ (Reg >= X86::ST0 && Reg <= X86::ST7));
+}
+
+/// check if the instruction is X87 instruction
+bool X86::isX87Instruction(MachineInstr &MI) {
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+ if (isX87Reg(MO.getReg()))
+ return true;
+ }
+ return false;
+}
+
bool X86InstrInfo::isUnconditionalTailCall(const MachineInstr &MI) const {
switch (MI.getOpcode()) {
case X86::TCRETURNdi:
@@ -3018,13 +3122,13 @@ static MachineBasicBlock *getFallThroughMBB(MachineBasicBlock *MBB,
// and fallthrough MBB. If we find more than one, we cannot identify the
// fallthrough MBB and should return nullptr.
MachineBasicBlock *FallthroughBB = nullptr;
- for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI) {
- if ((*SI)->isEHPad() || (*SI == TBB && FallthroughBB))
+ for (MachineBasicBlock *Succ : MBB->successors()) {
+ if (Succ->isEHPad() || (Succ == TBB && FallthroughBB))
continue;
// Return a nullptr if we found more than one fallthrough successor.
if (FallthroughBB && FallthroughBB != TBB)
return nullptr;
- FallthroughBB = *SI;
+ FallthroughBB = Succ;
}
return FallthroughBB;
}
@@ -3228,13 +3332,13 @@ bool X86InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
MachineInstr *ConditionDef = nullptr;
bool SingleUseCondition = true;
- for (auto I = std::next(MBB.rbegin()), E = MBB.rend(); I != E; ++I) {
- if (I->modifiesRegister(X86::EFLAGS, TRI)) {
- ConditionDef = &*I;
+ for (MachineInstr &MI : llvm::drop_begin(llvm::reverse(MBB))) {
+ if (MI.modifiesRegister(X86::EFLAGS, TRI)) {
+ ConditionDef = &MI;
break;
}
- if (I->readsRegister(X86::EFLAGS, TRI))
+ if (MI.readsRegister(X86::EFLAGS, TRI))
SingleUseCondition = false;
}
@@ -3605,6 +3709,10 @@ static unsigned getLoadStoreRegOpcode(Register Reg,
case 2:
if (X86::VK16RegClass.hasSubClassEq(RC))
return load ? X86::KMOVWkm : X86::KMOVWmk;
+ if (X86::FR16XRegClass.hasSubClassEq(RC)) {
+ assert(STI.hasFP16());
+ return load ? X86::VMOVSHZrm_alt : X86::VMOVSHZmr;
+ }
assert(X86::GR16RegClass.hasSubClassEq(RC) && "Unknown 2-byte regclass");
return load ? X86::MOV16rm : X86::MOV16mr;
case 4:
@@ -3680,12 +3788,6 @@ static unsigned getLoadStoreRegOpcode(Register Reg,
HasAVX ? X86::VMOVUPSmr :
X86::MOVUPSmr);
}
- if (X86::BNDRRegClass.hasSubClassEq(RC)) {
- if (STI.is64Bit())
- return load ? X86::BNDMOV64rm : X86::BNDMOV64mr;
- else
- return load ? X86::BNDMOV32rm : X86::BNDMOV32mr;
- }
llvm_unreachable("Unknown 16-byte regclass");
}
case 32:
@@ -3904,8 +4006,8 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
}
bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
- Register &SrcReg2, int &CmpMask,
- int &CmpValue) const {
+ Register &SrcReg2, int64_t &CmpMask,
+ int64_t &CmpValue) const {
switch (MI.getOpcode()) {
default: break;
case X86::CMP64ri32:
@@ -3984,42 +4086,83 @@ bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
return false;
}
-/// Check whether the first instruction, whose only
-/// purpose is to update flags, can be made redundant.
-/// CMPrr can be made redundant by SUBrr if the operands are the same.
-/// This function can be extended later on.
-/// SrcReg, SrcRegs: register operands for FlagI.
-/// ImmValue: immediate for FlagI if it takes an immediate.
-inline static bool isRedundantFlagInstr(const MachineInstr &FlagI,
+bool X86InstrInfo::isRedundantFlagInstr(const MachineInstr &FlagI,
Register SrcReg, Register SrcReg2,
- int ImmMask, int ImmValue,
- const MachineInstr &OI) {
- if (((FlagI.getOpcode() == X86::CMP64rr && OI.getOpcode() == X86::SUB64rr) ||
- (FlagI.getOpcode() == X86::CMP32rr && OI.getOpcode() == X86::SUB32rr) ||
- (FlagI.getOpcode() == X86::CMP16rr && OI.getOpcode() == X86::SUB16rr) ||
- (FlagI.getOpcode() == X86::CMP8rr && OI.getOpcode() == X86::SUB8rr)) &&
- ((OI.getOperand(1).getReg() == SrcReg &&
- OI.getOperand(2).getReg() == SrcReg2) ||
- (OI.getOperand(1).getReg() == SrcReg2 &&
- OI.getOperand(2).getReg() == SrcReg)))
- return true;
-
- if (ImmMask != 0 &&
- ((FlagI.getOpcode() == X86::CMP64ri32 &&
- OI.getOpcode() == X86::SUB64ri32) ||
- (FlagI.getOpcode() == X86::CMP64ri8 &&
- OI.getOpcode() == X86::SUB64ri8) ||
- (FlagI.getOpcode() == X86::CMP32ri && OI.getOpcode() == X86::SUB32ri) ||
- (FlagI.getOpcode() == X86::CMP32ri8 &&
- OI.getOpcode() == X86::SUB32ri8) ||
- (FlagI.getOpcode() == X86::CMP16ri && OI.getOpcode() == X86::SUB16ri) ||
- (FlagI.getOpcode() == X86::CMP16ri8 &&
- OI.getOpcode() == X86::SUB16ri8) ||
- (FlagI.getOpcode() == X86::CMP8ri && OI.getOpcode() == X86::SUB8ri)) &&
- OI.getOperand(1).getReg() == SrcReg &&
- OI.getOperand(2).getImm() == ImmValue)
- return true;
- return false;
+ int64_t ImmMask, int64_t ImmValue,
+ const MachineInstr &OI, bool *IsSwapped,
+ int64_t *ImmDelta) const {
+ switch (OI.getOpcode()) {
+ case X86::CMP64rr:
+ case X86::CMP32rr:
+ case X86::CMP16rr:
+ case X86::CMP8rr:
+ case X86::SUB64rr:
+ case X86::SUB32rr:
+ case X86::SUB16rr:
+ case X86::SUB8rr: {
+ Register OISrcReg;
+ Register OISrcReg2;
+ int64_t OIMask;
+ int64_t OIValue;
+ if (!analyzeCompare(OI, OISrcReg, OISrcReg2, OIMask, OIValue) ||
+ OIMask != ImmMask || OIValue != ImmValue)
+ return false;
+ if (SrcReg == OISrcReg && SrcReg2 == OISrcReg2) {
+ *IsSwapped = false;
+ return true;
+ }
+ if (SrcReg == OISrcReg2 && SrcReg2 == OISrcReg) {
+ *IsSwapped = true;
+ return true;
+ }
+ return false;
+ }
+ case X86::CMP64ri32:
+ case X86::CMP64ri8:
+ case X86::CMP32ri:
+ case X86::CMP32ri8:
+ case X86::CMP16ri:
+ case X86::CMP16ri8:
+ case X86::CMP8ri:
+ case X86::SUB64ri32:
+ case X86::SUB64ri8:
+ case X86::SUB32ri:
+ case X86::SUB32ri8:
+ case X86::SUB16ri:
+ case X86::SUB16ri8:
+ case X86::SUB8ri:
+ case X86::TEST64rr:
+ case X86::TEST32rr:
+ case X86::TEST16rr:
+ case X86::TEST8rr: {
+ if (ImmMask != 0) {
+ Register OISrcReg;
+ Register OISrcReg2;
+ int64_t OIMask;
+ int64_t OIValue;
+ if (analyzeCompare(OI, OISrcReg, OISrcReg2, OIMask, OIValue) &&
+ SrcReg == OISrcReg && ImmMask == OIMask) {
+ if (OIValue == ImmValue) {
+ *ImmDelta = 0;
+ return true;
+ } else if (static_cast<uint64_t>(ImmValue) ==
+ static_cast<uint64_t>(OIValue) - 1) {
+ *ImmDelta = -1;
+ return true;
+ } else if (static_cast<uint64_t>(ImmValue) ==
+ static_cast<uint64_t>(OIValue) + 1) {
+ *ImmDelta = 1;
+ return true;
+ } else {
+ return false;
+ }
+ }
+ }
+ return FlagI.isIdenticalTo(OI);
+ }
+ default:
+ return false;
+ }
}
/// Check whether the definition can be converted
@@ -4189,8 +4332,8 @@ static X86::CondCode isUseDefConvertible(const MachineInstr &MI) {
/// operates on the same source operands and sets flags in the same way as
/// Compare; remove Compare if possible.
bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
- Register SrcReg2, int CmpMask,
- int CmpValue,
+ Register SrcReg2, int64_t CmpMask,
+ int64_t CmpValue,
const MachineRegisterInfo *MRI) const {
// Check whether we can replace SUB with CMP.
switch (CmpInstr.getOpcode()) {
@@ -4243,114 +4386,117 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
}
}
- // Get the unique definition of SrcReg.
- MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
- if (!MI) return false;
-
- // CmpInstr is the first instruction of the BB.
- MachineBasicBlock::iterator I = CmpInstr, Def = MI;
+ // The following code tries to remove the comparison by re-using EFLAGS
+ // from earlier instructions.
- // If we are comparing against zero, check whether we can use MI to update
- // EFLAGS. If MI is not in the same BB as CmpInstr, do not optimize.
bool IsCmpZero = (CmpMask != 0 && CmpValue == 0);
- if (IsCmpZero && MI->getParent() != CmpInstr.getParent())
+
+ // Transformation currently requires SSA values.
+ if (SrcReg2.isPhysical())
return false;
+ MachineInstr *SrcRegDef = MRI->getVRegDef(SrcReg);
+ assert(SrcRegDef && "Must have a definition (SSA)");
- // If we have a use of the source register between the def and our compare
- // instruction we can eliminate the compare iff the use sets EFLAGS in the
- // right way.
- bool ShouldUpdateCC = false;
+ MachineInstr *MI = nullptr;
+ MachineInstr *Sub = nullptr;
+ MachineInstr *Movr0Inst = nullptr;
bool NoSignFlag = false;
bool ClearsOverflowFlag = false;
+ bool ShouldUpdateCC = false;
+ bool IsSwapped = false;
X86::CondCode NewCC = X86::COND_INVALID;
- if (IsCmpZero && !isDefConvertible(*MI, NoSignFlag, ClearsOverflowFlag)) {
- // Scan forward from the use until we hit the use we're looking for or the
- // compare instruction.
- for (MachineBasicBlock::iterator J = MI;; ++J) {
- // Do we have a convertible instruction?
- NewCC = isUseDefConvertible(*J);
- if (NewCC != X86::COND_INVALID && J->getOperand(1).isReg() &&
- J->getOperand(1).getReg() == SrcReg) {
- assert(J->definesRegister(X86::EFLAGS) && "Must be an EFLAGS def!");
- ShouldUpdateCC = true; // Update CC later on.
- // This is not a def of SrcReg, but still a def of EFLAGS. Keep going
- // with the new def.
- Def = J;
- MI = &*Def;
- break;
- }
+ int64_t ImmDelta = 0;
- if (J == I)
+ // Search backward from CmpInstr for the next instruction defining EFLAGS.
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ MachineBasicBlock &CmpMBB = *CmpInstr.getParent();
+ MachineBasicBlock::reverse_iterator From =
+ std::next(MachineBasicBlock::reverse_iterator(CmpInstr));
+ for (MachineBasicBlock *MBB = &CmpMBB;;) {
+ for (MachineInstr &Inst : make_range(From, MBB->rend())) {
+ // Try to use EFLAGS from the instruction defining %SrcReg. Example:
+ // %eax = addl ...
+ // ... // EFLAGS not changed
+ // testl %eax, %eax // <-- can be removed
+ if (&Inst == SrcRegDef) {
+ if (IsCmpZero &&
+ isDefConvertible(Inst, NoSignFlag, ClearsOverflowFlag)) {
+ MI = &Inst;
+ break;
+ }
+ // Cannot find other candidates before definition of SrcReg.
return false;
- }
- }
+ }
- // We are searching for an earlier instruction that can make CmpInstr
- // redundant and that instruction will be saved in Sub.
- MachineInstr *Sub = nullptr;
- const TargetRegisterInfo *TRI = &getRegisterInfo();
+ if (Inst.modifiesRegister(X86::EFLAGS, TRI)) {
+ // Try to use EFLAGS produced by an instruction reading %SrcReg.
+ // Example:
+ // %eax = ...
+ // ...
+ // popcntl %eax
+ // ... // EFLAGS not changed
+ // testl %eax, %eax // <-- can be removed
+ if (IsCmpZero) {
+ NewCC = isUseDefConvertible(Inst);
+ if (NewCC != X86::COND_INVALID && Inst.getOperand(1).isReg() &&
+ Inst.getOperand(1).getReg() == SrcReg) {
+ ShouldUpdateCC = true;
+ MI = &Inst;
+ break;
+ }
+ }
- // We iterate backward, starting from the instruction before CmpInstr and
- // stop when reaching the definition of a source register or done with the BB.
- // RI points to the instruction before CmpInstr.
- // If the definition is in this basic block, RE points to the definition;
- // otherwise, RE is the rend of the basic block.
- MachineBasicBlock::reverse_iterator
- RI = ++I.getReverse(),
- RE = CmpInstr.getParent() == MI->getParent()
- ? Def.getReverse() /* points to MI */
- : CmpInstr.getParent()->rend();
- MachineInstr *Movr0Inst = nullptr;
- for (; RI != RE; ++RI) {
- MachineInstr &Instr = *RI;
- // Check whether CmpInstr can be made redundant by the current instruction.
- if (!IsCmpZero && isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpMask,
- CmpValue, Instr)) {
- Sub = &Instr;
- break;
- }
+ // Try to use EFLAGS from an instruction with similar flag results.
+ // Example:
+ // sub x, y or cmp x, y
+ // ... // EFLAGS not changed
+ // cmp x, y // <-- can be removed
+ if (isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpMask, CmpValue,
+ Inst, &IsSwapped, &ImmDelta)) {
+ Sub = &Inst;
+ break;
+ }
- if (Instr.modifiesRegister(X86::EFLAGS, TRI) ||
- Instr.readsRegister(X86::EFLAGS, TRI)) {
- // This instruction modifies or uses EFLAGS.
+      // MOV32r0 is implemented with xor, which clobbers the condition code.
+      // It is safe to move up if the definition of EFLAGS is dead and earlier
+      // instructions do not read or write EFLAGS.
+ if (!Movr0Inst && Inst.getOpcode() == X86::MOV32r0 &&
+ Inst.registerDefIsDead(X86::EFLAGS, TRI)) {
+ Movr0Inst = &Inst;
+ continue;
+ }
- // MOV32r0 etc. are implemented with xor which clobbers condition code.
- // They are safe to move up, if the definition to EFLAGS is dead and
- // earlier instructions do not read or write EFLAGS.
- if (!Movr0Inst && Instr.getOpcode() == X86::MOV32r0 &&
- Instr.registerDefIsDead(X86::EFLAGS, TRI)) {
- Movr0Inst = &Instr;
- continue;
+      // Cannot do anything for any other EFLAGS changes.
+ return false;
}
-
- // We can't remove CmpInstr.
- return false;
}
- }
- // Return false if no candidates exist.
- if (!IsCmpZero && !Sub)
- return false;
+ if (MI || Sub)
+ break;
- bool IsSwapped =
- (SrcReg2 != 0 && Sub && Sub->getOperand(1).getReg() == SrcReg2 &&
- Sub->getOperand(2).getReg() == SrcReg);
+    // Reached the beginning of the basic block. Continue in the predecessor
+    // if there is exactly one.
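+    // (If the flag producer is found through such a chain of single
+    // predecessors, EFLAGS is added to the live-ins of the intervening
+    // blocks at the end of this function.)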
+ if (MBB->pred_size() != 1)
+ return false;
+ MBB = *MBB->pred_begin();
+ From = MBB->rbegin();
+ }
// Scan forward from the instruction after CmpInstr for uses of EFLAGS.
// It is safe to remove CmpInstr if EFLAGS is redefined or killed.
// If we are done with the basic block, we need to check whether EFLAGS is
// live-out.
- bool IsSafe = false;
+ bool FlagsMayLiveOut = true;
SmallVector<std::pair<MachineInstr*, X86::CondCode>, 4> OpsToUpdate;
- MachineBasicBlock::iterator E = CmpInstr.getParent()->end();
- for (++I; I != E; ++I) {
- const MachineInstr &Instr = *I;
+ MachineBasicBlock::iterator AfterCmpInstr =
+ std::next(MachineBasicBlock::iterator(CmpInstr));
+ for (MachineInstr &Instr : make_range(AfterCmpInstr, CmpMBB.end())) {
bool ModifyEFLAGS = Instr.modifiesRegister(X86::EFLAGS, TRI);
bool UseEFLAGS = Instr.readsRegister(X86::EFLAGS, TRI);
// We should check the usage if this instruction uses and updates EFLAGS.
if (!UseEFLAGS && ModifyEFLAGS) {
// It is safe to remove CmpInstr if EFLAGS is updated again.
- IsSafe = true;
+ FlagsMayLiveOut = false;
break;
}
if (!UseEFLAGS && !ModifyEFLAGS)
@@ -4358,7 +4504,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
// EFLAGS is used by this instruction.
X86::CondCode OldCC = X86::COND_INVALID;
- if (IsCmpZero || IsSwapped) {
+ if (MI || IsSwapped || ImmDelta != 0) {
// We decode the condition code from opcode.
if (Instr.isBranch())
OldCC = X86::getCondFromBranch(Instr);
@@ -4370,7 +4516,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
if (OldCC == X86::COND_INVALID) return false;
}
X86::CondCode ReplacementCC = X86::COND_INVALID;
- if (IsCmpZero) {
+ if (MI) {
switch (OldCC) {
default: break;
case X86::COND_A: case X86::COND_AE:
@@ -4411,43 +4557,97 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
// We swap the condition code and synthesize the new opcode.
ReplacementCC = getSwappedCondition(OldCC);
if (ReplacementCC == X86::COND_INVALID) return false;
+ ShouldUpdateCC = true;
+ } else if (ImmDelta != 0) {
+ unsigned BitWidth = TRI->getRegSizeInBits(*MRI->getRegClass(SrcReg));
+      // Register bit width, used to form the min/max constants for the
+      // 8/16/32-bit instruction sizes when checking that the adjusted
+      // immediate cannot wrap.
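+      // For example (values not from this patch): if the earlier flag
+      // producer is `cmp eax, 9` and CmpInstr is `cmp eax, 10`
+      // (ImmDelta == 1), a user `jl` (x <s 10) can instead use `jle`
+      // (x <=s 9) on the older flags, provided the adjusted constant
+      // cannot wrap (checked against the min/max values below).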
+ switch (OldCC) {
+ case X86::COND_L: // x <s (C + 1) --> x <=s C
+ if (ImmDelta != 1 || APInt::getSignedMinValue(BitWidth) == CmpValue)
+ return false;
+ ReplacementCC = X86::COND_LE;
+ break;
+ case X86::COND_B: // x <u (C + 1) --> x <=u C
+ if (ImmDelta != 1 || CmpValue == 0)
+ return false;
+ ReplacementCC = X86::COND_BE;
+ break;
+ case X86::COND_GE: // x >=s (C + 1) --> x >s C
+ if (ImmDelta != 1 || APInt::getSignedMinValue(BitWidth) == CmpValue)
+ return false;
+ ReplacementCC = X86::COND_G;
+ break;
+ case X86::COND_AE: // x >=u (C + 1) --> x >u C
+ if (ImmDelta != 1 || CmpValue == 0)
+ return false;
+ ReplacementCC = X86::COND_A;
+ break;
+ case X86::COND_G: // x >s (C - 1) --> x >=s C
+ if (ImmDelta != -1 || APInt::getSignedMaxValue(BitWidth) == CmpValue)
+ return false;
+ ReplacementCC = X86::COND_GE;
+ break;
+ case X86::COND_A: // x >u (C - 1) --> x >=u C
+ if (ImmDelta != -1 || APInt::getMaxValue(BitWidth) == CmpValue)
+ return false;
+ ReplacementCC = X86::COND_AE;
+ break;
+ case X86::COND_LE: // x <=s (C - 1) --> x <s C
+ if (ImmDelta != -1 || APInt::getSignedMaxValue(BitWidth) == CmpValue)
+ return false;
+ ReplacementCC = X86::COND_L;
+ break;
+ case X86::COND_BE: // x <=u (C - 1) --> x <u C
+ if (ImmDelta != -1 || APInt::getMaxValue(BitWidth) == CmpValue)
+ return false;
+ ReplacementCC = X86::COND_B;
+ break;
+ default:
+ return false;
+ }
+ ShouldUpdateCC = true;
}
- if ((ShouldUpdateCC || IsSwapped) && ReplacementCC != OldCC) {
+ if (ShouldUpdateCC && ReplacementCC != OldCC) {
// Push the MachineInstr to OpsToUpdate.
// If it is safe to remove CmpInstr, the condition code of these
// instructions will be modified.
- OpsToUpdate.push_back(std::make_pair(&*I, ReplacementCC));
+ OpsToUpdate.push_back(std::make_pair(&Instr, ReplacementCC));
}
if (ModifyEFLAGS || Instr.killsRegister(X86::EFLAGS, TRI)) {
// It is safe to remove CmpInstr if EFLAGS is updated again or killed.
- IsSafe = true;
+ FlagsMayLiveOut = false;
break;
}
}
- // If EFLAGS is not killed nor re-defined, we should check whether it is
- // live-out. If it is live-out, do not optimize.
- if ((IsCmpZero || IsSwapped) && !IsSafe) {
- MachineBasicBlock *MBB = CmpInstr.getParent();
- for (MachineBasicBlock *Successor : MBB->successors())
+  // If we have to update users but EFLAGS is live-out, abort, since we
+  // cannot easily find all of the users.
+ if (ShouldUpdateCC && FlagsMayLiveOut) {
+ for (MachineBasicBlock *Successor : CmpMBB.successors())
if (Successor->isLiveIn(X86::EFLAGS))
return false;
}
// The instruction to be updated is either Sub or MI.
- Sub = IsCmpZero ? MI : Sub;
+ assert((MI == nullptr || Sub == nullptr) && "Should not have Sub and MI set");
+ Sub = MI != nullptr ? MI : Sub;
+ MachineBasicBlock *SubBB = Sub->getParent();
// Move Movr0Inst to the appropriate place before Sub.
if (Movr0Inst) {
+ // Only move within the same block so we don't accidentally move to a
+ // block with higher execution frequency.
+ if (&CmpMBB != SubBB)
+ return false;
// Look backwards until we find a def that doesn't use the current EFLAGS.
- Def = Sub;
- MachineBasicBlock::reverse_iterator InsertI = Def.getReverse(),
+ MachineBasicBlock::reverse_iterator InsertI = Sub,
InsertE = Sub->getParent()->rend();
for (; InsertI != InsertE; ++InsertI) {
MachineInstr *Instr = &*InsertI;
if (!Instr->readsRegister(X86::EFLAGS, TRI) &&
Instr->modifiesRegister(X86::EFLAGS, TRI)) {
- Sub->getParent()->remove(Movr0Inst);
+ Movr0Inst->getParent()->remove(Movr0Inst);
Instr->getParent()->insert(MachineBasicBlock::iterator(Instr),
Movr0Inst);
break;
@@ -4469,6 +4669,13 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
Op.first->getOperand(Op.first->getDesc().getNumOperands() - 1)
.setImm(Op.second);
}
+  // Add EFLAGS to the live-ins of every block between CmpMBB and the block
+  // of the flags producer.
+ for (MachineBasicBlock *MBB = &CmpMBB; MBB != SubBB;
+ MBB = *MBB->pred_begin()) {
+ assert(MBB->pred_size() == 1 && "Expected exactly one predecessor");
+ if (!MBB->isLiveIn(X86::EFLAGS))
+ MBB->addLiveIn(X86::EFLAGS);
+ }
return true;
}
@@ -4755,6 +4962,7 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return true;
}
case X86::AVX512_128_SET0:
+ case X86::AVX512_FsFLD0SH:
case X86::AVX512_FsFLD0SS:
case X86::AVX512_FsFLD0SD:
case X86::AVX512_FsFLD0F128: {
@@ -5158,6 +5366,26 @@ static bool hasUndefRegUpdate(unsigned Opcode, unsigned OpNum,
case X86::VCVTUSI642SDZrr_Int:
case X86::VCVTUSI642SDZrrb_Int:
case X86::VCVTUSI642SDZrm_Int:
+ case X86::VCVTSI2SHZrr:
+ case X86::VCVTSI2SHZrm:
+ case X86::VCVTSI2SHZrr_Int:
+ case X86::VCVTSI2SHZrrb_Int:
+ case X86::VCVTSI2SHZrm_Int:
+ case X86::VCVTSI642SHZrr:
+ case X86::VCVTSI642SHZrm:
+ case X86::VCVTSI642SHZrr_Int:
+ case X86::VCVTSI642SHZrrb_Int:
+ case X86::VCVTSI642SHZrm_Int:
+ case X86::VCVTUSI2SHZrr:
+ case X86::VCVTUSI2SHZrm:
+ case X86::VCVTUSI2SHZrr_Int:
+ case X86::VCVTUSI2SHZrrb_Int:
+ case X86::VCVTUSI2SHZrm_Int:
+ case X86::VCVTUSI642SHZrr:
+ case X86::VCVTUSI642SHZrm:
+ case X86::VCVTUSI642SHZrr_Int:
+ case X86::VCVTUSI642SHZrrb_Int:
+ case X86::VCVTUSI642SHZrm_Int:
// Load folding won't affect the undef register update since the input is
// a GPR.
return OpNum == 1 && !ForLoadFold;
@@ -5230,6 +5458,29 @@ static bool hasUndefRegUpdate(unsigned Opcode, unsigned OpNum,
case X86::VRCP14SDZrm:
case X86::VRCP14SSZrr:
case X86::VRCP14SSZrm:
+ case X86::VRCPSHZrr:
+ case X86::VRCPSHZrm:
+ case X86::VRSQRTSHZrr:
+ case X86::VRSQRTSHZrm:
+ case X86::VREDUCESHZrmi:
+ case X86::VREDUCESHZrri:
+ case X86::VREDUCESHZrrib:
+ case X86::VGETEXPSHZr:
+ case X86::VGETEXPSHZrb:
+ case X86::VGETEXPSHZm:
+ case X86::VGETMANTSHZrri:
+ case X86::VGETMANTSHZrrib:
+ case X86::VGETMANTSHZrmi:
+ case X86::VRNDSCALESHZr:
+ case X86::VRNDSCALESHZr_Int:
+ case X86::VRNDSCALESHZrb_Int:
+ case X86::VRNDSCALESHZm:
+ case X86::VRNDSCALESHZm_Int:
+ case X86::VSQRTSHZr:
+ case X86::VSQRTSHZr_Int:
+ case X86::VSQRTSHZrb_Int:
+ case X86::VSQRTSHZm:
+ case X86::VSQRTSHZm_Int:
case X86::VRCP28SDZr:
case X86::VRCP28SDZrb:
case X86::VRCP28SDZm:
@@ -5259,6 +5510,26 @@ static bool hasUndefRegUpdate(unsigned Opcode, unsigned OpNum,
case X86::VSQRTSDZrb_Int:
case X86::VSQRTSDZm:
case X86::VSQRTSDZm_Int:
+ case X86::VCVTSD2SHZrr:
+ case X86::VCVTSD2SHZrr_Int:
+ case X86::VCVTSD2SHZrrb_Int:
+ case X86::VCVTSD2SHZrm:
+ case X86::VCVTSD2SHZrm_Int:
+ case X86::VCVTSS2SHZrr:
+ case X86::VCVTSS2SHZrr_Int:
+ case X86::VCVTSS2SHZrrb_Int:
+ case X86::VCVTSS2SHZrm:
+ case X86::VCVTSS2SHZrm_Int:
+ case X86::VCVTSH2SDZrr:
+ case X86::VCVTSH2SDZrr_Int:
+ case X86::VCVTSH2SDZrrb_Int:
+ case X86::VCVTSH2SDZrm:
+ case X86::VCVTSH2SDZrm_Int:
+ case X86::VCVTSH2SSZrr:
+ case X86::VCVTSH2SSZrr_Int:
+ case X86::VCVTSH2SSZrrb_Int:
+ case X86::VCVTSH2SSZrm:
+ case X86::VCVTSH2SSZrm_Int:
return OpNum == 1;
case X86::VMOVSSZrrk:
case X86::VMOVSDZrrk:
@@ -6036,6 +6307,49 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
}
}
+ if ((Opc == X86::VMOVSHZrm || Opc == X86::VMOVSHZrm_alt) && RegSize > 16) {
+    // These instructions only load 16 bits; we can't fold them if the
+    // destination register is wider than 16 bits (2 bytes) and the user
+    // instruction isn't a scalar (SH) instruction.
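+    // (Rationale: folding would turn the 16-bit load into a full-width
+    // memory access for a non-scalar user, which could read bytes the
+    // original code never touched.)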
+ switch (UserOpc) {
+ case X86::VADDSHZrr_Int:
+ case X86::VCMPSHZrr_Int:
+ case X86::VDIVSHZrr_Int:
+ case X86::VMAXSHZrr_Int:
+ case X86::VMINSHZrr_Int:
+ case X86::VMULSHZrr_Int:
+ case X86::VSUBSHZrr_Int:
+ case X86::VADDSHZrr_Intk: case X86::VADDSHZrr_Intkz:
+ case X86::VCMPSHZrr_Intk:
+ case X86::VDIVSHZrr_Intk: case X86::VDIVSHZrr_Intkz:
+ case X86::VMAXSHZrr_Intk: case X86::VMAXSHZrr_Intkz:
+ case X86::VMINSHZrr_Intk: case X86::VMINSHZrr_Intkz:
+ case X86::VMULSHZrr_Intk: case X86::VMULSHZrr_Intkz:
+ case X86::VSUBSHZrr_Intk: case X86::VSUBSHZrr_Intkz:
+ case X86::VFMADD132SHZr_Int: case X86::VFNMADD132SHZr_Int:
+ case X86::VFMADD213SHZr_Int: case X86::VFNMADD213SHZr_Int:
+ case X86::VFMADD231SHZr_Int: case X86::VFNMADD231SHZr_Int:
+ case X86::VFMSUB132SHZr_Int: case X86::VFNMSUB132SHZr_Int:
+ case X86::VFMSUB213SHZr_Int: case X86::VFNMSUB213SHZr_Int:
+ case X86::VFMSUB231SHZr_Int: case X86::VFNMSUB231SHZr_Int:
+ case X86::VFMADD132SHZr_Intk: case X86::VFNMADD132SHZr_Intk:
+ case X86::VFMADD213SHZr_Intk: case X86::VFNMADD213SHZr_Intk:
+ case X86::VFMADD231SHZr_Intk: case X86::VFNMADD231SHZr_Intk:
+ case X86::VFMSUB132SHZr_Intk: case X86::VFNMSUB132SHZr_Intk:
+ case X86::VFMSUB213SHZr_Intk: case X86::VFNMSUB213SHZr_Intk:
+ case X86::VFMSUB231SHZr_Intk: case X86::VFNMSUB231SHZr_Intk:
+ case X86::VFMADD132SHZr_Intkz: case X86::VFNMADD132SHZr_Intkz:
+ case X86::VFMADD213SHZr_Intkz: case X86::VFNMADD213SHZr_Intkz:
+ case X86::VFMADD231SHZr_Intkz: case X86::VFNMADD231SHZr_Intkz:
+ case X86::VFMSUB132SHZr_Intkz: case X86::VFNMSUB132SHZr_Intkz:
+ case X86::VFMSUB213SHZr_Intkz: case X86::VFNMSUB213SHZr_Intkz:
+ case X86::VFMSUB231SHZr_Intkz: case X86::VFNMSUB231SHZr_Intkz:
+ return false;
+ default:
+ return true;
+ }
+ }
+
return false;
}
@@ -6101,6 +6415,9 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
case X86::AVX512_FsFLD0SS:
Alignment = Align(4);
break;
+ case X86::AVX512_FsFLD0SH:
+ Alignment = Align(2);
+ break;
default:
return nullptr;
}
@@ -6136,6 +6453,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
case X86::AVX512_256_SET0:
case X86::AVX512_512_SET0:
case X86::AVX512_512_SETALLONES:
+ case X86::AVX512_FsFLD0SH:
case X86::FsFLD0SD:
case X86::AVX512_FsFLD0SD:
case X86::FsFLD0SS:
@@ -6174,6 +6492,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
Ty = Type::getDoubleTy(MF.getFunction().getContext());
else if (Opc == X86::FsFLD0F128 || Opc == X86::AVX512_FsFLD0F128)
Ty = Type::getFP128Ty(MF.getFunction().getContext());
+ else if (Opc == X86::AVX512_FsFLD0SH)
+ Ty = Type::getHalfTy(MF.getFunction().getContext());
else if (Opc == X86::AVX512_512_SET0 || Opc == X86::AVX512_512_SETALLONES)
Ty = FixedVectorType::get(Type::getInt32Ty(MF.getFunction().getContext()),
16);
@@ -8384,6 +8704,14 @@ bool X86InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
case X86::VMINCSSrr:
case X86::VMINCSDZrr:
case X86::VMINCSSZrr:
+ case X86::VMAXCPHZ128rr:
+ case X86::VMAXCPHZ256rr:
+ case X86::VMAXCPHZrr:
+ case X86::VMAXCSHZrr:
+ case X86::VMINCPHZ128rr:
+ case X86::VMINCPHZ256rr:
+ case X86::VMINCPHZrr:
+ case X86::VMINCSHZrr:
return true;
case X86::ADDPDrr:
case X86::ADDPSrr:
@@ -8421,6 +8749,14 @@ bool X86InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
case X86::VMULSSrr:
case X86::VMULSDZrr:
case X86::VMULSSZrr:
+ case X86::VADDPHZ128rr:
+ case X86::VADDPHZ256rr:
+ case X86::VADDPHZrr:
+ case X86::VADDSHZrr:
+ case X86::VMULPHZ128rr:
+ case X86::VMULPHZ256rr:
+ case X86::VMULPHZrr:
+ case X86::VMULSHZrr:
return Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
Inst.getFlag(MachineInstr::MIFlag::FmNsz);
default:
@@ -8667,6 +9003,7 @@ X86InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
{MO_GOT, "x86-got"},
{MO_GOTOFF, "x86-gotoff"},
{MO_GOTPCREL, "x86-gotpcrel"},
+ {MO_GOTPCREL_NORELAX, "x86-gotpcrel-norelax"},
{MO_PLT, "x86-plt"},
{MO_TLSGD, "x86-tlsgd"},
{MO_TLSLD, "x86-tlsld"},
@@ -8966,13 +9303,8 @@ outliner::OutlinedFunction X86InstrInfo::getOutliningCandidateInfo(
MachineBasicBlock::iterator MBBI = RepeatedSequenceLocs[0].front();
for (unsigned Loc = RepeatedSequenceLocs[0].getStartIdx();
Loc < RepeatedSequenceLocs[0].getEndIdx() + 1; Loc++) {
- const std::vector<MCCFIInstruction> &CFIInstructions =
- RepeatedSequenceLocs[0].getMF()->getFrameInstructions();
- if (MBBI->isCFIInstruction()) {
- unsigned CFIIndex = MBBI->getOperand(0).getCFIIndex();
- MCCFIInstruction CFI = CFIInstructions[CFIIndex];
+ if (MBBI->isCFIInstruction())
CFICount++;
- }
MBBI++;
}
@@ -9102,7 +9434,7 @@ void X86InstrInfo::buildOutlinedFrame(MachineBasicBlock &MBB,
// We're a normal call, so our sequence doesn't have a return instruction.
// Add it in.
- MachineInstr *retq = BuildMI(MF, DebugLoc(), get(X86::RETQ));
+ MachineInstr *retq = BuildMI(MF, DebugLoc(), get(X86::RET64));
MBB.insert(MBB.end(), retq);
}
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index c663bb32af37..33ce55bbdb2b 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -37,9 +37,6 @@ enum AsmComments {
/// the instruction operands should be swapped to match the condition code.
std::pair<CondCode, bool> getX86ConditionCode(CmpInst::Predicate Predicate);
-/// Return a setcc opcode based on whether it has a memory operand.
-unsigned getSETOpc(bool HasMemoryOperand = false);
-
/// Return a cmov opcode for the given register size in bytes, and operand type.
unsigned getCMovOpcode(unsigned RegBytes, bool HasMemoryOperand = false);
@@ -68,6 +65,8 @@ unsigned getSwappedVPCOMImm(unsigned Imm);
/// Get the VCMP immediate if the opcodes are swapped.
unsigned getSwappedVCMPImm(unsigned Imm);
+/// Check if the instruction is an X87 instruction.
+bool isX87Instruction(MachineInstr &MI);
} // namespace X86
/// isGlobalStubReference - Return true if the specified TargetFlag operand is
@@ -76,6 +75,7 @@ inline static bool isGlobalStubReference(unsigned char TargetFlag) {
switch (TargetFlag) {
case X86II::MO_DLLIMPORT: // dllimport stub.
case X86II::MO_GOTPCREL: // rip-relative GOT reference.
+ case X86II::MO_GOTPCREL_NORELAX: // rip-relative GOT reference.
case X86II::MO_GOT: // normal GOT reference.
case X86II::MO_DARWIN_NONLAZY_PIC_BASE: // Normal $non_lazy_ptr ref.
case X86II::MO_DARWIN_NONLAZY: // Normal $non_lazy_ptr ref.
@@ -250,7 +250,7 @@ public:
bool classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
unsigned LEAOpcode, bool AllowSP, Register &NewSrc,
bool &isKill, MachineOperand &ImplicitOp,
- LiveVariables *LV) const;
+ LiveVariables *LV, LiveIntervals *LIS) const;
/// convertToThreeAddress - This method must be implemented by targets that
/// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target
@@ -262,9 +262,8 @@ public:
/// This method returns a null pointer if the transformation cannot be
/// performed, otherwise it returns the new instruction.
///
- MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI,
- MachineInstr &MI,
- LiveVariables *LV) const override;
+ MachineInstr *convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
+ LiveIntervals *LIS) const override;
/// Returns true iff the routine could find two commutable operands in the
/// given machine instruction.
@@ -510,14 +509,14 @@ public:
/// compares against in CmpValue. Return true if the comparison instruction
/// can be analyzed.
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
- Register &SrcReg2, int &CmpMask,
- int &CmpValue) const override;
+ Register &SrcReg2, int64_t &CmpMask,
+ int64_t &CmpValue) const override;
/// optimizeCompareInstr - Check if there exists an earlier instruction that
/// operates on the same source operands and sets flags in the same way as
/// Compare; remove Compare if possible.
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
- Register SrcReg2, int CmpMask, int CmpValue,
+ Register SrcReg2, int64_t CmpMask, int64_t CmpValue,
const MachineRegisterInfo *MRI) const override;
/// optimizeLoadInstr - Try to remove the load by folding it to a register
@@ -591,10 +590,9 @@ private:
/// This is a helper for convertToThreeAddress for 8 and 16-bit instructions.
/// We use 32-bit LEA to form 3-address code by promoting to a 32-bit
/// super-register and then truncating back down to an 8/16-bit sub-register.
- MachineInstr *convertToThreeAddressWithLEA(unsigned MIOpc,
- MachineFunction::iterator &MFI,
- MachineInstr &MI,
+ MachineInstr *convertToThreeAddressWithLEA(unsigned MIOpc, MachineInstr &MI,
LiveVariables *LV,
+ LiveIntervals *LIS,
bool Is8BitOp) const;
/// Handles memory folding for special case instructions, for instance those
@@ -631,6 +629,22 @@ private:
unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2,
bool IsIntrinsic = false) const;
+
+ /// Returns true when instruction \p FlagI produces the same flags as \p OI.
+ /// The caller should pass in the results of calling analyzeCompare on \p OI:
+ /// \p SrcReg, \p SrcReg2, \p ImmMask, \p ImmValue.
+ /// If the flags match \p OI as if it had the input operands swapped then the
+ /// function succeeds and sets \p IsSwapped to true.
+ ///
+ /// Examples of OI, FlagI pairs returning true:
+ /// CMP %1, 42 and CMP %1, 42
+ /// CMP %1, %2 and %3 = SUB %1, %2
+ /// TEST %1, %1 and %2 = SUB %1, 0
+ /// CMP %1, %2 and %3 = SUB %2, %1 ; IsSwapped=true
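+  ///   CMP %1, 10   and  CMP %1, 9     ; ImmDelta=-1 (illustrative; the
+  ///   caller must adjust the users' condition codes, e.g. JG -> JGE)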
+ bool isRedundantFlagInstr(const MachineInstr &FlagI, Register SrcReg,
+ Register SrcReg2, int64_t ImmMask, int64_t ImmValue,
+ const MachineInstr &OI, bool *IsSwapped,
+ int64_t *ImmDelta) const;
};
} // namespace llvm
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index 34afedb5bad2..fee9939b8dfc 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -91,8 +91,7 @@ def SDT_X86Call : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>;
def SDT_X86NtBrind : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>;
def SDT_X86VASTART_SAVE_XMM_REGS : SDTypeProfile<0, -1, [SDTCisVT<0, i8>,
- SDTCisVT<1, iPTR>,
- SDTCisVT<2, iPTR>]>;
+ SDTCisPtrTy<1>]>;
def SDT_X86VAARG : SDTypeProfile<1, -1, [SDTCisPtrTy<0>,
SDTCisPtrTy<1>,
@@ -112,7 +111,7 @@ def SDT_X86TLSBASEADDR : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
-def SDT_X86WIN_ALLOCA : SDTypeProfile<0, 1, [SDTCisVT<0, iPTR>]>;
+def SDT_X86DYN_ALLOCA : SDTypeProfile<0, 1, [SDTCisVT<0, iPTR>]>;
def SDT_X86SEG_ALLOCA : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>;
@@ -184,7 +183,7 @@ def X86iret : SDNode<"X86ISD::IRET", SDTX86Ret,
def X86vastart_save_xmm_regs :
SDNode<"X86ISD::VASTART_SAVE_XMM_REGS",
SDT_X86VASTART_SAVE_XMM_REGS,
- [SDNPHasChain, SDNPVariadic]>;
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPVariadic]>;
def X86vaarg64 :
SDNode<"X86ISD::VAARG_64", SDT_X86VAARG,
[SDNPHasChain, SDNPMayLoad, SDNPMayStore,
@@ -294,7 +293,7 @@ def X86pext : SDNode<"X86ISD::PEXT", SDTIntBinOp>;
def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
-def X86WinAlloca : SDNode<"X86ISD::WIN_ALLOCA", SDT_X86WIN_ALLOCA,
+def X86DynAlloca : SDNode<"X86ISD::DYN_ALLOCA", SDT_X86DYN_ALLOCA,
[SDNPHasChain, SDNPOutGlue]>;
def X86SegAlloca : SDNode<"X86ISD::SEG_ALLOCA", SDT_X86SEG_ALLOCA,
@@ -421,6 +420,7 @@ def i64mem : X86MemOperand<"printqwordmem", X86Mem64AsmOperand>;
def i128mem : X86MemOperand<"printxmmwordmem", X86Mem128AsmOperand>;
def i256mem : X86MemOperand<"printymmwordmem", X86Mem256AsmOperand>;
def i512mem : X86MemOperand<"printzmmwordmem", X86Mem512AsmOperand>;
+def f16mem : X86MemOperand<"printwordmem", X86Mem16AsmOperand>;
def f32mem : X86MemOperand<"printdwordmem", X86Mem32AsmOperand>;
def f64mem : X86MemOperand<"printqwordmem", X86Mem64AsmOperand>;
def f80mem : X86MemOperand<"printtbytemem", X86Mem80AsmOperand>;
@@ -919,6 +919,7 @@ def PKU : Predicate<"Subtarget->hasPKU()">;
def HasVNNI : Predicate<"Subtarget->hasVNNI()">;
def HasVP2INTERSECT : Predicate<"Subtarget->hasVP2INTERSECT()">;
def HasBF16 : Predicate<"Subtarget->hasBF16()">;
+def HasFP16 : Predicate<"Subtarget->hasFP16()">;
def HasAVXVNNI : Predicate <"Subtarget->hasAVXVNNI()">;
def NoVLX_Or_NoVNNI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasVNNI()">;
@@ -993,6 +994,7 @@ def HasAMXTILE : Predicate<"Subtarget->hasAMXTILE()">;
def HasAMXBF16 : Predicate<"Subtarget->hasAMXBF16()">;
def HasAMXINT8 : Predicate<"Subtarget->hasAMXINT8()">;
def HasUINTR : Predicate<"Subtarget->hasUINTR()">;
+def HasCRC32 : Predicate<"Subtarget->hasCRC32()">;
def Not64BitMode : Predicate<"!Subtarget->is64Bit()">,
AssemblerPredicate<(all_of (not Mode64Bit)), "Not 64-bit mode">;
def In64BitMode : Predicate<"Subtarget->is64Bit()">,
@@ -1193,6 +1195,7 @@ def loadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{
}]>;
def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr))>;
+def loadf16 : PatFrag<(ops node:$ptr), (f16 (load node:$ptr))>;
def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr))>;
def loadf64 : PatFrag<(ops node:$ptr), (f64 (load node:$ptr))>;
def loadf80 : PatFrag<(ops node:$ptr), (f80 (load node:$ptr))>;
@@ -3155,9 +3158,6 @@ include "X86InstrAVX512.td"
include "X86InstrMMX.td"
include "X86Instr3DNow.td"
-// MPX instructions
-include "X86InstrMPX.td"
-
include "X86InstrVMX.td"
include "X86InstrSVM.td"
include "X86InstrSNP.td"
diff --git a/llvm/lib/Target/X86/X86InstrKL.td b/llvm/lib/Target/X86/X86InstrKL.td
index b91e563a15f3..a716aab4260b 100644
--- a/llvm/lib/Target/X86/X86InstrKL.td
+++ b/llvm/lib/Target/X86/X86InstrKL.td
@@ -1,10 +1,9 @@
//===---------------------------*-tablegen-*-------------------------------===//
//===------------- X86InstrKL.td - KL Instruction Set Extension -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/llvm/lib/Target/X86/X86InstrMPX.td b/llvm/lib/Target/X86/X86InstrMPX.td
deleted file mode 100644
index 44ba071947c2..000000000000
--- a/llvm/lib/Target/X86/X86InstrMPX.td
+++ /dev/null
@@ -1,77 +0,0 @@
-//===-- X86InstrMPX.td - MPX Instruction Set ---------*- tablegen -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file describes the X86 MPX instruction set, defining the
-// instructions, and properties of the instructions which are needed for code
-// generation, machine code emission, and analysis.
-//
-//===----------------------------------------------------------------------===//
-
-// FIXME: Investigate a better scheduler class if MPX is ever used inside LLVM.
-let SchedRW = [WriteSystem] in {
-
-multiclass mpx_bound_make<bits<8> opc, string OpcodeStr> {
- def 32rm: I<opc, MRMSrcMem, (outs BNDR:$dst), (ins anymem:$src),
- OpcodeStr#"\t{$src, $dst|$dst, $src}", []>,
- Requires<[Not64BitMode]>;
- def 64rm: I<opc, MRMSrcMem, (outs BNDR:$dst), (ins anymem:$src),
- OpcodeStr#"\t{$src, $dst|$dst, $src}", []>,
- Requires<[In64BitMode]>;
-}
-
-defm BNDMK : mpx_bound_make<0x1B, "bndmk">, XS;
-
-multiclass mpx_bound_check<bits<8> opc, string OpcodeStr> {
- def 32rm: I<opc, MRMSrcMem, (outs), (ins BNDR:$src1, anymem:$src2),
- OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>,
- Requires<[Not64BitMode]>;
- def 64rm: I<opc, MRMSrcMem, (outs), (ins BNDR:$src1, anymem:$src2),
- OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>,
- Requires<[In64BitMode]>;
-
- def 32rr: I<opc, MRMSrcReg, (outs), (ins BNDR:$src1, GR32:$src2),
- OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>,
- Requires<[Not64BitMode]>;
- def 64rr: I<opc, MRMSrcReg, (outs), (ins BNDR:$src1, GR64:$src2),
- OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>,
- Requires<[In64BitMode]>;
-}
-defm BNDCL : mpx_bound_check<0x1A, "bndcl">, XS, NotMemoryFoldable;
-defm BNDCU : mpx_bound_check<0x1A, "bndcu">, XD, NotMemoryFoldable;
-defm BNDCN : mpx_bound_check<0x1B, "bndcn">, XD, NotMemoryFoldable;
-
-def BNDMOVrr : I<0x1A, MRMSrcReg, (outs BNDR:$dst), (ins BNDR:$src),
- "bndmov\t{$src, $dst|$dst, $src}", []>, PD,
- NotMemoryFoldable;
-let mayLoad = 1 in {
-def BNDMOV32rm : I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i64mem:$src),
- "bndmov\t{$src, $dst|$dst, $src}", []>, PD,
- Requires<[Not64BitMode]>, NotMemoryFoldable;
-def BNDMOV64rm : I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i128mem:$src),
- "bndmov\t{$src, $dst|$dst, $src}", []>, PD,
- Requires<[In64BitMode]>, NotMemoryFoldable;
-}
-let isCodeGenOnly = 1, ForceDisassemble = 1 in
-def BNDMOVrr_REV : I<0x1B, MRMDestReg, (outs BNDR:$dst), (ins BNDR:$src),
- "bndmov\t{$src, $dst|$dst, $src}", []>, PD,
- NotMemoryFoldable;
-let mayStore = 1 in {
-def BNDMOV32mr : I<0x1B, MRMDestMem, (outs), (ins i64mem:$dst, BNDR:$src),
- "bndmov\t{$src, $dst|$dst, $src}", []>, PD,
- Requires<[Not64BitMode]>, NotMemoryFoldable;
-def BNDMOV64mr : I<0x1B, MRMDestMem, (outs), (ins i128mem:$dst, BNDR:$src),
- "bndmov\t{$src, $dst|$dst, $src}", []>, PD,
- Requires<[In64BitMode]>, NotMemoryFoldable;
-
-def BNDSTXmr: I<0x1B, MRMDestMem, (outs), (ins anymem:$dst, BNDR:$src),
- "bndstx\t{$src, $dst|$dst, $src}", []>, PS;
-}
-let mayLoad = 1 in
-def BNDLDXrm: I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins anymem:$src),
- "bndldx\t{$src, $dst|$dst, $src}", []>, PS;
-} // SchedRW
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 41fda603d5a9..035f139e6f33 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -40,7 +40,7 @@ let isCodeGenOnly = 1 in {
}
/// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class
-multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr,
+multiclass sse12_fp_scalar_int<bits<8> opc,
SDPatternOperator OpNode, RegisterClass RC,
ValueType VT, string asm, Operand memopr,
PatFrags mem_frags, Domain d,
@@ -187,8 +187,7 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
// don't use movss/movsd for copies.
//===----------------------------------------------------------------------===//
-multiclass sse12_move_rr<SDNode OpNode, ValueType vt,
- X86MemOperand x86memop, string base_opc,
+multiclass sse12_move_rr<SDNode OpNode, ValueType vt, string base_opc,
string asm_opr, Domain d, string Name> {
let isCommutable = 1 in
def rr : SI<0x10, MRMSrcReg, (outs VR128:$dst),
@@ -210,7 +209,7 @@ multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
Domain d, string Name, Predicate pred> {
// AVX
let Predicates = [UseAVX, OptForSize] in
- defm V#NAME : sse12_move_rr<OpNode, vt, x86memop, OpcodeStr,
+ defm V#NAME : sse12_move_rr<OpNode, vt, OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}", d,
"V"#Name>,
VEX_4V, VEX_LIG, VEX_WIG;
@@ -222,7 +221,7 @@ multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
// SSE1 & 2
let Constraints = "$src1 = $dst" in {
let Predicates = [pred, NoSSE41_Or_OptForSize] in
- defm NAME : sse12_move_rr<OpNode, vt, x86memop, OpcodeStr,
+ defm NAME : sse12_move_rr<OpNode, vt, OpcodeStr,
"\t{$src2, $dst|$dst, $src2}", d, Name>;
}
@@ -1747,20 +1746,20 @@ let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in {
// XMM only
def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (X86any_vfpround (v2f64 VR128:$src)))]>,
+ [(set VR128:$dst, (v4f32 (X86any_vfpround (v2f64 VR128:$src))))]>,
VEX, Sched<[WriteCvtPD2PS]>, VEX_WIG;
def VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2ps{x}\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (X86any_vfpround (loadv2f64 addr:$src)))]>,
+ [(set VR128:$dst, (v4f32 (X86any_vfpround (loadv2f64 addr:$src))))]>,
VEX, Sched<[WriteCvtPD2PS.Folded]>, VEX_WIG;
def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (X86any_vfpround VR256:$src))]>,
+ [(set VR128:$dst, (v4f32 (X86any_vfpround (v4f64 VR256:$src))))]>,
VEX, VEX_L, Sched<[WriteCvtPD2PSY]>, VEX_WIG;
def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (X86any_vfpround (loadv4f64 addr:$src)))]>,
+ [(set VR128:$dst, (v4f32 (X86any_vfpround (loadv4f64 addr:$src))))]>,
VEX, VEX_L, Sched<[WriteCvtPD2PSY.Folded]>, VEX_WIG;
} // Predicates = [HasAVX, NoVLX]
@@ -1771,11 +1770,11 @@ def : InstAlias<"vcvtpd2psy\t{$src, $dst|$dst, $src}",
def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (X86any_vfpround (v2f64 VR128:$src)))]>,
+ [(set VR128:$dst, (v4f32 (X86any_vfpround (v2f64 VR128:$src))))]>,
Sched<[WriteCvtPD2PS]>, SIMD_EXC;
def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (X86any_vfpround (memopv2f64 addr:$src)))]>,
+ [(set VR128:$dst, (v4f32 (X86any_vfpround (memopv2f64 addr:$src))))]>,
Sched<[WriteCvtPD2PS.Folded]>, SIMD_EXC;
//===----------------------------------------------------------------------===//
@@ -2266,7 +2265,7 @@ defm PANDN : PDI_binop_all<0xDF, "pandn", X86andnp, v2i64, v4i64,
/// There are no patterns here because isel prefers integer versions for SSE2
/// and later. There are SSE1 v4f32 patterns later.
multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
- SDNode OpNode, X86SchedWriteWidths sched> {
+ X86SchedWriteWidths sched> {
let Predicates = [HasAVX, NoVLX] in {
defm V#NAME#PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f256mem, sched.YMM,
@@ -2296,11 +2295,11 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
}
}
-defm AND : sse12_fp_packed_logical<0x54, "and", and, SchedWriteFLogic>;
-defm OR : sse12_fp_packed_logical<0x56, "or", or, SchedWriteFLogic>;
-defm XOR : sse12_fp_packed_logical<0x57, "xor", xor, SchedWriteFLogic>;
+defm AND : sse12_fp_packed_logical<0x54, "and", SchedWriteFLogic>;
+defm OR : sse12_fp_packed_logical<0x56, "or", SchedWriteFLogic>;
+defm XOR : sse12_fp_packed_logical<0x57, "xor", SchedWriteFLogic>;
let isCommutable = 0 in
- defm ANDN : sse12_fp_packed_logical<0x55, "andn", X86andnp, SchedWriteFLogic>;
+ defm ANDN : sse12_fp_packed_logical<0x55, "andn", SchedWriteFLogic>;
let Predicates = [HasAVX2, NoVLX] in {
def : Pat<(v32i8 (and VR256:$src1, VR256:$src2)),
@@ -2643,18 +2642,18 @@ multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
SDPatternOperator OpNode,
X86SchedWriteSizes sched> {
let Uses = [MXCSR], mayRaiseFPException = 1 in {
- defm V#NAME#SS : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v4f32,
+ defm V#NAME#SS : sse12_fp_scalar_int<opc, OpNode, VR128, v4f32,
!strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32,
SSEPackedSingle, sched.PS.Scl, 0>, XS, VEX_4V, VEX_LIG, VEX_WIG;
- defm V#NAME#SD : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v2f64,
+ defm V#NAME#SD : sse12_fp_scalar_int<opc, OpNode, VR128, v2f64,
!strconcat(OpcodeStr, "sd"), sdmem, sse_load_f64,
SSEPackedDouble, sched.PD.Scl, 0>, XD, VEX_4V, VEX_LIG, VEX_WIG;
let Constraints = "$src1 = $dst" in {
- defm SS : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v4f32,
+ defm SS : sse12_fp_scalar_int<opc, OpNode, VR128, v4f32,
!strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32,
SSEPackedSingle, sched.PS.Scl>, XS;
- defm SD : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v2f64,
+ defm SD : sse12_fp_scalar_int<opc, OpNode, VR128, v2f64,
!strconcat(OpcodeStr, "sd"), sdmem, sse_load_f64,
SSEPackedDouble, sched.PD.Scl>, XD;
}
@@ -2790,8 +2789,8 @@ defm : scalar_math_patterns<any_fdiv, "DIVSD", X86Movsd, v2f64, f64, FR64, loadf
/// For the non-AVX defs, we need $src1 to be tied to $dst because
/// the HW instructions are 2 operand / destructive.
multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
- ValueType ScalarVT, X86MemOperand x86memop,
- Operand intmemop, SDPatternOperator OpNode, Domain d,
+ X86MemOperand x86memop, Operand intmemop,
+ SDPatternOperator OpNode, Domain d,
X86FoldableSchedWrite sched, Predicate target> {
let isCodeGenOnly = 1, hasSideEffects = 0 in {
def r : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1),
@@ -2818,9 +2817,8 @@ multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
}
-multiclass sse_fp_unop_s_intr<RegisterClass RC, ValueType vt,
- PatFrags mem_frags, Intrinsic Intr,
- Predicate target, string Suffix> {
+multiclass sse_fp_unop_s_intr<ValueType vt, PatFrags mem_frags,
+ Intrinsic Intr, Predicate target> {
let Predicates = [target] in {
// These are unary operations, but they are modeled as having 2 source operands
// because the high elements of the destination are unchanged in SSE.
@@ -2841,7 +2839,7 @@ multiclass sse_fp_unop_s_intr<RegisterClass RC, ValueType vt,
}
}
-multiclass avx_fp_unop_s_intr<RegisterClass RC, ValueType vt, PatFrags mem_frags,
+multiclass avx_fp_unop_s_intr<ValueType vt, PatFrags mem_frags,
Intrinsic Intr, Predicate target> {
let Predicates = [target] in {
def : Pat<(Intr VR128:$src),
@@ -2972,12 +2970,11 @@ let Predicates = [HasAVX, NoVLX] in {
Sched<[sched.XMM.Folded]>;
}
-multiclass sse1_fp_unop_s_intr<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86SchedWriteWidths sched, Predicate AVXTarget> {
- defm SS : sse_fp_unop_s_intr<FR32, v4f32, sse_load_f32,
+multiclass sse1_fp_unop_s_intr<string OpcodeStr, Predicate AVXTarget> {
+ defm SS : sse_fp_unop_s_intr<v4f32, sse_load_f32,
!cast<Intrinsic>("int_x86_sse_"#OpcodeStr#_ss),
- UseSSE1, "SS">, XS;
- defm V#NAME#SS : avx_fp_unop_s_intr<FR32, v4f32, sse_load_f32,
+ UseSSE1>, XS;
+ defm V#NAME#SS : avx_fp_unop_s_intr<v4f32, sse_load_f32,
!cast<Intrinsic>("int_x86_sse_"#OpcodeStr#_ss),
AVXTarget>,
XS, VEX_4V, VEX_LIG, VEX_WIG, NotMemoryFoldable;
@@ -2985,7 +2982,7 @@ multiclass sse1_fp_unop_s_intr<bits<8> opc, string OpcodeStr, SDNode OpNode,
multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
X86SchedWriteWidths sched, Predicate AVXTarget> {
- defm SS : sse_fp_unop_s<opc, OpcodeStr#ss, FR32, f32, f32mem,
+ defm SS : sse_fp_unop_s<opc, OpcodeStr#ss, FR32, f32mem,
ssmem, OpNode, SSEPackedSingle, sched.Scl, UseSSE1>, XS;
defm V#NAME#SS : avx_fp_unop_s<opc, "v"#OpcodeStr#ss, FR32, f32,
f32mem, ssmem, OpNode, SSEPackedSingle, sched.Scl, AVXTarget>,
@@ -2994,7 +2991,7 @@ multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDPatternOperator OpNod
multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
X86SchedWriteWidths sched, Predicate AVXTarget> {
- defm SD : sse_fp_unop_s<opc, OpcodeStr#sd, FR64, f64, f64mem,
+ defm SD : sse_fp_unop_s<opc, OpcodeStr#sd, FR64, f64mem,
sdmem, OpNode, SSEPackedDouble, sched.Scl, UseSSE2>, XD;
defm V#NAME#SD : avx_fp_unop_s<opc, "v"#OpcodeStr#sd, FR64, f64,
f64mem, sdmem, OpNode, SSEPackedDouble, sched.Scl, AVXTarget>,
@@ -3010,10 +3007,10 @@ defm SQRT : sse1_fp_unop_s<0x51, "sqrt", any_fsqrt, SchedWriteFSqrt, UseAVX>,
// Reciprocal approximations. Note that these typically require refinement
// in order to obtain suitable precision.
defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, SchedWriteFRsqrt, HasAVX>,
- sse1_fp_unop_s_intr<0x52, "rsqrt", X86frsqrt, SchedWriteFRsqrt, HasAVX>,
+ sse1_fp_unop_s_intr<"rsqrt", HasAVX>,
sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SchedWriteFRsqrt, [HasAVX]>;
defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, SchedWriteFRcp, HasAVX>,
- sse1_fp_unop_s_intr<0x53, "rcp", X86frcp, SchedWriteFRcp, HasAVX>,
+ sse1_fp_unop_s_intr<"rcp", HasAVX>,
sse1_fp_unop_p<0x53, "rcp", X86frcp, SchedWriteFRcp, [HasAVX]>;
// There is no f64 version of the reciprocal approximation instructions.
@@ -6588,14 +6585,14 @@ let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in {
// of r and m.
class SS42I_crc32r<bits<8> opc, string asm, RegisterClass RCOut,
RegisterClass RCIn, SDPatternOperator Int> :
- SS42FI<opc, MRMSrcReg, (outs RCOut:$dst), (ins RCOut:$src1, RCIn:$src2),
+ CRC32I<opc, MRMSrcReg, (outs RCOut:$dst), (ins RCOut:$src1, RCIn:$src2),
!strconcat(asm, "\t{$src2, $src1|$src1, $src2}"),
[(set RCOut:$dst, (Int RCOut:$src1, RCIn:$src2))]>,
Sched<[WriteCRC32]>;
class SS42I_crc32m<bits<8> opc, string asm, RegisterClass RCOut,
X86MemOperand x86memop, SDPatternOperator Int> :
- SS42FI<opc, MRMSrcMem, (outs RCOut:$dst), (ins RCOut:$src1, x86memop:$src2),
+ CRC32I<opc, MRMSrcMem, (outs RCOut:$dst), (ins RCOut:$src1, x86memop:$src2),
!strconcat(asm, "\t{$src2, $src1|$src1, $src2}"),
[(set RCOut:$dst, (Int RCOut:$src1, (load addr:$src2)))]>,
Sched<[WriteCRC32.Folded, WriteCRC32.ReadAfterFold]>;
@@ -7049,6 +7046,50 @@ def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)),
}
//===----------------------------------------------------------------------===//
+// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks
+//
+
+let ExeDomain = SSEPackedSingle in {
+let isCommutable = 1 in
+def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, u8imm:$src3),
+ "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>,
+ VEX_4V, VEX_L, Sched<[WriteFShuffle256]>;
+def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, f256mem:$src2, u8imm:$src3),
+ "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>,
+ VEX_4V, VEX_L, Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>;
+}
+
+// Immediate transform to help with commuting.
+def Perm2XCommuteImm : SDNodeXForm<timm, [{
+ return getI8Imm(N->getZExtValue() ^ 0x22, SDLoc(N));
+}]>;
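+// (Note: bits 1 and 5 of the VPERM2F128/VPERM2I128 immediate select which
+// source operand each 128-bit destination lane comes from, so XOR with 0x22
+// swaps the roles of the two sources.)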
+
+multiclass vperm2x128_lowering<string InstrStr, ValueType VT, PatFrag memop_frag> {
+ def : Pat<(VT (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 timm:$imm))),
+ (!cast<Instruction>(InstrStr#rr) VR256:$src1, VR256:$src2, timm:$imm)>;
+ def : Pat<(VT (X86VPerm2x128 VR256:$src1, (memop_frag addr:$src2), (i8 timm:$imm))),
+ (!cast<Instruction>(InstrStr#rm) VR256:$src1, addr:$src2, timm:$imm)>;
+ // Pattern with load in other operand.
+ def : Pat<(VT (X86VPerm2x128 (memop_frag addr:$src2), VR256:$src1, (i8 timm:$imm))),
+ (!cast<Instruction>(InstrStr#rm) VR256:$src1, addr:$src2,
+ (Perm2XCommuteImm timm:$imm))>;
+}
+
+let Predicates = [HasAVX] in {
+ defm : vperm2x128_lowering<"VPERM2F128", v4f64, loadv4f64>;
+ defm : vperm2x128_lowering<"VPERM2F128", v8f32, loadv8f32>;
+}
+
+let Predicates = [HasAVX1Only] in {
+ defm : vperm2x128_lowering<"VPERM2F128", v4i64, loadv4i64>;
+ defm : vperm2x128_lowering<"VPERM2F128", v8i32, loadv8i32>;
+ defm : vperm2x128_lowering<"VPERM2F128", v16i16, loadv16i16>;
+ defm : vperm2x128_lowering<"VPERM2F128", v32i8, loadv32i8>;
+}
+
+//===----------------------------------------------------------------------===//
// VINSERTF128 - Insert packed floating-point values
//
let hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
@@ -7070,29 +7111,37 @@ let Predicates = [HasAVX1Only] in {
def : Pat<(v8i32 immAllOnesV), (VCMPPSYrri (AVX_SET0), (AVX_SET0), 0xf)>;
}
-multiclass vinsert_lowering<string InstrStr, ValueType From, ValueType To,
- PatFrag memop_frag> {
+multiclass vinsert_lowering<string InstrStr, string PermStr,
+ ValueType From, ValueType To,
+ PatFrag frommemop_frag, PatFrag tomemop_frag> {
def : Pat<(vinsert128_insert:$ins (To VR256:$src1), (From VR128:$src2),
(iPTR imm)),
(!cast<Instruction>(InstrStr#rr) VR256:$src1, VR128:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (To VR256:$src1),
- (From (memop_frag addr:$src2)),
+ (From (frommemop_frag addr:$src2)),
(iPTR imm)),
(!cast<Instruction>(InstrStr#rm) VR256:$src1, addr:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
+ // Folding "To" vector - convert to perm2x128 and commute inputs.
+ def : Pat<(vinsert128_insert:$ins (To (tomemop_frag addr:$src1)),
+ (From VR128:$src2),
+ (iPTR imm)),
+ (!cast<Instruction>(PermStr#rm)
+ (INSERT_SUBREG (To (IMPLICIT_DEF)), VR128:$src2, sub_xmm),
+ addr:$src1, (INSERT_get_vperm2x128_commutedimm VR256:$ins))>;
}
let Predicates = [HasAVX, NoVLX] in {
- defm : vinsert_lowering<"VINSERTF128", v4f32, v8f32, loadv4f32>;
- defm : vinsert_lowering<"VINSERTF128", v2f64, v4f64, loadv2f64>;
+ defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v4f32, v8f32, loadv4f32, loadv8f32>;
+ defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v2f64, v4f64, loadv2f64, loadv4f64>;
}
let Predicates = [HasAVX1Only] in {
- defm : vinsert_lowering<"VINSERTF128", v2i64, v4i64, loadv2i64>;
- defm : vinsert_lowering<"VINSERTF128", v4i32, v8i32, loadv4i32>;
- defm : vinsert_lowering<"VINSERTF128", v8i16, v16i16, loadv8i16>;
- defm : vinsert_lowering<"VINSERTF128", v16i8, v32i8, loadv16i8>;
+ defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v2i64, v4i64, loadv2i64, loadv4i64>;
+ defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v4i32, v8i32, loadv4i32, loadv8i32>;
+ defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v8i16, v16i16, loadv8i16, loadv16i16>;
+ defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v16i8, v32i8, loadv16i8, loadv32i8>;
}
//===----------------------------------------------------------------------===//
@@ -7297,50 +7346,6 @@ let ExeDomain = SSEPackedDouble in {
}
//===----------------------------------------------------------------------===//
-// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks
-//
-
-let ExeDomain = SSEPackedSingle in {
-let isCommutable = 1 in
-def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2, u8imm:$src3),
- "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>,
- VEX_4V, VEX_L, Sched<[WriteFShuffle256]>;
-def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
- (ins VR256:$src1, f256mem:$src2, u8imm:$src3),
- "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>,
- VEX_4V, VEX_L, Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>;
-}
-
-// Immediate transform to help with commuting.
-def Perm2XCommuteImm : SDNodeXForm<timm, [{
- return getI8Imm(N->getZExtValue() ^ 0x22, SDLoc(N));
-}]>;
-
-multiclass vperm2x128_lowering<string InstrStr, ValueType VT, PatFrag memop_frag> {
- def : Pat<(VT (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 timm:$imm))),
- (!cast<Instruction>(InstrStr#rr) VR256:$src1, VR256:$src2, timm:$imm)>;
- def : Pat<(VT (X86VPerm2x128 VR256:$src1, (memop_frag addr:$src2), (i8 timm:$imm))),
- (!cast<Instruction>(InstrStr#rm) VR256:$src1, addr:$src2, timm:$imm)>;
- // Pattern with load in other operand.
- def : Pat<(VT (X86VPerm2x128 (memop_frag addr:$src2), VR256:$src1, (i8 timm:$imm))),
- (!cast<Instruction>(InstrStr#rm) VR256:$src1, addr:$src2,
- (Perm2XCommuteImm timm:$imm))>;
-}
-
-let Predicates = [HasAVX] in {
- defm : vperm2x128_lowering<"VPERM2F128", v4f64, loadv4f64>;
- defm : vperm2x128_lowering<"VPERM2F128", v8f32, loadv8f32>;
-}
-
-let Predicates = [HasAVX1Only] in {
- defm : vperm2x128_lowering<"VPERM2F128", v4i64, loadv4i64>;
- defm : vperm2x128_lowering<"VPERM2F128", v8i32, loadv8i32>;
- defm : vperm2x128_lowering<"VPERM2F128", v16i16, loadv16i16>;
- defm : vperm2x128_lowering<"VPERM2F128", v32i8, loadv32i8>;
-}
-
-//===----------------------------------------------------------------------===//
// VZERO - Zero YMM registers
// Note: These instructions do not affect YMM16-YMM31.
//
@@ -7625,10 +7630,18 @@ let Predicates = [HasAVX1Only] in {
(VINSERTF128rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
(v4f32 (VPERMILPSri (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)), 0)), sub_xmm),
(v4f32 (VPERMILPSri (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)), 0)), 1)>;
+ def : Pat<(v8f32 (X86VBroadcast v4f32:$src)),
+ (VINSERTF128rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
+ (v4f32 (VPERMILPSri VR128:$src, 0)), sub_xmm),
+ (v4f32 (VPERMILPSri VR128:$src, 0)), 1)>;
def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
(VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
(v2f64 (VMOVDDUPrr (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))), sub_xmm),
(v2f64 (VMOVDDUPrr (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))), 1)>;
+ def : Pat<(v4f64 (X86VBroadcast v2f64:$src)),
+ (VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
+ (v2f64 (VMOVDDUPrr VR128:$src)), sub_xmm),
+ (v2f64 (VMOVDDUPrr VR128:$src)), 1)>;
def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
(VPSHUFDri (VMOVDI2PDIrr GR32:$src), 0)>;
@@ -7741,10 +7754,10 @@ def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst),
}
let Predicates = [HasAVX2, NoVLX] in {
- defm : vinsert_lowering<"VINSERTI128", v2i64, v4i64, loadv2i64>;
- defm : vinsert_lowering<"VINSERTI128", v4i32, v8i32, loadv4i32>;
- defm : vinsert_lowering<"VINSERTI128", v8i16, v16i16, loadv8i16>;
- defm : vinsert_lowering<"VINSERTI128", v16i8, v32i8, loadv16i8>;
+ defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v2i64, v4i64, loadv2i64, loadv4i64>;
+ defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v4i32, v8i32, loadv4i32, loadv8i32>;
+ defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v8i16, v16i16, loadv8i16, loadv16i16>;
+ defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v16i8, v32i8, loadv16i8, loadv32i8>;
}
//===----------------------------------------------------------------------===//
@@ -7889,10 +7902,8 @@ let Predicates = [HasAVX2, NoVLX] in {
// VGATHER - GATHER Operations
// FIXME: Improve scheduling of gather instructions.
-multiclass avx2_gather<bits<8> opc, string OpcodeStr, ValueType VTx,
- ValueType VTy, RegisterClass RC256,
- X86MemOperand memop128, X86MemOperand memop256,
- ValueType MTx = VTx, ValueType MTy = VTy> {
+multiclass avx2_gather<bits<8> opc, string OpcodeStr, RegisterClass RC256,
+ X86MemOperand memop128, X86MemOperand memop256> {
let mayLoad = 1, hasSideEffects = 0 in {
def rm : AVX28I<opc, MRMSrcMem4VOp3, (outs VR128:$dst, VR128:$mask_wb),
(ins VR128:$src1, memop128:$src2, VR128:$mask),
@@ -7911,27 +7922,27 @@ let Predicates = [HasAVX2] in {
let mayLoad = 1, hasSideEffects = 0, Constraints
= "@earlyclobber $dst,@earlyclobber $mask_wb, $src1 = $dst, $mask = $mask_wb"
in {
- defm VPGATHERDQ : avx2_gather<0x90, "vpgatherdq", v2i64, v4i64,
- VR256, vx128mem, vx256mem>, VEX_W;
- defm VPGATHERQQ : avx2_gather<0x91, "vpgatherqq", v2i64, v4i64,
- VR256, vx128mem, vy256mem>, VEX_W;
- defm VPGATHERDD : avx2_gather<0x90, "vpgatherdd", v4i32, v8i32,
- VR256, vx128mem, vy256mem>;
- defm VPGATHERQD : avx2_gather<0x91, "vpgatherqd", v4i32, v4i32,
- VR128, vx64mem, vy128mem>;
+ defm VPGATHERDQ : avx2_gather<0x90, "vpgatherdq",
+ VR256, vx128mem, vx256mem>, VEX_W;
+ defm VPGATHERQQ : avx2_gather<0x91, "vpgatherqq",
+ VR256, vx128mem, vy256mem>, VEX_W;
+ defm VPGATHERDD : avx2_gather<0x90, "vpgatherdd",
+ VR256, vx128mem, vy256mem>;
+ defm VPGATHERQD : avx2_gather<0x91, "vpgatherqd",
+ VR128, vx64mem, vy128mem>;
let ExeDomain = SSEPackedDouble in {
- defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd", v2f64, v4f64,
- VR256, vx128mem, vx256mem, v2i64, v4i64>, VEX_W;
- defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd", v2f64, v4f64,
- VR256, vx128mem, vy256mem, v2i64, v4i64>, VEX_W;
+ defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd",
+ VR256, vx128mem, vx256mem>, VEX_W;
+ defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd",
+ VR256, vx128mem, vy256mem>, VEX_W;
}
let ExeDomain = SSEPackedSingle in {
- defm VGATHERDPS : avx2_gather<0x92, "vgatherdps", v4f32, v8f32,
- VR256, vx128mem, vy256mem, v4i32, v8i32>;
- defm VGATHERQPS : avx2_gather<0x93, "vgatherqps", v4f32, v4f32,
- VR128, vx64mem, vy128mem, v4i32, v4i32>;
+ defm VGATHERDPS : avx2_gather<0x92, "vgatherdps",
+ VR256, vx128mem, vy256mem>;
+ defm VGATHERQPS : avx2_gather<0x93, "vgatherqps",
+ VR128, vx64mem, vy128mem>;
}
}
}
diff --git a/llvm/lib/Target/X86/X86InstrSystem.td b/llvm/lib/Target/X86/X86InstrSystem.td
index 48c27051a872..b4dd99d08a62 100644
--- a/llvm/lib/Target/X86/X86InstrSystem.td
+++ b/llvm/lib/Target/X86/X86InstrSystem.td
@@ -529,16 +529,17 @@ let SchedRW = [WriteSystem] in {
//===----------------------------------------------------------------------===//
// XSAVE instructions
let SchedRW = [WriteSystem] in {
-let Predicates = [HasXSAVE] in {
+// NOTE: No HasXSAVE predicate so that these can be used with _xgetbv/_xsetbv
+// on Windows without needing to enable the xsave feature, for compatibility
+// with MSVC.
let Defs = [EDX, EAX], Uses = [ECX] in
- def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, PS;
+def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, PS;
let Uses = [EDX, EAX, ECX] in
- def XSETBV : I<0x01, MRM_D1, (outs), (ins),
- "xsetbv",
- [(int_x86_xsetbv ECX, EDX, EAX)]>, PS;
+def XSETBV : I<0x01, MRM_D1, (outs), (ins),
+ "xsetbv",
+ [(int_x86_xsetbv ECX, EDX, EAX)]>, PS;
-} // HasXSAVE
let Uses = [EDX, EAX] in {
def XSAVE : I<0xAE, MRM4m, (outs), (ins opaquemem:$dst),
@@ -583,7 +584,7 @@ def XRSTORS64 : RI<0xC7, MRM3m, (outs), (ins opaquemem:$dst),
//===----------------------------------------------------------------------===//
// VIA PadLock crypto instructions
let Defs = [RAX, RDI], Uses = [RDX, RDI], SchedRW = [WriteSystem] in
- def XSTORE : I<0xa7, MRM_C0, (outs), (ins), "xstore", []>, TB, REP;
+ def XSTORE : I<0xa7, MRM_C0, (outs), (ins), "xstore", []>, TB;
def : InstAlias<"xstorerng", (XSTORE)>;
diff --git a/llvm/lib/Target/X86/X86InstrVecCompiler.td b/llvm/lib/Target/X86/X86InstrVecCompiler.td
index e98843bd3ae3..2429aa113fb1 100644
--- a/llvm/lib/Target/X86/X86InstrVecCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrVecCompiler.td
@@ -25,6 +25,8 @@ let Predicates = [NoAVX512] in {
let Predicates = [HasAVX512] in {
// A vector extract of the first f32/f64 position is a subregister copy
+ def : Pat<(f16 (extractelt (v8f16 VR128X:$src), (iPTR 0))),
+ (COPY_TO_REGCLASS (v8f16 VR128X:$src), FR16X)>;
def : Pat<(f32 (extractelt (v4f32 VR128X:$src), (iPTR 0))),
(COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X)>;
def : Pat<(f64 (extractelt (v2f64 VR128X:$src), (iPTR 0))),
@@ -32,6 +34,8 @@ let Predicates = [HasAVX512] in {
}
let Predicates = [NoVLX] in {
+ def : Pat<(v8f16 (scalar_to_vector FR16X:$src)),
+ (COPY_TO_REGCLASS FR16X:$src, VR128)>;
// Implicitly promote a 32-bit scalar to a vector.
def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
(COPY_TO_REGCLASS FR32:$src, VR128)>;
@@ -41,6 +45,8 @@ let Predicates = [NoVLX] in {
}
let Predicates = [HasVLX] in {
+ def : Pat<(v8f16 (scalar_to_vector FR16X:$src)),
+ (COPY_TO_REGCLASS FR16X:$src, VR128X)>;
// Implicitly promote a 32-bit scalar to a vector.
def : Pat<(v4f32 (scalar_to_vector FR32X:$src)),
(COPY_TO_REGCLASS FR32X:$src, VR128X)>;
@@ -74,6 +80,7 @@ defm : subvector_subreg_lowering<VR128, v2i64, VR256, v4i64, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v2f64, VR256, v4f64, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v8i16, VR256, v16i16, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v16i8, VR256, v32i8, sub_xmm>;
+defm : subvector_subreg_lowering<VR128, v8f16, VR256, v16f16, sub_xmm>;
// A 128-bit subvector extract from the first 512-bit vector position is a
// subregister copy that needs no instruction. Likewise, a 128-bit subvector
@@ -85,6 +92,7 @@ defm : subvector_subreg_lowering<VR128, v2i64, VR512, v8i64, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v2f64, VR512, v8f64, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v8i16, VR512, v32i16, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v16i8, VR512, v64i8, sub_xmm>;
+defm : subvector_subreg_lowering<VR128, v8f16, VR512, v32f16, sub_xmm>;
// A 128-bit subvector extract from the first 512-bit vector position is a
// subregister copy that needs no instruction. Likewise, a 128-bit subvector
@@ -96,6 +104,7 @@ defm : subvector_subreg_lowering<VR256, v4i64, VR512, v8i64, sub_ymm>;
defm : subvector_subreg_lowering<VR256, v4f64, VR512, v8f64, sub_ymm>;
defm : subvector_subreg_lowering<VR256, v16i16, VR512, v32i16, sub_ymm>;
defm : subvector_subreg_lowering<VR256, v32i8, VR512, v64i8, sub_ymm>;
+defm : subvector_subreg_lowering<VR256, v16f16, VR512, v32f16, sub_ymm>;
// If we're inserting into an all zeros vector, just use a plain move which
@@ -103,8 +112,7 @@ defm : subvector_subreg_lowering<VR256, v32i8, VR512, v64i8, sub_ymm>;
// any moves that we can prove are unnecessary.
multiclass subvec_zero_lowering<string MoveStr,
RegisterClass RC, ValueType DstTy,
- ValueType SrcTy, ValueType ZeroTy,
- SubRegIndex SubIdx> {
+ ValueType SrcTy, SubRegIndex SubIdx> {
def : Pat<(DstTy (insert_subvector immAllZerosV,
(SrcTy RC:$src), (iPTR 0))),
(SUBREG_TO_REG (i64 0),
@@ -112,51 +120,57 @@ multiclass subvec_zero_lowering<string MoveStr,
}
let Predicates = [HasAVX, NoVLX] in {
- defm : subvec_zero_lowering<"APD", VR128, v4f64, v2f64, v8i32, sub_xmm>;
- defm : subvec_zero_lowering<"APS", VR128, v8f32, v4f32, v8i32, sub_xmm>;
- defm : subvec_zero_lowering<"DQA", VR128, v4i64, v2i64, v8i32, sub_xmm>;
- defm : subvec_zero_lowering<"DQA", VR128, v8i32, v4i32, v8i32, sub_xmm>;
- defm : subvec_zero_lowering<"DQA", VR128, v16i16, v8i16, v8i32, sub_xmm>;
- defm : subvec_zero_lowering<"DQA", VR128, v32i8, v16i8, v8i32, sub_xmm>;
+ defm : subvec_zero_lowering<"APD", VR128, v4f64, v2f64, sub_xmm>;
+ defm : subvec_zero_lowering<"APS", VR128, v8f32, v4f32, sub_xmm>;
+ defm : subvec_zero_lowering<"DQA", VR128, v4i64, v2i64, sub_xmm>;
+ defm : subvec_zero_lowering<"DQA", VR128, v8i32, v4i32, sub_xmm>;
+ defm : subvec_zero_lowering<"DQA", VR128, v16i16, v8i16, sub_xmm>;
+ defm : subvec_zero_lowering<"DQA", VR128, v32i8, v16i8, sub_xmm>;
}
let Predicates = [HasVLX] in {
- defm : subvec_zero_lowering<"APDZ128", VR128X, v4f64, v2f64, v8i32, sub_xmm>;
- defm : subvec_zero_lowering<"APSZ128", VR128X, v8f32, v4f32, v8i32, sub_xmm>;
- defm : subvec_zero_lowering<"DQA64Z128", VR128X, v4i64, v2i64, v8i32, sub_xmm>;
- defm : subvec_zero_lowering<"DQA64Z128", VR128X, v8i32, v4i32, v8i32, sub_xmm>;
- defm : subvec_zero_lowering<"DQA64Z128", VR128X, v16i16, v8i16, v8i32, sub_xmm>;
- defm : subvec_zero_lowering<"DQA64Z128", VR128X, v32i8, v16i8, v8i32, sub_xmm>;
-
- defm : subvec_zero_lowering<"APDZ128", VR128X, v8f64, v2f64, v16i32, sub_xmm>;
- defm : subvec_zero_lowering<"APSZ128", VR128X, v16f32, v4f32, v16i32, sub_xmm>;
- defm : subvec_zero_lowering<"DQA64Z128", VR128X, v8i64, v2i64, v16i32, sub_xmm>;
- defm : subvec_zero_lowering<"DQA64Z128", VR128X, v16i32, v4i32, v16i32, sub_xmm>;
- defm : subvec_zero_lowering<"DQA64Z128", VR128X, v32i16, v8i16, v16i32, sub_xmm>;
- defm : subvec_zero_lowering<"DQA64Z128", VR128X, v64i8, v16i8, v16i32, sub_xmm>;
-
- defm : subvec_zero_lowering<"APDZ256", VR256X, v8f64, v4f64, v16i32, sub_ymm>;
- defm : subvec_zero_lowering<"APSZ256", VR256X, v16f32, v8f32, v16i32, sub_ymm>;
- defm : subvec_zero_lowering<"DQA64Z256", VR256X, v8i64, v4i64, v16i32, sub_ymm>;
- defm : subvec_zero_lowering<"DQA64Z256", VR256X, v16i32, v8i32, v16i32, sub_ymm>;
- defm : subvec_zero_lowering<"DQA64Z256", VR256X, v32i16, v16i16, v16i32, sub_ymm>;
- defm : subvec_zero_lowering<"DQA64Z256", VR256X, v64i8, v32i8, v16i32, sub_ymm>;
+ defm : subvec_zero_lowering<"APDZ128", VR128X, v4f64, v2f64, sub_xmm>;
+ defm : subvec_zero_lowering<"APSZ128", VR128X, v8f32, v4f32, sub_xmm>;
+ defm : subvec_zero_lowering<"DQA64Z128", VR128X, v4i64, v2i64, sub_xmm>;
+ defm : subvec_zero_lowering<"DQA64Z128", VR128X, v8i32, v4i32, sub_xmm>;
+ defm : subvec_zero_lowering<"DQA64Z128", VR128X, v16i16, v8i16, sub_xmm>;
+ defm : subvec_zero_lowering<"DQA64Z128", VR128X, v32i8, v16i8, sub_xmm>;
+
+ defm : subvec_zero_lowering<"APDZ128", VR128X, v8f64, v2f64, sub_xmm>;
+ defm : subvec_zero_lowering<"APSZ128", VR128X, v16f32, v4f32, sub_xmm>;
+ defm : subvec_zero_lowering<"DQA64Z128", VR128X, v8i64, v2i64, sub_xmm>;
+ defm : subvec_zero_lowering<"DQA64Z128", VR128X, v16i32, v4i32, sub_xmm>;
+ defm : subvec_zero_lowering<"DQA64Z128", VR128X, v32i16, v8i16, sub_xmm>;
+ defm : subvec_zero_lowering<"DQA64Z128", VR128X, v64i8, v16i8, sub_xmm>;
+
+ defm : subvec_zero_lowering<"APDZ256", VR256X, v8f64, v4f64, sub_ymm>;
+ defm : subvec_zero_lowering<"APSZ256", VR256X, v16f32, v8f32, sub_ymm>;
+ defm : subvec_zero_lowering<"DQA64Z256", VR256X, v8i64, v4i64, sub_ymm>;
+ defm : subvec_zero_lowering<"DQA64Z256", VR256X, v16i32, v8i32, sub_ymm>;
+ defm : subvec_zero_lowering<"DQA64Z256", VR256X, v32i16, v16i16, sub_ymm>;
+ defm : subvec_zero_lowering<"DQA64Z256", VR256X, v64i8, v32i8, sub_ymm>;
}
let Predicates = [HasAVX512, NoVLX] in {
- defm : subvec_zero_lowering<"APD", VR128, v8f64, v2f64, v16i32, sub_xmm>;
- defm : subvec_zero_lowering<"APS", VR128, v16f32, v4f32, v16i32, sub_xmm>;
- defm : subvec_zero_lowering<"DQA", VR128, v8i64, v2i64, v16i32, sub_xmm>;
- defm : subvec_zero_lowering<"DQA", VR128, v16i32, v4i32, v16i32, sub_xmm>;
- defm : subvec_zero_lowering<"DQA", VR128, v32i16, v8i16, v16i32, sub_xmm>;
- defm : subvec_zero_lowering<"DQA", VR128, v64i8, v16i8, v16i32, sub_xmm>;
-
- defm : subvec_zero_lowering<"APDY", VR256, v8f64, v4f64, v16i32, sub_ymm>;
- defm : subvec_zero_lowering<"APSY", VR256, v16f32, v8f32, v16i32, sub_ymm>;
- defm : subvec_zero_lowering<"DQAY", VR256, v8i64, v4i64, v16i32, sub_ymm>;
- defm : subvec_zero_lowering<"DQAY", VR256, v16i32, v8i32, v16i32, sub_ymm>;
- defm : subvec_zero_lowering<"DQAY", VR256, v32i16, v16i16, v16i32, sub_ymm>;
- defm : subvec_zero_lowering<"DQAY", VR256, v64i8, v32i8, v16i32, sub_ymm>;
+ defm : subvec_zero_lowering<"APD", VR128, v8f64, v2f64, sub_xmm>;
+ defm : subvec_zero_lowering<"APS", VR128, v16f32, v4f32, sub_xmm>;
+ defm : subvec_zero_lowering<"DQA", VR128, v8i64, v2i64, sub_xmm>;
+ defm : subvec_zero_lowering<"DQA", VR128, v16i32, v4i32, sub_xmm>;
+ defm : subvec_zero_lowering<"DQA", VR128, v32i16, v8i16, sub_xmm>;
+ defm : subvec_zero_lowering<"DQA", VR128, v64i8, v16i8, sub_xmm>;
+
+ defm : subvec_zero_lowering<"APDY", VR256, v8f64, v4f64, sub_ymm>;
+ defm : subvec_zero_lowering<"APSY", VR256, v16f32, v8f32, sub_ymm>;
+ defm : subvec_zero_lowering<"DQAY", VR256, v8i64, v4i64, sub_ymm>;
+ defm : subvec_zero_lowering<"DQAY", VR256, v16i32, v8i32, sub_ymm>;
+ defm : subvec_zero_lowering<"DQAY", VR256, v32i16, v16i16, sub_ymm>;
+ defm : subvec_zero_lowering<"DQAY", VR256, v64i8, v32i8, sub_ymm>;
+}
+
+let Predicates = [HasFP16, HasVLX] in {
+ defm : subvec_zero_lowering<"APSZ128", VR128X, v16f16, v8f16, sub_xmm>;
+ defm : subvec_zero_lowering<"APSZ128", VR128X, v32f16, v8f16, sub_xmm>;
+ defm : subvec_zero_lowering<"APSZ256", VR256X, v32f16, v16f16, sub_ymm>;
}
class maskzeroupper<ValueType vt, RegisterClass RC> :
diff --git a/llvm/lib/Target/X86/X86InstructionSelector.cpp b/llvm/lib/Target/X86/X86InstructionSelector.cpp
index ff531713037c..8abbaa92c8cf 100644
--- a/llvm/lib/Target/X86/X86InstructionSelector.cpp
+++ b/llvm/lib/Target/X86/X86InstructionSelector.cpp
@@ -479,7 +479,7 @@ static void X86SelectAddress(const MachineInstr &I,
"unsupported type.");
if (I.getOpcode() == TargetOpcode::G_PTR_ADD) {
- if (auto COff = getConstantVRegSExtVal(I.getOperand(2).getReg(), MRI)) {
+ if (auto COff = getIConstantVRegSExtVal(I.getOperand(2).getReg(), MRI)) {
int64_t Imm = *COff;
if (isInt<32>(Imm)) { // Check for displacement overflow.
AM.Disp = static_cast<int32_t>(Imm);
@@ -1065,7 +1065,7 @@ bool X86InstructionSelector::selectUadde(MachineInstr &I,
return false;
Opcode = X86::ADC32rr;
- } else if (auto val = getConstantVRegVal(CarryInReg, MRI)) {
+ } else if (auto val = getIConstantVRegVal(CarryInReg, MRI)) {
// carry is constant, support only 0.
if (*val != 0)
return false;
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index de2500b8e1bd..1edec96bbec3 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -24,6 +24,7 @@ enum IntrinsicType : uint16_t {
GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, XGETBV, ADX, FPCLASSS,
INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, INTR_TYPE_4OP_IMM8,
INTR_TYPE_3OP_IMM8,
+ CFMA_OP_MASK, CFMA_OP_MASKZ,
CMP_MASK_CC,CMP_MASK_SCALAR_CC, VSHIFT, COMI, COMI_RM, BLENDV, BEXTRI,
CVTPD2PS_MASK,
INTR_TYPE_1OP_SAE, INTR_TYPE_2OP_SAE,
@@ -987,6 +988,236 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512bf16_dpbf16ps_256, INTR_TYPE_3OP, X86ISD::DPBF16PS, 0),
X86_INTRINSIC_DATA(avx512bf16_dpbf16ps_512, INTR_TYPE_3OP, X86ISD::DPBF16PS, 0),
X86_INTRINSIC_DATA(avx512bf16_mask_cvtneps2bf16_128, CVTNEPS2BF16_MASK, X86ISD::CVTNEPS2BF16, X86ISD::MCVTNEPS2BF16),
+ X86_INTRINSIC_DATA(avx512fp16_add_ph_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND),
+ X86_INTRINSIC_DATA(avx512fp16_div_ph_512, INTR_TYPE_2OP, ISD::FDIV, X86ISD::FDIV_RND),
+ X86_INTRINSIC_DATA(avx512fp16_fpclass_ph_128, INTR_TYPE_2OP, X86ISD::VFPCLASS, 0),
+ X86_INTRINSIC_DATA(avx512fp16_fpclass_ph_256, INTR_TYPE_2OP, X86ISD::VFPCLASS, 0),
+ X86_INTRINSIC_DATA(avx512fp16_fpclass_ph_512, INTR_TYPE_2OP, X86ISD::VFPCLASS, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_add_sh_round, INTR_TYPE_SCALAR_MASK,
+ X86ISD::FADDS, X86ISD::FADDS_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_cmp_ph_128, CMP_MASK_CC, X86ISD::CMPMM, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_cmp_ph_256, CMP_MASK_CC, X86ISD::CMPMM, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_cmp_ph_512, CMP_MASK_CC, X86ISD::CMPMM, X86ISD::CMPMM_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_cmp_sh, CMP_MASK_SCALAR_CC,
+ X86ISD::FSETCCM, X86ISD::FSETCCM_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_div_sh_round, INTR_TYPE_SCALAR_MASK,
+ X86ISD::FDIVS, X86ISD::FDIVS_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_fpclass_sh, FPCLASSS, X86ISD::VFPCLASSS, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_getexp_ph_128, INTR_TYPE_1OP_MASK, X86ISD::FGETEXP, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_getexp_ph_256, INTR_TYPE_1OP_MASK, X86ISD::FGETEXP, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_getexp_ph_512, INTR_TYPE_1OP_MASK_SAE,
+ X86ISD::FGETEXP, X86ISD::FGETEXP_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_getexp_sh, INTR_TYPE_SCALAR_MASK_SAE,
+ X86ISD::FGETEXPS, X86ISD::FGETEXPS_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_getmant_ph_128, INTR_TYPE_2OP_MASK_SAE,
+ X86ISD::VGETMANT, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_getmant_ph_256, INTR_TYPE_2OP_MASK_SAE,
+ X86ISD::VGETMANT, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_getmant_ph_512, INTR_TYPE_2OP_MASK_SAE,
+ X86ISD::VGETMANT, X86ISD::VGETMANT_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_getmant_sh, INTR_TYPE_3OP_SCALAR_MASK_SAE,
+ X86ISD::VGETMANTS, X86ISD::VGETMANTS_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_max_sh_round, INTR_TYPE_SCALAR_MASK_SAE,
+ X86ISD::FMAXS, X86ISD::FMAXS_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_min_sh_round, INTR_TYPE_SCALAR_MASK_SAE,
+ X86ISD::FMINS, X86ISD::FMINS_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_mul_sh_round, INTR_TYPE_SCALAR_MASK,
+ X86ISD::FMULS, X86ISD::FMULS_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_rcp_ph_128, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_rcp_ph_256, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_rcp_ph_512, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_rcp_sh, INTR_TYPE_SCALAR_MASK, X86ISD::RCP14S, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_reduce_ph_128, INTR_TYPE_2OP_MASK_SAE, X86ISD::VREDUCE, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_reduce_ph_256, INTR_TYPE_2OP_MASK_SAE, X86ISD::VREDUCE, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_reduce_ph_512, INTR_TYPE_2OP_MASK_SAE, X86ISD::VREDUCE, X86ISD::VREDUCE_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_reduce_sh, INTR_TYPE_SCALAR_MASK, X86ISD::VREDUCES, X86ISD::VREDUCES_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_rndscale_ph_128, INTR_TYPE_2OP_MASK_SAE, X86ISD::VRNDSCALE, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_rndscale_ph_256, INTR_TYPE_2OP_MASK_SAE, X86ISD::VRNDSCALE, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_rndscale_ph_512, INTR_TYPE_2OP_MASK_SAE, X86ISD::VRNDSCALE, X86ISD::VRNDSCALE_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_rndscale_sh, INTR_TYPE_SCALAR_MASK,
+ X86ISD::VRNDSCALES, X86ISD::VRNDSCALES_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_rsqrt_ph_128, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_rsqrt_ph_256, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_rsqrt_ph_512, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_rsqrt_sh, INTR_TYPE_SCALAR_MASK, X86ISD::RSQRT14S, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_scalef_ph_128, INTR_TYPE_2OP_MASK, X86ISD::SCALEF, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_scalef_ph_256, INTR_TYPE_2OP_MASK, X86ISD::SCALEF, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_scalef_ph_512, INTR_TYPE_2OP_MASK,
+ X86ISD::SCALEF, X86ISD::SCALEF_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_scalef_sh, INTR_TYPE_SCALAR_MASK,
+ X86ISD::SCALEFS, X86ISD::SCALEFS_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_sqrt_sh, INTR_TYPE_SCALAR_MASK,
+ X86ISD::FSQRTS, X86ISD::FSQRTS_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_sub_sh_round, INTR_TYPE_SCALAR_MASK,
+ X86ISD::FSUBS, X86ISD::FSUBS_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtdq2ph_128, TRUNCATE_TO_REG,
+ X86ISD::CVTSI2P, X86ISD::MCVTSI2P),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtpd2ph_128, TRUNCATE_TO_REG,
+ X86ISD::VFPROUND, X86ISD::VMFPROUND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtpd2ph_256, TRUNCATE_TO_REG,
+ X86ISD::VFPROUND, X86ISD::VMFPROUND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtpd2ph_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VFPROUND, X86ISD::VFPROUND_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2dq_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2SI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2dq_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2SI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2dq_512, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2pd_128, INTR_TYPE_1OP_MASK, X86ISD::VFPEXT, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2pd_256, INTR_TYPE_1OP_MASK, X86ISD::VFPEXT, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2pd_512, INTR_TYPE_1OP_MASK_SAE,
+ ISD::FP_EXTEND, X86ISD::VFPEXT_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2psx_128, INTR_TYPE_1OP_MASK, X86ISD::VFPEXT, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2psx_256, INTR_TYPE_1OP_MASK, ISD::FP_EXTEND, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2psx_512, INTR_TYPE_1OP_MASK_SAE,
+ ISD::FP_EXTEND, X86ISD::VFPEXT_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2qq_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2SI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2qq_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2SI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2qq_512, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2udq_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2UI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2udq_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2UI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2udq_512, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2UI, X86ISD::CVTP2UI_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2uqq_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2UI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2uqq_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2UI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2uqq_512, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2UI, X86ISD::CVTP2UI_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2uw_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2UI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2uw_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2UI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2uw_512, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2UI, X86ISD::CVTP2UI_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2w_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2SI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2w_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2SI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2w_512, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtps2phx_128, TRUNCATE_TO_REG,
+ X86ISD::VFPROUND, X86ISD::VMFPROUND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtps2phx_256, INTR_TYPE_1OP_MASK, X86ISD::VFPROUND, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtps2phx_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VFPROUND, X86ISD::VFPROUND_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtqq2ph_128, TRUNCATE_TO_REG,
+ X86ISD::CVTSI2P, X86ISD::MCVTSI2P),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtqq2ph_256, TRUNCATE_TO_REG,
+ X86ISD::CVTSI2P, X86ISD::MCVTSI2P),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtsd2sh_round, INTR_TYPE_SCALAR_MASK_RND,
+ X86ISD::VFPROUNDS, X86ISD::VFPROUNDS_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtsh2sd_round, INTR_TYPE_SCALAR_MASK_SAE,
+ X86ISD::VFPEXTS, X86ISD::VFPEXTS_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtsh2ss_round, INTR_TYPE_SCALAR_MASK_SAE,
+ X86ISD::VFPEXTS, X86ISD::VFPEXTS_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtss2sh_round, INTR_TYPE_SCALAR_MASK_RND,
+ X86ISD::VFPROUNDS, X86ISD::VFPROUNDS_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2dq_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2SI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2dq_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2SI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2dq_512, INTR_TYPE_1OP_MASK_SAE,
+ X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2qq_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2SI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2qq_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2SI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2qq_512, INTR_TYPE_1OP_MASK_SAE,
+ X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2udq_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2UI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2udq_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2UI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2udq_512, INTR_TYPE_1OP_MASK_SAE,
+ X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2uqq_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2UI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2uqq_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2UI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2uqq_512, INTR_TYPE_1OP_MASK_SAE,
+ X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2uw_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2UI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2uw_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2UI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2uw_512, INTR_TYPE_1OP_MASK_SAE,
+ X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2w_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2SI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2w_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2SI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2w_512, INTR_TYPE_1OP_MASK_SAE,
+ X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtudq2ph_128, TRUNCATE_TO_REG,
+ X86ISD::CVTUI2P, X86ISD::MCVTUI2P),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtuqq2ph_128, TRUNCATE_TO_REG,
+ X86ISD::CVTUI2P, X86ISD::MCVTUI2P),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtuqq2ph_256, TRUNCATE_TO_REG,
+ X86ISD::CVTUI2P, X86ISD::MCVTUI2P),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vfcmadd_cph_128, CFMA_OP_MASK, X86ISD::VFCMADDC, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vfcmadd_cph_256, CFMA_OP_MASK, X86ISD::VFCMADDC, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vfcmadd_cph_512, CFMA_OP_MASK, X86ISD::VFCMADDC, X86ISD::VFCMADDC_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vfcmadd_csh, CFMA_OP_MASK, X86ISD::VFCMADDCSH, X86ISD::VFCMADDCSH_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vfcmul_cph_128, INTR_TYPE_2OP_MASK, X86ISD::VFCMULC, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vfcmul_cph_256, INTR_TYPE_2OP_MASK, X86ISD::VFCMULC, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vfcmul_cph_512, INTR_TYPE_2OP_MASK, X86ISD::VFCMULC, X86ISD::VFCMULC_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vfcmul_csh, INTR_TYPE_SCALAR_MASK, X86ISD::VFCMULCSH, X86ISD::VFCMULCSH_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vfmadd_cph_128, CFMA_OP_MASK, X86ISD::VFMADDC, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vfmadd_cph_256, CFMA_OP_MASK, X86ISD::VFMADDC, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vfmadd_cph_512, CFMA_OP_MASK, X86ISD::VFMADDC, X86ISD::VFMADDC_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vfmadd_csh, CFMA_OP_MASK, X86ISD::VFMADDCSH, X86ISD::VFMADDCSH_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vfmul_cph_128, INTR_TYPE_2OP_MASK, X86ISD::VFMULC, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vfmul_cph_256, INTR_TYPE_2OP_MASK, X86ISD::VFMULC, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vfmul_cph_512, INTR_TYPE_2OP_MASK, X86ISD::VFMULC, X86ISD::VFMULC_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vfmul_csh, INTR_TYPE_SCALAR_MASK, X86ISD::VFMULCSH, X86ISD::VFMULCSH_RND),
+ X86_INTRINSIC_DATA(avx512fp16_maskz_vfcmadd_cph_128, CFMA_OP_MASKZ, X86ISD::VFCMADDC, 0),
+ X86_INTRINSIC_DATA(avx512fp16_maskz_vfcmadd_cph_256, CFMA_OP_MASKZ, X86ISD::VFCMADDC, 0),
+ X86_INTRINSIC_DATA(avx512fp16_maskz_vfcmadd_cph_512, CFMA_OP_MASKZ, X86ISD::VFCMADDC, X86ISD::VFCMADDC_RND),
+ X86_INTRINSIC_DATA(avx512fp16_maskz_vfcmadd_csh, CFMA_OP_MASKZ, X86ISD::VFCMADDCSH, X86ISD::VFCMADDCSH_RND),
+ X86_INTRINSIC_DATA(avx512fp16_maskz_vfmadd_cph_128, CFMA_OP_MASKZ, X86ISD::VFMADDC, 0),
+ X86_INTRINSIC_DATA(avx512fp16_maskz_vfmadd_cph_256, CFMA_OP_MASKZ, X86ISD::VFMADDC, 0),
+ X86_INTRINSIC_DATA(avx512fp16_maskz_vfmadd_cph_512, CFMA_OP_MASKZ, X86ISD::VFMADDC, X86ISD::VFMADDC_RND),
+ X86_INTRINSIC_DATA(avx512fp16_maskz_vfmadd_csh, CFMA_OP_MASKZ, X86ISD::VFMADDCSH, X86ISD::VFMADDCSH_RND),
+ X86_INTRINSIC_DATA(avx512fp16_max_ph_128, INTR_TYPE_2OP, X86ISD::FMAX, 0),
+ X86_INTRINSIC_DATA(avx512fp16_max_ph_256, INTR_TYPE_2OP, X86ISD::FMAX, 0),
+ X86_INTRINSIC_DATA(avx512fp16_max_ph_512, INTR_TYPE_2OP_SAE, X86ISD::FMAX, X86ISD::FMAX_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_min_ph_128, INTR_TYPE_2OP, X86ISD::FMIN, 0),
+ X86_INTRINSIC_DATA(avx512fp16_min_ph_256, INTR_TYPE_2OP, X86ISD::FMIN, 0),
+ X86_INTRINSIC_DATA(avx512fp16_min_ph_512, INTR_TYPE_2OP_SAE, X86ISD::FMIN, X86ISD::FMIN_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mul_ph_512, INTR_TYPE_2OP, ISD::FMUL, X86ISD::FMUL_RND),
+ X86_INTRINSIC_DATA(avx512fp16_sqrt_ph_512, INTR_TYPE_1OP, ISD::FSQRT, X86ISD::FSQRT_RND),
+ X86_INTRINSIC_DATA(avx512fp16_sub_ph_512, INTR_TYPE_2OP, ISD::FSUB, X86ISD::FSUB_RND),
+ X86_INTRINSIC_DATA(avx512fp16_vcomi_sh, COMI_RM, X86ISD::COMI, X86ISD::UCOMI),
+ /*fp16 scalar convert instruction*/
+ X86_INTRINSIC_DATA(avx512fp16_vcvtsh2si32, INTR_TYPE_1OP, X86ISD::CVTS2SI, X86ISD::CVTS2SI_RND),
+ X86_INTRINSIC_DATA(avx512fp16_vcvtsh2si64, INTR_TYPE_1OP, X86ISD::CVTS2SI, X86ISD::CVTS2SI_RND),
+ X86_INTRINSIC_DATA(avx512fp16_vcvtsh2usi32, INTR_TYPE_1OP, X86ISD::CVTS2UI, X86ISD::CVTS2UI_RND),
+ X86_INTRINSIC_DATA(avx512fp16_vcvtsh2usi64, INTR_TYPE_1OP, X86ISD::CVTS2UI, X86ISD::CVTS2UI_RND),
+ X86_INTRINSIC_DATA(avx512fp16_vcvtsi2sh, INTR_TYPE_2OP,
+ X86ISD::SCALAR_SINT_TO_FP, X86ISD::SCALAR_SINT_TO_FP_RND),
+ X86_INTRINSIC_DATA(avx512fp16_vcvtsi642sh, INTR_TYPE_2OP,
+ X86ISD::SCALAR_SINT_TO_FP, X86ISD::SCALAR_SINT_TO_FP_RND),
+ X86_INTRINSIC_DATA(avx512fp16_vcvttsh2si32, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2SI, X86ISD::CVTTS2SI_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_vcvttsh2si64, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2SI, X86ISD::CVTTS2SI_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_vcvttsh2usi32, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2UI, X86ISD::CVTTS2UI_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_vcvttsh2usi64, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2UI, X86ISD::CVTTS2UI_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_vcvtusi2sh, INTR_TYPE_2OP,
+ X86ISD::SCALAR_UINT_TO_FP, X86ISD::SCALAR_UINT_TO_FP_RND),
+ X86_INTRINSIC_DATA(avx512fp16_vcvtusi642sh, INTR_TYPE_2OP,
+ X86ISD::SCALAR_UINT_TO_FP, X86ISD::SCALAR_UINT_TO_FP_RND),
+ X86_INTRINSIC_DATA(avx512fp16_vfmadd_f16, INTR_TYPE_3OP, ISD::FMA, X86ISD::FMADD_RND),
+ X86_INTRINSIC_DATA(avx512fp16_vfmadd_ph_512, INTR_TYPE_3OP, ISD::FMA, X86ISD::FMADD_RND),
+ X86_INTRINSIC_DATA(avx512fp16_vfmaddsub_ph_128, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(avx512fp16_vfmaddsub_ph_256, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(avx512fp16_vfmaddsub_ph_512, INTR_TYPE_3OP, X86ISD::FMADDSUB,
+ X86ISD::FMADDSUB_RND),
X86_INTRINSIC_DATA(bmi_bextr_32, INTR_TYPE_2OP, X86ISD::BEXTR, 0),
X86_INTRINSIC_DATA(bmi_bextr_64, INTR_TYPE_2OP, X86ISD::BEXTR, 0),
X86_INTRINSIC_DATA(bmi_bzhi_32, INTR_TYPE_2OP, X86ISD::BZHI, 0),
diff --git a/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp b/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp
index 165533eba346..4710e524931c 100644
--- a/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp
+++ b/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp
@@ -306,7 +306,8 @@ bool X86LoadValueInjectionLoadHardeningPass::runOnMachineFunction(
OptimizeDL = llvm::sys::DynamicLibrary::getPermanentLibrary(
OptimizePluginPath.c_str(), &ErrorMsg);
if (!ErrorMsg.empty())
- report_fatal_error("Failed to load opt plugin: \"" + ErrorMsg + '\"');
+ report_fatal_error(Twine("Failed to load opt plugin: \"") + ErrorMsg +
+ "\"");
OptimizeCut = (OptimizeCutT)OptimizeDL.getAddressOfSymbol("optimize_cut");
if (!OptimizeCut)
report_fatal_error("Invalid optimization plugin");
diff --git a/llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp b/llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp
index 7b6276c1d87e..e562748c98fe 100644
--- a/llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp
+++ b/llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp
@@ -76,7 +76,7 @@ bool X86LoadValueInjectionRetHardeningPass::runOnMachineFunction(
bool Modified = false;
for (auto &MBB : MF) {
for (auto MBBI = MBB.begin(); MBBI != MBB.end(); ++MBBI) {
- if (MBBI->getOpcode() != X86::RETQ)
+ if (MBBI->getOpcode() != X86::RET64)
continue;
unsigned ClobberReg = TRI->findDeadCallerSavedReg(MBB, MBBI);
diff --git a/llvm/lib/Target/X86/X86LowerAMXIntrinsics.cpp b/llvm/lib/Target/X86/X86LowerAMXIntrinsics.cpp
index 248069f4deb4..6b564a0356a6 100644
--- a/llvm/lib/Target/X86/X86LowerAMXIntrinsics.cpp
+++ b/llvm/lib/Target/X86/X86LowerAMXIntrinsics.cpp
@@ -498,8 +498,8 @@ X86LowerAMXIntrinsics::lowerTileDP(Instruction *TileDP) {
Value *ResAMX =
Builder.CreateBitCast(ResVec, Type::getX86_AMXTy(Builder.getContext()));
// Delete TileDP intrinsic and do some clean-up.
- for (auto UI = TileDP->use_begin(), UE = TileDP->use_end(); UI != UE;) {
- Instruction *I = cast<Instruction>((UI++)->getUser());
+ for (Use &U : llvm::make_early_inc_range(TileDP->uses())) {
+ Instruction *I = cast<Instruction>(U.getUser());
Value *Vec;
if (match(I, m_BitCast(m_Value(Vec)))) {
I->replaceAllUsesWith(ResVec);
@@ -542,9 +542,8 @@ bool X86LowerAMXIntrinsics::lowerTileLoadStore(Instruction *TileLoadStore) {
Value *ResAMX =
Builder.CreateBitCast(ResVec, Type::getX86_AMXTy(Builder.getContext()));
// Delete tileloadd6 intrinsic and do some clean-up
- for (auto UI = TileLoadStore->use_begin(), UE = TileLoadStore->use_end();
- UI != UE;) {
- Instruction *I = cast<Instruction>((UI++)->getUser());
+ for (Use &U : llvm::make_early_inc_range(TileLoadStore->uses())) {
+ Instruction *I = cast<Instruction>(U.getUser());
Value *Vec;
if (match(I, m_BitCast(m_Value(Vec)))) {
I->replaceAllUsesWith(ResVec);
@@ -561,8 +560,8 @@ bool X86LowerAMXIntrinsics::lowerTileZero(Instruction *TileZero) {
IRBuilder<> Builder(TileZero);
FixedVectorType *V256I32Ty = FixedVectorType::get(Builder.getInt32Ty(), 256);
Value *VecZero = Constant::getNullValue(V256I32Ty);
- for (auto UI = TileZero->use_begin(), UE = TileZero->use_end(); UI != UE;) {
- Instruction *I = cast<Instruction>((UI++)->getUser());
+ for (Use &U : llvm::make_early_inc_range(TileZero->uses())) {
+ Instruction *I = cast<Instruction>(U.getUser());
Value *Vec;
if (match(I, m_BitCast(m_Value(Vec)))) {
I->replaceAllUsesWith(VecZero);
@@ -631,6 +630,7 @@ bool X86LowerAMXIntrinsics::visit() {
return C;
}
+namespace {
class X86LowerAMXIntrinsicsLegacyPass : public FunctionPass {
public:
static char ID;
@@ -665,6 +665,7 @@ public:
AU.addRequired<TargetPassConfig>();
}
};
+} // namespace
static const char PassName[] = "Lower AMX intrinsics";
char X86LowerAMXIntrinsicsLegacyPass::ID = 0;
diff --git a/llvm/lib/Target/X86/X86LowerAMXType.cpp b/llvm/lib/Target/X86/X86LowerAMXType.cpp
index 4ba44ccb6c16..7368b64efd9a 100644
--- a/llvm/lib/Target/X86/X86LowerAMXType.cpp
+++ b/llvm/lib/Target/X86/X86LowerAMXType.cpp
@@ -40,8 +40,10 @@
//
#include "X86.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -56,66 +58,44 @@
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
+#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
using namespace PatternMatch;
#define DEBUG_TYPE "lower-amx-type"
-static AllocaInst *createAllocaInstAtEntry(IRBuilder<> &Builder,
- BasicBlock *BB) {
+static bool isAMXCast(Instruction *II) {
+ return match(II,
+ m_Intrinsic<Intrinsic::x86_cast_vector_to_tile>(m_Value())) ||
+ match(II, m_Intrinsic<Intrinsic::x86_cast_tile_to_vector>(m_Value()));
+}
+
+static AllocaInst *createAllocaInstAtEntry(IRBuilder<> &Builder, BasicBlock *BB,
+ Type *Ty) {
Function &F = *BB->getParent();
Module *M = BB->getModule();
const DataLayout &DL = M->getDataLayout();
- Type *V256I32Ty = VectorType::get(Builder.getInt32Ty(), 256, false);
LLVMContext &Ctx = Builder.getContext();
auto AllocaAlignment = DL.getPrefTypeAlign(Type::getX86_AMXTy(Ctx));
unsigned AllocaAS = DL.getAllocaAddrSpace();
AllocaInst *AllocaRes =
- new AllocaInst(V256I32Ty, AllocaAS, "", &F.getEntryBlock().front());
+ new AllocaInst(Ty, AllocaAS, "", &F.getEntryBlock().front());
AllocaRes->setAlignment(AllocaAlignment);
return AllocaRes;
}
-namespace {
-class X86LowerAMXType {
- Function &Func;
- TargetMachine *TM = nullptr;
-
- // In AMX intrinsics we let Shape = {Row, Col}, but the
- // RealCol = Col / ElementSize. We may use the RealCol
- // as a new Row for other new created AMX intrinsics.
- std::map<Value *, Value *> Col2Row;
-
-public:
- X86LowerAMXType(Function &F, TargetMachine *TargetM) : Func(F), TM(TargetM) {}
- bool visit();
- void combineLoadBitcast(LoadInst *LD, BitCastInst *Bitcast);
- void combineBitcastStore(BitCastInst *Bitcast, StoreInst *ST);
- bool transformBitcast(BitCastInst *Bitcast);
- std::pair<Value *, Value *> getShape(IntrinsicInst *II, unsigned OpNo);
- Value *getRowFromCol(Instruction *II, Value *V, unsigned Granularity);
-};
-
-Value *X86LowerAMXType::getRowFromCol(Instruction *II, Value *V,
- unsigned Granularity) {
- if (Col2Row.count(V))
- return Col2Row[V];
- IRBuilder<> Builder(&*II->getParent()->getFirstInsertionPt());
- if (auto *I = dyn_cast<Instruction>(V)) {
- BasicBlock::iterator Iter = I->getIterator();
- ++Iter;
- Builder.SetInsertPoint(&*Iter);
- }
- ConstantInt *Gran = Builder.getInt16(Granularity);
- Value *RealRow = Builder.CreateUDiv(V, Gran);
- Col2Row[V] = RealRow;
- return RealRow;
+static Instruction *getFirstNonAllocaInTheEntryBlock(Function &F) {
+ for (Instruction &I : F.getEntryBlock())
+ if (!isa<AllocaInst>(&I))
+ return &I;
+ llvm_unreachable("No terminator in the entry block!");
}
-std::pair<Value *, Value *> X86LowerAMXType::getShape(IntrinsicInst *II,
- unsigned OpNo) {
+static std::pair<Value *, Value *> getShape(IntrinsicInst *II, unsigned OpNo) {
+ IRBuilder<> Builder(II);
Value *Row = nullptr, *Col = nullptr;
switch (II->getIntrinsicID()) {
default:
@@ -144,14 +124,32 @@ std::pair<Value *, Value *> X86LowerAMXType::getShape(IntrinsicInst *II,
Col = II->getArgOperand(2);
break;
case 5:
- Row = II->getArgOperand(2);
- // FIXME: There is a design bug for AMX shape, which the Col should be
- // Col/4 if it will be used as Row, but current Greedy RA can't handle
- // this case well, it may failed if we generate a new Shape definition.
- // So Let's just do it in O0 first.
- // Row = Row / 4
- if (TM->getOptLevel() == CodeGenOpt::None)
- Row = getRowFromCol(II, Row, 4);
+ if (isa<ConstantInt>(II->getArgOperand(2)))
+ Row = Builder.getInt16(
+ (cast<ConstantInt>(II->getOperand(2))->getSExtValue()) / 4);
+ else if (isa<Instruction>(II->getArgOperand(2))) {
+ // When it is not a const value and it is not a function argument, we
+ // create Row after the definition of II->getOperand(2) instead of
+      // before II. For example, if II is %118 and we try to get the shape of %117:
+ // %117 = call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x
+ // i32> %115).
+ // %118 = call x86_amx @llvm.x86.tdpbf16ps.internal(i16
+ // %104, i16 %105, i16 %106, x86_amx %110, x86_amx %114, x86_amx
+ // %117).
+      // If we create %row = udiv i16 %106, 4 before %118 (aka. II), then its
+      // definition is after its user (the new tileload for %117).
+ // So, the best choice is to create %row right after the definition of
+ // %106.
+ Builder.SetInsertPoint(cast<Instruction>(II->getOperand(2)));
+ Row = Builder.CreateUDiv(II->getOperand(2), Builder.getInt16(4));
+ cast<Instruction>(Row)->moveAfter(cast<Instruction>(II->getOperand(2)));
+ } else {
+ // When it is not a const value and it is a function argument, we create
+ // Row at the entry bb.
+ IRBuilder<> NewBuilder(
+ getFirstNonAllocaInTheEntryBlock(*II->getFunction()));
+ Row = NewBuilder.CreateUDiv(II->getOperand(2), NewBuilder.getInt16(4));
+ }
Col = II->getArgOperand(1);
break;
}
@@ -162,6 +160,23 @@ std::pair<Value *, Value *> X86LowerAMXType::getShape(IntrinsicInst *II,
return std::make_pair(Row, Col);
}
+namespace {
+class X86LowerAMXType {
+ Function &Func;
+
+ // In AMX intrinsics we let Shape = {Row, Col}, but the
+ // RealCol = Col / ElementSize. We may use the RealCol
+ // as a new Row for other new created AMX intrinsics.
+ std::map<Value *, Value *> Col2Row;
+
+public:
+ X86LowerAMXType(Function &F) : Func(F) {}
+ bool visit();
+ void combineLoadBitcast(LoadInst *LD, BitCastInst *Bitcast);
+ void combineBitcastStore(BitCastInst *Bitcast, StoreInst *ST);
+ bool transformBitcast(BitCastInst *Bitcast);
+};
+
// %src = load <256 x i32>, <256 x i32>* %addr, align 64
// %2 = bitcast <256 x i32> %src to x86_amx
// -->
@@ -230,8 +245,8 @@ bool X86LowerAMXType::transformBitcast(BitCastInst *Bitcast) {
Value *I8Ptr, *Stride;
auto *Src = Bitcast->getOperand(0);
- auto Prepare = [&]() {
- AllocaAddr = createAllocaInstAtEntry(Builder, Bitcast->getParent());
+ auto Prepare = [&](Type *MemTy) {
+ AllocaAddr = createAllocaInstAtEntry(Builder, Bitcast->getParent(), MemTy);
I8Ptr = Builder.CreateBitCast(AllocaAddr, Builder.getInt8PtrTy());
Stride = Builder.getInt64(64);
};
@@ -250,7 +265,7 @@ bool X86LowerAMXType::transformBitcast(BitCastInst *Bitcast) {
auto *II = dyn_cast<IntrinsicInst>(U.getUser());
if (!II)
return false; // May be bitcast from x86amx to <256 x i32>.
- Prepare();
+ Prepare(Bitcast->getOperand(0)->getType());
Builder.CreateStore(Src, AllocaAddr);
     // TODO we can pick a constant operand for the shape.
Value *Row = nullptr, *Col = nullptr;
@@ -270,7 +285,7 @@ bool X86LowerAMXType::transformBitcast(BitCastInst *Bitcast) {
auto *II = dyn_cast<IntrinsicInst>(Src);
if (!II)
return false; // May be bitcast from <256 x i32> to x86amx.
- Prepare();
+ Prepare(Bitcast->getType());
Value *Row = II->getOperand(0);
Value *Col = II->getOperand(1);
std::array<Value *, 5> Args = {Row, Col, I8Ptr, Stride, Src};
@@ -287,9 +302,7 @@ bool X86LowerAMXType::visit() {
Col2Row.clear();
for (BasicBlock *BB : post_order(&Func)) {
- for (BasicBlock::reverse_iterator II = BB->rbegin(), IE = BB->rend();
- II != IE;) {
- Instruction &Inst = *II++;
+ for (Instruction &Inst : llvm::make_early_inc_range(llvm::reverse(*BB))) {
auto *Bitcast = dyn_cast<BitCastInst>(&Inst);
if (!Bitcast)
continue;
@@ -332,10 +345,8 @@ bool X86LowerAMXType::visit() {
continue;
}
StoreInst *ST = nullptr;
- for (auto UI = Bitcast->use_begin(), UE = Bitcast->use_end();
- UI != UE;) {
- Value *I = (UI++)->getUser();
- ST = dyn_cast<StoreInst>(I);
+ for (Use &U : Bitcast->uses()) {
+ ST = dyn_cast<StoreInst>(U.getUser());
if (ST)
break;
}
@@ -637,6 +648,366 @@ bool X86VolatileTileData::volatileTileData() {
namespace {
+class X86LowerAMXCast {
+ Function &Func;
+
+public:
+ X86LowerAMXCast(Function &F) : Func(F) {}
+ bool combineAMXcast(TargetLibraryInfo *TLI);
+ bool transformAMXCast(IntrinsicInst *AMXCast);
+ bool transformAllAMXCast();
+ bool optimizeAMXCastFromPhi(IntrinsicInst *CI, PHINode *PN,
+ SmallSetVector<Instruction *, 16> &DeadInst);
+};
+
+static bool DCEInstruction(Instruction *I,
+ SmallSetVector<Instruction *, 16> &WorkList,
+ const TargetLibraryInfo *TLI) {
+ if (isInstructionTriviallyDead(I, TLI)) {
+ salvageDebugInfo(*I);
+ salvageKnowledge(I);
+
+ // Null out all of the instruction's operands to see if any operand becomes
+ // dead as we go.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ Value *OpV = I->getOperand(i);
+ I->setOperand(i, nullptr);
+
+ if (!OpV->use_empty() || I == OpV)
+ continue;
+
+ // If the operand is an instruction that became dead as we nulled out the
+ // operand, and if it is 'trivially' dead, delete it in a future loop
+ // iteration.
+ if (Instruction *OpI = dyn_cast<Instruction>(OpV)) {
+ if (isInstructionTriviallyDead(OpI, TLI)) {
+ WorkList.insert(OpI);
+ }
+ }
+ }
+ I->eraseFromParent();
+ return true;
+ }
+ return false;
+}
+
+/// This function handles the following case
+///
+/// A -> B amxcast
+/// PHI
+/// B -> A amxcast
+///
+/// All the related PHI nodes can be replaced by new PHI nodes with type A.
+/// The uses of \p CI can be changed to the new PHI node corresponding to \p PN.
+bool X86LowerAMXCast::optimizeAMXCastFromPhi(
+ IntrinsicInst *CI, PHINode *PN,
+ SmallSetVector<Instruction *, 16> &DeadInst) {
+ IRBuilder<> Builder(CI);
+ Value *Src = CI->getOperand(0);
+ Type *SrcTy = Src->getType(); // Type B
+ Type *DestTy = CI->getType(); // Type A
+
+ SmallVector<PHINode *, 4> PhiWorklist;
+ SmallSetVector<PHINode *, 4> OldPhiNodes;
+
+ // Find all of the A->B casts and PHI nodes.
+ // We need to inspect all related PHI nodes, but PHIs can be cyclic, so
+  // OldPhiNodes is used to track all known PHI nodes; before adding a new
+ // PHI to PhiWorklist, it is checked against and added to OldPhiNodes first.
+ PhiWorklist.push_back(PN);
+ OldPhiNodes.insert(PN);
+ while (!PhiWorklist.empty()) {
+ auto *OldPN = PhiWorklist.pop_back_val();
+ for (Value *IncValue : OldPN->incoming_values()) {
+      // TODO: currently, we ignore cases where it is a const. In the future, we
+      // might support const.
+ if (isa<Constant>(IncValue))
+ return false;
+
+ if (auto *PNode = dyn_cast<PHINode>(IncValue)) {
+ if (OldPhiNodes.insert(PNode))
+ PhiWorklist.push_back(PNode);
+ continue;
+ }
+ Instruction *ACI = dyn_cast<Instruction>(IncValue);
+ if (ACI && isAMXCast(ACI)) {
+        // Verify it's an A->B cast.
+ Type *TyA = ACI->getOperand(0)->getType();
+ Type *TyB = ACI->getType();
+ if (TyA != DestTy || TyB != SrcTy)
+ return false;
+ continue;
+ }
+ return false;
+ }
+ }
+
+ // Check that each user of each old PHI node is something that we can
+ // rewrite, so that all of the old PHI nodes can be cleaned up afterwards.
+ for (auto *OldPN : OldPhiNodes) {
+ for (User *V : OldPN->users()) {
+ Instruction *ACI = dyn_cast<Instruction>(V);
+ if (ACI && isAMXCast(ACI)) {
+ // Verify it's a B->A cast.
+ Type *TyB = ACI->getOperand(0)->getType();
+ Type *TyA = ACI->getType();
+ if (TyA != DestTy || TyB != SrcTy)
+ return false;
+ } else if (auto *PHI = dyn_cast<PHINode>(V)) {
+ // As long as the user is another old PHI node, then even if we don't
+ // rewrite it, the PHI web we're considering won't have any users
+ // outside itself, so it'll be dead.
+ // example:
+ // bb.0:
+ // %0 = amxcast ...
+ // bb.1:
+ // %1 = amxcast ...
+ // bb.2:
+ // %goodphi = phi %0, %1
+ // %3 = amxcast %goodphi
+ // bb.3:
+ // %goodphi2 = phi %0, %goodphi
+ // %4 = amxcast %goodphi2
+      //   When optimizeAMXCastFromPhi processes %3 and %goodphi, %goodphi2 is
+      //   outside the phi-web, so the combination stops. When
+      //   optimizeAMXCastFromPhi processes %4 and %goodphi2, the optimization
+      //   will be done.
+ if (OldPhiNodes.count(PHI) == 0)
+ return false;
+ } else
+ return false;
+ }
+ }
+
+ // For each old PHI node, create a corresponding new PHI node with a type A.
+ SmallDenseMap<PHINode *, PHINode *> NewPNodes;
+ for (auto *OldPN : OldPhiNodes) {
+ Builder.SetInsertPoint(OldPN);
+ PHINode *NewPN = Builder.CreatePHI(DestTy, OldPN->getNumOperands());
+ NewPNodes[OldPN] = NewPN;
+ }
+
+ // Fill in the operands of new PHI nodes.
+ for (auto *OldPN : OldPhiNodes) {
+ PHINode *NewPN = NewPNodes[OldPN];
+ for (unsigned j = 0, e = OldPN->getNumOperands(); j != e; ++j) {
+ Value *V = OldPN->getOperand(j);
+ Value *NewV = nullptr;
+ Instruction *ACI = dyn_cast<Instruction>(V);
+      // There should not be an AMXcast from a const.
+ if (ACI && isAMXCast(ACI))
+ NewV = ACI->getOperand(0);
+ else if (auto *PrevPN = dyn_cast<PHINode>(V))
+ NewV = NewPNodes[PrevPN];
+ assert(NewV);
+ NewPN->addIncoming(NewV, OldPN->getIncomingBlock(j));
+ }
+ }
+
+  // Traverse all accumulated PHI nodes and process their users,
+  // which are Stores and BitCasts. Without this processing
+ // NewPHI nodes could be replicated and could lead to extra
+ // moves generated after DeSSA.
+ // If there is a store with type B, change it to type A.
+
+ // Replace users of BitCast B->A with NewPHI. These will help
+ // later to get rid of a closure formed by OldPHI nodes.
+ for (auto *OldPN : OldPhiNodes) {
+ PHINode *NewPN = NewPNodes[OldPN];
+ for (User *V : make_early_inc_range(OldPN->users())) {
+ Instruction *ACI = dyn_cast<Instruction>(V);
+ if (ACI && isAMXCast(ACI)) {
+ Type *TyB = ACI->getOperand(0)->getType();
+ Type *TyA = ACI->getType();
+ assert(TyA == DestTy && TyB == SrcTy);
+ (void)TyA;
+ (void)TyB;
+ ACI->replaceAllUsesWith(NewPN);
+ DeadInst.insert(ACI);
+ } else if (auto *PHI = dyn_cast<PHINode>(V)) {
+        // We don't need to push PHINode into DeadInst since they are operands
+        // of rootPN; DCE can safely delete rootPN's operands if rootPN is dead.
+ assert(OldPhiNodes.contains(PHI));
+ (void)PHI;
+ } else
+ llvm_unreachable("all uses should be handled");
+ }
+ }
+ return true;
+}
+
+bool X86LowerAMXCast::combineAMXcast(TargetLibraryInfo *TLI) {
+ bool Change = false;
+  // Collect tile cast instructions.
+ SmallVector<Instruction *, 8> Vec2TileInsts;
+ SmallVector<Instruction *, 8> Tile2VecInsts;
+ SmallVector<Instruction *, 8> PhiCastWorkList;
+ SmallSetVector<Instruction *, 16> DeadInst;
+ for (BasicBlock &BB : Func) {
+ for (Instruction &I : BB) {
+ Value *Vec;
+ if (match(&I,
+ m_Intrinsic<Intrinsic::x86_cast_vector_to_tile>(m_Value(Vec))))
+ Vec2TileInsts.push_back(&I);
+ else if (match(&I, m_Intrinsic<Intrinsic::x86_cast_tile_to_vector>(
+ m_Value(Vec))))
+ Tile2VecInsts.push_back(&I);
+ }
+ }
+
+ auto Convert = [&](SmallVectorImpl<Instruction *> &Insts, Intrinsic::ID IID) {
+ for (auto *Inst : Insts) {
+ for (User *U : Inst->users()) {
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
+ if (!II || II->getIntrinsicID() != IID)
+ continue;
+ // T1 = vec2tile V0
+ // V2 = tile2vec T1
+ // V3 = OP V2
+ // -->
+ // T1 = vec2tile V0
+ // V2 = tile2vec T1
+ // V3 = OP V0
+ II->replaceAllUsesWith(Inst->getOperand(0));
+ Change = true;
+ }
+ }
+ };
+
+ Convert(Vec2TileInsts, Intrinsic::x86_cast_tile_to_vector);
+ Convert(Tile2VecInsts, Intrinsic::x86_cast_vector_to_tile);
+
+ auto EraseInst = [&](SmallVectorImpl<Instruction *> &Insts) {
+ for (auto *Inst : Insts) {
+ if (Inst->use_empty()) {
+ Inst->eraseFromParent();
+ Change = true;
+ }
+ }
+ };
+
+ EraseInst(Vec2TileInsts);
+ EraseInst(Tile2VecInsts);
+
+  // Handle the A->B->A cast where there is an intervening PHI node.
+ for (BasicBlock &BB : Func) {
+ for (Instruction &I : BB) {
+ if (isAMXCast(&I)) {
+ if (isa<PHINode>(I.getOperand(0)))
+ PhiCastWorkList.push_back(&I);
+ }
+ }
+ }
+ for (auto *I : PhiCastWorkList) {
+ // We skip the dead Amxcast.
+ if (DeadInst.contains(I))
+ continue;
+ PHINode *PN = cast<PHINode>(I->getOperand(0));
+ if (optimizeAMXCastFromPhi(cast<IntrinsicInst>(I), PN, DeadInst)) {
+ DeadInst.insert(PN);
+ Change = true;
+ }
+ }
+
+  // Since we create new phis and merge AMXCasts, some old phis and AMXCasts
+  // might have no uses. We do some DeadCodeElimination for them.
+ while (!DeadInst.empty()) {
+ Instruction *I = DeadInst.pop_back_val();
+ Change |= DCEInstruction(I, DeadInst, TLI);
+ }
+ return Change;
+}
+
+// There might be remaining AMXcasts after combineAMXcast, and they should be
+// handled elegantly.
+bool X86LowerAMXCast::transformAMXCast(IntrinsicInst *AMXCast) {
+ IRBuilder<> Builder(AMXCast);
+ AllocaInst *AllocaAddr;
+ Value *I8Ptr, *Stride;
+ auto *Src = AMXCast->getOperand(0);
+
+ auto Prepare = [&](Type *MemTy) {
+ AllocaAddr = createAllocaInstAtEntry(Builder, AMXCast->getParent(), MemTy);
+ I8Ptr = Builder.CreateBitCast(AllocaAddr, Builder.getInt8PtrTy());
+ Stride = Builder.getInt64(64);
+ };
+
+ if (AMXCast->getType()->isX86_AMXTy()) {
+ // %2 = amxcast <225 x i32> %src to x86_amx
+ // call void @llvm.x86.tilestored64.internal(i16 15, i16 60,
+ // i8* %addr3, i64 60, x86_amx %2)
+ // -->
+ // %addr = alloca <225 x i32>, align 64
+ // store <225 x i32> %src, <225 x i32>* %addr, align 64
+ // %addr2 = bitcast <225 x i32>* %addr to i8*
+ // %2 = call x86_amx @llvm.x86.tileloadd64.internal(i16 15, i16 60,
+ // i8* %addr2,
+ // i64 60)
+ // call void @llvm.x86.tilestored64.internal(i16 15, i16 60,
+ // i8* %addr3, i64 60, x86_amx %2)
+ Use &U = *(AMXCast->use_begin());
+ unsigned OpNo = U.getOperandNo();
+ auto *II = dyn_cast<IntrinsicInst>(U.getUser());
+ if (!II)
+ return false; // May be bitcast from x86amx to <256 x i32>.
+ Prepare(AMXCast->getOperand(0)->getType());
+ Builder.CreateStore(Src, AllocaAddr);
+    // TODO we can pick a constant operand for the shape.
+ Value *Row = nullptr, *Col = nullptr;
+ std::tie(Row, Col) = getShape(II, OpNo);
+ std::array<Value *, 4> Args = {
+ Row, Col, I8Ptr, Builder.CreateSExt(Col, Builder.getInt64Ty())};
+ Value *NewInst = Builder.CreateIntrinsic(
+ Intrinsic::x86_tileloadd64_internal, None, Args);
+ AMXCast->replaceAllUsesWith(NewInst);
+ AMXCast->eraseFromParent();
+ } else {
+ // %2 = amxcast x86_amx %src to <225 x i32>
+ // -->
+ // %addr = alloca <225 x i32>, align 64
+ // %addr2 = bitcast <225 x i32>* to i8*
+ // call void @llvm.x86.tilestored64.internal(i16 %row, i16 %col,
+ // i8* %addr2, i64 %stride)
+ // %2 = load <225 x i32>, <225 x i32>* %addr, align 64
+ auto *II = dyn_cast<IntrinsicInst>(Src);
+ if (!II)
+ return false; // May be bitcast from <256 x i32> to x86amx.
+ Prepare(AMXCast->getType());
+ Value *Row = II->getOperand(0);
+ Value *Col = II->getOperand(1);
+ std::array<Value *, 5> Args = {
+ Row, Col, I8Ptr, Builder.CreateSExt(Col, Builder.getInt64Ty()), Src};
+ Builder.CreateIntrinsic(Intrinsic::x86_tilestored64_internal, None, Args);
+ Value *NewInst = Builder.CreateLoad(AMXCast->getType(), AllocaAddr);
+ AMXCast->replaceAllUsesWith(NewInst);
+ AMXCast->eraseFromParent();
+ }
+
+ return true;
+}
+
+bool X86LowerAMXCast::transformAllAMXCast() {
+ bool Change = false;
+  // Collect tile cast instructions.
+ SmallVector<Instruction *, 8> WorkLists;
+ for (BasicBlock &BB : Func) {
+ for (Instruction &I : BB) {
+ if (isAMXCast(&I))
+ WorkLists.push_back(&I);
+ }
+ }
+
+ for (auto *Inst : WorkLists) {
+ Change |= transformAMXCast(cast<IntrinsicInst>(Inst));
+ }
+
+ return Change;
+}
+
+} // anonymous namespace
+
+namespace {
+
class X86LowerAMXTypeLegacyPass : public FunctionPass {
public:
static char ID;
@@ -646,10 +1017,18 @@ public:
}
bool runOnFunction(Function &F) override {
+ bool C = false;
TargetMachine *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
+ TargetLibraryInfo *TLI =
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ X86LowerAMXCast LAC(F);
+ C |= LAC.combineAMXcast(TLI);
+    // There might be remaining AMXcasts after combineAMXcast, and they should be
+    // handled elegantly.
+ C |= LAC.transformAllAMXCast();
- X86LowerAMXType LAT(F, TM);
- bool C = LAT.visit();
+ X86LowerAMXType LAT(F);
+ C |= LAT.visit();
// Prepare for fast register allocation at O0.
// Todo: May better check the volatile model of AMX code, not just
@@ -671,6 +1050,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<TargetPassConfig>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
};
@@ -681,6 +1061,7 @@ char X86LowerAMXTypeLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(X86LowerAMXTypeLegacyPass, DEBUG_TYPE, PassName, false,
false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(X86LowerAMXTypeLegacyPass, DEBUG_TYPE, PassName, false,
false)
diff --git a/llvm/lib/Target/X86/X86LowerTileCopy.cpp b/llvm/lib/Target/X86/X86LowerTileCopy.cpp
index 03692d195768..d6b42145859d 100644
--- a/llvm/lib/Target/X86/X86LowerTileCopy.cpp
+++ b/llvm/lib/Target/X86/X86LowerTileCopy.cpp
@@ -75,9 +75,7 @@ bool X86LowerTileCopy::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
for (MachineBasicBlock &MBB : MF) {
- for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end();
- MII != MIE;) {
- MachineInstr &MI = *MII++;
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
if (!MI.isCopy())
continue;
MachineOperand &DstMO = MI.getOperand(0);
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index 7d916f917d5e..c3cd634612a4 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -43,8 +43,11 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Instrumentation/AddressSanitizer.h"
+#include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h"
using namespace llvm;
@@ -274,6 +277,9 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
case X86II::MO_GOTPCREL:
RefKind = MCSymbolRefExpr::VK_GOTPCREL;
break;
+ case X86II::MO_GOTPCREL_NORELAX:
+ RefKind = MCSymbolRefExpr::VK_GOTPCREL_NORELAX;
+ break;
case X86II::MO_GOT:
RefKind = MCSymbolRefExpr::VK_GOT;
break;
@@ -418,7 +424,7 @@ static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst,
}
static unsigned getRetOpcode(const X86Subtarget &Subtarget) {
- return Subtarget.is64Bit() ? X86::RETQ : X86::RETL;
+ return Subtarget.is64Bit() ? X86::RET64 : X86::RET32;
}
Optional<MCOperand>
@@ -1094,11 +1100,11 @@ static unsigned emitNop(MCStreamer &OS, unsigned NumBytes,
if (Subtarget->is64Bit()) {
// FIXME: We can use NOOPL on 32-bit targets with FeatureNOPL, but the
// IndexReg/BaseReg below need to be updated.
- if (Subtarget->hasFeature(X86::FeatureFast7ByteNOP))
+ if (Subtarget->hasFeature(X86::TuningFast7ByteNOP))
MaxNopLength = 7;
- else if (Subtarget->hasFeature(X86::FeatureFast15ByteNOP))
+ else if (Subtarget->hasFeature(X86::TuningFast15ByteNOP))
MaxNopLength = 15;
- else if (Subtarget->hasFeature(X86::FeatureFast11ByteNOP))
+ else if (Subtarget->hasFeature(X86::TuningFast11ByteNOP))
MaxNopLength = 11;
else
MaxNopLength = 10;
@@ -1323,6 +1329,244 @@ void X86AsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI,
.addExpr(Op));
}
+void X86AsmPrinter::LowerASAN_CHECK_MEMACCESS(const MachineInstr &MI) {
+ // FIXME: Make this work on non-ELF.
+ if (!TM.getTargetTriple().isOSBinFormatELF()) {
+ report_fatal_error("llvm.asan.check.memaccess only supported on ELF");
+ return;
+ }
+
+ unsigned Reg = MI.getOperand(0).getReg().id();
+ ASanAccessInfo AccessInfo(MI.getOperand(1).getImm());
+
+ MCSymbol *&Sym =
+ AsanMemaccessSymbols[AsanMemaccessTuple(Reg, AccessInfo.Packed)];
+ if (!Sym) {
+ std::string Name = AccessInfo.IsWrite ? "store" : "load";
+ std::string SymName = "__asan_check_" + Name +
+ utostr(1ULL << AccessInfo.AccessSizeIndex) + "_rn" +
+ utostr(Reg);
+ Sym = OutContext.getOrCreateSymbol(SymName);
+ }
+
+ EmitAndCountInstruction(
+ MCInstBuilder(X86::CALL64pcrel32)
+ .addExpr(MCSymbolRefExpr::create(Sym, OutContext)));
+}
+
+void X86AsmPrinter::emitAsanMemaccessPartial(Module &M, unsigned Reg,
+ const ASanAccessInfo &AccessInfo,
+ MCSubtargetInfo &STI) {
+ assert(AccessInfo.AccessSizeIndex == 0 || AccessInfo.AccessSizeIndex == 1 ||
+ AccessInfo.AccessSizeIndex == 2);
+ assert(Reg != X86::R8);
+
+ uint64_t ShadowBase;
+ int MappingScale;
+ bool OrShadowOffset;
+ getAddressSanitizerParams(
+ Triple(M.getTargetTriple()), M.getDataLayout().getPointerSizeInBits(),
+ AccessInfo.CompileKernel, &ShadowBase, &MappingScale, &OrShadowOffset);
+
+ OutStreamer->emitInstruction(
+ MCInstBuilder(X86::MOV64rr).addReg(X86::R8).addReg(X86::NoRegister + Reg),
+ STI);
+ OutStreamer->emitInstruction(MCInstBuilder(X86::SHR64ri)
+ .addReg(X86::R8)
+ .addReg(X86::R8)
+ .addImm(MappingScale),
+ STI);
+ if (OrShadowOffset) {
+ OutStreamer->emitInstruction(MCInstBuilder(X86::OR64ri32)
+ .addReg(X86::R8)
+ .addReg(X86::R8)
+ .addImm(ShadowBase),
+ STI);
+ OutStreamer->emitInstruction(MCInstBuilder(X86::MOV8rm)
+ .addReg(X86::R8B)
+ .addReg(X86::R8)
+ .addImm(1)
+ .addReg(X86::NoRegister)
+ .addImm(0)
+ .addReg(X86::NoRegister),
+ STI);
+ OutStreamer->emitInstruction(
+ MCInstBuilder(X86::TEST8rr).addReg(X86::R8B).addReg(X86::R8B), STI);
+ } else {
+ OutStreamer->emitInstruction(MCInstBuilder(X86::MOVSX32rm8)
+ .addReg(X86::R8D)
+ .addReg(X86::R8)
+ .addImm(1)
+ .addReg(X86::NoRegister)
+ .addImm(ShadowBase)
+ .addReg(X86::NoRegister),
+ STI);
+ OutStreamer->emitInstruction(
+ MCInstBuilder(X86::TEST32rr).addReg(X86::R8D).addReg(X86::R8D), STI);
+ }
+ MCSymbol *AdditionalCheck = OutContext.createTempSymbol();
+ OutStreamer->emitInstruction(
+ MCInstBuilder(X86::JCC_1)
+ .addExpr(MCSymbolRefExpr::create(AdditionalCheck, OutContext))
+ .addImm(X86::COND_NE),
+ STI);
+ MCSymbol *ReturnSym = OutContext.createTempSymbol();
+ OutStreamer->emitLabel(ReturnSym);
+ OutStreamer->emitInstruction(MCInstBuilder(getRetOpcode(*Subtarget)), STI);
+
+ // Shadow byte is non-zero, so we need to perform additional checks.
+ OutStreamer->emitLabel(AdditionalCheck);
+ OutStreamer->emitInstruction(MCInstBuilder(X86::PUSH64r).addReg(X86::RCX),
+ STI);
+ OutStreamer->emitInstruction(MCInstBuilder(X86::MOV64rr)
+ .addReg(X86::RCX)
+ .addReg(X86::NoRegister + Reg),
+ STI);
+ const size_t Granularity = 1ULL << MappingScale;
+ OutStreamer->emitInstruction(MCInstBuilder(X86::AND32ri8)
+ .addReg(X86::NoRegister)
+ .addReg(X86::ECX)
+ .addImm(Granularity - 1),
+ STI);
+ if (AccessInfo.AccessSizeIndex == 1) {
+ OutStreamer->emitInstruction(MCInstBuilder(X86::ADD32ri8)
+ .addReg(X86::NoRegister)
+ .addReg(X86::ECX)
+ .addImm(1),
+ STI);
+ } else if (AccessInfo.AccessSizeIndex == 2) {
+ OutStreamer->emitInstruction(MCInstBuilder(X86::ADD32ri8)
+ .addReg(X86::NoRegister)
+ .addReg(X86::ECX)
+ .addImm(3),
+ STI);
+ }
+
+ OutStreamer->emitInstruction(
+ MCInstBuilder(X86::CMP32rr).addReg(X86::ECX).addReg(X86::R8D).addImm(1),
+ STI);
+ OutStreamer->emitInstruction(MCInstBuilder(X86::POP64r).addReg(X86::RCX),
+ STI);
+ OutStreamer->emitInstruction(
+ MCInstBuilder(X86::JCC_1)
+ .addExpr(MCSymbolRefExpr::create(ReturnSym, OutContext))
+ .addImm(X86::COND_L),
+ STI);
+
+ emitAsanReportError(M, Reg, AccessInfo, STI);
+}
+
+void X86AsmPrinter::emitAsanMemaccessFull(Module &M, unsigned Reg,
+ const ASanAccessInfo &AccessInfo,
+ MCSubtargetInfo &STI) {
+ assert(AccessInfo.AccessSizeIndex == 3 || AccessInfo.AccessSizeIndex == 4);
+ assert(Reg != X86::R8);
+
+ uint64_t ShadowBase;
+ int MappingScale;
+ bool OrShadowOffset;
+ getAddressSanitizerParams(
+ Triple(M.getTargetTriple()), M.getDataLayout().getPointerSizeInBits(),
+ AccessInfo.CompileKernel, &ShadowBase, &MappingScale, &OrShadowOffset);
+
+ OutStreamer->emitInstruction(
+ MCInstBuilder(X86::MOV64rr).addReg(X86::R8).addReg(X86::NoRegister + Reg),
+ STI);
+ OutStreamer->emitInstruction(MCInstBuilder(X86::SHR64ri)
+ .addReg(X86::R8)
+ .addReg(X86::R8)
+ .addImm(MappingScale),
+ STI);
+ if (OrShadowOffset) {
+ OutStreamer->emitInstruction(MCInstBuilder(X86::OR64ri32)
+ .addReg(X86::R8)
+ .addReg(X86::R8)
+ .addImm(ShadowBase),
+ STI);
+ auto OpCode = AccessInfo.AccessSizeIndex == 3 ? X86::CMP8mi : X86::CMP16mi8;
+ OutStreamer->emitInstruction(MCInstBuilder(OpCode)
+ .addReg(X86::R8)
+ .addImm(1)
+ .addReg(X86::NoRegister)
+ .addImm(0)
+ .addReg(X86::NoRegister)
+ .addImm(0),
+ STI);
+ } else {
+ auto OpCode = AccessInfo.AccessSizeIndex == 3 ? X86::CMP8mi : X86::CMP16mi8;
+ OutStreamer->emitInstruction(MCInstBuilder(OpCode)
+ .addReg(X86::R8)
+ .addImm(1)
+ .addReg(X86::NoRegister)
+ .addImm(ShadowBase)
+ .addReg(X86::NoRegister)
+ .addImm(0),
+ STI);
+ }
+ MCSymbol *ReportCode = OutContext.createTempSymbol();
+ OutStreamer->emitInstruction(
+ MCInstBuilder(X86::JCC_1)
+ .addExpr(MCSymbolRefExpr::create(ReportCode, OutContext))
+ .addImm(X86::COND_NE),
+ STI);
+ MCSymbol *ReturnSym = OutContext.createTempSymbol();
+ OutStreamer->emitLabel(ReturnSym);
+ OutStreamer->emitInstruction(MCInstBuilder(getRetOpcode(*Subtarget)), STI);
+
+ OutStreamer->emitLabel(ReportCode);
+ emitAsanReportError(M, Reg, AccessInfo, STI);
+}
+
+void X86AsmPrinter::emitAsanReportError(Module &M, unsigned Reg,
+ const ASanAccessInfo &AccessInfo,
+ MCSubtargetInfo &STI) {
+ std::string Name = AccessInfo.IsWrite ? "store" : "load";
+ MCSymbol *ReportError = OutContext.getOrCreateSymbol(
+ "__asan_report_" + Name + utostr(1ULL << AccessInfo.AccessSizeIndex));
+ OutStreamer->emitInstruction(MCInstBuilder(X86::MOV64rr)
+ .addReg(X86::RDI)
+ .addReg(X86::NoRegister + Reg),
+ STI);
+ OutStreamer->emitInstruction(
+ MCInstBuilder(X86::JMP_4)
+ .addExpr(MCSymbolRefExpr::create(ReportError, MCSymbolRefExpr::VK_PLT,
+ OutContext)),
+ STI);
+}
+
+void X86AsmPrinter::emitAsanMemaccessSymbols(Module &M) {
+ if (AsanMemaccessSymbols.empty())
+ return;
+
+ const Triple &TT = TM.getTargetTriple();
+ assert(TT.isOSBinFormatELF());
+ std::unique_ptr<MCSubtargetInfo> STI(
+ TM.getTarget().createMCSubtargetInfo(TT.str(), "", ""));
+ assert(STI && "Unable to create subtarget info");
+
+ for (auto &P : AsanMemaccessSymbols) {
+ MCSymbol *Sym = P.second;
+ OutStreamer->SwitchSection(OutContext.getELFSection(
+ ".text.hot", ELF::SHT_PROGBITS,
+ ELF::SHF_EXECINSTR | ELF::SHF_ALLOC | ELF::SHF_GROUP, 0, Sym->getName(),
+ /*IsComdat=*/true));
+
+ OutStreamer->emitSymbolAttribute(Sym, MCSA_ELF_TypeFunction);
+ OutStreamer->emitSymbolAttribute(Sym, MCSA_Weak);
+ OutStreamer->emitSymbolAttribute(Sym, MCSA_Hidden);
+ OutStreamer->emitLabel(Sym);
+
+ unsigned Reg = std::get<0>(P.first);
+ ASanAccessInfo AccessInfo(std::get<1>(P.first));
+
+ if (AccessInfo.AccessSizeIndex < 3) {
+ emitAsanMemaccessPartial(M, Reg, AccessInfo, *STI);
+ } else {
+ emitAsanMemaccessFull(M, Reg, AccessInfo, *STI);
+ }
+ }
+}
+
void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
X86MCInstLower &MCIL) {
// PATCHABLE_OP minsize, opcode, operands
@@ -1477,7 +1721,7 @@ void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
// First we emit the label and the jump.
auto CurSled = OutContext.createTempSymbol("xray_event_sled_", true);
OutStreamer->AddComment("# XRay Custom Event Log");
- OutStreamer->emitCodeAlignment(2);
+ OutStreamer->emitCodeAlignment(2, &getSubtargetInfo());
OutStreamer->emitLabel(CurSled);
// Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
@@ -1573,7 +1817,7 @@ void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
// First we emit the label and the jump.
auto CurSled = OutContext.createTempSymbol("xray_typed_event_sled_", true);
OutStreamer->AddComment("# XRay Typed Event Log");
- OutStreamer->emitCodeAlignment(2);
+ OutStreamer->emitCodeAlignment(2, &getSubtargetInfo());
OutStreamer->emitLabel(CurSled);
// Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
@@ -1675,7 +1919,7 @@ void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
// call <relative offset, 32-bits> // 5 bytes
//
auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
- OutStreamer->emitCodeAlignment(2);
+ OutStreamer->emitCodeAlignment(2, &getSubtargetInfo());
OutStreamer->emitLabel(CurSled);
// Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
@@ -1705,7 +1949,7 @@ void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI,
//
// This just makes sure that the alignment for the next instruction is 2.
auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
- OutStreamer->emitCodeAlignment(2);
+ OutStreamer->emitCodeAlignment(2, &getSubtargetInfo());
OutStreamer->emitLabel(CurSled);
unsigned OpCode = MI.getOperand(0).getImm();
MCInst Ret;
@@ -1729,7 +1973,7 @@ void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI,
// the PATCHABLE_FUNCTION_ENTER case, followed by the lowering of the actual
// tail call much like how we have it in PATCHABLE_RET.
auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
- OutStreamer->emitCodeAlignment(2);
+ OutStreamer->emitCodeAlignment(2, &getSubtargetInfo());
OutStreamer->emitLabel(CurSled);
auto Target = OutContext.createTempSymbol();
@@ -2563,6 +2807,9 @@ void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
return;
+ case X86::ASAN_CHECK_MEMACCESS:
+ return LowerASAN_CHECK_MEMACCESS(*MI);
+
case X86::MORESTACK_RET_RESTORE_R10:
// Return, then restore R10.
EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
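For orientation, the inline sequence emitted by emitAsanMemaccessPartial above corresponds roughly to the following C++ sketch of the usual ASan shadow-memory check (illustrative only; the helper name and pointer arithmetic are assumptions, not code from this patch):

#include <cstdint>

// Sketch of the shadow test the AsmPrinter expands inline (assumed helper,
// not part of the patch).
static bool asanAccessIsPoisoned(uint64_t Addr, unsigned AccessSize,
                                 uint64_t ShadowBase, int MappingScale,
                                 bool OrShadowOffset) {
  uint64_t Shadow = Addr >> MappingScale;
  Shadow = OrShadowOffset ? (Shadow | ShadowBase) : (Shadow + ShadowBase);
  int8_t ShadowByte = *reinterpret_cast<int8_t *>(Shadow);
  if (ShadowByte == 0)
    return false; // Whole shadow granule is addressable: fast path, just RET.
  // Slow path: only the first ShadowByte bytes of the granule are valid, so
  // the access is fine only if its last byte still falls below that count.
  int64_t LastByte = (Addr & ((1ULL << MappingScale) - 1)) + AccessSize - 1;
  return LastByte >= ShadowByte; // Poisoned: tail-call __asan_report_*.
}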
diff --git a/llvm/lib/Target/X86/X86MachineFunctionInfo.h b/llvm/lib/Target/X86/X86MachineFunctionInfo.h
index 46d2e2a66fd6..99d1a97380dd 100644
--- a/llvm/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/llvm/lib/Target/X86/X86MachineFunctionInfo.h
@@ -102,8 +102,8 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
/// True if this function uses the red zone.
bool UsesRedZone = false;
- /// True if this function has WIN_ALLOCA instructions.
- bool HasWinAlloca = false;
+ /// True if this function has DYN_ALLOCA instructions.
+ bool HasDynAlloca = false;
/// True if this function has any preallocated calls.
bool HasPreallocatedCall = false;
@@ -113,6 +113,10 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
/// other tools to detect the extended record.
bool HasSwiftAsyncContext = false;
+ /// True if this function has virtual tile registers. This is used to
+ /// determine if we should insert tilerelease in frame lowering.
+ bool HasVirtualTileReg = false;
+
Optional<int> SwiftAsyncContextFrameIdx;
ValueMap<const Value *, size_t> PreallocatedIds;
@@ -198,8 +202,8 @@ public:
bool getUsesRedZone() const { return UsesRedZone; }
void setUsesRedZone(bool V) { UsesRedZone = V; }
- bool hasWinAlloca() const { return HasWinAlloca; }
- void setHasWinAlloca(bool v) { HasWinAlloca = v; }
+ bool hasDynAlloca() const { return HasDynAlloca; }
+ void setHasDynAlloca(bool v) { HasDynAlloca = v; }
bool hasPreallocatedCall() const { return HasPreallocatedCall; }
void setHasPreallocatedCall(bool v) { HasPreallocatedCall = v; }
@@ -207,6 +211,9 @@ public:
bool hasSwiftAsyncContext() const { return HasSwiftAsyncContext; }
void setHasSwiftAsyncContext(bool v) { HasSwiftAsyncContext = v; }
+ bool hasVirtualTileReg() const { return HasVirtualTileReg; }
+ void setHasVirtualTileReg(bool v) { HasVirtualTileReg = v; }
+
Optional<int> getSwiftAsyncContextFrameIdx() const {
return SwiftAsyncContextFrameIdx;
}
diff --git a/llvm/lib/Target/X86/X86OptimizeLEAs.cpp b/llvm/lib/Target/X86/X86OptimizeLEAs.cpp
index ab4d2bd05772..6967a96ce83b 100644
--- a/llvm/lib/Target/X86/X86OptimizeLEAs.cpp
+++ b/llvm/lib/Target/X86/X86OptimizeLEAs.cpp
@@ -503,9 +503,7 @@ bool X86OptimizeLEAPass::removeRedundantAddrCalc(MemOpMap &LEAs) {
MachineBasicBlock *MBB = (*LEAs.begin()->second.begin())->getParent();
// Process all instructions in basic block.
- for (auto I = MBB->begin(), E = MBB->end(); I != E;) {
- MachineInstr &MI = *I++;
-
+ for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) {
// Instruction must be load or store.
if (!MI.mayLoadOrStore())
continue;
@@ -655,9 +653,8 @@ bool X86OptimizeLEAPass::removeRedundantLEAs(MemOpMap &LEAs) {
// isReplaceable function.
Register FirstVReg = First.getOperand(0).getReg();
Register LastVReg = Last.getOperand(0).getReg();
- for (auto UI = MRI->use_begin(LastVReg), UE = MRI->use_end();
- UI != UE;) {
- MachineOperand &MO = *UI++;
+ for (MachineOperand &MO :
+ llvm::make_early_inc_range(MRI->use_operands(LastVReg))) {
MachineInstr &MI = *MO.getParent();
if (MI.isDebugValue()) {
diff --git a/llvm/lib/Target/X86/X86PadShortFunction.cpp b/llvm/lib/Target/X86/X86PadShortFunction.cpp
index e10dab72078d..47ae517ae76d 100644
--- a/llvm/lib/Target/X86/X86PadShortFunction.cpp
+++ b/llvm/lib/Target/X86/X86PadShortFunction.cpp
@@ -174,12 +174,9 @@ void PadShortFunc::findReturns(MachineBasicBlock *MBB, unsigned int Cycles) {
}
// Follow branches in BB and look for returns
- for (MachineBasicBlock::succ_iterator I = MBB->succ_begin();
- I != MBB->succ_end(); ++I) {
- if (*I == MBB)
- continue;
- findReturns(*I, Cycles);
- }
+ for (MachineBasicBlock *Succ : MBB->successors())
+ if (Succ != MBB)
+ findReturns(Succ, Cycles);
}
/// cyclesUntilReturn - return true if the MBB has a return instruction,
diff --git a/llvm/lib/Target/X86/X86PfmCounters.td b/llvm/lib/Target/X86/X86PfmCounters.td
index 3844667ccc74..25fcba1a7581 100644
--- a/llvm/lib/Target/X86/X86PfmCounters.td
+++ b/llvm/lib/Target/X86/X86PfmCounters.td
@@ -142,8 +142,24 @@ def SkylakeServerPfmCounters : ProcPfmCounters {
def : PfmCountersBinding<"skylake-avx512", SkylakeServerPfmCounters>;
def : PfmCountersBinding<"cascadelake", SkylakeServerPfmCounters>;
def : PfmCountersBinding<"cannonlake", SkylakeServerPfmCounters>;
-def : PfmCountersBinding<"icelake-client", SkylakeServerPfmCounters>;
-def : PfmCountersBinding<"icelake-server", SkylakeServerPfmCounters>;
+
+def IceLakePfmCounters : ProcPfmCounters {
+ let CycleCounter = UnhaltedCoreCyclesPfmCounter;
+ let UopsCounter = UopsIssuedPfmCounter;
+ let IssueCounters = [
+ PfmIssueCounter<"ICXPort0", "uops_dispatched_port:port_0">,
+ PfmIssueCounter<"ICXPort1", "uops_dispatched_port:port_1">,
+ PfmIssueCounter<"ICXPort23", "uops_dispatched_port:port_2_3">,
+ PfmIssueCounter<"ICXPort49", "uops_dispatched_port:port_4_9">,
+ PfmIssueCounter<"ICXPort5", "uops_dispatched_port:port_5">,
+ PfmIssueCounter<"ICXPort6", "uops_dispatched_port:port_6">,
+ PfmIssueCounter<"ICXPort78", "uops_dispatched_port:port_7_8">
+ ];
+}
+def : PfmCountersBinding<"icelake-client", IceLakePfmCounters>;
+def : PfmCountersBinding<"icelake-server", IceLakePfmCounters>;
+def : PfmCountersBinding<"rocketlake", IceLakePfmCounters>;
+def : PfmCountersBinding<"tigerlake", IceLakePfmCounters>;
// AMD X86 Counters.
// Set basic counters for AMD cpus that we know libpfm4 supports.
diff --git a/llvm/lib/Target/X86/X86PreTileConfig.cpp b/llvm/lib/Target/X86/X86PreTileConfig.cpp
index b85a0b61d6f6..5d21f8666ec6 100644
--- a/llvm/lib/Target/X86/X86PreTileConfig.cpp
+++ b/llvm/lib/Target/X86/X86PreTileConfig.cpp
@@ -25,6 +25,7 @@
#include "X86.h"
#include "X86InstrBuilder.h"
+#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -235,6 +236,7 @@ bool X86PreTileConfig::runOnMachineFunction(MachineFunction &MF) {
const TargetInstrInfo *TII = ST.getInstrInfo();
const TargetRegisterInfo *TRI = ST.getRegisterInfo();
const TargetRegisterClass *RC = TRI->getRegClass(X86::TILERegClassID);
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
BitVector AMXRegs(TRI->getNumRegs());
for (unsigned I = 0; I < RC->getNumRegs(); I++)
@@ -294,6 +296,7 @@ bool X86PreTileConfig::runOnMachineFunction(MachineFunction &MF) {
// There's no AMX instruction if we didn't find a tile config live in point.
if (CfgNeedInsert.empty())
return false;
+ X86FI->setHasVirtualTileReg(true);
// Avoid to insert ldtilecfg before any shape defs.
SmallVector<MachineBasicBlock *, 8> WorkList;
@@ -323,7 +326,7 @@ bool X86PreTileConfig::runOnMachineFunction(MachineFunction &MF) {
ST.getTileConfigSize(), ST.getTileConfigAlignment(), false);
// Try to insert for the tile config live in points.
- for (auto I : CfgNeedInsert) {
+ for (const auto &I : CfgNeedInsert) {
SmallSet<MIRef, 8> InsertPoints;
SmallVector<MIRef, 8> WorkList({I});
while (!WorkList.empty()) {
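The new hasVirtualTileReg() flag is set here so a later pass can tell whether AMX tile registers were ever used; per the comment in X86MachineFunctionInfo.h it is meant for frame lowering to decide whether to emit tilerelease. A hypothetical consumer, for illustration only:

#include "llvm/CodeGen/MachineFunction.h"
// Target-internal header, included here only for the sketch.
#include "X86MachineFunctionInfo.h"

// Sketch of a query a later pass could make (assumed helper, not from the patch).
static bool functionUsedAMXTiles(const llvm::MachineFunction &MF) {
  const auto *X86FI = MF.getInfo<llvm::X86MachineFunctionInfo>();
  return X86FI->hasVirtualTileReg(); // Set by X86PreTileConfig above.
}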
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
index c4748423baea..130cb61cdde2 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -816,10 +816,10 @@ unsigned X86RegisterInfo::findDeadCallerSavedReg(
return 0;
case TargetOpcode::PATCHABLE_RET:
case X86::RET:
- case X86::RETL:
- case X86::RETQ:
- case X86::RETIL:
- case X86::RETIQ:
+ case X86::RET32:
+ case X86::RET64:
+ case X86::RETI32:
+ case X86::RETI64:
case X86::TCRETURNdi:
case X86::TCRETURNri:
case X86::TCRETURNmi:
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.td b/llvm/lib/Target/X86/X86RegisterInfo.td
index 1ab9d2588a90..d835f452b67e 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.td
+++ b/llvm/lib/Target/X86/X86RegisterInfo.td
@@ -373,12 +373,6 @@ def CR15 : X86Reg<"cr15", 15>;
def EIZ : X86Reg<"eiz", 4>;
def RIZ : X86Reg<"riz", 4>;
-// Bound registers, used in MPX instructions
-def BND0 : X86Reg<"bnd0", 0>;
-def BND1 : X86Reg<"bnd1", 1>;
-def BND2 : X86Reg<"bnd2", 2>;
-def BND3 : X86Reg<"bnd3", 3>;
-
// CET registers - Shadow Stack Pointer
def SSP : X86Reg<"ssp", 0>;
@@ -436,6 +430,12 @@ def GR64 : RegisterClass<"X86", [i64], 64,
(add RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
RBX, R14, R15, R12, R13, RBP, RSP, RIP)>;
+// GR64NoR8 - 64-bit GPRs without R8 and RIP. Could be used when emitting code
+// for intrinsics, which use implicit input registers.
+def GR64NoR8 : RegisterClass<"X86", [i64], 64,
+ (add RAX, RCX, RDX, RSI, RDI, R9, R10, R11,
+ RBX, R14, R15, R12, R13, RBP, RSP)>;
+
// Segment registers for use by MOV instructions (and others) that have a
// segment register as one operand. Always contain a 16-bit segment
// descriptor.
@@ -567,9 +567,9 @@ def RSTi : RegisterOperand<RST, "printSTiRegOperand">;
// Generic vector registers: VR64 and VR128.
// Ensure that float types are declared first - only float is legal on SSE1.
def VR64: RegisterClass<"X86", [x86mmx], 64, (sequence "MM%u", 0, 7)>;
-def VR128 : RegisterClass<"X86", [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64, f128],
+def VR128 : RegisterClass<"X86", [v4f32, v2f64, v8f16, v16i8, v8i16, v4i32, v2i64, f128],
128, (add FR32)>;
-def VR256 : RegisterClass<"X86", [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64],
+def VR256 : RegisterClass<"X86", [v8f32, v4f64, v16f16, v32i8, v16i16, v8i32, v4i64],
256, (sequence "YMM%u", 0, 15)>;
// Status flags registers.
@@ -587,7 +587,7 @@ def DFCCR : RegisterClass<"X86", [i32], 32, (add DF)> {
}
// AVX-512 vector/mask registers.
-def VR512 : RegisterClass<"X86", [v16f32, v8f64, v64i8, v32i16, v16i32, v8i64],
+def VR512 : RegisterClass<"X86", [v16f32, v8f64, v32f16, v64i8, v32i16, v16i32, v8i64],
512, (sequence "ZMM%u", 0, 31)>;
// Represents the lower 16 registers that have VEX/legacy encodable subregs.
@@ -599,10 +599,12 @@ def FR32X : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 31)>;
def FR64X : RegisterClass<"X86", [f64], 64, (add FR32X)>;
+def FR16X : RegisterClass<"X86", [f16], 16, (add FR32X)>;
+
// Extended VR128 and VR256 for AVX-512 instructions
-def VR128X : RegisterClass<"X86", [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64, f128],
+def VR128X : RegisterClass<"X86", [v4f32, v2f64, v8f16, v16i8, v8i16, v4i32, v2i64, f128],
128, (add FR32X)>;
-def VR256X : RegisterClass<"X86", [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64],
+def VR256X : RegisterClass<"X86", [v8f32, v4f64, v16f16, v32i8, v16i16, v8i32, v4i64],
256, (sequence "YMM%u", 0, 31)>;
// Mask registers
@@ -632,9 +634,6 @@ def VK16WM : RegisterClass<"X86", [v16i1], 16, (add VK8WM)> {let Size = 16;}
def VK32WM : RegisterClass<"X86", [v32i1], 32, (add VK16WM)> {let Size = 32;}
def VK64WM : RegisterClass<"X86", [v64i1], 64, (add VK32WM)> {let Size = 64;}
-// Bound registers
-def BNDR : RegisterClass<"X86", [v2i64], 128, (sequence "BND%u", 0, 3)>;
-
// Tiles
let CopyCost = -1 in // Don't allow copying of tile registers
def TILE : RegisterClass<"X86", [x86amx], 8192,
diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td
index d2ced1c67407..2827981b7fb0 100644
--- a/llvm/lib/Target/X86/X86SchedBroadwell.td
+++ b/llvm/lib/Target/X86/X86SchedBroadwell.td
@@ -112,6 +112,25 @@ multiclass BWWriteResPair<X86FoldableSchedWrite SchedRW,
// 2/3/7 cycle to recompute the address.
def : WriteRes<WriteRMW, [BWPort237,BWPort4]>;
+// Loads, stores, and moves, not folded with other operations.
+// Store_addr on 237.
+// Store_data on 4.
+defm : X86WriteRes<WriteStore, [BWPort237, BWPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteStoreNT, [BWPort237, BWPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteLoad, [BWPort23], 5, [1], 1>;
+defm : X86WriteRes<WriteMove, [BWPort0156], 1, [1], 1>;
+
+// Treat misc copies as a move.
+def : InstRW<[WriteMove], (instrs COPY)>;
+
+// Idioms that clear a register, like xorps %xmm0, %xmm0.
+// These can often bypass execution ports completely.
+def : WriteRes<WriteZero, []>;
+
+// Model the effect of clobbering the read-write mask operand of the GATHER operation.
+// Does not cost anything by itself, only has latency, matching that of WriteLoad.
+defm : X86WriteRes<WriteVecMaskedGatherWriteback, [], 5, [], 0>;
+
// Arithmetic.
defm : BWWriteResPair<WriteALU, [BWPort0156], 1>; // Simple integer ALU op.
defm : BWWriteResPair<WriteADC, [BWPort06], 1>; // Integer ALU + flags op.
@@ -123,41 +142,41 @@ defm : X86WriteRes<WriteIMul16Imm, [BWPort1,BWPort0156], 4, [1,1], 2>;
defm : X86WriteRes<WriteIMul16ImmLd, [BWPort1,BWPort0156,BWPort23], 8, [1,1,1], 3>;
defm : BWWriteResPair<WriteIMul16Reg, [BWPort1], 3>;
defm : BWWriteResPair<WriteIMul32, [BWPort1,BWPort06,BWPort0156], 4, [1,1,1], 3>;
+defm : BWWriteResPair<WriteMULX32, [BWPort1,BWPort06,BWPort0156], 3, [1,1,1], 3>;
defm : BWWriteResPair<WriteIMul32Imm, [BWPort1], 3>;
defm : BWWriteResPair<WriteIMul32Reg, [BWPort1], 3>;
defm : BWWriteResPair<WriteIMul64, [BWPort1,BWPort5], 4, [1,1], 2>;
+defm : BWWriteResPair<WriteMULX64, [BWPort1,BWPort5], 3, [1,1], 2>;
defm : BWWriteResPair<WriteIMul64Imm, [BWPort1], 3>;
defm : BWWriteResPair<WriteIMul64Reg, [BWPort1], 3>;
-def : WriteRes<WriteIMulH, []> { let Latency = 3; }
-
-// TODO: Why isn't the BWDivider used consistently?
-defm : X86WriteRes<WriteDiv8, [BWPort0, BWDivider], 25, [1, 10], 1>;
-defm : X86WriteRes<WriteDiv16, [BWPort0,BWPort1,BWPort5,BWPort6,BWPort01,BWPort0156], 80, [7,7,3,3,1,11], 32>;
-defm : X86WriteRes<WriteDiv32, [BWPort0,BWPort1,BWPort5,BWPort6,BWPort01,BWPort0156], 80, [7,7,3,3,1,11], 32>;
-defm : X86WriteRes<WriteDiv64, [BWPort0,BWPort1,BWPort5,BWPort6,BWPort01,BWPort0156], 80, [7,7,3,3,1,11], 32>;
-defm : X86WriteRes<WriteDiv8Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 34, [2,2,2,1,1], 8>;
-defm : X86WriteRes<WriteDiv16Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 34, [2,2,2,1,1], 8>;
-defm : X86WriteRes<WriteDiv32Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 34, [2,2,2,1,1], 8>;
-defm : X86WriteRes<WriteDiv64Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 34, [2,2,2,1,1], 8>;
-
-defm : X86WriteRes<WriteIDiv8, [BWPort0, BWDivider], 25, [1,10], 1>;
-defm : X86WriteRes<WriteIDiv16, [BWPort0, BWDivider], 25, [1,10], 1>;
-defm : X86WriteRes<WriteIDiv32, [BWPort0, BWDivider], 25, [1,10], 1>;
-defm : X86WriteRes<WriteIDiv64, [BWPort0, BWDivider], 25, [1,10], 1>;
-defm : X86WriteRes<WriteIDiv8Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 35, [2,2,2,1,1], 8>;
-defm : X86WriteRes<WriteIDiv16Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 35, [2,2,2,1,1], 8>;
-defm : X86WriteRes<WriteIDiv32Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 35, [2,2,2,1,1], 8>;
-defm : X86WriteRes<WriteIDiv64Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 35, [2,2,2,1,1], 8>;
+def BWWriteIMulH : WriteRes<WriteIMulH, []> { let Latency = 4; }
+def : WriteRes<WriteIMulHLd, []> {
+ let Latency = !add(BWWriteIMulH.Latency, BroadwellModel.LoadLatency);
+}
-defm : X86WriteRes<WriteCMPXCHG,[BWPort06, BWPort0156], 5, [2, 3], 5>;
-defm : X86WriteRes<WriteCMPXCHGRMW,[BWPort23, BWPort06, BWPort0156, BWPort237, BWPort4], 8, [1, 2, 1, 1, 1], 6>;
defm : X86WriteRes<WriteBSWAP32, [BWPort15], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [BWPort06, BWPort15], 2, [1, 1], 2>;
+defm : X86WriteRes<WriteCMPXCHG,[BWPort06, BWPort0156], 5, [2, 3], 5>;
+defm : X86WriteRes<WriteCMPXCHGRMW,[BWPort23, BWPort06, BWPort0156, BWPort237, BWPort4], 8, [1, 2, 1, 1, 1], 6>;
defm : X86WriteRes<WriteXCHG, [BWPort0156], 2, [3], 3>;
-defm : BWWriteResPair<WriteCRC32, [BWPort1], 3>;
+// Integer shifts and rotates.
+defm : BWWriteResPair<WriteShift, [BWPort06], 1>;
+defm : BWWriteResPair<WriteShiftCL, [BWPort06,BWPort0156], 3, [2,1], 3>;
+defm : BWWriteResPair<WriteRotate, [BWPort06], 1, [1], 1>;
+defm : BWWriteResPair<WriteRotateCL, [BWPort06,BWPort0156], 3, [2,1], 3>;
-def : WriteRes<WriteLEA, [BWPort15]>; // LEA instructions can't fold loads.
+// SHLD/SHRD.
+defm : X86WriteRes<WriteSHDrri, [BWPort1], 3, [1], 1>;
+defm : X86WriteRes<WriteSHDrrcl,[BWPort1,BWPort06,BWPort0156], 6, [1, 1, 2], 4>;
+defm : X86WriteRes<WriteSHDmri, [BWPort1,BWPort23,BWPort237,BWPort0156], 9, [1, 1, 1, 1], 4>;
+defm : X86WriteRes<WriteSHDmrcl,[BWPort1,BWPort23,BWPort237,BWPort06,BWPort0156], 11, [1, 1, 1, 1, 2], 6>;
+
+// Branches don't produce values, so they have no latency, but they still
+// consume resources. Indirect branches can fold loads.
+defm : BWWriteResPair<WriteJump, [BWPort06], 1>;
+
+defm : BWWriteResPair<WriteCRC32, [BWPort1], 3>;
defm : BWWriteResPair<WriteCMOV, [BWPort06], 1>; // Conditional move.
defm : X86WriteRes<WriteFCMOV, [BWPort1], 3, [1], 1>; // x87 conditional move.
@@ -176,6 +195,11 @@ defm : X86WriteRes<WriteBitTestSet, [BWPort06], 1, [1], 1>; // Bit Test + S
defm : X86WriteRes<WriteBitTestSetImmLd, [BWPort06,BWPort23], 5, [1,1], 3>;
defm : X86WriteRes<WriteBitTestSetRegLd, [BWPort0156,BWPort23], 5, [1,1], 2>;
+// This is for simple LEAs with one or two input operands.
+// The complex ones can only execute on port 1, and they require two cycles on
+// the port to read all inputs. We don't model that.
+def : WriteRes<WriteLEA, [BWPort15]>;
+
// Bit counts.
defm : BWWriteResPair<WriteBSF, [BWPort1], 3>;
defm : BWWriteResPair<WriteBSR, [BWPort1], 3>;
@@ -183,43 +207,29 @@ defm : BWWriteResPair<WriteLZCNT, [BWPort1], 3>;
defm : BWWriteResPair<WriteTZCNT, [BWPort1], 3>;
defm : BWWriteResPair<WritePOPCNT, [BWPort1], 3>;
-// Integer shifts and rotates.
-defm : BWWriteResPair<WriteShift, [BWPort06], 1>;
-defm : BWWriteResPair<WriteShiftCL, [BWPort06,BWPort0156], 3, [2,1], 3>;
-defm : BWWriteResPair<WriteRotate, [BWPort06], 1, [1], 1>;
-defm : BWWriteResPair<WriteRotateCL, [BWPort06,BWPort0156], 3, [2,1], 3>;
-
-// SHLD/SHRD.
-defm : X86WriteRes<WriteSHDrri, [BWPort1], 3, [1], 1>;
-defm : X86WriteRes<WriteSHDrrcl,[BWPort1,BWPort06,BWPort0156], 6, [1, 1, 2], 4>;
-defm : X86WriteRes<WriteSHDmri, [BWPort1,BWPort23,BWPort237,BWPort0156], 9, [1, 1, 1, 1], 4>;
-defm : X86WriteRes<WriteSHDmrcl,[BWPort1,BWPort23,BWPort237,BWPort06,BWPort0156], 11, [1, 1, 1, 1, 2], 6>;
-
// BMI1 BEXTR/BLS, BMI2 BZHI
defm : BWWriteResPair<WriteBEXTR, [BWPort06,BWPort15], 2, [1,1], 2>;
defm : BWWriteResPair<WriteBLS, [BWPort15], 1>;
defm : BWWriteResPair<WriteBZHI, [BWPort15], 1>;
-// Loads, stores, and moves, not folded with other operations.
-defm : X86WriteRes<WriteLoad, [BWPort23], 5, [1], 1>;
-defm : X86WriteRes<WriteStore, [BWPort237, BWPort4], 1, [1,1], 1>;
-defm : X86WriteRes<WriteStoreNT, [BWPort237, BWPort4], 1, [1,1], 2>;
-defm : X86WriteRes<WriteMove, [BWPort0156], 1, [1], 1>;
-
-// Model the effect of clobbering the read-write mask operand of the GATHER operation.
-// Does not cost anything by itself, only has latency, matching that of the WriteLoad,
-defm : X86WriteRes<WriteVecMaskedGatherWriteback, [], 5, [], 0>;
-
-// Idioms that clear a register, like xorps %xmm0, %xmm0.
-// These can often bypass execution ports completely.
-def : WriteRes<WriteZero, []>;
-
-// Treat misc copies as a move.
-def : InstRW<[WriteMove], (instrs COPY)>;
-
-// Branches don't produce values, so they have no latency, but they still
-// consume resources. Indirect branches can fold loads.
-defm : BWWriteResPair<WriteJump, [BWPort06], 1>;
+// TODO: Why isn't the BWDivider used consistently?
+defm : X86WriteRes<WriteDiv8, [BWPort0, BWDivider], 25, [1, 10], 1>;
+defm : X86WriteRes<WriteDiv16, [BWPort0,BWPort1,BWPort5,BWPort6,BWPort01,BWPort0156], 80, [7,7,3,3,1,11], 32>;
+defm : X86WriteRes<WriteDiv32, [BWPort0,BWPort1,BWPort5,BWPort6,BWPort01,BWPort0156], 80, [7,7,3,3,1,11], 32>;
+defm : X86WriteRes<WriteDiv64, [BWPort0,BWPort1,BWPort5,BWPort6,BWPort01,BWPort0156], 80, [7,7,3,3,1,11], 32>;
+defm : X86WriteRes<WriteDiv8Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 34, [2,2,2,1,1], 8>;
+defm : X86WriteRes<WriteDiv16Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 34, [2,2,2,1,1], 8>;
+defm : X86WriteRes<WriteDiv32Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 34, [2,2,2,1,1], 8>;
+defm : X86WriteRes<WriteDiv64Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 34, [2,2,2,1,1], 8>;
+
+defm : X86WriteRes<WriteIDiv8, [BWPort0, BWDivider], 25, [1,10], 1>;
+defm : X86WriteRes<WriteIDiv16, [BWPort0, BWDivider], 25, [1,10], 1>;
+defm : X86WriteRes<WriteIDiv32, [BWPort0, BWDivider], 25, [1,10], 1>;
+defm : X86WriteRes<WriteIDiv64, [BWPort0, BWDivider], 25, [1,10], 1>;
+defm : X86WriteRes<WriteIDiv8Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 35, [2,2,2,1,1], 8>;
+defm : X86WriteRes<WriteIDiv16Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 35, [2,2,2,1,1], 8>;
+defm : X86WriteRes<WriteIDiv32Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 35, [2,2,2,1,1], 8>;
+defm : X86WriteRes<WriteIDiv64Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 35, [2,2,2,1,1], 8>;
// Floating point. This covers both scalar and vector operations.
defm : X86WriteRes<WriteFLD0, [BWPort01], 1, [1], 1>;
@@ -245,6 +255,7 @@ defm : X86WriteRes<WriteFMaskedStore64Y, [BWPort0,BWPort4,BWPort237,BWPort15], 5
defm : X86WriteRes<WriteFMove, [BWPort5], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveX, [BWPort5], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveY, [BWPort5], 1, [1], 1>;
+defm : X86WriteRes<WriteEMMS, [BWPort01,BWPort15,BWPort015,BWPort0156], 31, [8,1,21,1], 31>;
defm : BWWriteResPair<WriteFAdd, [BWPort1], 3, [1], 1, 5>; // Floating point add/sub.
defm : BWWriteResPair<WriteFAddX, [BWPort1], 3, [1], 1, 5>; // Floating point add/sub (XMM).
@@ -285,6 +296,16 @@ defm : BWWriteResPair<WriteFDiv64X, [BWPort0,BWFPDivider], 14, [1,8], 1, 5>; //
defm : BWWriteResPair<WriteFDiv64Y, [BWPort0,BWPort015,BWFPDivider], 23, [2,1,16], 3, 6>; // Floating point division (YMM).
defm : X86WriteResPairUnsupported<WriteFDiv64Z>;
+defm : BWWriteResPair<WriteFRcp, [BWPort0], 5, [1], 1, 5>; // Floating point reciprocal estimate.
+defm : BWWriteResPair<WriteFRcpX, [BWPort0], 5, [1], 1, 5>; // Floating point reciprocal estimate (XMM).
+defm : BWWriteResPair<WriteFRcpY, [BWPort0,BWPort015], 11, [2,1], 3, 6>; // Floating point reciprocal estimate (YMM/ZMM).
+defm : X86WriteResPairUnsupported<WriteFRcpZ>;
+
+defm : BWWriteResPair<WriteFRsqrt, [BWPort0], 5, [1], 1, 5>; // Floating point reciprocal square root estimate.
+defm : BWWriteResPair<WriteFRsqrtX,[BWPort0], 5, [1], 1, 5>; // Floating point reciprocal square root estimate (XMM).
+defm : BWWriteResPair<WriteFRsqrtY,[BWPort0,BWPort015], 11, [2,1], 3, 6>; // Floating point reciprocal square root estimate (YMM/ZMM).
+defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
+
defm : X86WriteRes<WriteFSqrt, [BWPort0,BWFPDivider], 11, [1,4], 1>; // Floating point square root.
defm : X86WriteRes<WriteFSqrtLd, [BWPort0,BWPort23,BWFPDivider], 16, [1,1,7], 2>;
defm : BWWriteResPair<WriteFSqrtX, [BWPort0,BWFPDivider], 11, [1,7], 1, 5>; // Floating point square root (XMM).
@@ -297,16 +318,6 @@ defm : BWWriteResPair<WriteFSqrt64Y, [BWPort0,BWPort015,BWFPDivider], 29, [2,1,2
defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;
defm : BWWriteResPair<WriteFSqrt80, [BWPort0,BWFPDivider], 23, [1,9]>; // Floating point long double square root.
-defm : BWWriteResPair<WriteFRcp, [BWPort0], 5, [1], 1, 5>; // Floating point reciprocal estimate.
-defm : BWWriteResPair<WriteFRcpX, [BWPort0], 5, [1], 1, 5>; // Floating point reciprocal estimate (XMM).
-defm : BWWriteResPair<WriteFRcpY, [BWPort0,BWPort015], 11, [2,1], 3, 6>; // Floating point reciprocal estimate (YMM/ZMM).
-defm : X86WriteResPairUnsupported<WriteFRcpZ>;
-
-defm : BWWriteResPair<WriteFRsqrt, [BWPort0], 5, [1], 1, 5>; // Floating point reciprocal square root estimate.
-defm : BWWriteResPair<WriteFRsqrtX,[BWPort0], 5, [1], 1, 5>; // Floating point reciprocal square root estimate (XMM).
-defm : BWWriteResPair<WriteFRsqrtY,[BWPort0,BWPort015], 11, [2,1], 3, 6>; // Floating point reciprocal square root estimate (YMM/ZMM).
-defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
-
defm : BWWriteResPair<WriteFMA, [BWPort01], 5, [1], 1, 5>; // Fused Multiply Add.
defm : BWWriteResPair<WriteFMAX, [BWPort01], 5, [1], 1, 5>; // Fused Multiply Add (XMM).
defm : BWWriteResPair<WriteFMAY, [BWPort01], 5, [1], 1, 6>; // Fused Multiply Add (YMM/ZMM).
@@ -336,6 +347,8 @@ defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
defm : BWWriteResPair<WriteFBlend, [BWPort015], 1, [1], 1, 5>; // Floating point vector blends.
defm : BWWriteResPair<WriteFBlendY, [BWPort015], 1, [1], 1, 6>; // Floating point vector blends.
defm : X86WriteResPairUnsupported<WriteFBlendZ>;
+defm : BWWriteResPair<WriteFShuffle256, [BWPort5], 3, [1], 1, 6>; // Fp 256-bit width vector shuffles.
+defm : BWWriteResPair<WriteFVarShuffle256, [BWPort5], 3, [1], 1, 6>; // Fp 256-bit width vector variable shuffles.
defm : BWWriteResPair<WriteFVarBlend, [BWPort5], 2, [2], 2, 5>; // Fp vector variable blends.
defm : BWWriteResPair<WriteFVarBlendY, [BWPort5], 2, [2], 2, 6>; // Fp vector variable blends.
defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
@@ -343,6 +356,48 @@ defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
// FMA Scheduling helper class.
// class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
+// Conversion between integer and float.
+defm : BWWriteResPair<WriteCvtSS2I, [BWPort1], 3>;
+defm : BWWriteResPair<WriteCvtPS2I, [BWPort1], 3>;
+defm : BWWriteResPair<WriteCvtPS2IY, [BWPort1], 3>;
+defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
+defm : BWWriteResPair<WriteCvtSD2I, [BWPort1], 3>;
+defm : BWWriteResPair<WriteCvtPD2I, [BWPort1], 3>;
+defm : BWWriteResPair<WriteCvtPD2IY, [BWPort1], 3>;
+defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
+
+defm : BWWriteResPair<WriteCvtI2SS, [BWPort1], 4>;
+defm : BWWriteResPair<WriteCvtI2PS, [BWPort1], 4>;
+defm : BWWriteResPair<WriteCvtI2PSY, [BWPort1], 4>;
+defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
+defm : BWWriteResPair<WriteCvtI2SD, [BWPort1], 4>;
+defm : BWWriteResPair<WriteCvtI2PD, [BWPort1], 4>;
+defm : BWWriteResPair<WriteCvtI2PDY, [BWPort1], 4>;
+defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
+
+defm : BWWriteResPair<WriteCvtSS2SD, [BWPort1], 3>;
+defm : BWWriteResPair<WriteCvtPS2PD, [BWPort1], 3>;
+defm : BWWriteResPair<WriteCvtPS2PDY, [BWPort1], 3>;
+defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;
+defm : BWWriteResPair<WriteCvtSD2SS, [BWPort1], 3>;
+defm : BWWriteResPair<WriteCvtPD2PS, [BWPort1], 3>;
+defm : BWWriteResPair<WriteCvtPD2PSY, [BWPort1], 3>;
+defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;
+
+defm : X86WriteRes<WriteCvtPH2PS, [BWPort0,BWPort5], 2, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSY, [BWPort0,BWPort5], 2, [1,1], 2>;
+defm : X86WriteResUnsupported<WriteCvtPH2PSZ>;
+defm : X86WriteRes<WriteCvtPH2PSLd, [BWPort0,BWPort23], 6, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSYLd, [BWPort0,BWPort23], 6, [1,1], 2>;
+defm : X86WriteResUnsupported<WriteCvtPH2PSZLd>;
+
+defm : X86WriteRes<WriteCvtPS2PH, [BWPort1,BWPort5], 4, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PHY, [BWPort1,BWPort5], 6, [1,1], 2>;
+defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;
+defm : X86WriteRes<WriteCvtPS2PHSt, [BWPort1,BWPort4,BWPort237], 5, [1,1,1], 3>;
+defm : X86WriteRes<WriteCvtPS2PHYSt, [BWPort1,BWPort4,BWPort237], 7, [1,1,1], 3>;
+defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;
+
// Vector integer operations.
defm : X86WriteRes<WriteVecLoad, [BWPort23], 5, [1], 1>;
defm : X86WriteRes<WriteVecLoadX, [BWPort23], 5, [1], 1>;
@@ -366,12 +421,6 @@ defm : X86WriteRes<WriteVecMoveY, [BWPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveToGpr, [BWPort0], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveFromGpr, [BWPort5], 1, [1], 1>;
-defm : X86WriteRes<WriteEMMS, [BWPort01,BWPort15,BWPort015,BWPort0156], 31, [8,1,21,1], 31>;
-
-defm : BWWriteResPair<WriteVecALU, [BWPort15], 1, [1], 1, 5>; // Vector integer ALU op, no logicals.
-defm : BWWriteResPair<WriteVecALUX, [BWPort15], 1, [1], 1, 5>; // Vector integer ALU op, no logicals.
-defm : BWWriteResPair<WriteVecALUY, [BWPort15], 1, [1], 1, 6>; // Vector integer ALU op, no logicals (YMM/ZMM).
-defm : X86WriteResPairUnsupported<WriteVecALUZ>;
defm : BWWriteResPair<WriteVecLogic, [BWPort015], 1, [1], 1, 5>; // Vector integer and/or/xor.
defm : BWWriteResPair<WriteVecLogicX,[BWPort015], 1, [1], 1, 5>; // Vector integer and/or/xor.
defm : BWWriteResPair<WriteVecLogicY,[BWPort015], 1, [1], 1, 6>; // Vector integer and/or/xor (YMM/ZMM).
@@ -379,6 +428,10 @@ defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
defm : BWWriteResPair<WriteVecTest, [BWPort0,BWPort5], 2, [1,1], 2, 5>; // Vector integer TEST instructions.
defm : BWWriteResPair<WriteVecTestY, [BWPort0,BWPort5], 4, [1,1], 2, 6>; // Vector integer TEST instructions (YMM/ZMM).
defm : X86WriteResPairUnsupported<WriteVecTestZ>;
+defm : BWWriteResPair<WriteVecALU, [BWPort15], 1, [1], 1, 5>; // Vector integer ALU op, no logicals.
+defm : BWWriteResPair<WriteVecALUX, [BWPort15], 1, [1], 1, 5>; // Vector integer ALU op, no logicals.
+defm : BWWriteResPair<WriteVecALUY, [BWPort15], 1, [1], 1, 6>; // Vector integer ALU op, no logicals (YMM/ZMM).
+defm : X86WriteResPairUnsupported<WriteVecALUZ>;
defm : BWWriteResPair<WriteVecIMul, [BWPort0], 5, [1], 1, 5>; // Vector integer multiply.
defm : BWWriteResPair<WriteVecIMulX, [BWPort0], 5, [1], 1, 5>; // Vector integer multiply.
defm : BWWriteResPair<WriteVecIMulY, [BWPort0], 5, [1], 1, 6>; // Vector integer multiply.
@@ -397,6 +450,9 @@ defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
defm : BWWriteResPair<WriteBlend, [BWPort5], 1, [1], 1, 5>; // Vector blends.
defm : BWWriteResPair<WriteBlendY, [BWPort5], 1, [1], 1, 6>; // Vector blends (YMM/ZMM).
defm : X86WriteResPairUnsupported<WriteBlendZ>;
+defm : BWWriteResPair<WriteShuffle256, [BWPort5], 3, [1], 1, 6>; // 256-bit width vector shuffles.
+defm : BWWriteResPair<WriteVPMOV256, [BWPort5], 3, [1], 1, 6>; // 256-bit width packed vector width-changing move.
+defm : BWWriteResPair<WriteVarShuffle256, [BWPort5], 3, [1], 1, 6>; // 256-bit width vector variable shuffles.
defm : BWWriteResPair<WriteVarBlend, [BWPort5], 2, [2], 2, 5>; // Vector variable blends.
defm : BWWriteResPair<WriteVarBlendY, [BWPort5], 2, [2], 2, 6>; // Vector variable blends (YMM/ZMM).
defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
@@ -444,49 +500,7 @@ def : WriteRes<WriteVecExtractSt, [BWPort4,BWPort5,BWPort237]> {
let NumMicroOps = 3;
}
-// Conversion between integer and float.
-defm : BWWriteResPair<WriteCvtSS2I, [BWPort1], 3>;
-defm : BWWriteResPair<WriteCvtPS2I, [BWPort1], 3>;
-defm : BWWriteResPair<WriteCvtPS2IY, [BWPort1], 3>;
-defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
-defm : BWWriteResPair<WriteCvtSD2I, [BWPort1], 3>;
-defm : BWWriteResPair<WriteCvtPD2I, [BWPort1], 3>;
-defm : BWWriteResPair<WriteCvtPD2IY, [BWPort1], 3>;
-defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
-
-defm : BWWriteResPair<WriteCvtI2SS, [BWPort1], 4>;
-defm : BWWriteResPair<WriteCvtI2PS, [BWPort1], 4>;
-defm : BWWriteResPair<WriteCvtI2PSY, [BWPort1], 4>;
-defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
-defm : BWWriteResPair<WriteCvtI2SD, [BWPort1], 4>;
-defm : BWWriteResPair<WriteCvtI2PD, [BWPort1], 4>;
-defm : BWWriteResPair<WriteCvtI2PDY, [BWPort1], 4>;
-defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
-
-defm : BWWriteResPair<WriteCvtSS2SD, [BWPort1], 3>;
-defm : BWWriteResPair<WriteCvtPS2PD, [BWPort1], 3>;
-defm : BWWriteResPair<WriteCvtPS2PDY, [BWPort1], 3>;
-defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;
-defm : BWWriteResPair<WriteCvtSD2SS, [BWPort1], 3>;
-defm : BWWriteResPair<WriteCvtPD2PS, [BWPort1], 3>;
-defm : BWWriteResPair<WriteCvtPD2PSY, [BWPort1], 3>;
-defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;
-
-defm : X86WriteRes<WriteCvtPH2PS, [BWPort0,BWPort5], 2, [1,1], 2>;
-defm : X86WriteRes<WriteCvtPH2PSY, [BWPort0,BWPort5], 2, [1,1], 2>;
-defm : X86WriteResUnsupported<WriteCvtPH2PSZ>;
-defm : X86WriteRes<WriteCvtPH2PSLd, [BWPort0,BWPort23], 6, [1,1], 2>;
-defm : X86WriteRes<WriteCvtPH2PSYLd, [BWPort0,BWPort23], 6, [1,1], 2>;
-defm : X86WriteResUnsupported<WriteCvtPH2PSZLd>;
-
-defm : X86WriteRes<WriteCvtPS2PH, [BWPort1,BWPort5], 4, [1,1], 2>;
-defm : X86WriteRes<WriteCvtPS2PHY, [BWPort1,BWPort5], 6, [1,1], 2>;
-defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;
-defm : X86WriteRes<WriteCvtPS2PHSt, [BWPort1,BWPort4,BWPort237], 5, [1,1,1], 3>;
-defm : X86WriteRes<WriteCvtPS2PHYSt, [BWPort1,BWPort4,BWPort237], 7, [1,1,1], 3>;
-defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;
-
-// Strings instructions.
+// String instructions.
// Packed Compare Implicit Length Strings, Return Mask
def : WriteRes<WritePCmpIStrM, [BWPort0]> {
@@ -542,7 +556,7 @@ def : WriteRes<WriteVecMOVMSK, [BWPort0]> { let Latency = 3; }
def : WriteRes<WriteVecMOVMSKY, [BWPort0]> { let Latency = 3; }
def : WriteRes<WriteMMXMOVMSK, [BWPort0]> { let Latency = 1; }
-// AES instructions.
+// AES Instructions.
def : WriteRes<WriteAESDecEnc, [BWPort5]> { // Decryption, encryption.
let Latency = 7;
let NumMicroOps = 1;
@@ -578,27 +592,19 @@ def : WriteRes<WriteAESKeyGenLd, [BWPort0, BWPort5, BWPort23, BWPort015]> {
// Carry-less multiplication instructions.
defm : BWWriteResPair<WriteCLMul, [BWPort0], 5>;
+// Load/store MXCSR.
+def : WriteRes<WriteLDMXCSR, [BWPort0,BWPort23,BWPort0156]> { let Latency = 7; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; }
+def : WriteRes<WriteSTMXCSR, [BWPort4,BWPort5,BWPort237]> { let Latency = 2; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; }
// Catch-all for expensive system instructions.
-def : WriteRes<WriteSystem, [BWPort0156]> { let Latency = 100; } // def WriteSystem : SchedWrite;
-
-// AVX2.
-defm : BWWriteResPair<WriteFShuffle256, [BWPort5], 3, [1], 1, 6>; // Fp 256-bit width vector shuffles.
-defm : BWWriteResPair<WriteFVarShuffle256, [BWPort5], 3, [1], 1, 6>; // Fp 256-bit width vector variable shuffles.
-defm : BWWriteResPair<WriteShuffle256, [BWPort5], 3, [1], 1, 6>; // 256-bit width vector shuffles.
-defm : BWWriteResPair<WriteVPMOV256, [BWPort5], 3, [1], 1, 6>; // 256-bit width packed vector width-changing move.
-defm : BWWriteResPair<WriteVarShuffle256, [BWPort5], 3, [1], 1, 6>; // 256-bit width vector variable shuffles.
+def : WriteRes<WriteSystem, [BWPort0156]> { let Latency = 100; }
// Old microcoded instructions that nobody uses.
-def : WriteRes<WriteMicrocoded, [BWPort0156]> { let Latency = 100; } // def WriteMicrocoded : SchedWrite;
+def : WriteRes<WriteMicrocoded, [BWPort0156]> { let Latency = 100; }
// Fence instructions.
def : WriteRes<WriteFence, [BWPort23, BWPort4]>;
-// Load/store MXCSR.
-def : WriteRes<WriteLDMXCSR, [BWPort0,BWPort23,BWPort0156]> { let Latency = 7; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; }
-def : WriteRes<WriteSTMXCSR, [BWPort4,BWPort5,BWPort237]> { let Latency = 2; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; }
-
// Nop, not very useful except that it provides a model for nops!
def : WriteRes<WriteNop, []>;
@@ -1104,7 +1110,7 @@ def BWWriteResGroup84 : SchedWriteRes<[BWPort6,BWPort23,BWPort0156]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[BWWriteResGroup84], (instrs LRETQ, RETQ)>;
+def: InstRW<[BWWriteResGroup84], (instrs LRET64, RET64)>;
def BWWriteResGroup87 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort06]> {
let Latency = 7;
diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td
index 99fddcd4b2d5..68961d6245ab 100644
--- a/llvm/lib/Target/X86/X86SchedHaswell.td
+++ b/llvm/lib/Target/X86/X86SchedHaswell.td
@@ -117,12 +117,16 @@ multiclass HWWriteResPair<X86FoldableSchedWrite SchedRW,
// 2/3/7 cycle to recompute the address.
def : WriteRes<WriteRMW, [HWPort237,HWPort4]>;
+// Loads, stores, and moves, not folded with other operations.
// Store_addr on 237.
// Store_data on 4.
defm : X86WriteRes<WriteStore, [HWPort237, HWPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteStoreNT, [HWPort237, HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteLoad, [HWPort23], 5, [1], 1>;
defm : X86WriteRes<WriteMove, [HWPort0156], 1, [1], 1>;
+
+// Idioms that clear a register, like xorps %xmm0, %xmm0.
+// These can often bypass execution ports completely.
def : WriteRes<WriteZero, []>;
// Model the effect of clobbering the read-write mask operand of the GATHER operation.
@@ -140,12 +144,17 @@ defm : X86WriteRes<WriteIMul16Imm, [HWPort1,HWPort0156], 4, [1,1], 2>;
defm : X86WriteRes<WriteIMul16ImmLd, [HWPort1,HWPort0156,HWPort23], 8, [1,1,1], 3>;
defm : HWWriteResPair<WriteIMul16Reg, [HWPort1], 3>;
defm : HWWriteResPair<WriteIMul32, [HWPort1,HWPort06,HWPort0156], 4, [1,1,1], 3>;
+defm : HWWriteResPair<WriteMULX32, [HWPort1,HWPort06,HWPort0156], 3, [1,1,1], 3>;
defm : HWWriteResPair<WriteIMul32Imm, [HWPort1], 3>;
defm : HWWriteResPair<WriteIMul32Reg, [HWPort1], 3>;
defm : HWWriteResPair<WriteIMul64, [HWPort1,HWPort6], 4, [1,1], 2>;
+defm : HWWriteResPair<WriteMULX64, [HWPort1,HWPort6], 3, [1,1], 2>;
defm : HWWriteResPair<WriteIMul64Imm, [HWPort1], 3>;
defm : HWWriteResPair<WriteIMul64Reg, [HWPort1], 3>;
-def : WriteRes<WriteIMulH, []> { let Latency = 3; }
+def HWWriteIMulH : WriteRes<WriteIMulH, []> { let Latency = 4; }
+def : WriteRes<WriteIMulHLd, []> {
+ let Latency = !add(HWWriteIMulH.Latency, HaswellModel.LoadLatency);
+}
defm : X86WriteRes<WriteBSWAP32, [HWPort15], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [HWPort06, HWPort15], 2, [1,1], 2>;
@@ -165,11 +174,15 @@ defm : X86WriteRes<WriteSHDrrcl,[HWPort1, HWPort06, HWPort0156], 6, [1, 1, 2], 4
defm : X86WriteRes<WriteSHDmri, [HWPort1, HWPort23, HWPort237, HWPort0156], 10, [1, 1, 1, 1], 4>;
defm : X86WriteRes<WriteSHDmrcl,[HWPort1, HWPort23, HWPort237, HWPort06, HWPort0156], 12, [1, 1, 1, 1, 2], 6>;
+// Branches don't produce values, so they have no latency, but they still
+// consume resources. Indirect branches can fold loads.
defm : HWWriteResPair<WriteJump, [HWPort06], 1>;
+
defm : HWWriteResPair<WriteCRC32, [HWPort1], 3>;
defm : HWWriteResPair<WriteCMOV, [HWPort06,HWPort0156], 2, [1,1], 2>; // Conditional move.
defm : X86WriteRes<WriteFCMOV, [HWPort1], 3, [1], 1>; // x87 conditional move.
+
def : WriteRes<WriteSETCC, [HWPort06]>; // Setcc.
def : WriteRes<WriteSETCCStore, [HWPort06,HWPort4,HWPort237]> {
let Latency = 2;
@@ -220,7 +233,7 @@ defm : X86WriteRes<WriteIDiv16Ld, [HWPort0,HWPort23,HWDivider], 29, [1,1,10], 2>
defm : X86WriteRes<WriteIDiv32Ld, [HWPort0,HWPort23,HWDivider], 29, [1,1,10], 2>;
defm : X86WriteRes<WriteIDiv64Ld, [HWPort0,HWPort23,HWDivider], 29, [1,1,10], 2>;
-// Scalar and vector floating point.
+// Floating point. This covers both scalar and vector operations.
defm : X86WriteRes<WriteFLD0, [HWPort01], 1, [1], 1>;
defm : X86WriteRes<WriteFLD1, [HWPort01], 1, [2], 2>;
defm : X86WriteRes<WriteFLDC, [HWPort01], 1, [2], 2>;
@@ -305,14 +318,14 @@ defm : HWWriteResPair<WriteFSqrt64Y, [HWPort0,HWPort15,HWFPDivider], 35, [2,1,28
defm : HWWriteResPair<WriteFSqrt64Z, [HWPort0,HWPort15,HWFPDivider], 35, [2,1,28], 3, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteFSqrt80, [HWPort0,HWFPDivider], 23, [1,17]>;
-defm : HWWriteResPair<WriteFMA, [HWPort01], 5, [1], 1, 5>;
-defm : HWWriteResPair<WriteFMAX, [HWPort01], 5, [1], 1, 6>;
-defm : HWWriteResPair<WriteFMAY, [HWPort01], 5, [1], 1, 7>;
-defm : HWWriteResPair<WriteFMAZ, [HWPort01], 5, [1], 1, 7>; // Unsupported = 1
-defm : HWWriteResPair<WriteDPPD, [HWPort0,HWPort1,HWPort5], 9, [1,1,1], 3, 6>;
-defm : HWWriteResPair<WriteDPPS, [HWPort0,HWPort1,HWPort5], 14, [2,1,1], 4, 6>;
-defm : HWWriteResPair<WriteDPPSY, [HWPort0,HWPort1,HWPort5], 14, [2,1,1], 4, 7>;
-defm : HWWriteResPair<WriteDPPSZ, [HWPort0,HWPort1,HWPort5], 14, [2,1,1], 4, 7>; // Unsupported = 1
+defm : HWWriteResPair<WriteFMA, [HWPort01], 5, [1], 1, 5>;
+defm : HWWriteResPair<WriteFMAX, [HWPort01], 5, [1], 1, 6>;
+defm : HWWriteResPair<WriteFMAY, [HWPort01], 5, [1], 1, 7>;
+defm : HWWriteResPair<WriteFMAZ, [HWPort01], 5, [1], 1, 7>; // Unsupported = 1
+defm : HWWriteResPair<WriteDPPD, [HWPort0,HWPort1,HWPort5], 9, [1,1,1], 3, 6>;
+defm : HWWriteResPair<WriteDPPS, [HWPort0,HWPort1,HWPort5], 14, [2,1,1], 4, 6>;
+defm : HWWriteResPair<WriteDPPSY, [HWPort0,HWPort1,HWPort5], 14, [2,1,1], 4, 7>;
+defm : HWWriteResPair<WriteDPPSZ, [HWPort0,HWPort1,HWPort5], 14, [2,1,1], 4, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteFSign, [HWPort0], 1>;
defm : X86WriteRes<WriteFRnd, [HWPort23], 6, [1], 1>;
defm : X86WriteRes<WriteFRndY, [HWPort23], 6, [1], 1>;
@@ -593,11 +606,28 @@ def : WriteRes<WriteCLMulLd, [HWPort0, HWPort5, HWPort23]> {
def : WriteRes<WriteLDMXCSR, [HWPort0,HWPort23,HWPort0156]> { let Latency = 7; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; }
def : WriteRes<WriteSTMXCSR, [HWPort4,HWPort5,HWPort237]> { let Latency = 2; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; }
+// Catch-all for expensive system instructions.
def : WriteRes<WriteSystem, [HWPort0156]> { let Latency = 100; }
+
+// Old microcoded instructions that nobody uses.
def : WriteRes<WriteMicrocoded, [HWPort0156]> { let Latency = 100; }
+
+// Fence instructions.
def : WriteRes<WriteFence, [HWPort23, HWPort4]>;
+
+// Nop, not very useful except that it provides a model for nops!
def : WriteRes<WriteNop, []>;
+////////////////////////////////////////////////////////////////////////////////
+// Horizontal add/sub instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : HWWriteResPair<WriteFHAdd, [HWPort1, HWPort5], 5, [1,2], 3, 6>;
+defm : HWWriteResPair<WriteFHAddY, [HWPort1, HWPort5], 5, [1,2], 3, 7>;
+defm : HWWriteResPair<WritePHAdd, [HWPort5, HWPort15], 3, [2,1], 3, 5>;
+defm : HWWriteResPair<WritePHAddX, [HWPort5, HWPort15], 3, [2,1], 3, 6>;
+defm : HWWriteResPair<WritePHAddY, [HWPort5, HWPort15], 3, [2,1], 3, 7>;
+
//================ Exceptions ================//
//-- Specific Scheduling Models --//
@@ -680,7 +710,7 @@ def HWWriteRETI : SchedWriteRes<[HWPort23, HWPort6, HWPort015]> {
let NumMicroOps = 4;
let ResourceCycles = [1, 2, 1];
}
-def : InstRW<[HWWriteRETI], (instregex "RETI(L|Q|W)", "LRETI(L|Q|W)")>;
+def : InstRW<[HWWriteRETI], (instregex "RETI(16|32|64)", "LRETI(16|32|64)")>;
// BOUND.
// r,m.
@@ -821,16 +851,6 @@ def HWWriteFXTRACT : SchedWriteRes<[]> {
}
def : InstRW<[HWWriteFXTRACT], (instrs FXTRACT)>;
-////////////////////////////////////////////////////////////////////////////////
-// Horizontal add/sub instructions.
-////////////////////////////////////////////////////////////////////////////////
-
-defm : HWWriteResPair<WriteFHAdd, [HWPort1, HWPort5], 5, [1,2], 3, 6>;
-defm : HWWriteResPair<WriteFHAddY, [HWPort1, HWPort5], 5, [1,2], 3, 7>;
-defm : HWWriteResPair<WritePHAdd, [HWPort5, HWPort15], 3, [2,1], 3, 5>;
-defm : HWWriteResPair<WritePHAddX, [HWPort5, HWPort15], 3, [2,1], 3, 6>;
-defm : HWWriteResPair<WritePHAddY, [HWPort5, HWPort15], 3, [2,1], 3, 7>;
-
//=== Floating Point XMM and YMM Instructions ===//
// Remaining instrs.
@@ -1168,7 +1188,7 @@ def HWWriteResGroup41 : SchedWriteRes<[HWPort6,HWPort23,HWPort0156]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[HWWriteResGroup41], (instrs LRETQ, RETL, RETQ)>;
+def: InstRW<[HWWriteResGroup41], (instrs LRET64, RET32, RET64)>;
def HWWriteResGroup44 : SchedWriteRes<[HWPort4,HWPort6,HWPort237,HWPort0156]> {
let Latency = 3;
diff --git a/llvm/lib/Target/X86/X86SchedIceLake.td b/llvm/lib/Target/X86/X86SchedIceLake.td
new file mode 100644
index 000000000000..889b9b7fa666
--- /dev/null
+++ b/llvm/lib/Target/X86/X86SchedIceLake.td
@@ -0,0 +1,2636 @@
+//=- X86SchedIceLake.td - X86 Ice Lake Scheduling ------------*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for Ice Lake to support
+// instruction scheduling and other instruction cost heuristics.
+//
+// TODO: This is mainly a copy of X86SchedSkylakeServer.td, but allows us to
+// iteratively improve the scheduling info to better model the
+// Ice Lake (Sunny/Cypress Cove) microarchitecture.
+//
+//===----------------------------------------------------------------------===//
+
+def IceLakeModel : SchedMachineModel {
+ // All x86 instructions are modeled as a single micro-op, and Ice Lake can
+ // decode 6 instructions per cycle.
+ let IssueWidth = 6;
+ let MicroOpBufferSize = 224; // Based on the reorder buffer.
+ let LoadLatency = 5;
+ let MispredictPenalty = 14;
+
+ // Based on the LSD (loop-stream detector) queue size and benchmarking data.
+ let LoopMicroOpBufferSize = 50;
+
+ // This flag is set to allow the scheduler to assign a default model to
+ // unrecognized opcodes.
+ let CompleteModel = 0;
+}
+
+let SchedModel = IceLakeModel in {
+
+// Ice Lake can issue micro-ops to 8 different ports in one cycle.
+
+// Ports 0, 1, 5, and 6 handle all computation.
+// Ports 4 and 9 get the data half of stores. Store data can be available later
+// than the store address, but since we don't model the latency of stores, we
+// can ignore that.
+// Ports 2 and 3 are identical. They handle loads and address calculations.
+// Ports 7 and 8 are identical. They handle store address calculations.
+def ICXPort0 : ProcResource<1>;
+def ICXPort1 : ProcResource<1>;
+def ICXPort2 : ProcResource<1>;
+def ICXPort3 : ProcResource<1>;
+def ICXPort4 : ProcResource<1>;
+def ICXPort5 : ProcResource<1>;
+def ICXPort6 : ProcResource<1>;
+def ICXPort7 : ProcResource<1>;
+def ICXPort8 : ProcResource<1>;
+def ICXPort9 : ProcResource<1>;
+
+// Many micro-ops are capable of issuing on multiple ports.
+def ICXPort01 : ProcResGroup<[ICXPort0, ICXPort1]>;
+def ICXPort23 : ProcResGroup<[ICXPort2, ICXPort3]>;
+def ICXPort237 : ProcResGroup<[ICXPort2, ICXPort3, ICXPort7]>;
+def ICXPort04 : ProcResGroup<[ICXPort0, ICXPort4]>;
+def ICXPort05 : ProcResGroup<[ICXPort0, ICXPort5]>;
+def ICXPort06 : ProcResGroup<[ICXPort0, ICXPort6]>;
+def ICXPort15 : ProcResGroup<[ICXPort1, ICXPort5]>;
+def ICXPort16 : ProcResGroup<[ICXPort1, ICXPort6]>;
+def ICXPort49 : ProcResGroup<[ICXPort4, ICXPort9]>;
+def ICXPort56 : ProcResGroup<[ICXPort5, ICXPort6]>;
+def ICXPort78 : ProcResGroup<[ICXPort7, ICXPort8]>;
+def ICXPort015 : ProcResGroup<[ICXPort0, ICXPort1, ICXPort5]>;
+def ICXPort056 : ProcResGroup<[ICXPort0, ICXPort5, ICXPort6]>;
+def ICXPort0156: ProcResGroup<[ICXPort0, ICXPort1, ICXPort5, ICXPort6]>;
+
+def ICXDivider : ProcResource<1>; // Integer division issued on port 0.
+// FP division and sqrt on port 0.
+def ICXFPDivider : ProcResource<1>;
+
+// 60 Entry Unified Scheduler
+def ICXPortAny : ProcResGroup<[ICXPort0, ICXPort1, ICXPort2, ICXPort3, ICXPort4,
+ ICXPort5, ICXPort6, ICXPort7, ICXPort8, ICXPort9]> {
+ let BufferSize=60;
+}
+
+// Integer loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
+// cycles after the memory operand.
+def : ReadAdvance<ReadAfterLd, 5>;
+
+// Vector loads are 5/6/7 cycles, so ReadAfterVec*Ld registers needn't be available
+// until 5/6/7 cycles after the memory operand.
+def : ReadAdvance<ReadAfterVecLd, 5>;
+def : ReadAdvance<ReadAfterVecXLd, 6>;
+def : ReadAdvance<ReadAfterVecYLd, 7>;
+
+def : ReadAdvance<ReadInt2Fpu, 0>;
+
+// Many SchedWrites are defined in pairs with and without a folded load.
+// Instructions with folded loads are usually micro-fused, so they only appear
+// as two micro-ops when queued in the reservation station.
+// This multiclass defines the resource usage for variants with and without
+// folded loads.
+multiclass ICXWriteResPair<X86FoldableSchedWrite SchedRW,
+ list<ProcResourceKind> ExePorts,
+ int Lat, list<int> Res = [1], int UOps = 1,
+ int LoadLat = 5> {
+ // Register variant is using a single cycle on ExePort.
+ def : WriteRes<SchedRW, ExePorts> {
+ let Latency = Lat;
+ let ResourceCycles = Res;
+ let NumMicroOps = UOps;
+ }
+
+ // Memory variant also uses a cycle on port 2/3 and adds LoadLat cycles to
+ // the latency (default = 5).
+ def : WriteRes<SchedRW.Folded, !listconcat([ICXPort23], ExePorts)> {
+ let Latency = !add(Lat, LoadLat);
+ let ResourceCycles = !listconcat([1], Res);
+ let NumMicroOps = !add(UOps, 1);
+ }
+}
+
+// A folded store needs a cycle on port 4 for the store data, and an extra port
+// 2/3/7 cycle to recompute the address.
+def : WriteRes<WriteRMW, [ICXPort237,ICXPort4]>;
+
+// Arithmetic.
+defm : ICXWriteResPair<WriteALU, [ICXPort0156], 1>; // Simple integer ALU op.
+defm : ICXWriteResPair<WriteADC, [ICXPort06], 1>; // Integer ALU + flags op.
+
+// Integer multiplication.
+defm : ICXWriteResPair<WriteIMul8, [ICXPort1], 3>;
+defm : ICXWriteResPair<WriteIMul16, [ICXPort1,ICXPort06,ICXPort0156], 4, [1,1,2], 4>;
+defm : X86WriteRes<WriteIMul16Imm, [ICXPort1,ICXPort0156], 4, [1,1], 2>;
+defm : X86WriteRes<WriteIMul16ImmLd, [ICXPort1,ICXPort0156,ICXPort23], 8, [1,1,1], 3>;
+defm : X86WriteRes<WriteIMul16Reg, [ICXPort1], 3, [1], 1>;
+defm : X86WriteRes<WriteIMul16RegLd, [ICXPort1,ICXPort0156,ICXPort23], 8, [1,1,1], 3>;
+defm : ICXWriteResPair<WriteIMul32, [ICXPort1,ICXPort06,ICXPort0156], 4, [1,1,1], 3>;
+defm : ICXWriteResPair<WriteMULX32, [ICXPort1,ICXPort06,ICXPort0156], 3, [1,1,1], 3>;
+defm : ICXWriteResPair<WriteIMul32Imm, [ICXPort1], 3>;
+defm : ICXWriteResPair<WriteIMul32Reg, [ICXPort1], 3>;
+defm : ICXWriteResPair<WriteIMul64, [ICXPort1,ICXPort5], 4, [1,1], 2>;
+defm : ICXWriteResPair<WriteMULX64, [ICXPort1,ICXPort5], 3, [1,1], 2>;
+defm : ICXWriteResPair<WriteIMul64Imm, [ICXPort1], 3>;
+defm : ICXWriteResPair<WriteIMul64Reg, [ICXPort1], 3>;
+def ICXWriteIMulH : WriteRes<WriteIMulH, []> { let Latency = 4; }
+def : WriteRes<WriteIMulHLd, []> {
+ let Latency = !add(ICXWriteIMulH.Latency, SkylakeServerModel.LoadLatency);
+}
+
+defm : X86WriteRes<WriteBSWAP32, [ICXPort15], 1, [1], 1>;
+defm : X86WriteRes<WriteBSWAP64, [ICXPort06, ICXPort15], 2, [1,1], 2>;
+defm : X86WriteRes<WriteCMPXCHG,[ICXPort06, ICXPort0156], 5, [2,3], 5>;
+defm : X86WriteRes<WriteCMPXCHGRMW,[ICXPort23,ICXPort06,ICXPort0156,ICXPort237,ICXPort4], 8, [1,2,1,1,1], 6>;
+defm : X86WriteRes<WriteXCHG, [ICXPort0156], 2, [3], 3>;
+
+// TODO: Why isn't the ICXDivider used?
+defm : ICXWriteResPair<WriteDiv8, [ICXPort0, ICXDivider], 25, [1,10], 1, 4>;
+defm : X86WriteRes<WriteDiv16, [ICXPort0,ICXPort1,ICXPort5,ICXPort6,ICXPort05,ICXPort0156], 76, [7,2,8,3,1,11], 32>;
+defm : X86WriteRes<WriteDiv32, [ICXPort0,ICXPort1,ICXPort5,ICXPort6,ICXPort05,ICXPort0156], 76, [7,2,8,3,1,11], 32>;
+defm : X86WriteRes<WriteDiv64, [ICXPort0,ICXPort1,ICXPort5,ICXPort6,ICXPort05,ICXPort0156], 76, [7,2,8,3,1,11], 32>;
+defm : X86WriteRes<WriteDiv16Ld, [ICXPort0,ICXPort23,ICXDivider], 29, [1,1,10], 2>;
+defm : X86WriteRes<WriteDiv32Ld, [ICXPort0,ICXPort23,ICXDivider], 29, [1,1,10], 2>;
+defm : X86WriteRes<WriteDiv64Ld, [ICXPort0,ICXPort23,ICXDivider], 29, [1,1,10], 2>;
+
+defm : X86WriteRes<WriteIDiv8, [ICXPort0, ICXDivider], 25, [1,10], 1>;
+defm : X86WriteRes<WriteIDiv16, [ICXPort0,ICXPort1,ICXPort5,ICXPort6,ICXPort06,ICXPort0156], 102, [4,2,4,8,14,34], 66>;
+defm : X86WriteRes<WriteIDiv32, [ICXPort0,ICXPort1,ICXPort5,ICXPort6,ICXPort06,ICXPort0156], 102, [4,2,4,8,14,34], 66>;
+defm : X86WriteRes<WriteIDiv64, [ICXPort0,ICXPort1,ICXPort5,ICXPort6,ICXPort06,ICXPort0156], 102, [4,2,4,8,14,34], 66>;
+defm : X86WriteRes<WriteIDiv8Ld, [ICXPort0,ICXPort5,ICXPort23,ICXPort0156], 28, [2,4,1,1], 8>;
+defm : X86WriteRes<WriteIDiv16Ld, [ICXPort0,ICXPort5,ICXPort23,ICXPort0156], 28, [2,4,1,1], 8>;
+defm : X86WriteRes<WriteIDiv32Ld, [ICXPort0,ICXPort5,ICXPort23,ICXPort0156], 28, [2,4,1,1], 8>;
+defm : X86WriteRes<WriteIDiv64Ld, [ICXPort0,ICXPort5,ICXPort23,ICXPort0156], 28, [2,4,1,1], 8>;
+
+defm : ICXWriteResPair<WriteCRC32, [ICXPort1], 3>;
+
+def : WriteRes<WriteLEA, [ICXPort15]>; // LEA instructions can't fold loads.
+
+defm : ICXWriteResPair<WriteCMOV, [ICXPort06], 1, [1], 1>; // Conditional move.
+defm : X86WriteRes<WriteFCMOV, [ICXPort1], 3, [1], 1>; // x87 conditional move.
+def : WriteRes<WriteSETCC, [ICXPort06]>; // Setcc.
+def : WriteRes<WriteSETCCStore, [ICXPort06,ICXPort4,ICXPort237]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+}
+defm : X86WriteRes<WriteLAHFSAHF, [ICXPort06], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTest, [ICXPort06], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTestImmLd, [ICXPort06,ICXPort23], 6, [1,1], 2>;
+defm : X86WriteRes<WriteBitTestRegLd, [ICXPort0156,ICXPort23], 6, [1,1], 2>;
+defm : X86WriteRes<WriteBitTestSet, [ICXPort06], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTestSetImmLd, [ICXPort06,ICXPort23], 5, [1,1], 3>;
+defm : X86WriteRes<WriteBitTestSetRegLd, [ICXPort0156,ICXPort23], 5, [1,1], 2>;
+
+// Integer shifts and rotates.
+defm : ICXWriteResPair<WriteShift, [ICXPort06], 1>;
+defm : ICXWriteResPair<WriteShiftCL, [ICXPort06], 3, [3], 3>;
+defm : ICXWriteResPair<WriteRotate, [ICXPort06], 1, [1], 1>;
+defm : ICXWriteResPair<WriteRotateCL, [ICXPort06], 3, [3], 3>;
+
+// SHLD/SHRD.
+defm : X86WriteRes<WriteSHDrri, [ICXPort1], 3, [1], 1>;
+defm : X86WriteRes<WriteSHDrrcl,[ICXPort1,ICXPort06,ICXPort0156], 6, [1, 2, 1], 4>;
+defm : X86WriteRes<WriteSHDmri, [ICXPort1,ICXPort23,ICXPort237,ICXPort0156], 9, [1, 1, 1, 1], 4>;
+defm : X86WriteRes<WriteSHDmrcl,[ICXPort1,ICXPort23,ICXPort237,ICXPort06,ICXPort0156], 11, [1, 1, 1, 2, 1], 6>;
+
+// Bit counts.
+defm : ICXWriteResPair<WriteBSF, [ICXPort1], 3>;
+defm : ICXWriteResPair<WriteBSR, [ICXPort1], 3>;
+defm : ICXWriteResPair<WriteLZCNT, [ICXPort1], 3>;
+defm : ICXWriteResPair<WriteTZCNT, [ICXPort1], 3>;
+defm : ICXWriteResPair<WritePOPCNT, [ICXPort1], 3>;
+
+// BMI1 BEXTR/BLS, BMI2 BZHI
+defm : ICXWriteResPair<WriteBEXTR, [ICXPort06,ICXPort15], 2, [1,1], 2>;
+defm : ICXWriteResPair<WriteBLS, [ICXPort15], 1>;
+defm : ICXWriteResPair<WriteBZHI, [ICXPort15], 1>;
+
+// Loads, stores, and moves, not folded with other operations.
+defm : X86WriteRes<WriteLoad, [ICXPort23], 5, [1], 1>;
+defm : X86WriteRes<WriteStore, [ICXPort237, ICXPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteStoreNT, [ICXPort237, ICXPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteMove, [ICXPort0156], 1, [1], 1>;
+
+// Model the effect of clobbering the read-write mask operand of the GATHER operation.
+// It does not cost anything by itself; it only has a latency matching that of WriteLoad.
+defm : X86WriteRes<WriteVecMaskedGatherWriteback, [], 5, [], 0>;
+
+// Idioms that clear a register, like xorps %xmm0, %xmm0.
+// These can often bypass execution ports completely.
+def : WriteRes<WriteZero, []>;
+
+// Branches don't produce values, so they have no latency, but they still
+// consume resources. Indirect branches can fold loads.
+defm : ICXWriteResPair<WriteJump, [ICXPort06], 1>;
+
+// Floating point. This covers both scalar and vector operations.
+defm : X86WriteRes<WriteFLD0, [ICXPort05], 1, [1], 1>;
+defm : X86WriteRes<WriteFLD1, [ICXPort05], 1, [2], 2>;
+defm : X86WriteRes<WriteFLDC, [ICXPort05], 1, [2], 2>;
+defm : X86WriteRes<WriteFLoad, [ICXPort23], 5, [1], 1>;
+defm : X86WriteRes<WriteFLoadX, [ICXPort23], 6, [1], 1>;
+defm : X86WriteRes<WriteFLoadY, [ICXPort23], 7, [1], 1>;
+defm : X86WriteRes<WriteFMaskedLoad, [ICXPort23,ICXPort015], 7, [1,1], 2>;
+defm : X86WriteRes<WriteFMaskedLoadY, [ICXPort23,ICXPort015], 8, [1,1], 2>;
+defm : X86WriteRes<WriteFStore, [ICXPort237,ICXPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteFStoreX, [ICXPort237,ICXPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteFStoreY, [ICXPort237,ICXPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteFStoreNT, [ICXPort237,ICXPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteFStoreNTX, [ICXPort237,ICXPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteFStoreNTY, [ICXPort237,ICXPort4], 1, [1,1], 2>;
+
+defm : X86WriteRes<WriteFMaskedStore32, [ICXPort237,ICXPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteFMaskedStore32Y, [ICXPort237,ICXPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteFMaskedStore64, [ICXPort237,ICXPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteFMaskedStore64Y, [ICXPort237,ICXPort0], 2, [1,1], 2>;
+
+defm : X86WriteRes<WriteFMove, [ICXPort015], 1, [1], 1>;
+defm : X86WriteRes<WriteFMoveX, [ICXPort015], 1, [1], 1>;
+defm : X86WriteRes<WriteFMoveY, [ICXPort015], 1, [1], 1>;
+defm : X86WriteRes<WriteEMMS, [ICXPort05,ICXPort0156], 10, [9,1], 10>;
+
+defm : ICXWriteResPair<WriteFAdd, [ICXPort01], 4, [1], 1, 5>; // Floating point add/sub.
+defm : ICXWriteResPair<WriteFAddX, [ICXPort01], 4, [1], 1, 6>;
+defm : ICXWriteResPair<WriteFAddY, [ICXPort01], 4, [1], 1, 7>;
+defm : ICXWriteResPair<WriteFAddZ, [ICXPort05], 4, [1], 1, 7>;
+defm : ICXWriteResPair<WriteFAdd64, [ICXPort01], 4, [1], 1, 5>; // Floating point double add/sub.
+defm : ICXWriteResPair<WriteFAdd64X, [ICXPort01], 4, [1], 1, 6>;
+defm : ICXWriteResPair<WriteFAdd64Y, [ICXPort01], 4, [1], 1, 7>;
+defm : ICXWriteResPair<WriteFAdd64Z, [ICXPort05], 4, [1], 1, 7>;
+
+defm : ICXWriteResPair<WriteFCmp, [ICXPort01], 4, [1], 1, 5>; // Floating point compare.
+defm : ICXWriteResPair<WriteFCmpX, [ICXPort01], 4, [1], 1, 6>;
+defm : ICXWriteResPair<WriteFCmpY, [ICXPort01], 4, [1], 1, 7>;
+defm : ICXWriteResPair<WriteFCmpZ, [ICXPort05], 4, [1], 1, 7>;
+defm : ICXWriteResPair<WriteFCmp64, [ICXPort01], 4, [1], 1, 5>; // Floating point double compare.
+defm : ICXWriteResPair<WriteFCmp64X, [ICXPort01], 4, [1], 1, 6>;
+defm : ICXWriteResPair<WriteFCmp64Y, [ICXPort01], 4, [1], 1, 7>;
+defm : ICXWriteResPair<WriteFCmp64Z, [ICXPort05], 4, [1], 1, 7>;
+
+defm : ICXWriteResPair<WriteFCom, [ICXPort0], 2>; // Floating point compare to flags (X87).
+defm : ICXWriteResPair<WriteFComX, [ICXPort0], 2>; // Floating point compare to flags (SSE).
+
+defm : ICXWriteResPair<WriteFMul, [ICXPort01], 4, [1], 1, 5>; // Floating point multiplication.
+defm : ICXWriteResPair<WriteFMulX, [ICXPort01], 4, [1], 1, 6>;
+defm : ICXWriteResPair<WriteFMulY, [ICXPort01], 4, [1], 1, 7>;
+defm : ICXWriteResPair<WriteFMulZ, [ICXPort05], 4, [1], 1, 7>;
+defm : ICXWriteResPair<WriteFMul64, [ICXPort01], 4, [1], 1, 5>; // Floating point double multiplication.
+defm : ICXWriteResPair<WriteFMul64X, [ICXPort01], 4, [1], 1, 6>;
+defm : ICXWriteResPair<WriteFMul64Y, [ICXPort01], 4, [1], 1, 7>;
+defm : ICXWriteResPair<WriteFMul64Z, [ICXPort05], 4, [1], 1, 7>;
+
+defm : ICXWriteResPair<WriteFDiv, [ICXPort0,ICXFPDivider], 11, [1,3], 1, 5>; // 10-14 cycles. // Floating point division.
+//defm : ICXWriteResPair<WriteFDivX, [ICXPort0,ICXFPDivider], 11, [1,3], 1, 6>; // 10-14 cycles.
+defm : ICXWriteResPair<WriteFDivY, [ICXPort0,ICXFPDivider], 11, [1,5], 1, 7>; // 10-14 cycles.
+defm : ICXWriteResPair<WriteFDivZ, [ICXPort0,ICXPort5,ICXFPDivider], 18, [2,1,10], 3, 7>; // 10-14 cycles.
+//defm : ICXWriteResPair<WriteFDiv64, [ICXPort0,ICXFPDivider], 14, [1,3], 1, 5>; // 10-14 cycles. // Floating point division.
+//defm : ICXWriteResPair<WriteFDiv64X, [ICXPort0,ICXFPDivider], 14, [1,3], 1, 6>; // 10-14 cycles.
+//defm : ICXWriteResPair<WriteFDiv64Y, [ICXPort0,ICXFPDivider], 14, [1,5], 1, 7>; // 10-14 cycles.
+defm : ICXWriteResPair<WriteFDiv64Z, [ICXPort0,ICXPort5,ICXFPDivider], 23, [2,1,16], 3, 7>; // 10-14 cycles.
+
+defm : ICXWriteResPair<WriteFSqrt, [ICXPort0,ICXFPDivider], 12, [1,3], 1, 5>; // Floating point square root.
+defm : ICXWriteResPair<WriteFSqrtX, [ICXPort0,ICXFPDivider], 12, [1,3], 1, 6>;
+defm : ICXWriteResPair<WriteFSqrtY, [ICXPort0,ICXFPDivider], 12, [1,6], 1, 7>;
+defm : ICXWriteResPair<WriteFSqrtZ, [ICXPort0,ICXPort5,ICXFPDivider], 20, [2,1,12], 3, 7>;
+defm : ICXWriteResPair<WriteFSqrt64, [ICXPort0,ICXFPDivider], 18, [1,6], 1, 5>; // Floating point double square root.
+defm : ICXWriteResPair<WriteFSqrt64X, [ICXPort0,ICXFPDivider], 18, [1,6], 1, 6>;
+defm : ICXWriteResPair<WriteFSqrt64Y, [ICXPort0,ICXFPDivider], 18, [1,12],1, 7>;
+defm : ICXWriteResPair<WriteFSqrt64Z, [ICXPort0,ICXPort5,ICXFPDivider], 32, [2,1,24], 3, 7>;
+defm : ICXWriteResPair<WriteFSqrt80, [ICXPort0,ICXFPDivider], 21, [1,7]>; // Floating point long double square root.
+
+defm : ICXWriteResPair<WriteFRcp, [ICXPort0], 4, [1], 1, 5>; // Floating point reciprocal estimate.
+defm : ICXWriteResPair<WriteFRcpX, [ICXPort0], 4, [1], 1, 6>;
+defm : ICXWriteResPair<WriteFRcpY, [ICXPort0], 4, [1], 1, 7>;
+defm : ICXWriteResPair<WriteFRcpZ, [ICXPort0,ICXPort5], 4, [2,1], 3, 7>;
+
+defm : ICXWriteResPair<WriteFRsqrt, [ICXPort0], 4, [1], 1, 5>; // Floating point reciprocal square root estimate.
+defm : ICXWriteResPair<WriteFRsqrtX,[ICXPort0], 4, [1], 1, 6>;
+defm : ICXWriteResPair<WriteFRsqrtY,[ICXPort0], 4, [1], 1, 7>;
+defm : ICXWriteResPair<WriteFRsqrtZ,[ICXPort0,ICXPort5], 9, [2,1], 3, 7>;
+
+defm : ICXWriteResPair<WriteFMA, [ICXPort01], 4, [1], 1, 5>; // Fused Multiply Add.
+defm : ICXWriteResPair<WriteFMAX, [ICXPort01], 4, [1], 1, 6>;
+defm : ICXWriteResPair<WriteFMAY, [ICXPort01], 4, [1], 1, 7>;
+defm : ICXWriteResPair<WriteFMAZ, [ICXPort05], 4, [1], 1, 7>;
+defm : ICXWriteResPair<WriteDPPD, [ICXPort5,ICXPort015], 9, [1,2], 3, 6>; // Floating point double dot product.
+defm : ICXWriteResPair<WriteDPPS, [ICXPort5,ICXPort015], 13, [1,3], 4, 6>;
+defm : ICXWriteResPair<WriteDPPSY,[ICXPort5,ICXPort015], 13, [1,3], 4, 7>;
+defm : ICXWriteResPair<WriteDPPSZ,[ICXPort5,ICXPort015], 13, [1,3], 4, 7>;
+defm : ICXWriteResPair<WriteFSign, [ICXPort0], 1>; // Floating point fabs/fchs.
+defm : ICXWriteResPair<WriteFRnd, [ICXPort01], 8, [2], 2, 6>; // Floating point rounding.
+defm : ICXWriteResPair<WriteFRndY, [ICXPort01], 8, [2], 2, 7>;
+defm : ICXWriteResPair<WriteFRndZ, [ICXPort05], 8, [2], 2, 7>;
+defm : ICXWriteResPair<WriteFLogic, [ICXPort015], 1, [1], 1, 6>; // Floating point and/or/xor logicals.
+defm : ICXWriteResPair<WriteFLogicY, [ICXPort015], 1, [1], 1, 7>;
+defm : ICXWriteResPair<WriteFLogicZ, [ICXPort05], 1, [1], 1, 7>;
+defm : ICXWriteResPair<WriteFTest, [ICXPort0], 2, [1], 1, 6>; // Floating point TEST instructions.
+defm : ICXWriteResPair<WriteFTestY, [ICXPort0], 2, [1], 1, 7>;
+defm : ICXWriteResPair<WriteFTestZ, [ICXPort0], 2, [1], 1, 7>;
+defm : ICXWriteResPair<WriteFShuffle, [ICXPort5], 1, [1], 1, 6>; // Floating point vector shuffles.
+defm : ICXWriteResPair<WriteFShuffleY, [ICXPort5], 1, [1], 1, 7>;
+defm : ICXWriteResPair<WriteFShuffleZ, [ICXPort5], 1, [1], 1, 7>;
+defm : ICXWriteResPair<WriteFVarShuffle, [ICXPort5], 1, [1], 1, 6>; // Floating point vector variable shuffles.
+defm : ICXWriteResPair<WriteFVarShuffleY, [ICXPort5], 1, [1], 1, 7>;
+defm : ICXWriteResPair<WriteFVarShuffleZ, [ICXPort5], 1, [1], 1, 7>;
+defm : ICXWriteResPair<WriteFBlend, [ICXPort015], 1, [1], 1, 6>; // Floating point vector blends.
+defm : ICXWriteResPair<WriteFBlendY,[ICXPort015], 1, [1], 1, 7>;
+defm : ICXWriteResPair<WriteFBlendZ,[ICXPort015], 1, [1], 1, 7>;
+defm : ICXWriteResPair<WriteFVarBlend, [ICXPort015], 2, [2], 2, 6>; // Fp vector variable blends.
+defm : ICXWriteResPair<WriteFVarBlendY,[ICXPort015], 2, [2], 2, 7>;
+defm : ICXWriteResPair<WriteFVarBlendZ,[ICXPort015], 2, [2], 2, 7>;
+
+// FMA Scheduling helper class.
+// class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
+
+// Vector integer operations.
+defm : X86WriteRes<WriteVecLoad, [ICXPort23], 5, [1], 1>;
+defm : X86WriteRes<WriteVecLoadX, [ICXPort23], 6, [1], 1>;
+defm : X86WriteRes<WriteVecLoadY, [ICXPort23], 7, [1], 1>;
+defm : X86WriteRes<WriteVecLoadNT, [ICXPort23], 6, [1], 1>;
+defm : X86WriteRes<WriteVecLoadNTY, [ICXPort23], 7, [1], 1>;
+defm : X86WriteRes<WriteVecMaskedLoad, [ICXPort23,ICXPort015], 7, [1,1], 2>;
+defm : X86WriteRes<WriteVecMaskedLoadY, [ICXPort23,ICXPort015], 8, [1,1], 2>;
+defm : X86WriteRes<WriteVecStore, [ICXPort237,ICXPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteVecStoreX, [ICXPort237,ICXPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteVecStoreY, [ICXPort237,ICXPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteVecStoreNT, [ICXPort237,ICXPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteVecStoreNTY, [ICXPort237,ICXPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteVecMaskedStore32, [ICXPort237,ICXPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteVecMaskedStore32Y, [ICXPort237,ICXPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteVecMaskedStore64, [ICXPort237,ICXPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteVecMaskedStore64Y, [ICXPort237,ICXPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteVecMove, [ICXPort05], 1, [1], 1>;
+defm : X86WriteRes<WriteVecMoveX, [ICXPort015], 1, [1], 1>;
+defm : X86WriteRes<WriteVecMoveY, [ICXPort015], 1, [1], 1>;
+defm : X86WriteRes<WriteVecMoveToGpr, [ICXPort0], 2, [1], 1>;
+defm : X86WriteRes<WriteVecMoveFromGpr, [ICXPort5], 1, [1], 1>;
+
+defm : ICXWriteResPair<WriteVecALU, [ICXPort05], 1, [1], 1, 5>; // Vector integer ALU op, no logicals.
+defm : ICXWriteResPair<WriteVecALUX, [ICXPort01], 1, [1], 1, 6>;
+defm : ICXWriteResPair<WriteVecALUY, [ICXPort01], 1, [1], 1, 7>;
+defm : ICXWriteResPair<WriteVecALUZ, [ICXPort0], 1, [1], 1, 7>;
+defm : ICXWriteResPair<WriteVecLogic, [ICXPort05], 1, [1], 1, 5>; // Vector integer and/or/xor.
+defm : ICXWriteResPair<WriteVecLogicX,[ICXPort015], 1, [1], 1, 6>;
+defm : ICXWriteResPair<WriteVecLogicY,[ICXPort015], 1, [1], 1, 7>;
+defm : ICXWriteResPair<WriteVecLogicZ,[ICXPort05], 1, [1], 1, 7>;
+defm : ICXWriteResPair<WriteVecTest, [ICXPort0,ICXPort5], 3, [1,1], 2, 6>; // Vector integer TEST instructions.
+defm : ICXWriteResPair<WriteVecTestY, [ICXPort0,ICXPort5], 3, [1,1], 2, 7>;
+defm : ICXWriteResPair<WriteVecTestZ, [ICXPort0,ICXPort5], 3, [1,1], 2, 7>;
+defm : ICXWriteResPair<WriteVecIMul, [ICXPort0], 5, [1], 1, 5>; // Vector integer multiply.
+defm : ICXWriteResPair<WriteVecIMulX, [ICXPort01], 5, [1], 1, 6>;
+defm : ICXWriteResPair<WriteVecIMulY, [ICXPort01], 5, [1], 1, 7>;
+defm : ICXWriteResPair<WriteVecIMulZ, [ICXPort05], 5, [1], 1, 7>;
+defm : ICXWriteResPair<WritePMULLD, [ICXPort01], 10, [2], 2, 6>; // Vector PMULLD.
+defm : ICXWriteResPair<WritePMULLDY, [ICXPort01], 10, [2], 2, 7>;
+defm : ICXWriteResPair<WritePMULLDZ, [ICXPort05], 10, [2], 2, 7>;
+defm : ICXWriteResPair<WriteShuffle, [ICXPort5], 1, [1], 1, 5>; // Vector shuffles.
+defm : ICXWriteResPair<WriteShuffleX, [ICXPort5], 1, [1], 1, 6>;
+defm : ICXWriteResPair<WriteShuffleY, [ICXPort5], 1, [1], 1, 7>;
+defm : ICXWriteResPair<WriteShuffleZ, [ICXPort5], 1, [1], 1, 7>;
+defm : ICXWriteResPair<WriteVarShuffle, [ICXPort5], 1, [1], 1, 5>; // Vector variable shuffles.
+defm : ICXWriteResPair<WriteVarShuffleX, [ICXPort5], 1, [1], 1, 6>;
+defm : ICXWriteResPair<WriteVarShuffleY, [ICXPort5], 1, [1], 1, 7>;
+defm : ICXWriteResPair<WriteVarShuffleZ, [ICXPort5], 1, [1], 1, 7>;
+defm : ICXWriteResPair<WriteBlend, [ICXPort5], 1, [1], 1, 6>; // Vector blends.
+defm : ICXWriteResPair<WriteBlendY,[ICXPort5], 1, [1], 1, 7>;
+defm : ICXWriteResPair<WriteBlendZ,[ICXPort5], 1, [1], 1, 7>;
+defm : ICXWriteResPair<WriteVarBlend, [ICXPort015], 2, [2], 2, 6>; // Vector variable blends.
+defm : ICXWriteResPair<WriteVarBlendY,[ICXPort015], 2, [2], 2, 6>;
+defm : ICXWriteResPair<WriteVarBlendZ,[ICXPort05], 2, [1], 1, 6>;
+defm : ICXWriteResPair<WriteMPSAD, [ICXPort5], 4, [2], 2, 6>; // Vector MPSAD.
+defm : ICXWriteResPair<WriteMPSADY, [ICXPort5], 4, [2], 2, 7>;
+defm : ICXWriteResPair<WriteMPSADZ, [ICXPort5], 4, [2], 2, 7>;
+defm : ICXWriteResPair<WritePSADBW, [ICXPort5], 3, [1], 1, 5>; // Vector PSADBW.
+defm : ICXWriteResPair<WritePSADBWX, [ICXPort5], 3, [1], 1, 6>;
+defm : ICXWriteResPair<WritePSADBWY, [ICXPort5], 3, [1], 1, 7>;
+defm : ICXWriteResPair<WritePSADBWZ, [ICXPort5], 3, [1], 1, 7>;
+defm : ICXWriteResPair<WritePHMINPOS, [ICXPort0], 4, [1], 1, 6>; // Vector PHMINPOS.
+
+// Vector integer shifts.
+defm : ICXWriteResPair<WriteVecShift, [ICXPort0], 1, [1], 1, 5>;
+defm : X86WriteRes<WriteVecShiftX, [ICXPort5,ICXPort01], 2, [1,1], 2>;
+defm : X86WriteRes<WriteVecShiftY, [ICXPort5,ICXPort01], 4, [1,1], 2>;
+defm : X86WriteRes<WriteVecShiftZ, [ICXPort5,ICXPort0], 4, [1,1], 2>;
+defm : X86WriteRes<WriteVecShiftXLd, [ICXPort01,ICXPort23], 7, [1,1], 2>;
+defm : X86WriteRes<WriteVecShiftYLd, [ICXPort01,ICXPort23], 8, [1,1], 2>;
+defm : X86WriteRes<WriteVecShiftZLd, [ICXPort0,ICXPort23], 8, [1,1], 2>;
+
+defm : ICXWriteResPair<WriteVecShiftImm, [ICXPort0], 1, [1], 1, 5>;
+defm : ICXWriteResPair<WriteVecShiftImmX, [ICXPort01], 1, [1], 1, 6>; // Vector integer immediate shifts.
+defm : ICXWriteResPair<WriteVecShiftImmY, [ICXPort01], 1, [1], 1, 7>;
+defm : ICXWriteResPair<WriteVecShiftImmZ, [ICXPort0], 1, [1], 1, 7>;
+defm : ICXWriteResPair<WriteVarVecShift, [ICXPort01], 1, [1], 1, 6>; // Variable vector shifts.
+defm : ICXWriteResPair<WriteVarVecShiftY, [ICXPort01], 1, [1], 1, 7>;
+defm : ICXWriteResPair<WriteVarVecShiftZ, [ICXPort0], 1, [1], 1, 7>;
+
+// Vector insert/extract operations.
+def : WriteRes<WriteVecInsert, [ICXPort5]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2];
+}
+def : WriteRes<WriteVecInsertLd, [ICXPort5,ICXPort23]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def: InstRW<[WriteVecInsertLd], (instregex "(V?)MOV(H|L)(PD|PS)rm")>;
+
+def : WriteRes<WriteVecExtract, [ICXPort0,ICXPort5]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+def : WriteRes<WriteVecExtractSt, [ICXPort4,ICXPort5,ICXPort237]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+}
+
+// Conversion between integer and float.
+defm : ICXWriteResPair<WriteCvtSS2I, [ICXPort01], 6, [2], 2>; // Needs more work: DD vs DQ.
+defm : ICXWriteResPair<WriteCvtPS2I, [ICXPort01], 3>;
+defm : ICXWriteResPair<WriteCvtPS2IY, [ICXPort01], 3>;
+defm : ICXWriteResPair<WriteCvtPS2IZ, [ICXPort05], 3>;
+defm : ICXWriteResPair<WriteCvtSD2I, [ICXPort01], 6, [2], 2>;
+defm : ICXWriteResPair<WriteCvtPD2I, [ICXPort01], 3>;
+defm : ICXWriteResPair<WriteCvtPD2IY, [ICXPort01], 3>;
+defm : ICXWriteResPair<WriteCvtPD2IZ, [ICXPort05], 3>;
+
+defm : ICXWriteResPair<WriteCvtI2SS, [ICXPort1], 4>;
+defm : ICXWriteResPair<WriteCvtI2PS, [ICXPort01], 4>;
+defm : ICXWriteResPair<WriteCvtI2PSY, [ICXPort01], 4>;
+defm : ICXWriteResPair<WriteCvtI2PSZ, [ICXPort05], 4>; // Needs more work: DD vs DQ.
+defm : ICXWriteResPair<WriteCvtI2SD, [ICXPort1], 4>;
+defm : ICXWriteResPair<WriteCvtI2PD, [ICXPort01], 4>;
+defm : ICXWriteResPair<WriteCvtI2PDY, [ICXPort01], 4>;
+defm : ICXWriteResPair<WriteCvtI2PDZ, [ICXPort05], 4>;
+
+defm : ICXWriteResPair<WriteCvtSS2SD, [ICXPort1], 3>;
+defm : ICXWriteResPair<WriteCvtPS2PD, [ICXPort1], 3>;
+defm : ICXWriteResPair<WriteCvtPS2PDY, [ICXPort5,ICXPort01], 3, [1,1], 2>;
+defm : ICXWriteResPair<WriteCvtPS2PDZ, [ICXPort05], 3, [2], 2>;
+defm : ICXWriteResPair<WriteCvtSD2SS, [ICXPort1], 3>;
+defm : ICXWriteResPair<WriteCvtPD2PS, [ICXPort1], 3>;
+defm : ICXWriteResPair<WriteCvtPD2PSY, [ICXPort5,ICXPort01], 3, [1,1], 2>;
+defm : ICXWriteResPair<WriteCvtPD2PSZ, [ICXPort05], 3, [2], 2>;
+
+defm : X86WriteRes<WriteCvtPH2PS, [ICXPort5,ICXPort01], 5, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSY, [ICXPort5,ICXPort01], 7, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSZ, [ICXPort5,ICXPort0], 7, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSLd, [ICXPort23,ICXPort01], 9, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSYLd, [ICXPort23,ICXPort01], 10, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSZLd, [ICXPort23,ICXPort05], 10, [1,1], 2>;
+
+defm : X86WriteRes<WriteCvtPS2PH, [ICXPort5,ICXPort01], 5, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PHY, [ICXPort5,ICXPort01], 7, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PHZ, [ICXPort5,ICXPort05], 7, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PHSt, [ICXPort4,ICXPort5,ICXPort237,ICXPort01], 6, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteCvtPS2PHYSt, [ICXPort4,ICXPort5,ICXPort237,ICXPort01], 8, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteCvtPS2PHZSt, [ICXPort4,ICXPort5,ICXPort237,ICXPort05], 8, [1,1,1,1], 4>;
+
+// String instructions.
+
+// Packed Compare Implicit Length Strings, Return Mask
+def : WriteRes<WritePCmpIStrM, [ICXPort0]> {
+ let Latency = 10;
+ let NumMicroOps = 3;
+ let ResourceCycles = [3];
+}
+def : WriteRes<WritePCmpIStrMLd, [ICXPort0, ICXPort23]> {
+ let Latency = 16;
+ let NumMicroOps = 4;
+ let ResourceCycles = [3,1];
+}
+
+// Packed Compare Explicit Length Strings, Return Mask
+def : WriteRes<WritePCmpEStrM, [ICXPort0, ICXPort5, ICXPort015, ICXPort0156]> {
+ let Latency = 19;
+ let NumMicroOps = 9;
+ let ResourceCycles = [4,3,1,1];
+}
+def : WriteRes<WritePCmpEStrMLd, [ICXPort0, ICXPort5, ICXPort23, ICXPort015, ICXPort0156]> {
+ let Latency = 25;
+ let NumMicroOps = 10;
+ let ResourceCycles = [4,3,1,1,1];
+}
+
+// Packed Compare Implicit Length Strings, Return Index
+def : WriteRes<WritePCmpIStrI, [ICXPort0]> {
+ let Latency = 10;
+ let NumMicroOps = 3;
+ let ResourceCycles = [3];
+}
+def : WriteRes<WritePCmpIStrILd, [ICXPort0, ICXPort23]> {
+ let Latency = 16;
+ let NumMicroOps = 4;
+ let ResourceCycles = [3,1];
+}
+
+// Packed Compare Explicit Length Strings, Return Index
+def : WriteRes<WritePCmpEStrI, [ICXPort0,ICXPort5,ICXPort0156]> {
+ let Latency = 18;
+ let NumMicroOps = 8;
+ let ResourceCycles = [4,3,1];
+}
+def : WriteRes<WritePCmpEStrILd, [ICXPort0, ICXPort5, ICXPort23, ICXPort0156]> {
+ let Latency = 24;
+ let NumMicroOps = 9;
+ let ResourceCycles = [4,3,1,1];
+}
+
+// MOVMSK Instructions.
+def : WriteRes<WriteFMOVMSK, [ICXPort0]> { let Latency = 2; }
+def : WriteRes<WriteVecMOVMSK, [ICXPort0]> { let Latency = 2; }
+def : WriteRes<WriteVecMOVMSKY, [ICXPort0]> { let Latency = 2; }
+def : WriteRes<WriteMMXMOVMSK, [ICXPort0]> { let Latency = 2; }
+
+// AES instructions.
+def : WriteRes<WriteAESDecEnc, [ICXPort0]> { // Decryption, encryption.
+ let Latency = 4;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def : WriteRes<WriteAESDecEncLd, [ICXPort0, ICXPort23]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+
+def : WriteRes<WriteAESIMC, [ICXPort0]> { // InvMixColumn.
+ let Latency = 8;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2];
+}
+def : WriteRes<WriteAESIMCLd, [ICXPort0, ICXPort23]> {
+ let Latency = 14;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2,1];
+}
+
+def : WriteRes<WriteAESKeyGen, [ICXPort0,ICXPort5,ICXPort015]> { // Key Generation.
+ let Latency = 20;
+ let NumMicroOps = 11;
+ let ResourceCycles = [3,6,2];
+}
+def : WriteRes<WriteAESKeyGenLd, [ICXPort0,ICXPort5,ICXPort23,ICXPort015]> {
+ let Latency = 25;
+ let NumMicroOps = 11;
+ let ResourceCycles = [3,6,1,1];
+}
+
+// Carry-less multiplication instructions.
+def : WriteRes<WriteCLMul, [ICXPort5]> {
+ let Latency = 6;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def : WriteRes<WriteCLMulLd, [ICXPort5, ICXPort23]> {
+ let Latency = 12;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+
+// Catch-all for expensive system instructions.
+def : WriteRes<WriteSystem, [ICXPort0156]> { let Latency = 100; } // def WriteSystem : SchedWrite;
+
+// AVX2.
+defm : ICXWriteResPair<WriteFShuffle256, [ICXPort5], 3, [1], 1, 7>; // Fp 256-bit width vector shuffles.
+defm : ICXWriteResPair<WriteFVarShuffle256, [ICXPort5], 3, [1], 1, 7>; // Fp 256-bit width vector variable shuffles.
+defm : ICXWriteResPair<WriteShuffle256, [ICXPort5], 3, [1], 1, 7>; // 256-bit width vector shuffles.
+defm : ICXWriteResPair<WriteVPMOV256, [ICXPort5], 3, [1], 1, 7>; // 256-bit width packed vector width-changing move.
+defm : ICXWriteResPair<WriteVarShuffle256, [ICXPort5], 3, [1], 1, 7>; // 256-bit width vector variable shuffles.
+
+// Old microcoded instructions that nobody uses.
+def : WriteRes<WriteMicrocoded, [ICXPort0156]> { let Latency = 100; } // def WriteMicrocoded : SchedWrite;
+
+// Fence instructions.
+def : WriteRes<WriteFence, [ICXPort23, ICXPort4]>;
+
+// Load/store MXCSR.
+def : WriteRes<WriteLDMXCSR, [ICXPort0,ICXPort23,ICXPort0156]> { let Latency = 7; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; }
+def : WriteRes<WriteSTMXCSR, [ICXPort4,ICXPort5,ICXPort237]> { let Latency = 2; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; }
+
+// Nop, not very useful except that it provides a model for nops!
+def : WriteRes<WriteNop, []>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Horizontal add/sub instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : ICXWriteResPair<WriteFHAdd, [ICXPort5,ICXPort015], 6, [2,1], 3, 6>;
+defm : ICXWriteResPair<WriteFHAddY, [ICXPort5,ICXPort015], 6, [2,1], 3, 7>;
+defm : ICXWriteResPair<WritePHAdd, [ICXPort5,ICXPort05], 3, [2,1], 3, 5>;
+defm : ICXWriteResPair<WritePHAddX, [ICXPort5,ICXPort015], 3, [2,1], 3, 6>;
+defm : ICXWriteResPair<WritePHAddY, [ICXPort5,ICXPort015], 3, [2,1], 3, 7>;
+
+// Remaining instrs.
+
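+// Each ICXWriteResGroup definition below describes an ad-hoc SchedWriteRes
+// (ports used, latency, micro-op count and per-port cycles), and the InstRW
+// entries that follow it override the default scheduling information of the
+// listed instructions (named directly or matched by regular expression) with
+// that group.
+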
+def ICXWriteResGroup1 : SchedWriteRes<[ICXPort0]> {
+ let Latency = 1;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[ICXWriteResGroup1], (instregex "KAND(B|D|Q|W)rr",
+ "KANDN(B|D|Q|W)rr",
+ "KMOV(B|D|Q|W)kk",
+ "KNOT(B|D|Q|W)rr",
+ "KOR(B|D|Q|W)rr",
+ "KXNOR(B|D|Q|W)rr",
+ "KXOR(B|D|Q|W)rr",
+ "KSET0(B|D|Q|W)", // Same as KXOR
+ "KSET1(B|D|Q|W)", // Same as KXNOR
+ "MMX_PADDS(B|W)irr",
+ "MMX_PADDUS(B|W)irr",
+ "MMX_PAVG(B|W)irr",
+ "MMX_PCMPEQ(B|D|W)irr",
+ "MMX_PCMPGT(B|D|W)irr",
+ "MMX_P(MAX|MIN)SWirr",
+ "MMX_P(MAX|MIN)UBirr",
+ "MMX_PSUBS(B|W)irr",
+ "MMX_PSUBUS(B|W)irr",
+ "VPMOVB2M(Z|Z128|Z256)rr",
+ "VPMOVD2M(Z|Z128|Z256)rr",
+ "VPMOVQ2M(Z|Z128|Z256)rr",
+ "VPMOVW2M(Z|Z128|Z256)rr")>;
+
+def ICXWriteResGroup3 : SchedWriteRes<[ICXPort5]> {
+ let Latency = 1;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[ICXWriteResGroup3], (instregex "COM(P?)_FST0r",
+ "KMOV(B|D|Q|W)kr",
+ "UCOM_F(P?)r")>;
+
+def ICXWriteResGroup4 : SchedWriteRes<[ICXPort6]> {
+ let Latency = 1;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[ICXWriteResGroup4], (instregex "JMP(16|32|64)r")>;
+
+def ICXWriteResGroup6 : SchedWriteRes<[ICXPort05]> {
+ let Latency = 1;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[ICXWriteResGroup6], (instrs FINCSTP, FNOP)>;
+
+def ICXWriteResGroup7 : SchedWriteRes<[ICXPort06]> {
+ let Latency = 1;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[ICXWriteResGroup7], (instrs CDQ, CQO, CLAC, STAC)>;
+
+def ICXWriteResGroup8 : SchedWriteRes<[ICXPort15]> {
+ let Latency = 1;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[ICXWriteResGroup8], (instregex "ANDN(32|64)rr")>;
+
+def ICXWriteResGroup9 : SchedWriteRes<[ICXPort015]> {
+ let Latency = 1;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[ICXWriteResGroup9], (instregex "VBLENDMPD(Z128|Z256)rr",
+ "VBLENDMPS(Z128|Z256)rr",
+ "VPADD(B|D|Q|W)(Y|Z|Z128|Z256)rr",
+ "(V?)PADD(B|D|Q|W)rr",
+ "VPBLENDD(Y?)rri",
+ "VPBLENDMB(Z128|Z256)rr",
+ "VPBLENDMD(Z128|Z256)rr",
+ "VPBLENDMQ(Z128|Z256)rr",
+ "VPBLENDMW(Z128|Z256)rr",
+ "VPSUB(B|D|Q|W)(Y|Z|Z128|Z256)rrk",
+ "VPTERNLOGD(Z|Z128|Z256)rri",
+ "VPTERNLOGQ(Z|Z128|Z256)rri")>;
+
+def ICXWriteResGroup10 : SchedWriteRes<[ICXPort0156]> {
+ let Latency = 1;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[ICXWriteResGroup10], (instrs CBW, CWDE, CDQE,
+ CMC, STC,
+ SGDT64m,
+ SIDT64m,
+ SMSW16m,
+ STRm,
+ SYSCALL)>;
+
+def ICXWriteResGroup11 : SchedWriteRes<[ICXPort4,ICXPort237]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup11], (instrs FBSTPm, VMPTRSTm)>;
+def: InstRW<[ICXWriteResGroup11], (instregex "KMOV(B|D|Q|W)mk",
+ "ST_FP(32|64|80)m")>;
+
+def ICXWriteResGroup13 : SchedWriteRes<[ICXPort5]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2];
+}
+def: InstRW<[ICXWriteResGroup13], (instrs MMX_MOVQ2DQrr)>;
+
+def ICXWriteResGroup14 : SchedWriteRes<[ICXPort05]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2];
+}
+def: InstRW<[ICXWriteResGroup14], (instrs FDECSTP,
+ MMX_MOVDQ2Qrr)>;
+
+def ICXWriteResGroup17 : SchedWriteRes<[ICXPort0156]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2];
+}
+def: InstRW<[ICXWriteResGroup17], (instrs LFENCE,
+ WAIT,
+ XGETBV)>;
+
+def ICXWriteResGroup20 : SchedWriteRes<[ICXPort6,ICXPort0156]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup20], (instregex "CLFLUSH")>;
+
+def ICXWriteResGroup21 : SchedWriteRes<[ICXPort237,ICXPort0156]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup21], (instrs SFENCE)>;
+
+def ICXWriteResGroup23 : SchedWriteRes<[ICXPort06,ICXPort0156]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup23], (instrs CWD,
+ JCXZ, JECXZ, JRCXZ,
+ ADC8i8, SBB8i8,
+ ADC16i16, SBB16i16,
+ ADC32i32, SBB32i32,
+ ADC64i32, SBB64i32)>;
+
+def ICXWriteResGroup25 : SchedWriteRes<[ICXPort4,ICXPort6,ICXPort237]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[ICXWriteResGroup25], (instrs FNSTCW16m)>;
+
+def ICXWriteResGroup27 : SchedWriteRes<[ICXPort4,ICXPort237,ICXPort15]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[ICXWriteResGroup27], (instregex "MOVBE(16|32|64)mr")>;
+
+def ICXWriteResGroup28 : SchedWriteRes<[ICXPort4,ICXPort237,ICXPort0156]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[ICXWriteResGroup28], (instrs PUSH16r, PUSH32r, PUSH64r, PUSH64i8,
+ STOSB, STOSL, STOSQ, STOSW)>;
+def: InstRW<[ICXWriteResGroup28], (instregex "PUSH(16|32|64)rmr")>;
+
+def ICXWriteResGroup29 : SchedWriteRes<[ICXPort4,ICXPort237,ICXPort15]> {
+ let Latency = 2;
+ let NumMicroOps = 5;
+ let ResourceCycles = [2,2,1];
+}
+def: InstRW<[ICXWriteResGroup29], (instregex "VMOVDQU8Zmr(b?)")>;
+
+def ICXWriteResGroup30 : SchedWriteRes<[ICXPort0]> {
+ let Latency = 3;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[ICXWriteResGroup30], (instregex "KMOV(B|D|Q|W)rk",
+ "KORTEST(B|D|Q|W)rr",
+ "KTEST(B|D|Q|W)rr")>;
+
+def ICXWriteResGroup31 : SchedWriteRes<[ICXPort1]> {
+ let Latency = 3;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[ICXWriteResGroup31], (instregex "PDEP(32|64)rr",
+ "PEXT(32|64)rr")>;
+
+def ICXWriteResGroup32 : SchedWriteRes<[ICXPort5]> {
+ let Latency = 3;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[ICXWriteResGroup32], (instrs VPSADBWZrr)>; // TODO: 512-bit ops require ports 0/1 to be joined.
+def: InstRW<[ICXWriteResGroup32], (instregex "(ADD|SUB|SUBR)_(FPrST0|FST0r|FrST0)",
+ "VALIGND(Z|Z128|Z256)rri",
+ "VALIGNQ(Z|Z128|Z256)rri",
+ "VDBPSADBWZrri", // TODO: 512-bit ops require ports 0/1 to be joined.
+ "VPBROADCAST(B|W)rr",
+ "VP(MAX|MIN)(S|U)Q(Z|Z128|Z256)rr")>;
+
+def ICXWriteResGroup33 : SchedWriteRes<[ICXPort5]> {
+ let Latency = 4;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[ICXWriteResGroup33], (instregex "KADD(B|D|Q|W)rr",
+ "KSHIFTL(B|D|Q|W)ri",
+ "KSHIFTR(B|D|Q|W)ri",
+ "KUNPCK(BW|DQ|WD)rr",
+ "VCMPPD(Z|Z128|Z256)rri",
+ "VCMPPS(Z|Z128|Z256)rri",
+ "VCMP(SD|SS)Zrr",
+ "VFPCLASS(PD|PS)(Z|Z128|Z256)rr",
+ "VFPCLASS(SD|SS)Zrr",
+ "VPCMPB(Z|Z128|Z256)rri",
+ "VPCMPD(Z|Z128|Z256)rri",
+ "VPCMPEQ(B|D|Q|W)(Z|Z128|Z256)rr",
+ "VPCMPGT(B|D|Q|W)(Z|Z128|Z256)rr",
+ "VPCMPQ(Z|Z128|Z256)rri",
+ "VPCMPU(B|D|Q|W)(Z|Z128|Z256)rri",
+ "VPCMPW(Z|Z128|Z256)rri",
+ "VPTEST(N?)M(B|D|Q|W)(Z|Z128|Z256)rr")>;
+
+def ICXWriteResGroup34 : SchedWriteRes<[ICXPort0,ICXPort0156]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup34], (instrs FNSTSW16r)>;
+
+def ICXWriteResGroup37 : SchedWriteRes<[ICXPort0,ICXPort5]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,2];
+}
+def: InstRW<[ICXWriteResGroup37], (instregex "MMX_PH(ADD|SUB)SWrr")>;
+
+def ICXWriteResGroup38 : SchedWriteRes<[ICXPort5,ICXPort01]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2,1];
+}
+def: InstRW<[ICXWriteResGroup38], (instregex "(V?)PH(ADD|SUB)SW(Y?)rr")>;
+
+def ICXWriteResGroup41 : SchedWriteRes<[ICXPort5,ICXPort0156]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2,1];
+}
+def: InstRW<[ICXWriteResGroup41], (instrs MMX_PACKSSDWirr,
+ MMX_PACKSSWBirr,
+ MMX_PACKUSWBirr)>;
+
+def ICXWriteResGroup42 : SchedWriteRes<[ICXPort6,ICXPort0156]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,2];
+}
+def: InstRW<[ICXWriteResGroup42], (instregex "CLD")>;
+
+def ICXWriteResGroup43 : SchedWriteRes<[ICXPort237,ICXPort0156]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,2];
+}
+def: InstRW<[ICXWriteResGroup43], (instrs MFENCE)>;
+
+def ICXWriteResGroup44 : SchedWriteRes<[ICXPort06,ICXPort0156]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,2];
+}
+def: InstRW<[ICXWriteResGroup44], (instregex "RCL(8|16|32|64)r(1|i)",
+ "RCR(8|16|32|64)r(1|i)")>;
+
+def ICXWriteResGroup45 : SchedWriteRes<[ICXPort0,ICXPort4,ICXPort237]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[ICXWriteResGroup45], (instrs FNSTSWm)>;
+
+def ICXWriteResGroup47 : SchedWriteRes<[ICXPort4,ICXPort6,ICXPort237,ICXPort0156]> {
+ let Latency = 3;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,1,1,1];
+}
+def: InstRW<[ICXWriteResGroup47], (instregex "CALL(16|32|64)r")>;
+
+def ICXWriteResGroup48 : SchedWriteRes<[ICXPort4,ICXPort237,ICXPort06,ICXPort0156]> {
+ let Latency = 3;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,1,1,1];
+}
+def: InstRW<[ICXWriteResGroup48], (instrs CALL64pcrel32)>;
+
+def ICXWriteResGroup49 : SchedWriteRes<[ICXPort0]> {
+ let Latency = 4;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[ICXWriteResGroup49], (instregex "MUL_(FPrST0|FST0r|FrST0)")>;
+
+def ICXWriteResGroup50 : SchedWriteRes<[ICXPort01]> {
+ let Latency = 4;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[ICXWriteResGroup50], (instregex "VCVTDQ2PS(Y|Z128|Z256)rr",
+ "(V?)CVTDQ2PSrr",
+ "VCVTPD2QQ(Z128|Z256)rr",
+ "VCVTPD2UQQ(Z128|Z256)rr",
+ "VCVTPS2DQ(Y|Z128|Z256)rr",
+ "(V?)CVTPS2DQrr",
+ "VCVTPS2UDQ(Z128|Z256)rr",
+ "VCVTQQ2PD(Z128|Z256)rr",
+ "VCVTTPD2QQ(Z128|Z256)rr",
+ "VCVTTPD2UQQ(Z128|Z256)rr",
+ "VCVTTPS2DQ(Z128|Z256)rr",
+ "(V?)CVTTPS2DQrr",
+ "VCVTTPS2UDQ(Z128|Z256)rr",
+ "VCVTUDQ2PS(Z128|Z256)rr",
+ "VCVTUQQ2PD(Z128|Z256)rr")>;
+
+def ICXWriteResGroup50z : SchedWriteRes<[ICXPort05]> {
+ let Latency = 4;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[ICXWriteResGroup50z], (instrs VCVTDQ2PSZrr,
+ VCVTPD2QQZrr,
+ VCVTPD2UQQZrr,
+ VCVTPS2DQZrr,
+ VCVTPS2UDQZrr,
+ VCVTQQ2PDZrr,
+ VCVTTPD2QQZrr,
+ VCVTTPD2UQQZrr,
+ VCVTTPS2DQZrr,
+ VCVTTPS2UDQZrr,
+ VCVTUDQ2PSZrr,
+ VCVTUQQ2PDZrr)>;
+
+def ICXWriteResGroup51 : SchedWriteRes<[ICXPort5]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2];
+}
+def: InstRW<[ICXWriteResGroup51], (instregex "VEXPANDPD(Z|Z128|Z256)rr",
+ "VEXPANDPS(Z|Z128|Z256)rr",
+ "VPEXPANDD(Z|Z128|Z256)rr",
+ "VPEXPANDQ(Z|Z128|Z256)rr",
+ "VPMOVDB(Z|Z128|Z256)rr",
+ "VPMOVDW(Z|Z128|Z256)rr",
+ "VPMOVQB(Z|Z128|Z256)rr",
+ "VPMOVQW(Z|Z128|Z256)rr",
+ "VPMOVSDB(Z|Z128|Z256)rr",
+ "VPMOVSDW(Z|Z128|Z256)rr",
+ "VPMOVSQB(Z|Z128|Z256)rr",
+ "VPMOVSQD(Z|Z128|Z256)rr",
+ "VPMOVSQW(Z|Z128|Z256)rr",
+ "VPMOVSWB(Z|Z128|Z256)rr",
+ "VPMOVUSDB(Z|Z128|Z256)rr",
+ "VPMOVUSDW(Z|Z128|Z256)rr",
+ "VPMOVUSQB(Z|Z128|Z256)rr",
+ "VPMOVUSQD(Z|Z128|Z256)rr",
+ "VPMOVUSWB(Z|Z128|Z256)rr",
+ "VPMOVWB(Z|Z128|Z256)rr")>;
+
+def ICXWriteResGroup54 : SchedWriteRes<[ICXPort4,ICXPort5,ICXPort237]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[ICXWriteResGroup54], (instregex "IST(T?)_FP(16|32|64)m",
+ "IST_F(16|32)m",
+ "VPMOVQD(Z|Z128|Z256)mr(b?)")>;
+
+def ICXWriteResGroup55 : SchedWriteRes<[ICXPort0156]> {
+ let Latency = 4;
+ let NumMicroOps = 4;
+ let ResourceCycles = [4];
+}
+def: InstRW<[ICXWriteResGroup55], (instrs FNCLEX)>;
+
+def ICXWriteResGroup56 : SchedWriteRes<[]> {
+ let Latency = 0;
+ let NumMicroOps = 4;
+ let ResourceCycles = [];
+}
+def: InstRW<[ICXWriteResGroup56], (instrs VZEROUPPER)>;
+
+def ICXWriteResGroup57 : SchedWriteRes<[ICXPort1,ICXPort6,ICXPort0156]> {
+ let Latency = 4;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,1,2];
+}
+def: InstRW<[ICXWriteResGroup57], (instregex "LAR(16|32|64)rr")>;
+
+def ICXWriteResGroup58 : SchedWriteRes<[ICXPort23]> {
+ let Latency = 5;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[ICXWriteResGroup58], (instregex "MOVSX(16|32|64)rm(8|16|32)",
+ "MOVZX(16|32|64)rm(8|16)",
+ "(V?)MOVDDUPrm")>; // TODO: Should this be ICXWriteResGroup71?
+
+def ICXWriteResGroup61 : SchedWriteRes<[ICXPort5,ICXPort015]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup61], (instregex "MMX_CVT(T?)PD2PIirr",
+ "MMX_CVT(T?)PS2PIirr",
+ "VCVTDQ2PDZ128rr",
+ "VCVTPD2DQZ128rr",
+ "(V?)CVT(T?)PD2DQrr",
+ "VCVTPD2PSZ128rr",
+ "(V?)CVTPD2PSrr",
+ "VCVTPD2UDQZ128rr",
+ "VCVTPS2PDZ128rr",
+ "(V?)CVTPS2PDrr",
+ "VCVTPS2QQZ128rr",
+ "VCVTPS2UQQZ128rr",
+ "VCVTQQ2PSZ128rr",
+ "(V?)CVTSD2SS(Z?)rr",
+ "(V?)CVTSI(64)?2SDrr",
+ "VCVTSI2SSZrr",
+ "(V?)CVTSI2SSrr",
+ "VCVTSI(64)?2SDZrr",
+ "VCVTSS2SDZrr",
+ "(V?)CVTSS2SDrr",
+ "VCVTTPD2DQZ128rr",
+ "VCVTTPD2UDQZ128rr",
+ "VCVTTPS2QQZ128rr",
+ "VCVTTPS2UQQZ128rr",
+ "VCVTUDQ2PDZ128rr",
+ "VCVTUQQ2PSZ128rr",
+ "VCVTUSI2SSZrr",
+ "VCVTUSI(64)?2SDZrr")>;
+
+def ICXWriteResGroup62 : SchedWriteRes<[ICXPort5,ICXPort015]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2,1];
+}
+def: InstRW<[ICXWriteResGroup62], (instregex "VPCONFLICTQZ128rr")>;
+
+def ICXWriteResGroup63 : SchedWriteRes<[ICXPort1,ICXPort6,ICXPort06]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[ICXWriteResGroup63], (instregex "STR(16|32|64)r")>;
+
+def ICXWriteResGroup65 : SchedWriteRes<[ICXPort4,ICXPort237,ICXPort015]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[ICXWriteResGroup65], (instregex "VCVTPS2PHZ128mr(b?)",
+ "VCVTPS2PHZ256mr(b?)",
+ "VCVTPS2PHZmr(b?)")>;
+
+def ICXWriteResGroup66 : SchedWriteRes<[ICXPort4,ICXPort5,ICXPort237]> {
+ let Latency = 5;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,2,1];
+}
+def: InstRW<[ICXWriteResGroup66], (instregex "VPMOVDB(Z|Z128|Z256)mr(b?)",
+ "VPMOVDW(Z|Z128|Z256)mr(b?)",
+ "VPMOVQB(Z|Z128|Z256)mr(b?)",
+ "VPMOVQW(Z|Z128|Z256)mr(b?)",
+ "VPMOVSDB(Z|Z128|Z256)mr(b?)",
+ "VPMOVSDW(Z|Z128|Z256)mr(b?)",
+ "VPMOVSQB(Z|Z128|Z256)mr(b?)",
+ "VPMOVSQD(Z|Z128|Z256)mr(b?)",
+ "VPMOVSQW(Z|Z128|Z256)mr(b?)",
+ "VPMOVSWB(Z|Z128|Z256)mr(b?)",
+ "VPMOVUSDB(Z|Z128|Z256)mr(b?)",
+ "VPMOVUSDW(Z|Z128|Z256)mr(b?)",
+ "VPMOVUSQB(Z|Z128|Z256)mr(b?)",
+ "VPMOVUSQD(Z|Z128|Z256)mr(b?)",
+ "VPMOVUSQW(Z|Z128|Z256)mr(b?)",
+ "VPMOVUSWB(Z|Z128|Z256)mr(b?)",
+ "VPMOVWB(Z|Z128|Z256)mr(b?)")>;
+
+def ICXWriteResGroup67 : SchedWriteRes<[ICXPort06,ICXPort0156]> {
+ let Latency = 5;
+ let NumMicroOps = 5;
+ let ResourceCycles = [1,4];
+}
+def: InstRW<[ICXWriteResGroup67], (instrs XSETBV)>;
+
+def ICXWriteResGroup69 : SchedWriteRes<[ICXPort4,ICXPort237,ICXPort0156]> {
+ let Latency = 5;
+ let NumMicroOps = 6;
+ let ResourceCycles = [1,1,4];
+}
+def: InstRW<[ICXWriteResGroup69], (instregex "PUSHF(16|64)")>;
+
+def ICXWriteResGroup71 : SchedWriteRes<[ICXPort23]> {
+ let Latency = 6;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[ICXWriteResGroup71], (instrs VBROADCASTSSrm,
+ VPBROADCASTDrm,
+ VPBROADCASTQrm,
+ VMOVSHDUPrm,
+ VMOVSLDUPrm,
+ MOVSHDUPrm,
+ MOVSLDUPrm)>;
+
+def ICXWriteResGroup72 : SchedWriteRes<[ICXPort5]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2];
+}
+def: InstRW<[ICXWriteResGroup72], (instrs MMX_CVTPI2PSirr)>;
+def: InstRW<[ICXWriteResGroup72], (instregex "VCOMPRESSPD(Z|Z128|Z256)rr",
+ "VCOMPRESSPS(Z|Z128|Z256)rr",
+ "VPCOMPRESSD(Z|Z128|Z256)rr",
+ "VPCOMPRESSQ(Z|Z128|Z256)rr",
+ "VPERMW(Z|Z128|Z256)rr")>;
+
+def ICXWriteResGroup73 : SchedWriteRes<[ICXPort0,ICXPort23]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup73], (instrs MMX_PADDSBirm,
+ MMX_PADDSWirm,
+ MMX_PADDUSBirm,
+ MMX_PADDUSWirm,
+ MMX_PAVGBirm,
+ MMX_PAVGWirm,
+ MMX_PCMPEQBirm,
+ MMX_PCMPEQDirm,
+ MMX_PCMPEQWirm,
+ MMX_PCMPGTBirm,
+ MMX_PCMPGTDirm,
+ MMX_PCMPGTWirm,
+ MMX_PMAXSWirm,
+ MMX_PMAXUBirm,
+ MMX_PMINSWirm,
+ MMX_PMINUBirm,
+ MMX_PSUBSBirm,
+ MMX_PSUBSWirm,
+ MMX_PSUBUSBirm,
+ MMX_PSUBUSWirm)>;
+
+def ICXWriteResGroup76 : SchedWriteRes<[ICXPort6,ICXPort23]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup76], (instrs FARJMP64m)>;
+def: InstRW<[ICXWriteResGroup76], (instregex "JMP(16|32|64)m")>;
+
+def ICXWriteResGroup79 : SchedWriteRes<[ICXPort23,ICXPort15]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup79], (instregex "ANDN(32|64)rm",
+ "MOVBE(16|32|64)rm")>;
+
+def ICXWriteResGroup80 : SchedWriteRes<[ICXPort23,ICXPort015]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup80], (instregex "VMOV(64to|QI2)PQIZrm(b?)")>;
+def: InstRW<[ICXWriteResGroup80], (instrs VMOVDI2PDIZrm)>;
+
+def ICXWriteResGroup81 : SchedWriteRes<[ICXPort23,ICXPort0156]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup81], (instrs POP16r, POP32r, POP64r)>;
+def: InstRW<[ICXWriteResGroup81], (instregex "POP(16|32|64)rmr")>;
+
+def ICXWriteResGroup82 : SchedWriteRes<[ICXPort5,ICXPort015]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2,1];
+}
+def: InstRW<[ICXWriteResGroup82], (instregex "(V?)CVTSI642SSrr",
+ "VCVTSI642SSZrr",
+ "VCVTUSI642SSZrr")>;
+
+def ICXWriteResGroup84 : SchedWriteRes<[ICXPort1,ICXPort6,ICXPort06,ICXPort0156]> {
+ let Latency = 6;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,1,1,1];
+}
+def: InstRW<[ICXWriteResGroup84], (instregex "SLDT(16|32|64)r")>;
+
+def ICXWriteResGroup86 : SchedWriteRes<[ICXPort4,ICXPort23,ICXPort237,ICXPort06]> {
+ let Latency = 6;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,1,1,1];
+}
+def: InstRW<[ICXWriteResGroup86], (instregex "SAR(8|16|32|64)m(1|i)",
+ "SHL(8|16|32|64)m(1|i)",
+ "SHR(8|16|32|64)m(1|i)")>;
+
+def ICXWriteResGroup87 : SchedWriteRes<[ICXPort4,ICXPort23,ICXPort237,ICXPort0156]> {
+ let Latency = 6;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,1,1,1];
+}
+def: InstRW<[ICXWriteResGroup87], (instregex "POP(16|32|64)rmm",
+ "PUSH(16|32|64)rmm")>;
+
+def ICXWriteResGroup88 : SchedWriteRes<[ICXPort6,ICXPort0156]> {
+ let Latency = 6;
+ let NumMicroOps = 6;
+ let ResourceCycles = [1,5];
+}
+def: InstRW<[ICXWriteResGroup88], (instrs STD)>;
+
+def ICXWriteResGroup89 : SchedWriteRes<[ICXPort23]> {
+ let Latency = 7;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[ICXWriteResGroup89], (instregex "LD_F(32|64|80)m")>;
+def: InstRW<[ICXWriteResGroup89], (instrs VBROADCASTF128,
+ VBROADCASTI128,
+ VBROADCASTSDYrm,
+ VBROADCASTSSYrm,
+ VMOVDDUPYrm,
+ VMOVSHDUPYrm,
+ VMOVSLDUPYrm,
+ VPBROADCASTDYrm,
+ VPBROADCASTQYrm)>;
+
+def ICXWriteResGroup90 : SchedWriteRes<[ICXPort01,ICXPort5]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup90], (instrs VCVTDQ2PDYrr)>;
+
+def ICXWriteResGroup92 : SchedWriteRes<[ICXPort5,ICXPort23]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup92], (instregex "VMOVSDZrm(b?)",
+ "VMOVSSZrm(b?)")>;
+
+def ICXWriteResGroup92a : SchedWriteRes<[ICXPort5,ICXPort23]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup92a], (instregex "(V?)PMOV(SX|ZX)BDrm",
+ "(V?)PMOV(SX|ZX)BQrm",
+ "(V?)PMOV(SX|ZX)BWrm",
+ "(V?)PMOV(SX|ZX)DQrm",
+ "(V?)PMOV(SX|ZX)WDrm",
+ "(V?)PMOV(SX|ZX)WQrm")>;
+
+def ICXWriteResGroup93 : SchedWriteRes<[ICXPort5,ICXPort015]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup93], (instregex "VCVTDQ2PDZ256rr",
+ "VCVTPD2DQ(Y|Z256)rr",
+ "VCVTPD2PS(Y|Z256)rr",
+ "VCVTPD2UDQZ256rr",
+ "VCVTPS2PD(Y|Z256)rr",
+ "VCVTPS2QQZ256rr",
+ "VCVTPS2UQQZ256rr",
+ "VCVTQQ2PSZ256rr",
+ "VCVTTPD2DQ(Y|Z256)rr",
+ "VCVTTPD2UDQZ256rr",
+ "VCVTTPS2QQZ256rr",
+ "VCVTTPS2UQQZ256rr",
+ "VCVTUDQ2PDZ256rr",
+ "VCVTUQQ2PSZ256rr")>;
+
+def ICXWriteResGroup93z : SchedWriteRes<[ICXPort5,ICXPort05]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup93z], (instrs VCVTDQ2PDZrr,
+ VCVTPD2DQZrr,
+ VCVTPD2PSZrr,
+ VCVTPD2UDQZrr,
+ VCVTPS2PDZrr,
+ VCVTPS2QQZrr,
+ VCVTPS2UQQZrr,
+ VCVTQQ2PSZrr,
+ VCVTTPD2DQZrr,
+ VCVTTPD2UDQZrr,
+ VCVTTPS2QQZrr,
+ VCVTTPS2UQQZrr,
+ VCVTUDQ2PDZrr,
+ VCVTUQQ2PSZrr)>;
+
+def ICXWriteResGroup95 : SchedWriteRes<[ICXPort23,ICXPort015]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup95], (instrs VMOVNTDQAZ128rm,
+ VPBLENDDrmi)>;
+def: InstRW<[ICXWriteResGroup95, ReadAfterVecXLd],
+ (instregex "VBLENDMPDZ128rm(b?)",
+ "VBLENDMPSZ128rm(b?)",
+ "VBROADCASTI32X2Z128rm(b?)",
+ "VBROADCASTSSZ128rm(b?)",
+ "VINSERT(F|I)128rm",
+ "VMOVAPDZ128rm(b?)",
+ "VMOVAPSZ128rm(b?)",
+ "VMOVDDUPZ128rm(b?)",
+ "VMOVDQA32Z128rm(b?)",
+ "VMOVDQA64Z128rm(b?)",
+ "VMOVDQU16Z128rm(b?)",
+ "VMOVDQU32Z128rm(b?)",
+ "VMOVDQU64Z128rm(b?)",
+ "VMOVDQU8Z128rm(b?)",
+ "VMOVSHDUPZ128rm(b?)",
+ "VMOVSLDUPZ128rm(b?)",
+ "VMOVUPDZ128rm(b?)",
+ "VMOVUPSZ128rm(b?)",
+ "VPADD(B|D|Q|W)Z128rm(b?)",
+ "(V?)PADD(B|D|Q|W)rm",
+ "VPBLENDM(B|D|Q|W)Z128rm(b?)",
+ "VPBROADCASTDZ128rm(b?)",
+ "VPBROADCASTQZ128rm(b?)",
+ "VPSUB(B|D|Q|W)Z128rm(b?)",
+ "(V?)PSUB(B|D|Q|W)rm",
+ "VPTERNLOGDZ128rm(b?)i",
+ "VPTERNLOGQZ128rm(b?)i")>;
+
+def ICXWriteResGroup96 : SchedWriteRes<[ICXPort5,ICXPort23]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2,1];
+}
+def: InstRW<[ICXWriteResGroup96], (instrs MMX_PACKSSDWirm,
+ MMX_PACKSSWBirm,
+ MMX_PACKUSWBirm)>;
+
+def ICXWriteResGroup97 : SchedWriteRes<[ICXPort5,ICXPort015]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2,1];
+}
+def: InstRW<[ICXWriteResGroup97], (instregex "VPERMI2W128rr",
+ "VPERMI2W256rr",
+ "VPERMI2Wrr",
+ "VPERMT2W128rr",
+ "VPERMT2W256rr",
+ "VPERMT2Wrr")>;
+
+def ICXWriteResGroup99 : SchedWriteRes<[ICXPort23,ICXPort0156]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,2];
+}
+def: InstRW<[ICXWriteResGroup99], (instrs LEAVE, LEAVE64,
+ SCASB, SCASL, SCASQ, SCASW)>;
+
+def ICXWriteResGroup100 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort015]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[ICXWriteResGroup100], (instregex "VCVTSS2USI64Zrr",
+ "(V?)CVTSS2SI64(Z?)rr",
+ "(V?)CVTTSS2SI64(Z?)rr",
+ "VCVTTSS2USI64Zrr")>;
+
+def ICXWriteResGroup101 : SchedWriteRes<[ICXPort0,ICXPort23,ICXPort05]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[ICXWriteResGroup101], (instrs FLDCW16m)>;
+
+def ICXWriteResGroup103 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort0156]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[ICXWriteResGroup103], (instregex "KMOV(B|D|Q|W)km")>;
+
+def ICXWriteResGroup104 : SchedWriteRes<[ICXPort6,ICXPort23,ICXPort0156]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[ICXWriteResGroup104], (instrs LRET64, RET64)>;
+
+def ICXWriteResGroup106 : SchedWriteRes<[ICXPort4,ICXPort5,ICXPort237]> {
+ let Latency = 7;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,2,1];
+}
+def: InstRW<[ICXWriteResGroup106], (instregex "VCOMPRESSPD(Z|Z128|Z256)mr(b?)",
+ "VCOMPRESSPS(Z|Z128|Z256)mr(b?)",
+ "VPCOMPRESSD(Z|Z128|Z256)mr(b?)",
+ "VPCOMPRESSQ(Z|Z128|Z256)mr(b?)")>;
+
+def ICXWriteResGroup107 : SchedWriteRes<[ICXPort4,ICXPort23,ICXPort237,ICXPort06]> {
+ let Latency = 7;
+ let NumMicroOps = 5;
+ let ResourceCycles = [1,1,1,2];
+}
+def: InstRW<[ICXWriteResGroup107], (instregex "ROL(8|16|32|64)m(1|i)",
+ "ROR(8|16|32|64)m(1|i)")>;
+
+def ICXWriteResGroup107_1 : SchedWriteRes<[ICXPort06]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2];
+}
+def: InstRW<[ICXWriteResGroup107_1], (instrs ROL8r1, ROL16r1, ROL32r1, ROL64r1,
+ ROR8r1, ROR16r1, ROR32r1, ROR64r1)>;
+
+def ICXWriteResGroup108 : SchedWriteRes<[ICXPort4,ICXPort23,ICXPort237,ICXPort0156]> {
+ let Latency = 7;
+ let NumMicroOps = 5;
+ let ResourceCycles = [1,1,1,2];
+}
+def: InstRW<[ICXWriteResGroup108], (instregex "XADD(8|16|32|64)rm")>;
+
+def ICXWriteResGroup109 : SchedWriteRes<[ICXPort4,ICXPort6,ICXPort23,ICXPort237,ICXPort0156]> {
+ let Latency = 7;
+ let NumMicroOps = 5;
+ let ResourceCycles = [1,1,1,1,1];
+}
+def: InstRW<[ICXWriteResGroup109], (instregex "CALL(16|32|64)m")>;
+def: InstRW<[ICXWriteResGroup109], (instrs FARCALL64m)>;
+
+def ICXWriteResGroup110 : SchedWriteRes<[ICXPort0,ICXPort4,ICXPort237,ICXPort0156]> {
+ let Latency = 7;
+ let NumMicroOps = 7;
+ let ResourceCycles = [1,2,2,2];
+}
+def: InstRW<[ICXWriteResGroup110], (instrs VPSCATTERDQZ128mr,
+ VPSCATTERQQZ128mr,
+ VSCATTERDPDZ128mr,
+ VSCATTERQPDZ128mr)>;
+
+def ICXWriteResGroup111 : SchedWriteRes<[ICXPort6,ICXPort06,ICXPort15,ICXPort0156]> {
+ let Latency = 7;
+ let NumMicroOps = 7;
+ let ResourceCycles = [1,3,1,2];
+}
+def: InstRW<[ICXWriteResGroup111], (instrs LOOP)>;
+
+def ICXWriteResGroup112 : SchedWriteRes<[ICXPort0,ICXPort4,ICXPort237,ICXPort0156]> {
+ let Latency = 7;
+ let NumMicroOps = 11;
+ let ResourceCycles = [1,4,4,2];
+}
+def: InstRW<[ICXWriteResGroup112], (instrs VPSCATTERDQZ256mr,
+ VPSCATTERQQZ256mr,
+ VSCATTERDPDZ256mr,
+ VSCATTERQPDZ256mr)>;
+
+def ICXWriteResGroup113 : SchedWriteRes<[ICXPort0,ICXPort4,ICXPort237,ICXPort0156]> {
+ let Latency = 7;
+ let NumMicroOps = 19;
+ let ResourceCycles = [1,8,8,2];
+}
+def: InstRW<[ICXWriteResGroup113], (instrs VPSCATTERDQZmr,
+ VPSCATTERQQZmr,
+ VSCATTERDPDZmr,
+ VSCATTERQPDZmr)>;
+
+def ICXWriteResGroup114 : SchedWriteRes<[ICXPort0,ICXPort4,ICXPort5,ICXPort237,ICXPort0156]> {
+ let Latency = 7;
+ let NumMicroOps = 36;
+ let ResourceCycles = [1,16,1,16,2];
+}
+def: InstRW<[ICXWriteResGroup114], (instrs VSCATTERDPSZmr)>;
+
+def ICXWriteResGroup118 : SchedWriteRes<[ICXPort1,ICXPort23]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup118], (instregex "PDEP(32|64)rm",
+ "PEXT(32|64)rm")>;
+
+def ICXWriteResGroup119 : SchedWriteRes<[ICXPort5,ICXPort23]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup119], (instregex "FCOM(P?)(32|64)m",
+ "VPBROADCASTB(Z|Z256)rm(b?)",
+ "VPBROADCASTW(Z|Z256)rm(b?)")>;
+def: InstRW<[ICXWriteResGroup119], (instrs VPBROADCASTBYrm,
+ VPBROADCASTWYrm,
+ VPMOVSXBDYrm,
+ VPMOVSXBQYrm,
+ VPMOVSXWQYrm)>;
+
+def ICXWriteResGroup121 : SchedWriteRes<[ICXPort23,ICXPort015]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup121], (instrs VMOVNTDQAZ256rm,
+ VPBLENDDYrmi)>;
+def: InstRW<[ICXWriteResGroup121, ReadAfterVecYLd],
+ (instregex "VBLENDMPD(Z|Z256)rm(b?)",
+ "VBLENDMPS(Z|Z256)rm(b?)",
+ "VBROADCASTF32X2Z256rm(b?)",
+ "VBROADCASTF32X2Zrm(b?)",
+ "VBROADCASTF32X4Z256rm(b?)",
+ "VBROADCASTF32X4rm(b?)",
+ "VBROADCASTF32X8rm(b?)",
+ "VBROADCASTF64X2Z128rm(b?)",
+ "VBROADCASTF64X2rm(b?)",
+ "VBROADCASTF64X4rm(b?)",
+ "VBROADCASTI32X2Z256rm(b?)",
+ "VBROADCASTI32X2Zrm(b?)",
+ "VBROADCASTI32X4Z256rm(b?)",
+ "VBROADCASTI32X4rm(b?)",
+ "VBROADCASTI32X8rm(b?)",
+ "VBROADCASTI64X2Z128rm(b?)",
+ "VBROADCASTI64X2rm(b?)",
+ "VBROADCASTI64X4rm(b?)",
+ "VBROADCASTSD(Z|Z256)rm(b?)",
+ "VBROADCASTSS(Z|Z256)rm(b?)",
+ "VINSERTF32x4(Z|Z256)rm(b?)",
+ "VINSERTF32x8Zrm(b?)",
+ "VINSERTF64x2(Z|Z256)rm(b?)",
+ "VINSERTF64x4Zrm(b?)",
+ "VINSERTI32x4(Z|Z256)rm(b?)",
+ "VINSERTI32x8Zrm(b?)",
+ "VINSERTI64x2(Z|Z256)rm(b?)",
+ "VINSERTI64x4Zrm(b?)",
+ "VMOVAPD(Z|Z256)rm(b?)",
+ "VMOVAPS(Z|Z256)rm(b?)",
+ "VMOVDDUP(Z|Z256)rm(b?)",
+ "VMOVDQA32(Z|Z256)rm(b?)",
+ "VMOVDQA64(Z|Z256)rm(b?)",
+ "VMOVDQU16(Z|Z256)rm(b?)",
+ "VMOVDQU32(Z|Z256)rm(b?)",
+ "VMOVDQU64(Z|Z256)rm(b?)",
+ "VMOVDQU8(Z|Z256)rm(b?)",
+ "VMOVSHDUP(Z|Z256)rm(b?)",
+ "VMOVSLDUP(Z|Z256)rm(b?)",
+ "VMOVUPD(Z|Z256)rm(b?)",
+ "VMOVUPS(Z|Z256)rm(b?)",
+ "VPADD(B|D|Q|W)Yrm",
+ "VPADD(B|D|Q|W)(Z|Z256)rm(b?)",
+ "VPBLENDM(B|D|Q|W)(Z|Z256)rm(b?)",
+ "VPBROADCASTD(Z|Z256)rm(b?)",
+ "VPBROADCASTQ(Z|Z256)rm(b?)",
+ "VPSUB(B|D|Q|W)Yrm",
+ "VPSUB(B|D|Q|W)(Z|Z256)rm(b?)",
+ "VPTERNLOGD(Z|Z256)rm(b?)i",
+ "VPTERNLOGQ(Z|Z256)rm(b?)i")>;
+
+def ICXWriteResGroup123 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort23]> {
+ let Latency = 8;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,2,1];
+}
+def: InstRW<[ICXWriteResGroup123], (instregex "MMX_PH(ADD|SUB)SWrm")>;
+
+def ICXWriteResGroup127 : SchedWriteRes<[ICXPort23,ICXPort237,ICXPort06,ICXPort0156]> {
+ let Latency = 8;
+ let NumMicroOps = 5;
+ let ResourceCycles = [1,1,1,2];
+}
+def: InstRW<[ICXWriteResGroup127], (instregex "RCL(8|16|32|64)m(1|i)",
+ "RCR(8|16|32|64)m(1|i)")>;
+
+def ICXWriteResGroup128 : SchedWriteRes<[ICXPort4,ICXPort23,ICXPort237,ICXPort06]> {
+ let Latency = 8;
+ let NumMicroOps = 6;
+ let ResourceCycles = [1,1,1,3];
+}
+def: InstRW<[ICXWriteResGroup128], (instregex "ROL(8|16|32|64)mCL",
+ "ROR(8|16|32|64)mCL",
+ "SAR(8|16|32|64)mCL",
+ "SHL(8|16|32|64)mCL",
+ "SHR(8|16|32|64)mCL")>;
+
+def ICXWriteResGroup130 : SchedWriteRes<[ICXPort4,ICXPort23,ICXPort237,ICXPort06,ICXPort0156]> {
+ let Latency = 8;
+ let NumMicroOps = 6;
+ let ResourceCycles = [1,1,1,2,1];
+}
+def: SchedAlias<WriteADCRMW, ICXWriteResGroup130>;
+
+def ICXWriteResGroup131 : SchedWriteRes<[ICXPort0,ICXPort4,ICXPort5,ICXPort237,ICXPort0156]> {
+ let Latency = 8;
+ let NumMicroOps = 8;
+ let ResourceCycles = [1,2,1,2,2];
+}
+def: InstRW<[ICXWriteResGroup131], (instrs VPSCATTERQDZ128mr,
+ VPSCATTERQDZ256mr,
+ VSCATTERQPSZ128mr,
+ VSCATTERQPSZ256mr)>;
+
+def ICXWriteResGroup132 : SchedWriteRes<[ICXPort0,ICXPort4,ICXPort5,ICXPort237,ICXPort0156]> {
+ let Latency = 8;
+ let NumMicroOps = 12;
+ let ResourceCycles = [1,4,1,4,2];
+}
+def: InstRW<[ICXWriteResGroup132], (instrs VPSCATTERDDZ128mr,
+ VSCATTERDPSZ128mr)>;
+
+def ICXWriteResGroup133 : SchedWriteRes<[ICXPort0,ICXPort4,ICXPort5,ICXPort237,ICXPort0156]> {
+ let Latency = 8;
+ let NumMicroOps = 20;
+ let ResourceCycles = [1,8,1,8,2];
+}
+def: InstRW<[ICXWriteResGroup133], (instrs VPSCATTERDDZ256mr,
+ VSCATTERDPSZ256mr)>;
+
+def ICXWriteResGroup134 : SchedWriteRes<[ICXPort0,ICXPort4,ICXPort5,ICXPort237,ICXPort0156]> {
+ let Latency = 8;
+ let NumMicroOps = 36;
+ let ResourceCycles = [1,16,1,16,2];
+}
+def: InstRW<[ICXWriteResGroup134], (instrs VPSCATTERDDZmr)>;
+
+def ICXWriteResGroup135 : SchedWriteRes<[ICXPort0,ICXPort23]> {
+ let Latency = 9;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup135], (instrs MMX_CVTPI2PSirm)>;
+
+def ICXWriteResGroup136 : SchedWriteRes<[ICXPort5,ICXPort23]> {
+ let Latency = 9;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup136], (instrs VPMOVSXBWYrm,
+ VPMOVSXDQYrm,
+ VPMOVSXWDYrm,
+ VPMOVZXWDYrm)>;
+def: InstRW<[ICXWriteResGroup136], (instregex "VALIGN(D|Q)Z128rm(b?)i",
+ "VFPCLASSSDZrm(b?)",
+ "VFPCLASSSSZrm(b?)",
+ "(V?)PCMPGTQrm",
+ "VPERMI2D128rm(b?)",
+ "VPERMI2PD128rm(b?)",
+ "VPERMI2PS128rm(b?)",
+ "VPERMI2Q128rm(b?)",
+ "VPERMT2D128rm(b?)",
+ "VPERMT2PD128rm(b?)",
+ "VPERMT2PS128rm(b?)",
+ "VPERMT2Q128rm(b?)",
+ "VPMAXSQZ128rm(b?)",
+ "VPMAXUQZ128rm(b?)",
+ "VPMINSQZ128rm(b?)",
+ "VPMINUQZ128rm(b?)",
+ "VPMOVSXBDZ128rm(b?)",
+ "VPMOVSXBQZ128rm(b?)",
+ "VPMOVSXBWZ128rm(b?)",
+ "VPMOVSXDQZ128rm(b?)",
+ "VPMOVSXWDZ128rm(b?)",
+ "VPMOVSXWQZ128rm(b?)",
+ "VPMOVZXBDZ128rm(b?)",
+ "VPMOVZXBQZ128rm(b?)",
+ "VPMOVZXBWZ128rm(b?)",
+ "VPMOVZXDQZ128rm(b?)",
+ "VPMOVZXWDZ128rm(b?)",
+ "VPMOVZXWQZ128rm(b?)")>;
+
+def ICXWriteResGroup136_2 : SchedWriteRes<[ICXPort5,ICXPort23]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup136_2], (instregex "VCMP(PD|PS)Z128rm(b?)i",
+ "VCMP(SD|SS)Zrm",
+ "VFPCLASSPDZ128rm(b?)",
+ "VFPCLASSPSZ128rm(b?)",
+ "VPCMPBZ128rmi(b?)",
+ "VPCMPDZ128rmi(b?)",
+ "VPCMPEQ(B|D|Q|W)Z128rm(b?)",
+ "VPCMPGT(B|D|Q|W)Z128rm(b?)",
+ "VPCMPQZ128rmi(b?)",
+ "VPCMPU(B|D|Q|W)Z128rmi(b?)",
+ "VPCMPWZ128rmi(b?)",
+ "VPTESTMBZ128rm(b?)",
+ "VPTESTMDZ128rm(b?)",
+ "VPTESTMQZ128rm(b?)",
+ "VPTESTMWZ128rm(b?)",
+ "VPTESTNMBZ128rm(b?)",
+ "VPTESTNMDZ128rm(b?)",
+ "VPTESTNMQZ128rm(b?)",
+ "VPTESTNMWZ128rm(b?)")>;
+
+def ICXWriteResGroup137 : SchedWriteRes<[ICXPort23,ICXPort015]> {
+ let Latency = 9;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup137], (instregex "MMX_CVT(T?)PS2PIirm",
+ "(V?)CVTPS2PDrm")>;
+
+def ICXWriteResGroup143 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort23]> {
+ let Latency = 9;
+ let NumMicroOps = 4;
+ let ResourceCycles = [2,1,1];
+}
+def: InstRW<[ICXWriteResGroup143], (instregex "(V?)PHADDSWrm",
+ "(V?)PHSUBSWrm")>;
+
+def ICXWriteResGroup146 : SchedWriteRes<[ICXPort1,ICXPort6,ICXPort23,ICXPort0156]> {
+ let Latency = 9;
+ let NumMicroOps = 5;
+ let ResourceCycles = [1,2,1,1];
+}
+def: InstRW<[ICXWriteResGroup146], (instregex "LAR(16|32|64)rm",
+ "LSL(16|32|64)rm")>;
+
+def ICXWriteResGroup148 : SchedWriteRes<[ICXPort5,ICXPort23]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup148], (instrs VPCMPGTQYrm)>;
+def: InstRW<[ICXWriteResGroup148], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
+ "ILD_F(16|32|64)m",
+ "VALIGND(Z|Z256)rm(b?)i",
+ "VALIGNQ(Z|Z256)rm(b?)i",
+ "VPMAXSQ(Z|Z256)rm(b?)",
+ "VPMAXUQ(Z|Z256)rm(b?)",
+ "VPMINSQ(Z|Z256)rm(b?)",
+ "VPMINUQ(Z|Z256)rm(b?)")>;
+
+def ICXWriteResGroup148_2 : SchedWriteRes<[ICXPort5,ICXPort23]> {
+ let Latency = 11;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup148_2], (instregex "VCMPPD(Z|Z256)rm(b?)i",
+ "VCMPPS(Z|Z256)rm(b?)i",
+ "VFPCLASSPD(Z|Z256)rm(b?)",
+ "VFPCLASSPS(Z|Z256)rm(b?)",
+ "VPCMPB(Z|Z256)rmi(b?)",
+ "VPCMPD(Z|Z256)rmi(b?)",
+ "VPCMPEQB(Z|Z256)rm(b?)",
+ "VPCMPEQD(Z|Z256)rm(b?)",
+ "VPCMPEQQ(Z|Z256)rm(b?)",
+ "VPCMPEQW(Z|Z256)rm(b?)",
+ "VPCMPGTB(Z|Z256)rm(b?)",
+ "VPCMPGTD(Z|Z256)rm(b?)",
+ "VPCMPGTQ(Z|Z256)rm(b?)",
+ "VPCMPGTW(Z|Z256)rm(b?)",
+ "VPCMPQ(Z|Z256)rmi(b?)",
+ "VPCMPU(B|D|Q|W)Z256rmi(b?)",
+ "VPCMPU(B|D|Q|W)Zrmi(b?)",
+ "VPCMPW(Z|Z256)rmi(b?)",
+ "VPTESTM(B|D|Q|W)Z256rm(b?)",
+ "VPTESTM(B|D|Q|W)Zrm(b?)",
+ "VPTESTNM(B|D|Q|W)Z256rm(b?)",
+ "VPTESTNM(B|D|Q|W)Zrm(b?)")>;
+
+def ICXWriteResGroup149 : SchedWriteRes<[ICXPort23,ICXPort015]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup149], (instregex "VCVTDQ2PDZ128rm(b?)",
+ "VCVTDQ2PSZ128rm(b?)",
+ "(V?)CVTDQ2PSrm",
+ "VCVTPD2QQZ128rm(b?)",
+ "VCVTPD2UQQZ128rm(b?)",
+ "VCVTPH2PSZ128rm(b?)",
+ "VCVTPS2DQZ128rm(b?)",
+ "(V?)CVTPS2DQrm",
+ "VCVTPS2PDZ128rm(b?)",
+ "VCVTPS2QQZ128rm(b?)",
+ "VCVTPS2UDQZ128rm(b?)",
+ "VCVTPS2UQQZ128rm(b?)",
+ "VCVTQQ2PDZ128rm(b?)",
+ "VCVTQQ2PSZ128rm(b?)",
+ "VCVTSS2SDZrm",
+ "(V?)CVTSS2SDrm",
+ "VCVTTPD2QQZ128rm(b?)",
+ "VCVTTPD2UQQZ128rm(b?)",
+ "VCVTTPS2DQZ128rm(b?)",
+ "(V?)CVTTPS2DQrm",
+ "VCVTTPS2QQZ128rm(b?)",
+ "VCVTTPS2UDQZ128rm(b?)",
+ "VCVTTPS2UQQZ128rm(b?)",
+ "VCVTUDQ2PDZ128rm(b?)",
+ "VCVTUDQ2PSZ128rm(b?)",
+ "VCVTUQQ2PDZ128rm(b?)",
+ "VCVTUQQ2PSZ128rm(b?)")>;
+
+def ICXWriteResGroup151 : SchedWriteRes<[ICXPort5,ICXPort23]> {
+ let Latency = 10;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2,1];
+}
+def: InstRW<[ICXWriteResGroup151], (instregex "VEXPANDPDZ128rm(b?)",
+ "VEXPANDPSZ128rm(b?)",
+ "VPEXPANDDZ128rm(b?)",
+ "VPEXPANDQZ128rm(b?)")>;
+
+def ICXWriteResGroup153 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort015]> {
+ let Latency = 10;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[ICXWriteResGroup153], (instregex "(V?)CVTSD2SSrm")>;
+
+def ICXWriteResGroup154 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort23]> {
+ let Latency = 10;
+ let NumMicroOps = 4;
+ let ResourceCycles = [2,1,1];
+}
+def: InstRW<[ICXWriteResGroup154], (instrs VPHADDSWYrm,
+ VPHSUBSWYrm)>;
+
+def ICXWriteResGroup157 : SchedWriteRes<[ICXPort4,ICXPort6,ICXPort23,ICXPort237,ICXPort06,ICXPort0156]> {
+ let Latency = 10;
+ let NumMicroOps = 8;
+ let ResourceCycles = [1,1,1,1,1,3];
+}
+def: InstRW<[ICXWriteResGroup157], (instregex "XCHG(8|16|32|64)rm")>;
+
+def ICXWriteResGroup159 : SchedWriteRes<[ICXPort0,ICXFPDivider]> {
+ let Latency = 11;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1,3];
+}
+def : SchedAlias<WriteFDivX, ICXWriteResGroup159>; // TODO - convert to ZnWriteResFpuPair
+
+def ICXWriteResGroup160 : SchedWriteRes<[ICXPort0,ICXPort23]> {
+ let Latency = 11;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup160], (instregex "MUL_F(32|64)m")>;
+
+def ICXWriteResGroup161 : SchedWriteRes<[ICXPort23,ICXPort015]> {
+ let Latency = 11;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup161], (instrs VCVTDQ2PSYrm,
+ VCVTPS2PDYrm)>;
+def: InstRW<[ICXWriteResGroup161], (instregex "VCVTDQ2(PD|PS)(Z|Z256)rm(b?)",
+ "VCVTPH2PS(Z|Z256)rm(b?)",
+ "VCVTPS2PD(Z|Z256)rm(b?)",
+ "VCVTQQ2PD(Z|Z256)rm(b?)",
+ "VCVTQQ2PSZ256rm(b?)",
+ "VCVT(T?)PD2QQ(Z|Z256)rm(b?)",
+ "VCVT(T?)PD2UQQ(Z|Z256)rm(b?)",
+ "VCVT(T?)PS2DQYrm",
+ "VCVT(T?)PS2DQ(Z|Z256)rm(b?)",
+ "VCVT(T?)PS2QQZ256rm(b?)",
+ "VCVT(T?)PS2UDQ(Z|Z256)rm(b?)",
+ "VCVT(T?)PS2UQQZ256rm(b?)",
+ "VCVTUDQ2(PD|PS)(Z|Z256)rm(b?)",
+ "VCVTUQQ2PD(Z|Z256)rm(b?)",
+ "VCVTUQQ2PSZ256rm(b?)")>;
+
+def ICXWriteResGroup162 : SchedWriteRes<[ICXPort5,ICXPort23]> {
+ let Latency = 11;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2,1];
+}
+def: InstRW<[ICXWriteResGroup162], (instregex "FICOM(P?)(16|32)m",
+ "VEXPANDPD(Z|Z256)rm(b?)",
+ "VEXPANDPS(Z|Z256)rm(b?)",
+ "VPEXPANDD(Z|Z256)rm(b?)",
+ "VPEXPANDQ(Z|Z256)rm(b?)")>;
+
+def ICXWriteResGroup163 : SchedWriteRes<[ICXPort23,ICXPort015]> {
+ let Latency = 11;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,2];
+}
+def: InstRW<[ICXWriteResGroup163], (instregex "VCVTSD2SSZrm")>;
+
+def ICXWriteResGroup164 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort23]> {
+ let Latency = 11;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[ICXWriteResGroup164], (instregex "(V?)CVTDQ2PDrm")>;
+
+def ICXWriteResGroup166 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort015]> {
+ let Latency = 11;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[ICXWriteResGroup166], (instrs CVTPD2PSrm,
+ CVTPD2DQrm,
+ CVTTPD2DQrm,
+ MMX_CVTPD2PIirm,
+ MMX_CVTTPD2PIirm)>;
+
+def ICXWriteResGroup167 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort015]> {
+ let Latency = 11;
+ let NumMicroOps = 4;
+ let ResourceCycles = [2,1,1];
+}
+def: InstRW<[ICXWriteResGroup167], (instregex "VPCONFLICTQZ128rm(b?)")>;
+
+def ICXWriteResGroup169 : SchedWriteRes<[ICXPort1,ICXPort06,ICXPort0156]> {
+ let Latency = 11;
+ let NumMicroOps = 7;
+ let ResourceCycles = [2,3,2];
+}
+def: InstRW<[ICXWriteResGroup169], (instregex "RCL(16|32|64)rCL",
+ "RCR(16|32|64)rCL")>;
+
+def ICXWriteResGroup170 : SchedWriteRes<[ICXPort1,ICXPort06,ICXPort15,ICXPort0156]> {
+ let Latency = 11;
+ let NumMicroOps = 9;
+ let ResourceCycles = [1,5,1,2];
+}
+def: InstRW<[ICXWriteResGroup170], (instrs RCL8rCL)>;
+
+def ICXWriteResGroup171 : SchedWriteRes<[ICXPort06,ICXPort0156]> {
+ let Latency = 11;
+ let NumMicroOps = 11;
+ let ResourceCycles = [2,9];
+}
+def: InstRW<[ICXWriteResGroup171], (instrs LOOPE, LOOPNE)>;
+
+def ICXWriteResGroup174 : SchedWriteRes<[ICXPort01]> {
+ let Latency = 15;
+ let NumMicroOps = 3;
+ let ResourceCycles = [3];
+}
+def: InstRW<[ICXWriteResGroup174], (instregex "VPMULLQ(Z128|Z256)rr")>;
+
+def ICXWriteResGroup174z : SchedWriteRes<[ICXPort05]> {
+ let Latency = 15;
+ let NumMicroOps = 3;
+ let ResourceCycles = [3];
+}
+def: InstRW<[ICXWriteResGroup174z], (instregex "VPMULLQZrr")>;
+
+def ICXWriteResGroup175 : SchedWriteRes<[ICXPort5,ICXPort23]> {
+ let Latency = 12;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2,1];
+}
+def: InstRW<[ICXWriteResGroup175], (instregex "VPERMWZ128rm(b?)")>;
+
+def ICXWriteResGroup176 : SchedWriteRes<[ICXPort0,ICXPort23,ICXPort015]> {
+ let Latency = 12;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[ICXWriteResGroup176], (instregex "VCVT(T?)SD2USIZrm(b?)",
+ "VCVT(T?)SS2USI64Zrm(b?)")>;
+
+def ICXWriteResGroup177 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort015]> {
+ let Latency = 12;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[ICXWriteResGroup177], (instregex "VCVT(T?)PS2QQZrm(b?)",
+ "VCVT(T?)PS2UQQZrm(b?)")>;
+
+def ICXWriteResGroup179 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort23,ICXPort015]> {
+ let Latency = 12;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,1,1,1];
+}
+def: InstRW<[ICXWriteResGroup179], (instregex "CVTTSS2SI64rm")>;
+
+def ICXWriteResGroup180 : SchedWriteRes<[ICXPort5,ICXPort23]> {
+ let Latency = 13;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2,1];
+}
+def: InstRW<[ICXWriteResGroup180], (instregex "(ADD|SUB|SUBR)_FI(16|32)m",
+ "VPERMWZ256rm(b?)",
+ "VPERMWZrm(b?)")>;
+
+def ICXWriteResGroup181 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort23]> {
+ let Latency = 13;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[ICXWriteResGroup181], (instrs VCVTDQ2PDYrm)>;
+
+def ICXWriteResGroup183 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort015]> {
+ let Latency = 13;
+ let NumMicroOps = 4;
+ let ResourceCycles = [2,1,1];
+}
+def: InstRW<[ICXWriteResGroup183], (instregex "VPERMI2W128rm(b?)",
+ "VPERMT2W128rm(b?)")>;
+
+def ICXWriteResGroup184 : SchedWriteRes<[ICXPort0,ICXFPDivider]> {
+ let Latency = 14;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1,3];
+}
+def : SchedAlias<WriteFDiv64, ICXWriteResGroup184>; // TODO - convert to ZnWriteResFpuPair
+def : SchedAlias<WriteFDiv64X, ICXWriteResGroup184>; // TODO - convert to ZnWriteResFpuPair
+
+def ICXWriteResGroup184_1 : SchedWriteRes<[ICXPort0,ICXFPDivider]> {
+ let Latency = 14;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1,5];
+}
+def : SchedAlias<WriteFDiv64Y, ICXWriteResGroup184_1>; // TODO - convert to ZnWriteResFpuPair
+
+def ICXWriteResGroup187 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort23]> {
+ let Latency = 14;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[ICXWriteResGroup187], (instregex "MUL_FI(16|32)m")>;
+
+def ICXWriteResGroup188 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort015]> {
+ let Latency = 14;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[ICXWriteResGroup188], (instregex "VCVTPD2DQZrm(b?)",
+ "VCVTPD2PSZrm(b?)",
+ "VCVTPD2UDQZrm(b?)",
+ "VCVTQQ2PSZrm(b?)",
+ "VCVTTPD2DQZrm(b?)",
+ "VCVTTPD2UDQZrm(b?)",
+ "VCVTUQQ2PSZrm(b?)")>;
+
+def ICXWriteResGroup189 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort015]> {
+ let Latency = 14;
+ let NumMicroOps = 4;
+ let ResourceCycles = [2,1,1];
+}
+def: InstRW<[ICXWriteResGroup189], (instregex "VPERMI2W256rm(b?)",
+ "VPERMI2Wrm(b?)",
+ "VPERMT2W256rm(b?)",
+ "VPERMT2Wrm(b?)")>;
+
+def ICXWriteResGroup190 : SchedWriteRes<[ICXPort1,ICXPort06,ICXPort15,ICXPort0156]> {
+ let Latency = 14;
+ let NumMicroOps = 10;
+ let ResourceCycles = [2,4,1,3];
+}
+def: InstRW<[ICXWriteResGroup190], (instrs RCR8rCL)>;
+
+def ICXWriteResGroup191 : SchedWriteRes<[ICXPort0]> {
+ let Latency = 15;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[ICXWriteResGroup191], (instregex "DIVR_(FPrST0|FST0r|FrST0)")>;
+
+def ICXWriteResGroup194 : SchedWriteRes<[ICXPort1,ICXPort5,ICXPort01,ICXPort23,ICXPort015]> {
+ let Latency = 15;
+ let NumMicroOps = 8;
+ let ResourceCycles = [1,2,2,1,2];
+}
+def: InstRW<[ICXWriteResGroup194], (instregex "VPCONFLICTDZ128rm(b?)")>;
+
+def ICXWriteResGroup195 : SchedWriteRes<[ICXPort1,ICXPort23,ICXPort237,ICXPort06,ICXPort15,ICXPort0156]> {
+ let Latency = 15;
+ let NumMicroOps = 10;
+ let ResourceCycles = [1,1,1,5,1,1];
+}
+def: InstRW<[ICXWriteResGroup195], (instregex "RCL(8|16|32|64)mCL")>;
+
+def ICXWriteResGroup199 : SchedWriteRes<[ICXPort4,ICXPort23,ICXPort237,ICXPort06,ICXPort15,ICXPort0156]> {
+ let Latency = 16;
+ let NumMicroOps = 14;
+ let ResourceCycles = [1,1,1,4,2,5];
+}
+def: InstRW<[ICXWriteResGroup199], (instrs CMPXCHG8B)>;
+
+def ICXWriteResGroup200 : SchedWriteRes<[ICXPort1, ICXPort05, ICXPort6]> {
+ let Latency = 12;
+ let NumMicroOps = 34;
+ let ResourceCycles = [1, 4, 5];
+}
+def: InstRW<[ICXWriteResGroup200], (instrs VZEROALL)>;
+
+def ICXWriteResGroup201 : SchedWriteRes<[ICXPort0,ICXPort23,ICXFPDivider]> {
+ let Latency = 17;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1,5];
+}
+def : SchedAlias<WriteFDivXLd, ICXWriteResGroup201>; // TODO - convert to ZnWriteResFpuPair
+
+def ICXWriteResGroup202 : SchedWriteRes<[ICXPort0,ICXPort1,ICXPort5,ICXPort6,ICXPort05,ICXPort0156]> {
+ let Latency = 17;
+ let NumMicroOps = 15;
+ let ResourceCycles = [2,1,2,4,2,4];
+}
+def: InstRW<[ICXWriteResGroup202], (instrs XCH_F)>;
+
+def ICXWriteResGroup205 : SchedWriteRes<[ICXPort23,ICXPort01]> {
+ let Latency = 21;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,3];
+}
+def: InstRW<[ICXWriteResGroup205], (instregex "VPMULLQZ128rm(b?)")>;
+
+def ICXWriteResGroup207 : SchedWriteRes<[ICXPort5,ICXPort6,ICXPort06,ICXPort0156]> {
+ let Latency = 18;
+ let NumMicroOps = 8;
+ let ResourceCycles = [1,1,1,5];
+}
+def: InstRW<[ICXWriteResGroup207], (instrs CPUID, RDTSC)>;
+
+def ICXWriteResGroup208 : SchedWriteRes<[ICXPort1,ICXPort23,ICXPort237,ICXPort06,ICXPort15,ICXPort0156]> {
+ let Latency = 18;
+ let NumMicroOps = 11;
+ let ResourceCycles = [2,1,1,4,1,2];
+}
+def: InstRW<[ICXWriteResGroup208], (instregex "RCR(8|16|32|64)mCL")>;
+
+def ICXWriteResGroup209 : SchedWriteRes<[ICXPort0,ICXPort23,ICXFPDivider]> {
+ let Latency = 19;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1,4];
+}
+def : SchedAlias<WriteFDiv64Ld, ICXWriteResGroup209>; // TODO - convert to ZnWriteResFpuPair
+
+def ICXWriteResGroup211 : SchedWriteRes<[ICXPort23,ICXPort01]> {
+ let Latency = 22;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,3];
+}
+def: InstRW<[ICXWriteResGroup211], (instregex "VPMULLQZ256rm(b?)")>;
+
+def ICXWriteResGroup211_1 : SchedWriteRes<[ICXPort23,ICXPort05]> {
+ let Latency = 22;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,3];
+}
+def: InstRW<[ICXWriteResGroup211_1], (instregex "VPMULLQZrm(b?)")>;
+
+def ICXWriteResGroup215 : SchedWriteRes<[ICXPort0]> {
+ let Latency = 20;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[ICXWriteResGroup215], (instregex "DIV_(FPrST0|FST0r|FrST0)")>;
+
+def ICXWriteResGroup216 : SchedWriteRes<[ICXPort0,ICXPort23,ICXFPDivider]> {
+ let Latency = 20;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1,4];
+}
+def : SchedAlias<WriteFDiv64XLd, ICXWriteResGroup216>; // TODO - convert to ZnWriteResFpuPair
+
+def ICXWriteGatherEVEX2 : SchedWriteRes<[ICXPort0,ICXPort23,ICXPort015,ICXPort0156]> {
+ let Latency = 17;
+ let NumMicroOps = 5; // 2 uops perform multiple loads
+ let ResourceCycles = [1,2,1,1];
+}
+def: InstRW<[ICXWriteGatherEVEX2], (instrs VGATHERQPSZ128rm, VPGATHERQDZ128rm,
+ VGATHERDPDZ128rm, VPGATHERDQZ128rm,
+ VGATHERQPDZ128rm, VPGATHERQQZ128rm)>;
+
+def ICXWriteGatherEVEX4 : SchedWriteRes<[ICXPort0,ICXPort23,ICXPort015,ICXPort0156]> {
+ let Latency = 19;
+ let NumMicroOps = 5; // 2 uops perform multiple loads
+ let ResourceCycles = [1,4,1,1];
+}
+def: InstRW<[ICXWriteGatherEVEX4], (instrs VGATHERQPSZ256rm, VPGATHERQDZ256rm,
+ VGATHERQPDZ256rm, VPGATHERQQZ256rm,
+ VGATHERDPSZ128rm, VPGATHERDDZ128rm,
+ VGATHERDPDZ256rm, VPGATHERDQZ256rm)>;
+
+def ICXWriteGatherEVEX8 : SchedWriteRes<[ICXPort0,ICXPort23,ICXPort015,ICXPort0156]> {
+ let Latency = 21;
+ let NumMicroOps = 5; // 2 uops perform multiple loads
+ let ResourceCycles = [1,8,1,1];
+}
+def: InstRW<[ICXWriteGatherEVEX8], (instrs VGATHERDPSZ256rm, VPGATHERDDZ256rm,
+ VGATHERDPDZrm, VPGATHERDQZrm,
+ VGATHERQPDZrm, VPGATHERQQZrm,
+ VGATHERQPSZrm, VPGATHERQDZrm)>;
+
+def ICXWriteGatherEVEX16 : SchedWriteRes<[ICXPort0,ICXPort23,ICXPort015,ICXPort0156]> {
+ let Latency = 25;
+ let NumMicroOps = 5; // 2 uops perform multiple loads
+ let ResourceCycles = [1,16,1,1];
+}
+def: InstRW<[ICXWriteGatherEVEX16], (instrs VGATHERDPSZrm, VPGATHERDDZrm)>;
+
+def ICXWriteResGroup219 : SchedWriteRes<[ICXPort4,ICXPort5,ICXPort6,ICXPort23,ICXPort237,ICXPort06,ICXPort0156]> {
+ let Latency = 20;
+ let NumMicroOps = 8;
+ let ResourceCycles = [1,1,1,1,1,1,2];
+}
+def: InstRW<[ICXWriteResGroup219], (instrs INSB, INSL, INSW)>;
+
+def ICXWriteResGroup220 : SchedWriteRes<[ICXPort5,ICXPort6,ICXPort0156]> {
+ let Latency = 20;
+ let NumMicroOps = 10;
+ let ResourceCycles = [1,2,7];
+}
+def: InstRW<[ICXWriteResGroup220], (instrs MWAITrr)>;
+
+def ICXWriteResGroup222 : SchedWriteRes<[ICXPort0,ICXPort23,ICXFPDivider]> {
+ let Latency = 21;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1,8];
+}
+def : SchedAlias<WriteFDiv64YLd, ICXWriteResGroup222>; // TODO - convert to ZnWriteResFpuPair
+
+def ICXWriteResGroup223 : SchedWriteRes<[ICXPort0,ICXPort23]> {
+ let Latency = 22;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup223], (instregex "DIV_F(32|64)m")>;
+
+def ICXWriteResGroupVEX2 : SchedWriteRes<[ICXPort0, ICXPort23, ICXPort5, ICXPort015]> {
+ let Latency = 18;
+ let NumMicroOps = 5; // 2 uops perform multiple loads
+ let ResourceCycles = [1,2,1,1];
+}
+def: InstRW<[ICXWriteResGroupVEX2], (instrs VGATHERDPDrm, VPGATHERDQrm,
+ VGATHERQPDrm, VPGATHERQQrm,
+ VGATHERQPSrm, VPGATHERQDrm)>;
+
+def ICXWriteResGroupVEX4 : SchedWriteRes<[ICXPort0, ICXPort23, ICXPort5, ICXPort015]> {
+ let Latency = 20;
+ let NumMicroOps = 5; // 2 uops perform multiple loads
+ let ResourceCycles = [1,4,1,1];
+}
+def: InstRW<[ICXWriteResGroupVEX4], (instrs VGATHERDPDYrm, VPGATHERDQYrm,
+ VGATHERDPSrm, VPGATHERDDrm,
+ VGATHERQPDYrm, VPGATHERQQYrm,
+ VGATHERQPSYrm, VPGATHERQDYrm)>;
+
+def ICXWriteResGroupVEX8 : SchedWriteRes<[ICXPort0, ICXPort23, ICXPort5, ICXPort015]> {
+ let Latency = 22;
+ let NumMicroOps = 5; // 2 uops perform multiple loads
+ let ResourceCycles = [1,8,1,1];
+}
+def: InstRW<[ICXWriteResGroupVEX8], (instrs VGATHERDPSYrm, VPGATHERDDYrm)>;
+
+def ICXWriteResGroup225 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort015]> {
+ let Latency = 22;
+ let NumMicroOps = 14;
+ let ResourceCycles = [5,5,4];
+}
+def: InstRW<[ICXWriteResGroup225], (instregex "VPCONFLICTDZ128rr",
+ "VPCONFLICTQZ256rr")>;
+
+def ICXWriteResGroup228 : SchedWriteRes<[ICXPort0,ICXPort4,ICXPort5,ICXPort23,ICXPort237,ICXPort06,ICXPort0156]> {
+ let Latency = 23;
+ let NumMicroOps = 19;
+ let ResourceCycles = [2,1,4,1,1,4,6];
+}
+def: InstRW<[ICXWriteResGroup228], (instrs CMPXCHG16B)>;
+
+def ICXWriteResGroup233 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort23]> {
+ let Latency = 25;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[ICXWriteResGroup233], (instregex "DIV_FI(16|32)m")>;
+
+def ICXWriteResGroup239 : SchedWriteRes<[ICXPort0,ICXPort23]> {
+ let Latency = 27;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup239], (instregex "DIVR_F(32|64)m")>;
+
+def ICXWriteResGroup242 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort23,ICXPort015]> {
+ let Latency = 29;
+ let NumMicroOps = 15;
+ let ResourceCycles = [5,5,1,4];
+}
+def: InstRW<[ICXWriteResGroup242], (instregex "VPCONFLICTQZ256rm(b?)")>;
+
+def ICXWriteResGroup243 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort23]> {
+ let Latency = 30;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[ICXWriteResGroup243], (instregex "DIVR_FI(16|32)m")>;
+
+def ICXWriteResGroup247 : SchedWriteRes<[ICXPort5,ICXPort6,ICXPort23,ICXPort06,ICXPort0156]> {
+ let Latency = 35;
+ let NumMicroOps = 23;
+ let ResourceCycles = [1,5,3,4,10];
+}
+def: InstRW<[ICXWriteResGroup247], (instregex "IN(8|16|32)ri",
+ "IN(8|16|32)rr")>;
+
+def ICXWriteResGroup248 : SchedWriteRes<[ICXPort5,ICXPort6,ICXPort23,ICXPort237,ICXPort06,ICXPort0156]> {
+ let Latency = 35;
+ let NumMicroOps = 23;
+ let ResourceCycles = [1,5,2,1,4,10];
+}
+def: InstRW<[ICXWriteResGroup248], (instregex "OUT(8|16|32)ir",
+ "OUT(8|16|32)rr")>;
+
+def ICXWriteResGroup249 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort015]> {
+ let Latency = 37;
+ let NumMicroOps = 21;
+ let ResourceCycles = [9,7,5];
+}
+def: InstRW<[ICXWriteResGroup249], (instregex "VPCONFLICTDZ256rr",
+ "VPCONFLICTQZrr")>;
+
+def ICXWriteResGroup250 : SchedWriteRes<[ICXPort1,ICXPort6,ICXPort23,ICXPort0156]> {
+ let Latency = 37;
+ let NumMicroOps = 31;
+ let ResourceCycles = [1,8,1,21];
+}
+def: InstRW<[ICXWriteResGroup250], (instregex "XRSTOR(64)?")>;
+
+def ICXWriteResGroup252 : SchedWriteRes<[ICXPort1,ICXPort4,ICXPort5,ICXPort6,ICXPort23,ICXPort237,ICXPort15,ICXPort0156]> {
+ let Latency = 40;
+ let NumMicroOps = 18;
+ let ResourceCycles = [1,1,2,3,1,1,1,8];
+}
+def: InstRW<[ICXWriteResGroup252], (instrs VMCLEARm)>;
+
+def ICXWriteResGroup253 : SchedWriteRes<[ICXPort4,ICXPort6,ICXPort23,ICXPort237,ICXPort0156]> {
+ let Latency = 41;
+ let NumMicroOps = 39;
+ let ResourceCycles = [1,10,1,1,26];
+}
+def: InstRW<[ICXWriteResGroup253], (instrs XSAVE64)>;
+
+def ICXWriteResGroup254 : SchedWriteRes<[ICXPort5,ICXPort0156]> {
+ let Latency = 42;
+ let NumMicroOps = 22;
+ let ResourceCycles = [2,20];
+}
+def: InstRW<[ICXWriteResGroup254], (instrs RDTSCP)>;
+
+def ICXWriteResGroup255 : SchedWriteRes<[ICXPort4,ICXPort6,ICXPort23,ICXPort237,ICXPort0156]> {
+ let Latency = 42;
+ let NumMicroOps = 40;
+ let ResourceCycles = [1,11,1,1,26];
+}
+def: InstRW<[ICXWriteResGroup255], (instrs XSAVE)>;
+def: InstRW<[ICXWriteResGroup255], (instregex "XSAVEC", "XSAVES", "XSAVEOPT")>;
+
+def ICXWriteResGroup256 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort23,ICXPort015]> {
+ let Latency = 44;
+ let NumMicroOps = 22;
+ let ResourceCycles = [9,7,1,5];
+}
+def: InstRW<[ICXWriteResGroup256], (instregex "VPCONFLICTDZ256rm(b?)",
+ "VPCONFLICTQZrm(b?)")>;
+
+def ICXWriteResGroup258 : SchedWriteRes<[ICXPort0,ICXPort23,ICXPort05,ICXPort06,ICXPort0156]> {
+ let Latency = 62;
+ let NumMicroOps = 64;
+ let ResourceCycles = [2,8,5,10,39];
+}
+def: InstRW<[ICXWriteResGroup258], (instrs FLDENVm)>;
+
+def ICXWriteResGroup259 : SchedWriteRes<[ICXPort0,ICXPort6,ICXPort23,ICXPort05,ICXPort06,ICXPort15,ICXPort0156]> {
+ let Latency = 63;
+ let NumMicroOps = 88;
+ let ResourceCycles = [4,4,31,1,2,1,45];
+}
+def: InstRW<[ICXWriteResGroup259], (instrs FXRSTOR64)>;
+
+def ICXWriteResGroup260 : SchedWriteRes<[ICXPort0,ICXPort6,ICXPort23,ICXPort05,ICXPort06,ICXPort15,ICXPort0156]> {
+ let Latency = 63;
+ let NumMicroOps = 90;
+ let ResourceCycles = [4,2,33,1,2,1,47];
+}
+def: InstRW<[ICXWriteResGroup260], (instrs FXRSTOR)>;
+
+def ICXWriteResGroup261 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort015]> {
+ let Latency = 67;
+ let NumMicroOps = 35;
+ let ResourceCycles = [17,11,7];
+}
+def: InstRW<[ICXWriteResGroup261], (instregex "VPCONFLICTDZrr")>;
+
+def ICXWriteResGroup262 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort23,ICXPort015]> {
+ let Latency = 74;
+ let NumMicroOps = 36;
+ let ResourceCycles = [17,11,1,7];
+}
+def: InstRW<[ICXWriteResGroup262], (instregex "VPCONFLICTDZrm(b?)")>;
+
+def ICXWriteResGroup263 : SchedWriteRes<[ICXPort5,ICXPort05,ICXPort0156]> {
+ let Latency = 75;
+ let NumMicroOps = 15;
+ let ResourceCycles = [6,3,6];
+}
+def: InstRW<[ICXWriteResGroup263], (instrs FNINIT)>;
+
+def ICXWriteResGroup266 : SchedWriteRes<[ICXPort0,ICXPort1,ICXPort4,ICXPort5,ICXPort6,ICXPort237,ICXPort06,ICXPort0156]> {
+ let Latency = 106;
+ let NumMicroOps = 100;
+ let ResourceCycles = [9,1,11,16,1,11,21,30];
+}
+def: InstRW<[ICXWriteResGroup266], (instrs FSTENVm)>;
+
+def ICXWriteResGroup267 : SchedWriteRes<[ICXPort6,ICXPort0156]> {
+ let Latency = 140;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,3];
+}
+def: InstRW<[ICXWriteResGroup267], (instrs PAUSE)>;
+
+def: InstRW<[WriteZero], (instrs CLC)>;
+
+
+// Instruction variants handled by the renamer. These might not need execution
+// ports in certain conditions.
+// See Agner Fog's "The microarchitecture of Intel, AMD and VIA CPUs",
+// section "Skylake Pipeline" > "Register allocation and renaming".
+// These can be investigated with llvm-exegesis, e.g.
+// echo 'pxor %mm0, %mm0' | /tmp/llvm-exegesis -mode=uops -snippets-file=-
+// echo 'vxorpd %xmm0, %xmm0, %xmm1' | /tmp/llvm-exegesis -mode=uops -snippets-file=-
+
+def ICXWriteZeroLatency : SchedWriteRes<[]> {
+ let Latency = 0;
+}
+
+def ICXWriteZeroIdiom : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteALU]>
+]>;
+def : InstRW<[ICXWriteZeroIdiom], (instrs SUB32rr, SUB64rr,
+ XOR32rr, XOR64rr)>;
+
+def ICXWriteFZeroIdiom : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteFLogic]>
+]>;
+def : InstRW<[ICXWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr,
+ XORPDrr, VXORPDrr,
+ VXORPSZ128rr,
+ VXORPDZ128rr)>;
+
+def ICXWriteFZeroIdiomY : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteFLogicY]>
+]>;
+def : InstRW<[ICXWriteFZeroIdiomY], (instrs VXORPSYrr, VXORPDYrr,
+ VXORPSZ256rr, VXORPDZ256rr)>;
+
+def ICXWriteFZeroIdiomZ : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteFLogicZ]>
+]>;
+def : InstRW<[ICXWriteFZeroIdiomZ], (instrs VXORPSZrr, VXORPDZrr)>;
+
+def ICXWriteVZeroIdiomLogicX : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteVecLogicX]>
+]>;
+def : InstRW<[ICXWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr,
+ VPXORDZ128rr, VPXORQZ128rr)>;
+
+def ICXWriteVZeroIdiomLogicY : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteVecLogicY]>
+]>;
+def : InstRW<[ICXWriteVZeroIdiomLogicY], (instrs VPXORYrr,
+ VPXORDZ256rr, VPXORQZ256rr)>;
+
+def ICXWriteVZeroIdiomLogicZ : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteVecLogicZ]>
+]>;
+def : InstRW<[ICXWriteVZeroIdiomLogicZ], (instrs VPXORDZrr, VPXORQZrr)>;
+
+def ICXWriteVZeroIdiomALUX : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteVecALUX]>
+]>;
+def : InstRW<[ICXWriteVZeroIdiomALUX], (instrs PCMPGTBrr, VPCMPGTBrr,
+ PCMPGTDrr, VPCMPGTDrr,
+ PCMPGTWrr, VPCMPGTWrr)>;
+
+def ICXWriteVZeroIdiomALUY : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteVecALUY]>
+]>;
+def : InstRW<[ICXWriteVZeroIdiomALUY], (instrs VPCMPGTBYrr,
+ VPCMPGTDYrr,
+ VPCMPGTWYrr)>;
+
+def ICXWritePSUB : SchedWriteRes<[ICXPort015]> {
+ let Latency = 1;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+
+def ICXWriteVZeroIdiomPSUB : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [ICXWritePSUB]>
+]>;
+
+def : InstRW<[ICXWriteVZeroIdiomPSUB], (instrs PSUBBrr, VPSUBBrr, VPSUBBZ128rr,
+ PSUBDrr, VPSUBDrr, VPSUBDZ128rr,
+ PSUBQrr, VPSUBQrr, VPSUBQZ128rr,
+ PSUBWrr, VPSUBWrr, VPSUBWZ128rr,
+ VPSUBBYrr, VPSUBBZ256rr,
+ VPSUBDYrr, VPSUBDZ256rr,
+ VPSUBQYrr, VPSUBQZ256rr,
+ VPSUBWYrr, VPSUBWZ256rr,
+ VPSUBBZrr,
+ VPSUBDZrr,
+ VPSUBQZrr,
+ VPSUBWZrr)>;
+def ICXWritePCMPGTQ : SchedWriteRes<[ICXPort5]> {
+ let Latency = 3;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+
+def ICXWriteVZeroIdiomPCMPGTQ : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [ICXWritePCMPGTQ]>
+]>;
+def : InstRW<[ICXWriteVZeroIdiomPCMPGTQ], (instrs PCMPGTQrr, VPCMPGTQrr,
+ VPCMPGTQYrr)>;
+
+
+// CMOVs that use both Z and C flag require an extra uop.
+def ICXWriteCMOVA_CMOVBErr : SchedWriteRes<[ICXPort06]> {
+ let Latency = 2;
+ let ResourceCycles = [2];
+ let NumMicroOps = 2;
+}
+
+def ICXWriteCMOVA_CMOVBErm : SchedWriteRes<[ICXPort23,ICXPort06]> {
+ let Latency = 7;
+ let ResourceCycles = [1,2];
+ let NumMicroOps = 3;
+}
+
+def ICXCMOVA_CMOVBErr : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<IsCMOVArr_Or_CMOVBErr>, [ICXWriteCMOVA_CMOVBErr]>,
+ SchedVar<NoSchedPred, [WriteCMOV]>
+]>;
+
+def ICXCMOVA_CMOVBErm : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<IsCMOVArm_Or_CMOVBErm>, [ICXWriteCMOVA_CMOVBErm]>,
+ SchedVar<NoSchedPred, [WriteCMOV.Folded]>
+]>;
+
+def : InstRW<[ICXCMOVA_CMOVBErr], (instrs CMOV16rr, CMOV32rr, CMOV64rr)>;
+def : InstRW<[ICXCMOVA_CMOVBErm], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>;
+
+// SETCCs that use both Z and C flag require an extra uop.
+def ICXWriteSETA_SETBEr : SchedWriteRes<[ICXPort06]> {
+ let Latency = 2;
+ let ResourceCycles = [2];
+ let NumMicroOps = 2;
+}
+
+def ICXWriteSETA_SETBEm : SchedWriteRes<[ICXPort4,ICXPort237,ICXPort06]> {
+ let Latency = 3;
+ let ResourceCycles = [1,1,2];
+ let NumMicroOps = 4;
+}
+
+def ICXSETA_SETBErr : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<IsSETAr_Or_SETBEr>, [ICXWriteSETA_SETBEr]>,
+ SchedVar<NoSchedPred, [WriteSETCC]>
+]>;
+
+def ICXSETA_SETBErm : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<IsSETAm_Or_SETBEm>, [ICXWriteSETA_SETBEm]>,
+ SchedVar<NoSchedPred, [WriteSETCCStore]>
+]>;
+
+def : InstRW<[ICXSETA_SETBErr], (instrs SETCCr)>;
+def : InstRW<[ICXSETA_SETBErm], (instrs SETCCm)>;
+
+} // SchedModel
diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td
index 2f7157f43268..c8d7b0f72c1c 100644
--- a/llvm/lib/Target/X86/X86SchedSandyBridge.td
+++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td
@@ -124,12 +124,17 @@ defm : X86WriteRes<WriteIMul16Imm, [SBPort1,SBPort015], 4, [1,1], 2>;
defm : X86WriteRes<WriteIMul16ImmLd, [SBPort1,SBPort015,SBPort23], 8, [1,1,1], 3>;
defm : SBWriteResPair<WriteIMul16Reg, [SBPort1], 3>;
defm : SBWriteResPair<WriteIMul32, [SBPort1,SBPort05,SBPort015], 4, [1,1,1], 3>;
+defm : SBWriteResPair<WriteMULX32, [SBPort1,SBPort05,SBPort015], 3, [1,1,1], 3>;
defm : SBWriteResPair<WriteIMul32Imm, [SBPort1], 3>;
defm : SBWriteResPair<WriteIMul32Reg, [SBPort1], 3>;
defm : SBWriteResPair<WriteIMul64, [SBPort1,SBPort0], 4, [1,1], 2>;
+defm : SBWriteResPair<WriteMULX64, [SBPort1,SBPort0], 3, [1,1], 2>;
defm : SBWriteResPair<WriteIMul64Imm, [SBPort1], 3>;
defm : SBWriteResPair<WriteIMul64Reg, [SBPort1], 3>;
-def : WriteRes<WriteIMulH, []> { let Latency = 3; }
+def SBWriteIMulH : WriteRes<WriteIMulH, []> { let Latency = 4; }
+def : WriteRes<WriteIMulHLd, []> {
+ let Latency = !add(SBWriteIMulH.Latency, SandyBridgeModel.LoadLatency);
+}
defm : X86WriteRes<WriteXCHG, [SBPort015], 2, [3], 3>;
defm : X86WriteRes<WriteBSWAP32, [SBPort1], 1, [1], 1>;
@@ -601,7 +606,7 @@ def SBWriteResGroup2 : SchedWriteRes<[SBPort5]> {
def: InstRW<[SBWriteResGroup2], (instrs FDECSTP, FINCSTP, FFREE, FFREEP, FNOP,
LD_Frr, ST_Frr, ST_FPrr)>;
def: InstRW<[SBWriteResGroup2], (instrs LOOP, LOOPE, LOOPNE)>; // FIXME: This seems wrong compared to other Intel CPUs.
-def: InstRW<[SBWriteResGroup2], (instrs RETQ)>;
+def: InstRW<[SBWriteResGroup2], (instrs RET64)>;
def SBWriteResGroup4 : SchedWriteRes<[SBPort05]> {
let Latency = 1;
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
index 8486bdda0349..7d3229c3b023 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
@@ -122,12 +122,17 @@ defm : X86WriteRes<WriteIMul16Imm, [SKLPort1,SKLPort0156], 4, [1,1], 2>;
defm : X86WriteRes<WriteIMul16ImmLd, [SKLPort1,SKLPort0156,SKLPort23], 8, [1,1,1], 3>;
defm : SKLWriteResPair<WriteIMul16Reg, [SKLPort1], 3>;
defm : SKLWriteResPair<WriteIMul32, [SKLPort1,SKLPort06,SKLPort0156], 4, [1,1,1], 3>;
+defm : SKLWriteResPair<WriteMULX32, [SKLPort1,SKLPort06,SKLPort0156], 3, [1,1,1], 3>;
defm : SKLWriteResPair<WriteIMul32Imm, [SKLPort1], 3>;
defm : SKLWriteResPair<WriteIMul32Reg, [SKLPort1], 3>;
defm : SKLWriteResPair<WriteIMul64, [SKLPort1,SKLPort5], 4, [1,1], 2>;
+defm : SKLWriteResPair<WriteMULX64, [SKLPort1,SKLPort5], 3, [1,1], 2>;
defm : SKLWriteResPair<WriteIMul64Imm, [SKLPort1], 3>;
defm : SKLWriteResPair<WriteIMul64Reg, [SKLPort1], 3>;
-def : WriteRes<WriteIMulH, []> { let Latency = 3; }
+def SKLWriteIMulH : WriteRes<WriteIMulH, []> { let Latency = 4; }
+def : WriteRes<WriteIMulHLd, []> {
+ let Latency = !add(SKLWriteIMulH.Latency, SkylakeClientModel.LoadLatency);
+}
defm : X86WriteRes<WriteBSWAP32, [SKLPort15], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [SKLPort06, SKLPort15], 2, [1,1], 2>;
@@ -1170,7 +1175,7 @@ def SKLWriteResGroup98 : SchedWriteRes<[SKLPort6,SKLPort23,SKLPort0156]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[SKLWriteResGroup98], (instrs LRETQ, RETQ)>;
+def: InstRW<[SKLWriteResGroup98], (instrs LRET64, RET64)>;
def SKLWriteResGroup100 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06]> {
let Latency = 7;
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
index ba80d47c4eb6..1d8417aef41e 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
@@ -123,12 +123,17 @@ defm : X86WriteRes<WriteIMul16ImmLd, [SKXPort1,SKXPort0156,SKXPort23], 8, [1,1
defm : X86WriteRes<WriteIMul16Reg, [SKXPort1], 3, [1], 1>;
defm : X86WriteRes<WriteIMul16RegLd, [SKXPort1,SKXPort0156,SKXPort23], 8, [1,1,1], 3>;
defm : SKXWriteResPair<WriteIMul32, [SKXPort1,SKXPort06,SKXPort0156], 4, [1,1,1], 3>;
+defm : SKXWriteResPair<WriteMULX32, [SKXPort1,SKXPort06,SKXPort0156], 3, [1,1,1], 3>;
defm : SKXWriteResPair<WriteIMul32Imm, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteIMul32Reg, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteIMul64, [SKXPort1,SKXPort5], 4, [1,1], 2>;
+defm : SKXWriteResPair<WriteMULX64, [SKXPort1,SKXPort5], 3, [1,1], 2>;
defm : SKXWriteResPair<WriteIMul64Imm, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteIMul64Reg, [SKXPort1], 3>;
-def : WriteRes<WriteIMulH, []> { let Latency = 3; }
+def SKXWriteIMulH : WriteRes<WriteIMulH, []> { let Latency = 4; }
+def : WriteRes<WriteIMulHLd, []> {
+ let Latency = !add(SKXWriteIMulH.Latency, SkylakeServerModel.LoadLatency);
+}
defm : X86WriteRes<WriteBSWAP32, [SKXPort15], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [SKXPort06, SKXPort15], 2, [1,1], 2>;
@@ -1431,7 +1436,7 @@ def SKXWriteResGroup104 : SchedWriteRes<[SKXPort6,SKXPort23,SKXPort0156]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[SKXWriteResGroup104], (instrs LRETQ, RETQ)>;
+def: InstRW<[SKXWriteResGroup104], (instrs LRET64, RET64)>;
def SKXWriteResGroup106 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort237]> {
let Latency = 7;
diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td
index 09148fc19e57..1cb48175260a 100644
--- a/llvm/lib/Target/X86/X86Schedule.td
+++ b/llvm/lib/Target/X86/X86Schedule.td
@@ -87,8 +87,10 @@ class X86SchedWriteWidths<X86FoldableSchedWrite sScl,
}
// Multiclass that wraps X86SchedWriteWidths for each fp vector type.
-class X86SchedWriteSizes<X86SchedWriteWidths sPS,
+class X86SchedWriteSizes<X86SchedWriteWidths sPH,
+ X86SchedWriteWidths sPS,
X86SchedWriteWidths sPD> {
+ X86SchedWriteWidths PH = sPH;
X86SchedWriteWidths PS = sPS;
X86SchedWriteWidths PD = sPD;
}
@@ -146,7 +148,10 @@ defm WriteIMul32Reg : X86SchedWritePair; // Integer 32-bit multiplication by reg
defm WriteIMul64 : X86SchedWritePair; // Integer 64-bit multiplication.
defm WriteIMul64Imm : X86SchedWritePair; // Integer 64-bit multiplication by immediate.
defm WriteIMul64Reg : X86SchedWritePair; // Integer 64-bit multiplication by register.
-def WriteIMulH : SchedWrite; // Integer multiplication, high part.
+defm WriteMULX32 : X86SchedWritePair; // Integer 32-bit Multiplication without affecting flags.
+defm WriteMULX64 : X86SchedWritePair; // Integer 64-bit Multiplication without affecting flags.
+def WriteIMulH : SchedWrite; // Integer multiplication, high part (only used by the RR variant of MULX).
+def WriteIMulHLd : SchedWrite; // Integer multiplication, high part (only used by the RM variant of MULX).
def WriteBSWAP32 : SchedWrite; // Byte Order (Endianness) 32-bit Swap.
def WriteBSWAP64 : SchedWrite; // Byte Order (Endianness) 64-bit Swap.
@@ -681,20 +686,22 @@ def SchedWriteVarBlend
WriteVarBlendY, WriteVarBlendZ>;
// Vector size wrappers.
+// FIXME: Currently PH uses the same schedule method as PS.
+// We may refine them later.
def SchedWriteFAddSizes
- : X86SchedWriteSizes<SchedWriteFAdd, SchedWriteFAdd64>;
+ : X86SchedWriteSizes<SchedWriteFAdd, SchedWriteFAdd, SchedWriteFAdd64>;
def SchedWriteFCmpSizes
- : X86SchedWriteSizes<SchedWriteFCmp, SchedWriteFCmp64>;
+ : X86SchedWriteSizes<SchedWriteFCmp, SchedWriteFCmp, SchedWriteFCmp64>;
def SchedWriteFMulSizes
- : X86SchedWriteSizes<SchedWriteFMul, SchedWriteFMul64>;
+ : X86SchedWriteSizes<SchedWriteFMul, SchedWriteFMul, SchedWriteFMul64>;
def SchedWriteFDivSizes
- : X86SchedWriteSizes<SchedWriteFDiv, SchedWriteFDiv64>;
+ : X86SchedWriteSizes<SchedWriteFDiv, SchedWriteFDiv, SchedWriteFDiv64>;
def SchedWriteFSqrtSizes
- : X86SchedWriteSizes<SchedWriteFSqrt, SchedWriteFSqrt64>;
+ : X86SchedWriteSizes<SchedWriteFSqrt, SchedWriteFSqrt, SchedWriteFSqrt64>;
def SchedWriteFLogicSizes
- : X86SchedWriteSizes<SchedWriteFLogic, SchedWriteFLogic>;
+ : X86SchedWriteSizes<SchedWriteFLogic, SchedWriteFLogic, SchedWriteFLogic>;
def SchedWriteFShuffleSizes
- : X86SchedWriteSizes<SchedWriteFShuffle, SchedWriteFShuffle>;
+ : X86SchedWriteSizes<SchedWriteFShuffle, SchedWriteFShuffle, SchedWriteFShuffle>;
//===----------------------------------------------------------------------===//
// Generic Processor Scheduler Models.
diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td
index d00c2e3718d3..6fd98280f560 100644
--- a/llvm/lib/Target/X86/X86ScheduleAtom.td
+++ b/llvm/lib/Target/X86/X86ScheduleAtom.td
@@ -56,17 +56,21 @@ multiclass AtomWriteResPair<X86FoldableSchedWrite SchedRW,
list<ProcResourceKind> RMPorts,
int RRLat = 1, int RMLat = 1,
list<int> RRRes = [1],
- list<int> RMRes = [1]> {
+ list<int> RMRes = [1],
+ int RRUOps = 1,
+ int RMUOps = 1> {
// Register variant.
def : WriteRes<SchedRW, RRPorts> {
let Latency = RRLat;
let ResourceCycles = RRRes;
+ let NumMicroOps = RRUOps;
}
// Memory variant.
def : WriteRes<SchedRW.Folded, RMPorts> {
let Latency = RMLat;
let ResourceCycles = RMRes;
+ let NumMicroOps = RMUOps;
}
}
@@ -80,17 +84,20 @@ def : WriteRes<WriteRMW, [AtomPort0]>;
defm : AtomWriteResPair<WriteALU, [AtomPort01], [AtomPort0]>;
defm : AtomWriteResPair<WriteADC, [AtomPort01], [AtomPort0]>;
-defm : AtomWriteResPair<WriteIMul8, [AtomPort01], [AtomPort01], 7, 7, [7], [7]>;
-defm : AtomWriteResPair<WriteIMul16, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
-defm : AtomWriteResPair<WriteIMul16Imm, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
-defm : AtomWriteResPair<WriteIMul16Reg, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
-defm : AtomWriteResPair<WriteIMul32, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
+defm : AtomWriteResPair<WriteIMul8, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 7, 7, [7,7], [7,7], 3, 3>;
+defm : AtomWriteResPair<WriteIMul16, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 7, 8, [7,7], [8,8], 4, 5>;
+defm : AtomWriteResPair<WriteIMul16Imm, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 6, 7, [6,6], [7,7], 2, 3>;
+defm : AtomWriteResPair<WriteIMul16Reg, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 6, 7, [6,6], [7,7], 2, 3>;
+defm : AtomWriteResPair<WriteIMul32, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 6, 7, [6,6], [7,7], 3, 4>;
defm : AtomWriteResPair<WriteIMul32Imm, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteIMul32Reg, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
-defm : AtomWriteResPair<WriteIMul64, [AtomPort01], [AtomPort01], 12, 12, [12], [12]>;
-defm : AtomWriteResPair<WriteIMul64Imm, [AtomPort01], [AtomPort01], 14, 14, [14], [14]>;
-defm : AtomWriteResPair<WriteIMul64Reg, [AtomPort01], [AtomPort01], 12, 12, [12], [12]>;
+defm : AtomWriteResPair<WriteIMul64, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 12, 12, [12,12], [12,12], 8, 8>;
+defm : AtomWriteResPair<WriteIMul64Imm, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 14, 14, [14,14], [14,14], 7, 7>;
+defm : AtomWriteResPair<WriteIMul64Reg, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 12, 12, [12,12], [12,12], 6, 6>;
defm : X86WriteResUnsupported<WriteIMulH>;
+defm : X86WriteResUnsupported<WriteIMulHLd>;
+defm : X86WriteResPairUnsupported<WriteMULX32>;
+defm : X86WriteResPairUnsupported<WriteMULX64>;
defm : X86WriteRes<WriteXCHG, [AtomPort01], 2, [2], 1>;
defm : X86WriteRes<WriteBSWAP32, [AtomPort0], 1, [1], 1>;
@@ -98,14 +105,14 @@ defm : X86WriteRes<WriteBSWAP64, [AtomPort0], 1, [1], 1>;
defm : AtomWriteResPair<WriteCMPXCHG, [AtomPort01], [AtomPort01], 15, 15, [15]>;
defm : X86WriteRes<WriteCMPXCHGRMW, [AtomPort01, AtomPort0], 1, [1, 1], 1>;
-defm : AtomWriteResPair<WriteDiv8, [AtomPort01], [AtomPort01], 50, 68, [50], [68]>;
-defm : AtomWriteResPair<WriteDiv16, [AtomPort01], [AtomPort01], 50, 50, [50], [50]>;
-defm : AtomWriteResPair<WriteDiv32, [AtomPort01], [AtomPort01], 50, 50, [50], [50]>;
-defm : AtomWriteResPair<WriteDiv64, [AtomPort01], [AtomPort01],130,130,[130],[130]>;
-defm : AtomWriteResPair<WriteIDiv8, [AtomPort01], [AtomPort01], 62, 62, [62], [62]>;
-defm : AtomWriteResPair<WriteIDiv16, [AtomPort01], [AtomPort01], 62, 62, [62], [62]>;
-defm : AtomWriteResPair<WriteIDiv32, [AtomPort01], [AtomPort01], 62, 62, [62], [62]>;
-defm : AtomWriteResPair<WriteIDiv64, [AtomPort01], [AtomPort01],130,130,[130],[130]>;
+defm : AtomWriteResPair<WriteDiv8, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 50, 68, [50,50], [68,68], 9, 9>;
+defm : AtomWriteResPair<WriteDiv16, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 50, 50, [50,50], [50,50], 12, 12>;
+defm : AtomWriteResPair<WriteDiv32, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 50, 50, [50,50], [50,50], 12, 12>;
+defm : AtomWriteResPair<WriteDiv64, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1],130,130,[130,130],[130,130], 38, 38>;
+defm : AtomWriteResPair<WriteIDiv8, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 62, 62, [62,62], [62,62], 26, 26>;
+defm : AtomWriteResPair<WriteIDiv16, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 62, 62, [62,62], [62,62], 29, 29>;
+defm : AtomWriteResPair<WriteIDiv32, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 62, 62, [62,62], [62,62], 29, 29>;
+defm : AtomWriteResPair<WriteIDiv64, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1],130,130,[130,130],[130,130], 60, 60>;
defm : X86WriteResPairUnsupported<WriteCRC32>;
@@ -132,8 +139,8 @@ defm : X86WriteRes<WriteBitTestSet, [AtomPort1], 1, [1], 1>;
def : WriteRes<WriteLEA, [AtomPort1]>;
// Bit counts.
-defm : AtomWriteResPair<WriteBSF, [AtomPort01], [AtomPort01], 16, 16, [16], [16]>;
-defm : AtomWriteResPair<WriteBSR, [AtomPort01], [AtomPort01], 16, 16, [16], [16]>;
+defm : AtomWriteResPair<WriteBSF, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 16, 16, [16,16], [16,16], 10, 10>;
+defm : AtomWriteResPair<WriteBSR, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 16, 16, [16,16], [16,16], 10, 10>;
defm : X86WriteResPairUnsupported<WritePOPCNT>;
defm : X86WriteResPairUnsupported<WriteLZCNT>;
defm : X86WriteResPairUnsupported<WriteTZCNT>;
@@ -230,52 +237,52 @@ defm : AtomWriteResPair<WriteFAddX, [AtomPort1], [AtomPort0,AtomPort1],
defm : X86WriteResPairUnsupported<WriteFAddY>;
defm : X86WriteResPairUnsupported<WriteFAddZ>;
defm : AtomWriteResPair<WriteFAdd64, [AtomPort1], [AtomPort0,AtomPort1], 5, 5, [1], [1,1]>;
-defm : AtomWriteResPair<WriteFAdd64X, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 6, 7, [5,5], [6,6]>;
+defm : AtomWriteResPair<WriteFAdd64X, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 6, 7, [5,5], [6,6], 3, 4>;
defm : X86WriteResPairUnsupported<WriteFAdd64Y>;
defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
defm : AtomWriteResPair<WriteFCmp, [AtomPort1], [AtomPort0,AtomPort1], 5, 5, [1], [1,1]>;
-defm : AtomWriteResPair<WriteFCmpX, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 6, 7, [5,5], [6,6]>;
+defm : AtomWriteResPair<WriteFCmpX, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 6, 7, [5,5], [6,6], 3, 4>;
defm : X86WriteResPairUnsupported<WriteFCmpY>;
defm : X86WriteResPairUnsupported<WriteFCmpZ>;
defm : AtomWriteResPair<WriteFCmp64, [AtomPort1], [AtomPort0,AtomPort1], 5, 5, [1], [1,1]>;
-defm : AtomWriteResPair<WriteFCmp64X, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 6, 7, [5,5], [6,6]>;
+defm : AtomWriteResPair<WriteFCmp64X, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 6, 7, [5,5], [6,6], 3, 4>;
defm : X86WriteResPairUnsupported<WriteFCmp64Y>;
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
defm : AtomWriteResPair<WriteFCom, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
-defm : AtomWriteResPair<WriteFComX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
+defm : AtomWriteResPair<WriteFComX, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 9, 10, [9,9],[10,10], 4, 5>;
defm : AtomWriteResPair<WriteFMul, [AtomPort0], [AtomPort0], 4, 4, [2], [2]>;
defm : AtomWriteResPair<WriteFMulX, [AtomPort0], [AtomPort0], 5, 5, [2], [2]>;
defm : X86WriteResPairUnsupported<WriteFMulY>;
defm : X86WriteResPairUnsupported<WriteFMulZ>;
defm : AtomWriteResPair<WriteFMul64, [AtomPort0], [AtomPort0], 5, 5, [2], [2]>;
-defm : AtomWriteResPair<WriteFMul64X, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 9, 10, [9,9], [10,10]>;
+defm : AtomWriteResPair<WriteFMul64X, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 9, 10, [9,9],[10,10], 6, 7>;
defm : X86WriteResPairUnsupported<WriteFMul64Y>;
defm : X86WriteResPairUnsupported<WriteFMul64Z>;
defm : AtomWriteResPair<WriteFRcp, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
-defm : AtomWriteResPair<WriteFRcpX, [AtomPort01], [AtomPort01], 9, 10, [9], [10]>;
+defm : AtomWriteResPair<WriteFRcpX, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 9, 10, [9,9], [10,10], 5, 6>;
defm : X86WriteResPairUnsupported<WriteFRcpY>;
defm : X86WriteResPairUnsupported<WriteFRcpZ>;
defm : AtomWriteResPair<WriteFRsqrt, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
-defm : AtomWriteResPair<WriteFRsqrtX, [AtomPort01], [AtomPort01], 9, 10, [9], [10]>;
+defm : AtomWriteResPair<WriteFRsqrtX, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 9, 10, [9,9], [10,10], 5, 6>;
defm : X86WriteResPairUnsupported<WriteFRsqrtY>;
defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
-defm : AtomWriteResPair<WriteFDiv, [AtomPort01], [AtomPort01], 34, 34, [34], [34]>;
-defm : AtomWriteResPair<WriteFDivX, [AtomPort01], [AtomPort01], 70, 70, [70], [70]>;
+defm : AtomWriteResPair<WriteFDiv, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 34, 34, [34,34], [34,34], 3, 4>;
+defm : AtomWriteResPair<WriteFDivX, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 70, 70, [70,70], [70,70], 6, 7>;
defm : X86WriteResPairUnsupported<WriteFDivY>;
defm : X86WriteResPairUnsupported<WriteFDivZ>;
-defm : AtomWriteResPair<WriteFDiv64, [AtomPort01], [AtomPort01], 62, 62, [62], [62]>;
-defm : AtomWriteResPair<WriteFDiv64X, [AtomPort01], [AtomPort01],125,125,[125],[125]>;
+defm : AtomWriteResPair<WriteFDiv64, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 62, 62, [62,62], [62,62], 3, 4>;
+defm : AtomWriteResPair<WriteFDiv64X, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1],125,125,[125,125],[125,125], 6, 7>;
defm : X86WriteResPairUnsupported<WriteFDiv64Y>;
defm : X86WriteResPairUnsupported<WriteFDiv64Z>;
-defm : AtomWriteResPair<WriteFSqrt, [AtomPort01], [AtomPort01], 34, 34, [34], [34]>;
-defm : AtomWriteResPair<WriteFSqrtX, [AtomPort01], [AtomPort01], 70, 70, [70], [70]>;
+defm : AtomWriteResPair<WriteFSqrt, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 34, 34, [34,34], [34,34], 3, 4>;
+defm : AtomWriteResPair<WriteFSqrtX, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 70, 70, [70,70], [70,70], 5, 6>;
defm : X86WriteResPairUnsupported<WriteFSqrtY>;
defm : X86WriteResPairUnsupported<WriteFSqrtZ>;
-defm : AtomWriteResPair<WriteFSqrt64, [AtomPort01], [AtomPort01], 62, 62, [62], [62]>;
-defm : AtomWriteResPair<WriteFSqrt64X, [AtomPort01], [AtomPort01],125,125,[125],[125]>;
+defm : AtomWriteResPair<WriteFSqrt64, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 62, 62, [62,62], [62,62], 3, 4>;
+defm : AtomWriteResPair<WriteFSqrt64X, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1],125,125,[125,125],[125,125], 5, 6>;
defm : X86WriteResPairUnsupported<WriteFSqrt64Y>;
defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;
-defm : AtomWriteResPair<WriteFSqrt80, [AtomPort01], [AtomPort01], 71, 71, [71], [71]>;
+defm : AtomWriteResPair<WriteFSqrt80, [AtomPort0], [AtomPort0], 71, 71, [71], [71]>;
defm : AtomWriteResPair<WriteFSign, [AtomPort1], [AtomPort1]>;
defm : AtomWriteResPair<WriteFRnd, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : X86WriteResPairUnsupported<WriteFRndY>;
@@ -389,8 +396,8 @@ defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
defm : X86WriteResPairUnsupported<WriteVecTest>;
defm : X86WriteResPairUnsupported<WriteVecTestY>;
defm : X86WriteResPairUnsupported<WriteVecTestZ>;
-defm : AtomWriteResPair<WriteVecShift, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 2, 3, [1,1], [2,2]>;
-defm : AtomWriteResPair<WriteVecShiftX, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 2, 3, [1,1], [2,2]>;
+defm : AtomWriteResPair<WriteVecShift, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 2, 3, [1,1], [2,2], 2, 3>;
+defm : AtomWriteResPair<WriteVecShiftX, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 2, 3, [1,1], [2,2], 2, 3>;
defm : X86WriteResPairUnsupported<WriteVecShiftY>;
defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
defm : AtomWriteResPair<WriteVecShiftImm, [AtomPort0], [AtomPort0], 1, 1>;
@@ -417,7 +424,7 @@ defm : AtomWriteResPair<WriteShuffleX, [AtomPort0], [AtomPort0], 1, 1>;
defm : X86WriteResPairUnsupported<WriteShuffleY>;
defm : X86WriteResPairUnsupported<WriteShuffleZ>;
defm : AtomWriteResPair<WriteVarShuffle, [AtomPort0], [AtomPort0], 1, 1>;
-defm : AtomWriteResPair<WriteVarShuffleX, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 4, 5, [3,3], [4,4]>;
+defm : AtomWriteResPair<WriteVarShuffleX, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 4, 5, [3,3], [4,4], 4, 5>;
defm : X86WriteResPairUnsupported<WriteVarShuffleY>;
defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
defm : X86WriteResPairUnsupported<WriteBlend>;
@@ -471,11 +478,11 @@ defm : X86WriteResPairUnsupported<WriteAESDecEnc>;
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////
-defm : AtomWriteResPair<WriteFHAdd, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>;
-defm : AtomWriteResPair<WriteFHAddY, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>;
-defm : AtomWriteResPair<WritePHAdd, [AtomPort01], [AtomPort01], 3, 4, [3], [4]>;
-defm : AtomWriteResPair<WritePHAddX, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
-defm : AtomWriteResPair<WritePHAddY, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
+defm : AtomWriteResPair<WriteFHAdd, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 8, 9, [8,8], [9,9], 5, 6>;
+defm : X86WriteResPairUnsupported<WriteFHAddY>;
+defm : AtomWriteResPair<WritePHAdd, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 3, 4, [3,3], [4,4], 3, 4>;
+defm : AtomWriteResPair<WritePHAddX, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 7, 8, [7,7], [8,8], 3, 4>;
+defm : X86WriteResPairUnsupported<WritePHAddY>;
////////////////////////////////////////////////////////////////////////////////
// Carry-less multiplication instructions.
@@ -487,8 +494,8 @@ defm : X86WriteResPairUnsupported<WriteCLMul>;
// Load/store MXCSR.
////////////////////////////////////////////////////////////////////////////////
-def : WriteRes<WriteLDMXCSR, [AtomPort01]> { let Latency = 5; let ResourceCycles = [5]; }
-def : WriteRes<WriteSTMXCSR, [AtomPort01]> { let Latency = 15; let ResourceCycles = [15]; }
+defm : X86WriteRes<WriteLDMXCSR, [AtomPort0,AtomPort1], 5, [5,5], 4>;
+defm : X86WriteRes<WriteSTMXCSR, [AtomPort0,AtomPort1], 15, [15,15], 4>;
////////////////////////////////////////////////////////////////////////////////
// Special Cases.
@@ -533,7 +540,7 @@ def : InstRW<[AtomWrite0_1_1], (instrs POP32r, POP64r,
PUSH16rmr, PUSH32rmr, PUSH64rmr,
PUSH16i8, PUSH32i8, PUSH64i8, PUSH64i32,
XCH_F)>;
-def : InstRW<[AtomWrite0_1_1], (instregex "RETI(L|Q|W)$",
+def : InstRW<[AtomWrite0_1_1], (instregex "RETI(16|32|64)$",
"IRET(16|32|64)?")>;
def AtomWrite0_1_5 : SchedWriteRes<[AtomPort0, AtomPort1]> {
@@ -551,10 +558,7 @@ def AtomWrite01_1 : SchedWriteRes<[AtomPort01]> {
def : InstRW<[AtomWrite01_1], (instrs FDECSTP, FFREE, FFREEP, FINCSTP, WAIT,
LFENCE,
STOSB, STOSL, STOSQ, STOSW,
- MOVSSrr, MOVSSrr_REV,
- PSLLDQri, PSRLDQri)>;
-def : InstRW<[AtomWrite01_1], (instregex "MMX_PACK(SSDW|SSWB|USWB)irr",
- "MMX_PUNPCKH(BW|DQ|WD)irr")>;
+ MOVSSrr, MOVSSrr_REV)>;
def AtomWrite01_2 : SchedWriteRes<[AtomPort01]> {
let Latency = 2;
@@ -644,7 +648,6 @@ def : InstRW<[AtomWrite01_9], (instrs POPA16, POPA32,
SHLD64rri8, SHRD64rri8,
CMPXCHG8rr)>;
def : InstRW<[AtomWrite01_9], (instregex "(U)?COM_FI", "TST_F",
- "(U)?COMIS(D|S)rr",
"CVT(T)?SS2SI64rr(_Int)?")>;
def AtomWrite01_10 : SchedWriteRes<[AtomPort01]> {
@@ -652,8 +655,7 @@ def AtomWrite01_10 : SchedWriteRes<[AtomPort01]> {
let ResourceCycles = [10];
}
def : SchedAlias<WriteFLDC, AtomWrite01_10>;
-def : InstRW<[AtomWrite01_10], (instregex "(U)?COMIS(D|S)rm",
- "CVT(T)?SS2SI64rm(_Int)?")>;
+def : InstRW<[AtomWrite01_10], (instregex "CVT(T)?SS2SI64rm(_Int)?")>;
def AtomWrite01_11 : SchedWriteRes<[AtomPort01]> {
let Latency = 11;
@@ -817,8 +819,8 @@ def AtomWrite01_79 : SchedWriteRes<[AtomPort01]> {
let Latency = 79;
let ResourceCycles = [79];
}
-def : InstRW<[AtomWrite01_79], (instregex "RET(L|Q|W)?$",
- "LRETI?(L|Q|W)")>;
+def : InstRW<[AtomWrite01_79], (instregex "RET(16|32|64)?$",
+ "LRETI?(16|32|64)")>;
def AtomWrite01_92 : SchedWriteRes<[AtomPort01]> {
let Latency = 92;
diff --git a/llvm/lib/Target/X86/X86ScheduleBdVer2.td b/llvm/lib/Target/X86/X86ScheduleBdVer2.td
index 99d4011dae77..4c16b5b52b1d 100644
--- a/llvm/lib/Target/X86/X86ScheduleBdVer2.td
+++ b/llvm/lib/Target/X86/X86ScheduleBdVer2.td
@@ -435,7 +435,12 @@ defm : PdWriteResExPair<WriteIMul32Reg, [PdEX1, PdMul], 4, [1, 2]>;
defm : PdWriteResExPair<WriteIMul64, [PdEX1, PdMul], 6, [1, 6]>;
defm : PdWriteResExPair<WriteIMul64Imm, [PdEX1, PdMul], 6, [1, 4],1, 1>;
defm : PdWriteResExPair<WriteIMul64Reg, [PdEX1, PdMul], 6, [1, 4]>;
-defm : X86WriteResUnsupported<WriteIMulH>; // BMI2 MULX
+
+// BMI2 MULX
+defm : X86WriteResUnsupported<WriteIMulH>;
+defm : X86WriteResUnsupported<WriteIMulHLd>;
+defm : X86WriteResPairUnsupported<WriteMULX32>;
+defm : X86WriteResPairUnsupported<WriteMULX64>;
defm : PdWriteResExPair<WriteDiv8, [PdEX1, PdDiv], 12, [1, 12]>;
defm : PdWriteResExPair<WriteDiv16, [PdEX1, PdDiv], 15, [1, 15], 2>;
diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
index cdd03830bcad..68ebaa244acf 100644
--- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td
+++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
@@ -209,7 +209,10 @@ defm : JWriteResIntPair<WriteIMul32Reg, [JALU1, JMul], 3, [1, 1], 1>;
defm : JWriteResIntPair<WriteIMul64, [JALU1, JMul], 6, [1, 4], 2>;
defm : JWriteResIntPair<WriteIMul64Imm, [JALU1, JMul], 6, [1, 4], 1>;
defm : JWriteResIntPair<WriteIMul64Reg, [JALU1, JMul], 6, [1, 4], 1>;
-defm : X86WriteRes<WriteIMulH, [JALU1], 6, [4], 1>;
+defm : X86WriteResUnsupported<WriteIMulH>;
+defm : X86WriteResUnsupported<WriteIMulHLd>;
+defm : X86WriteResPairUnsupported<WriteMULX32>;
+defm : X86WriteResPairUnsupported<WriteMULX64>;
defm : JWriteResIntPair<WriteDiv8, [JALU1, JDiv], 12, [1, 12], 1>;
defm : JWriteResIntPair<WriteDiv16, [JALU1, JDiv], 17, [1, 17], 2>;
diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td
index 123844a73a59..5af9835f75a7 100644
--- a/llvm/lib/Target/X86/X86ScheduleSLM.td
+++ b/llvm/lib/Target/X86/X86ScheduleSLM.td
@@ -62,7 +62,7 @@ def : ReadAdvance<ReadInt2Fpu, 0>;
multiclass SLMWriteResPair<X86FoldableSchedWrite SchedRW,
list<ProcResourceKind> ExePorts,
int Lat, list<int> Res = [1], int UOps = 1,
- int LoadLat = 3> {
+ int LoadUOps = 0, int LoadLat = 3> {
// Register variant is using a single cycle on ExePort.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
@@ -75,13 +75,13 @@ multiclass SLMWriteResPair<X86FoldableSchedWrite SchedRW,
def : WriteRes<SchedRW.Folded, !listconcat([SLM_MEC_RSV], ExePorts)> {
let Latency = !add(Lat, LoadLat);
let ResourceCycles = !listconcat([1], Res);
- let NumMicroOps = UOps;
+ let NumMicroOps = !add(UOps, LoadUOps);
}
}
-// A folded store needs a cycle on MEC_RSV for the store data, but it does not
-// need an extra port cycle to recompute the address.
-def : WriteRes<WriteRMW, [SLM_MEC_RSV]>;
+// A folded store needs a cycle on MEC_RSV for the store data (using the same uop),
+// but it does not need an extra port cycle to recompute the address.
+def : WriteRes<WriteRMW, [SLM_MEC_RSV]> { let NumMicroOps = 0; }
def : WriteRes<WriteStore, [SLM_IEC_RSV01, SLM_MEC_RSV]>;
def : WriteRes<WriteStoreNT, [SLM_IEC_RSV01, SLM_MEC_RSV]>;
@@ -101,17 +101,20 @@ def : InstRW<[WriteMove], (instrs COPY)>;
defm : SLMWriteResPair<WriteALU, [SLM_IEC_RSV01], 1>;
defm : SLMWriteResPair<WriteADC, [SLM_IEC_RSV01], 1>;
-defm : SLMWriteResPair<WriteIMul8, [SLM_IEC_RSV1], 3>;
-defm : SLMWriteResPair<WriteIMul16, [SLM_IEC_RSV1], 3>;
-defm : SLMWriteResPair<WriteIMul16Imm, [SLM_IEC_RSV1], 3>;
-defm : SLMWriteResPair<WriteIMul16Reg, [SLM_IEC_RSV1], 3>;
-defm : SLMWriteResPair<WriteIMul32, [SLM_IEC_RSV1], 3>;
+defm : SLMWriteResPair<WriteIMul8, [SLM_IEC_RSV1], 5, [5], 3>;
+defm : SLMWriteResPair<WriteIMul16, [SLM_IEC_RSV1], 5, [5], 4, 1>;
+defm : SLMWriteResPair<WriteIMul16Imm, [SLM_IEC_RSV1], 4, [4], 2, 1>;
+defm : SLMWriteResPair<WriteIMul16Reg, [SLM_IEC_RSV1], 4, [4], 2, 1>;
+defm : SLMWriteResPair<WriteIMul32, [SLM_IEC_RSV1], 5, [5], 3, 1>;
defm : SLMWriteResPair<WriteIMul32Imm, [SLM_IEC_RSV1], 3>;
defm : SLMWriteResPair<WriteIMul32Reg, [SLM_IEC_RSV1], 3>;
-defm : SLMWriteResPair<WriteIMul64, [SLM_IEC_RSV1], 3>;
-defm : SLMWriteResPair<WriteIMul64Imm, [SLM_IEC_RSV1], 3>;
-defm : SLMWriteResPair<WriteIMul64Reg, [SLM_IEC_RSV1], 3>;
-def : WriteRes<WriteIMulH, [SLM_FPC_RSV0]>;
+defm : SLMWriteResPair<WriteIMul64, [SLM_IEC_RSV1], 7, [7], 3>;
+defm : SLMWriteResPair<WriteIMul64Imm, [SLM_IEC_RSV1], 5, [2]>;
+defm : SLMWriteResPair<WriteIMul64Reg, [SLM_IEC_RSV1], 5, [2]>;
+defm : X86WriteResUnsupported<WriteIMulH>;
+defm : X86WriteResUnsupported<WriteIMulHLd>;
+defm : X86WriteResPairUnsupported<WriteMULX32>;
+defm : X86WriteResPairUnsupported<WriteMULX64>;
defm : X86WriteRes<WriteBSWAP32, [SLM_IEC_RSV01], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [SLM_IEC_RSV01], 1, [1], 1>;
@@ -140,12 +143,12 @@ def : WriteRes<WriteSETCCStore, [SLM_IEC_RSV01, SLM_MEC_RSV]> {
let ResourceCycles = [2,1];
}
defm : X86WriteRes<WriteLAHFSAHF, [SLM_IEC_RSV01], 1, [1], 1>;
-defm : X86WriteRes<WriteBitTest, [SLM_IEC_RSV01], 1, [1], 1>;
-defm : X86WriteRes<WriteBitTestImmLd, [SLM_IEC_RSV01, SLM_MEC_RSV], 4, [1,1], 1>;
-defm : X86WriteRes<WriteBitTestRegLd, [SLM_IEC_RSV01, SLM_MEC_RSV], 4, [1,1], 1>;
-defm : X86WriteRes<WriteBitTestSet, [SLM_IEC_RSV01], 1, [1], 1>;
-defm : X86WriteRes<WriteBitTestSetImmLd, [SLM_IEC_RSV01, SLM_MEC_RSV], 3, [1,1], 1>;
-defm : X86WriteRes<WriteBitTestSetRegLd, [SLM_IEC_RSV01, SLM_MEC_RSV], 3, [1,1], 1>;
+defm : X86WriteRes<WriteBitTest, [SLM_IEC_RSV0, SLM_IEC_RSV1], 1, [1,1], 1>;
+defm : X86WriteRes<WriteBitTestImmLd, [SLM_IEC_RSV0, SLM_IEC_RSV1, SLM_MEC_RSV], 4, [1,1,1], 1>;
+defm : X86WriteRes<WriteBitTestRegLd, [SLM_IEC_RSV0, SLM_IEC_RSV1, SLM_MEC_RSV], 4, [1,1,1], 7>;
+defm : X86WriteRes<WriteBitTestSet, [SLM_IEC_RSV0, SLM_IEC_RSV1], 1, [1,1], 1>;
+defm : X86WriteRes<WriteBitTestSetImmLd, [SLM_IEC_RSV0, SLM_IEC_RSV1, SLM_MEC_RSV], 3, [1,1,1], 1>;
+defm : X86WriteRes<WriteBitTestSetRegLd, [SLM_IEC_RSV0, SLM_IEC_RSV1, SLM_MEC_RSV], 3, [1,1,1], 7>;
// This is for simple LEAs with one or two input operands.
// The complex ones can only execute on port 1, and they require two cycles on
@@ -153,8 +156,8 @@ defm : X86WriteRes<WriteBitTestSetRegLd, [SLM_IEC_RSV01, SLM_MEC_RSV], 3, [1,1],
def : WriteRes<WriteLEA, [SLM_IEC_RSV1]>;
// Bit counts.
-defm : SLMWriteResPair<WriteBSF, [SLM_IEC_RSV01], 10, [20], 10>;
-defm : SLMWriteResPair<WriteBSR, [SLM_IEC_RSV01], 10, [20], 10>;
+defm : SLMWriteResPair<WriteBSF, [SLM_IEC_RSV0, SLM_IEC_RSV1], 10, [10,10], 10>;
+defm : SLMWriteResPair<WriteBSR, [SLM_IEC_RSV0, SLM_IEC_RSV1], 10, [10,10], 10>;
defm : SLMWriteResPair<WriteLZCNT, [SLM_IEC_RSV0], 3>;
defm : SLMWriteResPair<WriteTZCNT, [SLM_IEC_RSV0], 3>;
defm : SLMWriteResPair<WritePOPCNT, [SLM_IEC_RSV0], 3>;
@@ -164,14 +167,14 @@ defm : X86WriteResPairUnsupported<WriteBEXTR>;
defm : X86WriteResPairUnsupported<WriteBLS>;
defm : X86WriteResPairUnsupported<WriteBZHI>;
-defm : SLMWriteResPair<WriteDiv8, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
-defm : SLMWriteResPair<WriteDiv16, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
-defm : SLMWriteResPair<WriteDiv32, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
-defm : SLMWriteResPair<WriteDiv64, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
-defm : SLMWriteResPair<WriteIDiv8, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
-defm : SLMWriteResPair<WriteIDiv16, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
-defm : SLMWriteResPair<WriteIDiv32, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
-defm : SLMWriteResPair<WriteIDiv64, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
+defm : SLMWriteResPair<WriteDiv8, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 0, 4>;
+defm : SLMWriteResPair<WriteDiv16, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 0, 4>;
+defm : SLMWriteResPair<WriteDiv32, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 0, 4>;
+defm : SLMWriteResPair<WriteDiv64, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 0, 4>;
+defm : SLMWriteResPair<WriteIDiv8, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 0, 4>;
+defm : SLMWriteResPair<WriteIDiv16, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 0, 4>;
+defm : SLMWriteResPair<WriteIDiv32, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 0, 4>;
+defm : SLMWriteResPair<WriteIDiv64, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 0, 4>;
// Scalar and vector floating point.
defm : X86WriteRes<WriteFLD0, [SLM_FPC_RSV01], 1, [1], 1>;
@@ -230,33 +233,33 @@ defm : X86WriteResPairUnsupported<WriteFMAX>;
defm : X86WriteResPairUnsupported<WriteFMAY>;
defm : X86WriteResPairUnsupported<WriteFMAZ>;
defm : SLMWriteResPair<WriteFDiv, [SLM_FPC_RSV0, SLMFPDivider], 19, [1,17]>;
-defm : SLMWriteResPair<WriteFDivX, [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39]>;
-defm : SLMWriteResPair<WriteFDivY, [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39]>;
+defm : SLMWriteResPair<WriteFDivX, [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39], 6, 1>;
+defm : X86WriteResPairUnsupported<WriteFDivY>;
defm : X86WriteResPairUnsupported<WriteFDivZ>;
defm : SLMWriteResPair<WriteFDiv64, [SLM_FPC_RSV0, SLMFPDivider], 34, [1,32]>;
-defm : SLMWriteResPair<WriteFDiv64X, [SLM_FPC_RSV0, SLMFPDivider], 69, [1,69]>;
-defm : SLMWriteResPair<WriteFDiv64Y, [SLM_FPC_RSV0, SLMFPDivider], 69, [1,69]>;
+defm : SLMWriteResPair<WriteFDiv64X, [SLM_FPC_RSV0, SLMFPDivider], 69, [1,69], 6, 1>;
+defm : X86WriteResPairUnsupported<WriteFDiv64Y>;
defm : X86WriteResPairUnsupported<WriteFDiv64Z>;
-defm : SLMWriteResPair<WriteFRcp, [SLM_FPC_RSV0], 5>;
-defm : SLMWriteResPair<WriteFRcpX, [SLM_FPC_RSV0], 5>;
-defm : SLMWriteResPair<WriteFRcpY, [SLM_FPC_RSV0], 5>;
+defm : SLMWriteResPair<WriteFRcp, [SLM_FPC_RSV0], 4>;
+defm : SLMWriteResPair<WriteFRcpX, [SLM_FPC_RSV0], 9, [8], 5, 1>;
+defm : X86WriteResPairUnsupported<WriteFRcpY>;
defm : X86WriteResPairUnsupported<WriteFRcpZ>;
-defm : SLMWriteResPair<WriteFRsqrt, [SLM_FPC_RSV0], 5>;
-defm : SLMWriteResPair<WriteFRsqrtX, [SLM_FPC_RSV0], 5>;
-defm : SLMWriteResPair<WriteFRsqrtY, [SLM_FPC_RSV0], 5>;
+defm : SLMWriteResPair<WriteFRsqrt, [SLM_FPC_RSV0], 4>;
+defm : SLMWriteResPair<WriteFRsqrtX, [SLM_FPC_RSV0], 9, [8], 5, 1>;
+defm : X86WriteResPairUnsupported<WriteFRsqrtY>;
defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
-defm : SLMWriteResPair<WriteFSqrt, [SLM_FPC_RSV0,SLMFPDivider], 20, [1,20], 1, 3>;
-defm : SLMWriteResPair<WriteFSqrtX, [SLM_FPC_RSV0,SLMFPDivider], 41, [1,40], 1, 3>;
-defm : SLMWriteResPair<WriteFSqrtY, [SLM_FPC_RSV0,SLMFPDivider], 41, [1,40], 1, 3>;
+defm : SLMWriteResPair<WriteFSqrt, [SLM_FPC_RSV0, SLMFPDivider], 20, [1,20]>;
+defm : SLMWriteResPair<WriteFSqrtX, [SLM_FPC_RSV0, SLMFPDivider], 41, [1,40], 5, 1>;
+defm : X86WriteResPairUnsupported<WriteFSqrtY>;
defm : X86WriteResPairUnsupported<WriteFSqrtZ>;
-defm : SLMWriteResPair<WriteFSqrt64, [SLM_FPC_RSV0,SLMFPDivider], 35, [1,35], 1, 3>;
-defm : SLMWriteResPair<WriteFSqrt64X, [SLM_FPC_RSV0,SLMFPDivider], 71, [1,70], 1, 3>;
-defm : SLMWriteResPair<WriteFSqrt64Y, [SLM_FPC_RSV0,SLMFPDivider], 71, [1,70], 1, 3>;
+defm : SLMWriteResPair<WriteFSqrt64, [SLM_FPC_RSV0, SLMFPDivider], 35, [1,35]>;
+defm : SLMWriteResPair<WriteFSqrt64X, [SLM_FPC_RSV0, SLMFPDivider], 71, [1,70], 5, 1>;
+defm : X86WriteResPairUnsupported<WriteFSqrt64Y>;
defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;
defm : SLMWriteResPair<WriteFSqrt80, [SLM_FPC_RSV0,SLMFPDivider], 40, [1,40]>;
-defm : SLMWriteResPair<WriteDPPD, [SLM_FPC_RSV1], 3>;
-defm : SLMWriteResPair<WriteDPPS, [SLM_FPC_RSV1], 3>;
-defm : SLMWriteResPair<WriteDPPSY, [SLM_FPC_RSV1], 3>;
+defm : SLMWriteResPair<WriteDPPD, [SLM_FPC_RSV1], 12, [8], 5, 1>;
+defm : SLMWriteResPair<WriteDPPS, [SLM_FPC_RSV1], 15, [12], 9, 1>;
+defm : X86WriteResPairUnsupported<WriteDPPSY>;
defm : X86WriteResPairUnsupported<WriteDPPSZ>;
defm : SLMWriteResPair<WriteFSign, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteFRnd, [SLM_FPC_RSV1], 3>;
@@ -277,7 +280,7 @@ defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
defm : SLMWriteResPair<WriteFBlend, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteFBlendY>;
defm : X86WriteResPairUnsupported<WriteFBlendZ>;
-defm : SLMWriteResPair<WriteFVarBlend, [SLM_FPC_RSV0], 4, [4], 3>;
+defm : SLMWriteResPair<WriteFVarBlend, [SLM_FPC_RSV0], 4, [4], 2, 1>;
defm : X86WriteResPairUnsupported<WriteFVarBlendY>;
defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
defm : X86WriteResPairUnsupported<WriteFShuffle256>;
@@ -369,8 +372,8 @@ defm : SLMWriteResPair<WriteVecALUX, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecALUY, [SLM_FPC_RSV01], 1>;
defm : X86WriteResPairUnsupported<WriteVecALUZ>;
defm : SLMWriteResPair<WriteVecIMul, [SLM_FPC_RSV0], 4>;
-defm : SLMWriteResPair<WriteVecIMulX, [SLM_FPC_RSV0], 5, [2], 2>;
-defm : SLMWriteResPair<WriteVecIMulY, [SLM_FPC_RSV0], 5, [2], 2>;
+defm : SLMWriteResPair<WriteVecIMulX, [SLM_FPC_RSV0], 5, [2]>;
+defm : SLMWriteResPair<WriteVecIMulY, [SLM_FPC_RSV0], 5, [2]>;
defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
// FIXME: The below is closer to correct, but caused some perf regressions.
//defm : SLMWriteResPair<WritePMULLD, [SLM_FPC_RSV0], 11, [11], 7>;
@@ -382,21 +385,21 @@ defm : SLMWriteResPair<WriteShuffleY, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteShuffleZ>;
defm : SLMWriteResPair<WriteShuffleX, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVarShuffle, [SLM_FPC_RSV0], 1>;
-defm : SLMWriteResPair<WriteVarShuffleX, [SLM_FPC_RSV0], 5, [5], 4>;
-defm : SLMWriteResPair<WriteVarShuffleY, [SLM_FPC_RSV0], 5, [5], 4>;
+defm : SLMWriteResPair<WriteVarShuffleX, [SLM_FPC_RSV0], 5, [5], 4, 1>;
+defm : X86WriteResPairUnsupported<WriteVarShuffleY>;
defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
defm : SLMWriteResPair<WriteBlend, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteBlendY, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteBlendZ>;
-defm : SLMWriteResPair<WriteVarBlend, [SLM_FPC_RSV0], 1>;
+defm : SLMWriteResPair<WriteVarBlend, [SLM_FPC_RSV0], 4, [4], 2, 1>;
defm : X86WriteResPairUnsupported<WriteVarBlendY>;
defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
-defm : SLMWriteResPair<WriteMPSAD, [SLM_FPC_RSV0], 7>;
-defm : SLMWriteResPair<WriteMPSADY, [SLM_FPC_RSV0], 7>;
+defm : SLMWriteResPair<WriteMPSAD, [SLM_FPC_RSV0], 7, [5], 3, 1>;
+defm : X86WriteResPairUnsupported<WriteMPSADY>;
defm : X86WriteResPairUnsupported<WriteMPSADZ>;
defm : SLMWriteResPair<WritePSADBW, [SLM_FPC_RSV0], 4>;
-defm : SLMWriteResPair<WritePSADBWX, [SLM_FPC_RSV0], 4>;
-defm : SLMWriteResPair<WritePSADBWY, [SLM_FPC_RSV0], 4>;
+defm : SLMWriteResPair<WritePSADBWX, [SLM_FPC_RSV0], 5, [2]>;
+defm : X86WriteResPairUnsupported<WritePSADBWY>;
defm : X86WriteResPairUnsupported<WritePSADBWZ>;
defm : SLMWriteResPair<WritePHMINPOS, [SLM_FPC_RSV0], 4>;
defm : X86WriteResPairUnsupported<WriteShuffle256>;
@@ -417,26 +420,26 @@ def : WriteRes<WriteVecExtractSt, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////
-defm : SLMWriteResPair<WriteFHAdd, [SLM_FPC_RSV01], 6, [6], 4>;
-defm : SLMWriteResPair<WriteFHAddY, [SLM_FPC_RSV01], 6, [6], 4>;
+defm : SLMWriteResPair<WriteFHAdd, [SLM_FPC_RSV1], 6, [6], 4, 1>;
+defm : X86WriteResPairUnsupported<WriteFHAddY>;
defm : X86WriteResPairUnsupported<WriteFHAddZ>;
-defm : SLMWriteResPair<WritePHAdd, [SLM_FPC_RSV01], 1>;
-defm : SLMWriteResPair<WritePHAddX, [SLM_FPC_RSV01], 1>;
-defm : SLMWriteResPair<WritePHAddY, [SLM_FPC_RSV01], 1>;
+defm : SLMWriteResPair<WritePHAdd, [SLM_FPC_RSV01], 6, [6], 3, 1>;
+defm : SLMWriteResPair<WritePHAddX, [SLM_FPC_RSV01], 6, [6], 3, 1>;
+defm : X86WriteResPairUnsupported<WritePHAddY>;
defm : X86WriteResPairUnsupported<WritePHAddZ>;
// String instructions.
// Packed Compare Implicit Length Strings, Return Mask
-defm : SLMWriteResPair<WritePCmpIStrM, [SLM_FPC_RSV0], 13, [13]>;
+defm : SLMWriteResPair<WritePCmpIStrM, [SLM_FPC_RSV0], 13, [13], 5, 1>;
// Packed Compare Explicit Length Strings, Return Mask
-defm : SLMWriteResPair<WritePCmpEStrM, [SLM_FPC_RSV0], 17, [17]>;
+defm : SLMWriteResPair<WritePCmpEStrM, [SLM_FPC_RSV0], 17, [17], 8, 1>;
// Packed Compare Implicit Length Strings, Return Index
-defm : SLMWriteResPair<WritePCmpIStrI, [SLM_FPC_RSV0], 17, [17]>;
+defm : SLMWriteResPair<WritePCmpIStrI, [SLM_FPC_RSV0], 17, [17], 6, 1>;
// Packed Compare Explicit Length Strings, Return Index
-defm : SLMWriteResPair<WritePCmpEStrI, [SLM_FPC_RSV0], 21, [21]>;
+defm : SLMWriteResPair<WritePCmpEStrI, [SLM_FPC_RSV0], 21, [21], 9, 1>;
// MOVMSK Instructions.
def : WriteRes<WriteFMOVMSK, [SLM_FPC_RSV1]> { let Latency = 4; }
@@ -450,7 +453,7 @@ defm : SLMWriteResPair<WriteAESIMC, [SLM_FPC_RSV0], 8, [5]>;
defm : SLMWriteResPair<WriteAESKeyGen, [SLM_FPC_RSV0], 8, [5]>;
// Carry-less multiplication instructions.
-defm : SLMWriteResPair<WriteCLMul, [SLM_FPC_RSV0], 10, [10]>;
+defm : SLMWriteResPair<WriteCLMul, [SLM_FPC_RSV0], 10, [10], 8, 1>;
def : WriteRes<WriteSystem, [SLM_FPC_RSV0]> { let Latency = 100; }
def : WriteRes<WriteMicrocoded, [SLM_FPC_RSV0]> { let Latency = 100; }
@@ -462,15 +465,19 @@ def : WriteRes<WriteNop, []>;
def SLMWriteResGroup1rr : SchedWriteRes<[SLM_FPC_RSV01]> {
let Latency = 4;
let NumMicroOps = 2;
- let ResourceCycles = [4];
+ let ResourceCycles = [8];
}
-def: InstRW<[SLMWriteResGroup1rr], (instrs PADDQrr, PSUBQrr, PCMPEQQrr)>;
+def: InstRW<[SLMWriteResGroup1rr], (instrs MMX_PADDQirr, PADDQrr,
+ MMX_PSUBQirr, PSUBQrr,
+ PCMPEQQrr)>;
def SLMWriteResGroup1rm : SchedWriteRes<[SLM_MEC_RSV,SLM_FPC_RSV01]> {
let Latency = 7;
let NumMicroOps = 3;
- let ResourceCycles = [1,4];
+ let ResourceCycles = [1,8];
}
-def: InstRW<[SLMWriteResGroup1rm], (instrs PADDQrm, PSUBQrm, PCMPEQQrm)>;
+def: InstRW<[SLMWriteResGroup1rm], (instrs MMX_PADDQirm, PADDQrm,
+ MMX_PSUBQirm, PSUBQrm,
+ PCMPEQQrm)>;
} // SchedModel
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td
index 12f8e7cc76f7..8e30e5e10ca8 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver1.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td
@@ -256,8 +256,13 @@ defm : ZnWriteResPair<WriteIDiv32, [ZnALU2, ZnDivider], 25, [1,25], 2>;
defm : ZnWriteResPair<WriteIDiv64, [ZnALU2, ZnDivider], 41, [1,41], 2>;
// IMULH
-def : WriteRes<WriteIMulH, [ZnALU1, ZnMultiplier]>{
- let Latency = 4;
+def ZnWriteIMulH : WriteRes<WriteIMulH, [ZnMultiplier]>{
+ let Latency = 3;
+ let NumMicroOps = 0;
+}
+def : WriteRes<WriteIMulHLd, [ZnMultiplier]> {
+ let Latency = !add(ZnWriteIMulH.Latency, Znver1Model.LoadLatency);
+ let NumMicroOps = ZnWriteIMulH.NumMicroOps;
}
// Floating point operations
@@ -659,32 +664,10 @@ def ZnWriteMul64Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
}
def : SchedAlias<WriteIMul64Ld, ZnWriteMul64Ld>;
-// MULX.
-// r32,r32,r32.
-def ZnWriteMulX32 : SchedWriteRes<[ZnALU1, ZnMultiplier]> {
- let Latency = 3;
- let ResourceCycles = [1, 2];
-}
-def : InstRW<[ZnWriteMulX32], (instrs MULX32rr)>;
-
-// r32,r32,m32.
-def ZnWriteMulX32Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
- let Latency = 8;
- let ResourceCycles = [1, 2, 2];
-}
-def : InstRW<[ZnWriteMulX32Ld, ReadAfterLd], (instrs MULX32rm)>;
-
-// r64,r64,r64.
-def ZnWriteMulX64 : SchedWriteRes<[ZnALU1]> {
- let Latency = 3;
-}
-def : InstRW<[ZnWriteMulX64], (instrs MULX64rr)>;
-
-// r64,r64,m64.
-def ZnWriteMulX64Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
- let Latency = 8;
-}
-def : InstRW<[ZnWriteMulX64Ld, ReadAfterLd], (instrs MULX64rm)>;
+// MULX
+// Numbers are based on the AMD SOG for Family 17h - Instruction Latencies.
+defm : ZnWriteResPair<WriteMULX32, [ZnALU1, ZnMultiplier], 3, [1, 1], 1, 5, 0>;
+defm : ZnWriteResPair<WriteMULX64, [ZnALU1, ZnMultiplier], 3, [1, 1], 1, 5, 0>;
//-- Control transfer instructions --//
@@ -714,7 +697,7 @@ def : InstRW<[WriteMicrocoded], (instregex "CALL(16|32)m")>;
def ZnWriteRET : SchedWriteRes<[ZnALU03]> {
let NumMicroOps = 2;
}
-def : InstRW<[ZnWriteRET], (instregex "RET(L|Q|W)", "LRET(L|Q|W)",
+def : InstRW<[ZnWriteRET], (instregex "RET(16|32|64)", "LRET(16|32|64)",
"IRET(16|32|64)")>;
//-- Logic instructions --//
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver2.td b/llvm/lib/Target/X86/X86ScheduleZnver2.td
index 5b4b151d2938..a83c89e2f28a 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver2.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver2.td
@@ -243,10 +243,17 @@ defm : Zn2WriteResPair<WriteIDiv32, [Zn2ALU2, Zn2Divider], 25, [1,25], 2>;
defm : Zn2WriteResPair<WriteIDiv64, [Zn2ALU2, Zn2Divider], 41, [1,41], 2>;
// IMULH
-def : WriteRes<WriteIMulH, [Zn2ALU1, Zn2Multiplier]>{
- let Latency = 4;
+def Zn2WriteIMulH : WriteRes<WriteIMulH, [Zn2Multiplier]>{
+ let Latency = 3;
+ let NumMicroOps = 0;
}
+def : WriteRes<WriteIMulHLd, [Zn2Multiplier]>{
+ let Latency = !add(Zn2WriteIMulH.Latency, Znver2Model.LoadLatency);
+ let NumMicroOps = Zn2WriteIMulH.NumMicroOps;
+}
+
+
// Floating point operations
defm : X86WriteRes<WriteFLoad, [Zn2AGU], 8, [1], 1>;
defm : X86WriteRes<WriteFLoadX, [Zn2AGU], 8, [1], 1>;
@@ -658,31 +665,9 @@ def : SchedAlias<WriteIMul64ImmLd, Zn2WriteMul64Ld>;
def : SchedAlias<WriteIMul64RegLd, Zn2WriteMul64Ld>;
// MULX.
-// r32,r32,r32.
-def Zn2WriteMulX32 : SchedWriteRes<[Zn2ALU1, Zn2Multiplier]> {
- let Latency = 3;
- let ResourceCycles = [1, 2];
-}
-def : InstRW<[Zn2WriteMulX32], (instrs MULX32rr)>;
-
-// r32,r32,m32.
-def Zn2WriteMulX32Ld : SchedWriteRes<[Zn2AGU, Zn2ALU1, Zn2Multiplier]> {
- let Latency = 7;
- let ResourceCycles = [1, 2, 2];
-}
-def : InstRW<[Zn2WriteMulX32Ld, ReadAfterLd], (instrs MULX32rm)>;
-
-// r64,r64,r64.
-def Zn2WriteMulX64 : SchedWriteRes<[Zn2ALU1]> {
- let Latency = 3;
-}
-def : InstRW<[Zn2WriteMulX64], (instrs MULX64rr)>;
-
-// r64,r64,m64.
-def Zn2WriteMulX64Ld : SchedWriteRes<[Zn2AGU, Zn2ALU1, Zn2Multiplier]> {
- let Latency = 7;
-}
-def : InstRW<[Zn2WriteMulX64Ld, ReadAfterLd], (instrs MULX64rm)>;
+// Numbers are based on the AMD SOG for Family 17h - Instruction Latencies.
+defm : Zn2WriteResPair<WriteMULX32, [Zn2ALU1, Zn2Multiplier], 3, [1, 1], 1, 4, 0>;
+defm : Zn2WriteResPair<WriteMULX64, [Zn2ALU1, Zn2Multiplier], 3, [1, 1], 1, 4, 0>;
//-- Control transfer instructions --//
@@ -712,7 +697,7 @@ def : InstRW<[WriteMicrocoded], (instregex "CALL(16|32)m")>;
def Zn2WriteRET : SchedWriteRes<[Zn2ALU03]> {
let NumMicroOps = 2;
}
-def : InstRW<[Zn2WriteRET], (instregex "RET(L|Q|W)", "LRET(L|Q|W)",
+def : InstRW<[Zn2WriteRET], (instregex "RET(16|32|64)", "LRET(16|32|64)",
"IRET(16|32|64)")>;
//-- Logic instructions --//
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver3.td b/llvm/lib/Target/X86/X86ScheduleZnver3.td
index 4a91a91a0f0f..be07c069aae1 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver3.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver3.td
@@ -617,42 +617,15 @@ defm : Zn3WriteResIntPair<WriteIMul16, [Zn3Multiplier], 3, [3], 3, /*LoadUOps=*/
defm : Zn3WriteResIntPair<WriteIMul16Imm, [Zn3Multiplier], 4, [4], 2>; // Integer 16-bit multiplication by immediate.
defm : Zn3WriteResIntPair<WriteIMul16Reg, [Zn3Multiplier], 3, [1], 1>; // Integer 16-bit multiplication by register.
defm : Zn3WriteResIntPair<WriteIMul32, [Zn3Multiplier], 3, [3], 2>; // Integer 32-bit multiplication.
-
-def Zn3MULX32rr : SchedWriteRes<[Zn3Multiplier]> {
- let Latency = 4;
- let ResourceCycles = [1];
- let NumMicroOps = 2;
-}
-def : InstRW<[Zn3MULX32rr, WriteIMulH], (instrs MULX32rr)>;
-
-def Zn3MULX32rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3Multiplier]> {
- let Latency = !add(Znver3Model.LoadLatency, Zn3MULX32rr.Latency);
- let ResourceCycles = [1, 1, 2];
- let NumMicroOps = Zn3MULX32rr.NumMicroOps;
-}
-def : InstRW<[Zn3MULX32rm, WriteIMulH], (instrs MULX32rm)>;
-
+defm : Zn3WriteResIntPair<WriteMULX32, [Zn3Multiplier], 3, [1], 2>; // Integer 32-bit Unsigned Multiply Without Affecting Flags.
defm : Zn3WriteResIntPair<WriteIMul32Imm, [Zn3Multiplier], 3, [1], 1>; // Integer 32-bit multiplication by immediate.
defm : Zn3WriteResIntPair<WriteIMul32Reg, [Zn3Multiplier], 3, [1], 1>; // Integer 32-bit multiplication by register.
defm : Zn3WriteResIntPair<WriteIMul64, [Zn3Multiplier], 3, [3], 2>; // Integer 64-bit multiplication.
-
-def Zn3MULX64rr : SchedWriteRes<[Zn3Multiplier]> {
- let Latency = 4;
- let ResourceCycles = [1];
- let NumMicroOps = 2;
-}
-def : InstRW<[Zn3MULX64rr, WriteIMulH], (instrs MULX64rr)>;
-
-def Zn3MULX64rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3Multiplier]> {
- let Latency = !add(Znver3Model.LoadLatency, Zn3MULX64rr.Latency);
- let ResourceCycles = [1, 1, 2];
- let NumMicroOps = Zn3MULX64rr.NumMicroOps;
-}
-def : InstRW<[Zn3MULX64rm, WriteIMulH], (instrs MULX64rm)>;
-
+defm : Zn3WriteResIntPair<WriteMULX64, [Zn3Multiplier], 3, [1], 2>;  // Integer 64-bit Unsigned Multiply Without Affecting Flags.
defm : Zn3WriteResIntPair<WriteIMul64Imm, [Zn3Multiplier], 3, [1], 1>; // Integer 64-bit multiplication by immediate.
defm : Zn3WriteResIntPair<WriteIMul64Reg, [Zn3Multiplier], 3, [1], 1>; // Integer 64-bit multiplication by register.
-defm : Zn3WriteResInt<WriteIMulH, [], 4, [], 0>; // Integer multiplication, high part.
+defm : Zn3WriteResInt<WriteIMulHLd, [], !add(4, Znver3Model.LoadLatency), [], 0>; // Integer multiplication, high part.
+defm : Zn3WriteResInt<WriteIMulH, [], 4, [], 0>; // Integer multiplication, high part.
defm : Zn3WriteResInt<WriteBSWAP32, [Zn3ALU0123], 1, [1], 1>; // Byte Order (Endianness) 32-bit Swap.
defm : Zn3WriteResInt<WriteBSWAP64, [Zn3ALU0123], 1, [1], 1>; // Byte Order (Endianness) 64-bit Swap.
diff --git a/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp b/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
index a3238e6317a0..5e59081c63b0 100644
--- a/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -71,9 +71,10 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
// Check to see if there is a specialized entry-point for memory zeroing.
ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Val);
- if (const char *bzeroName = (ValC && ValC->isNullValue())
- ? DAG.getTargetLoweringInfo().getLibcallName(RTLIB::BZERO)
- : nullptr) {
+ if (const char *bzeroName =
+ (ValC && ValC->isZero())
+ ? DAG.getTargetLoweringInfo().getLibcallName(RTLIB::BZERO)
+ : nullptr) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout());
Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
diff --git a/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp b/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
index 14a3fea240e7..1a97904e9bc9 100644
--- a/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
+++ b/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
@@ -100,7 +100,7 @@ static bool extractConstantMask(const Constant *C, unsigned MaskEltSizeInBits,
// Only treat the element as UNDEF if all bits are UNDEF, otherwise
// treat it as zero.
- if (EltUndef.isAllOnesValue()) {
+ if (EltUndef.isAllOnes()) {
UndefElts.setBit(i);
RawMask[i] = 0;
continue;
diff --git a/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp b/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
index fcaf7c86128a..83a4a025f518 100644
--- a/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
+++ b/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
@@ -850,11 +850,9 @@ getRegClassForUnfoldedLoad(MachineFunction &MF, const X86InstrInfo &TII,
void X86SpeculativeLoadHardeningPass::unfoldCallAndJumpLoads(
MachineFunction &MF) {
for (MachineBasicBlock &MBB : MF)
- for (auto MII = MBB.instr_begin(), MIE = MBB.instr_end(); MII != MIE;) {
- // Grab a reference and increment the iterator so we can remove this
- // instruction if needed without disturbing the iteration.
- MachineInstr &MI = *MII++;
-
+ // We use make_early_inc_range here so we can remove instructions if needed
+ // without disturbing the iteration.
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB.instrs())) {
// Must either be a call or a branch.
if (!MI.isCall() && !MI.isBranch())
continue;
diff --git a/llvm/lib/Target/X86/X86Subtarget.cpp b/llvm/lib/Target/X86/X86Subtarget.cpp
index 4af0ac238f59..a3d4d04b1e0d 100644
--- a/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -67,6 +67,13 @@ X86Subtarget::classifyGlobalReference(const GlobalValue *GV) const {
unsigned char
X86Subtarget::classifyLocalReference(const GlobalValue *GV) const {
+ // Tagged globals have non-zero upper bits, which makes direct references
+ // require a 64-bit immediate. On the small code model this causes relocation
+ // errors, so we go through the GOT instead.
+ if (AllowTaggedGlobals && TM.getCodeModel() == CodeModel::Small && GV &&
+ !isa<Function>(GV))
+ return X86II::MO_GOTPCREL_NORELAX;
+
// If we're not PIC, it's not very interesting.
if (!isPositionIndependent())
return X86II::MO_NO_FLAG;
@@ -143,6 +150,9 @@ unsigned char X86Subtarget::classifyGlobalReference(const GlobalValue *GV,
return classifyLocalReference(GV);
if (isTargetCOFF()) {
+ // ExternalSymbolSDNode like _tls_index.
+ if (!GV)
+ return X86II::MO_NO_FLAG;
if (GV->hasDLLImportStorageClass())
return X86II::MO_DLLIMPORT;
return X86II::MO_COFFSTUB;
@@ -157,6 +167,11 @@ unsigned char X86Subtarget::classifyGlobalReference(const GlobalValue *GV,
// reference for them.
if (TM.getCodeModel() == CodeModel::Large)
return isTargetELF() ? X86II::MO_GOT : X86II::MO_NO_FLAG;
+ // Tagged globals have non-zero upper bits, which makes direct references
+ // require a 64-bit immediate. So we can't let the linker relax the
+ // relocation to a 32-bit RIP-relative direct reference.
+ if (AllowTaggedGlobals && GV && !isa<Function>(GV))
+ return X86II::MO_GOTPCREL_NORELAX;
return X86II::MO_GOTPCREL;
}
@@ -184,10 +199,13 @@ X86Subtarget::classifyGlobalFunctionReference(const GlobalValue *GV,
if (TM.shouldAssumeDSOLocal(M, GV))
return X86II::MO_NO_FLAG;
- // Functions on COFF can be non-DSO local for two reasons:
+ // Functions on COFF can be non-DSO local for three reasons:
+ // - They are intrinsic functions (!GV)
// - They are marked dllimport
// - They are extern_weak, and a stub is needed
if (isTargetCOFF()) {
+ if (!GV)
+ return X86II::MO_NO_FLAG;
if (GV->hasDLLImportStorageClass())
return X86II::MO_DLLIMPORT;
return X86II::MO_COFFSTUB;
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index 935dbd882a44..9da54dc2e9b7 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -54,8 +54,7 @@ class X86Subtarget final : public X86GenSubtargetInfo {
// are not a good idea. We should be migrating away from these.
enum X86ProcFamilyEnum {
Others,
- IntelAtom,
- IntelSLM
+ IntelAtom
};
enum X86SSEEnum {
@@ -353,6 +352,9 @@ class X86Subtarget final : public X86GenSubtargetInfo {
/// Processor has AVX-512 Vector Length eXtenstions
bool HasVLX = false;
+  /// Processor has AVX-512 16 bit floating-point extensions
+ bool HasFP16 = false;
+
/// Processor has PKU extenstions
bool HasPKU = false;
@@ -425,6 +427,10 @@ class X86Subtarget final : public X86GenSubtargetInfo {
/// Processor supports User Level Interrupt instructions
bool HasUINTR = false;
+ /// Enable SSE4.2 CRC32 instruction (Used when SSE4.2 is supported but
+ /// function is GPR only)
+ bool HasCRC32 = false;
+
/// Processor has a single uop BEXTR implementation.
bool HasFastBEXTR = false;
@@ -469,6 +475,10 @@ class X86Subtarget final : public X86GenSubtargetInfo {
/// loads from being used maliciously.
bool UseLVILoadHardening = false;
+ /// Use an instruction sequence for taking the address of a global that allows
+ /// a memory tag in the upper address bits.
+ bool AllowTaggedGlobals = false;
+
/// Use software floating point for code generation.
bool UseSoftFloat = false;
@@ -495,6 +505,9 @@ class X86Subtarget final : public X86GenSubtargetInfo {
/// Indicates target prefers AVX512 mask registers.
bool PreferMaskRegisters = false;
+ /// Use Silvermont specific arithmetic costs.
+ bool UseSLMArithCosts = false;
+
/// Use Goldmont specific floating point div/sqrt costs.
bool UseGLMDivSqrtCosts = false;
@@ -742,6 +755,7 @@ public:
bool hasDQI() const { return HasDQI; }
bool hasBWI() const { return HasBWI; }
bool hasVLX() const { return HasVLX; }
+ bool hasFP16() const { return HasFP16; }
bool hasPKU() const { return HasPKU; }
bool hasVNNI() const { return HasVNNI; }
bool hasBF16() const { return HasBF16; }
@@ -763,6 +777,7 @@ public:
bool hasSERIALIZE() const { return HasSERIALIZE; }
bool hasTSXLDTRK() const { return HasTSXLDTRK; }
bool hasUINTR() const { return HasUINTR; }
+ bool hasCRC32() const { return HasCRC32; }
bool useRetpolineIndirectCalls() const { return UseRetpolineIndirectCalls; }
bool useRetpolineIndirectBranches() const {
return UseRetpolineIndirectBranches;
@@ -784,8 +799,10 @@ public:
}
bool preferMaskRegisters() const { return PreferMaskRegisters; }
+ bool useSLMArithCosts() const { return UseSLMArithCosts; }
bool useGLMDivSqrtCosts() const { return UseGLMDivSqrtCosts; }
bool useLVIControlFlowIntegrity() const { return UseLVIControlFlowIntegrity; }
+ bool allowTaggedGlobals() const { return AllowTaggedGlobals; }
bool useLVILoadHardening() const { return UseLVILoadHardening; }
bool useSpeculativeExecutionSideEffectSuppression() const {
return UseSpeculativeExecutionSideEffectSuppression;
@@ -819,7 +836,6 @@ public:
/// TODO: to be removed later and replaced with suitable properties
bool isAtom() const { return X86ProcFamily == IntelAtom; }
- bool isSLM() const { return X86ProcFamily == IntelSLM; }
bool useSoftFloat() const { return UseSoftFloat; }
bool useAA() const override { return UseAA; }
@@ -933,6 +949,31 @@ public:
/// Return true if the subtarget allows calls to immediate address.
bool isLegalToCallImmediateAddr() const;
+ /// Return whether FrameLowering should always set the "extended frame
+ /// present" bit in FP, or set it based on a symbol in the runtime.
+ bool swiftAsyncContextIsDynamicallySet() const {
+ // Older OS versions (particularly system unwinders) are confused by the
+ // Swift extended frame, so when building code that might be run on them we
+ // must dynamically query the concurrency library to determine whether
+ // extended frames should be flagged as present.
+ const Triple &TT = getTargetTriple();
+
+ unsigned Major, Minor, Micro;
+ TT.getOSVersion(Major, Minor, Micro);
+ switch(TT.getOS()) {
+ default:
+ return false;
+ case Triple::IOS:
+ case Triple::TvOS:
+ return Major < 15;
+ case Triple::WatchOS:
+ return Major < 8;
+ case Triple::MacOSX:
+ case Triple::Darwin:
+ return Major < 12;
+ }
+ }
+
/// If we are using indirect thunks, we need to expand indirectbr to avoid it
/// lowering to an actual indirect jump.
bool enableIndirectBrExpand() const override {
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index ee8cff3e008b..336985f3bf9d 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -39,11 +39,11 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/CFGuard.h"
@@ -503,7 +503,7 @@ void X86PassConfig::addPreRegAlloc() {
addPass(createX86SpeculativeLoadHardeningPass());
addPass(createX86FlagsCopyLoweringPass());
- addPass(createX86WinAllocaExpander());
+ addPass(createX86DynAllocaExpander());
if (getOptLevel() != CodeGenOpt::None) {
addPass(createX86PreTileConfigPass());
@@ -585,6 +585,9 @@ void X86PassConfig::addPreEmitPass2() {
addPass(createEHContGuardCatchretPass());
}
addPass(createX86LoadValueInjectionRetHardeningPass());
+
+ // Insert pseudo probe annotation for callsite profiling
+ addPass(createPseudoProbeInserter());
}
bool X86PassConfig::addPostFastRegAllocRewrite() {
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 971c430d73b1..06dacb638d16 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -206,6 +206,87 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
+ if (ISD == ISD::MUL && Args.size() == 2 && LT.second.isVector() &&
+ LT.second.getScalarType() == MVT::i32) {
+ // Check if the operands can be represented as a smaller datatype.
+ bool Op1Signed = false, Op2Signed = false;
+ unsigned Op1MinSize = BaseT::minRequiredElementSize(Args[0], Op1Signed);
+ unsigned Op2MinSize = BaseT::minRequiredElementSize(Args[1], Op2Signed);
+ unsigned OpMinSize = std::max(Op1MinSize, Op2MinSize);
+
+ // If both are representable as i15 and at least one is constant,
+ // zero-extended, or sign-extended from vXi16 (or less pre-SSE41) then we
+ // can treat this as PMADDWD which has the same costs as a vXi16 multiply.
+ if (OpMinSize <= 15 && !ST->isPMADDWDSlow()) {
+ bool Op1Constant =
+ isa<ConstantDataVector>(Args[0]) || isa<ConstantVector>(Args[0]);
+ bool Op2Constant =
+ isa<ConstantDataVector>(Args[1]) || isa<ConstantVector>(Args[1]);
+ bool Op1Sext = isa<SExtInst>(Args[0]) &&
+ (Op1MinSize == 15 || (Op1MinSize < 15 && !ST->hasSSE41()));
+ bool Op2Sext = isa<SExtInst>(Args[1]) &&
+ (Op2MinSize == 15 || (Op2MinSize < 15 && !ST->hasSSE41()));
+
+ bool IsZeroExtended = !Op1Signed || !Op2Signed;
+ bool IsConstant = Op1Constant || Op2Constant;
+ bool IsSext = Op1Sext || Op2Sext;
+ if (IsConstant || IsZeroExtended || IsSext)
+ LT.second =
+ MVT::getVectorVT(MVT::i16, 2 * LT.second.getVectorNumElements());
+ }
+ }
+
+ if ((ISD == ISD::MUL || ISD == ISD::SDIV || ISD == ISD::SREM ||
+ ISD == ISD::UDIV || ISD == ISD::UREM) &&
+ (Op2Info == TargetTransformInfo::OK_UniformConstantValue ||
+ Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) &&
+ Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) {
+ // Vector multiply by pow2 will be simplified to shifts.
+ if (ISD == ISD::MUL) {
+ InstructionCost Cost = getArithmeticInstrCost(
+ Instruction::Shl, Ty, CostKind, Op1Info, Op2Info,
+ TargetTransformInfo::OP_None, TargetTransformInfo::OP_None);
+ return Cost;
+ }
+
+ if (ISD == ISD::SDIV || ISD == ISD::SREM) {
+ // On X86, vector signed division by constants power-of-two are
+ // normally expanded to the sequence SRA + SRL + ADD + SRA.
+ // The OperandValue properties may not be the same as that of the previous
+ // operation; conservatively assume OP_None.
+ InstructionCost Cost =
+ 2 * getArithmeticInstrCost(Instruction::AShr, Ty, CostKind, Op1Info,
+ Op2Info, TargetTransformInfo::OP_None,
+ TargetTransformInfo::OP_None);
+ Cost += getArithmeticInstrCost(Instruction::LShr, Ty, CostKind, Op1Info,
+ Op2Info, TargetTransformInfo::OP_None,
+ TargetTransformInfo::OP_None);
+ Cost += getArithmeticInstrCost(Instruction::Add, Ty, CostKind, Op1Info,
+ Op2Info, TargetTransformInfo::OP_None,
+ TargetTransformInfo::OP_None);
+
+ if (ISD == ISD::SREM) {
+ // For SREM: (X % C) is the equivalent of (X - (X/C)*C)
+ Cost += getArithmeticInstrCost(Instruction::Mul, Ty, CostKind, Op1Info,
+ Op2Info);
+ Cost += getArithmeticInstrCost(Instruction::Sub, Ty, CostKind, Op1Info,
+ Op2Info);
+ }
+
+ return Cost;
+ }
+
+ // Vector unsigned division/remainder will be simplified to shifts/masks.
+ if (ISD == ISD::UDIV)
+ return getArithmeticInstrCost(Instruction::LShr, Ty, CostKind, Op1Info,
+ Op2Info, TargetTransformInfo::OP_None,
+ TargetTransformInfo::OP_None);
+ // UREM
+ return getArithmeticInstrCost(Instruction::And, Ty, CostKind, Op1Info,
+ Op2Info, TargetTransformInfo::OP_None,
+ TargetTransformInfo::OP_None);
+ }
+
static const CostTblEntry GLMCostTable[] = {
{ ISD::FDIV, MVT::f32, 18 }, // divss
{ ISD::FDIV, MVT::v4f32, 35 }, // divps
@@ -241,9 +322,10 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
{ ISD::SUB, MVT::v2i64, 4 },
};
- if (ST->isSLM()) {
+ if (ST->useSLMArithCosts()) {
if (Args.size() == 2 && ISD == ISD::MUL && LT.second == MVT::v4i32) {
// Check if the operands can be shrinked into a smaller datatype.
+      // TODO: Merge this into generic vXi32 MUL patterns above.
bool Op1Signed = false;
unsigned Op1MinSize = BaseT::minRequiredElementSize(Args[0], Op1Signed);
bool Op2Signed = false;
@@ -268,54 +350,6 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
}
}
- if ((ISD == ISD::SDIV || ISD == ISD::SREM || ISD == ISD::UDIV ||
- ISD == ISD::UREM) &&
- (Op2Info == TargetTransformInfo::OK_UniformConstantValue ||
- Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) &&
- Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) {
- if (ISD == ISD::SDIV || ISD == ISD::SREM) {
- // On X86, vector signed division by constants power-of-two are
- // normally expanded to the sequence SRA + SRL + ADD + SRA.
- // The OperandValue properties may not be the same as that of the previous
- // operation; conservatively assume OP_None.
- InstructionCost Cost =
- 2 * getArithmeticInstrCost(Instruction::AShr, Ty, CostKind, Op1Info,
- Op2Info, TargetTransformInfo::OP_None,
- TargetTransformInfo::OP_None);
- Cost += getArithmeticInstrCost(Instruction::LShr, Ty, CostKind, Op1Info,
- Op2Info,
- TargetTransformInfo::OP_None,
- TargetTransformInfo::OP_None);
- Cost += getArithmeticInstrCost(Instruction::Add, Ty, CostKind, Op1Info,
- Op2Info,
- TargetTransformInfo::OP_None,
- TargetTransformInfo::OP_None);
-
- if (ISD == ISD::SREM) {
- // For SREM: (X % C) is the equivalent of (X - (X/C)*C)
- Cost += getArithmeticInstrCost(Instruction::Mul, Ty, CostKind, Op1Info,
- Op2Info);
- Cost += getArithmeticInstrCost(Instruction::Sub, Ty, CostKind, Op1Info,
- Op2Info);
- }
-
- return Cost;
- }
-
- // Vector unsigned division/remainder will be simplified to shifts/masks.
- if (ISD == ISD::UDIV)
- return getArithmeticInstrCost(Instruction::LShr, Ty, CostKind,
- Op1Info, Op2Info,
- TargetTransformInfo::OP_None,
- TargetTransformInfo::OP_None);
-
- else // UREM
- return getArithmeticInstrCost(Instruction::And, Ty, CostKind,
- Op1Info, Op2Info,
- TargetTransformInfo::OP_None,
- TargetTransformInfo::OP_None);
- }
-
static const CostTblEntry AVX512BWUniformConstCostTable[] = {
{ ISD::SHL, MVT::v64i8, 2 }, // psllw + pand.
{ ISD::SRL, MVT::v64i8, 2 }, // psrlw + pand.
@@ -1005,6 +1039,7 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
static const CostTblEntry X64CostTbl[] = { // 64-bit targets
{ ISD::ADD, MVT::i64, 1 }, // Core (Merom) from http://www.agner.org/
{ ISD::SUB, MVT::i64, 1 }, // Core (Merom) from http://www.agner.org/
+ { ISD::MUL, MVT::i64, 2 }, // Nehalem from http://www.agner.org/
};
if (ST->is64Bit())
@@ -1121,6 +1156,9 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
return SubLT.first;
}
+
+ // If the insertion isn't aligned, treat it like a 2-op shuffle.
+ Kind = TTI::SK_PermuteTwoSrc;
}
// Handle some common (illegal) sub-vector types as they are often very cheap
@@ -1196,6 +1234,29 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
LT.first = NumOfDests * NumOfShufflesPerDest;
}
+ static const CostTblEntry AVX512FP16ShuffleTbl[] = {
+ {TTI::SK_Broadcast, MVT::v32f16, 1}, // vpbroadcastw
+ {TTI::SK_Broadcast, MVT::v16f16, 1}, // vpbroadcastw
+ {TTI::SK_Broadcast, MVT::v8f16, 1}, // vpbroadcastw
+
+ {TTI::SK_Reverse, MVT::v32f16, 2}, // vpermw
+ {TTI::SK_Reverse, MVT::v16f16, 2}, // vpermw
+ {TTI::SK_Reverse, MVT::v8f16, 1}, // vpshufb
+
+ {TTI::SK_PermuteSingleSrc, MVT::v32f16, 2}, // vpermw
+ {TTI::SK_PermuteSingleSrc, MVT::v16f16, 2}, // vpermw
+ {TTI::SK_PermuteSingleSrc, MVT::v8f16, 1}, // vpshufb
+
+ {TTI::SK_PermuteTwoSrc, MVT::v32f16, 2}, // vpermt2w
+ {TTI::SK_PermuteTwoSrc, MVT::v16f16, 2}, // vpermt2w
+ {TTI::SK_PermuteTwoSrc, MVT::v8f16, 2} // vpermt2w
+ };
+
+ if (!ST->useSoftFloat() && ST->hasFP16())
+ if (const auto *Entry =
+ CostTableLookup(AVX512FP16ShuffleTbl, Kind, LT.second))
+ return LT.first * Entry->Cost;
+
static const CostTblEntry AVX512VBMIShuffleTbl[] = {
{TTI::SK_Reverse, MVT::v64i8, 1}, // vpermb
{TTI::SK_Reverse, MVT::v32i8, 1}, // vpermb
@@ -1533,6 +1594,7 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1, 1 },
{ ISD::SIGN_EXTEND, MVT::v32i8, MVT::v32i1, 1 },
{ ISD::SIGN_EXTEND, MVT::v32i16, MVT::v32i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v32i16, MVT::v64i1, 1 },
{ ISD::SIGN_EXTEND, MVT::v64i8, MVT::v64i1, 1 },
// Mask zero extend is a sext + shift.
@@ -1546,6 +1608,7 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1, 2 },
{ ISD::ZERO_EXTEND, MVT::v32i8, MVT::v32i1, 2 },
{ ISD::ZERO_EXTEND, MVT::v32i16, MVT::v32i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v32i16, MVT::v64i1, 2 },
{ ISD::ZERO_EXTEND, MVT::v64i8, MVT::v64i1, 2 },
{ ISD::TRUNCATE, MVT::v32i8, MVT::v32i16, 2 },
@@ -1557,12 +1620,14 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::TRUNCATE, MVT::v4i1, MVT::v4i16, 2 }, // widen to zmm
{ ISD::TRUNCATE, MVT::v4i8, MVT::v4i16, 2 }, // vpmovwb
{ ISD::TRUNCATE, MVT::v8i1, MVT::v8i8, 2 }, // widen to zmm
+ { ISD::TRUNCATE, MVT::v8i1, MVT::v16i8, 2 }, // widen to zmm
{ ISD::TRUNCATE, MVT::v8i1, MVT::v8i16, 2 }, // widen to zmm
{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 2 }, // vpmovwb
{ ISD::TRUNCATE, MVT::v16i1, MVT::v16i8, 2 }, // widen to zmm
{ ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 2 }, // widen to zmm
{ ISD::TRUNCATE, MVT::v32i1, MVT::v32i8, 2 }, // widen to zmm
{ ISD::TRUNCATE, MVT::v32i1, MVT::v32i16, 2 },
+ { ISD::TRUNCATE, MVT::v64i1, MVT::v32i16, 2 },
{ ISD::TRUNCATE, MVT::v64i1, MVT::v64i8, 2 },
};
@@ -1606,17 +1671,26 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::TRUNCATE, MVT::v2i8, MVT::v2i32, 2 }, // vpmovdb
{ ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 2 }, // vpmovdb
{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 2 }, // vpmovdb
- { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 2 }, // vpmovdb
+ { ISD::TRUNCATE, MVT::v32i8, MVT::v16i32, 2 }, // vpmovdb
+ { ISD::TRUNCATE, MVT::v64i8, MVT::v16i32, 2 }, // vpmovdb
+ { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 2 }, // vpmovdw
+ { ISD::TRUNCATE, MVT::v32i16, MVT::v16i32, 2 }, // vpmovdw
{ ISD::TRUNCATE, MVT::v2i8, MVT::v2i64, 2 }, // vpmovqb
{ ISD::TRUNCATE, MVT::v2i16, MVT::v2i64, 1 }, // vpshufb
{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i64, 2 }, // vpmovqb
+ { ISD::TRUNCATE, MVT::v16i8, MVT::v8i64, 2 }, // vpmovqb
+ { ISD::TRUNCATE, MVT::v32i8, MVT::v8i64, 2 }, // vpmovqb
+ { ISD::TRUNCATE, MVT::v64i8, MVT::v8i64, 2 }, // vpmovqb
{ ISD::TRUNCATE, MVT::v8i16, MVT::v8i64, 2 }, // vpmovqw
+ { ISD::TRUNCATE, MVT::v16i16, MVT::v8i64, 2 }, // vpmovqw
+ { ISD::TRUNCATE, MVT::v32i16, MVT::v8i64, 2 }, // vpmovqw
{ ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 1 }, // vpmovqd
{ ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 }, // zmm vpmovqd
{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i64, 5 },// 2*vpmovqd+concat+vpmovdb
{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 3 }, // extend to v16i32
{ ISD::TRUNCATE, MVT::v32i8, MVT::v32i16, 8 },
+ { ISD::TRUNCATE, MVT::v64i8, MVT::v32i16, 8 },
// Sign extend is zmm vpternlogd+vptruncdb.
// Zero extend is zmm broadcast load+vptruncdw.
@@ -1889,6 +1963,8 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::TRUNCATE, MVT::v8i1, MVT::v8i32, 2 },
+ { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 4 },
+ { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 4 },
{ ISD::TRUNCATE, MVT::v16i8, MVT::v8i16, 1 },
{ ISD::TRUNCATE, MVT::v16i8, MVT::v4i32, 1 },
{ ISD::TRUNCATE, MVT::v16i8, MVT::v2i64, 1 },
@@ -1964,6 +2040,8 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::TRUNCATE, MVT::v8i1, MVT::v8i64, 9 },
{ ISD::TRUNCATE, MVT::v16i1, MVT::v16i64, 11 },
+ { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 6 },
+ { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 },
{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 2 }, // and+extract+packuswb
{ ISD::TRUNCATE, MVT::v16i8, MVT::v8i32, 5 },
{ ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 5 },
@@ -2365,13 +2443,21 @@ InstructionCost X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
assert(ISD && "Invalid opcode");
unsigned ExtraCost = 0;
- if (I && (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp)) {
+ if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) {
// Some vector comparison predicates cost extra instructions.
+ // TODO: Should we invert this and assume worst case cmp costs
+ // and reduce for particular predicates?
if (MTy.isVector() &&
!((ST->hasXOP() && (!ST->hasAVX2() || MTy.is128BitVector())) ||
(ST->hasAVX512() && 32 <= MTy.getScalarSizeInBits()) ||
ST->hasBWI())) {
- switch (cast<CmpInst>(I)->getPredicate()) {
+ // Fallback to I if a specific predicate wasn't specified.
+ CmpInst::Predicate Pred = VecPred;
+ if (I && (Pred == CmpInst::BAD_ICMP_PREDICATE ||
+ Pred == CmpInst::BAD_FCMP_PREDICATE))
+ Pred = cast<CmpInst>(I)->getPredicate();
+
+ switch (Pred) {
case CmpInst::Predicate::ICMP_NE:
// xor(cmpeq(x,y),-1)
ExtraCost = 1;
@@ -2399,6 +2485,11 @@ InstructionCost X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
ExtraCost = 3;
}
break;
+ case CmpInst::Predicate::BAD_ICMP_PREDICATE:
+ case CmpInst::Predicate::BAD_FCMP_PREDICATE:
+ // Assume worst case scenario and add the maximum extra cost.
+ ExtraCost = 3;
+ break;
default:
break;
}
@@ -2502,7 +2593,7 @@ InstructionCost X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
{ ISD::SELECT, MVT::v4f32, 3 }, // andps + andnps + orps
};
- if (ST->isSLM())
+ if (ST->useSLMArithCosts())
if (const auto *Entry = CostTableLookup(SLMCostTbl, ISD, MTy))
return LT.first * (ExtraCost + Entry->Cost);
@@ -2556,6 +2647,22 @@ X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
// TODO: Overflow intrinsics (*ADDO, *SUBO, *MULO) with vector types are not
// specialized in these tables yet.
+ static const CostTblEntry AVX512BITALGCostTbl[] = {
+ { ISD::CTPOP, MVT::v32i16, 1 },
+ { ISD::CTPOP, MVT::v64i8, 1 },
+ { ISD::CTPOP, MVT::v16i16, 1 },
+ { ISD::CTPOP, MVT::v32i8, 1 },
+ { ISD::CTPOP, MVT::v8i16, 1 },
+ { ISD::CTPOP, MVT::v16i8, 1 },
+ };
+ static const CostTblEntry AVX512VPOPCNTDQCostTbl[] = {
+ { ISD::CTPOP, MVT::v8i64, 1 },
+ { ISD::CTPOP, MVT::v16i32, 1 },
+ { ISD::CTPOP, MVT::v4i64, 1 },
+ { ISD::CTPOP, MVT::v8i32, 1 },
+ { ISD::CTPOP, MVT::v2i64, 1 },
+ { ISD::CTPOP, MVT::v4i32, 1 },
+ };
static const CostTblEntry AVX512CDCostTbl[] = {
{ ISD::CTLZ, MVT::v8i64, 1 },
{ ISD::CTLZ, MVT::v16i32, 1 },
@@ -2573,10 +2680,10 @@ X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
static const CostTblEntry AVX512BWCostTbl[] = {
{ ISD::ABS, MVT::v32i16, 1 },
{ ISD::ABS, MVT::v64i8, 1 },
- { ISD::BITREVERSE, MVT::v8i64, 5 },
- { ISD::BITREVERSE, MVT::v16i32, 5 },
- { ISD::BITREVERSE, MVT::v32i16, 5 },
- { ISD::BITREVERSE, MVT::v64i8, 5 },
+ { ISD::BITREVERSE, MVT::v8i64, 3 },
+ { ISD::BITREVERSE, MVT::v16i32, 3 },
+ { ISD::BITREVERSE, MVT::v32i16, 3 },
+ { ISD::BITREVERSE, MVT::v64i8, 2 },
{ ISD::BSWAP, MVT::v8i64, 1 },
{ ISD::BSWAP, MVT::v16i32, 1 },
{ ISD::BSWAP, MVT::v32i16, 1 },
@@ -2612,8 +2719,8 @@ X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
static const CostTblEntry AVX512CostTbl[] = {
{ ISD::ABS, MVT::v8i64, 1 },
{ ISD::ABS, MVT::v16i32, 1 },
- { ISD::ABS, MVT::v32i16, 2 }, // FIXME: include split
- { ISD::ABS, MVT::v64i8, 2 }, // FIXME: include split
+ { ISD::ABS, MVT::v32i16, 2 },
+ { ISD::ABS, MVT::v64i8, 2 },
{ ISD::ABS, MVT::v4i64, 1 },
{ ISD::ABS, MVT::v2i64, 1 },
{ ISD::BITREVERSE, MVT::v8i64, 36 },
@@ -2637,26 +2744,26 @@ X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::CTTZ, MVT::v64i8, 18 },
{ ISD::SMAX, MVT::v8i64, 1 },
{ ISD::SMAX, MVT::v16i32, 1 },
- { ISD::SMAX, MVT::v32i16, 2 }, // FIXME: include split
- { ISD::SMAX, MVT::v64i8, 2 }, // FIXME: include split
+ { ISD::SMAX, MVT::v32i16, 2 },
+ { ISD::SMAX, MVT::v64i8, 2 },
{ ISD::SMAX, MVT::v4i64, 1 },
{ ISD::SMAX, MVT::v2i64, 1 },
{ ISD::SMIN, MVT::v8i64, 1 },
{ ISD::SMIN, MVT::v16i32, 1 },
- { ISD::SMIN, MVT::v32i16, 2 }, // FIXME: include split
- { ISD::SMIN, MVT::v64i8, 2 }, // FIXME: include split
+ { ISD::SMIN, MVT::v32i16, 2 },
+ { ISD::SMIN, MVT::v64i8, 2 },
{ ISD::SMIN, MVT::v4i64, 1 },
{ ISD::SMIN, MVT::v2i64, 1 },
{ ISD::UMAX, MVT::v8i64, 1 },
{ ISD::UMAX, MVT::v16i32, 1 },
- { ISD::UMAX, MVT::v32i16, 2 }, // FIXME: include split
- { ISD::UMAX, MVT::v64i8, 2 }, // FIXME: include split
+ { ISD::UMAX, MVT::v32i16, 2 },
+ { ISD::UMAX, MVT::v64i8, 2 },
{ ISD::UMAX, MVT::v4i64, 1 },
{ ISD::UMAX, MVT::v2i64, 1 },
{ ISD::UMIN, MVT::v8i64, 1 },
{ ISD::UMIN, MVT::v16i32, 1 },
- { ISD::UMIN, MVT::v32i16, 2 }, // FIXME: include split
- { ISD::UMIN, MVT::v64i8, 2 }, // FIXME: include split
+ { ISD::UMIN, MVT::v32i16, 2 },
+ { ISD::UMIN, MVT::v64i8, 2 },
{ ISD::UMIN, MVT::v4i64, 1 },
{ ISD::UMIN, MVT::v2i64, 1 },
{ ISD::USUBSAT, MVT::v16i32, 2 }, // pmaxud + psubd
@@ -2667,14 +2774,14 @@ X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::UADDSAT, MVT::v2i64, 3 }, // not + pminuq + paddq
{ ISD::UADDSAT, MVT::v4i64, 3 }, // not + pminuq + paddq
{ ISD::UADDSAT, MVT::v8i64, 3 }, // not + pminuq + paddq
- { ISD::SADDSAT, MVT::v32i16, 2 }, // FIXME: include split
- { ISD::SADDSAT, MVT::v64i8, 2 }, // FIXME: include split
- { ISD::SSUBSAT, MVT::v32i16, 2 }, // FIXME: include split
- { ISD::SSUBSAT, MVT::v64i8, 2 }, // FIXME: include split
- { ISD::UADDSAT, MVT::v32i16, 2 }, // FIXME: include split
- { ISD::UADDSAT, MVT::v64i8, 2 }, // FIXME: include split
- { ISD::USUBSAT, MVT::v32i16, 2 }, // FIXME: include split
- { ISD::USUBSAT, MVT::v64i8, 2 }, // FIXME: include split
+ { ISD::SADDSAT, MVT::v32i16, 2 },
+ { ISD::SADDSAT, MVT::v64i8, 2 },
+ { ISD::SSUBSAT, MVT::v32i16, 2 },
+ { ISD::SSUBSAT, MVT::v64i8, 2 },
+ { ISD::UADDSAT, MVT::v32i16, 2 },
+ { ISD::UADDSAT, MVT::v64i8, 2 },
+ { ISD::USUBSAT, MVT::v32i16, 2 },
+ { ISD::USUBSAT, MVT::v64i8, 2 },
{ ISD::FMAXNUM, MVT::f32, 2 },
{ ISD::FMAXNUM, MVT::v4f32, 2 },
{ ISD::FMAXNUM, MVT::v8f32, 2 },
@@ -2703,25 +2810,41 @@ X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::ABS, MVT::v8i32, 1 },
{ ISD::ABS, MVT::v16i16, 1 },
{ ISD::ABS, MVT::v32i8, 1 },
- { ISD::BITREVERSE, MVT::v4i64, 5 },
- { ISD::BITREVERSE, MVT::v8i32, 5 },
- { ISD::BITREVERSE, MVT::v16i16, 5 },
- { ISD::BITREVERSE, MVT::v32i8, 5 },
+ { ISD::BITREVERSE, MVT::v2i64, 3 },
+ { ISD::BITREVERSE, MVT::v4i64, 3 },
+ { ISD::BITREVERSE, MVT::v4i32, 3 },
+ { ISD::BITREVERSE, MVT::v8i32, 3 },
+ { ISD::BITREVERSE, MVT::v8i16, 3 },
+ { ISD::BITREVERSE, MVT::v16i16, 3 },
+ { ISD::BITREVERSE, MVT::v16i8, 3 },
+ { ISD::BITREVERSE, MVT::v32i8, 3 },
{ ISD::BSWAP, MVT::v4i64, 1 },
{ ISD::BSWAP, MVT::v8i32, 1 },
{ ISD::BSWAP, MVT::v16i16, 1 },
- { ISD::CTLZ, MVT::v4i64, 23 },
- { ISD::CTLZ, MVT::v8i32, 18 },
- { ISD::CTLZ, MVT::v16i16, 14 },
- { ISD::CTLZ, MVT::v32i8, 9 },
- { ISD::CTPOP, MVT::v4i64, 7 },
- { ISD::CTPOP, MVT::v8i32, 11 },
- { ISD::CTPOP, MVT::v16i16, 9 },
- { ISD::CTPOP, MVT::v32i8, 6 },
- { ISD::CTTZ, MVT::v4i64, 10 },
- { ISD::CTTZ, MVT::v8i32, 14 },
- { ISD::CTTZ, MVT::v16i16, 12 },
- { ISD::CTTZ, MVT::v32i8, 9 },
+ { ISD::CTLZ, MVT::v2i64, 7 },
+ { ISD::CTLZ, MVT::v4i64, 7 },
+ { ISD::CTLZ, MVT::v4i32, 5 },
+ { ISD::CTLZ, MVT::v8i32, 5 },
+ { ISD::CTLZ, MVT::v8i16, 4 },
+ { ISD::CTLZ, MVT::v16i16, 4 },
+ { ISD::CTLZ, MVT::v16i8, 3 },
+ { ISD::CTLZ, MVT::v32i8, 3 },
+ { ISD::CTPOP, MVT::v2i64, 3 },
+ { ISD::CTPOP, MVT::v4i64, 3 },
+ { ISD::CTPOP, MVT::v4i32, 7 },
+ { ISD::CTPOP, MVT::v8i32, 7 },
+ { ISD::CTPOP, MVT::v8i16, 3 },
+ { ISD::CTPOP, MVT::v16i16, 3 },
+ { ISD::CTPOP, MVT::v16i8, 2 },
+ { ISD::CTPOP, MVT::v32i8, 2 },
+ { ISD::CTTZ, MVT::v2i64, 4 },
+ { ISD::CTTZ, MVT::v4i64, 4 },
+ { ISD::CTTZ, MVT::v4i32, 7 },
+ { ISD::CTTZ, MVT::v8i32, 7 },
+ { ISD::CTTZ, MVT::v8i16, 4 },
+ { ISD::CTTZ, MVT::v16i16, 4 },
+ { ISD::CTTZ, MVT::v16i8, 3 },
+ { ISD::CTTZ, MVT::v32i8, 3 },
{ ISD::SADDSAT, MVT::v16i16, 1 },
{ ISD::SADDSAT, MVT::v32i8, 1 },
{ ISD::SMAX, MVT::v8i32, 1 },
@@ -3093,10 +3216,18 @@ X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
if (const auto *Entry = CostTableLookup(GLMCostTbl, ISD, MTy))
return adjustTableCost(*Entry, LT.first, ICA.getFlags());
- if (ST->isSLM())
+ if (ST->useSLMArithCosts())
if (const auto *Entry = CostTableLookup(SLMCostTbl, ISD, MTy))
return adjustTableCost(*Entry, LT.first, ICA.getFlags());
+ if (ST->hasBITALG())
+ if (const auto *Entry = CostTableLookup(AVX512BITALGCostTbl, ISD, MTy))
+ return adjustTableCost(*Entry, LT.first, ICA.getFlags());
+
+ if (ST->hasVPOPCNTDQ())
+ if (const auto *Entry = CostTableLookup(AVX512VPOPCNTDQCostTbl, ISD, MTy))
+ return adjustTableCost(*Entry, LT.first, ICA.getFlags());
+
if (ST->hasCDI())
if (const auto *Entry = CostTableLookup(AVX512CDCostTbl, ISD, MTy))
return adjustTableCost(*Entry, LT.first, ICA.getFlags());
@@ -3179,8 +3310,6 @@ X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
}
}
- // TODO - add BMI (TZCNT) scalar handling
-
if (ST->is64Bit())
if (const auto *Entry = CostTableLookup(X64CostTbl, ISD, MTy))
return adjustTableCost(*Entry, LT.first, ICA.getFlags());
@@ -3312,7 +3441,7 @@ InstructionCost X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
if (Index == -1U && (Opcode == Instruction::ExtractElement ||
Opcode == Instruction::InsertElement)) {
// TODO: On some SSE41+ targets, we expand to cmp+splat+select patterns:
- // inselt N0, N1, N2 --> select (SplatN2 == {0,1,2...}) ? SplatN1 : N0.
+ // inselt N0, N1, N2 --> select (SplatN2 == {0,1,2...}) ? SplatN1 : N0.
// TODO: Move this to BasicTTIImpl.h? We'd need better gep + index handling.
assert(isa<FixedVectorType>(Val) && "Fixed vector type expected");
@@ -3378,7 +3507,7 @@ InstructionCost X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Unexpected vector opcode");
MVT MScalarTy = LT.second.getScalarType();
- if (ST->isSLM())
+ if (ST->useSLMArithCosts())
if (auto *Entry = CostTableLookup(SLMCostTbl, ISD, MScalarTy))
return Entry->Cost + RegisterFileMoveCost;
@@ -3505,6 +3634,112 @@ InstructionCost X86TTIImpl::getScalarizationOverhead(VectorType *Ty,
return Cost;
}
+InstructionCost
+X86TTIImpl::getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
+ int VF, const APInt &DemandedDstElts,
+ TTI::TargetCostKind CostKind) {
+ const unsigned EltTyBits = DL.getTypeSizeInBits(EltTy);
+ // We don't differentiate element types here, only element bit width.
+ EltTy = IntegerType::getIntNTy(EltTy->getContext(), EltTyBits);
+
+ auto bailout = [&]() {
+ return BaseT::getReplicationShuffleCost(EltTy, ReplicationFactor, VF,
+ DemandedDstElts, CostKind);
+ };
+
+ // For now, only deal with AVX512 cases.
+ if (!ST->hasAVX512())
+ return bailout();
+
+ // Do we have a native shuffle for this element type, or should we promote?
+ unsigned PromEltTyBits = EltTyBits;
+ switch (EltTyBits) {
+ case 32:
+ case 64:
+ break; // AVX512F.
+ case 16:
+ if (!ST->hasBWI())
+ PromEltTyBits = 32; // promote to i32, AVX512F.
+ break; // AVX512BW
+ case 8:
+ if (!ST->hasVBMI())
+ PromEltTyBits = 32; // promote to i32, AVX512F.
+ break; // AVX512VBMI
+ case 1:
+ // There is no support for shuffling i1 elements. We *must* promote.
+ if (ST->hasBWI()) {
+ if (ST->hasVBMI())
+ PromEltTyBits = 8; // promote to i8, AVX512VBMI.
+ else
+ PromEltTyBits = 16; // promote to i16, AVX512BW.
+ break;
+ }
+ return bailout();
+ default:
+ return bailout();
+ }
+ auto *PromEltTy = IntegerType::getIntNTy(EltTy->getContext(), PromEltTyBits);
+
+ auto *SrcVecTy = FixedVectorType::get(EltTy, VF);
+ auto *PromSrcVecTy = FixedVectorType::get(PromEltTy, VF);
+
+ int NumDstElements = VF * ReplicationFactor;
+ auto *PromDstVecTy = FixedVectorType::get(PromEltTy, NumDstElements);
+ auto *DstVecTy = FixedVectorType::get(EltTy, NumDstElements);
+
+ // Legalize the types.
+ MVT LegalSrcVecTy = TLI->getTypeLegalizationCost(DL, SrcVecTy).second;
+ MVT LegalPromSrcVecTy = TLI->getTypeLegalizationCost(DL, PromSrcVecTy).second;
+ MVT LegalPromDstVecTy = TLI->getTypeLegalizationCost(DL, PromDstVecTy).second;
+ MVT LegalDstVecTy = TLI->getTypeLegalizationCost(DL, DstVecTy).second;
+ // They should have legalized into vector types.
+ if (!LegalSrcVecTy.isVector() || !LegalPromSrcVecTy.isVector() ||
+ !LegalPromDstVecTy.isVector() || !LegalDstVecTy.isVector())
+ return bailout();
+
+ if (PromEltTyBits != EltTyBits) {
+ // If we have to perform the shuffle with wider elt type than our data type,
+ // then we will first need to anyext (we don't care about the new bits)
+ // the source elements, and then truncate Dst elements.
+ InstructionCost PromotionCost;
+ PromotionCost += getCastInstrCost(
+ Instruction::SExt, /*Dst=*/PromSrcVecTy, /*Src=*/SrcVecTy,
+ TargetTransformInfo::CastContextHint::None, CostKind);
+ PromotionCost +=
+ getCastInstrCost(Instruction::Trunc, /*Dst=*/DstVecTy,
+ /*Src=*/PromDstVecTy,
+ TargetTransformInfo::CastContextHint::None, CostKind);
+ return PromotionCost + getReplicationShuffleCost(PromEltTy,
+ ReplicationFactor, VF,
+ DemandedDstElts, CostKind);
+ }
+
+ assert(LegalSrcVecTy.getScalarSizeInBits() == EltTyBits &&
+ LegalSrcVecTy.getScalarType() == LegalDstVecTy.getScalarType() &&
+ "We expect that the legalization doesn't affect the element width, "
+ "doesn't coalesce/split elements.");
+
+ unsigned NumEltsPerDstVec = LegalDstVecTy.getVectorNumElements();
+ unsigned NumDstVectors =
+ divideCeil(DstVecTy->getNumElements(), NumEltsPerDstVec);
+
+ auto *SingleDstVecTy = FixedVectorType::get(EltTy, NumEltsPerDstVec);
+
+ // Not all the produced Dst elements may be demanded. In our case,
+ // given that a single Dst vector is formed by a single shuffle,
+ // if none of the elements that would form a single Dst vector are demanded,
+ // then that shuffle is not needed, so adjust the cost accordingly.
+ APInt DemandedDstVectors = APIntOps::ScaleBitMask(
+ DemandedDstElts.zextOrSelf(NumDstVectors * NumEltsPerDstVec),
+ NumDstVectors);
+ unsigned NumDstVectorsDemanded = DemandedDstVectors.countPopulation();
+
+ InstructionCost SingleShuffleCost =
+ getShuffleCost(TTI::SK_PermuteSingleSrc, SingleDstVecTy,
+ /*Mask=*/None, /*Index=*/0, /*SubTp=*/nullptr);
+ return NumDstVectorsDemanded * SingleShuffleCost;
+}
+
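The tail of getReplicationShuffleCost above charges one shuffle per legalized destination vector that has at least one demanded element. A minimal, self-contained sketch of that counting step (plain C++ with std::vector<bool> standing in for APInt; the helper name is illustrative, not LLVM API):

#include <cstddef>
#include <vector>

// Count the legalized destination vectors that contain at least one demanded
// element; only those require a shuffle (mirrors the ScaleBitMask +
// countPopulation step in the patch above, under the stated assumptions).
static unsigned countDemandedDstVectors(const std::vector<bool> &DemandedDstElts,
                                        unsigned NumEltsPerDstVec) {
  const std::size_t NumDstVectors =
      (DemandedDstElts.size() + NumEltsPerDstVec - 1) / NumEltsPerDstVec;
  unsigned NumDemanded = 0;
  for (std::size_t Vec = 0; Vec < NumDstVectors; ++Vec) {
    for (unsigned Elt = 0; Elt < NumEltsPerDstVec; ++Elt) {
      const std::size_t Idx = Vec * NumEltsPerDstVec + Elt;
      if (Idx < DemandedDstElts.size() && DemandedDstElts[Idx]) {
        ++NumDemanded; // This destination vector needs one shuffle.
        break;
      }
    }
  }
  return NumDemanded;
}
// The estimated cost would then be NumDemanded * SingleShuffleCost.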
InstructionCost X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
MaybeAlign Alignment,
unsigned AddressSpace,
@@ -3677,7 +3912,7 @@ X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy, Align Alignment,
if ((IsLoad && !isLegalMaskedLoad(SrcVTy, Alignment)) ||
(IsStore && !isLegalMaskedStore(SrcVTy, Alignment))) {
// Scalarization
- APInt DemandedElts = APInt::getAllOnesValue(NumElem);
+ APInt DemandedElts = APInt::getAllOnes(NumElem);
InstructionCost MaskSplitCost =
getScalarizationOverhead(MaskTy, DemandedElts, false, true);
InstructionCost ScalarCompareCost = getCmpSelInstrCost(
@@ -3795,7 +4030,7 @@ X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
EVT VT = TLI->getValueType(DL, ValTy);
if (VT.isSimple()) {
MVT MTy = VT.getSimpleVT();
- if (ST->isSLM())
+ if (ST->useSLMArithCosts())
if (const auto *Entry = CostTableLookup(SLMCostTblNoPairWise, ISD, MTy))
return Entry->Cost;
@@ -3834,7 +4069,7 @@ X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
ArithmeticCost *= LT.first - 1;
}
- if (ST->isSLM())
+ if (ST->useSLMArithCosts())
if (const auto *Entry = CostTableLookup(SLMCostTblNoPairWise, ISD, MTy))
return ArithmeticCost + Entry->Cost;
@@ -4589,16 +4824,17 @@ InstructionCost X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy,
InstructionCost X86TTIImpl::getGSScalarCost(unsigned Opcode, Type *SrcVTy,
bool VariableMask, Align Alignment,
unsigned AddressSpace) {
+ Type *ScalarTy = SrcVTy->getScalarType();
unsigned VF = cast<FixedVectorType>(SrcVTy)->getNumElements();
- APInt DemandedElts = APInt::getAllOnesValue(VF);
+ APInt DemandedElts = APInt::getAllOnes(VF);
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
InstructionCost MaskUnpackCost = 0;
if (VariableMask) {
auto *MaskTy =
FixedVectorType::get(Type::getInt1Ty(SrcVTy->getContext()), VF);
- MaskUnpackCost =
- getScalarizationOverhead(MaskTy, DemandedElts, false, true);
+ MaskUnpackCost = getScalarizationOverhead(
+ MaskTy, DemandedElts, /*Insert=*/false, /*Extract=*/true);
InstructionCost ScalarCompareCost = getCmpSelInstrCost(
Instruction::ICmp, Type::getInt1Ty(SrcVTy->getContext()), nullptr,
CmpInst::BAD_ICMP_PREDICATE, CostKind);
@@ -4606,24 +4842,23 @@ InstructionCost X86TTIImpl::getGSScalarCost(unsigned Opcode, Type *SrcVTy,
MaskUnpackCost += VF * (BranchCost + ScalarCompareCost);
}
+ InstructionCost AddressUnpackCost = getScalarizationOverhead(
+ FixedVectorType::get(ScalarTy->getPointerTo(), VF), DemandedElts,
+ /*Insert=*/false, /*Extract=*/true);
+
// The cost of the scalar loads/stores.
InstructionCost MemoryOpCost =
- VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
- MaybeAlign(Alignment), AddressSpace, CostKind);
-
- InstructionCost InsertExtractCost = 0;
- if (Opcode == Instruction::Load)
- for (unsigned i = 0; i < VF; ++i)
- // Add the cost of inserting each scalar load into the vector
- InsertExtractCost +=
- getVectorInstrCost(Instruction::InsertElement, SrcVTy, i);
- else
- for (unsigned i = 0; i < VF; ++i)
- // Add the cost of extracting each element out of the data vector
- InsertExtractCost +=
- getVectorInstrCost(Instruction::ExtractElement, SrcVTy, i);
+ VF * getMemoryOpCost(Opcode, ScalarTy, MaybeAlign(Alignment),
+ AddressSpace, CostKind);
- return MemoryOpCost + MaskUnpackCost + InsertExtractCost;
+ // The cost of forming the vector from loaded scalars/
+ // scalarizing the vector to perform scalar stores.
+ InstructionCost InsertExtractCost =
+ getScalarizationOverhead(cast<FixedVectorType>(SrcVTy), DemandedElts,
+ /*Insert=*/Opcode == Instruction::Load,
+ /*Extract=*/Opcode == Instruction::Store);
+
+ return AddressUnpackCost + MemoryOpCost + MaskUnpackCost + InsertExtractCost;
}
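With this change the scalarized gather/scatter estimate is just the sum of address extraction, per-lane memory operations, optional mask unpacking, and insert/extract of the data vector. A hedged back-of-the-envelope helper (not the real TTI signature) that mirrors the final return above:

// Rough composition of the scalarized gather/scatter cost; all inputs are
// costs already obtained from the respective TTI queries.
// e.g. VF = 4, per-lane load cost 1, address unpack 4, no mask, 4 inserts
// at cost 1 each -> 4 + 4*1 + 0 + 4 = 12.
static unsigned scalarizedGatherScatterCost(unsigned AddressUnpackCost,
                                            unsigned PerLaneMemOpCost,
                                            unsigned VF, unsigned MaskUnpackCost,
                                            unsigned InsertExtractCost) {
  return AddressUnpackCost + VF * PerLaneMemOpCost + MaskUnpackCost +
         InsertExtractCost;
}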
/// Calculate the cost of Gather / Scatter operation
@@ -4690,6 +4925,9 @@ bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy, Align Alignment) {
if (ScalarTy->isFloatTy() || ScalarTy->isDoubleTy())
return true;
+ if (ScalarTy->isHalfTy() && ST->hasBWI() && ST->hasFP16())
+ return true;
+
if (!ScalarTy->isIntegerTy())
return false;
@@ -4732,7 +4970,7 @@ bool X86TTIImpl::isLegalNTStore(Type *DataType, Align Alignment) {
// loads require AVX2).
if (DataSize == 32)
return ST->hasAVX();
- else if (DataSize == 16)
+ if (DataSize == 16)
return ST->hasSSE1();
return true;
}
@@ -4765,11 +5003,15 @@ bool X86TTIImpl::isLegalMaskedCompressStore(Type *DataTy) {
return isLegalMaskedExpandLoad(DataTy);
}
-bool X86TTIImpl::isLegalMaskedGather(Type *DataTy, Align Alignment) {
+bool X86TTIImpl::supportsGather() const {
// Some CPUs have better gather performance than others.
// TODO: Remove the explicit ST->hasAVX512()?, That would mean we would only
// enable gather with a -march.
- if (!(ST->hasAVX512() || (ST->hasFastGather() && ST->hasAVX2())))
+ return ST->hasAVX512() || (ST->hasFastGather() && ST->hasAVX2());
+}
+
+bool X86TTIImpl::isLegalMaskedGather(Type *DataTy, Align Alignment) {
+ if (!supportsGather())
return false;
// This function is called now in two cases: from the Loop Vectorizer
@@ -4893,6 +5135,14 @@ X86TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
return Options;
}
+bool X86TTIImpl::prefersVectorizedAddressing() const {
+ return supportsGather();
+}
+
+bool X86TTIImpl::supportsEfficientVectorElementLoadStore() const {
+ return false;
+}
+
bool X86TTIImpl::enableInterleavedAccessVectorization() {
// TODO: We expect this to be beneficial regardless of arch,
// but there are currently some unexplained performance artifacts on Atom.
@@ -4900,122 +5150,6 @@ bool X86TTIImpl::enableInterleavedAccessVectorization() {
return !(ST->isAtom());
}
-// Get estimation for interleaved load/store operations for AVX2.
-// \p Factor is the interleaved-access factor (stride) - number of
-// (interleaved) elements in the group.
-// \p Indices contains the indices for a strided load: when the
-// interleaved load has gaps they indicate which elements are used.
-// If Indices is empty (or if the number of indices is equal to the size
-// of the interleaved-access as given in \p Factor) the access has no gaps.
-//
-// As opposed to AVX-512, AVX2 does not have generic shuffles that allow
-// computing the cost using a generic formula as a function of generic
-// shuffles. We therefore use a lookup table instead, filled according to
-// the instruction sequences that codegen currently generates.
-InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX2(
- unsigned Opcode, FixedVectorType *VecTy, unsigned Factor,
- ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace,
- TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) {
-
- if (UseMaskForCond || UseMaskForGaps)
- return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace, CostKind,
- UseMaskForCond, UseMaskForGaps);
-
- // We currently Support only fully-interleaved groups, with no gaps.
- // TODO: Support also strided loads (interleaved-groups with gaps).
- if (Indices.size() && Indices.size() != Factor)
- return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace, CostKind);
-
- // VecTy for interleave memop is <VF*Factor x Elt>.
- // So, for VF=4, Interleave Factor = 3, Element type = i32 we have
- // VecTy = <12 x i32>.
- MVT LegalVT = getTLI()->getTypeLegalizationCost(DL, VecTy).second;
-
- // This function can be called with VecTy=<6xi128>, Factor=3, in which case
- // the VF=2, while v2i128 is an unsupported MVT vector type
- // (see MachineValueType.h::getVectorVT()).
- if (!LegalVT.isVector())
- return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace, CostKind);
-
- unsigned VF = VecTy->getNumElements() / Factor;
- Type *ScalarTy = VecTy->getElementType();
- // Deduplicate entries, model floats/pointers as appropriately-sized integers.
- if (!ScalarTy->isIntegerTy())
- ScalarTy =
- Type::getIntNTy(ScalarTy->getContext(), DL.getTypeSizeInBits(ScalarTy));
-
- // Get the cost of all the memory operations.
- InstructionCost MemOpCosts = getMemoryOpCost(
- Opcode, VecTy, MaybeAlign(Alignment), AddressSpace, CostKind);
-
- auto *VT = FixedVectorType::get(ScalarTy, VF);
- EVT ETy = TLI->getValueType(DL, VT);
- if (!ETy.isSimple())
- return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace, CostKind);
-
- // TODO: Complete for other data-types and strides.
- // Each combination of Stride, element bit width and VF results in a different
- // sequence; The cost tables are therefore accessed with:
- // Factor (stride) and VectorType=VFxiN.
- // The Cost accounts only for the shuffle sequence;
- // The cost of the loads/stores is accounted for separately.
- //
- static const CostTblEntry AVX2InterleavedLoadTbl[] = {
- {2, MVT::v4i64, 6}, // (load 8i64 and) deinterleave into 2 x 4i64
-
- {3, MVT::v2i8, 10}, // (load 6i8 and) deinterleave into 3 x 2i8
- {3, MVT::v4i8, 4}, // (load 12i8 and) deinterleave into 3 x 4i8
- {3, MVT::v8i8, 9}, // (load 24i8 and) deinterleave into 3 x 8i8
- {3, MVT::v16i8, 11}, // (load 48i8 and) deinterleave into 3 x 16i8
- {3, MVT::v32i8, 13}, // (load 96i8 and) deinterleave into 3 x 32i8
-
- {3, MVT::v8i32, 17}, // (load 24i32 and) deinterleave into 3 x 8i32
-
- {4, MVT::v2i8, 12}, // (load 8i8 and) deinterleave into 4 x 2i8
- {4, MVT::v4i8, 4}, // (load 16i8 and) deinterleave into 4 x 4i8
- {4, MVT::v8i8, 20}, // (load 32i8 and) deinterleave into 4 x 8i8
- {4, MVT::v16i8, 39}, // (load 64i8 and) deinterleave into 4 x 16i8
- {4, MVT::v32i8, 80}, // (load 128i8 and) deinterleave into 4 x 32i8
-
- {8, MVT::v8i32, 40} // (load 64i32 and) deinterleave into 8 x 8i32
- };
-
- static const CostTblEntry AVX2InterleavedStoreTbl[] = {
- {2, MVT::v4i64, 6}, // interleave 2 x 4i64 into 8i64 (and store)
-
- {3, MVT::v2i8, 7}, // interleave 3 x 2i8 into 6i8 (and store)
- {3, MVT::v4i8, 8}, // interleave 3 x 4i8 into 12i8 (and store)
- {3, MVT::v8i8, 11}, // interleave 3 x 8i8 into 24i8 (and store)
- {3, MVT::v16i8, 11}, // interleave 3 x 16i8 into 48i8 (and store)
- {3, MVT::v32i8, 13}, // interleave 3 x 32i8 into 96i8 (and store)
-
- {4, MVT::v2i8, 12}, // interleave 4 x 2i8 into 8i8 (and store)
- {4, MVT::v4i8, 9}, // interleave 4 x 4i8 into 16i8 (and store)
- {4, MVT::v8i8, 10}, // interleave 4 x 8i8 into 32i8 (and store)
- {4, MVT::v16i8, 10}, // interleave 4 x 16i8 into 64i8 (and store)
- {4, MVT::v32i8, 12} // interleave 4 x 32i8 into 128i8 (and store)
- };
-
- if (Opcode == Instruction::Load) {
- if (const auto *Entry =
- CostTableLookup(AVX2InterleavedLoadTbl, Factor, ETy.getSimpleVT()))
- return MemOpCosts + Entry->Cost;
- } else {
- assert(Opcode == Instruction::Store &&
- "Expected Store Instruction at this point");
- if (const auto *Entry =
- CostTableLookup(AVX2InterleavedStoreTbl, Factor, ETy.getSimpleVT()))
- return MemOpCosts + Entry->Cost;
- }
-
- return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace, CostKind);
-}
-
// Get estimation for interleaved load/store operations and strided load.
// \p Indices contains indices for strided load.
// \p Factor - the factor of interleaving.
@@ -5024,12 +5158,6 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512(
unsigned Opcode, FixedVectorType *VecTy, unsigned Factor,
ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) {
-
- if (UseMaskForCond || UseMaskForGaps)
- return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace, CostKind,
- UseMaskForCond, UseMaskForGaps);
-
// VecTy for interleave memop is <VF*Factor x Elt>.
// So, for VF=4, Interleave Factor = 3, Element type = i32 we have
// VecTy = <12 x i32>.
@@ -5044,12 +5172,46 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512(
// Get the cost of one memory operation.
auto *SingleMemOpTy = FixedVectorType::get(VecTy->getElementType(),
LegalVT.getVectorNumElements());
- InstructionCost MemOpCost = getMemoryOpCost(
- Opcode, SingleMemOpTy, MaybeAlign(Alignment), AddressSpace, CostKind);
+ InstructionCost MemOpCost;
+ if (UseMaskForCond || UseMaskForGaps)
+ MemOpCost = getMaskedMemoryOpCost(Opcode, SingleMemOpTy, Alignment,
+ AddressSpace, CostKind);
+ else
+ MemOpCost = getMemoryOpCost(Opcode, SingleMemOpTy, MaybeAlign(Alignment),
+ AddressSpace, CostKind);
unsigned VF = VecTy->getNumElements() / Factor;
MVT VT = MVT::getVectorVT(MVT::getVT(VecTy->getScalarType()), VF);
+ // FIXME: this is the most conservative estimate for the mask cost.
+ InstructionCost MaskCost;
+ if (UseMaskForCond || UseMaskForGaps) {
+ APInt DemandedLoadStoreElts = APInt::getZero(VecTy->getNumElements());
+ for (unsigned Index : Indices) {
+ assert(Index < Factor && "Invalid index for interleaved memory op");
+ for (unsigned Elm = 0; Elm < VF; Elm++)
+ DemandedLoadStoreElts.setBit(Index + Elm * Factor);
+ }
+
+ Type *I8Type = Type::getInt8Ty(VecTy->getContext());
+
+ MaskCost = getReplicationShuffleCost(
+ I8Type, Factor, VF,
+ UseMaskForGaps ? DemandedLoadStoreElts
+ : APInt::getAllOnes(VecTy->getNumElements()),
+ CostKind);
+
+ // The Gaps mask is invariant and created outside the loop, therefore the
+ // cost of creating it is not accounted for here. However, if we have both
+ // a MaskForGaps and some other mask that guards the execution of the
+ // memory access, we need to account for the cost of And-ing the two masks
+ // inside the loop.
+ if (UseMaskForGaps) {
+ auto *MaskVT = FixedVectorType::get(I8Type, VecTy->getNumElements());
+ MaskCost += getArithmeticInstrCost(BinaryOperator::And, MaskVT, CostKind);
+ }
+ }
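The mask-cost block above first determines which lanes of the wide <VF*Factor x Elt> vector are touched by the members listed in Indices. A standalone sketch of that bit-marking (plain C++, not the APInt API; names are illustrative):

#include <vector>

// For an interleave group of stride Factor and vectorization factor VF, mark
// the lanes of the wide <VF*Factor> vector touched by the given member
// indices (mirrors the DemandedLoadStoreElts loop in the patch above).
static std::vector<bool>
demandedInterleavedLanes(unsigned Factor, unsigned VF,
                         const std::vector<unsigned> &Indices) {
  std::vector<bool> Demanded(Factor * VF, false);
  for (unsigned Index : Indices)        // which members of the group are used
    for (unsigned Elm = 0; Elm < VF; ++Elm)
      Demanded[Index + Elm * Factor] = true;
  return Demanded;
}
// e.g. Factor = 3, VF = 4, Indices = {0, 2}
//  -> lanes {0, 2, 3, 5, 6, 8, 9, 11} demanded.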
+
if (Opcode == Instruction::Load) {
// The tables (AVX512InterleavedLoadTbl and AVX512InterleavedStoreTbl)
// contain the cost of the optimized shuffle sequence that the
@@ -5065,7 +5227,7 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512(
if (const auto *Entry =
CostTableLookup(AVX512InterleavedLoadTbl, Factor, VT))
- return NumOfMemOps * MemOpCost + Entry->Cost;
+ return MaskCost + NumOfMemOps * MemOpCost + Entry->Cost;
//If an entry does not exist, fallback to the default implementation.
// Kind of shuffle depends on number of loaded values.
@@ -5102,7 +5264,8 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512(
NumOfMoves = NumOfResults * NumOfShufflesPerResult / 2;
InstructionCost Cost = NumOfResults * NumOfShufflesPerResult * ShuffleCost +
- NumOfUnfoldedLoads * MemOpCost + NumOfMoves;
+ MaskCost + NumOfUnfoldedLoads * MemOpCost +
+ NumOfMoves;
return Cost;
}
@@ -5124,7 +5287,7 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512(
if (const auto *Entry =
CostTableLookup(AVX512InterleavedStoreTbl, Factor, VT))
- return NumOfMemOps * MemOpCost + Entry->Cost;
+ return MaskCost + NumOfMemOps * MemOpCost + Entry->Cost;
//If an entry does not exist, fallback to the default implementation.
// There is no strided stores meanwhile. And store can't be folded in
@@ -5138,33 +5301,321 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512(
// We need additional instructions to keep sources.
unsigned NumOfMoves = NumOfMemOps * NumOfShufflesPerStore / 2;
InstructionCost Cost =
+ MaskCost +
NumOfMemOps * (MemOpCost + NumOfShufflesPerStore * ShuffleCost) +
NumOfMoves;
return Cost;
}
InstructionCost X86TTIImpl::getInterleavedMemoryOpCost(
- unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+ unsigned Opcode, Type *BaseTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond, bool UseMaskForGaps) {
- auto isSupportedOnAVX512 = [](Type *VecTy, bool HasBW) {
+ auto *VecTy = cast<FixedVectorType>(BaseTy);
+
+ auto isSupportedOnAVX512 = [&](Type *VecTy, bool HasBW) {
Type *EltTy = cast<VectorType>(VecTy)->getElementType();
if (EltTy->isFloatTy() || EltTy->isDoubleTy() || EltTy->isIntegerTy(64) ||
EltTy->isIntegerTy(32) || EltTy->isPointerTy())
return true;
- if (EltTy->isIntegerTy(16) || EltTy->isIntegerTy(8))
+ if (EltTy->isIntegerTy(16) || EltTy->isIntegerTy(8) ||
+ (!ST->useSoftFloat() && ST->hasFP16() && EltTy->isHalfTy()))
return HasBW;
return false;
};
if (ST->hasAVX512() && isSupportedOnAVX512(VecTy, ST->hasBWI()))
return getInterleavedMemoryOpCostAVX512(
- Opcode, cast<FixedVectorType>(VecTy), Factor, Indices, Alignment,
- AddressSpace, CostKind, UseMaskForCond, UseMaskForGaps);
- if (ST->hasAVX2())
- return getInterleavedMemoryOpCostAVX2(
- Opcode, cast<FixedVectorType>(VecTy), Factor, Indices, Alignment,
+ Opcode, VecTy, Factor, Indices, Alignment,
AddressSpace, CostKind, UseMaskForCond, UseMaskForGaps);
+ if (UseMaskForCond || UseMaskForGaps)
+ return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
+ Alignment, AddressSpace, CostKind,
+ UseMaskForCond, UseMaskForGaps);
+
+ // Get estimation for interleaved load/store operations for SSE-AVX2.
+ // As opposed to AVX-512, SSE-AVX2 do not have generic shuffles that allow
+ // computing the cost using a generic formula as a function of generic
+ // shuffles. We therefore use a lookup table instead, filled according to
+ // the instruction sequences that codegen currently generates.
+
+ // VecTy for interleave memop is <VF*Factor x Elt>.
+ // So, for VF=4, Interleave Factor = 3, Element type = i32 we have
+ // VecTy = <12 x i32>.
+ MVT LegalVT = getTLI()->getTypeLegalizationCost(DL, VecTy).second;
+
+ // This function can be called with VecTy=<6xi128>, Factor=3, in which case
+ // the VF=2, while v2i128 is an unsupported MVT vector type
+ // (see MachineValueType.h::getVectorVT()).
+ if (!LegalVT.isVector())
+ return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
+ Alignment, AddressSpace, CostKind);
+
+ unsigned VF = VecTy->getNumElements() / Factor;
+ Type *ScalarTy = VecTy->getElementType();
+ // Deduplicate entries, model floats/pointers as appropriately-sized integers.
+ if (!ScalarTy->isIntegerTy())
+ ScalarTy =
+ Type::getIntNTy(ScalarTy->getContext(), DL.getTypeSizeInBits(ScalarTy));
+
+ // Get the cost of all the memory operations.
+ // FIXME: discount dead loads.
+ InstructionCost MemOpCosts = getMemoryOpCost(
+ Opcode, VecTy, MaybeAlign(Alignment), AddressSpace, CostKind);
+
+ auto *VT = FixedVectorType::get(ScalarTy, VF);
+ EVT ETy = TLI->getValueType(DL, VT);
+ if (!ETy.isSimple())
+ return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
+ Alignment, AddressSpace, CostKind);
+
+ // TODO: Complete for other data-types and strides.
+ // Each combination of Stride, element bit width and VF results in a different
+ // sequence; The cost tables are therefore accessed with:
+ // Factor (stride) and VectorType=VFxiN.
+ // The Cost accounts only for the shuffle sequence;
+ // The cost of the loads/stores is accounted for separately.
+ //
+ static const CostTblEntry AVX2InterleavedLoadTbl[] = {
+ {2, MVT::v2i8, 2}, // (load 4i8 and) deinterleave into 2 x 2i8
+ {2, MVT::v4i8, 2}, // (load 8i8 and) deinterleave into 2 x 4i8
+ {2, MVT::v8i8, 2}, // (load 16i8 and) deinterleave into 2 x 8i8
+ {2, MVT::v16i8, 4}, // (load 32i8 and) deinterleave into 2 x 16i8
+ {2, MVT::v32i8, 6}, // (load 64i8 and) deinterleave into 2 x 32i8
+
+ {2, MVT::v8i16, 6}, // (load 16i16 and) deinterleave into 2 x 8i16
+ {2, MVT::v16i16, 9}, // (load 32i16 and) deinterleave into 2 x 16i16
+ {2, MVT::v32i16, 18}, // (load 64i16 and) deinterleave into 2 x 32i16
+
+ {2, MVT::v8i32, 4}, // (load 16i32 and) deinterleave into 2 x 8i32
+ {2, MVT::v16i32, 8}, // (load 32i32 and) deinterleave into 2 x 16i32
+ {2, MVT::v32i32, 16}, // (load 64i32 and) deinterleave into 2 x 32i32
+
+ {2, MVT::v4i64, 4}, // (load 8i64 and) deinterleave into 2 x 4i64
+ {2, MVT::v8i64, 8}, // (load 16i64 and) deinterleave into 2 x 8i64
+ {2, MVT::v16i64, 16}, // (load 32i64 and) deinterleave into 2 x 16i64
+ {2, MVT::v32i64, 32}, // (load 64i64 and) deinterleave into 2 x 32i64
+
+ {3, MVT::v2i8, 3}, // (load 6i8 and) deinterleave into 3 x 2i8
+ {3, MVT::v4i8, 3}, // (load 12i8 and) deinterleave into 3 x 4i8
+ {3, MVT::v8i8, 6}, // (load 24i8 and) deinterleave into 3 x 8i8
+ {3, MVT::v16i8, 11}, // (load 48i8 and) deinterleave into 3 x 16i8
+ {3, MVT::v32i8, 14}, // (load 96i8 and) deinterleave into 3 x 32i8
+
+ {3, MVT::v2i16, 5}, // (load 6i16 and) deinterleave into 3 x 2i16
+ {3, MVT::v4i16, 7}, // (load 12i16 and) deinterleave into 3 x 4i16
+ {3, MVT::v8i16, 9}, // (load 24i16 and) deinterleave into 3 x 8i16
+ {3, MVT::v16i16, 28}, // (load 48i16 and) deinterleave into 3 x 16i16
+ {3, MVT::v32i16, 56}, // (load 96i16 and) deinterleave into 3 x 32i16
+
+ {3, MVT::v2i32, 3}, // (load 6i32 and) deinterleave into 3 x 2i32
+ {3, MVT::v4i32, 3}, // (load 12i32 and) deinterleave into 3 x 4i32
+ {3, MVT::v8i32, 7}, // (load 24i32 and) deinterleave into 3 x 8i32
+ {3, MVT::v16i32, 14}, // (load 48i32 and) deinterleave into 3 x 16i32
+ {3, MVT::v32i32, 32}, // (load 96i32 and) deinterleave into 3 x 32i32
+
+ {3, MVT::v2i64, 1}, // (load 6i64 and) deinterleave into 3 x 2i64
+ {3, MVT::v4i64, 5}, // (load 12i64 and) deinterleave into 3 x 4i64
+ {3, MVT::v8i64, 10}, // (load 24i64 and) deinterleave into 3 x 8i64
+ {3, MVT::v16i64, 20}, // (load 48i64 and) deinterleave into 3 x 16i64
+
+ {4, MVT::v2i8, 4}, // (load 8i8 and) deinterleave into 4 x 2i8
+ {4, MVT::v4i8, 4}, // (load 16i8 and) deinterleave into 4 x 4i8
+ {4, MVT::v8i8, 12}, // (load 32i8 and) deinterleave into 4 x 8i8
+ {4, MVT::v16i8, 24}, // (load 64i8 and) deinterleave into 4 x 16i8
+ {4, MVT::v32i8, 56}, // (load 128i8 and) deinterleave into 4 x 32i8
+
+ {4, MVT::v2i16, 6}, // (load 8i16 and) deinterleave into 4 x 2i16
+ {4, MVT::v4i16, 17}, // (load 16i16 and) deinterleave into 4 x 4i16
+ {4, MVT::v8i16, 33}, // (load 32i16 and) deinterleave into 4 x 8i16
+ {4, MVT::v16i16, 75}, // (load 64i16 and) deinterleave into 4 x 16i16
+ {4, MVT::v32i16, 150}, // (load 128i16 and) deinterleave into 4 x 32i16
+
+ {4, MVT::v2i32, 4}, // (load 8i32 and) deinterleave into 4 x 2i32
+ {4, MVT::v4i32, 8}, // (load 16i32 and) deinterleave into 4 x 4i32
+ {4, MVT::v8i32, 16}, // (load 32i32 and) deinterleave into 4 x 8i32
+ {4, MVT::v16i32, 32}, // (load 64i32 and) deinterleave into 4 x 16i32
+ {4, MVT::v32i32, 68}, // (load 128i32 and) deinterleave into 4 x 32i32
+
+ {4, MVT::v2i64, 6}, // (load 8i64 and) deinterleave into 4 x 2i64
+ {4, MVT::v4i64, 8}, // (load 16i64 and) deinterleave into 4 x 4i64
+ {4, MVT::v8i64, 20}, // (load 32i64 and) deinterleave into 4 x 8i64
+ {4, MVT::v16i64, 40}, // (load 64i64 and) deinterleave into 4 x 16i64
+
+ {6, MVT::v2i8, 6}, // (load 12i8 and) deinterleave into 6 x 2i8
+ {6, MVT::v4i8, 14}, // (load 24i8 and) deinterleave into 6 x 4i8
+ {6, MVT::v8i8, 18}, // (load 48i8 and) deinterleave into 6 x 8i8
+ {6, MVT::v16i8, 43}, // (load 96i8 and) deinterleave into 6 x 16i8
+ {6, MVT::v32i8, 82}, // (load 192i8 and) deinterleave into 6 x 32i8
+
+ {6, MVT::v2i16, 13}, // (load 12i16 and) deinterleave into 6 x 2i16
+ {6, MVT::v4i16, 9}, // (load 24i16 and) deinterleave into 6 x 4i16
+ {6, MVT::v8i16, 39}, // (load 48i16 and) deinterleave into 6 x 8i16
+ {6, MVT::v16i16, 106}, // (load 96i16 and) deinterleave into 6 x 16i16
+ {6, MVT::v32i16, 212}, // (load 192i16 and) deinterleave into 6 x 32i16
+
+ {6, MVT::v2i32, 6}, // (load 12i32 and) deinterleave into 6 x 2i32
+ {6, MVT::v4i32, 15}, // (load 24i32 and) deinterleave into 6 x 4i32
+ {6, MVT::v8i32, 31}, // (load 48i32 and) deinterleave into 6 x 8i32
+ {6, MVT::v16i32, 64}, // (load 96i32 and) deinterleave into 6 x 16i32
+
+ {6, MVT::v2i64, 6}, // (load 12i64 and) deinterleave into 6 x 2i64
+ {6, MVT::v4i64, 18}, // (load 24i64 and) deinterleave into 6 x 4i64
+ {6, MVT::v8i64, 36}, // (load 48i64 and) deinterleave into 6 x 8i64
+
+ {8, MVT::v8i32, 40} // (load 64i32 and) deinterleave into 8 x 8i32
+ };
+
+ static const CostTblEntry SSSE3InterleavedLoadTbl[] = {
+ {2, MVT::v4i16, 2}, // (load 8i16 and) deinterleave into 2 x 4i16
+ };
+
+ static const CostTblEntry SSE2InterleavedLoadTbl[] = {
+ {2, MVT::v2i16, 2}, // (load 4i16 and) deinterleave into 2 x 2i16
+ {2, MVT::v4i16, 7}, // (load 8i16 and) deinterleave into 2 x 4i16
+
+ {2, MVT::v2i32, 2}, // (load 4i32 and) deinterleave into 2 x 2i32
+ {2, MVT::v4i32, 2}, // (load 8i32 and) deinterleave into 2 x 4i32
+
+ {2, MVT::v2i64, 2}, // (load 4i64 and) deinterleave into 2 x 2i64
+ };
+
+ static const CostTblEntry AVX2InterleavedStoreTbl[] = {
+ {2, MVT::v16i8, 3}, // interleave 2 x 16i8 into 32i8 (and store)
+ {2, MVT::v32i8, 4}, // interleave 2 x 32i8 into 64i8 (and store)
+
+ {2, MVT::v8i16, 3}, // interleave 2 x 8i16 into 16i16 (and store)
+ {2, MVT::v16i16, 4}, // interleave 2 x 16i16 into 32i16 (and store)
+ {2, MVT::v32i16, 8}, // interleave 2 x 32i16 into 64i16 (and store)
+
+ {2, MVT::v4i32, 2}, // interleave 2 x 4i32 into 8i32 (and store)
+ {2, MVT::v8i32, 4}, // interleave 2 x 8i32 into 16i32 (and store)
+ {2, MVT::v16i32, 8}, // interleave 2 x 16i32 into 32i32 (and store)
+ {2, MVT::v32i32, 16}, // interleave 2 x 32i32 into 64i32 (and store)
+
+ {2, MVT::v2i64, 2}, // interleave 2 x 2i64 into 4i64 (and store)
+ {2, MVT::v4i64, 4}, // interleave 2 x 4i64 into 8i64 (and store)
+ {2, MVT::v8i64, 8}, // interleave 2 x 8i64 into 16i64 (and store)
+ {2, MVT::v16i64, 16}, // interleave 2 x 16i64 into 32i64 (and store)
+ {2, MVT::v32i64, 32}, // interleave 2 x 32i64 into 64i64 (and store)
+
+ {3, MVT::v2i8, 4}, // interleave 3 x 2i8 into 6i8 (and store)
+ {3, MVT::v4i8, 4}, // interleave 3 x 4i8 into 12i8 (and store)
+ {3, MVT::v8i8, 6}, // interleave 3 x 8i8 into 24i8 (and store)
+ {3, MVT::v16i8, 11}, // interleave 3 x 16i8 into 48i8 (and store)
+ {3, MVT::v32i8, 13}, // interleave 3 x 32i8 into 96i8 (and store)
+
+ {3, MVT::v2i16, 4}, // interleave 3 x 2i16 into 6i16 (and store)
+ {3, MVT::v4i16, 6}, // interleave 3 x 4i16 into 12i16 (and store)
+ {3, MVT::v8i16, 12}, // interleave 3 x 8i16 into 24i16 (and store)
+ {3, MVT::v16i16, 27}, // interleave 3 x 16i16 into 48i16 (and store)
+ {3, MVT::v32i16, 54}, // interleave 3 x 32i16 into 96i16 (and store)
+
+ {3, MVT::v2i32, 4}, // interleave 3 x 2i32 into 6i32 (and store)
+ {3, MVT::v4i32, 5}, // interleave 3 x 4i32 into 12i32 (and store)
+ {3, MVT::v8i32, 11}, // interleave 3 x 8i32 into 24i32 (and store)
+ {3, MVT::v16i32, 22}, // interleave 3 x 16i32 into 48i32 (and store)
+ {3, MVT::v32i32, 48}, // interleave 3 x 32i32 into 96i32 (and store)
+
+ {3, MVT::v2i64, 4}, // interleave 3 x 2i64 into 6i64 (and store)
+ {3, MVT::v4i64, 6}, // interleave 3 x 4i64 into 12i64 (and store)
+ {3, MVT::v8i64, 12}, // interleave 3 x 8i64 into 24i64 (and store)
+ {3, MVT::v16i64, 24}, // interleave 3 x 16i64 into 48i64 (and store)
+
+ {4, MVT::v2i8, 4}, // interleave 4 x 2i8 into 8i8 (and store)
+ {4, MVT::v4i8, 4}, // interleave 4 x 4i8 into 16i8 (and store)
+ {4, MVT::v8i8, 4}, // interleave 4 x 8i8 into 32i8 (and store)
+ {4, MVT::v16i8, 8}, // interleave 4 x 16i8 into 64i8 (and store)
+ {4, MVT::v32i8, 12}, // interleave 4 x 32i8 into 128i8 (and store)
+
+ {4, MVT::v2i16, 2}, // interleave 4 x 2i16 into 8i16 (and store)
+ {4, MVT::v4i16, 6}, // interleave 4 x 4i16 into 16i16 (and store)
+ {4, MVT::v8i16, 10}, // interleave 4 x 8i16 into 32i16 (and store)
+ {4, MVT::v16i16, 32}, // interleave 4 x 16i16 into 64i16 (and store)
+ {4, MVT::v32i16, 64}, // interleave 4 x 32i16 into 128i16 (and store)
+
+ {4, MVT::v2i32, 5}, // interleave 4 x 2i32 into 8i32 (and store)
+ {4, MVT::v4i32, 6}, // interleave 4 x 4i32 into 16i32 (and store)
+ {4, MVT::v8i32, 16}, // interleave 4 x 8i32 into 32i32 (and store)
+ {4, MVT::v16i32, 32}, // interleave 4 x 16i32 into 64i32 (and store)
+ {4, MVT::v32i32, 64}, // interleave 4 x 32i32 into 128i32 (and store)
+
+ {4, MVT::v2i64, 6}, // interleave 4 x 2i64 into 8i64 (and store)
+ {4, MVT::v4i64, 8}, // interleave 4 x 4i64 into 16i64 (and store)
+ {4, MVT::v8i64, 20}, // interleave 4 x 8i64 into 32i64 (and store)
+ {4, MVT::v16i64, 40}, // interleave 4 x 16i64 into 64i64 (and store)
+
+ {6, MVT::v2i8, 7}, // interleave 6 x 2i8 into 12i8 (and store)
+ {6, MVT::v4i8, 9}, // interleave 6 x 4i8 into 24i8 (and store)
+ {6, MVT::v8i8, 16}, // interleave 6 x 8i8 into 48i8 (and store)
+ {6, MVT::v16i8, 27}, // interleave 6 x 16i8 into 96i8 (and store)
+ {6, MVT::v32i8, 90}, // interleave 6 x 32i8 into 192i8 (and store)
+
+ {6, MVT::v2i16, 10}, // interleave 6 x 2i16 into 12i16 (and store)
+ {6, MVT::v4i16, 15}, // interleave 6 x 4i16 into 24i16 (and store)
+ {6, MVT::v8i16, 21}, // interleave 6 x 8i16 into 48i16 (and store)
+ {6, MVT::v16i16, 58}, // interleave 6 x 16i16 into 96i16 (and store)
+ {6, MVT::v32i16, 90}, // interleave 6 x 32i16 into 192i16 (and store)
+
+ {6, MVT::v2i32, 9}, // interleave 6 x 2i32 into 12i32 (and store)
+ {6, MVT::v4i32, 12}, // interleave 6 x 4i32 into 24i32 (and store)
+ {6, MVT::v8i32, 33}, // interleave 6 x 8i32 into 48i32 (and store)
+ {6, MVT::v16i32, 66}, // interleave 6 x 16i32 into 96i32 (and store)
+
+ {6, MVT::v2i64, 8}, // interleave 6 x 2i64 into 12i64 (and store)
+ {6, MVT::v4i64, 15}, // interleave 6 x 4i64 into 24i64 (and store)
+ {6, MVT::v8i64, 30}, // interleave 6 x 8i64 into 48i64 (and store)
+ };
+
+ static const CostTblEntry SSE2InterleavedStoreTbl[] = {
+ {2, MVT::v2i8, 1}, // interleave 2 x 2i8 into 4i8 (and store)
+ {2, MVT::v4i8, 1}, // interleave 2 x 4i8 into 8i8 (and store)
+ {2, MVT::v8i8, 1}, // interleave 2 x 8i8 into 16i8 (and store)
+
+ {2, MVT::v2i16, 1}, // interleave 2 x 2i16 into 4i16 (and store)
+ {2, MVT::v4i16, 1}, // interleave 2 x 4i16 into 8i16 (and store)
+
+ {2, MVT::v2i32, 1}, // interleave 2 x 2i32 into 4i32 (and store)
+ };
+
+ if (Opcode == Instruction::Load) {
+ auto GetDiscountedCost = [Factor, NumMembers = Indices.size(),
+ MemOpCosts](const CostTblEntry *Entry) {
+ // NOTE: this is just an approximation!
+ // It can over/under-estimate the cost!
+ return MemOpCosts + divideCeil(NumMembers * Entry->Cost, Factor);
+ };
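GetDiscountedCost prorates the table entry by the fraction of group members actually listed in Indices, then adds the memory cost. A small hedged sketch of that arithmetic (illustrative helper only):

// Discounted interleaved-load cost approximation used above:
//   MemOpCosts + ceil(NumMembers * TableCost / Factor)
// e.g. Factor = 3, TableCost = 9, only 2 of the 3 members used:
//   ceil(2 * 9 / 3) = 6, so the shuffle part is charged 6 instead of 9.
static unsigned discountedLoadCost(unsigned MemOpCosts, unsigned TableCost,
                                   unsigned NumMembers, unsigned Factor) {
  return MemOpCosts + (NumMembers * TableCost + Factor - 1) / Factor;
}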
+
+ if (ST->hasAVX2())
+ if (const auto *Entry = CostTableLookup(AVX2InterleavedLoadTbl, Factor,
+ ETy.getSimpleVT()))
+ return GetDiscountedCost(Entry);
+
+ if (ST->hasSSSE3())
+ if (const auto *Entry = CostTableLookup(SSSE3InterleavedLoadTbl, Factor,
+ ETy.getSimpleVT()))
+ return GetDiscountedCost(Entry);
+
+ if (ST->hasSSE2())
+ if (const auto *Entry = CostTableLookup(SSE2InterleavedLoadTbl, Factor,
+ ETy.getSimpleVT()))
+ return GetDiscountedCost(Entry);
+ } else {
+ assert(Opcode == Instruction::Store &&
+ "Expected Store Instruction at this point");
+ assert((!Indices.size() || Indices.size() == Factor) &&
+ "Interleaved store only supports fully-interleaved groups.");
+ if (ST->hasAVX2())
+ if (const auto *Entry = CostTableLookup(AVX2InterleavedStoreTbl, Factor,
+ ETy.getSimpleVT()))
+ return MemOpCosts + Entry->Cost;
+
+ if (ST->hasSSE2())
+ if (const auto *Entry = CostTableLookup(SSE2InterleavedStoreTbl, Factor,
+ ETy.getSimpleVT()))
+ return MemOpCosts + Entry->Cost;
+ }
+
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
Alignment, AddressSpace, CostKind,
UseMaskForCond, UseMaskForGaps);
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
index 69ff6584316e..c53424ec0026 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -45,52 +45,54 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
X86::FeatureCMPXCHG16B,
X86::FeatureLAHFSAHF,
- // Codegen control options.
- X86::FeatureFast11ByteNOP,
- X86::FeatureFast15ByteNOP,
- X86::FeatureFastBEXTR,
- X86::FeatureFastHorizontalOps,
- X86::FeatureFastLZCNT,
- X86::FeatureFastScalarFSQRT,
- X86::FeatureFastSHLDRotate,
- X86::FeatureFastScalarShiftMasks,
- X86::FeatureFastVectorShiftMasks,
- X86::FeatureFastVariableCrossLaneShuffle,
- X86::FeatureFastVariablePerLaneShuffle,
- X86::FeatureFastVectorFSQRT,
- X86::FeatureLEAForSP,
- X86::FeatureLEAUsesAG,
- X86::FeatureLZCNTFalseDeps,
- X86::FeatureBranchFusion,
- X86::FeatureMacroFusion,
- X86::FeaturePadShortFunctions,
- X86::FeaturePOPCNTFalseDeps,
+ // Some older targets can be set up to fold unaligned loads.
X86::FeatureSSEUnalignedMem,
- X86::FeatureSlow3OpsLEA,
- X86::FeatureSlowDivide32,
- X86::FeatureSlowDivide64,
- X86::FeatureSlowIncDec,
- X86::FeatureSlowLEA,
- X86::FeatureSlowPMADDWD,
- X86::FeatureSlowPMULLD,
- X86::FeatureSlowSHLD,
- X86::FeatureSlowTwoMemOps,
- X86::FeatureSlowUAMem16,
- X86::FeaturePreferMaskRegisters,
- X86::FeatureInsertVZEROUPPER,
- X86::FeatureUseGLMDivSqrtCosts,
+
+ // Codegen control options.
+ X86::TuningFast11ByteNOP,
+ X86::TuningFast15ByteNOP,
+ X86::TuningFastBEXTR,
+ X86::TuningFastHorizontalOps,
+ X86::TuningFastLZCNT,
+ X86::TuningFastScalarFSQRT,
+ X86::TuningFastSHLDRotate,
+ X86::TuningFastScalarShiftMasks,
+ X86::TuningFastVectorShiftMasks,
+ X86::TuningFastVariableCrossLaneShuffle,
+ X86::TuningFastVariablePerLaneShuffle,
+ X86::TuningFastVectorFSQRT,
+ X86::TuningLEAForSP,
+ X86::TuningLEAUsesAG,
+ X86::TuningLZCNTFalseDeps,
+ X86::TuningBranchFusion,
+ X86::TuningMacroFusion,
+ X86::TuningPadShortFunctions,
+ X86::TuningPOPCNTFalseDeps,
+ X86::TuningSlow3OpsLEA,
+ X86::TuningSlowDivide32,
+ X86::TuningSlowDivide64,
+ X86::TuningSlowIncDec,
+ X86::TuningSlowLEA,
+ X86::TuningSlowPMADDWD,
+ X86::TuningSlowPMULLD,
+ X86::TuningSlowSHLD,
+ X86::TuningSlowTwoMemOps,
+ X86::TuningSlowUAMem16,
+ X86::TuningPreferMaskRegisters,
+ X86::TuningInsertVZEROUPPER,
+ X86::TuningUseSLMArithCosts,
+ X86::TuningUseGLMDivSqrtCosts,
// Perf-tuning flags.
- X86::FeatureHasFastGather,
- X86::FeatureSlowUAMem32,
+ X86::TuningFastGather,
+ X86::TuningSlowUAMem32,
// Based on whether user set the -mprefer-vector-width command line.
- X86::FeaturePrefer128Bit,
- X86::FeaturePrefer256Bit,
+ X86::TuningPrefer128Bit,
+ X86::TuningPrefer256Bit,
// CPU name enums. These just follow CPU string.
- X86::ProcIntelAtom,
- X86::ProcIntelSLM,
+ X86::ProcIntelAtom
};
public:
@@ -120,8 +122,7 @@ public:
unsigned getLoadStoreVecRegBitWidth(unsigned AS) const;
unsigned getMaxInterleaveFactor(unsigned VF);
InstructionCost getArithmeticInstrCost(
- unsigned Opcode, Type *Ty,
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+ unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
@@ -144,14 +145,17 @@ public:
InstructionCost getScalarizationOverhead(VectorType *Ty,
const APInt &DemandedElts,
bool Insert, bool Extract);
+ InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
+ int VF,
+ const APInt &DemandedDstElts,
+ TTI::TargetCostKind CostKind);
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
MaybeAlign Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
- InstructionCost
- getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
- unsigned AddressSpace,
- TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency);
+ InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
+ Align Alignment, unsigned AddressSpace,
+ TTI::TargetCostKind CostKind);
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
const Value *Ptr, bool VariableMask,
Align Alignment,
@@ -180,9 +184,9 @@ public:
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind);
- InstructionCost getArithmeticReductionCost(
- unsigned Opcode, VectorType *Ty, Optional<FastMathFlags> FMF,
- TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency);
+ InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
+ Optional<FastMathFlags> FMF,
+ TTI::TargetCostKind CostKind);
InstructionCost getMinMaxCost(Type *Ty, Type *CondTy, bool IsUnsigned);
@@ -192,19 +196,13 @@ public:
InstructionCost getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
- Align Alignment, unsigned AddressSpace,
- TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
+ Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond = false, bool UseMaskForGaps = false);
InstructionCost getInterleavedMemoryOpCostAVX512(
unsigned Opcode, FixedVectorType *VecTy, unsigned Factor,
ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace,
- TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
- bool UseMaskForCond = false, bool UseMaskForGaps = false);
- InstructionCost getInterleavedMemoryOpCostAVX2(
- unsigned Opcode, FixedVectorType *VecTy, unsigned Factor,
- ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace,
- TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
- bool UseMaskForCond = false, bool UseMaskForGaps = false);
+ TTI::TargetCostKind CostKind, bool UseMaskForCond = false,
+ bool UseMaskForGaps = false);
InstructionCost getIntImmCost(int64_t);
@@ -241,9 +239,12 @@ public:
SmallPtrSetImpl<Argument *> &Args) const;
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
bool IsZeroCmp) const;
+ bool prefersVectorizedAddressing() const;
+ bool supportsEfficientVectorElementLoadStore() const;
bool enableInterleavedAccessVectorization();
private:
+ bool supportsGather() const;
InstructionCost getGSScalarCost(unsigned Opcode, Type *DataTy,
bool VariableMask, Align Alignment,
unsigned AddressSpace);
diff --git a/llvm/lib/Target/X86/X86VZeroUpper.cpp b/llvm/lib/Target/X86/X86VZeroUpper.cpp
index c3031b698552..59b5dc111ce3 100644
--- a/llvm/lib/Target/X86/X86VZeroUpper.cpp
+++ b/llvm/lib/Target/X86/X86VZeroUpper.cpp
@@ -271,10 +271,8 @@ void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) {
<< getBlockExitStateName(CurState) << '\n');
if (CurState == EXITS_DIRTY)
- for (MachineBasicBlock::succ_iterator SI = MBB.succ_begin(),
- SE = MBB.succ_end();
- SI != SE; ++SI)
- addDirtySuccessor(**SI);
+ for (MachineBasicBlock *Succ : MBB.successors())
+ addDirtySuccessor(*Succ);
BlockStates[MBB.getNumber()].ExitState = CurState;
}
diff --git a/llvm/lib/Target/X86/X86WinEHState.cpp b/llvm/lib/Target/X86/X86WinEHState.cpp
index 8d8bd5e6b326..02186949960d 100644
--- a/llvm/lib/Target/X86/X86WinEHState.cpp
+++ b/llvm/lib/Target/X86/X86WinEHState.cpp
@@ -458,7 +458,7 @@ void WinEHStatePass::unlinkExceptionRegistration(IRBuilder<> &Builder) {
void WinEHStatePass::rewriteSetJmpCall(IRBuilder<> &Builder, Function &F,
CallBase &Call, Value *State) {
// Don't rewrite calls with a weird number of arguments.
- if (Call.getNumArgOperands() != 2)
+ if (Call.arg_size() != 2)
return;
SmallVector<OperandBundleDef, 1> OpBundles;
diff --git a/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp b/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
index 0505686e645b..f6b97e9e84b3 100644
--- a/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
+++ b/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
@@ -19,7 +19,7 @@
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
index b44984ff6b4c..c286b747a271 100644
--- a/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
+++ b/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
@@ -20,10 +20,10 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp b/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp
index d5f66c2bd824..8916c6ca7be7 100644
--- a/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp
+++ b/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#include "TargetInfo/XCoreTargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
Target &llvm::getTheXCoreTarget() {
diff --git a/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp b/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp
index 4ea775305e12..38b613700674 100644
--- a/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp
+++ b/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -38,8 +38,8 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include <algorithm>
diff --git a/llvm/lib/Target/XCore/XCoreISelLowering.cpp b/llvm/lib/Target/XCore/XCoreISelLowering.cpp
index 51fdfe54db18..7c86262269fc 100644
--- a/llvm/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/llvm/lib/Target/XCore/XCoreISelLowering.cpp
@@ -1315,7 +1315,7 @@ SDValue XCoreTargetLowering::LowerCCCArguments(
CFRegNode.push_back(ArgIn.getValue(ArgIn->getNumValues() - 1));
}
} else {
- // sanity check
+ // Only arguments passed on the stack should make it here.
assert(VA.isMemLoc());
// Load the argument to a virtual register
unsigned ObjSize = VA.getLocVT().getSizeInBits()/8;
@@ -1643,7 +1643,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
return DAG.getNode(XCoreISD::LADD, dl, DAG.getVTList(VT, VT), N1, N0, N2);
// fold (ladd 0, 0, x) -> 0, x & 1
- if (N0C && N0C->isNullValue() && N1C && N1C->isNullValue()) {
+ if (N0C && N0C->isZero() && N1C && N1C->isZero()) {
SDValue Carry = DAG.getConstant(0, dl, VT);
SDValue Result = DAG.getNode(ISD::AND, dl, VT, N2,
DAG.getConstant(1, dl, VT));
@@ -1653,7 +1653,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
// fold (ladd x, 0, y) -> 0, add x, y iff carry is unused and y has only the
// low bit set
- if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 1)) {
+ if (N1C && N1C->isZero() && N->hasNUsesOfValue(0, 1)) {
APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
VT.getSizeInBits() - 1);
KnownBits Known = DAG.computeKnownBits(N2);
@@ -1675,7 +1675,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
EVT VT = N0.getValueType();
// fold (lsub 0, 0, x) -> x, -x iff x has only the low bit set
- if (N0C && N0C->isNullValue() && N1C && N1C->isNullValue()) {
+ if (N0C && N0C->isZero() && N1C && N1C->isZero()) {
APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
VT.getSizeInBits() - 1);
KnownBits Known = DAG.computeKnownBits(N2);
@@ -1690,7 +1690,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
// fold (lsub x, 0, y) -> 0, sub x, y iff borrow is unused and y has only the
// low bit set
- if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 1)) {
+ if (N1C && N1C->isZero() && N->hasNUsesOfValue(0, 1)) {
APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
VT.getSizeInBits() - 1);
KnownBits Known = DAG.computeKnownBits(N2);
@@ -1719,7 +1719,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
N1, N0, N2, N3);
// lmul(x, 0, a, b)
- if (N1C && N1C->isNullValue()) {
+ if (N1C && N1C->isZero()) {
// If the high result is unused fold to add(a, b)
if (N->hasNUsesOfValue(0, 0)) {
SDValue Lo = DAG.getNode(ISD::ADD, dl, VT, N2, N3);
diff --git a/llvm/lib/Target/XCore/XCoreInstrInfo.cpp b/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
index 1b21e1ce195b..1b53d593c130 100644
--- a/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -21,9 +21,9 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp b/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp
index 6528154ab0e2..b5a683de33ab 100644
--- a/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp
+++ b/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp
@@ -21,7 +21,6 @@
#include "llvm/IR/IntrinsicsXCore.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/NoFolder.h"
-#include "llvm/IR/ReplaceConstant.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
@@ -90,11 +89,11 @@ static bool replaceConstantExprOp(ConstantExpr *CE, Pass *P) {
if (PredBB->getTerminator()->getNumSuccessors() > 1)
PredBB = SplitEdge(PredBB, PN->getParent());
Instruction *InsertPos = PredBB->getTerminator();
- Instruction *NewInst = createReplacementInstr(CE, InsertPos);
+ Instruction *NewInst = CE->getAsInstruction(InsertPos);
PN->setOperand(I, NewInst);
}
} else if (Instruction *Instr = dyn_cast<Instruction>(WU)) {
- Instruction *NewInst = createReplacementInstr(CE, Instr);
+ Instruction *NewInst = CE->getAsInstruction(Instr);
Instr->replaceUsesOfWith(CE, NewInst);
} else {
ConstantExpr *CExpr = dyn_cast<ConstantExpr>(WU);
@@ -103,7 +102,7 @@ static bool replaceConstantExprOp(ConstantExpr *CE, Pass *P) {
}
}
} while (CE->hasNUsesOrMore(1)); // We need to check because a recursive
- // sibling may have used 'CE' when createReplacementInstr was called.
+ // sibling may have used 'CE' when getAsInstruction was called.
CE->destroyConstant();
return true;
}
diff --git a/llvm/lib/Target/XCore/XCoreSubtarget.cpp b/llvm/lib/Target/XCore/XCoreSubtarget.cpp
index 4b29751c7d06..1be707cb488c 100644
--- a/llvm/lib/Target/XCore/XCoreSubtarget.cpp
+++ b/llvm/lib/Target/XCore/XCoreSubtarget.cpp
@@ -12,7 +12,7 @@
#include "XCoreSubtarget.h"
#include "XCore.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/XCore/XCoreTargetMachine.cpp b/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
index 046cd6b5db7d..2e49627a19bf 100644
--- a/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -20,8 +20,8 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CodeGen.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -99,7 +99,7 @@ bool XCorePassConfig::addInstSelector() {
}
void XCorePassConfig::addPreEmitPass() {
- addPass(createXCoreFrameToArgsOffsetEliminationPass(), false);
+ addPass(createXCoreFrameToArgsOffsetEliminationPass());
}
// Force static initialization.
diff --git a/llvm/lib/TextAPI/TextStub.cpp b/llvm/lib/TextAPI/TextStub.cpp
index 5d85342adb26..b64f19ab65cc 100644
--- a/llvm/lib/TextAPI/TextStub.cpp
+++ b/llvm/lib/TextAPI/TextStub.cpp
@@ -1121,9 +1121,9 @@ TextAPIReader::get(MemoryBufferRef InputBuffer) {
auto File = std::unique_ptr<InterfaceFile>(
const_cast<InterfaceFile *>(Files.front()));
- for (auto Iter = std::next(Files.begin()); Iter != Files.end(); ++Iter)
+ for (const InterfaceFile *FI : llvm::drop_begin(Files))
File->addDocument(
- std::shared_ptr<InterfaceFile>(const_cast<InterfaceFile *>(*Iter)));
+ std::shared_ptr<InterfaceFile>(const_cast<InterfaceFile *>(FI)));
if (YAMLIn.error())
return make_error<StringError>(Ctx.ErrorMessage, YAMLIn.error());
diff --git a/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp b/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp
index 961577f126ba..de1634ebed3c 100644
--- a/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp
+++ b/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp
@@ -12,6 +12,7 @@
#include "llvm/ToolDrivers/llvm-dlltool/DlltoolDriver.h"
#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/Object/COFF.h"
#include "llvm/Object/COFFImportFile.h"
#include "llvm/Object/COFFModuleDefinition.h"
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 85abbf6d86e0..7243e39c9029 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -18,6 +18,7 @@
#include "llvm-c/Transforms/AggressiveInstCombine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -205,8 +206,8 @@ struct MaskOps {
bool FoundAnd1;
MaskOps(unsigned BitWidth, bool MatchAnds)
- : Root(nullptr), Mask(APInt::getNullValue(BitWidth)),
- MatchAndChain(MatchAnds), FoundAnd1(false) {}
+ : Root(nullptr), Mask(APInt::getZero(BitWidth)), MatchAndChain(MatchAnds),
+ FoundAnd1(false) {}
};
/// This is a recursive helper for foldAnyOrAllBitsSet() that walks through a
@@ -377,10 +378,10 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT) {
// Also, we want to avoid matching partial patterns.
// TODO: It would be more efficient if we removed dead instructions
// iteratively in this loop rather than waiting until the end.
- for (Instruction &I : make_range(BB.rbegin(), BB.rend())) {
+ for (Instruction &I : llvm::reverse(BB)) {
MadeChange |= foldAnyOrAllBitsSet(I);
MadeChange |= foldGuardedFunnelShift(I, DT);
- MadeChange |= tryToRecognizePopCount(I);
+ MadeChange |= tryToRecognizePopCount(I);
}
}
@@ -394,10 +395,11 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT) {
/// This is the entry point for all transforms. Pass manager differences are
/// handled in the callers of this function.
-static bool runImpl(Function &F, TargetLibraryInfo &TLI, DominatorTree &DT) {
+static bool runImpl(Function &F, AssumptionCache &AC, TargetLibraryInfo &TLI,
+ DominatorTree &DT) {
bool MadeChange = false;
const DataLayout &DL = F.getParent()->getDataLayout();
- TruncInstCombine TIC(TLI, DL, DT);
+ TruncInstCombine TIC(AC, TLI, DL, DT);
MadeChange |= TIC.run(F);
MadeChange |= foldUnusualPatterns(F, DT);
return MadeChange;
@@ -406,6 +408,7 @@ static bool runImpl(Function &F, TargetLibraryInfo &TLI, DominatorTree &DT) {
void AggressiveInstCombinerLegacyPass::getAnalysisUsage(
AnalysisUsage &AU) const {
AU.setPreservesCFG();
+ AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addPreserved<AAResultsWrapperPass>();
@@ -415,16 +418,18 @@ void AggressiveInstCombinerLegacyPass::getAnalysisUsage(
}
bool AggressiveInstCombinerLegacyPass::runOnFunction(Function &F) {
+ auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- return runImpl(F, TLI, DT);
+ return runImpl(F, AC, TLI, DT);
}
PreservedAnalyses AggressiveInstCombinePass::run(Function &F,
FunctionAnalysisManager &AM) {
+ auto &AC = AM.getResult<AssumptionAnalysis>(F);
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
- if (!runImpl(F, TLI, DT)) {
+ if (!runImpl(F, AC, TLI, DT)) {
// No changes, all analyses are preserved.
return PreservedAnalyses::all();
}
@@ -438,6 +443,7 @@ char AggressiveInstCombinerLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(AggressiveInstCombinerLegacyPass,
"aggressive-instcombine",
"Combine pattern based expressions", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(AggressiveInstCombinerLegacyPass, "aggressive-instcombine",
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombineInternal.h b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombineInternal.h
index 42bcadfc7dcd..5d69e26d6ecc 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombineInternal.h
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombineInternal.h
@@ -17,6 +17,8 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Support/KnownBits.h"
using namespace llvm;
@@ -39,16 +41,18 @@ using namespace llvm;
//===----------------------------------------------------------------------===//
namespace llvm {
- class DataLayout;
- class DominatorTree;
- class Function;
- class Instruction;
- class TargetLibraryInfo;
- class TruncInst;
- class Type;
- class Value;
+class AssumptionCache;
+class DataLayout;
+class DominatorTree;
+class Function;
+class Instruction;
+class TargetLibraryInfo;
+class TruncInst;
+class Type;
+class Value;
class TruncInstCombine {
+ AssumptionCache &AC;
TargetLibraryInfo &TLI;
const DataLayout &DL;
const DominatorTree &DT;
@@ -75,9 +79,9 @@ class TruncInstCombine {
MapVector<Instruction *, Info> InstInfoMap;
public:
- TruncInstCombine(TargetLibraryInfo &TLI, const DataLayout &DL,
- const DominatorTree &DT)
- : TLI(TLI), DL(DL), DT(DT), CurrentTruncInst(nullptr) {}
+ TruncInstCombine(AssumptionCache &AC, TargetLibraryInfo &TLI,
+ const DataLayout &DL, const DominatorTree &DT)
+ : AC(AC), TLI(TLI), DL(DL), DT(DT), CurrentTruncInst(nullptr) {}
/// Perform TruncInst pattern optimization on given function.
bool run(Function &F);
@@ -104,6 +108,18 @@ private:
/// to be reduced.
Type *getBestTruncatedType();
+ KnownBits computeKnownBits(const Value *V) const {
+ return llvm::computeKnownBits(V, DL, /*Depth=*/0, &AC,
+ /*CtxI=*/cast<Instruction>(CurrentTruncInst),
+ &DT);
+ }
+
+ unsigned ComputeNumSignBits(const Value *V) const {
+ return llvm::ComputeNumSignBits(
+ V, DL, /*Depth=*/0, &AC, /*CtxI=*/cast<Instruction>(CurrentTruncInst),
+ &DT);
+ }
+
/// Given a \p V value and a \p SclTy scalar type return the generated reduced
/// value of \p V based on the type \p SclTy.
///
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp
index 16b82219e8ca..abac3f801a22 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp
@@ -33,6 +33,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/Support/KnownBits.h"
using namespace llvm;
@@ -61,9 +62,18 @@ static void getRelevantOperands(Instruction *I, SmallVectorImpl<Value *> &Ops) {
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::UDiv:
+ case Instruction::URem:
+ case Instruction::InsertElement:
Ops.push_back(I->getOperand(0));
Ops.push_back(I->getOperand(1));
break;
+ case Instruction::ExtractElement:
+ Ops.push_back(I->getOperand(0));
+ break;
case Instruction::Select:
Ops.push_back(I->getOperand(1));
Ops.push_back(I->getOperand(2));
@@ -127,6 +137,13 @@ bool TruncInstCombine::buildTruncExpressionDag() {
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::UDiv:
+ case Instruction::URem:
+ case Instruction::InsertElement:
+ case Instruction::ExtractElement:
case Instruction::Select: {
SmallVector<Value *, 2> Operands;
getRelevantOperands(I, Operands);
@@ -135,10 +152,9 @@ bool TruncInstCombine::buildTruncExpressionDag() {
}
default:
// TODO: Can handle more cases here:
- // 1. shufflevector, extractelement, insertelement
- // 2. udiv, urem
- // 3. shl, lshr, ashr
- // 4. phi node(and loop handling)
+ // 1. shufflevector
+ // 2. sdiv, srem
+ // 3. phi node (and loop handling)
// ...
return false;
}
@@ -270,6 +286,50 @@ Type *TruncInstCombine::getBestTruncatedType() {
unsigned OrigBitWidth =
CurrentTruncInst->getOperand(0)->getType()->getScalarSizeInBits();
+ // Initialize MinBitWidth for shift instructions with the minimum number
+ // that is greater than the shift amount (i.e. shift amount + 1).
+ // For `lshr` adjust MinBitWidth so that all potentially truncated
+ // bits of the value-to-be-shifted are zeros.
+ // For `ashr` adjust MinBitWidth so that all potentially truncated
+ // bits of the value-to-be-shifted are sign bits (all zeros or all ones),
+ // and even the first untruncated bit is a sign bit.
+ // Exit early if MinBitWidth is not less than the original bit width.
+ for (auto &Itr : InstInfoMap) {
+ Instruction *I = Itr.first;
+ if (I->isShift()) {
+ KnownBits KnownRHS = computeKnownBits(I->getOperand(1));
+ unsigned MinBitWidth = KnownRHS.getMaxValue()
+ .uadd_sat(APInt(OrigBitWidth, 1))
+ .getLimitedValue(OrigBitWidth);
+ if (MinBitWidth == OrigBitWidth)
+ return nullptr;
+ if (I->getOpcode() == Instruction::LShr) {
+ KnownBits KnownLHS = computeKnownBits(I->getOperand(0));
+ MinBitWidth =
+ std::max(MinBitWidth, KnownLHS.getMaxValue().getActiveBits());
+ }
+ if (I->getOpcode() == Instruction::AShr) {
+ unsigned NumSignBits = ComputeNumSignBits(I->getOperand(0));
+ MinBitWidth = std::max(MinBitWidth, OrigBitWidth - NumSignBits + 1);
+ }
+ if (MinBitWidth >= OrigBitWidth)
+ return nullptr;
+ Itr.second.MinBitWidth = MinBitWidth;
+ }
+ if (I->getOpcode() == Instruction::UDiv ||
+ I->getOpcode() == Instruction::URem) {
+ unsigned MinBitWidth = 0;
+ for (const auto &Op : I->operands()) {
+ KnownBits Known = computeKnownBits(Op);
+ MinBitWidth =
+ std::max(Known.getMaxValue().getActiveBits(), MinBitWidth);
+ if (MinBitWidth >= OrigBitWidth)
+ return nullptr;
+ }
+ Itr.second.MinBitWidth = MinBitWidth;
+ }
+ }
+
// Calculate minimum allowed bit-width allowed for shrinking the currently
// visited truncate's operand.
unsigned MinBitWidth = getMinBitWidth();
@@ -356,10 +416,32 @@ void TruncInstCombine::ReduceExpressionDag(Type *SclTy) {
case Instruction::Mul:
case Instruction::And:
case Instruction::Or:
- case Instruction::Xor: {
+ case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::UDiv:
+ case Instruction::URem: {
Value *LHS = getReducedOperand(I->getOperand(0), SclTy);
Value *RHS = getReducedOperand(I->getOperand(1), SclTy);
Res = Builder.CreateBinOp((Instruction::BinaryOps)Opc, LHS, RHS);
+ // Preserve `exact` flag since truncation doesn't change exactness
+ if (auto *PEO = dyn_cast<PossiblyExactOperator>(I))
+ if (auto *ResI = dyn_cast<Instruction>(Res))
+ ResI->setIsExact(PEO->isExact());
+ break;
+ }
+ case Instruction::ExtractElement: {
+ Value *Vec = getReducedOperand(I->getOperand(0), SclTy);
+ Value *Idx = I->getOperand(1);
+ Res = Builder.CreateExtractElement(Vec, Idx);
+ break;
+ }
+ case Instruction::InsertElement: {
+ Value *Vec = getReducedOperand(I->getOperand(0), SclTy);
+ Value *NewElt = getReducedOperand(I->getOperand(1), SclTy);
+ Value *Idx = I->getOperand(2);
+ Res = Builder.CreateInsertElement(Vec, NewElt, Idx);
break;
}
case Instruction::Select: {
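(Editorial sketch, not part of the patch.) The TruncInstCombine changes above let the pass narrow shift, udiv/urem and insert/extractelement expressions feeding a trunc whenever KnownBits/sign-bit analysis shows the result still fits in the destination width. A minimal C++ illustration of the equivalence the lshr case relies on, using hypothetical 32-bit/16-bit widths:

#include <cassert>
#include <cstdint>

int main() {
  // For a value known to fit in 16 bits, doing the lshr in 32 bits and then
  // truncating gives the same result as doing the lshr directly in 16 bits:
  // the shift amount + 1 (here 5) and the active bits of x (<= 16) both fit
  // in the destination width, which is what getBestTruncatedType checks.
  for (uint32_t x = 0; x <= 0xFFFFu; ++x) {
    uint16_t Wide = static_cast<uint16_t>(x >> 4);                          // zext + lshr i32 + trunc
    uint16_t Narrow = static_cast<uint16_t>(static_cast<uint16_t>(x) >> 4); // lshr done in i16
    assert(Wide == Narrow);
  }
  return 0;
}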
diff --git a/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp b/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp
index 5b09cdb35791..67f8828e4c75 100644
--- a/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp
@@ -56,8 +56,10 @@ static void lowerSubFn(IRBuilder<> &Builder, CoroSubFnInst *SubFn) {
bool Lowerer::lowerRemainingCoroIntrinsics(Function &F) {
bool Changed = false;
- for (auto IB = inst_begin(F), E = inst_end(F); IB != E;) {
- Instruction &I = *IB++;
+ bool IsPrivateAndUnprocessed =
+ F.hasFnAttribute(CORO_PRESPLIT_ATTR) && F.hasLocalLinkage();
+
+ for (Instruction &I : llvm::make_early_inc_range(instructions(F))) {
if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
switch (II->getIntrinsicID()) {
default:
@@ -71,6 +73,10 @@ bool Lowerer::lowerRemainingCoroIntrinsics(Function &F) {
case Intrinsic::coro_alloc:
II->replaceAllUsesWith(ConstantInt::getTrue(Context));
break;
+ case Intrinsic::coro_async_resume:
+ II->replaceAllUsesWith(
+ ConstantPointerNull::get(cast<PointerType>(I.getType())));
+ break;
case Intrinsic::coro_id:
case Intrinsic::coro_id_retcon:
case Intrinsic::coro_id_retcon_once:
@@ -80,6 +86,13 @@ bool Lowerer::lowerRemainingCoroIntrinsics(Function &F) {
case Intrinsic::coro_subfn_addr:
lowerSubFn(Builder, cast<CoroSubFnInst>(II));
break;
+ case Intrinsic::coro_end:
+ case Intrinsic::coro_suspend_retcon:
+ if (IsPrivateAndUnprocessed) {
+ II->replaceAllUsesWith(UndefValue::get(II->getType()));
+ } else
+ continue;
+ break;
case Intrinsic::coro_async_size_replace:
auto *Target = cast<ConstantStruct>(
cast<GlobalVariable>(II->getArgOperand(0)->stripPointerCasts())
@@ -115,7 +128,8 @@ static bool declaresCoroCleanupIntrinsics(const Module &M) {
return coro::declaresIntrinsics(
M, {"llvm.coro.alloc", "llvm.coro.begin", "llvm.coro.subfn.addr",
"llvm.coro.free", "llvm.coro.id", "llvm.coro.id.retcon",
- "llvm.coro.id.retcon.once", "llvm.coro.async.size.replace"});
+ "llvm.coro.id.retcon.once", "llvm.coro.async.size.replace",
+ "llvm.coro.async.resume"});
}
PreservedAnalyses CoroCleanupPass::run(Function &F,
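(Editorial sketch, not part of the patch.) Several of the coroutine loops above are converted from manual `IB++` iteration to llvm::make_early_inc_range, which advances the iterator before the loop body runs so the current instruction can be erased safely. A minimal sketch of the idiom, assuming only the public LLVM headers named below:

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"

static void eraseDeadIntrinsics(llvm::Function &F) {
  // make_early_inc_range steps the underlying iterator past I before the body
  // executes, so erasing I does not invalidate the traversal -- the same
  // guarantee the old manual "Instruction &I = *IB++;" pattern provided.
  for (llvm::Instruction &I : llvm::make_early_inc_range(llvm::instructions(F)))
    if (llvm::isa<llvm::IntrinsicInst>(&I) && I.use_empty())
      I.eraseFromParent();
}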
diff --git a/llvm/lib/Transforms/Coroutines/CoroEarly.cpp b/llvm/lib/Transforms/Coroutines/CoroEarly.cpp
index 5e5e513cdfda..68a34bdcb1cd 100644
--- a/llvm/lib/Transforms/Coroutines/CoroEarly.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroEarly.cpp
@@ -150,8 +150,7 @@ bool Lowerer::lowerEarlyIntrinsics(Function &F) {
CoroIdInst *CoroId = nullptr;
SmallVector<CoroFreeInst *, 4> CoroFrees;
bool HasCoroSuspend = false;
- for (auto IB = inst_begin(F), IE = inst_end(F); IB != IE;) {
- Instruction &I = *IB++;
+ for (Instruction &I : llvm::make_early_inc_range(instructions(F))) {
if (auto *CB = dyn_cast<CallBase>(&I)) {
switch (CB->getIntrinsicID()) {
default:
diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
index beae5fdac8ab..ac3d078714ce 100644
--- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -16,6 +16,7 @@
#include "CoroInternal.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Analysis/PtrUseVisitor.h"
#include "llvm/Analysis/StackLifetime.h"
@@ -435,7 +436,7 @@ private:
DenseMap<Value*, unsigned> FieldIndexByKey;
public:
- FrameTypeBuilder(LLVMContext &Context, DataLayout const &DL,
+ FrameTypeBuilder(LLVMContext &Context, const DataLayout &DL,
Optional<Align> MaxFrameAlignment)
: DL(DL), Context(Context), MaxFrameAlignment(MaxFrameAlignment) {}
@@ -576,13 +577,8 @@ void FrameTypeBuilder::addFieldForAllocas(const Function &F,
using AllocaSetType = SmallVector<AllocaInst *, 4>;
SmallVector<AllocaSetType, 4> NonOverlapedAllocas;
- // We need to add field for allocas at the end of this function. However, this
- // function has multiple exits, so we use this helper to avoid redundant code.
- struct RTTIHelper {
- std::function<void()> func;
- RTTIHelper(std::function<void()> &&func) : func(func) {}
- ~RTTIHelper() { func(); }
- } Helper([&]() {
+ // We need to add fields for allocas at the end of this function.
+ auto AddFieldForAllocasAtExit = make_scope_exit([&]() {
for (auto AllocaList : NonOverlapedAllocas) {
auto *LargestAI = *AllocaList.begin();
FieldIDType Id = addFieldForAlloca(LargestAI);
@@ -840,8 +836,9 @@ static StringRef solveTypeName(Type *Ty) {
return "UnknownType";
}
-static DIType *solveDIType(DIBuilder &Builder, Type *Ty, DataLayout &Layout,
- DIScope *Scope, unsigned LineNum,
+static DIType *solveDIType(DIBuilder &Builder, Type *Ty,
+ const DataLayout &Layout, DIScope *Scope,
+ unsigned LineNum,
DenseMap<Type *, DIType *> &DITypeCache) {
if (DIType *DT = DITypeCache.lookup(Ty))
return DT;
@@ -1348,13 +1345,17 @@ struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
}
void visitIntrinsicInst(IntrinsicInst &II) {
- if (II.getIntrinsicID() != Intrinsic::lifetime_start)
+ // When the lifetime markers refer to a
+ // subrange of the original alloca, ignore
+ // them to avoid misleading the analysis.
+ if (II.getIntrinsicID() != Intrinsic::lifetime_start || !IsOffsetKnown ||
+ !Offset.isZero())
return Base::visitIntrinsicInst(II);
LifetimeStarts.insert(&II);
}
void visitCallBase(CallBase &CB) {
- for (unsigned Op = 0, OpCount = CB.getNumArgOperands(); Op < OpCount; ++Op)
+ for (unsigned Op = 0, OpCount = CB.arg_size(); Op < OpCount; ++Op)
if (U->get() == CB.getArgOperand(Op) && !CB.doesNotCapture(Op))
PI.setEscaped(&CB);
handleMayWrite(CB);
@@ -1868,8 +1869,7 @@ static void cleanupSinglePredPHIs(Function &F) {
}
}
while (!Worklist.empty()) {
- auto *Phi = Worklist.back();
- Worklist.pop_back();
+ auto *Phi = Worklist.pop_back_val();
auto *OriginalValue = Phi->getIncomingValue(0);
Phi->replaceAllUsesWith(OriginalValue);
}
@@ -1984,14 +1984,15 @@ static void rewriteMaterializableInstructions(IRBuilder<> &IRB,
if (CurrentBlock != U->getParent()) {
bool IsInCoroSuspendBlock = isa<AnyCoroSuspendInst>(U);
- CurrentBlock = IsInCoroSuspendBlock
- ? U->getParent()->getSinglePredecessor()
- : U->getParent();
+ CurrentBlock = U->getParent();
+ auto *InsertBlock = IsInCoroSuspendBlock
+ ? CurrentBlock->getSinglePredecessor()
+ : CurrentBlock;
CurrentMaterialization = cast<Instruction>(Def)->clone();
CurrentMaterialization->setName(Def->getName());
CurrentMaterialization->insertBefore(
- IsInCoroSuspendBlock ? CurrentBlock->getTerminator()
- : &*CurrentBlock->getFirstInsertionPt());
+ IsInCoroSuspendBlock ? InsertBlock->getTerminator()
+ : &*InsertBlock->getFirstInsertionPt());
}
if (auto *PN = dyn_cast<PHINode>(U)) {
assert(PN->getNumIncomingValues() == 1 &&
@@ -2244,12 +2245,7 @@ static Value *emitSetAndGetSwiftErrorValueAround(Instruction *Call,
/// intrinsics and attempting to MemToReg the alloca away.
static void eliminateSwiftErrorAlloca(Function &F, AllocaInst *Alloca,
coro::Shape &Shape) {
- for (auto UI = Alloca->use_begin(), UE = Alloca->use_end(); UI != UE; ) {
- // We're likely changing the use list, so use a mutation-safe
- // iteration pattern.
- auto &Use = *UI;
- ++UI;
-
+ for (Use &Use : llvm::make_early_inc_range(Alloca->uses())) {
// swifterror values can only be used in very specific ways.
// We take advantage of that here.
auto User = Use.getUser();
@@ -2510,11 +2506,11 @@ void coro::salvageDebugInfo(
DIExpression *Expr = DVI->getExpression();
// Follow the pointer arithmetic all the way to the incoming
// function argument and convert into a DIExpression.
- bool OutermostLoad = true;
+ bool SkipOutermostLoad = !isa<DbgValueInst>(DVI);
Value *Storage = DVI->getVariableLocationOp(0);
Value *OriginalStorage = Storage;
- while (Storage) {
- if (auto *LdInst = dyn_cast<LoadInst>(Storage)) {
+ while (auto *Inst = dyn_cast_or_null<Instruction>(Storage)) {
+ if (auto *LdInst = dyn_cast<LoadInst>(Inst)) {
Storage = LdInst->getOperand(0);
// FIXME: This is a heuristic that works around the fact that
// LLVM IR debug intrinsics cannot yet distinguish between
@@ -2522,26 +2518,25 @@ void coro::salvageDebugInfo(
// implicitly a memory location no DW_OP_deref operation for the
// last direct load from an alloca is necessary. This condition
// effectively drops the *last* DW_OP_deref in the expression.
- if (!OutermostLoad)
+ if (!SkipOutermostLoad)
Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
- OutermostLoad = false;
- } else if (auto *StInst = dyn_cast<StoreInst>(Storage)) {
+ } else if (auto *StInst = dyn_cast<StoreInst>(Inst)) {
Storage = StInst->getOperand(0);
- } else if (auto *GEPInst = dyn_cast<GetElementPtrInst>(Storage)) {
- SmallVector<Value *> AdditionalValues;
- DIExpression *SalvagedExpr = llvm::salvageDebugInfoImpl(
- *GEPInst, Expr,
- /*WithStackValue=*/false, 0, AdditionalValues);
- // Debug declares cannot currently handle additional location
- // operands.
- if (!SalvagedExpr || !AdditionalValues.empty())
+ } else {
+ SmallVector<uint64_t, 16> Ops;
+ SmallVector<Value *, 0> AdditionalValues;
+ Value *Op = llvm::salvageDebugInfoImpl(
+ *Inst, Expr ? Expr->getNumLocationOperands() : 0, Ops,
+ AdditionalValues);
+ if (!Op || !AdditionalValues.empty()) {
+ // If salvaging failed or salvaging produced more than one location
+ // operand, give up.
break;
- Expr = SalvagedExpr;
- Storage = GEPInst->getOperand(0);
- } else if (auto *BCInst = dyn_cast<llvm::BitCastInst>(Storage))
- Storage = BCInst->getOperand(0);
- else
- break;
+ }
+ Storage = Op;
+ Expr = DIExpression::appendOpsToArg(Expr, Ops, 0, /*StackValue*/ false);
+ }
+ SkipOutermostLoad = false;
}
if (!Storage)
return;
diff --git a/llvm/lib/Transforms/Coroutines/CoroInstr.h b/llvm/lib/Transforms/Coroutines/CoroInstr.h
index 5ed800d67fe9..bf3d781ba43e 100644
--- a/llvm/lib/Transforms/Coroutines/CoroInstr.h
+++ b/llvm/lib/Transforms/Coroutines/CoroInstr.h
@@ -638,7 +638,7 @@ public:
void checkWellFormed() const;
Function *getMustTailCallFunction() const {
- if (getNumArgOperands() < 3)
+ if (arg_size() < 3)
return nullptr;
return cast<Function>(
diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index b6932dbbfc3f..fa1d92f439b8 100644
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -520,8 +520,8 @@ void CoroCloner::replaceRetconOrAsyncSuspendUses() {
}
// Try to peephole extracts of an aggregate return.
- for (auto UI = NewS->use_begin(), UE = NewS->use_end(); UI != UE; ) {
- auto EVI = dyn_cast<ExtractValueInst>((UI++)->getUser());
+ for (Use &U : llvm::make_early_inc_range(NewS->uses())) {
+ auto *EVI = dyn_cast<ExtractValueInst>(U.getUser());
if (!EVI || EVI->getNumIndices() != 1)
continue;
@@ -622,12 +622,12 @@ static void replaceSwiftErrorOps(Function &F, coro::Shape &Shape,
// If there are no arguments, this is a 'get' operation.
Value *MappedResult;
- if (Op->getNumArgOperands() == 0) {
+ if (Op->arg_empty()) {
auto ValueTy = Op->getType();
auto Slot = getSwiftErrorSlot(ValueTy);
MappedResult = Builder.CreateLoad(ValueTy, Slot);
} else {
- assert(Op->getNumArgOperands() == 1);
+ assert(Op->arg_size() == 1);
auto Value = MappedOp->getArgOperand(0);
auto ValueTy = Value->getType();
auto Slot = getSwiftErrorSlot(ValueTy);
@@ -669,7 +669,7 @@ void CoroCloner::salvageDebugInfo() {
for (DbgVariableIntrinsic *DVI : Worklist) {
if (IsUnreachableBlock(DVI->getParent()))
DVI->eraseFromParent();
- else if (dyn_cast_or_null<AllocaInst>(DVI->getVariableLocationOp(0))) {
+ else if (isa_and_nonnull<AllocaInst>(DVI->getVariableLocationOp(0))) {
// Count all non-debuginfo uses in reachable blocks.
unsigned Uses = 0;
for (auto *User : DVI->getVariableLocationOp(0)->users())
@@ -738,8 +738,7 @@ void CoroCloner::replaceEntryBlock() {
// entry needs to be moved to the new entry.
Function *F = OldEntry->getParent();
DominatorTree DT{*F};
- for (auto IT = inst_begin(F), End = inst_end(F); IT != End;) {
- Instruction &I = *IT++;
+ for (Instruction &I : llvm::make_early_inc_range(instructions(F))) {
auto *Alloca = dyn_cast<AllocaInst>(&I);
if (!Alloca || I.use_empty())
continue;
@@ -773,9 +772,8 @@ Value *CoroCloner::deriveNewFramePointer() {
auto DbgLoc =
cast<CoroSuspendAsyncInst>(VMap[ActiveSuspend])->getDebugLoc();
// Calling i8* (i8*)
- auto *CallerContext = Builder.CreateCall(
- cast<FunctionType>(ProjectionFunc->getType()->getPointerElementType()),
- ProjectionFunc, CalleeContext);
+ auto *CallerContext = Builder.CreateCall(ProjectionFunc->getFunctionType(),
+ ProjectionFunc, CalleeContext);
CallerContext->setCallingConv(ProjectionFunc->getCallingConv());
CallerContext->setDebugLoc(DbgLoc);
// The frame is located after the async_context header.
@@ -906,8 +904,7 @@ void CoroCloner::create() {
case coro::ABI::Switch:
// Bootstrap attributes by copying function attributes from the
// original function. This should include optimization settings and so on.
- NewAttrs = NewAttrs.addAttributes(Context, AttributeList::FunctionIndex,
- OrigAttrs.getFnAttributes());
+ NewAttrs = NewAttrs.addFnAttributes(Context, OrigAttrs.getFnAttrs());
addFramePointerAttrs(NewAttrs, Context, 0,
Shape.FrameSize, Shape.FrameAlign);
@@ -929,9 +926,8 @@ void CoroCloner::create() {
}
// Transfer the original function's attributes.
- auto FnAttrs = OrigF.getAttributes().getFnAttributes();
- NewAttrs =
- NewAttrs.addAttributes(Context, AttributeList::FunctionIndex, FnAttrs);
+ auto FnAttrs = OrigF.getAttributes().getFnAttrs();
+ NewAttrs = NewAttrs.addFnAttributes(Context, FnAttrs);
break;
}
case coro::ABI::Retcon:
@@ -1144,11 +1140,13 @@ static void updateCoroFrame(coro::Shape &Shape, Function *ResumeFn,
static void postSplitCleanup(Function &F) {
removeUnreachableBlocks(F);
+#ifndef NDEBUG
// For now, we do a mandatory verification step because we don't
// entirely trust this pass. Note that we don't want to add a verifier
// pass to FPM below because it will also verify all the global data.
if (verifyFunction(F, &errs()))
report_fatal_error("Broken function");
+#endif
}
// Assuming we arrived at the block NewBlock from Prev instruction, store
@@ -1262,7 +1260,7 @@ static bool shouldBeMustTail(const CallInst &CI, const Function &F) {
Attribute::SwiftSelf, Attribute::SwiftError};
AttributeList Attrs = CI.getAttributes();
for (auto AK : ABIAttrs)
- if (Attrs.hasParamAttribute(0, AK))
+ if (Attrs.hasParamAttr(0, AK))
return false;
return true;
@@ -1357,7 +1355,7 @@ static bool hasCallsInBlocksBetween(BasicBlock *SaveBB, BasicBlock *ResDesBB) {
auto *BB = Worklist.pop_back_val();
Set.insert(BB);
for (auto *Pred : predecessors(BB))
- if (Set.count(Pred) == 0)
+ if (!Set.contains(Pred))
Worklist.push_back(Pred);
}
@@ -1547,8 +1545,7 @@ static void coerceArguments(IRBuilder<> &Builder, FunctionType *FnTy,
CallInst *coro::createMustTailCall(DebugLoc Loc, Function *MustTailCallFn,
ArrayRef<Value *> Arguments,
IRBuilder<> &Builder) {
- auto *FnTy =
- cast<FunctionType>(MustTailCallFn->getType()->getPointerElementType());
+ auto *FnTy = MustTailCallFn->getFunctionType();
// Coerce the arguments, llvm optimizations seem to ignore the types in
// vaarg functions and throws away casts in optimized mode.
SmallVector<Value *, 8> CallArgs;
@@ -1568,8 +1565,8 @@ static void splitAsyncCoroutine(Function &F, coro::Shape &Shape,
// Reset various things that the optimizer might have decided it
// "knows" about the coroutine function due to not seeing a return.
F.removeFnAttr(Attribute::NoReturn);
- F.removeAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
- F.removeAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
+ F.removeRetAttr(Attribute::NoAlias);
+ F.removeRetAttr(Attribute::NonNull);
auto &Context = F.getContext();
auto *Int8PtrTy = Type::getInt8PtrTy(Context);
@@ -1667,8 +1664,8 @@ static void splitRetconCoroutine(Function &F, coro::Shape &Shape,
// Reset various things that the optimizer might have decided it
// "knows" about the coroutine function due to not seeing a return.
F.removeFnAttr(Attribute::NoReturn);
- F.removeAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
- F.removeAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
+ F.removeRetAttr(Attribute::NoAlias);
+ F.removeRetAttr(Attribute::NonNull);
// Allocate the frame.
auto *Id = cast<AnyCoroIdRetconInst>(Shape.CoroBegin->getId());
@@ -1977,9 +1974,9 @@ static void replacePrepare(CallInst *Prepare, LazyCallGraph &CG,
// %2 = bitcast %1 to [[TYPE]]
// ==>
// %2 = @some_function
- for (auto UI = Prepare->use_begin(), UE = Prepare->use_end(); UI != UE;) {
+ for (Use &U : llvm::make_early_inc_range(Prepare->uses())) {
// Look for bitcasts back to the original function type.
- auto *Cast = dyn_cast<BitCastInst>((UI++)->getUser());
+ auto *Cast = dyn_cast<BitCastInst>(U.getUser());
if (!Cast || Cast->getType() != Fn->getType())
continue;
@@ -2019,10 +2016,9 @@ static void replacePrepare(CallInst *Prepare, CallGraph &CG) {
// %2 = bitcast %1 to [[TYPE]]
// ==>
// %2 = @some_function
- for (auto UI = Prepare->use_begin(), UE = Prepare->use_end();
- UI != UE; ) {
+ for (Use &U : llvm::make_early_inc_range(Prepare->uses())) {
// Look for bitcasts back to the original function type.
- auto *Cast = dyn_cast<BitCastInst>((UI++)->getUser());
+ auto *Cast = dyn_cast<BitCastInst>(U.getUser());
if (!Cast || Cast->getType() != Fn->getType()) continue;
// Check whether the replacement will introduce new direct calls.
@@ -2059,9 +2055,9 @@ static void replacePrepare(CallInst *Prepare, CallGraph &CG) {
static bool replaceAllPrepares(Function *PrepareFn, LazyCallGraph &CG,
LazyCallGraph::SCC &C) {
bool Changed = false;
- for (auto PI = PrepareFn->use_begin(), PE = PrepareFn->use_end(); PI != PE;) {
+ for (Use &P : llvm::make_early_inc_range(PrepareFn->uses())) {
// Intrinsics can only be used in calls.
- auto *Prepare = cast<CallInst>((PI++)->getUser());
+ auto *Prepare = cast<CallInst>(P.getUser());
replacePrepare(Prepare, CG, C);
Changed = true;
}
@@ -2077,10 +2073,9 @@ static bool replaceAllPrepares(Function *PrepareFn, LazyCallGraph &CG,
/// switch coroutines, which are lowered in multiple stages).
static bool replaceAllPrepares(Function *PrepareFn, CallGraph &CG) {
bool Changed = false;
- for (auto PI = PrepareFn->use_begin(), PE = PrepareFn->use_end();
- PI != PE; ) {
+ for (Use &P : llvm::make_early_inc_range(PrepareFn->uses())) {
// Intrinsics can only be used in calls.
- auto *Prepare = cast<CallInst>((PI++)->getUser());
+ auto *Prepare = cast<CallInst>(P.getUser());
replacePrepare(Prepare, CG);
Changed = true;
}
diff --git a/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/llvm/lib/Transforms/Coroutines/Coroutines.cpp
index ae2d9e192c87..e4883ef89db7 100644
--- a/llvm/lib/Transforms/Coroutines/Coroutines.cpp
+++ b/llvm/lib/Transforms/Coroutines/Coroutines.cpp
@@ -126,6 +126,7 @@ static bool isCoroutineIntrinsicName(StringRef Name) {
"llvm.coro.alloc",
"llvm.coro.async.context.alloc",
"llvm.coro.async.context.dealloc",
+ "llvm.coro.async.resume",
"llvm.coro.async.size.replace",
"llvm.coro.async.store_resume",
"llvm.coro.begin",
@@ -311,10 +312,9 @@ void coro::Shape::buildFrom(Function &F) {
if (CoroBegin)
report_fatal_error(
"coroutine should have exactly one defining @llvm.coro.begin");
- CB->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
- CB->addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
- CB->removeAttribute(AttributeList::FunctionIndex,
- Attribute::NoDuplicate);
+ CB->addRetAttr(Attribute::NonNull);
+ CB->addRetAttr(Attribute::NoAlias);
+ CB->removeFnAttr(Attribute::NoDuplicate);
CoroBegin = CB;
break;
}
@@ -571,8 +571,8 @@ void coro::Shape::emitDealloc(IRBuilder<> &Builder, Value *Ptr,
llvm_unreachable("Unknown coro::ABI enum");
}
-LLVM_ATTRIBUTE_NORETURN
-static void fail(const Instruction *I, const char *Reason, Value *V) {
+[[noreturn]] static void fail(const Instruction *I, const char *Reason,
+ Value *V) {
#ifndef NDEBUG
I->dump();
if (V) {
@@ -722,7 +722,7 @@ void CoroAsyncEndInst::checkWellFormed() const {
return;
auto *FnTy =
cast<FunctionType>(MustTailCallFunc->getType()->getPointerElementType());
- if (FnTy->getNumParams() != (getNumArgOperands() - 3))
+ if (FnTy->getNumParams() != (arg_size() - 3))
fail(this,
"llvm.coro.end.async must tail call function argument type must "
"match the tail arguments",
diff --git a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
index 532599b42e0d..01e724e22dcf 100644
--- a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
+++ b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
@@ -73,8 +73,8 @@ PreservedAnalyses AlwaysInlinerPass::run(Module &M,
},
ORE);
assert(OIC);
- emitInlinedInto(ORE, CB->getDebugLoc(), CB->getParent(), F, *Caller,
- *OIC, false, DEBUG_TYPE);
+ emitInlinedIntoBasedOnCost(ORE, CB->getDebugLoc(), CB->getParent(), F,
+ *Caller, *OIC, false, DEBUG_TYPE);
InlineFunctionInfo IFI(
/*cg=*/nullptr, GetAssumptionCache, &PSI,
@@ -108,8 +108,10 @@ PreservedAnalyses AlwaysInlinerPass::run(Module &M,
// Delete the non-comdat ones from the module and also from our vector.
auto NonComdatBegin = partition(
InlinedFunctions, [&](Function *F) { return F->hasComdat(); });
- for (Function *F : make_range(NonComdatBegin, InlinedFunctions.end()))
+ for (Function *F : make_range(NonComdatBegin, InlinedFunctions.end())) {
M.getFunctionList().erase(F);
+ Changed = true;
+ }
InlinedFunctions.erase(NonComdatBegin, InlinedFunctions.end());
if (!InlinedFunctions.empty()) {
@@ -117,8 +119,10 @@ PreservedAnalyses AlwaysInlinerPass::run(Module &M,
// are not actually dead.
filterDeadComdatFunctions(M, InlinedFunctions);
// The remaining functions are actually dead.
- for (Function *F : InlinedFunctions)
+ for (Function *F : InlinedFunctions) {
M.getFunctionList().erase(F);
+ Changed = true;
+ }
}
return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index f670a101767e..93bb11433775 100644
--- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -148,7 +148,7 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
} else if (!ArgsToPromote.count(&*I)) {
// Unchanged argument
Params.push_back(I->getType());
- ArgAttrVec.push_back(PAL.getParamAttributes(ArgNo));
+ ArgAttrVec.push_back(PAL.getParamAttrs(ArgNo));
} else if (I->use_empty()) {
// Dead argument (which are always marked as promotable)
++NumArgumentsDead;
@@ -177,9 +177,8 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
// Since loads will only have a single operand, and GEPs only a single
// non-index operand, this will record direct loads without any indices,
// and gep+loads with the GEP indices.
- for (User::op_iterator II = UI->op_begin() + 1, IE = UI->op_end();
- II != IE; ++II)
- Indices.push_back(cast<ConstantInt>(*II)->getSExtValue());
+ for (const Use &I : llvm::drop_begin(UI->operands()))
+ Indices.push_back(cast<ConstantInt>(I)->getSExtValue());
// GEPs with a single 0 index can be merged with direct loads
if (Indices.size() == 1 && Indices.front() == 0)
Indices.clear();
@@ -231,8 +230,8 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
// Recompute the parameter attributes list based on the new arguments for
// the function.
- NF->setAttributes(AttributeList::get(F->getContext(), PAL.getFnAttributes(),
- PAL.getRetAttributes(), ArgAttrVec));
+ NF->setAttributes(AttributeList::get(F->getContext(), PAL.getFnAttrs(),
+ PAL.getRetAttrs(), ArgAttrVec));
ArgAttrVec.clear();
F->getParent()->getFunctionList().insert(F->getIterator(), NF);
@@ -257,7 +256,7 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
++I, ++AI, ++ArgNo)
if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) {
Args.push_back(*AI); // Unmodified argument
- ArgAttrVec.push_back(CallPAL.getParamAttributes(ArgNo));
+ ArgAttrVec.push_back(CallPAL.getParamAttrs(ArgNo));
} else if (ByValArgsToTransform.count(&*I)) {
// Emit a GEP and load for each element of the struct.
Type *AgTy = I->getParamByValType();
@@ -313,9 +312,7 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
IRB.CreateLoad(OrigLoad->getType(), V, V->getName() + ".val");
newLoad->setAlignment(OrigLoad->getAlign());
// Transfer the AA info too.
- AAMDNodes AAInfo;
- OrigLoad->getAAMetadata(AAInfo);
- newLoad->setAAMetadata(AAInfo);
+ newLoad->setAAMetadata(OrigLoad->getAAMetadata());
Args.push_back(newLoad);
ArgAttrVec.push_back(AttributeSet());
@@ -325,7 +322,7 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
// Push any varargs arguments on the list.
for (; AI != CB.arg_end(); ++AI, ++ArgNo) {
Args.push_back(*AI);
- ArgAttrVec.push_back(CallPAL.getParamAttributes(ArgNo));
+ ArgAttrVec.push_back(CallPAL.getParamAttrs(ArgNo));
}
SmallVector<OperandBundleDef, 1> OpBundles;
@@ -341,9 +338,9 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
NewCS = NewCall;
}
NewCS->setCallingConv(CB.getCallingConv());
- NewCS->setAttributes(
- AttributeList::get(F->getContext(), CallPAL.getFnAttributes(),
- CallPAL.getRetAttributes(), ArgAttrVec));
+ NewCS->setAttributes(AttributeList::get(F->getContext(),
+ CallPAL.getFnAttrs(),
+ CallPAL.getRetAttrs(), ArgAttrVec));
NewCS->copyMetadata(CB, {LLVMContext::MD_prof, LLVMContext::MD_dbg});
Args.clear();
ArgAttrVec.clear();
@@ -1018,11 +1015,12 @@ PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C,
do {
LocalChange = false;
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
+
for (LazyCallGraph::Node &N : C) {
Function &OldF = N.getFunction();
- FunctionAnalysisManager &FAM =
- AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
// FIXME: This lambda must only be used with this function. We should
// skip the lambda and just get the AA results directly.
auto AARGetter = [&](Function &F) -> AAResults & {
@@ -1045,6 +1043,13 @@ PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C,
C.getOuterRefSCC().replaceNodeFunction(N, *NewF);
FAM.clear(OldF, OldF.getName());
OldF.eraseFromParent();
+
+ PreservedAnalyses FuncPA;
+ FuncPA.preserveSet<CFGAnalyses>();
+ for (auto *U : NewF->users()) {
+ auto *UserF = cast<CallBase>(U)->getFunction();
+ FAM.invalidate(*UserF, FuncPA);
+ }
}
Changed |= LocalChange;
@@ -1053,7 +1058,12 @@ PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C,
if (!Changed)
return PreservedAnalyses::all();
- return PreservedAnalyses::none();
+ PreservedAnalyses PA;
+ // We've cleared out analyses for deleted functions.
+ PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
+ // We've manually invalidated analyses for functions we've modified.
+ PA.preserveSet<AllAnalysesOn<Function>>();
+ return PA;
}
namespace {
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index 762317425026..edadc79e3a9f 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -32,6 +32,7 @@
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/NoFolder.h"
#include "llvm/IR/ValueHandle.h"
@@ -250,10 +251,12 @@ Value *AA::getWithType(Value &V, Type &Ty) {
return Constant::getNullValue(&Ty);
if (C->getType()->isPointerTy() && Ty.isPointerTy())
return ConstantExpr::getPointerCast(C, &Ty);
- if (C->getType()->isIntegerTy() && Ty.isIntegerTy())
- return ConstantExpr::getTrunc(C, &Ty, /* OnlyIfReduced */ true);
- if (C->getType()->isFloatingPointTy() && Ty.isFloatingPointTy())
- return ConstantExpr::getFPTrunc(C, &Ty, /* OnlyIfReduced */ true);
+ if (C->getType()->getPrimitiveSizeInBits() >= Ty.getPrimitiveSizeInBits()) {
+ if (C->getType()->isIntegerTy() && Ty.isIntegerTy())
+ return ConstantExpr::getTrunc(C, &Ty, /* OnlyIfReduced */ true);
+ if (C->getType()->isFloatingPointTy() && Ty.isFloatingPointTy())
+ return ConstantExpr::getFPTrunc(C, &Ty, /* OnlyIfReduced */ true);
+ }
}
return nullptr;
}
@@ -379,30 +382,30 @@ static bool addIfNotExistent(LLVMContext &Ctx, const Attribute &Attr,
if (Attr.isEnumAttribute()) {
Attribute::AttrKind Kind = Attr.getKindAsEnum();
- if (Attrs.hasAttribute(AttrIdx, Kind))
+ if (Attrs.hasAttributeAtIndex(AttrIdx, Kind))
if (!ForceReplace &&
- isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind)))
+ isEqualOrWorse(Attr, Attrs.getAttributeAtIndex(AttrIdx, Kind)))
return false;
- Attrs = Attrs.addAttribute(Ctx, AttrIdx, Attr);
+ Attrs = Attrs.addAttributeAtIndex(Ctx, AttrIdx, Attr);
return true;
}
if (Attr.isStringAttribute()) {
StringRef Kind = Attr.getKindAsString();
- if (Attrs.hasAttribute(AttrIdx, Kind))
+ if (Attrs.hasAttributeAtIndex(AttrIdx, Kind))
if (!ForceReplace &&
- isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind)))
+ isEqualOrWorse(Attr, Attrs.getAttributeAtIndex(AttrIdx, Kind)))
return false;
- Attrs = Attrs.addAttribute(Ctx, AttrIdx, Attr);
+ Attrs = Attrs.addAttributeAtIndex(Ctx, AttrIdx, Attr);
return true;
}
if (Attr.isIntAttribute()) {
Attribute::AttrKind Kind = Attr.getKindAsEnum();
- if (Attrs.hasAttribute(AttrIdx, Kind))
+ if (Attrs.hasAttributeAtIndex(AttrIdx, Kind))
if (!ForceReplace &&
- isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind)))
+ isEqualOrWorse(Attr, Attrs.getAttributeAtIndex(AttrIdx, Kind)))
return false;
- Attrs = Attrs.removeAttribute(Ctx, AttrIdx, Kind);
- Attrs = Attrs.addAttribute(Ctx, AttrIdx, Attr);
+ Attrs = Attrs.removeAttributeAtIndex(Ctx, AttrIdx, Kind);
+ Attrs = Attrs.addAttributeAtIndex(Ctx, AttrIdx, Attr);
return true;
}
@@ -655,9 +658,9 @@ bool IRPosition::getAttrsFromIRAttr(Attribute::AttrKind AK,
else
AttrList = getAssociatedFunction()->getAttributes();
- bool HasAttr = AttrList.hasAttribute(getAttrIdx(), AK);
+ bool HasAttr = AttrList.hasAttributeAtIndex(getAttrIdx(), AK);
if (HasAttr)
- Attrs.push_back(AttrList.getAttribute(getAttrIdx(), AK));
+ Attrs.push_back(AttrList.getAttributeAtIndex(getAttrIdx(), AK));
return HasAttr;
}
@@ -1023,7 +1026,7 @@ bool Attributor::checkForAllUses(function_ref<bool(const Use &, bool &)> Pred,
while (!Worklist.empty()) {
const Use *U = Worklist.pop_back_val();
- if (!Visited.insert(U).second)
+ if (isa<PHINode>(U->getUser()) && !Visited.insert(U).second)
continue;
LLVM_DEBUG(dbgs() << "[Attributor] Check use: " << **U << " in "
<< *U->getUser() << "\n");
@@ -1040,6 +1043,8 @@ bool Attributor::checkForAllUses(function_ref<bool(const Use &, bool &)> Pred,
if (auto *SI = dyn_cast<StoreInst>(U->getUser())) {
if (&SI->getOperandUse(0) == U) {
+ if (!Visited.insert(U).second)
+ continue;
SmallSetVector<Value *, 4> PotentialCopies;
if (AA::getPotentialCopiesOfStoredValue(*this, *SI, PotentialCopies,
QueryingAA,
@@ -1118,6 +1123,10 @@ bool Attributor::checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred,
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U.getUser())) {
if (CE->isCast() && CE->getType()->isPointerTy() &&
CE->getType()->getPointerElementType()->isFunctionTy()) {
+ LLVM_DEBUG(
+ dbgs() << "[Attributor] Use, is constant cast expression, add "
+ << CE->getNumUses()
+ << " uses of that expression instead!\n");
for (const Use &CEU : CE->uses())
Uses.push_back(&CEU);
continue;
@@ -1138,9 +1147,13 @@ bool Attributor::checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred,
const Use *EffectiveUse =
ACS.isCallbackCall() ? &ACS.getCalleeUseForCallback() : &U;
if (!ACS.isCallee(EffectiveUse)) {
- if (!RequireAllCallSites)
+ if (!RequireAllCallSites) {
+ LLVM_DEBUG(dbgs() << "[Attributor] User " << *EffectiveUse->getUser()
+ << " is not a call of " << Fn.getName()
+ << ", skip use\n");
continue;
- LLVM_DEBUG(dbgs() << "[Attributor] User " << EffectiveUse->getUser()
+ }
+ LLVM_DEBUG(dbgs() << "[Attributor] User " << *EffectiveUse->getUser()
<< " is an invalid use of " << Fn.getName() << "\n");
return false;
}
@@ -1410,6 +1423,16 @@ void Attributor::runTillFixpoint() {
} while (!Worklist.empty() && (IterationCounter++ < MaxFixedPointIterations ||
VerifyMaxFixpointIterations));
+ if (IterationCounter > MaxFixedPointIterations && !Worklist.empty()) {
+ auto Remark = [&](OptimizationRemarkMissed ORM) {
+ return ORM << "Attributor did not reach a fixpoint after "
+ << ore::NV("Iterations", MaxFixedPointIterations)
+ << " iterations.";
+ };
+ Function *F = Worklist.front()->getIRPosition().getAssociatedFunction();
+ emitRemark<OptimizationRemarkMissed>(F, "FixedPoint", Remark);
+ }
+
LLVM_DEBUG(dbgs() << "\n[Attributor] Fixpoint iteration done after: "
<< IterationCounter << "/" << MaxFixpointIterations
<< " iterations\n");
@@ -1919,55 +1942,91 @@ void Attributor::createShallowWrapper(Function &F) {
CallInst *CI = CallInst::Create(&F, Args, "", EntryBB);
CI->setTailCall(true);
- CI->addAttribute(AttributeList::FunctionIndex, Attribute::NoInline);
+ CI->addFnAttr(Attribute::NoInline);
ReturnInst::Create(Ctx, CI->getType()->isVoidTy() ? nullptr : CI, EntryBB);
NumFnShallowWrappersCreated++;
}
+bool Attributor::isInternalizable(Function &F) {
+ if (F.isDeclaration() || F.hasLocalLinkage() ||
+ GlobalValue::isInterposableLinkage(F.getLinkage()))
+ return false;
+ return true;
+}
+
Function *Attributor::internalizeFunction(Function &F, bool Force) {
if (!AllowDeepWrapper && !Force)
return nullptr;
- if (F.isDeclaration() || F.hasLocalLinkage() ||
- GlobalValue::isInterposableLinkage(F.getLinkage()))
+ if (!isInternalizable(F))
return nullptr;
- Module &M = *F.getParent();
- FunctionType *FnTy = F.getFunctionType();
+ SmallPtrSet<Function *, 2> FnSet = {&F};
+ DenseMap<Function *, Function *> InternalizedFns;
+ internalizeFunctions(FnSet, InternalizedFns);
- // create a copy of the current function
- Function *Copied = Function::Create(FnTy, F.getLinkage(), F.getAddressSpace(),
- F.getName() + ".internalized");
- ValueToValueMapTy VMap;
- auto *NewFArgIt = Copied->arg_begin();
- for (auto &Arg : F.args()) {
- auto ArgName = Arg.getName();
- NewFArgIt->setName(ArgName);
- VMap[&Arg] = &(*NewFArgIt++);
- }
- SmallVector<ReturnInst *, 8> Returns;
-
- // Copy the body of the original function to the new one
- CloneFunctionInto(Copied, &F, VMap, CloneFunctionChangeType::LocalChangesOnly,
- Returns);
-
- // Set the linakage and visibility late as CloneFunctionInto has some implicit
- // requirements.
- Copied->setVisibility(GlobalValue::DefaultVisibility);
- Copied->setLinkage(GlobalValue::PrivateLinkage);
+ return InternalizedFns[&F];
+}
- // Copy metadata
- SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
- F.getAllMetadata(MDs);
- for (auto MDIt : MDs)
- if (!Copied->hasMetadata())
- Copied->addMetadata(MDIt.first, *MDIt.second);
+bool Attributor::internalizeFunctions(SmallPtrSetImpl<Function *> &FnSet,
+ DenseMap<Function *, Function *> &FnMap) {
+ for (Function *F : FnSet)
+ if (!Attributor::isInternalizable(*F))
+ return false;
- M.getFunctionList().insert(F.getIterator(), Copied);
- F.replaceAllUsesWith(Copied);
- Copied->setDSOLocal(true);
+ FnMap.clear();
+ // Generate the internalized version of each function.
+ for (Function *F : FnSet) {
+ Module &M = *F->getParent();
+ FunctionType *FnTy = F->getFunctionType();
+
+ // Create a copy of the current function
+ Function *Copied =
+ Function::Create(FnTy, F->getLinkage(), F->getAddressSpace(),
+ F->getName() + ".internalized");
+ ValueToValueMapTy VMap;
+ auto *NewFArgIt = Copied->arg_begin();
+ for (auto &Arg : F->args()) {
+ auto ArgName = Arg.getName();
+ NewFArgIt->setName(ArgName);
+ VMap[&Arg] = &(*NewFArgIt++);
+ }
+ SmallVector<ReturnInst *, 8> Returns;
+
+ // Copy the body of the original function to the new one
+ CloneFunctionInto(Copied, F, VMap,
+ CloneFunctionChangeType::LocalChangesOnly, Returns);
+
+ // Set the linkage and visibility late as CloneFunctionInto has some
+ // implicit requirements.
+ Copied->setVisibility(GlobalValue::DefaultVisibility);
+ Copied->setLinkage(GlobalValue::PrivateLinkage);
+
+ // Copy metadata
+ SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
+ F->getAllMetadata(MDs);
+ for (auto MDIt : MDs)
+ if (!Copied->hasMetadata())
+ Copied->addMetadata(MDIt.first, *MDIt.second);
+
+ M.getFunctionList().insert(F->getIterator(), Copied);
+ Copied->setDSOLocal(true);
+ FnMap[F] = Copied;
+ }
+
+ // Replace all uses of the old function with the new internalized function
+ // unless the caller is a function that was just internalized.
+ for (Function *F : FnSet) {
+ auto &InternalizedFn = FnMap[F];
+ auto IsNotInternalized = [&](Use &U) -> bool {
+ if (auto *CB = dyn_cast<CallBase>(U.getUser()))
+ return !FnMap.lookup(CB->getCaller());
+ return false;
+ };
+ F->replaceUsesWithIf(InternalizedFn, IsNotInternalized);
+ }
- return Copied;
+ return true;
}
bool Attributor::isValidFunctionSignatureRewrite(
@@ -1976,7 +2035,8 @@ bool Attributor::isValidFunctionSignatureRewrite(
if (!RewriteSignatures)
return false;
- auto CallSiteCanBeChanged = [](AbstractCallSite ACS) {
+ Function *Fn = Arg.getParent();
+ auto CallSiteCanBeChanged = [Fn](AbstractCallSite ACS) {
// Forbid the call site to cast the function return type. If we need to
// rewrite these functions we need to re-create a cast for the new call site
// (if the old had uses).
@@ -1984,11 +2044,12 @@ bool Attributor::isValidFunctionSignatureRewrite(
ACS.getInstruction()->getType() !=
ACS.getCalledFunction()->getReturnType())
return false;
+ if (ACS.getCalledOperand()->getType() != Fn->getType())
+ return false;
// Forbid must-tail calls for now.
return !ACS.isCallbackCall() && !ACS.getInstruction()->isMustTailCall();
};
- Function *Fn = Arg.getParent();
// Avoid var-arg functions for now.
if (Fn->isVarArg()) {
LLVM_DEBUG(dbgs() << "[Attributor] Cannot rewrite var-args functions\n");
@@ -2118,7 +2179,7 @@ ChangeStatus Attributor::rewriteFunctionSignatures(
} else {
NewArgumentTypes.push_back(Arg.getType());
NewArgumentAttributes.push_back(
- OldFnAttributeList.getParamAttributes(Arg.getArgNo()));
+ OldFnAttributeList.getParamAttrs(Arg.getArgNo()));
}
}
@@ -2149,8 +2210,8 @@ ChangeStatus Attributor::rewriteFunctionSignatures(
// the function.
LLVMContext &Ctx = OldFn->getContext();
NewFn->setAttributes(AttributeList::get(
- Ctx, OldFnAttributeList.getFnAttributes(),
- OldFnAttributeList.getRetAttributes(), NewArgumentAttributes));
+ Ctx, OldFnAttributeList.getFnAttrs(), OldFnAttributeList.getRetAttrs(),
+ NewArgumentAttributes));
// Since we have now created the new function, splice the body of the old
// function right into the new function, leaving the old rotting hulk of the
@@ -2195,7 +2256,7 @@ ChangeStatus Attributor::rewriteFunctionSignatures(
} else {
NewArgOperands.push_back(ACS.getCallArgOperand(OldArgNum));
NewArgOperandAttributes.push_back(
- OldCallAttributeList.getParamAttributes(OldArgNum));
+ OldCallAttributeList.getParamAttrs(OldArgNum));
}
}
@@ -2225,8 +2286,8 @@ ChangeStatus Attributor::rewriteFunctionSignatures(
NewCB->setCallingConv(OldCB->getCallingConv());
NewCB->takeName(OldCB);
NewCB->setAttributes(AttributeList::get(
- Ctx, OldCallAttributeList.getFnAttributes(),
- OldCallAttributeList.getRetAttributes(), NewArgOperandAttributes));
+ Ctx, OldCallAttributeList.getFnAttrs(),
+ OldCallAttributeList.getRetAttrs(), NewArgOperandAttributes));
CallSitePairs.push_back({OldCB, NewCB});
return true;
@@ -2441,6 +2502,9 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
// Every function can be "readnone/argmemonly/inaccessiblememonly/...".
getOrCreateAAFor<AAMemoryLocation>(FPos);
+ // Every function can track active assumptions.
+ getOrCreateAAFor<AAAssumptionInfo>(FPos);
+
// Every function might be applicable for Heap-To-Stack conversion.
if (EnableHeapToStack)
getOrCreateAAFor<AAHeapToStack>(FPos);
@@ -2526,6 +2590,7 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
auto CallSitePred = [&](Instruction &I) -> bool {
auto &CB = cast<CallBase>(I);
IRPosition CBRetPos = IRPosition::callsite_returned(CB);
+ IRPosition CBFnPos = IRPosition::callsite_function(CB);
// Call sites might be dead if they do not have side effects and no live
// users. The return value might be dead if there are no live users.
@@ -2537,6 +2602,9 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
if (!Callee)
return true;
+ // Every call site can track active assumptions.
+ getOrCreateAAFor<AAAssumptionInfo>(CBFnPos);
+
// Skip declarations except if annotations on their call sites were
// explicitly requested.
if (!AnnotateDeclarationCallSites && Callee->isDeclaration() &&
@@ -2549,7 +2617,7 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
getOrCreateAAFor<AAValueSimplify>(CBRetPos);
}
- for (int I = 0, E = CB.getNumArgOperands(); I < E; ++I) {
+ for (int I = 0, E = CB.arg_size(); I < E; ++I) {
IRPosition CBArgPos = IRPosition::callsite_argument(CB, I);
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 98ce286d5139..ec08287393de 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -28,6 +29,7 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Assumptions.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
@@ -146,6 +148,7 @@ PIPE_OPERATOR(AANoUndef)
PIPE_OPERATOR(AACallEdges)
PIPE_OPERATOR(AAFunctionReachability)
PIPE_OPERATOR(AAPointerInfo)
+PIPE_OPERATOR(AAAssumptionInfo)
#undef PIPE_OPERATOR
@@ -203,46 +206,25 @@ static Value *constructPointer(Type *ResTy, Type *PtrElemTy, Value *Ptr,
<< "-bytes as " << *ResTy << "\n");
if (Offset) {
- SmallVector<Value *, 4> Indices;
- std::string GEPName = Ptr->getName().str() + ".0";
-
- // Add 0 index to look through the pointer.
- assert((uint64_t)Offset < DL.getTypeAllocSize(PtrElemTy) &&
- "Offset out of bounds");
- Indices.push_back(Constant::getNullValue(IRB.getInt32Ty()));
-
Type *Ty = PtrElemTy;
- do {
- auto *STy = dyn_cast<StructType>(Ty);
- if (!STy)
- // Non-aggregate type, we cast and make byte-wise progress now.
- break;
-
- const StructLayout *SL = DL.getStructLayout(STy);
- if (int64_t(SL->getSizeInBytes()) < Offset)
- break;
-
- uint64_t Idx = SL->getElementContainingOffset(Offset);
- assert(Idx < STy->getNumElements() && "Offset calculation error!");
- uint64_t Rem = Offset - SL->getElementOffset(Idx);
- Ty = STy->getElementType(Idx);
-
- LLVM_DEBUG(errs() << "Ty: " << *Ty << " Offset: " << Offset
- << " Idx: " << Idx << " Rem: " << Rem << "\n");
+ APInt IntOffset(DL.getIndexTypeSizeInBits(Ptr->getType()), Offset);
+ SmallVector<APInt> IntIndices = DL.getGEPIndicesForOffset(Ty, IntOffset);
- GEPName += "." + std::to_string(Idx);
- Indices.push_back(ConstantInt::get(IRB.getInt32Ty(), Idx));
- Offset = Rem;
- } while (Offset);
+ SmallVector<Value *, 4> ValIndices;
+ std::string GEPName = Ptr->getName().str();
+ for (const APInt &Index : IntIndices) {
+ ValIndices.push_back(IRB.getInt(Index));
+ GEPName += "." + std::to_string(Index.getZExtValue());
+ }
// Create a GEP for the indices collected above.
- Ptr = IRB.CreateGEP(PtrElemTy, Ptr, Indices, GEPName);
+ Ptr = IRB.CreateGEP(PtrElemTy, Ptr, ValIndices, GEPName);
// If an offset is left we use byte-wise adjustment.
- if (Offset) {
+ if (IntOffset != 0) {
Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy());
- Ptr = IRB.CreateGEP(IRB.getInt8Ty(), Ptr, IRB.getInt32(Offset),
- GEPName + ".b" + Twine(Offset));
+ Ptr = IRB.CreateGEP(IRB.getInt8Ty(), Ptr, IRB.getInt(IntOffset),
+ GEPName + ".b" + Twine(IntOffset.getZExtValue()));
}
}
@@ -431,6 +413,7 @@ const Value *stripAndAccumulateMinimalOffsets(
};
return Val->stripAndAccumulateConstantOffsets(DL, Offset, AllowNonInbounds,
+ /* AllowInvariant */ false,
AttributorAnalysis);
}
@@ -503,6 +486,7 @@ static void clampReturnedValueStates(
S ^= *T;
}
+namespace {
/// Helper class for generic deduction: return value -> returned position.
template <typename AAType, typename BaseType,
typename StateType = typename BaseType::StateType,
@@ -661,6 +645,7 @@ struct AACallSiteReturnedFromReturned : public BaseType {
return clampStateAndIndicateChange(S, AA.getState());
}
};
+} // namespace
/// Helper function to accumulate uses.
template <class AAType, typename StateType = typename AAType::StateType>
@@ -1051,6 +1036,7 @@ private:
BooleanState BS;
};
+namespace {
struct AAPointerInfoImpl
: public StateWrapper<AA::PointerInfo::State, AAPointerInfo> {
using BaseTy = StateWrapper<AA::PointerInfo::State, AAPointerInfo>;
@@ -1149,19 +1135,23 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
return true;
};
+ /// Helper struct, will support ranges eventually.
+ struct OffsetInfo {
+ int64_t Offset = AA::PointerInfo::OffsetAndSize::Unknown;
+
+ bool operator==(const OffsetInfo &OI) const { return Offset == OI.Offset; }
+ };
+
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
using namespace AA::PointerInfo;
State S = getState();
ChangeStatus Changed = ChangeStatus::UNCHANGED;
Value &AssociatedValue = getAssociatedValue();
- struct OffsetInfo {
- int64_t Offset = 0;
- };
const DataLayout &DL = A.getDataLayout();
DenseMap<Value *, OffsetInfo> OffsetInfoMap;
- OffsetInfoMap[&AssociatedValue] = {};
+ OffsetInfoMap[&AssociatedValue] = OffsetInfo{0};
auto HandlePassthroughUser = [&](Value *Usr, OffsetInfo &PtrOI,
bool &Follow) {
@@ -1203,7 +1193,7 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
}
SmallVector<Value *, 8> Indices;
- for (Use &Idx : llvm::make_range(GEP->idx_begin(), GEP->idx_end())) {
+ for (Use &Idx : GEP->indices()) {
if (auto *CIdx = dyn_cast<ConstantInt>(Idx)) {
Indices.push_back(CIdx);
continue;
@@ -1219,8 +1209,52 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
Follow = true;
return true;
}
- if (isa<CastInst>(Usr) || isa<PHINode>(Usr) || isa<SelectInst>(Usr))
+ if (isa<CastInst>(Usr) || isa<SelectInst>(Usr))
return HandlePassthroughUser(Usr, PtrOI, Follow);
+
+ // For PHIs we need to take care of the recurrence explicitly as the value
+ // might change while we iterate through a loop. For now, we give up if
+ // the PHI is not invariant.
+ if (isa<PHINode>(Usr)) {
+ // Check if the PHI is invariant (so far).
+ OffsetInfo &UsrOI = OffsetInfoMap[Usr];
+ if (UsrOI == PtrOI)
+ return true;
+
+ // Check if the PHI operand already has an unknown offset, as we can't
+ // improve on that anymore.
+ if (PtrOI.Offset == OffsetAndSize::Unknown) {
+ UsrOI = PtrOI;
+ Follow = true;
+ return true;
+ }
+
+ // Check if the PHI operand is not dependent on the PHI itself.
+ // TODO: This is not great as we look at the pointer type. However, it
+ // is unclear where the Offset size comes from with typeless pointers.
+ APInt Offset(
+ DL.getIndexSizeInBits(CurPtr->getType()->getPointerAddressSpace()),
+ 0);
+ if (&AssociatedValue == CurPtr->stripAndAccumulateConstantOffsets(
+ DL, Offset, /* AllowNonInbounds */ true)) {
+ if (Offset != PtrOI.Offset) {
+ LLVM_DEBUG(dbgs()
+ << "[AAPointerInfo] PHI operand pointer offset mismatch "
+ << *CurPtr << " in " << *Usr << "\n");
+ return false;
+ }
+ return HandlePassthroughUser(Usr, PtrOI, Follow);
+ }
+
+ // TODO: Approximate in case we know the direction of the recurrence.
+ LLVM_DEBUG(dbgs() << "[AAPointerInfo] PHI operand is too complex "
+ << *CurPtr << " in " << *Usr << "\n");
+ UsrOI = PtrOI;
+ UsrOI.Offset = OffsetAndSize::Unknown;
+ Follow = true;
+ return true;
+ }
+
if (auto *LoadI = dyn_cast<LoadInst>(Usr))
return handleAccess(A, *LoadI, *CurPtr, /* Content */ nullptr,
AccessKind::AK_READ, PtrOI.Offset, Changed,
@@ -2388,6 +2422,10 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior {
const size_t NoUBPrevSize = AssumedNoUBInsts.size();
auto InspectMemAccessInstForUB = [&](Instruction &I) {
+ // The LangRef now states that volatile stores are not UB, so let's skip them.
+ if (I.isVolatile() && I.mayWriteToMemory())
+ return true;
+
// Skip instructions that are already saved.
if (AssumedNoUBInsts.count(&I) || KnownUBInsts.count(&I))
return true;
@@ -2467,7 +2505,7 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior {
Function *Callee = CB.getCalledFunction();
if (!Callee)
return true;
- for (unsigned idx = 0; idx < CB.getNumArgOperands(); idx++) {
+ for (unsigned idx = 0; idx < CB.arg_size(); idx++) {
// If current argument is known to be simplified to null pointer and the
// corresponding argument position is known to have nonnull attribute,
// the argument is poison. Furthermore, if the argument is poison and
@@ -3135,8 +3173,7 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl {
// value passed at this call site.
// TODO: AbstractCallSite
const auto &CB = cast<CallBase>(getAnchorValue());
- for (unsigned OtherArgNo = 0; OtherArgNo < CB.getNumArgOperands();
- OtherArgNo++)
+ for (unsigned OtherArgNo = 0; OtherArgNo < CB.arg_size(); OtherArgNo++)
if (mayAliasWithArgument(A, AAR, MemBehaviorAA, CB, OtherArgNo))
return false;
@@ -3354,6 +3391,10 @@ struct AAIsDeadFloating : public AAIsDeadValueImpl {
}
bool isDeadStore(Attributor &A, StoreInst &SI) {
+ // The LangRef now states that volatile stores are not UB/dead, so let's skip them.
+ if (SI.isVolatile())
+ return false;
+
bool UsedAssumedInformation = false;
SmallSetVector<Value *, 4> PotentialCopies;
if (!AA::getPotentialCopiesOfStoredValue(A, SI, PotentialCopies, *this,
@@ -5039,6 +5080,7 @@ struct AANoCaptureCallSiteReturned final : AANoCaptureImpl {
STATS_DECLTRACK_CSRET_ATTR(nocapture)
}
};
+} // namespace
/// ------------------ Value Simplify Attribute ----------------------------
@@ -5059,6 +5101,7 @@ bool ValueSimplifyStateType::unionAssumed(Optional<Value *> Other) {
return true;
}
+namespace {
struct AAValueSimplifyImpl : AAValueSimplify {
AAValueSimplifyImpl(const IRPosition &IRP, Attributor &A)
: AAValueSimplify(IRP, A) {}
@@ -6464,7 +6507,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
auto IsCompatiblePrivArgOfDirectCS = [&](AbstractCallSite ACS) {
CallBase *DC = cast<CallBase>(ACS.getInstruction());
int DCArgNo = ACS.getCallArgOperandNo(ArgNo);
- assert(DCArgNo >= 0 && unsigned(DCArgNo) < DC->getNumArgOperands() &&
+ assert(DCArgNo >= 0 && unsigned(DCArgNo) < DC->arg_size() &&
"Expected a direct call operand for callback call operand");
LLVM_DEBUG({
@@ -7287,10 +7330,12 @@ void AAMemoryBehaviorFloating::analyzeUseIn(Attributor &A, const Use &U,
case Instruction::Store:
// Stores cause the NO_WRITES property to disappear if the use is the
- // pointer operand. Note that we do assume that capturing was taken care of
- // somewhere else.
+    // pointer operand. Note that while capturing was taken care of somewhere
+    // else, we still need to handle stores of the value itself, which are not
+    // looked through.
if (cast<StoreInst>(UserI)->getPointerOperand() == U.get())
removeAssumedBits(NO_WRITES);
+ else
+ indicatePessimisticFixpoint();
return;
case Instruction::Call:
@@ -7336,6 +7381,7 @@ void AAMemoryBehaviorFloating::analyzeUseIn(Attributor &A, const Use &U,
if (UserI->mayWriteToMemory())
removeAssumedBits(NO_WRITES);
}
+} // namespace
/// -------------------- Memory Locations Attributes ---------------------------
/// Includes read-none, argmemonly, inaccessiblememonly,
@@ -7628,11 +7674,14 @@ void AAMemoryLocationImpl::categorizePtrValue(
assert(!isa<GEPOperator>(Obj) && "GEPs should have been stripped.");
if (isa<UndefValue>(Obj))
continue;
- if (auto *Arg = dyn_cast<Argument>(Obj)) {
- if (Arg->hasByValAttr())
- MLK = NO_LOCAL_MEM;
- else
- MLK = NO_ARGUMENT_MEM;
+ if (isa<Argument>(Obj)) {
+      // TODO: For now we do not treat byval arguments as local copies performed
+      // on the call edge, though we should. To make that happen we need to
+      // teach various passes, e.g., DSE, about the copy effect of a byval. That
+      // would also allow us to mark functions only accessing byval arguments as
+      // readnone again, arguably their accesses have no effect outside of the
+      // function, like accesses to allocas.
+ MLK = NO_ARGUMENT_MEM;
} else if (auto *GV = dyn_cast<GlobalValue>(Obj)) {
// Reading constant memory is not treated as a read "effect" by the
// function attr pass so we won't neither. Constants defined by TBAA are
@@ -7678,7 +7727,7 @@ void AAMemoryLocationImpl::categorizePtrValue(
void AAMemoryLocationImpl::categorizeArgumentPointerLocations(
Attributor &A, CallBase &CB, AAMemoryLocation::StateType &AccessedLocs,
bool &Changed) {
- for (unsigned ArgNo = 0, E = CB.getNumArgOperands(); ArgNo < E; ++ArgNo) {
+ for (unsigned ArgNo = 0, E = CB.arg_size(); ArgNo < E; ++ArgNo) {
// Skip non-pointer arguments.
const Value *ArgOp = CB.getArgOperand(ArgNo);
@@ -8611,31 +8660,7 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
static bool calculateICmpInst(const ICmpInst *ICI, const APInt &LHS,
const APInt &RHS) {
- ICmpInst::Predicate Pred = ICI->getPredicate();
- switch (Pred) {
- case ICmpInst::ICMP_UGT:
- return LHS.ugt(RHS);
- case ICmpInst::ICMP_SGT:
- return LHS.sgt(RHS);
- case ICmpInst::ICMP_EQ:
- return LHS.eq(RHS);
- case ICmpInst::ICMP_UGE:
- return LHS.uge(RHS);
- case ICmpInst::ICMP_SGE:
- return LHS.sge(RHS);
- case ICmpInst::ICMP_ULT:
- return LHS.ult(RHS);
- case ICmpInst::ICMP_SLT:
- return LHS.slt(RHS);
- case ICmpInst::ICMP_NE:
- return LHS.ne(RHS);
- case ICmpInst::ICMP_ULE:
- return LHS.ule(RHS);
- case ICmpInst::ICMP_SLE:
- return LHS.sle(RHS);
- default:
- llvm_unreachable("Invalid ICmp predicate!");
- }
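+    // ICmpInst::compare evaluates any integer predicate on the two APInts.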
+ return ICmpInst::compare(LHS, RHS, ICI->getPredicate());
}
static APInt calculateCastInst(const CastInst *CI, const APInt &Src,
@@ -8675,25 +8700,25 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
case Instruction::Mul:
return LHS * RHS;
case Instruction::UDiv:
- if (RHS.isNullValue()) {
+ if (RHS.isZero()) {
SkipOperation = true;
return LHS;
}
return LHS.udiv(RHS);
case Instruction::SDiv:
- if (RHS.isNullValue()) {
+ if (RHS.isZero()) {
SkipOperation = true;
return LHS;
}
return LHS.sdiv(RHS);
case Instruction::URem:
- if (RHS.isNullValue()) {
+ if (RHS.isZero()) {
SkipOperation = true;
return LHS;
}
return LHS.urem(RHS);
case Instruction::SRem:
- if (RHS.isNullValue()) {
+ if (RHS.isZero()) {
SkipOperation = true;
return LHS;
}
@@ -9292,32 +9317,69 @@ struct AANoUndefCallSiteReturned final
void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(noundef) }
};
-struct AACallEdgesFunction : public AACallEdges {
- AACallEdgesFunction(const IRPosition &IRP, Attributor &A)
- : AACallEdges(IRP, A) {}
+struct AACallEdgesImpl : public AACallEdges {
+ AACallEdgesImpl(const IRPosition &IRP, Attributor &A) : AACallEdges(IRP, A) {}
+
+ virtual const SetVector<Function *> &getOptimisticEdges() const override {
+ return CalledFunctions;
+ }
+
+ virtual bool hasUnknownCallee() const override { return HasUnknownCallee; }
+
+ virtual bool hasNonAsmUnknownCallee() const override {
+ return HasUnknownCalleeNonAsm;
+ }
+
+ const std::string getAsStr() const override {
+ return "CallEdges[" + std::to_string(HasUnknownCallee) + "," +
+ std::to_string(CalledFunctions.size()) + "]";
+ }
+ void trackStatistics() const override {}
+
+protected:
+ void addCalledFunction(Function *Fn, ChangeStatus &Change) {
+ if (CalledFunctions.insert(Fn)) {
+ Change = ChangeStatus::CHANGED;
+ LLVM_DEBUG(dbgs() << "[AACallEdges] New call edge: " << Fn->getName()
+ << "\n");
+ }
+ }
+
+ void setHasUnknownCallee(bool NonAsm, ChangeStatus &Change) {
+ if (!HasUnknownCallee)
+ Change = ChangeStatus::CHANGED;
+ if (NonAsm && !HasUnknownCalleeNonAsm)
+ Change = ChangeStatus::CHANGED;
+ HasUnknownCalleeNonAsm |= NonAsm;
+ HasUnknownCallee = true;
+ }
+
+private:
+ /// Optimistic set of functions that might be called by this position.
+ SetVector<Function *> CalledFunctions;
+
+  /// Is there any call with an unknown callee.
+ bool HasUnknownCallee = false;
+
+  /// Is there any call with an unknown callee, excluding any inline asm.
+ bool HasUnknownCalleeNonAsm = false;
+};
+
+struct AACallEdgesCallSite : public AACallEdgesImpl {
+ AACallEdgesCallSite(const IRPosition &IRP, Attributor &A)
+ : AACallEdgesImpl(IRP, A) {}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
ChangeStatus Change = ChangeStatus::UNCHANGED;
- bool OldHasUnknownCallee = HasUnknownCallee;
- bool OldHasUnknownCalleeNonAsm = HasUnknownCalleeNonAsm;
-
- auto AddCalledFunction = [&](Function *Fn) {
- if (CalledFunctions.insert(Fn)) {
- Change = ChangeStatus::CHANGED;
- LLVM_DEBUG(dbgs() << "[AACallEdges] New call edge: " << Fn->getName()
- << "\n");
- }
- };
auto VisitValue = [&](Value &V, const Instruction *CtxI, bool &HasUnknown,
bool Stripped) -> bool {
if (Function *Fn = dyn_cast<Function>(&V)) {
- AddCalledFunction(Fn);
+ addCalledFunction(Fn, Change);
} else {
LLVM_DEBUG(dbgs() << "[AACallEdges] Unrecognized value: " << V << "\n");
- HasUnknown = true;
- HasUnknownCalleeNonAsm = true;
+ setHasUnknownCallee(true, Change);
}
// Explore all values.
@@ -9325,44 +9387,67 @@ struct AACallEdgesFunction : public AACallEdges {
};
// Process any value that we might call.
- auto ProcessCalledOperand = [&](Value *V, Instruction *Ctx) {
+ auto ProcessCalledOperand = [&](Value *V) {
+ bool DummyValue = false;
if (!genericValueTraversal<bool>(A, IRPosition::value(*V), *this,
- HasUnknownCallee, VisitValue, nullptr,
+ DummyValue, VisitValue, nullptr,
false)) {
// If we haven't gone through all values, assume that there are unknown
// callees.
- HasUnknownCallee = true;
- HasUnknownCalleeNonAsm = true;
+ setHasUnknownCallee(true, Change);
}
};
- auto ProcessCallInst = [&](Instruction &Inst) {
- CallBase &CB = static_cast<CallBase &>(Inst);
- if (CB.isInlineAsm()) {
- HasUnknownCallee = true;
- return true;
- }
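+    // For a call-site position the context instruction is the call itself.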
+ CallBase *CB = static_cast<CallBase *>(getCtxI());
- // Process callee metadata if available.
- if (auto *MD = Inst.getMetadata(LLVMContext::MD_callees)) {
- for (auto &Op : MD->operands()) {
- Function *Callee = mdconst::extract_or_null<Function>(Op);
- if (Callee)
- AddCalledFunction(Callee);
- }
- // Callees metadata grantees that the called function is one of its
- // operands, So we are done.
- return true;
+ if (CB->isInlineAsm()) {
+ setHasUnknownCallee(false, Change);
+ return Change;
+ }
+
+ // Process callee metadata if available.
+ if (auto *MD = getCtxI()->getMetadata(LLVMContext::MD_callees)) {
+ for (auto &Op : MD->operands()) {
+ Function *Callee = mdconst::dyn_extract_or_null<Function>(Op);
+ if (Callee)
+ addCalledFunction(Callee, Change);
}
+ return Change;
+ }
- // The most simple case.
- ProcessCalledOperand(CB.getCalledOperand(), &Inst);
+ // The most simple case.
+ ProcessCalledOperand(CB->getCalledOperand());
- // Process callback functions.
- SmallVector<const Use *, 4u> CallbackUses;
- AbstractCallSite::getCallbackUses(CB, CallbackUses);
- for (const Use *U : CallbackUses)
- ProcessCalledOperand(U->get(), &Inst);
+ // Process callback functions.
+ SmallVector<const Use *, 4u> CallbackUses;
+ AbstractCallSite::getCallbackUses(*CB, CallbackUses);
+ for (const Use *U : CallbackUses)
+ ProcessCalledOperand(U->get());
+
+ return Change;
+ }
+};
+
+struct AACallEdgesFunction : public AACallEdgesImpl {
+ AACallEdgesFunction(const IRPosition &IRP, Attributor &A)
+ : AACallEdgesImpl(IRP, A) {}
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ ChangeStatus Change = ChangeStatus::UNCHANGED;
+
+ auto ProcessCallInst = [&](Instruction &Inst) {
+ CallBase &CB = static_cast<CallBase &>(Inst);
+
+ auto &CBEdges = A.getAAFor<AACallEdges>(
+ *this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED);
+ if (CBEdges.hasNonAsmUnknownCallee())
+ setHasUnknownCallee(true, Change);
+ if (CBEdges.hasUnknownCallee())
+ setHasUnknownCallee(false, Change);
+
+ for (Function *F : CBEdges.getOptimisticEdges())
+ addCalledFunction(F, Change);
return true;
};
@@ -9373,155 +9458,323 @@ struct AACallEdgesFunction : public AACallEdges {
UsedAssumedInformation)) {
// If we haven't looked at all call like instructions, assume that there
// are unknown callees.
- HasUnknownCallee = true;
- HasUnknownCalleeNonAsm = true;
+ setHasUnknownCallee(true, Change);
}
- // Track changes.
- if (OldHasUnknownCallee != HasUnknownCallee ||
- OldHasUnknownCalleeNonAsm != HasUnknownCalleeNonAsm)
- Change = ChangeStatus::CHANGED;
-
return Change;
}
+};
- virtual const SetVector<Function *> &getOptimisticEdges() const override {
- return CalledFunctions;
- };
+struct AAFunctionReachabilityFunction : public AAFunctionReachability {
+private:
+ struct QuerySet {
+ void markReachable(Function *Fn) {
+ Reachable.insert(Fn);
+ Unreachable.erase(Fn);
+ }
+
+ ChangeStatus update(Attributor &A, const AAFunctionReachability &AA,
+ ArrayRef<const AACallEdges *> AAEdgesList) {
+ ChangeStatus Change = ChangeStatus::UNCHANGED;
+
+ for (auto *AAEdges : AAEdgesList) {
+ if (AAEdges->hasUnknownCallee()) {
+ if (!CanReachUnknownCallee)
+ Change = ChangeStatus::CHANGED;
+ CanReachUnknownCallee = true;
+ return Change;
+ }
+ }
- virtual bool hasUnknownCallee() const override { return HasUnknownCallee; }
+ for (Function *Fn : make_early_inc_range(Unreachable)) {
+ if (checkIfReachable(A, AA, AAEdgesList, Fn)) {
+ Change = ChangeStatus::CHANGED;
+ markReachable(Fn);
+ }
+ }
+ return Change;
+ }
- virtual bool hasNonAsmUnknownCallee() const override {
- return HasUnknownCalleeNonAsm;
- }
+ bool isReachable(Attributor &A, const AAFunctionReachability &AA,
+ ArrayRef<const AACallEdges *> AAEdgesList, Function *Fn) {
+ // Assume that we can reach the function.
+ // TODO: Be more specific with the unknown callee.
+ if (CanReachUnknownCallee)
+ return true;
- const std::string getAsStr() const override {
- return "CallEdges[" + std::to_string(HasUnknownCallee) + "," +
- std::to_string(CalledFunctions.size()) + "]";
- }
+ if (Reachable.count(Fn))
+ return true;
- void trackStatistics() const override {}
+ if (Unreachable.count(Fn))
+ return false;
- /// Optimistic set of functions that might be called by this function.
- SetVector<Function *> CalledFunctions;
+ // We need to assume that this function can't reach Fn to prevent
+ // an infinite loop if this function is recursive.
+ Unreachable.insert(Fn);
- /// Is there any call with a unknown callee.
- bool HasUnknownCallee = false;
+ bool Result = checkIfReachable(A, AA, AAEdgesList, Fn);
+ if (Result)
+ markReachable(Fn);
+ return Result;
+ }
- /// Is there any call with a unknown callee, excluding any inline asm.
- bool HasUnknownCalleeNonAsm = false;
-};
+ bool checkIfReachable(Attributor &A, const AAFunctionReachability &AA,
+ ArrayRef<const AACallEdges *> AAEdgesList,
+ Function *Fn) const {
-struct AAFunctionReachabilityFunction : public AAFunctionReachability {
- AAFunctionReachabilityFunction(const IRPosition &IRP, Attributor &A)
- : AAFunctionReachability(IRP, A) {}
+ // Handle the most trivial case first.
+ for (auto *AAEdges : AAEdgesList) {
+ const SetVector<Function *> &Edges = AAEdges->getOptimisticEdges();
- bool canReach(Attributor &A, Function *Fn) const override {
- // Assume that we can reach any function if we can reach a call with
- // unknown callee.
- if (CanReachUnknownCallee)
- return true;
+ if (Edges.count(Fn))
+ return true;
+ }
- if (ReachableQueries.count(Fn))
- return true;
+ SmallVector<const AAFunctionReachability *, 8> Deps;
+ for (auto &AAEdges : AAEdgesList) {
+ const SetVector<Function *> &Edges = AAEdges->getOptimisticEdges();
+
+ for (Function *Edge : Edges) {
+ // We don't need a dependency if the result is reachable.
+ const AAFunctionReachability &EdgeReachability =
+ A.getAAFor<AAFunctionReachability>(
+ AA, IRPosition::function(*Edge), DepClassTy::NONE);
+ Deps.push_back(&EdgeReachability);
+
+ if (EdgeReachability.canReach(A, Fn))
+ return true;
+ }
+ }
+
+ // The result is false for now, set dependencies and leave.
+ for (auto Dep : Deps)
+ A.recordDependence(AA, *Dep, DepClassTy::REQUIRED);
- if (UnreachableQueries.count(Fn))
return false;
+ }
+
+ /// Set of functions that we know for sure is reachable.
+ DenseSet<Function *> Reachable;
+
+ /// Set of functions that are unreachable, but might become reachable.
+ DenseSet<Function *> Unreachable;
+
+    /// If we can reach a function with a call to an unknown function, we assume
+ /// that we can reach any function.
+ bool CanReachUnknownCallee = false;
+ };
+
+public:
+ AAFunctionReachabilityFunction(const IRPosition &IRP, Attributor &A)
+ : AAFunctionReachability(IRP, A) {}
+ bool canReach(Attributor &A, Function *Fn) const override {
const AACallEdges &AAEdges =
A.getAAFor<AACallEdges>(*this, getIRPosition(), DepClassTy::REQUIRED);
- const SetVector<Function *> &Edges = AAEdges.getOptimisticEdges();
- bool Result = checkIfReachable(A, Edges, Fn);
+ // Attributor returns attributes as const, so this function has to be
+ // const for users of this attribute to use it without having to do
+ // a const_cast.
+ // This is a hack for us to be able to cache queries.
+ auto *NonConstThis = const_cast<AAFunctionReachabilityFunction *>(this);
+ bool Result =
+ NonConstThis->WholeFunction.isReachable(A, *this, {&AAEdges}, Fn);
+
+ return Result;
+ }
+
+ /// Can \p CB reach \p Fn
+ bool canReach(Attributor &A, CallBase &CB, Function *Fn) const override {
+ const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
+ *this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED);
// Attributor returns attributes as const, so this function has to be
// const for users of this attribute to use it without having to do
// a const_cast.
// This is a hack for us to be able to cache queries.
auto *NonConstThis = const_cast<AAFunctionReachabilityFunction *>(this);
+ QuerySet &CBQuery = NonConstThis->CBQueries[&CB];
- if (Result)
- NonConstThis->ReachableQueries.insert(Fn);
- else
- NonConstThis->UnreachableQueries.insert(Fn);
+ bool Result = CBQuery.isReachable(A, *this, {&AAEdges}, Fn);
return Result;
}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
- if (CanReachUnknownCallee)
- return ChangeStatus::UNCHANGED;
-
const AACallEdges &AAEdges =
A.getAAFor<AACallEdges>(*this, getIRPosition(), DepClassTy::REQUIRED);
- const SetVector<Function *> &Edges = AAEdges.getOptimisticEdges();
ChangeStatus Change = ChangeStatus::UNCHANGED;
- if (AAEdges.hasUnknownCallee()) {
- bool OldCanReachUnknown = CanReachUnknownCallee;
- CanReachUnknownCallee = true;
- return OldCanReachUnknown ? ChangeStatus::UNCHANGED
- : ChangeStatus::CHANGED;
- }
+ Change |= WholeFunction.update(A, *this, {&AAEdges});
- // Check if any of the unreachable functions become reachable.
- for (auto Current = UnreachableQueries.begin();
- Current != UnreachableQueries.end();) {
- if (!checkIfReachable(A, Edges, *Current)) {
- Current++;
- continue;
- }
- ReachableQueries.insert(*Current);
- UnreachableQueries.erase(*Current++);
- Change = ChangeStatus::CHANGED;
+ for (auto CBPair : CBQueries) {
+ const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
+ *this, IRPosition::callsite_function(*CBPair.first),
+ DepClassTy::REQUIRED);
+
+ Change |= CBPair.second.update(A, *this, {&AAEdges});
}
return Change;
}
const std::string getAsStr() const override {
- size_t QueryCount = ReachableQueries.size() + UnreachableQueries.size();
+ size_t QueryCount =
+ WholeFunction.Reachable.size() + WholeFunction.Unreachable.size();
- return "FunctionReachability [" + std::to_string(ReachableQueries.size()) +
- "," + std::to_string(QueryCount) + "]";
+ return "FunctionReachability [" +
+ std::to_string(WholeFunction.Reachable.size()) + "," +
+ std::to_string(QueryCount) + "]";
}
void trackStatistics() const override {}
private:
- bool canReachUnknownCallee() const override { return CanReachUnknownCallee; }
+ bool canReachUnknownCallee() const override {
+ return WholeFunction.CanReachUnknownCallee;
+ }
- bool checkIfReachable(Attributor &A, const SetVector<Function *> &Edges,
- Function *Fn) const {
- if (Edges.count(Fn))
- return true;
+  /// Used to answer if the whole function can reach a specific function.
+ QuerySet WholeFunction;
- for (Function *Edge : Edges) {
- // We don't need a dependency if the result is reachable.
- const AAFunctionReachability &EdgeReachability =
- A.getAAFor<AAFunctionReachability>(*this, IRPosition::function(*Edge),
- DepClassTy::NONE);
+ /// Used to answer if a call base inside this function can reach a specific
+ /// function.
+ DenseMap<CallBase *, QuerySet> CBQueries;
+};
- if (EdgeReachability.canReach(A, Fn))
- return true;
- }
- for (Function *Fn : Edges)
- A.getAAFor<AAFunctionReachability>(*this, IRPosition::function(*Fn),
- DepClassTy::REQUIRED);
+/// ---------------------- Assumption Propagation ------------------------------
+struct AAAssumptionInfoImpl : public AAAssumptionInfo {
+ AAAssumptionInfoImpl(const IRPosition &IRP, Attributor &A,
+ const DenseSet<StringRef> &Known)
+ : AAAssumptionInfo(IRP, A, Known) {}
- return false;
+ bool hasAssumption(const StringRef Assumption) const override {
+ return isValidState() && setContains(Assumption);
+ }
+
+ /// See AbstractAttribute::getAsStr()
+ const std::string getAsStr() const override {
+ const SetContents &Known = getKnown();
+ const SetContents &Assumed = getAssumed();
+
+ const std::string KnownStr =
+ llvm::join(Known.getSet().begin(), Known.getSet().end(), ",");
+ const std::string AssumedStr =
+ (Assumed.isUniversal())
+ ? "Universal"
+ : llvm::join(Assumed.getSet().begin(), Assumed.getSet().end(), ",");
+
+ return "Known [" + KnownStr + "]," + " Assumed [" + AssumedStr + "]";
+ }
+};
+
+/// Propagates assumption information from parent functions to all of their
+/// successors. An assumption can be propagated if the containing function
+/// dominates the called function.
+///
+/// We start with a "known" set of assumptions already valid for the associated
+/// function and an "assumed" set that initially contains all possible
+/// assumptions. The assumed set is inter-procedurally updated by narrowing its
+/// contents as concrete values are known. The concrete values are seeded by the
+/// first nodes that are either entries into the call graph, or contains no
+/// first nodes that are either entries into the call graph, or contain no
+/// with all of its predecessors.
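+///
+/// For example (with illustrative assumption strings "A" and "B"): if one
+/// caller is known to carry only assumption "A" and another carries "A" and
+/// "B", the assumed set of this function narrows to the intersection {"A"}
+/// once both call sites have been visited.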
+struct AAAssumptionInfoFunction final : AAAssumptionInfoImpl {
+ AAAssumptionInfoFunction(const IRPosition &IRP, Attributor &A)
+ : AAAssumptionInfoImpl(IRP, A,
+ getAssumptions(*IRP.getAssociatedFunction())) {}
+
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override {
+ const auto &Assumptions = getKnown();
+
+ // Don't manifest a universal set if it somehow made it here.
+ if (Assumptions.isUniversal())
+ return ChangeStatus::UNCHANGED;
+
+ Function *AssociatedFunction = getAssociatedFunction();
+
+ bool Changed = addAssumptions(*AssociatedFunction, Assumptions.getSet());
+
+ return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ bool Changed = false;
+
+ auto CallSitePred = [&](AbstractCallSite ACS) {
+ const auto &AssumptionAA = A.getAAFor<AAAssumptionInfo>(
+ *this, IRPosition::callsite_function(*ACS.getInstruction()),
+ DepClassTy::REQUIRED);
+ // Get the set of assumptions shared by all of this function's callers.
+ Changed |= getIntersection(AssumptionAA.getAssumed());
+ return !getAssumed().empty() || !getKnown().empty();
+ };
+
+ bool AllCallSitesKnown;
+ // Get the intersection of all assumptions held by this node's predecessors.
+ // If we don't know all the call sites then this is either an entry into the
+    // call graph or an empty node. Such a node is known to only contain its own
+    // assumptions, which can be propagated to its successors.
+ if (!A.checkForAllCallSites(CallSitePred, *this, true, AllCallSitesKnown))
+ return indicatePessimisticFixpoint();
+
+ return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
+ }
+
+ void trackStatistics() const override {}
+};
+
+/// Assumption Info defined for call sites.
+struct AAAssumptionInfoCallSite final : AAAssumptionInfoImpl {
+
+ AAAssumptionInfoCallSite(const IRPosition &IRP, Attributor &A)
+ : AAAssumptionInfoImpl(IRP, A, getInitialAssumptions(IRP)) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ const IRPosition &FnPos = IRPosition::function(*getAnchorScope());
+ A.getAAFor<AAAssumptionInfo>(*this, FnPos, DepClassTy::REQUIRED);
}
- /// Set of functions that we know for sure is reachable.
- SmallPtrSet<Function *, 8> ReachableQueries;
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override {
+ // Don't manifest a universal set if it somehow made it here.
+ if (getKnown().isUniversal())
+ return ChangeStatus::UNCHANGED;
+
+ CallBase &AssociatedCall = cast<CallBase>(getAssociatedValue());
+ bool Changed = addAssumptions(AssociatedCall, getAssumed().getSet());
+
+ return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ const IRPosition &FnPos = IRPosition::function(*getAnchorScope());
+ auto &AssumptionAA =
+ A.getAAFor<AAAssumptionInfo>(*this, FnPos, DepClassTy::REQUIRED);
+ bool Changed = getIntersection(AssumptionAA.getAssumed());
+ return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
+ }
- /// Set of functions that are unreachable, but might become reachable.
- SmallPtrSet<Function *, 8> UnreachableQueries;
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {}
- /// If we can reach a function with a call to a unknown function we assume
- /// that we can reach any function.
- bool CanReachUnknownCallee = false;
+private:
+  /// Helper to initialize the known set as all the assumptions this call and
+ /// the callee contain.
+ DenseSet<StringRef> getInitialAssumptions(const IRPosition &IRP) {
+ const CallBase &CB = cast<CallBase>(IRP.getAssociatedValue());
+ auto Assumptions = getAssumptions(CB);
+ if (Function *F = IRP.getAssociatedFunction())
+ set_union(Assumptions, getAssumptions(*F));
+ return Assumptions;
+ }
};
} // namespace
@@ -9559,6 +9812,7 @@ const char AANoUndef::ID = 0;
const char AACallEdges::ID = 0;
const char AAFunctionReachability::ID = 0;
const char AAPointerInfo::ID = 0;
+const char AAAssumptionInfo::ID = 0;
// Macro magic to create the static generator function for attributes that
// follow the naming scheme.
@@ -9660,6 +9914,8 @@ CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAWillReturn)
CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoReturn)
CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAReturnedValues)
CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAMemoryLocation)
+CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AACallEdges)
+CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAssumptionInfo)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANonNull)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoAlias)
@@ -9679,7 +9935,6 @@ CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoFree)
CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAHeapToStack)
CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAReachability)
CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAUndefinedBehavior)
-CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AACallEdges)
CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAFunctionReachability)
CREATE_NON_RET_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAMemoryBehavior)
diff --git a/llvm/lib/Transforms/IPO/ConstantMerge.cpp b/llvm/lib/Transforms/IPO/ConstantMerge.cpp
index 8e81f4bad4af..178d3f41963e 100644
--- a/llvm/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/llvm/lib/Transforms/IPO/ConstantMerge.cpp
@@ -153,33 +153,30 @@ static bool mergeConstants(Module &M) {
// were just merged.
while (true) {
// Find the canonical constants others will be merged with.
- for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
- GVI != E; ) {
- GlobalVariable *GV = &*GVI++;
-
+ for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals())) {
// If this GV is dead, remove it.
- GV->removeDeadConstantUsers();
- if (GV->use_empty() && GV->hasLocalLinkage()) {
- GV->eraseFromParent();
+ GV.removeDeadConstantUsers();
+ if (GV.use_empty() && GV.hasLocalLinkage()) {
+ GV.eraseFromParent();
++ChangesMade;
continue;
}
- if (isUnmergeableGlobal(GV, UsedGlobals))
+ if (isUnmergeableGlobal(&GV, UsedGlobals))
continue;
// This transformation is legal for weak ODR globals in the sense it
// doesn't change semantics, but we really don't want to perform it
// anyway; it's likely to pessimize code generation, and some tools
// (like the Darwin linker in cases involving CFString) don't expect it.
- if (GV->isWeakForLinker())
+ if (GV.isWeakForLinker())
continue;
       // Don't touch globals with metadata other than !dbg.
- if (hasMetadataOtherThanDebugLoc(GV))
+ if (hasMetadataOtherThanDebugLoc(&GV))
continue;
- Constant *Init = GV->getInitializer();
+ Constant *Init = GV.getInitializer();
// Check to see if the initializer is already known.
GlobalVariable *&Slot = CMap[Init];
@@ -188,9 +185,9 @@ static bool mergeConstants(Module &M) {
// replace with the current one. If the current is externally visible
       // it cannot be replaced, but can be the canonical constant we merge with.
bool FirstConstantFound = !Slot;
- if (FirstConstantFound || IsBetterCanonical(*GV, *Slot)) {
- Slot = GV;
- LLVM_DEBUG(dbgs() << "Cmap[" << *Init << "] = " << GV->getName()
+ if (FirstConstantFound || IsBetterCanonical(GV, *Slot)) {
+ Slot = &GV;
+ LLVM_DEBUG(dbgs() << "Cmap[" << *Init << "] = " << GV.getName()
<< (FirstConstantFound ? "\n" : " (updated)\n"));
}
}
@@ -199,18 +196,15 @@ static bool mergeConstants(Module &M) {
// SameContentReplacements vector. We cannot do the replacement in this pass
// because doing so may cause initializers of other globals to be rewritten,
// invalidating the Constant* pointers in CMap.
- for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
- GVI != E; ) {
- GlobalVariable *GV = &*GVI++;
-
- if (isUnmergeableGlobal(GV, UsedGlobals))
+ for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals())) {
+ if (isUnmergeableGlobal(&GV, UsedGlobals))
continue;
// We can only replace constant with local linkage.
- if (!GV->hasLocalLinkage())
+ if (!GV.hasLocalLinkage())
continue;
- Constant *Init = GV->getInitializer();
+ Constant *Init = GV.getInitializer();
// Check to see if the initializer is already known.
auto Found = CMap.find(Init);
@@ -218,16 +212,16 @@ static bool mergeConstants(Module &M) {
continue;
GlobalVariable *Slot = Found->second;
- if (Slot == GV)
+ if (Slot == &GV)
continue;
- if (makeMergeable(GV, Slot) == CanMerge::No)
+ if (makeMergeable(&GV, Slot) == CanMerge::No)
continue;
// Make all uses of the duplicate constant use the canonical version.
- LLVM_DEBUG(dbgs() << "Will replace: @" << GV->getName() << " -> @"
+ LLVM_DEBUG(dbgs() << "Will replace: @" << GV.getName() << " -> @"
<< Slot->getName() << "\n");
- SameContentReplacements.push_back(std::make_pair(GV, Slot));
+ SameContentReplacements.push_back(std::make_pair(&GV, Slot));
}
// Now that we have figured out which replacements must be made, do them all
diff --git a/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
index d95fd55870f8..fb9ab7954e36 100644
--- a/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -175,8 +175,8 @@ bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) {
// to pass in a smaller number of arguments into the new function.
//
std::vector<Value *> Args;
- for (Value::user_iterator I = Fn.user_begin(), E = Fn.user_end(); I != E; ) {
- CallBase *CB = dyn_cast<CallBase>(*I++);
+ for (User *U : llvm::make_early_inc_range(Fn.users())) {
+ CallBase *CB = dyn_cast<CallBase>(U);
if (!CB)
continue;
@@ -188,9 +188,9 @@ bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) {
if (!PAL.isEmpty()) {
SmallVector<AttributeSet, 8> ArgAttrs;
for (unsigned ArgNo = 0; ArgNo < NumArgs; ++ArgNo)
- ArgAttrs.push_back(PAL.getParamAttributes(ArgNo));
- PAL = AttributeList::get(Fn.getContext(), PAL.getFnAttributes(),
- PAL.getRetAttributes(), ArgAttrs);
+ ArgAttrs.push_back(PAL.getParamAttrs(ArgNo));
+ PAL = AttributeList::get(Fn.getContext(), PAL.getFnAttrs(),
+ PAL.getRetAttrs(), ArgAttrs);
}
SmallVector<OperandBundleDef, 1> OpBundles;
@@ -762,8 +762,8 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
if (LiveValues.erase(Arg)) {
Params.push_back(I->getType());
ArgAlive[ArgI] = true;
- ArgAttrVec.push_back(PAL.getParamAttributes(ArgI));
- HasLiveReturnedArg |= PAL.hasParamAttribute(ArgI, Attribute::Returned);
+ ArgAttrVec.push_back(PAL.getParamAttrs(ArgI));
+ HasLiveReturnedArg |= PAL.hasParamAttr(ArgI, Attribute::Returned);
} else {
++NumArgumentsEliminated;
LLVM_DEBUG(dbgs() << "DeadArgumentEliminationPass - Removing argument "
@@ -838,7 +838,7 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
assert(NRetTy && "No new return type found?");
// The existing function return attributes.
- AttrBuilder RAttrs(PAL.getRetAttributes());
+ AttrBuilder RAttrs(PAL.getRetAttrs());
// Remove any incompatible attributes, but only if we removed all return
// values. Otherwise, ensure that we don't have any conflicting attributes
@@ -853,8 +853,8 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
AttributeSet RetAttrs = AttributeSet::get(F->getContext(), RAttrs);
// Strip allocsize attributes. They might refer to the deleted arguments.
- AttributeSet FnAttrs = PAL.getFnAttributes().removeAttribute(
- F->getContext(), Attribute::AllocSize);
+ AttributeSet FnAttrs =
+ PAL.getFnAttrs().removeAttribute(F->getContext(), Attribute::AllocSize);
// Reconstruct the AttributesList based on the vector we constructed.
assert(ArgAttrVec.size() == Params.size());
@@ -889,7 +889,7 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
// Adjust the call return attributes in case the function was changed to
// return void.
- AttrBuilder RAttrs(CallPAL.getRetAttributes());
+ AttrBuilder RAttrs(CallPAL.getRetAttrs());
RAttrs.remove(AttributeFuncs::typeIncompatible(NRetTy));
AttributeSet RetAttrs = AttributeSet::get(F->getContext(), RAttrs);
@@ -903,7 +903,7 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
if (ArgAlive[Pi]) {
Args.push_back(*I);
// Get original parameter attributes, but skip return attributes.
- AttributeSet Attrs = CallPAL.getParamAttributes(Pi);
+ AttributeSet Attrs = CallPAL.getParamAttrs(Pi);
if (NRetTy != RetTy && Attrs.hasAttribute(Attribute::Returned)) {
// If the return type has changed, then get rid of 'returned' on the
// call site. The alternative is to make all 'returned' attributes on
@@ -922,7 +922,7 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
// Push any varargs arguments on the list. Don't forget their attributes.
for (auto E = CB.arg_end(); I != E; ++I, ++Pi) {
Args.push_back(*I);
- ArgAttrVec.push_back(CallPAL.getParamAttributes(Pi));
+ ArgAttrVec.push_back(CallPAL.getParamAttrs(Pi));
}
// Reconstruct the AttributesList based on the vector we constructed.
@@ -930,7 +930,7 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
// Again, be sure to remove any allocsize attributes, since their indices
// may now be incorrect.
- AttributeSet FnAttrs = CallPAL.getFnAttributes().removeAttribute(
+ AttributeSet FnAttrs = CallPAL.getFnAttrs().removeAttribute(
F->getContext(), Attribute::AllocSize);
AttributeList NewCallPAL = AttributeList::get(
@@ -1094,11 +1094,9 @@ PreservedAnalyses DeadArgumentEliminationPass::run(Module &M,
// fused with the next loop, because deleting a function invalidates
// information computed while surveying other functions.
LLVM_DEBUG(dbgs() << "DeadArgumentEliminationPass - Deleting dead varargs\n");
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {
- Function &F = *I++;
+ for (Function &F : llvm::make_early_inc_range(M))
if (F.getFunctionType()->isVarArg())
Changed |= DeleteDeadVarargs(F);
- }
   // Second phase: loop through the module, determining which arguments are live.
// We assume all arguments are dead unless proven otherwise (allowing us to
@@ -1109,13 +1107,10 @@ PreservedAnalyses DeadArgumentEliminationPass::run(Module &M,
SurveyFunction(F);
// Now, remove all dead arguments and return values from each function in
- // turn.
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {
- // Increment now, because the function will probably get removed (ie.
- // replaced by a new one).
- Function *F = &*I++;
- Changed |= RemoveDeadStuffFromFunction(F);
- }
+ // turn. We use make_early_inc_range here because functions will probably get
+ // removed (i.e. replaced by new ones).
+ for (Function &F : llvm::make_early_inc_range(M))
+ Changed |= RemoveDeadStuffFromFunction(&F);
// Finally, look for any unused parameters in functions with non-local
// linkage and replace the passed in parameters with undef.
diff --git a/llvm/lib/Transforms/IPO/ExtractGV.cpp b/llvm/lib/Transforms/IPO/ExtractGV.cpp
index ba0efd46af16..387f114f6ffa 100644
--- a/llvm/lib/Transforms/IPO/ExtractGV.cpp
+++ b/llvm/lib/Transforms/IPO/ExtractGV.cpp
@@ -121,32 +121,27 @@ namespace {
}
// Visit the Aliases.
- for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
- I != E;) {
- Module::alias_iterator CurI = I;
- ++I;
-
- bool Delete = deleteStuff == (bool)Named.count(&*CurI);
- makeVisible(*CurI, Delete);
+ for (GlobalAlias &GA : llvm::make_early_inc_range(M.aliases())) {
+ bool Delete = deleteStuff == (bool)Named.count(&GA);
+ makeVisible(GA, Delete);
if (Delete) {
- Type *Ty = CurI->getValueType();
+ Type *Ty = GA.getValueType();
- CurI->removeFromParent();
+ GA.removeFromParent();
llvm::Value *Declaration;
if (FunctionType *FTy = dyn_cast<FunctionType>(Ty)) {
- Declaration = Function::Create(FTy, GlobalValue::ExternalLinkage,
- CurI->getAddressSpace(),
- CurI->getName(), &M);
+ Declaration =
+ Function::Create(FTy, GlobalValue::ExternalLinkage,
+ GA.getAddressSpace(), GA.getName(), &M);
} else {
Declaration =
- new GlobalVariable(M, Ty, false, GlobalValue::ExternalLinkage,
- nullptr, CurI->getName());
-
+ new GlobalVariable(M, Ty, false, GlobalValue::ExternalLinkage,
+ nullptr, GA.getName());
}
- CurI->replaceAllUsesWith(Declaration);
- delete &*CurI;
+ GA.replaceAllUsesWith(Declaration);
+ delete &GA;
}
}
diff --git a/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp b/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
index 47fdf042f9d4..16d00a0c89e1 100644
--- a/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
+++ b/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
@@ -50,14 +50,14 @@ static void forceAttributes(Function &F) {
return Kind;
};
- for (auto &S : ForceAttributes) {
+ for (const auto &S : ForceAttributes) {
auto Kind = ParseFunctionAndAttr(S);
if (Kind == Attribute::None || F.hasFnAttribute(Kind))
continue;
F.addFnAttr(Kind);
}
- for (auto &S : ForceRemoveAttributes) {
+ for (const auto &S : ForceRemoveAttributes) {
auto Kind = ParseFunctionAndAttr(S);
if (Kind == Attribute::None || !F.hasFnAttribute(Kind))
continue;
diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index ca8660a98ded..cde78713b554 100644
--- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -14,10 +14,12 @@
#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
@@ -82,6 +84,11 @@ STATISTIC(NumNoFree, "Number of functions marked as nofree");
STATISTIC(NumWillReturn, "Number of functions marked as willreturn");
STATISTIC(NumNoSync, "Number of functions marked as nosync");
+STATISTIC(NumThinLinkNoRecurse,
+ "Number of functions marked as norecurse during thinlink");
+STATISTIC(NumThinLinkNoUnwind,
+ "Number of functions marked as nounwind during thinlink");
+
static cl::opt<bool> EnableNonnullArgPropagation(
"enable-nonnull-arg-prop", cl::init(true), cl::Hidden,
cl::desc("Try to propagate nonnull argument attributes from callsites to "
@@ -95,6 +102,10 @@ static cl::opt<bool> DisableNoFreeInference(
"disable-nofree-inference", cl::Hidden,
cl::desc("Stop inferring nofree attribute during function-attrs pass"));
+static cl::opt<bool> DisableThinLTOPropagation(
+ "disable-thinlto-funcattrs", cl::init(true), cl::Hidden,
+ cl::desc("Don't propagate function-attrs in thinLTO"));
+
namespace {
using SCCNodeSet = SmallSetVector<Function *, 8>;
@@ -131,12 +142,10 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
// Scan the function body for instructions that may read or write memory.
bool ReadsMemory = false;
bool WritesMemory = false;
- for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) {
- Instruction *I = &*II;
-
+ for (Instruction &I : instructions(F)) {
// Some instructions can be ignored even if they read or write memory.
// Detect these now, skipping to the next instruction if one is found.
- if (auto *Call = dyn_cast<CallBase>(I)) {
+ if (auto *Call = dyn_cast<CallBase>(&I)) {
// Ignore calls to functions in the same SCC, as long as the call sites
// don't have operand bundles. Calls with operand bundles are allowed to
// have memory effects not described by the memory effects of the call
@@ -170,14 +179,13 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
// Check whether all pointer arguments point to local memory, and
// ignore calls that only access local memory.
- for (auto CI = Call->arg_begin(), CE = Call->arg_end(); CI != CE; ++CI) {
- Value *Arg = *CI;
+ for (const Use &U : Call->args()) {
+ const Value *Arg = U;
if (!Arg->getType()->isPtrOrPtrVectorTy())
continue;
- AAMDNodes AAInfo;
- I->getAAMetadata(AAInfo);
- MemoryLocation Loc = MemoryLocation::getBeforeOrAfter(Arg, AAInfo);
+ MemoryLocation Loc =
+ MemoryLocation::getBeforeOrAfter(Arg, I.getAAMetadata());
// Skip accesses to local or constant memory as they don't impact the
// externally visible mod/ref behavior.
@@ -192,21 +200,21 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
ReadsMemory = true;
}
continue;
- } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
// Ignore non-volatile loads from local memory. (Atomic is okay here.)
if (!LI->isVolatile()) {
MemoryLocation Loc = MemoryLocation::get(LI);
if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
continue;
}
- } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
// Ignore non-volatile stores to local memory. (Atomic is okay here.)
if (!SI->isVolatile()) {
MemoryLocation Loc = MemoryLocation::get(SI);
if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
continue;
}
- } else if (VAArgInst *VI = dyn_cast<VAArgInst>(I)) {
+ } else if (VAArgInst *VI = dyn_cast<VAArgInst>(&I)) {
// Ignore vaargs on local memory.
MemoryLocation Loc = MemoryLocation::get(VI);
if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
@@ -217,10 +225,10 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
// read or write memory.
//
// Writes memory, remember that.
- WritesMemory |= I->mayWriteToMemory();
+ WritesMemory |= I.mayWriteToMemory();
// If this instruction may read memory, remember that.
- ReadsMemory |= I->mayReadFromMemory();
+ ReadsMemory |= I.mayReadFromMemory();
}
if (WritesMemory) {
@@ -240,7 +248,8 @@ MemoryAccessKind llvm::computeFunctionBodyMemoryAccess(Function &F,
/// Deduce readonly/readnone attributes for the SCC.
template <typename AARGetterT>
-static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter) {
+static void addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter,
+ SmallSet<Function *, 8> &Changed) {
// Check if any of the functions in the SCC read or write memory. If they
// write memory then they can't be marked readnone or readonly.
bool ReadsMemory = false;
@@ -255,7 +264,7 @@ static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter) {
switch (checkFunctionMemoryAccess(*F, F->hasExactDefinition(),
AAR, SCCNodes)) {
case MAK_MayWrite:
- return false;
+ return;
case MAK_ReadOnly:
ReadsMemory = true;
break;
@@ -271,11 +280,10 @@ static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter) {
// If the SCC contains both functions that read and functions that write, then
// we cannot add readonly attributes.
if (ReadsMemory && WritesMemory)
- return false;
+ return;
// Success! Functions in this SCC do not access memory, or only read memory.
// Give them the appropriate attribute.
- bool MadeChange = false;
for (Function *F : SCCNodes) {
if (F->doesNotAccessMemory())
@@ -289,7 +297,7 @@ static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter) {
if (F->doesNotReadMemory() && WritesMemory)
continue;
- MadeChange = true;
+ Changed.insert(F);
// Clear out any existing attributes.
AttrBuilder AttrsToRemove;
@@ -303,7 +311,7 @@ static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter) {
AttrsToRemove.addAttribute(Attribute::InaccessibleMemOnly);
AttrsToRemove.addAttribute(Attribute::InaccessibleMemOrArgMemOnly);
}
- F->removeAttributes(AttributeList::FunctionIndex, AttrsToRemove);
+ F->removeFnAttrs(AttrsToRemove);
// Add in the new attribute.
if (WritesMemory && !ReadsMemory)
@@ -318,8 +326,195 @@ static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter) {
else
++NumReadNone;
}
+}
+
+// Compute definitive function attributes for a function taking into account
+// prevailing definitions and linkage types
+static FunctionSummary *calculatePrevailingSummary(
+ ValueInfo VI,
+ DenseMap<ValueInfo, FunctionSummary *> &CachedPrevailingSummary,
+ function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
+ IsPrevailing) {
+
+ if (CachedPrevailingSummary.count(VI))
+ return CachedPrevailingSummary[VI];
+
+ /// At this point, prevailing symbols have been resolved. The following leads
+ /// to returning a conservative result:
+ /// - Multiple instances with local linkage. Normally local linkage would be
+ /// unique per module
+ /// as the GUID includes the module path. We could have a guid alias if
+ /// there wasn't any distinguishing path when each file was compiled, but
+ /// that should be rare so we'll punt on those.
+
+ /// These next 2 cases should not happen and will assert:
+ /// - Multiple instances with external linkage. This should be caught in
+ /// symbol resolution
+ /// - Non-existent FunctionSummary for Aliasee. This presents a hole in our
+ /// knowledge meaning we have to go conservative.
+
+ /// Otherwise, we calculate attributes for a function as:
+ /// 1. If we have a local linkage, take its attributes. If there's somehow
+ /// multiple, bail and go conservative.
+ /// 2. If we have an external/WeakODR/LinkOnceODR linkage check that it is
+ /// prevailing, take its attributes.
+ /// 3. If we have a Weak/LinkOnce linkage the copies can have semantic
+ /// differences. However, if the prevailing copy is known it will be used
+ /// so take its attributes. If the prevailing copy is in a native file
+ /// all IR copies will be dead and propagation will go conservative.
+ /// 4. AvailableExternally summaries without a prevailing copy are known to
+ /// occur in a couple of circumstances:
+ /// a. An internal function gets imported due to its caller getting
+ /// imported, it becomes AvailableExternally but no prevailing
+ /// definition exists. Because it has to get imported along with its
+ /// caller the attributes will be captured by propagating on its
+ /// caller.
+ /// b. C++11 [temp.explicit]p10 can generate AvailableExternally
+  ///         definitions of explicitly instantiated template declarations
+ /// for inlining which are ultimately dropped from the TU. Since this
+ /// is localized to the TU the attributes will have already made it to
+ /// the callers.
+ /// These are edge cases and already captured by their callers so we
+ /// ignore these for now. If they become relevant to optimize in the
+ /// future this can be revisited.
+ /// 5. Otherwise, go conservative.
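+  ///
+  /// As an illustrative example of case 3: for a plain linkonce function the
+  /// summary of the prevailing copy, if it is in the index, supplies the
+  /// attributes; if the prevailing copy is in a native object file, no live
+  /// prevailing summary is found and nullptr is returned, i.e. conservative.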
+
+ CachedPrevailingSummary[VI] = nullptr;
+ FunctionSummary *Local = nullptr;
+ FunctionSummary *Prevailing = nullptr;
+
+ for (const auto &GVS : VI.getSummaryList()) {
+ if (!GVS->isLive())
+ continue;
+
+ FunctionSummary *FS = dyn_cast<FunctionSummary>(GVS->getBaseObject());
+ // Virtual and Unknown (e.g. indirect) calls require going conservative
+ if (!FS || FS->fflags().HasUnknownCall)
+ return nullptr;
+
+ const auto &Linkage = GVS->linkage();
+ if (GlobalValue::isLocalLinkage(Linkage)) {
+ if (Local) {
+ LLVM_DEBUG(
+ dbgs()
+ << "ThinLTO FunctionAttrs: Multiple Local Linkage, bailing on "
+ "function "
+ << VI.name() << " from " << FS->modulePath() << ". Previous module "
+ << Local->modulePath() << "\n");
+ return nullptr;
+ }
+ Local = FS;
+ } else if (GlobalValue::isExternalLinkage(Linkage)) {
+ assert(IsPrevailing(VI.getGUID(), GVS.get()));
+ Prevailing = FS;
+ break;
+ } else if (GlobalValue::isWeakODRLinkage(Linkage) ||
+ GlobalValue::isLinkOnceODRLinkage(Linkage) ||
+ GlobalValue::isWeakAnyLinkage(Linkage) ||
+ GlobalValue::isLinkOnceAnyLinkage(Linkage)) {
+ if (IsPrevailing(VI.getGUID(), GVS.get())) {
+ Prevailing = FS;
+ break;
+ }
+ } else if (GlobalValue::isAvailableExternallyLinkage(Linkage)) {
+ // TODO: Handle these cases if they become meaningful
+ continue;
+ }
+ }
+
+ if (Local) {
+ assert(!Prevailing);
+ CachedPrevailingSummary[VI] = Local;
+ } else if (Prevailing) {
+ assert(!Local);
+ CachedPrevailingSummary[VI] = Prevailing;
+ }
- return MadeChange;
+ return CachedPrevailingSummary[VI];
+}
+
+bool llvm::thinLTOPropagateFunctionAttrs(
+ ModuleSummaryIndex &Index,
+ function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
+ IsPrevailing) {
+ // TODO: implement addNoAliasAttrs once
+ // there's more information about the return type in the summary
+ if (DisableThinLTOPropagation)
+ return false;
+
+ DenseMap<ValueInfo, FunctionSummary *> CachedPrevailingSummary;
+ bool Changed = false;
+
+ auto PropagateAttributes = [&](std::vector<ValueInfo> &SCCNodes) {
+ // Assume we can propagate unless we discover otherwise
+ FunctionSummary::FFlags InferredFlags;
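+    // An SCC with more than one node is mutually recursive, so norecurse can
+    // only start out true for single-node SCCs; the callee scan below clears
+    // it again if any callee is not known to be norecurse.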
+ InferredFlags.NoRecurse = (SCCNodes.size() == 1);
+ InferredFlags.NoUnwind = true;
+
+ for (auto &V : SCCNodes) {
+ FunctionSummary *CallerSummary =
+ calculatePrevailingSummary(V, CachedPrevailingSummary, IsPrevailing);
+
+      // Function summaries can be missing, e.g., for declarations.
+ if (!CallerSummary)
+ return;
+
+ if (CallerSummary->fflags().MayThrow)
+ InferredFlags.NoUnwind = false;
+
+ for (const auto &Callee : CallerSummary->calls()) {
+ FunctionSummary *CalleeSummary = calculatePrevailingSummary(
+ Callee.first, CachedPrevailingSummary, IsPrevailing);
+
+ if (!CalleeSummary)
+ return;
+
+ if (!CalleeSummary->fflags().NoRecurse)
+ InferredFlags.NoRecurse = false;
+
+ if (!CalleeSummary->fflags().NoUnwind)
+ InferredFlags.NoUnwind = false;
+
+ if (!InferredFlags.NoUnwind && !InferredFlags.NoRecurse)
+ break;
+ }
+ }
+
+ if (InferredFlags.NoUnwind || InferredFlags.NoRecurse) {
+ Changed = true;
+ for (auto &V : SCCNodes) {
+ if (InferredFlags.NoRecurse) {
+ LLVM_DEBUG(dbgs() << "ThinLTO FunctionAttrs: Propagated NoRecurse to "
+ << V.name() << "\n");
+ ++NumThinLinkNoRecurse;
+ }
+
+ if (InferredFlags.NoUnwind) {
+ LLVM_DEBUG(dbgs() << "ThinLTO FunctionAttrs: Propagated NoUnwind to "
+ << V.name() << "\n");
+ ++NumThinLinkNoUnwind;
+ }
+
+ for (auto &S : V.getSummaryList()) {
+ if (auto *FS = dyn_cast<FunctionSummary>(S.get())) {
+ if (InferredFlags.NoRecurse)
+ FS->setNoRecurse();
+
+ if (InferredFlags.NoUnwind)
+ FS->setNoUnwind();
+ }
+ }
+ }
+ }
+ };
+
+ // Call propagation functions on each SCC in the Index
+ for (scc_iterator<ModuleSummaryIndex *> I = scc_begin(&Index); !I.isAtEnd();
+ ++I) {
+ std::vector<ValueInfo> Nodes(*I);
+ PropagateAttributes(Nodes);
+ }
+ return Changed;
}
namespace {
@@ -395,7 +590,7 @@ struct ArgumentUsesTracker : public CaptureTracker {
assert(UseIndex < CB->data_operands_size() &&
"Indirect function calls should have been filtered above!");
- if (UseIndex >= CB->getNumArgOperands()) {
+ if (UseIndex >= CB->arg_size()) {
       // Data operand, but not an argument operand -- must be a bundle operand
assert(CB->hasOperandBundles() && "Must be!");
@@ -530,7 +725,7 @@ determinePointerReadAttrs(Argument *A,
assert(UseIndex < CB.data_operands_size() &&
"Data operand use expected!");
- bool IsOperandBundleUse = UseIndex >= CB.getNumArgOperands();
+ bool IsOperandBundleUse = UseIndex >= CB.arg_size();
if (UseIndex >= F->arg_size() && !IsOperandBundleUse) {
assert(F->isVarArg() && "More params than args in non-varargs call");
@@ -581,9 +776,8 @@ determinePointerReadAttrs(Argument *A,
}
/// Deduce returned attributes for the SCC.
-static bool addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes) {
- bool Changed = false;
-
+static void addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes,
+ SmallSet<Function *, 8> &Changed) {
// Check each function in turn, determining if an argument is always returned.
for (Function *F : SCCNodes) {
// We can infer and propagate function attributes only when we know that the
@@ -623,11 +817,9 @@ static bool addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes) {
auto *A = cast<Argument>(RetArg);
A->addAttr(Attribute::Returned);
++NumReturned;
- Changed = true;
+ Changed.insert(F);
}
}
-
- return Changed;
}
/// If a callsite has arguments that are also arguments to the parent function,
@@ -693,9 +885,8 @@ static bool addReadAttr(Argument *A, Attribute::AttrKind R) {
}
/// Deduce nocapture attributes for the SCC.
-static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
- bool Changed = false;
-
+static void addArgumentAttrs(const SCCNodeSet &SCCNodes,
+ SmallSet<Function *, 8> &Changed) {
ArgumentGraph AG;
// Check each function in turn, determining which pointer arguments are not
@@ -707,7 +898,8 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
if (!F->hasExactDefinition())
continue;
- Changed |= addArgumentAttrsFromCallsites(*F);
+ if (addArgumentAttrsFromCallsites(*F))
+ Changed.insert(F);
// Functions that are readonly (or readnone) and nounwind and don't return
// a value can't capture arguments. Don't analyze them.
@@ -718,7 +910,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr()) {
A->addAttr(Attribute::NoCapture);
++NumNoCapture;
- Changed = true;
+ Changed.insert(F);
}
}
continue;
@@ -737,7 +929,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
// If it's trivially not captured, mark it nocapture now.
A->addAttr(Attribute::NoCapture);
++NumNoCapture;
- Changed = true;
+ Changed.insert(F);
} else {
// If it's not trivially captured and not trivially not captured,
// then it must be calling into another function in our SCC. Save
@@ -761,7 +953,8 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
Self.insert(&*A);
Attribute::AttrKind R = determinePointerReadAttrs(&*A, Self);
if (R != Attribute::None)
- Changed = addReadAttr(A, R);
+ if (addReadAttr(A, R))
+ Changed.insert(F);
}
}
}
@@ -785,7 +978,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
Argument *A = ArgumentSCC[0]->Definition;
A->addAttr(Attribute::NoCapture);
++NumNoCapture;
- Changed = true;
+ Changed.insert(A->getParent());
}
continue;
}
@@ -827,7 +1020,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
Argument *A = ArgumentSCC[i]->Definition;
A->addAttr(Attribute::NoCapture);
++NumNoCapture;
- Changed = true;
+ Changed.insert(A->getParent());
}
// We also want to compute readonly/readnone. With a small number of false
@@ -858,12 +1051,11 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
if (ReadAttr != Attribute::None) {
for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) {
Argument *A = ArgumentSCC[i]->Definition;
- Changed = addReadAttr(A, ReadAttr);
+ if (addReadAttr(A, ReadAttr))
+ Changed.insert(A->getParent());
}
}
}
-
- return Changed;
}
/// Tests whether a function is "malloc-like".
@@ -934,7 +1126,8 @@ static bool isFunctionMallocLike(Function *F, const SCCNodeSet &SCCNodes) {
}
/// Deduce noalias attributes for the SCC.
-static bool addNoAliasAttrs(const SCCNodeSet &SCCNodes) {
+static void addNoAliasAttrs(const SCCNodeSet &SCCNodes,
+ SmallSet<Function *, 8> &Changed) {
// Check each function in turn, determining which functions return noalias
// pointers.
for (Function *F : SCCNodes) {
@@ -946,7 +1139,7 @@ static bool addNoAliasAttrs(const SCCNodeSet &SCCNodes) {
// definition we'll get at link time is *exactly* the definition we see now.
// For more details, see GlobalValue::mayBeDerefined.
if (!F->hasExactDefinition())
- return false;
+ return;
// We annotate noalias return values, which are only applicable to
// pointer types.
@@ -954,10 +1147,9 @@ static bool addNoAliasAttrs(const SCCNodeSet &SCCNodes) {
continue;
if (!isFunctionMallocLike(F, SCCNodes))
- return false;
+ return;
}
- bool MadeChange = false;
for (Function *F : SCCNodes) {
if (F->returnDoesNotAlias() ||
!F->getReturnType()->isPointerTy())
@@ -965,10 +1157,8 @@ static bool addNoAliasAttrs(const SCCNodeSet &SCCNodes) {
F->setReturnDoesNotAlias();
++NumNoAlias;
- MadeChange = true;
+ Changed.insert(F);
}
-
- return MadeChange;
}
/// Tests whether this function is known to not return null.
@@ -1044,26 +1234,24 @@ static bool isReturnNonNull(Function *F, const SCCNodeSet &SCCNodes,
}
/// Deduce nonnull attributes for the SCC.
-static bool addNonNullAttrs(const SCCNodeSet &SCCNodes) {
+static void addNonNullAttrs(const SCCNodeSet &SCCNodes,
+ SmallSet<Function *, 8> &Changed) {
// Speculative that all functions in the SCC return only nonnull
// pointers. We may refute this as we analyze functions.
bool SCCReturnsNonNull = true;
- bool MadeChange = false;
-
// Check each function in turn, determining which functions return nonnull
// pointers.
for (Function *F : SCCNodes) {
// Already nonnull.
- if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
- Attribute::NonNull))
+ if (F->getAttributes().hasRetAttr(Attribute::NonNull))
continue;
// We can infer and propagate function attributes only when we know that the
// definition we'll get at link time is *exactly* the definition we see now.
// For more details, see GlobalValue::mayBeDerefined.
if (!F->hasExactDefinition())
- return false;
+ return;
// We annotate nonnull return values, which are only applicable to
// pointer types.
@@ -1077,9 +1265,9 @@ static bool addNonNullAttrs(const SCCNodeSet &SCCNodes) {
// which prevents us from speculating about the entire SCC
LLVM_DEBUG(dbgs() << "Eagerly marking " << F->getName()
<< " as nonnull\n");
- F->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
+ F->addRetAttr(Attribute::NonNull);
++NumNonNullReturn;
- MadeChange = true;
+ Changed.insert(F);
}
continue;
}
@@ -1090,19 +1278,16 @@ static bool addNonNullAttrs(const SCCNodeSet &SCCNodes) {
if (SCCReturnsNonNull) {
for (Function *F : SCCNodes) {
- if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
- Attribute::NonNull) ||
+ if (F->getAttributes().hasRetAttr(Attribute::NonNull) ||
!F->getReturnType()->isPointerTy())
continue;
LLVM_DEBUG(dbgs() << "SCC marking " << F->getName() << " as nonnull\n");
- F->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
+ F->addRetAttr(Attribute::NonNull);
++NumNonNullReturn;
- MadeChange = true;
+ Changed.insert(F);
}
}
-
- return MadeChange;
}
namespace {
@@ -1155,12 +1340,13 @@ public:
InferenceDescriptors.push_back(AttrInference);
}
- bool run(const SCCNodeSet &SCCNodes);
+ void run(const SCCNodeSet &SCCNodes, SmallSet<Function *, 8> &Changed);
};
/// Perform all the requested attribute inference actions according to the
/// attribute predicates stored before.
-bool AttributeInferer::run(const SCCNodeSet &SCCNodes) {
+void AttributeInferer::run(const SCCNodeSet &SCCNodes,
+ SmallSet<Function *, 8> &Changed) {
SmallVector<InferenceDescriptor, 4> InferInSCC = InferenceDescriptors;
// Go through all the functions in SCC and check corresponding attribute
// assumptions for each of them. Attributes that are invalid for this SCC
@@ -1169,7 +1355,7 @@ bool AttributeInferer::run(const SCCNodeSet &SCCNodes) {
// No attributes whose assumptions are still valid - done.
if (InferInSCC.empty())
- return false;
+ return;
// Check if our attributes ever need scanning/can be scanned.
llvm::erase_if(InferInSCC, [F](const InferenceDescriptor &ID) {
@@ -1212,9 +1398,8 @@ bool AttributeInferer::run(const SCCNodeSet &SCCNodes) {
}
if (InferInSCC.empty())
- return false;
+ return;
- bool Changed = false;
for (Function *F : SCCNodes)
// At this point InferInSCC contains only functions that were either:
// - explicitly skipped from scan/inference, or
@@ -1223,10 +1408,9 @@ bool AttributeInferer::run(const SCCNodeSet &SCCNodes) {
for (auto &ID : InferInSCC) {
if (ID.SkipFunction(*F))
continue;
- Changed = true;
+ Changed.insert(F);
ID.SetAttribute(*F);
}
- return Changed;
}
struct SCCNodesResult {
@@ -1243,7 +1427,7 @@ static bool InstrBreaksNonConvergent(Instruction &I,
// Breaks non-convergent assumption if CS is a convergent call to a function
// not in the SCC.
return CB && CB->isConvergent() &&
- SCCNodes.count(CB->getCalledFunction()) == 0;
+ !SCCNodes.contains(CB->getCalledFunction());
}
/// Helper for NoUnwind inference predicate InstrBreaksAttribute.
@@ -1282,7 +1466,8 @@ static bool InstrBreaksNoFree(Instruction &I, const SCCNodeSet &SCCNodes) {
/// Attempt to remove convergent function attribute when possible.
///
/// Returns true if any changes to function attributes were made.
-static bool inferConvergent(const SCCNodeSet &SCCNodes) {
+static void inferConvergent(const SCCNodeSet &SCCNodes,
+ SmallSet<Function *, 8> &Changed) {
AttributeInferer AI;
// Request to remove the convergent attribute from all functions in the SCC
@@ -1305,7 +1490,7 @@ static bool inferConvergent(const SCCNodeSet &SCCNodes) {
},
/* RequiresExactDefinition= */ false});
// Perform all the requested attribute inference actions.
- return AI.run(SCCNodes);
+ AI.run(SCCNodes, Changed);
}
/// Infer attributes from all functions in the SCC by scanning every
@@ -1314,7 +1499,8 @@ static bool inferConvergent(const SCCNodeSet &SCCNodes) {
/// - addition of NoUnwind attribute
///
/// Returns true if any changes to function attributes were made.
-static bool inferAttrsFromFunctionBodies(const SCCNodeSet &SCCNodes) {
+static void inferAttrsFromFunctionBodies(const SCCNodeSet &SCCNodes,
+ SmallSet<Function *, 8> &Changed) {
AttributeInferer AI;
if (!DisableNoUnwindInference)
@@ -1363,19 +1549,20 @@ static bool inferAttrsFromFunctionBodies(const SCCNodeSet &SCCNodes) {
/* RequiresExactDefinition= */ true});
// Perform all the requested attribute inference actions.
- return AI.run(SCCNodes);
+ AI.run(SCCNodes, Changed);
}
-static bool addNoRecurseAttrs(const SCCNodeSet &SCCNodes) {
+static void addNoRecurseAttrs(const SCCNodeSet &SCCNodes,
+ SmallSet<Function *, 8> &Changed) {
// Try and identify functions that do not recurse.
// If the SCC contains multiple nodes we know for sure there is recursion.
if (SCCNodes.size() != 1)
- return false;
+ return;
Function *F = *SCCNodes.begin();
if (!F || !F->hasExactDefinition() || F->doesNotRecurse())
- return false;
+ return;
// If all of the calls in F are identifiable and are to norecurse functions, F
// is norecurse. This check also detects self-recursion as F is not currently
@@ -1386,7 +1573,7 @@ static bool addNoRecurseAttrs(const SCCNodeSet &SCCNodes) {
Function *Callee = CB->getCalledFunction();
if (!Callee || Callee == F || !Callee->doesNotRecurse())
// Function calls a potentially recursive function.
- return false;
+ return;
}
// Every call was to a non-recursive function other than this function, and
@@ -1394,7 +1581,7 @@ static bool addNoRecurseAttrs(const SCCNodeSet &SCCNodes) {
// recurse.
F->setDoesNotRecurse();
++NumNoRecurse;
- return true;
+ Changed.insert(F);
}
static bool instructionDoesNotReturn(Instruction &I) {
@@ -1412,9 +1599,8 @@ static bool basicBlockCanReturn(BasicBlock &BB) {
}
// Set the noreturn function attribute if possible.
-static bool addNoReturnAttrs(const SCCNodeSet &SCCNodes) {
- bool Changed = false;
-
+static void addNoReturnAttrs(const SCCNodeSet &SCCNodes,
+ SmallSet<Function *, 8> &Changed) {
for (Function *F : SCCNodes) {
if (!F || !F->hasExactDefinition() || F->hasFnAttribute(Attribute::Naked) ||
F->doesNotReturn())
@@ -1424,11 +1610,9 @@ static bool addNoReturnAttrs(const SCCNodeSet &SCCNodes) {
// FIXME: this doesn't handle recursion or unreachable blocks.
if (none_of(*F, basicBlockCanReturn)) {
F->setDoesNotReturn();
- Changed = true;
+ Changed.insert(F);
}
}
-
- return Changed;
}
static bool functionWillReturn(const Function &F) {
@@ -1461,19 +1645,16 @@ static bool functionWillReturn(const Function &F) {
}
// Set the willreturn function attribute if possible.
-static bool addWillReturn(const SCCNodeSet &SCCNodes) {
- bool Changed = false;
-
+static void addWillReturn(const SCCNodeSet &SCCNodes,
+ SmallSet<Function *, 8> &Changed) {
for (Function *F : SCCNodes) {
if (!F || F->willReturn() || !functionWillReturn(*F))
continue;
F->setWillReturn();
NumWillReturn++;
- Changed = true;
+ Changed.insert(F);
}
-
- return Changed;
}
// Return true if this is an atomic which has an ordering stronger than
@@ -1532,7 +1713,8 @@ static bool InstrBreaksNoSync(Instruction &I, const SCCNodeSet &SCCNodes) {
}
// Infer the nosync attribute.
-static bool addNoSyncAttr(const SCCNodeSet &SCCNodes) {
+static void addNoSyncAttr(const SCCNodeSet &SCCNodes,
+ SmallSet<Function *, 8> &Changed) {
AttributeInferer AI;
AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
Attribute::NoSync,
@@ -1549,14 +1731,15 @@ static bool addNoSyncAttr(const SCCNodeSet &SCCNodes) {
++NumNoSync;
},
/* RequiresExactDefinition= */ true});
- return AI.run(SCCNodes);
+ AI.run(SCCNodes, Changed);
}
static SCCNodesResult createSCCNodeSet(ArrayRef<Function *> Functions) {
SCCNodesResult Res;
Res.HasUnknownCall = false;
for (Function *F : Functions) {
- if (!F || F->hasOptNone() || F->hasFnAttribute(Attribute::Naked)) {
+ if (!F || F->hasOptNone() || F->hasFnAttribute(Attribute::Naked) ||
+ F->isPresplitCoroutine()) {
// Treat any function we're trying not to optimize as if it were an
// indirect call and omit it from the node set used below.
Res.HasUnknownCall = true;
@@ -1582,32 +1765,33 @@ static SCCNodesResult createSCCNodeSet(ArrayRef<Function *> Functions) {
}
template <typename AARGetterT>
-static bool deriveAttrsInPostOrder(ArrayRef<Function *> Functions,
- AARGetterT &&AARGetter) {
+static SmallSet<Function *, 8>
+deriveAttrsInPostOrder(ArrayRef<Function *> Functions, AARGetterT &&AARGetter) {
SCCNodesResult Nodes = createSCCNodeSet(Functions);
- bool Changed = false;
// Bail if the SCC only contains optnone functions.
if (Nodes.SCCNodes.empty())
- return Changed;
+ return {};
+
+ SmallSet<Function *, 8> Changed;
- Changed |= addArgumentReturnedAttrs(Nodes.SCCNodes);
- Changed |= addReadAttrs(Nodes.SCCNodes, AARGetter);
- Changed |= addArgumentAttrs(Nodes.SCCNodes);
- Changed |= inferConvergent(Nodes.SCCNodes);
- Changed |= addNoReturnAttrs(Nodes.SCCNodes);
- Changed |= addWillReturn(Nodes.SCCNodes);
+ addArgumentReturnedAttrs(Nodes.SCCNodes, Changed);
+ addReadAttrs(Nodes.SCCNodes, AARGetter, Changed);
+ addArgumentAttrs(Nodes.SCCNodes, Changed);
+ inferConvergent(Nodes.SCCNodes, Changed);
+ addNoReturnAttrs(Nodes.SCCNodes, Changed);
+ addWillReturn(Nodes.SCCNodes, Changed);
// If we have no external nodes participating in the SCC, we can deduce some
// more precise attributes as well.
if (!Nodes.HasUnknownCall) {
- Changed |= addNoAliasAttrs(Nodes.SCCNodes);
- Changed |= addNonNullAttrs(Nodes.SCCNodes);
- Changed |= inferAttrsFromFunctionBodies(Nodes.SCCNodes);
- Changed |= addNoRecurseAttrs(Nodes.SCCNodes);
+ addNoAliasAttrs(Nodes.SCCNodes, Changed);
+ addNonNullAttrs(Nodes.SCCNodes, Changed);
+ inferAttrsFromFunctionBodies(Nodes.SCCNodes, Changed);
+ addNoRecurseAttrs(Nodes.SCCNodes, Changed);
}
- Changed |= addNoSyncAttr(Nodes.SCCNodes);
+ addNoSyncAttr(Nodes.SCCNodes, Changed);
// Finally, infer the maximal set of attributes from the ones we've inferred
// above. This is handling the cases where one attribute on a signature
@@ -1615,7 +1799,8 @@ static bool deriveAttrsInPostOrder(ArrayRef<Function *> Functions,
// the later is missing (or simply less sophisticated).
for (Function *F : Nodes.SCCNodes)
if (F)
- Changed |= inferAttributesFromOthers(*F);
+ if (inferAttributesFromOthers(*F))
+ Changed.insert(F);
return Changed;
}
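The hunks above convert the attribute-deduction helpers from returning a bool into filling a shared set of changed functions, which deriveAttrsInPostOrder now returns. A minimal sketch of that out-parameter shape (the helper name and the attribute used here are illustrative only, not part of the patch):

static void exampleAddAttr(const SCCNodeSet &SCCNodes,
                           SmallSet<Function *, 8> &Changed) {
  for (Function *F : SCCNodes) {
    if (!F || F->hasFnAttribute(Attribute::NoReturn))
      continue;                          // nothing to deduce for this node
    // ... analysis that proves the attribute would go here ...
    F->addFnAttr(Attribute::NoReturn);   // illustrative attribute only
    Changed.insert(F);                   // record the modified function
  }
}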
@@ -1638,14 +1823,35 @@ PreservedAnalyses PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC &C,
Functions.push_back(&N.getFunction());
}
- if (deriveAttrsInPostOrder(Functions, AARGetter)) {
- // We have not changed the call graph or removed/added functions.
- PreservedAnalyses PA;
- PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
- return PA;
+ auto ChangedFunctions = deriveAttrsInPostOrder(Functions, AARGetter);
+ if (ChangedFunctions.empty())
+ return PreservedAnalyses::all();
+
+ // Invalidate analyses for modified functions so that we don't have to
+ // invalidate all analyses for all functions in this SCC.
+ PreservedAnalyses FuncPA;
+ // We haven't changed the CFG for modified functions.
+ FuncPA.preserveSet<CFGAnalyses>();
+ for (Function *Changed : ChangedFunctions) {
+ FAM.invalidate(*Changed, FuncPA);
+ // Also invalidate any direct callers of changed functions since analyses
+ // may care about attributes of direct callees. For example, MemorySSA cares
+ // about whether or not a call's callee modifies memory and queries that
+ // through function attributes.
+ for (auto *U : Changed->users()) {
+ if (auto *Call = dyn_cast<CallBase>(U)) {
+ if (Call->getCalledFunction() == Changed)
+ FAM.invalidate(*Call->getFunction(), FuncPA);
+ }
+ }
}
- return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ // We have not added or removed functions.
+ PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
+ // We already invalidated all relevant function analyses above.
+ PA.preserveSet<AllAnalysesOn<Function>>();
+ return PA;
}
namespace {
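With the set of changed functions available, the pass above invalidates analyses only for those functions (and their direct callers) rather than for the whole SCC. A condensed sketch of the invalidation idiom, assuming a FunctionAnalysisManager FAM and the ChangedFunctions set returned by deriveAttrsInPostOrder:

PreservedAnalyses FuncPA;
FuncPA.preserveSet<CFGAnalyses>();   // attribute changes leave the CFG intact
for (Function *F : ChangedFunctions)
  FAM.invalidate(*F, FuncPA);        // drop all other analyses for F only

The pass-level result then only needs to preserve the CGSCC proxy and AllAnalysesOn<Function>, since the per-function invalidation has already been done above.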
@@ -1690,7 +1896,7 @@ static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter) {
Functions.push_back(I->getFunction());
}
- return deriveAttrsInPostOrder(Functions, AARGetter);
+ return !deriveAttrsInPostOrder(Functions, AARGetter).empty();
}
bool PostOrderFunctionAttrsLegacyPass::runOnSCC(CallGraphSCC &SCC) {
diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp
index 2f6cf0ca7087..d9b43109f629 100644
--- a/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -39,6 +39,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
@@ -187,23 +188,6 @@ selectCallee(const ModuleSummaryIndex &Index,
return false;
}
- // For SamplePGO, in computeImportForFunction the OriginalId
- // may have been used to locate the callee summary list (See
- // comment there).
- // The mapping from OriginalId to GUID may return a GUID
- // that corresponds to a static variable. Filter it out here.
- // This can happen when
- // 1) There is a call to a library function which is not defined
- // in the index.
- // 2) There is a static variable with the OriginalGUID identical
- // to the GUID of the library function in 1);
- // When this happens, the logic for SamplePGO kicks in and
- // the static variable in 2) will be found, which needs to be
- // filtered out.
- if (GVSummary->getSummaryKind() == GlobalValueSummary::GlobalVarKind) {
- Reason = FunctionImporter::ImportFailureReason::GlobalVar;
- return false;
- }
if (GlobalValue::isInterposableLinkage(GVSummary->linkage())) {
Reason = FunctionImporter::ImportFailureReason::InterposableLinkage;
// There is no point in importing these, we can't inline them
@@ -264,21 +248,6 @@ using EdgeInfo =
} // anonymous namespace
-static ValueInfo
-updateValueInfoForIndirectCalls(const ModuleSummaryIndex &Index, ValueInfo VI) {
- if (!VI.getSummaryList().empty())
- return VI;
- // For SamplePGO, the indirect call targets for local functions will
- // have its original name annotated in profile. We try to find the
- // corresponding PGOFuncName as the GUID.
- // FIXME: Consider updating the edges in the graph after building
- // it, rather than needing to perform this mapping on each walk.
- auto GUID = Index.getGUIDFromOriginalID(VI.getGUID());
- if (GUID == 0)
- return ValueInfo();
- return Index.getValueInfo(GUID);
-}
-
static bool shouldImportGlobal(const ValueInfo &VI,
const GVSummaryMapTy &DefinedGVSummaries) {
const auto &GVS = DefinedGVSummaries.find(VI.getGUID());
@@ -400,10 +369,6 @@ static void computeImportForFunction(
continue;
}
- VI = updateValueInfoForIndirectCalls(Index, VI);
- if (!VI)
- continue;
-
if (DefinedGVSummaries.count(VI.getGUID())) {
// FIXME: Consider not skipping import if the module contains
// a non-prevailing def with interposable linkage. The prevailing copy
@@ -496,7 +461,7 @@ static void computeImportForFunction(
VI.name().str() + " due to " +
getFailureName(Reason);
auto Error = make_error<StringError>(
- Msg, std::make_error_code(std::errc::operation_not_supported));
+ Msg, make_error_code(errc::not_supported));
logAllUnhandledErrors(std::move(Error), errs(),
"Error importing module: ");
break;
@@ -839,16 +804,61 @@ void llvm::ComputeCrossModuleImportForModuleFromIndex(
#endif
}
-void llvm::computeDeadSymbols(
+// For SamplePGO, the indirect call targets for local functions will
+// have their original names annotated in the profile. We try to find the
+// corresponding PGOFuncName as the GUID, and fix up the edges
+// accordingly.
+void updateValueInfoForIndirectCalls(ModuleSummaryIndex &Index,
+ FunctionSummary *FS) {
+ for (auto &EI : FS->mutableCalls()) {
+ if (!EI.first.getSummaryList().empty())
+ continue;
+ auto GUID = Index.getGUIDFromOriginalID(EI.first.getGUID());
+ if (GUID == 0)
+ continue;
+ // Update the edge to point directly to the correct GUID.
+ auto VI = Index.getValueInfo(GUID);
+ if (llvm::any_of(
+ VI.getSummaryList(),
+ [&](const std::unique_ptr<GlobalValueSummary> &SummaryPtr) {
+ // The mapping from OriginalId to GUID may return a GUID
+ // that corresponds to a static variable. Filter it out here.
+ // This can happen when
+ // 1) There is a call to a library function which is not defined
+ // in the index.
+ // 2) There is a static variable with the OriginalGUID identical
+              // to the GUID of the library function in 1).
+              // When this happens, the static variable in 2) will be found,
+ // which needs to be filtered out.
+ return SummaryPtr->getSummaryKind() ==
+ GlobalValueSummary::GlobalVarKind;
+ }))
+ continue;
+ EI.first = VI;
+ }
+}
+
+void llvm::updateIndirectCalls(ModuleSummaryIndex &Index) {
+ for (const auto &Entry : Index) {
+ for (auto &S : Entry.second.SummaryList) {
+ if (auto *FS = dyn_cast<FunctionSummary>(S.get()))
+ updateValueInfoForIndirectCalls(Index, FS);
+ }
+ }
+}
+
+void llvm::computeDeadSymbolsAndUpdateIndirectCalls(
ModuleSummaryIndex &Index,
const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
function_ref<PrevailingType(GlobalValue::GUID)> isPrevailing) {
assert(!Index.withGlobalValueDeadStripping());
- if (!ComputeDead)
- return;
- if (GUIDPreservedSymbols.empty())
- // Don't do anything when nothing is live, this is friendly with tests.
+ if (!ComputeDead ||
+ // Don't do anything when nothing is live, this is friendly with tests.
+ GUIDPreservedSymbols.empty()) {
+ // Still need to update indirect calls.
+ updateIndirectCalls(Index);
return;
+ }
unsigned LiveSymbols = 0;
SmallVector<ValueInfo, 128> Worklist;
Worklist.reserve(GUIDPreservedSymbols.size() * 2);
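computeDeadSymbols is renamed to computeDeadSymbolsAndUpdateIndirectCalls above because the SamplePGO OriginalId-to-GUID fix-up now happens once, on the summary call edges, instead of on every graph walk; updateIndirectCalls is exposed for callers that skip dead-symbol computation. Client-side usage, sketched under the assumption that Index, GUIDPreservedSymbols and isPrevailing are set up as elsewhere in ThinLTO (RunDeadStripping is a hypothetical flag for this sketch):

if (RunDeadStripping)
  llvm::computeDeadSymbolsAndUpdateIndirectCalls(Index, GUIDPreservedSymbols,
                                                 isPrevailing);
else
  llvm::updateIndirectCalls(Index);   // the edge fix-up is still required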
@@ -863,13 +873,16 @@ void llvm::computeDeadSymbols(
// Add values flagged in the index as live roots to the worklist.
for (const auto &Entry : Index) {
auto VI = Index.getValueInfo(Entry);
- for (auto &S : Entry.second.SummaryList)
+ for (auto &S : Entry.second.SummaryList) {
+ if (auto *FS = dyn_cast<FunctionSummary>(S.get()))
+ updateValueInfoForIndirectCalls(Index, FS);
if (S->isLive()) {
LLVM_DEBUG(dbgs() << "Live root: " << VI << "\n");
Worklist.push_back(VI);
++LiveSymbols;
break;
}
+ }
}
// Make value live and add it to the worklist if it was not live before.
@@ -882,9 +895,6 @@ void llvm::computeDeadSymbols(
// binary, which increases the binary size unnecessarily. Note that
// if this code changes, the importer needs to change so that edges
// to functions marked dead are skipped.
- VI = updateValueInfoForIndirectCalls(Index, VI);
- if (!VI)
- return;
if (llvm::any_of(VI.getSummaryList(),
[](const std::unique_ptr<llvm::GlobalValueSummary> &S) {
@@ -958,7 +968,8 @@ void llvm::computeDeadSymbolsWithConstProp(
const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
function_ref<PrevailingType(GlobalValue::GUID)> isPrevailing,
bool ImportEnabled) {
- computeDeadSymbols(Index, GUIDPreservedSymbols, isPrevailing);
+ computeDeadSymbolsAndUpdateIndirectCalls(Index, GUIDPreservedSymbols,
+ isPrevailing);
if (ImportEnabled)
Index.propagateAttributes(GUIDPreservedSymbols);
}
@@ -1040,13 +1051,33 @@ bool llvm::convertToDeclaration(GlobalValue &GV) {
return true;
}
-void llvm::thinLTOResolvePrevailingInModule(
- Module &TheModule, const GVSummaryMapTy &DefinedGlobals) {
- auto updateLinkage = [&](GlobalValue &GV) {
+void llvm::thinLTOFinalizeInModule(Module &TheModule,
+ const GVSummaryMapTy &DefinedGlobals,
+ bool PropagateAttrs) {
+ auto FinalizeInModule = [&](GlobalValue &GV, bool Propagate = false) {
// See if the global summary analysis computed a new resolved linkage.
const auto &GS = DefinedGlobals.find(GV.getGUID());
if (GS == DefinedGlobals.end())
return;
+
+ if (Propagate)
+ if (FunctionSummary *FS = dyn_cast<FunctionSummary>(GS->second)) {
+ if (Function *F = dyn_cast<Function>(&GV)) {
+ // TODO: propagate ReadNone and ReadOnly.
+ if (FS->fflags().ReadNone && !F->doesNotAccessMemory())
+ F->setDoesNotAccessMemory();
+
+ if (FS->fflags().ReadOnly && !F->onlyReadsMemory())
+ F->setOnlyReadsMemory();
+
+ if (FS->fflags().NoRecurse && !F->doesNotRecurse())
+ F->setDoesNotRecurse();
+
+ if (FS->fflags().NoUnwind && !F->doesNotThrow())
+ F->setDoesNotThrow();
+ }
+ }
+
auto NewLinkage = GS->second->linkage();
if (GlobalValue::isLocalLinkage(GV.getLinkage()) ||
// Don't internalize anything here, because the code below
@@ -1105,11 +1136,11 @@ void llvm::thinLTOResolvePrevailingInModule(
// Process functions and global now
for (auto &GV : TheModule)
- updateLinkage(GV);
+ FinalizeInModule(GV, PropagateAttrs);
for (auto &GV : TheModule.globals())
- updateLinkage(GV);
+ FinalizeInModule(GV);
for (auto &GV : TheModule.aliases())
- updateLinkage(GV);
+ FinalizeInModule(GV);
}
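The thinLTOFinalizeInModule changes above (renamed from thinLTOResolvePrevailingInModule) optionally propagate the summary's function flags onto the IR before resolving linkage. A compact sketch of that flag-to-attribute mapping, with FS and F as in the earlier hunk of this function; this mirrors the code above rather than adding behaviour:

auto Flags = FS->fflags();
if (Flags.ReadNone && !F->doesNotAccessMemory())
  F->setDoesNotAccessMemory();   // readnone
if (Flags.ReadOnly && !F->onlyReadsMemory())
  F->setOnlyReadsMemory();       // readonly
if (Flags.NoRecurse && !F->doesNotRecurse())
  F->setDoesNotRecurse();        // norecurse
if (Flags.NoUnwind && !F->doesNotThrow())
  F->setDoesNotThrow();          // nounwind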
 /// Run internalization on \p TheModule based on summary analysis.
@@ -1153,7 +1184,7 @@ void llvm::thinLTOInternalizeModule(Module &TheModule,
/// Make alias a clone of its aliasee.
static Function *replaceAliasWithAliasee(Module *SrcModule, GlobalAlias *GA) {
- Function *Fn = cast<Function>(GA->getBaseObject());
+ Function *Fn = cast<Function>(GA->getAliaseeObject());
ValueToValueMapTy VMap;
Function *NewFn = CloneFunction(Fn, VMap);
@@ -1259,12 +1290,12 @@ Expected<bool> FunctionImporter::importFunctions(
if (Error Err = GA.materialize())
return std::move(Err);
// Import alias as a copy of its aliasee.
- GlobalObject *Base = GA.getBaseObject();
- if (Error Err = Base->materialize())
+ GlobalObject *GO = GA.getAliaseeObject();
+ if (Error Err = GO->materialize())
return std::move(Err);
auto *Fn = replaceAliasWithAliasee(SrcModule.get(), &GA);
- LLVM_DEBUG(dbgs() << "Is importing aliasee fn " << Base->getGUID()
- << " " << Base->getName() << " from "
+ LLVM_DEBUG(dbgs() << "Is importing aliasee fn " << GO->getGUID() << " "
+ << GO->getName() << " from "
<< SrcModule->getSourceFileName() << "\n");
if (EnableImportMetadata) {
// Add 'thinlto_src_module' metadata for statistics and debugging.
@@ -1303,7 +1334,7 @@ Expected<bool> FunctionImporter::importFunctions(
std::move(SrcModule), GlobalsToImport.getArrayRef(),
[](GlobalValue &, IRMover::ValueAdder) {},
/*IsPerformingImport=*/true))
- report_fatal_error("Function Import: link error: " +
+ report_fatal_error(Twine("Function Import: link error: ") +
toString(std::move(Err)));
ImportedCount += GlobalsToImport.size();
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index f61f4312b777..fbd083bb9bbf 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -11,7 +11,6 @@
// are propagated to the callee by specializing the function.
//
// Current limitations:
-// - It does not handle specialization of recursive functions,
// - It does not yet handle integer ranges.
// - Only 1 argument per function is specialised,
// - The cost-model could be further looked into,
@@ -22,6 +21,18 @@
// a direct way to steer function specialization, avoiding the cost-model,
// and thus control compile-times / code-size.
//
+// Todos:
+// - Specializing recursive functions relies on running the transformation a
+// number of times, which is controlled by option
+//   `func-specialization-max-iters`. Thus, increasing this value (and with it
+//   the number of iterations) linearly increases the number of times recursive
+//   functions get specialized; see also the discussion in
+// https://reviews.llvm.org/D106426 for details. Perhaps there is a
+// compile-time friendlier way to control/limit the number of specialisations
+// for recursive functions.
+// - Don't transform the function if no function specialization actually
+//   happens.
+//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/Statistic.h"
@@ -59,20 +70,166 @@ static cl::opt<unsigned> MaxConstantsThreshold(
"specialization"),
cl::init(3));
+static cl::opt<unsigned> SmallFunctionThreshold(
+ "func-specialization-size-threshold", cl::Hidden,
+    cl::desc("Don't specialize functions that have fewer than this threshold "
+ "number of instructions"),
+ cl::init(100));
+
static cl::opt<unsigned>
AvgLoopIterationCount("func-specialization-avg-iters-cost", cl::Hidden,
cl::desc("Average loop iteration count cost"),
cl::init(10));
+static cl::opt<bool> SpecializeOnAddresses(
+ "func-specialization-on-address", cl::init(false), cl::Hidden,
+ cl::desc("Enable function specialization on the address of global values"));
+
+// TODO: This needs checking to see the impact on compile-times, which is why
+// this is off by default for now.
static cl::opt<bool> EnableSpecializationForLiteralConstant(
"function-specialization-for-literal-constant", cl::init(false), cl::Hidden,
- cl::desc("Make function specialization available for literal constant."));
+ cl::desc("Enable specialization of functions that take a literal constant "
+ "as an argument."));
+
+// Helper to check if \p LV is either a constant or a constant
+// range with a single element. This should cover exactly the same cases as the
+// old ValueLatticeElement::isConstant() and is intended to be used in the
+// transition to ValueLatticeElement.
+static bool isConstant(const ValueLatticeElement &LV) {
+ return LV.isConstant() ||
+ (LV.isConstantRange() && LV.getConstantRange().isSingleElement());
+}
// Helper to check if \p LV is either overdefined or a constant int.
static bool isOverdefined(const ValueLatticeElement &LV) {
- return !LV.isUnknownOrUndef() && !LV.isConstant();
+ return !LV.isUnknownOrUndef() && !isConstant(LV);
+}
+
+static Constant *getPromotableAlloca(AllocaInst *Alloca, CallInst *Call) {
+ Value *StoreValue = nullptr;
+ for (auto *User : Alloca->users()) {
+ // We can't use llvm::isAllocaPromotable() as that would fail because of
+ // the usage in the CallInst, which is what we check here.
+ if (User == Call)
+ continue;
+ if (auto *Bitcast = dyn_cast<BitCastInst>(User)) {
+ if (!Bitcast->hasOneUse() || *Bitcast->user_begin() != Call)
+ return nullptr;
+ continue;
+ }
+
+ if (auto *Store = dyn_cast<StoreInst>(User)) {
+ // This is a duplicate store, bail out.
+ if (StoreValue || Store->isVolatile())
+ return nullptr;
+ StoreValue = Store->getValueOperand();
+ continue;
+ }
+ // Bail if there is any other unknown usage.
+ return nullptr;
+ }
+ return dyn_cast_or_null<Constant>(StoreValue);
}
+// A constant stack value is an AllocaInst that has a single constant
+// value stored to it. Return this constant if such an alloca stack value
+// is a function argument.
+static Constant *getConstantStackValue(CallInst *Call, Value *Val,
+ SCCPSolver &Solver) {
+ if (!Val)
+ return nullptr;
+ Val = Val->stripPointerCasts();
+ if (auto *ConstVal = dyn_cast<ConstantInt>(Val))
+ return ConstVal;
+ auto *Alloca = dyn_cast<AllocaInst>(Val);
+ if (!Alloca || !Alloca->getAllocatedType()->isIntegerTy())
+ return nullptr;
+ return getPromotableAlloca(Alloca, Call);
+}
+
+// To support specializing recursive functions, it is important to propagate
+// constant arguments because after a first iteration of specialisation, a
+// reduced example may look like this:
+//
+// define internal void @RecursiveFn(i32* %arg1) {
+// %temp = alloca i32, align 4
+//   store i32 2, i32* %temp, align 4
+// call void @RecursiveFn.1(i32* nonnull %temp)
+// ret void
+// }
+//
+// Before the next iteration, we need to propagate the constant like so,
+// which allows further specialization in subsequent iterations.
+//
+// @funcspec.arg = internal constant i32 2
+//
+// define internal void @someFunc(i32* %arg1) {
+// call void @otherFunc(i32* nonnull @funcspec.arg)
+// ret void
+// }
+//
+static void constantArgPropagation(SmallVectorImpl<Function *> &WorkList,
+ Module &M, SCCPSolver &Solver) {
+  // Iterate over the argument-tracked functions and see if there
+ // are any new constant values for the call instruction via
+ // stack variables.
+ for (auto *F : WorkList) {
+ // TODO: Generalize for any read only arguments.
+ if (F->arg_size() != 1)
+ continue;
+
+ auto &Arg = *F->arg_begin();
+ if (!Arg.onlyReadsMemory() || !Arg.getType()->isPointerTy())
+ continue;
+
+ for (auto *User : F->users()) {
+ auto *Call = dyn_cast<CallInst>(User);
+ if (!Call)
+ break;
+ auto *ArgOp = Call->getArgOperand(0);
+ auto *ArgOpType = ArgOp->getType();
+ auto *ConstVal = getConstantStackValue(Call, ArgOp, Solver);
+ if (!ConstVal)
+ break;
+
+ Value *GV = new GlobalVariable(M, ConstVal->getType(), true,
+ GlobalValue::InternalLinkage, ConstVal,
+ "funcspec.arg");
+
+ if (ArgOpType != ConstVal->getType())
+ GV = ConstantExpr::getBitCast(cast<Constant>(GV), ArgOp->getType());
+
+ Call->setArgOperand(0, GV);
+
+ // Add the changed CallInst to Solver Worklist
+ Solver.visitCall(*Call);
+ }
+ }
+}
+
+// ssa_copy intrinsics are introduced by the SCCP solver. These intrinsics
+// interfere with the constantArgPropagation optimization.
+static void removeSSACopy(Function &F) {
+ for (BasicBlock &BB : F) {
+ for (Instruction &Inst : llvm::make_early_inc_range(BB)) {
+ auto *II = dyn_cast<IntrinsicInst>(&Inst);
+ if (!II)
+ continue;
+ if (II->getIntrinsicID() != Intrinsic::ssa_copy)
+ continue;
+ Inst.replaceAllUsesWith(II->getOperand(0));
+ Inst.eraseFromParent();
+ }
+ }
+}
+
+static void removeSSACopy(Module &M) {
+ for (Function &F : M)
+ removeSSACopy(F);
+}
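removeSSACopy above erases instructions while iterating over their block, which is only safe because make_early_inc_range advances the iterator before the loop body runs. The idiom in isolation (a sketch; isInstructionTriviallyDead is just an example of an erase-while-iterating use, not what removeSSACopy checks):

for (BasicBlock &BB : F)
  for (Instruction &I : llvm::make_early_inc_range(BB))
    if (isInstructionTriviallyDead(&I))
      I.eraseFromParent();   // safe: the range already stepped past I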
+
+namespace {
class FunctionSpecializer {
/// The IPSCCP Solver.
@@ -115,9 +272,14 @@ public:
for (auto *SpecializedFunc : CurrentSpecializations) {
SpecializedFuncs.insert(SpecializedFunc);
- // TODO: If we want to support specializing specialized functions,
- // initialize here the state of the newly created functions, marking
- // them argument-tracked and executable.
+ // Initialize the state of the newly created functions, marking them
+ // argument-tracked and executable.
+ if (SpecializedFunc->hasExactDefinition() &&
+ !SpecializedFunc->hasFnAttribute(Attribute::Naked))
+ Solver.addTrackedFunction(SpecializedFunc);
+ Solver.addArgumentTrackedFunction(SpecializedFunc);
+ FuncDecls.push_back(SpecializedFunc);
+ Solver.markBlockExecutable(&SpecializedFunc->front());
// Replace the function arguments for the specialized functions.
for (Argument &Arg : SpecializedFunc->args())
@@ -138,12 +300,22 @@ public:
const ValueLatticeElement &IV = Solver.getLatticeValueFor(V);
if (isOverdefined(IV))
return false;
- auto *Const = IV.isConstant() ? Solver.getConstant(IV)
- : UndefValue::get(V->getType());
+ auto *Const =
+ isConstant(IV) ? Solver.getConstant(IV) : UndefValue::get(V->getType());
V->replaceAllUsesWith(Const);
- // TODO: Update the solver here if we want to specialize specialized
- // functions.
+ for (auto *U : Const->users())
+ if (auto *I = dyn_cast<Instruction>(U))
+ if (Solver.isBlockExecutable(I->getParent()))
+ Solver.visit(I);
+
+ // Remove the instruction from Block and Solver.
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ if (I->isSafeToRemove()) {
+ I->eraseFromParent();
+ Solver.removeLatticeValueFor(I);
+ }
+ }
return true;
}
@@ -152,6 +324,15 @@ private:
// also in the cost model.
unsigned NbFunctionsSpecialized = 0;
+ /// Clone the function \p F and remove the ssa_copy intrinsics added by
+ /// the SCCPSolver in the cloned version.
+ Function *cloneCandidateFunction(Function *F) {
+ ValueToValueMapTy EmptyMap;
+ Function *Clone = CloneFunction(F, EmptyMap);
+ removeSSACopy(*Clone);
+ return Clone;
+ }
+
/// This function decides whether to specialize function \p F based on the
/// known constant values its arguments can take on. Specialization is
/// performed on the first interesting argument. Specializations based on
@@ -162,9 +343,8 @@ private:
SmallVectorImpl<Function *> &Specializations) {
// Do not specialize the cloned function again.
- if (SpecializedFuncs.contains(F)) {
+ if (SpecializedFuncs.contains(F))
return false;
- }
// If we're optimizing the function for size, we shouldn't specialize it.
if (F->hasOptSize() ||
@@ -176,8 +356,25 @@ private:
if (!Solver.isBlockExecutable(&F->getEntryBlock()))
return false;
+    // It wastes time to specialize a function that would eventually be inlined.
+ if (F->hasFnAttribute(Attribute::AlwaysInline))
+ return false;
+
LLVM_DEBUG(dbgs() << "FnSpecialization: Try function: " << F->getName()
<< "\n");
+
+ // Determine if it would be profitable to create a specialization of the
+ // function where the argument takes on the given constant value. If so,
+ // add the constant to Constants.
+ auto FnSpecCost = getSpecializationCost(F);
+ if (!FnSpecCost.isValid()) {
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Invalid specialisation cost.\n");
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << "FnSpecialization: func specialisation cost: ";
+ FnSpecCost.print(dbgs()); dbgs() << "\n");
+
// Determine if we should specialize the function based on the values the
// argument can take on. If specialization is not profitable, we continue
// on to the next argument.
@@ -195,7 +392,7 @@ private:
// be set to false by isArgumentInteresting (that function only adds
// values to the Constants list that are deemed profitable).
SmallVector<Constant *, 4> Constants;
- if (!isArgumentInteresting(&A, Constants, IsPartial)) {
+ if (!isArgumentInteresting(&A, Constants, FnSpecCost, IsPartial)) {
LLVM_DEBUG(dbgs() << "FnSpecialization: Argument is not interesting\n");
continue;
}
@@ -214,8 +411,7 @@ private:
for (auto *C : Constants) {
// Clone the function. We leave the ValueToValueMap empty to allow
// IPSCCP to propagate the constant arguments.
- ValueToValueMapTy EmptyMap;
- Function *Clone = CloneFunction(F, EmptyMap);
+ Function *Clone = cloneCandidateFunction(F);
Argument *ClonedArg = Clone->arg_begin() + A.getArgNo();
// Rewrite calls to the function so that they call the clone instead.
@@ -231,9 +427,10 @@ private:
NbFunctionsSpecialized++;
}
- // TODO: if we want to support specialize specialized functions, and if
- // the function has been completely specialized, the original function is
- // no longer needed, so we would need to mark it unreachable here.
+ // If the function has been completely specialized, the original function
+ // is no longer needed. Mark it unreachable.
+ if (!IsPartial)
+ Solver.markFunctionUnreachable(F);
// FIXME: Only one argument per function.
return true;
@@ -253,7 +450,11 @@ private:
// If the code metrics reveal that we shouldn't duplicate the function, we
// shouldn't specialize it. Set the specialization cost to Invalid.
- if (Metrics.notDuplicatable) {
+    // Likewise, if the small number of instructions implies that the function
+    // is likely to be inlined anyway, we shouldn't specialize it.
+ if (Metrics.notDuplicatable ||
+ (!ForceFunctionSpecialization &&
+ Metrics.NumInsts < SmallFunctionThreshold)) {
InstructionCost C{};
C.setInvalid();
return C;
@@ -379,9 +580,8 @@ private:
/// argument.
bool isArgumentInteresting(Argument *A,
SmallVectorImpl<Constant *> &Constants,
+ const InstructionCost &FnSpecCost,
bool &IsPartial) {
- Function *F = A->getParent();
-
// For now, don't attempt to specialize functions based on the values of
// composite types.
if (!A->getType()->isSingleValueType() || A->user_empty())
@@ -420,18 +620,6 @@ private:
return false;
}
- // Determine if it would be profitable to create a specialization of the
- // function where the argument takes on the given constant value. If so,
- // add the constant to Constants.
- auto FnSpecCost = getSpecializationCost(F);
- if (!FnSpecCost.isValid()) {
- LLVM_DEBUG(dbgs() << "FnSpecialization: Invalid specialisation cost.\n");
- return false;
- }
-
- LLVM_DEBUG(dbgs() << "FnSpecialization: func specialisation cost: ";
- FnSpecCost.print(dbgs()); dbgs() << "\n");
-
for (auto *C : PossibleConstants) {
LLVM_DEBUG(dbgs() << "FnSpecialization: Constant: " << *C << "\n");
if (ForceFunctionSpecialization) {
@@ -475,6 +663,12 @@ private:
if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
continue;
auto &CS = *cast<CallBase>(U);
+ // If the call site has attribute minsize set, that callsite won't be
+ // specialized.
+ if (CS.hasFnAttr(Attribute::MinSize)) {
+ AllConstant = false;
+ continue;
+ }
// If the parent of the call site will never be executed, we don't need
// to worry about the passed value.
@@ -482,11 +676,25 @@ private:
continue;
auto *V = CS.getArgOperand(A->getArgNo());
+ if (isa<PoisonValue>(V))
+ return false;
+
+      // For now, constant expressions are fine, but only if they refer to a
+      // function (e.g. a bitcast of a function).
+ if (auto *CE = dyn_cast<ConstantExpr>(V))
+ if (!isa<Function>(CE->getOperand(0)))
+ return false;
+
// TrackValueOfGlobalVariable only tracks scalar global variables.
if (auto *GV = dyn_cast<GlobalVariable>(V)) {
- if (!GV->getValueType()->isSingleValueType()) {
+ // Check if we want to specialize on the address of non-constant
+ // global values.
+ if (!GV->isConstant())
+ if (!SpecializeOnAddresses)
+ return false;
+
+ if (!GV->getValueType()->isSingleValueType())
return false;
- }
}
if (isa<Constant>(V) && (Solver.getLatticeValueFor(V).isConstant() ||
@@ -506,6 +714,9 @@ private:
/// This function modifies calls to function \p F whose argument at index \p
/// ArgNo is equal to constant \p C. The calls are rewritten to call function
/// \p Clone instead.
+ ///
+ /// Callsites that have been marked with the MinSize function attribute won't
+ /// be specialized and rewritten.
void rewriteCallSites(Function *F, Function *Clone, Argument &Arg,
Constant *C) {
unsigned ArgNo = Arg.getArgNo();
@@ -527,24 +738,7 @@ private:
}
}
};
-
-/// Function to clean up the left over intrinsics from SCCP util.
-static void cleanup(Module &M) {
- for (Function &F : M) {
- for (BasicBlock &BB : F) {
- for (BasicBlock::iterator BI = BB.begin(), E = BB.end(); BI != E;) {
- Instruction *Inst = &*BI++;
- if (auto *II = dyn_cast<IntrinsicInst>(Inst)) {
- if (II->getIntrinsicID() == Intrinsic::ssa_copy) {
- Value *Op = II->getOperand(0);
- Inst->replaceAllUsesWith(Op);
- Inst->eraseFromParent();
- }
- }
- }
- }
- }
-}
+} // namespace
bool llvm::runFunctionSpecialization(
Module &M, const DataLayout &DL,
@@ -597,12 +791,27 @@ bool llvm::runFunctionSpecialization(
Solver.trackValueOfGlobalVariable(&G);
}
+ auto &TrackedFuncs = Solver.getArgumentTrackedFunctions();
+ SmallVector<Function *, 16> FuncDecls(TrackedFuncs.begin(),
+ TrackedFuncs.end());
+
+  // No tracked functions, so nothing to do: don't run the solver, but do
+  // remove the ssa_copy intrinsics that may have been introduced.
+ if (TrackedFuncs.empty()) {
+ removeSSACopy(M);
+ return false;
+ }
+
// Solve for constants.
auto RunSCCPSolver = [&](auto &WorkList) {
bool ResolvedUndefs = true;
while (ResolvedUndefs) {
+      // That the solver is not run unnecessarily is checked by the regression
+      // test nothing-to-do.ll, so if this debug message is changed, that
+      // regression test needs updating too.
LLVM_DEBUG(dbgs() << "FnSpecialization: Running solver\n");
+
Solver.solve();
LLVM_DEBUG(dbgs() << "FnSpecialization: Resolving undefs\n");
ResolvedUndefs = false;
@@ -615,15 +824,14 @@ bool llvm::runFunctionSpecialization(
for (BasicBlock &BB : *F) {
if (!Solver.isBlockExecutable(&BB))
continue;
+ // FIXME: The solver may make changes to the function here, so set
+ // Changed, even if later function specialization does not trigger.
for (auto &I : make_early_inc_range(BB))
- FS.tryToReplaceWithConstant(&I);
+ Changed |= FS.tryToReplaceWithConstant(&I);
}
}
};
- auto &TrackedFuncs = Solver.getArgumentTrackedFunctions();
- SmallVector<Function *, 16> FuncDecls(TrackedFuncs.begin(),
- TrackedFuncs.end());
#ifndef NDEBUG
LLVM_DEBUG(dbgs() << "FnSpecialization: Worklist fn decls:\n");
for (auto *F : FuncDecls)
@@ -637,14 +845,18 @@ bool llvm::runFunctionSpecialization(
unsigned I = 0;
while (FuncSpecializationMaxIters != I++ &&
FS.specializeFunctions(FuncDecls, CurrentSpecializations)) {
- // TODO: run the solver here for the specialized functions only if we want
- // to specialize recursively.
+
+ // Run the solver for the specialized functions.
+ RunSCCPSolver(CurrentSpecializations);
+
+ // Replace some unresolved constant arguments.
+ constantArgPropagation(FuncDecls, M, Solver);
CurrentSpecializations.clear();
Changed = true;
}
// Clean up the IR by removing ssa_copy intrinsics.
- cleanup(M);
+ removeSSACopy(M);
return Changed;
}
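The loop above is what enables recursive specialization: each round clones and specializes, re-runs the SCCP solver on the clones, and lets constantArgPropagation expose the constants that feed the next round, bounded by func-specialization-max-iters. Schematically, using the same names as in runFunctionSpecialization above:

unsigned Iter = 0;
while (Iter++ < FuncSpecializationMaxIters &&
       FS.specializeFunctions(FuncDecls, CurrentSpecializations)) {
  RunSCCPSolver(CurrentSpecializations);         // propagate constants into the clones
  constantArgPropagation(FuncDecls, M, Solver);  // surface constants for the next round
  CurrentSpecializations.clear();
  Changed = true;
}
removeSSACopy(M);   // drop the solver's ssa_copy intrinsics before returning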
diff --git a/llvm/lib/Transforms/IPO/GlobalDCE.cpp b/llvm/lib/Transforms/IPO/GlobalDCE.cpp
index fb4cb23b837e..5e5d2086adc2 100644
--- a/llvm/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalDCE.cpp
@@ -88,7 +88,7 @@ ModulePass *llvm::createGlobalDCEPass() {
static bool isEmptyFunction(Function *F) {
BasicBlock &Entry = F->getEntryBlock();
for (auto &I : Entry) {
- if (isa<DbgInfoIntrinsic>(I))
+ if (I.isDebugOrPseudoInst())
continue;
if (auto *RI = dyn_cast<ReturnInst>(&I))
return !RI->getReturnValue();
@@ -210,7 +210,7 @@ void GlobalDCEPass::ScanVTableLoad(Function *Caller, Metadata *TypeId,
Constant *Ptr =
getPointerAtOffset(VTable->getInitializer(), VTableOffset + CallOffset,
- *Caller->getParent());
+ *Caller->getParent(), VTable);
if (!Ptr) {
LLVM_DEBUG(dbgs() << "can't find pointer in vtable!\n");
VFESafeVTables.erase(VTable);
@@ -416,6 +416,16 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) {
// virtual function pointers with null, allowing us to remove the
// function itself.
++NumVFuncs;
+
+ // Detect vfuncs that are referenced as "relative pointers" which are used
+ // in Swift vtables, i.e. entries in the form of:
+ //
+  //   i32 trunc (i64 sub (i64 ptrtoint @f, i64 ptrtoint ...) to i32)
+ //
+ // In this case, replace the whole "sub" expression with constant 0 to
+ // avoid leaving a weird sub(0, symbol) expression behind.
+ replaceRelativePointerUsersWithZero(F);
+
F->replaceNonMetadataUsesWith(ConstantPointerNull::get(F->getType()));
}
EraseUnusedGlobalValue(F);
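The GlobalDCE hunk above zeroes out Swift-style relative-pointer vtable entries before replacing the dead vfunc with null. As an illustration only (this helper is hypothetical and not the patch's replaceRelativePointerUsersWithZero), the shape being recognized is a trunc of a sub of two ptrtoints whose first operand is the dead function:

static bool looksLikeRelativePointerUse(Constant *C, Function *F) {
  auto *Trunc = dyn_cast<ConstantExpr>(C);
  if (!Trunc || Trunc->getOpcode() != Instruction::Trunc)
    return false;
  auto *Sub = dyn_cast<ConstantExpr>(Trunc->getOperand(0));
  if (!Sub || Sub->getOpcode() != Instruction::Sub)
    return false;
  auto *PtrToInt = dyn_cast<ConstantExpr>(Sub->getOperand(0));
  return PtrToInt && PtrToInt->getOpcode() == Instruction::PtrToInt &&
         PtrToInt->getOperand(0)->stripPointerCasts() == F;
}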
diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 8750eb9ecc4e..b2c2efed7db8 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -208,9 +208,7 @@ CleanupPointerRootUsers(GlobalVariable *GV,
SmallVector<std::pair<Instruction *, Instruction *>, 32> Dead;
// Constants can't be pointers to dynamically allocated memory.
- for (Value::user_iterator UI = GV->user_begin(), E = GV->user_end();
- UI != E;) {
- User *U = *UI++;
+ for (User *U : llvm::make_early_inc_range(GV->users())) {
if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
Value *V = SI->getValueOperand();
if (isa<Constant>(V)) {
@@ -703,8 +701,9 @@ static bool AllUsesOfValueWillTrapIfNull(const Value *V,
!ICmpInst::isSigned(cast<ICmpInst>(U)->getPredicate()) &&
isa<LoadInst>(U->getOperand(0)) &&
isa<ConstantPointerNull>(U->getOperand(1))) {
- assert(isa<GlobalValue>(
- cast<LoadInst>(U->getOperand(0))->getPointerOperand()) &&
+ assert(isa<GlobalValue>(cast<LoadInst>(U->getOperand(0))
+ ->getPointerOperand()
+ ->stripPointerCasts()) &&
"Should be GlobalVariable");
// This and only this kind of non-signed ICmpInst is to be replaced with
// the comparing of the value of the created global init bool later in
@@ -720,22 +719,55 @@ static bool AllUsesOfValueWillTrapIfNull(const Value *V,
/// Return true if all uses of any loads from GV will trap if the loaded value
/// is null. Note that this also permits comparisons of the loaded value
/// against null, as a special case.
-static bool AllUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) {
- for (const User *U : GV->users())
- if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
- SmallPtrSet<const PHINode*, 8> PHIs;
- if (!AllUsesOfValueWillTrapIfNull(LI, PHIs))
+static bool allUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) {
+ SmallVector<const Value *, 4> Worklist;
+ Worklist.push_back(GV);
+ while (!Worklist.empty()) {
+ const Value *P = Worklist.pop_back_val();
+ for (auto *U : P->users()) {
+ if (auto *LI = dyn_cast<LoadInst>(U)) {
+ SmallPtrSet<const PHINode *, 8> PHIs;
+ if (!AllUsesOfValueWillTrapIfNull(LI, PHIs))
+ return false;
+ } else if (auto *SI = dyn_cast<StoreInst>(U)) {
+ // Ignore stores to the global.
+ if (SI->getPointerOperand() != P)
+ return false;
+ } else if (auto *CE = dyn_cast<ConstantExpr>(U)) {
+ if (CE->stripPointerCasts() != GV)
+ return false;
+ // Check further the ConstantExpr.
+ Worklist.push_back(CE);
+ } else {
+ // We don't know or understand this user, bail out.
return false;
- } else if (isa<StoreInst>(U)) {
- // Ignore stores to the global.
- } else {
- // We don't know or understand this user, bail out.
- //cerr << "UNKNOWN USER OF GLOBAL!: " << *U;
- return false;
+ }
}
+ }
+
return true;
}
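allUsesOfLoadedValueWillTrapIfNull above, and the allUsesOfLoadAndStores helper added just below, now reach users of the global through ConstantExpr casts by carrying a small worklist. The traversal skeleton on its own, as a sketch with the per-user handling left to the caller:

SmallVector<const Value *, 4> Worklist;
Worklist.push_back(GV);
while (!Worklist.empty()) {
  const Value *P = Worklist.pop_back_val();
  for (const User *U : P->users()) {
    if (auto *CE = dyn_cast<ConstantExpr>(U)) {
      Worklist.push_back(CE);   // look through casts/GEPs of the global
      continue;
    }
    // ... inspect loads, stores and other users of P here ...
  }
}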
+/// Get all the load/store uses of global variable \p GV.
+static void allUsesOfLoadAndStores(GlobalVariable *GV,
+ SmallVector<Value *, 4> &Uses) {
+ SmallVector<Value *, 4> Worklist;
+ Worklist.push_back(GV);
+ while (!Worklist.empty()) {
+ auto *P = Worklist.pop_back_val();
+ for (auto *U : P->users()) {
+ if (auto *CE = dyn_cast<ConstantExpr>(U)) {
+ Worklist.push_back(CE);
+ continue;
+ }
+
+ assert((isa<LoadInst>(U) || isa<StoreInst>(U)) &&
+ "Expect only load or store instructions");
+ Uses.push_back(U);
+ }
+ }
+}
+
static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
bool Changed = false;
for (auto UI = V->user_begin(), E = V->user_end(); UI != E; ) {
@@ -817,8 +849,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(
bool AllNonStoreUsesGone = true;
// Replace all uses of loads with uses of uses of the stored value.
- for (Value::user_iterator GUI = GV->user_begin(), E = GV->user_end(); GUI != E;){
- User *GlobalUser = *GUI++;
+ for (User *GlobalUser : llvm::make_early_inc_range(GV->users())) {
if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) {
Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV);
// If we were able to delete all uses of the loads
@@ -934,9 +965,8 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
}
}
- Constant *RepValue = NewGV;
- if (NewGV->getType() != GV->getValueType())
- RepValue = ConstantExpr::getBitCast(RepValue, GV->getValueType());
+ SmallPtrSet<Constant *, 1> RepValues;
+ RepValues.insert(NewGV);
// If there is a comparison against null, we will insert a global bool to
// keep track of whether the global was initialized yet or not.
@@ -947,9 +977,11 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
GV->getName()+".init", GV->getThreadLocalMode());
bool InitBoolUsed = false;
- // Loop over all uses of GV, processing them in turn.
- while (!GV->use_empty()) {
- if (StoreInst *SI = dyn_cast<StoreInst>(GV->user_back())) {
+ // Loop over all instruction uses of GV, processing them in turn.
+ SmallVector<Value *, 4> Guses;
+ allUsesOfLoadAndStores(GV, Guses);
+ for (auto *U : Guses) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
// The global is initialized when the store to it occurs. If the stored
// value is null value, the global bool is set to false, otherwise true.
new StoreInst(ConstantInt::getBool(
@@ -961,12 +993,14 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
continue;
}
- LoadInst *LI = cast<LoadInst>(GV->user_back());
+ LoadInst *LI = cast<LoadInst>(U);
while (!LI->use_empty()) {
Use &LoadUse = *LI->use_begin();
ICmpInst *ICI = dyn_cast<ICmpInst>(LoadUse.getUser());
if (!ICI) {
- LoadUse = RepValue;
+ auto *CE = ConstantExpr::getBitCast(NewGV, LI->getType());
+ RepValues.insert(CE);
+ LoadUse.set(CE);
continue;
}
@@ -1012,40 +1046,53 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
// To further other optimizations, loop over all users of NewGV and try to
// constant prop them. This will promote GEP instructions with constant
// indices into GEP constant-exprs, which will allow global-opt to hack on it.
- ConstantPropUsersOf(NewGV, DL, TLI);
- if (RepValue != NewGV)
- ConstantPropUsersOf(RepValue, DL, TLI);
+ for (auto *CE : RepValues)
+ ConstantPropUsersOf(CE, DL, TLI);
return NewGV;
}
-/// Scan the use-list of V checking to make sure that there are no complex uses
-/// of V. We permit simple things like dereferencing the pointer, but not
+/// Scan the use-list of GV checking to make sure that there are no complex uses
+/// of GV. We permit simple things like dereferencing the pointer, but not
/// storing through the address, unless it is to the specified global.
static bool
-valueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
+valueIsOnlyUsedLocallyOrStoredToOneGlobal(const CallInst *CI,
const GlobalVariable *GV) {
- for (const User *U : V->users()) {
- const Instruction *Inst = cast<Instruction>(U);
+ SmallPtrSet<const Value *, 4> Visited;
+ SmallVector<const Value *, 4> Worklist;
+ Worklist.push_back(CI);
- if (isa<LoadInst>(Inst) || isa<CmpInst>(Inst)) {
- continue; // Fine, ignore.
- }
+ while (!Worklist.empty()) {
+ const Value *V = Worklist.pop_back_val();
+ if (!Visited.insert(V).second)
+ continue;
- if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
- if (SI->getOperand(0) == V && SI->getOperand(1) != GV)
- return false; // Storing the pointer itself... bad.
- continue; // Otherwise, storing through it, or storing into GV... fine.
- }
+ for (const Use &VUse : V->uses()) {
+ const User *U = VUse.getUser();
+ if (isa<LoadInst>(U) || isa<CmpInst>(U))
+ continue; // Fine, ignore.
- if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) {
- if (!valueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV))
- return false;
- continue;
- }
+ if (auto *SI = dyn_cast<StoreInst>(U)) {
+ if (SI->getValueOperand() == V &&
+ SI->getPointerOperand()->stripPointerCasts() != GV)
+          return false; // Storing the pointer somewhere other than GV... bad.
+ continue; // Otherwise, storing through it, or storing into GV... fine.
+ }
- return false;
+ if (auto *BCI = dyn_cast<BitCastInst>(U)) {
+ Worklist.push_back(BCI);
+ continue;
+ }
+
+ if (auto *GEPI = dyn_cast<GetElementPtrInst>(U)) {
+ Worklist.push_back(GEPI);
+ continue;
+ }
+
+ return false;
+ }
}
+
return true;
}
@@ -1066,12 +1113,12 @@ static bool tryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI,
// been reached). To do this, we check to see if all uses of the global
// would trap if the global were null: this proves that they must all
// happen after the malloc.
- if (!AllUsesOfLoadedValueWillTrapIfNull(GV))
+ if (!allUsesOfLoadedValueWillTrapIfNull(GV))
return false;
// We can't optimize this if the malloc itself is used in a complex way,
// for example, being stored into multiple globals. This allows the
- // malloc to be stored into the specified global, loaded icmp'd.
+  // malloc to be stored into the specified global, loaded, GEP'd, and icmp'd.
// These are all things we could transform to using the global for.
if (!valueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV))
return false;
@@ -1112,6 +1159,7 @@ optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
// value was null.
if (GV->getInitializer()->getType()->isPointerTy() &&
GV->getInitializer()->isNullValue() &&
+ StoredOnceVal->getType()->isPointerTy() &&
!NullPointerIsDefined(
nullptr /* F */,
GV->getInitializer()->getType()->getPointerAddressSpace())) {
@@ -1442,8 +1490,7 @@ static void makeAllConstantUsesInstructions(Constant *C) {
append_range(UUsers, U->users());
for (auto *UU : UUsers) {
Instruction *UI = cast<Instruction>(UU);
- Instruction *NewU = U->getAsInstruction();
- NewU->insertBefore(UI);
+ Instruction *NewU = U->getAsInstruction(UI);
UI->replaceUsesOfWith(U, NewU);
}
// We've replaced all the uses, so destroy the constant. (destroyConstant
@@ -1456,6 +1503,7 @@ static void makeAllConstantUsesInstructions(Constant *C) {
/// it if possible. If we make a change, return true.
static bool
processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
+ function_ref<TargetTransformInfo &(Function &)> GetTTI,
function_ref<TargetLibraryInfo &(Function &)> GetTLI,
function_ref<DominatorTree &(Function &)> LookupDomTree) {
auto &DL = GV->getParent()->getDataLayout();
@@ -1554,43 +1602,57 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
if (SRAGlobal(GV, DL))
return true;
}
- if (GS.StoredType == GlobalStatus::StoredOnce && GS.StoredOnceValue) {
+ Value *StoredOnceValue = GS.getStoredOnceValue();
+ if (GS.StoredType == GlobalStatus::StoredOnce && StoredOnceValue) {
+ // Avoid speculating constant expressions that might trap (div/rem).
+ auto *SOVConstant = dyn_cast<Constant>(StoredOnceValue);
+ if (SOVConstant && SOVConstant->canTrap())
+ return Changed;
+
+ Function &StoreFn =
+ const_cast<Function &>(*GS.StoredOnceStore->getFunction());
+ bool CanHaveNonUndefGlobalInitializer =
+ GetTTI(StoreFn).canHaveNonUndefGlobalInitializerInAddressSpace(
+ GV->getType()->getAddressSpace());
// If the initial value for the global was an undef value, and if only
// one other value was stored into it, we can just change the
// initializer to be the stored value, then delete all stores to the
// global. This allows us to mark it constant.
- if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
- if (isa<UndefValue>(GV->getInitializer())) {
- // Change the initial value here.
- GV->setInitializer(SOVConstant);
-
- // Clean up any obviously simplifiable users now.
- CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI);
-
- if (GV->use_empty()) {
- LLVM_DEBUG(dbgs() << " *** Substituting initializer allowed us to "
- << "simplify all users and delete global!\n");
- GV->eraseFromParent();
- ++NumDeleted;
- }
- ++NumSubstitute;
- return true;
+ // This is restricted to address spaces that allow globals to have
+ // initializers. NVPTX, for example, does not support initializers for
+ // shared memory (AS 3).
+ if (SOVConstant && SOVConstant->getType() == GV->getValueType() &&
+ isa<UndefValue>(GV->getInitializer()) &&
+ CanHaveNonUndefGlobalInitializer) {
+ // Change the initial value here.
+ GV->setInitializer(SOVConstant);
+
+ // Clean up any obviously simplifiable users now.
+ CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI);
+
+ if (GV->use_empty()) {
+ LLVM_DEBUG(dbgs() << " *** Substituting initializer allowed us to "
+ << "simplify all users and delete global!\n");
+ GV->eraseFromParent();
+ ++NumDeleted;
}
+ ++NumSubstitute;
+ return true;
+ }
// Try to optimize globals based on the knowledge that only one value
// (besides its initializer) is ever stored to the global.
- if (optimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, DL,
- GetTLI))
+ if (optimizeOnceStoredGlobal(GV, StoredOnceValue, GS.Ordering, DL, GetTLI))
return true;
// Otherwise, if the global was not a boolean, we can shrink it to be a
- // boolean.
- if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) {
- if (GS.Ordering == AtomicOrdering::NotAtomic) {
- if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
- ++NumShrunkToBool;
- return true;
- }
+    // boolean. Skip this optimization for address spaces that don't allow an
+    // initializer.
+ if (SOVConstant && GS.Ordering == AtomicOrdering::NotAtomic &&
+ (!isa<UndefValue>(GV->getInitializer()) ||
+ CanHaveNonUndefGlobalInitializer)) {
+ if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
+ ++NumShrunkToBool;
+ return true;
}
}
}
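The stored-once path above now asks the target whether the global's address space can carry a real (non-undef) initializer before folding the stored value in; NVPTX shared memory (AS 3) is the motivating case that cannot. The query in isolation, with StoreFn, GV and SOVConstant as in the hunk (the real code also requires the value's type to match the global's value type):

bool CanInit = GetTTI(StoreFn).canHaveNonUndefGlobalInitializerInAddressSpace(
    GV->getType()->getAddressSpace());
if (SOVConstant && isa<UndefValue>(GV->getInitializer()) && CanInit)
  GV->setInitializer(SOVConstant);   // legal: this address space allows initializers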
@@ -1602,6 +1664,7 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
/// make a change, return true.
static bool
processGlobal(GlobalValue &GV,
+ function_ref<TargetTransformInfo &(Function &)> GetTTI,
function_ref<TargetLibraryInfo &(Function &)> GetTLI,
function_ref<DominatorTree &(Function &)> LookupDomTree) {
if (GV.getName().startswith("llvm."))
@@ -1634,7 +1697,8 @@ processGlobal(GlobalValue &GV,
if (GVar->isConstant() || !GVar->hasInitializer())
return Changed;
- return processInternalGlobal(GVar, GS, GetTLI, LookupDomTree) || Changed;
+ return processInternalGlobal(GVar, GS, GetTTI, GetTLI, LookupDomTree) ||
+ Changed;
}
/// Walk all of the direct calls of the specified function, changing them to
@@ -1651,7 +1715,7 @@ static AttributeList StripAttr(LLVMContext &C, AttributeList Attrs,
Attribute::AttrKind A) {
unsigned AttrIndex;
if (Attrs.hasAttrSomewhere(A, &AttrIndex))
- return Attrs.removeAttribute(C, AttrIndex, A);
+ return Attrs.removeAttributeAtIndex(C, AttrIndex, A);
return Attrs;
}
@@ -1864,10 +1928,8 @@ static void RemovePreallocated(Function *F) {
Value *AllocaReplacement = ArgAllocas[AllocArgIndex];
if (!AllocaReplacement) {
auto AddressSpace = UseCall->getType()->getPointerAddressSpace();
- auto *ArgType = UseCall
- ->getAttribute(AttributeList::FunctionIndex,
- Attribute::Preallocated)
- .getValueAsType();
+ auto *ArgType =
+ UseCall->getFnAttr(Attribute::Preallocated).getValueAsType();
auto *InsertBefore = PreallocatedSetup->getNextNonDebugInstruction();
Builder.SetInsertPoint(InsertBefore);
auto *Alloca =
@@ -1897,26 +1959,22 @@ OptimizeFunctions(Module &M,
bool Changed = false;
std::vector<Function *> AllCallsCold;
- for (Module::iterator FI = M.begin(), E = M.end(); FI != E;) {
- Function *F = &*FI++;
- if (hasOnlyColdCalls(*F, GetBFI))
- AllCallsCold.push_back(F);
- }
+ for (Function &F : llvm::make_early_inc_range(M))
+ if (hasOnlyColdCalls(F, GetBFI))
+ AllCallsCold.push_back(&F);
// Optimize functions.
- for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) {
- Function *F = &*FI++;
-
+ for (Function &F : llvm::make_early_inc_range(M)) {
// Don't perform global opt pass on naked functions; we don't want fast
// calling conventions for naked functions.
- if (F->hasFnAttribute(Attribute::Naked))
+ if (F.hasFnAttribute(Attribute::Naked))
continue;
// Functions without names cannot be referenced outside this module.
- if (!F->hasName() && !F->isDeclaration() && !F->hasLocalLinkage())
- F->setLinkage(GlobalValue::InternalLinkage);
+ if (!F.hasName() && !F.isDeclaration() && !F.hasLocalLinkage())
+ F.setLinkage(GlobalValue::InternalLinkage);
- if (deleteIfDead(*F, NotDiscardableComdats)) {
+ if (deleteIfDead(F, NotDiscardableComdats)) {
Changed = true;
continue;
}
@@ -1931,17 +1989,17 @@ OptimizeFunctions(Module &M,
// some more complicated logic to break these cycles.
// Removing unreachable blocks might invalidate the dominator so we
// recalculate it.
- if (!F->isDeclaration()) {
- if (removeUnreachableBlocks(*F)) {
- auto &DT = LookupDomTree(*F);
- DT.recalculate(*F);
+ if (!F.isDeclaration()) {
+ if (removeUnreachableBlocks(F)) {
+ auto &DT = LookupDomTree(F);
+ DT.recalculate(F);
Changed = true;
}
}
- Changed |= processGlobal(*F, GetTLI, LookupDomTree);
+ Changed |= processGlobal(F, GetTTI, GetTLI, LookupDomTree);
- if (!F->hasLocalLinkage())
+ if (!F.hasLocalLinkage())
continue;
// If we have an inalloca parameter that we can safely remove the
@@ -1949,56 +2007,55 @@ OptimizeFunctions(Module &M,
// wouldn't be safe in the presence of inalloca.
// FIXME: We should also hoist alloca affected by this to the entry
// block if possible.
- if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca) &&
- !F->hasAddressTaken() && !hasMustTailCallers(F)) {
- RemoveAttribute(F, Attribute::InAlloca);
+ if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca) &&
+ !F.hasAddressTaken() && !hasMustTailCallers(&F)) {
+ RemoveAttribute(&F, Attribute::InAlloca);
Changed = true;
}
// FIXME: handle invokes
// FIXME: handle musttail
- if (F->getAttributes().hasAttrSomewhere(Attribute::Preallocated)) {
- if (!F->hasAddressTaken() && !hasMustTailCallers(F) &&
- !hasInvokeCallers(F)) {
- RemovePreallocated(F);
+ if (F.getAttributes().hasAttrSomewhere(Attribute::Preallocated)) {
+ if (!F.hasAddressTaken() && !hasMustTailCallers(&F) &&
+ !hasInvokeCallers(&F)) {
+ RemovePreallocated(&F);
Changed = true;
}
continue;
}
- if (hasChangeableCC(F) && !F->isVarArg() && !F->hasAddressTaken()) {
+ if (hasChangeableCC(&F) && !F.isVarArg() && !F.hasAddressTaken()) {
NumInternalFunc++;
- TargetTransformInfo &TTI = GetTTI(*F);
+ TargetTransformInfo &TTI = GetTTI(F);
// Change the calling convention to coldcc if either stress testing is
// enabled or the target would like to use coldcc on functions which are
// cold at all call sites and the callers contain no other non coldcc
// calls.
if (EnableColdCCStressTest ||
- (TTI.useColdCCForColdCall(*F) &&
- isValidCandidateForColdCC(*F, GetBFI, AllCallsCold))) {
- F->setCallingConv(CallingConv::Cold);
- changeCallSitesToColdCC(F);
+ (TTI.useColdCCForColdCall(F) &&
+ isValidCandidateForColdCC(F, GetBFI, AllCallsCold))) {
+ F.setCallingConv(CallingConv::Cold);
+ changeCallSitesToColdCC(&F);
Changed = true;
NumColdCC++;
}
}
- if (hasChangeableCC(F) && !F->isVarArg() &&
- !F->hasAddressTaken()) {
+ if (hasChangeableCC(&F) && !F.isVarArg() && !F.hasAddressTaken()) {
// If this function has a calling convention worth changing, is not a
// varargs function, and is only called directly, promote it to use the
// Fast calling convention.
- F->setCallingConv(CallingConv::Fast);
- ChangeCalleesToFastCall(F);
+ F.setCallingConv(CallingConv::Fast);
+ ChangeCalleesToFastCall(&F);
++NumFastCallFns;
Changed = true;
}
- if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) &&
- !F->hasAddressTaken()) {
+ if (F.getAttributes().hasAttrSomewhere(Attribute::Nest) &&
+ !F.hasAddressTaken()) {
// The function is not used by a trampoline intrinsic, so it is safe
// to remove the 'nest' attribute.
- RemoveAttribute(F, Attribute::Nest);
+ RemoveAttribute(&F, Attribute::Nest);
++NumNestRemoved;
Changed = true;
}
@@ -2008,35 +2065,34 @@ OptimizeFunctions(Module &M,
static bool
OptimizeGlobalVars(Module &M,
+ function_ref<TargetTransformInfo &(Function &)> GetTTI,
function_ref<TargetLibraryInfo &(Function &)> GetTLI,
function_ref<DominatorTree &(Function &)> LookupDomTree,
SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
bool Changed = false;
- for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
- GVI != E; ) {
- GlobalVariable *GV = &*GVI++;
+ for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals())) {
// Global variables without names cannot be referenced outside this module.
- if (!GV->hasName() && !GV->isDeclaration() && !GV->hasLocalLinkage())
- GV->setLinkage(GlobalValue::InternalLinkage);
+ if (!GV.hasName() && !GV.isDeclaration() && !GV.hasLocalLinkage())
+ GV.setLinkage(GlobalValue::InternalLinkage);
// Simplify the initializer.
- if (GV->hasInitializer())
- if (auto *C = dyn_cast<Constant>(GV->getInitializer())) {
+ if (GV.hasInitializer())
+ if (auto *C = dyn_cast<Constant>(GV.getInitializer())) {
auto &DL = M.getDataLayout();
// TLI is not used in the case of a Constant, so use default nullptr
// for that optional parameter, since we don't have a Function to
// provide GetTLI anyway.
Constant *New = ConstantFoldConstant(C, DL, /*TLI*/ nullptr);
if (New != C)
- GV->setInitializer(New);
+ GV.setInitializer(New);
}
- if (deleteIfDead(*GV, NotDiscardableComdats)) {
+ if (deleteIfDead(GV, NotDiscardableComdats)) {
Changed = true;
continue;
}
- Changed |= processGlobal(*GV, GetTLI, LookupDomTree);
+ Changed |= processGlobal(GV, GetTTI, GetTLI, LookupDomTree);
}
return Changed;
}
@@ -2425,24 +2481,21 @@ OptimizeGlobalAliases(Module &M,
for (GlobalValue *GV : Used.used())
Used.compilerUsedErase(GV);
- for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
- I != E;) {
- GlobalAlias *J = &*I++;
-
+ for (GlobalAlias &J : llvm::make_early_inc_range(M.aliases())) {
// Aliases without names cannot be referenced outside this module.
- if (!J->hasName() && !J->isDeclaration() && !J->hasLocalLinkage())
- J->setLinkage(GlobalValue::InternalLinkage);
+ if (!J.hasName() && !J.isDeclaration() && !J.hasLocalLinkage())
+ J.setLinkage(GlobalValue::InternalLinkage);
- if (deleteIfDead(*J, NotDiscardableComdats)) {
+ if (deleteIfDead(J, NotDiscardableComdats)) {
Changed = true;
continue;
}
// If the alias can change at link time, nothing can be done - bail out.
- if (J->isInterposable())
+ if (J.isInterposable())
continue;
- Constant *Aliasee = J->getAliasee();
+ Constant *Aliasee = J.getAliasee();
GlobalValue *Target = dyn_cast<GlobalValue>(Aliasee->stripPointerCasts());
// We can't trivially replace the alias with the aliasee if the aliasee is
// non-trivial in some way. We also can't replace the alias with the aliasee
@@ -2455,31 +2508,31 @@ OptimizeGlobalAliases(Module &M,
// Make all users of the alias use the aliasee instead.
bool RenameTarget;
- if (!hasUsesToReplace(*J, Used, RenameTarget))
+ if (!hasUsesToReplace(J, Used, RenameTarget))
continue;
- J->replaceAllUsesWith(ConstantExpr::getBitCast(Aliasee, J->getType()));
+ J.replaceAllUsesWith(ConstantExpr::getBitCast(Aliasee, J.getType()));
++NumAliasesResolved;
Changed = true;
if (RenameTarget) {
// Give the aliasee the name, linkage and other attributes of the alias.
- Target->takeName(&*J);
- Target->setLinkage(J->getLinkage());
- Target->setDSOLocal(J->isDSOLocal());
- Target->setVisibility(J->getVisibility());
- Target->setDLLStorageClass(J->getDLLStorageClass());
+ Target->takeName(&J);
+ Target->setLinkage(J.getLinkage());
+ Target->setDSOLocal(J.isDSOLocal());
+ Target->setVisibility(J.getVisibility());
+ Target->setDLLStorageClass(J.getDLLStorageClass());
- if (Used.usedErase(&*J))
+ if (Used.usedErase(&J))
Used.usedInsert(Target);
- if (Used.compilerUsedErase(&*J))
+ if (Used.compilerUsedErase(&J))
Used.compilerUsedInsert(Target);
- } else if (mayHaveOtherReferences(*J, Used))
+ } else if (mayHaveOtherReferences(J, Used))
continue;
// Delete the alias.
- M.getAliasList().erase(J);
+ M.getAliasList().erase(&J);
++NumAliasesRemoved;
Changed = true;
}
@@ -2526,7 +2579,7 @@ static bool cxxDtorIsEmpty(const Function &Fn) {
return false;
for (auto &I : Fn.getEntryBlock()) {
- if (isa<DbgInfoIntrinsic>(I))
+ if (I.isDebugOrPseudoInst())
continue;
if (isa<ReturnInst>(I))
return true;
@@ -2552,12 +2605,11 @@ static bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
// and remove them.
bool Changed = false;
- for (auto I = CXAAtExitFn->user_begin(), E = CXAAtExitFn->user_end();
- I != E;) {
+ for (User *U : llvm::make_early_inc_range(CXAAtExitFn->users())) {
// We're only interested in calls. Theoretically, we could handle invoke
// instructions as well, but neither llvm-gcc nor clang generate invokes
// to __cxa_atexit.
- CallInst *CI = dyn_cast<CallInst>(*I++);
+ CallInst *CI = dyn_cast<CallInst>(U);
if (!CI)
continue;
@@ -2614,8 +2666,8 @@ static bool optimizeGlobalsInModule(
});
// Optimize non-address-taken globals.
- LocalChange |=
- OptimizeGlobalVars(M, GetTLI, LookupDomTree, NotDiscardableComdats);
+ LocalChange |= OptimizeGlobalVars(M, GetTTI, GetTLI, LookupDomTree,
+ NotDiscardableComdats);
// Resolve aliases, when possible.
LocalChange |= OptimizeGlobalAliases(M, NotDiscardableComdats);
diff --git a/llvm/lib/Transforms/IPO/GlobalSplit.cpp b/llvm/lib/Transforms/IPO/GlobalSplit.cpp
index 365b269dc3bf..e7d698c42fcf 100644
--- a/llvm/lib/Transforms/IPO/GlobalSplit.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalSplit.cpp
@@ -154,11 +154,8 @@ static bool splitGlobals(Module &M) {
return false;
bool Changed = false;
- for (auto I = M.global_begin(); I != M.global_end();) {
- GlobalVariable &GV = *I;
- ++I;
+ for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals()))
Changed |= splitGlobal(GV);
- }
return Changed;
}
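Several hunks in this import replace hand-rolled iterator-advance loops with llvm::make_early_inc_range, which increments the iterator before yielding each element so the current one may be erased. A standalone sketch of the same idiom, with std::list standing in for an LLVM symbol-table list (illustrative only):

    #include <cassert>
    #include <list>

    template <typename List, typename Pred>
    void eraseIf(List &L, Pred P) {
      for (auto It = L.begin(), End = L.end(); It != End;) {
        auto Curr = It++; // advance first so erasing *Curr cannot invalidate It
        if (P(*Curr))
          L.erase(Curr);
      }
    }

    int main() {
      std::list<int> Vals{1, 2, 3, 4, 5, 6};
      eraseIf(Vals, [](int V) { return V % 2 == 0; }); // drop the "dead" ones
      assert((Vals == std::list<int>{1, 3, 5}));
      return 0;
    }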
diff --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp
index adf9ffba5780..b8a314c54f18 100644
--- a/llvm/lib/Transforms/IPO/IROutliner.cpp
+++ b/llvm/lib/Transforms/IPO/IROutliner.cpp
@@ -18,6 +18,7 @@
#include "llvm/IR/Attributes.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
@@ -33,6 +34,10 @@
using namespace llvm;
using namespace IRSimilarity;
+// A command flag to be used for debugging to exclude branches from similarity
+// matching and outlining.
+extern cl::opt<bool> DisableBranches;
+
// Set to true if the user wants the ir outliner to run on linkonceodr linkage
// functions. This is false by default because the linker can dedupe linkonceodr
// functions. Since the outliner is confined to a single module (modulo LTO),
@@ -71,8 +76,12 @@ struct OutlinableGroup {
/// for extraction.
bool IgnoreGroup = false;
- /// The return block for the overall function.
- BasicBlock *EndBB = nullptr;
+ /// The return blocks for the overall function.
+ DenseMap<Value *, BasicBlock *> EndBBs;
+
+ /// The PHIBlocks with their corresponding return block based on the return
+ /// value as the key.
+ DenseMap<Value *, BasicBlock *> PHIBlocks;
/// A set containing the different GVN store sets needed. Each array contains
/// a sorted list of the different values that need to be stored into output
@@ -87,6 +96,14 @@ struct OutlinableGroup {
/// index in ArgumentTypes is an output argument.
unsigned NumAggregateInputs = 0;
+ /// The mapping of the canonical numbering of the values in outlined sections
+ /// to specific arguments.
+ DenseMap<unsigned, unsigned> CanonicalNumberToAggArg;
+
+  /// The number of branches in the region that target a basic block outside
+  /// of the region.
+ unsigned BranchesToOutside = 0;
+
/// The number of instructions that will be outlined by extracting \ref
/// Regions.
InstructionCost Benefit = 0;
@@ -118,20 +135,67 @@ struct OutlinableGroup {
/// \param SourceBB - the BasicBlock to pull Instructions from.
/// \param TargetBB - the BasicBlock to put Instruction into.
static void moveBBContents(BasicBlock &SourceBB, BasicBlock &TargetBB) {
- BasicBlock::iterator BBCurr, BBEnd, BBNext;
- for (BBCurr = SourceBB.begin(), BBEnd = SourceBB.end(); BBCurr != BBEnd;
- BBCurr = BBNext) {
- BBNext = std::next(BBCurr);
- BBCurr->moveBefore(TargetBB, TargetBB.end());
- }
+ for (Instruction &I : llvm::make_early_inc_range(SourceBB))
+ I.moveBefore(TargetBB, TargetBB.end());
+}
+
+/// A function to sort the keys of \p Map, which must be a mapping of constant
+/// values to basic blocks, and return them, sorted, in \p SortedKeys.
+///
+/// \param SortedKeys - The vector the keys will be returned in, sorted.
+/// \param Map - The DenseMap containing keys to sort.
+static void getSortedConstantKeys(std::vector<Value *> &SortedKeys,
+ DenseMap<Value *, BasicBlock *> &Map) {
+ for (auto &VtoBB : Map)
+ SortedKeys.push_back(VtoBB.first);
+
+ stable_sort(SortedKeys, [](const Value *LHS, const Value *RHS) {
+ const ConstantInt *LHSC = dyn_cast<ConstantInt>(LHS);
+ const ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS);
+ assert(RHSC && "Not a constant integer in return value?");
+ assert(LHSC && "Not a constant integer in return value?");
+
+ return LHSC->getLimitedValue() < RHSC->getLimitedValue();
+ });
+}
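Because DenseMap iteration order is unspecified, getSortedConstantKeys above pulls the keys out and orders them by constant value so later block creation is deterministic. A standalone sketch of that collect-then-sort pattern, with std::unordered_map standing in for DenseMap (names invented):

    #include <algorithm>
    #include <cassert>
    #include <string>
    #include <unordered_map>
    #include <vector>

    std::vector<int> sortedKeys(const std::unordered_map<int, std::string> &Map) {
      std::vector<int> Keys;
      Keys.reserve(Map.size());
      for (const auto &KV : Map)
        Keys.push_back(KV.first);                 // arbitrary iteration order
      std::stable_sort(Keys.begin(), Keys.end()); // impose a deterministic order
      return Keys;
    }

    int main() {
      std::unordered_map<int, std::string> RetValToBlock{
          {3, "exit_3"}, {1, "exit_1"}, {2, "exit_2"}};
      assert((sortedKeys(RetValToBlock) == std::vector<int>{1, 2, 3}));
      return 0;
    }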
+
+Value *OutlinableRegion::findCorrespondingValueIn(const OutlinableRegion &Other,
+ Value *V) {
+ Optional<unsigned> GVN = Candidate->getGVN(V);
+ assert(GVN.hasValue() && "No GVN for incoming value");
+ Optional<unsigned> CanonNum = Candidate->getCanonicalNum(*GVN);
+ Optional<unsigned> FirstGVN = Other.Candidate->fromCanonicalNum(*CanonNum);
+ Optional<Value *> FoundValueOpt = Other.Candidate->fromGVN(*FirstGVN);
+ return FoundValueOpt.getValueOr(nullptr);
}
void OutlinableRegion::splitCandidate() {
assert(!CandidateSplit && "Candidate already split!");
+ Instruction *BackInst = Candidate->backInstruction();
+
+ Instruction *EndInst = nullptr;
+  // Check whether the last instruction is a terminator; if it is, we do not
+  // split on the following instruction and leave the block as it is. We also
+  // check that this is not the last instruction in the Module, otherwise the
+  // check for whether the current following instruction matches the
+  // previously recorded instruction will be incorrect.
+ if (!BackInst->isTerminator() ||
+ BackInst->getParent() != &BackInst->getFunction()->back()) {
+ EndInst = Candidate->end()->Inst;
+ assert(EndInst && "Expected an end instruction?");
+ }
+
+ // We check if the current instruction following the last instruction in the
+ // region is the same as the recorded instruction following the last
+ // instruction. If they do not match, there could be problems in rewriting
+ // the program after outlining, so we ignore it.
+ if (!BackInst->isTerminator() &&
+ EndInst != BackInst->getNextNonDebugInstruction())
+ return;
+
Instruction *StartInst = (*Candidate->begin()).Inst;
- Instruction *EndInst = (*Candidate->end()).Inst;
- assert(StartInst && EndInst && "Expected a start and end instruction?");
+ assert(StartInst && "Expected a start instruction?");
StartBB = StartInst->getParent();
PrevBB = StartBB;
@@ -153,13 +217,20 @@ void OutlinableRegion::splitCandidate() {
std::string OriginalName = PrevBB->getName().str();
StartBB = PrevBB->splitBasicBlock(StartInst, OriginalName + "_to_outline");
-
- // This is the case for the inner block since we do not have to include
- // multiple blocks.
- EndBB = StartBB;
- FollowBB = EndBB->splitBasicBlock(EndInst, OriginalName + "_after_outline");
+ PrevBB->replaceSuccessorsPhiUsesWith(PrevBB, StartBB);
CandidateSplit = true;
+ if (!BackInst->isTerminator()) {
+ EndBB = EndInst->getParent();
+ FollowBB = EndBB->splitBasicBlock(EndInst, OriginalName + "_after_outline");
+ EndBB->replaceSuccessorsPhiUsesWith(EndBB, FollowBB);
+ FollowBB->replaceSuccessorsPhiUsesWith(PrevBB, FollowBB);
+ return;
+ }
+
+ EndBB = BackInst->getParent();
+ EndsInBranch = true;
+ FollowBB = nullptr;
}
void OutlinableRegion::reattachCandidate() {
@@ -180,7 +251,6 @@ void OutlinableRegion::reattachCandidate() {
// inst3
// inst4
assert(StartBB != nullptr && "StartBB for Candidate is not defined!");
- assert(FollowBB != nullptr && "StartBB for Candidate is not defined!");
// StartBB should only have one predecessor since we put an unconditional
// branch at the end of PrevBB when we split the BasicBlock.
@@ -189,21 +259,24 @@ void OutlinableRegion::reattachCandidate() {
"No Predecessor for the region start basic block!");
assert(PrevBB->getTerminator() && "Terminator removed from PrevBB!");
- assert(EndBB->getTerminator() && "Terminator removed from EndBB!");
PrevBB->getTerminator()->eraseFromParent();
- EndBB->getTerminator()->eraseFromParent();
moveBBContents(*StartBB, *PrevBB);
BasicBlock *PlacementBB = PrevBB;
if (StartBB != EndBB)
PlacementBB = EndBB;
- moveBBContents(*FollowBB, *PlacementBB);
+ if (!EndsInBranch && PlacementBB->getUniqueSuccessor() != nullptr) {
+ assert(FollowBB != nullptr && "FollowBB for Candidate is not defined!");
+ assert(PlacementBB->getTerminator() && "Terminator removed from EndBB!");
+ PlacementBB->getTerminator()->eraseFromParent();
+ moveBBContents(*FollowBB, *PlacementBB);
+ PlacementBB->replaceSuccessorsPhiUsesWith(FollowBB, PlacementBB);
+ FollowBB->eraseFromParent();
+ }
PrevBB->replaceSuccessorsPhiUsesWith(StartBB, PrevBB);
- PrevBB->replaceSuccessorsPhiUsesWith(FollowBB, PlacementBB);
StartBB->eraseFromParent();
- FollowBB->eraseFromParent();
// Make sure to save changes back to the StartBB.
StartBB = PrevBB;
@@ -261,8 +334,9 @@ InstructionCost OutlinableRegion::getBenefit(TargetTransformInfo &TTI) {
// division instruction for targets that have a native division instruction.
// To be overly conservative, we only add 1 to the number of instructions for
// each division instruction.
- for (Instruction &I : *StartBB) {
- switch (I.getOpcode()) {
+ for (IRInstructionData &ID : *Candidate) {
+ Instruction *I = ID.Inst;
+ switch (I->getOpcode()) {
case Instruction::FDiv:
case Instruction::FRem:
case Instruction::SDiv:
@@ -272,7 +346,7 @@ InstructionCost OutlinableRegion::getBenefit(TargetTransformInfo &TTI) {
Benefit += 1;
break;
default:
- Benefit += TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);
+ Benefit += TTI.getInstructionCost(I, TargetTransformInfo::TCK_CodeSize);
break;
}
}
@@ -373,8 +447,24 @@ Function *IROutliner::createFunction(Module &M, OutlinableGroup &Group,
unsigned FunctionNameSuffix) {
assert(!Group.OutlinedFunction && "Function is already defined!");
+ Type *RetTy = Type::getVoidTy(M.getContext());
+ // All extracted functions _should_ have the same return type at this point
+ // since the similarity identifier ensures that all branches outside of the
+ // region occur in the same place.
+
+ // NOTE: Should we ever move to the model that uses a switch at every point
+ // needed, meaning that we could branch within the region or out, it is
+ // possible that we will need to switch to using the most general case all of
+ // the time.
+ for (OutlinableRegion *R : Group.Regions) {
+ Type *ExtractedFuncType = R->ExtractedFunction->getReturnType();
+ if ((RetTy->isVoidTy() && !ExtractedFuncType->isVoidTy()) ||
+ (RetTy->isIntegerTy(1) && ExtractedFuncType->isIntegerTy(16)))
+ RetTy = ExtractedFuncType;
+ }
+
Group.OutlinedFunctionType = FunctionType::get(
- Type::getVoidTy(M.getContext()), Group.ArgumentTypes, false);
+ RetTy, Group.ArgumentTypes, false);
// These functions will only be called from within the same module, so
// we can set an internal linkage.
@@ -430,21 +520,23 @@ Function *IROutliner::createFunction(Module &M, OutlinableGroup &Group,
///
/// \param [in] Old - The function to move the basic blocks from.
/// \param [in] New - The function to move the basic blocks to.
-/// \returns the first return block for the function in New.
-static BasicBlock *moveFunctionData(Function &Old, Function &New) {
- Function::iterator CurrBB, NextBB, FinalBB;
- BasicBlock *NewEnd = nullptr;
- std::vector<Instruction *> DebugInsts;
- for (CurrBB = Old.begin(), FinalBB = Old.end(); CurrBB != FinalBB;
- CurrBB = NextBB) {
- NextBB = std::next(CurrBB);
- CurrBB->removeFromParent();
- CurrBB->insertInto(&New);
- Instruction *I = CurrBB->getTerminator();
- if (isa<ReturnInst>(I))
- NewEnd = &(*CurrBB);
-
- for (Instruction &Val : *CurrBB) {
+/// \param [out] NewEnds - The return blocks of the new overall function.
+static void moveFunctionData(Function &Old, Function &New,
+ DenseMap<Value *, BasicBlock *> &NewEnds) {
+ for (BasicBlock &CurrBB : llvm::make_early_inc_range(Old)) {
+ CurrBB.removeFromParent();
+ CurrBB.insertInto(&New);
+ Instruction *I = CurrBB.getTerminator();
+
+    // Each block in which we find a return instruction is a potential exit
+    // path for the function. We keep track of each such block based on its
+    // return value here.
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(I))
+ NewEnds.insert(std::make_pair(RI->getReturnValue(), &CurrBB));
+
+ std::vector<Instruction *> DebugInsts;
+
+ for (Instruction &Val : CurrBB) {
// We must handle the scoping of called functions differently than
// other outlined instructions.
if (!isa<CallInst>(&Val)) {
@@ -476,8 +568,7 @@ static BasicBlock *moveFunctionData(Function &Old, Function &New) {
I->eraseFromParent();
}
- assert(NewEnd && "No return instruction for new function?");
- return NewEnd;
+ assert(NewEnds.size() > 0 && "No return instruction for new function?");
}
/// Find the the constants that will need to be lifted into arguments
@@ -664,11 +755,22 @@ findExtractedInputToOverallInputMapping(OutlinableRegion &Region,
// function to account for the extracted constants, we have two different
// counters as we find extracted arguments, and as we come across overall
// arguments.
+
+ // Additionally, in our first pass, for the first extracted function,
+ // we find argument locations for the canonical value numbering. This
+ // numbering overrides any discovered location for the extracted code.
for (unsigned InputVal : InputGVNs) {
+ Optional<unsigned> CanonicalNumberOpt = C.getCanonicalNum(InputVal);
+ assert(CanonicalNumberOpt.hasValue() && "Canonical number not found?");
+ unsigned CanonicalNumber = CanonicalNumberOpt.getValue();
+
Optional<Value *> InputOpt = C.fromGVN(InputVal);
assert(InputOpt.hasValue() && "Global value number not found?");
Value *Input = InputOpt.getValue();
+ DenseMap<unsigned, unsigned>::iterator AggArgIt =
+ Group.CanonicalNumberToAggArg.find(CanonicalNumber);
+
if (!Group.InputTypesSet) {
Group.ArgumentTypes.push_back(Input->getType());
// If the input value has a swifterr attribute, make sure to mark the
@@ -684,17 +786,34 @@ findExtractedInputToOverallInputMapping(OutlinableRegion &Region,
// Check if we have a constant. If we do add it to the overall argument
// number to Constant map for the region, and continue to the next input.
if (Constant *CST = dyn_cast<Constant>(Input)) {
- Region.AggArgToConstant.insert(std::make_pair(TypeIndex, CST));
+ if (AggArgIt != Group.CanonicalNumberToAggArg.end())
+ Region.AggArgToConstant.insert(std::make_pair(AggArgIt->second, CST));
+ else {
+ Group.CanonicalNumberToAggArg.insert(
+ std::make_pair(CanonicalNumber, TypeIndex));
+ Region.AggArgToConstant.insert(std::make_pair(TypeIndex, CST));
+ }
TypeIndex++;
continue;
}
// It is not a constant, we create the mapping from extracted argument list
- // to the overall argument list.
+ // to the overall argument list, using the canonical location, if it exists.
assert(ArgInputs.count(Input) && "Input cannot be found!");
- Region.ExtractedArgToAgg.insert(std::make_pair(OriginalIndex, TypeIndex));
- Region.AggArgToExtracted.insert(std::make_pair(TypeIndex, OriginalIndex));
+ if (AggArgIt != Group.CanonicalNumberToAggArg.end()) {
+ if (OriginalIndex != AggArgIt->second)
+ Region.ChangedArgOrder = true;
+ Region.ExtractedArgToAgg.insert(
+ std::make_pair(OriginalIndex, AggArgIt->second));
+ Region.AggArgToExtracted.insert(
+ std::make_pair(AggArgIt->second, OriginalIndex));
+ } else {
+ Group.CanonicalNumberToAggArg.insert(
+ std::make_pair(CanonicalNumber, TypeIndex));
+ Region.ExtractedArgToAgg.insert(std::make_pair(OriginalIndex, TypeIndex));
+ Region.AggArgToExtracted.insert(std::make_pair(TypeIndex, OriginalIndex));
+ }
OriginalIndex++;
TypeIndex++;
}
@@ -718,10 +837,41 @@ findExtractedInputToOverallInputMapping(OutlinableRegion &Region,
/// \param [in] Outputs - The values found by the code extractor.
static void
findExtractedOutputToOverallOutputMapping(OutlinableRegion &Region,
- ArrayRef<Value *> Outputs) {
+ SetVector<Value *> &Outputs) {
OutlinableGroup &Group = *Region.Parent;
IRSimilarityCandidate &C = *Region.Candidate;
+ SmallVector<BasicBlock *> BE;
+ DenseSet<BasicBlock *> BBSet;
+ C.getBasicBlocks(BBSet, BE);
+
+ // Find the exits to the region.
+ SmallPtrSet<BasicBlock *, 1> Exits;
+ for (BasicBlock *Block : BE)
+ for (BasicBlock *Succ : successors(Block))
+ if (!BBSet.contains(Succ))
+ Exits.insert(Succ);
+
+ // After determining which blocks exit to PHINodes, we add these PHINodes to
+ // the set of outputs to be processed. We also check the incoming values of
+ // the PHINodes for whether they should no longer be considered outputs.
+ for (BasicBlock *ExitBB : Exits) {
+ for (PHINode &PN : ExitBB->phis()) {
+ // Find all incoming values from the outlining region.
+ SmallVector<unsigned, 2> IncomingVals;
+ for (unsigned Idx = 0; Idx < PN.getNumIncomingValues(); ++Idx)
+ if (BBSet.contains(PN.getIncomingBlock(Idx)))
+ IncomingVals.push_back(Idx);
+
+ // Do not process PHI if there is one (or fewer) predecessor from region.
+ if (IncomingVals.size() <= 1)
+ continue;
+
+ Region.IgnoreRegion = true;
+ return;
+ }
+ }
+
// This counts the argument number in the extracted function.
unsigned OriginalIndex = Region.NumExtractedInputs;
@@ -797,7 +947,7 @@ void IROutliner::findAddInputsOutputs(Module &M, OutlinableRegion &Region,
// Map the outputs found by the CodeExtractor to the arguments found for
// the overall function.
- findExtractedOutputToOverallOutputMapping(Region, Outputs.getArrayRef());
+ findExtractedOutputToOverallOutputMapping(Region, Outputs);
}
/// Replace the extracted function in the Region with a call to the overall
@@ -820,9 +970,10 @@ CallInst *replaceCalledFunction(Module &M, OutlinableRegion &Region) {
assert(AggFunc && "Function to replace with is nullptr?");
// If the arguments are the same size, there are not values that need to be
- // made argument, or different output registers to handle. We can simply
- // replace the called function in this case.
- if (AggFunc->arg_size() == Call->arg_size()) {
+  // made into an argument, the argument ordering has not been changed, or
+ // different output registers to handle. We can simply replace the called
+ // function in this case.
+ if (!Region.ChangedArgOrder && AggFunc->arg_size() == Call->arg_size()) {
LLVM_DEBUG(dbgs() << "Replace call to " << *Call << " with call to "
<< *AggFunc << " with same number of arguments\n");
Call->setCalledFunction(AggFunc);
@@ -895,6 +1046,9 @@ CallInst *replaceCalledFunction(Module &M, OutlinableRegion &Region) {
// Transfer any debug information.
Call->setDebugLoc(Region.Call->getDebugLoc());
+ // Since our output may determine which branch we go to, we make sure to
+  // propagate this new call value through the module.
+ OldCall->replaceAllUsesWith(Call);
// Remove the old instruction.
OldCall->eraseFromParent();
@@ -913,13 +1067,23 @@ CallInst *replaceCalledFunction(Module &M, OutlinableRegion &Region) {
// region with the arguments of the function for an OutlinableGroup.
//
/// \param [in] Region - The region of extracted code to be changed.
-/// \param [in,out] OutputBB - The BasicBlock for the output stores for this
+/// \param [in,out] OutputBBs - The BasicBlocks for the output stores for this
/// region.
-static void replaceArgumentUses(OutlinableRegion &Region,
- BasicBlock *OutputBB) {
+/// \param [in] FirstFunction - A flag to indicate whether we are using this
+/// function to define the overall outlined function for all the regions, or
+/// if we are operating on one of the following regions.
+static void
+replaceArgumentUses(OutlinableRegion &Region,
+ DenseMap<Value *, BasicBlock *> &OutputBBs,
+ bool FirstFunction = false) {
OutlinableGroup &Group = *Region.Parent;
assert(Region.ExtractedFunction && "Region has no extracted function?");
+ Function *DominatingFunction = Region.ExtractedFunction;
+ if (FirstFunction)
+ DominatingFunction = Group.OutlinedFunction;
+ DominatorTree DT(*DominatingFunction);
+
for (unsigned ArgIdx = 0; ArgIdx < Region.ExtractedFunction->arg_size();
ArgIdx++) {
assert(Region.ExtractedArgToAgg.find(ArgIdx) !=
@@ -946,11 +1110,53 @@ static void replaceArgumentUses(OutlinableRegion &Region,
assert(InstAsUser && "User is nullptr!");
Instruction *I = cast<Instruction>(InstAsUser);
- I->setDebugLoc(DebugLoc());
- LLVM_DEBUG(dbgs() << "Move store for instruction " << *I << " to "
- << *OutputBB << "\n");
+ BasicBlock *BB = I->getParent();
+ SmallVector<BasicBlock *, 4> Descendants;
+ DT.getDescendants(BB, Descendants);
+ bool EdgeAdded = false;
+ if (Descendants.size() == 0) {
+ EdgeAdded = true;
+ DT.insertEdge(&DominatingFunction->getEntryBlock(), BB);
+ DT.getDescendants(BB, Descendants);
+ }
+
+    // Iterate over the following blocks, looking for return instructions;
+    // if we find one, find the corresponding output block for the return value
+    // and move our store instruction there.
+ for (BasicBlock *DescendBB : Descendants) {
+ ReturnInst *RI = dyn_cast<ReturnInst>(DescendBB->getTerminator());
+ if (!RI)
+ continue;
+ Value *RetVal = RI->getReturnValue();
+ auto VBBIt = OutputBBs.find(RetVal);
+ assert(VBBIt != OutputBBs.end() && "Could not find output value!");
+
+ // If this is storing a PHINode, we must make sure it is included in the
+ // overall function.
+ StoreInst *SI = cast<StoreInst>(I);
+
+ Value *ValueOperand = SI->getValueOperand();
+
+ StoreInst *NewI = cast<StoreInst>(I->clone());
+ NewI->setDebugLoc(DebugLoc());
+ BasicBlock *OutputBB = VBBIt->second;
+ OutputBB->getInstList().push_back(NewI);
+ LLVM_DEBUG(dbgs() << "Move store for instruction " << *I << " to "
+ << *OutputBB << "\n");
- I->moveBefore(*OutputBB, OutputBB->end());
+ if (FirstFunction)
+ continue;
+ Value *CorrVal =
+ Region.findCorrespondingValueIn(*Group.Regions[0], ValueOperand);
+ assert(CorrVal && "Value is nullptr?");
+ NewI->setOperand(0, CorrVal);
+ }
+
+ // If we added an edge for basic blocks without a predecessor, we remove it
+ // here.
+ if (EdgeAdded)
+ DT.deleteEdge(&DominatingFunction->getEntryBlock(), BB);
+ I->eraseFromParent();
LLVM_DEBUG(dbgs() << "Replacing uses of output " << *Arg << " in function "
<< *Region.ExtractedFunction << " with " << *AggArg
@@ -990,69 +1196,53 @@ void replaceConstants(OutlinableRegion &Region) {
}
}
-/// For the given function, find all the nondebug or lifetime instructions,
-/// and return them as a vector. Exclude any blocks in \p ExludeBlocks.
-///
-/// \param [in] F - The function we collect the instructions from.
-/// \param [in] ExcludeBlocks - BasicBlocks to ignore.
-/// \returns the list of instructions extracted.
-static std::vector<Instruction *>
-collectRelevantInstructions(Function &F,
- DenseSet<BasicBlock *> &ExcludeBlocks) {
- std::vector<Instruction *> RelevantInstructions;
-
- for (BasicBlock &BB : F) {
- if (ExcludeBlocks.contains(&BB))
- continue;
-
- for (Instruction &Inst : BB) {
- if (Inst.isLifetimeStartOrEnd())
- continue;
- if (isa<DbgInfoIntrinsic>(Inst))
- continue;
-
- RelevantInstructions.push_back(&Inst);
- }
- }
-
- return RelevantInstructions;
-}
-
/// It is possible that there is a basic block that already performs the same
/// stores. This returns a duplicate block, if it exists
///
-/// \param OutputBB [in] the block we are looking for a duplicate of.
+/// \param OutputBBs [in] the blocks we are looking for a duplicate of.
/// \param OutputStoreBBs [in] The existing output blocks.
/// \returns an optional value with the number of the matching output block, if any.
-Optional<unsigned>
-findDuplicateOutputBlock(BasicBlock *OutputBB,
- ArrayRef<BasicBlock *> OutputStoreBBs) {
+Optional<unsigned> findDuplicateOutputBlock(
+ DenseMap<Value *, BasicBlock *> &OutputBBs,
+ std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
- bool WrongInst = false;
- bool WrongSize = false;
+ bool Mismatch = false;
unsigned MatchingNum = 0;
- for (BasicBlock *CompBB : OutputStoreBBs) {
- WrongInst = false;
- if (CompBB->size() - 1 != OutputBB->size()) {
- WrongSize = true;
- MatchingNum++;
- continue;
- }
-
- WrongSize = false;
- BasicBlock::iterator NIt = OutputBB->begin();
- for (Instruction &I : *CompBB) {
- if (isa<BranchInst>(&I))
- continue;
+  // We compare the new set of output blocks to the other sets of output
+  // blocks. If they contain the same number of blocks, with identical
+  // instructions, they are considered to be the same.
+ for (DenseMap<Value *, BasicBlock *> &CompBBs : OutputStoreBBs) {
+ Mismatch = false;
+ for (std::pair<Value *, BasicBlock *> &VToB : CompBBs) {
+ DenseMap<Value *, BasicBlock *>::iterator OutputBBIt =
+ OutputBBs.find(VToB.first);
+ if (OutputBBIt == OutputBBs.end()) {
+ Mismatch = true;
+ break;
+ }
- if (!I.isIdenticalTo(&(*NIt))) {
- WrongInst = true;
+ BasicBlock *CompBB = VToB.second;
+ BasicBlock *OutputBB = OutputBBIt->second;
+ if (CompBB->size() - 1 != OutputBB->size()) {
+ Mismatch = true;
break;
}
- NIt++;
+ BasicBlock::iterator NIt = OutputBB->begin();
+ for (Instruction &I : *CompBB) {
+ if (isa<BranchInst>(&I))
+ continue;
+
+ if (!I.isIdenticalTo(&(*NIt))) {
+ Mismatch = true;
+ break;
+ }
+
+ NIt++;
+ }
}
- if (!WrongInst && !WrongSize)
+
+ if (!Mismatch)
return MatchingNum;
MatchingNum++;
@@ -1061,95 +1251,130 @@ findDuplicateOutputBlock(BasicBlock *OutputBB,
return None;
}
+/// Remove empty output blocks from the outlined region.
+///
+/// \param BlocksToPrune - Mapping of return values to output blocks for the
+/// \p Region.
+/// \param Region - The OutlinableRegion we are analyzing.
+static bool
+analyzeAndPruneOutputBlocks(DenseMap<Value *, BasicBlock *> &BlocksToPrune,
+ OutlinableRegion &Region) {
+ bool AllRemoved = true;
+ Value *RetValueForBB;
+ BasicBlock *NewBB;
+ SmallVector<Value *, 4> ToRemove;
+ // Iterate over the output blocks created in the outlined section.
+ for (std::pair<Value *, BasicBlock *> &VtoBB : BlocksToPrune) {
+ RetValueForBB = VtoBB.first;
+ NewBB = VtoBB.second;
+
+    // If the block has no instructions, we remove it from the module, and also
+    // mark its value for removal from the return value to output block mapping.
+ if (NewBB->size() == 0) {
+ NewBB->eraseFromParent();
+ ToRemove.push_back(RetValueForBB);
+ continue;
+ }
+
+ // Mark that we could not remove all the blocks since they were not all
+ // empty.
+ AllRemoved = false;
+ }
+
+ // Remove the return value from the mapping.
+ for (Value *V : ToRemove)
+ BlocksToPrune.erase(V);
+
+ // Mark the region as having the no output scheme.
+ if (AllRemoved)
+ Region.OutputBlockNum = -1;
+
+ return AllRemoved;
+}
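analyzeAndPruneOutputBlocks above erases mappings whose output block stayed empty and reports whether every block was removed. A standalone sketch of the same erase-then-report pattern, with a std::map of vectors standing in for the return-value-to-output-block map (illustrative only):

    #include <cassert>
    #include <map>
    #include <vector>

    bool pruneEmpty(std::map<int, std::vector<int>> &Blocks) {
      bool AllRemoved = true;
      std::vector<int> ToRemove;
      for (const auto &KV : Blocks) {
        if (KV.second.empty())
          ToRemove.push_back(KV.first); // nothing stored here, drop the mapping
        else
          AllRemoved = false;           // this block still carries stores
      }
      for (int Key : ToRemove)
        Blocks.erase(Key);
      return AllRemoved;
    }

    int main() {
      std::map<int, std::vector<int>> Blocks{{0, {}}, {1, {7}}, {2, {}}};
      assert(!pruneEmpty(Blocks) && Blocks.size() == 1);
      return 0;
    }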
+
/// For the outlined section, move the needed StoreInsts for the output
/// registers into their own block. Then, determine if there is a duplicate
/// output block already created.
///
/// \param [in] OG - The OutlinableGroup of regions to be outlined.
/// \param [in] Region - The OutlinableRegion that is being analyzed.
-/// \param [in,out] OutputBB - the block that stores for this region will be
+/// \param [in,out] OutputBBs - the blocks that stores for this region will be
/// placed in.
-/// \param [in] EndBB - the final block of the extracted function.
+/// \param [in] EndBBs - the final blocks of the extracted function.
/// \param [in] OutputMappings - OutputMappings the mapping of values that have
/// been replaced by a new output value.
/// \param [in,out] OutputStoreBBs - The existing output blocks.
-static void
-alignOutputBlockWithAggFunc(OutlinableGroup &OG, OutlinableRegion &Region,
- BasicBlock *OutputBB, BasicBlock *EndBB,
- const DenseMap<Value *, Value *> &OutputMappings,
- std::vector<BasicBlock *> &OutputStoreBBs) {
- DenseSet<unsigned> ValuesToFind(Region.GVNStores.begin(),
- Region.GVNStores.end());
-
- // We iterate over the instructions in the extracted function, and find the
- // global value number of the instructions. If we find a value that should
- // be contained in a store, we replace the uses of the value with the value
- // from the overall function, so that the store is storing the correct
- // value from the overall function.
- DenseSet<BasicBlock *> ExcludeBBs(OutputStoreBBs.begin(),
- OutputStoreBBs.end());
- ExcludeBBs.insert(OutputBB);
- std::vector<Instruction *> ExtractedFunctionInsts =
- collectRelevantInstructions(*(Region.ExtractedFunction), ExcludeBBs);
- std::vector<Instruction *> OverallFunctionInsts =
- collectRelevantInstructions(*OG.OutlinedFunction, ExcludeBBs);
-
- assert(ExtractedFunctionInsts.size() == OverallFunctionInsts.size() &&
- "Number of relevant instructions not equal!");
-
- unsigned NumInstructions = ExtractedFunctionInsts.size();
- for (unsigned Idx = 0; Idx < NumInstructions; Idx++) {
- Value *V = ExtractedFunctionInsts[Idx];
-
- if (OutputMappings.find(V) != OutputMappings.end())
- V = OutputMappings.find(V)->second;
- Optional<unsigned> GVN = Region.Candidate->getGVN(V);
-
- // If we have found one of the stored values for output, replace the value
- // with the corresponding one from the overall function.
- if (GVN.hasValue() && ValuesToFind.erase(GVN.getValue())) {
- V->replaceAllUsesWith(OverallFunctionInsts[Idx]);
- if (ValuesToFind.size() == 0)
- break;
- }
-
- if (ValuesToFind.size() == 0)
- break;
- }
-
- assert(ValuesToFind.size() == 0 && "Not all store values were handled!");
-
- // If the size of the block is 0, then there are no stores, and we do not
- // need to save this block.
- if (OutputBB->size() == 0) {
- Region.OutputBlockNum = -1;
- OutputBB->eraseFromParent();
+static void alignOutputBlockWithAggFunc(
+ OutlinableGroup &OG, OutlinableRegion &Region,
+ DenseMap<Value *, BasicBlock *> &OutputBBs,
+ DenseMap<Value *, BasicBlock *> &EndBBs,
+ const DenseMap<Value *, Value *> &OutputMappings,
+ std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
+  // If none of the output blocks have any instructions, we do not have to
+  // determine whether they match any of the other output schemes, and we do
+  // not have to do anything else.
+ if (analyzeAndPruneOutputBlocks(OutputBBs, Region))
return;
- }
- // Determine is there is a duplicate block.
+  // Determine if there is a duplicate set of blocks.
Optional<unsigned> MatchingBB =
- findDuplicateOutputBlock(OutputBB, OutputStoreBBs);
+ findDuplicateOutputBlock(OutputBBs, OutputStoreBBs);
- // If there is, we remove the new output block. If it does not,
- // we add it to our list of output blocks.
+  // If there is, we remove the new output blocks. If there is not,
+  // we add them to our list of sets of output blocks.
if (MatchingBB.hasValue()) {
LLVM_DEBUG(dbgs() << "Set output block for region in function"
<< Region.ExtractedFunction << " to "
<< MatchingBB.getValue());
Region.OutputBlockNum = MatchingBB.getValue();
- OutputBB->eraseFromParent();
+ for (std::pair<Value *, BasicBlock *> &VtoBB : OutputBBs)
+ VtoBB.second->eraseFromParent();
return;
}
Region.OutputBlockNum = OutputStoreBBs.size();
- LLVM_DEBUG(dbgs() << "Create output block for region in"
- << Region.ExtractedFunction << " to "
- << *OutputBB);
- OutputStoreBBs.push_back(OutputBB);
- BranchInst::Create(EndBB, OutputBB);
+ Value *RetValueForBB;
+ BasicBlock *NewBB;
+ OutputStoreBBs.push_back(DenseMap<Value *, BasicBlock *>());
+ for (std::pair<Value *, BasicBlock *> &VtoBB : OutputBBs) {
+ RetValueForBB = VtoBB.first;
+ NewBB = VtoBB.second;
+ DenseMap<Value *, BasicBlock *>::iterator VBBIt =
+ EndBBs.find(RetValueForBB);
+ LLVM_DEBUG(dbgs() << "Create output block for region in"
+ << Region.ExtractedFunction << " to "
+ << *NewBB);
+ BranchInst::Create(VBBIt->second, NewBB);
+ OutputStoreBBs.back().insert(std::make_pair(RetValueForBB, NewBB));
+ }
+}
+
+/// Takes in a mapping of ConstantValues to BasicBlocks, \p OldMap, sorts its
+/// keys, creates a basic block for each key, and inserts the result into
+/// \p NewMap. Each BasicBlock is named with the scheme "<basename>_<key_idx>".
+///
+/// \param OldMap [in] - The mapping to base the new mapping off of.
+/// \param NewMap [out] - The output mapping using the keys of \p OldMap.
+/// \param ParentFunc [in] - The function to put the new basic block in.
+/// \param BaseName [in] - The start of the BasicBlock names to be appended to
+/// by an index value.
+static void createAndInsertBasicBlocks(DenseMap<Value *, BasicBlock *> &OldMap,
+ DenseMap<Value *, BasicBlock *> &NewMap,
+ Function *ParentFunc, Twine BaseName) {
+ unsigned Idx = 0;
+ std::vector<Value *> SortedKeys;
+
+ getSortedConstantKeys(SortedKeys, OldMap);
+
+ for (Value *RetVal : SortedKeys) {
+ BasicBlock *NewBB = BasicBlock::Create(
+ ParentFunc->getContext(),
+ Twine(BaseName) + Twine("_") + Twine(static_cast<unsigned>(Idx++)),
+ ParentFunc);
+ NewMap.insert(std::make_pair(RetVal, NewBB));
+ }
}
/// Create the switch statement for outlined function to differentiate between
@@ -1159,50 +1384,74 @@ alignOutputBlockWithAggFunc(OutlinableGroup &OG, OutlinableRegion &Region,
/// matches the needed stores for the extracted section.
/// \param [in] M - The module we are outlining from.
/// \param [in] OG - The group of regions to be outlined.
-/// \param [in] EndBB - The final block of the extracted function.
+/// \param [in] EndBBs - The final blocks of the extracted function.
/// \param [in,out] OutputStoreBBs - The existing output blocks.
-void createSwitchStatement(Module &M, OutlinableGroup &OG, BasicBlock *EndBB,
- ArrayRef<BasicBlock *> OutputStoreBBs) {
+void createSwitchStatement(
+ Module &M, OutlinableGroup &OG, DenseMap<Value *, BasicBlock *> &EndBBs,
+ std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
// We only need the switch statement if there is more than one store
// combination.
if (OG.OutputGVNCombinations.size() > 1) {
Function *AggFunc = OG.OutlinedFunction;
- // Create a final block
- BasicBlock *ReturnBlock =
- BasicBlock::Create(M.getContext(), "final_block", AggFunc);
- Instruction *Term = EndBB->getTerminator();
- Term->moveBefore(*ReturnBlock, ReturnBlock->end());
- // Put the switch statement in the old end basic block for the function with
- // a fall through to the new return block
- LLVM_DEBUG(dbgs() << "Create switch statement in " << *AggFunc << " for "
- << OutputStoreBBs.size() << "\n");
- SwitchInst *SwitchI =
- SwitchInst::Create(AggFunc->getArg(AggFunc->arg_size() - 1),
- ReturnBlock, OutputStoreBBs.size(), EndBB);
-
- unsigned Idx = 0;
- for (BasicBlock *BB : OutputStoreBBs) {
- SwitchI->addCase(ConstantInt::get(Type::getInt32Ty(M.getContext()), Idx),
- BB);
- Term = BB->getTerminator();
- Term->setSuccessor(0, ReturnBlock);
- Idx++;
+ // Create a final block for each different return block.
+ DenseMap<Value *, BasicBlock *> ReturnBBs;
+ createAndInsertBasicBlocks(OG.EndBBs, ReturnBBs, AggFunc, "final_block");
+
+ for (std::pair<Value *, BasicBlock *> &RetBlockPair : ReturnBBs) {
+ std::pair<Value *, BasicBlock *> &OutputBlock =
+ *OG.EndBBs.find(RetBlockPair.first);
+ BasicBlock *ReturnBlock = RetBlockPair.second;
+ BasicBlock *EndBB = OutputBlock.second;
+ Instruction *Term = EndBB->getTerminator();
+ // Move the return value to the final block instead of the original exit
+ // stub.
+ Term->moveBefore(*ReturnBlock, ReturnBlock->end());
+ // Put the switch statement in the old end basic block for the function
+ // with a fall through to the new return block.
+ LLVM_DEBUG(dbgs() << "Create switch statement in " << *AggFunc << " for "
+ << OutputStoreBBs.size() << "\n");
+ SwitchInst *SwitchI =
+ SwitchInst::Create(AggFunc->getArg(AggFunc->arg_size() - 1),
+ ReturnBlock, OutputStoreBBs.size(), EndBB);
+
+ unsigned Idx = 0;
+ for (DenseMap<Value *, BasicBlock *> &OutputStoreBB : OutputStoreBBs) {
+ DenseMap<Value *, BasicBlock *>::iterator OSBBIt =
+ OutputStoreBB.find(OutputBlock.first);
+
+ if (OSBBIt == OutputStoreBB.end())
+ continue;
+
+ BasicBlock *BB = OSBBIt->second;
+ SwitchI->addCase(
+ ConstantInt::get(Type::getInt32Ty(M.getContext()), Idx), BB);
+ Term = BB->getTerminator();
+ Term->setSuccessor(0, ReturnBlock);
+ Idx++;
+ }
}
return;
}
- // If there needs to be stores, move them from the output block to the end
- // block to save on branching instructions.
+  // If stores are needed, move them from the output blocks to their
+  // corresponding ending block.
if (OutputStoreBBs.size() == 1) {
LLVM_DEBUG(dbgs() << "Move store instructions to the end block in "
<< *OG.OutlinedFunction << "\n");
- BasicBlock *OutputBlock = OutputStoreBBs[0];
- Instruction *Term = OutputBlock->getTerminator();
- Term->eraseFromParent();
- Term = EndBB->getTerminator();
- moveBBContents(*OutputBlock, *EndBB);
- Term->moveBefore(*EndBB, EndBB->end());
- OutputBlock->eraseFromParent();
+ DenseMap<Value *, BasicBlock *> OutputBlocks = OutputStoreBBs[0];
+ for (std::pair<Value *, BasicBlock *> &VBPair : OutputBlocks) {
+ DenseMap<Value *, BasicBlock *>::iterator EndBBIt =
+ EndBBs.find(VBPair.first);
+ assert(EndBBIt != EndBBs.end() && "Could not find end block");
+ BasicBlock *EndBB = EndBBIt->second;
+ BasicBlock *OutputBB = VBPair.second;
+ Instruction *Term = OutputBB->getTerminator();
+ Term->eraseFromParent();
+ Term = EndBB->getTerminator();
+ moveBBContents(*OutputBB, *EndBB);
+ Term->moveBefore(*EndBB, EndBB->end());
+ OutputBB->eraseFromParent();
+ }
}
}
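The switch logic above dispatches on an extra argument that tells the aggregate function which set of output stores a particular caller needs. A standalone sketch of that dispatch shape, with invented names and plain ints standing in for the stored outputs:

    #include <cassert>

    static int OutA = 0, OutB = 0;

    static int outlinedBody(int X) { return X * X; } // the shared computation

    static void aggregate(int X, int OutputSchemeNum) {
      int Result = outlinedBody(X);
      switch (OutputSchemeNum) {    // plays the role of the SwitchInst
      case 0: OutA = Result; break; // scheme 0 stores into OutA
      case 1: OutB = Result; break; // scheme 1 stores into OutB
      default: break;               // e.g. -1: no output stores needed
      }
    }

    int main() {
      aggregate(3, 0);
      aggregate(4, 1);
      assert(OutA == 9 && OutB == 16);
      return 0;
    }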
@@ -1217,42 +1466,44 @@ void createSwitchStatement(Module &M, OutlinableGroup &OG, BasicBlock *EndBB,
/// set of stores needed for the different functions.
/// \param [in,out] FuncsToRemove - Extracted functions to erase from module
/// once outlining is complete.
-static void fillOverallFunction(Module &M, OutlinableGroup &CurrentGroup,
- std::vector<BasicBlock *> &OutputStoreBBs,
- std::vector<Function *> &FuncsToRemove) {
+static void fillOverallFunction(
+ Module &M, OutlinableGroup &CurrentGroup,
+ std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs,
+ std::vector<Function *> &FuncsToRemove) {
OutlinableRegion *CurrentOS = CurrentGroup.Regions[0];
// Move first extracted function's instructions into new function.
LLVM_DEBUG(dbgs() << "Move instructions from "
<< *CurrentOS->ExtractedFunction << " to instruction "
<< *CurrentGroup.OutlinedFunction << "\n");
-
- CurrentGroup.EndBB = moveFunctionData(*CurrentOS->ExtractedFunction,
- *CurrentGroup.OutlinedFunction);
+ moveFunctionData(*CurrentOS->ExtractedFunction,
+ *CurrentGroup.OutlinedFunction, CurrentGroup.EndBBs);
// Transfer the attributes from the function to the new function.
- for (Attribute A :
- CurrentOS->ExtractedFunction->getAttributes().getFnAttributes())
+ for (Attribute A : CurrentOS->ExtractedFunction->getAttributes().getFnAttrs())
CurrentGroup.OutlinedFunction->addFnAttr(A);
- // Create an output block for the first extracted function.
- BasicBlock *NewBB = BasicBlock::Create(
- M.getContext(), Twine("output_block_") + Twine(static_cast<unsigned>(0)),
- CurrentGroup.OutlinedFunction);
+ // Create a new set of output blocks for the first extracted function.
+ DenseMap<Value *, BasicBlock *> NewBBs;
+ createAndInsertBasicBlocks(CurrentGroup.EndBBs, NewBBs,
+ CurrentGroup.OutlinedFunction, "output_block_0");
CurrentOS->OutputBlockNum = 0;
- replaceArgumentUses(*CurrentOS, NewBB);
+ replaceArgumentUses(*CurrentOS, NewBBs, true);
replaceConstants(*CurrentOS);
- // If the new basic block has no new stores, we can erase it from the module.
- // It it does, we create a branch instruction to the last basic block from the
- // new one.
- if (NewBB->size() == 0) {
- CurrentOS->OutputBlockNum = -1;
- NewBB->eraseFromParent();
- } else {
- BranchInst::Create(CurrentGroup.EndBB, NewBB);
- OutputStoreBBs.push_back(NewBB);
+  // We first identify whether any output blocks are empty; if they are, we
+  // remove them. We then create a branch instruction from each non-empty
+  // output block to the corresponding return block of the function.
+ if (!analyzeAndPruneOutputBlocks(NewBBs, *CurrentOS)) {
+ OutputStoreBBs.push_back(DenseMap<Value *, BasicBlock *>());
+ for (std::pair<Value *, BasicBlock *> &VToBB : NewBBs) {
+ DenseMap<Value *, BasicBlock *>::iterator VBBIt =
+ CurrentGroup.EndBBs.find(VToBB.first);
+ BasicBlock *EndBB = VBBIt->second;
+ BranchInst::Create(EndBB, VToBB.second);
+ OutputStoreBBs.back().insert(VToBB);
+ }
}
// Replace the call to the extracted function with the outlined function.
@@ -1268,25 +1519,28 @@ void IROutliner::deduplicateExtractedSections(
std::vector<Function *> &FuncsToRemove, unsigned &OutlinedFunctionNum) {
createFunction(M, CurrentGroup, OutlinedFunctionNum);
- std::vector<BasicBlock *> OutputStoreBBs;
+ std::vector<DenseMap<Value *, BasicBlock *>> OutputStoreBBs;
OutlinableRegion *CurrentOS;
fillOverallFunction(M, CurrentGroup, OutputStoreBBs, FuncsToRemove);
+ std::vector<Value *> SortedKeys;
for (unsigned Idx = 1; Idx < CurrentGroup.Regions.size(); Idx++) {
CurrentOS = CurrentGroup.Regions[Idx];
AttributeFuncs::mergeAttributesForOutlining(*CurrentGroup.OutlinedFunction,
*CurrentOS->ExtractedFunction);
- // Create a new BasicBlock to hold the needed store instructions.
- BasicBlock *NewBB = BasicBlock::Create(
- M.getContext(), "output_block_" + std::to_string(Idx),
- CurrentGroup.OutlinedFunction);
- replaceArgumentUses(*CurrentOS, NewBB);
+ // Create a set of BasicBlocks, one for each return block, to hold the
+ // needed store instructions.
+ DenseMap<Value *, BasicBlock *> NewBBs;
+ createAndInsertBasicBlocks(
+ CurrentGroup.EndBBs, NewBBs, CurrentGroup.OutlinedFunction,
+ "output_block_" + Twine(static_cast<unsigned>(Idx)));
- alignOutputBlockWithAggFunc(CurrentGroup, *CurrentOS, NewBB,
- CurrentGroup.EndBB, OutputMappings,
+ replaceArgumentUses(*CurrentOS, NewBBs);
+ alignOutputBlockWithAggFunc(CurrentGroup, *CurrentOS, NewBBs,
+ CurrentGroup.EndBBs, OutputMappings,
OutputStoreBBs);
CurrentOS->Call = replaceCalledFunction(M, *CurrentOS);
@@ -1294,11 +1548,78 @@ void IROutliner::deduplicateExtractedSections(
}
// Create a switch statement to handle the different output schemes.
- createSwitchStatement(M, CurrentGroup, CurrentGroup.EndBB, OutputStoreBBs);
+ createSwitchStatement(M, CurrentGroup, CurrentGroup.EndBBs, OutputStoreBBs);
OutlinedFunctionNum++;
}
+/// Checks that the next instruction in the InstructionDataList matches the
+/// next instruction in the module. If they do not match, extra code may have
+/// been inserted, and we must ignore it.
+///
+/// \param ID - The IRInstructionData to check the next instruction of.
+/// \returns true if the InstructionDataList and actual instruction match.
+static bool nextIRInstructionDataMatchesNextInst(IRInstructionData &ID) {
+ // We check if there is a discrepancy between the InstructionDataList
+ // and the actual next instruction in the module. If there is, it means
+ // that an extra instruction was added, likely by the CodeExtractor.
+
+ // Since we do not have any similarity data about this particular
+ // instruction, we cannot confidently outline it, and must discard this
+ // candidate.
+ IRInstructionDataList::iterator NextIDIt = std::next(ID.getIterator());
+ Instruction *NextIDLInst = NextIDIt->Inst;
+ Instruction *NextModuleInst = nullptr;
+ if (!ID.Inst->isTerminator())
+ NextModuleInst = ID.Inst->getNextNonDebugInstruction();
+ else if (NextIDLInst != nullptr)
+ NextModuleInst =
+ &*NextIDIt->Inst->getParent()->instructionsWithoutDebug().begin();
+
+ if (NextIDLInst && NextIDLInst != NextModuleInst)
+ return false;
+
+ return true;
+}
+
+bool IROutliner::isCompatibleWithAlreadyOutlinedCode(
+ const OutlinableRegion &Region) {
+ IRSimilarityCandidate *IRSC = Region.Candidate;
+ unsigned StartIdx = IRSC->getStartIdx();
+ unsigned EndIdx = IRSC->getEndIdx();
+
+ // A check to make sure that we are not about to attempt to outline something
+ // that has already been outlined.
+ for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
+ if (Outlined.contains(Idx))
+ return false;
+
+ // We check if the recorded instruction matches the actual next instruction,
+ // if it does not, we fix it in the InstructionDataList.
+ if (!Region.Candidate->backInstruction()->isTerminator()) {
+ Instruction *NewEndInst =
+ Region.Candidate->backInstruction()->getNextNonDebugInstruction();
+ assert(NewEndInst && "Next instruction is a nullptr?");
+ if (Region.Candidate->end()->Inst != NewEndInst) {
+ IRInstructionDataList *IDL = Region.Candidate->front()->IDL;
+ IRInstructionData *NewEndIRID = new (InstDataAllocator.Allocate())
+ IRInstructionData(*NewEndInst,
+ InstructionClassifier.visit(*NewEndInst), *IDL);
+
+ // Insert the first IRInstructionData of the new region after the
+ // last IRInstructionData of the IRSimilarityCandidate.
+ IDL->insert(Region.Candidate->end(), *NewEndIRID);
+ }
+ }
+
+ return none_of(*IRSC, [this](IRInstructionData &ID) {
+ if (!nextIRInstructionDataMatchesNextInst(ID))
+ return true;
+
+ return !this->InstructionClassifier.visit(ID.Inst);
+ });
+}
+
void IROutliner::pruneIncompatibleRegions(
std::vector<IRSimilarityCandidate> &CandidateVec,
OutlinableGroup &CurrentGroup) {
@@ -1310,6 +1631,15 @@ void IROutliner::pruneIncompatibleRegions(
return LHS.getStartIdx() < RHS.getStartIdx();
});
+ IRSimilarityCandidate &FirstCandidate = CandidateVec[0];
+ // Since outlining a call and a branch instruction will be the same as only
+  // outlining a call instruction, we ignore it as a space saving.
+ if (FirstCandidate.getLength() == 2) {
+ if (isa<CallInst>(FirstCandidate.front()->Inst) &&
+ isa<BranchInst>(FirstCandidate.back()->Inst))
+ return;
+ }
+
unsigned CurrentEndIdx = 0;
for (IRSimilarityCandidate &IRSC : CandidateVec) {
PreviouslyOutlined = false;
@@ -1325,9 +1655,13 @@ void IROutliner::pruneIncompatibleRegions(
if (PreviouslyOutlined)
continue;
- // TODO: If in the future we can outline across BasicBlocks, we will need to
- // check all BasicBlocks contained in the region.
- if (IRSC.getStartBB()->hasAddressTaken())
+ // Check over the instructions, and if the basic block has its address
+ // taken for use somewhere else, we do not outline that block.
+ bool BBHasAddressTaken = any_of(IRSC, [](IRInstructionData &ID){
+ return ID.Inst->getParent()->hasAddressTaken();
+ });
+
+ if (BBHasAddressTaken)
continue;
if (IRSC.front()->Inst->getFunction()->hasLinkOnceODRLinkage() &&
@@ -1340,16 +1674,9 @@ void IROutliner::pruneIncompatibleRegions(
continue;
bool BadInst = any_of(IRSC, [this](IRInstructionData &ID) {
- // We check if there is a discrepancy between the InstructionDataList
- // and the actual next instruction in the module. If there is, it means
- // that an extra instruction was added, likely by the CodeExtractor.
-
- // Since we do not have any similarity data about this particular
- // instruction, we cannot confidently outline it, and must discard this
- // candidate.
- if (std::next(ID.getIterator())->Inst !=
- ID.Inst->getNextNonDebugInstruction())
+ if (!nextIRInstructionDataMatchesNextInst(ID))
return true;
+
return !this->InstructionClassifier.visit(ID.Inst);
});
@@ -1416,10 +1743,33 @@ static InstructionCost findCostForOutputBlocks(Module &M,
OutlinableGroup &CurrentGroup,
TargetTransformInfo &TTI) {
InstructionCost OutputCost = 0;
+ unsigned NumOutputBranches = 0;
+
+ IRSimilarityCandidate &Candidate = *CurrentGroup.Regions[0]->Candidate;
+ DenseSet<BasicBlock *> CandidateBlocks;
+ Candidate.getBasicBlocks(CandidateBlocks);
+
+ // Count the number of different output branches that point to blocks outside
+ // of the region.
+ DenseSet<BasicBlock *> FoundBlocks;
+ for (IRInstructionData &ID : Candidate) {
+ if (!isa<BranchInst>(ID.Inst))
+ continue;
+
+ for (Value *V : ID.OperVals) {
+ BasicBlock *BB = static_cast<BasicBlock *>(V);
+ DenseSet<BasicBlock *>::iterator CBIt = CandidateBlocks.find(BB);
+ if (CBIt != CandidateBlocks.end() || FoundBlocks.contains(BB))
+ continue;
+ FoundBlocks.insert(BB);
+ NumOutputBranches++;
+ }
+ }
+
+ CurrentGroup.BranchesToOutside = NumOutputBranches;
for (const ArrayRef<unsigned> &OutputUse :
CurrentGroup.OutputGVNCombinations) {
- IRSimilarityCandidate &Candidate = *CurrentGroup.Regions[0]->Candidate;
for (unsigned GVN : OutputUse) {
Optional<Value *> OV = Candidate.fromGVN(GVN);
assert(OV.hasValue() && "Could not find value for GVN?");
@@ -1434,14 +1784,14 @@ static InstructionCost findCostForOutputBlocks(Module &M,
LLVM_DEBUG(dbgs() << "Adding: " << StoreCost
<< " instructions to cost for output of type "
<< *V->getType() << "\n");
- OutputCost += StoreCost;
+ OutputCost += StoreCost * NumOutputBranches;
}
InstructionCost BranchCost =
TTI.getCFInstrCost(Instruction::Br, TargetTransformInfo::TCK_CodeSize);
LLVM_DEBUG(dbgs() << "Adding " << BranchCost << " to the current cost for"
<< " a branch instruction\n");
- OutputCost += BranchCost;
+ OutputCost += BranchCost * NumOutputBranches;
}
// If there is more than one output scheme, we must have a comparison and
@@ -1460,7 +1810,7 @@ static InstructionCost findCostForOutputBlocks(Module &M,
LLVM_DEBUG(dbgs() << "Adding: " << TotalCost
<< " instructions for each switch case for each different"
<< " output path in a function\n");
- OutputCost += TotalCost;
+ OutputCost += TotalCost * NumOutputBranches;
}
return OutputCost;
@@ -1548,13 +1898,12 @@ void IROutliner::updateOutputMapping(OutlinableRegion &Region,
bool IROutliner::extractSection(OutlinableRegion &Region) {
SetVector<Value *> ArgInputs, Outputs, SinkCands;
- Region.CE->findInputsOutputs(ArgInputs, Outputs, SinkCands);
-
assert(Region.StartBB && "StartBB for the OutlinableRegion is nullptr!");
- assert(Region.FollowBB && "FollowBB for the OutlinableRegion is nullptr!");
+ BasicBlock *InitialStart = Region.StartBB;
Function *OrigF = Region.StartBB->getParent();
CodeExtractorAnalysisCache CEAC(*OrigF);
- Region.ExtractedFunction = Region.CE->extractCodeRegion(CEAC);
+ Region.ExtractedFunction =
+ Region.CE->extractCodeRegion(CEAC, ArgInputs, Outputs);
// If the extraction was successful, find the BasicBlock, and reassign the
// OutlinableRegion blocks
@@ -1565,7 +1914,23 @@ bool IROutliner::extractSection(OutlinableRegion &Region) {
return false;
}
- BasicBlock *RewrittenBB = Region.FollowBB->getSinglePredecessor();
+ // Get the block containing the called branch, and reassign the blocks as
+ // necessary. If the original block still exists, it is because we ended on
+ // a branch instruction, and so we move the contents into the block before
+ // and assign the previous block correctly.
+ User *InstAsUser = Region.ExtractedFunction->user_back();
+ BasicBlock *RewrittenBB = cast<Instruction>(InstAsUser)->getParent();
+ Region.PrevBB = RewrittenBB->getSinglePredecessor();
+ assert(Region.PrevBB && "PrevBB is nullptr?");
+ if (Region.PrevBB == InitialStart) {
+ BasicBlock *NewPrev = InitialStart->getSinglePredecessor();
+ Instruction *BI = NewPrev->getTerminator();
+ BI->eraseFromParent();
+ moveBBContents(*InitialStart, *NewPrev);
+ Region.PrevBB = NewPrev;
+ InitialStart->eraseFromParent();
+ }
+
Region.StartBB = RewrittenBB;
Region.EndBB = RewrittenBB;
@@ -1608,6 +1973,7 @@ bool IROutliner::extractSection(OutlinableRegion &Region) {
unsigned IROutliner::doOutline(Module &M) {
// Find the possible similarity sections.
+ InstructionClassifier.EnableBranches = !DisableBranches;
IRSimilarityIdentifier &Identifier = getIRSI(M);
SimilarityGroupList &SimilarityCandidates = *Identifier.getSimilarity();
@@ -1622,12 +1988,17 @@ unsigned IROutliner::doOutline(Module &M) {
return LHS[0].getLength() * LHS.size() >
RHS[0].getLength() * RHS.size();
});
+ // Create an OutlinableGroup for each SimilarityCandidate, to be used in
+ // each of the following for loops, avoiding the need for a separate allocator.
+ std::vector<OutlinableGroup> PotentialGroups(SimilarityCandidates.size());
DenseSet<unsigned> NotSame;
- std::vector<Function *> FuncsToRemove;
+ std::vector<OutlinableGroup *> NegativeCostGroups;
+ std::vector<OutlinableRegion *> OutlinedRegions;
// Iterate over the possible sets of similarity.
+ unsigned PotentialGroupIdx = 0;
for (SimilarityGroup &CandidateVec : SimilarityCandidates) {
- OutlinableGroup CurrentGroup;
+ OutlinableGroup &CurrentGroup = PotentialGroups[PotentialGroupIdx++];
// Remove entries that were previously outlined
pruneIncompatibleRegions(CandidateVec, CurrentGroup);
@@ -1649,20 +2020,31 @@ unsigned IROutliner::doOutline(Module &M) {
// Create a CodeExtractor for each outlinable region. Identify inputs and
// outputs for each section using the code extractor and create the argument
// types for the Aggregate Outlining Function.
- std::vector<OutlinableRegion *> OutlinedRegions;
+ OutlinedRegions.clear();
for (OutlinableRegion *OS : CurrentGroup.Regions) {
// Break the outlinable region out of its parent BasicBlock into its own
// BasicBlocks (see function implementation).
OS->splitCandidate();
- std::vector<BasicBlock *> BE = {OS->StartBB};
+
+ // There's a chance that when the region is split, extra instructions are
+ // added to the region. This makes the region no longer viable for
+ // outlining, so we ignore it.
+ if (!OS->CandidateSplit)
+ continue;
+
+ SmallVector<BasicBlock *> BE;
+ DenseSet<BasicBlock *> BBSet;
+ OS->Candidate->getBasicBlocks(BBSet, BE);
OS->CE = new (ExtractorAllocator.Allocate())
CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
false, "outlined");
findAddInputsOutputs(M, *OS, NotSame);
if (!OS->IgnoreRegion)
OutlinedRegions.push_back(OS);
- else
- OS->reattachCandidate();
+
+ // We recombine the blocks now that we have gathered all the
+ // needed information.
+ OS->reattachCandidate();
}
CurrentGroup.Regions = std::move(OutlinedRegions);
@@ -1675,12 +2057,11 @@ unsigned IROutliner::doOutline(Module &M) {
if (CostModel)
findCostBenefit(M, CurrentGroup);
- // If we are adhering to the cost model, reattach all the candidates
+ // If we are adhering to the cost model, skip those groups where the cost
+ // outweighs the benefits.
if (CurrentGroup.Cost >= CurrentGroup.Benefit && CostModel) {
- for (OutlinableRegion *OS : CurrentGroup.Regions)
- OS->reattachCandidate();
- OptimizationRemarkEmitter &ORE = getORE(
- *CurrentGroup.Regions[0]->Candidate->getFunction());
+ OptimizationRemarkEmitter &ORE =
+ getORE(*CurrentGroup.Regions[0]->Candidate->getFunction());
ORE.emit([&]() {
IRSimilarityCandidate *C = CurrentGroup.Regions[0]->Candidate;
OptimizationRemarkMissed R(DEBUG_TYPE, "WouldNotDecreaseSize",
@@ -1704,12 +2085,70 @@ unsigned IROutliner::doOutline(Module &M) {
continue;
}
+ NegativeCostGroups.push_back(&CurrentGroup);
+ }
+
+ ExtractorAllocator.DestroyAll();
+
+ if (NegativeCostGroups.size() > 1)
+ stable_sort(NegativeCostGroups,
+ [](const OutlinableGroup *LHS, const OutlinableGroup *RHS) {
+ return LHS->Benefit - LHS->Cost > RHS->Benefit - RHS->Cost;
+ });
+
+ std::vector<Function *> FuncsToRemove;
+ for (OutlinableGroup *CG : NegativeCostGroups) {
+ OutlinableGroup &CurrentGroup = *CG;
+
+ OutlinedRegions.clear();
+ for (OutlinableRegion *Region : CurrentGroup.Regions) {
+ // We check whether our region is compatible with what has already been
+ // outlined, and whether we need to ignore this item.
+ if (!isCompatibleWithAlreadyOutlinedCode(*Region))
+ continue;
+ OutlinedRegions.push_back(Region);
+ }
+
+ if (OutlinedRegions.size() < 2)
+ continue;
+
+ // Reestimate the cost and benefit of the OutlinableGroup. Continue only if
+ // we are still outlining enough regions to make up for the added cost.
+ CurrentGroup.Regions = std::move(OutlinedRegions);
+ if (CostModel) {
+ CurrentGroup.Benefit = 0;
+ CurrentGroup.Cost = 0;
+ findCostBenefit(M, CurrentGroup);
+ if (CurrentGroup.Cost >= CurrentGroup.Benefit)
+ continue;
+ }
+ OutlinedRegions.clear();
+ for (OutlinableRegion *Region : CurrentGroup.Regions) {
+ Region->splitCandidate();
+ if (!Region->CandidateSplit)
+ continue;
+ OutlinedRegions.push_back(Region);
+ }
+
+ CurrentGroup.Regions = std::move(OutlinedRegions);
+ if (CurrentGroup.Regions.size() < 2) {
+ for (OutlinableRegion *R : CurrentGroup.Regions)
+ R->reattachCandidate();
+ continue;
+ }
+
LLVM_DEBUG(dbgs() << "Outlining regions with cost " << CurrentGroup.Cost
<< " and benefit " << CurrentGroup.Benefit << "\n");
// Create functions out of all the sections, and mark them as outlined.
OutlinedRegions.clear();
for (OutlinableRegion *OS : CurrentGroup.Regions) {
+ SmallVector<BasicBlock *> BE;
+ DenseSet<BasicBlock *> BBSet;
+ OS->Candidate->getBasicBlocks(BBSet, BE);
+ OS->CE = new (ExtractorAllocator.Allocate())
+ CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
+ false, "outlined");
bool FunctionOutlined = extractSection(*OS);
if (FunctionOutlined) {
unsigned StartIdx = OS->Candidate->getStartIdx();
@@ -1767,6 +2206,7 @@ bool IROutliner::run(Module &M) {
}
// Pass Manager Boilerplate
+namespace {
class IROutlinerLegacyPass : public ModulePass {
public:
static char ID;
@@ -1782,6 +2222,7 @@ public:
bool runOnModule(Module &M) override;
};
+} // namespace
bool IROutlinerLegacyPass::runOnModule(Module &M) {
if (skipModule(M))
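
The findCostForOutputBlocks change above scales the per-output store and branch costs by the number of distinct branch targets that leave the candidate region. A minimal standalone sketch of that counting step, using hypothetical Block/Branch stand-ins rather than the LLVM types:

#include <cstddef>
#include <iostream>
#include <unordered_set>
#include <vector>

struct Block {};                        // stand-in for an LLVM BasicBlock

struct Branch {                         // a branch and the blocks it may jump to
  std::vector<const Block *> Targets;
};

// Count how many distinct blocks outside the region are branched to; this is
// the NumOutputBranches that the output costs are multiplied by.
std::size_t countOutputBranches(const std::vector<Branch> &Branches,
                                const std::unordered_set<const Block *> &Region) {
  std::unordered_set<const Block *> Found;
  for (const Branch &Br : Branches)
    for (const Block *T : Br.Targets) {
      if (Region.count(T) || Found.count(T))
        continue;                       // inside the region or already counted
      Found.insert(T);
    }
  return Found.size();
}

int main() {
  Block A, B, C;
  std::unordered_set<const Block *> Region = {&A};
  std::vector<Branch> Branches = {{{&A, &B}}, {{&B, &C}}};
  std::cout << countOutputBranches(Branches, Region) << "\n"; // prints 2
}
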
diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp
index 59260af88832..992c2b292e1e 100644
--- a/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -31,9 +31,11 @@
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InlineAdvisor.h"
#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/InlineOrder.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/ReplayInlineAdvisor.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h"
@@ -96,9 +98,53 @@ static cl::opt<std::string> CGSCCInlineReplayFile(
"cgscc-inline-replay", cl::init(""), cl::value_desc("filename"),
cl::desc(
"Optimization remarks file containing inline remarks to be replayed "
- "by inlining from cgscc inline remarks."),
+ "by cgscc inlining."),
cl::Hidden);
+static cl::opt<ReplayInlinerSettings::Scope> CGSCCInlineReplayScope(
+ "cgscc-inline-replay-scope",
+ cl::init(ReplayInlinerSettings::Scope::Function),
+ cl::values(clEnumValN(ReplayInlinerSettings::Scope::Function, "Function",
+ "Replay on functions that have remarks associated "
+ "with them (default)"),
+ clEnumValN(ReplayInlinerSettings::Scope::Module, "Module",
+ "Replay on the entire module")),
+ cl::desc("Whether inline replay should be applied to the entire "
+ "Module or just the Functions (default) that are present as "
+ "callers in remarks during cgscc inlining."),
+ cl::Hidden);
+
+static cl::opt<ReplayInlinerSettings::Fallback> CGSCCInlineReplayFallback(
+ "cgscc-inline-replay-fallback",
+ cl::init(ReplayInlinerSettings::Fallback::Original),
+ cl::values(
+ clEnumValN(
+ ReplayInlinerSettings::Fallback::Original, "Original",
+ "All decisions not in replay send to original advisor (default)"),
+ clEnumValN(ReplayInlinerSettings::Fallback::AlwaysInline,
+ "AlwaysInline", "All decisions not in replay are inlined"),
+ clEnumValN(ReplayInlinerSettings::Fallback::NeverInline, "NeverInline",
+ "All decisions not in replay are not inlined")),
+ cl::desc(
+ "How cgscc inline replay treats sites that don't come from the replay. "
+ "Original: defers to original advisor, AlwaysInline: inline all sites "
+ "not in replay, NeverInline: inline no sites not in replay"),
+ cl::Hidden);
+
+static cl::opt<CallSiteFormat::Format> CGSCCInlineReplayFormat(
+ "cgscc-inline-replay-format",
+ cl::init(CallSiteFormat::Format::LineColumnDiscriminator),
+ cl::values(
+ clEnumValN(CallSiteFormat::Format::Line, "Line", "<Line Number>"),
+ clEnumValN(CallSiteFormat::Format::LineColumn, "LineColumn",
+ "<Line Number>:<Column Number>"),
+ clEnumValN(CallSiteFormat::Format::LineDiscriminator,
+ "LineDiscriminator", "<Line Number>.<Discriminator>"),
+ clEnumValN(CallSiteFormat::Format::LineColumnDiscriminator,
+ "LineColumnDiscriminator",
+ "<Line Number>:<Column Number>.<Discriminator> (default)")),
+ cl::desc("How cgscc inline replay file is formatted"), cl::Hidden);
+
static cl::opt<bool> InlineEnablePriorityOrder(
"inline-enable-priority-order", cl::Hidden, cl::init(false),
cl::desc("Enable the priority inline order for the inliner"));
@@ -463,7 +509,7 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
}
++NumInlined;
- emitInlinedInto(ORE, DLoc, Block, *Callee, *Caller, *OIC);
+ emitInlinedIntoBasedOnCost(ORE, DLoc, Block, *Callee, *Caller, *OIC);
// If inlining this function gave us any new call sites, throw them
// onto our worklist to process. They are useful inline candidates.
@@ -661,9 +707,12 @@ InlinerPass::getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM,
std::make_unique<DefaultInlineAdvisor>(M, FAM, getInlineParams());
if (!CGSCCInlineReplayFile.empty())
- OwnedAdvisor = std::make_unique<ReplayInlineAdvisor>(
+ OwnedAdvisor = getReplayInlineAdvisor(
M, FAM, M.getContext(), std::move(OwnedAdvisor),
- CGSCCInlineReplayFile,
+ ReplayInlinerSettings{CGSCCInlineReplayFile,
+ CGSCCInlineReplayScope,
+ CGSCCInlineReplayFallback,
+ {CGSCCInlineReplayFormat}},
/*EmitRemarks=*/true);
return *OwnedAdvisor;
@@ -674,153 +723,6 @@ InlinerPass::getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM,
return *IAA->getAdvisor();
}
-template <typename T> class InlineOrder {
-public:
- using reference = T &;
- using const_reference = const T &;
-
- virtual ~InlineOrder() {}
-
- virtual size_t size() = 0;
-
- virtual void push(const T &Elt) = 0;
-
- virtual T pop() = 0;
-
- virtual const_reference front() = 0;
-
- virtual void erase_if(function_ref<bool(T)> Pred) = 0;
-
- bool empty() { return !size(); }
-};
-
-template <typename T, typename Container = SmallVector<T, 16>>
-class DefaultInlineOrder : public InlineOrder<T> {
- using reference = T &;
- using const_reference = const T &;
-
-public:
- size_t size() override { return Calls.size() - FirstIndex; }
-
- void push(const T &Elt) override { Calls.push_back(Elt); }
-
- T pop() override {
- assert(size() > 0);
- return Calls[FirstIndex++];
- }
-
- const_reference front() override {
- assert(size() > 0);
- return Calls[FirstIndex];
- }
-
- void erase_if(function_ref<bool(T)> Pred) override {
- Calls.erase(std::remove_if(Calls.begin() + FirstIndex, Calls.end(), Pred),
- Calls.end());
- }
-
-private:
- Container Calls;
- size_t FirstIndex = 0;
-};
-
-class Priority {
-public:
- Priority(int Size) : Size(Size) {}
-
- static bool isMoreDesirable(const Priority &S1, const Priority &S2) {
- return S1.Size < S2.Size;
- }
-
- static Priority evaluate(CallBase *CB) {
- Function *Callee = CB->getCalledFunction();
- return Priority(Callee->getInstructionCount());
- }
-
- int Size;
-};
-
-template <typename PriorityT>
-class PriorityInlineOrder : public InlineOrder<std::pair<CallBase *, int>> {
- using T = std::pair<CallBase *, int>;
- using HeapT = std::pair<CallBase *, PriorityT>;
- using reference = T &;
- using const_reference = const T &;
-
- static bool cmp(const HeapT &P1, const HeapT &P2) {
- return PriorityT::isMoreDesirable(P2.second, P1.second);
- }
-
- // A call site could become less desirable for inlining because of the size
- // growth from prior inlining into the callee. This method is used to lazily
- // update the desirability of a call site if it's decreasing. It is only
- // called on pop() or front(), not every time the desirability changes. When
- // the desirability of the front call site decreases, an updated one would be
- // pushed right back into the heap. For simplicity, those cases where
- // the desirability of a call site increases are ignored here.
- void adjust() {
- bool Changed = false;
- do {
- CallBase *CB = Heap.front().first;
- const PriorityT PreviousGoodness = Heap.front().second;
- const PriorityT CurrentGoodness = PriorityT::evaluate(CB);
- Changed = PriorityT::isMoreDesirable(PreviousGoodness, CurrentGoodness);
- if (Changed) {
- std::pop_heap(Heap.begin(), Heap.end(), cmp);
- Heap.pop_back();
- Heap.push_back({CB, CurrentGoodness});
- std::push_heap(Heap.begin(), Heap.end(), cmp);
- }
- } while (Changed);
- }
-
-public:
- size_t size() override { return Heap.size(); }
-
- void push(const T &Elt) override {
- CallBase *CB = Elt.first;
- const int InlineHistoryID = Elt.second;
- const PriorityT Goodness = PriorityT::evaluate(CB);
-
- Heap.push_back({CB, Goodness});
- std::push_heap(Heap.begin(), Heap.end(), cmp);
- InlineHistoryMap[CB] = InlineHistoryID;
- }
-
- T pop() override {
- assert(size() > 0);
- adjust();
-
- CallBase *CB = Heap.front().first;
- T Result = std::make_pair(CB, InlineHistoryMap[CB]);
- InlineHistoryMap.erase(CB);
- std::pop_heap(Heap.begin(), Heap.end(), cmp);
- Heap.pop_back();
- return Result;
- }
-
- const_reference front() override {
- assert(size() > 0);
- adjust();
-
- CallBase *CB = Heap.front().first;
- return *InlineHistoryMap.find(CB);
- }
-
- void erase_if(function_ref<bool(T)> Pred) override {
- auto PredWrapper = [=](HeapT P) -> bool {
- return Pred(std::make_pair(P.first, 0));
- };
- Heap.erase(std::remove_if(Heap.begin(), Heap.end(), PredWrapper),
- Heap.end());
- std::make_heap(Heap.begin(), Heap.end(), cmp);
- }
-
-private:
- SmallVector<HeapT, 16> Heap;
- DenseMap<CallBase *, int> InlineHistoryMap;
-};
-
PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
CGSCCAnalysisManager &AM, LazyCallGraph &CG,
CGSCCUpdateResult &UR) {
@@ -868,7 +770,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// incrementally making a single function grow in a super linear fashion.
std::unique_ptr<InlineOrder<std::pair<CallBase *, int>>> Calls;
if (InlineEnablePriorityOrder)
- Calls = std::make_unique<PriorityInlineOrder<Priority>>();
+ Calls = std::make_unique<PriorityInlineOrder<InlineSizePriority>>();
else
Calls = std::make_unique<DefaultInlineOrder<std::pair<CallBase *, int>>>();
assert(Calls != nullptr && "Expected an initialized InlineOrder");
@@ -972,8 +874,13 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
continue;
}
- auto Advice = Advisor.getAdvice(*CB, OnlyMandatory);
+ std::unique_ptr<InlineAdvice> Advice =
+ Advisor.getAdvice(*CB, OnlyMandatory);
+
// Check whether we want to inline this callsite.
+ if (!Advice)
+ continue;
+
if (!Advice->isInliningRecommended()) {
Advice->recordUnattemptedInlining();
continue;
@@ -1104,6 +1011,10 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
UR.InlinedInternalEdges.insert({&N, OldC});
}
InlinedCallees.clear();
+
+ // Invalidate analyses for this function now so that we don't have to
+ // invalidate analyses for all functions in this SCC later.
+ FAM.invalidate(F, PreservedAnalyses::none());
}
// Now that we've finished inlining all of the calls across this SCC, delete
@@ -1147,10 +1058,12 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
if (!Changed)
return PreservedAnalyses::all();
+ PreservedAnalyses PA;
// Even if we change the IR, we update the core CGSCC data structures and so
// can preserve the proxy to the function analysis manager.
- PreservedAnalyses PA;
PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
+ // We have already invalidated all analyses on modified functions.
+ PA.preserveSet<AllAnalysesOn<Function>>();
return PA;
}
@@ -1173,7 +1086,11 @@ ModuleInlinerWrapperPass::ModuleInlinerWrapperPass(InlineParams Params,
PreservedAnalyses ModuleInlinerWrapperPass::run(Module &M,
ModuleAnalysisManager &MAM) {
auto &IAA = MAM.getResult<InlineAdvisorAnalysis>(M);
- if (!IAA.tryCreate(Params, Mode, CGSCCInlineReplayFile)) {
+ if (!IAA.tryCreate(Params, Mode,
+ {CGSCCInlineReplayFile,
+ CGSCCInlineReplayScope,
+ CGSCCInlineReplayFallback,
+ {CGSCCInlineReplayFormat}})) {
M.getContext().emitError(
"Could not setup Inlining Advisor for the requested "
"mode and/or options");
@@ -1192,10 +1109,39 @@ PreservedAnalyses ModuleInlinerWrapperPass::run(Module &M,
else
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
createDevirtSCCRepeatedPass(std::move(PM), MaxDevirtIterations)));
+
+ MPM.addPass(std::move(AfterCGMPM));
MPM.run(M, MAM);
- IAA.clear();
+ // Discard the InlineAdvisor, a subsequent inlining session should construct
+ // its own.
+ auto PA = PreservedAnalyses::all();
+ PA.abandon<InlineAdvisorAnalysis>();
+ return PA;
+}
- // The ModulePassManager has already taken care of invalidating analyses.
- return PreservedAnalyses::all();
+void InlinerPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<InlinerPass> *>(this)->printPipeline(
+ OS, MapClassName2PassName);
+ if (OnlyMandatory)
+ OS << "<only-mandatory>";
+}
+
+void ModuleInlinerWrapperPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ // Print some info about passes added to the wrapper. This is, however,
+ // incomplete, as the InlineAdvisorAnalysis part (which also depends on
+ // Params and Mode) isn't included.
+ if (!MPM.isEmpty()) {
+ MPM.printPipeline(OS, MapClassName2PassName);
+ OS << ",";
+ }
+ OS << "cgscc(";
+ if (MaxDevirtIterations != 0)
+ OS << "devirt<" << MaxDevirtIterations << ">(";
+ PM.printPipeline(OS, MapClassName2PassName);
+ if (MaxDevirtIterations != 0)
+ OS << ")";
+ OS << ")";
}
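
The InlineOrder and PriorityInlineOrder classes deleted above now live behind the new llvm/Analysis/InlineOrder.h include; their key idea is a heap whose front element is re-prioritized lazily on pop, because inlining into a callee can make it larger and therefore less attractive. A standalone sketch of that lazy re-heap pattern (Item and currentPriority are placeholders, not the LLVM API; in the real pass the priority is the callee's current instruction count):

#include <algorithm>
#include <cassert>
#include <iostream>
#include <vector>

struct Item {
  int Id;
  int Priority;            // value recorded when pushed; smaller = more desirable
};

// Placeholder: the real pass re-reads the callee's current size here.
int currentPriority(const Item &I) { return I.Priority; }

class LazyPriorityQueue {
  std::vector<Item> Heap;
  static bool cmp(const Item &A, const Item &B) { return A.Priority > B.Priority; }

  // If the front item's priority got worse since it was pushed, re-insert it
  // with the fresh value and look again; improvements are ignored for simplicity.
  void adjust() {
    bool Changed;
    do {
      Changed = false;
      int Now = currentPriority(Heap.front());
      if (Now > Heap.front().Priority) {
        std::pop_heap(Heap.begin(), Heap.end(), cmp);
        Heap.back().Priority = Now;
        std::push_heap(Heap.begin(), Heap.end(), cmp);
        Changed = true;
      }
    } while (Changed);
  }

public:
  void push(Item I) {
    Heap.push_back(I);
    std::push_heap(Heap.begin(), Heap.end(), cmp);
  }

  Item pop() {
    assert(!Heap.empty());
    adjust();
    std::pop_heap(Heap.begin(), Heap.end(), cmp);
    Item I = Heap.back();
    Heap.pop_back();
    return I;
  }

  bool empty() const { return Heap.empty(); }
};

int main() {
  LazyPriorityQueue Q;
  Q.push({1, 30});
  Q.push({2, 10});
  Q.push({3, 20});
  while (!Q.empty())
    std::cout << Q.pop().Id << ' ';      // prints: 2 3 1 (smallest priority first)
  std::cout << '\n';
}
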
diff --git a/llvm/lib/Transforms/IPO/Internalize.cpp b/llvm/lib/Transforms/IPO/Internalize.cpp
index db3b4384ce67..692e445cb7cb 100644
--- a/llvm/lib/Transforms/IPO/Internalize.cpp
+++ b/llvm/lib/Transforms/IPO/Internalize.cpp
@@ -201,21 +201,6 @@ bool InternalizePass::internalizeModule(Module &M, CallGraph *CG) {
AlwaysPreserved.insert(V->getName());
}
- // Mark all functions not in the api as internal.
- IsWasm = Triple(M.getTargetTriple()).isOSBinFormatWasm();
- for (Function &I : M) {
- if (!maybeInternalize(I, ComdatMap))
- continue;
- Changed = true;
-
- if (ExternalNode)
- // Remove a callgraph edge from the external node to this function.
- ExternalNode->removeOneAbstractEdgeTo((*CG)[&I]);
-
- ++NumFunctions;
- LLVM_DEBUG(dbgs() << "Internalizing func " << I.getName() << "\n");
- }
-
// Never internalize the llvm.used symbol. It is used to implement
// attribute((used)).
// FIXME: Shouldn't this just filter on llvm.metadata section??
@@ -237,6 +222,21 @@ bool InternalizePass::internalizeModule(Module &M, CallGraph *CG) {
else
AlwaysPreserved.insert("__stack_chk_guard");
+ // Mark all functions not in the api as internal.
+ IsWasm = Triple(M.getTargetTriple()).isOSBinFormatWasm();
+ for (Function &I : M) {
+ if (!maybeInternalize(I, ComdatMap))
+ continue;
+ Changed = true;
+
+ if (ExternalNode)
+ // Remove a callgraph edge from the external node to this function.
+ ExternalNode->removeOneAbstractEdgeTo((*CG)[&I]);
+
+ ++NumFunctions;
+ LLVM_DEBUG(dbgs() << "Internalizing func " << I.getName() << "\n");
+ }
+
// Mark all global variables with initializers that are not in the api as
// internal as well.
for (auto &GV : M.globals()) {
diff --git a/llvm/lib/Transforms/IPO/LoopExtractor.cpp b/llvm/lib/Transforms/IPO/LoopExtractor.cpp
index a497c0390bce..d9a59dd35fde 100644
--- a/llvm/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/llvm/lib/Transforms/IPO/LoopExtractor.cpp
@@ -283,3 +283,13 @@ PreservedAnalyses LoopExtractorPass::run(Module &M, ModuleAnalysisManager &AM) {
PA.preserve<LoopAnalysis>();
return PA;
}
+
+void LoopExtractorPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<LoopExtractorPass> *>(this)->printPipeline(
+ OS, MapClassName2PassName);
+ OS << "<";
+ if (NumLoops == 1)
+ OS << "single";
+ OS << ">";
+}
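
The printPipeline override added above follows the common pattern of printing a pass name followed by its non-default parameters in angle brackets. A minimal standalone sketch of that pattern (the class and the printed name are illustrative, not the actual pass registration):

#include <iostream>

struct LoopExtractorLike {
  unsigned NumLoops;
  void printPipeline(std::ostream &OS) const {
    OS << "loop-extract";
    OS << "<";
    if (NumLoops == 1)
      OS << "single";
    OS << ">";
  }
};

int main() {
  LoopExtractorLike P{1};
  P.printPipeline(std::cout);   // prints: loop-extract<single>
  std::cout << "\n";
}
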
diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
index b492b200c6d5..f78971f0e586 100644
--- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -342,7 +342,8 @@ private:
struct ScopedSaveAliaseesAndUsed {
Module &M;
SmallVector<GlobalValue *, 4> Used, CompilerUsed;
- std::vector<std::pair<GlobalIndirectSymbol *, Function *>> FunctionAliases;
+ std::vector<std::pair<GlobalAlias *, Function *>> FunctionAliases;
+ std::vector<std::pair<GlobalIFunc *, Function *>> ResolverIFuncs;
ScopedSaveAliaseesAndUsed(Module &M) : M(M) {
// The users of this class want to replace all function references except
@@ -362,13 +363,16 @@ struct ScopedSaveAliaseesAndUsed {
if (GlobalVariable *GV = collectUsedGlobalVariables(M, CompilerUsed, true))
GV->eraseFromParent();
- for (auto &GIS : concat<GlobalIndirectSymbol>(M.aliases(), M.ifuncs())) {
+ for (auto &GA : M.aliases()) {
// FIXME: This should look past all aliases not just interposable ones,
// see discussion on D65118.
- if (auto *F =
- dyn_cast<Function>(GIS.getIndirectSymbol()->stripPointerCasts()))
- FunctionAliases.push_back({&GIS, F});
+ if (auto *F = dyn_cast<Function>(GA.getAliasee()->stripPointerCasts()))
+ FunctionAliases.push_back({&GA, F});
}
+
+ for (auto &GI : M.ifuncs())
+ if (auto *F = dyn_cast<Function>(GI.getResolver()->stripPointerCasts()))
+ ResolverIFuncs.push_back({&GI, F});
}
~ScopedSaveAliaseesAndUsed() {
@@ -376,8 +380,15 @@ struct ScopedSaveAliaseesAndUsed {
appendToCompilerUsed(M, CompilerUsed);
for (auto P : FunctionAliases)
- P.first->setIndirectSymbol(
+ P.first->setAliasee(
ConstantExpr::getBitCast(P.second, P.first->getType()));
+
+ for (auto P : ResolverIFuncs) {
+ // This does not preserve pointer casts that may have been stripped by the
+ // constructor, but the resolver's type is different from that of the
+ // ifunc anyway.
+ P.first->setResolver(P.second);
+ }
}
};
@@ -1550,17 +1561,28 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsNative(
ArrayRef<Constant *>{ConstantInt::get(IntPtrTy, 0),
ConstantInt::get(IntPtrTy, I)}),
F->getType());
- if (Functions[I]->isExported()) {
- if (IsJumpTableCanonical) {
- ExportSummary->cfiFunctionDefs().insert(std::string(F->getName()));
- } else {
- GlobalAlias *JtAlias = GlobalAlias::create(
- F->getValueType(), 0, GlobalValue::ExternalLinkage,
- F->getName() + ".cfi_jt", CombinedGlobalElemPtr, &M);
+
+ const bool IsExported = Functions[I]->isExported();
+ if (!IsJumpTableCanonical) {
+ GlobalValue::LinkageTypes LT = IsExported
+ ? GlobalValue::ExternalLinkage
+ : GlobalValue::InternalLinkage;
+ GlobalAlias *JtAlias = GlobalAlias::create(F->getValueType(), 0, LT,
+ F->getName() + ".cfi_jt",
+ CombinedGlobalElemPtr, &M);
+ if (IsExported)
JtAlias->setVisibility(GlobalValue::HiddenVisibility);
+ else
+ appendToUsed(M, {JtAlias});
+ }
+
+ if (IsExported) {
+ if (IsJumpTableCanonical)
+ ExportSummary->cfiFunctionDefs().insert(std::string(F->getName()));
+ else
ExportSummary->cfiFunctionDecls().insert(std::string(F->getName()));
- }
}
+
if (!IsJumpTableCanonical) {
if (F->hasExternalWeakLinkage())
replaceWeakDeclarationWithJumpTablePtr(F, CombinedGlobalElemPtr,
@@ -1751,11 +1773,7 @@ static bool isDirectCall(Use& U) {
void LowerTypeTestsModule::replaceCfiUses(Function *Old, Value *New,
bool IsJumpTableCanonical) {
SmallSetVector<Constant *, 4> Constants;
- auto UI = Old->use_begin(), E = Old->use_end();
- for (; UI != E;) {
- Use &U = *UI;
- ++UI;
-
+ for (Use &U : llvm::make_early_inc_range(Old->uses())) {
// Skip block addresses
if (isa<BlockAddress>(U.getUser()))
continue;
@@ -1792,12 +1810,11 @@ bool LowerTypeTestsModule::lower() {
M.getFunction(Intrinsic::getName(Intrinsic::type_test));
if (DropTypeTests && TypeTestFunc) {
- for (auto UI = TypeTestFunc->use_begin(), UE = TypeTestFunc->use_end();
- UI != UE;) {
- auto *CI = cast<CallInst>((*UI++).getUser());
+ for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses())) {
+ auto *CI = cast<CallInst>(U.getUser());
// Find and erase llvm.assume intrinsics for this llvm.type.test call.
- for (auto CIU = CI->use_begin(), CIUE = CI->use_end(); CIU != CIUE;)
- if (auto *Assume = dyn_cast<AssumeInst>((*CIU++).getUser()))
+ for (Use &CIU : llvm::make_early_inc_range(CI->uses()))
+ if (auto *Assume = dyn_cast<AssumeInst>(CIU.getUser()))
Assume->eraseFromParent();
// If the assume was merged with another assume, we might have a use on a
// phi (which will feed the assume). Simply replace the use on the phi
@@ -1835,13 +1852,9 @@ bool LowerTypeTestsModule::lower() {
return false;
if (ImportSummary) {
- if (TypeTestFunc) {
- for (auto UI = TypeTestFunc->use_begin(), UE = TypeTestFunc->use_end();
- UI != UE;) {
- auto *CI = cast<CallInst>((*UI++).getUser());
- importTypeTest(CI);
- }
- }
+ if (TypeTestFunc)
+ for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses()))
+ importTypeTest(cast<CallInst>(U.getUser()));
if (ICallBranchFunnelFunc && !ICallBranchFunnelFunc->use_empty())
report_fatal_error(
@@ -2100,11 +2113,11 @@ bool LowerTypeTestsModule::lower() {
auto CI = cast<CallInst>(U.getUser());
std::vector<GlobalTypeMember *> Targets;
- if (CI->getNumArgOperands() % 2 != 1)
+ if (CI->arg_size() % 2 != 1)
report_fatal_error("number of arguments should be odd");
GlobalClassesTy::member_iterator CurSet;
- for (unsigned I = 1; I != CI->getNumArgOperands(); I += 2) {
+ for (unsigned I = 1; I != CI->arg_size(); I += 2) {
int64_t Offset;
auto *Base = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset(
CI->getOperand(I), Offset, M.getDataLayout()));
diff --git a/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/llvm/lib/Transforms/IPO/MergeFunctions.cpp
index 9e6dd879ac01..97ef872c5499 100644
--- a/llvm/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/llvm/lib/Transforms/IPO/MergeFunctions.cpp
@@ -463,17 +463,15 @@ bool MergeFunctions::runOnModule(Module &M) {
// Replace direct callers of Old with New.
void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) {
Constant *BitcastNew = ConstantExpr::getBitCast(New, Old->getType());
- for (auto UI = Old->use_begin(), UE = Old->use_end(); UI != UE;) {
- Use *U = &*UI;
- ++UI;
- CallBase *CB = dyn_cast<CallBase>(U->getUser());
- if (CB && CB->isCallee(U)) {
+ for (Use &U : llvm::make_early_inc_range(Old->uses())) {
+ CallBase *CB = dyn_cast<CallBase>(U.getUser());
+ if (CB && CB->isCallee(&U)) {
// Do not copy attributes from the called function to the call-site.
// Function comparison ensures that the attributes are the same up to
// type congruences in byval(), in which case we need to keep the byval
// type of the call-site, not the callee function.
remove(CB->getFunction());
- U->set(BitcastNew);
+ U.set(BitcastNew);
}
}
}
diff --git a/llvm/lib/Transforms/IPO/ModuleInliner.cpp b/llvm/lib/Transforms/IPO/ModuleInliner.cpp
new file mode 100644
index 000000000000..ebf080e87c3b
--- /dev/null
+++ b/llvm/lib/Transforms/IPO/ModuleInliner.cpp
@@ -0,0 +1,354 @@
+//===- ModuleInliner.cpp - Code related to module inliner -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the mechanics required to implement inlining without
+// missing any calls at the module level. It doesn't need any information about
+// the SCC or call graph, which is different from the SCC inliner. The decisions of
+// which calls are profitable to inline are implemented elsewhere.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/ModuleInliner.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/InlineAdvisor.h"
+#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/InlineOrder.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/CallPromotionUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include <cassert>
+#include <functional>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "module-inline"
+
+STATISTIC(NumInlined, "Number of functions inlined");
+STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
+
+static cl::opt<bool> InlineEnablePriorityOrder(
+ "module-inline-enable-priority-order", cl::Hidden, cl::init(true),
+ cl::desc("Enable the priority inline order for the module inliner"));
+
+/// Return true if the specified inline history ID
+/// indicates an inline history that includes the specified function.
+static bool inlineHistoryIncludes(
+ Function *F, int InlineHistoryID,
+ const SmallVectorImpl<std::pair<Function *, int>> &InlineHistory) {
+ while (InlineHistoryID != -1) {
+ assert(unsigned(InlineHistoryID) < InlineHistory.size() &&
+ "Invalid inline history ID");
+ if (InlineHistory[InlineHistoryID].first == F)
+ return true;
+ InlineHistoryID = InlineHistory[InlineHistoryID].second;
+ }
+ return false;
+}
+
+InlineAdvisor &ModuleInlinerPass::getAdvisor(const ModuleAnalysisManager &MAM,
+ FunctionAnalysisManager &FAM,
+ Module &M) {
+ if (OwnedAdvisor)
+ return *OwnedAdvisor;
+
+ auto *IAA = MAM.getCachedResult<InlineAdvisorAnalysis>(M);
+ if (!IAA) {
+ // It should still be possible to run the inliner as a stand-alone module
+ // pass, for test scenarios. In that case, we default to the
+ // DefaultInlineAdvisor, which doesn't need to keep state between module
+ // pass runs. It also uses just the default InlineParams. In this case, we
+ // need to use the provided FAM, which is valid for the duration of the
+ // inliner pass, and thus the lifetime of the owned advisor. The one we
+ // would get from the MAM can be invalidated as a result of the inliner's
+ // activity.
+ OwnedAdvisor = std::make_unique<DefaultInlineAdvisor>(M, FAM, Params);
+
+ return *OwnedAdvisor;
+ }
+ assert(IAA->getAdvisor() &&
+ "Expected a present InlineAdvisorAnalysis also have an "
+ "InlineAdvisor initialized");
+ return *IAA->getAdvisor();
+}
+
+static bool isKnownLibFunction(Function &F, TargetLibraryInfo &TLI) {
+ LibFunc LF;
+
+ // Either this is a normal library function or a "vectorizable"
+ // function. Not using the VFDatabase here because this query
+ // is related only to libraries handled via the TLI.
+ return TLI.getLibFunc(F, LF) ||
+ TLI.isKnownVectorFunctionInLibrary(F.getName());
+}
+
+PreservedAnalyses ModuleInlinerPass::run(Module &M,
+ ModuleAnalysisManager &MAM) {
+ LLVM_DEBUG(dbgs() << "---- Module Inliner is Running ---- \n");
+
+ auto &IAA = MAM.getResult<InlineAdvisorAnalysis>(M);
+ if (!IAA.tryCreate(Params, Mode, {})) {
+ M.getContext().emitError(
+ "Could not setup Inlining Advisor for the requested "
+ "mode and/or options");
+ return PreservedAnalyses::all();
+ }
+
+ bool Changed = false;
+
+ ProfileSummaryInfo *PSI = MAM.getCachedResult<ProfileSummaryAnalysis>(M);
+
+ FunctionAnalysisManager &FAM =
+ MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+
+ auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
+ return FAM.getResult<TargetLibraryAnalysis>(F);
+ };
+
+ InlineAdvisor &Advisor = getAdvisor(MAM, FAM, M);
+ Advisor.onPassEntry();
+
+ auto AdvisorOnExit = make_scope_exit([&] { Advisor.onPassExit(); });
+
+ // In the module inliner, a priority-based worklist is used for calls across
+ // the entire Module. With this module inliner, the inline order is not
+ // limited to bottom-up order. A more globally scoped inline order is enabled.
+ // Also, the inline deferral logic becomes unnecessary in this module inliner.
+ // It is possible to use other priority heuristics, e.g. profile-based
+ // heuristic.
+ //
+ // TODO: There is a huge amount of duplicate code between the module inliner
+ // and the SCC inliner, which needs some refactoring.
+ std::unique_ptr<InlineOrder<std::pair<CallBase *, int>>> Calls;
+ if (InlineEnablePriorityOrder)
+ Calls = std::make_unique<PriorityInlineOrder<InlineSizePriority>>();
+ else
+ Calls = std::make_unique<DefaultInlineOrder<std::pair<CallBase *, int>>>();
+ assert(Calls != nullptr && "Expected an initialized InlineOrder");
+
+ // Populate the initial list of calls in this module.
+ for (Function &F : M) {
+ auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+ // We want to generally process call sites top-down in order for
+ // simplifications stemming from replacing the call with the returned value
+ // after inlining to be visible to subsequent inlining decisions.
+ // FIXME: Using the instruction sequence is a really bad way to do this.
+ // Instead we should do an actual RPO walk of the function body.
+ for (Instruction &I : instructions(F))
+ if (auto *CB = dyn_cast<CallBase>(&I))
+ if (Function *Callee = CB->getCalledFunction()) {
+ if (!Callee->isDeclaration())
+ Calls->push({CB, -1});
+ else if (!isa<IntrinsicInst>(I)) {
+ using namespace ore;
+ setInlineRemark(*CB, "unavailable definition");
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I)
+ << NV("Callee", Callee) << " will not be inlined into "
+ << NV("Caller", CB->getCaller())
+ << " because its definition is unavailable"
+ << setIsVerbose();
+ });
+ }
+ }
+ }
+ if (Calls->empty())
+ return PreservedAnalyses::all();
+
+ // When inlining a callee produces new call sites, we want to keep track of
+ // the fact that they were inlined from the callee. This allows us to avoid
+ // infinite inlining in some obscure cases. To represent this, we use an
+ // index into the InlineHistory vector.
+ SmallVector<std::pair<Function *, int>, 16> InlineHistory;
+
+ // Track a set vector of inlined callees so that we can augment the caller
+ // with all of their edges in the call graph before pruning out the ones that
+ // got simplified away.
+ SmallSetVector<Function *, 4> InlinedCallees;
+
+ // Track the dead functions to delete once finished with inlining calls. We
+ // defer deleting these to make it easier to handle the call graph updates.
+ SmallVector<Function *, 4> DeadFunctions;
+
+ // Loop forward over all of the calls.
+ while (!Calls->empty()) {
+ // We expect the calls to typically be batched with sequences of calls that
+ // have the same caller, so we first set up some shared infrastructure for
+ // this caller. We also do any pruning we can at this layer on the caller
+ // alone.
+ Function &F = *Calls->front().first->getCaller();
+
+ LLVM_DEBUG(dbgs() << "Inlining calls in: " << F.getName() << "\n"
+ << " Function size: " << F.getInstructionCount()
+ << "\n");
+
+ auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
+ return FAM.getResult<AssumptionAnalysis>(F);
+ };
+
+ // Now process as many calls as we have within this caller in the sequence.
+ // We bail out as soon as the caller has to change so we can
+ // prepare the context of that new caller.
+ bool DidInline = false;
+ while (!Calls->empty() && Calls->front().first->getCaller() == &F) {
+ auto P = Calls->pop();
+ CallBase *CB = P.first;
+ const int InlineHistoryID = P.second;
+ Function &Callee = *CB->getCalledFunction();
+
+ if (InlineHistoryID != -1 &&
+ inlineHistoryIncludes(&Callee, InlineHistoryID, InlineHistory)) {
+ setInlineRemark(*CB, "recursive");
+ continue;
+ }
+
+ auto Advice = Advisor.getAdvice(*CB, /*OnlyMandatory*/ false);
+ // Check whether we want to inline this callsite.
+ if (!Advice->isInliningRecommended()) {
+ Advice->recordUnattemptedInlining();
+ continue;
+ }
+
+ // Setup the data structure used to plumb customization into the
+ // `InlineFunction` routine.
+ InlineFunctionInfo IFI(
+ /*cg=*/nullptr, GetAssumptionCache, PSI,
+ &FAM.getResult<BlockFrequencyAnalysis>(*(CB->getCaller())),
+ &FAM.getResult<BlockFrequencyAnalysis>(Callee));
+
+ InlineResult IR =
+ InlineFunction(*CB, IFI, &FAM.getResult<AAManager>(*CB->getCaller()));
+ if (!IR.isSuccess()) {
+ Advice->recordUnsuccessfulInlining(IR);
+ continue;
+ }
+
+ DidInline = true;
+ InlinedCallees.insert(&Callee);
+ ++NumInlined;
+
+ LLVM_DEBUG(dbgs() << " Size after inlining: "
+ << F.getInstructionCount() << "\n");
+
+ // Add any new callsites to defined functions to the worklist.
+ if (!IFI.InlinedCallSites.empty()) {
+ int NewHistoryID = InlineHistory.size();
+ InlineHistory.push_back({&Callee, InlineHistoryID});
+
+ for (CallBase *ICB : reverse(IFI.InlinedCallSites)) {
+ Function *NewCallee = ICB->getCalledFunction();
+ if (!NewCallee) {
+ // Try to promote an indirect (virtual) call without waiting for
+ // the post-inline cleanup and the next DevirtSCCRepeatedPass
+ // iteration because the next iteration may not happen and we may
+ // miss inlining it.
+ if (tryPromoteCall(*ICB))
+ NewCallee = ICB->getCalledFunction();
+ }
+ if (NewCallee)
+ if (!NewCallee->isDeclaration())
+ Calls->push({ICB, NewHistoryID});
+ }
+ }
+
+ // Merge the attributes based on the inlining.
+ AttributeFuncs::mergeAttributesForInlining(F, Callee);
+
+ // For local functions, check whether this makes the callee trivially
+ // dead. In that case, we can drop the body of the function eagerly
+ // which may reduce the number of callers of other functions to one,
+ // changing inline cost thresholds.
+ bool CalleeWasDeleted = false;
+ if (Callee.hasLocalLinkage()) {
+ // To check this we also need to nuke any dead constant uses (perhaps
+ // made dead by this operation on other functions).
+ Callee.removeDeadConstantUsers();
+ // if (Callee.use_empty() && !CG.isLibFunction(Callee)) {
+ if (Callee.use_empty() && !isKnownLibFunction(Callee, GetTLI(Callee))) {
+ Calls->erase_if([&](const std::pair<CallBase *, int> &Call) {
+ return Call.first->getCaller() == &Callee;
+ });
+ // Clear the body and queue the function itself for deletion when we
+ // finish inlining.
+ // Note that after this point, it is an error to do anything other
+ // than use the callee's address or delete it.
+ Callee.dropAllReferences();
+ assert(!is_contained(DeadFunctions, &Callee) &&
+ "Cannot put cause a function to become dead twice!");
+ DeadFunctions.push_back(&Callee);
+ CalleeWasDeleted = true;
+ }
+ }
+ if (CalleeWasDeleted)
+ Advice->recordInliningWithCalleeDeleted();
+ else
+ Advice->recordInlining();
+ }
+
+ if (!DidInline)
+ continue;
+ Changed = true;
+
+ InlinedCallees.clear();
+ }
+
+ // Now that we've finished inlining all of the calls across this module,
+ // delete all of the trivially dead functions.
+ //
+ // Note that this walks a pointer set which has non-deterministic order but
+ // that is OK as all we do is delete things and add pointers to unordered
+ // sets.
+ for (Function *DeadF : DeadFunctions) {
+ // Clear out any cached analyses.
+ FAM.clear(*DeadF, DeadF->getName());
+
+ // And delete the actual function from the module.
+ // The Advisor may use Function pointers to efficiently index various
+ // internal maps, e.g. for memoization. Function cleanup passes like
+ // argument promotion create new functions. It is possible for a new
+ // function to be allocated at the address of a deleted function. We could
+ // index using names, but that's inefficient. Alternatively, we let the
+ // Advisor free the functions when it sees fit.
+ DeadF->getBasicBlockList().clear();
+ M.getFunctionList().remove(DeadF);
+
+ ++NumDeleted;
+ }
+
+ if (!Changed)
+ return PreservedAnalyses::all();
+
+ return PreservedAnalyses::none();
+}
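
The new module inliner reuses the inline-history chain familiar from the SCC inliner: every call site produced by inlining records a (callee, parent-history-id) pair, and inlineHistoryIncludes walks that chain to reject a callee that already appears on it, preventing runaway recursive inlining. A minimal standalone sketch of that chain (simplified types, not the LLVM code):

#include <cassert>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

using History = std::vector<std::pair<std::string, int>>; // (callee, parent id)

// Walk the chain starting at Id and report whether Callee appears on it.
bool historyIncludes(const std::string &Callee, int Id, const History &H) {
  while (Id != -1) {
    assert(unsigned(Id) < H.size() && "invalid inline history id");
    if (H[Id].first == Callee)
      return true;
    Id = H[Id].second;
  }
  return false;
}

int main() {
  History H;
  // Inlining "f" at a top-level call site (parent -1) yields history id 0.
  H.push_back({"f", -1});
  // Inlining "g" at a call site produced by that inlining yields id 1.
  H.push_back({"g", 0});

  std::cout << historyIncludes("f", 1, H) << '\n'; // 1: "f" is on the chain
  std::cout << historyIncludes("h", 1, H) << '\n'; // 0: "h" is not
}
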
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index b80349352719..f342c35fa283 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -22,6 +22,7 @@
#include "llvm/ADT/EnumeratedArray.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
@@ -33,6 +34,8 @@
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/IPO.h"
@@ -41,6 +44,8 @@
#include "llvm/Transforms/Utils/CallGraphUpdater.h"
#include "llvm/Transforms/Utils/CodeExtractor.h"
+#include <algorithm>
+
using namespace llvm;
using namespace omp;
@@ -72,6 +77,46 @@ static cl::opt<bool> HideMemoryTransferLatency(
" transfers"),
cl::Hidden, cl::init(false));
+static cl::opt<bool> DisableOpenMPOptDeglobalization(
+ "openmp-opt-disable-deglobalization", cl::ZeroOrMore,
+ cl::desc("Disable OpenMP optimizations involving deglobalization."),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<bool> DisableOpenMPOptSPMDization(
+ "openmp-opt-disable-spmdization", cl::ZeroOrMore,
+ cl::desc("Disable OpenMP optimizations involving SPMD-ization."),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<bool> DisableOpenMPOptFolding(
+ "openmp-opt-disable-folding", cl::ZeroOrMore,
+ cl::desc("Disable OpenMP optimizations involving folding."), cl::Hidden,
+ cl::init(false));
+
+static cl::opt<bool> DisableOpenMPOptStateMachineRewrite(
+ "openmp-opt-disable-state-machine-rewrite", cl::ZeroOrMore,
+ cl::desc("Disable OpenMP optimizations that replace the state machine."),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<bool> PrintModuleAfterOptimizations(
+ "openmp-opt-print-module", cl::ZeroOrMore,
+ cl::desc("Print the current module after OpenMP optimizations."),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<bool> AlwaysInlineDeviceFunctions(
+ "openmp-opt-inline-device", cl::ZeroOrMore,
+ cl::desc("Inline all applicible functions on the device."), cl::Hidden,
+ cl::init(false));
+
+static cl::opt<bool>
+ EnableVerboseRemarks("openmp-opt-verbose-remarks", cl::ZeroOrMore,
+ cl::desc("Enables more verbose remarks."), cl::Hidden,
+ cl::init(false));
+
+static cl::opt<unsigned>
+ SetFixpointIterations("openmp-opt-max-iterations", cl::Hidden,
+ cl::desc("Maximal number of attributor iterations."),
+ cl::init(256));
+
STATISTIC(NumOpenMPRuntimeCallsDeduplicated,
"Number of OpenMP runtime calls deduplicated");
STATISTIC(NumOpenMPParallelRegionsDeleted,
@@ -328,7 +373,7 @@ struct OMPInformationCache : public InformationCache {
if (F->arg_size() != RTFArgTypes.size())
return false;
- auto RTFTyIt = RTFArgTypes.begin();
+ auto *RTFTyIt = RTFArgTypes.begin();
for (Argument &Arg : F->args()) {
if (Arg.getType() != *RTFTyIt)
return false;
@@ -503,7 +548,7 @@ struct KernelInfoState : AbstractState {
/// State to track if we are in SPMD-mode, assumed or known, and why we decided
/// we cannot be. If it is assumed, then RequiresFullRuntime should also be
/// false.
- BooleanStateWithPtrSetVector<Instruction> SPMDCompatibilityTracker;
+ BooleanStateWithPtrSetVector<Instruction, false> SPMDCompatibilityTracker;
/// The __kmpc_target_init call in this kernel, if any. If we find more than
/// one we abort as the kernel is malformed.
@@ -542,7 +587,9 @@ struct KernelInfoState : AbstractState {
/// See AbstractState::indicatePessimisticFixpoint(...)
ChangeStatus indicatePessimisticFixpoint() override {
IsAtFixpoint = true;
+ ReachingKernelEntries.indicatePessimisticFixpoint();
SPMDCompatibilityTracker.indicatePessimisticFixpoint();
+ ReachedKnownParallelRegions.indicatePessimisticFixpoint();
ReachedUnknownParallelRegions.indicatePessimisticFixpoint();
return ChangeStatus::CHANGED;
}
@@ -550,6 +597,10 @@ struct KernelInfoState : AbstractState {
/// See AbstractState::indicateOptimisticFixpoint(...)
ChangeStatus indicateOptimisticFixpoint() override {
IsAtFixpoint = true;
+ ReachingKernelEntries.indicateOptimisticFixpoint();
+ SPMDCompatibilityTracker.indicateOptimisticFixpoint();
+ ReachedKnownParallelRegions.indicateOptimisticFixpoint();
+ ReachedUnknownParallelRegions.indicateOptimisticFixpoint();
return ChangeStatus::UNCHANGED;
}
@@ -569,6 +620,12 @@ struct KernelInfoState : AbstractState {
return true;
}
+ /// Returns true if this kernel contains any OpenMP parallel regions.
+ bool mayContainParallelRegion() {
+ return !ReachedKnownParallelRegions.empty() ||
+ !ReachedUnknownParallelRegions.empty();
+ }
+
/// Return empty set as the best state of potential values.
static KernelInfoState getBestState() { return KernelInfoState(true); }
@@ -584,12 +641,14 @@ struct KernelInfoState : AbstractState {
// Do not merge two different _init and _deinit call sites.
if (KIS.KernelInitCB) {
if (KernelInitCB && KernelInitCB != KIS.KernelInitCB)
- indicatePessimisticFixpoint();
+ llvm_unreachable("Kernel that calls another kernel violates OpenMP-Opt "
+ "assumptions.");
KernelInitCB = KIS.KernelInitCB;
}
if (KIS.KernelDeinitCB) {
if (KernelDeinitCB && KernelDeinitCB != KIS.KernelDeinitCB)
- indicatePessimisticFixpoint();
+ llvm_unreachable("Kernel that calls another kernel violates OpenMP-Opt "
+ "assumptions.");
KernelDeinitCB = KIS.KernelDeinitCB;
}
SPMDCompatibilityTracker ^= KIS.SPMDCompatibilityTracker;
@@ -1032,8 +1091,8 @@ private:
Args.clear();
Args.push_back(OutlinedFn->getArg(0));
Args.push_back(OutlinedFn->getArg(1));
- for (unsigned U = CallbackFirstArgOperand, E = CI->getNumArgOperands();
- U < E; ++U)
+ for (unsigned U = CallbackFirstArgOperand, E = CI->arg_size(); U < E;
+ ++U)
Args.push_back(CI->getArgOperand(U));
CallInst *NewCI = CallInst::Create(FT, Callee, Args, "", CI);
@@ -1041,9 +1100,9 @@ private:
NewCI->setDebugLoc(CI->getDebugLoc());
// Forward parameter attributes from the callback to the callee.
- for (unsigned U = CallbackFirstArgOperand, E = CI->getNumArgOperands();
- U < E; ++U)
- for (const Attribute &A : CI->getAttributes().getParamAttributes(U))
+ for (unsigned U = CallbackFirstArgOperand, E = CI->arg_size(); U < E;
+ ++U)
+ for (const Attribute &A : CI->getAttributes().getParamAttrs(U))
NewCI->addParamAttr(
U - (CallbackFirstArgOperand - CallbackCalleeOperand), A);
@@ -1563,13 +1622,13 @@ private:
// TODO: Use dominance to find a good position instead.
auto CanBeMoved = [this](CallBase &CB) {
- unsigned NumArgs = CB.getNumArgOperands();
+ unsigned NumArgs = CB.arg_size();
if (NumArgs == 0)
return true;
if (CB.getArgOperand(0)->getType() != OMPInfoCache.OMPBuilder.IdentPtr)
return false;
- for (unsigned u = 1; u < NumArgs; ++u)
- if (isa<Instruction>(CB.getArgOperand(u)))
+ for (unsigned U = 1; U < NumArgs; ++U)
+ if (isa<Instruction>(CB.getArgOperand(U)))
return false;
return true;
};
@@ -1612,7 +1671,7 @@ private:
// valid at the new location. For now we just pick a global one, either
// existing and used by one of the calls, or created from scratch.
if (CallBase *CI = dyn_cast<CallBase>(ReplVal)) {
- if (CI->getNumArgOperands() > 0 &&
+ if (!CI->arg_empty() &&
CI->getArgOperand(0)->getType() == OMPInfoCache.OMPBuilder.IdentPtr) {
Value *Ident = getCombinedIdentFromCallUsesIn(RFI, F,
/* GlobalOnly */ true);
@@ -1695,8 +1754,8 @@ private:
// Transitively search for more arguments by looking at the users of the
// ones we know already. During the search the GTIdArgs vector is extended
// so we cannot cache the size nor can we use a range based for.
- for (unsigned u = 0; u < GTIdArgs.size(); ++u)
- AddUserArgs(*GTIdArgs[u]);
+ for (unsigned U = 0; U < GTIdArgs.size(); ++U)
+ AddUserArgs(*GTIdArgs[U]);
}
/// Kernel (=GPU) optimizations and utility functions
@@ -1822,6 +1881,10 @@ private:
OMPRTL___kmpc_kernel_end_parallel);
ExternalizationRAII BarrierSPMD(OMPInfoCache,
OMPRTL___kmpc_barrier_simple_spmd);
+ ExternalizationRAII BarrierGeneric(OMPInfoCache,
+ OMPRTL___kmpc_barrier_simple_generic);
+ ExternalizationRAII ThreadId(OMPInfoCache,
+ OMPRTL___kmpc_get_hardware_thread_id_in_block);
registerAAs(IsModulePass);
@@ -1918,6 +1981,10 @@ bool OpenMPOpt::rewriteDeviceCodeStateMachine() {
if (!KernelParallelRFI)
return Changed;
+ // If we have disabled state machine changes, exit
+ if (DisableOpenMPOptStateMachineRewrite)
+ return Changed;
+
for (Function *F : SCC) {
// Check if the function is a use in a __kmpc_parallel_51 call at
@@ -1996,7 +2063,8 @@ bool OpenMPOpt::rewriteDeviceCodeStateMachine() {
UndefValue::get(Int8Ty), F->getName() + ".ID");
for (Use *U : ToBeReplacedStateMachineUses)
- U->set(ConstantExpr::getBitCast(ID, U->get()->getType()));
+ U->set(ConstantExpr::getPointerBitCastOrAddrSpaceCast(
+ ID, U->get()->getType()));
++NumOpenMPParallelRegionsReplacedInGPUStateMachine;
@@ -2508,9 +2576,8 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
- // Check if the edge into the successor block compares the __kmpc_target_init
- // result with -1. If we are in non-SPMD-mode that signals only the main
- // thread will execute the edge.
+ // Check if the edge into the successor block contains a condition that only
+ // lets the main thread execute it.
auto IsInitialThreadOnly = [&](BranchInst *Edge, BasicBlock *SuccessorBB) {
if (!Edge || !Edge->isConditional())
return false;
@@ -2525,16 +2592,27 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
if (!C)
return false;
- // Match: -1 == __kmpc_target_init (for non-SPMD kernels only!)
+ // Match: -1 == __kmpc_target_init (for non-SPMD kernels only!)
if (C->isAllOnesValue()) {
auto *CB = dyn_cast<CallBase>(Cmp->getOperand(0));
CB = CB ? OpenMPOpt::getCallIfRegularCall(*CB, &RFI) : nullptr;
if (!CB)
return false;
- const int InitIsSPMDArgNo = 1;
- auto *IsSPMDModeCI =
- dyn_cast<ConstantInt>(CB->getOperand(InitIsSPMDArgNo));
- return IsSPMDModeCI && IsSPMDModeCI->isZero();
+ const int InitModeArgNo = 1;
+ auto *ModeCI = dyn_cast<ConstantInt>(CB->getOperand(InitModeArgNo));
+ return ModeCI && (ModeCI->getSExtValue() & OMP_TGT_EXEC_MODE_GENERIC);
+ }
+
+ if (C->isZero()) {
+ // Match: 0 == llvm.nvvm.read.ptx.sreg.tid.x()
+ if (auto *II = dyn_cast<IntrinsicInst>(Cmp->getOperand(0)))
+ if (II->getIntrinsicID() == Intrinsic::nvvm_read_ptx_sreg_tid_x)
+ return true;
+
+ // Match: 0 == llvm.amdgcn.workitem.id.x()
+ if (auto *II = dyn_cast<IntrinsicInst>(Cmp->getOperand(0)))
+ if (II->getIntrinsicID() == Intrinsic::amdgcn_workitem_id_x)
+ return true;
}
return false;
@@ -2543,15 +2621,14 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
// Merge all the predecessor states into the current basic block. A basic
// block is executed by a single thread if all of its predecessors are.
auto MergePredecessorStates = [&](BasicBlock *BB) {
- if (pred_begin(BB) == pred_end(BB))
+ if (pred_empty(BB))
return SingleThreadedBBs.contains(BB);
bool IsInitialThread = true;
- for (auto PredBB = pred_begin(BB), PredEndBB = pred_end(BB);
- PredBB != PredEndBB; ++PredBB) {
- if (!IsInitialThreadOnly(dyn_cast<BranchInst>((*PredBB)->getTerminator()),
+ for (BasicBlock *PredBB : predecessors(BB)) {
+ if (!IsInitialThreadOnly(dyn_cast<BranchInst>(PredBB->getTerminator()),
BB))
- IsInitialThread &= SingleThreadedBBs.contains(*PredBB);
+ IsInitialThread &= SingleThreadedBBs.contains(PredBB);
}
return IsInitialThread;
@@ -2683,9 +2760,8 @@ struct AAHeapToSharedFunction : public AAHeapToShared {
ConstantInt *AllocSize = dyn_cast<ConstantInt>(CB->getArgOperand(0));
- LLVM_DEBUG(dbgs() << TAG << "Replace globalization call in "
- << CB->getCaller()->getName() << " with "
- << AllocSize->getZExtValue()
+ LLVM_DEBUG(dbgs() << TAG << "Replace globalization call " << *CB
+ << " with " << AllocSize->getZExtValue()
<< " bytes of shared memory\n");
// Create a new shared memory buffer of the same size as the allocation
@@ -2734,7 +2810,7 @@ struct AAHeapToSharedFunction : public AAHeapToShared {
const auto &ED = A.getAAFor<AAExecutionDomain>(
*this, IRPosition::function(*F), DepClassTy::REQUIRED);
if (CallBase *CB = dyn_cast<CallBase>(U))
- if (!dyn_cast<ConstantInt>(CB->getArgOperand(0)) ||
+ if (!isa<ConstantInt>(CB->getArgOperand(0)) ||
!ED.isExecutedByInitialThreadOnly(*CB))
MallocCalls.erase(CB);
}
@@ -2769,9 +2845,17 @@ struct AAKernelInfo : public StateWrapper<KernelInfoState, AbstractAttribute> {
std::string(SPMDCompatibilityTracker.isAtFixpoint() ? " [FIX]"
: "") +
std::string(" #PRs: ") +
- std::to_string(ReachedKnownParallelRegions.size()) +
+ (ReachedKnownParallelRegions.isValidState()
+ ? std::to_string(ReachedKnownParallelRegions.size())
+ : "<invalid>") +
", #Unknown PRs: " +
- std::to_string(ReachedUnknownParallelRegions.size());
+ (ReachedUnknownParallelRegions.isValidState()
+ ? std::to_string(ReachedUnknownParallelRegions.size())
+ : "<invalid>") +
+ ", #Reaching Kernels: " +
+ (ReachingKernelEntries.isValidState()
+ ? std::to_string(ReachingKernelEntries.size())
+ : "<invalid>");
}
/// Create an abstract attribute view for the position \p IRP.
@@ -2797,6 +2881,12 @@ struct AAKernelInfoFunction : AAKernelInfo {
AAKernelInfoFunction(const IRPosition &IRP, Attributor &A)
: AAKernelInfo(IRP, A) {}
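+ /// Instructions already assigned to a guarded region, tracked to avoid
+ /// guarding the same instruction twice.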
+ SmallPtrSet<Instruction *, 4> GuardedInstructions;
+
+ SmallPtrSetImpl<Instruction *> &getGuardedInstructions() {
+ return GuardedInstructions;
+ }
+
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
// This is a high-level transform that might change the constant arguments
@@ -2843,8 +2933,11 @@ struct AAKernelInfoFunction : AAKernelInfo {
},
Fn);
- assert((KernelInitCB && KernelDeinitCB) &&
- "Kernel without __kmpc_target_init or __kmpc_target_deinit!");
+ // Ignore kernels without initializers such as global constructors.
+ if (!KernelInitCB || !KernelDeinitCB) {
+ indicateOptimisticFixpoint();
+ return;
+ }
// For kernels we might need to initialize/finalize the IsSPMD state and
// we need to register a simplification callback so that the Attributor
@@ -2859,7 +2952,10 @@ struct AAKernelInfoFunction : AAKernelInfo {
// state. As long as we are not in an invalid state, we will create a
// custom state machine, so the value should be an `i1 false`. If we are
// in an invalid state, we won't change the value that is in the IR.
- if (!isValidState())
+ if (!ReachedKnownParallelRegions.isValidState())
+ return nullptr;
+ // If we have disabled state machine rewrites, don't make a custom one.
+ if (DisableOpenMPOptStateMachineRewrite)
return nullptr;
if (AA)
A.recordDependence(*this, *AA, DepClassTy::OPTIONAL);
@@ -2869,7 +2965,7 @@ struct AAKernelInfoFunction : AAKernelInfo {
return FalseVal;
};
- Attributor::SimplifictionCallbackTy IsSPMDModeSimplifyCB =
+ Attributor::SimplifictionCallbackTy ModeSimplifyCB =
[&](const IRPosition &IRP, const AbstractAttribute *AA,
bool &UsedAssumedInformation) -> Optional<Value *> {
// IRP represents the "SPMDCompatibilityTracker" argument of an
@@ -2885,8 +2981,10 @@ struct AAKernelInfoFunction : AAKernelInfo {
} else {
UsedAssumedInformation = false;
}
- auto *Val = ConstantInt::getBool(IRP.getAnchorValue().getContext(),
- SPMDCompatibilityTracker.isAssumed());
+ auto *Val = ConstantInt::getSigned(
+ IntegerType::getInt8Ty(IRP.getAnchorValue().getContext()),
+ SPMDCompatibilityTracker.isAssumed() ? OMP_TGT_EXEC_MODE_SPMD
+ : OMP_TGT_EXEC_MODE_GENERIC);
return Val;
};
@@ -2911,8 +3009,8 @@ struct AAKernelInfoFunction : AAKernelInfo {
return Val;
};
- constexpr const int InitIsSPMDArgNo = 1;
- constexpr const int DeinitIsSPMDArgNo = 1;
+ constexpr const int InitModeArgNo = 1;
+ constexpr const int DeinitModeArgNo = 1;
constexpr const int InitUseStateMachineArgNo = 2;
constexpr const int InitRequiresFullRuntimeArgNo = 3;
constexpr const int DeinitRequiresFullRuntimeArgNo = 2;
@@ -2920,11 +3018,11 @@ struct AAKernelInfoFunction : AAKernelInfo {
IRPosition::callsite_argument(*KernelInitCB, InitUseStateMachineArgNo),
StateMachineSimplifyCB);
A.registerSimplificationCallback(
- IRPosition::callsite_argument(*KernelInitCB, InitIsSPMDArgNo),
- IsSPMDModeSimplifyCB);
+ IRPosition::callsite_argument(*KernelInitCB, InitModeArgNo),
+ ModeSimplifyCB);
A.registerSimplificationCallback(
- IRPosition::callsite_argument(*KernelDeinitCB, DeinitIsSPMDArgNo),
- IsSPMDModeSimplifyCB);
+ IRPosition::callsite_argument(*KernelDeinitCB, DeinitModeArgNo),
+ ModeSimplifyCB);
A.registerSimplificationCallback(
IRPosition::callsite_argument(*KernelInitCB,
InitRequiresFullRuntimeArgNo),
@@ -2935,10 +3033,25 @@ struct AAKernelInfoFunction : AAKernelInfo {
IsGenericModeSimplifyCB);
// Check if we know we are in SPMD-mode already.
- ConstantInt *IsSPMDArg =
- dyn_cast<ConstantInt>(KernelInitCB->getArgOperand(InitIsSPMDArgNo));
- if (IsSPMDArg && !IsSPMDArg->isZero())
+ ConstantInt *ModeArg =
+ dyn_cast<ConstantInt>(KernelInitCB->getArgOperand(InitModeArgNo));
+ if (ModeArg && (ModeArg->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD))
SPMDCompatibilityTracker.indicateOptimisticFixpoint();
+ // This is a generic region but SPMDization is disabled so stop tracking.
+ else if (DisableOpenMPOptSPMDization)
+ SPMDCompatibilityTracker.indicatePessimisticFixpoint();
+ }
+
+ /// Sanitize the string \p S such that it is a suitable global symbol name.
+ static std::string sanitizeForGlobalName(std::string S) {
+ std::replace_if(
+ S.begin(), S.end(),
+ [](const char C) {
+ return !((C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z') ||
+ (C >= '0' && C <= '9') || C == '_');
+ },
+ '.');
+ return S;
}
/// Modify the IR based on the KernelInfoState as the fixpoint iteration is
@@ -2949,19 +3062,16 @@ struct AAKernelInfoFunction : AAKernelInfo {
if (!KernelInitCB || !KernelDeinitCB)
return ChangeStatus::UNCHANGED;
- // Known SPMD-mode kernels need no manifest changes.
- if (SPMDCompatibilityTracker.isKnown())
- return ChangeStatus::UNCHANGED;
-
// If we can we change the execution mode to SPMD-mode otherwise we build a
// custom state machine.
- if (!changeToSPMDMode(A))
- buildCustomStateMachine(A);
+ ChangeStatus Changed = ChangeStatus::UNCHANGED;
+ if (!changeToSPMDMode(A, Changed))
+ return buildCustomStateMachine(A);
- return ChangeStatus::CHANGED;
+ return Changed;
}
- bool changeToSPMDMode(Attributor &A) {
+ bool changeToSPMDMode(Attributor &A, ChangeStatus &Changed) {
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
if (!SPMDCompatibilityTracker.isAssumed()) {
@@ -2993,38 +3103,259 @@ struct AAKernelInfoFunction : AAKernelInfo {
return false;
}
- // Adjust the global exec mode flag that tells the runtime what mode this
- // kernel is executed in.
+ // Check if the kernel is already in SPMD mode, if so, return success.
Function *Kernel = getAnchorScope();
GlobalVariable *ExecMode = Kernel->getParent()->getGlobalVariable(
(Kernel->getName() + "_exec_mode").str());
assert(ExecMode && "Kernel without exec mode?");
- assert(ExecMode->getInitializer() &&
- ExecMode->getInitializer()->isOneValue() &&
- "Initially non-SPMD kernel has SPMD exec mode!");
+ assert(ExecMode->getInitializer() && "ExecMode doesn't have initializer!");
// Set the global exec mode flag to indicate SPMD-Generic mode.
- constexpr int SPMDGeneric = 2;
- if (!ExecMode->getInitializer()->isZeroValue())
- ExecMode->setInitializer(
- ConstantInt::get(ExecMode->getInitializer()->getType(), SPMDGeneric));
+ assert(isa<ConstantInt>(ExecMode->getInitializer()) &&
+ "ExecMode is not an integer!");
+ const int8_t ExecModeVal =
+ cast<ConstantInt>(ExecMode->getInitializer())->getSExtValue();
+ if (ExecModeVal != OMP_TGT_EXEC_MODE_GENERIC)
+ return true;
+
+ // We will now unconditionally modify the IR, indicate a change.
+ Changed = ChangeStatus::CHANGED;
+
+ auto CreateGuardedRegion = [&](Instruction *RegionStartI,
+ Instruction *RegionEndI) {
+ LoopInfo *LI = nullptr;
+ DominatorTree *DT = nullptr;
+ MemorySSAUpdater *MSU = nullptr;
+ using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+
+ BasicBlock *ParentBB = RegionStartI->getParent();
+ Function *Fn = ParentBB->getParent();
+ Module &M = *Fn->getParent();
+
+ // Create all the blocks and logic.
+ // ParentBB:
+ // goto RegionCheckTidBB
+ // RegionCheckTidBB:
+ // Tid = __kmpc_hardware_thread_id()
+ // if (Tid != 0)
+ // goto RegionBarrierBB
+ // RegionStartBB:
+ // <execute instructions guarded>
+ // goto RegionEndBB
+ // RegionEndBB:
+ // <store escaping values to shared mem>
+ // goto RegionBarrierBB
+ // RegionBarrierBB:
+ // __kmpc_simple_barrier_spmd()
+ // // second barrier is omitted if there are no escaping values.
+ // <load escaping values from shared mem>
+ // __kmpc_simple_barrier_spmd()
+ // goto RegionExitBB
+ // RegionExitBB:
+ // <execute rest of instructions>
+
+ BasicBlock *RegionEndBB = SplitBlock(ParentBB, RegionEndI->getNextNode(),
+ DT, LI, MSU, "region.guarded.end");
+ BasicBlock *RegionBarrierBB =
+ SplitBlock(RegionEndBB, &*RegionEndBB->getFirstInsertionPt(), DT, LI,
+ MSU, "region.barrier");
+ BasicBlock *RegionExitBB =
+ SplitBlock(RegionBarrierBB, &*RegionBarrierBB->getFirstInsertionPt(),
+ DT, LI, MSU, "region.exit");
+ BasicBlock *RegionStartBB =
+ SplitBlock(ParentBB, RegionStartI, DT, LI, MSU, "region.guarded");
+
+ assert(ParentBB->getUniqueSuccessor() == RegionStartBB &&
+ "Expected a different CFG");
+
+ BasicBlock *RegionCheckTidBB = SplitBlock(
+ ParentBB, ParentBB->getTerminator(), DT, LI, MSU, "region.check.tid");
+
+ // Register basic blocks with the Attributor.
+ A.registerManifestAddedBasicBlock(*RegionEndBB);
+ A.registerManifestAddedBasicBlock(*RegionBarrierBB);
+ A.registerManifestAddedBasicBlock(*RegionExitBB);
+ A.registerManifestAddedBasicBlock(*RegionStartBB);
+ A.registerManifestAddedBasicBlock(*RegionCheckTidBB);
+
+ bool HasBroadcastValues = false;
+ // Find escaping outputs from the guarded region to outside users and
+ // broadcast their values to them.
+ for (Instruction &I : *RegionStartBB) {
+ SmallPtrSet<Instruction *, 4> OutsideUsers;
+ for (User *Usr : I.users()) {
+ Instruction &UsrI = *cast<Instruction>(Usr);
+ if (UsrI.getParent() != RegionStartBB)
+ OutsideUsers.insert(&UsrI);
+ }
+
+ if (OutsideUsers.empty())
+ continue;
+
+ HasBroadcastValues = true;
+
+ // Emit a global variable in shared memory to store the broadcasted
+ // value.
+ auto *SharedMem = new GlobalVariable(
+ M, I.getType(), /* IsConstant */ false,
+ GlobalValue::InternalLinkage, UndefValue::get(I.getType()),
+ sanitizeForGlobalName(
+ (I.getName() + ".guarded.output.alloc").str()),
+ nullptr, GlobalValue::NotThreadLocal,
+ static_cast<unsigned>(AddressSpace::Shared));
+
+ // Emit a store instruction to update the value.
+ new StoreInst(&I, SharedMem, RegionEndBB->getTerminator());
+
+ LoadInst *LoadI = new LoadInst(I.getType(), SharedMem,
+ I.getName() + ".guarded.output.load",
+ RegionBarrierBB->getTerminator());
+
+ // Emit a load instruction and replace uses of the output value.
+ for (Instruction *UsrI : OutsideUsers)
+ UsrI->replaceUsesOfWith(&I, LoadI);
+ }
+
+ auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
+
+ // Go to tid check BB in ParentBB.
+ const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc();
+ ParentBB->getTerminator()->eraseFromParent();
+ OpenMPIRBuilder::LocationDescription Loc(
+ InsertPointTy(ParentBB, ParentBB->end()), DL);
+ OMPInfoCache.OMPBuilder.updateToLocation(Loc);
+ auto *SrcLocStr = OMPInfoCache.OMPBuilder.getOrCreateSrcLocStr(Loc);
+ Value *Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(SrcLocStr);
+ BranchInst::Create(RegionCheckTidBB, ParentBB)->setDebugLoc(DL);
+
+ // Add check for Tid in RegionCheckTidBB
+ RegionCheckTidBB->getTerminator()->eraseFromParent();
+ OpenMPIRBuilder::LocationDescription LocRegionCheckTid(
+ InsertPointTy(RegionCheckTidBB, RegionCheckTidBB->end()), DL);
+ OMPInfoCache.OMPBuilder.updateToLocation(LocRegionCheckTid);
+ FunctionCallee HardwareTidFn =
+ OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
+ M, OMPRTL___kmpc_get_hardware_thread_id_in_block);
+ Value *Tid =
+ OMPInfoCache.OMPBuilder.Builder.CreateCall(HardwareTidFn, {});
+ Value *TidCheck = OMPInfoCache.OMPBuilder.Builder.CreateIsNull(Tid);
+ OMPInfoCache.OMPBuilder.Builder
+ .CreateCondBr(TidCheck, RegionStartBB, RegionBarrierBB)
+ ->setDebugLoc(DL);
+
+ // First barrier for synchronization; ensures the main thread has updated
+ // the values before workers read them.
+ FunctionCallee BarrierFn =
+ OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
+ M, OMPRTL___kmpc_barrier_simple_spmd);
+ OMPInfoCache.OMPBuilder.updateToLocation(InsertPointTy(
+ RegionBarrierBB, RegionBarrierBB->getFirstInsertionPt()));
+ OMPInfoCache.OMPBuilder.Builder.CreateCall(BarrierFn, {Ident, Tid})
+ ->setDebugLoc(DL);
+
+ // Second barrier ensures workers have read broadcast values.
+ if (HasBroadcastValues)
+ CallInst::Create(BarrierFn, {Ident, Tid}, "",
+ RegionBarrierBB->getTerminator())
+ ->setDebugLoc(DL);
+ };
+
+ auto &AllocSharedRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
+ SmallPtrSet<BasicBlock *, 8> Visited;
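+ // Sink side-effecting instructions that need guarding down to the next
+ // guarded effect in their block so that the guarded regions formed below
+ // stay contiguous.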
+ for (Instruction *GuardedI : SPMDCompatibilityTracker) {
+ BasicBlock *BB = GuardedI->getParent();
+ if (!Visited.insert(BB).second)
+ continue;
+
+ SmallVector<std::pair<Instruction *, Instruction *>> Reorders;
+ Instruction *LastEffect = nullptr;
+ BasicBlock::reverse_iterator IP = BB->rbegin(), IPEnd = BB->rend();
+ while (++IP != IPEnd) {
+ if (!IP->mayHaveSideEffects() && !IP->mayReadFromMemory())
+ continue;
+ Instruction *I = &*IP;
+ if (OpenMPOpt::getCallIfRegularCall(*I, &AllocSharedRFI))
+ continue;
+ if (!I->user_empty() || !SPMDCompatibilityTracker.contains(I)) {
+ LastEffect = nullptr;
+ continue;
+ }
+ if (LastEffect)
+ Reorders.push_back({I, LastEffect});
+ LastEffect = &*IP;
+ }
+ for (auto &Reorder : Reorders)
+ Reorder.first->moveBefore(Reorder.second);
+ }
+
+ SmallVector<std::pair<Instruction *, Instruction *>, 4> GuardedRegions;
+
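+ // Collect maximal runs of consecutive instructions that require guarding;
+ // each run becomes one guarded region.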
+ for (Instruction *GuardedI : SPMDCompatibilityTracker) {
+ BasicBlock *BB = GuardedI->getParent();
+ auto *CalleeAA = A.lookupAAFor<AAKernelInfo>(
+ IRPosition::function(*GuardedI->getFunction()), nullptr,
+ DepClassTy::NONE);
+ assert(CalleeAA != nullptr && "Expected Callee AAKernelInfo");
+ auto &CalleeAAFunction = *cast<AAKernelInfoFunction>(CalleeAA);
+ // Continue if instruction is already guarded.
+ if (CalleeAAFunction.getGuardedInstructions().contains(GuardedI))
+ continue;
+
+ Instruction *GuardedRegionStart = nullptr, *GuardedRegionEnd = nullptr;
+ for (Instruction &I : *BB) {
+ // If instruction I needs to be guarded, update the guarded region
+ // bounds.
+ if (SPMDCompatibilityTracker.contains(&I)) {
+ CalleeAAFunction.getGuardedInstructions().insert(&I);
+ if (GuardedRegionStart)
+ GuardedRegionEnd = &I;
+ else
+ GuardedRegionStart = GuardedRegionEnd = &I;
+
+ continue;
+ }
+
+ // Instruction I does not need guarding; store
+ // any region found and reset the bounds.
+ if (GuardedRegionStart) {
+ GuardedRegions.push_back(
+ std::make_pair(GuardedRegionStart, GuardedRegionEnd));
+ GuardedRegionStart = nullptr;
+ GuardedRegionEnd = nullptr;
+ }
+ }
+ }
+
+ for (auto &GR : GuardedRegions)
+ CreateGuardedRegion(GR.first, GR.second);
+
+ // Adjust the global exec mode flag that tells the runtime what mode this
+ // kernel is executed in.
+ assert(ExecModeVal == OMP_TGT_EXEC_MODE_GENERIC &&
+ "Initially non-SPMD kernel has SPMD exec mode!");
+ ExecMode->setInitializer(
+ ConstantInt::get(ExecMode->getInitializer()->getType(),
+ ExecModeVal | OMP_TGT_EXEC_MODE_GENERIC_SPMD));
// Next rewrite the init and deinit calls to indicate we use SPMD-mode now.
- const int InitIsSPMDArgNo = 1;
- const int DeinitIsSPMDArgNo = 1;
+ const int InitModeArgNo = 1;
+ const int DeinitModeArgNo = 1;
const int InitUseStateMachineArgNo = 2;
const int InitRequiresFullRuntimeArgNo = 3;
const int DeinitRequiresFullRuntimeArgNo = 2;
auto &Ctx = getAnchorValue().getContext();
- A.changeUseAfterManifest(KernelInitCB->getArgOperandUse(InitIsSPMDArgNo),
- *ConstantInt::getBool(Ctx, 1));
+ A.changeUseAfterManifest(
+ KernelInitCB->getArgOperandUse(InitModeArgNo),
+ *ConstantInt::getSigned(IntegerType::getInt8Ty(Ctx),
+ OMP_TGT_EXEC_MODE_SPMD));
A.changeUseAfterManifest(
KernelInitCB->getArgOperandUse(InitUseStateMachineArgNo),
*ConstantInt::getBool(Ctx, 0));
A.changeUseAfterManifest(
- KernelDeinitCB->getArgOperandUse(DeinitIsSPMDArgNo),
- *ConstantInt::getBool(Ctx, 1));
+ KernelDeinitCB->getArgOperandUse(DeinitModeArgNo),
+ *ConstantInt::getSigned(IntegerType::getInt8Ty(Ctx),
+ OMP_TGT_EXEC_MODE_SPMD));
A.changeUseAfterManifest(
KernelInitCB->getArgOperandUse(InitRequiresFullRuntimeArgNo),
*ConstantInt::getBool(Ctx, 0));
@@ -3042,10 +3373,15 @@ struct AAKernelInfoFunction : AAKernelInfo {
};
ChangeStatus buildCustomStateMachine(Attributor &A) {
- assert(ReachedKnownParallelRegions.isValidState() &&
- "Custom state machine with invalid parallel region states?");
+ // If we have disabled state machine rewrites, don't make a custom one.
+ if (DisableOpenMPOptStateMachineRewrite)
+ return ChangeStatus::UNCHANGED;
- const int InitIsSPMDArgNo = 1;
+ // Don't rewrite the state machine if we are not in a valid state.
+ if (!ReachedKnownParallelRegions.isValidState())
+ return ChangeStatus::UNCHANGED;
+
+ const int InitModeArgNo = 1;
const int InitUseStateMachineArgNo = 2;
// Check if the current configuration is non-SPMD and generic state machine.
@@ -3054,14 +3390,14 @@ struct AAKernelInfoFunction : AAKernelInfo {
// we give up.
ConstantInt *UseStateMachine = dyn_cast<ConstantInt>(
KernelInitCB->getArgOperand(InitUseStateMachineArgNo));
- ConstantInt *IsSPMD =
- dyn_cast<ConstantInt>(KernelInitCB->getArgOperand(InitIsSPMDArgNo));
+ ConstantInt *Mode =
+ dyn_cast<ConstantInt>(KernelInitCB->getArgOperand(InitModeArgNo));
// If we are stuck with generic mode, try to create a custom device (=GPU)
// state machine which is specialized for the parallel regions that are
// reachable by the kernel.
- if (!UseStateMachine || UseStateMachine->isZero() || !IsSPMD ||
- !IsSPMD->isZero())
+ if (!UseStateMachine || UseStateMachine->isZero() || !Mode ||
+ (Mode->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD))
return ChangeStatus::UNCHANGED;
// If not SPMD mode, indicate we use a custom state machine now.
@@ -3074,8 +3410,7 @@ struct AAKernelInfoFunction : AAKernelInfo {
// happen if there simply are no parallel regions. In the resulting kernel
// all worker threads will simply exit right away, leaving the main thread
// to do the work alone.
- if (ReachedKnownParallelRegions.empty() &&
- ReachedUnknownParallelRegions.empty()) {
+ if (!mayContainParallelRegion()) {
++NumOpenMPTargetRegionKernelsWithoutStateMachine;
auto Remark = [&](OptimizationRemark OR) {
@@ -3121,9 +3456,14 @@ struct AAKernelInfoFunction : AAKernelInfo {
// Create all the blocks:
//
// InitCB = __kmpc_target_init(...)
- // bool IsWorker = InitCB >= 0;
+ // BlockHwSize =
+ // __kmpc_get_hardware_num_threads_in_block();
+ // WarpSize = __kmpc_get_warp_size();
+ // BlockSize = BlockHwSize - WarpSize;
+ // if (InitCB >= BlockSize) return;
+ // IsWorkerCheckBB: bool IsWorker = InitCB >= 0;
// if (IsWorker) {
- // SMBeginBB: __kmpc_barrier_simple_spmd(...);
+ // SMBeginBB: __kmpc_barrier_simple_generic(...);
// void *WorkFn;
// bool Active = __kmpc_kernel_parallel(&WorkFn);
// if (!WorkFn) return;
@@ -3137,7 +3477,7 @@ struct AAKernelInfoFunction : AAKernelInfo {
// ((WorkFnTy*)WorkFn)(...);
// SMEndParallelBB: __kmpc_kernel_end_parallel(...);
// }
- // SMDoneBB: __kmpc_barrier_simple_spmd(...);
+ // SMDoneBB: __kmpc_barrier_simple_generic(...);
// goto SMBeginBB;
// }
// UserCodeEntryBB: // user code
@@ -3149,6 +3489,8 @@ struct AAKernelInfoFunction : AAKernelInfo {
BasicBlock *InitBB = KernelInitCB->getParent();
BasicBlock *UserCodeEntryBB = InitBB->splitBasicBlock(
KernelInitCB->getNextNode(), "thread.user_code.check");
+ BasicBlock *IsWorkerCheckBB =
+ BasicBlock::Create(Ctx, "is_worker_check", Kernel, UserCodeEntryBB);
BasicBlock *StateMachineBeginBB = BasicBlock::Create(
Ctx, "worker_state_machine.begin", Kernel, UserCodeEntryBB);
BasicBlock *StateMachineFinishedBB = BasicBlock::Create(
@@ -3165,6 +3507,7 @@ struct AAKernelInfoFunction : AAKernelInfo {
Ctx, "worker_state_machine.done.barrier", Kernel, UserCodeEntryBB);
A.registerManifestAddedBasicBlock(*InitBB);
A.registerManifestAddedBasicBlock(*UserCodeEntryBB);
+ A.registerManifestAddedBasicBlock(*IsWorkerCheckBB);
A.registerManifestAddedBasicBlock(*StateMachineBeginBB);
A.registerManifestAddedBasicBlock(*StateMachineFinishedBB);
A.registerManifestAddedBasicBlock(*StateMachineIsActiveCheckBB);
@@ -3174,22 +3517,47 @@ struct AAKernelInfoFunction : AAKernelInfo {
const DebugLoc &DLoc = KernelInitCB->getDebugLoc();
ReturnInst::Create(Ctx, StateMachineFinishedBB)->setDebugLoc(DLoc);
-
InitBB->getTerminator()->eraseFromParent();
+
+ Module &M = *Kernel->getParent();
+ auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
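+ // Threads with an id of BlockHwSize - WarpSize or above (the warp reserved
+ // for the main thread in generic mode) return immediately; the main thread
+ // itself gets -1 from __kmpc_target_init and passes this check.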
+ FunctionCallee BlockHwSizeFn =
+ OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
+ M, OMPRTL___kmpc_get_hardware_num_threads_in_block);
+ FunctionCallee WarpSizeFn =
+ OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
+ M, OMPRTL___kmpc_get_warp_size);
+ Instruction *BlockHwSize =
+ CallInst::Create(BlockHwSizeFn, "block.hw_size", InitBB);
+ BlockHwSize->setDebugLoc(DLoc);
+ Instruction *WarpSize = CallInst::Create(WarpSizeFn, "warp.size", InitBB);
+ WarpSize->setDebugLoc(DLoc);
+ Instruction *BlockSize =
+ BinaryOperator::CreateSub(BlockHwSize, WarpSize, "block.size", InitBB);
+ BlockSize->setDebugLoc(DLoc);
+ Instruction *IsMainOrWorker =
+ ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_SLT, KernelInitCB,
+ BlockSize, "thread.is_main_or_worker", InitBB);
+ IsMainOrWorker->setDebugLoc(DLoc);
+ BranchInst::Create(IsWorkerCheckBB, StateMachineFinishedBB, IsMainOrWorker,
+ InitBB);
+
Instruction *IsWorker =
ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_NE, KernelInitCB,
ConstantInt::get(KernelInitCB->getType(), -1),
- "thread.is_worker", InitBB);
+ "thread.is_worker", IsWorkerCheckBB);
IsWorker->setDebugLoc(DLoc);
- BranchInst::Create(StateMachineBeginBB, UserCodeEntryBB, IsWorker, InitBB);
+ BranchInst::Create(StateMachineBeginBB, UserCodeEntryBB, IsWorker,
+ IsWorkerCheckBB);
// Create local storage for the work function pointer.
+ const DataLayout &DL = M.getDataLayout();
Type *VoidPtrTy = Type::getInt8PtrTy(Ctx);
- AllocaInst *WorkFnAI = new AllocaInst(VoidPtrTy, 0, "worker.work_fn.addr",
- &Kernel->getEntryBlock().front());
+ Instruction *WorkFnAI =
+ new AllocaInst(VoidPtrTy, DL.getAllocaAddrSpace(), nullptr,
+ "worker.work_fn.addr", &Kernel->getEntryBlock().front());
WorkFnAI->setDebugLoc(DLoc);
- auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
OMPInfoCache.OMPBuilder.updateToLocation(
OpenMPIRBuilder::LocationDescription(
IRBuilder<>::InsertPoint(StateMachineBeginBB,
@@ -3199,13 +3567,23 @@ struct AAKernelInfoFunction : AAKernelInfo {
Value *Ident = KernelInitCB->getArgOperand(0);
Value *GTid = KernelInitCB;
- Module &M = *Kernel->getParent();
FunctionCallee BarrierFn =
OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
- M, OMPRTL___kmpc_barrier_simple_spmd);
+ M, OMPRTL___kmpc_barrier_simple_generic);
CallInst::Create(BarrierFn, {Ident, GTid}, "", StateMachineBeginBB)
->setDebugLoc(DLoc);
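+ // If the work-function alloca lives in a non-generic address space (as on
+ // AMDGPU), cast it to a generic pointer before handing it to the runtime
+ // calls below.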
+ if (WorkFnAI->getType()->getPointerAddressSpace() !=
+ (unsigned int)AddressSpace::Generic) {
+ WorkFnAI = new AddrSpaceCastInst(
+ WorkFnAI,
+ PointerType::getWithSamePointeeType(
+ cast<PointerType>(WorkFnAI->getType()),
+ (unsigned int)AddressSpace::Generic),
+ WorkFnAI->getName() + ".generic", StateMachineBeginBB);
+ WorkFnAI->setDebugLoc(DLoc);
+ }
+
FunctionCallee KernelParallelFn =
OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
M, OMPRTL___kmpc_kernel_parallel);
@@ -3243,8 +3621,8 @@ struct AAKernelInfoFunction : AAKernelInfo {
// Now that we have most of the CFG skeleton it is time for the if-cascade
// that checks the function pointer we got from the runtime against the
// parallel regions we expect, if there are any.
- for (int i = 0, e = ReachedKnownParallelRegions.size(); i < e; ++i) {
- auto *ParallelRegion = ReachedKnownParallelRegions[i];
+ for (int I = 0, E = ReachedKnownParallelRegions.size(); I < E; ++I) {
+ auto *ParallelRegion = ReachedKnownParallelRegions[I];
BasicBlock *PRExecuteBB = BasicBlock::Create(
Ctx, "worker_state_machine.parallel_region.execute", Kernel,
StateMachineEndParallelBB);
@@ -3260,7 +3638,7 @@ struct AAKernelInfoFunction : AAKernelInfo {
// Check if we need to compare the pointer at all or if we can just
// call the parallel region function.
Value *IsPR;
- if (i + 1 < e || !ReachedUnknownParallelRegions.empty()) {
+ if (I + 1 < E || !ReachedUnknownParallelRegions.empty()) {
Instruction *CmpI = ICmpInst::Create(
ICmpInst::ICmp, llvm::CmpInst::ICMP_EQ, WorkFnCast, ParallelRegion,
"worker.check_parallel_region", StateMachineIfCascadeCurrentBB);
@@ -3324,8 +3702,21 @@ struct AAKernelInfoFunction : AAKernelInfo {
if (llvm::all_of(Objects,
[](const Value *Obj) { return isa<AllocaInst>(Obj); }))
return true;
+ // Check for AAHeapToStack moved objects which must not be guarded.
+ auto &HS = A.getAAFor<AAHeapToStack>(
+ *this, IRPosition::function(*I.getFunction()),
+ DepClassTy::OPTIONAL);
+ if (llvm::all_of(Objects, [&HS](const Value *Obj) {
+ auto *CB = dyn_cast<CallBase>(Obj);
+ if (!CB)
+ return false;
+ return HS.isAssumedHeapToStack(*CB);
+ })) {
+ return true;
+ }
}
- // For now we give up on everything but stores.
+
+ // Insert instruction that needs guarding.
SPMDCompatibilityTracker.insert(&I);
return true;
};
@@ -3339,9 +3730,13 @@ struct AAKernelInfoFunction : AAKernelInfo {
if (!IsKernelEntry) {
updateReachingKernelEntries(A);
updateParallelLevels(A);
+
+ if (!ParallelLevels.isValidState())
+ SPMDCompatibilityTracker.indicatePessimisticFixpoint();
}
// Callback to check a call instruction.
+ bool AllParallelRegionStatesWereFixed = true;
bool AllSPMDStatesWereFixed = true;
auto CheckCallInst = [&](Instruction &I) {
auto &CB = cast<CallBase>(I);
@@ -3349,13 +3744,37 @@ struct AAKernelInfoFunction : AAKernelInfo {
*this, IRPosition::callsite_function(CB), DepClassTy::OPTIONAL);
getState() ^= CBAA.getState();
AllSPMDStatesWereFixed &= CBAA.SPMDCompatibilityTracker.isAtFixpoint();
+ AllParallelRegionStatesWereFixed &=
+ CBAA.ReachedKnownParallelRegions.isAtFixpoint();
+ AllParallelRegionStatesWereFixed &=
+ CBAA.ReachedUnknownParallelRegions.isAtFixpoint();
return true;
};
bool UsedAssumedInformationInCheckCallInst = false;
if (!A.checkForAllCallLikeInstructions(
- CheckCallInst, *this, UsedAssumedInformationInCheckCallInst))
+ CheckCallInst, *this, UsedAssumedInformationInCheckCallInst)) {
+ LLVM_DEBUG(dbgs() << TAG
+ << "Failed to visit all call-like instructions!\n";);
return indicatePessimisticFixpoint();
+ }
+
+ // If we haven't used any assumed information for the reached parallel
+ // region states we can fix it.
+ if (!UsedAssumedInformationInCheckCallInst &&
+ AllParallelRegionStatesWereFixed) {
+ ReachedKnownParallelRegions.indicateOptimisticFixpoint();
+ ReachedUnknownParallelRegions.indicateOptimisticFixpoint();
+ }
+
+ // If we are sure there are no parallel regions in the kernel we do not
+ // want SPMD mode.
+ if (IsKernelEntry && ReachedUnknownParallelRegions.isAtFixpoint() &&
+ ReachedKnownParallelRegions.isAtFixpoint() &&
+ ReachedUnknownParallelRegions.isValidState() &&
+ ReachedKnownParallelRegions.isValidState() &&
+ !mayContainParallelRegion())
+ SPMDCompatibilityTracker.indicatePessimisticFixpoint();
// If we haven't used any assumed information for the SPMD state we can fix
// it.
@@ -3454,14 +3873,14 @@ struct AAKernelInfoCallSite : AAKernelInfo {
CallBase &CB = cast<CallBase>(getAssociatedValue());
Function *Callee = getAssociatedFunction();
- // Helper to lookup an assumption string.
- auto HasAssumption = [](Function *Fn, StringRef AssumptionStr) {
- return Fn && hasAssumption(*Fn, AssumptionStr);
- };
+ auto &AssumptionAA = A.getAAFor<AAAssumptionInfo>(
+ *this, IRPosition::callsite_function(CB), DepClassTy::OPTIONAL);
// Check for SPMD-mode assumptions.
- if (HasAssumption(Callee, "ompx_spmd_amenable"))
+ if (AssumptionAA.hasAssumption("ompx_spmd_amenable")) {
SPMDCompatibilityTracker.indicateOptimisticFixpoint();
+ indicateOptimisticFixpoint();
+ }
// First weed out calls we do not care about, that is readonly/readnone
// calls, intrinsics, and "no_openmp" calls. Neither of these can reach a
@@ -3483,14 +3902,16 @@ struct AAKernelInfoCallSite : AAKernelInfo {
// Unknown callees might contain parallel regions, except if they have
// an appropriate assumption attached.
- if (!(HasAssumption(Callee, "omp_no_openmp") ||
- HasAssumption(Callee, "omp_no_parallelism")))
+ if (!(AssumptionAA.hasAssumption("omp_no_openmp") ||
+ AssumptionAA.hasAssumption("omp_no_parallelism")))
ReachedUnknownParallelRegions.insert(&CB);
// If SPMDCompatibilityTracker is not fixed, we need to give up on the
// idea we can run something unknown in SPMD-mode.
- if (!SPMDCompatibilityTracker.isAtFixpoint())
+ if (!SPMDCompatibilityTracker.isAtFixpoint()) {
+ SPMDCompatibilityTracker.indicatePessimisticFixpoint();
SPMDCompatibilityTracker.insert(&CB);
+ }
// We have updated the state for this unknown call properly, there won't
// be any change so we indicate a fixpoint.
@@ -3506,6 +3927,7 @@ struct AAKernelInfoCallSite : AAKernelInfo {
switch (RF) {
// All the functions we know are compatible with SPMD mode.
case OMPRTL___kmpc_is_spmd_exec_mode:
+ case OMPRTL___kmpc_distribute_static_fini:
case OMPRTL___kmpc_for_static_fini:
case OMPRTL___kmpc_global_thread_num:
case OMPRTL___kmpc_get_hardware_num_threads_in_block:
@@ -3516,6 +3938,10 @@ struct AAKernelInfoCallSite : AAKernelInfo {
case OMPRTL___kmpc_end_master:
case OMPRTL___kmpc_barrier:
break;
+ case OMPRTL___kmpc_distribute_static_init_4:
+ case OMPRTL___kmpc_distribute_static_init_4u:
+ case OMPRTL___kmpc_distribute_static_init_8:
+ case OMPRTL___kmpc_distribute_static_init_8u:
case OMPRTL___kmpc_for_static_init_4:
case OMPRTL___kmpc_for_static_init_4u:
case OMPRTL___kmpc_for_static_init_8:
@@ -3533,6 +3959,7 @@ struct AAKernelInfoCallSite : AAKernelInfo {
case OMPScheduleType::DistributeChunked:
break;
default:
+ SPMDCompatibilityTracker.indicatePessimisticFixpoint();
SPMDCompatibilityTracker.insert(&CB);
break;
};
@@ -3565,7 +3992,7 @@ struct AAKernelInfoCallSite : AAKernelInfo {
return;
default:
// Unknown OpenMP runtime calls cannot be executed in SPMD-mode,
- // generally.
+ // generally. However, they do not hide parallel regions.
SPMDCompatibilityTracker.insert(&CB);
break;
}
@@ -3685,6 +4112,9 @@ struct AAFoldRuntimeCallCallSiteReturned : AAFoldRuntimeCall {
}
void initialize(Attributor &A) override {
+ if (DisableOpenMPOptFolding)
+ indicatePessimisticFixpoint();
+
Function *Callee = getAssociatedFunction();
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
@@ -3741,11 +4171,24 @@ struct AAFoldRuntimeCallCallSiteReturned : AAFoldRuntimeCall {
ChangeStatus Changed = ChangeStatus::UNCHANGED;
if (SimplifiedValue.hasValue() && SimplifiedValue.getValue()) {
- Instruction &CB = *getCtxI();
- A.changeValueAfterManifest(CB, **SimplifiedValue);
- A.deleteAfterManifest(CB);
+ Instruction &I = *getCtxI();
+ A.changeValueAfterManifest(I, **SimplifiedValue);
+ A.deleteAfterManifest(I);
+
+ CallBase *CB = dyn_cast<CallBase>(&I);
+ auto Remark = [&](OptimizationRemark OR) {
+ if (auto *C = dyn_cast<ConstantInt>(*SimplifiedValue))
+ return OR << "Replacing OpenMP runtime call "
+ << CB->getCalledFunction()->getName() << " with "
+ << ore::NV("FoldedValue", C->getZExtValue()) << ".";
+ return OR << "Replacing OpenMP runtime call "
+ << CB->getCalledFunction()->getName() << ".";
+ };
- LLVM_DEBUG(dbgs() << TAG << "Folding runtime call: " << CB << " with "
+ if (CB && EnableVerboseRemarks)
+ A.emitRemark<OptimizationRemark>(CB, "OMP180", Remark);
+
+ LLVM_DEBUG(dbgs() << TAG << "Replacing runtime call: " << I << " with "
<< **SimplifiedValue << "\n");
Changed = ChangeStatus::CHANGED;
@@ -3979,7 +4422,6 @@ void OpenMPOpt::registerAAs(bool IsModulePass) {
DepClassTy::NONE, /* ForceUpdate */ false,
/* UpdateAfterInit */ false);
-
registerFoldRuntimeCall(OMPRTL___kmpc_is_generic_main_thread_id);
registerFoldRuntimeCall(OMPRTL___kmpc_is_spmd_exec_mode);
registerFoldRuntimeCall(OMPRTL___kmpc_parallel_level);
@@ -4012,7 +4454,8 @@ void OpenMPOpt::registerAAs(bool IsModulePass) {
A.getOrCreateAAFor<AAHeapToShared>(IRPosition::function(F));
return false;
};
- GlobalizationRFI.foreachUse(SCC, CreateAA);
+ if (!DisableOpenMPOptDeglobalization)
+ GlobalizationRFI.foreachUse(SCC, CreateAA);
// Create an ExecutionDomain AA for every function and a HeapToStack AA for
// every function if there is a device kernel.
@@ -4024,7 +4467,8 @@ void OpenMPOpt::registerAAs(bool IsModulePass) {
continue;
A.getOrCreateAAFor<AAExecutionDomain>(IRPosition::function(*F));
- A.getOrCreateAAFor<AAHeapToStack>(IRPosition::function(*F));
+ if (!DisableOpenMPOptDeglobalization)
+ A.getOrCreateAAFor<AAHeapToStack>(IRPosition::function(*F));
for (auto &I : instructions(*F)) {
if (auto *LI = dyn_cast<LoadInst>(&I)) {
@@ -4176,28 +4620,32 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
ORE.emit([&]() {
OptimizationRemarkAnalysis ORA(DEBUG_TYPE, "OMP140", &F);
return ORA << "Could not internalize function. "
- << "Some optimizations may not be possible.";
+ << "Some optimizations may not be possible. [OMP140]";
});
};
// Create internal copies of each function if this is a kernel Module. This
// allows interprocedural passes to see every call edge.
- DenseSet<const Function *> InternalizedFuncs;
- if (isOpenMPDevice(M))
+ DenseMap<Function *, Function *> InternalizedMap;
+ if (isOpenMPDevice(M)) {
+ SmallPtrSet<Function *, 16> InternalizeFns;
for (Function &F : M)
if (!F.isDeclaration() && !Kernels.contains(&F) && IsCalled(F) &&
!DisableInternalization) {
- if (Attributor::internalizeFunction(F, /* Force */ true)) {
- InternalizedFuncs.insert(&F);
+ if (Attributor::isInternalizable(F)) {
+ InternalizeFns.insert(&F);
} else if (!F.hasLocalLinkage() && !F.hasFnAttribute(Attribute::Cold)) {
EmitRemark(F);
}
}
+ Attributor::internalizeFunctions(InternalizeFns, InternalizedMap);
+ }
+
// Look at every function in the Module unless it was internalized.
SmallVector<Function *, 16> SCC;
for (Function &F : M)
- if (!F.isDeclaration() && !InternalizedFuncs.contains(&F))
+ if (!F.isDeclaration() && !InternalizedMap.lookup(&F))
SCC.push_back(&F);
if (SCC.empty())
@@ -4215,12 +4663,24 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
SetVector<Function *> Functions(SCC.begin(), SCC.end());
OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ Functions, Kernels);
- unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? 128 : 32;
+ unsigned MaxFixpointIterations =
+ (isOpenMPDevice(M)) ? SetFixpointIterations : 32;
Attributor A(Functions, InfoCache, CGUpdater, nullptr, true, false,
MaxFixpointIterations, OREGetter, DEBUG_TYPE);
OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
bool Changed = OMPOpt.run(true);
+
+ // Optionally inline device functions for potentially better performance.
+ if (AlwaysInlineDeviceFunctions && isOpenMPDevice(M))
+ for (Function &F : M)
+ if (!F.isDeclaration() && !Kernels.contains(&F) &&
+ !F.hasFnAttribute(Attribute::NoInline))
+ F.addFnAttr(Attribute::AlwaysInline);
+
+ if (PrintModuleAfterOptimizations)
+ LLVM_DEBUG(dbgs() << TAG << "Module after OpenMPOpt Module Pass:\n" << M);
+
if (Changed)
return PreservedAnalyses::none();
@@ -4267,12 +4727,17 @@ PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C,
OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator,
/*CGSCC*/ Functions, Kernels);
- unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? 128 : 32;
+ unsigned MaxFixpointIterations =
+ (isOpenMPDevice(M)) ? SetFixpointIterations : 32;
Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true,
MaxFixpointIterations, OREGetter, DEBUG_TYPE);
OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
bool Changed = OMPOpt.run(false);
+
+ if (PrintModuleAfterOptimizations)
+ LLVM_DEBUG(dbgs() << TAG << "Module after OpenMPOpt CGSCC Pass:\n" << M);
+
if (Changed)
return PreservedAnalyses::none();
@@ -4333,12 +4798,18 @@ struct OpenMPOptCGSCCLegacyPass : public CallGraphSCCPass {
Allocator,
/*CGSCC*/ Functions, Kernels);
- unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? 128 : 32;
+ unsigned MaxFixpointIterations =
+ (isOpenMPDevice(M)) ? SetFixpointIterations : 32;
Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true,
MaxFixpointIterations, OREGetter, DEBUG_TYPE);
OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
- return OMPOpt.run(false);
+ bool Result = OMPOpt.run(false);
+
+ if (PrintModuleAfterOptimizations)
+ LLVM_DEBUG(dbgs() << TAG << "Module after OpenMPOpt CGSCC Pass:\n" << M);
+
+ return Result;
}
bool doFinalization(CallGraph &CG) override { return CGUpdater.finalize(); }
diff --git a/llvm/lib/Transforms/IPO/PartialInlining.cpp b/llvm/lib/Transforms/IPO/PartialInlining.cpp
index d517de38ace3..7402e399a88a 100644
--- a/llvm/lib/Transforms/IPO/PartialInlining.cpp
+++ b/llvm/lib/Transforms/IPO/PartialInlining.cpp
@@ -441,9 +441,7 @@ PartialInlinerImpl::computeOutliningColdRegionsInfo(
};
auto BBProfileCount = [BFI](BasicBlock *BB) {
- return BFI->getBlockProfileCount(BB)
- ? BFI->getBlockProfileCount(BB).getValue()
- : 0;
+ return BFI->getBlockProfileCount(BB).getValueOr(0);
};
// Use the same computeBBInlineCost function to compute the cost savings of
@@ -1413,7 +1411,7 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap);
uint64_t CalleeEntryCountV =
- (CalleeEntryCount ? CalleeEntryCount.getCount() : 0);
+ (CalleeEntryCount ? CalleeEntryCount->getCount() : 0);
bool AnyInline = false;
for (User *User : Users) {
@@ -1461,8 +1459,8 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
if (AnyInline) {
Cloner.IsFunctionInlined = true;
if (CalleeEntryCount)
- Cloner.OrigFunc->setEntryCount(
- CalleeEntryCount.setCount(CalleeEntryCountV));
+ Cloner.OrigFunc->setEntryCount(Function::ProfileCount(
+ CalleeEntryCountV, CalleeEntryCount->getType()));
OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
OrigFuncORE.emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", Cloner.OrigFunc)
diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index aa916345954d..74f68531b89a 100644
--- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -437,6 +437,11 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
MPM.add(createReassociatePass()); // Reassociate expressions
+ // The matrix extension can introduce large vector operations early, which can
+ // benefit from running vector-combine early on.
+ if (EnableMatrix)
+ MPM.add(createVectorCombinePass());
+
// Begin the loop pass pipeline.
if (EnableSimpleLoopUnswitch) {
// The simple loop unswitch pass relies on separate cleanup passes. Schedule
@@ -1012,7 +1017,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
createPGOIndirectCallPromotionLegacyPass(true, !PGOSampleUse.empty()));
// Propagate constant function arguments by specializing the functions.
- if (EnableFunctionSpecialization)
+ if (EnableFunctionSpecialization && OptLevel > 2)
PM.add(createFunctionSpecializationPass());
// Propagate constants at call sites into the functions they call. This
diff --git a/llvm/lib/Transforms/IPO/SCCP.cpp b/llvm/lib/Transforms/IPO/SCCP.cpp
index 081398a390fa..5779553ee732 100644
--- a/llvm/lib/Transforms/IPO/SCCP.cpp
+++ b/llvm/lib/Transforms/IPO/SCCP.cpp
@@ -135,6 +135,7 @@ PreservedAnalyses FunctionSpecializationPass::run(Module &M,
return PA;
}
+namespace {
struct FunctionSpecializationLegacyPass : public ModulePass {
static char ID; // Pass identification, replacement for typeid
FunctionSpecializationLegacyPass() : ModulePass(ID) {}
@@ -175,6 +176,7 @@ struct FunctionSpecializationLegacyPass : public ModulePass {
return runFunctionSpecialization(M, DL, GetTLI, GetTTI, GetAC, GetAnalysis);
}
};
+} // namespace
char FunctionSpecializationLegacyPass::ID = 0;
diff --git a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
index 55b88ac14da5..bae9a1e27e75 100644
--- a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
+++ b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
@@ -32,7 +32,7 @@ ContextTrieNode *ContextTrieNode::getChildContext(const LineLocation &CallSite,
if (CalleeName.empty())
return getHottestChildContext(CallSite);
- uint32_t Hash = nodeHash(CalleeName, CallSite);
+ uint64_t Hash = nodeHash(CalleeName, CallSite);
auto It = AllChildContext.find(Hash);
if (It != AllChildContext.end())
return &It->second;
@@ -64,8 +64,8 @@ ContextTrieNode::getHottestChildContext(const LineLocation &CallSite) {
ContextTrieNode &ContextTrieNode::moveToChildContext(
const LineLocation &CallSite, ContextTrieNode &&NodeToMove,
- StringRef ContextStrToRemove, bool DeleteNode) {
- uint32_t Hash = nodeHash(NodeToMove.getFuncName(), CallSite);
+ uint32_t ContextFramesToRemove, bool DeleteNode) {
+ uint64_t Hash = nodeHash(NodeToMove.getFuncName(), CallSite);
assert(!AllChildContext.count(Hash) && "Node to remove must exist");
LineLocation OldCallSite = NodeToMove.CallSiteLoc;
ContextTrieNode &OldParentContext = *NodeToMove.getParentContext();
@@ -86,10 +86,10 @@ ContextTrieNode &ContextTrieNode::moveToChildContext(
FunctionSamples *FSamples = Node->getFunctionSamples();
if (FSamples) {
- FSamples->getContext().promoteOnPath(ContextStrToRemove);
+ FSamples->getContext().promoteOnPath(ContextFramesToRemove);
FSamples->getContext().setState(SyntheticContext);
- LLVM_DEBUG(dbgs() << " Context promoted to: " << FSamples->getContext()
- << "\n");
+ LLVM_DEBUG(dbgs() << " Context promoted to: "
+ << FSamples->getContext().toString() << "\n");
}
for (auto &It : Node->getAllChildContext()) {
@@ -108,12 +108,12 @@ ContextTrieNode &ContextTrieNode::moveToChildContext(
void ContextTrieNode::removeChildContext(const LineLocation &CallSite,
StringRef CalleeName) {
- uint32_t Hash = nodeHash(CalleeName, CallSite);
+ uint64_t Hash = nodeHash(CalleeName, CallSite);
// Note this essentially calls dtor and destroys that child context
AllChildContext.erase(Hash);
}
-std::map<uint32_t, ContextTrieNode> &ContextTrieNode::getAllChildContext() {
+std::map<uint64_t, ContextTrieNode> &ContextTrieNode::getAllChildContext() {
return AllChildContext;
}
@@ -127,6 +127,15 @@ void ContextTrieNode::setFunctionSamples(FunctionSamples *FSamples) {
FuncSamples = FSamples;
}
+Optional<uint32_t> ContextTrieNode::getFunctionSize() const { return FuncSize; }
+
+void ContextTrieNode::addFunctionSize(uint32_t FSize) {
+ if (!FuncSize.hasValue())
+ FuncSize = 0;
+
+ FuncSize = FuncSize.getValue() + FSize;
+}
+
LineLocation ContextTrieNode::getCallSiteLoc() const { return CallSiteLoc; }
ContextTrieNode *ContextTrieNode::getParentContext() const {
@@ -137,9 +146,10 @@ void ContextTrieNode::setParentContext(ContextTrieNode *Parent) {
ParentContext = Parent;
}
-void ContextTrieNode::dump() {
+void ContextTrieNode::dumpNode() {
dbgs() << "Node: " << FuncName << "\n"
<< " Callsite: " << CallSiteLoc << "\n"
+ << " Size: " << FuncSize << "\n"
<< " Children:\n";
for (auto &It : AllChildContext) {
@@ -147,20 +157,38 @@ void ContextTrieNode::dump() {
}
}
-uint32_t ContextTrieNode::nodeHash(StringRef ChildName,
+void ContextTrieNode::dumpTree() {
+ dbgs() << "Context Profile Tree:\n";
+ std::queue<ContextTrieNode *> NodeQueue;
+ NodeQueue.push(this);
+
+ while (!NodeQueue.empty()) {
+ ContextTrieNode *Node = NodeQueue.front();
+ NodeQueue.pop();
+ Node->dumpNode();
+
+ for (auto &It : Node->getAllChildContext()) {
+ ContextTrieNode *ChildNode = &It.second;
+ NodeQueue.push(ChildNode);
+ }
+ }
+}
+
+uint64_t ContextTrieNode::nodeHash(StringRef ChildName,
const LineLocation &Callsite) {
// We still use the child's name for the child hash because, for children
// of the root node, we don't have different line/discriminator values and
// rely on the name to differentiate children.
- uint32_t NameHash = std::hash<std::string>{}(ChildName.str());
- uint32_t LocId = (Callsite.LineOffset << 16) | Callsite.Discriminator;
+ uint64_t NameHash = std::hash<std::string>{}(ChildName.str());
+ uint64_t LocId =
+ (((uint64_t)Callsite.LineOffset) << 32) | Callsite.Discriminator;
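+ // Pack the 32-bit line offset into the high half and the discriminator
+ // into the low half of the 64-bit location id.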
return NameHash + (LocId << 5) + LocId;
}
ContextTrieNode *ContextTrieNode::getOrCreateChildContext(
const LineLocation &CallSite, StringRef CalleeName, bool AllowCreate) {
- uint32_t Hash = nodeHash(CalleeName, CallSite);
+ uint64_t Hash = nodeHash(CalleeName, CallSite);
auto It = AllChildContext.find(Hash);
if (It != AllChildContext.end()) {
assert(It->second.getFuncName() == CalleeName &&
@@ -177,13 +205,16 @@ ContextTrieNode *ContextTrieNode::getOrCreateChildContext(
// Profile tracker that manages profiles and their associated contexts
SampleContextTracker::SampleContextTracker(
- StringMap<FunctionSamples> &Profiles) {
+ SampleProfileMap &Profiles,
+ const DenseMap<uint64_t, StringRef> *GUIDToFuncNameMap)
+ : GUIDToFuncNameMap(GUIDToFuncNameMap) {
for (auto &FuncSample : Profiles) {
FunctionSamples *FSamples = &FuncSample.second;
- SampleContext Context(FuncSample.first(), RawContext);
- LLVM_DEBUG(dbgs() << "Tracking Context for function: " << Context << "\n");
+ SampleContext Context = FuncSample.first;
+ LLVM_DEBUG(dbgs() << "Tracking Context for function: " << Context.toString()
+ << "\n");
if (!Context.isBaseContext())
- FuncToCtxtProfiles[Context.getNameWithoutContext()].push_back(FSamples);
+ FuncToCtxtProfiles[Context.getName()].insert(FSamples);
ContextTrieNode *NewNode = getOrCreateContextPath(Context, true);
assert(!NewNode->getFunctionSamples() &&
"New node can't have sample profile");
@@ -200,6 +231,10 @@ SampleContextTracker::getCalleeContextSamplesFor(const CallBase &Inst,
return nullptr;
CalleeName = FunctionSamples::getCanonicalFnName(CalleeName);
+ // Convert real function names to MD5 names, if the input profile is
+ // MD5-based.
+ std::string FGUID;
+ CalleeName = getRepInFormat(CalleeName, FunctionSamples::UseMD5, FGUID);
// For indirect call, CalleeName will be empty, in which case the context
// profile for callee with largest total samples will be returned.
@@ -207,7 +242,8 @@ SampleContextTracker::getCalleeContextSamplesFor(const CallBase &Inst,
if (CalleeContext) {
FunctionSamples *FSamples = CalleeContext->getFunctionSamples();
LLVM_DEBUG(if (FSamples) {
- dbgs() << " Callee context found: " << FSamples->getContext() << "\n";
+ dbgs() << " Callee context found: " << FSamples->getContext().toString()
+ << "\n";
});
return FSamples;
}
@@ -285,6 +321,11 @@ FunctionSamples *SampleContextTracker::getBaseSamplesFor(const Function &Func,
FunctionSamples *SampleContextTracker::getBaseSamplesFor(StringRef Name,
bool MergeContext) {
LLVM_DEBUG(dbgs() << "Getting base profile for function: " << Name << "\n");
+ // Convert real function names to MD5 names, if the input profile is
+ // MD5-based.
+ std::string FGUID;
+ Name = getRepInFormat(Name, FunctionSamples::UseMD5, FGUID);
+
// Base profile is top-level node (child of root node), so try to retrieve
// existing top-level node for given function first. If it exists, it could be
// that we've merged base profile before, or there's actually context-less
@@ -299,14 +340,14 @@ FunctionSamples *SampleContextTracker::getBaseSamplesFor(StringRef Name,
// into base profile.
for (auto *CSamples : FuncToCtxtProfiles[Name]) {
SampleContext &Context = CSamples->getContext();
- ContextTrieNode *FromNode = getContextFor(Context);
- if (FromNode == Node)
- continue;
-
// Skip inlined context profile and also don't re-merge any context
if (Context.hasState(InlinedContext) || Context.hasState(MergedContext))
continue;
+ ContextTrieNode *FromNode = getContextFor(Context);
+ if (FromNode == Node)
+ continue;
+
ContextTrieNode &ToNode = promoteMergeContextSamplesTree(*FromNode);
assert((!Node || Node == &ToNode) && "Expect only one base profile");
Node = &ToNode;
@@ -324,7 +365,7 @@ void SampleContextTracker::markContextSamplesInlined(
const FunctionSamples *InlinedSamples) {
assert(InlinedSamples && "Expect non-null inlined samples");
LLVM_DEBUG(dbgs() << "Marking context profile as inlined: "
- << InlinedSamples->getContext() << "\n");
+ << InlinedSamples->getContext().toString() << "\n");
InlinedSamples->getContext().setState(InlinedContext);
}
@@ -376,30 +417,23 @@ ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree(
FunctionSamples *FromSamples = NodeToPromo.getFunctionSamples();
assert(FromSamples && "Shouldn't promote a context without profile");
LLVM_DEBUG(dbgs() << " Found context tree root to promote: "
- << FromSamples->getContext() << "\n");
+ << FromSamples->getContext().toString() << "\n");
assert(!FromSamples->getContext().hasState(InlinedContext) &&
"Shouldn't promote inlined context profile");
- StringRef ContextStrToRemove = FromSamples->getContext().getCallingContext();
+ uint32_t ContextFramesToRemove =
+ FromSamples->getContext().getContextFrames().size() - 1;
return promoteMergeContextSamplesTree(NodeToPromo, RootContext,
- ContextStrToRemove);
+ ContextFramesToRemove);
}
-void SampleContextTracker::dump() {
- dbgs() << "Context Profile Tree:\n";
- std::queue<ContextTrieNode *> NodeQueue;
- NodeQueue.push(&RootContext);
-
- while (!NodeQueue.empty()) {
- ContextTrieNode *Node = NodeQueue.front();
- NodeQueue.pop();
- Node->dump();
+void SampleContextTracker::dump() { RootContext.dumpTree(); }
- for (auto &It : Node->getAllChildContext()) {
- ContextTrieNode *ChildNode = &It.second;
- NodeQueue.push(ChildNode);
- }
- }
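+// When the profile uses MD5 names, the trie stores the decimal GUID as the
+// function name; translate it back to the real name via GUIDToFuncNameMap.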
+StringRef SampleContextTracker::getFuncNameFor(ContextTrieNode *Node) const {
+ if (!FunctionSamples::UseMD5)
+ return Node->getFuncName();
+ assert(GUIDToFuncNameMap && "GUIDToFuncNameMap needs to be populated first");
+ return GUIDToFuncNameMap->lookup(std::stoull(Node->getFuncName().data()));
}
ContextTrieNode *
@@ -444,11 +478,22 @@ ContextTrieNode *SampleContextTracker::getContextFor(const DILocation *DIL) {
RootName = PrevDIL->getScope()->getSubprogram()->getName();
S.push_back(std::make_pair(LineLocation(0, 0), RootName));
+ // Convert real function names to MD5 names, if the input profile is
+ // MD5-based.
+ std::list<std::string> MD5Names;
+ if (FunctionSamples::UseMD5) {
+ for (auto &Location : S) {
+ MD5Names.emplace_back();
+ getRepInFormat(Location.second, FunctionSamples::UseMD5, MD5Names.back());
+ Location.second = MD5Names.back();
+ }
+ }
+
ContextTrieNode *ContextNode = &RootContext;
int I = S.size();
while (--I >= 0 && ContextNode) {
LineLocation &CallSite = S[I].first;
- StringRef &CalleeName = S[I].second;
+ StringRef CalleeName = S[I].second;
ContextNode = ContextNode->getChildContext(CallSite, CalleeName);
}
@@ -462,27 +507,18 @@ ContextTrieNode *
SampleContextTracker::getOrCreateContextPath(const SampleContext &Context,
bool AllowCreate) {
ContextTrieNode *ContextNode = &RootContext;
- StringRef ContextRemain = Context;
- StringRef ChildContext;
- StringRef CalleeName;
LineLocation CallSiteLoc(0, 0);
- while (ContextNode && !ContextRemain.empty()) {
- auto ContextSplit = SampleContext::splitContextString(ContextRemain);
- ChildContext = ContextSplit.first;
- ContextRemain = ContextSplit.second;
- LineLocation NextCallSiteLoc(0, 0);
- SampleContext::decodeContextString(ChildContext, CalleeName,
- NextCallSiteLoc);
-
+ for (auto &Callsite : Context.getContextFrames()) {
// Create child node at parent line/disc location
if (AllowCreate) {
ContextNode =
- ContextNode->getOrCreateChildContext(CallSiteLoc, CalleeName);
+ ContextNode->getOrCreateChildContext(CallSiteLoc, Callsite.FuncName);
} else {
- ContextNode = ContextNode->getChildContext(CallSiteLoc, CalleeName);
+ ContextNode =
+ ContextNode->getChildContext(CallSiteLoc, Callsite.FuncName);
}
- CallSiteLoc = NextCallSiteLoc;
+ CallSiteLoc = Callsite.Location;
}
assert((!AllowCreate || ContextNode) &&
@@ -502,7 +538,7 @@ ContextTrieNode &SampleContextTracker::addTopLevelContextNode(StringRef FName) {
void SampleContextTracker::mergeContextNode(ContextTrieNode &FromNode,
ContextTrieNode &ToNode,
- StringRef ContextStrToRemove) {
+ uint32_t ContextFramesToRemove) {
FunctionSamples *FromSamples = FromNode.getFunctionSamples();
FunctionSamples *ToSamples = ToNode.getFunctionSamples();
if (FromSamples && ToSamples) {
@@ -510,19 +546,21 @@ void SampleContextTracker::mergeContextNode(ContextTrieNode &FromNode,
ToSamples->merge(*FromSamples);
ToSamples->getContext().setState(SyntheticContext);
FromSamples->getContext().setState(MergedContext);
+ if (FromSamples->getContext().hasAttribute(ContextShouldBeInlined))
+ ToSamples->getContext().setAttribute(ContextShouldBeInlined);
} else if (FromSamples) {
// Transfer FromSamples from FromNode to ToNode
ToNode.setFunctionSamples(FromSamples);
FromSamples->getContext().setState(SyntheticContext);
- FromSamples->getContext().promoteOnPath(ContextStrToRemove);
+ FromSamples->getContext().promoteOnPath(ContextFramesToRemove);
FromNode.setFunctionSamples(nullptr);
}
}
ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree(
ContextTrieNode &FromNode, ContextTrieNode &ToNodeParent,
- StringRef ContextStrToRemove) {
- assert(!ContextStrToRemove.empty() && "Context to remove can't be empty");
+ uint32_t ContextFramesToRemove) {
+ assert(ContextFramesToRemove && "Context to remove can't be empty");
// Ignore call site location if destination is top level under root
LineLocation NewCallSiteLoc = LineLocation(0, 0);
@@ -540,21 +578,21 @@ ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree(
// Do not delete node to move from its parent here because
// caller is iterating over children of that parent node.
ToNode = &ToNodeParent.moveToChildContext(
- NewCallSiteLoc, std::move(FromNode), ContextStrToRemove, false);
+ NewCallSiteLoc, std::move(FromNode), ContextFramesToRemove, false);
} else {
// Destination node exists, merge samples for the context tree
- mergeContextNode(FromNode, *ToNode, ContextStrToRemove);
+ mergeContextNode(FromNode, *ToNode, ContextFramesToRemove);
LLVM_DEBUG({
if (ToNode->getFunctionSamples())
dbgs() << " Context promoted and merged to: "
- << ToNode->getFunctionSamples()->getContext() << "\n";
+ << ToNode->getFunctionSamples()->getContext().toString() << "\n";
});
// Recursively promote and merge children
for (auto &It : FromNode.getAllChildContext()) {
ContextTrieNode &FromChildNode = It.second;
promoteMergeContextSamplesTree(FromChildNode, *ToNode,
- ContextStrToRemove);
+ ContextFramesToRemove);
}
// Remove children once they're all merged
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 8e9c79fc7bbb..a961c47a7501 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -143,6 +143,12 @@ static cl::opt<bool> ProfileSampleAccurate(
"callsite and function as having 0 samples. Otherwise, treat "
"un-sampled callsites and functions conservatively as unknown. "));
+static cl::opt<bool> ProfileSampleBlockAccurate(
+ "profile-sample-block-accurate", cl::Hidden, cl::init(false),
+ cl::desc("If the sample profile is accurate, we will mark all un-sampled "
+ "branches and calls as having 0 samples. Otherwise, treat "
+ "them conservatively as unknown. "));
+
static cl::opt<bool> ProfileAccurateForSymsInList(
"profile-accurate-for-symsinlist", cl::Hidden, cl::ZeroOrMore,
cl::init(true),
@@ -214,6 +220,16 @@ static cl::opt<bool> CallsitePrioritizedInline(
cl::desc("Use call site prioritized inlining for sample profile loader."
"Currently only CSSPGO is supported."));
+static cl::opt<bool> UsePreInlinerDecision(
+ "sample-profile-use-preinliner", cl::Hidden, cl::ZeroOrMore,
+ cl::init(false),
+ cl::desc("Use the preinliner decisions stored in profile context."));
+
+static cl::opt<bool> AllowRecursiveInline(
+ "sample-profile-recursive-inline", cl::Hidden, cl::ZeroOrMore,
+ cl::init(false),
+ cl::desc("Allow sample loader inliner to inline recursive calls."));
+
static cl::opt<std::string> ProfileInlineReplayFile(
"sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"),
cl::desc(
@@ -221,6 +237,50 @@ static cl::opt<std::string> ProfileInlineReplayFile(
"by inlining from sample profile loader."),
cl::Hidden);
+static cl::opt<ReplayInlinerSettings::Scope> ProfileInlineReplayScope(
+ "sample-profile-inline-replay-scope",
+ cl::init(ReplayInlinerSettings::Scope::Function),
+ cl::values(clEnumValN(ReplayInlinerSettings::Scope::Function, "Function",
+ "Replay on functions that have remarks associated "
+ "with them (default)"),
+ clEnumValN(ReplayInlinerSettings::Scope::Module, "Module",
+ "Replay on the entire module")),
+ cl::desc("Whether inline replay should be applied to the entire "
+ "Module or just the Functions (default) that are present as "
+ "callers in remarks during sample profile inlining."),
+ cl::Hidden);
+
+static cl::opt<ReplayInlinerSettings::Fallback> ProfileInlineReplayFallback(
+ "sample-profile-inline-replay-fallback",
+ cl::init(ReplayInlinerSettings::Fallback::Original),
+ cl::values(
+ clEnumValN(
+ ReplayInlinerSettings::Fallback::Original, "Original",
+ "All decisions not in replay send to original advisor (default)"),
+ clEnumValN(ReplayInlinerSettings::Fallback::AlwaysInline,
+ "AlwaysInline", "All decisions not in replay are inlined"),
+ clEnumValN(ReplayInlinerSettings::Fallback::NeverInline, "NeverInline",
+ "All decisions not in replay are not inlined")),
+ cl::desc("How sample profile inline replay treats sites that don't come "
+ "from the replay. Original: defers to original advisor, "
+ "AlwaysInline: inline all sites not in replay, NeverInline: "
+ "inline no sites not in replay"),
+ cl::Hidden);
+
+static cl::opt<CallSiteFormat::Format> ProfileInlineReplayFormat(
+ "sample-profile-inline-replay-format",
+ cl::init(CallSiteFormat::Format::LineColumnDiscriminator),
+ cl::values(
+ clEnumValN(CallSiteFormat::Format::Line, "Line", "<Line Number>"),
+ clEnumValN(CallSiteFormat::Format::LineColumn, "LineColumn",
+ "<Line Number>:<Column Number>"),
+ clEnumValN(CallSiteFormat::Format::LineDiscriminator,
+ "LineDiscriminator", "<Line Number>.<Discriminator>"),
+ clEnumValN(CallSiteFormat::Format::LineColumnDiscriminator,
+ "LineColumnDiscriminator",
+ "<Line Number>:<Column Number>.<Discriminator> (default)")),
+ cl::desc("How sample profile inline replay file is formatted"), cl::Hidden);
+
static cl::opt<unsigned>
MaxNumPromotions("sample-profile-icp-max-prom", cl::init(3), cl::Hidden,
cl::ZeroOrMore,
@@ -358,10 +418,10 @@ public:
std::function<AssumptionCache &(Function &)> GetAssumptionCache,
std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo,
std::function<const TargetLibraryInfo &(Function &)> GetTLI)
- : SampleProfileLoaderBaseImpl(std::string(Name)),
+ : SampleProfileLoaderBaseImpl(std::string(Name), std::string(RemapName)),
GetAC(std::move(GetAssumptionCache)),
GetTTI(std::move(GetTargetTransformInfo)), GetTLI(std::move(GetTLI)),
- RemappingFilename(std::string(RemapName)), LTOPhase(LTOPhase) {}
+ LTOPhase(LTOPhase) {}
bool doInitialization(Module &M, FunctionAnalysisManager *FAM = nullptr);
bool runOnModule(Module &M, ModuleAnalysisManager *AM,
@@ -377,7 +437,7 @@ protected:
findFunctionSamples(const Instruction &I) const override;
std::vector<const FunctionSamples *>
findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const;
- void findExternalInlineCandidate(const FunctionSamples *Samples,
+ void findExternalInlineCandidate(CallBase *CB, const FunctionSamples *Samples,
DenseSet<GlobalValue::GUID> &InlinedGUIDs,
const StringMap<Function *> &SymbolMap,
uint64_t Threshold);
@@ -385,8 +445,11 @@ protected:
bool tryPromoteAndInlineCandidate(
Function &F, InlineCandidate &Candidate, uint64_t SumOrigin,
uint64_t &Sum, SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
+
bool inlineHotFunctions(Function &F,
DenseSet<GlobalValue::GUID> &InlinedGUIDs);
+ Optional<InlineCost> getExternalInlineAdvisorCost(CallBase &CB);
+ bool getExternalInlineAdvisorShouldInline(CallBase &CB);
InlineCost shouldInlineCandidate(InlineCandidate &Candidate);
bool getInlineCandidate(InlineCandidate *NewCandidate, CallBase *CB);
bool
@@ -417,9 +480,6 @@ protected:
/// Profile tracker for different context.
std::unique_ptr<SampleContextTracker> ContextTracker;
- /// Name of the profile remapping file to load.
- std::string RemappingFilename;
-
/// Flag indicating whether input profile is context-sensitive
bool ProfileIsCS = false;
@@ -464,7 +524,7 @@ protected:
bool ProfAccForSymsInList;
// External inline advisor used to replay inline decision from remarks.
- std::unique_ptr<ReplayInlineAdvisor> ExternalInlineAdvisor;
+ std::unique_ptr<InlineAdvisor> ExternalInlineAdvisor;
// A pseudo probe helper to correlate the imported sample counts.
std::unique_ptr<PseudoProbeManager> ProbeManager;
@@ -953,8 +1013,24 @@ void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates(
}
void SampleProfileLoader::findExternalInlineCandidate(
- const FunctionSamples *Samples, DenseSet<GlobalValue::GUID> &InlinedGUIDs,
+ CallBase *CB, const FunctionSamples *Samples,
+ DenseSet<GlobalValue::GUID> &InlinedGUIDs,
const StringMap<Function *> &SymbolMap, uint64_t Threshold) {
+
+ // If ExternalInlineAdvisor wants to inline an external function
+ // make sure it's imported
+ if (CB && getExternalInlineAdvisorShouldInline(*CB)) {
+ // Samples may not exist for replayed function, if so
+ // just add the direct GUID and move on
+ if (!Samples) {
+ InlinedGUIDs.insert(
+ FunctionSamples::getGUID(CB->getCalledFunction()->getName()));
+ return;
+ }
+ // Otherwise, drop the threshold to import everything that we can
+ Threshold = 0;
+ }
+
assert(Samples && "expect non-null caller profile");
// For AutoFDO profile, retrieve candidate profiles by walking over
@@ -975,14 +1051,21 @@ void SampleProfileLoader::findExternalInlineCandidate(
// For CSSPGO profile, retrieve candidate profile by walking over the
// trie built for context profile. Note that we also take call targets
// even if callee doesn't have a corresponding context profile.
- if (!CalleeSample || CalleeSample->getEntrySamples() < Threshold)
+ if (!CalleeSample)
+ continue;
+
+ // If pre-inliner decision is used, honor that for importing as well.
+ bool PreInline =
+ UsePreInlinerDecision &&
+ CalleeSample->getContext().hasAttribute(ContextShouldBeInlined);
+ if (!PreInline && CalleeSample->getEntrySamples() < Threshold)
continue;
StringRef Name = CalleeSample->getFuncName();
Function *Func = SymbolMap.lookup(Name);
// Add to the import list only when it's defined out of module.
if (!Func || Func->isDeclaration())
- InlinedGUIDs.insert(FunctionSamples::getGUID(Name));
+ InlinedGUIDs.insert(FunctionSamples::getGUID(CalleeSample->getName()));
// Import hot CallTargets, which may not be available in IR because full
// profile annotation cannot be done until backend compilation in ThinLTO.
@@ -992,7 +1075,7 @@ void SampleProfileLoader::findExternalInlineCandidate(
StringRef CalleeName = CalleeSample->getFuncName(TS.getKey());
const Function *Callee = SymbolMap.lookup(CalleeName);
if (!Callee || Callee->isDeclaration())
- InlinedGUIDs.insert(FunctionSamples::getGUID(CalleeName));
+ InlinedGUIDs.insert(FunctionSamples::getGUID(TS.getKey()));
}
// Import hot child context profile associated with callees. Note that this
@@ -1042,16 +1125,20 @@ bool SampleProfileLoader::inlineHotFunctions(
for (auto &I : BB.getInstList()) {
const FunctionSamples *FS = nullptr;
if (auto *CB = dyn_cast<CallBase>(&I)) {
- if (!isa<IntrinsicInst>(I) && (FS = findCalleeFunctionSamples(*CB))) {
- assert((!FunctionSamples::UseMD5 || FS->GUIDToFuncNameMap) &&
- "GUIDToFuncNameMap has to be populated");
- AllCandidates.push_back(CB);
- if (FS->getEntrySamples() > 0 || ProfileIsCS)
- LocalNotInlinedCallSites.try_emplace(CB, FS);
- if (callsiteIsHot(FS, PSI, ProfAccForSymsInList))
- Hot = true;
- else if (shouldInlineColdCallee(*CB))
- ColdCandidates.push_back(CB);
+ if (!isa<IntrinsicInst>(I)) {
+ if ((FS = findCalleeFunctionSamples(*CB))) {
+ assert((!FunctionSamples::UseMD5 || FS->GUIDToFuncNameMap) &&
+ "GUIDToFuncNameMap has to be populated");
+ AllCandidates.push_back(CB);
+ if (FS->getEntrySamples() > 0 || ProfileIsCS)
+ LocalNotInlinedCallSites.try_emplace(CB, FS);
+ if (callsiteIsHot(FS, PSI, ProfAccForSymsInList))
+ Hot = true;
+ else if (shouldInlineColdCallee(*CB))
+ ColdCandidates.push_back(CB);
+ } else if (getExternalInlineAdvisorShouldInline(*CB)) {
+ AllCandidates.push_back(CB);
+ }
}
}
}
@@ -1078,7 +1165,7 @@ bool SampleProfileLoader::inlineHotFunctions(
for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) {
uint64_t SumOrigin = Sum;
if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
- findExternalInlineCandidate(FS, InlinedGUIDs, SymbolMap,
+ findExternalInlineCandidate(I, FS, InlinedGUIDs, SymbolMap,
PSI->getOrCompHotCountThreshold());
continue;
}
@@ -1098,8 +1185,8 @@ bool SampleProfileLoader::inlineHotFunctions(
LocalChanged = true;
}
} else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
- findExternalInlineCandidate(findCalleeFunctionSamples(*I), InlinedGUIDs,
- SymbolMap,
+ findExternalInlineCandidate(I, findCalleeFunctionSamples(*I),
+ InlinedGUIDs, SymbolMap,
PSI->getOrCompHotCountThreshold());
}
}
@@ -1184,8 +1271,8 @@ bool SampleProfileLoader::tryInlineCandidate(
*CalledFunction);
// The call to InlineFunction erases I, so we can't pass it here.
- emitInlinedInto(*ORE, DLoc, BB, *CalledFunction, *BB->getParent(), Cost,
- true, CSINLINE_DEBUG);
+ emitInlinedIntoBasedOnCost(*ORE, DLoc, BB, *CalledFunction,
+ *BB->getParent(), Cost, true, CSINLINE_DEBUG);
// Now populate the list of newly exposed call sites.
if (InlinedCallSites) {
@@ -1228,7 +1315,9 @@ bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
// Find the callee's profile. For indirect call, find hottest target profile.
const FunctionSamples *CalleeSamples = findCalleeFunctionSamples(*CB);
- if (!CalleeSamples)
+ // If ExternalInlineAdvisor wants to inline this site, do so even
+ // if Samples are not present.
+ if (!CalleeSamples && !getExternalInlineAdvisorShouldInline(*CB))
return false;
float Factor = 1.0;
@@ -1247,19 +1336,34 @@ bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
return true;
}
-InlineCost
-SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
+Optional<InlineCost>
+SampleProfileLoader::getExternalInlineAdvisorCost(CallBase &CB) {
std::unique_ptr<InlineAdvice> Advice = nullptr;
if (ExternalInlineAdvisor) {
- Advice = ExternalInlineAdvisor->getAdvice(*Candidate.CallInstr);
- if (!Advice->isInliningRecommended()) {
- Advice->recordUnattemptedInlining();
- return InlineCost::getNever("not previously inlined");
+ Advice = ExternalInlineAdvisor->getAdvice(CB);
+ if (Advice) {
+ if (!Advice->isInliningRecommended()) {
+ Advice->recordUnattemptedInlining();
+ return InlineCost::getNever("not previously inlined");
+ }
+ Advice->recordInlining();
+ return InlineCost::getAlways("previously inlined");
}
- Advice->recordInlining();
- return InlineCost::getAlways("previously inlined");
}
+ return {};
+}
+
+bool SampleProfileLoader::getExternalInlineAdvisorShouldInline(CallBase &CB) {
+ Optional<InlineCost> Cost = getExternalInlineAdvisorCost(CB);
+ return Cost ? !!Cost.getValue() : false;
+}
+
+InlineCost
+SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
+ if (Optional<InlineCost> ReplayCost =
+ getExternalInlineAdvisorCost(*Candidate.CallInstr))
+ return ReplayCost.getValue();
// Adjust threshold based on call site hotness, only do this for callsite
// prioritized inliner because otherwise cost-benefit check is done earlier.
int SampleThreshold = SampleColdCallSiteThreshold;
@@ -1274,7 +1378,9 @@ SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
assert(Callee && "Expect a definition for inline candidate of direct call");
InlineParams Params = getInlineParams();
+ // We will ignore the threshold from inline cost, so always get full cost.
Params.ComputeFullInlineCost = true;
+ Params.AllowRecursiveCall = AllowRecursiveInline;
// Checks if there is anything in the reachable portion of the callee at
// this callsite that makes this inlining potentially illegal. Need to
// set ComputeFullInlineCost, otherwise getInlineCost may return early
@@ -1288,6 +1394,25 @@ SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
if (Cost.isNever() || Cost.isAlways())
return Cost;
+ // With CSSPGO, the preinliner in llvm-profgen can estimate global inline
+ // decisions based on hotness as well as accurate function byte sizes for a
+ // given context, using function/inlinee sizes from a previous build. It
+ // stores the decision in the profile and also adjusts/merges context
+ // profiles, aiming at better context-sensitive post-inline profile quality,
+ // assuming all inline decision estimates will be honored by the compiler.
+ // Here we replay that inline decision under `sample-profile-use-preinliner`.
+ // Note that we don't need to handle negative decisions from the preinliner,
+ // as context profiles for not-inlined calls are already merged by it.
+ if (UsePreInlinerDecision && Candidate.CalleeSamples) {
+ // Once two nodes are merged due to promotion, we lose some context, so the
+ // original context-sensitive preinliner decision should be ignored for
+ // SyntheticContext.
+ SampleContext &Context = Candidate.CalleeSamples->getContext();
+ if (!Context.hasState(SyntheticContext) &&
+ Context.hasAttribute(ContextShouldBeInlined))
+ return InlineCost::getAlways("preinliner");
+ }
+
// For old FDO inliner, we inline the call site as long as cost is not
// "Never". The cost-benefit check is done earlier.
if (!CallsitePrioritizedInline) {
@@ -1357,7 +1482,7 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority(
for (const auto *FS : CalleeSamples) {
// TODO: Consider disabling pre-LTO ICP for MonoLTO as well
if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
- findExternalInlineCandidate(FS, InlinedGUIDs, SymbolMap,
+ findExternalInlineCandidate(I, FS, InlinedGUIDs, SymbolMap,
PSI->getOrCompHotCountThreshold());
continue;
}
@@ -1405,8 +1530,9 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority(
Changed = true;
}
} else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
- findExternalInlineCandidate(Candidate.CalleeSamples, InlinedGUIDs,
- SymbolMap, PSI->getOrCompHotCountThreshold());
+ findExternalInlineCandidate(I, findCalleeFunctionSamples(*I),
+ InlinedGUIDs, SymbolMap,
+ PSI->getOrCompHotCountThreshold());
}
}
@@ -1494,7 +1620,7 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) {
{static_cast<uint32_t>(BlockWeights[BB])}));
}
}
- } else if (OverwriteExistingWeights) {
+ } else if (OverwriteExistingWeights || ProfileSampleBlockAccurate) {
// Set profile metadata (possibly annotated by LTO prelink) to zero or
// clear it for cold code.
for (auto &I : BB->getInstList()) {
@@ -1792,11 +1918,13 @@ bool SampleProfileLoader::doInitialization(Module &M,
}
if (FAM && !ProfileInlineReplayFile.empty()) {
- ExternalInlineAdvisor = std::make_unique<ReplayInlineAdvisor>(
- M, *FAM, Ctx, /*OriginalAdvisor=*/nullptr, ProfileInlineReplayFile,
+ ExternalInlineAdvisor = getReplayInlineAdvisor(
+ M, *FAM, Ctx, /*OriginalAdvisor=*/nullptr,
+ ReplayInlinerSettings{ProfileInlineReplayFile,
+ ProfileInlineReplayScope,
+ ProfileInlineReplayFallback,
+ {ProfileInlineReplayFormat}},
/*EmitRemarks=*/false);
- if (!ExternalInlineAdvisor->areReplayRemarksLoaded())
- ExternalInlineAdvisor.reset();
}
// Apply tweaks if context-sensitive profile is available.
@@ -1810,13 +1938,21 @@ bool SampleProfileLoader::doInitialization(Module &M,
if (!CallsitePrioritizedInline.getNumOccurrences())
CallsitePrioritizedInline = true;
+ // For CSSPGO, use preinliner decision by default when available.
+ if (!UsePreInlinerDecision.getNumOccurrences())
+ UsePreInlinerDecision = true;
+
+ // For CSSPGO, we also allow recursive inline to best use context profile.
+ if (!AllowRecursiveInline.getNumOccurrences())
+ AllowRecursiveInline = true;
+
// Enable iterative-BFI by default for CSSPGO.
if (!UseIterativeBFIInference.getNumOccurrences())
UseIterativeBFIInference = true;
// Tracker for profiles under different context
- ContextTracker =
- std::make_unique<SampleContextTracker>(Reader->getProfiles());
+ ContextTracker = std::make_unique<SampleContextTracker>(
+ Reader->getProfiles(), &GUIDToFuncNameMap);
}
// Load pseudo probe descriptors for probe-based function samples.
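As an illustration of the preinliner replay wired up in this file, a small sketch of the decision test; the attribute names and layout below are assumptions for the example, not the real SampleContext API.

#include <cstdint>

// Hypothetical attribute bits carried by a sample context.
enum ContextAttr : uint32_t {
  ContextNone = 0,
  ContextShouldBeInlinedBit = 1u << 0, // preinliner said "inline this"
};

struct Context {
  uint32_t Attributes = ContextNone;
  bool IsSynthetic = false; // merged/promoted contexts lose precision
};

// Honor the preinliner decision only for real (non-synthetic) contexts,
// echoing the check added to shouldInlineCandidate above.
static bool preinlinerSaysInline(const Context &Ctx, bool UsePreInliner) {
  return UsePreInliner && !Ctx.IsSynthetic &&
         (Ctx.Attributes & ContextShouldBeInlinedBit) != 0;
}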
diff --git a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
index 08d316337ef5..21395460bccb 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
@@ -415,9 +415,7 @@ void PseudoProbeUpdatePass::runOnFunction(Function &F,
FunctionAnalysisManager &FAM) {
BlockFrequencyInfo &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
auto BBProfileCount = [&BFI](BasicBlock *BB) {
- return BFI.getBlockProfileCount(BB)
- ? BFI.getBlockProfileCount(BB).getValue()
- : 0;
+ return BFI.getBlockProfileCount(BB).getValueOr(0);
};
// Collect the sum of execution weight for each probe.
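The BFI change above swaps a hand-written ternary for Optional's value-or-default accessor. A tiny equivalent using std::optional (llvm::Optional::getValueOr plays the same role in the hunk):

#include <cstdint>
#include <optional>

// Returns the count if the analysis produced one, otherwise 0, matching the
// shape of BFI.getBlockProfileCount(BB).getValueOr(0).
static uint64_t blockCountOrZero(std::optional<uint64_t> Count) {
  return Count.value_or(0);
}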
diff --git a/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp b/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp
index 655a7a404951..0f2412dce1c9 100644
--- a/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp
+++ b/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp
@@ -30,23 +30,20 @@ static bool stripDeadPrototypes(Module &M) {
bool MadeChange = false;
// Erase dead function prototypes.
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {
- Function *F = &*I++;
+ for (Function &F : llvm::make_early_inc_range(M)) {
// Function must be a prototype and unused.
- if (F->isDeclaration() && F->use_empty()) {
- F->eraseFromParent();
+ if (F.isDeclaration() && F.use_empty()) {
+ F.eraseFromParent();
++NumDeadPrototypes;
MadeChange = true;
}
}
// Erase dead global var prototypes.
- for (Module::global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ) {
- GlobalVariable *GV = &*I++;
+ for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals())) {
// Global must be a prototype and unused.
- if (GV->isDeclaration() && GV->use_empty())
- GV->eraseFromParent();
+ if (GV.isDeclaration() && GV.use_empty())
+ GV.eraseFromParent();
}
// Return an indication of whether we changed anything or not.
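Both loops above now use llvm::make_early_inc_range so elements can be erased mid-iteration. A plain-C++ sketch of the same idea, advancing a copy of the iterator before erasing:

#include <list>
#include <string>

// Erase empty strings from a list while walking it. Incrementing before
// erasing is exactly what the early-inc adaptor automates.
static void stripEmpty(std::list<std::string> &L) {
  for (auto It = L.begin(), End = L.end(); It != End;) {
    auto Cur = It++;  // advance first so the erase cannot invalidate It
    if (Cur->empty())
      L.erase(Cur);
  }
}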
diff --git a/llvm/lib/Transforms/IPO/StripSymbols.cpp b/llvm/lib/Transforms/IPO/StripSymbols.cpp
index 168740a1158e..9d4e9464f361 100644
--- a/llvm/lib/Transforms/IPO/StripSymbols.cpp
+++ b/llvm/lib/Transforms/IPO/StripSymbols.cpp
@@ -214,13 +214,13 @@ static bool StripSymbolNames(Module &M, bool PreserveDbgInfo) {
findUsedValues(M.getGlobalVariable("llvm.compiler.used"), llvmUsedValues);
for (GlobalVariable &GV : M.globals()) {
- if (GV.hasLocalLinkage() && llvmUsedValues.count(&GV) == 0)
+ if (GV.hasLocalLinkage() && !llvmUsedValues.contains(&GV))
if (!PreserveDbgInfo || !GV.getName().startswith("llvm.dbg"))
GV.setName(""); // Internal symbols can't participate in linkage
}
for (Function &I : M) {
- if (I.hasLocalLinkage() && llvmUsedValues.count(&I) == 0)
+ if (I.hasLocalLinkage() && !llvmUsedValues.contains(&I))
if (!PreserveDbgInfo || !I.getName().startswith("llvm.dbg"))
I.setName(""); // Internal symbols can't participate in linkage
if (auto *Symtab = I.getValueSymbolTable())
diff --git a/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
index 37329b489555..0cc1b37844f6 100644
--- a/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
+++ b/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
@@ -33,6 +33,19 @@ using namespace llvm;
namespace {
+// Determine if a promotion alias should be created for a symbol name.
+static bool allowPromotionAlias(const std::string &Name) {
+ // Promotion aliases are used only in inline assembly. It's safe to
+ // simply skip unusual names. This is a subset of MCAsmInfo::isAcceptableChar()
+ // and MCAsmInfoXCOFF::isAcceptableChar().
+ for (const char &C : Name) {
+ if (isAlnum(C) || C == '_' || C == '.')
+ continue;
+ return false;
+ }
+ return true;
+}
+
// Promote each local-linkage entity defined by ExportM and used by ImportM by
// changing visibility and appending the given ModuleId.
void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId,
@@ -55,6 +68,7 @@ void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId,
}
}
+ std::string OldName = Name.str();
std::string NewName = (Name + ModuleId).str();
if (const auto *C = ExportGV.getComdat())
@@ -69,6 +83,13 @@ void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId,
ImportGV->setName(NewName);
ImportGV->setVisibility(GlobalValue::HiddenVisibility);
}
+
+ if (isa<Function>(&ExportGV) && allowPromotionAlias(OldName)) {
+ // Create a local alias with the original name to avoid breaking
+ // references from inline assembly.
+ std::string Alias = ".set " + OldName + "," + NewName + "\n";
+ ExportM.appendModuleInlineAsm(Alias);
+ }
}
if (!RenamedComdats.empty())
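A condensed sketch of what the new promotion-alias path does, using only the standard library; the helper names are made up, but the character filter and the ".set old,new" directive mirror the hunk above.

#include <cctype>
#include <string>

// Accept only names that are safe to reference from inline assembly.
static bool isAsmSafeName(const std::string &Name) {
  for (unsigned char C : Name)
    if (!std::isalnum(C) && C != '_' && C != '.')
      return false;
  return true;
}

// Build the module-level asm that keeps the old name usable after the symbol
// was renamed for ThinLTO promotion; empty string means "emit nothing".
static std::string makePromotionAlias(const std::string &Old,
                                      const std::string &New) {
  return isAsmSafeName(Old) ? ".set " + Old + "," + New + "\n" : std::string();
}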
@@ -143,8 +164,7 @@ void simplifyExternals(Module &M) {
FunctionType *EmptyFT =
FunctionType::get(Type::getVoidTy(M.getContext()), false);
- for (auto I = M.begin(), E = M.end(); I != E;) {
- Function &F = *I++;
+ for (Function &F : llvm::make_early_inc_range(M)) {
if (F.isDeclaration() && F.use_empty()) {
F.eraseFromParent();
continue;
@@ -160,16 +180,15 @@ void simplifyExternals(Module &M) {
F.getAddressSpace(), "", &M);
NewF->copyAttributesFrom(&F);
// Only copy function attributes.
- NewF->setAttributes(
- AttributeList::get(M.getContext(), AttributeList::FunctionIndex,
- F.getAttributes().getFnAttributes()));
+ NewF->setAttributes(AttributeList::get(M.getContext(),
+ AttributeList::FunctionIndex,
+ F.getAttributes().getFnAttrs()));
NewF->takeName(&F);
F.replaceAllUsesWith(ConstantExpr::getBitCast(NewF, F.getType()));
F.eraseFromParent();
}
- for (auto I = M.global_begin(), E = M.global_end(); I != E;) {
- GlobalVariable &GV = *I++;
+ for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals())) {
if (GV.isDeclaration() && GV.use_empty()) {
GV.eraseFromParent();
continue;
@@ -304,7 +323,8 @@ void splitAndWriteThinLTOBitcode(
return true;
if (auto *F = dyn_cast<Function>(GV))
return EligibleVirtualFns.count(F);
- if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getBaseObject()))
+ if (auto *GVar =
+ dyn_cast_or_null<GlobalVariable>(GV->getAliaseeObject()))
return HasTypeMetadata(GVar);
return false;
}));
@@ -333,7 +353,7 @@ void splitAndWriteThinLTOBitcode(
// Remove all globals with type metadata, globals with comdats that live in
// MergedM, and aliases pointing to such globals from the thin LTO module.
filterModule(&M, [&](const GlobalValue *GV) {
- if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getBaseObject()))
+ if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getAliaseeObject()))
if (HasTypeMetadata(GVar))
return false;
if (const auto *C = GV->getComdat())
diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
index 7a8946110785..61054e7ae46f 100644
--- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -1288,7 +1288,7 @@ void DevirtModule::tryICallBranchFunnel(
M.getDataLayout().getProgramAddressSpace(),
"branch_funnel", &M);
}
- JT->addAttribute(1, Attribute::Nest);
+ JT->addParamAttr(0, Attribute::Nest);
std::vector<Value *> JTArgs;
JTArgs.push_back(JT->arg_begin());
@@ -1361,10 +1361,10 @@ void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo,
M.getContext(), ArrayRef<Attribute>{Attribute::get(
M.getContext(), Attribute::Nest)}));
for (unsigned I = 0; I + 2 < Attrs.getNumAttrSets(); ++I)
- NewArgAttrs.push_back(Attrs.getParamAttributes(I));
+ NewArgAttrs.push_back(Attrs.getParamAttrs(I));
NewCS->setAttributes(
- AttributeList::get(M.getContext(), Attrs.getFnAttributes(),
- Attrs.getRetAttributes(), NewArgAttrs));
+ AttributeList::get(M.getContext(), Attrs.getFnAttrs(),
+ Attrs.getRetAttrs(), NewArgAttrs));
CB.replaceAllUsesWith(NewCS);
CB.eraseFromParent();
@@ -1786,10 +1786,8 @@ void DevirtModule::scanTypeTestUsers(
// points to a member of the type identifier %md. Group calls by (type ID,
// offset) pair (effectively the identity of the virtual function) and store
// to CallSlots.
- for (auto I = TypeTestFunc->use_begin(), E = TypeTestFunc->use_end();
- I != E;) {
- auto CI = dyn_cast<CallInst>(I->getUser());
- ++I;
+ for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses())) {
+ auto *CI = dyn_cast<CallInst>(U.getUser());
if (!CI)
continue;
@@ -1858,11 +1856,8 @@ void DevirtModule::scanTypeTestUsers(
void DevirtModule::scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc) {
Function *TypeTestFunc = Intrinsic::getDeclaration(&M, Intrinsic::type_test);
- for (auto I = TypeCheckedLoadFunc->use_begin(),
- E = TypeCheckedLoadFunc->use_end();
- I != E;) {
- auto CI = dyn_cast<CallInst>(I->getUser());
- ++I;
+ for (Use &U : llvm::make_early_inc_range(TypeCheckedLoadFunc->uses())) {
+ auto *CI = dyn_cast<CallInst>(U.getUser());
if (!CI)
continue;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index d01a021bf3f4..eb1b8a29cfc5 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -939,7 +939,7 @@ Instruction *InstCombinerImpl::foldAddWithConstant(BinaryOperator &Add) {
// add (xor X, LowMaskC), C --> sub (LowMaskC + C), X
if (C2->isMask()) {
KnownBits LHSKnown = computeKnownBits(X, 0, &Add);
- if ((*C2 | LHSKnown.Zero).isAllOnesValue())
+ if ((*C2 | LHSKnown.Zero).isAllOnes())
return BinaryOperator::CreateSub(ConstantInt::get(Ty, *C2 + *C), X);
}
@@ -963,7 +963,7 @@ Instruction *InstCombinerImpl::foldAddWithConstant(BinaryOperator &Add) {
}
}
- if (C->isOneValue() && Op0->hasOneUse()) {
+ if (C->isOne() && Op0->hasOneUse()) {
// add (sext i1 X), 1 --> zext (not X)
// TODO: The smallest IR representation is (select X, 0, 1), and that would
// not require the one-use check. But we need to remove a transform in
@@ -1355,6 +1355,17 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
if (match(RHS, m_OneUse(m_c_Add(m_Value(A), m_Specific(LHS)))))
return BinaryOperator::CreateAdd(A, Builder.CreateShl(LHS, 1, "reass.add"));
+ {
+ // (A + C1) + (C2 - B) --> (A - B) + (C1 + C2)
+ Constant *C1, *C2;
+ if (match(&I, m_c_Add(m_Add(m_Value(A), m_ImmConstant(C1)),
+ m_Sub(m_ImmConstant(C2), m_Value(B)))) &&
+ (LHS->hasOneUse() || RHS->hasOneUse())) {
+ Value *Sub = Builder.CreateSub(A, B);
+ return BinaryOperator::CreateAdd(Sub, ConstantExpr::getAdd(C1, C2));
+ }
+ }
+
// X % C0 + (( X / C0 ) % C1) * C0 => X % (C0 * C1)
if (Value *V = SimplifyAddWithRemainder(I)) return replaceInstUsesWith(I, V);
@@ -1817,12 +1828,8 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
if (match(Op0, m_AllOnes()))
return BinaryOperator::CreateNot(Op1);
- // (~X) - (~Y) --> Y - X
- Value *X, *Y;
- if (match(Op0, m_Not(m_Value(X))) && match(Op1, m_Not(m_Value(Y))))
- return BinaryOperator::CreateSub(Y, X);
-
// (X + -1) - Y --> ~Y + X
+ Value *X, *Y;
if (match(Op0, m_OneUse(m_Add(m_Value(X), m_AllOnes()))))
return BinaryOperator::CreateAdd(Builder.CreateNot(Op1), X);
@@ -1843,6 +1850,17 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
return BinaryOperator::CreateSub(X, Add);
}
+ // (~X) - (~Y) --> Y - X
+ // This is placed after the other reassociations and explicitly excludes a
+ // sub-of-sub pattern to avoid infinite looping.
+ if (isFreeToInvert(Op0, Op0->hasOneUse()) &&
+ isFreeToInvert(Op1, Op1->hasOneUse()) &&
+ !match(Op0, m_Sub(m_ImmConstant(), m_Value()))) {
+ Value *NotOp0 = Builder.CreateNot(Op0);
+ Value *NotOp1 = Builder.CreateNot(Op1);
+ return BinaryOperator::CreateSub(NotOp1, NotOp0);
+ }
+
auto m_AddRdx = [](Value *&Vec) {
return m_OneUse(m_Intrinsic<Intrinsic::vector_reduce_add>(m_Value(Vec)));
};
@@ -1892,7 +1910,7 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
// Turn this into a xor if LHS is 2^n-1 and the remaining bits are known
// zero.
KnownBits RHSKnown = computeKnownBits(Op1, 0, &I);
- if ((*Op0C | RHSKnown.Zero).isAllOnesValue())
+ if ((*Op0C | RHSKnown.Zero).isAllOnes())
return BinaryOperator::CreateXor(Op1, Op0);
}
@@ -2039,12 +2057,31 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
return BinaryOperator::CreateAnd(
Op0, Builder.CreateNot(Y, Y->getName() + ".not"));
+ // ~X - Min/Max(~X, Y) -> ~Min/Max(X, ~Y) - X
+ // ~X - Min/Max(Y, ~X) -> ~Min/Max(X, ~Y) - X
+ // Min/Max(~X, Y) - ~X -> X - ~Min/Max(X, ~Y)
+ // Min/Max(Y, ~X) - ~X -> X - ~Min/Max(X, ~Y)
+ // As long as Y is freely invertible, this will be neutral or a win.
+ // Note: We don't generate the inverse max/min, just create the 'not' of
+ // it and let other folds do the rest.
+ if (match(Op0, m_Not(m_Value(X))) &&
+ match(Op1, m_c_MaxOrMin(m_Specific(Op0), m_Value(Y))) &&
+ !Op0->hasNUsesOrMore(3) && isFreeToInvert(Y, Y->hasOneUse())) {
+ Value *Not = Builder.CreateNot(Op1);
+ return BinaryOperator::CreateSub(Not, X);
+ }
+ if (match(Op1, m_Not(m_Value(X))) &&
+ match(Op0, m_c_MaxOrMin(m_Specific(Op1), m_Value(Y))) &&
+ !Op1->hasNUsesOrMore(3) && isFreeToInvert(Y, Y->hasOneUse())) {
+ Value *Not = Builder.CreateNot(Op0);
+ return BinaryOperator::CreateSub(X, Not);
+ }
+
+ // TODO: This is the same logic as above but handles the cmp-select idioms
+ // for min/max, so the use checks are increased to account for the
+ // extra instructions. If we canonicalize to intrinsics, this block
+ // can likely be removed.
{
- // ~A - Min/Max(~A, O) -> Max/Min(A, ~O) - A
- // ~A - Min/Max(O, ~A) -> Max/Min(A, ~O) - A
- // Min/Max(~A, O) - ~A -> A - Max/Min(A, ~O)
- // Min/Max(O, ~A) - ~A -> A - Max/Min(A, ~O)
- // So long as O here is freely invertible, this will be neutral or a win.
Value *LHS, *RHS, *A;
Value *NotA = Op0, *MinMax = Op1;
SelectPatternFlavor SPF = matchSelectPattern(MinMax, LHS, RHS).Flavor;
@@ -2057,12 +2094,10 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
match(NotA, m_Not(m_Value(A))) && (NotA == LHS || NotA == RHS)) {
if (NotA == LHS)
std::swap(LHS, RHS);
- // LHS is now O above and expected to have at least 2 uses (the min/max)
- // NotA is epected to have 2 uses from the min/max and 1 from the sub.
+ // LHS is now Y above and expected to have at least 2 uses (the min/max)
+ // NotA is expected to have 2 uses from the min/max and 1 from the sub.
if (isFreeToInvert(LHS, !LHS->hasNUsesOrMore(3)) &&
!NotA->hasNUsesOrMore(4)) {
- // Note: We don't generate the inverse max/min, just create the not of
- // it and let other folds do the rest.
Value *Not = Builder.CreateNot(MinMax);
if (NotA == Op0)
return BinaryOperator::CreateSub(Not, A);
@@ -2119,7 +2154,7 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
unsigned BitWidth = Ty->getScalarSizeInBits();
unsigned Cttz = AddC->countTrailingZeros();
APInt HighMask(APInt::getHighBitsSet(BitWidth, BitWidth - Cttz));
- if ((HighMask & *AndC).isNullValue())
+ if ((HighMask & *AndC).isZero())
return BinaryOperator::CreateAnd(Op0, ConstantInt::get(Ty, ~(*AndC)));
}
@@ -2133,6 +2168,19 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
return replaceInstUsesWith(
I, Builder.CreateIntrinsic(Intrinsic::umin, {I.getType()}, {Op0, Y}));
+ // umax(X, Op1) - Op1 --> usub.sat(X, Op1)
+ // TODO: The one-use restriction is not strictly necessary, but it may
+ // require improving other pattern matching and/or codegen.
+ if (match(Op0, m_OneUse(m_c_UMax(m_Value(X), m_Specific(Op1)))))
+ return replaceInstUsesWith(
+ I, Builder.CreateIntrinsic(Intrinsic::usub_sat, {Ty}, {X, Op1}));
+
+ // Op0 - umax(X, Op0) --> 0 - usub.sat(X, Op0)
+ if (match(Op1, m_OneUse(m_c_UMax(m_Value(X), m_Specific(Op0))))) {
+ Value *USub = Builder.CreateIntrinsic(Intrinsic::usub_sat, {Ty}, {X, Op0});
+ return BinaryOperator::CreateNeg(USub);
+ }
+
// C - ctpop(X) => ctpop(~X) if C is bitwidth
if (match(Op0, m_SpecificInt(Ty->getScalarSizeInBits())) &&
match(Op1, m_OneUse(m_Intrinsic<Intrinsic::ctpop>(m_Value(X)))))
@@ -2173,8 +2221,8 @@ static Instruction *foldFNegIntoConstant(Instruction &I) {
// TODO: We could propagate nsz/ninf from fdiv alone?
FastMathFlags FMF = I.getFastMathFlags();
FastMathFlags OpFMF = FNegOp->getFastMathFlags();
- FDiv->setHasNoSignedZeros(FMF.noSignedZeros() & OpFMF.noSignedZeros());
- FDiv->setHasNoInfs(FMF.noInfs() & OpFMF.noInfs());
+ FDiv->setHasNoSignedZeros(FMF.noSignedZeros() && OpFMF.noSignedZeros());
+ FDiv->setHasNoInfs(FMF.noInfs() && OpFMF.noInfs());
return FDiv;
}
// With NSZ [ counter-example with -0.0: -(-0.0 + 0.0) != 0.0 + -0.0 ]:
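The add/sub hunks above introduce several algebraic rewrites. A quick exhaustive check over 8-bit values (wrapping arithmetic, as in IR) confirms two of them; this is a verification sketch, not compiler code.

#include <algorithm>
#include <cassert>
#include <cstdint>

static uint8_t usubSat(uint8_t X, uint8_t Y) { return X > Y ? X - Y : 0; }

int main() {
  for (unsigned A = 0; A < 256; ++A)
    for (unsigned B = 0; B < 256; ++B) {
      uint8_t X = A, Y = B, C1 = 13, C2 = 200;
      // (A + C1) + (C2 - B) --> (A - B) + (C1 + C2), all mod 2^8.
      assert(uint8_t((X + C1) + (C2 - Y)) == uint8_t((X - Y) + (C1 + C2)));
      // umax(X, Y) - Y --> usub.sat(X, Y).
      assert(uint8_t(std::max(X, Y) - Y) == usubSat(X, Y));
    }
  return 0;
}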
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 120852c44474..06c9bf650f37 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -185,14 +185,15 @@ enum MaskedICmpType {
/// satisfies.
static unsigned getMaskedICmpType(Value *A, Value *B, Value *C,
ICmpInst::Predicate Pred) {
- ConstantInt *ACst = dyn_cast<ConstantInt>(A);
- ConstantInt *BCst = dyn_cast<ConstantInt>(B);
- ConstantInt *CCst = dyn_cast<ConstantInt>(C);
+ const APInt *ConstA = nullptr, *ConstB = nullptr, *ConstC = nullptr;
+ match(A, m_APInt(ConstA));
+ match(B, m_APInt(ConstB));
+ match(C, m_APInt(ConstC));
bool IsEq = (Pred == ICmpInst::ICMP_EQ);
- bool IsAPow2 = (ACst && !ACst->isZero() && ACst->getValue().isPowerOf2());
- bool IsBPow2 = (BCst && !BCst->isZero() && BCst->getValue().isPowerOf2());
+ bool IsAPow2 = ConstA && ConstA->isPowerOf2();
+ bool IsBPow2 = ConstB && ConstB->isPowerOf2();
unsigned MaskVal = 0;
- if (CCst && CCst->isZero()) {
+ if (ConstC && ConstC->isZero()) {
// if C is zero, then both A and B qualify as mask
MaskVal |= (IsEq ? (Mask_AllZeros | AMask_Mixed | BMask_Mixed)
: (Mask_NotAllZeros | AMask_NotMixed | BMask_NotMixed));
@@ -211,7 +212,7 @@ static unsigned getMaskedICmpType(Value *A, Value *B, Value *C,
if (IsAPow2)
MaskVal |= (IsEq ? (Mask_NotAllZeros | AMask_NotMixed)
: (Mask_AllZeros | AMask_Mixed));
- } else if (ACst && CCst && ConstantExpr::getAnd(ACst, CCst) == CCst) {
+ } else if (ConstA && ConstC && ConstC->isSubsetOf(*ConstA)) {
MaskVal |= (IsEq ? AMask_Mixed : AMask_NotMixed);
}
@@ -221,7 +222,7 @@ static unsigned getMaskedICmpType(Value *A, Value *B, Value *C,
if (IsBPow2)
MaskVal |= (IsEq ? (Mask_NotAllZeros | BMask_NotMixed)
: (Mask_AllZeros | BMask_Mixed));
- } else if (BCst && CCst && ConstantExpr::getAnd(BCst, CCst) == CCst) {
+ } else if (ConstB && ConstC && ConstC->isSubsetOf(*ConstB)) {
MaskVal |= (IsEq ? BMask_Mixed : BMask_NotMixed);
}
@@ -269,9 +270,9 @@ getMaskedTypeForICmpPair(Value *&A, Value *&B, Value *&C,
ICmpInst *RHS,
ICmpInst::Predicate &PredL,
ICmpInst::Predicate &PredR) {
- // vectors are not (yet?) supported. Don't support pointers either.
- if (!LHS->getOperand(0)->getType()->isIntegerTy() ||
- !RHS->getOperand(0)->getType()->isIntegerTy())
+ // Don't allow pointers. Splat vectors are fine.
+ if (!LHS->getOperand(0)->getType()->isIntOrIntVectorTy() ||
+ !RHS->getOperand(0)->getType()->isIntOrIntVectorTy())
return None;
// Here comes the tricky part:
@@ -367,9 +368,9 @@ getMaskedTypeForICmpPair(Value *&A, Value *&B, Value *&C,
} else {
return None;
}
+
+ assert(Ok && "Failed to find AND on the right side of the RHS icmp.");
}
- if (!Ok)
- return None;
if (L11 == A) {
B = L12;
@@ -619,8 +620,8 @@ static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
// Remaining cases assume at least that B and D are constant, and depend on
// their actual values. This isn't strictly necessary, just a "handle the
// easy cases for now" decision.
- ConstantInt *BCst, *DCst;
- if (!match(B, m_ConstantInt(BCst)) || !match(D, m_ConstantInt(DCst)))
+ const APInt *ConstB, *ConstD;
+ if (!match(B, m_APInt(ConstB)) || !match(D, m_APInt(ConstD)))
return nullptr;
if (Mask & (Mask_NotAllZeros | BMask_NotAllOnes)) {
@@ -629,11 +630,10 @@ static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
// -> (icmp ne (A & B), 0) or (icmp ne (A & D), 0)
// Only valid if one of the masks is a superset of the other (check "B&D" is
// the same as either B or D).
- APInt NewMask = BCst->getValue() & DCst->getValue();
-
- if (NewMask == BCst->getValue())
+ APInt NewMask = *ConstB & *ConstD;
+ if (NewMask == *ConstB)
return LHS;
- else if (NewMask == DCst->getValue())
+ else if (NewMask == *ConstD)
return RHS;
}
@@ -642,11 +642,10 @@ static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
// -> (icmp ne (A & B), A) or (icmp ne (A & D), A)
// Only valid if one of the masks is a superset of the other (check "B|D" is
// the same as either B or D).
- APInt NewMask = BCst->getValue() | DCst->getValue();
-
- if (NewMask == BCst->getValue())
+ APInt NewMask = *ConstB | *ConstD;
+ if (NewMask == *ConstB)
return LHS;
- else if (NewMask == DCst->getValue())
+ else if (NewMask == *ConstD)
return RHS;
}
@@ -661,23 +660,21 @@ static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
// We can't simply use C and E because we might actually handle
// (icmp ne (A & B), B) & (icmp eq (A & D), D)
// with B and D, having a single bit set.
- ConstantInt *CCst, *ECst;
- if (!match(C, m_ConstantInt(CCst)) || !match(E, m_ConstantInt(ECst)))
+ const APInt *OldConstC, *OldConstE;
+ if (!match(C, m_APInt(OldConstC)) || !match(E, m_APInt(OldConstE)))
return nullptr;
- if (PredL != NewCC)
- CCst = cast<ConstantInt>(ConstantExpr::getXor(BCst, CCst));
- if (PredR != NewCC)
- ECst = cast<ConstantInt>(ConstantExpr::getXor(DCst, ECst));
+
+ const APInt ConstC = PredL != NewCC ? *ConstB ^ *OldConstC : *OldConstC;
+ const APInt ConstE = PredR != NewCC ? *ConstD ^ *OldConstE : *OldConstE;
// If there is a conflict, we should actually return a false for the
// whole construct.
- if (((BCst->getValue() & DCst->getValue()) &
- (CCst->getValue() ^ ECst->getValue())).getBoolValue())
+ if (((*ConstB & *ConstD) & (ConstC ^ ConstE)).getBoolValue())
return ConstantInt::get(LHS->getType(), !IsAnd);
Value *NewOr1 = Builder.CreateOr(B, D);
- Value *NewOr2 = ConstantExpr::getOr(CCst, ECst);
Value *NewAnd = Builder.CreateAnd(A, NewOr1);
+ Constant *NewOr2 = ConstantInt::get(A->getType(), ConstC | ConstE);
return Builder.CreateICmp(NewCC, NewAnd, NewOr2);
}
@@ -777,20 +774,6 @@ foldAndOrOfEqualityCmpsWithConstants(ICmpInst *LHS, ICmpInst *RHS,
return Builder.CreateICmp(Pred, Or, ConstantInt::get(X->getType(), *C2));
}
- // Special case: get the ordering right when the values wrap around zero.
- // Ie, we assumed the constants were unsigned when swapping earlier.
- if (C1->isNullValue() && C2->isAllOnesValue())
- std::swap(C1, C2);
-
- if (*C1 == *C2 - 1) {
- // (X == 13 || X == 14) --> X - 13 <=u 1
- // (X != 13 && X != 14) --> X - 13 >u 1
- // An 'add' is the canonical IR form, so favor that over a 'sub'.
- Value *Add = Builder.CreateAdd(X, ConstantInt::get(X->getType(), -(*C1)));
- auto NewPred = JoinedByAnd ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_ULE;
- return Builder.CreateICmp(NewPred, Add, ConstantInt::get(X->getType(), 1));
- }
-
return nullptr;
}
@@ -923,7 +906,7 @@ static Value *foldSignedTruncationCheck(ICmpInst *ICmp0, ICmpInst *ICmp1,
if (!tryToDecompose(OtherICmp, X0, UnsetBitsMask))
return nullptr;
- assert(!UnsetBitsMask.isNullValue() && "empty mask makes no sense.");
+ assert(!UnsetBitsMask.isZero() && "empty mask makes no sense.");
// Are they working on the same value?
Value *X;
@@ -1113,8 +1096,8 @@ static Value *extractIntPart(const IntPart &P, IRBuilderBase &Builder) {
/// (icmp eq X0, Y0) & (icmp eq X1, Y1) -> icmp eq X01, Y01
/// (icmp ne X0, Y0) | (icmp ne X1, Y1) -> icmp ne X01, Y01
/// where X0, X1 and Y0, Y1 are adjacent parts extracted from an integer.
-static Value *foldEqOfParts(ICmpInst *Cmp0, ICmpInst *Cmp1, bool IsAnd,
- InstCombiner::BuilderTy &Builder) {
+Value *InstCombinerImpl::foldEqOfParts(ICmpInst *Cmp0, ICmpInst *Cmp1,
+ bool IsAnd) {
if (!Cmp0->hasOneUse() || !Cmp1->hasOneUse())
return nullptr;
@@ -1202,6 +1185,51 @@ static Value *foldAndOrOfICmpsWithConstEq(ICmpInst *Cmp0, ICmpInst *Cmp1,
return Builder.CreateBinOp(Logic.getOpcode(), Cmp0, SubstituteCmp);
}
+/// Fold (icmp Pred1 V1, C1) & (icmp Pred2 V2, C2)
+/// or (icmp Pred1 V1, C1) | (icmp Pred2 V2, C2)
+/// into a single comparison using range-based reasoning.
+static Value *foldAndOrOfICmpsUsingRanges(
+ ICmpInst::Predicate Pred1, Value *V1, const APInt &C1,
+ ICmpInst::Predicate Pred2, Value *V2, const APInt &C2,
+ IRBuilderBase &Builder, bool IsAnd) {
+ // Look through an add of a constant offset on V1, V2, or both operands. This
+ // allows us to turn the "V + C' < C''" range idiom into a proper range.
+ const APInt *Offset1 = nullptr, *Offset2 = nullptr;
+ if (V1 != V2) {
+ Value *X;
+ if (match(V1, m_Add(m_Value(X), m_APInt(Offset1))))
+ V1 = X;
+ if (match(V2, m_Add(m_Value(X), m_APInt(Offset2))))
+ V2 = X;
+ }
+
+ if (V1 != V2)
+ return nullptr;
+
+ ConstantRange CR1 = ConstantRange::makeExactICmpRegion(Pred1, C1);
+ if (Offset1)
+ CR1 = CR1.subtract(*Offset1);
+
+ ConstantRange CR2 = ConstantRange::makeExactICmpRegion(Pred2, C2);
+ if (Offset2)
+ CR2 = CR2.subtract(*Offset2);
+
+ Optional<ConstantRange> CR =
+ IsAnd ? CR1.exactIntersectWith(CR2) : CR1.exactUnionWith(CR2);
+ if (!CR)
+ return nullptr;
+
+ CmpInst::Predicate NewPred;
+ APInt NewC, Offset;
+ CR->getEquivalentICmp(NewPred, NewC, Offset);
+
+ Type *Ty = V1->getType();
+ Value *NewV = V1;
+ if (Offset != 0)
+ NewV = Builder.CreateAdd(NewV, ConstantInt::get(Ty, Offset));
+ return Builder.CreateICmp(NewPred, NewV, ConstantInt::get(Ty, NewC));
+}
+
/// Fold (icmp)&(icmp) if possible.
Value *InstCombinerImpl::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS,
BinaryOperator &And) {
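foldAndOrOfICmpsUsingRanges replaces the hand-written case table deleted in the next hunk: two constant compares of the same value describe two ranges whose intersection (for 'and') or union (for 'or') can often be re-expressed as a single shifted unsigned compare. One such rewrite, checked exhaustively on 8-bit values as a sketch (plain arithmetic, not the ConstantRange API):

#include <cassert>
#include <cstdint>

int main() {
  // (X u> 13 && X u< 15)  <=>  (X - 14) u< 1, the classic range test the
  // old switch emitted and the range-based path now derives generically.
  for (unsigned V = 0; V < 256; ++V) {
    uint8_t X = V;
    bool Pair = (X > 13) && (X < 15);
    bool Range = uint8_t(X - 14) < 1;
    assert(Pair == Range);
  }
  return 0;
}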
@@ -1262,170 +1290,64 @@ Value *InstCombinerImpl::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS,
foldUnsignedUnderflowCheck(RHS, LHS, /*IsAnd=*/true, Q, Builder))
return X;
- if (Value *X = foldEqOfParts(LHS, RHS, /*IsAnd=*/true, Builder))
+ if (Value *X = foldEqOfParts(LHS, RHS, /*IsAnd=*/true))
return X;
// This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2).
Value *LHS0 = LHS->getOperand(0), *RHS0 = RHS->getOperand(0);
- ConstantInt *LHSC, *RHSC;
- if (!match(LHS->getOperand(1), m_ConstantInt(LHSC)) ||
- !match(RHS->getOperand(1), m_ConstantInt(RHSC)))
- return nullptr;
-
- if (LHSC == RHSC && PredL == PredR) {
- // (icmp ult A, C) & (icmp ult B, C) --> (icmp ult (A|B), C)
- // where C is a power of 2 or
- // (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0)
- if ((PredL == ICmpInst::ICMP_ULT && LHSC->getValue().isPowerOf2()) ||
- (PredL == ICmpInst::ICMP_EQ && LHSC->isZero())) {
- Value *NewOr = Builder.CreateOr(LHS0, RHS0);
- return Builder.CreateICmp(PredL, NewOr, LHSC);
- }
+ // (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0)
+ // TODO: Remove this when foldLogOpOfMaskedICmps can handle undefs.
+ if (PredL == ICmpInst::ICMP_EQ && match(LHS->getOperand(1), m_ZeroInt()) &&
+ PredR == ICmpInst::ICMP_EQ && match(RHS->getOperand(1), m_ZeroInt()) &&
+ LHS0->getType() == RHS0->getType()) {
+ Value *NewOr = Builder.CreateOr(LHS0, RHS0);
+ return Builder.CreateICmp(PredL, NewOr,
+ Constant::getNullValue(NewOr->getType()));
}
+ const APInt *LHSC, *RHSC;
+ if (!match(LHS->getOperand(1), m_APInt(LHSC)) ||
+ !match(RHS->getOperand(1), m_APInt(RHSC)))
+ return nullptr;
+
// (trunc x) == C1 & (and x, CA) == C2 -> (and x, CA|CMAX) == C1|C2
// where CMAX is the all ones value for the truncated type,
// iff the lower bits of C2 and CA are zero.
if (PredL == ICmpInst::ICMP_EQ && PredL == PredR && LHS->hasOneUse() &&
RHS->hasOneUse()) {
Value *V;
- ConstantInt *AndC, *SmallC = nullptr, *BigC = nullptr;
+ const APInt *AndC, *SmallC = nullptr, *BigC = nullptr;
// (trunc x) == C1 & (and x, CA) == C2
// (and x, CA) == C2 & (trunc x) == C1
if (match(RHS0, m_Trunc(m_Value(V))) &&
- match(LHS0, m_And(m_Specific(V), m_ConstantInt(AndC)))) {
+ match(LHS0, m_And(m_Specific(V), m_APInt(AndC)))) {
SmallC = RHSC;
BigC = LHSC;
} else if (match(LHS0, m_Trunc(m_Value(V))) &&
- match(RHS0, m_And(m_Specific(V), m_ConstantInt(AndC)))) {
+ match(RHS0, m_And(m_Specific(V), m_APInt(AndC)))) {
SmallC = LHSC;
BigC = RHSC;
}
if (SmallC && BigC) {
- unsigned BigBitSize = BigC->getType()->getBitWidth();
- unsigned SmallBitSize = SmallC->getType()->getBitWidth();
+ unsigned BigBitSize = BigC->getBitWidth();
+ unsigned SmallBitSize = SmallC->getBitWidth();
// Check that the low bits are zero.
APInt Low = APInt::getLowBitsSet(BigBitSize, SmallBitSize);
- if ((Low & AndC->getValue()).isNullValue() &&
- (Low & BigC->getValue()).isNullValue()) {
- Value *NewAnd = Builder.CreateAnd(V, Low | AndC->getValue());
- APInt N = SmallC->getValue().zext(BigBitSize) | BigC->getValue();
- Value *NewVal = ConstantInt::get(AndC->getType()->getContext(), N);
+ if ((Low & *AndC).isZero() && (Low & *BigC).isZero()) {
+ Value *NewAnd = Builder.CreateAnd(V, Low | *AndC);
+ APInt N = SmallC->zext(BigBitSize) | *BigC;
+ Value *NewVal = ConstantInt::get(NewAnd->getType(), N);
return Builder.CreateICmp(PredL, NewAnd, NewVal);
}
}
}
- // From here on, we only handle:
- // (icmp1 A, C1) & (icmp2 A, C2) --> something simpler.
- if (LHS0 != RHS0)
- return nullptr;
-
- // ICMP_[US][GL]E X, C is folded to ICMP_[US][GL]T elsewhere.
- if (PredL == ICmpInst::ICMP_UGE || PredL == ICmpInst::ICMP_ULE ||
- PredR == ICmpInst::ICMP_UGE || PredR == ICmpInst::ICMP_ULE ||
- PredL == ICmpInst::ICMP_SGE || PredL == ICmpInst::ICMP_SLE ||
- PredR == ICmpInst::ICMP_SGE || PredR == ICmpInst::ICMP_SLE)
- return nullptr;
-
- // We can't fold (ugt x, C) & (sgt x, C2).
- if (!predicatesFoldable(PredL, PredR))
- return nullptr;
-
- // Ensure that the larger constant is on the RHS.
- bool ShouldSwap;
- if (CmpInst::isSigned(PredL) ||
- (ICmpInst::isEquality(PredL) && CmpInst::isSigned(PredR)))
- ShouldSwap = LHSC->getValue().sgt(RHSC->getValue());
- else
- ShouldSwap = LHSC->getValue().ugt(RHSC->getValue());
-
- if (ShouldSwap) {
- std::swap(LHS, RHS);
- std::swap(LHSC, RHSC);
- std::swap(PredL, PredR);
- }
-
- // At this point, we know we have two icmp instructions
- // comparing a value against two constants and and'ing the result
- // together. Because of the above check, we know that we only have
- // icmp eq, icmp ne, icmp [su]lt, and icmp [SU]gt here. We also know
- // (from the icmp folding check above), that the two constants
- // are not equal and that the larger constant is on the RHS
- assert(LHSC != RHSC && "Compares not folded above?");
-
- switch (PredL) {
- default:
- llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_NE:
- switch (PredR) {
- default:
- llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_ULT:
- // (X != 13 & X u< 14) -> X < 13
- if (LHSC->getValue() == (RHSC->getValue() - 1))
- return Builder.CreateICmpULT(LHS0, LHSC);
- if (LHSC->isZero()) // (X != 0 & X u< C) -> X-1 u< C-1
- return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue(),
- false, true);
- break; // (X != 13 & X u< 15) -> no change
- case ICmpInst::ICMP_SLT:
- // (X != 13 & X s< 14) -> X < 13
- if (LHSC->getValue() == (RHSC->getValue() - 1))
- return Builder.CreateICmpSLT(LHS0, LHSC);
- // (X != INT_MIN & X s< C) -> X-(INT_MIN+1) u< (C-(INT_MIN+1))
- if (LHSC->isMinValue(true))
- return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue(),
- true, true);
- break; // (X != 13 & X s< 15) -> no change
- case ICmpInst::ICMP_NE:
- // Potential folds for this case should already be handled.
- break;
- }
- break;
- case ICmpInst::ICMP_UGT:
- switch (PredR) {
- default:
- llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_NE:
- // (X u> 13 & X != 14) -> X u> 14
- if (RHSC->getValue() == (LHSC->getValue() + 1))
- return Builder.CreateICmp(PredL, LHS0, RHSC);
- // X u> C & X != UINT_MAX -> (X-(C+1)) u< UINT_MAX-(C+1)
- if (RHSC->isMaxValue(false))
- return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue(),
- false, true);
- break; // (X u> 13 & X != 15) -> no change
- case ICmpInst::ICMP_ULT: // (X u> 13 & X u< 15) -> (X-14) u< 1
- return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue(),
- false, true);
- }
- break;
- case ICmpInst::ICMP_SGT:
- switch (PredR) {
- default:
- llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_NE:
- // (X s> 13 & X != 14) -> X s> 14
- if (RHSC->getValue() == (LHSC->getValue() + 1))
- return Builder.CreateICmp(PredL, LHS0, RHSC);
- // X s> C & X != INT_MAX -> (X-(C+1)) u< INT_MAX-(C+1)
- if (RHSC->isMaxValue(true))
- return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue(),
- true, true);
- break; // (X s> 13 & X != 15) -> no change
- case ICmpInst::ICMP_SLT: // (X s> 13 & X s< 15) -> (X-14) u< 1
- return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue(), true,
- true);
- }
- break;
- }
-
- return nullptr;
+ return foldAndOrOfICmpsUsingRanges(PredL, LHS0, *LHSC, PredR, RHS0, *RHSC,
+ Builder, /* IsAnd */ true);
}
Value *InstCombinerImpl::foldLogicOfFCmps(FCmpInst *LHS, FCmpInst *RHS,
@@ -1496,15 +1418,15 @@ static Instruction *reassociateFCmps(BinaryOperator &BO,
std::swap(Op0, Op1);
// Match inner binop and the predicate for combining 2 NAN checks into 1.
- BinaryOperator *BO1;
+ Value *BO10, *BO11;
FCmpInst::Predicate NanPred = Opcode == Instruction::And ? FCmpInst::FCMP_ORD
: FCmpInst::FCMP_UNO;
if (!match(Op0, m_FCmp(Pred, m_Value(X), m_AnyZeroFP())) || Pred != NanPred ||
- !match(Op1, m_BinOp(BO1)) || BO1->getOpcode() != Opcode)
+ !match(Op1, m_BinOp(Opcode, m_Value(BO10), m_Value(BO11))))
return nullptr;
// The inner logic op must have a matching fcmp operand.
- Value *BO10 = BO1->getOperand(0), *BO11 = BO1->getOperand(1), *Y;
+ Value *Y;
if (!match(BO10, m_FCmp(Pred, m_Value(Y), m_AnyZeroFP())) ||
Pred != NanPred || X->getType() != Y->getType())
std::swap(BO10, BO11);
@@ -1524,27 +1446,42 @@ static Instruction *reassociateFCmps(BinaryOperator &BO,
return BinaryOperator::Create(Opcode, NewFCmp, BO11);
}
-/// Match De Morgan's Laws:
+/// Match variations of De Morgan's Laws:
/// (~A & ~B) == (~(A | B))
/// (~A | ~B) == (~(A & B))
static Instruction *matchDeMorgansLaws(BinaryOperator &I,
InstCombiner::BuilderTy &Builder) {
- auto Opcode = I.getOpcode();
+ const Instruction::BinaryOps Opcode = I.getOpcode();
assert((Opcode == Instruction::And || Opcode == Instruction::Or) &&
"Trying to match De Morgan's Laws with something other than and/or");
// Flip the logic operation.
- Opcode = (Opcode == Instruction::And) ? Instruction::Or : Instruction::And;
+ const Instruction::BinaryOps FlippedOpcode =
+ (Opcode == Instruction::And) ? Instruction::Or : Instruction::And;
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
Value *A, *B;
- if (match(I.getOperand(0), m_OneUse(m_Not(m_Value(A)))) &&
- match(I.getOperand(1), m_OneUse(m_Not(m_Value(B)))) &&
+ if (match(Op0, m_OneUse(m_Not(m_Value(A)))) &&
+ match(Op1, m_OneUse(m_Not(m_Value(B)))) &&
!InstCombiner::isFreeToInvert(A, A->hasOneUse()) &&
!InstCombiner::isFreeToInvert(B, B->hasOneUse())) {
- Value *AndOr = Builder.CreateBinOp(Opcode, A, B, I.getName() + ".demorgan");
+ Value *AndOr =
+ Builder.CreateBinOp(FlippedOpcode, A, B, I.getName() + ".demorgan");
return BinaryOperator::CreateNot(AndOr);
}
+ // The 'not' ops may require reassociation.
+ // (A & ~B) & ~C --> A & ~(B | C)
+ // (~B & A) & ~C --> A & ~(B | C)
+ // (A | ~B) | ~C --> A | ~(B & C)
+ // (~B | A) | ~C --> A | ~(B & C)
+ Value *C;
+ if (match(Op0, m_OneUse(m_c_BinOp(Opcode, m_Value(A), m_Not(m_Value(B))))) &&
+ match(Op1, m_Not(m_Value(C)))) {
+ Value *FlippedBO = Builder.CreateBinOp(FlippedOpcode, B, C);
+ return BinaryOperator::Create(Opcode, A, Builder.CreateNot(FlippedBO));
+ }
+
return nullptr;
}
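The reassociated De Morgan forms added above are easy to sanity-check by brute force on small integers; a sketch:

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned A = 0; A < 256; ++A)
    for (unsigned B = 0; B < 256; ++B)
      for (unsigned C = 0; C < 256; C += 85) { // sample a few C values
        uint8_t X = A, Y = B, Z = C;
        // (A & ~B) & ~C --> A & ~(B | C), and the 'or' dual.
        assert(uint8_t((X & ~Y) & ~Z) == uint8_t(X & ~(Y | Z)));
        assert(uint8_t((X | ~Y) | ~Z) == uint8_t(X | ~(Y & Z)));
      }
  return 0;
}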
@@ -1778,6 +1715,72 @@ Instruction *InstCombinerImpl::narrowMaskedBinOp(BinaryOperator &And) {
return new ZExtInst(Builder.CreateAnd(NewBO, X), Ty);
}
+/// Try folding relatively complex patterns for both And and Or operations
+/// with all And and Or swapped.
+static Instruction *foldComplexAndOrPatterns(BinaryOperator &I,
+ InstCombiner::BuilderTy &Builder) {
+ const Instruction::BinaryOps Opcode = I.getOpcode();
+ assert(Opcode == Instruction::And || Opcode == Instruction::Or);
+
+ // Flip the logic operation.
+ const Instruction::BinaryOps FlippedOpcode =
+ (Opcode == Instruction::And) ? Instruction::Or : Instruction::And;
+
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ Value *A, *B, *C;
+
+ // (~(A | B) & C) | ... --> ...
+ // (~(A & B) | C) & ... --> ...
+ // TODO: The one-use checks are conservative. We just need to check that the
+ // total number of multiply-used values does not exceed the reduction
+ // in operations.
+ if (match(Op0, m_c_BinOp(FlippedOpcode,
+ m_Not(m_BinOp(Opcode, m_Value(A), m_Value(B))),
+ m_Value(C)))) {
+ // (~(A | B) & C) | (~(A | C) & B) --> (B ^ C) & ~A
+ // (~(A & B) | C) & (~(A & C) | B) --> ~((B ^ C) & A)
+ if (match(Op1,
+ m_OneUse(m_c_BinOp(FlippedOpcode,
+ m_OneUse(m_Not(m_c_BinOp(Opcode, m_Specific(A),
+ m_Specific(C)))),
+ m_Specific(B))))) {
+ Value *Xor = Builder.CreateXor(B, C);
+ return (Opcode == Instruction::Or)
+ ? BinaryOperator::CreateAnd(Xor, Builder.CreateNot(A))
+ : BinaryOperator::CreateNot(Builder.CreateAnd(Xor, A));
+ }
+
+ // (~(A | B) & C) | (~(B | C) & A) --> (A ^ C) & ~B
+ // (~(A & B) | C) & (~(B & C) | A) --> ~((A ^ C) & B)
+ if (match(Op1,
+ m_OneUse(m_c_BinOp(FlippedOpcode,
+ m_OneUse(m_Not(m_c_BinOp(Opcode, m_Specific(B),
+ m_Specific(C)))),
+ m_Specific(A))))) {
+ Value *Xor = Builder.CreateXor(A, C);
+ return (Opcode == Instruction::Or)
+ ? BinaryOperator::CreateAnd(Xor, Builder.CreateNot(B))
+ : BinaryOperator::CreateNot(Builder.CreateAnd(Xor, B));
+ }
+
+ // (~(A | B) & C) | ~(A | C) --> ~((B & C) | A)
+ // (~(A & B) | C) & ~(A & C) --> ~((B | C) & A)
+ if (match(Op1, m_OneUse(m_Not(m_OneUse(
+ m_c_BinOp(Opcode, m_Specific(A), m_Specific(C)))))))
+ return BinaryOperator::CreateNot(Builder.CreateBinOp(
+ Opcode, Builder.CreateBinOp(FlippedOpcode, B, C), A));
+
+ // (~(A | B) & C) | ~(B | C) --> ~((A & C) | B)
+ // (~(A & B) | C) & ~(B & C) --> ~((A | C) & B)
+ if (match(Op1, m_OneUse(m_Not(m_OneUse(
+ m_c_BinOp(Opcode, m_Specific(B), m_Specific(C)))))))
+ return BinaryOperator::CreateNot(Builder.CreateBinOp(
+ Opcode, Builder.CreateBinOp(FlippedOpcode, A, C), B));
+ }
+
+ return nullptr;
+}
+
// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
// here. We should standardize that construct where it is needed or choose some
// other way to ensure that commutated variants of patterns are not missed.
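Likewise for foldComplexAndOrPatterns: its first rewrite, (~(A | B) & C) | (~(A | C) & B) --> (B ^ C) & ~A, can be checked the same way; a sketch:

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned A = 0; A < 256; ++A)
    for (unsigned B = 0; B < 256; ++B)
      for (unsigned C = 0; C < 256; C += 51) { // sample C to keep it quick
        uint8_t X = A, Y = B, Z = C;
        uint8_t LHS = (~(X | Y) & Z) | (~(X | Z) & Y);
        uint8_t RHS = (Y ^ Z) & ~X;
        assert(LHS == RHS);
      }
  return 0;
}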
@@ -1803,6 +1806,9 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
if (Instruction *Xor = foldAndToXor(I, Builder))
return Xor;
+ if (Instruction *X = foldComplexAndOrPatterns(I, Builder))
+ return X;
+
// (A|B)&(A|C) -> A|(B&C) etc
if (Value *V = SimplifyUsingDistributiveLaws(I))
return replaceInstUsesWith(I, V);
@@ -1883,7 +1889,7 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
// (X + AddC) & LowMaskC --> X & LowMaskC
unsigned Ctlz = C->countLeadingZeros();
APInt LowMask(APInt::getLowBitsSet(Width, Width - Ctlz));
- if ((*AddC & LowMask).isNullValue())
+ if ((*AddC & LowMask).isZero())
return BinaryOperator::CreateAnd(X, Op1);
// If we are masking the result of the add down to exactly one bit and
@@ -1896,44 +1902,37 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
return BinaryOperator::CreateXor(NewAnd, Op1);
}
}
- }
- ConstantInt *AndRHS;
- if (match(Op1, m_ConstantInt(AndRHS))) {
- const APInt &AndRHSMask = AndRHS->getValue();
-
- // Optimize a variety of ((val OP C1) & C2) combinations...
- if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
- // ((C1 OP zext(X)) & C2) -> zext((C1-X) & C2) if C2 fits in the bitwidth
- // of X and OP behaves well when given trunc(C1) and X.
- // TODO: Do this for vectors by using m_APInt instead of m_ConstantInt.
- switch (Op0I->getOpcode()) {
- default:
- break;
+ // ((C1 OP zext(X)) & C2) -> zext((C1 OP X) & C2) if C2 fits in the
+ // bitwidth of X and OP behaves well when given trunc(C1) and X.
+ auto isSuitableBinOpcode = [](BinaryOperator *B) {
+ switch (B->getOpcode()) {
case Instruction::Xor:
case Instruction::Or:
case Instruction::Mul:
case Instruction::Add:
case Instruction::Sub:
- Value *X;
- ConstantInt *C1;
- // TODO: The one use restrictions could be relaxed a little if the AND
- // is going to be removed.
- if (match(Op0I, m_OneUse(m_c_BinOp(m_OneUse(m_ZExt(m_Value(X))),
- m_ConstantInt(C1))))) {
- if (AndRHSMask.isIntN(X->getType()->getScalarSizeInBits())) {
- auto *TruncC1 = ConstantExpr::getTrunc(C1, X->getType());
- Value *BinOp;
- Value *Op0LHS = Op0I->getOperand(0);
- if (isa<ZExtInst>(Op0LHS))
- BinOp = Builder.CreateBinOp(Op0I->getOpcode(), X, TruncC1);
- else
- BinOp = Builder.CreateBinOp(Op0I->getOpcode(), TruncC1, X);
- auto *TruncC2 = ConstantExpr::getTrunc(AndRHS, X->getType());
- auto *And = Builder.CreateAnd(BinOp, TruncC2);
- return new ZExtInst(And, Ty);
- }
- }
+ return true;
+ default:
+ return false;
+ }
+ };
+ BinaryOperator *BO;
+ if (match(Op0, m_OneUse(m_BinOp(BO))) && isSuitableBinOpcode(BO)) {
+ Value *X;
+ const APInt *C1;
+ // TODO: The one-use restrictions could be relaxed a little if the AND
+ // is going to be removed.
+ if (match(BO, m_c_BinOp(m_OneUse(m_ZExt(m_Value(X))), m_APInt(C1))) &&
+ C->isIntN(X->getType()->getScalarSizeInBits())) {
+ unsigned XWidth = X->getType()->getScalarSizeInBits();
+ Constant *TruncC1 = ConstantInt::get(X->getType(), C1->trunc(XWidth));
+ Value *BinOp = isa<ZExtInst>(BO->getOperand(0))
+ ? Builder.CreateBinOp(BO->getOpcode(), X, TruncC1)
+ : Builder.CreateBinOp(BO->getOpcode(), TruncC1, X);
+ Constant *TruncC = ConstantInt::get(X->getType(), C->trunc(XWidth));
+ Value *And = Builder.CreateAnd(BinOp, TruncC);
+ return new ZExtInst(And, Ty);
}
}
}
@@ -2071,13 +2070,13 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
A->getType()->isIntOrIntVectorTy(1))
return SelectInst::Create(A, Op0, Constant::getNullValue(Ty));
- // and(ashr(subNSW(Y, X), ScalarSizeInBits(Y)-1), X) --> X s> Y ? X : 0.
- if (match(&I, m_c_And(m_OneUse(m_AShr(
- m_NSWSub(m_Value(Y), m_Value(X)),
- m_SpecificInt(Ty->getScalarSizeInBits() - 1))),
- m_Deferred(X)))) {
- Value *NewICmpInst = Builder.CreateICmpSGT(X, Y);
- return SelectInst::Create(NewICmpInst, X, ConstantInt::getNullValue(Ty));
+ // (iN X s>> (N-1)) & Y --> (X s< 0) ? Y : 0
+ unsigned FullShift = Ty->getScalarSizeInBits() - 1;
+ if (match(&I, m_c_And(m_OneUse(m_AShr(m_Value(X), m_SpecificInt(FullShift))),
+ m_Value(Y)))) {
+ Constant *Zero = ConstantInt::getNullValue(Ty);
+ Value *Cmp = Builder.CreateICmpSLT(X, Zero, "isneg");
+ return SelectInst::Create(Cmp, Y, Zero);
}
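A minimal example of the generalized sign-bit fold above (sketch only; names are illustrative):

  define i32 @sign_select(i32 %x, i32 %y) {
    %s = ashr i32 %x, 31         ; all-ones if %x is negative, otherwise zero
    %r = and i32 %s, %y
    ret i32 %r
  }

  ; expected to become roughly:
  ;   %isneg = icmp slt i32 %x, 0
  ;   %r     = select i1 %isneg, i32 %y, i32 0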
// (~x) & y --> ~(x | (~y)) iff that gets rid of inversions
@@ -2284,28 +2283,38 @@ static bool areInverseVectorBitmasks(Constant *C1, Constant *C2) {
/// vector composed of all-zeros or all-ones values and is the bitwise 'not' of
/// B, it can be used as the condition operand of a select instruction.
Value *InstCombinerImpl::getSelectCondition(Value *A, Value *B) {
- // Step 1: We may have peeked through bitcasts in the caller.
+ // We may have peeked through bitcasts in the caller.
// Exit immediately if we don't have (vector) integer types.
Type *Ty = A->getType();
if (!Ty->isIntOrIntVectorTy() || !B->getType()->isIntOrIntVectorTy())
return nullptr;
- // Step 2: We need 0 or all-1's bitmasks.
- if (ComputeNumSignBits(A) != Ty->getScalarSizeInBits())
- return nullptr;
-
- // Step 3: If B is the 'not' value of A, we have our answer.
- if (match(A, m_Not(m_Specific(B)))) {
+ // If A is the 'not' operand of B and has enough signbits, we have our answer.
+ if (match(B, m_Not(m_Specific(A)))) {
// If these are scalars or vectors of i1, A can be used directly.
if (Ty->isIntOrIntVectorTy(1))
return A;
- return Builder.CreateTrunc(A, CmpInst::makeCmpResultType(Ty));
+
+ // If we look through a vector bitcast, the caller will bitcast the operands
+ // to match the condition's number of bits (N x i1).
+ // To make this poison-safe, disallow bitcast from wide element to narrow
+ // element. That could allow poison in lanes where it was not present in the
+ // original code.
+ A = peekThroughBitcast(A);
+ if (A->getType()->isIntOrIntVectorTy()) {
+ unsigned NumSignBits = ComputeNumSignBits(A);
+ if (NumSignBits == A->getType()->getScalarSizeInBits() &&
+ NumSignBits <= Ty->getScalarSizeInBits())
+ return Builder.CreateTrunc(A, CmpInst::makeCmpResultType(A->getType()));
+ }
+ return nullptr;
}
// If both operands are constants, see if the constants are inverse bitmasks.
Constant *AConst, *BConst;
if (match(A, m_Constant(AConst)) && match(B, m_Constant(BConst)))
- if (AConst == ConstantExpr::getNot(BConst))
+ if (AConst == ConstantExpr::getNot(BConst) &&
+ ComputeNumSignBits(A) == Ty->getScalarSizeInBits())
return Builder.CreateZExtOrTrunc(A, CmpInst::makeCmpResultType(Ty));
// Look for more complex patterns. The 'not' op may be hidden behind various
@@ -2349,10 +2358,17 @@ Value *InstCombinerImpl::matchSelectFromAndOr(Value *A, Value *C, Value *B,
B = peekThroughBitcast(B, true);
if (Value *Cond = getSelectCondition(A, B)) {
// ((bc Cond) & C) | ((bc ~Cond) & D) --> bc (select Cond, (bc C), (bc D))
+ // If this is a vector, we may need to cast to match the condition's length.
// The bitcasts will either all exist or all not exist. The builder will
// not create unnecessary casts if the types already match.
- Value *BitcastC = Builder.CreateBitCast(C, A->getType());
- Value *BitcastD = Builder.CreateBitCast(D, A->getType());
+ Type *SelTy = A->getType();
+ if (auto *VecTy = dyn_cast<VectorType>(Cond->getType())) {
+ unsigned Elts = VecTy->getElementCount().getKnownMinValue();
+ Type *EltTy = Builder.getIntNTy(SelTy->getPrimitiveSizeInBits() / Elts);
+ SelTy = VectorType::get(EltTy, VecTy->getElementCount());
+ }
+ Value *BitcastC = Builder.CreateBitCast(C, SelTy);
+ Value *BitcastD = Builder.CreateBitCast(D, SelTy);
Value *Select = Builder.CreateSelect(Cond, BitcastC, BitcastD);
return Builder.CreateBitCast(Select, OrigType);
}
@@ -2374,8 +2390,9 @@ Value *InstCombinerImpl::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
Value *LHS0 = LHS->getOperand(0), *RHS0 = RHS->getOperand(0);
Value *LHS1 = LHS->getOperand(1), *RHS1 = RHS->getOperand(1);
- auto *LHSC = dyn_cast<ConstantInt>(LHS1);
- auto *RHSC = dyn_cast<ConstantInt>(RHS1);
+ const APInt *LHSC = nullptr, *RHSC = nullptr;
+ match(LHS1, m_APInt(LHSC));
+ match(RHS1, m_APInt(RHSC));
// Fold (icmp ult/ule (A + C1), C3) | (icmp ult/ule (A + C2), C3)
// --> (icmp ult/ule ((A & ~(C1 ^ C2)) + max(C1, C2)), C3)
@@ -2389,40 +2406,41 @@ Value *InstCombinerImpl::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
// This implies all values in the two ranges differ by exactly one bit.
if ((PredL == ICmpInst::ICMP_ULT || PredL == ICmpInst::ICMP_ULE) &&
PredL == PredR && LHSC && RHSC && LHS->hasOneUse() && RHS->hasOneUse() &&
- LHSC->getType() == RHSC->getType() &&
- LHSC->getValue() == (RHSC->getValue())) {
+ LHSC->getBitWidth() == RHSC->getBitWidth() && *LHSC == *RHSC) {
Value *AddOpnd;
- ConstantInt *LAddC, *RAddC;
- if (match(LHS0, m_Add(m_Value(AddOpnd), m_ConstantInt(LAddC))) &&
- match(RHS0, m_Add(m_Specific(AddOpnd), m_ConstantInt(RAddC))) &&
- LAddC->getValue().ugt(LHSC->getValue()) &&
- RAddC->getValue().ugt(LHSC->getValue())) {
+ const APInt *LAddC, *RAddC;
+ if (match(LHS0, m_Add(m_Value(AddOpnd), m_APInt(LAddC))) &&
+ match(RHS0, m_Add(m_Specific(AddOpnd), m_APInt(RAddC))) &&
+ LAddC->ugt(*LHSC) && RAddC->ugt(*LHSC)) {
- APInt DiffC = LAddC->getValue() ^ RAddC->getValue();
+ APInt DiffC = *LAddC ^ *RAddC;
if (DiffC.isPowerOf2()) {
- ConstantInt *MaxAddC = nullptr;
- if (LAddC->getValue().ult(RAddC->getValue()))
+ const APInt *MaxAddC = nullptr;
+ if (LAddC->ult(*RAddC))
MaxAddC = RAddC;
else
MaxAddC = LAddC;
- APInt RRangeLow = -RAddC->getValue();
- APInt RRangeHigh = RRangeLow + LHSC->getValue();
- APInt LRangeLow = -LAddC->getValue();
- APInt LRangeHigh = LRangeLow + LHSC->getValue();
+ APInt RRangeLow = -*RAddC;
+ APInt RRangeHigh = RRangeLow + *LHSC;
+ APInt LRangeLow = -*LAddC;
+ APInt LRangeHigh = LRangeLow + *LHSC;
APInt LowRangeDiff = RRangeLow ^ LRangeLow;
APInt HighRangeDiff = RRangeHigh ^ LRangeHigh;
APInt RangeDiff = LRangeLow.sgt(RRangeLow) ? LRangeLow - RRangeLow
: RRangeLow - LRangeLow;
if (LowRangeDiff.isPowerOf2() && LowRangeDiff == HighRangeDiff &&
- RangeDiff.ugt(LHSC->getValue())) {
- Value *MaskC = ConstantInt::get(LAddC->getType(), ~DiffC);
+ RangeDiff.ugt(*LHSC)) {
+ Type *Ty = AddOpnd->getType();
+ Value *MaskC = ConstantInt::get(Ty, ~DiffC);
Value *NewAnd = Builder.CreateAnd(AddOpnd, MaskC);
- Value *NewAdd = Builder.CreateAdd(NewAnd, MaxAddC);
- return Builder.CreateICmp(LHS->getPredicate(), NewAdd, LHSC);
+ Value *NewAdd = Builder.CreateAdd(NewAnd,
+ ConstantInt::get(Ty, *MaxAddC));
+ return Builder.CreateICmp(LHS->getPredicate(), NewAdd,
+ ConstantInt::get(Ty, *LHSC));
}
}
}
@@ -2496,14 +2514,13 @@ Value *InstCombinerImpl::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
foldUnsignedUnderflowCheck(RHS, LHS, /*IsAnd=*/false, Q, Builder))
return X;
- if (Value *X = foldEqOfParts(LHS, RHS, /*IsAnd=*/false, Builder))
+ if (Value *X = foldEqOfParts(LHS, RHS, /*IsAnd=*/false))
return X;
// (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0)
- // TODO: Remove this when foldLogOpOfMaskedICmps can handle vectors.
- if (PredL == ICmpInst::ICMP_NE && match(LHS1, m_Zero()) &&
- PredR == ICmpInst::ICMP_NE && match(RHS1, m_Zero()) &&
- LHS0->getType()->isIntOrIntVectorTy() &&
+ // TODO: Remove this when foldLogOpOfMaskedICmps can handle undefs.
+ if (PredL == ICmpInst::ICMP_NE && match(LHS1, m_ZeroInt()) &&
+ PredR == ICmpInst::ICMP_NE && match(RHS1, m_ZeroInt()) &&
LHS0->getType() == RHS0->getType()) {
Value *NewOr = Builder.CreateOr(LHS0, RHS0);
return Builder.CreateICmp(PredL, NewOr,
@@ -2514,114 +2531,8 @@ Value *InstCombinerImpl::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
if (!LHSC || !RHSC)
return nullptr;
- // (icmp ult (X + CA), C1) | (icmp eq X, C2) -> (icmp ule (X + CA), C1)
- // iff C2 + CA == C1.
- if (PredL == ICmpInst::ICMP_ULT && PredR == ICmpInst::ICMP_EQ) {
- ConstantInt *AddC;
- if (match(LHS0, m_Add(m_Specific(RHS0), m_ConstantInt(AddC))))
- if (RHSC->getValue() + AddC->getValue() == LHSC->getValue())
- return Builder.CreateICmpULE(LHS0, LHSC);
- }
-
- // From here on, we only handle:
- // (icmp1 A, C1) | (icmp2 A, C2) --> something simpler.
- if (LHS0 != RHS0)
- return nullptr;
-
- // ICMP_[US][GL]E X, C is folded to ICMP_[US][GL]T elsewhere.
- if (PredL == ICmpInst::ICMP_UGE || PredL == ICmpInst::ICMP_ULE ||
- PredR == ICmpInst::ICMP_UGE || PredR == ICmpInst::ICMP_ULE ||
- PredL == ICmpInst::ICMP_SGE || PredL == ICmpInst::ICMP_SLE ||
- PredR == ICmpInst::ICMP_SGE || PredR == ICmpInst::ICMP_SLE)
- return nullptr;
-
- // We can't fold (ugt x, C) | (sgt x, C2).
- if (!predicatesFoldable(PredL, PredR))
- return nullptr;
-
- // Ensure that the larger constant is on the RHS.
- bool ShouldSwap;
- if (CmpInst::isSigned(PredL) ||
- (ICmpInst::isEquality(PredL) && CmpInst::isSigned(PredR)))
- ShouldSwap = LHSC->getValue().sgt(RHSC->getValue());
- else
- ShouldSwap = LHSC->getValue().ugt(RHSC->getValue());
-
- if (ShouldSwap) {
- std::swap(LHS, RHS);
- std::swap(LHSC, RHSC);
- std::swap(PredL, PredR);
- }
-
- // At this point, we know we have two icmp instructions
- // comparing a value against two constants and or'ing the result
- // together. Because of the above check, we know that we only have
- // ICMP_EQ, ICMP_NE, ICMP_LT, and ICMP_GT here. We also know (from the
- // icmp folding check above), that the two constants are not
- // equal.
- assert(LHSC != RHSC && "Compares not folded above?");
-
- switch (PredL) {
- default:
- llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_EQ:
- switch (PredR) {
- default:
- llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_EQ:
- // Potential folds for this case should already be handled.
- break;
- case ICmpInst::ICMP_UGT:
- // (X == 0 || X u> C) -> (X-1) u>= C
- if (LHSC->isMinValue(false))
- return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue() + 1,
- false, false);
- // (X == 13 | X u> 14) -> no change
- break;
- case ICmpInst::ICMP_SGT:
- // (X == INT_MIN || X s> C) -> (X-(INT_MIN+1)) u>= C-INT_MIN
- if (LHSC->isMinValue(true))
- return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue() + 1,
- true, false);
- // (X == 13 | X s> 14) -> no change
- break;
- }
- break;
- case ICmpInst::ICMP_ULT:
- switch (PredR) {
- default:
- llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_EQ: // (X u< 13 | X == 14) -> no change
- // (X u< C || X == UINT_MAX) => (X-C) u>= UINT_MAX-C
- if (RHSC->isMaxValue(false))
- return insertRangeTest(LHS0, LHSC->getValue(), RHSC->getValue(),
- false, false);
- break;
- case ICmpInst::ICMP_UGT: // (X u< 13 | X u> 15) -> (X-13) u> 2
- assert(!RHSC->isMaxValue(false) && "Missed icmp simplification");
- return insertRangeTest(LHS0, LHSC->getValue(), RHSC->getValue() + 1,
- false, false);
- }
- break;
- case ICmpInst::ICMP_SLT:
- switch (PredR) {
- default:
- llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_EQ:
- // (X s< C || X == INT_MAX) => (X-C) u>= INT_MAX-C
- if (RHSC->isMaxValue(true))
- return insertRangeTest(LHS0, LHSC->getValue(), RHSC->getValue(),
- true, false);
- // (X s< 13 | X == 14) -> no change
- break;
- case ICmpInst::ICMP_SGT: // (X s< 13 | X s> 15) -> (X-13) u> 2
- assert(!RHSC->isMaxValue(true) && "Missed icmp simplification");
- return insertRangeTest(LHS0, LHSC->getValue(), RHSC->getValue() + 1, true,
- false);
- }
- break;
- }
- return nullptr;
+ return foldAndOrOfICmpsUsingRanges(PredL, LHS0, *LHSC, PredR, RHS0, *RHSC,
+ Builder, /* IsAnd */ false);
}
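The delegated helper covers cases such as the classic range merge below (illustrative input; the exact output shape depends on foldAndOrOfICmpsUsingRanges):

  define i1 @or_range(i32 %x) {
    %c1 = icmp ult i32 %x, 13
    %c2 = icmp ugt i32 %x, 15
    %r  = or i1 %c1, %c2
    ret i1 %r
  }

  ; expected to fold to a single range test equivalent to (%x - 13) u> 2, e.g.:
  ;   %off = add i32 %x, -13
  ;   %r   = icmp ugt i32 %off, 2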
// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
@@ -2647,6 +2558,9 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
if (Instruction *Xor = foldOrToXor(I, Builder))
return Xor;
+ if (Instruction *X = foldComplexAndOrPatterns(I, Builder))
+ return X;
+
// (A&B)|(A&C) -> A&(B|C) etc
if (Value *V = SimplifyUsingDistributiveLaws(I))
return replaceInstUsesWith(I, V);
@@ -2684,69 +2598,63 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
Value *X, *Y;
const APInt *CV;
if (match(&I, m_c_Or(m_OneUse(m_Xor(m_Value(X), m_APInt(CV))), m_Value(Y))) &&
- !CV->isAllOnesValue() && MaskedValueIsZero(Y, *CV, 0, &I)) {
+ !CV->isAllOnes() && MaskedValueIsZero(Y, *CV, 0, &I)) {
// (X ^ C) | Y -> (X | Y) ^ C iff Y & C == 0
// The check for a 'not' op is for efficiency (if Y is known zero --> ~X).
Value *Or = Builder.CreateOr(X, Y);
return BinaryOperator::CreateXor(Or, ConstantInt::get(I.getType(), *CV));
}
- // (A & C)|(B & D)
+ // (A & C) | (B & D)
Value *A, *B, *C, *D;
if (match(Op0, m_And(m_Value(A), m_Value(C))) &&
match(Op1, m_And(m_Value(B), m_Value(D)))) {
- // (A & C1)|(B & C2)
- ConstantInt *C1, *C2;
- if (match(C, m_ConstantInt(C1)) && match(D, m_ConstantInt(C2))) {
- Value *V1 = nullptr, *V2 = nullptr;
- if ((C1->getValue() & C2->getValue()).isNullValue()) {
- // ((V | N) & C1) | (V & C2) --> (V|N) & (C1|C2)
- // iff (C1&C2) == 0 and (N&~C1) == 0
- if (match(A, m_Or(m_Value(V1), m_Value(V2))) &&
- ((V1 == B &&
- MaskedValueIsZero(V2, ~C1->getValue(), 0, &I)) || // (V|N)
- (V2 == B &&
- MaskedValueIsZero(V1, ~C1->getValue(), 0, &I)))) // (N|V)
- return BinaryOperator::CreateAnd(A,
- Builder.getInt(C1->getValue()|C2->getValue()));
- // Or commutes, try both ways.
- if (match(B, m_Or(m_Value(V1), m_Value(V2))) &&
- ((V1 == A &&
- MaskedValueIsZero(V2, ~C2->getValue(), 0, &I)) || // (V|N)
- (V2 == A &&
- MaskedValueIsZero(V1, ~C2->getValue(), 0, &I)))) // (N|V)
- return BinaryOperator::CreateAnd(B,
- Builder.getInt(C1->getValue()|C2->getValue()));
-
- // ((V|C3)&C1) | ((V|C4)&C2) --> (V|C3|C4)&(C1|C2)
- // iff (C1&C2) == 0 and (C3&~C1) == 0 and (C4&~C2) == 0.
- ConstantInt *C3 = nullptr, *C4 = nullptr;
- if (match(A, m_Or(m_Value(V1), m_ConstantInt(C3))) &&
- (C3->getValue() & ~C1->getValue()).isNullValue() &&
- match(B, m_Or(m_Specific(V1), m_ConstantInt(C4))) &&
- (C4->getValue() & ~C2->getValue()).isNullValue()) {
- V2 = Builder.CreateOr(V1, ConstantExpr::getOr(C3, C4), "bitfield");
- return BinaryOperator::CreateAnd(V2,
- Builder.getInt(C1->getValue()|C2->getValue()));
- }
- }
- if (C1->getValue() == ~C2->getValue()) {
- Value *X;
-
- // ((X|B)&C1)|(B&C2) -> (X&C1) | B iff C1 == ~C2
+ // (A & C0) | (B & C1)
+ const APInt *C0, *C1;
+ if (match(C, m_APInt(C0)) && match(D, m_APInt(C1))) {
+ Value *X;
+ if (*C0 == ~*C1) {
+ // ((X | B) & MaskC) | (B & ~MaskC) -> (X & MaskC) | B
if (match(A, m_c_Or(m_Value(X), m_Specific(B))))
- return BinaryOperator::CreateOr(Builder.CreateAnd(X, C1), B);
- // (A&C2)|((X|A)&C1) -> (X&C2) | A iff C1 == ~C2
+ return BinaryOperator::CreateOr(Builder.CreateAnd(X, *C0), B);
+ // (A & MaskC) | ((X | A) & ~MaskC) -> (X & ~MaskC) | A
if (match(B, m_c_Or(m_Specific(A), m_Value(X))))
- return BinaryOperator::CreateOr(Builder.CreateAnd(X, C2), A);
+ return BinaryOperator::CreateOr(Builder.CreateAnd(X, *C1), A);
- // ((X^B)&C1)|(B&C2) -> (X&C1) ^ B iff C1 == ~C2
+ // ((X ^ B) & MaskC) | (B & ~MaskC) -> (X & MaskC) ^ B
if (match(A, m_c_Xor(m_Value(X), m_Specific(B))))
- return BinaryOperator::CreateXor(Builder.CreateAnd(X, C1), B);
- // (A&C2)|((X^A)&C1) -> (X&C2) ^ A iff C1 == ~C2
+ return BinaryOperator::CreateXor(Builder.CreateAnd(X, *C0), B);
+ // (A & MaskC) | ((X ^ A) & ~MaskC) -> (X & ~MaskC) ^ A
if (match(B, m_c_Xor(m_Specific(A), m_Value(X))))
- return BinaryOperator::CreateXor(Builder.CreateAnd(X, C2), A);
+ return BinaryOperator::CreateXor(Builder.CreateAnd(X, *C1), A);
+ }
+
+ if ((*C0 & *C1).isZero()) {
+ // ((X | B) & C0) | (B & C1) --> (X | B) & (C0 | C1)
+ // iff (C0 & C1) == 0 and (X & ~C0) == 0
+ if (match(A, m_c_Or(m_Value(X), m_Specific(B))) &&
+ MaskedValueIsZero(X, ~*C0, 0, &I)) {
+ Constant *C01 = ConstantInt::get(I.getType(), *C0 | *C1);
+ return BinaryOperator::CreateAnd(A, C01);
+ }
+ // (A & C0) | ((X | A) & C1) --> (X | A) & (C0 | C1)
+ // iff (C0 & C1) == 0 and (X & ~C1) == 0
+ if (match(B, m_c_Or(m_Value(X), m_Specific(A))) &&
+ MaskedValueIsZero(X, ~*C1, 0, &I)) {
+ Constant *C01 = ConstantInt::get(I.getType(), *C0 | *C1);
+ return BinaryOperator::CreateAnd(B, C01);
+ }
+ // ((X | C2) & C0) | ((X | C3) & C1) --> (X | C2 | C3) & (C0 | C1)
+ // iff (C0 & C1) == 0 and (C2 & ~C0) == 0 and (C3 & ~C1) == 0.
+ const APInt *C2, *C3;
+ if (match(A, m_Or(m_Value(X), m_APInt(C2))) &&
+ match(B, m_Or(m_Specific(X), m_APInt(C3))) &&
+ (*C2 & ~*C0).isZero() && (*C3 & ~*C1).isZero()) {
+ Value *Or = Builder.CreateOr(X, *C2 | *C3, "bitfield");
+ Constant *C01 = ConstantInt::get(I.getType(), *C0 | *C1);
+ return BinaryOperator::CreateAnd(Or, C01);
+ }
}
}
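One of the rewritten inverse-mask folds, sketched on i8 values (the constants are illustrative):

  define i8 @masked_xor_or(i8 %a, i8 %x) {
    %hi = and i8 %a, -16         ; A & MaskC
    %t  = xor i8 %x, %a
    %lo = and i8 %t, 15          ; (X ^ A) & ~MaskC
    %r  = or i8 %hi, %lo
    ret i8 %r
  }

  ; may become roughly:
  ;   %m = and i8 %x, 15
  ;   %r = xor i8 %m, %a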
@@ -2801,6 +2709,8 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
// A | ( A ^ B) -> A | B
// A | (~A ^ B) -> A | ~B
// (A & B) | (A ^ B)
+ // ~A | (A ^ B) -> ~(A & B)
+ // The swap above should always make Op0 the 'not' for the last case.
if (match(Op1, m_Xor(m_Value(A), m_Value(B)))) {
if (Op0 == A || Op0 == B)
return BinaryOperator::CreateOr(A, B);
@@ -2809,6 +2719,10 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
match(Op0, m_And(m_Specific(B), m_Specific(A))))
return BinaryOperator::CreateOr(A, B);
+ if ((Op0->hasOneUse() || Op1->hasOneUse()) &&
+ (match(Op0, m_Not(m_Specific(A))) || match(Op0, m_Not(m_Specific(B)))))
+ return BinaryOperator::CreateNot(Builder.CreateAnd(A, B));
+
if (Op1->hasOneUse() && match(A, m_Not(m_Specific(Op0)))) {
Value *Not = Builder.CreateNot(B, B->getName() + ".not");
return BinaryOperator::CreateOr(Not, Op0);
@@ -3275,71 +3189,45 @@ bool InstCombinerImpl::sinkNotIntoOtherHandOfAndOrOr(BinaryOperator &I) {
return true;
}
-// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
-// here. We should standardize that construct where it is needed or choose some
-// other way to ensure that commutated variants of patterns are not missed.
-Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
- if (Value *V = SimplifyXorInst(I.getOperand(0), I.getOperand(1),
- SQ.getWithInstruction(&I)))
- return replaceInstUsesWith(I, V);
-
- if (SimplifyAssociativeOrCommutative(I))
- return &I;
-
- if (Instruction *X = foldVectorBinop(I))
- return X;
-
- if (Instruction *NewXor = foldXorToXor(I, Builder))
- return NewXor;
-
- // (A&B)^(A&C) -> A&(B^C) etc
- if (Value *V = SimplifyUsingDistributiveLaws(I))
- return replaceInstUsesWith(I, V);
-
- // See if we can simplify any instructions used by the instruction whose sole
- // purpose is to compute bits we don't care about.
- if (SimplifyDemandedInstructionBits(I))
- return &I;
-
- if (Value *V = SimplifyBSwap(I, Builder))
- return replaceInstUsesWith(I, V);
-
- Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- Type *Ty = I.getType();
-
- // Fold (X & M) ^ (Y & ~M) -> (X & M) | (Y & ~M)
- // This it a special case in haveNoCommonBitsSet, but the computeKnownBits
- // calls in there are unnecessary as SimplifyDemandedInstructionBits should
- // have already taken care of those cases.
- Value *M;
- if (match(&I, m_c_Xor(m_c_And(m_Not(m_Value(M)), m_Value()),
- m_c_And(m_Deferred(M), m_Value()))))
- return BinaryOperator::CreateOr(Op0, Op1);
+Instruction *InstCombinerImpl::foldNot(BinaryOperator &I) {
+ Value *NotOp;
+ if (!match(&I, m_Not(m_Value(NotOp))))
+ return nullptr;
// Apply DeMorgan's Law for 'nand' / 'nor' logic with an inverted operand.
- Value *X, *Y;
-
// We must eliminate the and/or (one-use) for these transforms to not increase
// the instruction count.
+ //
// ~(~X & Y) --> (X | ~Y)
// ~(Y & ~X) --> (X | ~Y)
- if (match(&I, m_Not(m_OneUse(m_c_And(m_Not(m_Value(X)), m_Value(Y)))))) {
+ //
+ // Note: The logical matches do not check for the commuted patterns because
+ // those are handled via SimplifySelectsFeedingBinaryOp().
+ Type *Ty = I.getType();
+ Value *X, *Y;
+ if (match(NotOp, m_OneUse(m_c_And(m_Not(m_Value(X)), m_Value(Y))))) {
Value *NotY = Builder.CreateNot(Y, Y->getName() + ".not");
return BinaryOperator::CreateOr(X, NotY);
}
+ if (match(NotOp, m_OneUse(m_LogicalAnd(m_Not(m_Value(X)), m_Value(Y))))) {
+ Value *NotY = Builder.CreateNot(Y, Y->getName() + ".not");
+ return SelectInst::Create(X, ConstantInt::getTrue(Ty), NotY);
+ }
+
// ~(~X | Y) --> (X & ~Y)
// ~(Y | ~X) --> (X & ~Y)
- if (match(&I, m_Not(m_OneUse(m_c_Or(m_Not(m_Value(X)), m_Value(Y)))))) {
+ if (match(NotOp, m_OneUse(m_c_Or(m_Not(m_Value(X)), m_Value(Y))))) {
Value *NotY = Builder.CreateNot(Y, Y->getName() + ".not");
return BinaryOperator::CreateAnd(X, NotY);
}
-
- if (Instruction *Xor = visitMaskedMerge(I, Builder))
- return Xor;
+ if (match(NotOp, m_OneUse(m_LogicalOr(m_Not(m_Value(X)), m_Value(Y))))) {
+ Value *NotY = Builder.CreateNot(Y, Y->getName() + ".not");
+ return SelectInst::Create(X, NotY, ConstantInt::getFalse(Ty));
+ }
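Sketch of the select-based (poison-safe) DeMorgan variant added above, using scalar i1 values for brevity:

  define i1 @not_logical_and(i1 %x, i1 %y) {
    %nx = xor i1 %x, true
    %a  = select i1 %nx, i1 %y, i1 false   ; logical and: ~X && Y
    %r  = xor i1 %a, true
    ret i1 %r
  }

  ; expected to become roughly:
  ;   %ny = xor i1 %y, true
  ;   %r  = select i1 %x, i1 true, i1 %ny  ; logical or: X || ~Y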
// Is this a 'not' (~) fed by a binary operator?
BinaryOperator *NotVal;
- if (match(&I, m_Not(m_BinOp(NotVal)))) {
+ if (match(NotOp, m_BinOp(NotVal))) {
if (NotVal->getOpcode() == Instruction::And ||
NotVal->getOpcode() == Instruction::Or) {
// Apply DeMorgan's Law when inverts are free:
@@ -3411,9 +3299,164 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
NotVal);
}
- // Use DeMorgan and reassociation to eliminate a 'not' op.
+ // not (cmp A, B) = !cmp A, B
+ CmpInst::Predicate Pred;
+ if (match(NotOp, m_OneUse(m_Cmp(Pred, m_Value(), m_Value())))) {
+ cast<CmpInst>(NotOp)->setPredicate(CmpInst::getInversePredicate(Pred));
+ return replaceInstUsesWith(I, NotOp);
+ }
+
+ // Eliminate a bitwise 'not' op of 'not' min/max by inverting the min/max:
+ // ~min(~X, ~Y) --> max(X, Y)
+ // ~max(~X, Y) --> min(X, ~Y)
+ auto *II = dyn_cast<IntrinsicInst>(NotOp);
+ if (II && II->hasOneUse()) {
+ if (match(NotOp, m_MaxOrMin(m_Value(X), m_Value(Y))) &&
+ isFreeToInvert(X, X->hasOneUse()) &&
+ isFreeToInvert(Y, Y->hasOneUse())) {
+ Intrinsic::ID InvID = getInverseMinMaxIntrinsic(II->getIntrinsicID());
+ Value *NotX = Builder.CreateNot(X);
+ Value *NotY = Builder.CreateNot(Y);
+ Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, NotX, NotY);
+ return replaceInstUsesWith(I, InvMaxMin);
+ }
+ if (match(NotOp, m_c_MaxOrMin(m_Not(m_Value(X)), m_Value(Y)))) {
+ Intrinsic::ID InvID = getInverseMinMaxIntrinsic(II->getIntrinsicID());
+ Value *NotY = Builder.CreateNot(Y);
+ Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, X, NotY);
+ return replaceInstUsesWith(I, InvMaxMin);
+ }
+ }
+
+ // TODO: Remove folds if we canonicalize to intrinsics (see above).
+ // Eliminate a bitwise 'not' op of 'not' min/max by inverting the min/max:
+ //
+ // %notx = xor i32 %x, -1
+ // %cmp1 = icmp sgt i32 %notx, %y
+ // %smax = select i1 %cmp1, i32 %notx, i32 %y
+ // %res = xor i32 %smax, -1
+ // =>
+ // %noty = xor i32 %y, -1
+ // %cmp2 = icmp slt %x, %noty
+ // %res = select i1 %cmp2, i32 %x, i32 %noty
+ //
+ // Same is applicable for smin/umax/umin.
+ if (NotOp->hasOneUse()) {
+ Value *LHS, *RHS;
+ SelectPatternFlavor SPF = matchSelectPattern(NotOp, LHS, RHS).Flavor;
+ if (SelectPatternResult::isMinOrMax(SPF)) {
+ // It's possible we get here before the not has been simplified, so make
+ // sure the input to the not isn't freely invertible.
+ if (match(LHS, m_Not(m_Value(X))) && !isFreeToInvert(X, X->hasOneUse())) {
+ Value *NotY = Builder.CreateNot(RHS);
+ return SelectInst::Create(
+ Builder.CreateICmp(getInverseMinMaxPred(SPF), X, NotY), X, NotY);
+ }
+
+ // It's possible we get here before the not has been simplified, so make
+ // sure the input to the not isn't freely invertible.
+ if (match(RHS, m_Not(m_Value(Y))) && !isFreeToInvert(Y, Y->hasOneUse())) {
+ Value *NotX = Builder.CreateNot(LHS);
+ return SelectInst::Create(
+ Builder.CreateICmp(getInverseMinMaxPred(SPF), NotX, Y), NotX, Y);
+ }
+
+ // If both sides are freely invertible, then we can get rid of the xor
+ // completely.
+ if (isFreeToInvert(LHS, !LHS->hasNUsesOrMore(3)) &&
+ isFreeToInvert(RHS, !RHS->hasNUsesOrMore(3))) {
+ Value *NotLHS = Builder.CreateNot(LHS);
+ Value *NotRHS = Builder.CreateNot(RHS);
+ return SelectInst::Create(
+ Builder.CreateICmp(getInverseMinMaxPred(SPF), NotLHS, NotRHS),
+ NotLHS, NotRHS);
+ }
+ }
+
+ // Pull 'not' into operands of select if both operands are one-use compares
+  // or one is a one-use compare and the other is a constant.
+ // Inverting the predicates eliminates the 'not' operation.
+ // Example:
+  //   not (select ?, (cmp TPred, ?, ?), (cmp FPred, ?, ?)) -->
+ // select ?, (cmp InvTPred, ?, ?), (cmp InvFPred, ?, ?)
+  //   not (select ?, (cmp TPred, ?, ?), true) -->
+ // select ?, (cmp InvTPred, ?, ?), false
+ if (auto *Sel = dyn_cast<SelectInst>(NotOp)) {
+ Value *TV = Sel->getTrueValue();
+ Value *FV = Sel->getFalseValue();
+ auto *CmpT = dyn_cast<CmpInst>(TV);
+ auto *CmpF = dyn_cast<CmpInst>(FV);
+ bool InvertibleT = (CmpT && CmpT->hasOneUse()) || isa<Constant>(TV);
+ bool InvertibleF = (CmpF && CmpF->hasOneUse()) || isa<Constant>(FV);
+ if (InvertibleT && InvertibleF) {
+ if (CmpT)
+ CmpT->setPredicate(CmpT->getInversePredicate());
+ else
+ Sel->setTrueValue(ConstantExpr::getNot(cast<Constant>(TV)));
+ if (CmpF)
+ CmpF->setPredicate(CmpF->getInversePredicate());
+ else
+ Sel->setFalseValue(ConstantExpr::getNot(cast<Constant>(FV)));
+ return replaceInstUsesWith(I, Sel);
+ }
+ }
+ }
+
+ if (Instruction *NewXor = sinkNotIntoXor(I, Builder))
+ return NewXor;
+
+ return nullptr;
+}
+
+// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
+// here. We should standardize that construct where it is needed or choose some
+// other way to ensure that commutated variants of patterns are not missed.
+Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
+ if (Value *V = SimplifyXorInst(I.getOperand(0), I.getOperand(1),
+ SQ.getWithInstruction(&I)))
+ return replaceInstUsesWith(I, V);
+
+ if (SimplifyAssociativeOrCommutative(I))
+ return &I;
+
+ if (Instruction *X = foldVectorBinop(I))
+ return X;
+
+ if (Instruction *NewXor = foldXorToXor(I, Builder))
+ return NewXor;
+
+ // (A&B)^(A&C) -> A&(B^C) etc
+ if (Value *V = SimplifyUsingDistributiveLaws(I))
+ return replaceInstUsesWith(I, V);
+
+ // See if we can simplify any instructions used by the instruction whose sole
+ // purpose is to compute bits we don't care about.
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+
+ if (Value *V = SimplifyBSwap(I, Builder))
+ return replaceInstUsesWith(I, V);
+
+ if (Instruction *R = foldNot(I))
+ return R;
+
+ // Fold (X & M) ^ (Y & ~M) -> (X & M) | (Y & ~M)
+  // This is a special case in haveNoCommonBitsSet, but the computeKnownBits
+ // calls in there are unnecessary as SimplifyDemandedInstructionBits should
+ // have already taken care of those cases.
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ Value *M;
+ if (match(&I, m_c_Xor(m_c_And(m_Not(m_Value(M)), m_Value()),
+ m_c_And(m_Deferred(M), m_Value()))))
+ return BinaryOperator::CreateOr(Op0, Op1);
+
+ if (Instruction *Xor = visitMaskedMerge(I, Builder))
+ return Xor;
+
+ Value *X, *Y;
Constant *C1;
if (match(Op1, m_Constant(C1))) {
+ // Use DeMorgan and reassociation to eliminate a 'not' op.
Constant *C2;
if (match(Op0, m_OneUse(m_Or(m_Not(m_Value(X)), m_Constant(C2))))) {
// (~X | C2) ^ C1 --> ((X & ~C2) ^ -1) ^ C1 --> (X & ~C2) ^ ~C1
@@ -3425,15 +3468,24 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
Value *Or = Builder.CreateOr(X, ConstantExpr::getNot(C2));
return BinaryOperator::CreateXor(Or, ConstantExpr::getNot(C1));
}
- }
- // not (cmp A, B) = !cmp A, B
- CmpInst::Predicate Pred;
- if (match(&I, m_Not(m_OneUse(m_Cmp(Pred, m_Value(), m_Value()))))) {
- cast<CmpInst>(Op0)->setPredicate(CmpInst::getInversePredicate(Pred));
- return replaceInstUsesWith(I, Op0);
+ // Convert xor ([trunc] (ashr X, BW-1)), C =>
+ // select(X >s -1, C, ~C)
+    // The ashr creates "AllZeroOrAllOne's", which then optionally inverts the
+ // constant depending on whether this input is less than 0.
+ const APInt *CA;
+ if (match(Op0, m_OneUse(m_TruncOrSelf(
+ m_AShr(m_Value(X), m_APIntAllowUndef(CA))))) &&
+ *CA == X->getType()->getScalarSizeInBits() - 1 &&
+ !match(C1, m_AllOnes())) {
+ assert(!C1->isZeroValue() && "Unexpected xor with 0");
+ Value *ICmp =
+ Builder.CreateICmpSGT(X, Constant::getAllOnesValue(X->getType()));
+ return SelectInst::Create(ICmp, Op1, Builder.CreateNot(Op1));
+ }
}
+ Type *Ty = I.getType();
{
const APInt *RHSC;
if (match(Op1, m_APInt(RHSC))) {
@@ -3456,13 +3508,13 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
// canonicalize to a 'not' before the shift to help SCEV and codegen:
// (X << C) ^ RHSC --> ~X << C
if (match(Op0, m_OneUse(m_Shl(m_Value(X), m_APInt(C)))) &&
- *RHSC == APInt::getAllOnesValue(Ty->getScalarSizeInBits()).shl(*C)) {
+ *RHSC == APInt::getAllOnes(Ty->getScalarSizeInBits()).shl(*C)) {
Value *NotX = Builder.CreateNot(X);
return BinaryOperator::CreateShl(NotX, ConstantInt::get(Ty, *C));
}
// (X >>u C) ^ RHSC --> ~X >>u C
if (match(Op0, m_OneUse(m_LShr(m_Value(X), m_APInt(C)))) &&
- *RHSC == APInt::getAllOnesValue(Ty->getScalarSizeInBits()).lshr(*C)) {
+ *RHSC == APInt::getAllOnes(Ty->getScalarSizeInBits()).lshr(*C)) {
Value *NotX = Builder.CreateNot(X);
return BinaryOperator::CreateLShr(NotX, ConstantInt::get(Ty, *C));
}
@@ -3572,101 +3624,6 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
if (Instruction *CastedXor = foldCastedBitwiseLogic(I))
return CastedXor;
- // Eliminate a bitwise 'not' op of 'not' min/max by inverting the min/max:
- // ~min(~X, ~Y) --> max(X, Y)
- // ~max(~X, Y) --> min(X, ~Y)
- auto *II = dyn_cast<IntrinsicInst>(Op0);
- if (II && match(Op1, m_AllOnes())) {
- if (match(Op0, m_MaxOrMin(m_Not(m_Value(X)), m_Not(m_Value(Y))))) {
- Intrinsic::ID InvID = getInverseMinMaxIntrinsic(II->getIntrinsicID());
- Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, X, Y);
- return replaceInstUsesWith(I, InvMaxMin);
- }
- if (match(Op0, m_OneUse(m_c_MaxOrMin(m_Not(m_Value(X)), m_Value(Y))))) {
- Intrinsic::ID InvID = getInverseMinMaxIntrinsic(II->getIntrinsicID());
- Value *NotY = Builder.CreateNot(Y);
- Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, X, NotY);
- return replaceInstUsesWith(I, InvMaxMin);
- }
- }
-
- // TODO: Remove folds if we canonicalize to intrinsics (see above).
- // Eliminate a bitwise 'not' op of 'not' min/max by inverting the min/max:
- //
- // %notx = xor i32 %x, -1
- // %cmp1 = icmp sgt i32 %notx, %y
- // %smax = select i1 %cmp1, i32 %notx, i32 %y
- // %res = xor i32 %smax, -1
- // =>
- // %noty = xor i32 %y, -1
- // %cmp2 = icmp slt %x, %noty
- // %res = select i1 %cmp2, i32 %x, i32 %noty
- //
- // Same is applicable for smin/umax/umin.
- if (match(Op1, m_AllOnes()) && Op0->hasOneUse()) {
- Value *LHS, *RHS;
- SelectPatternFlavor SPF = matchSelectPattern(Op0, LHS, RHS).Flavor;
- if (SelectPatternResult::isMinOrMax(SPF)) {
- // It's possible we get here before the not has been simplified, so make
- // sure the input to the not isn't freely invertible.
- if (match(LHS, m_Not(m_Value(X))) && !isFreeToInvert(X, X->hasOneUse())) {
- Value *NotY = Builder.CreateNot(RHS);
- return SelectInst::Create(
- Builder.CreateICmp(getInverseMinMaxPred(SPF), X, NotY), X, NotY);
- }
-
- // It's possible we get here before the not has been simplified, so make
- // sure the input to the not isn't freely invertible.
- if (match(RHS, m_Not(m_Value(Y))) && !isFreeToInvert(Y, Y->hasOneUse())) {
- Value *NotX = Builder.CreateNot(LHS);
- return SelectInst::Create(
- Builder.CreateICmp(getInverseMinMaxPred(SPF), NotX, Y), NotX, Y);
- }
-
- // If both sides are freely invertible, then we can get rid of the xor
- // completely.
- if (isFreeToInvert(LHS, !LHS->hasNUsesOrMore(3)) &&
- isFreeToInvert(RHS, !RHS->hasNUsesOrMore(3))) {
- Value *NotLHS = Builder.CreateNot(LHS);
- Value *NotRHS = Builder.CreateNot(RHS);
- return SelectInst::Create(
- Builder.CreateICmp(getInverseMinMaxPred(SPF), NotLHS, NotRHS),
- NotLHS, NotRHS);
- }
- }
-
- // Pull 'not' into operands of select if both operands are one-use compares
- // or one is one-use compare and the other one is a constant.
- // Inverting the predicates eliminates the 'not' operation.
- // Example:
- // not (select ?, (cmp TPred, ?, ?), (cmp FPred, ?, ?) -->
- // select ?, (cmp InvTPred, ?, ?), (cmp InvFPred, ?, ?)
- // not (select ?, (cmp TPred, ?, ?), true -->
- // select ?, (cmp InvTPred, ?, ?), false
- if (auto *Sel = dyn_cast<SelectInst>(Op0)) {
- Value *TV = Sel->getTrueValue();
- Value *FV = Sel->getFalseValue();
- auto *CmpT = dyn_cast<CmpInst>(TV);
- auto *CmpF = dyn_cast<CmpInst>(FV);
- bool InvertibleT = (CmpT && CmpT->hasOneUse()) || isa<Constant>(TV);
- bool InvertibleF = (CmpF && CmpF->hasOneUse()) || isa<Constant>(FV);
- if (InvertibleT && InvertibleF) {
- if (CmpT)
- CmpT->setPredicate(CmpT->getInversePredicate());
- else
- Sel->setTrueValue(ConstantExpr::getNot(cast<Constant>(TV)));
- if (CmpF)
- CmpF->setPredicate(CmpF->getInversePredicate());
- else
- Sel->setFalseValue(ConstantExpr::getNot(cast<Constant>(FV)));
- return replaceInstUsesWith(I, Sel);
- }
- }
- }
-
- if (Instruction *NewXor = sinkNotIntoXor(I, Builder))
- return NewXor;
-
if (Instruction *Abs = canonicalizeAbs(I, Builder))
return Abs;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 726bb545be12..bfa7bfa2290a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -67,7 +67,6 @@
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -79,11 +78,12 @@
#include <utility>
#include <vector>
+#define DEBUG_TYPE "instcombine"
+#include "llvm/Transforms/Utils/InstructionWorklist.h"
+
using namespace llvm;
using namespace PatternMatch;
-#define DEBUG_TYPE "instcombine"
-
STATISTIC(NumSimplified, "Number of library calls simplified");
static cl::opt<unsigned> GuardWideningWindow(
@@ -513,7 +513,7 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
// If the input to cttz/ctlz is known to be non-zero,
// then change the 'ZeroIsUndef' parameter to 'true'
// because we know the zero behavior can't affect the result.
- if (!Known.One.isNullValue() ||
+ if (!Known.One.isZero() ||
isKnownNonZero(Op0, IC.getDataLayout(), 0, &IC.getAssumptionCache(), &II,
&IC.getDominatorTree())) {
if (!match(II.getArgOperand(1), m_One()))
@@ -656,8 +656,8 @@ static Value *simplifyNeonTbl1(const IntrinsicInst &II,
// comparison to the first NumOperands.
static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E,
unsigned NumOperands) {
- assert(I.getNumArgOperands() >= NumOperands && "Not enough operands");
- assert(E.getNumArgOperands() >= NumOperands && "Not enough operands");
+ assert(I.arg_size() >= NumOperands && "Not enough operands");
+ assert(E.arg_size() >= NumOperands && "Not enough operands");
for (unsigned i = 0; i < NumOperands; i++)
if (I.getArgOperand(i) != E.getArgOperand(i))
return false;
@@ -682,11 +682,11 @@ removeTriviallyEmptyRange(IntrinsicInst &EndI, InstCombinerImpl &IC,
BasicBlock::reverse_iterator BI(EndI), BE(EndI.getParent()->rend());
for (; BI != BE; ++BI) {
if (auto *I = dyn_cast<IntrinsicInst>(&*BI)) {
- if (isa<DbgInfoIntrinsic>(I) ||
+ if (I->isDebugOrPseudoInst() ||
I->getIntrinsicID() == EndI.getIntrinsicID())
continue;
if (IsStart(*I)) {
- if (haveSameOperands(EndI, *I, EndI.getNumArgOperands())) {
+ if (haveSameOperands(EndI, *I, EndI.arg_size())) {
IC.eraseInstFromFunction(*I);
IC.eraseInstFromFunction(EndI);
return true;
@@ -710,7 +710,7 @@ Instruction *InstCombinerImpl::visitVAEndInst(VAEndInst &I) {
}
static CallInst *canonicalizeConstantArg0ToArg1(CallInst &Call) {
- assert(Call.getNumArgOperands() > 1 && "Need at least 2 args to swap");
+ assert(Call.arg_size() > 1 && "Need at least 2 args to swap");
Value *Arg0 = Call.getArgOperand(0), *Arg1 = Call.getArgOperand(1);
if (isa<Constant>(Arg0) && !isa<Constant>(Arg1)) {
Call.setArgOperand(0, Arg1);
@@ -754,6 +754,45 @@ static Optional<bool> getKnownSign(Value *Op, Instruction *CxtI,
ICmpInst::ICMP_SLT, Op, Constant::getNullValue(Op->getType()), CxtI, DL);
}
+/// Try to canonicalize min/max(X + C0, C1) as min/max(X, C1 - C0) + C0. This
+/// can trigger other combines.
+static Instruction *moveAddAfterMinMax(IntrinsicInst *II,
+ InstCombiner::BuilderTy &Builder) {
+ Intrinsic::ID MinMaxID = II->getIntrinsicID();
+ assert((MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin ||
+ MinMaxID == Intrinsic::umax || MinMaxID == Intrinsic::umin) &&
+ "Expected a min or max intrinsic");
+
+ // TODO: Match vectors with undef elements, but undef may not propagate.
+ Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
+ Value *X;
+ const APInt *C0, *C1;
+ if (!match(Op0, m_OneUse(m_Add(m_Value(X), m_APInt(C0)))) ||
+ !match(Op1, m_APInt(C1)))
+ return nullptr;
+
+ // Check for necessary no-wrap and overflow constraints.
+ bool IsSigned = MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin;
+ auto *Add = cast<BinaryOperator>(Op0);
+ if ((IsSigned && !Add->hasNoSignedWrap()) ||
+ (!IsSigned && !Add->hasNoUnsignedWrap()))
+ return nullptr;
+
+ // If the constant difference overflows, then instsimplify should reduce the
+ // min/max to the add or C1.
+ bool Overflow;
+ APInt CDiff =
+ IsSigned ? C1->ssub_ov(*C0, Overflow) : C1->usub_ov(*C0, Overflow);
+ assert(!Overflow && "Expected simplify of min/max");
+
+ // min/max (add X, C0), C1 --> add (min/max X, C1 - C0), C0
+ // Note: the "mismatched" no-overflow setting does not propagate.
+ Constant *NewMinMaxC = ConstantInt::get(II->getType(), CDiff);
+ Value *NewMinMax = Builder.CreateBinaryIntrinsic(MinMaxID, X, NewMinMaxC);
+ return IsSigned ? BinaryOperator::CreateNSWAdd(NewMinMax, Add->getOperand(1))
+ : BinaryOperator::CreateNUWAdd(NewMinMax, Add->getOperand(1));
+}
+
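Sketch of moveAddAfterMinMax on an unsigned example (the constants are illustrative; the nuw flag is required for the unsigned case):

  declare i32 @llvm.umin.i32(i32, i32)

  define i32 @umin_of_add(i32 %x) {
    %a = add nuw i32 %x, 5
    %m = call i32 @llvm.umin.i32(i32 %a, i32 20)
    ret i32 %m
  }

  ; expected to become roughly:
  ;   %m = call i32 @llvm.umin.i32(i32 %x, i32 15)
  ;   %r = add nuw i32 %m, 5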
/// If we have a clamp pattern like max (min X, 42), 41 -- where the output
/// can only be one of two possible constant values -- turn that into a select
/// of constants.
@@ -795,6 +834,63 @@ static Instruction *foldClampRangeOfTwo(IntrinsicInst *II,
return SelectInst::Create(Cmp, ConstantInt::get(II->getType(), *C0), I1);
}
+/// Reduce a sequence of min/max intrinsics with a common operand.
+static Instruction *factorizeMinMaxTree(IntrinsicInst *II) {
+ // Match 3 of the same min/max ops. Example: umin(umin(), umin()).
+ auto *LHS = dyn_cast<IntrinsicInst>(II->getArgOperand(0));
+ auto *RHS = dyn_cast<IntrinsicInst>(II->getArgOperand(1));
+ Intrinsic::ID MinMaxID = II->getIntrinsicID();
+ if (!LHS || !RHS || LHS->getIntrinsicID() != MinMaxID ||
+ RHS->getIntrinsicID() != MinMaxID ||
+ (!LHS->hasOneUse() && !RHS->hasOneUse()))
+ return nullptr;
+
+ Value *A = LHS->getArgOperand(0);
+ Value *B = LHS->getArgOperand(1);
+ Value *C = RHS->getArgOperand(0);
+ Value *D = RHS->getArgOperand(1);
+
+ // Look for a common operand.
+ Value *MinMaxOp = nullptr;
+ Value *ThirdOp = nullptr;
+ if (LHS->hasOneUse()) {
+ // If the LHS is only used in this chain and the RHS is used outside of it,
+ // reuse the RHS min/max because that will eliminate the LHS.
+ if (D == A || C == A) {
+ // min(min(a, b), min(c, a)) --> min(min(c, a), b)
+ // min(min(a, b), min(a, d)) --> min(min(a, d), b)
+ MinMaxOp = RHS;
+ ThirdOp = B;
+ } else if (D == B || C == B) {
+ // min(min(a, b), min(c, b)) --> min(min(c, b), a)
+ // min(min(a, b), min(b, d)) --> min(min(b, d), a)
+ MinMaxOp = RHS;
+ ThirdOp = A;
+ }
+ } else {
+ assert(RHS->hasOneUse() && "Expected one-use operand");
+ // Reuse the LHS. This will eliminate the RHS.
+ if (D == A || D == B) {
+ // min(min(a, b), min(c, a)) --> min(min(a, b), c)
+ // min(min(a, b), min(c, b)) --> min(min(a, b), c)
+ MinMaxOp = LHS;
+ ThirdOp = C;
+ } else if (C == A || C == B) {
+ // min(min(a, b), min(b, d)) --> min(min(a, b), d)
+      // min(min(a, b), min(a, d)) --> min(min(a, b), d)
+ MinMaxOp = LHS;
+ ThirdOp = D;
+ }
+ }
+
+ if (!MinMaxOp || !ThirdOp)
+ return nullptr;
+
+ Module *Mod = II->getModule();
+ Function *MinMax = Intrinsic::getDeclaration(Mod, MinMaxID, II->getType());
+ return CallInst::Create(MinMax, { MinMaxOp, ThirdOp });
+}
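Sketch of factorizeMinMaxTree reusing the other min/max of a shared operand (values are illustrative):

  declare i8 @llvm.umin.i8(i8, i8)

  define i8 @umin_tree(i8 %a, i8 %b, i8 %c) {
    %l = call i8 @llvm.umin.i8(i8 %a, i8 %b)
    %r = call i8 @llvm.umin.i8(i8 %c, i8 %a)   ; shares %a with %l
    %m = call i8 @llvm.umin.i8(i8 %l, i8 %r)
    ret i8 %m
  }

  ; expected to become roughly umin(umin(%c, %a), %b):
  ;   %m = call i8 @llvm.umin.i8(i8 %r, i8 %b)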
+
/// CallInst simplification. This mostly only handles folding of intrinsic
/// instructions. For normal calls, it allows visitCallBase to do the heavy
/// lifting.
@@ -896,7 +992,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
if (auto *IIFVTy = dyn_cast<FixedVectorType>(II->getType())) {
auto VWidth = IIFVTy->getNumElements();
APInt UndefElts(VWidth, 0);
- APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
+ APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, UndefElts)) {
if (V != II)
return replaceInstUsesWith(*II, V);
@@ -1007,21 +1103,45 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
}
}
- if (match(I0, m_Not(m_Value(X)))) {
- // max (not X), (not Y) --> not (min X, Y)
- Intrinsic::ID InvID = getInverseMinMaxIntrinsic(IID);
- if (match(I1, m_Not(m_Value(Y))) &&
+ if (IID == Intrinsic::smax || IID == Intrinsic::smin) {
+ // smax (neg nsw X), (neg nsw Y) --> neg nsw (smin X, Y)
+ // smin (neg nsw X), (neg nsw Y) --> neg nsw (smax X, Y)
+ // TODO: Canonicalize neg after min/max if I1 is constant.
+ if (match(I0, m_NSWNeg(m_Value(X))) && match(I1, m_NSWNeg(m_Value(Y))) &&
(I0->hasOneUse() || I1->hasOneUse())) {
+ Intrinsic::ID InvID = getInverseMinMaxIntrinsic(IID);
Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, X, Y);
- return BinaryOperator::CreateNot(InvMaxMin);
+ return BinaryOperator::CreateNSWNeg(InvMaxMin);
}
- // max (not X), C --> not(min X, ~C)
- if (match(I1, m_Constant(C)) && I0->hasOneUse()) {
- Constant *NotC = ConstantExpr::getNot(C);
- Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, X, NotC);
+ }
+
+ // If we can eliminate ~A and Y is free to invert:
+ // max ~A, Y --> ~(min A, ~Y)
+ //
+ // Examples:
+ // max ~A, ~Y --> ~(min A, Y)
+ // max ~A, C --> ~(min A, ~C)
+ // max ~A, (max ~Y, ~Z) --> ~min( A, (min Y, Z))
+ auto moveNotAfterMinMax = [&](Value *X, Value *Y) -> Instruction * {
+ Value *A;
+ if (match(X, m_OneUse(m_Not(m_Value(A)))) &&
+ !isFreeToInvert(A, A->hasOneUse()) &&
+ isFreeToInvert(Y, Y->hasOneUse())) {
+ Value *NotY = Builder.CreateNot(Y);
+ Intrinsic::ID InvID = getInverseMinMaxIntrinsic(IID);
+ Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, A, NotY);
return BinaryOperator::CreateNot(InvMaxMin);
}
- }
+ return nullptr;
+ };
+
+ if (Instruction *I = moveNotAfterMinMax(I0, I1))
+ return I;
+ if (Instruction *I = moveNotAfterMinMax(I1, I0))
+ return I;
+
+ if (Instruction *I = moveAddAfterMinMax(II, Builder))
+ return I;
// smax(X, -X) --> abs(X)
// smin(X, -X) --> -abs(X)
@@ -1051,11 +1171,17 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
if (Instruction *Sel = foldClampRangeOfTwo(II, Builder))
return Sel;
+ if (Instruction *SAdd = matchSAddSubSat(*II))
+ return SAdd;
+
if (match(I1, m_ImmConstant()))
if (auto *Sel = dyn_cast<SelectInst>(I0))
if (Instruction *R = FoldOpIntoSelect(*II, Sel))
return R;
+ if (Instruction *NewMinMax = factorizeMinMaxTree(II))
+ return NewMinMax;
+
break;
}
case Intrinsic::bswap: {
@@ -1098,6 +1224,19 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
if (Power->equalsInt(2))
return BinaryOperator::CreateFMulFMF(II->getArgOperand(0),
II->getArgOperand(0), II);
+
+ if (!Power->getValue()[0]) {
+ Value *X;
+ // If power is even:
+ // powi(-x, p) -> powi(x, p)
+ // powi(fabs(x), p) -> powi(x, p)
+ // powi(copysign(x, y), p) -> powi(x, p)
+ if (match(II->getArgOperand(0), m_FNeg(m_Value(X))) ||
+ match(II->getArgOperand(0), m_FAbs(m_Value(X))) ||
+ match(II->getArgOperand(0),
+ m_Intrinsic<Intrinsic::copysign>(m_Value(X), m_Value())))
+ return replaceOperand(*II, 0, X);
+ }
}
break;
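Sketch of the even-power powi fold (the exponent 4 and the fneg operand are illustrative):

  declare double @llvm.powi.f64.i32(double, i32)

  define double @powi_even(double %x) {
    %n = fneg double %x
    %p = call double @llvm.powi.f64.i32(double %n, i32 4)   ; even power: the sign is irrelevant
    ret double %p
  }

  ; expected to become roughly:
  ;   %p = call double @llvm.powi.f64.i32(double %x, i32 4)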
@@ -1637,14 +1776,66 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
break;
}
case Intrinsic::stackrestore: {
- // If the save is right next to the restore, remove the restore. This can
- // happen when variable allocas are DCE'd.
+ enum class ClassifyResult {
+ None,
+ Alloca,
+ StackRestore,
+ CallWithSideEffects,
+ };
+ auto Classify = [](const Instruction *I) {
+ if (isa<AllocaInst>(I))
+ return ClassifyResult::Alloca;
+
+ if (auto *CI = dyn_cast<CallInst>(I)) {
+ if (auto *II = dyn_cast<IntrinsicInst>(CI)) {
+ if (II->getIntrinsicID() == Intrinsic::stackrestore)
+ return ClassifyResult::StackRestore;
+
+ if (II->mayHaveSideEffects())
+ return ClassifyResult::CallWithSideEffects;
+ } else {
+ // Consider all non-intrinsic calls to be side effects
+ return ClassifyResult::CallWithSideEffects;
+ }
+ }
+
+ return ClassifyResult::None;
+ };
+
+ // If the stacksave and the stackrestore are in the same BB, and there is
+ // no intervening call, alloca, or stackrestore of a different stacksave,
+ // remove the restore. This can happen when variable allocas are DCE'd.
if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
- if (SS->getIntrinsicID() == Intrinsic::stacksave) {
- // Skip over debug info.
- if (SS->getNextNonDebugInstruction() == II) {
- return eraseInstFromFunction(CI);
+ if (SS->getIntrinsicID() == Intrinsic::stacksave &&
+ SS->getParent() == II->getParent()) {
+ BasicBlock::iterator BI(SS);
+ bool CannotRemove = false;
+ for (++BI; &*BI != II; ++BI) {
+ switch (Classify(&*BI)) {
+ case ClassifyResult::None:
+ // So far so good, look at next instructions.
+ break;
+
+ case ClassifyResult::StackRestore:
+ // If we found an intervening stackrestore for a different
+ // stacksave, we can't remove the stackrestore. Otherwise, continue.
+ if (cast<IntrinsicInst>(*BI).getArgOperand(0) != SS)
+ CannotRemove = true;
+ break;
+
+ case ClassifyResult::Alloca:
+ case ClassifyResult::CallWithSideEffects:
+ // If we found an alloca, a non-intrinsic call, or an intrinsic
+ // call with side effects, we can't remove the stackrestore.
+ CannotRemove = true;
+ break;
+ }
+ if (CannotRemove)
+ break;
}
+
+ if (!CannotRemove)
+ return eraseInstFromFunction(CI);
}
}
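Sketch of a stackrestore that the extended check can now drop: the matching stacksave is in the same block and only a harmless instruction sits between them (the add is illustrative):

  declare i8* @llvm.stacksave()
  declare void @llvm.stackrestore(i8*)

  define i32 @save_restore(i32 %a) {
    %sp = call i8* @llvm.stacksave()
    %t  = add i32 %a, 1                      ; no alloca, call, or other restore in between
    call void @llvm.stackrestore(i8* %sp)    ; expected to be removed
    ret i32 %t
  }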
@@ -1654,29 +1845,25 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
Instruction *TI = II->getParent()->getTerminator();
bool CannotRemove = false;
for (++BI; &*BI != TI; ++BI) {
- if (isa<AllocaInst>(BI)) {
+ switch (Classify(&*BI)) {
+ case ClassifyResult::None:
+ // So far so good, look at next instructions.
+ break;
+
+ case ClassifyResult::StackRestore:
+ // If there is a stackrestore below this one, remove this one.
+ return eraseInstFromFunction(CI);
+
+ case ClassifyResult::Alloca:
+ case ClassifyResult::CallWithSideEffects:
+ // If we found an alloca, a non-intrinsic call, or an intrinsic call
+ // with side effects (such as llvm.stacksave and llvm.read_register),
+ // we can't remove the stack restore.
CannotRemove = true;
break;
}
- if (CallInst *BCI = dyn_cast<CallInst>(BI)) {
- if (auto *II2 = dyn_cast<IntrinsicInst>(BCI)) {
- // If there is a stackrestore below this one, remove this one.
- if (II2->getIntrinsicID() == Intrinsic::stackrestore)
- return eraseInstFromFunction(CI);
-
- // Bail if we cross over an intrinsic with side effects, such as
- // llvm.stacksave, or llvm.read_register.
- if (II2->mayHaveSideEffects()) {
- CannotRemove = true;
- break;
- }
- } else {
- // If we found a non-intrinsic call, we can't remove the stack
- // restore.
- CannotRemove = true;
- break;
- }
- }
+ if (CannotRemove)
+ break;
}
// If the stack restore is in a return, resume, or unwind block and if there
@@ -1963,6 +2150,46 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
}
break;
}
+ case Intrinsic::experimental_vector_reverse: {
+ Value *BO0, *BO1, *X, *Y;
+ Value *Vec = II->getArgOperand(0);
+ if (match(Vec, m_OneUse(m_BinOp(m_Value(BO0), m_Value(BO1))))) {
+ auto *OldBinOp = cast<BinaryOperator>(Vec);
+ if (match(BO0, m_Intrinsic<Intrinsic::experimental_vector_reverse>(
+ m_Value(X)))) {
+ // rev(binop rev(X), rev(Y)) --> binop X, Y
+ if (match(BO1, m_Intrinsic<Intrinsic::experimental_vector_reverse>(
+ m_Value(Y))))
+ return replaceInstUsesWith(CI,
+ BinaryOperator::CreateWithCopiedFlags(
+ OldBinOp->getOpcode(), X, Y, OldBinOp,
+ OldBinOp->getName(), II));
+ // rev(binop rev(X), BO1Splat) --> binop X, BO1Splat
+ if (isSplatValue(BO1))
+ return replaceInstUsesWith(CI,
+ BinaryOperator::CreateWithCopiedFlags(
+ OldBinOp->getOpcode(), X, BO1,
+ OldBinOp, OldBinOp->getName(), II));
+ }
+ // rev(binop BO0Splat, rev(Y)) --> binop BO0Splat, Y
+ if (match(BO1, m_Intrinsic<Intrinsic::experimental_vector_reverse>(
+ m_Value(Y))) &&
+ isSplatValue(BO0))
+ return replaceInstUsesWith(CI, BinaryOperator::CreateWithCopiedFlags(
+ OldBinOp->getOpcode(), BO0, Y,
+ OldBinOp, OldBinOp->getName(), II));
+ }
+ // rev(unop rev(X)) --> unop X
+ if (match(Vec, m_OneUse(m_UnOp(
+ m_Intrinsic<Intrinsic::experimental_vector_reverse>(
+ m_Value(X)))))) {
+ auto *OldUnOp = cast<UnaryOperator>(Vec);
+ auto *NewUnOp = UnaryOperator::CreateWithCopiedFlags(
+ OldUnOp->getOpcode(), X, OldUnOp, OldUnOp->getName(), II);
+ return replaceInstUsesWith(CI, NewUnOp);
+ }
+ break;
+ }
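Sketch of the new reverse(binop(reverse, reverse)) fold (the add and the vector width are illustrative):

  declare <4 x i32> @llvm.experimental.vector.reverse.v4i32(<4 x i32>)

  define <4 x i32> @rev_binop(<4 x i32> %x, <4 x i32> %y) {
    %rx  = call <4 x i32> @llvm.experimental.vector.reverse.v4i32(<4 x i32> %x)
    %ry  = call <4 x i32> @llvm.experimental.vector.reverse.v4i32(<4 x i32> %y)
    %add = add <4 x i32> %rx, %ry
    %r   = call <4 x i32> @llvm.experimental.vector.reverse.v4i32(<4 x i32> %add)
    ret <4 x i32> %r
  }

  ; expected to become roughly:
  ;   %r = add <4 x i32> %x, %y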
case Intrinsic::vector_reduce_or:
case Intrinsic::vector_reduce_and: {
// Canonicalize logical or/and reductions:
@@ -1973,21 +2200,26 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
// %val = bitcast <ReduxWidth x i1> to iReduxWidth
// %res = cmp eq iReduxWidth %val, 11111
Value *Arg = II->getArgOperand(0);
- Type *RetTy = II->getType();
- if (RetTy == Builder.getInt1Ty())
- if (auto *FVTy = dyn_cast<FixedVectorType>(Arg->getType())) {
- Value *Res = Builder.CreateBitCast(
- Arg, Builder.getIntNTy(FVTy->getNumElements()));
- if (IID == Intrinsic::vector_reduce_and) {
- Res = Builder.CreateICmpEQ(
- Res, ConstantInt::getAllOnesValue(Res->getType()));
- } else {
- assert(IID == Intrinsic::vector_reduce_or &&
- "Expected or reduction.");
- Res = Builder.CreateIsNotNull(Res);
+ Value *Vect;
+ if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
+ if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
+ if (FTy->getElementType() == Builder.getInt1Ty()) {
+ Value *Res = Builder.CreateBitCast(
+ Vect, Builder.getIntNTy(FTy->getNumElements()));
+ if (IID == Intrinsic::vector_reduce_and) {
+ Res = Builder.CreateICmpEQ(
+ Res, ConstantInt::getAllOnesValue(Res->getType()));
+ } else {
+ assert(IID == Intrinsic::vector_reduce_or &&
+ "Expected or reduction.");
+ Res = Builder.CreateIsNotNull(Res);
+ }
+ if (Arg != Vect)
+ Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
+ II->getType());
+ return replaceInstUsesWith(CI, Res);
}
- return replaceInstUsesWith(CI, Res);
- }
+ }
LLVM_FALLTHROUGH;
}
case Intrinsic::vector_reduce_add: {
@@ -2017,12 +2249,117 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
}
LLVM_FALLTHROUGH;
}
- case Intrinsic::vector_reduce_mul:
- case Intrinsic::vector_reduce_xor:
- case Intrinsic::vector_reduce_umax:
+ case Intrinsic::vector_reduce_xor: {
+ if (IID == Intrinsic::vector_reduce_xor) {
+ // Exclusive disjunction reduction over the vector with
+ // (potentially-extended) i1 element type is actually a
+ // (potentially-extended) arithmetic `add` reduction over the original
+ // non-extended value:
+ // vector_reduce_xor(?ext(<n x i1>))
+ // -->
+ // ?ext(vector_reduce_add(<n x i1>))
+ Value *Arg = II->getArgOperand(0);
+ Value *Vect;
+ if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
+ if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
+ if (FTy->getElementType() == Builder.getInt1Ty()) {
+ Value *Res = Builder.CreateAddReduce(Vect);
+ if (Arg != Vect)
+ Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
+ II->getType());
+ return replaceInstUsesWith(CI, Res);
+ }
+ }
+ }
+ LLVM_FALLTHROUGH;
+ }
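Sketch of the i1 xor-reduction canonicalization (the vector width and the zext are illustrative):

  declare i32 @llvm.vector.reduce.xor.v8i32(<8 x i32>)

  define i32 @reduce_xor_bool(<8 x i1> %v) {
    %e = zext <8 x i1> %v to <8 x i32>
    %r = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> %e)
    ret i32 %r
  }

  ; expected to become roughly:
  ;   %a = call i1 @llvm.vector.reduce.add.v8i1(<8 x i1> %v)
  ;   %r = zext i1 %a to i32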
+ case Intrinsic::vector_reduce_mul: {
+ if (IID == Intrinsic::vector_reduce_mul) {
+ // Multiplicative reduction over the vector with (potentially-extended)
+ // i1 element type is actually a (potentially zero-extended)
+ // logical `and` reduction over the original non-extended value:
+ // vector_reduce_mul(?ext(<n x i1>))
+ // -->
+ // zext(vector_reduce_and(<n x i1>))
+ Value *Arg = II->getArgOperand(0);
+ Value *Vect;
+ if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
+ if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
+ if (FTy->getElementType() == Builder.getInt1Ty()) {
+ Value *Res = Builder.CreateAndReduce(Vect);
+ if (Res->getType() != II->getType())
+ Res = Builder.CreateZExt(Res, II->getType());
+ return replaceInstUsesWith(CI, Res);
+ }
+ }
+ }
+ LLVM_FALLTHROUGH;
+ }
case Intrinsic::vector_reduce_umin:
- case Intrinsic::vector_reduce_smax:
+ case Intrinsic::vector_reduce_umax: {
+ if (IID == Intrinsic::vector_reduce_umin ||
+ IID == Intrinsic::vector_reduce_umax) {
+ // UMin/UMax reduction over the vector with (potentially-extended)
+ // i1 element type is actually a (potentially-extended)
+ // logical `and`/`or` reduction over the original non-extended value:
+ // vector_reduce_u{min,max}(?ext(<n x i1>))
+ // -->
+ // ?ext(vector_reduce_{and,or}(<n x i1>))
+ Value *Arg = II->getArgOperand(0);
+ Value *Vect;
+ if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
+ if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
+ if (FTy->getElementType() == Builder.getInt1Ty()) {
+ Value *Res = IID == Intrinsic::vector_reduce_umin
+ ? Builder.CreateAndReduce(Vect)
+ : Builder.CreateOrReduce(Vect);
+ if (Arg != Vect)
+ Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
+ II->getType());
+ return replaceInstUsesWith(CI, Res);
+ }
+ }
+ }
+ LLVM_FALLTHROUGH;
+ }
case Intrinsic::vector_reduce_smin:
+ case Intrinsic::vector_reduce_smax: {
+ if (IID == Intrinsic::vector_reduce_smin ||
+ IID == Intrinsic::vector_reduce_smax) {
+ // SMin/SMax reduction over the vector with (potentially-extended)
+ // i1 element type is actually a (potentially-extended)
+ // logical `and`/`or` reduction over the original non-extended value:
+ // vector_reduce_s{min,max}(<n x i1>)
+ // -->
+ // vector_reduce_{or,and}(<n x i1>)
+ // and
+ // vector_reduce_s{min,max}(sext(<n x i1>))
+ // -->
+ // sext(vector_reduce_{or,and}(<n x i1>))
+ // and
+ // vector_reduce_s{min,max}(zext(<n x i1>))
+ // -->
+ // zext(vector_reduce_{and,or}(<n x i1>))
+ Value *Arg = II->getArgOperand(0);
+ Value *Vect;
+ if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
+ if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
+ if (FTy->getElementType() == Builder.getInt1Ty()) {
+ Instruction::CastOps ExtOpc = Instruction::CastOps::CastOpsEnd;
+ if (Arg != Vect)
+ ExtOpc = cast<CastInst>(Arg)->getOpcode();
+ Value *Res = ((IID == Intrinsic::vector_reduce_smin) ==
+ (ExtOpc == Instruction::CastOps::ZExt))
+ ? Builder.CreateAndReduce(Vect)
+ : Builder.CreateOrReduce(Vect);
+ if (Arg != Vect)
+ Res = Builder.CreateCast(ExtOpc, Res, II->getType());
+ return replaceInstUsesWith(CI, Res);
+ }
+ }
+ }
+ LLVM_FALLTHROUGH;
+ }
case Intrinsic::vector_reduce_fmax:
case Intrinsic::vector_reduce_fmin:
case Intrinsic::vector_reduce_fadd:
@@ -2228,7 +2565,7 @@ static IntrinsicInst *findInitTrampoline(Value *Callee) {
}
void InstCombinerImpl::annotateAnyAllocSite(CallBase &Call, const TargetLibraryInfo *TLI) {
- unsigned NumArgs = Call.getNumArgOperands();
+ unsigned NumArgs = Call.arg_size();
ConstantInt *Op0C = dyn_cast<ConstantInt>(Call.getOperand(0));
ConstantInt *Op1C =
(NumArgs == 1) ? nullptr : dyn_cast<ConstantInt>(Call.getOperand(1));
@@ -2239,55 +2576,46 @@ void InstCombinerImpl::annotateAnyAllocSite(CallBase &Call, const TargetLibraryI
if (isMallocLikeFn(&Call, TLI) && Op0C) {
if (isOpNewLikeFn(&Call, TLI))
- Call.addAttribute(AttributeList::ReturnIndex,
- Attribute::getWithDereferenceableBytes(
- Call.getContext(), Op0C->getZExtValue()));
+ Call.addRetAttr(Attribute::getWithDereferenceableBytes(
+ Call.getContext(), Op0C->getZExtValue()));
else
- Call.addAttribute(AttributeList::ReturnIndex,
- Attribute::getWithDereferenceableOrNullBytes(
- Call.getContext(), Op0C->getZExtValue()));
+ Call.addRetAttr(Attribute::getWithDereferenceableOrNullBytes(
+ Call.getContext(), Op0C->getZExtValue()));
} else if (isAlignedAllocLikeFn(&Call, TLI)) {
if (Op1C)
- Call.addAttribute(AttributeList::ReturnIndex,
- Attribute::getWithDereferenceableOrNullBytes(
- Call.getContext(), Op1C->getZExtValue()));
+ Call.addRetAttr(Attribute::getWithDereferenceableOrNullBytes(
+ Call.getContext(), Op1C->getZExtValue()));
// Add alignment attribute if alignment is a power of two constant.
if (Op0C && Op0C->getValue().ult(llvm::Value::MaximumAlignment) &&
isKnownNonZero(Call.getOperand(1), DL, 0, &AC, &Call, &DT)) {
uint64_t AlignmentVal = Op0C->getZExtValue();
if (llvm::isPowerOf2_64(AlignmentVal)) {
- Call.removeAttribute(AttributeList::ReturnIndex, Attribute::Alignment);
- Call.addAttribute(AttributeList::ReturnIndex,
- Attribute::getWithAlignment(Call.getContext(),
- Align(AlignmentVal)));
+ Call.removeRetAttr(Attribute::Alignment);
+ Call.addRetAttr(Attribute::getWithAlignment(Call.getContext(),
+ Align(AlignmentVal)));
}
}
} else if (isReallocLikeFn(&Call, TLI) && Op1C) {
- Call.addAttribute(AttributeList::ReturnIndex,
- Attribute::getWithDereferenceableOrNullBytes(
- Call.getContext(), Op1C->getZExtValue()));
+ Call.addRetAttr(Attribute::getWithDereferenceableOrNullBytes(
+ Call.getContext(), Op1C->getZExtValue()));
} else if (isCallocLikeFn(&Call, TLI) && Op0C && Op1C) {
bool Overflow;
const APInt &N = Op0C->getValue();
APInt Size = N.umul_ov(Op1C->getValue(), Overflow);
if (!Overflow)
- Call.addAttribute(AttributeList::ReturnIndex,
- Attribute::getWithDereferenceableOrNullBytes(
- Call.getContext(), Size.getZExtValue()));
+ Call.addRetAttr(Attribute::getWithDereferenceableOrNullBytes(
+ Call.getContext(), Size.getZExtValue()));
} else if (isStrdupLikeFn(&Call, TLI)) {
uint64_t Len = GetStringLength(Call.getOperand(0));
if (Len) {
// strdup
if (NumArgs == 1)
- Call.addAttribute(AttributeList::ReturnIndex,
- Attribute::getWithDereferenceableOrNullBytes(
- Call.getContext(), Len));
+ Call.addRetAttr(Attribute::getWithDereferenceableOrNullBytes(
+ Call.getContext(), Len));
// strndup
else if (NumArgs == 2 && Op1C)
- Call.addAttribute(
- AttributeList::ReturnIndex,
- Attribute::getWithDereferenceableOrNullBytes(
- Call.getContext(), std::min(Len, Op1C->getZExtValue() + 1)));
+ Call.addRetAttr(Attribute::getWithDereferenceableOrNullBytes(
+ Call.getContext(), std::min(Len, Op1C->getZExtValue() + 1)));
}
}
}
@@ -2489,7 +2817,7 @@ Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
// isKnownNonNull -> nonnull attribute
if (!GCR.hasRetAttr(Attribute::NonNull) &&
isKnownNonZero(DerivedPtr, DL, 0, &AC, &Call, &DT)) {
- GCR.addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
+ GCR.addRetAttr(Attribute::NonNull);
// We discovered new fact, re-check users.
Worklist.pushUsersToWorkList(GCR);
}
@@ -2646,19 +2974,19 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
if (!CastInst::isBitOrNoopPointerCastable(ActTy, ParamTy, DL))
return false; // Cannot transform this parameter value.
- if (AttrBuilder(CallerPAL.getParamAttributes(i))
+ if (AttrBuilder(CallerPAL.getParamAttrs(i))
.overlaps(AttributeFuncs::typeIncompatible(ParamTy)))
return false; // Attribute not compatible with transformed value.
if (Call.isInAllocaArgument(i))
return false; // Cannot transform to and from inalloca.
- if (CallerPAL.hasParamAttribute(i, Attribute::SwiftError))
+ if (CallerPAL.hasParamAttr(i, Attribute::SwiftError))
return false;
// If the parameter is passed as a byval argument, then we have to have a
// sized type and the sized type has to have the same size as the old type.
- if (ParamTy != ActTy && CallerPAL.hasParamAttribute(i, Attribute::ByVal)) {
+ if (ParamTy != ActTy && CallerPAL.hasParamAttr(i, Attribute::ByVal)) {
PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
if (!ParamPTy || !ParamPTy->getElementType()->isSized())
return false;
@@ -2699,7 +3027,7 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
// that are compatible with being a vararg call argument.
unsigned SRetIdx;
if (CallerPAL.hasAttrSomewhere(Attribute::StructRet, &SRetIdx) &&
- SRetIdx > FT->getNumParams())
+ SRetIdx - AttributeList::FirstArgIndex >= FT->getNumParams())
return false;
}
@@ -2728,12 +3056,12 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
Args.push_back(NewArg);
// Add any parameter attributes.
- if (CallerPAL.hasParamAttribute(i, Attribute::ByVal)) {
- AttrBuilder AB(CallerPAL.getParamAttributes(i));
+ if (CallerPAL.hasParamAttr(i, Attribute::ByVal)) {
+ AttrBuilder AB(CallerPAL.getParamAttrs(i));
AB.addByValAttr(NewArg->getType()->getPointerElementType());
ArgAttrs.push_back(AttributeSet::get(Ctx, AB));
} else
- ArgAttrs.push_back(CallerPAL.getParamAttributes(i));
+ ArgAttrs.push_back(CallerPAL.getParamAttrs(i));
}
// If the function takes more arguments than the call was taking, add them
@@ -2760,12 +3088,12 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
Args.push_back(NewArg);
// Add any parameter attributes.
- ArgAttrs.push_back(CallerPAL.getParamAttributes(i));
+ ArgAttrs.push_back(CallerPAL.getParamAttrs(i));
}
}
}
- AttributeSet FnAttrs = CallerPAL.getFnAttributes();
+ AttributeSet FnAttrs = CallerPAL.getFnAttrs();
if (NewRetTy->isVoidTy())
Caller->setName(""); // Void type should not have a name.
@@ -2866,7 +3194,7 @@ InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call,
for (FunctionType::param_iterator I = NestFTy->param_begin(),
E = NestFTy->param_end();
I != E; ++NestArgNo, ++I) {
- AttributeSet AS = NestAttrs.getParamAttributes(NestArgNo);
+ AttributeSet AS = NestAttrs.getParamAttrs(NestArgNo);
if (AS.hasAttribute(Attribute::Nest)) {
// Record the parameter type and any other attributes.
NestTy = *I;
@@ -2902,7 +3230,7 @@ InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call,
// Add the original argument and attributes.
NewArgs.push_back(*I);
- NewArgAttrs.push_back(Attrs.getParamAttributes(ArgNo));
+ NewArgAttrs.push_back(Attrs.getParamAttrs(ArgNo));
++ArgNo;
++I;
@@ -2948,8 +3276,8 @@ InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call,
NestF : ConstantExpr::getBitCast(NestF,
PointerType::getUnqual(NewFTy));
AttributeList NewPAL =
- AttributeList::get(FTy->getContext(), Attrs.getFnAttributes(),
- Attrs.getRetAttributes(), NewArgAttrs);
+ AttributeList::get(FTy->getContext(), Attrs.getFnAttrs(),
+ Attrs.getRetAttrs(), NewArgAttrs);
SmallVector<OperandBundleDef, 1> OpBundles;
Call.getOperandBundlesAsDefs(OpBundles);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 04877bec94ec..ca87477c5d81 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -333,7 +333,7 @@ Instruction *InstCombinerImpl::commonCastTransforms(CastInst &CI) {
SrcTy->getNumElements() == DestTy->getNumElements() &&
SrcTy->getPrimitiveSizeInBits() == DestTy->getPrimitiveSizeInBits()) {
Value *CastX = Builder.CreateCast(CI.getOpcode(), X, DestTy);
- return new ShuffleVectorInst(CastX, UndefValue::get(DestTy), Mask);
+ return new ShuffleVectorInst(CastX, Mask);
}
}
@@ -701,10 +701,10 @@ static Instruction *shrinkSplatShuffle(TruncInst &Trunc,
if (Shuf && Shuf->hasOneUse() && match(Shuf->getOperand(1), m_Undef()) &&
is_splat(Shuf->getShuffleMask()) &&
Shuf->getType() == Shuf->getOperand(0)->getType()) {
- // trunc (shuf X, Undef, SplatMask) --> shuf (trunc X), Undef, SplatMask
- Constant *NarrowUndef = UndefValue::get(Trunc.getType());
+ // trunc (shuf X, Undef, SplatMask) --> shuf (trunc X), Poison, SplatMask
+ // trunc (shuf X, Poison, SplatMask) --> shuf (trunc X), Poison, SplatMask
Value *NarrowOp = Builder.CreateTrunc(Shuf->getOperand(0), Trunc.getType());
- return new ShuffleVectorInst(NarrowOp, NarrowUndef, Shuf->getShuffleMask());
+ return new ShuffleVectorInst(NarrowOp, Shuf->getShuffleMask());
}
return nullptr;
@@ -961,14 +961,25 @@ Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
return BinaryOperator::CreateAdd(NarrowCtlz, WidthDiff);
}
}
+
+ if (match(Src, m_VScale(DL))) {
+ if (Trunc.getFunction() &&
+ Trunc.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
+ unsigned MaxVScale = Trunc.getFunction()
+ ->getFnAttribute(Attribute::VScaleRange)
+ .getVScaleRangeArgs()
+ .second;
+ if (MaxVScale > 0 && Log2_32(MaxVScale) < DestWidth) {
+ Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
+ return replaceInstUsesWith(Trunc, VScale);
+ }
+ }
+ }
+
return nullptr;
}
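
The new m_VScale fold above (and the analogous zext/sext folds later in this file) relies on a range argument: the vscale_range attribute bounds vscale by MaxVScale, and when Log2_32(MaxVScale) < DestWidth the truncation cannot drop any set bits. The standalone check below is not part of the diff; it uses the GCC/Clang __builtin_clz builtin as a stand-in for Log2_32.

#include <cassert>
#include <cstdint>

static unsigned Log2U32(uint32_t X) { return 31 - __builtin_clz(X); } // ~ Log2_32

int main() {
  for (uint32_t MaxVScale = 1; MaxVScale <= 1024; ++MaxVScale)
    for (unsigned DestWidth = 1; DestWidth <= 16; ++DestWidth)
      if (Log2U32(MaxVScale) < DestWidth)
        for (uint32_t VScale = 1; VScale <= MaxVScale; ++VScale)
          // Truncating to DestWidth bits keeps the value intact.
          assert((VScale & ((1u << DestWidth) - 1)) == VScale);
  return 0;
}
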
-/// Transform (zext icmp) to bitwise / integer operations in order to
-/// eliminate it. If DoTransform is false, just test whether the given
-/// (zext icmp) can be transformed.
-Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext,
- bool DoTransform) {
+Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext) {
// If we are just checking for an icmp eq of a single bit and zext'ing it
// to an integer, then shift the bit to the appropriate place and then
// cast to integer to avoid the comparison.
@@ -977,10 +988,8 @@ Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext,
// zext (x <s 0) to i32 --> x>>u31 true if signbit set.
// zext (x >s -1) to i32 --> (x>>u31)^1 true if signbit clear.
- if ((Cmp->getPredicate() == ICmpInst::ICMP_SLT && Op1CV->isNullValue()) ||
- (Cmp->getPredicate() == ICmpInst::ICMP_SGT && Op1CV->isAllOnesValue())) {
- if (!DoTransform) return Cmp;
-
+ if ((Cmp->getPredicate() == ICmpInst::ICMP_SLT && Op1CV->isZero()) ||
+ (Cmp->getPredicate() == ICmpInst::ICMP_SGT && Op1CV->isAllOnes())) {
Value *In = Cmp->getOperand(0);
Value *Sh = ConstantInt::get(In->getType(),
In->getType()->getScalarSizeInBits() - 1);
@@ -1004,7 +1013,7 @@ Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext,
// zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set.
// zext (X != 1) to i32 --> X^1 iff X has only the low bit set.
// zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
- if ((Op1CV->isNullValue() || Op1CV->isPowerOf2()) &&
+ if ((Op1CV->isZero() || Op1CV->isPowerOf2()) &&
// This only works for EQ and NE
Cmp->isEquality()) {
// If Op1C some other power of two, convert:
@@ -1012,10 +1021,8 @@ Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext,
APInt KnownZeroMask(~Known.Zero);
if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1?
- if (!DoTransform) return Cmp;
-
bool isNE = Cmp->getPredicate() == ICmpInst::ICMP_NE;
- if (!Op1CV->isNullValue() && (*Op1CV != KnownZeroMask)) {
+ if (!Op1CV->isZero() && (*Op1CV != KnownZeroMask)) {
// (X&4) == 2 --> false
// (X&4) != 2 --> true
Constant *Res = ConstantInt::get(Zext.getType(), isNE);
@@ -1031,7 +1038,7 @@ Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext,
In->getName() + ".lobit");
}
- if (!Op1CV->isNullValue() == isNE) { // Toggle the low bit.
+ if (!Op1CV->isZero() == isNE) { // Toggle the low bit.
Constant *One = ConstantInt::get(In->getType(), 1);
In = Builder.CreateXor(In, One);
}
@@ -1053,9 +1060,6 @@ Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext,
if (Cmp->hasOneUse() && match(Cmp->getOperand(1), m_ZeroInt()) &&
match(Cmp->getOperand(0),
m_OneUse(m_c_And(m_Shl(m_One(), m_Value(ShAmt)), m_Value(X))))) {
- if (!DoTransform)
- return Cmp;
-
if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
X = Builder.CreateNot(X);
Value *Lshr = Builder.CreateLShr(X, ShAmt);
@@ -1077,8 +1081,6 @@ Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext,
APInt KnownBits = KnownLHS.Zero | KnownLHS.One;
APInt UnknownBit = ~KnownBits;
if (UnknownBit.countPopulation() == 1) {
- if (!DoTransform) return Cmp;
-
Value *Result = Builder.CreateXor(LHS, RHS);
// Mask off any bits that are set and won't be shifted away.
@@ -1316,51 +1318,37 @@ Instruction *InstCombinerImpl::visitZExt(ZExtInst &CI) {
if (ICmpInst *Cmp = dyn_cast<ICmpInst>(Src))
return transformZExtICmp(Cmp, CI);
- BinaryOperator *SrcI = dyn_cast<BinaryOperator>(Src);
- if (SrcI && SrcI->getOpcode() == Instruction::Or) {
- // zext (or icmp, icmp) -> or (zext icmp), (zext icmp) if at least one
- // of the (zext icmp) can be eliminated. If so, immediately perform the
- // according elimination.
- ICmpInst *LHS = dyn_cast<ICmpInst>(SrcI->getOperand(0));
- ICmpInst *RHS = dyn_cast<ICmpInst>(SrcI->getOperand(1));
- if (LHS && RHS && LHS->hasOneUse() && RHS->hasOneUse() &&
- LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType() &&
- (transformZExtICmp(LHS, CI, false) ||
- transformZExtICmp(RHS, CI, false))) {
- // zext (or icmp, icmp) -> or (zext icmp), (zext icmp)
- Value *LCast = Builder.CreateZExt(LHS, CI.getType(), LHS->getName());
- Value *RCast = Builder.CreateZExt(RHS, CI.getType(), RHS->getName());
- Value *Or = Builder.CreateOr(LCast, RCast, CI.getName());
- if (auto *OrInst = dyn_cast<Instruction>(Or))
- Builder.SetInsertPoint(OrInst);
-
- // Perform the elimination.
- if (auto *LZExt = dyn_cast<ZExtInst>(LCast))
- transformZExtICmp(LHS, *LZExt);
- if (auto *RZExt = dyn_cast<ZExtInst>(RCast))
- transformZExtICmp(RHS, *RZExt);
-
- return replaceInstUsesWith(CI, Or);
- }
- }
-
// zext(trunc(X) & C) -> (X & zext(C)).
Constant *C;
Value *X;
- if (SrcI &&
- match(SrcI, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Constant(C)))) &&
+ if (match(Src, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Constant(C)))) &&
X->getType() == CI.getType())
return BinaryOperator::CreateAnd(X, ConstantExpr::getZExt(C, CI.getType()));
// zext((trunc(X) & C) ^ C) -> ((X & zext(C)) ^ zext(C)).
Value *And;
- if (SrcI && match(SrcI, m_OneUse(m_Xor(m_Value(And), m_Constant(C)))) &&
+ if (match(Src, m_OneUse(m_Xor(m_Value(And), m_Constant(C)))) &&
match(And, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Specific(C)))) &&
X->getType() == CI.getType()) {
Constant *ZC = ConstantExpr::getZExt(C, CI.getType());
return BinaryOperator::CreateXor(Builder.CreateAnd(X, ZC), ZC);
}
+ if (match(Src, m_VScale(DL))) {
+ if (CI.getFunction() &&
+ CI.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
+ unsigned MaxVScale = CI.getFunction()
+ ->getFnAttribute(Attribute::VScaleRange)
+ .getVScaleRangeArgs()
+ .second;
+ unsigned TypeWidth = Src->getType()->getScalarSizeInBits();
+ if (MaxVScale > 0 && Log2_32(MaxVScale) < TypeWidth) {
+ Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
+ return replaceInstUsesWith(CI, VScale);
+ }
+ }
+ }
+
return nullptr;
}
@@ -1605,6 +1593,32 @@ Instruction *InstCombinerImpl::visitSExt(SExtInst &CI) {
return BinaryOperator::CreateAShr(A, NewShAmt);
}
+ // Splatting a bit of constant-index across a value:
+ // sext (ashr (trunc iN X to iM), M-1) to iN --> ashr (shl X, N-M), N-1
+ // TODO: If the dest type is different, use a cast (adjust use check).
+ if (match(Src, m_OneUse(m_AShr(m_Trunc(m_Value(X)),
+ m_SpecificInt(SrcBitSize - 1)))) &&
+ X->getType() == DestTy) {
+ Constant *ShlAmtC = ConstantInt::get(DestTy, DestBitSize - SrcBitSize);
+ Constant *AshrAmtC = ConstantInt::get(DestTy, DestBitSize - 1);
+ Value *Shl = Builder.CreateShl(X, ShlAmtC);
+ return BinaryOperator::CreateAShr(Shl, AshrAmtC);
+ }
+
+ if (match(Src, m_VScale(DL))) {
+ if (CI.getFunction() &&
+ CI.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
+ unsigned MaxVScale = CI.getFunction()
+ ->getFnAttribute(Attribute::VScaleRange)
+ .getVScaleRangeArgs()
+ .second;
+ if (MaxVScale > 0 && Log2_32(MaxVScale) < (SrcBitSize - 1)) {
+ Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
+ return replaceInstUsesWith(CI, VScale);
+ }
+ }
+ }
+
return nullptr;
}
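
A quick standalone check (not part of the diff) of the bit-splat rewrite above, instantiated for N = 16 and M = 8. It assumes the host compiler implements >> on negative signed values as an arithmetic shift, which Clang and GCC do.

#include <cassert>
#include <cstdint>

int main() {
  // sext (ashr (trunc i16 X to i8), 7) to i16  ==  ashr (shl X, 8), 15
  for (int Val = INT16_MIN; Val <= INT16_MAX; ++Val) {
    int16_t X = (int16_t)Val;
    int8_t Trunc = (int8_t)X;                             // trunc i16 -> i8
    int16_t LHS = (int16_t)(int8_t)(Trunc >> 7);          // ashr i8, 7; sext
    int16_t Shl = (int16_t)(uint16_t)((uint16_t)X << 8);  // shl i16 X, 8
    int16_t RHS = (int16_t)(Shl >> 15);                   // ashr i16, 15
    assert(LHS == RHS);
  }
  return 0;
}
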
@@ -2060,6 +2074,19 @@ Instruction *InstCombinerImpl::visitPtrToInt(PtrToIntInst &CI) {
return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false);
}
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(SrcOp)) {
+ // Fold ptrtoint(gep null, x) to multiply + constant if the GEP has one use.
+ // While this can increase the number of instructions it doesn't actually
+ // increase the overall complexity since the arithmetic is just part of
+ // the GEP otherwise.
+ if (GEP->hasOneUse() &&
+ isa<ConstantPointerNull>(GEP->getPointerOperand())) {
+ return replaceInstUsesWith(CI,
+ Builder.CreateIntCast(EmitGEPOffset(GEP), Ty,
+ /*isSigned=*/false));
+ }
+ }
+
Value *Vec, *Scalar, *Index;
if (match(SrcOp, m_OneUse(m_InsertElt(m_IntToPtr(m_Value(Vec)),
m_Value(Scalar), m_Value(Index)))) &&
@@ -2133,9 +2160,9 @@ optimizeVectorResizeWithIntegerBitCasts(Value *InVal, VectorType *DestTy,
if (SrcElts > DestElts) {
// If we're shrinking the number of elements (rewriting an integer
// truncate), just shuffle in the elements corresponding to the least
- // significant bits from the input and use undef as the second shuffle
+ // significant bits from the input and use poison as the second shuffle
// input.
- V2 = UndefValue::get(SrcTy);
+ V2 = PoisonValue::get(SrcTy);
// Make sure the shuffle mask selects the "least significant bits" by
// keeping elements from back of the src vector for big endian, and from the
// front for little endian.
@@ -2528,7 +2555,7 @@ Instruction *InstCombinerImpl::optimizeBitCastFromPhi(CastInst &CI,
// As long as the user is another old PHI node, then even if we don't
// rewrite it, the PHI web we're considering won't have any users
// outside itself, so it'll be dead.
- if (OldPhiNodes.count(PHI) == 0)
+ if (!OldPhiNodes.contains(PHI))
return nullptr;
} else {
return nullptr;
@@ -2736,6 +2763,30 @@ Instruction *InstCombinerImpl::visitBitCast(BitCastInst &CI) {
if (auto *InsElt = dyn_cast<InsertElementInst>(Src))
return new BitCastInst(InsElt->getOperand(1), DestTy);
}
+
+ // Convert an artificial vector insert into more analyzable bitwise logic.
+ unsigned BitWidth = DestTy->getScalarSizeInBits();
+ Value *X, *Y;
+ uint64_t IndexC;
+ if (match(Src, m_OneUse(m_InsertElt(m_OneUse(m_BitCast(m_Value(X))),
+ m_Value(Y), m_ConstantInt(IndexC)))) &&
+ DestTy->isIntegerTy() && X->getType() == DestTy &&
+ isDesirableIntType(BitWidth)) {
+ // Adjust for big endian - the LSBs are at the high index.
+ if (DL.isBigEndian())
+ IndexC = SrcVTy->getNumElements() - 1 - IndexC;
+
+ // We only handle (endian-normalized) insert to index 0. Any other insert
+ // would require a left-shift, so that is an extra instruction.
+ if (IndexC == 0) {
+ // bitcast (inselt (bitcast X), Y, 0) --> or (and X, MaskC), (zext Y)
+ unsigned EltWidth = Y->getType()->getScalarSizeInBits();
+ APInt MaskC = APInt::getHighBitsSet(BitWidth, BitWidth - EltWidth);
+ Value *AndX = Builder.CreateAnd(X, MaskC);
+ Value *ZextY = Builder.CreateZExt(Y, DestTy);
+ return BinaryOperator::CreateOr(AndX, ZextY);
+ }
+ }
}
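
The insert-to-index-0 rewrite above can be sanity-checked with ordinary byte manipulation. The sketch below is not part of the diff; it models the endian-normalized case on a little-endian host, mirroring the DL.isBigEndian() adjustment in the code.

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  // bitcast (insertelement (bitcast i32 X to <4 x i8>), i8 Y, i64 0) to i32
  //   ==  or (and X, 0xFFFFFF00), (zext Y)            [little-endian host]
  for (uint32_t X : {0u, 1u, 0x12345678u, 0xDEADBEEFu, 0xFFFFFFFFu})
    for (uint32_t Y = 0; Y < 256; ++Y) {
      uint8_t Bytes[4];
      std::memcpy(Bytes, &X, 4);                 // bitcast i32 -> <4 x i8>
      Bytes[0] = (uint8_t)Y;                     // insertelement at index 0
      uint32_t ViaVector;
      std::memcpy(&ViaVector, Bytes, 4);         // bitcast <4 x i8> -> i32
      uint32_t ViaMask = (X & 0xFFFFFF00u) | Y;  // or (and X, MaskC), (zext Y)
      assert(ViaVector == ViaMask);
    }
  return 0;
}
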
if (auto *Shuf = dyn_cast<ShuffleVectorInst>(Src)) {
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 2b0ef0c5f2cc..7a9e177f19da 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -78,15 +78,15 @@ static bool isSignTest(ICmpInst::Predicate &Pred, const APInt &C) {
if (!ICmpInst::isSigned(Pred))
return false;
- if (C.isNullValue())
+ if (C.isZero())
return ICmpInst::isRelational(Pred);
- if (C.isOneValue()) {
+ if (C.isOne()) {
if (Pred == ICmpInst::ICMP_SLT) {
Pred = ICmpInst::ICMP_SLE;
return true;
}
- } else if (C.isAllOnesValue()) {
+ } else if (C.isAllOnes()) {
if (Pred == ICmpInst::ICMP_SGT) {
Pred = ICmpInst::ICMP_SGE;
return true;
@@ -541,7 +541,7 @@ static bool canRewriteGEPAsOffset(Value *Start, Value *Base,
if (!CI->isNoopCast(DL))
return false;
- if (Explored.count(CI->getOperand(0)) == 0)
+ if (!Explored.contains(CI->getOperand(0)))
WorkList.push_back(CI->getOperand(0));
}
@@ -553,7 +553,7 @@ static bool canRewriteGEPAsOffset(Value *Start, Value *Base,
GEP->getType() != Start->getType())
return false;
- if (Explored.count(GEP->getOperand(0)) == 0)
+ if (!Explored.contains(GEP->getOperand(0)))
WorkList.push_back(GEP->getOperand(0));
}
@@ -575,7 +575,7 @@ static bool canRewriteGEPAsOffset(Value *Start, Value *Base,
// Explore the PHI nodes further.
for (auto *PN : PHIs)
for (Value *Op : PN->incoming_values())
- if (Explored.count(Op) == 0)
+ if (!Explored.contains(Op))
WorkList.push_back(Op);
}
@@ -589,7 +589,7 @@ static bool canRewriteGEPAsOffset(Value *Start, Value *Base,
auto *Inst = dyn_cast<Instruction>(Val);
if (Inst == Base || Inst == PHI || !Inst || !PHI ||
- Explored.count(PHI) == 0)
+ !Explored.contains(PHI))
continue;
if (PHI->getParent() == Inst->getParent())
@@ -1147,12 +1147,12 @@ Instruction *InstCombinerImpl::foldICmpShrConstConst(ICmpInst &I, Value *A,
};
// Don't bother doing any work for cases which InstSimplify handles.
- if (AP2.isNullValue())
+ if (AP2.isZero())
return nullptr;
bool IsAShr = isa<AShrOperator>(I.getOperand(0));
if (IsAShr) {
- if (AP2.isAllOnesValue())
+ if (AP2.isAllOnes())
return nullptr;
if (AP2.isNegative() != AP1.isNegative())
return nullptr;
@@ -1178,7 +1178,7 @@ Instruction *InstCombinerImpl::foldICmpShrConstConst(ICmpInst &I, Value *A,
if (IsAShr && AP1 == AP2.ashr(Shift)) {
// There are multiple solutions if we are comparing against -1 and the LHS
// of the ashr is not a power of two.
- if (AP1.isAllOnesValue() && !AP2.isPowerOf2())
+ if (AP1.isAllOnes() && !AP2.isPowerOf2())
return getICmp(I.ICMP_UGE, A, ConstantInt::get(A->getType(), Shift));
return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift));
} else if (AP1 == AP2.lshr(Shift)) {
@@ -1206,7 +1206,7 @@ Instruction *InstCombinerImpl::foldICmpShlConstConst(ICmpInst &I, Value *A,
};
// Don't bother doing any work for cases which InstSimplify handles.
- if (AP2.isNullValue())
+ if (AP2.isZero())
return nullptr;
unsigned AP2TrailingZeros = AP2.countTrailingZeros();
@@ -1270,9 +1270,8 @@ static Instruction *processUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
// This is only really a signed overflow check if the inputs have been
// sign-extended; check for that condition. For example, if CI2 is 2^31 and
// the operands of the add are 64 bits wide, we need at least 33 sign bits.
- unsigned NeededSignBits = CI1->getBitWidth() - NewWidth + 1;
- if (IC.ComputeNumSignBits(A, 0, &I) < NeededSignBits ||
- IC.ComputeNumSignBits(B, 0, &I) < NeededSignBits)
+ if (IC.ComputeMinSignedBits(A, 0, &I) > NewWidth ||
+ IC.ComputeMinSignedBits(B, 0, &I) > NewWidth)
return nullptr;
// In order to replace the original add with a narrower
@@ -1544,7 +1543,7 @@ Instruction *InstCombinerImpl::foldICmpTruncConstant(ICmpInst &Cmp,
const APInt &C) {
ICmpInst::Predicate Pred = Cmp.getPredicate();
Value *X = Trunc->getOperand(0);
- if (C.isOneValue() && C.getBitWidth() > 1) {
+ if (C.isOne() && C.getBitWidth() > 1) {
// icmp slt trunc(signum(V)) 1 --> icmp slt V, 1
Value *V = nullptr;
if (Pred == ICmpInst::ICMP_SLT && match(X, m_Signum(m_Value(V))))
@@ -1725,7 +1724,7 @@ Instruction *InstCombinerImpl::foldICmpAndShift(ICmpInst &Cmp,
// Turn ((X >> Y) & C2) == 0 into (X & (C2 << Y)) == 0. The latter is
// preferable because it allows the C2 << Y expression to be hoisted out of a
// loop if Y is invariant and X is not.
- if (Shift->hasOneUse() && C1.isNullValue() && Cmp.isEquality() &&
+ if (Shift->hasOneUse() && C1.isZero() && Cmp.isEquality() &&
!Shift->isArithmeticShift() && !isa<Constant>(Shift->getOperand(0))) {
// Compute C2 << Y.
Value *NewShift =
@@ -1749,7 +1748,7 @@ Instruction *InstCombinerImpl::foldICmpAndConstConst(ICmpInst &Cmp,
// For vectors: icmp ne (and X, 1), 0 --> trunc X to N x i1
// TODO: We canonicalize to the longer form for scalars because we have
// better analysis/folds for icmp, and codegen may be better with icmp.
- if (isICMP_NE && Cmp.getType()->isVectorTy() && C1.isNullValue() &&
+ if (isICMP_NE && Cmp.getType()->isVectorTy() && C1.isZero() &&
match(And->getOperand(1), m_One()))
return new TruncInst(And->getOperand(0), Cmp.getType());
@@ -1762,7 +1761,7 @@ Instruction *InstCombinerImpl::foldICmpAndConstConst(ICmpInst &Cmp,
if (!And->hasOneUse())
return nullptr;
- if (Cmp.isEquality() && C1.isNullValue()) {
+ if (Cmp.isEquality() && C1.isZero()) {
// Restrict this fold to single-use 'and' (PR10267).
// Replace (and X, (1 << size(X)-1) != 0) with X s< 0
if (C2->isSignMask()) {
@@ -1812,7 +1811,7 @@ Instruction *InstCombinerImpl::foldICmpAndConstConst(ICmpInst &Cmp,
// (icmp pred (and A, (or (shl 1, B), 1), 0))
//
// iff pred isn't signed
- if (!Cmp.isSigned() && C1.isNullValue() && And->getOperand(0)->hasOneUse() &&
+ if (!Cmp.isSigned() && C1.isZero() && And->getOperand(0)->hasOneUse() &&
match(And->getOperand(1), m_One())) {
Constant *One = cast<Constant>(And->getOperand(1));
Value *Or = And->getOperand(0);
@@ -1889,7 +1888,7 @@ Instruction *InstCombinerImpl::foldICmpAndConstant(ICmpInst &Cmp,
// X & -C == -C -> X > u ~C
// X & -C != -C -> X <= u ~C
// iff C is a power of 2
- if (Cmp.getOperand(1) == Y && (-C).isPowerOf2()) {
+ if (Cmp.getOperand(1) == Y && C.isNegatedPowerOf2()) {
auto NewPred =
Pred == CmpInst::ICMP_EQ ? CmpInst::ICMP_UGT : CmpInst::ICMP_ULE;
return new ICmpInst(NewPred, X, SubOne(cast<Constant>(Cmp.getOperand(1))));
@@ -1899,7 +1898,7 @@ Instruction *InstCombinerImpl::foldICmpAndConstant(ICmpInst &Cmp,
// (X & C2) != 0 -> (trunc X) < 0
// iff C2 is a power of 2 and it masks the sign bit of a legal integer type.
const APInt *C2;
- if (And->hasOneUse() && C.isNullValue() && match(Y, m_APInt(C2))) {
+ if (And->hasOneUse() && C.isZero() && match(Y, m_APInt(C2))) {
int32_t ExactLogBase2 = C2->exactLogBase2();
if (ExactLogBase2 != -1 && DL.isLegalInteger(ExactLogBase2 + 1)) {
Type *NTy = IntegerType::get(Cmp.getContext(), ExactLogBase2 + 1);
@@ -1920,7 +1919,7 @@ Instruction *InstCombinerImpl::foldICmpOrConstant(ICmpInst &Cmp,
BinaryOperator *Or,
const APInt &C) {
ICmpInst::Predicate Pred = Cmp.getPredicate();
- if (C.isOneValue()) {
+ if (C.isOne()) {
// icmp slt signum(V) 1 --> icmp slt V, 1
Value *V = nullptr;
if (Pred == ICmpInst::ICMP_SLT && match(Or, m_Signum(m_Value(V))))
@@ -1950,7 +1949,18 @@ Instruction *InstCombinerImpl::foldICmpOrConstant(ICmpInst &Cmp,
}
}
- if (!Cmp.isEquality() || !C.isNullValue() || !Or->hasOneUse())
+ // (X | (X-1)) s< 0 --> X s< 1
+ // (X | (X-1)) s> -1 --> X s> 0
+ Value *X;
+ bool TrueIfSigned;
+ if (isSignBitCheck(Pred, C, TrueIfSigned) &&
+ match(Or, m_c_Or(m_Add(m_Value(X), m_AllOnes()), m_Deferred(X)))) {
+ auto NewPred = TrueIfSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGT;
+ Constant *NewC = ConstantInt::get(X->getType(), TrueIfSigned ? 1 : 0);
+ return new ICmpInst(NewPred, X, NewC);
+ }
+
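
The new sign-bit fold for (X | (X-1)) can be brute-forced over i8. The check below is a standalone illustration, not part of the diff; the decrement is done in unsigned arithmetic to model LLVM's wrapping add.

#include <cassert>
#include <cstdint>

int main() {
  for (int X = -128; X <= 127; ++X) {
    uint8_t U = (uint8_t)X;
    uint8_t OrDec = U | (uint8_t)(U - 1);   // X | (X + -1), 8-bit wrap
    bool SignSet = (OrDec & 0x80) != 0;     // (X | (X-1)) s< 0
    assert(SignSet == (X < 1));             // --> X s< 1
    assert(!SignSet == (X > 0));            // s> -1 form --> X s> 0
  }
  return 0;
}
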
+ if (!Cmp.isEquality() || !C.isZero() || !Or->hasOneUse())
return nullptr;
Value *P, *Q;
@@ -2001,14 +2011,14 @@ Instruction *InstCombinerImpl::foldICmpMulConstant(ICmpInst &Cmp,
// If the multiply does not wrap, try to divide the compare constant by the
// multiplication factor.
- if (Cmp.isEquality() && !MulC->isNullValue()) {
+ if (Cmp.isEquality() && !MulC->isZero()) {
// (mul nsw X, MulC) == C --> X == C /s MulC
- if (Mul->hasNoSignedWrap() && C.srem(*MulC).isNullValue()) {
+ if (Mul->hasNoSignedWrap() && C.srem(*MulC).isZero()) {
Constant *NewC = ConstantInt::get(Mul->getType(), C.sdiv(*MulC));
return new ICmpInst(Pred, Mul->getOperand(0), NewC);
}
// (mul nuw X, MulC) == C --> X == C /u MulC
- if (Mul->hasNoUnsignedWrap() && C.urem(*MulC).isNullValue()) {
+ if (Mul->hasNoUnsignedWrap() && C.urem(*MulC).isZero()) {
Constant *NewC = ConstantInt::get(Mul->getType(), C.udiv(*MulC));
return new ICmpInst(Pred, Mul->getOperand(0), NewC);
}
@@ -2053,7 +2063,7 @@ static Instruction *foldICmpShlOne(ICmpInst &Cmp, Instruction *Shl,
return new ICmpInst(Pred, Y, ConstantInt::get(ShiftType, CLog2));
} else if (Cmp.isSigned()) {
Constant *BitWidthMinusOne = ConstantInt::get(ShiftType, TypeBits - 1);
- if (C.isAllOnesValue()) {
+ if (C.isAllOnes()) {
// (1 << Y) <= -1 -> Y == 31
if (Pred == ICmpInst::ICMP_SLE)
return new ICmpInst(ICmpInst::ICMP_EQ, Y, BitWidthMinusOne);
@@ -2227,8 +2237,7 @@ Instruction *InstCombinerImpl::foldICmpShrConstant(ICmpInst &Cmp,
// icmp eq/ne (shr X, Y), 0 --> icmp eq/ne X, 0
Value *X = Shr->getOperand(0);
CmpInst::Predicate Pred = Cmp.getPredicate();
- if (Cmp.isEquality() && Shr->isExact() && Shr->hasOneUse() &&
- C.isNullValue())
+ if (Cmp.isEquality() && Shr->isExact() && Shr->hasOneUse() && C.isZero())
return new ICmpInst(Pred, X, Cmp.getOperand(1));
const APInt *ShiftVal;
@@ -2316,7 +2325,7 @@ Instruction *InstCombinerImpl::foldICmpShrConstant(ICmpInst &Cmp,
if (Shr->isExact())
return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, C << ShAmtVal));
- if (C.isNullValue()) {
+ if (C.isZero()) {
// == 0 is u< 1.
if (Pred == CmpInst::ICMP_EQ)
return new ICmpInst(CmpInst::ICMP_ULT, X,
@@ -2355,7 +2364,7 @@ Instruction *InstCombinerImpl::foldICmpSRemConstant(ICmpInst &Cmp,
return nullptr;
const APInt *DivisorC;
- if (!C.isNullValue() || !match(SRem->getOperand(1), m_Power2(DivisorC)))
+ if (!C.isZero() || !match(SRem->getOperand(1), m_Power2(DivisorC)))
return nullptr;
// Mask off the sign bit and the modulo bits (low-bits).
@@ -2435,8 +2444,7 @@ Instruction *InstCombinerImpl::foldICmpDivConstant(ICmpInst &Cmp,
// INT_MIN will also fail if the divisor is 1. Although folds of all these
// division-by-constant cases should be present, we can not assert that they
// have happened before we reach this icmp instruction.
- if (C2->isNullValue() || C2->isOneValue() ||
- (DivIsSigned && C2->isAllOnesValue()))
+ if (C2->isZero() || C2->isOne() || (DivIsSigned && C2->isAllOnes()))
return nullptr;
// Compute Prod = C * C2. We are essentially solving an equation of
@@ -2476,16 +2484,16 @@ Instruction *InstCombinerImpl::foldICmpDivConstant(ICmpInst &Cmp,
HiOverflow = addWithOverflow(HiBound, LoBound, RangeSize, false);
}
} else if (C2->isStrictlyPositive()) { // Divisor is > 0.
- if (C.isNullValue()) { // (X / pos) op 0
+ if (C.isZero()) { // (X / pos) op 0
// Can't overflow. e.g. X/2 op 0 --> [-1, 2)
LoBound = -(RangeSize - 1);
HiBound = RangeSize;
- } else if (C.isStrictlyPositive()) { // (X / pos) op pos
+ } else if (C.isStrictlyPositive()) { // (X / pos) op pos
LoBound = Prod; // e.g. X/5 op 3 --> [15, 20)
HiOverflow = LoOverflow = ProdOV;
if (!HiOverflow)
HiOverflow = addWithOverflow(HiBound, Prod, RangeSize, true);
- } else { // (X / pos) op neg
+ } else { // (X / pos) op neg
// e.g. X/5 op -3 --> [-15-4, -15+1) --> [-19, -14)
HiBound = Prod + 1;
LoOverflow = HiOverflow = ProdOV ? -1 : 0;
@@ -2497,7 +2505,7 @@ Instruction *InstCombinerImpl::foldICmpDivConstant(ICmpInst &Cmp,
} else if (C2->isNegative()) { // Divisor is < 0.
if (Div->isExact())
RangeSize.negate();
- if (C.isNullValue()) { // (X / neg) op 0
+ if (C.isZero()) { // (X / neg) op 0
// e.g. X/-5 op 0 --> [-4, 5)
LoBound = RangeSize + 1;
HiBound = -RangeSize;
@@ -2505,13 +2513,13 @@ Instruction *InstCombinerImpl::foldICmpDivConstant(ICmpInst &Cmp,
HiOverflow = 1; // [INTMIN+1, overflow)
HiBound = APInt(); // e.g. X/INTMIN = 0 --> X > INTMIN
}
- } else if (C.isStrictlyPositive()) { // (X / neg) op pos
+ } else if (C.isStrictlyPositive()) { // (X / neg) op pos
// e.g. X/-5 op 3 --> [-19, -14)
HiBound = Prod + 1;
HiOverflow = LoOverflow = ProdOV ? -1 : 0;
if (!LoOverflow)
LoOverflow = addWithOverflow(LoBound, HiBound, RangeSize, true) ? -1:0;
- } else { // (X / neg) op neg
+ } else { // (X / neg) op neg
LoBound = Prod; // e.g. X/-5 op -3 --> [15, 20)
LoOverflow = HiOverflow = ProdOV;
if (!HiOverflow)
@@ -2581,42 +2589,54 @@ Instruction *InstCombinerImpl::foldICmpSubConstant(ICmpInst &Cmp,
const APInt &C) {
Value *X = Sub->getOperand(0), *Y = Sub->getOperand(1);
ICmpInst::Predicate Pred = Cmp.getPredicate();
- const APInt *C2;
- APInt SubResult;
+ Type *Ty = Sub->getType();
- // icmp eq/ne (sub C, Y), C -> icmp eq/ne Y, 0
- if (match(X, m_APInt(C2)) && *C2 == C && Cmp.isEquality())
- return new ICmpInst(Cmp.getPredicate(), Y,
- ConstantInt::get(Y->getType(), 0));
+ // (SubC - Y) == C) --> Y == (SubC - C)
+ // (SubC - Y) != C) --> Y != (SubC - C)
+ Constant *SubC;
+ if (Cmp.isEquality() && match(X, m_ImmConstant(SubC))) {
+ return new ICmpInst(Pred, Y,
+ ConstantExpr::getSub(SubC, ConstantInt::get(Ty, C)));
+ }
// (icmp P (sub nuw|nsw C2, Y), C) -> (icmp swap(P) Y, C2-C)
+ const APInt *C2;
+ APInt SubResult;
+ ICmpInst::Predicate SwappedPred = Cmp.getSwappedPredicate();
+ bool HasNSW = Sub->hasNoSignedWrap();
+ bool HasNUW = Sub->hasNoUnsignedWrap();
if (match(X, m_APInt(C2)) &&
- ((Cmp.isUnsigned() && Sub->hasNoUnsignedWrap()) ||
- (Cmp.isSigned() && Sub->hasNoSignedWrap())) &&
+ ((Cmp.isUnsigned() && HasNUW) || (Cmp.isSigned() && HasNSW)) &&
!subWithOverflow(SubResult, *C2, C, Cmp.isSigned()))
- return new ICmpInst(Cmp.getSwappedPredicate(), Y,
- ConstantInt::get(Y->getType(), SubResult));
+ return new ICmpInst(SwappedPred, Y, ConstantInt::get(Ty, SubResult));
// The following transforms are only worth it if the only user of the subtract
// is the icmp.
+ // TODO: This is an artificial restriction for all of the transforms below
+ // that only need a single replacement icmp.
if (!Sub->hasOneUse())
return nullptr;
+ // X - Y == 0 --> X == Y.
+ // X - Y != 0 --> X != Y.
+ if (Cmp.isEquality() && C.isZero())
+ return new ICmpInst(Pred, X, Y);
+
if (Sub->hasNoSignedWrap()) {
// (icmp sgt (sub nsw X, Y), -1) -> (icmp sge X, Y)
- if (Pred == ICmpInst::ICMP_SGT && C.isAllOnesValue())
+ if (Pred == ICmpInst::ICMP_SGT && C.isAllOnes())
return new ICmpInst(ICmpInst::ICMP_SGE, X, Y);
// (icmp sgt (sub nsw X, Y), 0) -> (icmp sgt X, Y)
- if (Pred == ICmpInst::ICMP_SGT && C.isNullValue())
+ if (Pred == ICmpInst::ICMP_SGT && C.isZero())
return new ICmpInst(ICmpInst::ICMP_SGT, X, Y);
// (icmp slt (sub nsw X, Y), 0) -> (icmp slt X, Y)
- if (Pred == ICmpInst::ICMP_SLT && C.isNullValue())
+ if (Pred == ICmpInst::ICMP_SLT && C.isZero())
return new ICmpInst(ICmpInst::ICMP_SLT, X, Y);
// (icmp slt (sub nsw X, Y), 1) -> (icmp sle X, Y)
- if (Pred == ICmpInst::ICMP_SLT && C.isOneValue())
+ if (Pred == ICmpInst::ICMP_SLT && C.isOne())
return new ICmpInst(ICmpInst::ICMP_SLE, X, Y);
}
@@ -2634,7 +2654,12 @@ Instruction *InstCombinerImpl::foldICmpSubConstant(ICmpInst &Cmp,
if (Pred == ICmpInst::ICMP_UGT && (C + 1).isPowerOf2() && (*C2 & C) == C)
return new ICmpInst(ICmpInst::ICMP_NE, Builder.CreateOr(Y, C), X);
- return nullptr;
+ // We have handled special cases that reduce.
+ // Canonicalize any remaining sub to add as:
+ // (C2 - Y) > C --> (Y + ~C2) < ~C
+ Value *Add = Builder.CreateAdd(Y, ConstantInt::get(Ty, ~(*C2)), "notsub",
+ HasNUW, HasNSW);
+ return new ICmpInst(SwappedPred, Add, ConstantInt::get(Ty, ~C));
}
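
The closing canonicalization above works because bitwise-not reverses both the unsigned and the signed order, so comparing the complemented operands with the swapped predicate is equivalent. A standalone i8 brute-force of the unsigned form spelled out in the comment, not part of the diff:

#include <cassert>
#include <cstdint>

int main() {
  // (C2 - Y) u> C  <=>  (Y + ~C2) u< ~C     (all arithmetic mod 2^8)
  for (unsigned C2 = 0; C2 < 256; ++C2)
    for (unsigned C = 0; C < 256; ++C)
      for (unsigned Y = 0; Y < 256; ++Y) {
        bool Orig  = (uint8_t)(C2 - Y) > (uint8_t)C;
        bool Canon = (uint8_t)(Y + (uint8_t)~C2) < (uint8_t)~C;
        assert(Orig == Canon);
      }
  return 0;
}
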
/// Fold icmp (add X, Y), C.
@@ -2723,6 +2748,14 @@ Instruction *InstCombinerImpl::foldICmpAddConstant(ICmpInst &Cmp,
return new ICmpInst(ICmpInst::ICMP_NE, Builder.CreateAnd(X, ~C),
ConstantExpr::getNeg(cast<Constant>(Y)));
+ // The range test idiom can use either ult or ugt. Arbitrarily canonicalize
+ // to the ult form.
+ // X+C2 >u C -> X+(C2-C-1) <u ~C
+ if (Pred == ICmpInst::ICMP_UGT)
+ return new ICmpInst(ICmpInst::ICMP_ULT,
+ Builder.CreateAdd(X, ConstantInt::get(Ty, *C2 - C - 1)),
+ ConstantInt::get(Ty, ~C));
+
return nullptr;
}
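
The ult canonicalization of the range-test idiom can be verified the same way. Again a standalone i8 check that is not part of the diff, with all arithmetic taken mod 2^8 to model LLVM's wrapping add:

#include <cassert>
#include <cstdint>

int main() {
  // (X + C2) u> C  <=>  (X + (C2 - C - 1)) u< ~C
  for (unsigned C2 = 0; C2 < 256; ++C2)
    for (unsigned C = 0; C < 256; ++C)
      for (unsigned X = 0; X < 256; ++X) {
        bool Ugt = (uint8_t)(X + C2) > (uint8_t)C;
        bool Ult = (uint8_t)(X + C2 - C - 1) < (uint8_t)~C;
        assert(Ugt == Ult);
      }
  return 0;
}
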
@@ -2830,8 +2863,7 @@ Instruction *InstCombinerImpl::foldICmpSelectConstant(ICmpInst &Cmp,
return nullptr;
}
-static Instruction *foldICmpBitCast(ICmpInst &Cmp,
- InstCombiner::BuilderTy &Builder) {
+Instruction *InstCombinerImpl::foldICmpBitCast(ICmpInst &Cmp) {
auto *Bitcast = dyn_cast<BitCastInst>(Cmp.getOperand(0));
if (!Bitcast)
return nullptr;
@@ -2917,6 +2949,39 @@ static Instruction *foldICmpBitCast(ICmpInst &Cmp,
return new ICmpInst(Pred, BCSrcOp, Op1);
}
+ const APInt *C;
+ if (!match(Cmp.getOperand(1), m_APInt(C)) ||
+ !Bitcast->getType()->isIntegerTy() ||
+ !Bitcast->getSrcTy()->isIntOrIntVectorTy())
+ return nullptr;
+
+ // If this is checking if all elements of a vector compare are set or not,
+ // invert the casted vector equality compare and test if all compare
+ // elements are clear or not. Compare against zero is generally easier for
+ // analysis and codegen.
+ // icmp eq/ne (bitcast (not X) to iN), -1 --> icmp eq/ne (bitcast X to iN), 0
+ // Example: are all elements equal? --> are zero elements not equal?
+ // TODO: Try harder to reduce compare of 2 freely invertible operands?
+ if (Cmp.isEquality() && C->isAllOnes() && Bitcast->hasOneUse() &&
+ isFreeToInvert(BCSrcOp, BCSrcOp->hasOneUse())) {
+ Type *ScalarTy = Bitcast->getType();
+ Value *Cast = Builder.CreateBitCast(Builder.CreateNot(BCSrcOp), ScalarTy);
+ return new ICmpInst(Pred, Cast, ConstantInt::getNullValue(ScalarTy));
+ }
+
+ // If this is checking if all elements of an extended vector are clear or not,
+ // compare in a narrow type to eliminate the extend:
+ // icmp eq/ne (bitcast (ext X) to iN), 0 --> icmp eq/ne (bitcast X to iM), 0
+ Value *X;
+ if (Cmp.isEquality() && C->isZero() && Bitcast->hasOneUse() &&
+ match(BCSrcOp, m_ZExtOrSExt(m_Value(X)))) {
+ if (auto *VecTy = dyn_cast<FixedVectorType>(X->getType())) {
+ Type *NewType = Builder.getIntNTy(VecTy->getPrimitiveSizeInBits());
+ Value *NewCast = Builder.CreateBitCast(X, NewType);
+ return new ICmpInst(Pred, NewCast, ConstantInt::getNullValue(NewType));
+ }
+ }
+
// Folding: icmp <pred> iN X, C
// where X = bitcast <M x iK> (shufflevector <M x iK> %vec, undef, SC)) to iN
// and C is a splat of a K-bit pattern
@@ -2924,12 +2989,6 @@ static Instruction *foldICmpBitCast(ICmpInst &Cmp,
// Into:
// %E = extractelement <M x iK> %vec, i32 C'
// icmp <pred> iK %E, trunc(C)
- const APInt *C;
- if (!match(Cmp.getOperand(1), m_APInt(C)) ||
- !Bitcast->getType()->isIntegerTy() ||
- !Bitcast->getSrcTy()->isIntOrIntVectorTy())
- return nullptr;
-
Value *Vec;
ArrayRef<int> Mask;
if (match(BCSrcOp, m_Shuffle(m_Value(Vec), m_Undef(), m_Mask(Mask)))) {
@@ -3055,7 +3114,7 @@ Instruction *InstCombinerImpl::foldICmpBinOpEqualityWithConstant(
switch (BO->getOpcode()) {
case Instruction::SRem:
// If we have a signed (X % (2^c)) == 0, turn it into an unsigned one.
- if (C.isNullValue() && BO->hasOneUse()) {
+ if (C.isZero() && BO->hasOneUse()) {
const APInt *BOC;
if (match(BOp1, m_APInt(BOC)) && BOC->sgt(1) && BOC->isPowerOf2()) {
Value *NewRem = Builder.CreateURem(BOp0, BOp1, BO->getName());
@@ -3069,7 +3128,7 @@ Instruction *InstCombinerImpl::foldICmpBinOpEqualityWithConstant(
if (Constant *BOC = dyn_cast<Constant>(BOp1)) {
if (BO->hasOneUse())
return new ICmpInst(Pred, BOp0, ConstantExpr::getSub(RHS, BOC));
- } else if (C.isNullValue()) {
+ } else if (C.isZero()) {
// Replace ((add A, B) != 0) with (A != -B) if A or B is
// efficiently invertible, or if the add has just this one use.
if (Value *NegVal = dyn_castNegVal(BOp1))
@@ -3090,25 +3149,12 @@ Instruction *InstCombinerImpl::foldICmpBinOpEqualityWithConstant(
// For the xor case, we can xor two constants together, eliminating
// the explicit xor.
return new ICmpInst(Pred, BOp0, ConstantExpr::getXor(RHS, BOC));
- } else if (C.isNullValue()) {
+ } else if (C.isZero()) {
// Replace ((xor A, B) != 0) with (A != B)
return new ICmpInst(Pred, BOp0, BOp1);
}
}
break;
- case Instruction::Sub:
- if (BO->hasOneUse()) {
- // Only check for constant LHS here, as constant RHS will be canonicalized
- // to add and use the fold above.
- if (Constant *BOC = dyn_cast<Constant>(BOp0)) {
- // Replace ((sub BOC, B) != C) with (B != BOC-C).
- return new ICmpInst(Pred, BOp1, ConstantExpr::getSub(BOC, RHS));
- } else if (C.isNullValue()) {
- // Replace ((sub A, B) != 0) with (A != B).
- return new ICmpInst(Pred, BOp0, BOp1);
- }
- }
- break;
case Instruction::Or: {
const APInt *BOC;
if (match(BOp1, m_APInt(BOC)) && BO->hasOneUse() && RHS->isAllOnesValue()) {
@@ -3132,7 +3178,7 @@ Instruction *InstCombinerImpl::foldICmpBinOpEqualityWithConstant(
break;
}
case Instruction::UDiv:
- if (C.isNullValue()) {
+ if (C.isZero()) {
// (icmp eq/ne (udiv A, B), 0) -> (icmp ugt/ule i32 B, A)
auto NewPred = isICMP_NE ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_UGT;
return new ICmpInst(NewPred, BOp1, BOp0);
@@ -3149,25 +3195,26 @@ Instruction *InstCombinerImpl::foldICmpEqIntrinsicWithConstant(
ICmpInst &Cmp, IntrinsicInst *II, const APInt &C) {
Type *Ty = II->getType();
unsigned BitWidth = C.getBitWidth();
+ const ICmpInst::Predicate Pred = Cmp.getPredicate();
+
switch (II->getIntrinsicID()) {
case Intrinsic::abs:
// abs(A) == 0 -> A == 0
// abs(A) == INT_MIN -> A == INT_MIN
- if (C.isNullValue() || C.isMinSignedValue())
- return new ICmpInst(Cmp.getPredicate(), II->getArgOperand(0),
- ConstantInt::get(Ty, C));
+ if (C.isZero() || C.isMinSignedValue())
+ return new ICmpInst(Pred, II->getArgOperand(0), ConstantInt::get(Ty, C));
break;
case Intrinsic::bswap:
// bswap(A) == C -> A == bswap(C)
- return new ICmpInst(Cmp.getPredicate(), II->getArgOperand(0),
+ return new ICmpInst(Pred, II->getArgOperand(0),
ConstantInt::get(Ty, C.byteSwap()));
case Intrinsic::ctlz:
case Intrinsic::cttz: {
// ctz(A) == bitwidth(A) -> A == 0 and likewise for !=
if (C == BitWidth)
- return new ICmpInst(Cmp.getPredicate(), II->getArgOperand(0),
+ return new ICmpInst(Pred, II->getArgOperand(0),
ConstantInt::getNullValue(Ty));
// ctz(A) == C -> A & Mask1 == Mask2, where Mask2 only has bit C set
@@ -3181,9 +3228,8 @@ Instruction *InstCombinerImpl::foldICmpEqIntrinsicWithConstant(
APInt Mask2 = IsTrailing
? APInt::getOneBitSet(BitWidth, Num)
: APInt::getOneBitSet(BitWidth, BitWidth - Num - 1);
- return new ICmpInst(Cmp.getPredicate(),
- Builder.CreateAnd(II->getArgOperand(0), Mask1),
- ConstantInt::get(Ty, Mask2));
+ return new ICmpInst(Pred, Builder.CreateAnd(II->getArgOperand(0), Mask1),
+ ConstantInt::get(Ty, Mask2));
}
break;
}
@@ -3191,28 +3237,49 @@ Instruction *InstCombinerImpl::foldICmpEqIntrinsicWithConstant(
case Intrinsic::ctpop: {
// popcount(A) == 0 -> A == 0 and likewise for !=
// popcount(A) == bitwidth(A) -> A == -1 and likewise for !=
- bool IsZero = C.isNullValue();
+ bool IsZero = C.isZero();
if (IsZero || C == BitWidth)
- return new ICmpInst(Cmp.getPredicate(), II->getArgOperand(0),
- IsZero ? Constant::getNullValue(Ty) : Constant::getAllOnesValue(Ty));
+ return new ICmpInst(Pred, II->getArgOperand(0),
+ IsZero ? Constant::getNullValue(Ty)
+ : Constant::getAllOnesValue(Ty));
break;
}
+ case Intrinsic::fshl:
+ case Intrinsic::fshr:
+ if (II->getArgOperand(0) == II->getArgOperand(1)) {
+ // (rot X, ?) == 0/-1 --> X == 0/-1
+ // TODO: This transform is safe to re-use undef elts in a vector, but
+ // the constant value passed in by the caller doesn't allow that.
+ if (C.isZero() || C.isAllOnes())
+ return new ICmpInst(Pred, II->getArgOperand(0), Cmp.getOperand(1));
+
+ const APInt *RotAmtC;
+ // ror(X, RotAmtC) == C --> X == rol(C, RotAmtC)
+ // rol(X, RotAmtC) == C --> X == ror(C, RotAmtC)
+ if (match(II->getArgOperand(2), m_APInt(RotAmtC)))
+ return new ICmpInst(Pred, II->getArgOperand(0),
+ II->getIntrinsicID() == Intrinsic::fshl
+ ? ConstantInt::get(Ty, C.rotr(*RotAmtC))
+ : ConstantInt::get(Ty, C.rotl(*RotAmtC)));
+ }
+ break;
+
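
The new fshl/fshr cases rely on rotation by a constant amount being invertible. Below is a standalone i8 check, not part of the diff, using hypothetical rol8/ror8 helpers in place of the intrinsics:

#include <cassert>
#include <cstdint>

static uint8_t Rol8(uint8_t X, unsigned R) {
  R &= 7;
  return (uint8_t)((X << R) | (X >> ((8 - R) & 7)));
}
static uint8_t Ror8(uint8_t X, unsigned R) {
  R &= 7;
  return (uint8_t)((X >> R) | (X << ((8 - R) & 7)));
}

int main() {
  // rol(X, R) == C  <=>  X == ror(C, R), and symmetrically for ror.
  for (unsigned X = 0; X < 256; ++X)
    for (unsigned C = 0; C < 256; ++C)
      for (unsigned R = 0; R < 8; ++R) {
        assert((Rol8((uint8_t)X, R) == (uint8_t)C) ==
               ((uint8_t)X == Ror8((uint8_t)C, R)));
        assert((Ror8((uint8_t)X, R) == (uint8_t)C) ==
               ((uint8_t)X == Rol8((uint8_t)C, R)));
      }
  return 0;
}
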
case Intrinsic::uadd_sat: {
// uadd.sat(a, b) == 0 -> (a | b) == 0
- if (C.isNullValue()) {
+ if (C.isZero()) {
Value *Or = Builder.CreateOr(II->getArgOperand(0), II->getArgOperand(1));
- return new ICmpInst(Cmp.getPredicate(), Or, Constant::getNullValue(Ty));
+ return new ICmpInst(Pred, Or, Constant::getNullValue(Ty));
}
break;
}
case Intrinsic::usub_sat: {
// usub.sat(a, b) == 0 -> a <= b
- if (C.isNullValue()) {
- ICmpInst::Predicate NewPred = Cmp.getPredicate() == ICmpInst::ICMP_EQ
- ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_UGT;
+ if (C.isZero()) {
+ ICmpInst::Predicate NewPred =
+ Pred == ICmpInst::ICMP_EQ ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_UGT;
return new ICmpInst(NewPred, II->getArgOperand(0), II->getArgOperand(1));
}
break;
@@ -3224,6 +3291,42 @@ Instruction *InstCombinerImpl::foldICmpEqIntrinsicWithConstant(
return nullptr;
}
+/// Fold an icmp with LLVM intrinsics
+static Instruction *foldICmpIntrinsicWithIntrinsic(ICmpInst &Cmp) {
+ assert(Cmp.isEquality());
+
+ ICmpInst::Predicate Pred = Cmp.getPredicate();
+ Value *Op0 = Cmp.getOperand(0);
+ Value *Op1 = Cmp.getOperand(1);
+ const auto *IIOp0 = dyn_cast<IntrinsicInst>(Op0);
+ const auto *IIOp1 = dyn_cast<IntrinsicInst>(Op1);
+ if (!IIOp0 || !IIOp1 || IIOp0->getIntrinsicID() != IIOp1->getIntrinsicID())
+ return nullptr;
+
+ switch (IIOp0->getIntrinsicID()) {
+ case Intrinsic::bswap:
+ case Intrinsic::bitreverse:
+ // If both operands are byte-swapped or bit-reversed, just compare the
+ // original values.
+ return new ICmpInst(Pred, IIOp0->getOperand(0), IIOp1->getOperand(0));
+ case Intrinsic::fshl:
+ case Intrinsic::fshr:
+ // If both operands are rotated by same amount, just compare the
+ // original values.
+ if (IIOp0->getOperand(0) != IIOp0->getOperand(1))
+ break;
+ if (IIOp1->getOperand(0) != IIOp1->getOperand(1))
+ break;
+ if (IIOp0->getOperand(2) != IIOp1->getOperand(2))
+ break;
+ return new ICmpInst(Pred, IIOp0->getOperand(0), IIOp1->getOperand(0));
+ default:
+ break;
+ }
+
+ return nullptr;
+}
+
/// Fold an icmp with LLVM intrinsic and constant operand: icmp Pred II, C.
Instruction *InstCombinerImpl::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
IntrinsicInst *II,
@@ -3663,7 +3766,7 @@ foldShiftIntoShiftInAnotherHandOfAndInICmp(ICmpInst &I, const SimplifyQuery SQ,
(WidestTy->getScalarSizeInBits() - 1) +
(NarrowestTy->getScalarSizeInBits() - 1);
APInt MaximalRepresentableShiftAmount =
- APInt::getAllOnesValue(XShAmt->getType()->getScalarSizeInBits());
+ APInt::getAllOnes(XShAmt->getType()->getScalarSizeInBits());
if (MaximalRepresentableShiftAmount.ult(MaximalPossibleTotalShiftAmount))
return nullptr;
@@ -3746,19 +3849,22 @@ foldShiftIntoShiftInAnotherHandOfAndInICmp(ICmpInst &I, const SimplifyQuery SQ,
/// Fold
/// (-1 u/ x) u< y
-/// ((x * y) u/ x) != y
+/// ((x * y) ?/ x) != y
/// to
-/// @llvm.umul.with.overflow(x, y) plus extraction of overflow bit
+/// @llvm.?mul.with.overflow(x, y) plus extraction of overflow bit
/// Note that the comparison is commutative, while inverted (u>=, ==) predicate
/// will mean that we are looking for the opposite answer.
-Value *InstCombinerImpl::foldUnsignedMultiplicationOverflowCheck(ICmpInst &I) {
+Value *InstCombinerImpl::foldMultiplicationOverflowCheck(ICmpInst &I) {
ICmpInst::Predicate Pred;
Value *X, *Y;
Instruction *Mul;
+ Instruction *Div;
bool NeedNegation;
// Look for: (-1 u/ x) u</u>= y
if (!I.isEquality() &&
- match(&I, m_c_ICmp(Pred, m_OneUse(m_UDiv(m_AllOnes(), m_Value(X))),
+ match(&I, m_c_ICmp(Pred,
+ m_CombineAnd(m_OneUse(m_UDiv(m_AllOnes(), m_Value(X))),
+ m_Instruction(Div)),
m_Value(Y)))) {
Mul = nullptr;
@@ -3773,13 +3879,16 @@ Value *InstCombinerImpl::foldUnsignedMultiplicationOverflowCheck(ICmpInst &I) {
default:
return nullptr; // Wrong predicate.
}
- } else // Look for: ((x * y) u/ x) !=/== y
+ } else // Look for: ((x * y) / x) !=/== y
if (I.isEquality() &&
- match(&I, m_c_ICmp(Pred, m_Value(Y),
- m_OneUse(m_UDiv(m_CombineAnd(m_c_Mul(m_Deferred(Y),
+ match(&I,
+ m_c_ICmp(Pred, m_Value(Y),
+ m_CombineAnd(
+ m_OneUse(m_IDiv(m_CombineAnd(m_c_Mul(m_Deferred(Y),
m_Value(X)),
m_Instruction(Mul)),
- m_Deferred(X)))))) {
+ m_Deferred(X))),
+ m_Instruction(Div))))) {
NeedNegation = Pred == ICmpInst::Predicate::ICMP_EQ;
} else
return nullptr;
@@ -3791,19 +3900,22 @@ Value *InstCombinerImpl::foldUnsignedMultiplicationOverflowCheck(ICmpInst &I) {
if (MulHadOtherUses)
Builder.SetInsertPoint(Mul);
- Function *F = Intrinsic::getDeclaration(
- I.getModule(), Intrinsic::umul_with_overflow, X->getType());
- CallInst *Call = Builder.CreateCall(F, {X, Y}, "umul");
+ Function *F = Intrinsic::getDeclaration(I.getModule(),
+ Div->getOpcode() == Instruction::UDiv
+ ? Intrinsic::umul_with_overflow
+ : Intrinsic::smul_with_overflow,
+ X->getType());
+ CallInst *Call = Builder.CreateCall(F, {X, Y}, "mul");
// If the multiplication was used elsewhere, to ensure that we don't leave
// "duplicate" instructions, replace uses of that original multiplication
// with the multiplication result from the with.overflow intrinsic.
if (MulHadOtherUses)
- replaceInstUsesWith(*Mul, Builder.CreateExtractValue(Call, 0, "umul.val"));
+ replaceInstUsesWith(*Mul, Builder.CreateExtractValue(Call, 0, "mul.val"));
- Value *Res = Builder.CreateExtractValue(Call, 1, "umul.ov");
+ Value *Res = Builder.CreateExtractValue(Call, 1, "mul.ov");
if (NeedNegation) // This technically increases instruction count.
- Res = Builder.CreateNot(Res, "umul.not.ov");
+ Res = Builder.CreateNot(Res, "mul.not.ov");
// If we replaced the mul, erase it. Do this after all uses of Builder,
// as the mul is used as insertion point.
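
The generalization above now emits either umul.with.overflow or smul.with.overflow depending on whether a udiv or sdiv was matched. The standalone sketch below, not part of the diff, brute-forces the unsigned form of the underlying identity for i8; the signed form is analogous.

#include <cassert>
#include <cstdint>

int main() {
  // ((X * Y) u/ X) != Y  <=>  X * Y overflows i8, for X != 0, which is
  // exactly the overflow bit of umul.with.overflow(X, Y).
  for (unsigned X = 1; X < 256; ++X)
    for (unsigned Y = 0; Y < 256; ++Y) {
      uint8_t Narrow = (uint8_t)(X * Y);               // mul i8 X, Y (wraps)
      bool DivCheck = (uint8_t)(Narrow / (uint8_t)X) != (uint8_t)Y;
      bool Overflow = (X * Y) > 0xFF;                  // wide product check
      assert(DivCheck == Overflow);
    }
  return 0;
}
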
@@ -4079,8 +4191,8 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
if (match(Op0, m_Mul(m_Value(X), m_APInt(C))) && *C != 0 &&
match(Op1, m_Mul(m_Value(Y), m_SpecificInt(*C))) && I.isEquality())
if (!C->countTrailingZeros() ||
- (BO0->hasNoSignedWrap() && BO1->hasNoSignedWrap()) ||
- (BO0->hasNoUnsignedWrap() && BO1->hasNoUnsignedWrap()))
+ (BO0 && BO1 && BO0->hasNoSignedWrap() && BO1->hasNoSignedWrap()) ||
+ (BO0 && BO1 && BO0->hasNoUnsignedWrap() && BO1->hasNoUnsignedWrap()))
return new ICmpInst(Pred, X, Y);
}
@@ -4146,8 +4258,8 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
break;
const APInt *C;
- if (match(BO0->getOperand(1), m_APInt(C)) && !C->isNullValue() &&
- !C->isOneValue()) {
+ if (match(BO0->getOperand(1), m_APInt(C)) && !C->isZero() &&
+ !C->isOne()) {
// icmp eq/ne (X * C), (Y * C) --> icmp (X & Mask), (Y & Mask)
// Mask = -1 >> count-trailing-zeros(C).
if (unsigned TZs = C->countTrailingZeros()) {
@@ -4200,7 +4312,7 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
}
}
- if (Value *V = foldUnsignedMultiplicationOverflowCheck(I))
+ if (Value *V = foldMultiplicationOverflowCheck(I))
return replaceInstUsesWith(I, V);
if (Value *V = foldICmpWithLowBitMaskedVal(I, Builder))
@@ -4373,6 +4485,19 @@ Instruction *InstCombinerImpl::foldICmpEquality(ICmpInst &I) {
}
}
+ {
+ // Similar to above, but specialized for constant because invert is needed:
+ // (X | C) == (Y | C) --> (X ^ Y) & ~C == 0
+ Value *X, *Y;
+ Constant *C;
+ if (match(Op0, m_OneUse(m_Or(m_Value(X), m_Constant(C)))) &&
+ match(Op1, m_OneUse(m_Or(m_Value(Y), m_Specific(C))))) {
+ Value *Xor = Builder.CreateXor(X, Y);
+ Value *And = Builder.CreateAnd(Xor, ConstantExpr::getNot(C));
+ return new ICmpInst(Pred, And, Constant::getNullValue(And->getType()));
+ }
+ }
+
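
The specialized (X | C) == (Y | C) fold holds because OR-ing with C fixes the bits inside C, so equality can only depend on the bits outside of C. A standalone i8 brute-force, not part of the diff:

#include <cassert>
#include <cstdint>

int main() {
  // (X | C) == (Y | C)  <=>  ((X ^ Y) & ~C) == 0
  for (unsigned C = 0; C < 256; ++C)
    for (unsigned X = 0; X < 256; ++X)
      for (unsigned Y = 0; Y < 256; ++Y) {
        bool OrEq   = (uint8_t)(X | C) == (uint8_t)(Y | C);
        bool Masked = (uint8_t)((X ^ Y) & ~C) == 0;
        assert(OrEq == Masked);
      }
  return 0;
}
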
// Transform (zext A) == (B & (1<<X)-1) --> A == (trunc B)
// and (B & (1<<X)-1) == (zext A) --> A == (trunc B)
ConstantInt *Cst1;
@@ -4441,14 +4566,8 @@ Instruction *InstCombinerImpl::foldICmpEquality(ICmpInst &I) {
}
}
- // If both operands are byte-swapped or bit-reversed, just compare the
- // original values.
- // TODO: Move this to a function similar to foldICmpIntrinsicWithConstant()
- // and handle more intrinsics.
- if ((match(Op0, m_BSwap(m_Value(A))) && match(Op1, m_BSwap(m_Value(B)))) ||
- (match(Op0, m_BitReverse(m_Value(A))) &&
- match(Op1, m_BitReverse(m_Value(B)))))
- return new ICmpInst(Pred, A, B);
+ if (Instruction *ICmp = foldICmpIntrinsicWithIntrinsic(I))
+ return ICmp;
// Canonicalize checking for a power-of-2-or-zero value:
// (A & (A-1)) == 0 --> ctpop(A) < 2 (two commuted variants)
@@ -4474,6 +4593,74 @@ Instruction *InstCombinerImpl::foldICmpEquality(ICmpInst &I) {
: new ICmpInst(ICmpInst::ICMP_UGT, CtPop, ConstantInt::get(Ty, 1));
}
+ // Match icmp eq (trunc (lshr A, BW), (ashr (trunc A), BW-1)), which checks the
+ // top BW/2 + 1 bits are all the same. Create "A >=s INT_MIN && A <=s INT_MAX",
+ // which we generate as "icmp ult (add A, 2^(BW-1)), 2^BW" to skip a few steps
+ // of instcombine.
+ unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
+ if (match(Op0, m_AShr(m_Trunc(m_Value(A)), m_SpecificInt(BitWidth - 1))) &&
+ match(Op1, m_Trunc(m_LShr(m_Specific(A), m_SpecificInt(BitWidth)))) &&
+ A->getType()->getScalarSizeInBits() == BitWidth * 2 &&
+ (I.getOperand(0)->hasOneUse() || I.getOperand(1)->hasOneUse())) {
+ APInt C = APInt::getOneBitSet(BitWidth * 2, BitWidth - 1);
+ Value *Add = Builder.CreateAdd(A, ConstantInt::get(A->getType(), C));
+ return new ICmpInst(Pred == ICmpInst::ICMP_EQ ? ICmpInst::ICMP_ULT
+ : ICmpInst::ICMP_UGE,
+ Add, ConstantInt::get(A->getType(), C.shl(1)));
+ }
+
+ return nullptr;
+}
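
The "top bits all equal" pattern matched just above is an "A fits in a signed half-width integer" test in disguise, rewritten as a single unsigned range check. The standalone check below is not part of the diff and assumes arithmetic right shift on negative signed values (true for Clang and GCC); it instantiates BW = 8 with A : i16.

#include <cassert>
#include <cstdint>

int main() {
  // icmp eq (ashr (trunc A to i8), 7), (trunc (lshr A, 8))
  //   <=>  (add A, 128) u< 256
  for (int Val = INT16_MIN; Val <= INT16_MAX; ++Val) {
    uint16_t A = (uint16_t)Val;
    int8_t Low = (int8_t)A;                        // trunc i16 -> i8
    int8_t SignSplat = (int8_t)(Low >> 7);         // ashr i8, 7
    int8_t High = (int8_t)(A >> 8);                // trunc (lshr i16 A, 8)
    bool TopBitsSame = SignSplat == High;
    bool RangeCheck = (uint16_t)(A + 128) < 256;   // (add A, 0x80) u< 0x100
    assert(TopBitsSame == RangeCheck);
  }
  return 0;
}
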
+
+static Instruction *foldICmpWithTrunc(ICmpInst &ICmp,
+ InstCombiner::BuilderTy &Builder) {
+ const ICmpInst::Predicate Pred = ICmp.getPredicate();
+ Value *Op0 = ICmp.getOperand(0), *Op1 = ICmp.getOperand(1);
+
+ // Try to canonicalize trunc + compare-to-constant into a mask + cmp.
+ // The trunc masks high bits while the compare may effectively mask low bits.
+ Value *X;
+ const APInt *C;
+ if (!match(Op0, m_OneUse(m_Trunc(m_Value(X)))) || !match(Op1, m_APInt(C)))
+ return nullptr;
+
+ unsigned SrcBits = X->getType()->getScalarSizeInBits();
+ if (Pred == ICmpInst::ICMP_ULT) {
+ if (C->isPowerOf2()) {
+ // If C is a power-of-2 (one set bit):
+ // (trunc X) u< C --> (X & -C) == 0 (are all masked-high-bits clear?)
+ Constant *MaskC = ConstantInt::get(X->getType(), (-*C).zext(SrcBits));
+ Value *And = Builder.CreateAnd(X, MaskC);
+ Constant *Zero = ConstantInt::getNullValue(X->getType());
+ return new ICmpInst(ICmpInst::ICMP_EQ, And, Zero);
+ }
+ // If C is a negative power-of-2 (high-bit mask):
+ // (trunc X) u< C --> (X & C) != C (are any masked-high-bits clear?)
+ if (C->isNegatedPowerOf2()) {
+ Constant *MaskC = ConstantInt::get(X->getType(), C->zext(SrcBits));
+ Value *And = Builder.CreateAnd(X, MaskC);
+ return new ICmpInst(ICmpInst::ICMP_NE, And, MaskC);
+ }
+ }
+
+ if (Pred == ICmpInst::ICMP_UGT) {
+ // If C is a low-bit-mask (C+1 is a power-of-2):
+ // (trunc X) u> C --> (X & ~C) != 0 (are any masked-high-bits set?)
+ if (C->isMask()) {
+ Constant *MaskC = ConstantInt::get(X->getType(), (~*C).zext(SrcBits));
+ Value *And = Builder.CreateAnd(X, MaskC);
+ Constant *Zero = ConstantInt::getNullValue(X->getType());
+ return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
+ }
+ // If ~C is a power-of-2 (C has a single clear bit):
+ // (trunc X) u> C --> (X & (C+1)) == C+1 (are all masked-high-bits set?)
+ if ((~*C).isPowerOf2()) {
+ Constant *MaskC = ConstantInt::get(X->getType(), (*C + 1).zext(SrcBits));
+ Value *And = Builder.CreateAnd(X, MaskC);
+ return new ICmpInst(ICmpInst::ICMP_EQ, And, MaskC);
+ }
+ }
+
return nullptr;
}
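
foldICmpWithTrunc above trades a trunc for a mask test; each of its four branches can be brute-forced over an i16 source truncated to i8. The standalone check below is not part of the diff; the constants are chosen only for illustration.

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned Val = 0; Val < 65536; ++Val) {
    uint16_t X = (uint16_t)Val;
    uint8_t T = (uint8_t)X;                        // trunc i16 X to i8
    // C = 16 (power of 2):       T u< 16   <=>  (X & 0x00F0) == 0
    assert((T < 16)   == ((X & 0x00F0u) == 0));
    // C = 0xF0 (negated pow2):   T u< 0xF0 <=>  (X & 0x00F0) != 0x00F0
    assert((T < 0xF0) == ((X & 0x00F0u) != 0x00F0u));
    // C = 0x0F (low-bit mask):   T u> 0x0F <=>  (X & 0x00F0) != 0
    assert((T > 0x0F) == ((X & 0x00F0u) != 0));
    // C = 0xEF (~C is pow2):     T u> 0xEF <=>  (X & 0x00F0) == 0x00F0
    assert((T > 0xEF) == ((X & 0x00F0u) == 0x00F0u));
  }
  return 0;
}
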
@@ -4620,6 +4807,9 @@ Instruction *InstCombinerImpl::foldICmpWithCastOp(ICmpInst &ICmp) {
return new ICmpInst(ICmp.getPredicate(), Op0Src, NewOp1);
}
+ if (Instruction *R = foldICmpWithTrunc(ICmp, Builder))
+ return R;
+
return foldICmpWithZextOrSext(ICmp, Builder);
}
@@ -4943,7 +5133,7 @@ static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal,
static APInt getDemandedBitsLHSMask(ICmpInst &I, unsigned BitWidth) {
const APInt *RHS;
if (!match(I.getOperand(1), m_APInt(RHS)))
- return APInt::getAllOnesValue(BitWidth);
+ return APInt::getAllOnes(BitWidth);
// If this is a normal comparison, it demands all bits. If it is a sign bit
// comparison, it only demands the sign bit.
@@ -4965,7 +5155,7 @@ static APInt getDemandedBitsLHSMask(ICmpInst &I, unsigned BitWidth) {
return APInt::getBitsSetFrom(BitWidth, RHS->countTrailingZeros());
default:
- return APInt::getAllOnesValue(BitWidth);
+ return APInt::getAllOnes(BitWidth);
}
}
@@ -5129,8 +5319,7 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
Op0Known, 0))
return &I;
- if (SimplifyDemandedBits(&I, 1, APInt::getAllOnesValue(BitWidth),
- Op1Known, 0))
+ if (SimplifyDemandedBits(&I, 1, APInt::getAllOnes(BitWidth), Op1Known, 0))
return &I;
// Given the known and unknown bits, compute a range that the LHS could be
@@ -5158,6 +5347,83 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
if (!isa<Constant>(Op1) && Op1Min == Op1Max)
return new ICmpInst(Pred, Op0, ConstantExpr::getIntegerValue(Ty, Op1Min));
+ // Don't break up a clamp pattern -- (min(max X, Y), Z) -- by replacing a
+ // min/max canonical compare with some other compare. That could lead to
+ // conflict with select canonicalization and infinite looping.
+ // FIXME: This constraint may go away if min/max intrinsics are canonical.
+ auto isMinMaxCmp = [&](Instruction &Cmp) {
+ if (!Cmp.hasOneUse())
+ return false;
+ Value *A, *B;
+ SelectPatternFlavor SPF = matchSelectPattern(Cmp.user_back(), A, B).Flavor;
+ if (!SelectPatternResult::isMinOrMax(SPF))
+ return false;
+ return match(Op0, m_MaxOrMin(m_Value(), m_Value())) ||
+ match(Op1, m_MaxOrMin(m_Value(), m_Value()));
+ };
+ if (!isMinMaxCmp(I)) {
+ switch (Pred) {
+ default:
+ break;
+ case ICmpInst::ICMP_ULT: {
+ if (Op1Min == Op0Max) // A <u B -> A != B if max(A) == min(B)
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+ const APInt *CmpC;
+ if (match(Op1, m_APInt(CmpC))) {
+ // A <u C -> A == C-1 if min(A)+1 == C
+ if (*CmpC == Op0Min + 1)
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+ ConstantInt::get(Op1->getType(), *CmpC - 1));
+ // X <u C --> X == 0, if the number of zero bits in the bottom of X
+ // exceeds the log2 of C.
+ if (Op0Known.countMinTrailingZeros() >= CmpC->ceilLogBase2())
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+ Constant::getNullValue(Op1->getType()));
+ }
+ break;
+ }
+ case ICmpInst::ICMP_UGT: {
+ if (Op1Max == Op0Min) // A >u B -> A != B if min(A) == max(B)
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+ const APInt *CmpC;
+ if (match(Op1, m_APInt(CmpC))) {
+ // A >u C -> A == C+1 if max(a)-1 == C
+ if (*CmpC == Op0Max - 1)
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+ ConstantInt::get(Op1->getType(), *CmpC + 1));
+ // X >u C --> X != 0, if the number of zero bits in the bottom of X
+ // exceeds the log2 of C.
+ if (Op0Known.countMinTrailingZeros() >= CmpC->getActiveBits())
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0,
+ Constant::getNullValue(Op1->getType()));
+ }
+ break;
+ }
+ case ICmpInst::ICMP_SLT: {
+ if (Op1Min == Op0Max) // A <s B -> A != B if max(A) == min(B)
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+ const APInt *CmpC;
+ if (match(Op1, m_APInt(CmpC))) {
+ if (*CmpC == Op0Min + 1) // A <s C -> A == C-1 if min(A)+1 == C
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+ ConstantInt::get(Op1->getType(), *CmpC - 1));
+ }
+ break;
+ }
+ case ICmpInst::ICMP_SGT: {
+ if (Op1Max == Op0Min) // A >s B -> A != B if min(A) == max(B)
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+ const APInt *CmpC;
+ if (match(Op1, m_APInt(CmpC))) {
+ if (*CmpC == Op0Max - 1) // A >s C -> A == C+1 if max(A)-1 == C
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+ ConstantInt::get(Op1->getType(), *CmpC + 1));
+ }
+ break;
+ }
+ }
+ }
+
// Based on the range information we know about the LHS, see if we can
// simplify this comparison. For example, (x&4) < 8 is always true.
switch (Pred) {
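One of the relocated folds above, "X <u C --> X == 0 when the known trailing zeros of X cover log2(C)", can be illustrated outside the compiler; the concrete values (C = 8, X restricted to multiples of 8) are assumptions for the sketch only.

    // Minimal illustration: if X has at least ceil(log2(C)) trailing zero bits,
    // X u< C can only hold for X == 0.
    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t C = 8;                      // ceilLogBase2(8) == 3
      for (uint32_t X = 0; X <= 0xFF; X += 8) {  // X has >= 3 trailing zero bits
        bool Orig = X < C;                       // X u< C
        bool Folded = (X == 0);                  // X == 0
        assert(Orig == Folded);
      }
      return 0;
    }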
@@ -5203,7 +5469,7 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
// Check if the LHS is 8 >>u x and the result is a power of 2 like 1.
const APInt *CI;
- if (Op0KnownZeroInverted.isOneValue() &&
+ if (Op0KnownZeroInverted.isOne() &&
match(LHS, m_LShr(m_Power2(CI), m_Value(X)))) {
// ((8 >>u X) & 1) == 0 -> X != 3
// ((8 >>u X) & 1) != 0 -> X == 3
@@ -5219,21 +5485,6 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
if (Op0Min.uge(Op1Max)) // A <u B -> false if min(A) >= max(B)
return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
- if (Op1Min == Op0Max) // A <u B -> A != B if max(A) == min(B)
- return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
-
- const APInt *CmpC;
- if (match(Op1, m_APInt(CmpC))) {
- // A <u C -> A == C-1 if min(A)+1 == C
- if (*CmpC == Op0Min + 1)
- return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
- ConstantInt::get(Op1->getType(), *CmpC - 1));
- // X <u C --> X == 0, if the number of zero bits in the bottom of X
- // exceeds the log2 of C.
- if (Op0Known.countMinTrailingZeros() >= CmpC->ceilLogBase2())
- return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
- Constant::getNullValue(Op1->getType()));
- }
break;
}
case ICmpInst::ICMP_UGT: {
@@ -5241,21 +5492,6 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
if (Op0Max.ule(Op1Min)) // A >u B -> false if max(A) <= min(B)
return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
- if (Op1Max == Op0Min) // A >u B -> A != B if min(A) == max(B)
- return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
-
- const APInt *CmpC;
- if (match(Op1, m_APInt(CmpC))) {
- // A >u C -> A == C+1 if max(a)-1 == C
- if (*CmpC == Op0Max - 1)
- return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
- ConstantInt::get(Op1->getType(), *CmpC + 1));
- // X >u C --> X != 0, if the number of zero bits in the bottom of X
- // exceeds the log2 of C.
- if (Op0Known.countMinTrailingZeros() >= CmpC->getActiveBits())
- return new ICmpInst(ICmpInst::ICMP_NE, Op0,
- Constant::getNullValue(Op1->getType()));
- }
break;
}
case ICmpInst::ICMP_SLT: {
@@ -5263,14 +5499,6 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
if (Op0Min.sge(Op1Max)) // A <s B -> false if min(A) >= max(B)
return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
- if (Op1Min == Op0Max) // A <s B -> A != B if max(A) == min(B)
- return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
- const APInt *CmpC;
- if (match(Op1, m_APInt(CmpC))) {
- if (*CmpC == Op0Min + 1) // A <s C -> A == C-1 if min(A)+1 == C
- return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
- ConstantInt::get(Op1->getType(), *CmpC - 1));
- }
break;
}
case ICmpInst::ICMP_SGT: {
@@ -5278,14 +5506,6 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
if (Op0Max.sle(Op1Min)) // A >s B -> false if max(A) <= min(B)
return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
- if (Op1Max == Op0Min) // A >s B -> A != B if min(A) == max(B)
- return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
- const APInt *CmpC;
- if (match(Op1, m_APInt(CmpC))) {
- if (*CmpC == Op0Max - 1) // A >s C -> A == C+1 if max(A)-1 == C
- return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
- ConstantInt::get(Op1->getType(), *CmpC + 1));
- }
break;
}
case ICmpInst::ICMP_SGE:
@@ -5587,7 +5807,7 @@ static Instruction *foldVectorCmp(CmpInst &Cmp,
if (match(RHS, m_Shuffle(m_Value(V2), m_Undef(), m_SpecificMask(M))) &&
V1Ty == V2->getType() && (LHS->hasOneUse() || RHS->hasOneUse())) {
Value *NewCmp = Builder.CreateCmp(Pred, V1, V2);
- return new ShuffleVectorInst(NewCmp, UndefValue::get(NewCmp->getType()), M);
+ return new ShuffleVectorInst(NewCmp, M);
}
// Try to canonicalize compare with splatted operand and splat constant.
@@ -5608,8 +5828,7 @@ static Instruction *foldVectorCmp(CmpInst &Cmp,
ScalarC);
SmallVector<int, 8> NewM(M.size(), MaskSplatIndex);
Value *NewCmp = Builder.CreateCmp(Pred, V1, C);
- return new ShuffleVectorInst(NewCmp, UndefValue::get(NewCmp->getType()),
- NewM);
+ return new ShuffleVectorInst(NewCmp, NewM);
}
return nullptr;
@@ -5645,6 +5864,23 @@ static Instruction *foldICmpOfUAddOv(ICmpInst &I) {
return ExtractValueInst::Create(UAddOv, 1);
}
+static Instruction *foldICmpInvariantGroup(ICmpInst &I) {
+ if (!I.getOperand(0)->getType()->isPointerTy() ||
+ NullPointerIsDefined(
+ I.getParent()->getParent(),
+ I.getOperand(0)->getType()->getPointerAddressSpace())) {
+ return nullptr;
+ }
+ Instruction *Op;
+ if (match(I.getOperand(0), m_Instruction(Op)) &&
+ match(I.getOperand(1), m_Zero()) &&
+ Op->isLaunderOrStripInvariantGroup()) {
+ return ICmpInst::Create(Instruction::ICmp, I.getPredicate(),
+ Op->getOperand(0), I.getOperand(1));
+ }
+ return nullptr;
+}
+
Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
bool Changed = false;
const SimplifyQuery Q = SQ.getWithInstruction(&I);
@@ -5698,9 +5934,6 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
if (Instruction *Res = foldICmpWithDominatingICmp(I))
return Res;
- if (Instruction *Res = foldICmpBinOp(I, Q))
- return Res;
-
if (Instruction *Res = foldICmpUsingKnownBits(I))
return Res;
@@ -5746,6 +5979,15 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
}
}
+ // The folds in here may rely on wrapping flags and special constants, so
+ // they can break up min/max idioms in some cases but not seemingly similar
+ // patterns.
+ // FIXME: It may be possible to enhance select folding to make this
+ // unnecessary. It may also be moot if we canonicalize to min/max
+ // intrinsics.
+ if (Instruction *Res = foldICmpBinOp(I, Q))
+ return Res;
+
if (Instruction *Res = foldICmpInstWithConstant(I))
return Res;
@@ -5757,13 +5999,12 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
if (Instruction *Res = foldICmpInstWithConstantNotInt(I))
return Res;
- // If we can optimize a 'icmp GEP, P' or 'icmp P, GEP', do so now.
- if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op0))
+ // Try to optimize 'icmp GEP, P' or 'icmp P, GEP'.
+ if (auto *GEP = dyn_cast<GEPOperator>(Op0))
if (Instruction *NI = foldGEPICmp(GEP, Op1, I.getPredicate(), I))
return NI;
- if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op1))
- if (Instruction *NI = foldGEPICmp(GEP, Op0,
- ICmpInst::getSwappedPredicate(I.getPredicate()), I))
+ if (auto *GEP = dyn_cast<GEPOperator>(Op1))
+ if (Instruction *NI = foldGEPICmp(GEP, Op0, I.getSwappedPredicate(), I))
return NI;
// Try to optimize equality comparisons against alloca-based pointers.
@@ -5777,7 +6018,7 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
return New;
}
- if (Instruction *Res = foldICmpBitCast(I, Builder))
+ if (Instruction *Res = foldICmpBitCast(I))
return Res;
// TODO: Hoist this above the min/max bailout.
@@ -5879,6 +6120,9 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
if (Instruction *Res = foldVectorCmp(I, Builder))
return Res;
+ if (Instruction *Res = foldICmpInvariantGroup(I))
+ return Res;
+
return Changed ? &I : nullptr;
}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index eaa53348028d..72e1b21e8d49 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -22,14 +22,15 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Value.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
-#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#define DEBUG_TYPE "instcombine"
+#include "llvm/Transforms/Utils/InstructionWorklist.h"
using namespace llvm::PatternMatch;
@@ -61,7 +62,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
: public InstCombiner,
public InstVisitor<InstCombinerImpl, Instruction *> {
public:
- InstCombinerImpl(InstCombineWorklist &Worklist, BuilderTy &Builder,
+ InstCombinerImpl(InstructionWorklist &Worklist, BuilderTy &Builder,
bool MinimizeSize, AAResults *AA, AssumptionCache &AC,
TargetLibraryInfo &TLI, TargetTransformInfo &TTI,
DominatorTree &DT, OptimizationRemarkEmitter &ORE,
@@ -190,6 +191,7 @@ public:
private:
void annotateAnyAllocSite(CallBase &Call, const TargetLibraryInfo *TLI);
+ bool isDesirableIntType(unsigned BitWidth) const;
bool shouldChangeType(unsigned FromBitWidth, unsigned ToBitWidth) const;
bool shouldChangeType(Type *From, Type *To) const;
Value *dyn_castNegVal(Value *V) const;
@@ -240,15 +242,11 @@ private:
///
/// \param ICI The icmp of the (zext icmp) pair we are interested in.
/// \param CI The zext of the (zext icmp) pair we are interested in.
- /// \param DoTransform Pass false to just test whether the given (zext icmp)
- /// would be transformed. Pass true to actually perform the transformation.
///
/// \return null if the transformation cannot be performed. If the
/// transformation can be performed the new instruction that replaces the
- /// (zext icmp) pair will be returned (if \p DoTransform is false the
- /// unmodified \p ICI will be returned in this case).
- Instruction *transformZExtICmp(ICmpInst *ICI, ZExtInst &CI,
- bool DoTransform = true);
+ /// (zext icmp) pair will be returned.
+ Instruction *transformZExtICmp(ICmpInst *ICI, ZExtInst &CI);
Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI);
@@ -319,13 +317,15 @@ private:
Value *EmitGEPOffset(User *GEP);
Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN);
+ Instruction *foldBitcastExtElt(ExtractElementInst &ExtElt);
Instruction *foldCastedBitwiseLogic(BinaryOperator &I);
Instruction *narrowBinOp(TruncInst &Trunc);
Instruction *narrowMaskedBinOp(BinaryOperator &And);
Instruction *narrowMathIfNoOverflow(BinaryOperator &I);
Instruction *narrowFunnelShift(TruncInst &Trunc);
Instruction *optimizeBitCastFromPhi(CastInst &CI, PHINode *PN);
- Instruction *matchSAddSubSat(SelectInst &MinMax1);
+ Instruction *matchSAddSubSat(Instruction &MinMax1);
+ Instruction *foldNot(BinaryOperator &I);
void freelyInvertAllUsersOf(Value *V);
@@ -347,6 +347,8 @@ private:
Value *foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, BinaryOperator &Or);
Value *foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS, BinaryOperator &Xor);
+ Value *foldEqOfParts(ICmpInst *Cmp0, ICmpInst *Cmp1, bool IsAnd);
+
/// Optimize (fcmp)&(fcmp) or (fcmp)|(fcmp).
/// NOTE: Unlike most of instcombine, this returns a Value which should
/// already be inserted into the function.
@@ -623,6 +625,7 @@ public:
Instruction *foldPHIArgGEPIntoPHI(PHINode &PN);
Instruction *foldPHIArgLoadIntoPHI(PHINode &PN);
Instruction *foldPHIArgZextsIntoPHI(PHINode &PN);
+ Instruction *foldPHIArgIntToPtrToPHI(PHINode &PN);
/// If an integer typed PHI has only one use which is an IntToPtr operation,
/// replace the PHI with an existing pointer typed PHI if it exists. Otherwise
@@ -657,7 +660,7 @@ public:
Instruction *foldSignBitTest(ICmpInst &I);
Instruction *foldICmpWithZero(ICmpInst &Cmp);
- Value *foldUnsignedMultiplicationOverflowCheck(ICmpInst &Cmp);
+ Value *foldMultiplicationOverflowCheck(ICmpInst &Cmp);
Instruction *foldICmpSelectConstant(ICmpInst &Cmp, SelectInst *Select,
ConstantInt *C);
@@ -701,6 +704,7 @@ public:
const APInt &C);
Instruction *foldICmpEqIntrinsicWithConstant(ICmpInst &ICI, IntrinsicInst *II,
const APInt &C);
+ Instruction *foldICmpBitCast(ICmpInst &Cmp);
// Helpers of visitSelectInst().
Instruction *foldSelectExtConst(SelectInst &Sel);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index a8474e27383d..79a8a065d02a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -261,8 +261,8 @@ private:
bool PointerReplacer::collectUsers(Instruction &I) {
for (auto U : I.users()) {
- Instruction *Inst = cast<Instruction>(&*U);
- if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
+ auto *Inst = cast<Instruction>(&*U);
+ if (auto *Load = dyn_cast<LoadInst>(Inst)) {
if (Load->isVolatile())
return false;
Worklist.insert(Load);
@@ -270,7 +270,9 @@ bool PointerReplacer::collectUsers(Instruction &I) {
Worklist.insert(Inst);
if (!collectUsers(*Inst))
return false;
- } else if (isa<MemTransferInst>(Inst)) {
+ } else if (auto *MI = dyn_cast<MemTransferInst>(Inst)) {
+ if (MI->isVolatile())
+ return false;
Worklist.insert(Inst);
} else if (Inst->isLifetimeStartOrEnd()) {
continue;
@@ -335,8 +337,7 @@ void PointerReplacer::replace(Instruction *I) {
MemCpy->getIntrinsicID(), MemCpy->getRawDest(), MemCpy->getDestAlign(),
SrcV, MemCpy->getSourceAlign(), MemCpy->getLength(),
MemCpy->isVolatile());
- AAMDNodes AAMD;
- MemCpy->getAAMetadata(AAMD);
+ AAMDNodes AAMD = MemCpy->getAAMetadata();
if (AAMD)
NewI->setAAMetadata(AAMD);
@@ -647,9 +648,7 @@ static Instruction *unpackLoadToAggregate(InstCombinerImpl &IC, LoadInst &LI) {
if (NumElements == 1) {
LoadInst *NewLoad = IC.combineLoadToNewType(LI, ST->getTypeAtIndex(0U),
".unpack");
- AAMDNodes AAMD;
- LI.getAAMetadata(AAMD);
- NewLoad->setAAMetadata(AAMD);
+ NewLoad->setAAMetadata(LI.getAAMetadata());
return IC.replaceInstUsesWith(LI, IC.Builder.CreateInsertValue(
UndefValue::get(T), NewLoad, 0, Name));
}
@@ -678,9 +677,7 @@ static Instruction *unpackLoadToAggregate(InstCombinerImpl &IC, LoadInst &LI) {
ST->getElementType(i), Ptr,
commonAlignment(Align, SL->getElementOffset(i)), Name + ".unpack");
// Propagate AA metadata. It'll still be valid on the narrowed load.
- AAMDNodes AAMD;
- LI.getAAMetadata(AAMD);
- L->setAAMetadata(AAMD);
+ L->setAAMetadata(LI.getAAMetadata());
V = IC.Builder.CreateInsertValue(V, L, i);
}
@@ -693,9 +690,7 @@ static Instruction *unpackLoadToAggregate(InstCombinerImpl &IC, LoadInst &LI) {
auto NumElements = AT->getNumElements();
if (NumElements == 1) {
LoadInst *NewLoad = IC.combineLoadToNewType(LI, ET, ".unpack");
- AAMDNodes AAMD;
- LI.getAAMetadata(AAMD);
- NewLoad->setAAMetadata(AAMD);
+ NewLoad->setAAMetadata(LI.getAAMetadata());
return IC.replaceInstUsesWith(LI, IC.Builder.CreateInsertValue(
UndefValue::get(T), NewLoad, 0, Name));
}
@@ -727,9 +722,7 @@ static Instruction *unpackLoadToAggregate(InstCombinerImpl &IC, LoadInst &LI) {
auto *L = IC.Builder.CreateAlignedLoad(AT->getElementType(), Ptr,
commonAlignment(Align, Offset),
Name + ".unpack");
- AAMDNodes AAMD;
- LI.getAAMetadata(AAMD);
- L->setAAMetadata(AAMD);
+ L->setAAMetadata(LI.getAAMetadata());
V = IC.Builder.CreateInsertValue(V, L, i);
Offset += EltSize;
}
@@ -1206,9 +1199,7 @@ static bool unpackStoreToAggregate(InstCombinerImpl &IC, StoreInst &SI) {
auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
auto EltAlign = commonAlignment(Align, SL->getElementOffset(i));
llvm::Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
- AAMDNodes AAMD;
- SI.getAAMetadata(AAMD);
- NS->setAAMetadata(AAMD);
+ NS->setAAMetadata(SI.getAAMetadata());
}
return true;
@@ -1254,9 +1245,7 @@ static bool unpackStoreToAggregate(InstCombinerImpl &IC, StoreInst &SI) {
auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
auto EltAlign = commonAlignment(Align, Offset);
Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
- AAMDNodes AAMD;
- SI.getAAMetadata(AAMD);
- NS->setAAMetadata(AAMD);
+ NS->setAAMetadata(SI.getAAMetadata());
Offset += EltSize;
}
@@ -1498,8 +1487,8 @@ bool InstCombinerImpl::mergeStoreIntoSuccessor(StoreInst &SI) {
StoreInst *OtherStore = nullptr;
if (OtherBr->isUnconditional()) {
--BBI;
- // Skip over debugging info.
- while (isa<DbgInfoIntrinsic>(BBI) ||
+ // Skip over debugging info and pseudo probes.
+ while (BBI->isDebugOrPseudoInst() ||
(isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
if (BBI==OtherBB->begin())
return false;
@@ -1567,12 +1556,9 @@ bool InstCombinerImpl::mergeStoreIntoSuccessor(StoreInst &SI) {
NewSI->setDebugLoc(MergedLoc);
// If the two stores had AA tags, merge them.
- AAMDNodes AATags;
- SI.getAAMetadata(AATags);
- if (AATags) {
- OtherStore->getAAMetadata(AATags, /* Merge = */ true);
- NewSI->setAAMetadata(AATags);
- }
+ AAMDNodes AATags = SI.getAAMetadata();
+ if (AATags)
+ NewSI->setAAMetadata(AATags.merge(OtherStore->getAAMetadata()));
// Nuke the old stores.
eraseInstFromFunction(SI);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 6f2a8ebf839a..779d298da7a4 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -31,7 +31,6 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
-#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include <cassert>
@@ -39,11 +38,12 @@
#include <cstdint>
#include <utility>
+#define DEBUG_TYPE "instcombine"
+#include "llvm/Transforms/Utils/InstructionWorklist.h"
+
using namespace llvm;
using namespace PatternMatch;
-#define DEBUG_TYPE "instcombine"
-
/// The specific integer value is used in a context where it is known to be
/// non-zero. If this allows us to simplify the computation, do so and return
/// the new operand, otherwise return null.
@@ -107,14 +107,19 @@ static Value *foldMulSelectToNegate(BinaryOperator &I,
// mul (select Cond, 1, -1), OtherOp --> select Cond, OtherOp, -OtherOp
// mul OtherOp, (select Cond, 1, -1) --> select Cond, OtherOp, -OtherOp
if (match(&I, m_c_Mul(m_OneUse(m_Select(m_Value(Cond), m_One(), m_AllOnes())),
- m_Value(OtherOp))))
- return Builder.CreateSelect(Cond, OtherOp, Builder.CreateNeg(OtherOp));
-
+ m_Value(OtherOp)))) {
+ bool HasAnyNoWrap = I.hasNoSignedWrap() || I.hasNoUnsignedWrap();
+ Value *Neg = Builder.CreateNeg(OtherOp, "", false, HasAnyNoWrap);
+ return Builder.CreateSelect(Cond, OtherOp, Neg);
+ }
// mul (select Cond, -1, 1), OtherOp --> select Cond, -OtherOp, OtherOp
// mul OtherOp, (select Cond, -1, 1) --> select Cond, -OtherOp, OtherOp
if (match(&I, m_c_Mul(m_OneUse(m_Select(m_Value(Cond), m_AllOnes(), m_One())),
- m_Value(OtherOp))))
- return Builder.CreateSelect(Cond, Builder.CreateNeg(OtherOp), OtherOp);
+ m_Value(OtherOp)))) {
+ bool HasAnyNoWrap = I.hasNoSignedWrap() || I.hasNoUnsignedWrap();
+ Value *Neg = Builder.CreateNeg(OtherOp, "", false, HasAnyNoWrap);
+ return Builder.CreateSelect(Cond, Neg, OtherOp);
+ }
// fmul (select Cond, 1.0, -1.0), OtherOp --> select Cond, OtherOp, -OtherOp
// fmul OtherOp, (select Cond, 1.0, -1.0) --> select Cond, OtherOp, -OtherOp
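The identity behind foldMulSelectToNegate can be checked standalone; this sketch (not from the patch) uses unsigned wraparound so the negation is UB-free, which is an assumption standing in for the wrapping semantics of the IR.

    // (Cond ? 1 : -1) * X  ==  (Cond ? X : -X), modulo 2^32
    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t X = 0; X < 1000; X += 7)
        for (int Cond = 0; Cond <= 1; ++Cond) {
          uint32_t Mul = (Cond ? 1u : ~0u) * X;   // mul (select Cond, 1, -1), X
          uint32_t Sel = Cond ? X : (0u - X);     // select Cond, X, -X
          assert(Mul == Sel);
        }
      return 0;
    }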
@@ -564,6 +569,16 @@ Instruction *InstCombinerImpl::visitFMul(BinaryOperator &I) {
return replaceInstUsesWith(I, NewPow);
}
+ // powi(x, y) * powi(x, z) -> powi(x, y + z)
+ if (match(Op0, m_Intrinsic<Intrinsic::powi>(m_Value(X), m_Value(Y))) &&
+ match(Op1, m_Intrinsic<Intrinsic::powi>(m_Specific(X), m_Value(Z))) &&
+ Y->getType() == Z->getType()) {
+ auto *YZ = Builder.CreateAdd(Y, Z);
+ auto *NewPow = Builder.CreateIntrinsic(
+ Intrinsic::powi, {X->getType(), YZ->getType()}, {X, YZ}, &I);
+ return replaceInstUsesWith(I, NewPow);
+ }
+
// exp(X) * exp(Y) -> exp(X + Y)
if (match(Op0, m_Intrinsic<Intrinsic::exp>(m_Value(X))) &&
match(Op1, m_Intrinsic<Intrinsic::exp>(m_Value(Y)))) {
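A quick numeric illustration (not from the patch) of the exponent-addition rule behind the new powi fold; the local powi() helper is a hand-written stand-in for llvm.powi, and the tolerance reflects that floating-point products only match up to rounding in general.

    #include <cassert>
    #include <cmath>

    // Stand-in for llvm.powi with a small non-negative exponent.
    static double powi(double X, int N) {
      double R = 1.0;
      for (int I = 0; I < N; ++I)
        R *= X;
      return R;
    }

    int main() {
      for (int Y = 0; Y <= 5; ++Y)
        for (int Z = 0; Z <= 5; ++Z)
          assert(std::fabs(powi(1.5, Y) * powi(1.5, Z) - powi(1.5, Y + Z)) < 1e-9);
      return 0;
    }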
@@ -706,11 +721,11 @@ static bool isMultiple(const APInt &C1, const APInt &C2, APInt &Quotient,
assert(C1.getBitWidth() == C2.getBitWidth() && "Constant widths not equal");
// Bail if we will divide by zero.
- if (C2.isNullValue())
+ if (C2.isZero())
return false;
// Bail if we would divide INT_MIN by -1.
- if (IsSigned && C1.isMinSignedValue() && C2.isAllOnesValue())
+ if (IsSigned && C1.isMinSignedValue() && C2.isAllOnes())
return false;
APInt Remainder(C1.getBitWidth(), /*val=*/0ULL, IsSigned);
@@ -778,11 +793,12 @@ Instruction *InstCombinerImpl::commonIDivTransforms(BinaryOperator &I) {
}
if ((IsSigned && match(Op0, m_NSWShl(m_Value(X), m_APInt(C1))) &&
- *C1 != C1->getBitWidth() - 1) ||
- (!IsSigned && match(Op0, m_NUWShl(m_Value(X), m_APInt(C1))))) {
+ C1->ult(C1->getBitWidth() - 1)) ||
+ (!IsSigned && match(Op0, m_NUWShl(m_Value(X), m_APInt(C1))) &&
+ C1->ult(C1->getBitWidth()))) {
APInt Quotient(C1->getBitWidth(), /*val=*/0ULL, IsSigned);
APInt C1Shifted = APInt::getOneBitSet(
- C1->getBitWidth(), static_cast<unsigned>(C1->getLimitedValue()));
+ C1->getBitWidth(), static_cast<unsigned>(C1->getZExtValue()));
// (X << C1) / C2 -> X / (C2 >> C1) if C2 is a multiple of 1 << C1.
if (isMultiple(*C2, C1Shifted, Quotient, IsSigned)) {
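The shl/udiv fold that the tightened shift-amount guard protects can be sanity-checked in isolation; this sketch assumes the nuw case (small X, so the shift cannot wrap) and the sample constants C1 = 3, C2 = 24.

    // (X << C1) / C2  ==  X / (C2 >> C1)  when C2 is a multiple of (1 << C1)
    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t C1 = 3, C2 = 24;           // 24 is a multiple of (1 << 3)
      for (uint32_t X = 0; X < 1000; ++X)       // small X: X << 3 does not wrap
        assert(((X << C1) / C2) == (X / (C2 >> C1)));
      return 0;
    }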
@@ -803,7 +819,7 @@ Instruction *InstCombinerImpl::commonIDivTransforms(BinaryOperator &I) {
}
}
- if (!C2->isNullValue()) // avoid X udiv 0
+ if (!C2->isZero()) // avoid X udiv 0
if (Instruction *FoldedDiv = foldBinOpIntoSelectOrPhi(I))
return FoldedDiv;
}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp b/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp
index 37c7e6135501..7dc516c6fdc3 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp
@@ -215,6 +215,20 @@ LLVM_NODISCARD Value *Negator::visitImpl(Value *V, unsigned Depth) {
: Builder.CreateSExt(I->getOperand(0), I->getType(),
I->getName() + ".neg");
break;
+ case Instruction::Select: {
+ // If both arms of the select are constants, we don't need to recurse.
+ // Therefore, this transform is not limited by uses.
+ auto *Sel = cast<SelectInst>(I);
+ Constant *TrueC, *FalseC;
+ if (match(Sel->getTrueValue(), m_ImmConstant(TrueC)) &&
+ match(Sel->getFalseValue(), m_ImmConstant(FalseC))) {
+ Constant *NegTrueC = ConstantExpr::getNeg(TrueC);
+ Constant *NegFalseC = ConstantExpr::getNeg(FalseC);
+ return Builder.CreateSelect(Sel->getCondition(), NegTrueC, NegFalseC,
+ I->getName() + ".neg", /*MDFrom=*/I);
+ }
+ break;
+ }
default:
break; // Other instructions require recursive reasoning.
}
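A minimal check (not part of the patch) of the select-of-constants case the Negator now handles: negating the select equals negating each constant arm. Unsigned arithmetic keeps the sketch UB-free; the constants 7 and 42 are placeholders.

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t TrueC = 7, FalseC = 42;
      for (int Cond = 0; Cond <= 1; ++Cond) {
        uint32_t NegOfSel = 0u - (Cond ? TrueC : FalseC);        // -(select Cond, 7, 42)
        uint32_t SelOfNeg = Cond ? (0u - TrueC) : (0u - FalseC); // select Cond, -7, -42
        assert(NegOfSel == SelOfNeg);
      }
      return 0;
    }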
diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 6c6351c70e3a..35739c3b9a21 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -299,6 +299,29 @@ Instruction *InstCombinerImpl::foldIntegerTypedPHI(PHINode &PN) {
IntToPtr->getOperand(0)->getType());
}
+// Remove RoundTrip IntToPtr/PtrToInt Cast on PHI-Operand and
+// fold Phi-operand to bitcast.
+Instruction *InstCombinerImpl::foldPHIArgIntToPtrToPHI(PHINode &PN) {
+ // convert ptr2int ( phi[ int2ptr(ptr2int(x))] ) --> ptr2int ( phi [ x ] )
+ // Make sure all uses of phi are ptr2int.
+ if (!all_of(PN.users(), [](User *U) { return isa<PtrToIntInst>(U); }))
+ return nullptr;
+
+ // Iterating over all operands to check presence of target pointers for
+ // optimization.
+ bool OperandWithRoundTripCast = false;
+ for (unsigned OpNum = 0; OpNum != PN.getNumIncomingValues(); ++OpNum) {
+ if (auto *NewOp =
+ simplifyIntToPtrRoundTripCast(PN.getIncomingValue(OpNum))) {
+ PN.setIncomingValue(OpNum, NewOp);
+ OperandWithRoundTripCast = true;
+ }
+ }
+ if (!OperandWithRoundTripCast)
+ return nullptr;
+ return &PN;
+}
+
/// If we have something like phi [insertvalue(a,b,0), insertvalue(c,d,0)],
/// turn this into a phi[a,c] and phi[b,d] and a single insertvalue.
Instruction *
@@ -1306,6 +1329,9 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) {
if (Instruction *Result = foldPHIArgZextsIntoPHI(PN))
return Result;
+ if (Instruction *Result = foldPHIArgIntToPtrToPHI(PN))
+ return Result;
+
// If all PHI operands are the same operation, pull them through the PHI,
// reducing code size.
if (isa<Instruction>(PN.getIncomingValue(0)) &&
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index ce2b913dba61..4a1e82ae9c1d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -38,15 +38,16 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
-#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <cassert>
#include <utility>
+#define DEBUG_TYPE "instcombine"
+#include "llvm/Transforms/Utils/InstructionWorklist.h"
+
using namespace llvm;
using namespace PatternMatch;
-#define DEBUG_TYPE "instcombine"
static Value *createMinMax(InstCombiner::BuilderTy &Builder,
SelectPatternFlavor SPF, Value *A, Value *B) {
@@ -165,7 +166,7 @@ static Value *foldSelectICmpAnd(SelectInst &Sel, ICmpInst *Cmp,
// simplify/reduce the instructions.
APInt TC = *SelTC;
APInt FC = *SelFC;
- if (!TC.isNullValue() && !FC.isNullValue()) {
+ if (!TC.isZero() && !FC.isZero()) {
// If the select constants differ by exactly one bit and that's the same
// bit that is masked and checked by the select condition, the select can
// be replaced by bitwise logic to set/clear one bit of the constant result.
@@ -202,7 +203,7 @@ static Value *foldSelectICmpAnd(SelectInst &Sel, ICmpInst *Cmp,
// Determine which shift is needed to transform result of the 'and' into the
// desired result.
- const APInt &ValC = !TC.isNullValue() ? TC : FC;
+ const APInt &ValC = !TC.isZero() ? TC : FC;
unsigned ValZeros = ValC.logBase2();
unsigned AndZeros = AndMask.logBase2();
@@ -224,7 +225,7 @@ static Value *foldSelectICmpAnd(SelectInst &Sel, ICmpInst *Cmp,
// Okay, now we know that everything is set up, we just don't know whether we
// have a icmp_ne or icmp_eq and whether the true or false val is the zero.
- bool ShouldNotVal = !TC.isNullValue();
+ bool ShouldNotVal = !TC.isZero();
ShouldNotVal ^= Pred == ICmpInst::ICMP_NE;
if (ShouldNotVal)
V = Builder.CreateXor(V, ValC);
@@ -319,8 +320,16 @@ Instruction *InstCombinerImpl::foldSelectOpOp(SelectInst &SI, Instruction *TI,
Value *X, *Y;
if (match(TI, m_FNeg(m_Value(X))) && match(FI, m_FNeg(m_Value(Y))) &&
(TI->hasOneUse() || FI->hasOneUse())) {
+ // Intersect FMF from the fneg instructions and union those with the select.
+ FastMathFlags FMF = TI->getFastMathFlags();
+ FMF &= FI->getFastMathFlags();
+ FMF |= SI.getFastMathFlags();
Value *NewSel = Builder.CreateSelect(Cond, X, Y, SI.getName() + ".v", &SI);
- return UnaryOperator::CreateFNegFMF(NewSel, TI);
+ if (auto *NewSelI = dyn_cast<Instruction>(NewSel))
+ NewSelI->setFastMathFlags(FMF);
+ Instruction *NewFNeg = UnaryOperator::CreateFNeg(NewSel);
+ NewFNeg->setFastMathFlags(FMF);
+ return NewFNeg;
}
// Min/max intrinsic with a common operand can have the common operand pulled
@@ -420,10 +429,9 @@ Instruction *InstCombinerImpl::foldSelectOpOp(SelectInst &SI, Instruction *TI,
}
static bool isSelect01(const APInt &C1I, const APInt &C2I) {
- if (!C1I.isNullValue() && !C2I.isNullValue()) // One side must be zero.
+ if (!C1I.isZero() && !C2I.isZero()) // One side must be zero.
return false;
- return C1I.isOneValue() || C1I.isAllOnesValue() ||
- C2I.isOneValue() || C2I.isAllOnesValue();
+ return C1I.isOne() || C1I.isAllOnes() || C2I.isOne() || C2I.isAllOnes();
}
/// Try to fold the select into one of the operands to allow further
@@ -715,6 +723,58 @@ static Instruction *foldSetClearBits(SelectInst &Sel,
return nullptr;
}
+// select (x == 0), 0, x * y --> freeze(y) * x
+// select (y == 0), 0, x * y --> freeze(x) * y
+// select (x == 0), undef, x * y --> freeze(y) * x
+// select (x == undef), 0, x * y --> freeze(y) * x
+// Usage of mul instead of 0 will make the result more poisonous,
+// so the operand that was not checked in the condition should be frozen.
+// The latter folding is applied only when a constant compared with x
+// is a vector consisting of 0 and undefs. If a constant compared with x
+// is a scalar undefined value or undefined vector then an expression
+// should be already folded into a constant.
+static Instruction *foldSelectZeroOrMul(SelectInst &SI, InstCombinerImpl &IC) {
+ auto *CondVal = SI.getCondition();
+ auto *TrueVal = SI.getTrueValue();
+ auto *FalseVal = SI.getFalseValue();
+ Value *X, *Y;
+ ICmpInst::Predicate Predicate;
+
+ // Assuming that constant compared with zero is not undef (but it may be
+ // a vector with some undef elements). Otherwise (when a constant is undef)
+ // the select expression should be already simplified.
+ if (!match(CondVal, m_ICmp(Predicate, m_Value(X), m_Zero())) ||
+ !ICmpInst::isEquality(Predicate))
+ return nullptr;
+
+ if (Predicate == ICmpInst::ICMP_NE)
+ std::swap(TrueVal, FalseVal);
+
+ // Check that TrueVal is a constant instead of matching it with m_Zero()
+ // to handle the case when it is a scalar undef value or a vector containing
+ // non-zero elements that are masked by undef elements in the compare
+ // constant.
+ auto *TrueValC = dyn_cast<Constant>(TrueVal);
+ if (TrueValC == nullptr ||
+ !match(FalseVal, m_c_Mul(m_Specific(X), m_Value(Y))) ||
+ !isa<Instruction>(FalseVal))
+ return nullptr;
+
+ auto *ZeroC = cast<Constant>(cast<Instruction>(CondVal)->getOperand(1));
+ auto *MergedC = Constant::mergeUndefsWith(TrueValC, ZeroC);
+ // If X is compared with 0 then TrueVal could be either zero or undef.
+ // m_Zero matches vectors containing some undef elements, but for scalars
+ // m_Undef should be used explicitly.
+ if (!match(MergedC, m_Zero()) && !match(MergedC, m_Undef()))
+ return nullptr;
+
+ auto *FalseValI = cast<Instruction>(FalseVal);
+ auto *FrY = IC.InsertNewInstBefore(new FreezeInst(Y, Y->getName() + ".fr"),
+ *FalseValI);
+ IC.replaceOperand(*FalseValI, FalseValI->getOperand(0) == Y ? 0 : 1, FrY);
+ return IC.replaceInstUsesWith(SI, FalseValI);
+}
+
/// Transform patterns such as (a > b) ? a - b : 0 into usub.sat(a, b).
/// There are 8 commuted/swapped variants of this pattern.
/// TODO: Also support a - UMIN(a,b) patterns.
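In ordinary two's-complement arithmetic the select that foldSelectZeroOrMul removes is plainly redundant: when x is 0 the product is already 0. The sketch below (not from the patch) shows only that arithmetic fact; the freeze in the real transform addresses poison propagation, which has no C++ analogue here.

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t X = 0; X < 50; ++X)
        for (uint32_t Y = 0; Y < 50; ++Y)
          assert(((X == 0) ? 0u : X * Y) == X * Y);  // select (x == 0), 0, x*y --> x*y
      return 0;
    }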
@@ -1229,8 +1289,8 @@ Instruction *InstCombinerImpl::foldSelectValueEquivalence(SelectInst &Sel,
// Iff -C1 s<= C2 s<= C0-C1
// Also ULT predicate can also be UGT iff C0 != -1 (+invert result)
// SLT predicate can also be SGT iff C2 != INT_MAX (+invert res.)
-static Instruction *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
- InstCombiner::BuilderTy &Builder) {
+static Value *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
+ InstCombiner::BuilderTy &Builder) {
Value *X = Sel0.getTrueValue();
Value *Sel1 = Sel0.getFalseValue();
@@ -1238,36 +1298,42 @@ static Instruction *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
// Said condition must be one-use.
if (!Cmp0.hasOneUse())
return nullptr;
+ ICmpInst::Predicate Pred0 = Cmp0.getPredicate();
Value *Cmp00 = Cmp0.getOperand(0);
Constant *C0;
if (!match(Cmp0.getOperand(1),
m_CombineAnd(m_AnyIntegralConstant(), m_Constant(C0))))
return nullptr;
- // Canonicalize Cmp0 into the form we expect.
+
+ if (!isa<SelectInst>(Sel1)) {
+ Pred0 = ICmpInst::getInversePredicate(Pred0);
+ std::swap(X, Sel1);
+ }
+
+ // Canonicalize Cmp0 into ult or uge.
// FIXME: we shouldn't care about lanes that are 'undef' in the end?
- switch (Cmp0.getPredicate()) {
+ switch (Pred0) {
case ICmpInst::Predicate::ICMP_ULT:
+ case ICmpInst::Predicate::ICMP_UGE:
+ // Although icmp ult %x, 0 is an unusual thing to try and should generally
+ // have been simplified, it does not verify with undef inputs so ensure we
+ // are not in a strange state.
+ if (!match(C0, m_SpecificInt_ICMP(
+ ICmpInst::Predicate::ICMP_NE,
+ APInt::getZero(C0->getType()->getScalarSizeInBits()))))
+ return nullptr;
break; // Great!
case ICmpInst::Predicate::ICMP_ULE:
- // We'd have to increment C0 by one, and for that it must not have all-ones
- // element, but then it would have been canonicalized to 'ult' before
- // we get here. So we can't do anything useful with 'ule'.
- return nullptr;
case ICmpInst::Predicate::ICMP_UGT:
- // We want to canonicalize it to 'ult', so we'll need to increment C0,
- // which again means it must not have any all-ones elements.
+ // We want to canonicalize it to 'ult' or 'uge', so we'll need to increment
+ // C0, which again means it must not have any all-ones elements.
if (!match(C0,
- m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_NE,
- APInt::getAllOnesValue(
- C0->getType()->getScalarSizeInBits()))))
+ m_SpecificInt_ICMP(
+ ICmpInst::Predicate::ICMP_NE,
+ APInt::getAllOnes(C0->getType()->getScalarSizeInBits()))))
return nullptr; // Can't do, have all-ones element[s].
C0 = InstCombiner::AddOne(C0);
- std::swap(X, Sel1);
break;
- case ICmpInst::Predicate::ICMP_UGE:
- // The only way we'd get this predicate if this `icmp` has extra uses,
- // but then we won't be able to do this fold.
- return nullptr;
default:
return nullptr; // Unknown predicate.
}
@@ -1277,11 +1343,16 @@ static Instruction *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
if (!Sel1->hasOneUse())
return nullptr;
+ // If the types do not match, look through any truncs to the underlying
+ // instruction.
+ if (Cmp00->getType() != X->getType() && X->hasOneUse())
+ match(X, m_TruncOrSelf(m_Value(X)));
+
// We now can finish matching the condition of the outermost select:
// it should either be the X itself, or an addition of some constant to X.
Constant *C1;
if (Cmp00 == X)
- C1 = ConstantInt::getNullValue(Sel0.getType());
+ C1 = ConstantInt::getNullValue(X->getType());
else if (!match(Cmp00,
m_Add(m_Specific(X),
m_CombineAnd(m_AnyIntegralConstant(), m_Constant(C1)))))
@@ -1335,6 +1406,8 @@ static Instruction *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
// The thresholds of this clamp-like pattern.
auto *ThresholdLowIncl = ConstantExpr::getNeg(C1);
auto *ThresholdHighExcl = ConstantExpr::getSub(C0, C1);
+ if (Pred0 == ICmpInst::Predicate::ICMP_UGE)
+ std::swap(ThresholdLowIncl, ThresholdHighExcl);
// The fold has a precondition 1: C2 s>= ThresholdLow
auto *Precond1 = ConstantExpr::getICmp(ICmpInst::Predicate::ICMP_SGE, C2,
@@ -1347,15 +1420,29 @@ static Instruction *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
if (!match(Precond2, m_One()))
return nullptr;
+ // If we are matching from a truncated input, we need to sext the
+ // ReplacementLow and ReplacementHigh values. Only do the transform if they
+ // are free to extend due to being constants.
+ if (X->getType() != Sel0.getType()) {
+ Constant *LowC, *HighC;
+ if (!match(ReplacementLow, m_ImmConstant(LowC)) ||
+ !match(ReplacementHigh, m_ImmConstant(HighC)))
+ return nullptr;
+ ReplacementLow = ConstantExpr::getSExt(LowC, X->getType());
+ ReplacementHigh = ConstantExpr::getSExt(HighC, X->getType());
+ }
+
// All good, finally emit the new pattern.
Value *ShouldReplaceLow = Builder.CreateICmpSLT(X, ThresholdLowIncl);
Value *ShouldReplaceHigh = Builder.CreateICmpSGE(X, ThresholdHighExcl);
Value *MaybeReplacedLow =
Builder.CreateSelect(ShouldReplaceLow, ReplacementLow, X);
- Instruction *MaybeReplacedHigh =
- SelectInst::Create(ShouldReplaceHigh, ReplacementHigh, MaybeReplacedLow);
- return MaybeReplacedHigh;
+ // Create the final select. If we looked through a truncate above, we will
+ // need to retruncate the result.
+ Value *MaybeReplacedHigh = Builder.CreateSelect(
+ ShouldReplaceHigh, ReplacementHigh, MaybeReplacedLow);
+ return Builder.CreateTrunc(MaybeReplacedHigh, Sel0.getType());
}
// If we have
@@ -1446,8 +1533,8 @@ Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI,
if (Instruction *NewAbs = canonicalizeAbsNabs(SI, *ICI, *this))
return NewAbs;
- if (Instruction *NewAbs = canonicalizeClampLike(SI, *ICI, Builder))
- return NewAbs;
+ if (Value *V = canonicalizeClampLike(SI, *ICI, Builder))
+ return replaceInstUsesWith(SI, V);
if (Instruction *NewSel =
tryToReuseConstantFromSelectInComparison(SI, *ICI, *this))
@@ -1816,9 +1903,7 @@ foldOverflowingAddSubSelect(SelectInst &SI, InstCombiner::BuilderTy &Builder) {
m_Value(TrueVal), m_Value(FalseVal))))
return false;
- auto IsZeroOrOne = [](const APInt &C) {
- return C.isNullValue() || C.isOneValue();
- };
+ auto IsZeroOrOne = [](const APInt &C) { return C.isZero() || C.isOne(); };
auto IsMinMax = [&](Value *Min, Value *Max) {
APInt MinVal = APInt::getSignedMinValue(Ty->getScalarSizeInBits());
APInt MaxVal = APInt::getSignedMaxValue(Ty->getScalarSizeInBits());
@@ -2182,7 +2267,7 @@ static Instruction *moveAddAfterMinMax(SelectPatternFlavor SPF, Value *X,
}
/// Match a sadd_sat or ssub_sat which is using min/max to clamp the value.
-Instruction *InstCombinerImpl::matchSAddSubSat(SelectInst &MinMax1) {
+Instruction *InstCombinerImpl::matchSAddSubSat(Instruction &MinMax1) {
Type *Ty = MinMax1.getType();
// We are looking for a tree of:
@@ -2212,23 +2297,14 @@ Instruction *InstCombinerImpl::matchSAddSubSat(SelectInst &MinMax1) {
if (!shouldChangeType(Ty->getScalarType()->getIntegerBitWidth(), NewBitWidth))
return nullptr;
- // Also make sure that the number of uses is as expected. The "3"s are for the
- // the two items of min/max (the compare and the select).
- if (MinMax2->hasNUsesOrMore(3) || AddSub->hasNUsesOrMore(3))
+ // Also make sure that the number of uses is as expected. The 3 is for

+ // the two items of the compare and the select, or 2 from a min/max.
+ unsigned ExpUses = isa<IntrinsicInst>(MinMax1) ? 2 : 3;
+ if (MinMax2->hasNUsesOrMore(ExpUses) || AddSub->hasNUsesOrMore(ExpUses))
return nullptr;
// Create the new type (which can be a vector type)
Type *NewTy = Ty->getWithNewBitWidth(NewBitWidth);
- // Match the two extends from the add/sub
- Value *A, *B;
- if(!match(AddSub, m_BinOp(m_SExt(m_Value(A)), m_SExt(m_Value(B)))))
- return nullptr;
- // And check the incoming values are of a type smaller than or equal to the
- // size of the saturation. Otherwise the higher bits can cause different
- // results.
- if (A->getType()->getScalarSizeInBits() > NewBitWidth ||
- B->getType()->getScalarSizeInBits() > NewBitWidth)
- return nullptr;
Intrinsic::ID IntrinsicID;
if (AddSub->getOpcode() == Instruction::Add)
@@ -2238,10 +2314,16 @@ Instruction *InstCombinerImpl::matchSAddSubSat(SelectInst &MinMax1) {
else
return nullptr;
+ // The two operands of the add/sub must be nsw-truncatable to the NewTy. This
+ // is usually achieved via a sext from a smaller type.
+ if (ComputeMinSignedBits(AddSub->getOperand(0), 0, AddSub) > NewBitWidth ||
+ ComputeMinSignedBits(AddSub->getOperand(1), 0, AddSub) > NewBitWidth)
+ return nullptr;
+
// Finally create and return the sat intrinsic, truncated to the new type
Function *F = Intrinsic::getDeclaration(MinMax1.getModule(), IntrinsicID, NewTy);
- Value *AT = Builder.CreateSExt(A, NewTy);
- Value *BT = Builder.CreateSExt(B, NewTy);
+ Value *AT = Builder.CreateTrunc(AddSub->getOperand(0), NewTy);
+ Value *BT = Builder.CreateTrunc(AddSub->getOperand(1), NewTy);
Value *Sat = Builder.CreateCall(F, {AT, BT});
return CastInst::Create(Instruction::SExt, Sat, Ty);
}
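The clamp-of-a-widened-add pattern that matchSAddSubSat rewrites into llvm.sadd.sat can be illustrated on i8; in this sketch (not from the patch) saturating_add() is a hand-written stand-in for the intrinsic, and the exhaustive loop is just for demonstration.

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    // Stand-in for llvm.sadd.sat.i8.
    static int8_t saturating_add(int8_t A, int8_t B) {
      int Wide = int(A) + int(B);
      if (Wide > 127)  return 127;
      if (Wide < -128) return -128;
      return static_cast<int8_t>(Wide);
    }

    int main() {
      for (int A = -128; A <= 127; ++A)
        for (int B = -128; B <= 127; ++B) {
          int Clamped = std::min(std::max(A + B, -128), 127);  // smin(smax(a+b, MIN), MAX)
          assert(Clamped == saturating_add(static_cast<int8_t>(A),
                                           static_cast<int8_t>(B)));
        }
      return 0;
    }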
@@ -2432,7 +2514,7 @@ Instruction *InstCombinerImpl::foldVectorSelect(SelectInst &Sel) {
unsigned NumElts = VecTy->getNumElements();
APInt UndefElts(NumElts, 0);
- APInt AllOnesEltMask(APInt::getAllOnesValue(NumElts));
+ APInt AllOnesEltMask(APInt::getAllOnes(NumElts));
if (Value *V = SimplifyDemandedVectorElts(&Sel, AllOnesEltMask, UndefElts)) {
if (V != &Sel)
return replaceInstUsesWith(Sel, V);
@@ -2754,11 +2836,16 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
/* IsAnd */ IsAnd))
return I;
- if (auto *ICmp0 = dyn_cast<ICmpInst>(CondVal))
- if (auto *ICmp1 = dyn_cast<ICmpInst>(Op1))
+ if (auto *ICmp0 = dyn_cast<ICmpInst>(CondVal)) {
+ if (auto *ICmp1 = dyn_cast<ICmpInst>(Op1)) {
if (auto *V = foldAndOrOfICmpsOfAndWithPow2(ICmp0, ICmp1, &SI, IsAnd,
/* IsLogical */ true))
return replaceInstUsesWith(SI, V);
+
+ if (auto *V = foldEqOfParts(ICmp0, ICmp1, IsAnd))
+ return replaceInstUsesWith(SI, V);
+ }
+ }
}
// select (select a, true, b), c, false -> select a, c, false
@@ -2863,14 +2950,10 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
}
// Canonicalize select with fcmp to fabs(). -0.0 makes this tricky. We need
- // fast-math-flags (nsz) or fsub with +0.0 (not fneg) for this to work. We
- // also require nnan because we do not want to unintentionally change the
- // sign of a NaN value.
+ // fast-math-flags (nsz) or fsub with +0.0 (not fneg) for this to work.
// (X <= +/-0.0) ? (0.0 - X) : X --> fabs(X)
- Instruction *FSub;
if (match(CondVal, m_FCmp(Pred, m_Specific(FalseVal), m_AnyZeroFP())) &&
match(TrueVal, m_FSub(m_PosZeroFP(), m_Specific(FalseVal))) &&
- match(TrueVal, m_Instruction(FSub)) && FSub->hasNoNaNs() &&
(Pred == FCmpInst::FCMP_OLE || Pred == FCmpInst::FCMP_ULE)) {
Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, FalseVal, &SI);
return replaceInstUsesWith(SI, Fabs);
@@ -2878,7 +2961,6 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
// (X > +/-0.0) ? X : (0.0 - X) --> fabs(X)
if (match(CondVal, m_FCmp(Pred, m_Specific(TrueVal), m_AnyZeroFP())) &&
match(FalseVal, m_FSub(m_PosZeroFP(), m_Specific(TrueVal))) &&
- match(FalseVal, m_Instruction(FSub)) && FSub->hasNoNaNs() &&
(Pred == FCmpInst::FCMP_OGT || Pred == FCmpInst::FCMP_UGT)) {
Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, TrueVal, &SI);
return replaceInstUsesWith(SI, Fabs);
@@ -2886,11 +2968,8 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
// With nnan and nsz:
// (X < +/-0.0) ? -X : X --> fabs(X)
// (X <= +/-0.0) ? -X : X --> fabs(X)
- Instruction *FNeg;
if (match(CondVal, m_FCmp(Pred, m_Specific(FalseVal), m_AnyZeroFP())) &&
- match(TrueVal, m_FNeg(m_Specific(FalseVal))) &&
- match(TrueVal, m_Instruction(FNeg)) && FNeg->hasNoNaNs() &&
- FNeg->hasNoSignedZeros() && SI.hasNoSignedZeros() &&
+ match(TrueVal, m_FNeg(m_Specific(FalseVal))) && SI.hasNoSignedZeros() &&
(Pred == FCmpInst::FCMP_OLT || Pred == FCmpInst::FCMP_OLE ||
Pred == FCmpInst::FCMP_ULT || Pred == FCmpInst::FCMP_ULE)) {
Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, FalseVal, &SI);
@@ -2900,9 +2979,7 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
// (X > +/-0.0) ? X : -X --> fabs(X)
// (X >= +/-0.0) ? X : -X --> fabs(X)
if (match(CondVal, m_FCmp(Pred, m_Specific(TrueVal), m_AnyZeroFP())) &&
- match(FalseVal, m_FNeg(m_Specific(TrueVal))) &&
- match(FalseVal, m_Instruction(FNeg)) && FNeg->hasNoNaNs() &&
- FNeg->hasNoSignedZeros() && SI.hasNoSignedZeros() &&
+ match(FalseVal, m_FNeg(m_Specific(TrueVal))) && SI.hasNoSignedZeros() &&
(Pred == FCmpInst::FCMP_OGT || Pred == FCmpInst::FCMP_OGE ||
Pred == FCmpInst::FCMP_UGT || Pred == FCmpInst::FCMP_UGE)) {
Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, TrueVal, &SI);
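A scalar illustration (not from the patch) of the fabs pattern handled above; it deliberately uses the "0.0 - X" form so that the -0.0 case works out, and it ignores the fast-math-flag details that gate the IR transform.

    #include <cassert>
    #include <cmath>

    int main() {
      const double Vals[] = {-2.5, -0.0, 0.0, 3.25};
      for (double X : Vals) {
        double Folded = (X <= 0.0) ? (0.0 - X) : X;  // (X <= +/-0.0) ? (0.0 - X) : X
        assert(Folded == std::fabs(X));
      }
      return 0;
    }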
@@ -2920,6 +2997,8 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
return Add;
if (Instruction *Or = foldSetClearBits(SI, Builder))
return Or;
+ if (Instruction *Mul = foldSelectZeroOrMul(SI, *this))
+ return Mul;
// Turn (select C, (op X, Y), (op X, Z)) -> (op X, (select C, Y, Z))
auto *TI = dyn_cast<Instruction>(TrueVal);
@@ -2939,8 +3018,10 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
if (Gep->getNumOperands() != 2 || Gep->getPointerOperand() != Base ||
!Gep->hasOneUse())
return nullptr;
- Type *ElementType = Gep->getResultElementType();
Value *Idx = Gep->getOperand(1);
+ if (isa<VectorType>(CondVal->getType()) && !isa<VectorType>(Idx->getType()))
+ return nullptr;
+ Type *ElementType = Gep->getResultElementType();
Value *NewT = Idx;
Value *NewF = Constant::getNullValue(Idx->getType());
if (Swap)
@@ -3188,9 +3269,9 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
if (!CondVal->getType()->isVectorTy() && !AC.assumptions().empty()) {
KnownBits Known(1);
computeKnownBits(CondVal, Known, 0, &SI);
- if (Known.One.isOneValue())
+ if (Known.One.isOne())
return replaceInstUsesWith(SI, TrueVal);
- if (Known.Zero.isOneValue())
+ if (Known.Zero.isOne())
return replaceInstUsesWith(SI, FalseVal);
}
@@ -3230,7 +3311,8 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
Value *Mask;
if (match(TrueVal, m_Zero()) &&
match(FalseVal, m_MaskedLoad(m_Value(), m_Value(), m_Value(Mask),
- m_CombineOr(m_Undef(), m_Zero())))) {
+ m_CombineOr(m_Undef(), m_Zero()))) &&
+ (CondVal->getType() == Mask->getType())) {
// We can remove the select by ensuring the load zeros all lanes the
// select would have. We determine this by proving there is no overlap
// between the load and select masks.
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index ca5e473fdecb..06421d553915 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -41,7 +41,7 @@ bool canTryToConstantAddTwoShiftAmounts(Value *Sh0, Value *ShAmt0, Value *Sh1,
(Sh0->getType()->getScalarSizeInBits() - 1) +
(Sh1->getType()->getScalarSizeInBits() - 1);
APInt MaximalRepresentableShiftAmount =
- APInt::getAllOnesValue(ShAmt0->getType()->getScalarSizeInBits());
+ APInt::getAllOnes(ShAmt0->getType()->getScalarSizeInBits());
return MaximalRepresentableShiftAmount.uge(MaximalPossibleTotalShiftAmount);
}
@@ -172,8 +172,8 @@ Value *InstCombinerImpl::reassociateShiftAmtsOfTwoSameDirectionShifts(
// There are many variants to this pattern:
// a) (x & ((1 << MaskShAmt) - 1)) << ShiftShAmt
// b) (x & (~(-1 << MaskShAmt))) << ShiftShAmt
-// c) (x & (-1 >> MaskShAmt)) << ShiftShAmt
-// d) (x & ((-1 << MaskShAmt) >> MaskShAmt)) << ShiftShAmt
+// c) (x & (-1 l>> MaskShAmt)) << ShiftShAmt
+// d) (x & ((-1 << MaskShAmt) l>> MaskShAmt)) << ShiftShAmt
// e) ((x << MaskShAmt) l>> MaskShAmt) << ShiftShAmt
// f) ((x << MaskShAmt) a>> MaskShAmt) << ShiftShAmt
// All these patterns can be simplified to just:
@@ -213,11 +213,11 @@ dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift,
auto MaskA = m_Add(m_Shl(m_One(), m_Value(MaskShAmt)), m_AllOnes());
// (~(-1 << maskNbits))
auto MaskB = m_Xor(m_Shl(m_AllOnes(), m_Value(MaskShAmt)), m_AllOnes());
- // (-1 >> MaskShAmt)
- auto MaskC = m_Shr(m_AllOnes(), m_Value(MaskShAmt));
- // ((-1 << MaskShAmt) >> MaskShAmt)
+ // (-1 l>> MaskShAmt)
+ auto MaskC = m_LShr(m_AllOnes(), m_Value(MaskShAmt));
+ // ((-1 << MaskShAmt) l>> MaskShAmt)
auto MaskD =
- m_Shr(m_Shl(m_AllOnes(), m_Value(MaskShAmt)), m_Deferred(MaskShAmt));
+ m_LShr(m_Shl(m_AllOnes(), m_Value(MaskShAmt)), m_Deferred(MaskShAmt));
Value *X;
Constant *NewMask;
@@ -240,7 +240,7 @@ dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift,
// that shall remain in the root value (OuterShift).
// An extend of an undef value becomes zero because the high bits are never
- // completely unknown. Replace the the `undef` shift amounts with final
+ // completely unknown. Replace the `undef` shift amounts with final
// shift bitwidth to ensure that the value remains undef when creating the
// subsequent shift op.
SumOfShAmts = Constant::replaceUndefsWith(
@@ -272,7 +272,7 @@ dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift,
// shall be unset in the root value (OuterShift).
// An extend of an undef value becomes zero because the high bits are never
- // completely unknown. Replace the the `undef` shift amounts with negated
+ // completely unknown. Replace the `undef` shift amounts with negated
// bitwidth of innermost shift to ensure that the value remains undef when
// creating the subsequent shift op.
unsigned WidestTyBitWidth = WidestTy->getScalarSizeInBits();
@@ -346,9 +346,8 @@ static Instruction *foldShiftOfShiftedLogic(BinaryOperator &I,
// TODO: Remove the one-use check if the other logic operand (Y) is constant.
Value *X, *Y;
auto matchFirstShift = [&](Value *V) {
- BinaryOperator *BO;
APInt Threshold(Ty->getScalarSizeInBits(), Ty->getScalarSizeInBits());
- return match(V, m_BinOp(BO)) && BO->getOpcode() == ShiftOpcode &&
+ return match(V, m_BinOp(ShiftOpcode, m_Value(), m_Value())) &&
match(V, m_OneUse(m_Shift(m_Value(X), m_Constant(C0)))) &&
match(ConstantExpr::getAdd(C0, C1),
m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold));
@@ -661,23 +660,22 @@ static bool canShiftBinOpWithConstantRHS(BinaryOperator &Shift,
Instruction *InstCombinerImpl::FoldShiftByConstant(Value *Op0, Constant *Op1,
BinaryOperator &I) {
- bool isLeftShift = I.getOpcode() == Instruction::Shl;
-
const APInt *Op1C;
if (!match(Op1, m_APInt(Op1C)))
return nullptr;
// See if we can propagate this shift into the input, this covers the trivial
// cast of lshr(shl(x,c1),c2) as well as other more complex cases.
+ bool IsLeftShift = I.getOpcode() == Instruction::Shl;
if (I.getOpcode() != Instruction::AShr &&
- canEvaluateShifted(Op0, Op1C->getZExtValue(), isLeftShift, *this, &I)) {
+ canEvaluateShifted(Op0, Op1C->getZExtValue(), IsLeftShift, *this, &I)) {
LLVM_DEBUG(
dbgs() << "ICE: GetShiftedValue propagating shift through expression"
" to eliminate shift:\n IN: "
<< *Op0 << "\n SH: " << I << "\n");
return replaceInstUsesWith(
- I, getShiftedValue(Op0, Op1C->getZExtValue(), isLeftShift, *this, DL));
+ I, getShiftedValue(Op0, Op1C->getZExtValue(), IsLeftShift, *this, DL));
}
// See if we can simplify any instructions used by the instruction whose sole
@@ -686,202 +684,72 @@ Instruction *InstCombinerImpl::FoldShiftByConstant(Value *Op0, Constant *Op1,
unsigned TypeBits = Ty->getScalarSizeInBits();
assert(!Op1C->uge(TypeBits) &&
"Shift over the type width should have been removed already");
+ (void)TypeBits;
if (Instruction *FoldedShift = foldBinOpIntoSelectOrPhi(I))
return FoldedShift;
- // Fold shift2(trunc(shift1(x,c1)), c2) -> trunc(shift2(shift1(x,c1),c2))
- if (auto *TI = dyn_cast<TruncInst>(Op0)) {
- // If 'shift2' is an ashr, we would have to get the sign bit into a funny
- // place. Don't try to do this transformation in this case. Also, we
- // require that the input operand is a shift-by-constant so that we have
- // confidence that the shifts will get folded together. We could do this
- // xform in more cases, but it is unlikely to be profitable.
- const APInt *TrShiftAmt;
- if (I.isLogicalShift() &&
- match(TI->getOperand(0), m_Shift(m_Value(), m_APInt(TrShiftAmt)))) {
- auto *TrOp = cast<Instruction>(TI->getOperand(0));
- Type *SrcTy = TrOp->getType();
-
- // Okay, we'll do this xform. Make the shift of shift.
- Constant *ShAmt = ConstantExpr::getZExt(Op1, SrcTy);
- // (shift2 (shift1 & 0x00FF), c2)
- Value *NSh = Builder.CreateBinOp(I.getOpcode(), TrOp, ShAmt, I.getName());
-
- // For logical shifts, the truncation has the effect of making the high
- // part of the register be zeros. Emulate this by inserting an AND to
- // clear the top bits as needed. This 'and' will usually be zapped by
- // other xforms later if dead.
- unsigned SrcSize = SrcTy->getScalarSizeInBits();
- Constant *MaskV =
- ConstantInt::get(SrcTy, APInt::getLowBitsSet(SrcSize, TypeBits));
-
- // The mask we constructed says what the trunc would do if occurring
- // between the shifts. We want to know the effect *after* the second
- // shift. We know that it is a logical shift by a constant, so adjust the
- // mask as appropriate.
- MaskV = ConstantExpr::get(I.getOpcode(), MaskV, ShAmt);
- // shift1 & 0x00FF
- Value *And = Builder.CreateAnd(NSh, MaskV, TI->getName());
- // Return the value truncated to the interesting size.
- return new TruncInst(And, Ty);
- }
- }
-
- if (Op0->hasOneUse()) {
- if (BinaryOperator *Op0BO = dyn_cast<BinaryOperator>(Op0)) {
- // Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C)
- Value *V1;
- const APInt *CC;
- switch (Op0BO->getOpcode()) {
- default: break;
- case Instruction::Add:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor: {
- // These operators commute.
- // Turn (Y + (X >> C)) << C -> (X + (Y << C)) & (~0 << C)
- if (isLeftShift && Op0BO->getOperand(1)->hasOneUse() &&
- match(Op0BO->getOperand(1), m_Shr(m_Value(V1),
- m_Specific(Op1)))) {
- Value *YS = // (Y << C)
- Builder.CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName());
- // (X + (Y << C))
- Value *X = Builder.CreateBinOp(Op0BO->getOpcode(), YS, V1,
- Op0BO->getOperand(1)->getName());
- unsigned Op1Val = Op1C->getLimitedValue(TypeBits);
- APInt Bits = APInt::getHighBitsSet(TypeBits, TypeBits - Op1Val);
- Constant *Mask = ConstantInt::get(Ty, Bits);
- return BinaryOperator::CreateAnd(X, Mask);
- }
-
- // Turn (Y + ((X >> C) & CC)) << C -> ((X & (CC << C)) + (Y << C))
- Value *Op0BOOp1 = Op0BO->getOperand(1);
- if (isLeftShift && Op0BOOp1->hasOneUse() &&
- match(Op0BOOp1, m_And(m_OneUse(m_Shr(m_Value(V1), m_Specific(Op1))),
- m_APInt(CC)))) {
- Value *YS = // (Y << C)
- Builder.CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName());
- // X & (CC << C)
- Value *XM = Builder.CreateAnd(
- V1, ConstantExpr::getShl(ConstantInt::get(Ty, *CC), Op1),
- V1->getName() + ".mask");
- return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM);
- }
- LLVM_FALLTHROUGH;
- }
-
- case Instruction::Sub: {
- // Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C)
- if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
- match(Op0BO->getOperand(0), m_Shr(m_Value(V1),
- m_Specific(Op1)))) {
- Value *YS = // (Y << C)
- Builder.CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
- // (X + (Y << C))
- Value *X = Builder.CreateBinOp(Op0BO->getOpcode(), V1, YS,
- Op0BO->getOperand(0)->getName());
- unsigned Op1Val = Op1C->getLimitedValue(TypeBits);
- APInt Bits = APInt::getHighBitsSet(TypeBits, TypeBits - Op1Val);
- Constant *Mask = ConstantInt::get(Ty, Bits);
- return BinaryOperator::CreateAnd(X, Mask);
- }
-
- // Turn (((X >> C)&CC) + Y) << C -> (X + (Y << C)) & (CC << C)
- if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
- match(Op0BO->getOperand(0),
- m_And(m_OneUse(m_Shr(m_Value(V1), m_Specific(Op1))),
- m_APInt(CC)))) {
- Value *YS = // (Y << C)
- Builder.CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
- // X & (CC << C)
- Value *XM = Builder.CreateAnd(
- V1, ConstantExpr::getShl(ConstantInt::get(Ty, *CC), Op1),
- V1->getName() + ".mask");
- return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS);
- }
-
- break;
- }
- }
+ if (!Op0->hasOneUse())
+ return nullptr;
- // If the operand is a bitwise operator with a constant RHS, and the
- // shift is the only use, we can pull it out of the shift.
- const APInt *Op0C;
- if (match(Op0BO->getOperand(1), m_APInt(Op0C))) {
- if (canShiftBinOpWithConstantRHS(I, Op0BO)) {
- Constant *NewRHS = ConstantExpr::get(I.getOpcode(),
- cast<Constant>(Op0BO->getOperand(1)), Op1);
+ if (auto *Op0BO = dyn_cast<BinaryOperator>(Op0)) {
+ // If the operand is a bitwise operator with a constant RHS, and the
+ // shift is the only use, we can pull it out of the shift.
+ const APInt *Op0C;
+ if (match(Op0BO->getOperand(1), m_APInt(Op0C))) {
+ if (canShiftBinOpWithConstantRHS(I, Op0BO)) {
+ Constant *NewRHS = ConstantExpr::get(
+ I.getOpcode(), cast<Constant>(Op0BO->getOperand(1)), Op1);
- Value *NewShift =
+ Value *NewShift =
Builder.CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1);
- NewShift->takeName(Op0BO);
-
- return BinaryOperator::Create(Op0BO->getOpcode(), NewShift,
- NewRHS);
- }
- }
-
- // If the operand is a subtract with a constant LHS, and the shift
- // is the only use, we can pull it out of the shift.
- // This folds (shl (sub C1, X), C2) -> (sub (C1 << C2), (shl X, C2))
- if (isLeftShift && Op0BO->getOpcode() == Instruction::Sub &&
- match(Op0BO->getOperand(0), m_APInt(Op0C))) {
- Constant *NewRHS = ConstantExpr::get(I.getOpcode(),
- cast<Constant>(Op0BO->getOperand(0)), Op1);
-
- Value *NewShift = Builder.CreateShl(Op0BO->getOperand(1), Op1);
NewShift->takeName(Op0BO);
- return BinaryOperator::CreateSub(NewRHS, NewShift);
+ return BinaryOperator::Create(Op0BO->getOpcode(), NewShift, NewRHS);
}
}
+ }
- // If we have a select that conditionally executes some binary operator,
- // see if we can pull it the select and operator through the shift.
- //
- // For example, turning:
- // shl (select C, (add X, C1), X), C2
- // Into:
- // Y = shl X, C2
- // select C, (add Y, C1 << C2), Y
- Value *Cond;
- BinaryOperator *TBO;
- Value *FalseVal;
- if (match(Op0, m_Select(m_Value(Cond), m_OneUse(m_BinOp(TBO)),
- m_Value(FalseVal)))) {
- const APInt *C;
- if (!isa<Constant>(FalseVal) && TBO->getOperand(0) == FalseVal &&
- match(TBO->getOperand(1), m_APInt(C)) &&
- canShiftBinOpWithConstantRHS(I, TBO)) {
- Constant *NewRHS = ConstantExpr::get(I.getOpcode(),
- cast<Constant>(TBO->getOperand(1)), Op1);
-
- Value *NewShift =
- Builder.CreateBinOp(I.getOpcode(), FalseVal, Op1);
- Value *NewOp = Builder.CreateBinOp(TBO->getOpcode(), NewShift,
- NewRHS);
- return SelectInst::Create(Cond, NewOp, NewShift);
- }
+ // If we have a select that conditionally executes some binary operator,
+ // see if we can pull the select and the operator through the shift.
+ //
+ // For example, turning:
+ // shl (select C, (add X, C1), X), C2
+ // Into:
+ // Y = shl X, C2
+ // select C, (add Y, C1 << C2), Y
+ Value *Cond;
+ BinaryOperator *TBO;
+ Value *FalseVal;
+ if (match(Op0, m_Select(m_Value(Cond), m_OneUse(m_BinOp(TBO)),
+ m_Value(FalseVal)))) {
+ const APInt *C;
+ if (!isa<Constant>(FalseVal) && TBO->getOperand(0) == FalseVal &&
+ match(TBO->getOperand(1), m_APInt(C)) &&
+ canShiftBinOpWithConstantRHS(I, TBO)) {
+ Constant *NewRHS = ConstantExpr::get(
+ I.getOpcode(), cast<Constant>(TBO->getOperand(1)), Op1);
+
+ Value *NewShift = Builder.CreateBinOp(I.getOpcode(), FalseVal, Op1);
+ Value *NewOp = Builder.CreateBinOp(TBO->getOpcode(), NewShift, NewRHS);
+ return SelectInst::Create(Cond, NewOp, NewShift);
}
+ }
- BinaryOperator *FBO;
- Value *TrueVal;
- if (match(Op0, m_Select(m_Value(Cond), m_Value(TrueVal),
- m_OneUse(m_BinOp(FBO))))) {
- const APInt *C;
- if (!isa<Constant>(TrueVal) && FBO->getOperand(0) == TrueVal &&
- match(FBO->getOperand(1), m_APInt(C)) &&
- canShiftBinOpWithConstantRHS(I, FBO)) {
- Constant *NewRHS = ConstantExpr::get(I.getOpcode(),
- cast<Constant>(FBO->getOperand(1)), Op1);
-
- Value *NewShift =
- Builder.CreateBinOp(I.getOpcode(), TrueVal, Op1);
- Value *NewOp = Builder.CreateBinOp(FBO->getOpcode(), NewShift,
- NewRHS);
- return SelectInst::Create(Cond, NewShift, NewOp);
- }
+ BinaryOperator *FBO;
+ Value *TrueVal;
+ if (match(Op0, m_Select(m_Value(Cond), m_Value(TrueVal),
+ m_OneUse(m_BinOp(FBO))))) {
+ const APInt *C;
+ if (!isa<Constant>(TrueVal) && FBO->getOperand(0) == TrueVal &&
+ match(FBO->getOperand(1), m_APInt(C)) &&
+ canShiftBinOpWithConstantRHS(I, FBO)) {
+ Constant *NewRHS = ConstantExpr::get(
+ I.getOpcode(), cast<Constant>(FBO->getOperand(1)), Op1);
+
+ Value *NewShift = Builder.CreateBinOp(I.getOpcode(), TrueVal, Op1);
+ Value *NewOp = Builder.CreateBinOp(FBO->getOpcode(), NewShift, NewRHS);
+ return SelectInst::Create(Cond, NewShift, NewOp);
}
}
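
For illustration only, and not part of the imported patch: the select-through-shift fold kept by the block above relies on shl distributing over the binop. The standalone C++ sketch below checks that identity for the add case on plain 32-bit unsigned values; the function names origFold and newFold are invented for the demonstration and stand in for the IR before and after the transform.

#include <cassert>
#include <cstdint>

// shl (select Cond, (add X, C1), X), C2
static uint32_t origFold(bool Cond, uint32_t X, uint32_t C1, uint32_t C2) {
  return (Cond ? X + C1 : X) << C2;
}

// Y = shl X, C2; select Cond, (add Y, C1 << C2), Y
static uint32_t newFold(bool Cond, uint32_t X, uint32_t C1, uint32_t C2) {
  uint32_t Y = X << C2;
  return Cond ? Y + (C1 << C2) : Y;
}

int main() {
  for (uint32_t X : {0u, 1u, 0x1234u, 0xFFFFFFFFu})
    for (uint32_t C1 : {1u, 7u, 0x80u})
      for (uint32_t C2 : {0u, 3u, 16u})
        for (bool Cond : {false, true})
          assert(origFold(Cond, X, C1, C2) == newFold(Cond, X, C1, C2));
  return 0;
}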
@@ -908,41 +776,41 @@ Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) {
Type *Ty = I.getType();
unsigned BitWidth = Ty->getScalarSizeInBits();
- const APInt *ShAmtAPInt;
- if (match(Op1, m_APInt(ShAmtAPInt))) {
- unsigned ShAmt = ShAmtAPInt->getZExtValue();
+ const APInt *C;
+ if (match(Op1, m_APInt(C))) {
+ unsigned ShAmtC = C->getZExtValue();
- // shl (zext X), ShAmt --> zext (shl X, ShAmt)
+ // shl (zext X), C --> zext (shl X, C)
// This is only valid if X would have zeros shifted out.
Value *X;
if (match(Op0, m_OneUse(m_ZExt(m_Value(X))))) {
unsigned SrcWidth = X->getType()->getScalarSizeInBits();
- if (ShAmt < SrcWidth &&
- MaskedValueIsZero(X, APInt::getHighBitsSet(SrcWidth, ShAmt), 0, &I))
- return new ZExtInst(Builder.CreateShl(X, ShAmt), Ty);
+ if (ShAmtC < SrcWidth &&
+ MaskedValueIsZero(X, APInt::getHighBitsSet(SrcWidth, ShAmtC), 0, &I))
+ return new ZExtInst(Builder.CreateShl(X, ShAmtC), Ty);
}
// (X >> C) << C --> X & (-1 << C)
if (match(Op0, m_Shr(m_Value(X), m_Specific(Op1)))) {
- APInt Mask(APInt::getHighBitsSet(BitWidth, BitWidth - ShAmt));
+ APInt Mask(APInt::getHighBitsSet(BitWidth, BitWidth - ShAmtC));
return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, Mask));
}
- const APInt *ShOp1;
- if (match(Op0, m_Exact(m_Shr(m_Value(X), m_APInt(ShOp1)))) &&
- ShOp1->ult(BitWidth)) {
- unsigned ShrAmt = ShOp1->getZExtValue();
- if (ShrAmt < ShAmt) {
- // If C1 < C2: (X >>?,exact C1) << C2 --> X << (C2 - C1)
- Constant *ShiftDiff = ConstantInt::get(Ty, ShAmt - ShrAmt);
+ const APInt *C1;
+ if (match(Op0, m_Exact(m_Shr(m_Value(X), m_APInt(C1)))) &&
+ C1->ult(BitWidth)) {
+ unsigned ShrAmt = C1->getZExtValue();
+ if (ShrAmt < ShAmtC) {
+ // If C1 < C: (X >>?,exact C1) << C --> X << (C - C1)
+ Constant *ShiftDiff = ConstantInt::get(Ty, ShAmtC - ShrAmt);
auto *NewShl = BinaryOperator::CreateShl(X, ShiftDiff);
NewShl->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
NewShl->setHasNoSignedWrap(I.hasNoSignedWrap());
return NewShl;
}
- if (ShrAmt > ShAmt) {
- // If C1 > C2: (X >>?exact C1) << C2 --> X >>?exact (C1 - C2)
- Constant *ShiftDiff = ConstantInt::get(Ty, ShrAmt - ShAmt);
+ if (ShrAmt > ShAmtC) {
+ // If C1 > C: (X >>?exact C1) << C --> X >>?exact (C1 - C)
+ Constant *ShiftDiff = ConstantInt::get(Ty, ShrAmt - ShAmtC);
auto *NewShr = BinaryOperator::Create(
cast<BinaryOperator>(Op0)->getOpcode(), X, ShiftDiff);
NewShr->setIsExact(true);
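
For illustration only, and not part of the imported patch: the hunk above folds a left shift of an exact right shift into a single shift by the difference of the two amounts. The sketch below models the exact lshr by only feeding values whose low C1 bits are zero; it uses plain unsigned arithmetic in place of the IR semantics.

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t Base : {0u, 1u, 0xABCDu, 0x00FF00FFu}) {
    for (unsigned C1 = 0; C1 < 8; ++C1) {
      uint32_t X = Base << C1;          // low C1 bits are zero, so lshr is exact
      for (unsigned C = 0; C < 8; ++C) {
        uint32_t Orig = (X >> C1) << C; // (X >>u,exact C1) << C
        uint32_t Fold = C1 < C ? X << (C - C1)
                       : C1 > C ? X >> (C1 - C)
                                : X;
        assert(Orig == Fold);
      }
    }
  }
  return 0;
}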
@@ -950,49 +818,135 @@ Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) {
}
}
- if (match(Op0, m_OneUse(m_Shr(m_Value(X), m_APInt(ShOp1)))) &&
- ShOp1->ult(BitWidth)) {
- unsigned ShrAmt = ShOp1->getZExtValue();
- if (ShrAmt < ShAmt) {
- // If C1 < C2: (X >>? C1) << C2 --> X << (C2 - C1) & (-1 << C2)
- Constant *ShiftDiff = ConstantInt::get(Ty, ShAmt - ShrAmt);
+ if (match(Op0, m_OneUse(m_Shr(m_Value(X), m_APInt(C1)))) &&
+ C1->ult(BitWidth)) {
+ unsigned ShrAmt = C1->getZExtValue();
+ if (ShrAmt < ShAmtC) {
+ // If C1 < C: (X >>? C1) << C --> (X << (C - C1)) & (-1 << C)
+ Constant *ShiftDiff = ConstantInt::get(Ty, ShAmtC - ShrAmt);
auto *NewShl = BinaryOperator::CreateShl(X, ShiftDiff);
NewShl->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
NewShl->setHasNoSignedWrap(I.hasNoSignedWrap());
Builder.Insert(NewShl);
- APInt Mask(APInt::getHighBitsSet(BitWidth, BitWidth - ShAmt));
+ APInt Mask(APInt::getHighBitsSet(BitWidth, BitWidth - ShAmtC));
return BinaryOperator::CreateAnd(NewShl, ConstantInt::get(Ty, Mask));
}
- if (ShrAmt > ShAmt) {
- // If C1 > C2: (X >>? C1) << C2 --> X >>? (C1 - C2) & (-1 << C2)
- Constant *ShiftDiff = ConstantInt::get(Ty, ShrAmt - ShAmt);
+ if (ShrAmt > ShAmtC) {
+ // If C1 > C: (X >>? C1) << C --> (X >>? (C1 - C)) & (-1 << C)
+ Constant *ShiftDiff = ConstantInt::get(Ty, ShrAmt - ShAmtC);
auto *OldShr = cast<BinaryOperator>(Op0);
auto *NewShr =
BinaryOperator::Create(OldShr->getOpcode(), X, ShiftDiff);
NewShr->setIsExact(OldShr->isExact());
Builder.Insert(NewShr);
- APInt Mask(APInt::getHighBitsSet(BitWidth, BitWidth - ShAmt));
+ APInt Mask(APInt::getHighBitsSet(BitWidth, BitWidth - ShAmtC));
return BinaryOperator::CreateAnd(NewShr, ConstantInt::get(Ty, Mask));
}
}
- if (match(Op0, m_Shl(m_Value(X), m_APInt(ShOp1))) && ShOp1->ult(BitWidth)) {
- unsigned AmtSum = ShAmt + ShOp1->getZExtValue();
+ // Similar to above, but look through an intermediate trunc instruction.
+ BinaryOperator *Shr;
+ if (match(Op0, m_OneUse(m_Trunc(m_OneUse(m_BinOp(Shr))))) &&
+ match(Shr, m_Shr(m_Value(X), m_APInt(C1)))) {
+ // The larger shift direction survives through the transform.
+ unsigned ShrAmtC = C1->getZExtValue();
+ unsigned ShDiff = ShrAmtC > ShAmtC ? ShrAmtC - ShAmtC : ShAmtC - ShrAmtC;
+ Constant *ShiftDiffC = ConstantInt::get(X->getType(), ShDiff);
+ auto ShiftOpc = ShrAmtC > ShAmtC ? Shr->getOpcode() : Instruction::Shl;
+
+ // If C1 > C:
+ // (trunc (X >> C1)) << C --> (trunc (X >> (C1 - C))) & (-1 << C)
+ // If C > C1:
+ // (trunc (X >> C1)) << C --> (trunc (X << (C - C1))) & (-1 << C)
+ Value *NewShift = Builder.CreateBinOp(ShiftOpc, X, ShiftDiffC, "sh.diff");
+ Value *Trunc = Builder.CreateTrunc(NewShift, Ty, "tr.sh.diff");
+ APInt Mask(APInt::getHighBitsSet(BitWidth, BitWidth - ShAmtC));
+ return BinaryOperator::CreateAnd(Trunc, ConstantInt::get(Ty, Mask));
+ }
+
+ if (match(Op0, m_Shl(m_Value(X), m_APInt(C1))) && C1->ult(BitWidth)) {
+ unsigned AmtSum = ShAmtC + C1->getZExtValue();
// Oversized shifts are simplified to zero in InstSimplify.
if (AmtSum < BitWidth)
// (X << C1) << C2 --> X << (C1 + C2)
return BinaryOperator::CreateShl(X, ConstantInt::get(Ty, AmtSum));
}
+ // If we have an opposite shift by the same amount, we may be able to
+ // reorder binops and shifts to eliminate math/logic.
+ auto isSuitableBinOpcode = [](Instruction::BinaryOps BinOpcode) {
+ switch (BinOpcode) {
+ default:
+ return false;
+ case Instruction::Add:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Sub:
+ // NOTE: Sub is not commutable and the transforms below may not be valid
+ // when the shift-right is operand 1 (RHS) of the sub.
+ return true;
+ }
+ };
+ BinaryOperator *Op0BO;
+ if (match(Op0, m_OneUse(m_BinOp(Op0BO))) &&
+ isSuitableBinOpcode(Op0BO->getOpcode())) {
+ // Commute so shift-right is on LHS of the binop.
+ // (Y bop (X >> C)) << C -> ((X >> C) bop Y) << C
+ // (Y bop ((X >> C) & CC)) << C -> (((X >> C) & CC) bop Y) << C
+ Value *Shr = Op0BO->getOperand(0);
+ Value *Y = Op0BO->getOperand(1);
+ Value *X;
+ const APInt *CC;
+ if (Op0BO->isCommutative() && Y->hasOneUse() &&
+ (match(Y, m_Shr(m_Value(), m_Specific(Op1))) ||
+ match(Y, m_And(m_OneUse(m_Shr(m_Value(), m_Specific(Op1))),
+ m_APInt(CC)))))
+ std::swap(Shr, Y);
+
+ // ((X >> C) bop Y) << C -> (X bop (Y << C)) & (~0 << C)
+ if (match(Shr, m_OneUse(m_Shr(m_Value(X), m_Specific(Op1))))) {
+ // Y << C
+ Value *YS = Builder.CreateShl(Y, Op1, Op0BO->getName());
+ // (X bop (Y << C))
+ Value *B =
+ Builder.CreateBinOp(Op0BO->getOpcode(), X, YS, Shr->getName());
+ unsigned Op1Val = C->getLimitedValue(BitWidth);
+ APInt Bits = APInt::getHighBitsSet(BitWidth, BitWidth - Op1Val);
+ Constant *Mask = ConstantInt::get(Ty, Bits);
+ return BinaryOperator::CreateAnd(B, Mask);
+ }
+
+ // (((X >> C) & CC) bop Y) << C -> (X & (CC << C)) bop (Y << C)
+ if (match(Shr,
+ m_OneUse(m_And(m_OneUse(m_Shr(m_Value(X), m_Specific(Op1))),
+ m_APInt(CC))))) {
+ // Y << C
+ Value *YS = Builder.CreateShl(Y, Op1, Op0BO->getName());
+ // X & (CC << C)
+ Value *M = Builder.CreateAnd(X, ConstantInt::get(Ty, CC->shl(*C)),
+ X->getName() + ".mask");
+ return BinaryOperator::Create(Op0BO->getOpcode(), M, YS);
+ }
+ }
+
+ // (C1 - X) << C --> (C1 << C) - (X << C)
+ if (match(Op0, m_OneUse(m_Sub(m_APInt(C1), m_Value(X))))) {
+ Constant *NewLHS = ConstantInt::get(Ty, C1->shl(*C));
+ Value *NewShift = Builder.CreateShl(X, Op1);
+ return BinaryOperator::CreateSub(NewLHS, NewShift);
+ }
+
// If the shifted-out value is known-zero, then this is a NUW shift.
if (!I.hasNoUnsignedWrap() &&
- MaskedValueIsZero(Op0, APInt::getHighBitsSet(BitWidth, ShAmt), 0, &I)) {
+ MaskedValueIsZero(Op0, APInt::getHighBitsSet(BitWidth, ShAmtC), 0,
+ &I)) {
I.setHasNoUnsignedWrap();
return &I;
}
// If the shifted-out value is all signbits, then this is a NSW shift.
- if (!I.hasNoSignedWrap() && ComputeNumSignBits(Op0, 0, &I) > ShAmt) {
+ if (!I.hasNoSignedWrap() && ComputeNumSignBits(Op0, 0, &I) > ShAmtC) {
I.setHasNoSignedWrap();
return &I;
}
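
For illustration only, and not part of the imported patch: the reordering added above rewrites ((X >> C) bop Y) << C as (X bop (Y << C)) masked by (~0 << C). The sketch below checks that identity for bop = add on 32-bit unsigned values; since both (X >> C) << C and Y << C have zero low bits, the discarded low bits of X cannot carry into the kept bits.

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X : {0u, 1u, 0xDEADBEEFu, 0x0F0F0F0Fu})
    for (uint32_t Y : {0u, 3u, 0x12345678u})
      for (unsigned C = 0; C < 16; ++C) {
        uint32_t Orig = ((X >> C) + Y) << C;
        uint32_t Mask = ~0u << C;              // ~0 << C
        uint32_t Fold = (X + (Y << C)) & Mask; // (X bop (Y << C)) & (~0 << C)
        assert(Orig == Fold);
      }
  return 0;
}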
@@ -1048,12 +1002,12 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
Type *Ty = I.getType();
- const APInt *ShAmtAPInt;
- if (match(Op1, m_APInt(ShAmtAPInt))) {
- unsigned ShAmt = ShAmtAPInt->getZExtValue();
+ const APInt *C;
+ if (match(Op1, m_APInt(C))) {
+ unsigned ShAmtC = C->getZExtValue();
unsigned BitWidth = Ty->getScalarSizeInBits();
auto *II = dyn_cast<IntrinsicInst>(Op0);
- if (II && isPowerOf2_32(BitWidth) && Log2_32(BitWidth) == ShAmt &&
+ if (II && isPowerOf2_32(BitWidth) && Log2_32(BitWidth) == ShAmtC &&
(II->getIntrinsicID() == Intrinsic::ctlz ||
II->getIntrinsicID() == Intrinsic::cttz ||
II->getIntrinsicID() == Intrinsic::ctpop)) {
@@ -1067,78 +1021,81 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
}
Value *X;
- const APInt *ShOp1;
- if (match(Op0, m_Shl(m_Value(X), m_APInt(ShOp1))) && ShOp1->ult(BitWidth)) {
- if (ShOp1->ult(ShAmt)) {
- unsigned ShlAmt = ShOp1->getZExtValue();
- Constant *ShiftDiff = ConstantInt::get(Ty, ShAmt - ShlAmt);
+ const APInt *C1;
+ if (match(Op0, m_Shl(m_Value(X), m_APInt(C1))) && C1->ult(BitWidth)) {
+ if (C1->ult(ShAmtC)) {
+ unsigned ShlAmtC = C1->getZExtValue();
+ Constant *ShiftDiff = ConstantInt::get(Ty, ShAmtC - ShlAmtC);
if (cast<BinaryOperator>(Op0)->hasNoUnsignedWrap()) {
- // (X <<nuw C1) >>u C2 --> X >>u (C2 - C1)
+ // (X <<nuw C1) >>u C --> X >>u (C - C1)
auto *NewLShr = BinaryOperator::CreateLShr(X, ShiftDiff);
NewLShr->setIsExact(I.isExact());
return NewLShr;
}
- // (X << C1) >>u C2 --> (X >>u (C2 - C1)) & (-1 >> C2)
+ // (X << C1) >>u C --> (X >>u (C - C1)) & (-1 >> C)
Value *NewLShr = Builder.CreateLShr(X, ShiftDiff, "", I.isExact());
- APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmt));
+ APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmtC));
return BinaryOperator::CreateAnd(NewLShr, ConstantInt::get(Ty, Mask));
}
- if (ShOp1->ugt(ShAmt)) {
- unsigned ShlAmt = ShOp1->getZExtValue();
- Constant *ShiftDiff = ConstantInt::get(Ty, ShlAmt - ShAmt);
+ if (C1->ugt(ShAmtC)) {
+ unsigned ShlAmtC = C1->getZExtValue();
+ Constant *ShiftDiff = ConstantInt::get(Ty, ShlAmtC - ShAmtC);
if (cast<BinaryOperator>(Op0)->hasNoUnsignedWrap()) {
- // (X <<nuw C1) >>u C2 --> X <<nuw (C1 - C2)
+ // (X <<nuw C1) >>u C --> X <<nuw (C1 - C)
auto *NewShl = BinaryOperator::CreateShl(X, ShiftDiff);
NewShl->setHasNoUnsignedWrap(true);
return NewShl;
}
- // (X << C1) >>u C2 --> X << (C1 - C2) & (-1 >> C2)
+ // (X << C1) >>u C --> X << (C1 - C) & (-1 >> C)
Value *NewShl = Builder.CreateShl(X, ShiftDiff);
- APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmt));
+ APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmtC));
return BinaryOperator::CreateAnd(NewShl, ConstantInt::get(Ty, Mask));
}
- assert(*ShOp1 == ShAmt);
+ assert(*C1 == ShAmtC);
// (X << C) >>u C --> X & (-1 >>u C)
- APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmt));
+ APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmtC));
return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, Mask));
}
if (match(Op0, m_OneUse(m_ZExt(m_Value(X)))) &&
(!Ty->isIntegerTy() || shouldChangeType(Ty, X->getType()))) {
- assert(ShAmt < X->getType()->getScalarSizeInBits() &&
+ assert(ShAmtC < X->getType()->getScalarSizeInBits() &&
"Big shift not simplified to zero?");
// lshr (zext iM X to iN), C --> zext (lshr X, C) to iN
- Value *NewLShr = Builder.CreateLShr(X, ShAmt);
+ Value *NewLShr = Builder.CreateLShr(X, ShAmtC);
return new ZExtInst(NewLShr, Ty);
}
- if (match(Op0, m_SExt(m_Value(X))) &&
- (!Ty->isIntegerTy() || shouldChangeType(Ty, X->getType()))) {
- // Are we moving the sign bit to the low bit and widening with high zeros?
+ if (match(Op0, m_SExt(m_Value(X)))) {
unsigned SrcTyBitWidth = X->getType()->getScalarSizeInBits();
- if (ShAmt == BitWidth - 1) {
- // lshr (sext i1 X to iN), N-1 --> zext X to iN
- if (SrcTyBitWidth == 1)
- return new ZExtInst(X, Ty);
+ // lshr (sext i1 X to iN), C --> select (X, -1 >> C, 0)
+ if (SrcTyBitWidth == 1) {
+ auto *NewC = ConstantInt::get(
+ Ty, APInt::getLowBitsSet(BitWidth, BitWidth - ShAmtC));
+ return SelectInst::Create(X, NewC, ConstantInt::getNullValue(Ty));
+ }
- // lshr (sext iM X to iN), N-1 --> zext (lshr X, M-1) to iN
- if (Op0->hasOneUse()) {
+ if ((!Ty->isIntegerTy() || shouldChangeType(Ty, X->getType())) &&
+ Op0->hasOneUse()) {
+ // Are we moving the sign bit to the low bit and widening with high
+ // zeros? lshr (sext iM X to iN), N-1 --> zext (lshr X, M-1) to iN
+ if (ShAmtC == BitWidth - 1) {
Value *NewLShr = Builder.CreateLShr(X, SrcTyBitWidth - 1);
return new ZExtInst(NewLShr, Ty);
}
- }
- // lshr (sext iM X to iN), N-M --> zext (ashr X, min(N-M, M-1)) to iN
- if (ShAmt == BitWidth - SrcTyBitWidth && Op0->hasOneUse()) {
- // The new shift amount can't be more than the narrow source type.
- unsigned NewShAmt = std::min(ShAmt, SrcTyBitWidth - 1);
- Value *AShr = Builder.CreateAShr(X, NewShAmt);
- return new ZExtInst(AShr, Ty);
+ // lshr (sext iM X to iN), N-M --> zext (ashr X, min(N-M, M-1)) to iN
+ if (ShAmtC == BitWidth - SrcTyBitWidth) {
+ // The new shift amount can't be more than the narrow source type.
+ unsigned NewShAmt = std::min(ShAmtC, SrcTyBitWidth - 1);
+ Value *AShr = Builder.CreateAShr(X, NewShAmt);
+ return new ZExtInst(AShr, Ty);
+ }
}
}
Value *Y;
- if (ShAmt == BitWidth - 1) {
+ if (ShAmtC == BitWidth - 1) {
// lshr i32 or(X,-X), 31 --> zext (X != 0)
if (match(Op0, m_OneUse(m_c_Or(m_Neg(m_Value(X)), m_Deferred(X)))))
return new ZExtInst(Builder.CreateIsNotNull(X), Ty);
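
For illustration only, and not part of the imported patch: the new i1 special case above turns lshr (sext i1 X to iN), C into select (X, -1 >> C, 0). The sketch below models i1 as bool and iN as uint32_t.

#include <cassert>
#include <cstdint>

int main() {
  for (bool X : {false, true})
    for (unsigned C = 0; C < 32; ++C) {
      uint32_t Sext = X ? 0xFFFFFFFFu : 0u;        // sext i1 X to i32
      uint32_t Orig = Sext >> C;                   // lshr by C
      uint32_t Fold = X ? (0xFFFFFFFFu >> C) : 0u; // select (X, -1 >> C, 0)
      assert(Orig == Fold);
    }
  return 0;
}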
@@ -1150,32 +1107,55 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
// Check if a number is negative and odd:
// lshr i32 (srem X, 2), 31 --> and (X >> 31), X
if (match(Op0, m_OneUse(m_SRem(m_Value(X), m_SpecificInt(2))))) {
- Value *Signbit = Builder.CreateLShr(X, ShAmt);
+ Value *Signbit = Builder.CreateLShr(X, ShAmtC);
return BinaryOperator::CreateAnd(Signbit, X);
}
}
- if (match(Op0, m_LShr(m_Value(X), m_APInt(ShOp1)))) {
- unsigned AmtSum = ShAmt + ShOp1->getZExtValue();
+ // (X >>u C1) >>u C --> X >>u (C1 + C)
+ if (match(Op0, m_LShr(m_Value(X), m_APInt(C1)))) {
// Oversized shifts are simplified to zero in InstSimplify.
+ unsigned AmtSum = ShAmtC + C1->getZExtValue();
if (AmtSum < BitWidth)
- // (X >>u C1) >>u C2 --> X >>u (C1 + C2)
return BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, AmtSum));
}
+ Instruction *TruncSrc;
+ if (match(Op0, m_OneUse(m_Trunc(m_Instruction(TruncSrc)))) &&
+ match(TruncSrc, m_LShr(m_Value(X), m_APInt(C1)))) {
+ unsigned SrcWidth = X->getType()->getScalarSizeInBits();
+ unsigned AmtSum = ShAmtC + C1->getZExtValue();
+
+ // If the combined shift fits in the source width:
+ // (trunc (X >>u C1)) >>u C --> and (trunc (X >>u (C1 + C)), MaskC)
+ //
+ // If the first shift covers the number of bits truncated, then the
+ // mask instruction is eliminated (and so the use check is relaxed).
+ if (AmtSum < SrcWidth &&
+ (TruncSrc->hasOneUse() || C1->uge(SrcWidth - BitWidth))) {
+ Value *SumShift = Builder.CreateLShr(X, AmtSum, "sum.shift");
+ Value *Trunc = Builder.CreateTrunc(SumShift, Ty, I.getName());
+
+ // If the first shift does not cover the number of bits truncated, then
+ // we require a mask to get rid of high bits in the result.
+ APInt MaskC = APInt::getAllOnes(BitWidth).lshr(ShAmtC);
+ return BinaryOperator::CreateAnd(Trunc, ConstantInt::get(Ty, MaskC));
+ }
+ }
+
// Look for a "splat" mul pattern - it replicates bits across each half of
// a value, so a right shift is just a mask of the low bits:
// lshr i32 (mul nuw X, Pow2+1), 16 --> and X, Pow2-1
// TODO: Generalize to allow more than just half-width shifts?
const APInt *MulC;
if (match(Op0, m_NUWMul(m_Value(X), m_APInt(MulC))) &&
- ShAmt * 2 == BitWidth && (*MulC - 1).isPowerOf2() &&
- MulC->logBase2() == ShAmt)
+ ShAmtC * 2 == BitWidth && (*MulC - 1).isPowerOf2() &&
+ MulC->logBase2() == ShAmtC)
return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, *MulC - 2));
// If the shifted-out value is known-zero, then this is an exact shift.
if (!I.isExact() &&
- MaskedValueIsZero(Op0, APInt::getLowBitsSet(BitWidth, ShAmt), 0, &I)) {
+ MaskedValueIsZero(Op0, APInt::getLowBitsSet(BitWidth, ShAmtC), 0, &I)) {
I.setIsExact();
return &I;
}
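
For illustration only, and not part of the imported patch: the "splat mul" fold above recognizes mul nuw X, 0x10001 followed by a half-width lshr as a mask of the low half. The sketch below checks that, with the nuw requirement modelled by restricting X to 16-bit values so the multiply cannot wrap.

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X : {0u, 1u, 0x1234u, 0xFFFFu}) { // nuw: X * 0x10001 must not wrap
    uint32_t Splat = X * 0x10001u;                // replicates X into both halves
    assert((Splat >> 16) == (X & 0xFFFFu));       // lshr ..., 16 --> and X, 0xFFFF
  }
  return 0;
}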
@@ -1346,6 +1326,22 @@ Instruction *InstCombinerImpl::visitAShr(BinaryOperator &I) {
}
}
+ // Prefer `-(x & 1)` over `(x << (bitwidth(x)-1)) a>> (bitwidth(x)-1)`
+ // as the pattern to splat the lowest bit.
+ // FIXME: iff X is already masked, we don't need the one-use check.
+ Value *X;
+ if (match(Op1, m_SpecificIntAllowUndef(BitWidth - 1)) &&
+ match(Op0, m_OneUse(m_Shl(m_Value(X),
+ m_SpecificIntAllowUndef(BitWidth - 1))))) {
+ Constant *Mask = ConstantInt::get(Ty, 1);
+ // Retain the knowledge about the ignored lanes.
+ Mask = Constant::mergeUndefsWith(
+ Constant::mergeUndefsWith(Mask, cast<Constant>(Op1)),
+ cast<Constant>(cast<Instruction>(Op0)->getOperand(1)));
+ X = Builder.CreateAnd(X, Mask);
+ return BinaryOperator::CreateNeg(X);
+ }
+
if (Instruction *R = foldVariableSignZeroExtensionOfVariableHighBitExtract(I))
return R;
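
For illustration only, and not part of the imported patch: the canonicalization above prefers -(x & 1) over the shl/ashr pair for splatting the lowest bit. The sketch below spells out the 32-bit arithmetic shift by hand so the comparison stays well defined in pre-C++20 dialects.

#include <cassert>
#include <cstdint>

// Arithmetic shift right by 31 on a 32-bit value: replicate the sign bit.
static uint32_t ashr31(uint32_t V) { return V & 0x80000000u ? 0xFFFFFFFFu : 0u; }

int main() {
  for (uint32_t X : {0u, 1u, 2u, 0xDEADBEEFu, 0xFFFFFFFEu, 0xFFFFFFFFu}) {
    uint32_t Splat = ashr31(X << 31); // old pattern: (X << 31) a>> 31
    uint32_t Canon = 0u - (X & 1u);   // preferred pattern: -(x & 1)
    assert(Splat == Canon);
  }
  return 0;
}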
@@ -1354,7 +1350,6 @@ Instruction *InstCombinerImpl::visitAShr(BinaryOperator &I) {
return BinaryOperator::CreateLShr(Op0, Op1);
// ashr (xor %x, -1), %y --> xor (ashr %x, %y), -1
- Value *X;
if (match(Op0, m_OneUse(m_Not(m_Value(X))))) {
// Note that we must drop 'exact'-ness of the shift!
// Note that we can't keep undef's in -1 vector constant!
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 15b51ae8a5ee..e357a9da8b12 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -55,7 +55,7 @@ static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
bool InstCombinerImpl::SimplifyDemandedInstructionBits(Instruction &Inst) {
unsigned BitWidth = Inst.getType()->getScalarSizeInBits();
KnownBits Known(BitWidth);
- APInt DemandedMask(APInt::getAllOnesValue(BitWidth));
+ APInt DemandedMask(APInt::getAllOnes(BitWidth));
Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, Known,
0, &Inst);
@@ -124,7 +124,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
}
Known.resetAll();
- if (DemandedMask.isNullValue()) // Not demanding any bits from V.
+ if (DemandedMask.isZero()) // Not demanding any bits from V.
return UndefValue::get(VTy);
if (Depth == MaxAnalysisRecursionDepth)
@@ -274,8 +274,8 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// constant because that's a canonical 'not' op, and that is better for
// combining, SCEV, and codegen.
const APInt *C;
- if (match(I->getOperand(1), m_APInt(C)) && !C->isAllOnesValue()) {
- if ((*C | ~DemandedMask).isAllOnesValue()) {
+ if (match(I->getOperand(1), m_APInt(C)) && !C->isAllOnes()) {
+ if ((*C | ~DemandedMask).isAllOnes()) {
// Force bits to 1 to create a 'not' op.
I->setOperand(1, ConstantInt::getAllOnesValue(VTy));
return I;
@@ -385,8 +385,26 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
Known = KnownBits::commonBits(LHSKnown, RHSKnown);
break;
}
- case Instruction::ZExt:
case Instruction::Trunc: {
+ // If we do not demand the high bits of a right-shifted and truncated value,
+ // then we may be able to truncate it before the shift.
+ Value *X;
+ const APInt *C;
+ if (match(I->getOperand(0), m_OneUse(m_LShr(m_Value(X), m_APInt(C))))) {
+ // The shift amount must be valid (not poison) in the narrow type, and
+ // it must not be greater than the high bits demanded of the result.
+ if (C->ult(I->getType()->getScalarSizeInBits()) &&
+ C->ule(DemandedMask.countLeadingZeros())) {
+ // trunc (lshr X, C) --> lshr (trunc X), C
+ IRBuilderBase::InsertPointGuard Guard(Builder);
+ Builder.SetInsertPoint(I);
+ Value *Trunc = Builder.CreateTrunc(X, I->getType());
+ return Builder.CreateLShr(Trunc, C->getZExtValue());
+ }
+ }
+ }
+ LLVM_FALLTHROUGH;
+ case Instruction::ZExt: {
unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
APInt InputDemandedMask = DemandedMask.zextOrTrunc(SrcBitWidth);
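
For illustration only, and not part of the imported patch: the new Trunc case above moves a truncate ahead of an lshr when the shift amount does not exceed the number of demanded leading zero bits of the narrow result. The sketch below picks DemandedMask = 0x00FF and C = 3, which satisfies that condition, and checks that the demanded bits agree.

#include <cassert>
#include <cstdint>

int main() {
  const uint16_t DemandedMask = 0x00FF; // top 8 bits of the i16 result are unused
  const unsigned C = 3;                 // C <= countLeadingZeros(DemandedMask) == 8
  for (uint32_t X : {0u, 0x12345678u, 0xFFFF8001u, 0x0000FFFFu}) {
    uint16_t Before = static_cast<uint16_t>(X >> C);                        // trunc (lshr X, C)
    uint16_t After = static_cast<uint16_t>(static_cast<uint16_t>(X) >> C);  // lshr (trunc X), C
    assert((Before & DemandedMask) == (After & DemandedMask));
  }
  return 0;
}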
@@ -516,8 +534,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
return I->getOperand(0);
// We can't do this with the LHS for subtraction, unless we are only
// demanding the LSB.
- if ((I->getOpcode() == Instruction::Add ||
- DemandedFromOps.isOneValue()) &&
+ if ((I->getOpcode() == Instruction::Add || DemandedFromOps.isOne()) &&
DemandedFromOps.isSubsetOf(LHSKnown.Zero))
return I->getOperand(1);
@@ -615,7 +632,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// always convert this into a logical shr, even if the shift amount is
// variable. The low bit of the shift cannot be an input sign bit unless
// the shift amount is >= the size of the datatype, which is undefined.
- if (DemandedMask.isOneValue()) {
+ if (DemandedMask.isOne()) {
// Perform the logical shift right.
Instruction *NewVal = BinaryOperator::CreateLShr(
I->getOperand(0), I->getOperand(1), I->getName());
@@ -743,7 +760,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
}
case Instruction::URem: {
KnownBits Known2(BitWidth);
- APInt AllOnes = APInt::getAllOnesValue(BitWidth);
+ APInt AllOnes = APInt::getAllOnes(BitWidth);
if (SimplifyDemandedBits(I, 0, AllOnes, Known2, Depth + 1) ||
SimplifyDemandedBits(I, 1, AllOnes, Known2, Depth + 1))
return I;
@@ -829,6 +846,29 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
KnownBitsComputed = true;
break;
}
+ case Intrinsic::umax: {
+ // UMax(A, C) == A if ...
+ // The lowest non-zero bit of DemandedMask is higher than the highest
+ // non-zero bit of C.
+ const APInt *C;
+ unsigned CTZ = DemandedMask.countTrailingZeros();
+ if (match(II->getArgOperand(1), m_APInt(C)) &&
+ CTZ >= C->getActiveBits())
+ return II->getArgOperand(0);
+ break;
+ }
+ case Intrinsic::umin: {
+ // UMin(A, C) == A if ...
+ // The lowest non-zero bit of DemandedMask is higher than the highest
+ // non-one bit of C.
+ // This comes from applying De Morgan's laws to the umax example above.
+ const APInt *C;
+ unsigned CTZ = DemandedMask.countTrailingZeros();
+ if (match(II->getArgOperand(1), m_APInt(C)) &&
+ CTZ >= C->getBitWidth() - C->countLeadingOnes())
+ return II->getArgOperand(0);
+ break;
+ }
default: {
// Handle target specific intrinsics
Optional<Value *> V = targetSimplifyDemandedUseBitsIntrinsic(
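
For illustration only, and not part of the imported patch: the umax case above drops the intrinsic when every demanded bit sits above the highest set bit of the constant operand. The sketch below uses DemandedMask = 0xFFFFFF00 and C = 0xFF, so countTrailingZeros(DemandedMask) = 8 is not less than the active bits of C.

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t DemandedMask = 0xFFFFFF00u; // lowest demanded bit is bit 8
  const uint32_t C = 0x000000FFu;            // active bits of C stop below bit 8
  for (uint32_t A : {0u, 1u, 0x80u, 0x1FFu, 0x12345678u, 0xFFFFFFFFu}) {
    uint32_t UMax = std::max(A, C);          // umax(A, C)
    assert((UMax & DemandedMask) == (A & DemandedMask));
  }
  return 0;
}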
@@ -1021,8 +1061,8 @@ Value *InstCombinerImpl::simplifyShrShlDemandedBits(
Known.Zero.setLowBits(ShlAmt - 1);
Known.Zero &= DemandedMask;
- APInt BitMask1(APInt::getAllOnesValue(BitWidth));
- APInt BitMask2(APInt::getAllOnesValue(BitWidth));
+ APInt BitMask1(APInt::getAllOnes(BitWidth));
+ APInt BitMask2(APInt::getAllOnes(BitWidth));
bool isLshr = (Shr->getOpcode() == Instruction::LShr);
BitMask1 = isLshr ? (BitMask1.lshr(ShrAmt) << ShlAmt) :
@@ -1088,7 +1128,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
return nullptr;
unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();
- APInt EltMask(APInt::getAllOnesValue(VWidth));
+ APInt EltMask(APInt::getAllOnes(VWidth));
assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!");
if (match(V, m_Undef())) {
@@ -1097,7 +1137,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
return nullptr;
}
- if (DemandedElts.isNullValue()) { // If nothing is demanded, provide poison.
+ if (DemandedElts.isZero()) { // If nothing is demanded, provide poison.
UndefElts = EltMask;
return PoisonValue::get(V->getType());
}
@@ -1107,7 +1147,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
if (auto *C = dyn_cast<Constant>(V)) {
// Check if this is identity. If so, return 0 since we are not simplifying
// anything.
- if (DemandedElts.isAllOnesValue())
+ if (DemandedElts.isAllOnes())
return nullptr;
Type *EltTy = cast<VectorType>(V->getType())->getElementType();
@@ -1260,7 +1300,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
// Handle trivial case of a splat. Only check the first element of LHS
// operand.
if (all_of(Shuffle->getShuffleMask(), [](int Elt) { return Elt == 0; }) &&
- DemandedElts.isAllOnesValue()) {
+ DemandedElts.isAllOnes()) {
if (!match(I->getOperand(1), m_Undef())) {
I->setOperand(1, PoisonValue::get(I->getOperand(1)->getType()));
MadeChange = true;
@@ -1515,8 +1555,8 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
// Subtlety: If we load from a pointer, the pointer must be valid
// regardless of whether the element is demanded. Doing otherwise risks
// segfaults which didn't exist in the original program.
- APInt DemandedPtrs(APInt::getAllOnesValue(VWidth)),
- DemandedPassThrough(DemandedElts);
+ APInt DemandedPtrs(APInt::getAllOnes(VWidth)),
+ DemandedPassThrough(DemandedElts);
if (auto *CV = dyn_cast<ConstantVector>(II->getOperand(2)))
for (unsigned i = 0; i < VWidth; i++) {
Constant *CElt = CV->getAggregateElement(i);
@@ -1568,7 +1608,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
// If we've proven all of the lanes undef, return an undef value.
// TODO: Intersect w/demanded lanes
- if (UndefElts.isAllOnesValue())
+ if (UndefElts.isAllOnes())
return UndefValue::get(I->getType());;
return MadeChange ? I : nullptr;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 32b15376f898..32e537897140 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -35,37 +35,46 @@
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>
+#define DEBUG_TYPE "instcombine"
+#include "llvm/Transforms/Utils/InstructionWorklist.h"
+
using namespace llvm;
using namespace PatternMatch;
-#define DEBUG_TYPE "instcombine"
-
STATISTIC(NumAggregateReconstructionsSimplified,
"Number of aggregate reconstructions turned into reuse of the "
"original aggregate");
/// Return true if the value is cheaper to scalarize than it is to leave as a
-/// vector operation. IsConstantExtractIndex indicates whether we are extracting
-/// one known element from a vector constant.
+/// vector operation. If the extract index \p EI is a constant integer then
+/// some operations may be cheap to scalarize.
///
/// FIXME: It's possible to create more instructions than previously existed.
-static bool cheapToScalarize(Value *V, bool IsConstantExtractIndex) {
+static bool cheapToScalarize(Value *V, Value *EI) {
+ ConstantInt *CEI = dyn_cast<ConstantInt>(EI);
+
// If we can pick a scalar constant value out of a vector, that is free.
if (auto *C = dyn_cast<Constant>(V))
- return IsConstantExtractIndex || C->getSplatValue();
+ return CEI || C->getSplatValue();
+
+ if (CEI && match(V, m_Intrinsic<Intrinsic::experimental_stepvector>())) {
+ ElementCount EC = cast<VectorType>(V->getType())->getElementCount();
+ // Index needs to be lower than the minimum size of the vector, because
+ // for a scalable vector the actual size is only known at run time.
+ return CEI->getValue().ult(EC.getKnownMinValue());
+ }
// An insertelement to the same constant index as our extract will simplify
// to the scalar inserted element. An insertelement to a different constant
// index is irrelevant to our extract.
if (match(V, m_InsertElt(m_Value(), m_Value(), m_ConstantInt())))
- return IsConstantExtractIndex;
+ return CEI;
if (match(V, m_OneUse(m_Load(m_Value()))))
return true;
@@ -75,14 +84,12 @@ static bool cheapToScalarize(Value *V, bool IsConstantExtractIndex) {
Value *V0, *V1;
if (match(V, m_OneUse(m_BinOp(m_Value(V0), m_Value(V1)))))
- if (cheapToScalarize(V0, IsConstantExtractIndex) ||
- cheapToScalarize(V1, IsConstantExtractIndex))
+ if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))
return true;
CmpInst::Predicate UnusedPred;
if (match(V, m_OneUse(m_Cmp(UnusedPred, m_Value(V0), m_Value(V1)))))
- if (cheapToScalarize(V0, IsConstantExtractIndex) ||
- cheapToScalarize(V1, IsConstantExtractIndex))
+ if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))
return true;
return false;
@@ -119,7 +126,8 @@ Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI,
// and that it is a binary operation which is cheap to scalarize.
// otherwise return nullptr.
if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
- !(isa<BinaryOperator>(PHIUser)) || !cheapToScalarize(PHIUser, true))
+ !(isa<BinaryOperator>(PHIUser)) ||
+ !cheapToScalarize(PHIUser, EI.getIndexOperand()))
return nullptr;
// Create a scalar PHI node that will replace the vector PHI node
@@ -170,24 +178,46 @@ Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI,
return &EI;
}
-static Instruction *foldBitcastExtElt(ExtractElementInst &Ext,
- InstCombiner::BuilderTy &Builder,
- bool IsBigEndian) {
+Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) {
Value *X;
uint64_t ExtIndexC;
if (!match(Ext.getVectorOperand(), m_BitCast(m_Value(X))) ||
- !X->getType()->isVectorTy() ||
!match(Ext.getIndexOperand(), m_ConstantInt(ExtIndexC)))
return nullptr;
+ ElementCount NumElts =
+ cast<VectorType>(Ext.getVectorOperandType())->getElementCount();
+ Type *DestTy = Ext.getType();
+ bool IsBigEndian = DL.isBigEndian();
+
+ // If we are casting an integer to vector and extracting a portion, that is
+ // a shift-right and truncate.
+ // TODO: Allow FP dest type by casting the trunc to FP?
+ if (X->getType()->isIntegerTy() && DestTy->isIntegerTy() &&
+ isDesirableIntType(X->getType()->getPrimitiveSizeInBits())) {
+ assert(isa<FixedVectorType>(Ext.getVectorOperand()->getType()) &&
+ "Expected fixed vector type for bitcast from scalar integer");
+
+ // Big endian requires adjusting the extract index since MSB is at index 0.
+ // LittleEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 X to i8
+ // BigEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 (X >> 24) to i8
+ if (IsBigEndian)
+ ExtIndexC = NumElts.getKnownMinValue() - 1 - ExtIndexC;
+ unsigned ShiftAmountC = ExtIndexC * DestTy->getPrimitiveSizeInBits();
+ if (!ShiftAmountC || Ext.getVectorOperand()->hasOneUse()) {
+ Value *Lshr = Builder.CreateLShr(X, ShiftAmountC, "extelt.offset");
+ return new TruncInst(Lshr, DestTy);
+ }
+ }
+
+ if (!X->getType()->isVectorTy())
+ return nullptr;
+
// If this extractelement is using a bitcast from a vector of the same number
// of elements, see if we can find the source element from the source vector:
// extelt (bitcast VecX), IndexC --> bitcast X[IndexC]
auto *SrcTy = cast<VectorType>(X->getType());
- Type *DestTy = Ext.getType();
ElementCount NumSrcElts = SrcTy->getElementCount();
- ElementCount NumElts =
- cast<VectorType>(Ext.getVectorOperandType())->getElementCount();
if (NumSrcElts == NumElts)
if (Value *Elt = findScalarElement(X, ExtIndexC))
return new BitCastInst(Elt, DestTy);
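
For illustration only, and not part of the imported patch: the new scalar-integer path above turns extractelement of (bitcast i32 X to v4i8) into a right shift plus truncate, flipping the index on big-endian targets. The sketch below reproduces both index adjustments with a host byte-order probe.

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint32_t X = 0xAABBCCDDu;
  uint8_t Bytes[4];
  std::memcpy(Bytes, &X, sizeof(X)); // the "bitcast i32 X to v4i8"

  // Detect host endianness; big-endian flips the index, as in the patch.
  uint16_t Probe = 1;
  uint8_t FirstByte;
  std::memcpy(&FirstByte, &Probe, 1);
  bool LittleEndian = FirstByte == 1;

  for (unsigned Idx = 0; Idx < 4; ++Idx) {
    unsigned ShiftIdx = LittleEndian ? Idx : 3 - Idx;
    uint8_t ViaShift = static_cast<uint8_t>(X >> (ShiftIdx * 8)); // trunc (lshr X, Idx*8)
    assert(Bytes[Idx] == ViaShift);
  }
  return 0;
}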
@@ -274,7 +304,7 @@ static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr) {
unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();
// Conservatively assume that all elements are needed.
- APInt UsedElts(APInt::getAllOnesValue(VWidth));
+ APInt UsedElts(APInt::getAllOnes(VWidth));
switch (UserInstr->getOpcode()) {
case Instruction::ExtractElement: {
@@ -322,11 +352,11 @@ static APInt findDemandedEltsByAllUsers(Value *V) {
if (Instruction *I = dyn_cast<Instruction>(U.getUser())) {
UnionUsedElts |= findDemandedEltsBySingleUser(V, I);
} else {
- UnionUsedElts = APInt::getAllOnesValue(VWidth);
+ UnionUsedElts = APInt::getAllOnes(VWidth);
break;
}
- if (UnionUsedElts.isAllOnesValue())
+ if (UnionUsedElts.isAllOnes())
break;
}
@@ -388,7 +418,7 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
// If the input vector has multiple uses, simplify it based on a union
// of all elements used.
APInt DemandedElts = findDemandedEltsByAllUsers(SrcVec);
- if (!DemandedElts.isAllOnesValue()) {
+ if (!DemandedElts.isAllOnes()) {
APInt UndefElts(NumElts, 0);
if (Value *V = SimplifyDemandedVectorElts(
SrcVec, DemandedElts, UndefElts, 0 /* Depth */,
@@ -402,7 +432,7 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
}
}
- if (Instruction *I = foldBitcastExtElt(EI, Builder, DL.isBigEndian()))
+ if (Instruction *I = foldBitcastExtElt(EI))
return I;
// If there's a vector PHI feeding a scalar use through this extractelement
@@ -415,7 +445,7 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
// TODO come up with a n-ary matcher that subsumes both unary and
// binary matchers.
UnaryOperator *UO;
- if (match(SrcVec, m_UnOp(UO)) && cheapToScalarize(SrcVec, IndexC)) {
+ if (match(SrcVec, m_UnOp(UO)) && cheapToScalarize(SrcVec, Index)) {
// extelt (unop X), Index --> unop (extelt X, Index)
Value *X = UO->getOperand(0);
Value *E = Builder.CreateExtractElement(X, Index);
@@ -423,7 +453,7 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
}
BinaryOperator *BO;
- if (match(SrcVec, m_BinOp(BO)) && cheapToScalarize(SrcVec, IndexC)) {
+ if (match(SrcVec, m_BinOp(BO)) && cheapToScalarize(SrcVec, Index)) {
// extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index)
Value *X = BO->getOperand(0), *Y = BO->getOperand(1);
Value *E0 = Builder.CreateExtractElement(X, Index);
@@ -434,7 +464,7 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
Value *X, *Y;
CmpInst::Predicate Pred;
if (match(SrcVec, m_Cmp(Pred, m_Value(X), m_Value(Y))) &&
- cheapToScalarize(SrcVec, IndexC)) {
+ cheapToScalarize(SrcVec, Index)) {
// extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index)
Value *E0 = Builder.CreateExtractElement(X, Index);
Value *E1 = Builder.CreateExtractElement(Y, Index);
@@ -651,8 +681,7 @@ static void replaceExtractElements(InsertElementInst *InsElt,
if (InsElt->hasOneUse() && isa<InsertElementInst>(InsElt->user_back()))
return;
- auto *WideVec =
- new ShuffleVectorInst(ExtVecOp, PoisonValue::get(ExtVecType), ExtendMask);
+ auto *WideVec = new ShuffleVectorInst(ExtVecOp, ExtendMask);
// Insert the new shuffle after the vector operand of the extract is defined
// (as long as it's not a PHI) or at the start of the basic block of the
@@ -913,7 +942,7 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
"We don't store nullptr in SourceAggregate!");
assert((Describe(SourceAggregate) == AggregateDescription::Found) ==
(I.index() != 0) &&
- "SourceAggregate should be valid after the the first element,");
+ "SourceAggregate should be valid after the first element,");
// For this element, is there a plausible source aggregate?
// FIXME: we could special-case undef element, IFF we know that in the
@@ -1179,7 +1208,7 @@ static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) {
if (!ElementPresent[i])
Mask[i] = -1;
- return new ShuffleVectorInst(FirstIE, PoisonVec, Mask);
+ return new ShuffleVectorInst(FirstIE, Mask);
}
/// Try to fold an insert element into an existing splat shuffle by changing
@@ -1208,15 +1237,15 @@ static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) {
// Replace the shuffle mask element at the index of this insert with a zero.
// For example:
- // inselt (shuf (inselt undef, X, 0), undef, <0,undef,0,undef>), X, 1
- // --> shuf (inselt undef, X, 0), undef, <0,0,0,undef>
+ // inselt (shuf (inselt undef, X, 0), _, <0,undef,0,undef>), X, 1
+ // --> shuf (inselt undef, X, 0), poison, <0,0,0,undef>
unsigned NumMaskElts =
cast<FixedVectorType>(Shuf->getType())->getNumElements();
SmallVector<int, 16> NewMask(NumMaskElts);
for (unsigned i = 0; i != NumMaskElts; ++i)
NewMask[i] = i == IdxC ? 0 : Shuf->getMaskValue(i);
- return new ShuffleVectorInst(Op0, UndefValue::get(Op0->getType()), NewMask);
+ return new ShuffleVectorInst(Op0, NewMask);
}
/// Try to fold an extract+insert element into an existing identity shuffle by
@@ -1348,6 +1377,10 @@ static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) {
NewShufElts[I] = ShufConstVec->getAggregateElement(I);
NewMaskElts[I] = Mask[I];
}
+
+ // Bail if we failed to find an element.
+ if (!NewShufElts[I])
+ return nullptr;
}
// Create new operands for a shuffle that includes the constant of the
@@ -1399,6 +1432,41 @@ static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) {
return nullptr;
}
+/// If both the base vector and the inserted element are extended from the same
+/// type, do the insert element in the narrow source type followed by extend.
+/// TODO: This can be extended to include other cast opcodes, but particularly
+/// if we create a wider insertelement, make sure codegen is not harmed.
+static Instruction *narrowInsElt(InsertElementInst &InsElt,
+ InstCombiner::BuilderTy &Builder) {
+ // We are creating a vector extend. If the original vector extend has another
+ // use, that would mean we end up with 2 vector extends, so avoid that.
+ // TODO: We could ease the use-clause to "if at least one op has one use"
+ // (assuming that the source types match - see next TODO comment).
+ Value *Vec = InsElt.getOperand(0);
+ if (!Vec->hasOneUse())
+ return nullptr;
+
+ Value *Scalar = InsElt.getOperand(1);
+ Value *X, *Y;
+ CastInst::CastOps CastOpcode;
+ if (match(Vec, m_FPExt(m_Value(X))) && match(Scalar, m_FPExt(m_Value(Y))))
+ CastOpcode = Instruction::FPExt;
+ else if (match(Vec, m_SExt(m_Value(X))) && match(Scalar, m_SExt(m_Value(Y))))
+ CastOpcode = Instruction::SExt;
+ else if (match(Vec, m_ZExt(m_Value(X))) && match(Scalar, m_ZExt(m_Value(Y))))
+ CastOpcode = Instruction::ZExt;
+ else
+ return nullptr;
+
+ // TODO: We can allow mismatched types by creating an intermediate cast.
+ if (X->getType()->getScalarType() != Y->getType())
+ return nullptr;
+
+ // inselt (ext X), (ext Y), Index --> ext (inselt X, Y, Index)
+ Value *NewInsElt = Builder.CreateInsertElement(X, Y, InsElt.getOperand(2));
+ return CastInst::Create(CastOpcode, NewInsElt, InsElt.getType());
+}
+
Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) {
Value *VecOp = IE.getOperand(0);
Value *ScalarOp = IE.getOperand(1);
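
For illustration only, and not part of the imported patch: narrowInsElt, added above, performs the insertelement in the narrow source type and extends afterwards. The sketch below models a v4i8 to v4i32 zero extension with plain arrays and checks that both orders produce the same widened vector.

#include <cassert>
#include <cstdint>

int main() {
  uint8_t X[4] = {1, 2, 3, 4}; // narrow source vector
  uint8_t Y = 0xAB;            // narrow scalar to insert
  unsigned Index = 2;

  // inselt (zext X), (zext Y), Index
  uint32_t Wide[4];
  for (unsigned i = 0; i < 4; ++i)
    Wide[i] = X[i];            // zext each element
  Wide[Index] = Y;             // insert the zext'ed scalar

  // zext (inselt X, Y, Index)
  uint8_t Narrow[4] = {X[0], X[1], X[2], X[3]};
  Narrow[Index] = Y;
  for (unsigned i = 0; i < 4; ++i)
    assert(Wide[i] == static_cast<uint32_t>(Narrow[i]));
  return 0;
}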
@@ -1495,7 +1563,7 @@ Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) {
if (auto VecTy = dyn_cast<FixedVectorType>(VecOp->getType())) {
unsigned VWidth = VecTy->getNumElements();
APInt UndefElts(VWidth, 0);
- APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
+ APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) {
if (V != &IE)
return replaceInstUsesWith(IE, V);
@@ -1518,6 +1586,9 @@ Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) {
if (Instruction *IdentityShuf = foldInsEltIntoIdentityShuffle(IE))
return IdentityShuf;
+ if (Instruction *Ext = narrowInsElt(IE, Builder))
+ return Ext;
+
return nullptr;
}
@@ -1924,8 +1995,8 @@ static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf,
// Splat from element 0. Any mask element that is undefined remains undefined.
// For example:
- // shuf (inselt undef, X, 2), undef, <2,2,undef>
- // --> shuf (inselt undef, X, 0), undef, <0,0,undef>
+ // shuf (inselt undef, X, 2), _, <2,2,undef>
+ // --> shuf (inselt undef, X, 0), poison, <0,0,undef>
unsigned NumMaskElts =
cast<FixedVectorType>(Shuf.getType())->getNumElements();
SmallVector<int, 16> NewMask(NumMaskElts, 0);
@@ -1933,7 +2004,7 @@ static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf,
if (Mask[i] == UndefMaskElem)
NewMask[i] = Mask[i];
- return new ShuffleVectorInst(NewIns, UndefVec, NewMask);
+ return new ShuffleVectorInst(NewIns, NewMask);
}
/// Try to fold shuffles that are the equivalent of a vector select.
@@ -2197,12 +2268,8 @@ static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,
SmallVector<int, 16> Mask;
Shuf.getShuffleMask(Mask);
- // The shuffle must not change vector sizes.
- // TODO: This restriction could be removed if the insert has only one use
- // (because the transform would require a new length-changing shuffle).
int NumElts = Mask.size();
- if (NumElts != (int)(cast<FixedVectorType>(V0->getType())->getNumElements()))
- return nullptr;
+ int InpNumElts = cast<FixedVectorType>(V0->getType())->getNumElements();
// This is a specialization of a fold in SimplifyDemandedVectorElts. We may
// not be able to handle it there if the insertelement has >1 use.
@@ -2219,11 +2286,16 @@ static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,
if (match(V1, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
// Offset the index constant by the vector width because we are checking for
// accesses to the 2nd vector input of the shuffle.
- IdxC += NumElts;
+ IdxC += InpNumElts;
// shuf ?, (inselt X, ?, IdxC), Mask --> shuf ?, X, Mask
if (!is_contained(Mask, (int)IdxC))
return IC.replaceOperand(Shuf, 1, X);
}
+ // For the rest of the transform, the shuffle must not change vector sizes.
+ // TODO: This restriction could be removed if the insert has only one use
+ // (because the transform would require a new length-changing shuffle).
+ if (NumElts != InpNumElts)
+ return nullptr;
// shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'
auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) {
@@ -2413,16 +2485,7 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
if (LHS == RHS) {
assert(!match(RHS, m_Undef()) &&
"Shuffle with 2 undef ops not simplified?");
- // Remap any references to RHS to use LHS.
- SmallVector<int, 16> Elts;
- for (unsigned i = 0; i != VWidth; ++i) {
- // Propagate undef elements or force mask to LHS.
- if (Mask[i] < 0)
- Elts.push_back(UndefMaskElem);
- else
- Elts.push_back(Mask[i] % LHSWidth);
- }
- return new ShuffleVectorInst(LHS, UndefValue::get(RHS->getType()), Elts);
+ return new ShuffleVectorInst(LHS, createUnaryMask(Mask, LHSWidth));
}
// shuffle undef, x, mask --> shuffle x, undef, mask'
@@ -2444,7 +2507,7 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
return I;
APInt UndefElts(VWidth, 0);
- APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
+ APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
if (V != &SVI)
return replaceInstUsesWith(SVI, V);
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 4e3b18e805ee..47b6dcb67a78 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -100,7 +100,6 @@
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/InstCombine/InstCombine.h"
-#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
@@ -109,11 +108,12 @@
#include <string>
#include <utility>
+#define DEBUG_TYPE "instcombine"
+#include "llvm/Transforms/Utils/InstructionWorklist.h"
+
using namespace llvm;
using namespace llvm::PatternMatch;
-#define DEBUG_TYPE "instcombine"
-
STATISTIC(NumWorklistIterations,
"Number of instruction combining iterations performed");
@@ -202,23 +202,37 @@ Value *InstCombinerImpl::EmitGEPOffset(User *GEP) {
return llvm::EmitGEPOffset(&Builder, DL, GEP);
}
+/// Legal integers and common types are considered desirable. This is used to
+/// avoid creating instructions with types that may not be supported well by
+/// the backend.
+/// NOTE: This treats i8, i16 and i32 specially because they are common
+/// types in frontend languages.
+bool InstCombinerImpl::isDesirableIntType(unsigned BitWidth) const {
+ switch (BitWidth) {
+ case 8:
+ case 16:
+ case 32:
+ return true;
+ default:
+ return DL.isLegalInteger(BitWidth);
+ }
+}
+
/// Return true if it is desirable to convert an integer computation from a
/// given bit width to a new bit width.
/// We don't want to convert from a legal to an illegal type or from a smaller
-/// to a larger illegal type. A width of '1' is always treated as a legal type
-/// because i1 is a fundamental type in IR, and there are many specialized
-/// optimizations for i1 types. Widths of 8, 16 or 32 are equally treated as
+/// to a larger illegal type. A width of '1' is always treated as a desirable
+/// type because i1 is a fundamental type in IR, and there are many specialized
+/// optimizations for i1 types. Common/desirable widths are equally treated as
/// legal to convert to, in order to open up more combining opportunities.
-/// NOTE: this treats i8, i16 and i32 specially, due to them being so common
-/// from frontend languages.
bool InstCombinerImpl::shouldChangeType(unsigned FromWidth,
unsigned ToWidth) const {
bool FromLegal = FromWidth == 1 || DL.isLegalInteger(FromWidth);
bool ToLegal = ToWidth == 1 || DL.isLegalInteger(ToWidth);
- // Convert to widths of 8, 16 or 32 even if they are not legal types. Only
- // shrink types, to prevent infinite loops.
- if (ToWidth < FromWidth && (ToWidth == 8 || ToWidth == 16 || ToWidth == 32))
+ // Convert to desirable widths even if they are not legal types.
+ // Only shrink types, to prevent infinite loops.
+ if (ToWidth < FromWidth && isDesirableIntType(ToWidth))
return true;
// If this is a legal integer from type, and the result would be an illegal
@@ -359,7 +373,8 @@ Value *InstCombinerImpl::simplifyIntToPtrRoundTripCast(Value *Val) {
PtrToInt->getSrcTy()->getPointerAddressSpace() &&
DL.getPointerTypeSizeInBits(PtrToInt->getSrcTy()) ==
DL.getTypeSizeInBits(PtrToInt->getDestTy())) {
- return Builder.CreateBitCast(PtrToInt->getOperand(0), CastTy);
+ return CastInst::CreateBitOrPointerCast(PtrToInt->getOperand(0), CastTy,
+ "", PtrToInt);
}
}
return nullptr;
@@ -961,14 +976,14 @@ static Value *foldOperationIntoSelectOperand(Instruction &I, Value *SO,
assert(canConstantFoldCallTo(II, cast<Function>(II->getCalledOperand())) &&
"Expected constant-foldable intrinsic");
Intrinsic::ID IID = II->getIntrinsicID();
- if (II->getNumArgOperands() == 1)
+ if (II->arg_size() == 1)
return Builder.CreateUnaryIntrinsic(IID, SO);
// This works for real binary ops like min/max (where we always expect the
// constant operand to be canonicalized as op1) and unary ops with a bonus
// constant argument like ctlz/cttz.
// TODO: Handle non-commutative binary intrinsics as below for binops.
- assert(II->getNumArgOperands() == 2 && "Expected binary intrinsic");
+ assert(II->arg_size() == 2 && "Expected binary intrinsic");
assert(isa<Constant>(II->getArgOperand(1)) && "Expected constant operand");
return Builder.CreateBinaryIntrinsic(IID, SO, II->getArgOperand(1));
}
@@ -1058,7 +1073,7 @@ Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op,
// Compare for equality including undefs as equal.
auto *Cmp = ConstantExpr::getCompare(ICmpInst::ICMP_EQ, ConstA, ConstB);
const APInt *C;
- return match(Cmp, m_APIntAllowUndef(C)) && C->isOneValue();
+ return match(Cmp, m_APIntAllowUndef(C)) && C->isOne();
};
if ((areLooselyEqual(TV, Op0) && areLooselyEqual(FV, Op1)) ||
@@ -1120,9 +1135,11 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
BasicBlock *NonConstBB = nullptr;
for (unsigned i = 0; i != NumPHIValues; ++i) {
Value *InVal = PN->getIncomingValue(i);
- // If I is a freeze instruction, count undef as a non-constant.
- if (match(InVal, m_ImmConstant()) &&
- (!isa<FreezeInst>(I) || isGuaranteedNotToBeUndefOrPoison(InVal)))
+ // For non-freeze, require constant operand
+ // For freeze, require non-undef, non-poison operand
+ if (!isa<FreezeInst>(I) && match(InVal, m_ImmConstant()))
+ continue;
+ if (isa<FreezeInst>(I) && isGuaranteedNotToBeUndefOrPoison(InVal))
continue;
if (isa<PHINode>(InVal)) return nullptr; // Itself a phi.
@@ -1268,61 +1285,19 @@ Instruction *InstCombinerImpl::foldBinOpIntoSelectOrPhi(BinaryOperator &I) {
/// specified offset. If so, fill them into NewIndices and return the resultant
/// element type, otherwise return null.
Type *
-InstCombinerImpl::FindElementAtOffset(PointerType *PtrTy, int64_t Offset,
+InstCombinerImpl::FindElementAtOffset(PointerType *PtrTy, int64_t IntOffset,
SmallVectorImpl<Value *> &NewIndices) {
Type *Ty = PtrTy->getElementType();
if (!Ty->isSized())
return nullptr;
- // Start with the index over the outer type. Note that the type size
- // might be zero (even if the offset isn't zero) if the indexed type
- // is something like [0 x {int, int}]
- Type *IndexTy = DL.getIndexType(PtrTy);
- int64_t FirstIdx = 0;
- if (int64_t TySize = DL.getTypeAllocSize(Ty)) {
- FirstIdx = Offset/TySize;
- Offset -= FirstIdx*TySize;
-
- // Handle hosts where % returns negative instead of values [0..TySize).
- if (Offset < 0) {
- --FirstIdx;
- Offset += TySize;
- assert(Offset >= 0);
- }
- assert((uint64_t)Offset < (uint64_t)TySize && "Out of range offset");
- }
-
- NewIndices.push_back(ConstantInt::get(IndexTy, FirstIdx));
-
- // Index into the types. If we fail, set OrigBase to null.
- while (Offset) {
- // Indexing into tail padding between struct/array elements.
- if (uint64_t(Offset * 8) >= DL.getTypeSizeInBits(Ty))
- return nullptr;
-
- if (StructType *STy = dyn_cast<StructType>(Ty)) {
- const StructLayout *SL = DL.getStructLayout(STy);
- assert(Offset < (int64_t)SL->getSizeInBytes() &&
- "Offset must stay within the indexed type");
-
- unsigned Elt = SL->getElementContainingOffset(Offset);
- NewIndices.push_back(ConstantInt::get(Type::getInt32Ty(Ty->getContext()),
- Elt));
-
- Offset -= SL->getElementOffset(Elt);
- Ty = STy->getElementType(Elt);
- } else if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
- uint64_t EltSize = DL.getTypeAllocSize(AT->getElementType());
- assert(EltSize && "Cannot index into a zero-sized array");
- NewIndices.push_back(ConstantInt::get(IndexTy,Offset/EltSize));
- Offset %= EltSize;
- Ty = AT->getElementType();
- } else {
- // Otherwise, we can't index into the middle of this atomic type, bail.
- return nullptr;
- }
- }
+ APInt Offset(DL.getIndexTypeSizeInBits(PtrTy), IntOffset);
+ SmallVector<APInt> Indices = DL.getGEPIndicesForOffset(Ty, Offset);
+ if (!Offset.isZero())
+ return nullptr;
+ for (const APInt &Index : Indices)
+ NewIndices.push_back(Builder.getInt(Index));
return Ty;
}
@@ -1623,7 +1598,7 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
Value *XY = Builder.CreateBinOp(Opcode, X, Y);
if (auto *BO = dyn_cast<BinaryOperator>(XY))
BO->copyIRFlags(&Inst);
- return new ShuffleVectorInst(XY, UndefValue::get(XY->getType()), M);
+ return new ShuffleVectorInst(XY, M);
};
// If both arguments of the binary operation are shuffles that use the same
@@ -1754,25 +1729,20 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
Value *X;
ArrayRef<int> MaskC;
int SplatIndex;
- BinaryOperator *BO;
+ Value *Y, *OtherOp;
if (!match(LHS,
m_OneUse(m_Shuffle(m_Value(X), m_Undef(), m_Mask(MaskC)))) ||
!match(MaskC, m_SplatOrUndefMask(SplatIndex)) ||
- X->getType() != Inst.getType() || !match(RHS, m_OneUse(m_BinOp(BO))) ||
- BO->getOpcode() != Opcode)
+ X->getType() != Inst.getType() ||
+ !match(RHS, m_OneUse(m_BinOp(Opcode, m_Value(Y), m_Value(OtherOp)))))
return nullptr;
// FIXME: This may not be safe if the analysis allows undef elements. By
// moving 'Y' before the splat shuffle, we are implicitly assuming
// that it is not undef/poison at the splat index.
- Value *Y, *OtherOp;
- if (isSplatValue(BO->getOperand(0), SplatIndex)) {
- Y = BO->getOperand(0);
- OtherOp = BO->getOperand(1);
- } else if (isSplatValue(BO->getOperand(1), SplatIndex)) {
- Y = BO->getOperand(1);
- OtherOp = BO->getOperand(0);
- } else {
+ if (isSplatValue(OtherOp, SplatIndex)) {
+ std::swap(Y, OtherOp);
+ } else if (!isSplatValue(Y, SplatIndex)) {
return nullptr;
}
@@ -1788,7 +1758,7 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
// dropped to be safe.
if (isa<FPMathOperator>(R)) {
R->copyFastMathFlags(&Inst);
- R->andIRFlags(BO);
+ R->andIRFlags(RHS);
}
if (auto *NewInstBO = dyn_cast<BinaryOperator>(NewBO))
NewInstBO->copyIRFlags(R);
@@ -1896,7 +1866,8 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
Type *GEPType = GEP.getType();
Type *GEPEltType = GEP.getSourceElementType();
bool IsGEPSrcEleScalable = isa<ScalableVectorType>(GEPEltType);
- if (Value *V = SimplifyGEPInst(GEPEltType, Ops, SQ.getWithInstruction(&GEP)))
+ if (Value *V = SimplifyGEPInst(GEPEltType, Ops, GEP.isInBounds(),
+ SQ.getWithInstruction(&GEP)))
return replaceInstUsesWith(GEP, V);
// For vector geps, use the generic demanded vector support.
@@ -1905,7 +1876,7 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (auto *GEPFVTy = dyn_cast<FixedVectorType>(GEPType)) {
auto VWidth = GEPFVTy->getNumElements();
APInt UndefElts(VWidth, 0);
- APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
+ APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
if (Value *V = SimplifyDemandedVectorElts(&GEP, AllOnesEltMask,
UndefElts)) {
if (V != &GEP)
@@ -2117,10 +2088,12 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// -- have to recreate %src & %gep
// put NewSrc at same location as %src
Builder.SetInsertPoint(cast<Instruction>(PtrOp));
- auto *NewSrc = cast<GetElementPtrInst>(
- Builder.CreateGEP(GEPEltType, SO0, GO1, Src->getName()));
- NewSrc->setIsInBounds(Src->isInBounds());
- auto *NewGEP =
+ Value *NewSrc =
+ Builder.CreateGEP(GEPEltType, SO0, GO1, Src->getName());
+ // Propagate 'inbounds' if the new source was not constant-folded.
+ if (auto *NewSrcGEPI = dyn_cast<GetElementPtrInst>(NewSrc))
+ NewSrcGEPI->setIsInBounds(Src->isInBounds());
+ GetElementPtrInst *NewGEP =
GetElementPtrInst::Create(GEPEltType, NewSrc, {SO1});
NewGEP->setIsInBounds(GEP.isInBounds());
return NewGEP;
@@ -2128,18 +2101,6 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
}
}
}
-
- // Fold (gep(gep(Ptr,Idx0),Idx1) -> gep(Ptr,add(Idx0,Idx1))
- if (GO1->getType() == SO1->getType()) {
- bool NewInBounds = GEP.isInBounds() && Src->isInBounds();
- auto *NewIdx =
- Builder.CreateAdd(GO1, SO1, GEP.getName() + ".idx",
- /*HasNUW*/ false, /*HasNSW*/ NewInBounds);
- auto *NewGEP = GetElementPtrInst::Create(
- GEPEltType, Src->getPointerOperand(), {NewIdx});
- NewGEP->setIsInBounds(NewInBounds);
- return NewGEP;
- }
}
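
For reference, the block deleted above handled the single-index case of merging two GEPs over the same element type by adding their indices. In plain pointer arithmetic over a hypothetical i32 buffer, the identity it relied on is simply:

    #include <cassert>

    int main() {
      int Buf[16] = {};
      int *P = Buf;
      int *Twice = (P + 3) + 4; // gep(gep(P, 3), 4) over i32 elements
      int *Once  = P + (3 + 4); // gep(P, add(3, 4))
      assert(Twice == Once);    // both address the same element
    }
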
// Note that if our source is a gep chain itself then we wait for that
@@ -2647,6 +2608,13 @@ static bool isAllocSiteRemovable(Instruction *AI,
Users.emplace_back(I);
continue;
}
+
+ if (isReallocLikeFn(I, TLI, true)) {
+ Users.emplace_back(I);
+ Worklist.push_back(I);
+ continue;
+ }
+
return false;
case Instruction::Store: {
@@ -2834,15 +2802,33 @@ static Instruction *tryToMoveFreeBeforeNullTest(CallInst &FI,
// At this point, we know that everything in FreeInstrBB can be moved
// before TI.
- for (BasicBlock::iterator It = FreeInstrBB->begin(), End = FreeInstrBB->end();
- It != End;) {
- Instruction &Instr = *It++;
+ for (Instruction &Instr : llvm::make_early_inc_range(*FreeInstrBB)) {
if (&Instr == FreeInstrBBTerminator)
break;
Instr.moveBefore(TI);
}
assert(FreeInstrBB->size() == 1 &&
"Only the branch instruction should remain");
+
+ // Now that we've moved the call to free before the NULL check, we have to
+ // remove any attributes on its parameter that imply it's non-null, because
+ // those attributes might have only been valid because of the NULL check, and
+ // we can get miscompiles if we keep them. This is conservative if non-null is
+ // also implied by something other than the NULL check, but it's guaranteed to
+ // be correct, and the conservativeness won't matter in practice, since the
+ // attributes are irrelevant for the call to free itself and the pointer
+ // shouldn't be used after the call.
+ AttributeList Attrs = FI.getAttributes();
+ Attrs = Attrs.removeParamAttribute(FI.getContext(), 0, Attribute::NonNull);
+ Attribute Dereferenceable = Attrs.getParamAttr(0, Attribute::Dereferenceable);
+ if (Dereferenceable.isValid()) {
+ uint64_t Bytes = Dereferenceable.getDereferenceableBytes();
+ Attrs = Attrs.removeParamAttribute(FI.getContext(), 0,
+ Attribute::Dereferenceable);
+ Attrs = Attrs.addDereferenceableOrNullParamAttr(FI.getContext(), 0, Bytes);
+ }
+ FI.setAttributes(Attrs);
+
return &FI;
}
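
In source terms, the hazard the new attribute fixup addresses looks roughly like the hypothetical pair below (not code from the pass): before the transform, the call is only reached when the pointer is non-null, so its argument may legitimately carry nonnull or dereferenceable attributes; after hoisting the call above the test, those facts no longer hold for a null pointer, which is why nonnull is stripped and dereferenceable(N) is weakened to dereferenceable_or_null(N).

    #include <cstdlib>

    void before(void *P) {
      if (P)
        std::free(P);   // the argument is provably non-null on this path
    }

    void after(void *P) {
      std::free(P);     // free(nullptr) is a no-op, but P may now be null
    }
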
@@ -2861,6 +2847,15 @@ Instruction *InstCombinerImpl::visitFree(CallInst &FI) {
if (isa<ConstantPointerNull>(Op))
return eraseInstFromFunction(FI);
+ // If we had free(realloc(...)) with no intervening uses, then eliminate the
+ // realloc() entirely.
+ if (CallInst *CI = dyn_cast<CallInst>(Op)) {
+ if (CI->hasOneUse() && isReallocLikeFn(CI, &TLI, true)) {
+ return eraseInstFromFunction(
+ *replaceInstUsesWith(*CI, CI->getOperand(0)));
+ }
+ }
+
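
A hypothetical source-level picture of the new free(realloc(...)) fold: when the realloc result is used only by the free, the intermediate reallocation has no visible effect on the program, so InstCombine drops it and frees the original pointer instead.

    #include <cstdlib>

    void before(void *P, std::size_t N) {
      void *Q = std::realloc(P, N); // the only use of Q is the free below
      std::free(Q);
    }

    void after(void *P, std::size_t) {
      std::free(P);                 // what the fold produces
    }
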
// If we optimize for code size, try to move the call to free before the null
// test so that simplify cfg can remove the empty block and dead code
// elimination the branch. I.e., helps to turn something like:
@@ -2947,7 +2942,7 @@ Instruction *InstCombinerImpl::visitUnconditionalBranchInst(BranchInst &BI) {
auto GetLastSinkableStore = [](BasicBlock::iterator BBI) {
auto IsNoopInstrForStoreMerging = [](BasicBlock::iterator BBI) {
- return isa<DbgInfoIntrinsic>(BBI) ||
+ return BBI->isDebugOrPseudoInst() ||
(isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy());
};
@@ -3138,26 +3133,21 @@ Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) {
// checking for overflow.
const APInt *C;
if (match(WO->getRHS(), m_APInt(C))) {
- // Compute the no-wrap range [X,Y) for LHS given RHS=C, then
- // check for the inverted range using range offset trick (i.e.
- // use a subtract to shift the range to bottom of either the
- // signed or unsigned domain and then use a single compare to
- // check range membership).
+ // Compute the no-wrap range for LHS given RHS=C, then construct an
+ // equivalent icmp, potentially using an offset.
ConstantRange NWR =
ConstantRange::makeExactNoWrapRegion(WO->getBinaryOp(), *C,
WO->getNoWrapKind());
- APInt Min = WO->isSigned() ? NWR.getSignedMin() : NWR.getUnsignedMin();
- NWR = NWR.subtract(Min);
CmpInst::Predicate Pred;
- APInt NewRHSC;
- if (NWR.getEquivalentICmp(Pred, NewRHSC)) {
- auto *OpTy = WO->getRHS()->getType();
- auto *NewLHS = Builder.CreateSub(WO->getLHS(),
- ConstantInt::get(OpTy, Min));
- return new ICmpInst(ICmpInst::getInversePredicate(Pred), NewLHS,
- ConstantInt::get(OpTy, NewRHSC));
- }
+ APInt NewRHSC, Offset;
+ NWR.getEquivalentICmp(Pred, NewRHSC, Offset);
+ auto *OpTy = WO->getRHS()->getType();
+ auto *NewLHS = WO->getLHS();
+ if (Offset != 0)
+ NewLHS = Builder.CreateAdd(NewLHS, ConstantInt::get(OpTy, Offset));
+ return new ICmpInst(ICmpInst::getInversePredicate(Pred), NewLHS,
+ ConstantInt::get(OpTy, NewRHSC));
}
}
}
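
The rewritten fold asks ConstantRange for an equivalent icmp directly, adds an offset to the LHS only when the no-wrap range is not based at zero, and then inverts the predicate to test for overflow. For the unsigned-add case this reduces to the familiar x > UMAX - C test; a small brute-force check of that equivalence over i8, using hypothetical helper names:

    #include <cassert>
    #include <cstdint>

    static bool overflows(uint8_t X, uint8_t C) {
      return static_cast<uint16_t>(X) + C > 0xFF;  // reference semantics
    }

    static bool rewritten(uint8_t X, uint8_t C) {
      return X > static_cast<uint8_t>(0xFF - C);   // icmp ugt X, UMAX - C
    }

    int main() {
      for (int X = 0; X <= 0xFF; ++X)
        for (int C = 0; C <= 0xFF; ++C)
          assert(overflows(uint8_t(X), uint8_t(C)) ==
                 rewritten(uint8_t(X), uint8_t(C)));
    }
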
@@ -3183,9 +3173,7 @@ Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) {
Instruction *NL = Builder.CreateLoad(EV.getType(), GEP);
    // Whatever aliasing information we had for the original load must also
// hold for the smaller load, so propagate the annotations.
- AAMDNodes Nodes;
- L->getAAMetadata(Nodes);
- NL->setAAMetadata(Nodes);
+ NL->setAAMetadata(L->getAAMetadata());
// Returning the load directly will cause the main loop to insert it in
// the wrong spot, so use replaceInstUsesWith().
return replaceInstUsesWith(EV, NL);
@@ -3568,8 +3556,14 @@ InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating(FreezeInst &OrigFI) {
// While we could change the other users of OrigOp to use freeze(OrigOp), that
// potentially reduces their optimization potential, so let's only do this iff
// the OrigOp is only used by the freeze.
- if (!OrigOpInst || !OrigOpInst->hasOneUse() || isa<PHINode>(OrigOp) ||
- canCreateUndefOrPoison(dyn_cast<Operator>(OrigOp)))
+ if (!OrigOpInst || !OrigOpInst->hasOneUse() || isa<PHINode>(OrigOp))
+ return nullptr;
+
+ // We can't push the freeze through an instruction which can itself create
+ // poison. If the only source of new poison is flags, we can simply
+ // strip them (since we know the only use is the freeze and nothing can
+  // benefit from them).
+ if (canCreateUndefOrPoison(cast<Operator>(OrigOp), /*ConsiderFlags*/ false))
return nullptr;
// If operand is guaranteed not to be poison, there is no need to add freeze
@@ -3585,6 +3579,8 @@ InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating(FreezeInst &OrigFI) {
return nullptr;
}
+ OrigOpInst->dropPoisonGeneratingFlags();
+
// If all operands are guaranteed to be non-poison, we can drop freeze.
if (!MaybePoisonOperand)
return OrigOp;
@@ -3668,7 +3664,7 @@ Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) {
/// instruction past all of the instructions between it and the end of its
/// block.
static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
- assert(I->getSingleUndroppableUse() && "Invariants didn't hold!");
+ assert(I->getUniqueUndroppableUser() && "Invariants didn't hold!");
BasicBlock *SrcBlock = I->getParent();
// Cannot move control-flow-involving, volatile loads, vaarg, etc.
@@ -3822,51 +3818,71 @@ bool InstCombinerImpl::run() {
// See if we can trivially sink this instruction to its user if we can
// prove that the successor is not executed more frequently than our block.
- if (EnableCodeSinking)
- if (Use *SingleUse = I->getSingleUndroppableUse()) {
- BasicBlock *BB = I->getParent();
- Instruction *UserInst = cast<Instruction>(SingleUse->getUser());
- BasicBlock *UserParent;
-
- // Get the block the use occurs in.
- if (PHINode *PN = dyn_cast<PHINode>(UserInst))
- UserParent = PN->getIncomingBlock(*SingleUse);
- else
- UserParent = UserInst->getParent();
-
- // Try sinking to another block. If that block is unreachable, then do
- // not bother. SimplifyCFG should handle it.
- if (UserParent != BB && DT.isReachableFromEntry(UserParent)) {
- // See if the user is one of our successors that has only one
- // predecessor, so that we don't have to split the critical edge.
- bool ShouldSink = UserParent->getUniquePredecessor() == BB;
- // Another option where we can sink is a block that ends with a
- // terminator that does not pass control to other block (such as
- // return or unreachable). In this case:
- // - I dominates the User (by SSA form);
- // - the User will be executed at most once.
- // So sinking I down to User is always profitable or neutral.
- if (!ShouldSink) {
- auto *Term = UserParent->getTerminator();
- ShouldSink = isa<ReturnInst>(Term) || isa<UnreachableInst>(Term);
- }
- if (ShouldSink) {
- assert(DT.dominates(BB, UserParent) &&
- "Dominance relation broken?");
- // Okay, the CFG is simple enough, try to sink this instruction.
- if (TryToSinkInstruction(I, UserParent)) {
- LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
- MadeIRChange = true;
- // We'll add uses of the sunk instruction below, but since sinking
- // can expose opportunities for it's *operands* add them to the
- // worklist
- for (Use &U : I->operands())
- if (Instruction *OpI = dyn_cast<Instruction>(U.get()))
- Worklist.push(OpI);
- }
+ // Return the UserBlock if successful.
+ auto getOptionalSinkBlockForInst =
+ [this](Instruction *I) -> Optional<BasicBlock *> {
+ if (!EnableCodeSinking)
+ return None;
+ auto *UserInst = cast_or_null<Instruction>(I->getUniqueUndroppableUser());
+ if (!UserInst)
+ return None;
+
+ BasicBlock *BB = I->getParent();
+ BasicBlock *UserParent = nullptr;
+
+ // Special handling for Phi nodes - get the block the use occurs in.
+ if (PHINode *PN = dyn_cast<PHINode>(UserInst)) {
+ for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
+ if (PN->getIncomingValue(i) == I) {
+ // Bail out if we have uses in different blocks. We don't do any
+        // sophisticated analysis (i.e. finding the NearestCommonDominator of these
+ // use blocks).
+ if (UserParent && UserParent != PN->getIncomingBlock(i))
+ return None;
+ UserParent = PN->getIncomingBlock(i);
}
}
+ assert(UserParent && "expected to find user block!");
+ } else
+ UserParent = UserInst->getParent();
+
+ // Try sinking to another block. If that block is unreachable, then do
+ // not bother. SimplifyCFG should handle it.
+ if (UserParent == BB || !DT.isReachableFromEntry(UserParent))
+ return None;
+
+ auto *Term = UserParent->getTerminator();
+ // See if the user is one of our successors that has only one
+ // predecessor, so that we don't have to split the critical edge.
+ // Another option where we can sink is a block that ends with a
+    // terminator that does not pass control to another block (such as
+ // return or unreachable). In this case:
+ // - I dominates the User (by SSA form);
+ // - the User will be executed at most once.
+ // So sinking I down to User is always profitable or neutral.
+ if (UserParent->getUniquePredecessor() == BB ||
+ (isa<ReturnInst>(Term) || isa<UnreachableInst>(Term))) {
+ assert(DT.dominates(BB, UserParent) && "Dominance relation broken?");
+ return UserParent;
}
+ return None;
+ };
+
+ auto OptBB = getOptionalSinkBlockForInst(I);
+ if (OptBB) {
+ auto *UserParent = *OptBB;
+ // Okay, the CFG is simple enough, try to sink this instruction.
+ if (TryToSinkInstruction(I, UserParent)) {
+ LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
+ MadeIRChange = true;
+ // We'll add uses of the sunk instruction below, but since
+      // sinking can expose opportunities for its *operands*, add
+      // them to the worklist.
+ for (Use &U : I->operands())
+ if (Instruction *OpI = dyn_cast<Instruction>(U.get()))
+ Worklist.push(OpI);
+ }
+ }
// Now that we have an instruction, try combining it to simplify it.
Builder.SetInsertPoint(I);
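
Stripped of LLVM types, the decision the new getOptionalSinkBlockForInst lambda encodes is: reject the user's block if it is our own block or unreachable, and accept it if either our block is its unique predecessor or it ends in a terminator that never passes control onward, so the sunk instruction still executes at most once. A condensed restatement with a hypothetical Block struct (C++17):

    #include <optional>

    struct Block {
      Block *UniquePred = nullptr;   // non-null iff exactly one predecessor
      bool EndsInReturnOrUnreachable = false;
      bool ReachableFromEntry = true;
    };

    std::optional<Block *> sinkTarget(Block *BB, Block *UserParent) {
      if (UserParent == BB || !UserParent->ReachableFromEntry)
        return std::nullopt;
      if (UserParent->UniquePred == BB || UserParent->EndsInReturnOrUnreachable)
        return UserParent;           // sinking is profitable or neutral
      return std::nullopt;
    }
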
@@ -3994,13 +4010,13 @@ public:
/// whose condition is a known constant, we only visit the reachable successors.
static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
const TargetLibraryInfo *TLI,
- InstCombineWorklist &ICWorklist) {
+ InstructionWorklist &ICWorklist) {
bool MadeIRChange = false;
SmallPtrSet<BasicBlock *, 32> Visited;
SmallVector<BasicBlock*, 256> Worklist;
Worklist.push_back(&F.front());
- SmallVector<Instruction*, 128> InstrsForInstCombineWorklist;
+ SmallVector<Instruction *, 128> InstrsForInstructionWorklist;
DenseMap<Constant *, Constant *> FoldedConstants;
AliasScopeTracker SeenAliasScopes;
@@ -4011,25 +4027,23 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
if (!Visited.insert(BB).second)
continue;
- for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
- Instruction *Inst = &*BBI++;
-
+ for (Instruction &Inst : llvm::make_early_inc_range(*BB)) {
// ConstantProp instruction if trivially constant.
- if (!Inst->use_empty() &&
- (Inst->getNumOperands() == 0 || isa<Constant>(Inst->getOperand(0))))
- if (Constant *C = ConstantFoldInstruction(Inst, DL, TLI)) {
- LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << *Inst
+ if (!Inst.use_empty() &&
+ (Inst.getNumOperands() == 0 || isa<Constant>(Inst.getOperand(0))))
+ if (Constant *C = ConstantFoldInstruction(&Inst, DL, TLI)) {
+ LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << Inst
<< '\n');
- Inst->replaceAllUsesWith(C);
+ Inst.replaceAllUsesWith(C);
++NumConstProp;
- if (isInstructionTriviallyDead(Inst, TLI))
- Inst->eraseFromParent();
+ if (isInstructionTriviallyDead(&Inst, TLI))
+ Inst.eraseFromParent();
MadeIRChange = true;
continue;
}
// See if we can constant fold its operands.
- for (Use &U : Inst->operands()) {
+ for (Use &U : Inst.operands()) {
if (!isa<ConstantVector>(U) && !isa<ConstantExpr>(U))
continue;
@@ -4039,7 +4053,7 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
FoldRes = ConstantFoldConstant(C, DL, TLI);
if (FoldRes != C) {
- LLVM_DEBUG(dbgs() << "IC: ConstFold operand of: " << *Inst
+ LLVM_DEBUG(dbgs() << "IC: ConstFold operand of: " << Inst
<< "\n Old = " << *C
<< "\n New = " << *FoldRes << '\n');
U = FoldRes;
@@ -4050,9 +4064,9 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
// Skip processing debug and pseudo intrinsics in InstCombine. Processing
     // these call instructions consumes a non-trivial amount of time and
// provides no value for the optimization.
- if (!Inst->isDebugOrPseudoInst()) {
- InstrsForInstCombineWorklist.push_back(Inst);
- SeenAliasScopes.analyse(Inst);
+ if (!Inst.isDebugOrPseudoInst()) {
+ InstrsForInstructionWorklist.push_back(&Inst);
+ SeenAliasScopes.analyse(&Inst);
}
}
@@ -4097,8 +4111,8 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
// of the function down. This jives well with the way that it adds all uses
// of instructions to the worklist after doing a transformation, thus avoiding
// some N^2 behavior in pathological cases.
- ICWorklist.reserve(InstrsForInstCombineWorklist.size());
- for (Instruction *Inst : reverse(InstrsForInstCombineWorklist)) {
+ ICWorklist.reserve(InstrsForInstructionWorklist.size());
+ for (Instruction *Inst : reverse(InstrsForInstructionWorklist)) {
// DCE instruction if trivially dead. As we iterate in reverse program
// order here, we will clean up whole chains of dead instructions.
if (isInstructionTriviallyDead(Inst, TLI) ||
@@ -4118,7 +4132,7 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
}
static bool combineInstructionsOverFunction(
- Function &F, InstCombineWorklist &Worklist, AliasAnalysis *AA,
+ Function &F, InstructionWorklist &Worklist, AliasAnalysis *AA,
AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI,
DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
ProfileSummaryInfo *PSI, unsigned MaxIterations, LoopInfo *LI) {
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 0d4ca0bcecfb..b56329ad76ae 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -26,6 +26,7 @@
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/StackSafetyAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/BinaryFormat/MachO.h"
@@ -47,6 +48,7 @@
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
@@ -176,7 +178,15 @@ const char kAMDGPUAddressPrivateName[] = "llvm.amdgcn.is.private";
// Accesses sizes are powers of two: 1, 2, 4, 8, 16.
static const size_t kNumberOfAccessSizes = 5;
-static const unsigned kAllocaRzSize = 32;
+static const uint64_t kAllocaRzSize = 32;
+
+// ASanAccessInfo implementation constants.
+constexpr size_t kCompileKernelShift = 0;
+constexpr size_t kCompileKernelMask = 0x1;
+constexpr size_t kAccessSizeIndexShift = 1;
+constexpr size_t kAccessSizeIndexMask = 0xf;
+constexpr size_t kIsWriteShift = 5;
+constexpr size_t kIsWriteMask = 0x1;
// Command-line flags.
@@ -203,6 +213,11 @@ static cl::opt<bool> ClInstrumentWrites(
"asan-instrument-writes", cl::desc("instrument write instructions"),
cl::Hidden, cl::init(true));
+static cl::opt<bool>
+ ClUseStackSafety("asan-use-stack-safety", cl::Hidden, cl::init(false),
+ cl::Hidden, cl::desc("Use Stack Safety analysis results"),
+ cl::Optional);
+
static cl::opt<bool> ClInstrumentAtomics(
"asan-instrument-atomics",
cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden,
@@ -348,6 +363,10 @@ static cl::opt<uint64_t>
static cl::opt<bool> ClOpt("asan-opt", cl::desc("Optimize instrumentation"),
cl::Hidden, cl::init(true));
+static cl::opt<bool> ClOptimizeCallbacks("asan-optimize-callbacks",
+ cl::desc("Optimize callbacks"),
+ cl::Hidden, cl::init(false));
+
static cl::opt<bool> ClOptSameTemp(
"asan-opt-same-temp", cl::desc("Instrument the same temp just once"),
cl::Hidden, cl::init(true));
@@ -442,7 +461,7 @@ struct ShadowMapping {
} // end anonymous namespace
-static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize,
+static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize,
bool IsKasan) {
bool IsAndroid = TargetTriple.isAndroid();
bool IsIOS = TargetTriple.isiOS() || TargetTriple.isWatchOS();
@@ -559,6 +578,32 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize,
return Mapping;
}
+namespace llvm {
+void getAddressSanitizerParams(const Triple &TargetTriple, int LongSize,
+ bool IsKasan, uint64_t *ShadowBase,
+ int *MappingScale, bool *OrShadowOffset) {
+ auto Mapping = getShadowMapping(TargetTriple, LongSize, IsKasan);
+ *ShadowBase = Mapping.Offset;
+ *MappingScale = Mapping.Scale;
+ *OrShadowOffset = Mapping.OrShadowOffset;
+}
+
+ASanAccessInfo::ASanAccessInfo(int32_t Packed)
+ : Packed(Packed),
+ AccessSizeIndex((Packed >> kAccessSizeIndexShift) & kAccessSizeIndexMask),
+ IsWrite((Packed >> kIsWriteShift) & kIsWriteMask),
+ CompileKernel((Packed >> kCompileKernelShift) & kCompileKernelMask) {}
+
+ASanAccessInfo::ASanAccessInfo(bool IsWrite, bool CompileKernel,
+ uint8_t AccessSizeIndex)
+ : Packed((IsWrite << kIsWriteShift) +
+ (CompileKernel << kCompileKernelShift) +
+ (AccessSizeIndex << kAccessSizeIndexShift)),
+ AccessSizeIndex(AccessSizeIndex), IsWrite(IsWrite),
+ CompileKernel(CompileKernel) {}
+
+} // namespace llvm
+
static uint64_t getRedzoneSizeForScale(int MappingScale) {
// Redzone used for stack and globals is at least 32 bytes.
// For scales 6 and 7, the redzone has to be 64 and 128 bytes respectively.
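
The new ASanAccessInfo packs its three fields into the 32-bit operand of the check callback using the kCompileKernel*/kAccessSizeIndex*/kIsWrite* constants introduced earlier in this file: bit 0 is compile-kernel, bits 1-4 are the access-size index, bit 5 is is-write. A standalone round-trip check of that encoding, with a hypothetical pack() helper:

    #include <cassert>
    #include <cstdint>

    constexpr int32_t pack(bool IsWrite, bool CompileKernel, uint8_t SizeIndex) {
      return (int32_t(IsWrite) << 5) | (int32_t(SizeIndex) << 1) |
             int32_t(CompileKernel);
    }

    int main() {
      int32_t P = pack(/*IsWrite=*/true, /*CompileKernel=*/false, /*SizeIndex=*/3);
      assert(((P >> 0) & 0x1) == 0);   // CompileKernel
      assert(((P >> 1) & 0xf) == 3);   // AccessSizeIndex
      assert(((P >> 5) & 0x1) == 1);   // IsWrite
    }
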
@@ -609,6 +654,7 @@ char ASanGlobalsMetadataWrapperPass::ID = 0;
/// AddressSanitizer: instrument the code in module to find memory bugs.
struct AddressSanitizer {
AddressSanitizer(Module &M, const GlobalsMetadata *GlobalsMD,
+ const StackSafetyGlobalInfo *SSGI,
bool CompileKernel = false, bool Recover = false,
bool UseAfterScope = false,
AsanDetectStackUseAfterReturnMode UseAfterReturn =
@@ -619,10 +665,12 @@ struct AddressSanitizer {
UseAfterScope(UseAfterScope || ClUseAfterScope),
UseAfterReturn(ClUseAfterReturn.getNumOccurrences() ? ClUseAfterReturn
: UseAfterReturn),
- GlobalsMD(*GlobalsMD) {
+ GlobalsMD(*GlobalsMD), SSGI(SSGI) {
C = &(M.getContext());
LongSize = M.getDataLayout().getPointerSizeInBits();
IntptrTy = Type::getIntNTy(*C, LongSize);
+ Int8PtrTy = Type::getInt8PtrTy(*C);
+ Int32Ty = Type::getInt32Ty(*C);
TargetTriple = Triple(M.getTargetTriple());
Mapping = getShadowMapping(TargetTriple, LongSize, this->CompileKernel);
@@ -646,7 +694,7 @@ struct AddressSanitizer {
/// Check if we want (and can) handle this alloca.
bool isInterestingAlloca(const AllocaInst &AI);
- bool ignoreAccess(Value *Ptr);
+ bool ignoreAccess(Instruction *Inst, Value *Ptr);
void getInterestingMemoryOperands(
Instruction *I, SmallVectorImpl<InterestingMemoryOperand> &Interesting);
@@ -713,6 +761,8 @@ private:
bool UseAfterScope;
AsanDetectStackUseAfterReturnMode UseAfterReturn;
Type *IntptrTy;
+ Type *Int8PtrTy;
+ Type *Int32Ty;
ShadowMapping Mapping;
FunctionCallee AsanHandleNoReturnFunc;
FunctionCallee AsanPtrCmpFunction, AsanPtrSubFunction;
@@ -729,6 +779,7 @@ private:
FunctionCallee AsanMemmove, AsanMemcpy, AsanMemset;
Value *LocalDynamicShadow = nullptr;
const GlobalsMetadata &GlobalsMD;
+ const StackSafetyGlobalInfo *SSGI;
DenseMap<const AllocaInst *, bool> ProcessedAllocas;
FunctionCallee AMDGPUAddressShared;
@@ -755,16 +806,22 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<ASanGlobalsMetadataWrapperPass>();
+ if (ClUseStackSafety)
+ AU.addRequired<StackSafetyGlobalInfoWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
}
bool runOnFunction(Function &F) override {
GlobalsMetadata &GlobalsMD =
getAnalysis<ASanGlobalsMetadataWrapperPass>().getGlobalsMD();
+ const StackSafetyGlobalInfo *const SSGI =
+ ClUseStackSafety
+ ? &getAnalysis<StackSafetyGlobalInfoWrapperPass>().getResult()
+ : nullptr;
const TargetLibraryInfo *TLI =
&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
- AddressSanitizer ASan(*F.getParent(), &GlobalsMD, CompileKernel, Recover,
- UseAfterScope, UseAfterReturn);
+ AddressSanitizer ASan(*F.getParent(), &GlobalsMD, SSGI, CompileKernel,
+ Recover, UseAfterScope, UseAfterReturn);
return ASan.instrumentFunction(F, TLI);
}
@@ -1212,20 +1269,15 @@ GlobalsMetadata ASanGlobalsMetadataAnalysis::run(Module &M,
return GlobalsMetadata(M);
}
-AddressSanitizerPass::AddressSanitizerPass(
- bool CompileKernel, bool Recover, bool UseAfterScope,
- AsanDetectStackUseAfterReturnMode UseAfterReturn)
- : CompileKernel(CompileKernel), Recover(Recover),
- UseAfterScope(UseAfterScope), UseAfterReturn(UseAfterReturn) {}
-
PreservedAnalyses AddressSanitizerPass::run(Function &F,
AnalysisManager<Function> &AM) {
auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
Module &M = *F.getParent();
if (auto *R = MAMProxy.getCachedResult<ASanGlobalsMetadataAnalysis>(M)) {
const TargetLibraryInfo *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
- AddressSanitizer Sanitizer(M, R, CompileKernel, Recover, UseAfterScope,
- UseAfterReturn);
+ AddressSanitizer Sanitizer(M, R, nullptr, Options.CompileKernel,
+ Options.Recover, Options.UseAfterScope,
+ Options.UseAfterReturn);
if (Sanitizer.instrumentFunction(F, TLI))
return PreservedAnalyses::none();
return PreservedAnalyses::all();
@@ -1237,21 +1289,51 @@ PreservedAnalyses AddressSanitizerPass::run(Function &F,
return PreservedAnalyses::all();
}
+void AddressSanitizerPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<AddressSanitizerPass> *>(this)->printPipeline(
+ OS, MapClassName2PassName);
+ OS << "<";
+ if (Options.CompileKernel)
+ OS << "kernel";
+ OS << ">";
+}
+
+void ModuleAddressSanitizerPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<ModuleAddressSanitizerPass> *>(this)->printPipeline(
+ OS, MapClassName2PassName);
+ OS << "<";
+ if (Options.CompileKernel)
+ OS << "kernel";
+ OS << ">";
+}
+
ModuleAddressSanitizerPass::ModuleAddressSanitizerPass(
- bool CompileKernel, bool Recover, bool UseGlobalGC, bool UseOdrIndicator,
- AsanDtorKind DestructorKind)
- : CompileKernel(CompileKernel), Recover(Recover), UseGlobalGC(UseGlobalGC),
+ const AddressSanitizerOptions &Options, bool UseGlobalGC,
+ bool UseOdrIndicator, AsanDtorKind DestructorKind)
+ : Options(Options), UseGlobalGC(UseGlobalGC),
UseOdrIndicator(UseOdrIndicator), DestructorKind(DestructorKind) {}
PreservedAnalyses ModuleAddressSanitizerPass::run(Module &M,
- AnalysisManager<Module> &AM) {
- GlobalsMetadata &GlobalsMD = AM.getResult<ASanGlobalsMetadataAnalysis>(M);
- ModuleAddressSanitizer Sanitizer(M, &GlobalsMD, CompileKernel, Recover,
- UseGlobalGC, UseOdrIndicator,
- DestructorKind);
- if (Sanitizer.instrumentModule(M))
- return PreservedAnalyses::none();
- return PreservedAnalyses::all();
+ ModuleAnalysisManager &MAM) {
+ GlobalsMetadata &GlobalsMD = MAM.getResult<ASanGlobalsMetadataAnalysis>(M);
+ ModuleAddressSanitizer ModuleSanitizer(M, &GlobalsMD, Options.CompileKernel,
+ Options.Recover, UseGlobalGC,
+ UseOdrIndicator, DestructorKind);
+ bool Modified = false;
+ auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ const StackSafetyGlobalInfo *const SSGI =
+ ClUseStackSafety ? &MAM.getResult<StackSafetyGlobalAnalysis>(M) : nullptr;
+ for (Function &F : M) {
+ AddressSanitizer FunctionSanitizer(
+ M, &GlobalsMD, SSGI, Options.CompileKernel, Options.Recover,
+ Options.UseAfterScope, Options.UseAfterReturn);
+ const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
+ Modified |= FunctionSanitizer.instrumentFunction(F, &TLI);
+ }
+ Modified |= ModuleSanitizer.instrumentModule(M);
+ return Modified ? PreservedAnalyses::none() : PreservedAnalyses::all();
}
INITIALIZE_PASS(ASanGlobalsMetadataWrapperPass, "asan-globals-md",
@@ -1266,6 +1348,7 @@ INITIALIZE_PASS_BEGIN(
"AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false,
false)
INITIALIZE_PASS_DEPENDENCY(ASanGlobalsMetadataWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(StackSafetyGlobalInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(
AddressSanitizerLegacyPass, "asan",
@@ -1404,7 +1487,7 @@ bool AddressSanitizer::isInterestingAlloca(const AllocaInst &AI) {
return IsInteresting;
}
-bool AddressSanitizer::ignoreAccess(Value *Ptr) {
+bool AddressSanitizer::ignoreAccess(Instruction *Inst, Value *Ptr) {
   // Instrument accesses from different address spaces only for AMDGPU.
Type *PtrTy = cast<PointerType>(Ptr->getType()->getScalarType());
if (PtrTy->getPointerAddressSpace() != 0 &&
@@ -1425,6 +1508,10 @@ bool AddressSanitizer::ignoreAccess(Value *Ptr) {
if (ClSkipPromotableAllocas && !isInterestingAlloca(*AI))
return true;
+ if (SSGI != nullptr && SSGI->stackAccessIsSafe(*Inst) &&
+ findAllocaForValue(Ptr))
+ return true;
+
return false;
}
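
A hypothetical source-level example of an access the new check lets ASan skip when -asan-use-stack-safety is enabled: the pointer resolves to a local alloca and StackSafetyAnalysis proves the access stays in bounds, so the shadow check would be pure overhead.

    int provablySafe() {
      int Buf[4] = {1, 2, 3, 4};
      return Buf[2];   // constant, in-bounds access to a non-escaping local
    }
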
@@ -1439,22 +1526,22 @@ void AddressSanitizer::getInterestingMemoryOperands(
return;
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- if (!ClInstrumentReads || ignoreAccess(LI->getPointerOperand()))
+ if (!ClInstrumentReads || ignoreAccess(LI, LI->getPointerOperand()))
return;
Interesting.emplace_back(I, LI->getPointerOperandIndex(), false,
LI->getType(), LI->getAlign());
} else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
- if (!ClInstrumentWrites || ignoreAccess(SI->getPointerOperand()))
+    if (!ClInstrumentWrites || ignoreAccess(I, SI->getPointerOperand()))
return;
Interesting.emplace_back(I, SI->getPointerOperandIndex(), true,
SI->getValueOperand()->getType(), SI->getAlign());
} else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
- if (!ClInstrumentAtomics || ignoreAccess(RMW->getPointerOperand()))
+    if (!ClInstrumentAtomics || ignoreAccess(I, RMW->getPointerOperand()))
return;
Interesting.emplace_back(I, RMW->getPointerOperandIndex(), true,
RMW->getValOperand()->getType(), None);
} else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
- if (!ClInstrumentAtomics || ignoreAccess(XCHG->getPointerOperand()))
+    if (!ClInstrumentAtomics || ignoreAccess(I, XCHG->getPointerOperand()))
return;
Interesting.emplace_back(I, XCHG->getPointerOperandIndex(), true,
XCHG->getCompareOperand()->getType(), None);
@@ -1469,7 +1556,7 @@ void AddressSanitizer::getInterestingMemoryOperands(
return;
auto BasePtr = CI->getOperand(OpOffset);
- if (ignoreAccess(BasePtr))
+    if (ignoreAccess(I, BasePtr))
return;
auto Ty = cast<PointerType>(BasePtr->getType())->getElementType();
MaybeAlign Alignment = Align(1);
@@ -1479,9 +1566,9 @@ void AddressSanitizer::getInterestingMemoryOperands(
Value *Mask = CI->getOperand(2 + OpOffset);
Interesting.emplace_back(I, OpOffset, IsWrite, Ty, Alignment, Mask);
} else {
- for (unsigned ArgNo = 0; ArgNo < CI->getNumArgOperands(); ArgNo++) {
+ for (unsigned ArgNo = 0; ArgNo < CI->arg_size(); ArgNo++) {
if (!ClInstrumentByval || !CI->isByValArgument(ArgNo) ||
- ignoreAccess(CI->getArgOperand(ArgNo)))
+            ignoreAccess(I, CI->getArgOperand(ArgNo)))
continue;
Type *Ty = CI->getParamByValType(ArgNo);
Interesting.emplace_back(I, ArgNo, false, Ty, Align(1));
@@ -1738,9 +1825,20 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
}
IRBuilder<> IRB(InsertBefore);
- Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize);
+ const ASanAccessInfo AccessInfo(IsWrite, CompileKernel, AccessSizeIndex);
+
+ if (UseCalls && ClOptimizeCallbacks) {
+ const ASanAccessInfo AccessInfo(IsWrite, CompileKernel, AccessSizeIndex);
+ Module *M = IRB.GetInsertBlock()->getParent()->getParent();
+ IRB.CreateCall(
+ Intrinsic::getDeclaration(M, Intrinsic::asan_check_memaccess),
+ {IRB.CreatePointerCast(Addr, Int8PtrTy),
+ ConstantInt::get(Int32Ty, AccessInfo.Packed)});
+ return;
+ }
+ Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
if (UseCalls) {
if (Exp == 0)
IRB.CreateCall(AsanMemoryAccessCallback[IsWrite][0][AccessSizeIndex],
@@ -1936,7 +2034,8 @@ bool ModuleAddressSanitizer::shouldInstrumentGlobal(GlobalVariable *G) const {
// Globals from llvm.metadata aren't emitted, do not instrument them.
if (Section == "llvm.metadata") return false;
// Do not instrument globals from special LLVM sections.
- if (Section.find("__llvm") != StringRef::npos || Section.find("__LLVM") != StringRef::npos) return false;
+ if (Section.contains("__llvm") || Section.contains("__LLVM"))
+ return false;
// Do not instrument function pointers to initialization and termination
// routines: dynamic linker will not properly handle redzones.
@@ -2133,8 +2232,7 @@ Instruction *ModuleAddressSanitizer::CreateAsanModuleDtor(Module &M) {
AsanDtorFunction = Function::createWithDefaultAttr(
FunctionType::get(Type::getVoidTy(*C), false),
GlobalValue::InternalLinkage, 0, kAsanModuleDtorName, &M);
- AsanDtorFunction->addAttribute(AttributeList::FunctionIndex,
- Attribute::NoUnwind);
+ AsanDtorFunction->addFnAttr(Attribute::NoUnwind);
// Ensure Dtor cannot be discarded, even if in a comdat.
appendToUsed(M, {AsanDtorFunction});
BasicBlock *AsanDtorBB = BasicBlock::Create(*C, "", AsanDtorFunction);
@@ -2753,7 +2851,7 @@ void AddressSanitizer::markEscapedLocalAllocas(Function &F) {
IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I);
if (II && II->getIntrinsicID() == Intrinsic::localescape) {
// We found a call. Mark all the allocas passed in as uninteresting.
- for (Value *Arg : II->arg_operands()) {
+ for (Value *Arg : II->args()) {
AllocaInst *AI = dyn_cast<AllocaInst>(Arg->stripPointerCasts());
assert(AI && AI->isStaticAlloca() &&
"non-static alloca arg to localescape");
@@ -2774,6 +2872,8 @@ bool AddressSanitizer::suppressInstrumentationSiteForDebug(int &Instrumented) {
bool AddressSanitizer::instrumentFunction(Function &F,
const TargetLibraryInfo *TLI) {
+ if (F.empty())
+ return false;
if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) return false;
if (!ClDebugFunc.empty() && ClDebugFunc == F.getName()) return false;
if (F.getName().startswith("__asan_")) return false;
@@ -2916,7 +3016,8 @@ bool AddressSanitizer::LooksLikeCodeInBug11395(Instruction *I) {
if (LongSize != 32) return false;
CallInst *CI = dyn_cast<CallInst>(I);
if (!CI || !CI->isInlineAsm()) return false;
- if (CI->getNumArgOperands() <= 5) return false;
+ if (CI->arg_size() <= 5)
+ return false;
// We have inline assembly with quite a few arguments.
return true;
}
@@ -3112,7 +3213,7 @@ Value *FunctionStackPoisoner::createAllocaForLayout(
assert(Alloca->isStaticAlloca());
}
assert((ClRealignStack & (ClRealignStack - 1)) == 0);
- size_t FrameAlignment = std::max(L.FrameAlignment, (size_t)ClRealignStack);
+ uint64_t FrameAlignment = std::max(L.FrameAlignment, uint64_t(ClRealignStack));
Alloca->setAlignment(Align(FrameAlignment));
return IRB.CreatePointerCast(Alloca, IntptrTy);
}
@@ -3256,8 +3357,8 @@ void FunctionStackPoisoner::processStaticAllocas() {
// Minimal header size (left redzone) is 4 pointers,
// i.e. 32 bytes on 64-bit platforms and 16 bytes in 32-bit platforms.
- size_t Granularity = 1ULL << Mapping.Scale;
- size_t MinHeaderSize = std::max((size_t)ASan.LongSize / 2, Granularity);
+ uint64_t Granularity = 1ULL << Mapping.Scale;
+ uint64_t MinHeaderSize = std::max((uint64_t)ASan.LongSize / 2, Granularity);
const ASanStackFrameLayout &L =
ComputeASanStackFrameLayout(SVD, Granularity, MinHeaderSize);
@@ -3511,7 +3612,7 @@ void FunctionStackPoisoner::poisonAlloca(Value *V, uint64_t Size,
void FunctionStackPoisoner::handleDynamicAllocaCall(AllocaInst *AI) {
IRBuilder<> IRB(AI);
- const unsigned Alignment = std::max(kAllocaRzSize, AI->getAlignment());
+ const uint64_t Alignment = std::max(kAllocaRzSize, AI->getAlignment());
const uint64_t AllocaRedzoneMask = kAllocaRzSize - 1;
Value *Zero = Constant::getNullValue(IntptrTy);
diff --git a/llvm/lib/Transforms/Instrumentation/CGProfile.cpp b/llvm/lib/Transforms/Instrumentation/CGProfile.cpp
index 9acd82c005e6..1a7f7a365ce4 100644
--- a/llvm/lib/Transforms/Instrumentation/CGProfile.cpp
+++ b/llvm/lib/Transforms/Instrumentation/CGProfile.cpp
@@ -53,6 +53,8 @@ static bool runCGProfilePass(
InstrProfSymtab Symtab;
auto UpdateCounts = [&](TargetTransformInfo &TTI, Function *F,
Function *CalledF, uint64_t NewCount) {
+ if (NewCount == 0)
+ return;
if (!CalledF || !TTI.isLoweredToCall(CalledF) ||
CalledF->hasDLLImportStorageClass())
return;
diff --git a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
index 3b4d80dc8023..497aac30c3f6 100644
--- a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
+++ b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
@@ -1553,11 +1553,11 @@ static bool negateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp,
SI->swapValues();
SI->swapProfMetadata();
if (Scope->TrueBiasedSelects.count(SI)) {
- assert(Scope->FalseBiasedSelects.count(SI) == 0 &&
+ assert(!Scope->FalseBiasedSelects.contains(SI) &&
"Must not be already in");
Scope->FalseBiasedSelects.insert(SI);
} else if (Scope->FalseBiasedSelects.count(SI)) {
- assert(Scope->TrueBiasedSelects.count(SI) == 0 &&
+ assert(!Scope->TrueBiasedSelects.contains(SI) &&
"Must not be already in");
Scope->TrueBiasedSelects.insert(SI);
}
@@ -1592,7 +1592,7 @@ static void insertTrivialPHIs(CHRScope *Scope,
SmallVector<Instruction *, 8> Users;
for (User *U : I.users()) {
if (auto *UI = dyn_cast<Instruction>(U)) {
- if (BlocksInScope.count(UI->getParent()) == 0 &&
+ if (!BlocksInScope.contains(UI->getParent()) &&
// Unless there's already a phi for I at the exit block.
!(isa<PHINode>(UI) && UI->getParent() == ExitBlock)) {
CHR_DEBUG(dbgs() << "V " << I << "\n");
@@ -1752,7 +1752,7 @@ void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) {
// Create the combined branch condition and constant-fold the branches/selects
// in the hot path.
fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr,
- ProfileCount ? ProfileCount.getValue() : 0);
+ ProfileCount.getValueOr(0));
}
// A helper for transformScopes. Clone the blocks in the scope (excluding the
diff --git a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
index 63aa84e4a77c..38c219ce3465 100644
--- a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -18,6 +18,9 @@
/// The analysis is based on automatic propagation of data flow labels (also
/// known as taint labels) through a program as it performs computation.
///
+/// Argument and return value labels are passed through TLS variables
+/// __dfsan_arg_tls and __dfsan_retval_tls.
+///
/// Each byte of application memory is backed by a shadow memory byte. The
/// shadow byte can represent up to 8 labels. On Linux/x86_64, memory is then
/// laid out as follows:
@@ -144,20 +147,22 @@ static cl::opt<bool> ClPreserveAlignment(
// to the "native" (i.e. unsanitized) ABI. Unless the ABI list contains
// additional annotations for those functions, a call to one of those functions
// will produce a warning message, as the labelling behaviour of the function is
-// unknown. The other supported annotations are "functional" and "discard",
-// which are described below under DataFlowSanitizer::WrapperKind.
+// unknown. The other supported annotations for uninstrumented functions are
+// "functional" and "discard", which are described below under
+// DataFlowSanitizer::WrapperKind.
+// Functions will often be labelled with both "uninstrumented" and one of
+// "functional" or "discard". This will leave the function unchanged by this
+// pass, and create a wrapper function that will call the original.
+//
+// Instrumented functions can also be annotated as "force_zero_labels", which
+// will make the pass set zero labels for all of their shadow and return values.
+// Functions should never be labelled with both "force_zero_labels" and
+// "uninstrumented" or any of the uninstrumented wrapper kinds.
static cl::list<std::string> ClABIListFiles(
"dfsan-abilist",
cl::desc("File listing native ABI functions and how the pass treats them"),
cl::Hidden);
-// Controls whether the pass uses IA_Args or IA_TLS as the ABI for instrumented
-// functions (see DataFlowSanitizer::InstrumentedABI below).
-static cl::opt<bool>
- ClArgsABI("dfsan-args-abi",
- cl::desc("Use the argument ABI rather than the TLS ABI"),
- cl::Hidden);
-
// Controls whether the pass includes or ignores the labels of pointers in load
// instructions.
static cl::opt<bool> ClCombinePointerLabelsOnLoad(
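
Concretely, the annotations described above are supplied through files passed with -dfsan-abilist; a hypothetical list illustrating them (SpecialCaseList syntax, placeholder function names):

    # uninstrumented + discard: call as-is, the return value gets a zero label
    fun:main=uninstrumented
    fun:main=discard
    # uninstrumented + functional: call as-is, return label = union of argument labels
    fun:sqrt=uninstrumented
    fun:sqrt=functional
    # uninstrumented + custom: route the call through a __dfsw_ wrapper
    fun:memcpy=uninstrumented
    fun:memcpy=custom
    # instrumented, but always produces zero labels
    fun:label_free_helper=force_zero_labels
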
@@ -349,18 +354,18 @@ transformFunctionAttributes(const TransformedFunction &TransformedFunction,
for (unsigned I = 0, IE = TransformedFunction.ArgumentIndexMapping.size();
I < IE; ++I) {
unsigned TransformedIndex = TransformedFunction.ArgumentIndexMapping[I];
- ArgumentAttributes[TransformedIndex] = CallSiteAttrs.getParamAttributes(I);
+ ArgumentAttributes[TransformedIndex] = CallSiteAttrs.getParamAttrs(I);
}
// Copy annotations on varargs arguments.
for (unsigned I = TransformedFunction.OriginalType->getNumParams(),
IE = CallSiteAttrs.getNumAttrSets();
I < IE; ++I) {
- ArgumentAttributes.push_back(CallSiteAttrs.getParamAttributes(I));
+ ArgumentAttributes.push_back(CallSiteAttrs.getParamAttrs(I));
}
- return AttributeList::get(Ctx, CallSiteAttrs.getFnAttributes(),
- CallSiteAttrs.getRetAttributes(),
+ return AttributeList::get(Ctx, CallSiteAttrs.getFnAttrs(),
+ CallSiteAttrs.getRetAttrs(),
llvm::makeArrayRef(ArgumentAttributes));
}
@@ -372,17 +377,6 @@ class DataFlowSanitizer {
enum { OriginWidthBits = 32, OriginWidthBytes = OriginWidthBits / 8 };
- /// Which ABI should be used for instrumented functions?
- enum InstrumentedABI {
- /// Argument and return value labels are passed through additional
- /// arguments and by modifying the return type.
- IA_Args,
-
- /// Argument and return value labels are passed through TLS variables
- /// __dfsan_arg_tls and __dfsan_retval_tls.
- IA_TLS
- };
-
/// How should calls to uninstrumented functions be handled?
enum WrapperKind {
/// This function is present in an uninstrumented form but we don't know
@@ -400,9 +394,7 @@ class DataFlowSanitizer {
/// Instead of calling the function, a custom wrapper __dfsw_F is called,
/// where F is the name of the function. This function may wrap the
- /// original function or provide its own implementation. This is similar to
- /// the IA_Args ABI, except that IA_Args uses a struct return type to
- /// pass the return value shadow in a register, while WK_Custom uses an
+ /// original function or provide its own implementation. WK_Custom uses an
/// extra pointer argument to return the shadow. This allows the wrapped
/// form of the function type to be expressed in C.
WK_Custom
@@ -469,10 +461,9 @@ class DataFlowSanitizer {
getShadowOriginAddress(Value *Addr, Align InstAlignment, Instruction *Pos);
bool isInstrumented(const Function *F);
bool isInstrumented(const GlobalAlias *GA);
- FunctionType *getArgsFunctionType(FunctionType *T);
+ bool isForceZeroLabels(const Function *F);
FunctionType *getTrampolineFunctionType(FunctionType *T);
TransformedFunction getCustomFunctionType(FunctionType *T);
- InstrumentedABI getInstrumentedABI();
WrapperKind getWrapperKind(Function *F);
void addGlobalNameSuffix(GlobalValue *GV);
Function *buildWrapperFunction(Function *F, StringRef NewFName,
@@ -496,18 +487,11 @@ class DataFlowSanitizer {
/// Returns whether the pass tracks origins. Supports only TLS ABI mode.
bool shouldTrackOrigins();
- /// Returns whether the pass tracks labels for struct fields and array
- /// indices. Supports only TLS ABI mode.
- bool shouldTrackFieldsAndIndices();
-
/// Returns a zero constant with the shadow type of OrigTy.
///
/// getZeroShadow({T1,T2,...}) = {getZeroShadow(T1),getZeroShadow(T2,...}
/// getZeroShadow([n x T]) = [n x getZeroShadow(T)]
/// getZeroShadow(other type) = i16(0)
- ///
- /// Note that a zero shadow is always i16(0) when shouldTrackFieldsAndIndices
- /// returns false.
Constant *getZeroShadow(Type *OrigTy);
/// Returns a zero constant with the shadow type of V's type.
Constant *getZeroShadow(Value *V);
@@ -520,9 +504,6 @@ class DataFlowSanitizer {
/// getShadowTy({T1,T2,...}) = {getShadowTy(T1),getShadowTy(T2),...}
/// getShadowTy([n x T]) = [n x getShadowTy(T)]
/// getShadowTy(other type) = i16
- ///
- /// Note that a shadow type is always i16 when shouldTrackFieldsAndIndices
- /// returns false.
Type *getShadowTy(Type *OrigTy);
/// Returns the shadow type of of V's type.
Type *getShadowTy(Value *V);
@@ -539,8 +520,8 @@ struct DFSanFunction {
DataFlowSanitizer &DFS;
Function *F;
DominatorTree DT;
- DataFlowSanitizer::InstrumentedABI IA;
bool IsNativeABI;
+ bool IsForceZeroLabels;
AllocaInst *LabelReturnAlloca = nullptr;
AllocaInst *OriginReturnAlloca = nullptr;
DenseMap<Value *, Value *> ValShadowMap;
@@ -571,8 +552,10 @@ struct DFSanFunction {
DenseMap<Value *, Value *> CachedCollapsedShadows;
DenseMap<Value *, std::set<Value *>> ShadowElements;
- DFSanFunction(DataFlowSanitizer &DFS, Function *F, bool IsNativeABI)
- : DFS(DFS), F(F), IA(DFS.getInstrumentedABI()), IsNativeABI(IsNativeABI) {
+ DFSanFunction(DataFlowSanitizer &DFS, Function *F, bool IsNativeABI,
+ bool IsForceZeroLabels)
+ : DFS(DFS), F(F), IsNativeABI(IsNativeABI),
+ IsForceZeroLabels(IsForceZeroLabels) {
DT.recalculate(*F);
}
@@ -787,17 +770,6 @@ DataFlowSanitizer::DataFlowSanitizer(
SpecialCaseList::createOrDie(AllABIListFiles, *vfs::getRealFileSystem()));
}
-FunctionType *DataFlowSanitizer::getArgsFunctionType(FunctionType *T) {
- SmallVector<Type *, 4> ArgTypes(T->param_begin(), T->param_end());
- ArgTypes.append(T->getNumParams(), PrimitiveShadowTy);
- if (T->isVarArg())
- ArgTypes.push_back(PrimitiveShadowPtrTy);
- Type *RetType = T->getReturnType();
- if (!RetType->isVoidTy())
- RetType = StructType::get(RetType, PrimitiveShadowTy);
- return FunctionType::get(RetType, ArgTypes, T->isVarArg());
-}
-
FunctionType *DataFlowSanitizer::getTrampolineFunctionType(FunctionType *T) {
assert(!T->isVarArg());
SmallVector<Type *, 4> ArgTypes;
@@ -861,9 +833,6 @@ TransformedFunction DataFlowSanitizer::getCustomFunctionType(FunctionType *T) {
}
bool DataFlowSanitizer::isZeroShadow(Value *V) {
- if (!shouldTrackFieldsAndIndices())
- return ZeroPrimitiveShadow == V;
-
Type *T = V->getType();
if (!isa<ArrayType>(T) && !isa<StructType>(T)) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
@@ -880,19 +849,11 @@ bool DataFlowSanitizer::hasLoadSizeForFastPath(uint64_t Size) {
}
bool DataFlowSanitizer::shouldTrackOrigins() {
- static const bool ShouldTrackOrigins =
- ClTrackOrigins && getInstrumentedABI() == DataFlowSanitizer::IA_TLS;
+ static const bool ShouldTrackOrigins = ClTrackOrigins;
return ShouldTrackOrigins;
}
-bool DataFlowSanitizer::shouldTrackFieldsAndIndices() {
- return getInstrumentedABI() == DataFlowSanitizer::IA_TLS;
-}
-
Constant *DataFlowSanitizer::getZeroShadow(Type *OrigTy) {
- if (!shouldTrackFieldsAndIndices())
- return ZeroPrimitiveShadow;
-
if (!isa<ArrayType>(OrigTy) && !isa<StructType>(OrigTy))
return ZeroPrimitiveShadow;
Type *ShadowTy = getShadowTy(OrigTy);
@@ -992,8 +953,6 @@ Value *DFSanFunction::collapseToPrimitiveShadow(Value *Shadow,
if (!isa<ArrayType>(ShadowTy) && !isa<StructType>(ShadowTy))
return Shadow;
- assert(DFS.shouldTrackFieldsAndIndices());
-
// Checks if the cached collapsed shadow value dominates Pos.
Value *&CS = CachedCollapsedShadows[Shadow];
if (CS && DT.dominates(CS, Pos))
@@ -1007,9 +966,6 @@ Value *DFSanFunction::collapseToPrimitiveShadow(Value *Shadow,
}
Type *DataFlowSanitizer::getShadowTy(Type *OrigTy) {
- if (!shouldTrackFieldsAndIndices())
- return PrimitiveShadowTy;
-
if (!OrigTy->isSized())
return PrimitiveShadowTy;
if (isa<IntegerType>(OrigTy))
@@ -1107,8 +1063,8 @@ bool DataFlowSanitizer::isInstrumented(const GlobalAlias *GA) {
return !ABIList.isIn(*GA, "uninstrumented");
}
-DataFlowSanitizer::InstrumentedABI DataFlowSanitizer::getInstrumentedABI() {
- return ClArgsABI ? IA_Args : IA_TLS;
+bool DataFlowSanitizer::isForceZeroLabels(const Function *F) {
+ return ABIList.isIn(*F, "force_zero_labels");
}
DataFlowSanitizer::WrapperKind DataFlowSanitizer::getWrapperKind(Function *F) {
@@ -1139,7 +1095,7 @@ void DataFlowSanitizer::addGlobalNameSuffix(GlobalValue *GV) {
Pos = Asm.find("@");
if (Pos == std::string::npos)
- report_fatal_error("unsupported .symver: " + Asm);
+ report_fatal_error(Twine("unsupported .symver: ", Asm));
Asm.replace(Pos, 1, Suffix + "@");
GV->getParent()->setModuleInlineAsm(Asm);
@@ -1154,14 +1110,12 @@ DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName,
Function *NewF = Function::Create(NewFT, NewFLink, F->getAddressSpace(),
NewFName, F->getParent());
NewF->copyAttributesFrom(F);
- NewF->removeAttributes(
- AttributeList::ReturnIndex,
+ NewF->removeRetAttrs(
AttributeFuncs::typeIncompatible(NewFT->getReturnType()));
BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", NewF);
if (F->isVarArg()) {
- NewF->removeAttributes(AttributeList::FunctionIndex,
- AttrBuilder().addAttribute("split-stack"));
+ NewF->removeFnAttrs(AttrBuilder().addAttribute("split-stack"));
CallInst::Create(DFSanVarargWrapperFn,
IRBuilder<>(BB).CreateGlobalStringPtr(F->getName()), "",
BB);
@@ -1199,7 +1153,8 @@ Constant *DataFlowSanitizer::getOrBuildTrampolineFunction(FunctionType *FT,
// F is called by a wrapped custom function with primitive shadows. So
// its arguments and return value need conversion.
- DFSanFunction DFSF(*this, F, /*IsNativeABI=*/true);
+ DFSanFunction DFSF(*this, F, /*IsNativeABI=*/true,
+ /*ForceZeroLabels=*/false);
Function::arg_iterator ValAI = F->arg_begin(), ShadowAI = AI;
++ValAI;
for (unsigned N = FT->getNumParams(); N != 0; ++ValAI, ++ShadowAI, --N) {
@@ -1238,23 +1193,17 @@ Constant *DataFlowSanitizer::getOrBuildTrampolineFunction(FunctionType *FT,
void DataFlowSanitizer::initializeRuntimeFunctions(Module &M) {
{
AttributeList AL;
- AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
- Attribute::NoUnwind);
- AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
- Attribute::ReadOnly);
- AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex,
- Attribute::ZExt);
+ AL = AL.addFnAttribute(M.getContext(), Attribute::NoUnwind);
+ AL = AL.addFnAttribute(M.getContext(), Attribute::ReadOnly);
+ AL = AL.addRetAttribute(M.getContext(), Attribute::ZExt);
DFSanUnionLoadFn =
Mod->getOrInsertFunction("__dfsan_union_load", DFSanUnionLoadFnTy, AL);
}
{
AttributeList AL;
- AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
- Attribute::NoUnwind);
- AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
- Attribute::ReadOnly);
- AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex,
- Attribute::ZExt);
+ AL = AL.addFnAttribute(M.getContext(), Attribute::NoUnwind);
+ AL = AL.addFnAttribute(M.getContext(), Attribute::ReadOnly);
+ AL = AL.addRetAttribute(M.getContext(), Attribute::ZExt);
DFSanLoadLabelAndOriginFn = Mod->getOrInsertFunction(
"__dfsan_load_label_and_origin", DFSanLoadLabelAndOriginFnTy, AL);
}
@@ -1274,8 +1223,7 @@ void DataFlowSanitizer::initializeRuntimeFunctions(Module &M) {
{
AttributeList AL;
AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
- AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex,
- Attribute::ZExt);
+ AL = AL.addRetAttribute(M.getContext(), Attribute::ZExt);
DFSanChainOriginFn = Mod->getOrInsertFunction("__dfsan_chain_origin",
DFSanChainOriginFnTy, AL);
}
@@ -1283,8 +1231,7 @@ void DataFlowSanitizer::initializeRuntimeFunctions(Module &M) {
AttributeList AL;
AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt);
- AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex,
- Attribute::ZExt);
+ AL = AL.addRetAttribute(M.getContext(), Attribute::ZExt);
DFSanChainOriginIfTaintedFn = Mod->getOrInsertFunction(
"__dfsan_chain_origin_if_tainted", DFSanChainOriginIfTaintedFnTy, AL);
}
@@ -1409,34 +1356,32 @@ bool DataFlowSanitizer::runImpl(Module &M) {
std::vector<Function *> FnsToInstrument;
SmallPtrSet<Function *, 2> FnsWithNativeABI;
+ SmallPtrSet<Function *, 2> FnsWithForceZeroLabel;
for (Function &F : M)
if (!F.isIntrinsic() && !DFSanRuntimeFunctions.contains(&F))
FnsToInstrument.push_back(&F);
// Give function aliases prefixes when necessary, and build wrappers where the
// instrumentedness is inconsistent.
- for (Module::alias_iterator AI = M.alias_begin(), AE = M.alias_end();
- AI != AE;) {
- GlobalAlias *GA = &*AI;
- ++AI;
+ for (GlobalAlias &GA : llvm::make_early_inc_range(M.aliases())) {
// Don't stop on weak. We assume people aren't playing games with the
// instrumentedness of overridden weak aliases.
- auto *F = dyn_cast<Function>(GA->getBaseObject());
+ auto *F = dyn_cast<Function>(GA.getAliaseeObject());
if (!F)
continue;
- bool GAInst = isInstrumented(GA), FInst = isInstrumented(F);
+ bool GAInst = isInstrumented(&GA), FInst = isInstrumented(F);
if (GAInst && FInst) {
- addGlobalNameSuffix(GA);
+ addGlobalNameSuffix(&GA);
} else if (GAInst != FInst) {
// Non-instrumented alias of an instrumented function, or vice versa.
// Replace the alias with a native-ABI wrapper of the aliasee. The pass
// below will take care of instrumenting it.
Function *NewF =
- buildWrapperFunction(F, "", GA->getLinkage(), F->getFunctionType());
- GA->replaceAllUsesWith(ConstantExpr::getBitCast(NewF, GA->getType()));
- NewF->takeName(GA);
- GA->eraseFromParent();
+ buildWrapperFunction(F, "", GA.getLinkage(), F->getFunctionType());
+ GA.replaceAllUsesWith(ConstantExpr::getBitCast(NewF, GA.getType()));
+ NewF->takeName(&GA);
+ GA.eraseFromParent();
FnsToInstrument.push_back(NewF);
}
}
@@ -1456,50 +1401,17 @@ bool DataFlowSanitizer::runImpl(Module &M) {
FT->getReturnType()->isVoidTy());
if (isInstrumented(&F)) {
+ if (isForceZeroLabels(&F))
+ FnsWithForceZeroLabel.insert(&F);
+
// Instrumented functions get a '.dfsan' suffix. This allows us to more
// easily identify cases of mismatching ABIs. This naming scheme is
// mangling-compatible (see Itanium ABI), using a vendor-specific suffix.
- if (getInstrumentedABI() == IA_Args && !IsZeroArgsVoidRet) {
- FunctionType *NewFT = getArgsFunctionType(FT);
- Function *NewF = Function::Create(NewFT, F.getLinkage(),
- F.getAddressSpace(), "", &M);
- NewF->copyAttributesFrom(&F);
- NewF->removeAttributes(
- AttributeList::ReturnIndex,
- AttributeFuncs::typeIncompatible(NewFT->getReturnType()));
- for (Function::arg_iterator FArg = F.arg_begin(),
- NewFArg = NewF->arg_begin(),
- FArgEnd = F.arg_end();
- FArg != FArgEnd; ++FArg, ++NewFArg) {
- FArg->replaceAllUsesWith(&*NewFArg);
- }
- NewF->getBasicBlockList().splice(NewF->begin(), F.getBasicBlockList());
-
- for (Function::user_iterator UI = F.user_begin(), UE = F.user_end();
- UI != UE;) {
- BlockAddress *BA = dyn_cast<BlockAddress>(*UI);
- ++UI;
- if (BA) {
- BA->replaceAllUsesWith(
- BlockAddress::get(NewF, BA->getBasicBlock()));
- delete BA;
- }
- }
- F.replaceAllUsesWith(
- ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT)));
- NewF->takeName(&F);
- F.eraseFromParent();
- *FI = NewF;
- addGlobalNameSuffix(NewF);
- } else {
- addGlobalNameSuffix(&F);
- }
+ addGlobalNameSuffix(&F);
} else if (!IsZeroArgsVoidRet || getWrapperKind(&F) == WK_Custom) {
// Build a wrapper function for F. The wrapper simply calls F, and is
// added to FnsToInstrument so that any instrumentation according to its
// WrapperKind is done in the second pass below.
- FunctionType *NewFT =
- getInstrumentedABI() == IA_Args ? getArgsFunctionType(FT) : FT;
// If the function being wrapped has local linkage, then preserve the
// function's linkage in the wrapper function.
@@ -1511,9 +1423,8 @@ bool DataFlowSanitizer::runImpl(Module &M) {
&F,
(shouldTrackOrigins() ? std::string("dfso$") : std::string("dfsw$")) +
std::string(F.getName()),
- WrapperLinkage, NewFT);
- if (getInstrumentedABI() == IA_TLS)
- NewF->removeAttributes(AttributeList::FunctionIndex, ReadOnlyNoneAttrs);
+ WrapperLinkage, FT);
+ NewF->removeFnAttrs(ReadOnlyNoneAttrs);
Value *WrappedFnCst =
ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT));
@@ -1552,7 +1463,8 @@ bool DataFlowSanitizer::runImpl(Module &M) {
removeUnreachableBlocks(*F);
- DFSanFunction DFSF(*this, F, FnsWithNativeABI.count(F));
+ DFSanFunction DFSF(*this, F, FnsWithNativeABI.count(F),
+ FnsWithForceZeroLabel.count(F));
// DFSanVisitor may create new basic blocks, which confuses df_iterator.
// Build a copy of the list before iterating over it.
@@ -1649,23 +1561,14 @@ Value *DFSanFunction::getOrigin(Value *V) {
if (Argument *A = dyn_cast<Argument>(V)) {
if (IsNativeABI)
return DFS.ZeroOrigin;
- switch (IA) {
- case DataFlowSanitizer::IA_TLS: {
- if (A->getArgNo() < DFS.NumOfElementsInArgOrgTLS) {
- Instruction *ArgOriginTLSPos = &*F->getEntryBlock().begin();
- IRBuilder<> IRB(ArgOriginTLSPos);
- Value *ArgOriginPtr = getArgOriginTLS(A->getArgNo(), IRB);
- Origin = IRB.CreateLoad(DFS.OriginTy, ArgOriginPtr);
- } else {
- // Overflow
- Origin = DFS.ZeroOrigin;
- }
- break;
- }
- case DataFlowSanitizer::IA_Args: {
+ if (A->getArgNo() < DFS.NumOfElementsInArgOrgTLS) {
+ Instruction *ArgOriginTLSPos = &*F->getEntryBlock().begin();
+ IRBuilder<> IRB(ArgOriginTLSPos);
+ Value *ArgOriginPtr = getArgOriginTLS(A->getArgNo(), IRB);
+ Origin = IRB.CreateLoad(DFS.OriginTy, ArgOriginPtr);
+ } else {
+ // Overflow
Origin = DFS.ZeroOrigin;
- break;
- }
}
} else {
Origin = DFS.ZeroOrigin;
@@ -1716,25 +1619,14 @@ Value *DFSanFunction::getShadowForTLSArgument(Argument *A) {
Value *DFSanFunction::getShadow(Value *V) {
if (!isa<Argument>(V) && !isa<Instruction>(V))
return DFS.getZeroShadow(V);
+ if (IsForceZeroLabels)
+ return DFS.getZeroShadow(V);
Value *&Shadow = ValShadowMap[V];
if (!Shadow) {
if (Argument *A = dyn_cast<Argument>(V)) {
if (IsNativeABI)
return DFS.getZeroShadow(V);
- switch (IA) {
- case DataFlowSanitizer::IA_TLS: {
- Shadow = getShadowForTLSArgument(A);
- break;
- }
- case DataFlowSanitizer::IA_Args: {
- unsigned ArgIdx = A->getArgNo() + F->arg_size() / 2;
- Function::arg_iterator Arg = F->arg_begin();
- std::advance(Arg, ArgIdx);
- Shadow = &*Arg;
- assert(Shadow->getType() == DFS.PrimitiveShadowTy);
- break;
- }
- }
+ Shadow = getShadowForTLSArgument(A);
NonZeroChecks.push_back(Shadow);
} else {
Shadow = DFS.getZeroShadow(V);
@@ -1745,8 +1637,6 @@ Value *DFSanFunction::getShadow(Value *V) {
void DFSanFunction::setShadow(Instruction *I, Value *Shadow) {
assert(!ValShadowMap.count(I));
- assert(DFS.shouldTrackFieldsAndIndices() ||
- Shadow->getType() == DFS.PrimitiveShadowTy);
ValShadowMap[I] = Shadow;
}
@@ -2124,7 +2014,7 @@ std::pair<Value *, Value *> DFSanFunction::loadShadowOriginSansLoadTracking(
IRB.CreateCall(DFS.DFSanLoadLabelAndOriginFn,
{IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
ConstantInt::get(DFS.IntptrTy, Size)});
- Call->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);
+ Call->addRetAttr(Attribute::ZExt);
return {IRB.CreateTrunc(IRB.CreateLShr(Call, DFS.OriginWidthBits),
DFS.PrimitiveShadowTy),
IRB.CreateTrunc(Call, DFS.OriginTy)};
@@ -2171,7 +2061,7 @@ std::pair<Value *, Value *> DFSanFunction::loadShadowOriginSansLoadTracking(
IRBuilder<> IRB(Pos);
CallInst *FallbackCall = IRB.CreateCall(
DFS.DFSanUnionLoadFn, {ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size)});
- FallbackCall->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);
+ FallbackCall->addRetAttr(Attribute::ZExt);
return {FallbackCall, Origin};
}
@@ -2563,15 +2453,12 @@ void DFSanVisitor::visitBinaryOperator(BinaryOperator &BO) {
}
void DFSanVisitor::visitBitCastInst(BitCastInst &BCI) {
- if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_TLS) {
- // Special case: if this is the bitcast (there is exactly 1 allowed) between
- // a musttail call and a ret, don't instrument. New instructions are not
- // allowed after a musttail call.
- if (auto *CI = dyn_cast<CallInst>(BCI.getOperand(0)))
- if (CI->isMustTailCall())
- return;
- }
- // TODO: handle musttail call returns for IA_Args.
+ // Special case: if this is the bitcast (there is exactly 1 allowed) between
+ // a musttail call and a ret, don't instrument. New instructions are not
+ // allowed after a musttail call.
+ if (auto *CI = dyn_cast<CallInst>(BCI.getOperand(0)))
+ if (CI->isMustTailCall())
+ return;
visitInstOperands(BCI);
}
@@ -2629,11 +2516,6 @@ void DFSanVisitor::visitShuffleVectorInst(ShuffleVectorInst &I) {
}
void DFSanVisitor::visitExtractValueInst(ExtractValueInst &I) {
- if (!DFSF.DFS.shouldTrackFieldsAndIndices()) {
- visitInstOperands(I);
- return;
- }
-
IRBuilder<> IRB(&I);
Value *Agg = I.getAggregateOperand();
Value *AggShadow = DFSF.getShadow(Agg);
@@ -2643,11 +2525,6 @@ void DFSanVisitor::visitExtractValueInst(ExtractValueInst &I) {
}
void DFSanVisitor::visitInsertValueInst(InsertValueInst &I) {
- if (!DFSF.DFS.shouldTrackFieldsAndIndices()) {
- visitInstOperands(I);
- return;
- }
-
IRBuilder<> IRB(&I);
Value *AggShadow = DFSF.getShadow(I.getAggregateOperand());
Value *InsShadow = DFSF.getShadow(I.getInsertedValueOperand());
@@ -2798,41 +2675,22 @@ static bool isAMustTailRetVal(Value *RetVal) {
void DFSanVisitor::visitReturnInst(ReturnInst &RI) {
if (!DFSF.IsNativeABI && RI.getReturnValue()) {
- switch (DFSF.IA) {
- case DataFlowSanitizer::IA_TLS: {
- // Don't emit the instrumentation for musttail call returns.
- if (isAMustTailRetVal(RI.getReturnValue()))
- return;
-
- Value *S = DFSF.getShadow(RI.getReturnValue());
- IRBuilder<> IRB(&RI);
- Type *RT = DFSF.F->getFunctionType()->getReturnType();
- unsigned Size =
- getDataLayout().getTypeAllocSize(DFSF.DFS.getShadowTy(RT));
- if (Size <= RetvalTLSSize) {
- // If the size overflows, stores nothing. At callsite, oversized return
- // shadows are set to zero.
- IRB.CreateAlignedStore(S, DFSF.getRetvalTLS(RT, IRB),
- ShadowTLSAlignment);
- }
- if (DFSF.DFS.shouldTrackOrigins()) {
- Value *O = DFSF.getOrigin(RI.getReturnValue());
- IRB.CreateStore(O, DFSF.getRetvalOriginTLS());
- }
- break;
- }
- case DataFlowSanitizer::IA_Args: {
- // TODO: handle musttail call returns for IA_Args.
-
- IRBuilder<> IRB(&RI);
- Type *RT = DFSF.F->getFunctionType()->getReturnType();
- Value *InsVal =
- IRB.CreateInsertValue(UndefValue::get(RT), RI.getReturnValue(), 0);
- Value *InsShadow =
- IRB.CreateInsertValue(InsVal, DFSF.getShadow(RI.getReturnValue()), 1);
- RI.setOperand(0, InsShadow);
- break;
+ // Don't emit the instrumentation for musttail call returns.
+ if (isAMustTailRetVal(RI.getReturnValue()))
+ return;
+
+ Value *S = DFSF.getShadow(RI.getReturnValue());
+ IRBuilder<> IRB(&RI);
+ Type *RT = DFSF.F->getFunctionType()->getReturnType();
+ unsigned Size = getDataLayout().getTypeAllocSize(DFSF.DFS.getShadowTy(RT));
+ if (Size <= RetvalTLSSize) {
+ // If the size overflows, nothing is stored. At the call site, oversized
+ // return shadows are set to zero.
+ IRB.CreateAlignedStore(S, DFSF.getRetvalTLS(RT, IRB), ShadowTLSAlignment);
}
+ if (DFSF.DFS.shouldTrackOrigins()) {
+ Value *O = DFSF.getOrigin(RI.getReturnValue());
+ IRB.CreateStore(O, DFSF.getRetvalOriginTLS());
}
}
}
@@ -2953,8 +2811,7 @@ bool DFSanVisitor::visitWrappedCallBase(Function &F, CallBase &CB) {
// Custom functions returning non-void will write to the return label.
if (!FT->getReturnType()->isVoidTy()) {
- CustomFn->removeAttributes(AttributeList::FunctionIndex,
- DFSF.DFS.ReadOnlyNoneAttrs);
+ CustomFn->removeFnAttrs(DFSF.DFS.ReadOnlyNoneAttrs);
}
}
@@ -3056,32 +2913,30 @@ void DFSanVisitor::visitCallBase(CallBase &CB) {
const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins();
FunctionType *FT = CB.getFunctionType();
- if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_TLS) {
- // Stores argument shadows.
- unsigned ArgOffset = 0;
- const DataLayout &DL = getDataLayout();
- for (unsigned I = 0, N = FT->getNumParams(); I != N; ++I) {
- if (ShouldTrackOrigins) {
- // Ignore overflowed origins
- Value *ArgShadow = DFSF.getShadow(CB.getArgOperand(I));
- if (I < DFSF.DFS.NumOfElementsInArgOrgTLS &&
- !DFSF.DFS.isZeroShadow(ArgShadow))
- IRB.CreateStore(DFSF.getOrigin(CB.getArgOperand(I)),
- DFSF.getArgOriginTLS(I, IRB));
- }
+ const DataLayout &DL = getDataLayout();
- unsigned Size =
- DL.getTypeAllocSize(DFSF.DFS.getShadowTy(FT->getParamType(I)));
- // Stop storing if arguments' size overflows. Inside a function, arguments
- // after overflow have zero shadow values.
- if (ArgOffset + Size > ArgTLSSize)
- break;
- IRB.CreateAlignedStore(
- DFSF.getShadow(CB.getArgOperand(I)),
- DFSF.getArgTLS(FT->getParamType(I), ArgOffset, IRB),
- ShadowTLSAlignment);
- ArgOffset += alignTo(Size, ShadowTLSAlignment);
+ // Stores argument shadows.
+ unsigned ArgOffset = 0;
+ for (unsigned I = 0, N = FT->getNumParams(); I != N; ++I) {
+ if (ShouldTrackOrigins) {
+ // Ignore overflowed origins
+ Value *ArgShadow = DFSF.getShadow(CB.getArgOperand(I));
+ if (I < DFSF.DFS.NumOfElementsInArgOrgTLS &&
+ !DFSF.DFS.isZeroShadow(ArgShadow))
+ IRB.CreateStore(DFSF.getOrigin(CB.getArgOperand(I)),
+ DFSF.getArgOriginTLS(I, IRB));
}
+
+ unsigned Size =
+ DL.getTypeAllocSize(DFSF.DFS.getShadowTy(FT->getParamType(I)));
+ // Stop storing if arguments' size overflows. Inside a function, arguments
+ // after overflow have zero shadow values.
+ if (ArgOffset + Size > ArgTLSSize)
+ break;
+ IRB.CreateAlignedStore(DFSF.getShadow(CB.getArgOperand(I)),
+ DFSF.getArgTLS(FT->getParamType(I), ArgOffset, IRB),
+ ShadowTLSAlignment);
+ ArgOffset += alignTo(Size, ShadowTLSAlignment);
}
Instruction *Next = nullptr;
@@ -3099,99 +2954,31 @@ void DFSanVisitor::visitCallBase(CallBase &CB) {
Next = CB.getNextNode();
}
- if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_TLS) {
- // Don't emit the epilogue for musttail call returns.
- if (isa<CallInst>(CB) && cast<CallInst>(CB).isMustTailCall())
- return;
-
- // Loads the return value shadow.
- IRBuilder<> NextIRB(Next);
- const DataLayout &DL = getDataLayout();
- unsigned Size = DL.getTypeAllocSize(DFSF.DFS.getShadowTy(&CB));
- if (Size > RetvalTLSSize) {
- // Set overflowed return shadow to be zero.
- DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));
- } else {
- LoadInst *LI = NextIRB.CreateAlignedLoad(
- DFSF.DFS.getShadowTy(&CB), DFSF.getRetvalTLS(CB.getType(), NextIRB),
- ShadowTLSAlignment, "_dfsret");
- DFSF.SkipInsts.insert(LI);
- DFSF.setShadow(&CB, LI);
- DFSF.NonZeroChecks.push_back(LI);
- }
-
- if (ShouldTrackOrigins) {
- LoadInst *LI = NextIRB.CreateLoad(
- DFSF.DFS.OriginTy, DFSF.getRetvalOriginTLS(), "_dfsret_o");
- DFSF.SkipInsts.insert(LI);
- DFSF.setOrigin(&CB, LI);
- }
- }
- }
-
- // Do all instrumentation for IA_Args down here to defer tampering with the
- // CFG in a way that SplitEdge may be able to detect.
- if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_Args) {
- // TODO: handle musttail call returns for IA_Args.
-
- FunctionType *NewFT = DFSF.DFS.getArgsFunctionType(FT);
- Value *Func =
- IRB.CreateBitCast(CB.getCalledOperand(), PointerType::getUnqual(NewFT));
-
- const unsigned NumParams = FT->getNumParams();
-
- // Copy original arguments.
- auto *ArgIt = CB.arg_begin(), *ArgEnd = CB.arg_end();
- std::vector<Value *> Args(NumParams);
- std::copy_n(ArgIt, NumParams, Args.begin());
-
- // Add shadow arguments by transforming original arguments.
- std::generate_n(std::back_inserter(Args), NumParams,
- [&]() { return DFSF.getShadow(*ArgIt++); });
-
- if (FT->isVarArg()) {
- unsigned VarArgSize = CB.arg_size() - NumParams;
- ArrayType *VarArgArrayTy =
- ArrayType::get(DFSF.DFS.PrimitiveShadowTy, VarArgSize);
- AllocaInst *VarArgShadow =
- new AllocaInst(VarArgArrayTy, getDataLayout().getAllocaAddrSpace(),
- "", &DFSF.F->getEntryBlock().front());
- Args.push_back(IRB.CreateConstGEP2_32(VarArgArrayTy, VarArgShadow, 0, 0));
-
- // Copy remaining var args.
- unsigned GepIndex = 0;
- std::for_each(ArgIt, ArgEnd, [&](Value *Arg) {
- IRB.CreateStore(
- DFSF.getShadow(Arg),
- IRB.CreateConstGEP2_32(VarArgArrayTy, VarArgShadow, 0, GepIndex++));
- Args.push_back(Arg);
- });
- }
+ // Don't emit the epilogue for musttail call returns.
+ if (isa<CallInst>(CB) && cast<CallInst>(CB).isMustTailCall())
+ return;
- CallBase *NewCB;
- if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {
- NewCB = IRB.CreateInvoke(NewFT, Func, II->getNormalDest(),
- II->getUnwindDest(), Args);
+ // Loads the return value shadow.
+ IRBuilder<> NextIRB(Next);
+ unsigned Size = DL.getTypeAllocSize(DFSF.DFS.getShadowTy(&CB));
+ if (Size > RetvalTLSSize) {
+ // Set overflowed return shadow to be zero.
+ DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));
} else {
- NewCB = IRB.CreateCall(NewFT, Func, Args);
- }
- NewCB->setCallingConv(CB.getCallingConv());
- NewCB->setAttributes(CB.getAttributes().removeAttributes(
- *DFSF.DFS.Ctx, AttributeList::ReturnIndex,
- AttributeFuncs::typeIncompatible(NewCB->getType())));
-
- if (Next) {
- ExtractValueInst *ExVal = ExtractValueInst::Create(NewCB, 0, "", Next);
- DFSF.SkipInsts.insert(ExVal);
- ExtractValueInst *ExShadow = ExtractValueInst::Create(NewCB, 1, "", Next);
- DFSF.SkipInsts.insert(ExShadow);
- DFSF.setShadow(ExVal, ExShadow);
- DFSF.NonZeroChecks.push_back(ExShadow);
-
- CB.replaceAllUsesWith(ExVal);
+ LoadInst *LI = NextIRB.CreateAlignedLoad(
+ DFSF.DFS.getShadowTy(&CB), DFSF.getRetvalTLS(CB.getType(), NextIRB),
+ ShadowTLSAlignment, "_dfsret");
+ DFSF.SkipInsts.insert(LI);
+ DFSF.setShadow(&CB, LI);
+ DFSF.NonZeroChecks.push_back(LI);
}
- CB.eraseFromParent();
+ if (ShouldTrackOrigins) {
+ LoadInst *LI = NextIRB.CreateLoad(DFSF.DFS.OriginTy,
+ DFSF.getRetvalOriginTLS(), "_dfsret_o");
+ DFSF.SkipInsts.insert(LI);
+ DFSF.setOrigin(&CB, LI);
+ }
}
}
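Taken together, the DataFlowSanitizer hunks above drop the IA_Args calling-convention rewrite and keep only the TLS-based handoff: the caller stores each argument's shadow at an aligned offset into a fixed-size TLS buffer and stops once the next store would overflow, while the callee reads shadows back from the same offsets and treats overflowed arguments as having zero shadow. A minimal sketch of that offset discipline in plain C++, with an illustrative buffer size and alignment rather than the pass's actual ArgTLSSize and ShadowTLSAlignment:

#include <cstdint>
#include <cstring>
#include <vector>

// Illustrative stand-ins for the pass's TLS buffer parameters.
constexpr size_t kArgTLSSize = 800;
constexpr size_t kShadowTLSAlignment = 8;
static uint8_t ArgShadowTLS[kArgTLSSize];

static size_t alignUp(size_t N, size_t A) { return (N + A - 1) / A * A; }

// Caller side: store each argument's shadow at an aligned offset and stop
// once the next shadow no longer fits; the callee treats arguments past that
// point as having a zero shadow.
void storeArgShadows(const std::vector<std::vector<uint8_t>> &Shadows) {
  size_t Offset = 0;
  for (const std::vector<uint8_t> &S : Shadows) {
    if (Offset + S.size() > kArgTLSSize)
      break;
    std::memcpy(ArgShadowTLS + Offset, S.data(), S.size());
    Offset += alignUp(S.size(), kShadowTLSAlignment);
  }
}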
diff --git a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index c99f2e66b1cc..325089fc4402 100644
--- a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -86,7 +86,7 @@ GCOVOptions GCOVOptions::getDefault() {
Options.Atomic = AtomicCounter;
if (DefaultGCOVVersion.size() != 4) {
- llvm::report_fatal_error(std::string("Invalid -default-gcov-version: ") +
+ llvm::report_fatal_error(Twine("Invalid -default-gcov-version: ") +
DefaultGCOVVersion);
}
memcpy(Options.Version, DefaultGCOVVersion.c_str(), 4);
@@ -1373,12 +1373,16 @@ Function *GCOVProfiler::insertReset(
BasicBlock *Entry = BasicBlock::Create(*Ctx, "entry", ResetF);
IRBuilder<> Builder(Entry);
+ LLVMContext &C = Entry->getContext();
// Zero out the counters.
for (const auto &I : CountersBySP) {
GlobalVariable *GV = I.first;
- Constant *Null = Constant::getNullValue(GV->getValueType());
- Builder.CreateStore(Null, GV);
+ auto *GVTy = cast<ArrayType>(GV->getValueType());
+ Builder.CreateMemSet(GV, Constant::getNullValue(Type::getInt8Ty(C)),
+ GVTy->getNumElements() *
+ GVTy->getElementType()->getScalarSizeInBits() / 8,
+ GV->getAlign());
}
Type *RetTy = ResetF->getReturnType();
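The insertReset change above zeroes each counter array with one memset whose length is the element count times the element size in bytes, instead of storing a null aggregate. A small standalone sketch of the same size computation over a placeholder counter array:

#include <cstdint>
#include <cstring>

// Stand-in for one gcov counter array; the real pass works on the
// GlobalVariable's ArrayType.
constexpr unsigned kNumCounters = 16;
static uint64_t Counters[kNumCounters];

void resetCounters() {
  // Bytes = NumElements * ElementBitWidth / 8, mirroring the memset above.
  const size_t Bytes = kNumCounters * (sizeof(uint64_t) * 8) / 8;
  std::memset(Counters, 0, Bytes);
}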
diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
index 60a4ee8811fb..62c265e40dab 100644
--- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -17,7 +17,10 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/StackSafetyAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
@@ -26,6 +29,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
@@ -41,6 +45,7 @@
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -115,6 +120,17 @@ static cl::opt<bool>
cl::Hidden, cl::desc("Use Stack Safety analysis results"),
cl::Optional);
+static cl::opt<size_t> ClMaxLifetimes(
+ "hwasan-max-lifetimes-for-alloca", cl::Hidden, cl::init(3),
+ cl::ReallyHidden,
+ cl::desc("How many lifetime ends to handle for a single alloca."),
+ cl::Optional);
+
+static cl::opt<bool>
+ ClUseAfterScope("hwasan-use-after-scope",
+ cl::desc("detect use after scope within function"),
+ cl::Hidden, cl::init(false));
+
static cl::opt<bool> ClUARRetagToZero(
"hwasan-uar-retag-to-zero",
cl::desc("Clear alloca tags before returning from the function to allow "
@@ -220,9 +236,21 @@ bool shouldUseStackSafetyAnalysis(const Triple &TargetTriple,
return shouldInstrumentStack(TargetTriple) &&
mightUseStackSafetyAnalysis(DisableOptimization);
}
+
+bool shouldDetectUseAfterScope(const Triple &TargetTriple) {
+ return ClUseAfterScope && shouldInstrumentStack(TargetTriple);
+}
+
/// An instrumentation pass implementing detection of addressability bugs
/// using tagged pointers.
class HWAddressSanitizer {
+private:
+ struct AllocaInfo {
+ AllocaInst *AI;
+ SmallVector<IntrinsicInst *, 2> LifetimeStart;
+ SmallVector<IntrinsicInst *, 2> LifetimeEnd;
+ };
+
public:
HWAddressSanitizer(Module &M, bool CompileKernel, bool Recover,
const StackSafetyGlobalInfo *SSI)
@@ -237,7 +265,11 @@ public:
void setSSI(const StackSafetyGlobalInfo *S) { SSI = S; }
- bool sanitizeFunction(Function &F);
+ DenseMap<AllocaInst *, AllocaInst *> padInterestingAllocas(
+ const MapVector<AllocaInst *, AllocaInfo> &AllocasToInstrument);
+ bool sanitizeFunction(Function &F,
+ llvm::function_ref<const DominatorTree &()> GetDT,
+ llvm::function_ref<const PostDominatorTree &()> GetPDT);
void initializeModule();
void createHwasanCtorComdat();
@@ -250,23 +282,34 @@ public:
void untagPointerOperand(Instruction *I, Value *Addr);
Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
+
+ int64_t getAccessInfo(bool IsWrite, unsigned AccessSizeIndex);
+ void instrumentMemAccessOutline(Value *Ptr, bool IsWrite,
+ unsigned AccessSizeIndex,
+ Instruction *InsertBefore);
void instrumentMemAccessInline(Value *Ptr, bool IsWrite,
unsigned AccessSizeIndex,
Instruction *InsertBefore);
+ bool ignoreMemIntrinsic(MemIntrinsic *MI);
void instrumentMemIntrinsic(MemIntrinsic *MI);
bool instrumentMemAccess(InterestingMemoryOperand &O);
- bool ignoreAccess(Value *Ptr);
+ bool ignoreAccess(Instruction *Inst, Value *Ptr);
void getInterestingMemoryOperands(
Instruction *I, SmallVectorImpl<InterestingMemoryOperand> &Interesting);
bool isInterestingAlloca(const AllocaInst &AI);
- bool tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag, size_t Size);
+ void tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag, size_t Size);
Value *tagPointer(IRBuilder<> &IRB, Type *Ty, Value *PtrLong, Value *Tag);
Value *untagPointer(IRBuilder<> &IRB, Value *PtrLong);
+ static bool isStandardLifetime(const AllocaInfo &AllocaInfo,
+ const DominatorTree &DT);
bool instrumentStack(
- SmallVectorImpl<AllocaInst *> &Allocas,
+ MapVector<AllocaInst *, AllocaInfo> &AllocasToInstrument,
+ SmallVector<Instruction *, 4> &UnrecognizedLifetimes,
DenseMap<AllocaInst *, std::vector<DbgVariableIntrinsic *>> &AllocaDbgMap,
- SmallVectorImpl<Instruction *> &RetVec, Value *StackTag);
+ SmallVectorImpl<Instruction *> &RetVec, Value *StackTag,
+ llvm::function_ref<const DominatorTree &()> GetDT,
+ llvm::function_ref<const PostDominatorTree &()> GetPDT);
Value *readRegister(IRBuilder<> &IRB, StringRef Name);
bool instrumentLandingPads(SmallVectorImpl<Instruction *> &RetVec);
Value *getNextTagWithCall(IRBuilder<> &IRB);
@@ -313,8 +356,9 @@ private:
bool WithFrameRecord;
void init(Triple &TargetTriple, bool InstrumentWithCalls);
- unsigned getObjectAlignment() const { return 1U << Scale; }
+ uint64_t getObjectAlignment() const { return 1ULL << Scale; }
};
+
ShadowMapping Mapping;
Type *VoidTy = Type::getVoidTy(M.getContext());
@@ -331,6 +375,7 @@ private:
bool InstrumentLandingPads;
bool InstrumentWithCalls;
bool InstrumentStack;
+ bool DetectUseAfterScope;
bool UsePageAliases;
bool HasMatchAllTag = false;
@@ -377,14 +422,21 @@ public:
}
bool runOnFunction(Function &F) override {
- if (shouldUseStackSafetyAnalysis(Triple(F.getParent()->getTargetTriple()),
- DisableOptimization)) {
+ auto TargetTriple = Triple(F.getParent()->getTargetTriple());
+ if (shouldUseStackSafetyAnalysis(TargetTriple, DisableOptimization)) {
// We cannot call getAnalysis in doInitialization; that would cause a
// crash as the required analyses are not initialized yet.
HWASan->setSSI(
&getAnalysis<StackSafetyGlobalInfoWrapperPass>().getResult());
}
- return HWASan->sanitizeFunction(F);
+ return HWASan->sanitizeFunction(
+ F,
+ [&]() -> const DominatorTree & {
+ return getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ },
+ [&]() -> const PostDominatorTree & {
+ return getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
+ });
}
bool doFinalization(Module &M) override {
@@ -399,6 +451,8 @@ public:
// This is so we don't need to plumb TargetTriple all the way to here.
if (mightUseStackSafetyAnalysis(DisableOptimization))
AU.addRequired<StackSafetyGlobalInfoWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<PostDominatorTreeWrapperPass>();
}
private:
@@ -417,6 +471,8 @@ INITIALIZE_PASS_BEGIN(
"HWAddressSanitizer: detect memory bugs using tagged addressing.", false,
false)
INITIALIZE_PASS_DEPENDENCY(StackSafetyGlobalInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_END(
HWAddressSanitizerLegacyPass, "hwasan",
"HWAddressSanitizer: detect memory bugs using tagged addressing.", false,
@@ -430,25 +486,41 @@ llvm::createHWAddressSanitizerLegacyPassPass(bool CompileKernel, bool Recover,
DisableOptimization);
}
-HWAddressSanitizerPass::HWAddressSanitizerPass(bool CompileKernel, bool Recover,
- bool DisableOptimization)
- : CompileKernel(CompileKernel), Recover(Recover),
- DisableOptimization(DisableOptimization) {}
-
PreservedAnalyses HWAddressSanitizerPass::run(Module &M,
ModuleAnalysisManager &MAM) {
const StackSafetyGlobalInfo *SSI = nullptr;
- if (shouldUseStackSafetyAnalysis(llvm::Triple(M.getTargetTriple()),
- DisableOptimization))
+ auto TargetTriple = llvm::Triple(M.getTargetTriple());
+ if (shouldUseStackSafetyAnalysis(TargetTriple, Options.DisableOptimization))
SSI = &MAM.getResult<StackSafetyGlobalAnalysis>(M);
- HWAddressSanitizer HWASan(M, CompileKernel, Recover, SSI);
+
+ HWAddressSanitizer HWASan(M, Options.CompileKernel, Options.Recover, SSI);
bool Modified = false;
- for (Function &F : M)
- Modified |= HWASan.sanitizeFunction(F);
+ auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ for (Function &F : M) {
+ Modified |= HWASan.sanitizeFunction(
+ F,
+ [&]() -> const DominatorTree & {
+ return FAM.getResult<DominatorTreeAnalysis>(F);
+ },
+ [&]() -> const PostDominatorTree & {
+ return FAM.getResult<PostDominatorTreeAnalysis>(F);
+ });
+ }
if (Modified)
return PreservedAnalyses::none();
return PreservedAnalyses::all();
}
+void HWAddressSanitizerPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<HWAddressSanitizerPass> *>(this)->printPipeline(
+ OS, MapClassName2PassName);
+ OS << "<";
+ if (Options.CompileKernel)
+ OS << "kernel;";
+ if (Options.Recover)
+ OS << "recover";
+ OS << ">";
+}
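The new printPipeline hook lets the pass print its options back out so a textual pipeline can be round-tripped; with both options enabled the suffix printed after the pass name is "<kernel;recover>". A small sketch of the same string assembly in plain C++ (the pass itself writes to a raw_ostream):

#include <string>

// Mirrors the option printing above: each piece is emitted only when the
// corresponding option is enabled.
std::string hwasanPipelineSuffix(bool CompileKernel, bool Recover) {
  std::string S = "<";
  if (CompileKernel)
    S += "kernel;";
  if (Recover)
    S += "recover";
  S += ">";
  return S;
}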
void HWAddressSanitizer::createHwasanCtorComdat() {
std::tie(HwasanCtorFunction, std::ignore) =
@@ -566,6 +638,7 @@ void HWAddressSanitizer::initializeModule() {
UsePageAliases = shouldUsePageAliases(TargetTriple);
InstrumentWithCalls = shouldInstrumentWithCalls(TargetTriple);
InstrumentStack = shouldInstrumentStack(TargetTriple);
+ DetectUseAfterScope = shouldDetectUseAfterScope(TargetTriple);
PointerTagShift = IsX86_64 ? 57 : 56;
TagMaskByte = IsX86_64 ? 0x3F : 0xFF;
@@ -712,7 +785,7 @@ Value *HWAddressSanitizer::getShadowNonTls(IRBuilder<> &IRB) {
}
}
-bool HWAddressSanitizer::ignoreAccess(Value *Ptr) {
+bool HWAddressSanitizer::ignoreAccess(Instruction *Inst, Value *Ptr) {
// Do not instrument accesses from different address spaces; we cannot deal
// with them.
Type *PtrTy = cast<PointerType>(Ptr->getType()->getScalarType());
@@ -726,6 +799,12 @@ bool HWAddressSanitizer::ignoreAccess(Value *Ptr) {
if (Ptr->isSwiftError())
return true;
+ if (findAllocaForValue(Ptr)) {
+ if (!InstrumentStack)
+ return true;
+ if (SSI && SSI->stackAccessIsSafe(*Inst))
+ return true;
+ }
return false;
}
@@ -740,29 +819,29 @@ void HWAddressSanitizer::getInterestingMemoryOperands(
return;
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- if (!ClInstrumentReads || ignoreAccess(LI->getPointerOperand()))
+ if (!ClInstrumentReads || ignoreAccess(I, LI->getPointerOperand()))
return;
Interesting.emplace_back(I, LI->getPointerOperandIndex(), false,
LI->getType(), LI->getAlign());
} else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
- if (!ClInstrumentWrites || ignoreAccess(SI->getPointerOperand()))
+ if (!ClInstrumentWrites || ignoreAccess(I, SI->getPointerOperand()))
return;
Interesting.emplace_back(I, SI->getPointerOperandIndex(), true,
SI->getValueOperand()->getType(), SI->getAlign());
} else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
- if (!ClInstrumentAtomics || ignoreAccess(RMW->getPointerOperand()))
+ if (!ClInstrumentAtomics || ignoreAccess(I, RMW->getPointerOperand()))
return;
Interesting.emplace_back(I, RMW->getPointerOperandIndex(), true,
RMW->getValOperand()->getType(), None);
} else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
- if (!ClInstrumentAtomics || ignoreAccess(XCHG->getPointerOperand()))
+ if (!ClInstrumentAtomics || ignoreAccess(I, XCHG->getPointerOperand()))
return;
Interesting.emplace_back(I, XCHG->getPointerOperandIndex(), true,
XCHG->getCompareOperand()->getType(), None);
} else if (auto CI = dyn_cast<CallInst>(I)) {
- for (unsigned ArgNo = 0; ArgNo < CI->getNumArgOperands(); ArgNo++) {
+ for (unsigned ArgNo = 0; ArgNo < CI->arg_size(); ArgNo++) {
if (!ClInstrumentByval || !CI->isByValArgument(ArgNo) ||
- ignoreAccess(CI->getArgOperand(ArgNo)))
+ ignoreAccess(I, CI->getArgOperand(ArgNo)))
continue;
Type *Ty = CI->getParamByValType(ArgNo);
Interesting.emplace_back(I, ArgNo, false, Ty, Align(1));
@@ -809,30 +888,38 @@ Value *HWAddressSanitizer::memToShadow(Value *Mem, IRBuilder<> &IRB) {
return IRB.CreateGEP(Int8Ty, ShadowBase, Shadow);
}
+int64_t HWAddressSanitizer::getAccessInfo(bool IsWrite,
+ unsigned AccessSizeIndex) {
+ return (CompileKernel << HWASanAccessInfo::CompileKernelShift) +
+ (HasMatchAllTag << HWASanAccessInfo::HasMatchAllShift) +
+ (MatchAllTag << HWASanAccessInfo::MatchAllShift) +
+ (Recover << HWASanAccessInfo::RecoverShift) +
+ (IsWrite << HWASanAccessInfo::IsWriteShift) +
+ (AccessSizeIndex << HWASanAccessInfo::AccessSizeShift);
+}
+
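getAccessInfo packs the sanitizer configuration and the per-access properties into a single integer by shifting each field into place; the outlined path then hands that constant to the hwasan check intrinsic. A standalone sketch of the packing with hypothetical shift values, since the real HWASanAccessInfo shift constants are not shown in this hunk:

#include <cstdint>

// Hypothetical field positions; the real values come from the
// HWASanAccessInfo shift constants.
enum Shifts {
  AccessSizeShift = 0,
  IsWriteShift = 4,
  RecoverShift = 5,
  MatchAllShift = 16,
  HasMatchAllShift = 24,
  CompileKernelShift = 25,
};

int64_t packAccessInfo(bool CompileKernel, bool HasMatchAllTag,
                       uint8_t MatchAllTag, bool Recover, bool IsWrite,
                       unsigned AccessSizeIndex) {
  return (int64_t(CompileKernel) << CompileKernelShift) +
         (int64_t(HasMatchAllTag) << HasMatchAllShift) +
         (int64_t(MatchAllTag) << MatchAllShift) +
         (int64_t(Recover) << RecoverShift) +
         (int64_t(IsWrite) << IsWriteShift) +
         (int64_t(AccessSizeIndex) << AccessSizeShift);
}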
+void HWAddressSanitizer::instrumentMemAccessOutline(Value *Ptr, bool IsWrite,
+ unsigned AccessSizeIndex,
+ Instruction *InsertBefore) {
+ assert(!UsePageAliases);
+ const int64_t AccessInfo = getAccessInfo(IsWrite, AccessSizeIndex);
+ IRBuilder<> IRB(InsertBefore);
+ Module *M = IRB.GetInsertBlock()->getParent()->getParent();
+ Ptr = IRB.CreateBitCast(Ptr, Int8PtrTy);
+ IRB.CreateCall(Intrinsic::getDeclaration(
+ M, UseShortGranules
+ ? Intrinsic::hwasan_check_memaccess_shortgranules
+ : Intrinsic::hwasan_check_memaccess),
+ {ShadowBase, Ptr, ConstantInt::get(Int32Ty, AccessInfo)});
+}
+
void HWAddressSanitizer::instrumentMemAccessInline(Value *Ptr, bool IsWrite,
unsigned AccessSizeIndex,
Instruction *InsertBefore) {
assert(!UsePageAliases);
- const int64_t AccessInfo =
- (CompileKernel << HWASanAccessInfo::CompileKernelShift) +
- (HasMatchAllTag << HWASanAccessInfo::HasMatchAllShift) +
- (MatchAllTag << HWASanAccessInfo::MatchAllShift) +
- (Recover << HWASanAccessInfo::RecoverShift) +
- (IsWrite << HWASanAccessInfo::IsWriteShift) +
- (AccessSizeIndex << HWASanAccessInfo::AccessSizeShift);
+ const int64_t AccessInfo = getAccessInfo(IsWrite, AccessSizeIndex);
IRBuilder<> IRB(InsertBefore);
- if (OutlinedChecks) {
- Module *M = IRB.GetInsertBlock()->getParent()->getParent();
- Ptr = IRB.CreateBitCast(Ptr, Int8PtrTy);
- IRB.CreateCall(Intrinsic::getDeclaration(
- M, UseShortGranules
- ? Intrinsic::hwasan_check_memaccess_shortgranules
- : Intrinsic::hwasan_check_memaccess),
- {ShadowBase, Ptr, ConstantInt::get(Int32Ty, AccessInfo)});
- return;
- }
-
Value *PtrLong = IRB.CreatePointerCast(Ptr, IntptrTy);
Value *PtrTag = IRB.CreateTrunc(IRB.CreateLShr(PtrLong, PointerTagShift),
IRB.getInt8Ty());
@@ -908,6 +995,16 @@ void HWAddressSanitizer::instrumentMemAccessInline(Value *Ptr, bool IsWrite,
cast<BranchInst>(CheckFailTerm)->setSuccessor(0, CheckTerm->getParent());
}
+bool HWAddressSanitizer::ignoreMemIntrinsic(MemIntrinsic *MI) {
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
+ return (!ClInstrumentWrites || ignoreAccess(MTI, MTI->getDest())) &&
+ (!ClInstrumentReads || ignoreAccess(MTI, MTI->getSource()));
+ }
+ if (isa<MemSetInst>(MI))
+ return !ClInstrumentWrites || ignoreAccess(MI, MI->getDest());
+ return false;
+}
+
void HWAddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
IRBuilder<> IRB(MI);
if (isa<MemTransferInst>(MI)) {
@@ -943,6 +1040,8 @@ bool HWAddressSanitizer::instrumentMemAccess(InterestingMemoryOperand &O) {
if (InstrumentWithCalls) {
IRB.CreateCall(HwasanMemoryAccessCallback[O.IsWrite][AccessSizeIndex],
IRB.CreatePointerCast(Addr, IntptrTy));
+ } else if (OutlinedChecks) {
+ instrumentMemAccessOutline(Addr, O.IsWrite, AccessSizeIndex, O.getInsn());
} else {
instrumentMemAccessInline(Addr, O.IsWrite, AccessSizeIndex, O.getInsn());
}
@@ -968,7 +1067,7 @@ static uint64_t getAllocaSizeInBytes(const AllocaInst &AI) {
return SizeInBytes * ArraySize;
}
-bool HWAddressSanitizer::tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag,
+void HWAddressSanitizer::tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag,
size_t Size) {
size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
if (!UseShortGranules)
@@ -999,7 +1098,6 @@ bool HWAddressSanitizer::tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag,
AlignedSize - 1));
}
}
- return true;
}
unsigned HWAddressSanitizer::retagMask(unsigned AllocaNo) {
@@ -1231,17 +1329,53 @@ bool HWAddressSanitizer::instrumentLandingPads(
return true;
}
+static bool
+maybeReachableFromEachOther(const SmallVectorImpl<IntrinsicInst *> &Insts,
+ const DominatorTree &DT) {
+ // If we have too many lifetime ends, give up, as the algorithm below is N^2.
+ if (Insts.size() > ClMaxLifetimes)
+ return true;
+ for (size_t I = 0; I < Insts.size(); ++I) {
+ for (size_t J = 0; J < Insts.size(); ++J) {
+ if (I == J)
+ continue;
+ if (isPotentiallyReachable(Insts[I], Insts[J], nullptr, &DT))
+ return true;
+ }
+ }
+ return false;
+}
+
+// static
+bool HWAddressSanitizer::isStandardLifetime(const AllocaInfo &AllocaInfo,
+ const DominatorTree &DT) {
+ // An alloca that has exactly one start and end in every possible execution.
+ // If it has multiple ends, they have to be unreachable from each other, so
+ // at most one of them is actually used for each execution of the function.
+ return AllocaInfo.LifetimeStart.size() == 1 &&
+ (AllocaInfo.LifetimeEnd.size() == 1 ||
+ (AllocaInfo.LifetimeEnd.size() > 0 &&
+ !maybeReachableFromEachOther(AllocaInfo.LifetimeEnd, DT)));
+}
+
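The "standard lifetime" test above accepts an alloca with exactly one lifetime.start and either a single lifetime.end or several ends that can never both execute (pairwise unreachable), giving up early when there are too many ends for the quadratic reachability check. A standalone sketch of that decision over placeholder bookkeeping, where Reachable(A, B) stands in for isPotentiallyReachable:

#include <cstddef>
#include <functional>
#include <vector>

// Placeholder stand-ins for the pass's per-alloca lifetime bookkeeping.
struct LifetimeInfo {
  std::vector<int> Starts; // ids of lifetime.start markers
  std::vector<int> Ends;   // ids of lifetime.end markers
};

constexpr size_t kMaxLifetimes = 3; // mirrors -hwasan-max-lifetimes-for-alloca

bool maybeReachableFromEachOther(const std::vector<int> &Ends,
                                 std::function<bool(int, int)> Reachable) {
  if (Ends.size() > kMaxLifetimes) // give up; the check below is O(N^2)
    return true;
  for (size_t I = 0; I < Ends.size(); ++I)
    for (size_t J = 0; J < Ends.size(); ++J)
      if (I != J && Reachable(Ends[I], Ends[J]))
        return true;
  return false;
}

bool isStandardLifetime(const LifetimeInfo &Info,
                        std::function<bool(int, int)> Reachable) {
  return Info.Starts.size() == 1 &&
         (Info.Ends.size() == 1 ||
          (!Info.Ends.empty() &&
           !maybeReachableFromEachOther(Info.Ends, Reachable)));
}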
bool HWAddressSanitizer::instrumentStack(
- SmallVectorImpl<AllocaInst *> &Allocas,
+ MapVector<AllocaInst *, AllocaInfo> &AllocasToInstrument,
+ SmallVector<Instruction *, 4> &UnrecognizedLifetimes,
DenseMap<AllocaInst *, std::vector<DbgVariableIntrinsic *>> &AllocaDbgMap,
- SmallVectorImpl<Instruction *> &RetVec, Value *StackTag) {
+ SmallVectorImpl<Instruction *> &RetVec, Value *StackTag,
+ llvm::function_ref<const DominatorTree &()> GetDT,
+ llvm::function_ref<const PostDominatorTree &()> GetPDT) {
// Ideally, we want to calculate tagged stack base pointer, and rewrite all
// alloca addresses using that. Unfortunately, offsets are not known yet
// (unless we use ASan-style mega-alloca). Instead we keep the base tag in a
// temp, shift-OR it into each alloca address and xor with the retag mask.
// This generates one extra instruction per alloca use.
- for (unsigned N = 0; N < Allocas.size(); ++N) {
- auto *AI = Allocas[N];
+ unsigned int I = 0;
+
+ for (auto &KV : AllocasToInstrument) {
+ auto N = I++;
+ auto *AI = KV.first;
+ AllocaInfo &Info = KV.second;
IRBuilder<> IRB(AI->getNextNode());
// Replace uses of the alloca with tagged address.
@@ -1268,17 +1402,40 @@ bool HWAddressSanitizer::instrumentStack(
}
size_t Size = getAllocaSizeInBytes(*AI);
- tagAlloca(IRB, AI, Tag, Size);
-
- for (auto RI : RetVec) {
- IRB.SetInsertPoint(RI);
-
- // Re-tag alloca memory with the special UAR tag.
- Value *Tag = getUARTag(IRB, StackTag);
- tagAlloca(IRB, AI, Tag, alignTo(Size, Mapping.getObjectAlignment()));
+ size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
+ bool StandardLifetime =
+ UnrecognizedLifetimes.empty() && isStandardLifetime(Info, GetDT());
+ if (DetectUseAfterScope && StandardLifetime) {
+ IntrinsicInst *Start = Info.LifetimeStart[0];
+ IRB.SetInsertPoint(Start->getNextNode());
+ auto TagEnd = [&](Instruction *Node) {
+ IRB.SetInsertPoint(Node);
+ Value *UARTag = getUARTag(IRB, StackTag);
+ tagAlloca(IRB, AI, UARTag, AlignedSize);
+ };
+ tagAlloca(IRB, AI, Tag, Size);
+ if (!forAllReachableExits(GetDT(), GetPDT(), Start, Info.LifetimeEnd,
+ RetVec, TagEnd)) {
+ for (auto *End : Info.LifetimeEnd)
+ End->eraseFromParent();
+ }
+ } else {
+ tagAlloca(IRB, AI, Tag, Size);
+ for (auto *RI : RetVec) {
+ IRB.SetInsertPoint(RI);
+ Value *UARTag = getUARTag(IRB, StackTag);
+ tagAlloca(IRB, AI, UARTag, AlignedSize);
+ }
+ if (!StandardLifetime) {
+ for (auto &II : Info.LifetimeStart)
+ II->eraseFromParent();
+ for (auto &II : Info.LifetimeEnd)
+ II->eraseFromParent();
+ }
}
}
-
+ for (auto &I : UnrecognizedLifetimes)
+ I->eraseFromParent();
return true;
}
@@ -1300,7 +1457,42 @@ bool HWAddressSanitizer::isInterestingAlloca(const AllocaInst &AI) {
!(SSI && SSI->isSafe(AI));
}
-bool HWAddressSanitizer::sanitizeFunction(Function &F) {
+DenseMap<AllocaInst *, AllocaInst *> HWAddressSanitizer::padInterestingAllocas(
+ const MapVector<AllocaInst *, AllocaInfo> &AllocasToInstrument) {
+ DenseMap<AllocaInst *, AllocaInst *> AllocaToPaddedAllocaMap;
+ for (auto &KV : AllocasToInstrument) {
+ AllocaInst *AI = KV.first;
+ uint64_t Size = getAllocaSizeInBytes(*AI);
+ uint64_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
+ AI->setAlignment(
+ Align(std::max(AI->getAlignment(), Mapping.getObjectAlignment())));
+ if (Size != AlignedSize) {
+ Type *AllocatedType = AI->getAllocatedType();
+ if (AI->isArrayAllocation()) {
+ uint64_t ArraySize =
+ cast<ConstantInt>(AI->getArraySize())->getZExtValue();
+ AllocatedType = ArrayType::get(AllocatedType, ArraySize);
+ }
+ Type *TypeWithPadding = StructType::get(
+ AllocatedType, ArrayType::get(Int8Ty, AlignedSize - Size));
+ auto *NewAI = new AllocaInst(
+ TypeWithPadding, AI->getType()->getAddressSpace(), nullptr, "", AI);
+ NewAI->takeName(AI);
+ NewAI->setAlignment(AI->getAlign());
+ NewAI->setUsedWithInAlloca(AI->isUsedWithInAlloca());
+ NewAI->setSwiftError(AI->isSwiftError());
+ NewAI->copyMetadata(*AI);
+ auto *Bitcast = new BitCastInst(NewAI, AI->getType(), "", AI);
+ AI->replaceAllUsesWith(Bitcast);
+ AllocaToPaddedAllocaMap[AI] = NewAI;
+ }
+ }
+ return AllocaToPaddedAllocaMap;
+}
+
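padInterestingAllocas rounds each instrumented alloca up to the shadow granule and appends the slack as an i8 array, so that small uninstrumented allocas cannot hide in the padding and short granules have room for their tags. A minimal sketch of the size computation; the 16-byte granule here is an assumption, and the real value comes from Mapping.getObjectAlignment():

#include <cstdint>

constexpr uint64_t kObjectGranule = 16; // assumed granule size

static uint64_t alignUp(uint64_t N, uint64_t A) { return (N + A - 1) / A * A; }

// Number of i8 padding bytes to append so an allocation of Size bytes ends
// on a granule boundary.
uint64_t allocaPaddingBytes(uint64_t Size) {
  return alignUp(Size, kObjectGranule) - Size;
}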
+bool HWAddressSanitizer::sanitizeFunction(
+ Function &F, llvm::function_ref<const DominatorTree &()> GetDT,
+ llvm::function_ref<const PostDominatorTree &()> GetPDT) {
if (&F == HwasanCtorFunction)
return false;
@@ -1311,18 +1503,36 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) {
SmallVector<InterestingMemoryOperand, 16> OperandsToInstrument;
SmallVector<MemIntrinsic *, 16> IntrinToInstrument;
- SmallVector<AllocaInst *, 8> AllocasToInstrument;
+ MapVector<AllocaInst *, AllocaInfo> AllocasToInstrument;
SmallVector<Instruction *, 8> RetVec;
SmallVector<Instruction *, 8> LandingPadVec;
+ SmallVector<Instruction *, 4> UnrecognizedLifetimes;
DenseMap<AllocaInst *, std::vector<DbgVariableIntrinsic *>> AllocaDbgMap;
for (auto &BB : F) {
for (auto &Inst : BB) {
- if (InstrumentStack)
+ if (InstrumentStack) {
if (AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
if (isInterestingAlloca(*AI))
- AllocasToInstrument.push_back(AI);
+ AllocasToInstrument.insert({AI, {}});
+ continue;
+ }
+ auto *II = dyn_cast<IntrinsicInst>(&Inst);
+ if (II && (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end)) {
+ AllocaInst *AI = findAllocaForValue(II->getArgOperand(1));
+ if (!AI) {
+ UnrecognizedLifetimes.push_back(&Inst);
+ continue;
+ }
+ if (!isInterestingAlloca(*AI))
+ continue;
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start)
+ AllocasToInstrument[AI].LifetimeStart.push_back(II);
+ else
+ AllocasToInstrument[AI].LifetimeEnd.push_back(II);
continue;
}
+ }
if (isa<ReturnInst>(Inst) || isa<ResumeInst>(Inst) ||
isa<CleanupReturnInst>(Inst))
@@ -1343,7 +1553,8 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) {
getInterestingMemoryOperands(&Inst, OperandsToInstrument);
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&Inst))
- IntrinToInstrument.push_back(MI);
+ if (!ignoreMemIntrinsic(MI))
+ IntrinToInstrument.push_back(MI);
}
}
@@ -1377,38 +1588,14 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) {
if (!AllocasToInstrument.empty()) {
Value *StackTag =
ClGenerateTagsWithCalls ? nullptr : getStackBaseTag(EntryIRB);
- instrumentStack(AllocasToInstrument, AllocaDbgMap, RetVec, StackTag);
+ instrumentStack(AllocasToInstrument, UnrecognizedLifetimes, AllocaDbgMap,
+ RetVec, StackTag, GetDT, GetPDT);
}
// Pad and align each of the allocas that we instrumented to stop small
// uninteresting allocas from hiding in instrumented alloca's padding and so
// that we have enough space to store real tags for short granules.
- DenseMap<AllocaInst *, AllocaInst *> AllocaToPaddedAllocaMap;
- for (AllocaInst *AI : AllocasToInstrument) {
- uint64_t Size = getAllocaSizeInBytes(*AI);
- uint64_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
- AI->setAlignment(
- Align(std::max(AI->getAlignment(), Mapping.getObjectAlignment())));
- if (Size != AlignedSize) {
- Type *AllocatedType = AI->getAllocatedType();
- if (AI->isArrayAllocation()) {
- uint64_t ArraySize =
- cast<ConstantInt>(AI->getArraySize())->getZExtValue();
- AllocatedType = ArrayType::get(AllocatedType, ArraySize);
- }
- Type *TypeWithPadding = StructType::get(
- AllocatedType, ArrayType::get(Int8Ty, AlignedSize - Size));
- auto *NewAI = new AllocaInst(
- TypeWithPadding, AI->getType()->getAddressSpace(), nullptr, "", AI);
- NewAI->takeName(AI);
- NewAI->setAlignment(AI->getAlign());
- NewAI->setUsedWithInAlloca(AI->isUsedWithInAlloca());
- NewAI->setSwiftError(AI->isSwiftError());
- NewAI->copyMetadata(*AI);
- auto *Bitcast = new BitCastInst(NewAI, AI->getType(), "", AI);
- AI->replaceAllUsesWith(Bitcast);
- AllocaToPaddedAllocaMap[AI] = NewAI;
- }
- }
+ DenseMap<AllocaInst *, AllocaInst *> AllocaToPaddedAllocaMap =
+ padInterestingAllocas(AllocasToInstrument);
if (!AllocaToPaddedAllocaMap.empty()) {
for (auto &BB : F) {
@@ -1434,13 +1621,11 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) {
// dynamic allocas.
if (EntryIRB.GetInsertBlock() != &F.getEntryBlock()) {
InsertPt = &*F.getEntryBlock().begin();
- for (auto II = EntryIRB.GetInsertBlock()->begin(),
- IE = EntryIRB.GetInsertBlock()->end();
- II != IE;) {
- Instruction *I = &*II++;
- if (auto *AI = dyn_cast<AllocaInst>(I))
+ for (Instruction &I :
+ llvm::make_early_inc_range(*EntryIRB.GetInsertBlock())) {
+ if (auto *AI = dyn_cast<AllocaInst>(&I))
if (isa<ConstantInt>(AI->getArraySize()))
- I->moveBefore(InsertPt);
+ I.moveBefore(InsertPt);
}
}
@@ -1586,9 +1771,10 @@ void HWAddressSanitizer::instrumentGlobals() {
Hasher.update(M.getSourceFileName());
MD5::MD5Result Hash;
Hasher.final(Hash);
- uint8_t Tag = Hash[0] & TagMaskByte;
+ uint8_t Tag = Hash[0];
for (GlobalVariable *GV : Globals) {
+ Tag &= TagMaskByte;
// Skip tag 0 in order to avoid collisions with untagged memory.
if (Tag == 0)
Tag = 1;
diff --git a/llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp b/llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp
index 071feb876540..3ea314329079 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp
@@ -1,9 +1,8 @@
//===- InstrOrderFile.cpp ---- Late IR instrumentation for order file ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 0d257bb6bd52..ad21fec269ec 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -446,13 +446,12 @@ bool InstrProfiling::lowerIntrinsics(Function *F) {
bool MadeChange = false;
PromotionCandidates.clear();
for (BasicBlock &BB : *F) {
- for (auto I = BB.begin(), E = BB.end(); I != E;) {
- auto Instr = I++;
- InstrProfIncrementInst *Inc = castToIncrementInst(&*Instr);
+ for (Instruction &Instr : llvm::make_early_inc_range(BB)) {
+ InstrProfIncrementInst *Inc = castToIncrementInst(&Instr);
if (Inc) {
lowerIncrement(Inc);
MadeChange = true;
- } else if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(Instr)) {
+ } else if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(&Instr)) {
lowerValueProfileInst(Ind);
MadeChange = true;
}
@@ -520,6 +519,14 @@ void InstrProfiling::promoteCounterLoadStores(Function *F) {
}
}
+static bool needsRuntimeHookUnconditionally(const Triple &TT) {
+ // On Fuchsia, we only need runtime hook if any counters are present.
+ if (TT.isOSFuchsia())
+ return false;
+
+ return true;
+}
+
/// Check if the module contains uses of any profiling intrinsics.
static bool containsProfilingIntrinsics(Module &M) {
if (auto *F = M.getFunction(
@@ -548,8 +555,11 @@ bool InstrProfiling::run(
UsedVars.clear();
TT = Triple(M.getTargetTriple());
+ bool MadeChange = false;
+
// Emit the runtime hook even if no counters are present.
- bool MadeChange = emitRuntimeHook();
+ if (needsRuntimeHookUnconditionally(TT))
+ MadeChange = emitRuntimeHook();
// Improve compile time by avoiding linear scans when there is no work.
GlobalVariable *CoverageNamesVar =
@@ -588,6 +598,7 @@ bool InstrProfiling::run(
emitVNodes();
emitNameData();
+ emitRuntimeHook();
emitRegistration();
emitUses();
emitInitialization();
@@ -692,7 +703,6 @@ void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
LoadInst *LI = dyn_cast<LoadInst>(&I);
if (!LI) {
IRBuilder<> Builder(&I);
- Type *Int64Ty = Type::getInt64Ty(M->getContext());
GlobalVariable *Bias = M->getGlobalVariable(getInstrProfCounterBiasVarName());
if (!Bias) {
// Compiler must define this variable when runtime counter relocation
@@ -747,14 +757,18 @@ void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageNamesVar) {
}
/// Get the name of a profiling variable for a particular function.
-static std::string getVarName(InstrProfIncrementInst *Inc, StringRef Prefix) {
+static std::string getVarName(InstrProfIncrementInst *Inc, StringRef Prefix,
+ bool &Renamed) {
StringRef NamePrefix = getInstrProfNameVarPrefix();
StringRef Name = Inc->getName()->getName().substr(NamePrefix.size());
Function *F = Inc->getParent()->getParent();
Module *M = F->getParent();
if (!DoHashBasedCounterSplit || !isIRPGOFlagSet(M) ||
- !canRenameComdatFunc(*F))
+ !canRenameComdatFunc(*F)) {
+ Renamed = false;
return (Prefix + Name).str();
+ }
+ Renamed = true;
uint64_t FuncHash = Inc->getHash()->getZExtValue();
SmallVector<char, 24> HashPostfix;
if (Name.endswith((Twine(".") + Twine(FuncHash)).toStringRef(HashPostfix)))
@@ -848,6 +862,15 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
GlobalValue::LinkageTypes Linkage = NamePtr->getLinkage();
GlobalValue::VisibilityTypes Visibility = NamePtr->getVisibility();
+ // Due to a limitation of the binder (as of 2021/09/28), duplicate weak
+ // symbols in the same csect are not discarded. When duplicate weak symbols
+ // are present, we cannot guarantee that relocations resolve to the intended
+ // weak symbol, so we cannot ensure the correctness of the relative
+ // CounterPtr; therefore we have to use private linkage for counter and data
+ // symbols.
+ if (TT.isOSBinFormatXCOFF()) {
+ Linkage = GlobalValue::PrivateLinkage;
+ Visibility = GlobalValue::DefaultVisibility;
+ }
// Move the name variable to the right section. Place them in a COMDAT group
// if the associated function is a COMDAT. This will make sure that only one
// copy of counters of the COMDAT function will be emitted after linking. Keep
@@ -867,8 +890,11 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
// discarded.
bool DataReferencedByCode = profDataReferencedByCode(*M);
bool NeedComdat = needsComdatForCounter(*Fn, *M);
- std::string CntsVarName = getVarName(Inc, getInstrProfCountersVarPrefix());
- std::string DataVarName = getVarName(Inc, getInstrProfDataVarPrefix());
+ bool Renamed;
+ std::string CntsVarName =
+ getVarName(Inc, getInstrProfCountersVarPrefix(), Renamed);
+ std::string DataVarName =
+ getVarName(Inc, getInstrProfDataVarPrefix(), Renamed);
auto MaybeSetComdat = [&](GlobalVariable *GV) {
bool UseComdat = (NeedComdat || TT.isOSBinFormatELF());
if (UseComdat) {
@@ -909,7 +935,7 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
ArrayType *ValuesTy = ArrayType::get(Type::getInt64Ty(Ctx), NS);
auto *ValuesVar = new GlobalVariable(
*M, ValuesTy, false, Linkage, Constant::getNullValue(ValuesTy),
- getVarName(Inc, getInstrProfValuesVarPrefix()));
+ getVarName(Inc, getInstrProfValuesVarPrefix(), Renamed));
ValuesVar->setVisibility(Visibility);
ValuesVar->setSection(
getInstrProfSectionName(IPSK_vals, TT.getObjectFormat()));
@@ -920,6 +946,7 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
}
// Create data variable.
+ auto *IntPtrTy = M->getDataLayout().getIntPtrType(M->getContext());
auto *Int16Ty = Type::getInt16Ty(Ctx);
auto *Int16ArrayTy = ArrayType::get(Int16Ty, IPVK_Last + 1);
Type *DataTypes[] = {
@@ -936,10 +963,6 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
Int16ArrayVals[Kind] = ConstantInt::get(Int16Ty, PD.NumValueSites[Kind]);
- Constant *DataVals[] = {
-#define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init,
-#include "llvm/ProfileData/InstrProfData.inc"
- };
// If the data variable is not referenced by code (if we don't emit
// @llvm.instrprof.value.profile, NS will be 0), and the counter keeps the
// data variable live under linker GC, the data variable can be private. This
@@ -947,14 +970,30 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
//
// On COFF, a comdat leader cannot be local so we require DataReferencedByCode
// to be false.
- if (NS == 0 && (TT.isOSBinFormatELF() ||
- (!DataReferencedByCode && TT.isOSBinFormatCOFF()))) {
+ //
+ // If profd is in a deduplicate comdat, NS==0 with a hash suffix guarantees
+ // that other copies must have the same CFG and cannot have value profiling.
+ // If no hash suffix, other profd copies may be referenced by code.
+ if (NS == 0 && !(DataReferencedByCode && NeedComdat && !Renamed) &&
+ (TT.isOSBinFormatELF() ||
+ (!DataReferencedByCode && TT.isOSBinFormatCOFF()))) {
Linkage = GlobalValue::PrivateLinkage;
Visibility = GlobalValue::DefaultVisibility;
}
auto *Data =
- new GlobalVariable(*M, DataTy, false, Linkage,
- ConstantStruct::get(DataTy, DataVals), DataVarName);
+ new GlobalVariable(*M, DataTy, false, Linkage, nullptr, DataVarName);
+ // Reference the counter variable with a label difference (link-time
+ // constant).
+ auto *RelativeCounterPtr =
+ ConstantExpr::getSub(ConstantExpr::getPtrToInt(CounterPtr, IntPtrTy),
+ ConstantExpr::getPtrToInt(Data, IntPtrTy));
+
+ Constant *DataVals[] = {
+#define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init,
+#include "llvm/ProfileData/InstrProfData.inc"
+ };
+ Data->setInitializer(ConstantStruct::get(DataTy, DataVals));
+
Data->setVisibility(Visibility);
Data->setSection(getInstrProfSectionName(IPSK_data, TT.getObjectFormat()));
Data->setAlignment(Align(INSTR_PROF_DATA_ALIGNMENT));
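The data record above now refers to its counter array through a label difference, ptrtoint(CounterPtr) - ptrtoint(Data), which is a link-time constant; a consumer recovers the absolute counter address by adding that offset back to the data record's own address. A tiny standalone sketch of the round trip over placeholder structs:

#include <cassert>
#include <cstdint>

struct Counters { uint64_t C[4]; };
struct ProfData { int64_t RelativeCounterPtr; /* other fields elided */ };

static Counters TheCounters;
static ProfData TheData;

int main() {
  // Emitter: store the counter array as an offset from the data record.
  TheData.RelativeCounterPtr =
      (int64_t)(intptr_t)&TheCounters - (int64_t)(intptr_t)&TheData;
  // Reader: reconstruct the absolute address from the record's own address.
  auto *Recovered =
      (Counters *)((intptr_t)&TheData + TheData.RelativeCounterPtr);
  assert(Recovered == &TheCounters);
  return 0;
}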
@@ -1035,7 +1074,7 @@ void InstrProfiling::emitNameData() {
std::string CompressedNameStr;
if (Error E = collectPGOFuncNameStrings(ReferencedNames, CompressedNameStr,
DoInstrProfNameCompression)) {
- report_fatal_error(toString(std::move(E)), false);
+ report_fatal_error(Twine(toString(std::move(E))), false);
}
auto &Ctx = M->getContext();
@@ -1102,9 +1141,9 @@ void InstrProfiling::emitRegistration() {
}
bool InstrProfiling::emitRuntimeHook() {
- // We expect the linker to be invoked with -u<hook_var> flag for Linux or
- // Fuchsia, in which case there is no need to emit the user function.
- if (TT.isOSLinux() || TT.isOSFuchsia())
+ // We expect the linker to be invoked with the -u<hook_var> flag on Linux,
+ // in which case there is no need to emit the external variable.
+ if (TT.isOSLinux())
return false;
// If the module's provided its own runtime, we don't need to do anything.
@@ -1117,23 +1156,28 @@ bool InstrProfiling::emitRuntimeHook() {
new GlobalVariable(*M, Int32Ty, false, GlobalValue::ExternalLinkage,
nullptr, getInstrProfRuntimeHookVarName());
- // Make a function that uses it.
- auto *User = Function::Create(FunctionType::get(Int32Ty, false),
- GlobalValue::LinkOnceODRLinkage,
- getInstrProfRuntimeHookVarUseFuncName(), M);
- User->addFnAttr(Attribute::NoInline);
- if (Options.NoRedZone)
- User->addFnAttr(Attribute::NoRedZone);
- User->setVisibility(GlobalValue::HiddenVisibility);
- if (TT.supportsCOMDAT())
- User->setComdat(M->getOrInsertComdat(User->getName()));
-
- IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", User));
- auto *Load = IRB.CreateLoad(Int32Ty, Var);
- IRB.CreateRet(Load);
-
- // Mark the user variable as used so that it isn't stripped out.
- CompilerUsedVars.push_back(User);
+ if (TT.isOSBinFormatELF()) {
+ // Mark the user variable as used so that it isn't stripped out.
+ CompilerUsedVars.push_back(Var);
+ } else {
+ // Make a function that uses it.
+ auto *User = Function::Create(FunctionType::get(Int32Ty, false),
+ GlobalValue::LinkOnceODRLinkage,
+ getInstrProfRuntimeHookVarUseFuncName(), M);
+ User->addFnAttr(Attribute::NoInline);
+ if (Options.NoRedZone)
+ User->addFnAttr(Attribute::NoRedZone);
+ User->setVisibility(GlobalValue::HiddenVisibility);
+ if (TT.supportsCOMDAT())
+ User->setComdat(M->getOrInsertComdat(User->getName()));
+
+ IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", User));
+ auto *Load = IRB.CreateLoad(Int32Ty, Var);
+ IRB.CreateRet(Load);
+
+ // Mark the function as used so that it isn't stripped out.
+ CompilerUsedVars.push_back(User);
+ }
return true;
}
@@ -1142,12 +1186,12 @@ void InstrProfiling::emitUses() {
// GlobalOpt/ConstantMerge) may not discard associated sections as a unit, so
// we conservatively retain all unconditionally in the compiler.
//
- // On ELF, the linker can guarantee the associated sections will be retained
- // or discarded as a unit, so llvm.compiler.used is sufficient. Similarly on
- // COFF, if prof data is not referenced by code we use one comdat and ensure
- // this GC property as well. Otherwise, we have to conservatively make all of
- // the sections retained by the linker.
- if (TT.isOSBinFormatELF() ||
+ // On ELF and Mach-O, the linker can guarantee the associated sections will be
+ // retained or discarded as a unit, so llvm.compiler.used is sufficient.
+ // Similarly on COFF, if prof data is not referenced by code we use one comdat
+ // and ensure this GC property as well. Otherwise, we have to conservatively
+ // make all of the sections retained by the linker.
+ if (TT.isOSBinFormatELF() || TT.isOSBinFormatMachO() ||
(TT.isOSBinFormatCOFF() && !profDataReferencedByCode(*M)))
appendToCompilerUsed(*M, CompilerUsedVars);
else
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index 0e6a404a9e0b..727672fa0605 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -19,6 +19,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
@@ -107,6 +108,10 @@ static cl::opt<int>
cl::desc("granularity of memprof shadow mapping"),
cl::Hidden, cl::init(DefaultShadowGranularity));
+static cl::opt<bool> ClStack("memprof-instrument-stack",
+ cl::desc("Instrument scalar stack variables"),
+ cl::Hidden, cl::init(false));
+
// Debug flags.
static cl::opt<int> ClDebug("memprof-debug", cl::desc("debug"), cl::Hidden,
@@ -123,6 +128,8 @@ static cl::opt<int> ClDebugMax("memprof-debug-max", cl::desc("Debug max inst"),
STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
+STATISTIC(NumSkippedStackReads, "Number of non-instrumented stack reads");
+STATISTIC(NumSkippedStackWrites, "Number of non-instrumented stack writes");
namespace {
@@ -255,8 +262,6 @@ PreservedAnalyses MemProfilerPass::run(Function &F,
if (Profiler.instrumentFunction(F))
return PreservedAnalyses::none();
return PreservedAnalyses::all();
-
- return PreservedAnalyses::all();
}
ModuleMemProfilerPass::ModuleMemProfilerPass() {}
@@ -448,6 +453,15 @@ void MemProfiler::instrumentMaskedLoadOrStore(const DataLayout &DL, Value *Mask,
void MemProfiler::instrumentMop(Instruction *I, const DataLayout &DL,
InterestingMemoryAccess &Access) {
+ // Skip instrumentation of stack accesses unless requested.
+ if (!ClStack && isa<AllocaInst>(getUnderlyingObject(Access.Addr))) {
+ if (Access.IsWrite)
+ ++NumSkippedStackWrites;
+ else
+ ++NumSkippedStackReads;
+ return;
+ }
+
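With the new -memprof-instrument-stack option left at its default (off), accesses whose underlying object is a stack allocation are skipped and only recorded in the new statistics counters. A standalone sketch of that filter over a placeholder access description:

#include <cstdint>

struct Access {
  bool IsWrite;
  bool UnderlyingObjectIsAlloca; // stands in for isa<AllocaInst>(getUnderlyingObject(Addr))
};

static bool ClStack = false; // mirrors -memprof-instrument-stack (default off)
static uint64_t NumSkippedStackReads, NumSkippedStackWrites;

// Returns true when the access should actually be instrumented.
bool shouldInstrument(const Access &A) {
  if (!ClStack && A.UnderlyingObjectIsAlloca) {
    if (A.IsWrite)
      ++NumSkippedStackWrites;
    else
      ++NumSkippedStackReads;
    return false;
  }
  return true;
}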
if (Access.IsWrite)
NumInstrumentedWrites++;
else
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 4e755bab15f3..4d15b784f486 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -673,14 +673,27 @@ PreservedAnalyses MemorySanitizerPass::run(Function &F,
return PreservedAnalyses::all();
}
-PreservedAnalyses MemorySanitizerPass::run(Module &M,
- ModuleAnalysisManager &AM) {
+PreservedAnalyses
+ModuleMemorySanitizerPass::run(Module &M, ModuleAnalysisManager &AM) {
if (Options.Kernel)
return PreservedAnalyses::all();
insertModuleCtor(M);
return PreservedAnalyses::none();
}
+void MemorySanitizerPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<MemorySanitizerPass> *>(this)->printPipeline(
+ OS, MapClassName2PassName);
+ OS << "<";
+ if (Options.Recover)
+ OS << "recover;";
+ if (Options.Kernel)
+ OS << "kernel;";
+ OS << "track-origins=" << Options.TrackOrigins;
+ OS << ">";
+}
+
char MemorySanitizerLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(MemorySanitizerLegacyPass, "msan",
@@ -1695,7 +1708,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
if (FArgEagerCheck) {
*ShadowPtr = getCleanShadow(V);
setOrigin(A, getCleanOrigin());
- continue;
+ break;
} else if (FArgByVal) {
Value *Base = getShadowPtrForArgument(&FArg, EntryIRB, ArgOffset);
// ByVal pointer itself has clean shadow. We copy the actual
@@ -1745,8 +1758,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
break;
}
- if (!FArgEagerCheck)
- ArgOffset += alignTo(Size, kShadowTLSAlignment);
+ ArgOffset += alignTo(Size, kShadowTLSAlignment);
}
assert(*ShadowPtr && "Could not find shadow for an argument");
return *ShadowPtr;
@@ -2661,7 +2673,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
RetTy->isX86_MMXTy()))
return false;
- unsigned NumArgOperands = I.getNumArgOperands();
+ unsigned NumArgOperands = I.arg_size();
for (unsigned i = 0; i < NumArgOperands; ++i) {
Type *Ty = I.getArgOperand(i)->getType();
if (Ty != RetTy)
@@ -2688,7 +2700,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/// We special-case intrinsics where this approach fails. See llvm.bswap
/// handling as an example of that.
bool handleUnknownIntrinsic(IntrinsicInst &I) {
- unsigned NumArgOperands = I.getNumArgOperands();
+ unsigned NumArgOperands = I.arg_size();
if (NumArgOperands == 0)
return false;
@@ -2762,10 +2774,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Value *CopyOp, *ConvertOp;
assert((!HasRoundingMode ||
- isa<ConstantInt>(I.getArgOperand(I.getNumArgOperands() - 1))) &&
+ isa<ConstantInt>(I.getArgOperand(I.arg_size() - 1))) &&
"Invalid rounding mode");
- switch (I.getNumArgOperands() - HasRoundingMode) {
+ switch (I.arg_size() - HasRoundingMode) {
case 2:
CopyOp = I.getArgOperand(0);
ConvertOp = I.getArgOperand(1);
@@ -2854,7 +2866,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// size, and the rest is ignored. Behavior is defined even if shift size is
// greater than register (or field) width.
void handleVectorShiftIntrinsic(IntrinsicInst &I, bool Variable) {
- assert(I.getNumArgOperands() == 2);
+ assert(I.arg_size() == 2);
IRBuilder<> IRB(&I);
// If any of the S2 bits are poisoned, the whole thing is poisoned.
// Otherwise perform the same shift on S1.
@@ -2919,7 +2931,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// to sext(Sa != zeroinitializer), sext(Sb != zeroinitializer).
// EltSizeInBits is used only for x86mmx arguments.
void handleVectorPackIntrinsic(IntrinsicInst &I, unsigned EltSizeInBits = 0) {
- assert(I.getNumArgOperands() == 2);
+ assert(I.arg_size() == 2);
bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy();
IRBuilder<> IRB(&I);
Value *S1 = getShadow(&I, 0);
@@ -3653,9 +3665,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
.addAttribute(Attribute::ArgMemOnly)
.addAttribute(Attribute::Speculatable);
- Call->removeAttributes(AttributeList::FunctionIndex, B);
+ Call->removeFnAttrs(B);
if (Function *Func = Call->getCalledFunction()) {
- Func->removeAttributes(AttributeList::FunctionIndex, B);
+ Func->removeFnAttrs(B);
}
maybeMarkSanitizerLibraryCallNoBuiltin(Call, TLI);
@@ -3696,42 +3708,48 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
if (EagerCheck) {
insertShadowCheck(A, &CB);
- continue;
- }
- if (ByVal) {
- // ByVal requires some special handling as it's too big for a single
- // load
- assert(A->getType()->isPointerTy() &&
- "ByVal argument is not a pointer!");
- Size = DL.getTypeAllocSize(CB.getParamByValType(i));
- if (ArgOffset + Size > kParamTLSSize) break;
- const MaybeAlign ParamAlignment(CB.getParamAlign(i));
- MaybeAlign Alignment = llvm::None;
- if (ParamAlignment)
- Alignment = std::min(*ParamAlignment, kShadowTLSAlignment);
- Value *AShadowPtr =
- getShadowOriginPtr(A, IRB, IRB.getInt8Ty(), Alignment,
- /*isStore*/ false)
- .first;
-
- Store = IRB.CreateMemCpy(ArgShadowBase, Alignment, AShadowPtr,
- Alignment, Size);
- // TODO(glider): need to copy origins.
- } else {
- // Any other parameters mean we need bit-grained tracking of uninit data
Size = DL.getTypeAllocSize(A->getType());
- if (ArgOffset + Size > kParamTLSSize) break;
- Store = IRB.CreateAlignedStore(ArgShadow, ArgShadowBase,
- kShadowTLSAlignment);
- Constant *Cst = dyn_cast<Constant>(ArgShadow);
- if (Cst && Cst->isNullValue()) ArgIsInitialized = true;
+ } else {
+ if (ByVal) {
+ // ByVal requires some special handling as it's too big for a single
+ // load
+ assert(A->getType()->isPointerTy() &&
+ "ByVal argument is not a pointer!");
+ Size = DL.getTypeAllocSize(CB.getParamByValType(i));
+ if (ArgOffset + Size > kParamTLSSize)
+ break;
+ const MaybeAlign ParamAlignment(CB.getParamAlign(i));
+ MaybeAlign Alignment = llvm::None;
+ if (ParamAlignment)
+ Alignment = std::min(*ParamAlignment, kShadowTLSAlignment);
+ Value *AShadowPtr =
+ getShadowOriginPtr(A, IRB, IRB.getInt8Ty(), Alignment,
+ /*isStore*/ false)
+ .first;
+
+ Store = IRB.CreateMemCpy(ArgShadowBase, Alignment, AShadowPtr,
+ Alignment, Size);
+ // TODO(glider): need to copy origins.
+ } else {
+ // Any other parameters mean we need bit-grained tracking of uninit
+ // data
+ Size = DL.getTypeAllocSize(A->getType());
+ if (ArgOffset + Size > kParamTLSSize)
+ break;
+ Store = IRB.CreateAlignedStore(ArgShadow, ArgShadowBase,
+ kShadowTLSAlignment);
+ Constant *Cst = dyn_cast<Constant>(ArgShadow);
+ if (Cst && Cst->isNullValue())
+ ArgIsInitialized = true;
+ }
+ if (MS.TrackOrigins && !ArgIsInitialized)
+ IRB.CreateStore(getOrigin(A),
+ getOriginPtrForArgument(A, IRB, ArgOffset));
+ (void)Store;
+ assert(Store != nullptr);
+ LLVM_DEBUG(dbgs() << " Param:" << *Store << "\n");
}
- if (MS.TrackOrigins && !ArgIsInitialized)
- IRB.CreateStore(getOrigin(A),
- getOriginPtrForArgument(A, IRB, ArgOffset));
- (void)Store;
- assert(Size != 0 && Store != nullptr);
- LLVM_DEBUG(dbgs() << " Param:" << *Store << "\n");
+ assert(Size != 0);
ArgOffset += alignTo(Size, kShadowTLSAlignment);
}
LLVM_DEBUG(dbgs() << " done with call args\n");
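The restructured call-argument loop above lays out one shadow slot per argument in the parameter TLS area: eagerly checked arguments now only advance ArgOffset, byval arguments get their shadow copied with a memcpy, other arguments get an aligned store, and the walk stops once the TLS buffer would overflow. A simplified standalone sketch of the offset bookkeeping, assuming MSan's usual 8-byte shadow alignment and 800-byte parameter TLS budget (those two constants are conventions of the pass, not taken from this hunk):

    #include <cstdint>
    #include <iostream>
    #include <vector>

    constexpr uint64_t kShadowTLSAlignment = 8; // assumed, mirrors MSan's Align(8)
    constexpr uint64_t kParamTLSSize = 800;     // assumed, mirrors MSan's TLS budget

    uint64_t alignTo(uint64_t Value, uint64_t Align) {
      return (Value + Align - 1) / Align * Align;
    }

    // Returns the TLS offset assigned to each argument, in order.
    std::vector<int64_t> layoutParamShadow(const std::vector<uint64_t> &ArgSizes) {
      std::vector<int64_t> Offsets;
      uint64_t ArgOffset = 0;
      for (uint64_t Size : ArgSizes) {
        if (ArgOffset + Size > kParamTLSSize)
          break; // remaining arguments get no shadow slot, as in the patch
        Offsets.push_back(static_cast<int64_t>(ArgOffset));
        // Every processed argument advances the offset, matching the now
        // unconditional "ArgOffset += alignTo(Size, kShadowTLSAlignment)".
        ArgOffset += alignTo(Size, kShadowTLSAlignment);
      }
      return Offsets;
    }

    int main() {
      for (int64_t Off : layoutParamShadow({4, 16, 1}))
        std::cout << Off << " "; // 0 8 24
      std::cout << "\n";
    }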
@@ -3807,7 +3825,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
if (isAMustTailRetVal(RetVal)) return;
Value *ShadowPtr = getShadowPtrForRetval(RetVal, IRB);
bool HasNoUndef =
- F.hasAttribute(AttributeList::ReturnIndex, Attribute::NoUndef);
+ F.hasRetAttribute(Attribute::NoUndef);
bool StoreShadow = !(ClEagerChecks && HasNoUndef);
// FIXME: Consider using SpecialCaseList to specify a list of functions that
// must always return fully initialized values. For now, we hardcode "main".
@@ -4176,7 +4194,7 @@ struct VarArgAMD64Helper : public VarArgHelper {
MemorySanitizerVisitor &MSV)
: F(F), MS(MS), MSV(MSV) {
AMD64FpEndOffset = AMD64FpEndOffsetSSE;
- for (const auto &Attr : F.getAttributes().getFnAttributes()) {
+ for (const auto &Attr : F.getAttributes().getFnAttrs()) {
if (Attr.isStringAttribute() &&
(Attr.getKindAsString() == "target-features")) {
if (Attr.getValueAsString().contains("-sse"))
@@ -5330,6 +5348,9 @@ bool MemorySanitizer::sanitizeFunction(Function &F, TargetLibraryInfo &TLI) {
if (!CompileKernel && F.getName() == kMsanModuleCtorName)
return false;
+ if (F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation))
+ return false;
+
MemorySanitizerVisitor Visitor(F, *this, TLI);
// Clear out readonly/readnone attributes.
@@ -5339,7 +5360,7 @@ bool MemorySanitizer::sanitizeFunction(Function &F, TargetLibraryInfo &TLI) {
.addAttribute(Attribute::WriteOnly)
.addAttribute(Attribute::ArgMemOnly)
.addAttribute(Attribute::Speculatable);
- F.removeAttributes(AttributeList::FunctionIndex, B);
+ F.removeFnAttrs(B);
return Visitor.runOnFunction();
}
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index 3d9261eb99ba..af5946325bbb 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -110,6 +110,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -198,12 +199,14 @@ static cl::opt<bool>
"warnings about missing profile data for "
"functions."));
+namespace llvm {
// Command line option to enable/disable the warning about a hash mismatch in
// the profile data.
-static cl::opt<bool>
+cl::opt<bool>
NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden,
cl::desc("Use this option to turn off/on "
"warnings about profile cfg mismatch."));
+} // namespace llvm
// Command line option to enable/disable the warning about a hash mismatch in
// the profile data for Comdat functions, which often turns out to be false
@@ -462,7 +465,10 @@ public:
private:
bool runOnModule(Module &M) override {
createProfileFileNameVar(M, InstrProfileOutput);
- createIRLevelProfileFlagVar(M, /* IsCS */ true, PGOInstrumentEntry);
+ // The variable in a comdat may be discarded by LTO. Ensure the
+ // declaration will be retained.
+ appendToCompilerUsed(
+ M, createIRLevelProfileFlagVar(M, /*IsCS=*/true, PGOInstrumentEntry));
return false;
}
std::string InstrProfileOutput;
@@ -1610,7 +1616,7 @@ static bool InstrumentAllFunctions(
// For the context-sensitve instrumentation, we should have a separated pass
// (before LTO/ThinLTO linking) to create these variables.
if (!IsCS)
- createIRLevelProfileFlagVar(M, /* IsCS */ false, PGOInstrumentEntry);
+ createIRLevelProfileFlagVar(M, /*IsCS=*/false, PGOInstrumentEntry);
std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
collectComdatMembers(M, ComdatMembers);
@@ -1630,7 +1636,10 @@ static bool InstrumentAllFunctions(
PreservedAnalyses
PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &AM) {
createProfileFileNameVar(M, CSInstrName);
- createIRLevelProfileFlagVar(M, /* IsCS */ true, PGOInstrumentEntry);
+ // The variable in a comdat may be discarded by LTO. Ensure the declaration
+ // will be retained.
+ appendToCompilerUsed(
+ M, createIRLevelProfileFlagVar(M, /*IsCS=*/true, PGOInstrumentEntry));
return PreservedAnalyses::all();
}
@@ -1677,7 +1686,7 @@ static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI,
BlockFrequencyInfo NBFI(F, NBPI, LI);
#ifndef NDEBUG
auto BFIEntryCount = F.getEntryCount();
- assert(BFIEntryCount.hasValue() && (BFIEntryCount.getCount() > 0) &&
+ assert(BFIEntryCount.hasValue() && (BFIEntryCount->getCount() > 0) &&
"Invalid BFI Entrycount");
#endif
auto SumCount = APFloat::getZero(APFloat::IEEEdouble());
diff --git a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
index 7607464cc0b9..da8ee1f15bf8 100644
--- a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
+++ b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
@@ -55,6 +55,16 @@ const char SanCovTraceConstCmp1[] = "__sanitizer_cov_trace_const_cmp1";
const char SanCovTraceConstCmp2[] = "__sanitizer_cov_trace_const_cmp2";
const char SanCovTraceConstCmp4[] = "__sanitizer_cov_trace_const_cmp4";
const char SanCovTraceConstCmp8[] = "__sanitizer_cov_trace_const_cmp8";
+const char SanCovLoad1[] = "__sanitizer_cov_load1";
+const char SanCovLoad2[] = "__sanitizer_cov_load2";
+const char SanCovLoad4[] = "__sanitizer_cov_load4";
+const char SanCovLoad8[] = "__sanitizer_cov_load8";
+const char SanCovLoad16[] = "__sanitizer_cov_load16";
+const char SanCovStore1[] = "__sanitizer_cov_store1";
+const char SanCovStore2[] = "__sanitizer_cov_store2";
+const char SanCovStore4[] = "__sanitizer_cov_store4";
+const char SanCovStore8[] = "__sanitizer_cov_store8";
+const char SanCovStore16[] = "__sanitizer_cov_store16";
const char SanCovTraceDiv4[] = "__sanitizer_cov_trace_div4";
const char SanCovTraceDiv8[] = "__sanitizer_cov_trace_div8";
const char SanCovTraceGep[] = "__sanitizer_cov_trace_gep";
@@ -122,6 +132,14 @@ static cl::opt<bool> ClDIVTracing("sanitizer-coverage-trace-divs",
cl::desc("Tracing of DIV instructions"),
cl::Hidden, cl::init(false));
+static cl::opt<bool> ClLoadTracing("sanitizer-coverage-trace-loads",
+ cl::desc("Tracing of load instructions"),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<bool> ClStoreTracing("sanitizer-coverage-trace-stores",
+ cl::desc("Tracing of store instructions"),
+ cl::Hidden, cl::init(false));
+
static cl::opt<bool> ClGEPTracing("sanitizer-coverage-trace-geps",
cl::desc("Tracing of GEP instructions"),
cl::Hidden, cl::init(false));
@@ -175,9 +193,11 @@ SanitizerCoverageOptions OverrideFromCL(SanitizerCoverageOptions Options) {
Options.PCTable |= ClCreatePCTable;
Options.NoPrune |= !ClPruneBlocks;
Options.StackDepth |= ClStackDepth;
+ Options.TraceLoads |= ClLoadTracing;
+ Options.TraceStores |= ClStoreTracing;
if (!Options.TracePCGuard && !Options.TracePC &&
!Options.Inline8bitCounters && !Options.StackDepth &&
- !Options.InlineBoolFlag)
+ !Options.InlineBoolFlag && !Options.TraceLoads && !Options.TraceStores)
Options.TracePCGuard = true; // TracePCGuard is default.
return Options;
}
@@ -207,6 +227,8 @@ private:
ArrayRef<BinaryOperator *> DivTraceTargets);
void InjectTraceForGep(Function &F,
ArrayRef<GetElementPtrInst *> GepTraceTargets);
+ void InjectTraceForLoadsAndStores(Function &F, ArrayRef<LoadInst *> Loads,
+ ArrayRef<StoreInst *> Stores);
void InjectTraceForSwitch(Function &F,
ArrayRef<Instruction *> SwitchTraceTargets);
bool InjectCoverage(Function &F, ArrayRef<BasicBlock *> AllBlocks,
@@ -234,14 +256,17 @@ private:
std::string getSectionEnd(const std::string &Section) const;
FunctionCallee SanCovTracePCIndir;
FunctionCallee SanCovTracePC, SanCovTracePCGuard;
- FunctionCallee SanCovTraceCmpFunction[4];
- FunctionCallee SanCovTraceConstCmpFunction[4];
- FunctionCallee SanCovTraceDivFunction[2];
+ std::array<FunctionCallee, 4> SanCovTraceCmpFunction;
+ std::array<FunctionCallee, 4> SanCovTraceConstCmpFunction;
+ std::array<FunctionCallee, 5> SanCovLoadFunction;
+ std::array<FunctionCallee, 5> SanCovStoreFunction;
+ std::array<FunctionCallee, 2> SanCovTraceDivFunction;
FunctionCallee SanCovTraceGepFunction;
FunctionCallee SanCovTraceSwitchFunction;
GlobalVariable *SanCovLowestStack;
- Type *IntptrTy, *IntptrPtrTy, *Int64Ty, *Int64PtrTy, *Int32Ty, *Int32PtrTy,
- *Int16Ty, *Int8Ty, *Int8PtrTy, *Int1Ty, *Int1PtrTy;
+ Type *Int128PtrTy, *IntptrTy, *IntptrPtrTy, *Int64Ty, *Int64PtrTy, *Int32Ty,
+ *Int32PtrTy, *Int16PtrTy, *Int16Ty, *Int8Ty, *Int8PtrTy, *Int1Ty,
+ *Int1PtrTy;
Module *CurModule;
std::string CurModuleUniqueId;
Triple TargetTriple;
@@ -411,7 +436,9 @@ bool ModuleSanitizerCoverage::instrumentModule(
IntptrPtrTy = PointerType::getUnqual(IntptrTy);
Type *VoidTy = Type::getVoidTy(*C);
IRBuilder<> IRB(*C);
+ Int128PtrTy = PointerType::getUnqual(IRB.getInt128Ty());
Int64PtrTy = PointerType::getUnqual(IRB.getInt64Ty());
+ Int16PtrTy = PointerType::getUnqual(IRB.getInt16Ty());
Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty());
Int8PtrTy = PointerType::getUnqual(IRB.getInt8Ty());
Int1PtrTy = PointerType::getUnqual(IRB.getInt1Ty());
@@ -452,6 +479,28 @@ bool ModuleSanitizerCoverage::instrumentModule(
SanCovTraceConstCmpFunction[3] =
M.getOrInsertFunction(SanCovTraceConstCmp8, VoidTy, Int64Ty, Int64Ty);
+ // Loads.
+ SanCovLoadFunction[0] = M.getOrInsertFunction(SanCovLoad1, VoidTy, Int8PtrTy);
+ SanCovLoadFunction[1] =
+ M.getOrInsertFunction(SanCovLoad2, VoidTy, Int16PtrTy);
+ SanCovLoadFunction[2] =
+ M.getOrInsertFunction(SanCovLoad4, VoidTy, Int32PtrTy);
+ SanCovLoadFunction[3] =
+ M.getOrInsertFunction(SanCovLoad8, VoidTy, Int64PtrTy);
+ SanCovLoadFunction[4] =
+ M.getOrInsertFunction(SanCovLoad16, VoidTy, Int128PtrTy);
+ // Stores.
+ SanCovStoreFunction[0] =
+ M.getOrInsertFunction(SanCovStore1, VoidTy, Int8PtrTy);
+ SanCovStoreFunction[1] =
+ M.getOrInsertFunction(SanCovStore2, VoidTy, Int16PtrTy);
+ SanCovStoreFunction[2] =
+ M.getOrInsertFunction(SanCovStore4, VoidTy, Int32PtrTy);
+ SanCovStoreFunction[3] =
+ M.getOrInsertFunction(SanCovStore8, VoidTy, Int64PtrTy);
+ SanCovStoreFunction[4] =
+ M.getOrInsertFunction(SanCovStore16, VoidTy, Int128PtrTy);
+
{
AttributeList AL;
AL = AL.addParamAttribute(*C, 0, Attribute::ZExt);
@@ -632,6 +681,8 @@ void ModuleSanitizerCoverage::instrumentFunction(
SmallVector<Instruction *, 8> SwitchTraceTargets;
SmallVector<BinaryOperator *, 8> DivTraceTargets;
SmallVector<GetElementPtrInst *, 8> GepTraceTargets;
+ SmallVector<LoadInst *, 8> Loads;
+ SmallVector<StoreInst *, 8> Stores;
const DominatorTree *DT = DTCallback(F);
const PostDominatorTree *PDT = PDTCallback(F);
@@ -661,6 +712,12 @@ void ModuleSanitizerCoverage::instrumentFunction(
if (Options.TraceGep)
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Inst))
GepTraceTargets.push_back(GEP);
+ if (Options.TraceLoads)
+ if (LoadInst *LI = dyn_cast<LoadInst>(&Inst))
+ Loads.push_back(LI);
+ if (Options.TraceStores)
+ if (StoreInst *SI = dyn_cast<StoreInst>(&Inst))
+ Stores.push_back(SI);
if (Options.StackDepth)
if (isa<InvokeInst>(Inst) ||
(isa<CallInst>(Inst) && !isa<IntrinsicInst>(Inst)))
@@ -674,6 +731,7 @@ void ModuleSanitizerCoverage::instrumentFunction(
InjectTraceForSwitch(F, SwitchTraceTargets);
InjectTraceForDiv(F, DivTraceTargets);
InjectTraceForGep(F, GepTraceTargets);
+ InjectTraceForLoadsAndStores(F, Loads, Stores);
}
GlobalVariable *ModuleSanitizerCoverage::CreateFunctionLocalArrayInSection(
@@ -857,6 +915,40 @@ void ModuleSanitizerCoverage::InjectTraceForGep(
}
}
+void ModuleSanitizerCoverage::InjectTraceForLoadsAndStores(
+ Function &, ArrayRef<LoadInst *> Loads, ArrayRef<StoreInst *> Stores) {
+ auto CallbackIdx = [&](const Value *Ptr) -> int {
+ auto ElementTy = cast<PointerType>(Ptr->getType())->getElementType();
+ uint64_t TypeSize = DL->getTypeStoreSizeInBits(ElementTy);
+ return TypeSize == 8 ? 0
+ : TypeSize == 16 ? 1
+ : TypeSize == 32 ? 2
+ : TypeSize == 64 ? 3
+ : TypeSize == 128 ? 4
+ : -1;
+ };
+ Type *PointerType[5] = {Int8PtrTy, Int16PtrTy, Int32PtrTy, Int64PtrTy,
+ Int128PtrTy};
+ for (auto LI : Loads) {
+ IRBuilder<> IRB(LI);
+ auto Ptr = LI->getPointerOperand();
+ int Idx = CallbackIdx(Ptr);
+ if (Idx < 0)
+ continue;
+ IRB.CreateCall(SanCovLoadFunction[Idx],
+ IRB.CreatePointerCast(Ptr, PointerType[Idx]));
+ }
+ for (auto SI : Stores) {
+ IRBuilder<> IRB(SI);
+ auto Ptr = SI->getPointerOperand();
+ int Idx = CallbackIdx(Ptr);
+ if (Idx < 0)
+ continue;
+ IRB.CreateCall(SanCovStoreFunction[Idx],
+ IRB.CreatePointerCast(Ptr, PointerType[Idx]));
+ }
+}
+
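The new InjectTraceForLoadsAndStores helper picks one of five callbacks from the store size of the pointee (8/16/32/64/128 bits) and passes the access address, cast to the matching pointer width; the instrumentation is gated by the -sanitizer-coverage-trace-loads and -sanitizer-coverage-trace-stores options added earlier in this hunk. A sketch of what matching runtime hooks could look like in a fuzzing harness, assuming C-level prototypes that mirror the void(iN*) callees registered above (the bodies are illustrative only; consult the sanitizer runtime you link against for the authoritative declarations):

    #include <cstdint>
    #include <cstdio>

    extern "C" {
    // Assumed prototype mirroring the void(i32*) callee registered above.
    void __sanitizer_cov_load4(uint32_t *Addr) {
      std::fprintf(stderr, "4-byte load at %p\n", static_cast<void *>(Addr));
    }
    // Assumed prototype mirroring the void(i64*) callee registered above.
    void __sanitizer_cov_store8(uint64_t *Addr) {
      std::fprintf(stderr, "8-byte store at %p\n", static_cast<void *>(Addr));
    }
    // __sanitizer_cov_load1/2/8/16 and __sanitizer_cov_store1/2/4/16 likewise.
    }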
void ModuleSanitizerCoverage::InjectTraceForCmp(
Function &, ArrayRef<Instruction *> CmpTraceTargets) {
for (auto I : CmpTraceTargets) {
diff --git a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index 063999a68236..f98e39d751f4 100644
--- a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -206,8 +206,8 @@ PreservedAnalyses ThreadSanitizerPass::run(Function &F,
return PreservedAnalyses::all();
}
-PreservedAnalyses ThreadSanitizerPass::run(Module &M,
- ModuleAnalysisManager &MAM) {
+PreservedAnalyses ModuleThreadSanitizerPass::run(Module &M,
+ ModuleAnalysisManager &MAM) {
insertModuleCtor(M);
return PreservedAnalyses::none();
}
@@ -249,8 +249,7 @@ void ThreadSanitizer::initialize(Module &M) {
IRBuilder<> IRB(M.getContext());
AttributeList Attr;
- Attr = Attr.addAttribute(M.getContext(), AttributeList::FunctionIndex,
- Attribute::NoUnwind);
+ Attr = Attr.addFnAttribute(M.getContext(), Attribute::NoUnwind);
// Initialize the callbacks.
TsanFuncEntry = M.getOrInsertFunction("__tsan_func_entry", Attr,
IRB.getVoidTy(), IRB.getInt8PtrTy());
@@ -563,6 +562,12 @@ bool ThreadSanitizer::sanitizeFunction(Function &F,
// all.
if (F.hasFnAttribute(Attribute::Naked))
return false;
+
+ // __attribute__(disable_sanitizer_instrumentation) prevents all kinds of
+ // instrumentation.
+ if (F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation))
+ return false;
+
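Both the TSan change here and the MSan change earlier in this patch bail out of sanitizeFunction when the function carries the DisableSanitizerInstrumentation attribute. At the source level that attribute is expected to come from the function annotation named in the comment above; a hypothetical C++ illustration, spelled with the usual Clang attribute syntax:

    // Hypothetical example: this function would be skipped by the checks added
    // above, so neither ThreadSanitizer nor MemorySanitizer instruments it.
    __attribute__((disable_sanitizer_instrumentation))
    void raw_tls_write(unsigned long *slot, unsigned long value) {
      *slot = value; // deliberately uninstrumented store
    }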
initialize(*F.getParent());
SmallVector<InstructionInfo, 8> AllLoadsAndStores;
SmallVector<Instruction*, 8> LocalLoadsAndStores;
@@ -580,7 +585,8 @@ bool ThreadSanitizer::sanitizeFunction(Function &F,
AtomicAccesses.push_back(&Inst);
else if (isa<LoadInst>(Inst) || isa<StoreInst>(Inst))
LocalLoadsAndStores.push_back(&Inst);
- else if (isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) {
+ else if ((isa<CallInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst)) ||
+ isa<InvokeInst>(Inst)) {
if (CallInst *CI = dyn_cast<CallInst>(&Inst))
maybeMarkSanitizerLibraryCallNoBuiltin(CI, &TLI);
if (isa<MemIntrinsic>(Inst))
diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp
index 06b12149f597..1ca6ddabac5b 100644
--- a/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp
+++ b/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp
@@ -103,9 +103,8 @@ CallInst *BundledRetainClaimRVs::insertRVCallWithColors(
Instruction *InsertPt, CallBase *AnnotatedCall,
const DenseMap<BasicBlock *, ColorVector> &BlockColors) {
IRBuilder<> Builder(InsertPt);
- bool IsRetainRV = objcarc::hasAttachedCallOpBundle(AnnotatedCall, true);
- Function *Func = EP.get(IsRetainRV ? ARCRuntimeEntryPointKind::RetainRV
- : ARCRuntimeEntryPointKind::ClaimRV);
+ Function *Func = *objcarc::getAttachedARCFunction(AnnotatedCall);
+ assert(Func && "operand isn't a Function");
Type *ParamTy = Func->getArg(0)->getType();
Value *CallArg = Builder.CreateBitCast(AnnotatedCall, ParamTy);
auto *Call =
@@ -115,16 +114,28 @@ CallInst *BundledRetainClaimRVs::insertRVCallWithColors(
}
BundledRetainClaimRVs::~BundledRetainClaimRVs() {
- if (ContractPass) {
- // At this point, we know that the annotated calls can't be tail calls as
- // they are followed by marker instructions and retainRV/claimRV calls. Mark
- // them as notail, so that the backend knows these calls can't be tail
- // calls.
- for (auto P : RVCalls)
- if (auto *CI = dyn_cast<CallInst>(P.second))
+ for (auto P : RVCalls) {
+ if (ContractPass) {
+ CallBase *CB = P.second;
+ // At this point, we know that the annotated calls can't be tail calls
+ // as they are followed by marker instructions and retainRV/claimRV
+ // calls. Mark them as notail so that the backend knows these calls
+ // can't be tail calls.
+ if (auto *CI = dyn_cast<CallInst>(CB))
CI->setTailCallKind(CallInst::TCK_NoTail);
- } else {
- for (auto P : RVCalls)
+
+ if (UseMarker) {
+ // Remove the retainRV/claimRV function operand from the operand bundle
+ // to reflect the fact that the backend is responsible for emitting only
+ // the marker instruction, but not the retainRV/claimRV call.
+ OperandBundleDef OB("clang.arc.attachedcall", None);
+ auto *NewCB = CallBase::Create(CB, OB, CB);
+ CB->replaceAllUsesWith(NewCB);
+ CB->eraseFromParent();
+ }
+ }
+
+ if (!ContractPass || !UseMarker)
EraseInstruction(P.first);
}
diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARC.h b/llvm/lib/Transforms/ObjCARC/ObjCARC.h
index 1f9d76969bfd..2b47bec7ffe8 100644
--- a/llvm/lib/Transforms/ObjCARC/ObjCARC.h
+++ b/llvm/lib/Transforms/ObjCARC/ObjCARC.h
@@ -105,8 +105,8 @@ CallInst *createCallInstWithColors(
class BundledRetainClaimRVs {
public:
- BundledRetainClaimRVs(ARCRuntimeEntryPoints &P, bool ContractPass)
- : EP(P), ContractPass(ContractPass) {}
+ BundledRetainClaimRVs(bool ContractPass, bool UseMarker)
+ : ContractPass(ContractPass), UseMarker(UseMarker) {}
~BundledRetainClaimRVs();
/// Insert a retainRV/claimRV call to the normal destination blocks of invokes
@@ -155,8 +155,10 @@ private:
/// A map of inserted retainRV/claimRV calls to annotated calls/invokes.
DenseMap<CallInst *, CallBase *> RVCalls;
- ARCRuntimeEntryPoints &EP;
bool ContractPass;
+
+ /// Indicates whether the target uses a special inline-asm marker.
+ bool UseMarker;
};
} // end namespace objcarc
diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
index 6a928f2c7ffb..210ec60f2f87 100644
--- a/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
+++ b/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
@@ -64,30 +64,29 @@ bool OptimizeBB(BasicBlock *BB) {
bool Changed = false;
Instruction *Push = nullptr;
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
- Instruction *Inst = &*I++;
- switch (GetBasicARCInstKind(Inst)) {
+ for (Instruction &Inst : llvm::make_early_inc_range(*BB)) {
+ switch (GetBasicARCInstKind(&Inst)) {
case ARCInstKind::AutoreleasepoolPush:
- Push = Inst;
+ Push = &Inst;
break;
case ARCInstKind::AutoreleasepoolPop:
// If this pop matches a push and nothing in between can autorelease,
// zap the pair.
- if (Push && cast<CallInst>(Inst)->getArgOperand(0) == Push) {
+ if (Push && cast<CallInst>(&Inst)->getArgOperand(0) == Push) {
Changed = true;
LLVM_DEBUG(dbgs() << "ObjCARCAPElim::OptimizeBB: Zapping push pop "
"autorelease pair:\n"
" Pop: "
- << *Inst << "\n"
+ << Inst << "\n"
<< " Push: " << *Push
<< "\n");
- Inst->eraseFromParent();
+ Inst.eraseFromParent();
Push->eraseFromParent();
}
Push = nullptr;
break;
case ARCInstKind::CallOrUser:
- if (MayAutorelease(cast<CallBase>(*Inst)))
+ if (MayAutorelease(cast<CallBase>(Inst)))
Push = nullptr;
break;
default:
diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
index 62161b5b6b40..c2ed94e8e1f6 100644
--- a/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
+++ b/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
@@ -226,13 +226,6 @@ static StoreInst *findSafeStoreForStoreStrongContraction(LoadInst *Load,
// of Inst.
ARCInstKind Class = GetBasicARCInstKind(Inst);
- // If Inst is an unrelated retain, we don't care about it.
- //
- // TODO: This is one area where the optimization could be made more
- // aggressive.
- if (IsRetain(Class))
- continue;
-
// If we have seen the store, but not the release...
if (Store) {
// We need to make sure that it is safe to move the release from its
@@ -248,8 +241,18 @@ static StoreInst *findSafeStoreForStoreStrongContraction(LoadInst *Load,
return nullptr;
}
- // Ok, now we know we have not seen a store yet. See if Inst can write to
- // our load location, if it can not, just ignore the instruction.
+ // Ok, now we know we have not seen a store yet.
+
+ // If Inst is a retain, we don't care about it as it doesn't prevent moving
+ // the load to the store.
+ //
+ // TODO: This is one area where the optimization could be made more
+ // aggressive.
+ if (IsRetain(Class))
+ continue;
+
+ // See if Inst can write to our load location, if it can not, just ignore
+ // the instruction.
if (!isModSet(AA->getModRefInfo(Inst, Loc)))
continue;
@@ -431,13 +434,21 @@ bool ObjCARCContract::tryToPeepholeInstruction(
LLVM_FALLTHROUGH;
case ARCInstKind::RetainRV:
case ARCInstKind::ClaimRV: {
- // If we're compiling for a target which needs a special inline-asm
- // marker to do the return value optimization and the retainRV/claimRV call
- // wasn't bundled with a call, insert the marker now.
+ bool IsInstContainedInBundle = BundledInsts->contains(Inst);
+
+ // Return now if the target doesn't need a special inline-asm marker. Return
+ // true if this is a bundled retainRV/claimRV call, which is going to be
+ // erased at the end of this pass, to avoid undoing objc-arc-expand and
+ // replacing uses of the retainRV/claimRV call's argument with its result.
if (!RVInstMarker)
- return false;
+ return IsInstContainedInBundle;
+
+ // The target needs a special inline-asm marker.
- if (BundledInsts->contains(Inst))
+ // We don't have to emit the marker if this is a bundled call since the
+ // backend is responsible for emitting it. Return false to undo
+ // objc-arc-expand.
+ if (IsInstContainedInBundle)
return false;
BasicBlock::iterator BBI = Inst->getIterator();
@@ -537,7 +548,7 @@ bool ObjCARCContract::run(Function &F, AAResults *A, DominatorTree *D) {
AA = A;
DT = D;
PA.setAA(A);
- BundledRetainClaimRVs BRV(EP, true);
+ BundledRetainClaimRVs BRV(true, RVInstMarker);
BundledInsts = &BRV;
std::pair<bool, bool> R = BundledInsts->insertAfterInvokes(F, DT);
diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARCExpand.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
index d2121dcebe91..6b074ac5adab 100644
--- a/llvm/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
+++ b/llvm/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
@@ -56,12 +56,10 @@ static bool runImpl(Function &F) {
LLVM_DEBUG(dbgs() << "ObjCARCExpand: Visiting Function: " << F.getName()
<< "\n");
- for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) {
- Instruction *Inst = &*I;
+ for (Instruction &Inst : instructions(&F)) {
+ LLVM_DEBUG(dbgs() << "ObjCARCExpand: Visiting: " << Inst << "\n");
- LLVM_DEBUG(dbgs() << "ObjCARCExpand: Visiting: " << *Inst << "\n");
-
- switch (GetBasicARCInstKind(Inst)) {
+ switch (GetBasicARCInstKind(&Inst)) {
case ARCInstKind::Retain:
case ARCInstKind::RetainRV:
case ARCInstKind::Autorelease:
@@ -73,12 +71,12 @@ static bool runImpl(Function &F) {
// harder. Undo any uses of this optimization that the front-end
// emitted here. We'll redo them in the contract pass.
Changed = true;
- Value *Value = cast<CallInst>(Inst)->getArgOperand(0);
- LLVM_DEBUG(dbgs() << "ObjCARCExpand: Old = " << *Inst
+ Value *Value = cast<CallInst>(&Inst)->getArgOperand(0);
+ LLVM_DEBUG(dbgs() << "ObjCARCExpand: Old = " << Inst
<< "\n"
" New = "
<< *Value << "\n");
- Inst->replaceAllUsesWith(Value);
+ Inst.replaceAllUsesWith(Value);
break;
}
default:
diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index ada6aa8d9b6d..0fa4904456cd 100644
--- a/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -2229,13 +2229,12 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
// Then, for each destroyWeak with an alloca operand, check to see if
// the alloca and all its users can be zapped.
- for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
- Instruction *Inst = &*I++;
- ARCInstKind Class = GetBasicARCInstKind(Inst);
+ for (Instruction &Inst : llvm::make_early_inc_range(instructions(F))) {
+ ARCInstKind Class = GetBasicARCInstKind(&Inst);
if (Class != ARCInstKind::DestroyWeak)
continue;
- CallInst *Call = cast<CallInst>(Inst);
+ CallInst *Call = cast<CallInst>(&Inst);
Value *Arg = Call->getArgOperand(0);
if (AllocaInst *Alloca = dyn_cast<AllocaInst>(Arg)) {
for (User *U : Alloca->users()) {
@@ -2250,8 +2249,8 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
}
}
Changed = true;
- for (auto UI = Alloca->user_begin(), UE = Alloca->user_end(); UI != UE;) {
- CallInst *UserInst = cast<CallInst>(*UI++);
+ for (User *U : llvm::make_early_inc_range(Alloca->users())) {
+ CallInst *UserInst = cast<CallInst>(U);
switch (GetBasicARCInstKind(UserInst)) {
case ARCInstKind::InitWeak:
case ARCInstKind::StoreWeak:
@@ -2462,7 +2461,7 @@ bool ObjCARCOpt::run(Function &F, AAResults &AA) {
return false;
Changed = CFGChanged = false;
- BundledRetainClaimRVs BRV(EP, false);
+ BundledRetainClaimRVs BRV(false, objcarc::getRVInstMarker(*F.getParent()));
BundledInsts = &BRV;
LLVM_DEBUG(dbgs() << "<<< ObjCARCOpt: Visiting Function: " << F.getName()
diff --git a/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.h b/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
index a63e356ce1fc..6d0a67c91cfa 100644
--- a/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
+++ b/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
@@ -56,7 +56,8 @@ class ProvenanceAnalysis {
CachedResultsTy CachedResults;
- DenseMap<const Value *, WeakTrackingVH> UnderlyingObjCPtrCache;
+ DenseMap<const Value *, std::pair<WeakVH, WeakTrackingVH>>
+ UnderlyingObjCPtrCache;
bool relatedCheck(const Value *A, const Value *B);
bool relatedSelect(const SelectInst *A, const Value *B);
diff --git a/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp b/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp
index 6fdfe787d438..fe637ee066a4 100644
--- a/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp
+++ b/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp
@@ -58,11 +58,11 @@ bool PAEval::runOnFunction(Function &F) {
for (auto &Arg : F.args())
insertIfNamed(Values, &Arg);
- for (auto I = inst_begin(F), E = inst_end(F); I != E; ++I) {
- insertIfNamed(Values, &*I);
+ for (Instruction &I : instructions(F)) {
+ insertIfNamed(Values, &I);
- for (auto &Op : I->operands())
- insertIfNamed(Values, Op);
+ for (auto &Op : I.operands())
+ insertIfNamed(Values, Op);
}
ProvenanceAnalysis PA;
diff --git a/llvm/lib/Transforms/Scalar/ADCE.cpp b/llvm/lib/Transforms/Scalar/ADCE.cpp
index 6f3fdb88eda5..b693acceb3f6 100644
--- a/llvm/lib/Transforms/Scalar/ADCE.cpp
+++ b/llvm/lib/Transforms/Scalar/ADCE.cpp
@@ -538,7 +538,7 @@ bool AggressiveDeadCodeElimination::removeDeadInstructions() {
// that have no side effects and do not influence the control flow or return
// value of the function, and may therefore be deleted safely.
// NOTE: We reuse the Worklist vector here for memory efficiency.
- for (Instruction &I : instructions(F)) {
+ for (Instruction &I : llvm::reverse(instructions(F))) {
// Check if the instruction is alive.
if (isLive(&I))
continue;
@@ -554,9 +554,11 @@ bool AggressiveDeadCodeElimination::removeDeadInstructions() {
// Prepare to delete.
Worklist.push_back(&I);
salvageDebugInfo(I);
- I.dropAllReferences();
}
+ for (Instruction *&I : Worklist)
+ I->dropAllReferences();
+
for (Instruction *&I : Worklist) {
++NumRemoved;
I->eraseFromParent();
diff --git a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index be21db9087d2..e4ec5f266eb8 100644
--- a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -221,6 +221,10 @@ bool AlignmentFromAssumptionsPass::extractAlignmentInfo(CallInst *I,
AAPtr = AAPtr->stripPointerCastsSameRepresentation();
AlignSCEV = SE->getSCEV(AlignOB.Inputs[1].get());
AlignSCEV = SE->getTruncateOrZeroExtend(AlignSCEV, Int64Ty);
+ if (!isa<SCEVConstant>(AlignSCEV))
+ // Added to suppress a crash because consumer doesn't expect non-constant
+ // alignments in the assume bundle. TODO: Consider generalizing caller.
+ return false;
if (AlignOB.Inputs.size() == 3)
OffSCEV = SE->getSCEV(AlignOB.Inputs[2].get());
else
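The early return added in this hunk makes extractAlignmentInfo give up when the "align" assume-bundle operand is not a constant SCEV, instead of crashing in the consumer. The bundles it reads typically originate from source-level alignment assumptions; a hypothetical example using Clang's __builtin_assume_aligned, which (in this LLVM era) lowers to an llvm.assume carrying an "align" operand bundle:

    // The builtin is assumed to lower to llvm.assume(...) ["align"(ptr, 64)];
    // this pass then uses the constant 64 to refine the alignment of the loads.
    float sumFirstTwo(const float *Ptr) {
      const float *P =
          static_cast<const float *>(__builtin_assume_aligned(Ptr, 64));
      return P[0] + P[1];
    }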
diff --git a/llvm/lib/Transforms/Scalar/BDCE.cpp b/llvm/lib/Transforms/Scalar/BDCE.cpp
index c06125788f37..6c2467db79f7 100644
--- a/llvm/lib/Transforms/Scalar/BDCE.cpp
+++ b/llvm/lib/Transforms/Scalar/BDCE.cpp
@@ -53,7 +53,7 @@ static void clearAssumptionsOfUsers(Instruction *I, DemandedBits &DB) {
// in the def-use chain needs to be changed.
auto *J = dyn_cast<Instruction>(JU);
if (J && J->getType()->isIntOrIntVectorTy() &&
- !DB.getDemandedBits(J).isAllOnesValue()) {
+ !DB.getDemandedBits(J).isAllOnes()) {
Visited.insert(J);
WorkList.push_back(J);
}
@@ -84,7 +84,7 @@ static void clearAssumptionsOfUsers(Instruction *I, DemandedBits &DB) {
// that in the def-use chain needs to be changed.
auto *K = dyn_cast<Instruction>(KU);
if (K && Visited.insert(K).second && K->getType()->isIntOrIntVectorTy() &&
- !DB.getDemandedBits(K).isAllOnesValue())
+ !DB.getDemandedBits(K).isAllOnes())
WorkList.push_back(K);
}
}
@@ -103,12 +103,9 @@ static bool bitTrackingDCE(Function &F, DemandedBits &DB) {
// Remove instructions that are dead, either because they were not reached
// during analysis or have no demanded bits.
if (DB.isInstructionDead(&I) ||
- (I.getType()->isIntOrIntVectorTy() &&
- DB.getDemandedBits(&I).isNullValue() &&
+ (I.getType()->isIntOrIntVectorTy() && DB.getDemandedBits(&I).isZero() &&
wouldInstructionBeTriviallyDead(&I))) {
- salvageDebugInfo(I);
Worklist.push_back(&I);
- I.dropAllReferences();
Changed = true;
continue;
}
@@ -155,6 +152,11 @@ static bool bitTrackingDCE(Function &F, DemandedBits &DB) {
}
}
+ for (Instruction *&I : llvm::reverse(Worklist)) {
+ salvageDebugInfo(*I);
+ I->dropAllReferences();
+ }
+
for (Instruction *&I : Worklist) {
++NumRemoved;
I->eraseFromParent();
diff --git a/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp b/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
index 2eb94b721d96..95de59fa8262 100644
--- a/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
+++ b/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
@@ -467,7 +467,7 @@ static PredsWithCondsTy shouldSplitOnPredicatedArgument(CallBase &CB,
BasicBlock *StopAt = CSDTNode ? CSDTNode->getIDom()->getBlock() : nullptr;
SmallVector<std::pair<BasicBlock *, ConditionsTy>, 2> PredsCS;
- for (auto *Pred : make_range(Preds.rbegin(), Preds.rend())) {
+ for (auto *Pred : llvm::reverse(Preds)) {
ConditionsTy Conditions;
// Record condition on edge BB(CS) <- Pred
recordCondition(CB, Pred, CB.getParent(), Conditions);
@@ -505,8 +505,7 @@ static bool doCallSiteSplitting(Function &F, TargetLibraryInfo &TLI,
DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Lazy);
bool Changed = false;
- for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE;) {
- BasicBlock &BB = *BI++;
+ for (BasicBlock &BB : llvm::make_early_inc_range(F)) {
auto II = BB.getFirstNonPHIOrDbg()->getIterator();
auto IE = BB.getTerminator()->getIterator();
// Iterate until we reach the terminator instruction. tryToSplitCallSite
diff --git a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
index 535f50d4f904..27f54f8026e1 100644
--- a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -762,7 +762,7 @@ void ConstantHoistingPass::emitBaseConstants(Instruction *Base,
PointerType *Int8PtrTy = Type::getInt8PtrTy(*Ctx,
cast<PointerType>(Ty)->getAddressSpace());
Base = new BitCastInst(Base, Int8PtrTy, "base_bitcast", InsertionPt);
- Mat = GetElementPtrInst::Create(Int8PtrTy->getElementType(), Base,
+ Mat = GetElementPtrInst::Create(Type::getInt8Ty(*Ctx), Base,
Offset, "mat_gep", InsertionPt);
Mat = new BitCastInst(Mat, Ty, "mat_bitcast", InsertionPt);
} else
@@ -819,10 +819,9 @@ void ConstantHoistingPass::emitBaseConstants(Instruction *Base,
// Aside from constant GEPs, only constant cast expressions are collected.
assert(ConstExpr->isCast() && "ConstExpr should be a cast");
- Instruction *ConstExprInst = ConstExpr->getAsInstruction();
+ Instruction *ConstExprInst = ConstExpr->getAsInstruction(
+ findMatInsertPt(ConstUser.Inst, ConstUser.OpndIdx));
ConstExprInst->setOperand(0, Mat);
- ConstExprInst->insertBefore(findMatInsertPt(ConstUser.Inst,
- ConstUser.OpndIdx));
// Use the same debug location as the instruction we are about to update.
ConstExprInst->setDebugLoc(ConstUser.Inst->getDebugLoc());
diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index efd1c025d0cd..7f2d5d7d9987 100644
--- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstraintSystem.h"
#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
@@ -268,6 +269,31 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT) {
continue;
WorkList.emplace_back(DT.getNode(&BB));
+ // True as long as the current instruction is guaranteed to execute.
+ bool GuaranteedToExecute = true;
+ // Scan BB for assume calls.
+ // TODO: also use this scan to queue conditions to simplify, so we can
+ // interleave facts from assumes and conditions to simplify in a single
+ // basic block. And to skip another traversal of each basic block when
+ // simplifying.
+ for (Instruction &I : BB) {
+ Value *Cond;
+ // For now, just handle assumes with a single compare as condition.
+ if (match(&I, m_Intrinsic<Intrinsic::assume>(m_Value(Cond))) &&
+ isa<CmpInst>(Cond)) {
+ if (GuaranteedToExecute) {
+ // The assume is guaranteed to execute when BB is entered, hence Cond
+ // holds on entry to BB.
+ WorkList.emplace_back(DT.getNode(&BB), cast<CmpInst>(Cond), false);
+ } else {
+ // Otherwise the condition only holds in the successors.
+ for (BasicBlock *Succ : successors(&BB))
+ WorkList.emplace_back(DT.getNode(Succ), cast<CmpInst>(Cond), false);
+ }
+ }
+ GuaranteedToExecute &= isGuaranteedToTransferExecutionToSuccessor(&I);
+ }
+
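The scan added above turns llvm.assume calls whose operand is a single compare into facts for the constraint system: if the assume is guaranteed to execute from block entry, the condition is added for the block itself, otherwise only for its successors. A source-level illustration of the kind of code this lets the pass simplify (hypothetical function, using Clang's __builtin_assume):

    // After lowering, the builtin becomes llvm.assume(icmp sgt %n, 7); the scan
    // above feeds that fact into the constraint system, so the return value can
    // be folded to true.
    bool fitsHeader(int n) {
      __builtin_assume(n > 7);
      return n > 0;
    }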
auto *Br = dyn_cast<BranchInst>(BB.getTerminator());
if (!Br || !Br->isConditional())
continue;
@@ -395,8 +421,13 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT) {
for (auto &E : reverse(DFSInStack))
dbgs() << " C " << *E.Condition << " " << E.IsNot << "\n";
});
- Cmp->replaceAllUsesWith(
- ConstantInt::getTrue(F.getParent()->getContext()));
+ Cmp->replaceUsesWithIf(
+ ConstantInt::getTrue(F.getParent()->getContext()), [](Use &U) {
+ // Conditions in an assume trivially simplify to true. Skip uses
+ // in assume calls to not destroy the available information.
+ auto *II = dyn_cast<IntrinsicInst>(U.getUser());
+ return !II || II->getIntrinsicID() != Intrinsic::assume;
+ });
NumCondsRemoved++;
Changed = true;
}
diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 36cbd42a5fdd..ca9567dc7ac8 100644
--- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -67,6 +67,7 @@ STATISTIC(NumUDivURemsNarrowed,
STATISTIC(NumAShrs, "Number of ashr converted to lshr");
STATISTIC(NumSRems, "Number of srem converted to urem");
STATISTIC(NumSExt, "Number of sext converted to zext");
+STATISTIC(NumSICmps, "Number of signed icmp preds simplified to unsigned");
STATISTIC(NumAnd, "Number of ands removed");
STATISTIC(NumNW, "Number of no-wrap deductions");
STATISTIC(NumNSW, "Number of no-signed-wrap deductions");
@@ -295,11 +296,34 @@ static bool processMemAccess(Instruction *I, LazyValueInfo *LVI) {
return true;
}
+static bool processICmp(ICmpInst *Cmp, LazyValueInfo *LVI) {
+ // Only for signed relational comparisons of scalar integers.
+ if (Cmp->getType()->isVectorTy() ||
+ !Cmp->getOperand(0)->getType()->isIntegerTy())
+ return false;
+
+ if (!Cmp->isSigned())
+ return false;
+
+ ICmpInst::Predicate UnsignedPred =
+ ConstantRange::getEquivalentPredWithFlippedSignedness(
+ Cmp->getPredicate(), LVI->getConstantRange(Cmp->getOperand(0), Cmp),
+ LVI->getConstantRange(Cmp->getOperand(1), Cmp));
+
+ if (UnsignedPred == ICmpInst::Predicate::BAD_ICMP_PREDICATE)
+ return false;
+
+ ++NumSICmps;
+ Cmp->setPredicate(UnsignedPred);
+
+ return true;
+}
+
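The new processICmp queries LazyValueInfo for the ranges of both operands and, when the signed predicate has an equivalent unsigned predicate over those ranges, rewrites the compare in place (counted by the new NumSICmps statistic). A hedged source-level example of the situation it targets:

    // Both operands are zero-extended bytes, so their ranges are [0, 255] and
    // the signed "wi < wn" (slt) is equivalent to an unsigned comparison (ult),
    // which later passes and backends often handle more cheaply.
    unsigned isBelow(unsigned char i, unsigned char n) {
      int wi = i;
      int wn = n;
      return wi < wn ? 1u : 0u;
    }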
/// See if LazyValueInfo's ability to exploit edge conditions or range
/// information is sufficient to prove this comparison. Even for local
/// conditions, this can sometimes prove conditions instcombine can't by
/// exploiting range information.
-static bool processCmp(CmpInst *Cmp, LazyValueInfo *LVI) {
+static bool constantFoldCmp(CmpInst *Cmp, LazyValueInfo *LVI) {
Value *Op0 = Cmp->getOperand(0);
auto *C = dyn_cast<Constant>(Cmp->getOperand(1));
if (!C)
@@ -318,6 +342,17 @@ static bool processCmp(CmpInst *Cmp, LazyValueInfo *LVI) {
return true;
}
+static bool processCmp(CmpInst *Cmp, LazyValueInfo *LVI) {
+ if (constantFoldCmp(Cmp, LVI))
+ return true;
+
+ if (auto *ICmp = dyn_cast<ICmpInst>(Cmp))
+ if (processICmp(ICmp, LVI))
+ return true;
+
+ return false;
+}
+
/// Simplify a switch instruction by removing cases which can never fire. If the
/// uselessness of a case could be determined locally then constant propagation
/// would already have figured it out. Instead, walk the predecessors and
@@ -341,7 +376,13 @@ static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI,
// ConstantFoldTerminator() as the underlying SwitchInst can be changed.
SwitchInstProfUpdateWrapper SI(*I);
- for (auto CI = SI->case_begin(), CE = SI->case_end(); CI != CE;) {
+ APInt Low =
+ APInt::getSignedMaxValue(Cond->getType()->getScalarSizeInBits());
+ APInt High =
+ APInt::getSignedMinValue(Cond->getType()->getScalarSizeInBits());
+
+ SwitchInst::CaseIt CI = SI->case_begin();
+ for (auto CE = SI->case_end(); CI != CE;) {
ConstantInt *Case = CI->getCaseValue();
LazyValueInfo::Tristate State =
LVI->getPredicateAt(CmpInst::ICMP_EQ, Cond, Case, I,
@@ -374,9 +415,28 @@ static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI,
break;
}
+ // Get Lower/Upper bound from switch cases.
+ Low = APIntOps::smin(Case->getValue(), Low);
+ High = APIntOps::smax(Case->getValue(), High);
+
// Increment the case iterator since we didn't delete it.
++CI;
}
+
+ // Try to simplify default case as unreachable
+ if (CI == SI->case_end() && SI->getNumCases() != 0 &&
+ !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg())) {
+ const ConstantRange SIRange =
+ LVI->getConstantRange(SI->getCondition(), SI);
+
+ // If the numbered switch cases cover the entire range of the condition,
+ // then the default case is not reachable.
+ if (SIRange.getSignedMin() == Low && SIRange.getSignedMax() == High &&
+ SI->getNumCases() == High - Low + 1) {
+ createUnreachableSwitchDefault(SI, &DTU);
+ Changed = true;
+ }
+ }
}
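The block above records the smallest and largest case values while walking the cases and, when LazyValueInfo shows that the switch condition's range is exactly covered by the numbered cases, replaces the default destination with an unreachable block via createUnreachableSwitchDefault. A hypothetical source pattern it applies to:

    // The condition is masked to [0, 3] and all four values have explicit
    // cases, so the default case is provably dead after this change.
    int pick(unsigned x) {
      switch (x & 3u) {
      case 0: return 10;
      case 1: return 11;
      case 2: return 12;
      case 3: return 13;
      default: return -1; // becomes unreachable
      }
    }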
if (Changed)
@@ -690,7 +750,7 @@ static bool narrowSDivOrSRem(BinaryOperator *Instr, LazyValueInfo *LVI) {
// sdiv/srem is UB if divisor is -1 and divident is INT_MIN, so unless we can
// prove that such a combination is impossible, we need to bump the bitwidth.
- if (CRs[1]->contains(APInt::getAllOnesValue(OrigWidth)) &&
+ if (CRs[1]->contains(APInt::getAllOnes(OrigWidth)) &&
CRs[0]->contains(
APInt::getSignedMinValue(MinSignedBits).sextOrSelf(OrigWidth)))
++MinSignedBits;
@@ -1023,49 +1083,48 @@ static bool runImpl(Function &F, LazyValueInfo *LVI, DominatorTree *DT,
// blocks.
for (BasicBlock *BB : depth_first(&F.getEntryBlock())) {
bool BBChanged = false;
- for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
- Instruction *II = &*BI++;
- switch (II->getOpcode()) {
+ for (Instruction &II : llvm::make_early_inc_range(*BB)) {
+ switch (II.getOpcode()) {
case Instruction::Select:
- BBChanged |= processSelect(cast<SelectInst>(II), LVI);
+ BBChanged |= processSelect(cast<SelectInst>(&II), LVI);
break;
case Instruction::PHI:
- BBChanged |= processPHI(cast<PHINode>(II), LVI, DT, SQ);
+ BBChanged |= processPHI(cast<PHINode>(&II), LVI, DT, SQ);
break;
case Instruction::ICmp:
case Instruction::FCmp:
- BBChanged |= processCmp(cast<CmpInst>(II), LVI);
+ BBChanged |= processCmp(cast<CmpInst>(&II), LVI);
break;
case Instruction::Load:
case Instruction::Store:
- BBChanged |= processMemAccess(II, LVI);
+ BBChanged |= processMemAccess(&II, LVI);
break;
case Instruction::Call:
case Instruction::Invoke:
- BBChanged |= processCallSite(cast<CallBase>(*II), LVI);
+ BBChanged |= processCallSite(cast<CallBase>(II), LVI);
break;
case Instruction::SRem:
case Instruction::SDiv:
- BBChanged |= processSDivOrSRem(cast<BinaryOperator>(II), LVI);
+ BBChanged |= processSDivOrSRem(cast<BinaryOperator>(&II), LVI);
break;
case Instruction::UDiv:
case Instruction::URem:
- BBChanged |= processUDivOrURem(cast<BinaryOperator>(II), LVI);
+ BBChanged |= processUDivOrURem(cast<BinaryOperator>(&II), LVI);
break;
case Instruction::AShr:
- BBChanged |= processAShr(cast<BinaryOperator>(II), LVI);
+ BBChanged |= processAShr(cast<BinaryOperator>(&II), LVI);
break;
case Instruction::SExt:
- BBChanged |= processSExt(cast<SExtInst>(II), LVI);
+ BBChanged |= processSExt(cast<SExtInst>(&II), LVI);
break;
case Instruction::Add:
case Instruction::Sub:
case Instruction::Mul:
case Instruction::Shl:
- BBChanged |= processBinOp(cast<BinaryOperator>(II), LVI);
+ BBChanged |= processBinOp(cast<BinaryOperator>(&II), LVI);
break;
case Instruction::And:
- BBChanged |= processAnd(cast<BinaryOperator>(II), LVI);
+ BBChanged |= processAnd(cast<BinaryOperator>(&II), LVI);
break;
}
}
diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
index 90679bcac4b7..8c4523206070 100644
--- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
@@ -1,9 +1,8 @@
//===- DFAJumpThreading.cpp - Threads a switch statement inside a loop ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -84,8 +83,6 @@
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <deque>
-#include <unordered_map>
-#include <unordered_set>
using namespace llvm;
@@ -147,8 +144,7 @@ private:
Stack.push_back(SIToUnfold);
while (!Stack.empty()) {
- SelectInstToUnfold SIToUnfold = Stack.back();
- Stack.pop_back();
+ SelectInstToUnfold SIToUnfold = Stack.pop_back_val();
std::vector<SelectInstToUnfold> NewSIsToUnfold;
std::vector<BasicBlock *> NewBBs;
@@ -174,6 +170,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
}
@@ -350,7 +347,7 @@ struct ClonedBlock {
typedef std::deque<BasicBlock *> PathType;
typedef std::vector<PathType> PathsType;
-typedef std::set<const BasicBlock *> VisitedBlocks;
+typedef SmallPtrSet<const BasicBlock *, 8> VisitedBlocks;
typedef std::vector<ClonedBlock> CloneList;
// This data structure keeps track of all blocks that have been cloned. If two
@@ -493,7 +490,7 @@ private:
}
bool isPredictableValue(Value *InpVal, SmallSet<Value *, 16> &SeenValues) {
- if (SeenValues.find(InpVal) != SeenValues.end())
+ if (SeenValues.contains(InpVal))
return true;
if (isa<ConstantInt>(InpVal))
@@ -508,7 +505,7 @@ private:
void addInstToQueue(Value *Val, std::deque<Instruction *> &Q,
SmallSet<Value *, 16> &SeenValues) {
- if (SeenValues.find(Val) != SeenValues.end())
+ if (SeenValues.contains(Val))
return;
if (Instruction *I = dyn_cast<Instruction>(Val))
Q.push_back(I);
@@ -533,7 +530,7 @@ private:
return false;
if (isa<PHINode>(SIUse) &&
- SIBB->getSingleSuccessor() != dyn_cast<Instruction>(SIUse)->getParent())
+ SIBB->getSingleSuccessor() != cast<Instruction>(SIUse)->getParent())
return false;
// If select will not be sunk during unfolding, and it is in the same basic
@@ -621,13 +618,9 @@ private:
// Some blocks have multiple edges to the same successor, and this set
// is used to prevent a duplicate path from being generated
SmallSet<BasicBlock *, 4> Successors;
-
- for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI) {
- BasicBlock *Succ = *SI;
-
- if (Successors.find(Succ) != Successors.end())
+ for (BasicBlock *Succ : successors(BB)) {
+ if (!Successors.insert(Succ).second)
continue;
- Successors.insert(Succ);
// Found a cycle through the SwitchBlock
if (Succ == SwitchBlock) {
@@ -636,7 +629,7 @@ private:
}
// We have encountered a cycle, do not get caught in it
- if (Visited.find(Succ) != Visited.end())
+ if (Visited.contains(Succ))
continue;
PathsType SuccPaths = paths(Succ, Visited, PathDepth + 1);
@@ -668,15 +661,14 @@ private:
SmallSet<Value *, 16> SeenValues;
while (!Stack.empty()) {
- PHINode *CurPhi = Stack.back();
- Stack.pop_back();
+ PHINode *CurPhi = Stack.pop_back_val();
Res[CurPhi->getParent()] = CurPhi;
SeenValues.insert(CurPhi);
for (Value *Incoming : CurPhi->incoming_values()) {
if (Incoming == FirstDef || isa<ConstantInt>(Incoming) ||
- SeenValues.find(Incoming) != SeenValues.end()) {
+ SeenValues.contains(Incoming)) {
continue;
}
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index d22b3f409585..a8ec8bb97970 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -13,10 +13,10 @@
// in between both MemoryDefs. A bit more concretely:
//
// For all MemoryDefs StartDef:
-// 1. Get the next dominating clobbering MemoryDef (EarlierAccess) by walking
+// 1. Get the next dominating clobbering MemoryDef (MaybeDeadAccess) by walking
// upwards.
-// 2. Check that there are no reads between EarlierAccess and the StartDef by
-// checking all uses starting at EarlierAccess and walking until we see
+// 2. Check that there are no reads between MaybeDeadAccess and the StartDef by
+// checking all uses starting at MaybeDeadAccess and walking until we see
// StartDef.
// 3. For each found CurrentDef, check that:
// 1. There are no barrier instructions between CurrentDef and StartDef (like
@@ -56,6 +56,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
@@ -78,6 +79,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
@@ -122,7 +124,7 @@ EnablePartialStoreMerging("enable-dse-partial-store-merging",
static cl::opt<unsigned>
MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden,
cl::desc("The number of memory instructions to scan for "
- "dead store elimination (default = 100)"));
+ "dead store elimination (default = 150)"));
static cl::opt<unsigned> MemorySSAUpwardsStepLimit(
"dse-memoryssa-walklimit", cl::init(90), cl::Hidden,
cl::desc("The maximum number of steps while walking upwards to find "
@@ -203,39 +205,6 @@ static bool hasAnalyzableMemoryWrite(Instruction *I,
return false;
}
-/// Return a Location stored to by the specified instruction. If isRemovable
-/// returns true, this function and getLocForRead completely describe the memory
-/// operations for this instruction.
-static MemoryLocation getLocForWrite(Instruction *Inst,
- const TargetLibraryInfo &TLI) {
- if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
- return MemoryLocation::get(SI);
-
- // memcpy/memmove/memset.
- if (auto *MI = dyn_cast<AnyMemIntrinsic>(Inst))
- return MemoryLocation::getForDest(MI);
-
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
- switch (II->getIntrinsicID()) {
- default:
- return MemoryLocation(); // Unhandled intrinsic.
- case Intrinsic::init_trampoline:
- return MemoryLocation::getAfter(II->getArgOperand(0));
- case Intrinsic::masked_store:
- return MemoryLocation::getForArgument(II, 1, TLI);
- case Intrinsic::lifetime_end: {
- uint64_t Len = cast<ConstantInt>(II->getArgOperand(0))->getZExtValue();
- return MemoryLocation(II->getArgOperand(1), Len);
- }
- }
- }
- if (auto *CB = dyn_cast<CallBase>(Inst))
- // All the supported TLI functions so far happen to have dest as their
- // first argument.
- return MemoryLocation::getAfter(CB->getArgOperand(0));
- return MemoryLocation();
-}
-
/// If the value of this instruction and the memory it writes to is unused, may
/// we delete this instruction?
static bool isRemovable(Instruction *I) {
@@ -333,147 +302,146 @@ enum OverwriteResult {
} // end anonymous namespace
/// Check if two instruction are masked stores that completely
-/// overwrite one another. More specifically, \p Later has to
-/// overwrite \p Earlier.
-static OverwriteResult isMaskedStoreOverwrite(const Instruction *Later,
- const Instruction *Earlier,
+/// overwrite one another. More specifically, \p KillingI has to
+/// overwrite \p DeadI.
+static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI,
+ const Instruction *DeadI,
BatchAAResults &AA) {
- const auto *IIL = dyn_cast<IntrinsicInst>(Later);
- const auto *IIE = dyn_cast<IntrinsicInst>(Earlier);
- if (IIL == nullptr || IIE == nullptr)
+ const auto *KillingII = dyn_cast<IntrinsicInst>(KillingI);
+ const auto *DeadII = dyn_cast<IntrinsicInst>(DeadI);
+ if (KillingII == nullptr || DeadII == nullptr)
return OW_Unknown;
- if (IIL->getIntrinsicID() != Intrinsic::masked_store ||
- IIE->getIntrinsicID() != Intrinsic::masked_store)
+ if (KillingII->getIntrinsicID() != Intrinsic::masked_store ||
+ DeadII->getIntrinsicID() != Intrinsic::masked_store)
return OW_Unknown;
// Pointers.
- Value *LP = IIL->getArgOperand(1)->stripPointerCasts();
- Value *EP = IIE->getArgOperand(1)->stripPointerCasts();
- if (LP != EP && !AA.isMustAlias(LP, EP))
+ Value *KillingPtr = KillingII->getArgOperand(1)->stripPointerCasts();
+ Value *DeadPtr = DeadII->getArgOperand(1)->stripPointerCasts();
+ if (KillingPtr != DeadPtr && !AA.isMustAlias(KillingPtr, DeadPtr))
return OW_Unknown;
// Masks.
- // TODO: check that Later's mask is a superset of the Earlier's mask.
- if (IIL->getArgOperand(3) != IIE->getArgOperand(3))
+ // TODO: check that KillingII's mask is a superset of the DeadII's mask.
+ if (KillingII->getArgOperand(3) != DeadII->getArgOperand(3))
return OW_Unknown;
return OW_Complete;
}
-/// Return 'OW_Complete' if a store to the 'Later' location completely
-/// overwrites a store to the 'Earlier' location, 'OW_End' if the end of the
-/// 'Earlier' location is completely overwritten by 'Later', 'OW_Begin' if the
-/// beginning of the 'Earlier' location is overwritten by 'Later'.
-/// 'OW_PartialEarlierWithFullLater' means that an earlier (big) store was
-/// overwritten by a latter (smaller) store which doesn't write outside the big
+/// Return 'OW_Complete' if a store to the 'KillingLoc' location completely
+/// overwrites a store to the 'DeadLoc' location, 'OW_End' if the end of the
+/// 'DeadLoc' location is completely overwritten by 'KillingLoc', 'OW_Begin'
+/// if the beginning of the 'DeadLoc' location is overwritten by 'KillingLoc'.
+/// 'OW_PartialEarlierWithFullLater' means that a dead (big) store was
+/// overwritten by a killing (smaller) store which doesn't write outside the big
/// store's memory locations. Returns 'OW_Unknown' if nothing can be determined.
-/// NOTE: This function must only be called if both \p Later and \p Earlier
-/// write to the same underlying object with valid \p EarlierOff and \p
-/// LaterOff.
-static OverwriteResult isPartialOverwrite(const MemoryLocation &Later,
- const MemoryLocation &Earlier,
- int64_t EarlierOff, int64_t LaterOff,
- Instruction *DepWrite,
+/// NOTE: This function must only be called if both \p KillingLoc and \p
+/// DeadLoc belong to the same underlying object with valid \p KillingOff and
+/// \p DeadOff.
+static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc,
+ const MemoryLocation &DeadLoc,
+ int64_t KillingOff, int64_t DeadOff,
+ Instruction *DeadI,
InstOverlapIntervalsTy &IOL) {
- const uint64_t LaterSize = Later.Size.getValue();
- const uint64_t EarlierSize = Earlier.Size.getValue();
+ const uint64_t KillingSize = KillingLoc.Size.getValue();
+ const uint64_t DeadSize = DeadLoc.Size.getValue();
// We may now overlap, although the overlap is not complete. There might also
// be other incomplete overlaps, and together, they might cover the complete
- // earlier write.
+ // dead store.
// Note: The correctness of this logic depends on the fact that this function
// is not even called providing DepWrite when there are any intervening reads.
if (EnablePartialOverwriteTracking &&
- LaterOff < int64_t(EarlierOff + EarlierSize) &&
- int64_t(LaterOff + LaterSize) >= EarlierOff) {
+ KillingOff < int64_t(DeadOff + DeadSize) &&
+ int64_t(KillingOff + KillingSize) >= DeadOff) {
// Insert our part of the overlap into the map.
- auto &IM = IOL[DepWrite];
- LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: Earlier [" << EarlierOff
- << ", " << int64_t(EarlierOff + EarlierSize)
- << ") Later [" << LaterOff << ", "
- << int64_t(LaterOff + LaterSize) << ")\n");
+ auto &IM = IOL[DeadI];
+ LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: DeadLoc [" << DeadOff << ", "
+ << int64_t(DeadOff + DeadSize) << ") KillingLoc ["
+ << KillingOff << ", " << int64_t(KillingOff + KillingSize)
+ << ")\n");
// Make sure that we only insert non-overlapping intervals and combine
// adjacent intervals. The intervals are stored in the map with the ending
// offset as the key (in the half-open sense) and the starting offset as
// the value.
- int64_t LaterIntStart = LaterOff, LaterIntEnd = LaterOff + LaterSize;
+ int64_t KillingIntStart = KillingOff;
+ int64_t KillingIntEnd = KillingOff + KillingSize;
- // Find any intervals ending at, or after, LaterIntStart which start
- // before LaterIntEnd.
- auto ILI = IM.lower_bound(LaterIntStart);
- if (ILI != IM.end() && ILI->second <= LaterIntEnd) {
+ // Find any intervals ending at, or after, KillingIntStart which start
+ // before KillingIntEnd.
+ auto ILI = IM.lower_bound(KillingIntStart);
+ if (ILI != IM.end() && ILI->second <= KillingIntEnd) {
// This existing interval is overlapped with the current store somewhere
- // in [LaterIntStart, LaterIntEnd]. Merge them by erasing the existing
+ // in [KillingIntStart, KillingIntEnd]. Merge them by erasing the existing
// intervals and adjusting our start and end.
- LaterIntStart = std::min(LaterIntStart, ILI->second);
- LaterIntEnd = std::max(LaterIntEnd, ILI->first);
+ KillingIntStart = std::min(KillingIntStart, ILI->second);
+ KillingIntEnd = std::max(KillingIntEnd, ILI->first);
ILI = IM.erase(ILI);
// Continue erasing and adjusting our end in case other previous
// intervals are also overlapped with the current store.
//
- // |--- ealier 1 ---| |--- ealier 2 ---|
- // |------- later---------|
+ // |--- dead 1 ---| |--- dead 2 ---|
+ // |------- killing---------|
//
- while (ILI != IM.end() && ILI->second <= LaterIntEnd) {
- assert(ILI->second > LaterIntStart && "Unexpected interval");
- LaterIntEnd = std::max(LaterIntEnd, ILI->first);
+ while (ILI != IM.end() && ILI->second <= KillingIntEnd) {
+ assert(ILI->second > KillingIntStart && "Unexpected interval");
+ KillingIntEnd = std::max(KillingIntEnd, ILI->first);
ILI = IM.erase(ILI);
}
}
- IM[LaterIntEnd] = LaterIntStart;
+ IM[KillingIntEnd] = KillingIntStart;
ILI = IM.begin();
- if (ILI->second <= EarlierOff &&
- ILI->first >= int64_t(EarlierOff + EarlierSize)) {
- LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: Earlier ["
- << EarlierOff << ", "
- << int64_t(EarlierOff + EarlierSize)
- << ") Composite Later [" << ILI->second << ", "
+ if (ILI->second <= DeadOff && ILI->first >= int64_t(DeadOff + DeadSize)) {
+ LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: DeadLoc ["
+ << DeadOff << ", " << int64_t(DeadOff + DeadSize)
+ << ") Composite KillingLoc [" << ILI->second << ", "
<< ILI->first << ")\n");
++NumCompletePartials;
return OW_Complete;
}
}
- // Check for an earlier store which writes to all the memory locations that
- // the later store writes to.
- if (EnablePartialStoreMerging && LaterOff >= EarlierOff &&
- int64_t(EarlierOff + EarlierSize) > LaterOff &&
- uint64_t(LaterOff - EarlierOff) + LaterSize <= EarlierSize) {
- LLVM_DEBUG(dbgs() << "DSE: Partial overwrite an earlier load ["
- << EarlierOff << ", "
- << int64_t(EarlierOff + EarlierSize)
- << ") by a later store [" << LaterOff << ", "
- << int64_t(LaterOff + LaterSize) << ")\n");
+ // Check for a dead store which writes to all the memory locations that
+ // the killing store writes to.
+ if (EnablePartialStoreMerging && KillingOff >= DeadOff &&
+ int64_t(DeadOff + DeadSize) > KillingOff &&
+ uint64_t(KillingOff - DeadOff) + KillingSize <= DeadSize) {
+ LLVM_DEBUG(dbgs() << "DSE: Partial overwrite a dead load [" << DeadOff
+ << ", " << int64_t(DeadOff + DeadSize)
+ << ") by a killing store [" << KillingOff << ", "
+ << int64_t(KillingOff + KillingSize) << ")\n");
// TODO: Maybe come up with a better name?
return OW_PartialEarlierWithFullLater;
}
- // Another interesting case is if the later store overwrites the end of the
- // earlier store.
+ // Another interesting case is if the killing store overwrites the end of the
+ // dead store.
//
- // |--earlier--|
- // |-- later --|
+ // |--dead--|
+ // |-- killing --|
//
- // In this case we may want to trim the size of earlier to avoid generating
- // writes to addresses which will definitely be overwritten later
+ // In this case we may want to trim the size of the dead store to avoid
+ // generating stores to addresses which will definitely be overwritten by the
+ // killing store.
if (!EnablePartialOverwriteTracking &&
- (LaterOff > EarlierOff && LaterOff < int64_t(EarlierOff + EarlierSize) &&
- int64_t(LaterOff + LaterSize) >= int64_t(EarlierOff + EarlierSize)))
+ (KillingOff > DeadOff && KillingOff < int64_t(DeadOff + DeadSize) &&
+ int64_t(KillingOff + KillingSize) >= int64_t(DeadOff + DeadSize)))
return OW_End;
- // Finally, we also need to check if the later store overwrites the beginning
- // of the earlier store.
+ // Finally, we also need to check if the killing store overwrites the
+ // beginning of the dead store.
//
- // |--earlier--|
- // |-- later --|
+ // |--dead--|
+ // |-- killing --|
//
// In this case we may want to move the destination address and trim the size
- // of earlier to avoid generating writes to addresses which will definitely
- // be overwritten later.
+ // of the dead store to avoid generating stores to addresses which will
+ // definitely be overwritten by the killing store.
if (!EnablePartialOverwriteTracking &&
- (LaterOff <= EarlierOff && int64_t(LaterOff + LaterSize) > EarlierOff)) {
- assert(int64_t(LaterOff + LaterSize) < int64_t(EarlierOff + EarlierSize) &&
+ (KillingOff <= DeadOff && int64_t(KillingOff + KillingSize) > DeadOff)) {
+ assert(int64_t(KillingOff + KillingSize) < int64_t(DeadOff + DeadSize) &&
"Expect to be handled as OW_Complete");
return OW_Begin;
}
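
For intuition, the OW_End and OW_Begin cases above can be reproduced on concrete offsets. A small self-contained sketch of the classification used when partial overwrite tracking is disabled (hypothetical helper, same arithmetic as the code above):

    #include <cstdint>

    enum OverwriteResult { OW_Unknown, OW_Begin, OW_End };

    // Mirrors the !EnablePartialOverwriteTracking checks on raw offsets.
    OverwriteResult classifyPartial(int64_t KillingOff, uint64_t KillingSize,
                                    int64_t DeadOff, uint64_t DeadSize) {
      // Killing store overwrites the tail of the dead store.
      if (KillingOff > DeadOff && KillingOff < int64_t(DeadOff + DeadSize) &&
          int64_t(KillingOff + KillingSize) >= int64_t(DeadOff + DeadSize))
        return OW_End;
      // Killing store overwrites the head of the dead store.
      if (KillingOff <= DeadOff && int64_t(KillingOff + KillingSize) > DeadOff)
        return OW_Begin;
      return OW_Unknown;
    }

    // classifyPartial(8, 8, 0, 12)  -> OW_End   (dead [0,12), killing [8,16))
    // classifyPartial(-4, 8, 0, 12) -> OW_Begin (dead [0,12), killing [-4,4))
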
@@ -505,7 +473,12 @@ memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI,
BasicBlock::iterator SecondBBI(SecondI);
BasicBlock *FirstBB = FirstI->getParent();
BasicBlock *SecondBB = SecondI->getParent();
- MemoryLocation MemLoc = MemoryLocation::get(SecondI);
+ MemoryLocation MemLoc;
+ if (auto *MemSet = dyn_cast<MemSetInst>(SecondI))
+ MemLoc = MemoryLocation::getForDest(MemSet);
+ else
+ MemLoc = MemoryLocation::get(SecondI);
+
auto *MemLocPtr = const_cast<Value *>(MemLoc.Ptr);
// Start checking the SecondBB.
@@ -568,11 +541,11 @@ memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI,
return true;
}
-static bool tryToShorten(Instruction *EarlierWrite, int64_t &EarlierStart,
- uint64_t &EarlierSize, int64_t LaterStart,
- uint64_t LaterSize, bool IsOverwriteEnd) {
- auto *EarlierIntrinsic = cast<AnyMemIntrinsic>(EarlierWrite);
- Align PrefAlign = EarlierIntrinsic->getDestAlign().valueOrOne();
+static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
+ uint64_t &DeadSize, int64_t KillingStart,
+ uint64_t KillingSize, bool IsOverwriteEnd) {
+ auto *DeadIntrinsic = cast<AnyMemIntrinsic>(DeadI);
+ Align PrefAlign = DeadIntrinsic->getDestAlign().valueOrOne();
// We assume that memset/memcpy operates in chunks of the "largest" native
// type size and aligned on the same value. That means optimal start and size
@@ -593,19 +566,19 @@ static bool tryToShorten(Instruction *EarlierWrite, int64_t &EarlierStart,
// Compute start and size of the region to remove. Make sure 'PrefAlign' is
// maintained on the remaining store.
if (IsOverwriteEnd) {
- // Calculate required adjustment for 'LaterStart'in order to keep remaining
- // store size aligned on 'PerfAlign'.
+ // Calculate required adjustment for 'KillingStart' in order to keep
+ // remaining store size aligned on 'PrefAlign'.
uint64_t Off =
- offsetToAlignment(uint64_t(LaterStart - EarlierStart), PrefAlign);
- ToRemoveStart = LaterStart + Off;
- if (EarlierSize <= uint64_t(ToRemoveStart - EarlierStart))
+ offsetToAlignment(uint64_t(KillingStart - DeadStart), PrefAlign);
+ ToRemoveStart = KillingStart + Off;
+ if (DeadSize <= uint64_t(ToRemoveStart - DeadStart))
return false;
- ToRemoveSize = EarlierSize - uint64_t(ToRemoveStart - EarlierStart);
+ ToRemoveSize = DeadSize - uint64_t(ToRemoveStart - DeadStart);
} else {
- ToRemoveStart = EarlierStart;
- assert(LaterSize >= uint64_t(EarlierStart - LaterStart) &&
+ ToRemoveStart = DeadStart;
+ assert(KillingSize >= uint64_t(DeadStart - KillingStart) &&
"Not overlapping accesses?");
- ToRemoveSize = LaterSize - uint64_t(EarlierStart - LaterStart);
+ ToRemoveSize = KillingSize - uint64_t(DeadStart - KillingStart);
// Calculate required adjustment for 'ToRemoveSize' in order to keep
// start of the remaining store aligned on 'PrefAlign'.
uint64_t Off = offsetToAlignment(ToRemoveSize, PrefAlign);
@@ -619,10 +592,10 @@ static bool tryToShorten(Instruction *EarlierWrite, int64_t &EarlierStart,
}
assert(ToRemoveSize > 0 && "Shouldn't reach here if nothing to remove");
- assert(EarlierSize > ToRemoveSize && "Can't remove more than original size");
+ assert(DeadSize > ToRemoveSize && "Can't remove more than original size");
- uint64_t NewSize = EarlierSize - ToRemoveSize;
- if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(EarlierWrite)) {
+ uint64_t NewSize = DeadSize - ToRemoveSize;
+ if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(DeadI)) {
// When shortening an atomic memory intrinsic, the newly shortened
// length must remain an integer multiple of the element size.
const uint32_t ElementSize = AMI->getElementSizeInBytes();
@@ -631,65 +604,62 @@ static bool tryToShorten(Instruction *EarlierWrite, int64_t &EarlierStart,
}
LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW "
- << (IsOverwriteEnd ? "END" : "BEGIN") << ": "
- << *EarlierWrite << "\n KILLER [" << ToRemoveStart << ", "
+ << (IsOverwriteEnd ? "END" : "BEGIN") << ": " << *DeadI
+ << "\n KILLER [" << ToRemoveStart << ", "
<< int64_t(ToRemoveStart + ToRemoveSize) << ")\n");
- Value *EarlierWriteLength = EarlierIntrinsic->getLength();
- Value *TrimmedLength =
- ConstantInt::get(EarlierWriteLength->getType(), NewSize);
- EarlierIntrinsic->setLength(TrimmedLength);
- EarlierIntrinsic->setDestAlignment(PrefAlign);
+ Value *DeadWriteLength = DeadIntrinsic->getLength();
+ Value *TrimmedLength = ConstantInt::get(DeadWriteLength->getType(), NewSize);
+ DeadIntrinsic->setLength(TrimmedLength);
+ DeadIntrinsic->setDestAlignment(PrefAlign);
if (!IsOverwriteEnd) {
- Value *OrigDest = EarlierIntrinsic->getRawDest();
+ Value *OrigDest = DeadIntrinsic->getRawDest();
Type *Int8PtrTy =
- Type::getInt8PtrTy(EarlierIntrinsic->getContext(),
+ Type::getInt8PtrTy(DeadIntrinsic->getContext(),
OrigDest->getType()->getPointerAddressSpace());
Value *Dest = OrigDest;
if (OrigDest->getType() != Int8PtrTy)
- Dest = CastInst::CreatePointerCast(OrigDest, Int8PtrTy, "", EarlierWrite);
+ Dest = CastInst::CreatePointerCast(OrigDest, Int8PtrTy, "", DeadI);
Value *Indices[1] = {
- ConstantInt::get(EarlierWriteLength->getType(), ToRemoveSize)};
+ ConstantInt::get(DeadWriteLength->getType(), ToRemoveSize)};
Instruction *NewDestGEP = GetElementPtrInst::CreateInBounds(
- Type::getInt8Ty(EarlierIntrinsic->getContext()),
- Dest, Indices, "", EarlierWrite);
- NewDestGEP->setDebugLoc(EarlierIntrinsic->getDebugLoc());
+ Type::getInt8Ty(DeadIntrinsic->getContext()), Dest, Indices, "", DeadI);
+ NewDestGEP->setDebugLoc(DeadIntrinsic->getDebugLoc());
if (NewDestGEP->getType() != OrigDest->getType())
NewDestGEP = CastInst::CreatePointerCast(NewDestGEP, OrigDest->getType(),
- "", EarlierWrite);
- EarlierIntrinsic->setDest(NewDestGEP);
+ "", DeadI);
+ DeadIntrinsic->setDest(NewDestGEP);
}
- // Finally update start and size of earlier access.
+ // Finally update start and size of dead access.
if (!IsOverwriteEnd)
- EarlierStart += ToRemoveSize;
- EarlierSize = NewSize;
+ DeadStart += ToRemoveSize;
+ DeadSize = NewSize;
return true;
}
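
A worked example of the alignment-preserving trim in the IsOverwriteEnd case: with DeadStart = 0, DeadSize = 32, KillingStart = 20 and PrefAlign = 8, the removed region is pushed up to the next 8-byte boundary so the surviving store stays aligned. Sketch of just that arithmetic (offsetToAlignment re-derived locally for illustration):

    #include <cassert>
    #include <cstdint>

    // Bytes needed to round Value up to the next multiple of Align.
    static uint64_t offsetToAlign(uint64_t Value, uint64_t Align) {
      return (Align - Value % Align) % Align;
    }

    int main() {
      int64_t DeadStart = 0, KillingStart = 20;
      uint64_t DeadSize = 32, PrefAlign = 8;

      uint64_t Off = offsetToAlign(uint64_t(KillingStart - DeadStart), PrefAlign); // 4
      int64_t ToRemoveStart = KillingStart + Off;                                  // 24
      uint64_t ToRemoveSize = DeadSize - uint64_t(ToRemoveStart - DeadStart);      // 8
      uint64_t NewSize = DeadSize - ToRemoveSize;                                  // 24

      assert(ToRemoveStart == 24 && ToRemoveSize == 8 && NewSize == 24);
      return 0;
    }
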
-static bool tryToShortenEnd(Instruction *EarlierWrite,
- OverlapIntervalsTy &IntervalMap,
- int64_t &EarlierStart, uint64_t &EarlierSize) {
- if (IntervalMap.empty() || !isShortenableAtTheEnd(EarlierWrite))
+static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap,
+ int64_t &DeadStart, uint64_t &DeadSize) {
+ if (IntervalMap.empty() || !isShortenableAtTheEnd(DeadI))
return false;
OverlapIntervalsTy::iterator OII = --IntervalMap.end();
- int64_t LaterStart = OII->second;
- uint64_t LaterSize = OII->first - LaterStart;
+ int64_t KillingStart = OII->second;
+ uint64_t KillingSize = OII->first - KillingStart;
- assert(OII->first - LaterStart >= 0 && "Size expected to be positive");
+ assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
- if (LaterStart > EarlierStart &&
- // Note: "LaterStart - EarlierStart" is known to be positive due to
+ if (KillingStart > DeadStart &&
+ // Note: "KillingStart - KillingStart" is known to be positive due to
// preceding check.
- (uint64_t)(LaterStart - EarlierStart) < EarlierSize &&
- // Note: "EarlierSize - (uint64_t)(LaterStart - EarlierStart)" is known to
+ (uint64_t)(KillingStart - DeadStart) < DeadSize &&
+ // Note: "DeadSize - (uint64_t)(KillingStart - DeadStart)" is known to
// be non negative due to preceding checks.
- LaterSize >= EarlierSize - (uint64_t)(LaterStart - EarlierStart)) {
- if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart,
- LaterSize, true)) {
+ KillingSize >= DeadSize - (uint64_t)(KillingStart - DeadStart)) {
+ if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
+ true)) {
IntervalMap.erase(OII);
return true;
}
@@ -697,28 +667,28 @@ static bool tryToShortenEnd(Instruction *EarlierWrite,
return false;
}
-static bool tryToShortenBegin(Instruction *EarlierWrite,
+static bool tryToShortenBegin(Instruction *DeadI,
OverlapIntervalsTy &IntervalMap,
- int64_t &EarlierStart, uint64_t &EarlierSize) {
- if (IntervalMap.empty() || !isShortenableAtTheBeginning(EarlierWrite))
+ int64_t &DeadStart, uint64_t &DeadSize) {
+ if (IntervalMap.empty() || !isShortenableAtTheBeginning(DeadI))
return false;
OverlapIntervalsTy::iterator OII = IntervalMap.begin();
- int64_t LaterStart = OII->second;
- uint64_t LaterSize = OII->first - LaterStart;
+ int64_t KillingStart = OII->second;
+ uint64_t KillingSize = OII->first - KillingStart;
- assert(OII->first - LaterStart >= 0 && "Size expected to be positive");
+ assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
- if (LaterStart <= EarlierStart &&
- // Note: "EarlierStart - LaterStart" is known to be non negative due to
+ if (KillingStart <= DeadStart &&
+ // Note: "DeadStart - KillingStart" is known to be non negative due to
// preceding check.
- LaterSize > (uint64_t)(EarlierStart - LaterStart)) {
- // Note: "LaterSize - (uint64_t)(EarlierStart - LaterStart)" is known to be
- // positive due to preceding checks.
- assert(LaterSize - (uint64_t)(EarlierStart - LaterStart) < EarlierSize &&
+ KillingSize > (uint64_t)(DeadStart - KillingStart)) {
+ // Note: "KillingSize - (uint64_t)(DeadStart - DeadStart)" is known to
+ // be positive due to preceding checks.
+ assert(KillingSize - (uint64_t)(DeadStart - KillingStart) < DeadSize &&
"Should have been handled as OW_Complete");
- if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart,
- LaterSize, false)) {
+ if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
+ false)) {
IntervalMap.erase(OII);
return true;
}
@@ -726,71 +696,48 @@ static bool tryToShortenBegin(Instruction *EarlierWrite,
return false;
}
-static bool removePartiallyOverlappedStores(const DataLayout &DL,
- InstOverlapIntervalsTy &IOL,
- const TargetLibraryInfo &TLI) {
- bool Changed = false;
- for (auto OI : IOL) {
- Instruction *EarlierWrite = OI.first;
- MemoryLocation Loc = getLocForWrite(EarlierWrite, TLI);
- assert(isRemovable(EarlierWrite) && "Expect only removable instruction");
-
- const Value *Ptr = Loc.Ptr->stripPointerCasts();
- int64_t EarlierStart = 0;
- uint64_t EarlierSize = Loc.Size.getValue();
- GetPointerBaseWithConstantOffset(Ptr, EarlierStart, DL);
- OverlapIntervalsTy &IntervalMap = OI.second;
- Changed |=
- tryToShortenEnd(EarlierWrite, IntervalMap, EarlierStart, EarlierSize);
- if (IntervalMap.empty())
- continue;
- Changed |=
- tryToShortenBegin(EarlierWrite, IntervalMap, EarlierStart, EarlierSize);
- }
- return Changed;
-}
-
-static Constant *tryToMergePartialOverlappingStores(
- StoreInst *Earlier, StoreInst *Later, int64_t InstWriteOffset,
- int64_t DepWriteOffset, const DataLayout &DL, BatchAAResults &AA,
- DominatorTree *DT) {
-
- if (Earlier && isa<ConstantInt>(Earlier->getValueOperand()) &&
- DL.typeSizeEqualsStoreSize(Earlier->getValueOperand()->getType()) &&
- Later && isa<ConstantInt>(Later->getValueOperand()) &&
- DL.typeSizeEqualsStoreSize(Later->getValueOperand()->getType()) &&
- memoryIsNotModifiedBetween(Earlier, Later, AA, DL, DT)) {
+static Constant *
+tryToMergePartialOverlappingStores(StoreInst *KillingI, StoreInst *DeadI,
+ int64_t KillingOffset, int64_t DeadOffset,
+ const DataLayout &DL, BatchAAResults &AA,
+ DominatorTree *DT) {
+
+ if (DeadI && isa<ConstantInt>(DeadI->getValueOperand()) &&
+ DL.typeSizeEqualsStoreSize(DeadI->getValueOperand()->getType()) &&
+ KillingI && isa<ConstantInt>(KillingI->getValueOperand()) &&
+ DL.typeSizeEqualsStoreSize(KillingI->getValueOperand()->getType()) &&
+ memoryIsNotModifiedBetween(DeadI, KillingI, AA, DL, DT)) {
// If the store we find is:
// a) partially overwritten by the store to 'Loc'
- // b) the later store is fully contained in the earlier one and
+ // b) the killing store is fully contained in the dead one and
// c) they both have a constant value
// d) none of the two stores need padding
- // Merge the two stores, replacing the earlier store's value with a
+ // Merge the two stores, replacing the dead store's value with a
// merge of both values.
// TODO: Deal with other constant types (vectors, etc), and probably
// some mem intrinsics (if needed)
- APInt EarlierValue =
- cast<ConstantInt>(Earlier->getValueOperand())->getValue();
- APInt LaterValue = cast<ConstantInt>(Later->getValueOperand())->getValue();
- unsigned LaterBits = LaterValue.getBitWidth();
- assert(EarlierValue.getBitWidth() > LaterValue.getBitWidth());
- LaterValue = LaterValue.zext(EarlierValue.getBitWidth());
+ APInt DeadValue = cast<ConstantInt>(DeadI->getValueOperand())->getValue();
+ APInt KillingValue =
+ cast<ConstantInt>(KillingI->getValueOperand())->getValue();
+ unsigned KillingBits = KillingValue.getBitWidth();
+ assert(DeadValue.getBitWidth() > KillingValue.getBitWidth());
+ KillingValue = KillingValue.zext(DeadValue.getBitWidth());
// Offset of the smaller store inside the larger store
- unsigned BitOffsetDiff = (InstWriteOffset - DepWriteOffset) * 8;
- unsigned LShiftAmount = DL.isBigEndian() ? EarlierValue.getBitWidth() -
- BitOffsetDiff - LaterBits
- : BitOffsetDiff;
- APInt Mask = APInt::getBitsSet(EarlierValue.getBitWidth(), LShiftAmount,
- LShiftAmount + LaterBits);
+ unsigned BitOffsetDiff = (KillingOffset - DeadOffset) * 8;
+ unsigned LShiftAmount =
+ DL.isBigEndian() ? DeadValue.getBitWidth() - BitOffsetDiff - KillingBits
+ : BitOffsetDiff;
+ APInt Mask = APInt::getBitsSet(DeadValue.getBitWidth(), LShiftAmount,
+ LShiftAmount + KillingBits);
// Clear the bits we'll be replacing, then OR with the smaller
// store, shifted appropriately.
- APInt Merged = (EarlierValue & ~Mask) | (LaterValue << LShiftAmount);
- LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n Earlier: " << *Earlier
- << "\n Later: " << *Later
+ APInt Merged = (DeadValue & ~Mask) | (KillingValue << LShiftAmount);
+ LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n Dead: " << *DeadI
+ << "\n Killing: " << *KillingI
<< "\n Merged Value: " << Merged << '\n');
- return ConstantInt::get(Earlier->getValueOperand()->getType(), Merged);
+ return ConstantInt::get(DeadI->getValueOperand()->getType(), Merged);
}
return nullptr;
}
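
The merge above can be checked with a concrete little-endian example: a dead i32 store of 0xAABBCCDD at offset 0 combined with a killing i8 store of 0x11 at offset 1 yields 0xAABB11DD. A self-contained sketch of the same mask/shift computation (plain integers instead of APInt, little-endian only):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t DeadValue = 0xAABBCCDD;  // 4-byte dead store at offset 0
      uint8_t KillingValue = 0x11;      // 1-byte killing store at offset 1
      unsigned KillingBits = 8;
      unsigned BitOffsetDiff = (1 - 0) * 8;   // (KillingOffset - DeadOffset) * 8
      unsigned LShiftAmount = BitOffsetDiff;  // little-endian case

      uint32_t Mask = ((1u << KillingBits) - 1) << LShiftAmount; // bits [8, 16)
      uint32_t Merged =
          (DeadValue & ~Mask) | (uint32_t(KillingValue) << LShiftAmount);

      assert(Merged == 0xAABB11DD); // byte 1 of the dead value replaced by 0x11
      return 0;
    }
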
@@ -819,14 +766,17 @@ bool isNoopIntrinsic(Instruction *I) {
}
// Check if we can ignore \p D for DSE.
-bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
+bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller,
+ const TargetLibraryInfo &TLI) {
Instruction *DI = D->getMemoryInst();
// Calls that only access inaccessible memory cannot read or write any memory
// locations we consider for elimination.
if (auto *CB = dyn_cast<CallBase>(DI))
- if (CB->onlyAccessesInaccessibleMemory())
+ if (CB->onlyAccessesInaccessibleMemory()) {
+ if (isAllocLikeFn(DI, &TLI))
+ return false;
return true;
-
+ }
// We can eliminate stores to locations not visible to the caller across
// throwing instructions.
if (DI->mayThrow() && !DefVisibleToCaller)
@@ -841,7 +791,7 @@ bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
return true;
// Skip intrinsics that do not really read or modify memory.
- if (isNoopIntrinsic(D->getMemoryInst()))
+ if (isNoopIntrinsic(DI))
return true;
return false;
@@ -850,6 +800,7 @@ bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
struct DSEState {
Function &F;
AliasAnalysis &AA;
+ EarliestEscapeInfo EI;
/// The single BatchAA instance that is used to cache AA queries. It will
/// not be invalidated over the whole run. This is safe, because:
@@ -892,30 +843,29 @@ struct DSEState {
/// basic block.
DenseMap<BasicBlock *, InstOverlapIntervalsTy> IOLs;
+ // Class contains self-reference, make sure it's not copied/moved.
+ DSEState(const DSEState &) = delete;
+ DSEState &operator=(const DSEState &) = delete;
+
DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
PostDominatorTree &PDT, const TargetLibraryInfo &TLI,
const LoopInfo &LI)
- : F(F), AA(AA), BatchAA(AA), MSSA(MSSA), DT(DT), PDT(PDT), TLI(TLI),
- DL(F.getParent()->getDataLayout()), LI(LI) {}
-
- static DSEState get(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
- DominatorTree &DT, PostDominatorTree &PDT,
- const TargetLibraryInfo &TLI, const LoopInfo &LI) {
- DSEState State(F, AA, MSSA, DT, PDT, TLI, LI);
+ : F(F), AA(AA), EI(DT, LI), BatchAA(AA, &EI), MSSA(MSSA), DT(DT),
+ PDT(PDT), TLI(TLI), DL(F.getParent()->getDataLayout()), LI(LI) {
// Collect blocks with throwing instructions not modeled in MemorySSA and
// alloc-like objects.
unsigned PO = 0;
for (BasicBlock *BB : post_order(&F)) {
- State.PostOrderNumbers[BB] = PO++;
+ PostOrderNumbers[BB] = PO++;
for (Instruction &I : *BB) {
MemoryAccess *MA = MSSA.getMemoryAccess(&I);
if (I.mayThrow() && !MA)
- State.ThrowingBlocks.insert(I.getParent());
+ ThrowingBlocks.insert(I.getParent());
auto *MD = dyn_cast_or_null<MemoryDef>(MA);
- if (MD && State.MemDefs.size() < MemorySSADefsPerBlockLimit &&
- (State.getLocForWriteEx(&I) || State.isMemTerminatorInst(&I)))
- State.MemDefs.push_back(MD);
+ if (MD && MemDefs.size() < MemorySSADefsPerBlockLimit &&
+ (getLocForWriteEx(&I) || isMemTerminatorInst(&I)))
+ MemDefs.push_back(MD);
}
}
@@ -925,131 +875,134 @@ struct DSEState {
if (AI.hasPassPointeeByValueCopyAttr()) {
// For byval, the caller doesn't know the address of the allocation.
if (AI.hasByValAttr())
- State.InvisibleToCallerBeforeRet.insert({&AI, true});
- State.InvisibleToCallerAfterRet.insert({&AI, true});
+ InvisibleToCallerBeforeRet.insert({&AI, true});
+ InvisibleToCallerAfterRet.insert({&AI, true});
}
// Collect whether there is any irreducible control flow in the function.
- State.ContainsIrreducibleLoops = mayContainIrreducibleControl(F, &LI);
-
- return State;
+ ContainsIrreducibleLoops = mayContainIrreducibleControl(F, &LI);
}
- /// Return 'OW_Complete' if a store to the 'Later' location (by \p LaterI
- /// instruction) completely overwrites a store to the 'Earlier' location.
- /// (by \p EarlierI instruction).
- /// Return OW_MaybePartial if \p Later does not completely overwrite
- /// \p Earlier, but they both write to the same underlying object. In that
- /// case, use isPartialOverwrite to check if \p Later partially overwrites
- /// \p Earlier. Returns 'OW_Unknown' if nothing can be determined.
- OverwriteResult
- isOverwrite(const Instruction *LaterI, const Instruction *EarlierI,
- const MemoryLocation &Later, const MemoryLocation &Earlier,
- int64_t &EarlierOff, int64_t &LaterOff) {
+ /// Return 'OW_Complete' if a store to the 'KillingLoc' location (by \p
+ /// KillingI instruction) completely overwrites a store to the 'DeadLoc'
+ /// location (by \p DeadI instruction).
+ /// Return OW_MaybePartial if \p KillingI does not completely overwrite
+ /// \p DeadI, but they both write to the same underlying object. In that
+ /// case, use isPartialOverwrite to check if \p KillingI partially overwrites
+ /// \p DeadI. Returns 'OW_Unknown' if nothing can be determined.
+ OverwriteResult isOverwrite(const Instruction *KillingI,
+ const Instruction *DeadI,
+ const MemoryLocation &KillingLoc,
+ const MemoryLocation &DeadLoc,
+ int64_t &KillingOff, int64_t &DeadOff) {
// AliasAnalysis does not always account for loops. Limit overwrite checks
- // to dependencies for which we can guarantee they are independant of any
+ // to dependencies for which we can guarantee they are independent of any
// loops they are in.
- if (!isGuaranteedLoopIndependent(EarlierI, LaterI, Earlier))
+ if (!isGuaranteedLoopIndependent(DeadI, KillingI, DeadLoc))
return OW_Unknown;
// FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
// get imprecise values here, though (except for unknown sizes).
- if (!Later.Size.isPrecise() || !Earlier.Size.isPrecise()) {
+ if (!KillingLoc.Size.isPrecise() || !DeadLoc.Size.isPrecise()) {
// In case no constant size is known, try to use the IR values for the number
// of bytes written and check if they match.
- const auto *LaterMemI = dyn_cast<MemIntrinsic>(LaterI);
- const auto *EarlierMemI = dyn_cast<MemIntrinsic>(EarlierI);
- if (LaterMemI && EarlierMemI) {
- const Value *LaterV = LaterMemI->getLength();
- const Value *EarlierV = EarlierMemI->getLength();
- if (LaterV == EarlierV && BatchAA.isMustAlias(Earlier, Later))
+ const auto *KillingMemI = dyn_cast<MemIntrinsic>(KillingI);
+ const auto *DeadMemI = dyn_cast<MemIntrinsic>(DeadI);
+ if (KillingMemI && DeadMemI) {
+ const Value *KillingV = KillingMemI->getLength();
+ const Value *DeadV = DeadMemI->getLength();
+ if (KillingV == DeadV && BatchAA.isMustAlias(DeadLoc, KillingLoc))
return OW_Complete;
}
// Masked stores have imprecise locations, but we can reason about them
// to some extent.
- return isMaskedStoreOverwrite(LaterI, EarlierI, BatchAA);
+ return isMaskedStoreOverwrite(KillingI, DeadI, BatchAA);
}
- const uint64_t LaterSize = Later.Size.getValue();
- const uint64_t EarlierSize = Earlier.Size.getValue();
+ const uint64_t KillingSize = KillingLoc.Size.getValue();
+ const uint64_t DeadSize = DeadLoc.Size.getValue();
// Query the alias information
- AliasResult AAR = BatchAA.alias(Later, Earlier);
+ AliasResult AAR = BatchAA.alias(KillingLoc, DeadLoc);
// If the start pointers are the same, we just have to compare sizes to see if
- // the later store was larger than the earlier store.
+ // the killing store was larger than the dead store.
if (AAR == AliasResult::MustAlias) {
- // Make sure that the Later size is >= the Earlier size.
- if (LaterSize >= EarlierSize)
+ // Make sure that the killing store size is >= the dead store size.
+ if (KillingSize >= DeadSize)
return OW_Complete;
}
// If we hit a partial alias we may have a full overwrite
if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) {
int32_t Off = AAR.getOffset();
- if (Off >= 0 && (uint64_t)Off + EarlierSize <= LaterSize)
+ if (Off >= 0 && (uint64_t)Off + DeadSize <= KillingSize)
return OW_Complete;
}
- // Check to see if the later store is to the entire object (either a global,
- // an alloca, or a byval/inalloca argument). If so, then it clearly
+ // Check to see if the killing store is to the entire object (either a
+ // global, an alloca, or a byval/inalloca argument). If so, then it clearly
// overwrites any other store to the same object.
- const Value *P1 = Earlier.Ptr->stripPointerCasts();
- const Value *P2 = Later.Ptr->stripPointerCasts();
- const Value *UO1 = getUnderlyingObject(P1), *UO2 = getUnderlyingObject(P2);
+ const Value *DeadPtr = DeadLoc.Ptr->stripPointerCasts();
+ const Value *KillingPtr = KillingLoc.Ptr->stripPointerCasts();
+ const Value *DeadUndObj = getUnderlyingObject(DeadPtr);
+ const Value *KillingUndObj = getUnderlyingObject(KillingPtr);
// If we can't resolve the same pointers to the same object, then we can't
// analyze them at all.
- if (UO1 != UO2)
+ if (DeadUndObj != KillingUndObj)
return OW_Unknown;
- // If the "Later" store is to a recognizable object, get its size.
- uint64_t ObjectSize = getPointerSize(UO2, DL, TLI, &F);
- if (ObjectSize != MemoryLocation::UnknownSize)
- if (ObjectSize == LaterSize && ObjectSize >= EarlierSize)
+ // If the KillingI store is to a recognizable object, get its size.
+ uint64_t KillingUndObjSize = getPointerSize(KillingUndObj, DL, TLI, &F);
+ if (KillingUndObjSize != MemoryLocation::UnknownSize)
+ if (KillingUndObjSize == KillingSize && KillingUndObjSize >= DeadSize)
return OW_Complete;
// Okay, we have stores to two completely different pointers. Try to
// decompose the pointer into a "base + constant_offset" form. If the base
// pointers are equal, then we can reason about the two stores.
- EarlierOff = 0;
- LaterOff = 0;
- const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, DL);
- const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, DL);
-
- // If the base pointers still differ, we have two completely different stores.
- if (BP1 != BP2)
+ DeadOff = 0;
+ KillingOff = 0;
+ const Value *DeadBasePtr =
+ GetPointerBaseWithConstantOffset(DeadPtr, DeadOff, DL);
+ const Value *KillingBasePtr =
+ GetPointerBaseWithConstantOffset(KillingPtr, KillingOff, DL);
+
+ // If the base pointers still differ, we have two completely different
+ // stores.
+ if (DeadBasePtr != KillingBasePtr)
return OW_Unknown;
- // The later access completely overlaps the earlier store if and only if
- // both start and end of the earlier one is "inside" the later one:
- // |<->|--earlier--|<->|
- // |-------later-------|
+ // The killing access completely overlaps the dead store if and only if
+ // both start and end of the dead one are "inside" the killing one:
+ // |<->|--dead--|<->|
+ // |-----killing------|
// Accesses may overlap if and only if start of one of them is "inside"
// another one:
- // |<->|--earlier--|<----->|
- // |-------later-------|
+ // |<->|--dead--|<-------->|
+ // |-------killing--------|
// OR
- // |----- earlier -----|
- // |<->|---later---|<----->|
+ // |-------dead-------|
+ // |<->|---killing---|<----->|
//
// We have to be careful here as *Off is signed while *.Size is unsigned.
- // Check if the earlier access starts "not before" the later one.
- if (EarlierOff >= LaterOff) {
- // If the earlier access ends "not after" the later access then the earlier
- // one is completely overwritten by the later one.
- if (uint64_t(EarlierOff - LaterOff) + EarlierSize <= LaterSize)
+ // Check if the dead access starts "not before" the killing one.
+ if (DeadOff >= KillingOff) {
+ // If the dead access ends "not after" the killing access then the
+ // dead one is completely overwritten by the killing one.
+ if (uint64_t(DeadOff - KillingOff) + DeadSize <= KillingSize)
return OW_Complete;
- // If start of the earlier access is "before" end of the later access then
- // accesses overlap.
- else if ((uint64_t)(EarlierOff - LaterOff) < LaterSize)
+ // If start of the dead access is "before" end of the killing access
+ // then accesses overlap.
+ else if ((uint64_t)(DeadOff - KillingOff) < KillingSize)
return OW_MaybePartial;
}
- // If start of the later access is "before" end of the earlier access then
+ // If start of the killing access is "before" end of the dead access then
// accesses overlap.
- else if ((uint64_t)(LaterOff - EarlierOff) < EarlierSize) {
+ else if ((uint64_t)(KillingOff - DeadOff) < DeadSize) {
return OW_MaybePartial;
}
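
A quick numeric check of the signed-offset logic above: with a common base pointer, DeadOff = 4, DeadSize = 4 against KillingOff = 0, KillingSize = 8 gives OW_Complete (4 - 0 + 4 <= 8), while DeadSize = 16 at the same offsets only gives OW_MaybePartial. Compact sketch of that comparison (hypothetical free function, same arithmetic as above):

    #include <cstdint>

    enum OverwriteResult { OW_Unknown, OW_MaybePartial, OW_Complete };

    // Same-base-pointer case: classify by constant offsets and precise sizes.
    OverwriteResult classifyByOffsets(int64_t KillingOff, uint64_t KillingSize,
                                      int64_t DeadOff, uint64_t DeadSize) {
      if (DeadOff >= KillingOff) {
        if (uint64_t(DeadOff - KillingOff) + DeadSize <= KillingSize)
          return OW_Complete;     // dead range fully inside the killing range
        if (uint64_t(DeadOff - KillingOff) < KillingSize)
          return OW_MaybePartial; // overlap, but the dead range extends past the end
      } else if (uint64_t(KillingOff - DeadOff) < DeadSize) {
        return OW_MaybePartial;   // killing range starts inside the dead range
      }
      return OW_Unknown;
    }

    // classifyByOffsets(0, 8, 4, 4)  -> OW_Complete
    // classifyByOffsets(0, 8, 4, 16) -> OW_MaybePartial
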
@@ -1106,8 +1059,13 @@ struct DSEState {
LibFunc LF;
if (TLI.getLibFunc(*CB, LF) && TLI.has(LF)) {
switch (LF) {
- case LibFunc_strcpy:
case LibFunc_strncpy:
+ if (const auto *Len = dyn_cast<ConstantInt>(CB->getArgOperand(2)))
+ return MemoryLocation(CB->getArgOperand(0),
+ LocationSize::precise(Len->getZExtValue()),
+ CB->getAAMetadata());
+ LLVM_FALLTHROUGH;
+ case LibFunc_strcpy:
case LibFunc_strcat:
case LibFunc_strncat:
return {MemoryLocation::getAfter(CB->getArgOperand(0))};
@@ -1145,8 +1103,8 @@ struct DSEState {
int64_t InstWriteOffset, DepWriteOffset;
if (auto CC = getLocForWriteEx(UseInst))
- return isOverwrite(UseInst, DefInst, *CC, DefLoc, DepWriteOffset,
- InstWriteOffset) == OW_Complete;
+ return isOverwrite(UseInst, DefInst, *CC, DefLoc, InstWriteOffset,
+ DepWriteOffset) == OW_Complete;
return false;
}
@@ -1248,9 +1206,10 @@ struct DSEState {
const Value *LocUO = getUnderlyingObject(Loc.Ptr);
return BatchAA.isMustAlias(TermLoc.Ptr, LocUO);
}
- int64_t InstWriteOffset, DepWriteOffset;
- return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, DepWriteOffset,
- InstWriteOffset) == OW_Complete;
+ int64_t InstWriteOffset = 0;
+ int64_t DepWriteOffset = 0;
+ return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, InstWriteOffset,
+ DepWriteOffset) == OW_Complete;
}
// Returns true if \p Use may read from \p DefLoc.
@@ -1270,10 +1229,6 @@ struct DSEState {
if (CB->onlyAccessesInaccessibleMemory())
return false;
- // NOTE: For calls, the number of stores removed could be slightly improved
- // by using AA.callCapturesBefore(UseInst, DefLoc, &DT), but that showed to
- // be expensive compared to the benefits in practice. For now, avoid more
- // expensive analysis to limit compile-time.
return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc));
}
@@ -1329,15 +1284,15 @@ struct DSEState {
return IsGuaranteedLoopInvariantBase(Ptr);
}
- // Find a MemoryDef writing to \p DefLoc and dominating \p StartAccess, with
- // no read access between them or on any other path to a function exit block
- // if \p DefLoc is not accessible after the function returns. If there is no
- // such MemoryDef, return None. The returned value may not (completely)
- // overwrite \p DefLoc. Currently we bail out when we encounter an aliasing
- // MemoryUse (read).
+ // Find a MemoryDef writing to \p KillingLoc and dominating \p StartAccess,
+ // with no read access between them or on any other path to a function exit
+ // block if \p KillingLoc is not accessible after the function returns. If
+ // there is no such MemoryDef, return None. The returned value may not
+ // (completely) overwrite \p KillingLoc. Currently we bail out when we
+ // encounter an aliasing MemoryUse (read).
Optional<MemoryAccess *>
getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess,
- const MemoryLocation &DefLoc, const Value *DefUO,
+ const MemoryLocation &KillingLoc, const Value *KillingUndObj,
unsigned &ScanLimit, unsigned &WalkerStepLimit,
bool IsMemTerm, unsigned &PartialLimit) {
if (ScanLimit == 0 || WalkerStepLimit == 0) {
@@ -1389,19 +1344,20 @@ struct DSEState {
MemoryDef *CurrentDef = cast<MemoryDef>(Current);
Instruction *CurrentI = CurrentDef->getMemoryInst();
- if (canSkipDef(CurrentDef, !isInvisibleToCallerBeforeRet(DefUO)))
+ if (canSkipDef(CurrentDef, !isInvisibleToCallerBeforeRet(KillingUndObj),
+ TLI))
continue;
// Before we try to remove anything, check for any extra throwing
// instructions that block us from DSEing
- if (mayThrowBetween(KillingI, CurrentI, DefUO)) {
+ if (mayThrowBetween(KillingI, CurrentI, KillingUndObj)) {
LLVM_DEBUG(dbgs() << " ... skip, may throw!\n");
return None;
}
// Check for anything that looks like it will be a barrier to further
// removal
- if (isDSEBarrier(DefUO, CurrentI)) {
+ if (isDSEBarrier(KillingUndObj, CurrentI)) {
LLVM_DEBUG(dbgs() << " ... skip, barrier\n");
return None;
}
@@ -1410,14 +1366,14 @@ struct DSEState {
// clobber, bail out, as the path is not profitable. We skip this check
// for intrinsic calls, because the code knows how to handle memcpy
// intrinsics.
- if (!isa<IntrinsicInst>(CurrentI) && isReadClobber(DefLoc, CurrentI))
+ if (!isa<IntrinsicInst>(CurrentI) && isReadClobber(KillingLoc, CurrentI))
return None;
// Quick check if there are direct uses that are read-clobbers.
- if (any_of(Current->uses(), [this, &DefLoc, StartAccess](Use &U) {
+ if (any_of(Current->uses(), [this, &KillingLoc, StartAccess](Use &U) {
if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(U.getUser()))
return !MSSA.dominates(StartAccess, UseOrDef) &&
- isReadClobber(DefLoc, UseOrDef->getMemoryInst());
+ isReadClobber(KillingLoc, UseOrDef->getMemoryInst());
return false;
})) {
LLVM_DEBUG(dbgs() << " ... found a read clobber\n");
@@ -1450,9 +1406,10 @@ struct DSEState {
if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI))
continue;
} else {
- int64_t InstWriteOffset, DepWriteOffset;
- auto OR = isOverwrite(KillingI, CurrentI, DefLoc, *CurrentLoc,
- DepWriteOffset, InstWriteOffset);
+ int64_t KillingOffset = 0;
+ int64_t DeadOffset = 0;
+ auto OR = isOverwrite(KillingI, CurrentI, KillingLoc, *CurrentLoc,
+ KillingOffset, DeadOffset);
// If Current does not write to the same object as KillingDef, check
// the next candidate.
if (OR == OW_Unknown)
@@ -1473,30 +1430,25 @@ struct DSEState {
};
// Accesses to objects accessible after the function returns can only be
- // eliminated if the access is killed along all paths to the exit. Collect
+ // eliminated if the access is dead along all paths to the exit. Collect
// the blocks with killing (=completely overwriting MemoryDefs) and check if
- // they cover all paths from EarlierAccess to any function exit.
+ // they cover all paths from MaybeDeadAccess to any function exit.
SmallPtrSet<Instruction *, 16> KillingDefs;
KillingDefs.insert(KillingDef->getMemoryInst());
- MemoryAccess *EarlierAccess = Current;
- Instruction *EarlierMemInst =
- cast<MemoryDef>(EarlierAccess)->getMemoryInst();
- LLVM_DEBUG(dbgs() << " Checking for reads of " << *EarlierAccess << " ("
- << *EarlierMemInst << ")\n");
+ MemoryAccess *MaybeDeadAccess = Current;
+ MemoryLocation MaybeDeadLoc = *CurrentLoc;
+ Instruction *MaybeDeadI = cast<MemoryDef>(MaybeDeadAccess)->getMemoryInst();
+ LLVM_DEBUG(dbgs() << " Checking for reads of " << *MaybeDeadAccess << " ("
+ << *MaybeDeadI << ")\n");
SmallSetVector<MemoryAccess *, 32> WorkList;
auto PushMemUses = [&WorkList](MemoryAccess *Acc) {
for (Use &U : Acc->uses())
WorkList.insert(cast<MemoryAccess>(U.getUser()));
};
- PushMemUses(EarlierAccess);
-
- // Optimistically collect all accesses for reads. If we do not find any
- // read clobbers, add them to the cache.
- SmallPtrSet<MemoryAccess *, 16> KnownNoReads;
- if (!EarlierMemInst->mayReadFromMemory())
- KnownNoReads.insert(EarlierAccess);
- // Check if EarlierDef may be read.
+ PushMemUses(MaybeDeadAccess);
+
+ // Check if DeadDef may be read.
for (unsigned I = 0; I < WorkList.size(); I++) {
MemoryAccess *UseAccess = WorkList[I];
@@ -1508,7 +1460,6 @@ struct DSEState {
}
--ScanLimit;
NumDomMemDefChecks++;
- KnownNoReads.insert(UseAccess);
if (isa<MemoryPhi>(UseAccess)) {
if (any_of(KillingDefs, [this, UseAccess](Instruction *KI) {
@@ -1535,7 +1486,7 @@ struct DSEState {
// A memory terminator kills all preceding MemoryDefs and all succeeding
// MemoryAccesses. We do not have to check its users.
- if (isMemTerminator(*CurrentLoc, EarlierMemInst, UseInst)) {
+ if (isMemTerminator(MaybeDeadLoc, MaybeDeadI, UseInst)) {
LLVM_DEBUG(
dbgs()
<< " ... skipping, memterminator invalidates following accesses\n");
@@ -1548,14 +1499,14 @@ struct DSEState {
continue;
}
- if (UseInst->mayThrow() && !isInvisibleToCallerBeforeRet(DefUO)) {
+ if (UseInst->mayThrow() && !isInvisibleToCallerBeforeRet(KillingUndObj)) {
LLVM_DEBUG(dbgs() << " ... found throwing instruction\n");
return None;
}
// Uses which may read the original MemoryDef mean we cannot eliminate the
// original MD. Stop walk.
- if (isReadClobber(*CurrentLoc, UseInst)) {
+ if (isReadClobber(MaybeDeadLoc, UseInst)) {
LLVM_DEBUG(dbgs() << " ... found read clobber\n");
return None;
}
@@ -1563,16 +1514,16 @@ struct DSEState {
// If this worklist walks back to the original memory access (and the
// pointer is not guaranteed loop invariant) then we cannot assume that a
// store kills itself.
- if (EarlierAccess == UseAccess &&
- !isGuaranteedLoopInvariant(CurrentLoc->Ptr)) {
+ if (MaybeDeadAccess == UseAccess &&
+ !isGuaranteedLoopInvariant(MaybeDeadLoc.Ptr)) {
LLVM_DEBUG(dbgs() << " ... found not loop invariant self access\n");
return None;
}
- // Otherwise, for the KillingDef and EarlierAccess we only have to check
+ // Otherwise, for the KillingDef and MaybeDeadAccess we only have to check
// if it reads the memory location.
// TODO: It would probably be better to check for self-reads before
// calling the function.
- if (KillingDef == UseAccess || EarlierAccess == UseAccess) {
+ if (KillingDef == UseAccess || MaybeDeadAccess == UseAccess) {
LLVM_DEBUG(dbgs() << " ... skipping killing def/dom access\n");
continue;
}
@@ -1581,18 +1532,18 @@ struct DSEState {
// the original location. Otherwise we have to check uses of *all*
// MemoryDefs we discover, including non-aliasing ones. Otherwise we might
// miss cases like the following
- // 1 = Def(LoE) ; <----- EarlierDef stores [0,1]
+ // 1 = Def(LoE) ; <----- DeadDef stores [0,1]
// 2 = Def(1) ; (2, 1) = NoAlias, stores [2,3]
// Use(2) ; MayAlias 2 *and* 1, loads [0, 3].
// (The Use points to the *first* Def it may alias)
// 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias,
// stores [0,1]
if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess)) {
- if (isCompleteOverwrite(*CurrentLoc, EarlierMemInst, UseInst)) {
+ if (isCompleteOverwrite(MaybeDeadLoc, MaybeDeadI, UseInst)) {
BasicBlock *MaybeKillingBlock = UseInst->getParent();
if (PostOrderNumbers.find(MaybeKillingBlock)->second <
- PostOrderNumbers.find(EarlierAccess->getBlock())->second) {
- if (!isInvisibleToCallerAfterRet(DefUO)) {
+ PostOrderNumbers.find(MaybeDeadAccess->getBlock())->second) {
+ if (!isInvisibleToCallerAfterRet(KillingUndObj)) {
LLVM_DEBUG(dbgs()
<< " ... found killing def " << *UseInst << "\n");
KillingDefs.insert(UseInst);
@@ -1608,9 +1559,9 @@ struct DSEState {
}
// For accesses to locations visible after the function returns, make sure
- // that the location is killed (=overwritten) along all paths from
- // EarlierAccess to the exit.
- if (!isInvisibleToCallerAfterRet(DefUO)) {
+ // that the location is dead (=overwritten) along all paths from
+ // MaybeDeadAccess to the exit.
+ if (!isInvisibleToCallerAfterRet(KillingUndObj)) {
SmallPtrSet<BasicBlock *, 16> KillingBlocks;
for (Instruction *KD : KillingDefs)
KillingBlocks.insert(KD->getParent());
@@ -1619,25 +1570,24 @@ struct DSEState {
// Find the common post-dominator of all killing blocks.
BasicBlock *CommonPred = *KillingBlocks.begin();
- for (auto I = std::next(KillingBlocks.begin()), E = KillingBlocks.end();
- I != E; I++) {
+ for (BasicBlock *BB : llvm::drop_begin(KillingBlocks)) {
if (!CommonPred)
break;
- CommonPred = PDT.findNearestCommonDominator(CommonPred, *I);
+ CommonPred = PDT.findNearestCommonDominator(CommonPred, BB);
}
// If CommonPred is in the set of killing blocks, just check if it
- // post-dominates EarlierAccess.
+ // post-dominates MaybeDeadAccess.
if (KillingBlocks.count(CommonPred)) {
- if (PDT.dominates(CommonPred, EarlierAccess->getBlock()))
- return {EarlierAccess};
+ if (PDT.dominates(CommonPred, MaybeDeadAccess->getBlock()))
+ return {MaybeDeadAccess};
return None;
}
- // If the common post-dominator does not post-dominate EarlierAccess,
- // there is a path from EarlierAccess to an exit not going through a
+ // If the common post-dominator does not post-dominate MaybeDeadAccess,
+ // there is a path from MaybeDeadAccess to an exit not going through a
// killing block.
- if (PDT.dominates(CommonPred, EarlierAccess->getBlock())) {
+ if (PDT.dominates(CommonPred, MaybeDeadAccess->getBlock())) {
SetVector<BasicBlock *> WorkList;
// If CommonPred is null, there are multiple exits from the function.
@@ -1650,16 +1600,16 @@ struct DSEState {
NumCFGTries++;
// Check if all paths starting from an exit node go through one of the
- // killing blocks before reaching EarlierAccess.
+ // killing blocks before reaching MaybeDeadAccess.
for (unsigned I = 0; I < WorkList.size(); I++) {
NumCFGChecks++;
BasicBlock *Current = WorkList[I];
if (KillingBlocks.count(Current))
continue;
- if (Current == EarlierAccess->getBlock())
+ if (Current == MaybeDeadAccess->getBlock())
return None;
- // EarlierAccess is reachable from the entry, so we don't have to
+ // MaybeDeadAccess is reachable from the entry, so we don't have to
// explore unreachable blocks further.
if (!DT.isReachableFromEntry(Current))
continue;
@@ -1671,14 +1621,14 @@ struct DSEState {
return None;
}
NumCFGSuccess++;
- return {EarlierAccess};
+ return {MaybeDeadAccess};
}
return None;
}
- // No aliasing MemoryUses of EarlierAccess found, EarlierAccess is
+ // No aliasing MemoryUses of MaybeDeadAccess found, MaybeDeadAccess is
// potentially dead.
- return {EarlierAccess};
+ return {MaybeDeadAccess};
}
// Delete dead memory defs
@@ -1701,6 +1651,7 @@ struct DSEState {
if (MemoryDef *MD = dyn_cast<MemoryDef>(MA)) {
SkipStores.insert(MD);
}
+
Updater.removeMemoryAccess(MA);
}
@@ -1715,47 +1666,49 @@ struct DSEState {
NowDeadInsts.push_back(OpI);
}
+ EI.removeInstruction(DeadInst);
DeadInst->eraseFromParent();
}
}
- // Check for any extra throws between SI and NI that block DSE. This only
- // checks extra maythrows (those that aren't MemoryDef's). MemoryDef that may
- // throw are handled during the walk from one def to the next.
- bool mayThrowBetween(Instruction *SI, Instruction *NI,
- const Value *SILocUnd) {
- // First see if we can ignore it by using the fact that SI is an
+ // Check for any extra throws between \p KillingI and \p DeadI that block
+ // DSE. This only checks extra maythrows (those that aren't MemoryDefs).
+ // MemoryDefs that may throw are handled during the walk from one def to the
+ // next.
+ bool mayThrowBetween(Instruction *KillingI, Instruction *DeadI,
+ const Value *KillingUndObj) {
+ // First see if we can ignore it by using the fact that KillingI is an
// alloca/alloca like object that is not visible to the caller during
// execution of the function.
- if (SILocUnd && isInvisibleToCallerBeforeRet(SILocUnd))
+ if (KillingUndObj && isInvisibleToCallerBeforeRet(KillingUndObj))
return false;
- if (SI->getParent() == NI->getParent())
- return ThrowingBlocks.count(SI->getParent());
+ if (KillingI->getParent() == DeadI->getParent())
+ return ThrowingBlocks.count(KillingI->getParent());
return !ThrowingBlocks.empty();
}
- // Check if \p NI acts as a DSE barrier for \p SI. The following instructions
- // act as barriers:
- // * A memory instruction that may throw and \p SI accesses a non-stack
+ // Check if \p DeadI acts as a DSE barrier for \p KillingI. The following
+ // instructions act as barriers:
+ // * A memory instruction that may throw and \p KillingI accesses a non-stack
// object.
// * Atomic stores stronger than monotonic.
- bool isDSEBarrier(const Value *SILocUnd, Instruction *NI) {
- // If NI may throw it acts as a barrier, unless we are to an alloca/alloca
- // like object that does not escape.
- if (NI->mayThrow() && !isInvisibleToCallerBeforeRet(SILocUnd))
+ bool isDSEBarrier(const Value *KillingUndObj, Instruction *DeadI) {
+ // If DeadI may throw it acts as a barrier, unless the access is to an
+ // alloca/alloca like object that does not escape.
+ if (DeadI->mayThrow() && !isInvisibleToCallerBeforeRet(KillingUndObj))
return true;
- // If NI is an atomic load/store stronger than monotonic, do not try to
+ // If DeadI is an atomic load/store stronger than monotonic, do not try to
// eliminate/reorder it.
- if (NI->isAtomic()) {
- if (auto *LI = dyn_cast<LoadInst>(NI))
+ if (DeadI->isAtomic()) {
+ if (auto *LI = dyn_cast<LoadInst>(DeadI))
return isStrongerThanMonotonic(LI->getOrdering());
- if (auto *SI = dyn_cast<StoreInst>(NI))
+ if (auto *SI = dyn_cast<StoreInst>(DeadI))
return isStrongerThanMonotonic(SI->getOrdering());
- if (auto *ARMW = dyn_cast<AtomicRMWInst>(NI))
+ if (auto *ARMW = dyn_cast<AtomicRMWInst>(DeadI))
return isStrongerThanMonotonic(ARMW->getOrdering());
- if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(NI))
+ if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(DeadI))
return isStrongerThanMonotonic(CmpXchg->getSuccessOrdering()) ||
isStrongerThanMonotonic(CmpXchg->getFailureOrdering());
llvm_unreachable("other instructions should be skipped in MemorySSA");
@@ -1776,7 +1729,6 @@ struct DSEState {
continue;
Instruction *DefI = Def->getMemoryInst();
- SmallVector<const Value *, 4> Pointers;
auto DefLoc = getLocForWriteEx(DefI);
if (!DefLoc)
continue;
@@ -1787,7 +1739,7 @@ struct DSEState {
// uncommon. If it turns out to be important, we can use
// getUnderlyingObjects here instead.
const Value *UO = getUnderlyingObject(DefLoc->Ptr);
- if (!UO || !isInvisibleToCallerAfterRet(UO))
+ if (!isInvisibleToCallerAfterRet(UO))
continue;
if (isWriteAtEndOfFunction(Def)) {
@@ -1804,8 +1756,7 @@ struct DSEState {
/// \returns true if \p Def is a no-op store, either because it
/// directly stores back a loaded value or stores zero to a calloced object.
- bool storeIsNoop(MemoryDef *Def, const MemoryLocation &DefLoc,
- const Value *DefUO) {
+ bool storeIsNoop(MemoryDef *Def, const Value *DefUO) {
StoreInst *Store = dyn_cast<StoreInst>(Def->getMemoryInst());
MemSetInst *MemSet = dyn_cast<MemSetInst>(Def->getMemoryInst());
Constant *StoredConstant = nullptr;
@@ -1816,13 +1767,78 @@ struct DSEState {
if (StoredConstant && StoredConstant->isNullValue()) {
auto *DefUOInst = dyn_cast<Instruction>(DefUO);
- if (DefUOInst && isCallocLikeFn(DefUOInst, &TLI)) {
- auto *UnderlyingDef = cast<MemoryDef>(MSSA.getMemoryAccess(DefUOInst));
- // If UnderlyingDef is the clobbering access of Def, no instructions
- // between them can modify the memory location.
- auto *ClobberDef =
- MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def);
- return UnderlyingDef == ClobberDef;
+ if (DefUOInst) {
+ if (isCallocLikeFn(DefUOInst, &TLI)) {
+ auto *UnderlyingDef =
+ cast<MemoryDef>(MSSA.getMemoryAccess(DefUOInst));
+ // If UnderlyingDef is the clobbering access of Def, no instructions
+ // between them can modify the memory location.
+ auto *ClobberDef =
+ MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def);
+ return UnderlyingDef == ClobberDef;
+ }
+
+ if (MemSet) {
+ if (F.hasFnAttribute(Attribute::SanitizeMemory) ||
+ F.hasFnAttribute(Attribute::SanitizeAddress) ||
+ F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
+ F.getName() == "calloc")
+ return false;
+ auto *Malloc = const_cast<CallInst *>(dyn_cast<CallInst>(DefUOInst));
+ if (!Malloc)
+ return false;
+ auto *InnerCallee = Malloc->getCalledFunction();
+ if (!InnerCallee)
+ return false;
+ LibFunc Func;
+ if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) ||
+ Func != LibFunc_malloc)
+ return false;
+
+ auto shouldCreateCalloc = [](CallInst *Malloc, CallInst *Memset) {
+ // Check for a br(icmp(ptr, null), truebb, falsebb) pattern at the end
+ // of the malloc block.
+ auto *MallocBB = Malloc->getParent(),
+ *MemsetBB = Memset->getParent();
+ if (MallocBB == MemsetBB)
+ return true;
+ auto *Ptr = Memset->getArgOperand(0);
+ auto *TI = MallocBB->getTerminator();
+ ICmpInst::Predicate Pred;
+ BasicBlock *TrueBB, *FalseBB;
+ if (!match(TI, m_Br(m_ICmp(Pred, m_Specific(Ptr), m_Zero()), TrueBB,
+ FalseBB)))
+ return false;
+ if (Pred != ICmpInst::ICMP_EQ || MemsetBB != FalseBB)
+ return false;
+ return true;
+ };
+
+ if (Malloc->getOperand(0) == MemSet->getLength()) {
+ if (shouldCreateCalloc(Malloc, MemSet) &&
+ DT.dominates(Malloc, MemSet) &&
+ memoryIsNotModifiedBetween(Malloc, MemSet, BatchAA, DL, &DT)) {
+ IRBuilder<> IRB(Malloc);
+ const auto &DL = Malloc->getModule()->getDataLayout();
+ if (auto *Calloc =
+ emitCalloc(ConstantInt::get(IRB.getIntPtrTy(DL), 1),
+ Malloc->getArgOperand(0), IRB, TLI)) {
+ MemorySSAUpdater Updater(&MSSA);
+ auto *LastDef = cast<MemoryDef>(
+ Updater.getMemorySSA()->getMemoryAccess(Malloc));
+ auto *NewAccess = Updater.createMemoryAccessAfter(
+ cast<Instruction>(Calloc), LastDef, LastDef);
+ auto *NewAccessMD = cast<MemoryDef>(NewAccess);
+ Updater.insertDef(NewAccessMD, /*RenameUses=*/true);
+ Updater.removeMemoryAccess(Malloc);
+ Malloc->replaceAllUsesWith(Calloc);
+ Malloc->eraseFromParent();
+ return true;
+ }
+ return false;
+ }
+ }
+ }
}
}
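
The malloc-plus-memset(0) folding above matches, at the IR level, what source code of roughly the following shape compiles to (illustrative C++; the null check corresponds to the br(icmp(ptr, null)) pattern the lambda accepts, and the rewrite is skipped when sanitizers are enabled):

    #include <cstdlib>
    #include <cstring>

    // Shape recognized above: a malloc whose whole allocation is zeroed by a
    // memset with the same length operand, with nothing modifying the memory
    // in between. DSE rewrites the pair into calloc(1, N) and drops the memset.
    void *zeroedBuffer(std::size_t N) {
      void *P = std::malloc(N);
      if (P != nullptr)        // memset sits on the "pointer is non-null" edge
        std::memset(P, 0, N);
      return P;
    }
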
@@ -1875,6 +1891,76 @@ struct DSEState {
return false;
}
+
+ bool removePartiallyOverlappedStores(InstOverlapIntervalsTy &IOL) {
+ bool Changed = false;
+ for (auto OI : IOL) {
+ Instruction *DeadI = OI.first;
+ MemoryLocation Loc = *getLocForWriteEx(DeadI);
+ assert(isRemovable(DeadI) && "Expect only removable instruction");
+
+ const Value *Ptr = Loc.Ptr->stripPointerCasts();
+ int64_t DeadStart = 0;
+ uint64_t DeadSize = Loc.Size.getValue();
+ GetPointerBaseWithConstantOffset(Ptr, DeadStart, DL);
+ OverlapIntervalsTy &IntervalMap = OI.second;
+ Changed |= tryToShortenEnd(DeadI, IntervalMap, DeadStart, DeadSize);
+ if (IntervalMap.empty())
+ continue;
+ Changed |= tryToShortenBegin(DeadI, IntervalMap, DeadStart, DeadSize);
+ }
+ return Changed;
+ }
+
+ /// Eliminates writes to locations where the value that is being written
+ /// is already stored at the same location.
+ bool eliminateRedundantStoresOfExistingValues() {
+ bool MadeChange = false;
+ LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs that write the "
+ "already existing value\n");
+ for (auto *Def : MemDefs) {
+ if (SkipStores.contains(Def) || MSSA.isLiveOnEntryDef(Def) ||
+ !isRemovable(Def->getMemoryInst()))
+ continue;
+ auto *UpperDef = dyn_cast<MemoryDef>(Def->getDefiningAccess());
+ if (!UpperDef || MSSA.isLiveOnEntryDef(UpperDef))
+ continue;
+
+ Instruction *DefInst = Def->getMemoryInst();
+ Instruction *UpperInst = UpperDef->getMemoryInst();
+ auto IsRedundantStore = [this, DefInst,
+ UpperInst](MemoryLocation UpperLoc) {
+ if (DefInst->isIdenticalTo(UpperInst))
+ return true;
+ if (auto *MemSetI = dyn_cast<MemSetInst>(UpperInst)) {
+ if (auto *SI = dyn_cast<StoreInst>(DefInst)) {
+ auto MaybeDefLoc = getLocForWriteEx(DefInst);
+ if (!MaybeDefLoc)
+ return false;
+ int64_t InstWriteOffset = 0;
+ int64_t DepWriteOffset = 0;
+ auto OR = isOverwrite(UpperInst, DefInst, UpperLoc, *MaybeDefLoc,
+ InstWriteOffset, DepWriteOffset);
+ Value *StoredByte = isBytewiseValue(SI->getValueOperand(), DL);
+ return StoredByte && StoredByte == MemSetI->getOperand(1) &&
+ OR == OW_Complete;
+ }
+ }
+ return false;
+ };
+
+ auto MaybeUpperLoc = getLocForWriteEx(UpperInst);
+ if (!MaybeUpperLoc || !IsRedundantStore(*MaybeUpperLoc) ||
+ isReadClobber(*MaybeUpperLoc, DefInst))
+ continue;
+ LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *DefInst
+ << '\n');
+ deleteDeadInstruction(DefInst);
+ NumRedundantStores++;
+ MadeChange = true;
+ }
+ return MadeChange;
+ }
};
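
As a source-level illustration of eliminateRedundantStoresOfExistingValues: a byte store that writes the same value a dominating memset already placed at that address is removable. Hypothetical example (the pass operates on the IR this lowers to):

    #include <cstring>

    void initBuffer(char *A) {
      std::memset(A, 0, 16); // upper def: writes zero to A[0..15]
      A[3] = 0;              // redundant: stores the byte the memset already wrote
    }
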
static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
@@ -1883,68 +1969,64 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
const LoopInfo &LI) {
bool MadeChange = false;
- DSEState State = DSEState::get(F, AA, MSSA, DT, PDT, TLI, LI);
+ DSEState State(F, AA, MSSA, DT, PDT, TLI, LI);
// For each store:
for (unsigned I = 0; I < State.MemDefs.size(); I++) {
MemoryDef *KillingDef = State.MemDefs[I];
if (State.SkipStores.count(KillingDef))
continue;
- Instruction *SI = KillingDef->getMemoryInst();
+ Instruction *KillingI = KillingDef->getMemoryInst();
- Optional<MemoryLocation> MaybeSILoc;
- if (State.isMemTerminatorInst(SI))
- MaybeSILoc = State.getLocForTerminator(SI).map(
+ Optional<MemoryLocation> MaybeKillingLoc;
+ if (State.isMemTerminatorInst(KillingI))
+ MaybeKillingLoc = State.getLocForTerminator(KillingI).map(
[](const std::pair<MemoryLocation, bool> &P) { return P.first; });
else
- MaybeSILoc = State.getLocForWriteEx(SI);
+ MaybeKillingLoc = State.getLocForWriteEx(KillingI);
- if (!MaybeSILoc) {
+ if (!MaybeKillingLoc) {
LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for "
- << *SI << "\n");
+ << *KillingI << "\n");
continue;
}
- MemoryLocation SILoc = *MaybeSILoc;
- assert(SILoc.Ptr && "SILoc should not be null");
- const Value *SILocUnd = getUnderlyingObject(SILoc.Ptr);
-
- MemoryAccess *Current = KillingDef;
+ MemoryLocation KillingLoc = *MaybeKillingLoc;
+ assert(KillingLoc.Ptr && "KillingLoc should not be null");
+ const Value *KillingUndObj = getUnderlyingObject(KillingLoc.Ptr);
LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
- << *Current << " (" << *SI << ")\n");
+ << *KillingDef << " (" << *KillingI << ")\n");
unsigned ScanLimit = MemorySSAScanLimit;
unsigned WalkerStepLimit = MemorySSAUpwardsStepLimit;
unsigned PartialLimit = MemorySSAPartialStoreLimit;
// Worklist of MemoryAccesses that may be killed by KillingDef.
SetVector<MemoryAccess *> ToCheck;
-
- if (SILocUnd)
- ToCheck.insert(KillingDef->getDefiningAccess());
+ ToCheck.insert(KillingDef->getDefiningAccess());
bool Shortend = false;
- bool IsMemTerm = State.isMemTerminatorInst(SI);
+ bool IsMemTerm = State.isMemTerminatorInst(KillingI);
// Check if MemoryAccesses in the worklist are killed by KillingDef.
for (unsigned I = 0; I < ToCheck.size(); I++) {
- Current = ToCheck[I];
+ MemoryAccess *Current = ToCheck[I];
if (State.SkipStores.count(Current))
continue;
- Optional<MemoryAccess *> Next = State.getDomMemoryDef(
- KillingDef, Current, SILoc, SILocUnd, ScanLimit, WalkerStepLimit,
- IsMemTerm, PartialLimit);
+ Optional<MemoryAccess *> MaybeDeadAccess = State.getDomMemoryDef(
+ KillingDef, Current, KillingLoc, KillingUndObj, ScanLimit,
+ WalkerStepLimit, IsMemTerm, PartialLimit);
- if (!Next) {
+ if (!MaybeDeadAccess) {
LLVM_DEBUG(dbgs() << " finished walk\n");
continue;
}
- MemoryAccess *EarlierAccess = *Next;
- LLVM_DEBUG(dbgs() << " Checking if we can kill " << *EarlierAccess);
- if (isa<MemoryPhi>(EarlierAccess)) {
+ MemoryAccess *DeadAccess = *MaybeDeadAccess;
+ LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DeadAccess);
+ if (isa<MemoryPhi>(DeadAccess)) {
LLVM_DEBUG(dbgs() << "\n ... adding incoming values to worklist\n");
- for (Value *V : cast<MemoryPhi>(EarlierAccess)->incoming_values()) {
+ for (Value *V : cast<MemoryPhi>(DeadAccess)->incoming_values()) {
MemoryAccess *IncomingAccess = cast<MemoryAccess>(V);
BasicBlock *IncomingBlock = IncomingAccess->getBlock();
- BasicBlock *PhiBlock = EarlierAccess->getBlock();
+ BasicBlock *PhiBlock = DeadAccess->getBlock();
// We only consider incoming MemoryAccesses that come before the
// MemoryPhi. Otherwise we could discover candidates that do not
@@ -1955,72 +2037,73 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
}
continue;
}
- auto *NextDef = cast<MemoryDef>(EarlierAccess);
- Instruction *NI = NextDef->getMemoryInst();
- LLVM_DEBUG(dbgs() << " (" << *NI << ")\n");
- ToCheck.insert(NextDef->getDefiningAccess());
+ auto *DeadDefAccess = cast<MemoryDef>(DeadAccess);
+ Instruction *DeadI = DeadDefAccess->getMemoryInst();
+ LLVM_DEBUG(dbgs() << " (" << *DeadI << ")\n");
+ ToCheck.insert(DeadDefAccess->getDefiningAccess());
NumGetDomMemoryDefPassed++;
if (!DebugCounter::shouldExecute(MemorySSACounter))
continue;
- MemoryLocation NILoc = *State.getLocForWriteEx(NI);
+ MemoryLocation DeadLoc = *State.getLocForWriteEx(DeadI);
if (IsMemTerm) {
- const Value *NIUnd = getUnderlyingObject(NILoc.Ptr);
- if (SILocUnd != NIUnd)
+ const Value *DeadUndObj = getUnderlyingObject(DeadLoc.Ptr);
+ if (KillingUndObj != DeadUndObj)
continue;
- LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *NI
- << "\n KILLER: " << *SI << '\n');
- State.deleteDeadInstruction(NI);
+ LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DeadI
+ << "\n KILLER: " << *KillingI << '\n');
+ State.deleteDeadInstruction(DeadI);
++NumFastStores;
MadeChange = true;
} else {
- // Check if NI overwrites SI.
- int64_t InstWriteOffset, DepWriteOffset;
- OverwriteResult OR = State.isOverwrite(SI, NI, SILoc, NILoc,
- DepWriteOffset, InstWriteOffset);
+ // Check if KillingI overwrites DeadI.
+ int64_t KillingOffset = 0;
+ int64_t DeadOffset = 0;
+ OverwriteResult OR = State.isOverwrite(
+ KillingI, DeadI, KillingLoc, DeadLoc, KillingOffset, DeadOffset);
if (OR == OW_MaybePartial) {
auto Iter = State.IOLs.insert(
std::make_pair<BasicBlock *, InstOverlapIntervalsTy>(
- NI->getParent(), InstOverlapIntervalsTy()));
+ DeadI->getParent(), InstOverlapIntervalsTy()));
auto &IOL = Iter.first->second;
- OR = isPartialOverwrite(SILoc, NILoc, DepWriteOffset, InstWriteOffset,
- NI, IOL);
+ OR = isPartialOverwrite(KillingLoc, DeadLoc, KillingOffset,
+ DeadOffset, DeadI, IOL);
}
if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) {
- auto *Earlier = dyn_cast<StoreInst>(NI);
- auto *Later = dyn_cast<StoreInst>(SI);
+ auto *DeadSI = dyn_cast<StoreInst>(DeadI);
+ auto *KillingSI = dyn_cast<StoreInst>(KillingI);
// We are re-using tryToMergePartialOverlappingStores, which requires
- // Earlier to domiante Later.
+ // DeadSI to dominate KillingSI.
// TODO: implement tryToMergePartialOverlappingStores using MemorySSA.
- if (Earlier && Later && DT.dominates(Earlier, Later)) {
+ if (DeadSI && KillingSI && DT.dominates(DeadSI, KillingSI)) {
if (Constant *Merged = tryToMergePartialOverlappingStores(
- Earlier, Later, InstWriteOffset, DepWriteOffset, State.DL,
+ KillingSI, DeadSI, KillingOffset, DeadOffset, State.DL,
State.BatchAA, &DT)) {
// Update stored value of earlier store to merged constant.
- Earlier->setOperand(0, Merged);
+ DeadSI->setOperand(0, Merged);
++NumModifiedStores;
MadeChange = true;
Shortend = true;
- // Remove later store and remove any outstanding overlap intervals
- // for the updated store.
- State.deleteDeadInstruction(Later);
- auto I = State.IOLs.find(Earlier->getParent());
+ // Remove killing store and remove any outstanding overlap
+ // intervals for the updated store.
+ State.deleteDeadInstruction(KillingSI);
+ auto I = State.IOLs.find(DeadSI->getParent());
if (I != State.IOLs.end())
- I->second.erase(Earlier);
+ I->second.erase(DeadSI);
break;
}
}
}
if (OR == OW_Complete) {
- LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *NI
- << "\n KILLER: " << *SI << '\n');
- State.deleteDeadInstruction(NI);
+ LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DeadI
+ << "\n KILLER: " << *KillingI << '\n');
+ State.deleteDeadInstruction(DeadI);
++NumFastStores;
MadeChange = true;
}
@@ -2028,10 +2111,11 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
}
// Check if the store is a no-op.
- if (!Shortend && isRemovable(SI) &&
- State.storeIsNoop(KillingDef, SILoc, SILocUnd)) {
- LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *SI << '\n');
- State.deleteDeadInstruction(SI);
+ if (!Shortend && isRemovable(KillingI) &&
+ State.storeIsNoop(KillingDef, KillingUndObj)) {
+ LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *KillingI
+ << '\n');
+ State.deleteDeadInstruction(KillingI);
NumRedundantStores++;
MadeChange = true;
continue;
@@ -2040,8 +2124,9 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
if (EnablePartialOverwriteTracking)
for (auto &KV : State.IOLs)
- MadeChange |= removePartiallyOverlappedStores(State.DL, KV.second, TLI);
+ MadeChange |= State.removePartiallyOverlappedStores(KV.second);
+ MadeChange |= State.eliminateRedundantStoresOfExistingValues();
MadeChange |= State.eliminateDeadWritesAtEndOfFunction();
return MadeChange;
}
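Aside (not part of the patch): a minimal C++ sketch of the store pattern the new eliminateRedundantStoresOfExistingValues() helper above targets — a store that rewrites a byte a preceding memset already set, which DSE may delete without changing observable behaviour. Names below are hypothetical.

#include <cstring>

// Hypothetical example for illustration only.
void redundant_store(char *buf, unsigned len) {
  std::memset(buf, 0, len); // plays the role of UpperDef / UpperInst above
  if (len > 8)
    buf[4] = 0;             // plays the role of Def / DefInst: value already present
}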
diff --git a/llvm/lib/Transforms/Scalar/DivRemPairs.cpp b/llvm/lib/Transforms/Scalar/DivRemPairs.cpp
index c77769368ede..66c9d9f0902a 100644
--- a/llvm/lib/Transforms/Scalar/DivRemPairs.cpp
+++ b/llvm/lib/Transforms/Scalar/DivRemPairs.cpp
@@ -272,9 +272,10 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
if (PredBB && IsSafeToHoist(RemInst, RemBB) &&
IsSafeToHoist(DivInst, DivBB) &&
- llvm::all_of(successors(PredBB), [&](BasicBlock *BB) {
- return BB == DivBB || BB == RemBB;
- })) {
+ all_of(successors(PredBB),
+ [&](BasicBlock *BB) { return BB == DivBB || BB == RemBB; }) &&
+ all_of(predecessors(DivBB),
+ [&](BasicBlock *BB) { return BB == RemBB || BB == PredBB; })) {
DivDominates = true;
DivInst->moveBefore(PredBB->getTerminator());
Changed = true;
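Aside (not from the patch): a hedged source-level sketch of the shape DivRemPairs optimizes — a division and a remainder of the same operands in sibling blocks. The hunk above tightens the hoisting condition by also requiring every predecessor of the division block to be the remainder block or the common predecessor.

// Hypothetical illustration: a / b and a % b share operands, so after the div
// is hoisted the rem can be rewritten as a - (a / b) * b.
unsigned div_or_rem(unsigned a, unsigned b, bool want_rem) {
  if (want_rem)
    return a % b; // corresponds to RemBB
  return a / b;   // corresponds to DivBB
}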
diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index 978c6a77b8dc..90f71f7729a7 100644
--- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -293,7 +293,7 @@ static unsigned getHashValueImpl(SimpleValue Val) {
// TODO: Extend this to handle intrinsics with >2 operands where the 1st
// 2 operands are commutative.
auto *II = dyn_cast<IntrinsicInst>(Inst);
- if (II && II->isCommutative() && II->getNumArgOperands() == 2) {
+ if (II && II->isCommutative() && II->arg_size() == 2) {
Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
if (LHS > RHS)
std::swap(LHS, RHS);
@@ -363,7 +363,7 @@ static bool isEqualImpl(SimpleValue LHS, SimpleValue RHS) {
auto *LII = dyn_cast<IntrinsicInst>(LHSI);
auto *RII = dyn_cast<IntrinsicInst>(RHSI);
if (LII && RII && LII->getIntrinsicID() == RII->getIntrinsicID() &&
- LII->isCommutative() && LII->getNumArgOperands() == 2) {
+ LII->isCommutative() && LII->arg_size() == 2) {
return LII->getArgOperand(0) == RII->getArgOperand(1) &&
LII->getArgOperand(1) == RII->getArgOperand(0);
}
@@ -1265,6 +1265,12 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
continue;
}
+ // Skip pseudoprobe intrinsics, for the same reason as assume intrinsics.
+ if (match(&Inst, m_Intrinsic<Intrinsic::pseudoprobe>())) {
+ LLVM_DEBUG(dbgs() << "EarlyCSE skipping pseudoprobe: " << Inst << '\n');
+ continue;
+ }
+
// We can skip all invariant.start intrinsics since they only read memory,
// and we can forward values across it. For invariant starts without
// invariant ends, we can use the fact that the invariantness never ends to
@@ -1642,6 +1648,16 @@ PreservedAnalyses EarlyCSEPass::run(Function &F,
return PA;
}
+void EarlyCSEPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<EarlyCSEPass> *>(this)->printPipeline(
+ OS, MapClassName2PassName);
+ OS << "<";
+ if (UseMemorySSA)
+ OS << "memssa";
+ OS << ">";
+}
+
namespace {
/// A simple and fast domtree-based CSE pass.
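Aside (not LLVM's code): a standalone sketch of the canonicalisation idea behind the arg_size() == 2 hunks above — order the two operands of a commutative call before hashing so f(a, b) and f(b, a) hash identically and can be CSE'd. The helper name is hypothetical; EarlyCSE hashes llvm::Value pointers.

#include <cstdint>
#include <functional>
#include <utility>

// Hypothetical helper, illustration only.
uint64_t hashCommutativePair(const void *LHS, const void *RHS) {
  if (LHS > RHS)   // impose a total order, as getHashValueImpl() does above
    std::swap(LHS, RHS);
  return std::hash<const void *>{}(LHS) ^ (std::hash<const void *>{}(RHS) << 1);
}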
diff --git a/llvm/lib/Transforms/Scalar/Float2Int.cpp b/llvm/lib/Transforms/Scalar/Float2Int.cpp
index 8a5d4f568774..a98bb8358aef 100644
--- a/llvm/lib/Transforms/Scalar/Float2Int.cpp
+++ b/llvm/lib/Transforms/Scalar/Float2Int.cpp
@@ -256,7 +256,7 @@ void Float2IntPass::walkForwards() {
Op = [](ArrayRef<ConstantRange> Ops) {
assert(Ops.size() == 1 && "FNeg is a unary operator!");
unsigned Size = Ops[0].getBitWidth();
- auto Zero = ConstantRange(APInt::getNullValue(Size));
+ auto Zero = ConstantRange(APInt::getZero(Size));
return Zero.sub(Ops[0]);
};
break;
@@ -372,7 +372,7 @@ bool Float2IntPass::validateAndTransform() {
// If it does, transformation would be illegal.
//
// Don't count the roots, as they terminate the graphs.
- if (Roots.count(I) == 0) {
+ if (!Roots.contains(I)) {
// Set the type of the conversion while we're here.
if (!ConvertedToTy)
ConvertedToTy = I->getType();
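Aside (not from the patch): the FNeg lambda above models fneg as the integer range {0} minus the operand's range. A tiny hypothetical sketch of that interval arithmetic: negating [lo, hi] yields [-hi, -lo], so if x is known to lie in [1, 5], -x lies in [-5, -1].

#include <utility>

// Hypothetical illustration only.
std::pair<long, long> negateRange(std::pair<long, long> r) {
  return {-r.second, -r.first}; // {0} - [lo, hi] == [-hi, -lo]
}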
diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp
index 16368aec7c3f..00506fb86006 100644
--- a/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -126,7 +126,7 @@ static cl::opt<uint32_t> MaxBBSpeculations(
"into) when deducing if a value is fully available or not in GVN "
"(default = 600)"));
-struct llvm::GVN::Expression {
+struct llvm::GVNPass::Expression {
uint32_t opcode;
bool commutative = false;
Type *type = nullptr;
@@ -155,17 +155,18 @@ struct llvm::GVN::Expression {
namespace llvm {
-template <> struct DenseMapInfo<GVN::Expression> {
- static inline GVN::Expression getEmptyKey() { return ~0U; }
- static inline GVN::Expression getTombstoneKey() { return ~1U; }
+template <> struct DenseMapInfo<GVNPass::Expression> {
+ static inline GVNPass::Expression getEmptyKey() { return ~0U; }
+ static inline GVNPass::Expression getTombstoneKey() { return ~1U; }
- static unsigned getHashValue(const GVN::Expression &e) {
+ static unsigned getHashValue(const GVNPass::Expression &e) {
using llvm::hash_value;
return static_cast<unsigned>(hash_value(e));
}
- static bool isEqual(const GVN::Expression &LHS, const GVN::Expression &RHS) {
+ static bool isEqual(const GVNPass::Expression &LHS,
+ const GVNPass::Expression &RHS) {
return LHS == RHS;
}
};
@@ -246,7 +247,7 @@ struct llvm::gvn::AvailableValue {
/// Emit code at the specified insertion point to adjust the value defined
/// here to the specified type. This handles various coercion cases.
Value *MaterializeAdjustedValue(LoadInst *Load, Instruction *InsertPt,
- GVN &gvn) const;
+ GVNPass &gvn) const;
};
/// Represents an AvailableValue which can be rematerialized at the end of
@@ -276,7 +277,7 @@ struct llvm::gvn::AvailableValueInBlock {
/// Emit code at the end of this block to adjust the value defined here to
/// the specified type. This handles various coercion cases.
- Value *MaterializeAdjustedValue(LoadInst *Load, GVN &gvn) const {
+ Value *MaterializeAdjustedValue(LoadInst *Load, GVNPass &gvn) const {
return AV.MaterializeAdjustedValue(Load, BB->getTerminator(), gvn);
}
};
@@ -285,7 +286,7 @@ struct llvm::gvn::AvailableValueInBlock {
// ValueTable Internal Functions
//===----------------------------------------------------------------------===//
-GVN::Expression GVN::ValueTable::createExpr(Instruction *I) {
+GVNPass::Expression GVNPass::ValueTable::createExpr(Instruction *I) {
Expression e;
e.type = I->getType();
e.opcode = I->getOpcode();
@@ -330,9 +331,8 @@ GVN::Expression GVN::ValueTable::createExpr(Instruction *I) {
return e;
}
-GVN::Expression GVN::ValueTable::createCmpExpr(unsigned Opcode,
- CmpInst::Predicate Predicate,
- Value *LHS, Value *RHS) {
+GVNPass::Expression GVNPass::ValueTable::createCmpExpr(
+ unsigned Opcode, CmpInst::Predicate Predicate, Value *LHS, Value *RHS) {
assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
"Not a comparison!");
Expression e;
@@ -350,7 +350,8 @@ GVN::Expression GVN::ValueTable::createCmpExpr(unsigned Opcode,
return e;
}
-GVN::Expression GVN::ValueTable::createExtractvalueExpr(ExtractValueInst *EI) {
+GVNPass::Expression
+GVNPass::ValueTable::createExtractvalueExpr(ExtractValueInst *EI) {
assert(EI && "Not an ExtractValueInst?");
Expression e;
e.type = EI->getType();
@@ -382,20 +383,21 @@ GVN::Expression GVN::ValueTable::createExtractvalueExpr(ExtractValueInst *EI) {
// ValueTable External Functions
//===----------------------------------------------------------------------===//
-GVN::ValueTable::ValueTable() = default;
-GVN::ValueTable::ValueTable(const ValueTable &) = default;
-GVN::ValueTable::ValueTable(ValueTable &&) = default;
-GVN::ValueTable::~ValueTable() = default;
-GVN::ValueTable &GVN::ValueTable::operator=(const GVN::ValueTable &Arg) = default;
+GVNPass::ValueTable::ValueTable() = default;
+GVNPass::ValueTable::ValueTable(const ValueTable &) = default;
+GVNPass::ValueTable::ValueTable(ValueTable &&) = default;
+GVNPass::ValueTable::~ValueTable() = default;
+GVNPass::ValueTable &
+GVNPass::ValueTable::operator=(const GVNPass::ValueTable &Arg) = default;
/// add - Insert a value into the table with a specified value number.
-void GVN::ValueTable::add(Value *V, uint32_t num) {
+void GVNPass::ValueTable::add(Value *V, uint32_t num) {
valueNumbering.insert(std::make_pair(V, num));
if (PHINode *PN = dyn_cast<PHINode>(V))
NumberingPhi[num] = PN;
}
-uint32_t GVN::ValueTable::lookupOrAddCall(CallInst *C) {
+uint32_t GVNPass::ValueTable::lookupOrAddCall(CallInst *C) {
if (AA->doesNotAccessMemory(C)) {
Expression exp = createExpr(C);
uint32_t e = assignExpNewValueNum(exp).first;
@@ -421,13 +423,12 @@ uint32_t GVN::ValueTable::lookupOrAddCall(CallInst *C) {
// a normal load or store instruction.
CallInst *local_cdep = dyn_cast<CallInst>(local_dep.getInst());
- if (!local_cdep ||
- local_cdep->getNumArgOperands() != C->getNumArgOperands()) {
+ if (!local_cdep || local_cdep->arg_size() != C->arg_size()) {
valueNumbering[C] = nextValueNumber;
return nextValueNumber++;
}
- for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
+ for (unsigned i = 0, e = C->arg_size(); i < e; ++i) {
uint32_t c_vn = lookupOrAdd(C->getArgOperand(i));
uint32_t cd_vn = lookupOrAdd(local_cdep->getArgOperand(i));
if (c_vn != cd_vn) {
@@ -477,11 +478,11 @@ uint32_t GVN::ValueTable::lookupOrAddCall(CallInst *C) {
return nextValueNumber++;
}
- if (cdep->getNumArgOperands() != C->getNumArgOperands()) {
+ if (cdep->arg_size() != C->arg_size()) {
valueNumbering[C] = nextValueNumber;
return nextValueNumber++;
}
- for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
+ for (unsigned i = 0, e = C->arg_size(); i < e; ++i) {
uint32_t c_vn = lookupOrAdd(C->getArgOperand(i));
uint32_t cd_vn = lookupOrAdd(cdep->getArgOperand(i));
if (c_vn != cd_vn) {
@@ -500,11 +501,13 @@ uint32_t GVN::ValueTable::lookupOrAddCall(CallInst *C) {
}
/// Returns true if a value number exists for the specified value.
-bool GVN::ValueTable::exists(Value *V) const { return valueNumbering.count(V) != 0; }
+bool GVNPass::ValueTable::exists(Value *V) const {
+ return valueNumbering.count(V) != 0;
+}
/// lookup_or_add - Returns the value number for the specified value, assigning
/// it a new number if it did not have one before.
-uint32_t GVN::ValueTable::lookupOrAdd(Value *V) {
+uint32_t GVNPass::ValueTable::lookupOrAdd(Value *V) {
DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
if (VI != valueNumbering.end())
return VI->second;
@@ -581,7 +584,7 @@ uint32_t GVN::ValueTable::lookupOrAdd(Value *V) {
/// Returns the value number of the specified value. Fails if
/// the value has not yet been numbered.
-uint32_t GVN::ValueTable::lookup(Value *V, bool Verify) const {
+uint32_t GVNPass::ValueTable::lookup(Value *V, bool Verify) const {
DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V);
if (Verify) {
assert(VI != valueNumbering.end() && "Value not numbered?");
@@ -594,15 +597,15 @@ uint32_t GVN::ValueTable::lookup(Value *V, bool Verify) const {
/// assigning it a new number if it did not have one before. Useful when
/// we deduced the result of a comparison, but don't immediately have an
/// instruction realizing that comparison to hand.
-uint32_t GVN::ValueTable::lookupOrAddCmp(unsigned Opcode,
- CmpInst::Predicate Predicate,
- Value *LHS, Value *RHS) {
+uint32_t GVNPass::ValueTable::lookupOrAddCmp(unsigned Opcode,
+ CmpInst::Predicate Predicate,
+ Value *LHS, Value *RHS) {
Expression exp = createCmpExpr(Opcode, Predicate, LHS, RHS);
return assignExpNewValueNum(exp).first;
}
/// Remove all entries from the ValueTable.
-void GVN::ValueTable::clear() {
+void GVNPass::ValueTable::clear() {
valueNumbering.clear();
expressionNumbering.clear();
NumberingPhi.clear();
@@ -614,7 +617,7 @@ void GVN::ValueTable::clear() {
}
/// Remove a value from the value numbering.
-void GVN::ValueTable::erase(Value *V) {
+void GVNPass::ValueTable::erase(Value *V) {
uint32_t Num = valueNumbering.lookup(V);
valueNumbering.erase(V);
// If V is PHINode, V <--> value number is an one-to-one mapping.
@@ -624,7 +627,7 @@ void GVN::ValueTable::erase(Value *V) {
/// verifyRemoved - Verify that the value is removed from all internal data
/// structures.
-void GVN::ValueTable::verifyRemoved(const Value *V) const {
+void GVNPass::ValueTable::verifyRemoved(const Value *V) const {
for (DenseMap<Value*, uint32_t>::const_iterator
I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) {
assert(I->first != V && "Inst still occurs in value numbering map!");
@@ -635,28 +638,28 @@ void GVN::ValueTable::verifyRemoved(const Value *V) const {
// GVN Pass
//===----------------------------------------------------------------------===//
-bool GVN::isPREEnabled() const {
+bool GVNPass::isPREEnabled() const {
return Options.AllowPRE.getValueOr(GVNEnablePRE);
}
-bool GVN::isLoadPREEnabled() const {
+bool GVNPass::isLoadPREEnabled() const {
return Options.AllowLoadPRE.getValueOr(GVNEnableLoadPRE);
}
-bool GVN::isLoadInLoopPREEnabled() const {
+bool GVNPass::isLoadInLoopPREEnabled() const {
return Options.AllowLoadInLoopPRE.getValueOr(GVNEnableLoadInLoopPRE);
}
-bool GVN::isLoadPRESplitBackedgeEnabled() const {
+bool GVNPass::isLoadPRESplitBackedgeEnabled() const {
return Options.AllowLoadPRESplitBackedge.getValueOr(
GVNEnableSplitBackedgeInLoadPRE);
}
-bool GVN::isMemDepEnabled() const {
+bool GVNPass::isMemDepEnabled() const {
return Options.AllowMemDep.getValueOr(GVNEnableMemDep);
}
-PreservedAnalyses GVN::run(Function &F, FunctionAnalysisManager &AM) {
+PreservedAnalyses GVNPass::run(Function &F, FunctionAnalysisManager &AM) {
// FIXME: The order of evaluation of these 'getResult' calls is very
// significant! Re-ordering these variables will cause GVN when run alone to
// be less effective! We should fix memdep and basic-aa to not exhibit this
@@ -684,8 +687,26 @@ PreservedAnalyses GVN::run(Function &F, FunctionAnalysisManager &AM) {
return PA;
}
+void GVNPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<GVNPass> *>(this)->printPipeline(
+ OS, MapClassName2PassName);
+
+ OS << "<";
+ if (Options.AllowPRE != None)
+ OS << (Options.AllowPRE.getValue() ? "" : "no-") << "pre;";
+ if (Options.AllowLoadPRE != None)
+ OS << (Options.AllowLoadPRE.getValue() ? "" : "no-") << "load-pre;";
+ if (Options.AllowLoadPRESplitBackedge != None)
+ OS << (Options.AllowLoadPRESplitBackedge.getValue() ? "" : "no-")
+ << "split-backedge-load-pre;";
+ if (Options.AllowMemDep != None)
+ OS << (Options.AllowMemDep.getValue() ? "" : "no-") << "memdep";
+ OS << ">";
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD void GVN::dump(DenseMap<uint32_t, Value*>& d) const {
+LLVM_DUMP_METHOD void GVNPass::dump(DenseMap<uint32_t, Value *> &d) const {
errs() << "{\n";
for (auto &I : d) {
errs() << I.first << "\n";
@@ -835,7 +856,7 @@ static bool IsValueFullyAvailableInBlock(
static Value *
ConstructSSAForLoadSet(LoadInst *Load,
SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock,
- GVN &gvn) {
+ GVNPass &gvn) {
// Check for the fully redundant, dominating load case. In this case, we can
// just use the dominating value directly.
if (ValuesPerBlock.size() == 1 &&
@@ -878,7 +899,7 @@ ConstructSSAForLoadSet(LoadInst *Load,
Value *AvailableValue::MaterializeAdjustedValue(LoadInst *Load,
Instruction *InsertPt,
- GVN &gvn) const {
+ GVNPass &gvn) const {
Value *Res;
Type *LoadTy = Load->getType();
const DataLayout &DL = Load->getModule()->getDataLayout();
@@ -1002,8 +1023,8 @@ static void reportMayClobberedLoad(LoadInst *Load, MemDepResult DepInfo,
ORE->emit(R);
}
-bool GVN::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo,
- Value *Address, AvailableValue &Res) {
+bool GVNPass::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo,
+ Value *Address, AvailableValue &Res) {
assert((DepInfo.isDef() || DepInfo.isClobber()) &&
"expected a local dependence");
assert(Load->isUnordered() && "rules below are incorrect for ordered access");
@@ -1137,9 +1158,9 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo,
return false;
}
-void GVN::AnalyzeLoadAvailability(LoadInst *Load, LoadDepVect &Deps,
- AvailValInBlkVect &ValuesPerBlock,
- UnavailBlkVect &UnavailableBlocks) {
+void GVNPass::AnalyzeLoadAvailability(LoadInst *Load, LoadDepVect &Deps,
+ AvailValInBlkVect &ValuesPerBlock,
+ UnavailBlkVect &UnavailableBlocks) {
// Filter out useless results (non-locals, etc). Keep track of the blocks
// where we have a value available in repl, also keep track of whether we see
// dependencies that produce an unknown value for the load (such as a call
@@ -1182,7 +1203,7 @@ void GVN::AnalyzeLoadAvailability(LoadInst *Load, LoadDepVect &Deps,
"post condition violation");
}
-void GVN::eliminatePartiallyRedundantLoad(
+void GVNPass::eliminatePartiallyRedundantLoad(
LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
MapVector<BasicBlock *, Value *> &AvailableLoads) {
for (const auto &AvailableLoad : AvailableLoads) {
@@ -1212,8 +1233,7 @@ void GVN::eliminatePartiallyRedundantLoad(
}
// Transfer the old load's AA tags to the new load.
- AAMDNodes Tags;
- Load->getAAMetadata(Tags);
+ AAMDNodes Tags = Load->getAAMetadata();
if (Tags)
NewLoad->setAAMetadata(Tags);
@@ -1257,8 +1277,8 @@ void GVN::eliminatePartiallyRedundantLoad(
});
}
-bool GVN::PerformLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
- UnavailBlkVect &UnavailableBlocks) {
+bool GVNPass::PerformLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
+ UnavailBlkVect &UnavailableBlocks) {
// Okay, we have *some* definitions of the value. This means that the value
// is available in some of our (transitive) predecessors. Let's think about
// doing PRE of this load. This will involve inserting a new load into the
@@ -1498,8 +1518,9 @@ bool GVN::PerformLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
return true;
}
-bool GVN::performLoopLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
- UnavailBlkVect &UnavailableBlocks) {
+bool GVNPass::performLoopLoadPRE(LoadInst *Load,
+ AvailValInBlkVect &ValuesPerBlock,
+ UnavailBlkVect &UnavailableBlocks) {
if (!LI)
return false;
@@ -1590,7 +1611,7 @@ static void reportLoadElim(LoadInst *Load, Value *AvailableValue,
/// Attempt to eliminate a load whose dependencies are
/// non-local by performing PHI construction.
-bool GVN::processNonLocalLoad(LoadInst *Load) {
+bool GVNPass::processNonLocalLoad(LoadInst *Load) {
// non-local speculations are not allowed under asan.
if (Load->getParent()->getParent()->hasFnAttribute(
Attribute::SanitizeAddress) ||
@@ -1622,10 +1643,8 @@ bool GVN::processNonLocalLoad(LoadInst *Load) {
// If this load follows a GEP, see if we can PRE the indices before analyzing.
if (GetElementPtrInst *GEP =
dyn_cast<GetElementPtrInst>(Load->getOperand(0))) {
- for (GetElementPtrInst::op_iterator OI = GEP->idx_begin(),
- OE = GEP->idx_end();
- OI != OE; ++OI)
- if (Instruction *I = dyn_cast<Instruction>(OI->get()))
+ for (Use &U : GEP->indices())
+ if (Instruction *I = dyn_cast<Instruction>(U.get()))
Changed |= performScalarPRE(I);
}
@@ -1673,8 +1692,11 @@ bool GVN::processNonLocalLoad(LoadInst *Load) {
if (!isLoadInLoopPREEnabled() && LI && LI->getLoopFor(Load->getParent()))
return Changed;
- return Changed || PerformLoadPRE(Load, ValuesPerBlock, UnavailableBlocks) ||
- performLoopLoadPRE(Load, ValuesPerBlock, UnavailableBlocks);
+ if (performLoopLoadPRE(Load, ValuesPerBlock, UnavailableBlocks) ||
+ PerformLoadPRE(Load, ValuesPerBlock, UnavailableBlocks))
+ return true;
+
+ return Changed;
}
static bool impliesEquivalanceIfTrue(CmpInst* Cmp) {
@@ -1738,7 +1760,7 @@ static bool hasUsersIn(Value *V, BasicBlock *BB) {
return false;
}
-bool GVN::processAssumeIntrinsic(AssumeInst *IntrinsicI) {
+bool GVNPass::processAssumeIntrinsic(AssumeInst *IntrinsicI) {
Value *V = IntrinsicI->getArgOperand(0);
if (ConstantInt *Cond = dyn_cast<ConstantInt>(V)) {
@@ -1882,7 +1904,7 @@ static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) {
/// Attempt to eliminate a load, first by eliminating it
/// locally, and then attempting non-local elimination if that fails.
-bool GVN::processLoad(LoadInst *L) {
+bool GVNPass::processLoad(LoadInst *L) {
if (!MD)
return false;
@@ -1936,7 +1958,7 @@ bool GVN::processLoad(LoadInst *L) {
/// Return a pair whose first field is the value number of \p Exp and whose
/// second field indicates whether that value number is newly created.
std::pair<uint32_t, bool>
-GVN::ValueTable::assignExpNewValueNum(Expression &Exp) {
+GVNPass::ValueTable::assignExpNewValueNum(Expression &Exp) {
uint32_t &e = expressionNumbering[Exp];
bool CreateNewValNum = !e;
if (CreateNewValNum) {
@@ -1951,8 +1973,8 @@ GVN::ValueTable::assignExpNewValueNum(Expression &Exp) {
/// Return whether all the values related with the same \p num are
/// defined in \p BB.
-bool GVN::ValueTable::areAllValsInBB(uint32_t Num, const BasicBlock *BB,
- GVN &Gvn) {
+bool GVNPass::ValueTable::areAllValsInBB(uint32_t Num, const BasicBlock *BB,
+ GVNPass &Gvn) {
LeaderTableEntry *Vals = &Gvn.LeaderTable[Num];
while (Vals && Vals->BB == BB)
Vals = Vals->Next;
@@ -1960,9 +1982,9 @@ bool GVN::ValueTable::areAllValsInBB(uint32_t Num, const BasicBlock *BB,
}
/// Wrap phiTranslateImpl to provide caching functionality.
-uint32_t GVN::ValueTable::phiTranslate(const BasicBlock *Pred,
- const BasicBlock *PhiBlock, uint32_t Num,
- GVN &Gvn) {
+uint32_t GVNPass::ValueTable::phiTranslate(const BasicBlock *Pred,
+ const BasicBlock *PhiBlock,
+ uint32_t Num, GVNPass &Gvn) {
auto FindRes = PhiTranslateTable.find({Num, Pred});
if (FindRes != PhiTranslateTable.end())
return FindRes->second;
@@ -1973,9 +1995,10 @@ uint32_t GVN::ValueTable::phiTranslate(const BasicBlock *Pred,
// Return true if the value number \p Num and NewNum have equal value.
// Return false if the result is unknown.
-bool GVN::ValueTable::areCallValsEqual(uint32_t Num, uint32_t NewNum,
- const BasicBlock *Pred,
- const BasicBlock *PhiBlock, GVN &Gvn) {
+bool GVNPass::ValueTable::areCallValsEqual(uint32_t Num, uint32_t NewNum,
+ const BasicBlock *Pred,
+ const BasicBlock *PhiBlock,
+ GVNPass &Gvn) {
CallInst *Call = nullptr;
LeaderTableEntry *Vals = &Gvn.LeaderTable[Num];
while (Vals) {
@@ -2008,9 +2031,9 @@ bool GVN::ValueTable::areCallValsEqual(uint32_t Num, uint32_t NewNum,
/// Translate value number \p Num using phis, so that it has the values of
/// the phis in BB.
-uint32_t GVN::ValueTable::phiTranslateImpl(const BasicBlock *Pred,
- const BasicBlock *PhiBlock,
- uint32_t Num, GVN &Gvn) {
+uint32_t GVNPass::ValueTable::phiTranslateImpl(const BasicBlock *Pred,
+ const BasicBlock *PhiBlock,
+ uint32_t Num, GVNPass &Gvn) {
if (PHINode *PN = NumberingPhi[Num]) {
for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) {
if (PN->getParent() == PhiBlock && PN->getIncomingBlock(i) == Pred)
@@ -2063,8 +2086,8 @@ uint32_t GVN::ValueTable::phiTranslateImpl(const BasicBlock *Pred,
/// Erase stale entry from phiTranslate cache so phiTranslate can be computed
/// again.
-void GVN::ValueTable::eraseTranslateCacheEntry(uint32_t Num,
- const BasicBlock &CurrBlock) {
+void GVNPass::ValueTable::eraseTranslateCacheEntry(
+ uint32_t Num, const BasicBlock &CurrBlock) {
for (const BasicBlock *Pred : predecessors(&CurrBlock))
PhiTranslateTable.erase({Num, Pred});
}
@@ -2074,7 +2097,7 @@ void GVN::ValueTable::eraseTranslateCacheEntry(uint32_t Num,
// and then scan the list to find one whose block dominates the block in
// question. This is fast because dominator tree queries consist of only
// a few comparisons of DFS numbers.
-Value *GVN::findLeader(const BasicBlock *BB, uint32_t num) {
+Value *GVNPass::findLeader(const BasicBlock *BB, uint32_t num) {
LeaderTableEntry Vals = LeaderTable[num];
if (!Vals.Val) return nullptr;
@@ -2113,7 +2136,7 @@ static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E,
return Pred != nullptr;
}
-void GVN::assignBlockRPONumber(Function &F) {
+void GVNPass::assignBlockRPONumber(Function &F) {
BlockRPONumber.clear();
uint32_t NextBlockNumber = 1;
ReversePostOrderTraversal<Function *> RPOT(&F);
@@ -2122,7 +2145,7 @@ void GVN::assignBlockRPONumber(Function &F) {
InvalidBlockRPONumbers = false;
}
-bool GVN::replaceOperandsForInBlockEquality(Instruction *Instr) const {
+bool GVNPass::replaceOperandsForInBlockEquality(Instruction *Instr) const {
bool Changed = false;
for (unsigned OpNum = 0; OpNum < Instr->getNumOperands(); ++OpNum) {
Value *Operand = Instr->getOperand(OpNum);
@@ -2142,8 +2165,9 @@ bool GVN::replaceOperandsForInBlockEquality(Instruction *Instr) const {
/// 'RHS' everywhere in the scope. Returns whether a change was made.
/// If DominatesByEdge is false, then it means that we will propagate the RHS
/// value starting from the end of Root.Start.
-bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root,
- bool DominatesByEdge) {
+bool GVNPass::propagateEquality(Value *LHS, Value *RHS,
+ const BasicBlockEdge &Root,
+ bool DominatesByEdge) {
SmallVector<std::pair<Value*, Value*>, 4> Worklist;
Worklist.push_back(std::make_pair(LHS, RHS));
bool Changed = false;
@@ -2291,7 +2315,7 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root,
/// When calculating availability, handle an instruction
/// by inserting it into the appropriate sets
-bool GVN::processInstruction(Instruction *I) {
+bool GVNPass::processInstruction(Instruction *I) {
// Ignore dbg info intrinsics.
if (isa<DbgInfoIntrinsic>(I))
return false;
@@ -2432,10 +2456,10 @@ bool GVN::processInstruction(Instruction *I) {
}
/// runOnFunction - This is the main transformation entry point for a function.
-bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
- const TargetLibraryInfo &RunTLI, AAResults &RunAA,
- MemoryDependenceResults *RunMD, LoopInfo *LI,
- OptimizationRemarkEmitter *RunORE, MemorySSA *MSSA) {
+bool GVNPass::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
+ const TargetLibraryInfo &RunTLI, AAResults &RunAA,
+ MemoryDependenceResults *RunMD, LoopInfo *LI,
+ OptimizationRemarkEmitter *RunORE, MemorySSA *MSSA) {
AC = &RunAC;
DT = &RunDT;
VN.setDomTree(DT);
@@ -2457,10 +2481,8 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
// Merge unconditional branches, allowing PRE to catch more
// optimization opportunities.
- for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
- BasicBlock *BB = &*FI++;
-
- bool removedBlock = MergeBlockIntoPredecessor(BB, &DTU, LI, MSSAU, MD);
+ for (BasicBlock &BB : llvm::make_early_inc_range(F)) {
+ bool removedBlock = MergeBlockIntoPredecessor(&BB, &DTU, LI, MSSAU, MD);
if (removedBlock)
++NumGVNBlocks;
@@ -2502,7 +2524,7 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
return Changed;
}
-bool GVN::processBlock(BasicBlock *BB) {
+bool GVNPass::processBlock(BasicBlock *BB) {
// FIXME: Kill off InstrsToErase by doing erasing eagerly in a helper function
// (and incrementing BI before processing an instruction).
assert(InstrsToErase.empty() &&
@@ -2563,8 +2585,8 @@ bool GVN::processBlock(BasicBlock *BB) {
}
// Instantiate an expression in a predecessor that lacked it.
-bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
- BasicBlock *Curr, unsigned int ValNo) {
+bool GVNPass::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
+ BasicBlock *Curr, unsigned int ValNo) {
// Because we are going top-down through the block, all value numbers
// will be available in the predecessor by the time we need them. Any
// that weren't originally present will have been instantiated earlier
@@ -2612,7 +2634,7 @@ bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
return true;
}
-bool GVN::performScalarPRE(Instruction *CurInst) {
+bool GVNPass::performScalarPRE(Instruction *CurInst) {
if (isa<AllocaInst>(CurInst) || CurInst->isTerminator() ||
isa<PHINode>(CurInst) || CurInst->getType()->isVoidTy() ||
CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
@@ -2797,7 +2819,7 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
/// Perform a purely local form of PRE that looks for diamond
/// control flow patterns and attempts to perform simple PRE at the join point.
-bool GVN::performPRE(Function &F) {
+bool GVNPass::performPRE(Function &F) {
bool Changed = false;
for (BasicBlock *CurrentBlock : depth_first(&F.getEntryBlock())) {
// Nothing to PRE in the entry block.
@@ -2824,7 +2846,7 @@ bool GVN::performPRE(Function &F) {
/// Split the critical edge connecting the given two blocks, and return
/// the block inserted to the critical edge.
-BasicBlock *GVN::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) {
+BasicBlock *GVNPass::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) {
// GVN does not require loop-simplify, do not try to preserve it if it is not
// possible.
BasicBlock *BB = SplitCriticalEdge(
@@ -2840,7 +2862,7 @@ BasicBlock *GVN::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) {
/// Split critical edges found during the previous
/// iteration that may enable further optimization.
-bool GVN::splitCriticalEdges() {
+bool GVNPass::splitCriticalEdges() {
if (toSplit.empty())
return false;
@@ -2860,7 +2882,7 @@ bool GVN::splitCriticalEdges() {
}
/// Executes one iteration of GVN
-bool GVN::iterateOnFunction(Function &F) {
+bool GVNPass::iterateOnFunction(Function &F) {
cleanupGlobalSets();
// Top-down walk of the dominator tree
@@ -2876,7 +2898,7 @@ bool GVN::iterateOnFunction(Function &F) {
return Changed;
}
-void GVN::cleanupGlobalSets() {
+void GVNPass::cleanupGlobalSets() {
VN.clear();
LeaderTable.clear();
BlockRPONumber.clear();
@@ -2887,7 +2909,7 @@ void GVN::cleanupGlobalSets() {
/// Verify that the specified instruction does not occur in our
/// internal data structures.
-void GVN::verifyRemoved(const Instruction *Inst) const {
+void GVNPass::verifyRemoved(const Instruction *Inst) const {
VN.verifyRemoved(Inst);
// Walk through the value number scope to make sure the instruction isn't
@@ -2907,7 +2929,7 @@ void GVN::verifyRemoved(const Instruction *Inst) const {
/// function is to add all these blocks to "DeadBlocks". For the dead blocks'
/// live successors, update their phi nodes by replacing the operands
/// corresponding to dead blocks with UndefVal.
-void GVN::addDeadBlock(BasicBlock *BB) {
+void GVNPass::addDeadBlock(BasicBlock *BB) {
SmallVector<BasicBlock *, 4> NewDead;
SmallSetVector<BasicBlock *, 4> DF;
@@ -2995,7 +3017,7 @@ void GVN::addDeadBlock(BasicBlock *BB) {
// dead blocks with "UndefVal" in the hope these PHIs will be optimized away.
//
// Return true iff *NEW* dead code is found.
-bool GVN::processFoldableCondBr(BranchInst *BI) {
+bool GVNPass::processFoldableCondBr(BranchInst *BI) {
if (!BI || BI->isUnconditional())
return false;
@@ -3023,7 +3045,7 @@ bool GVN::processFoldableCondBr(BranchInst *BI) {
// associated val-num. As it normally has far more live instructions than dead
// instructions, it makes more sense just to "fabricate" a val-number for the
// dead code than checking if instruction involved is dead or not.
-void GVN::assignValNumForDeadCode() {
+void GVNPass::assignValNumForDeadCode() {
for (BasicBlock *BB : DeadBlocks) {
for (Instruction &Inst : *BB) {
unsigned ValNum = VN.lookupOrAdd(&Inst);
@@ -3078,7 +3100,7 @@ public:
}
private:
- GVN Impl;
+ GVNPass Impl;
};
char GVNLegacyPass::ID = 0;
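Aside, for orientation amid the mechanical GVN → GVNPass renames above (not from the patch): a hedged sketch of the core redundancy GVN removes — a fully redundant load whose value is already available.

// Hypothetical example: the second load of *p is fully redundant; GVN
// (processLoad / processNonLocalLoad above) forwards the first value instead.
int redundant_load(const int *p) {
  int a = *p;
  int b = *p; // no intervening store may clobber *p, so 'a' is reused
  return a + b;
}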
diff --git a/llvm/lib/Transforms/Scalar/GVNHoist.cpp b/llvm/lib/Transforms/Scalar/GVNHoist.cpp
index 790d71992da4..fdc3afd9348a 100644
--- a/llvm/lib/Transforms/Scalar/GVNHoist.cpp
+++ b/llvm/lib/Transforms/Scalar/GVNHoist.cpp
@@ -169,7 +169,7 @@ class InsnInfo {
public:
// Inserts I and its value number in VNtoScalars.
- void insert(Instruction *I, GVN::ValueTable &VN) {
+ void insert(Instruction *I, GVNPass::ValueTable &VN) {
// Scalar instruction.
unsigned V = VN.lookupOrAdd(I);
VNtoScalars[{V, InvalidVN}].push_back(I);
@@ -184,7 +184,7 @@ class LoadInfo {
public:
// Insert Load and the value number of its memory address in VNtoLoads.
- void insert(LoadInst *Load, GVN::ValueTable &VN) {
+ void insert(LoadInst *Load, GVNPass::ValueTable &VN) {
if (Load->isSimple()) {
unsigned V = VN.lookupOrAdd(Load->getPointerOperand());
VNtoLoads[{V, InvalidVN}].push_back(Load);
@@ -201,7 +201,7 @@ class StoreInfo {
public:
// Insert the Store and a hash number of the store address and the stored
// value in VNtoStores.
- void insert(StoreInst *Store, GVN::ValueTable &VN) {
+ void insert(StoreInst *Store, GVNPass::ValueTable &VN) {
if (!Store->isSimple())
return;
// Hash the store address and the stored value.
@@ -221,7 +221,7 @@ class CallInfo {
public:
// Insert Call and its value numbering in one of the VNtoCalls* containers.
- void insert(CallInst *Call, GVN::ValueTable &VN) {
+ void insert(CallInst *Call, GVNPass::ValueTable &VN) {
// A call that doesNotAccessMemory is handled as a Scalar,
// onlyReadsMemory will be handled as a Load instruction,
// all other calls will be handled as stores.
@@ -274,7 +274,7 @@ public:
unsigned int rank(const Value *V) const;
private:
- GVN::ValueTable VN;
+ GVNPass::ValueTable VN;
DominatorTree *DT;
PostDominatorTree *PDT;
AliasAnalysis *AA;
@@ -377,12 +377,12 @@ private:
if (!Root)
return;
// Depth first walk on PDom tree to fill the CHIargs at each PDF.
- RenameStackType RenameStack;
for (auto Node : depth_first(Root)) {
BasicBlock *BB = Node->getBlock();
if (!BB)
continue;
+ RenameStackType RenameStack;
// Collect all values in BB and push to stack.
fillRenameStack(BB, ValueBBs, RenameStack);
@@ -827,6 +827,8 @@ void GVNHoist::fillRenameStack(BasicBlock *BB, InValuesType &ValueBBs,
auto it1 = ValueBBs.find(BB);
if (it1 != ValueBBs.end()) {
// Iterate in reverse order to keep lower ranked values on the top.
+ LLVM_DEBUG(dbgs() << "\nVisiting: " << BB->getName()
+ << " for pushing instructions on stack";);
for (std::pair<VNType, Instruction *> &VI : reverse(it1->second)) {
// Get the value of instruction I
LLVM_DEBUG(dbgs() << "\nPushing on stack: " << *VI.second);
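Aside (not from the patch): a hedged illustration of what GVNHoist, whose value table is now spelled GVNPass::ValueTable, does — identical expressions on both sides of a diamond are hoisted into the common dominator so the shared computation runs once.

// Hypothetical example: both branches compute a * b; GVNHoist can hoist the
// multiplication above the branch.
int hoist_candidate(int a, int b, bool c) {
  if (c)
    return a * b + 1;
  return a * b - 1;
}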
diff --git a/llvm/lib/Transforms/Scalar/GuardWidening.cpp b/llvm/lib/Transforms/Scalar/GuardWidening.cpp
index 61eb4ce0ed46..82b81003ef21 100644
--- a/llvm/lib/Transforms/Scalar/GuardWidening.cpp
+++ b/llvm/lib/Transforms/Scalar/GuardWidening.cpp
@@ -46,6 +46,7 @@
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/ConstantRange.h"
@@ -105,8 +106,10 @@ static void setCondition(Instruction *I, Value *NewCond) {
}
// Eliminates the guard instruction properly.
-static void eliminateGuard(Instruction *GuardInst) {
+static void eliminateGuard(Instruction *GuardInst, MemorySSAUpdater *MSSAU) {
GuardInst->eraseFromParent();
+ if (MSSAU)
+ MSSAU->removeMemoryAccess(GuardInst);
++GuardsEliminated;
}
@@ -114,6 +117,7 @@ class GuardWideningImpl {
DominatorTree &DT;
PostDominatorTree *PDT;
LoopInfo &LI;
+ MemorySSAUpdater *MSSAU;
/// Together, these describe the region of interest. This might be all of
/// the blocks within a function, or only a given loop's blocks and preheader.
@@ -269,12 +273,12 @@ class GuardWideningImpl {
}
public:
-
explicit GuardWideningImpl(DominatorTree &DT, PostDominatorTree *PDT,
- LoopInfo &LI, DomTreeNode *Root,
+ LoopInfo &LI, MemorySSAUpdater *MSSAU,
+ DomTreeNode *Root,
std::function<bool(BasicBlock*)> BlockFilter)
- : DT(DT), PDT(PDT), LI(LI), Root(Root), BlockFilter(BlockFilter)
- {}
+ : DT(DT), PDT(PDT), LI(LI), MSSAU(MSSAU), Root(Root),
+ BlockFilter(BlockFilter) {}
/// The entry point for this pass.
bool run();
@@ -313,7 +317,7 @@ bool GuardWideningImpl::run() {
if (!WidenedGuards.count(I)) {
assert(isa<ConstantInt>(getCondition(I)) && "Should be!");
if (isSupportedGuardInstruction(I))
- eliminateGuard(I);
+ eliminateGuard(I, MSSAU);
else {
assert(isa<BranchInst>(I) &&
"Eliminated something other than guard or branch?");
@@ -514,27 +518,20 @@ bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1,
ConstantRange CR1 =
ConstantRange::makeExactICmpRegion(Pred1, RHS1->getValue());
- // SubsetIntersect is a subset of the actual mathematical intersection of
- // CR0 and CR1, while SupersetIntersect is a superset of the actual
- // mathematical intersection. If these two ConstantRanges are equal, then
- // we know we were able to represent the actual mathematical intersection
- // of CR0 and CR1, and can use the same to generate an icmp instruction.
- //
// Given what we're doing here and the semantics of guards, it would
- // actually be correct to just use SubsetIntersect, but that may be too
+ // be correct to use a subset intersection, but that may be too
// aggressive in cases we care about.
- auto SubsetIntersect = CR0.inverse().unionWith(CR1.inverse()).inverse();
- auto SupersetIntersect = CR0.intersectWith(CR1);
-
- APInt NewRHSAP;
- CmpInst::Predicate Pred;
- if (SubsetIntersect == SupersetIntersect &&
- SubsetIntersect.getEquivalentICmp(Pred, NewRHSAP)) {
- if (InsertPt) {
- ConstantInt *NewRHS = ConstantInt::get(Cond0->getContext(), NewRHSAP);
- Result = new ICmpInst(InsertPt, Pred, LHS, NewRHS, "wide.chk");
+ if (Optional<ConstantRange> Intersect = CR0.exactIntersectWith(CR1)) {
+ APInt NewRHSAP;
+ CmpInst::Predicate Pred;
+ if (Intersect->getEquivalentICmp(Pred, NewRHSAP)) {
+ if (InsertPt) {
+ ConstantInt *NewRHS =
+ ConstantInt::get(Cond0->getContext(), NewRHSAP);
+ Result = new ICmpInst(InsertPt, Pred, LHS, NewRHS, "wide.chk");
+ }
+ return true;
}
- return true;
}
}
}
@@ -766,12 +763,18 @@ PreservedAnalyses GuardWideningPass::run(Function &F,
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &LI = AM.getResult<LoopAnalysis>(F);
auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
- if (!GuardWideningImpl(DT, &PDT, LI, DT.getRootNode(),
- [](BasicBlock*) { return true; } ).run())
+ auto *MSSAA = AM.getCachedResult<MemorySSAAnalysis>(F);
+ std::unique_ptr<MemorySSAUpdater> MSSAU;
+ if (MSSAA)
+ MSSAU = std::make_unique<MemorySSAUpdater>(&MSSAA->getMSSA());
+ if (!GuardWideningImpl(DT, &PDT, LI, MSSAU ? MSSAU.get() : nullptr,
+ DT.getRootNode(), [](BasicBlock *) { return true; })
+ .run())
return PreservedAnalyses::all();
PreservedAnalyses PA;
PA.preserveSet<CFGAnalyses>();
+ PA.preserve<MemorySSAAnalysis>();
return PA;
}
@@ -784,11 +787,17 @@ PreservedAnalyses GuardWideningPass::run(Loop &L, LoopAnalysisManager &AM,
auto BlockFilter = [&](BasicBlock *BB) {
return BB == RootBB || L.contains(BB);
};
- if (!GuardWideningImpl(AR.DT, nullptr, AR.LI, AR.DT.getNode(RootBB),
- BlockFilter).run())
+ std::unique_ptr<MemorySSAUpdater> MSSAU;
+ if (AR.MSSA)
+ MSSAU = std::make_unique<MemorySSAUpdater>(AR.MSSA);
+ if (!GuardWideningImpl(AR.DT, nullptr, AR.LI, MSSAU ? MSSAU.get() : nullptr,
+ AR.DT.getNode(RootBB), BlockFilter).run())
return PreservedAnalyses::all();
- return getLoopPassPreservedAnalyses();
+ auto PA = getLoopPassPreservedAnalyses();
+ if (AR.MSSA)
+ PA.preserve<MemorySSAAnalysis>();
+ return PA;
}
namespace {
@@ -805,8 +814,14 @@ struct GuardWideningLegacyPass : public FunctionPass {
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
- return GuardWideningImpl(DT, &PDT, LI, DT.getRootNode(),
- [](BasicBlock*) { return true; } ).run();
+ auto *MSSAWP = getAnalysisIfAvailable<MemorySSAWrapperPass>();
+ std::unique_ptr<MemorySSAUpdater> MSSAU;
+ if (MSSAWP)
+ MSSAU = std::make_unique<MemorySSAUpdater>(&MSSAWP->getMSSA());
+ return GuardWideningImpl(DT, &PDT, LI, MSSAU ? MSSAU.get() : nullptr,
+ DT.getRootNode(),
+ [](BasicBlock *) { return true; })
+ .run();
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -814,6 +829,7 @@ struct GuardWideningLegacyPass : public FunctionPass {
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<PostDominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
}
};
@@ -833,13 +849,18 @@ struct LoopGuardWideningLegacyPass : public LoopPass {
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
auto *PDTWP = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>();
auto *PDT = PDTWP ? &PDTWP->getPostDomTree() : nullptr;
+ auto *MSSAWP = getAnalysisIfAvailable<MemorySSAWrapperPass>();
+ std::unique_ptr<MemorySSAUpdater> MSSAU;
+ if (MSSAWP)
+ MSSAU = std::make_unique<MemorySSAUpdater>(&MSSAWP->getMSSA());
+
BasicBlock *RootBB = L->getLoopPredecessor();
if (!RootBB)
RootBB = L->getHeader();
auto BlockFilter = [&](BasicBlock *BB) {
return BB == RootBB || L->contains(BB);
};
- return GuardWideningImpl(DT, PDT, LI,
+ return GuardWideningImpl(DT, PDT, LI, MSSAU ? MSSAU.get() : nullptr,
DT.getNode(RootBB), BlockFilter).run();
}
@@ -847,6 +868,7 @@ struct LoopGuardWideningLegacyPass : public LoopPass {
AU.setPreservesCFG();
getLoopAnalysisUsage(AU);
AU.addPreserved<PostDominatorTreeWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
}
};
}
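Aside (a minimal sketch; the pass really operates on llvm.experimental.guard and widenable-branch conditions, not plain ifs): the range math behind the exactIntersectWith() change above — two overlapping checks on one value can be widened into the single check whose range is their exact intersection.

// Hypothetical example: the conditions x < 20 and x < 10 have ranges
// [0, 20) and [0, 10); their exact intersection is [0, 10), so the single
// widened check x < 10 is equivalent to both.
bool both_checks_hold(unsigned x) {
  if (x >= 20) return false; // check #1
  if (x >= 10) return false; // check #2 (subsumes #1 after widening)
  return true;               // reached iff x < 10
}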
diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 9ee2a2d0bf08..ae2fe2767074 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -89,6 +89,7 @@
#include <utility>
using namespace llvm;
+using namespace PatternMatch;
#define DEBUG_TYPE "indvars"
@@ -155,6 +156,10 @@ class IndVarSimplify {
bool rewriteNonIntegerIVs(Loop *L);
bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
+ /// Try to improve our exit conditions by converting the condition from
+ /// signed to unsigned, or by rotating computation out of the loop.
+ /// (See inline comment about why this is duplicated from simplifyAndExtend)
+ bool canonicalizeExitCondition(Loop *L);
/// Try to eliminate loop exits based on analyzeable exit counts
bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
/// Try to form loop invariant tests for loop exits by changing how many
@@ -494,6 +499,7 @@ bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
MadeAnyChanges = true;
PN.setIncomingValue(IncomingValIdx,
ExitVal->getIncomingValue(PreheaderIdx));
+ SE->forgetValue(&PN);
}
}
}
@@ -541,18 +547,18 @@ static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
return;
}
- if (!WI.WidestNativeType) {
+ if (!WI.WidestNativeType ||
+ Width > SE->getTypeSizeInBits(WI.WidestNativeType)) {
WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
WI.IsSigned = IsSigned;
return;
}
- // We extend the IV to satisfy the sign of its first user, arbitrarily.
- if (WI.IsSigned != IsSigned)
- return;
-
- if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
- WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
+ // We extend the IV to satisfy the sign of its user(s), or 'signed'
+ // if there are multiple users with both sign- and zero-extensions,
+ // in order not to introduce nondeterministic behaviour based on the
+ // unspecified order of a PHI node's users-iterator.
+ WI.IsSigned |= IsSigned;
}
//===----------------------------------------------------------------------===//
@@ -1274,9 +1280,9 @@ bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
// Skip debug info intrinsics.
do {
--I;
- } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
+ } while (I->isDebugOrPseudoInst() && I != Preheader->begin());
- if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
+ if (I->isDebugOrPseudoInst() && I == Preheader->begin())
Done = true;
} else {
Done = true;
@@ -1309,6 +1315,18 @@ static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
replaceExitCond(BI, NewCond, DeadInsts);
}
+static void replaceLoopPHINodesWithPreheaderValues(
+ Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
+ assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
+ auto *LoopPreheader = L->getLoopPreheader();
+ auto *LoopHeader = L->getHeader();
+ for (auto &PN : LoopHeader->phis()) {
+ auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
+ PN.replaceAllUsesWith(PreheaderIncoming);
+ DeadInsts.emplace_back(&PN);
+ }
+}
+
static void replaceWithInvariantCond(
const Loop *L, BasicBlock *ExitingBB, ICmpInst::Predicate InvariantPred,
const SCEV *InvariantLHS, const SCEV *InvariantRHS, SCEVExpander &Rewriter,
@@ -1333,7 +1351,6 @@ static bool optimizeLoopExitWithUnknownExitCount(
SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
ICmpInst::Predicate Pred;
Value *LHS, *RHS;
- using namespace PatternMatch;
BasicBlock *TrueSucc, *FalseSucc;
if (!match(BI, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc))))
@@ -1394,6 +1411,140 @@ static bool optimizeLoopExitWithUnknownExitCount(
return true;
}
+bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
+ // Note: This duplicates a particular part of SimplifyIndVars' reasoning.
+ // We need to duplicate it because, given icmp zext(small-iv), C, IVUsers
+ // never reaches the icmp since the zext doesn't fold to an AddRec unless
+ // it already has flags. The alternative would be to extend the set of
+ // "interesting" IV users to include the icmp, but doing that regresses
+ // results in practice: it means querying SCEVs before the trip counts that
+ // rely on them, which leaves SCEV caching sub-optimal answers. That caching
+ // concern is also why we only query SCEVs of the loop-invariant RHS here.
+ SmallVector<BasicBlock*, 16> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+ bool Changed = false;
+ for (auto *ExitingBB : ExitingBlocks) {
+ auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
+ if (!BI)
+ continue;
+ assert(BI->isConditional() && "exit branch must be conditional");
+
+ auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
+ if (!ICmp || !ICmp->hasOneUse())
+ continue;
+
+ auto *LHS = ICmp->getOperand(0);
+ auto *RHS = ICmp->getOperand(1);
+ // For the range reasoning, avoid computing SCEVs in the loop to avoid
+ // poisoning the cache with sub-optimal results. For the must-execute case,
+ // this is a necessary precondition for correctness.
+ if (!L->isLoopInvariant(RHS)) {
+ if (!L->isLoopInvariant(LHS))
+ continue;
+ // Same logic applies for the inverse case
+ std::swap(LHS, RHS);
+ }
+
+ // Match (icmp signed-cond zext, RHS)
+ Value *LHSOp = nullptr;
+ if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
+ continue;
+
+ const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
+ const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
+ const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
+ auto FullCR = ConstantRange::getFull(InnerBitWidth);
+ FullCR = FullCR.zeroExtend(OuterBitWidth);
+ auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
+ if (FullCR.contains(RHSCR)) {
+ // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
+ // replace the signed condition with the unsigned version.
+ ICmp->setPredicate(ICmp->getUnsignedPredicate());
+ Changed = true;
+ // Note: No SCEV invalidation needed. We've changed the predicate, but
+ // have not changed exit counts, or the values produced by the compare.
+ continue;
+ }
+ }
+
+ // Now that we've canonicalized the condition to match the extend,
+ // see if we can rotate the extend out of the loop.
+ for (auto *ExitingBB : ExitingBlocks) {
+ auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
+ if (!BI)
+ continue;
+ assert(BI->isConditional() && "exit branch must be conditional");
+
+ auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
+ if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned())
+ continue;
+
+ bool Swapped = false;
+ auto *LHS = ICmp->getOperand(0);
+ auto *RHS = ICmp->getOperand(1);
+ if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS))
+ // Nothing to rotate
+ continue;
+ if (L->isLoopInvariant(LHS)) {
+ // Same logic applies for the inverse case until we actually pick
+ // which operand of the compare to update.
+ Swapped = true;
+ std::swap(LHS, RHS);
+ }
+ assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS));
+
+ // Match (icmp unsigned-cond zext, RHS)
+ // TODO: Extend to handle corresponding sext/signed-cmp case
+ // TODO: Extend to other invertible functions
+ Value *LHSOp = nullptr;
+ if (!match(LHS, m_ZExt(m_Value(LHSOp))))
+ continue;
+
+ // In general, we only rotate if we can do so without increasing the number
+ // of instructions. The exception is when we have a zext(add-rec). The
+ // reason for allowing this exception is that we know we need to get rid
+ // of the zext for SCEV to be able to compute a trip count for said loops;
+ // we consider the new trip count valuable enough to increase instruction
+ // count by one.
+ if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp)))
+ continue;
+
+ // Given an icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS
+ // replace with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
+ // when zext is loop varying and RHS is loop invariant. This converts
+ // loop varying work to loop-invariant work.
+ auto doRotateTransform = [&]() {
+ assert(ICmp->isUnsigned() && "must have proven unsigned already");
+ auto *NewRHS =
+ CastInst::Create(Instruction::Trunc, RHS, LHSOp->getType(), "",
+ L->getLoopPreheader()->getTerminator());
+ ICmp->setOperand(Swapped ? 1 : 0, LHSOp);
+ ICmp->setOperand(Swapped ? 0 : 1, NewRHS);
+ if (LHS->use_empty())
+ DeadInsts.push_back(LHS);
+ };
+
+ const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
+ const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
+ const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
+ auto FullCR = ConstantRange::getFull(InnerBitWidth);
+ FullCR = FullCR.zeroExtend(OuterBitWidth);
+ auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
+ if (FullCR.contains(RHSCR)) {
+ doRotateTransform();
+ Changed = true;
+ // Note, we are leaving SCEV in an unfortunately imprecise case here
+ // as rotation tends to reveal information about trip counts not
+ // previously visible.
+ continue;
+ }
+ }
+
+ return Changed;
+}
+
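The rotation itself trades a compare of a loop-varying zext against a wide, loop-invariant bound for a narrow compare against the truncated bound, which is only sound when the bound survives the truncation round-trip, i.e. zext(trunc(RHS)) == RHS. A small illustrative sketch of that equivalence, assuming a 32-to-64-bit zext (not taken from the patch):

#include <cassert>
#include <cstdint>

int main() {
  uint64_t RHS = 100000;                   // loop-invariant bound
  assert((uint64_t)(uint32_t)RHS == RHS);  // zext(trunc(RHS)) == RHS
  for (uint32_t X = 0; X < 200000; ++X) {
    bool Wide = (uint64_t)X < RHS;         // icmp ult (zext i32 %x), %rhs
    bool Narrow = X < (uint32_t)RHS;       // icmp ult i32 %x, trunc(%rhs)
    assert(Wide == Narrow);                // the rotated form is equivalent
  }
  return 0;
}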
bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
SmallVector<BasicBlock*, 16> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
@@ -1499,20 +1650,18 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
// If we know we'd exit on the first iteration, rewrite the exit to
// reflect this. This does not imply the loop must exit through this
// exit; there may be an earlier one taken on the first iteration.
- // TODO: Given we know the backedge can't be taken, we should go ahead
- // and break it. Or at least, kill all the header phis and simplify.
+ // We know that the backedge can't be taken, so we replace all
+ // the header PHIs with values coming from the preheader.
if (ExitCount->isZero()) {
foldExit(L, ExitingBB, true, DeadInsts);
+ replaceLoopPHINodesWithPreheaderValues(L, DeadInsts);
Changed = true;
continue;
}
- // If we end up with a pointer exit count, bail. Note that we can end up
- // with a pointer exit count for one exiting block, and not for another in
- // the same loop.
- if (!ExitCount->getType()->isIntegerTy() ||
- !MaxExitCount->getType()->isIntegerTy())
- continue;
+ assert(ExitCount->getType()->isIntegerTy() &&
+ MaxExitCount->getType()->isIntegerTy() &&
+ "Exit counts must be integers");
Type *WiderType =
SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
@@ -1569,14 +1718,11 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
// through *explicit* control flow. We have to eliminate the possibility of
// implicit exits (see below) before we know it's truly exact.
const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
- if (isa<SCEVCouldNotCompute>(ExactBTC) ||
- !SE->isLoopInvariant(ExactBTC, L) ||
- !isSafeToExpand(ExactBTC, *SE))
+ if (isa<SCEVCouldNotCompute>(ExactBTC) || !isSafeToExpand(ExactBTC, *SE))
return false;
- // If we end up with a pointer exit count, bail. It may be unsized.
- if (!ExactBTC->getType()->isIntegerTy())
- return false;
+ assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
+ assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");
auto BadExit = [&](BasicBlock *ExitingBB) {
// If our exiting block exits multiple loops, we can only rewrite the
@@ -1603,15 +1749,12 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
return true;
const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
- if (isa<SCEVCouldNotCompute>(ExitCount) ||
- !SE->isLoopInvariant(ExitCount, L) ||
- !isSafeToExpand(ExitCount, *SE))
- return true;
-
- // If we end up with a pointer exit count, bail. It may be unsized.
- if (!ExitCount->getType()->isIntegerTy())
+ if (isa<SCEVCouldNotCompute>(ExitCount) || !isSafeToExpand(ExitCount, *SE))
return true;
+ assert(SE->isLoopInvariant(ExitCount, L) &&
+ "Exit count must be loop invariant");
+ assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
return false;
};
@@ -1781,7 +1924,11 @@ bool IndVarSimplify::run(Loop *L) {
}
// Eliminate redundant IV cycles.
- NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
+ NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI);
+
+ // Try to convert exit conditions to unsigned and rotate computation
+ // out of the loop. Note: Handles invalidation internally if needed.
+ Changed |= canonicalizeExitCondition(L);
// Try to eliminate loop exits based on analyzeable exit counts
if (optimizeLoopExits(L, Rewriter)) {
diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index f7d631f5e785..883d4afff3bd 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -96,10 +96,13 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
@@ -115,6 +118,7 @@
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -146,6 +150,14 @@ static const unsigned UninitializedAddressSpace =
namespace {
using ValueToAddrSpaceMapTy = DenseMap<const Value *, unsigned>;
+// Different from ValueToAddrSpaceMapTy, where a new addrspace is inferred on
+// the *def* of a value, PredicatedAddrSpaceMapTy is a map where a new
+// addrspace is inferred on the *use* of a pointer. This map is introduced to
+// infer an addrspace from the addrspace predicate assumption built from the
+// assume intrinsic. In that scenario, only specific uses (under a valid
+// assumption context) can be inferred to have a new addrspace.
+using PredicatedAddrSpaceMapTy =
+ DenseMap<std::pair<const Value *, const Value *>, unsigned>;
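An illustrative sketch of the difference between the two maps, using plain STL containers rather than LLVM's DenseMap (the names and values here are made up): the per-def map records one inferred address space for a value, while the predicated map is keyed on a (user, operand) pair so that only the use covered by a dominating assumption is rewritten.

#include <map>
#include <utility>

struct Value {};                // stand-ins for llvm::Value / llvm::Instruction
struct Instruction : Value {};

int main() {
  Value Ptr;
  Instruction Load;

  // Per-def: every use of Ptr sees the same inferred address space.
  std::map<const Value *, unsigned> InferredAddrSpace;
  InferredAddrSpace[&Ptr] = 1;

  // Per-use: only the specific (user, operand) pair that is dominated by a
  // valid assumption gets the predicated address space.
  std::map<std::pair<const Value *, const Value *>, unsigned> PredicatedAS;
  PredicatedAS[{&Load, &Ptr}] = 3;

  return PredicatedAS.count({&Load, &Ptr}) ? 0 : 1;
}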
using PostorderStackTy = llvm::SmallVector<PointerIntPair<Value *, 1, bool>, 4>;
class InferAddressSpaces : public FunctionPass {
@@ -160,6 +172,8 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetTransformInfoWrapperPass>();
}
@@ -167,6 +181,8 @@ public:
};
class InferAddressSpacesImpl {
+ AssumptionCache &AC;
+ DominatorTree *DT = nullptr;
const TargetTransformInfo *TTI = nullptr;
const DataLayout *DL = nullptr;
@@ -174,21 +190,24 @@ class InferAddressSpacesImpl {
/// possible.
unsigned FlatAddrSpace = 0;
- // Returns the new address space of V if updated; otherwise, returns None.
- Optional<unsigned>
- updateAddressSpace(const Value &V,
- const ValueToAddrSpaceMapTy &InferredAddrSpace) const;
+ // Try to update the address space of V. Returns true if V is updated, and
+ // false otherwise.
+ bool updateAddressSpace(const Value &V,
+ ValueToAddrSpaceMapTy &InferredAddrSpace,
+ PredicatedAddrSpaceMapTy &PredicatedAS) const;
// Tries to infer the specific address space of each address expression in
// Postorder.
void inferAddressSpaces(ArrayRef<WeakTrackingVH> Postorder,
- ValueToAddrSpaceMapTy *InferredAddrSpace) const;
+ ValueToAddrSpaceMapTy &InferredAddrSpace,
+ PredicatedAddrSpaceMapTy &PredicatedAS) const;
bool isSafeToCastConstAddrSpace(Constant *C, unsigned NewAS) const;
Value *cloneInstructionWithNewAddressSpace(
Instruction *I, unsigned NewAddrSpace,
const ValueToValueMapTy &ValueWithNewAddrSpace,
+ const PredicatedAddrSpaceMapTy &PredicatedAS,
SmallVectorImpl<const Use *> *UndefUsesToFix) const;
// Changes the flat address expressions in function F to point to specific
@@ -196,7 +215,8 @@ class InferAddressSpacesImpl {
// all flat expressions in the use-def graph of function F.
bool rewriteWithNewAddressSpaces(
const TargetTransformInfo &TTI, ArrayRef<WeakTrackingVH> Postorder,
- const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const;
+ const ValueToAddrSpaceMapTy &InferredAddrSpace,
+ const PredicatedAddrSpaceMapTy &PredicatedAS, Function *F) const;
void appendsFlatAddressExpressionToPostorderStack(
Value *V, PostorderStackTy &PostorderStack,
@@ -211,14 +231,18 @@ class InferAddressSpacesImpl {
std::vector<WeakTrackingVH> collectFlatAddressExpressions(Function &F) const;
Value *cloneValueWithNewAddressSpace(
- Value *V, unsigned NewAddrSpace,
- const ValueToValueMapTy &ValueWithNewAddrSpace,
- SmallVectorImpl<const Use *> *UndefUsesToFix) const;
+ Value *V, unsigned NewAddrSpace,
+ const ValueToValueMapTy &ValueWithNewAddrSpace,
+ const PredicatedAddrSpaceMapTy &PredicatedAS,
+ SmallVectorImpl<const Use *> *UndefUsesToFix) const;
unsigned joinAddressSpaces(unsigned AS1, unsigned AS2) const;
+ unsigned getPredicatedAddrSpace(const Value &V, Value *Opnd) const;
+
public:
- InferAddressSpacesImpl(const TargetTransformInfo *TTI, unsigned FlatAddrSpace)
- : TTI(TTI), FlatAddrSpace(FlatAddrSpace) {}
+ InferAddressSpacesImpl(AssumptionCache &AC, DominatorTree *DT,
+ const TargetTransformInfo *TTI, unsigned FlatAddrSpace)
+ : AC(AC), DT(DT), TTI(TTI), FlatAddrSpace(FlatAddrSpace) {}
bool run(Function &F);
};
@@ -232,8 +256,12 @@ void initializeInferAddressSpacesPass(PassRegistry &);
} // end namespace llvm
-INITIALIZE_PASS(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces",
- false, false)
+INITIALIZE_PASS_BEGIN(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_END(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces",
+ false, false)
// Check whether that's a no-op pointer bitcast using a pair of
// `ptrtoint`/`inttoptr` due to the missing no-op pointer bitcast over
@@ -505,6 +533,7 @@ InferAddressSpacesImpl::collectFlatAddressExpressions(Function &F) const {
static Value *operandWithNewAddressSpaceOrCreateUndef(
const Use &OperandUse, unsigned NewAddrSpace,
const ValueToValueMapTy &ValueWithNewAddrSpace,
+ const PredicatedAddrSpaceMapTy &PredicatedAS,
SmallVectorImpl<const Use *> *UndefUsesToFix) {
Value *Operand = OperandUse.get();
@@ -517,6 +546,18 @@ static Value *operandWithNewAddressSpaceOrCreateUndef(
if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Operand))
return NewOperand;
+ Instruction *Inst = cast<Instruction>(OperandUse.getUser());
+ auto I = PredicatedAS.find(std::make_pair(Inst, Operand));
+ if (I != PredicatedAS.end()) {
+ // Insert an addrspacecast on that operand before the user.
+ unsigned NewAS = I->second;
+ Type *NewPtrTy = PointerType::getWithSamePointeeType(
+ cast<PointerType>(Operand->getType()), NewAS);
+ auto *NewI = new AddrSpaceCastInst(Operand, NewPtrTy);
+ NewI->insertBefore(Inst);
+ return NewI;
+ }
+
UndefUsesToFix->push_back(&OperandUse);
return UndefValue::get(NewPtrTy);
}
@@ -536,6 +577,7 @@ static Value *operandWithNewAddressSpaceOrCreateUndef(
Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace(
Instruction *I, unsigned NewAddrSpace,
const ValueToValueMapTy &ValueWithNewAddrSpace,
+ const PredicatedAddrSpaceMapTy &PredicatedAS,
SmallVectorImpl<const Use *> *UndefUsesToFix) const {
Type *NewPtrType = PointerType::getWithSamePointeeType(
cast<PointerType>(I->getType()), NewAddrSpace);
@@ -557,7 +599,7 @@ Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace(
assert(II->getIntrinsicID() == Intrinsic::ptrmask);
Value *NewPtr = operandWithNewAddressSpaceOrCreateUndef(
II->getArgOperandUse(0), NewAddrSpace, ValueWithNewAddrSpace,
- UndefUsesToFix);
+ PredicatedAS, UndefUsesToFix);
Value *Rewrite =
TTI->rewriteIntrinsicWithAddressSpace(II, II->getArgOperand(0), NewPtr);
if (Rewrite) {
@@ -586,7 +628,8 @@ Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace(
NewPointerOperands.push_back(nullptr);
else
NewPointerOperands.push_back(operandWithNewAddressSpaceOrCreateUndef(
- OperandUse, NewAddrSpace, ValueWithNewAddrSpace, UndefUsesToFix));
+ OperandUse, NewAddrSpace, ValueWithNewAddrSpace, PredicatedAS,
+ UndefUsesToFix));
}
switch (I->getOpcode()) {
@@ -708,9 +751,8 @@ static Value *cloneConstantExprWithNewAddressSpace(
if (CE->getOpcode() == Instruction::GetElementPtr) {
// Needs to specify the source type while constructing a getelementptr
// constant expression.
- return CE->getWithOperands(
- NewOperands, TargetType, /*OnlyIfReduced=*/false,
- NewOperands[0]->getType()->getPointerElementType());
+ return CE->getWithOperands(NewOperands, TargetType, /*OnlyIfReduced=*/false,
+ cast<GEPOperator>(CE)->getSourceElementType());
}
return CE->getWithOperands(NewOperands, TargetType);
@@ -724,6 +766,7 @@ static Value *cloneConstantExprWithNewAddressSpace(
Value *InferAddressSpacesImpl::cloneValueWithNewAddressSpace(
Value *V, unsigned NewAddrSpace,
const ValueToValueMapTy &ValueWithNewAddrSpace,
+ const PredicatedAddrSpaceMapTy &PredicatedAS,
SmallVectorImpl<const Use *> *UndefUsesToFix) const {
// All values in Postorder are flat address expressions.
assert(V->getType()->getPointerAddressSpace() == FlatAddrSpace &&
@@ -731,7 +774,7 @@ Value *InferAddressSpacesImpl::cloneValueWithNewAddressSpace(
if (Instruction *I = dyn_cast<Instruction>(V)) {
Value *NewV = cloneInstructionWithNewAddressSpace(
- I, NewAddrSpace, ValueWithNewAddrSpace, UndefUsesToFix);
+ I, NewAddrSpace, ValueWithNewAddrSpace, PredicatedAS, UndefUsesToFix);
if (Instruction *NewI = dyn_cast_or_null<Instruction>(NewV)) {
if (NewI->getParent() == nullptr) {
NewI->insertBefore(I);
@@ -779,46 +822,43 @@ bool InferAddressSpacesImpl::run(Function &F) {
// Runs a data-flow analysis to refine the address spaces of every expression
// in Postorder.
ValueToAddrSpaceMapTy InferredAddrSpace;
- inferAddressSpaces(Postorder, &InferredAddrSpace);
+ PredicatedAddrSpaceMapTy PredicatedAS;
+ inferAddressSpaces(Postorder, InferredAddrSpace, PredicatedAS);
// Changes the address spaces of the flat address expressions that are inferred
// to point to a specific address space.
- return rewriteWithNewAddressSpaces(*TTI, Postorder, InferredAddrSpace, &F);
+ return rewriteWithNewAddressSpaces(*TTI, Postorder, InferredAddrSpace,
+ PredicatedAS, &F);
}
// Constants need to be tracked through RAUW to handle cases with nested
// constant expressions, so wrap values in WeakTrackingVH.
void InferAddressSpacesImpl::inferAddressSpaces(
ArrayRef<WeakTrackingVH> Postorder,
- ValueToAddrSpaceMapTy *InferredAddrSpace) const {
+ ValueToAddrSpaceMapTy &InferredAddrSpace,
+ PredicatedAddrSpaceMapTy &PredicatedAS) const {
SetVector<Value *> Worklist(Postorder.begin(), Postorder.end());
// Initially, all expressions are in the uninitialized address space.
for (Value *V : Postorder)
- (*InferredAddrSpace)[V] = UninitializedAddressSpace;
+ InferredAddrSpace[V] = UninitializedAddressSpace;
while (!Worklist.empty()) {
Value *V = Worklist.pop_back_val();
- // Tries to update the address space of the stack top according to the
+ // Try to update the address space of the stack top according to the
// address spaces of its operands.
- LLVM_DEBUG(dbgs() << "Updating the address space of\n " << *V << '\n');
- Optional<unsigned> NewAS = updateAddressSpace(*V, *InferredAddrSpace);
- if (!NewAS.hasValue())
+ if (!updateAddressSpace(*V, InferredAddrSpace, PredicatedAS))
continue;
- // If any updates are made, grabs its users to the worklist because
- // their address spaces can also be possibly updated.
- LLVM_DEBUG(dbgs() << " to " << NewAS.getValue() << '\n');
- (*InferredAddrSpace)[V] = NewAS.getValue();
for (Value *User : V->users()) {
// Skip if User is already in the worklist.
if (Worklist.count(User))
continue;
- auto Pos = InferredAddrSpace->find(User);
+ auto Pos = InferredAddrSpace.find(User);
// Our algorithm only updates the address spaces of flat address
// expressions, which are those in InferredAddrSpace.
- if (Pos == InferredAddrSpace->end())
+ if (Pos == InferredAddrSpace.end())
continue;
// Function updateAddressSpace moves the address space down a lattice
@@ -832,10 +872,37 @@ void InferAddressSpacesImpl::inferAddressSpaces(
}
}
-Optional<unsigned> InferAddressSpacesImpl::updateAddressSpace(
- const Value &V, const ValueToAddrSpaceMapTy &InferredAddrSpace) const {
+unsigned InferAddressSpacesImpl::getPredicatedAddrSpace(const Value &V,
+ Value *Opnd) const {
+ const Instruction *I = dyn_cast<Instruction>(&V);
+ if (!I)
+ return UninitializedAddressSpace;
+
+ Opnd = Opnd->stripInBoundsOffsets();
+ for (auto &AssumeVH : AC.assumptionsFor(Opnd)) {
+ if (!AssumeVH)
+ continue;
+ CallInst *CI = cast<CallInst>(AssumeVH);
+ if (!isValidAssumeForContext(CI, I, DT))
+ continue;
+
+ const Value *Ptr;
+ unsigned AS;
+ std::tie(Ptr, AS) = TTI->getPredicatedAddrSpace(CI->getArgOperand(0));
+ if (Ptr)
+ return AS;
+ }
+
+ return UninitializedAddressSpace;
+}
+
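At the source level, this path is driven by an assumption intrinsic that dominates a pointer use; the actual predicate call is target specific and is recognized through TTI::getPredicatedAddrSpace. A hedged sketch of the kind of code this is aimed at, where is_shared is a purely hypothetical stand-in for such a predicate (not from the patch):

// Compiles with Clang, which lowers __builtin_assume to llvm.assume; the
// __attribute__((const)) keeps the predicate call side-effect free so the
// assumption is not discarded.
extern "C" bool is_shared(const void *P) __attribute__((const));

void store_zero(float *P, int I) {
  __builtin_assume(is_shared(P)); // llvm.assume dominating the use below
  P[I] = 0.0f;                    // only this dominated use may be rewritten
                                  // into the predicated address space
}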
+bool InferAddressSpacesImpl::updateAddressSpace(
+ const Value &V, ValueToAddrSpaceMapTy &InferredAddrSpace,
+ PredicatedAddrSpaceMapTy &PredicatedAS) const {
assert(InferredAddrSpace.count(&V));
+ LLVM_DEBUG(dbgs() << "Updating the address space of\n " << V << '\n');
+
// The new inferred address space equals the join of the address spaces
// of all its pointer operands.
unsigned NewAS = UninitializedAddressSpace;
@@ -861,7 +928,7 @@ Optional<unsigned> InferAddressSpacesImpl::updateAddressSpace(
// address space is known.
if ((C1 && Src0AS == UninitializedAddressSpace) ||
(C0 && Src1AS == UninitializedAddressSpace))
- return None;
+ return false;
if (C0 && isSafeToCastConstAddrSpace(C0, Src1AS))
NewAS = Src1AS;
@@ -878,10 +945,23 @@ Optional<unsigned> InferAddressSpacesImpl::updateAddressSpace(
// Otherwise, infer the address space from its pointer operands.
for (Value *PtrOperand : getPointerOperands(V, *DL, TTI)) {
auto I = InferredAddrSpace.find(PtrOperand);
- unsigned OperandAS =
- I != InferredAddrSpace.end()
- ? I->second
- : PtrOperand->getType()->getPointerAddressSpace();
+ unsigned OperandAS;
+ if (I == InferredAddrSpace.end()) {
+ OperandAS = PtrOperand->getType()->getPointerAddressSpace();
+ if (OperandAS == FlatAddrSpace) {
+ // Check AC for assumption dominating V.
+ unsigned AS = getPredicatedAddrSpace(V, PtrOperand);
+ if (AS != UninitializedAddressSpace) {
+ LLVM_DEBUG(dbgs()
+ << " deduce operand AS from the predicate addrspace "
+ << AS << '\n');
+ OperandAS = AS;
+ // Record this use with the predicated AS.
+ PredicatedAS[std::make_pair(&V, PtrOperand)] = OperandAS;
+ }
+ }
+ } else
+ OperandAS = I->second;
// join(flat, *) = flat. So we can break if NewAS is already flat.
NewAS = joinAddressSpaces(NewAS, OperandAS);
@@ -894,8 +974,13 @@ Optional<unsigned> InferAddressSpacesImpl::updateAddressSpace(
unsigned OldAS = InferredAddrSpace.lookup(&V);
assert(OldAS != FlatAddrSpace);
if (OldAS == NewAS)
- return None;
- return NewAS;
+ return false;
+
+ // If any updates are made, add the users of V to the worklist because
+ // their address spaces may also need to be updated.
+ LLVM_DEBUG(dbgs() << " to " << NewAS << '\n');
+ InferredAddrSpace[&V] = NewAS;
+ return true;
}
/// \p returns true if \p U is the pointer operand of a memory instruction with
@@ -1026,7 +1111,8 @@ static Value::use_iterator skipToNextUser(Value::use_iterator I,
bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
const TargetTransformInfo &TTI, ArrayRef<WeakTrackingVH> Postorder,
- const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const {
+ const ValueToAddrSpaceMapTy &InferredAddrSpace,
+ const PredicatedAddrSpaceMapTy &PredicatedAS, Function *F) const {
// For each address expression to be modified, creates a clone of it with its
// pointer operands converted to the new address space. Since the pointer
// operands are converted, the clone is naturally in the new address space by
@@ -1042,8 +1128,9 @@ bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
continue;
if (V->getType()->getPointerAddressSpace() != NewAddrSpace) {
- Value *New = cloneValueWithNewAddressSpace(
- V, NewAddrSpace, ValueWithNewAddrSpace, &UndefUsesToFix);
+ Value *New =
+ cloneValueWithNewAddressSpace(V, NewAddrSpace, ValueWithNewAddrSpace,
+ PredicatedAS, &UndefUsesToFix);
if (New)
ValueWithNewAddrSpace[V] = New;
}
@@ -1155,8 +1242,9 @@ bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
if (AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(CurUser)) {
unsigned NewAS = NewV->getType()->getPointerAddressSpace();
if (ASC->getDestAddressSpace() == NewAS) {
- if (ASC->getType()->getPointerElementType() !=
- NewV->getType()->getPointerElementType()) {
+ if (!cast<PointerType>(ASC->getType())
+ ->hasSameElementTypeAs(
+ cast<PointerType>(NewV->getType()))) {
NewV = CastInst::Create(Instruction::BitCast, NewV,
ASC->getType(), "", ASC);
}
@@ -1199,7 +1287,10 @@ bool InferAddressSpaces::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
+ auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
return InferAddressSpacesImpl(
+ getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F), DT,
&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F),
FlatAddrSpace)
.run(F);
@@ -1217,11 +1308,14 @@ InferAddressSpacesPass::InferAddressSpacesPass(unsigned AddressSpace)
PreservedAnalyses InferAddressSpacesPass::run(Function &F,
FunctionAnalysisManager &AM) {
bool Changed =
- InferAddressSpacesImpl(&AM.getResult<TargetIRAnalysis>(F), FlatAddrSpace)
+ InferAddressSpacesImpl(AM.getResult<AssumptionAnalysis>(F),
+ AM.getCachedResult<DominatorTreeAnalysis>(F),
+ &AM.getResult<TargetIRAnalysis>(F), FlatAddrSpace)
.run(F);
if (Changed) {
PreservedAnalyses PA;
PA.preserveSet<CFGAnalyses>();
+ PA.preserve<DominatorTreeAnalysis>();
return PA;
}
return PreservedAnalyses::all();
diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index 9dc3b0351346..fe9a7211967c 100644
--- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -331,7 +331,7 @@ bool JumpThreading::runOnFunction(Function &F) {
BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
}
- bool Changed = Impl.runImpl(F, TLI, LVI, AA, &DTU, F.hasProfileData(),
+ bool Changed = Impl.runImpl(F, TLI, TTI, LVI, AA, &DTU, F.hasProfileData(),
std::move(BFI), std::move(BPI));
if (PrintLVIAfterJumpThreading) {
dbgs() << "LVI for function '" << F.getName() << "':\n";
@@ -360,7 +360,7 @@ PreservedAnalyses JumpThreadingPass::run(Function &F,
BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
}
- bool Changed = runImpl(F, &TLI, &LVI, &AA, &DTU, F.hasProfileData(),
+ bool Changed = runImpl(F, &TLI, &TTI, &LVI, &AA, &DTU, F.hasProfileData(),
std::move(BFI), std::move(BPI));
if (PrintLVIAfterJumpThreading) {
@@ -377,12 +377,14 @@ PreservedAnalyses JumpThreadingPass::run(Function &F,
}
bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
- LazyValueInfo *LVI_, AliasAnalysis *AA_,
- DomTreeUpdater *DTU_, bool HasProfileData_,
+ TargetTransformInfo *TTI_, LazyValueInfo *LVI_,
+ AliasAnalysis *AA_, DomTreeUpdater *DTU_,
+ bool HasProfileData_,
std::unique_ptr<BlockFrequencyInfo> BFI_,
std::unique_ptr<BranchProbabilityInfo> BPI_) {
LLVM_DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
TLI = TLI_;
+ TTI = TTI_;
LVI = LVI_;
AA = AA_;
DTU = DTU_;
@@ -514,7 +516,8 @@ static void replaceFoldableUses(Instruction *Cond, Value *ToVal) {
/// Return the cost of duplicating a piece of this block from first non-phi
/// and before StopAt instruction to thread across it. Stop scanning the block
/// when exceeding the threshold. If duplication is impossible, returns ~0U.
-static unsigned getJumpThreadDuplicationCost(BasicBlock *BB,
+static unsigned getJumpThreadDuplicationCost(const TargetTransformInfo *TTI,
+ BasicBlock *BB,
Instruction *StopAt,
unsigned Threshold) {
assert(StopAt->getParent() == BB && "Not an instruction from proper BB?");
@@ -550,26 +553,21 @@ static unsigned getJumpThreadDuplicationCost(BasicBlock *BB,
if (Size > Threshold)
return Size;
- // Debugger intrinsics don't incur code size.
- if (isa<DbgInfoIntrinsic>(I)) continue;
-
- // Pseudo-probes don't incur code size.
- if (isa<PseudoProbeInst>(I))
- continue;
-
- // If this is a pointer->pointer bitcast, it is free.
- if (isa<BitCastInst>(I) && I->getType()->isPointerTy())
- continue;
-
- // Freeze instruction is free, too.
- if (isa<FreezeInst>(I))
- continue;
-
// Bail out if this instruction gives back a token type, it is not possible
// to duplicate it if it is used outside this BB.
if (I->getType()->isTokenTy() && I->isUsedOutsideOfBlock(BB))
return ~0U;
+ // Blocks with NoDuplicate are modelled as having infinite cost, so they
+ // are never duplicated.
+ if (const CallInst *CI = dyn_cast<CallInst>(I))
+ if (CI->cannotDuplicate() || CI->isConvergent())
+ return ~0U;
+
+ if (TTI->getUserCost(&*I, TargetTransformInfo::TCK_SizeAndLatency)
+ == TargetTransformInfo::TCC_Free)
+ continue;
+
// All other instructions count for at least one unit.
++Size;
@@ -578,11 +576,7 @@ static unsigned getJumpThreadDuplicationCost(BasicBlock *BB,
// as having cost of 2 total, and if they are a vector intrinsic, we model
// them as having cost 1.
if (const CallInst *CI = dyn_cast<CallInst>(I)) {
- if (CI->cannotDuplicate() || CI->isConvergent())
- // Blocks with NoDuplicate are modelled as having infinite cost, so they
- // are never duplicated.
- return ~0U;
- else if (!isa<IntrinsicInst>(CI))
+ if (!isa<IntrinsicInst>(CI))
Size += 3;
else if (!CI->getType()->isVectorTy())
Size += 1;
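For context on what this cost is bounding: jump threading duplicates the block between a merge point and a later branch into a predecessor that already determines the branch's outcome, and getJumpThreadDuplicationCost caps how much code may be copied, now using TTI's size-and-latency model to decide which instructions are free. A source-level sketch of the transformation (fast and slow are placeholder callees, not from the patch):

void fast();
void slow();

// Before threading: the value of X is fully determined by C, but the
// X == 1 test is only resolved after the two paths merge.
void before(bool C) {
  int X;
  if (C)
    X = 1;
  else
    X = 2;
  // ... the code between the merge and the second branch is what gets
  // duplicated, and what getJumpThreadDuplicationCost is pricing ...
  if (X == 1)
    fast();
  else
    slow();
}

// After threading (conceptually): each predecessor jumps straight to the
// target it implies, at the price of duplicating the middle block.
void after(bool C) {
  if (C)
    fast();
  else
    slow();
}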
@@ -1363,8 +1357,7 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) {
// If all of the loads and stores that feed the value have the same AA tags,
// then we can propagate them onto any newly inserted loads.
- AAMDNodes AATags;
- LoadI->getAAMetadata(AATags);
+ AAMDNodes AATags = LoadI->getAAMetadata();
SmallPtrSet<BasicBlock*, 8> PredsScanned;
@@ -2235,10 +2228,10 @@ bool JumpThreadingPass::maybethreadThroughTwoBasicBlocks(BasicBlock *BB,
}
// Compute the cost of duplicating BB and PredBB.
- unsigned BBCost =
- getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold);
+ unsigned BBCost = getJumpThreadDuplicationCost(
+ TTI, BB, BB->getTerminator(), BBDupThreshold);
unsigned PredBBCost = getJumpThreadDuplicationCost(
- PredBB, PredBB->getTerminator(), BBDupThreshold);
+ TTI, PredBB, PredBB->getTerminator(), BBDupThreshold);
// Give up if costs are too high. We need to check BBCost and PredBBCost
// individually before checking their sum because getJumpThreadDuplicationCost
@@ -2346,8 +2339,8 @@ bool JumpThreadingPass::tryThreadEdge(
return false;
}
- unsigned JumpThreadCost =
- getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold);
+ unsigned JumpThreadCost = getJumpThreadDuplicationCost(
+ TTI, BB, BB->getTerminator(), BBDupThreshold);
if (JumpThreadCost > BBDupThreshold) {
LLVM_DEBUG(dbgs() << " Not threading BB '" << BB->getName()
<< "' - Cost is too high: " << JumpThreadCost << "\n");
@@ -2615,8 +2608,8 @@ bool JumpThreadingPass::duplicateCondBranchOnPHIIntoPred(
return false;
}
- unsigned DuplicationCost =
- getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold);
+ unsigned DuplicationCost = getJumpThreadDuplicationCost(
+ TTI, BB, BB->getTerminator(), BBDupThreshold);
if (DuplicationCost > BBDupThreshold) {
LLVM_DEBUG(dbgs() << " Not duplicating BB '" << BB->getName()
<< "' - Cost is too high: " << DuplicationCost << "\n");
@@ -3032,7 +3025,8 @@ bool JumpThreadingPass::threadGuard(BasicBlock *BB, IntrinsicInst *Guard,
ValueToValueMapTy UnguardedMapping, GuardedMapping;
Instruction *AfterGuard = Guard->getNextNode();
- unsigned Cost = getJumpThreadDuplicationCost(BB, AfterGuard, BBDupThreshold);
+ unsigned Cost =
+ getJumpThreadDuplicationCost(TTI, BB, AfterGuard, BBDupThreshold);
if (Cost > BBDupThreshold)
return false;
// Duplicate all instructions before the guard and the guard itself to the
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index 30058df3ded5..bf714d167670 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -117,13 +117,6 @@ static cl::opt<uint32_t> MaxNumUsesTraversed(
cl::desc("Max num uses visited for identifying load "
"invariance in loop using invariant start (default = 8)"));
-// Default value of zero implies we use the regular alias set tracker mechanism
-// instead of the cross product using AA to identify aliasing of the memory
-// location we are interested in.
-static cl::opt<int>
-LICMN2Theshold("licm-n2-threshold", cl::Hidden, cl::init(0),
- cl::desc("How many instruction to cross product using AA"));
-
// Experimental option to allow imprecision in LICM in pathological cases, in
// exchange for faster compile. This is to be removed if MemorySSA starts to
// address the same issue. This flag applies only when LICM uses MemorySSA
@@ -151,7 +144,8 @@ cl::opt<unsigned> llvm::SetLicmMssaNoAccForPromotionCap(
static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
const LoopSafetyInfo *SafetyInfo,
- TargetTransformInfo *TTI, bool &FreeInLoop);
+ TargetTransformInfo *TTI, bool &FreeInLoop,
+ bool LoopNestMode);
static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
MemorySSAUpdater *MSSAU, ScalarEvolution *SE,
@@ -180,7 +174,7 @@ static Instruction *cloneInstructionInExitBlock(
const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU);
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
- AliasSetTracker *AST, MemorySSAUpdater *MSSAU);
+ MemorySSAUpdater *MSSAU);
static void moveInstructionBefore(Instruction &I, Instruction &Dest,
ICFLoopSafetyInfo &SafetyInfo,
@@ -206,9 +200,6 @@ struct LoopInvariantCodeMotion {
private:
unsigned LicmMssaOptCap;
unsigned LicmMssaNoAccForPromotionCap;
-
- std::unique_ptr<AliasSetTracker>
- collectAliasInfoForLoop(Loop *L, LoopInfo *LI, AAResults *AA);
};
struct LegacyLICMPass : public LoopPass {
@@ -228,9 +219,7 @@ struct LegacyLICMPass : public LoopPass {
<< L->getHeader()->getNameOrAsOperand() << "\n");
auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
- MemorySSA *MSSA = EnableMSSALoopDependency
- ? (&getAnalysis<MemorySSAWrapperPass>().getMSSA())
- : nullptr;
+ MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
bool hasProfileData = L->getHeader()->getParent()->hasProfileData();
BlockFrequencyInfo *BFI =
hasProfileData ? &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI()
@@ -257,10 +246,8 @@ struct LegacyLICMPass : public LoopPass {
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
- if (EnableMSSALoopDependency) {
- AU.addRequired<MemorySSAWrapperPass>();
- AU.addPreserved<MemorySSAWrapperPass>();
- }
+ AU.addRequired<MemorySSAWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
getLoopAnalysisUsage(AU);
LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
@@ -275,6 +262,9 @@ private:
PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR, LPMUpdater &) {
+ if (!AR.MSSA)
+ report_fatal_error("LICM requires MemorySSA (loop-mssa)");
+
// For the new PM, we also can't use OptimizationRemarkEmitter as an analysis
// pass. Function analyses need to be preserved across loop transformations
// but ORE cannot be preserved (see comment before the pass definition).
@@ -289,8 +279,7 @@ PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
PA.preserve<DominatorTreeAnalysis>();
PA.preserve<LoopAnalysis>();
- if (AR.MSSA)
- PA.preserve<MemorySSAAnalysis>();
+ PA.preserve<MemorySSAAnalysis>();
return PA;
}
@@ -298,6 +287,9 @@ PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
PreservedAnalyses LNICMPass::run(LoopNest &LN, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
LPMUpdater &) {
+ if (!AR.MSSA)
+ report_fatal_error("LNICM requires MemorySSA (loop-mssa)");
+
// For the new PM, we also can't use OptimizationRemarkEmitter as an analysis
// pass. Function analyses need to be preserved across loop transformations
// but ORE cannot be preserved (see comment before the pass definition).
@@ -316,8 +308,7 @@ PreservedAnalyses LNICMPass::run(LoopNest &LN, LoopAnalysisManager &AM,
PA.preserve<DominatorTreeAnalysis>();
PA.preserve<LoopAnalysis>();
- if (AR.MSSA)
- PA.preserve<MemorySSAAnalysis>();
+ PA.preserve<MemorySSAAnalysis>();
return PA;
}
@@ -386,10 +377,6 @@ bool LoopInvariantCodeMotion::runOnLoop(
return false;
}
- std::unique_ptr<AliasSetTracker> CurAST;
- std::unique_ptr<MemorySSAUpdater> MSSAU;
- std::unique_ptr<SinkAndHoistLICMFlags> Flags;
-
// Don't sink stores from loops with coroutine suspend instructions.
// LICM would sink instructions into the default destination of
// the coroutine switch. The default destination of the switch is to
@@ -406,17 +393,9 @@ bool LoopInvariantCodeMotion::runOnLoop(
});
});
- if (!MSSA) {
- LLVM_DEBUG(dbgs() << "LICM: Using Alias Set Tracker.\n");
- CurAST = collectAliasInfoForLoop(L, LI, AA);
- Flags = std::make_unique<SinkAndHoistLICMFlags>(
- LicmMssaOptCap, LicmMssaNoAccForPromotionCap, /*IsSink=*/true);
- } else {
- LLVM_DEBUG(dbgs() << "LICM: Using MemorySSA.\n");
- MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
- Flags = std::make_unique<SinkAndHoistLICMFlags>(
- LicmMssaOptCap, LicmMssaNoAccForPromotionCap, /*IsSink=*/true, L, MSSA);
- }
+ MemorySSAUpdater MSSAU(MSSA);
+ SinkAndHoistLICMFlags Flags(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+ /*IsSink=*/true, L, MSSA);
// Get the preheader block to move instructions into...
BasicBlock *Preheader = L->getLoopPreheader();
@@ -435,14 +414,16 @@ bool LoopInvariantCodeMotion::runOnLoop(
// us to sink instructions in one pass, without iteration. After sinking
// instructions, we perform another pass to hoist them out of the loop.
if (L->hasDedicatedExits())
- Changed |=
- sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, TTI, L,
- CurAST.get(), MSSAU.get(), &SafetyInfo, *Flags.get(), ORE);
- Flags->setIsSink(false);
+ Changed |= LoopNestMode
+ ? sinkRegionForLoopNest(DT->getNode(L->getHeader()), AA, LI,
+ DT, BFI, TLI, TTI, L, &MSSAU,
+ &SafetyInfo, Flags, ORE)
+ : sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI,
+ TLI, TTI, L, &MSSAU, &SafetyInfo, Flags, ORE);
+ Flags.setIsSink(false);
if (Preheader)
Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, L,
- CurAST.get(), MSSAU.get(), SE, &SafetyInfo,
- *Flags.get(), ORE, LoopNestMode);
+ &MSSAU, SE, &SafetyInfo, Flags, ORE, LoopNestMode);
// Now that all loop invariants have been removed from the loop, promote any
// memory references to scalars that we can.
@@ -452,7 +433,7 @@ bool LoopInvariantCodeMotion::runOnLoop(
// preheader for SSA updater, so also avoid sinking when no preheader
// is available.
if (!DisablePromotion && Preheader && L->hasDedicatedExits() &&
- !Flags->tooManyMemoryAccesses() && !HasCoroSuspendInst) {
+ !Flags.tooManyMemoryAccesses() && !HasCoroSuspendInst) {
// Figure out the loop exits and their insertion points
SmallVector<BasicBlock *, 8> ExitBlocks;
L->getUniqueExitBlocks(ExitBlocks);
@@ -466,55 +447,29 @@ bool LoopInvariantCodeMotion::runOnLoop(
SmallVector<Instruction *, 8> InsertPts;
SmallVector<MemoryAccess *, 8> MSSAInsertPts;
InsertPts.reserve(ExitBlocks.size());
- if (MSSAU)
- MSSAInsertPts.reserve(ExitBlocks.size());
+ MSSAInsertPts.reserve(ExitBlocks.size());
for (BasicBlock *ExitBlock : ExitBlocks) {
InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
- if (MSSAU)
- MSSAInsertPts.push_back(nullptr);
+ MSSAInsertPts.push_back(nullptr);
}
PredIteratorCache PIC;
+ // Promoting one set of accesses may make the pointers for another set
+ // loop invariant, so run this in a loop (with the MaybePromotable set
+ // decreasing in size over time).
bool Promoted = false;
- if (CurAST.get()) {
- // Loop over all of the alias sets in the tracker object.
- for (AliasSet &AS : *CurAST) {
- // We can promote this alias set if it has a store, if it is a "Must"
- // alias set, if the pointer is loop invariant, and if we are not
- // eliminating any volatile loads or stores.
- if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
- !L->isLoopInvariant(AS.begin()->getValue()))
- continue;
-
- assert(
- !AS.empty() &&
- "Must alias set should have at least one pointer element in it!");
-
- SmallSetVector<Value *, 8> PointerMustAliases;
- for (const auto &ASI : AS)
- PointerMustAliases.insert(ASI.getValue());
-
- Promoted |= promoteLoopAccessesToScalars(
- PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI,
- DT, TLI, L, CurAST.get(), MSSAU.get(), &SafetyInfo, ORE);
+ bool LocalPromoted;
+ do {
+ LocalPromoted = false;
+ for (const SmallSetVector<Value *, 8> &PointerMustAliases :
+ collectPromotionCandidates(MSSA, AA, L)) {
+ LocalPromoted |= promoteLoopAccessesToScalars(
+ PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC,
+ LI, DT, TLI, L, &MSSAU, &SafetyInfo, ORE);
}
- } else {
- // Promoting one set of accesses may make the pointers for another set
- // loop invariant, so run this in a loop (with the MaybePromotable set
- // decreasing in size over time).
- bool LocalPromoted;
- do {
- LocalPromoted = false;
- for (const SmallSetVector<Value *, 8> &PointerMustAliases :
- collectPromotionCandidates(MSSA, AA, L)) {
- LocalPromoted |= promoteLoopAccessesToScalars(
- PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC,
- LI, DT, TLI, L, /*AST*/nullptr, MSSAU.get(), &SafetyInfo, ORE);
- }
- Promoted |= LocalPromoted;
- } while (LocalPromoted);
- }
+ Promoted |= LocalPromoted;
+ } while (LocalPromoted);
// Once we have promoted values across the loop body we have to
// recursively reform LCSSA as any nested loop may now have values defined
@@ -536,8 +491,8 @@ bool LoopInvariantCodeMotion::runOnLoop(
assert((L->isOutermost() || L->getParentLoop()->isLCSSAForm(*DT)) &&
"Parent loop not left in LCSSA form after LICM!");
- if (MSSAU.get() && VerifyMemorySSA)
- MSSAU->getMemorySSA()->verifyMemorySSA();
+ if (VerifyMemorySSA)
+ MSSA->verifyMemorySSA();
if (Changed && SE)
SE->forgetLoopDispositions(L);
@@ -552,17 +507,15 @@ bool LoopInvariantCodeMotion::runOnLoop(
bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
DominatorTree *DT, BlockFrequencyInfo *BFI,
TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
- Loop *CurLoop, AliasSetTracker *CurAST,
- MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo,
+ Loop *CurLoop, MemorySSAUpdater *MSSAU,
+ ICFLoopSafetyInfo *SafetyInfo,
SinkAndHoistLICMFlags &Flags,
- OptimizationRemarkEmitter *ORE) {
+ OptimizationRemarkEmitter *ORE, Loop *OutermostLoop) {
// Verify inputs.
assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
- CurLoop != nullptr && SafetyInfo != nullptr &&
+ CurLoop != nullptr && MSSAU != nullptr && SafetyInfo != nullptr &&
"Unexpected input to sinkRegion.");
- assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&
- "Either AliasSetTracker or MemorySSA should be initialized.");
// We want to visit children before parents. We will enque all the parents
// before their children in the worklist and process the worklist in reverse
@@ -587,7 +540,7 @@ bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
salvageKnowledge(&I);
salvageDebugInfo(I);
++II;
- eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
+ eraseInstruction(I, *SafetyInfo, MSSAU);
Changed = true;
continue;
}
@@ -598,26 +551,46 @@ bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
// operands of the instruction are loop invariant.
//
bool FreeInLoop = false;
+ bool LoopNestMode = OutermostLoop != nullptr;
if (!I.mayHaveSideEffects() &&
- isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) &&
- canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags,
- ORE)) {
+ isNotUsedOrFreeInLoop(I, LoopNestMode ? OutermostLoop : CurLoop,
+ SafetyInfo, TTI, FreeInLoop, LoopNestMode) &&
+ canSinkOrHoistInst(I, AA, DT, CurLoop, /*CurAST*/nullptr, MSSAU, true,
+ &Flags, ORE)) {
if (sink(I, LI, DT, BFI, CurLoop, SafetyInfo, MSSAU, ORE)) {
if (!FreeInLoop) {
++II;
salvageDebugInfo(I);
- eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
+ eraseInstruction(I, *SafetyInfo, MSSAU);
}
Changed = true;
}
}
}
}
- if (MSSAU && VerifyMemorySSA)
+ if (VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
return Changed;
}
+bool llvm::sinkRegionForLoopNest(
+ DomTreeNode *N, AAResults *AA, LoopInfo *LI, DominatorTree *DT,
+ BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
+ Loop *CurLoop, MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo,
+ SinkAndHoistLICMFlags &Flags, OptimizationRemarkEmitter *ORE) {
+
+ bool Changed = false;
+ SmallPriorityWorklist<Loop *, 4> Worklist;
+ Worklist.insert(CurLoop);
+ appendLoopsToWorklist(*CurLoop, Worklist);
+ while (!Worklist.empty()) {
+ Loop *L = Worklist.pop_back_val();
+ Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI,
+ TTI, L, MSSAU, SafetyInfo, Flags, ORE, CurLoop);
+ }
+ return Changed;
+}
+
namespace {
// This is a helper class for hoistRegion to make it able to hoist control flow
// in order to be able to hoist phis. The way this works is that we initially
@@ -820,9 +793,8 @@ public:
if (HoistTarget == InitialPreheader) {
// Phis in the loop header now need to use the new preheader.
InitialPreheader->replaceSuccessorsPhiUsesWith(HoistCommonSucc);
- if (MSSAU)
- MSSAU->wireOldPredecessorsToNewImmediatePredecessor(
- HoistTarget->getSingleSuccessor(), HoistCommonSucc, {HoistTarget});
+ MSSAU->wireOldPredecessorsToNewImmediatePredecessor(
+ HoistTarget->getSingleSuccessor(), HoistCommonSucc, {HoistTarget});
// The new preheader dominates the loop header.
DomTreeNode *PreheaderNode = DT->getNode(HoistCommonSucc);
DomTreeNode *HeaderNode = DT->getNode(CurLoop->getHeader());
@@ -884,16 +856,14 @@ static bool worthSinkOrHoistInst(Instruction &I, BasicBlock *DstBlock,
bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
DominatorTree *DT, BlockFrequencyInfo *BFI,
TargetLibraryInfo *TLI, Loop *CurLoop,
- AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
- ScalarEvolution *SE, ICFLoopSafetyInfo *SafetyInfo,
+ MemorySSAUpdater *MSSAU, ScalarEvolution *SE,
+ ICFLoopSafetyInfo *SafetyInfo,
SinkAndHoistLICMFlags &Flags,
OptimizationRemarkEmitter *ORE, bool LoopNestMode) {
// Verify inputs.
assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
- CurLoop != nullptr && SafetyInfo != nullptr &&
+ CurLoop != nullptr && MSSAU != nullptr && SafetyInfo != nullptr &&
"Unexpected input to hoistRegion.");
- assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&
- "Either AliasSetTracker or MemorySSA should be initialized.");
ControlFlowHoister CFH(LI, DT, CurLoop, MSSAU);
@@ -913,8 +883,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
if (!LoopNestMode && inSubLoop(BB, CurLoop, LI))
continue;
- for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
- Instruction &I = *II++;
+ for (Instruction &I : llvm::make_early_inc_range(*BB)) {
// Try constant folding this instruction. If all the operands are
// constants, it is technically hoistable, but it would be better to
// just fold it.
@@ -922,12 +891,10 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
&I, I.getModule()->getDataLayout(), TLI)) {
LLVM_DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C
<< '\n');
- if (CurAST)
- CurAST->copyValue(&I, C);
// FIXME MSSA: Such replacements may make accesses unoptimized (D51960).
I.replaceAllUsesWith(C);
if (isInstructionTriviallyDead(&I, TLI))
- eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
+ eraseInstruction(I, *SafetyInfo, MSSAU);
Changed = true;
continue;
}
@@ -940,8 +907,8 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
// and we have accurately duplicated the control flow from the loop header
// to that block.
if (CurLoop->hasLoopInvariantOperands(&I) &&
- canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags,
- ORE) &&
+ canSinkOrHoistInst(I, AA, DT, CurLoop, /*CurAST*/ nullptr, MSSAU,
+ true, &Flags, ORE) &&
worthSinkOrHoistInst(I, CurLoop->getLoopPreheader(), ORE, BFI) &&
isSafeToExecuteUnconditionally(
I, DT, TLI, CurLoop, SafetyInfo, ORE,
@@ -970,7 +937,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
SafetyInfo->insertInstructionTo(Product, I.getParent());
Product->insertAfter(&I);
I.replaceAllUsesWith(Product);
- eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
+ eraseInstruction(I, *SafetyInfo, MSSAU);
hoist(*ReciprocalDivisor, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB),
SafetyInfo, MSSAU, SE, ORE);
@@ -1049,7 +1016,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
}
}
}
- if (MSSAU && VerifyMemorySSA)
+ if (VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
// Now that we've finished hoisting make sure that LI and DT are still
@@ -1101,6 +1068,10 @@ static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,
return false;
Addr = BC->getOperand(0);
}
+ // If we've ended up at a global/constant, bail. We shouldn't be looking at
+ // uselists for non-local Values in a loop pass.
+ if (isa<Constant>(Addr))
+ return false;
unsigned UsesVisited = 0;
// Traverse all uses of the load operand value, to see if invariant.start is
@@ -1273,7 +1244,7 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
// writes to this memory in the loop, we can hoist or sink.
if (AAResults::onlyAccessesArgPointees(Behavior)) {
// TODO: expand to writeable arguments
- for (Value *Op : CI->arg_operands())
+ for (Value *Op : CI->args())
if (Op->getType()->isPointerTy()) {
bool Invalidated;
if (CurAST)
@@ -1443,7 +1414,8 @@ static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop,
/// (e.g., a GEP can be folded into a load as an addressing mode in the loop).
static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
const LoopSafetyInfo *SafetyInfo,
- TargetTransformInfo *TTI, bool &FreeInLoop) {
+ TargetTransformInfo *TTI, bool &FreeInLoop,
+ bool LoopNestMode) {
const auto &BlockColors = SafetyInfo->getBlockColors();
bool IsFree = isFreeInLoop(I, CurLoop, TTI);
for (const User *U : I.users()) {
@@ -1460,6 +1432,15 @@ static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
if (!BlockColors.empty() &&
BlockColors.find(const_cast<BasicBlock *>(BB))->second.size() != 1)
return false;
+
+ if (LoopNestMode) {
+ while (isa<PHINode>(UI) && UI->hasOneUser() &&
+ UI->getNumOperands() == 1) {
+ if (!CurLoop->contains(UI))
+ break;
+ UI = cast<Instruction>(UI->user_back());
+ }
+ }
}
if (CurLoop->contains(UI)) {
@@ -1546,9 +1527,7 @@ static Instruction *cloneInstructionInExitBlock(
}
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
- AliasSetTracker *AST, MemorySSAUpdater *MSSAU) {
- if (AST)
- AST->deleteValue(&I);
+ MemorySSAUpdater *MSSAU) {
if (MSSAU)
MSSAU->removeMemoryAccess(&I);
SafetyInfo.removeInstruction(&I);
@@ -1599,8 +1578,7 @@ static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo) {
// predecessor fairly simple.
if (!SafetyInfo->getBlockColors().empty() && BB->getFirstNonPHI()->isEHPad())
return false;
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
- BasicBlock *BBPred = *PI;
+ for (BasicBlock *BBPred : predecessors(BB)) {
if (isa<IndirectBrInst>(BBPred->getTerminator()) ||
isa<CallBrInst>(BBPred->getTerminator()))
return false;
@@ -1786,7 +1764,7 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
Instruction *New = sinkThroughTriviallyReplaceablePHI(
PN, &I, LI, SunkCopies, SafetyInfo, CurLoop, MSSAU);
PN->replaceAllUsesWith(New);
- eraseInstruction(*PN, *SafetyInfo, nullptr, nullptr);
+ eraseInstruction(*PN, *SafetyInfo, nullptr);
Changed = true;
}
return Changed;
@@ -1875,11 +1853,10 @@ class LoopPromoter : public LoadAndStorePromoter {
SmallVectorImpl<Instruction *> &LoopInsertPts;
SmallVectorImpl<MemoryAccess *> &MSSAInsertPts;
PredIteratorCache &PredCache;
- AliasSetTracker *AST;
MemorySSAUpdater *MSSAU;
LoopInfo &LI;
DebugLoc DL;
- int Alignment;
+ Align Alignment;
bool UnorderedAtomic;
AAMDNodes AATags;
ICFLoopSafetyInfo &SafetyInfo;
@@ -1907,13 +1884,13 @@ public:
SmallVectorImpl<BasicBlock *> &LEB,
SmallVectorImpl<Instruction *> &LIP,
SmallVectorImpl<MemoryAccess *> &MSSAIP, PredIteratorCache &PIC,
- AliasSetTracker *ast, MemorySSAUpdater *MSSAU, LoopInfo &li,
- DebugLoc dl, int alignment, bool UnorderedAtomic,
- const AAMDNodes &AATags, ICFLoopSafetyInfo &SafetyInfo)
+ MemorySSAUpdater *MSSAU, LoopInfo &li, DebugLoc dl,
+ Align Alignment, bool UnorderedAtomic, const AAMDNodes &AATags,
+ ICFLoopSafetyInfo &SafetyInfo)
: LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
LoopExitBlocks(LEB), LoopInsertPts(LIP), MSSAInsertPts(MSSAIP),
- PredCache(PIC), AST(ast), MSSAU(MSSAU), LI(li), DL(std::move(dl)),
- Alignment(alignment), UnorderedAtomic(UnorderedAtomic), AATags(AATags),
+ PredCache(PIC), MSSAU(MSSAU), LI(li), DL(std::move(dl)),
+ Alignment(Alignment), UnorderedAtomic(UnorderedAtomic), AATags(AATags),
SafetyInfo(SafetyInfo) {}
bool isInstInList(Instruction *I,
@@ -1940,39 +1917,29 @@ public:
StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
if (UnorderedAtomic)
NewSI->setOrdering(AtomicOrdering::Unordered);
- NewSI->setAlignment(Align(Alignment));
+ NewSI->setAlignment(Alignment);
NewSI->setDebugLoc(DL);
if (AATags)
NewSI->setAAMetadata(AATags);
- if (MSSAU) {
- MemoryAccess *MSSAInsertPoint = MSSAInsertPts[i];
- MemoryAccess *NewMemAcc;
- if (!MSSAInsertPoint) {
- NewMemAcc = MSSAU->createMemoryAccessInBB(
- NewSI, nullptr, NewSI->getParent(), MemorySSA::Beginning);
- } else {
- NewMemAcc =
- MSSAU->createMemoryAccessAfter(NewSI, nullptr, MSSAInsertPoint);
- }
- MSSAInsertPts[i] = NewMemAcc;
- MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true);
- // FIXME: true for safety, false may still be correct.
+ MemoryAccess *MSSAInsertPoint = MSSAInsertPts[i];
+ MemoryAccess *NewMemAcc;
+ if (!MSSAInsertPoint) {
+ NewMemAcc = MSSAU->createMemoryAccessInBB(
+ NewSI, nullptr, NewSI->getParent(), MemorySSA::Beginning);
+ } else {
+ NewMemAcc =
+ MSSAU->createMemoryAccessAfter(NewSI, nullptr, MSSAInsertPoint);
}
+ MSSAInsertPts[i] = NewMemAcc;
+ MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true);
+ // FIXME: true for safety, false may still be correct.
}
}
- void replaceLoadWithValue(LoadInst *LI, Value *V) const override {
- // Update alias analysis.
- if (AST)
- AST->copyValue(LI, V);
- }
void instructionDeleted(Instruction *I) const override {
SafetyInfo.removeInstruction(I);
- if (AST)
- AST->deleteValue(I);
- if (MSSAU)
- MSSAU->removeMemoryAccess(I);
+ MSSAU->removeMemoryAccess(I);
}
};
@@ -2023,8 +1990,8 @@ bool llvm::promoteLoopAccessesToScalars(
SmallVectorImpl<Instruction *> &InsertPts,
SmallVectorImpl<MemoryAccess *> &MSSAInsertPts, PredIteratorCache &PIC,
LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
- Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
- ICFLoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) {
+ Loop *CurLoop, MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo,
+ OptimizationRemarkEmitter *ORE) {
// Verify inputs.
assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
SafetyInfo != nullptr &&
@@ -2189,9 +2156,9 @@ bool llvm::promoteLoopAccessesToScalars(
// Merge the AA tags.
if (LoopUses.empty()) {
// On the first load/store, just take its AA tags.
- UI->getAAMetadata(AATags);
+ AATags = UI->getAAMetadata();
} else if (AATags) {
- UI->getAAMetadata(AATags, /* Merge = */ true);
+ AATags = AATags.merge(UI->getAAMetadata());
}
LoopUses.push_back(UI);
@@ -2256,9 +2223,8 @@ bool llvm::promoteLoopAccessesToScalars(
SmallVector<PHINode *, 16> NewPHIs;
SSAUpdater SSA(&NewPHIs);
LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
- InsertPts, MSSAInsertPts, PIC, CurAST, MSSAU, *LI, DL,
- Alignment.value(), SawUnorderedAtomic, AATags,
- *SafetyInfo);
+ InsertPts, MSSAInsertPts, PIC, MSSAU, *LI, DL,
+ Alignment, SawUnorderedAtomic, AATags, *SafetyInfo);
// Set up the preheader to have a definition of the value. It is the live-out
// value from the preheader that uses in the loop will use.
@@ -2273,24 +2239,22 @@ bool llvm::promoteLoopAccessesToScalars(
PreheaderLoad->setAAMetadata(AATags);
SSA.AddAvailableValue(Preheader, PreheaderLoad);
- if (MSSAU) {
- MemoryAccess *PreheaderLoadMemoryAccess = MSSAU->createMemoryAccessInBB(
- PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End);
- MemoryUse *NewMemUse = cast<MemoryUse>(PreheaderLoadMemoryAccess);
- MSSAU->insertUse(NewMemUse, /*RenameUses=*/true);
- }
+ MemoryAccess *PreheaderLoadMemoryAccess = MSSAU->createMemoryAccessInBB(
+ PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End);
+ MemoryUse *NewMemUse = cast<MemoryUse>(PreheaderLoadMemoryAccess);
+ MSSAU->insertUse(NewMemUse, /*RenameUses=*/true);
- if (MSSAU && VerifyMemorySSA)
+ if (VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
// Rewrite all the loads in the loop and remember all the definitions from
// stores in the loop.
Promoter.run(LoopUses);
- if (MSSAU && VerifyMemorySSA)
+ if (VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
// If the SSAUpdater didn't use the load in the preheader, just zap it now.
if (PreheaderLoad->use_empty())
- eraseInstruction(*PreheaderLoad, *SafetyInfo, CurAST, MSSAU);
+ eraseInstruction(*PreheaderLoad, *SafetyInfo, MSSAU);
return true;
}
@@ -2356,71 +2320,10 @@ collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L) {
return Result;
}
-/// Returns an owning pointer to an alias set which incorporates aliasing info
-/// from L and all subloops of L.
-std::unique_ptr<AliasSetTracker>
-LoopInvariantCodeMotion::collectAliasInfoForLoop(Loop *L, LoopInfo *LI,
- AAResults *AA) {
- auto CurAST = std::make_unique<AliasSetTracker>(*AA);
-
- // Add everything from all the sub loops.
- for (Loop *InnerL : L->getSubLoops())
- for (BasicBlock *BB : InnerL->blocks())
- CurAST->add(*BB);
-
- // And merge in this loop (without anything from inner loops).
- for (BasicBlock *BB : L->blocks())
- if (LI->getLoopFor(BB) == L)
- CurAST->add(*BB);
-
- return CurAST;
-}
-
static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
AliasSetTracker *CurAST, Loop *CurLoop,
AAResults *AA) {
- // First check to see if any of the basic blocks in CurLoop invalidate *V.
- bool isInvalidatedAccordingToAST = CurAST->getAliasSetFor(MemLoc).isMod();
-
- if (!isInvalidatedAccordingToAST || !LICMN2Theshold)
- return isInvalidatedAccordingToAST;
-
- // Check with a diagnostic analysis if we can refine the information above.
- // This is to identify the limitations of using the AST.
- // The alias set mechanism used by LICM has a major weakness in that it
- // combines all things which may alias into a single set *before* asking
- // modref questions. As a result, a single readonly call within a loop will
- // collapse all loads and stores into a single alias set and report
- // invalidation if the loop contains any store. For example, readonly calls
- // with deopt states have this form and create a general alias set with all
- // loads and stores. In order to get any LICM in loops containing possible
- // deopt states we need a more precise invalidation of checking the mod ref
- // info of each instruction within the loop and LI. This has a complexity of
- // O(N^2), so currently, it is used only as a diagnostic tool since the
- // default value of LICMN2Threshold is zero.
-
- // Don't look at nested loops.
- if (CurLoop->begin() != CurLoop->end())
- return true;
-
- int N = 0;
- for (BasicBlock *BB : CurLoop->getBlocks())
- for (Instruction &I : *BB) {
- if (N >= LICMN2Theshold) {
- LLVM_DEBUG(dbgs() << "Alasing N2 threshold exhausted for "
- << *(MemLoc.Ptr) << "\n");
- return true;
- }
- N++;
- auto Res = AA->getModRefInfo(&I, MemLoc);
- if (isModSet(Res)) {
- LLVM_DEBUG(dbgs() << "Aliasing failed on " << I << " for "
- << *(MemLoc.Ptr) << "\n");
- return true;
- }
- }
- LLVM_DEBUG(dbgs() << "Aliasing okay for " << *(MemLoc.Ptr) << "\n");
- return false;
+ return CurAST->getAliasSetFor(MemLoc).isMod();
}
bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
diff --git a/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp b/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp
index 993b154dc9a8..d438d56e38ca 100644
--- a/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/LoopBoundSplit.h"
+#include "llvm/ADT/Sequence.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -39,10 +40,12 @@ struct ConditionInfo {
ICmpInst::Predicate Pred;
/// AddRec llvm value
Value *AddRecValue;
+ /// Non-PHI AddRec llvm value
+ Value *NonPHIAddRecValue;
/// Bound llvm value
Value *BoundValue;
/// AddRec SCEV
- const SCEV *AddRecSCEV;
+ const SCEVAddRecExpr *AddRecSCEV;
/// Bound SCEV
const SCEV *BoundSCEV;
@@ -54,19 +57,31 @@ struct ConditionInfo {
} // namespace
static void analyzeICmp(ScalarEvolution &SE, ICmpInst *ICmp,
- ConditionInfo &Cond) {
+ ConditionInfo &Cond, const Loop &L) {
Cond.ICmp = ICmp;
if (match(ICmp, m_ICmp(Cond.Pred, m_Value(Cond.AddRecValue),
m_Value(Cond.BoundValue)))) {
- Cond.AddRecSCEV = SE.getSCEV(Cond.AddRecValue);
- Cond.BoundSCEV = SE.getSCEV(Cond.BoundValue);
+ const SCEV *AddRecSCEV = SE.getSCEV(Cond.AddRecValue);
+ const SCEV *BoundSCEV = SE.getSCEV(Cond.BoundValue);
+ const SCEVAddRecExpr *LHSAddRecSCEV = dyn_cast<SCEVAddRecExpr>(AddRecSCEV);
+ const SCEVAddRecExpr *RHSAddRecSCEV = dyn_cast<SCEVAddRecExpr>(BoundSCEV);
// Locate AddRec in LHSSCEV and Bound in RHSSCEV.
- if (isa<SCEVAddRecExpr>(Cond.BoundSCEV) &&
- !isa<SCEVAddRecExpr>(Cond.AddRecSCEV)) {
+ if (!LHSAddRecSCEV && RHSAddRecSCEV) {
std::swap(Cond.AddRecValue, Cond.BoundValue);
- std::swap(Cond.AddRecSCEV, Cond.BoundSCEV);
+ std::swap(AddRecSCEV, BoundSCEV);
Cond.Pred = ICmpInst::getSwappedPredicate(Cond.Pred);
}
+
+ Cond.AddRecSCEV = dyn_cast<SCEVAddRecExpr>(AddRecSCEV);
+ Cond.BoundSCEV = BoundSCEV;
+ Cond.NonPHIAddRecValue = Cond.AddRecValue;
+
+ // If the Cond.AddRecValue is PHI node, update Cond.NonPHIAddRecValue with
+ // value from backedge.
+ if (Cond.AddRecSCEV && isa<PHINode>(Cond.AddRecValue)) {
+ PHINode *PN = cast<PHINode>(Cond.AddRecValue);
+ Cond.NonPHIAddRecValue = PN->getIncomingValueForBlock(L.getLoopLatch());
+ }
}
}
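
As a rough standalone model of the canonicalization performed here (plain C++ with invented names, not the LLVM API): when the AddRec is matched on the right-hand side, the operands are swapped and the predicate is replaced by its swapped form, so the induction expression always ends up on the left.

    #include <cassert>
    #include <utility>

    enum class Pred { LT, LE, GT, GE };

    // Swapping operands turns "a < b" into "b > a", and so on.
    Pred swappedPredicate(Pred P) {
      switch (P) {
      case Pred::LT: return Pred::GT;
      case Pred::LE: return Pred::GE;
      case Pred::GT: return Pred::LT;
      case Pred::GE: return Pred::LE;
      }
      return P;
    }

    struct Cond { int LHS; int RHS; Pred P; bool LHSIsAddRec; };

    void canonicalize(Cond &C) {
      if (!C.LHSIsAddRec) {          // the AddRec was matched on the RHS
        std::swap(C.LHS, C.RHS);
        C.P = swappedPredicate(C.P);
        C.LHSIsAddRec = true;
      }
    }

    int main() {
      Cond C{/*LHS=*/100, /*RHS=*/0, Pred::GT, /*LHSIsAddRec=*/false}; // 100 > i
      canonicalize(C);               // becomes i < 100
      assert(C.P == Pred::LT && C.LHS == 0 && C.RHS == 100);
    }
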
@@ -118,21 +133,20 @@ static bool calculateUpperBound(const Loop &L, ScalarEvolution &SE,
static bool hasProcessableCondition(const Loop &L, ScalarEvolution &SE,
ICmpInst *ICmp, ConditionInfo &Cond,
bool IsExitCond) {
- analyzeICmp(SE, ICmp, Cond);
+ analyzeICmp(SE, ICmp, Cond, L);
// The BoundSCEV should be evaluated at loop entry.
if (!SE.isAvailableAtLoopEntry(Cond.BoundSCEV, &L))
return false;
- const SCEVAddRecExpr *AddRecSCEV = dyn_cast<SCEVAddRecExpr>(Cond.AddRecSCEV);
// Allowed AddRec as induction variable.
- if (!AddRecSCEV)
+ if (!Cond.AddRecSCEV)
return false;
- if (!AddRecSCEV->isAffine())
+ if (!Cond.AddRecSCEV->isAffine())
return false;
- const SCEV *StepRecSCEV = AddRecSCEV->getStepRecurrence(SE);
+ const SCEV *StepRecSCEV = Cond.AddRecSCEV->getStepRecurrence(SE);
// Allowed constant step.
if (!isa<SCEVConstant>(StepRecSCEV))
return false;
@@ -264,6 +278,14 @@ static BranchInst *findSplitCandidate(const Loop &L, ScalarEvolution &SE,
SplitCandidateCond.BoundSCEV->getType())
continue;
+ // After transformation, we assume the split condition of the pre-loop is
+ // always true. In order to guarantee it, we need to check the start value
+ // of the split cond AddRec satisfies the split condition.
+ if (!SE.isLoopEntryGuardedByCond(&L, SplitCandidateCond.Pred,
+ SplitCandidateCond.AddRecSCEV->getStart(),
+ SplitCandidateCond.BoundSCEV))
+ continue;
+
SplitCandidateCond.BI = BI;
return BI;
}
@@ -341,13 +363,45 @@ static bool splitLoopBound(Loop &L, DominatorTree &DT, LoopInfo &LI,
".split", &LI, &DT, PostLoopBlocks);
remapInstructionsInBlocks(PostLoopBlocks, VMap);
- // Add conditional branch to check we can skip post-loop in its preheader.
BasicBlock *PostLoopPreHeader = PostLoop->getLoopPreheader();
- IRBuilder<> Builder(PostLoopPreHeader);
+ IRBuilder<> Builder(&PostLoopPreHeader->front());
+
+ // Update phi nodes in header of post-loop.
+ bool isExitingLatch =
+ (L.getExitingBlock() == L.getLoopLatch()) ? true : false;
+ Value *ExitingCondLCSSAPhi = nullptr;
+ for (PHINode &PN : L.getHeader()->phis()) {
+ // Create LCSSA phi node in preheader of post-loop.
+ PHINode *LCSSAPhi =
+ Builder.CreatePHI(PN.getType(), 1, PN.getName() + ".lcssa");
+ LCSSAPhi->setDebugLoc(PN.getDebugLoc());
+ // If the exiting block is loop latch, the phi does not have the update at
+ // last iteration. In this case, update lcssa phi with value from backedge.
+ LCSSAPhi->addIncoming(
+ isExitingLatch ? PN.getIncomingValueForBlock(L.getLoopLatch()) : &PN,
+ L.getExitingBlock());
+
+ // Update the start value of phi node in post-loop with the LCSSA phi node.
+ PHINode *PostLoopPN = cast<PHINode>(VMap[&PN]);
+ PostLoopPN->setIncomingValueForBlock(PostLoopPreHeader, LCSSAPhi);
+
+ // Find PHI with exiting condition from pre-loop. The PHI should be
+ // SCEVAddRecExpr and have same incoming value from backedge with
+ // ExitingCond.
+ if (!SE.isSCEVable(PN.getType()))
+ continue;
+
+ const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(&PN));
+ if (PhiSCEV && ExitingCond.NonPHIAddRecValue ==
+ PN.getIncomingValueForBlock(L.getLoopLatch()))
+ ExitingCondLCSSAPhi = LCSSAPhi;
+ }
+
+ // Add conditional branch to check we can skip post-loop in its preheader.
Instruction *OrigBI = PostLoopPreHeader->getTerminator();
ICmpInst::Predicate Pred = ICmpInst::ICMP_NE;
Value *Cond =
- Builder.CreateICmp(Pred, ExitingCond.AddRecValue, ExitingCond.BoundValue);
+ Builder.CreateICmp(Pred, ExitingCondLCSSAPhi, ExitingCond.BoundValue);
Builder.CreateCondBr(Cond, PostLoop->getHeader(), PostLoop->getExitBlock());
OrigBI->eraseFromParent();
@@ -368,21 +422,6 @@ static bool splitLoopBound(Loop &L, DominatorTree &DT, LoopInfo &LI,
// Replace exiting bound value of pre-loop NewBound.
ExitingCond.ICmp->setOperand(1, NewBoundValue);
- // Replace IV's start value of post-loop by NewBound.
- for (PHINode &PN : L.getHeader()->phis()) {
- // Find PHI with exiting condition from pre-loop.
- if (SE.isSCEVable(PN.getType()) && isa<SCEVAddRecExpr>(SE.getSCEV(&PN))) {
- for (Value *Op : PN.incoming_values()) {
- if (Op == ExitingCond.AddRecValue) {
- // Find cloned PHI for post-loop.
- PHINode *PostLoopPN = cast<PHINode>(VMap[&PN]);
- PostLoopPN->setIncomingValueForBlock(PostLoopPreHeader,
- NewBoundValue);
- }
- }
- }
- }
-
// Replace SplitCandidateCond.BI's condition of pre-loop by True.
LLVMContext &Context = PreHeader->getContext();
SplitCandidateCond.BI->setCondition(ConstantInt::getTrue(Context));
@@ -398,6 +437,30 @@ static bool splitLoopBound(Loop &L, DominatorTree &DT, LoopInfo &LI,
else
ExitingCond.BI->setSuccessor(1, PostLoopPreHeader);
+ // Update phi node in exit block of post-loop.
+ Builder.SetInsertPoint(&PostLoopPreHeader->front());
+ for (PHINode &PN : PostLoop->getExitBlock()->phis()) {
+ for (auto i : seq<int>(0, PN.getNumOperands())) {
+ // Check incoming block is pre-loop's exiting block.
+ if (PN.getIncomingBlock(i) == L.getExitingBlock()) {
+ Value *IncomingValue = PN.getIncomingValue(i);
+
+ // Create LCSSA phi node for incoming value.
+ PHINode *LCSSAPhi =
+ Builder.CreatePHI(PN.getType(), 1, PN.getName() + ".lcssa");
+ LCSSAPhi->setDebugLoc(PN.getDebugLoc());
+ LCSSAPhi->addIncoming(IncomingValue, PN.getIncomingBlock(i));
+
+ // Replace pre-loop's exiting block by post-loop's preheader.
+ PN.setIncomingBlock(i, PostLoopPreHeader);
+ // Replace incoming value by LCSSAPhi.
+ PN.setIncomingValue(i, LCSSAPhi);
+ // Add a new incoming value with post-loop's exiting block.
+ PN.addIncoming(VMap[IncomingValue], PostLoop->getExitingBlock());
+ }
+ }
+ }
+
// Update dominator tree.
DT.changeImmediateDominator(PostLoopPreHeader, L.getExitingBlock());
DT.changeImmediateDominator(PostLoop->getExitBlock(), PostLoopPreHeader);
@@ -406,10 +469,7 @@ static bool splitLoopBound(Loop &L, DominatorTree &DT, LoopInfo &LI,
SE.forgetLoop(&L);
// Canonicalize loops.
- // TODO: Try to update LCSSA information according to above change.
- formLCSSA(L, DT, &LI, &SE);
simplifyLoop(&L, &DT, &LI, &SE, nullptr, nullptr, true);
- formLCSSA(*PostLoop, DT, &LI, &SE);
simplifyLoop(PostLoop, &DT, &LI, &SE, nullptr, nullptr, true);
// Add new post-loop to loop pass manager.
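
At the source level, the overall transformation this file implements looks roughly like the hypothetical sketch below (illustrative only, not taken from the pass's tests): the original loop is split into a pre-loop whose split condition is known true, plus a post-loop for the remaining iterations, guarded in the post-loop preheader.

    #include <algorithm>

    void before(int *a, int n, int m) {
      for (int i = 0; i < n; ++i) {
        if (i < m)            // split candidate condition
          a[i] += 1;
        else
          a[i] += 2;
      }
    }

    void after(int *a, int n, int m) {
      int split = std::min(n, m);
      int i = 0;
      for (; i < split; ++i)  // pre-loop: "i < m" is known true here,
        a[i] += 1;            // so the branch folds away
      if (i != n)             // guard added in the post-loop preheader
        for (; i < n; ++i)
          a[i] += 2;
    }
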
diff --git a/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp b/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
index a5d7835bd094..77d76609c926 100644
--- a/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
@@ -29,6 +29,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
@@ -127,6 +128,8 @@ public:
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addPreserved<ScalarEvolutionWrapperPass>();
@@ -143,6 +146,7 @@ INITIALIZE_PASS_BEGIN(LoopDataPrefetchLegacyPass, "loop-data-prefetch",
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_END(LoopDataPrefetchLegacyPass, "loop-data-prefetch",
diff --git a/llvm/lib/Transforms/Scalar/LoopDeletion.cpp b/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
index f7e8442fae81..5814e2f043d5 100644
--- a/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -36,6 +36,8 @@ using namespace llvm;
#define DEBUG_TYPE "loop-delete"
STATISTIC(NumDeleted, "Number of loops deleted");
+STATISTIC(NumBackedgesBroken,
+ "Number of loops for which we managed to break the backedge");
static cl::opt<bool> EnableSymbolicExecution(
"loop-deletion-enable-symbolic-execution", cl::Hidden, cl::init(true),
@@ -191,6 +193,20 @@ getValueOnFirstIteration(Value *V, DenseMap<Value *, Value *> &FirstIterValue,
Value *RHS =
getValueOnFirstIteration(BO->getOperand(1), FirstIterValue, SQ);
FirstIterV = SimplifyBinOp(BO->getOpcode(), LHS, RHS, SQ);
+ } else if (auto *Cmp = dyn_cast<ICmpInst>(V)) {
+ Value *LHS =
+ getValueOnFirstIteration(Cmp->getOperand(0), FirstIterValue, SQ);
+ Value *RHS =
+ getValueOnFirstIteration(Cmp->getOperand(1), FirstIterValue, SQ);
+ FirstIterV = SimplifyICmpInst(Cmp->getPredicate(), LHS, RHS, SQ);
+ } else if (auto *Select = dyn_cast<SelectInst>(V)) {
+ Value *Cond =
+ getValueOnFirstIteration(Select->getCondition(), FirstIterValue, SQ);
+ if (auto *C = dyn_cast<ConstantInt>(Cond)) {
+ auto *Selected = C->isAllOnesValue() ? Select->getTrueValue()
+ : Select->getFalseValue();
+ FirstIterV = getValueOnFirstIteration(Selected, FirstIterValue, SQ);
+ }
}
if (!FirstIterV)
FirstIterV = V;
@@ -314,22 +330,20 @@ static bool canProveExitOnFirstIteration(Loop *L, DominatorTree &DT,
}
using namespace PatternMatch;
- ICmpInst::Predicate Pred;
- Value *LHS, *RHS;
+ Value *Cond;
BasicBlock *IfTrue, *IfFalse;
auto *Term = BB->getTerminator();
- if (match(Term, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
+ if (match(Term, m_Br(m_Value(Cond),
m_BasicBlock(IfTrue), m_BasicBlock(IfFalse)))) {
- if (!LHS->getType()->isIntegerTy()) {
+ auto *ICmp = dyn_cast<ICmpInst>(Cond);
+ if (!ICmp || !ICmp->getType()->isIntegerTy()) {
MarkAllSuccessorsLive(BB);
continue;
}
// Can we prove constant true or false for this condition?
- LHS = getValueOnFirstIteration(LHS, FirstIterValue, SQ);
- RHS = getValueOnFirstIteration(RHS, FirstIterValue, SQ);
- auto *KnownCondition = SimplifyICmpInst(Pred, LHS, RHS, SQ);
- if (!KnownCondition) {
+ auto *KnownCondition = getValueOnFirstIteration(ICmp, FirstIterValue, SQ);
+ if (KnownCondition == ICmp) {
// Failed to simplify.
MarkAllSuccessorsLive(BB);
continue;
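
The new ICmpInst and SelectInst cases let the first-iteration walk fold conditions as in the simplified model below (plain integers standing in for llvm::Value, names invented): once both operands' first-iteration values are known constants, the compare folds to a boolean and the select collapses to one arm, so the branch on it becomes statically decidable.

    #include <cassert>
    #include <optional>

    std::optional<bool> foldICmpSLT(std::optional<long> L, std::optional<long> R) {
      if (L && R)
        return *L < *R;
      return std::nullopt; // could not simplify
    }

    std::optional<long> foldSelect(std::optional<bool> C, long TrueV, long FalseV) {
      if (C)
        return *C ? TrueV : FalseV;
      return std::nullopt;
    }

    int main() {
      // On the first iteration i == 0, so "i < 10" folds to true and the
      // select yields its true arm; a branch on the result is then taken.
      auto Cmp = foldICmpSLT(/*i=*/0, /*bound=*/10);
      auto Sel = foldSelect(Cmp, /*TrueV=*/1, /*FalseV=*/2);
      assert(Sel && *Sel == 1);
    }
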
@@ -393,14 +407,25 @@ breakBackedgeIfNotTaken(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
if (!L->getLoopLatch())
return LoopDeletionResult::Unmodified;
- auto *BTC = SE.getBackedgeTakenCount(L);
- if (!isa<SCEVCouldNotCompute>(BTC) && SE.isKnownNonZero(BTC))
- return LoopDeletionResult::Unmodified;
- if (!BTC->isZero() && !canProveExitOnFirstIteration(L, DT, LI))
- return LoopDeletionResult::Unmodified;
+ auto *BTC = SE.getSymbolicMaxBackedgeTakenCount(L);
+ if (BTC->isZero()) {
+ // SCEV knows this backedge isn't taken!
+ breakLoopBackedge(L, DT, SE, LI, MSSA);
+ ++NumBackedgesBroken;
+ return LoopDeletionResult::Deleted;
+ }
- breakLoopBackedge(L, DT, SE, LI, MSSA);
- return LoopDeletionResult::Deleted;
+ // If SCEV leaves open the possibility of a zero trip count, see if
+ // symbolically evaluating the first iteration lets us prove the backedge
+ // unreachable.
+ if (isa<SCEVCouldNotCompute>(BTC) || !SE.isKnownNonZero(BTC))
+ if (canProveExitOnFirstIteration(L, DT, LI)) {
+ breakLoopBackedge(L, DT, SE, LI, MSSA);
+ ++NumBackedgesBroken;
+ return LoopDeletionResult::Deleted;
+ }
+
+ return LoopDeletionResult::Unmodified;
}
/// Remove a loop if it is dead.
diff --git a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
index bac3dc0f3fb9..0f4c767c1e4c 100644
--- a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
@@ -1057,8 +1057,8 @@ PreservedAnalyses LoopDistributePass::run(Function &F,
auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
std::function<const LoopAccessInfo &(Loop &)> GetLAA =
[&](Loop &L) -> const LoopAccessInfo & {
- LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE,
- TLI, TTI, nullptr, nullptr};
+ LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE,
+ TLI, TTI, nullptr, nullptr, nullptr};
return LAM.getResult<LoopAccessAnalysis>(L, AR);
};
diff --git a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
index f54289f85ef5..965d1575518e 100644
--- a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
@@ -27,6 +27,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/LoopFlatten.h"
+
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
@@ -49,11 +51,13 @@
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
-#define DEBUG_TYPE "loop-flatten"
-
using namespace llvm;
using namespace llvm::PatternMatch;
+#define DEBUG_TYPE "loop-flatten"
+
+STATISTIC(NumFlattened, "Number of loops flattened");
+
static cl::opt<unsigned> RepeatedInstructionThreshold(
"loop-flatten-cost-threshold", cl::Hidden, cl::init(2),
cl::desc("Limit on the cost of instructions that can be repeated due to "
@@ -90,9 +94,33 @@ struct FlattenInfo {
// Whether this holds the flatten info before or after widening.
bool Widened = false;
+ // Holds the old/narrow induction phis, i.e. the Phis before IV widening has
+ // been applied. This bookkeeping is used so we can skip some checks on these
+ // phi nodes.
+ PHINode *NarrowInnerInductionPHI = nullptr;
+ PHINode *NarrowOuterInductionPHI = nullptr;
+
FlattenInfo(Loop *OL, Loop *IL) : OuterLoop(OL), InnerLoop(IL) {};
+
+ bool isNarrowInductionPhi(PHINode *Phi) {
+ // This can't be the narrow phi if we haven't widened the IV first.
+ if (!Widened)
+ return false;
+ return NarrowInnerInductionPHI == Phi || NarrowOuterInductionPHI == Phi;
+ }
};
+static bool
+setLoopComponents(Value *&TC, Value *&TripCount, BinaryOperator *&Increment,
+ SmallPtrSetImpl<Instruction *> &IterationInstructions) {
+ TripCount = TC;
+ IterationInstructions.insert(Increment);
+ LLVM_DEBUG(dbgs() << "Found Increment: "; Increment->dump());
+ LLVM_DEBUG(dbgs() << "Found trip count: "; TripCount->dump());
+ LLVM_DEBUG(dbgs() << "Successfully found all loop components\n");
+ return true;
+}
+
// Finds the induction variable, increment and trip count for a simple loop that
// we can flatten.
static bool findLoopComponents(
@@ -164,36 +192,68 @@ static bool findLoopComponents(
return false;
}
// The trip count is the RHS of the compare. If this doesn't match the trip
- // count computed by SCEV then this is either because the trip count variable
- // has been widened (then leave the trip count as it is), or because it is a
- // constant and another transformation has changed the compare, e.g.
- // icmp ult %inc, tripcount -> icmp ult %j, tripcount-1, then we don't flatten
- // the loop (yet).
- TripCount = Compare->getOperand(1);
+ // count computed by SCEV then this is because the trip count variable
+ // has been widened so the types don't match, or because it is a constant and
+ // another transformation has changed the compare (e.g. icmp ult %inc,
+ // tripcount -> icmp ult %j, tripcount-1), or both.
+ Value *RHS = Compare->getOperand(1);
+ const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
+ if (isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
+ LLVM_DEBUG(dbgs() << "Backedge-taken count is not predictable\n");
+ return false;
+ }
+ // The use of the Extend=false flag on getTripCountFromExitCount was added
+ // during a refactoring to preserve existing behavior. However, there's
+ // nothing obvious in the surrounding code that handles the overflow case.
+ // FIXME: audit code to establish whether there's a latent bug here.
const SCEV *SCEVTripCount =
- SE->getTripCountFromExitCount(SE->getBackedgeTakenCount(L));
- if (SE->getSCEV(TripCount) != SCEVTripCount) {
- if (!IsWidened) {
- LLVM_DEBUG(dbgs() << "Could not find valid trip count\n");
- return false;
- }
- auto TripCountInst = dyn_cast<Instruction>(TripCount);
- if (!TripCountInst) {
- LLVM_DEBUG(dbgs() << "Could not find valid extended trip count\n");
- return false;
+ SE->getTripCountFromExitCount(BackedgeTakenCount, false);
+ const SCEV *SCEVRHS = SE->getSCEV(RHS);
+ if (SCEVRHS == SCEVTripCount)
+ return setLoopComponents(RHS, TripCount, Increment, IterationInstructions);
+ ConstantInt *ConstantRHS = dyn_cast<ConstantInt>(RHS);
+ if (ConstantRHS) {
+ const SCEV *BackedgeTCExt = nullptr;
+ if (IsWidened) {
+ const SCEV *SCEVTripCountExt;
+ // Find the extended backedge taken count and extended trip count using
+ // SCEV. One of these should now match the RHS of the compare.
+ BackedgeTCExt = SE->getZeroExtendExpr(BackedgeTakenCount, RHS->getType());
+ SCEVTripCountExt = SE->getTripCountFromExitCount(BackedgeTCExt, false);
+ if (SCEVRHS != BackedgeTCExt && SCEVRHS != SCEVTripCountExt) {
+ LLVM_DEBUG(dbgs() << "Could not find valid trip count\n");
+ return false;
+ }
}
- if ((!isa<ZExtInst>(TripCountInst) && !isa<SExtInst>(TripCountInst)) ||
- SE->getSCEV(TripCountInst->getOperand(0)) != SCEVTripCount) {
- LLVM_DEBUG(dbgs() << "Could not find valid extended trip count\n");
- return false;
+ // If the RHS of the compare is equal to the backedge taken count we need
+ // to add one to get the trip count.
+ if (SCEVRHS == BackedgeTCExt || SCEVRHS == BackedgeTakenCount) {
+ ConstantInt *One = ConstantInt::get(ConstantRHS->getType(), 1);
+ Value *NewRHS = ConstantInt::get(
+ ConstantRHS->getContext(), ConstantRHS->getValue() + One->getValue());
+ return setLoopComponents(NewRHS, TripCount, Increment,
+ IterationInstructions);
}
+ return setLoopComponents(RHS, TripCount, Increment, IterationInstructions);
}
- IterationInstructions.insert(Increment);
- LLVM_DEBUG(dbgs() << "Found increment: "; Increment->dump());
- LLVM_DEBUG(dbgs() << "Found trip count: "; TripCount->dump());
-
- LLVM_DEBUG(dbgs() << "Successfully found all loop components\n");
- return true;
+ // If the RHS isn't a constant then check that the reason it doesn't match
+ // the SCEV trip count is because the RHS is a ZExt or SExt instruction
+ // (and take the trip count to be the RHS).
+ if (!IsWidened) {
+ LLVM_DEBUG(dbgs() << "Could not find valid trip count\n");
+ return false;
+ }
+ auto *TripCountInst = dyn_cast<Instruction>(RHS);
+ if (!TripCountInst) {
+ LLVM_DEBUG(dbgs() << "Could not find valid trip count\n");
+ return false;
+ }
+ if ((!isa<ZExtInst>(TripCountInst) && !isa<SExtInst>(TripCountInst)) ||
+ SE->getSCEV(TripCountInst->getOperand(0)) != SCEVTripCount) {
+ LLVM_DEBUG(dbgs() << "Could not find valid extended trip count\n");
+ return false;
+ }
+ return setLoopComponents(RHS, TripCount, Increment, IterationInstructions);
}
static bool checkPHIs(FlattenInfo &FI, const TargetTransformInfo *TTI) {
@@ -221,6 +281,8 @@ static bool checkPHIs(FlattenInfo &FI, const TargetTransformInfo *TTI) {
// them specially when doing the transformation.
if (&InnerPHI == FI.InnerInductionPHI)
continue;
+ if (FI.isNarrowInductionPhi(&InnerPHI))
+ continue;
// Each inner loop PHI node must have two incoming values/blocks - one
// from the pre-header, and one from the latch.
@@ -266,6 +328,8 @@ static bool checkPHIs(FlattenInfo &FI, const TargetTransformInfo *TTI) {
}
for (PHINode &OuterPHI : FI.OuterLoop->getHeader()->phis()) {
+ if (FI.isNarrowInductionPhi(&OuterPHI))
+ continue;
if (!SafeOuterPHIs.count(&OuterPHI)) {
LLVM_DEBUG(dbgs() << "found unsafe PHI in outer loop: "; OuterPHI.dump());
return false;
@@ -356,18 +420,25 @@ static bool checkIVUsers(FlattenInfo &FI) {
if (U == FI.InnerIncrement)
continue;
- // After widening the IVs, a trunc instruction might have been introduced, so
- // look through truncs.
+ // After widening the IVs, a trunc instruction might have been introduced,
+ // so look through truncs.
if (isa<TruncInst>(U)) {
if (!U->hasOneUse())
return false;
U = *U->user_begin();
}
+ // If the use is in the compare (which is also the condition of the inner
+ // branch) then the compare has been altered by another transformation e.g
+ // icmp ult %inc, tripcount -> icmp ult %j, tripcount-1, where tripcount is
+ // a constant. Ignore this use as the compare gets removed later anyway.
+ if (U == FI.InnerBranch->getCondition())
+ continue;
+
LLVM_DEBUG(dbgs() << "Found use of inner induction variable: "; U->dump());
- Value *MatchedMul;
- Value *MatchedItCount;
+ Value *MatchedMul = nullptr;
+ Value *MatchedItCount = nullptr;
bool IsAdd = match(U, m_c_Add(m_Specific(FI.InnerInductionPHI),
m_Value(MatchedMul))) &&
match(MatchedMul, m_c_Mul(m_Specific(FI.OuterInductionPHI),
@@ -375,11 +446,23 @@ static bool checkIVUsers(FlattenInfo &FI) {
// Matches the same pattern as above, except it also looks for truncs
// on the phi, which can be the result of widening the induction variables.
- bool IsAddTrunc = match(U, m_c_Add(m_Trunc(m_Specific(FI.InnerInductionPHI)),
- m_Value(MatchedMul))) &&
- match(MatchedMul,
- m_c_Mul(m_Trunc(m_Specific(FI.OuterInductionPHI)),
- m_Value(MatchedItCount)));
+ bool IsAddTrunc =
+ match(U, m_c_Add(m_Trunc(m_Specific(FI.InnerInductionPHI)),
+ m_Value(MatchedMul))) &&
+ match(MatchedMul, m_c_Mul(m_Trunc(m_Specific(FI.OuterInductionPHI)),
+ m_Value(MatchedItCount)));
+
+ if (!MatchedItCount)
+ return false;
+ // Look through extends if the IV has been widened.
+ if (FI.Widened &&
+ (isa<SExtInst>(MatchedItCount) || isa<ZExtInst>(MatchedItCount))) {
+ assert(MatchedItCount->getType() == FI.InnerInductionPHI->getType() &&
+ "Unexpected type mismatch in types after widening");
+ MatchedItCount = isa<SExtInst>(MatchedItCount)
+ ? dyn_cast<SExtInst>(MatchedItCount)->getOperand(0)
+ : dyn_cast<ZExtInst>(MatchedItCount)->getOperand(0);
+ }
if ((IsAdd || IsAddTrunc) && MatchedItCount == InnerTripCount) {
LLVM_DEBUG(dbgs() << "Use is optimisable\n");
@@ -451,17 +534,27 @@ static OverflowResult checkOverflow(FlattenInfo &FI, DominatorTree *DT,
for (Value *V : FI.LinearIVUses) {
for (Value *U : V->users()) {
if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
- // The IV is used as the operand of a GEP, and the IV is at least as
- // wide as the address space of the GEP. In this case, the GEP would
- // wrap around the address space before the IV increment wraps, which
- // would be UB.
- if (GEP->isInBounds() &&
- V->getType()->getIntegerBitWidth() >=
- DL.getPointerTypeSizeInBits(GEP->getType())) {
- LLVM_DEBUG(
- dbgs() << "use of linear IV would be UB if overflow occurred: ";
- GEP->dump());
- return OverflowResult::NeverOverflows;
+ for (Value *GEPUser : U->users()) {
+ Instruction *GEPUserInst = dyn_cast<Instruction>(GEPUser);
+ if (!isa<LoadInst>(GEPUserInst) &&
+ !(isa<StoreInst>(GEPUserInst) &&
+ GEP == GEPUserInst->getOperand(1)))
+ continue;
+ if (!isGuaranteedToExecuteForEveryIteration(GEPUserInst,
+ FI.InnerLoop))
+ continue;
+ // The IV is used as the operand of a GEP which dominates the loop
+ // latch, and the IV is at least as wide as the address space of the
+ // GEP. In this case, the GEP would wrap around the address space
+ // before the IV increment wraps, which would be UB.
+ if (GEP->isInBounds() &&
+ V->getType()->getIntegerBitWidth() >=
+ DL.getPointerTypeSizeInBits(GEP->getType())) {
+ LLVM_DEBUG(
+ dbgs() << "use of linear IV would be UB if overflow occurred: ";
+ GEP->dump());
+ return OverflowResult::NeverOverflows;
+ }
}
}
}
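
The intuition behind the NeverOverflows answer, as a hypothetical source-level example (not from the pass's tests): with the IVs widened to 64 bits, the combined index feeds an in-bounds array access that executes on every iteration, so if it wrapped the program would already have undefined behavior; the pass may therefore treat the flattened IV as non-overflowing.

    #include <cstddef>

    void scale(double *a, size_t N, size_t M, double k) {
      for (size_t i = 0; i < N; ++i)
        for (size_t j = 0; j < M; ++j)
          a[i * M + j] *= k;   // in-bounds access on every inner iteration
    }
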
@@ -518,7 +611,7 @@ static bool CanFlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
static bool DoFlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
ScalarEvolution *SE, AssumptionCache *AC,
- const TargetTransformInfo *TTI) {
+ const TargetTransformInfo *TTI, LPMUpdater *U) {
Function *F = FI.OuterLoop->getHeader()->getParent();
LLVM_DEBUG(dbgs() << "Checks all passed, doing the transformation\n");
{
@@ -574,7 +667,13 @@ static bool DoFlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
// deleted, and any information that have about the outer loop invalidated.
SE->forgetLoop(FI.OuterLoop);
SE->forgetLoop(FI.InnerLoop);
+ if (U)
+ U->markLoopAsDeleted(*FI.InnerLoop, FI.InnerLoop->getName());
LI->erase(FI.InnerLoop);
+
+ // Increment statistic value.
+ NumFlattened++;
+
return true;
}
@@ -605,14 +704,11 @@ static bool CanWidenIV(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
}
SCEVExpander Rewriter(*SE, DL, "loopflatten");
- SmallVector<WideIVInfo, 2> WideIVs;
SmallVector<WeakTrackingVH, 4> DeadInsts;
- WideIVs.push_back( {FI.InnerInductionPHI, MaxLegalType, false });
- WideIVs.push_back( {FI.OuterInductionPHI, MaxLegalType, false });
unsigned ElimExt = 0;
unsigned Widened = 0;
- for (const auto &WideIV : WideIVs) {
+ auto CreateWideIV = [&] (WideIVInfo WideIV, bool &Deleted) -> bool {
PHINode *WidePhi = createWideIV(WideIV, LI, SE, Rewriter, DT, DeadInsts,
ElimExt, Widened, true /* HasGuards */,
true /* UsePostIncrementRanges */);
@@ -620,17 +716,35 @@ static bool CanWidenIV(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
return false;
LLVM_DEBUG(dbgs() << "Created wide phi: "; WidePhi->dump());
LLVM_DEBUG(dbgs() << "Deleting old phi: "; WideIV.NarrowIV->dump());
- RecursivelyDeleteDeadPHINode(WideIV.NarrowIV);
- }
- // After widening, rediscover all the loop components.
+ Deleted = RecursivelyDeleteDeadPHINode(WideIV.NarrowIV);
+ return true;
+ };
+
+ bool Deleted;
+ if (!CreateWideIV({FI.InnerInductionPHI, MaxLegalType, false }, Deleted))
+ return false;
+ // Add the narrow phi to the list, so that it will be adjusted later when
+ // the transformation is performed.
+ if (!Deleted)
+ FI.InnerPHIsToTransform.insert(FI.InnerInductionPHI);
+
+ if (!CreateWideIV({FI.OuterInductionPHI, MaxLegalType, false }, Deleted))
+ return false;
+
assert(Widened && "Widened IV expected");
FI.Widened = true;
+
+ // Save the old/narrow induction phis, which we need to ignore in CheckPHIs.
+ FI.NarrowInnerInductionPHI = FI.InnerInductionPHI;
+ FI.NarrowOuterInductionPHI = FI.OuterInductionPHI;
+
+ // After widening, rediscover all the loop components.
return CanFlattenLoopPair(FI, DT, LI, SE, AC, TTI);
}
static bool FlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
ScalarEvolution *SE, AssumptionCache *AC,
- const TargetTransformInfo *TTI) {
+ const TargetTransformInfo *TTI, LPMUpdater *U) {
LLVM_DEBUG(
dbgs() << "Loop flattening running on outer loop "
<< FI.OuterLoop->getHeader()->getName() << " and inner loop "
@@ -641,12 +755,30 @@ static bool FlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
return false;
// Check if we can widen the induction variables to avoid overflow checks.
- if (CanWidenIV(FI, DT, LI, SE, AC, TTI))
- return DoFlattenLoopPair(FI, DT, LI, SE, AC, TTI);
-
- // Check if the new iteration variable might overflow. In this case, we
- // need to version the loop, and select the original version at runtime if
- // the iteration space is too large.
+ bool CanFlatten = CanWidenIV(FI, DT, LI, SE, AC, TTI);
+
+ // It can happen that after widening of the IV, flattening may not be
+ // possible/happening, e.g. when it is deemed unprofitable. So bail here if
+ // that is the case.
+ // TODO: IV widening without performing the actual flattening transformation
+ // is not ideal. While this codegen change should not matter much, it is an
+ // unnecessary change which is better to avoid. It's unlikely this happens
+ // often, because if it's unprofitable after widening, it should be
+ // unprofitable before widening as checked in the first round of checks. But
+ // 'RepeatedInstructionThreshold' is set to only 2, which can probably be
+ // relaxed. Because this is making a code change (the IV widening, but not
+ // the flattening), we return true here.
+ if (FI.Widened && !CanFlatten)
+ return true;
+
+ // If we have widened and can perform the transformation, do that here.
+ if (CanFlatten)
+ return DoFlattenLoopPair(FI, DT, LI, SE, AC, TTI, U);
+
+ // Otherwise, if we haven't widened the IV, check if the new iteration
+ // variable might overflow. In this case, we need to version the loop, and
+ // select the original version at runtime if the iteration space is too
+ // large.
// TODO: We currently don't version the loop.
OverflowResult OR = checkOverflow(FI, DT, AC);
if (OR == OverflowResult::AlwaysOverflowsHigh ||
@@ -659,18 +791,18 @@ static bool FlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
}
LLVM_DEBUG(dbgs() << "Multiply cannot overflow, modifying loop in-place\n");
- return DoFlattenLoopPair(FI, DT, LI, SE, AC, TTI);
+ return DoFlattenLoopPair(FI, DT, LI, SE, AC, TTI, U);
}
bool Flatten(LoopNest &LN, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE,
- AssumptionCache *AC, TargetTransformInfo *TTI) {
+ AssumptionCache *AC, TargetTransformInfo *TTI, LPMUpdater *U) {
bool Changed = false;
for (Loop *InnerLoop : LN.getLoops()) {
auto *OuterLoop = InnerLoop->getParentLoop();
if (!OuterLoop)
continue;
FlattenInfo FI(OuterLoop, InnerLoop);
- Changed |= FlattenLoopPair(FI, DT, LI, SE, AC, TTI);
+ Changed |= FlattenLoopPair(FI, DT, LI, SE, AC, TTI, U);
}
return Changed;
}
@@ -685,12 +817,12 @@ PreservedAnalyses LoopFlattenPass::run(LoopNest &LN, LoopAnalysisManager &LAM,
// in simplified form, and also needs LCSSA. Running
// this pass will simplify all loops that contain inner loops,
// regardless of whether anything ends up being flattened.
- Changed |= Flatten(LN, &AR.DT, &AR.LI, &AR.SE, &AR.AC, &AR.TTI);
+ Changed |= Flatten(LN, &AR.DT, &AR.LI, &AR.SE, &AR.AC, &AR.TTI, &U);
if (!Changed)
return PreservedAnalyses::all();
- return PreservedAnalyses::none();
+ return getLoopPassPreservedAnalyses();
}
namespace {
@@ -735,7 +867,7 @@ bool LoopFlattenLegacyPass::runOnFunction(Function &F) {
bool Changed = false;
for (Loop *L : *LI) {
auto LN = LoopNest::getLoopNest(*L, *SE);
- Changed |= Flatten(*LN, DT, LI, SE, AC, TTI);
+ Changed |= Flatten(*LN, DT, LI, SE, AC, TTI, nullptr);
}
return Changed;
}
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 3d60e205b002..42da86a9ecf5 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -217,15 +217,15 @@ private:
bool processLoopMemCpy(MemCpyInst *MCI, const SCEV *BECount);
bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
- bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
+ bool processLoopStridedStore(Value *DestPtr, const SCEV *StoreSizeSCEV,
MaybeAlign StoreAlignment, Value *StoredVal,
Instruction *TheStore,
SmallPtrSetImpl<Instruction *> &Stores,
const SCEVAddRecExpr *Ev, const SCEV *BECount,
- bool NegStride, bool IsLoopMemset = false);
+ bool IsNegStride, bool IsLoopMemset = false);
bool processLoopStoreOfLoopLoad(StoreInst *SI, const SCEV *BECount);
bool processLoopStoreOfLoopLoad(Value *DestPtr, Value *SourcePtr,
- unsigned StoreSize, MaybeAlign StoreAlign,
+ const SCEV *StoreSize, MaybeAlign StoreAlign,
MaybeAlign LoadAlign, Instruction *TheStore,
Instruction *TheLoad,
const SCEVAddRecExpr *StoreEv,
@@ -625,8 +625,8 @@ bool LoopIdiomRecognize::runOnLoopBlock(
// We can only promote stores in this block if they are unconditionally
// executed in the loop. For a block to be unconditionally executed, it has
// to dominate all the exit blocks of the loop. Verify this now.
- for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
- if (!DT->dominates(BB, ExitBlocks[i]))
+ for (BasicBlock *ExitBlock : ExitBlocks)
+ if (!DT->dominates(BB, ExitBlock))
return false;
bool MadeChange = false;
@@ -750,16 +750,13 @@ bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
bool Changed = false;
// For stores that start but don't end a link in the chain:
- for (SetVector<StoreInst *>::iterator it = Heads.begin(), e = Heads.end();
- it != e; ++it) {
- if (Tails.count(*it))
+ for (StoreInst *I : Heads) {
+ if (Tails.count(I))
continue;
// We found a store instr that starts a chain. Now follow the chain and try
// to transform it.
SmallPtrSet<Instruction *, 8> AdjacentStores;
- StoreInst *I = *it;
-
StoreInst *HeadStore = I;
unsigned StoreSize = 0;
@@ -784,12 +781,14 @@ bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
if (StoreSize != Stride && StoreSize != -Stride)
continue;
- bool NegStride = StoreSize == -Stride;
+ bool IsNegStride = StoreSize == -Stride;
- if (processLoopStridedStore(StorePtr, StoreSize,
+ Type *IntIdxTy = DL->getIndexType(StorePtr->getType());
+ const SCEV *StoreSizeSCEV = SE->getConstant(IntIdxTy, StoreSize);
+ if (processLoopStridedStore(StorePtr, StoreSizeSCEV,
MaybeAlign(HeadStore->getAlignment()),
StoredVal, HeadStore, AdjacentStores, StoreEv,
- BECount, NegStride)) {
+ BECount, IsNegStride)) {
TransformedStores.insert(AdjacentStores.begin(), AdjacentStores.end());
Changed = true;
}
@@ -857,15 +856,15 @@ bool LoopIdiomRecognize::processLoopMemCpy(MemCpyInst *MCI,
// Check if the stride matches the size of the memcpy. If so, then we know
// that every byte is touched in the loop.
- const SCEVConstant *StoreStride =
+ const SCEVConstant *ConstStoreStride =
dyn_cast<SCEVConstant>(StoreEv->getOperand(1));
- const SCEVConstant *LoadStride =
+ const SCEVConstant *ConstLoadStride =
dyn_cast<SCEVConstant>(LoadEv->getOperand(1));
- if (!StoreStride || !LoadStride)
+ if (!ConstStoreStride || !ConstLoadStride)
return false;
- APInt StoreStrideValue = StoreStride->getAPInt();
- APInt LoadStrideValue = LoadStride->getAPInt();
+ APInt StoreStrideValue = ConstStoreStride->getAPInt();
+ APInt LoadStrideValue = ConstLoadStride->getAPInt();
// Huge stride value - give up
if (StoreStrideValue.getBitWidth() > 64 || LoadStrideValue.getBitWidth() > 64)
return false;
@@ -875,7 +874,7 @@ bool LoopIdiomRecognize::processLoopMemCpy(MemCpyInst *MCI,
return OptimizationRemarkMissed(DEBUG_TYPE, "SizeStrideUnequal", MCI)
<< ore::NV("Inst", "memcpy") << " in "
<< ore::NV("Function", MCI->getFunction())
- << " function will not be hoised: "
+ << " function will not be hoisted: "
<< ore::NV("Reason", "memcpy size is not equal to stride");
});
return false;
@@ -887,16 +886,17 @@ bool LoopIdiomRecognize::processLoopMemCpy(MemCpyInst *MCI,
if (StoreStrideInt != LoadStrideInt)
return false;
- return processLoopStoreOfLoopLoad(Dest, Source, (unsigned)SizeInBytes,
- MCI->getDestAlign(), MCI->getSourceAlign(),
- MCI, MCI, StoreEv, LoadEv, BECount);
+ return processLoopStoreOfLoopLoad(
+ Dest, Source, SE->getConstant(Dest->getType(), SizeInBytes),
+ MCI->getDestAlign(), MCI->getSourceAlign(), MCI, MCI, StoreEv, LoadEv,
+ BECount);
}
/// processLoopMemSet - See if this memset can be promoted to a large memset.
bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
const SCEV *BECount) {
- // We can only handle non-volatile memsets with a constant size.
- if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength()))
+ // We can only handle non-volatile memsets.
+ if (MSI->isVolatile())
return false;
// If we're not allowed to hack on memset, we fail.
@@ -909,23 +909,72 @@ bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
// loop, which indicates a strided store. If we have something else, it's a
// random store we can't handle.
const SCEVAddRecExpr *Ev = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Pointer));
- if (!Ev || Ev->getLoop() != CurLoop || !Ev->isAffine())
+ if (!Ev || Ev->getLoop() != CurLoop)
return false;
-
- // Reject memsets that are so large that they overflow an unsigned.
- uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
- if ((SizeInBytes >> 32) != 0)
+ if (!Ev->isAffine()) {
+ LLVM_DEBUG(dbgs() << " Pointer is not affine, abort\n");
return false;
+ }
- // Check to see if the stride matches the size of the memset. If so, then we
- // know that every byte is touched in the loop.
- const SCEVConstant *ConstStride = dyn_cast<SCEVConstant>(Ev->getOperand(1));
- if (!ConstStride)
+ const SCEV *PointerStrideSCEV = Ev->getOperand(1);
+ const SCEV *MemsetSizeSCEV = SE->getSCEV(MSI->getLength());
+ if (!PointerStrideSCEV || !MemsetSizeSCEV)
return false;
- APInt Stride = ConstStride->getAPInt();
- if (SizeInBytes != Stride && SizeInBytes != -Stride)
- return false;
+ bool IsNegStride = false;
+ const bool IsConstantSize = isa<ConstantInt>(MSI->getLength());
+
+ if (IsConstantSize) {
+ // Memset size is constant.
+ // Check if the pointer stride matches the memset size. If so, then
+ // we know that every byte is touched in the loop.
+ LLVM_DEBUG(dbgs() << " memset size is constant\n");
+ uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
+ const SCEVConstant *ConstStride = dyn_cast<SCEVConstant>(Ev->getOperand(1));
+ if (!ConstStride)
+ return false;
+
+ APInt Stride = ConstStride->getAPInt();
+ if (SizeInBytes != Stride && SizeInBytes != -Stride)
+ return false;
+
+ IsNegStride = SizeInBytes == -Stride;
+ } else {
+ // Memset size is non-constant.
+ // Check if the pointer stride matches the memset size.
+ // To be conservative, the pass would not promote pointers that aren't in
+ // address space zero. Also, the pass only handles memset length and stride
+ // that are invariant for the top level loop.
+ LLVM_DEBUG(dbgs() << " memset size is non-constant\n");
+ if (Pointer->getType()->getPointerAddressSpace() != 0) {
+ LLVM_DEBUG(dbgs() << " pointer is not in address space zero, "
+ << "abort\n");
+ return false;
+ }
+ if (!SE->isLoopInvariant(MemsetSizeSCEV, CurLoop)) {
+ LLVM_DEBUG(dbgs() << " memset size is not a loop-invariant, "
+ << "abort\n");
+ return false;
+ }
+
+ // Compare positive direction PointerStrideSCEV with MemsetSizeSCEV
+ IsNegStride = PointerStrideSCEV->isNonConstantNegative();
+ const SCEV *PositiveStrideSCEV =
+ IsNegStride ? SE->getNegativeSCEV(PointerStrideSCEV)
+ : PointerStrideSCEV;
+ LLVM_DEBUG(dbgs() << " MemsetSizeSCEV: " << *MemsetSizeSCEV << "\n"
+ << " PositiveStrideSCEV: " << *PositiveStrideSCEV
+ << "\n");
+
+ if (PositiveStrideSCEV != MemsetSizeSCEV) {
+ // TODO: folding can be done to the SCEVs
+ // The folding is to fold expressions that is covered by the loop guard
+ // at loop entry. After the folding, compare again and proceed
+ // optimization if equal.
+ LLVM_DEBUG(dbgs() << " SCEVs don't match, abort\n");
+ return false;
+ }
+ }
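
A hypothetical source-level before/after of the non-constant-size case added here: when the per-iteration memset length equals the pointer stride and both are loop-invariant, the loop writes one contiguous region of n * m bytes.

    #include <cstddef>
    #include <cstring>

    void before(char *p, size_t n, size_t m) {
      for (size_t i = 0; i < n; ++i)
        std::memset(p + i * m, 0, m); // stride == length == m, loop-invariant
    }

    void after(char *p, size_t n, size_t m) {
      if (n)
        std::memset(p, 0, n * m);     // what the idiom recognizer forms
    }
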
// Verify that the memset value is loop invariant. If not, we can't promote
// the memset.
@@ -935,10 +984,10 @@ bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
SmallPtrSet<Instruction *, 1> MSIs;
MSIs.insert(MSI);
- bool NegStride = SizeInBytes == -Stride;
- return processLoopStridedStore(
- Pointer, (unsigned)SizeInBytes, MaybeAlign(MSI->getDestAlignment()),
- SplatValue, MSI, MSIs, Ev, BECount, NegStride, /*IsLoopMemset=*/true);
+ return processLoopStridedStore(Pointer, SE->getSCEV(MSI->getLength()),
+ MaybeAlign(MSI->getDestAlignment()),
+ SplatValue, MSI, MSIs, Ev, BECount,
+ IsNegStride, /*IsLoopMemset=*/true);
}
/// mayLoopAccessLocation - Return true if the specified loop might access the
@@ -946,9 +995,9 @@ bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
/// argument specifies what the verboten forms of access are (read or write).
static bool
mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
- const SCEV *BECount, unsigned StoreSize,
+ const SCEV *BECount, const SCEV *StoreSizeSCEV,
AliasAnalysis &AA,
- SmallPtrSetImpl<Instruction *> &IgnoredStores) {
+ SmallPtrSetImpl<Instruction *> &IgnoredInsts) {
// Get the location that may be stored across the loop. Since the access is
// strided positively through memory, we say that the modified location starts
// at the pointer and has infinite size.
@@ -956,9 +1005,11 @@ mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
// If the loop iterates a fixed number of times, we can refine the access size
// to be exactly the size of the memset, which is (BECount+1)*StoreSize
- if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
+ const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount);
+ const SCEVConstant *ConstSize = dyn_cast<SCEVConstant>(StoreSizeSCEV);
+ if (BECst && ConstSize)
AccessSize = LocationSize::precise((BECst->getValue()->getZExtValue() + 1) *
- StoreSize);
+ ConstSize->getValue()->getZExtValue());
// TODO: For this to be really effective, we have to dive into the pointer
// operand in the store. Store to &A[i] of 100 will always return may alias
@@ -966,14 +1017,12 @@ mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
// which will then no-alias a store to &A[100].
MemoryLocation StoreLoc(Ptr, AccessSize);
- for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
- ++BI)
- for (Instruction &I : **BI)
- if (IgnoredStores.count(&I) == 0 &&
+ for (BasicBlock *B : L->blocks())
+ for (Instruction &I : *B)
+ if (!IgnoredInsts.contains(&I) &&
isModOrRefSet(
intersectModRef(AA.getModRefInfo(&I, StoreLoc), Access)))
return true;
-
return false;
}
@@ -981,57 +1030,67 @@ mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
// we're trying to memset. Therefore, we need to recompute the base pointer,
// which is just Start - BECount*Size.
static const SCEV *getStartForNegStride(const SCEV *Start, const SCEV *BECount,
- Type *IntPtr, unsigned StoreSize,
+ Type *IntPtr, const SCEV *StoreSizeSCEV,
ScalarEvolution *SE) {
const SCEV *Index = SE->getTruncateOrZeroExtend(BECount, IntPtr);
- if (StoreSize != 1)
- Index = SE->getMulExpr(Index, SE->getConstant(IntPtr, StoreSize),
+ if (!StoreSizeSCEV->isOne()) {
+ // index = back edge count * store size
+ Index = SE->getMulExpr(Index,
+ SE->getTruncateOrZeroExtend(StoreSizeSCEV, IntPtr),
SCEV::FlagNUW);
+ }
+ // base pointer = start - index * store size
return SE->getMinusSCEV(Start, Index);
}
-/// Compute the number of bytes as a SCEV from the backedge taken count.
-///
-/// This also maps the SCEV into the provided type and tries to handle the
-/// computation in a way that will fold cleanly.
-static const SCEV *getNumBytes(const SCEV *BECount, Type *IntPtr,
- unsigned StoreSize, Loop *CurLoop,
- const DataLayout *DL, ScalarEvolution *SE) {
- const SCEV *NumBytesS;
- // The # stored bytes is (BECount+1)*Size. Expand the trip count out to
+/// Compute trip count from the backedge taken count.
+static const SCEV *getTripCount(const SCEV *BECount, Type *IntPtr,
+ Loop *CurLoop, const DataLayout *DL,
+ ScalarEvolution *SE) {
+ const SCEV *TripCountS = nullptr;
+ // The # stored bytes is (BECount+1). Expand the trip count out to
// pointer size if it isn't already.
//
// If we're going to need to zero extend the BE count, check if we can add
// one to it prior to zero extending without overflow. Provided this is safe,
// it allows better simplification of the +1.
- if (DL->getTypeSizeInBits(BECount->getType()).getFixedSize() <
- DL->getTypeSizeInBits(IntPtr).getFixedSize() &&
+ if (DL->getTypeSizeInBits(BECount->getType()) <
+ DL->getTypeSizeInBits(IntPtr) &&
SE->isLoopEntryGuardedByCond(
CurLoop, ICmpInst::ICMP_NE, BECount,
SE->getNegativeSCEV(SE->getOne(BECount->getType())))) {
- NumBytesS = SE->getZeroExtendExpr(
+ TripCountS = SE->getZeroExtendExpr(
SE->getAddExpr(BECount, SE->getOne(BECount->getType()), SCEV::FlagNUW),
IntPtr);
} else {
- NumBytesS = SE->getAddExpr(SE->getTruncateOrZeroExtend(BECount, IntPtr),
- SE->getOne(IntPtr), SCEV::FlagNUW);
+ TripCountS = SE->getAddExpr(SE->getTruncateOrZeroExtend(BECount, IntPtr),
+ SE->getOne(IntPtr), SCEV::FlagNUW);
}
- // And scale it based on the store size.
- if (StoreSize != 1) {
- NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
- SCEV::FlagNUW);
- }
- return NumBytesS;
+ return TripCountS;
+}
+
+/// Compute the number of bytes as a SCEV from the backedge taken count.
+///
+/// This also maps the SCEV into the provided type and tries to handle the
+/// computation in a way that will fold cleanly.
+static const SCEV *getNumBytes(const SCEV *BECount, Type *IntPtr,
+ const SCEV *StoreSizeSCEV, Loop *CurLoop,
+ const DataLayout *DL, ScalarEvolution *SE) {
+ const SCEV *TripCountSCEV = getTripCount(BECount, IntPtr, CurLoop, DL, SE);
+
+ return SE->getMulExpr(TripCountSCEV,
+ SE->getTruncateOrZeroExtend(StoreSizeSCEV, IntPtr),
+ SCEV::FlagNUW);
}
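
As a plain-integer model of the two helpers above (not SCEV, and assuming all values fit in 64 bits): the number of bytes written is (backedge-taken count + 1) * store size, and for a negative stride the lowest address actually written is the start minus backedge-taken count * store size.

    #include <cassert>
    #include <cstdint>

    uint64_t numBytes(uint64_t btc, uint64_t storeSize) {
      return (btc + 1) * storeSize;   // trip count times store size
    }

    uint64_t startForNegStride(uint64_t start, uint64_t btc, uint64_t storeSize) {
      return start - btc * storeSize; // lowest address actually written
    }

    int main() {
      // 8 iterations (BTC = 7) of a 4-byte store walking down from 0x1000.
      assert(numBytes(7, 4) == 32);
      assert(startForNegStride(0x1000, 7, 4) == 0x1000 - 28);
    }
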
/// processLoopStridedStore - We see a strided store of some value. If we can
/// transform this into a memset or memset_pattern in the loop preheader, do so.
bool LoopIdiomRecognize::processLoopStridedStore(
- Value *DestPtr, unsigned StoreSize, MaybeAlign StoreAlignment,
+ Value *DestPtr, const SCEV *StoreSizeSCEV, MaybeAlign StoreAlignment,
Value *StoredVal, Instruction *TheStore,
SmallPtrSetImpl<Instruction *> &Stores, const SCEVAddRecExpr *Ev,
- const SCEV *BECount, bool NegStride, bool IsLoopMemset) {
+ const SCEV *BECount, bool IsNegStride, bool IsLoopMemset) {
Value *SplatValue = isBytewiseValue(StoredVal, *DL);
Constant *PatternValue = nullptr;
@@ -1056,8 +1115,8 @@ bool LoopIdiomRecognize::processLoopStridedStore(
bool Changed = false;
const SCEV *Start = Ev->getStart();
// Handle negative strided loops.
- if (NegStride)
- Start = getStartForNegStride(Start, BECount, IntIdxTy, StoreSize, SE);
+ if (IsNegStride)
+ Start = getStartForNegStride(Start, BECount, IntIdxTy, StoreSizeSCEV, SE);
// TODO: ideally we should still be able to generate memset if SCEV expander
// is taught to generate the dependencies at the latest point.
@@ -1082,7 +1141,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
Changed = true;
if (mayLoopAccessLocation(BasePtr, ModRefInfo::ModRef, CurLoop, BECount,
- StoreSize, *AA, Stores))
+ StoreSizeSCEV, *AA, Stores))
return Changed;
if (avoidLIRForMultiBlockLoop(/*IsMemset=*/true, IsLoopMemset))
@@ -1091,7 +1150,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
// Okay, everything looks good, insert the memset.
const SCEV *NumBytesS =
- getNumBytes(BECount, IntIdxTy, StoreSize, CurLoop, DL, SE);
+ getNumBytes(BECount, IntIdxTy, StoreSizeSCEV, CurLoop, DL, SE);
// TODO: ideally we should still be able to generate memset if SCEV expander
// is taught to generate the dependencies at the latest point.
@@ -1138,13 +1197,20 @@ bool LoopIdiomRecognize::processLoopStridedStore(
<< "\n");
ORE.emit([&]() {
- return OptimizationRemark(DEBUG_TYPE, "ProcessLoopStridedStore",
- NewCall->getDebugLoc(), Preheader)
- << "Transformed loop-strided store in "
- << ore::NV("Function", TheStore->getFunction())
- << " function into a call to "
- << ore::NV("NewFunction", NewCall->getCalledFunction())
- << "() intrinsic";
+ OptimizationRemark R(DEBUG_TYPE, "ProcessLoopStridedStore",
+ NewCall->getDebugLoc(), Preheader);
+ R << "Transformed loop-strided store in "
+ << ore::NV("Function", TheStore->getFunction())
+ << " function into a call to "
+ << ore::NV("NewFunction", NewCall->getCalledFunction())
+ << "() intrinsic";
+ if (!Stores.empty())
+ R << ore::setExtraArgs();
+ for (auto *I : Stores) {
+ R << ore::NV("FromBlock", I->getParent()->getName())
+ << ore::NV("ToBlock", Preheader->getName());
+ }
+ return R;
});
// Okay, the memset has been formed. Zap the original store and anything that
@@ -1181,16 +1247,63 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
// random load we can't handle.
Value *LoadPtr = LI->getPointerOperand();
const SCEVAddRecExpr *LoadEv = cast<SCEVAddRecExpr>(SE->getSCEV(LoadPtr));
- return processLoopStoreOfLoopLoad(StorePtr, LoadPtr, StoreSize,
+
+ const SCEV *StoreSizeSCEV = SE->getConstant(StorePtr->getType(), StoreSize);
+ return processLoopStoreOfLoopLoad(StorePtr, LoadPtr, StoreSizeSCEV,
SI->getAlign(), LI->getAlign(), SI, LI,
StoreEv, LoadEv, BECount);
}
+class MemmoveVerifier {
+public:
+ explicit MemmoveVerifier(const Value &LoadBasePtr, const Value &StoreBasePtr,
+ const DataLayout &DL)
+ : DL(DL), LoadOff(0), StoreOff(0),
+ BP1(llvm::GetPointerBaseWithConstantOffset(
+ LoadBasePtr.stripPointerCasts(), LoadOff, DL)),
+ BP2(llvm::GetPointerBaseWithConstantOffset(
+ StoreBasePtr.stripPointerCasts(), StoreOff, DL)),
+ IsSameObject(BP1 == BP2) {}
+
+ bool loadAndStoreMayFormMemmove(unsigned StoreSize, bool IsNegStride,
+ const Instruction &TheLoad,
+ bool IsMemCpy) const {
+ if (IsMemCpy) {
+ // Ensure that LoadBasePtr is after StoreBasePtr or before StoreBasePtr
+ // for negative stride.
+ if ((!IsNegStride && LoadOff <= StoreOff) ||
+ (IsNegStride && LoadOff >= StoreOff))
+ return false;
+ } else {
+ // Ensure that LoadBasePtr is after StoreBasePtr or before StoreBasePtr
+ // for negative stride. LoadBasePtr shouldn't overlap with StoreBasePtr.
+ int64_t LoadSize =
+ DL.getTypeSizeInBits(TheLoad.getType()).getFixedSize() / 8;
+ if (BP1 != BP2 || LoadSize != int64_t(StoreSize))
+ return false;
+ if ((!IsNegStride && LoadOff < StoreOff + int64_t(StoreSize)) ||
+ (IsNegStride && LoadOff + LoadSize > StoreOff))
+ return false;
+ }
+ return true;
+ }
+
+private:
+ const DataLayout &DL;
+ int64_t LoadOff;
+ int64_t StoreOff;
+ const Value *BP1;
+ const Value *BP2;
+
+public:
+ const bool IsSameObject;
+};
+
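Restated as a standalone check over byte offsets from a common base object (a simplified model, not the LLVM implementation, and only the plain load/store case where load size equals store size), the overlap rule reads:

    #include <cassert>
    #include <cstdint>

    bool mayFormMemmove(int64_t loadOff, int64_t storeOff, int64_t size,
                        bool isNegStride) {
      // Positive stride: the load must start at or after the end of the
      // first store; negative stride: it must end at or before the first
      // store, so the loop never reads bytes a later memmove would clobber.
      if (!isNegStride)
        return loadOff >= storeOff + size;
      return loadOff + size <= storeOff;
    }

    int main() {
      // p[i] = p[i + 4] with 4-byte elements: the load is 16 bytes ahead, OK.
      assert(mayFormMemmove(/*loadOff=*/16, /*storeOff=*/0, /*size=*/4, false));
      // A 2-byte gap overlaps bytes written on later iterations: reject.
      assert(!mayFormMemmove(/*loadOff=*/2, /*storeOff=*/0, /*size=*/4, false));
    }
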
bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
- Value *DestPtr, Value *SourcePtr, unsigned StoreSize, MaybeAlign StoreAlign,
- MaybeAlign LoadAlign, Instruction *TheStore, Instruction *TheLoad,
- const SCEVAddRecExpr *StoreEv, const SCEVAddRecExpr *LoadEv,
- const SCEV *BECount) {
+ Value *DestPtr, Value *SourcePtr, const SCEV *StoreSizeSCEV,
+ MaybeAlign StoreAlign, MaybeAlign LoadAlign, Instruction *TheStore,
+ Instruction *TheLoad, const SCEVAddRecExpr *StoreEv,
+ const SCEVAddRecExpr *LoadEv, const SCEV *BECount) {
// FIXME: until llvm.memcpy.inline supports dynamic sizes, we need to
// conservatively bail here, since otherwise we may have to transform
@@ -1213,11 +1326,18 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
Type *IntIdxTy = Builder.getIntNTy(DL->getIndexSizeInBits(StrAS));
APInt Stride = getStoreStride(StoreEv);
- bool NegStride = StoreSize == -Stride;
+ const SCEVConstant *ConstStoreSize = dyn_cast<SCEVConstant>(StoreSizeSCEV);
+
+ // TODO: Deal with non-constant size; Currently expect constant store size
+ assert(ConstStoreSize && "store size is expected to be a constant");
+
+ int64_t StoreSize = ConstStoreSize->getValue()->getZExtValue();
+ bool IsNegStride = StoreSize == -Stride;
// Handle negative strided loops.
- if (NegStride)
- StrStart = getStartForNegStride(StrStart, BECount, IntIdxTy, StoreSize, SE);
+ if (IsNegStride)
+ StrStart =
+ getStartForNegStride(StrStart, BECount, IntIdxTy, StoreSizeSCEV, SE);
// Okay, we have a strided store "p[i]" of a loaded value. We can turn
// this into a memcpy in the loop preheader now if we want. However, this
@@ -1237,19 +1357,24 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
// the return value will read this comment, and leave them alone.
Changed = true;
- SmallPtrSet<Instruction *, 2> Stores;
- Stores.insert(TheStore);
+ SmallPtrSet<Instruction *, 2> IgnoredInsts;
+ IgnoredInsts.insert(TheStore);
bool IsMemCpy = isa<MemCpyInst>(TheStore);
const StringRef InstRemark = IsMemCpy ? "memcpy" : "load and store";
- bool UseMemMove =
+ bool LoopAccessStore =
mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount,
- StoreSize, *AA, Stores);
- if (UseMemMove) {
- Stores.insert(TheLoad);
+ StoreSizeSCEV, *AA, IgnoredInsts);
+ if (LoopAccessStore) {
+ // For memmove case it's not enough to guarantee that loop doesn't access
+ // TheStore and TheLoad. Additionally we need to make sure that TheStore is
+ // the only user of TheLoad.
+ if (!TheLoad->hasOneUse())
+ return Changed;
+ IgnoredInsts.insert(TheLoad);
if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop,
- BECount, StoreSize, *AA, Stores)) {
+ BECount, StoreSizeSCEV, *AA, IgnoredInsts)) {
ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessStore",
TheStore)
@@ -1260,15 +1385,16 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
});
return Changed;
}
- Stores.erase(TheLoad);
+ IgnoredInsts.erase(TheLoad);
}
const SCEV *LdStart = LoadEv->getStart();
unsigned LdAS = SourcePtr->getType()->getPointerAddressSpace();
// Handle negative strided loops.
- if (NegStride)
- LdStart = getStartForNegStride(LdStart, BECount, IntIdxTy, StoreSize, SE);
+ if (IsNegStride)
+ LdStart =
+ getStartForNegStride(LdStart, BECount, IntIdxTy, StoreSizeSCEV, SE);
// For a memcpy, we have to make sure that the input array is not being
// mutated by the loop.
@@ -1278,42 +1404,40 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
// If the store is a memcpy instruction, we must check if it will write to
// the load memory locations. So remove it from the ignored stores.
if (IsMemCpy)
- Stores.erase(TheStore);
+ IgnoredInsts.erase(TheStore);
+ MemmoveVerifier Verifier(*LoadBasePtr, *StoreBasePtr, *DL);
if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount,
- StoreSize, *AA, Stores)) {
- ORE.emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessLoad", TheLoad)
- << ore::NV("Inst", InstRemark) << " in "
- << ore::NV("Function", TheStore->getFunction())
- << " function will not be hoisted: "
- << ore::NV("Reason", "The loop may access load location");
- });
- return Changed;
- }
- if (UseMemMove) {
- // Ensure that LoadBasePtr is after StoreBasePtr or before StoreBasePtr for
- // negative stride. LoadBasePtr shouldn't overlap with StoreBasePtr.
- int64_t LoadOff = 0, StoreOff = 0;
- const Value *BP1 = llvm::GetPointerBaseWithConstantOffset(
- LoadBasePtr->stripPointerCasts(), LoadOff, *DL);
- const Value *BP2 = llvm::GetPointerBaseWithConstantOffset(
- StoreBasePtr->stripPointerCasts(), StoreOff, *DL);
- int64_t LoadSize =
- DL->getTypeSizeInBits(TheLoad->getType()).getFixedSize() / 8;
- if (BP1 != BP2 || LoadSize != int64_t(StoreSize))
+ StoreSizeSCEV, *AA, IgnoredInsts)) {
+ if (!IsMemCpy) {
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessLoad",
+ TheLoad)
+ << ore::NV("Inst", InstRemark) << " in "
+ << ore::NV("Function", TheStore->getFunction())
+ << " function will not be hoisted: "
+ << ore::NV("Reason", "The loop may access load location");
+ });
return Changed;
- if ((!NegStride && LoadOff < StoreOff + int64_t(StoreSize)) ||
- (NegStride && LoadOff + LoadSize > StoreOff))
+ }
+ // At this point loop may access load only for memcpy in same underlying
+ // object. If that's not the case bail out.
+ if (!Verifier.IsSameObject)
return Changed;
}
+ bool UseMemMove = IsMemCpy ? Verifier.IsSameObject : LoopAccessStore;
+ if (UseMemMove)
+ if (!Verifier.loadAndStoreMayFormMemmove(StoreSize, IsNegStride, *TheLoad,
+ IsMemCpy))
+ return Changed;
+
if (avoidLIRForMultiBlockLoop())
return Changed;
// Okay, everything is safe, we can transform this!
const SCEV *NumBytesS =
- getNumBytes(BECount, IntIdxTy, StoreSize, CurLoop, DL, SE);
+ getNumBytes(BECount, IntIdxTy, StoreSizeSCEV, CurLoop, DL, SE);
Value *NumBytes =
Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator());
@@ -1375,11 +1499,14 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
<< ore::NV("NewFunction", NewCall->getCalledFunction())
<< "() intrinsic from " << ore::NV("Inst", InstRemark)
<< " instruction in " << ore::NV("Function", TheStore->getFunction())
- << " function";
+ << " function"
+ << ore::setExtraArgs()
+ << ore::NV("FromBlock", TheStore->getParent()->getName())
+ << ore::NV("ToBlock", Preheader->getName());
});
- // Okay, the memcpy has been formed. Zap the original store and anything that
- // feeds into it.
+ // Okay, a new call to memcpy/memmove has been formed. Zap the original store
+ // and anything that feeds into it.
if (MSSAU)
MSSAU->removeMemoryAccess(TheStore, true);
deleteDeadInstruction(TheStore);
@@ -1544,24 +1671,22 @@ static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB,
// step 4: Find the instruction which count the population: cnt2 = cnt1 + 1
{
CountInst = nullptr;
- for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI()->getIterator(),
- IterE = LoopEntry->end();
- Iter != IterE; Iter++) {
- Instruction *Inst = &*Iter;
- if (Inst->getOpcode() != Instruction::Add)
+ for (Instruction &Inst : llvm::make_range(
+ LoopEntry->getFirstNonPHI()->getIterator(), LoopEntry->end())) {
+ if (Inst.getOpcode() != Instruction::Add)
continue;
- ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1));
+ ConstantInt *Inc = dyn_cast<ConstantInt>(Inst.getOperand(1));
if (!Inc || !Inc->isOne())
continue;
- PHINode *Phi = getRecurrenceVar(Inst->getOperand(0), Inst, LoopEntry);
+ PHINode *Phi = getRecurrenceVar(Inst.getOperand(0), &Inst, LoopEntry);
if (!Phi)
continue;
// Check if the result of the instruction is live out of the loop.
bool LiveOutLoop = false;
- for (User *U : Inst->users()) {
+ for (User *U : Inst.users()) {
if ((cast<Instruction>(U))->getParent() != LoopEntry) {
LiveOutLoop = true;
break;
@@ -1569,7 +1694,7 @@ static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB,
}
if (LiveOutLoop) {
- CountInst = Inst;
+ CountInst = &Inst;
CountPhi = Phi;
break;
}
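For context, a minimal sketch (illustrative source, not from this patch) of the population-count idiom these steps detect; step 4 above matches the cnt increment and its recurrence PHI:

    // Illustrative only: the classic "clear the lowest set bit" popcount loop
    // that detectPopcountIdiom rewrites into a ctpop intrinsic.
    unsigned popcount_loop(unsigned x) {
      unsigned cnt = 0;
      while (x != 0) {
        x = x & (x - 1);  // clears the lowest set bit each iteration
        cnt = cnt + 1;    // the "cnt2 = cnt1 + 1" instruction searched for above
      }
      return cnt;
    }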
@@ -1670,22 +1795,20 @@ static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL,
// plus "cnt0". Currently it is not optimized.
// This step could be used to detect POPCNT instruction:
// cnt.next = cnt + (x.next & 1)
- for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI()->getIterator(),
- IterE = LoopEntry->end();
- Iter != IterE; Iter++) {
- Instruction *Inst = &*Iter;
- if (Inst->getOpcode() != Instruction::Add)
+ for (Instruction &Inst : llvm::make_range(
+ LoopEntry->getFirstNonPHI()->getIterator(), LoopEntry->end())) {
+ if (Inst.getOpcode() != Instruction::Add)
continue;
- ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1));
+ ConstantInt *Inc = dyn_cast<ConstantInt>(Inst.getOperand(1));
if (!Inc || (!Inc->isOne() && !Inc->isMinusOne()))
continue;
- PHINode *Phi = getRecurrenceVar(Inst->getOperand(0), Inst, LoopEntry);
+ PHINode *Phi = getRecurrenceVar(Inst.getOperand(0), &Inst, LoopEntry);
if (!Phi)
continue;
- CntInst = Inst;
+ CntInst = &Inst;
CntPhi = Phi;
break;
}
diff --git a/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp b/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
index 3153a8721193..b9e63a4bc06f 100644
--- a/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -105,9 +105,7 @@ static bool simplifyLoopInst(Loop &L, DominatorTree &DT, LoopInfo &LI,
if (!V || !LI.replacementPreservesLCSSAForm(&I, V))
continue;
- for (Value::use_iterator UI = I.use_begin(), UE = I.use_end();
- UI != UE;) {
- Use &U = *UI++;
+ for (Use &U : llvm::make_early_inc_range(I.uses())) {
auto *UserI = cast<Instruction>(U.getUser());
U.set(V);
@@ -195,15 +193,10 @@ public:
const TargetLibraryInfo &TLI =
getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
*L->getHeader()->getParent());
- MemorySSA *MSSA = nullptr;
- Optional<MemorySSAUpdater> MSSAU;
- if (EnableMSSALoopDependency) {
- MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
- MSSAU = MemorySSAUpdater(MSSA);
- }
+ MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
+ MemorySSAUpdater MSSAU(MSSA);
- return simplifyLoopInst(*L, DT, LI, AC, TLI,
- MSSAU.hasValue() ? MSSAU.getPointer() : nullptr);
+ return simplifyLoopInst(*L, DT, LI, AC, TLI, &MSSAU);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -211,10 +204,8 @@ public:
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.setPreservesCFG();
- if (EnableMSSALoopDependency) {
- AU.addRequired<MemorySSAWrapperPass>();
- AU.addPreserved<MemorySSAWrapperPass>();
- }
+ AU.addRequired<MemorySSAWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
getLoopAnalysisUsage(AU);
}
};
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 34545f35b3c3..9f605b4ac4ad 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -1710,16 +1710,12 @@ bool LoopInterchangeTransform::adjustLoopBranches() {
auto &OuterInnerReductions = LIL.getOuterInnerReductions();
// Now update the reduction PHIs in the inner and outer loop headers.
SmallVector<PHINode *, 4> InnerLoopPHIs, OuterLoopPHIs;
- for (PHINode &PHI : InnerLoopHeader->phis()) {
- if (OuterInnerReductions.find(&PHI) == OuterInnerReductions.end())
- continue;
- InnerLoopPHIs.push_back(cast<PHINode>(&PHI));
- }
- for (PHINode &PHI : OuterLoopHeader->phis()) {
- if (OuterInnerReductions.find(&PHI) == OuterInnerReductions.end())
- continue;
- OuterLoopPHIs.push_back(cast<PHINode>(&PHI));
- }
+ for (PHINode &PHI : InnerLoopHeader->phis())
+ if (OuterInnerReductions.contains(&PHI))
+ InnerLoopPHIs.push_back(cast<PHINode>(&PHI));
+ for (PHINode &PHI : OuterLoopHeader->phis())
+ if (OuterInnerReductions.contains(&PHI))
+ OuterLoopPHIs.push_back(cast<PHINode>(&PHI));
// Now move the remaining reduction PHIs from outer to inner loop header and
// vice versa. The PHI nodes must be part of a reduction across the inner and
@@ -1767,6 +1763,7 @@ bool LoopInterchangeTransform::adjustLoopLinks() {
return Changed;
}
+namespace {
/// Main LoopInterchange Pass.
struct LoopInterchangeLegacyPass : public LoopPass {
static char ID;
@@ -1795,6 +1792,7 @@ struct LoopInterchangeLegacyPass : public LoopPass {
return LoopInterchange(SE, LI, DI, DT, ORE).run(L);
}
};
+} // namespace
char LoopInterchangeLegacyPass::ID = 0;
diff --git a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
index aaf586173e44..21d59936616b 100644
--- a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
@@ -34,7 +34,6 @@
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
@@ -109,8 +108,8 @@ struct StoreToLoadForwardingCandidate {
// Currently we only support accesses with unit stride. FIXME: we should be
// able to handle non-unit stride as well, as long as the stride is equal to
// the dependence distance.
- if (getPtrStride(PSE, LoadPtr, L) != 1 ||
- getPtrStride(PSE, StorePtr, L) != 1)
+ if (getPtrStride(PSE, LoadType, LoadPtr, L) != 1 ||
+ getPtrStride(PSE, LoadType, StorePtr, L) != 1)
return false;
auto &DL = Load->getParent()->getModule()->getDataLayout();
@@ -718,15 +717,12 @@ PreservedAnalyses LoopLoadEliminationPass::run(Function &F,
auto *PSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
auto *BFI = (PSI && PSI->hasProfileSummary()) ?
&AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
- MemorySSA *MSSA = EnableMSSALoopDependency
- ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA()
- : nullptr;
auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
bool Changed = eliminateLoadsAcrossLoops(
F, LI, DT, BFI, PSI, &SE, &AC, [&](Loop &L) -> const LoopAccessInfo & {
- LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE,
- TLI, TTI, nullptr, MSSA};
+ LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE,
+ TLI, TTI, nullptr, nullptr, nullptr};
return LAM.getResult<LoopAccessAnalysis>(L, AR);
});
diff --git a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
index f4fce4871331..3df4cfe8e4c1 100644
--- a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
@@ -10,6 +10,7 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
@@ -44,6 +45,18 @@ PassManager<Loop, LoopAnalysisManager, LoopStandardAnalysisResults &,
return PA;
}
+void PassManager<Loop, LoopAnalysisManager, LoopStandardAnalysisResults &,
+ LPMUpdater &>::printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)>
+ MapClassName2PassName) {
+ for (unsigned Idx = 0, Size = LoopPasses.size(); Idx != Size; ++Idx) {
+ auto *P = LoopPasses[Idx].get();
+ P->printPipeline(OS, MapClassName2PassName);
+ if (Idx + 1 < Size)
+ OS << ",";
+ }
+}
+
// Run both loop passes and loop-nest passes on top-level loop \p L.
PreservedAnalyses
LoopPassManager::runWithLoopNestPasses(Loop &L, LoopAnalysisManager &AM,
@@ -112,12 +125,6 @@ LoopPassManager::runWithLoopNestPasses(Loop &L, LoopAnalysisManager &AM,
// notify the updater, otherwise U.ParentL might get outdated and trigger
// assertion failures in addSiblingLoops and addChildLoops.
U.setParentLoop(L.getParentLoop());
-
- // FIXME: Historically, the pass managers all called the LLVM context's
- // yield function here. We don't have a generic way to acquire the
- // context and it isn't yet clear what the right pattern is for yielding
- // in the new pass manager so it is currently omitted.
- // ...getContext().yield();
}
return PA;
}
@@ -161,17 +168,17 @@ LoopPassManager::runWithoutLoopNestPasses(Loop &L, LoopAnalysisManager &AM,
// notify the updater, otherwise U.ParentL might get outdated and trigger
// assertion failures in addSiblingLoops and addChildLoops.
U.setParentLoop(L.getParentLoop());
-
- // FIXME: Historically, the pass managers all called the LLVM context's
- // yield function here. We don't have a generic way to acquire the
- // context and it isn't yet clear what the right pattern is for yielding
- // in the new pass manager so it is currently omitted.
- // ...getContext().yield();
}
return PA;
}
} // namespace llvm
+void FunctionToLoopPassAdaptor::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ OS << (UseMemorySSA ? "loop-mssa(" : "loop(");
+ Pass->printPipeline(OS, MapClassName2PassName);
+ OS << ")";
+}
PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F,
FunctionAnalysisManager &AM) {
// Before we even compute any loop analyses, first run a miniature function
@@ -201,6 +208,10 @@ PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F,
BlockFrequencyInfo *BFI = UseBlockFrequencyInfo && F.hasProfileData()
? (&AM.getResult<BlockFrequencyAnalysis>(F))
: nullptr;
+ BranchProbabilityInfo *BPI =
+ UseBranchProbabilityInfo && F.hasProfileData()
+ ? (&AM.getResult<BranchProbabilityAnalysis>(F))
+ : nullptr;
LoopStandardAnalysisResults LAR = {AM.getResult<AAManager>(F),
AM.getResult<AssumptionAnalysis>(F),
AM.getResult<DominatorTreeAnalysis>(F),
@@ -209,6 +220,7 @@ PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F,
AM.getResult<TargetLibraryAnalysis>(F),
AM.getResult<TargetIRAnalysis>(F),
BFI,
+ BPI,
MSSA};
// Setup the loop analysis manager from its proxy. It is important that
@@ -285,6 +297,10 @@ PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F,
else
PI.runAfterPass<Loop>(*Pass, *L, PassPA);
+ if (LAR.MSSA && !PassPA.getChecker<MemorySSAAnalysis>().preserved())
+ report_fatal_error("Loop pass manager using MemorySSA contains a pass "
+ "that does not preserve MemorySSA");
+
#ifndef NDEBUG
// LoopAnalysisResults should always be valid.
// Note that we don't LAR.SE.verify() because that can change observed SE
@@ -325,6 +341,8 @@ PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F,
PA.preserve<ScalarEvolutionAnalysis>();
if (UseBlockFrequencyInfo && F.hasProfileData())
PA.preserve<BlockFrequencyAnalysis>();
+ if (UseBranchProbabilityInfo && F.hasProfileData())
+ PA.preserve<BranchProbabilityAnalysis>();
if (UseMemorySSA)
PA.preserve<MemorySSAAnalysis>();
return PA;
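A note on the printPipeline additions above: the adaptor wraps the nested loop passes in either loop(...) or loop-mssa(...), and the inner pass manager joins the pass names with commas, so a pipeline such as loop-mssa(loop-rotate,licm) (an assumed example, not from the patch) prints back in exactly that form, for instance via opt's -print-pipeline-passes option where available, and can be fed to -passes again unchanged.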
diff --git a/llvm/lib/Transforms/Scalar/LoopPredication.cpp b/llvm/lib/Transforms/Scalar/LoopPredication.cpp
index 4f97641e2027..aa7e79a589f2 100644
--- a/llvm/lib/Transforms/Scalar/LoopPredication.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopPredication.cpp
@@ -183,6 +183,8 @@
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/Function.h"
@@ -254,7 +256,7 @@ class LoopPredication {
DominatorTree *DT;
ScalarEvolution *SE;
LoopInfo *LI;
- BranchProbabilityInfo *BPI;
+ MemorySSAUpdater *MSSAU;
Loop *L;
const DataLayout *DL;
@@ -302,16 +304,15 @@ class LoopPredication {
// If the loop always exits through another block in the loop, we should not
// predicate based on the latch check. For example, the latch check can be a
// very coarse grained check and there can be more fine grained exit checks
- // within the loop. We identify such unprofitable loops through BPI.
+ // within the loop.
bool isLoopProfitableToPredicate();
bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);
public:
- LoopPredication(AliasAnalysis *AA, DominatorTree *DT,
- ScalarEvolution *SE, LoopInfo *LI,
- BranchProbabilityInfo *BPI)
- : AA(AA), DT(DT), SE(SE), LI(LI), BPI(BPI) {};
+ LoopPredication(AliasAnalysis *AA, DominatorTree *DT, ScalarEvolution *SE,
+ LoopInfo *LI, MemorySSAUpdater *MSSAU)
+ : AA(AA), DT(DT), SE(SE), LI(LI), MSSAU(MSSAU){};
bool runOnLoop(Loop *L);
};
@@ -325,6 +326,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<BranchProbabilityInfoWrapperPass>();
getLoopAnalysisUsage(AU);
+ AU.addPreserved<MemorySSAWrapperPass>();
}
bool runOnLoop(Loop *L, LPPassManager &LPM) override {
@@ -333,10 +335,12 @@ public:
auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- BranchProbabilityInfo &BPI =
- getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
+ auto *MSSAWP = getAnalysisIfAvailable<MemorySSAWrapperPass>();
+ std::unique_ptr<MemorySSAUpdater> MSSAU;
+ if (MSSAWP)
+ MSSAU = std::make_unique<MemorySSAUpdater>(&MSSAWP->getMSSA());
auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- LoopPredication LP(AA, DT, SE, LI, &BPI);
+ LoopPredication LP(AA, DT, SE, LI, MSSAU ? MSSAU.get() : nullptr);
return LP.runOnLoop(L);
}
};
@@ -358,16 +362,18 @@ Pass *llvm::createLoopPredicationPass() {
PreservedAnalyses LoopPredicationPass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
LPMUpdater &U) {
- Function *F = L.getHeader()->getParent();
- // For the new PM, we also can't use BranchProbabilityInfo as an analysis
- // pass. Function analyses need to be preserved across loop transformations
- // but BPI is not preserved, hence a newly built one is needed.
- BranchProbabilityInfo BPI(*F, AR.LI, &AR.TLI, &AR.DT, nullptr);
- LoopPredication LP(&AR.AA, &AR.DT, &AR.SE, &AR.LI, &BPI);
+ std::unique_ptr<MemorySSAUpdater> MSSAU;
+ if (AR.MSSA)
+ MSSAU = std::make_unique<MemorySSAUpdater>(AR.MSSA);
+ LoopPredication LP(&AR.AA, &AR.DT, &AR.SE, &AR.LI,
+ MSSAU ? MSSAU.get() : nullptr);
if (!LP.runOnLoop(&L))
return PreservedAnalyses::all();
- return getLoopPassPreservedAnalyses();
+ auto PA = getLoopPassPreservedAnalyses();
+ if (AR.MSSA)
+ PA.preserve<MemorySSAAnalysis>();
+ return PA;
}
Optional<LoopICmp>
@@ -809,7 +815,7 @@ bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard,
Value *AllChecks = Builder.CreateAnd(Checks);
auto *OldCond = Guard->getOperand(0);
Guard->setOperand(0, AllChecks);
- RecursivelyDeleteTriviallyDeadInstructions(OldCond);
+ RecursivelyDeleteTriviallyDeadInstructions(OldCond, nullptr /* TLI */, MSSAU);
LLVM_DEBUG(dbgs() << "Widened checks = " << NumWidened << "\n");
return true;
@@ -835,7 +841,7 @@ bool LoopPredication::widenWidenableBranchGuardConditions(
Value *AllChecks = Builder.CreateAnd(Checks);
auto *OldCond = BI->getCondition();
BI->setCondition(AllChecks);
- RecursivelyDeleteTriviallyDeadInstructions(OldCond);
+ RecursivelyDeleteTriviallyDeadInstructions(OldCond, nullptr /* TLI */, MSSAU);
assert(isGuardAsWidenableBranch(BI) &&
"Stopped being a guard after transform?");
@@ -912,7 +918,7 @@ Optional<LoopICmp> LoopPredication::parseLoopLatchICmp() {
bool LoopPredication::isLoopProfitableToPredicate() {
- if (SkipProfitabilityChecks || !BPI)
+ if (SkipProfitabilityChecks)
return true;
SmallVector<std::pair<BasicBlock *, BasicBlock *>, 8> ExitEdges;
@@ -934,8 +940,61 @@ bool LoopPredication::isLoopProfitableToPredicate() {
"expected to be an exiting block with 2 succs!");
unsigned LatchBrExitIdx =
LatchTerm->getSuccessor(0) == L->getHeader() ? 1 : 0;
+ // We compute branch probabilities without BPI. We do not rely on BPI since
+ // Loop predication is usually run in an LPM and BPI is only preserved
+ // lossily within loop pass managers, while BPI has an inherent notion of
+ // being complete for an entire function.
+
+ // If the latch exits into a deoptimize or an unreachable block, do not
+ // predicate on that latch check.
+ auto *LatchExitBlock = LatchTerm->getSuccessor(LatchBrExitIdx);
+ if (isa<UnreachableInst>(LatchTerm) ||
+ LatchExitBlock->getTerminatingDeoptimizeCall())
+ return false;
+
+ auto IsValidProfileData = [](MDNode *ProfileData, const Instruction *Term) {
+ if (!ProfileData || !ProfileData->getOperand(0))
+ return false;
+ if (MDString *MDS = dyn_cast<MDString>(ProfileData->getOperand(0)))
+ if (!MDS->getString().equals("branch_weights"))
+ return false;
+ if (ProfileData->getNumOperands() != 1 + Term->getNumSuccessors())
+ return false;
+ return true;
+ };
+ MDNode *LatchProfileData = LatchTerm->getMetadata(LLVMContext::MD_prof);
+ // If the latch terminator has no valid profile data, there is nothing to
+ // check profitability against.
+ if (!IsValidProfileData(LatchProfileData, LatchTerm))
+ return true;
+
+ auto ComputeBranchProbability =
+ [&](const BasicBlock *ExitingBlock,
+ const BasicBlock *ExitBlock) -> BranchProbability {
+ auto *Term = ExitingBlock->getTerminator();
+ MDNode *ProfileData = Term->getMetadata(LLVMContext::MD_prof);
+ unsigned NumSucc = Term->getNumSuccessors();
+ if (IsValidProfileData(ProfileData, Term)) {
+ uint64_t Numerator = 0, Denominator = 0, ProfVal = 0;
+ for (unsigned i = 0; i < NumSucc; i++) {
+ ConstantInt *CI =
+ mdconst::extract<ConstantInt>(ProfileData->getOperand(i + 1));
+ ProfVal = CI->getValue().getZExtValue();
+ if (Term->getSuccessor(i) == ExitBlock)
+ Numerator += ProfVal;
+ Denominator += ProfVal;
+ }
+ return BranchProbability::getBranchProbability(Numerator, Denominator);
+ } else {
+ assert(LatchBlock != ExitingBlock &&
+ "Latch term should always have profile data!");
+ // No profile data, so we choose the weight as 1/num_of_succ(Src)
+ return BranchProbability::getBranchProbability(1, NumSucc);
+ }
+ };
+
BranchProbability LatchExitProbability =
- BPI->getEdgeProbability(LatchBlock, LatchBrExitIdx);
+ ComputeBranchProbability(LatchBlock, LatchExitBlock);
// Protect against degenerate inputs provided by the user. Providing a value
// less than one, can invert the definition of profitable loop predication.
@@ -948,18 +1007,18 @@ bool LoopPredication::isLoopProfitableToPredicate() {
LLVM_DEBUG(dbgs() << "The value is set to 1.0\n");
ScaleFactor = 1.0;
}
- const auto LatchProbabilityThreshold =
- LatchExitProbability * ScaleFactor;
+ const auto LatchProbabilityThreshold = LatchExitProbability * ScaleFactor;
for (const auto &ExitEdge : ExitEdges) {
BranchProbability ExitingBlockProbability =
- BPI->getEdgeProbability(ExitEdge.first, ExitEdge.second);
+ ComputeBranchProbability(ExitEdge.first, ExitEdge.second);
// Some exiting edge has higher probability than the latch exiting edge.
// No longer profitable to predicate.
if (ExitingBlockProbability > LatchProbabilityThreshold)
return false;
}
- // Using BPI, we have concluded that the most probable way to exit from the
+
+ // We have concluded that the most probable way to exit from the
// loop is through the latch (or there's no profile information and all
// exits are equally likely).
return true;
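To make the weight arithmetic concrete, here is a minimal self-contained sketch (assumed weights and names, not LLVM code) of the computation performed by the ComputeBranchProbability lambda: every successor weight goes into the denominator, and the weights of edges landing in the exit block go into the numerator.

    #include <cstdint>
    #include <vector>

    // Illustrative sketch: with branch_weights {1, 99} and the exit block as
    // successor 0, the exit probability is 1 / (1 + 99) = 1%.
    double exitProbability(const std::vector<uint64_t> &Weights,
                           unsigned ExitSuccIdx) {
      uint64_t Num = 0, Den = 0;
      for (unsigned I = 0; I < Weights.size(); ++I) {
        if (I == ExitSuccIdx)
          Num += Weights[I];
        Den += Weights[I];
      }
      return Den ? double(Num) / double(Den) : 0.0;
    }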
@@ -1071,28 +1130,26 @@ bool LoopPredication::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
// widen so that we gain the ability to analyze its exit count and perform this
// transform. TODO: It'd be nice to know for sure the exit became
// analyzable after dropping widenability.
- {
- bool Invalidate = false;
+ bool ChangedLoop = false;
- for (auto *ExitingBB : ExitingBlocks) {
- if (LI->getLoopFor(ExitingBB) != L)
- continue;
+ for (auto *ExitingBB : ExitingBlocks) {
+ if (LI->getLoopFor(ExitingBB) != L)
+ continue;
- auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
- if (!BI)
- continue;
+ auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
+ if (!BI)
+ continue;
- Use *Cond, *WC;
- BasicBlock *IfTrueBB, *IfFalseBB;
- if (parseWidenableBranch(BI, Cond, WC, IfTrueBB, IfFalseBB) &&
- L->contains(IfTrueBB)) {
- WC->set(ConstantInt::getTrue(IfTrueBB->getContext()));
- Invalidate = true;
- }
+ Use *Cond, *WC;
+ BasicBlock *IfTrueBB, *IfFalseBB;
+ if (parseWidenableBranch(BI, Cond, WC, IfTrueBB, IfFalseBB) &&
+ L->contains(IfTrueBB)) {
+ WC->set(ConstantInt::getTrue(IfTrueBB->getContext()));
+ ChangedLoop = true;
}
- if (Invalidate)
- SE->forgetLoop(L);
}
+ if (ChangedLoop)
+ SE->forgetLoop(L);
// The use of umin(all analyzeable exits) instead of latch is subtle, but
// important for profitability. We may have a loop which hasn't been fully
@@ -1104,18 +1161,24 @@ bool LoopPredication::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
if (isa<SCEVCouldNotCompute>(MinEC) || MinEC->getType()->isPointerTy() ||
!SE->isLoopInvariant(MinEC, L) ||
!isSafeToExpandAt(MinEC, WidenableBR, *SE))
- return false;
+ return ChangedLoop;
// Subtlety: We need to avoid inserting additional uses of the WC. We know
// that it can only have one transitive use at the moment, and thus moving
// that use to just before the branch and inserting code before it and then
// modifying the operand is legal.
auto *IP = cast<Instruction>(WidenableBR->getCondition());
+ // Here we unconditionally modify the IR, so after this point we should return
+ // only `true`!
IP->moveBefore(WidenableBR);
+ if (MSSAU)
+ if (auto *MUD = MSSAU->getMemorySSA()->getMemoryAccess(IP))
+ MSSAU->moveToPlace(MUD, WidenableBR->getParent(),
+ MemorySSA::BeforeTerminator);
Rewriter.setInsertPoint(IP);
IRBuilder<> B(IP);
- bool Changed = false;
+ bool InvalidateLoop = false;
Value *MinECV = nullptr; // lazily generated if needed
for (BasicBlock *ExitingBB : ExitingBlocks) {
// If our exiting block exits multiple loops, we can only rewrite the
@@ -1172,16 +1235,18 @@ bool LoopPredication::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
Value *OldCond = BI->getCondition();
BI->setCondition(ConstantInt::get(OldCond->getType(), !ExitIfTrue));
- Changed = true;
+ InvalidateLoop = true;
}
- if (Changed)
+ if (InvalidateLoop)
// We just mutated a bunch of loop exits changing their exit counts
// widely. We need to force recomputation of the exit counts given these
// changes. Note that all of the inserted exits are never taken, and
// should be removed next time the CFG is modified.
SE->forgetLoop(L);
- return Changed;
+
+ // Always return `true` since we have moved the WidenableBR's condition.
+ return true;
}
bool LoopPredication::runOnLoop(Loop *Loop) {
@@ -1242,5 +1307,8 @@ bool LoopPredication::runOnLoop(Loop *Loop) {
for (auto *Guard : GuardsAsWidenableBranches)
Changed |= widenWidenableBranchGuardConditions(Guard, Expander);
Changed |= predicateLoopExits(L, Expander);
+
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
return Changed;
}
diff --git a/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/llvm/lib/Transforms/Scalar/LoopRotation.cpp
index 6d5b19443c76..5ba137b1c85f 100644
--- a/llvm/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopRotation.cpp
@@ -99,8 +99,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetTransformInfoWrapperPass>();
- if (EnableMSSALoopDependency)
- AU.addPreserved<MemorySSAWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
getLoopAnalysisUsage(AU);
// Lazy BFI and BPI are marked as preserved here so LoopRotate
@@ -121,13 +120,11 @@ public:
auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
const SimplifyQuery SQ = getBestSimplifyQuery(*this, F);
Optional<MemorySSAUpdater> MSSAU;
- if (EnableMSSALoopDependency) {
- // Not requiring MemorySSA and getting it only if available will split
- // the loop pass pipeline when LoopRotate is being run first.
- auto *MSSAA = getAnalysisIfAvailable<MemorySSAWrapperPass>();
- if (MSSAA)
- MSSAU = MemorySSAUpdater(&MSSAA->getMSSA());
- }
+ // Not requiring MemorySSA and getting it only if available will split
+ // the loop pass pipeline when LoopRotate is being run first.
+ auto *MSSAA = getAnalysisIfAvailable<MemorySSAWrapperPass>();
+ if (MSSAA)
+ MSSAU = MemorySSAUpdater(&MSSAA->getMSSA());
// Vectorization requires loop-rotation. Use default threshold for loops the
// user explicitly marked for vectorization, even when header duplication is
// disabled.
diff --git a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
index cc6d11220807..a87843d658a9 100644
--- a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
@@ -733,13 +733,12 @@ public:
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ auto *MSSAA = getAnalysisIfAvailable<MemorySSAWrapperPass>();
Optional<MemorySSAUpdater> MSSAU;
- if (EnableMSSALoopDependency) {
- MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
- MSSAU = MemorySSAUpdater(MSSA);
- if (VerifyMemorySSA)
- MSSA->verifyMemorySSA();
- }
+ if (MSSAA)
+ MSSAU = MemorySSAUpdater(&MSSAA->getMSSA());
+ if (MSSAA && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
bool DeleteCurrentLoop = false;
bool Changed = simplifyLoopCFG(
*L, DT, LI, SE, MSSAU.hasValue() ? MSSAU.getPointer() : nullptr,
@@ -750,10 +749,7 @@ public:
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
- if (EnableMSSALoopDependency) {
- AU.addRequired<MemorySSAWrapperPass>();
- AU.addPreserved<MemorySSAWrapperPass>();
- }
+ AU.addPreserved<MemorySSAWrapperPass>();
AU.addPreserved<DependenceAnalysisWrapperPass>();
getLoopAnalysisUsage(AU);
}
diff --git a/llvm/lib/Transforms/Scalar/LoopSink.cpp b/llvm/lib/Transforms/Scalar/LoopSink.cpp
index a01287f587d7..c9c9e60d0921 100644
--- a/llvm/lib/Transforms/Scalar/LoopSink.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopSink.cpp
@@ -323,15 +323,14 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
// Traverse the preheader's instructions in reverse order because if A depends
// on B (A appears after B), A needs to be sunk first before B can be
// sunk.
- for (auto II = Preheader->rbegin(), E = Preheader->rend(); II != E;) {
- Instruction *I = &*II++;
+ for (Instruction &I : llvm::make_early_inc_range(llvm::reverse(*Preheader))) {
// No need to check that the instruction's operands are loop invariant.
- assert(L.hasLoopInvariantOperands(I) &&
+ assert(L.hasLoopInvariantOperands(&I) &&
"Insts in a loop's preheader should have loop invariant operands!");
- if (!canSinkOrHoistInst(*I, &AA, &DT, &L, CurAST, MSSAU.get(), false,
+ if (!canSinkOrHoistInst(I, &AA, &DT, &L, CurAST, MSSAU.get(), false,
LICMFlags.get()))
continue;
- if (sinkInstruction(L, *I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI,
+ if (sinkInstruction(L, I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI,
MSSAU.get()))
Changed = true;
}
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 5f210380ae5a..a9a2266e1196 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -136,6 +136,12 @@ using namespace llvm;
/// worst cases before LSR burns too much compile time and stack space.
static const unsigned MaxIVUsers = 200;
+/// Limit the size of expression that SCEV-based salvaging will attempt to
+/// translate into a DIExpression.
+/// Choose a maximum size such that debuginfo is not excessively increased and
+/// the salvaging is not too expensive for the compiler.
+static const unsigned MaxSCEVSalvageExpressionSize = 64;
+
// Temporary flag to cleanup congruent phis after LSR phi expansion.
// It's currently disabled until we can determine whether it's truly useful or
// not. The flag should be removed after the v3.0 release.
@@ -689,7 +695,7 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
const APInt &RA = RC->getAPInt();
// Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do
// some folding.
- if (RA.isAllOnesValue()) {
+ if (RA.isAllOnes()) {
if (LHS->getType()->isPointerTy())
return nullptr;
return SE.getMulExpr(LHS, RC);
@@ -2816,9 +2822,7 @@ static const SCEV *getExprBase(const SCEV *S) {
// there's nothing more complex.
// FIXME: not sure if we want to recognize negation.
const SCEVAddExpr *Add = cast<SCEVAddExpr>(S);
- for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(Add->op_end()),
- E(Add->op_begin()); I != E; ++I) {
- const SCEV *SubExpr = *I;
+ for (const SCEV *SubExpr : reverse(Add->operands())) {
if (SubExpr->getSCEVType() == scAddExpr)
return getExprBase(SubExpr);
@@ -3150,7 +3154,7 @@ void LSRInstance::CollectChains() {
void LSRInstance::FinalizeChain(IVChain &Chain) {
assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
LLVM_DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");
-
+
for (const IVInc &Inc : Chain) {
LLVM_DEBUG(dbgs() << " Inc: " << *Inc.UserInst << "\n");
auto UseI = find(Inc.UserInst->operands(), Inc.IVOperand);
@@ -3385,7 +3389,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
void
LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) {
// Mark uses whose expressions cannot be expanded.
- if (!isSafeToExpand(S, SE))
+ if (!isSafeToExpand(S, SE, /*CanonicalMode*/ false))
LU.RigidFormula = true;
Formula F;
@@ -3934,6 +3938,9 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
// Check each interesting stride.
for (int64_t Factor : Factors) {
+ // Check that Factor can be represented by IntTy
+ if (!ConstantInt::isValueValidForType(IntTy, Factor))
+ continue;
// Check that the multiplication doesn't overflow.
if (Base.BaseOffset == std::numeric_limits<int64_t>::min() && Factor == -1)
continue;
@@ -4082,6 +4089,14 @@ void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
if (DstTy->isPointerTy())
return;
+ // It is invalid to extend a pointer type so exit early if ScaledReg or
+ // any of the BaseRegs are pointers.
+ if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())
+ return;
+ if (any_of(Base.BaseRegs,
+ [](const SCEV *S) { return S->getType()->isPointerTy(); }))
+ return;
+
for (Type *SrcTy : Types) {
if (SrcTy != DstTy && TTI.isTruncateFree(SrcTy, DstTy)) {
Formula F = Base;
@@ -5689,23 +5704,6 @@ LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
}
}
-#ifndef NDEBUG
- // All dominating loops must have preheaders, or SCEVExpander may not be able
- // to materialize an AddRecExpr whose Start is an outer AddRecExpr.
- //
- // IVUsers analysis should only create users that are dominated by simple loop
- // headers. Since this loop should dominate all of its users, its user list
- // should be empty if this loop itself is not within a simple loop nest.
- for (DomTreeNode *Rung = DT.getNode(L->getLoopPreheader());
- Rung; Rung = Rung->getIDom()) {
- BasicBlock *BB = Rung->getBlock();
- const Loop *DomLoop = LI.getLoopFor(BB);
- if (DomLoop && DomLoop->getHeader() == BB) {
- assert(DomLoop->getLoopPreheader() && "LSR needs a simplified loop nest");
- }
- }
-#endif // DEBUG
-
LLVM_DEBUG(dbgs() << "\nLSR on loop ";
L->getHeader()->printAsOperand(dbgs(), /*PrintType=*/false);
dbgs() << ":\n");
@@ -5870,6 +5868,7 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<MemorySSAWrapperPass>();
}
+namespace {
struct SCEVDbgValueBuilder {
SCEVDbgValueBuilder() = default;
SCEVDbgValueBuilder(const SCEVDbgValueBuilder &Base) {
@@ -5906,9 +5905,12 @@ struct SCEVDbgValueBuilder {
pushValue(V);
}
- void pushConst(const SCEVConstant *C) {
+ bool pushConst(const SCEVConstant *C) {
+ if (C->getAPInt().getMinSignedBits() > 64)
+ return false;
Expr.push_back(llvm::dwarf::DW_OP_consts);
Expr.push_back(C->getAPInt().getSExtValue());
+ return true;
}
/// Several SCEV types are sequences of the same arithmetic operator applied
@@ -5947,10 +5949,10 @@ struct SCEVDbgValueBuilder {
bool pushSCEV(const llvm::SCEV *S) {
bool Success = true;
if (const SCEVConstant *StartInt = dyn_cast<SCEVConstant>(S)) {
- pushConst(StartInt);
+ Success &= pushConst(StartInt);
} else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
- if(!U->getValue())
+ if (!U->getValue())
return false;
pushValue(U->getValue());
@@ -6033,6 +6035,8 @@ struct SCEVDbgValueBuilder {
/// SCEV constant value is an identity function.
bool isIdentityFunction(uint64_t Op, const SCEV *S) {
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
+ if (C->getAPInt().getMinSignedBits() > 64)
+ return false;
int64_t I = C->getAPInt().getSExtValue();
switch (Op) {
case llvm::dwarf::DW_OP_plus:
@@ -6112,14 +6116,15 @@ struct DVIRecoveryRec {
Metadata *LocationOp;
const llvm::SCEV *SCEV;
};
+} // namespace
-static bool RewriteDVIUsingIterCount(DVIRecoveryRec CachedDVI,
+static void RewriteDVIUsingIterCount(DVIRecoveryRec CachedDVI,
const SCEVDbgValueBuilder &IterationCount,
ScalarEvolution &SE) {
// LSR may add locations to previously single location-op DVIs which
// are currently not supported.
if (CachedDVI.DVI->getNumVariableLocationOps() != 1)
- return false;
+ return;
// SCEVs for SSA values are most frequently of the form
// {start,+,stride}, but sometimes they are ({start,+,stride} + %a + ..).
@@ -6127,45 +6132,70 @@ static bool RewriteDVIUsingIterCount(DVIRecoveryRec CachedDVI,
// SCEVs have not been observed to result in debuginfo-lossy optimisations,
// so it's not expected this point will be reached.
if (!isa<SCEVAddRecExpr>(CachedDVI.SCEV))
- return false;
+ return;
LLVM_DEBUG(dbgs() << "scev-salvage: Value to salvage SCEV: "
<< *CachedDVI.SCEV << '\n');
const auto *Rec = cast<SCEVAddRecExpr>(CachedDVI.SCEV);
if (!Rec->isAffine())
- return false;
+ return;
+
+ if (CachedDVI.SCEV->getExpressionSize() > MaxSCEVSalvageExpressionSize)
+ return;
// Initialise a new builder with the iteration count expression. In
// combination with the value's SCEV this enables recovery.
SCEVDbgValueBuilder RecoverValue(IterationCount);
if (!RecoverValue.SCEVToValueExpr(*Rec, SE))
- return false;
+ return;
LLVM_DEBUG(dbgs() << "scev-salvage: Updating: " << *CachedDVI.DVI << '\n');
RecoverValue.applyExprToDbgValue(*CachedDVI.DVI, CachedDVI.Expr);
LLVM_DEBUG(dbgs() << "scev-salvage: to: " << *CachedDVI.DVI << '\n');
- return true;
}
-static bool
+static void RewriteDVIUsingOffset(DVIRecoveryRec &DVIRec, llvm::PHINode &IV,
+ int64_t Offset) {
+ assert(!DVIRec.DVI->hasArgList() && "Expected single location-op dbg.value.");
+ DbgValueInst *DVI = DVIRec.DVI;
+ SmallVector<uint64_t, 8> Ops;
+ DIExpression::appendOffset(Ops, Offset);
+ DIExpression *Expr = DIExpression::prependOpcodes(DVIRec.Expr, Ops, true);
+ LLVM_DEBUG(dbgs() << "scev-salvage: Updating: " << *DVIRec.DVI << '\n');
+ DVI->setExpression(Expr);
+ llvm::Value *ValIV = dyn_cast<llvm::Value>(&IV);
+ DVI->replaceVariableLocationOp(
+ 0u, llvm::MetadataAsValue::get(DVI->getContext(),
+ llvm::ValueAsMetadata::get(ValIV)));
+ LLVM_DEBUG(dbgs() << "scev-salvage: updated with offset to IV: "
+ << *DVIRec.DVI << '\n');
+}
+
+static void
DbgRewriteSalvageableDVIs(llvm::Loop *L, ScalarEvolution &SE,
llvm::PHINode *LSRInductionVar,
SmallVector<DVIRecoveryRec, 2> &DVIToUpdate) {
if (DVIToUpdate.empty())
- return false;
+ return;
const llvm::SCEV *SCEVInductionVar = SE.getSCEV(LSRInductionVar);
assert(SCEVInductionVar &&
"Anticipated a SCEV for the post-LSR induction variable");
- bool Changed = false;
if (const SCEVAddRecExpr *IVAddRec =
dyn_cast<SCEVAddRecExpr>(SCEVInductionVar)) {
+ if (!IVAddRec->isAffine())
+ return;
+
+ if (IVAddRec->getExpressionSize() > MaxSCEVSalvageExpressionSize)
+ return;
+
+ // The iteration count is required to recover location values.
SCEVDbgValueBuilder IterCountExpr;
IterCountExpr.pushValue(LSRInductionVar);
if (!IterCountExpr.SCEVToIterCountExpr(*IVAddRec, SE))
- return false;
+ return;
LLVM_DEBUG(dbgs() << "scev-salvage: IV SCEV: " << *SCEVInductionVar
<< '\n');
@@ -6180,20 +6210,34 @@ DbgRewriteSalvageableDVIs(llvm::Loop *L, ScalarEvolution &SE,
// supported by SCEV salvaging. But, we can attempt a salvage by restoring
// the pre-LSR single-op expression.
if (DVIRec.DVI->hasArgList()) {
+ if (!DVIRec.DVI->getVariableLocationOp(0))
+ continue;
llvm::Type *Ty = DVIRec.DVI->getVariableLocationOp(0)->getType();
DVIRec.DVI->setRawLocation(
llvm::ValueAsMetadata::get(UndefValue::get(Ty)));
DVIRec.DVI->setExpression(DVIRec.Expr);
}
- Changed |= RewriteDVIUsingIterCount(DVIRec, IterCountExpr, SE);
+ LLVM_DEBUG(dbgs() << "scev-salvage: value to recover SCEV: "
+ << *DVIRec.SCEV << '\n');
+
+ // Create a simple expression if the start values of the IV's SCEV and the
+ // SCEV of the value to salvage differ by only a constant value.
+ if (Optional<APInt> Offset =
+ SE.computeConstantDifference(DVIRec.SCEV, SCEVInductionVar)) {
+ if (Offset.getValue().getMinSignedBits() <= 64)
+ RewriteDVIUsingOffset(DVIRec, *LSRInductionVar,
+ Offset.getValue().getSExtValue());
+ } else {
+ RewriteDVIUsingIterCount(DVIRec, IterCountExpr, SE);
+ }
}
}
- return Changed;
}
/// Identify and cache salvageable DVI locations and expressions along with the
-/// corresponding SCEV(s). Also ensure that the DVI is not deleted before
+/// corresponding SCEV(s). Also ensure that the DVI is not deleted between
+/// caching and salvaging.
static void
DbgGatherSalvagableDVI(Loop *L, ScalarEvolution &SE,
SmallVector<DVIRecoveryRec, 2> &SalvageableDVISCEVs,
@@ -6204,10 +6248,24 @@ DbgGatherSalvagableDVI(Loop *L, ScalarEvolution &SE,
if (!DVI)
continue;
+ if (DVI->isUndef())
+ continue;
+
if (DVI->hasArgList())
continue;
- if (!SE.isSCEVable(DVI->getVariableLocationOp(0)->getType()))
+ if (!DVI->getVariableLocationOp(0) ||
+ !SE.isSCEVable(DVI->getVariableLocationOp(0)->getType()))
+ continue;
+
+ // SCEVUnknown wraps an llvm::Value; it does not have a start and stride.
+ // Therefore no translation to DIExpression is performed.
+ const SCEV *S = SE.getSCEV(DVI->getVariableLocationOp(0));
+ if (isa<SCEVUnknown>(S))
+ continue;
+
+ // Avoid wasting resources generating an expression containing undef.
+ if (SE.containsUndefs(S))
continue;
SalvageableDVISCEVs.push_back(
@@ -6223,34 +6281,32 @@ DbgGatherSalvagableDVI(Loop *L, ScalarEvolution &SE,
/// surviving subsequent transforms.
static llvm::PHINode *GetInductionVariable(const Loop &L, ScalarEvolution &SE,
const LSRInstance &LSR) {
- // For now, just pick the first IV generated and inserted. Ideally pick an IV
- // that is unlikely to be optimised away by subsequent transforms.
+
+ auto IsSuitableIV = [&](PHINode *P) {
+ if (!SE.isSCEVable(P->getType()))
+ return false;
+ if (const SCEVAddRecExpr *Rec = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(P)))
+ return Rec->isAffine() && !SE.containsUndefs(SE.getSCEV(P));
+ return false;
+ };
+
+ // For now, just pick the first IV that was generated and inserted by
+ // ScalarEvolution. Ideally pick an IV that is unlikely to be optimised away
+ // by subsequent transforms.
for (const WeakVH &IV : LSR.getScalarEvolutionIVs()) {
if (!IV)
continue;
- assert(isa<PHINode>(&*IV) && "Expected PhI node.");
- if (SE.isSCEVable((*IV).getType())) {
- PHINode *Phi = dyn_cast<PHINode>(&*IV);
- LLVM_DEBUG(const llvm::SCEV *S = SE.getSCEV(Phi);
- dbgs() << "scev-salvage: IV : " << *IV << "with SCEV: " << *S
- << "\n");
- return Phi;
- }
- }
+ // There should only be PHI node IVs.
+ PHINode *P = cast<PHINode>(&*IV);
- for (PHINode &Phi : L.getHeader()->phis()) {
- if (!SE.isSCEVable(Phi.getType()))
- continue;
-
- const llvm::SCEV *PhiSCEV = SE.getSCEV(&Phi);
- if (const llvm::SCEVAddRecExpr *Rec = dyn_cast<SCEVAddRecExpr>(PhiSCEV))
- if (!Rec->isAffine())
- continue;
+ if (IsSuitableIV(P))
+ return P;
+ }
- LLVM_DEBUG(dbgs() << "scev-salvage: Selected IV from loop header: " << Phi
- << " with SCEV: " << *PhiSCEV << "\n");
- return &Phi;
+ for (PHINode &P : L.getHeader()->phis()) {
+ if (IsSuitableIV(&P))
+ return &P;
}
return nullptr;
}
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
index 71eb393fcdd7..1ecbb86724e1 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
@@ -286,8 +286,8 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
AssumptionCache &AC, DependenceInfo &DI,
OptimizationRemarkEmitter &ORE, int OptLevel) {
TargetTransformInfo::UnrollingPreferences UP =
- gatherUnrollingPreferences(L, SE, TTI, nullptr, nullptr, OptLevel, None,
- None, None, None, None, None);
+ gatherUnrollingPreferences(L, SE, TTI, nullptr, nullptr, ORE, OptLevel,
+ None, None, None, None, None, None);
TargetTransformInfo::PeelingPreferences PP =
gatherPeelingPreferences(L, SE, TTI, None, None);
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 49501f324a49..67702520511b 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -184,7 +184,8 @@ static const unsigned NoThreshold = std::numeric_limits<unsigned>::max();
/// flags, TTI overrides and user specified parameters.
TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI,
- BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel,
+ BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
+ OptimizationRemarkEmitter &ORE, int OptLevel,
Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
Optional<bool> UserUpperBound, Optional<unsigned> UserFullUnrollMaxCount) {
@@ -214,7 +215,7 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
UP.MaxIterationsCountToAnalyze = UnrollMaxIterationsCountToAnalyze;
// Override with any target specific settings
- TTI.getUnrollingPreferences(L, SE, UP);
+ TTI.getUnrollingPreferences(L, SE, UP, &ORE);
// Apply size attributes
bool OptForSize = L->getHeader()->getParent()->hasOptSize() ||
@@ -318,6 +319,16 @@ struct EstimatedUnrollCost {
unsigned RolledDynamicCost;
};
+struct PragmaInfo {
+ PragmaInfo(bool UUC, bool PFU, unsigned PC, bool PEU)
+ : UserUnrollCount(UUC), PragmaFullUnroll(PFU), PragmaCount(PC),
+ PragmaEnableUnroll(PEU) {}
+ const bool UserUnrollCount;
+ const bool PragmaFullUnroll;
+ const unsigned PragmaCount;
+ const bool PragmaEnableUnroll;
+};
+
} // end anonymous namespace
/// Figure out if the loop is worth full unrolling.
@@ -746,13 +757,132 @@ public:
// Returns loop size estimation for unrolled loop, given the unrolling
// configuration specified by UP.
- uint64_t getUnrolledLoopSize(TargetTransformInfo::UnrollingPreferences &UP) {
+ uint64_t
+ getUnrolledLoopSize(const TargetTransformInfo::UnrollingPreferences &UP,
+ const unsigned CountOverwrite = 0) const {
assert(LoopSize >= UP.BEInsns &&
"LoopSize should not be less than BEInsns!");
- return (uint64_t)(LoopSize - UP.BEInsns) * UP.Count + UP.BEInsns;
+ if (CountOverwrite)
+ return static_cast<uint64_t>(LoopSize - UP.BEInsns) * CountOverwrite +
+ UP.BEInsns;
+ else
+ return static_cast<uint64_t>(LoopSize - UP.BEInsns) * UP.Count +
+ UP.BEInsns;
}
};
+static Optional<unsigned>
+shouldPragmaUnroll(Loop *L, const PragmaInfo &PInfo,
+ const unsigned TripMultiple, const unsigned TripCount,
+ const UnrollCostEstimator UCE,
+ const TargetTransformInfo::UnrollingPreferences &UP) {
+
+ // Using unroll pragma
+ // 1st priority is unroll count set by "unroll-count" option.
+
+ if (PInfo.UserUnrollCount) {
+ if (UP.AllowRemainder &&
+ UCE.getUnrolledLoopSize(UP, (unsigned)UnrollCount) < UP.Threshold)
+ return (unsigned)UnrollCount;
+ }
+
+ // 2nd priority is unroll count set by pragma.
+ if (PInfo.PragmaCount > 0) {
+ if ((UP.AllowRemainder || (TripMultiple % PInfo.PragmaCount == 0)) &&
+ UCE.getUnrolledLoopSize(UP, PInfo.PragmaCount) < PragmaUnrollThreshold)
+ return PInfo.PragmaCount;
+ }
+
+ if (PInfo.PragmaFullUnroll && TripCount != 0) {
+ if (UCE.getUnrolledLoopSize(UP, TripCount) < PragmaUnrollThreshold)
+ return TripCount;
+ }
+ // If we did not return by this point, continue with the other priorities.
+ return None;
+}
+
+static Optional<unsigned> shouldFullUnroll(
+ Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT,
+ ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues,
+ const unsigned FullUnrollTripCount, const UnrollCostEstimator UCE,
+ const TargetTransformInfo::UnrollingPreferences &UP) {
+
+ if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) {
+ // When computing the unrolled size, note that BEInsns are not replicated
+ // like the rest of the loop body.
+ if (UCE.getUnrolledLoopSize(UP) < UP.Threshold) {
+ return FullUnrollTripCount;
+
+ } else {
+ // The loop isn't that small, but we still can fully unroll it if that
+ // helps to remove a significant number of instructions.
+ // To check that, run additional analysis on the loop.
+ if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost(
+ L, FullUnrollTripCount, DT, SE, EphValues, TTI,
+ UP.Threshold * UP.MaxPercentThresholdBoost / 100,
+ UP.MaxIterationsCountToAnalyze)) {
+ unsigned Boost =
+ getFullUnrollBoostingFactor(*Cost, UP.MaxPercentThresholdBoost);
+ if (Cost->UnrolledCost < UP.Threshold * Boost / 100) {
+ return FullUnrollTripCount;
+ }
+ }
+ }
+ }
+ return None;
+}
+
+static Optional<unsigned>
+shouldPartialUnroll(const unsigned LoopSize, const unsigned TripCount,
+ const UnrollCostEstimator UCE,
+ const TargetTransformInfo::UnrollingPreferences &UP) {
+
+ unsigned count = UP.Count;
+ if (TripCount) {
+ if (!UP.Partial) {
+ LLVM_DEBUG(dbgs() << " will not try to unroll partially because "
+ << "-unroll-allow-partial not given\n");
+ count = 0;
+ return count;
+ }
+ if (count == 0)
+ count = TripCount;
+ if (UP.PartialThreshold != NoThreshold) {
+ // Reduce unroll count to be modulo of TripCount for partial unrolling.
+ if (UCE.getUnrolledLoopSize(UP, count) > UP.PartialThreshold)
+ count = (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) /
+ (LoopSize - UP.BEInsns);
+ if (count > UP.MaxCount)
+ count = UP.MaxCount;
+ while (count != 0 && TripCount % count != 0)
+ count--;
+ if (UP.AllowRemainder && count <= 1) {
+ // If there is no Count that is modulo of TripCount, set Count to
+ // largest power-of-two factor that satisfies the threshold limit.
+ // As we'll create fixup loop, do the type of unrolling only if
+ // remainder loop is allowed.
+ count = UP.DefaultUnrollRuntimeCount;
+ while (count != 0 &&
+ UCE.getUnrolledLoopSize(UP, count) > UP.PartialThreshold)
+ count >>= 1;
+ }
+ if (count < 2) {
+ count = 0;
+ }
+ } else {
+ count = TripCount;
+ }
+ if (count > UP.MaxCount)
+ count = UP.MaxCount;
+
+ LLVM_DEBUG(dbgs() << " partially unrolling with count: " << count << "\n");
+
+ return count;
+ }
+
+ // If we did not return by this point, continue with the other priorities.
+ return None;
+}
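A worked example of the reduction in shouldPartialUnroll, using purely illustrative numbers: with LoopSize = 12, UP.BEInsns = 2, UP.PartialThreshold = 60, UP.DefaultUnrollRuntimeCount = 8, UP.AllowRemainder = true and TripCount = 17, the initial count 17 gives an unrolled size of (12 - 2) * 17 + 2 = 172, above the threshold, so the count drops to (60 - 2) / (12 - 2) = 5; 17 is prime, so decrementing in search of a divisor reaches 1, the runtime default of 8 is tried instead and halved once to 4, whose unrolled size (12 - 2) * 4 + 2 = 42 fits, giving a final partial unroll count of 4.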
// Returns true if unroll count was set explicitly.
// Calculates unroll count and writes it to UP.Count.
// Unless IgnoreUser is true, will also use metadata and command-line options
@@ -770,7 +900,18 @@ bool llvm::computeUnrollCount(
TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound) {
UnrollCostEstimator UCE(*L, LoopSize);
+ Optional<unsigned> UnrollFactor;
+
+ const bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
+ const bool PragmaFullUnroll = hasUnrollFullPragma(L);
+ const unsigned PragmaCount = unrollCountPragmaValue(L);
+ const bool PragmaEnableUnroll = hasUnrollEnablePragma(L);
+ const bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
+ PragmaEnableUnroll || UserUnrollCount;
+
+ PragmaInfo PInfo(UserUnrollCount, PragmaFullUnroll, PragmaCount,
+ PragmaEnableUnroll);
// Use an explicit peel count that has been specified for testing. In this
// case it's not permitted to also specify an explicit unroll count.
if (PP.PeelCount) {
@@ -782,47 +923,29 @@ bool llvm::computeUnrollCount(
UP.Runtime = false;
return true;
}
-
// Check for explicit Count.
// 1st priority is unroll count set by "unroll-count" option.
- bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
- if (UserUnrollCount) {
- UP.Count = UnrollCount;
- UP.AllowExpensiveTripCount = true;
- UP.Force = true;
- if (UP.AllowRemainder && UCE.getUnrolledLoopSize(UP) < UP.Threshold)
- return true;
- }
-
// 2nd priority is unroll count set by pragma.
- unsigned PragmaCount = unrollCountPragmaValue(L);
- if (PragmaCount > 0) {
- UP.Count = PragmaCount;
- UP.Runtime = true;
- UP.AllowExpensiveTripCount = true;
- UP.Force = true;
- if ((UP.AllowRemainder || (TripMultiple % PragmaCount == 0)) &&
- UCE.getUnrolledLoopSize(UP) < PragmaUnrollThreshold)
- return true;
- }
- bool PragmaFullUnroll = hasUnrollFullPragma(L);
- if (PragmaFullUnroll && TripCount != 0) {
- UP.Count = TripCount;
- if (UCE.getUnrolledLoopSize(UP) < PragmaUnrollThreshold)
- return false;
- }
+ UnrollFactor = shouldPragmaUnroll(L, PInfo, TripMultiple, TripCount, UCE, UP);
+
+ if (UnrollFactor) {
+ UP.Count = *UnrollFactor;
- bool PragmaEnableUnroll = hasUnrollEnablePragma(L);
- bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
- PragmaEnableUnroll || UserUnrollCount;
-
- if (ExplicitUnroll && TripCount != 0) {
- // If the loop has an unrolling pragma, we want to be more aggressive with
- // unrolling limits. Set thresholds to at least the PragmaUnrollThreshold
- // value which is larger than the default limits.
- UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold);
- UP.PartialThreshold =
- std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold);
+ if (UserUnrollCount || (PragmaCount > 0)) {
+ UP.AllowExpensiveTripCount = true;
+ UP.Force = true;
+ }
+ UP.Runtime |= (PragmaCount > 0);
+ return ExplicitUnroll;
+ } else {
+ if (ExplicitUnroll && TripCount != 0) {
+ // If the loop has an unrolling pragma, we want to be more aggressive with
+ // unrolling limits. Set thresholds to at least the PragmaUnrollThreshold
+ // value which is larger than the default limits.
+ UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold);
+ UP.PartialThreshold =
+ std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold);
+ }
}
// 3rd priority is full unroll count.
@@ -852,71 +975,55 @@ bool llvm::computeUnrollCount(
unsigned FullUnrollTripCount =
ExactTripCount ? ExactTripCount : FullUnrollMaxTripCount;
UP.Count = FullUnrollTripCount;
- if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) {
- // When computing the unrolled size, note that BEInsns are not replicated
- // like the rest of the loop body.
- if (UCE.getUnrolledLoopSize(UP) < UP.Threshold) {
- UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount);
- return ExplicitUnroll;
- } else {
- // The loop isn't that small, but we still can fully unroll it if that
- // helps to remove a significant number of instructions.
- // To check that, run additional analysis on the loop.
- if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost(
- L, FullUnrollTripCount, DT, SE, EphValues, TTI,
- UP.Threshold * UP.MaxPercentThresholdBoost / 100,
- UP.MaxIterationsCountToAnalyze)) {
- unsigned Boost =
- getFullUnrollBoostingFactor(*Cost, UP.MaxPercentThresholdBoost);
- if (Cost->UnrolledCost < UP.Threshold * Boost / 100) {
- UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount);
- return ExplicitUnroll;
- }
- }
- }
+
+ UnrollFactor =
+ shouldFullUnroll(L, TTI, DT, SE, EphValues, FullUnrollTripCount, UCE, UP);
+
+ // If shouldFullUnroll can do the unrolling, some side parameters should be
+ // set.
+ if (UnrollFactor) {
+ UP.Count = *UnrollFactor;
+ UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount);
+ TripCount = FullUnrollTripCount;
+ TripMultiple = UP.UpperBound ? 1 : TripMultiple;
+ return ExplicitUnroll;
+ } else {
+ UP.Count = FullUnrollTripCount;
}
// 4th priority is loop peeling.
- computePeelCount(L, LoopSize, PP, TripCount, SE, UP.Threshold);
+ computePeelCount(L, LoopSize, PP, TripCount, DT, SE, UP.Threshold);
if (PP.PeelCount) {
UP.Runtime = false;
UP.Count = 1;
return ExplicitUnroll;
}
+ // Before starting partial unrolling, set UP.Partial to true if the user
+ // explicitly asked for unrolling.
+ if (TripCount)
+ UP.Partial |= ExplicitUnroll;
+
// 5th priority is partial unrolling.
// Try partial unroll only when TripCount could be statically calculated.
- if (TripCount) {
- UP.Partial |= ExplicitUnroll;
- if (!UP.Partial) {
- LLVM_DEBUG(dbgs() << " will not try to unroll partially because "
- << "-unroll-allow-partial not given\n");
- UP.Count = 0;
- return false;
- }
- if (UP.Count == 0)
- UP.Count = TripCount;
+ UnrollFactor = shouldPartialUnroll(LoopSize, TripCount, UCE, UP);
+
+ if (UnrollFactor) {
+ UP.Count = *UnrollFactor;
+
+ if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
+ UP.Count != TripCount)
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE,
+ "FullUnrollAsDirectedTooLarge",
+ L->getStartLoc(), L->getHeader())
+ << "Unable to fully unroll loop as directed by unroll pragma "
+ "because "
+ "unrolled size is too large.";
+ });
+
if (UP.PartialThreshold != NoThreshold) {
- // Reduce unroll count to be modulo of TripCount for partial unrolling.
- if (UCE.getUnrolledLoopSize(UP) > UP.PartialThreshold)
- UP.Count =
- (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) /
- (LoopSize - UP.BEInsns);
- if (UP.Count > UP.MaxCount)
- UP.Count = UP.MaxCount;
- while (UP.Count != 0 && TripCount % UP.Count != 0)
- UP.Count--;
- if (UP.AllowRemainder && UP.Count <= 1) {
- // If there is no Count that is modulo of TripCount, set Count to
- // largest power-of-two factor that satisfies the threshold limit.
- // As we'll create fixup loop, do the type of unrolling only if
- // remainder loop is allowed.
- UP.Count = UP.DefaultUnrollRuntimeCount;
- while (UP.Count != 0 &&
- UCE.getUnrolledLoopSize(UP) > UP.PartialThreshold)
- UP.Count >>= 1;
- }
- if (UP.Count < 2) {
+ if (UP.Count == 0) {
if (PragmaEnableUnroll)
ORE->emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE,
@@ -926,25 +1033,8 @@ bool llvm::computeUnrollCount(
"pragma "
"because unrolled size is too large.";
});
- UP.Count = 0;
}
- } else {
- UP.Count = TripCount;
}
- if (UP.Count > UP.MaxCount)
- UP.Count = UP.MaxCount;
- if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
- UP.Count != TripCount)
- ORE->emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE,
- "FullUnrollAsDirectedTooLarge",
- L->getStartLoc(), L->getHeader())
- << "Unable to fully unroll loop as directed by unroll pragma "
- "because "
- "unrolled size is too large.";
- });
- LLVM_DEBUG(dbgs() << " partially unrolling with count: " << UP.Count
- << "\n");
return ExplicitUnroll;
}
assert(TripCount == 0 &&
@@ -981,8 +1071,6 @@ bool llvm::computeUnrollCount(
UP.AllowExpensiveTripCount = true;
}
}
-
- // Reduce count based on the type of unrolling and the threshold values.
UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount;
if (!UP.Runtime) {
LLVM_DEBUG(
@@ -1017,7 +1105,7 @@ bool llvm::computeUnrollCount(
using namespace ore;
- if (PragmaCount > 0 && !UP.AllowRemainder)
+ if (unrollCountPragmaValue(L) > 0 && !UP.AllowRemainder)
ORE->emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE,
"DifferentUnrollCountFromDirected",
@@ -1079,7 +1167,7 @@ static LoopUnrollResult tryToUnrollLoop(
bool NotDuplicatable;
bool Convergent;
TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
- L, SE, TTI, BFI, PSI, OptLevel, ProvidedThreshold, ProvidedCount,
+ L, SE, TTI, BFI, PSI, ORE, OptLevel, ProvidedThreshold, ProvidedCount,
ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
ProvidedFullUnrollMaxCount);
TargetTransformInfo::PeelingPreferences PP = gatherPeelingPreferences(
@@ -1529,3 +1617,25 @@ PreservedAnalyses LoopUnrollPass::run(Function &F,
return getLoopPassPreservedAnalyses();
}
+
+void LoopUnrollPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<LoopUnrollPass> *>(this)->printPipeline(
+ OS, MapClassName2PassName);
+ OS << "<";
+ if (UnrollOpts.AllowPartial != None)
+ OS << (UnrollOpts.AllowPartial.getValue() ? "" : "no-") << "partial;";
+ if (UnrollOpts.AllowPeeling != None)
+ OS << (UnrollOpts.AllowPeeling.getValue() ? "" : "no-") << "peeling;";
+ if (UnrollOpts.AllowRuntime != None)
+ OS << (UnrollOpts.AllowRuntime.getValue() ? "" : "no-") << "runtime;";
+ if (UnrollOpts.AllowUpperBound != None)
+ OS << (UnrollOpts.AllowUpperBound.getValue() ? "" : "no-") << "upperbound;";
+ if (UnrollOpts.AllowProfileBasedPeeling != None)
+ OS << (UnrollOpts.AllowProfileBasedPeeling.getValue() ? "" : "no-")
+ << "profile-peeling;";
+ if (UnrollOpts.FullUnrollMaxCount != None)
+ OS << "full-unroll-max=" << UnrollOpts.FullUnrollMaxCount << ";";
+ OS << "O" << UnrollOpts.OptLevel;
+ OS << ">";
+}
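
The printPipeline hook added above re-serializes the pass's configured options into a textual pipeline form (a string shaped like <partial;no-runtime;O2>), emitting nothing for unset options and a "no-" prefix for disabled ones. A minimal standalone sketch of that tri-state formatting pattern, using std::optional and hypothetical option names rather than the real UnrollOpts fields:

// Standalone sketch, not LLVM code: mimics the print-nothing / "no-" prefix /
// plain-name option printing used by printPipeline above.
#include <iostream>
#include <optional>
#include <string>

static std::string printUnrollOpts(std::optional<bool> Partial,
                                   std::optional<bool> Runtime,
                                   std::optional<unsigned> MaxCount,
                                   unsigned OptLevel) {
  std::string S = "<";
  if (Partial)
    S += (*Partial ? "" : "no-") + std::string("partial;");
  if (Runtime)
    S += (*Runtime ? "" : "no-") + std::string("runtime;");
  if (MaxCount)
    S += "full-unroll-max=" + std::to_string(*MaxCount) + ";";
  S += "O" + std::to_string(OptLevel) + ">";
  return S;
}

int main() {
  // Prints "<partial;no-runtime;O2>", matching the shape emitted above.
  std::cout << printUnrollOpts(true, false, std::nullopt, 2) << "\n";
}
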
diff --git a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
index 9a854ff80246..76bb5497c2c2 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -232,10 +232,8 @@ namespace {
AU.addPreserved<LazyBranchProbabilityInfoPass>();
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetTransformInfoWrapperPass>();
- if (EnableMSSALoopDependency) {
- AU.addRequired<MemorySSAWrapperPass>();
- AU.addPreserved<MemorySSAWrapperPass>();
- }
+ AU.addRequired<MemorySSAWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
if (HasBranchDivergence)
AU.addRequired<LegacyDivergenceAnalysis>();
getLoopAnalysisUsage(AU);
@@ -539,11 +537,8 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPMRef) {
LPM = &LPMRef;
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- if (EnableMSSALoopDependency) {
- MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
- MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
- assert(DT && "Cannot update MemorySSA without a valid DomTree.");
- }
+ MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
+ MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
CurrentLoop = L;
Function *F = CurrentLoop->getHeader()->getParent();
@@ -551,19 +546,19 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPMRef) {
if (SanitizeMemory)
SafetyInfo.computeLoopSafetyInfo(L);
- if (MSSA && VerifyMemorySSA)
+ if (VerifyMemorySSA)
MSSA->verifyMemorySSA();
bool Changed = false;
do {
assert(CurrentLoop->isLCSSAForm(*DT));
- if (MSSA && VerifyMemorySSA)
+ if (VerifyMemorySSA)
MSSA->verifyMemorySSA();
RedoLoop = false;
Changed |= processCurrentLoop();
} while (RedoLoop);
- if (MSSA && VerifyMemorySSA)
+ if (VerifyMemorySSA)
MSSA->verifyMemorySSA();
return Changed;
@@ -1312,8 +1307,7 @@ void LoopUnswitch::splitExitEdges(
for (unsigned I = 0, E = ExitBlocks.size(); I != E; ++I) {
BasicBlock *ExitBlock = ExitBlocks[I];
- SmallVector<BasicBlock *, 4> Preds(pred_begin(ExitBlock),
- pred_end(ExitBlock));
+ SmallVector<BasicBlock *, 4> Preds(predecessors(ExitBlock));
// Although SplitBlockPredecessors doesn't preserve loop-simplify in
// general, if we call it on all predecessors of all exits then it does.
diff --git a/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp
index bd3001988369..186065db327e 100644
--- a/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp
@@ -55,11 +55,17 @@ static bool replaceConditionalBranchesOnConstant(Instruction *II,
Value *NewValue,
DomTreeUpdater *DTU) {
bool HasDeadBlocks = false;
- SmallSetVector<Instruction *, 8> Worklist;
+ SmallSetVector<Instruction *, 8> UnsimplifiedUsers;
replaceAndRecursivelySimplify(II, NewValue, nullptr, nullptr, nullptr,
- &Worklist);
- for (auto I : Worklist) {
- BranchInst *BI = dyn_cast<BranchInst>(I);
+ &UnsimplifiedUsers);
+ // UnsimplifiedUsers can contain PHI nodes that may be removed when
+ // replacing the branch instructions, so use a value handle worklist
+ // to handle those possibly removed instructions.
+ SmallVector<WeakVH, 8> Worklist(UnsimplifiedUsers.begin(),
+ UnsimplifiedUsers.end());
+
+ for (auto &VH : Worklist) {
+ BranchInst *BI = dyn_cast_or_null<BranchInst>(VH);
if (!BI)
continue;
if (BI->isUnconditional())
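
The WeakVH worklist introduced above matters because replaceAndRecursivelySimplify can leave PHI nodes in UnsimplifiedUsers that are later deleted while the conditional branches are rewritten; a weak value handle then reads as null instead of dangling. A minimal standalone sketch of that behaviour (assuming an LLVM build environment; the function and value names here are invented for illustration):

// Standalone sketch, not part of the vendor import: shows that a WeakVH
// worklist entry becomes null after the instruction it tracks is erased,
// so dyn_cast_or_null<> can skip it safely.
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("demo", Ctx);
  auto *I32 = Type::getInt32Ty(Ctx);
  Function *F = Function::Create(FunctionType::get(I32, {I32, I32}, false),
                                 Function::ExternalLinkage, "f", &M);
  BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F);
  IRBuilder<> B(BB);
  Value *Sum = B.CreateAdd(F->getArg(0), F->getArg(1), "sum");
  B.CreateRet(Sum);

  WeakVH VH(Sum); // queue the instruction the way the worklist above does
  cast<Instruction>(Sum)->replaceAllUsesWith(F->getArg(0));
  cast<Instruction>(Sum)->eraseFromParent();

  // The handle now reads as null; a raw Instruction* here would dangle.
  if (!dyn_cast_or_null<Instruction>(VH))
    errs() << "tracked instruction was erased, entry skipped\n";
}
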
diff --git a/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp b/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
index ead8082f3036..1c186e9a0488 100644
--- a/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
@@ -357,11 +357,10 @@ static bool lowerExpectIntrinsic(Function &F) {
// Remove llvm.expect intrinsics. Iterate backwards in order
// to process select instructions before the intrinsic gets
// removed.
- for (auto BI = BB.rbegin(), BE = BB.rend(); BI != BE;) {
- Instruction *Inst = &*BI++;
- CallInst *CI = dyn_cast<CallInst>(Inst);
+ for (Instruction &Inst : llvm::make_early_inc_range(llvm::reverse(BB))) {
+ CallInst *CI = dyn_cast<CallInst>(&Inst);
if (!CI) {
- if (SelectInst *SI = dyn_cast<SelectInst>(Inst)) {
+ if (SelectInst *SI = dyn_cast<SelectInst>(&Inst)) {
if (handleBrSelExpect(*SI))
ExpectIntrinsicsHandled++;
}
diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
index 42c183a6408e..4e4097e13271 100644
--- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -900,8 +900,7 @@ public:
// UndefedInsts and then check that we in fact remove them.
SmallSet<Instruction *, 16> UndefedInsts;
for (auto *Inst : reverse(ToRemove)) {
- for (auto I = Inst->use_begin(), E = Inst->use_end(); I != E;) {
- Use &U = *I++;
+ for (Use &U : llvm::make_early_inc_range(Inst->uses())) {
if (auto *Undefed = dyn_cast<Instruction>(U.getUser()))
UndefedInsts.insert(Undefed);
U.set(UndefValue::get(Inst->getType()));
@@ -981,8 +980,9 @@ public:
Value *EltPtr = createElementPtr(Ptr, EltTy, Builder);
MatrixTy Result;
for (unsigned I = 0, E = Shape.getNumVectors(); I < E; ++I) {
- Value *GEP = computeVectorAddr(EltPtr, Builder.getInt64(I), Stride,
- Shape.getStride(), EltTy, Builder);
+ Value *GEP = computeVectorAddr(
+ EltPtr, Builder.getIntN(Stride->getType()->getScalarSizeInBits(), I),
+ Stride, Shape.getStride(), EltTy, Builder);
Value *Vector = Builder.CreateAlignedLoad(
VecTy, GEP, getAlignForIndex(I, Stride, EltTy, MAlign),
IsVolatile, "col.load");
@@ -1071,9 +1071,11 @@ public:
auto VType = cast<VectorType>(Ty);
Value *EltPtr = createElementPtr(Ptr, VType->getElementType(), Builder);
for (auto Vec : enumerate(StoreVal.vectors())) {
- Value *GEP = computeVectorAddr(EltPtr, Builder.getInt64(Vec.index()),
- Stride, StoreVal.getStride(),
- VType->getElementType(), Builder);
+ Value *GEP = computeVectorAddr(
+ EltPtr,
+ Builder.getIntN(Stride->getType()->getScalarSizeInBits(),
+ Vec.index()),
+ Stride, StoreVal.getStride(), VType->getElementType(), Builder);
Builder.CreateAlignedStore(Vec.value(), GEP,
getAlignForIndex(Vec.index(), Stride,
VType->getElementType(),
@@ -2261,6 +2263,16 @@ PreservedAnalyses LowerMatrixIntrinsicsPass::run(Function &F,
return PreservedAnalyses::all();
}
+void LowerMatrixIntrinsicsPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<LowerMatrixIntrinsicsPass> *>(this)->printPipeline(
+ OS, MapClassName2PassName);
+ OS << "<";
+ if (Minimal)
+ OS << "minimal";
+ OS << ">";
+}
+
namespace {
class LowerMatrixIntrinsicsLegacyPass : public FunctionPass {
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 2e36c50b75fc..67335a45fb58 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -22,7 +22,6 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/Loads.h"
-#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
@@ -67,9 +66,10 @@ using namespace llvm;
#define DEBUG_TYPE "memcpyopt"
-static cl::opt<bool>
- EnableMemorySSA("enable-memcpyopt-memoryssa", cl::init(true), cl::Hidden,
- cl::desc("Use MemorySSA-backed MemCpyOpt."));
+static cl::opt<bool> EnableMemCpyOptWithoutLibcalls(
+ "enable-memcpyopt-without-libcalls", cl::init(false), cl::Hidden,
+ cl::ZeroOrMore,
+ cl::desc("Enable memcpyopt even when libcalls are disabled"));
STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
STATISTIC(NumMemSetInfer, "Number of memsets inferred");
@@ -178,9 +178,9 @@ public:
}
void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
- int64_t StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());
-
- addRange(OffsetFromFirst, StoreSize, SI->getPointerOperand(),
+ TypeSize StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());
+ assert(!StoreSize.isScalable() && "Can't track scalable-typed stores");
+ addRange(OffsetFromFirst, StoreSize.getFixedSize(), SI->getPointerOperand(),
SI->getAlign().value(), SI);
}
@@ -282,13 +282,9 @@ private:
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
- if (!EnableMemorySSA)
- AU.addRequired<MemoryDependenceWrapperPass>();
- AU.addPreserved<MemoryDependenceWrapperPass>();
AU.addRequired<AAResultsWrapperPass>();
AU.addPreserved<AAResultsWrapperPass>();
- if (EnableMemorySSA)
- AU.addRequired<MemorySSAWrapperPass>();
+ AU.addRequired<MemorySSAWrapperPass>();
AU.addPreserved<MemorySSAWrapperPass>();
}
};
@@ -304,7 +300,6 @@ INITIALIZE_PASS_BEGIN(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization",
false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
@@ -329,10 +324,7 @@ static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start,
}
void MemCpyOptPass::eraseInstruction(Instruction *I) {
- if (MSSAU)
- MSSAU->removeMemoryAccess(I);
- if (MD)
- MD->removeInstruction(I);
+ MSSAU->removeMemoryAccess(I);
I->eraseFromParent();
}
@@ -371,6 +363,11 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
Value *ByteVal) {
const DataLayout &DL = StartInst->getModule()->getDataLayout();
+ // We can't track scalable types
+ if (StoreInst *SI = dyn_cast<StoreInst>(StartInst))
+ if (DL.getTypeStoreSize(SI->getOperand(0)->getType()).isScalable())
+ return nullptr;
+
// Okay, so we now have a single store that can be splatable. Scan to find
// all subsequent stores of the same value to offset from the same pointer.
// Join these together into ranges, so we can decide whether contiguous blocks
@@ -389,14 +386,12 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
// memsets.
MemoryDef *LastMemDef = nullptr;
for (++BI; !BI->isTerminator(); ++BI) {
- if (MSSAU) {
- auto *CurrentAcc = cast_or_null<MemoryUseOrDef>(
- MSSAU->getMemorySSA()->getMemoryAccess(&*BI));
- if (CurrentAcc) {
- MemInsertPoint = CurrentAcc;
- if (auto *CurrentDef = dyn_cast<MemoryDef>(CurrentAcc))
- LastMemDef = CurrentDef;
- }
+ auto *CurrentAcc = cast_or_null<MemoryUseOrDef>(
+ MSSAU->getMemorySSA()->getMemoryAccess(&*BI));
+ if (CurrentAcc) {
+ MemInsertPoint = CurrentAcc;
+ if (auto *CurrentDef = dyn_cast<MemoryDef>(CurrentAcc))
+ LastMemDef = CurrentDef;
}
// Calls that only access inaccessible memory do not block merging
@@ -426,6 +421,10 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
break;
+ // We can't track ranges involving scalable types.
+ if (DL.getTypeStoreSize(StoredVal->getType()).isScalable())
+ break;
+
// Check to see if this stored value is of the same byte-splattable value.
Value *StoredByte = isBytewiseValue(StoredVal, DL);
if (isa<UndefValue>(ByteVal) && StoredByte)
@@ -494,19 +493,17 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
if (!Range.TheStores.empty())
AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
- if (MSSAU) {
- assert(LastMemDef && MemInsertPoint &&
- "Both LastMemDef and MemInsertPoint need to be set");
- auto *NewDef =
- cast<MemoryDef>(MemInsertPoint->getMemoryInst() == &*BI
- ? MSSAU->createMemoryAccessBefore(
- AMemSet, LastMemDef, MemInsertPoint)
- : MSSAU->createMemoryAccessAfter(
- AMemSet, LastMemDef, MemInsertPoint));
- MSSAU->insertDef(NewDef, /*RenameUses=*/true);
- LastMemDef = NewDef;
- MemInsertPoint = NewDef;
- }
+ assert(LastMemDef && MemInsertPoint &&
+ "Both LastMemDef and MemInsertPoint need to be set");
+ auto *NewDef =
+ cast<MemoryDef>(MemInsertPoint->getMemoryInst() == &*BI
+ ? MSSAU->createMemoryAccessBefore(
+ AMemSet, LastMemDef, MemInsertPoint)
+ : MSSAU->createMemoryAccessAfter(
+ AMemSet, LastMemDef, MemInsertPoint));
+ MSSAU->insertDef(NewDef, /*RenameUses=*/true);
+ LastMemDef = NewDef;
+ MemInsertPoint = NewDef;
// Zap all the stores.
for (Instruction *SI : Range.TheStores)
@@ -615,17 +612,15 @@ bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) {
// TODO: Simplify this once P will be determined by MSSA, in which case the
// discrepancy can no longer occur.
MemoryUseOrDef *MemInsertPoint = nullptr;
- if (MSSAU) {
- if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(P)) {
- MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator());
- } else {
- const Instruction *ConstP = P;
- for (const Instruction &I : make_range(++ConstP->getReverseIterator(),
- ++LI->getReverseIterator())) {
- if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(&I)) {
- MemInsertPoint = MA;
- break;
- }
+ if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(P)) {
+ MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator());
+ } else {
+ const Instruction *ConstP = P;
+ for (const Instruction &I : make_range(++ConstP->getReverseIterator(),
+ ++LI->getReverseIterator())) {
+ if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(&I)) {
+ MemInsertPoint = MA;
+ break;
}
}
}
@@ -634,12 +629,10 @@ bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) {
for (auto *I : llvm::reverse(ToLift)) {
LLVM_DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n");
I->moveBefore(P);
- if (MSSAU) {
- assert(MemInsertPoint && "Must have found insert point");
- if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(I)) {
- MSSAU->moveAfter(MA, MemInsertPoint);
- MemInsertPoint = MA;
- }
+ assert(MemInsertPoint && "Must have found insert point");
+ if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(I)) {
+ MSSAU->moveAfter(MA, MemInsertPoint);
+ MemInsertPoint = MA;
}
}
@@ -673,7 +666,13 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
LI->getParent() == SI->getParent()) {
auto *T = LI->getType();
- if (T->isAggregateType()) {
+ // Don't introduce calls to memcpy/memmove intrinsics out of thin air if
+ // the corresponding libcalls are not available.
+ // TODO: We should really distinguish between libcall availability and
+ // our ability to introduce intrinsics.
+ if (T->isAggregateType() &&
+ (EnableMemCpyOptWithoutLibcalls ||
+ (TLI->has(LibFunc_memcpy) && TLI->has(LibFunc_memmove)))) {
MemoryLocation LoadLoc = MemoryLocation::get(LI);
// We use alias analysis to check if an instruction may store to
@@ -703,9 +702,10 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (P) {
// If we load from memory that may alias the memory we store to,
// memmove must be used to preserve semantic. If not, memcpy can
- // be used.
+ // be used. Also, if we load from constant memory, memcpy can be used
+ // as the constant memory won't be modified.
bool UseMemMove = false;
- if (!AA->isNoAlias(MemoryLocation::get(SI), LoadLoc))
+ if (isModSet(AA->getModRefInfo(SI, LoadLoc)))
UseMemMove = true;
uint64_t Size = DL.getTypeStoreSize(T);
@@ -724,13 +724,10 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => "
<< *M << "\n");
- if (MSSAU) {
- auto *LastDef =
- cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI));
- auto *NewAccess =
- MSSAU->createMemoryAccessAfter(M, LastDef, LastDef);
- MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
- }
+ auto *LastDef =
+ cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI));
+ auto *NewAccess = MSSAU->createMemoryAccessAfter(M, LastDef, LastDef);
+ MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
eraseInstruction(SI);
eraseInstruction(LI);
@@ -746,38 +743,21 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
// happen to be using a load-store pair to implement it, rather than
// a memcpy.
CallInst *C = nullptr;
- if (EnableMemorySSA) {
- if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
- MSSA->getWalker()->getClobberingMemoryAccess(LI))) {
- // The load most post-dom the call. Limit to the same block for now.
- // TODO: Support non-local call-slot optimization?
- if (LoadClobber->getBlock() == SI->getParent())
- C = dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
- }
- } else {
- MemDepResult ldep = MD->getDependency(LI);
- if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst()))
- C = dyn_cast<CallInst>(ldep.getInst());
+ if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
+ MSSA->getWalker()->getClobberingMemoryAccess(LI))) {
+      // The load must post-dom the call. Limit to the same block for now.

+ // TODO: Support non-local call-slot optimization?
+ if (LoadClobber->getBlock() == SI->getParent())
+ C = dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
}
if (C) {
// Check that nothing touches the dest of the "copy" between
// the call and the store.
MemoryLocation StoreLoc = MemoryLocation::get(SI);
- if (EnableMemorySSA) {
- if (accessedBetween(*AA, StoreLoc, MSSA->getMemoryAccess(C),
- MSSA->getMemoryAccess(SI)))
- C = nullptr;
- } else {
- for (BasicBlock::iterator I = --SI->getIterator(),
- E = C->getIterator();
- I != E; --I) {
- if (isModOrRefSet(AA->getModRefInfo(&*I, StoreLoc))) {
- C = nullptr;
- break;
- }
- }
- }
+ if (accessedBetween(*AA, StoreLoc, MSSA->getMemoryAccess(C),
+ MSSA->getMemoryAccess(SI)))
+ C = nullptr;
}
if (C) {
@@ -796,6 +776,13 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
}
}
+ // The following code creates memset intrinsics out of thin air. Don't do
+ // this if the corresponding libfunc is not available.
+ // TODO: We should really distinguish between libcall availability and
+ // our ability to introduce intrinsics.
+ if (!(TLI->has(LibFunc_memset) || EnableMemCpyOptWithoutLibcalls))
+ return false;
+
// There are two cases that are interesting for this code to handle: memcpy
// and memset. Right now we only handle memset.
@@ -822,13 +809,12 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");
- if (MSSAU) {
- assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI)));
- auto *LastDef =
- cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI));
- auto *NewAccess = MSSAU->createMemoryAccessAfter(M, LastDef, LastDef);
- MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
- }
+ // The newly inserted memset is immediately overwritten by the original
+ // store, so we do not need to rename uses.
+ auto *StoreDef = cast<MemoryDef>(MSSA->getMemoryAccess(SI));
+ auto *NewAccess = MSSAU->createMemoryAccessBefore(
+ M, StoreDef->getDefiningAccess(), StoreDef);
+ MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/false);
eraseInstruction(SI);
NumMemSetInfer++;
@@ -859,7 +845,7 @@ bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
/// the call write its result directly into the destination of the memcpy.
bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
Instruction *cpyStore, Value *cpyDest,
- Value *cpySrc, uint64_t cpyLen,
+ Value *cpySrc, TypeSize cpySize,
Align cpyAlign, CallInst *C) {
// The general transformation to keep in mind is
//
@@ -875,6 +861,10 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
// src only holds uninitialized values at the moment of the call, meaning that
// the memcpy can be discarded rather than moved.
+ // We can't optimize scalable types.
+ if (cpySize.isScalable())
+ return false;
+
// Lifetime marks shouldn't be operated on.
if (Function *F = C->getCalledFunction())
if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start)
@@ -893,13 +883,13 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
uint64_t srcSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType()) *
srcArraySize->getZExtValue();
- if (cpyLen < srcSize)
+ if (cpySize < srcSize)
return false;
// Check that accessing the first srcSize bytes of dest will not cause a
// trap. Otherwise the transform is invalid since it might cause a trap
// to occur earlier than it otherwise would.
- if (!isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpyLen),
+ if (!isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpySize),
DL, C, DT))
return false;
@@ -1020,11 +1010,6 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
cast<AllocaInst>(cpyDest)->setAlignment(srcAlign);
}
- // Drop any cached information about the call, because we may have changed
- // its dependence information by changing its parameter.
- if (MD)
- MD->removeInstruction(C);
-
// Update AA metadata
// FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be
// handled here, but combineMetadata doesn't support them yet
@@ -1073,28 +1058,19 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
//
// TODO: If the code between M and MDep is transparent to the destination "c",
// then we could still perform the xform by moving M up to the first memcpy.
- if (EnableMemorySSA) {
- // TODO: It would be sufficient to check the MDep source up to the memcpy
- // size of M, rather than MDep.
- if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep),
- MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(M)))
- return false;
- } else {
- // NOTE: This is conservative, it will stop on any read from the source loc,
- // not just the defining memcpy.
- MemDepResult SourceDep =
- MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false,
- M->getIterator(), M->getParent());
- if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
- return false;
- }
+ // TODO: It would be sufficient to check the MDep source up to the memcpy
+ // size of M, rather than MDep.
+ if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep),
+ MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(M)))
+ return false;
// If the dest of the second might alias the source of the first, then the
- // source and dest might overlap. We still want to eliminate the intermediate
- // value, but we have to generate a memmove instead of memcpy.
+ // source and dest might overlap. In addition, if the source of the first
+ // points to constant memory, they won't overlap by definition. Otherwise, we
+ // still want to eliminate the intermediate value, but we have to generate a
+ // memmove instead of memcpy.
bool UseMemMove = false;
- if (!AA->isNoAlias(MemoryLocation::getForDest(M),
- MemoryLocation::getForSource(MDep)))
+ if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(MDep))))
UseMemMove = true;
// If all checks passed, then we can transform M.
@@ -1121,12 +1097,10 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
MDep->getRawSource(), MDep->getSourceAlign(),
M->getLength(), M->isVolatile());
- if (MSSAU) {
- assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)));
- auto *LastDef = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
- auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
- MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
- }
+ assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)));
+ auto *LastDef = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
+ auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
+ MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
// Remove the instruction we're replacing.
eraseInstruction(M);
@@ -1156,30 +1130,16 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
// Check that src and dst of the memcpy aren't the same. While memcpy
// operands cannot partially overlap, exact equality is allowed.
- if (!AA->isNoAlias(MemoryLocation(MemCpy->getSource(),
- LocationSize::precise(1)),
- MemoryLocation(MemCpy->getDest(),
- LocationSize::precise(1))))
+ if (isModSet(AA->getModRefInfo(MemCpy, MemoryLocation::getForSource(MemCpy))))
return false;
- if (EnableMemorySSA) {
- // We know that dst up to src_size is not written. We now need to make sure
- // that dst up to dst_size is not accessed. (If we did not move the memset,
- // checking for reads would be sufficient.)
- if (accessedBetween(*AA, MemoryLocation::getForDest(MemSet),
- MSSA->getMemoryAccess(MemSet),
- MSSA->getMemoryAccess(MemCpy))) {
- return false;
- }
- } else {
- // We have already checked that dst up to src_size is not accessed. We
- // need to make sure that there are no accesses up to dst_size either.
- MemDepResult DstDepInfo = MD->getPointerDependencyFrom(
- MemoryLocation::getForDest(MemSet), false, MemCpy->getIterator(),
- MemCpy->getParent());
- if (DstDepInfo.getInst() != MemSet)
- return false;
- }
+ // We know that dst up to src_size is not written. We now need to make sure
+ // that dst up to dst_size is not accessed. (If we did not move the memset,
+ // checking for reads would be sufficient.)
+ if (accessedBetween(*AA, MemoryLocation::getForDest(MemSet),
+ MSSA->getMemoryAccess(MemSet),
+ MSSA->getMemoryAccess(MemCpy)))
+ return false;
// Use the same i8* dest as the memcpy, killing the memset dest if different.
Value *Dest = MemCpy->getRawDest();
@@ -1229,18 +1189,16 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
SrcSize),
MemSet->getOperand(1), MemsetLen, MaybeAlign(Align));
- if (MSSAU) {
- assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) &&
- "MemCpy must be a MemoryDef");
- // The new memset is inserted after the memcpy, but it is known that its
- // defining access is the memset about to be removed which immediately
- // precedes the memcpy.
- auto *LastDef =
- cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
- auto *NewAccess = MSSAU->createMemoryAccessBefore(
- NewMemSet, LastDef->getDefiningAccess(), LastDef);
- MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
- }
+ assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) &&
+ "MemCpy must be a MemoryDef");
+ // The new memset is inserted after the memcpy, but it is known that its
+ // defining access is the memset about to be removed which immediately
+ // precedes the memcpy.
+ auto *LastDef =
+ cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
+ auto *NewAccess = MSSAU->createMemoryAccessBefore(
+ NewMemSet, LastDef->getDefiningAccess(), LastDef);
+ MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
eraseInstruction(MemSet);
return true;
@@ -1248,23 +1206,8 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
/// Determine whether the instruction has undefined content for the given Size,
/// either because it was freshly alloca'd or started its lifetime.
-static bool hasUndefContents(Instruction *I, Value *Size) {
- if (isa<AllocaInst>(I))
- return true;
-
- if (ConstantInt *CSize = dyn_cast<ConstantInt>(Size)) {
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
- if (II->getIntrinsicID() == Intrinsic::lifetime_start)
- if (ConstantInt *LTSize = dyn_cast<ConstantInt>(II->getArgOperand(0)))
- if (LTSize->getZExtValue() >= CSize->getZExtValue())
- return true;
- }
-
- return false;
-}
-
-static bool hasUndefContentsMSSA(MemorySSA *MSSA, AliasAnalysis *AA, Value *V,
- MemoryDef *Def, Value *Size) {
+static bool hasUndefContents(MemorySSA *MSSA, AliasAnalysis *AA, Value *V,
+ MemoryDef *Def, Value *Size) {
if (MSSA->isLiveOnEntryDef(Def))
return isa<AllocaInst>(getUnderlyingObject(V));
@@ -1338,19 +1281,12 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
// easily represent this location, we use the full 0..CopySize range.
MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
bool CanReduceSize = false;
- if (EnableMemorySSA) {
- MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet);
- MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
- MemSetAccess->getDefiningAccess(), MemCpyLoc);
- if (auto *MD = dyn_cast<MemoryDef>(Clobber))
- if (hasUndefContentsMSSA(MSSA, AA, MemCpy->getSource(), MD, CopySize))
- CanReduceSize = true;
- } else {
- MemDepResult DepInfo = MD->getPointerDependencyFrom(
- MemCpyLoc, true, MemSet->getIterator(), MemSet->getParent());
- if (DepInfo.isDef() && hasUndefContents(DepInfo.getInst(), CopySize))
+ MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet);
+ MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
+ MemSetAccess->getDefiningAccess(), MemCpyLoc);
+ if (auto *MD = dyn_cast<MemoryDef>(Clobber))
+ if (hasUndefContents(MSSA, AA, MemCpy->getSource(), MD, CopySize))
CanReduceSize = true;
- }
if (!CanReduceSize)
return false;
@@ -1362,12 +1298,10 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
Instruction *NewM =
Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
CopySize, MaybeAlign(MemCpy->getDestAlignment()));
- if (MSSAU) {
- auto *LastDef =
- cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
- auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
- MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
- }
+ auto *LastDef =
+ cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
+ auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
+ MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
return true;
}
@@ -1397,149 +1331,90 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
Instruction *NewM =
Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(),
MaybeAlign(M->getDestAlignment()), false);
- if (MSSAU) {
- auto *LastDef =
- cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
- auto *NewAccess =
- MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
- MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
- }
+ auto *LastDef =
+ cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
+ auto *NewAccess =
+ MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
+ MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
eraseInstruction(M);
++NumCpyToSet;
return true;
}
- if (EnableMemorySSA) {
- MemoryUseOrDef *MA = MSSA->getMemoryAccess(M);
- MemoryAccess *AnyClobber = MSSA->getWalker()->getClobberingMemoryAccess(MA);
- MemoryLocation DestLoc = MemoryLocation::getForDest(M);
- const MemoryAccess *DestClobber =
- MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc);
-
- // Try to turn a partially redundant memset + memcpy into
- // memcpy + smaller memset. We don't need the memcpy size for this.
- // The memcpy most post-dom the memset, so limit this to the same basic
- // block. A non-local generalization is likely not worthwhile.
- if (auto *MD = dyn_cast<MemoryDef>(DestClobber))
- if (auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst()))
- if (DestClobber->getBlock() == M->getParent())
- if (processMemSetMemCpyDependence(M, MDep))
- return true;
-
- MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
- AnyClobber, MemoryLocation::getForSource(M));
-
- // There are four possible optimizations we can do for memcpy:
- // a) memcpy-memcpy xform which exposes redundance for DSE.
- // b) call-memcpy xform for return slot optimization.
- // c) memcpy from freshly alloca'd space or space that has just started
- // its lifetime copies undefined data, and we can therefore eliminate
- // the memcpy in favor of the data that was already at the destination.
- // d) memcpy from a just-memset'd source can be turned into memset.
- if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
- if (Instruction *MI = MD->getMemoryInst()) {
- if (ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
- if (auto *C = dyn_cast<CallInst>(MI)) {
- // The memcpy must post-dom the call. Limit to the same block for
- // now. Additionally, we need to ensure that there are no accesses
- // to dest between the call and the memcpy. Accesses to src will be
- // checked by performCallSlotOptzn().
- // TODO: Support non-local call-slot optimization?
- if (C->getParent() == M->getParent() &&
- !accessedBetween(*AA, DestLoc, MD, MA)) {
- // FIXME: Can we pass in either of dest/src alignment here instead
- // of conservatively taking the minimum?
- Align Alignment = std::min(M->getDestAlign().valueOrOne(),
- M->getSourceAlign().valueOrOne());
- if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
- CopySize->getZExtValue(), Alignment,
- C)) {
- LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
- << " call: " << *C << "\n"
- << " memcpy: " << *M << "\n");
- eraseInstruction(M);
- ++NumMemCpyInstr;
- return true;
- }
- }
- }
- }
- if (auto *MDep = dyn_cast<MemCpyInst>(MI))
- return processMemCpyMemCpyDependence(M, MDep);
- if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
- if (performMemCpyToMemSetOptzn(M, MDep)) {
- LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n");
- eraseInstruction(M);
- ++NumCpyToSet;
- return true;
- }
- }
- }
-
- if (hasUndefContentsMSSA(MSSA, AA, M->getSource(), MD, M->getLength())) {
- LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n");
- eraseInstruction(M);
- ++NumMemCpyInstr;
- return true;
- }
- }
- } else {
- MemDepResult DepInfo = MD->getDependency(M);
-
- // Try to turn a partially redundant memset + memcpy into
- // memcpy + smaller memset. We don't need the memcpy size for this.
- if (DepInfo.isClobber())
- if (MemSetInst *MDep = dyn_cast<MemSetInst>(DepInfo.getInst()))
+ MemoryUseOrDef *MA = MSSA->getMemoryAccess(M);
+ MemoryAccess *AnyClobber = MSSA->getWalker()->getClobberingMemoryAccess(MA);
+ MemoryLocation DestLoc = MemoryLocation::getForDest(M);
+ const MemoryAccess *DestClobber =
+ MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc);
+
+ // Try to turn a partially redundant memset + memcpy into
+ // memcpy + smaller memset. We don't need the memcpy size for this.
+  // The memcpy must post-dom the memset, so limit this to the same basic
+ // block. A non-local generalization is likely not worthwhile.
+ if (auto *MD = dyn_cast<MemoryDef>(DestClobber))
+ if (auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst()))
+ if (DestClobber->getBlock() == M->getParent())
if (processMemSetMemCpyDependence(M, MDep))
return true;
- // There are four possible optimizations we can do for memcpy:
- // a) memcpy-memcpy xform which exposes redundance for DSE.
- // b) call-memcpy xform for return slot optimization.
- // c) memcpy from freshly alloca'd space or space that has just started
- // its lifetime copies undefined data, and we can therefore eliminate
- // the memcpy in favor of the data that was already at the destination.
- // d) memcpy from a just-memset'd source can be turned into memset.
- if (ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
- if (DepInfo.isClobber()) {
- if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
- // FIXME: Can we pass in either of dest/src alignment here instead
- // of conservatively taking the minimum?
- Align Alignment = std::min(M->getDestAlign().valueOrOne(),
- M->getSourceAlign().valueOrOne());
- if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
- CopySize->getZExtValue(), Alignment, C)) {
- eraseInstruction(M);
- ++NumMemCpyInstr;
- return true;
+ MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
+ AnyClobber, MemoryLocation::getForSource(M));
+
+ // There are four possible optimizations we can do for memcpy:
+ // a) memcpy-memcpy xform which exposes redundance for DSE.
+ // b) call-memcpy xform for return slot optimization.
+ // c) memcpy from freshly alloca'd space or space that has just started
+ // its lifetime copies undefined data, and we can therefore eliminate
+ // the memcpy in favor of the data that was already at the destination.
+ // d) memcpy from a just-memset'd source can be turned into memset.
+ if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
+ if (Instruction *MI = MD->getMemoryInst()) {
+ if (ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
+ if (auto *C = dyn_cast<CallInst>(MI)) {
+ // The memcpy must post-dom the call. Limit to the same block for
+ // now. Additionally, we need to ensure that there are no accesses
+ // to dest between the call and the memcpy. Accesses to src will be
+ // checked by performCallSlotOptzn().
+ // TODO: Support non-local call-slot optimization?
+ if (C->getParent() == M->getParent() &&
+ !accessedBetween(*AA, DestLoc, MD, MA)) {
+ // FIXME: Can we pass in either of dest/src alignment here instead
+ // of conservatively taking the minimum?
+ Align Alignment = std::min(M->getDestAlign().valueOrOne(),
+ M->getSourceAlign().valueOrOne());
+ if (performCallSlotOptzn(
+ M, M, M->getDest(), M->getSource(),
+ TypeSize::getFixed(CopySize->getZExtValue()), Alignment,
+ C)) {
+ LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
+ << " call: " << *C << "\n"
+ << " memcpy: " << *M << "\n");
+ eraseInstruction(M);
+ ++NumMemCpyInstr;
+ return true;
+ }
}
}
}
- }
-
- MemoryLocation SrcLoc = MemoryLocation::getForSource(M);
- MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(
- SrcLoc, true, M->getIterator(), M->getParent());
-
- if (SrcDepInfo.isClobber()) {
- if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
+ if (auto *MDep = dyn_cast<MemCpyInst>(MI))
return processMemCpyMemCpyDependence(M, MDep);
- } else if (SrcDepInfo.isDef()) {
- if (hasUndefContents(SrcDepInfo.getInst(), M->getLength())) {
- eraseInstruction(M);
- ++NumMemCpyInstr;
- return true;
- }
- }
-
- if (SrcDepInfo.isClobber())
- if (MemSetInst *MDep = dyn_cast<MemSetInst>(SrcDepInfo.getInst()))
+ if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
if (performMemCpyToMemSetOptzn(M, MDep)) {
+ LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n");
eraseInstruction(M);
++NumCpyToSet;
return true;
}
+ }
+ }
+
+ if (hasUndefContents(MSSA, AA, M->getSource(), MD, M->getLength())) {
+ LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n");
+ eraseInstruction(M);
+ ++NumMemCpyInstr;
+ return true;
+ }
}
return false;
@@ -1548,12 +1423,8 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
/// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
/// not to alias.
bool MemCpyOptPass::processMemMove(MemMoveInst *M) {
- if (!TLI->has(LibFunc_memmove))
- return false;
-
- // See if the pointers alias.
- if (!AA->isNoAlias(MemoryLocation::getForDest(M),
- MemoryLocation::getForSource(M)))
+ // See if the source could be modified by this memmove potentially.
+ if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(M))))
return false;
LLVM_DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M
@@ -1569,11 +1440,6 @@ bool MemCpyOptPass::processMemMove(MemMoveInst *M) {
// For MemorySSA nothing really changes (except that memcpy may imply stricter
// aliasing guarantees).
- // MemDep may have over conservative information about this instruction, just
- // conservatively flush it from the cache.
- if (MD)
- MD->removeInstruction(M);
-
++NumMoveToCpy;
return true;
}
@@ -1584,24 +1450,16 @@ bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
// Find out what feeds this byval argument.
Value *ByValArg = CB.getArgOperand(ArgNo);
Type *ByValTy = CB.getParamByValType(ArgNo);
- uint64_t ByValSize = DL.getTypeAllocSize(ByValTy);
+ TypeSize ByValSize = DL.getTypeAllocSize(ByValTy);
MemoryLocation Loc(ByValArg, LocationSize::precise(ByValSize));
+ MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
+ if (!CallAccess)
+ return false;
MemCpyInst *MDep = nullptr;
- if (EnableMemorySSA) {
- MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
- if (!CallAccess)
- return false;
- MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
- CallAccess->getDefiningAccess(), Loc);
- if (auto *MD = dyn_cast<MemoryDef>(Clobber))
- MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
- } else {
- MemDepResult DepInfo = MD->getPointerDependencyFrom(
- Loc, true, CB.getIterator(), CB.getParent());
- if (!DepInfo.isClobber())
- return false;
- MDep = dyn_cast<MemCpyInst>(DepInfo.getInst());
- }
+ MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
+ CallAccess->getDefiningAccess(), Loc);
+ if (auto *MD = dyn_cast<MemoryDef>(Clobber))
+ MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
// If the byval argument isn't fed by a memcpy, ignore it. If it is fed by
// a memcpy, see if we can byval from the source of the memcpy instead of the
@@ -1612,7 +1470,8 @@ bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
// The length of the memcpy must be larger or equal to the size of the byval.
ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength());
- if (!C1 || C1->getValue().getZExtValue() < ByValSize)
+ if (!C1 || !TypeSize::isKnownGE(
+ TypeSize::getFixed(C1->getValue().getZExtValue()), ByValSize))
return false;
// Get the alignment of the byval. If the call doesn't specify the alignment,
@@ -1639,19 +1498,9 @@ bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
// *b = 42;
// foo(*a)
// It would be invalid to transform the second memcpy into foo(*b).
- if (EnableMemorySSA) {
- if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep),
- MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(&CB)))
- return false;
- } else {
- // NOTE: This is conservative, it will stop on any read from the source loc,
- // not just the defining memcpy.
- MemDepResult SourceDep = MD->getPointerDependencyFrom(
- MemoryLocation::getForSource(MDep), false,
- CB.getIterator(), MDep->getParent());
- if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
- return false;
- }
+ if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep),
+ MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(&CB)))
+ return false;
Value *TmpCast = MDep->getSource();
if (MDep->getSource()->getType() != ByValArg->getType()) {
@@ -1718,47 +1567,33 @@ bool MemCpyOptPass::iterateOnFunction(Function &F) {
}
PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
- auto *MD = !EnableMemorySSA ? &AM.getResult<MemoryDependenceAnalysis>(F)
- : AM.getCachedResult<MemoryDependenceAnalysis>(F);
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
auto *AA = &AM.getResult<AAManager>(F);
auto *AC = &AM.getResult<AssumptionAnalysis>(F);
auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
- auto *MSSA = EnableMemorySSA ? &AM.getResult<MemorySSAAnalysis>(F)
- : AM.getCachedResult<MemorySSAAnalysis>(F);
+ auto *MSSA = &AM.getResult<MemorySSAAnalysis>(F);
- bool MadeChange =
- runImpl(F, MD, &TLI, AA, AC, DT, MSSA ? &MSSA->getMSSA() : nullptr);
+ bool MadeChange = runImpl(F, &TLI, AA, AC, DT, &MSSA->getMSSA());
if (!MadeChange)
return PreservedAnalyses::all();
PreservedAnalyses PA;
PA.preserveSet<CFGAnalyses>();
- if (MD)
- PA.preserve<MemoryDependenceAnalysis>();
- if (MSSA)
- PA.preserve<MemorySSAAnalysis>();
+ PA.preserve<MemorySSAAnalysis>();
return PA;
}
-bool MemCpyOptPass::runImpl(Function &F, MemoryDependenceResults *MD_,
- TargetLibraryInfo *TLI_, AliasAnalysis *AA_,
- AssumptionCache *AC_, DominatorTree *DT_,
- MemorySSA *MSSA_) {
+bool MemCpyOptPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
+ AliasAnalysis *AA_, AssumptionCache *AC_,
+ DominatorTree *DT_, MemorySSA *MSSA_) {
bool MadeChange = false;
- MD = MD_;
TLI = TLI_;
AA = AA_;
AC = AC_;
DT = DT_;
MSSA = MSSA_;
MemorySSAUpdater MSSAU_(MSSA_);
- MSSAU = MSSA_ ? &MSSAU_ : nullptr;
- // If we don't have at least memset and memcpy, there is little point of doing
- // anything here. These are required by a freestanding implementation, so if
- // even they are disabled, there is no point in trying hard.
- if (!TLI->has(LibFunc_memset) || !TLI->has(LibFunc_memcpy))
- return false;
+ MSSAU = &MSSAU_;
while (true) {
if (!iterateOnFunction(F))
@@ -1766,10 +1601,9 @@ bool MemCpyOptPass::runImpl(Function &F, MemoryDependenceResults *MD_,
MadeChange = true;
}
- if (MSSA_ && VerifyMemorySSA)
+ if (VerifyMemorySSA)
MSSA_->verifyMemorySSA();
- MD = nullptr;
return MadeChange;
}
@@ -1778,17 +1612,11 @@ bool MemCpyOptLegacyPass::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
- auto *MDWP = !EnableMemorySSA
- ? &getAnalysis<MemoryDependenceWrapperPass>()
- : getAnalysisIfAvailable<MemoryDependenceWrapperPass>();
auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- auto *MSSAWP = EnableMemorySSA
- ? &getAnalysis<MemorySSAWrapperPass>()
- : getAnalysisIfAvailable<MemorySSAWrapperPass>();
+ auto *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
- return Impl.runImpl(F, MDWP ? & MDWP->getMemDep() : nullptr, TLI, AA, AC, DT,
- MSSAWP ? &MSSAWP->getMSSA() : nullptr);
+ return Impl.runImpl(F, TLI, AA, AC, DT, MSSA);
}
diff --git a/llvm/lib/Transforms/Scalar/MergeICmps.cpp b/llvm/lib/Transforms/Scalar/MergeICmps.cpp
index f13f24ad2027..aac0deea5be3 100644
--- a/llvm/lib/Transforms/Scalar/MergeICmps.cpp
+++ b/llvm/lib/Transforms/Scalar/MergeICmps.cpp
@@ -66,15 +66,6 @@ namespace {
#define DEBUG_TYPE "mergeicmps"
-// Returns true if the instruction is a simple load or a simple store
-static bool isSimpleLoadOrStore(const Instruction *I) {
- if (const LoadInst *LI = dyn_cast<LoadInst>(I))
- return LI->isSimple();
- if (const StoreInst *SI = dyn_cast<StoreInst>(I))
- return SI->isSimple();
- return false;
-}
-
// A BCE atom "Binary Compare Expression Atom" represents an integer load
// that is a constant offset from a base value, e.g. `a` or `o.c` in the example
// at the top.
@@ -154,6 +145,10 @@ BCEAtom visitICmpLoadOperand(Value *const Val, BaseIdentifier &BaseId) {
return {};
}
Value *const Addr = LoadI->getOperand(0);
+ if (Addr->getType()->getPointerAddressSpace() != 0) {
+ LLVM_DEBUG(dbgs() << "from non-zero AddressSpace\n");
+ return {};
+ }
auto *const GEP = dyn_cast<GetElementPtrInst>(Addr);
if (!GEP)
return {};
@@ -234,6 +229,8 @@ class BCECmpBlock {
InstructionSet BlockInsts;
// The block requires splitting.
bool RequireSplit = false;
+ // Original order of this block in the chain.
+ unsigned OrigOrder = 0;
private:
BCECmp Cmp;
@@ -244,14 +241,13 @@ bool BCECmpBlock::canSinkBCECmpInst(const Instruction *Inst,
// If this instruction may clobber the loads and is in middle of the BCE cmp
// block instructions, then bail for now.
if (Inst->mayWriteToMemory()) {
- // Bail if this is not a simple load or store
- if (!isSimpleLoadOrStore(Inst))
- return false;
- // Disallow stores that might alias the BCE operands
- MemoryLocation LLoc = MemoryLocation::get(Cmp.Lhs.LoadI);
- MemoryLocation RLoc = MemoryLocation::get(Cmp.Rhs.LoadI);
- if (isModSet(AA.getModRefInfo(Inst, LLoc)) ||
- isModSet(AA.getModRefInfo(Inst, RLoc)))
+ auto MayClobber = [&](LoadInst *LI) {
+ // If a potentially clobbering instruction comes before the load,
+ // we can still safely sink the load.
+ return !Inst->comesBefore(LI) &&
+ isModSet(AA.getModRefInfo(Inst, MemoryLocation::get(LI)));
+ };
+ if (MayClobber(Cmp.Lhs.LoadI) || MayClobber(Cmp.Rhs.LoadI))
return false;
}
// Make sure this instruction does not use any of the BCE cmp block
@@ -386,39 +382,83 @@ static inline void enqueueBlock(std::vector<BCECmpBlock> &Comparisons,
<< Comparison.Rhs().BaseId << " + "
<< Comparison.Rhs().Offset << "\n");
LLVM_DEBUG(dbgs() << "\n");
+ Comparison.OrigOrder = Comparisons.size();
Comparisons.push_back(std::move(Comparison));
}
// A chain of comparisons.
class BCECmpChain {
- public:
- BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi,
- AliasAnalysis &AA);
-
- int size() const { return Comparisons_.size(); }
+public:
+ using ContiguousBlocks = std::vector<BCECmpBlock>;
-#ifdef MERGEICMPS_DOT_ON
- void dump() const;
-#endif // MERGEICMPS_DOT_ON
+ BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi,
+ AliasAnalysis &AA);
bool simplify(const TargetLibraryInfo &TLI, AliasAnalysis &AA,
DomTreeUpdater &DTU);
-private:
- static bool IsContiguous(const BCECmpBlock &First,
- const BCECmpBlock &Second) {
- return First.Lhs().BaseId == Second.Lhs().BaseId &&
- First.Rhs().BaseId == Second.Rhs().BaseId &&
- First.Lhs().Offset + First.SizeBits() / 8 == Second.Lhs().Offset &&
- First.Rhs().Offset + First.SizeBits() / 8 == Second.Rhs().Offset;
+ bool atLeastOneMerged() const {
+ return any_of(MergedBlocks_,
+ [](const auto &Blocks) { return Blocks.size() > 1; });
}
+private:
PHINode &Phi_;
- std::vector<BCECmpBlock> Comparisons_;
+ // The list of all blocks in the chain, grouped by contiguity.
+ std::vector<ContiguousBlocks> MergedBlocks_;
// The original entry block (before sorting);
BasicBlock *EntryBlock_;
};
+static bool areContiguous(const BCECmpBlock &First, const BCECmpBlock &Second) {
+ return First.Lhs().BaseId == Second.Lhs().BaseId &&
+ First.Rhs().BaseId == Second.Rhs().BaseId &&
+ First.Lhs().Offset + First.SizeBits() / 8 == Second.Lhs().Offset &&
+ First.Rhs().Offset + First.SizeBits() / 8 == Second.Rhs().Offset;
+}
+
+static unsigned getMinOrigOrder(const BCECmpChain::ContiguousBlocks &Blocks) {
+ unsigned MinOrigOrder = std::numeric_limits<unsigned>::max();
+ for (const BCECmpBlock &Block : Blocks)
+ MinOrigOrder = std::min(MinOrigOrder, Block.OrigOrder);
+ return MinOrigOrder;
+}
+
+/// Given a chain of comparison blocks, groups the blocks into contiguous
+/// ranges that can be merged together into a single comparison.
+static std::vector<BCECmpChain::ContiguousBlocks>
+mergeBlocks(std::vector<BCECmpBlock> &&Blocks) {
+ std::vector<BCECmpChain::ContiguousBlocks> MergedBlocks;
+
+ // Sort to detect continuous offsets.
+ llvm::sort(Blocks,
+ [](const BCECmpBlock &LhsBlock, const BCECmpBlock &RhsBlock) {
+ return std::tie(LhsBlock.Lhs(), LhsBlock.Rhs()) <
+ std::tie(RhsBlock.Lhs(), RhsBlock.Rhs());
+ });
+
+ BCECmpChain::ContiguousBlocks *LastMergedBlock = nullptr;
+ for (BCECmpBlock &Block : Blocks) {
+ if (!LastMergedBlock || !areContiguous(LastMergedBlock->back(), Block)) {
+ MergedBlocks.emplace_back();
+ LastMergedBlock = &MergedBlocks.back();
+ } else {
+ LLVM_DEBUG(dbgs() << "Merging block " << Block.BB->getName() << " into "
+ << LastMergedBlock->back().BB->getName() << "\n");
+ }
+ LastMergedBlock->push_back(std::move(Block));
+ }
+
+ // While we allow reordering for merging, do not reorder unmerged comparisons.
+ // Doing so may introduce branch on poison.
+ llvm::sort(MergedBlocks, [](const BCECmpChain::ContiguousBlocks &LhsBlocks,
+ const BCECmpChain::ContiguousBlocks &RhsBlocks) {
+ return getMinOrigOrder(LhsBlocks) < getMinOrigOrder(RhsBlocks);
+ });
+
+ return MergedBlocks;
+}
+
BCECmpChain::BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi,
AliasAnalysis &AA)
: Phi_(Phi) {
@@ -498,47 +538,9 @@ BCECmpChain::BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi,
return;
}
EntryBlock_ = Comparisons[0].BB;
- Comparisons_ = std::move(Comparisons);
-#ifdef MERGEICMPS_DOT_ON
- errs() << "BEFORE REORDERING:\n\n";
- dump();
-#endif // MERGEICMPS_DOT_ON
- // Reorder blocks by LHS. We can do that without changing the
- // semantics because we are only accessing dereferencable memory.
- llvm::sort(Comparisons_,
- [](const BCECmpBlock &LhsBlock, const BCECmpBlock &RhsBlock) {
- return std::tie(LhsBlock.Lhs(), LhsBlock.Rhs()) <
- std::tie(RhsBlock.Lhs(), RhsBlock.Rhs());
- });
-#ifdef MERGEICMPS_DOT_ON
- errs() << "AFTER REORDERING:\n\n";
- dump();
-#endif // MERGEICMPS_DOT_ON
+ MergedBlocks_ = mergeBlocks(std::move(Comparisons));
}
-#ifdef MERGEICMPS_DOT_ON
-void BCECmpChain::dump() const {
- errs() << "digraph dag {\n";
- errs() << " graph [bgcolor=transparent];\n";
- errs() << " node [color=black,style=filled,fillcolor=lightyellow];\n";
- errs() << " edge [color=black];\n";
- for (size_t I = 0; I < Comparisons_.size(); ++I) {
- const auto &Comparison = Comparisons_[I];
- errs() << " \"" << I << "\" [label=\"%"
- << Comparison.Lhs().Base()->getName() << " + "
- << Comparison.Lhs().Offset << " == %"
- << Comparison.Rhs().Base()->getName() << " + "
- << Comparison.Rhs().Offset << " (" << (Comparison.SizeBits() / 8)
- << " bytes)\"];\n";
- const Value *const Val = Phi_.getIncomingValueForBlock(Comparison.BB);
- if (I > 0) errs() << " \"" << (I - 1) << "\" -> \"" << I << "\";\n";
- errs() << " \"" << I << "\" -> \"Phi\" [label=\"" << *Val << "\"];\n";
- }
- errs() << " \"Phi\" [label=\"Phi\"];\n";
- errs() << "}\n\n";
-}
-#endif // MERGEICMPS_DOT_ON
-
namespace {
// A class to compute the name of a set of merged basic blocks.
@@ -661,47 +663,18 @@ static BasicBlock *mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,
bool BCECmpChain::simplify(const TargetLibraryInfo &TLI, AliasAnalysis &AA,
DomTreeUpdater &DTU) {
- assert(Comparisons_.size() >= 2 && "simplifying trivial BCECmpChain");
- // First pass to check if there is at least one merge. If not, we don't do
- // anything and we keep analysis passes intact.
- const auto AtLeastOneMerged = [this]() {
- for (size_t I = 1; I < Comparisons_.size(); ++I) {
- if (IsContiguous(Comparisons_[I - 1], Comparisons_[I]))
- return true;
- }
- return false;
- };
- if (!AtLeastOneMerged())
- return false;
-
+ assert(atLeastOneMerged() && "simplifying trivial BCECmpChain");
LLVM_DEBUG(dbgs() << "Simplifying comparison chain starting at block "
<< EntryBlock_->getName() << "\n");
// Effectively merge blocks. We go in the reverse direction from the phi block
// so that the next block is always available to branch to.
- const auto mergeRange = [this, &TLI, &AA, &DTU](int I, int Num,
- BasicBlock *InsertBefore,
- BasicBlock *Next) {
- return mergeComparisons(makeArrayRef(Comparisons_).slice(I, Num),
- InsertBefore, Next, Phi_, TLI, AA, DTU);
- };
- int NumMerged = 1;
+ BasicBlock *InsertBefore = EntryBlock_;
BasicBlock *NextCmpBlock = Phi_.getParent();
- for (int I = static_cast<int>(Comparisons_.size()) - 2; I >= 0; --I) {
- if (IsContiguous(Comparisons_[I], Comparisons_[I + 1])) {
- LLVM_DEBUG(dbgs() << "Merging block " << Comparisons_[I].BB->getName()
- << " into " << Comparisons_[I + 1].BB->getName()
- << "\n");
- ++NumMerged;
- } else {
- NextCmpBlock = mergeRange(I + 1, NumMerged, NextCmpBlock, NextCmpBlock);
- NumMerged = 1;
- }
+ for (const auto &Blocks : reverse(MergedBlocks_)) {
+ InsertBefore = NextCmpBlock = mergeComparisons(
+ Blocks, InsertBefore, NextCmpBlock, Phi_, TLI, AA, DTU);
}
- // Insert the entry block for the new chain before the old entry block.
- // If the old entry block was the function entry, this ensures that the new
- // entry can become the function entry.
- NextCmpBlock = mergeRange(0, NumMerged, EntryBlock_, NextCmpBlock);
// Replace the original cmp chain with the new cmp chain by pointing all
// predecessors of EntryBlock_ to NextCmpBlock instead. This makes all cmp
@@ -729,13 +702,16 @@ bool BCECmpChain::simplify(const TargetLibraryInfo &TLI, AliasAnalysis &AA,
// Delete merged blocks. This also removes incoming values in phi.
SmallVector<BasicBlock *, 16> DeadBlocks;
- for (auto &Cmp : Comparisons_) {
- LLVM_DEBUG(dbgs() << "Deleting merged block " << Cmp.BB->getName() << "\n");
- DeadBlocks.push_back(Cmp.BB);
+ for (const auto &Blocks : MergedBlocks_) {
+ for (const BCECmpBlock &Block : Blocks) {
+ LLVM_DEBUG(dbgs() << "Deleting merged block " << Block.BB->getName()
+ << "\n");
+ DeadBlocks.push_back(Block.BB);
+ }
}
DeleteDeadBlocks(DeadBlocks, &DTU);
- Comparisons_.clear();
+ MergedBlocks_.clear();
return true;
}
@@ -835,8 +811,8 @@ bool processPhi(PHINode &Phi, const TargetLibraryInfo &TLI, AliasAnalysis &AA,
if (Blocks.empty()) return false;
BCECmpChain CmpChain(Blocks, Phi, AA);
- if (CmpChain.size() < 2) {
- LLVM_DEBUG(dbgs() << "skip: only one compare block\n");
+ if (!CmpChain.atLeastOneMerged()) {
+ LLVM_DEBUG(dbgs() << "skip: nothing merged\n");
return false;
}
@@ -862,9 +838,9 @@ static bool runImpl(Function &F, const TargetLibraryInfo &TLI,
bool MadeChange = false;
- for (auto BBIt = ++F.begin(); BBIt != F.end(); ++BBIt) {
+ for (BasicBlock &BB : llvm::drop_begin(F)) {
// A Phi operation is always first in a basic block.
- if (auto *const Phi = dyn_cast<PHINode>(&*BBIt->begin()))
+ if (auto *const Phi = dyn_cast<PHINode>(&*BB.begin()))
MadeChange |= processPhi(*Phi, TLI, AA, DTU);
}
diff --git a/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
index 033fc168a67f..734532a6670c 100644
--- a/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
+++ b/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
@@ -420,3 +420,12 @@ MergedLoadStoreMotionPass::run(Function &F, FunctionAnalysisManager &AM) {
PA.preserveSet<CFGAnalyses>();
return PA;
}
+
+void MergedLoadStoreMotionPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<MergedLoadStoreMotionPass> *>(this)->printPipeline(
+ OS, MapClassName2PassName);
+ OS << "<";
+ OS << (Options.SplitFooterBB ? "" : "no-") << "split-footer-bb";
+ OS << ">";
+}
diff --git a/llvm/lib/Transforms/Scalar/NaryReassociate.cpp b/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
index ded5caf53b5a..6dca30d9876e 100644
--- a/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
+++ b/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
@@ -282,8 +282,12 @@ NaryReassociatePass::matchAndReassociateMinOrMax(Instruction *I,
m_Value(LHS), m_Value(RHS));
if (match(I, MinMaxMatcher)) {
OrigSCEV = SE->getSCEV(I);
- return dyn_cast_or_null<Instruction>(
- tryReassociateMinOrMax(I, MinMaxMatcher, LHS, RHS));
+ if (auto *NewMinMax = dyn_cast_or_null<Instruction>(
+ tryReassociateMinOrMax(I, MinMaxMatcher, LHS, RHS)))
+ return NewMinMax;
+ if (auto *NewMinMax = dyn_cast_or_null<Instruction>(
+ tryReassociateMinOrMax(I, MinMaxMatcher, RHS, LHS)))
+ return NewMinMax;
}
return nullptr;
}
@@ -596,58 +600,60 @@ Value *NaryReassociatePass::tryReassociateMinOrMax(Instruction *I,
Value *LHS, Value *RHS) {
Value *A = nullptr, *B = nullptr;
MaxMinT m_MaxMin(m_Value(A), m_Value(B));
- for (unsigned int i = 0; i < 2; ++i) {
- if (!LHS->hasNUsesOrMore(3) && match(LHS, m_MaxMin)) {
- const SCEV *AExpr = SE->getSCEV(A), *BExpr = SE->getSCEV(B);
- const SCEV *RHSExpr = SE->getSCEV(RHS);
- for (unsigned int j = 0; j < 2; ++j) {
- if (j == 0) {
- if (BExpr == RHSExpr)
- continue;
- // Transform 'I = (A op B) op RHS' to 'I = (A op RHS) op B' on the
- // first iteration.
- std::swap(BExpr, RHSExpr);
- } else {
- if (AExpr == RHSExpr)
- continue;
-        // Transform 'I = (A op RHS) op B' to 'I = (B op RHS) op A' on the second
- // iteration.
- std::swap(AExpr, RHSExpr);
- }
-
- // The optimization is profitable only if LHS can be removed in the end.
- // In other words LHS should be used (directly or indirectly) by I only.
- if (llvm::any_of(LHS->users(), [&](auto *U) {
- return U != I && !(U->hasOneUser() && *U->users().begin() == I);
- }))
- continue;
-
- SCEVExpander Expander(*SE, *DL, "nary-reassociate");
- SmallVector<const SCEV *, 2> Ops1{ BExpr, AExpr };
- const SCEVTypes SCEVType = convertToSCEVype(m_MaxMin);
- const SCEV *R1Expr = SE->getMinMaxExpr(SCEVType, Ops1);
-
- Instruction *R1MinMax = findClosestMatchingDominator(R1Expr, I);
-
- if (!R1MinMax)
- continue;
-
- LLVM_DEBUG(dbgs() << "NARY: Found common sub-expr: " << *R1MinMax
- << "\n");
-
- R1Expr = SE->getUnknown(R1MinMax);
- SmallVector<const SCEV *, 2> Ops2{ RHSExpr, R1Expr };
- const SCEV *R2Expr = SE->getMinMaxExpr(SCEVType, Ops2);
-
- Value *NewMinMax = Expander.expandCodeFor(R2Expr, I->getType(), I);
- NewMinMax->setName(Twine(I->getName()).concat(".nary"));
-
- LLVM_DEBUG(dbgs() << "NARY: Deleting: " << *I << "\n"
- << "NARY: Inserting: " << *NewMinMax << "\n");
- return NewMinMax;
- }
- }
- std::swap(LHS, RHS);
+
+ if (LHS->hasNUsesOrMore(3) ||
+ // The optimization is profitable only if LHS can be removed in the end.
+ // In other words LHS should be used (directly or indirectly) by I only.
+ llvm::any_of(LHS->users(),
+ [&](auto *U) {
+ return U != I &&
+ !(U->hasOneUser() && *U->users().begin() == I);
+ }) ||
+ !match(LHS, m_MaxMin))
+ return nullptr;
+
+ auto tryCombination = [&](Value *A, const SCEV *AExpr, Value *B,
+ const SCEV *BExpr, Value *C,
+ const SCEV *CExpr) -> Value * {
+ SmallVector<const SCEV *, 2> Ops1{BExpr, AExpr};
+ const SCEVTypes SCEVType = convertToSCEVype(m_MaxMin);
+ const SCEV *R1Expr = SE->getMinMaxExpr(SCEVType, Ops1);
+
+ Instruction *R1MinMax = findClosestMatchingDominator(R1Expr, I);
+
+ if (!R1MinMax)
+ return nullptr;
+
+ LLVM_DEBUG(dbgs() << "NARY: Found common sub-expr: " << *R1MinMax << "\n");
+
+ SmallVector<const SCEV *, 2> Ops2{SE->getUnknown(C),
+ SE->getUnknown(R1MinMax)};
+ const SCEV *R2Expr = SE->getMinMaxExpr(SCEVType, Ops2);
+
+ SCEVExpander Expander(*SE, *DL, "nary-reassociate");
+ Value *NewMinMax = Expander.expandCodeFor(R2Expr, I->getType(), I);
+ NewMinMax->setName(Twine(I->getName()).concat(".nary"));
+
+ LLVM_DEBUG(dbgs() << "NARY: Deleting: " << *I << "\n"
+ << "NARY: Inserting: " << *NewMinMax << "\n");
+ return NewMinMax;
+ };
+
+ const SCEV *AExpr = SE->getSCEV(A);
+ const SCEV *BExpr = SE->getSCEV(B);
+ const SCEV *RHSExpr = SE->getSCEV(RHS);
+
+ if (BExpr != RHSExpr) {
+ // Try (A op RHS) op B
+ if (auto *NewMinMax = tryCombination(A, AExpr, RHS, RHSExpr, B, BExpr))
+ return NewMinMax;
+ }
+
+ if (AExpr != RHSExpr) {
+ // Try (RHS op B) op A
+ if (auto *NewMinMax = tryCombination(RHS, RHSExpr, B, BExpr, A, AExpr))
+ return NewMinMax;
}
+
return nullptr;
}
diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp
index a137d13c6ea0..91215cd19e2b 100644
--- a/llvm/lib/Transforms/Scalar/NewGVN.cpp
+++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp
@@ -1194,9 +1194,10 @@ NewGVN::ExprResult NewGVN::createExpression(Instruction *I) const {
SimplifyCastInst(CI->getOpcode(), E->getOperand(0), CI->getType(), SQ);
if (auto Simplified = checkExprResults(E, I, V))
return Simplified;
- } else if (isa<GetElementPtrInst>(I)) {
- Value *V = SimplifyGEPInst(
- E->getType(), ArrayRef<Value *>(E->op_begin(), E->op_end()), SQ);
+ } else if (auto *GEPI = dyn_cast<GetElementPtrInst>(I)) {
+ Value *V = SimplifyGEPInst(GEPI->getSourceElementType(),
+ ArrayRef<Value *>(E->op_begin(), E->op_end()),
+ GEPI->isInBounds(), SQ);
if (auto Simplified = checkExprResults(E, I, V))
return Simplified;
} else if (AllConstant) {
@@ -1818,7 +1819,7 @@ NewGVN::ExprResult NewGVN::performSymbolicCmpEvaluation(Instruction *I) const {
// See if we know something about the comparison itself, like it is the target
// of an assume.
auto *CmpPI = PredInfo->getPredicateInfoFor(I);
- if (dyn_cast_or_null<PredicateAssume>(CmpPI))
+ if (isa_and_nonnull<PredicateAssume>(CmpPI))
return ExprResult::some(
createConstantExpression(ConstantInt::getTrue(CI->getType())));
@@ -3606,7 +3607,7 @@ void NewGVN::convertClassToDFSOrdered(
// Skip uses in unreachable blocks, as we're going
// to delete them.
- if (ReachableBlocks.count(IBlock) == 0)
+ if (!ReachableBlocks.contains(IBlock))
continue;
DomTreeNode *DomNode = DT->getNode(IBlock);
diff --git a/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp b/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
index 7872c553b412..44027ccd92ca 100644
--- a/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
+++ b/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
@@ -82,7 +82,7 @@ static bool optimizeSQRT(CallInst *Call, Function *CalledFunc,
// Add attribute "readnone" so that the backend can use a native sqrt instruction
// for this call.
- Call->addAttribute(AttributeList::FunctionIndex, Attribute::ReadNone);
+ Call->addFnAttr(Attribute::ReadNone);
// Insert a FP compare instruction and use it as the CurrBB branch condition.
Builder.SetInsertPoint(CurrBBTerm);
diff --git a/llvm/lib/Transforms/Scalar/Reassociate.cpp b/llvm/lib/Transforms/Scalar/Reassociate.cpp
index 888edc4d69a8..b0fb8daaba8f 100644
--- a/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -140,7 +140,7 @@ XorOpnd::XorOpnd(Value *V) {
// view the operand as "V | 0"
SymbolicPart = V;
- ConstPart = APInt::getNullValue(V->getType()->getScalarSizeInBits());
+ ConstPart = APInt::getZero(V->getType()->getScalarSizeInBits());
isOr = true;
}
@@ -1279,10 +1279,10 @@ static Value *OptimizeAndOrXor(unsigned Opcode,
/// be returned.
static Value *createAndInstr(Instruction *InsertBefore, Value *Opnd,
const APInt &ConstOpnd) {
- if (ConstOpnd.isNullValue())
+ if (ConstOpnd.isZero())
return nullptr;
- if (ConstOpnd.isAllOnesValue())
+ if (ConstOpnd.isAllOnes())
return Opnd;
Instruction *I = BinaryOperator::CreateAnd(
@@ -1304,7 +1304,7 @@ bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
// = ((x | c1) ^ c1) ^ (c1 ^ c2)
// = (x & ~c1) ^ (c1 ^ c2)
// It is useful only when c1 == c2.
- if (!Opnd1->isOrExpr() || Opnd1->getConstPart().isNullValue())
+ if (!Opnd1->isOrExpr() || Opnd1->getConstPart().isZero())
return false;
if (!Opnd1->getValue()->hasOneUse())
@@ -1361,7 +1361,7 @@ bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
APInt C3((~C1) ^ C2);
// Do not increase code size!
- if (!C3.isNullValue() && !C3.isAllOnesValue()) {
+ if (!C3.isZero() && !C3.isAllOnes()) {
int NewInstNum = ConstOpnd.getBoolValue() ? 1 : 2;
if (NewInstNum > DeadInstNum)
return false;
@@ -1377,7 +1377,7 @@ bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
APInt C3 = C1 ^ C2;
// Do not increase code size
- if (!C3.isNullValue() && !C3.isAllOnesValue()) {
+ if (!C3.isZero() && !C3.isAllOnes()) {
int NewInstNum = ConstOpnd.getBoolValue() ? 1 : 2;
if (NewInstNum > DeadInstNum)
return false;
@@ -1468,8 +1468,7 @@ Value *ReassociatePass::OptimizeXor(Instruction *I,
Value *CV;
// Step 3.1: Try simplifying "CurrOpnd ^ ConstOpnd"
- if (!ConstOpnd.isNullValue() &&
- CombineXorOpnd(I, CurrOpnd, ConstOpnd, CV)) {
+ if (!ConstOpnd.isZero() && CombineXorOpnd(I, CurrOpnd, ConstOpnd, CV)) {
Changed = true;
if (CV)
*CurrOpnd = XorOpnd(CV);
@@ -1510,7 +1509,7 @@ Value *ReassociatePass::OptimizeXor(Instruction *I,
ValueEntry VE(getRank(O.getValue()), O.getValue());
Ops.push_back(VE);
}
- if (!ConstOpnd.isNullValue()) {
+ if (!ConstOpnd.isZero()) {
Value *C = ConstantInt::get(Ty, ConstOpnd);
ValueEntry VE(getRank(C), C);
Ops.push_back(VE);
@@ -1519,7 +1518,7 @@ Value *ReassociatePass::OptimizeXor(Instruction *I,
if (Sz == 1)
return Ops.back().Op;
if (Sz == 0) {
- assert(ConstOpnd.isNullValue());
+ assert(ConstOpnd.isZero());
return ConstantInt::get(Ty, ConstOpnd);
}
}
diff --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index bc0fecc972fc..2d3490b2d29e 100644
--- a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -755,7 +755,7 @@ public:
}
bool operator==(const BDVState &Other) const {
- return OriginalValue == OriginalValue && BaseValue == Other.BaseValue &&
+ return OriginalValue == Other.OriginalValue && BaseValue == Other.BaseValue &&
Status == Other.Status;
}
@@ -910,7 +910,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
#ifndef NDEBUG
VerifyStates();
LLVM_DEBUG(dbgs() << "States after initialization:\n");
- for (auto Pair : States) {
+ for (const auto &Pair : States) {
LLVM_DEBUG(dbgs() << " " << Pair.second << " for " << *Pair.first << "\n");
}
#endif
@@ -1002,7 +1002,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
#ifndef NDEBUG
VerifyStates();
LLVM_DEBUG(dbgs() << "States after meet iteration:\n");
- for (auto Pair : States) {
+ for (const auto &Pair : States) {
LLVM_DEBUG(dbgs() << " " << Pair.second << " for " << *Pair.first << "\n");
}
#endif
@@ -1163,7 +1163,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
// llvm::Value of the correct type (and still remain pure).
// This will remove the need to add bitcasts.
assert(Base->stripPointerCasts() == OldBase->stripPointerCasts() &&
- "Sanity -- findBaseOrBDV should be pure!");
+ "findBaseOrBDV should be pure!");
#endif
}
Value *Base = BlockToValue[InBB];
@@ -1377,11 +1377,11 @@ static AttributeList legalizeCallAttributes(LLVMContext &Ctx,
return AL;
// Remove the readonly, readnone, and statepoint function attributes.
- AttrBuilder FnAttrs = AL.getFnAttributes();
+ AttrBuilder FnAttrs = AL.getFnAttrs();
for (auto Attr : FnAttrsToStrip)
FnAttrs.removeAttribute(Attr);
- for (Attribute A : AL.getFnAttributes()) {
+ for (Attribute A : AL.getFnAttrs()) {
if (isStatepointDirectiveAttr(A))
FnAttrs.remove(A);
}
@@ -1533,9 +1533,8 @@ static StringRef getDeoptLowering(CallBase *Call) {
// FIXME: Calls have a *really* confusing interface around attributes
// with values.
const AttributeList &CSAS = Call->getAttributes();
- if (CSAS.hasAttribute(AttributeList::FunctionIndex, DeoptLowering))
- return CSAS.getAttribute(AttributeList::FunctionIndex, DeoptLowering)
- .getValueAsString();
+ if (CSAS.hasFnAttr(DeoptLowering))
+ return CSAS.getFnAttr(DeoptLowering).getValueAsString();
Function *F = Call->getCalledFunction();
assert(F && F->hasFnAttribute(DeoptLowering));
return F->getFnAttribute(DeoptLowering).getValueAsString();
@@ -1801,7 +1800,7 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */
CallInst *GCResult = Builder.CreateGCResult(Token, Call->getType(), Name);
GCResult->setAttributes(
AttributeList::get(GCResult->getContext(), AttributeList::ReturnIndex,
- Call->getAttributes().getRetAttributes()));
+ Call->getAttributes().getRetAttrs()));
// We cannot RAUW or delete CS.getInstruction() because it could be in the
// live set of some other safepoint, in which case that safepoint's
@@ -1855,7 +1854,7 @@ makeStatepointExplicit(DominatorTree &DT, CallBase *Call,
// It receives an iterator to the statepoint gc relocates and emits a store to the
// assigned location (via allocaMap) for each one of them. It adds the
// visited values into the visitedLiveValues set, which we will later use
-// for sanity checking.
+// for validation checking.
static void
insertRelocationStores(iterator_range<Value::user_iterator> GCRelocs,
DenseMap<Value *, AllocaInst *> &AllocaMap,
@@ -2454,7 +2453,7 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
SmallVectorImpl<CallBase *> &ToUpdate,
DefiningValueMapTy &DVCache) {
#ifndef NDEBUG
- // sanity check the input
+ // Validate the input
std::set<CallBase *> Uniqued;
Uniqued.insert(ToUpdate.begin(), ToUpdate.end());
assert(Uniqued.size() == ToUpdate.size() && "no duplicates please!");
@@ -2620,9 +2619,9 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
// we just grab that.
llvm::append_range(Live, Info.StatepointToken->gc_args());
#ifndef NDEBUG
- // Do some basic sanity checks on our liveness results before performing
- // relocation. Relocation can and will turn mistakes in liveness results
- // into non-sensical code which is must harder to debug.
+ // Do some basic validation checking on our liveness results before
+ // performing relocation. Relocation can and will turn mistakes in liveness
+  // results into non-sensical code which is much harder to debug.
// TODO: It would be nice to test consistency as well
assert(DT.isReachableFromEntry(Info.StatepointToken->getParent()) &&
"statepoint must be reachable or liveness is meaningless");
@@ -2641,7 +2640,7 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
unique_unsorted(Live);
#ifndef NDEBUG
- // sanity check
+ // Validation check
for (auto *Ptr : Live)
assert(isHandledGCPointerType(Ptr->getType()) &&
"must be a gc pointer type");
@@ -2656,18 +2655,19 @@ template <typename AttrHolder>
static void RemoveNonValidAttrAtIndex(LLVMContext &Ctx, AttrHolder &AH,
unsigned Index) {
AttrBuilder R;
- if (AH.getDereferenceableBytes(Index))
+ AttributeSet AS = AH.getAttributes().getAttributes(Index);
+ if (AS.getDereferenceableBytes())
R.addAttribute(Attribute::get(Ctx, Attribute::Dereferenceable,
- AH.getDereferenceableBytes(Index)));
- if (AH.getDereferenceableOrNullBytes(Index))
+ AS.getDereferenceableBytes()));
+ if (AS.getDereferenceableOrNullBytes())
R.addAttribute(Attribute::get(Ctx, Attribute::DereferenceableOrNull,
- AH.getDereferenceableOrNullBytes(Index)));
+ AS.getDereferenceableOrNullBytes()));
for (auto Attr : ParamAttrsToStrip)
- if (AH.getAttributes().hasAttribute(Index, Attr))
+ if (AS.hasAttribute(Attr))
R.addAttribute(Attr);
if (!R.empty())
- AH.setAttributes(AH.getAttributes().removeAttributes(Ctx, Index, R));
+ AH.setAttributes(AH.getAttributes().removeAttributesAtIndex(Ctx, Index, R));
}
static void stripNonValidAttributesFromPrototype(Function &F) {
@@ -3016,7 +3016,7 @@ static SetVector<Value *> computeKillSet(BasicBlock *BB) {
#ifndef NDEBUG
/// Check that the items in 'Live' dominate 'TI'. This is used as a basic
-/// sanity check for the liveness computation.
+/// validation check for the liveness computation.
static void checkBasicSSA(DominatorTree &DT, SetVector<Value *> &Live,
Instruction *TI, bool TermOkay = false) {
for (Value *V : Live) {
@@ -3103,7 +3103,7 @@ static void computeLiveInValues(DominatorTree &DT, Function &F,
} // while (!Worklist.empty())
#ifndef NDEBUG
- // Sanity check our output against SSA properties. This helps catch any
+ // Verify our output against SSA properties. This helps catch any
// missing kills during the above iteration.
for (BasicBlock &BB : F)
checkBasicSSA(DT, Data, BB);
diff --git a/llvm/lib/Transforms/Scalar/SCCP.cpp b/llvm/lib/Transforms/Scalar/SCCP.cpp
index b09f896d0157..28e00c873361 100644
--- a/llvm/lib/Transforms/Scalar/SCCP.cpp
+++ b/llvm/lib/Transforms/Scalar/SCCP.cpp
@@ -490,17 +490,17 @@ bool llvm::runIPSCCP(
AttrBuilder AttributesToRemove;
AttributesToRemove.addAttribute(Attribute::ArgMemOnly);
AttributesToRemove.addAttribute(Attribute::InaccessibleMemOrArgMemOnly);
- F.removeAttributes(AttributeList::FunctionIndex, AttributesToRemove);
+ F.removeFnAttrs(AttributesToRemove);
for (User *U : F.users()) {
auto *CB = dyn_cast<CallBase>(U);
if (!CB || CB->getCalledFunction() != &F)
continue;
- CB->removeAttributes(AttributeList::FunctionIndex,
- AttributesToRemove);
+ CB->removeFnAttrs(AttributesToRemove);
}
}
+ MadeChanges |= ReplacedPointerArg;
}
SmallPtrSet<Value *, 32> InsertedValues;
@@ -540,14 +540,13 @@ bool llvm::runIPSCCP(
DTU.deleteBB(DeadBB);
for (BasicBlock &BB : F) {
- for (BasicBlock::iterator BI = BB.begin(), E = BB.end(); BI != E;) {
- Instruction *Inst = &*BI++;
- if (Solver.getPredicateInfoFor(Inst)) {
- if (auto *II = dyn_cast<IntrinsicInst>(Inst)) {
+ for (Instruction &Inst : llvm::make_early_inc_range(BB)) {
+ if (Solver.getPredicateInfoFor(&Inst)) {
+ if (auto *II = dyn_cast<IntrinsicInst>(&Inst)) {
if (II->getIntrinsicID() == Intrinsic::ssa_copy) {
Value *Op = II->getOperand(0);
- Inst->replaceAllUsesWith(Op);
- Inst->eraseFromParent();
+ Inst.replaceAllUsesWith(Op);
+ Inst.eraseFromParent();
}
}
}
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 5ec01454e5b2..31c8999c3724 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -122,7 +122,7 @@ namespace {
class IRBuilderPrefixedInserter final : public IRBuilderDefaultInserter {
std::string Prefix;
- const Twine getNameWithPrefix(const Twine &Name) const {
+ Twine getNameWithPrefix(const Twine &Name) const {
return Name.isTriviallyEmpty() ? Name : Prefix + Name;
}
@@ -1275,8 +1275,7 @@ static void speculatePHINodeLoads(PHINode &PN) {
// Get the AA tags and alignment to use from one of the loads. It does not
// matter which one we get and if any differ.
- AAMDNodes AATags;
- SomeLoad->getAAMetadata(AATags);
+ AAMDNodes AATags = SomeLoad->getAAMetadata();
Align Alignment = SomeLoad->getAlign();
// Rewrite all loads of the PN to use the new PHI.
@@ -1330,14 +1329,21 @@ static void speculatePHINodeLoads(PHINode &PN) {
/// %V = select i1 %cond, i32 %V1, i32 %V2
///
/// We can do this to a select if its only uses are loads and if the operand
-/// to the select can be loaded unconditionally.
+/// to the select can be loaded unconditionally. If an intervening bitcast with
+/// a single use of the load is found, allow the promotion.
static bool isSafeSelectToSpeculate(SelectInst &SI) {
Value *TValue = SI.getTrueValue();
Value *FValue = SI.getFalseValue();
const DataLayout &DL = SI.getModule()->getDataLayout();
for (User *U : SI.users()) {
- LoadInst *LI = dyn_cast<LoadInst>(U);
+ LoadInst *LI;
+ BitCastInst *BC = dyn_cast<BitCastInst>(U);
+ if (BC && BC->hasOneUse())
+ LI = dyn_cast<LoadInst>(*BC->user_begin());
+ else
+ LI = dyn_cast<LoadInst>(U);
+
if (!LI || !LI->isSimple())
return false;
@@ -1363,13 +1369,27 @@ static void speculateSelectInstLoads(SelectInst &SI) {
Value *FV = SI.getFalseValue();
// Replace the loads of the select with a select of two loads.
while (!SI.use_empty()) {
- LoadInst *LI = cast<LoadInst>(SI.user_back());
+ LoadInst *LI;
+ BitCastInst *BC = dyn_cast<BitCastInst>(SI.user_back());
+ if (BC) {
+ assert(BC->hasOneUse() && "Bitcast should have a single use.");
+ LI = cast<LoadInst>(BC->user_back());
+ } else {
+ LI = cast<LoadInst>(SI.user_back());
+ }
+
assert(LI->isSimple() && "We only speculate simple loads");
IRB.SetInsertPoint(LI);
- LoadInst *TL = IRB.CreateLoad(LI->getType(), TV,
+ Value *NewTV =
+ BC ? IRB.CreateBitCast(TV, BC->getType(), TV->getName() + ".sroa.cast")
+ : TV;
+ Value *NewFV =
+ BC ? IRB.CreateBitCast(FV, BC->getType(), FV->getName() + ".sroa.cast")
+ : FV;
+ LoadInst *TL = IRB.CreateLoad(LI->getType(), NewTV,
LI->getName() + ".sroa.speculate.load.true");
- LoadInst *FL = IRB.CreateLoad(LI->getType(), FV,
+ LoadInst *FL = IRB.CreateLoad(LI->getType(), NewFV,
LI->getName() + ".sroa.speculate.load.false");
NumLoadsSpeculated += 2;
@@ -1377,8 +1397,7 @@ static void speculateSelectInstLoads(SelectInst &SI) {
TL->setAlignment(LI->getAlign());
FL->setAlignment(LI->getAlign());
- AAMDNodes Tags;
- LI->getAAMetadata(Tags);
+ AAMDNodes Tags = LI->getAAMetadata();
if (Tags) {
TL->setAAMetadata(Tags);
FL->setAAMetadata(Tags);
@@ -1390,6 +1409,8 @@ static void speculateSelectInstLoads(SelectInst &SI) {
LLVM_DEBUG(dbgs() << " speculated to: " << *V << "\n");
LI->replaceAllUsesWith(V);
LI->eraseFromParent();
+ if (BC)
+ BC->eraseFromParent();
}
SI.eraseFromParent();
}
@@ -1462,76 +1483,6 @@ static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &DL,
return buildGEP(IRB, BasePtr, Indices, NamePrefix);
}
-/// Recursively compute indices for a natural GEP.
-///
-/// This is the recursive step for getNaturalGEPWithOffset that walks down the
-/// element types adding appropriate indices for the GEP.
-static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL,
- Value *Ptr, Type *Ty, APInt &Offset,
- Type *TargetTy,
- SmallVectorImpl<Value *> &Indices,
- const Twine &NamePrefix) {
- if (Offset == 0)
- return getNaturalGEPWithType(IRB, DL, Ptr, Ty, TargetTy, Indices,
- NamePrefix);
-
- // We can't recurse through pointer types.
- if (Ty->isPointerTy())
- return nullptr;
-
- // We try to analyze GEPs over vectors here, but note that these GEPs are
- // extremely poorly defined currently. The long-term goal is to remove GEPing
- // over a vector from the IR completely.
- if (VectorType *VecTy = dyn_cast<VectorType>(Ty)) {
- unsigned ElementSizeInBits =
- DL.getTypeSizeInBits(VecTy->getScalarType()).getFixedSize();
- if (ElementSizeInBits % 8 != 0) {
- // GEPs over non-multiple of 8 size vector elements are invalid.
- return nullptr;
- }
- APInt ElementSize(Offset.getBitWidth(), ElementSizeInBits / 8);
- APInt NumSkippedElements = Offset.sdiv(ElementSize);
- if (NumSkippedElements.ugt(cast<FixedVectorType>(VecTy)->getNumElements()))
- return nullptr;
- Offset -= NumSkippedElements * ElementSize;
- Indices.push_back(IRB.getInt(NumSkippedElements));
- return getNaturalGEPRecursively(IRB, DL, Ptr, VecTy->getElementType(),
- Offset, TargetTy, Indices, NamePrefix);
- }
-
- if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
- Type *ElementTy = ArrTy->getElementType();
- APInt ElementSize(Offset.getBitWidth(),
- DL.getTypeAllocSize(ElementTy).getFixedSize());
- APInt NumSkippedElements = Offset.sdiv(ElementSize);
- if (NumSkippedElements.ugt(ArrTy->getNumElements()))
- return nullptr;
-
- Offset -= NumSkippedElements * ElementSize;
- Indices.push_back(IRB.getInt(NumSkippedElements));
- return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy,
- Indices, NamePrefix);
- }
-
- StructType *STy = dyn_cast<StructType>(Ty);
- if (!STy)
- return nullptr;
-
- const StructLayout *SL = DL.getStructLayout(STy);
- uint64_t StructOffset = Offset.getZExtValue();
- if (StructOffset >= SL->getSizeInBytes())
- return nullptr;
- unsigned Index = SL->getElementContainingOffset(StructOffset);
- Offset -= APInt(Offset.getBitWidth(), SL->getElementOffset(Index));
- Type *ElementTy = STy->getElementType(Index);
- if (Offset.uge(DL.getTypeAllocSize(ElementTy).getFixedSize()))
- return nullptr; // The offset points into alignment padding.
-
- Indices.push_back(IRB.getInt32(Index));
- return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy,
- Indices, NamePrefix);
-}
-
/// Get a natural GEP from a base pointer to a particular offset and
/// resulting in a particular type.
///
@@ -1556,18 +1507,15 @@ static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL,
Type *ElementTy = Ty->getElementType();
if (!ElementTy->isSized())
return nullptr; // We can't GEP through an unsized element.
- if (isa<ScalableVectorType>(ElementTy))
+
+ SmallVector<APInt> IntIndices = DL.getGEPIndicesForOffset(ElementTy, Offset);
+ if (Offset != 0)
return nullptr;
- APInt ElementSize(Offset.getBitWidth(),
- DL.getTypeAllocSize(ElementTy).getFixedSize());
- if (ElementSize == 0)
- return nullptr; // Zero-length arrays can't help us build a natural GEP.
- APInt NumSkippedElements = Offset.sdiv(ElementSize);
-
- Offset -= NumSkippedElements * ElementSize;
- Indices.push_back(IRB.getInt(NumSkippedElements));
- return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy,
- Indices, NamePrefix);
+
+ for (const APInt &Index : IntIndices)
+ Indices.push_back(IRB.getInt(Index));
+ return getNaturalGEPWithType(IRB, DL, Ptr, ElementTy, TargetTy, Indices,
+ NamePrefix);
}
/// Compute an adjusted pointer from Ptr by Offset bytes where the
@@ -1588,6 +1536,15 @@ static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL,
static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,
APInt Offset, Type *PointerTy,
const Twine &NamePrefix) {
+ // Create i8 GEP for opaque pointers.
+ if (Ptr->getType()->isOpaquePointerTy()) {
+ if (Offset != 0)
+ Ptr = IRB.CreateInBoundsGEP(IRB.getInt8Ty(), Ptr, IRB.getInt(Offset),
+ NamePrefix + "sroa_idx");
+ return IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr, PointerTy,
+ NamePrefix + "sroa_cast");
+ }
+
// Even though we don't look through PHI nodes, we could be called on an
// instruction in an unreachable block, which may be on a cycle.
SmallPtrSet<Value *, 4> Visited;
@@ -1851,13 +1808,13 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
} else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
if (!II->isLifetimeStartOrEnd() && !II->isDroppable())
return false;
- } else if (U->get()->getType()->getPointerElementType()->isStructTy()) {
- // Disable vector promotion when there are loads or stores of an FCA.
- return false;
} else if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
if (LI->isVolatile())
return false;
Type *LTy = LI->getType();
+ // Disable vector promotion when there are loads or stores of an FCA.
+ if (LTy->isStructTy())
+ return false;
if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
assert(LTy->isIntegerTy());
LTy = SplitIntTy;
@@ -1868,6 +1825,9 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
if (SI->isVolatile())
return false;
Type *STy = SI->getValueOperand()->getType();
+ // Disable vector promotion when there are loads or stores of an FCA.
+ if (STy->isStructTy())
+ return false;
if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
assert(STy->isIntegerTy());
STy = SplitIntTy;
@@ -2282,7 +2242,7 @@ class llvm::sroa::AllocaSliceRewriter
const DataLayout &DL;
AllocaSlices &AS;
- SROA &Pass;
+ SROAPass &Pass;
AllocaInst &OldAI, &NewAI;
const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;
Type *NewAllocaTy;
@@ -2330,7 +2290,7 @@ class llvm::sroa::AllocaSliceRewriter
IRBuilderTy IRB;
public:
- AllocaSliceRewriter(const DataLayout &DL, AllocaSlices &AS, SROA &Pass,
+ AllocaSliceRewriter(const DataLayout &DL, AllocaSlices &AS, SROAPass &Pass,
AllocaInst &OldAI, AllocaInst &NewAI,
uint64_t NewAllocaBeginOffset,
uint64_t NewAllocaEndOffset, bool IsIntegerPromotable,
@@ -2510,8 +2470,7 @@ private:
Value *OldOp = LI.getOperand(0);
assert(OldOp == OldPtr);
- AAMDNodes AATags;
- LI.getAAMetadata(AATags);
+ AAMDNodes AATags = LI.getAAMetadata();
unsigned AS = LI.getPointerAddressSpace();
@@ -2675,9 +2634,7 @@ private:
Value *OldOp = SI.getOperand(1);
assert(OldOp == OldPtr);
- AAMDNodes AATags;
- SI.getAAMetadata(AATags);
-
+ AAMDNodes AATags = SI.getAAMetadata();
Value *V = SI.getValueOperand();
// Strip all inbounds GEPs and pointer casts to try to dig out any root
@@ -2743,7 +2700,9 @@ private:
deleteIfTriviallyDead(OldOp);
LLVM_DEBUG(dbgs() << " to: " << *NewSI << "\n");
- return NewSI->getPointerOperand() == &NewAI && !SI.isVolatile();
+ return NewSI->getPointerOperand() == &NewAI &&
+ NewSI->getValueOperand()->getType() == NewAllocaTy &&
+ !SI.isVolatile();
}
/// Compute an integer value from splatting an i8 across the given
@@ -2784,8 +2743,7 @@ private:
LLVM_DEBUG(dbgs() << " original: " << II << "\n");
assert(II.getRawDest() == OldPtr);
- AAMDNodes AATags;
- II.getAAMetadata(AATags);
+ AAMDNodes AATags = II.getAAMetadata();
// If the memset has a variable size, it cannot be split, just adjust the
// pointer to the new alloca.
@@ -2811,10 +2769,11 @@ private:
if (BeginOffset > NewAllocaBeginOffset ||
EndOffset < NewAllocaEndOffset)
return false;
+ // Length must be in range for FixedVectorType.
auto *C = cast<ConstantInt>(II.getLength());
- if (C->getBitWidth() > 64)
+ const uint64_t Len = C->getLimitedValue();
+ if (Len > std::numeric_limits<unsigned>::max())
return false;
- const auto Len = C->getZExtValue();
auto *Int8Ty = IntegerType::getInt8Ty(NewAI.getContext());
auto *SrcTy = FixedVectorType::get(Int8Ty, Len);
return canConvertValue(DL, SrcTy, AllocaTy) &&
@@ -2912,8 +2871,7 @@ private:
LLVM_DEBUG(dbgs() << " original: " << II << "\n");
- AAMDNodes AATags;
- II.getAAMetadata(AATags);
+ AAMDNodes AATags = II.getAAMetadata();
bool IsDest = &II.getRawDestUse() == OldUse;
assert((IsDest && II.getRawDest() == OldPtr) ||
@@ -3420,9 +3378,7 @@ private:
// We have an aggregate being loaded, split it apart.
LLVM_DEBUG(dbgs() << " original: " << LI << "\n");
- AAMDNodes AATags;
- LI.getAAMetadata(AATags);
- LoadOpSplitter Splitter(&LI, *U, LI.getType(), AATags,
+ LoadOpSplitter Splitter(&LI, *U, LI.getType(), LI.getAAMetadata(),
getAdjustedAlignment(&LI, 0), DL);
Value *V = UndefValue::get(LI.getType());
Splitter.emitSplitOps(LI.getType(), V, LI.getName() + ".fca");
@@ -3473,9 +3429,7 @@ private:
// We have an aggregate being stored, split it apart.
LLVM_DEBUG(dbgs() << " original: " << SI << "\n");
- AAMDNodes AATags;
- SI.getAAMetadata(AATags);
- StoreOpSplitter Splitter(&SI, *U, V->getType(), AATags,
+ StoreOpSplitter Splitter(&SI, *U, V->getType(), SI.getAAMetadata(),
getAdjustedAlignment(&SI, 0), DL);
Splitter.emitSplitOps(V->getType(), V, V->getName() + ".fca");
Visited.erase(&SI);
@@ -3801,7 +3755,7 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty, uint64_t Offset,
/// there all along.
///
/// \returns true if any changes are made.
-bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
+bool SROAPass::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
LLVM_DEBUG(dbgs() << "Pre-splitting loads and stores\n");
// Track the loads and stores which are candidates for pre-splitting here, in
@@ -4281,8 +4235,8 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
/// appropriate new offsets. It also evaluates how successful the rewrite was
/// at enabling promotion and if it was successful queues the alloca to be
/// promoted.
-AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
- Partition &P) {
+AllocaInst *SROAPass::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
+ Partition &P) {
// Try to compute a friendly type for this partition of the alloca. This
// won't always succeed, in which case we fall back to a legal integer type
// or an i8 array of an appropriate size.
@@ -4433,7 +4387,7 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
/// Walks the slices of an alloca and form partitions based on them,
/// rewriting each of their uses.
-bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
+bool SROAPass::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
if (AS.begin() == AS.end())
return false;
@@ -4604,7 +4558,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
}
/// Clobber a use with undef, deleting the used value if it becomes dead.
-void SROA::clobberUse(Use &U) {
+void SROAPass::clobberUse(Use &U) {
Value *OldV = U;
// Replace the use with an undef value.
U = UndefValue::get(OldV->getType());
@@ -4623,7 +4577,7 @@ void SROA::clobberUse(Use &U) {
/// This analyzes the alloca to ensure we can reason about it, builds
/// the slices of the alloca, and then hands it off to be split and
/// rewritten as needed.
-bool SROA::runOnAlloca(AllocaInst &AI) {
+bool SROAPass::runOnAlloca(AllocaInst &AI) {
LLVM_DEBUG(dbgs() << "SROA alloca: " << AI << "\n");
++NumAllocasAnalyzed;
@@ -4697,7 +4651,7 @@ bool SROA::runOnAlloca(AllocaInst &AI) {
///
/// We also record the alloca instructions deleted here so that they aren't
/// subsequently handed to mem2reg to promote.
-bool SROA::deleteDeadInstructions(
+bool SROAPass::deleteDeadInstructions(
SmallPtrSetImpl<AllocaInst *> &DeletedAllocas) {
bool Changed = false;
while (!DeadInsts.empty()) {
@@ -4736,7 +4690,7 @@ bool SROA::deleteDeadInstructions(
/// This attempts to promote whatever allocas have been identified as viable in
/// the PromotableAllocas list. If that list is empty, there is nothing to do.
/// This function returns whether any promotion occurred.
-bool SROA::promoteAllocas(Function &F) {
+bool SROAPass::promoteAllocas(Function &F) {
if (PromotableAllocas.empty())
return false;
@@ -4748,8 +4702,8 @@ bool SROA::promoteAllocas(Function &F) {
return true;
}
-PreservedAnalyses SROA::runImpl(Function &F, DominatorTree &RunDT,
- AssumptionCache &RunAC) {
+PreservedAnalyses SROAPass::runImpl(Function &F, DominatorTree &RunDT,
+ AssumptionCache &RunAC) {
LLVM_DEBUG(dbgs() << "SROA function: " << F.getName() << "\n");
C = &F.getContext();
DT = &RunDT;
@@ -4803,7 +4757,7 @@ PreservedAnalyses SROA::runImpl(Function &F, DominatorTree &RunDT,
return PA;
}
-PreservedAnalyses SROA::run(Function &F, FunctionAnalysisManager &AM) {
+PreservedAnalyses SROAPass::run(Function &F, FunctionAnalysisManager &AM) {
return runImpl(F, AM.getResult<DominatorTreeAnalysis>(F),
AM.getResult<AssumptionAnalysis>(F));
}
@@ -4814,7 +4768,7 @@ PreservedAnalyses SROA::run(Function &F, FunctionAnalysisManager &AM) {
/// SROA pass.
class llvm::sroa::SROALegacyPass : public FunctionPass {
/// The SROA implementation.
- SROA Impl;
+ SROAPass Impl;
public:
static char ID;
diff --git a/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp b/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
index ca288a533f46..1284bae820a4 100644
--- a/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
+++ b/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
@@ -873,13 +873,11 @@ static bool runImpl(Function &F, const TargetTransformInfo &TTI,
auto &DL = F.getParent()->getDataLayout();
while (MadeChange) {
MadeChange = false;
- for (Function::iterator I = F.begin(); I != F.end();) {
- BasicBlock *BB = &*I++;
+ for (BasicBlock &BB : llvm::make_early_inc_range(F)) {
bool ModifiedDTOnIteration = false;
- MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration, TTI, DL,
+ MadeChange |= optimizeBlock(BB, ModifiedDTOnIteration, TTI, DL,
DTU.hasValue() ? DTU.getPointer() : nullptr);
-
// Restart BB iteration if the dominator tree of the Function was changed
if (ModifiedDTOnIteration)
break;
@@ -933,7 +931,7 @@ static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
if (II) {
// The scalarization code below does not work for scalable vectors.
if (isa<ScalableVectorType>(II->getType()) ||
- any_of(II->arg_operands(),
+ any_of(II->args(),
[](Value *V) { return isa<ScalableVectorType>(V->getType()); }))
return false;
diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index 8ef6b69673be..6b7419abe1d1 100644
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -66,6 +66,15 @@ static cl::opt<bool>
namespace {
+BasicBlock::iterator skipPastPhiNodesAndDbg(BasicBlock::iterator Itr) {
+ BasicBlock *BB = Itr->getParent();
+ if (isa<PHINode>(Itr))
+ Itr = BB->getFirstInsertionPt();
+ if (Itr != BB->end())
+ Itr = skipDebugIntrinsics(Itr);
+ return Itr;
+}
+
// Used to store the scattered form of a vector.
using ValueVector = SmallVector<Value *, 8>;
@@ -371,10 +380,11 @@ Scatterer ScalarizerVisitor::scatter(Instruction *Point, Value *V) {
return Scatterer(Point->getParent(), Point->getIterator(),
UndefValue::get(V->getType()));
// Put the scattered form of an instruction directly after the
- // instruction.
+ // instruction, skipping over PHI nodes and debug intrinsics.
BasicBlock *BB = VOp->getParent();
- return Scatterer(BB, std::next(BasicBlock::iterator(VOp)),
- V, &Scattered[V]);
+ return Scatterer(
+ BB, skipPastPhiNodesAndDbg(std::next(BasicBlock::iterator(VOp))), V,
+ &Scattered[V]);
}
// In the fallback case, just put the scattered before Point and
// keep the result local to Point.
@@ -530,7 +540,7 @@ bool ScalarizerVisitor::splitCall(CallInst &CI) {
return false;
unsigned NumElems = cast<FixedVectorType>(VT)->getNumElements();
- unsigned NumArgs = CI.getNumArgOperands();
+ unsigned NumArgs = CI.arg_size();
ValueVector ScalarOperands(NumArgs);
SmallVector<Scatterer, 8> Scattered(NumArgs);
diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index f216956406b6..ffa2f9adb978 100644
--- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -1164,8 +1164,11 @@ bool SeparateConstOffsetFromGEP::run(Function &F) {
DL = &F.getParent()->getDataLayout();
bool Changed = false;
for (BasicBlock &B : F) {
- for (BasicBlock::iterator I = B.begin(), IE = B.end(); I != IE;)
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I++))
+ if (!DT->isReachableFromEntry(&B))
+ continue;
+
+ for (Instruction &I : llvm::make_early_inc_range(B))
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I))
Changed |= splitGEP(GEP);
// No need to split GEP ConstantExprs because all its indices are constant
// already.
@@ -1258,10 +1261,8 @@ bool SeparateConstOffsetFromGEP::reuniteExts(Function &F) {
DominatingSubs.clear();
for (const auto Node : depth_first(DT)) {
BasicBlock *BB = Node->getBlock();
- for (auto I = BB->begin(); I != BB->end(); ) {
- Instruction *Cur = &*I++;
- Changed |= reuniteExts(Cur);
- }
+ for (Instruction &I : llvm::make_early_inc_range(*BB))
+ Changed |= reuniteExts(&I);
}
return Changed;
}
diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index b9cccc2af309..a27da047bfd3 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -28,6 +28,7 @@
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
@@ -49,7 +50,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GenericDomTree.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -81,6 +81,7 @@ static cl::opt<bool> EnableNonTrivialUnswitch(
static cl::opt<int>
UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden,
+ cl::ZeroOrMore,
cl::desc("The cost threshold for unswitching a loop."));
static cl::opt<bool> EnableUnswitchCostMultiplier(
@@ -108,6 +109,10 @@ static cl::opt<unsigned>
cl::desc("Max number of memory uses to explore during "
"partial unswitching analysis"),
cl::init(100), cl::Hidden);
+static cl::opt<bool> FreezeLoopUnswitchCond(
+ "freeze-loop-unswitch-cond", cl::init(false), cl::Hidden,
+ cl::desc("If enabled, the freeze instruction will be added to condition "
+ "of loop unswitch to prevent miscompilation."));
/// Collect all of the loop invariant input values transitively used by the
/// homogeneous instruction graph from a given root.
@@ -195,15 +200,15 @@ static bool areLoopExitPHIsLoopInvariant(Loop &L, BasicBlock &ExitingBB,
/// Copy a set of loop invariant values \p ToDuplicate and insert them at the
/// end of \p BB and conditionally branch on the copied condition. We only
/// branch on a single value.
-static void buildPartialUnswitchConditionalBranch(BasicBlock &BB,
- ArrayRef<Value *> Invariants,
- bool Direction,
- BasicBlock &UnswitchedSucc,
- BasicBlock &NormalSucc) {
+static void buildPartialUnswitchConditionalBranch(
+ BasicBlock &BB, ArrayRef<Value *> Invariants, bool Direction,
+ BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze) {
IRBuilder<> IRB(&BB);
Value *Cond = Direction ? IRB.CreateOr(Invariants) :
IRB.CreateAnd(Invariants);
+ if (InsertFreeze)
+ Cond = IRB.CreateFreeze(Cond, Cond->getName() + ".fr");
IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
Direction ? &NormalSucc : &UnswitchedSucc);
}
@@ -564,7 +569,7 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
"Must have an `and` of `i1`s or `select i1 X, Y, false`s for the"
" condition!");
buildPartialUnswitchConditionalBranch(*OldPH, Invariants, ExitDirection,
- *UnswitchedBB, *NewPH);
+ *UnswitchedBB, *NewPH, false);
}
// Update the dominator tree with the added edge.
@@ -1587,10 +1592,12 @@ deleteDeadClonedBlocks(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
BB->eraseFromParent();
}
-static void deleteDeadBlocksFromLoop(Loop &L,
- SmallVectorImpl<BasicBlock *> &ExitBlocks,
- DominatorTree &DT, LoopInfo &LI,
- MemorySSAUpdater *MSSAU) {
+static void
+deleteDeadBlocksFromLoop(Loop &L,
+ SmallVectorImpl<BasicBlock *> &ExitBlocks,
+ DominatorTree &DT, LoopInfo &LI,
+ MemorySSAUpdater *MSSAU,
+ function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
// Find all the dead blocks tied to this loop, and remove them from their
// successors.
SmallSetVector<BasicBlock *, 8> DeadBlockSet;
@@ -1640,6 +1647,7 @@ static void deleteDeadBlocksFromLoop(Loop &L,
}) &&
"If the child loop header is dead all blocks in the child loop must "
"be dead as well!");
+ DestroyLoopCB(*ChildL, ChildL->getName());
LI.destroy(ChildL);
return true;
});
@@ -1980,6 +1988,8 @@ static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
ParentL->removeChildLoop(llvm::find(*ParentL, &L));
else
LI.removeLoop(llvm::find(LI, &L));
+ // markLoopAsDeleted for L should be triggered by the caller (it is typically
+ // done by using the UnswitchCB callback).
LI.destroy(&L);
return false;
}
@@ -2019,7 +2029,8 @@ static void unswitchNontrivialInvariants(
SmallVectorImpl<BasicBlock *> &ExitBlocks, IVConditionInfo &PartialIVInfo,
DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
- ScalarEvolution *SE, MemorySSAUpdater *MSSAU) {
+ ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
+ function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
auto *ParentBB = TI.getParent();
BranchInst *BI = dyn_cast<BranchInst>(&TI);
SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI);
@@ -2117,6 +2128,13 @@ static void unswitchNontrivialInvariants(
SE->forgetTopmostLoop(&L);
}
+ bool InsertFreeze = false;
+ if (FreezeLoopUnswitchCond) {
+ ICFLoopSafetyInfo SafetyInfo;
+ SafetyInfo.computeLoopSafetyInfo(&L);
+ InsertFreeze = !SafetyInfo.isGuaranteedToExecute(TI, &DT, &L);
+ }
+
// If the edge from this terminator to a successor dominates that successor,
// store a map from each block in its dominator subtree to it. This lets us
// tell when cloning for a particular successor if a block is dominated by
@@ -2191,6 +2209,11 @@ static void unswitchNontrivialInvariants(
BasicBlock *ClonedPH = ClonedPHs.begin()->second;
BI->setSuccessor(ClonedSucc, ClonedPH);
BI->setSuccessor(1 - ClonedSucc, LoopPH);
+ if (InsertFreeze) {
+ auto Cond = BI->getCondition();
+ if (!isGuaranteedNotToBeUndefOrPoison(Cond, &AC, BI, &DT))
+ BI->setCondition(new FreezeInst(Cond, Cond->getName() + ".fr", BI));
+ }
DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
} else {
assert(SI && "Must either be a branch or switch!");
@@ -2205,6 +2228,11 @@ static void unswitchNontrivialInvariants(
else
Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second);
+ if (InsertFreeze) {
+ auto Cond = SI->getCondition();
+ if (!isGuaranteedNotToBeUndefOrPoison(Cond, &AC, SI, &DT))
+ SI->setCondition(new FreezeInst(Cond, Cond->getName() + ".fr", SI));
+ }
// We need to use the set to populate domtree updates as even when there
// are multiple cases pointing at the same successor we only want to
// remove and insert one edge in the domtree.
@@ -2285,7 +2313,7 @@ static void unswitchNontrivialInvariants(
*SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU);
else
buildPartialUnswitchConditionalBranch(*SplitBB, Invariants, Direction,
- *ClonedPH, *LoopPH);
+ *ClonedPH, *LoopPH, InsertFreeze);
DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
if (MSSAU) {
@@ -2319,7 +2347,7 @@ static void unswitchNontrivialInvariants(
// Now that our cloned loops have been built, we can update the original loop.
// First we delete the dead blocks from it and then we rebuild the loop
// structure taking these deletions into account.
- deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI, MSSAU);
+ deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI, MSSAU, DestroyLoopCB);
if (MSSAU && VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
@@ -2364,7 +2392,9 @@ static void unswitchNontrivialInvariants(
ConstantInt *ContinueReplacement =
Direction ? ConstantInt::getFalse(BI->getContext())
: ConstantInt::getTrue(BI->getContext());
- for (Value *Invariant : Invariants)
+ for (Value *Invariant : Invariants) {
+ assert(!isa<Constant>(Invariant) &&
+ "Should not be replacing constant values!");
// Use make_early_inc_range here as set invalidates the iterator.
for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
Instruction *UserI = dyn_cast<Instruction>(U.getUser());
@@ -2379,6 +2409,7 @@ static void unswitchNontrivialInvariants(
DT.dominates(ClonedPH, UserI->getParent()))
U.set(UnswitchedReplacement);
}
+ }
}
// We can change which blocks are exit blocks of all the cloned sibling
@@ -2670,7 +2701,8 @@ static bool unswitchBestCondition(
Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
AAResults &AA, TargetTransformInfo &TTI,
function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
- ScalarEvolution *SE, MemorySSAUpdater *MSSAU) {
+ ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
+ function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
// Collect all invariant conditions within this loop (as opposed to an inner
// loop which would be handled when visiting that inner loop).
SmallVector<std::pair<Instruction *, TinyPtrVector<Value *>>, 4>
@@ -2720,6 +2752,9 @@ static bool unswitchBestCondition(
Cond = CondNext;
BI->setCondition(Cond);
+ if (isa<Constant>(Cond))
+ continue;
+
if (L.isLoopInvariant(BI->getCondition())) {
UnswitchCandidates.push_back({BI, {BI->getCondition()}});
continue;
@@ -2958,7 +2993,7 @@ static bool unswitchBestCondition(
<< "\n");
unswitchNontrivialInvariants(L, *BestUnswitchTI, BestUnswitchInvariants,
ExitBlocks, PartialIVInfo, DT, LI, AC,
- UnswitchCB, SE, MSSAU);
+ UnswitchCB, SE, MSSAU, DestroyLoopCB);
return true;
}
@@ -2988,7 +3023,8 @@ unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
AAResults &AA, TargetTransformInfo &TTI, bool Trivial,
bool NonTrivial,
function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
- ScalarEvolution *SE, MemorySSAUpdater *MSSAU) {
+ ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
+ function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
assert(L.isRecursivelyLCSSAForm(DT, LI) &&
"Loops must be in LCSSA form before unswitching.");
@@ -3036,7 +3072,8 @@ unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
// Try to unswitch the best invariant condition. We prefer this full unswitch to
// a partial unswitch when possible below the threshold.
- if (unswitchBestCondition(L, DT, LI, AC, AA, TTI, UnswitchCB, SE, MSSAU))
+ if (unswitchBestCondition(L, DT, LI, AC, AA, TTI, UnswitchCB, SE, MSSAU,
+ DestroyLoopCB))
return true;
// No other opportunities to unswitch.
@@ -3083,6 +3120,10 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
U.markLoopAsDeleted(L, LoopName);
};
+ auto DestroyLoopCB = [&U](Loop &L, StringRef Name) {
+ U.markLoopAsDeleted(L, Name);
+ };
+
Optional<MemorySSAUpdater> MSSAU;
if (AR.MSSA) {
MSSAU = MemorySSAUpdater(AR.MSSA);
@@ -3091,7 +3132,8 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
}
if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, Trivial, NonTrivial,
UnswitchCB, &AR.SE,
- MSSAU.hasValue() ? MSSAU.getPointer() : nullptr))
+ MSSAU.hasValue() ? MSSAU.getPointer() : nullptr,
+ DestroyLoopCB))
return PreservedAnalyses::all();
if (AR.MSSA && VerifyMemorySSA)
@@ -3107,6 +3149,17 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
return PA;
}
+void SimpleLoopUnswitchPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<SimpleLoopUnswitchPass> *>(this)->printPipeline(
+ OS, MapClassName2PassName);
+
+ OS << "<";
+ OS << (NonTrivial ? "" : "no-") << "nontrivial;";
+ OS << (Trivial ? "" : "no-") << "trivial";
+ OS << ">";
+}
+
namespace {
class SimpleLoopUnswitchLegacyPass : public LoopPass {
@@ -3126,10 +3179,8 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetTransformInfoWrapperPass>();
- if (EnableMSSALoopDependency) {
- AU.addRequired<MemorySSAWrapperPass>();
- AU.addPreserved<MemorySSAWrapperPass>();
- }
+ AU.addRequired<MemorySSAWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
getLoopAnalysisUsage(AU);
}
};
@@ -3150,12 +3201,8 @@ bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- MemorySSA *MSSA = nullptr;
- Optional<MemorySSAUpdater> MSSAU;
- if (EnableMSSALoopDependency) {
- MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
- MSSAU = MemorySSAUpdater(MSSA);
- }
+ MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
+ MemorySSAUpdater MSSAU(MSSA);
auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
auto *SE = SEWP ? &SEWP->getSE() : nullptr;
@@ -3179,14 +3226,17 @@ bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
LPM.markLoopAsDeleted(*L);
};
- if (MSSA && VerifyMemorySSA)
+ auto DestroyLoopCB = [&LPM](Loop &L, StringRef /* Name */) {
+ LPM.markLoopAsDeleted(L);
+ };
+
+ if (VerifyMemorySSA)
MSSA->verifyMemorySSA();
- bool Changed =
- unswitchLoop(*L, DT, LI, AC, AA, TTI, true, NonTrivial, UnswitchCB, SE,
- MSSAU.hasValue() ? MSSAU.getPointer() : nullptr);
+ bool Changed = unswitchLoop(*L, DT, LI, AC, AA, TTI, true, NonTrivial,
+ UnswitchCB, SE, &MSSAU, DestroyLoopCB);
- if (MSSA && VerifyMemorySSA)
+ if (VerifyMemorySSA)
MSSA->verifyMemorySSA();
// Historically this pass has had issues with the dominator tree so verify it
diff --git a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 09d59b0e884a..86d3620c312e 100644
--- a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -224,7 +224,11 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
SmallVector<WeakVH, 16> LoopHeaders(UniqueLoopHeaders.begin(),
UniqueLoopHeaders.end());
+ unsigned IterCnt = 0;
+ (void)IterCnt;
while (LocalChange) {
+ assert(IterCnt++ < 1000 &&
+ "Sanity: iterative simplification didn't converge!");
LocalChange = false;
// Loop over all of the basic blocks and remove them if they are unneeded.
@@ -319,6 +323,21 @@ SimplifyCFGPass::SimplifyCFGPass(const SimplifyCFGOptions &Opts)
applyCommandLineOverridesToOptions(Options);
}
+void SimplifyCFGPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<SimplifyCFGPass> *>(this)->printPipeline(
+ OS, MapClassName2PassName);
+ OS << "<";
+ OS << "bonus-inst-threshold=" << Options.BonusInstThreshold << ";";
+ OS << (Options.ForwardSwitchCondToPhi ? "" : "no-") << "forward-switch-cond;";
+ OS << (Options.ConvertSwitchToLookupTable ? "" : "no-")
+ << "switch-to-lookup;";
+ OS << (Options.NeedCanonicalLoop ? "" : "no-") << "keep-loops;";
+ OS << (Options.HoistCommonInsts ? "" : "no-") << "hoist-common-insts;";
+ OS << (Options.SinkCommonInsts ? "" : "no-") << "sink-common-insts";
+ OS << ">";
+}
+
PreservedAnalyses SimplifyCFGPass::run(Function &F,
FunctionAnalysisManager &AM) {
auto &TTI = AM.getResult<TargetIRAnalysis>(F);
diff --git a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
index dfa30418ea01..06169a7834f6 100644
--- a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
+++ b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
@@ -268,7 +268,7 @@ bool SpeculativeExecutionPass::considerHoistingFromTo(
if (const auto *DVI = dyn_cast<DbgVariableIntrinsic>(U)) {
return all_of(DVI->location_ops(), [&NotHoisted](Value *V) {
if (const auto *I = dyn_cast_or_null<Instruction>(V)) {
- if (NotHoisted.count(I) == 0)
+ if (!NotHoisted.contains(I))
return true;
}
return false;
diff --git a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
index 20b8b982e14b..b47378808216 100644
--- a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
@@ -607,7 +607,7 @@ Value *StraightLineStrengthReduce::emitBump(const Candidate &Basis,
if (IndexOffset == 1)
return C.Stride;
// Common case 2: if (i' - i) is -1, Bump = -S.
- if (IndexOffset.isAllOnesValue())
+ if (IndexOffset.isAllOnes())
return Builder.CreateNeg(C.Stride);
// Otherwise, Bump = (i' - i) * sext/trunc(S). Note that (i' - i) and S may
@@ -620,7 +620,7 @@ Value *StraightLineStrengthReduce::emitBump(const Candidate &Basis,
ConstantInt *Exponent = ConstantInt::get(DeltaType, IndexOffset.logBase2());
return Builder.CreateShl(ExtendedStride, Exponent);
}
- if ((-IndexOffset).isPowerOf2()) {
+ if (IndexOffset.isNegatedPowerOf2()) {
// If (i - i') is a power of 2, Bump = -sext/trunc(S) << log(i' - i).
ConstantInt *Exponent =
ConstantInt::get(DeltaType, (-IndexOffset).logBase2());
diff --git a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 846a9321f53e..3bcf92e28a21 100644
--- a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -262,7 +262,7 @@ static bool markTails(Function &F, OptimizationRemarkEmitter *ORE) {
// Note that this runs whether we know an alloca has escaped or not. If
// it has, then we can't trust Tracker.AllocaUsers to be accurate.
bool SafeToTail = true;
- for (auto &Arg : CI->arg_operands()) {
+ for (auto &Arg : CI->args()) {
if (isa<Constant>(Arg.getUser()))
continue;
if (Argument *A = dyn_cast<Argument>(Arg.getUser()))
@@ -584,8 +584,8 @@ void TailRecursionEliminator::insertAccumulator(Instruction *AccRecInstr) {
// call instruction into the newly created temporary variable.
void TailRecursionEliminator::copyByValueOperandIntoLocalTemp(CallInst *CI,
int OpndIdx) {
- PointerType *ArgTy = cast<PointerType>(CI->getArgOperand(OpndIdx)->getType());
- Type *AggTy = ArgTy->getElementType();
+ Type *AggTy = CI->getParamByValType(OpndIdx);
+ assert(AggTy);
const DataLayout &DL = F.getParent()->getDataLayout();
// Get alignment of byVal operand.
@@ -611,8 +611,8 @@ void TailRecursionEliminator::copyByValueOperandIntoLocalTemp(CallInst *CI,
// into the corresponding function argument location.
void TailRecursionEliminator::copyLocalTempOfByValueOperandIntoArguments(
CallInst *CI, int OpndIdx) {
- PointerType *ArgTy = cast<PointerType>(CI->getArgOperand(OpndIdx)->getType());
- Type *AggTy = ArgTy->getElementType();
+ Type *AggTy = CI->getParamByValType(OpndIdx);
+ assert(AggTy);
const DataLayout &DL = F.getParent()->getDataLayout();
// Get alignment of byVal operand.
@@ -667,7 +667,7 @@ bool TailRecursionEliminator::eliminateCall(CallInst *CI) {
createTailRecurseLoopHeader(CI);
// Copy values of ByVal operands into local temporary variables.
- for (unsigned I = 0, E = CI->getNumArgOperands(); I != E; ++I) {
+ for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
if (CI->isByValArgument(I))
copyByValueOperandIntoLocalTemp(CI, I);
}
@@ -675,7 +675,7 @@ bool TailRecursionEliminator::eliminateCall(CallInst *CI) {
// Ok, now that we know we have a pseudo-entry block WITH all of the
// required PHI nodes, add entries into the PHI node for the actual
// parameters passed into the tail-recursive call.
- for (unsigned I = 0, E = CI->getNumArgOperands(); I != E; ++I) {
+ for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
if (CI->isByValArgument(I)) {
copyLocalTempOfByValueOperandIntoArguments(CI, I);
ArgumentPHIs[I]->addIncoming(F.getArg(I), BB);
diff --git a/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp b/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
index 8cd16ca3906f..fdc914a72bfd 100644
--- a/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
+++ b/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
@@ -63,6 +63,9 @@ static Value *callPrintfBegin(IRBuilder<> &Builder, Value *Version) {
auto Int64Ty = Builder.getInt64Ty();
auto M = Builder.GetInsertBlock()->getModule();
auto Fn = M->getOrInsertFunction("__ockl_printf_begin", Int64Ty, Int64Ty);
+ if (!M->getModuleFlag("amdgpu_hostcall")) {
+ M->addModuleFlag(llvm::Module::Override, "amdgpu_hostcall", 1);
+ }
return Builder.CreateCall(Fn, Version);
}
diff --git a/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp b/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp
index 01912297324a..cbc508bb863a 100644
--- a/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp
+++ b/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp
@@ -33,14 +33,14 @@ static inline bool CompareVars(const ASanStackVariableDescription &a,
// We also force minimal alignment for all vars to kMinAlignment so that vars
// with e.g. alignment 1 and alignment 16 do not get reordered by CompareVars.
-static const size_t kMinAlignment = 16;
+static const uint64_t kMinAlignment = 16;
// We want to add a full redzone after every variable.
// The larger the variable Size the larger is the redzone.
// The resulting frame size is a multiple of Alignment.
-static size_t VarAndRedzoneSize(size_t Size, size_t Granularity,
- size_t Alignment) {
- size_t Res = 0;
+static uint64_t VarAndRedzoneSize(uint64_t Size, uint64_t Granularity,
+ uint64_t Alignment) {
+ uint64_t Res = 0;
if (Size <= 4) Res = 16;
else if (Size <= 16) Res = 32;
else if (Size <= 128) Res = Size + 32;
@@ -52,7 +52,7 @@ static size_t VarAndRedzoneSize(size_t Size, size_t Granularity,
ASanStackFrameLayout
ComputeASanStackFrameLayout(SmallVectorImpl<ASanStackVariableDescription> &Vars,
- size_t Granularity, size_t MinHeaderSize) {
+ uint64_t Granularity, uint64_t MinHeaderSize) {
assert(Granularity >= 8 && Granularity <= 64 &&
(Granularity & (Granularity - 1)) == 0);
assert(MinHeaderSize >= 16 && (MinHeaderSize & (MinHeaderSize - 1)) == 0 &&
@@ -67,22 +67,22 @@ ComputeASanStackFrameLayout(SmallVectorImpl<ASanStackVariableDescription> &Vars,
ASanStackFrameLayout Layout;
Layout.Granularity = Granularity;
Layout.FrameAlignment = std::max(Granularity, Vars[0].Alignment);
- size_t Offset = std::max(std::max(MinHeaderSize, Granularity),
- Vars[0].Alignment);
+ uint64_t Offset =
+ std::max(std::max(MinHeaderSize, Granularity), Vars[0].Alignment);
assert((Offset % Granularity) == 0);
for (size_t i = 0; i < NumVars; i++) {
bool IsLast = i == NumVars - 1;
- size_t Alignment = std::max(Granularity, Vars[i].Alignment);
+ uint64_t Alignment = std::max(Granularity, Vars[i].Alignment);
(void)Alignment; // Used only in asserts.
- size_t Size = Vars[i].Size;
+ uint64_t Size = Vars[i].Size;
assert((Alignment & (Alignment - 1)) == 0);
assert(Layout.FrameAlignment >= Alignment);
assert((Offset % Alignment) == 0);
assert(Size > 0);
- size_t NextAlignment = IsLast ? Granularity
- : std::max(Granularity, Vars[i + 1].Alignment);
- size_t SizeWithRedzone = VarAndRedzoneSize(Size, Granularity,
- NextAlignment);
+ uint64_t NextAlignment =
+ IsLast ? Granularity : std::max(Granularity, Vars[i + 1].Alignment);
+ uint64_t SizeWithRedzone =
+ VarAndRedzoneSize(Size, Granularity, NextAlignment);
Vars[i].Offset = Offset;
Offset += SizeWithRedzone;
}
@@ -118,7 +118,7 @@ GetShadowBytes(const SmallVectorImpl<ASanStackVariableDescription> &Vars,
assert(Vars.size() > 0);
SmallVector<uint8_t, 64> SB;
SB.clear();
- const size_t Granularity = Layout.Granularity;
+ const uint64_t Granularity = Layout.Granularity;
SB.resize(Vars[0].Offset / Granularity, kAsanStackLeftRedzoneMagic);
for (const auto &Var : Vars) {
SB.resize(Var.Offset / Granularity, kAsanStackMidRedzoneMagic);
@@ -135,13 +135,13 @@ SmallVector<uint8_t, 64> GetShadowBytesAfterScope(
const SmallVectorImpl<ASanStackVariableDescription> &Vars,
const ASanStackFrameLayout &Layout) {
SmallVector<uint8_t, 64> SB = GetShadowBytes(Vars, Layout);
- const size_t Granularity = Layout.Granularity;
+ const uint64_t Granularity = Layout.Granularity;
for (const auto &Var : Vars) {
assert(Var.LifetimeSize <= Var.Size);
- const size_t LifetimeShadowSize =
+ const uint64_t LifetimeShadowSize =
(Var.LifetimeSize + Granularity - 1) / Granularity;
- const size_t Offset = Var.Offset / Granularity;
+ const uint64_t Offset = Var.Offset / Granularity;
std::fill(SB.begin() + Offset, SB.begin() + Offset + LifetimeShadowSize,
kAsanStackUseAfterScopeMagic);
}
diff --git a/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp b/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp
index d689e04da36f..f910f7c3c31f 100644
--- a/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp
+++ b/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp
@@ -67,7 +67,8 @@ bool isUsefullToPreserve(Attribute::AttrKind Kind) {
/// This function will try to transform the given knowledge into a more
/// canonical one. The canonical knowledge may be the given one.
-RetainedKnowledge canonicalizedKnowledge(RetainedKnowledge RK, DataLayout DL) {
+RetainedKnowledge canonicalizedKnowledge(RetainedKnowledge RK,
+ const DataLayout &DL) {
switch (RK.AttrKind) {
default:
return RK;
@@ -103,7 +104,7 @@ struct AssumeBuilderState {
Module *M;
using MapKey = std::pair<Value *, Attribute::AttrKind>;
- SmallMapVector<MapKey, unsigned, 8> AssumedKnowledgeMap;
+ SmallMapVector<MapKey, uint64_t, 8> AssumedKnowledgeMap;
Instruction *InstBeingModified = nullptr;
AssumptionCache* AC = nullptr;
DominatorTree* DT = nullptr;
@@ -196,28 +197,27 @@ struct AssumeBuilderState {
(!ShouldPreserveAllAttributes &&
!isUsefullToPreserve(Attr.getKindAsEnum())))
return;
- unsigned AttrArg = 0;
+ uint64_t AttrArg = 0;
if (Attr.isIntAttribute())
AttrArg = Attr.getValueAsInt();
addKnowledge({Attr.getKindAsEnum(), AttrArg, WasOn});
}
void addCall(const CallBase *Call) {
- auto addAttrList = [&](AttributeList AttrList) {
- for (unsigned Idx = AttributeList::FirstArgIndex;
- Idx < AttrList.getNumAttrSets(); Idx++)
- for (Attribute Attr : AttrList.getAttributes(Idx)) {
+ auto addAttrList = [&](AttributeList AttrList, unsigned NumArgs) {
+ for (unsigned Idx = 0; Idx < NumArgs; Idx++)
+ for (Attribute Attr : AttrList.getParamAttrs(Idx)) {
bool IsPoisonAttr = Attr.hasAttribute(Attribute::NonNull) ||
Attr.hasAttribute(Attribute::Alignment);
- if (!IsPoisonAttr || Call->isPassingUndefUB(Idx - 1))
- addAttribute(Attr, Call->getArgOperand(Idx - 1));
+ if (!IsPoisonAttr || Call->isPassingUndefUB(Idx))
+ addAttribute(Attr, Call->getArgOperand(Idx));
}
- for (Attribute Attr : AttrList.getFnAttributes())
+ for (Attribute Attr : AttrList.getFnAttrs())
addAttribute(Attr, nullptr);
};
- addAttrList(Call->getAttributes());
+ addAttrList(Call->getAttributes(), Call->arg_size());
if (Function *Fn = Call->getCalledFunction())
- addAttrList(Fn->getAttributes());
+ addAttrList(Fn->getAttributes(), Fn->arg_size());
}
AssumeInst *build() {
@@ -261,8 +261,7 @@ struct AssumeBuilderState {
addKnowledge({Attribute::NonNull, 0u, Pointer});
}
if (MA.valueOrOne() > 1)
- addKnowledge(
- {Attribute::Alignment, unsigned(MA.valueOrOne().value()), Pointer});
+ addKnowledge({Attribute::Alignment, MA.valueOrOne().value(), Pointer});
}
void addInstruction(Instruction *I) {
@@ -392,7 +391,7 @@ struct AssumeSimplify {
void dropRedundantKnowledge() {
struct MapValue {
IntrinsicInst *Assume;
- unsigned ArgValue;
+ uint64_t ArgValue;
CallInst::BundleOpInfo *BOI;
};
buildMapping(false);
diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index ee933b638a23..6469c899feea 100644
--- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -39,6 +39,7 @@
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -52,6 +53,12 @@ using namespace llvm;
#define DEBUG_TYPE "basicblock-utils"
+static cl::opt<unsigned> MaxDeoptOrUnreachableSuccessorCheckDepth(
+ "max-deopt-or-unreachable-succ-check-depth", cl::init(8), cl::Hidden,
+ cl::desc("Set the maximum path length when checking whether a basic block "
+ "is followed by a block that either has a terminating "
+ "deoptimizing call or is terminated with an unreachable"));
+
void llvm::DetatchDeadBlocks(
ArrayRef<BasicBlock *> BBs,
SmallVectorImpl<DominatorTree::UpdateType> *Updates,
@@ -230,7 +237,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
if (DTU) {
SmallPtrSet<BasicBlock *, 2> SuccsOfBB(succ_begin(BB), succ_end(BB));
SmallPtrSet<BasicBlock *, 2> SuccsOfPredBB(succ_begin(PredBB),
- succ_begin(PredBB));
+ succ_end(PredBB));
Updates.reserve(Updates.size() + 2 * SuccsOfBB.size() + 1);
// Add insert edges first. Experimentally, for the particular case of two
// blocks that can be merged, with a single successor and single predecessor
@@ -485,6 +492,20 @@ void llvm::ReplaceInstWithInst(BasicBlock::InstListType &BIL,
BI = New;
}
+bool llvm::IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB) {
+ // Remember visited blocks to avoid an infinite loop.
+ SmallPtrSet<const BasicBlock *, 8> VisitedBlocks;
+ unsigned Depth = 0;
+ while (BB && Depth++ < MaxDeoptOrUnreachableSuccessorCheckDepth &&
+ VisitedBlocks.insert(BB).second) {
+ if (BB->getTerminatingDeoptimizeCall() ||
+ isa<UnreachableInst>(BB->getTerminator()))
+ return true;
+ BB = BB->getUniqueSuccessor();
+ }
+ return false;
+}
+
void llvm::ReplaceInstWithInst(Instruction *From, Instruction *To) {
BasicBlock::iterator BI(From);
ReplaceInstWithInst(From->getParent()->getInstList(), BI, To);
diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index 35e22f7a57e2..957935398972 100644
--- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -96,9 +96,9 @@ static bool setDoesNotThrow(Function &F) {
}
static bool setRetDoesNotAlias(Function &F) {
- if (F.hasAttribute(AttributeList::ReturnIndex, Attribute::NoAlias))
+ if (F.hasRetAttribute(Attribute::NoAlias))
return false;
- F.addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
+ F.addRetAttr(Attribute::NoAlias);
++NumNoAlias;
return true;
}
@@ -145,8 +145,8 @@ static bool setSignExtendedArg(Function &F, unsigned ArgNo) {
static bool setRetNoUndef(Function &F) {
if (!F.getReturnType()->isVoidTy() &&
- !F.hasAttribute(AttributeList::ReturnIndex, Attribute::NoUndef)) {
- F.addAttribute(AttributeList::ReturnIndex, Attribute::NoUndef);
+ !F.hasRetAttribute(Attribute::NoUndef)) {
+ F.addRetAttr(Attribute::NoUndef);
++NumNoUndef;
return true;
}
@@ -174,7 +174,10 @@ static bool setArgNoUndef(Function &F, unsigned ArgNo) {
}
static bool setRetAndArgsNoUndef(Function &F) {
- return setRetNoUndef(F) | setArgsNoUndef(F);
+ bool UndefAdded = false;
+ UndefAdded |= setRetNoUndef(F);
+ UndefAdded |= setArgsNoUndef(F);
+ return UndefAdded;
}
static bool setReturnedArg(Function &F, unsigned ArgNo) {
@@ -1268,7 +1271,7 @@ Value *llvm::emitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilderBase &B,
Value *llvm::emitStrCpy(Value *Dst, Value *Src, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
- Type *I8Ptr = B.getInt8PtrTy();
+ Type *I8Ptr = Dst->getType();
return emitLibCall(LibFunc_strcpy, I8Ptr, {I8Ptr, I8Ptr},
{castToCStr(Dst, B), castToCStr(Src, B)}, B, TLI);
}
@@ -1453,9 +1456,8 @@ static Value *emitUnaryFloatFnCallHelper(Value *Op, StringRef Name,
// The incoming attribute set may have come from a speculatable intrinsic, but
// is being replaced with a library call which is not allowed to be
// speculatable.
- CI->setAttributes(Attrs.removeAttribute(B.getContext(),
- AttributeList::FunctionIndex,
- Attribute::Speculatable));
+ CI->setAttributes(
+ Attrs.removeFnAttribute(B.getContext(), Attribute::Speculatable));
if (const Function *F =
dyn_cast<Function>(Callee.getCallee()->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
@@ -1498,9 +1500,8 @@ static Value *emitBinaryFloatFnCallHelper(Value *Op1, Value *Op2,
// The incoming attribute set may have come from a speculatable intrinsic, but
// is being replaced with a library call which is not allowed to be
// speculatable.
- CI->setAttributes(Attrs.removeAttribute(B.getContext(),
- AttributeList::FunctionIndex,
- Attribute::Speculatable));
+ CI->setAttributes(
+ Attrs.removeFnAttribute(B.getContext(), Attribute::Speculatable));
if (const Function *F =
dyn_cast<Function>(Callee.getCallee()->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
@@ -1655,8 +1656,8 @@ Value *llvm::emitMalloc(Value *Num, IRBuilderBase &B, const DataLayout &DL,
return CI;
}
-Value *llvm::emitCalloc(Value *Num, Value *Size, const AttributeList &Attrs,
- IRBuilderBase &B, const TargetLibraryInfo &TLI) {
+Value *llvm::emitCalloc(Value *Num, Value *Size, IRBuilderBase &B,
+ const TargetLibraryInfo &TLI) {
if (!TLI.has(LibFunc_calloc))
return nullptr;
@@ -1664,8 +1665,8 @@ Value *llvm::emitCalloc(Value *Num, Value *Size, const AttributeList &Attrs,
StringRef CallocName = TLI.getName(LibFunc_calloc);
const DataLayout &DL = M->getDataLayout();
IntegerType *PtrType = DL.getIntPtrType((B.GetInsertBlock()->getContext()));
- FunctionCallee Calloc = M->getOrInsertFunction(
- CallocName, Attrs, B.getInt8PtrTy(), PtrType, PtrType);
+ FunctionCallee Calloc =
+ M->getOrInsertFunction(CallocName, B.getInt8PtrTy(), PtrType, PtrType);
inferLibFuncAttributes(M, CallocName, TLI);
CallInst *CI = B.CreateCall(Calloc, {Num, Size}, CallocName);
diff --git a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
index 87868251036c..ebe19f1751e5 100644
--- a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
+++ b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
@@ -424,6 +424,21 @@ bool llvm::isLegalToPromote(const CallBase &CB, Function *Callee,
*FailureReason = "Argument type mismatch";
return false;
}
+ // Make sure that the callee and call agree on byval/inalloca. The types do
+ // not have to match.
+
+ if (Callee->hasParamAttribute(I, Attribute::ByVal) !=
+ CB.getAttributes().hasParamAttr(I, Attribute::ByVal)) {
+ if (FailureReason)
+ *FailureReason = "byval mismatch";
+ return false;
+ }
+ if (Callee->hasParamAttribute(I, Attribute::InAlloca) !=
+ CB.getAttributes().hasParamAttr(I, Attribute::InAlloca)) {
+ if (FailureReason)
+ *FailureReason = "inalloca mismatch";
+ return false;
+ }
}
for (; I < NumArgs; I++) {
// Vararg functions can have more arguments than parameters.
@@ -485,18 +500,19 @@ CallBase &llvm::promoteCall(CallBase &CB, Function *Callee,
CB.setArgOperand(ArgNo, Cast);
// Remove any incompatible attributes for the argument.
- AttrBuilder ArgAttrs(CallerPAL.getParamAttributes(ArgNo));
+ AttrBuilder ArgAttrs(CallerPAL.getParamAttrs(ArgNo));
ArgAttrs.remove(AttributeFuncs::typeIncompatible(FormalTy));
- // If byval is used, this must be a pointer type, and the byval type must
- // match the element type. Update it if present.
+ // We may have a different byval/inalloca type.
if (ArgAttrs.getByValType())
ArgAttrs.addByValAttr(Callee->getParamByValType(ArgNo));
+ if (ArgAttrs.getInAllocaType())
+ ArgAttrs.addInAllocaAttr(Callee->getParamInAllocaType(ArgNo));
NewArgAttrs.push_back(AttributeSet::get(Ctx, ArgAttrs));
AttributeChanged = true;
} else
- NewArgAttrs.push_back(CallerPAL.getParamAttributes(ArgNo));
+ NewArgAttrs.push_back(CallerPAL.getParamAttrs(ArgNo));
}
// If the return type of the call site doesn't match that of the callee, cast
@@ -511,7 +527,7 @@ CallBase &llvm::promoteCall(CallBase &CB, Function *Callee,
// Set the new callsite attribute.
if (AttributeChanged)
- CB.setAttributes(AttributeList::get(Ctx, CallerPAL.getFnAttributes(),
+ CB.setAttributes(AttributeList::get(Ctx, CallerPAL.getFnAttrs(),
AttributeSet::get(Ctx, RAttrs),
NewArgAttrs));
diff --git a/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp b/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp
index 1f649fe6c748..049c7d113521 100644
--- a/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp
+++ b/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp
@@ -33,7 +33,6 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/IVDescriptors.h"
-#include "llvm/Analysis/IVUsers.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 0ac9a5aaa425..048e691e33cf 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -62,7 +62,7 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap,
NewBB->getInstList().push_back(NewInst);
VMap[&I] = NewInst; // Add instruction map to value.
- hasCalls |= (isa<CallInst>(I) && !isa<DbgInfoIntrinsic>(I));
+ hasCalls |= (isa<CallInst>(I) && !I.isDebugOrPseudoInst());
if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
if (!AI->isStaticAlloca()) {
hasDynamicAllocas = true;
@@ -116,13 +116,13 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
for (const Argument &OldArg : OldFunc->args()) {
if (Argument *NewArg = dyn_cast<Argument>(VMap[&OldArg])) {
NewArgAttrs[NewArg->getArgNo()] =
- OldAttrs.getParamAttributes(OldArg.getArgNo());
+ OldAttrs.getParamAttrs(OldArg.getArgNo());
}
}
NewFunc->setAttributes(
- AttributeList::get(NewFunc->getContext(), OldAttrs.getFnAttributes(),
- OldAttrs.getRetAttributes(), NewArgAttrs));
+ AttributeList::get(NewFunc->getContext(), OldAttrs.getFnAttrs(),
+ OldAttrs.getRetAttrs(), NewArgAttrs));
// Everything else beyond this point deals with function instructions,
// so if we are dealing with a function declaration, we're done.
@@ -410,7 +410,7 @@ void PruningFunctionCloner::CloneBlock(
NewInst->setName(II->getName() + NameSuffix);
VMap[&*II] = NewInst; // Add instruction map to value.
NewBB->getInstList().push_back(NewInst);
- hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II));
+ hasCalls |= (isa<CallInst>(II) && !II->isDebugOrPseudoInst());
if (CodeInfo) {
CodeInfo->OrigVMap[&*II] = NewInst;
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index 9edc52b53550..96aff563aa9b 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -434,6 +434,7 @@ CodeExtractor::findOrCreateBlockForHoisting(BasicBlock *CommonExitBlock) {
}
// Now add the old exit block to the outline region.
Blocks.insert(CommonExitBlock);
+ OldTargets.push_back(NewExitBlock);
return CommonExitBlock;
}
@@ -885,7 +886,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
// "target-features" attribute allowing it to be lowered.
// FIXME: This should be changed to check to see if a specific
// attribute can not be inherited.
- for (const auto &Attr : oldFunction->getAttributes().getFnAttributes()) {
+ for (const auto &Attr : oldFunction->getAttributes().getFnAttrs()) {
if (Attr.isStringAttribute()) {
if (Attr.getKindAsString() == "thunk")
continue;
@@ -943,6 +944,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
// Those attributes should be safe to propagate to the extracted function.
case Attribute::AlwaysInline:
case Attribute::Cold:
+ case Attribute::DisableSanitizerInstrumentation:
case Attribute::Hot:
case Attribute::NoRecurse:
case Attribute::InlineHint:
@@ -1044,9 +1046,8 @@ static void eraseLifetimeMarkersOnInputs(const SetVector<BasicBlock *> &Blocks,
const SetVector<Value *> &SunkAllocas,
SetVector<Value *> &LifetimesStart) {
for (BasicBlock *BB : Blocks) {
- for (auto It = BB->begin(), End = BB->end(); It != End;) {
- auto *II = dyn_cast<IntrinsicInst>(&*It);
- ++It;
+ for (Instruction &I : llvm::make_early_inc_range(*BB)) {
+ auto *II = dyn_cast<IntrinsicInst>(&I);
if (!II || !II->isLifetimeStartOrEnd())
continue;
@@ -1247,45 +1248,57 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
// not in the region to be extracted.
std::map<BasicBlock *, BasicBlock *> ExitBlockMap;
+ // Iterate over the previously collected targets, and create new blocks inside
+ // the function to branch to.
unsigned switchVal = 0;
+ for (BasicBlock *OldTarget : OldTargets) {
+ if (Blocks.count(OldTarget))
+ continue;
+ BasicBlock *&NewTarget = ExitBlockMap[OldTarget];
+ if (NewTarget)
+ continue;
+
+ // If we don't already have an exit stub for this non-extracted
+ // destination, create one now!
+ NewTarget = BasicBlock::Create(Context,
+ OldTarget->getName() + ".exitStub",
+ newFunction);
+ unsigned SuccNum = switchVal++;
+
+ Value *brVal = nullptr;
+ assert(NumExitBlocks < 0xffff && "too many exit blocks for switch");
+ switch (NumExitBlocks) {
+ case 0:
+ case 1: break; // No value needed.
+ case 2: // Conditional branch, return a bool
+ brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum);
+ break;
+ default:
+ brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum);
+ break;
+ }
+
+ ReturnInst::Create(Context, brVal, NewTarget);
+
+ // Update the switch instruction.
+ TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context),
+ SuccNum),
+ OldTarget);
+ }
+
for (BasicBlock *Block : Blocks) {
Instruction *TI = Block->getTerminator();
- for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
- if (!Blocks.count(TI->getSuccessor(i))) {
- BasicBlock *OldTarget = TI->getSuccessor(i);
- // add a new basic block which returns the appropriate value
- BasicBlock *&NewTarget = ExitBlockMap[OldTarget];
- if (!NewTarget) {
- // If we don't already have an exit stub for this non-extracted
- // destination, create one now!
- NewTarget = BasicBlock::Create(Context,
- OldTarget->getName() + ".exitStub",
- newFunction);
- unsigned SuccNum = switchVal++;
-
- Value *brVal = nullptr;
- switch (NumExitBlocks) {
- case 0:
- case 1: break; // No value needed.
- case 2: // Conditional branch, return a bool
- brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum);
- break;
- default:
- brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum);
- break;
- }
-
- ReturnInst::Create(Context, brVal, NewTarget);
-
- // Update the switch instruction.
- TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context),
- SuccNum),
- OldTarget);
- }
-
- // rewrite the original branch instruction with this new target
- TI->setSuccessor(i, NewTarget);
- }
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
+ if (Blocks.count(TI->getSuccessor(i)))
+ continue;
+ BasicBlock *OldTarget = TI->getSuccessor(i);
+ // add a new basic block which returns the appropriate value
+ BasicBlock *NewTarget = ExitBlockMap[OldTarget];
+ assert(NewTarget && "Unknown target block!");
+
+ // rewrite the original branch instruction with this new target
+ TI->setSuccessor(i, NewTarget);
+ }
}
// Store the arguments right after the definition of output value.
@@ -1388,12 +1401,17 @@ void CodeExtractor::moveCodeToFunction(Function *newFunction) {
Function::BasicBlockListType &oldBlocks = oldFunc->getBasicBlockList();
Function::BasicBlockListType &newBlocks = newFunction->getBasicBlockList();
+ auto newFuncIt = newFunction->front().getIterator();
for (BasicBlock *Block : Blocks) {
// Delete the basic block from the old function, and the list of blocks
oldBlocks.remove(Block);
// Insert this basic block into the new function
- newBlocks.push_back(Block);
+ // Insert the original blocks after the entry block created
+ // for the new function. The entry block may be followed
+ // by a set of exit blocks at this point, but these exit
+ // blocks must be placed at the end of the new function.
+ newFuncIt = newBlocks.insertAfter(newFuncIt, Block);
}
}
@@ -1569,6 +1587,13 @@ static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc,
Function *
CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC) {
+ ValueSet Inputs, Outputs;
+ return extractCodeRegion(CEAC, Inputs, Outputs);
+}
+
+Function *
+CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC,
+ ValueSet &inputs, ValueSet &outputs) {
if (!isEligible())
return nullptr;
@@ -1593,11 +1618,8 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC) {
// Remove @llvm.assume calls that will be moved to the new function from the
// old function's assumption cache.
for (BasicBlock *Block : Blocks) {
- for (auto It = Block->begin(), End = Block->end(); It != End;) {
- Instruction *I = &*It;
- ++It;
-
- if (auto *AI = dyn_cast<AssumeInst>(I)) {
+ for (Instruction &I : llvm::make_early_inc_range(*Block)) {
+ if (auto *AI = dyn_cast<AssumeInst>(&I)) {
if (AC)
AC->unregisterAssumption(AI);
AI->eraseFromParent();
@@ -1627,6 +1649,16 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC) {
}
NumExitBlocks = ExitBlocks.size();
+ for (BasicBlock *Block : Blocks) {
+ Instruction *TI = Block->getTerminator();
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
+ if (Blocks.count(TI->getSuccessor(i)))
+ continue;
+ BasicBlock *OldTarget = TI->getSuccessor(i);
+ OldTargets.push_back(OldTarget);
+ }
+ }
+
// If we have to split PHI nodes of the entry or exit blocks, do so now.
severSplitPHINodesOfEntry(header);
severSplitPHINodesOfExits(ExitBlocks);
@@ -1657,7 +1689,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC) {
}
newFuncRoot->getInstList().push_back(BranchI);
- ValueSet inputs, outputs, SinkingCands, HoistingCands;
+ ValueSet SinkingCands, HoistingCands;
BasicBlock *CommonExit = nullptr;
findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
assert(HoistingCands.empty() || CommonExit);
diff --git a/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp b/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp
index ce982c7403aa..648f4e64a4d2 100644
--- a/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp
+++ b/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp
@@ -309,7 +309,7 @@ collectInstructionsInBetween(Instruction &StartInst, const Instruction &EndInst,
bool llvm::isSafeToMoveBefore(Instruction &I, Instruction &InsertPoint,
DominatorTree &DT, const PostDominatorTree *PDT,
- DependenceInfo *DI) {
+ DependenceInfo *DI, bool CheckForEntireBlock) {
// Skip tests when we don't have PDT or DI
if (!PDT || !DI)
return false;
@@ -332,16 +332,24 @@ bool llvm::isSafeToMoveBefore(Instruction &I, Instruction &InsertPoint,
if (!isControlFlowEquivalent(I, InsertPoint, DT, *PDT))
return reportInvalidCandidate(I, NotControlFlowEquivalent);
- if (!DT.dominates(&InsertPoint, &I))
+ if (isReachedBefore(&I, &InsertPoint, &DT, PDT))
for (const Use &U : I.uses())
if (auto *UserInst = dyn_cast<Instruction>(U.getUser()))
if (UserInst != &InsertPoint && !DT.dominates(&InsertPoint, U))
return false;
- if (!DT.dominates(&I, &InsertPoint))
+ if (isReachedBefore(&InsertPoint, &I, &DT, PDT))
for (const Value *Op : I.operands())
- if (auto *OpInst = dyn_cast<Instruction>(Op))
- if (&InsertPoint == OpInst || !DT.dominates(OpInst, &InsertPoint))
+ if (auto *OpInst = dyn_cast<Instruction>(Op)) {
+ if (&InsertPoint == OpInst)
+ return false;
+ // If OpInst is an instruction that appears earlier in the same BB as
+ // I, then it is okay to move since OpInst will still be available.
+ if (CheckForEntireBlock && I.getParent() == OpInst->getParent() &&
+ DT.dominates(OpInst, &I))
+ continue;
+ if (!DT.dominates(OpInst, &InsertPoint))
return false;
+ }
DT.updateDFSNumbers();
const bool MoveForward = domTreeLevelBefore(&DT, &I, &InsertPoint);
@@ -393,7 +401,8 @@ bool llvm::isSafeToMoveBefore(BasicBlock &BB, Instruction &InsertPoint,
if (BB.getTerminator() == &I)
return true;
- return isSafeToMoveBefore(I, InsertPoint, DT, PDT, DI);
+ return isSafeToMoveBefore(I, InsertPoint, DT, PDT, DI,
+ /*CheckForEntireBlock=*/true);
});
}
@@ -401,11 +410,9 @@ void llvm::moveInstructionsToTheBeginning(BasicBlock &FromBB, BasicBlock &ToBB,
DominatorTree &DT,
const PostDominatorTree &PDT,
DependenceInfo &DI) {
- for (auto It = ++FromBB.rbegin(); It != FromBB.rend();) {
+ for (Instruction &I :
+ llvm::make_early_inc_range(llvm::drop_begin(llvm::reverse(FromBB)))) {
Instruction *MovePos = ToBB.getFirstNonPHIOrDbg();
- Instruction &I = *It;
- // Increment the iterator before modifying FromBB.
- ++It;
if (isSafeToMoveBefore(I, *MovePos, DT, &PDT, &DI))
I.moveBefore(MovePos);
@@ -423,3 +430,47 @@ void llvm::moveInstructionsToTheEnd(BasicBlock &FromBB, BasicBlock &ToBB,
I.moveBefore(MovePos);
}
}
+
+bool llvm::nonStrictlyPostDominate(const BasicBlock *ThisBlock,
+ const BasicBlock *OtherBlock,
+ const DominatorTree *DT,
+ const PostDominatorTree *PDT) {
+ assert(isControlFlowEquivalent(*ThisBlock, *OtherBlock, *DT, *PDT) &&
+ "ThisBlock and OtherBlock must be CFG equivalent!");
+ const BasicBlock *CommonDominator =
+ DT->findNearestCommonDominator(ThisBlock, OtherBlock);
+ if (CommonDominator == nullptr)
+ return false;
+
+ /// Recursively check the predecessors of \p ThisBlock up to
+ /// their common dominator, and see if any of them post-dominates
+ /// \p OtherBlock.
+ SmallVector<const BasicBlock *, 8> WorkList;
+ SmallPtrSet<const BasicBlock *, 8> Visited;
+ WorkList.push_back(ThisBlock);
+ while (!WorkList.empty()) {
+ const BasicBlock *CurBlock = WorkList.back();
+ WorkList.pop_back();
+ Visited.insert(CurBlock);
+ if (PDT->dominates(CurBlock, OtherBlock))
+ return true;
+
+ for (auto *Pred : predecessors(CurBlock)) {
+ if (Pred == CommonDominator || Visited.count(Pred))
+ continue;
+ WorkList.push_back(Pred);
+ }
+ }
+ return false;
+}
+
+bool llvm::isReachedBefore(const Instruction *I0, const Instruction *I1,
+ const DominatorTree *DT,
+ const PostDominatorTree *PDT) {
+ const BasicBlock *BB0 = I0->getParent();
+ const BasicBlock *BB1 = I1->getParent();
+ if (BB0 == BB1)
+ return DT->dominates(I0, I1);
+
+ return nonStrictlyPostDominate(BB1, BB0, DT, PDT);
+}
diff --git a/llvm/lib/Transforms/Utils/Debugify.cpp b/llvm/lib/Transforms/Utils/Debugify.cpp
index 30c3fa521d52..fc7083b0c30d 100644
--- a/llvm/lib/Transforms/Utils/Debugify.cpp
+++ b/llvm/lib/Transforms/Utils/Debugify.cpp
@@ -457,14 +457,14 @@ static bool checkInstructions(const DebugInstMap &DILocsBefore,
}
// This checks the preservation of original debug variable intrinsics.
-static bool checkVars(const DebugVarMap &DIFunctionsBefore,
- const DebugVarMap &DIFunctionsAfter,
+static bool checkVars(const DebugVarMap &DIVarsBefore,
+ const DebugVarMap &DIVarsAfter,
StringRef NameOfWrappedPass, StringRef FileNameFromCU,
bool ShouldWriteIntoJSON, llvm::json::Array &Bugs) {
bool Preserved = true;
- for (const auto &V : DIFunctionsBefore) {
- auto VarIt = DIFunctionsAfter.find(V.first);
- if (VarIt == DIFunctionsAfter.end())
+ for (const auto &V : DIVarsBefore) {
+ auto VarIt = DIVarsAfter.find(V.first);
+ if (VarIt == DIVarsAfter.end())
continue;
unsigned NumOfDbgValsAfter = VarIt->second;
diff --git a/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp b/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
index 31d03e1e86af..e3e8f63383df 100644
--- a/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
+++ b/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
@@ -89,7 +89,7 @@ static bool runOnFunction(Function &F, bool PostInlining) {
insertCall(F, EntryFunc, &*F.begin()->getFirstInsertionPt(), DL);
Changed = true;
- F.removeAttribute(AttributeList::FunctionIndex, EntryAttr);
+ F.removeFnAttr(EntryAttr);
}
if (!ExitFunc.empty()) {
@@ -111,7 +111,7 @@ static bool runOnFunction(Function &F, bool PostInlining) {
insertCall(F, ExitFunc, T, DL);
Changed = true;
}
- F.removeAttribute(AttributeList::FunctionIndex, ExitAttr);
+ F.removeFnAttr(ExitAttr);
}
return Changed;
@@ -183,3 +183,13 @@ llvm::EntryExitInstrumenterPass::run(Function &F, FunctionAnalysisManager &AM) {
PA.preserveSet<CFGAnalyses>();
return PA;
}
+
+void llvm::EntryExitInstrumenterPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<llvm::EntryExitInstrumenterPass> *>(this)
+ ->printPipeline(OS, MapClassName2PassName);
+ OS << "<";
+ if (PostInlining)
+ OS << "post-inline";
+ OS << ">";
+}
diff --git a/llvm/lib/Transforms/Utils/Evaluator.cpp b/llvm/lib/Transforms/Utils/Evaluator.cpp
index 463c223d9e8f..9c8aed94708e 100644
--- a/llvm/lib/Transforms/Utils/Evaluator.cpp
+++ b/llvm/lib/Transforms/Utils/Evaluator.cpp
@@ -128,11 +128,6 @@ isSimpleEnoughValueToCommit(Constant *C,
/// globals and GEP's of globals. This should be kept up to date with
/// CommitValueTo.
static bool isSimpleEnoughPointerToCommit(Constant *C, const DataLayout &DL) {
- // Conservatively, avoid aggregate types. This is because we don't
- // want to worry about them partially overlapping other stores.
- if (!cast<PointerType>(C->getType())->getElementType()->isSingleValueType())
- return false;
-
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
// Do not allow weak/*_odr/linkonce linkage or external globals.
return GV->hasUniqueInitializer();
@@ -284,7 +279,7 @@ bool Evaluator::getFormalParams(CallBase &CB, Function *F,
return false;
auto *FTy = F->getFunctionType();
- if (FTy->getNumParams() > CB.getNumArgOperands()) {
+ if (FTy->getNumParams() > CB.arg_size()) {
LLVM_DEBUG(dbgs() << "Too few arguments for function.\n");
return false;
}
@@ -343,7 +338,10 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB,
Ptr = FoldedPtr;
LLVM_DEBUG(dbgs() << "; To: " << *Ptr << "\n");
}
- if (!isSimpleEnoughPointerToCommit(Ptr, DL)) {
+ // Conservatively, avoid aggregate types. This is because we don't
+ // want to worry about them partially overlapping other stores.
+ if (!SI->getValueOperand()->getType()->isSingleValueType() ||
+ !isSimpleEnoughPointerToCommit(Ptr, DL)) {
// If this is too complex for us to commit, reject it.
LLVM_DEBUG(
dbgs() << "Pointer is too complex for us to evaluate store.");
diff --git a/llvm/lib/Transforms/Utils/FixIrreducible.cpp b/llvm/lib/Transforms/Utils/FixIrreducible.cpp
index 10f48fe827f4..8de3ce876bab 100644
--- a/llvm/lib/Transforms/Utils/FixIrreducible.cpp
+++ b/llvm/lib/Transforms/Utils/FixIrreducible.cpp
@@ -124,7 +124,7 @@ static void reconnectChildLoops(LoopInfo &LI, Loop *ParentLoop, Loop *NewLoop,
// children to a new vector.
auto FirstChild = std::partition(
CandidateLoops.begin(), CandidateLoops.end(), [&](Loop *L) {
- return L == NewLoop || Blocks.count(L->getHeader()) == 0;
+ return L == NewLoop || !Blocks.contains(L->getHeader());
});
SmallVector<Loop *, 8> ChildLoops(FirstChild, CandidateLoops.end());
CandidateLoops.erase(FirstChild, CandidateLoops.end());
diff --git a/llvm/lib/Transforms/Utils/FlattenCFG.cpp b/llvm/lib/Transforms/Utils/FlattenCFG.cpp
index dbcacc20b589..ddd3f597ae01 100644
--- a/llvm/lib/Transforms/Utils/FlattenCFG.cpp
+++ b/llvm/lib/Transforms/Utils/FlattenCFG.cpp
@@ -162,7 +162,7 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder) {
// of \param BB (BB4) and should not have address-taken.
// There should exist only one such unconditional
// branch among the predecessors.
- if (UnCondBlock || !PP || (Preds.count(PP) == 0) ||
+ if (UnCondBlock || !PP || !Preds.contains(PP) ||
Pred->hasAddressTaken())
return false;
@@ -215,7 +215,7 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder) {
// PS is the successor which is not BB. Check successors to identify
// the last conditional branch.
- if (Preds.count(PS) == 0) {
+ if (!Preds.contains(PS)) {
// Case 2.
LastCondBlock = Pred;
} else {
diff --git a/llvm/lib/Transforms/Utils/FunctionComparator.cpp b/llvm/lib/Transforms/Utils/FunctionComparator.cpp
index 2696557a719f..326864803d7c 100644
--- a/llvm/lib/Transforms/Utils/FunctionComparator.cpp
+++ b/llvm/lib/Transforms/Utils/FunctionComparator.cpp
@@ -110,7 +110,7 @@ int FunctionComparator::cmpAttrs(const AttributeList L,
if (int Res = cmpNumbers(L.getNumAttrSets(), R.getNumAttrSets()))
return Res;
- for (unsigned i = L.index_begin(), e = L.index_end(); i != e; ++i) {
+ for (unsigned i : L.indexes()) {
AttributeSet LAS = L.getAttributes(i);
AttributeSet RAS = R.getAttributes(i);
AttributeSet::iterator LI = LAS.begin(), LE = LAS.end();
diff --git a/llvm/lib/Transforms/Utils/GlobalStatus.cpp b/llvm/lib/Transforms/Utils/GlobalStatus.cpp
index f782396be7b6..9bfc73e4ba6c 100644
--- a/llvm/lib/Transforms/Utils/GlobalStatus.cpp
+++ b/llvm/lib/Transforms/Utils/GlobalStatus.cpp
@@ -105,8 +105,10 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
// value, not an aggregate), keep more specific information about
// stores.
if (GS.StoredType != GlobalStatus::Stored) {
- if (const GlobalVariable *GV =
- dyn_cast<GlobalVariable>(SI->getOperand(1))) {
+ const Value *Ptr = SI->getPointerOperand();
+ if (isa<ConstantExpr>(Ptr))
+ Ptr = Ptr->stripPointerCasts();
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
Value *StoredVal = SI->getOperand(0);
if (Constant *C = dyn_cast<Constant>(StoredVal)) {
@@ -125,9 +127,9 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
GS.StoredType = GlobalStatus::InitializerStored;
} else if (GS.StoredType < GlobalStatus::StoredOnce) {
GS.StoredType = GlobalStatus::StoredOnce;
- GS.StoredOnceValue = StoredVal;
+ GS.StoredOnceStore = SI;
} else if (GS.StoredType == GlobalStatus::StoredOnce &&
- GS.StoredOnceValue == StoredVal) {
+ GS.getStoredOnceValue() == StoredVal) {
// noop.
} else {
GS.StoredType = GlobalStatus::Stored;
diff --git a/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp b/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp
index a1e160d144dc..047bf5569ded 100644
--- a/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp
+++ b/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp
@@ -47,7 +47,7 @@ static void addVariantDeclaration(CallInst &CI, const ElementCount &VF,
// Add function declaration.
Type *RetTy = ToVectorTy(CI.getType(), VF);
SmallVector<Type *, 4> Tys;
- for (Value *ArgOperand : CI.arg_operands())
+ for (Value *ArgOperand : CI.args())
Tys.push_back(ToVectorTy(ArgOperand->getType(), VF));
assert(!CI.getFunctionType()->isVarArg() &&
"VarArg functions are not supported.");
@@ -94,8 +94,8 @@ static void addMappingsFromTLI(const TargetLibraryInfo &TLI, CallInst &CI) {
const std::string TLIName =
std::string(TLI.getVectorizedFunction(ScalarName, VF));
if (!TLIName.empty()) {
- std::string MangledName = VFABI::mangleTLIVectorName(
- TLIName, ScalarName, CI.getNumArgOperands(), VF);
+ std::string MangledName =
+ VFABI::mangleTLIVectorName(TLIName, ScalarName, CI.arg_size(), VF);
if (!OriginalSetOfMappings.count(MangledName)) {
Mappings.push_back(MangledName);
++NumCallInjected;
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 792aa8208f27..f4776589910f 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -539,12 +539,10 @@ static Value *getUnwindDestToken(Instruction *EHPad,
static BasicBlock *HandleCallsInBlockInlinedThroughInvoke(
BasicBlock *BB, BasicBlock *UnwindEdge,
UnwindDestMemoTy *FuncletUnwindMap = nullptr) {
- for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
- Instruction *I = &*BBI++;
-
+ for (Instruction &I : llvm::make_early_inc_range(*BB)) {
// We only need to check for function calls: inlined invoke
// instructions require no special handling.
- CallInst *CI = dyn_cast<CallInst>(I);
+ CallInst *CI = dyn_cast<CallInst>(&I);
if (!CI || CI->doesNotThrow())
continue;
@@ -830,6 +828,7 @@ static void PropagateCallSiteMetadata(CallBase &CB, Function::iterator FStart,
}
}
+namespace {
/// Utility for cloning !noalias and !alias.scope metadata. When a code region
/// using scoped alias metadata is inlined, the aliasing relationships may not
/// hold between the two versions. It is necessary to create a deep clone of the
@@ -851,6 +850,7 @@ public:
/// metadata.
void remap(Function::iterator FStart, Function::iterator FEnd);
};
+} // namespace
ScopedAliasMetadataDeepCloner::ScopedAliasMetadataDeepCloner(
const Function *F) {
@@ -1179,14 +1179,8 @@ static bool MayContainThrowingOrExitingCall(Instruction *Begin,
assert(Begin->getParent() == End->getParent() &&
"Expected to be in same basic block!");
- unsigned NumInstChecked = 0;
- // Check that all instructions in the range [Begin, End) are guaranteed to
- // transfer execution to successor.
- for (auto &I : make_range(Begin->getIterator(), End->getIterator()))
- if (NumInstChecked++ > InlinerAttributeWindow ||
- !isGuaranteedToTransferExecutionToSuccessor(&I))
- return true;
- return false;
+ return !llvm::isGuaranteedToTransferExecutionToSuccessor(
+ Begin->getIterator(), End->getIterator(), InlinerAttributeWindow + 1);
}
static AttrBuilder IdentifyValidAttributes(CallBase &CB) {
@@ -1259,8 +1253,7 @@ static void AddReturnAttributes(CallBase &CB, ValueToValueMapTy &VMap) {
// existing attribute value (i.e. attributes such as dereferenceable,
// dereferenceable_or_null etc). See AttrBuilder::merge for more details.
AttributeList AL = NewRetVal->getAttributes();
- AttributeList NewAL =
- AL.addAttributes(Context, AttributeList::ReturnIndex, Valid);
+ AttributeList NewAL = AL.addRetAttributes(Context, Valid);
NewRetVal->setAttributes(NewAL);
}
}
@@ -1376,13 +1369,13 @@ static void UpdateCallGraphAfterInlining(CallBase &CB,
CallerNode->removeCallEdgeFor(*cast<CallBase>(&CB));
}
-static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M,
- BasicBlock *InsertBlock,
+static void HandleByValArgumentInit(Type *ByValType, Value *Dst, Value *Src,
+ Module *M, BasicBlock *InsertBlock,
InlineFunctionInfo &IFI) {
- Type *AggTy = cast<PointerType>(Src->getType())->getElementType();
IRBuilder<> Builder(InsertBlock, InsertBlock->begin());
- Value *Size = Builder.getInt64(M->getDataLayout().getTypeStoreSize(AggTy));
+ Value *Size =
+ Builder.getInt64(M->getDataLayout().getTypeStoreSize(ByValType));
// Always generate a memcpy of alignment 1 here because we don't know
// the alignment of the src pointer. Other optimizations can infer
@@ -1393,13 +1386,13 @@ static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M,
/// When inlining a call site that has a byval argument,
/// we have to make the implicit memcpy explicit by adding it.
-static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
+static Value *HandleByValArgument(Type *ByValType, Value *Arg,
+ Instruction *TheCall,
const Function *CalledFunc,
InlineFunctionInfo &IFI,
unsigned ByValAlignment) {
- PointerType *ArgTy = cast<PointerType>(Arg->getType());
- Type *AggTy = ArgTy->getElementType();
-
+ assert(cast<PointerType>(Arg->getType())
+ ->isOpaqueOrPointeeTypeMatches(ByValType));
Function *Caller = TheCall->getFunction();
const DataLayout &DL = Caller->getParent()->getDataLayout();
@@ -1427,7 +1420,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
}
// Create the alloca. If we have DataLayout, use nice alignment.
- Align Alignment(DL.getPrefTypeAlignment(AggTy));
+ Align Alignment(DL.getPrefTypeAlignment(ByValType));
// If the byval had an alignment specified, we *must* use at least that
// alignment, as it is required by the byval argument (and uses of the
@@ -1435,7 +1428,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
Alignment = max(Alignment, MaybeAlign(ByValAlignment));
Value *NewAlloca =
- new AllocaInst(AggTy, DL.getAllocaAddrSpace(), nullptr, Alignment,
+ new AllocaInst(ByValType, DL.getAllocaAddrSpace(), nullptr, Alignment,
Arg->getName(), &*Caller->begin()->begin());
IFI.StaticAllocas.push_back(cast<AllocaInst>(NewAlloca));
@@ -1607,8 +1600,7 @@ static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap,
const ProfileCount &CalleeEntryCount,
const CallBase &TheCall, ProfileSummaryInfo *PSI,
BlockFrequencyInfo *CallerBFI) {
- if (!CalleeEntryCount.hasValue() || CalleeEntryCount.isSynthetic() ||
- CalleeEntryCount.getCount() < 1)
+ if (CalleeEntryCount.isSynthetic() || CalleeEntryCount.getCount() < 1)
return;
auto CallSiteCount = PSI ? PSI->getProfileCount(TheCall, CallerBFI) : None;
int64_t CallCount =
@@ -1617,40 +1609,39 @@ static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap,
}
void llvm::updateProfileCallee(
- Function *Callee, int64_t entryDelta,
+ Function *Callee, int64_t EntryDelta,
const ValueMap<const Value *, WeakTrackingVH> *VMap) {
auto CalleeCount = Callee->getEntryCount();
if (!CalleeCount.hasValue())
return;
- uint64_t priorEntryCount = CalleeCount.getCount();
- uint64_t newEntryCount;
+ const uint64_t PriorEntryCount = CalleeCount->getCount();
// Since CallSiteCount is an estimate, it could exceed the original callee
// count and has to be set to 0 so guard against underflow.
- if (entryDelta < 0 && static_cast<uint64_t>(-entryDelta) > priorEntryCount)
- newEntryCount = 0;
- else
- newEntryCount = priorEntryCount + entryDelta;
+ const uint64_t NewEntryCount =
+ (EntryDelta < 0 && static_cast<uint64_t>(-EntryDelta) > PriorEntryCount)
+ ? 0
+ : PriorEntryCount + EntryDelta;
// During inlining ?
if (VMap) {
- uint64_t cloneEntryCount = priorEntryCount - newEntryCount;
+ uint64_t CloneEntryCount = PriorEntryCount - NewEntryCount;
for (auto Entry : *VMap)
if (isa<CallInst>(Entry.first))
if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second))
- CI->updateProfWeight(cloneEntryCount, priorEntryCount);
+ CI->updateProfWeight(CloneEntryCount, PriorEntryCount);
}
- if (entryDelta) {
- Callee->setEntryCount(newEntryCount);
+ if (EntryDelta) {
+ Callee->setEntryCount(NewEntryCount);
for (BasicBlock &BB : *Callee)
// No need to update the callsite if it is pruned during inlining.
if (!VMap || VMap->count(&BB))
for (Instruction &I : BB)
if (CallInst *CI = dyn_cast<CallInst>(&I))
- CI->updateProfWeight(newEntryCount, priorEntryCount);
+ CI->updateProfWeight(NewEntryCount, PriorEntryCount);
}
}
@@ -1672,66 +1663,69 @@ void llvm::updateProfileCallee(
/// 3. Otherwise, a call to objc_retain is inserted if the call in the caller is
/// a retainRV call.
static void
-inlineRetainOrClaimRVCalls(CallBase &CB,
+inlineRetainOrClaimRVCalls(CallBase &CB, objcarc::ARCInstKind RVCallKind,
const SmallVectorImpl<ReturnInst *> &Returns) {
Module *Mod = CB.getModule();
- bool IsRetainRV = objcarc::hasAttachedCallOpBundle(&CB, true),
+ assert(objcarc::isRetainOrClaimRV(RVCallKind) && "unexpected ARC function");
+ bool IsRetainRV = RVCallKind == objcarc::ARCInstKind::RetainRV,
IsClaimRV = !IsRetainRV;
for (auto *RI : Returns) {
Value *RetOpnd = objcarc::GetRCIdentityRoot(RI->getOperand(0));
- BasicBlock::reverse_iterator I = ++(RI->getIterator().getReverse());
- BasicBlock::reverse_iterator EI = RI->getParent()->rend();
bool InsertRetainCall = IsRetainRV;
IRBuilder<> Builder(RI->getContext());
// Walk backwards through the basic block looking for either a matching
// autoreleaseRV call or an unannotated call.
- for (; I != EI;) {
- auto CurI = I++;
-
+ auto InstRange = llvm::make_range(++(RI->getIterator().getReverse()),
+ RI->getParent()->rend());
+ for (Instruction &I : llvm::make_early_inc_range(InstRange)) {
// Ignore casts.
- if (isa<CastInst>(*CurI))
+ if (isa<CastInst>(I))
continue;
- if (auto *II = dyn_cast<IntrinsicInst>(&*CurI)) {
- if (II->getIntrinsicID() == Intrinsic::objc_autoreleaseReturnValue &&
- II->hasNUses(0) &&
- objcarc::GetRCIdentityRoot(II->getOperand(0)) == RetOpnd) {
- // If we've found a matching authoreleaseRV call:
- // - If claimRV is attached to the call, insert a call to objc_release
- // and erase the autoreleaseRV call.
- // - If retainRV is attached to the call, just erase the autoreleaseRV
- // call.
- if (IsClaimRV) {
- Builder.SetInsertPoint(II);
- Function *IFn =
- Intrinsic::getDeclaration(Mod, Intrinsic::objc_release);
- Value *BC =
- Builder.CreateBitCast(RetOpnd, IFn->getArg(0)->getType());
- Builder.CreateCall(IFn, BC, "");
- }
- II->eraseFromParent();
- InsertRetainCall = false;
- }
- } else if (auto *CI = dyn_cast<CallInst>(&*CurI)) {
- if (objcarc::GetRCIdentityRoot(CI) == RetOpnd &&
- !objcarc::hasAttachedCallOpBundle(CI)) {
- // If we've found an unannotated call that defines RetOpnd, add a
- // "clang.arc.attachedcall" operand bundle.
- Value *BundleArgs[] = {ConstantInt::get(
- Builder.getInt64Ty(),
- objcarc::getAttachedCallOperandBundleEnum(IsRetainRV))};
- OperandBundleDef OB("clang.arc.attachedcall", BundleArgs);
- auto *NewCall = CallBase::addOperandBundle(
- CI, LLVMContext::OB_clang_arc_attachedcall, OB, CI);
- NewCall->copyMetadata(*CI);
- CI->replaceAllUsesWith(NewCall);
- CI->eraseFromParent();
- InsertRetainCall = false;
+ if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
+ if (II->getIntrinsicID() != Intrinsic::objc_autoreleaseReturnValue ||
+ !II->hasNUses(0) ||
+ objcarc::GetRCIdentityRoot(II->getOperand(0)) != RetOpnd)
+ break;
+
+ // If we've found a matching autoreleaseRV call:
+ // - If claimRV is attached to the call, insert a call to objc_release
+ // and erase the autoreleaseRV call.
+ // - If retainRV is attached to the call, just erase the autoreleaseRV
+ // call.
+ if (IsClaimRV) {
+ Builder.SetInsertPoint(II);
+ Function *IFn =
+ Intrinsic::getDeclaration(Mod, Intrinsic::objc_release);
+ Value *BC = Builder.CreateBitCast(RetOpnd, IFn->getArg(0)->getType());
+ Builder.CreateCall(IFn, BC, "");
}
+ II->eraseFromParent();
+ InsertRetainCall = false;
+ break;
}
+ auto *CI = dyn_cast<CallInst>(&I);
+
+ if (!CI)
+ break;
+
+ if (objcarc::GetRCIdentityRoot(CI) != RetOpnd ||
+ objcarc::hasAttachedCallOpBundle(CI))
+ break;
+
+ // If we've found an unannotated call that defines RetOpnd, add a
+ // "clang.arc.attachedcall" operand bundle.
+ Value *BundleArgs[] = {*objcarc::getAttachedARCFunction(&CB)};
+ OperandBundleDef OB("clang.arc.attachedcall", BundleArgs);
+ auto *NewCall = CallBase::addOperandBundle(
+ CI, LLVMContext::OB_clang_arc_attachedcall, OB, CI);
+ NewCall->copyMetadata(*CI);
+ CI->replaceAllUsesWith(NewCall);
+ CI->eraseFromParent();
+ InsertRetainCall = false;
break;
}
@@ -1895,8 +1889,13 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
{ // Scope to destroy VMap after cloning.
ValueToValueMapTy VMap;
+ struct ByValInit {
+ Value *Dst;
+ Value *Src;
+ Type *Ty;
+ };
// Keep a list of pair (dst, src) to emit byval initializations.
- SmallVector<std::pair<Value*, Value*>, 4> ByValInit;
+ SmallVector<ByValInit, 4> ByValInits;
// When inlining a function that contains noalias scope metadata,
// this metadata needs to be cloned so that the inlined blocks
@@ -1921,10 +1920,12 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
// or readnone, because the copy would be unneeded: the callee doesn't
// modify the struct.
if (CB.isByValArgument(ArgNo)) {
- ActualArg = HandleByValArgument(ActualArg, &CB, CalledFunc, IFI,
+ ActualArg = HandleByValArgument(CB.getParamByValType(ArgNo), ActualArg,
+ &CB, CalledFunc, IFI,
CalledFunc->getParamAlignment(ArgNo));
if (ActualArg != *AI)
- ByValInit.push_back(std::make_pair(ActualArg, (Value*) *AI));
+ ByValInits.push_back(
+ {ActualArg, (Value *)*AI, CB.getParamByValType(ArgNo)});
}
VMap[&*I] = ActualArg;
@@ -1953,8 +1954,9 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
FirstNewBlock = LastBlock; ++FirstNewBlock;
// Insert retainRV/claimRV runtime calls.
- if (objcarc::hasAttachedCallOpBundle(&CB))
- inlineRetainOrClaimRVCalls(CB, Returns);
+ objcarc::ARCInstKind RVCallKind = objcarc::getAttachedARCFunctionKind(&CB);
+ if (RVCallKind != objcarc::ARCInstKind::None)
+ inlineRetainOrClaimRVCalls(CB, RVCallKind, Returns);
// Updated caller/callee profiles only when requested. For sample loader
// inlining, the context-sensitive inlinee profile doesn't need to be
@@ -1966,13 +1968,14 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
updateCallerBFI(OrigBB, VMap, IFI.CallerBFI, IFI.CalleeBFI,
CalledFunc->front());
- updateCallProfile(CalledFunc, VMap, CalledFunc->getEntryCount(), CB,
- IFI.PSI, IFI.CallerBFI);
+ if (auto Profile = CalledFunc->getEntryCount())
+ updateCallProfile(CalledFunc, VMap, *Profile, CB, IFI.PSI,
+ IFI.CallerBFI);
}
// Inject byval arguments initialization.
- for (std::pair<Value*, Value*> &Init : ByValInit)
- HandleByValArgumentInit(Init.first, Init.second, Caller->getParent(),
+ for (ByValInit &Init : ByValInits)
+ HandleByValArgumentInit(Init.Ty, Init.Dst, Init.Src, Caller->getParent(),
&*FirstNewBlock, IFI);
Optional<OperandBundleUse> ParentDeopt =
@@ -2100,9 +2103,9 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
SmallVector<Value*,4> VarArgsToForward;
SmallVector<AttributeSet, 4> VarArgsAttrs;
for (unsigned i = CalledFunc->getFunctionType()->getNumParams();
- i < CB.getNumArgOperands(); i++) {
+ i < CB.arg_size(); i++) {
VarArgsToForward.push_back(CB.getArgOperand(i));
- VarArgsAttrs.push_back(CB.getAttributes().getParamAttributes(i));
+ VarArgsAttrs.push_back(CB.getAttributes().getParamAttrs(i));
}
bool InlinedMustTailCalls = false, InlinedDeoptimizeCalls = false;
@@ -2117,8 +2120,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E;
++BB) {
- for (auto II = BB->begin(); II != BB->end();) {
- Instruction &I = *II++;
+ for (Instruction &I : llvm::make_early_inc_range(*BB)) {
CallInst *CI = dyn_cast<CallInst>(&I);
if (!CI)
continue;
@@ -2135,15 +2137,15 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
if (!Attrs.isEmpty() || !VarArgsAttrs.empty()) {
for (unsigned ArgNo = 0;
ArgNo < CI->getFunctionType()->getNumParams(); ++ArgNo)
- ArgAttrs.push_back(Attrs.getParamAttributes(ArgNo));
+ ArgAttrs.push_back(Attrs.getParamAttrs(ArgNo));
}
// Add VarArg attributes.
ArgAttrs.append(VarArgsAttrs.begin(), VarArgsAttrs.end());
- Attrs = AttributeList::get(CI->getContext(), Attrs.getFnAttributes(),
- Attrs.getRetAttributes(), ArgAttrs);
+ Attrs = AttributeList::get(CI->getContext(), Attrs.getFnAttrs(),
+ Attrs.getRetAttrs(), ArgAttrs);
// Add VarArgs to existing parameters.
- SmallVector<Value *, 6> Params(CI->arg_operands());
+ SmallVector<Value *, 6> Params(CI->args());
Params.append(VarArgsToForward.begin(), VarArgsToForward.end());
CallInst *NewCI = CallInst::Create(
CI->getFunctionType(), CI->getCalledOperand(), Params, "", CI);
@@ -2295,8 +2297,8 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
BB != E; ++BB) {
// Add bundle operands to any top-level call sites.
SmallVector<OperandBundleDef, 1> OpBundles;
- for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;) {
- CallBase *I = dyn_cast<CallBase>(&*BBI++);
+ for (Instruction &II : llvm::make_early_inc_range(*BB)) {
+ CallBase *I = dyn_cast<CallBase>(&II);
if (!I)
continue;
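
Several hunks above replace manual "copy the iterator, then advance before the current element can go away" loops with llvm::make_early_inc_range. The following standalone sketch (plain STL C++, not LLVM code; the container and values are invented for illustration) shows the idiom that adaptor encapsulates:

#include <iostream>
#include <list>

int main() {
  std::list<int> Values = {1, 2, 3, 4, 5, 6};
  // Early-increment iteration: remember the current element, advance the
  // iterator, and only then decide whether to erase -- so erasure never
  // touches the iterator that drives the loop.
  for (auto It = Values.begin(), E = Values.end(); It != E;) {
    auto Cur = It++;
    if (*Cur % 2 == 0)
      Values.erase(Cur);
  }
  for (int V : Values)
    std::cout << V << ' ';   // prints: 1 3 5
  std::cout << '\n';
}

The range adaptor simply hides the "Cur = It++" step so the loop body can be written over references directly.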
diff --git a/llvm/lib/Transforms/Utils/LCSSA.cpp b/llvm/lib/Transforms/Utils/LCSSA.cpp
index 277fd903e9aa..668626fef933 100644
--- a/llvm/lib/Transforms/Utils/LCSSA.cpp
+++ b/llvm/lib/Transforms/Utils/LCSSA.cpp
@@ -309,7 +309,7 @@ static void computeBlocksDominatingExits(
// worklist, unless we visited it already.
BasicBlock *IDomBB = DT.getNode(BB)->getIDom()->getBlock();
- // Exit blocks can have an immediate dominator not beloinging to the
+ // Exit blocks can have an immediate dominator not belonging to the
// loop. For an exit block to be immediately dominated by another block
// outside the loop, it implies not all paths from that dominator, to the
// exit block, go through the loop.
diff --git a/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp b/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
index 7e5832148bc0..6958a89f5be6 100644
--- a/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
+++ b/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
@@ -304,7 +304,7 @@ void LibCallsShrinkWrap::checkCandidate(CallInst &CI) {
if (!TLI.getLibFunc(*Callee, Func) || !TLI.has(Func))
return;
- if (CI.getNumArgOperands() == 0)
+ if (CI.arg_empty())
return;
// TODO: Handle long double in other formats.
Type *ArgType = CI.getArgOperand(0)->getType();
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index d03d76f57ca1..74ab37fadf36 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -1413,8 +1413,6 @@ static bool valueCoversEntireFragment(Type *ValTy, DbgVariableIntrinsic *DII) {
if (auto *AI =
dyn_cast_or_null<AllocaInst>(DII->getVariableLocationOp(0))) {
if (Optional<TypeSize> FragmentSize = AI->getAllocationSizeInBits(DL)) {
- assert(ValueSize.isScalable() == FragmentSize->isScalable() &&
- "Both sizes should agree on the scalable flag.");
return TypeSize::isKnownGE(ValueSize, *FragmentSize);
}
}
@@ -1733,9 +1731,11 @@ void llvm::salvageDebugInfo(Instruction &I) {
void llvm::salvageDebugInfoForDbgValues(
Instruction &I, ArrayRef<DbgVariableIntrinsic *> DbgUsers) {
- // This is an arbitrary chosen limit on the maximum number of values we can
- // salvage up to in a DIArgList, used for performance reasons.
+  // These are arbitrarily chosen limits on the maximum number of values and the
+ // maximum size of a debug expression we can salvage up to, used for
+ // performance reasons.
const unsigned MaxDebugArgs = 16;
+ const unsigned MaxExpressionSize = 128;
bool Salvaged = false;
for (auto *DII : DbgUsers) {
@@ -1752,23 +1752,30 @@ void llvm::salvageDebugInfoForDbgValues(
// must be updated in the DIExpression and potentially have additional
// values added; thus we call salvageDebugInfoImpl for each `I` instance in
// DIILocation.
+ Value *Op0 = nullptr;
DIExpression *SalvagedExpr = DII->getExpression();
auto LocItr = find(DIILocation, &I);
while (SalvagedExpr && LocItr != DIILocation.end()) {
+ SmallVector<uint64_t, 16> Ops;
unsigned LocNo = std::distance(DIILocation.begin(), LocItr);
- SalvagedExpr = salvageDebugInfoImpl(I, SalvagedExpr, StackValue, LocNo,
- AdditionalValues);
+ uint64_t CurrentLocOps = SalvagedExpr->getNumLocationOperands();
+ Op0 = salvageDebugInfoImpl(I, CurrentLocOps, Ops, AdditionalValues);
+ if (!Op0)
+ break;
+ SalvagedExpr =
+ DIExpression::appendOpsToArg(SalvagedExpr, Ops, LocNo, StackValue);
LocItr = std::find(++LocItr, DIILocation.end(), &I);
}
// salvageDebugInfoImpl should fail on examining the first element of
// DbgUsers, or none of them.
- if (!SalvagedExpr)
+ if (!Op0)
break;
- DII->replaceVariableLocationOp(&I, I.getOperand(0));
- if (AdditionalValues.empty()) {
+ DII->replaceVariableLocationOp(&I, Op0);
+ bool IsValidSalvageExpr = SalvagedExpr->getNumElements() <= MaxExpressionSize;
+ if (AdditionalValues.empty() && IsValidSalvageExpr) {
DII->setExpression(SalvagedExpr);
- } else if (isa<DbgValueInst>(DII) &&
+ } else if (isa<DbgValueInst>(DII) && IsValidSalvageExpr &&
DII->getNumVariableLocationOps() + AdditionalValues.size() <=
MaxDebugArgs) {
DII->addVariableLocationOps(AdditionalValues, SalvagedExpr);
@@ -1793,16 +1800,16 @@ void llvm::salvageDebugInfoForDbgValues(
}
}
-bool getSalvageOpsForGEP(GetElementPtrInst *GEP, const DataLayout &DL,
- uint64_t CurrentLocOps,
- SmallVectorImpl<uint64_t> &Opcodes,
- SmallVectorImpl<Value *> &AdditionalValues) {
+Value *getSalvageOpsForGEP(GetElementPtrInst *GEP, const DataLayout &DL,
+ uint64_t CurrentLocOps,
+ SmallVectorImpl<uint64_t> &Opcodes,
+ SmallVectorImpl<Value *> &AdditionalValues) {
unsigned BitWidth = DL.getIndexSizeInBits(GEP->getPointerAddressSpace());
// Rewrite a GEP into a DIExpression.
MapVector<Value *, APInt> VariableOffsets;
APInt ConstantOffset(BitWidth, 0);
if (!GEP->collectOffset(DL, BitWidth, VariableOffsets, ConstantOffset))
- return false;
+ return nullptr;
if (!VariableOffsets.empty() && !CurrentLocOps) {
Opcodes.insert(Opcodes.begin(), {dwarf::DW_OP_LLVM_arg, 0});
CurrentLocOps = 1;
@@ -1816,7 +1823,7 @@ bool getSalvageOpsForGEP(GetElementPtrInst *GEP, const DataLayout &DL,
dwarf::DW_OP_plus});
}
DIExpression::appendOffset(Opcodes, ConstantOffset.getSExtValue());
- return true;
+ return GEP->getOperand(0);
}
uint64_t getDwarfOpForBinOp(Instruction::BinaryOps Opcode) {
@@ -1849,14 +1856,14 @@ uint64_t getDwarfOpForBinOp(Instruction::BinaryOps Opcode) {
}
}
-bool getSalvageOpsForBinOp(BinaryOperator *BI, uint64_t CurrentLocOps,
- SmallVectorImpl<uint64_t> &Opcodes,
- SmallVectorImpl<Value *> &AdditionalValues) {
+Value *getSalvageOpsForBinOp(BinaryOperator *BI, uint64_t CurrentLocOps,
+ SmallVectorImpl<uint64_t> &Opcodes,
+ SmallVectorImpl<Value *> &AdditionalValues) {
// Handle binary operations with constant integer operands as a special case.
auto *ConstInt = dyn_cast<ConstantInt>(BI->getOperand(1));
// Values wider than 64 bits cannot be represented within a DIExpression.
if (ConstInt && ConstInt->getBitWidth() > 64)
- return false;
+ return nullptr;
Instruction::BinaryOps BinOpcode = BI->getOpcode();
// Push any Constant Int operand onto the expression stack.
@@ -1867,7 +1874,7 @@ bool getSalvageOpsForBinOp(BinaryOperator *BI, uint64_t CurrentLocOps,
if (BinOpcode == Instruction::Add || BinOpcode == Instruction::Sub) {
uint64_t Offset = BinOpcode == Instruction::Add ? Val : -int64_t(Val);
DIExpression::appendOffset(Opcodes, Offset);
- return true;
+ return BI->getOperand(0);
}
Opcodes.append({dwarf::DW_OP_constu, Val});
} else {
@@ -1883,62 +1890,51 @@ bool getSalvageOpsForBinOp(BinaryOperator *BI, uint64_t CurrentLocOps,
// representation in a DIExpression.
uint64_t DwarfBinOp = getDwarfOpForBinOp(BinOpcode);
if (!DwarfBinOp)
- return false;
+ return nullptr;
Opcodes.push_back(DwarfBinOp);
-
- return true;
+ return BI->getOperand(0);
}
-DIExpression *
-llvm::salvageDebugInfoImpl(Instruction &I, DIExpression *SrcDIExpr,
- bool WithStackValue, unsigned LocNo,
- SmallVectorImpl<Value *> &AdditionalValues) {
- uint64_t CurrentLocOps = SrcDIExpr->getNumLocationOperands();
+Value *llvm::salvageDebugInfoImpl(Instruction &I, uint64_t CurrentLocOps,
+ SmallVectorImpl<uint64_t> &Ops,
+ SmallVectorImpl<Value *> &AdditionalValues) {
auto &M = *I.getModule();
auto &DL = M.getDataLayout();
- // Apply a vector of opcodes to the source DIExpression.
- auto doSalvage = [&](SmallVectorImpl<uint64_t> &Ops) -> DIExpression * {
- DIExpression *DIExpr = SrcDIExpr;
- if (!Ops.empty()) {
- DIExpr = DIExpression::appendOpsToArg(DIExpr, Ops, LocNo, WithStackValue);
- }
- return DIExpr;
- };
-
- // initializer-list helper for applying operators to the source DIExpression.
- auto applyOps = [&](ArrayRef<uint64_t> Opcodes) {
- SmallVector<uint64_t, 8> Ops(Opcodes.begin(), Opcodes.end());
- return doSalvage(Ops);
- };
-
if (auto *CI = dyn_cast<CastInst>(&I)) {
+ Value *FromValue = CI->getOperand(0);
// No-op casts are irrelevant for debug info.
- if (CI->isNoopCast(DL))
- return SrcDIExpr;
+ if (CI->isNoopCast(DL)) {
+ return FromValue;
+ }
Type *Type = CI->getType();
+ if (Type->isPointerTy())
+ Type = DL.getIntPtrType(Type);
// Casts other than Trunc, SExt, or ZExt to scalar types cannot be salvaged.
if (Type->isVectorTy() ||
- !(isa<TruncInst>(&I) || isa<SExtInst>(&I) || isa<ZExtInst>(&I)))
+ !(isa<TruncInst>(&I) || isa<SExtInst>(&I) || isa<ZExtInst>(&I) ||
+ isa<IntToPtrInst>(&I) || isa<PtrToIntInst>(&I)))
return nullptr;
- Value *FromValue = CI->getOperand(0);
- unsigned FromTypeBitSize = FromValue->getType()->getScalarSizeInBits();
+ llvm::Type *FromType = FromValue->getType();
+ if (FromType->isPointerTy())
+ FromType = DL.getIntPtrType(FromType);
+
+ unsigned FromTypeBitSize = FromType->getScalarSizeInBits();
unsigned ToTypeBitSize = Type->getScalarSizeInBits();
- return applyOps(DIExpression::getExtOps(FromTypeBitSize, ToTypeBitSize,
- isa<SExtInst>(&I)));
+ auto ExtOps = DIExpression::getExtOps(FromTypeBitSize, ToTypeBitSize,
+ isa<SExtInst>(&I));
+ Ops.append(ExtOps.begin(), ExtOps.end());
+ return FromValue;
}
- SmallVector<uint64_t, 8> Ops;
- if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
- if (getSalvageOpsForGEP(GEP, DL, CurrentLocOps, Ops, AdditionalValues))
- return doSalvage(Ops);
- } else if (auto *BI = dyn_cast<BinaryOperator>(&I)) {
- if (getSalvageOpsForBinOp(BI, CurrentLocOps, Ops, AdditionalValues))
- return doSalvage(Ops);
- }
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(&I))
+ return getSalvageOpsForGEP(GEP, DL, CurrentLocOps, Ops, AdditionalValues);
+ if (auto *BI = dyn_cast<BinaryOperator>(&I))
+ return getSalvageOpsForBinOp(BI, CurrentLocOps, Ops, AdditionalValues);
+
// *Not* to do: we should not attempt to salvage load instructions,
// because the validity and lifetime of a dbg.value containing
// DW_OP_deref becomes difficult to analyze. See PR40628 for examples.
@@ -2194,6 +2190,26 @@ void llvm::changeToCall(InvokeInst *II, DomTreeUpdater *DTU) {
DTU->applyUpdates({{DominatorTree::Delete, BB, UnwindDestBB}});
}
+void llvm::createUnreachableSwitchDefault(SwitchInst *Switch,
+ DomTreeUpdater *DTU) {
+ LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
+ auto *BB = Switch->getParent();
+ auto *OrigDefaultBlock = Switch->getDefaultDest();
+ OrigDefaultBlock->removePredecessor(BB);
+ BasicBlock *NewDefaultBlock = BasicBlock::Create(
+ BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(),
+ OrigDefaultBlock);
+ new UnreachableInst(Switch->getContext(), NewDefaultBlock);
+ Switch->setDefaultDest(&*NewDefaultBlock);
+ if (DTU) {
+ SmallVector<DominatorTree::UpdateType, 2> Updates;
+ Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock});
+ if (!is_contained(successors(BB), OrigDefaultBlock))
+ Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock});
+ DTU->applyUpdates(Updates);
+ }
+}
+
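
The new createUnreachableSwitchDefault helper gives a provably dead switch default its own block that ends in an unreachable instruction. As a loose source-level analogy only (ordinary C++ using the GCC/Clang __builtin_unreachable builtin, not the helper itself; names are invented):

// All reachable cases are enumerated, so the default is provably dead; marking
// it unreachable records that fact for the optimizer, much like the new
// unreachable default block records it in the IR.
int classify(int Tag) {
  switch (Tag) {
  case 0: return 10;
  case 1: return 20;
  case 2: return 30;
  default: __builtin_unreachable();
  }
}

int main() { return classify(2) == 30 ? 0 : 1; }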
BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI,
BasicBlock *UnwindEdge,
DomTreeUpdater *DTU) {
@@ -2669,9 +2685,7 @@ static unsigned replaceDominatedUsesWith(Value *From, Value *To,
assert(From->getType() == To->getType());
unsigned Count = 0;
- for (Value::use_iterator UI = From->use_begin(), UE = From->use_end();
- UI != UE;) {
- Use &U = *UI++;
+ for (Use &U : llvm::make_early_inc_range(From->uses())) {
if (!Dominates(Root, U))
continue;
U.set(To);
@@ -2687,9 +2701,7 @@ unsigned llvm::replaceNonLocalUsesWith(Instruction *From, Value *To) {
auto *BB = From->getParent();
unsigned Count = 0;
- for (Value::use_iterator UI = From->use_begin(), UE = From->use_end();
- UI != UE;) {
- Use &U = *UI++;
+ for (Use &U : llvm::make_early_inc_range(From->uses())) {
auto *I = cast<Instruction>(U.getUser());
if (I->getParent() == BB)
continue;
@@ -3171,7 +3183,7 @@ bool llvm::recognizeBSwapOrBitReverseIdiom(
// Now, is the bit permutation correct for a bswap or a bitreverse? We can
// only byteswap values with an even number of bytes.
- APInt DemandedMask = APInt::getAllOnesValue(DemandedBW);
+ APInt DemandedMask = APInt::getAllOnes(DemandedBW);
bool OKForBSwap = MatchBSwaps && (DemandedBW % 16) == 0;
bool OKForBitReverse = MatchBitReversals;
for (unsigned BitIdx = 0;
@@ -3208,7 +3220,7 @@ bool llvm::recognizeBSwapOrBitReverseIdiom(
Instruction *Result = CallInst::Create(F, Provider, "rev", I);
InsertedInsts.push_back(Result);
- if (!DemandedMask.isAllOnesValue()) {
+ if (!DemandedMask.isAllOnes()) {
auto *Mask = ConstantInt::get(DemandedTy, DemandedMask);
Result = BinaryOperator::Create(Instruction::And, Result, Mask, "mask", I);
InsertedInsts.push_back(Result);
@@ -3235,7 +3247,7 @@ void llvm::maybeMarkSanitizerLibraryCallNoBuiltin(
if (F && !F->hasLocalLinkage() && F->hasName() &&
TLI->getLibFunc(F->getName(), Func) && TLI->hasOptimizedCodeGen(Func) &&
!F->doesNotAccessMemory())
- CI->addAttribute(AttributeList::FunctionIndex, Attribute::NoBuiltin);
+ CI->addFnAttr(Attribute::NoBuiltin);
}
bool llvm::canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx) {
@@ -3263,7 +3275,7 @@ bool llvm::canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx) {
if (CB.isBundleOperand(OpIdx))
return false;
- if (OpIdx < CB.getNumArgOperands()) {
+ if (OpIdx < CB.arg_size()) {
// Some variadic intrinsics require constants in the variadic arguments,
// which currently aren't markable as immarg.
if (isa<IntrinsicInst>(CB) &&
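
The salvage hunks above change the contract of the helpers: instead of returning a rebuilt expression (or a bool plus out-parameters), each helper now returns the base value the debug expression should track, appends the opcodes that recreate the lost computation to an out-vector, and signals failure with nullptr. A minimal standalone sketch of that contract, assuming toy stand-in types (this is not the LLVM API; only the DWARF opcode value is real):

#include <cstdint>
#include <iostream>
#include <vector>

struct Value { const char *Name; };          // toy stand-in for llvm::Value
struct AddInst { Value *LHS; int64_t ConstRHS; };

constexpr uint64_t DW_OP_plus_uconst = 0x23; // DWARF "add unsigned constant"

// On success: return the operand the expression now refers to and append the
// opcodes that rebuild the dropped computation. On failure: return nullptr and
// leave Ops untouched.
Value *salvageAdd(AddInst &I, std::vector<uint64_t> &Ops) {
  if (I.ConstRHS < 0)
    return nullptr;                          // pretend this case is unencodable
  Ops.push_back(DW_OP_plus_uconst);
  Ops.push_back(static_cast<uint64_t>(I.ConstRHS));
  return I.LHS;
}

int main() {
  Value X{"x"};
  AddInst Add{&X, 8};
  std::vector<uint64_t> Ops;
  if (Value *Base = salvageAdd(Add, Ops))
    std::cout << "debug value now tracks " << Base->Name << " with "
              << Ops.size() << " extra expression ops\n";
}

Returning the base value directly lets the caller decide how to splice the opcodes into the surviving DIExpression, which is what the reworked salvageDebugInfoForDbgValues loop does.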
diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index cd1f6f0c78a5..f3cf42be8ba1 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -14,6 +14,7 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
@@ -73,57 +74,39 @@ static cl::opt<unsigned> UnrollForcePeelCount(
"unroll-force-peel-count", cl::init(0), cl::Hidden,
cl::desc("Force a peel count regardless of profiling information."));
-static cl::opt<bool> UnrollPeelMultiDeoptExit(
- "unroll-peel-multi-deopt-exit", cl::init(true), cl::Hidden,
- cl::desc("Allow peeling of loops with multiple deopt exits."));
-
static const char *PeeledCountMetaData = "llvm.loop.peeled.count";
-// Designates that a Phi is estimated to become invariant after an "infinite"
-// number of loop iterations (i.e. only may become an invariant if the loop is
-// fully unrolled).
-static const unsigned InfiniteIterationsToInvariance =
- std::numeric_limits<unsigned>::max();
-
// Check whether we are capable of peeling this loop.
bool llvm::canPeel(Loop *L) {
// Make sure the loop is in simplified form
if (!L->isLoopSimplifyForm())
return false;
- if (UnrollPeelMultiDeoptExit) {
- SmallVector<BasicBlock *, 4> Exits;
- L->getUniqueNonLatchExitBlocks(Exits);
-
- if (!Exits.empty()) {
- // Latch's terminator is a conditional branch, Latch is exiting and
- // all non Latch exits ends up with deoptimize.
- const BasicBlock *Latch = L->getLoopLatch();
- const BranchInst *T = dyn_cast<BranchInst>(Latch->getTerminator());
- return T && T->isConditional() && L->isLoopExiting(Latch) &&
- all_of(Exits, [](const BasicBlock *BB) {
- return BB->getTerminatingDeoptimizeCall();
- });
- }
- }
-
- // Only peel loops that contain a single exit
- if (!L->getExitingBlock() || !L->getUniqueExitBlock())
- return false;
-
// Don't try to peel loops where the latch is not the exiting block.
// This can be an indication of two different things:
// 1) The loop is not rotated.
// 2) The loop contains irreducible control flow that involves the latch.
const BasicBlock *Latch = L->getLoopLatch();
- if (Latch != L->getExitingBlock())
+ if (!L->isLoopExiting(Latch))
return false;
// Peeling is only supported if the latch is a branch.
if (!isa<BranchInst>(Latch->getTerminator()))
return false;
- return true;
+ SmallVector<BasicBlock *, 4> Exits;
+ L->getUniqueNonLatchExitBlocks(Exits);
+ // The latch must either be the only exiting block or all non-latch exit
+ // blocks have either a deopt or unreachable terminator or compose a chain of
+ // blocks where the last one is either deopt or unreachable terminated. Both
+ // deopt and unreachable terminators are a strong indication they are not
+ // taken. Note that this is a profitability check, not a legality check. Also
+ // note that LoopPeeling currently can only update the branch weights of latch
+ // blocks and branch weights to blocks with deopt or unreachable do not need
+ // updating.
+ return all_of(Exits, [](const BasicBlock *BB) {
+ return IsBlockFollowedByDeoptOrUnreachable(BB);
+ });
}
// This function calculates the number of iterations after which the given Phi
@@ -139,9 +122,9 @@ bool llvm::canPeel(Loop *L) {
// %x = phi(0, %a), <-- becomes invariant starting from 3rd iteration.
// %y = phi(0, 5),
// %a = %y + 1.
-static unsigned calculateIterationsToInvariance(
+static Optional<unsigned> calculateIterationsToInvariance(
PHINode *Phi, Loop *L, BasicBlock *BackEdge,
- SmallDenseMap<PHINode *, unsigned> &IterationsToInvariance) {
+ SmallDenseMap<PHINode *, Optional<unsigned> > &IterationsToInvariance) {
assert(Phi->getParent() == L->getHeader() &&
"Non-loop Phi should not be checked for turning into invariant.");
assert(BackEdge == L->getLoopLatch() && "Wrong latch?");
@@ -154,29 +137,90 @@ static unsigned calculateIterationsToInvariance(
Value *Input = Phi->getIncomingValueForBlock(BackEdge);
// Place infinity to map to avoid infinite recursion for cycled Phis. Such
// cycles can never stop on an invariant.
- IterationsToInvariance[Phi] = InfiniteIterationsToInvariance;
- unsigned ToInvariance = InfiniteIterationsToInvariance;
+ IterationsToInvariance[Phi] = None;
+ Optional<unsigned> ToInvariance = None;
if (L->isLoopInvariant(Input))
ToInvariance = 1u;
else if (PHINode *IncPhi = dyn_cast<PHINode>(Input)) {
// Only consider Phis in header block.
if (IncPhi->getParent() != L->getHeader())
- return InfiniteIterationsToInvariance;
+ return None;
// If the input becomes an invariant after X iterations, then our Phi
// becomes an invariant after X + 1 iterations.
- unsigned InputToInvariance = calculateIterationsToInvariance(
+ auto InputToInvariance = calculateIterationsToInvariance(
IncPhi, L, BackEdge, IterationsToInvariance);
- if (InputToInvariance != InfiniteIterationsToInvariance)
- ToInvariance = InputToInvariance + 1u;
+ if (InputToInvariance)
+ ToInvariance = *InputToInvariance + 1u;
}
// If we found that this Phi lies in an invariant chain, update the map.
- if (ToInvariance != InfiniteIterationsToInvariance)
+ if (ToInvariance)
IterationsToInvariance[Phi] = ToInvariance;
return ToInvariance;
}
+// Try to find any invariant memory reads that will become dereferenceable in
+// the remainder loop after peeling. The load must also be used (transitively)
+// by an exit condition. Returns the number of iterations to peel off (at the
+// moment either 0 or 1).
+static unsigned peelToTurnInvariantLoadsDerefencebale(Loop &L,
+ DominatorTree &DT) {
+ // Skip loops with a single exiting block, because there should be no benefit
+ // for the heuristic below.
+ if (L.getExitingBlock())
+ return 0;
+
+ // All non-latch exit blocks must have an UnreachableInst terminator.
+ // Otherwise the heuristic below may not be profitable.
+ SmallVector<BasicBlock *, 4> Exits;
+ L.getUniqueNonLatchExitBlocks(Exits);
+ if (any_of(Exits, [](const BasicBlock *BB) {
+ return !isa<UnreachableInst>(BB->getTerminator());
+ }))
+ return 0;
+
+ // Now look for invariant loads that dominate the latch and are not known to
+ // be dereferenceable. If there are such loads and no writes, they will become
+ // dereferenceable in the loop if the first iteration is peeled off. Also
+ // collect the set of instructions controlled by such loads. Only peel if an
+ // exit condition uses (transitively) such a load.
+ BasicBlock *Header = L.getHeader();
+ BasicBlock *Latch = L.getLoopLatch();
+ SmallPtrSet<Value *, 8> LoadUsers;
+ const DataLayout &DL = L.getHeader()->getModule()->getDataLayout();
+ for (BasicBlock *BB : L.blocks()) {
+ for (Instruction &I : *BB) {
+ if (I.mayWriteToMemory())
+ return 0;
+
+ auto Iter = LoadUsers.find(&I);
+ if (Iter != LoadUsers.end()) {
+ for (Value *U : I.users())
+ LoadUsers.insert(U);
+ }
+ // Do not look for reads in the header; they can already be hoisted
+ // without peeling.
+ if (BB == Header)
+ continue;
+ if (auto *LI = dyn_cast<LoadInst>(&I)) {
+ Value *Ptr = LI->getPointerOperand();
+ if (DT.dominates(BB, Latch) && L.isLoopInvariant(Ptr) &&
+ !isDereferenceablePointer(Ptr, LI->getType(), DL, LI, &DT))
+ for (Value *U : I.users())
+ LoadUsers.insert(U);
+ }
+ }
+ }
+ SmallVector<BasicBlock *> ExitingBlocks;
+ L.getExitingBlocks(ExitingBlocks);
+ if (any_of(ExitingBlocks, [&LoadUsers](BasicBlock *Exiting) {
+ return LoadUsers.contains(Exiting->getTerminator());
+ }))
+ return 1;
+ return 0;
+}
+
// Return the number of iterations to peel off that make conditions in the
// body true/false. For example, if we peel 2 iterations off the loop below,
// the condition i < 2 can be evaluated at compile time.
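
The hunk above replaces the UINT_MAX sentinel with Optional<unsigned>, so "never becomes invariant" can no longer be confused with a real iteration count. A self-contained sketch of the memoized recursion using std::optional (toy phi names and dependency map are invented; the iteration counts mirror the comment above: 1 for a phi that copies an invariant, +1 per chained header phi):

#include <iostream>
#include <map>
#include <optional>
#include <string>

// Toy model: each "phi" either copies a loop-invariant value (no dependency)
// or copies another header phi. A cycle of phis never becomes invariant.
using DepMap = std::map<std::string, std::optional<std::string>>;

std::optional<unsigned>
iterationsToInvariance(const std::string &Phi, const DepMap &Deps,
                       std::map<std::string, std::optional<unsigned>> &Memo) {
  if (auto It = Memo.find(Phi); It != Memo.end())
    return It->second;
  Memo[Phi] = std::nullopt;          // seed with "never" to break phi cycles
  std::optional<unsigned> Result;
  auto Dep = Deps.at(Phi);
  if (!Dep)
    Result = 1;                      // copies an invariant: stable after 1 peel
  else if (auto Inner = iterationsToInvariance(*Dep, Deps, Memo))
    Result = *Inner + 1;             // stable one iteration after its input
  if (Result)
    Memo[Phi] = Result;
  return Result;
}

int main() {
  DepMap Deps = {{"x", "a"}, {"a", std::nullopt}, {"c", "d"}, {"d", "c"}};
  std::map<std::string, std::optional<unsigned>> Memo;
  for (const char *P : {"x", "c"}) {
    auto N = iterationsToInvariance(P, Deps, Memo);
    std::cout << P << ": " << (N ? std::to_string(*N) : "never") << '\n';
  }
}

Here "x" resolves to 2 iterations while the cycle "c" <-> "d" correctly reports "never", without any magic constant.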
@@ -292,8 +336,8 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
// Return the number of iterations we want to peel off.
void llvm::computePeelCount(Loop *L, unsigned LoopSize,
TargetTransformInfo::PeelingPreferences &PP,
- unsigned &TripCount, ScalarEvolution &SE,
- unsigned Threshold) {
+ unsigned &TripCount, DominatorTree &DT,
+ ScalarEvolution &SE, unsigned Threshold) {
assert(LoopSize > 0 && "Zero loop size is not allowed!");
// Save the PP.PeelCount value set by the target in
// TTI.getPeelingPreferences or by the flag -unroll-peel-count.
@@ -337,7 +381,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
// First, check that we can peel at least one iteration.
if (2 * LoopSize <= Threshold && UnrollPeelMaxCount > 0) {
// Store the pre-calculated values here.
- SmallDenseMap<PHINode *, unsigned> IterationsToInvariance;
+ SmallDenseMap<PHINode *, Optional<unsigned> > IterationsToInvariance;
  // Now go through all Phis to calculate the number of iterations they
// need to become invariants.
// Start the max computation with the UP.PeelCount value set by the target
@@ -347,10 +391,10 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
assert(BackEdge && "Loop is not in simplified form?");
for (auto BI = L->getHeader()->begin(); isa<PHINode>(&*BI); ++BI) {
PHINode *Phi = cast<PHINode>(&*BI);
- unsigned ToInvariance = calculateIterationsToInvariance(
+ auto ToInvariance = calculateIterationsToInvariance(
Phi, L, BackEdge, IterationsToInvariance);
- if (ToInvariance != InfiniteIterationsToInvariance)
- DesiredPeelCount = std::max(DesiredPeelCount, ToInvariance);
+ if (ToInvariance)
+ DesiredPeelCount = std::max(DesiredPeelCount, *ToInvariance);
}
// Pay respect to limitations implied by loop size and the max peel count.
@@ -360,6 +404,9 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
DesiredPeelCount = std::max(DesiredPeelCount,
countToEliminateCompares(*L, MaxPeelCount, SE));
+ if (DesiredPeelCount == 0)
+ DesiredPeelCount = peelToTurnInvariantLoadsDerefencebale(*L, DT);
+
if (DesiredPeelCount > 0) {
DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount);
// Consider max peel count limitation.
@@ -679,34 +726,27 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
SmallVector<std::pair<BasicBlock *, BasicBlock *>, 4> ExitEdges;
L->getExitEdges(ExitEdges);
- DenseMap<BasicBlock *, BasicBlock *> ExitIDom;
+ // Remember dominators of blocks we might reach through exits to change them
+ // later. Immediate dominator of such block might change, because we add more
+ // routes which can lead to the exit: we can reach it from the peeled
+ // iterations too.
+ DenseMap<BasicBlock *, BasicBlock *> NonLoopBlocksIDom;
if (DT) {
- // We'd like to determine the idom of exit block after peeling one
- // iteration.
- // Let Exit is exit block.
- // Let ExitingSet - is a set of predecessors of Exit block. They are exiting
- // blocks.
- // Let Latch' and ExitingSet' are copies after a peeling.
- // We'd like to find an idom'(Exit) - idom of Exit after peeling.
- // It is an evident that idom'(Exit) will be the nearest common dominator
- // of ExitingSet and ExitingSet'.
- // idom(Exit) is a nearest common dominator of ExitingSet.
- // idom(Exit)' is a nearest common dominator of ExitingSet'.
- // Taking into account that we have a single Latch, Latch' will dominate
- // Header and idom(Exit).
- // So the idom'(Exit) is nearest common dominator of idom(Exit)' and Latch'.
- // All these basic blocks are in the same loop, so what we find is
- // (nearest common dominator of idom(Exit) and Latch)'.
- // In the loop below we remember nearest common dominator of idom(Exit) and
- // Latch to update idom of Exit later.
- assert(L->hasDedicatedExits() && "No dedicated exits?");
- for (auto Edge : ExitEdges) {
- if (ExitIDom.count(Edge.second))
- continue;
- BasicBlock *BB = DT->findNearestCommonDominator(
- DT->getNode(Edge.second)->getIDom()->getBlock(), Latch);
- assert(L->contains(BB) && "IDom is not in a loop");
- ExitIDom[Edge.second] = BB;
+ for (auto *BB : L->blocks()) {
+ auto *BBDomNode = DT->getNode(BB);
+ SmallVector<BasicBlock *, 16> ChildrenToUpdate;
+ for (auto *ChildDomNode : BBDomNode->children()) {
+ auto *ChildBB = ChildDomNode->getBlock();
+ if (!L->contains(ChildBB))
+ ChildrenToUpdate.push_back(ChildBB);
+ }
+ // The new idom of the block will be the nearest common dominator
+ // of all copies of the previous idom. This is equivalent to the
+ // nearest common dominator of the previous idom and the first latch,
+ // which dominates all copies of the previous idom.
+ BasicBlock *NewIDom = DT->findNearestCommonDominator(BB, Latch);
+ for (auto *ChildBB : ChildrenToUpdate)
+ NonLoopBlocksIDom[ChildBB] = NewIDom;
}
}
@@ -795,13 +835,11 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
remapInstructionsInBlocks(NewBlocks, VMap);
if (DT) {
- // Latches of the cloned loops dominate over the loop exit, so idom of the
- // latter is the first cloned loop body, as original PreHeader dominates
- // the original loop body.
+ // Update IDoms of the blocks reachable through exits.
if (Iter == 0)
- for (auto Exit : ExitIDom)
- DT->changeImmediateDominator(Exit.first,
- cast<BasicBlock>(LVMap[Exit.second]));
+ for (auto BBIDom : NonLoopBlocksIDom)
+ DT->changeImmediateDominator(BBIDom.first,
+ cast<BasicBlock>(LVMap[BBIDom.second]));
#ifdef EXPENSIVE_CHECKS
assert(DT->verify(DominatorTree::VerificationLevel::Fast));
#endif
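
The peeling change above replaces the per-exit idom bookkeeping with "new idom = nearest common dominator of the old idom and the latch". A toy, self-contained illustration of that computation on a dominator tree stored as idom pointers (block names and the tree shape are invented; this is not the LLVM DominatorTree API):

#include <iostream>
#include <map>
#include <string>

// Toy dominator tree: each block maps to its immediate dominator; the root
// maps to the empty string.
using IDomMap = std::map<std::string, std::string>;

unsigned depth(const IDomMap &IDom, std::string B) {
  unsigned D = 0;
  while (!IDom.at(B).empty()) { B = IDom.at(B); ++D; }
  return D;
}

std::string nearestCommonDominator(const IDomMap &IDom, std::string A,
                                   std::string B) {
  unsigned DA = depth(IDom, A), DB = depth(IDom, B);
  while (DA > DB) { A = IDom.at(A); --DA; }  // lift the deeper node first
  while (DB > DA) { B = IDom.at(B); --DB; }
  while (A != B) { A = IDom.at(A); B = IDom.at(B); }
  return A;
}

int main() {
  // preheader -> header -> {body, latch}; "exit" was dominated by "body".
  IDomMap IDom = {{"preheader", ""},  {"header", "preheader"},
                  {"body", "header"}, {"latch", "header"},
                  {"exit", "body"}};
  // Peeling adds extra paths into "exit", so its idom becomes
  // NCD(old idom, latch) = "header" in this toy CFG.
  std::cout << nearestCommonDominator(IDom, IDom.at("exit"), "latch") << '\n';
}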
diff --git a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
index ff7905bed91d..c66fd7bb0588 100644
--- a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
@@ -103,6 +103,7 @@ static void InsertNewValueIntoMap(ValueToValueMapTy &VM, Value *K, Value *V) {
static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
BasicBlock *OrigPreheader,
ValueToValueMapTy &ValueMap,
+ ScalarEvolution *SE,
SmallVectorImpl<PHINode*> *InsertedPHIs) {
// Remove PHI node entries that are no longer live.
BasicBlock::iterator I, E = OrigHeader->end();
@@ -125,19 +126,15 @@ static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
// The value now exits in two versions: the initial value in the preheader
// and the loop "next" value in the original header.
SSA.Initialize(OrigHeaderVal->getType(), OrigHeaderVal->getName());
+ // Force re-computation of OrigHeaderVal, as some users now need to use the
+ // new PHI node.
+ if (SE)
+ SE->forgetValue(OrigHeaderVal);
SSA.AddAvailableValue(OrigHeader, OrigHeaderVal);
SSA.AddAvailableValue(OrigPreheader, OrigPreHeaderVal);
// Visit each use of the OrigHeader instruction.
- for (Value::use_iterator UI = OrigHeaderVal->use_begin(),
- UE = OrigHeaderVal->use_end();
- UI != UE;) {
- // Grab the use before incrementing the iterator.
- Use &U = *UI;
-
- // Increment the iterator before removing the use from the list.
- ++UI;
-
+ for (Use &U : llvm::make_early_inc_range(OrigHeaderVal->uses())) {
// SSAUpdater can't handle a non-PHI use in the same block as an
// earlier def. We can easily handle those cases manually.
Instruction *UserInst = cast<Instruction>(U.getUser());
@@ -399,9 +396,8 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
D->getExpression()};
};
SmallDenseSet<DbgIntrinsicHash, 8> DbgIntrinsics;
- for (auto I = std::next(OrigPreheader->rbegin()), E = OrigPreheader->rend();
- I != E; ++I) {
- if (auto *DII = dyn_cast<DbgVariableIntrinsic>(&*I))
+ for (Instruction &I : llvm::drop_begin(llvm::reverse(*OrigPreheader))) {
+ if (auto *DII = dyn_cast<DbgVariableIntrinsic>(&I))
DbgIntrinsics.insert(makeHash(DII));
else
break;
@@ -563,7 +559,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
SmallVector<PHINode*, 2> InsertedPHIs;
// If there were any uses of instructions in the duplicated block outside the
// loop, update them, inserting PHI nodes as required
- RewriteUsesOfClonedInstructions(OrigHeader, OrigPreheader, ValueMap,
+ RewriteUsesOfClonedInstructions(OrigHeader, OrigPreheader, ValueMap, SE,
&InsertedPHIs);
// Attach dbg.value intrinsics to the new phis if that phi uses a value that
@@ -621,7 +617,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// one predecessor. Note that Exit could be an exit block for multiple
// nested loops, causing both of the edges to now be critical and need to
// be split.
- SmallVector<BasicBlock *, 4> ExitPreds(pred_begin(Exit), pred_end(Exit));
+ SmallVector<BasicBlock *, 4> ExitPreds(predecessors(Exit));
bool SplitLatchEdge = false;
for (BasicBlock *ExitPred : ExitPreds) {
// We only need to split loop exit edges.
diff --git a/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/llvm/lib/Transforms/Utils/LoopSimplify.cpp
index d2fd32c98d73..d14c006c8032 100644
--- a/llvm/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/llvm/lib/Transforms/Utils/LoopSimplify.cpp
@@ -779,8 +779,7 @@ namespace {
AU.addPreserved<DependenceAnalysisWrapperPass>();
AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added.
AU.addPreserved<BranchProbabilityInfoWrapperPass>();
- if (EnableMSSALoopDependency)
- AU.addPreserved<MemorySSAWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
}
/// verifyAnalysis() - Verify LoopSimplifyForm's guarantees.
@@ -814,12 +813,10 @@ bool LoopSimplify::runOnFunction(Function &F) {
&getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
MemorySSA *MSSA = nullptr;
std::unique_ptr<MemorySSAUpdater> MSSAU;
- if (EnableMSSALoopDependency) {
- auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
- if (MSSAAnalysis) {
- MSSA = &MSSAAnalysis->getMSSA();
- MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
- }
+ auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
+ if (MSSAAnalysis) {
+ MSSA = &MSSAAnalysis->getMSSA();
+ MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
}
bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index a91bf7b7af13..b0c622b98d5e 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -224,13 +224,12 @@ void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
SmallVector<WeakTrackingVH, 16> DeadInsts;
for (BasicBlock *BB : L->getBlocks()) {
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
- Instruction *Inst = &*I++;
- if (Value *V = SimplifyInstruction(Inst, {DL, nullptr, DT, AC}))
- if (LI->replacementPreservesLCSSAForm(Inst, V))
- Inst->replaceAllUsesWith(V);
- if (isInstructionTriviallyDead(Inst))
- DeadInsts.emplace_back(Inst);
+ for (Instruction &Inst : llvm::make_early_inc_range(*BB)) {
+ if (Value *V = SimplifyInstruction(&Inst, {DL, nullptr, DT, AC}))
+ if (LI->replacementPreservesLCSSAForm(&Inst, V))
+ Inst.replaceAllUsesWith(V);
+ if (isInstructionTriviallyDead(&Inst))
+ DeadInsts.emplace_back(&Inst);
}
// We can't do recursive deletion until we're done iterating, as we might
// have a phi which (potentially indirectly) uses instructions later in
@@ -515,6 +514,10 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
SmallVector<MDNode *, 6> LoopLocalNoAliasDeclScopes;
identifyNoAliasScopesToClone(L->getBlocks(), LoopLocalNoAliasDeclScopes);
+ // We place the unrolled iterations immediately after the original loop
+ // latch. This is a reasonable default placement if we don't have block
+ // frequencies, and if we do, well the layout will be adjusted later.
+ auto BlockInsertPt = std::next(LatchBlock->getIterator());
for (unsigned It = 1; It != ULO.Count; ++It) {
SmallVector<BasicBlock *, 8> NewBlocks;
SmallDenseMap<const Loop *, Loop *, 4> NewLoops;
@@ -523,7 +526,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
ValueToValueMapTy VMap;
BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
- Header->getParent()->getBasicBlockList().push_back(New);
+ Header->getParent()->getBasicBlockList().insert(BlockInsertPt, New);
assert((*BB != Header || LI->getLoopFor(*BB) == L) &&
"Header should not be in a sub-loop");
diff --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 6749d3db743c..a92cb6a313d3 100644
--- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -22,6 +22,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/BasicBlock.h"
@@ -35,6 +36,7 @@
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"
@@ -167,8 +169,11 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
// Add the branch to the exit block (around the unrolled loop)
B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader);
InsertPt->eraseFromParent();
- if (DT)
- DT->changeImmediateDominator(OriginalLoopLatchExit, PrologExit);
+ if (DT) {
+ auto *NewDom = DT->findNearestCommonDominator(OriginalLoopLatchExit,
+ PrologExit);
+ DT->changeImmediateDominator(OriginalLoopLatchExit, NewDom);
+ }
}
/// Connect the unrolling epilog code to the original loop.
@@ -215,7 +220,10 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
// PN = PHI [I, Latch]
// ...
// Exit:
- // EpilogPN = PHI [PN, EpilogPreHeader]
+ // EpilogPN = PHI [PN, EpilogPreHeader], [X, Exit2], [Y, Exit2.epil]
+ //
+ // Exits from non-latch blocks point to the original exit block and the
+ // epilogue edges have already been added.
//
// There is EpilogPreHeader incoming block instead of NewExit as
  // NewExit was split 1 more time to get EpilogPreHeader.
@@ -282,8 +290,10 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
// Add the branch to the exit block (around the unrolling loop)
B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit);
InsertPt->eraseFromParent();
- if (DT)
- DT->changeImmediateDominator(Exit, NewExit);
+ if (DT) {
+ auto *NewDom = DT->findNearestCommonDominator(Exit, NewExit);
+ DT->changeImmediateDominator(Exit, NewDom);
+ }
// Split the main loop exit to maintain canonicalization guarantees.
SmallVector<BasicBlock*, 4> NewExitPreds{Latch};
@@ -291,17 +301,15 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
PreserveLCSSA);
}
-/// Create a clone of the blocks in a loop and connect them together.
-/// If CreateRemainderLoop is false, loop structure will not be cloned,
-/// otherwise a new loop will be created including all cloned blocks, and the
-/// iterator of it switches to count NewIter down to 0.
+/// Create a clone of the blocks in a loop and connect them together. A new
+/// loop will be created including all cloned blocks, and the iterator of the
+/// new loop switched to count NewIter down to 0.
/// The cloned blocks should be inserted between InsertTop and InsertBot.
-/// If loop structure is cloned InsertTop should be new preheader, InsertBot
-/// new loop exit.
-/// Return the new cloned loop that is created when CreateRemainderLoop is true.
+/// InsertTop should be new preheader, InsertBot new loop exit.
+/// Returns the new cloned loop that is created.
static Loop *
-CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
- const bool UseEpilogRemainder, const bool UnrollRemainder,
+CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder,
+ const bool UnrollRemainder,
BasicBlock *InsertTop,
BasicBlock *InsertBot, BasicBlock *Preheader,
std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
@@ -315,8 +323,6 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
Loop *ParentLoop = L->getParentLoop();
NewLoopsMap NewLoops;
NewLoops[ParentLoop] = ParentLoop;
- if (!CreateRemainderLoop)
- NewLoops[L] = ParentLoop;
// For each block in the original loop, create a new copy,
// and update the value map with the newly created values.
@@ -324,11 +330,7 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F);
NewBlocks.push_back(NewBB);
- // If we're unrolling the outermost loop, there's no remainder loop,
- // and this block isn't in a nested loop, then the new block is not
- // in any loop. Otherwise, add it to loopinfo.
- if (CreateRemainderLoop || LI->getLoopFor(*BB) != L || ParentLoop)
- addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops);
+ addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops);
VMap[*BB] = NewBB;
if (Header == *BB) {
@@ -349,27 +351,24 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
}
if (Latch == *BB) {
- // For the last block, if CreateRemainderLoop is false, create a direct
- // jump to InsertBot. If not, create a loop back to cloned head.
+ // For the last block, create a loop back to cloned head.
VMap.erase((*BB)->getTerminator());
+ // Use an incrementing IV. Pre-incr/post-incr is backedge/trip count.
+ // Subtle: NewIter can be 0 if we wrapped when computing the trip count,
+ // thus we must compare the post-increment (wrapping) value.
BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]);
BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator());
IRBuilder<> Builder(LatchBR);
- if (!CreateRemainderLoop) {
- Builder.CreateBr(InsertBot);
- } else {
- PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2,
- suffix + ".iter",
- FirstLoopBB->getFirstNonPHI());
- Value *IdxSub =
- Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
- NewIdx->getName() + ".sub");
- Value *IdxCmp =
- Builder.CreateIsNotNull(IdxSub, NewIdx->getName() + ".cmp");
- Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot);
- NewIdx->addIncoming(NewIter, InsertTop);
- NewIdx->addIncoming(IdxSub, NewBB);
- }
+ PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2,
+ suffix + ".iter",
+ FirstLoopBB->getFirstNonPHI());
+ auto *Zero = ConstantInt::get(NewIdx->getType(), 0);
+ auto *One = ConstantInt::get(NewIdx->getType(), 1);
+ Value *IdxNext = Builder.CreateAdd(NewIdx, One, NewIdx->getName() + ".next");
+ Value *IdxCmp = Builder.CreateICmpNE(IdxNext, NewIter, NewIdx->getName() + ".cmp");
+ Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot);
+ NewIdx->addIncoming(Zero, InsertTop);
+ NewIdx->addIncoming(IdxNext, NewBB);
LatchBR->eraseFromParent();
}
}
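
The remainder latch now uses an incrementing induction variable and compares the post-increment value against NewIter. The subtlety noted in the comment is that NewIter can be 0 when the trip-count addition wrapped; comparing the post-increment value still yields a full 2^BitWidth iterations in that case. A standalone sketch, with uint8_t standing in for the IV type purely so the wrap is small enough to observe:

#include <cstdint>
#include <iostream>

// Count how many times a remainder-style loop body runs for a (wrapping) trip
// count NewIter, using an incrementing IV and a compare on the post-increment
// value, as in the rewritten latch above.
unsigned runRemainderLoop(uint8_t NewIter) {
  unsigned BodyRuns = 0;
  uint8_t Idx = 0;                 // the IV starts at 0 in the cloned preheader
  do {
    ++BodyRuns;                    // loop body
    uint8_t IdxNext = Idx + 1;     // wraps modulo 256, like the IR add
    if (IdxNext == NewIter)        // compare the post-increment value
      break;
    Idx = IdxNext;
  } while (true);
  return BodyRuns;
}

int main() {
  std::cout << runRemainderLoop(3) << '\n';   // 3 iterations
  // NewIter == 0 means the trip-count add wrapped: the loop still runs a full
  // 256 iterations instead of exiting early.
  std::cout << runRemainderLoop(0) << '\n';   // 256 iterations
}

Comparing the pre-increment value instead would exit after a single iteration in the wrapped case, which is exactly the bug the comment warns about.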
@@ -378,99 +377,45 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
// cloned loop.
for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
- if (!CreateRemainderLoop) {
- if (UseEpilogRemainder) {
- unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
- NewPHI->setIncomingBlock(idx, InsertTop);
- NewPHI->removeIncomingValue(Latch, false);
- } else {
- VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader);
- cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
- }
- } else {
- unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
- NewPHI->setIncomingBlock(idx, InsertTop);
- BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
- idx = NewPHI->getBasicBlockIndex(Latch);
- Value *InVal = NewPHI->getIncomingValue(idx);
- NewPHI->setIncomingBlock(idx, NewLatch);
- if (Value *V = VMap.lookup(InVal))
- NewPHI->setIncomingValue(idx, V);
- }
- }
- if (CreateRemainderLoop) {
- Loop *NewLoop = NewLoops[L];
- assert(NewLoop && "L should have been cloned");
- MDNode *LoopID = NewLoop->getLoopID();
-
- // Only add loop metadata if the loop is not going to be completely
- // unrolled.
- if (UnrollRemainder)
- return NewLoop;
-
- Optional<MDNode *> NewLoopID = makeFollowupLoopID(
- LoopID, {LLVMLoopUnrollFollowupAll, LLVMLoopUnrollFollowupRemainder});
- if (NewLoopID.hasValue()) {
- NewLoop->setLoopID(NewLoopID.getValue());
-
- // Do not setLoopAlreadyUnrolled if loop attributes have been defined
- // explicitly.
- return NewLoop;
- }
-
- // Add unroll disable metadata to disable future unrolling for this loop.
- NewLoop->setLoopAlreadyUnrolled();
- return NewLoop;
+ unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
+ NewPHI->setIncomingBlock(idx, InsertTop);
+ BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
+ idx = NewPHI->getBasicBlockIndex(Latch);
+ Value *InVal = NewPHI->getIncomingValue(idx);
+ NewPHI->setIncomingBlock(idx, NewLatch);
+ if (Value *V = VMap.lookup(InVal))
+ NewPHI->setIncomingValue(idx, V);
}
- else
- return nullptr;
-}
-/// Returns true if we can safely unroll a multi-exit/exiting loop. OtherExits
-/// is populated with all the loop exit blocks other than the LatchExit block.
-static bool canSafelyUnrollMultiExitLoop(Loop *L, BasicBlock *LatchExit,
- bool PreserveLCSSA,
- bool UseEpilogRemainder) {
+ Loop *NewLoop = NewLoops[L];
+ assert(NewLoop && "L should have been cloned");
+ MDNode *LoopID = NewLoop->getLoopID();
- // We currently have some correctness constrains in unrolling a multi-exit
- // loop. Check for these below.
+ // Only add loop metadata if the loop is not going to be completely
+ // unrolled.
+ if (UnrollRemainder)
+ return NewLoop;
- // We rely on LCSSA form being preserved when the exit blocks are transformed.
- if (!PreserveLCSSA)
- return false;
+ Optional<MDNode *> NewLoopID = makeFollowupLoopID(
+ LoopID, {LLVMLoopUnrollFollowupAll, LLVMLoopUnrollFollowupRemainder});
+ if (NewLoopID.hasValue()) {
+ NewLoop->setLoopID(NewLoopID.getValue());
- // TODO: Support multiple exiting blocks jumping to the `LatchExit` when
- // UnrollRuntimeMultiExit is true. This will need updating the logic in
- // connectEpilog/connectProlog.
- if (!LatchExit->getSinglePredecessor()) {
- LLVM_DEBUG(
- dbgs() << "Bailout for multi-exit handling when latch exit has >1 "
- "predecessor.\n");
- return false;
+ // Do not setLoopAlreadyUnrolled if loop attributes have been defined
+ // explicitly.
+ return NewLoop;
}
- // FIXME: We bail out of multi-exit unrolling when epilog loop is generated
- // and L is an inner loop. This is because in presence of multiple exits, the
- // outer loop is incorrect: we do not add the EpilogPreheader and exit to the
- // outer loop. This is automatically handled in the prolog case, so we do not
- // have that bug in prolog generation.
- if (UseEpilogRemainder && L->getParentLoop())
- return false;
- // All constraints have been satisfied.
- return true;
+ // Add unroll disable metadata to disable future unrolling for this loop.
+ NewLoop->setLoopAlreadyUnrolled();
+ return NewLoop;
}
/// Returns true if we can profitably unroll the multi-exit loop L. Currently,
/// we return true only if UnrollRuntimeMultiExit is set to true.
static bool canProfitablyUnrollMultiExitLoop(
Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits, BasicBlock *LatchExit,
- bool PreserveLCSSA, bool UseEpilogRemainder) {
-
-#if !defined(NDEBUG)
- assert(canSafelyUnrollMultiExitLoop(L, LatchExit, PreserveLCSSA,
- UseEpilogRemainder) &&
- "Should be safe to unroll before checking profitability!");
-#endif
+ bool UseEpilogRemainder) {
// Priority goes to UnrollRuntimeMultiExit if it's supplied.
if (UnrollRuntimeMultiExit.getNumOccurrences())
@@ -523,24 +468,56 @@ static void updateLatchBranchWeightsForRemainderLoop(Loop *OrigLoop,
uint64_t TrueWeight, FalseWeight;
BranchInst *LatchBR =
cast<BranchInst>(OrigLoop->getLoopLatch()->getTerminator());
- if (LatchBR->extractProfMetadata(TrueWeight, FalseWeight)) {
- uint64_t ExitWeight = LatchBR->getSuccessor(0) == OrigLoop->getHeader()
- ? FalseWeight
- : TrueWeight;
- assert(UnrollFactor > 1);
- uint64_t BackEdgeWeight = (UnrollFactor - 1) * ExitWeight;
- BasicBlock *Header = RemainderLoop->getHeader();
- BasicBlock *Latch = RemainderLoop->getLoopLatch();
- auto *RemainderLatchBR = cast<BranchInst>(Latch->getTerminator());
- unsigned HeaderIdx = (RemainderLatchBR->getSuccessor(0) == Header ? 0 : 1);
- MDBuilder MDB(RemainderLatchBR->getContext());
- MDNode *WeightNode =
- HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight)
- : MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
- RemainderLatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
- }
+ if (!LatchBR->extractProfMetadata(TrueWeight, FalseWeight))
+ return;
+ uint64_t ExitWeight = LatchBR->getSuccessor(0) == OrigLoop->getHeader()
+ ? FalseWeight
+ : TrueWeight;
+ assert(UnrollFactor > 1);
+ uint64_t BackEdgeWeight = (UnrollFactor - 1) * ExitWeight;
+ BasicBlock *Header = RemainderLoop->getHeader();
+ BasicBlock *Latch = RemainderLoop->getLoopLatch();
+ auto *RemainderLatchBR = cast<BranchInst>(Latch->getTerminator());
+ unsigned HeaderIdx = (RemainderLatchBR->getSuccessor(0) == Header ? 0 : 1);
+ MDBuilder MDB(RemainderLatchBR->getContext());
+ MDNode *WeightNode =
+ HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight)
+ : MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
+ RemainderLatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
}
+/// Calculate ModVal = (BECount + 1) % Count on the abstract integer domain
+/// accounting for the possibility of unsigned overflow in the 2s complement
+/// domain. Preconditions:
+/// 1) TripCount = BECount + 1 (allowing overflow)
+/// 2) Log2(Count) <= BitWidth(BECount)
+static Value *CreateTripRemainder(IRBuilder<> &B, Value *BECount,
+ Value *TripCount, unsigned Count) {
+ // Note that TripCount is BECount + 1.
+ if (isPowerOf2_32(Count))
+ // If the expression is zero, then either:
+ // 1. There are no iterations to be run in the prolog/epilog loop.
+ // OR
+ // 2. The addition computing TripCount overflowed.
+ //
+ // If (2) is true, we know that TripCount really is (1 << BEWidth) and so
+ // the number of iterations that remain to be run in the original loop is a
+ // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (a
+ // precondition of this method).
+ return B.CreateAnd(TripCount, Count - 1, "xtraiter");
+
+ // As (BECount + 1) can potentially unsigned overflow we count
+ // (BECount % Count) + 1 which is overflow safe as BECount % Count < Count.
+ Constant *CountC = ConstantInt::get(BECount->getType(), Count);
+ Value *ModValTmp = B.CreateURem(BECount, CountC);
+ Value *ModValAdd = B.CreateAdd(ModValTmp,
+ ConstantInt::get(ModValTmp->getType(), 1));
+ // At that point (BECount % Count) + 1 could be equal to Count.
+ // To handle this case we need to take mod by Count one more time.
+ return B.CreateURem(ModValAdd, CountC, "xtraiter");
+}
+
+
/// Insert code in the prolog/epilog code when unrolling a loop with a
/// run-time trip-count.
///
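
The factored-out CreateTripRemainder computes (BECount + 1) % Count without letting the "+ 1" overflow corrupt the result: a power-of-two Count can simply mask the (possibly wrapped) TripCount, and otherwise the remainder is taken before adding one. A standalone arithmetic sketch, with uint32_t standing in for the trip-count type (not the LLVM IRBuilder code itself):

#include <cstdint>
#include <iostream>

// Count >= 2 is assumed, as in the unroller.
uint32_t tripRemainder(uint32_t BECount, uint32_t Count) {
  uint32_t TripCount = BECount + 1;          // may wrap to 0
  if ((Count & (Count - 1)) == 0)            // power of two
    return TripCount & (Count - 1);          // masking is correct even if we wrapped
  // Avoid the overflowing "+ 1": BECount % Count < Count, so adding one cannot
  // wrap, and a second urem folds the "== Count" case back to zero.
  return (BECount % Count + 1) % Count;
}

int main() {
  std::cout << tripRemainder(10, 4) << '\n';          // (10 + 1) % 4 = 3
  std::cout << tripRemainder(10, 3) << '\n';          // (10 + 1) % 3 = 2
  // BECount at the type's maximum: the true trip count is 2^32, whose remainder
  // modulo 3 is 1; the naive wrapped "(BECount + 1) % 3" would give 0 instead.
  std::cout << tripRemainder(UINT32_MAX, 3) << '\n';  // 1
}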
@@ -624,19 +601,22 @@ bool llvm::UnrollRuntimeLoopRemainder(
// These are exit blocks other than the target of the latch exiting block.
SmallVector<BasicBlock *, 4> OtherExits;
L->getUniqueNonLatchExitBlocks(OtherExits);
- bool isMultiExitUnrollingEnabled =
- canSafelyUnrollMultiExitLoop(L, LatchExit, PreserveLCSSA,
- UseEpilogRemainder) &&
- canProfitablyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA,
- UseEpilogRemainder);
- // Support only single exit and exiting block unless multi-exit loop unrolling is enabled.
- if (!isMultiExitUnrollingEnabled &&
- (!L->getExitingBlock() || OtherExits.size())) {
- LLVM_DEBUG(
- dbgs()
- << "Multiple exit/exiting blocks in loop and multi-exit unrolling not "
- "enabled!\n");
- return false;
+ // Support only single exit and exiting block unless multi-exit loop
+ // unrolling is enabled.
+ if (!L->getExitingBlock() || OtherExits.size()) {
+ // We rely on LCSSA form being preserved when the exit blocks are transformed.
+    // (Note that only an off-by-default mode of the old PM disables PreserveLCSSA.)
+ if (!PreserveLCSSA)
+ return false;
+
+ if (!canProfitablyUnrollMultiExitLoop(L, OtherExits, LatchExit,
+ UseEpilogRemainder)) {
+ LLVM_DEBUG(
+ dbgs()
+ << "Multiple exit/exiting blocks in loop and multi-exit unrolling not "
+ "enabled!\n");
+ return false;
+ }
}
// Use Scalar Evolution to compute the trip count. This allows more loops to
// be unrolled than relying on induction var simplification.
@@ -659,6 +639,7 @@ bool llvm::UnrollRuntimeLoopRemainder(
unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth();
// Add 1 since the backedge count doesn't include the first loop iteration.
+ // (Note that overflow can occur, this is handled explicitly below)
const SCEV *TripCountSC =
SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1));
if (isa<SCEVCouldNotCompute>(TripCountSC)) {
@@ -706,8 +687,7 @@ bool llvm::UnrollRuntimeLoopRemainder(
NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI);
NewPreHeader->setName(PreHeader->getName() + ".new");
// Split LatchExit to create phi nodes from branch above.
- SmallVector<BasicBlock*, 4> Preds(predecessors(LatchExit));
- NewExit = SplitBlockPredecessors(LatchExit, Preds, ".unr-lcssa", DT, LI,
+ NewExit = SplitBlockPredecessors(LatchExit, {Latch}, ".unr-lcssa", DT, LI,
nullptr, PreserveLCSSA);
// NewExit gets its DebugLoc from LatchExit, which is not part of the
// original Loop.
@@ -717,6 +697,21 @@ bool llvm::UnrollRuntimeLoopRemainder(
// Split NewExit to insert epilog remainder loop.
EpilogPreHeader = SplitBlock(NewExit, NewExitTerminator, DT, LI);
EpilogPreHeader->setName(Header->getName() + ".epil.preheader");
+
+  // If the latch exits from multiple levels of nested loops, then
+ // by assumption there must be another loop exit which branches to the
+ // outer loop and we must adjust the loop for the newly inserted blocks
+ // to account for the fact that our epilogue is still in the same outer
+ // loop. Note that this leaves loopinfo temporarily out of sync with the
+ // CFG until the actual epilogue loop is inserted.
+ if (auto *ParentL = L->getParentLoop())
+ if (LI->getLoopFor(LatchExit) != ParentL) {
+ LI->removeBlock(NewExit);
+ ParentL->addBasicBlockToLoop(NewExit, *LI);
+ LI->removeBlock(EpilogPreHeader);
+ ParentL->addBasicBlockToLoop(EpilogPreHeader, *LI);
+ }
+
} else {
// If prolog remainder
// Split the original preheader twice to insert prolog remainder loop
@@ -751,35 +746,8 @@ bool llvm::UnrollRuntimeLoopRemainder(
Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(),
PreHeaderBR);
IRBuilder<> B(PreHeaderBR);
- Value *ModVal;
- // Calculate ModVal = (BECount + 1) % Count.
- // Note that TripCount is BECount + 1.
- if (isPowerOf2_32(Count)) {
- // When Count is power of 2 we don't BECount for epilog case, however we'll
- // need it for a branch around unrolling loop for prolog case.
- ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter");
- // 1. There are no iterations to be run in the prolog/epilog loop.
- // OR
- // 2. The addition computing TripCount overflowed.
- //
- // If (2) is true, we know that TripCount really is (1 << BEWidth) and so
- // the number of iterations that remain to be run in the original loop is a
- // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we
- // explicitly check this above).
- } else {
- // As (BECount + 1) can potentially unsigned overflow we count
- // (BECount % Count) + 1 which is overflow safe as BECount % Count < Count.
- Value *ModValTmp = B.CreateURem(BECount,
- ConstantInt::get(BECount->getType(),
- Count));
- Value *ModValAdd = B.CreateAdd(ModValTmp,
- ConstantInt::get(ModValTmp->getType(), 1));
- // At that point (BECount % Count) + 1 could be equal to Count.
- // To handle this case we need to take mod by Count one more time.
- ModVal = B.CreateURem(ModValAdd,
- ConstantInt::get(BECount->getType(), Count),
- "xtraiter");
- }
+ Value * const ModVal = CreateTripRemainder(B, BECount, TripCount, Count);
+
Value *BranchVal =
UseEpilogRemainder ? B.CreateICmpULT(BECount,
ConstantInt::get(BECount->getType(),
@@ -810,18 +778,13 @@ bool llvm::UnrollRuntimeLoopRemainder(
std::vector<BasicBlock *> NewBlocks;
ValueToValueMapTy VMap;
- // For unroll factor 2 remainder loop will have 1 iterations.
- // Do not create 1 iteration loop.
- bool CreateRemainderLoop = (Count != 2);
-
// Clone all the basic blocks in the loop. If Count is 2, we don't clone
// the loop, otherwise we create a cloned loop to execute the extra
// iterations. This function adds the appropriate CFG connections.
BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit;
BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
Loop *remainderLoop = CloneLoopBlocks(
- L, ModVal, CreateRemainderLoop, UseEpilogRemainder, UnrollRemainder,
- InsertTop, InsertBot,
+ L, ModVal, UseEpilogRemainder, UnrollRemainder, InsertTop, InsertBot,
NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI);
// Assign the maximum possible trip count as the back edge weight for the
@@ -840,36 +803,33 @@ bool llvm::UnrollRuntimeLoopRemainder(
// work is to update the phi nodes in the original loop, and take in the
// values from the cloned region.
for (auto *BB : OtherExits) {
- for (auto &II : *BB) {
-
- // Given we preserve LCSSA form, we know that the values used outside the
- // loop will be used through these phi nodes at the exit blocks that are
- // transformed below.
- if (!isa<PHINode>(II))
- break;
- PHINode *Phi = cast<PHINode>(&II);
- unsigned oldNumOperands = Phi->getNumIncomingValues();
+ // Given we preserve LCSSA form, we know that the values used outside the
+ // loop will be used through these phi nodes at the exit blocks that are
+ // transformed below.
+ for (PHINode &PN : BB->phis()) {
+ unsigned oldNumOperands = PN.getNumIncomingValues();
// Add the incoming values from the remainder code to the end of the phi
// node.
- for (unsigned i =0; i < oldNumOperands; i++){
- Value *newVal = VMap.lookup(Phi->getIncomingValue(i));
- // newVal can be a constant or derived from values outside the loop, and
- // hence need not have a VMap value. Also, since lookup already generated
- // a default "null" VMap entry for this value, we need to populate that
- // VMap entry correctly, with the mapped entry being itself.
- if (!newVal) {
- newVal = Phi->getIncomingValue(i);
- VMap[Phi->getIncomingValue(i)] = Phi->getIncomingValue(i);
- }
- Phi->addIncoming(newVal,
- cast<BasicBlock>(VMap[Phi->getIncomingBlock(i)]));
+      for (unsigned i = 0; i < oldNumOperands; i++) {
+        auto *PredBB = PN.getIncomingBlock(i);
+ if (PredBB == Latch)
+        // The latch exit is handled separately, see connectX
+ continue;
+ if (!L->contains(PredBB))
+ // Even if we had dedicated exits, the code above inserted an
+ // extra branch which can reach the latch exit.
+ continue;
+
+ auto *V = PN.getIncomingValue(i);
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ if (L->contains(I))
+ V = VMap.lookup(I);
+ PN.addIncoming(V, cast<BasicBlock>(VMap[PredBB]));
}
}
#if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG)
for (BasicBlock *SuccBB : successors(BB)) {
- assert(!(any_of(OtherExits,
- [SuccBB](BasicBlock *EB) { return EB == SuccBB; }) ||
- SuccBB == LatchExit) &&
+ assert(!(llvm::is_contained(OtherExits, SuccBB) || SuccBB == LatchExit) &&
"Breaks the definition of dedicated exits!");
}
#endif
@@ -931,23 +891,22 @@ bool llvm::UnrollRuntimeLoopRemainder(
PreserveLCSSA);
// Update counter in loop for unrolling.
- // I should be multiply of Count.
+  // Use an incrementing IV: the pre-increment value corresponds to the
+  // backedge-taken count and the post-increment value to the trip count.
+  // Subtle: TestVal can be 0 if we wrapped when computing the trip count,
+  // thus we must compare the post-increment (wrapping) value.
IRBuilder<> B2(NewPreHeader->getTerminator());
Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter");
BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
- B2.SetInsertPoint(LatchBR);
PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter",
Header->getFirstNonPHI());
- Value *IdxSub =
- B2.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
- NewIdx->getName() + ".nsub");
- Value *IdxCmp;
- if (LatchBR->getSuccessor(0) == Header)
- IdxCmp = B2.CreateIsNotNull(IdxSub, NewIdx->getName() + ".ncmp");
- else
- IdxCmp = B2.CreateIsNull(IdxSub, NewIdx->getName() + ".ncmp");
- NewIdx->addIncoming(TestVal, NewPreHeader);
- NewIdx->addIncoming(IdxSub, Latch);
+ B2.SetInsertPoint(LatchBR);
+ auto *Zero = ConstantInt::get(NewIdx->getType(), 0);
+ auto *One = ConstantInt::get(NewIdx->getType(), 1);
+ Value *IdxNext = B2.CreateAdd(NewIdx, One, NewIdx->getName() + ".next");
+ auto Pred = LatchBR->getSuccessor(0) == Header ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
+ Value *IdxCmp = B2.CreateICmp(Pred, IdxNext, TestVal, NewIdx->getName() + ".ncmp");
+ NewIdx->addIncoming(Zero, NewPreHeader);
+ NewIdx->addIncoming(IdxNext, Latch);
LatchBR->setCondition(IdxCmp);
} else {
// Connect the prolog code to the original loop and update the
@@ -960,12 +919,49 @@ bool llvm::UnrollRuntimeLoopRemainder(
// of its parent loops, so the Scalar Evolution pass needs to be run again.
SE->forgetTopmostLoop(L);
- // Verify that the Dom Tree is correct.
+ // Verify that the Dom Tree and Loop Info are correct.
#if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG)
- if (DT)
+ if (DT) {
assert(DT->verify(DominatorTree::VerificationLevel::Full));
+ LI->verify(*DT);
+ }
#endif
+  // For an unroll factor of 2, the remainder loop will have 1 iteration.
+ if (Count == 2 && DT && LI && SE) {
+ // TODO: This code could probably be pulled out into a helper function
+ // (e.g. breakLoopBackedgeAndSimplify) and reused in loop-deletion.
+ BasicBlock *RemainderLatch = remainderLoop->getLoopLatch();
+ assert(RemainderLatch);
+ SmallVector<BasicBlock*> RemainderBlocks(remainderLoop->getBlocks().begin(),
+ remainderLoop->getBlocks().end());
+ breakLoopBackedge(remainderLoop, *DT, *SE, *LI, nullptr);
+ remainderLoop = nullptr;
+
+ // Simplify loop values after breaking the backedge
+ const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
+ SmallVector<WeakTrackingVH, 16> DeadInsts;
+ for (BasicBlock *BB : RemainderBlocks) {
+ for (Instruction &Inst : llvm::make_early_inc_range(*BB)) {
+ if (Value *V = SimplifyInstruction(&Inst, {DL, nullptr, DT, AC}))
+ if (LI->replacementPreservesLCSSAForm(&Inst, V))
+ Inst.replaceAllUsesWith(V);
+ if (isInstructionTriviallyDead(&Inst))
+ DeadInsts.emplace_back(&Inst);
+ }
+ // We can't do recursive deletion until we're done iterating, as we might
+ // have a phi which (potentially indirectly) uses instructions later in
+ // the block we're iterating through.
+ RecursivelyDeleteTriviallyDeadInstructions(DeadInsts);
+ }
+
+ // Merge latch into exit block.
+ auto *ExitBB = RemainderLatch->getSingleSuccessor();
+ assert(ExitBB && "required after breaking cond br backedge");
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+ MergeBlockIntoPredecessor(ExitBB, &DTU, LI);
+ }
+
// Canonicalize to LoopSimplifyForm both original and remainder loops. We
// cannot rely on the LoopUnrollPass to do this because it only does
// canonicalization for parent/subloops and not the sibling loops.
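As a reading aid for the counter rewrite above (not part of the patch): a minimal scalar sketch, in plain C++ with illustrative names, of the shape the unrolled-body counter now takes. The body counts up from zero and the latch compares the post-increment value, which stays correct even when the computed iteration count wrapped to zero, because the counter wraps the same way.

#include <cstdint>

// Stand-in for one unrolled copy of the loop body.
static uint64_t bodyWork(uint32_t Iter) { return Iter; }

// UnrollIter plays the role of "unroll_iter" (TripCount - ModVal). The
// counter mirrors the niter/niter.next PHI built above: start at zero,
// increment at the latch, and compare the post-increment value.
static uint64_t runUnrolledBody(uint32_t UnrollIter) {
  uint64_t Sum = 0;
  uint32_t Niter = 0;                 // NewIdx starts at 0 in the preheader.
  do {
    Sum += bodyWork(Niter);
    uint32_t NiterNext = Niter + 1;   // IdxNext = NewIdx + 1 at the latch.
    if (NiterNext == UnrollIter)      // Compare the post-increment value.
      break;
    Niter = NiterNext;
  } while (true);
  return Sum;
}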
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index e4d78f9ada08..f0f079335683 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -612,10 +612,7 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
for (auto *Block : L->blocks())
for (Instruction &I : *Block) {
auto *Undef = UndefValue::get(I.getType());
- for (Value::use_iterator UI = I.use_begin(), E = I.use_end();
- UI != E;) {
- Use &U = *UI;
- ++UI;
+ for (Use &U : llvm::make_early_inc_range(I.uses())) {
if (auto *Usr = dyn_cast<Instruction>(U.getUser()))
if (L->contains(Usr->getParent()))
continue;
@@ -710,21 +707,58 @@ void llvm::breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
SE.forgetLoop(L);
- // Note: By splitting the backedge, and then explicitly making it unreachable
- // we gracefully handle corner cases such as non-bottom tested loops and the
- // like. We also have the benefit of being able to reuse existing well tested
- // code. It might be worth special casing the common bottom tested case at
- // some point to avoid code churn.
-
std::unique_ptr<MemorySSAUpdater> MSSAU;
if (MSSA)
MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
- auto *BackedgeBB = SplitEdge(Latch, Header, &DT, &LI, MSSAU.get());
+  // Update the CFG and domtree. We chose to special case a couple of
+  // common cases for code quality and test readability reasons.
+ [&]() -> void {
+ if (auto *BI = dyn_cast<BranchInst>(Latch->getTerminator())) {
+ if (!BI->isConditional()) {
+ DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
+ (void)changeToUnreachable(BI, /*PreserveLCSSA*/ true, &DTU,
+ MSSAU.get());
+ return;
+ }
+
+    // Conditional latch/exit - note that the latch can be shared by an inner
+    // and an outer loop, so the other target doesn't need to be an exit.
+ if (L->isLoopExiting(Latch)) {
+ // TODO: Generalize ConstantFoldTerminator so that it can be used
+ // here without invalidating LCSSA or MemorySSA. (Tricky case for
+      // LCSSA: header is an exit block of a preceding sibling loop w/o
+ // dedicated exits.)
+ const unsigned ExitIdx = L->contains(BI->getSuccessor(0)) ? 1 : 0;
+ BasicBlock *ExitBB = BI->getSuccessor(ExitIdx);
+
+ DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
+ Header->removePredecessor(Latch, true);
+
+ IRBuilder<> Builder(BI);
+ auto *NewBI = Builder.CreateBr(ExitBB);
+ // Transfer the metadata to the new branch instruction (minus the
+ // loop info since this is no longer a loop)
+ NewBI->copyMetadata(*BI, {LLVMContext::MD_dbg,
+ LLVMContext::MD_annotation});
+
+ BI->eraseFromParent();
+ DTU.applyUpdates({{DominatorTree::Delete, Latch, Header}});
+ if (MSSA)
+ MSSAU->applyUpdates({{DominatorTree::Delete, Latch, Header}}, DT);
+ return;
+ }
+ }
- DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
- (void)changeToUnreachable(BackedgeBB->getTerminator(),
- /*PreserveLCSSA*/ true, &DTU, MSSAU.get());
+  // General case. By splitting the backedge and then explicitly making it
+  // unreachable, we gracefully handle corner cases such as switch and invoke
+  // terminators.
+ auto *BackedgeBB = SplitEdge(Latch, Header, &DT, &LI, MSSAU.get());
+
+ DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
+ (void)changeToUnreachable(BackedgeBB->getTerminator(),
+ /*PreserveLCSSA*/ true, &DTU, MSSAU.get());
+ }();
// Erase (and destroy) this loop instance. Handles relinking sub-loops
// and blocks within the loop as needed.
@@ -852,32 +886,37 @@ bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop,
return true;
}
-Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,
- Value *Right) {
- CmpInst::Predicate Pred;
+CmpInst::Predicate llvm::getMinMaxReductionPredicate(RecurKind RK) {
switch (RK) {
default:
llvm_unreachable("Unknown min/max recurrence kind");
case RecurKind::UMin:
- Pred = CmpInst::ICMP_ULT;
- break;
+ return CmpInst::ICMP_ULT;
case RecurKind::UMax:
- Pred = CmpInst::ICMP_UGT;
- break;
+ return CmpInst::ICMP_UGT;
case RecurKind::SMin:
- Pred = CmpInst::ICMP_SLT;
- break;
+ return CmpInst::ICMP_SLT;
case RecurKind::SMax:
- Pred = CmpInst::ICMP_SGT;
- break;
+ return CmpInst::ICMP_SGT;
case RecurKind::FMin:
- Pred = CmpInst::FCMP_OLT;
- break;
+ return CmpInst::FCMP_OLT;
case RecurKind::FMax:
- Pred = CmpInst::FCMP_OGT;
- break;
+ return CmpInst::FCMP_OGT;
}
+}
+Value *llvm::createSelectCmpOp(IRBuilderBase &Builder, Value *StartVal,
+ RecurKind RK, Value *Left, Value *Right) {
+ if (auto VTy = dyn_cast<VectorType>(Left->getType()))
+ StartVal = Builder.CreateVectorSplat(VTy->getElementCount(), StartVal);
+ Value *Cmp =
+ Builder.CreateCmp(CmpInst::ICMP_NE, Left, StartVal, "rdx.select.cmp");
+ return Builder.CreateSelect(Cmp, Left, Right, "rdx.select");
+}
+
+Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,
+ Value *Right) {
+ CmpInst::Predicate Pred = getMinMaxReductionPredicate(RK);
Value *Cmp = Builder.CreateCmp(Pred, Left, Right, "rdx.minmax.cmp");
Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
return Select;
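For orientation only, a self-contained scalar sketch (plain C++, hypothetical names) of the compare-then-select idiom createMinMaxOp keeps emitting, with the predicate choice separated out the way getMinMaxReductionPredicate now is:

#include <cstdint>

enum class MinMaxKind { UMin, UMax, SMin, SMax };

// Scalar analogue of getMinMaxReductionPredicate + createMinMaxOp: derive the
// comparison from the recurrence kind, then select between the two operands.
static int64_t minMaxOp(MinMaxKind Kind, int64_t Left, int64_t Right) {
  bool Cmp = false;                                      // "rdx.minmax.cmp"
  switch (Kind) {
  case MinMaxKind::UMin: Cmp = (uint64_t)Left < (uint64_t)Right; break;
  case MinMaxKind::UMax: Cmp = (uint64_t)Left > (uint64_t)Right; break;
  case MinMaxKind::SMin: Cmp = Left < Right; break;
  case MinMaxKind::SMax: Cmp = Left > Right; break;
  }
  return Cmp ? Left : Right;                             // "rdx.minmax.select"
}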
@@ -955,15 +994,50 @@ Value *llvm::getShuffleReduction(IRBuilderBase &Builder, Value *Src,
return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
}
+Value *llvm::createSelectCmpTargetReduction(IRBuilderBase &Builder,
+ const TargetTransformInfo *TTI,
+ Value *Src,
+ const RecurrenceDescriptor &Desc,
+ PHINode *OrigPhi) {
+ assert(RecurrenceDescriptor::isSelectCmpRecurrenceKind(
+ Desc.getRecurrenceKind()) &&
+ "Unexpected reduction kind");
+ Value *InitVal = Desc.getRecurrenceStartValue();
+ Value *NewVal = nullptr;
+
+ // First use the original phi to determine the new value we're trying to
+ // select from in the loop.
+ SelectInst *SI = nullptr;
+ for (auto *U : OrigPhi->users()) {
+ if ((SI = dyn_cast<SelectInst>(U)))
+ break;
+ }
+ assert(SI && "One user of the original phi should be a select");
+
+ if (SI->getTrueValue() == OrigPhi)
+ NewVal = SI->getFalseValue();
+ else {
+ assert(SI->getFalseValue() == OrigPhi &&
+ "At least one input to the select should be the original Phi");
+ NewVal = SI->getTrueValue();
+ }
+
+ // Create a splat vector with the new value and compare this to the vector
+ // we want to reduce.
+ ElementCount EC = cast<VectorType>(Src->getType())->getElementCount();
+ Value *Right = Builder.CreateVectorSplat(EC, InitVal);
+ Value *Cmp =
+ Builder.CreateCmp(CmpInst::ICMP_NE, Src, Right, "rdx.select.cmp");
+
+ // If any predicate is true it means that we want to select the new value.
+ Cmp = Builder.CreateOrReduce(Cmp);
+ return Builder.CreateSelect(Cmp, NewVal, InitVal, "rdx.select");
+}
+
Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder,
const TargetTransformInfo *TTI,
Value *Src, RecurKind RdxKind,
ArrayRef<Value *> RedOps) {
- TargetTransformInfo::ReductionFlags RdxFlags;
- RdxFlags.IsMaxOp = RdxKind == RecurKind::SMax || RdxKind == RecurKind::UMax ||
- RdxKind == RecurKind::FMax;
- RdxFlags.IsSigned = RdxKind == RecurKind::SMax || RdxKind == RecurKind::SMin;
-
auto *SrcVecEltTy = cast<VectorType>(Src->getType())->getElementType();
switch (RdxKind) {
case RecurKind::Add:
@@ -1000,14 +1074,19 @@ Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder,
Value *llvm::createTargetReduction(IRBuilderBase &B,
const TargetTransformInfo *TTI,
- const RecurrenceDescriptor &Desc,
- Value *Src) {
+ const RecurrenceDescriptor &Desc, Value *Src,
+ PHINode *OrigPhi) {
// TODO: Support in-order reductions based on the recurrence descriptor.
// All ops in the reduction inherit fast-math-flags from the recurrence
// descriptor.
IRBuilderBase::FastMathFlagGuard FMFGuard(B);
B.setFastMathFlags(Desc.getFastMathFlags());
- return createSimpleTargetReduction(B, TTI, Src, Desc.getRecurrenceKind());
+
+ RecurKind RK = Desc.getRecurrenceKind();
+ if (RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK))
+ return createSelectCmpTargetReduction(B, TTI, Src, Desc, OrigPhi);
+
+ return createSimpleTargetReduction(B, TTI, Src, RK);
}
Value *llvm::createOrderedReduction(IRBuilderBase &B,
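A hedged scalar model of what the new select-cmp reduction path computes (illustrative code, not the LLVM API): reduce by asking whether any lane differs from the recurrence's start value; if so, the loop selected the alternative value at least once, otherwise the initial value survives.

#include <vector>

// Scalar model of createSelectCmpTargetReduction: Src is the vectorized
// partial result, InitVal the recurrence start value, and NewVal the value
// the original select switches to inside the loop.
static int selectCmpReduce(const std::vector<int> &Src, int InitVal,
                           int NewVal) {
  bool AnyChanged = false;                 // OR-reduce "lane != InitVal".
  for (int Lane : Src)
    AnyChanged |= (Lane != InitVal);       // "rdx.select.cmp"
  return AnyChanged ? NewVal : InitVal;    // "rdx.select"
}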
@@ -1081,58 +1160,6 @@ bool llvm::cannotBeMaxInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE,
// As a side effect, reduces the amount of IV processing within the loop.
//===----------------------------------------------------------------------===//
-// Return true if the SCEV expansion generated by the rewriter can replace the
-// original value. SCEV guarantees that it produces the same value, but the way
-// it is produced may be illegal IR. Ideally, this function will only be
-// called for verification.
-static bool isValidRewrite(ScalarEvolution *SE, Value *FromVal, Value *ToVal) {
- // If an SCEV expression subsumed multiple pointers, its expansion could
- // reassociate the GEP changing the base pointer. This is illegal because the
- // final address produced by a GEP chain must be inbounds relative to its
- // underlying object. Otherwise basic alias analysis, among other things,
- // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
- // producing an expression involving multiple pointers. Until then, we must
- // bail out here.
- //
- // Retrieve the pointer operand of the GEP. Don't use getUnderlyingObject
- // because it understands lcssa phis while SCEV does not.
- Value *FromPtr = FromVal;
- Value *ToPtr = ToVal;
- if (auto *GEP = dyn_cast<GEPOperator>(FromVal))
- FromPtr = GEP->getPointerOperand();
-
- if (auto *GEP = dyn_cast<GEPOperator>(ToVal))
- ToPtr = GEP->getPointerOperand();
-
- if (FromPtr != FromVal || ToPtr != ToVal) {
- // Quickly check the common case
- if (FromPtr == ToPtr)
- return true;
-
- // SCEV may have rewritten an expression that produces the GEP's pointer
- // operand. That's ok as long as the pointer operand has the same base
- // pointer. Unlike getUnderlyingObject(), getPointerBase() will find the
- // base of a recurrence. This handles the case in which SCEV expansion
- // converts a pointer type recurrence into a nonrecurrent pointer base
- // indexed by an integer recurrence.
-
- // If the GEP base pointer is a vector of pointers, abort.
- if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
- return false;
-
- const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
- const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
- if (FromBase == ToBase)
- return true;
-
- LLVM_DEBUG(dbgs() << "rewriteLoopExitValues: GEP rewrite bail out "
- << *FromBase << " != " << *ToBase << "\n");
-
- return false;
- }
- return true;
-}
-
static bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) {
SmallPtrSet<const Instruction *, 8> Visited;
SmallVector<const Instruction *, 8> WorkList;
@@ -1165,9 +1192,6 @@ struct RewritePhi {
Instruction *ExpansionPoint; // Where we'd like to expand that SCEV?
bool HighCost; // Is this expansion a high-cost?
- Value *Expansion = nullptr;
- bool ValidRewrite = false;
-
RewritePhi(PHINode *P, unsigned I, const SCEV *Val, Instruction *ExpansionPt,
bool H)
: PN(P), Ith(I), ExpansionSCEV(Val), ExpansionPoint(ExpansionPt),
@@ -1204,8 +1228,6 @@ static bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet)
// phase later. Skip it in the loop invariant check below.
bool found = false;
for (const RewritePhi &Phi : RewritePhiSet) {
- if (!Phi.ValidRewrite)
- continue;
unsigned i = Phi.Ith;
if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
found = true;
@@ -1264,13 +1286,6 @@ int llvm::rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI,
if (!SE->isSCEVable(PN->getType()))
continue;
- // It's necessary to tell ScalarEvolution about this explicitly so that
- // it can walk the def-use list and forget all SCEVs, as it may not be
- // watching the PHI itself. Once the new exit value is in place, there
- // may not be a def-use connection between the loop and every instruction
- // which got a SCEVAddRecExpr for that loop.
- SE->forgetValue(PN);
-
// Iterate over all of the values in all the PHI nodes.
for (unsigned i = 0; i != NumPreds; ++i) {
// If the value being merged in is not integer or is not defined
@@ -1339,61 +1354,49 @@ int llvm::rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI,
}
}
- // Now that we've done preliminary filtering and billed all the SCEV's,
- // we can perform the last sanity check - the expansion must be valid.
- for (RewritePhi &Phi : RewritePhiSet) {
- Phi.Expansion = Rewriter.expandCodeFor(Phi.ExpansionSCEV, Phi.PN->getType(),
- Phi.ExpansionPoint);
+ // TODO: evaluate whether it is beneficial to change how we calculate
+ // high-cost: if we have SCEV 'A' which we know we will expand, should we
+ // calculate the cost of other SCEV's after expanding SCEV 'A', thus
+ // potentially giving cost bonus to those other SCEV's?
- LLVM_DEBUG(dbgs() << "rewriteLoopExitValues: AfterLoopVal = "
- << *(Phi.Expansion) << '\n'
- << " LoopVal = " << *(Phi.ExpansionPoint) << "\n");
+ bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);
+ int NumReplaced = 0;
+
+ // Transformation.
+ for (const RewritePhi &Phi : RewritePhiSet) {
+ PHINode *PN = Phi.PN;
- // FIXME: isValidRewrite() is a hack. it should be an assert, eventually.
- Phi.ValidRewrite = isValidRewrite(SE, Phi.ExpansionPoint, Phi.Expansion);
- if (!Phi.ValidRewrite) {
- DeadInsts.push_back(Phi.Expansion);
+ // Only do the rewrite when the ExitValue can be expanded cheaply.
+ // If LoopCanBeDel is true, rewrite exit value aggressively.
+ if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost)
continue;
- }
+
+ Value *ExitVal = Rewriter.expandCodeFor(
+ Phi.ExpansionSCEV, Phi.PN->getType(), Phi.ExpansionPoint);
+
+ LLVM_DEBUG(dbgs() << "rewriteLoopExitValues: AfterLoopVal = " << *ExitVal
+ << '\n'
+ << " LoopVal = " << *(Phi.ExpansionPoint) << "\n");
#ifndef NDEBUG
// If we reuse an instruction from a loop which is neither L nor one of
// its containing loops, we end up breaking LCSSA form for this loop by
// creating a new use of its instruction.
- if (auto *ExitInsn = dyn_cast<Instruction>(Phi.Expansion))
+ if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
if (EVL != L)
assert(EVL->contains(L) && "LCSSA breach detected!");
#endif
- }
-
- // TODO: after isValidRewrite() is an assertion, evaluate whether
- // it is beneficial to change how we calculate high-cost:
- // if we have SCEV 'A' which we know we will expand, should we calculate
- // the cost of other SCEV's after expanding SCEV 'A',
- // thus potentially giving cost bonus to those other SCEV's?
-
- bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);
- int NumReplaced = 0;
-
- // Transformation.
- for (const RewritePhi &Phi : RewritePhiSet) {
- if (!Phi.ValidRewrite)
- continue;
-
- PHINode *PN = Phi.PN;
- Value *ExitVal = Phi.Expansion;
-
- // Only do the rewrite when the ExitValue can be expanded cheaply.
- // If LoopCanBeDel is true, rewrite exit value aggressively.
- if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
- DeadInsts.push_back(ExitVal);
- continue;
- }
NumReplaced++;
Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
PN->setIncomingValue(Phi.Ith, ExitVal);
+ // It's necessary to tell ScalarEvolution about this explicitly so that
+ // it can walk the def-use list and forget all SCEVs, as it may not be
+ // watching the PHI itself. Once the new exit value is in place, there
+ // may not be a def-use connection between the loop and every instruction
+ // which got a SCEVAddRecExpr for that loop.
+ SE->forgetValue(PN);
// If this instruction is dead now, delete it. Don't do it now to avoid
// invalidating iterators.
@@ -1554,7 +1557,7 @@ expandBounds(const SmallVectorImpl<RuntimePointerCheck> &PointerChecks, Loop *L,
return ChecksWithBounds;
}
-std::pair<Instruction *, Instruction *> llvm::addRuntimeChecks(
+Value *llvm::addRuntimeChecks(
Instruction *Loc, Loop *TheLoop,
const SmallVectorImpl<RuntimePointerCheck> &PointerChecks,
SCEVExpander &Exp) {
@@ -1563,22 +1566,10 @@ std::pair<Instruction *, Instruction *> llvm::addRuntimeChecks(
auto ExpandedChecks = expandBounds(PointerChecks, TheLoop, Loc, Exp);
LLVMContext &Ctx = Loc->getContext();
- Instruction *FirstInst = nullptr;
IRBuilder<> ChkBuilder(Loc);
// Our instructions might fold to a constant.
Value *MemoryRuntimeCheck = nullptr;
- // FIXME: this helper is currently a duplicate of the one in
- // LoopVectorize.cpp.
- auto GetFirstInst = [](Instruction *FirstInst, Value *V,
- Instruction *Loc) -> Instruction * {
- if (FirstInst)
- return FirstInst;
- if (Instruction *I = dyn_cast<Instruction>(V))
- return I->getParent() == Loc->getParent() ? I : nullptr;
- return nullptr;
- };
-
for (const auto &Check : ExpandedChecks) {
const PointerBounds &A = Check.first, &B = Check.second;
// Check if two pointers (A and B) conflict where conflict is computed as:
@@ -1607,30 +1598,16 @@ std::pair<Instruction *, Instruction *> llvm::addRuntimeChecks(
// bound1 = (A.Start < B.End)
// IsConflict = bound0 & bound1
Value *Cmp0 = ChkBuilder.CreateICmpULT(Start0, End1, "bound0");
- FirstInst = GetFirstInst(FirstInst, Cmp0, Loc);
Value *Cmp1 = ChkBuilder.CreateICmpULT(Start1, End0, "bound1");
- FirstInst = GetFirstInst(FirstInst, Cmp1, Loc);
Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
- FirstInst = GetFirstInst(FirstInst, IsConflict, Loc);
if (MemoryRuntimeCheck) {
IsConflict =
ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx");
- FirstInst = GetFirstInst(FirstInst, IsConflict, Loc);
}
MemoryRuntimeCheck = IsConflict;
}
- if (!MemoryRuntimeCheck)
- return std::make_pair(nullptr, nullptr);
-
- // We have to do this trickery because the IRBuilder might fold the check to a
- // constant expression in which case there is no Instruction anchored in a
- // the block.
- Instruction *Check =
- BinaryOperator::CreateAnd(MemoryRuntimeCheck, ConstantInt::getTrue(Ctx));
- ChkBuilder.Insert(Check, "memcheck.conflict");
- FirstInst = GetFirstInst(FirstInst, Check, Loc);
- return std::make_pair(FirstInst, Check);
+ return MemoryRuntimeCheck;
}
Optional<IVConditionInfo> llvm::hasPartialIVCondition(Loop &L,
diff --git a/llvm/lib/Transforms/Utils/LoopVersioning.cpp b/llvm/lib/Transforms/Utils/LoopVersioning.cpp
index 8a89158788cf..771b7d25b0f2 100644
--- a/llvm/lib/Transforms/Utils/LoopVersioning.cpp
+++ b/llvm/lib/Transforms/Utils/LoopVersioning.cpp
@@ -14,9 +14,9 @@
#include "llvm/Transforms/Utils/LoopVersioning.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Dominators.h"
@@ -52,8 +52,7 @@ void LoopVersioning::versionLoop(
assert(VersionedLoop->isLoopSimplifyForm() &&
"Loop is not in loop-simplify form");
- Instruction *FirstCheckInst;
- Instruction *MemRuntimeCheck;
+ Value *MemRuntimeCheck;
Value *SCEVRuntimeCheck;
Value *RuntimeCheck = nullptr;
@@ -64,8 +63,8 @@ void LoopVersioning::versionLoop(
SCEVExpander Exp2(*RtPtrChecking.getSE(),
VersionedLoop->getHeader()->getModule()->getDataLayout(),
"induction");
- std::tie(FirstCheckInst, MemRuntimeCheck) = addRuntimeChecks(
- RuntimeCheckBB->getTerminator(), VersionedLoop, AliasChecks, Exp2);
+ MemRuntimeCheck = addRuntimeChecks(RuntimeCheckBB->getTerminator(),
+ VersionedLoop, AliasChecks, Exp2);
SCEVExpander Exp(*SE, RuntimeCheckBB->getModule()->getDataLayout(),
"scev.check");
@@ -354,14 +353,11 @@ PreservedAnalyses LoopVersioningPass::run(Function &F,
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
auto &AA = AM.getResult<AAManager>(F);
auto &AC = AM.getResult<AssumptionAnalysis>(F);
- MemorySSA *MSSA = EnableMSSALoopDependency
- ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA()
- : nullptr;
auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
auto GetLAA = [&](Loop &L) -> const LoopAccessInfo & {
- LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE,
- TLI, TTI, nullptr, MSSA};
+ LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE,
+ TLI, TTI, nullptr, nullptr, nullptr};
return LAM.getResult<LoopAccessAnalysis>(L, AR);
};
diff --git a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
index 616b4e8eb01c..8dc4702993c3 100644
--- a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
+++ b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
@@ -442,7 +442,7 @@ void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
/* DestAlign */ Memcpy->getDestAlign().valueOrOne(),
/* SrcIsVolatile */ Memcpy->isVolatile(),
/* DstIsVolatile */ Memcpy->isVolatile(),
- /* TargetTransfomrInfo */ TTI);
+ /* TargetTransformInfo */ TTI);
}
}
diff --git a/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/llvm/lib/Transforms/Utils/LowerSwitch.cpp
index ec8d7a7074cd..aff9d1311688 100644
--- a/llvm/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/llvm/lib/Transforms/Utils/LowerSwitch.cpp
@@ -524,16 +524,14 @@ bool LowerSwitch(Function &F, LazyValueInfo *LVI, AssumptionCache *AC) {
bool Changed = false;
SmallPtrSet<BasicBlock *, 8> DeleteList;
- for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
- BasicBlock *Cur =
- &*I++; // Advance over block so we don't traverse new blocks
-
+ // We use make_early_inc_range here so that we don't traverse new blocks.
+ for (BasicBlock &Cur : llvm::make_early_inc_range(F)) {
// If the block is a dead Default block that will be deleted later, don't
// waste time processing it.
- if (DeleteList.count(Cur))
+ if (DeleteList.count(&Cur))
continue;
- if (SwitchInst *SI = dyn_cast<SwitchInst>(Cur->getTerminator())) {
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(Cur.getTerminator())) {
Changed = true;
ProcessSwitchInst(SI, DeleteList, AC, LVI);
}
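Several hunks in this patch (deleteDeadLoop, LowerSwitch, RelLookupTableConverter) replace hand-rolled advance-before-mutate loops with llvm::make_early_inc_range. A small standalone sketch of the idiom being packaged, using the standard library rather than LLVM ADTs:

#include <list>

// Erase the odd elements while iterating. The iterator is advanced before the
// current element is touched, so erasing it cannot invalidate the loop; this
// is the "early increment" pattern that make_early_inc_range wraps.
static void eraseOdd(std::list<int> &Values) {
  for (auto It = Values.begin(), End = Values.end(); It != End;) {
    auto Cur = It++;        // Advance first.
    if (*Cur % 2 != 0)
      Values.erase(Cur);    // Safe: only Cur is invalidated, not It.
  }
}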
diff --git a/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/llvm/lib/Transforms/Utils/ModuleUtils.cpp
index 2aef37205c53..bb5ff59cba4b 100644
--- a/llvm/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/llvm/lib/Transforms/Utils/ModuleUtils.cpp
@@ -125,7 +125,7 @@ Function *llvm::createSanitizerCtor(Module &M, StringRef CtorName) {
Function *Ctor = Function::createWithDefaultAttr(
FunctionType::get(Type::getVoidTy(M.getContext()), false),
GlobalValue::InternalLinkage, 0, CtorName, &M);
- Ctor->addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind);
+ Ctor->addFnAttr(Attribute::NoUnwind);
BasicBlock *CtorBB = BasicBlock::Create(M.getContext(), "", Ctor);
ReturnInst::Create(M.getContext(), CtorBB);
// Ensure Ctor cannot be discarded, even if in a comdat.
@@ -165,7 +165,7 @@ llvm::getOrCreateSanitizerCtorAndInitFunctions(
if (Function *Ctor = M.getFunction(CtorName))
// FIXME: Sink this logic into the module, similar to the handling of
// globals. This will make moving to a concurrent model much easier.
- if (Ctor->arg_size() == 0 ||
+ if (Ctor->arg_empty() ||
Ctor->getReturnType() == Type::getVoidTy(M.getContext()))
return {Ctor, declareSanitizerInitFunction(M, InitName, InitArgTypes)};
@@ -297,7 +297,6 @@ void VFABI::setVectorVariantNames(
"vector function declaration is missing.");
}
#endif
- CI->addAttribute(
- AttributeList::FunctionIndex,
+ CI->addFnAttr(
Attribute::get(M->getContext(), MappingsAttrName, Buffer.str()));
}
diff --git a/llvm/lib/Transforms/Utils/PredicateInfo.cpp b/llvm/lib/Transforms/Utils/PredicateInfo.cpp
index 91280762aaa7..bd2b6fafdf2e 100644
--- a/llvm/lib/Transforms/Utils/PredicateInfo.cpp
+++ b/llvm/lib/Transforms/Utils/PredicateInfo.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/IR/AssemblyAnnotationWriter.h"
@@ -23,6 +24,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
@@ -566,10 +568,18 @@ Value *PredicateInfoBuilder::materializeStack(unsigned int &Counter,
// to ensure we dominate all of our uses. Always insert right before the
// relevant instruction (terminator, assume), so that we insert in proper
// order in the case of multiple predicateinfo in the same block.
+ // The number of named values is used to detect if a new declaration was
+ // added. If so, that declaration is tracked so that it can be removed when
+  // the analysis is done. The corner case where a new declaration results in
+  // a name clash, renaming the old value, is not considered, as that would
+  // represent an invalid module.
if (isa<PredicateWithEdge>(ValInfo)) {
IRBuilder<> B(getBranchTerminator(ValInfo));
+ auto NumDecls = F.getParent()->getNumNamedValues();
Function *IF = Intrinsic::getDeclaration(
F.getParent(), Intrinsic::ssa_copy, Op->getType());
+ if (NumDecls != F.getParent()->getNumNamedValues())
+ PI.CreatedDeclarations.insert(IF);
CallInst *PIC =
B.CreateCall(IF, Op, Op->getName() + "." + Twine(Counter++));
PI.PredicateMap.insert({PIC, ValInfo});
@@ -581,8 +591,11 @@ Value *PredicateInfoBuilder::materializeStack(unsigned int &Counter,
// Insert the predicate directly after the assume. While it also holds
// directly before it, assume(i1 true) is not a useful fact.
IRBuilder<> B(PAssume->AssumeInst->getNextNode());
+ auto NumDecls = F.getParent()->getNumNamedValues();
Function *IF = Intrinsic::getDeclaration(
F.getParent(), Intrinsic::ssa_copy, Op->getType());
+ if (NumDecls != F.getParent()->getNumNamedValues())
+ PI.CreatedDeclarations.insert(IF);
CallInst *PIC = B.CreateCall(IF, Op);
PI.PredicateMap.insert({PIC, ValInfo});
Result.Def = PIC;
@@ -761,6 +774,23 @@ PredicateInfo::PredicateInfo(Function &F, DominatorTree &DT,
Builder.buildPredicateInfo();
}
+// Remove all declarations we created. The PredicateInfo consumers are
+// responsible for removing the ssa_copy calls created.
+PredicateInfo::~PredicateInfo() {
+  // Collect function pointers in a set first, as SmallSet uses a SmallVector
+ // internally and we have to remove the asserting value handles first.
+ SmallPtrSet<Function *, 20> FunctionPtrs;
+ for (auto &F : CreatedDeclarations)
+ FunctionPtrs.insert(&*F);
+ CreatedDeclarations.clear();
+
+ for (Function *F : FunctionPtrs) {
+ assert(F->user_begin() == F->user_end() &&
+ "PredicateInfo consumer did not remove all SSA copies.");
+ F->eraseFromParent();
+ }
+}
+
Optional<PredicateConstraint> PredicateBase::getConstraint() const {
switch (Type) {
case PT_Assume:
@@ -827,6 +857,19 @@ void PredicateInfoPrinterLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AssumptionCacheTracker>();
}
+// Replace ssa_copy calls created by PredicateInfo with their operand.
+static void replaceCreatedSSACopys(PredicateInfo &PredInfo, Function &F) {
+ for (Instruction &Inst : llvm::make_early_inc_range(instructions(F))) {
+ const auto *PI = PredInfo.getPredicateInfoFor(&Inst);
+ auto *II = dyn_cast<IntrinsicInst>(&Inst);
+ if (!PI || !II || II->getIntrinsicID() != Intrinsic::ssa_copy)
+ continue;
+
+ Inst.replaceAllUsesWith(II->getOperand(0));
+ Inst.eraseFromParent();
+ }
+}
+
bool PredicateInfoPrinterLegacyPass::runOnFunction(Function &F) {
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
@@ -834,6 +877,8 @@ bool PredicateInfoPrinterLegacyPass::runOnFunction(Function &F) {
PredInfo->print(dbgs());
if (VerifyPredicateInfo)
PredInfo->verifyPredicateInfo();
+
+ replaceCreatedSSACopys(*PredInfo, F);
return false;
}
@@ -845,6 +890,7 @@ PreservedAnalyses PredicateInfoPrinterPass::run(Function &F,
auto PredInfo = std::make_unique<PredicateInfo>(F, DT, AC);
PredInfo->print(OS);
+ replaceCreatedSSACopys(*PredInfo, F);
return PreservedAnalyses::all();
}
diff --git a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 427028066026..b35ab57e0d87 100644
--- a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -70,7 +70,8 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) {
if (LI->isVolatile())
return false;
} else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
- if (SI->getOperand(0) == AI)
+ if (SI->getValueOperand() == AI ||
+ SI->getValueOperand()->getType() != AI->getAllocatedType())
return false; // Don't allow a store OF the AI, only INTO the AI.
// Note that atomic stores can be transformed; atomic semantics do
// not have any meaning for a local alloca.
diff --git a/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp b/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp
index 85e5adaeaf5e..3ebc89158173 100644
--- a/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp
+++ b/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp
@@ -177,9 +177,7 @@ static bool convertToRelativeLookupTables(
bool Changed = false;
- for (auto GVI = M.global_begin(), E = M.global_end(); GVI != E;) {
- GlobalVariable &GV = *GVI++;
-
+ for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals())) {
if (!shouldConvertToRelLookupTable(M, GV))
continue;
diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
index 4cf99abcc10f..d7e8eaf677c6 100644
--- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp
+++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
@@ -540,8 +540,14 @@ void SCCPInstVisitor::markArgInFuncSpecialization(Function *F, Argument *A,
E = F->arg_end();
I != E; ++I, ++J)
if (J != A && ValueState.count(I)) {
- ValueState[J] = ValueState[I];
- pushToWorkList(ValueState[J], J);
+ // Note: This previously looked like this:
+ // ValueState[J] = ValueState[I];
+ // This is incorrect because the DenseMap class may resize the underlying
+ // memory when inserting `J`, which will invalidate the reference to `I`.
+      // Instead, we make sure `J` exists first, then copy `I`'s state into it.
+ auto &NewValue = ValueState[J];
+ NewValue = ValueState[I];
+ pushToWorkList(NewValue, J);
}
}
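The SCCP fix above guards against a reference into a container that may grow. A tiny illustration of the same hazard and the safe ordering, using std::vector (whose reallocation on push_back stands in for DenseMap's resize); the names are hypothetical:

#include <cstddef>
#include <vector>

// Buggy shape (shown in comments only):
//   int &Src = V[I];
//   V.push_back(0);    // may reallocate the storage
//   V.back() = Src;    // Src may now dangle
//
// Safe shape, mirroring the patch: create the destination slot first, then
// read the source through a fresh lookup after any possible reallocation.
static void copyElementSafely(std::vector<int> &V, std::size_t I) {
  V.push_back(0);        // Make sure the destination exists.
  int &Dst = V.back();   // Reference taken after the potential reallocation.
  Dst = V[I];            // Source re-read through the (stable) index.
}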
@@ -802,6 +808,9 @@ void SCCPInstVisitor::visitCastInst(CastInst &I) {
return;
ValueLatticeElement OpSt = getValueState(I.getOperand(0));
+ if (OpSt.isUnknownOrUndef())
+ return;
+
if (Constant *OpC = getConstant(OpSt)) {
// Fold the constant as we build.
Constant *C = ConstantFoldCastOperand(I.getOpcode(), OpC, I.getType(), DL);
@@ -809,9 +818,14 @@ void SCCPInstVisitor::visitCastInst(CastInst &I) {
return;
// Propagate constant value
markConstant(&I, C);
- } else if (OpSt.isConstantRange() && I.getDestTy()->isIntegerTy()) {
+ } else if (I.getDestTy()->isIntegerTy()) {
auto &LV = getValueState(&I);
- ConstantRange OpRange = OpSt.getConstantRange();
+ ConstantRange OpRange =
+ OpSt.isConstantRange()
+ ? OpSt.getConstantRange()
+ : ConstantRange::getFull(
+ I.getOperand(0)->getType()->getScalarSizeInBits());
+
Type *DestTy = I.getDestTy();
// Vectors where all elements have the same known constant range are treated
// as a single constant range in the lattice. When bitcasting such vectors,
@@ -826,7 +840,7 @@ void SCCPInstVisitor::visitCastInst(CastInst &I) {
ConstantRange Res =
OpRange.castOp(I.getOpcode(), DL.getTypeSizeInBits(DestTy));
mergeInValue(LV, &I, ValueLatticeElement::getRange(Res));
- } else if (!OpSt.isUnknownOrUndef())
+ } else
markOverdefined(&I);
}
@@ -1183,10 +1197,10 @@ void SCCPInstVisitor::handleCallOverdefined(CallBase &CB) {
// a declaration, maybe we can constant fold it.
if (F && F->isDeclaration() && canConstantFoldCallTo(&CB, F)) {
SmallVector<Constant *, 8> Operands;
- for (auto AI = CB.arg_begin(), E = CB.arg_end(); AI != E; ++AI) {
- if (AI->get()->getType()->isStructTy())
+ for (const Use &A : CB.args()) {
+ if (A.get()->getType()->isStructTy())
return markOverdefined(&CB); // Can't handle struct args.
- ValueLatticeElement State = getValueState(*AI);
+ ValueLatticeElement State = getValueState(A);
if (State.isUnknownOrUndef())
return; // Operands are not resolved yet.
diff --git a/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp b/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp
index 917d5e0a1ef0..7de76b86817b 100644
--- a/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp
+++ b/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp
@@ -65,12 +65,6 @@ void SSAUpdaterBulk::AddUse(unsigned Var, Use *U) {
Rewrites[Var].Uses.push_back(U);
}
-/// Return true if the SSAUpdater already has a value for the specified variable
-/// in the specified block.
-bool SSAUpdaterBulk::HasValueForBlock(unsigned Var, BasicBlock *BB) {
- return (Var < Rewrites.size()) ? Rewrites[Var].Defines.count(BB) : false;
-}
-
// Compute value at the given block BB. We either should already know it, or we
// should be able to recursively reach it going up dominator tree.
Value *SSAUpdaterBulk::computeValueAt(BasicBlock *BB, RewriteInfo &R,
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index 3978e1e29825..a042146d7ace 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -747,9 +747,8 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
// so that pointer operands are inserted first, which the code below relies on
// to form more involved GEPs.
SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
- for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(S->op_end()),
- E(S->op_begin()); I != E; ++I)
- OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I));
+ for (const SCEV *Op : reverse(S->operands()))
+ OpsAndLoops.push_back(std::make_pair(getRelevantLoop(Op), Op));
// Sort by loop. Use a stable sort so that constants follow non-constants and
// pointer operands precede non-pointer operands.
@@ -765,7 +764,11 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
// This is the first operand. Just expand it.
Sum = expand(Op);
++I;
- } else if (PointerType *PTy = dyn_cast<PointerType>(Sum->getType())) {
+ continue;
+ }
+
+ assert(!Op->getType()->isPointerTy() && "Only first op can be pointer");
+ if (PointerType *PTy = dyn_cast<PointerType>(Sum->getType())) {
// The running sum expression is a pointer. Try to form a getelementptr
// at this level with that as the base.
SmallVector<const SCEV *, 4> NewOps;
@@ -779,16 +782,6 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
NewOps.push_back(X);
}
Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, Sum);
- } else if (PointerType *PTy = dyn_cast<PointerType>(Op->getType())) {
- // The running sum is an integer, and there's a pointer at this level.
- // Try to form a getelementptr. If the running sum is instructions,
- // use a SCEVUnknown to avoid re-analyzing them.
- SmallVector<const SCEV *, 4> NewOps;
- NewOps.push_back(isa<Instruction>(Sum) ? SE.getUnknown(Sum) :
- SE.getSCEV(Sum));
- for (++I; I != E && I->first == CurLoop; ++I)
- NewOps.push_back(I->second);
- Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, expand(Op));
} else if (Op->isNonConstantNegative()) {
// Instead of doing a negate and add, just do a subtract.
Value *W = expandCodeForImpl(SE.getNegativeSCEV(Op), Ty, false);
@@ -817,9 +810,8 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
// Collect all the mul operands in a loop, along with their associated loops.
// Iterate in reverse so that constants are emitted last, all else equal.
SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
- for (std::reverse_iterator<SCEVMulExpr::op_iterator> I(S->op_end()),
- E(S->op_begin()); I != E; ++I)
- OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I));
+ for (const SCEV *Op : reverse(S->operands()))
+ OpsAndLoops.push_back(std::make_pair(getRelevantLoop(Op), Op));
// Sort by loop. Use a stable sort so that constants follow non-constants.
llvm::stable_sort(OpsAndLoops, LoopCompare(SE.DT));
@@ -923,28 +915,6 @@ Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
/*IsSafeToHoist*/ SE.isKnownNonZero(S->getRHS()));
}
-/// Move parts of Base into Rest to leave Base with the minimal
-/// expression that provides a pointer operand suitable for a
-/// GEP expansion.
-static void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest,
- ScalarEvolution &SE) {
- while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Base)) {
- Base = A->getStart();
- Rest = SE.getAddExpr(Rest,
- SE.getAddRecExpr(SE.getConstant(A->getType(), 0),
- A->getStepRecurrence(SE),
- A->getLoop(),
- A->getNoWrapFlags(SCEV::FlagNW)));
- }
- if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(Base)) {
- Base = A->getOperand(A->getNumOperands()-1);
- SmallVector<const SCEV *, 8> NewAddOps(A->operands());
- NewAddOps.back() = Rest;
- Rest = SE.getAddExpr(NewAddOps);
- ExposePointerBase(Base, Rest, SE);
- }
-}
-
/// Determine if this is a well-behaved chain of instructions leading back to
/// the PHI. If so, it may be reused by expanded expressions.
bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV,
@@ -1125,22 +1095,6 @@ Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L,
return IncV;
}
-/// Hoist the addrec instruction chain rooted in the loop phi above the
-/// position. This routine assumes that this is possible (has been checked).
-void SCEVExpander::hoistBeforePos(DominatorTree *DT, Instruction *InstToHoist,
- Instruction *Pos, PHINode *LoopPhi) {
- do {
- if (DT->dominates(InstToHoist, Pos))
- break;
- // Make sure the increment is where we want it. But don't move it
- // down past a potential existing post-inc user.
- fixupInsertPoints(InstToHoist);
- InstToHoist->moveBefore(Pos);
- Pos = InstToHoist;
- InstToHoist = cast<Instruction>(InstToHoist->getOperand(0));
- } while (InstToHoist != LoopPhi);
-}
-
/// Check whether we can cheaply express the requested SCEV in terms of
/// the available PHI SCEV by truncation and/or inversion of the step.
static bool canBeCheaplyTransformed(ScalarEvolution &SE,
@@ -1264,8 +1218,6 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
if (LSRMode) {
if (!isExpandedAddRecExprPHI(&PN, TempIncV, L))
continue;
- if (L == IVIncInsertLoop && !hoistIVInc(TempIncV, IVIncInsertPos))
- continue;
} else {
if (!isNormalAddRecExprPHI(&PN, TempIncV, L))
continue;
@@ -1293,11 +1245,6 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
}
if (AddRecPhiMatch) {
- // Potentially, move the increment. We have made sure in
- // isExpandedAddRecExprPHI or hoistIVInc that this is possible.
- if (L == IVIncInsertLoop)
- hoistBeforePos(&SE.DT, IncV, IVIncInsertPos, AddRecPhiMatch);
-
// Ok, the add recurrence looks usable.
// Remember this PHI, even in post-inc mode.
InsertedValues.insert(AddRecPhiMatch);
@@ -1597,29 +1544,17 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
// {X,+,F} --> X + {0,+,F}
if (!S->getStart()->isZero()) {
+ if (PointerType *PTy = dyn_cast<PointerType>(S->getType())) {
+ Value *StartV = expand(SE.getPointerBase(S));
+ assert(StartV->getType() == PTy && "Pointer type mismatch for GEP!");
+ return expandAddToGEP(SE.removePointerBase(S), PTy, Ty, StartV);
+ }
+
SmallVector<const SCEV *, 4> NewOps(S->operands());
NewOps[0] = SE.getConstant(Ty, 0);
const SCEV *Rest = SE.getAddRecExpr(NewOps, L,
S->getNoWrapFlags(SCEV::FlagNW));
- // Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the
- // comments on expandAddToGEP for details.
- const SCEV *Base = S->getStart();
- // Dig into the expression to find the pointer base for a GEP.
- const SCEV *ExposedRest = Rest;
- ExposePointerBase(Base, ExposedRest, SE);
- // If we found a pointer, expand the AddRec with a GEP.
- if (PointerType *PTy = dyn_cast<PointerType>(Base->getType())) {
- // Make sure the Base isn't something exotic, such as a multiplied
- // or divided pointer value. In those cases, the result type isn't
- // actually a pointer type.
- if (!isa<SCEVMulExpr>(Base) && !isa<SCEVUDivExpr>(Base)) {
- Value *StartV = expand(Base);
- assert(StartV->getType() == PTy && "Pointer type mismatch for GEP!");
- return expandAddToGEP(ExposedRest, PTy, Ty, StartV);
- }
- }
-
// Just do a normal add. Pre-expand the operands to suppress folding.
//
// The LHS and RHS values are factored out of the expand call to make the
@@ -1898,6 +1833,22 @@ Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty, bool Root) {
return V;
}
+/// Check whether the value has nuw/nsw/exact set but the SCEV does not.
+/// TODO: In reality it is better to check for poison recursively
+/// but this is better than nothing.
+static bool SCEVLostPoisonFlags(const SCEV *S, const Instruction *I) {
+ if (isa<OverflowingBinaryOperator>(I)) {
+ if (auto *NS = dyn_cast<SCEVNAryExpr>(S)) {
+ if (I->hasNoSignedWrap() && !NS->hasNoSignedWrap())
+ return true;
+ if (I->hasNoUnsignedWrap() && !NS->hasNoUnsignedWrap())
+ return true;
+ }
+ } else if (isa<PossiblyExactOperator>(I) && I->isExact())
+ return true;
+ return false;
+}
+
ScalarEvolution::ValueOffsetPair
SCEVExpander::FindValueInExprValueMap(const SCEV *S,
const Instruction *InsertPt) {
@@ -1907,19 +1858,22 @@ SCEVExpander::FindValueInExprValueMap(const SCEV *S,
if (CanonicalMode || !SE.containsAddRecurrence(S)) {
// If S is scConstant, it may be worse to reuse an existing Value.
if (S->getSCEVType() != scConstant && Set) {
- // Choose a Value from the set which dominates the insertPt.
- // insertPt should be inside the Value's parent loop so as not to break
+ // Choose a Value from the set which dominates the InsertPt.
+ // InsertPt should be inside the Value's parent loop so as not to break
// the LCSSA form.
for (auto const &VOPair : *Set) {
Value *V = VOPair.first;
ConstantInt *Offset = VOPair.second;
- Instruction *EntInst = nullptr;
- if (V && isa<Instruction>(V) && (EntInst = cast<Instruction>(V)) &&
- S->getType() == V->getType() &&
- EntInst->getFunction() == InsertPt->getFunction() &&
+ Instruction *EntInst = dyn_cast_or_null<Instruction>(V);
+ if (!EntInst)
+ continue;
+
+ assert(EntInst->getFunction() == InsertPt->getFunction());
+ if (S->getType() == V->getType() &&
SE.DT.dominates(EntInst, InsertPt) &&
(SE.LI.getLoopFor(EntInst->getParent()) == nullptr ||
- SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt)))
+ SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt)) &&
+ !SCEVLostPoisonFlags(S, EntInst))
return {V, Offset};
}
}
@@ -2068,7 +2022,9 @@ SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
Phis.push_back(&PN);
if (TTI)
- llvm::sort(Phis, [](Value *LHS, Value *RHS) {
+ // Use stable_sort to preserve order of equivalent PHIs, so the order
+ // of the sorted Phis is the same from run to run on the same loop.
+ llvm::stable_sort(Phis, [](Value *LHS, Value *RHS) {
// Put pointers at the back and make sure pointer < pointer = false.
if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
return RHS->getType()->isIntegerTy() && !LHS->getType()->isIntegerTy();
@@ -2524,18 +2480,14 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
IntegerType *Ty =
IntegerType::get(Loc->getContext(), SE.getTypeSizeInBits(ARTy));
- Type *ARExpandTy = DL.isNonIntegralPointerType(ARTy) ? ARTy : Ty;
Value *StepValue = expandCodeForImpl(Step, Ty, Loc, false);
Value *NegStepValue =
expandCodeForImpl(SE.getNegativeSCEV(Step), Ty, Loc, false);
- Value *StartValue = expandCodeForImpl(
- isa<PointerType>(ARExpandTy) ? Start
- : SE.getPtrToIntExpr(Start, ARExpandTy),
- ARExpandTy, Loc, false);
+ Value *StartValue = expandCodeForImpl(Start, ARTy, Loc, false);
ConstantInt *Zero =
- ConstantInt::get(Loc->getContext(), APInt::getNullValue(DstBits));
+ ConstantInt::get(Loc->getContext(), APInt::getZero(DstBits));
Builder.SetInsertPoint(Loc);
// Compute |Step|
@@ -2544,25 +2496,33 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
// Get the backedge taken count and truncate or extended to the AR type.
Value *TruncTripCount = Builder.CreateZExtOrTrunc(TripCountVal, Ty);
- auto *MulF = Intrinsic::getDeclaration(Loc->getModule(),
- Intrinsic::umul_with_overflow, Ty);
// Compute |Step| * Backedge
- CallInst *Mul = Builder.CreateCall(MulF, {AbsStep, TruncTripCount}, "mul");
- Value *MulV = Builder.CreateExtractValue(Mul, 0, "mul.result");
- Value *OfMul = Builder.CreateExtractValue(Mul, 1, "mul.overflow");
+ Value *MulV, *OfMul;
+ if (Step->isOne()) {
+    // Special-case Step of one: the potentially costly `umul_with_overflow`
+    // isn't needed because there is never an overflow, so to avoid
+    // artificially inflating the cost of the check, directly emit the
+    // optimized IR.
+ MulV = TruncTripCount;
+ OfMul = ConstantInt::getFalse(MulV->getContext());
+ } else {
+ auto *MulF = Intrinsic::getDeclaration(Loc->getModule(),
+ Intrinsic::umul_with_overflow, Ty);
+ CallInst *Mul = Builder.CreateCall(MulF, {AbsStep, TruncTripCount}, "mul");
+ MulV = Builder.CreateExtractValue(Mul, 0, "mul.result");
+ OfMul = Builder.CreateExtractValue(Mul, 1, "mul.overflow");
+ }
// Compute:
// Start + |Step| * Backedge < Start
// Start - |Step| * Backedge > Start
Value *Add = nullptr, *Sub = nullptr;
- if (PointerType *ARPtrTy = dyn_cast<PointerType>(ARExpandTy)) {
- const SCEV *MulS = SE.getSCEV(MulV);
- const SCEV *NegMulS = SE.getNegativeSCEV(MulS);
- Add = Builder.CreateBitCast(expandAddToGEP(MulS, ARPtrTy, Ty, StartValue),
- ARPtrTy);
- Sub = Builder.CreateBitCast(
- expandAddToGEP(NegMulS, ARPtrTy, Ty, StartValue), ARPtrTy);
+ if (PointerType *ARPtrTy = dyn_cast<PointerType>(ARTy)) {
+ StartValue = InsertNoopCastOfTo(
+ StartValue, Builder.getInt8PtrTy(ARPtrTy->getAddressSpace()));
+ Value *NegMulV = Builder.CreateNeg(MulV);
+ Add = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, MulV);
+ Sub = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, NegMulV);
} else {
Add = Builder.CreateAdd(StartValue, MulV);
Sub = Builder.CreateSub(StartValue, MulV);
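A brief scalar sketch of why the Step == 1 special case above is sound (plain C++; __builtin_mul_overflow is the Clang/GCC builtin the generic path's umul_with_overflow corresponds to):

#include <cstdint>

// Generic path: |Step| * BackedgeTakenCount may overflow, so both the product
// and an overflow flag are needed.
static bool mulStepGeneric(uint32_t AbsStep, uint32_t BTC, uint32_t &Product) {
  return __builtin_mul_overflow(AbsStep, BTC, &Product); // true on overflow
}

// Step == 1 path: 1 * BTC == BTC can never overflow, so the multiply folds to
// the trip count and the overflow flag folds to false.
static bool mulStepOne(uint32_t BTC, uint32_t &Product) {
  Product = BTC;   // MulV = TruncTripCount
  return false;    // OfMul = i1 false
}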
@@ -2686,9 +2646,11 @@ namespace {
// perfectly reduced form, which can't be guaranteed.
struct SCEVFindUnsafe {
ScalarEvolution &SE;
+ bool CanonicalMode;
bool IsUnsafe;
- SCEVFindUnsafe(ScalarEvolution &se): SE(se), IsUnsafe(false) {}
+ SCEVFindUnsafe(ScalarEvolution &SE, bool CanonicalMode)
+ : SE(SE), CanonicalMode(CanonicalMode), IsUnsafe(false) {}
bool follow(const SCEV *S) {
if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
@@ -2704,6 +2666,14 @@ struct SCEVFindUnsafe {
IsUnsafe = true;
return false;
}
+
+ // For non-affine addrecs or in non-canonical mode we need a preheader
+ // to insert into.
+ if (!AR->getLoop()->getLoopPreheader() &&
+ (!CanonicalMode || !AR->isAffine())) {
+ IsUnsafe = true;
+ return false;
+ }
}
return true;
}
@@ -2712,8 +2682,8 @@ struct SCEVFindUnsafe {
}
namespace llvm {
-bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE) {
- SCEVFindUnsafe Search(SE);
+bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE, bool CanonicalMode) {
+ SCEVFindUnsafe Search(SE, CanonicalMode);
visitAll(S, Search);
return !Search.IsUnsafe;
}
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 583bb379488e..f467de5f924e 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -25,6 +25,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/GuardUtils.h"
@@ -159,6 +160,13 @@ static cl::opt<unsigned>
cl::desc("Maximum cost of combining conditions when "
"folding branches"));
+static cl::opt<unsigned> BranchFoldToCommonDestVectorMultiplier(
+ "simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden,
+ cl::init(2),
+ cl::desc("Multiplier to apply to threshold when determining whether or not "
+ "to fold branch to common destination when vector operations are "
+ "present"));
+
STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
STATISTIC(NumLinearMaps,
"Number of switch instructions turned into linear mapping");
@@ -272,7 +280,6 @@ public:
}
bool simplifyOnce(BasicBlock *BB);
- bool simplifyOnceImpl(BasicBlock *BB);
bool run(BasicBlock *BB);
// Helper to set Resimplify and return change indication.
@@ -1094,17 +1101,24 @@ static void CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(
// Update (liveout) uses of bonus instructions,
// now that the bonus instruction has been cloned into predecessor.
- SSAUpdater SSAUpdate;
- SSAUpdate.Initialize(BonusInst.getType(),
- (NewBonusInst->getName() + ".merge").str());
- SSAUpdate.AddAvailableValue(BB, &BonusInst);
- SSAUpdate.AddAvailableValue(PredBlock, NewBonusInst);
+ // Note that we expect to be in a block-closed SSA form for this to work!
for (Use &U : make_early_inc_range(BonusInst.uses())) {
auto *UI = cast<Instruction>(U.getUser());
- if (UI->getParent() != PredBlock)
- SSAUpdate.RewriteUseAfterInsertions(U);
- else // Use is in the same block as, and comes before, NewBonusInst.
- SSAUpdate.RewriteUse(U);
+ auto *PN = dyn_cast<PHINode>(UI);
+ if (!PN) {
+ assert(UI->getParent() == BB && BonusInst.comesBefore(UI) &&
+ "If the user is not a PHI node, then it should be in the same "
+ "block as, and come after, the original bonus instruction.");
+ continue; // Keep using the original bonus instruction.
+ }
+ // Is this the block-closed SSA form PHI node?
+ if (PN->getIncomingBlock(U) == BB)
+ continue; // Great, keep using the original bonus instruction.
+      // The only other alternative is a use when coming from
+ // the predecessor block - here we should refer to the cloned bonus instr.
+ assert(PN->getIncomingBlock(U) == PredBlock &&
+ "Not in block-closed SSA form?");
+ U.set(NewBonusInst);
}
}
}
@@ -2044,7 +2058,7 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB,
unsigned NumPHIdValues = 0;
for (auto *I : *LRI)
for (auto *V : PHIOperands[I]) {
- if (InstructionsToSink.count(V) == 0)
+ if (!InstructionsToSink.contains(V))
++NumPHIdValues;
// FIXME: this check is overly optimistic. We may end up not sinking
// said instruction, due to the very same profitability check.
@@ -2250,6 +2264,23 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
return SI->getValueOperand();
return nullptr; // Unknown store.
}
+
+ if (auto *LI = dyn_cast<LoadInst>(&CurI)) {
+ if (LI->getPointerOperand() == StorePtr && LI->getType() == StoreTy &&
+ LI->isSimple()) {
+ // Local objects (created by an `alloca` instruction) are always
+ // writable, so once we are past a read from a location it is valid to
+ // also write to that same location.
+ // If the address of the local object never escapes the function, that
+ // means it's never concurrently read or written, hence moving the store
+ // from under the condition will not introduce a data race.
+ auto *AI = dyn_cast<AllocaInst>(getUnderlyingObject(StorePtr));
+ if (AI && !PointerMayBeCaptured(AI, false, true))
+ // Found a previous load, return it.
+ return LI;
+ }
+ // The load didn't work out, but we may still find a store.
+ }
}
return nullptr;
@@ -2545,17 +2576,17 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
int Size = 0;
SmallPtrSet<const Value *, 32> EphValues;
- auto IsEphemeral = [&](const Value *V) {
- if (isa<AssumeInst>(V))
+ auto IsEphemeral = [&](const Instruction *I) {
+ if (isa<AssumeInst>(I))
return true;
- return isSafeToSpeculativelyExecute(V) &&
- all_of(V->users(),
+ return !I->mayHaveSideEffects() && !I->isTerminator() &&
+ all_of(I->users(),
[&](const User *U) { return EphValues.count(U); });
};
// Walk the loop in reverse so that we can identify ephemeral values properly
// (values only feeding assumes).
- for (Instruction &I : reverse(BB->instructionsWithoutDebug())) {
+ for (Instruction &I : reverse(BB->instructionsWithoutDebug(false))) {
// Can't fold blocks that contain noduplicate or convergent calls.
if (CallInst *CI = dyn_cast<CallInst>(&I))
if (CI->cannotDuplicate() || CI->isConvergent())
@@ -2588,8 +2619,10 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
/// If we have a conditional branch on a PHI node value that is defined in the
/// same block as the branch and if any PHI entries are constants, thread edges
/// corresponding to that entry to be branches to their ultimate destination.
-static bool FoldCondBranchOnPHI(BranchInst *BI, DomTreeUpdater *DTU,
- const DataLayout &DL, AssumptionCache *AC) {
+static Optional<bool> FoldCondBranchOnPHIImpl(BranchInst *BI,
+ DomTreeUpdater *DTU,
+ const DataLayout &DL,
+ AssumptionCache *AC) {
BasicBlock *BB = BI->getParent();
PHINode *PN = dyn_cast<PHINode>(BI->getCondition());
// NOTE: we currently cannot transform this case if the PHI node is used
@@ -2703,13 +2736,25 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, DomTreeUpdater *DTU,
DTU->applyUpdates(Updates);
}
- // Recurse, simplifying any other constants.
- return FoldCondBranchOnPHI(BI, DTU, DL, AC) || true;
+ // Signal repeat, simplifying any other constants.
+ return None;
}
return false;
}
+static bool FoldCondBranchOnPHI(BranchInst *BI, DomTreeUpdater *DTU,
+ const DataLayout &DL, AssumptionCache *AC) {
+ Optional<bool> Result;
+ bool EverChanged = false;
+ do {
+ // Note that None means "we changed things, but recurse further."
+ Result = FoldCondBranchOnPHIImpl(BI, DTU, DL, AC);
+ EverChanged |= Result == None || *Result;
+ } while (Result == None);
+ return EverChanged;
+}
+
/// Given a BB that starts with the specified two-entry PHI node,
/// see if we can eliminate it.
static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
@@ -2845,8 +2890,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
// instructions.
for (BasicBlock *IfBlock : IfBlocks)
for (BasicBlock::iterator I = IfBlock->begin(); !I->isTerminator(); ++I)
- if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I) &&
- !isa<PseudoProbeInst>(I)) {
+ if (!AggressiveInsts.count(&*I) && !I->isDebugOrPseudoInst()) {
// This is not an aggressive instruction that we can promote.
// Because of this, we won't be able to get rid of the control flow, so
// the xform is not worth it.
@@ -3105,6 +3149,14 @@ static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
return true;
}
+/// Return if an instruction's type or any of its operands' types are a vector
+/// type.
+static bool isVectorOp(Instruction &I) {
+ return I.getType()->isVectorTy() || any_of(I.operands(), [](Use &U) {
+ return U->getType()->isVectorTy();
+ });
+}
+
/// If this basic block is simple enough, and if a predecessor branches to us
/// and one of our successors, fold the block into the predecessor and use
/// logical operations to pick the right destination.
@@ -3189,6 +3241,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
// number of the bonus instructions we'll need to create when cloning into
// each predecessor does not exceed a certain threshold.
unsigned NumBonusInsts = 0;
+ bool SawVectorOp = false;
const unsigned PredCount = Preds.size();
for (Instruction &I : *BB) {
// Don't check the branch condition comparison itself.
@@ -3200,14 +3253,35 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
// I must be safe to execute unconditionally.
if (!isSafeToSpeculativelyExecute(&I))
return false;
+ SawVectorOp |= isVectorOp(I);
// Account for the cost of duplicating this instruction into each
- // predecessor.
- NumBonusInsts += PredCount;
- // Early exits once we reach the limit.
- if (NumBonusInsts > BonusInstThreshold)
+ // predecessor. Ignore free instructions.
+ if (!TTI ||
+ TTI->getUserCost(&I, CostKind) != TargetTransformInfo::TCC_Free) {
+ NumBonusInsts += PredCount;
+
+ // Early exits once we reach the limit.
+ if (NumBonusInsts >
+ BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
+ return false;
+ }
+
+ auto IsBCSSAUse = [BB, &I](Use &U) {
+ auto *UI = cast<Instruction>(U.getUser());
+ if (auto *PN = dyn_cast<PHINode>(UI))
+ return PN->getIncomingBlock(U) == BB;
+ return UI->getParent() == BB && I.comesBefore(UI);
+ };
+
+ // Does this instruction require rewriting of uses?
+ if (!all_of(I.uses(), IsBCSSAUse))
return false;
}
+ if (NumBonusInsts >
+ BonusInstThreshold *
+ (SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1))
+ return false;
// Ok, we have the budget. Perform the transformation.
for (BasicBlock *PredBlock : Preds) {
@@ -3340,7 +3414,7 @@ static bool mergeConditionalStoreToAddress(
InstructionCost Cost = 0;
InstructionCost Budget =
PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
- for (auto &I : BB->instructionsWithoutDebug()) {
+ for (auto &I : BB->instructionsWithoutDebug(false)) {
// Consider terminator instruction to be free.
if (I.isTerminator())
continue;
@@ -3413,10 +3487,7 @@ static bool mergeConditionalStoreToAddress(
/*BranchWeights=*/nullptr, DTU);
QB.SetInsertPoint(T);
StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address));
- AAMDNodes AAMD;
- PStore->getAAMetadata(AAMD, /*Merge=*/false);
- PStore->getAAMetadata(AAMD, /*Merge=*/true);
- SI->setAAMetadata(AAMD);
+ SI->setAAMetadata(PStore->getAAMetadata().merge(QStore->getAAMetadata()));
// Choose the minimum alignment. If we could prove both stores execute, we
// could use biggest one. In this case, though, we only know that one of the
// stores executes. And we don't know it's safe to take the alignment from a
@@ -3666,7 +3737,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
// fold the conditions into logical ops and one cond br.
// Ignore dbg intrinsics.
- if (&*BB->instructionsWithoutDebug().begin() != BI)
+ if (&*BB->instructionsWithoutDebug(false).begin() != BI)
return false;
int PBIOp, BIOp;
@@ -4711,29 +4782,6 @@ static bool CasesAreContiguous(SmallVectorImpl<ConstantInt *> &Cases) {
return true;
}
-static void createUnreachableSwitchDefault(SwitchInst *Switch,
- DomTreeUpdater *DTU) {
- LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
- auto *BB = Switch->getParent();
- BasicBlock *NewDefaultBlock = SplitBlockPredecessors(
- Switch->getDefaultDest(), Switch->getParent(), "", DTU);
- auto *OrigDefaultBlock = Switch->getDefaultDest();
- Switch->setDefaultDest(&*NewDefaultBlock);
- if (DTU)
- DTU->applyUpdates({{DominatorTree::Insert, BB, &*NewDefaultBlock},
- {DominatorTree::Delete, BB, OrigDefaultBlock}});
- SplitBlock(&*NewDefaultBlock, &NewDefaultBlock->front(), DTU);
- SmallVector<DominatorTree::UpdateType, 2> Updates;
- if (DTU)
- for (auto *Successor : successors(NewDefaultBlock))
- Updates.push_back({DominatorTree::Delete, NewDefaultBlock, Successor});
- auto *NewTerminator = NewDefaultBlock->getTerminator();
- new UnreachableInst(Switch->getContext(), NewTerminator);
- EraseTerminatorAndDCECond(NewTerminator);
- if (DTU)
- DTU->applyUpdates(Updates);
-}
-
/// Turn a switch with two reachable destinations into an integer range
/// comparison and branch.
bool SimplifyCFGOpt::TurnSwitchRangeIntoICmp(SwitchInst *SI,
@@ -5039,9 +5087,10 @@ static bool ValidLookupTableConstant(Constant *C, const TargetTransformInfo &TTI
return false;
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
- if (!CE->isGEPWithNoNotionalOverIndexing())
- return false;
- if (!ValidLookupTableConstant(CE->getOperand(0), TTI))
+ // Pointer casts and in-bounds GEPs will not prohibit the backend from
+ // materializing the array of constants.
+ Constant *StrippedC = cast<Constant>(CE->stripInBoundsConstantOffsets());
+ if (StrippedC == C || !ValidLookupTableConstant(StrippedC, TTI))
return false;
}
@@ -5111,7 +5160,7 @@ GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
// which we can constant-propagate the CaseVal, continue to its successor.
SmallDenseMap<Value *, Constant *> ConstantPool;
ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
- for (Instruction &I :CaseDest->instructionsWithoutDebug()) {
+ for (Instruction &I : CaseDest->instructionsWithoutDebug(false)) {
if (I.isTerminator()) {
// If the terminator is a simple branch, continue to the next block.
if (I.getNumSuccessors() != 1 || I.isExceptionalTerminator())
@@ -5604,8 +5653,32 @@ bool SwitchLookupTable::WouldFitInRegister(const DataLayout &DL,
return DL.fitsInLegalInteger(TableSize * IT->getBitWidth());
}
+static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI,
+ const DataLayout &DL) {
+ // Allow any legal type.
+ if (TTI.isTypeLegal(Ty))
+ return true;
+
+ auto *IT = dyn_cast<IntegerType>(Ty);
+ if (!IT)
+ return false;
+
+ // Also allow power of 2 integer types that have at least 8 bits and fit in
+ // a register. These types are common in frontend languages and targets
+ // usually support loads of these types.
+ // TODO: We could relax this to any integer that fits in a register and rely
+ // on ABI alignment and padding in the table to allow the load to be widened.
+ // Or we could widen the constants and truncate the load.
+ unsigned BitWidth = IT->getBitWidth();
+ return BitWidth >= 8 && isPowerOf2_32(BitWidth) &&
+ DL.fitsInLegalInteger(IT->getBitWidth());
+}
+
/// Determine whether a lookup table should be built for this switch, based on
/// the number of cases, size of the table, and the types of the results.
+// TODO: We could support larger than legal types by limiting based on the
+// number of loads required and/or table size. If the constants are small we
+// could use smaller table entries and extend after the load.
static bool
ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize,
const TargetTransformInfo &TTI, const DataLayout &DL,
@@ -5619,7 +5692,7 @@ ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize,
Type *Ty = I.second;
// Saturate this flag to true.
- HasIllegalType = HasIllegalType || !TTI.isTypeLegal(Ty);
+ HasIllegalType = HasIllegalType || !isTypeLegalForLookupTable(Ty, TTI, DL);
// Saturate this flag to false.
AllTablesFitInRegister =
@@ -6102,7 +6175,7 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
// If the block only contains the switch, see if we can fold the block
// away into any preds.
- if (SI == &*BB->instructionsWithoutDebug().begin())
+ if (SI == &*BB->instructionsWithoutDebug(false).begin())
if (FoldValueComparisonIntoPredecessors(SI, Builder))
return requestResimplify();
}
@@ -6246,12 +6319,9 @@ static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI,
// The debug info in OtherPred doesn't cover the merged control flow that
// used to go through BB. We need to delete it or update it.
- for (auto I = OtherPred->begin(), E = OtherPred->end(); I != E;) {
- Instruction &Inst = *I;
- I++;
+ for (Instruction &Inst : llvm::make_early_inc_range(*OtherPred))
if (isa<DbgInfoIntrinsic>(Inst))
Inst.eraseFromParent();
- }
SmallPtrSet<BasicBlock *, 16> Succs(succ_begin(BB), succ_end(BB));
for (BasicBlock *Succ : Succs) {
@@ -6338,6 +6408,11 @@ static BasicBlock *allPredecessorsComeFromSameSource(BasicBlock *BB) {
}
bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
+ assert(
+ !isa<ConstantInt>(BI->getCondition()) &&
+ BI->getSuccessor(0) != BI->getSuccessor(1) &&
+ "Tautological conditional branch should have been eliminated already.");
+
BasicBlock *BB = BI->getParent();
if (!Options.SimplifyCondBranch)
return false;
@@ -6452,19 +6527,21 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValu
if (C->isNullValue() || isa<UndefValue>(C)) {
// Only look at the first use, avoid hurting compile time with long uselists
- User *Use = *I->user_begin();
+ auto *Use = cast<Instruction>(*I->user_begin());
+ // Bail out if Use is not in the same BB as I or Use == I or Use comes
+ // before I in the block. The latter two can be the case if Use is a PHI
+ // node.
+ if (Use->getParent() != I->getParent() || Use == I || Use->comesBefore(I))
+ return false;
// Now make sure that there are no instructions in between that can alter
// control flow (eg. calls)
- for (BasicBlock::iterator
- i = ++BasicBlock::iterator(I),
- UI = BasicBlock::iterator(dyn_cast<Instruction>(Use));
- i != UI; ++i) {
- if (i == I->getParent()->end())
- return false;
- if (!isGuaranteedToTransferExecutionToSuccessor(&*i))
- return false;
- }
+ auto InstrRange =
+ make_range(std::next(I->getIterator()), Use->getIterator());
+ if (any_of(InstrRange, [](Instruction &I) {
+ return !isGuaranteedToTransferExecutionToSuccessor(&I);
+ }))
+ return false;
// Look through GEPs. A load from a GEP derived from NULL is still undefined
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Use))
@@ -6540,21 +6617,51 @@ static bool removeUndefIntroducingPredecessor(BasicBlock *BB,
// destination from conditional branches.
if (BI->isUnconditional())
Builder.CreateUnreachable();
- else
+ else {
+ // Preserve guarding condition in assume, because it might not be
+ // inferrable from any dominating condition.
+ Value *Cond = BI->getCondition();
+ if (BI->getSuccessor(0) == BB)
+ Builder.CreateAssumption(Builder.CreateNot(Cond));
+ else
+ Builder.CreateAssumption(Cond);
Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1)
: BI->getSuccessor(0));
+ }
BI->eraseFromParent();
if (DTU)
DTU->applyUpdates({{DominatorTree::Delete, Predecessor, BB}});
return true;
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
+ // Redirect all branches leading to UB into
+ // a newly created unreachable block.
+ BasicBlock *Unreachable = BasicBlock::Create(
+ Predecessor->getContext(), "unreachable", BB->getParent(), BB);
+ Builder.SetInsertPoint(Unreachable);
+ // The new block contains only one instruction: Unreachable
+ Builder.CreateUnreachable();
+ for (auto &Case : SI->cases())
+ if (Case.getCaseSuccessor() == BB) {
+ BB->removePredecessor(Predecessor);
+ Case.setSuccessor(Unreachable);
+ }
+ if (SI->getDefaultDest() == BB) {
+ BB->removePredecessor(Predecessor);
+ SI->setDefaultDest(Unreachable);
+ }
+
+ if (DTU)
+ DTU->applyUpdates(
+ { { DominatorTree::Insert, Predecessor, Unreachable },
+ { DominatorTree::Delete, Predecessor, BB } });
+ return true;
}
- // TODO: SwitchInst.
}
return false;
}
-bool SimplifyCFGOpt::simplifyOnceImpl(BasicBlock *BB) {
+bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
bool Changed = false;
assert(BB && BB->getParent() && "Block not embedded in function!");
@@ -6578,7 +6685,8 @@ bool SimplifyCFGOpt::simplifyOnceImpl(BasicBlock *BB) {
Changed |= EliminateDuplicatePHINodes(BB);
// Check for and remove branches that will always cause undefined behavior.
- Changed |= removeUndefIntroducingPredecessor(BB, DTU);
+ if (removeUndefIntroducingPredecessor(BB, DTU))
+ return requestResimplify();
// Merge basic blocks into their predecessor if there is only one distinct
// pred, and if there is only one distinct successor of the predecessor, and
@@ -6603,7 +6711,8 @@ bool SimplifyCFGOpt::simplifyOnceImpl(BasicBlock *BB) {
// eliminate it, do so now.
if (auto *PN = dyn_cast<PHINode>(BB->begin()))
if (PN->getNumIncomingValues() == 2)
- Changed |= FoldTwoEntryPHINode(PN, TTI, DTU, DL);
+ if (FoldTwoEntryPHINode(PN, TTI, DTU, DL))
+ return true;
}
Instruction *Terminator = BB->getTerminator();
@@ -6632,12 +6741,6 @@ bool SimplifyCFGOpt::simplifyOnceImpl(BasicBlock *BB) {
return Changed;
}
-bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
- bool Changed = simplifyOnceImpl(BB);
-
- return Changed;
-}
-
bool SimplifyCFGOpt::run(BasicBlock *BB) {
bool Changed = false;
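
(Illustrative note, not part of the patch.) The FoldCondBranchOnPHI split above replaces tail recursion with an Impl function whose return value distinguishes "no change" (false), "changed and done" (true), and "changed, run again" (None), consumed by a do/while wrapper. A minimal standalone sketch of the same pattern, using std::optional in place of llvm::Optional/None:

  #include <optional>

  // false = nothing changed, true = changed and finished,
  // std::nullopt = changed, caller should run the step again.
  static std::optional<bool> simplifyOnceImpl(int &State) {
    if (State == 0)
      return false;       // nothing to do
    --State;              // made progress
    if (State == 0)
      return true;        // changed, fixpoint reached
    return std::nullopt;  // changed, ask the caller to repeat
  }

  static bool simplifyToFixpoint(int &State) {
    std::optional<bool> Result;
    bool EverChanged = false;
    do {
      Result = simplifyOnceImpl(State);
      EverChanged |= !Result || *Result;  // nullopt or true both mean "changed"
    } while (!Result);                    // repeat while told to run again
    return EverChanged;
  }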
diff --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
index bd30be011472..5b7fd4349c6c 100644
--- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -942,6 +942,7 @@ bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT,
} // namespace llvm
+namespace {
//===----------------------------------------------------------------------===//
// Widen Induction Variables - Extend the width of an IV to cover its
// widest uses.
@@ -1072,7 +1073,7 @@ protected:
private:
SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
};
-
+} // namespace
/// Determine the insertion point for this user. By default, insert immediately
/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
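
(Illustrative note, not part of the patch.) The SimplifyIndVar.cpp hunks only wrap the widening machinery in an anonymous namespace, giving those names internal linkage so they cannot collide with same-named symbols in another translation unit. A minimal sketch of the idiom, with a stand-in class name:

  namespace {
  // Internal linkage: visible only inside this translation unit, so another
  // .cpp file may define its own 'Widener' without an ODR conflict.
  class Widener {
  public:
    int widen(int X) const { return X * 2; }
  };
  } // namespace

  int useWidener(int X) { return Widener().widen(X); }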
diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index b8e0f63c481d..e190a1294eb3 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -142,12 +142,10 @@ static void annotateDereferenceableBytes(CallInst *CI,
unsigned AS = CI->getArgOperand(ArgNo)->getType()->getPointerAddressSpace();
if (!llvm::NullPointerIsDefined(F, AS) ||
CI->paramHasAttr(ArgNo, Attribute::NonNull))
- DerefBytes = std::max(CI->getDereferenceableOrNullBytes(
- ArgNo + AttributeList::FirstArgIndex),
+ DerefBytes = std::max(CI->getParamDereferenceableOrNullBytes(ArgNo),
DereferenceableBytes);
-
- if (CI->getDereferenceableBytes(ArgNo + AttributeList::FirstArgIndex) <
- DerefBytes) {
+
+ if (CI->getParamDereferenceableBytes(ArgNo) < DerefBytes) {
CI->removeParamAttr(ArgNo, Attribute::Dereferenceable);
if (!llvm::NullPointerIsDefined(F, AS) ||
CI->paramHasAttr(ArgNo, Attribute::NonNull))
@@ -512,14 +510,18 @@ Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilderBase &B) {
B.CreateMemCpy(Dst, Align(1), Src, Align(1),
ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len));
NewCI->setAttributes(CI->getAttributes());
- NewCI->removeAttributes(AttributeList::ReturnIndex,
- AttributeFuncs::typeIncompatible(NewCI->getType()));
+ NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType()));
return Dst;
}
Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilderBase &B) {
Function *Callee = CI->getCalledFunction();
Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
+
+ // stpcpy(d,s) -> strcpy(d,s) if the result is not used.
+ if (CI->use_empty())
+ return emitStrCpy(Dst, Src, B, TLI);
+
if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x)
Value *StrLen = emitStrLen(Src, B, DL, TLI);
return StrLen ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, StrLen) : nullptr;
@@ -541,8 +543,7 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilderBase &B) {
// copy for us. Make a memcpy to copy the nul byte with align = 1.
CallInst *NewCI = B.CreateMemCpy(Dst, Align(1), Src, Align(1), LenV);
NewCI->setAttributes(CI->getAttributes());
- NewCI->removeAttributes(AttributeList::ReturnIndex,
- AttributeFuncs::typeIncompatible(NewCI->getType()));
+ NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType()));
return DstEnd;
}
@@ -577,9 +578,9 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilderBase &B) {
if (SrcLen == 0) {
// strncpy(x, "", y) -> memset(x, '\0', y)
Align MemSetAlign =
- CI->getAttributes().getParamAttributes(0).getAlignment().valueOrOne();
+ CI->getAttributes().getParamAttrs(0).getAlignment().valueOrOne();
CallInst *NewCI = B.CreateMemSet(Dst, B.getInt8('\0'), Size, MemSetAlign);
- AttrBuilder ArgAttrs(CI->getAttributes().getParamAttributes(0));
+ AttrBuilder ArgAttrs(CI->getAttributes().getParamAttrs(0));
NewCI->setAttributes(NewCI->getAttributes().addParamAttributes(
CI->getContext(), 0, ArgAttrs));
return Dst;
@@ -604,8 +605,7 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilderBase &B) {
CallInst *NewCI = B.CreateMemCpy(Dst, Align(1), Src, Align(1),
ConstantInt::get(DL.getIntPtrType(PT), Len));
NewCI->setAttributes(CI->getAttributes());
- NewCI->removeAttributes(AttributeList::ReturnIndex,
- AttributeFuncs::typeIncompatible(NewCI->getType()));
+ NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType()));
return Dst;
}
@@ -1082,8 +1082,7 @@ Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilderBase &B) {
CallInst *NewCI = B.CreateMemCpy(CI->getArgOperand(0), Align(1),
CI->getArgOperand(1), Align(1), Size);
NewCI->setAttributes(CI->getAttributes());
- NewCI->removeAttributes(AttributeList::ReturnIndex,
- AttributeFuncs::typeIncompatible(NewCI->getType()));
+ NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType()));
return CI->getArgOperand(0);
}
@@ -1136,8 +1135,7 @@ Value *LibCallSimplifier::optimizeMemPCpy(CallInst *CI, IRBuilderBase &B) {
// any return attributes are compliant.
// TODO: Attach return value attributes to the 1st operand to preserve them?
NewCI->setAttributes(CI->getAttributes());
- NewCI->removeAttributes(AttributeList::ReturnIndex,
- AttributeFuncs::typeIncompatible(NewCI->getType()));
+ NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType()));
return B.CreateInBoundsGEP(B.getInt8Ty(), Dst, N);
}
@@ -1151,70 +1149,21 @@ Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilderBase &B) {
CallInst *NewCI = B.CreateMemMove(CI->getArgOperand(0), Align(1),
CI->getArgOperand(1), Align(1), Size);
NewCI->setAttributes(CI->getAttributes());
- NewCI->removeAttributes(AttributeList::ReturnIndex,
- AttributeFuncs::typeIncompatible(NewCI->getType()));
+ NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType()));
return CI->getArgOperand(0);
}
-/// Fold memset[_chk](malloc(n), 0, n) --> calloc(1, n).
-Value *LibCallSimplifier::foldMallocMemset(CallInst *Memset, IRBuilderBase &B) {
- // This has to be a memset of zeros (bzero).
- auto *FillValue = dyn_cast<ConstantInt>(Memset->getArgOperand(1));
- if (!FillValue || FillValue->getZExtValue() != 0)
- return nullptr;
-
- // TODO: We should handle the case where the malloc has more than one use.
- // This is necessary to optimize common patterns such as when the result of
- // the malloc is checked against null or when a memset intrinsic is used in
- // place of a memset library call.
- auto *Malloc = dyn_cast<CallInst>(Memset->getArgOperand(0));
- if (!Malloc || !Malloc->hasOneUse())
- return nullptr;
-
- // Is the inner call really malloc()?
- Function *InnerCallee = Malloc->getCalledFunction();
- if (!InnerCallee)
- return nullptr;
-
- LibFunc Func;
- if (!TLI->getLibFunc(*InnerCallee, Func) || !TLI->has(Func) ||
- Func != LibFunc_malloc)
- return nullptr;
-
- // The memset must cover the same number of bytes that are malloc'd.
- if (Memset->getArgOperand(2) != Malloc->getArgOperand(0))
- return nullptr;
-
- // Replace the malloc with a calloc. We need the data layout to know what the
- // actual size of a 'size_t' parameter is.
- B.SetInsertPoint(Malloc->getParent(), ++Malloc->getIterator());
- const DataLayout &DL = Malloc->getModule()->getDataLayout();
- IntegerType *SizeType = DL.getIntPtrType(B.GetInsertBlock()->getContext());
- if (Value *Calloc = emitCalloc(ConstantInt::get(SizeType, 1),
- Malloc->getArgOperand(0),
- Malloc->getAttributes(), B, *TLI)) {
- substituteInParent(Malloc, Calloc);
- return Calloc;
- }
-
- return nullptr;
-}
-
Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilderBase &B) {
Value *Size = CI->getArgOperand(2);
annotateNonNullAndDereferenceable(CI, 0, Size, DL);
if (isa<IntrinsicInst>(CI))
return nullptr;
- if (auto *Calloc = foldMallocMemset(CI, B))
- return Calloc;
-
// memset(p, v, n) -> llvm.memset(align 1 p, v, n)
Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
CallInst *NewCI = B.CreateMemSet(CI->getArgOperand(0), Val, Size, Align(1));
NewCI->setAttributes(CI->getAttributes());
- NewCI->removeAttributes(AttributeList::ReturnIndex,
- AttributeFuncs::typeIncompatible(NewCI->getType()));
+ NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType()));
return CI->getArgOperand(0);
}
@@ -1346,13 +1295,13 @@ Value *LibCallSimplifier::optimizeCAbs(CallInst *CI, IRBuilderBase &B) {
B.setFastMathFlags(CI->getFastMathFlags());
Value *Real, *Imag;
- if (CI->getNumArgOperands() == 1) {
+ if (CI->arg_size() == 1) {
Value *Op = CI->getArgOperand(0);
assert(Op->getType()->isArrayTy() && "Unexpected signature for cabs!");
Real = B.CreateExtractValue(Op, 0, "real");
Imag = B.CreateExtractValue(Op, 1, "imag");
} else {
- assert(CI->getNumArgOperands() == 2 && "Unexpected signature for cabs!");
+ assert(CI->arg_size() == 2 && "Unexpected signature for cabs!");
Real = CI->getArgOperand(0);
Imag = CI->getArgOperand(1);
}
@@ -2333,7 +2282,7 @@ Value *LibCallSimplifier::optimizeErrorReporting(CallInst *CI, IRBuilderBase &B,
// Proceedings of PACT'98, Oct. 1998, IEEE
if (!CI->hasFnAttr(Attribute::Cold) &&
isReportingError(Callee, CI, StreamArg)) {
- CI->addAttribute(AttributeList::FunctionIndex, Attribute::Cold);
+ CI->addFnAttr(Attribute::Cold);
}
return nullptr;
@@ -2349,7 +2298,7 @@ static bool isReportingError(Function *Callee, CallInst *CI, int StreamArg) {
// These functions might be considered cold, but only if their stream
// argument is stderr.
- if (StreamArg >= (int)CI->getNumArgOperands())
+ if (StreamArg >= (int)CI->arg_size())
return false;
LoadInst *LI = dyn_cast<LoadInst>(CI->getArgOperand(StreamArg));
if (!LI)
@@ -2381,7 +2330,7 @@ Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilderBase &B) {
return emitPutChar(B.getInt32(FormatStr[0]), B, TLI);
// Try to remove call or emit putchar/puts.
- if (FormatStr == "%s" && CI->getNumArgOperands() > 1) {
+ if (FormatStr == "%s" && CI->arg_size() > 1) {
StringRef OperandStr;
if (!getConstantStringInfo(CI->getOperand(1), OperandStr))
return nullptr;
@@ -2402,7 +2351,7 @@ Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilderBase &B) {
// printf("foo\n") --> puts("foo")
if (FormatStr.back() == '\n' &&
- FormatStr.find('%') == StringRef::npos) { // No format characters.
+ !FormatStr.contains('%')) { // No format characters.
// Create a string literal with no \n on it. We expect the constant merge
// pass to be run after this pass, to merge duplicate strings.
FormatStr = FormatStr.drop_back();
@@ -2412,12 +2361,12 @@ Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilderBase &B) {
// Optimize specific format strings.
// printf("%c", chr) --> putchar(chr)
- if (FormatStr == "%c" && CI->getNumArgOperands() > 1 &&
+ if (FormatStr == "%c" && CI->arg_size() > 1 &&
CI->getArgOperand(1)->getType()->isIntegerTy())
return emitPutChar(CI->getArgOperand(1), B, TLI);
// printf("%s\n", str) --> puts(str)
- if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 &&
+ if (FormatStr == "%s\n" && CI->arg_size() > 1 &&
CI->getArgOperand(1)->getType()->isPointerTy())
return emitPutS(CI->getArgOperand(1), B, TLI);
return nullptr;
@@ -2469,10 +2418,10 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI,
// If we just have a format string (nothing else crazy) transform it.
Value *Dest = CI->getArgOperand(0);
- if (CI->getNumArgOperands() == 2) {
+ if (CI->arg_size() == 2) {
// Make sure there's no % in the constant array. We could try to handle
// %% -> % in the future if we cared.
- if (FormatStr.find('%') != StringRef::npos)
+ if (FormatStr.contains('%'))
return nullptr; // we found a format specifier, bail out.
// sprintf(str, fmt) -> llvm.memcpy(align 1 str, align 1 fmt, strlen(fmt)+1)
@@ -2485,8 +2434,7 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI,
// The remaining optimizations require the format string to be "%s" or "%c"
// and have an extra operand.
- if (FormatStr.size() != 2 || FormatStr[0] != '%' ||
- CI->getNumArgOperands() < 3)
+ if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->arg_size() < 3)
return nullptr;
// Decode the second character of the format string.
@@ -2597,10 +2545,10 @@ Value *LibCallSimplifier::optimizeSnPrintFString(CallInst *CI,
return nullptr;
// If we just have a format string (nothing else crazy) transform it.
- if (CI->getNumArgOperands() == 3) {
+ if (CI->arg_size() == 3) {
// Make sure there's no % in the constant array. We could try to handle
// %% -> % in the future if we cared.
- if (FormatStr.find('%') != StringRef::npos)
+ if (FormatStr.contains('%'))
return nullptr; // we found a format specifier, bail out.
if (N == 0)
@@ -2619,8 +2567,7 @@ Value *LibCallSimplifier::optimizeSnPrintFString(CallInst *CI,
// The remaining optimizations require the format string to be "%s" or "%c"
// and have an extra operand.
- if (FormatStr.size() == 2 && FormatStr[0] == '%' &&
- CI->getNumArgOperands() == 4) {
+ if (FormatStr.size() == 2 && FormatStr[0] == '%' && CI->arg_size() == 4) {
// Decode the second character of the format string.
if (FormatStr[1] == 'c') {
@@ -2688,9 +2635,9 @@ Value *LibCallSimplifier::optimizeFPrintFString(CallInst *CI,
return nullptr;
// fprintf(F, "foo") --> fwrite("foo", 3, 1, F)
- if (CI->getNumArgOperands() == 2) {
+ if (CI->arg_size() == 2) {
// Could handle %% -> % if we cared.
- if (FormatStr.find('%') != StringRef::npos)
+ if (FormatStr.contains('%'))
return nullptr; // We found a format specifier.
return emitFWrite(
@@ -2701,8 +2648,7 @@ Value *LibCallSimplifier::optimizeFPrintFString(CallInst *CI,
// The remaining optimizations require the format string to be "%s" or "%c"
// and have an extra operand.
- if (FormatStr.size() != 2 || FormatStr[0] != '%' ||
- CI->getNumArgOperands() < 3)
+ if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->arg_size() < 3)
return nullptr;
// Decode the second character of the format string.
@@ -3066,7 +3012,6 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI, IRBuilderBase &Builder) {
return optimizeLog(CI, Builder);
case Intrinsic::sqrt:
return optimizeSqrt(CI, Builder);
- // TODO: Use foldMallocMemset() with memset intrinsic.
case Intrinsic::memset:
return optimizeMemSet(CI, Builder);
case Intrinsic::memcpy:
@@ -3266,8 +3211,7 @@ Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI,
B.CreateMemCpy(CI->getArgOperand(0), Align(1), CI->getArgOperand(1),
Align(1), CI->getArgOperand(2));
NewCI->setAttributes(CI->getAttributes());
- NewCI->removeAttributes(AttributeList::ReturnIndex,
- AttributeFuncs::typeIncompatible(NewCI->getType()));
+ NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType()));
return CI->getArgOperand(0);
}
return nullptr;
@@ -3280,8 +3224,7 @@ Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI,
B.CreateMemMove(CI->getArgOperand(0), Align(1), CI->getArgOperand(1),
Align(1), CI->getArgOperand(2));
NewCI->setAttributes(CI->getAttributes());
- NewCI->removeAttributes(AttributeList::ReturnIndex,
- AttributeFuncs::typeIncompatible(NewCI->getType()));
+ NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType()));
return CI->getArgOperand(0);
}
return nullptr;
@@ -3289,15 +3232,12 @@ Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI,
Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI,
IRBuilderBase &B) {
- // TODO: Try foldMallocMemset() here.
-
if (isFortifiedCallFoldable(CI, 3, 2)) {
Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
CallInst *NewCI = B.CreateMemSet(CI->getArgOperand(0), Val,
CI->getArgOperand(2), Align(1));
NewCI->setAttributes(CI->getAttributes());
- NewCI->removeAttributes(AttributeList::ReturnIndex,
- AttributeFuncs::typeIncompatible(NewCI->getType()));
+ NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType()));
return CI->getArgOperand(0);
}
return nullptr;
@@ -3311,9 +3251,7 @@ Value *FortifiedLibCallSimplifier::optimizeMemPCpyChk(CallInst *CI,
CI->getArgOperand(2), B, DL, TLI)) {
CallInst *NewCI = cast<CallInst>(Call);
NewCI->setAttributes(CI->getAttributes());
- NewCI->removeAttributes(
- AttributeList::ReturnIndex,
- AttributeFuncs::typeIncompatible(NewCI->getType()));
+ NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType()));
return NewCI;
}
return nullptr;
@@ -3354,7 +3292,11 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
else
return nullptr;
- Type *SizeTTy = DL.getIntPtrType(CI->getContext());
+ // FIXME: There is really no guarantee that sizeof(size_t) is equal to
+ // sizeof(int*) for every target. So the assumption used here to derive the
+ // SizeTBits based on the size of an integer pointer in address space zero
+ // isn't always valid.
+ Type *SizeTTy = DL.getIntPtrType(CI->getContext(), /*AddressSpace=*/0);
Value *LenV = ConstantInt::get(SizeTTy, Len);
Value *Ret = emitMemCpyChk(Dst, Src, LenV, ObjSize, B, DL, TLI);
// If the function was an __stpcpy_chk, and we were able to fold it into
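
(Illustrative note, not part of the patch.) Among the SimplifyLibCalls changes above, optimizeStpCpy gains an early rewrite: stpcpy(d, s) becomes strcpy(d, s) whenever the returned pointer is unused, since the two calls differ only in their return value. In source terms (illustrative; stpcpy is POSIX, so its declaration may need the appropriate feature-test macros):

  #include <string.h>

  void copy_before(char *Dst, const char *Src) {
    (void)stpcpy(Dst, Src);  // result (pointer to the terminating NUL) ignored
  }

  void copy_after(char *Dst, const char *Src) {
    strcpy(Dst, Src);        // equivalent when the return value is unused
  }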
diff --git a/llvm/lib/Transforms/Utils/SplitModule.cpp b/llvm/lib/Transforms/Utils/SplitModule.cpp
index 32f2f4e233b2..7e12bbd2851c 100644
--- a/llvm/lib/Transforms/Utils/SplitModule.cpp
+++ b/llvm/lib/Transforms/Utils/SplitModule.cpp
@@ -24,7 +24,6 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalObject.h"
-#include "llvm/IR/GlobalIndirectSymbol.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instruction.h"
@@ -65,9 +64,8 @@ static void addNonConstUser(ClusterMapType &GVtoClusterMap,
if (const Instruction *I = dyn_cast<Instruction>(U)) {
const GlobalValue *F = I->getParent()->getParent();
GVtoClusterMap.unionSets(GV, F);
- } else if (isa<GlobalIndirectSymbol>(U) || isa<Function>(U) ||
- isa<GlobalVariable>(U)) {
- GVtoClusterMap.unionSets(GV, cast<GlobalValue>(U));
+ } else if (const GlobalValue *GVU = dyn_cast<GlobalValue>(U)) {
+ GVtoClusterMap.unionSets(GV, GVU);
} else {
llvm_unreachable("Underimplemented use case");
}
@@ -91,6 +89,13 @@ static void addAllGlobalValueUsers(ClusterMapType &GVtoClusterMap,
}
}
+static const GlobalObject *getGVPartitioningRoot(const GlobalValue *GV) {
+ const GlobalObject *GO = GV->getAliaseeObject();
+ if (const auto *GI = dyn_cast_or_null<GlobalIFunc>(GO))
+ GO = GI->getResolverFunction();
+ return GO;
+}
+
// Find partitions for module in the way that no locals need to be
// globalized.
// Try to balance pack those partitions into N files since this roughly equals
@@ -123,12 +128,11 @@ static void findPartitions(Module &M, ClusterIDMapType &ClusterIDMap,
Member = &GV;
}
- // For aliases we should not separate them from their aliasees regardless
- // of linkage.
- if (auto *GIS = dyn_cast<GlobalIndirectSymbol>(&GV)) {
- if (const GlobalObject *Base = GIS->getBaseObject())
- GVtoClusterMap.unionSets(&GV, Base);
- }
+ // Aliases should not be separated from their aliasees and ifuncs should
+ // not be separated from their resolvers regardless of linkage.
+ if (const GlobalObject *Root = getGVPartitioningRoot(&GV))
+ if (&GV != Root)
+ GVtoClusterMap.unionSets(&GV, Root);
if (const Function *F = dyn_cast<Function>(&GV)) {
for (const BasicBlock &BB : *F) {
@@ -225,9 +229,8 @@ static void externalize(GlobalValue *GV) {
// Returns whether GV should be in partition (0-based) I of N.
static bool isInPartition(const GlobalValue *GV, unsigned I, unsigned N) {
- if (auto *GIS = dyn_cast<GlobalIndirectSymbol>(GV))
- if (const GlobalObject *Base = GIS->getBaseObject())
- GV = Base;
+ if (const GlobalObject *Root = getGVPartitioningRoot(GV))
+ GV = Root;
StringRef Name;
if (const Comdat *C = GV->getComdat())
diff --git a/llvm/lib/Transforms/Utils/SymbolRewriter.cpp b/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
index ec4ea848a5d4..6a0eb34a7999 100644
--- a/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
+++ b/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
@@ -184,7 +184,7 @@ performOnModule(Module &M) {
std::string Name = Regex(Pattern).sub(Transform, C.getName(), &Error);
if (!Error.empty())
- report_fatal_error("unable to transforn " + C.getName() + " in " +
+ report_fatal_error(Twine("unable to transforn ") + C.getName() + " in " +
M.getModuleIdentifier() + ": " + Error);
if (C.getName() == Name)
@@ -256,11 +256,11 @@ bool RewriteMapParser::parse(const std::string &MapFile,
MemoryBuffer::getFile(MapFile);
if (!Mapping)
- report_fatal_error("unable to read rewrite map '" + MapFile + "': " +
- Mapping.getError().message());
+ report_fatal_error(Twine("unable to read rewrite map '") + MapFile +
+ "': " + Mapping.getError().message());
if (!parse(*Mapping, DL))
- report_fatal_error("unable to parse rewrite map '" + MapFile + "'");
+ report_fatal_error(Twine("unable to parse rewrite map '") + MapFile + "'");
return true;
}
diff --git a/llvm/lib/Transforms/Utils/VNCoercion.cpp b/llvm/lib/Transforms/Utils/VNCoercion.cpp
index 6336af25ef98..dbe3cc93e72b 100644
--- a/llvm/lib/Transforms/Utils/VNCoercion.cpp
+++ b/llvm/lib/Transforms/Utils/VNCoercion.cpp
@@ -403,19 +403,10 @@ int analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
if (Offset == -1)
return Offset;
- unsigned AS = Src->getType()->getPointerAddressSpace();
// Otherwise, see if we can constant fold a load from the constant with the
// offset applied as appropriate.
- if (Offset) {
- Src = ConstantExpr::getBitCast(Src,
- Type::getInt8PtrTy(Src->getContext(), AS));
- Constant *OffsetCst =
- ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
- Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()),
- Src, OffsetCst);
- }
- Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
- if (ConstantFoldLoadFromConstPtr(Src, LoadTy, DL))
+ unsigned IndexSize = DL.getIndexTypeSizeInBits(Src->getType());
+ if (ConstantFoldLoadFromConstPtr(Src, LoadTy, APInt(IndexSize, Offset), DL))
return Offset;
return -1;
}
@@ -584,19 +575,11 @@ T *getMemInstValueForLoadHelper(MemIntrinsic *SrcInst, unsigned Offset,
MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
Constant *Src = cast<Constant>(MTI->getSource());
- unsigned AS = Src->getType()->getPointerAddressSpace();
// Otherwise, see if we can constant fold a load from the constant with the
// offset applied as appropriate.
- if (Offset) {
- Src = ConstantExpr::getBitCast(Src,
- Type::getInt8PtrTy(Src->getContext(), AS));
- Constant *OffsetCst =
- ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
- Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()),
- Src, OffsetCst);
- }
- Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
- return ConstantFoldLoadFromConstPtr(Src, LoadTy, DL);
+ unsigned IndexSize = DL.getIndexTypeSizeInBits(Src->getType());
+ return ConstantFoldLoadFromConstPtr(
+ Src, LoadTy, APInt(IndexSize, Offset), DL);
}
/// This function is called when we have a
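
(Illustrative note, not part of the patch.) Both VNCoercion hunks drop the hand-built bitcast-and-GEP constant expressions and instead pass the byte offset straight to ConstantFoldLoadFromConstPtr as an APInt. Conceptually the folder is asked to read LoadTy out of a constant initializer at a byte offset; a plain C++ sketch of that read, with made-up data:

  #include <cstdint>
  #include <cstring>

  // Read a 32-bit value at byte offset 'Off' out of a constant byte buffer,
  // the way the folder materializes a load from a constant global plus a
  // known offset.
  static const unsigned char Buf[8] = {1, 0, 0, 0, 2, 0, 0, 0};

  uint32_t loadAtOffset(unsigned Off) {
    uint32_t V = 0;
    std::memcpy(&V, Buf + Off, sizeof(V));  // well-defined type punning
    return V;  // loadAtOffset(4) == 2 on a little-endian host
  }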
diff --git a/llvm/lib/Transforms/Utils/ValueMapper.cpp b/llvm/lib/Transforms/Utils/ValueMapper.cpp
index f3afd42e6163..c3eafd6b2492 100644
--- a/llvm/lib/Transforms/Utils/ValueMapper.cpp
+++ b/llvm/lib/Transforms/Utils/ValueMapper.cpp
@@ -26,7 +26,8 @@
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalIndirectSymbol.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalIFunc.h"
#include "llvm/IR/GlobalObject.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InlineAsm.h"
@@ -68,7 +69,7 @@ struct WorklistEntry {
enum EntryKind {
MapGlobalInit,
MapAppendingVar,
- MapGlobalIndirectSymbol,
+ MapAliasOrIFunc,
RemapFunction
};
struct GVInitTy {
@@ -79,8 +80,8 @@ struct WorklistEntry {
GlobalVariable *GV;
Constant *InitPrefix;
};
- struct GlobalIndirectSymbolTy {
- GlobalIndirectSymbol *GIS;
+ struct AliasOrIFuncTy {
+ GlobalValue *GV;
Constant *Target;
};
@@ -91,7 +92,7 @@ struct WorklistEntry {
union {
GVInitTy GVInit;
AppendingGVTy AppendingGV;
- GlobalIndirectSymbolTy GlobalIndirectSymbol;
+ AliasOrIFuncTy AliasOrIFunc;
Function *RemapF;
} Data;
};
@@ -163,8 +164,8 @@ public:
bool IsOldCtorDtor,
ArrayRef<Constant *> NewMembers,
unsigned MCID);
- void scheduleMapGlobalIndirectSymbol(GlobalIndirectSymbol &GIS, Constant &Target,
- unsigned MCID);
+ void scheduleMapAliasOrIFunc(GlobalValue &GV, Constant &Target,
+ unsigned MCID);
void scheduleRemapFunction(Function &F, unsigned MCID);
void flush();
@@ -873,10 +874,17 @@ void Mapper::flush() {
E.AppendingGVIsOldCtorDtor, makeArrayRef(NewInits));
break;
}
- case WorklistEntry::MapGlobalIndirectSymbol:
- E.Data.GlobalIndirectSymbol.GIS->setIndirectSymbol(
- mapConstant(E.Data.GlobalIndirectSymbol.Target));
+ case WorklistEntry::MapAliasOrIFunc: {
+ GlobalValue *GV = E.Data.AliasOrIFunc.GV;
+ Constant *Target = mapConstant(E.Data.AliasOrIFunc.Target);
+ if (auto *GA = dyn_cast<GlobalAlias>(GV))
+ GA->setAliasee(Target);
+ else if (auto *GI = dyn_cast<GlobalIFunc>(GV))
+ GI->setResolver(Target);
+ else
+ llvm_unreachable("Not alias or ifunc");
break;
+ }
case WorklistEntry::RemapFunction:
remapFunction(*E.Data.RemapF);
break;
@@ -944,12 +952,13 @@ void Mapper::remapInstruction(Instruction *I) {
LLVMContext &C = CB->getContext();
AttributeList Attrs = CB->getAttributes();
for (unsigned i = 0; i < Attrs.getNumAttrSets(); ++i) {
- for (Attribute::AttrKind TypedAttr :
- {Attribute::ByVal, Attribute::StructRet, Attribute::ByRef,
- Attribute::InAlloca}) {
- if (Type *Ty = Attrs.getAttribute(i, TypedAttr).getValueAsType()) {
- Attrs = Attrs.replaceAttributeType(C, i, TypedAttr,
- TypeMapper->remapType(Ty));
+ for (int AttrIdx = Attribute::FirstTypeAttr;
+ AttrIdx <= Attribute::LastTypeAttr; AttrIdx++) {
+ Attribute::AttrKind TypedAttr = (Attribute::AttrKind)AttrIdx;
+ if (Type *Ty =
+ Attrs.getAttributeAtIndex(i, TypedAttr).getValueAsType()) {
+ Attrs = Attrs.replaceAttributeTypeAtIndex(C, i, TypedAttr,
+ TypeMapper->remapType(Ty));
break;
}
}
@@ -1068,16 +1077,18 @@ void Mapper::scheduleMapAppendingVariable(GlobalVariable &GV,
AppendingInits.append(NewMembers.begin(), NewMembers.end());
}
-void Mapper::scheduleMapGlobalIndirectSymbol(GlobalIndirectSymbol &GIS,
- Constant &Target, unsigned MCID) {
- assert(AlreadyScheduled.insert(&GIS).second && "Should not reschedule");
+void Mapper::scheduleMapAliasOrIFunc(GlobalValue &GV, Constant &Target,
+ unsigned MCID) {
+ assert(AlreadyScheduled.insert(&GV).second && "Should not reschedule");
+ assert((isa<GlobalAlias>(GV) || isa<GlobalIFunc>(GV)) &&
+ "Should be alias or ifunc");
assert(MCID < MCs.size() && "Invalid mapping context");
WorklistEntry WE;
- WE.Kind = WorklistEntry::MapGlobalIndirectSymbol;
+ WE.Kind = WorklistEntry::MapAliasOrIFunc;
WE.MCID = MCID;
- WE.Data.GlobalIndirectSymbol.GIS = &GIS;
- WE.Data.GlobalIndirectSymbol.Target = &Target;
+ WE.Data.AliasOrIFunc.GV = &GV;
+ WE.Data.AliasOrIFunc.Target = &Target;
Worklist.push_back(WE);
}
@@ -1174,10 +1185,14 @@ void ValueMapper::scheduleMapAppendingVariable(GlobalVariable &GV,
GV, InitPrefix, IsOldCtorDtor, NewMembers, MCID);
}
-void ValueMapper::scheduleMapGlobalIndirectSymbol(GlobalIndirectSymbol &GIS,
- Constant &Target,
- unsigned MCID) {
- getAsMapper(pImpl)->scheduleMapGlobalIndirectSymbol(GIS, Target, MCID);
+void ValueMapper::scheduleMapGlobalAlias(GlobalAlias &GA, Constant &Aliasee,
+ unsigned MCID) {
+ getAsMapper(pImpl)->scheduleMapAliasOrIFunc(GA, Aliasee, MCID);
+}
+
+void ValueMapper::scheduleMapGlobalIFunc(GlobalIFunc &GI, Constant &Resolver,
+ unsigned MCID) {
+ getAsMapper(pImpl)->scheduleMapAliasOrIFunc(GI, Resolver, MCID);
}
void ValueMapper::scheduleRemapFunction(Function &F, unsigned MCID) {
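
(Illustrative note, not part of the patch.) With GlobalIndirectSymbol removed, the ValueMapper worklist entry above keeps a plain GlobalValue pointer and dispatches on the concrete kind at flush time: setAliasee for aliases, setResolver for ifuncs. A small standalone sketch of that dispatch shape (the class names here are stand-ins, not LLVM types):

  #include <cassert>

  struct Target {};
  struct Global { virtual ~Global() = default; };
  struct Alias : Global {
    Target *Aliasee = nullptr;
    void setAliasee(Target *T) { Aliasee = T; }
  };
  struct IFunc : Global {
    Target *Resolver = nullptr;
    void setResolver(Target *T) { Resolver = T; }
  };

  // One worklist entry type for both kinds; pick the setter when applying.
  void applyMapping(Global *GV, Target *T) {
    if (auto *A = dynamic_cast<Alias *>(GV))
      A->setAliasee(T);
    else if (auto *I = dynamic_cast<IFunc *>(GV))
      I->setResolver(T);
    else
      assert(false && "Not alias or ifunc");
  }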
diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
index 3b90997100f1..5a4a2f0924f6 100644
--- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -694,31 +694,16 @@ Vectorizer::getVectorizablePrefix(ArrayRef<Instruction *> Chain) {
});
for (Instruction &I : make_range(getBoundaryInstrs(Chain))) {
- if (isa<LoadInst>(I) || isa<StoreInst>(I)) {
- if (!is_contained(Chain, &I))
- MemoryInstrs.push_back(&I);
- else
- ChainInstrs.push_back(&I);
- } else if (isa<IntrinsicInst>(&I) &&
- cast<IntrinsicInst>(&I)->getIntrinsicID() ==
- Intrinsic::sideeffect) {
- // Ignore llvm.sideeffect calls.
- } else if (isa<IntrinsicInst>(&I) &&
- cast<IntrinsicInst>(&I)->getIntrinsicID() ==
- Intrinsic::pseudoprobe) {
- // Ignore llvm.pseudoprobe calls.
- } else if (isa<IntrinsicInst>(&I) &&
- cast<IntrinsicInst>(&I)->getIntrinsicID() == Intrinsic::assume) {
- // Ignore llvm.assume calls.
- } else if (IsLoadChain && (I.mayWriteToMemory() || I.mayThrow())) {
- LLVM_DEBUG(dbgs() << "LSV: Found may-write/throw operation: " << I
- << '\n');
- break;
- } else if (!IsLoadChain && (I.mayReadOrWriteMemory() || I.mayThrow())) {
- LLVM_DEBUG(dbgs() << "LSV: Found may-read/write/throw operation: " << I
- << '\n');
+ if ((isa<LoadInst>(I) || isa<StoreInst>(I)) && is_contained(Chain, &I)) {
+ ChainInstrs.push_back(&I);
+ continue;
+ }
+ if (I.mayThrow()) {
+ LLVM_DEBUG(dbgs() << "LSV: Found may-throw operation: " << I << '\n');
break;
}
+ if (I.mayReadOrWriteMemory())
+ MemoryInstrs.push_back(&I);
}
// Loop until we find an instruction in ChainInstrs that we can't vectorize.
@@ -751,26 +736,28 @@ Vectorizer::getVectorizablePrefix(ArrayRef<Instruction *> Chain) {
return LI->hasMetadata(LLVMContext::MD_invariant_load);
};
- // We can ignore the alias as long as the load comes before the store,
- // because that means we won't be moving the load past the store to
- // vectorize it (the vectorized load is inserted at the location of the
- // first load in the chain).
- if (isa<StoreInst>(MemInstr) && ChainLoad &&
- (IsInvariantLoad(ChainLoad) || ChainLoad->comesBefore(MemInstr)))
- continue;
-
- // Same case, but in reverse.
- if (MemLoad && isa<StoreInst>(ChainInstr) &&
- (IsInvariantLoad(MemLoad) || MemLoad->comesBefore(ChainInstr)))
- continue;
+ if (IsLoadChain) {
+ // We can ignore the alias as long as the load comes before the store,
+ // because that means we won't be moving the load past the store to
+ // vectorize it (the vectorized load is inserted at the location of the
+ // first load in the chain).
+ if (ChainInstr->comesBefore(MemInstr) ||
+ (ChainLoad && IsInvariantLoad(ChainLoad)))
+ continue;
+ } else {
+ // Same case, but in reverse.
+ if (MemInstr->comesBefore(ChainInstr) ||
+ (MemLoad && IsInvariantLoad(MemLoad)))
+ continue;
+ }
- if (!AA.isNoAlias(MemoryLocation::get(MemInstr),
- MemoryLocation::get(ChainInstr))) {
+ ModRefInfo MR =
+ AA.getModRefInfo(MemInstr, MemoryLocation::get(ChainInstr));
+ if (IsLoadChain ? isModSet(MR) : isModOrRefSet(MR)) {
LLVM_DEBUG({
dbgs() << "LSV: Found alias:\n"
- " Aliasing instruction and pointer:\n"
+ " Aliasing instruction:\n"
<< " " << *MemInstr << '\n'
- << " " << *getLoadStorePointerOperand(MemInstr) << '\n'
<< " Aliased instruction and pointer:\n"
<< " " << *ChainInstr << '\n'
<< " " << *getLoadStorePointerOperand(ChainInstr) << '\n';
@@ -1085,9 +1072,12 @@ bool Vectorizer::vectorizeStoreChain(
if (ChainSize > VF || (VF != TargetVF && TargetVF < ChainSize)) {
LLVM_DEBUG(dbgs() << "LSV: Chain doesn't match with the vector factor."
" Creating two separate arrays.\n");
- return vectorizeStoreChain(Chain.slice(0, TargetVF),
- InstructionsProcessed) |
- vectorizeStoreChain(Chain.slice(TargetVF), InstructionsProcessed);
+ bool Vectorized = false;
+ Vectorized |=
+ vectorizeStoreChain(Chain.slice(0, TargetVF), InstructionsProcessed);
+ Vectorized |=
+ vectorizeStoreChain(Chain.slice(TargetVF), InstructionsProcessed);
+ return Vectorized;
}
LLVM_DEBUG({
@@ -1104,8 +1094,10 @@ bool Vectorizer::vectorizeStoreChain(
if (accessIsMisaligned(SzInBytes, AS, Alignment)) {
if (S0->getPointerAddressSpace() != DL.getAllocaAddrSpace()) {
auto Chains = splitOddVectorElts(Chain, Sz);
- return vectorizeStoreChain(Chains.first, InstructionsProcessed) |
- vectorizeStoreChain(Chains.second, InstructionsProcessed);
+ bool Vectorized = false;
+ Vectorized |= vectorizeStoreChain(Chains.first, InstructionsProcessed);
+ Vectorized |= vectorizeStoreChain(Chains.second, InstructionsProcessed);
+ return Vectorized;
}
Align NewAlign = getOrEnforceKnownAlignment(S0->getPointerOperand(),
@@ -1119,15 +1111,17 @@ bool Vectorizer::vectorizeStoreChain(
if (!TTI.isLegalToVectorizeStoreChain(SzInBytes, Alignment, AS)) {
auto Chains = splitOddVectorElts(Chain, Sz);
- return vectorizeStoreChain(Chains.first, InstructionsProcessed) |
- vectorizeStoreChain(Chains.second, InstructionsProcessed);
+ bool Vectorized = false;
+ Vectorized |= vectorizeStoreChain(Chains.first, InstructionsProcessed);
+ Vectorized |= vectorizeStoreChain(Chains.second, InstructionsProcessed);
+ return Vectorized;
}
BasicBlock::iterator First, Last;
std::tie(First, Last) = getBoundaryInstrs(Chain);
Builder.SetInsertPoint(&*Last);
- Value *Vec = UndefValue::get(VecTy);
+ Value *Vec = PoisonValue::get(VecTy);
if (VecStoreTy) {
unsigned VecWidth = VecStoreTy->getNumElements();
@@ -1237,8 +1231,12 @@ bool Vectorizer::vectorizeLoadChain(
if (ChainSize > VF || (VF != TargetVF && TargetVF < ChainSize)) {
LLVM_DEBUG(dbgs() << "LSV: Chain doesn't match with the vector factor."
" Creating two separate arrays.\n");
- return vectorizeLoadChain(Chain.slice(0, TargetVF), InstructionsProcessed) |
- vectorizeLoadChain(Chain.slice(TargetVF), InstructionsProcessed);
+ bool Vectorized = false;
+ Vectorized |=
+ vectorizeLoadChain(Chain.slice(0, TargetVF), InstructionsProcessed);
+ Vectorized |=
+ vectorizeLoadChain(Chain.slice(TargetVF), InstructionsProcessed);
+ return Vectorized;
}
// We won't try again to vectorize the elements of the chain, regardless of
@@ -1249,8 +1247,10 @@ bool Vectorizer::vectorizeLoadChain(
if (accessIsMisaligned(SzInBytes, AS, Alignment)) {
if (L0->getPointerAddressSpace() != DL.getAllocaAddrSpace()) {
auto Chains = splitOddVectorElts(Chain, Sz);
- return vectorizeLoadChain(Chains.first, InstructionsProcessed) |
- vectorizeLoadChain(Chains.second, InstructionsProcessed);
+ bool Vectorized = false;
+ Vectorized |= vectorizeLoadChain(Chains.first, InstructionsProcessed);
+ Vectorized |= vectorizeLoadChain(Chains.second, InstructionsProcessed);
+ return Vectorized;
}
Align NewAlign = getOrEnforceKnownAlignment(L0->getPointerOperand(),
@@ -1264,8 +1264,10 @@ bool Vectorizer::vectorizeLoadChain(
if (!TTI.isLegalToVectorizeLoadChain(SzInBytes, Alignment, AS)) {
auto Chains = splitOddVectorElts(Chain, Sz);
- return vectorizeLoadChain(Chains.first, InstructionsProcessed) |
- vectorizeLoadChain(Chains.second, InstructionsProcessed);
+ bool Vectorized = false;
+ Vectorized |= vectorizeLoadChain(Chains.first, InstructionsProcessed);
+ Vectorized |= vectorizeLoadChain(Chains.second, InstructionsProcessed);
+ return Vectorized;
}
LLVM_DEBUG({
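
(Illustrative note, not part of the patch.) Several LoadStoreVectorizer hunks above replace "return f(...) | g(...)" with two explicit calls accumulated into a local flag. The operands of the built-in bitwise '|' are evaluated in an unspecified order, so when both calls mutate shared state (here, InstructionsProcessed and the IR itself), spelling the order out keeps the outcome deterministic across compilers. A reduced illustration:

  #include <vector>

  static bool tryHalf(std::vector<int> &Processed, int Id) {
    Processed.push_back(Id);  // the order of these pushes is what we want fixed
    return Id % 2 == 0;
  }

  bool vectorizeBoth_before(std::vector<int> &Processed) {
    // The two calls may run in either order; Processed's contents can differ
    // between compilers.
    return tryHalf(Processed, 0) | tryHalf(Processed, 1);
  }

  bool vectorizeBoth_after(std::vector<int> &Processed) {
    bool Vectorized = false;
    Vectorized |= tryHalf(Processed, 0);  // always first
    Vectorized |= tryHalf(Processed, 1);  // always second
    return Vectorized;
  }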
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 3c484fb0d28a..805011191da0 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -419,7 +419,8 @@ static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
return false;
}
-int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) const {
+int LoopVectorizationLegality::isConsecutivePtr(Type *AccessTy,
+ Value *Ptr) const {
const ValueToValueMap &Strides =
getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap();
@@ -428,7 +429,8 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) const {
llvm::shouldOptimizeForSize(TheLoop->getHeader(), PSI, BFI,
PGSOQueryType::IRPass);
bool CanAddPredicate = !OptForSize;
- int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, CanAddPredicate, false);
+ int Stride = getPtrStride(PSE, AccessTy, Ptr, TheLoop, Strides,
+ CanAddPredicate, false);
if (Stride == 1 || Stride == -1)
return Stride;
return 0;
@@ -747,7 +749,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (CI) {
auto *SE = PSE.getSE();
Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
- for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
+ for (unsigned i = 0, e = CI->arg_size(); i != e; ++i)
if (hasVectorInstrinsicScalarOpd(IntrinID, i)) {
if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {
reportVectorizationFailure("Found unvectorizable intrinsic",
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 5c4c4fdfa3f7..a7d6609f8c56 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -268,12 +268,6 @@ class LoopVectorizationPlanner {
/// A builder used to construct the current plan.
VPBuilder Builder;
- /// The best number of elements of the vector types used in the
- /// transformed loop. BestVF = None means that vectorization is
- /// disabled.
- Optional<ElementCount> BestVF = None;
- unsigned BestUF = 0;
-
public:
LoopVectorizationPlanner(Loop *L, LoopInfo *LI, const TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI,
@@ -295,12 +289,13 @@ public:
/// VF and its cost.
VectorizationFactor planInVPlanNativePath(ElementCount UserVF);
- /// Finalize the best decision and dispose of all other VPlans.
- void setBestPlan(ElementCount VF, unsigned UF);
+ /// Return the best VPlan for \p VF.
+ VPlan &getBestPlanFor(ElementCount VF) const;
/// Generate the IR code for the body of the vectorized loop according to the
- /// best selected VPlan.
- void executePlan(InnerLoopVectorizer &LB, DominatorTree *DT);
+ /// best selected \p VF, \p UF and VPlan \p BestPlan.
+ void executePlan(ElementCount VF, unsigned UF, VPlan &BestPlan,
+ InnerLoopVectorizer &LB, DominatorTree *DT);
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void printPlans(raw_ostream &O);
@@ -308,12 +303,9 @@ public:
/// Look through the existing plans and return true if we have one with all
/// the vectorization factors in question.
- bool hasPlanWithVFs(const ArrayRef<ElementCount> VFs) const {
- return any_of(VPlans, [&](const VPlanPtr &Plan) {
- return all_of(VFs, [&](const ElementCount &VF) {
- return Plan->hasVF(VF);
- });
- });
+ bool hasPlanWithVF(ElementCount VF) const {
+ return any_of(VPlans,
+ [&](const VPlanPtr &Plan) { return Plan->hasVF(VF); });
}
/// Test a \p Predicate on a \p Range of VF's. Return the value of applying
@@ -351,13 +343,14 @@ private:
/// legal to vectorize the loop. This method creates VPlans using VPRecipes.
void buildVPlansWithVPRecipes(ElementCount MinVF, ElementCount MaxVF);
- /// Adjust the recipes for any inloop reductions. The chain of instructions
- /// leading from the loop exit instr to the phi need to be converted to
- /// reductions, with one operand being vector and the other being the scalar
- /// reduction chain.
- void adjustRecipesForInLoopReductions(VPlanPtr &Plan,
- VPRecipeBuilder &RecipeBuilder,
- ElementCount MinVF);
+ // Adjust the recipes for reductions. For in-loop reductions the chain of
+ // instructions leading from the loop exit instr to the phi need to be
+ // converted to reductions, with one operand being vector and the other being
+ // the scalar reduction chain. For other reductions, a select is introduced
+ // between the phi and live-out recipes when folding the tail.
+ void adjustRecipesForReductions(VPBasicBlock *LatchVPBB, VPlanPtr &Plan,
+ VPRecipeBuilder &RecipeBuilder,
+ ElementCount MinVF);
};
} // namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index f24ae6b100d5..23bb6f0860c9 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -87,7 +87,6 @@
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
-#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
@@ -332,8 +331,8 @@ static cl::opt<bool>
cl::desc("Prefer in-loop vector reductions, "
"overriding the targets preference."));
-cl::opt<bool> EnableStrictReductions(
- "enable-strict-reductions", cl::init(false), cl::Hidden,
+static cl::opt<bool> ForceOrderedReductions(
+ "force-ordered-reductions", cl::init(false), cl::Hidden,
cl::desc("Enable the vectorisation of loops with in-order (strict) "
"FP reductions"));
@@ -545,7 +544,8 @@ public:
/// vectorized loop.
void vectorizeMemoryInstruction(Instruction *Instr, VPTransformState &State,
VPValue *Def, VPValue *Addr,
- VPValue *StoredValue, VPValue *BlockInMask);
+ VPValue *StoredValue, VPValue *BlockInMask,
+ bool ConsecutiveStride, bool Reverse);
/// Set the debug location in the builder \p Ptr using the debug location in
/// \p V. If \p Ptr is None then it uses the class member's Builder.
@@ -590,12 +590,11 @@ protected:
/// Handle all cross-iteration phis in the header.
void fixCrossIterationPHIs(VPTransformState &State);
- /// Fix a first-order recurrence. This is the second phase of vectorizing
- /// this phi node.
+ /// Create the exit value of first order recurrences in the middle block and
+ /// update their users.
void fixFirstOrderRecurrence(VPWidenPHIRecipe *PhiR, VPTransformState &State);
- /// Fix a reduction cross-iteration phi. This is the second phase of
- /// vectorizing this phi node.
+ /// Create code for the loop exit value of the reduction.
void fixReduction(VPReductionPHIRecipe *Phi, VPTransformState &State);
/// Clear NSW/NUW flags from reduction instructions if necessary.
@@ -621,9 +620,9 @@ protected:
/// (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step, ...)
/// to each vector element of Val. The sequence starts at StartIndex.
/// \p Opcode is relevant for FP induction variable.
- virtual Value *getStepVector(Value *Val, int StartIdx, Value *Step,
- Instruction::BinaryOps Opcode =
- Instruction::BinaryOpsEnd);
+ virtual Value *
+ getStepVector(Value *Val, Value *StartIdx, Value *Step,
+ Instruction::BinaryOps Opcode = Instruction::BinaryOpsEnd);
/// Compute scalar induction steps. \p ScalarIV is the scalar induction
/// variable on which to base the steps, \p Step is the size of the step, and
@@ -890,9 +889,9 @@ public:
private:
Value *getBroadcastInstrs(Value *V) override;
- Value *getStepVector(Value *Val, int StartIdx, Value *Step,
- Instruction::BinaryOps Opcode =
- Instruction::BinaryOpsEnd) override;
+ Value *getStepVector(
+ Value *Val, Value *StartIdx, Value *Step,
+ Instruction::BinaryOps Opcode = Instruction::BinaryOpsEnd) override;
Value *reverseVector(Value *Vec) override;
};
@@ -911,10 +910,9 @@ struct EpilogueLoopVectorizationInfo {
Value *TripCount = nullptr;
Value *VectorTripCount = nullptr;
- EpilogueLoopVectorizationInfo(unsigned MVF, unsigned MUF, unsigned EVF,
- unsigned EUF)
- : MainLoopVF(ElementCount::getFixed(MVF)), MainLoopUF(MUF),
- EpilogueVF(ElementCount::getFixed(EVF)), EpilogueUF(EUF) {
+ EpilogueLoopVectorizationInfo(ElementCount MVF, unsigned MUF,
+ ElementCount EVF, unsigned EUF)
+ : MainLoopVF(MVF), MainLoopUF(MUF), EpilogueVF(EVF), EpilogueUF(EUF) {
assert(EUF == 1 &&
"A high UF for the epilogue loop is likely not beneficial.");
}
@@ -1105,11 +1103,10 @@ static OptimizationRemarkAnalysis createLVAnalysis(const char *PassName,
}
/// Return a value for Step multiplied by VF.
-static Value *createStepForVF(IRBuilder<> &B, Constant *Step, ElementCount VF) {
- assert(isa<ConstantInt>(Step) && "Expected an integer step");
- Constant *StepVal = ConstantInt::get(
- Step->getType(),
- cast<ConstantInt>(Step)->getSExtValue() * VF.getKnownMinValue());
+static Value *createStepForVF(IRBuilder<> &B, Type *Ty, ElementCount VF,
+ int64_t Step) {
+ assert(Ty->isIntegerTy() && "Expected an integer step");
+ Constant *StepVal = ConstantInt::get(Ty, Step * VF.getKnownMinValue());
return VF.isScalable() ? B.CreateVScale(StepVal) : StepVal;
}
@@ -1121,6 +1118,13 @@ Value *getRuntimeVF(IRBuilder<> &B, Type *Ty, ElementCount VF) {
return VF.isScalable() ? B.CreateVScale(EC) : EC;
}
+static Value *getRuntimeVFAsFloat(IRBuilder<> &B, Type *FTy, ElementCount VF) {
+ assert(FTy->isFloatingPointTy() && "Expected floating point type!");
+ Type *IntTy = IntegerType::get(FTy->getContext(), FTy->getScalarSizeInBits());
+ Value *RuntimeVF = getRuntimeVF(B, IntTy, VF);
+ return B.CreateUIToFP(RuntimeVF, FTy);
+}
+
void reportVectorizationFailure(const StringRef DebugMsg,
const StringRef OREMsg, const StringRef ORETag,
OptimizationRemarkEmitter *ORE, Loop *TheLoop,
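
To make the arithmetic behind the reworked createStepForVF and the new getRuntimeVFAsFloat concrete, here is a small standalone sketch (editor's illustration with made-up names, not the LLVM IRBuilder API) of the value these helpers materialize: Step times the known minimum VF, additionally scaled by the runtime vscale for scalable vectors, with the floating-point variant converting the integer result as uitofp does.

  #include <cstdint>
  #include <cstdio>

  // 'VScale' models the runtime vscale on scalable-vector targets; it is
  // assumed to be irrelevant (effectively 1) for fixed-width vectors.
  int64_t stepForVF(int64_t Step, unsigned KnownMinVF, bool Scalable,
                    uint64_t VScale) {
    int64_t StepVal = Step * static_cast<int64_t>(KnownMinVF);
    return Scalable ? StepVal * static_cast<int64_t>(VScale) : StepVal;
  }

  double runtimeVFAsFloat(unsigned KnownMinVF, bool Scalable, uint64_t VScale) {
    // Integer runtime VF first, then an unsigned-to-FP conversion.
    uint64_t RuntimeVF = Scalable ? KnownMinVF * VScale : KnownMinVF;
    return static_cast<double>(RuntimeVF);
  }

  int main() {
    std::printf("%lld\n", (long long)stepForVF(3, 4, /*Scalable=*/true, 2)); // 24
    std::printf("%g\n", runtimeVFAsFloat(4, /*Scalable=*/false, 1));         // 4
    return 0;
  }
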
@@ -1319,8 +1323,7 @@ public:
/// the IsOrdered flag of RdxDesc is set and we do not allow reordering
/// of FP operations.
bool useOrderedReductions(const RecurrenceDescriptor &RdxDesc) {
- return EnableStrictReductions && !Hints->allowReordering() &&
- RdxDesc.isOrdered();
+ return !Hints->allowReordering() && RdxDesc.isOrdered();
}
/// \returns The smallest bitwidth each instruction can be represented with.
@@ -1495,14 +1498,14 @@ public:
/// Returns true if the target machine supports masked store operation
/// for the given \p DataType and kind of access to \p Ptr.
bool isLegalMaskedStore(Type *DataType, Value *Ptr, Align Alignment) const {
- return Legal->isConsecutivePtr(Ptr) &&
+ return Legal->isConsecutivePtr(DataType, Ptr) &&
TTI.isLegalMaskedStore(DataType, Alignment);
}
/// Returns true if the target machine supports masked load operation
/// for the given \p DataType and kind of access to \p Ptr.
bool isLegalMaskedLoad(Type *DataType, Value *Ptr, Align Alignment) const {
- return Legal->isConsecutivePtr(Ptr) &&
+ return Legal->isConsecutivePtr(DataType, Ptr) &&
TTI.isLegalMaskedLoad(DataType, Alignment);
}
@@ -1539,7 +1542,7 @@ public:
// through scalar predication or masked load/store or masked gather/scatter.
// Superset of instructions that return true for isScalarWithPredication.
bool isPredicatedInst(Instruction *I) {
- if (!blockNeedsPredication(I->getParent()))
+ if (!blockNeedsPredicationForAnyReason(I->getParent()))
return false;
// Loads and stores that need some form of masked operation are predicated
// instructions.
@@ -1593,7 +1596,10 @@ public:
/// Returns true if all loop blocks should be masked to fold tail loop.
bool foldTailByMasking() const { return FoldTailByMasking; }
- bool blockNeedsPredication(BasicBlock *BB) const {
+ /// Returns true if the instructions in this block require predication
+ /// for any reason, e.g. because tail folding now requires a predicate
+ /// or because the block in the original loop was predicated.
+ bool blockNeedsPredicationForAnyReason(BasicBlock *BB) const {
return foldTailByMasking() || Legal->blockNeedsPredication(BB);
}
@@ -1928,7 +1934,7 @@ class GeneratedRTChecks {
/// The value representing the result of the generated memory runtime checks.
/// If it is nullptr, either no memory runtime checks have been generated or
/// they have been used.
- Instruction *MemRuntimeCheckCond = nullptr;
+ Value *MemRuntimeCheckCond = nullptr;
DominatorTree *DT;
LoopInfo *LI;
@@ -1971,7 +1977,7 @@ public:
MemCheckBlock = SplitBlock(Pred, Pred->getTerminator(), DT, LI, nullptr,
"vector.memcheck");
- std::tie(std::ignore, MemRuntimeCheckCond) =
+ MemRuntimeCheckCond =
addRuntimeChecks(MemCheckBlock->getTerminator(), L,
RtPtrChecking.getChecks(), MemCheckExp);
assert(MemRuntimeCheckCond &&
@@ -2030,7 +2036,6 @@ public:
if (MemCheckExp.isInsertedInstruction(&I))
continue;
SE.forgetValue(&I);
- SE.eraseValueFromMap(&I);
I.eraseFromParent();
}
}
@@ -2289,9 +2294,11 @@ void InnerLoopVectorizer::createVectorIntOrFpInductionPHI(
Step = Builder.CreateTrunc(Step, TruncType);
Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType);
}
+
+ Value *Zero = getSignedIntOrFpConstant(Start->getType(), 0);
Value *SplatStart = Builder.CreateVectorSplat(VF, Start);
Value *SteppedStart =
- getStepVector(SplatStart, 0, Step, II.getInductionOpcode());
+ getStepVector(SplatStart, Zero, Step, II.getInductionOpcode());
// We create vector phi nodes for both integer and floating-point induction
// variables. Here, we determine the kind of arithmetic we will perform.
@@ -2308,12 +2315,11 @@ void InnerLoopVectorizer::createVectorIntOrFpInductionPHI(
// Multiply the vectorization factor by the step using integer or
// floating-point arithmetic as appropriate.
Type *StepType = Step->getType();
+ Value *RuntimeVF;
if (Step->getType()->isFloatingPointTy())
- StepType = IntegerType::get(StepType->getContext(),
- StepType->getScalarSizeInBits());
- Value *RuntimeVF = getRuntimeVF(Builder, StepType, VF);
- if (Step->getType()->isFloatingPointTy())
- RuntimeVF = Builder.CreateSIToFP(RuntimeVF, Step->getType());
+ RuntimeVF = getRuntimeVFAsFloat(Builder, StepType, VF);
+ else
+ RuntimeVF = getRuntimeVF(Builder, StepType, VF);
Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF);
// Create a vector splat to use in the induction update.
@@ -2388,9 +2394,13 @@ void InnerLoopVectorizer::recordVectorLoopValueForInductionCast(
if (isa<TruncInst>(EntryVal))
return;
- const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
- if (Casts.empty())
+ if (!CastDef) {
+ assert(ID.getCastInsts().empty() &&
+ "there are casts for ID, but no CastDef");
return;
+ }
+ assert(!ID.getCastInsts().empty() &&
+ "there is a CastDef, but no casts for ID");
// Only the first Cast instruction in the Casts vector is of interest.
// The rest of the Casts (if exist) have no uses outside the
// induction update chain itself.
@@ -2462,9 +2472,14 @@ void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, Value *Start,
Value *Broadcasted = getBroadcastInstrs(ScalarIV);
for (unsigned Part = 0; Part < UF; ++Part) {
assert(!VF.isScalable() && "scalable vectors not yet supported.");
+ Value *StartIdx;
+ if (Step->getType()->isFloatingPointTy())
+ StartIdx = getRuntimeVFAsFloat(Builder, Step->getType(), VF * Part);
+ else
+ StartIdx = getRuntimeVF(Builder, Step->getType(), VF * Part);
+
Value *EntryPart =
- getStepVector(Broadcasted, VF.getKnownMinValue() * Part, Step,
- ID.getInductionOpcode());
+ getStepVector(Broadcasted, StartIdx, Step, ID.getInductionOpcode());
State.set(Def, EntryPart, Part);
if (Trunc)
addMetadata(EntryPart, Trunc);
@@ -2520,7 +2535,8 @@ void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, Value *Start,
buildScalarSteps(ScalarIV, Step, EntryVal, ID, Def, CastDef, State);
}
-Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx, Value *Step,
+Value *InnerLoopVectorizer::getStepVector(Value *Val, Value *StartIdx,
+ Value *Step,
Instruction::BinaryOps BinOp) {
// Create and check the types.
auto *ValVTy = cast<VectorType>(Val->getType());
@@ -2543,12 +2559,11 @@ Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx, Value *Step,
}
Value *InitVec = Builder.CreateStepVector(InitVecValVTy);
- // Add on StartIdx
- Value *StartIdxSplat = Builder.CreateVectorSplat(
- VLen, ConstantInt::get(InitVecValSTy, StartIdx));
- InitVec = Builder.CreateAdd(InitVec, StartIdxSplat);
+ // Splat the StartIdx
+ Value *StartIdxSplat = Builder.CreateVectorSplat(VLen, StartIdx);
if (STy->isIntegerTy()) {
+ InitVec = Builder.CreateAdd(InitVec, StartIdxSplat);
Step = Builder.CreateVectorSplat(VLen, Step);
assert(Step->getType() == Val->getType() && "Invalid step vec");
// FIXME: The newly created binary instructions should contain nsw/nuw flags,
@@ -2561,6 +2576,8 @@ Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx, Value *Step,
assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) &&
"Binary Opcode should be specified for FP induction");
InitVec = Builder.CreateUIToFP(InitVec, ValVTy);
+ InitVec = Builder.CreateFAdd(InitVec, StartIdxSplat);
+
Step = Builder.CreateVectorSplat(VLen, Step);
Value *MulOp = Builder.CreateFMul(InitVec, Step);
return Builder.CreateBinOp(BinOp, Val, MulOp, "induction");
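
As a reference for the reworked getStepVector, a minimal scalar model (editor's sketch, illustrative names only) of the per-lane value it now produces once StartIdx is a Value rather than an immediate: lane i becomes Val[i] combined with (StartIdx + i) * Step, using integer add or the supplied FAdd/FSub opcode.

  #include <cstdio>
  #include <vector>

  // result[i] = Val[i] + (StartIdx + i) * Step, modelling the FAdd case.
  std::vector<double> stepVector(const std::vector<double> &Val, double StartIdx,
                                 double Step) {
    std::vector<double> Out(Val.size());
    for (size_t I = 0; I < Val.size(); ++I)
      Out[I] = Val[I] + (StartIdx + static_cast<double>(I)) * Step;
    return Out;
  }

  int main() {
    // Splatted start value 10.0, StartIdx 4, Step 0.5 -> 12.0 12.5 13.0 13.5
    for (double V : stepVector({10, 10, 10, 10}, 4.0, 0.5))
      std::printf("%.1f ", V);
    std::printf("\n");
    return 0;
  }
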
@@ -2609,8 +2626,7 @@ void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step,
}
for (unsigned Part = 0; Part < UF; ++Part) {
- Value *StartIdx0 =
- createStepForVF(Builder, ConstantInt::get(IntStepTy, Part), VF);
+ Value *StartIdx0 = createStepForVF(Builder, IntStepTy, VF, Part);
if (!IsUniform && VF.isScalable()) {
auto *SplatStartIdx = Builder.CreateVectorSplat(VF, StartIdx0);
@@ -2838,12 +2854,25 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
auto *SubVT = VectorType::get(ScalarTy, VF);
// Vectorize the interleaved store group.
+ MaskForGaps = createBitMaskForGaps(Builder, VF.getKnownMinValue(), *Group);
+ assert((!MaskForGaps || useMaskedInterleavedAccesses(*TTI)) &&
+ "masked interleaved groups are not allowed.");
+ assert((!MaskForGaps || !VF.isScalable()) &&
+ "masking gaps for scalable vectors is not yet supported.");
for (unsigned Part = 0; Part < UF; Part++) {
// Collect the stored vector from each member.
SmallVector<Value *, 4> StoredVecs;
for (unsigned i = 0; i < InterleaveFactor; i++) {
- // Interleaved store group doesn't allow a gap, so each index has a member
- assert(Group->getMember(i) && "Fail to get a member from an interleaved store group");
+ assert((Group->getMember(i) || MaskForGaps) &&
+ "Fail to get a member from an interleaved store group");
+ Instruction *Member = Group->getMember(i);
+
+ // Skip the gaps in the group.
+ if (!Member) {
+ Value *Undef = PoisonValue::get(SubVT);
+ StoredVecs.push_back(Undef);
+ continue;
+ }
Value *StoredVec = State.get(StoredValues[i], Part);
@@ -2867,16 +2896,21 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
"interleaved.vec");
Instruction *NewStoreInstr;
- if (BlockInMask) {
- Value *BlockInMaskPart = State.get(BlockInMask, Part);
- Value *ShuffledMask = Builder.CreateShuffleVector(
- BlockInMaskPart,
- createReplicatedMask(InterleaveFactor, VF.getKnownMinValue()),
- "interleaved.mask");
- NewStoreInstr = Builder.CreateMaskedStore(
- IVec, AddrParts[Part], Group->getAlign(), ShuffledMask);
- }
- else
+ if (BlockInMask || MaskForGaps) {
+ Value *GroupMask = MaskForGaps;
+ if (BlockInMask) {
+ Value *BlockInMaskPart = State.get(BlockInMask, Part);
+ Value *ShuffledMask = Builder.CreateShuffleVector(
+ BlockInMaskPart,
+ createReplicatedMask(InterleaveFactor, VF.getKnownMinValue()),
+ "interleaved.mask");
+ GroupMask = MaskForGaps ? Builder.CreateBinOp(Instruction::And,
+ ShuffledMask, MaskForGaps)
+ : ShuffledMask;
+ }
+ NewStoreInstr = Builder.CreateMaskedStore(IVec, AddrParts[Part],
+ Group->getAlign(), GroupMask);
+ } else
NewStoreInstr =
Builder.CreateAlignedStore(IVec, AddrParts[Part], Group->getAlign());
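
A small standalone illustration (editor's sketch, names are illustrative) of how the store-group mask is assembled in the hunk above: the per-lane block mask is replicated across the interleave factor and then ANDed with the gap mask, so lanes that correspond to missing members of the store group are never written.

  #include <cstdio>
  #include <vector>

  // Replicate each block-mask bit InterleaveFactor times, then AND with the
  // gap mask (false for lanes that belong to absent members of the group).
  std::vector<bool> groupMask(const std::vector<bool> &BlockMask,
                              const std::vector<bool> &GapMask,
                              unsigned InterleaveFactor) {
    std::vector<bool> Out;
    for (bool B : BlockMask)
      for (unsigned I = 0; I < InterleaveFactor; ++I)
        Out.push_back(B);
    for (size_t I = 0; I < Out.size(); ++I)
      Out[I] = Out[I] && GapMask[I];
    return Out;
  }

  int main() {
    // VF=2, interleave factor 3, member 1 of the group is a gap.
    std::vector<bool> Block = {true, false};
    std::vector<bool> Gap   = {true, false, true, true, false, true};
    for (bool B : groupMask(Block, Gap, 3))
      std::printf("%d ", B ? 1 : 0);    // 1 0 1 0 0 0
    std::printf("\n");
    return 0;
  }
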
@@ -2886,7 +2920,8 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
void InnerLoopVectorizer::vectorizeMemoryInstruction(
Instruction *Instr, VPTransformState &State, VPValue *Def, VPValue *Addr,
- VPValue *StoredValue, VPValue *BlockInMask) {
+ VPValue *StoredValue, VPValue *BlockInMask, bool ConsecutiveStride,
+ bool Reverse) {
// Attempt to issue a wide load.
LoadInst *LI = dyn_cast<LoadInst>(Instr);
StoreInst *SI = dyn_cast<StoreInst>(Instr);
@@ -2895,31 +2930,11 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(
assert((!SI || StoredValue) && "No stored value provided for widened store");
assert((!LI || !StoredValue) && "Stored value provided for widened load");
- LoopVectorizationCostModel::InstWidening Decision =
- Cost->getWideningDecision(Instr, VF);
- assert((Decision == LoopVectorizationCostModel::CM_Widen ||
- Decision == LoopVectorizationCostModel::CM_Widen_Reverse ||
- Decision == LoopVectorizationCostModel::CM_GatherScatter) &&
- "CM decision is not to widen the memory instruction");
-
Type *ScalarDataTy = getLoadStoreType(Instr);
auto *DataTy = VectorType::get(ScalarDataTy, VF);
const Align Alignment = getLoadStoreAlignment(Instr);
-
- // Determine if the pointer operand of the access is either consecutive or
- // reverse consecutive.
- bool Reverse = (Decision == LoopVectorizationCostModel::CM_Widen_Reverse);
- bool ConsecutiveStride =
- Reverse || (Decision == LoopVectorizationCostModel::CM_Widen);
- bool CreateGatherScatter =
- (Decision == LoopVectorizationCostModel::CM_GatherScatter);
-
- // Either Ptr feeds a vector load/store, or a vector GEP should feed a vector
- // gather/scatter. Otherwise Decision should have been to Scalarize.
- assert((ConsecutiveStride || CreateGatherScatter) &&
- "The instruction should be scalarized");
- (void)ConsecutiveStride;
+ bool CreateGatherScatter = !ConsecutiveStride;
VectorParts BlockInMaskParts(UF);
bool isMaskRequired = BlockInMask;
@@ -2953,7 +2968,8 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(
if (isMaskRequired) // Reverse of a null all-one mask is a null mask.
BlockInMaskParts[Part] = reverseVector(BlockInMaskParts[Part]);
} else {
- Value *Increment = createStepForVF(Builder, Builder.getInt32(Part), VF);
+ Value *Increment =
+ createStepForVF(Builder, Builder.getInt32Ty(), VF, Part);
PartPtr = cast<GetElementPtrInst>(
Builder.CreateGEP(ScalarDataTy, Ptr, Increment));
PartPtr->setIsInBounds(InBounds);
@@ -3172,7 +3188,7 @@ Value *InnerLoopVectorizer::getOrCreateVectorTripCount(Loop *L) {
Type *Ty = TC->getType();
// This is where we can make the step a runtime constant.
- Value *Step = createStepForVF(Builder, ConstantInt::get(Ty, UF), VF);
+ Value *Step = createStepForVF(Builder, Ty, VF, UF);
// If the tail is to be folded by masking, round the number of iterations N
// up to a multiple of Step instead of rounding down. This is done by first
@@ -3262,8 +3278,7 @@ void InnerLoopVectorizer::emitMinimumIterationCountCheck(Loop *L,
// If tail is to be folded, vector loop takes care of all iterations.
Value *CheckMinIters = Builder.getFalse();
if (!Cost->foldTailByMasking()) {
- Value *Step =
- createStepForVF(Builder, ConstantInt::get(Count->getType(), UF), VF);
+ Value *Step = createStepForVF(Builder, Count->getType(), VF, UF);
CheckMinIters = Builder.CreateICmp(P, Count, Step, "min.iters.check");
}
// Create new preheader for vector loop.
@@ -3433,7 +3448,7 @@ Value *InnerLoopVectorizer::emitTransformedIndex(
assert(isa<SCEVConstant>(Step) &&
"Expected constant step for pointer induction");
return B.CreateGEP(
- StartValue->getType()->getPointerElementType(), StartValue,
+ ID.getElementType(), StartValue,
CreateMul(Index,
Exp.expandCodeFor(Step, Index->getType()->getScalarType(),
GetInsertPoint())));
@@ -3739,7 +3754,7 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
// The loop step is equal to the vectorization factor (num of SIMD elements)
// times the unroll factor (num of SIMD instructions).
Builder.SetInsertPoint(&*Lp->getHeader()->getFirstInsertionPt());
- Value *Step = createStepForVF(Builder, ConstantInt::get(IdxTy, UF), VF);
+ Value *Step = createStepForVF(Builder, IdxTy, VF, UF);
Value *CountRoundDown = getOrCreateVectorTripCount(Lp);
Induction =
createInductionVariable(Lp, StartIdx, CountRoundDown, Step,
@@ -3857,21 +3872,19 @@ struct CSEDenseMapInfo {
static void cse(BasicBlock *BB) {
// Perform simple cse.
SmallDenseMap<Instruction *, Instruction *, 4, CSEDenseMapInfo> CSEMap;
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
- Instruction *In = &*I++;
-
- if (!CSEDenseMapInfo::canHandle(In))
+ for (Instruction &In : llvm::make_early_inc_range(*BB)) {
+ if (!CSEDenseMapInfo::canHandle(&In))
continue;
// Check if we can replace this instruction with any of the
// visited instructions.
- if (Instruction *V = CSEMap.lookup(In)) {
- In->replaceAllUsesWith(V);
- In->eraseFromParent();
+ if (Instruction *V = CSEMap.lookup(&In)) {
+ In.replaceAllUsesWith(V);
+ In.eraseFromParent();
continue;
}
- CSEMap[In] = In;
+ CSEMap[&In] = &In;
}
}
@@ -3881,7 +3894,7 @@ LoopVectorizationCostModel::getVectorCallCost(CallInst *CI, ElementCount VF,
Function *F = CI->getCalledFunction();
Type *ScalarRetTy = CI->getType();
SmallVector<Type *, 4> Tys, ScalarTys;
- for (auto &ArgOp : CI->arg_operands())
+ for (auto &ArgOp : CI->args())
ScalarTys.push_back(ArgOp->getType());
// Estimate cost of scalarized vector call. The source operands are assumed
@@ -3940,7 +3953,7 @@ LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI,
if (auto *FPMO = dyn_cast<FPMathOperator>(CI))
FMF = FPMO->getFastMathFlags();
- SmallVector<const Value *> Arguments(CI->arg_begin(), CI->arg_end());
+ SmallVector<const Value *> Arguments(CI->args());
FunctionType *FTy = CI->getCalledFunction()->getFunctionType();
SmallVector<Type *> ParamTys;
std::transform(FTy->param_begin(), FTy->param_end(),
@@ -3974,7 +3987,8 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths(VPTransformState &State) {
// If the value wasn't vectorized, we must maintain the original scalar
// type. The absence of the value from State indicates that it
// wasn't vectorized.
- VPValue *Def = State.Plan->getVPValue(KV.first);
+ // FIXME: Should not rely on getVPValue at this point.
+ VPValue *Def = State.Plan->getVPValue(KV.first, true);
if (!State.hasAnyVectorValue(Def))
continue;
for (unsigned Part = 0; Part < UF; ++Part) {
@@ -4081,7 +4095,8 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths(VPTransformState &State) {
// If the value wasn't vectorized, we must maintain the original scalar
// type. The absence of the value from State indicates that it
// wasn't vectorized.
- VPValue *Def = State.Plan->getVPValue(KV.first);
+ // FIXME: Should not rely on getVPValue at this point.
+ VPValue *Def = State.Plan->getVPValue(KV.first, true);
if (!State.hasAnyVectorValue(Def))
continue;
for (unsigned Part = 0; Part < UF; ++Part) {
@@ -4222,17 +4237,12 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(VPWidenPHIRecipe *PhiR,
// After execution completes the vector loop, we extract the next value of
// the recurrence (x) to use as the initial value in the scalar loop.
- auto *IdxTy = Builder.getInt32Ty();
- auto *VecPhi = cast<PHINode>(State.get(PhiR, 0));
-
- // Fix the latch value of the new recurrence in the vector loop.
- VPValue *PreviousDef = PhiR->getBackedgeValue();
- Value *Incoming = State.get(PreviousDef, UF - 1);
- VecPhi->addIncoming(Incoming, LI->getLoopFor(LoopVectorBody)->getLoopLatch());
-
// Extract the last vector element in the middle block. This will be the
// initial value for the recurrence when jumping to the scalar loop.
+ VPValue *PreviousDef = PhiR->getBackedgeValue();
+ Value *Incoming = State.get(PreviousDef, UF - 1);
auto *ExtractForScalar = Incoming;
+ auto *IdxTy = Builder.getInt32Ty();
if (VF.isVector()) {
auto *One = ConstantInt::get(IdxTy, 1);
Builder.SetInsertPoint(LoopMiddleBlock->getTerminator());
@@ -4283,8 +4293,7 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(VPWidenPHIRecipe *PhiR,
// and thus no phis which needed updated.
if (!Cost->requiresScalarEpilogue(VF))
for (PHINode &LCSSAPhi : LoopExitBlock->phis())
- if (any_of(LCSSAPhi.incoming_values(),
- [Phi](Value *V) { return V == Phi; }))
+ if (llvm::is_contained(LCSSAPhi.incoming_values(), Phi))
LCSSAPhi.addIncoming(ExtractForPhiUsedOutsideLoop, LoopMiddleBlock);
}
@@ -4301,29 +4310,13 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
Instruction *LoopExitInst = RdxDesc.getLoopExitInstr();
setDebugLocFromInst(ReductionStartValue);
- VPValue *LoopExitInstDef = State.Plan->getVPValue(LoopExitInst);
+ VPValue *LoopExitInstDef = PhiR->getBackedgeValue();
// This is the vector-clone of the value that leaves the loop.
Type *VecTy = State.get(LoopExitInstDef, 0)->getType();
// Wrap flags are in general invalid after vectorization, clear them.
clearReductionWrapFlags(RdxDesc, State);
- // Fix the vector-loop phi.
-
- // Reductions do not have to start at zero. They can start with
- // any loop invariant values.
- BasicBlock *VectorLoopLatch = LI->getLoopFor(LoopVectorBody)->getLoopLatch();
-
- unsigned LastPartForNewPhi = PhiR->isOrdered() ? 1 : UF;
- for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
- Value *VecRdxPhi = State.get(PhiR->getVPSingleValue(), Part);
- Value *Val = State.get(PhiR->getBackedgeValue(), Part);
- if (PhiR->isOrdered())
- Val = State.get(PhiR->getBackedgeValue(), UF - 1);
-
- cast<PHINode>(VecRdxPhi)->addIncoming(Val, VectorLoopLatch);
- }
-
// Before each round, move the insertion point right between
// the PHIs and the values we are going to write.
// This allows us to write both PHINodes and the extractelement
@@ -4361,7 +4354,7 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
RdxDesc.getOpcode(), PhiTy,
TargetTransformInfo::ReductionFlags())) {
auto *VecRdxPhi =
- cast<PHINode>(State.get(PhiR->getVPSingleValue(), Part));
+ cast<PHINode>(State.get(PhiR, Part));
VecRdxPhi->setIncomingValueForBlock(
LI->getLoopFor(LoopVectorBody)->getLoopLatch(), Sel);
}
@@ -4382,13 +4375,10 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
Value *Trunc = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
Value *Extnd = RdxDesc.isSigned() ? Builder.CreateSExt(Trunc, VecTy)
: Builder.CreateZExt(Trunc, VecTy);
- for (Value::user_iterator UI = RdxParts[Part]->user_begin();
- UI != RdxParts[Part]->user_end();)
- if (*UI != Trunc) {
- (*UI++)->replaceUsesOfWith(RdxParts[Part], Extnd);
+ for (User *U : llvm::make_early_inc_range(RdxParts[Part]->users()))
+ if (U != Trunc) {
+ U->replaceUsesOfWith(RdxParts[Part], Extnd);
RdxParts[Part] = Extnd;
- } else {
- ++UI;
}
}
Builder.SetInsertPoint(&*LoopMiddleBlock->getFirstInsertionPt());
@@ -4421,9 +4411,11 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
ReducedPartRdx = Builder.CreateBinOp(
(Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx");
- } else {
+ } else if (RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK))
+ ReducedPartRdx = createSelectCmpOp(Builder, ReductionStartValue, RK,
+ ReducedPartRdx, RdxPart);
+ else
ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
- }
}
}
@@ -4431,7 +4423,7 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
// target reduction in the loop using a Reduction recipe.
if (VF.isVector() && !PhiR->isInLoop()) {
ReducedPartRdx =
- createTargetReduction(Builder, TTI, RdxDesc, ReducedPartRdx);
+ createTargetReduction(Builder, TTI, RdxDesc, ReducedPartRdx, OrigPhi);
// If the reduction can be performed in a smaller type, we need to extend
// the reduction to the wider type before we branch to the original loop.
if (PhiTy != RdxDesc.getRecurrenceType())
@@ -4456,8 +4448,7 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
// fixFirstOrderRecurrence for a more complete explanation of the logic.
if (!Cost->requiresScalarEpilogue(VF))
for (PHINode &LCSSAPhi : LoopExitBlock->phis())
- if (any_of(LCSSAPhi.incoming_values(),
- [LoopExitInst](Value *V) { return V == LoopExitInst; }))
+ if (llvm::is_contained(LCSSAPhi.incoming_values(), LoopExitInst))
LCSSAPhi.addIncoming(ReducedPartRdx, LoopMiddleBlock);
// Fix the scalar loop reduction variable with the incoming reduction sum
@@ -4488,7 +4479,8 @@ void InnerLoopVectorizer::clearReductionWrapFlags(const RecurrenceDescriptor &Rd
Instruction *Cur = Worklist.pop_back_val();
if (isa<OverflowingBinaryOperator>(Cur))
for (unsigned Part = 0; Part < UF; ++Part) {
- Value *V = State.get(State.Plan->getVPValue(Cur), Part);
+ // FIXME: Should not rely on getVPValue at this point.
+ Value *V = State.get(State.Plan->getVPValue(Cur, true), Part);
cast<Instruction>(V)->dropPoisonGeneratingFlags();
}
@@ -4519,11 +4511,12 @@ void InnerLoopVectorizer::fixLCSSAPHIs(VPTransformState &State) {
// Can be a loop invariant incoming value or the last scalar value to be
// extracted from the vectorized loop.
+ // FIXME: Should not rely on getVPValue at this point.
Builder.SetInsertPoint(LoopMiddleBlock->getTerminator());
Value *lastIncomingValue =
OrigLoop->isLoopInvariant(IncomingValue)
? IncomingValue
- : State.get(State.Plan->getVPValue(IncomingValue),
+ : State.get(State.Plan->getVPValue(IncomingValue, true),
VPIteration(UF - 1, Lane));
LCSSAPhi.addIncoming(lastIncomingValue, LoopMiddleBlock);
}
@@ -4763,10 +4756,18 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
}
for (unsigned Part = 0; Part < UF; ++Part) {
- Value *PartStart = createStepForVF(
- Builder, ConstantInt::get(PtrInd->getType(), Part), VF);
+ Value *PartStart =
+ createStepForVF(Builder, PtrInd->getType(), VF, Part);
if (NeedsVectorIndex) {
+ // Here we cache the whole vector, which means we can support the
+ // extraction of any lane. However, in some cases the extractelement
+ // instruction that is generated for scalar uses of this vector (e.g.
+ // a load instruction) is not folded away. Therefore we still
+ // calculate values for the first n lanes to avoid redundant moves
+ // (when extracting the 0th element) and to produce scalar code (i.e.
+ // additional add/gep instructions instead of expensive extractelement
+ // instructions) when extracting higher-order elements.
Value *PartStartSplat = Builder.CreateVectorSplat(VF, PartStart);
Value *Indices = Builder.CreateAdd(PartStartSplat, UnitStepVec);
Value *GlobalIndices = Builder.CreateAdd(PtrIndSplat, Indices);
@@ -4774,9 +4775,6 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
emitTransformedIndex(Builder, GlobalIndices, PSE.getSE(), DL, II);
SclrGep->setName("next.gep");
State.set(PhiR, SclrGep, Part);
- // We've cached the whole vector, which means we can support the
- // extraction of any lane.
- continue;
}
for (unsigned Lane = 0; Lane < Lanes; ++Lane) {
@@ -4813,7 +4811,7 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
Value *NumUnrolledElems =
Builder.CreateMul(RuntimeVF, ConstantInt::get(PhiType, State.UF));
Value *InductionGEP = GetElementPtrInst::Create(
- ScStValueType->getPointerElementType(), NewPointerPhi,
+ II.getElementType(), NewPointerPhi,
Builder.CreateMul(ScalarStepValue, NumUnrolledElems), "ptr.ind",
InductionLoc);
NewPointerPhi->addIncoming(InductionGEP, LoopLatch);
@@ -4832,7 +4830,7 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
Builder.CreateAdd(StartOffset, Builder.CreateStepVector(VecPhiType));
Value *GEP = Builder.CreateGEP(
- ScStValueType->getPointerElementType(), NewPointerPhi,
+ II.getElementType(), NewPointerPhi,
Builder.CreateMul(
StartOffset, Builder.CreateVectorSplat(State.VF, ScalarStepValue),
"vector.gep"));
@@ -4979,7 +4977,7 @@ void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPValue *Def,
auto *CI = cast<CallInst>(&I);
SmallVector<Type *, 4> Tys;
- for (Value *ArgOperand : CI->arg_operands())
+ for (Value *ArgOperand : CI->args())
Tys.push_back(ToVectorTy(ArgOperand->getType(), VF.getKnownMinValue()));
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
@@ -5128,8 +5126,14 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {
Instruction *Update = cast<Instruction>(
cast<PHINode>(Ptr)->getIncomingValueForBlock(Latch));
- ScalarPtrs.insert(Update);
- return;
+
+ // If there is more than one user of Update (Ptr), we shouldn't assume it
+ // will be scalar after vectorisation as other users of the instruction
+ // may require widening. Otherwise, add it to ScalarPtrs.
+ if (Update->hasOneUse() && cast<Value>(*Update->user_begin()) == Ptr) {
+ ScalarPtrs.insert(Update);
+ return;
+ }
}
// We only care about bitcast and getelementptr instructions contained in
// the loop.
@@ -5142,12 +5146,11 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {
if (Worklist.count(I))
return;
- // If all users of the pointer will be memory accesses and scalar, place the
- // pointer in ScalarPtrs. Otherwise, place the pointer in
- // PossibleNonScalarPtrs.
- if (llvm::all_of(I->users(), [&](User *U) {
- return (isa<LoadInst>(U) || isa<StoreInst>(U)) &&
- isScalarUse(cast<Instruction>(U), Ptr);
+ // If the use of the pointer will be a scalar use, and all users of the
+ // pointer are memory accesses, place the pointer in ScalarPtrs. Otherwise,
+ // place the pointer in PossibleNonScalarPtrs.
+ if (isScalarUse(MemAccess, Ptr) && llvm::all_of(I->users(), [&](User *U) {
+ return isa<LoadInst>(U) || isa<StoreInst>(U);
}))
ScalarPtrs.insert(I);
else
@@ -5254,7 +5257,7 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {
}
bool LoopVectorizationCostModel::isScalarWithPredication(Instruction *I) const {
- if (!blockNeedsPredication(I->getParent()))
+ if (!blockNeedsPredicationForAnyReason(I->getParent()))
return false;
switch(I->getOpcode()) {
default:
@@ -5297,12 +5300,20 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(
// Check if masking is required.
// A Group may need masking for one of two reasons: it resides in a block that
- // needs predication, or it was decided to use masking to deal with gaps.
+ // needs predication, or it was decided to use masking to deal with gaps
+ // (either a gap at the end of a load-access that may result in a speculative
+ // load, or any gaps in a store-access).
bool PredicatedAccessRequiresMasking =
- Legal->blockNeedsPredication(I->getParent()) && Legal->isMaskRequired(I);
- bool AccessWithGapsRequiresMasking =
- Group->requiresScalarEpilogue() && !isScalarEpilogueAllowed();
- if (!PredicatedAccessRequiresMasking && !AccessWithGapsRequiresMasking)
+ blockNeedsPredicationForAnyReason(I->getParent()) &&
+ Legal->isMaskRequired(I);
+ bool LoadAccessWithGapsRequiresEpilogMasking =
+ isa<LoadInst>(I) && Group->requiresScalarEpilogue() &&
+ !isScalarEpilogueAllowed();
+ bool StoreAccessWithGapsRequiresMasking =
+ isa<StoreInst>(I) && (Group->getNumMembers() < Group->getFactor());
+ if (!PredicatedAccessRequiresMasking &&
+ !LoadAccessWithGapsRequiresEpilogMasking &&
+ !StoreAccessWithGapsRequiresMasking)
return true;
// If masked interleaving is required, we expect that the user/target had
@@ -5311,6 +5322,9 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(
assert(useMaskedInterleavedAccesses(TTI) &&
"Masked interleave-groups for predicated accesses are not enabled.");
+ if (Group->isReverse())
+ return false;
+
auto *Ty = getLoadStoreType(I);
const Align Alignment = getLoadStoreAlignment(I);
return isa<LoadInst>(I) ? TTI.isLegalMaskedLoad(Ty, Alignment)
@@ -5320,14 +5334,13 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(
bool LoopVectorizationCostModel::memoryInstructionCanBeWidened(
Instruction *I, ElementCount VF) {
// Get and ensure we have a valid memory instruction.
- LoadInst *LI = dyn_cast<LoadInst>(I);
- StoreInst *SI = dyn_cast<StoreInst>(I);
- assert((LI || SI) && "Invalid memory instruction");
+ assert((isa<LoadInst, StoreInst>(I)) && "Invalid memory instruction");
auto *Ptr = getLoadStorePointerOperand(I);
+ auto *ScalarTy = getLoadStoreType(I);
// In order to be widened, the pointer should be consecutive, first of all.
- if (!Legal->isConsecutivePtr(Ptr))
+ if (!Legal->isConsecutivePtr(ScalarTy, Ptr))
return false;
// If the instruction is a store located in a predicated block, it will be
@@ -5338,7 +5351,6 @@ bool LoopVectorizationCostModel::memoryInstructionCanBeWidened(
// If the instruction's allocated size doesn't equal it's type size, it
// requires padding and will be scalarized.
auto &DL = I->getModule()->getDataLayout();
- auto *ScalarTy = LI ? LI->getType() : SI->getValueOperand()->getType();
if (hasIrregularType(ScalarTy, DL))
return false;
@@ -5369,12 +5381,14 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
return (!I || !TheLoop->contains(I));
};
+ // Worklist containing uniform instructions demanding lane 0.
SetVector<Instruction *> Worklist;
BasicBlock *Latch = TheLoop->getLoopLatch();
- // Instructions that are scalar with predication must not be considered
- // uniform after vectorization, because that would create an erroneous
- // replicating region where only a single instance out of VF should be formed.
+ // Add uniform instructions demanding lane 0 to the worklist. Instructions
+ // that are scalar with predication must not be considered uniform after
+ // vectorization, because that would create an erroneous replicating region
+ // where only a single instance out of VF should be formed.
// TODO: optimize such seldom cases if found important, see PR40816.
auto addToWorklistIfAllowed = [&](Instruction *I) -> void {
if (isOutOfScope(I)) {
@@ -5433,6 +5447,30 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
// lane 0 demanded or b) are uses which demand only lane 0 of their operand.
for (auto *BB : TheLoop->blocks())
for (auto &I : *BB) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::sideeffect:
+ case Intrinsic::experimental_noalias_scope_decl:
+ case Intrinsic::assume:
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ if (TheLoop->hasLoopInvariantOperands(&I))
+ addToWorklistIfAllowed(&I);
+ break;
+ default:
+ break;
+ }
+ }
+
+ // ExtractValue instructions must be uniform, because the operands are
+ // known to be loop-invariant.
+ if (auto *EVI = dyn_cast<ExtractValueInst>(&I)) {
+ assert(isOutOfScope(EVI->getAggregateOperand()) &&
+ "Expected aggregate value to be loop invariant");
+ addToWorklistIfAllowed(EVI);
+ continue;
+ }
+
// If there's no pointer operand, there's nothing to do.
auto *Ptr = getLoadStorePointerOperand(&I);
if (!Ptr)
@@ -5565,13 +5603,8 @@ bool LoopVectorizationCostModel::runtimeChecksRequired() {
ElementCount
LoopVectorizationCostModel::getMaxLegalScalableVF(unsigned MaxSafeElements) {
- if (!TTI.supportsScalableVectors() && !ForceTargetSupportsScalableVectors) {
- reportVectorizationInfo(
- "Disabling scalable vectorization, because target does not "
- "support scalable vectors.",
- "ScalableVectorsUnsupported", ORE, TheLoop);
+ if (!TTI.supportsScalableVectors() && !ForceTargetSupportsScalableVectors)
return ElementCount::getScalable(0);
- }
if (Hints->isScalableVectorizationDisabled()) {
reportVectorizationInfo("Scalable vectorization is explicitly disabled",
@@ -5579,6 +5612,8 @@ LoopVectorizationCostModel::getMaxLegalScalableVF(unsigned MaxSafeElements) {
return ElementCount::getScalable(0);
}
+ LLVM_DEBUG(dbgs() << "LV: Scalable vectorization is available\n");
+
auto MaxScalableVF = ElementCount::getScalable(
std::numeric_limits<ElementCount::ScalarTy>::max());
@@ -5614,6 +5649,13 @@ LoopVectorizationCostModel::getMaxLegalScalableVF(unsigned MaxSafeElements) {
// Limit MaxScalableVF by the maximum safe dependence distance.
Optional<unsigned> MaxVScale = TTI.getMaxVScale();
+ if (!MaxVScale && TheFunction->hasFnAttribute(Attribute::VScaleRange)) {
+ unsigned VScaleMax = TheFunction->getFnAttribute(Attribute::VScaleRange)
+ .getVScaleRangeArgs()
+ .second;
+ if (VScaleMax > 0)
+ MaxVScale = VScaleMax;
+ }
MaxScalableVF = ElementCount::getScalable(
MaxVScale ? (MaxSafeElements / MaxVScale.getValue()) : 0);
if (!MaxScalableVF)
@@ -5681,17 +5723,32 @@ LoopVectorizationCostModel::computeFeasibleMaxVF(unsigned ConstTripCount,
return MaxSafeFixedVF;
}
- LLVM_DEBUG(dbgs() << "LV: User VF=" << UserVF
- << " is unsafe. Ignoring scalable UserVF.\n");
- ORE->emit([&]() {
- return OptimizationRemarkAnalysis(DEBUG_TYPE, "VectorizationFactor",
- TheLoop->getStartLoc(),
- TheLoop->getHeader())
- << "User-specified vectorization factor "
- << ore::NV("UserVectorizationFactor", UserVF)
- << " is unsafe. Ignoring the hint to let the compiler pick a "
- "suitable VF.";
- });
+ if (!TTI.supportsScalableVectors() && !ForceTargetSupportsScalableVectors) {
+ LLVM_DEBUG(dbgs() << "LV: User VF=" << UserVF
+ << " is ignored because scalable vectors are not "
+ "available.\n");
+ ORE->emit([&]() {
+ return OptimizationRemarkAnalysis(DEBUG_TYPE, "VectorizationFactor",
+ TheLoop->getStartLoc(),
+ TheLoop->getHeader())
+ << "User-specified vectorization factor "
+ << ore::NV("UserVectorizationFactor", UserVF)
+ << " is ignored because the target does not support scalable "
+ "vectors. The compiler will pick a more suitable value.";
+ });
+ } else {
+ LLVM_DEBUG(dbgs() << "LV: User VF=" << UserVF
+ << " is unsafe. Ignoring scalable UserVF.\n");
+ ORE->emit([&]() {
+ return OptimizationRemarkAnalysis(DEBUG_TYPE, "VectorizationFactor",
+ TheLoop->getStartLoc(),
+ TheLoop->getHeader())
+ << "User-specified vectorization factor "
+ << ore::NV("UserVectorizationFactor", UserVF)
+ << " is unsafe. Ignoring the hint to let the compiler pick a "
+ "more suitable value.";
+ });
+ }
}
LLVM_DEBUG(dbgs() << "LV: The Smallest and Widest types: " << SmallestType
@@ -5972,19 +6029,27 @@ bool LoopVectorizationCostModel::isMoreProfitable(
return RTCostA < RTCostB;
}
- // When set to preferred, for now assume vscale may be larger than 1, so
- // that scalable vectorization is slightly favorable over fixed-width
- // vectorization.
+ // Improve estimate for the vector width if it is scalable.
+ unsigned EstimatedWidthA = A.Width.getKnownMinValue();
+ unsigned EstimatedWidthB = B.Width.getKnownMinValue();
+ if (Optional<unsigned> VScale = TTI.getVScaleForTuning()) {
+ if (A.Width.isScalable())
+ EstimatedWidthA *= VScale.getValue();
+ if (B.Width.isScalable())
+ EstimatedWidthB *= VScale.getValue();
+ }
+
+ // When set to preferred, for now assume vscale may be larger than 1 (or the
+ // one being tuned for), so that scalable vectorization is slightly favorable
+ // over fixed-width vectorization.
if (Hints->isScalableVectorizationPreferred())
if (A.Width.isScalable() && !B.Width.isScalable())
- return (CostA * B.Width.getKnownMinValue()) <=
- (CostB * A.Width.getKnownMinValue());
+ return (CostA * B.Width.getFixedValue()) <= (CostB * EstimatedWidthA);
// To avoid the need for FP division:
// (CostA / A.Width) < (CostB / B.Width)
// <=> (CostA * B.Width) < (CostB * A.Width)
- return (CostA * B.Width.getKnownMinValue()) <
- (CostB * A.Width.getKnownMinValue());
+ return (CostA * EstimatedWidthB) < (CostB * EstimatedWidthA);
}
VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(
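
A worked standalone example (editor's sketch; CandidateVF is an illustrative struct, not the LLVM type) of the cross-multiplied comparison used in the isMoreProfitable hunk above: CostA/WidthA < CostB/WidthB is evaluated as CostA*WidthB < CostB*WidthA to avoid FP division, with scalable widths first multiplied by the vscale value the target tunes for.

  #include <cstdio>

  struct CandidateVF {
    unsigned Cost;
    unsigned MinWidth;
    bool Scalable;
  };

  // Returns true if A has the lower cost per element than B.
  bool isMoreProfitable(CandidateVF A, CandidateVF B, unsigned VScaleForTuning) {
    unsigned WidthA = A.MinWidth * (A.Scalable ? VScaleForTuning : 1);
    unsigned WidthB = B.MinWidth * (B.Scalable ? VScaleForTuning : 1);
    // Cross-multiply instead of dividing: CostA/WidthA < CostB/WidthB.
    return A.Cost * WidthB < B.Cost * WidthA;
  }

  int main() {
    CandidateVF Fixed8 = {20, 8, false};   // cost 20 for an 8-wide fixed VF
    CandidateVF Scal4  = {12, 4, true};    // cost 12 for <vscale x 4 x ...>
    // Assuming the target tunes for vscale = 2, the scalable candidate also
    // covers 8 elements per iteration, but at the lower cost.
    std::printf("%s\n", isMoreProfitable(Scal4, Fixed8, 2) ? "scalable wins"
                                                           : "fixed wins");
    return 0;
  }
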
@@ -6014,11 +6079,22 @@ VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(
VectorizationCostTy C = expectedCost(i, &InvalidCosts);
VectorizationFactor Candidate(i, C.first);
- LLVM_DEBUG(
- dbgs() << "LV: Vector loop of width " << i << " costs: "
- << (Candidate.Cost / Candidate.Width.getKnownMinValue())
- << (i.isScalable() ? " (assuming a minimum vscale of 1)" : "")
- << ".\n");
+
+#ifndef NDEBUG
+ unsigned AssumedMinimumVscale = 1;
+ if (Optional<unsigned> VScale = TTI.getVScaleForTuning())
+ AssumedMinimumVscale = VScale.getValue();
+ unsigned Width =
+ Candidate.Width.isScalable()
+ ? Candidate.Width.getKnownMinValue() * AssumedMinimumVscale
+ : Candidate.Width.getFixedValue();
+ LLVM_DEBUG(dbgs() << "LV: Vector loop of width " << i
+ << " costs: " << (Candidate.Cost / Width));
+ if (i.isScalable())
+ LLVM_DEBUG(dbgs() << " (assuming a minimum vscale of "
+ << AssumedMinimumVscale << ")");
+ LLVM_DEBUG(dbgs() << ".\n");
+#endif
if (!C.second && !ForceVectorization) {
LLVM_DEBUG(
@@ -6182,15 +6258,6 @@ LoopVectorizationCostModel::selectEpilogueVectorizationFactor(
return Result;
}
- // FIXME: This can be fixed for scalable vectors later, because at this stage
- // the LoopVectorizer will only consider vectorizing a loop with scalable
- // vectors when the loop has a hint to enable vectorization for a given VF.
- if (MainLoopVF.isScalable()) {
- LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization for scalable vectors not "
- "yet supported.\n");
- return Result;
- }
-
// Not really a cost consideration, but check for unsupported cases here to
// simplify the logic.
if (!isCandidateForEpilogueVectorization(*TheLoop, MainLoopVF)) {
@@ -6202,9 +6269,9 @@ LoopVectorizationCostModel::selectEpilogueVectorizationFactor(
if (EpilogueVectorizationForceVF > 1) {
LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization factor is forced.\n";);
- if (LVP.hasPlanWithVFs(
- {MainLoopVF, ElementCount::getFixed(EpilogueVectorizationForceVF)}))
- return {ElementCount::getFixed(EpilogueVectorizationForceVF), 0};
+ ElementCount ForcedEC = ElementCount::getFixed(EpilogueVectorizationForceVF);
+ if (LVP.hasPlanWithVF(ForcedEC))
+ return {ForcedEC, 0};
else {
LLVM_DEBUG(
dbgs()
@@ -6221,14 +6288,24 @@ LoopVectorizationCostModel::selectEpilogueVectorizationFactor(
return Result;
}
- if (!isEpilogueVectorizationProfitable(MainLoopVF))
+ auto FixedMainLoopVF = ElementCount::getFixed(MainLoopVF.getKnownMinValue());
+ if (MainLoopVF.isScalable())
+ LLVM_DEBUG(
+ dbgs() << "LEV: Epilogue vectorization using scalable vectors not "
+ "yet supported. Converting to fixed-width (VF="
+ << FixedMainLoopVF << ") instead\n");
+
+ if (!isEpilogueVectorizationProfitable(FixedMainLoopVF)) {
+ LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization is not profitable for "
+ "this loop\n");
return Result;
+ }
for (auto &NextVF : ProfitableVFs)
- if (ElementCount::isKnownLT(NextVF.Width, MainLoopVF) &&
+ if (ElementCount::isKnownLT(NextVF.Width, FixedMainLoopVF) &&
(Result.Width.getFixedValue() == 1 ||
isMoreProfitable(NextVF, Result)) &&
- LVP.hasPlanWithVFs({MainLoopVF, NextVF.Width}))
+ LVP.hasPlanWithVF(NextVF.Width))
Result = NextVF;
if (Result != VectorizationFactor::Disabled())
@@ -6471,6 +6548,22 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
unsigned StoresIC = IC / (NumStores ? NumStores : 1);
unsigned LoadsIC = IC / (NumLoads ? NumLoads : 1);
+ // There is little point in interleaving for reductions containing selects
+ // and compares when VF=1 since it may just create more overhead than it's
+ // worth for loops with small trip counts. This is because we still have to
+ // do the final reduction after the loop.
+ bool HasSelectCmpReductions =
+ HasReductions &&
+ any_of(Legal->getReductionVars(), [&](auto &Reduction) -> bool {
+ const RecurrenceDescriptor &RdxDesc = Reduction.second;
+ return RecurrenceDescriptor::isSelectCmpRecurrenceKind(
+ RdxDesc.getRecurrenceKind());
+ });
+ if (HasSelectCmpReductions) {
+ LLVM_DEBUG(dbgs() << "LV: Not interleaving select-cmp reductions.\n");
+ return 1;
+ }
+
// If we have a scalar reduction (vector reductions are already dealt with
// by this point), we can increase the critical path length if the loop
// we're interleaving is inside another loop. For tree-wise reductions
@@ -6756,7 +6849,7 @@ void LoopVectorizationCostModel::collectInstsToScalarize(ElementCount VF) {
// determine if it would be better to not if-convert the blocks they are in.
// If so, we also record the instructions to scalarize.
for (BasicBlock *BB : TheLoop->blocks()) {
- if (!blockNeedsPredication(BB))
+ if (!blockNeedsPredicationForAnyReason(BB))
continue;
for (Instruction &I : *BB)
if (isScalarWithPredication(&I)) {
@@ -6851,7 +6944,7 @@ int LoopVectorizationCostModel::computePredInstDiscount(
if (isScalarWithPredication(I) && !I->getType()->isVoidTy()) {
ScalarCost += TTI.getScalarizationOverhead(
cast<VectorType>(ToVectorTy(I->getType(), VF)),
- APInt::getAllOnesValue(VF.getFixedValue()), true, false);
+ APInt::getAllOnes(VF.getFixedValue()), true, false);
ScalarCost +=
VF.getFixedValue() *
TTI.getCFInstrCost(Instruction::PHI, TTI::TCK_RecipThroughput);
@@ -6870,7 +6963,7 @@ int LoopVectorizationCostModel::computePredInstDiscount(
else if (needsExtract(J, VF)) {
ScalarCost += TTI.getScalarizationOverhead(
cast<VectorType>(ToVectorTy(J->getType(), VF)),
- APInt::getAllOnesValue(VF.getFixedValue()), false, true);
+ APInt::getAllOnes(VF.getFixedValue()), false, true);
}
}
@@ -7016,7 +7109,7 @@ LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
auto *Vec_i1Ty =
VectorType::get(IntegerType::getInt1Ty(ValTy->getContext()), VF);
Cost += TTI.getScalarizationOverhead(
- Vec_i1Ty, APInt::getAllOnesValue(VF.getKnownMinValue()),
+ Vec_i1Ty, APInt::getAllOnes(VF.getKnownMinValue()),
/*Insert=*/false, /*Extract=*/true);
Cost += TTI.getCFInstrCost(Instruction::Br, TTI::TCK_RecipThroughput);
@@ -7036,7 +7129,7 @@ LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
auto *VectorTy = cast<VectorType>(ToVectorTy(ValTy, VF));
Value *Ptr = getLoadStorePointerOperand(I);
unsigned AS = getLoadStoreAddressSpace(I);
- int ConsecutiveStride = Legal->isConsecutivePtr(Ptr);
+ int ConsecutiveStride = Legal->isConsecutivePtr(ValTy, Ptr);
enum TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
assert((ConsecutiveStride == 1 || ConsecutiveStride == -1) &&
@@ -7117,18 +7210,16 @@ LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I,
unsigned InterleaveFactor = Group->getFactor();
auto *WideVecTy = VectorType::get(ValTy, VF * InterleaveFactor);
- // Holds the indices of existing members in an interleaved load group.
- // An interleaved store group doesn't need this as it doesn't allow gaps.
+ // Holds the indices of existing members in the interleaved group.
SmallVector<unsigned, 4> Indices;
- if (isa<LoadInst>(I)) {
- for (unsigned i = 0; i < InterleaveFactor; i++)
- if (Group->getMember(i))
- Indices.push_back(i);
- }
+ for (unsigned IF = 0; IF < InterleaveFactor; IF++)
+ if (Group->getMember(IF))
+ Indices.push_back(IF);
// Calculate the cost of the whole interleaved group.
bool UseMaskForGaps =
- Group->requiresScalarEpilogue() && !isScalarEpilogueAllowed();
+ (Group->requiresScalarEpilogue() && !isScalarEpilogueAllowed()) ||
+ (isa<StoreInst>(I) && (Group->getNumMembers() < Group->getFactor()));
InstructionCost Cost = TTI.getInterleavedMemoryOpCost(
I->getOpcode(), WideVecTy, Group->getFactor(), Indices, Group->getAlign(),
AS, TTI::TCK_RecipThroughput, Legal->isMaskRequired(I), UseMaskForGaps);
@@ -7210,8 +7301,41 @@ Optional<InstructionCost> LoopVectorizationCostModel::getReductionPatternCost(
VectorTy = VectorType::get(I->getOperand(0)->getType(), VectorTy);
Instruction *Op0, *Op1;
- if (RedOp && match(RedOp, m_ZExtOrSExt(m_Value())) &&
- !TheLoop->isLoopInvariant(RedOp)) {
+ if (RedOp &&
+ match(RedOp,
+ m_ZExtOrSExt(m_Mul(m_Instruction(Op0), m_Instruction(Op1)))) &&
+ match(Op0, m_ZExtOrSExt(m_Value())) &&
+ Op0->getOpcode() == Op1->getOpcode() &&
+ Op0->getOperand(0)->getType() == Op1->getOperand(0)->getType() &&
+ !TheLoop->isLoopInvariant(Op0) && !TheLoop->isLoopInvariant(Op1) &&
+ (Op0->getOpcode() == RedOp->getOpcode() || Op0 == Op1)) {
+
+ // Matched reduce(ext(mul(ext(A), ext(B))))
+ // Note that the extend opcodes need to all match, or if A==B they will have
+ // been converted to zext(mul(sext(A), sext(A))) as it is known positive,
+ // which is equally fine.
+ bool IsUnsigned = isa<ZExtInst>(Op0);
+ auto *ExtType = VectorType::get(Op0->getOperand(0)->getType(), VectorTy);
+ auto *MulType = VectorType::get(Op0->getType(), VectorTy);
+
+ InstructionCost ExtCost =
+ TTI.getCastInstrCost(Op0->getOpcode(), MulType, ExtType,
+ TTI::CastContextHint::None, CostKind, Op0);
+ InstructionCost MulCost =
+ TTI.getArithmeticInstrCost(Instruction::Mul, MulType, CostKind);
+ InstructionCost Ext2Cost =
+ TTI.getCastInstrCost(RedOp->getOpcode(), VectorTy, MulType,
+ TTI::CastContextHint::None, CostKind, RedOp);
+
+ InstructionCost RedCost = TTI.getExtendedAddReductionCost(
+ /*IsMLA=*/true, IsUnsigned, RdxDesc.getRecurrenceType(), ExtType,
+ CostKind);
+
+ if (RedCost.isValid() &&
+ RedCost < ExtCost * 2 + MulCost + Ext2Cost + BaseCost)
+ return I == RetI ? RedCost : 0;
+ } else if (RedOp && match(RedOp, m_ZExtOrSExt(m_Value())) &&
+ !TheLoop->isLoopInvariant(RedOp)) {
// Matched reduce(ext(A))
bool IsUnsigned = isa<ZExtInst>(RedOp);
auto *ExtType = VectorType::get(RedOp->getOperand(0)->getType(), VectorTy);
@@ -7245,7 +7369,7 @@ Optional<InstructionCost> LoopVectorizationCostModel::getReductionPatternCost(
if (RedCost.isValid() && RedCost < ExtCost * 2 + MulCost + BaseCost)
return I == RetI ? RedCost : 0;
- } else {
+ } else if (!match(I, m_ZExtOrSExt(m_Value()))) {
// Matched reduce(mul())
InstructionCost MulCost =
TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy, CostKind);
@@ -7304,9 +7428,14 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
Type *VectorTy;
InstructionCost C = getInstructionCost(I, VF, VectorTy);
- bool TypeNotScalarized =
- VF.isVector() && VectorTy->isVectorTy() &&
- TTI.getNumberOfParts(VectorTy) < VF.getKnownMinValue();
+ bool TypeNotScalarized = false;
+ if (VF.isVector() && VectorTy->isVectorTy()) {
+ unsigned NumParts = TTI.getNumberOfParts(VectorTy);
+ if (NumParts)
+ TypeNotScalarized = NumParts < VF.getKnownMinValue();
+ else
+ C = InstructionCost::getInvalid();
+ }
return VectorizationCostTy(C, TypeNotScalarized);
}
@@ -7327,8 +7456,8 @@ LoopVectorizationCostModel::getScalarizationOverhead(Instruction *I,
if (!RetTy->isVoidTy() &&
(!isa<LoadInst>(I) || !TTI.supportsEfficientVectorElementLoadStore()))
Cost += TTI.getScalarizationOverhead(
- cast<VectorType>(RetTy), APInt::getAllOnesValue(VF.getKnownMinValue()),
- true, false);
+ cast<VectorType>(RetTy), APInt::getAllOnes(VF.getKnownMinValue()), true,
+ false);
// Some targets keep addresses scalar.
if (isa<LoadInst>(I) && !TTI.prefersVectorizedAddressing())
@@ -7340,7 +7469,7 @@ LoopVectorizationCostModel::getScalarizationOverhead(Instruction *I,
// Collect operands to consider.
CallInst *CI = dyn_cast<CallInst>(I);
- Instruction::op_range Ops = CI ? CI->arg_operands() : I->operands();
+ Instruction::op_range Ops = CI ? CI->args() : I->operands();
// Skip operands that do not require extraction/scalarization and do not incur
// any overhead.
@@ -7391,8 +7520,8 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {
// We assume that widening is the best solution when possible.
if (memoryInstructionCanBeWidened(&I, VF)) {
InstructionCost Cost = getConsecutiveMemOpCost(&I, VF);
- int ConsecutiveStride =
- Legal->isConsecutivePtr(getLoadStorePointerOperand(&I));
+ int ConsecutiveStride = Legal->isConsecutivePtr(
+ getLoadStoreType(&I), getLoadStorePointerOperand(&I));
assert((ConsecutiveStride == 1 || ConsecutiveStride == -1) &&
"Expected consecutive stride.");
InstWidening Decision =
@@ -7579,8 +7708,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF,
VectorType::get(IntegerType::getInt1Ty(RetTy->getContext()), VF);
return (
TTI.getScalarizationOverhead(
- Vec_i1Ty, APInt::getAllOnesValue(VF.getFixedValue()), false,
- true) +
+ Vec_i1Ty, APInt::getAllOnes(VF.getFixedValue()), false, true) +
(TTI.getCFInstrCost(Instruction::Br, CostKind) * VF.getFixedValue()));
} else if (I->getParent() == TheLoop->getLoopLatch() || VF.isScalar())
// The back-edge branch will remain, as will all scalar branches.
@@ -7893,7 +8021,7 @@ bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) {
// Check if the pointer operand of a load or store instruction is
// consecutive.
if (auto *Ptr = getLoadStorePointerOperand(Inst))
- return Legal->isConsecutivePtr(Ptr);
+ return Legal->isConsecutivePtr(getLoadStoreType(Inst), Ptr);
return false;
}
@@ -8019,7 +8147,7 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
return None;
// Invalidate interleave groups if all blocks of loop will be predicated.
- if (CM.blockNeedsPredication(OrigLoop->getHeader()) &&
+ if (CM.blockNeedsPredicationForAnyReason(OrigLoop->getHeader()) &&
!useMaskedInterleavedAccesses(*TTI)) {
LLVM_DEBUG(
dbgs()
@@ -8105,28 +8233,30 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
return SelectedVF;
}
-void LoopVectorizationPlanner::setBestPlan(ElementCount VF, unsigned UF) {
- LLVM_DEBUG(dbgs() << "Setting best plan to VF=" << VF << ", UF=" << UF
- << '\n');
- BestVF = VF;
- BestUF = UF;
+VPlan &LoopVectorizationPlanner::getBestPlanFor(ElementCount VF) const {
+ assert(count_if(VPlans,
+ [VF](const VPlanPtr &Plan) { return Plan->hasVF(VF); }) ==
+ 1 &&
+ "Best VF has not a single VPlan.");
- erase_if(VPlans, [VF](const VPlanPtr &Plan) {
- return !Plan->hasVF(VF);
- });
- assert(VPlans.size() == 1 && "Best VF has not a single VPlan.");
+ for (const VPlanPtr &Plan : VPlans) {
+ if (Plan->hasVF(VF))
+ return *Plan.get();
+ }
+ llvm_unreachable("No plan found!");
}
-void LoopVectorizationPlanner::executePlan(InnerLoopVectorizer &ILV,
+void LoopVectorizationPlanner::executePlan(ElementCount BestVF, unsigned BestUF,
+ VPlan &BestVPlan,
+ InnerLoopVectorizer &ILV,
DominatorTree *DT) {
+ LLVM_DEBUG(dbgs() << "Executing best plan with VF=" << BestVF << ", UF=" << BestUF
+ << '\n');
+
// Perform the actual loop transformation.
// 1. Create a new empty loop. Unlink the old loop and connect the new one.
- assert(BestVF.hasValue() && "Vectorization Factor is missing");
- assert(VPlans.size() == 1 && "Not a single VPlan to execute.");
-
- VPTransformState State{
- *BestVF, BestUF, LI, DT, ILV.Builder, &ILV, VPlans.front().get()};
+ VPTransformState State{BestVF, BestUF, LI, DT, ILV.Builder, &ILV, &BestVPlan};
State.CFG.PrevBB = ILV.createVectorizedLoopSkeleton();
State.TripCount = ILV.getOrCreateTripCount(nullptr);
State.CanonicalIV = ILV.Induction;
@@ -8142,7 +8272,7 @@ void LoopVectorizationPlanner::executePlan(InnerLoopVectorizer &ILV,
//===------------------------------------------------===//
// 2. Copy and widen instructions from the old loop into the new loop.
- VPlans.front()->execute(&State);
+ BestVPlan.execute(&State);
// 3. Fix the vectorized code: take care of header phi's, live-outs,
// predication, updating analyses.
@@ -8222,21 +8352,19 @@ Value *InnerLoopUnroller::reverseVector(Value *Vec) { return Vec; }
Value *InnerLoopUnroller::getBroadcastInstrs(Value *V) { return V; }
-Value *InnerLoopUnroller::getStepVector(Value *Val, int StartIdx, Value *Step,
+Value *InnerLoopUnroller::getStepVector(Value *Val, Value *StartIdx,
+ Value *Step,
Instruction::BinaryOps BinOp) {
// When unrolling and the VF is 1, we only need to add a simple scalar.
Type *Ty = Val->getType();
assert(!Ty->isVectorTy() && "Val must be a scalar");
if (Ty->isFloatingPointTy()) {
- Constant *C = ConstantFP::get(Ty, (double)StartIdx);
-
// Floating-point operations inherit FMF via the builder's flags.
- Value *MulOp = Builder.CreateFMul(C, Step);
+ Value *MulOp = Builder.CreateFMul(StartIdx, Step);
return Builder.CreateBinOp(BinOp, Val, MulOp);
}
- Constant *C = ConstantInt::get(Ty, StartIdx);
- return Builder.CreateAdd(Val, Builder.CreateMul(C, Step), "induction");
+ return Builder.CreateAdd(Val, Builder.CreateMul(StartIdx, Step), "induction");
}
static void AddRuntimeUnrollDisableMetaData(Loop *L) {
@@ -8311,7 +8439,9 @@ BasicBlock *EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton() {
OldInduction = Legal->getPrimaryInduction();
Type *IdxTy = Legal->getWidestInductionType();
Value *StartIdx = ConstantInt::get(IdxTy, 0);
- Constant *Step = ConstantInt::get(IdxTy, VF.getKnownMinValue() * UF);
+
+ IRBuilder<> B(&*Lp->getLoopPreheader()->getFirstInsertionPt());
+ Value *Step = getRuntimeVF(B, IdxTy, VF * UF);
Value *CountRoundDown = getOrCreateVectorTripCount(Lp);
EPI.VectorTripCount = CountRoundDown;
Induction =
@@ -8329,9 +8459,9 @@ BasicBlock *EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton() {
void EpilogueVectorizerMainLoop::printDebugTracesAtStart() {
LLVM_DEBUG({
dbgs() << "Create Skeleton for epilogue vectorized loop (first pass)\n"
- << "Main Loop VF:" << EPI.MainLoopVF.getKnownMinValue()
+ << "Main Loop VF:" << EPI.MainLoopVF
<< ", Main Loop UF:" << EPI.MainLoopUF
- << ", Epilogue Loop VF:" << EPI.EpilogueVF.getKnownMinValue()
+ << ", Epilogue Loop VF:" << EPI.EpilogueVF
<< ", Epilogue Loop UF:" << EPI.EpilogueUF << "\n";
});
}
@@ -8346,8 +8476,7 @@ BasicBlock *EpilogueVectorizerMainLoop::emitMinimumIterationCountCheck(
Loop *L, BasicBlock *Bypass, bool ForEpilogue) {
assert(L && "Expected valid Loop.");
assert(Bypass && "Expected valid bypass basic block.");
- unsigned VFactor =
- ForEpilogue ? EPI.EpilogueVF.getKnownMinValue() : VF.getKnownMinValue();
+ ElementCount VFactor = ForEpilogue ? EPI.EpilogueVF : VF;
unsigned UFactor = ForEpilogue ? EPI.EpilogueUF : UF;
Value *Count = getOrCreateTripCount(L);
// Reuse existing vector loop preheader for TC checks.
@@ -8361,7 +8490,7 @@ BasicBlock *EpilogueVectorizerMainLoop::emitMinimumIterationCountCheck(
ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
Value *CheckMinIters = Builder.CreateICmp(
- P, Count, ConstantInt::get(Count->getType(), VFactor * UFactor),
+ P, Count, createStepForVF(Builder, Count->getType(), VFactor, UFactor),
"min.iters.check");
if (!ForEpilogue)
@@ -8513,11 +8642,11 @@ EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck(
auto P = Cost->requiresScalarEpilogue(EPI.EpilogueVF) ?
ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
- Value *CheckMinIters = Builder.CreateICmp(
- P, Count,
- ConstantInt::get(Count->getType(),
- EPI.EpilogueVF.getKnownMinValue() * EPI.EpilogueUF),
- "min.epilog.iters.check");
+ Value *CheckMinIters =
+ Builder.CreateICmp(P, Count,
+ createStepForVF(Builder, Count->getType(),
+ EPI.EpilogueVF, EPI.EpilogueUF),
+ "min.epilog.iters.check");
ReplaceInstWithInst(
Insert->getTerminator(),
@@ -8530,7 +8659,7 @@ EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck(
void EpilogueVectorizerEpilogueLoop::printDebugTracesAtStart() {
LLVM_DEBUG({
dbgs() << "Create Skeleton for epilogue vectorized loop (second pass)\n"
- << "Epilogue Loop VF:" << EPI.EpilogueVF.getKnownMinValue()
+ << "Epilogue Loop VF:" << EPI.EpilogueVF
<< ", Epilogue Loop UF:" << EPI.EpilogueUF << "\n";
});
}
@@ -8628,7 +8757,7 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) {
VPValue *BlockMask = nullptr;
if (OrigLoop->getHeader() == BB) {
- if (!CM.blockNeedsPredication(BB))
+ if (!CM.blockNeedsPredicationForAnyReason(BB))
return BlockMaskCache[BB] = BlockMask; // Loop incoming mask is all-one.
// Create the block in mask as the first non-phi instruction in the block.
@@ -8643,9 +8772,9 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) {
if (Legal->getPrimaryInduction())
IV = Plan->getOrAddVPValue(Legal->getPrimaryInduction());
else {
- auto IVRecipe = new VPWidenCanonicalIVRecipe();
+ auto *IVRecipe = new VPWidenCanonicalIVRecipe();
Builder.getInsertBlock()->insert(IVRecipe, NewInsertionPoint);
- IV = IVRecipe->getVPSingleValue();
+ IV = IVRecipe;
}
VPValue *BTC = Plan->getOrCreateBackedgeTakenCount();
bool TailFolded = !CM.isScalarEpilogueAllowed();
@@ -8708,12 +8837,21 @@ VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(Instruction *I,
if (Legal->isMaskRequired(I))
Mask = createBlockInMask(I->getParent(), Plan);
+ // Determine if the pointer operand of the access is either consecutive or
+ // reverse consecutive.
+ LoopVectorizationCostModel::InstWidening Decision =
+ CM.getWideningDecision(I, Range.Start);
+ bool Reverse = Decision == LoopVectorizationCostModel::CM_Widen_Reverse;
+ bool Consecutive =
+ Reverse || Decision == LoopVectorizationCostModel::CM_Widen;
+
if (LoadInst *Load = dyn_cast<LoadInst>(I))
- return new VPWidenMemoryInstructionRecipe(*Load, Operands[0], Mask);
+ return new VPWidenMemoryInstructionRecipe(*Load, Operands[0], Mask,
+ Consecutive, Reverse);
StoreInst *Store = cast<StoreInst>(I);
return new VPWidenMemoryInstructionRecipe(*Store, Operands[1], Operands[0],
- Mask);
+ Mask, Consecutive, Reverse);
}
VPWidenIntOrFpInductionRecipe *
@@ -8829,7 +8967,7 @@ VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
if (!LoopVectorizationPlanner::getDecisionAndClampRange(willWiden, Range))
return nullptr;
- ArrayRef<VPValue *> Ops = Operands.take_front(CI->getNumArgOperands());
+ ArrayRef<VPValue *> Ops = Operands.take_front(CI->arg_size());
return new VPWidenCallRecipe(*CI, make_range(Ops.begin(), Ops.end()));
}
@@ -8916,6 +9054,37 @@ VPBasicBlock *VPRecipeBuilder::handleReplication(
bool IsPredicated = LoopVectorizationPlanner::getDecisionAndClampRange(
[&](ElementCount VF) { return CM.isPredicatedInst(I); }, Range);
+ // Even if the instruction is not marked as uniform, there are certain
+ // intrinsic calls that can be effectively treated as such, so we check for
+ // them here. Conservatively, we only do this for scalable vectors, since
+ // for fixed-width VFs we can always fall back on full scalarization.
+ if (!IsUniform && Range.Start.isScalable() && isa<IntrinsicInst>(I)) {
+ switch (cast<IntrinsicInst>(I)->getIntrinsicID()) {
+ case Intrinsic::assume:
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ // For scalable vectors if one of the operands is variant then we still
+ // want to mark as uniform, which will generate one instruction for just
+ // the first lane of the vector. We can't scalarize the call in the same
+ // way as for fixed-width vectors because we don't know how many lanes
+ // there are.
+ //
+ // The reasons for doing it this way for scalable vectors are:
+ // 1. For the assume intrinsic generating the instruction for the first
+ // lane is still better than not generating any at all. For
+ // example, the input may be a splat across all lanes.
+ // 2. For the lifetime start/end intrinsics the pointer operand only
+ // does anything useful when the input comes from a stack object,
+ // which suggests it should always be uniform. For non-stack objects
+ // the effect is to poison the object, which still allows us to
+ // remove the call.
+ IsUniform = true;
+ break;
+ default:
+ break;
+ }
+ }
+
auto *Recipe = new VPReplicateRecipe(I, Plan->mapToVPValues(I->operands()),
IsUniform, IsPredicated);
setRecipe(I, Recipe);
@@ -9137,6 +9306,8 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
RecipeBuilder.recordRecipeOf(R);
// For min/max reductions, where we have a pair of icmp/select, we also
// need to record the ICmp recipe, so it can be removed later.
+ assert(!RecurrenceDescriptor::isSelectCmpRecurrenceKind(Kind) &&
+ "Only min/max recurrences allowed for inloop reductions");
if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind))
RecipeBuilder.recordRecipeOf(cast<Instruction>(R->getOperand(0)));
}
@@ -9165,22 +9336,27 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
// visit each basic block after having visited its predecessor basic blocks.
// ---------------------------------------------------------------------------
- // Create a dummy pre-entry VPBasicBlock to start building the VPlan.
auto Plan = std::make_unique<VPlan>();
- VPBasicBlock *VPBB = new VPBasicBlock("Pre-Entry");
- Plan->setEntry(VPBB);
// Scan the body of the loop in a topological order to visit each basic block
// after having visited its predecessor basic blocks.
LoopBlocksDFS DFS(OrigLoop);
DFS.perform(LI);
+ VPBasicBlock *VPBB = nullptr;
+ VPBasicBlock *HeaderVPBB = nullptr;
+ SmallVector<VPWidenIntOrFpInductionRecipe *> InductionsToMove;
for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO())) {
// Relevant instructions from basic block BB will be grouped into VPRecipe
// ingredients and fill a new VPBasicBlock.
unsigned VPBBsForBB = 0;
auto *FirstVPBBForBB = new VPBasicBlock(BB->getName());
- VPBlockUtils::insertBlockAfter(FirstVPBBForBB, VPBB);
+ if (VPBB)
+ VPBlockUtils::insertBlockAfter(FirstVPBBForBB, VPBB);
+ else {
+ Plan->setEntry(FirstVPBBForBB);
+ HeaderVPBB = FirstVPBBForBB;
+ }
VPBB = FirstVPBBForBB;
Builder.setInsertPoint(VPBB);
@@ -9222,6 +9398,17 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
Plan->addVPValue(UV, Def);
}
+ if (isa<VPWidenIntOrFpInductionRecipe>(Recipe) &&
+ HeaderVPBB->getFirstNonPhi() != VPBB->end()) {
+ // Keep track of VPWidenIntOrFpInductionRecipes not in the phi section
+ // of the header block. That can happen for truncates of induction
+ // variables. Those recipes are moved to the phi section of the header
+ // block after applying SinkAfter, which relies on the original
+ // position of the trunc.
+ assert(isa<TruncInst>(Instr));
+ InductionsToMove.push_back(
+ cast<VPWidenIntOrFpInductionRecipe>(Recipe));
+ }
RecipeBuilder.setRecipe(Instr, Recipe);
VPBB->appendRecipe(Recipe);
continue;
@@ -9239,17 +9426,11 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
}
}
+ assert(isa<VPBasicBlock>(Plan->getEntry()) &&
+ !Plan->getEntry()->getEntryBasicBlock()->empty() &&
+ "entry block must be set to a non-empty VPBasicBlock");
RecipeBuilder.fixHeaderPhis();
- // Discard empty dummy pre-entry VPBasicBlock. Note that other VPBasicBlocks
- // may also be empty, such as the last one VPBB, reflecting original
- // basic-blocks with no recipes.
- VPBasicBlock *PreEntry = cast<VPBasicBlock>(Plan->getEntry());
- assert(PreEntry->empty() && "Expecting empty pre-entry block.");
- VPBlockBase *Entry = Plan->setEntry(PreEntry->getSingleSuccessor());
- VPBlockUtils::disconnectBlocks(PreEntry, Entry);
- delete PreEntry;
-
// ---------------------------------------------------------------------------
// Transform initial VPlan: Apply previously taken decisions, in order, to
// bring the VPlan to its final state.
@@ -9318,6 +9499,14 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
}
}
+ // Now that sink-after is done, move induction recipes for optimized truncates
+ // to the phi section of the header block.
+ for (VPWidenIntOrFpInductionRecipe *Ind : InductionsToMove)
+ Ind->moveBefore(*HeaderVPBB, HeaderVPBB->getFirstNonPhi());
+
+ // Adjust the recipes for any inloop reductions.
+ adjustRecipesForReductions(VPBB, Plan, RecipeBuilder, Range.Start);
+
// Introduce a recipe to combine the incoming and previous values of a
// first-order recurrence.
for (VPRecipeBase &R : Plan->getEntry()->getEntryBasicBlock()->phis()) {
@@ -9325,16 +9514,20 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
if (!RecurPhi)
continue;
+ VPRecipeBase *PrevRecipe = RecurPhi->getBackedgeRecipe();
+ VPBasicBlock *InsertBlock = PrevRecipe->getParent();
+ auto *Region = GetReplicateRegion(PrevRecipe);
+ if (Region)
+ InsertBlock = cast<VPBasicBlock>(Region->getSingleSuccessor());
+ if (Region || PrevRecipe->isPhi())
+ Builder.setInsertPoint(InsertBlock, InsertBlock->getFirstNonPhi());
+ else
+ Builder.setInsertPoint(InsertBlock, std::next(PrevRecipe->getIterator()));
+
auto *RecurSplice = cast<VPInstruction>(
Builder.createNaryOp(VPInstruction::FirstOrderRecurrenceSplice,
{RecurPhi, RecurPhi->getBackedgeValue()}));
- VPRecipeBase *PrevRecipe = RecurPhi->getBackedgeRecipe();
- if (auto *Region = GetReplicateRegion(PrevRecipe)) {
- VPBasicBlock *Succ = cast<VPBasicBlock>(Region->getSingleSuccessor());
- RecurSplice->moveBefore(*Succ, Succ->getFirstNonPhi());
- } else
- RecurSplice->moveAfter(PrevRecipe);
RecurPhi->replaceAllUsesWith(RecurSplice);
// Set the first operand of RecurSplice to RecurPhi again, after replacing
// all users.
@@ -9372,22 +9565,9 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
}
}
- // Adjust the recipes for any inloop reductions.
- adjustRecipesForInLoopReductions(Plan, RecipeBuilder, Range.Start);
-
- // Finally, if tail is folded by masking, introduce selects between the phi
- // and the live-out instruction of each reduction, at the end of the latch.
- if (CM.foldTailByMasking() && !Legal->getReductionVars().empty()) {
- Builder.setInsertPoint(VPBB);
- auto *Cond = RecipeBuilder.createBlockInMask(OrigLoop->getHeader(), Plan);
- for (auto &Reduction : Legal->getReductionVars()) {
- if (CM.isInLoopReduction(Reduction.first))
- continue;
- VPValue *Phi = Plan->getOrAddVPValue(Reduction.first);
- VPValue *Red = Plan->getOrAddVPValue(Reduction.second.getLoopExitInstr());
- Builder.createNaryOp(Instruction::Select, {Cond, Red, Phi});
- }
- }
+ // From this point onwards, VPlan-to-VPlan transformations may change the plan
+ // in ways that make accessing values using original IR values incorrect.
+ Plan->disableValue2VPValue();
VPlanTransforms::sinkScalarOperands(*Plan);
VPlanTransforms::mergeReplicateRegions(*Plan);
@@ -9405,6 +9585,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
RSO.flush();
Plan->setName(PlanName);
+ assert(VPlanVerifier::verifyPlanIsValid(*Plan) && "VPlan is invalid");
return Plan;
}
@@ -9443,12 +9624,14 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
return Plan;
}
-// Adjust the recipes for any inloop reductions. The chain of instructions
-// leading from the loop exit instr to the phi need to be converted to
-// reductions, with one operand being vector and the other being the scalar
-// reduction chain.
-void LoopVectorizationPlanner::adjustRecipesForInLoopReductions(
- VPlanPtr &Plan, VPRecipeBuilder &RecipeBuilder, ElementCount MinVF) {
+// Adjust the recipes for reductions. For in-loop reductions the chain of
+// instructions leading from the loop exit instr to the phi needs to be converted
+// to reductions, with one operand being vector and the other being the scalar
+// reduction chain. For other reductions, a select is introduced between the phi
+// and live-out recipes when folding the tail.
+void LoopVectorizationPlanner::adjustRecipesForReductions(
+ VPBasicBlock *LatchVPBB, VPlanPtr &Plan, VPRecipeBuilder &RecipeBuilder,
+ ElementCount MinVF) {
for (auto &Reduction : CM.getInLoopReductionChains()) {
PHINode *Phi = Reduction.first;
RecurrenceDescriptor &RdxDesc = Legal->getReductionVars()[Phi];
@@ -9468,6 +9651,8 @@ void LoopVectorizationPlanner::adjustRecipesForInLoopReductions(
VPValue *ChainOp = Plan->getVPValue(Chain);
unsigned FirstOpId;
+ assert(!RecurrenceDescriptor::isSelectCmpRecurrenceKind(Kind) &&
+ "Only min/max recurrences allowed for inloop reductions");
if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) {
assert(isa<VPWidenSelectRecipe>(WidenRecipe) &&
"Expected to replace a VPWidenSelectSC");
@@ -9505,6 +9690,21 @@ void LoopVectorizationPlanner::adjustRecipesForInLoopReductions(
Chain = R;
}
}
+
+ // If tail is folded by masking, introduce selects between the phi
+ // and the live-out instruction of each reduction, at the end of the latch.
+ if (CM.foldTailByMasking()) {
+ for (VPRecipeBase &R : Plan->getEntry()->getEntryBasicBlock()->phis()) {
+ VPReductionPHIRecipe *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
+ if (!PhiR || PhiR->isInLoop())
+ continue;
+ Builder.setInsertPoint(LatchVPBB);
+ VPValue *Cond =
+ RecipeBuilder.createBlockInMask(OrigLoop->getHeader(), Plan);
+ VPValue *Red = PhiR->getBackedgeValue();
+ Builder.createNaryOp(Instruction::Select, {Cond, Red, PhiR});
+ }
+ }
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -9519,9 +9719,22 @@ void VPInterleaveRecipe::print(raw_ostream &O, const Twine &Indent,
O << ", ";
Mask->printAsOperand(O, SlotTracker);
}
- for (unsigned i = 0; i < IG->getFactor(); ++i)
- if (Instruction *I = IG->getMember(i))
- O << "\n" << Indent << " " << VPlanIngredient(I) << " " << i;
+
+ unsigned OpIdx = 0;
+ for (unsigned i = 0; i < IG->getFactor(); ++i) {
+ if (!IG->getMember(i))
+ continue;
+ if (getNumStoreOperands() > 0) {
+ O << "\n" << Indent << " store ";
+ getOperand(1 + OpIdx)->printAsOperand(O, SlotTracker);
+ O << " to index " << i;
+ } else {
+ O << "\n" << Indent << " ";
+ getVPValue(OpIdx)->printAsOperand(O, SlotTracker);
+ O << " = load from index " << i;
+ }
+ ++OpIdx;
+ }
}
#endif
@@ -9605,17 +9818,20 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
void VPReductionRecipe::execute(VPTransformState &State) {
assert(!State.Instance && "Reduction being replicated.");
Value *PrevInChain = State.get(getChainOp(), 0);
+ RecurKind Kind = RdxDesc->getRecurrenceKind();
+ bool IsOrdered = State.ILV->useOrderedReductions(*RdxDesc);
+ // Propagate the fast-math flags carried by the underlying instruction.
+ IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
+ State.Builder.setFastMathFlags(RdxDesc->getFastMathFlags());
for (unsigned Part = 0; Part < State.UF; ++Part) {
- RecurKind Kind = RdxDesc->getRecurrenceKind();
- bool IsOrdered = State.ILV->useOrderedReductions(*RdxDesc);
Value *NewVecOp = State.get(getVecOp(), Part);
if (VPValue *Cond = getCondOp()) {
Value *NewCond = State.get(Cond, Part);
VectorType *VecTy = cast<VectorType>(NewVecOp->getType());
- Constant *Iden = RecurrenceDescriptor::getRecurrenceIdentity(
+ Value *Iden = RdxDesc->getRecurrenceIdentity(
Kind, VecTy->getElementType(), RdxDesc->getFastMathFlags());
- Constant *IdenVec =
- ConstantVector::getSplat(VecTy->getElementCount(), Iden);
+ Value *IdenVec =
+ State.Builder.CreateVectorSplat(VecTy->getElementCount(), Iden);
Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, IdenVec);
NewVecOp = Select;
}
@@ -9627,8 +9843,8 @@ void VPReductionRecipe::execute(VPTransformState &State) {
PrevInChain);
else
NewRed = State.Builder.CreateBinOp(
- (Instruction::BinaryOps)getUnderlyingInstr()->getOpcode(),
- PrevInChain, NewVecOp);
+ (Instruction::BinaryOps)RdxDesc->getOpcode(Kind), PrevInChain,
+ NewVecOp);
PrevInChain = NewRed;
} else {
PrevInChain = State.get(getChainOp(), Part);
@@ -9640,11 +9856,10 @@ void VPReductionRecipe::execute(VPTransformState &State) {
NewRed, PrevInChain);
} else if (IsOrdered)
NextInChain = NewRed;
- else {
+ else
NextInChain = State.Builder.CreateBinOp(
- (Instruction::BinaryOps)getUnderlyingInstr()->getOpcode(), NewRed,
+ (Instruction::BinaryOps)RdxDesc->getOpcode(Kind), NewRed,
PrevInChain);
- }
State.set(this, NextInChain, Part);
}
}
@@ -9757,7 +9972,7 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
VPValue *StoredValue = isStore() ? getStoredValue() : nullptr;
State.ILV->vectorizeMemoryInstruction(
&Ingredient, State, StoredValue ? nullptr : getVPSingleValue(), getAddr(),
- StoredValue, getMask());
+ StoredValue, getMask(), Consecutive, Reverse);
}
// Determine how to lower the scalar epilogue, which depends on 1) optimising
@@ -9923,7 +10138,7 @@ static bool processLoopInVPlanNativePath(
VectorizationFactor::Disabled() == VF)
return false;
- LVP.setBestPlan(VF.Width, 1);
+ VPlan &BestPlan = LVP.getBestPlanFor(VF.Width);
{
GeneratedRTChecks Checks(*PSE.getSE(), DT, LI,
@@ -9932,7 +10147,7 @@ static bool processLoopInVPlanNativePath(
&CM, BFI, PSI, Checks);
LLVM_DEBUG(dbgs() << "Vectorizing outer loop in \""
<< L->getHeader()->getParent()->getName() << "\"\n");
- LVP.executePlan(LB, DT);
+ LVP.executePlan(VF.Width, 1, BestPlan, LB, DT);
}
// Mark the loop as already vectorized to avoid vectorizing again.
@@ -10103,7 +10318,13 @@ bool LoopVectorizePass::processLoop(Loop *L) {
return false;
}
- if (!LVL.canVectorizeFPMath(EnableStrictReductions)) {
+ bool AllowOrderedReductions;
+ // If the flag is set, use that instead and override the TTI behaviour.
+ if (ForceOrderedReductions.getNumOccurrences() > 0)
+ AllowOrderedReductions = ForceOrderedReductions;
+ else
+ AllowOrderedReductions = TTI->enableOrderedReductions();
+ if (!LVL.canVectorizeFPMath(AllowOrderedReductions)) {
ORE->emit([&]() {
auto *ExactFPMathInst = Requirements.getExactFPInst();
return OptimizationRemarkAnalysisFPCommute(DEBUG_TYPE, "CantReorderFPOps",
@@ -10248,7 +10469,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
F->getParent()->getDataLayout());
if (!VF.Width.isScalar() || IC > 1)
Checks.Create(L, *LVL.getLAI(), PSE.getUnionPredicate());
- LVP.setBestPlan(VF.Width, IC);
using namespace ore;
if (!VectorizeLoop) {
@@ -10257,7 +10477,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// interleave it.
InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, ORE, IC, &LVL,
&CM, BFI, PSI, Checks);
- LVP.executePlan(Unroller, DT);
+
+ VPlan &BestPlan = LVP.getBestPlanFor(VF.Width);
+ LVP.executePlan(VF.Width, IC, BestPlan, Unroller, DT);
ORE->emit([&]() {
return OptimizationRemark(LV_NAME, "Interleaved", L->getStartLoc(),
@@ -10276,14 +10498,13 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// The first pass vectorizes the main loop and creates a scalar epilogue
// to be vectorized by executing the plan (potentially with a different
// factor) again shortly afterwards.
- EpilogueLoopVectorizationInfo EPI(VF.Width.getKnownMinValue(), IC,
- EpilogueVF.Width.getKnownMinValue(),
- 1);
+ EpilogueLoopVectorizationInfo EPI(VF.Width, IC, EpilogueVF.Width, 1);
EpilogueVectorizerMainLoop MainILV(L, PSE, LI, DT, TLI, TTI, AC, ORE,
EPI, &LVL, &CM, BFI, PSI, Checks);
- LVP.setBestPlan(EPI.MainLoopVF, EPI.MainLoopUF);
- LVP.executePlan(MainILV, DT);
+ VPlan &BestMainPlan = LVP.getBestPlanFor(EPI.MainLoopVF);
+ LVP.executePlan(EPI.MainLoopVF, EPI.MainLoopUF, BestMainPlan, MainILV,
+ DT);
++LoopsVectorized;
simplifyLoop(L, DT, LI, SE, AC, nullptr, false /* PreserveLCSSA */);
@@ -10291,13 +10512,15 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// Second pass vectorizes the epilogue and adjusts the control flow
// edges from the first pass.
- LVP.setBestPlan(EPI.EpilogueVF, EPI.EpilogueUF);
EPI.MainLoopVF = EPI.EpilogueVF;
EPI.MainLoopUF = EPI.EpilogueUF;
EpilogueVectorizerEpilogueLoop EpilogILV(L, PSE, LI, DT, TLI, TTI, AC,
ORE, EPI, &LVL, &CM, BFI, PSI,
Checks);
- LVP.executePlan(EpilogILV, DT);
+
+ VPlan &BestEpiPlan = LVP.getBestPlanFor(EPI.EpilogueVF);
+ LVP.executePlan(EPI.EpilogueVF, EPI.EpilogueUF, BestEpiPlan, EpilogILV,
+ DT);
++LoopsEpilogueVectorized;
if (!MainILV.areSafetyChecksAdded())
@@ -10305,7 +10528,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
} else {
InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, IC,
&LVL, &CM, BFI, PSI, Checks);
- LVP.executePlan(LB, DT);
+
+ VPlan &BestPlan = LVP.getBestPlanFor(VF.Width);
+ LVP.executePlan(VF.Width, IC, BestPlan, LB, DT);
++LoopsVectorized;
// Add metadata to disable runtime unrolling a scalar loop when there
@@ -10423,15 +10648,12 @@ PreservedAnalyses LoopVectorizePass::run(Function &F,
auto &AC = AM.getResult<AssumptionAnalysis>(F);
auto &DB = AM.getResult<DemandedBitsAnalysis>(F);
auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
- MemorySSA *MSSA = EnableMSSALoopDependency
- ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA()
- : nullptr;
auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
std::function<const LoopAccessInfo &(Loop &)> GetLAA =
[&](Loop &L) -> const LoopAccessInfo & {
- LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE,
- TLI, TTI, nullptr, MSSA};
+ LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE,
+ TLI, TTI, nullptr, nullptr, nullptr};
return LAM.getResult<LoopAccessAnalysis>(L, AR);
};
auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
@@ -10455,3 +10677,14 @@ PreservedAnalyses LoopVectorizePass::run(Function &F,
PA.preserveSet<CFGAnalyses>();
return PA;
}
+
+void LoopVectorizePass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<LoopVectorizePass> *>(this)->printPipeline(
+ OS, MapClassName2PassName);
+
+ OS << "<";
+ OS << (InterleaveOnlyWhenForced ? "" : "no-") << "interleave-forced-only;";
+ OS << (VectorizeOnlyWhenForced ? "" : "no-") << "vectorize-forced-only;";
+ OS << ">";
+}
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index cc3f5c7d4b48..e3ef0b794f68 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -21,6 +21,7 @@
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SetVector.h"
@@ -200,12 +201,39 @@ static bool isValidElementType(Type *Ty) {
!Ty->isPPC_FP128Ty();
}
+/// \returns True if the value is a constant (but not globals/constant
+/// expressions).
+static bool isConstant(Value *V) {
+ return isa<Constant>(V) && !isa<ConstantExpr>(V) && !isa<GlobalValue>(V);
+}
+
+/// Checks if \p V is one of the vector-like instructions, i.e. undef,
+/// insertelement/extractelement with constant indices for a fixed vector type,
+/// or an extractvalue instruction.
+static bool isVectorLikeInstWithConstOps(Value *V) {
+ if (!isa<InsertElementInst, ExtractElementInst>(V) &&
+ !isa<ExtractValueInst, UndefValue>(V))
+ return false;
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I || isa<ExtractValueInst>(I))
+ return true;
+ if (!isa<FixedVectorType>(I->getOperand(0)->getType()))
+ return false;
+ if (isa<ExtractElementInst>(I))
+ return isConstant(I->getOperand(1));
+ assert(isa<InsertElementInst>(V) && "Expected only insertelement.");
+ return isConstant(I->getOperand(2));
+}
+
/// \returns true if all of the instructions in \p VL are in the same block or
/// false otherwise.
static bool allSameBlock(ArrayRef<Value *> VL) {
Instruction *I0 = dyn_cast<Instruction>(VL[0]);
if (!I0)
return false;
+ if (all_of(VL, isVectorLikeInstWithConstOps))
+ return true;
+
BasicBlock *BB = I0->getParent();
for (int I = 1, E = VL.size(); I < E; I++) {
auto *II = dyn_cast<Instruction>(VL[I]);
@@ -218,12 +246,6 @@ static bool allSameBlock(ArrayRef<Value *> VL) {
return true;
}
-/// \returns True if the value is a constant (but not globals/constant
-/// expressions).
-static bool isConstant(Value *V) {
- return isa<Constant>(V) && !isa<ConstantExpr>(V) && !isa<GlobalValue>(V);
-}
-
/// \returns True if all of the values in \p VL are constants (but not
/// globals/constant expressions).
static bool allConstant(ArrayRef<Value *> VL) {
@@ -232,12 +254,21 @@ static bool allConstant(ArrayRef<Value *> VL) {
return all_of(VL, isConstant);
}
-/// \returns True if all of the values in \p VL are identical.
+/// \returns True if all of the values in \p VL are identical or some of them
+/// are UndefValue.
static bool isSplat(ArrayRef<Value *> VL) {
- for (unsigned i = 1, e = VL.size(); i < e; ++i)
- if (VL[i] != VL[0])
+ Value *FirstNonUndef = nullptr;
+ for (Value *V : VL) {
+ if (isa<UndefValue>(V))
+ continue;
+ if (!FirstNonUndef) {
+ FirstNonUndef = V;
+ continue;
+ }
+ if (V != FirstNonUndef)
return false;
- return true;
+ }
+ return FirstNonUndef != nullptr;
}
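// Editorial illustration, not part of the upstream patch: the relaxed splat
// check above now tolerates undef lanes. A minimal standalone analogue over
// plain integers, where a negative value stands in for UndefValue.
#include <cassert>
#include <vector>

static bool isSplatIgnoringUndef(const std::vector<int> &VL) {
  const int *FirstNonUndef = nullptr;
  for (const int &V : VL) {
    if (V < 0) // "undef" lane
      continue;
    if (!FirstNonUndef) {
      FirstNonUndef = &V;
      continue;
    }
    if (V != *FirstNonUndef)
      return false;
  }
  // An all-undef list is not treated as a splat, mirroring the code above.
  return FirstNonUndef != nullptr;
}

int main() {
  assert(isSplatIgnoringUndef({7, -1, 7, 7}));     // splat with an undef lane
  assert(!isSplatIgnoringUndef({7, 3, 7, 7}));     // two distinct defined values
  assert(!isSplatIgnoringUndef({-1, -1, -1, -1})); // all undef is not a splat
  return 0;
}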
/// \returns True if \p I is commutative, handles CmpInst and BinaryOperator.
@@ -295,8 +326,10 @@ static bool isCommutative(Instruction *I) {
/// TODO: Can we split off and reuse the shuffle mask detection from
/// TargetTransformInfo::getInstructionThroughput?
static Optional<TargetTransformInfo::ShuffleKind>
-isShuffle(ArrayRef<Value *> VL, SmallVectorImpl<int> &Mask) {
+isFixedVectorShuffle(ArrayRef<Value *> VL, SmallVectorImpl<int> &Mask) {
auto *EI0 = cast<ExtractElementInst>(VL[0]);
+ if (isa<ScalableVectorType>(EI0->getVectorOperandType()))
+ return None;
unsigned Size =
cast<FixedVectorType>(EI0->getVectorOperandType())->getNumElements();
Value *Vec1 = nullptr;
@@ -504,7 +537,7 @@ static bool InTreeUserNeedToExtract(Value *Scalar, Instruction *UserInst,
case Instruction::Call: {
CallInst *CI = cast<CallInst>(UserInst);
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
- for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) {
+ for (unsigned i = 0, e = CI->arg_size(); i != e; ++i) {
if (hasVectorInstrinsicScalarOpd(ID, i))
return (CI->getArgOperand(i) == Scalar);
}
@@ -535,13 +568,67 @@ static bool isSimple(Instruction *I) {
return true;
}
+/// Shuffles \p Mask in accordance with the given \p SubMask.
+static void addMask(SmallVectorImpl<int> &Mask, ArrayRef<int> SubMask) {
+ if (SubMask.empty())
+ return;
+ if (Mask.empty()) {
+ Mask.append(SubMask.begin(), SubMask.end());
+ return;
+ }
+ SmallVector<int> NewMask(SubMask.size(), UndefMaskElem);
+ int TermValue = std::min(Mask.size(), SubMask.size());
+ for (int I = 0, E = SubMask.size(); I < E; ++I) {
+ if (SubMask[I] >= TermValue || SubMask[I] == UndefMaskElem ||
+ Mask[SubMask[I]] >= TermValue)
+ continue;
+ NewMask[I] = Mask[SubMask[I]];
+ }
+ Mask.swap(NewMask);
+}
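// Editorial illustration, not part of the upstream patch: addMask() above
// composes two shuffle masks, so that applying Mask and then SubMask to a
// vector is equivalent to applying the composed mask once. A simplified
// standalone sketch (it omits the TermValue clamping of the real code).
#include <cassert>
#include <vector>

static std::vector<int> composeMasks(const std::vector<int> &Mask,
                                     const std::vector<int> &SubMask) {
  std::vector<int> Composed(SubMask.size(), -1); // -1 plays UndefMaskElem
  for (size_t I = 0; I < SubMask.size(); ++I)
    if (SubMask[I] >= 0 && SubMask[I] < (int)Mask.size() &&
        Mask[SubMask[I]] >= 0)
      Composed[I] = Mask[SubMask[I]];
  return Composed;
}

int main() {
  // First shuffle reverses four lanes, the second swaps adjacent pairs.
  std::vector<int> Mask = {3, 2, 1, 0};
  std::vector<int> SubMask = {1, 0, 3, 2};
  std::vector<int> Expected = {2, 3, 0, 1};
  assert(composeMasks(Mask, SubMask) == Expected);
  return 0;
}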
+
+/// Order may have elements assigned a special value (size) which is out of
+/// bounds. Such indices only appear in places which correspond to undef values
+/// (see canReuseExtract for details) and are used to avoid letting undef values
+/// affect the operand ordering.
+/// The first loop below simply finds all unused indices and then the following
+/// loops assign these indices to the positions of the undef values.
+/// As an example below Order has two undef positions and they have assigned
+/// values 3 and 7 respectively:
+/// before: 6 9 5 4 9 2 1 0
+/// after: 6 3 5 4 7 2 1 0
+static void fixupOrderingIndices(SmallVectorImpl<unsigned> &Order) {
+ const unsigned Sz = Order.size();
+ SmallBitVector UsedIndices(Sz);
+ SmallVector<int> MaskedIndices;
+ for (unsigned I = 0; I < Sz; ++I) {
+ if (Order[I] < Sz)
+ UsedIndices.set(Order[I]);
+ else
+ MaskedIndices.push_back(I);
+ }
+ if (MaskedIndices.empty())
+ return;
+ SmallVector<int> AvailableIndices(MaskedIndices.size());
+ unsigned Cnt = 0;
+ int Idx = UsedIndices.find_first_unset();
+ do {
+ AvailableIndices[Cnt] = Idx;
+ Idx = UsedIndices.find_next_unset(Idx);
+ ++Cnt;
+ } while (Idx > 0);
+ assert(Cnt == MaskedIndices.size() && "Non-synced masked/available indices.");
+ for (int I = 0, E = MaskedIndices.size(); I < E; ++I)
+ Order[MaskedIndices[I]] = AvailableIndices[I];
+}
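// Editorial illustration, not part of the upstream patch: the fix-up described
// in the comment above, reproduced on std::vector. Entries >= size() mark
// undef positions and receive the still-unused indices in increasing order
// (the counts are guaranteed to match, as the assert in the real code states).
#include <cassert>
#include <vector>

static void fixupOrder(std::vector<unsigned> &Order) {
  const unsigned Sz = Order.size();
  std::vector<bool> Used(Sz, false);
  std::vector<unsigned> MaskedPositions;
  for (unsigned I = 0; I < Sz; ++I) {
    if (Order[I] < Sz)
      Used[Order[I]] = true;
    else
      MaskedPositions.push_back(I);
  }
  // Hand the unused indices out, in increasing order, to the masked positions.
  unsigned NextFree = 0;
  for (unsigned Pos : MaskedPositions) {
    while (Used[NextFree])
      ++NextFree;
    Order[Pos] = NextFree++;
  }
}

int main() {
  // Matches the example in the comment: positions 1 and 4 hold the
  // out-of-bounds marker 9 and get the unused indices 3 and 7.
  std::vector<unsigned> Order = {6, 9, 5, 4, 9, 2, 1, 0};
  fixupOrder(Order);
  std::vector<unsigned> Expected = {6, 3, 5, 4, 7, 2, 1, 0};
  assert(Order == Expected);
  return 0;
}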
+
namespace llvm {
static void inversePermutation(ArrayRef<unsigned> Indices,
SmallVectorImpl<int> &Mask) {
Mask.clear();
const unsigned E = Indices.size();
- Mask.resize(E, E + 1);
+ Mask.resize(E, UndefMaskElem);
for (unsigned I = 0; I < E; ++I)
Mask[Indices[I]] = I;
}
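// Editorial illustration, not part of the upstream patch: inversePermutation()
// turns an ordering into the shuffle mask that realizes it, via
// Mask[Indices[I]] = I; unreferenced slots now stay undef (-1 here) instead of
// holding the out-of-range value E + 1.
#include <cassert>
#include <vector>

static std::vector<int> invertOrder(const std::vector<unsigned> &Indices) {
  std::vector<int> Mask(Indices.size(), -1); // -1 plays UndefMaskElem
  for (unsigned I = 0; I < Indices.size(); ++I)
    Mask[Indices[I]] = I;
  return Mask;
}

int main() {
  std::vector<unsigned> Indices = {2, 0, 1};
  std::vector<int> Expected = {1, 2, 0}; // Mask[2]=0, Mask[0]=1, Mask[1]=2
  assert(invertOrder(Indices) == Expected);
  return 0;
}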
@@ -581,6 +668,22 @@ static Optional<int> getInsertIndex(Value *InsertInst, unsigned Offset) {
return Index;
}
+/// Reorders the list of scalars \p Scalars in accordance with the given
+/// \p Mask: the scalar at position I is moved to position Mask[I], while
+/// positions that no mask element points to keep an undef placeholder.
+static void reorderScalars(SmallVectorImpl<Value *> &Scalars,
+ ArrayRef<int> Mask) {
+ assert(!Mask.empty() && "Expected non-empty mask.");
+ SmallVector<Value *> Prev(Scalars.size(),
+ UndefValue::get(Scalars.front()->getType()));
+ Prev.swap(Scalars);
+ for (unsigned I = 0, E = Prev.size(); I < E; ++I)
+ if (Mask[I] != UndefMaskElem)
+ Scalars[Mask[I]] = Prev[I];
+}
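// Editorial illustration, not part of the upstream patch: the scatter-by-mask
// behaviour of reorderScalars() above, on plain integers. Element I of the
// input ends up at position Mask[I]; lanes no mask element points to keep a
// placeholder (0 here, UndefValue in the real code).
#include <cassert>
#include <vector>

static void scatterByMask(std::vector<int> &Scalars,
                          const std::vector<int> &Mask) {
  std::vector<int> Prev(Scalars.size(), 0); // placeholder for untouched lanes
  Prev.swap(Scalars);
  for (size_t I = 0; I < Prev.size(); ++I)
    if (Mask[I] >= 0) // -1 plays UndefMaskElem
      Scalars[Mask[I]] = Prev[I];
}

int main() {
  std::vector<int> Scalars = {10, 20, 30, 40};
  scatterByMask(Scalars, {2, 0, 3, 1}); // 10->2, 20->0, 30->3, 40->1
  std::vector<int> Expected = {20, 40, 10, 30};
  assert(Scalars == Expected);
  return 0;
}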
+
namespace slpvectorizer {
/// Bottom Up SLP Vectorizer.
@@ -645,13 +748,12 @@ public:
void buildTree(ArrayRef<Value *> Roots,
ArrayRef<Value *> UserIgnoreLst = None);
- /// Construct a vectorizable tree that starts at \p Roots, ignoring users for
- /// the purpose of scheduling and extraction in the \p UserIgnoreLst taking
- /// into account (and updating it, if required) list of externally used
- /// values stored in \p ExternallyUsedValues.
- void buildTree(ArrayRef<Value *> Roots,
- ExtraValueToDebugLocsMap &ExternallyUsedValues,
- ArrayRef<Value *> UserIgnoreLst = None);
+ /// Builds external uses of the vectorized scalars, i.e. the list of
+ /// vectorized scalars to be extracted, their lanes and their scalar users. \p
+ /// ExternallyUsedValues contains an additional list of external uses to handle
+ /// vectorization of reductions.
+ void
+ buildExternalUses(const ExtraValueToDebugLocsMap &ExternallyUsedValues = {});
/// Clear the internal data structures that are created by 'buildTree'.
void deleteTree() {
@@ -659,8 +761,6 @@ public:
ScalarToTreeEntry.clear();
MustGather.clear();
ExternalUses.clear();
- NumOpsWantToKeepOrder.clear();
- NumOpsWantToKeepOriginalOrder = 0;
for (auto &Iter : BlocksSchedules) {
BlockScheduling *BS = Iter.second.get();
BS->clear();
@@ -674,103 +774,28 @@ public:
/// Perform LICM and CSE on the newly generated gather sequences.
void optimizeGatherSequence();
- /// \returns The best order of instructions for vectorization.
- Optional<ArrayRef<unsigned>> bestOrder() const {
- assert(llvm::all_of(
- NumOpsWantToKeepOrder,
- [this](const decltype(NumOpsWantToKeepOrder)::value_type &D) {
- return D.getFirst().size() ==
- VectorizableTree[0]->Scalars.size();
- }) &&
- "All orders must have the same size as number of instructions in "
- "tree node.");
- auto I = std::max_element(
- NumOpsWantToKeepOrder.begin(), NumOpsWantToKeepOrder.end(),
- [](const decltype(NumOpsWantToKeepOrder)::value_type &D1,
- const decltype(NumOpsWantToKeepOrder)::value_type &D2) {
- return D1.second < D2.second;
- });
- if (I == NumOpsWantToKeepOrder.end() ||
- I->getSecond() <= NumOpsWantToKeepOriginalOrder)
- return None;
-
- return makeArrayRef(I->getFirst());
- }
-
- /// Builds the correct order for root instructions.
- /// If some leaves have the same instructions to be vectorized, we may
- /// incorrectly evaluate the best order for the root node (it is built for the
- /// vector of instructions without repeated instructions and, thus, has less
- /// elements than the root node). This function builds the correct order for
- /// the root node.
- /// For example, if the root node is \<a+b, a+c, a+d, f+e\>, then the leaves
- /// are \<a, a, a, f\> and \<b, c, d, e\>. When we try to vectorize the first
- /// leaf, it will be shrink to \<a, b\>. If instructions in this leaf should
- /// be reordered, the best order will be \<1, 0\>. We need to extend this
- /// order for the root node. For the root node this order should look like
- /// \<3, 0, 1, 2\>. This function extends the order for the reused
- /// instructions.
- void findRootOrder(OrdersType &Order) {
- // If the leaf has the same number of instructions to vectorize as the root
- // - order must be set already.
- unsigned RootSize = VectorizableTree[0]->Scalars.size();
- if (Order.size() == RootSize)
- return;
- SmallVector<unsigned, 4> RealOrder(Order.size());
- std::swap(Order, RealOrder);
- SmallVector<int, 4> Mask;
- inversePermutation(RealOrder, Mask);
- Order.assign(Mask.begin(), Mask.end());
- // The leaf has less number of instructions - need to find the true order of
- // the root.
- // Scan the nodes starting from the leaf back to the root.
- const TreeEntry *PNode = VectorizableTree.back().get();
- SmallVector<const TreeEntry *, 4> Nodes(1, PNode);
- SmallPtrSet<const TreeEntry *, 4> Visited;
- while (!Nodes.empty() && Order.size() != RootSize) {
- const TreeEntry *PNode = Nodes.pop_back_val();
- if (!Visited.insert(PNode).second)
- continue;
- const TreeEntry &Node = *PNode;
- for (const EdgeInfo &EI : Node.UserTreeIndices)
- if (EI.UserTE)
- Nodes.push_back(EI.UserTE);
- if (Node.ReuseShuffleIndices.empty())
- continue;
- // Build the order for the parent node.
- OrdersType NewOrder(Node.ReuseShuffleIndices.size(), RootSize);
- SmallVector<unsigned, 4> OrderCounter(Order.size(), 0);
- // The algorithm of the order extension is:
- // 1. Calculate the number of the same instructions for the order.
- // 2. Calculate the index of the new order: total number of instructions
- // with order less than the order of the current instruction + reuse
- // number of the current instruction.
- // 3. The new order is just the index of the instruction in the original
- // vector of the instructions.
- for (unsigned I : Node.ReuseShuffleIndices)
- ++OrderCounter[Order[I]];
- SmallVector<unsigned, 4> CurrentCounter(Order.size(), 0);
- for (unsigned I = 0, E = Node.ReuseShuffleIndices.size(); I < E; ++I) {
- unsigned ReusedIdx = Node.ReuseShuffleIndices[I];
- unsigned OrderIdx = Order[ReusedIdx];
- unsigned NewIdx = 0;
- for (unsigned J = 0; J < OrderIdx; ++J)
- NewIdx += OrderCounter[J];
- NewIdx += CurrentCounter[OrderIdx];
- ++CurrentCounter[OrderIdx];
- assert(NewOrder[NewIdx] == RootSize &&
- "The order index should not be written already.");
- NewOrder[NewIdx] = I;
- }
- std::swap(Order, NewOrder);
- }
- assert(Order.size() == RootSize &&
- "Root node is expected or the size of the order must be the same as "
- "the number of elements in the root node.");
- assert(llvm::all_of(Order,
- [RootSize](unsigned Val) { return Val != RootSize; }) &&
- "All indices must be initialized");
- }
+ /// Checks if the specified gather tree entry \p TE can be represented as a
+ /// shuffled vector entry + (possibly) permutation with other gathers. It
+ /// implements the checks only for possibly ordered scalars (Loads,
+ /// ExtractElement, ExtractValue), which can be part of the graph.
+ Optional<OrdersType> findReusedOrderedScalars(const TreeEntry &TE);
+
+ /// Reorders the current graph to the most profitable order starting from the
+ /// root node to the leaf nodes. The best order is chosen only from the nodes
+ /// of the same size (vectorization factor). Smaller nodes are considered
+ /// parts of a subgraph with a smaller VF and are reordered independently. We
+ /// can do this because we still need to extend smaller nodes to the wider VF
+ /// and we can merge reordering shuffles with the widening shuffles.
+ void reorderTopToBottom();
+
+ /// Reorders the current graph to the most profitable order starting from
+ /// the leaves to the root. This allows rotating small subgraphs and reduces
+ /// the number of reshuffles if the leaf nodes use the same order. In that case
+ /// we can merge the orders and just shuffle the user node instead of shuffling
+ /// its operands. Even if the leaf nodes have different orders, this allows
+ /// sinking the reordering in the graph closer to the root node and merging it
+ /// later during analysis.
+ void reorderBottomToTop(bool IgnoreReorder = false);
/// \return The vector element size in bits to use when vectorizing the
/// expression tree ending at \p V. If V is a store, the size is the width of
@@ -793,6 +818,10 @@ public:
return MinVecRegSize;
}
+ unsigned getMinVF(unsigned Sz) const {
+ return std::max(2U, getMinVecRegSize() / Sz);
+ }
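// Editorial note, not part of the upstream patch: getMinVF() above derives the
// smallest vectorization factor from the minimum vector register width and the
// element size, clamped to at least 2. A tiny standalone check of the formula:
#include <algorithm>
#include <cassert>

static unsigned minVF(unsigned MinVecRegSizeInBits, unsigned ElemSizeInBits) {
  return std::max(2U, MinVecRegSizeInBits / ElemSizeInBits);
}

int main() {
  assert(minVF(128, 32) == 4);  // four 32-bit lanes fit a 128-bit register
  assert(minVF(128, 64) == 2);  // two 64-bit lanes
  assert(minVF(128, 128) == 2); // clamped to the minimum factor of 2
  return 0;
}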
+
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const {
unsigned MaxVF = MaxVFOption.getNumOccurrences() ?
MaxVFOption : TTI->getMaximumVF(ElemWidth, Opcode);
@@ -809,7 +838,7 @@ public:
/// \returns True if the VectorizableTree is both tiny and not fully
/// vectorizable. We do not vectorize such trees.
- bool isTreeTinyAndNotFullyVectorizable() const;
+ bool isTreeTinyAndNotFullyVectorizable(bool ForReduction = false) const;
/// Assume that a legal-sized 'or'-reduction of shifted/zexted loaded values
/// can be load combined in the backend. Load combining may not be allowed in
@@ -1578,10 +1607,12 @@ private:
Value *vectorizeTree(ArrayRef<Value *> VL);
/// \returns the scalarization cost for this type. Scalarization in this
- /// context means the creation of vectors from a group of scalars.
- InstructionCost
- getGatherCost(FixedVectorType *Ty,
- const DenseSet<unsigned> &ShuffledIndices) const;
+ /// context means the creation of vectors from a group of scalars. If \p
+ /// NeedToShuffle is true, the cost also includes reshuffling some of the
+ /// vector elements.
+ InstructionCost getGatherCost(FixedVectorType *Ty,
+ const DenseSet<unsigned> &ShuffledIndices,
+ bool NeedToShuffle) const;
/// Checks if the gathered \p VL can be represented as shuffle(s) of previous
/// tree entries.
@@ -1605,7 +1636,7 @@ private:
/// \returns whether the VectorizableTree is fully vectorizable and will
/// be beneficial even the tree height is tiny.
- bool isFullyVectorizableTinyTree() const;
+ bool isFullyVectorizableTinyTree(bool ForReduction) const;
/// Reorder commutative or alt operands to get better probability of
/// generating vectorized code.
@@ -1621,14 +1652,43 @@ private:
/// \returns true if the scalars in VL are equal to this entry.
bool isSame(ArrayRef<Value *> VL) const {
- if (VL.size() == Scalars.size())
- return std::equal(VL.begin(), VL.end(), Scalars.begin());
- return VL.size() == ReuseShuffleIndices.size() &&
- std::equal(
- VL.begin(), VL.end(), ReuseShuffleIndices.begin(),
- [this](Value *V, int Idx) { return V == Scalars[Idx]; });
+ auto &&IsSame = [VL](ArrayRef<Value *> Scalars, ArrayRef<int> Mask) {
+ if (Mask.size() != VL.size() && VL.size() == Scalars.size())
+ return std::equal(VL.begin(), VL.end(), Scalars.begin());
+ return VL.size() == Mask.size() &&
+ std::equal(VL.begin(), VL.end(), Mask.begin(),
+ [Scalars](Value *V, int Idx) {
+ return (isa<UndefValue>(V) &&
+ Idx == UndefMaskElem) ||
+ (Idx != UndefMaskElem && V == Scalars[Idx]);
+ });
+ };
+ if (!ReorderIndices.empty()) {
+ // TODO: implement matching if the nodes are just reordered, still can
+ // treat the vector as the same if the list of scalars matches VL
+ // directly, without reordering.
+ SmallVector<int> Mask;
+ inversePermutation(ReorderIndices, Mask);
+ if (VL.size() == Scalars.size())
+ return IsSame(Scalars, Mask);
+ if (VL.size() == ReuseShuffleIndices.size()) {
+ ::addMask(Mask, ReuseShuffleIndices);
+ return IsSame(Scalars, Mask);
+ }
+ return false;
+ }
+ return IsSame(Scalars, ReuseShuffleIndices);
}
+ /// \return Final vectorization factor for the node. Defined by the total
+ /// number of vectorized scalars, including those used several times in the
+ /// entry and counted in the \a ReuseShuffleIndices, if any.
+ unsigned getVectorFactor() const {
+ if (!ReuseShuffleIndices.empty())
+ return ReuseShuffleIndices.size();
+ return Scalars.size();
+ };
+
/// A vector of scalars.
ValueList Scalars;
@@ -1701,6 +1761,12 @@ private:
}
}
+ /// Reorders operands of the node to the given mask \p Mask.
+ void reorderOperands(ArrayRef<int> Mask) {
+ for (ValueList &Operand : Operands)
+ reorderScalars(Operand, Mask);
+ }
+
/// \returns the \p OpIdx operand of this TreeEntry.
ValueList &getOperand(unsigned OpIdx) {
assert(OpIdx < Operands.size() && "Off bounds");
@@ -1760,19 +1826,14 @@ private:
return AltOp ? AltOp->getOpcode() : 0;
}
- /// Update operations state of this entry if reorder occurred.
- bool updateStateIfReorder() {
- if (ReorderIndices.empty())
- return false;
- InstructionsState S = getSameOpcode(Scalars, ReorderIndices.front());
- setOperations(S);
- return true;
- }
- /// When ReuseShuffleIndices is empty it just returns position of \p V
- /// within vector of Scalars. Otherwise, try to remap on its reuse index.
+ /// When ReuseReorderShuffleIndices is empty it just returns the position of
+ /// \p V within the vector of Scalars. Otherwise, try to remap on its reuse index.
int findLaneForValue(Value *V) const {
unsigned FoundLane = std::distance(Scalars.begin(), find(Scalars, V));
assert(FoundLane < Scalars.size() && "Couldn't find extract lane");
+ if (!ReorderIndices.empty())
+ FoundLane = ReorderIndices[FoundLane];
+ assert(FoundLane < Scalars.size() && "Couldn't find extract lane");
if (!ReuseShuffleIndices.empty()) {
FoundLane = std::distance(ReuseShuffleIndices.begin(),
find(ReuseShuffleIndices, FoundLane));
@@ -1856,7 +1917,7 @@ private:
TreeEntry *newTreeEntry(ArrayRef<Value *> VL, Optional<ScheduleData *> Bundle,
const InstructionsState &S,
const EdgeInfo &UserTreeIdx,
- ArrayRef<unsigned> ReuseShuffleIndices = None,
+ ArrayRef<int> ReuseShuffleIndices = None,
ArrayRef<unsigned> ReorderIndices = None) {
TreeEntry::EntryState EntryState =
Bundle ? TreeEntry::Vectorize : TreeEntry::NeedToGather;
@@ -1869,7 +1930,7 @@ private:
Optional<ScheduleData *> Bundle,
const InstructionsState &S,
const EdgeInfo &UserTreeIdx,
- ArrayRef<unsigned> ReuseShuffleIndices = None,
+ ArrayRef<int> ReuseShuffleIndices = None,
ArrayRef<unsigned> ReorderIndices = None) {
assert(((!Bundle && EntryState == TreeEntry::NeedToGather) ||
(Bundle && EntryState != TreeEntry::NeedToGather)) &&
@@ -1877,12 +1938,25 @@ private:
VectorizableTree.push_back(std::make_unique<TreeEntry>(VectorizableTree));
TreeEntry *Last = VectorizableTree.back().get();
Last->Idx = VectorizableTree.size() - 1;
- Last->Scalars.insert(Last->Scalars.begin(), VL.begin(), VL.end());
Last->State = EntryState;
Last->ReuseShuffleIndices.append(ReuseShuffleIndices.begin(),
ReuseShuffleIndices.end());
- Last->ReorderIndices.append(ReorderIndices.begin(), ReorderIndices.end());
- Last->setOperations(S);
+ if (ReorderIndices.empty()) {
+ Last->Scalars.assign(VL.begin(), VL.end());
+ Last->setOperations(S);
+ } else {
+ // Reorder scalars and build final mask.
+ Last->Scalars.assign(VL.size(), nullptr);
+ transform(ReorderIndices, Last->Scalars.begin(),
+ [VL](unsigned Idx) -> Value * {
+ if (Idx >= VL.size())
+ return UndefValue::get(VL.front()->getType());
+ return VL[Idx];
+ });
+ InstructionsState S = getSameOpcode(Last->Scalars);
+ Last->setOperations(S);
+ Last->ReorderIndices.append(ReorderIndices.begin(), ReorderIndices.end());
+ }
if (Last->State != TreeEntry::NeedToGather) {
for (Value *V : VL) {
assert(!getTreeEntry(V) && "Scalar already in tree!");
@@ -1965,12 +2039,9 @@ private:
if (result.hasValue()) {
return result.getValue();
}
- MemoryLocation Loc2 = getLocation(Inst2, AA);
bool aliased = true;
- if (Loc1.Ptr && Loc2.Ptr && isSimple(Inst1) && isSimple(Inst2)) {
- // Do the alias check.
- aliased = !AA->isNoAlias(Loc1, Loc2);
- }
+ if (Loc1.Ptr && isSimple(Inst1))
+ aliased = isModOrRefSet(AA->getModRefInfo(Inst2, Loc1));
// Store the result in the cache.
result = aliased;
return aliased;
@@ -2434,14 +2505,6 @@ private:
}
};
- /// Contains orders of operations along with the number of bundles that have
- /// operations in this order. It stores only those orders that require
- /// reordering, if reordering is not required it is counted using \a
- /// NumOpsWantToKeepOriginalOrder.
- DenseMap<OrdersType, unsigned, OrdersTypeDenseMapInfo> NumOpsWantToKeepOrder;
- /// Number of bundles that do not require reordering.
- unsigned NumOpsWantToKeepOriginalOrder = 0;
-
// Analysis and block reference.
Function *F;
ScalarEvolution *SE;
@@ -2540,10 +2603,8 @@ template <> struct DOTGraphTraits<BoUpSLP *> : public DefaultDOTGraphTraits {
std::string getNodeLabel(const TreeEntry *Entry, const BoUpSLP *R) {
std::string Str;
raw_string_ostream OS(Str);
- if (isSplat(Entry->Scalars)) {
- OS << "<splat> " << *Entry->Scalars[0];
- return Str;
- }
+ if (isSplat(Entry->Scalars))
+ OS << "<splat> ";
for (auto V : Entry->Scalars) {
OS << *V;
if (llvm::any_of(R->ExternalUses, [&](const BoUpSLP::ExternalUser &EU) {
@@ -2594,21 +2655,539 @@ void BoUpSLP::eraseInstructions(ArrayRef<Value *> AV) {
};
}
-void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
- ArrayRef<Value *> UserIgnoreLst) {
- ExtraValueToDebugLocsMap ExternallyUsedValues;
- buildTree(Roots, ExternallyUsedValues, UserIgnoreLst);
+/// Reorders the given \p Reuses mask according to the given \p Mask. \p Reuses
+/// contains the original mask for the scalars reused in the node. The
+/// procedure transforms this mask in accordance with the given \p Mask.
+static void reorderReuses(SmallVectorImpl<int> &Reuses, ArrayRef<int> Mask) {
+ assert(!Mask.empty() && Reuses.size() == Mask.size() &&
+ "Expected non-empty mask.");
+ SmallVector<int> Prev(Reuses.begin(), Reuses.end());
+ Prev.swap(Reuses);
+ for (unsigned I = 0, E = Prev.size(); I < E; ++I)
+ if (Mask[I] != UndefMaskElem)
+ Reuses[Mask[I]] = Prev[I];
}
-void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
- ExtraValueToDebugLocsMap &ExternallyUsedValues,
- ArrayRef<Value *> UserIgnoreLst) {
- deleteTree();
- UserIgnoreList = UserIgnoreLst;
- if (!allSameType(Roots))
+/// Reorders the given \p Order according to the given \p Mask. \p Order is the
+/// original order of the scalars. The procedure transforms the provided order
+/// in accordance with the given \p Mask. If the resulting \p Order is just an
+/// identity order, \p Order is cleared.
+static void reorderOrder(SmallVectorImpl<unsigned> &Order, ArrayRef<int> Mask) {
+ assert(!Mask.empty() && "Expected non-empty mask.");
+ SmallVector<int> MaskOrder;
+ if (Order.empty()) {
+ MaskOrder.resize(Mask.size());
+ std::iota(MaskOrder.begin(), MaskOrder.end(), 0);
+ } else {
+ inversePermutation(Order, MaskOrder);
+ }
+ reorderReuses(MaskOrder, Mask);
+ if (ShuffleVectorInst::isIdentityMask(MaskOrder)) {
+ Order.clear();
return;
- buildTree_rec(Roots, 0, EdgeInfo());
+ }
+ Order.assign(Mask.size(), Mask.size());
+ for (unsigned I = 0, E = Mask.size(); I < E; ++I)
+ if (MaskOrder[I] != UndefMaskElem)
+ Order[MaskOrder[I]] = I;
+ fixupOrderingIndices(Order);
+}
+
+Optional<BoUpSLP::OrdersType>
+BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry &TE) {
+ assert(TE.State == TreeEntry::NeedToGather && "Expected gather node only.");
+ unsigned NumScalars = TE.Scalars.size();
+ OrdersType CurrentOrder(NumScalars, NumScalars);
+ SmallVector<int> Positions;
+ SmallBitVector UsedPositions(NumScalars);
+ const TreeEntry *STE = nullptr;
+ // Try to find all gathered scalars that get vectorized in another
+ // vectorized node. Here we can have only a single tree vector node in order
+ // to correctly identify the order of the gathered scalars.
+ for (unsigned I = 0; I < NumScalars; ++I) {
+ Value *V = TE.Scalars[I];
+ if (!isa<LoadInst, ExtractElementInst, ExtractValueInst>(V))
+ continue;
+ if (const auto *LocalSTE = getTreeEntry(V)) {
+ if (!STE)
+ STE = LocalSTE;
+ else if (STE != LocalSTE)
+ // Take the order only from the single vector node.
+ return None;
+ unsigned Lane =
+ std::distance(STE->Scalars.begin(), find(STE->Scalars, V));
+ if (Lane >= NumScalars)
+ return None;
+ if (CurrentOrder[Lane] != NumScalars) {
+ if (Lane != I)
+ continue;
+ UsedPositions.reset(CurrentOrder[Lane]);
+ }
+ // The partial identity (where only some elements of the gather node are
+ // in the identity order) is good.
+ CurrentOrder[Lane] = I;
+ UsedPositions.set(I);
+ }
+ }
+ // Need to keep the order if we have a vector entry and at least 2 scalars or
+ // the vectorized entry has just 2 scalars.
+ if (STE && (UsedPositions.count() > 1 || STE->Scalars.size() == 2)) {
+ auto &&IsIdentityOrder = [NumScalars](ArrayRef<unsigned> CurrentOrder) {
+ for (unsigned I = 0; I < NumScalars; ++I)
+ if (CurrentOrder[I] != I && CurrentOrder[I] != NumScalars)
+ return false;
+ return true;
+ };
+ if (IsIdentityOrder(CurrentOrder)) {
+ CurrentOrder.clear();
+ return CurrentOrder;
+ }
+ auto *It = CurrentOrder.begin();
+ for (unsigned I = 0; I < NumScalars;) {
+ if (UsedPositions.test(I)) {
+ ++I;
+ continue;
+ }
+ if (*It == NumScalars) {
+ *It = I;
+ ++I;
+ }
+ ++It;
+ }
+ return CurrentOrder;
+ }
+ return None;
+}
+void BoUpSLP::reorderTopToBottom() {
+ // Maps VF to the graph nodes.
+ DenseMap<unsigned, SmallPtrSet<TreeEntry *, 4>> VFToOrderedEntries;
+ // ExtractElement gather nodes which can be vectorized and need to handle
+ // their ordering.
+ DenseMap<const TreeEntry *, OrdersType> GathersToOrders;
+ // Find all reorderable nodes with the given VF.
+ // Currently these are vectorized loads, extracts + some gathering of extracts.
+ for_each(VectorizableTree, [this, &VFToOrderedEntries, &GathersToOrders](
+ const std::unique_ptr<TreeEntry> &TE) {
+ // No need to reorder if we need to shuffle reuses; the node still has to be
+ // shuffled anyway.
+ if (!TE->ReuseShuffleIndices.empty())
+ return;
+ if (TE->State == TreeEntry::Vectorize &&
+ isa<LoadInst, ExtractElementInst, ExtractValueInst, StoreInst,
+ InsertElementInst>(TE->getMainOp()) &&
+ !TE->isAltShuffle()) {
+ VFToOrderedEntries[TE->Scalars.size()].insert(TE.get());
+ return;
+ }
+ if (TE->State == TreeEntry::NeedToGather) {
+ if (TE->getOpcode() == Instruction::ExtractElement &&
+ !TE->isAltShuffle() &&
+ isa<FixedVectorType>(cast<ExtractElementInst>(TE->getMainOp())
+ ->getVectorOperandType()) &&
+ allSameType(TE->Scalars) && allSameBlock(TE->Scalars)) {
+ // Check that gather of extractelements can be represented as
+ // just a shuffle of a single vector.
+ OrdersType CurrentOrder;
+ bool Reuse =
+ canReuseExtract(TE->Scalars, TE->getMainOp(), CurrentOrder);
+ if (Reuse || !CurrentOrder.empty()) {
+ VFToOrderedEntries[TE->Scalars.size()].insert(TE.get());
+ GathersToOrders.try_emplace(TE.get(), CurrentOrder);
+ return;
+ }
+ }
+ if (Optional<OrdersType> CurrentOrder =
+ findReusedOrderedScalars(*TE.get())) {
+ VFToOrderedEntries[TE->Scalars.size()].insert(TE.get());
+ GathersToOrders.try_emplace(TE.get(), *CurrentOrder);
+ }
+ }
+ });
+
+ // Reorder the graph nodes according to their vectorization factor.
+ for (unsigned VF = VectorizableTree.front()->Scalars.size(); VF > 1;
+ VF /= 2) {
+ auto It = VFToOrderedEntries.find(VF);
+ if (It == VFToOrderedEntries.end())
+ continue;
+ // Try to find the most profitable order. We are just looking for the most
+ // used order and reorder the scalar elements in the nodes according to this
+ // most used order.
+ const SmallPtrSetImpl<TreeEntry *> &OrderedEntries = It->getSecond();
+ // All operands are reordered and used only in this node - propagate the
+ // most used order to the user node.
+ MapVector<OrdersType, unsigned,
+ DenseMap<OrdersType, unsigned, OrdersTypeDenseMapInfo>>
+ OrdersUses;
+ SmallPtrSet<const TreeEntry *, 4> VisitedOps;
+ for (const TreeEntry *OpTE : OrderedEntries) {
+ // No need to reorder these nodes; we still need to extend and use a shuffle,
+ // just merging the reordering shuffle and the reuse shuffle.
+ if (!OpTE->ReuseShuffleIndices.empty())
+ continue;
+ // Count the number of uses of each order.
+ const auto &Order = [OpTE, &GathersToOrders]() -> const OrdersType & {
+ if (OpTE->State == TreeEntry::NeedToGather)
+ return GathersToOrders.find(OpTE)->second;
+ return OpTE->ReorderIndices;
+ }();
+ // Stores actually store the mask, not the order; it needs to be inverted.
+ if (OpTE->State == TreeEntry::Vectorize && !OpTE->isAltShuffle() &&
+ OpTE->getOpcode() == Instruction::Store && !Order.empty()) {
+ SmallVector<int> Mask;
+ inversePermutation(Order, Mask);
+ unsigned E = Order.size();
+ OrdersType CurrentOrder(E, E);
+ transform(Mask, CurrentOrder.begin(), [E](int Idx) {
+ return Idx == UndefMaskElem ? E : static_cast<unsigned>(Idx);
+ });
+ fixupOrderingIndices(CurrentOrder);
+ ++OrdersUses.insert(std::make_pair(CurrentOrder, 0)).first->second;
+ } else {
+ ++OrdersUses.insert(std::make_pair(Order, 0)).first->second;
+ }
+ }
+ // Set order of the user node.
+ if (OrdersUses.empty())
+ continue;
+ // Choose the most used order.
+ ArrayRef<unsigned> BestOrder = OrdersUses.front().first;
+ unsigned Cnt = OrdersUses.front().second;
+ for (const auto &Pair : drop_begin(OrdersUses)) {
+ if (Cnt < Pair.second || (Cnt == Pair.second && Pair.first.empty())) {
+ BestOrder = Pair.first;
+ Cnt = Pair.second;
+ }
+ }
+ // Set order of the user node.
+ if (BestOrder.empty())
+ continue;
+ SmallVector<int> Mask;
+ inversePermutation(BestOrder, Mask);
+ SmallVector<int> MaskOrder(BestOrder.size(), UndefMaskElem);
+ unsigned E = BestOrder.size();
+ transform(BestOrder, MaskOrder.begin(), [E](unsigned I) {
+ return I < E ? static_cast<int>(I) : UndefMaskElem;
+ });
+ // Do an actual reordering, if profitable.
+ for (std::unique_ptr<TreeEntry> &TE : VectorizableTree) {
+ // Just do the reordering for the nodes with the given VF.
+ if (TE->Scalars.size() != VF) {
+ if (TE->ReuseShuffleIndices.size() == VF) {
+ // Need to reorder the reuses masks of the operands with smaller VF to
+ // be able to find the match between the graph nodes and scalar
+ // operands of the given node during vectorization/cost estimation.
+ assert(all_of(TE->UserTreeIndices,
+ [VF, &TE](const EdgeInfo &EI) {
+ return EI.UserTE->Scalars.size() == VF ||
+ EI.UserTE->Scalars.size() ==
+ TE->Scalars.size();
+ }) &&
+ "All users must be of VF size.");
+ // Update ordering of the operands with the smaller VF than the given
+ // one.
+ reorderReuses(TE->ReuseShuffleIndices, Mask);
+ }
+ continue;
+ }
+ if (TE->State == TreeEntry::Vectorize &&
+ isa<ExtractElementInst, ExtractValueInst, LoadInst, StoreInst,
+ InsertElementInst>(TE->getMainOp()) &&
+ !TE->isAltShuffle()) {
+ // Build correct orders for extract{element,value}, loads and
+ // stores.
+ reorderOrder(TE->ReorderIndices, Mask);
+ if (isa<InsertElementInst, StoreInst>(TE->getMainOp()))
+ TE->reorderOperands(Mask);
+ } else {
+ // Reorder the node and its operands.
+ TE->reorderOperands(Mask);
+ assert(TE->ReorderIndices.empty() &&
+ "Expected empty reorder sequence.");
+ reorderScalars(TE->Scalars, Mask);
+ }
+ if (!TE->ReuseShuffleIndices.empty()) {
+ // Apply reversed order to keep the original ordering of the reused
+ // elements to avoid extra reorder indices shuffling.
+ OrdersType CurrentOrder;
+ reorderOrder(CurrentOrder, MaskOrder);
+ SmallVector<int> NewReuses;
+ inversePermutation(CurrentOrder, NewReuses);
+ addMask(NewReuses, TE->ReuseShuffleIndices);
+ TE->ReuseShuffleIndices.swap(NewReuses);
+ }
+ }
+ }
+}
+
+void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
+ SetVector<TreeEntry *> OrderedEntries;
+ DenseMap<const TreeEntry *, OrdersType> GathersToOrders;
+ // Find all reorderable leaf nodes with the given VF.
+ // Currently these are vectorized loads, extracts without alternate operands +
+ // some gathering of extracts.
+ SmallVector<TreeEntry *> NonVectorized;
+ for_each(VectorizableTree, [this, &OrderedEntries, &GathersToOrders,
+ &NonVectorized](
+ const std::unique_ptr<TreeEntry> &TE) {
+ if (TE->State != TreeEntry::Vectorize)
+ NonVectorized.push_back(TE.get());
+ // No need to reorder if we need to shuffle reuses; the node still has to be
+ // shuffled anyway.
+ if (!TE->ReuseShuffleIndices.empty())
+ return;
+ if (TE->State == TreeEntry::Vectorize &&
+ isa<LoadInst, ExtractElementInst, ExtractValueInst>(TE->getMainOp()) &&
+ !TE->isAltShuffle()) {
+ OrderedEntries.insert(TE.get());
+ return;
+ }
+ if (TE->State == TreeEntry::NeedToGather) {
+ if (TE->getOpcode() == Instruction::ExtractElement &&
+ !TE->isAltShuffle() &&
+ isa<FixedVectorType>(cast<ExtractElementInst>(TE->getMainOp())
+ ->getVectorOperandType()) &&
+ allSameType(TE->Scalars) && allSameBlock(TE->Scalars)) {
+ // Check that gather of extractelements can be represented as
+ // just a shuffle of a single vector with a single user only.
+ OrdersType CurrentOrder;
+ bool Reuse =
+ canReuseExtract(TE->Scalars, TE->getMainOp(), CurrentOrder);
+ if ((Reuse || !CurrentOrder.empty()) &&
+ !any_of(VectorizableTree,
+ [&TE](const std::unique_ptr<TreeEntry> &Entry) {
+ return Entry->State == TreeEntry::NeedToGather &&
+ Entry.get() != TE.get() &&
+ Entry->isSame(TE->Scalars);
+ })) {
+ OrderedEntries.insert(TE.get());
+ GathersToOrders.try_emplace(TE.get(), CurrentOrder);
+ return;
+ }
+ }
+ if (Optional<OrdersType> CurrentOrder =
+ findReusedOrderedScalars(*TE.get())) {
+ OrderedEntries.insert(TE.get());
+ GathersToOrders.try_emplace(TE.get(), *CurrentOrder);
+ }
+ }
+ });
+
+ // Checks if the operands of the users are reorderable and have only a
+ // single use.
+ auto &&CheckOperands =
+ [this, &NonVectorized](const auto &Data,
+ SmallVectorImpl<TreeEntry *> &GatherOps) {
+ for (unsigned I = 0, E = Data.first->getNumOperands(); I < E; ++I) {
+ if (any_of(Data.second,
+ [I](const std::pair<unsigned, TreeEntry *> &OpData) {
+ return OpData.first == I &&
+ OpData.second->State == TreeEntry::Vectorize;
+ }))
+ continue;
+ ArrayRef<Value *> VL = Data.first->getOperand(I);
+ const TreeEntry *TE = nullptr;
+ const auto *It = find_if(VL, [this, &TE](Value *V) {
+ TE = getTreeEntry(V);
+ return TE;
+ });
+ if (It != VL.end() && TE->isSame(VL))
+ return false;
+ TreeEntry *Gather = nullptr;
+ if (count_if(NonVectorized, [VL, &Gather](TreeEntry *TE) {
+ assert(TE->State != TreeEntry::Vectorize &&
+ "Only non-vectorized nodes are expected.");
+ if (TE->isSame(VL)) {
+ Gather = TE;
+ return true;
+ }
+ return false;
+ }) > 1)
+ return false;
+ if (Gather)
+ GatherOps.push_back(Gather);
+ }
+ return true;
+ };
+ // 1. Propagate order to the graph nodes that use only reordered nodes.
+ // I.e., if the node has operands that are reordered, try to put at least
+ // one operand into the natural order and reorder the others, plus reorder the
+ // user node itself.
+ SmallPtrSet<const TreeEntry *, 4> Visited;
+ while (!OrderedEntries.empty()) {
+ // 1. Filter out only reordered nodes.
+ // 2. If the entry has multiple uses - skip it and jump to the next node.
+ MapVector<TreeEntry *, SmallVector<std::pair<unsigned, TreeEntry *>>> Users;
+ SmallVector<TreeEntry *> Filtered;
+ for (TreeEntry *TE : OrderedEntries) {
+ if (!(TE->State == TreeEntry::Vectorize ||
+ (TE->State == TreeEntry::NeedToGather &&
+ GathersToOrders.count(TE))) ||
+ TE->UserTreeIndices.empty() || !TE->ReuseShuffleIndices.empty() ||
+ !all_of(drop_begin(TE->UserTreeIndices),
+ [TE](const EdgeInfo &EI) {
+ return EI.UserTE == TE->UserTreeIndices.front().UserTE;
+ }) ||
+ !Visited.insert(TE).second) {
+ Filtered.push_back(TE);
+ continue;
+ }
+ // Build a map between user nodes and their operand order to speed up the
+ // search. The graph currently does not provide this dependency directly.
+ for (EdgeInfo &EI : TE->UserTreeIndices) {
+ TreeEntry *UserTE = EI.UserTE;
+ auto It = Users.find(UserTE);
+ if (It == Users.end())
+ It = Users.insert({UserTE, {}}).first;
+ It->second.emplace_back(EI.EdgeIdx, TE);
+ }
+ }
+ // Erase filtered entries.
+ for_each(Filtered,
+ [&OrderedEntries](TreeEntry *TE) { OrderedEntries.remove(TE); });
+ for (const auto &Data : Users) {
+ // Check that operands are used only in the User node.
+ SmallVector<TreeEntry *> GatherOps;
+ if (!CheckOperands(Data, GatherOps)) {
+ for_each(Data.second,
+ [&OrderedEntries](const std::pair<unsigned, TreeEntry *> &Op) {
+ OrderedEntries.remove(Op.second);
+ });
+ continue;
+ }
+ // All operands are reordered and used only in this node - propagate the
+ // most used order to the user node.
+ MapVector<OrdersType, unsigned,
+ DenseMap<OrdersType, unsigned, OrdersTypeDenseMapInfo>>
+ OrdersUses;
+ SmallPtrSet<const TreeEntry *, 4> VisitedOps;
+ for (const auto &Op : Data.second) {
+ TreeEntry *OpTE = Op.second;
+ if (!OpTE->ReuseShuffleIndices.empty() ||
+ (IgnoreReorder && OpTE == VectorizableTree.front().get()))
+ continue;
+ const auto &Order = [OpTE, &GathersToOrders]() -> const OrdersType & {
+ if (OpTE->State == TreeEntry::NeedToGather)
+ return GathersToOrders.find(OpTE)->second;
+ return OpTE->ReorderIndices;
+ }();
+ // Stores actually store the mask, not the order, so it needs to be inverted.
+ if (OpTE->State == TreeEntry::Vectorize && !OpTE->isAltShuffle() &&
+ OpTE->getOpcode() == Instruction::Store && !Order.empty()) {
+ SmallVector<int> Mask;
+ inversePermutation(Order, Mask);
+ unsigned E = Order.size();
+ OrdersType CurrentOrder(E, E);
+ transform(Mask, CurrentOrder.begin(), [E](int Idx) {
+ return Idx == UndefMaskElem ? E : static_cast<unsigned>(Idx);
+ });
+ fixupOrderingIndices(CurrentOrder);
+ ++OrdersUses.insert(std::make_pair(CurrentOrder, 0)).first->second;
+ } else {
+ ++OrdersUses.insert(std::make_pair(Order, 0)).first->second;
+ }
+ if (VisitedOps.insert(OpTE).second)
+ OrdersUses.insert(std::make_pair(OrdersType(), 0)).first->second +=
+ OpTE->UserTreeIndices.size();
+ assert(OrdersUses[{}] > 0 && "Counter cannot be less than 0.");
+ --OrdersUses[{}];
+ }
+ // If there are no orders, skip the current nodes and jump to the next, if any.
+ if (OrdersUses.empty()) {
+ for_each(Data.second,
+ [&OrderedEntries](const std::pair<unsigned, TreeEntry *> &Op) {
+ OrderedEntries.remove(Op.second);
+ });
+ continue;
+ }
+ // Choose the best order.
+ ArrayRef<unsigned> BestOrder = OrdersUses.front().first;
+ unsigned Cnt = OrdersUses.front().second;
+ for (const auto &Pair : drop_begin(OrdersUses)) {
+ if (Cnt < Pair.second || (Cnt == Pair.second && Pair.first.empty())) {
+ BestOrder = Pair.first;
+ Cnt = Pair.second;
+ }
+ }
+ // Set order of the user node (reordering of operands and user nodes).
+ if (BestOrder.empty()) {
+ for_each(Data.second,
+ [&OrderedEntries](const std::pair<unsigned, TreeEntry *> &Op) {
+ OrderedEntries.remove(Op.second);
+ });
+ continue;
+ }
+ // Erase operands from OrderedEntries list and adjust their orders.
+ VisitedOps.clear();
+ SmallVector<int> Mask;
+ inversePermutation(BestOrder, Mask);
+ SmallVector<int> MaskOrder(BestOrder.size(), UndefMaskElem);
+ unsigned E = BestOrder.size();
+ transform(BestOrder, MaskOrder.begin(), [E](unsigned I) {
+ return I < E ? static_cast<int>(I) : UndefMaskElem;
+ });
+ for (const std::pair<unsigned, TreeEntry *> &Op : Data.second) {
+ TreeEntry *TE = Op.second;
+ OrderedEntries.remove(TE);
+ if (!VisitedOps.insert(TE).second)
+ continue;
+ if (!TE->ReuseShuffleIndices.empty() && TE->ReorderIndices.empty()) {
+ // Just reorder reuses indices.
+ reorderReuses(TE->ReuseShuffleIndices, Mask);
+ continue;
+ }
+ // Gathers are processed separately.
+ if (TE->State != TreeEntry::Vectorize)
+ continue;
+ assert((BestOrder.size() == TE->ReorderIndices.size() ||
+ TE->ReorderIndices.empty()) &&
+ "Non-matching sizes of user/operand entries.");
+ reorderOrder(TE->ReorderIndices, Mask);
+ }
+ // For gathers, only their scalars need to be reordered.
+ for (TreeEntry *Gather : GatherOps) {
+ assert(Gather->ReorderIndices.empty() &&
+ "Unexpected reordering of gathers.");
+ if (!Gather->ReuseShuffleIndices.empty()) {
+ // Just reorder reuses indices.
+ reorderReuses(Gather->ReuseShuffleIndices, Mask);
+ continue;
+ }
+ reorderScalars(Gather->Scalars, Mask);
+ OrderedEntries.remove(Gather);
+ }
+ // Reorder operands of the user node and set the ordering for the user
+ // node itself.
+ if (Data.first->State != TreeEntry::Vectorize ||
+ !isa<ExtractElementInst, ExtractValueInst, LoadInst>(
+ Data.first->getMainOp()) ||
+ Data.first->isAltShuffle())
+ Data.first->reorderOperands(Mask);
+ if (!isa<InsertElementInst, StoreInst>(Data.first->getMainOp()) ||
+ Data.first->isAltShuffle()) {
+ reorderScalars(Data.first->Scalars, Mask);
+ reorderOrder(Data.first->ReorderIndices, MaskOrder);
+ if (Data.first->ReuseShuffleIndices.empty() &&
+ !Data.first->ReorderIndices.empty() &&
+ !Data.first->isAltShuffle()) {
+ // Insert user node to the list to try to sink reordering deeper in
+ // the graph.
+ OrderedEntries.insert(Data.first);
+ }
+ } else {
+ reorderOrder(Data.first->ReorderIndices, Mask);
+ }
+ }
+ }
+ // If the reordering is unnecessary, just remove the reorder.
+ if (IgnoreReorder && !VectorizableTree.front()->ReorderIndices.empty() &&
+ VectorizableTree.front()->ReuseShuffleIndices.empty())
+ VectorizableTree.front()->ReorderIndices.clear();
+}
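// --- Editor's illustrative sketch; not part of the vendor import. ---
// Both reorder passes above pick a "most used" order among the operands
// (the OrdersUses counter). This is the same vote reduced to the standard
// library; an empty vector stands for the identity order and wins ties,
// mirroring the `Pair.first.empty()` tie-break in the code above.
#include <map>
#include <vector>

using OrderTy = std::vector<unsigned>;

static OrderTy pickMostUsedOrder(const std::vector<OrderTy> &OperandOrders) {
  std::map<OrderTy, unsigned> Uses;
  for (const OrderTy &O : OperandOrders)
    ++Uses[O];                       // count how many operands want each order
  OrderTy Best;                      // identity by default
  unsigned BestCnt = 0;
  for (const auto &P : Uses) {
    // Prefer strictly more uses; on a tie prefer the identity (empty) order.
    if (BestCnt < P.second || (BestCnt == P.second && P.first.empty())) {
      Best = P.first;
      BestCnt = P.second;
    }
  }
  return Best;
}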
+
+void BoUpSLP::buildExternalUses(
+ const ExtraValueToDebugLocsMap &ExternallyUsedValues) {
// Collect the values that we need to extract from the tree.
for (auto &TEPtr : VectorizableTree) {
TreeEntry *Entry = TEPtr.get();
@@ -2636,6 +3215,9 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
if (!UserInst)
continue;
+ if (isDeleted(UserInst))
+ continue;
+
// Skip in-tree scalars that become vectors
if (TreeEntry *UseEntry = getTreeEntry(U)) {
Value *UseScalar = UseEntry->Scalars[0];
@@ -2664,14 +3246,120 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
}
}
+void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
+ ArrayRef<Value *> UserIgnoreLst) {
+ deleteTree();
+ UserIgnoreList = UserIgnoreLst;
+ if (!allSameType(Roots))
+ return;
+ buildTree_rec(Roots, 0, EdgeInfo());
+}
+
+namespace {
+/// Describes how the loads in the given sequence can be represented.
+enum class LoadsState { Gather, Vectorize, ScatterVectorize };
+} // anonymous namespace
+
+/// Checks if the given array of loads can be represented as a vectorized
+/// load, a scatter-vectorized load, or just a simple gather.
+static LoadsState canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
+ const TargetTransformInfo &TTI,
+ const DataLayout &DL, ScalarEvolution &SE,
+ SmallVectorImpl<unsigned> &Order,
+ SmallVectorImpl<Value *> &PointerOps) {
+ // Check that a vectorized load would load the same memory as a scalar
+ // load. For example, we don't want to vectorize loads that are smaller
+ // than 8-bit. Even though we have a packed struct {<i2, i2, i2, i2>} LLVM
+ // treats loading/storing it as an i8 struct. If we vectorize loads/stores
+ // from such a struct, we read/write packed bits disagreeing with the
+ // unvectorized version.
+ Type *ScalarTy = VL0->getType();
+
+ if (DL.getTypeSizeInBits(ScalarTy) != DL.getTypeAllocSizeInBits(ScalarTy))
+ return LoadsState::Gather;
+
+ // Make sure all loads in the bundle are simple - we can't vectorize
+ // atomic or volatile loads.
+ PointerOps.clear();
+ PointerOps.resize(VL.size());
+ auto *POIter = PointerOps.begin();
+ for (Value *V : VL) {
+ auto *L = cast<LoadInst>(V);
+ if (!L->isSimple())
+ return LoadsState::Gather;
+ *POIter = L->getPointerOperand();
+ ++POIter;
+ }
+
+ Order.clear();
+ // Check the order of pointer operands.
+ if (llvm::sortPtrAccesses(PointerOps, ScalarTy, DL, SE, Order)) {
+ Value *Ptr0;
+ Value *PtrN;
+ if (Order.empty()) {
+ Ptr0 = PointerOps.front();
+ PtrN = PointerOps.back();
+ } else {
+ Ptr0 = PointerOps[Order.front()];
+ PtrN = PointerOps[Order.back()];
+ }
+ Optional<int> Diff =
+ getPointersDiff(ScalarTy, Ptr0, ScalarTy, PtrN, DL, SE);
+ // Check that the sorted loads are consecutive.
+ if (static_cast<unsigned>(*Diff) == VL.size() - 1)
+ return LoadsState::Vectorize;
+ Align CommonAlignment = cast<LoadInst>(VL0)->getAlign();
+ for (Value *V : VL)
+ CommonAlignment =
+ commonAlignment(CommonAlignment, cast<LoadInst>(V)->getAlign());
+ if (TTI.isLegalMaskedGather(FixedVectorType::get(ScalarTy, VL.size()),
+ CommonAlignment))
+ return LoadsState::ScatterVectorize;
+ }
+
+ return LoadsState::Gather;
+}
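// --- Editor's illustrative sketch; not part of the vendor import. ---
// canVectorizeLoads() above classifies a bundle of loads. This sketch reduces
// the classification to integer byte offsets: the real code compares pointer
// operands via sortPtrAccesses()/getPointersDiff() and also rejects atomic,
// volatile, and non-packed-type loads, which is omitted here.
#include <algorithm>
#include <vector>

enum class LoadsStateSketch { Gather, Vectorize, ScatterVectorize };

static LoadsStateSketch classifyLoads(std::vector<long> Offsets, long ElemSize,
                                      bool MaskedGatherIsLegal) {
  if (Offsets.size() < 2)
    return LoadsStateSketch::Gather;
  std::sort(Offsets.begin(), Offsets.end());
  // Consecutive in sorted order: the span covers exactly N elements.
  if (Offsets.back() - Offsets.front() ==
      static_cast<long>(Offsets.size() - 1) * ElemSize)
    return LoadsStateSketch::Vectorize;
  // Otherwise fall back to a masked gather when the target supports it.
  return MaskedGatherIsLegal ? LoadsStateSketch::ScatterVectorize
                             : LoadsStateSketch::Gather;
}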
+
void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
const EdgeInfo &UserTreeIdx) {
assert((allConstant(VL) || allSameType(VL)) && "Invalid types!");
+ SmallVector<int> ReuseShuffleIndicies;
+ SmallVector<Value *> UniqueValues;
+ auto &&TryToFindDuplicates = [&VL, &ReuseShuffleIndicies, &UniqueValues,
+ &UserTreeIdx,
+ this](const InstructionsState &S) {
+ // Check that every instruction appears once in this bundle.
+ DenseMap<Value *, unsigned> UniquePositions;
+ for (Value *V : VL) {
+ auto Res = UniquePositions.try_emplace(V, UniqueValues.size());
+ ReuseShuffleIndicies.emplace_back(isa<UndefValue>(V) ? -1
+ : Res.first->second);
+ if (Res.second)
+ UniqueValues.emplace_back(V);
+ }
+ size_t NumUniqueScalarValues = UniqueValues.size();
+ if (NumUniqueScalarValues == VL.size()) {
+ ReuseShuffleIndicies.clear();
+ } else {
+ LLVM_DEBUG(dbgs() << "SLP: Shuffle for reused scalars.\n");
+ if (NumUniqueScalarValues <= 1 ||
+ !llvm::isPowerOf2_32(NumUniqueScalarValues)) {
+ LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
+ return false;
+ }
+ VL = UniqueValues;
+ }
+ return true;
+ };
+
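// --- Editor's illustrative sketch; not part of the vendor import. ---
// The TryToFindDuplicates lambda above de-duplicates a bundle and records a
// reuse index per lane. This standalone version uses std::string stand-ins
// for scalars; the real code also maps undef values to -1 and rejects bundles
// whose unique-value count is 1 or not a power of two.
#include <string>
#include <unordered_map>
#include <vector>

struct DedupResult {
  std::vector<std::string> Unique;  // first occurrence of every value
  std::vector<int> ReuseIndices;    // ReuseIndices[i] = position in Unique
};

static DedupResult findDuplicates(const std::vector<std::string> &Bundle) {
  DedupResult R;
  std::unordered_map<std::string, int> FirstPos;
  for (const std::string &V : Bundle) {
    auto It = FirstPos.try_emplace(V, static_cast<int>(R.Unique.size())).first;
    R.ReuseIndices.push_back(It->second);
    if (It->second == static_cast<int>(R.Unique.size()))
      R.Unique.push_back(V);
  }
  if (R.Unique.size() == Bundle.size())
    R.ReuseIndices.clear();          // no duplicates: no shuffle needed
  return R;
}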
InstructionsState S = getSameOpcode(VL);
if (Depth == RecursionMaxDepth) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n");
- newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
+ if (TryToFindDuplicates(S))
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies);
return;
}
@@ -2680,7 +3368,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
isa<ScalableVectorType>(
cast<ExtractElementInst>(S.OpValue)->getVectorOperandType())) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to scalable vector type.\n");
- newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
+ if (TryToFindDuplicates(S))
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies);
return;
}
@@ -2700,9 +3390,15 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
}
// If all of the operands are identical or constant we have a simple solution.
- if (allConstant(VL) || isSplat(VL) || !allSameBlock(VL) || !S.getOpcode()) {
+ // If we deal with insert/extract instructions, they all must have constant
+ // indices; otherwise we should gather them rather than try to vectorize.
+ if (allConstant(VL) || isSplat(VL) || !allSameBlock(VL) || !S.getOpcode() ||
+ (isa<InsertElementInst, ExtractValueInst, ExtractElementInst>(S.MainOp) &&
+ !all_of(VL, isVectorLikeInstWithConstOps))) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O. \n");
- newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
+ if (TryToFindDuplicates(S))
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies);
return;
}
@@ -2724,7 +3420,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.OpValue << ".\n");
if (!E->isSame(VL)) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n");
- newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
+ if (TryToFindDuplicates(S))
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies);
return;
}
// Record the reuse of the tree node. FIXME, currently this is only used to
@@ -2743,7 +3441,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
if (getTreeEntry(I)) {
LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V
<< ") is already in tree.\n");
- newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
+ if (TryToFindDuplicates(S))
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies);
return;
}
}
@@ -2754,7 +3454,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
for (Value *V : VL) {
if (MustGather.count(V) || is_contained(UserIgnoreList, V)) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n");
- newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
+ if (TryToFindDuplicates(S))
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies);
return;
}
}
@@ -2773,28 +3475,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
}
// Check that every instruction appears once in this bundle.
- SmallVector<unsigned, 4> ReuseShuffleIndicies;
- SmallVector<Value *, 4> UniqueValues;
- DenseMap<Value *, unsigned> UniquePositions;
- for (Value *V : VL) {
- auto Res = UniquePositions.try_emplace(V, UniqueValues.size());
- ReuseShuffleIndicies.emplace_back(Res.first->second);
- if (Res.second)
- UniqueValues.emplace_back(V);
- }
- size_t NumUniqueScalarValues = UniqueValues.size();
- if (NumUniqueScalarValues == VL.size()) {
- ReuseShuffleIndicies.clear();
- } else {
- LLVM_DEBUG(dbgs() << "SLP: Shuffle for reused scalars.\n");
- if (NumUniqueScalarValues <= 1 ||
- !llvm::isPowerOf2_32(NumUniqueScalarValues)) {
- LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
- newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
- return;
- }
- VL = UniqueValues;
- }
+ if (!TryToFindDuplicates(S))
+ return;
auto &BSRef = BlocksSchedules[BB];
if (!BSRef)
@@ -2867,7 +3549,6 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
bool Reuse = canReuseExtract(VL, VL0, CurrentOrder);
if (Reuse) {
LLVM_DEBUG(dbgs() << "SLP: Reusing or shuffling extract sequence.\n");
- ++NumOpsWantToKeepOriginalOrder;
newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies);
// This is a special case, as it does not gather, but at the same time
@@ -2885,12 +3566,11 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
dbgs() << " " << Idx;
dbgs() << "\n";
});
+ fixupOrderingIndices(CurrentOrder);
// Insert new order with initial value 0, if it does not exist,
// otherwise return the iterator to the existing one.
newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies, CurrentOrder);
- findRootOrder(CurrentOrder);
- ++NumOpsWantToKeepOrder[CurrentOrder];
// This is a special case, as it does not gather, but at the same time
// we are not extending buildTree_rec() towards the operands.
ValueList Op0;
@@ -2910,8 +3590,14 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// Check that we have a buildvector and not a shuffle of 2 or more
// different vectors.
ValueSet SourceVectors;
- for (Value *V : VL)
+ int MinIdx = std::numeric_limits<int>::max();
+ for (Value *V : VL) {
SourceVectors.insert(cast<Instruction>(V)->getOperand(0));
+ Optional<int> Idx = *getInsertIndex(V, 0);
+ if (!Idx || *Idx == UndefMaskElem)
+ continue;
+ MinIdx = std::min(MinIdx, *Idx);
+ }
if (count_if(VL, [&SourceVectors](Value *V) {
return !SourceVectors.contains(V);
@@ -2919,13 +3605,35 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// Found 2nd source vector - cancel.
LLVM_DEBUG(dbgs() << "SLP: Gather of insertelement vectors with "
"different source vectors.\n");
- newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
- ReuseShuffleIndicies);
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
BS.cancelScheduling(VL, VL0);
return;
}
- TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx);
+ auto OrdCompare = [](const std::pair<int, int> &P1,
+ const std::pair<int, int> &P2) {
+ return P1.first > P2.first;
+ };
+ PriorityQueue<std::pair<int, int>, SmallVector<std::pair<int, int>>,
+ decltype(OrdCompare)>
+ Indices(OrdCompare);
+ for (int I = 0, E = VL.size(); I < E; ++I) {
+ Optional<int> Idx = *getInsertIndex(VL[I], 0);
+ if (!Idx || *Idx == UndefMaskElem)
+ continue;
+ Indices.emplace(*Idx, I);
+ }
+ OrdersType CurrentOrder(VL.size(), VL.size());
+ bool IsIdentity = true;
+ for (int I = 0, E = VL.size(); I < E; ++I) {
+ CurrentOrder[Indices.top().second] = I;
+ IsIdentity &= Indices.top().second == I;
+ Indices.pop();
+ }
+ if (IsIdentity)
+ CurrentOrder.clear();
+ TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
+ None, CurrentOrder);
LLVM_DEBUG(dbgs() << "SLP: added inserts bundle.\n");
constexpr int NumOps = 2;
@@ -2936,7 +3644,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
TE->setOperand(I, VectorOperands[I]);
}
- buildTree_rec(VectorOperands[NumOps - 1], Depth + 1, {TE, 0});
+ buildTree_rec(VectorOperands[NumOps - 1], Depth + 1, {TE, NumOps - 1});
return;
}
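// --- Editor's illustrative sketch; not part of the vendor import. ---
// The PriorityQueue block in the InsertElement case above derives a reorder
// sequence from the insertelement positions. Here the same idea is shown over
// plain ints: lane L writes vector position InsertIdxOfLane[L], and the order
// records each lane's rank, cleared when it is already the identity.
#include <functional>
#include <queue>
#include <utility>
#include <vector>

static std::vector<unsigned>
orderByInsertIndex(const std::vector<int> &InsertIdxOfLane) {
  using P = std::pair<int, unsigned>; // (insert index, lane)
  std::priority_queue<P, std::vector<P>, std::greater<P>> MinHeap;
  for (unsigned L = 0; L < InsertIdxOfLane.size(); ++L)
    MinHeap.emplace(InsertIdxOfLane[L], L);
  std::vector<unsigned> Order(InsertIdxOfLane.size(), 0);
  bool IsIdentity = true;
  for (unsigned Rank = 0; !MinHeap.empty(); ++Rank) {
    unsigned Lane = MinHeap.top().second;
    MinHeap.pop();
    Order[Lane] = Rank;              // rank of this lane's insert position
    IsIdentity &= (Lane == Rank);
  }
  if (IsIdentity)
    Order.clear();                   // identity order is represented as empty
  return Order;
}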
case Instruction::Load: {
@@ -2946,90 +3654,52 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// treats loading/storing it as an i8 struct. If we vectorize loads/stores
// from such a struct, we read/write packed bits disagreeing with the
// unvectorized version.
- Type *ScalarTy = VL0->getType();
-
- if (DL->getTypeSizeInBits(ScalarTy) !=
- DL->getTypeAllocSizeInBits(ScalarTy)) {
- BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
- ReuseShuffleIndicies);
- LLVM_DEBUG(dbgs() << "SLP: Gathering loads of non-packed type.\n");
- return;
- }
-
- // Make sure all loads in the bundle are simple - we can't vectorize
- // atomic or volatile loads.
- SmallVector<Value *, 4> PointerOps(VL.size());
- auto POIter = PointerOps.begin();
- for (Value *V : VL) {
- auto *L = cast<LoadInst>(V);
- if (!L->isSimple()) {
- BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
- ReuseShuffleIndicies);
- LLVM_DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n");
- return;
- }
- *POIter = L->getPointerOperand();
- ++POIter;
- }
-
+ SmallVector<Value *> PointerOps;
OrdersType CurrentOrder;
- // Check the order of pointer operands.
- if (llvm::sortPtrAccesses(PointerOps, ScalarTy, *DL, *SE, CurrentOrder)) {
- Value *Ptr0;
- Value *PtrN;
+ TreeEntry *TE = nullptr;
+ switch (canVectorizeLoads(VL, VL0, *TTI, *DL, *SE, CurrentOrder,
+ PointerOps)) {
+ case LoadsState::Vectorize:
if (CurrentOrder.empty()) {
- Ptr0 = PointerOps.front();
- PtrN = PointerOps.back();
+ // Original loads are consecutive and do not require reordering.
+ TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies);
+ LLVM_DEBUG(dbgs() << "SLP: added a vector of loads.\n");
} else {
- Ptr0 = PointerOps[CurrentOrder.front()];
- PtrN = PointerOps[CurrentOrder.back()];
- }
- Optional<int> Diff = getPointersDiff(
- ScalarTy, Ptr0, ScalarTy, PtrN, *DL, *SE);
- // Check that the sorted loads are consecutive.
- if (static_cast<unsigned>(*Diff) == VL.size() - 1) {
- if (CurrentOrder.empty()) {
- // Original loads are consecutive and does not require reordering.
- ++NumOpsWantToKeepOriginalOrder;
- TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S,
- UserTreeIdx, ReuseShuffleIndicies);
- TE->setOperandsInOrder();
- LLVM_DEBUG(dbgs() << "SLP: added a vector of loads.\n");
- } else {
- // Need to reorder.
- TreeEntry *TE =
- newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
- ReuseShuffleIndicies, CurrentOrder);
- TE->setOperandsInOrder();
- LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled loads.\n");
- findRootOrder(CurrentOrder);
- ++NumOpsWantToKeepOrder[CurrentOrder];
- }
- return;
- }
- Align CommonAlignment = cast<LoadInst>(VL0)->getAlign();
- for (Value *V : VL)
- CommonAlignment =
- commonAlignment(CommonAlignment, cast<LoadInst>(V)->getAlign());
- if (TTI->isLegalMaskedGather(FixedVectorType::get(ScalarTy, VL.size()),
- CommonAlignment)) {
- // Vectorizing non-consecutive loads with `llvm.masked.gather`.
- TreeEntry *TE = newTreeEntry(VL, TreeEntry::ScatterVectorize, Bundle,
- S, UserTreeIdx, ReuseShuffleIndicies);
- TE->setOperandsInOrder();
- buildTree_rec(PointerOps, Depth + 1, {TE, 0});
- LLVM_DEBUG(dbgs()
- << "SLP: added a vector of non-consecutive loads.\n");
- return;
+ fixupOrderingIndices(CurrentOrder);
+ // Need to reorder.
+ TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies, CurrentOrder);
+ LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled loads.\n");
}
+ TE->setOperandsInOrder();
+ break;
+ case LoadsState::ScatterVectorize:
+ // Vectorizing non-consecutive loads with `llvm.masked.gather`.
+ TE = newTreeEntry(VL, TreeEntry::ScatterVectorize, Bundle, S,
+ UserTreeIdx, ReuseShuffleIndicies);
+ TE->setOperandsInOrder();
+ buildTree_rec(PointerOps, Depth + 1, {TE, 0});
+ LLVM_DEBUG(dbgs() << "SLP: added a vector of non-consecutive loads.\n");
+ break;
+ case LoadsState::Gather:
+ BS.cancelScheduling(VL, VL0);
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies);
+#ifndef NDEBUG
+ Type *ScalarTy = VL0->getType();
+ if (DL->getTypeSizeInBits(ScalarTy) !=
+ DL->getTypeAllocSizeInBits(ScalarTy))
+ LLVM_DEBUG(dbgs() << "SLP: Gathering loads of non-packed type.\n");
+ else if (any_of(VL, [](Value *V) {
+ return !cast<LoadInst>(V)->isSimple();
+ }))
+ LLVM_DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n");
+ else
+ LLVM_DEBUG(dbgs() << "SLP: Gathering non-consecutive loads.\n");
+#endif // NDEBUG
+ break;
}
-
- LLVM_DEBUG(dbgs() << "SLP: Gathering non-consecutive loads.\n");
- BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
- ReuseShuffleIndicies);
return;
}
case Instruction::ZExt:
@@ -3213,15 +3883,40 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: added a vector of GEPs.\n");
- TE->setOperandsInOrder();
- for (unsigned i = 0, e = 2; i < e; ++i) {
- ValueList Operands;
- // Prepare the operand vector.
- for (Value *V : VL)
- Operands.push_back(cast<Instruction>(V)->getOperand(i));
-
- buildTree_rec(Operands, Depth + 1, {TE, i});
+ SmallVector<ValueList, 2> Operands(2);
+ // Prepare the operand vector for pointer operands.
+ for (Value *V : VL)
+ Operands.front().push_back(
+ cast<GetElementPtrInst>(V)->getPointerOperand());
+ TE->setOperand(0, Operands.front());
+ // Need to cast all indices to the same type before vectorization to
+ // avoid a crash.
+ // Required to be able to find correct matches between different gather
+ // nodes and reuse the vectorized values rather than trying to gather them
+ // again.
+ int IndexIdx = 1;
+ Type *VL0Ty = VL0->getOperand(IndexIdx)->getType();
+ Type *Ty = all_of(VL,
+ [VL0Ty, IndexIdx](Value *V) {
+ return VL0Ty == cast<GetElementPtrInst>(V)
+ ->getOperand(IndexIdx)
+ ->getType();
+ })
+ ? VL0Ty
+ : DL->getIndexType(cast<GetElementPtrInst>(VL0)
+ ->getPointerOperandType()
+ ->getScalarType());
+ // Prepare the operand vector.
+ for (Value *V : VL) {
+ auto *Op = cast<Instruction>(V)->getOperand(IndexIdx);
+ auto *CI = cast<ConstantInt>(Op);
+ Operands.back().push_back(ConstantExpr::getIntegerCast(
+ CI, Ty, CI->getValue().isSignBitSet()));
}
+ TE->setOperand(IndexIdx, Operands.back());
+
+ for (unsigned I = 0, Ops = Operands.size(); I < Ops; ++I)
+ buildTree_rec(Operands[I], Depth + 1, {TE, I});
return;
}
case Instruction::Store: {
@@ -3276,21 +3971,19 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
if (static_cast<unsigned>(*Dist) == VL.size() - 1) {
if (CurrentOrder.empty()) {
// Original stores are consecutive and does not require reordering.
- ++NumOpsWantToKeepOriginalOrder;
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S,
UserTreeIdx, ReuseShuffleIndicies);
TE->setOperandsInOrder();
buildTree_rec(Operands, Depth + 1, {TE, 0});
LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n");
} else {
+ fixupOrderingIndices(CurrentOrder);
TreeEntry *TE =
newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies, CurrentOrder);
TE->setOperandsInOrder();
buildTree_rec(Operands, Depth + 1, {TE, 0});
LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled stores.\n");
- findRootOrder(CurrentOrder);
- ++NumOpsWantToKeepOrder[CurrentOrder];
}
return;
}
@@ -3321,7 +4014,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
return;
}
Function *F = CI->getCalledFunction();
- unsigned NumArgs = CI->getNumArgOperands();
+ unsigned NumArgs = CI->arg_size();
SmallVector<Value*, 4> ScalarArgs(NumArgs, nullptr);
for (unsigned j = 0; j != NumArgs; ++j)
if (hasVectorInstrinsicScalarOpd(ID, j))
@@ -3373,7 +4066,11 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies);
TE->setOperandsInOrder();
- for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) {
+ for (unsigned i = 0, e = CI->arg_size(); i != e; ++i) {
+ // For scalar operands there is no need to create an entry since there is
+ // no need to vectorize them.
+ if (hasVectorInstrinsicScalarOpd(ID, i))
+ continue;
ValueList Operands;
// Prepare the operand vector.
for (Value *V : VL) {
@@ -3548,7 +4245,7 @@ getVectorCallCosts(CallInst *CI, FixedVectorType *VecTy,
FastMathFlags FMF;
if (auto *FPCI = dyn_cast<FPMathOperator>(CI))
FMF = FPCI->getFastMathFlags();
- SmallVector<const Value *> Arguments(CI->arg_begin(), CI->arg_end());
+ SmallVector<const Value *> Arguments(CI->args());
IntrinsicCostAttributes CostAttrs(ID, VecTy, Arguments, VecTys, FMF,
dyn_cast<IntrinsicInst>(CI));
auto IntrinsicCost =
@@ -3621,25 +4318,42 @@ computeExtractCost(ArrayRef<Value *> VL, FixedVectorType *VecTy,
return Cost;
}
-/// Shuffles \p Mask in accordance with the given \p SubMask.
-static void addMask(SmallVectorImpl<int> &Mask, ArrayRef<int> SubMask) {
- if (SubMask.empty())
- return;
- if (Mask.empty()) {
- Mask.append(SubMask.begin(), SubMask.end());
- return;
- }
- SmallVector<int, 4> NewMask(SubMask.size(), SubMask.size());
- int TermValue = std::min(Mask.size(), SubMask.size());
- for (int I = 0, E = SubMask.size(); I < E; ++I) {
- if (SubMask[I] >= TermValue || SubMask[I] == UndefMaskElem ||
- Mask[SubMask[I]] >= TermValue) {
- NewMask[I] = UndefMaskElem;
- continue;
+/// Build a shuffle mask for a graph entry with alternate operations and,
+/// optionally, the lists of main and alternate operation operands.
+static void
+buildSuffleEntryMask(ArrayRef<Value *> VL, ArrayRef<unsigned> ReorderIndices,
+ ArrayRef<int> ReusesIndices,
+ const function_ref<bool(Instruction *)> IsAltOp,
+ SmallVectorImpl<int> &Mask,
+ SmallVectorImpl<Value *> *OpScalars = nullptr,
+ SmallVectorImpl<Value *> *AltScalars = nullptr) {
+ unsigned Sz = VL.size();
+ Mask.assign(Sz, UndefMaskElem);
+ SmallVector<int> OrderMask;
+ if (!ReorderIndices.empty())
+ inversePermutation(ReorderIndices, OrderMask);
+ for (unsigned I = 0; I < Sz; ++I) {
+ unsigned Idx = I;
+ if (!ReorderIndices.empty())
+ Idx = OrderMask[I];
+ auto *OpInst = cast<Instruction>(VL[Idx]);
+ if (IsAltOp(OpInst)) {
+ Mask[I] = Sz + Idx;
+ if (AltScalars)
+ AltScalars->push_back(OpInst);
+ } else {
+ Mask[I] = Idx;
+ if (OpScalars)
+ OpScalars->push_back(OpInst);
}
- NewMask[I] = Mask[SubMask[I]];
}
- Mask.swap(NewMask);
+ if (!ReusesIndices.empty()) {
+ SmallVector<int> NewMask(ReusesIndices.size(), UndefMaskElem);
+ transform(ReusesIndices, NewMask.begin(), [&Mask](int Idx) {
+ return Idx != UndefMaskElem ? Mask[Idx] : UndefMaskElem;
+ });
+ Mask.swap(NewMask);
+ }
}
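// --- Editor's illustrative sketch; not part of the vendor import. ---
// The core of the mask built by buildSuffleEntryMask() above, ignoring the
// reorder and reuse indices: lane I selects element I of the main-op vector
// or element Sz + I of the alternate-op vector, producing a two-source
// "select" shuffle mask. IsAltLane is a stand-in for the IsAltOp predicate.
#include <functional>
#include <vector>

static std::vector<int>
buildAltOpMaskSketch(unsigned Sz,
                     const std::function<bool(unsigned)> &IsAltLane) {
  std::vector<int> Mask(Sz);
  for (unsigned I = 0; I < Sz; ++I)
    Mask[I] = IsAltLane(I) ? static_cast<int>(Sz + I) : static_cast<int>(I);
  return Mask;
}
// Example: for a bundle {add, sub, add, sub} the mask is {0, 5, 2, 7}, i.e. a
// shufflevector of the "all adds" vector and the "all subs" vector.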
InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
@@ -3661,13 +4375,10 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
if (MinBWs.count(VL[0]))
VecTy = FixedVectorType::get(
IntegerType::get(F->getContext(), MinBWs[VL[0]].first), VL.size());
- auto *FinalVecTy = VecTy;
+ unsigned EntryVF = E->getVectorFactor();
+ auto *FinalVecTy = FixedVectorType::get(VecTy->getElementType(), EntryVF);
- unsigned ReuseShuffleNumbers = E->ReuseShuffleIndices.size();
bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
- if (NeedToShuffleReuses)
- FinalVecTy =
- FixedVectorType::get(VecTy->getElementType(), ReuseShuffleNumbers);
// FIXME: it tries to fix a problem with MSVC buildbots.
TargetTransformInfo &TTIRef = *TTI;
auto &&AdjustExtractsCost = [this, &TTIRef, CostKind, VL, VecTy,
@@ -3785,7 +4496,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
// shuffle of a single/two vectors the scalars are extracted from.
SmallVector<int> Mask;
Optional<TargetTransformInfo::ShuffleKind> ShuffleKind =
- isShuffle(VL, Mask);
+ isFixedVectorShuffle(VL, Mask);
if (ShuffleKind.hasValue()) {
// Found the bunch of extractelement instructions that must be gathered
// into a vector and can be represented as a permutation elements in a
@@ -3803,6 +4514,92 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
if (NeedToShuffleReuses)
ReuseShuffleCost = TTI->getShuffleCost(
TTI::SK_PermuteSingleSrc, FinalVecTy, E->ReuseShuffleIndices);
+ // Improve gather cost for gather of loads, if we can group some of the
+ // loads into vector loads.
+ if (VL.size() > 2 && E->getOpcode() == Instruction::Load &&
+ !E->isAltShuffle()) {
+ BoUpSLP::ValueSet VectorizedLoads;
+ unsigned StartIdx = 0;
+ unsigned VF = VL.size() / 2;
+ unsigned VectorizedCnt = 0;
+ unsigned ScatterVectorizeCnt = 0;
+ const unsigned Sz = DL->getTypeSizeInBits(E->getMainOp()->getType());
+ for (unsigned MinVF = getMinVF(2 * Sz); VF >= MinVF; VF /= 2) {
+ for (unsigned Cnt = StartIdx, End = VL.size(); Cnt + VF <= End;
+ Cnt += VF) {
+ ArrayRef<Value *> Slice = VL.slice(Cnt, VF);
+ if (!VectorizedLoads.count(Slice.front()) &&
+ !VectorizedLoads.count(Slice.back()) && allSameBlock(Slice)) {
+ SmallVector<Value *> PointerOps;
+ OrdersType CurrentOrder;
+ LoadsState LS = canVectorizeLoads(Slice, Slice.front(), *TTI, *DL,
+ *SE, CurrentOrder, PointerOps);
+ switch (LS) {
+ case LoadsState::Vectorize:
+ case LoadsState::ScatterVectorize:
+ // Mark the vectorized loads so that we don't vectorize them
+ // again.
+ if (LS == LoadsState::Vectorize)
+ ++VectorizedCnt;
+ else
+ ++ScatterVectorizeCnt;
+ VectorizedLoads.insert(Slice.begin(), Slice.end());
+ // If we vectorized the initial block, there is no need to try to
+ // vectorize it again.
+ if (Cnt == StartIdx)
+ StartIdx += VF;
+ break;
+ case LoadsState::Gather:
+ break;
+ }
+ }
+ }
+ // Check if the whole array was vectorized already - exit.
+ if (StartIdx >= VL.size())
+ break;
+ // Found vectorizable parts - exit.
+ if (!VectorizedLoads.empty())
+ break;
+ }
+ if (!VectorizedLoads.empty()) {
+ InstructionCost GatherCost = 0;
+ unsigned NumParts = TTI->getNumberOfParts(VecTy);
+ bool NeedInsertSubvectorAnalysis =
+ !NumParts || (VL.size() / VF) > NumParts;
+ // Get the cost for gathered loads.
+ for (unsigned I = 0, End = VL.size(); I < End; I += VF) {
+ if (VectorizedLoads.contains(VL[I]))
+ continue;
+ GatherCost += getGatherCost(VL.slice(I, VF));
+ }
+ // The cost for vectorized loads.
+ InstructionCost ScalarsCost = 0;
+ for (Value *V : VectorizedLoads) {
+ auto *LI = cast<LoadInst>(V);
+ ScalarsCost += TTI->getMemoryOpCost(
+ Instruction::Load, LI->getType(), LI->getAlign(),
+ LI->getPointerAddressSpace(), CostKind, LI);
+ }
+ auto *LI = cast<LoadInst>(E->getMainOp());
+ auto *LoadTy = FixedVectorType::get(LI->getType(), VF);
+ Align Alignment = LI->getAlign();
+ GatherCost +=
+ VectorizedCnt *
+ TTI->getMemoryOpCost(Instruction::Load, LoadTy, Alignment,
+ LI->getPointerAddressSpace(), CostKind, LI);
+ GatherCost += ScatterVectorizeCnt *
+ TTI->getGatherScatterOpCost(
+ Instruction::Load, LoadTy, LI->getPointerOperand(),
+ /*VariableMask=*/false, Alignment, CostKind, LI);
+ if (NeedInsertSubvectorAnalysis) {
+ // Add the cost of inserting the subvectors.
+ for (int I = VF, E = VL.size(); I < E; I += VF)
+ GatherCost += TTI->getShuffleCost(TTI::SK_InsertSubvector, VecTy,
+ None, I, LoadTy);
+ }
+ return ReuseShuffleCost + GatherCost - ScalarsCost;
+ }
+ }
return ReuseShuffleCost + getGatherCost(VL);
}
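// --- Editor's illustrative sketch; not part of the vendor import. ---
// The gather-cost improvement above greedily carves power-of-two slices out
// of a gathered load bundle, halving the trial VF until something vectorizes.
// CanVectorizeSlice(start, len) stands in for canVectorizeLoads(); the real
// code additionally tracks already-vectorized loads and a target minimum VF.
#include <functional>
#include <utility>
#include <vector>

static std::vector<std::pair<unsigned, unsigned>> // (start, length) slices
carveSlices(unsigned Size,
            const std::function<bool(unsigned, unsigned)> &CanVectorizeSlice) {
  std::vector<std::pair<unsigned, unsigned>> Slices;
  unsigned StartIdx = 0;
  for (unsigned VF = Size / 2; VF >= 2; VF /= 2) {
    for (unsigned Cnt = StartIdx; Cnt + VF <= Size; Cnt += VF)
      if (CanVectorizeSlice(Cnt, VF)) {
        Slices.emplace_back(Cnt, VF);
        if (Cnt == StartIdx)
          StartIdx += VF;            // leading part handled, skip it next time
      }
    if (StartIdx >= Size || !Slices.empty())
      break;                         // whole array covered or progress made
  }
  return Slices;
}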
InstructionCost CommonCost = 0;
@@ -3852,7 +4649,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
++Idx;
}
}
- Idx = ReuseShuffleNumbers;
+ Idx = EntryVF;
for (Value *V : VL) {
if (ShuffleOrOp == Instruction::ExtractElement) {
auto *EE = cast<ExtractElementInst>(V);
@@ -3895,29 +4692,33 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
return CommonCost;
}
case Instruction::InsertElement: {
+ assert(E->ReuseShuffleIndices.empty() &&
+ "Unique insertelements only are expected.");
auto *SrcVecTy = cast<FixedVectorType>(VL0->getType());
unsigned const NumElts = SrcVecTy->getNumElements();
unsigned const NumScalars = VL.size();
- APInt DemandedElts = APInt::getNullValue(NumElts);
+ APInt DemandedElts = APInt::getZero(NumElts);
// TODO: Add support for Instruction::InsertValue.
- unsigned Offset = UINT_MAX;
+ SmallVector<int> Mask;
+ if (!E->ReorderIndices.empty()) {
+ inversePermutation(E->ReorderIndices, Mask);
+ Mask.append(NumElts - NumScalars, UndefMaskElem);
+ } else {
+ Mask.assign(NumElts, UndefMaskElem);
+ std::iota(Mask.begin(), std::next(Mask.begin(), NumScalars), 0);
+ }
+ unsigned Offset = *getInsertIndex(VL0, 0);
bool IsIdentity = true;
- SmallVector<int> ShuffleMask(NumElts, UndefMaskElem);
+ SmallVector<int> PrevMask(NumElts, UndefMaskElem);
+ Mask.swap(PrevMask);
for (unsigned I = 0; I < NumScalars; ++I) {
- Optional<int> InsertIdx = getInsertIndex(VL[I], 0);
+ Optional<int> InsertIdx = getInsertIndex(VL[PrevMask[I]], 0);
if (!InsertIdx || *InsertIdx == UndefMaskElem)
continue;
- unsigned Idx = *InsertIdx;
- DemandedElts.setBit(Idx);
- if (Idx < Offset) {
- Offset = Idx;
- IsIdentity &= I == 0;
- } else {
- assert(Idx >= Offset && "Failed to find vector index offset");
- IsIdentity &= Idx - Offset == I;
- }
- ShuffleMask[Idx] = I;
+ DemandedElts.setBit(*InsertIdx);
+ IsIdentity &= *InsertIdx - Offset == I;
+ Mask[*InsertIdx - Offset] = I;
}
assert(Offset < NumElts && "Failed to find vector index offset");
@@ -3932,8 +4733,23 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
TargetTransformInfo::SK_PermuteSingleSrc,
FixedVectorType::get(SrcVecTy->getElementType(), Sz));
} else if (!IsIdentity) {
- Cost += TTI->getShuffleCost(TTI::SK_PermuteSingleSrc, SrcVecTy,
- ShuffleMask);
+ auto *FirstInsert =
+ cast<Instruction>(*find_if(E->Scalars, [E](Value *V) {
+ return !is_contained(E->Scalars,
+ cast<Instruction>(V)->getOperand(0));
+ }));
+ if (isa<UndefValue>(FirstInsert->getOperand(0))) {
+ Cost += TTI->getShuffleCost(TTI::SK_PermuteSingleSrc, SrcVecTy, Mask);
+ } else {
+ SmallVector<int> InsertMask(NumElts);
+ std::iota(InsertMask.begin(), InsertMask.end(), 0);
+ for (unsigned I = 0; I < NumElts; I++) {
+ if (Mask[I] != UndefMaskElem)
+ InsertMask[Offset + I] = NumElts + I;
+ }
+ Cost +=
+ TTI->getShuffleCost(TTI::SK_PermuteTwoSrc, SrcVecTy, InsertMask);
+ }
}
return Cost;
@@ -3955,7 +4771,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
TTI->getCastInstrCost(E->getOpcode(), ScalarTy, SrcTy,
TTI::getCastContextHint(VL0), CostKind, VL0);
if (NeedToShuffleReuses) {
- CommonCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
+ CommonCost -= (EntryVF - VL.size()) * ScalarEltCost;
}
// Calculate the cost of this instruction.
@@ -3980,7 +4796,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
TTI->getCmpSelInstrCost(E->getOpcode(), ScalarTy, Builder.getInt1Ty(),
CmpInst::BAD_ICMP_PREDICATE, CostKind, VL0);
if (NeedToShuffleReuses) {
- CommonCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
+ CommonCost -= (EntryVF - VL.size()) * ScalarEltCost;
}
auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(), VL.size());
InstructionCost ScalarCost = VecTy->getNumElements() * ScalarEltCost;
@@ -4085,7 +4901,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
TTI->getArithmeticInstrCost(E->getOpcode(), ScalarTy, CostKind, Op1VK,
Op2VK, Op1VP, Op2VP, Operands, VL0);
if (NeedToShuffleReuses) {
- CommonCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
+ CommonCost -= (EntryVF - VL.size()) * ScalarEltCost;
}
InstructionCost ScalarCost = VecTy->getNumElements() * ScalarEltCost;
InstructionCost VecCost =
@@ -4103,7 +4919,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
InstructionCost ScalarEltCost = TTI->getArithmeticInstrCost(
Instruction::Add, ScalarTy, CostKind, Op1VK, Op2VK);
if (NeedToShuffleReuses) {
- CommonCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
+ CommonCost -= (EntryVF - VL.size()) * ScalarEltCost;
}
InstructionCost ScalarCost = VecTy->getNumElements() * ScalarEltCost;
InstructionCost VecCost = TTI->getArithmeticInstrCost(
@@ -4117,7 +4933,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
InstructionCost ScalarEltCost = TTI->getMemoryOpCost(
Instruction::Load, ScalarTy, Alignment, 0, CostKind, VL0);
if (NeedToShuffleReuses) {
- CommonCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
+ CommonCost -= (EntryVF - VL.size()) * ScalarEltCost;
}
InstructionCost ScalarLdCost = VecTy->getNumElements() * ScalarEltCost;
InstructionCost VecLdCost;
@@ -4160,7 +4976,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
InstructionCost ScalarEltCost =
TTI->getIntrinsicInstrCost(CostAttrs, CostKind);
if (NeedToShuffleReuses) {
- CommonCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
+ CommonCost -= (EntryVF - VL.size()) * ScalarEltCost;
}
InstructionCost ScalarCallCost = VecTy->getNumElements() * ScalarEltCost;
@@ -4215,14 +5031,16 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
TTI::CastContextHint::None, CostKind);
}
- SmallVector<int> Mask(E->Scalars.size());
- for (unsigned I = 0, End = E->Scalars.size(); I < End; ++I) {
- auto *OpInst = cast<Instruction>(E->Scalars[I]);
- assert(E->isOpcodeOrAlt(OpInst) && "Unexpected main/alternate opcode");
- Mask[I] = I + (OpInst->getOpcode() == E->getAltOpcode() ? End : 0);
- }
- VecCost +=
- TTI->getShuffleCost(TargetTransformInfo::SK_Select, VecTy, Mask, 0);
+ SmallVector<int> Mask;
+ buildSuffleEntryMask(
+ E->Scalars, E->ReorderIndices, E->ReuseShuffleIndices,
+ [E](Instruction *I) {
+ assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
+ return I->getOpcode() == E->getAltOpcode();
+ },
+ Mask);
+ CommonCost =
+ TTI->getShuffleCost(TargetTransformInfo::SK_Select, FinalVecTy, Mask);
LLVM_DEBUG(dumpTreeCosts(E, CommonCost, VecCost, ScalarCost));
return CommonCost + VecCost - ScalarCost;
}
@@ -4231,13 +5049,30 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
}
}
-bool BoUpSLP::isFullyVectorizableTinyTree() const {
+bool BoUpSLP::isFullyVectorizableTinyTree(bool ForReduction) const {
LLVM_DEBUG(dbgs() << "SLP: Check whether the tree with height "
<< VectorizableTree.size() << " is fully vectorizable .\n");
+ auto &&AreVectorizableGathers = [this](const TreeEntry *TE, unsigned Limit) {
+ SmallVector<int> Mask;
+ return TE->State == TreeEntry::NeedToGather &&
+ !any_of(TE->Scalars,
+ [this](Value *V) { return EphValues.contains(V); }) &&
+ (allConstant(TE->Scalars) || isSplat(TE->Scalars) ||
+ TE->Scalars.size() < Limit ||
+ (TE->getOpcode() == Instruction::ExtractElement &&
+ isFixedVectorShuffle(TE->Scalars, Mask)) ||
+ (TE->State == TreeEntry::NeedToGather &&
+ TE->getOpcode() == Instruction::Load && !TE->isAltShuffle()));
+ };
+
// We only handle trees of heights 1 and 2.
if (VectorizableTree.size() == 1 &&
- VectorizableTree[0]->State == TreeEntry::Vectorize)
+ (VectorizableTree[0]->State == TreeEntry::Vectorize ||
+ (ForReduction &&
+ AreVectorizableGathers(VectorizableTree[0].get(),
+ VectorizableTree[0]->Scalars.size()) &&
+ VectorizableTree[0]->getVectorFactor() > 2)))
return true;
if (VectorizableTree.size() != 2)
@@ -4249,19 +5084,14 @@ bool BoUpSLP::isFullyVectorizableTinyTree() const {
// or they are extractelements, which form shuffle.
SmallVector<int> Mask;
if (VectorizableTree[0]->State == TreeEntry::Vectorize &&
- (allConstant(VectorizableTree[1]->Scalars) ||
- isSplat(VectorizableTree[1]->Scalars) ||
- (VectorizableTree[1]->State == TreeEntry::NeedToGather &&
- VectorizableTree[1]->Scalars.size() <
- VectorizableTree[0]->Scalars.size()) ||
- (VectorizableTree[1]->State == TreeEntry::NeedToGather &&
- VectorizableTree[1]->getOpcode() == Instruction::ExtractElement &&
- isShuffle(VectorizableTree[1]->Scalars, Mask))))
+ AreVectorizableGathers(VectorizableTree[1].get(),
+ VectorizableTree[0]->Scalars.size()))
return true;
// Gathering cost would be too much for tiny trees.
if (VectorizableTree[0]->State == TreeEntry::NeedToGather ||
- VectorizableTree[1]->State == TreeEntry::NeedToGather)
+ (VectorizableTree[1]->State == TreeEntry::NeedToGather &&
+ VectorizableTree[0]->State != TreeEntry::ScatterVectorize))
return false;
return true;
@@ -4330,7 +5160,7 @@ bool BoUpSLP::isLoadCombineCandidate() const {
return true;
}
-bool BoUpSLP::isTreeTinyAndNotFullyVectorizable() const {
+bool BoUpSLP::isTreeTinyAndNotFullyVectorizable(bool ForReduction) const {
// No need to vectorize inserts of gathered values.
if (VectorizableTree.size() == 2 &&
isa<InsertElementInst>(VectorizableTree[0]->Scalars[0]) &&
@@ -4344,7 +5174,7 @@ bool BoUpSLP::isTreeTinyAndNotFullyVectorizable() const {
// If we have a tiny tree (a tree whose size is less than MinTreeSize), we
// can vectorize it if we can prove it fully vectorizable.
- if (isFullyVectorizableTinyTree())
+ if (isFullyVectorizableTinyTree(ForReduction))
return false;
assert(VectorizableTree.empty()
@@ -4496,7 +5326,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
// If found user is an insertelement, do not calculate extract cost but try
// to detect it as a final shuffled/identity match.
- if (EU.User && isa<InsertElementInst>(EU.User)) {
+ if (isa_and_nonnull<InsertElementInst>(EU.User)) {
if (auto *FTy = dyn_cast<FixedVectorType>(EU.User->getType())) {
Optional<int> InsertIdx = getInsertIndex(EU.User, 0);
if (!InsertIdx || *InsertIdx == UndefMaskElem)
@@ -4508,8 +5338,8 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
return false;
auto *IE1 = cast<InsertElementInst>(VU);
auto *IE2 = cast<InsertElementInst>(V);
- // Go though of insertelement instructions trying to find either VU as
- // the original vector for IE2 or V as the original vector for IE1.
+ // Go through the insertelement instructions trying to find either VU
+ // as the original vector for IE2 or V as the original vector for IE1.
do {
if (IE1 == VU || IE2 == V)
return true;
@@ -4525,7 +5355,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
VF.push_back(FTy->getNumElements());
ShuffleMask.emplace_back(VF.back(), UndefMaskElem);
FirstUsers.push_back(EU.User);
- DemandedElts.push_back(APInt::getNullValue(VF.back()));
+ DemandedElts.push_back(APInt::getZero(VF.back()));
VecId = FirstUsers.size() - 1;
} else {
VecId = std::distance(FirstUsers.begin(), It);
@@ -4705,18 +5535,11 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, SmallVectorImpl<int> &Mask,
} else {
// Try to find nodes with the same vector factor.
assert(UsedTEs.size() == 2 && "Expected at max 2 permuted entries.");
- // FIXME: Shall be replaced by GetVF function once non-power-2 patch is
- // landed.
- auto &&GetVF = [](const TreeEntry *TE) {
- if (!TE->ReuseShuffleIndices.empty())
- return TE->ReuseShuffleIndices.size();
- return TE->Scalars.size();
- };
DenseMap<int, const TreeEntry *> VFToTE;
for (const TreeEntry *TE : UsedTEs.front())
- VFToTE.try_emplace(GetVF(TE), TE);
+ VFToTE.try_emplace(TE->getVectorFactor(), TE);
for (const TreeEntry *TE : UsedTEs.back()) {
- auto It = VFToTE.find(GetVF(TE));
+ auto It = VFToTE.find(TE->getVectorFactor());
if (It != VFToTE.end()) {
VF = It->first;
Entries.push_back(It->second);
@@ -4757,16 +5580,17 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, SmallVectorImpl<int> &Mask,
InstructionCost
BoUpSLP::getGatherCost(FixedVectorType *Ty,
- const DenseSet<unsigned> &ShuffledIndices) const {
+ const DenseSet<unsigned> &ShuffledIndices,
+ bool NeedToShuffle) const {
unsigned NumElts = Ty->getNumElements();
- APInt DemandedElts = APInt::getNullValue(NumElts);
+ APInt DemandedElts = APInt::getZero(NumElts);
for (unsigned I = 0; I < NumElts; ++I)
if (!ShuffledIndices.count(I))
DemandedElts.setBit(I);
InstructionCost Cost =
TTI->getScalarizationOverhead(Ty, DemandedElts, /*Insert*/ true,
/*Extract*/ false);
- if (!ShuffledIndices.empty())
+ if (NeedToShuffle)
Cost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, Ty);
return Cost;
}
@@ -4777,6 +5601,7 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL) const {
if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
ScalarTy = SI->getValueOperand()->getType();
auto *VecTy = FixedVectorType::get(ScalarTy, VL.size());
+ bool DuplicateNonConst = false;
// Find the cost of inserting/extracting values from the vector.
// Check if the same elements are inserted several times and count them as
// shuffle candidates.
@@ -4785,12 +5610,17 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL) const {
// Iterate in reverse order to consider insert elements with the high cost.
for (unsigned I = VL.size(); I > 0; --I) {
unsigned Idx = I - 1;
- if (isConstant(VL[Idx]))
+ // No need to shuffle duplicates for constants.
+ if (isConstant(VL[Idx])) {
+ ShuffledElements.insert(Idx);
continue;
- if (!UniqueElements.insert(VL[Idx]).second)
+ }
+ if (!UniqueElements.insert(VL[Idx]).second) {
+ DuplicateNonConst = true;
ShuffledElements.insert(Idx);
+ }
}
- return getGatherCost(VecTy, ShuffledElements);
+ return getGatherCost(VecTy, ShuffledElements, DuplicateNonConst);
}
// Perform operand reordering on the instructions in VL and return the reordered
@@ -5006,17 +5836,18 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
// block:
// %phi = phi <2 x > { .., %entry} {%shuffle, %block}
- // %2 = shuffle <2 x > %phi, %poison, <4 x > <0, 0, 1, 1>
+ // %2 = shuffle <2 x > %phi, poison, <4 x > <1, 1, 0, 0>
// ... (use %2)
- // %shuffle = shuffle <2 x> %2, poison, <2 x> {0, 2}
+ // %shuffle = shuffle <2 x> %2, poison, <2 x> {2, 0}
// br %block
- SmallVector<int> UniqueIdxs;
+ SmallVector<int> UniqueIdxs(VF, UndefMaskElem);
SmallSet<int, 4> UsedIdxs;
int Pos = 0;
int Sz = VL.size();
for (int Idx : E->ReuseShuffleIndices) {
- if (Idx != Sz && UsedIdxs.insert(Idx).second)
- UniqueIdxs.emplace_back(Pos);
+ if (Idx != Sz && Idx != UndefMaskElem &&
+ UsedIdxs.insert(Idx).second)
+ UniqueIdxs[Idx] = Pos;
++Pos;
}
assert(VF >= UsedIdxs.size() && "Expected vectorization factor "
@@ -5047,11 +5878,9 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
}).base());
VF = std::max<unsigned>(VF, PowerOf2Ceil(NumValues));
int UniqueVals = 0;
- bool HasUndefs = false;
for (Value *V : VL.drop_back(VL.size() - VF)) {
if (isa<UndefValue>(V)) {
ReuseShuffleIndicies.emplace_back(UndefMaskElem);
- HasUndefs = true;
continue;
}
if (isConstant(V)) {
@@ -5066,15 +5895,10 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
++UniqueVals;
}
}
- if (HasUndefs && UniqueVals == 1 && UniqueValues.size() == 1) {
+ if (UniqueVals == 1 && UniqueValues.size() == 1) {
// Emit pure splat vector.
- // FIXME: why it is not identified as an identity.
- unsigned NumUndefs = count(ReuseShuffleIndicies, UndefMaskElem);
- if (NumUndefs == ReuseShuffleIndicies.size() - 1)
- ReuseShuffleIndicies.append(VF - ReuseShuffleIndicies.size(),
- UndefMaskElem);
- else
- ReuseShuffleIndicies.assign(VF, 0);
+ ReuseShuffleIndicies.append(VF - ReuseShuffleIndicies.size(),
+ UndefMaskElem);
} else if (UniqueValues.size() >= VF - 1 || UniqueValues.size() <= 1) {
ReuseShuffleIndicies.clear();
UniqueValues.clear();
@@ -5107,12 +5931,11 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}
bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
- unsigned VF = E->Scalars.size();
- if (NeedToShuffleReuses)
- VF = E->ReuseShuffleIndices.size();
+ unsigned VF = E->getVectorFactor();
ShuffleInstructionBuilder ShuffleBuilder(Builder, VF);
if (E->State == TreeEntry::NeedToGather) {
- setInsertPointAfterBundle(E);
+ if (E->getMainOp())
+ setInsertPointAfterBundle(E);
Value *Vec;
SmallVector<int> Mask;
SmallVector<const TreeEntry *> Entries;
@@ -5152,13 +5975,17 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
auto *VecTy = FixedVectorType::get(ScalarTy, E->Scalars.size());
switch (ShuffleOrOp) {
case Instruction::PHI: {
+ assert(
+ (E->ReorderIndices.empty() || E != VectorizableTree.front().get()) &&
+ "PHI reordering is free.");
auto *PH = cast<PHINode>(VL0);
Builder.SetInsertPoint(PH->getParent()->getFirstNonPHI());
Builder.SetCurrentDebugLocation(PH->getDebugLoc());
PHINode *NewPhi = Builder.CreatePHI(VecTy, PH->getNumIncomingValues());
Value *V = NewPhi;
- if (NeedToShuffleReuses)
- V = Builder.CreateShuffleVector(V, E->ReuseShuffleIndices, "shuffle");
+ ShuffleBuilder.addInversedMask(E->ReorderIndices);
+ ShuffleBuilder.addMask(E->ReuseShuffleIndices);
+ V = ShuffleBuilder.finalize(V);
E->VectorizedValue = V;
@@ -5209,53 +6036,48 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
return NewV;
}
case Instruction::InsertElement: {
- Builder.SetInsertPoint(VL0);
+ assert(E->ReuseShuffleIndices.empty() && "All inserts should be unique");
+ Builder.SetInsertPoint(cast<Instruction>(E->Scalars.back()));
Value *V = vectorizeTree(E->getOperand(1));
+ // Create InsertVector shuffle if necessary
+ auto *FirstInsert = cast<Instruction>(*find_if(E->Scalars, [E](Value *V) {
+ return !is_contained(E->Scalars, cast<Instruction>(V)->getOperand(0));
+ }));
const unsigned NumElts =
- cast<FixedVectorType>(VL0->getType())->getNumElements();
+ cast<FixedVectorType>(FirstInsert->getType())->getNumElements();
const unsigned NumScalars = E->Scalars.size();
+ unsigned Offset = *getInsertIndex(VL0, 0);
+ assert(Offset < NumElts && "Failed to find vector index offset");
+
+ // Create shuffle to resize vector
+ SmallVector<int> Mask;
+ if (!E->ReorderIndices.empty()) {
+ inversePermutation(E->ReorderIndices, Mask);
+ Mask.append(NumElts - NumScalars, UndefMaskElem);
+ } else {
+ Mask.assign(NumElts, UndefMaskElem);
+ std::iota(Mask.begin(), std::next(Mask.begin(), NumScalars), 0);
+ }
// Create InsertVector shuffle if necessary
- Instruction *FirstInsert = nullptr;
bool IsIdentity = true;
- unsigned Offset = UINT_MAX;
+ SmallVector<int> PrevMask(NumElts, UndefMaskElem);
+ Mask.swap(PrevMask);
for (unsigned I = 0; I < NumScalars; ++I) {
- Value *Scalar = E->Scalars[I];
- if (!FirstInsert &&
- !is_contained(E->Scalars, cast<Instruction>(Scalar)->getOperand(0)))
- FirstInsert = cast<Instruction>(Scalar);
+ Value *Scalar = E->Scalars[PrevMask[I]];
Optional<int> InsertIdx = getInsertIndex(Scalar, 0);
if (!InsertIdx || *InsertIdx == UndefMaskElem)
continue;
- unsigned Idx = *InsertIdx;
- if (Idx < Offset) {
- Offset = Idx;
- IsIdentity &= I == 0;
- } else {
- assert(Idx >= Offset && "Failed to find vector index offset");
- IsIdentity &= Idx - Offset == I;
- }
- }
- assert(Offset < NumElts && "Failed to find vector index offset");
-
- // Create shuffle to resize vector
- SmallVector<int> Mask(NumElts, UndefMaskElem);
- if (!IsIdentity) {
- for (unsigned I = 0; I < NumScalars; ++I) {
- Value *Scalar = E->Scalars[I];
- Optional<int> InsertIdx = getInsertIndex(Scalar, 0);
- if (!InsertIdx || *InsertIdx == UndefMaskElem)
- continue;
- Mask[*InsertIdx - Offset] = I;
- }
- } else {
- std::iota(Mask.begin(), std::next(Mask.begin(), NumScalars), 0);
+ IsIdentity &= *InsertIdx - Offset == I;
+ Mask[*InsertIdx - Offset] = I;
}
if (!IsIdentity || NumElts != NumScalars)
V = Builder.CreateShuffleVector(V, Mask);
- if (NumElts != NumScalars) {
+ if ((!IsIdentity || Offset != 0 ||
+ !isa<UndefValue>(FirstInsert->getOperand(0))) &&
+ NumElts != NumScalars) {
SmallVector<int> InsertMask(NumElts);
std::iota(InsertMask.begin(), InsertMask.end(), 0);
for (unsigned I = 0; I < NumElts; I++) {
@@ -5295,6 +6117,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
auto *CI = cast<CastInst>(VL0);
Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy);
+ ShuffleBuilder.addInversedMask(E->ReorderIndices);
ShuffleBuilder.addMask(E->ReuseShuffleIndices);
V = ShuffleBuilder.finalize(V);
@@ -5317,6 +6140,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate();
Value *V = Builder.CreateCmp(P0, L, R);
propagateIRFlags(V, E->Scalars, VL0);
+ ShuffleBuilder.addInversedMask(E->ReorderIndices);
ShuffleBuilder.addMask(E->ReuseShuffleIndices);
V = ShuffleBuilder.finalize(V);
@@ -5337,6 +6161,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}
Value *V = Builder.CreateSelect(Cond, True, False);
+ ShuffleBuilder.addInversedMask(E->ReorderIndices);
ShuffleBuilder.addMask(E->ReuseShuffleIndices);
V = ShuffleBuilder.finalize(V);
@@ -5360,6 +6185,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
if (auto *I = dyn_cast<Instruction>(V))
V = propagateMetadata(I, E->Scalars);
+ ShuffleBuilder.addInversedMask(E->ReorderIndices);
ShuffleBuilder.addMask(E->ReuseShuffleIndices);
V = ShuffleBuilder.finalize(V);
@@ -5403,6 +6229,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
if (auto *I = dyn_cast<Instruction>(V))
V = propagateMetadata(I, E->Scalars);
+ ShuffleBuilder.addInversedMask(E->ReorderIndices);
ShuffleBuilder.addMask(E->ReuseShuffleIndices);
V = ShuffleBuilder.finalize(V);
@@ -5414,9 +6241,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
case Instruction::Load: {
// Loads are inserted at the head of the tree because we don't want to
// sink them all the way down past store instructions.
- bool IsReorder = E->updateStateIfReorder();
- if (IsReorder)
- VL0 = E->getMainOp();
setInsertPointAfterBundle(E);
LoadInst *LI = cast<LoadInst>(VL0);
@@ -5430,8 +6254,11 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
// The pointer operand uses an in-tree scalar so we add the new BitCast
// to ExternalUses list to make sure that an extract will be generated
// in the future.
- if (getTreeEntry(PO))
- ExternalUses.emplace_back(PO, cast<User>(VecPtr), 0);
+ if (TreeEntry *Entry = getTreeEntry(PO)) {
+ // Find which lane we need to extract.
+ unsigned FoundLane = Entry->findLaneForValue(PO);
+ ExternalUses.emplace_back(PO, cast<User>(VecPtr), FoundLane);
+ }
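+    // The extract lane is no longer hard-coded to 0; findLaneForValue locates
+    // the scalar's position in the bundle. A rough sketch of the idea,
+    // ignoring the reuse/reorder indices the real helper also considers:
+    //   unsigned findLaneForValueSketch(ArrayRef<Value *> Scalars, Value *V) {
+    //     auto It = llvm::find(Scalars, V);
+    //     assert(It != Scalars.end() && "Value must be part of the bundle");
+    //     return std::distance(Scalars.begin(), It);
+    //   }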
NewLI = Builder.CreateAlignedLoad(VecTy, VecPtr, LI->getAlign());
} else {
@@ -5454,9 +6281,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
return V;
}
case Instruction::Store: {
- bool IsReorder = !E->ReorderIndices.empty();
- auto *SI = cast<StoreInst>(
- IsReorder ? E->Scalars[E->ReorderIndices.front()] : VL0);
+ auto *SI = cast<StoreInst>(VL0);
unsigned AS = SI->getPointerAddressSpace();
setInsertPointAfterBundle(E);
@@ -5474,8 +6299,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
// The pointer operand uses an in-tree scalar, so add the new BitCast to
// ExternalUses to make sure that an extract will be generated in the
// future.
- if (getTreeEntry(ScalarPtr))
- ExternalUses.push_back(ExternalUser(ScalarPtr, cast<User>(VecPtr), 0));
+ if (TreeEntry *Entry = getTreeEntry(ScalarPtr)) {
+ // Find which lane we need to extract.
+ unsigned FoundLane = Entry->findLaneForValue(ScalarPtr);
+ ExternalUses.push_back(
+ ExternalUser(ScalarPtr, cast<User>(VecPtr), FoundLane));
+ }
Value *V = propagateMetadata(ST, E->Scalars);
@@ -5484,37 +6313,22 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
return V;
}
case Instruction::GetElementPtr: {
+ auto *GEP0 = cast<GetElementPtrInst>(VL0);
setInsertPointAfterBundle(E);
Value *Op0 = vectorizeTree(E->getOperand(0));
- std::vector<Value *> OpVecs;
- for (int j = 1, e = cast<GetElementPtrInst>(VL0)->getNumOperands(); j < e;
- ++j) {
- ValueList &VL = E->getOperand(j);
- // Need to cast all elements to the same type before vectorization to
- // avoid crash.
- Type *VL0Ty = VL0->getOperand(j)->getType();
- Type *Ty = llvm::all_of(
- VL, [VL0Ty](Value *V) { return VL0Ty == V->getType(); })
- ? VL0Ty
- : DL->getIndexType(cast<GetElementPtrInst>(VL0)
- ->getPointerOperandType()
- ->getScalarType());
- for (Value *&V : VL) {
- auto *CI = cast<ConstantInt>(V);
- V = ConstantExpr::getIntegerCast(CI, Ty,
- CI->getValue().isSignBitSet());
- }
- Value *OpVec = vectorizeTree(VL);
+ SmallVector<Value *> OpVecs;
+ for (int J = 1, N = GEP0->getNumOperands(); J < N; ++J) {
+ Value *OpVec = vectorizeTree(E->getOperand(J));
OpVecs.push_back(OpVec);
}
- Value *V = Builder.CreateGEP(
- cast<GetElementPtrInst>(VL0)->getSourceElementType(), Op0, OpVecs);
+ Value *V = Builder.CreateGEP(GEP0->getSourceElementType(), Op0, OpVecs);
if (Instruction *I = dyn_cast<Instruction>(V))
V = propagateMetadata(I, E->Scalars);
+ ShuffleBuilder.addInversedMask(E->ReorderIndices);
ShuffleBuilder.addMask(E->ReuseShuffleIndices);
V = ShuffleBuilder.finalize(V);
@@ -5541,7 +6355,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
std::vector<Value *> OpVecs;
SmallVector<Type *, 2> TysForDecl =
{FixedVectorType::get(CI->getType(), E->Scalars.size())};
- for (int j = 0, e = CI->getNumArgOperands(); j < e; ++j) {
+ for (int j = 0, e = CI->arg_size(); j < e; ++j) {
ValueList OpVL;
// Some intrinsics have scalar arguments. This argument should not be
// vectorized.
@@ -5577,10 +6391,17 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
// The scalar argument uses an in-tree scalar so we add the new vectorized
// call to ExternalUses list to make sure that an extract will be
// generated in the future.
- if (ScalarArg && getTreeEntry(ScalarArg))
- ExternalUses.push_back(ExternalUser(ScalarArg, cast<User>(V), 0));
+ if (ScalarArg) {
+ if (TreeEntry *Entry = getTreeEntry(ScalarArg)) {
+ // Find which lane we need to extract.
+ unsigned FoundLane = Entry->findLaneForValue(ScalarArg);
+ ExternalUses.push_back(
+ ExternalUser(ScalarArg, cast<User>(V), FoundLane));
+ }
+ }
propagateIRFlags(V, E->Scalars, VL0);
+ ShuffleBuilder.addInversedMask(E->ReorderIndices);
ShuffleBuilder.addMask(E->ReuseShuffleIndices);
V = ShuffleBuilder.finalize(V);
@@ -5628,19 +6449,14 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
// Also, gather up main and alt scalar ops to propagate IR flags to
// each vector operation.
ValueList OpScalars, AltScalars;
- unsigned Sz = E->Scalars.size();
- SmallVector<int> Mask(Sz);
- for (unsigned I = 0; I < Sz; ++I) {
- auto *OpInst = cast<Instruction>(E->Scalars[I]);
- assert(E->isOpcodeOrAlt(OpInst) && "Unexpected main/alternate opcode");
- if (OpInst->getOpcode() == E->getAltOpcode()) {
- Mask[I] = Sz + I;
- AltScalars.push_back(E->Scalars[I]);
- } else {
- Mask[I] = I;
- OpScalars.push_back(E->Scalars[I]);
- }
- }
+ SmallVector<int> Mask;
+ buildSuffleEntryMask(
+ E->Scalars, E->ReorderIndices, E->ReuseShuffleIndices,
+ [E](Instruction *I) {
+ assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
+ return I->getOpcode() == E->getAltOpcode();
+ },
+ Mask, &OpScalars, &AltScalars);
propagateIRFlags(V0, OpScalars);
propagateIRFlags(V1, AltScalars);
@@ -5648,7 +6464,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
Value *V = Builder.CreateShuffleVector(V0, V1, Mask);
if (Instruction *I = dyn_cast<Instruction>(V))
V = propagateMetadata(I, E->Scalars);
- ShuffleBuilder.addMask(E->ReuseShuffleIndices);
V = ShuffleBuilder.finalize(V);
E->VectorizedValue = V;
@@ -5823,7 +6638,9 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
LLVM_DEBUG(dbgs() << "SLP: \tvalidating user:" << *U << ".\n");
// It is legal to delete users in the ignorelist.
- assert((getTreeEntry(U) || is_contained(UserIgnoreList, U)) &&
+ assert((getTreeEntry(U) || is_contained(UserIgnoreList, U) ||
+ (isa_and_nonnull<Instruction>(U) &&
+ isDeleted(cast<Instruction>(U)))) &&
"Deleting out-of-tree value");
}
}
@@ -5898,27 +6715,28 @@ void BoUpSLP::optimizeGatherSequence() {
"Worklist not sorted properly!");
BasicBlock *BB = (*I)->getBlock();
// For all instructions in blocks containing gather sequences:
- for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;) {
- Instruction *In = &*it++;
- if (isDeleted(In))
+ for (Instruction &In : llvm::make_early_inc_range(*BB)) {
+ if (isDeleted(&In))
continue;
- if (!isa<InsertElementInst>(In) && !isa<ExtractElementInst>(In))
+ if (!isa<InsertElementInst>(&In) && !isa<ExtractElementInst>(&In) &&
+ !isa<ShuffleVectorInst>(&In))
continue;
// Check if we can replace this instruction with any of the
// visited instructions.
+ bool Replaced = false;
for (Instruction *v : Visited) {
- if (In->isIdenticalTo(v) &&
- DT->dominates(v->getParent(), In->getParent())) {
- In->replaceAllUsesWith(v);
- eraseInstruction(In);
- In = nullptr;
+ if (In.isIdenticalTo(v) &&
+ DT->dominates(v->getParent(), In.getParent())) {
+ In.replaceAllUsesWith(v);
+ eraseInstruction(&In);
+ Replaced = true;
break;
}
}
- if (In) {
- assert(!is_contained(Visited, In));
- Visited.push_back(In);
+ if (!Replaced) {
+ assert(!is_contained(Visited, &In));
+ Visited.push_back(&In);
}
}
}
@@ -5931,7 +6749,9 @@ void BoUpSLP::optimizeGatherSequence() {
Optional<BoUpSLP::ScheduleData *>
BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
const InstructionsState &S) {
- if (isa<PHINode>(S.OpValue) || isa<InsertElementInst>(S.OpValue))
+ // No need to schedule PHIs, insertelement, extractelement and extractvalue
+ // instructions.
+ if (isa<PHINode>(S.OpValue) || isVectorLikeInstWithConstOps(S.OpValue))
return nullptr;
// Initialize the instruction bundle.
@@ -6027,7 +6847,7 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef<Value *> VL,
Value *OpValue) {
- if (isa<PHINode>(OpValue) || isa<InsertElementInst>(OpValue))
+ if (isa<PHINode>(OpValue) || isVectorLikeInstWithConstOps(OpValue))
return;
ScheduleData *Bundle = getScheduleData(OpValue);
@@ -6067,8 +6887,9 @@ bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V,
return true;
Instruction *I = dyn_cast<Instruction>(V);
assert(I && "bundle member must be an instruction");
- assert(!isa<PHINode>(I) && !isa<InsertElementInst>(I) &&
- "phi nodes/insertelements don't need to be scheduled");
+ assert(!isa<PHINode>(I) && !isVectorLikeInstWithConstOps(I) &&
+ "phi nodes/insertelements/extractelements/extractvalues don't need to "
+ "be scheduled");
auto &&CheckSheduleForI = [this, &S](Instruction *I) -> bool {
ScheduleData *ISD = getScheduleData(I);
if (!ISD)
@@ -6338,7 +7159,7 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
for (auto *I = BS->ScheduleStart; I != BS->ScheduleEnd;
I = I->getNextNode()) {
BS->doForAllOpcodes(I, [this, &Idx, &NumToSchedule, BS](ScheduleData *SD) {
- assert((isa<InsertElementInst>(SD->Inst) ||
+ assert((isVectorLikeInstWithConstOps(SD->Inst) ||
SD->isPartOfBundle() == (getTreeEntry(SD->Inst) != nullptr)) &&
"scheduler and vectorizer bundle mismatch");
SD->FirstInBundle->SchedulingPriority = Idx++;
@@ -6681,9 +7502,7 @@ struct SLPVectorizer : public FunctionPass {
initializeSLPVectorizerPass(*PassRegistry::getPassRegistry());
}
- bool doInitialization(Module &M) override {
- return false;
- }
+ bool doInitialization(Module &M) override { return false; }
bool runOnFunction(Function &F) override {
if (skipFunction(F))
@@ -6818,44 +7637,6 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_,
return Changed;
}
-/// Order may have elements assigned special value (size) which is out of
-/// bounds. Such indices only appear on places which correspond to undef values
-/// (see canReuseExtract for details) and used in order to avoid undef values
-/// have effect on operands ordering.
-/// The first loop below simply finds all unused indices and then the next loop
-/// nest assigns these indices for undef values positions.
-/// As an example below Order has two undef positions and they have assigned
-/// values 3 and 7 respectively:
-/// before: 6 9 5 4 9 2 1 0
-/// after: 6 3 5 4 7 2 1 0
-/// \returns Fixed ordering.
-static BoUpSLP::OrdersType fixupOrderingIndices(ArrayRef<unsigned> Order) {
- BoUpSLP::OrdersType NewOrder(Order.begin(), Order.end());
- const unsigned Sz = NewOrder.size();
- SmallBitVector UsedIndices(Sz);
- SmallVector<int> MaskedIndices;
- for (int I = 0, E = NewOrder.size(); I < E; ++I) {
- if (NewOrder[I] < Sz)
- UsedIndices.set(NewOrder[I]);
- else
- MaskedIndices.push_back(I);
- }
- if (MaskedIndices.empty())
- return NewOrder;
- SmallVector<int> AvailableIndices(MaskedIndices.size());
- unsigned Cnt = 0;
- int Idx = UsedIndices.find_first();
- do {
- AvailableIndices[Cnt] = Idx;
- Idx = UsedIndices.find_next(Idx);
- ++Cnt;
- } while (Idx > 0);
- assert(Cnt == MaskedIndices.size() && "Non-synced masked/available indices.");
- for (int I = 0, E = MaskedIndices.size(); I < E; ++I)
- NewOrder[MaskedIndices[I]] = AvailableIndices[I];
- return NewOrder;
-}
-
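The removed helper's intent (its job is now covered by the reorderTopToBottom/reorderBottomToTop calls introduced elsewhere in this patch) can be summarized with a small standalone sketch; the name and simplified loop below are illustrative only:

// Sketch: fill out-of-bounds ("undef") order entries with the unused indices,
// in increasing order, e.g. {6, 9, 5, 4, 9, 2, 1, 0} -> {6, 3, 5, 4, 7, 2, 1, 0}.
static void fixupOrderSketch(SmallVectorImpl<unsigned> &Order) {
  const unsigned Sz = Order.size();
  SmallBitVector Used(Sz);
  for (unsigned I : Order)
    if (I < Sz)
      Used.set(I);
  int Next = Used.find_first_unset();
  for (unsigned &I : Order)
    if (I >= Sz) {
      assert(Next >= 0 && "More undef slots than unused indices");
      I = Next;
      Next = Used.find_next_unset(Next);
    }
}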
bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R,
unsigned Idx) {
LLVM_DEBUG(dbgs() << "SLP: Analyzing a store chain of length " << Chain.size()
@@ -6871,19 +7652,13 @@ bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R,
<< "\n");
R.buildTree(Chain);
- Optional<ArrayRef<unsigned>> Order = R.bestOrder();
- // TODO: Handle orders of size less than number of elements in the vector.
- if (Order && Order->size() == Chain.size()) {
- // TODO: reorder tree nodes without tree rebuilding.
- SmallVector<Value *, 4> ReorderedOps(Chain.size());
- transform(fixupOrderingIndices(*Order), ReorderedOps.begin(),
- [Chain](const unsigned Idx) { return Chain[Idx]; });
- R.buildTree(ReorderedOps);
- }
if (R.isTreeTinyAndNotFullyVectorizable())
return false;
if (R.isLoadCombineCandidate())
return false;
+ R.reorderTopToBottom();
+ R.reorderBottomToTop();
+ R.buildExternalUses();
R.computeMinimumValueSizes();
@@ -7006,7 +7781,7 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
unsigned EltSize = R.getVectorElementSize(Operands[0]);
unsigned MaxElts = llvm::PowerOf2Floor(MaxVecRegSize / EltSize);
- unsigned MinVF = std::max(2U, R.getMinVecRegSize() / EltSize);
+ unsigned MinVF = R.getMinVF(EltSize);
unsigned MaxVF = std::min(R.getMaximumVF(EltSize, Instruction::Store),
MaxElts);
@@ -7079,11 +7854,11 @@ bool SLPVectorizerPass::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) {
if (!A || !B)
return false;
Value *VL[] = {A, B};
- return tryToVectorizeList(VL, R, /*AllowReorder=*/true);
+ return tryToVectorizeList(VL, R);
}
bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
- bool AllowReorder) {
+ bool LimitForRegisterSize) {
if (VL.size() < 2)
return false;
@@ -7117,7 +7892,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
}
unsigned Sz = R.getVectorElementSize(I0);
- unsigned MinVF = std::max(2U, R.getMinVecRegSize() / Sz);
+ unsigned MinVF = R.getMinVF(Sz);
unsigned MaxVF = std::max<unsigned>(PowerOf2Floor(VL.size()), MinVF);
MaxVF = std::min(R.getMaximumVF(Sz, S.getOpcode()), MaxVF);
if (MaxVF < 2) {
@@ -7155,7 +7930,8 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
if (!isPowerOf2_32(OpsWidth))
continue;
- if ((VF > MinVF && OpsWidth <= VF / 2) || (VF == MinVF && OpsWidth < 2))
+ if ((LimitForRegisterSize && OpsWidth < MaxVF) ||
+ (VF > MinVF && OpsWidth <= VF / 2) || (VF == MinVF && OpsWidth < 2))
break;
ArrayRef<Value *> Ops = VL.slice(I, OpsWidth);
@@ -7170,18 +7946,11 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
<< "\n");
R.buildTree(Ops);
- if (AllowReorder) {
- Optional<ArrayRef<unsigned>> Order = R.bestOrder();
- if (Order) {
- // TODO: reorder tree nodes without tree rebuilding.
- SmallVector<Value *, 4> ReorderedOps(Ops.size());
- transform(fixupOrderingIndices(*Order), ReorderedOps.begin(),
- [Ops](const unsigned Idx) { return Ops[Idx]; });
- R.buildTree(ReorderedOps);
- }
- }
if (R.isTreeTinyAndNotFullyVectorizable())
continue;
+ R.reorderTopToBottom();
+ R.reorderBottomToTop();
+ R.buildExternalUses();
R.computeMinimumValueSizes();
InstructionCost Cost = R.getTreeCost();
@@ -7374,10 +8143,20 @@ class HorizontalReduction {
Value *RHS, const Twine &Name, bool UseSelect) {
unsigned RdxOpcode = RecurrenceDescriptor::getOpcode(Kind);
switch (Kind) {
- case RecurKind::Add:
- case RecurKind::Mul:
case RecurKind::Or:
+ if (UseSelect &&
+ LHS->getType() == CmpInst::makeCmpResultType(LHS->getType()))
+ return Builder.CreateSelect(LHS, Builder.getTrue(), RHS, Name);
+ return Builder.CreateBinOp((Instruction::BinaryOps)RdxOpcode, LHS, RHS,
+ Name);
case RecurKind::And:
+ if (UseSelect &&
+ LHS->getType() == CmpInst::makeCmpResultType(LHS->getType()))
+ return Builder.CreateSelect(LHS, RHS, Builder.getFalse(), Name);
+ return Builder.CreateBinOp((Instruction::BinaryOps)RdxOpcode, LHS, RHS,
+ Name);
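+    // For reference: the selects created above are the poison-safe ("logical")
+    // forms of the i1 bitwise ops:
+    //   a | b  ==>  select i1 %a, i1 true,  i1 %b
+    //   a & b  ==>  select i1 %a, i1 %b,    i1 false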
+ case RecurKind::Add:
+ case RecurKind::Mul:
case RecurKind::Xor:
case RecurKind::FAdd:
case RecurKind::FMul:
@@ -7421,8 +8200,12 @@ class HorizontalReduction {
static Value *createOp(IRBuilder<> &Builder, RecurKind RdxKind, Value *LHS,
Value *RHS, const Twine &Name,
const ReductionOpsListType &ReductionOps) {
- bool UseSelect = ReductionOps.size() == 2;
- assert((!UseSelect || isa<SelectInst>(ReductionOps[1][0])) &&
+ bool UseSelect = ReductionOps.size() == 2 ||
+ // Logical or/and.
+ (ReductionOps.size() == 1 &&
+ isa<SelectInst>(ReductionOps.front().front()));
+ assert((!UseSelect || ReductionOps.size() != 2 ||
+ isa<SelectInst>(ReductionOps[1][0])) &&
"Expected cmp + select pairs for reduction");
Value *Op = createOp(Builder, RdxKind, LHS, RHS, Name, UseSelect);
if (RecurrenceDescriptor::isIntMinMaxRecurrenceKind(RdxKind)) {
@@ -7560,10 +8343,10 @@ class HorizontalReduction {
/// Checks if the instruction is in basic block \p BB.
/// For a cmp+sel min/max reduction check that both ops are in \p BB.
static bool hasSameParent(Instruction *I, BasicBlock *BB) {
- if (isCmpSelMinMax(I)) {
+ if (isCmpSelMinMax(I) || (isBoolLogicOp(I) && isa<SelectInst>(I))) {
auto *Sel = cast<SelectInst>(I);
- auto *Cmp = cast<Instruction>(Sel->getCondition());
- return Sel->getParent() == BB && Cmp->getParent() == BB;
+ auto *Cmp = dyn_cast<Instruction>(Sel->getCondition());
+ return Sel->getParent() == BB && Cmp && Cmp->getParent() == BB;
}
return I->getParent() == BB;
}
@@ -7745,13 +8528,13 @@ public:
}
/// Attempt to vectorize the tree found by matchAssociativeReduction.
- bool tryToReduce(BoUpSLP &V, TargetTransformInfo *TTI) {
+ Value *tryToReduce(BoUpSLP &V, TargetTransformInfo *TTI) {
// If there are a sufficient number of reduction values, reduce
// to a nearby power-of-2. We can safely generate oversized
// vectors and rely on the backend to split them to legal sizes.
unsigned NumReducedVals = ReducedVals.size();
if (NumReducedVals < 4)
- return false;
+ return nullptr;
// Intersect the fast-math-flags from all reduction operations.
FastMathFlags RdxFMF;
@@ -7825,22 +8608,14 @@ public:
unsigned i = 0;
while (i < NumReducedVals - ReduxWidth + 1 && ReduxWidth > 2) {
ArrayRef<Value *> VL(&ReducedVals[i], ReduxWidth);
- V.buildTree(VL, ExternallyUsedValues, IgnoreList);
- Optional<ArrayRef<unsigned>> Order = V.bestOrder();
- if (Order) {
- assert(Order->size() == VL.size() &&
- "Order size must be the same as number of vectorized "
- "instructions.");
- // TODO: reorder tree nodes without tree rebuilding.
- SmallVector<Value *, 4> ReorderedOps(VL.size());
- transform(fixupOrderingIndices(*Order), ReorderedOps.begin(),
- [VL](const unsigned Idx) { return VL[Idx]; });
- V.buildTree(ReorderedOps, ExternallyUsedValues, IgnoreList);
- }
- if (V.isTreeTinyAndNotFullyVectorizable())
+ V.buildTree(VL, IgnoreList);
+ if (V.isTreeTinyAndNotFullyVectorizable(/*ForReduction=*/true))
break;
if (V.isLoadCombineReductionCandidate(RdxKind))
break;
+ V.reorderTopToBottom();
+ V.reorderBottomToTop(/*IgnoreReorder=*/true);
+ V.buildExternalUses(ExternallyUsedValues);
// For a poison-safe boolean logic reduction, do not replace select
// instructions with logic ops. All reduced values will be frozen (see
@@ -7860,7 +8635,7 @@ public:
InstructionCost Cost = TreeCost + ReductionCost;
if (!Cost.isValid()) {
LLVM_DEBUG(dbgs() << "Encountered invalid baseline cost.\n");
- return false;
+ return nullptr;
}
if (Cost >= -SLPCostThreshold) {
V.getORE()->emit([&]() {
@@ -7940,7 +8715,7 @@ public:
// vector reductions.
V.eraseInstructions(IgnoreList);
}
- return VectorizedTree != nullptr;
+ return VectorizedTree;
}
unsigned numReductionValues() const { return ReducedVals.size(); }
@@ -7950,6 +8725,7 @@ private:
InstructionCost getReductionCost(TargetTransformInfo *TTI,
Value *FirstReducedVal, unsigned ReduxWidth,
FastMathFlags FMF) {
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
Type *ScalarTy = FirstReducedVal->getType();
FixedVectorType *VectorTy = FixedVectorType::get(ScalarTy, ReduxWidth);
InstructionCost VectorCost, ScalarCost;
@@ -7962,33 +8738,39 @@ private:
case RecurKind::FAdd:
case RecurKind::FMul: {
unsigned RdxOpcode = RecurrenceDescriptor::getOpcode(RdxKind);
- VectorCost = TTI->getArithmeticReductionCost(RdxOpcode, VectorTy, FMF);
- ScalarCost = TTI->getArithmeticInstrCost(RdxOpcode, ScalarTy);
+ VectorCost =
+ TTI->getArithmeticReductionCost(RdxOpcode, VectorTy, FMF, CostKind);
+ ScalarCost = TTI->getArithmeticInstrCost(RdxOpcode, ScalarTy, CostKind);
break;
}
case RecurKind::FMax:
case RecurKind::FMin: {
+ auto *SclCondTy = CmpInst::makeCmpResultType(ScalarTy);
auto *VecCondTy = cast<VectorType>(CmpInst::makeCmpResultType(VectorTy));
VectorCost = TTI->getMinMaxReductionCost(VectorTy, VecCondTy,
- /*unsigned=*/false);
- ScalarCost =
- TTI->getCmpSelInstrCost(Instruction::FCmp, ScalarTy) +
- TTI->getCmpSelInstrCost(Instruction::Select, ScalarTy,
- CmpInst::makeCmpResultType(ScalarTy));
+ /*unsigned=*/false, CostKind);
+ CmpInst::Predicate RdxPred = getMinMaxReductionPredicate(RdxKind);
+ ScalarCost = TTI->getCmpSelInstrCost(Instruction::FCmp, ScalarTy,
+ SclCondTy, RdxPred, CostKind) +
+ TTI->getCmpSelInstrCost(Instruction::Select, ScalarTy,
+ SclCondTy, RdxPred, CostKind);
break;
}
case RecurKind::SMax:
case RecurKind::SMin:
case RecurKind::UMax:
case RecurKind::UMin: {
+ auto *SclCondTy = CmpInst::makeCmpResultType(ScalarTy);
auto *VecCondTy = cast<VectorType>(CmpInst::makeCmpResultType(VectorTy));
bool IsUnsigned =
RdxKind == RecurKind::UMax || RdxKind == RecurKind::UMin;
- VectorCost = TTI->getMinMaxReductionCost(VectorTy, VecCondTy, IsUnsigned);
- ScalarCost =
- TTI->getCmpSelInstrCost(Instruction::ICmp, ScalarTy) +
- TTI->getCmpSelInstrCost(Instruction::Select, ScalarTy,
- CmpInst::makeCmpResultType(ScalarTy));
+ VectorCost = TTI->getMinMaxReductionCost(VectorTy, VecCondTy, IsUnsigned,
+ CostKind);
+ CmpInst::Predicate RdxPred = getMinMaxReductionPredicate(RdxKind);
+ ScalarCost = TTI->getCmpSelInstrCost(Instruction::ICmp, ScalarTy,
+ SclCondTy, RdxPred, CostKind) +
+ TTI->getCmpSelInstrCost(Instruction::Select, ScalarTy,
+ SclCondTy, RdxPred, CostKind);
break;
}
default:
@@ -8010,6 +8792,7 @@ private:
assert(isPowerOf2_32(ReduxWidth) &&
"We only handle power-of-two reductions for now");
+ ++NumVectorInstructions;
return createSimpleTargetReduction(Builder, TTI, VectorizedValue, RdxKind,
ReductionOps.back());
}
@@ -8219,32 +9002,45 @@ static bool tryToVectorizeHorReductionOrInstOperands(
// Skip the analysis of CmpInsts. The compiler implements postanalysis of the
// CmpInsts so we can skip extra attempts in
// tryToVectorizeHorReductionOrInstOperands and save compile time.
- SmallVector<std::pair<Instruction *, unsigned>, 8> Stack(1, {Root, 0});
+ std::queue<std::pair<Instruction *, unsigned>> Stack;
+ Stack.emplace(Root, 0);
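+  // Note: despite the variable still being named "Stack", std::queue pops from
+  // the front, so the operand tree is now traversed breadth-first rather than
+  // depth-first.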
SmallPtrSet<Value *, 8> VisitedInstrs;
+ SmallVector<WeakTrackingVH> PostponedInsts;
bool Res = false;
+ auto &&TryToReduce = [TTI, &P, &R](Instruction *Inst, Value *&B0,
+ Value *&B1) -> Value * {
+ bool IsBinop = matchRdxBop(Inst, B0, B1);
+ bool IsSelect = match(Inst, m_Select(m_Value(), m_Value(), m_Value()));
+ if (IsBinop || IsSelect) {
+ HorizontalReduction HorRdx;
+ if (HorRdx.matchAssociativeReduction(P, Inst))
+ return HorRdx.tryToReduce(R, TTI);
+ }
+ return nullptr;
+ };
while (!Stack.empty()) {
Instruction *Inst;
unsigned Level;
- std::tie(Inst, Level) = Stack.pop_back_val();
+ std::tie(Inst, Level) = Stack.front();
+ Stack.pop();
// Do not try to analyze an instruction that has already been vectorized.
// This may happen when we vectorize instruction operands on a previous
// iteration while the stack was populated before that happened.
if (R.isDeleted(Inst))
continue;
- Value *B0, *B1;
- bool IsBinop = matchRdxBop(Inst, B0, B1);
- bool IsSelect = match(Inst, m_Select(m_Value(), m_Value(), m_Value()));
- if (IsBinop || IsSelect) {
- HorizontalReduction HorRdx;
- if (HorRdx.matchAssociativeReduction(P, Inst)) {
- if (HorRdx.tryToReduce(R, TTI)) {
- Res = true;
- // Set P to nullptr to avoid re-analysis of phi node in
- // matchAssociativeReduction function unless this is the root node.
- P = nullptr;
- continue;
- }
+ Value *B0 = nullptr, *B1 = nullptr;
+ if (Value *V = TryToReduce(Inst, B0, B1)) {
+ Res = true;
+ // Set P to nullptr to avoid re-analysis of phi node in
+ // matchAssociativeReduction function unless this is the root node.
+ P = nullptr;
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ // Try to find another reduction.
+ Stack.emplace(I, Level);
+ continue;
}
+ } else {
+ bool IsBinop = B0 && B1;
if (P && IsBinop) {
Inst = dyn_cast<Instruction>(B0);
if (Inst == P)
@@ -8256,14 +9052,14 @@ static bool tryToVectorizeHorReductionOrInstOperands(
continue;
}
}
- }
- // Set P to nullptr to avoid re-analysis of phi node in
- // matchAssociativeReduction function unless this is the root node.
- P = nullptr;
- // Do not try to vectorize CmpInst operands, this is done separately.
- if (!isa<CmpInst>(Inst) && Vectorize(Inst, R)) {
- Res = true;
- continue;
+ // Set P to nullptr to avoid re-analysis of phi node in
+ // matchAssociativeReduction function unless this is the root node.
+ P = nullptr;
+ // Do not try to vectorize CmpInst operands, this is done separately.
+ // Final attempt for binop args vectorization should happen after the loop
+ // to try to find reductions.
+ if (!isa<CmpInst>(Inst))
+ PostponedInsts.push_back(Inst);
}
// Try to vectorize operands.
@@ -8277,8 +9073,13 @@ static bool tryToVectorizeHorReductionOrInstOperands(
// separately.
if (!isa<PHINode>(I) && !isa<CmpInst>(I) && !R.isDeleted(I) &&
I->getParent() == BB)
- Stack.emplace_back(I, Level);
+ Stack.emplace(I, Level);
}
+  // Try to vectorize binops where reductions were not found.
+ for (Value *V : PostponedInsts)
+ if (auto *Inst = dyn_cast<Instruction>(V))
+ if (!R.isDeleted(Inst))
+ Res |= Vectorize(Inst, R);
return Res;
}
@@ -8313,7 +9114,7 @@ bool SLPVectorizerPass::vectorizeInsertValueInst(InsertValueInst *IVI,
LLVM_DEBUG(dbgs() << "SLP: array mappable to vector: " << *IVI << "\n");
// Aggregate value is unlikely to be processed in vector register, we need to
// extract scalars into scalar registers, so NeedExtraction is set true.
- return tryToVectorizeList(BuildVectorOpds, R, /*AllowReorder=*/false);
+ return tryToVectorizeList(BuildVectorOpds, R);
}
bool SLPVectorizerPass::vectorizeInsertElementInst(InsertElementInst *IEI,
@@ -8324,11 +9125,11 @@ bool SLPVectorizerPass::vectorizeInsertElementInst(InsertElementInst *IEI,
if (!findBuildAggregate(IEI, TTI, BuildVectorOpds, BuildVectorInsts) ||
(llvm::all_of(BuildVectorOpds,
[](Value *V) { return isa<ExtractElementInst>(V); }) &&
- isShuffle(BuildVectorOpds, Mask)))
+ isFixedVectorShuffle(BuildVectorOpds, Mask)))
return false;
LLVM_DEBUG(dbgs() << "SLP: array mappable to vector: " << *IEI << "\n");
- return tryToVectorizeList(BuildVectorInsts, R, /*AllowReorder=*/true);
+ return tryToVectorizeList(BuildVectorInsts, R);
}
bool SLPVectorizerPass::vectorizeSimpleInstructions(
@@ -8369,6 +9170,78 @@ bool SLPVectorizerPass::vectorizeSimpleInstructions(
return OpsChanged;
}
+template <typename T>
+static bool
+tryToVectorizeSequence(SmallVectorImpl<T *> &Incoming,
+ function_ref<unsigned(T *)> Limit,
+ function_ref<bool(T *, T *)> Comparator,
+ function_ref<bool(T *, T *)> AreCompatible,
+ function_ref<bool(ArrayRef<T *>, bool)> TryToVectorize,
+ bool LimitForRegisterSize) {
+ bool Changed = false;
+ // Sort by type, parent, operands.
+ stable_sort(Incoming, Comparator);
+
+  // Try to vectorize elements based on their type.
+ SmallVector<T *> Candidates;
+ for (auto *IncIt = Incoming.begin(), *E = Incoming.end(); IncIt != E;) {
+ // Look for the next elements with the same type, parent and operand
+ // kinds.
+ auto *SameTypeIt = IncIt;
+ while (SameTypeIt != E && AreCompatible(*SameTypeIt, *IncIt))
+ ++SameTypeIt;
+
+ // Try to vectorize them.
+ unsigned NumElts = (SameTypeIt - IncIt);
+ LLVM_DEBUG(dbgs() << "SLP: Trying to vectorize starting at nodes ("
+ << NumElts << ")\n");
+    // The vectorization is a 3-stage attempt:
+    // 1. Try to vectorize instructions with the same/alternate opcodes,
+    // limited to the maximal register size, first.
+    // 2. Try to vectorize remaining instructions with the same type, if
+    // possible. This may give better results than vectorizing only the
+    // instructions with the same/alternate opcodes.
+    // 3. Finally, try to vectorize all instructions with the same/alternate
+    // ops only; this may result in some extra final vectorization.
+ if (NumElts > 1 &&
+ TryToVectorize(makeArrayRef(IncIt, NumElts), LimitForRegisterSize)) {
+ // Success start over because instructions might have been changed.
+ Changed = true;
+ } else if (NumElts < Limit(*IncIt) &&
+ (Candidates.empty() ||
+ Candidates.front()->getType() == (*IncIt)->getType())) {
+ Candidates.append(IncIt, std::next(IncIt, NumElts));
+ }
+ // Final attempt to vectorize instructions with the same types.
+ if (Candidates.size() > 1 &&
+ (SameTypeIt == E || (*SameTypeIt)->getType() != (*IncIt)->getType())) {
+ if (TryToVectorize(Candidates, /*LimitForRegisterSize=*/false)) {
+ // Success start over because instructions might have been changed.
+ Changed = true;
+ } else if (LimitForRegisterSize) {
+ // Try to vectorize using small vectors.
+ for (auto *It = Candidates.begin(), *End = Candidates.end();
+ It != End;) {
+ auto *SameTypeIt = It;
+ while (SameTypeIt != End && AreCompatible(*SameTypeIt, *It))
+ ++SameTypeIt;
+ unsigned NumElts = (SameTypeIt - It);
+ if (NumElts > 1 && TryToVectorize(makeArrayRef(It, NumElts),
+ /*LimitForRegisterSize=*/false))
+ Changed = true;
+ It = SameTypeIt;
+ }
+ }
+ Candidates.clear();
+ }
+
+ // Start over at the next instruction of a different type (or the end).
+ IncIt = SameTypeIt;
+ }
+ return Changed;
+}
+
bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
bool Changed = false;
SmallVector<Value *, 4> Incoming;
@@ -8377,11 +9250,89 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
// node. Allows better to identify the chains that can be vectorized in the
// better way.
DenseMap<Value *, SmallVector<Value *, 4>> PHIToOpcodes;
+ auto PHICompare = [this, &PHIToOpcodes](Value *V1, Value *V2) {
+ assert(isValidElementType(V1->getType()) &&
+ isValidElementType(V2->getType()) &&
+ "Expected vectorizable types only.");
+ // It is fine to compare type IDs here, since we expect only vectorizable
+    // types, like ints, floats and pointers; we don't care about other types.
+ if (V1->getType()->getTypeID() < V2->getType()->getTypeID())
+ return true;
+ if (V1->getType()->getTypeID() > V2->getType()->getTypeID())
+ return false;
+ ArrayRef<Value *> Opcodes1 = PHIToOpcodes[V1];
+ ArrayRef<Value *> Opcodes2 = PHIToOpcodes[V2];
+ if (Opcodes1.size() < Opcodes2.size())
+ return true;
+ if (Opcodes1.size() > Opcodes2.size())
+ return false;
+ for (int I = 0, E = Opcodes1.size(); I < E; ++I) {
+ // Undefs are compatible with any other value.
+ if (isa<UndefValue>(Opcodes1[I]) || isa<UndefValue>(Opcodes2[I]))
+ continue;
+ if (auto *I1 = dyn_cast<Instruction>(Opcodes1[I]))
+ if (auto *I2 = dyn_cast<Instruction>(Opcodes2[I])) {
+ DomTreeNodeBase<BasicBlock> *NodeI1 = DT->getNode(I1->getParent());
+ DomTreeNodeBase<BasicBlock> *NodeI2 = DT->getNode(I2->getParent());
+ if (!NodeI1)
+ return NodeI2 != nullptr;
+ if (!NodeI2)
+ return false;
+ assert((NodeI1 == NodeI2) ==
+ (NodeI1->getDFSNumIn() == NodeI2->getDFSNumIn()) &&
+ "Different nodes should have different DFS numbers");
+ if (NodeI1 != NodeI2)
+ return NodeI1->getDFSNumIn() < NodeI2->getDFSNumIn();
+ InstructionsState S = getSameOpcode({I1, I2});
+ if (S.getOpcode())
+ continue;
+ return I1->getOpcode() < I2->getOpcode();
+ }
+ if (isa<Constant>(Opcodes1[I]) && isa<Constant>(Opcodes2[I]))
+ continue;
+ if (Opcodes1[I]->getValueID() < Opcodes2[I]->getValueID())
+ return true;
+ if (Opcodes1[I]->getValueID() > Opcodes2[I]->getValueID())
+ return false;
+ }
+ return false;
+ };
+ auto AreCompatiblePHIs = [&PHIToOpcodes](Value *V1, Value *V2) {
+ if (V1 == V2)
+ return true;
+ if (V1->getType() != V2->getType())
+ return false;
+ ArrayRef<Value *> Opcodes1 = PHIToOpcodes[V1];
+ ArrayRef<Value *> Opcodes2 = PHIToOpcodes[V2];
+ if (Opcodes1.size() != Opcodes2.size())
+ return false;
+ for (int I = 0, E = Opcodes1.size(); I < E; ++I) {
+ // Undefs are compatible with any other value.
+ if (isa<UndefValue>(Opcodes1[I]) || isa<UndefValue>(Opcodes2[I]))
+ continue;
+ if (auto *I1 = dyn_cast<Instruction>(Opcodes1[I]))
+ if (auto *I2 = dyn_cast<Instruction>(Opcodes2[I])) {
+ if (I1->getParent() != I2->getParent())
+ return false;
+ InstructionsState S = getSameOpcode({I1, I2});
+ if (S.getOpcode())
+ continue;
+ return false;
+ }
+ if (isa<Constant>(Opcodes1[I]) && isa<Constant>(Opcodes2[I]))
+ continue;
+ if (Opcodes1[I]->getValueID() != Opcodes2[I]->getValueID())
+ return false;
+ }
+ return true;
+ };
+ auto Limit = [&R](Value *V) {
+ unsigned EltSize = R.getVectorElementSize(V);
+ return std::max(2U, R.getMaxVecRegSize() / EltSize);
+ };
- bool HaveVectorizedPhiNodes = true;
- while (HaveVectorizedPhiNodes) {
- HaveVectorizedPhiNodes = false;
-
+ bool HaveVectorizedPhiNodes = false;
+ do {
// Collect the incoming values from the PHIs.
Incoming.clear();
for (Instruction &I : *BB) {
@@ -8419,132 +9370,15 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
}
}
- // Sort by type, parent, operands.
- stable_sort(Incoming, [this, &PHIToOpcodes](Value *V1, Value *V2) {
- assert(isValidElementType(V1->getType()) &&
- isValidElementType(V2->getType()) &&
- "Expected vectorizable types only.");
- // It is fine to compare type IDs here, since we expect only vectorizable
- // types, like ints, floats and pointers, we don't care about other type.
- if (V1->getType()->getTypeID() < V2->getType()->getTypeID())
- return true;
- if (V1->getType()->getTypeID() > V2->getType()->getTypeID())
- return false;
- ArrayRef<Value *> Opcodes1 = PHIToOpcodes[V1];
- ArrayRef<Value *> Opcodes2 = PHIToOpcodes[V2];
- if (Opcodes1.size() < Opcodes2.size())
- return true;
- if (Opcodes1.size() > Opcodes2.size())
- return false;
- for (int I = 0, E = Opcodes1.size(); I < E; ++I) {
- // Undefs are compatible with any other value.
- if (isa<UndefValue>(Opcodes1[I]) || isa<UndefValue>(Opcodes2[I]))
- continue;
- if (auto *I1 = dyn_cast<Instruction>(Opcodes1[I]))
- if (auto *I2 = dyn_cast<Instruction>(Opcodes2[I])) {
- DomTreeNodeBase<BasicBlock> *NodeI1 = DT->getNode(I1->getParent());
- DomTreeNodeBase<BasicBlock> *NodeI2 = DT->getNode(I2->getParent());
- if (!NodeI1)
- return NodeI2 != nullptr;
- if (!NodeI2)
- return false;
- assert((NodeI1 == NodeI2) ==
- (NodeI1->getDFSNumIn() == NodeI2->getDFSNumIn()) &&
- "Different nodes should have different DFS numbers");
- if (NodeI1 != NodeI2)
- return NodeI1->getDFSNumIn() < NodeI2->getDFSNumIn();
- InstructionsState S = getSameOpcode({I1, I2});
- if (S.getOpcode())
- continue;
- return I1->getOpcode() < I2->getOpcode();
- }
- if (isa<Constant>(Opcodes1[I]) && isa<Constant>(Opcodes2[I]))
- continue;
- if (Opcodes1[I]->getValueID() < Opcodes2[I]->getValueID())
- return true;
- if (Opcodes1[I]->getValueID() > Opcodes2[I]->getValueID())
- return false;
- }
- return false;
- });
-
- auto &&AreCompatiblePHIs = [&PHIToOpcodes](Value *V1, Value *V2) {
- if (V1 == V2)
- return true;
- if (V1->getType() != V2->getType())
- return false;
- ArrayRef<Value *> Opcodes1 = PHIToOpcodes[V1];
- ArrayRef<Value *> Opcodes2 = PHIToOpcodes[V2];
- if (Opcodes1.size() != Opcodes2.size())
- return false;
- for (int I = 0, E = Opcodes1.size(); I < E; ++I) {
- // Undefs are compatible with any other value.
- if (isa<UndefValue>(Opcodes1[I]) || isa<UndefValue>(Opcodes2[I]))
- continue;
- if (auto *I1 = dyn_cast<Instruction>(Opcodes1[I]))
- if (auto *I2 = dyn_cast<Instruction>(Opcodes2[I])) {
- if (I1->getParent() != I2->getParent())
- return false;
- InstructionsState S = getSameOpcode({I1, I2});
- if (S.getOpcode())
- continue;
- return false;
- }
- if (isa<Constant>(Opcodes1[I]) && isa<Constant>(Opcodes2[I]))
- continue;
- if (Opcodes1[I]->getValueID() != Opcodes2[I]->getValueID())
- return false;
- }
- return true;
- };
-
- // Try to vectorize elements base on their type.
- SmallVector<Value *, 4> Candidates;
- for (SmallVector<Value *, 4>::iterator IncIt = Incoming.begin(),
- E = Incoming.end();
- IncIt != E;) {
-
- // Look for the next elements with the same type, parent and operand
- // kinds.
- SmallVector<Value *, 4>::iterator SameTypeIt = IncIt;
- while (SameTypeIt != E && AreCompatiblePHIs(*SameTypeIt, *IncIt)) {
- VisitedInstrs.insert(*SameTypeIt);
- ++SameTypeIt;
- }
-
- // Try to vectorize them.
- unsigned NumElts = (SameTypeIt - IncIt);
- LLVM_DEBUG(dbgs() << "SLP: Trying to vectorize starting at PHIs ("
- << NumElts << ")\n");
- // The order in which the phi nodes appear in the program does not matter.
- // So allow tryToVectorizeList to reorder them if it is beneficial. This
- // is done when there are exactly two elements since tryToVectorizeList
- // asserts that there are only two values when AllowReorder is true.
- if (NumElts > 1 && tryToVectorizeList(makeArrayRef(IncIt, NumElts), R,
- /*AllowReorder=*/true)) {
- // Success start over because instructions might have been changed.
- HaveVectorizedPhiNodes = true;
- Changed = true;
- } else if (NumElts < 4 &&
- (Candidates.empty() ||
- Candidates.front()->getType() == (*IncIt)->getType())) {
- Candidates.append(IncIt, std::next(IncIt, NumElts));
- }
- // Final attempt to vectorize phis with the same types.
- if (SameTypeIt == E || (*SameTypeIt)->getType() != (*IncIt)->getType()) {
- if (Candidates.size() > 1 &&
- tryToVectorizeList(Candidates, R, /*AllowReorder=*/true)) {
- // Success start over because instructions might have been changed.
- HaveVectorizedPhiNodes = true;
- Changed = true;
- }
- Candidates.clear();
- }
-
- // Start over at the next instruction of a different type (or the end).
- IncIt = SameTypeIt;
- }
- }
+ HaveVectorizedPhiNodes = tryToVectorizeSequence<Value>(
+ Incoming, Limit, PHICompare, AreCompatiblePHIs,
+ [this, &R](ArrayRef<Value *> Candidates, bool LimitForRegisterSize) {
+ return tryToVectorizeList(Candidates, R, LimitForRegisterSize);
+ },
+ /*LimitForRegisterSize=*/true);
+ Changed |= HaveVectorizedPhiNodes;
+ VisitedInstrs.insert(Incoming.begin(), Incoming.end());
+ } while (HaveVectorizedPhiNodes);
VisitedInstrs.clear();
@@ -8797,6 +9631,10 @@ bool SLPVectorizerPass::vectorizeStoreChains(BoUpSLP &R) {
return V1->getValueOperand()->getValueID() ==
V2->getValueOperand()->getValueID();
};
+ auto Limit = [&R, this](StoreInst *SI) {
+ unsigned EltSize = DL->getTypeSizeInBits(SI->getValueOperand()->getType());
+ return R.getMinVF(EltSize);
+ };
// Attempt to sort and vectorize each of the store-groups.
for (auto &Pair : Stores) {
@@ -8806,33 +9644,15 @@ bool SLPVectorizerPass::vectorizeStoreChains(BoUpSLP &R) {
LLVM_DEBUG(dbgs() << "SLP: Analyzing a store chain of length "
<< Pair.second.size() << ".\n");
- stable_sort(Pair.second, StoreSorter);
-
- // Try to vectorize elements based on their compatibility.
- for (ArrayRef<StoreInst *>::iterator IncIt = Pair.second.begin(),
- E = Pair.second.end();
- IncIt != E;) {
-
- // Look for the next elements with the same type.
- ArrayRef<StoreInst *>::iterator SameTypeIt = IncIt;
- Type *EltTy = (*IncIt)->getPointerOperand()->getType();
-
- while (SameTypeIt != E && AreCompatibleStores(*SameTypeIt, *IncIt))
- ++SameTypeIt;
-
- // Try to vectorize them.
- unsigned NumElts = (SameTypeIt - IncIt);
- LLVM_DEBUG(dbgs() << "SLP: Trying to vectorize starting at stores ("
- << NumElts << ")\n");
- if (NumElts > 1 && !EltTy->getPointerElementType()->isVectorTy() &&
- vectorizeStores(makeArrayRef(IncIt, NumElts), R)) {
- // Success start over because instructions might have been changed.
- Changed = true;
- }
+ if (!isValidElementType(Pair.second.front()->getValueOperand()->getType()))
+ continue;
- // Start over at the next instruction of a different type (or the end).
- IncIt = SameTypeIt;
- }
+ Changed |= tryToVectorizeSequence<StoreInst>(
+ Pair.second, Limit, StoreSorter, AreCompatibleStores,
+ [this, &R](ArrayRef<StoreInst *> Candidates, bool) {
+ return vectorizeStores(Candidates, R);
+ },
+ /*LimitForRegisterSize=*/false);
}
return Changed;
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 5f39fe1c17a3..638467f94e1c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -815,6 +815,28 @@ void VPlan::execute(VPTransformState *State) {
for (VPBlockBase *Block : depth_first(Entry))
Block->execute(State);
+ // Fix the latch value of reduction and first-order recurrences phis in the
+ // vector loop.
+ VPBasicBlock *Header = Entry->getEntryBasicBlock();
+ for (VPRecipeBase &R : Header->phis()) {
+ auto *PhiR = dyn_cast<VPWidenPHIRecipe>(&R);
+ if (!PhiR || !(isa<VPFirstOrderRecurrencePHIRecipe>(&R) ||
+ isa<VPReductionPHIRecipe>(&R)))
+ continue;
+ // For first-order recurrences and in-order reduction phis, only a single
+ // part is generated, which provides the last part from the previous
+ // iteration. Otherwise all UF parts are generated.
+ bool SinglePartNeeded = isa<VPFirstOrderRecurrencePHIRecipe>(&R) ||
+ cast<VPReductionPHIRecipe>(&R)->isOrdered();
+ unsigned LastPartForNewPhi = SinglePartNeeded ? 1 : State->UF;
+ for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
+ Value *VecPhi = State->get(PhiR, Part);
+ Value *Val = State->get(PhiR->getBackedgeValue(),
+ SinglePartNeeded ? State->UF - 1 : Part);
+ cast<PHINode>(VecPhi)->addIncoming(Val, VectorLatchBB);
+ }
+ }
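+  // Illustrative end state (names made up): the widened header phi now has its
+  // backedge value wired in, e.g.
+  //   %vec.phi = phi <4 x i32> [ %rdx.start, %vector.ph ],
+  //                            [ %rdx.next, %vector.latch ]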
+
// Setup branch terminator successors for VPBBs in VPBBsToFix based on
// VPBB's successors.
for (auto VPBB : State->CFG.VPBBsToFix) {
@@ -862,6 +884,13 @@ void VPlan::print(raw_ostream &O) const {
VPSlotTracker SlotTracker(this);
O << "VPlan '" << Name << "' {";
+
+ if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
+ O << "\nLive-in ";
+ BackedgeTakenCount->printAsOperand(O, SlotTracker);
+ O << " = backedge-taken count\n";
+ }
+
for (const VPBlockBase *Block : depth_first(getEntry())) {
O << '\n';
Block->print(O, "", SlotTracker);
@@ -920,12 +949,12 @@ void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopPreHeaderBB,
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-const Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
+Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
return (isa<VPRegionBlock>(Block) ? "cluster_N" : "N") +
Twine(getOrCreateBID(Block));
}
-const Twine VPlanPrinter::getOrCreateName(const VPBlockBase *Block) {
+Twine VPlanPrinter::getOrCreateName(const VPBlockBase *Block) {
const std::string &Name = Block->getName();
if (!Name.empty())
return Name;
@@ -1235,7 +1264,7 @@ void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
VF.isScalar() ? Indices.back() : ConstantVector::get(Indices);
// Add the consecutive indices to the vector value.
Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
- State.set(getVPSingleValue(), CanonicalVectorIV, Part);
+ State.set(this, CanonicalVectorIV, Part);
}
}
@@ -1243,7 +1272,7 @@ void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "EMIT ";
- getVPSingleValue()->printAsOperand(O, SlotTracker);
+ printAsOperand(O, SlotTracker);
O << " = WIDEN-CANONICAL-INDUCTION";
}
#endif
@@ -1306,12 +1335,16 @@ void VPReductionPHIRecipe::execute(VPTransformState &State) {
PHINode::Create(VecTy, 2, "vec.phi", &*HeaderBB->getFirstInsertionPt());
State.set(this, EntryPart, Part);
}
+
+ // Reductions do not have to start at zero. They can start with
+ // any loop invariant values.
VPValue *StartVPV = getStartValue();
Value *StartV = StartVPV->getLiveInIRValue();
Value *Iden = nullptr;
RecurKind RK = RdxDesc.getRecurrenceKind();
- if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK)) {
+ if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) ||
+ RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK)) {
// MinMax reductions have the start value as their identity.
if (ScalarPHI) {
Iden = StartV;
@@ -1322,12 +1355,11 @@ void VPReductionPHIRecipe::execute(VPTransformState &State) {
Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
}
} else {
- Constant *IdenC = RecurrenceDescriptor::getRecurrenceIdentity(
- RK, VecTy->getScalarType(), RdxDesc.getFastMathFlags());
- Iden = IdenC;
+ Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(),
+ RdxDesc.getFastMathFlags());
if (!ScalarPHI) {
- Iden = ConstantVector::getSplat(State.VF, IdenC);
+ Iden = Builder.CreateVectorSplat(State.VF, Iden);
IRBuilderBase::InsertPointGuard IPBuilder(Builder);
Builder.SetInsertPoint(State.CFG.VectorPreHeader->getTerminator());
Constant *Zero = Builder.getInt32(0);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index bdf09d15c27f..00ee31007cb7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1312,7 +1312,7 @@ public:
// The first operand is the address, followed by the stored values, followed
// by an optional mask.
return ArrayRef<VPValue *>(op_begin(), getNumOperands())
- .slice(1, getNumOperands() - (HasMask ? 2 : 1));
+ .slice(1, getNumStoreOperands());
}
/// Generate the wide load or store, and shuffles.
@@ -1325,6 +1325,12 @@ public:
#endif
const InterleaveGroup<Instruction> *getInterleaveGroup() { return IG; }
+
+ /// Returns the number of stored operands of this interleave group. Returns 0
+ /// for load interleave groups.
+ unsigned getNumStoreOperands() const {
+ return getNumOperands() - (HasMask ? 2 : 1);
+ }
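+  // Example (illustrative): a masked store group has operands
+  //   {Addr, StoredVal0, StoredVal1, Mask}, so HasMask is true and
+  //   getNumStoreOperands() == 4 - 2 == 2; getStoredValues() then yields
+  //   {StoredVal0, StoredVal1}. For a load group the result is 0.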
};
/// A recipe to represent inloop reduction operations, performing a reduction on
@@ -1508,6 +1514,12 @@ public:
class VPWidenMemoryInstructionRecipe : public VPRecipeBase {
Instruction &Ingredient;
+ // Whether the loaded-from / stored-to addresses are consecutive.
+ bool Consecutive;
+
+ // Whether the consecutive loaded/stored addresses are in reverse order.
+ bool Reverse;
+
void setMask(VPValue *Mask) {
if (!Mask)
return;
@@ -1519,16 +1531,21 @@ class VPWidenMemoryInstructionRecipe : public VPRecipeBase {
}
public:
- VPWidenMemoryInstructionRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask)
- : VPRecipeBase(VPWidenMemoryInstructionSC, {Addr}), Ingredient(Load) {
+ VPWidenMemoryInstructionRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
+ bool Consecutive, bool Reverse)
+ : VPRecipeBase(VPWidenMemoryInstructionSC, {Addr}), Ingredient(Load),
+ Consecutive(Consecutive), Reverse(Reverse) {
+ assert((Consecutive || !Reverse) && "Reverse implies consecutive");
new VPValue(VPValue::VPVMemoryInstructionSC, &Load, this);
setMask(Mask);
}
VPWidenMemoryInstructionRecipe(StoreInst &Store, VPValue *Addr,
- VPValue *StoredValue, VPValue *Mask)
+ VPValue *StoredValue, VPValue *Mask,
+ bool Consecutive, bool Reverse)
: VPRecipeBase(VPWidenMemoryInstructionSC, {Addr, StoredValue}),
- Ingredient(Store) {
+ Ingredient(Store), Consecutive(Consecutive), Reverse(Reverse) {
+ assert((Consecutive || !Reverse) && "Reverse implies consecutive");
setMask(Mask);
}
@@ -1558,6 +1575,13 @@ public:
return getOperand(1); // Stored value is the 2nd, mandatory operand.
}
+ // Return whether the loaded-from / stored-to addresses are consecutive.
+ bool isConsecutive() const { return Consecutive; }
+
+ // Return whether the consecutive loaded/stored addresses are in reverse
+ // order.
+ bool isReverse() const { return Reverse; }
+
/// Generate the wide load/store.
void execute(VPTransformState &State) override;
@@ -1569,11 +1593,11 @@ public:
};
/// A Recipe for widening the canonical induction variable of the vector loop.
-class VPWidenCanonicalIVRecipe : public VPRecipeBase {
+class VPWidenCanonicalIVRecipe : public VPRecipeBase, public VPValue {
public:
- VPWidenCanonicalIVRecipe() : VPRecipeBase(VPWidenCanonicalIVSC, {}) {
- new VPValue(nullptr, this);
- }
+ VPWidenCanonicalIVRecipe()
+ : VPRecipeBase(VPWidenCanonicalIVSC, {}),
+ VPValue(VPValue::VPVWidenCanonicalIVSC, nullptr, this) {}
~VPWidenCanonicalIVRecipe() override = default;
@@ -2094,6 +2118,10 @@ class VPlan {
/// Holds the VPLoopInfo analysis for this VPlan.
VPLoopInfo VPLInfo;
+  /// Indicates whether it is safe to use the Value2VPValue mapping or if the
+ /// mapping cannot be used any longer, because it is stale.
+ bool Value2VPValueEnabled = true;
+
public:
VPlan(VPBlockBase *Entry = nullptr) : Entry(Entry) {
if (Entry)
@@ -2135,6 +2163,10 @@ public:
return BackedgeTakenCount;
}
+ /// Mark the plan to indicate that using Value2VPValue is not safe any
+ /// longer, because it may be stale.
+ void disableValue2VPValue() { Value2VPValueEnabled = false; }
+
void addVF(ElementCount VF) { VFs.insert(VF); }
bool hasVF(ElementCount VF) { return VFs.count(VF); }
@@ -2148,6 +2180,8 @@ public:
void addExternalDef(VPValue *VPVal) { VPExternalDefs.insert(VPVal); }
void addVPValue(Value *V) {
+ assert(Value2VPValueEnabled &&
+ "IR value to VPValue mapping may be out of date!");
assert(V && "Trying to add a null Value to VPlan");
assert(!Value2VPValue.count(V) && "Value already exists in VPlan");
VPValue *VPV = new VPValue(V);
@@ -2156,25 +2190,39 @@ public:
}
void addVPValue(Value *V, VPValue *VPV) {
+ assert(Value2VPValueEnabled && "Value2VPValue mapping may be out of date!");
assert(V && "Trying to add a null Value to VPlan");
assert(!Value2VPValue.count(V) && "Value already exists in VPlan");
Value2VPValue[V] = VPV;
}
- VPValue *getVPValue(Value *V) {
+ /// Returns the VPValue for \p V. \p OverrideAllowed can be used to disable
+ /// checking whether it is safe to query VPValues using IR Values.
+ VPValue *getVPValue(Value *V, bool OverrideAllowed = false) {
+ assert((OverrideAllowed || isa<Constant>(V) || Value2VPValueEnabled) &&
+ "Value2VPValue mapping may be out of date!");
assert(V && "Trying to get the VPValue of a null Value");
assert(Value2VPValue.count(V) && "Value does not exist in VPlan");
return Value2VPValue[V];
}
- VPValue *getOrAddVPValue(Value *V) {
+ /// Gets the VPValue or adds a new one (if none exists yet) for \p V. \p
+ /// OverrideAllowed can be used to disable checking whether it is safe to
+ /// query VPValues using IR Values.
+ VPValue *getOrAddVPValue(Value *V, bool OverrideAllowed = false) {
+ assert((OverrideAllowed || isa<Constant>(V) || Value2VPValueEnabled) &&
+ "Value2VPValue mapping may be out of date!");
assert(V && "Trying to get or add the VPValue of a null Value");
if (!Value2VPValue.count(V))
addVPValue(V);
return getVPValue(V);
}
- void removeVPValueFor(Value *V) { Value2VPValue.erase(V); }
+ void removeVPValueFor(Value *V) {
+ assert(Value2VPValueEnabled &&
+ "IR value to VPValue mapping may be out of date!");
+ Value2VPValue.erase(V);
+ }
/// Return the VPLoopInfo analysis for this VPlan.
VPLoopInfo &getVPLoopInfo() { return VPLInfo; }
@@ -2244,9 +2292,9 @@ class VPlanPrinter {
return BlockID.count(Block) ? BlockID[Block] : BlockID[Block] = BID++;
}
- const Twine getOrCreateName(const VPBlockBase *Block);
+ Twine getOrCreateName(const VPBlockBase *Block);
- const Twine getUID(const VPBlockBase *Block);
+ Twine getUID(const VPBlockBase *Block);
/// Print the information related to a CFG edge between two VPBlockBases.
void drawEdge(const VPBlockBase *From, const VPBlockBase *To, bool Hidden,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 52b5ae083d0e..ded5bc04beb5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -31,19 +31,18 @@ void VPlanTransforms::VPInstructionsToVPRecipes(
VPBasicBlock *VPBB = Base->getEntryBasicBlock();
// Introduce each ingredient into VPlan.
- for (auto I = VPBB->begin(), E = VPBB->end(); I != E;) {
- VPRecipeBase *Ingredient = &*I++;
- VPValue *VPV = Ingredient->getVPSingleValue();
+ for (VPRecipeBase &Ingredient : llvm::make_early_inc_range(*VPBB)) {
+ VPValue *VPV = Ingredient.getVPSingleValue();
Instruction *Inst = cast<Instruction>(VPV->getUnderlyingValue());
if (DeadInstructions.count(Inst)) {
VPValue DummyValue;
VPV->replaceAllUsesWith(&DummyValue);
- Ingredient->eraseFromParent();
+ Ingredient.eraseFromParent();
continue;
}
VPRecipeBase *NewRecipe = nullptr;
- if (auto *VPPhi = dyn_cast<VPWidenPHIRecipe>(Ingredient)) {
+ if (auto *VPPhi = dyn_cast<VPWidenPHIRecipe>(&Ingredient)) {
auto *Phi = cast<PHINode>(VPPhi->getUnderlyingValue());
InductionDescriptor II = Inductions.lookup(Phi);
if (II.getKind() == InductionDescriptor::IK_IntInduction ||
@@ -55,25 +54,25 @@ void VPlanTransforms::VPInstructionsToVPRecipes(
continue;
}
} else {
- assert(isa<VPInstruction>(Ingredient) &&
+ assert(isa<VPInstruction>(&Ingredient) &&
"only VPInstructions expected here");
assert(!isa<PHINode>(Inst) && "phis should be handled above");
// Create VPWidenMemoryInstructionRecipe for loads and stores.
if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
NewRecipe = new VPWidenMemoryInstructionRecipe(
*Load, Plan->getOrAddVPValue(getLoadStorePointerOperand(Inst)),
- nullptr /*Mask*/);
+ nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/);
} else if (StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
NewRecipe = new VPWidenMemoryInstructionRecipe(
*Store, Plan->getOrAddVPValue(getLoadStorePointerOperand(Inst)),
- Plan->getOrAddVPValue(Store->getValueOperand()),
- nullptr /*Mask*/);
+ Plan->getOrAddVPValue(Store->getValueOperand()), nullptr /*Mask*/,
+ false /*Consecutive*/, false /*Reverse*/);
} else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
NewRecipe = new VPWidenGEPRecipe(
GEP, Plan->mapToVPValues(GEP->operands()), OrigLoop);
} else if (CallInst *CI = dyn_cast<CallInst>(Inst)) {
- NewRecipe = new VPWidenCallRecipe(
- *CI, Plan->mapToVPValues(CI->arg_operands()));
+ NewRecipe =
+ new VPWidenCallRecipe(*CI, Plan->mapToVPValues(CI->args()));
} else if (SelectInst *SI = dyn_cast<SelectInst>(Inst)) {
bool InvariantCond =
SE.isLoopInvariant(SE.getSCEV(SI->getOperand(0)), OrigLoop);
@@ -85,13 +84,13 @@ void VPlanTransforms::VPInstructionsToVPRecipes(
}
}
- NewRecipe->insertBefore(Ingredient);
+ NewRecipe->insertBefore(&Ingredient);
if (NewRecipe->getNumDefinedValues() == 1)
VPV->replaceAllUsesWith(NewRecipe->getVPSingleValue());
else
assert(NewRecipe->getNumDefinedValues() == 0 &&
"Only recpies with zero or one defined values expected");
- Ingredient->eraseFromParent();
+ Ingredient.eraseFromParent();
Plan->removeVPValueFor(Inst);
for (auto *Def : NewRecipe->definedValues()) {
Plan->addVPValue(Inst, Def);
@@ -106,44 +105,76 @@ bool VPlanTransforms::sinkScalarOperands(VPlan &Plan) {
bool Changed = false;
// First, collect the operands of all predicated replicate recipes as seeds
// for sinking.
- SetVector<VPValue *> WorkList;
+ SetVector<std::pair<VPBasicBlock *, VPValue *>> WorkList;
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
for (auto &Recipe : *VPBB) {
auto *RepR = dyn_cast<VPReplicateRecipe>(&Recipe);
if (!RepR || !RepR->isPredicated())
continue;
- WorkList.insert(RepR->op_begin(), RepR->op_end());
+ for (VPValue *Op : RepR->operands())
+ WorkList.insert(std::make_pair(RepR->getParent(), Op));
}
}
// Try to sink each replicate recipe in the worklist.
while (!WorkList.empty()) {
- auto *C = WorkList.pop_back_val();
+ VPBasicBlock *SinkTo;
+ VPValue *C;
+ std::tie(SinkTo, C) = WorkList.pop_back_val();
auto *SinkCandidate = dyn_cast_or_null<VPReplicateRecipe>(C->Def);
- if (!SinkCandidate || SinkCandidate->isUniform())
- continue;
-
- // All users of SinkCandidate must be in the same block in order to perform
- // sinking. Therefore the destination block for sinking must match the block
- // containing the first user.
- auto *FirstUser = dyn_cast<VPRecipeBase>(*SinkCandidate->user_begin());
- if (!FirstUser)
- continue;
- VPBasicBlock *SinkTo = FirstUser->getParent();
- if (SinkCandidate->getParent() == SinkTo ||
+ if (!SinkCandidate || SinkCandidate->isUniform() ||
+ SinkCandidate->getParent() == SinkTo ||
SinkCandidate->mayHaveSideEffects() ||
SinkCandidate->mayReadOrWriteMemory())
continue;
- // All recipe users of the sink candidate must be in the same block SinkTo.
- if (any_of(SinkCandidate->users(), [SinkTo](VPUser *U) {
- auto *UI = dyn_cast<VPRecipeBase>(U);
- return !UI || UI->getParent() != SinkTo;
- }))
+ bool NeedsDuplicating = false;
+ // All recipe users of the sink candidate must be in the same block SinkTo
+    // or all users outside of SinkTo must be uniform-after-vectorization
+    // (i.e., only the first lane is used). In the latter case, we need to
+    // duplicate SinkCandidate. At the moment, we identify such UAVs by looking
+ // address operands of widened memory recipes.
+ auto CanSinkWithUser = [SinkTo, &NeedsDuplicating,
+ SinkCandidate](VPUser *U) {
+ auto *UI = dyn_cast<VPRecipeBase>(U);
+ if (!UI)
+ return false;
+ if (UI->getParent() == SinkTo)
+ return true;
+ auto *WidenI = dyn_cast<VPWidenMemoryInstructionRecipe>(UI);
+ if (WidenI && WidenI->getAddr() == SinkCandidate) {
+ NeedsDuplicating = true;
+ return true;
+ }
+ return false;
+ };
+ if (!all_of(SinkCandidate->users(), CanSinkWithUser))
continue;
+ if (NeedsDuplicating) {
+ Instruction *I = cast<Instruction>(SinkCandidate->getUnderlyingValue());
+ auto *Clone =
+ new VPReplicateRecipe(I, SinkCandidate->operands(), true, false);
+ // TODO: add ".cloned" suffix to name of Clone's VPValue.
+
+ Clone->insertBefore(SinkCandidate);
+ SmallVector<VPUser *, 4> Users(SinkCandidate->user_begin(),
+ SinkCandidate->user_end());
+ for (auto *U : Users) {
+ auto *UI = cast<VPRecipeBase>(U);
+ if (UI->getParent() == SinkTo)
+ continue;
+
+ for (unsigned Idx = 0; Idx != UI->getNumOperands(); Idx++) {
+ if (UI->getOperand(Idx) != SinkCandidate)
+ continue;
+ UI->setOperand(Idx, Clone);
+ }
+ }
+ }
SinkCandidate->moveBefore(*SinkTo, SinkTo->getFirstNonPhi());
- WorkList.insert(SinkCandidate->op_begin(), SinkCandidate->op_end());
+ for (VPValue *Op : SinkCandidate->operands())
+ WorkList.insert(std::make_pair(SinkTo, Op));
Changed = true;
}
return Changed;
@@ -234,12 +265,15 @@ bool VPlanTransforms::mergeReplicateRegions(VPlan &Plan) {
for (VPRecipeBase &Phi1ToMove : make_early_inc_range(reverse(*Merge1))) {
VPValue *PredInst1 =
cast<VPPredInstPHIRecipe>(&Phi1ToMove)->getOperand(0);
- for (VPUser *U : Phi1ToMove.getVPSingleValue()->users()) {
+ VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue();
+ SmallVector<VPUser *> Users(Phi1ToMoveV->user_begin(),
+ Phi1ToMoveV->user_end());
+ for (VPUser *U : Users) {
auto *UI = dyn_cast<VPRecipeBase>(U);
if (!UI || UI->getParent() != Then2)
continue;
for (unsigned I = 0, E = U->getNumOperands(); I != E; ++I) {
- if (Phi1ToMove.getVPSingleValue() != U->getOperand(I))
+ if (Phi1ToMoveV != U->getOperand(I))
continue;
U->setOperand(I, PredInst1);
}
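Both hunks above switch to taking a snapshot of a value's users before calling
setOperand(), because rewriting an operand mutates the users() range while it is
being walked. A minimal sketch of that pattern outside the diff (redirectUses,
OldV and NewV are illustrative names, not from this commit):

  // Redirect every operand use of OldV to NewV, iterating over a snapshot.
  static void redirectUses(VPValue *OldV, VPValue *NewV) {
    SmallVector<VPUser *> Users(OldV->user_begin(), OldV->user_end());
    for (VPUser *U : Users)
      for (unsigned I = 0, E = U->getNumOperands(); I != E; ++I)
        if (U->getOperand(I) == OldV)
          U->setOperand(I, NewV);
  }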
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index 6eec8d14de4a..6d6ea4eb30f1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -128,3 +128,33 @@ void VPlanVerifier::verifyHierarchicalCFG(
assert(!TopRegion->getParent() && "VPlan Top Region should have no parent.");
verifyRegionRec(TopRegion);
}
+
+bool VPlanVerifier::verifyPlanIsValid(const VPlan &Plan) {
+ auto Iter = depth_first(
+ VPBlockRecursiveTraversalWrapper<const VPBlockBase *>(Plan.getEntry()));
+ for (const VPBasicBlock *VPBB :
+ VPBlockUtils::blocksOnly<const VPBasicBlock>(Iter)) {
+ // Verify that phi-like recipes are at the beginning of the block, with no
+ // other recipes in between.
+ auto RecipeI = VPBB->begin();
+ auto End = VPBB->end();
+ while (RecipeI != End && RecipeI->isPhi())
+ RecipeI++;
+
+ while (RecipeI != End) {
+ if (RecipeI->isPhi() && !isa<VPBlendRecipe>(&*RecipeI)) {
+ errs() << "Found phi-like recipe after non-phi recipe";
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ errs() << ": ";
+ RecipeI->dump();
+ errs() << "after\n";
+ std::prev(RecipeI)->dump();
+#endif
+ return false;
+ }
+ RecipeI++;
+ }
+ }
+ return true;
+}
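The new entry point is a static predicate, so a natural place to call it is an
assert after VPlan construction or after running transforms. A minimal sketch
under that assumption (the wrapper is illustrative, not part of this commit):

  static void verifyAfterVPlanTransforms(const VPlan &Plan) {
    // verifyPlanIsValid() prints a diagnostic and returns false if a phi-like
    // recipe is found after a non-phi recipe in any VPBasicBlock.
    assert(VPlanVerifier::verifyPlanIsValid(Plan) &&
           "VPlan phi-placement invariants violated");
    (void)Plan; // keep NDEBUG builds warning-free
  }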
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.h b/llvm/lib/Transforms/Vectorize/VPlanVerifier.h
index 8e8de441648a..839c24e2c9f4 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.h
@@ -26,6 +26,7 @@
namespace llvm {
class VPRegionBlock;
+class VPlan;
/// Struct with utility functions that can be used to check the consistency and
/// invariants of a VPlan, including the components of its H-CFG.
@@ -35,6 +36,12 @@ struct VPlanVerifier {
/// 1. Region/Block verification: Check the Region/Block verification
/// invariants for every region in the H-CFG.
void verifyHierarchicalCFG(const VPRegionBlock *TopRegion) const;
+
+ /// Verify invariants for general VPlans. Currently it checks the following:
+ /// 1. all phi-like recipes must be at the beginning of a block, with no other
+ /// recipes in between. Note that currently there is still an exception for
+ /// VPBlendRecipes.
+ static bool verifyPlanIsValid(const VPlan &Plan);
};
} // namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index d18bcd34620c..57b11e9414ba 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -31,10 +31,12 @@
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Vectorize.h"
+#define DEBUG_TYPE "vector-combine"
+#include "llvm/Transforms/Utils/InstructionWorklist.h"
+
using namespace llvm;
using namespace llvm::PatternMatch;
-#define DEBUG_TYPE "vector-combine"
STATISTIC(NumVecLoad, "Number of vector loads formed");
STATISTIC(NumVecCmp, "Number of vector compares formed");
STATISTIC(NumVecBO, "Number of vector binops formed");
@@ -61,8 +63,10 @@ namespace {
class VectorCombine {
public:
VectorCombine(Function &F, const TargetTransformInfo &TTI,
- const DominatorTree &DT, AAResults &AA, AssumptionCache &AC)
- : F(F), Builder(F.getContext()), TTI(TTI), DT(DT), AA(AA), AC(AC) {}
+ const DominatorTree &DT, AAResults &AA, AssumptionCache &AC,
+ bool ScalarizationOnly)
+ : F(F), Builder(F.getContext()), TTI(TTI), DT(DT), AA(AA), AC(AC),
+ ScalarizationOnly(ScalarizationOnly) {}
bool run();
@@ -74,12 +78,18 @@ private:
AAResults &AA;
AssumptionCache &AC;
+ /// If true, only perform scalarization combines and do not introduce new
+ /// vector operations.
+ bool ScalarizationOnly;
+
+ InstructionWorklist Worklist;
+
bool vectorizeLoadInsert(Instruction &I);
ExtractElementInst *getShuffleExtract(ExtractElementInst *Ext0,
ExtractElementInst *Ext1,
unsigned PreferredExtractIndex) const;
bool isExtractExtractCheap(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
- unsigned Opcode,
+ const Instruction &I,
ExtractElementInst *&ConvertToShuffle,
unsigned PreferredExtractIndex);
void foldExtExtCmp(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
@@ -92,14 +102,27 @@ private:
bool foldExtractedCmps(Instruction &I);
bool foldSingleElementStore(Instruction &I);
bool scalarizeLoadExtract(Instruction &I);
+ bool foldShuffleOfBinops(Instruction &I);
+
+ void replaceValue(Value &Old, Value &New) {
+ Old.replaceAllUsesWith(&New);
+ New.takeName(&Old);
+ if (auto *NewI = dyn_cast<Instruction>(&New)) {
+ Worklist.pushUsersToWorkList(*NewI);
+ Worklist.pushValue(NewI);
+ }
+ Worklist.pushValue(&Old);
+ }
+
+ void eraseInstruction(Instruction &I) {
+ for (Value *Op : I.operands())
+ Worklist.pushValue(Op);
+ Worklist.remove(&I);
+ I.eraseFromParent();
+ }
};
} // namespace
-static void replaceValue(Value &Old, Value &New) {
- Old.replaceAllUsesWith(&New);
- New.takeName(&Old);
-}
-
bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
// Match insert into fixed vector of scalar value.
// TODO: Handle non-zero insert index.
@@ -284,12 +307,13 @@ ExtractElementInst *VectorCombine::getShuffleExtract(
/// \p ConvertToShuffle to that extract instruction.
bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
ExtractElementInst *Ext1,
- unsigned Opcode,
+ const Instruction &I,
ExtractElementInst *&ConvertToShuffle,
unsigned PreferredExtractIndex) {
assert(isa<ConstantInt>(Ext0->getOperand(1)) &&
isa<ConstantInt>(Ext1->getOperand(1)) &&
"Expected constant extract indexes");
+ unsigned Opcode = I.getOpcode();
Type *ScalarTy = Ext0->getType();
auto *VecTy = cast<VectorType>(Ext0->getOperand(0)->getType());
InstructionCost ScalarOpCost, VectorOpCost;
@@ -302,10 +326,11 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
} else {
assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
"Expected a compare");
- ScalarOpCost = TTI.getCmpSelInstrCost(Opcode, ScalarTy,
- CmpInst::makeCmpResultType(ScalarTy));
- VectorOpCost = TTI.getCmpSelInstrCost(Opcode, VecTy,
- CmpInst::makeCmpResultType(VecTy));
+ CmpInst::Predicate Pred = cast<CmpInst>(I).getPredicate();
+ ScalarOpCost = TTI.getCmpSelInstrCost(
+ Opcode, ScalarTy, CmpInst::makeCmpResultType(ScalarTy), Pred);
+ VectorOpCost = TTI.getCmpSelInstrCost(
+ Opcode, VecTy, CmpInst::makeCmpResultType(VecTy), Pred);
}
// Get cost estimates for the extract elements. These costs will factor into
@@ -480,8 +505,7 @@ bool VectorCombine::foldExtractExtract(Instruction &I) {
m_InsertElt(m_Value(), m_Value(), m_ConstantInt(InsertIndex)));
ExtractElementInst *ExtractToChange;
- if (isExtractExtractCheap(Ext0, Ext1, I.getOpcode(), ExtractToChange,
- InsertIndex))
+ if (isExtractExtractCheap(Ext0, Ext1, I, ExtractToChange, InsertIndex))
return false;
if (ExtractToChange) {
@@ -501,6 +525,8 @@ bool VectorCombine::foldExtractExtract(Instruction &I) {
else
foldExtExtBinop(Ext0, Ext1, I);
+ Worklist.push(Ext0);
+ Worklist.push(Ext1);
return true;
}
@@ -623,8 +649,11 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
unsigned Opcode = I.getOpcode();
InstructionCost ScalarOpCost, VectorOpCost;
if (IsCmp) {
- ScalarOpCost = TTI.getCmpSelInstrCost(Opcode, ScalarTy);
- VectorOpCost = TTI.getCmpSelInstrCost(Opcode, VecTy);
+ CmpInst::Predicate Pred = cast<CmpInst>(I).getPredicate();
+ ScalarOpCost = TTI.getCmpSelInstrCost(
+ Opcode, ScalarTy, CmpInst::makeCmpResultType(ScalarTy), Pred);
+ VectorOpCost = TTI.getCmpSelInstrCost(
+ Opcode, VecTy, CmpInst::makeCmpResultType(VecTy), Pred);
} else {
ScalarOpCost = TTI.getArithmeticInstrCost(Opcode, ScalarTy);
VectorOpCost = TTI.getArithmeticInstrCost(Opcode, VecTy);
@@ -724,7 +753,10 @@ bool VectorCombine::foldExtractedCmps(Instruction &I) {
InstructionCost OldCost =
TTI.getVectorInstrCost(Ext0->getOpcode(), VecTy, Index0);
OldCost += TTI.getVectorInstrCost(Ext1->getOpcode(), VecTy, Index1);
- OldCost += TTI.getCmpSelInstrCost(CmpOpcode, I0->getType()) * 2;
+ OldCost +=
+ TTI.getCmpSelInstrCost(CmpOpcode, I0->getType(),
+ CmpInst::makeCmpResultType(I0->getType()), Pred) *
+ 2;
OldCost += TTI.getArithmeticInstrCost(I.getOpcode(), I.getType());
// The proposed vector pattern is:
@@ -733,7 +765,8 @@ bool VectorCombine::foldExtractedCmps(Instruction &I) {
int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;
int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;
auto *CmpTy = cast<FixedVectorType>(CmpInst::makeCmpResultType(X->getType()));
- InstructionCost NewCost = TTI.getCmpSelInstrCost(CmpOpcode, X->getType());
+ InstructionCost NewCost = TTI.getCmpSelInstrCost(
+ CmpOpcode, X->getType(), CmpInst::makeCmpResultType(X->getType()), Pred);
SmallVector<int, 32> ShufMask(VecTy->getNumElements(), UndefMaskElem);
ShufMask[CheapIndex] = ExpensiveIndex;
NewCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, CmpTy,
@@ -774,18 +807,98 @@ static bool isMemModifiedBetween(BasicBlock::iterator Begin,
});
}
+/// Helper class to indicate whether a vector index can be safely scalarized and
+/// if a freeze needs to be inserted.
+class ScalarizationResult {
+ enum class StatusTy { Unsafe, Safe, SafeWithFreeze };
+
+ StatusTy Status;
+ Value *ToFreeze;
+
+ ScalarizationResult(StatusTy Status, Value *ToFreeze = nullptr)
+ : Status(Status), ToFreeze(ToFreeze) {}
+
+public:
+ ScalarizationResult(const ScalarizationResult &Other) = default;
+ ~ScalarizationResult() {
+ assert(!ToFreeze && "freeze() not called with ToFreeze being set");
+ }
+
+ static ScalarizationResult unsafe() { return {StatusTy::Unsafe}; }
+ static ScalarizationResult safe() { return {StatusTy::Safe}; }
+ static ScalarizationResult safeWithFreeze(Value *ToFreeze) {
+ return {StatusTy::SafeWithFreeze, ToFreeze};
+ }
+
+ /// Returns true if the index can be scalarized without requiring a freeze.
+ bool isSafe() const { return Status == StatusTy::Safe; }
+ /// Returns true if the index cannot be scalarized.
+ bool isUnsafe() const { return Status == StatusTy::Unsafe; }
+ /// Returns true if the index can be scalarized, but requires inserting a
+ /// freeze.
+ bool isSafeWithFreeze() const { return Status == StatusTy::SafeWithFreeze; }
+
+ /// Reset the state to Unsafe and clear ToFreeze if set.
+ void discard() {
+ ToFreeze = nullptr;
+ Status = StatusTy::Unsafe;
+ }
+
+ /// Freeze ToFreeze and update the use in \p UserI to use it.
+ void freeze(IRBuilder<> &Builder, Instruction &UserI) {
+ assert(isSafeWithFreeze() &&
+ "should only be used when freezing is required");
+ assert(is_contained(ToFreeze->users(), &UserI) &&
+ "UserI must be a user of ToFreeze");
+ IRBuilder<>::InsertPointGuard Guard(Builder);
+ Builder.SetInsertPoint(cast<Instruction>(&UserI));
+ Value *Frozen =
+ Builder.CreateFreeze(ToFreeze, ToFreeze->getName() + ".frozen");
+ for (Use &U : make_early_inc_range((UserI.operands())))
+ if (U.get() == ToFreeze)
+ U.set(Frozen);
+
+ ToFreeze = nullptr;
+ }
+};
+
/// Check if it is legal to scalarize a memory access to \p VecTy at index \p
/// Idx. \p Idx must access a valid vector element.
-static bool canScalarizeAccess(FixedVectorType *VecTy, Value *Idx,
- Instruction *CtxI, AssumptionCache &AC) {
- if (auto *C = dyn_cast<ConstantInt>(Idx))
- return C->getValue().ult(VecTy->getNumElements());
+static ScalarizationResult canScalarizeAccess(FixedVectorType *VecTy,
+ Value *Idx, Instruction *CtxI,
+ AssumptionCache &AC,
+ const DominatorTree &DT) {
+ if (auto *C = dyn_cast<ConstantInt>(Idx)) {
+ if (C->getValue().ult(VecTy->getNumElements()))
+ return ScalarizationResult::safe();
+ return ScalarizationResult::unsafe();
+ }
- APInt Zero(Idx->getType()->getScalarSizeInBits(), 0);
- APInt MaxElts(Idx->getType()->getScalarSizeInBits(), VecTy->getNumElements());
+ unsigned IntWidth = Idx->getType()->getScalarSizeInBits();
+ APInt Zero(IntWidth, 0);
+ APInt MaxElts(IntWidth, VecTy->getNumElements());
ConstantRange ValidIndices(Zero, MaxElts);
- ConstantRange IdxRange = computeConstantRange(Idx, true, &AC, CtxI, 0);
- return ValidIndices.contains(IdxRange);
+ ConstantRange IdxRange(IntWidth, true);
+
+ if (isGuaranteedNotToBePoison(Idx, &AC)) {
+ if (ValidIndices.contains(computeConstantRange(Idx, true, &AC, CtxI, &DT)))
+ return ScalarizationResult::safe();
+ return ScalarizationResult::unsafe();
+ }
+
+ // If the index may be poison, check if we can insert a freeze before the
+ // range of the index is restricted.
+ Value *IdxBase;
+ ConstantInt *CI;
+ if (match(Idx, m_And(m_Value(IdxBase), m_ConstantInt(CI)))) {
+ IdxRange = IdxRange.binaryAnd(CI->getValue());
+ } else if (match(Idx, m_URem(m_Value(IdxBase), m_ConstantInt(CI)))) {
+ IdxRange = IdxRange.urem(CI->getValue());
+ }
+
+ if (ValidIndices.contains(IdxRange))
+ return ScalarizationResult::safeWithFreeze(IdxBase);
+ return ScalarizationResult::unsafe();
}
/// The memory operation on a vector of \p ScalarType had alignment of
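The three-state result is consumed with the same protocol in both callers
updated below; a condensed sketch of that caller-side pattern (the free
function is illustrative, the member calls are the ones added above):

  // Decide whether the access at Idx can be rewritten as a scalar access.
  static bool canRewriteAsScalar(FixedVectorType *VecTy, Value *Idx,
                                 Instruction *CtxI, AssumptionCache &AC,
                                 const DominatorTree &DT, IRBuilder<> &Builder) {
    ScalarizationResult SR = canScalarizeAccess(VecTy, Idx, CtxI, AC, DT);
    if (SR.isUnsafe())
      return false;                                // index may be out of bounds
    if (SR.isSafeWithFreeze())                     // in range, but may be poison:
      SR.freeze(Builder, *cast<Instruction>(Idx)); // freeze the index base first
    return true;                                   // scalar GEP/load/store is legal
  }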
@@ -833,12 +946,17 @@ bool VectorCombine::foldSingleElementStore(Instruction &I) {
// modified between, vector type matches store size, and index is inbounds.
if (!Load->isSimple() || Load->getParent() != SI->getParent() ||
!DL.typeSizeEqualsStoreSize(Load->getType()) ||
- !canScalarizeAccess(VecTy, Idx, Load, AC) ||
- SrcAddr != SI->getPointerOperand()->stripPointerCasts() ||
+ SrcAddr != SI->getPointerOperand()->stripPointerCasts())
+ return false;
+
+ auto ScalarizableIdx = canScalarizeAccess(VecTy, Idx, Load, AC, DT);
+ if (ScalarizableIdx.isUnsafe() ||
isMemModifiedBetween(Load->getIterator(), SI->getIterator(),
MemoryLocation::get(SI), AA))
return false;
+ if (ScalarizableIdx.isSafeWithFreeze())
+ ScalarizableIdx.freeze(Builder, *cast<Instruction>(Idx));
Value *GEP = Builder.CreateInBoundsGEP(
SI->getValueOperand()->getType(), SI->getPointerOperand(),
{ConstantInt::get(Idx->getType(), 0), Idx});
@@ -849,8 +967,7 @@ bool VectorCombine::foldSingleElementStore(Instruction &I) {
DL);
NSI->setAlignment(ScalarOpAlignment);
replaceValue(I, *NSI);
- // Need erasing the store manually.
- I.eraseFromParent();
+ eraseInstruction(I);
return true;
}
@@ -860,11 +977,10 @@ bool VectorCombine::foldSingleElementStore(Instruction &I) {
/// Try to scalarize vector loads feeding extractelement instructions.
bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
Value *Ptr;
- Value *Idx;
- if (!match(&I, m_ExtractElt(m_Load(m_Value(Ptr)), m_Value(Idx))))
+ if (!match(&I, m_Load(m_Value(Ptr))))
return false;
- auto *LI = cast<LoadInst>(I.getOperand(0));
+ auto *LI = cast<LoadInst>(&I);
const DataLayout &DL = I.getModule()->getDataLayout();
if (LI->isVolatile() || !DL.typeSizeEqualsStoreSize(LI->getType()))
return false;
@@ -909,8 +1025,12 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
else if (LastCheckedInst->comesBefore(UI))
LastCheckedInst = UI;
- if (!canScalarizeAccess(FixedVT, UI->getOperand(1), &I, AC))
+ auto ScalarIdx = canScalarizeAccess(FixedVT, UI->getOperand(1), &I, AC, DT);
+ if (!ScalarIdx.isSafe()) {
+ // TODO: Freeze index if it is safe to do so.
+ ScalarIdx.discard();
return false;
+ }
auto *Index = dyn_cast<ConstantInt>(UI->getOperand(1));
OriginalCost +=
@@ -946,6 +1066,60 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
return true;
}
+/// Try to convert "shuffle (binop), (binop)" with a shared binop operand into
+/// "binop (shuffle), (shuffle)".
+bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
+ auto *VecTy = dyn_cast<FixedVectorType>(I.getType());
+ if (!VecTy)
+ return false;
+
+ BinaryOperator *B0, *B1;
+ ArrayRef<int> Mask;
+ if (!match(&I, m_Shuffle(m_OneUse(m_BinOp(B0)), m_OneUse(m_BinOp(B1)),
+ m_Mask(Mask))) ||
+ B0->getOpcode() != B1->getOpcode() || B0->getType() != VecTy)
+ return false;
+
+ // Try to replace a binop with a shuffle if the shuffle is not costly.
+ // The new shuffle will choose from a single, common operand, so it may be
+ // cheaper than the existing two-operand shuffle.
+ SmallVector<int> UnaryMask = createUnaryMask(Mask, Mask.size());
+ Instruction::BinaryOps Opcode = B0->getOpcode();
+ InstructionCost BinopCost = TTI.getArithmeticInstrCost(Opcode, VecTy);
+ InstructionCost ShufCost = TTI.getShuffleCost(
+ TargetTransformInfo::SK_PermuteSingleSrc, VecTy, UnaryMask);
+ if (ShufCost > BinopCost)
+ return false;
+
+ // If we have something like "add X, Y" and "add Z, X", swap ops to match.
+ Value *X = B0->getOperand(0), *Y = B0->getOperand(1);
+ Value *Z = B1->getOperand(0), *W = B1->getOperand(1);
+ if (BinaryOperator::isCommutative(Opcode) && X != Z && Y != W)
+ std::swap(X, Y);
+
+ Value *Shuf0, *Shuf1;
+ if (X == Z) {
+ // shuf (bo X, Y), (bo X, W) --> bo (shuf X), (shuf Y, W)
+ Shuf0 = Builder.CreateShuffleVector(X, UnaryMask);
+ Shuf1 = Builder.CreateShuffleVector(Y, W, Mask);
+ } else if (Y == W) {
+ // shuf (bo X, Y), (bo Z, Y) --> bo (shuf X, Z), (shuf Y)
+ Shuf0 = Builder.CreateShuffleVector(X, Z, Mask);
+ Shuf1 = Builder.CreateShuffleVector(Y, UnaryMask);
+ } else {
+ return false;
+ }
+
+ Value *NewBO = Builder.CreateBinOp(Opcode, Shuf0, Shuf1);
+ // Intersect flags from the old binops.
+ if (auto *NewInst = dyn_cast<Instruction>(NewBO)) {
+ NewInst->copyIRFlags(B0);
+ NewInst->andIRFlags(B1);
+ }
+ replaceValue(I, *NewBO);
+ return true;
+}
+
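For the shared-operand case, the two-operand shuffle mask is re-targeted at a
single source via createUnaryMask (from VectorUtils.h). A small worked example
with assumed mask values, not taken from this commit:

  // shuf (add X, Y), (add X, W), <0,5,2,7>  -->  add (shuf X), (shuf Y, W, <0,5,2,7>)
  SmallVector<int> Mask = {0, 5, 2, 7};
  SmallVector<int> UnaryMask = createUnaryMask(Mask, Mask.size()); // {0, 1, 2, 3}
  // Old lanes: {x0+y0, x1+w1, x2+y2, x3+w3}.
  // New: Shuf0 = shuffle of X with the unary mask (an identity shuffle here),
  //      Shuf1 = {y0, w1, y2, w3}; the add reproduces the same four lanes.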
/// This is the entry point for all transforms. Pass manager differences are
/// handled in the callers of this function.
bool VectorCombine::run() {
@@ -957,29 +1131,43 @@ bool VectorCombine::run() {
return false;
bool MadeChange = false;
+ auto FoldInst = [this, &MadeChange](Instruction &I) {
+ Builder.SetInsertPoint(&I);
+ if (!ScalarizationOnly) {
+ MadeChange |= vectorizeLoadInsert(I);
+ MadeChange |= foldExtractExtract(I);
+ MadeChange |= foldBitcastShuf(I);
+ MadeChange |= foldExtractedCmps(I);
+ MadeChange |= foldShuffleOfBinops(I);
+ }
+ MadeChange |= scalarizeBinopOrCmp(I);
+ MadeChange |= scalarizeLoadExtract(I);
+ MadeChange |= foldSingleElementStore(I);
+ };
for (BasicBlock &BB : F) {
// Ignore unreachable basic blocks.
if (!DT.isReachableFromEntry(&BB))
continue;
// Use early increment range so that we can erase instructions in loop.
for (Instruction &I : make_early_inc_range(BB)) {
- if (isa<DbgInfoIntrinsic>(I))
+ if (I.isDebugOrPseudoInst())
continue;
- Builder.SetInsertPoint(&I);
- MadeChange |= vectorizeLoadInsert(I);
- MadeChange |= foldExtractExtract(I);
- MadeChange |= foldBitcastShuf(I);
- MadeChange |= scalarizeBinopOrCmp(I);
- MadeChange |= foldExtractedCmps(I);
- MadeChange |= scalarizeLoadExtract(I);
- MadeChange |= foldSingleElementStore(I);
+ FoldInst(I);
}
}
- // We're done with transforms, so remove dead instructions.
- if (MadeChange)
- for (BasicBlock &BB : F)
- SimplifyInstructionsInBlock(&BB);
+ while (!Worklist.isEmpty()) {
+ Instruction *I = Worklist.removeOne();
+ if (!I)
+ continue;
+
+ if (isInstructionTriviallyDead(I)) {
+ eraseInstruction(*I);
+ continue;
+ }
+
+ FoldInst(*I);
+ }
return MadeChange;
}
@@ -1014,7 +1202,7 @@ public:
auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
- VectorCombine Combiner(F, TTI, DT, AA, AC);
+ VectorCombine Combiner(F, TTI, DT, AA, AC, false);
return Combiner.run();
}
};
@@ -1038,7 +1226,7 @@ PreservedAnalyses VectorCombinePass::run(Function &F,
TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
AAResults &AA = FAM.getResult<AAManager>(F);
- VectorCombine Combiner(F, TTI, DT, AA, AC);
+ VectorCombine Combiner(F, TTI, DT, AA, AC, ScalarizationOnly);
if (!Combiner.run())
return PreservedAnalyses::all();
PreservedAnalyses PA;
diff --git a/llvm/lib/WindowsManifest/WindowsManifestMerger.cpp b/llvm/lib/WindowsManifest/WindowsManifestMerger.cpp
index 6af7bc699d05..1be1d34417eb 100644
--- a/llvm/lib/WindowsManifest/WindowsManifestMerger.cpp
+++ b/llvm/lib/WindowsManifest/WindowsManifestMerger.cpp
@@ -35,7 +35,7 @@ void WindowsManifestError::log(raw_ostream &OS) const { OS << Msg; }
class WindowsManifestMerger::WindowsManifestMergerImpl {
public:
~WindowsManifestMergerImpl();
- Error merge(const MemoryBuffer &Manifest);
+ Error merge(MemoryBufferRef Manifest);
std::unique_ptr<MemoryBuffer> getMergedManifest();
private:
@@ -620,7 +620,7 @@ WindowsManifestMerger::WindowsManifestMergerImpl::~WindowsManifestMergerImpl() {
}
Error WindowsManifestMerger::WindowsManifestMergerImpl::merge(
- const MemoryBuffer &Manifest) {
+ MemoryBufferRef Manifest) {
if (Merged)
return make_error<WindowsManifestError>(
"merge after getMergedManifest is not supported");
@@ -690,7 +690,7 @@ WindowsManifestMerger::WindowsManifestMergerImpl::~WindowsManifestMergerImpl() {
}
Error WindowsManifestMerger::WindowsManifestMergerImpl::merge(
- const MemoryBuffer &Manifest) {
+ MemoryBufferRef Manifest) {
return make_error<WindowsManifestError>("no libxml2");
}
@@ -708,7 +708,7 @@ WindowsManifestMerger::WindowsManifestMerger()
WindowsManifestMerger::~WindowsManifestMerger() {}
-Error WindowsManifestMerger::merge(const MemoryBuffer &Manifest) {
+Error WindowsManifestMerger::merge(MemoryBufferRef Manifest) {
return Impl->merge(Manifest);
}
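merge() now takes a non-owning MemoryBufferRef, so callers that still hold an
owning MemoryBuffer simply pass a view of it. A hedged sketch of the updated
call (the helper and its names are illustrative, not from this commit):

  llvm::Error mergeOne(llvm::windows_manifest::WindowsManifestMerger &Merger,
                       const llvm::MemoryBuffer &Buf) {
    // No ownership transfer: merge() only needs to read the manifest bytes.
    return Merger.merge(Buf.getMemBufferRef());
  }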
diff --git a/llvm/lib/XRay/InstrumentationMap.cpp b/llvm/lib/XRay/InstrumentationMap.cpp
index e6534e5a7be7..c60efa465bb6 100644
--- a/llvm/lib/XRay/InstrumentationMap.cpp
+++ b/llvm/lib/XRay/InstrumentationMap.cpp
@@ -86,10 +86,8 @@ loadObj(StringRef Filename, object::OwningBinary<object::ObjectFile> &ObjFile,
"Failed to find XRay instrumentation map.",
std::make_error_code(std::errc::executable_format_error));
- if (Expected<StringRef> E = I->getContents())
- Contents = *E;
- else
- return E.takeError();
+ if (Error E = I->getContents().moveInto(Contents))
+ return E;
RelocMap Relocs;
if (ObjFile.getBinary()->isELF()) {
@@ -190,7 +188,7 @@ loadObj(StringRef Filename, object::OwningBinary<object::ObjectFile> &ObjFile,
SledEntry::FunctionKinds::TAIL,
SledEntry::FunctionKinds::LOG_ARGS_ENTER,
SledEntry::FunctionKinds::CUSTOM_EVENT};
- if (Kind >= sizeof(Kinds))
+ if (Kind >= sizeof(Kinds) / sizeof(Kinds[0]))
return errorCodeToError(
std::make_error_code(std::errc::executable_format_error));
Entry.Kind = Kinds[Kind];
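The old bound compared Kind against sizeof(Kinds), i.e. the array size in bytes
rather than its element count, so out-of-range kinds could slip through; the fix
divides by the element size. An equivalent spelling, shown only as an
illustrative alternative to the committed form:

  if (Kind >= array_lengthof(Kinds)) // llvm::array_lengthof, or std::size(Kinds)
    return errorCodeToError(
        std::make_error_code(std::errc::executable_format_error));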
diff --git a/llvm/tools/bugpoint/CrashDebugger.cpp b/llvm/tools/bugpoint/CrashDebugger.cpp
index 2601ee318f7d..451e1cd98ee8 100644
--- a/llvm/tools/bugpoint/CrashDebugger.cpp
+++ b/llvm/tools/bugpoint/CrashDebugger.cpp
@@ -269,7 +269,7 @@ bool ReduceCrashingFunctions::TestFuncs(std::vector<Function *> &Funcs) {
std::vector<GlobalValue *> ToRemove;
// First, remove aliases to functions we're about to purge.
for (GlobalAlias &Alias : M->aliases()) {
- GlobalObject *Root = Alias.getBaseObject();
+ GlobalObject *Root = Alias.getAliaseeObject();
Function *F = dyn_cast_or_null<Function>(Root);
if (F) {
if (Functions.count(F))
@@ -358,8 +358,7 @@ bool ReduceCrashingFunctionAttributes::TestFuncAttrs(
for (auto A : Attrs)
AB.addAttribute(A);
AttributeList NewAttrs;
- NewAttrs =
- NewAttrs.addAttributes(BD.getContext(), AttributeList::FunctionIndex, AB);
+ NewAttrs = NewAttrs.addFnAttributes(BD.getContext(), AB);
// Set this new list of attributes on the function.
F->setAttributes(NewAttrs);
@@ -375,7 +374,7 @@ bool ReduceCrashingFunctionAttributes::TestFuncAttrs(
// Pass along the set of attributes that caused the crash.
Attrs.clear();
- for (Attribute A : NewAttrs.getFnAttributes()) {
+ for (Attribute A : NewAttrs.getFnAttrs()) {
Attrs.push_back(A);
}
return true;
@@ -787,14 +786,13 @@ bool ReduceCrashingInstructions::TestInsts(
for (Module::iterator MI = M->begin(), ME = M->end(); MI != ME; ++MI)
for (Function::iterator FI = MI->begin(), FE = MI->end(); FI != FE; ++FI)
- for (BasicBlock::iterator I = FI->begin(), E = FI->end(); I != E;) {
- Instruction *Inst = &*I++;
- if (!Instructions.count(Inst) && !Inst->isTerminator() &&
- !Inst->isEHPad() && !Inst->getType()->isTokenTy() &&
- !Inst->isSwiftError()) {
- if (!Inst->getType()->isVoidTy())
- Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
- Inst->eraseFromParent();
+ for (Instruction &Inst : llvm::make_early_inc_range(*FI)) {
+ if (!Instructions.count(&Inst) && !Inst.isTerminator() &&
+ !Inst.isEHPad() && !Inst.getType()->isTokenTy() &&
+ !Inst.isSwiftError()) {
+ if (!Inst.getType()->isVoidTy())
+ Inst.replaceAllUsesWith(UndefValue::get(Inst.getType()));
+ Inst.eraseFromParent();
}
}
@@ -1232,7 +1230,7 @@ static Error DebugACrash(BugDriver &BD, BugTester TestFn) {
assert(Fn && "Could not find function?");
std::vector<Attribute> Attrs;
- for (Attribute A : Fn->getAttributes().getFnAttributes())
+ for (Attribute A : Fn->getAttributes().getFnAttrs())
Attrs.push_back(A);
OldSize += Attrs.size();
diff --git a/llvm/tools/bugpoint/OptimizerDriver.cpp b/llvm/tools/bugpoint/OptimizerDriver.cpp
index ca78735202fc..848baf90965b 100644
--- a/llvm/tools/bugpoint/OptimizerDriver.cpp
+++ b/llvm/tools/bugpoint/OptimizerDriver.cpp
@@ -223,8 +223,8 @@ bool BugDriver::runPasses(Module &Program,
for (std::vector<std::string>::const_iterator I = pass_args.begin(),
E = pass_args.end();
I != E; ++I)
- Args.push_back(I->c_str());
- Args.push_back(Temp->TmpName.c_str());
+ Args.push_back(*I);
+ Args.push_back(Temp->TmpName);
Args.append(ExtraArgs.begin(), ExtraArgs.end());
LLVM_DEBUG(errs() << "\nAbout to run:\t";
diff --git a/llvm/tools/bugpoint/ToolRunner.cpp b/llvm/tools/bugpoint/ToolRunner.cpp
index b81ab07980dd..d3111e574e7c 100644
--- a/llvm/tools/bugpoint/ToolRunner.cpp
+++ b/llvm/tools/bugpoint/ToolRunner.cpp
@@ -192,7 +192,7 @@ Expected<int> LLI::ExecuteProgram(const std::string &Bitcode,
outs() << "<lli>";
outs().flush();
LLVM_DEBUG(errs() << "\nAbout to run:\t";
- for (unsigned i = 0, e = LLIArgs.size() - 1; i != e; ++i) errs()
+ for (unsigned i = 0, e = LLIArgs.size(); i != e; ++i) errs()
<< " " << LLIArgs[i];
errs() << "\n";);
return RunProgramWithTimeout(LLIPath, LLIArgs, InputFile, OutputFile,
@@ -460,7 +460,7 @@ Expected<CC::FileType> LLC::OutputCode(const std::string &Bitcode,
outs() << (UseIntegratedAssembler ? "<llc-ia>" : "<llc>");
outs().flush();
LLVM_DEBUG(errs() << "\nAbout to run:\t";
- for (unsigned i = 0, e = LLCArgs.size() - 1; i != e; ++i) errs()
+ for (unsigned i = 0, e = LLCArgs.size(); i != e; ++i) errs()
<< " " << LLCArgs[i];
errs() << "\n";);
if (RunProgramWithTimeout(LLCPath, LLCArgs, "", "", "", Timeout, MemoryLimit))
@@ -578,7 +578,7 @@ Expected<int> JIT::ExecuteProgram(const std::string &Bitcode,
outs() << "<jit>";
outs().flush();
LLVM_DEBUG(errs() << "\nAbout to run:\t";
- for (unsigned i = 0, e = JITArgs.size() - 1; i != e; ++i) errs()
+ for (unsigned i = 0, e = JITArgs.size(); i != e; ++i) errs()
<< " " << JITArgs[i];
errs() << "\n";);
LLVM_DEBUG(errs() << "\nSending output to " << OutputFile << "\n");
@@ -685,7 +685,7 @@ Expected<int> CC::ExecuteProgram(const std::string &ProgramFile,
outs() << "<CC>";
outs().flush();
LLVM_DEBUG(errs() << "\nAbout to run:\t";
- for (unsigned i = 0, e = CCArgs.size() - 1; i != e; ++i) errs()
+ for (unsigned i = 0, e = CCArgs.size(); i != e; ++i) errs()
<< " " << CCArgs[i];
errs() << "\n";);
if (RunProgramWithTimeout(CCPath, CCArgs, "", "", ""))
@@ -733,7 +733,7 @@ Expected<int> CC::ExecuteProgram(const std::string &ProgramFile,
outs().flush();
LLVM_DEBUG(
errs() << "\nAbout to run:\t";
- for (unsigned i = 0, e = ProgramArgs.size() - 1; i != e; ++i) errs()
+ for (unsigned i = 0, e = ProgramArgs.size(); i != e; ++i) errs()
<< " " << ProgramArgs[i];
errs() << "\n";);
@@ -829,7 +829,7 @@ Error CC::MakeSharedObject(const std::string &InputFile, FileType fileType,
outs() << "<CC>";
outs().flush();
LLVM_DEBUG(errs() << "\nAbout to run:\t";
- for (unsigned i = 0, e = CCArgs.size() - 1; i != e; ++i) errs()
+ for (unsigned i = 0, e = CCArgs.size(); i != e; ++i) errs()
<< " " << CCArgs[i];
errs() << "\n";);
if (RunProgramWithTimeout(CCPath, CCArgs, "", "", ""))
diff --git a/llvm/tools/llc/llc.cpp b/llvm/tools/llc/llc.cpp
index 6a1e2bae2096..9d80f062c8f9 100644
--- a/llvm/tools/llc/llc.cpp
+++ b/llvm/tools/llc/llc.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/CodeGen/CommandFlags.h"
@@ -36,6 +37,7 @@
#include "llvm/IRReader/IRReader.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Pass.h"
#include "llvm/Remarks/HotnessThresholdParser.h"
#include "llvm/Support/CommandLine.h"
@@ -47,8 +49,8 @@
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/PluginLoader.h"
#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/TimeProfiler.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
@@ -82,6 +84,19 @@ TimeCompilations("time-compilations", cl::Hidden, cl::init(1u),
cl::value_desc("N"),
cl::desc("Repeat compilation N times for timing"));
+static cl::opt<bool> TimeTrace("time-trace", cl::desc("Record time trace"));
+
+static cl::opt<unsigned> TimeTraceGranularity(
+ "time-trace-granularity",
+ cl::desc(
+ "Minimum time granularity (in microseconds) traced by time profiler"),
+ cl::init(500), cl::Hidden);
+
+static cl::opt<std::string>
+ TimeTraceFile("time-trace-file",
+ cl::desc("Specify time trace file destination"),
+ cl::value_desc("filename"));
+
static cl::opt<std::string>
BinutilsVersion("binutils-version", cl::Hidden,
cl::desc("Produced object files can use all ELF features "
@@ -201,8 +216,7 @@ static cl::opt<RunPassOption, true, cl::parser<std::string>> RunPass(
static int compileModule(char **, LLVMContext &);
-LLVM_ATTRIBUTE_NORETURN static void reportError(Twine Msg,
- StringRef Filename = "") {
+[[noreturn]] static void reportError(Twine Msg, StringRef Filename = "") {
SmallString<256> Prefix;
if (!Filename.empty()) {
if (Filename == "-")
@@ -213,7 +227,7 @@ LLVM_ATTRIBUTE_NORETURN static void reportError(Twine Msg,
exit(1);
}
-LLVM_ATTRIBUTE_NORETURN static void reportError(Error Err, StringRef Filename) {
+[[noreturn]] static void reportError(Error Err, StringRef Filename) {
assert(Err);
handleAllErrors(createFileError(Filename, std::move(Err)),
[&](const ErrorInfoBase &EI) { reportError(EI.message()); });
@@ -330,8 +344,6 @@ int main(int argc, char **argv) {
// Enable debug stream buffering.
EnableDebugBuffering = true;
- LLVMContext Context;
-
// Initialize targets first, so that --version shows registered targets.
InitializeAllTargets();
InitializeAllTargetMCs();
@@ -366,6 +378,21 @@ int main(int argc, char **argv) {
cl::ParseCommandLineOptions(argc, argv, "llvm system compiler\n");
+ if (TimeTrace)
+ timeTraceProfilerInitialize(TimeTraceGranularity, argv[0]);
+ auto TimeTraceScopeExit = make_scope_exit([]() {
+ if (TimeTrace) {
+ if (auto E = timeTraceProfilerWrite(TimeTraceFile, OutputFilename)) {
+ handleAllErrors(std::move(E), [&](const StringError &SE) {
+ errs() << SE.getMessage() << "\n";
+ });
+ return;
+ }
+ timeTraceProfilerCleanup();
+ }
+ });
+
+ LLVMContext Context;
Context.setDiscardValueNames(DiscardValueNames);
// Set a diagnostic handler that doesn't exit on the first error
diff --git a/llvm/tools/lli/ChildTarget/ChildTarget.cpp b/llvm/tools/lli/ChildTarget/ChildTarget.cpp
index 5772baca1d09..cf1b03a141c5 100644
--- a/llvm/tools/lli/ChildTarget/ChildTarget.cpp
+++ b/llvm/tools/lli/ChildTarget/ChildTarget.cpp
@@ -1,69 +1,76 @@
-#include "llvm/ExecutionEngine/Orc/OrcABISupport.h"
-#include "llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h"
-#include "llvm/ExecutionEngine/Orc/Shared/FDRawByteChannel.h"
-#include "llvm/Support/Debug.h"
+//===----------- ChildTarget.cpp - Out-of-proc executor for lli -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Simple out-of-process executor for lli.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h"
+#include "llvm/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.h"
+#include "llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.h"
+#include "llvm/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.h"
#include "llvm/Support/DynamicLibrary.h"
-#include "llvm/Support/Process.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstring>
#include <sstream>
-#include "../RemoteJITUtils.h"
-
using namespace llvm;
using namespace llvm::orc;
-using namespace llvm::sys;
-
-#ifdef __x86_64__
-typedef OrcX86_64_SysV HostOrcArch;
-#else
-typedef OrcGenericABI HostOrcArch;
-#endif
ExitOnError ExitOnErr;
int main(int argc, char *argv[]) {
+#if LLVM_ENABLE_THREADS
if (argc != 3) {
errs() << "Usage: " << argv[0] << " <input fd> <output fd>\n";
return 1;
}
- ExitOnErr.setBanner(std::string(argv[0]) + ":");
-
- int InFD;
- int OutFD;
- {
- std::istringstream InFDStream(argv[1]), OutFDStream(argv[2]);
- InFDStream >> InFD;
- OutFDStream >> OutFD;
- }
-
if (sys::DynamicLibrary::LoadLibraryPermanently(nullptr)) {
errs() << "Error loading program symbols.\n";
return 1;
}
- auto SymbolLookup = [](const std::string &Name) {
- return RTDyldMemoryManager::getSymbolAddressInProcess(Name);
- };
+ ExitOnErr.setBanner(std::string(argv[0]) + ": ");
- auto RegisterEHFrames = [](uint8_t *Addr, uint32_t Size) {
- RTDyldMemoryManager::registerEHFramesInProcess(Addr, Size);
- };
-
- auto DeregisterEHFrames = [](uint8_t *Addr, uint32_t Size) {
- RTDyldMemoryManager::deregisterEHFramesInProcess(Addr, Size);
- };
-
- shared::FDRawByteChannel Channel(InFD, OutFD);
- typedef remote::OrcRemoteTargetServer<shared::FDRawByteChannel, HostOrcArch>
- JITServer;
- JITServer Server(Channel, SymbolLookup, RegisterEHFrames, DeregisterEHFrames);
-
- while (!Server.receivedTerminate())
- ExitOnErr(Server.handleOne());
+ int InFD = 0;
+ int OutFD = 0;
+ {
+ std::istringstream InFDStream(argv[1]), OutFDStream(argv[2]);
+ InFDStream >> InFD;
+ OutFDStream >> OutFD;
+ }
- close(InFD);
- close(OutFD);
+ auto Server =
+ ExitOnErr(SimpleRemoteEPCServer::Create<FDSimpleRemoteEPCTransport>(
+ [](SimpleRemoteEPCServer::Setup &S) -> Error {
+ S.setDispatcher(
+ std::make_unique<SimpleRemoteEPCServer::ThreadDispatcher>());
+ S.bootstrapSymbols() =
+ SimpleRemoteEPCServer::defaultBootstrapSymbols();
+ S.services().push_back(
+ std::make_unique<rt_bootstrap::SimpleExecutorMemoryManager>());
+ return Error::success();
+ },
+ InFD, OutFD));
+
+ ExitOnErr(Server->waitForDisconnect());
return 0;
+
+#else
+ errs() << argv[0]
+ << " error: this tool requires threads, but LLVM was "
+ "built with LLVM_ENABLE_THREADS=Off\n";
+ return 1;
+#endif
}
diff --git a/llvm/tools/lli/RemoteJITUtils.h b/llvm/tools/lli/ForwardingMemoryManager.h
index cc8d034f62a5..99a545e60de4 100644
--- a/llvm/tools/lli/RemoteJITUtils.h
+++ b/llvm/tools/lli/ForwardingMemoryManager.h
@@ -10,21 +10,11 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_LLI_REMOTEJITUTILS_H
-#define LLVM_TOOLS_LLI_REMOTEJITUTILS_H
+#ifndef LLVM_TOOLS_LLI_FORWARDINGMEMORYMANAGER_H
+#define LLVM_TOOLS_LLI_FORWARDINGMEMORYMANAGER_H
-#include "llvm/ExecutionEngine/Orc/Shared/FDRawByteChannel.h"
+#include "llvm/ExecutionEngine/Orc/EPCGenericDylibManager.h"
#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
-#include <mutex>
-
-#if !defined(_MSC_VER) && !defined(__MINGW32__)
-#include <unistd.h>
-#else
-#include <io.h>
-#endif
-
-// launch the remote process (see lli.cpp) and return a channel to it.
-std::unique_ptr<llvm::orc::shared::FDRawByteChannel> launchRemote();
namespace llvm {
@@ -70,9 +60,7 @@ public:
MemMgr->registerEHFrames(Addr, LoadAddr, Size);
}
- void deregisterEHFrames() override {
- MemMgr->deregisterEHFrames();
- }
+ void deregisterEHFrames() override { MemMgr->deregisterEHFrames(); }
bool finalizeMemory(std::string *ErrMsg = nullptr) override {
return MemMgr->finalizeMemory(ErrMsg);
@@ -90,8 +78,7 @@ public:
return Resolver->findSymbol(Name);
}
- JITSymbol
- findSymbolInLogicalDylib(const std::string &Name) override {
+ JITSymbol findSymbolInLogicalDylib(const std::string &Name) override {
return Resolver->findSymbolInLogicalDylib(Name);
}
@@ -100,17 +87,31 @@ private:
std::shared_ptr<LegacyJITSymbolResolver> Resolver;
};
-template <typename RemoteT>
class RemoteResolver : public LegacyJITSymbolResolver {
public:
-
- RemoteResolver(RemoteT &R) : R(R) {}
+ static Expected<std::unique_ptr<RemoteResolver>>
+ Create(orc::ExecutorProcessControl &EPC) {
+ auto DylibMgr =
+ orc::EPCGenericDylibManager::CreateWithDefaultBootstrapSymbols(EPC);
+ if (!DylibMgr)
+ return DylibMgr.takeError();
+ auto H = DylibMgr->open("", 0);
+ if (!H)
+ return H.takeError();
+ return std::unique_ptr<RemoteResolver>(
+ new RemoteResolver(std::move(*DylibMgr), std::move(*H)));
+ }
JITSymbol findSymbol(const std::string &Name) override {
- if (auto Addr = R.getSymbolAddress(Name))
- return JITSymbol(*Addr, JITSymbolFlags::Exported);
- else
- return Addr.takeError();
+ orc::RemoteSymbolLookupSet R;
+ R.push_back({std::move(Name), false});
+ if (auto Addrs = DylibMgr.lookup(H, R)) {
+ if (Addrs->size() != 1)
+ return make_error<StringError>("Unexpected remote lookup result",
+ inconvertibleErrorCode());
+ return JITSymbol(Addrs->front().getValue(), JITSymbolFlags::Exported);
+ } else
+ return Addrs.takeError();
}
JITSymbol findSymbolInLogicalDylib(const std::string &Name) override {
@@ -118,8 +119,13 @@ public:
}
public:
- RemoteT &R;
+ RemoteResolver(orc::EPCGenericDylibManager DylibMgr,
+ orc::tpctypes::DylibHandle H)
+ : DylibMgr(std::move(DylibMgr)), H(std::move(H)) {}
+
+ orc::EPCGenericDylibManager DylibMgr;
+ orc::tpctypes::DylibHandle H;
};
-}
+} // namespace llvm
-#endif
+#endif // LLVM_TOOLS_LLI_FORWARDINGMEMORYMANAGER_H
diff --git a/llvm/tools/lli/lli.cpp b/llvm/tools/lli/lli.cpp
index af614c01b9a8..d20daa07196b 100644
--- a/llvm/tools/lli/lli.cpp
+++ b/llvm/tools/lli/lli.cpp
@@ -13,7 +13,7 @@
//===----------------------------------------------------------------------===//
#include "ExecutionUtils.h"
-#include "RemoteJITUtils.h"
+#include "ForwardingMemoryManager.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Bitcode/BitcodeReader.h"
@@ -30,11 +30,12 @@
#include "llvm/ExecutionEngine/Orc/DebugUtils.h"
#include "llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h"
#include "llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h"
+#include "llvm/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.h"
#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h"
#include "llvm/ExecutionEngine/Orc/LLJIT.h"
-#include "llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h"
#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
+#include "llvm/ExecutionEngine/Orc/SimpleRemoteEPC.h"
#include "llvm/ExecutionEngine/Orc/SymbolStringPool.h"
#include "llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h"
#include "llvm/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.h"
@@ -68,6 +69,12 @@
#include "llvm/Transforms/Instrumentation.h"
#include <cerrno>
+#if !defined(_MSC_VER) && !defined(__MINGW32__)
+#include <unistd.h>
+#else
+#include <io.h>
+#endif
+
#ifdef __CYGWIN__
#include <cygwin/version.h>
#if defined(CYGWIN_VERSION_DLL_MAJOR) && CYGWIN_VERSION_DLL_MAJOR<1007
@@ -348,13 +355,12 @@ private:
return false;
std::string CacheSubdir = ModID.substr(PrefixLength);
-#if defined(_WIN32)
- // Transform "X:\foo" => "/X\foo" for convenience.
- if (isalpha(CacheSubdir[0]) && CacheSubdir[1] == ':') {
+ // Transform "X:\foo" => "/X\foo" for convenience on Windows.
+ if (is_style_windows(llvm::sys::path::Style::native) &&
+ isalpha(CacheSubdir[0]) && CacheSubdir[1] == ':') {
CacheSubdir[1] = CacheSubdir[0];
CacheSubdir[0] = '/';
}
-#endif
CacheName = CacheDir + CacheSubdir;
size_t pos = CacheName.rfind('.');
@@ -410,8 +416,7 @@ CodeGenOpt::Level getOptLevel() {
llvm_unreachable("Unrecognized opt level.");
}
-LLVM_ATTRIBUTE_NORETURN
-static void reportError(SMDiagnostic Err, const char *ProgName) {
+[[noreturn]] static void reportError(SMDiagnostic Err, const char *ProgName) {
Err.print(ProgName, errs());
exit(1);
}
@@ -419,6 +424,7 @@ static void reportError(SMDiagnostic Err, const char *ProgName) {
Error loadDylibs();
int runOrcJIT(const char *ProgName);
void disallowOrcOptions();
+Expected<std::unique_ptr<orc::ExecutorProcessControl>> launchRemote();
//===----------------------------------------------------------------------===//
// main Driver function
@@ -659,6 +665,10 @@ int main(int argc, char **argv, char * const *envp) {
#endif
}
+ std::unique_ptr<orc::ExecutorProcessControl> EPC =
+ RemoteMCJIT ? ExitOnErr(launchRemote())
+ : ExitOnErr(orc::SelfExecutorProcessControl::Create());
+
if (!RemoteMCJIT) {
// If the program doesn't explicitly call exit, we will need the Exit
// function later on to make an explicit call, so get the function now.
@@ -709,22 +719,10 @@ int main(int argc, char **argv, char * const *envp) {
// it couldn't. This is a limitation of the LLI implementation, not the
// MCJIT itself. FIXME.
- // Lanch the remote process and get a channel to it.
- std::unique_ptr<orc::shared::FDRawByteChannel> C = launchRemote();
- if (!C) {
- WithColor::error(errs(), argv[0]) << "failed to launch remote JIT.\n";
- exit(1);
- }
-
- // Create a remote target client running over the channel.
- llvm::orc::ExecutionSession ES(
- std::make_unique<orc::UnsupportedExecutorProcessControl>());
- ES.setErrorReporter([&](Error Err) { ExitOnErr(std::move(Err)); });
- typedef orc::remote::OrcRemoteTargetClient MyRemote;
- auto R = ExitOnErr(MyRemote::Create(*C, ES));
-
// Create a remote memory manager.
- auto RemoteMM = ExitOnErr(R->createRemoteMemoryManager());
+ auto RemoteMM = ExitOnErr(
+ orc::EPCGenericRTDyldMemoryManager::CreateWithDefaultBootstrapSymbols(
+ *EPC));
// Forward MCJIT's memory manager calls to the remote memory manager.
static_cast<ForwardingMemoryManager*>(RTDyldMM)->setMemMgr(
@@ -732,16 +730,16 @@ int main(int argc, char **argv, char * const *envp) {
// Forward MCJIT's symbol resolution calls to the remote.
static_cast<ForwardingMemoryManager *>(RTDyldMM)->setResolver(
- std::make_unique<RemoteResolver<MyRemote>>(*R));
-
+ ExitOnErr(RemoteResolver::Create(*EPC)));
// Grab the target address of the JIT'd main function on the remote and call
// it.
// FIXME: argv and envp handling.
- JITTargetAddress Entry = EE->getFunctionAddress(EntryFn->getName().str());
+ auto Entry =
+ orc::ExecutorAddr(EE->getFunctionAddress(EntryFn->getName().str()));
EE->finalizeObject();
LLVM_DEBUG(dbgs() << "Executing '" << EntryFn->getName() << "' at 0x"
- << format("%llx", Entry) << "\n");
- Result = ExitOnErr(R->callIntVoid(Entry));
+ << format("%llx", Entry.getValue()) << "\n");
+ Result = ExitOnErr(EPC->runAsMain(Entry, {}));
// Like static constructors, the remote target MCJIT support doesn't handle
// this yet. It could. FIXME.
@@ -752,7 +750,7 @@ int main(int argc, char **argv, char * const *envp) {
EE.reset();
// Signal the remote target that we're done JITing.
- ExitOnErr(R->terminateSession());
+ ExitOnErr(EPC->disconnect());
}
return Result;
@@ -1062,7 +1060,8 @@ int runOrcJIT(const char *ProgName) {
if (EPC) {
// ExecutorProcessControl-based execution with JITLink.
- Result = ExitOnErr(EPC->runAsMain(MainSym.getAddress(), InputArgv));
+ Result = ExitOnErr(
+ EPC->runAsMain(orc::ExecutorAddr(MainSym.getAddress()), InputArgv));
} else {
// Manual in-process execution with RuntimeDyld.
using MainFnTy = int(int, char *[]);
@@ -1099,7 +1098,7 @@ void disallowOrcOptions() {
}
}
-std::unique_ptr<orc::shared::FDRawByteChannel> launchRemote() {
+Expected<std::unique_ptr<orc::ExecutorProcessControl>> launchRemote() {
#ifndef LLVM_ON_UNIX
llvm_unreachable("launchRemote not supported on non-Unix platforms");
#else
@@ -1148,8 +1147,9 @@ std::unique_ptr<orc::shared::FDRawByteChannel> launchRemote() {
close(PipeFD[0][0]);
close(PipeFD[1][1]);
- // Return an RPC channel connected to our end of the pipes.
- return std::make_unique<orc::shared::FDRawByteChannel>(PipeFD[1][0],
- PipeFD[0][1]);
+ // Return a SimpleRemoteEPC instance connected to our end of the pipes.
+ return orc::SimpleRemoteEPC::Create<orc::FDSimpleRemoteEPCTransport>(
+ std::make_unique<llvm::orc::InPlaceTaskDispatcher>(),
+ llvm::orc::SimpleRemoteEPC::Setup(), PipeFD[1][0], PipeFD[0][1]);
#endif
}
diff --git a/llvm/tools/llvm-ar/llvm-ar.cpp b/llvm/tools/llvm-ar/llvm-ar.cpp
index 0e1dce6bc2e8..175ec8d022c2 100644
--- a/llvm/tools/llvm-ar/llvm-ar.cpp
+++ b/llvm/tools/llvm-ar/llvm-ar.cpp
@@ -96,11 +96,11 @@ OPTIONS:
OPERATIONS:
d - delete [files] from the archive
m - move [files] in the archive
- p - print [files] found in the archive
+ p - print contents of [files] found in the archive
q - quick append [files] to the archive
r - replace or insert [files] into the archive
s - act as ranlib
- t - display contents of archive
+ t - display list of files in archive
x - extract [files] from the archive
MODIFIERS:
@@ -136,14 +136,14 @@ static unsigned MRILineNumber;
static bool ParsingMRIScript;
// Show the error plus the usage message, and exit.
-LLVM_ATTRIBUTE_NORETURN static void badUsage(Twine Error) {
+[[noreturn]] static void badUsage(Twine Error) {
WithColor::error(errs(), ToolName) << Error << "\n";
printHelpMessage();
exit(1);
}
// Show the error message and exit.
-LLVM_ATTRIBUTE_NORETURN static void fail(Twine Error) {
+[[noreturn]] static void fail(Twine Error) {
if (ParsingMRIScript) {
WithColor::error(errs(), ToolName)
<< "script line " << MRILineNumber << ": " << Error << "\n";
diff --git a/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp b/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
index f4851bfb2a9c..a238b0cf5922 100644
--- a/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
+++ b/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
@@ -11,8 +11,9 @@
// llvm-bcanalyzer [options] x.bc - Read LLVM bitcode from the x.bc file
//
// Options:
-// --help - Output information about command line switches
-// --dump - Dump low-level bitcode structure in readable format
+// --help - Output information about command line switches
+// --dump - Dump low-level bitcode structure in readable format
+// --dump-blockinfo - Dump the BLOCKINFO_BLOCK, when used with --dump
//
// This tool provides analytical information about a bitcode file. It is
// intended as an aid to developers of bitcode reading and writing software. It
@@ -47,6 +48,11 @@ static cl::opt<std::string> InputFilename(cl::Positional,
static cl::opt<bool> Dump("dump", cl::desc("Dump low level bitcode trace"),
cl::cat(BCAnalyzerCategory));
+static cl::opt<bool> DumpBlockinfo("dump-blockinfo",
+ cl::desc("Include BLOCKINFO details in low"
+ " level dump"),
+ cl::cat(BCAnalyzerCategory));
+
//===----------------------------------------------------------------------===//
// Bitcode specific analysis.
//===----------------------------------------------------------------------===//
@@ -114,6 +120,7 @@ int main(int argc, char **argv) {
O.Histogram = !NoHistogram;
O.Symbolic = !NonSymbolic;
O.ShowBinaryBlobs = ShowBinaryBlobs;
+ O.DumpBlockinfo = DumpBlockinfo;
ExitOnErr(BA.analyze(
Dump ? Optional<BCDumpOptions>(O) : Optional<BCDumpOptions>(None),
diff --git a/llvm/tools/llvm-cov/CodeCoverage.cpp b/llvm/tools/llvm-cov/CodeCoverage.cpp
index 02c0106cbc29..5c9ff41a2d5d 100644
--- a/llvm/tools/llvm-cov/CodeCoverage.cpp
+++ b/llvm/tools/llvm-cov/CodeCoverage.cpp
@@ -784,10 +784,18 @@ int CodeCoverageTool::run(Command Cmd, int argc, const char **argv) {
// If path-equivalence was given and is a comma-separated pair then set
// PathRemapping.
- auto EquivPair = StringRef(PathRemap).split(',');
- if (!(EquivPair.first.empty() && EquivPair.second.empty()))
+ if (!PathRemap.empty()) {
+ auto EquivPair = StringRef(PathRemap).split(',');
+ if (EquivPair.first.empty() || EquivPair.second.empty()) {
+ error("invalid argument '" + PathRemap +
+ "', must be in format 'from,to'",
+ "-path-equivalence");
+ return 1;
+ }
+
PathRemapping = {std::string(EquivPair.first),
std::string(EquivPair.second)};
+ }
// If a demangler is supplied, check if it exists and register it.
if (!DemanglerOpts.empty()) {
diff --git a/llvm/tools/llvm-cov/CoverageExporterLcov.cpp b/llvm/tools/llvm-cov/CoverageExporterLcov.cpp
index 6cf5d9285b90..0096a3d44d85 100644
--- a/llvm/tools/llvm-cov/CoverageExporterLcov.cpp
+++ b/llvm/tools/llvm-cov/CoverageExporterLcov.cpp
@@ -167,7 +167,7 @@ void renderLineSummary(raw_ostream &OS, const FileCoverageSummary &Summary) {
void renderBranchSummary(raw_ostream &OS, const FileCoverageSummary &Summary) {
OS << "BRF:" << Summary.BranchCoverage.getNumBranches() << '\n'
- << "BFH:" << Summary.BranchCoverage.getCovered() << '\n';
+ << "BRH:" << Summary.BranchCoverage.getCovered() << '\n';
}
void renderFile(raw_ostream &OS, const coverage::CoverageMapping &Coverage,
diff --git a/llvm/tools/llvm-cov/CoverageFilters.cpp b/llvm/tools/llvm-cov/CoverageFilters.cpp
index da3b5214eec4..fac7518d7da2 100644
--- a/llvm/tools/llvm-cov/CoverageFilters.cpp
+++ b/llvm/tools/llvm-cov/CoverageFilters.cpp
@@ -21,7 +21,7 @@ bool NameCoverageFilter::matches(
const coverage::CoverageMapping &,
const coverage::FunctionRecord &Function) const {
StringRef FuncName = Function.Name;
- return FuncName.find(Name) != StringRef::npos;
+ return FuncName.contains(Name);
}
bool NameRegexCoverageFilter::matches(
diff --git a/llvm/tools/llvm-cxxdump/Error.cpp b/llvm/tools/llvm-cxxdump/Error.cpp
index 25317820409c..053d0e0764bc 100644
--- a/llvm/tools/llvm-cxxdump/Error.cpp
+++ b/llvm/tools/llvm-cxxdump/Error.cpp
@@ -12,6 +12,7 @@
#include "Error.h"
#include "llvm/Support/ErrorHandling.h"
+#include <string>
using namespace llvm;
diff --git a/llvm/tools/llvm-cxxdump/llvm-cxxdump.cpp b/llvm/tools/llvm-cxxdump/llvm-cxxdump.cpp
index f214288e951b..1430674dbadc 100644
--- a/llvm/tools/llvm-cxxdump/llvm-cxxdump.cpp
+++ b/llvm/tools/llvm-cxxdump/llvm-cxxdump.cpp
@@ -13,6 +13,7 @@
#include "llvm-cxxdump.h"
#include "Error.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Object/SymbolSize.h"
@@ -20,7 +21,6 @@
#include "llvm/Support/Endian.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/InitLLVM.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
@@ -49,7 +49,7 @@ static void error(std::error_code EC) {
exit(1);
}
-LLVM_ATTRIBUTE_NORETURN static void error(Error Err) {
+[[noreturn]] static void error(Error Err) {
logAllUnhandledErrors(std::move(Err), WithColor::error(outs()),
"reading file: ");
outs().flush();
diff --git a/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp b/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp
index d8bf8dbccce0..ccfaaa96deb2 100644
--- a/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp
+++ b/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp
@@ -65,34 +65,27 @@ static void error(const Twine &Message) {
}
static std::string demangle(const std::string &Mangled) {
- int Status;
- std::string Prefix;
-
const char *DecoratedStr = Mangled.c_str();
if (StripUnderscore)
if (DecoratedStr[0] == '_')
++DecoratedStr;
- size_t DecoratedLength = strlen(DecoratedStr);
+ std::string Result;
+ if (nonMicrosoftDemangle(DecoratedStr, Result))
+ return Result;
+
+ std::string Prefix;
char *Undecorated = nullptr;
- if (Types ||
- ((DecoratedLength >= 2 && strncmp(DecoratedStr, "_Z", 2) == 0) ||
- (DecoratedLength >= 4 && strncmp(DecoratedStr, "___Z", 4) == 0)))
- Undecorated = itaniumDemangle(DecoratedStr, nullptr, nullptr, &Status);
+ if (Types)
+ Undecorated = itaniumDemangle(DecoratedStr, nullptr, nullptr, nullptr);
- if (!Undecorated &&
- (DecoratedLength > 6 && strncmp(DecoratedStr, "__imp_", 6) == 0)) {
+ if (!Undecorated && strncmp(DecoratedStr, "__imp_", 6) == 0) {
Prefix = "import thunk for ";
- Undecorated = itaniumDemangle(DecoratedStr + 6, nullptr, nullptr, &Status);
- }
-
- if (!Undecorated &&
- (DecoratedLength >= 2 && strncmp(DecoratedStr, "_R", 2) == 0)) {
- Undecorated = rustDemangle(DecoratedStr, nullptr, nullptr, &Status);
+ Undecorated = itaniumDemangle(DecoratedStr + 6, nullptr, nullptr, nullptr);
}
- std::string Result(Undecorated ? Prefix + Undecorated : Mangled);
+ Result = Undecorated ? Prefix + Undecorated : Mangled;
free(Undecorated);
return Result;
}
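The rewrite above leans on nonMicrosoftDemangle(), which dispatches on the
mangling prefix (Itanium "_Z"/"___Z" and Rust "_R", plus D-language manglings at
this point in the tree), so llvm-cxxfilt no longer has to pattern-match prefixes
itself. A small usage sketch with an assumed input symbol:

  std::string Out;
  if (llvm::nonMicrosoftDemangle("_ZN4llvm11raw_ostreamD2Ev", Out))
    llvm::outs() << Out << '\n'; // llvm::raw_ostream::~raw_ostream()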
@@ -128,7 +121,7 @@ static void SplitStringDelims(
static bool IsLegalItaniumChar(char C) {
// Itanium CXX ABI [External Names]p5.1.1:
// '$' and '.' in mangled names are reserved for private implementations.
- return isalnum(C) || C == '.' || C == '$' || C == '_';
+ return isAlnum(C) || C == '.' || C == '$' || C == '_';
}
// If 'Split' is true, then 'Mangled' is broken into individual words and each
diff --git a/llvm/tools/llvm-diff/DiffConsumer.cpp b/llvm/tools/llvm-diff/lib/DiffConsumer.cpp
index a703f42f14c3..b6eb71916acf 100644
--- a/llvm/tools/llvm-diff/DiffConsumer.cpp
+++ b/llvm/tools/llvm-diff/lib/DiffConsumer.cpp
@@ -134,6 +134,12 @@ void DiffConsumer::indent() {
while (N--) out << ' ';
}
+void DiffConsumer::reset() {
+ contexts.clear();
+ Differences = false;
+ Indent = 0;
+}
+
bool DiffConsumer::hadDifferences() const {
return Differences;
}
diff --git a/llvm/tools/llvm-diff/DiffConsumer.h b/llvm/tools/llvm-diff/lib/DiffConsumer.h
index f7b2f2450eec..08c3afcbe111 100644
--- a/llvm/tools/llvm-diff/DiffConsumer.h
+++ b/llvm/tools/llvm-diff/lib/DiffConsumer.h
@@ -78,6 +78,7 @@ class StringRef;
DiffConsumer()
: out(errs()), Differences(false), Indent(0) {}
+ void reset();
bool hadDifferences() const;
void enterContext(const Value *L, const Value *R) override;
void exitContext() override;
diff --git a/llvm/tools/llvm-diff/DiffLog.cpp b/llvm/tools/llvm-diff/lib/DiffLog.cpp
index d31a345d255c..d31a345d255c 100644
--- a/llvm/tools/llvm-diff/DiffLog.cpp
+++ b/llvm/tools/llvm-diff/lib/DiffLog.cpp
diff --git a/llvm/tools/llvm-diff/DiffLog.h b/llvm/tools/llvm-diff/lib/DiffLog.h
index d8b07b971198..d8b07b971198 100644
--- a/llvm/tools/llvm-diff/DiffLog.h
+++ b/llvm/tools/llvm-diff/lib/DiffLog.h
diff --git a/llvm/tools/llvm-diff/DifferenceEngine.cpp b/llvm/tools/llvm-diff/lib/DifferenceEngine.cpp
index eb746cd2a865..eb746cd2a865 100644
--- a/llvm/tools/llvm-diff/DifferenceEngine.cpp
+++ b/llvm/tools/llvm-diff/lib/DifferenceEngine.cpp
diff --git a/llvm/tools/llvm-diff/DifferenceEngine.h b/llvm/tools/llvm-diff/lib/DifferenceEngine.h
index 436a35566360..436a35566360 100644
--- a/llvm/tools/llvm-diff/DifferenceEngine.h
+++ b/llvm/tools/llvm-diff/lib/DifferenceEngine.h
diff --git a/llvm/tools/llvm-diff/llvm-diff.cpp b/llvm/tools/llvm-diff/llvm-diff.cpp
index 8a11179e741e..d9d19f35ffee 100644
--- a/llvm/tools/llvm-diff/llvm-diff.cpp
+++ b/llvm/tools/llvm-diff/llvm-diff.cpp
@@ -10,8 +10,8 @@
//
//===----------------------------------------------------------------------===//
-#include "DiffLog.h"
-#include "DifferenceEngine.h"
+#include "lib/DiffLog.h"
+#include "lib/DifferenceEngine.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
diff --git a/llvm/tools/llvm-dwarfdump/Statistics.cpp b/llvm/tools/llvm-dwarfdump/Statistics.cpp
index 19a971afa311..b237e014038d 100644
--- a/llvm/tools/llvm-dwarfdump/Statistics.cpp
+++ b/llvm/tools/llvm-dwarfdump/Statistics.cpp
@@ -29,6 +29,9 @@ constexpr int NumOfCoverageCategories = 12;
/// This is used for zero location coverage bucket.
constexpr unsigned ZeroCoverageBucket = 0;
+/// UINT64_MAX is used as an indication of overflow.
+constexpr uint64_t OverflowValue = std::numeric_limits<uint64_t>::max();
+
/// This represents variables DIE offsets.
using AbstractOriginVarsTy = llvm::SmallVector<uint64_t>;
/// This maps function DIE offset to its variables.
@@ -36,22 +39,43 @@ using AbstractOriginVarsTyMap = llvm::DenseMap<uint64_t, AbstractOriginVarsTy>;
/// This represents function DIE offsets containing an abstract_origin.
using FunctionsWithAbstractOriginTy = llvm::SmallVector<uint64_t>;
+/// This represents a data type for the stats and it helps us to
+/// detect an overflow.
+/// NOTE: This can be implemented as a template if there is another type
+/// needing this.
+struct SaturatingUINT64 {
+ /// Number that represents the stats.
+ uint64_t Value;
+
+ SaturatingUINT64(uint64_t Value_) : Value(Value_) {}
+
+ void operator++(int) { return *this += 1; }
+ void operator+=(uint64_t Value_) {
+ if (Value != OverflowValue) {
+ if (Value < OverflowValue - Value_)
+ Value += Value_;
+ else
+ Value = OverflowValue;
+ }
+ }
+};
+
/// Holds statistics for one function (or other entity that has a PC range and
/// contains variables, such as a compile unit).
struct PerFunctionStats {
/// Number of inlined instances of this function.
- unsigned NumFnInlined = 0;
+ uint64_t NumFnInlined = 0;
/// Number of out-of-line instances of this function.
- unsigned NumFnOutOfLine = 0;
+ uint64_t NumFnOutOfLine = 0;
/// Number of inlined instances that have abstract origins.
- unsigned NumAbstractOrigins = 0;
+ uint64_t NumAbstractOrigins = 0;
/// Number of variables and parameters with location across all inlined
/// instances.
- unsigned TotalVarWithLoc = 0;
+ uint64_t TotalVarWithLoc = 0;
/// Number of constants with location across all inlined instances.
- unsigned ConstantMembers = 0;
+ uint64_t ConstantMembers = 0;
  /// Number of artificial variables, parameters or members across all instances.
- unsigned NumArtificial = 0;
+ uint64_t NumArtificial = 0;
/// List of all Variables and parameters in this function.
StringSet<> VarsInFunction;
/// Compile units also cover a PC range, but have this flag set to false.
@@ -59,63 +83,63 @@ struct PerFunctionStats {
/// Function has source location information.
bool HasSourceLocation = false;
/// Number of function parameters.
- unsigned NumParams = 0;
+ uint64_t NumParams = 0;
/// Number of function parameters with source location.
- unsigned NumParamSourceLocations = 0;
+ uint64_t NumParamSourceLocations = 0;
/// Number of function parameters with type.
- unsigned NumParamTypes = 0;
+ uint64_t NumParamTypes = 0;
/// Number of function parameters with a DW_AT_location.
- unsigned NumParamLocations = 0;
+ uint64_t NumParamLocations = 0;
/// Number of local variables.
- unsigned NumLocalVars = 0;
+ uint64_t NumLocalVars = 0;
/// Number of local variables with source location.
- unsigned NumLocalVarSourceLocations = 0;
+ uint64_t NumLocalVarSourceLocations = 0;
/// Number of local variables with type.
- unsigned NumLocalVarTypes = 0;
+ uint64_t NumLocalVarTypes = 0;
/// Number of local variables with DW_AT_location.
- unsigned NumLocalVarLocations = 0;
+ uint64_t NumLocalVarLocations = 0;
};
/// Holds accumulated global statistics about DIEs.
struct GlobalStats {
/// Total number of PC range bytes covered by DW_AT_locations.
- unsigned TotalBytesCovered = 0;
+ SaturatingUINT64 TotalBytesCovered = 0;
/// Total number of parent DIE PC range bytes covered by DW_AT_Locations.
- unsigned ScopeBytesCovered = 0;
+ SaturatingUINT64 ScopeBytesCovered = 0;
/// Total number of PC range bytes in each variable's enclosing scope.
- unsigned ScopeBytes = 0;
+ SaturatingUINT64 ScopeBytes = 0;
/// Total number of PC range bytes covered by DW_AT_locations with
/// the debug entry values (DW_OP_entry_value).
- unsigned ScopeEntryValueBytesCovered = 0;
+ SaturatingUINT64 ScopeEntryValueBytesCovered = 0;
/// Total number of PC range bytes covered by DW_AT_locations of
/// formal parameters.
- unsigned ParamScopeBytesCovered = 0;
+ SaturatingUINT64 ParamScopeBytesCovered = 0;
/// Total number of PC range bytes in each parameter's enclosing scope.
- unsigned ParamScopeBytes = 0;
+ SaturatingUINT64 ParamScopeBytes = 0;
/// Total number of PC range bytes covered by DW_AT_locations with
/// the debug entry values (DW_OP_entry_value) (only for parameters).
- unsigned ParamScopeEntryValueBytesCovered = 0;
+ SaturatingUINT64 ParamScopeEntryValueBytesCovered = 0;
/// Total number of PC range bytes covered by DW_AT_locations (only for local
/// variables).
- unsigned LocalVarScopeBytesCovered = 0;
+ SaturatingUINT64 LocalVarScopeBytesCovered = 0;
/// Total number of PC range bytes in each local variable's enclosing scope.
- unsigned LocalVarScopeBytes = 0;
+ SaturatingUINT64 LocalVarScopeBytes = 0;
/// Total number of PC range bytes covered by DW_AT_locations with
/// the debug entry values (DW_OP_entry_value) (only for local variables).
- unsigned LocalVarScopeEntryValueBytesCovered = 0;
+ SaturatingUINT64 LocalVarScopeEntryValueBytesCovered = 0;
/// Total number of call site entries (DW_AT_call_file & DW_AT_call_line).
- unsigned CallSiteEntries = 0;
+ SaturatingUINT64 CallSiteEntries = 0;
/// Total number of call site DIEs (DW_TAG_call_site).
- unsigned CallSiteDIEs = 0;
+ SaturatingUINT64 CallSiteDIEs = 0;
/// Total number of call site parameter DIEs (DW_TAG_call_site_parameter).
- unsigned CallSiteParamDIEs = 0;
+ SaturatingUINT64 CallSiteParamDIEs = 0;
/// Total byte size of concrete functions. This byte size includes
/// inline functions contained in the concrete functions.
- unsigned FunctionSize = 0;
+ SaturatingUINT64 FunctionSize = 0;
/// Total byte size of inlined functions. This is the total number of bytes
/// for the top inline functions within concrete functions. This can help
/// tune the inline settings when compiling to match user expectations.
- unsigned InlineFunctionSize = 0;
+ SaturatingUINT64 InlineFunctionSize = 0;
};
/// Holds accumulated debug location statistics about local variables and
@@ -126,37 +150,37 @@ struct LocationStats {
  /// of variables with no debug location at all, but the last element
/// in the vector represents the number of fully covered variables within
/// its scope.
- std::vector<unsigned> VarParamLocStats{
- std::vector<unsigned>(NumOfCoverageCategories, 0)};
+ std::vector<SaturatingUINT64> VarParamLocStats{
+ std::vector<SaturatingUINT64>(NumOfCoverageCategories, 0)};
/// Map non debug entry values coverage.
- std::vector<unsigned> VarParamNonEntryValLocStats{
- std::vector<unsigned>(NumOfCoverageCategories, 0)};
+ std::vector<SaturatingUINT64> VarParamNonEntryValLocStats{
+ std::vector<SaturatingUINT64>(NumOfCoverageCategories, 0)};
/// The debug location statistics for formal parameters.
- std::vector<unsigned> ParamLocStats{
- std::vector<unsigned>(NumOfCoverageCategories, 0)};
+ std::vector<SaturatingUINT64> ParamLocStats{
+ std::vector<SaturatingUINT64>(NumOfCoverageCategories, 0)};
/// Map non debug entry values coverage for formal parameters.
- std::vector<unsigned> ParamNonEntryValLocStats{
- std::vector<unsigned>(NumOfCoverageCategories, 0)};
+ std::vector<SaturatingUINT64> ParamNonEntryValLocStats{
+ std::vector<SaturatingUINT64>(NumOfCoverageCategories, 0)};
/// The debug location statistics for local variables.
- std::vector<unsigned> LocalVarLocStats{
- std::vector<unsigned>(NumOfCoverageCategories, 0)};
+ std::vector<SaturatingUINT64> LocalVarLocStats{
+ std::vector<SaturatingUINT64>(NumOfCoverageCategories, 0)};
/// Map non debug entry values coverage for local variables.
- std::vector<unsigned> LocalVarNonEntryValLocStats{
- std::vector<unsigned>(NumOfCoverageCategories, 0)};
+ std::vector<SaturatingUINT64> LocalVarNonEntryValLocStats{
+ std::vector<SaturatingUINT64>(NumOfCoverageCategories, 0)};
/// Total number of local variables and function parameters processed.
- unsigned NumVarParam = 0;
+ SaturatingUINT64 NumVarParam = 0;
/// Total number of formal parameters processed.
- unsigned NumParam = 0;
+ SaturatingUINT64 NumParam = 0;
/// Total number of local variables processed.
- unsigned NumVar = 0;
+ SaturatingUINT64 NumVar = 0;
};
} // namespace
/// Collect debug location statistics for one DIE.
static void collectLocStats(uint64_t ScopeBytesCovered, uint64_t BytesInScope,
- std::vector<unsigned> &VarParamLocStats,
- std::vector<unsigned> &ParamLocStats,
- std::vector<unsigned> &LocalVarLocStats,
+ std::vector<SaturatingUINT64> &VarParamLocStats,
+ std::vector<SaturatingUINT64> &ParamLocStats,
+ std::vector<SaturatingUINT64> &LocalVarLocStats,
bool IsParam, bool IsLocalVar) {
auto getCoverageBucket = [ScopeBytesCovered, BytesInScope]() -> unsigned {
// No debug location at all for the variable.
@@ -173,11 +197,11 @@ static void collectLocStats(uint64_t ScopeBytesCovered, uint64_t BytesInScope,
unsigned CoverageBucket = getCoverageBucket();
- VarParamLocStats[CoverageBucket]++;
+ VarParamLocStats[CoverageBucket].Value++;
if (IsParam)
- ParamLocStats[CoverageBucket]++;
+ ParamLocStats[CoverageBucket].Value++;
else if (IsLocalVar)
- LocalVarLocStats[CoverageBucket]++;
+ LocalVarLocStats[CoverageBucket].Value++;
}
/// Construct an identifier for a given DIE from its Prefix, Name, DeclFileName
@@ -298,7 +322,7 @@ static void collectStatsForDie(DWARFDie Die, const std::string &FnPrefix,
U->getFormParams().Format);
// Consider the expression containing the DW_OP_entry_value as
// an entry value.
- return llvm::any_of(Expression, [](DWARFExpression::Operation &Op) {
+ return llvm::any_of(Expression, [](const DWARFExpression::Operation &Op) {
return Op.getCode() == dwarf::DW_OP_entry_value ||
Op.getCode() == dwarf::DW_OP_GNU_entry_value;
});
@@ -350,11 +374,11 @@ static void collectStatsForDie(DWARFDie Die, const std::string &FnPrefix,
// Calculate the debug location statistics.
if (BytesInScope && !DeferLocStats) {
- LocStats.NumVarParam++;
+ LocStats.NumVarParam.Value++;
if (IsParam)
- LocStats.NumParam++;
+ LocStats.NumParam.Value++;
else if (IsLocalVar)
- LocStats.NumVar++;
+ LocStats.NumVar.Value++;
collectLocStats(ScopeBytesCovered, BytesInScope, LocStats.VarParamLocStats,
LocStats.ParamLocStats, LocStats.LocalVarLocStats, IsParam,
@@ -389,7 +413,7 @@ static void collectStatsForDie(DWARFDie Die, const std::string &FnPrefix,
GlobalStats.LocalVarScopeEntryValueBytesCovered +=
BytesEntryValuesCovered;
}
- assert(GlobalStats.ScopeBytesCovered <= GlobalStats.ScopeBytes);
+ assert(GlobalStats.ScopeBytesCovered.Value <= GlobalStats.ScopeBytes.Value);
}
if (IsConstantMember) {
@@ -603,45 +627,78 @@ static void collectStatsRecursive(
/// Print human-readable output.
/// \{
static void printDatum(json::OStream &J, const char *Key, json::Value Value) {
- J.attribute(Key, Value);
+ if (Value == OverflowValue)
+ J.attribute(Key, "overflowed");
+ else
+ J.attribute(Key, Value);
+
LLVM_DEBUG(llvm::dbgs() << Key << ": " << Value << '\n');
}
static void printLocationStats(json::OStream &J, const char *Key,
- std::vector<unsigned> &LocationStats) {
- J.attribute(
- (Twine(Key) + " with 0% of parent scope covered by DW_AT_location").str(),
- LocationStats[0]);
+ std::vector<SaturatingUINT64> &LocationStats) {
+ if (LocationStats[0].Value == OverflowValue)
+ J.attribute((Twine(Key) +
+                 " with 0% of parent scope covered by DW_AT_location")
+ .str(),
+ "overflowed");
+ else
+ J.attribute(
+ (Twine(Key) + " with 0% of parent scope covered by DW_AT_location")
+ .str(),
+ LocationStats[0].Value);
LLVM_DEBUG(
llvm::dbgs() << Key
<< " with 0% of parent scope covered by DW_AT_location: \\"
- << LocationStats[0] << '\n');
- J.attribute(
- (Twine(Key) + " with (0%,10%) of parent scope covered by DW_AT_location")
- .str(),
- LocationStats[1]);
+ << LocationStats[0].Value << '\n');
+
+ if (LocationStats[1].Value == OverflowValue)
+ J.attribute((Twine(Key) +
+ " with (0%,10%) of parent scope covered by DW_AT_location")
+ .str(),
+ "overflowed");
+ else
+ J.attribute((Twine(Key) +
+ " with (0%,10%) of parent scope covered by DW_AT_location")
+ .str(),
+ LocationStats[1].Value);
LLVM_DEBUG(llvm::dbgs()
<< Key
<< " with (0%,10%) of parent scope covered by DW_AT_location: "
- << LocationStats[1] << '\n');
+ << LocationStats[1].Value << '\n');
+
for (unsigned i = 2; i < NumOfCoverageCategories - 1; ++i) {
- J.attribute((Twine(Key) + " with [" + Twine((i - 1) * 10) + "%," +
- Twine(i * 10) + "%) of parent scope covered by DW_AT_location")
- .str(),
- LocationStats[i]);
+ if (LocationStats[i].Value == OverflowValue)
+ J.attribute((Twine(Key) + " with [" + Twine((i - 1) * 10) + "%," +
+ Twine(i * 10) +
+ "%) of parent scope covered by DW_AT_location")
+ .str(),
+ "overflowed");
+ else
+ J.attribute((Twine(Key) + " with [" + Twine((i - 1) * 10) + "%," +
+ Twine(i * 10) +
+ "%) of parent scope covered by DW_AT_location")
+ .str(),
+ LocationStats[i].Value);
LLVM_DEBUG(llvm::dbgs()
<< Key << " with [" << (i - 1) * 10 << "%," << i * 10
<< "%) of parent scope covered by DW_AT_location: "
- << LocationStats[i]);
+ << LocationStats[i].Value);
}
- J.attribute(
- (Twine(Key) + " with 100% of parent scope covered by DW_AT_location")
- .str(),
- LocationStats[NumOfCoverageCategories - 1]);
+ if (LocationStats[NumOfCoverageCategories - 1].Value == OverflowValue)
+ J.attribute(
+ (Twine(Key) + " with 100% of parent scope covered by DW_AT_location")
+ .str(),
+ "overflowed");
+ else
+ J.attribute(
+ (Twine(Key) + " with 100% of parent scope covered by DW_AT_location")
+ .str(),
+ LocationStats[NumOfCoverageCategories - 1].Value);
LLVM_DEBUG(
llvm::dbgs() << Key
<< " with 100% of parent scope covered by DW_AT_location: "
- << LocationStats[NumOfCoverageCategories - 1]);
+ << LocationStats[NumOfCoverageCategories - 1].Value);
}
static void printSectionSizes(json::OStream &J, const SectionSizes &Sizes) {
@@ -750,31 +807,31 @@ bool dwarfdump::collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx,
/// The version number should be increased every time the algorithm is changed
/// (including bug fixes). New metrics may be added without increasing the
/// version.
- unsigned Version = 8;
- unsigned VarParamTotal = 0;
- unsigned VarParamUnique = 0;
- unsigned VarParamWithLoc = 0;
- unsigned NumFunctions = 0;
- unsigned NumInlinedFunctions = 0;
- unsigned NumFuncsWithSrcLoc = 0;
- unsigned NumAbstractOrigins = 0;
- unsigned ParamTotal = 0;
- unsigned ParamWithType = 0;
- unsigned ParamWithLoc = 0;
- unsigned ParamWithSrcLoc = 0;
- unsigned LocalVarTotal = 0;
- unsigned LocalVarWithType = 0;
- unsigned LocalVarWithSrcLoc = 0;
- unsigned LocalVarWithLoc = 0;
+ unsigned Version = 9;
+ SaturatingUINT64 VarParamTotal = 0;
+ SaturatingUINT64 VarParamUnique = 0;
+ SaturatingUINT64 VarParamWithLoc = 0;
+ SaturatingUINT64 NumFunctions = 0;
+ SaturatingUINT64 NumInlinedFunctions = 0;
+ SaturatingUINT64 NumFuncsWithSrcLoc = 0;
+ SaturatingUINT64 NumAbstractOrigins = 0;
+ SaturatingUINT64 ParamTotal = 0;
+ SaturatingUINT64 ParamWithType = 0;
+ SaturatingUINT64 ParamWithLoc = 0;
+ SaturatingUINT64 ParamWithSrcLoc = 0;
+ SaturatingUINT64 LocalVarTotal = 0;
+ SaturatingUINT64 LocalVarWithType = 0;
+ SaturatingUINT64 LocalVarWithSrcLoc = 0;
+ SaturatingUINT64 LocalVarWithLoc = 0;
for (auto &Entry : Statistics) {
PerFunctionStats &Stats = Entry.getValue();
- unsigned TotalVars = Stats.VarsInFunction.size() *
+ uint64_t TotalVars = Stats.VarsInFunction.size() *
(Stats.NumFnInlined + Stats.NumFnOutOfLine);
// Count variables in global scope.
if (!Stats.IsFunction)
TotalVars =
Stats.NumLocalVars + Stats.ConstantMembers + Stats.NumArtificial;
- unsigned Constants = Stats.ConstantMembers;
+ uint64_t Constants = Stats.ConstantMembers;
VarParamWithLoc += Stats.TotalVarWithLoc + Constants;
VarParamTotal += TotalVars;
VarParamUnique += Stats.VarsInFunction.size();
@@ -806,70 +863,72 @@ bool dwarfdump::collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx,
printDatum(J, "file", Filename.str());
printDatum(J, "format", FormatName);
- printDatum(J, "#functions", NumFunctions);
- printDatum(J, "#functions with location", NumFuncsWithSrcLoc);
- printDatum(J, "#inlined functions", NumInlinedFunctions);
- printDatum(J, "#inlined functions with abstract origins", NumAbstractOrigins);
+ printDatum(J, "#functions", NumFunctions.Value);
+ printDatum(J, "#functions with location", NumFuncsWithSrcLoc.Value);
+ printDatum(J, "#inlined functions", NumInlinedFunctions.Value);
+ printDatum(J, "#inlined functions with abstract origins",
+ NumAbstractOrigins.Value);
// This includes local variables and formal parameters.
- printDatum(J, "#unique source variables", VarParamUnique);
- printDatum(J, "#source variables", VarParamTotal);
- printDatum(J, "#source variables with location", VarParamWithLoc);
+ printDatum(J, "#unique source variables", VarParamUnique.Value);
+ printDatum(J, "#source variables", VarParamTotal.Value);
+ printDatum(J, "#source variables with location", VarParamWithLoc.Value);
- printDatum(J, "#call site entries", GlobalStats.CallSiteEntries);
- printDatum(J, "#call site DIEs", GlobalStats.CallSiteDIEs);
- printDatum(J, "#call site parameter DIEs", GlobalStats.CallSiteParamDIEs);
+ printDatum(J, "#call site entries", GlobalStats.CallSiteEntries.Value);
+ printDatum(J, "#call site DIEs", GlobalStats.CallSiteDIEs.Value);
+ printDatum(J, "#call site parameter DIEs",
+ GlobalStats.CallSiteParamDIEs.Value);
printDatum(J, "sum_all_variables(#bytes in parent scope)",
- GlobalStats.ScopeBytes);
+ GlobalStats.ScopeBytes.Value);
printDatum(J,
"sum_all_variables(#bytes in any scope covered by DW_AT_location)",
- GlobalStats.TotalBytesCovered);
+ GlobalStats.TotalBytesCovered.Value);
printDatum(J,
"sum_all_variables(#bytes in parent scope covered by "
"DW_AT_location)",
- GlobalStats.ScopeBytesCovered);
+ GlobalStats.ScopeBytesCovered.Value);
printDatum(J,
"sum_all_variables(#bytes in parent scope covered by "
"DW_OP_entry_value)",
- GlobalStats.ScopeEntryValueBytesCovered);
+ GlobalStats.ScopeEntryValueBytesCovered.Value);
printDatum(J, "sum_all_params(#bytes in parent scope)",
- GlobalStats.ParamScopeBytes);
+ GlobalStats.ParamScopeBytes.Value);
printDatum(J,
"sum_all_params(#bytes in parent scope covered by DW_AT_location)",
- GlobalStats.ParamScopeBytesCovered);
+ GlobalStats.ParamScopeBytesCovered.Value);
printDatum(J,
"sum_all_params(#bytes in parent scope covered by "
"DW_OP_entry_value)",
- GlobalStats.ParamScopeEntryValueBytesCovered);
+ GlobalStats.ParamScopeEntryValueBytesCovered.Value);
printDatum(J, "sum_all_local_vars(#bytes in parent scope)",
- GlobalStats.LocalVarScopeBytes);
+ GlobalStats.LocalVarScopeBytes.Value);
printDatum(J,
"sum_all_local_vars(#bytes in parent scope covered by "
"DW_AT_location)",
- GlobalStats.LocalVarScopeBytesCovered);
+ GlobalStats.LocalVarScopeBytesCovered.Value);
printDatum(J,
"sum_all_local_vars(#bytes in parent scope covered by "
"DW_OP_entry_value)",
- GlobalStats.LocalVarScopeEntryValueBytesCovered);
+ GlobalStats.LocalVarScopeEntryValueBytesCovered.Value);
- printDatum(J, "#bytes within functions", GlobalStats.FunctionSize);
+ printDatum(J, "#bytes within functions", GlobalStats.FunctionSize.Value);
printDatum(J, "#bytes within inlined functions",
- GlobalStats.InlineFunctionSize);
+ GlobalStats.InlineFunctionSize.Value);
// Print the summary for formal parameters.
- printDatum(J, "#params", ParamTotal);
- printDatum(J, "#params with source location", ParamWithSrcLoc);
- printDatum(J, "#params with type", ParamWithType);
- printDatum(J, "#params with binary location", ParamWithLoc);
+ printDatum(J, "#params", ParamTotal.Value);
+ printDatum(J, "#params with source location", ParamWithSrcLoc.Value);
+ printDatum(J, "#params with type", ParamWithType.Value);
+ printDatum(J, "#params with binary location", ParamWithLoc.Value);
// Print the summary for local variables.
- printDatum(J, "#local vars", LocalVarTotal);
- printDatum(J, "#local vars with source location", LocalVarWithSrcLoc);
- printDatum(J, "#local vars with type", LocalVarWithType);
- printDatum(J, "#local vars with binary location", LocalVarWithLoc);
+ printDatum(J, "#local vars", LocalVarTotal.Value);
+ printDatum(J, "#local vars with source location", LocalVarWithSrcLoc.Value);
+ printDatum(J, "#local vars with type", LocalVarWithType.Value);
+ printDatum(J, "#local vars with binary location", LocalVarWithLoc.Value);
// Print the debug section sizes.
printSectionSizes(J, Sizes);
@@ -877,32 +936,34 @@ bool dwarfdump::collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx,
// Print the location statistics for variables (includes local variables
// and formal parameters).
printDatum(J, "#variables processed by location statistics",
- LocStats.NumVarParam);
+ LocStats.NumVarParam.Value);
printLocationStats(J, "#variables", LocStats.VarParamLocStats);
printLocationStats(J, "#variables - entry values",
LocStats.VarParamNonEntryValLocStats);
// Print the location statistics for formal parameters.
- printDatum(J, "#params processed by location statistics", LocStats.NumParam);
+ printDatum(J, "#params processed by location statistics",
+ LocStats.NumParam.Value);
printLocationStats(J, "#params", LocStats.ParamLocStats);
printLocationStats(J, "#params - entry values",
LocStats.ParamNonEntryValLocStats);
// Print the location statistics for local variables.
printDatum(J, "#local vars processed by location statistics",
- LocStats.NumVar);
+ LocStats.NumVar.Value);
printLocationStats(J, "#local vars", LocStats.LocalVarLocStats);
printLocationStats(J, "#local vars - entry values",
LocStats.LocalVarNonEntryValLocStats);
J.objectEnd();
OS << '\n';
- LLVM_DEBUG(
- llvm::dbgs() << "Total Availability: "
- << (int)std::round((VarParamWithLoc * 100.0) / VarParamTotal)
- << "%\n";
- llvm::dbgs() << "PC Ranges covered: "
- << (int)std::round((GlobalStats.ScopeBytesCovered * 100.0) /
- GlobalStats.ScopeBytes)
- << "%\n");
+ LLVM_DEBUG(llvm::dbgs() << "Total Availability: "
+ << (int)std::round((VarParamWithLoc.Value * 100.0) /
+ VarParamTotal.Value)
+ << "%\n";
+ llvm::dbgs() << "PC Ranges covered: "
+ << (int)std::round(
+ (GlobalStats.ScopeBytesCovered.Value * 100.0) /
+ GlobalStats.ScopeBytes.Value)
+ << "%\n");
return true;
}
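The Statistics.cpp hunks above widen the llvm-dwarfdump statistics counters to 64 bits and clamp them at UINT64_MAX instead of letting them wrap, so the printers can report "overflowed" rather than a silently wrapped number. A minimal standalone sketch of the same saturating-add technique (not the upstream class verbatim; the names here are illustrative):

  #include <cstdint>
  #include <limits>

  struct SaturatingCounter {
    static constexpr uint64_t Max = std::numeric_limits<uint64_t>::max();
    uint64_t Value = 0;

    // Clamp at Max instead of wrapping; Max doubles as the "overflowed"
    // sentinel when the value is printed.
    void add(uint64_t N) {
      if (Value != Max)
        Value = (N < Max - Value) ? Value + N : Max;
    }
  };

printDatum() and printLocationStats() then compare each counter against that sentinel and emit the string "overflowed" in the JSON output when it is hit.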
diff --git a/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp b/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
index a324ff710af5..9eeaddf14928 100644
--- a/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
+++ b/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
@@ -169,7 +169,7 @@ static list<std::string>
static alias FindAlias("f", desc("Alias for --find."), aliasopt(Find),
cl::NotHidden);
static opt<bool> IgnoreCase("ignore-case",
- desc("Ignore case distinctions when searching."),
+ desc("Ignore case distinctions when using --name."),
value_desc("i"), cat(DwarfDumpCategory));
static alias IgnoreCaseAlias("i", desc("Alias for --ignore-case."),
aliasopt(IgnoreCase), cl::NotHidden);
@@ -192,11 +192,12 @@ static opt<std::string>
cl::value_desc("filename"), cat(DwarfDumpCategory));
static alias OutputFilenameAlias("out-file", desc("Alias for -o."),
aliasopt(OutputFilename));
-static opt<bool>
- UseRegex("regex",
- desc("Treat any <pattern> strings as regular expressions when "
- "searching instead of just as an exact string match."),
- cat(DwarfDumpCategory));
+static opt<bool> UseRegex(
+ "regex",
+ desc("Treat any <pattern> strings as regular "
+ "expressions when searching with --name. If --ignore-case is also "
+ "specified, the regular expression becomes case-insensitive."),
+ cat(DwarfDumpCategory));
static alias RegexAlias("x", desc("Alias for --regex"), aliasopt(UseRegex),
cl::NotHidden);
static opt<bool>
@@ -536,8 +537,9 @@ static bool handleBuffer(StringRef Filename, MemoryBufferRef Buffer,
};
if (auto *Obj = dyn_cast<ObjectFile>(BinOrErr->get())) {
if (filterArch(*Obj)) {
- std::unique_ptr<DWARFContext> DICtx =
- DWARFContext::create(*Obj, nullptr, "", RecoverableErrorHandler);
+ std::unique_ptr<DWARFContext> DICtx = DWARFContext::create(
+ *Obj, DWARFContext::ProcessDebugRelocations::Process, nullptr, "",
+ RecoverableErrorHandler);
if (!HandleObj(*Obj, *DICtx, Filename, OS))
Result = false;
}
@@ -548,8 +550,9 @@ static bool handleBuffer(StringRef Filename, MemoryBufferRef Buffer,
if (auto MachOOrErr = ObjForArch.getAsObjectFile()) {
auto &Obj = **MachOOrErr;
if (filterArch(Obj)) {
- std::unique_ptr<DWARFContext> DICtx =
- DWARFContext::create(Obj, nullptr, "", RecoverableErrorHandler);
+ std::unique_ptr<DWARFContext> DICtx = DWARFContext::create(
+ Obj, DWARFContext::ProcessDebugRelocations::Process, nullptr, "",
+ RecoverableErrorHandler);
if (!HandleObj(Obj, *DICtx, ObjName, OS))
Result = false;
}
diff --git a/llvm/tools/llvm-dwp/llvm-dwp.cpp b/llvm/tools/llvm-dwp/llvm-dwp.cpp
index 1f583728c141..4b6f7bc8dd34 100644
--- a/llvm/tools/llvm-dwp/llvm-dwp.cpp
+++ b/llvm/tools/llvm-dwp/llvm-dwp.cpp
@@ -20,10 +20,10 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCTargetOptionsCommandFlags.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/InitLLVM.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/ToolOutputFile.h"
diff --git a/llvm/tools/llvm-lto/llvm-lto.cpp b/llvm/tools/llvm-lto/llvm-lto.cpp
index 45bfa84fb826..995ebacacb87 100644
--- a/llvm/tools/llvm-lto/llvm-lto.cpp
+++ b/llvm/tools/llvm-lto/llvm-lto.cpp
@@ -227,6 +227,10 @@ static cl::opt<bool> ListDependentLibrariesOnly(
"Instead of running LTO, list the dependent libraries in each IR file"),
cl::cat(LTOCategory));
+static cl::opt<bool> QueryHasCtorDtor(
+ "query-hasCtorDtor", cl::init(false),
+ cl::desc("Queries LTOModule::hasCtorDtor() on each IR file"));
+
static cl::opt<bool>
SetMergedModule("set-merged-module", cl::init(false),
cl::desc("Use the first input module as the merged module"),
@@ -371,7 +375,7 @@ static void printIndexStats() {
ExitOnErr(getModuleSummaryIndexForFile(Filename));
// Skip files without a module summary.
if (!Index)
- report_fatal_error(Filename + " does not contain an index");
+ report_fatal_error(Twine(Filename) + " does not contain an index");
unsigned Calls = 0, Refs = 0, Functions = 0, Alias = 0, Globals = 0;
for (auto &Summaries : *Index) {
@@ -394,22 +398,27 @@ static void printIndexStats() {
}
}
-/// List symbols in each IR file.
+/// Load each IR file and dump certain information based on active flags.
///
/// The main point here is to provide lit-testable coverage for the LTOModule
-/// functionality that's exposed by the C API to list symbols. Moreover, this
-/// provides testing coverage for modules that have been created in their own
-/// contexts.
-static void listSymbols(const TargetOptions &Options) {
+/// functionality that's exposed by the C API. Moreover, this provides testing
+/// coverage for modules that have been created in their own contexts.
+static void testLTOModule(const TargetOptions &Options) {
for (auto &Filename : InputFilenames) {
std::unique_ptr<MemoryBuffer> Buffer;
std::unique_ptr<LTOModule> Module =
getLocalLTOModule(Filename, Buffer, Options);
- // List the symbols.
- outs() << Filename << ":\n";
- for (int I = 0, E = Module->getSymbolCount(); I != E; ++I)
- outs() << Module->getSymbolName(I) << "\n";
+ if (ListSymbolsOnly) {
+ // List the symbols.
+ outs() << Filename << ":\n";
+ for (int I = 0, E = Module->getSymbolCount(); I != E; ++I)
+ outs() << Module->getSymbolName(I) << "\n";
+ }
+ if (QueryHasCtorDtor)
+ outs() << Filename
+ << ": hasCtorDtor = " << (Module->hasCtorDtor() ? "true" : "false")
+ << "\n";
}
}
@@ -478,6 +487,10 @@ static void createCombinedModuleSummaryIndex() {
ExitOnErr(errorOrToExpected(MemoryBuffer::getFileOrSTDIN(Filename)));
ExitOnErr(readModuleSummaryIndex(*MB, CombinedIndex, NextModuleId++));
}
+ // In order to use this index for testing, specifically import testing, we
+ // need to update any indirect call edges created from SamplePGO, so that they
+ // point to the correct GUIDs.
+ updateIndirectCalls(CombinedIndex);
std::error_code EC;
assert(!OutputFilename.empty());
raw_fd_ostream OS(OutputFilename + ".thinlto.bc", EC,
@@ -939,8 +952,8 @@ int main(int argc, char **argv) {
// set up the TargetOptions for the machine
TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags(Triple());
- if (ListSymbolsOnly) {
- listSymbols(Options);
+ if (ListSymbolsOnly || QueryHasCtorDtor) {
+ testLTOModule(Options);
return 0;
}
@@ -1050,7 +1063,7 @@ int main(int argc, char **argv) {
CodeGen.addMustPreserveSymbol(KeptDSOSyms[i]);
// Set cpu and attrs strings for the default target/subtarget.
- CodeGen.setCpu(codegen::getMCPU().c_str());
+ CodeGen.setCpu(codegen::getMCPU());
CodeGen.setOptLevel(OptLevel - '0');
CodeGen.setAttrs(codegen::getMAttrs());
@@ -1084,8 +1097,7 @@ int main(int argc, char **argv) {
error("writing merged module failed.");
}
- auto AddStream =
- [&](size_t Task) -> std::unique_ptr<lto::NativeObjectStream> {
+ auto AddStream = [&](size_t Task) -> std::unique_ptr<CachedFileStream> {
std::string PartFilename = OutputFilename;
if (Parallelism != 1)
PartFilename += "." + utostr(Task);
@@ -1095,7 +1107,7 @@ int main(int argc, char **argv) {
std::make_unique<raw_fd_ostream>(PartFilename, EC, sys::fs::OF_None);
if (EC)
error("error opening the file '" + PartFilename + "': " + EC.message());
- return std::make_unique<lto::NativeObjectStream>(std::move(S));
+ return std::make_unique<CachedFileStream>(std::move(S));
};
if (!CodeGen.compileOptimized(AddStream, Parallelism))
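The llvm-lto hunk above generalizes listSymbols() into testLTOModule(), so the tool can exercise more than one LTOModule query per input file: listing symbols as before and, with the new -query-hasCtorDtor flag, reporting whether a module defines global constructors or destructors. Condensed from the code above, the query loop amounts to:

  // One pass over the inputs; each query is gated by its own flag.
  for (auto &Filename : InputFilenames) {
    std::unique_ptr<MemoryBuffer> Buffer;
    std::unique_ptr<LTOModule> Module =
        getLocalLTOModule(Filename, Buffer, Options);
    if (QueryHasCtorDtor)
      outs() << Filename << ": hasCtorDtor = "
             << (Module->hasCtorDtor() ? "true" : "false") << "\n";
  }

main() routes into this path whenever ListSymbolsOnly or QueryHasCtorDtor is set.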
diff --git a/llvm/tools/llvm-lto2/llvm-lto2.cpp b/llvm/tools/llvm-lto2/llvm-lto2.cpp
index c0bff1eabee2..6f6f6c1ed90f 100644
--- a/llvm/tools/llvm-lto2/llvm-lto2.cpp
+++ b/llvm/tools/llvm-lto2/llvm-lto2.cpp
@@ -19,10 +19,10 @@
#include "llvm/CodeGen/CommandFlags.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/DiagnosticPrinter.h"
-#include "llvm/LTO/Caching.h"
#include "llvm/LTO/LTO.h"
#include "llvm/Passes/PassPlugin.h"
#include "llvm/Remarks/HotnessThresholdParser.h"
+#include "llvm/Support/Caching.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/InitLLVM.h"
@@ -362,23 +362,23 @@ static int run(int argc, char **argv) {
if (HasErrors)
return 1;
- auto AddStream =
- [&](size_t Task) -> std::unique_ptr<lto::NativeObjectStream> {
+ auto AddStream = [&](size_t Task) -> std::unique_ptr<CachedFileStream> {
std::string Path = OutputFilename + "." + utostr(Task);
std::error_code EC;
auto S = std::make_unique<raw_fd_ostream>(Path, EC, sys::fs::OF_None);
check(EC, Path);
- return std::make_unique<lto::NativeObjectStream>(std::move(S));
+ return std::make_unique<CachedFileStream>(std::move(S));
};
auto AddBuffer = [&](size_t Task, std::unique_ptr<MemoryBuffer> MB) {
*AddStream(Task)->OS << MB->getBuffer();
};
- NativeObjectCache Cache;
+ FileCache Cache;
if (!CacheDir.empty())
- Cache = check(localCache(CacheDir, AddBuffer), "failed to create cache");
+ Cache = check(localCache("ThinLTO", "Thin", CacheDir, AddBuffer),
+ "failed to create cache");
check(Lto.run(AddStream, Cache), "LTO::run failed");
return 0;
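Both llvm-lto and llvm-lto2 above migrate from lto::NativeObjectStream / lto::NativeObjectCache (llvm/LTO/Caching.h) to CachedFileStream / FileCache from llvm/Support/Caching.h, and localCache() now takes a cache name and a temp-file prefix ahead of the cache directory. Restated from the hunks above, with error handling elided, the new wiring in llvm-lto2 is:

  // Produce an output stream for a given task; the cache wraps this.
  auto AddStream = [&](size_t Task) -> std::unique_ptr<CachedFileStream> {
    std::string Path = OutputFilename + "." + utostr(Task);
    std::error_code EC;
    auto S = std::make_unique<raw_fd_ostream>(Path, EC, sys::fs::OF_None);
    return std::make_unique<CachedFileStream>(std::move(S));
  };

  // Cached results are written out through the same stream factory.
  auto AddBuffer = [&](size_t Task, std::unique_ptr<MemoryBuffer> MB) {
    *AddStream(Task)->OS << MB->getBuffer();
  };

  FileCache Cache;
  if (!CacheDir.empty())
    Cache = check(localCache("ThinLTO", "Thin", CacheDir, AddBuffer),
                  "failed to create cache");
  check(Lto.run(AddStream, Cache), "LTO::run failed");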
diff --git a/llvm/tools/llvm-mc/Disassembler.cpp b/llvm/tools/llvm-mc/Disassembler.cpp
index 16ab99548adf..ac55d05db192 100644
--- a/llvm/tools/llvm-mc/Disassembler.cpp
+++ b/llvm/tools/llvm-mc/Disassembler.cpp
@@ -21,9 +21,9 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -156,7 +156,7 @@ int Disassembler::disassemble(const Target &T, const std::string &Triple,
}
// Set up initial section manually here
- Streamer.InitSections(false);
+ Streamer.initSections(false, STI);
bool ErrorOccurred = false;
diff --git a/llvm/tools/llvm-mc/llvm-mc.cpp b/llvm/tools/llvm-mc/llvm-mc.cpp
index 24c601b7033f..4e5a12e53a6b 100644
--- a/llvm/tools/llvm-mc/llvm-mc.cpp
+++ b/llvm/tools/llvm-mc/llvm-mc.cpp
@@ -26,6 +26,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCTargetOptionsCommandFlags.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/FileUtilities.h"
@@ -34,7 +35,6 @@
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/WithColor.h"
@@ -571,7 +571,7 @@ int main(int argc, char **argv) {
MCOptions.MCIncrementalLinkerCompatible,
/*DWARFMustBeAtTheEnd*/ false));
if (NoExecStack)
- Str->InitSections(true);
+ Str->initSections(true, *STI);
}
// Use Assembler information for parsing.
diff --git a/llvm/tools/llvm-mca/CodeRegionGenerator.cpp b/llvm/tools/llvm-mca/CodeRegionGenerator.cpp
index 6ad2a65592b9..6cdd0ba797aa 100644
--- a/llvm/tools/llvm-mca/CodeRegionGenerator.cpp
+++ b/llvm/tools/llvm-mca/CodeRegionGenerator.cpp
@@ -114,7 +114,7 @@ Expected<const CodeRegions &> AsmCodeRegionGenerator::parseCodeRegions(
// Need to initialize an MCTargetStreamer otherwise
// certain asm directives will cause a segfault.
- // Using nulls() so that anything emitted by the MCTagetStreamer
+ // Using nulls() so that anything emitted by the MCTargetStreamer
// doesn't show up in the llvm-mca output.
raw_ostream &OSRef = nulls();
formatted_raw_ostream FOSRef(OSRef);
diff --git a/llvm/tools/llvm-mca/CodeRegionGenerator.h b/llvm/tools/llvm-mca/CodeRegionGenerator.h
index 1c11784ca3fb..ac02131b2f39 100644
--- a/llvm/tools/llvm-mca/CodeRegionGenerator.h
+++ b/llvm/tools/llvm-mca/CodeRegionGenerator.h
@@ -20,9 +20,9 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/TargetRegistry.h"
#include <memory>
namespace llvm {
@@ -37,7 +37,7 @@ protected:
CodeRegionGenerator &operator=(const CodeRegionGenerator &) = delete;
public:
- CodeRegionGenerator(SourceMgr &SM) : Regions(SM) {}
+ CodeRegionGenerator(llvm::SourceMgr &SM) : Regions(SM) {}
virtual ~CodeRegionGenerator();
virtual Expected<const CodeRegions &>
parseCodeRegions(const std::unique_ptr<MCInstPrinter> &IP) = 0;
@@ -54,7 +54,7 @@ class AsmCodeRegionGenerator final : public CodeRegionGenerator {
unsigned AssemblerDialect; // This is set during parsing.
public:
- AsmCodeRegionGenerator(const Target &T, SourceMgr &SM, MCContext &C,
+ AsmCodeRegionGenerator(const Target &T, llvm::SourceMgr &SM, MCContext &C,
const MCAsmInfo &A, const MCSubtargetInfo &S,
const MCInstrInfo &I)
: CodeRegionGenerator(SM), TheTarget(T), Ctx(C), MAI(A), STI(S), MCII(I),
diff --git a/llvm/tools/llvm-mca/PipelinePrinter.cpp b/llvm/tools/llvm-mca/PipelinePrinter.cpp
index 955b825891fa..9d06c6a19395 100644
--- a/llvm/tools/llvm-mca/PipelinePrinter.cpp
+++ b/llvm/tools/llvm-mca/PipelinePrinter.cpp
@@ -14,7 +14,6 @@
#include "PipelinePrinter.h"
#include "CodeRegion.h"
#include "Views/InstructionView.h"
-#include "Views/View.h"
namespace llvm {
namespace mca {
diff --git a/llvm/tools/llvm-mca/PipelinePrinter.h b/llvm/tools/llvm-mca/PipelinePrinter.h
index 1365f75be0f5..fd262f0a8a5d 100644
--- a/llvm/tools/llvm-mca/PipelinePrinter.h
+++ b/llvm/tools/llvm-mca/PipelinePrinter.h
@@ -16,11 +16,11 @@
#ifndef LLVM_TOOLS_LLVM_MCA_PIPELINEPRINTER_H
#define LLVM_TOOLS_LLVM_MCA_PIPELINEPRINTER_H
-#include "Views/View.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MCA/Context.h"
#include "llvm/MCA/Pipeline.h"
+#include "llvm/MCA/View.h"
#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "llvm-mca"
diff --git a/llvm/tools/llvm-mca/Views/DispatchStatistics.h b/llvm/tools/llvm-mca/Views/DispatchStatistics.h
index 81b582f74a6b..cfd12691c03f 100644
--- a/llvm/tools/llvm-mca/Views/DispatchStatistics.h
+++ b/llvm/tools/llvm-mca/Views/DispatchStatistics.h
@@ -33,9 +33,9 @@
#ifndef LLVM_TOOLS_LLVM_MCA_DISPATCHVIEW_H
#define LLVM_TOOLS_LLVM_MCA_DISPATCHVIEW_H
-#include "Views/View.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MCA/View.h"
#include <map>
namespace llvm {
diff --git a/llvm/tools/llvm-mca/Views/InstructionView.h b/llvm/tools/llvm-mca/Views/InstructionView.h
index 1843b0513dfc..cec07eef6a80 100644
--- a/llvm/tools/llvm-mca/Views/InstructionView.h
+++ b/llvm/tools/llvm-mca/Views/InstructionView.h
@@ -15,7 +15,7 @@
#ifndef LLVM_TOOLS_LLVM_MCA_INSTRUCTIONVIEW_H
#define LLVM_TOOLS_LLVM_MCA_INSTRUCTIONVIEW_H
-#include "Views/View.h"
+#include "llvm/MCA/View.h"
#include "llvm/Support/JSON.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/llvm/tools/llvm-mca/Views/RegisterFileStatistics.h b/llvm/tools/llvm-mca/Views/RegisterFileStatistics.h
index ec5c5f431e12..3de2a22ac32d 100644
--- a/llvm/tools/llvm-mca/Views/RegisterFileStatistics.h
+++ b/llvm/tools/llvm-mca/Views/RegisterFileStatistics.h
@@ -35,9 +35,9 @@
#ifndef LLVM_TOOLS_LLVM_MCA_REGISTERFILESTATISTICS_H
#define LLVM_TOOLS_LLVM_MCA_REGISTERFILESTATISTICS_H
-#include "Views/View.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MCA/View.h"
namespace llvm {
namespace mca {
diff --git a/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.h b/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.h
index 86b46e93aa7c..ed3736c64515 100644
--- a/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.h
+++ b/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.h
@@ -28,8 +28,8 @@
#ifndef LLVM_TOOLS_LLVM_MCA_RETIRECONTROLUNITSTATISTICS_H
#define LLVM_TOOLS_LLVM_MCA_RETIRECONTROLUNITSTATISTICS_H
-#include "Views/View.h"
#include "llvm/MC/MCSchedule.h"
+#include "llvm/MCA/View.h"
#include <map>
namespace llvm {
diff --git a/llvm/tools/llvm-mca/Views/SchedulerStatistics.h b/llvm/tools/llvm-mca/Views/SchedulerStatistics.h
index 66f4b0011866..9d2f71c13e5a 100644
--- a/llvm/tools/llvm-mca/Views/SchedulerStatistics.h
+++ b/llvm/tools/llvm-mca/Views/SchedulerStatistics.h
@@ -36,9 +36,9 @@
#ifndef LLVM_TOOLS_LLVM_MCA_SCHEDULERSTATISTICS_H
#define LLVM_TOOLS_LLVM_MCA_SCHEDULERSTATISTICS_H
-#include "Views/View.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MCA/View.h"
#include <map>
namespace llvm {
diff --git a/llvm/tools/llvm-mca/Views/SummaryView.h b/llvm/tools/llvm-mca/Views/SummaryView.h
index e2c7cfd19e94..21f3fad23ca0 100644
--- a/llvm/tools/llvm-mca/Views/SummaryView.h
+++ b/llvm/tools/llvm-mca/Views/SummaryView.h
@@ -28,9 +28,9 @@
#ifndef LLVM_TOOLS_LLVM_MCA_SUMMARYVIEW_H
#define LLVM_TOOLS_LLVM_MCA_SUMMARYVIEW_H
-#include "Views/View.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/MC/MCSchedule.h"
+#include "llvm/MCA/View.h"
#include "llvm/Support/raw_ostream.h"
namespace llvm {
@@ -71,12 +71,6 @@ class SummaryView : public View {
// Used to map resource indices to actual processor resource IDs.
llvm::SmallVector<unsigned, 8> ResIdx2ProcResID;
- // Compute the reciprocal throughput for the analyzed code block.
- // The reciprocal block throughput is computed as the MAX between:
- // - NumMicroOps / DispatchWidth
- // - Total Resource Cycles / #Units (for every resource consumed).
- double getBlockRThroughput() const;
-
/// Compute the data we want to print out in the object DV.
void collectData(DisplayValues &DV) const;
diff --git a/llvm/tools/llvm-mca/Views/TimelineView.cpp b/llvm/tools/llvm-mca/Views/TimelineView.cpp
index 9a949761bb75..5c05edbdea68 100644
--- a/llvm/tools/llvm-mca/Views/TimelineView.cpp
+++ b/llvm/tools/llvm-mca/Views/TimelineView.cpp
@@ -145,10 +145,11 @@ void TimelineView::printWaitTimeEntry(formatted_raw_ostream &OS,
double AverageTime1, AverageTime2, AverageTime3;
AverageTime1 =
- (double)Entry.CyclesSpentInSchedulerQueue / CumulativeExecutions;
- AverageTime2 = (double)Entry.CyclesSpentInSQWhileReady / CumulativeExecutions;
- AverageTime3 =
- (double)Entry.CyclesSpentAfterWBAndBeforeRetire / CumulativeExecutions;
+ (double)(Entry.CyclesSpentInSchedulerQueue * 10) / CumulativeExecutions;
+ AverageTime2 =
+ (double)(Entry.CyclesSpentInSQWhileReady * 10) / CumulativeExecutions;
+ AverageTime3 = (double)(Entry.CyclesSpentAfterWBAndBeforeRetire * 10) /
+ CumulativeExecutions;
OS << Executions;
OS.PadToColumn(13);
@@ -157,18 +158,18 @@ void TimelineView::printWaitTimeEntry(formatted_raw_ostream &OS,
if (!PrintingTotals)
tryChangeColor(OS, Entry.CyclesSpentInSchedulerQueue, CumulativeExecutions,
BufferSize);
- OS << format("%.1f", floor((AverageTime1 * 10) + 0.5) / 10);
+ OS << format("%.1f", floor(AverageTime1 + 0.5) / 10);
OS.PadToColumn(20);
if (!PrintingTotals)
tryChangeColor(OS, Entry.CyclesSpentInSQWhileReady, CumulativeExecutions,
BufferSize);
- OS << format("%.1f", floor((AverageTime2 * 10) + 0.5) / 10);
+ OS << format("%.1f", floor(AverageTime2 + 0.5) / 10);
OS.PadToColumn(27);
if (!PrintingTotals)
tryChangeColor(OS, Entry.CyclesSpentAfterWBAndBeforeRetire,
CumulativeExecutions,
getSubTargetInfo().getSchedModel().MicroOpBufferSize);
- OS << format("%.1f", floor((AverageTime3 * 10) + 0.5) / 10);
+ OS << format("%.1f", floor(AverageTime3 + 0.5) / 10);
if (OS.has_colors())
OS.resetColor();
@@ -295,8 +296,10 @@ void TimelineView::printTimeline(raw_ostream &OS) const {
// attribute is set correctly whether or not it is greater
// than timeline-max-cycles so we can use that to ensure
// we don't early exit because of a 0 latency instruction.
- if (Entry.CycleRetired == 0 && Entry.CycleExecuted != 0)
+ if (Entry.CycleRetired == 0 && Entry.CycleExecuted != 0) {
+ FOS << "Truncated display due to cycle limit\n";
return;
+ }
unsigned SourceIndex = IID % Source.size();
printTimelineViewEntry(FOS, Entry, Iteration, SourceIndex);
diff --git a/llvm/tools/llvm-mca/lib/AMDGPU/AMDGPUCustomBehaviour.cpp b/llvm/tools/llvm-mca/lib/AMDGPU/AMDGPUCustomBehaviour.cpp
deleted file mode 100644
index a655f3faf1bf..000000000000
--- a/llvm/tools/llvm-mca/lib/AMDGPU/AMDGPUCustomBehaviour.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
-//===------------------ AMDGPUCustomBehaviour.cpp ---------------*-C++ -* -===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-/// \file
-///
-/// This file implements methods from the AMDGPUCustomBehaviour class.
-///
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPUCustomBehaviour.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "SIInstrInfo.h"
-#include "llvm/Support/WithColor.h"
-
-namespace llvm {
-namespace mca {
-
-AMDGPUCustomBehaviour::AMDGPUCustomBehaviour(const MCSubtargetInfo &STI,
- const SourceMgr &SrcMgr,
- const MCInstrInfo &MCII)
- : CustomBehaviour(STI, SrcMgr, MCII) {}
-
-unsigned AMDGPUCustomBehaviour::checkCustomHazard(ArrayRef<InstRef> IssuedInst,
- const InstRef &IR) {
- return 0;
-}
-
-} // namespace mca
-} // namespace llvm
diff --git a/llvm/tools/llvm-mca/lib/AMDGPU/AMDGPUCustomBehaviour.h b/llvm/tools/llvm-mca/lib/AMDGPU/AMDGPUCustomBehaviour.h
deleted file mode 100644
index 0dd21c7b4c44..000000000000
--- a/llvm/tools/llvm-mca/lib/AMDGPU/AMDGPUCustomBehaviour.h
+++ /dev/null
@@ -1,57 +0,0 @@
-//===------------------- AMDGPUCustomBehaviour.h ----------------*-C++ -* -===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-/// \file
-///
-/// This file defines the AMDGPUCustomBehaviour class which inherits from
-/// CustomBehaviour.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TOOLS_LLVM_MCA_LIB_AMDGPU_AMDGPUCUSTOMBEHAVIOUR_H
-#define LLVM_TOOLS_LLVM_MCA_LIB_AMDGPU_AMDGPUCUSTOMBEHAVIOUR_H
-
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/MCA/CustomBehaviour.h"
-#include "llvm/Support/TargetParser.h"
-
-namespace llvm {
-namespace mca {
-
-class AMDGPUInstrPostProcess : public InstrPostProcess {
-public:
- AMDGPUInstrPostProcess(const MCSubtargetInfo &STI, const MCInstrInfo &MCII)
- : InstrPostProcess(STI, MCII) {}
-
- ~AMDGPUInstrPostProcess() {}
-
- void postProcessInstruction(std::unique_ptr<Instruction> &Inst,
- const MCInst &MCI) override {}
-};
-
-class AMDGPUCustomBehaviour : public CustomBehaviour {
-public:
- AMDGPUCustomBehaviour(const MCSubtargetInfo &STI, const SourceMgr &SrcMgr,
- const MCInstrInfo &MCII);
-
- ~AMDGPUCustomBehaviour() {}
-
- /// This method is used to determine if an instruction
- /// should be allowed to be dispatched. The return value is
- /// how many cycles until the instruction can be dispatched.
- /// This method is called after MCA has already checked for
- /// register and hardware dependencies so this method should only
- /// implement custom behaviour and dependencies that are not picked up
- /// by MCA naturally.
- unsigned checkCustomHazard(ArrayRef<InstRef> IssuedInst,
- const InstRef &IR) override;
-};
-
-} // namespace mca
-} // namespace llvm
-
-#endif /* LLVM_TOOLS_LLVM_MCA_LIB_AMDGPU_AMDGPUCUSTOMBEHAVIOUR_H */
diff --git a/llvm/tools/llvm-mca/llvm-mca.cpp b/llvm/tools/llvm-mca/llvm-mca.cpp
index a473cd8f1719..0b58ca377ce1 100644
--- a/llvm/tools/llvm-mca/llvm-mca.cpp
+++ b/llvm/tools/llvm-mca/llvm-mca.cpp
@@ -32,9 +32,6 @@
#include "Views/SchedulerStatistics.h"
#include "Views/SummaryView.h"
#include "Views/TimelineView.h"
-#ifdef HAS_AMDGPU
-#include "lib/AMDGPU/AMDGPUCustomBehaviour.h"
-#endif
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeEmitter.h"
@@ -43,6 +40,7 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCTargetOptionsCommandFlags.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/MCA/CodeEmitter.h"
#include "llvm/MCA/Context.h"
#include "llvm/MCA/CustomBehaviour.h"
@@ -59,7 +57,6 @@
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/WithColor.h"
@@ -293,39 +290,6 @@ static void processViewOptions(bool IsOutOfOrder) {
processOptionImpl(PrintRetireStats, Default);
}
-std::unique_ptr<mca::InstrPostProcess>
-createInstrPostProcess(const Triple &TheTriple, const MCSubtargetInfo &STI,
- const MCInstrInfo &MCII) {
- // Might be a good idea to have a separate flag so that InstrPostProcess
- // can be used with or without CustomBehaviour
- if (DisableCustomBehaviour)
- return std::make_unique<mca::InstrPostProcess>(STI, MCII);
-#ifdef HAS_AMDGPU
- if (TheTriple.isAMDGPU())
- return std::make_unique<mca::AMDGPUInstrPostProcess>(STI, MCII);
-#endif
- return std::make_unique<mca::InstrPostProcess>(STI, MCII);
-}
-
-std::unique_ptr<mca::CustomBehaviour>
-createCustomBehaviour(const Triple &TheTriple, const MCSubtargetInfo &STI,
- const mca::SourceMgr &SrcMgr, const MCInstrInfo &MCII) {
- // Build the appropriate CustomBehaviour object for the current target.
- // The CustomBehaviour class should never depend on the source code,
- // but it can depend on the list of mca::Instruction and any classes
- // that can be built using just the target info. If you need extra
- // information from the source code or the list of MCInst, consider
- // adding that information to the mca::Instruction class and setting
- // it during InstrBuilder::createInstruction().
- if (DisableCustomBehaviour)
- return std::make_unique<mca::CustomBehaviour>(STI, SrcMgr, MCII);
-#ifdef HAS_AMDGPU
- if (TheTriple.isAMDGPU())
- return std::make_unique<mca::AMDGPUCustomBehaviour>(STI, SrcMgr, MCII);
-#endif
- return std::make_unique<mca::CustomBehaviour>(STI, SrcMgr, MCII);
-}
-
// Returns true on success.
static bool runPipeline(mca::Pipeline &P) {
// Handle pipeline errors here.
@@ -344,6 +308,7 @@ int main(int argc, char **argv) {
InitializeAllTargetInfos();
InitializeAllTargetMCs();
InitializeAllAsmParsers();
+ InitializeAllTargetMCAs();
// Enable printing of available targets when flag --version is specified.
cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion);
@@ -532,8 +497,18 @@ int main(int argc, char **argv) {
// Lower the MCInst sequence into an mca::Instruction sequence.
ArrayRef<MCInst> Insts = Region->getInstructions();
mca::CodeEmitter CE(*STI, *MAB, *MCE, Insts);
- std::unique_ptr<mca::InstrPostProcess> IPP =
- createInstrPostProcess(TheTriple, *STI, *MCII);
+
+ std::unique_ptr<mca::InstrPostProcess> IPP;
+ if (!DisableCustomBehaviour) {
+ IPP = std::unique_ptr<mca::InstrPostProcess>(
+ TheTarget->createInstrPostProcess(*STI, *MCII));
+ }
+ if (!IPP)
+ // If the target doesn't have its own IPP implemented (or the
+ // -disable-cb flag is set) then we use the base class
+ // (which does nothing).
+ IPP = std::make_unique<mca::InstrPostProcess>(*STI, *MCII);
+
std::vector<std::unique_ptr<mca::Instruction>> LoweredSequence;
for (const MCInst &MCI : Insts) {
Expected<std::unique_ptr<mca::Instruction>> Inst =
@@ -602,14 +577,35 @@ int main(int argc, char **argv) {
// the source code (but it can depend on the list of
// mca::Instruction or any objects that can be reconstructed
// from the target information).
- std::unique_ptr<mca::CustomBehaviour> CB =
- createCustomBehaviour(TheTriple, *STI, S, *MCII);
+ std::unique_ptr<mca::CustomBehaviour> CB;
+ if (!DisableCustomBehaviour)
+ CB = std::unique_ptr<mca::CustomBehaviour>(
+ TheTarget->createCustomBehaviour(*STI, S, *MCII));
+ if (!CB)
+ // If the target doesn't have its own CB implemented (or the -disable-cb
+ // flag is set) then we use the base class (which does nothing).
+ CB = std::make_unique<mca::CustomBehaviour>(*STI, S, *MCII);
// Create a basic pipeline simulating an out-of-order backend.
auto P = MCA.createDefaultPipeline(PO, S, *CB);
mca::PipelinePrinter Printer(*P, *Region, RegionIdx, *STI, PO);
+ // Targets can define their own custom Views that exist within their
+ // /lib/Target/ directory so that the View can utilize their CustomBehaviour
+ // or other backend symbols / functionality that are not already exposed
+ // through one of the MC-layer classes. These Views will be initialized
+ // using the CustomBehaviour::getViews() variants.
+ // If a target makes a custom View that does not depend on their target
+ // CB or their backend, they should put the View within
+ // /tools/llvm-mca/Views/ instead.
+ if (!DisableCustomBehaviour) {
+ std::vector<std::unique_ptr<mca::View>> CBViews =
+ CB->getStartViews(*IP, Insts);
+ for (auto &CBView : CBViews)
+ Printer.addView(std::move(CBView));
+ }
+
// When we output JSON, we add a view that contains the instructions
// and CPU resource information.
if (PrintJson) {
@@ -635,6 +631,16 @@ int main(int argc, char **argv) {
Printer.addView(std::make_unique<mca::InstructionInfoView>(
*STI, *MCII, CE, ShowEncoding, Insts, *IP));
+ // Fetch custom Views that are to be placed after the InstructionInfoView.
+ // Refer to the comment paired with the CB->getStartViews(*IP, Insts); line
+ // for more info.
+ if (!DisableCustomBehaviour) {
+ std::vector<std::unique_ptr<mca::View>> CBViews =
+ CB->getPostInstrInfoViews(*IP, Insts);
+ for (auto &CBView : CBViews)
+ Printer.addView(std::move(CBView));
+ }
+
if (PrintDispatchStats)
Printer.addView(std::make_unique<mca::DispatchStatistics>());
@@ -659,6 +665,16 @@ int main(int argc, char **argv) {
TimelineMaxCycles));
}
+ // Fetch custom Views that are to be placed after all other Views.
+ // Refer to the comment paired with the CB->getStartViews(*IP, Insts); line
+ // for more info.
+ if (!DisableCustomBehaviour) {
+ std::vector<std::unique_ptr<mca::View>> CBViews =
+ CB->getEndViews(*IP, Insts);
+ for (auto &CBView : CBViews)
+ Printer.addView(std::move(CBView));
+ }
+
if (!runPipeline(*P))
return 1;
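The llvm-mca changes above drop the hard-coded #ifdef HAS_AMDGPU dispatch: instead of llvm-mca knowing about AMDGPUCustomBehaviour directly, each target can now provide its CustomBehaviour, InstrPostProcess and custom Views through the TargetRegistry (hence the new InitializeAllTargetMCAs() call), and the tool falls back to the do-nothing base classes otherwise. Condensed from the hunks above:

  // Ask the target for its hooks; fall back to the base classes when the
  // target registers none or -disable-cb was passed.
  std::unique_ptr<mca::InstrPostProcess> IPP;
  std::unique_ptr<mca::CustomBehaviour> CB;
  if (!DisableCustomBehaviour) {
    IPP.reset(TheTarget->createInstrPostProcess(*STI, *MCII));
    CB.reset(TheTarget->createCustomBehaviour(*STI, S, *MCII));
  }
  if (!IPP)
    IPP = std::make_unique<mca::InstrPostProcess>(*STI, *MCII);
  if (!CB)
    CB = std::make_unique<mca::CustomBehaviour>(*STI, S, *MCII);

Target-defined Views are then collected via CB->getStartViews(), CB->getPostInstrInfoViews() and CB->getEndViews() and handed to the PipelinePrinter at the corresponding points in the report.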
diff --git a/llvm/tools/llvm-nm/llvm-nm.cpp b/llvm/tools/llvm-nm/llvm-nm.cpp
index ffb427a3f2bd..0864985377ce 100644
--- a/llvm/tools/llvm-nm/llvm-nm.cpp
+++ b/llvm/tools/llvm-nm/llvm-nm.cpp
@@ -64,7 +64,7 @@ enum ID {
#include "Opts.inc"
#undef PREFIX
-static const opt::OptTable::Info InfoTable[] = {
+const opt::OptTable::Info InfoTable[] = {
#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
HELPTEXT, METAVAR, VALUES) \
{ \
@@ -530,7 +530,7 @@ struct DarwinStabName {
uint8_t NType;
const char *Name;
};
-static const struct DarwinStabName DarwinStabNames[] = {
+const struct DarwinStabName DarwinStabNames[] = {
{MachO::N_GSYM, "GSYM"},
{MachO::N_FNAME, "FNAME"},
{MachO::N_FUN, "FUN"},
@@ -599,22 +599,16 @@ static void darwinPrintStab(MachOObjectFile *MachO, const NMSymbol &S) {
outs() << format(" %02x", NType);
}
-static Optional<std::string> demangle(StringRef Name, bool StripUnderscore) {
- if (StripUnderscore && !Name.empty() && Name[0] == '_')
- Name = Name.substr(1);
+static Optional<std::string> demangle(const std::string &Name,
+ bool StripUnderscore) {
+ const char *Mangled = Name.c_str();
+ if (StripUnderscore && Mangled[0] == '_')
+ Mangled = Mangled + 1;
- if (!Name.startswith("_Z"))
- return None;
-
- int Status;
- char *Undecorated =
- itaniumDemangle(Name.str().c_str(), nullptr, nullptr, &Status);
- if (Status != 0)
- return None;
-
- std::string S(Undecorated);
- free(Undecorated);
- return S;
+ std::string Demangled;
+ if (nonMicrosoftDemangle(Mangled, Demangled))
+ return Demangled;
+ return None;
}
static bool symbolIsDefined(const NMSymbol &Sym) {
@@ -1575,90 +1569,11 @@ static void dumpSymbolsFromDLInfoMachO(MachOObjectFile &MachO) {
}
}
-namespace {
-struct SymbolVersion {
- std::string Name;
- bool IsDefault;
-};
-} // namespace
-
-template <class ELFT>
-static Expected<std::vector<SymbolVersion>>
-readSymbolVersionsELF(const ELFFile<ELFT> &Obj, StringRef FileName,
- ELFObjectFileBase::elf_symbol_iterator_range Symbols) {
- using Elf_Shdr = typename ELFT::Shdr;
-
- // We called sections() earlier, so can't fail here.
- typename ELFT::ShdrRange SectionsOrErr = cantFail(Obj.sections());
- const Elf_Shdr *SymVerSec = nullptr;
- const Elf_Shdr *SymVerNeedSec = nullptr;
- const Elf_Shdr *SymVerDefSec = nullptr;
- for (const Elf_Shdr &Sec : SectionsOrErr) {
- if (Sec.sh_type == ELF::SHT_GNU_versym)
- SymVerSec = &Sec;
- else if (Sec.sh_type == ELF::SHT_GNU_verdef)
- SymVerDefSec = &Sec;
- else if (Sec.sh_type == ELF::SHT_GNU_verneed)
- SymVerNeedSec = &Sec;
- }
-
- if (!SymVerSec)
- return std::vector<SymbolVersion>{};
-
- Expected<SmallVector<Optional<VersionEntry>, 0>> MapOrErr =
- Obj.loadVersionMap(SymVerNeedSec, SymVerDefSec);
- if (!MapOrErr)
- return MapOrErr.takeError();
-
- std::vector<SymbolVersion> Ret;
- size_t I = 0;
- for (auto It = Symbols.begin(), E = Symbols.end(); It != E; ++It) {
- ++I;
- Expected<const typename ELFT::Versym *> VerEntryOrErr =
- Obj.template getEntry<typename ELFT::Versym>(*SymVerSec, I);
- if (!VerEntryOrErr)
- return createError("unable to read an entry with index " + Twine(I) +
- " from " + describe(Obj, *SymVerSec) + ": " +
- toString(VerEntryOrErr.takeError()));
-
- Expected<uint32_t> FlagsOrErr = It->getFlags();
- if (!FlagsOrErr)
- return createError("unable to read flags for symbol with index " +
- Twine(I) + ": " + toString(FlagsOrErr.takeError()));
-
- bool IsDefault;
- Expected<StringRef> VerOrErr = Obj.getSymbolVersionByIndex(
- (*VerEntryOrErr)->vs_index, IsDefault, *MapOrErr,
- (*FlagsOrErr) & SymbolRef::SF_Undefined);
- if (!VerOrErr)
- return createError("unable to get a version for entry " + Twine(I) +
- " of " + describe(Obj, *SymVerSec) + ": " +
- toString(VerOrErr.takeError()));
-
- Ret.push_back({(*VerOrErr).str(), IsDefault});
- }
-
- return Ret;
-}
-
-static Expected<std::vector<SymbolVersion>>
-readSymbolVersionsELF(const ELFObjectFileBase &Obj,
- ELFObjectFileBase::elf_symbol_iterator_range Symbols) {
- if (const auto *ELF = dyn_cast<ELF32LEObjectFile>(&Obj))
- return readSymbolVersionsELF(ELF->getELFFile(), Obj.getFileName(), Symbols);
- else if (const auto *ELF = dyn_cast<ELF32BEObjectFile>(&Obj))
- return readSymbolVersionsELF(ELF->getELFFile(), Obj.getFileName(), Symbols);
- else if (const auto *ELF = dyn_cast<ELF64LEObjectFile>(&Obj))
- return readSymbolVersionsELF(ELF->getELFFile(), Obj.getFileName(), Symbols);
- return readSymbolVersionsELF(cast<ELF64BEObjectFile>(&Obj)->getELFFile(),
- Obj.getFileName(), Symbols);
-}
-
static void dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
StringRef ArchiveName = {},
StringRef ArchitectureName = {}) {
auto Symbols = Obj.symbols();
- std::vector<SymbolVersion> SymbolVersions;
+ std::vector<VersionEntry> SymbolVersions;
if (DynamicSyms) {
const auto *E = dyn_cast<ELFObjectFileBase>(&Obj);
if (!E) {
@@ -1667,8 +1582,8 @@ static void dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
}
Symbols = E->getDynamicSymbolIterators();
- if (Expected<std::vector<SymbolVersion>> VersionsOrErr =
- readSymbolVersionsELF(*E, Symbols))
+ if (Expected<std::vector<VersionEntry>> VersionsOrErr =
+ E->readDynsymVersions())
SymbolVersions = std::move(*VersionsOrErr);
else
WithColor::warning(errs(), ToolName)
@@ -1738,7 +1653,7 @@ static void dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
}
if (!SymbolVersions.empty() && !SymbolVersions[I].Name.empty())
S.Name +=
- (SymbolVersions[I].IsDefault ? "@@" : "@") + SymbolVersions[I].Name;
+ (SymbolVersions[I].IsVerDef ? "@@" : "@") + SymbolVersions[I].Name;
S.Sym = Sym;
SymbolList.push_back(S);
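
The hunk above attaches the ELF symbol version to the printed name: a version that comes from a version definition (IsVerDef) gets the GNU-style "@@" default-version separator, while one resolved from a version dependency gets a single "@". A minimal illustrative sketch of that formatting, using a stand-in struct and made-up values rather than llvm-nm's real types:

// Illustrative only: VersionInfo stands in for llvm::object::VersionEntry.
#include <iostream>
#include <string>

struct VersionInfo {
  std::string Name;
  bool IsVerDef; // true when the version comes from a version definition
};

int main() {
  std::string Sym = "memcpy";
  VersionInfo V{"GLIBC_2.14", /*IsVerDef=*/false}; // came from .gnu.version_r
  Sym += (V.IsVerDef ? "@@" : "@") + V.Name;
  std::cout << Sym << '\n'; // prints "memcpy@GLIBC_2.14"
}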
diff --git a/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp b/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp
index e50ac2e12e2f..38c9cd09433b 100644
--- a/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp
+++ b/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp
@@ -94,7 +94,7 @@ static Error addGnuDebugLink(Object &Obj, StringRef DebugLinkFile) {
return Error::success();
}
-static void setSectionFlags(Section &Sec, SectionFlag AllFlags) {
+static uint32_t flagsToCharacteristics(SectionFlag AllFlags, uint32_t OldChar) {
// Need to preserve alignment flags.
const uint32_t PreserveMask =
IMAGE_SCN_ALIGN_1BYTES | IMAGE_SCN_ALIGN_2BYTES | IMAGE_SCN_ALIGN_4BYTES |
@@ -107,8 +107,7 @@ static void setSectionFlags(Section &Sec, SectionFlag AllFlags) {
// Setup new section characteristics based on the flags provided in command
// line.
- uint32_t NewCharacteristics =
- (Sec.Header.Characteristics & PreserveMask) | IMAGE_SCN_MEM_READ;
+ uint32_t NewCharacteristics = (OldChar & PreserveMask) | IMAGE_SCN_MEM_READ;
if ((AllFlags & SectionFlag::SecAlloc) && !(AllFlags & SectionFlag::SecLoad))
NewCharacteristics |= IMAGE_SCN_CNT_UNINITIALIZED_DATA;
@@ -128,7 +127,7 @@ static void setSectionFlags(Section &Sec, SectionFlag AllFlags) {
if (AllFlags & SectionFlag::SecExclude)
NewCharacteristics |= IMAGE_SCN_LNK_REMOVE;
- Sec.Header.Characteristics = NewCharacteristics;
+ return NewCharacteristics;
}
static Error handleArgs(const CommonConfig &Config, Object &Obj) {
@@ -226,7 +225,8 @@ static Error handleArgs(const CommonConfig &Config, Object &Obj) {
for (Section &Sec : Obj.getMutableSections()) {
const auto It = Config.SetSectionFlags.find(Sec.Name);
if (It != Config.SetSectionFlags.end())
- setSectionFlags(Sec, It->second.NewFlags);
+ Sec.Header.Characteristics = flagsToCharacteristics(
+ It->second.NewFlags, Sec.Header.Characteristics);
}
for (const auto &Flag : Config.AddSection) {
@@ -238,11 +238,18 @@ static Error handleArgs(const CommonConfig &Config, Object &Obj) {
return createFileError(FileName, errorCodeToError(BufOrErr.getError()));
auto Buf = std::move(*BufOrErr);
+ uint32_t Characteristics;
+ const auto It = Config.SetSectionFlags.find(SecName);
+ if (It != Config.SetSectionFlags.end())
+ Characteristics = flagsToCharacteristics(It->second.NewFlags, 0);
+ else
+ Characteristics = IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_ALIGN_1BYTES;
+
addSection(
Obj, SecName,
makeArrayRef(reinterpret_cast<const uint8_t *>(Buf->getBufferStart()),
Buf->getBufferSize()),
- IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_ALIGN_1BYTES);
+ Characteristics);
}
if (!Config.AddGnuDebugLink.empty())
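
The new flagsToCharacteristics() helper above follows a preserve-and-rebuild pattern: alignment bits from the existing characteristics are kept, the rest is recomputed from the requested flags, and IMAGE_SCN_MEM_READ is always set. A simplified sketch of that pattern with made-up inputs (the real code preserves a longer list of flags):

// Simplified sketch of the preserve-and-rebuild pattern, not the real helper.
#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t IMAGE_SCN_ALIGN_MASK = 0x00F00000; // covers the ALIGN_* field
  const uint32_t IMAGE_SCN_MEM_READ = 0x40000000;
  const uint32_t IMAGE_SCN_MEM_WRITE = 0x80000000;

  uint32_t OldChar = 0x00300000 /*ALIGN_4BYTES*/ | IMAGE_SCN_MEM_WRITE;
  bool WantWritable = false; // e.g. --set-section-flags sec=readonly

  // Keep the alignment bits, rebuild everything else from the new flags.
  uint32_t NewChar = (OldChar & IMAGE_SCN_ALIGN_MASK) | IMAGE_SCN_MEM_READ;
  if (WantWritable)
    NewChar |= IMAGE_SCN_MEM_WRITE;

  std::printf("0x%08x\n", (unsigned)NewChar); // 0x40300000: alignment kept, now read-only
}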
diff --git a/llvm/tools/llvm-objcopy/COFF/Object.cpp b/llvm/tools/llvm-objcopy/COFF/Object.cpp
index 1c17b8408ee7..ec2628c7eca9 100644
--- a/llvm/tools/llvm-objcopy/COFF/Object.cpp
+++ b/llvm/tools/llvm-objcopy/COFF/Object.cpp
@@ -107,7 +107,7 @@ void Object::removeSections(function_ref<bool(const Section &)> ToRemove) {
// section,
// remove those as well as nothing will include them (and we can't
// leave them dangling).
- if (RemovedSections.count(Sym.AssociativeComdatTargetSectionId) == 1)
+ if (RemovedSections.contains(Sym.AssociativeComdatTargetSectionId))
AssociatedSections.insert(Sym.TargetSectionId);
return RemovedSections.contains(Sym.TargetSectionId);
});
diff --git a/llvm/tools/llvm-objcopy/COFF/Writer.cpp b/llvm/tools/llvm-objcopy/COFF/Writer.cpp
index e7be64faab65..cbd0e4261238 100644
--- a/llvm/tools/llvm-objcopy/COFF/Writer.cpp
+++ b/llvm/tools/llvm-objcopy/COFF/Writer.cpp
@@ -406,7 +406,7 @@ Expected<uint32_t> COFFWriter::virtualAddressToFileAddress(uint32_t RVA) {
// the debug_directory structs in there, and set the PointerToRawData field
// in all of them, according to their new physical location in the file.
Error COFFWriter::patchDebugDirectory() {
- if (Obj.DataDirectories.size() < DEBUG_DIRECTORY)
+ if (Obj.DataDirectories.size() <= DEBUG_DIRECTORY)
return Error::success();
const data_directory *Dir = &Obj.DataDirectories[DEBUG_DIRECTORY];
if (Dir->Size <= 0)
@@ -426,15 +426,13 @@ Error COFFWriter::patchDebugDirectory() {
uint8_t *End = Ptr + Dir->Size;
while (Ptr < End) {
debug_directory *Debug = reinterpret_cast<debug_directory *>(Ptr);
- if (!Debug->AddressOfRawData)
- return createStringError(object_error::parse_failed,
- "debug directory payload outside of "
- "mapped sections not supported");
- if (Expected<uint32_t> FilePosOrErr =
- virtualAddressToFileAddress(Debug->AddressOfRawData))
- Debug->PointerToRawData = *FilePosOrErr;
- else
- return FilePosOrErr.takeError();
+ if (Debug->PointerToRawData) {
+ if (Expected<uint32_t> FilePosOrErr =
+ virtualAddressToFileAddress(Debug->AddressOfRawData))
+ Debug->PointerToRawData = *FilePosOrErr;
+ else
+ return FilePosOrErr.takeError();
+ }
Ptr += sizeof(debug_directory);
Offset += sizeof(debug_directory);
}
diff --git a/llvm/tools/llvm-objcopy/CommonConfig.h b/llvm/tools/llvm-objcopy/CommonConfig.h
index 131ce5c59114..ea39a6da2ba5 100644
--- a/llvm/tools/llvm-objcopy/CommonConfig.h
+++ b/llvm/tools/llvm-objcopy/CommonConfig.h
@@ -210,14 +210,7 @@ struct CommonConfig {
// Repeated options
std::vector<StringRef> AddSection;
std::vector<StringRef> DumpSection;
- std::vector<StringRef> RPathToAdd;
- std::vector<StringRef> RPathToPrepend;
- DenseMap<StringRef, StringRef> RPathsToUpdate;
- DenseMap<StringRef, StringRef> InstallNamesToUpdate;
- DenseSet<StringRef> RPathsToRemove;
-
- // install-name-tool's id option
- Optional<StringRef> SharedLibId;
+ std::vector<StringRef> UpdateSection;
// Section matchers
NameMatcher KeepSection;
@@ -239,23 +232,13 @@ struct CommonConfig {
StringMap<SectionFlagsUpdate> SetSectionFlags;
StringMap<StringRef> SymbolsToRename;
- // ELF entry point address expression. The input parameter is an entry point
- // address in the input ELF file. The entry address in the output file is
- // calculated with EntryExpr(input_address), when either --set-start or
- // --change-start is used.
- std::function<uint64_t(uint64_t)> EntryExpr;
-
// Symbol info specified by --add-symbol option.
std::vector<NewSymbolInfo> SymbolsToAdd;
// Boolean options
- bool AllowBrokenLinks = false;
bool DeterministicArchives = true;
bool ExtractDWO = false;
bool ExtractMainPartition = false;
- bool KeepFileSymbols = false;
- bool KeepUndefined = false;
- bool LocalizeHidden = false;
bool OnlyKeepDebug = false;
bool PreserveDates = false;
bool StripAll = false;
@@ -264,12 +247,9 @@ struct CommonConfig {
bool StripDebug = false;
bool StripNonAlloc = false;
bool StripSections = false;
- bool StripSwiftSymbols = false;
bool StripUnneeded = false;
bool Weaken = false;
bool DecompressDebugSections = false;
- // install-name-tool's --delete_all_rpaths
- bool RemoveAllRpaths = false;
DebugCompressionType CompressionType = DebugCompressionType::None;
};
diff --git a/llvm/tools/llvm-objcopy/ConfigManager.cpp b/llvm/tools/llvm-objcopy/ConfigManager.cpp
index 9f7d06b99418..2e5cf9357a52 100644
--- a/llvm/tools/llvm-objcopy/ConfigManager.cpp
+++ b/llvm/tools/llvm-objcopy/ConfigManager.cpp
@@ -39,7 +39,7 @@ enum ObjcopyID {
#include "ObjcopyOpts.inc"
#undef PREFIX
-static const opt::OptTable::Info ObjcopyInfoTable[] = {
+const opt::OptTable::Info ObjcopyInfoTable[] = {
#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
HELPTEXT, METAVAR, VALUES) \
{OBJCOPY_##PREFIX, \
@@ -79,7 +79,7 @@ enum InstallNameToolID {
#include "InstallNameToolOpts.inc"
#undef PREFIX
-static const opt::OptTable::Info InstallNameToolInfoTable[] = {
+const opt::OptTable::Info InstallNameToolInfoTable[] = {
#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
HELPTEXT, METAVAR, VALUES) \
{INSTALL_NAME_TOOL_##PREFIX, \
@@ -116,7 +116,7 @@ enum BitcodeStripID {
#include "BitcodeStripOpts.inc"
#undef PREFIX
-static const opt::OptTable::Info BitcodeStripInfoTable[] = {
+const opt::OptTable::Info BitcodeStripInfoTable[] = {
#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
HELPTEXT, METAVAR, VALUES) \
{BITCODE_STRIP_##PREFIX, \
@@ -153,7 +153,7 @@ enum StripID {
#include "StripOpts.inc"
#undef PREFIX
-static const opt::OptTable::Info StripInfoTable[] = {
+const opt::OptTable::Info StripInfoTable[] = {
#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
HELPTEXT, METAVAR, VALUES) \
{STRIP_##PREFIX, NAME, HELPTEXT, \
@@ -559,27 +559,21 @@ static Expected<NewSymbolInfo> parseNewSymbolInfo(StringRef FlagValue) {
}
Expected<const ELFConfig &> ConfigManager::getELFConfig() const {
- if (Common.StripSwiftSymbols || Common.KeepUndefined)
- return createStringError(llvm::errc::invalid_argument,
- "option not supported by llvm-objcopy for ELF");
-
return ELF;
}
Expected<const COFFConfig &> ConfigManager::getCOFFConfig() const {
- if (Common.AllowBrokenLinks || !Common.SplitDWO.empty() ||
- !Common.SymbolsPrefix.empty() || !Common.AllocSectionsPrefix.empty() ||
- !Common.DumpSection.empty() || !Common.KeepSection.empty() ||
- ELF.NewSymbolVisibility || !Common.SymbolsToGlobalize.empty() ||
+ if (!Common.SplitDWO.empty() || !Common.SymbolsPrefix.empty() ||
+ !Common.AllocSectionsPrefix.empty() || !Common.DumpSection.empty() ||
+ !Common.KeepSection.empty() || !Common.SymbolsToGlobalize.empty() ||
!Common.SymbolsToKeep.empty() || !Common.SymbolsToLocalize.empty() ||
!Common.SymbolsToWeaken.empty() || !Common.SymbolsToKeepGlobal.empty() ||
!Common.SectionsToRename.empty() || !Common.SetSectionAlignment.empty() ||
- Common.ExtractDWO || Common.LocalizeHidden || Common.PreserveDates ||
- Common.StripDWO || Common.StripNonAlloc || Common.StripSections ||
- Common.StripSwiftSymbols || Common.KeepUndefined || Common.Weaken ||
+ Common.ExtractDWO || Common.PreserveDates || Common.StripDWO ||
+ Common.StripNonAlloc || Common.StripSections || Common.Weaken ||
Common.DecompressDebugSections ||
Common.DiscardMode == DiscardType::Locals ||
- !Common.SymbolsToAdd.empty() || Common.EntryExpr) {
+ !Common.SymbolsToAdd.empty()) {
return createStringError(llvm::errc::invalid_argument,
"option not supported by llvm-objcopy for COFF");
}
@@ -588,19 +582,18 @@ Expected<const COFFConfig &> ConfigManager::getCOFFConfig() const {
}
Expected<const MachOConfig &> ConfigManager::getMachOConfig() const {
- if (Common.AllowBrokenLinks || !Common.SplitDWO.empty() ||
- !Common.SymbolsPrefix.empty() || !Common.AllocSectionsPrefix.empty() ||
- !Common.KeepSection.empty() || ELF.NewSymbolVisibility ||
+ if (!Common.SplitDWO.empty() || !Common.SymbolsPrefix.empty() ||
+ !Common.AllocSectionsPrefix.empty() || !Common.KeepSection.empty() ||
!Common.SymbolsToGlobalize.empty() || !Common.SymbolsToKeep.empty() ||
!Common.SymbolsToLocalize.empty() || !Common.SymbolsToWeaken.empty() ||
!Common.SymbolsToKeepGlobal.empty() || !Common.SectionsToRename.empty() ||
!Common.UnneededSymbolsToRemove.empty() ||
!Common.SetSectionAlignment.empty() || !Common.SetSectionFlags.empty() ||
- Common.ExtractDWO || Common.LocalizeHidden || Common.PreserveDates ||
- Common.StripAllGNU || Common.StripDWO || Common.StripNonAlloc ||
- Common.StripSections || Common.Weaken || Common.DecompressDebugSections ||
- Common.StripUnneeded || Common.DiscardMode == DiscardType::Locals ||
- !Common.SymbolsToAdd.empty() || Common.EntryExpr) {
+ Common.ExtractDWO || Common.PreserveDates || Common.StripAllGNU ||
+ Common.StripDWO || Common.StripNonAlloc || Common.StripSections ||
+ Common.Weaken || Common.DecompressDebugSections || Common.StripUnneeded ||
+ Common.DiscardMode == DiscardType::Locals ||
+ !Common.SymbolsToAdd.empty()) {
return createStringError(llvm::errc::invalid_argument,
"option not supported by llvm-objcopy for MachO");
}
@@ -612,8 +605,7 @@ Expected<const WasmConfig &> ConfigManager::getWasmConfig() const {
if (!Common.AddGnuDebugLink.empty() || Common.ExtractPartition ||
!Common.SplitDWO.empty() || !Common.SymbolsPrefix.empty() ||
!Common.AllocSectionsPrefix.empty() ||
- Common.DiscardMode != DiscardType::None || ELF.NewSymbolVisibility ||
- !Common.SymbolsToAdd.empty() || !Common.RPathToAdd.empty() ||
+ Common.DiscardMode != DiscardType::None || !Common.SymbolsToAdd.empty() ||
!Common.SymbolsToGlobalize.empty() || !Common.SymbolsToLocalize.empty() ||
!Common.SymbolsToKeep.empty() || !Common.SymbolsToRemove.empty() ||
!Common.UnneededSymbolsToRemove.empty() ||
@@ -684,6 +676,7 @@ objcopy::parseObjcopyOptions(ArrayRef<const char *> RawArgsArr,
ConfigManager ConfigMgr;
CommonConfig &Config = ConfigMgr.Common;
ELFConfig &ELFConfig = ConfigMgr.ELF;
+ MachOConfig &MachOConfig = ConfigMgr.MachO;
Config.InputFilename = Positional[0];
Config.OutputFilename = Positional[Positional.size() == 1 ? 0 : 1];
if (InputArgs.hasArg(OBJCOPY_target) &&
@@ -887,6 +880,17 @@ objcopy::parseObjcopyOptions(ArrayRef<const char *> RawArgsArr,
"bad format for --add-section: missing file name");
Config.AddSection.push_back(ArgValue);
}
+ for (auto Arg : InputArgs.filtered(OBJCOPY_update_section)) {
+ StringRef ArgValue(Arg->getValue());
+ if (!ArgValue.contains('='))
+ return createStringError(errc::invalid_argument,
+ "bad format for --update-section: missing '='");
+ if (ArgValue.split("=").second.empty())
+ return createStringError(
+ errc::invalid_argument,
+ "bad format for --update-section: missing file name");
+ Config.UpdateSection.push_back(ArgValue);
+ }
for (auto *Arg : InputArgs.filtered(OBJCOPY_dump_section)) {
StringRef Value(Arg->getValue());
if (Value.split('=').second.empty())
@@ -905,7 +909,7 @@ objcopy::parseObjcopyOptions(ArrayRef<const char *> RawArgsArr,
Config.ExtractDWO = InputArgs.hasArg(OBJCOPY_extract_dwo);
Config.ExtractMainPartition =
InputArgs.hasArg(OBJCOPY_extract_main_partition);
- Config.LocalizeHidden = InputArgs.hasArg(OBJCOPY_localize_hidden);
+ ELFConfig.LocalizeHidden = InputArgs.hasArg(OBJCOPY_localize_hidden);
Config.Weaken = InputArgs.hasArg(OBJCOPY_weaken);
if (InputArgs.hasArg(OBJCOPY_discard_all, OBJCOPY_discard_locals))
Config.DiscardMode =
@@ -913,13 +917,13 @@ objcopy::parseObjcopyOptions(ArrayRef<const char *> RawArgsArr,
? DiscardType::All
: DiscardType::Locals;
Config.OnlyKeepDebug = InputArgs.hasArg(OBJCOPY_only_keep_debug);
- Config.KeepFileSymbols = InputArgs.hasArg(OBJCOPY_keep_file_symbols);
- Config.KeepUndefined = InputArgs.hasArg(OBJCOPY_keep_undefined);
+ ELFConfig.KeepFileSymbols = InputArgs.hasArg(OBJCOPY_keep_file_symbols);
+ MachOConfig.KeepUndefined = InputArgs.hasArg(OBJCOPY_keep_undefined);
Config.DecompressDebugSections =
InputArgs.hasArg(OBJCOPY_decompress_debug_sections);
if (Config.DiscardMode == DiscardType::All) {
Config.StripDebug = true;
- Config.KeepFileSymbols = true;
+ ELFConfig.KeepFileSymbols = true;
}
for (auto Arg : InputArgs.filtered(OBJCOPY_localize_symbol))
if (Error E = Config.SymbolsToLocalize.addMatcher(NameOrPattern::create(
@@ -993,7 +997,7 @@ objcopy::parseObjcopyOptions(ArrayRef<const char *> RawArgsArr,
Config.SymbolsToAdd.push_back(*SymInfo);
}
- Config.AllowBrokenLinks = InputArgs.hasArg(OBJCOPY_allow_broken_links);
+ ELFConfig.AllowBrokenLinks = InputArgs.hasArg(OBJCOPY_allow_broken_links);
Config.DeterministicArchives = InputArgs.hasFlag(
OBJCOPY_enable_deterministic_archives,
@@ -1013,16 +1017,16 @@ objcopy::parseObjcopyOptions(ArrayRef<const char *> RawArgsArr,
return createStringError(
EAddr.getError(), "bad entry point address: '%s'", Arg->getValue());
- Config.EntryExpr = [EAddr](uint64_t) { return *EAddr; };
+ ELFConfig.EntryExpr = [EAddr](uint64_t) { return *EAddr; };
} else if (Arg->getOption().matches(OBJCOPY_change_start)) {
auto EIncr = getAsInteger<int64_t>(Arg->getValue());
if (!EIncr)
return createStringError(EIncr.getError(),
"bad entry point increment: '%s'",
Arg->getValue());
- auto Expr = Config.EntryExpr ? std::move(Config.EntryExpr)
- : [](uint64_t A) { return A; };
- Config.EntryExpr = [Expr, EIncr](uint64_t EAddr) {
+ auto Expr = ELFConfig.EntryExpr ? std::move(ELFConfig.EntryExpr)
+ : [](uint64_t A) { return A; };
+ ELFConfig.EntryExpr = [Expr, EIncr](uint64_t EAddr) {
return Expr(EAddr) + *EIncr;
};
}
@@ -1057,6 +1061,7 @@ objcopy::parseInstallNameToolOptions(ArrayRef<const char *> ArgsArr) {
DriverConfig DC;
ConfigManager ConfigMgr;
CommonConfig &Config = ConfigMgr.Common;
+ MachOConfig &MachOConfig = ConfigMgr.MachO;
InstallNameToolOptTable T;
unsigned MissingArgumentIndex, MissingArgumentCount;
llvm::opt::InputArgList InputArgs =
@@ -1087,27 +1092,27 @@ objcopy::parseInstallNameToolOptions(ArrayRef<const char *> ArgsArr) {
}
for (auto Arg : InputArgs.filtered(INSTALL_NAME_TOOL_add_rpath))
- Config.RPathToAdd.push_back(Arg->getValue());
+ MachOConfig.RPathToAdd.push_back(Arg->getValue());
for (auto *Arg : InputArgs.filtered(INSTALL_NAME_TOOL_prepend_rpath))
- Config.RPathToPrepend.push_back(Arg->getValue());
+ MachOConfig.RPathToPrepend.push_back(Arg->getValue());
for (auto Arg : InputArgs.filtered(INSTALL_NAME_TOOL_delete_rpath)) {
StringRef RPath = Arg->getValue();
// Cannot add and delete the same rpath at the same time.
- if (is_contained(Config.RPathToAdd, RPath))
+ if (is_contained(MachOConfig.RPathToAdd, RPath))
return createStringError(
errc::invalid_argument,
"cannot specify both -add_rpath '%s' and -delete_rpath '%s'",
RPath.str().c_str(), RPath.str().c_str());
- if (is_contained(Config.RPathToPrepend, RPath))
+ if (is_contained(MachOConfig.RPathToPrepend, RPath))
return createStringError(
errc::invalid_argument,
"cannot specify both -prepend_rpath '%s' and -delete_rpath '%s'",
RPath.str().c_str(), RPath.str().c_str());
- Config.RPathsToRemove.insert(RPath);
+ MachOConfig.RPathsToRemove.insert(RPath);
}
for (auto *Arg : InputArgs.filtered(INSTALL_NAME_TOOL_rpath)) {
@@ -1118,51 +1123,52 @@ objcopy::parseInstallNameToolOptions(ArrayRef<const char *> ArgsArr) {
// Cannot specify duplicate -rpath entries
auto It1 = find_if(
- Config.RPathsToUpdate,
+ MachOConfig.RPathsToUpdate,
[&Match](const DenseMap<StringRef, StringRef>::value_type &OldNew) {
return Match(OldNew.getFirst()) || Match(OldNew.getSecond());
});
- if (It1 != Config.RPathsToUpdate.end())
+ if (It1 != MachOConfig.RPathsToUpdate.end())
return createStringError(errc::invalid_argument,
"cannot specify both -rpath '" +
It1->getFirst() + "' '" + It1->getSecond() +
"' and -rpath '" + Old + "' '" + New + "'");
// Cannot specify the same rpath under both -delete_rpath and -rpath
- auto It2 = find_if(Config.RPathsToRemove, Match);
- if (It2 != Config.RPathsToRemove.end())
+ auto It2 = find_if(MachOConfig.RPathsToRemove, Match);
+ if (It2 != MachOConfig.RPathsToRemove.end())
return createStringError(errc::invalid_argument,
"cannot specify both -delete_rpath '" + *It2 +
"' and -rpath '" + Old + "' '" + New + "'");
// Cannot specify the same rpath under both -add_rpath and -rpath
- auto It3 = find_if(Config.RPathToAdd, Match);
- if (It3 != Config.RPathToAdd.end())
+ auto It3 = find_if(MachOConfig.RPathToAdd, Match);
+ if (It3 != MachOConfig.RPathToAdd.end())
return createStringError(errc::invalid_argument,
"cannot specify both -add_rpath '" + *It3 +
"' and -rpath '" + Old + "' '" + New + "'");
// Cannot specify the same rpath under both -prepend_rpath and -rpath.
- auto It4 = find_if(Config.RPathToPrepend, Match);
- if (It4 != Config.RPathToPrepend.end())
+ auto It4 = find_if(MachOConfig.RPathToPrepend, Match);
+ if (It4 != MachOConfig.RPathToPrepend.end())
return createStringError(errc::invalid_argument,
"cannot specify both -prepend_rpath '" + *It4 +
"' and -rpath '" + Old + "' '" + New + "'");
- Config.RPathsToUpdate.insert({Old, New});
+ MachOConfig.RPathsToUpdate.insert({Old, New});
}
if (auto *Arg = InputArgs.getLastArg(INSTALL_NAME_TOOL_id)) {
- Config.SharedLibId = Arg->getValue();
- if (Config.SharedLibId->empty())
+ MachOConfig.SharedLibId = Arg->getValue();
+ if (MachOConfig.SharedLibId->empty())
return createStringError(errc::invalid_argument,
"cannot specify an empty id");
}
for (auto *Arg : InputArgs.filtered(INSTALL_NAME_TOOL_change))
- Config.InstallNamesToUpdate.insert({Arg->getValue(0), Arg->getValue(1)});
+ MachOConfig.InstallNamesToUpdate.insert(
+ {Arg->getValue(0), Arg->getValue(1)});
- Config.RemoveAllRpaths =
+ MachOConfig.RemoveAllRpaths =
InputArgs.hasArg(INSTALL_NAME_TOOL_delete_all_rpaths);
SmallVector<StringRef, 2> Positional;
@@ -1281,6 +1287,8 @@ objcopy::parseStripOptions(ArrayRef<const char *> RawArgsArr,
ConfigManager ConfigMgr;
CommonConfig &Config = ConfigMgr.Common;
+ ELFConfig &ELFConfig = ConfigMgr.ELF;
+ MachOConfig &MachOConfig = ConfigMgr.MachO;
if (InputArgs.hasArg(STRIP_regex) && InputArgs.hasArg(STRIP_wildcard))
return createStringError(errc::invalid_argument,
@@ -1292,7 +1300,7 @@ objcopy::parseStripOptions(ArrayRef<const char *> RawArgsArr,
: InputArgs.hasArg(STRIP_wildcard)
? MatchStyle::Wildcard
: MatchStyle::Literal;
- Config.AllowBrokenLinks = InputArgs.hasArg(STRIP_allow_broken_links);
+ ELFConfig.AllowBrokenLinks = InputArgs.hasArg(STRIP_allow_broken_links);
Config.StripDebug = InputArgs.hasArg(STRIP_strip_debug);
if (InputArgs.hasArg(STRIP_discard_all, STRIP_discard_locals))
@@ -1305,10 +1313,10 @@ objcopy::parseStripOptions(ArrayRef<const char *> RawArgsArr,
if (auto Arg = InputArgs.getLastArg(STRIP_strip_all, STRIP_no_strip_all))
Config.StripAll = Arg->getOption().getID() == STRIP_strip_all;
Config.StripAllGNU = InputArgs.hasArg(STRIP_strip_all_gnu);
- Config.StripSwiftSymbols = InputArgs.hasArg(STRIP_strip_swift_symbols);
+ MachOConfig.StripSwiftSymbols = InputArgs.hasArg(STRIP_strip_swift_symbols);
Config.OnlyKeepDebug = InputArgs.hasArg(STRIP_only_keep_debug);
- Config.KeepFileSymbols = InputArgs.hasArg(STRIP_keep_file_symbols);
- Config.KeepUndefined = InputArgs.hasArg(STRIP_keep_undefined);
+ ELFConfig.KeepFileSymbols = InputArgs.hasArg(STRIP_keep_file_symbols);
+ MachOConfig.KeepUndefined = InputArgs.hasArg(STRIP_keep_undefined);
for (auto Arg : InputArgs.filtered(STRIP_keep_section))
if (Error E = Config.KeepSection.addMatcher(NameOrPattern::create(
@@ -1337,7 +1345,7 @@ objcopy::parseStripOptions(ArrayRef<const char *> RawArgsArr,
if (Config.DiscardMode == DiscardType::All) {
Config.StripDebug = true;
- Config.KeepFileSymbols = true;
+ ELFConfig.KeepFileSymbols = true;
}
Config.DeterministicArchives =
diff --git a/llvm/tools/llvm-objcopy/ELF/ELFConfig.h b/llvm/tools/llvm-objcopy/ELF/ELFConfig.h
index 42d407da17ff..229a8d61fb83 100644
--- a/llvm/tools/llvm-objcopy/ELF/ELFConfig.h
+++ b/llvm/tools/llvm-objcopy/ELF/ELFConfig.h
@@ -20,6 +20,16 @@ namespace objcopy {
// ELF specific configuration for copying/stripping a single file.
struct ELFConfig {
uint8_t NewSymbolVisibility = (uint8_t)ELF::STV_DEFAULT;
+
+ // ELF entry point address expression. The input parameter is an entry point
+ // address in the input ELF file. The entry address in the output file is
+ // calculated with EntryExpr(input_address), when either --set-start or
+ // --change-start is used.
+ std::function<uint64_t(uint64_t)> EntryExpr;
+
+ bool AllowBrokenLinks = false;
+ bool KeepFileSymbols = false;
+ bool LocalizeHidden = false;
};
} // namespace objcopy
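
EntryExpr, now part of ELFConfig, is composed by the option parser: --set-start installs a lambda that returns a fixed address, and each --change-start wraps the existing expression and adds an increment. A self-contained sketch of that composition with invented values:

// Sketch of how --set-start and --change-start compose via EntryExpr.
#include <cassert>
#include <cstdint>
#include <functional>

int main() {
  std::function<uint64_t(uint64_t)> EntryExpr;

  // --set-start 0x1000: ignore the input entry and return the new address.
  uint64_t EAddr = 0x1000;
  EntryExpr = [EAddr](uint64_t) { return EAddr; };

  // --change-start 0x10: wrap the previous expression and add the increment.
  int64_t EIncr = 0x10;
  auto Expr = EntryExpr ? std::move(EntryExpr)
                        : std::function<uint64_t(uint64_t)>(
                              [](uint64_t A) { return A; });
  EntryExpr = [Expr, EIncr](uint64_t A) { return Expr(A) + EIncr; };

  // Applied to the object's original entry point (0x400000 here), the
  // result is 0x1000 + 0x10 regardless of the input address.
  assert(EntryExpr(0x400000) == 0x1010);
}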
diff --git a/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp b/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp
index 986eeca6256c..16de84a961b5 100644
--- a/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp
+++ b/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp
@@ -204,8 +204,7 @@ static bool isCompressable(const SectionBase &Sec) {
}
static Error replaceDebugSections(
- Object &Obj, SectionPred &RemovePred,
- function_ref<bool(const SectionBase &)> ShouldReplace,
+ Object &Obj, function_ref<bool(const SectionBase &)> ShouldReplace,
function_ref<Expected<SectionBase *>(const SectionBase *)> AddSection) {
// Build a list of the debug sections we are going to replace.
// We can't call `AddSection` while iterating over sections,
@@ -225,17 +224,7 @@ static Error replaceDebugSections(
FromTo[S] = *NewSection;
}
- // Now we want to update the target sections of relocation
- // sections. Also we will update the relocations themselves
- // to update the symbol references.
- for (auto &Sec : Obj.sections())
- Sec.replaceSectionReferences(FromTo);
-
- RemovePred = [ShouldReplace, RemovePred](const SectionBase &Sec) {
- return ShouldReplace(Sec) || RemovePred(Sec);
- };
-
- return Error::success();
+ return Obj.replaceSections(FromTo);
}
static bool isUnneededSymbol(const Symbol &Sym) {
@@ -244,7 +233,8 @@ static bool isUnneededSymbol(const Symbol &Sym) {
Sym.Type != STT_SECTION;
}
-static Error updateAndRemoveSymbols(const CommonConfig &Config, Object &Obj) {
+static Error updateAndRemoveSymbols(const CommonConfig &Config,
+ const ELFConfig &ELFConfig, Object &Obj) {
// TODO: update or remove symbols only if there is an option that affects
// them.
if (!Obj.SymbolTable)
@@ -254,7 +244,7 @@ static Error updateAndRemoveSymbols(const CommonConfig &Config, Object &Obj) {
// Common and undefined symbols don't make sense as local symbols, and can
// even cause crashes if we localize those, so skip them.
if (!Sym.isCommon() && Sym.getShndx() != SHN_UNDEF &&
- ((Config.LocalizeHidden &&
+ ((ELFConfig.LocalizeHidden &&
(Sym.Visibility == STV_HIDDEN || Sym.Visibility == STV_INTERNAL)) ||
Config.SymbolsToLocalize.matches(Sym.Name)))
Sym.Binding = STB_LOCAL;
@@ -304,7 +294,7 @@ static Error updateAndRemoveSymbols(const CommonConfig &Config, Object &Obj) {
auto RemoveSymbolsPred = [&](const Symbol &Sym) {
if (Config.SymbolsToKeep.matches(Sym.Name) ||
- (Config.KeepFileSymbols && Sym.Type == STT_FILE))
+ (ELFConfig.KeepFileSymbols && Sym.Type == STT_FILE))
return false;
if ((Config.DiscardMode == DiscardType::All ||
@@ -339,7 +329,8 @@ static Error updateAndRemoveSymbols(const CommonConfig &Config, Object &Obj) {
return Obj.removeSymbols(RemoveSymbolsPred);
}
-static Error replaceAndRemoveSections(const CommonConfig &Config, Object &Obj) {
+static Error replaceAndRemoveSections(const CommonConfig &Config,
+ const ELFConfig &ELFConfig, Object &Obj) {
SectionPred RemovePred = [](const SectionBase &) { return false; };
// Removes:
@@ -465,7 +456,7 @@ static Error replaceAndRemoveSections(const CommonConfig &Config, Object &Obj) {
// and at least one of those symbols is present
// (equivalently, the updated symbol table is not empty)
// the symbol table and the string table should not be removed.
- if ((!Config.SymbolsToKeep.empty() || Config.KeepFileSymbols) &&
+ if ((!Config.SymbolsToKeep.empty() || ELFConfig.KeepFileSymbols) &&
Obj.SymbolTable && !Obj.SymbolTable->empty()) {
RemovePred = [&Obj, RemovePred](const SectionBase &Sec) {
if (&Sec == Obj.SymbolTable || &Sec == Obj.SymbolTable->getStrTab())
@@ -474,9 +465,12 @@ static Error replaceAndRemoveSections(const CommonConfig &Config, Object &Obj) {
};
}
+ if (Error E = Obj.removeSections(ELFConfig.AllowBrokenLinks, RemovePred))
+ return E;
+
if (Config.CompressionType != DebugCompressionType::None) {
if (Error Err = replaceDebugSections(
- Obj, RemovePred, isCompressable,
+ Obj, isCompressable,
[&Config, &Obj](const SectionBase *S) -> Expected<SectionBase *> {
Expected<CompressedSection> NewSection =
CompressedSection::create(*S, Config.CompressionType);
@@ -488,7 +482,7 @@ static Error replaceAndRemoveSections(const CommonConfig &Config, Object &Obj) {
return Err;
} else if (Config.DecompressDebugSections) {
if (Error Err = replaceDebugSections(
- Obj, RemovePred,
+ Obj,
[](const SectionBase &S) { return isa<CompressedSection>(&S); },
[&Obj](const SectionBase *S) {
const CompressedSection *CS = cast<CompressedSection>(S);
@@ -497,7 +491,7 @@ static Error replaceAndRemoveSections(const CommonConfig &Config, Object &Obj) {
return Err;
}
- return Obj.removeSections(Config.AllowBrokenLinks, RemovePred);
+ return Error::success();
}
// Add symbol to the Object symbol table with the specified properties.
@@ -554,6 +548,22 @@ static void addSymbol(Object &Obj, const NewSymbolInfo &SymInfo,
Sec ? (uint16_t)SYMBOL_SIMPLE_INDEX : (uint16_t)SHN_ABS, 0);
}
+static Error
+handleUserSection(StringRef Flag,
+ function_ref<Error(StringRef, ArrayRef<uint8_t>)> F) {
+ std::pair<StringRef, StringRef> SecPair = Flag.split("=");
+ StringRef SecName = SecPair.first;
+ StringRef File = SecPair.second;
+ ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr = MemoryBuffer::getFile(File);
+ if (!BufOrErr)
+ return createFileError(File, errorCodeToError(BufOrErr.getError()));
+ std::unique_ptr<MemoryBuffer> Buf = std::move(*BufOrErr);
+ ArrayRef<uint8_t> Data(
+ reinterpret_cast<const uint8_t *>(Buf->getBufferStart()),
+ Buf->getBufferSize());
+ return F(SecName, Data);
+}
+
// This function handles the high level operations of GNU objcopy including
// handling command line options. It's important to outline certain properties
// we expect to hold of the command line operations. Any operation that "keeps"
@@ -570,7 +580,7 @@ static Error handleArgs(const CommonConfig &Config, const ELFConfig &ELFConfig,
if (!Config.SplitDWO.empty() && Config.ExtractDWO) {
return Obj.removeSections(
- Config.AllowBrokenLinks,
+ ELFConfig.AllowBrokenLinks,
[&Obj](const SectionBase &Sec) { return onlyKeepDWOPred(Obj, Sec); });
}
@@ -587,21 +597,39 @@ static Error handleArgs(const CommonConfig &Config, const ELFConfig &ELFConfig,
// remove the relocation sections before removing the symbols. That allows
// us to avoid reporting the inappropriate errors about removing symbols
// named in relocations.
- if (Error E = replaceAndRemoveSections(Config, Obj))
+ if (Error E = replaceAndRemoveSections(Config, ELFConfig, Obj))
return E;
- if (Error E = updateAndRemoveSymbols(Config, Obj))
+ if (Error E = updateAndRemoveSymbols(Config, ELFConfig, Obj))
return E;
if (!Config.SectionsToRename.empty()) {
+ std::vector<RelocationSectionBase *> RelocSections;
+ DenseSet<SectionBase *> RenamedSections;
for (SectionBase &Sec : Obj.sections()) {
+ auto *RelocSec = dyn_cast<RelocationSectionBase>(&Sec);
const auto Iter = Config.SectionsToRename.find(Sec.Name);
if (Iter != Config.SectionsToRename.end()) {
const SectionRename &SR = Iter->second;
Sec.Name = std::string(SR.NewName);
if (SR.NewFlags.hasValue())
setSectionFlagsAndType(Sec, SR.NewFlags.getValue());
- }
+ RenamedSections.insert(&Sec);
+ } else if (RelocSec && !(Sec.Flags & SHF_ALLOC))
+ // Postpone processing relocation sections that are not explicitly named in
+ // '--rename-section' commands until after their target sections have been
+ // renamed.
+ // Dynamic relocation sections (i.e. ones with SHF_ALLOC) should be
+ // renamed only explicitly. Otherwise, renaming, for example, '.got.plt'
+ // would affect '.rela.plt', which is not desirable.
+ RelocSections.push_back(RelocSec);
+ }
+
+ // Rename relocation sections according to their target sections.
+ for (RelocationSectionBase *RelocSec : RelocSections) {
+ auto Iter = RenamedSections.find(RelocSec->getSection());
+ if (Iter != RenamedSections.end())
+ RelocSec->Name = (RelocSec->getNamePrefix() + (*Iter)->Name).str();
}
}
@@ -624,27 +652,16 @@ static Error handleArgs(const CommonConfig &Config, const ELFConfig &ELFConfig,
// .rela.prefix.plt since GNU objcopy does so.
const SectionBase *TargetSec = RelocSec->getSection();
if (TargetSec && (TargetSec->Flags & SHF_ALLOC)) {
- StringRef prefix;
- switch (Sec.Type) {
- case SHT_REL:
- prefix = ".rel";
- break;
- case SHT_RELA:
- prefix = ".rela";
- break;
- default:
- llvm_unreachable("not a relocation section");
- }
-
// If the relocation section comes *after* the target section, we
// don't add Config.AllocSectionsPrefix because we've already added
// the prefix to TargetSec->Name. Otherwise, if the relocation
// section comes *before* the target section, we add the prefix.
if (PrefixedSections.count(TargetSec))
- Sec.Name = (prefix + TargetSec->Name).str();
+ Sec.Name = (RelocSec->getNamePrefix() + TargetSec->Name).str();
else
- Sec.Name =
- (prefix + Config.AllocSectionsPrefix + TargetSec->Name).str();
+ Sec.Name = (RelocSec->getNamePrefix() + Config.AllocSectionsPrefix +
+ TargetSec->Name)
+ .str();
}
}
}
@@ -664,21 +681,23 @@ static Error handleArgs(const CommonConfig &Config, const ELFConfig &ELFConfig,
Sec.Type = SHT_NOBITS;
for (const auto &Flag : Config.AddSection) {
- std::pair<StringRef, StringRef> SecPair = Flag.split("=");
- StringRef SecName = SecPair.first;
- StringRef File = SecPair.second;
- ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
- MemoryBuffer::getFile(File);
- if (!BufOrErr)
- return createFileError(File, errorCodeToError(BufOrErr.getError()));
- std::unique_ptr<MemoryBuffer> Buf = std::move(*BufOrErr);
- ArrayRef<uint8_t> Data(
- reinterpret_cast<const uint8_t *>(Buf->getBufferStart()),
- Buf->getBufferSize());
- OwnedDataSection &NewSection =
- Obj.addSection<OwnedDataSection>(SecName, Data);
- if (SecName.startswith(".note") && SecName != ".note.GNU-stack")
- NewSection.Type = SHT_NOTE;
+ auto AddSection = [&](StringRef Name, ArrayRef<uint8_t> Data) {
+ OwnedDataSection &NewSection =
+ Obj.addSection<OwnedDataSection>(Name, Data);
+ if (Name.startswith(".note") && Name != ".note.GNU-stack")
+ NewSection.Type = SHT_NOTE;
+ return Error::success();
+ };
+ if (Error E = handleUserSection(Flag, AddSection))
+ return E;
+ }
+
+ for (StringRef Flag : Config.UpdateSection) {
+ auto UpdateSection = [&](StringRef Name, ArrayRef<uint8_t> Data) {
+ return Obj.updateSection(Name, Data);
+ };
+ if (Error E = handleUserSection(Flag, UpdateSection))
+ return E;
}
if (!Config.AddGnuDebugLink.empty())
@@ -705,8 +724,8 @@ static Error handleArgs(const CommonConfig &Config, const ELFConfig &ELFConfig,
}
}
- if (Config.EntryExpr)
- Obj.Entry = Config.EntryExpr(Obj.Entry);
+ if (ELFConfig.EntryExpr)
+ Obj.Entry = ELFConfig.EntryExpr(Obj.Entry);
return Error::success();
}
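
The renaming logic above keeps non-allocated relocation sections in step with their targets by rebuilding the name from the ".rel"/".rela" prefix plus the target's new name (via RelocationSectionBase::getNamePrefix()). A simplified sketch of that derivation:

// Sketch: derive a relocation section's new name from its renamed target.
#include <iostream>
#include <string>

enum SectionType { SHT_RELA = 4, SHT_REL = 9 }; // ELF section type values

static std::string relocSectionName(SectionType Type,
                                    const std::string &TargetName) {
  const char *Prefix = (Type == SHT_RELA) ? ".rela" : ".rel";
  return Prefix + TargetName;
}

int main() {
  // --rename-section .foo=.bar also turns ".rela.foo" into ".rela.bar".
  std::cout << relocSectionName(SHT_RELA, ".bar") << '\n'; // ".rela.bar"
}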
diff --git a/llvm/tools/llvm-objcopy/ELF/Object.cpp b/llvm/tools/llvm-objcopy/ELF/Object.cpp
index ba91d08e5540..3db5028e85f7 100644
--- a/llvm/tools/llvm-objcopy/ELF/Object.cpp
+++ b/llvm/tools/llvm-objcopy/ELF/Object.cpp
@@ -893,6 +893,17 @@ Error SymbolTableSection::accept(MutableSectionVisitor &Visitor) {
return Visitor.visit(*this);
}
+StringRef RelocationSectionBase::getNamePrefix() const {
+ switch (Type) {
+ case SHT_REL:
+ return ".rel";
+ case SHT_RELA:
+ return ".rela";
+ default:
+ llvm_unreachable("not a relocation section");
+ }
+}
+
Error RelocationSection::removeSectionReferences(
bool AllowBrokenLinks, function_ref<bool(const SectionBase *)> ToRemove) {
if (ToRemove(Symbols)) {
@@ -1342,13 +1353,16 @@ void IHexELFBuilder::addDataSections() {
if (R.HexData.empty())
continue;
RecAddr = R.Addr + SegmentAddr + BaseAddr;
- if (!Section || Section->Addr + Section->Size != RecAddr)
- // OriginalOffset field is only used to sort section properly, so
- // instead of keeping track of real offset in IHEX file, we use
- // section number.
+ if (!Section || Section->Addr + Section->Size != RecAddr) {
+ // The OriginalOffset field is only used to sort sections before layout;
+ // since layoutSections() and layoutSectionsForOnlyKeepDebug() use
+ // llvm::stable_sort(), there is no need to track the real IHEX file
+ // offsets, so we can just set it to a constant (zero).
Section = &Obj->addSection<OwnedDataSection>(
- ".sec" + std::to_string(SecNo++), RecAddr,
- ELF::SHF_ALLOC | ELF::SHF_WRITE, SecNo);
+ ".sec" + std::to_string(SecNo), RecAddr,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE, 0);
+ SecNo++;
+ }
Section->appendHexData(R.HexData);
break;
case IHexRecord::EndOfFile:
@@ -2093,6 +2107,17 @@ template <class ELFT> void ELFWriter<ELFT>::writeSegmentData() {
Size);
}
+ for (auto it : Obj.getUpdatedSections()) {
+ SectionBase *Sec = it.first;
+ ArrayRef<uint8_t> Data = it.second;
+
+ auto *Parent = Sec->ParentSegment;
+ assert(Parent && "This section should've been part of a segment.");
+ uint64_t Offset =
+ Sec->OriginalOffset - Parent->OriginalOffset + Parent->Offset;
+ llvm::copy(Data, Buf->getBufferStart() + Offset);
+ }
+
// Iterate over removed sections and overwrite their old data with zeroes.
for (auto &Sec : Obj.removedSections()) {
Segment *Parent = Sec.ParentSegment;
@@ -2110,6 +2135,37 @@ ELFWriter<ELFT>::ELFWriter(Object &Obj, raw_ostream &Buf, bool WSH,
: Writer(Obj, Buf), WriteSectionHeaders(WSH && Obj.HadShdrs),
OnlyKeepDebug(OnlyKeepDebug) {}
+Error Object::updateSection(StringRef Name, ArrayRef<uint8_t> Data) {
+ auto It = llvm::find_if(Sections,
+ [&](const SecPtr &Sec) { return Sec->Name == Name; });
+ if (It == Sections.end())
+ return createStringError(errc::invalid_argument, "section '%s' not found",
+ Name.str().c_str());
+
+ auto *OldSec = It->get();
+ if (!OldSec->hasContents())
+ return createStringError(
+ errc::invalid_argument,
+ "section '%s' can't be updated because it does not have contents",
+ Name.str().c_str());
+
+ if (Data.size() > OldSec->Size && OldSec->ParentSegment)
+ return createStringError(errc::invalid_argument,
+ "cannot fit data of size %zu into section '%s' "
+ "with size %zu that is part of a segment",
+ Data.size(), Name.str().c_str(), OldSec->Size);
+
+ if (!OldSec->ParentSegment) {
+ *It = std::make_unique<OwnedDataSection>(*OldSec, Data);
+ } else {
+ // The segment writer will be in charge of updating these contents.
+ OldSec->Size = Data.size();
+ UpdatedSections[OldSec] = Data;
+ }
+
+ return Error::success();
+}
+
Error Object::removeSections(
bool AllowBrokenLinks, std::function<bool(const SectionBase &)> ToRemove) {
@@ -2162,6 +2218,30 @@ Error Object::removeSections(
return Error::success();
}
+Error Object::replaceSections(
+ const DenseMap<SectionBase *, SectionBase *> &FromTo) {
+ auto SectionIndexLess = [](const SecPtr &Lhs, const SecPtr &Rhs) {
+ return Lhs->Index < Rhs->Index;
+ };
+ assert(llvm::is_sorted(Sections, SectionIndexLess) &&
+ "Sections are expected to be sorted by Index");
+ // Set indices of new sections so that they can be later sorted into positions
+ // of removed ones.
+ for (auto &I : FromTo)
+ I.second->Index = I.first->Index;
+
+ // Notify all sections about the replacement.
+ for (auto &Sec : Sections)
+ Sec->replaceSectionReferences(FromTo);
+
+ if (Error E = removeSections(
+ /*AllowBrokenLinks=*/false,
+ [=](const SectionBase &Sec) { return FromTo.count(&Sec) > 0; }))
+ return E;
+ llvm::sort(Sections, SectionIndexLess);
+ return Error::success();
+}
+
Error Object::removeSymbols(function_ref<bool(const Symbol &)> ToRemove) {
if (SymbolTable)
for (const SecPtr &Sec : Sections)
@@ -2200,20 +2280,6 @@ Error Object::addNewSymbolTable() {
return Error::success();
}
-void Object::sortSections() {
- // Use stable_sort to maintain the original ordering as closely as possible.
- llvm::stable_sort(Sections, [](const SecPtr &A, const SecPtr &B) {
- // Put SHT_GROUP sections first, since group section headers must come
- // before the sections they contain. This also matches what GNU objcopy
- // does.
- if (A->Type != B->Type &&
- (A->Type == ELF::SHT_GROUP || B->Type == ELF::SHT_GROUP))
- return A->Type == ELF::SHT_GROUP;
- // For all other sections, sort by offset order.
- return A->OriginalOffset < B->OriginalOffset;
- });
-}
-
// Orders segments such that if x = y->ParentSegment then y comes before x.
static void orderSegments(std::vector<Segment *> &Segments) {
llvm::stable_sort(Segments, compareSegmentsByOffset);
@@ -2262,6 +2328,9 @@ static uint64_t layoutSections(Range Sections, uint64_t Offset) {
// the offset from the start of the segment. Using the offset from the start
// of the segment we can assign a new offset to the section. For sections not
// covered by segments we can just bump Offset to the next valid location.
+ // While it is not strictly necessary, lay out the sections in an order based
+ // on their original offsets so that the output resembles the input file as
+ // closely as possible.
+ std::vector<SectionBase *> OutOfSegmentSections;
uint32_t Index = 1;
for (auto &Sec : Sections) {
Sec.Index = Index++;
@@ -2269,12 +2338,19 @@ static uint64_t layoutSections(Range Sections, uint64_t Offset) {
auto Segment = *Sec.ParentSegment;
Sec.Offset =
Segment.Offset + (Sec.OriginalOffset - Segment.OriginalOffset);
- } else {
- Offset = alignTo(Offset, Sec.Align == 0 ? 1 : Sec.Align);
- Sec.Offset = Offset;
- if (Sec.Type != SHT_NOBITS)
- Offset += Sec.Size;
- }
+ } else
+ OutOfSegmentSections.push_back(&Sec);
+ }
+
+ llvm::stable_sort(OutOfSegmentSections,
+ [](const SectionBase *Lhs, const SectionBase *Rhs) {
+ return Lhs->OriginalOffset < Rhs->OriginalOffset;
+ });
+ for (auto *Sec : OutOfSegmentSections) {
+ Offset = alignTo(Offset, Sec->Align == 0 ? 1 : Sec->Align);
+ Sec->Offset = Offset;
+ if (Sec->Type != SHT_NOBITS)
+ Offset += Sec->Size;
}
return Offset;
}
@@ -2282,38 +2358,49 @@ static uint64_t layoutSections(Range Sections, uint64_t Offset) {
// Rewrite sh_offset after some sections are changed to SHT_NOBITS and thus
// occupy no space in the file.
static uint64_t layoutSectionsForOnlyKeepDebug(Object &Obj, uint64_t Off) {
+ // The layout algorithm requires the sections to be handled in the order of
+ // their offsets in the input file, at least inside segments.
+ std::vector<SectionBase *> Sections;
+ Sections.reserve(Obj.sections().size());
uint32_t Index = 1;
for (auto &Sec : Obj.sections()) {
Sec.Index = Index++;
-
- auto *FirstSec = Sec.ParentSegment && Sec.ParentSegment->Type == PT_LOAD
- ? Sec.ParentSegment->firstSection()
+ Sections.push_back(&Sec);
+ }
+ llvm::stable_sort(Sections,
+ [](const SectionBase *Lhs, const SectionBase *Rhs) {
+ return Lhs->OriginalOffset < Rhs->OriginalOffset;
+ });
+
+ for (auto *Sec : Sections) {
+ auto *FirstSec = Sec->ParentSegment && Sec->ParentSegment->Type == PT_LOAD
+ ? Sec->ParentSegment->firstSection()
: nullptr;
// The first section in a PT_LOAD has to have congruent offset and address
// modulo the alignment, which usually equals the maximum page size.
- if (FirstSec && FirstSec == &Sec)
- Off = alignTo(Off, Sec.ParentSegment->Align, Sec.Addr);
+ if (FirstSec && FirstSec == Sec)
+ Off = alignTo(Off, Sec->ParentSegment->Align, Sec->Addr);
// sh_offset is not significant for SHT_NOBITS sections, but the congruence
// rule must be followed if it is the first section in a PT_LOAD. Do not
// advance Off.
- if (Sec.Type == SHT_NOBITS) {
- Sec.Offset = Off;
+ if (Sec->Type == SHT_NOBITS) {
+ Sec->Offset = Off;
continue;
}
if (!FirstSec) {
// FirstSec being nullptr generally means that Sec does not have the
// SHF_ALLOC flag.
- Off = Sec.Align ? alignTo(Off, Sec.Align) : Off;
- } else if (FirstSec != &Sec) {
+ Off = Sec->Align ? alignTo(Off, Sec->Align) : Off;
+ } else if (FirstSec != Sec) {
// The offset is relative to the first section in the PT_LOAD segment. Use
// sh_offset for non-SHF_ALLOC sections.
- Off = Sec.OriginalOffset - FirstSec->OriginalOffset + FirstSec->Offset;
+ Off = Sec->OriginalOffset - FirstSec->OriginalOffset + FirstSec->Offset;
}
- Sec.Offset = Off;
- Off += Sec.Size;
+ Sec->Offset = Off;
+ Off += Sec->Size;
}
return Off;
}
@@ -2460,7 +2547,6 @@ template <class ELFT> Error ELFWriter<ELFT>::finalize() {
if (Error E = removeUnneededSections(Obj))
return E;
- Obj.sortSections();
// We need to assign indexes before we perform layout because we need to know
// if we need large indexes or not. We can assign indexes first and check as
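
Both layoutSections() and the updated-section path in writeSegmentData() above place a section by preserving its offset relative to its parent segment: new offset = segment's new offset + (section's original offset - segment's original offset). A tiny worked sketch with invented numbers:

// Sketch of the segment-relative offset rule; all values are made up.
#include <cassert>
#include <cstdint>

int main() {
  uint64_t SegOriginalOffset = 0x1000; // where the PT_LOAD sat in the input
  uint64_t SegNewOffset = 0x2000;      // where layout placed it in the output
  uint64_t SecOriginalOffset = 0x1200; // section offset in the input file

  // The section keeps its position relative to the start of its segment.
  uint64_t SecNewOffset =
      SegNewOffset + (SecOriginalOffset - SegOriginalOffset);
  assert(SecNewOffset == 0x2200);
}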
diff --git a/llvm/tools/llvm-objcopy/ELF/Object.h b/llvm/tools/llvm-objcopy/ELF/Object.h
index 6fd26afa3ca1..811af4b51310 100644
--- a/llvm/tools/llvm-objcopy/ELF/Object.h
+++ b/llvm/tools/llvm-objcopy/ELF/Object.h
@@ -48,12 +48,12 @@ class Object;
struct Symbol;
class SectionTableRef {
- MutableArrayRef<std::unique_ptr<SectionBase>> Sections;
+ ArrayRef<std::unique_ptr<SectionBase>> Sections;
public:
- using iterator = pointee_iterator<std::unique_ptr<SectionBase> *>;
+ using iterator = pointee_iterator<const std::unique_ptr<SectionBase> *>;
- explicit SectionTableRef(MutableArrayRef<std::unique_ptr<SectionBase>> Secs)
+ explicit SectionTableRef(ArrayRef<std::unique_ptr<SectionBase>> Secs)
: Sections(Secs) {}
SectionTableRef(const SectionTableRef &) = default;
@@ -429,6 +429,7 @@ public:
virtual void markSymbols();
virtual void
replaceSectionReferences(const DenseMap<SectionBase *, SectionBase *> &);
+ virtual bool hasContents() const { return false; }
// Notify the section that it is subject to removal.
virtual void onRemove();
};
@@ -493,6 +494,9 @@ public:
function_ref<bool(const SectionBase *)> ToRemove) override;
Error initialize(SectionTableRef SecTable) override;
void finalize() override;
+ bool hasContents() const override {
+ return Type != ELF::SHT_NOBITS && Type != ELF::SHT_NULL;
+ }
};
class OwnedDataSection : public SectionBase {
@@ -518,9 +522,15 @@ public:
OriginalOffset = SecOff;
}
+ OwnedDataSection(SectionBase &S, ArrayRef<uint8_t> Data)
+ : SectionBase(S), Data(std::begin(Data), std::end(Data)) {
+ Size = Data.size();
+ }
+
void appendHexData(StringRef HexData);
Error accept(SectionVisitor &Sec) const override;
Error accept(MutableSectionVisitor &Visitor) override;
+ bool hasContents() const override { return true; }
};
class CompressedSection : public SectionBase {
@@ -745,6 +755,8 @@ public:
const SectionBase *getSection() const { return SecToApplyRel; }
void setSection(SectionBase *Sec) { SecToApplyRel = Sec; }
+ StringRef getNamePrefix() const;
+
static bool classof(const SectionBase *S) {
return S->OriginalType == ELF::SHT_REL || S->OriginalType == ELF::SHT_RELA;
}
@@ -1016,6 +1028,7 @@ private:
std::vector<SecPtr> Sections;
std::vector<SegPtr> Segments;
std::vector<SecPtr> RemovedSections;
+ DenseMap<SectionBase *, std::vector<uint8_t>> UpdatedSections;
static bool sectionIsAlloc(const SectionBase &Sec) {
return Sec.Flags & ELF::SHF_ALLOC;
@@ -1023,10 +1036,6 @@ private:
public:
template <class T>
- using Range = iterator_range<
- pointee_iterator<typename std::vector<std::unique_ptr<T>>::iterator>>;
-
- template <class T>
using ConstRange = iterator_range<pointee_iterator<
typename std::vector<std::unique_ptr<T>>::const_iterator>>;
@@ -1054,11 +1063,7 @@ public:
SymbolTableSection *SymbolTable = nullptr;
SectionIndexSection *SectionIndexTable = nullptr;
- void sortSections();
- SectionTableRef sections() { return SectionTableRef(Sections); }
- ConstRange<SectionBase> sections() const {
- return make_pointee_range(Sections);
- }
+ SectionTableRef sections() const { return SectionTableRef(Sections); }
iterator_range<
filter_iterator<pointee_iterator<std::vector<SecPtr>::const_iterator>,
decltype(&sectionIsAlloc)>>
@@ -1066,6 +1071,9 @@ public:
return make_filter_range(make_pointee_range(Sections), sectionIsAlloc);
}
+ const auto &getUpdatedSections() const { return UpdatedSections; }
+ Error updateSection(StringRef Name, ArrayRef<uint8_t> Data);
+
SectionBase *findSection(StringRef Name) {
auto SecIt =
find_if(Sections, [&](const SecPtr &Sec) { return Sec->Name == Name; });
@@ -1073,11 +1081,11 @@ public:
}
SectionTableRef removedSections() { return SectionTableRef(RemovedSections); }
- Range<Segment> segments() { return make_pointee_range(Segments); }
ConstRange<Segment> segments() const { return make_pointee_range(Segments); }
Error removeSections(bool AllowBrokenLinks,
std::function<bool(const SectionBase &)> ToRemove);
+ Error replaceSections(const DenseMap<SectionBase *, SectionBase *> &FromTo);
Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove);
template <class T, class... Ts> T &addSection(Ts &&... Args) {
auto Sec = std::make_unique<T>(std::forward<Ts>(Args)...);
diff --git a/llvm/tools/llvm-objcopy/MachO/MachOConfig.h b/llvm/tools/llvm-objcopy/MachO/MachOConfig.h
index 7c5dbfde19a0..93f9facfcf0b 100644
--- a/llvm/tools/llvm-objcopy/MachO/MachOConfig.h
+++ b/llvm/tools/llvm-objcopy/MachO/MachOConfig.h
@@ -9,11 +9,33 @@
#ifndef LLVM_TOOLS_LLVM_OBJCOPY_MACHO_MACHOCONFIG_H
#define LLVM_TOOLS_LLVM_OBJCOPY_MACHO_MACHOCONFIG_H
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringRef.h"
+#include <vector>
+
namespace llvm {
namespace objcopy {
// Mach-O specific configuration for copying/stripping a single file.
-struct MachOConfig {};
+struct MachOConfig {
+ // Repeated options
+ std::vector<StringRef> RPathToAdd;
+ std::vector<StringRef> RPathToPrepend;
+ DenseMap<StringRef, StringRef> RPathsToUpdate;
+ DenseMap<StringRef, StringRef> InstallNamesToUpdate;
+ DenseSet<StringRef> RPathsToRemove;
+
+ // install-name-tool's id option
+ Optional<StringRef> SharedLibId;
+
+ // Boolean options
+ bool StripSwiftSymbols = false;
+ bool KeepUndefined = false;
+
+ // install-name-tool's --delete_all_rpaths
+ bool RemoveAllRpaths = false;
+};
} // namespace objcopy
} // namespace llvm
diff --git a/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp b/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp
index 6ed21806fe5e..3cac77411845 100644
--- a/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp
+++ b/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp
@@ -249,8 +249,12 @@ Error MachOLayoutBuilder::layoutTail(uint64_t Offset) {
uint64_t StartOfExportTrie =
StartOfLazyBindingInfo + O.LazyBinds.Opcodes.size();
uint64_t StartOfFunctionStarts = StartOfExportTrie + O.Exports.Trie.size();
- uint64_t StartOfDataInCode =
+ uint64_t StartOfDyldExportsTrie =
StartOfFunctionStarts + O.FunctionStarts.Data.size();
+ uint64_t StartOfChainedFixups =
+ StartOfDyldExportsTrie + O.ExportsTrie.Data.size();
+ uint64_t StartOfDataInCode =
+ StartOfChainedFixups + O.ChainedFixups.Data.size();
uint64_t StartOfLinkerOptimizationHint =
StartOfDataInCode + O.DataInCode.Data.size();
uint64_t StartOfSymbols =
@@ -262,10 +266,31 @@ Error MachOLayoutBuilder::layoutTail(uint64_t Offset) {
sizeof(uint32_t) * O.IndirectSymTable.Symbols.size();
uint64_t StartOfCodeSignature =
StartOfSymbolStrings + StrTableBuilder.getSize();
- if (O.CodeSignatureCommandIndex)
+ uint32_t CodeSignatureSize = 0;
+ if (O.CodeSignatureCommandIndex) {
StartOfCodeSignature = alignTo(StartOfCodeSignature, 16);
+
+ // Note: These calculations are to be kept in sync with the same
+ // calculations performed in LLD's CodeSignatureSection.
+ const uint32_t AllHeadersSize =
+ alignTo(CodeSignature.FixedHeadersSize + OutputFileName.size() + 1,
+ CodeSignature.Align);
+ const uint32_t BlockCount =
+ (StartOfCodeSignature + CodeSignature.BlockSize - 1) /
+ CodeSignature.BlockSize;
+ const uint32_t Size =
+ alignTo(AllHeadersSize + BlockCount * CodeSignature.HashSize,
+ CodeSignature.Align);
+
+ CodeSignature.StartOffset = StartOfCodeSignature;
+ CodeSignature.AllHeadersSize = AllHeadersSize;
+ CodeSignature.BlockCount = BlockCount;
+ CodeSignature.OutputFileName = OutputFileName;
+ CodeSignature.Size = Size;
+ CodeSignatureSize = Size;
+ }
uint64_t LinkEditSize =
- (StartOfCodeSignature + O.CodeSignature.Data.size()) - StartOfLinkEdit;
+ StartOfCodeSignature + CodeSignatureSize - StartOfLinkEdit;
// Now we have determined the layout of the contents of the __LINKEDIT
// segment. Update its load command.
@@ -293,7 +318,7 @@ Error MachOLayoutBuilder::layoutTail(uint64_t Offset) {
switch (cmd) {
case MachO::LC_CODE_SIGNATURE:
MLC.linkedit_data_command_data.dataoff = StartOfCodeSignature;
- MLC.linkedit_data_command_data.datasize = O.CodeSignature.Data.size();
+ MLC.linkedit_data_command_data.datasize = CodeSignatureSize;
break;
case MachO::LC_SYMTAB:
MLC.symtab_command_data.symoff = StartOfSymbols;
@@ -332,6 +357,14 @@ Error MachOLayoutBuilder::layoutTail(uint64_t Offset) {
MLC.linkedit_data_command_data.dataoff = StartOfFunctionStarts;
MLC.linkedit_data_command_data.datasize = O.FunctionStarts.Data.size();
break;
+ case MachO::LC_DYLD_CHAINED_FIXUPS:
+ MLC.linkedit_data_command_data.dataoff = StartOfChainedFixups;
+ MLC.linkedit_data_command_data.datasize = O.ChainedFixups.Data.size();
+ break;
+ case MachO::LC_DYLD_EXPORTS_TRIE:
+ MLC.linkedit_data_command_data.dataoff = StartOfDyldExportsTrie;
+ MLC.linkedit_data_command_data.datasize = O.ExportsTrie.Data.size();
+ break;
case MachO::LC_DYLD_INFO:
case MachO::LC_DYLD_INFO_ONLY:
MLC.dyld_info_command_data.rebase_off =
@@ -380,6 +413,10 @@ Error MachOLayoutBuilder::layoutTail(uint64_t Offset) {
case MachO::LC_SOURCE_VERSION:
case MachO::LC_THREAD:
case MachO::LC_UNIXTHREAD:
+ case MachO::LC_SUB_FRAMEWORK:
+ case MachO::LC_SUB_UMBRELLA:
+ case MachO::LC_SUB_CLIENT:
+ case MachO::LC_SUB_LIBRARY:
// Nothing to update.
break;
default:
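
The LC_CODE_SIGNATURE sizing above mirrors LLD's CodeSignatureSection: the header size depends on the output file name, the block count is derived from the number of bytes that precede the signature, and both are rounded up to 16-byte alignment. A worked sketch with invented inputs; the FixedHeadersSize value below is assumed for illustration, not taken from the real headers:

// Illustrative arithmetic only; FixedHeadersSize is a hypothetical value.
#include <cstdint>
#include <cstdio>
#include <string>

static uint64_t alignTo(uint64_t Value, uint64_t Align) {
  return (Value + Align - 1) / Align * Align;
}

int main() {
  const uint64_t Align = 16;
  const uint64_t BlockSize = 1 << 12;    // 4 KiB hashed blocks
  const uint64_t HashSize = 256 / 8;     // one SHA-256 digest per block
  const uint64_t FixedHeadersSize = 112; // assumed, for illustration only

  std::string OutputFileName = "a.out";
  uint64_t StartOfCodeSignature = 0x8000; // bytes preceding the signature

  uint64_t AllHeadersSize =
      alignTo(FixedHeadersSize + OutputFileName.size() + 1, Align); // 128
  uint64_t BlockCount =
      (StartOfCodeSignature + BlockSize - 1) / BlockSize;           // 8
  uint64_t Size =
      alignTo(AllHeadersSize + BlockCount * HashSize, Align);       // 384

  std::printf("headers=%llu blocks=%llu size=%llu\n",
              (unsigned long long)AllHeadersSize,
              (unsigned long long)BlockCount, (unsigned long long)Size);
}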
diff --git a/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.h b/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.h
index 5fe6683e27f3..44d03b4af7e8 100644
--- a/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.h
+++ b/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.h
@@ -16,10 +16,49 @@ namespace llvm {
namespace objcopy {
namespace macho {
+/// When Mach-O binaries include an LC_CODE_SIGNATURE load command, the
+/// __LINKEDIT data segment will include a section corresponding to that
+/// load command. The section serves as a signature for the binary: it holds
+/// a header followed by a hash of the binary. If present, the CodeSignature
+/// section is the last component of the binary.
+struct CodeSignatureInfo {
+ // NOTE: These values are to be kept in sync with those in
+ // LLD's CodeSignatureSection class.
+
+ static constexpr uint32_t Align = 16;
+ static constexpr uint8_t BlockSizeShift = 12;
+ // The binary is read in blocks of the following size.
+ static constexpr size_t BlockSize = (1 << BlockSizeShift); // 4 KiB
+ // For each block, a SHA256 hash (256 bits, 32 bytes) is written to
+ // the CodeSignature section.
+ static constexpr size_t HashSize = 256 / 8;
+ static constexpr size_t BlobHeadersSize = llvm::alignTo<8>(
+ sizeof(llvm::MachO::CS_SuperBlob) + sizeof(llvm::MachO::CS_BlobIndex));
+ // The size of the entire header depends upon the filename the binary is being
+ // written to, but the rest of the header is fixed in size.
+ static constexpr uint32_t FixedHeadersSize =
+ BlobHeadersSize + sizeof(llvm::MachO::CS_CodeDirectory);
+
+ // The offset relative to the start of the binary where
+ // the CodeSignature section should begin.
+ uint32_t StartOffset;
+ // The size of the entire header, output file name size included.
+ uint32_t AllHeadersSize;
+ // The number of blocks required to hash the binary.
+ uint32_t BlockCount;
+ StringRef OutputFileName;
+ // The size of the entire CodeSignature section, including both the header and
+ // hashes.
+ uint32_t Size;
+};
+
class MachOLayoutBuilder {
Object &O;
bool Is64Bit;
+ StringRef OutputFileName;
uint64_t PageSize;
+ CodeSignatureInfo CodeSignature;
// Points to the __LINKEDIT segment if it exists.
MachO::macho_load_command *LinkEditLoadCommand = nullptr;
@@ -37,14 +76,18 @@ class MachOLayoutBuilder {
bool Is64Bit);
public:
- MachOLayoutBuilder(Object &O, bool Is64Bit, uint64_t PageSize)
- : O(O), Is64Bit(Is64Bit), PageSize(PageSize),
+ MachOLayoutBuilder(Object &O, bool Is64Bit, StringRef OutputFileName,
+ uint64_t PageSize)
+ : O(O), Is64Bit(Is64Bit), OutputFileName(OutputFileName),
+ PageSize(PageSize),
StrTableBuilder(getStringTableBuilderKind(O, Is64Bit)) {}
// Recomputes and updates fields in the given object such as file offsets.
Error layout();
StringTableBuilder &getStringTableBuilder() { return StrTableBuilder; }
+
+ const CodeSignatureInfo &getCodeSignature() { return CodeSignature; }
};
} // end namespace macho
diff --git a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp b/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
index 823306916bbe..9e7b91d73057 100644
--- a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
+++ b/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
@@ -9,6 +9,7 @@
#include "MachOObjcopy.h"
#include "../llvm-objcopy.h"
#include "CommonConfig.h"
+#include "MachO/MachOConfig.h"
#include "MachOReader.h"
#include "MachOWriter.h"
#include "MultiFormatConfig.h"
@@ -19,6 +20,7 @@
#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileOutputBuffer.h"
+#include "llvm/Support/Path.h"
#include "llvm/Support/SmallVectorMemoryBuffer.h"
using namespace llvm;
@@ -87,17 +89,20 @@ static void markSymbols(const CommonConfig &, Object &Obj) {
(*ISE.Symbol)->Referenced = true;
}
-static void updateAndRemoveSymbols(const CommonConfig &Config, Object &Obj) {
+static void updateAndRemoveSymbols(const CommonConfig &Config,
+ const MachOConfig &MachOConfig,
+ Object &Obj) {
for (SymbolEntry &Sym : Obj.SymTable) {
auto I = Config.SymbolsToRename.find(Sym.Name);
if (I != Config.SymbolsToRename.end())
Sym.Name = std::string(I->getValue());
}
- auto RemovePred = [Config, &Obj](const std::unique_ptr<SymbolEntry> &N) {
+ auto RemovePred = [Config, MachOConfig,
+ &Obj](const std::unique_ptr<SymbolEntry> &N) {
if (N->Referenced)
return false;
- if (Config.KeepUndefined && N->isUndefinedSymbol())
+ if (MachOConfig.KeepUndefined && N->isUndefinedSymbol())
return false;
if (N->n_desc & MachO::REFERENCED_DYNAMICALLY)
return false;
@@ -106,8 +111,9 @@ static void updateAndRemoveSymbols(const CommonConfig &Config, Object &Obj) {
if (Config.DiscardMode == DiscardType::All && !(N->n_type & MachO::N_EXT))
return true;
// This behavior is consistent with cctools' strip.
- if (Config.StripSwiftSymbols && (Obj.Header.Flags & MachO::MH_DYLDLINK) &&
- Obj.SwiftVersion && *Obj.SwiftVersion && N->isSwiftSymbol())
+ if (MachOConfig.StripSwiftSymbols &&
+ (Obj.Header.Flags & MachO::MH_DYLDLINK) && Obj.SwiftVersion &&
+ *Obj.SwiftVersion && N->isSwiftSymbol())
return true;
return false;
};
@@ -139,17 +145,17 @@ static LoadCommand buildRPathLoadCommand(StringRef Path) {
return LC;
}
-static Error processLoadCommands(const CommonConfig &Config, Object &Obj) {
+static Error processLoadCommands(const MachOConfig &MachOConfig, Object &Obj) {
// Remove RPaths.
- DenseSet<StringRef> RPathsToRemove(Config.RPathsToRemove.begin(),
- Config.RPathsToRemove.end());
+ DenseSet<StringRef> RPathsToRemove(MachOConfig.RPathsToRemove.begin(),
+ MachOConfig.RPathsToRemove.end());
LoadCommandPred RemovePred = [&RPathsToRemove,
- &Config](const LoadCommand &LC) {
+ &MachOConfig](const LoadCommand &LC) {
if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH) {
// When removing all RPaths we don't need to care
// about what it contains
- if (Config.RemoveAllRpaths)
+ if (MachOConfig.RemoveAllRpaths)
return true;
StringRef RPath = getPayloadString(LC);
@@ -166,7 +172,7 @@ static Error processLoadCommands(const CommonConfig &Config, Object &Obj) {
// Emit an error if the Mach-O binary does not contain an rpath path name
// specified in -delete_rpath.
- for (StringRef RPath : Config.RPathsToRemove) {
+ for (StringRef RPath : MachOConfig.RPathsToRemove) {
if (RPathsToRemove.count(RPath))
return createStringError(errc::invalid_argument,
"no LC_RPATH load command with path: %s",
@@ -182,7 +188,7 @@ static Error processLoadCommands(const CommonConfig &Config, Object &Obj) {
}
// Throw errors for invalid RPaths.
- for (const auto &OldNew : Config.RPathsToUpdate) {
+ for (const auto &OldNew : MachOConfig.RPathsToUpdate) {
StringRef Old = OldNew.getFirst();
StringRef New = OldNew.getSecond();
if (!RPaths.contains(Old))
@@ -198,14 +204,14 @@ static Error processLoadCommands(const CommonConfig &Config, Object &Obj) {
for (LoadCommand &LC : Obj.LoadCommands) {
switch (LC.MachOLoadCommand.load_command_data.cmd) {
case MachO::LC_ID_DYLIB:
- if (Config.SharedLibId)
+ if (MachOConfig.SharedLibId)
updateLoadCommandPayloadString<MachO::dylib_command>(
- LC, *Config.SharedLibId);
+ LC, *MachOConfig.SharedLibId);
break;
case MachO::LC_RPATH: {
StringRef RPath = getPayloadString(LC);
- StringRef NewRPath = Config.RPathsToUpdate.lookup(RPath);
+ StringRef NewRPath = MachOConfig.RPathsToUpdate.lookup(RPath);
if (!NewRPath.empty())
updateLoadCommandPayloadString<MachO::rpath_command>(LC, NewRPath);
break;
@@ -217,7 +223,7 @@ static Error processLoadCommands(const CommonConfig &Config, Object &Obj) {
case MachO::LC_LOAD_WEAK_DYLIB:
StringRef InstallName = getPayloadString(LC);
StringRef NewInstallName =
- Config.InstallNamesToUpdate.lookup(InstallName);
+ MachOConfig.InstallNamesToUpdate.lookup(InstallName);
if (!NewInstallName.empty())
updateLoadCommandPayloadString<MachO::dylib_command>(LC,
NewInstallName);
@@ -226,7 +232,7 @@ static Error processLoadCommands(const CommonConfig &Config, Object &Obj) {
}
// Add new RPaths.
- for (StringRef RPath : Config.RPathToAdd) {
+ for (StringRef RPath : MachOConfig.RPathToAdd) {
if (RPaths.contains(RPath))
return createStringError(errc::invalid_argument,
"rpath '" + RPath +
@@ -235,7 +241,7 @@ static Error processLoadCommands(const CommonConfig &Config, Object &Obj) {
Obj.LoadCommands.push_back(buildRPathLoadCommand(RPath));
}
- for (StringRef RPath : Config.RPathToPrepend) {
+ for (StringRef RPath : MachOConfig.RPathToPrepend) {
if (RPaths.contains(RPath))
return createStringError(errc::invalid_argument,
"rpath '" + RPath +
@@ -248,7 +254,7 @@ static Error processLoadCommands(const CommonConfig &Config, Object &Obj) {
// Unlike appending rpaths, the indexes of subsequent load commands must
// be recalculated after prepending one.
- if (!Config.RPathToPrepend.empty())
+ if (!MachOConfig.RPathToPrepend.empty())
Obj.updateLoadCommandIndexes();
return Error::success();
@@ -333,7 +339,8 @@ static Error isValidMachOCannonicalName(StringRef Name) {
return Error::success();
}
-static Error handleArgs(const CommonConfig &Config, Object &Obj) {
+static Error handleArgs(const CommonConfig &Config,
+ const MachOConfig &MachOConfig, Object &Obj) {
// Dump sections before add/remove for compatibility with GNU objcopy.
for (StringRef Flag : Config.DumpSection) {
StringRef SectionName;
@@ -350,7 +357,7 @@ static Error handleArgs(const CommonConfig &Config, Object &Obj) {
if (Config.StripAll)
markSymbols(Config, Obj);
- updateAndRemoveSymbols(Config, Obj);
+ updateAndRemoveSymbols(Config, MachOConfig, Obj);
if (Config.StripAll)
for (LoadCommand &LC : Obj.LoadCommands)
@@ -367,14 +374,14 @@ static Error handleArgs(const CommonConfig &Config, Object &Obj) {
return E;
}
- if (Error E = processLoadCommands(Config, Obj))
+ if (Error E = processLoadCommands(MachOConfig, Obj))
return E;
return Error::success();
}
Error objcopy::macho::executeObjcopyOnBinary(const CommonConfig &Config,
- const MachOConfig &,
+ const MachOConfig &MachOConfig,
object::MachOObjectFile &In,
raw_ostream &Out) {
MachOReader Reader(In);
@@ -382,7 +389,12 @@ Error objcopy::macho::executeObjcopyOnBinary(const CommonConfig &Config,
if (!O)
return createFileError(Config.InputFilename, O.takeError());
- if (Error E = handleArgs(Config, **O))
+ if (O->get()->Header.FileType == MachO::HeaderFileType::MH_PRELOAD)
+ return createStringError(std::errc::not_supported,
+ "%s: MH_PRELOAD files are not supported",
+ Config.InputFilename.str().c_str());
+
+ if (Error E = handleArgs(Config, MachOConfig, **O))
return createFileError(Config.InputFilename, std::move(E));
// Page size used for alignment of segment sizes in Mach-O executables and
@@ -398,7 +410,8 @@ Error objcopy::macho::executeObjcopyOnBinary(const CommonConfig &Config,
PageSize = 4096;
}
- MachOWriter Writer(**O, In.is64Bit(), In.isLittleEndian(), PageSize, Out);
+ MachOWriter Writer(**O, In.is64Bit(), In.isLittleEndian(),
+ sys::path::filename(Config.OutputFilename), PageSize, Out);
if (auto E = Writer.finalize())
return E;
return Writer.write();
diff --git a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.h b/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.h
index e30940a8d6eb..d03eee9d5fdb 100644
--- a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.h
+++ b/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.h
@@ -24,7 +24,8 @@ struct MachOConfig;
class MultiFormatConfig;
namespace macho {
-Error executeObjcopyOnBinary(const CommonConfig &Config, const MachOConfig &,
+Error executeObjcopyOnBinary(const CommonConfig &Config,
+ const MachOConfig &MachOConfig,
object::MachOObjectFile &In, raw_ostream &Out);
Error executeObjcopyOnMachOUniversalBinary(
diff --git a/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp b/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp
index 7d1c29b42c2e..d68d1692997a 100644
--- a/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp
+++ b/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp
@@ -116,6 +116,7 @@ Expected<std::vector<std::unique_ptr<Section>>> static extractSections(
Error MachOReader::readLoadCommands(Object &O) const {
// For MachO sections indices start from 1.
uint32_t NextSectionIndex = 1;
+ static constexpr char TextSegmentName[] = "__TEXT";
for (auto LoadCmd : MachOObj.load_commands()) {
LoadCommand LC;
switch (LoadCmd.C.cmd) {
@@ -123,6 +124,14 @@ Error MachOReader::readLoadCommands(Object &O) const {
O.CodeSignatureCommandIndex = O.LoadCommands.size();
break;
case MachO::LC_SEGMENT:
+ // LoadCmd.Ptr might temporarily not be aligned as
+ // MachO::segment_command requires, but the segname char pointer does
+ // not have alignment restrictions.
+ if (StringRef(reinterpret_cast<const char *>(
+ LoadCmd.Ptr + offsetof(MachO::segment_command, segname))) ==
+ TextSegmentName)
+ O.TextSegmentCommandIndex = O.LoadCommands.size();
+
if (Expected<std::vector<std::unique_ptr<Section>>> Sections =
extractSections<MachO::section, MachO::segment_command>(
LoadCmd, MachOObj, NextSectionIndex))
@@ -131,6 +140,14 @@ Error MachOReader::readLoadCommands(Object &O) const {
return Sections.takeError();
break;
case MachO::LC_SEGMENT_64:
+ // LoadCmd.Ptr might temporarily not be aligned as
+ // MachO::segment_command_64 requires, but the segname char pointer does
+ // not have alignment restrictions.
+ if (StringRef(reinterpret_cast<const char *>(
+ LoadCmd.Ptr + offsetof(MachO::segment_command_64, segname))) ==
+ TextSegmentName)
+ O.TextSegmentCommandIndex = O.LoadCommands.size();
+
if (Expected<std::vector<std::unique_ptr<Section>>> Sections =
extractSections<MachO::section_64, MachO::segment_command_64>(
LoadCmd, MachOObj, NextSectionIndex))
@@ -157,6 +174,12 @@ Error MachOReader::readLoadCommands(Object &O) const {
case MachO::LC_FUNCTION_STARTS:
O.FunctionStartsCommandIndex = O.LoadCommands.size();
break;
+ case MachO::LC_DYLD_EXPORTS_TRIE:
+ O.ExportsTrieCommandIndex = O.LoadCommands.size();
+ break;
+ case MachO::LC_DYLD_CHAINED_FIXUPS:
+ O.ChainedFixupsCommandIndex = O.LoadCommands.size();
+ break;
}
#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \
case MachO::LCName: \
@@ -271,10 +294,6 @@ void MachOReader::readLinkData(Object &O, Optional<size_t> LCIndex,
arrayRefFromStringRef(MachOObj.getData().substr(LC.dataoff, LC.datasize));
}
-void MachOReader::readCodeSignature(Object &O) const {
- return readLinkData(O, O.CodeSignatureCommandIndex, O.CodeSignature);
-}
-
void MachOReader::readDataInCodeData(Object &O) const {
return readLinkData(O, O.DataInCodeCommandIndex, O.DataInCode);
}
@@ -288,6 +307,14 @@ void MachOReader::readFunctionStartsData(Object &O) const {
return readLinkData(O, O.FunctionStartsCommandIndex, O.FunctionStarts);
}
+void MachOReader::readExportsTrie(Object &O) const {
+ return readLinkData(O, O.ExportsTrieCommandIndex, O.ExportsTrie);
+}
+
+void MachOReader::readChainedFixups(Object &O) const {
+ return readLinkData(O, O.ChainedFixupsCommandIndex, O.ChainedFixups);
+}
+
void MachOReader::readIndirectSymbolTable(Object &O) const {
MachO::dysymtab_command DySymTab = MachOObj.getDysymtabLoadCommand();
constexpr uint32_t AbsOrLocalMask =
@@ -336,10 +363,11 @@ Expected<std::unique_ptr<Object>> MachOReader::create() const {
readWeakBindInfo(*Obj);
readLazyBindInfo(*Obj);
readExportInfo(*Obj);
- readCodeSignature(*Obj);
readDataInCodeData(*Obj);
readLinkerOptimizationHint(*Obj);
readFunctionStartsData(*Obj);
+ readExportsTrie(*Obj);
+ readChainedFixups(*Obj);
readIndirectSymbolTable(*Obj);
readSwiftVersion(*Obj);
return std::move(Obj);
diff --git a/llvm/tools/llvm-objcopy/MachO/MachOReader.h b/llvm/tools/llvm-objcopy/MachO/MachOReader.h
index ca3a0214cb6d..b29e86ca642e 100644
--- a/llvm/tools/llvm-objcopy/MachO/MachOReader.h
+++ b/llvm/tools/llvm-objcopy/MachO/MachOReader.h
@@ -41,6 +41,8 @@ class MachOReader : public Reader {
void readDataInCodeData(Object &O) const;
void readLinkerOptimizationHint(Object &O) const;
void readFunctionStartsData(Object &O) const;
+ void readExportsTrie(Object &O) const;
+ void readChainedFixups(Object &O) const;
void readIndirectSymbolTable(Object &O) const;
void readSwiftVersion(Object &O) const;
diff --git a/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp b/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp
index 295098ed4118..688945afe944 100644
--- a/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp
+++ b/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp
@@ -14,10 +14,16 @@
#include "llvm/Object/MachO.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/SHA256.h"
#include <memory>
+#if defined(__APPLE__)
+#include <sys/mman.h>
+#endif
+
using namespace llvm;
using namespace llvm::objcopy::macho;
+using namespace llvm::support::endian;
size_t MachOWriter::headerSize() const {
return Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
@@ -127,6 +133,26 @@ size_t MachOWriter::totalSize() const {
LinkEditDataCommand.datasize);
}
+ if (O.ChainedFixupsCommandIndex) {
+ const MachO::linkedit_data_command &LinkEditDataCommand =
+ O.LoadCommands[*O.ChainedFixupsCommandIndex]
+ .MachOLoadCommand.linkedit_data_command_data;
+
+ if (LinkEditDataCommand.dataoff)
+ Ends.push_back(LinkEditDataCommand.dataoff +
+ LinkEditDataCommand.datasize);
+ }
+
+ if (O.ExportsTrieCommandIndex) {
+ const MachO::linkedit_data_command &LinkEditDataCommand =
+ O.LoadCommands[*O.ExportsTrieCommandIndex]
+ .MachOLoadCommand.linkedit_data_command_data;
+
+ if (LinkEditDataCommand.dataoff)
+ Ends.push_back(LinkEditDataCommand.dataoff +
+ LinkEditDataCommand.datasize);
+ }
+
 // Otherwise, use the last section / relocation.
for (const LoadCommand &LC : O.LoadCommands)
for (const std::unique_ptr<Section> &S : LC.Sections) {
@@ -423,8 +449,147 @@ void MachOWriter::writeLinkData(Optional<size_t> LCIndex, const LinkData &LD) {
memcpy(Out, LD.Data.data(), LD.Data.size());
}
+static uint64_t
+getSegmentFileOffset(const LoadCommand &TextSegmentLoadCommand) {
+ const MachO::macho_load_command &MLC =
+ TextSegmentLoadCommand.MachOLoadCommand;
+ switch (MLC.load_command_data.cmd) {
+ case MachO::LC_SEGMENT:
+ return MLC.segment_command_data.fileoff;
+ case MachO::LC_SEGMENT_64:
+ return MLC.segment_command_64_data.fileoff;
+ default:
+ return 0;
+ }
+}
+
+static uint64_t getSegmentFileSize(const LoadCommand &TextSegmentLoadCommand) {
+ const MachO::macho_load_command &MLC =
+ TextSegmentLoadCommand.MachOLoadCommand;
+ switch (MLC.load_command_data.cmd) {
+ case MachO::LC_SEGMENT:
+ return MLC.segment_command_data.filesize;
+ case MachO::LC_SEGMENT_64:
+ return MLC.segment_command_64_data.filesize;
+ default:
+ return 0;
+ }
+}
+
void MachOWriter::writeCodeSignatureData() {
- return writeLinkData(O.CodeSignatureCommandIndex, O.CodeSignature);
+ // NOTE: This CodeSignature section behaviour must be kept in sync with that
+ // performed in LLD's CodeSignatureSection::write /
+ // CodeSignatureSection::writeHashes. Furthermore, this call must occur only
+ // after the rest of the binary has already been written to the buffer. This
+ // is because the buffer is read from to perform the necessary hashing.
+
+ // The CodeSignature section is the last section in the MachO binary and
+ // contains a hash of all content in the binary before it. Since llvm-objcopy
+ // has likely modified the target binary, the hash must be regenerated
+ // entirely. To generate this hash, we must read from the start of the binary
+ // (HashReadStart) to just before the start of the CodeSignature section
+ // (HashReadEnd).
+
+ const CodeSignatureInfo &CodeSignature = LayoutBuilder.getCodeSignature();
+
+ uint8_t *BufferStart = reinterpret_cast<uint8_t *>(Buf->getBufferStart());
+ uint8_t *HashReadStart = BufferStart;
+ uint8_t *HashReadEnd = BufferStart + CodeSignature.StartOffset;
+
+ // The CodeSignature section begins with a header, after which the hashes
+ // of each page of the binary are written.
+ uint8_t *HashWriteStart = HashReadEnd + CodeSignature.AllHeadersSize;
+
+ uint32_t TextSegmentFileOff = 0;
+ uint32_t TextSegmentFileSize = 0;
+ if (O.TextSegmentCommandIndex) {
+ const LoadCommand &TextSegmentLoadCommand =
+ O.LoadCommands[*O.TextSegmentCommandIndex];
+ assert(TextSegmentLoadCommand.MachOLoadCommand.load_command_data.cmd ==
+ MachO::LC_SEGMENT ||
+ TextSegmentLoadCommand.MachOLoadCommand.load_command_data.cmd ==
+ MachO::LC_SEGMENT_64);
+ assert(StringRef(TextSegmentLoadCommand.MachOLoadCommand
+ .segment_command_data.segname) == "__TEXT");
+ TextSegmentFileOff = getSegmentFileOffset(TextSegmentLoadCommand);
+ TextSegmentFileSize = getSegmentFileSize(TextSegmentLoadCommand);
+ }
+
+ const uint32_t FileNamePad = CodeSignature.AllHeadersSize -
+ CodeSignature.FixedHeadersSize -
+ CodeSignature.OutputFileName.size();
+
+ // Write code section header.
+ auto *SuperBlob = reinterpret_cast<MachO::CS_SuperBlob *>(HashReadEnd);
+ write32be(&SuperBlob->magic, MachO::CSMAGIC_EMBEDDED_SIGNATURE);
+ write32be(&SuperBlob->length, CodeSignature.Size);
+ write32be(&SuperBlob->count, 1);
+ auto *BlobIndex = reinterpret_cast<MachO::CS_BlobIndex *>(&SuperBlob[1]);
+ write32be(&BlobIndex->type, MachO::CSSLOT_CODEDIRECTORY);
+ write32be(&BlobIndex->offset, CodeSignature.BlobHeadersSize);
+ auto *CodeDirectory = reinterpret_cast<MachO::CS_CodeDirectory *>(
+ HashReadEnd + CodeSignature.BlobHeadersSize);
+ write32be(&CodeDirectory->magic, MachO::CSMAGIC_CODEDIRECTORY);
+ write32be(&CodeDirectory->length,
+ CodeSignature.Size - CodeSignature.BlobHeadersSize);
+ write32be(&CodeDirectory->version, MachO::CS_SUPPORTSEXECSEG);
+ write32be(&CodeDirectory->flags, MachO::CS_ADHOC | MachO::CS_LINKER_SIGNED);
+ write32be(&CodeDirectory->hashOffset,
+ sizeof(MachO::CS_CodeDirectory) +
+ CodeSignature.OutputFileName.size() + FileNamePad);
+ write32be(&CodeDirectory->identOffset, sizeof(MachO::CS_CodeDirectory));
+ CodeDirectory->nSpecialSlots = 0;
+ write32be(&CodeDirectory->nCodeSlots, CodeSignature.BlockCount);
+ write32be(&CodeDirectory->codeLimit, CodeSignature.StartOffset);
+ CodeDirectory->hashSize = static_cast<uint8_t>(CodeSignature.HashSize);
+ CodeDirectory->hashType = MachO::kSecCodeSignatureHashSHA256;
+ CodeDirectory->platform = 0;
+ CodeDirectory->pageSize = CodeSignature.BlockSizeShift;
+ CodeDirectory->spare2 = 0;
+ CodeDirectory->scatterOffset = 0;
+ CodeDirectory->teamOffset = 0;
+ CodeDirectory->spare3 = 0;
+ CodeDirectory->codeLimit64 = 0;
+ write64be(&CodeDirectory->execSegBase, TextSegmentFileOff);
+ write64be(&CodeDirectory->execSegLimit, TextSegmentFileSize);
+ write64be(&CodeDirectory->execSegFlags, O.Header.FileType == MachO::MH_EXECUTE
+ ? MachO::CS_EXECSEG_MAIN_BINARY
+ : 0);
+
+ auto *Id = reinterpret_cast<char *>(&CodeDirectory[1]);
+ memcpy(Id, CodeSignature.OutputFileName.begin(),
+ CodeSignature.OutputFileName.size());
+ memset(Id + CodeSignature.OutputFileName.size(), 0, FileNamePad);
+
+ // Write the hashes.
+ uint8_t *CurrHashReadPosition = HashReadStart;
+ uint8_t *CurrHashWritePosition = HashWriteStart;
+ while (CurrHashReadPosition < HashReadEnd) {
+ StringRef Block(reinterpret_cast<char *>(CurrHashReadPosition),
+ std::min(HashReadEnd - CurrHashReadPosition,
+ static_cast<ssize_t>(CodeSignature.BlockSize)));
+ SHA256 Hasher;
+ Hasher.update(Block);
+ StringRef Hash = Hasher.final();
+ assert(Hash.size() == CodeSignature.HashSize);
+ memcpy(CurrHashWritePosition, Hash.data(), CodeSignature.HashSize);
+ CurrHashReadPosition += CodeSignature.BlockSize;
+ CurrHashWritePosition += CodeSignature.HashSize;
+ }
+#if defined(__APPLE__)
+ // This is a macOS-specific work-around and makes no sense for any
+ // other host OS. See https://openradar.appspot.com/FB8914231
+ //
+ // The macOS kernel maintains a signature-verification cache to
+ // quickly validate applications at time of execve(2). The trouble
+ // is that the kernel creates the cache entry at the time of the
+ // mmap(2) call, before we have a chance to write either the code to
+ // sign or the signature header+hashes. The fix is to invalidate
+ // all cached data associated with the output file, thus discarding
+ // the bogus prematurely-cached signature.
+ msync(BufferStart, CodeSignature.StartOffset + CodeSignature.Size,
+ MS_INVALIDATE);
+#endif
}
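To put rough numbers on the hashing loop above (an illustrative calculation with an assumed file size, not part of the change): with 4 KiB blocks and 32-byte SHA-256 digests, 10 MiB of pre-signature content needs 2560 hash slots, i.e. 80 KiB of hashes after the header.

// Illustrative arithmetic only.
constexpr uint64_t PreSignatureBytes = 10 * 1024 * 1024; // assumed example size
constexpr uint64_t HashSlots = (PreSignatureBytes + 4096 - 1) / 4096; // 2560
constexpr uint64_t HashAreaBytes = HashSlots * 32;                    // 81920
static_assert(HashAreaBytes == 80 * 1024, "matches the 80 KiB figure above");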
void MachOWriter::writeDataInCodeData() {
@@ -440,6 +605,14 @@ void MachOWriter::writeFunctionStartsData() {
return writeLinkData(O.FunctionStartsCommandIndex, O.FunctionStarts);
}
+void MachOWriter::writeChainedFixupsData() {
+ return writeLinkData(O.ChainedFixupsCommandIndex, O.ChainedFixups);
+}
+
+void MachOWriter::writeExportsTrieData() {
+ return writeLinkData(O.ExportsTrieCommandIndex, O.ExportsTrie);
+}
+
void MachOWriter::writeTail() {
typedef void (MachOWriter::*WriteHandlerType)(void);
typedef std::pair<uint64_t, WriteHandlerType> WriteOperation;
@@ -525,6 +698,26 @@ void MachOWriter::writeTail() {
&MachOWriter::writeFunctionStartsData);
}
+ if (O.ChainedFixupsCommandIndex) {
+ const MachO::linkedit_data_command &LinkEditDataCommand =
+ O.LoadCommands[*O.ChainedFixupsCommandIndex]
+ .MachOLoadCommand.linkedit_data_command_data;
+
+ if (LinkEditDataCommand.dataoff)
+ Queue.emplace_back(LinkEditDataCommand.dataoff,
+ &MachOWriter::writeChainedFixupsData);
+ }
+
+ if (O.ExportsTrieCommandIndex) {
+ const MachO::linkedit_data_command &LinkEditDataCommand =
+ O.LoadCommands[*O.ExportsTrieCommandIndex]
+ .MachOLoadCommand.linkedit_data_command_data;
+
+ if (LinkEditDataCommand.dataoff)
+ Queue.emplace_back(LinkEditDataCommand.dataoff,
+ &MachOWriter::writeExportsTrieData);
+ }
+
llvm::sort(Queue, [](const WriteOperation &LHS, const WriteOperation &RHS) {
return LHS.first < RHS.first;
});
diff --git a/llvm/tools/llvm-objcopy/MachO/MachOWriter.h b/llvm/tools/llvm-objcopy/MachO/MachOWriter.h
index c8c06d644e9f..a172534dac8a 100644
--- a/llvm/tools/llvm-objcopy/MachO/MachOWriter.h
+++ b/llvm/tools/llvm-objcopy/MachO/MachOWriter.h
@@ -50,13 +50,16 @@ class MachOWriter {
void writeDataInCodeData();
void writeLinkerOptimizationHint();
void writeFunctionStartsData();
+ void writeChainedFixupsData();
+ void writeExportsTrieData();
void writeTail();
public:
- MachOWriter(Object &O, bool Is64Bit, bool IsLittleEndian, uint64_t PageSize,
- raw_ostream &Out)
+ MachOWriter(Object &O, bool Is64Bit, bool IsLittleEndian,
+ StringRef OutputFileName, uint64_t PageSize, raw_ostream &Out)
: O(O), Is64Bit(Is64Bit), IsLittleEndian(IsLittleEndian),
- PageSize(PageSize), Out(Out), LayoutBuilder(O, Is64Bit, PageSize) {}
+ PageSize(PageSize), Out(Out),
+ LayoutBuilder(O, Is64Bit, OutputFileName, PageSize) {}
size_t totalSize() const;
Error finalize();
diff --git a/llvm/tools/llvm-objcopy/MachO/Object.cpp b/llvm/tools/llvm-objcopy/MachO/Object.cpp
index b4f98fa84cb5..6312adbbc9f7 100644
--- a/llvm/tools/llvm-objcopy/MachO/Object.cpp
+++ b/llvm/tools/llvm-objcopy/MachO/Object.cpp
@@ -29,10 +29,24 @@ void SymbolTable::removeSymbols(
}
void Object::updateLoadCommandIndexes() {
+ static constexpr char TextSegmentName[] = "__TEXT";
// Update indices of special load commands
for (size_t Index = 0, Size = LoadCommands.size(); Index < Size; ++Index) {
LoadCommand &LC = LoadCommands[Index];
switch (LC.MachOLoadCommand.load_command_data.cmd) {
+ case MachO::LC_CODE_SIGNATURE:
+ CodeSignatureCommandIndex = Index;
+ break;
+ case MachO::LC_SEGMENT:
+ if (StringRef(LC.MachOLoadCommand.segment_command_data.segname) ==
+ TextSegmentName)
+ TextSegmentCommandIndex = Index;
+ break;
+ case MachO::LC_SEGMENT_64:
+ if (StringRef(LC.MachOLoadCommand.segment_command_64_data.segname) ==
+ TextSegmentName)
+ TextSegmentCommandIndex = Index;
+ break;
case MachO::LC_SYMTAB:
SymTabCommandIndex = Index;
break;
@@ -52,6 +66,12 @@ void Object::updateLoadCommandIndexes() {
case MachO::LC_FUNCTION_STARTS:
FunctionStartsCommandIndex = Index;
break;
+ case MachO::LC_DYLD_CHAINED_FIXUPS:
+ ChainedFixupsCommandIndex = Index;
+ break;
+ case MachO::LC_DYLD_EXPORTS_TRIE:
+ ExportsTrieCommandIndex = Index;
+ break;
}
}
}
diff --git a/llvm/tools/llvm-objcopy/MachO/Object.h b/llvm/tools/llvm-objcopy/MachO/Object.h
index 207502e2241b..13aaf42634b0 100644
--- a/llvm/tools/llvm-objcopy/MachO/Object.h
+++ b/llvm/tools/llvm-objcopy/MachO/Object.h
@@ -315,7 +315,8 @@ struct Object {
LinkData DataInCode;
LinkData LinkerOptimizationHint;
LinkData FunctionStarts;
- LinkData CodeSignature;
+ LinkData ExportsTrie;
+ LinkData ChainedFixups;
Optional<uint32_t> SwiftVersion;
@@ -325,14 +326,21 @@ struct Object {
Optional<size_t> SymTabCommandIndex;
/// The index of LC_DYLD_INFO or LC_DYLD_INFO_ONLY load command if present.
Optional<size_t> DyLdInfoCommandIndex;
- /// The index LC_DYSYMTAB load comamnd if present.
+ /// The index LC_DYSYMTAB load command if present.
Optional<size_t> DySymTabCommandIndex;
- /// The index LC_DATA_IN_CODE load comamnd if present.
+ /// The index LC_DATA_IN_CODE load command if present.
Optional<size_t> DataInCodeCommandIndex;
- /// The index of LC_LINKER_OPTIMIZATIN_HINT load comamnd if present.
+ /// The index of LC_LINKER_OPTIMIZATION_HINT load command if present.
Optional<size_t> LinkerOptimizationHintCommandIndex;
- /// The index LC_FUNCTION_STARTS load comamnd if present.
+ /// The index LC_FUNCTION_STARTS load command if present.
Optional<size_t> FunctionStartsCommandIndex;
+ /// The index LC_DYLD_CHAINED_FIXUPS load command if present.
+ Optional<size_t> ChainedFixupsCommandIndex;
+ /// The index LC_DYLD_EXPORTS_TRIE load command if present.
+ Optional<size_t> ExportsTrieCommandIndex;
+ /// The index of the LC_SEGMENT or LC_SEGMENT_64 load command
+ /// corresponding to the __TEXT segment.
+ Optional<size_t> TextSegmentCommandIndex;
BumpPtrAllocator Alloc;
StringSaver NewSectionsContents;
diff --git a/llvm/tools/llvm-objcopy/ObjcopyOpts.td b/llvm/tools/llvm-objcopy/ObjcopyOpts.td
index 63abbe4c2020..bc624442aa51 100644
--- a/llvm/tools/llvm-objcopy/ObjcopyOpts.td
+++ b/llvm/tools/llvm-objcopy/ObjcopyOpts.td
@@ -50,7 +50,8 @@ defm rename_section
: Eq<"rename-section",
"Renames a section from old to new, optionally with specified flags. "
"Flags supported for GNU compatibility: alloc, load, noload, "
- "readonly, debug, code, data, rom, share, contents, merge, strings.">,
+ "readonly, exclude, debug, code, data, rom, share, contents, merge, "
+ "strings.">,
MetaVarName<"old=new[,flag1,...]">;
defm redefine_symbol
: Eq<"redefine-sym", "Change the name of a symbol old to new">,
@@ -82,8 +83,8 @@ defm set_section_alignment
defm set_section_flags
: Eq<"set-section-flags",
"Set section flags for a given section. Flags supported for GNU "
- "compatibility: alloc, load, noload, readonly, debug, code, data, "
- "rom, share, contents, merge, strings.">,
+ "compatibility: alloc, load, noload, readonly, exclude, debug, code, "
+ "data, rom, share, contents, merge, strings.">,
MetaVarName<"section=flag1[,flag2,...]">;
def S : Flag<["-"], "S">,
@@ -214,3 +215,7 @@ defm add_symbol
"compatibility: debug, constructor, warning, indirect, synthetic, "
"unique-object, before.">,
MetaVarName<"name=[section:]value[,flags]">;
+
+defm update_section
+ : Eq<"update-section", "Add section <name> with contents from a file <file>.">,
+ MetaVarName<"name=file">;
diff --git a/llvm/tools/llvm-objdump/COFFDump.cpp b/llvm/tools/llvm-objdump/COFFDump.cpp
index 09a900182d24..32fdd1a4d5c3 100644
--- a/llvm/tools/llvm-objdump/COFFDump.cpp
+++ b/llvm/tools/llvm-objdump/COFFDump.cpp
@@ -31,6 +31,159 @@ using namespace llvm::objdump;
using namespace llvm::object;
using namespace llvm::Win64EH;
+namespace {
+template <typename T> struct EnumEntry {
+ T Value;
+ StringRef Name;
+};
+
+class COFFDumper {
+public:
+ explicit COFFDumper(const llvm::object::COFFObjectFile &Obj) : Obj(Obj) {
+ Is64 = !Obj.getPE32Header();
+ }
+
+ template <class PEHeader> void printPEHeader(const PEHeader &Hdr) const;
+
+private:
+ template <typename T> FormattedNumber formatAddr(T V) const {
+ return format_hex_no_prefix(V, Is64 ? 16 : 8);
+ }
+
+ uint32_t getBaseOfData(const void *Hdr) const {
+ return Is64 ? 0 : static_cast<const pe32_header *>(Hdr)->BaseOfData;
+ }
+
+ const llvm::object::COFFObjectFile &Obj;
+ bool Is64;
+};
+} // namespace
+
+constexpr EnumEntry<uint16_t> PEHeaderMagic[] = {
+ {uint16_t(COFF::PE32Header::PE32), "PE32"},
+ {uint16_t(COFF::PE32Header::PE32_PLUS), "PE32+"},
+};
+
+constexpr EnumEntry<COFF::WindowsSubsystem> PEWindowsSubsystem[] = {
+ {COFF::IMAGE_SUBSYSTEM_UNKNOWN, "unspecified"},
+ {COFF::IMAGE_SUBSYSTEM_NATIVE, "NT native"},
+ {COFF::IMAGE_SUBSYSTEM_WINDOWS_GUI, "Windows GUI"},
+ {COFF::IMAGE_SUBSYSTEM_WINDOWS_CUI, "Windows CUI"},
+ {COFF::IMAGE_SUBSYSTEM_POSIX_CUI, "POSIX CUI"},
+ {COFF::IMAGE_SUBSYSTEM_WINDOWS_CE_GUI, "Wince CUI"},
+ {COFF::IMAGE_SUBSYSTEM_EFI_APPLICATION, "EFI application"},
+ {COFF::IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER, "EFI boot service driver"},
+ {COFF::IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER, "EFI runtime driver"},
+ {COFF::IMAGE_SUBSYSTEM_EFI_ROM, "SAL runtime driver"},
+ {COFF::IMAGE_SUBSYSTEM_XBOX, "XBOX"},
+};
+
+template <typename T, typename TEnum>
+static void printOptionalEnumName(T Value,
+ ArrayRef<EnumEntry<TEnum>> EnumValues) {
+ for (const EnumEntry<TEnum> &I : EnumValues)
+ if (I.Value == Value) {
+ outs() << "\t(" << I.Name << ')';
+ return;
+ }
+}
+
+template <class PEHeader>
+void COFFDumper::printPEHeader(const PEHeader &Hdr) const {
+ auto print = [](const char *K, auto V, const char *Fmt = "%d\n") {
+ outs() << format("%-23s ", K) << format(Fmt, V);
+ };
+ auto printU16 = [&](const char *K, support::ulittle16_t V,
+ const char *Fmt = "%d\n") { print(K, uint16_t(V), Fmt); };
+ auto printU32 = [&](const char *K, support::ulittle32_t V,
+ const char *Fmt = "%d\n") { print(K, uint32_t(V), Fmt); };
+ auto printAddr = [=](const char *K, uint64_t V) {
+ outs() << format("%-23s ", K) << formatAddr(V) << '\n';
+ };
+
+ printU16("Magic", Hdr.Magic, "%04x");
+ printOptionalEnumName(Hdr.Magic, makeArrayRef(PEHeaderMagic));
+ outs() << '\n';
+ print("MajorLinkerVersion", Hdr.MajorLinkerVersion);
+ print("MinorLinkerVersion", Hdr.MinorLinkerVersion);
+ printAddr("SizeOfCode", Hdr.SizeOfCode);
+ printAddr("SizeOfInitializedData", Hdr.SizeOfInitializedData);
+ printAddr("SizeOfUninitializedData", Hdr.SizeOfUninitializedData);
+ printAddr("AddressOfEntryPoint", Hdr.AddressOfEntryPoint);
+ printAddr("BaseOfCode", Hdr.BaseOfCode);
+ if (!Is64)
+ printAddr("BaseOfData", getBaseOfData(&Hdr));
+ printAddr("ImageBase", Hdr.ImageBase);
+ printU32("SectionAlignment", Hdr.SectionAlignment, "%08x\n");
+ printU32("FileAlignment", Hdr.FileAlignment, "%08x\n");
+ printU16("MajorOSystemVersion", Hdr.MajorOperatingSystemVersion);
+ printU16("MinorOSystemVersion", Hdr.MinorOperatingSystemVersion);
+ printU16("MajorImageVersion", Hdr.MajorImageVersion);
+ printU16("MinorImageVersion", Hdr.MinorImageVersion);
+ printU16("MajorSubsystemVersion", Hdr.MajorSubsystemVersion);
+ printU16("MinorSubsystemVersion", Hdr.MinorSubsystemVersion);
+ printU32("Win32Version", Hdr.Win32VersionValue, "%08x\n");
+ printU32("SizeOfImage", Hdr.SizeOfImage, "%08x\n");
+ printU32("SizeOfHeaders", Hdr.SizeOfHeaders, "%08x\n");
+ printU32("CheckSum", Hdr.CheckSum, "%08x\n");
+ printU16("Subsystem", Hdr.Subsystem, "%08x");
+ printOptionalEnumName(Hdr.Subsystem, makeArrayRef(PEWindowsSubsystem));
+ outs() << '\n';
+
+ printU16("DllCharacteristics", Hdr.DLLCharacteristics, "%08x\n");
+#define FLAG(Name) \
+ if (Hdr.DLLCharacteristics & COFF::IMAGE_DLL_CHARACTERISTICS_##Name) \
+ outs() << "\t\t\t\t\t" << #Name << '\n';
+ FLAG(HIGH_ENTROPY_VA);
+ FLAG(DYNAMIC_BASE);
+ FLAG(FORCE_INTEGRITY);
+ FLAG(NX_COMPAT);
+ FLAG(NO_ISOLATION);
+ FLAG(NO_SEH);
+ FLAG(NO_BIND);
+ FLAG(APPCONTAINER);
+ FLAG(WDM_DRIVER);
+ FLAG(GUARD_CF);
+ FLAG(TERMINAL_SERVER_AWARE);
+#undef FLAG
+
+ printAddr("SizeOfStackReserve", Hdr.SizeOfStackReserve);
+ printAddr("SizeOfStackCommit", Hdr.SizeOfStackCommit);
+ printAddr("SizeOfHeapReserve", Hdr.SizeOfHeapReserve);
+ printAddr("SizeOfHeapCommit", Hdr.SizeOfHeapCommit);
+ printU32("LoaderFlags", Hdr.LoaderFlags, "%08x\n");
+ printU32("NumberOfRvaAndSizes", Hdr.NumberOfRvaAndSize, "%08x\n");
+
+ static const char *DirName[COFF::NUM_DATA_DIRECTORIES + 1] = {
+ "Export Directory [.edata (or where ever we found it)]",
+ "Import Directory [parts of .idata]",
+ "Resource Directory [.rsrc]",
+ "Exception Directory [.pdata]",
+ "Security Directory",
+ "Base Relocation Directory [.reloc]",
+ "Debug Directory",
+ "Description Directory",
+ "Special Directory",
+ "Thread Storage Directory [.tls]",
+ "Load Configuration Directory",
+ "Bound Import Directory",
+ "Import Address Table Directory",
+ "Delay Import Directory",
+ "CLR Runtime Header",
+ "Reserved",
+ };
+ outs() << "\nThe Data Directory\n";
+ for (uint32_t I = 0; I != array_lengthof(DirName); ++I) {
+ uint32_t Addr = 0, Size = 0;
+ if (const data_directory *Data = Obj.getDataDirectory(I)) {
+ Addr = Data->RelativeVirtualAddress;
+ Size = Data->Size;
+ }
+ outs() << format("Entry %x ", I) << formatAddr(Addr)
+ << format(" %08x %s\n", uint32_t(Size), DirName[I]);
+ }
+}
+
// Returns the name of the unwind code.
static StringRef getUnwindCodeTypeName(uint8_t Code) {
switch(Code) {
@@ -278,10 +431,7 @@ static void printTLSDirectory(const COFFObjectFile *Obj) {
return;
const data_directory *DataDir = Obj->getDataDirectory(COFF::TLS_TABLE);
- if (!DataDir)
- reportError("missing data dir for TLS table", Obj->getFileName());
-
- if (DataDir->RelativeVirtualAddress == 0)
+ if (!DataDir || DataDir->RelativeVirtualAddress == 0)
return;
uintptr_t IntPtr = 0;
@@ -625,12 +775,47 @@ void objdump::printCOFFUnwindInfo(const COFFObjectFile *Obj) {
}
}
-void objdump::printCOFFFileHeader(const object::ObjectFile *Obj) {
- const COFFObjectFile *file = dyn_cast<const COFFObjectFile>(Obj);
- printTLSDirectory(file);
- printLoadConfiguration(file);
- printImportTables(file);
- printExportTable(file);
+void objdump::printCOFFFileHeader(const COFFObjectFile &Obj) {
+ COFFDumper CD(Obj);
+ const uint16_t Cha = Obj.getCharacteristics();
+ outs() << "Characteristics 0x" << Twine::utohexstr(Cha) << '\n';
+#define FLAG(F, Name) \
+ if (Cha & F) \
+ outs() << '\t' << Name << '\n';
+ FLAG(COFF::IMAGE_FILE_RELOCS_STRIPPED, "relocations stripped");
+ FLAG(COFF::IMAGE_FILE_EXECUTABLE_IMAGE, "executable");
+ FLAG(COFF::IMAGE_FILE_LINE_NUMS_STRIPPED, "line numbers stripped");
+ FLAG(COFF::IMAGE_FILE_LOCAL_SYMS_STRIPPED, "symbols stripped");
+ FLAG(COFF::IMAGE_FILE_LARGE_ADDRESS_AWARE, "large address aware");
+ FLAG(COFF::IMAGE_FILE_BYTES_REVERSED_LO, "little endian");
+ FLAG(COFF::IMAGE_FILE_32BIT_MACHINE, "32 bit words");
+ FLAG(COFF::IMAGE_FILE_DEBUG_STRIPPED, "debugging information removed");
+ FLAG(COFF::IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP,
+ "copy to swap file if on removable media");
+ FLAG(COFF::IMAGE_FILE_NET_RUN_FROM_SWAP,
+ "copy to swap file if on network media");
+ FLAG(COFF::IMAGE_FILE_SYSTEM, "system file");
+ FLAG(COFF::IMAGE_FILE_DLL, "DLL");
+ FLAG(COFF::IMAGE_FILE_UP_SYSTEM_ONLY, "run only on uniprocessor machine");
+ FLAG(COFF::IMAGE_FILE_BYTES_REVERSED_HI, "big endian");
+#undef FLAG
+
+ // TODO Support PE_IMAGE_DEBUG_TYPE_REPRO.
+ // Since ctime(3) returns a 26 character string of the form:
+ // "Sun Sep 16 01:03:52 1973\n\0"
+ // just print 24 characters.
+ const time_t Timestamp = Obj.getTimeDateStamp();
+ outs() << format("\nTime/Date %.24s\n", ctime(&Timestamp));
+
+ if (const pe32_header *Hdr = Obj.getPE32Header())
+ CD.printPEHeader<pe32_header>(*Hdr);
+ else if (const pe32plus_header *Hdr = Obj.getPE32PlusHeader())
+ CD.printPEHeader<pe32plus_header>(*Hdr);
+
+ printTLSDirectory(&Obj);
+ printLoadConfiguration(&Obj);
+ printImportTables(&Obj);
+ printExportTable(&Obj);
}
void objdump::printCOFFSymbolTable(const object::COFFImportFile *i) {
diff --git a/llvm/tools/llvm-objdump/COFFDump.h b/llvm/tools/llvm-objdump/COFFDump.h
index 21f97bdeb83c..f933f79523a0 100644
--- a/llvm/tools/llvm-objdump/COFFDump.h
+++ b/llvm/tools/llvm-objdump/COFFDump.h
@@ -28,7 +28,7 @@ Error getCOFFRelocationValueString(const object::COFFObjectFile *Obj,
llvm::SmallVectorImpl<char> &Result);
void printCOFFUnwindInfo(const object::COFFObjectFile *O);
-void printCOFFFileHeader(const object::ObjectFile *O);
+void printCOFFFileHeader(const object::COFFObjectFile &Obj);
void printCOFFSymbolTable(const object::COFFImportFile *I);
void printCOFFSymbolTable(const object::COFFObjectFile *O);
} // namespace objdump
diff --git a/llvm/tools/llvm-objdump/ELFDump.cpp b/llvm/tools/llvm-objdump/ELFDump.cpp
index da7415834c63..98e71497d022 100644
--- a/llvm/tools/llvm-objdump/ELFDump.cpp
+++ b/llvm/tools/llvm-objdump/ELFDump.cpp
@@ -145,7 +145,7 @@ static uint64_t getSectionLMA(const ELFFile<ELFT> &Obj,
const object::ELFSectionRef &Sec) {
auto PhdrRangeOrErr = Obj.program_headers();
if (!PhdrRangeOrErr)
- report_fatal_error(toString(PhdrRangeOrErr.takeError()));
+ report_fatal_error(Twine(toString(PhdrRangeOrErr.takeError())));
// Search for a PT_LOAD segment containing the requested section. Use this
// segment's p_addr to calculate the section's LMA.
diff --git a/llvm/tools/llvm-objdump/MachODump.cpp b/llvm/tools/llvm-objdump/MachODump.cpp
index 7c1fdf03542f..b0cf1f775ced 100644
--- a/llvm/tools/llvm-objdump/MachODump.cpp
+++ b/llvm/tools/llvm-objdump/MachODump.cpp
@@ -33,6 +33,7 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/MachO.h"
#include "llvm/Object/MachOUniversal.h"
#include "llvm/Option/ArgList.h"
@@ -44,7 +45,6 @@
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/WithColor.h"
@@ -10053,6 +10053,10 @@ static void PrintLinkEditDataCommand(MachO::linkedit_data_command ld,
outs() << " cmd LC_DYLIB_CODE_SIGN_DRS\n";
else if (ld.cmd == MachO::LC_LINKER_OPTIMIZATION_HINT)
outs() << " cmd LC_LINKER_OPTIMIZATION_HINT\n";
+ else if (ld.cmd == MachO::LC_DYLD_EXPORTS_TRIE)
+ outs() << " cmd LC_DYLD_EXPORTS_TRIE\n";
+ else if (ld.cmd == MachO::LC_DYLD_CHAINED_FIXUPS)
+ outs() << " cmd LC_DYLD_CHAINED_FIXUPS\n";
else
outs() << " cmd " << ld.cmd << " (?)\n";
outs() << " cmdsize " << ld.cmdsize;
@@ -10196,7 +10200,9 @@ static void PrintLoadCommands(const MachOObjectFile *Obj, uint32_t filetype,
Command.C.cmd == MachO::LC_FUNCTION_STARTS ||
Command.C.cmd == MachO::LC_DATA_IN_CODE ||
Command.C.cmd == MachO::LC_DYLIB_CODE_SIGN_DRS ||
- Command.C.cmd == MachO::LC_LINKER_OPTIMIZATION_HINT) {
+ Command.C.cmd == MachO::LC_LINKER_OPTIMIZATION_HINT ||
+ Command.C.cmd == MachO::LC_DYLD_EXPORTS_TRIE ||
+ Command.C.cmd == MachO::LC_DYLD_CHAINED_FIXUPS) {
MachO::linkedit_data_command Ld =
Obj->getLinkeditDataLoadCommand(Command);
PrintLinkEditDataCommand(Ld, Buf.size());
diff --git a/llvm/tools/llvm-objdump/ObjdumpOpts.td b/llvm/tools/llvm-objdump/ObjdumpOpts.td
index 1b19733c65d0..9f27a6cdf163 100644
--- a/llvm/tools/llvm-objdump/ObjdumpOpts.td
+++ b/llvm/tools/llvm-objdump/ObjdumpOpts.td
@@ -1,5 +1,12 @@
include "llvm/Option/OptParser.td"
+multiclass Eq<string name, string help> {
+ def NAME : Separate<["--"], name>;
+ def NAME #_eq : Joined<["--"], name #"=">,
+ Alias<!cast<Separate>(NAME)>,
+ HelpText<help>;
+}
+
def help : Flag<["--"], "help">,
HelpText<"Display available options (--help-hidden for more)">;
@@ -16,7 +23,8 @@ def adjust_vma_EQ : Joined<["--"], "adjust-vma=">,
HelpText<"Increase the displayed address by the specified offset">;
def all_headers : Flag<["--"], "all-headers">,
- HelpText<"Display all available header information">;
+ HelpText<"Display all available header information, "
+ "relocation entries and the symbol table">;
def : Flag<["-"], "x">, Alias<all_headers>, HelpText<"Alias for --all-headers">;
def arch_name_EQ : Joined<["--"], "arch-name=">,
@@ -32,11 +40,11 @@ def demangle : Flag<["--"], "demangle">, HelpText<"Demangle symbol names">;
def : Flag<["-"], "C">, Alias<demangle>, HelpText<"Alias for --demangle">;
def disassemble : Flag<["--"], "disassemble">,
- HelpText<"Display assembler mnemonics for the machine instructions">;
+ HelpText<"Disassemble all executable sections found in the input files">;
def : Flag<["-"], "d">, Alias<disassemble>, HelpText<"Alias for --disassemble">;
def disassemble_all : Flag<["--"], "disassemble-all">,
- HelpText<"Display assembler mnemonics for the machine instructions">;
+ HelpText<"Disassemble all sections found in the input files">;
def : Flag<["-"], "D">, Alias<disassemble_all>,
HelpText<"Alias for --disassemble-all">;
@@ -66,10 +74,12 @@ def : Flag<["-"], "R">, Alias<dynamic_reloc>,
HelpText<"Alias for --dynamic-reloc">;
def dwarf_EQ : Joined<["--"], "dwarf=">,
- HelpText<"Dump of dwarf debug sections">, Values<"frames">;
+ HelpText<"Dump the specified DWARF debug sections. The "
+ "only supported value is 'frames'">,
+ Values<"frames">;
def fault_map_section : Flag<["--"], "fault-map-section">,
- HelpText<"Display contents of faultmap section">;
+ HelpText<"Display the content of the fault map section">;
def file_headers : Flag<["--"], "file-headers">,
HelpText<"Display the contents of the overall file header">;
@@ -82,9 +92,10 @@ def : Flag<["-"], "s">, Alias<full_contents>,
HelpText<"Alias for --full-contents">;
def line_numbers : Flag<["--"], "line-numbers">,
- HelpText<"Display source line numbers with "
- "disassembly. Implies disassemble object">;
-def : Flag<["-"], "l">, Alias<line_numbers>,
+ HelpText<"When disassembling, display source line numbers. "
+ "Implies --disassemble">;
+def : Flag<["-"], "l">,
+ Alias<line_numbers>,
HelpText<"Alias for --line-numbers">;
def macho : Flag<["--"], "macho">,
@@ -104,7 +115,7 @@ def no_show_raw_insn : Flag<["--"], "no-show-raw-insn">,
"do not print the instruction bytes.">;
def no_leading_addr : Flag<["--"], "no-leading-addr">,
- HelpText<"Print no leading address">;
+ HelpText<"When disassembling, do not print leading addresses">;
def raw_clang_ast : Flag<["--"], "raw-clang-ast">,
HelpText<"Dump the raw binary contents of the clang AST section">;
@@ -143,15 +154,18 @@ def show_lma : Flag<["--"], "show-lma">,
HelpText<"Display LMA column when dumping ELF section headers">;
def source : Flag<["--"], "source">,
- HelpText<"Display source inlined with disassembly. Implies disassemble object">;
+ HelpText<"When disassembling, display source interleaved with the "
+ "disassembly. Implies --disassemble">;
def : Flag<["-"], "S">, Alias<source>, HelpText<"Alias for --source">;
def start_address_EQ : Joined<["--"], "start-address=">,
MetaVarName<"address">,
- HelpText<"Disassemble beginning at address">;
+ HelpText<"Set the start address for disassembling, "
+ "printing relocations and printing symbols">;
def stop_address_EQ : Joined<["--"], "stop-address=">,
MetaVarName<"address">,
- HelpText<"Stop disassembly at address">;
+ HelpText<"Set the stop address for disassembling, "
+ "printing relocations and printing symbols">;
def syms : Flag<["--"], "syms">,
HelpText<"Display the symbol table">;
@@ -180,19 +194,19 @@ def wide : Flag<["--"], "wide">,
HelpText<"Ignored for compatibility with GNU objdump">;
def : Flag<["-"], "w">, Alias<wide>;
-def prefix : Separate<["--"], "prefix">,
- HelpText<"Add prefix to absolute paths">;
-
-def prefix_strip : Separate<["--"], "prefix-strip">,
- HelpText<"Strip out initial directories from absolute "
- "paths. No effect without --prefix">;
+defm prefix : Eq<"prefix", "Add prefix to absolute paths">,
+ MetaVarName<"prefix">;
+defm prefix_strip
+ : Eq<"prefix-strip", "Strip out initial directories from absolute "
+ "paths. No effect without --prefix">,
+ MetaVarName<"prefix">;
def debug_vars_EQ : Joined<["--"], "debug-vars=">,
- Values<"unicode,ascii">;
-def : Flag<["--"], "debug-vars">,
HelpText<"Print the locations (in registers or memory) of "
- "source-level variables alongside disassembly">,
- Alias<debug_vars_EQ>, AliasArgs<["unicode"]>;
+ "source-level variables alongside disassembly. "
+ "Supported formats: ascii, unicode (default)">,
+ Values<"unicode,ascii">;
+def : Flag<["--"], "debug-vars">, Alias<debug_vars_EQ>, AliasArgs<["unicode"]>;
def debug_vars_indent_EQ : Joined<["--"], "debug-vars-indent=">,
HelpText<"Distance to indent the source-level variable display, "
diff --git a/llvm/tools/llvm-objdump/XCOFFDump.cpp b/llvm/tools/llvm-objdump/XCOFFDump.cpp
index c4cc5fe7e21c..b8fb2ed3d063 100644
--- a/llvm/tools/llvm-objdump/XCOFFDump.cpp
+++ b/llvm/tools/llvm-objdump/XCOFFDump.cpp
@@ -58,6 +58,24 @@ objdump::getXCOFFSymbolCsectSMC(const XCOFFObjectFile *Obj,
return CsectAuxEntOrErr.get().getStorageMappingClass();
}
+Optional<object::SymbolRef>
+objdump::getXCOFFSymbolContainingSymbolRef(const XCOFFObjectFile *Obj,
+ const SymbolRef &Sym) {
+
+ const XCOFFSymbolRef SymRef = Obj->toSymbolRef(Sym.getRawDataRefImpl());
+ if (!SymRef.isCsectSymbol())
+ return None;
+
+ Expected<XCOFFCsectAuxRef> CsectAuxEntOrErr = SymRef.getXCOFFCsectAuxRef();
+ if (!CsectAuxEntOrErr || !CsectAuxEntOrErr.get().isLabel())
+ return None;
+ uint32_t Idx =
+ static_cast<uint32_t>(CsectAuxEntOrErr.get().getSectionOrLength());
+ DataRefImpl DRI;
+ DRI.p = Obj->getSymbolByIndex(Idx);
+ return SymbolRef(DRI, Obj);
+}
+
bool objdump::isLabel(const XCOFFObjectFile *Obj, const SymbolRef &Sym) {
const XCOFFSymbolRef SymRef = Obj->toSymbolRef(Sym.getRawDataRefImpl());
diff --git a/llvm/tools/llvm-objdump/XCOFFDump.h b/llvm/tools/llvm-objdump/XCOFFDump.h
index dbf520021594..6796f00aef6f 100644
--- a/llvm/tools/llvm-objdump/XCOFFDump.h
+++ b/llvm/tools/llvm-objdump/XCOFFDump.h
@@ -20,6 +20,10 @@ Optional<XCOFF::StorageMappingClass>
getXCOFFSymbolCsectSMC(const object::XCOFFObjectFile *Obj,
const object::SymbolRef &Sym);
+Optional<object::SymbolRef>
+getXCOFFSymbolContainingSymbolRef(const object::XCOFFObjectFile *Obj,
+ const object::SymbolRef &Sym);
+
bool isLabel(const object::XCOFFObjectFile *Obj, const object::SymbolRef &Sym);
std::string getXCOFFSymbolDescription(const SymbolInfoTy &SymbolInfo,
diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp
index 48ae92f734c7..6f6f543f2f47 100644
--- a/llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -47,6 +47,7 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/COFF.h"
#include "llvm/Object/COFFImportFile.h"
@@ -71,7 +72,6 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/StringSaver.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
@@ -297,16 +297,15 @@ void objdump::reportWarning(const Twine &Message, StringRef File) {
<< "'" << File << "': " << Message << "\n";
}
-LLVM_ATTRIBUTE_NORETURN void objdump::reportError(StringRef File,
- const Twine &Message) {
+[[noreturn]] void objdump::reportError(StringRef File, const Twine &Message) {
outs().flush();
WithColor::error(errs(), ToolName) << "'" << File << "': " << Message << "\n";
exit(1);
}
-LLVM_ATTRIBUTE_NORETURN void objdump::reportError(Error E, StringRef FileName,
- StringRef ArchiveName,
- StringRef ArchitectureName) {
+[[noreturn]] void objdump::reportError(Error E, StringRef FileName,
+ StringRef ArchiveName,
+ StringRef ArchitectureName) {
assert(E);
outs().flush();
WithColor::error(errs(), ToolName);
@@ -325,7 +324,7 @@ static void reportCmdLineWarning(const Twine &Message) {
WithColor::warning(errs(), ToolName) << Message << "\n";
}
-LLVM_ATTRIBUTE_NORETURN static void reportCmdLineError(const Twine &Message) {
+[[noreturn]] static void reportCmdLineError(const Twine &Message) {
WithColor::error(errs(), ToolName) << Message << "\n";
exit(1);
}
@@ -1286,6 +1285,10 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
if (shouldAdjustVA(Section))
VMAAdjustment = AdjustVMA;
+ // In executable and shared objects, r_offset holds a virtual address.
+ // Subtract SectionAddr from the r_offset field of a relocation to get
+ // the section offset.
+ uint64_t RelAdjustment = Obj->isRelocatableObject() ? 0 : SectionAddr;
uint64_t Size;
uint64_t Index;
bool PrintedSection = false;
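A quick worked illustration of the adjustment above, with made-up numbers: in a shared object whose section is loaded at VMA 0x1000, a relocation whose r_offset is 0x1010 refers to byte 0x10 of that section.

// Hypothetical values, for illustration only.
uint64_t SectionAddr = 0x1000;  // where the section is loaded
uint64_t ROffset = 0x1010;      // r_offset taken from the relocation
bool IsRelocatable = false;     // executable or shared object
uint64_t RelAdjustment = IsRelocatable ? 0 : SectionAddr;
uint64_t SectionOffset = ROffset - RelAdjustment; // == 0x10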
@@ -1432,7 +1435,8 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
// For --reloc: print zero blocks patched by relocations, so that
// relocations can be shown in the dump.
if (RelCur != RelEnd)
- MaxOffset = RelCur->getOffset() - Index;
+ MaxOffset = std::min(RelCur->getOffset() - RelAdjustment - Index,
+ MaxOffset);
if (size_t N =
countSkippableZeroBytes(Bytes.slice(Index, MaxOffset))) {
@@ -1481,7 +1485,7 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
if (!PrintTarget)
if (Optional<uint64_t> MaybeTarget =
MIA->evaluateMemoryOperandAddress(
- Inst, SectionAddr + Index, Size)) {
+ Inst, STI, SectionAddr + Index, Size)) {
Target = *MaybeTarget;
PrintTarget = true;
// Do not print real address when symbolizing.
@@ -1581,7 +1585,7 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
if (Obj->getArch() != Triple::hexagon) {
// Print relocation for instruction and data.
while (RelCur != RelEnd) {
- uint64_t Offset = RelCur->getOffset();
+ uint64_t Offset = RelCur->getOffset() - RelAdjustment;
// If this relocation is hidden, skip it.
if (getHidden(*RelCur) || SectionAddr + Offset < StartAddress) {
++RelCur;
@@ -1770,7 +1774,9 @@ void objdump::printDynamicRelocations(const ObjectFile *Obj) {
return;
const auto *Elf = dyn_cast<ELFObjectFileBase>(Obj);
- if (!Elf || Elf->getEType() != ELF::ET_DYN) {
+ if (!Elf || !any_of(Elf->sections(), [](const ELFSectionRef Sec) {
+ return Sec.getType() == ELF::SHT_DYNAMIC;
+ })) {
reportError(Obj->getFileName(), "not a dynamic object");
return;
}
@@ -1779,7 +1785,12 @@ void objdump::printDynamicRelocations(const ObjectFile *Obj) {
if (DynRelSec.empty())
return;
- outs() << "DYNAMIC RELOCATION RECORDS\n";
+ outs() << "\nDYNAMIC RELOCATION RECORDS\n";
+ const uint32_t OffsetPadding = (Obj->getBytesInAddress() > 4 ? 16 : 8);
+ const uint32_t TypePadding = 24;
+ outs() << left_justify("OFFSET", OffsetPadding) << ' '
+ << left_justify("TYPE", TypePadding) << " VALUE\n";
+
StringRef Fmt = Obj->getBytesInAddress() > 4 ? "%016" PRIx64 : "%08" PRIx64;
for (const SectionRef &Section : DynRelSec)
for (const RelocationRef &Reloc : Section.relocations()) {
@@ -1789,8 +1800,8 @@ void objdump::printDynamicRelocations(const ObjectFile *Obj) {
Reloc.getTypeName(RelocName);
if (Error E = getRelocationValueString(Reloc, ValueStr))
reportError(std::move(E), Obj->getFileName());
- outs() << format(Fmt.data(), Address) << " " << RelocName << " "
- << ValueStr << "\n";
+ outs() << format(Fmt.data(), Address) << ' '
+ << left_justify(RelocName, TypePadding) << ' ' << ValueStr << '\n';
}
}
@@ -1922,7 +1933,8 @@ void objdump::printSymbolTable(const ObjectFile *O, StringRef ArchiveName,
if (!DumpDynamic) {
outs() << "\nSYMBOL TABLE:\n";
for (auto I = O->symbol_begin(); I != O->symbol_end(); ++I)
- printSymbol(O, *I, FileName, ArchiveName, ArchitectureName, DumpDynamic);
+ printSymbol(O, *I, {}, FileName, ArchiveName, ArchitectureName,
+ DumpDynamic);
return;
}
@@ -1935,12 +1947,21 @@ void objdump::printSymbolTable(const ObjectFile *O, StringRef ArchiveName,
}
const ELFObjectFileBase *ELF = cast<const ELFObjectFileBase>(O);
- for (auto I = ELF->getDynamicSymbolIterators().begin();
- I != ELF->getDynamicSymbolIterators().end(); ++I)
- printSymbol(O, *I, FileName, ArchiveName, ArchitectureName, DumpDynamic);
+ auto Symbols = ELF->getDynamicSymbolIterators();
+ Expected<std::vector<VersionEntry>> SymbolVersionsOrErr =
+ ELF->readDynsymVersions();
+ if (!SymbolVersionsOrErr) {
+ reportWarning(toString(SymbolVersionsOrErr.takeError()), FileName);
+ SymbolVersionsOrErr = std::vector<VersionEntry>();
+ (void)!SymbolVersionsOrErr;
+ }
+ for (auto &Sym : Symbols)
+ printSymbol(O, Sym, *SymbolVersionsOrErr, FileName, ArchiveName,
+ ArchitectureName, DumpDynamic);
}
void objdump::printSymbol(const ObjectFile *O, const SymbolRef &Symbol,
+ ArrayRef<VersionEntry> SymbolVersions,
StringRef FileName, StringRef ArchiveName,
StringRef ArchitectureName, bool DumpDynamic) {
const MachOObjectFile *MachO = dyn_cast<const MachOObjectFile>(O);
@@ -2029,22 +2050,66 @@ void objdump::printSymbol(const ObjectFile *O, const SymbolRef &Symbol,
} else if (Common) {
outs() << "*COM*";
} else if (Section == O->section_end()) {
- outs() << "*UND*";
+ if (O->isXCOFF()) {
+ XCOFFSymbolRef XCOFFSym = dyn_cast<const XCOFFObjectFile>(O)->toSymbolRef(
+ Symbol.getRawDataRefImpl());
+ if (XCOFF::N_DEBUG == XCOFFSym.getSectionNumber())
+ outs() << "*DEBUG*";
+ else
+ outs() << "*UND*";
+ } else
+ outs() << "*UND*";
} else {
StringRef SegmentName = getSegmentName(MachO, *Section);
if (!SegmentName.empty())
outs() << SegmentName << ",";
StringRef SectionName = unwrapOrError(Section->getName(), FileName);
outs() << SectionName;
- }
+ if (O->isXCOFF()) {
+ Optional<SymbolRef> SymRef = getXCOFFSymbolContainingSymbolRef(
+ dyn_cast<const XCOFFObjectFile>(O), Symbol);
+ if (SymRef) {
- if (Common || O->isELF()) {
- uint64_t Val =
- Common ? Symbol.getAlignment() : ELFSymbolRef(Symbol).getSize();
- outs() << '\t' << format(Fmt, Val);
+ Expected<StringRef> NameOrErr = SymRef.getValue().getName();
+
+ if (NameOrErr) {
+ outs() << " (csect:";
+ std::string SymName(NameOrErr.get());
+
+ if (Demangle)
+ SymName = demangle(SymName);
+
+ if (SymbolDescription)
+ SymName = getXCOFFSymbolDescription(
+ createSymbolInfo(O, SymRef.getValue()), SymName);
+
+ outs() << ' ' << SymName;
+ outs() << ") ";
+ } else
+ reportWarning(toString(NameOrErr.takeError()), FileName);
+ }
+ }
}
+ if (Common)
+ outs() << '\t' << format(Fmt, static_cast<uint64_t>(Symbol.getAlignment()));
+ else if (O->isXCOFF())
+ outs() << '\t'
+ << format(Fmt, dyn_cast<const XCOFFObjectFile>(O)->getSymbolSize(
+ Symbol.getRawDataRefImpl()));
+ else if (O->isELF())
+ outs() << '\t' << format(Fmt, ELFSymbolRef(Symbol).getSize());
+
if (O->isELF()) {
+ if (!SymbolVersions.empty()) {
+ const VersionEntry &Ver =
+ SymbolVersions[Symbol.getRawDataRefImpl().d.b - 1];
+ std::string Str;
+ if (!Ver.Name.empty())
+ Str = Ver.IsVerDef ? ' ' + Ver.Name : '(' + Ver.Name + ')';
+ outs() << ' ' << left_justify(Str, 12);
+ }
+
uint8_t Other = ELFSymbolRef(Symbol).getOther();
switch (Other) {
case ELF::STV_DEFAULT:
@@ -2066,10 +2131,14 @@ void objdump::printSymbol(const ObjectFile *O, const SymbolRef &Symbol,
outs() << " .hidden";
}
+ std::string SymName(Name);
if (Demangle)
- outs() << ' ' << demangle(std::string(Name)) << '\n';
- else
- outs() << ' ' << Name << '\n';
+ SymName = demangle(SymName);
+
+ if (O->isXCOFF() && SymbolDescription)
+ SymName = getXCOFFSymbolDescription(createSymbolInfo(O, Symbol), SymName);
+
+ outs() << ' ' << SymName << '\n';
}
static void printUnwindInfo(const ObjectFile *O) {
@@ -2176,7 +2245,7 @@ static void printPrivateFileHeaders(const ObjectFile *O, bool OnlyFirst) {
return;
}
if (O->isCOFF())
- return printCOFFFileHeader(O);
+ return printCOFFFileHeader(cast<object::COFFObjectFile>(*O));
if (O->isWasm())
return printWasmFileHeader(O);
if (O->isMachO()) {
@@ -2431,6 +2500,11 @@ static void parseIntArg(const llvm::opt::InputArgList &InputArgs, int ID,
}
}
+static void invalidArgValue(const opt::Arg *A) {
+ reportCmdLineError("'" + StringRef(A->getValue()) +
+ "' is not a valid value for '" + A->getSpelling() + "'");
+}
+
static std::vector<std::string>
commaSeparatedValues(const llvm::opt::InputArgList &InputArgs, int ID) {
std::vector<std::string> Values;
@@ -2504,8 +2578,11 @@ static void parseObjdumpOptions(const llvm::opt::InputArgList &InputArgs) {
commaSeparatedValues(InputArgs, OBJDUMP_disassemble_symbols_EQ);
DisassembleZeroes = InputArgs.hasArg(OBJDUMP_disassemble_zeroes);
if (const opt::Arg *A = InputArgs.getLastArg(OBJDUMP_dwarf_EQ)) {
- DwarfDumpType =
- StringSwitch<DIDumpType>(A->getValue()).Case("frames", DIDT_DebugFrame);
+ DwarfDumpType = StringSwitch<DIDumpType>(A->getValue())
+ .Case("frames", DIDT_DebugFrame)
+ .Default(DIDT_Null);
+ if (DwarfDumpType == DIDT_Null)
+ invalidArgValue(A);
}
DynamicRelocations = InputArgs.hasArg(OBJDUMP_dynamic_reloc);
FaultMapSection = InputArgs.hasArg(OBJDUMP_fault_map_section);
@@ -2542,7 +2619,10 @@ static void parseObjdumpOptions(const llvm::opt::InputArgList &InputArgs) {
if (const opt::Arg *A = InputArgs.getLastArg(OBJDUMP_debug_vars_EQ)) {
DbgVariables = StringSwitch<DebugVarsFormat>(A->getValue())
.Case("ascii", DVASCII)
- .Case("unicode", DVUnicode);
+ .Case("unicode", DVUnicode)
+ .Default(DVInvalid);
+ if (DbgVariables == DVInvalid)
+ invalidArgValue(A);
}
parseIntArg(InputArgs, OBJDUMP_debug_vars_indent_EQ, DbgIndent);
diff --git a/llvm/tools/llvm-objdump/llvm-objdump.h b/llvm/tools/llvm-objdump/llvm-objdump.h
index 33fb3f207f8e..864a9920efbe 100644
--- a/llvm/tools/llvm-objdump/llvm-objdump.h
+++ b/llvm/tools/llvm-objdump/llvm-objdump.h
@@ -26,15 +26,12 @@ class ELFSectionRef;
class MachOObjectFile;
class MachOUniversalBinary;
class RelocationRef;
+struct VersionEntry;
} // namespace object
namespace objdump {
-enum DebugVarsFormat {
- DVDisabled,
- DVUnicode,
- DVASCII,
-};
+enum DebugVarsFormat { DVDisabled, DVUnicode, DVASCII, DVInvalid };
extern bool ArchiveHeaders;
extern int DbgIndent;
@@ -137,12 +134,13 @@ void printSymbolTable(const object::ObjectFile *O, StringRef ArchiveName,
StringRef ArchitectureName = StringRef(),
bool DumpDynamic = false);
void printSymbol(const object::ObjectFile *O, const object::SymbolRef &Symbol,
+ ArrayRef<object::VersionEntry> SymbolVersions,
StringRef FileName, StringRef ArchiveName,
StringRef ArchitectureName, bool DumpDynamic);
-LLVM_ATTRIBUTE_NORETURN void reportError(StringRef File, const Twine &Message);
-LLVM_ATTRIBUTE_NORETURN void reportError(Error E, StringRef FileName,
- StringRef ArchiveName = "",
- StringRef ArchitectureName = "");
+[[noreturn]] void reportError(StringRef File, const Twine &Message);
+[[noreturn]] void reportError(Error E, StringRef FileName,
+ StringRef ArchiveName = "",
+ StringRef ArchitectureName = "");
void reportWarning(const Twine &Message, StringRef File);
template <typename T, typename... Ts>
diff --git a/llvm/tools/llvm-pdbutil/ExplainOutputStyle.cpp b/llvm/tools/llvm-pdbutil/ExplainOutputStyle.cpp
index 3d2490509c03..b631bdf8f2b1 100644
--- a/llvm/tools/llvm-pdbutil/ExplainOutputStyle.cpp
+++ b/llvm/tools/llvm-pdbutil/ExplainOutputStyle.cpp
@@ -373,7 +373,7 @@ static void explainDbiModiSubstreamOffset(LinePrinter &P, DbiStream &Dbi,
++Index;
}
- DbiModuleDescriptor &Descriptor = *Prev;
+ const DbiModuleDescriptor &Descriptor = *Prev;
P.formatLine("which contains the descriptor for module {0} ({1}).", Index,
Descriptor.getModuleName());
}
diff --git a/llvm/tools/llvm-pdbutil/LinePrinter.cpp b/llvm/tools/llvm-pdbutil/LinePrinter.cpp
index 280c000bd65f..dd6ca5bf41b1 100644
--- a/llvm/tools/llvm-pdbutil/LinePrinter.cpp
+++ b/llvm/tools/llvm-pdbutil/LinePrinter.cpp
@@ -100,7 +100,7 @@ bool LinePrinter::IsClassExcluded(const ClassLayout &Class) {
}
void LinePrinter::formatBinary(StringRef Label, ArrayRef<uint8_t> Data,
- uint32_t StartOffset) {
+ uint64_t StartOffset) {
NewLine();
OS << Label << " (";
if (!Data.empty()) {
@@ -113,7 +113,7 @@ void LinePrinter::formatBinary(StringRef Label, ArrayRef<uint8_t> Data,
}
void LinePrinter::formatBinary(StringRef Label, ArrayRef<uint8_t> Data,
- uint64_t Base, uint32_t StartOffset) {
+ uint64_t Base, uint64_t StartOffset) {
NewLine();
OS << Label << " (";
if (!Data.empty()) {
@@ -131,7 +131,7 @@ struct Run {
Run() = default;
explicit Run(uint32_t Block) : Block(Block) {}
uint32_t Block = 0;
- uint32_t ByteLen = 0;
+ uint64_t ByteLen = 0;
};
} // namespace
@@ -143,7 +143,7 @@ static std::vector<Run> computeBlockRuns(uint32_t BlockSize,
ArrayRef<support::ulittle32_t> Blocks = Layout.Blocks;
assert(!Blocks.empty());
- uint32_t StreamBytesRemaining = Layout.Length;
+ uint64_t StreamBytesRemaining = Layout.Length;
uint32_t CurrentBlock = Blocks[0];
Runs.emplace_back(CurrentBlock);
while (!Blocks.empty()) {
@@ -153,7 +153,8 @@ static std::vector<Run> computeBlockRuns(uint32_t BlockSize,
Runs.emplace_back(NextBlock);
CurrentRun = &Runs.back();
}
- uint32_t Used = std::min(BlockSize, StreamBytesRemaining);
+ uint64_t Used =
+ std::min(static_cast<uint64_t>(BlockSize), StreamBytesRemaining);
CurrentRun->ByteLen += Used;
StreamBytesRemaining -= Used;
CurrentBlock = NextBlock;
@@ -162,7 +163,7 @@ static std::vector<Run> computeBlockRuns(uint32_t BlockSize,
return Runs;
}
-static std::pair<Run, uint32_t> findRun(uint32_t Offset, ArrayRef<Run> Runs) {
+static std::pair<Run, uint64_t> findRun(uint64_t Offset, ArrayRef<Run> Runs) {
for (const auto &R : Runs) {
if (Offset < R.ByteLen)
return std::make_pair(R, Offset);
@@ -173,8 +174,8 @@ static std::pair<Run, uint32_t> findRun(uint32_t Offset, ArrayRef<Run> Runs) {
void LinePrinter::formatMsfStreamData(StringRef Label, PDBFile &File,
uint32_t StreamIdx,
- StringRef StreamPurpose, uint32_t Offset,
- uint32_t Size) {
+ StringRef StreamPurpose, uint64_t Offset,
+ uint64_t Size) {
if (StreamIdx >= File.getNumStreams()) {
formatLine("Stream {0}: Not present", StreamIdx);
return;
@@ -193,7 +194,7 @@ void LinePrinter::formatMsfStreamData(StringRef Label, PDBFile &File,
return;
}
- uint32_t End =
+ uint64_t End =
(Size == 0) ? S->getLength() : std::min(Offset + Size, S->getLength());
Size = End - Offset;
@@ -222,10 +223,10 @@ void LinePrinter::formatMsfStreamData(StringRef Label, PDBFile &File,
OS << "\n";
Run FoundRun;
- uint32_t RunOffset;
+ uint64_t RunOffset;
std::tie(FoundRun, RunOffset) = findRun(Substream.Offset, Runs);
assert(FoundRun.ByteLen >= RunOffset);
- uint32_t Len = FoundRun.ByteLen - RunOffset;
+ uint64_t Len = FoundRun.ByteLen - RunOffset;
Len = std::min(Len, Reader.bytesRemaining());
uint64_t Base = FoundRun.Block * File.getBlockSize() + RunOffset;
ArrayRef<uint8_t> Data;
@@ -246,13 +247,14 @@ void LinePrinter::formatMsfStreamData(StringRef Label, PDBFile &File,
void LinePrinter::formatMsfStreamBlocks(
PDBFile &File, const msf::MSFStreamLayout &StreamLayout) {
auto Blocks = makeArrayRef(StreamLayout.Blocks);
- uint32_t L = StreamLayout.Length;
+ uint64_t L = StreamLayout.Length;
while (L > 0) {
NewLine();
assert(!Blocks.empty());
OS << formatv("Block {0} (\n", uint32_t(Blocks.front()));
- uint32_t UsedBytes = std::min(L, File.getBlockSize());
+ uint64_t UsedBytes =
+ std::min(L, static_cast<uint64_t>(File.getBlockSize()));
ArrayRef<uint8_t> BlockData =
cantFail(File.getBlockData(Blocks.front(), File.getBlockSize()));
uint64_t BaseOffset = Blocks.front();
@@ -267,7 +269,7 @@ void LinePrinter::formatMsfStreamBlocks(
}
}
-bool LinePrinter::IsTypeExcluded(llvm::StringRef TypeName, uint32_t Size) {
+bool LinePrinter::IsTypeExcluded(llvm::StringRef TypeName, uint64_t Size) {
if (IsItemExcluded(TypeName, IncludeTypeFilters, ExcludeTypeFilters))
return true;
if (Size < opts::pretty::SizeThreshold)
diff --git a/llvm/tools/llvm-pdbutil/LinePrinter.h b/llvm/tools/llvm-pdbutil/LinePrinter.h
index 7ecfae17354f..aa8159c0e094 100644
--- a/llvm/tools/llvm-pdbutil/LinePrinter.h
+++ b/llvm/tools/llvm-pdbutil/LinePrinter.h
@@ -49,13 +49,13 @@ public:
}
void formatBinary(StringRef Label, ArrayRef<uint8_t> Data,
- uint32_t StartOffset);
+ uint64_t StartOffset);
void formatBinary(StringRef Label, ArrayRef<uint8_t> Data, uint64_t BaseAddr,
- uint32_t StartOffset);
+ uint64_t StartOffset);
void formatMsfStreamData(StringRef Label, PDBFile &File, uint32_t StreamIdx,
- StringRef StreamPurpose, uint32_t Offset,
- uint32_t Size);
+ StringRef StreamPurpose, uint64_t Offset,
+ uint64_t Size);
void formatMsfStreamData(StringRef Label, PDBFile &File,
const msf::MSFStreamLayout &Stream,
BinarySubstreamRef Substream);
@@ -66,7 +66,7 @@ public:
int getIndentLevel() const { return CurrentIndent; }
bool IsClassExcluded(const ClassLayout &Class);
- bool IsTypeExcluded(llvm::StringRef TypeName, uint32_t Size);
+ bool IsTypeExcluded(llvm::StringRef TypeName, uint64_t Size);
bool IsSymbolExcluded(llvm::StringRef SymbolName);
bool IsCompilandExcluded(llvm::StringRef CompilandName);
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 66d70120ac9b..fd67cac3cdd2 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -521,9 +521,9 @@ adjustInstrProfile(std::unique_ptr<WriterContext> &WC,
// Find hot/warm functions in sample profile which is cold in instr profile
// and adjust the profiles of those functions in the instr profile.
for (const auto &PD : Reader->getProfiles()) {
- StringRef FName = PD.getKey();
- const sampleprof::FunctionSamples &FS = PD.getValue();
- auto It = InstrProfileMap.find(FName);
+ auto &FContext = PD.first;
+ const sampleprof::FunctionSamples &FS = PD.second;
+ auto It = InstrProfileMap.find(FContext.toString());
if (FS.getHeadSamples() > ColdSampleThreshold &&
It != InstrProfileMap.end() &&
It->second.MaxCount <= ColdInstrThreshold &&
@@ -690,7 +690,7 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper,
bool SampleMergeColdContext, bool SampleTrimColdContext,
bool SampleColdContextFrameDepth, FailureMode FailMode) {
using namespace sampleprof;
- StringMap<FunctionSamples> ProfileMap;
+ SampleProfileMap ProfileMap;
SmallVector<std::unique_ptr<sampleprof::SampleProfileReader>, 5> Readers;
LLVMContext Context;
sampleprof::ProfileSymbolList WriterList;
@@ -716,7 +716,7 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper,
continue;
}
- StringMap<FunctionSamples> &Profiles = Reader->getProfiles();
+ SampleProfileMap &Profiles = Reader->getProfiles();
if (ProfileIsProbeBased.hasValue() &&
ProfileIsProbeBased != FunctionSamples::ProfileIsProbeBased)
exitWithError(
@@ -725,19 +725,19 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper,
if (ProfileIsCS.hasValue() && ProfileIsCS != FunctionSamples::ProfileIsCS)
exitWithError("cannot merge CS profile with non-CS profile");
ProfileIsCS = FunctionSamples::ProfileIsCS;
- for (StringMap<FunctionSamples>::iterator I = Profiles.begin(),
- E = Profiles.end();
+ for (SampleProfileMap::iterator I = Profiles.begin(), E = Profiles.end();
I != E; ++I) {
sampleprof_error Result = sampleprof_error::success;
FunctionSamples Remapped =
Remapper ? remapSamples(I->second, *Remapper, Result)
: FunctionSamples();
FunctionSamples &Samples = Remapper ? Remapped : I->second;
- StringRef FName = Samples.getNameWithContext();
- MergeResult(Result, ProfileMap[FName].merge(Samples, Input.Weight));
+ SampleContext FContext = Samples.getContext();
+ MergeResult(Result, ProfileMap[FContext].merge(Samples, Input.Weight));
if (Result != sampleprof_error::success) {
std::error_code EC = make_error_code(Result);
- handleMergeWriterError(errorCodeToError(EC), Input.Filename, FName);
+ handleMergeWriterError(errorCodeToError(EC), Input.Filename,
+ FContext.toString());
}
}
@@ -759,7 +759,7 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper,
SampleContextTrimmer(ProfileMap)
.trimAndMergeColdContextProfiles(
SampleProfColdThreshold, SampleTrimColdContext,
- SampleMergeColdContext, SampleColdContextFrameDepth);
+ SampleMergeColdContext, SampleColdContextFrameDepth, false);
}
auto WriterOrErr =
@@ -836,7 +836,7 @@ static void parseInputFilenamesFile(MemoryBuffer *Buffer,
if (SanitizedEntry.startswith("#"))
continue;
// If there's no comma, it's an unweighted profile.
- else if (SanitizedEntry.find(',') == StringRef::npos)
+ else if (!SanitizedEntry.contains(','))
addWeightedInput(WFV, {std::string(SanitizedEntry), 1});
else
addWeightedInput(WFV, parseWeightedFile(SanitizedEntry));
@@ -1022,8 +1022,8 @@ static void overlapInstrProfile(const std::string &BaseFilename,
namespace {
struct SampleOverlapStats {
- StringRef BaseName;
- StringRef TestName;
+ SampleContext BaseName;
+ SampleContext TestName;
// Number of overlap units
uint64_t OverlapCount;
// Total samples of overlap units
@@ -1226,6 +1226,9 @@ public:
/// Load profiles specified by BaseFilename and TestFilename.
std::error_code loadProfiles();
+ using FuncSampleStatsMap =
+ std::unordered_map<SampleContext, FuncSampleStats, SampleContext::Hash>;
+
private:
SampleOverlapStats ProfOverlap;
SampleOverlapStats HotFuncOverlap;
@@ -1236,8 +1239,8 @@ private:
std::unique_ptr<sampleprof::SampleProfileReader> TestReader;
// BaseStats and TestStats hold FuncSampleStats for each function, with
// function name as the key.
- StringMap<FuncSampleStats> BaseStats;
- StringMap<FuncSampleStats> TestStats;
+ FuncSampleStatsMap BaseStats;
+ FuncSampleStatsMap TestStats;
// Low similarity threshold in floating point number
double LowSimilarityThreshold;
// Block samples above BaseHotThreshold or TestHotThreshold are considered hot
@@ -1276,8 +1279,8 @@ private:
void updateHotBlockOverlap(uint64_t BaseSample, uint64_t TestSample,
uint64_t HotBlockCount);
- void getHotFunctions(const StringMap<FuncSampleStats> &ProfStats,
- StringMap<FuncSampleStats> &HotFunc,
+ void getHotFunctions(const FuncSampleStatsMap &ProfStats,
+ FuncSampleStatsMap &HotFunc,
uint64_t HotThreshold) const;
void computeHotFuncOverlap();
@@ -1381,26 +1384,26 @@ void SampleOverlapAggregator::updateHotBlockOverlap(uint64_t BaseSample,
}
void SampleOverlapAggregator::getHotFunctions(
- const StringMap<FuncSampleStats> &ProfStats,
- StringMap<FuncSampleStats> &HotFunc, uint64_t HotThreshold) const {
+ const FuncSampleStatsMap &ProfStats, FuncSampleStatsMap &HotFunc,
+ uint64_t HotThreshold) const {
for (const auto &F : ProfStats) {
if (isFunctionHot(F.second, HotThreshold))
- HotFunc.try_emplace(F.first(), F.second);
+ HotFunc.emplace(F.first, F.second);
}
}
void SampleOverlapAggregator::computeHotFuncOverlap() {
- StringMap<FuncSampleStats> BaseHotFunc;
+ FuncSampleStatsMap BaseHotFunc;
getHotFunctions(BaseStats, BaseHotFunc, BaseHotThreshold);
HotFuncOverlap.BaseCount = BaseHotFunc.size();
- StringMap<FuncSampleStats> TestHotFunc;
+ FuncSampleStatsMap TestHotFunc;
getHotFunctions(TestStats, TestHotFunc, TestHotThreshold);
HotFuncOverlap.TestCount = TestHotFunc.size();
HotFuncOverlap.UnionCount = HotFuncOverlap.TestCount;
for (const auto &F : BaseHotFunc) {
- if (TestHotFunc.count(F.first()))
+ if (TestHotFunc.count(F.first))
++HotFuncOverlap.OverlapCount;
else
++HotFuncOverlap.UnionCount;
@@ -1612,23 +1615,25 @@ double SampleOverlapAggregator::computeSampleFunctionOverlap(
void SampleOverlapAggregator::computeSampleProfileOverlap(raw_fd_ostream &OS) {
using namespace sampleprof;
- StringMap<const FunctionSamples *> BaseFuncProf;
+ std::unordered_map<SampleContext, const FunctionSamples *,
+ SampleContext::Hash>
+ BaseFuncProf;
const auto &BaseProfiles = BaseReader->getProfiles();
for (const auto &BaseFunc : BaseProfiles) {
- BaseFuncProf.try_emplace(BaseFunc.second.getNameWithContext(),
- &(BaseFunc.second));
+ BaseFuncProf.emplace(BaseFunc.second.getContext(), &(BaseFunc.second));
}
ProfOverlap.UnionCount = BaseFuncProf.size();
const auto &TestProfiles = TestReader->getProfiles();
for (const auto &TestFunc : TestProfiles) {
SampleOverlapStats FuncOverlap;
- FuncOverlap.TestName = TestFunc.second.getNameWithContext();
+ FuncOverlap.TestName = TestFunc.second.getContext();
assert(TestStats.count(FuncOverlap.TestName) &&
"TestStats should have records for all functions in test profile "
"except inlinees");
FuncOverlap.TestSample = TestStats[FuncOverlap.TestName].SampleSum;
+ bool Matched = false;
const auto Match = BaseFuncProf.find(FuncOverlap.TestName);
if (Match == BaseFuncProf.end()) {
const FuncSampleStats &FuncStats = TestStats[FuncOverlap.TestName];
@@ -1650,7 +1655,7 @@ void SampleOverlapAggregator::computeSampleProfileOverlap(raw_fd_ostream &OS) {
// Two functions match with each other. Compute function-level overlap and
// aggregate them into profile-level overlap.
- FuncOverlap.BaseName = Match->second->getNameWithContext();
+ FuncOverlap.BaseName = Match->second->getContext();
assert(BaseStats.count(FuncOverlap.BaseName) &&
"BaseStats should have records for all functions in base profile "
"except inlinees");
@@ -1673,6 +1678,7 @@ void SampleOverlapAggregator::computeSampleProfileOverlap(raw_fd_ostream &OS) {
// Remove matched base functions for later reporting functions not found
// in test profile.
BaseFuncProf.erase(Match);
+ Matched = true;
}
// Print function-level similarity information if specified by options.
@@ -1680,11 +1686,10 @@ void SampleOverlapAggregator::computeSampleProfileOverlap(raw_fd_ostream &OS) {
"TestStats should have records for all functions in test profile "
"except inlinees");
if (TestStats[FuncOverlap.TestName].MaxSample >= FuncFilter.ValueCutoff ||
- (Match != BaseFuncProf.end() &&
- FuncOverlap.Similarity < LowSimilarityThreshold) ||
- (Match != BaseFuncProf.end() && !FuncFilter.NameFilter.empty() &&
- FuncOverlap.BaseName.find(FuncFilter.NameFilter) !=
- FuncOverlap.BaseName.npos)) {
+ (Matched && FuncOverlap.Similarity < LowSimilarityThreshold) ||
+ (Matched && !FuncFilter.NameFilter.empty() &&
+ FuncOverlap.BaseName.toString().find(FuncFilter.NameFilter) !=
+ std::string::npos)) {
assert(ProfOverlap.BaseSample > 0 &&
"Total samples in base profile should be greater than 0");
FuncOverlap.BaseWeight =
@@ -1699,11 +1704,10 @@ void SampleOverlapAggregator::computeSampleProfileOverlap(raw_fd_ostream &OS) {
// Traverse through functions in base profile but not in test profile.
for (const auto &F : BaseFuncProf) {
- assert(BaseStats.count(F.second->getNameWithContext()) &&
+ assert(BaseStats.count(F.second->getContext()) &&
"BaseStats should have records for all functions in base profile "
"except inlinees");
- const FuncSampleStats &FuncStats =
- BaseStats[F.second->getNameWithContext()];
+ const FuncSampleStats &FuncStats = BaseStats[F.second->getContext()];
++ProfOverlap.BaseUniqueCount;
ProfOverlap.BaseUniqueSample += FuncStats.SampleSum;
@@ -1734,7 +1738,7 @@ void SampleOverlapAggregator::initializeSampleProfileOverlap() {
FuncSampleStats FuncStats;
getFuncSampleStats(I.second, FuncStats, BaseHotThreshold);
ProfOverlap.BaseSample += FuncStats.SampleSum;
- BaseStats.try_emplace(I.second.getNameWithContext(), FuncStats);
+ BaseStats.emplace(I.second.getContext(), FuncStats);
}
const auto &TestProf = TestReader->getProfiles();
@@ -1743,7 +1747,7 @@ void SampleOverlapAggregator::initializeSampleProfileOverlap() {
FuncSampleStats FuncStats;
getFuncSampleStats(I.second, FuncStats, TestHotThreshold);
ProfOverlap.TestSample += FuncStats.SampleSum;
- TestStats.try_emplace(I.second.getNameWithContext(), FuncStats);
+ TestStats.emplace(I.second.getContext(), FuncStats);
}
ProfOverlap.BaseName = StringRef(BaseFilename);
@@ -1807,13 +1811,15 @@ void SampleOverlapAggregator::dumpFuncSimilarity(raw_fd_ostream &OS) const {
FOS.PadToColumn(TestSampleCol);
FOS << F.second.TestSample;
FOS.PadToColumn(FuncNameCol);
- FOS << F.second.TestName << "\n";
+ FOS << F.second.TestName.toString() << "\n";
}
}
void SampleOverlapAggregator::dumpProgramSummary(raw_fd_ostream &OS) const {
- OS << "Profile overlap infomation for base_profile: " << ProfOverlap.BaseName
- << " and test_profile: " << ProfOverlap.TestName << "\nProgram level:\n";
+ OS << "Profile overlap infomation for base_profile: "
+ << ProfOverlap.BaseName.toString()
+ << " and test_profile: " << ProfOverlap.TestName.toString()
+ << "\nProgram level:\n";
OS << " Whole program profile similarity: "
<< format("%.3f%%", ProfOverlap.Similarity * 100) << "\n";
@@ -1909,22 +1915,13 @@ std::error_code SampleOverlapAggregator::loadProfiles() {
// Load BaseHotThreshold and TestHotThreshold as 99-percentile threshold in
// profile summary.
- const uint64_t HotCutoff = 990000;
ProfileSummary &BasePS = BaseReader->getSummary();
- for (const auto &SummaryEntry : BasePS.getDetailedSummary()) {
- if (SummaryEntry.Cutoff == HotCutoff) {
- BaseHotThreshold = SummaryEntry.MinCount;
- break;
- }
- }
-
ProfileSummary &TestPS = TestReader->getSummary();
- for (const auto &SummaryEntry : TestPS.getDetailedSummary()) {
- if (SummaryEntry.Cutoff == HotCutoff) {
- TestHotThreshold = SummaryEntry.MinCount;
- break;
- }
- }
+ BaseHotThreshold =
+ ProfileSummaryBuilder::getHotCountThreshold(BasePS.getDetailedSummary());
+ TestHotThreshold =
+ ProfileSummaryBuilder::getHotCountThreshold(TestPS.getDetailedSummary());
+
return std::error_code();
}
@@ -2111,9 +2108,8 @@ static int showInstrProfile(const std::string &Filename, bool ShowCounts,
if (FuncIsCS != ShowCS)
continue;
}
- bool Show =
- ShowAllFunctions || (!ShowFunction.empty() &&
- Func.Name.find(ShowFunction) != Func.Name.npos);
+ bool Show = ShowAllFunctions ||
+ (!ShowFunction.empty() && Func.Name.contains(ShowFunction));
bool doTextFormatDump = (Show && TextFormat);
@@ -2271,7 +2267,7 @@ static void showSectionInfo(sampleprof::SampleProfileReader *Reader,
namespace {
struct HotFuncInfo {
- StringRef FuncName;
+ std::string FuncName;
uint64_t TotalCount;
double TotalCountPercent;
uint64_t MaxCount;
@@ -2282,8 +2278,8 @@ struct HotFuncInfo {
EntryCount(0) {}
HotFuncInfo(StringRef FN, uint64_t TS, double TSP, uint64_t MS, uint64_t ES)
- : FuncName(FN), TotalCount(TS), TotalCountPercent(TSP), MaxCount(MS),
- EntryCount(ES) {}
+ : FuncName(FN.begin(), FN.end()), TotalCount(TS), TotalCountPercent(TSP),
+ MaxCount(MS), EntryCount(ES) {}
};
} // namespace
@@ -2298,7 +2294,7 @@ static void dumpHotFunctionList(const std::vector<std::string> &ColumnTitle,
uint64_t HotFuncCount, uint64_t TotalFuncCount,
uint64_t HotProfCount, uint64_t TotalProfCount,
const std::string &HotFuncMetric,
- raw_fd_ostream &OS) {
+ uint32_t TopNFunctions, raw_fd_ostream &OS) {
assert(ColumnOffset.size() == ColumnTitle.size() &&
"ColumnOffset and ColumnTitle should have the same size");
assert(ColumnTitle.size() >= 4 &&
@@ -2327,7 +2323,10 @@ static void dumpHotFunctionList(const std::vector<std::string> &ColumnTitle,
}
FOS << "\n";
- for (const HotFuncInfo &R : PrintValues) {
+ uint32_t Count = 0;
+ for (const auto &R : PrintValues) {
+ if (TopNFunctions && (Count++ == TopNFunctions))
+ break;
FOS.PadToColumn(ColumnOffset[0]);
FOS << R.TotalCount << " (" << format("%.2f%%", R.TotalCountPercent) << ")";
FOS.PadToColumn(ColumnOffset[1]);
@@ -2339,9 +2338,9 @@ static void dumpHotFunctionList(const std::vector<std::string> &ColumnTitle,
}
}
-static int
-showHotFunctionList(const StringMap<sampleprof::FunctionSamples> &Profiles,
- ProfileSummary &PS, raw_fd_ostream &OS) {
+static int showHotFunctionList(const sampleprof::SampleProfileMap &Profiles,
+ ProfileSummary &PS, uint32_t TopN,
+ raw_fd_ostream &OS) {
using namespace sampleprof;
const uint32_t HotFuncCutoff = 990000;
@@ -2391,18 +2390,19 @@ showHotFunctionList(const StringMap<sampleprof::FunctionSamples> &Profiles,
? (Func.getTotalSamples() * 100.0) / ProfileTotalSample
: 0;
PrintValues.emplace_back(HotFuncInfo(
- Func.getNameWithContext(), Func.getTotalSamples(), TotalSamplePercent,
- FuncPair.second.second, Func.getEntrySamples()));
+ Func.getContext().toString(), Func.getTotalSamples(),
+ TotalSamplePercent, FuncPair.second.second, Func.getEntrySamples()));
}
dumpHotFunctionList(ColumnTitle, ColumnOffset, PrintValues, HotFuncCount,
Profiles.size(), HotFuncSample, ProfileTotalSample,
- Metric, OS);
+ Metric, TopN, OS);
return 0;
}
static int showSampleProfile(const std::string &Filename, bool ShowCounts,
- bool ShowAllFunctions, bool ShowDetailedSummary,
+ uint32_t TopN, bool ShowAllFunctions,
+ bool ShowDetailedSummary,
const std::string &ShowFunction,
bool ShowProfileSymbolList,
bool ShowSectionInfoOnly, bool ShowHotFuncList,
@@ -2426,7 +2426,8 @@ static int showSampleProfile(const std::string &Filename, bool ShowCounts,
if (ShowAllFunctions || ShowFunction.empty())
Reader->dump(OS);
else
- Reader->dumpFunctionProfile(ShowFunction, OS);
+ // TODO: parse context string to support filtering by contexts.
+ Reader->dumpFunctionProfile(StringRef(ShowFunction), OS);
if (ShowProfileSymbolList) {
std::unique_ptr<sampleprof::ProfileSymbolList> ReaderList =
@@ -2440,8 +2441,8 @@ static int showSampleProfile(const std::string &Filename, bool ShowCounts,
PS.printDetailedSummary(OS);
}
- if (ShowHotFuncList)
- showHotFunctionList(Reader->getProfiles(), Reader->getSummary(), OS);
+ if (ShowHotFuncList || TopN)
+ showHotFunctionList(Reader->getProfiles(), Reader->getSummary(), TopN, OS);
return 0;
}
@@ -2532,10 +2533,10 @@ static int show_main(int argc, const char *argv[]) {
ShowAllFunctions, ShowCS, ValueCutoff, OnlyListBelow, ShowFunction,
TextFormat, ShowBinaryIds, OS);
else
- return showSampleProfile(Filename, ShowCounts, ShowAllFunctions,
- ShowDetailedSummary, ShowFunction,
- ShowProfileSymbolList, ShowSectionInfoOnly,
- ShowHotFuncList, OS);
+ return showSampleProfile(Filename, ShowCounts, TopNFunctions,
+ ShowAllFunctions, ShowDetailedSummary,
+ ShowFunction, ShowProfileSymbolList,
+ ShowSectionInfoOnly, ShowHotFuncList, OS);
}
int main(int argc, const char *argv[]) {
diff --git a/llvm/tools/llvm-readobj/ARMEHABIPrinter.h b/llvm/tools/llvm-readobj/ARMEHABIPrinter.h
index 3d8acbf48fa9..d97cea4b6d6a 100644
--- a/llvm/tools/llvm-readobj/ARMEHABIPrinter.h
+++ b/llvm/tools/llvm-readobj/ARMEHABIPrinter.h
@@ -426,7 +426,7 @@ PrinterContext<ET>::FindExceptionTable(unsigned IndexSectionIndex,
auto Ret = ELF.getSection(*Symbol, SymTab, ShndxTable);
if (!Ret)
- report_fatal_error(errorToErrorCode(Ret.takeError()).message());
+ report_fatal_error(Twine(errorToErrorCode(Ret.takeError()).message()));
return *Ret;
}
}
diff --git a/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp b/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp
index 99ee639fc45d..78be632f2153 100644
--- a/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp
+++ b/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp
@@ -238,22 +238,27 @@ ErrorOr<SymbolRef> Decoder::getRelocatedSymbol(const COFFObjectFile &,
return inconvertibleErrorCode();
}
-SymbolRef Decoder::getPreferredSymbol(const COFFObjectFile &COFF,
- SymbolRef Sym) {
+SymbolRef Decoder::getPreferredSymbol(const COFFObjectFile &COFF, SymbolRef Sym,
+ uint64_t &SymbolOffset) {
// The symbol resolved by getRelocatedSymbol can be any internal
// nondescriptive symbol; try to resolve a more descriptive one.
COFFSymbolRef CoffSym = COFF.getCOFFSymbol(Sym);
- if (CoffSym.getStorageClass() != COFF::IMAGE_SYM_CLASS_LABEL)
+ if (CoffSym.getStorageClass() != COFF::IMAGE_SYM_CLASS_LABEL &&
+ CoffSym.getSectionDefinition() == nullptr)
return Sym;
for (const auto &S : COFF.symbols()) {
COFFSymbolRef CS = COFF.getCOFFSymbol(S);
if (CS.getSectionNumber() == CoffSym.getSectionNumber() &&
- CS.getValue() == CoffSym.getValue()) {
- if (CS.isExternal())
- return S;
- if (CS.getStorageClass() != COFF::IMAGE_SYM_CLASS_LABEL) {
+ CS.getValue() <= CoffSym.getValue() + SymbolOffset &&
+ CS.getStorageClass() != COFF::IMAGE_SYM_CLASS_LABEL &&
+ CS.getSectionDefinition() == nullptr) {
+ uint32_t Offset = CoffSym.getValue() + SymbolOffset - CS.getValue();
+ if (Offset <= SymbolOffset) {
+ SymbolOffset = Offset;
Sym = S;
CoffSym = CS;
+ if (CS.isExternal() && SymbolOffset == 0)
+ return Sym;
}
}
}
@@ -277,12 +282,14 @@ ErrorOr<SymbolRef> Decoder::getSymbolForLocation(
std::string Buf;
llvm::raw_string_ostream OS(Buf);
logAllUnhandledErrors(AddressOrErr.takeError(), OS);
- OS.flush();
- report_fatal_error(Buf);
+ report_fatal_error(Twine(OS.str()));
}
// We apply SymbolOffset here directly. We return it separately to allow
// the caller to print it as an offset on the symbol name.
SymbolAddress = *AddressOrErr + SymbolOffset;
+
+ if (FunctionOnly) // Resolve label/section symbols into function names.
+ SymOrErr = getPreferredSymbol(COFF, *SymOrErr, SymbolOffset);
} else {
// No matching relocation found; operating on a linked image. Try to
// find a descriptive symbol if possible. The immediate offset contains
@@ -292,8 +299,6 @@ ErrorOr<SymbolRef> Decoder::getSymbolForLocation(
SymbolOffset = 0;
SymOrErr = getSymbol(COFF, SymbolAddress, FunctionOnly);
}
- if (SymOrErr && FunctionOnly) // Resolve label symbols into function names
- SymOrErr = getPreferredSymbol(COFF, *SymOrErr);
return SymOrErr;
}
@@ -1000,8 +1005,7 @@ bool Decoder::dumpXDataRecord(const COFFObjectFile &COFF,
std::string Buf;
llvm::raw_string_ostream OS(Buf);
logAllUnhandledErrors(Name.takeError(), OS);
- OS.flush();
- report_fatal_error(Buf);
+ report_fatal_error(Twine(OS.str()));
}
ListScope EHS(SW, "ExceptionHandler");
@@ -1040,8 +1044,7 @@ bool Decoder::dumpUnpackedEntry(const COFFObjectFile &COFF,
std::string Buf;
llvm::raw_string_ostream OS(Buf);
logAllUnhandledErrors(FunctionNameOrErr.takeError(), OS);
- OS.flush();
- report_fatal_error(Buf);
+ report_fatal_error(Twine(OS.str()));
}
FunctionName = *FunctionNameOrErr;
}
@@ -1055,8 +1058,7 @@ bool Decoder::dumpUnpackedEntry(const COFFObjectFile &COFF,
std::string Buf;
llvm::raw_string_ostream OS(Buf);
logAllUnhandledErrors(Name.takeError(), OS);
- OS.flush();
- report_fatal_error(Buf);
+ report_fatal_error(Twine(OS.str()));
}
SW.printString("ExceptionRecord",
@@ -1101,8 +1103,7 @@ bool Decoder::dumpPackedEntry(const object::COFFObjectFile &COFF,
std::string Buf;
llvm::raw_string_ostream OS(Buf);
logAllUnhandledErrors(FunctionNameOrErr.takeError(), OS);
- OS.flush();
- report_fatal_error(Buf);
+ report_fatal_error(Twine(OS.str()));
}
FunctionName = *FunctionNameOrErr;
}
@@ -1143,8 +1144,7 @@ bool Decoder::dumpPackedARM64Entry(const object::COFFObjectFile &COFF,
std::string Buf;
llvm::raw_string_ostream OS(Buf);
logAllUnhandledErrors(FunctionNameOrErr.takeError(), OS);
- OS.flush();
- report_fatal_error(Buf);
+ report_fatal_error(Twine(OS.str()));
}
FunctionName = *FunctionNameOrErr;
}
diff --git a/llvm/tools/llvm-readobj/ARMWinEHPrinter.h b/llvm/tools/llvm-readobj/ARMWinEHPrinter.h
index efe16850c7fa..920d4e5f7332 100644
--- a/llvm/tools/llvm-readobj/ARMWinEHPrinter.h
+++ b/llvm/tools/llvm-readobj/ARMWinEHPrinter.h
@@ -154,7 +154,8 @@ class Decoder {
bool FunctionOnly = false);
object::SymbolRef getPreferredSymbol(const object::COFFObjectFile &COFF,
- object::SymbolRef Sym);
+ object::SymbolRef Sym,
+ uint64_t &SymbolOffset);
bool dumpXDataRecord(const object::COFFObjectFile &COFF,
const object::SectionRef &Section,
diff --git a/llvm/tools/llvm-readobj/COFFDumper.cpp b/llvm/tools/llvm-readobj/COFFDumper.cpp
index 96124cc03484..b235398e7a45 100644
--- a/llvm/tools/llvm-readobj/COFFDumper.cpp
+++ b/llvm/tools/llvm-readobj/COFFDumper.cpp
@@ -337,7 +337,7 @@ void COFFDumper::printBinaryBlockWithRelocs(StringRef Label,
}
}
-static const EnumEntry<COFF::MachineTypes> ImageFileMachineType[] = {
+const EnumEntry<COFF::MachineTypes> ImageFileMachineType[] = {
LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_UNKNOWN ),
LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_AM33 ),
LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_AMD64 ),
@@ -362,7 +362,7 @@ static const EnumEntry<COFF::MachineTypes> ImageFileMachineType[] = {
LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_WCEMIPSV2)
};
-static const EnumEntry<COFF::Characteristics> ImageFileCharacteristics[] = {
+const EnumEntry<COFF::Characteristics> ImageFileCharacteristics[] = {
LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_RELOCS_STRIPPED ),
LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_EXECUTABLE_IMAGE ),
LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_LINE_NUMS_STRIPPED ),
@@ -380,7 +380,7 @@ static const EnumEntry<COFF::Characteristics> ImageFileCharacteristics[] = {
LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_BYTES_REVERSED_HI )
};
-static const EnumEntry<COFF::WindowsSubsystem> PEWindowsSubsystem[] = {
+const EnumEntry<COFF::WindowsSubsystem> PEWindowsSubsystem[] = {
LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SUBSYSTEM_UNKNOWN ),
LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SUBSYSTEM_NATIVE ),
LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SUBSYSTEM_WINDOWS_GUI ),
@@ -394,7 +394,7 @@ static const EnumEntry<COFF::WindowsSubsystem> PEWindowsSubsystem[] = {
LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SUBSYSTEM_XBOX ),
};
-static const EnumEntry<COFF::DLLCharacteristics> PEDLLCharacteristics[] = {
+const EnumEntry<COFF::DLLCharacteristics> PEDLLCharacteristics[] = {
LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_DLL_CHARACTERISTICS_HIGH_ENTROPY_VA ),
LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE ),
LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_DLL_CHARACTERISTICS_FORCE_INTEGRITY ),
@@ -453,7 +453,7 @@ ImageSectionCharacteristics[] = {
LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_MEM_WRITE )
};
-static const EnumEntry<COFF::SymbolBaseType> ImageSymType[] = {
+const EnumEntry<COFF::SymbolBaseType> ImageSymType[] = {
{ "Null" , COFF::IMAGE_SYM_TYPE_NULL },
{ "Void" , COFF::IMAGE_SYM_TYPE_VOID },
{ "Char" , COFF::IMAGE_SYM_TYPE_CHAR },
@@ -472,14 +472,14 @@ static const EnumEntry<COFF::SymbolBaseType> ImageSymType[] = {
{ "DWord" , COFF::IMAGE_SYM_TYPE_DWORD }
};
-static const EnumEntry<COFF::SymbolComplexType> ImageSymDType[] = {
+const EnumEntry<COFF::SymbolComplexType> ImageSymDType[] = {
{ "Null" , COFF::IMAGE_SYM_DTYPE_NULL },
{ "Pointer" , COFF::IMAGE_SYM_DTYPE_POINTER },
{ "Function", COFF::IMAGE_SYM_DTYPE_FUNCTION },
{ "Array" , COFF::IMAGE_SYM_DTYPE_ARRAY }
};
-static const EnumEntry<COFF::SymbolStorageClass> ImageSymClass[] = {
+const EnumEntry<COFF::SymbolStorageClass> ImageSymClass[] = {
{ "EndOfFunction" , COFF::IMAGE_SYM_CLASS_END_OF_FUNCTION },
{ "Null" , COFF::IMAGE_SYM_CLASS_NULL },
{ "Automatic" , COFF::IMAGE_SYM_CLASS_AUTOMATIC },
@@ -509,7 +509,7 @@ static const EnumEntry<COFF::SymbolStorageClass> ImageSymClass[] = {
{ "CLRToken" , COFF::IMAGE_SYM_CLASS_CLR_TOKEN }
};
-static const EnumEntry<COFF::COMDATType> ImageCOMDATSelect[] = {
+const EnumEntry<COFF::COMDATType> ImageCOMDATSelect[] = {
{ "NoDuplicates", COFF::IMAGE_COMDAT_SELECT_NODUPLICATES },
{ "Any" , COFF::IMAGE_COMDAT_SELECT_ANY },
{ "SameSize" , COFF::IMAGE_COMDAT_SELECT_SAME_SIZE },
@@ -519,7 +519,7 @@ static const EnumEntry<COFF::COMDATType> ImageCOMDATSelect[] = {
{ "Newest" , COFF::IMAGE_COMDAT_SELECT_NEWEST }
};
-static const EnumEntry<COFF::DebugType> ImageDebugType[] = {
+const EnumEntry<COFF::DebugType> ImageDebugType[] = {
{"Unknown", COFF::IMAGE_DEBUG_TYPE_UNKNOWN},
{"COFF", COFF::IMAGE_DEBUG_TYPE_COFF},
{"CodeView", COFF::IMAGE_DEBUG_TYPE_CODEVIEW},
@@ -548,7 +548,7 @@ WeakExternalCharacteristics[] = {
{ "Alias" , COFF::IMAGE_WEAK_EXTERN_SEARCH_ALIAS }
};
-static const EnumEntry<uint32_t> SubSectionTypes[] = {
+const EnumEntry<uint32_t> SubSectionTypes[] = {
LLVM_READOBJ_ENUM_CLASS_ENT(DebugSubsectionKind, Symbols),
LLVM_READOBJ_ENUM_CLASS_ENT(DebugSubsectionKind, Lines),
LLVM_READOBJ_ENUM_CLASS_ENT(DebugSubsectionKind, StringTable),
@@ -564,13 +564,13 @@ static const EnumEntry<uint32_t> SubSectionTypes[] = {
LLVM_READOBJ_ENUM_CLASS_ENT(DebugSubsectionKind, CoffSymbolRVA),
};
-static const EnumEntry<uint32_t> FrameDataFlags[] = {
+const EnumEntry<uint32_t> FrameDataFlags[] = {
LLVM_READOBJ_ENUM_ENT(FrameData, HasSEH),
LLVM_READOBJ_ENUM_ENT(FrameData, HasEH),
LLVM_READOBJ_ENUM_ENT(FrameData, IsFunctionStart),
};
-static const EnumEntry<uint8_t> FileChecksumKindNames[] = {
+const EnumEntry<uint8_t> FileChecksumKindNames[] = {
LLVM_READOBJ_ENUM_CLASS_ENT(FileChecksumKind, None),
LLVM_READOBJ_ENUM_CLASS_ENT(FileChecksumKind, MD5),
LLVM_READOBJ_ENUM_CLASS_ENT(FileChecksumKind, SHA1),
@@ -709,7 +709,10 @@ void COFFDumper::printPEHeader(const PEHeader *Hdr) {
};
for (uint32_t i = 0; i < Hdr->NumberOfRvaAndSize; ++i)
- printDataDirectory(i, directory[i]);
+ if (i < sizeof(directory) / sizeof(char *))
+ printDataDirectory(i, directory[i]);
+ else
+ printDataDirectory(i, "Unknown");
}
}
diff --git a/llvm/tools/llvm-readobj/DwarfCFIEHPrinter.h b/llvm/tools/llvm-readobj/DwarfCFIEHPrinter.h
index 2dfe21684a62..5dc947e024b9 100644
--- a/llvm/tools/llvm-readobj/DwarfCFIEHPrinter.h
+++ b/llvm/tools/llvm-readobj/DwarfCFIEHPrinter.h
@@ -185,7 +185,8 @@ void PrinterContext<ELFT>::printEHFrame(const Elf_Shdr *EHFrameShdr) const {
reportError(DataOrErr.takeError(), ObjF.getFileName());
// Construct DWARFDataExtractor to handle relocations ("PC Begin" fields).
- std::unique_ptr<DWARFContext> DICtx = DWARFContext::create(ObjF, nullptr);
+ std::unique_ptr<DWARFContext> DICtx = DWARFContext::create(
+ ObjF, DWARFContext::ProcessDebugRelocations::Process, nullptr);
DWARFDataExtractor DE(DICtx->getDWARFObj(),
DICtx->getDWARFObj().getEHFrameSection(),
ELFT::TargetEndianness == support::endianness::little,
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index f221acba979a..4abea0b1d23d 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -49,6 +49,8 @@
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/LEB128.h"
+#include "llvm/Support/MSP430AttributeParser.h"
+#include "llvm/Support/MSP430Attributes.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MipsABIFlags.h"
#include "llvm/Support/RISCVAttributeParser.h"
@@ -339,7 +341,8 @@ protected:
return DynRegionInfo(ObjF, *this, Obj.base() + Offset, Size, EntSize);
}
- void printAttributes();
+ void printAttributes(unsigned, std::unique_ptr<ELFAttributeParser>,
+ support::endianness);
void printMipsReginfo();
void printMipsOptions();
@@ -963,19 +966,19 @@ findNotEmptySectionByAddress(const ELFO &Obj, StringRef FileName,
return nullptr;
}
-static const EnumEntry<unsigned> ElfClass[] = {
+const EnumEntry<unsigned> ElfClass[] = {
{"None", "none", ELF::ELFCLASSNONE},
{"32-bit", "ELF32", ELF::ELFCLASS32},
{"64-bit", "ELF64", ELF::ELFCLASS64},
};
-static const EnumEntry<unsigned> ElfDataEncoding[] = {
+const EnumEntry<unsigned> ElfDataEncoding[] = {
{"None", "none", ELF::ELFDATANONE},
{"LittleEndian", "2's complement, little endian", ELF::ELFDATA2LSB},
{"BigEndian", "2's complement, big endian", ELF::ELFDATA2MSB},
};
-static const EnumEntry<unsigned> ElfObjectFileType[] = {
+const EnumEntry<unsigned> ElfObjectFileType[] = {
{"None", "NONE (none)", ELF::ET_NONE},
{"Relocatable", "REL (Relocatable file)", ELF::ET_REL},
{"Executable", "EXEC (Executable file)", ELF::ET_EXEC},
@@ -983,7 +986,7 @@ static const EnumEntry<unsigned> ElfObjectFileType[] = {
{"Core", "CORE (Core file)", ELF::ET_CORE},
};
-static const EnumEntry<unsigned> ElfOSABI[] = {
+const EnumEntry<unsigned> ElfOSABI[] = {
{"SystemV", "UNIX - System V", ELF::ELFOSABI_NONE},
{"HPUX", "UNIX - HP-UX", ELF::ELFOSABI_HPUX},
{"NetBSD", "UNIX - NetBSD", ELF::ELFOSABI_NETBSD},
@@ -1004,22 +1007,22 @@ static const EnumEntry<unsigned> ElfOSABI[] = {
{"Standalone", "Standalone App", ELF::ELFOSABI_STANDALONE}
};
-static const EnumEntry<unsigned> AMDGPUElfOSABI[] = {
+const EnumEntry<unsigned> AMDGPUElfOSABI[] = {
{"AMDGPU_HSA", "AMDGPU - HSA", ELF::ELFOSABI_AMDGPU_HSA},
{"AMDGPU_PAL", "AMDGPU - PAL", ELF::ELFOSABI_AMDGPU_PAL},
{"AMDGPU_MESA3D", "AMDGPU - MESA3D", ELF::ELFOSABI_AMDGPU_MESA3D}
};
-static const EnumEntry<unsigned> ARMElfOSABI[] = {
+const EnumEntry<unsigned> ARMElfOSABI[] = {
{"ARM", "ARM", ELF::ELFOSABI_ARM}
};
-static const EnumEntry<unsigned> C6000ElfOSABI[] = {
+const EnumEntry<unsigned> C6000ElfOSABI[] = {
{"C6000_ELFABI", "Bare-metal C6000", ELF::ELFOSABI_C6000_ELFABI},
{"C6000_LINUX", "Linux C6000", ELF::ELFOSABI_C6000_LINUX}
};
-static const EnumEntry<unsigned> ElfMachineType[] = {
+const EnumEntry<unsigned> ElfMachineType[] = {
ENUM_ENT(EM_NONE, "None"),
ENUM_ENT(EM_M32, "WE32100"),
ENUM_ENT(EM_SPARC, "Sparc"),
@@ -1185,19 +1188,19 @@ static const EnumEntry<unsigned> ElfMachineType[] = {
ENUM_ENT(EM_VE, "NEC SX-Aurora Vector Engine"),
};
-static const EnumEntry<unsigned> ElfSymbolBindings[] = {
+const EnumEntry<unsigned> ElfSymbolBindings[] = {
{"Local", "LOCAL", ELF::STB_LOCAL},
{"Global", "GLOBAL", ELF::STB_GLOBAL},
{"Weak", "WEAK", ELF::STB_WEAK},
{"Unique", "UNIQUE", ELF::STB_GNU_UNIQUE}};
-static const EnumEntry<unsigned> ElfSymbolVisibilities[] = {
+const EnumEntry<unsigned> ElfSymbolVisibilities[] = {
{"DEFAULT", "DEFAULT", ELF::STV_DEFAULT},
{"INTERNAL", "INTERNAL", ELF::STV_INTERNAL},
{"HIDDEN", "HIDDEN", ELF::STV_HIDDEN},
{"PROTECTED", "PROTECTED", ELF::STV_PROTECTED}};
-static const EnumEntry<unsigned> AMDGPUSymbolTypes[] = {
+const EnumEntry<unsigned> AMDGPUSymbolTypes[] = {
{ "AMDGPU_HSA_KERNEL", ELF::STT_AMDGPU_HSA_KERNEL }
};
@@ -1208,7 +1211,7 @@ static const char *getGroupType(uint32_t Flag) {
return "(unknown)";
}
-static const EnumEntry<unsigned> ElfSectionFlags[] = {
+const EnumEntry<unsigned> ElfSectionFlags[] = {
ENUM_ENT(SHF_WRITE, "W"),
ENUM_ENT(SHF_ALLOC, "A"),
ENUM_ENT(SHF_EXECINSTR, "X"),
@@ -1224,20 +1227,20 @@ static const EnumEntry<unsigned> ElfSectionFlags[] = {
ENUM_ENT(SHF_EXCLUDE, "E"),
};
-static const EnumEntry<unsigned> ElfXCoreSectionFlags[] = {
+const EnumEntry<unsigned> ElfXCoreSectionFlags[] = {
ENUM_ENT(XCORE_SHF_CP_SECTION, ""),
ENUM_ENT(XCORE_SHF_DP_SECTION, "")
};
-static const EnumEntry<unsigned> ElfARMSectionFlags[] = {
+const EnumEntry<unsigned> ElfARMSectionFlags[] = {
ENUM_ENT(SHF_ARM_PURECODE, "y")
};
-static const EnumEntry<unsigned> ElfHexagonSectionFlags[] = {
+const EnumEntry<unsigned> ElfHexagonSectionFlags[] = {
ENUM_ENT(SHF_HEX_GPREL, "")
};
-static const EnumEntry<unsigned> ElfMipsSectionFlags[] = {
+const EnumEntry<unsigned> ElfMipsSectionFlags[] = {
ENUM_ENT(SHF_MIPS_NODUPES, ""),
ENUM_ENT(SHF_MIPS_NAMES, ""),
ENUM_ENT(SHF_MIPS_LOCAL, ""),
@@ -1248,7 +1251,7 @@ static const EnumEntry<unsigned> ElfMipsSectionFlags[] = {
ENUM_ENT(SHF_MIPS_STRING, "")
};
-static const EnumEntry<unsigned> ElfX86_64SectionFlags[] = {
+const EnumEntry<unsigned> ElfX86_64SectionFlags[] = {
ENUM_ENT(SHF_X86_64_LARGE, "l")
};
@@ -1395,13 +1398,13 @@ static std::string getGNUPtType(unsigned Arch, unsigned Type) {
return Seg.drop_front(3).str();
}
-static const EnumEntry<unsigned> ElfSegmentFlags[] = {
+const EnumEntry<unsigned> ElfSegmentFlags[] = {
LLVM_READOBJ_ENUM_ENT(ELF, PF_X),
LLVM_READOBJ_ENUM_ENT(ELF, PF_W),
LLVM_READOBJ_ENUM_ENT(ELF, PF_R)
};
-static const EnumEntry<unsigned> ElfHeaderMipsFlags[] = {
+const EnumEntry<unsigned> ElfHeaderMipsFlags[] = {
ENUM_ENT(EF_MIPS_NOREORDER, "noreorder"),
ENUM_ENT(EF_MIPS_PIC, "pic"),
ENUM_ENT(EF_MIPS_CPIC, "cpic"),
@@ -1447,7 +1450,7 @@ static const EnumEntry<unsigned> ElfHeaderMipsFlags[] = {
ENUM_ENT(EF_MIPS_ARCH_64R6, "mips64r6")
};
-static const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion3[] = {
+const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion3[] = {
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_NONE),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_R600),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_R630),
@@ -1501,7 +1504,7 @@ static const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion3[] = {
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_SRAMECC_V3)
};
-static const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion4[] = {
+const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion4[] = {
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_NONE),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_R600),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_R630),
@@ -1559,7 +1562,7 @@ static const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion4[] = {
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_SRAMECC_ON_V4)
};
-static const EnumEntry<unsigned> ElfHeaderRISCVFlags[] = {
+const EnumEntry<unsigned> ElfHeaderRISCVFlags[] = {
ENUM_ENT(EF_RISCV_RVC, "RVC"),
ENUM_ENT(EF_RISCV_FLOAT_ABI_SINGLE, "single-float ABI"),
ENUM_ENT(EF_RISCV_FLOAT_ABI_DOUBLE, "double-float ABI"),
@@ -1567,7 +1570,7 @@ static const EnumEntry<unsigned> ElfHeaderRISCVFlags[] = {
ENUM_ENT(EF_RISCV_RVE, "RVE")
};
-static const EnumEntry<unsigned> ElfHeaderAVRFlags[] = {
+const EnumEntry<unsigned> ElfHeaderAVRFlags[] = {
LLVM_READOBJ_ENUM_ENT(ELF, EF_AVR_ARCH_AVR1),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AVR_ARCH_AVR2),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AVR_ARCH_AVR25),
@@ -1590,29 +1593,32 @@ static const EnumEntry<unsigned> ElfHeaderAVRFlags[] = {
};
-static const EnumEntry<unsigned> ElfSymOtherFlags[] = {
+const EnumEntry<unsigned> ElfSymOtherFlags[] = {
LLVM_READOBJ_ENUM_ENT(ELF, STV_INTERNAL),
LLVM_READOBJ_ENUM_ENT(ELF, STV_HIDDEN),
LLVM_READOBJ_ENUM_ENT(ELF, STV_PROTECTED)
};
-static const EnumEntry<unsigned> ElfMipsSymOtherFlags[] = {
+const EnumEntry<unsigned> ElfMipsSymOtherFlags[] = {
LLVM_READOBJ_ENUM_ENT(ELF, STO_MIPS_OPTIONAL),
LLVM_READOBJ_ENUM_ENT(ELF, STO_MIPS_PLT),
LLVM_READOBJ_ENUM_ENT(ELF, STO_MIPS_PIC),
LLVM_READOBJ_ENUM_ENT(ELF, STO_MIPS_MICROMIPS)
};
-static const EnumEntry<unsigned> ElfAArch64SymOtherFlags[] = {
+const EnumEntry<unsigned> ElfAArch64SymOtherFlags[] = {
LLVM_READOBJ_ENUM_ENT(ELF, STO_AARCH64_VARIANT_PCS)
};
-static const EnumEntry<unsigned> ElfMips16SymOtherFlags[] = {
+const EnumEntry<unsigned> ElfMips16SymOtherFlags[] = {
LLVM_READOBJ_ENUM_ENT(ELF, STO_MIPS_OPTIONAL),
LLVM_READOBJ_ENUM_ENT(ELF, STO_MIPS_PLT),
LLVM_READOBJ_ENUM_ENT(ELF, STO_MIPS_MIPS16)
};
+const EnumEntry<unsigned> ElfRISCVSymOtherFlags[] = {
+ LLVM_READOBJ_ENUM_ENT(ELF, STO_RISCV_VARIANT_CC)};
+
static const char *getElfMipsOptionsOdkType(unsigned Odk) {
switch (Odk) {
LLVM_READOBJ_ENUM_CASE(ELF, ODK_NULL);
@@ -2065,7 +2071,7 @@ template <typename ELFT> void ELFDumper<ELFT>::printVersionInfo() {
#define LLVM_READOBJ_DT_FLAG_ENT(prefix, enum) \
{ #enum, prefix##_##enum }
-static const EnumEntry<unsigned> ElfDynamicDTFlags[] = {
+const EnumEntry<unsigned> ElfDynamicDTFlags[] = {
LLVM_READOBJ_DT_FLAG_ENT(DF, ORIGIN),
LLVM_READOBJ_DT_FLAG_ENT(DF, SYMBOLIC),
LLVM_READOBJ_DT_FLAG_ENT(DF, TEXTREL),
@@ -2073,7 +2079,7 @@ static const EnumEntry<unsigned> ElfDynamicDTFlags[] = {
LLVM_READOBJ_DT_FLAG_ENT(DF, STATIC_TLS)
};
-static const EnumEntry<unsigned> ElfDynamicDTFlags1[] = {
+const EnumEntry<unsigned> ElfDynamicDTFlags1[] = {
LLVM_READOBJ_DT_FLAG_ENT(DF_1, NOW),
LLVM_READOBJ_DT_FLAG_ENT(DF_1, GLOBAL),
LLVM_READOBJ_DT_FLAG_ENT(DF_1, GROUP),
@@ -2103,7 +2109,7 @@ static const EnumEntry<unsigned> ElfDynamicDTFlags1[] = {
LLVM_READOBJ_DT_FLAG_ENT(DF_1, PIE),
};
-static const EnumEntry<unsigned> ElfDynamicDTMipsFlags[] = {
+const EnumEntry<unsigned> ElfDynamicDTMipsFlags[] = {
LLVM_READOBJ_DT_FLAG_ENT(RHF, NONE),
LLVM_READOBJ_DT_FLAG_ENT(RHF, QUICKSTART),
LLVM_READOBJ_DT_FLAG_ENT(RHF, NOTPOT),
@@ -2292,6 +2298,8 @@ std::string ELFDumper<ELFT>::getDynamicEntry(uint64_t Type,
case DT_INIT_ARRAYSZ:
case DT_FINI_ARRAYSZ:
case DT_PREINIT_ARRAYSZ:
+ case DT_RELRSZ:
+ case DT_RELRENT:
case DT_ANDROID_RELSZ:
case DT_ANDROID_RELASZ:
return std::to_string(Value) + " (bytes)";
@@ -2557,8 +2565,27 @@ template <typename ELFT> void ELFDumper<ELFT>::printLoadName() {
template <class ELFT> void ELFDumper<ELFT>::printArchSpecificInfo() {
switch (Obj.getHeader().e_machine) {
case EM_ARM:
+ if (Obj.isLE())
+ printAttributes(ELF::SHT_ARM_ATTRIBUTES,
+ std::make_unique<ARMAttributeParser>(&W),
+ support::little);
+ else
+ reportUniqueWarning("attribute printing not implemented for big-endian "
+ "ARM objects");
+ break;
case EM_RISCV:
- printAttributes();
+ if (Obj.isLE())
+ printAttributes(ELF::SHT_RISCV_ATTRIBUTES,
+ std::make_unique<RISCVAttributeParser>(&W),
+ support::little);
+ else
+ reportUniqueWarning("attribute printing not implemented for big-endian "
+ "RISC-V objects");
+ break;
+ case EM_MSP430:
+ printAttributes(ELF::SHT_MSP430_ATTRIBUTES,
+ std::make_unique<MSP430AttributeParser>(&W),
+ support::little);
break;
case EM_MIPS: {
printMipsABIFlags();
@@ -2581,20 +2608,15 @@ template <class ELFT> void ELFDumper<ELFT>::printArchSpecificInfo() {
}
}
-template <class ELFT> void ELFDumper<ELFT>::printAttributes() {
- if (!Obj.isLE()) {
- W.startLine() << "Attributes not implemented.\n";
- return;
- }
-
- const unsigned Machine = Obj.getHeader().e_machine;
- assert((Machine == EM_ARM || Machine == EM_RISCV) &&
- "Attributes not implemented.");
-
+template <class ELFT>
+void ELFDumper<ELFT>::printAttributes(
+ unsigned AttrShType, std::unique_ptr<ELFAttributeParser> AttrParser,
+ support::endianness Endianness) {
+ assert((AttrShType != ELF::SHT_NULL) && AttrParser &&
+ "Incomplete ELF attribute implementation");
DictScope BA(W, "BuildAttributes");
for (const Elf_Shdr &Sec : cantFail(Obj.sections())) {
- if (Sec.sh_type != ELF::SHT_ARM_ATTRIBUTES &&
- Sec.sh_type != ELF::SHT_RISCV_ATTRIBUTES)
+ if (Sec.sh_type != AttrShType)
continue;
ArrayRef<uint8_t> Contents;
@@ -2613,13 +2635,7 @@ template <class ELFT> void ELFDumper<ELFT>::printAttributes() {
W.printHex("FormatVersion", Contents[0]);
- auto ParseAttrubutes = [&]() {
- if (Machine == EM_ARM)
- return ARMAttributeParser(&W).parse(Contents, support::little);
- return RISCVAttributeParser(&W).parse(Contents, support::little);
- };
-
- if (Error E = ParseAttrubutes())
+ if (Error E = AttrParser->parse(Contents, Endianness))
reportUniqueWarning("unable to dump attributes from the " +
describe(Sec) + ": " + toString(std::move(E)));
}
@@ -2934,7 +2950,7 @@ MipsGOTParser<ELFT>::getPltSym(const Entry *E) const {
}
}
-static const EnumEntry<unsigned> ElfMipsISAExtType[] = {
+const EnumEntry<unsigned> ElfMipsISAExtType[] = {
{"None", Mips::AFL_EXT_NONE},
{"Broadcom SB-1", Mips::AFL_EXT_SB1},
{"Cavium Networks Octeon", Mips::AFL_EXT_OCTEON},
@@ -2957,7 +2973,7 @@ static const EnumEntry<unsigned> ElfMipsISAExtType[] = {
{"Toshiba R3900", Mips::AFL_EXT_3900}
};
-static const EnumEntry<unsigned> ElfMipsASEFlags[] = {
+const EnumEntry<unsigned> ElfMipsASEFlags[] = {
{"DSP", Mips::AFL_ASE_DSP},
{"DSPR2", Mips::AFL_ASE_DSPR2},
{"Enhanced VA Scheme", Mips::AFL_ASE_EVA},
@@ -2975,7 +2991,7 @@ static const EnumEntry<unsigned> ElfMipsASEFlags[] = {
{"GINV", Mips::AFL_ASE_GINV},
};
-static const EnumEntry<unsigned> ElfMipsFpABIType[] = {
+const EnumEntry<unsigned> ElfMipsFpABIType[] = {
{"Hard or soft float", Mips::Val_GNU_MIPS_ABI_FP_ANY},
{"Hard float (double precision)", Mips::Val_GNU_MIPS_ABI_FP_DOUBLE},
{"Hard float (single precision)", Mips::Val_GNU_MIPS_ABI_FP_SINGLE},
@@ -3762,6 +3778,15 @@ void GNUELFDumper<ELFT>::printSymbol(const Elf_Sym &Symbol, unsigned SymIndex,
Fields[5].Str.append(" | " + to_hexString(Other, false));
Fields[5].Str.append("]");
}
+ } else if (this->Obj.getHeader().e_machine == ELF::EM_RISCV) {
+ uint8_t Other = Symbol.st_other & ~0x3;
+ if (Other & STO_RISCV_VARIANT_CC) {
+ Other &= ~STO_RISCV_VARIANT_CC;
+ Fields[5].Str += " [VARIANT_CC";
+ if (Other != 0)
+ Fields[5].Str.append(" | " + to_hexString(Other, false));
+ Fields[5].Str.append("]");
+ }
} else {
Fields[5].Str +=
" [<other: " + to_string(format_hex(Symbol.st_other, 2)) + ">]";
@@ -4358,7 +4383,7 @@ template <class ELFT> void GNUELFDumper<ELFT>::printDynamicTable() {
for (auto Entry : Table) {
uintX_t Tag = Entry.getTag();
std::string Type =
- std::string("(") + this->Obj.getDynamicTagAsString(Tag).c_str() + ")";
+ std::string("(") + this->Obj.getDynamicTagAsString(Tag) + ")";
std::string Value = this->getDynamicEntry(Tag, Entry.getVal());
OS << " " << format_hex(Tag, ELFT::Is64Bits ? 18 : 10)
<< format(ValueFmt.c_str(), Type.c_str()) << Value << "\n";
@@ -4951,7 +4976,7 @@ static std::string getGNUBuildId(ArrayRef<uint8_t> Desc) {
return OS.str();
}
-static StringRef getGNUGoldVersion(ArrayRef<uint8_t> Desc) {
+static StringRef getDescAsStringRef(ArrayRef<uint8_t> Desc) {
return StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size());
}
@@ -4975,7 +5000,7 @@ static bool printGNUNote(raw_ostream &OS, uint32_t NoteType,
break;
}
case ELF::NT_GNU_GOLD_VERSION:
- OS << " Version: " << getGNUGoldVersion(Desc);
+ OS << " Version: " << getDescAsStringRef(Desc);
break;
case ELF::NT_GNU_PROPERTY_TYPE_0:
OS << " Properties:";
@@ -4987,7 +5012,27 @@ static bool printGNUNote(raw_ostream &OS, uint32_t NoteType,
return true;
}
-static const EnumEntry<unsigned> FreeBSDFeatureCtlFlags[] = {
+template <typename ELFT>
+static bool printLLVMOMPOFFLOADNote(raw_ostream &OS, uint32_t NoteType,
+ ArrayRef<uint8_t> Desc) {
+ switch (NoteType) {
+ default:
+ return false;
+ case ELF::NT_LLVM_OPENMP_OFFLOAD_VERSION:
+ OS << " Version: " << getDescAsStringRef(Desc);
+ break;
+ case ELF::NT_LLVM_OPENMP_OFFLOAD_PRODUCER:
+ OS << " Producer: " << getDescAsStringRef(Desc);
+ break;
+ case ELF::NT_LLVM_OPENMP_OFFLOAD_PRODUCER_VERSION:
+ OS << " Producer version: " << getDescAsStringRef(Desc);
+ break;
+ }
+ OS << '\n';
+ return true;
+}
+
+const EnumEntry<unsigned> FreeBSDFeatureCtlFlags[] = {
{"ASLR_DISABLE", NT_FREEBSD_FCTL_ASLR_DISABLE},
{"PROTMAX_DISABLE", NT_FREEBSD_FCTL_PROTMAX_DISABLE},
{"STKGAP_DISABLE", NT_FREEBSD_FCTL_STKGAP_DISABLE},
@@ -5251,14 +5296,14 @@ static void printCoreNote(raw_ostream &OS, const CoreNote &Note) {
}
}
-static const NoteType GenericNoteTypes[] = {
+const NoteType GenericNoteTypes[] = {
{ELF::NT_VERSION, "NT_VERSION (version)"},
{ELF::NT_ARCH, "NT_ARCH (architecture)"},
{ELF::NT_GNU_BUILD_ATTRIBUTE_OPEN, "OPEN"},
{ELF::NT_GNU_BUILD_ATTRIBUTE_FUNC, "func"},
};
-static const NoteType GNUNoteTypes[] = {
+const NoteType GNUNoteTypes[] = {
{ELF::NT_GNU_ABI_TAG, "NT_GNU_ABI_TAG (ABI version tag)"},
{ELF::NT_GNU_HWCAP, "NT_GNU_HWCAP (DSO-supplied software HWCAP info)"},
{ELF::NT_GNU_BUILD_ID, "NT_GNU_BUILD_ID (unique build ID bitstring)"},
@@ -5266,7 +5311,7 @@ static const NoteType GNUNoteTypes[] = {
{ELF::NT_GNU_PROPERTY_TYPE_0, "NT_GNU_PROPERTY_TYPE_0 (property note)"},
};
-static const NoteType FreeBSDCoreNoteTypes[] = {
+const NoteType FreeBSDCoreNoteTypes[] = {
{ELF::NT_FREEBSD_THRMISC, "NT_THRMISC (thrmisc structure)"},
{ELF::NT_FREEBSD_PROCSTAT_PROC, "NT_PROCSTAT_PROC (proc data)"},
{ELF::NT_FREEBSD_PROCSTAT_FILES, "NT_PROCSTAT_FILES (files data)"},
@@ -5280,7 +5325,7 @@ static const NoteType FreeBSDCoreNoteTypes[] = {
{ELF::NT_FREEBSD_PROCSTAT_AUXV, "NT_PROCSTAT_AUXV (auxv data)"},
};
-static const NoteType FreeBSDNoteTypes[] = {
+const NoteType FreeBSDNoteTypes[] = {
{ELF::NT_FREEBSD_ABI_TAG, "NT_FREEBSD_ABI_TAG (ABI version tag)"},
{ELF::NT_FREEBSD_NOINIT_TAG, "NT_FREEBSD_NOINIT_TAG (no .init tag)"},
{ELF::NT_FREEBSD_ARCH_TAG, "NT_FREEBSD_ARCH_TAG (architecture tag)"},
@@ -5288,7 +5333,15 @@ static const NoteType FreeBSDNoteTypes[] = {
"NT_FREEBSD_FEATURE_CTL (FreeBSD feature control)"},
};
-static const NoteType AMDNoteTypes[] = {
+const NoteType OpenBSDCoreNoteTypes[] = {
+ {ELF::NT_OPENBSD_PROCINFO, "NT_OPENBSD_PROCINFO (procinfo structure)"},
+ {ELF::NT_OPENBSD_AUXV, "NT_OPENBSD_AUXV (ELF auxiliary vector data)"},
+ {ELF::NT_OPENBSD_REGS, "NT_OPENBSD_REGS (regular registers)"},
+ {ELF::NT_OPENBSD_FPREGS, "NT_OPENBSD_FPREGS (floating point registers)"},
+ {ELF::NT_OPENBSD_WCOOKIE, "NT_OPENBSD_WCOOKIE (window cookie)"},
+};
+
+const NoteType AMDNoteTypes[] = {
{ELF::NT_AMD_HSA_CODE_OBJECT_VERSION,
"NT_AMD_HSA_CODE_OBJECT_VERSION (AMD HSA Code Object Version)"},
{ELF::NT_AMD_HSA_HSAIL, "NT_AMD_HSA_HSAIL (AMD HSA HSAIL Properties)"},
@@ -5298,11 +5351,20 @@ static const NoteType AMDNoteTypes[] = {
{ELF::NT_AMD_PAL_METADATA, "NT_AMD_PAL_METADATA (AMD PAL Metadata)"},
};
-static const NoteType AMDGPUNoteTypes[] = {
+const NoteType AMDGPUNoteTypes[] = {
{ELF::NT_AMDGPU_METADATA, "NT_AMDGPU_METADATA (AMDGPU Metadata)"},
};
-static const NoteType CoreNoteTypes[] = {
+const NoteType LLVMOMPOFFLOADNoteTypes[] = {
+ {ELF::NT_LLVM_OPENMP_OFFLOAD_VERSION,
+ "NT_LLVM_OPENMP_OFFLOAD_VERSION (image format version)"},
+ {ELF::NT_LLVM_OPENMP_OFFLOAD_PRODUCER,
+ "NT_LLVM_OPENMP_OFFLOAD_PRODUCER (producing toolchain)"},
+ {ELF::NT_LLVM_OPENMP_OFFLOAD_PRODUCER_VERSION,
+ "NT_LLVM_OPENMP_OFFLOAD_PRODUCER_VERSION (producing toolchain version)"},
+};
+
+const NoteType CoreNoteTypes[] = {
{ELF::NT_PRSTATUS, "NT_PRSTATUS (prstatus structure)"},
{ELF::NT_FPREGSET, "NT_FPREGSET (floating point registers)"},
{ELF::NT_PRPSINFO, "NT_PRPSINFO (prpsinfo structure)"},
@@ -5391,10 +5453,19 @@ StringRef getNoteTypeName(const typename ELFT::Note &Note, unsigned ELFType) {
return FindNote(FreeBSDNoteTypes);
}
}
+ if (Name.startswith("OpenBSD") && ELFType == ELF::ET_CORE) {
+ // OpenBSD also places the generic core notes in the OpenBSD namespace.
+ StringRef Result = FindNote(OpenBSDCoreNoteTypes);
+ if (!Result.empty())
+ return Result;
+ return FindNote(CoreNoteTypes);
+ }
if (Name == "AMD")
return FindNote(AMDNoteTypes);
if (Name == "AMDGPU")
return FindNote(AMDGPUNoteTypes);
+ if (Name == "LLVMOMPOFFLOAD")
+ return FindNote(LLVMOMPOFFLOADNoteTypes);
if (ELFType == ELF::ET_CORE)
return FindNote(CoreNoteTypes);
@@ -5530,6 +5601,9 @@ template <class ELFT> void GNUELFDumper<ELFT>::printNotes() {
OS << " " << N.Type << ":\n " << N.Value << '\n';
return Error::success();
}
+ } else if (Name == "LLVMOMPOFFLOAD") {
+ if (printLLVMOMPOFFLOADNote<ELFT>(OS, Type, Descriptor))
+ return Error::success();
} else if (Name == "CORE") {
if (Type == ELF::NT_FILE) {
DataExtractor DescExtractor(Descriptor,
@@ -6532,6 +6606,10 @@ void LLVMELFDumper<ELFT>::printSymbol(const Elf_Sym &Symbol, unsigned SymIndex,
SymOtherFlags.insert(SymOtherFlags.end(),
std::begin(ElfAArch64SymOtherFlags),
std::end(ElfAArch64SymOtherFlags));
+ } else if (this->Obj.getHeader().e_machine == EM_RISCV) {
+ SymOtherFlags.insert(SymOtherFlags.end(),
+ std::begin(ElfRISCVSymOtherFlags),
+ std::end(ElfRISCVSymOtherFlags));
}
W.printFlags("Other", Symbol.st_other, makeArrayRef(SymOtherFlags), 0x3u);
}
@@ -6650,7 +6728,7 @@ void LLVMELFDumper<ELFT>::printVersionSymbolSection(const Elf_Shdr *Sec) {
}
}
-static const EnumEntry<unsigned> SymVersionFlags[] = {
+const EnumEntry<unsigned> SymVersionFlags[] = {
{"Base", "BASE", VER_FLG_BASE},
{"Weak", "WEAK", VER_FLG_WEAK},
{"Info", "INFO", VER_FLG_INFO}};
@@ -6818,14 +6896,14 @@ template <class ELFT> void LLVMELFDumper<ELFT>::printBBAddrMaps() {
FunctionSec =
unwrapOrError(this->FileName, this->Obj.getSection(Sec.sh_link));
ListScope L(W, "BBAddrMap");
- Expected<std::vector<Elf_BBAddrMap>> BBAddrMapOrErr =
+ Expected<std::vector<BBAddrMap>> BBAddrMapOrErr =
this->Obj.decodeBBAddrMap(Sec);
if (!BBAddrMapOrErr) {
this->reportUniqueWarning("unable to dump " + this->describe(Sec) + ": " +
toString(BBAddrMapOrErr.takeError()));
continue;
}
- for (const Elf_BBAddrMap &AM : *BBAddrMapOrErr) {
+ for (const BBAddrMap &AM : *BBAddrMapOrErr) {
DictScope D(W, "Function");
W.printHex("At", AM.Addr);
SmallVector<uint32_t> FuncSymIndex =
@@ -6840,7 +6918,7 @@ template <class ELFT> void LLVMELFDumper<ELFT>::printBBAddrMaps() {
W.printString("Name", FuncName);
ListScope L(W, "BB entries");
- for (const typename Elf_BBAddrMap::BBEntry &BBE : AM.BBEntries) {
+ for (const BBAddrMap::BBEntry &BBE : AM.BBEntries) {
DictScope L(W);
W.printHex("Offset", BBE.Offset);
W.printHex("Size", BBE.Size);
@@ -6892,7 +6970,7 @@ static bool printGNUNoteLLVMStyle(uint32_t NoteType, ArrayRef<uint8_t> Desc,
break;
}
case ELF::NT_GNU_GOLD_VERSION:
- W.printString("Version", getGNUGoldVersion(Desc));
+ W.printString("Version", getDescAsStringRef(Desc));
break;
case ELF::NT_GNU_PROPERTY_TYPE_0:
ListScope D(W, "Property");
@@ -6903,6 +6981,26 @@ static bool printGNUNoteLLVMStyle(uint32_t NoteType, ArrayRef<uint8_t> Desc,
return true;
}
+template <typename ELFT>
+static bool printLLVMOMPOFFLOADNoteLLVMStyle(uint32_t NoteType,
+ ArrayRef<uint8_t> Desc,
+ ScopedPrinter &W) {
+ switch (NoteType) {
+ default:
+ return false;
+ case ELF::NT_LLVM_OPENMP_OFFLOAD_VERSION:
+ W.printString("Version", getDescAsStringRef(Desc));
+ break;
+ case ELF::NT_LLVM_OPENMP_OFFLOAD_PRODUCER:
+ W.printString("Producer", getDescAsStringRef(Desc));
+ break;
+ case ELF::NT_LLVM_OPENMP_OFFLOAD_PRODUCER_VERSION:
+ W.printString("Producer version", getDescAsStringRef(Desc));
+ break;
+ }
+ return true;
+}
+
static void printCoreNoteLLVMStyle(const CoreNote &Note, ScopedPrinter &W) {
W.printNumber("Page Size", Note.PageSize);
for (const CoreFileMapping &Mapping : Note.Mappings) {
@@ -6970,6 +7068,9 @@ template <class ELFT> void LLVMELFDumper<ELFT>::printNotes() {
W.printString(N.Type, N.Value);
return Error::success();
}
+ } else if (Name == "LLVMOMPOFFLOAD") {
+ if (printLLVMOMPOFFLOADNoteLLVMStyle<ELFT>(Type, Descriptor, W))
+ return Error::success();
} else if (Name == "CORE") {
if (Type == ELF::NT_FILE) {
DataExtractor DescExtractor(Descriptor,
diff --git a/llvm/tools/llvm-readobj/MachODumper.cpp b/llvm/tools/llvm-readobj/MachODumper.cpp
index 433ca9335324..945b16b8db86 100644
--- a/llvm/tools/llvm-readobj/MachODumper.cpp
+++ b/llvm/tools/llvm-readobj/MachODumper.cpp
@@ -74,7 +74,7 @@ std::unique_ptr<ObjDumper> createMachODumper(const object::MachOObjectFile &Obj,
} // namespace llvm
-static const EnumEntry<uint32_t> MachOMagics[] = {
+const EnumEntry<uint32_t> MachOMagics[] = {
{ "Magic", MachO::MH_MAGIC },
{ "Cigam", MachO::MH_CIGAM },
{ "Magic64", MachO::MH_MAGIC_64 },
@@ -83,7 +83,7 @@ static const EnumEntry<uint32_t> MachOMagics[] = {
{ "FatCigam", MachO::FAT_CIGAM },
};
-static const EnumEntry<uint32_t> MachOHeaderFileTypes[] = {
+const EnumEntry<uint32_t> MachOHeaderFileTypes[] = {
{ "Relocatable", MachO::MH_OBJECT },
{ "Executable", MachO::MH_EXECUTE },
{ "FixedVMLibrary", MachO::MH_FVMLIB },
@@ -97,7 +97,7 @@ static const EnumEntry<uint32_t> MachOHeaderFileTypes[] = {
{ "KextBundle", MachO::MH_KEXT_BUNDLE },
};
-static const EnumEntry<uint32_t> MachOHeaderCpuTypes[] = {
+const EnumEntry<uint32_t> MachOHeaderCpuTypes[] = {
{ "Any" , static_cast<uint32_t>(MachO::CPU_TYPE_ANY) },
{ "X86" , MachO::CPU_TYPE_X86 },
{ "X86-64" , MachO::CPU_TYPE_X86_64 },
@@ -109,7 +109,7 @@ static const EnumEntry<uint32_t> MachOHeaderCpuTypes[] = {
{ "PowerPC64" , MachO::CPU_TYPE_POWERPC64 },
};
-static const EnumEntry<uint32_t> MachOHeaderCpuSubtypesX86[] = {
+const EnumEntry<uint32_t> MachOHeaderCpuSubtypesX86[] = {
LLVM_READOBJ_ENUM_ENT(MachO, CPU_SUBTYPE_I386_ALL),
LLVM_READOBJ_ENUM_ENT(MachO, CPU_SUBTYPE_386),
LLVM_READOBJ_ENUM_ENT(MachO, CPU_SUBTYPE_486),
@@ -132,13 +132,13 @@ static const EnumEntry<uint32_t> MachOHeaderCpuSubtypesX86[] = {
LLVM_READOBJ_ENUM_ENT(MachO, CPU_SUBTYPE_XEON_MP),
};
-static const EnumEntry<uint32_t> MachOHeaderCpuSubtypesX64[] = {
+const EnumEntry<uint32_t> MachOHeaderCpuSubtypesX64[] = {
LLVM_READOBJ_ENUM_ENT(MachO, CPU_SUBTYPE_X86_64_ALL),
LLVM_READOBJ_ENUM_ENT(MachO, CPU_SUBTYPE_X86_ARCH1),
LLVM_READOBJ_ENUM_ENT(MachO, CPU_SUBTYPE_X86_64_H),
};
-static const EnumEntry<uint32_t> MachOHeaderCpuSubtypesARM[] = {
+const EnumEntry<uint32_t> MachOHeaderCpuSubtypesARM[] = {
LLVM_READOBJ_ENUM_ENT(MachO, CPU_SUBTYPE_ARM_ALL),
LLVM_READOBJ_ENUM_ENT(MachO, CPU_SUBTYPE_ARM_V4T),
LLVM_READOBJ_ENUM_ENT(MachO, CPU_SUBTYPE_ARM_V6),
@@ -153,17 +153,17 @@ static const EnumEntry<uint32_t> MachOHeaderCpuSubtypesARM[] = {
LLVM_READOBJ_ENUM_ENT(MachO, CPU_SUBTYPE_ARM_V7EM),
};
-static const EnumEntry<uint32_t> MachOHeaderCpuSubtypesARM64[] = {
+const EnumEntry<uint32_t> MachOHeaderCpuSubtypesARM64[] = {
LLVM_READOBJ_ENUM_ENT(MachO, CPU_SUBTYPE_ARM64_ALL),
LLVM_READOBJ_ENUM_ENT(MachO, CPU_SUBTYPE_ARM64_V8),
LLVM_READOBJ_ENUM_ENT(MachO, CPU_SUBTYPE_ARM64E),
};
-static const EnumEntry<uint32_t> MachOHeaderCpuSubtypesSPARC[] = {
+const EnumEntry<uint32_t> MachOHeaderCpuSubtypesSPARC[] = {
LLVM_READOBJ_ENUM_ENT(MachO, CPU_SUBTYPE_SPARC_ALL),
};
-static const EnumEntry<uint32_t> MachOHeaderCpuSubtypesPPC[] = {
+const EnumEntry<uint32_t> MachOHeaderCpuSubtypesPPC[] = {
LLVM_READOBJ_ENUM_ENT(MachO, CPU_SUBTYPE_POWERPC_ALL),
LLVM_READOBJ_ENUM_ENT(MachO, CPU_SUBTYPE_POWERPC_601),
LLVM_READOBJ_ENUM_ENT(MachO, CPU_SUBTYPE_POWERPC_602),
@@ -179,7 +179,7 @@ static const EnumEntry<uint32_t> MachOHeaderCpuSubtypesPPC[] = {
LLVM_READOBJ_ENUM_ENT(MachO, CPU_SUBTYPE_POWERPC_970),
};
-static const EnumEntry<uint32_t> MachOHeaderFlags[] = {
+const EnumEntry<uint32_t> MachOHeaderFlags[] = {
LLVM_READOBJ_ENUM_ENT(MachO, MH_NOUNDEFS),
LLVM_READOBJ_ENUM_ENT(MachO, MH_INCRLINK),
LLVM_READOBJ_ENUM_ENT(MachO, MH_DYLDLINK),
@@ -208,7 +208,7 @@ static const EnumEntry<uint32_t> MachOHeaderFlags[] = {
LLVM_READOBJ_ENUM_ENT(MachO, MH_APP_EXTENSION_SAFE),
};
-static const EnumEntry<unsigned> MachOSectionTypes[] = {
+const EnumEntry<unsigned> MachOSectionTypes[] = {
{ "Regular" , MachO::S_REGULAR },
{ "ZeroFill" , MachO::S_ZEROFILL },
{ "CStringLiterals" , MachO::S_CSTRING_LITERALS },
@@ -233,7 +233,7 @@ static const EnumEntry<unsigned> MachOSectionTypes[] = {
{ "ThreadLocalInitFunctionPointers", MachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS }
};
-static const EnumEntry<unsigned> MachOSectionAttributes[] = {
+const EnumEntry<unsigned> MachOSectionAttributes[] = {
{ "LocReloc" , 1 << 0 /*S_ATTR_LOC_RELOC */ },
{ "ExtReloc" , 1 << 1 /*S_ATTR_EXT_RELOC */ },
{ "SomeInstructions" , 1 << 2 /*S_ATTR_SOME_INSTRUCTIONS */ },
@@ -246,7 +246,7 @@ static const EnumEntry<unsigned> MachOSectionAttributes[] = {
{ "PureInstructions" , 1 << 23 /*S_ATTR_PURE_INSTRUCTIONS */ },
};
-static const EnumEntry<unsigned> MachOSymbolRefTypes[] = {
+const EnumEntry<unsigned> MachOSymbolRefTypes[] = {
{ "UndefinedNonLazy", 0 },
{ "ReferenceFlagUndefinedLazy", 1 },
{ "ReferenceFlagDefined", 2 },
@@ -255,7 +255,7 @@ static const EnumEntry<unsigned> MachOSymbolRefTypes[] = {
{ "ReferenceFlagPrivateUndefinedLazy", 5 }
};
-static const EnumEntry<unsigned> MachOSymbolFlags[] = {
+const EnumEntry<unsigned> MachOSymbolFlags[] = {
{ "ThumbDef", 0x8 },
{ "ReferencedDynamically", 0x10 },
{ "NoDeadStrip", 0x20 },
@@ -266,7 +266,7 @@ static const EnumEntry<unsigned> MachOSymbolFlags[] = {
{ "ColdFunc", 0x400 },
};
-static const EnumEntry<unsigned> MachOSymbolTypes[] = {
+const EnumEntry<unsigned> MachOSymbolTypes[] = {
{ "Undef", 0x0 },
{ "Abs", 0x2 },
{ "Indirect", 0xA },
diff --git a/llvm/tools/llvm-readobj/ObjDumper.cpp b/llvm/tools/llvm-readobj/ObjDumper.cpp
index 87c229356e20..dc4a3031f914 100644
--- a/llvm/tools/llvm-readobj/ObjDumper.cpp
+++ b/llvm/tools/llvm-readobj/ObjDumper.cpp
@@ -52,9 +52,23 @@ static void printAsPrintable(raw_ostream &W, const uint8_t *Start, size_t Len) {
W << (isPrint(Start[i]) ? static_cast<char>(Start[i]) : '.');
}
-void ObjDumper::printAsStringList(StringRef StringContent) {
+void ObjDumper::printAsStringList(StringRef StringContent,
+ size_t StringDataOffset) {
+ size_t StrSize = StringContent.size();
+ if (StrSize == 0)
+ return;
+ if (StrSize < StringDataOffset) {
+ reportUniqueWarning("offset (0x" + Twine::utohexstr(StringDataOffset) +
+ ") is past the end of the contents (size 0x" +
+ Twine::utohexstr(StrSize) + ")");
+ return;
+ }
+
const uint8_t *StrContent = StringContent.bytes_begin();
- const uint8_t *CurrentWord = StrContent;
+ // Some formats contain additional metadata at the start which should not be
+ // interpreted as strings. Skip these bytes, but account for them in the
+ // string offsets.
+ const uint8_t *CurrentWord = StrContent + StringDataOffset;
const uint8_t *StrEnd = StringContent.bytes_end();
while (CurrentWord <= StrEnd) {
diff --git a/llvm/tools/llvm-readobj/ObjDumper.h b/llvm/tools/llvm-readobj/ObjDumper.h
index 7e1c0ca35127..b395a95f3cb4 100644
--- a/llvm/tools/llvm-readobj/ObjDumper.h
+++ b/llvm/tools/llvm-readobj/ObjDumper.h
@@ -97,6 +97,9 @@ public:
llvm::codeview::GlobalTypeTableBuilder &GlobalCVTypes,
bool GHash) {}
+ // Only implemented for XCOFF.
+ virtual void printAuxiliaryHeader() {}
+
// Only implemented for MachO.
virtual void printMachODataInCode() { }
virtual void printMachOVersionMin() { }
@@ -110,7 +113,7 @@ public:
virtual void printStackMap() const = 0;
- void printAsStringList(StringRef StringContent);
+ void printAsStringList(StringRef StringContent, size_t StringDataOffset = 0);
void printSectionsAsString(const object::ObjectFile &Obj,
ArrayRef<std::string> Sections);
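The printAsStringList change above adds a StringDataOffset parameter so callers can skip leading metadata that is not string data while still reporting offsets relative to the start of the buffer. A rough standalone sketch of the same idea, using hypothetical names and plain standard C++ rather than the llvm-readobj API:

#include <cstdio>
#include <cstring>
#include <string>

// Hypothetical helper, not the ObjDumper interface.
static void dumpStringsWithOffset(const std::string &Content,
                                  size_t DataOffset) {
  if (Content.size() < DataOffset)
    return; // The offset is past the end of the contents; nothing to print.
  const char *Cur = Content.data() + DataOffset;
  const char *End = Content.data() + Content.size();
  while (Cur < End) {
    const void *Nul = std::memchr(Cur, '\0', static_cast<size_t>(End - Cur));
    size_t Len = Nul ? static_cast<size_t>(static_cast<const char *>(Nul) - Cur)
                     : static_cast<size_t>(End - Cur);
    // Offsets are printed relative to the start of the buffer, so the skipped
    // metadata bytes are still accounted for.
    std::printf("[%6zx] %.*s\n", static_cast<size_t>(Cur - Content.data()),
                static_cast<int>(Len), Cur);
    Cur += Len + 1;
  }
}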
diff --git a/llvm/tools/llvm-readobj/Opts.td b/llvm/tools/llvm-readobj/Opts.td
index 493b93769eb4..7723691e8225 100644
--- a/llvm/tools/llvm-readobj/Opts.td
+++ b/llvm/tools/llvm-readobj/Opts.td
@@ -29,14 +29,14 @@ def file_header : FF<"file-header", "Display file header">;
def headers : FF<"headers", "Equivalent to setting: --file-header, --program-headers, --section-headers">;
defm hex_dump : Eq<"hex-dump", "Display the specified section(s) as hexadecimal bytes">, MetaVarName<"<name or index>">;
def relocs : FF<"relocs", "Display the relocation entries in the file">;
-def section_data : FF<"section-data", "Display section data for each section shown">;
+def section_data : FF<"section-data", "Display section data for each section shown. This option has no effect for GNU style output">;
def section_details : FF<"section-details", "Display the section details">;
def section_headers : FF<"section-headers", "Display section headers">;
def section_mapping : FF<"section-mapping", "Display the section to segment mapping">;
def section_mapping_EQ_false : FF<"section-mapping=false", "Don't display the section to segment mapping">, Flags<[HelpHidden]>;
-def section_relocations : FF<"section-relocations", "Display relocations for each section shown">;
-def section_symbols : FF<"section-symbols", "Display symbols for each section shown">;
-def stack_sizes : FF<"stack-sizes", "Display contents of all stack sizes sections">;
+def section_relocations : FF<"section-relocations", "Display relocations for each section shown. This option has no effect for GNU style output">;
+def section_symbols : FF<"section-symbols", "Display symbols for each section shown. This option has no effect for GNU style output">;
+def stack_sizes : FF<"stack-sizes", "Display contents of all stack sizes sections. This option has no effect for GNU style output">;
def stackmap : FF<"stackmap", "Display contents of stackmap section">;
defm string_dump : Eq<"string-dump", "Display the specified section(s) as a list of strings">, MetaVarName<"<name or index>">;
def string_table : FF<"string-table", "Display the string table (only for XCOFF now)">;
@@ -47,10 +47,10 @@ def unwind : FF<"unwind", "Display unwind information">;
def grp_elf : OptionGroup<"kind">, HelpText<"OPTIONS (ELF specific)">;
def dynamic_table : FF<"dynamic-table", "Display the dynamic section table">, Group<grp_elf>;
def elf_linker_options : FF<"elf-linker-options", "Display the .linker-options section">, Group<grp_elf>;
-defm elf_output_style : Eq<"elf-output-style", "Specify ELF dump style">, Group<grp_elf>;
+defm elf_output_style : Eq<"elf-output-style", "Specify ELF dump style: LLVM or GNU">, Group<grp_elf>;
def histogram : FF<"histogram", "Display bucket list histogram for hash sections">, Group<grp_elf>;
def section_groups : FF<"section-groups", "Display section groups">, Group<grp_elf>;
-def gnu_hash_table : FF<"gnu-hash-table", "Display .gnu.hash section">, Group<grp_elf>;
+def gnu_hash_table : FF<"gnu-hash-table", "Display the GNU hash table for dynamic symbols">, Group<grp_elf>;
def hash_symbols : FF<"hash-symbols", "Display the dynamic symbols derived from the hash section">, Group<grp_elf>;
def hash_table : FF<"hash-table", "Display .hash section">, Group<grp_elf>;
def needed_libs : FF<"needed-libs", "Display the needed libraries">, Group<grp_elf>;
@@ -83,12 +83,16 @@ def coff_load_config : FF<"coff-load-config", "Display load config">, Group<grp_
def coff_resources : FF<"coff-resources", "Display .rsrc section">, Group<grp_coff>;
def coff_tls_directory : FF<"coff-tls-directory", "Display TLS directory">, Group<grp_coff>;
+// XCOFF specific options.
+def grp_xcoff : OptionGroup<"kind">, HelpText<"OPTIONS (XCOFF specific)">;
+def auxiliary_header : FF<"auxiliary-header", "Display the auxiliary header">, Group<grp_xcoff>;
+
def help : FF<"help", "Display this help">;
def version : FF<"version", "Display the version">;
// Ignored for GNU readelf compatibility.
-def : F<"W", "Ignored for GNU readelf compatibility">;
-def : FF<"wide", "Ignored for GNU readelf compatibility">;
+def wide : FF<"wide", "Ignored for GNU readelf compatibility">;
+def : F<"W", "Ignored for GNU readelf compatibility">, Alias<wide>;
// Traditional llvm-readobj Aliases.
def : Flag<["--"], "dt">, Alias<dyn_syms>, HelpText<"Alias for --dyn-syms">;
diff --git a/llvm/tools/llvm-readobj/WasmDumper.cpp b/llvm/tools/llvm-readobj/WasmDumper.cpp
index f7dcaa35656f..d76332d1ba36 100644
--- a/llvm/tools/llvm-readobj/WasmDumper.cpp
+++ b/llvm/tools/llvm-readobj/WasmDumper.cpp
@@ -20,7 +20,7 @@ using namespace object;
namespace {
-static const EnumEntry<unsigned> WasmSymbolTypes[] = {
+const EnumEntry<unsigned> WasmSymbolTypes[] = {
#define ENUM_ENTRY(X) \
{ #X, wasm::WASM_SYMBOL_TYPE_##X }
ENUM_ENTRY(FUNCTION), ENUM_ENTRY(DATA), ENUM_ENTRY(GLOBAL),
@@ -28,7 +28,7 @@ static const EnumEntry<unsigned> WasmSymbolTypes[] = {
#undef ENUM_ENTRY
};
-static const EnumEntry<uint32_t> WasmSectionTypes[] = {
+const EnumEntry<uint32_t> WasmSectionTypes[] = {
#define ENUM_ENTRY(X) \
{ #X, wasm::WASM_SEC_##X }
ENUM_ENTRY(CUSTOM), ENUM_ENTRY(TYPE), ENUM_ENTRY(IMPORT),
@@ -39,7 +39,7 @@ static const EnumEntry<uint32_t> WasmSectionTypes[] = {
#undef ENUM_ENTRY
};
-static const EnumEntry<unsigned> WasmSymbolFlags[] = {
+const EnumEntry<unsigned> WasmSymbolFlags[] = {
#define ENUM_ENTRY(X) \
{ #X, wasm::WASM_SYMBOL_##X }
ENUM_ENTRY(BINDING_GLOBAL),
diff --git a/llvm/tools/llvm-readobj/Win64EHDumper.cpp b/llvm/tools/llvm-readobj/Win64EHDumper.cpp
index 7e84c1bca35d..da964d3132e7 100644
--- a/llvm/tools/llvm-readobj/Win64EHDumper.cpp
+++ b/llvm/tools/llvm-readobj/Win64EHDumper.cpp
@@ -16,13 +16,13 @@ using namespace llvm;
using namespace llvm::object;
using namespace llvm::Win64EH;
-static const EnumEntry<unsigned> UnwindFlags[] = {
+const EnumEntry<unsigned> UnwindFlags[] = {
{ "ExceptionHandler", UNW_ExceptionHandler },
{ "TerminateHandler", UNW_TerminateHandler },
{ "ChainInfo" , UNW_ChainInfo }
};
-static const EnumEntry<unsigned> UnwindOpInfo[] = {
+const EnumEntry<unsigned> UnwindOpInfo[] = {
{ "RAX", 0 },
{ "RCX", 1 },
{ "RDX", 2 },
@@ -125,14 +125,52 @@ static std::error_code getSymbol(const COFFObjectFile &COFF, uint64_t VA,
return inconvertibleErrorCode();
}
+static object::SymbolRef getPreferredSymbol(const COFFObjectFile &COFF,
+ object::SymbolRef Sym,
+ uint32_t &SymbolOffset,
+ bool IsRangeEnd) {
+ // The symbol resolved by ResolveSymbol can be any internal
+ // nondescriptive symbol; try to resolve a more descriptive one.
+ COFFSymbolRef CoffSym = COFF.getCOFFSymbol(Sym);
+ if (CoffSym.getStorageClass() != COFF::IMAGE_SYM_CLASS_LABEL &&
+ CoffSym.getSectionDefinition() == nullptr)
+ return Sym;
+ for (const auto &S : COFF.symbols()) {
+ COFFSymbolRef CS = COFF.getCOFFSymbol(S);
+ if (CS.getSectionNumber() == CoffSym.getSectionNumber() &&
+ CS.getValue() <= CoffSym.getValue() + SymbolOffset &&
+ CS.getStorageClass() != COFF::IMAGE_SYM_CLASS_LABEL &&
+ CS.getSectionDefinition() == nullptr) {
+ uint32_t Offset = CoffSym.getValue() + SymbolOffset - CS.getValue();
+ // For the end of a range, don't pick a symbol with a zero offset;
+ // prefer a symbol with a small positive offset.
+ if (Offset <= SymbolOffset && (!IsRangeEnd || Offset > 0)) {
+ SymbolOffset = Offset;
+ Sym = S;
+ CoffSym = CS;
+ if (CS.isExternal() && SymbolOffset == 0)
+ return Sym;
+ }
+ }
+ }
+ return Sym;
+}
+
static std::string formatSymbol(const Dumper::Context &Ctx,
const coff_section *Section, uint64_t Offset,
- uint32_t Displacement) {
+ uint32_t Displacement,
+ bool IsRangeEnd = false) {
std::string Buffer;
raw_string_ostream OS(Buffer);
SymbolRef Symbol;
if (!Ctx.ResolveSymbol(Section, Offset, Symbol, Ctx.UserData)) {
+ // We found a relocation at the given offset in the section, pointing
+ // at a symbol.
+
+ // Try to resolve label/section symbols into function names.
+ Symbol = getPreferredSymbol(Ctx.COFF, Symbol, Displacement, IsRangeEnd);
+
Expected<StringRef> Name = Symbol.getName();
if (Name) {
OS << *Name;
@@ -207,7 +245,8 @@ void Dumper::printRuntimeFunctionEntry(const Context &Ctx,
SW.printString("StartAddress",
formatSymbol(Ctx, Section, Offset + 0, RF.StartAddress));
SW.printString("EndAddress",
- formatSymbol(Ctx, Section, Offset + 4, RF.EndAddress));
+ formatSymbol(Ctx, Section, Offset + 4, RF.EndAddress,
+ /*IsRangeEnd=*/true));
SW.printString("UnwindInfoAddress",
formatSymbol(Ctx, Section, Offset + 8, RF.UnwindInfoOffset));
}
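The getPreferredSymbol helper added above scans the COFF symbol table for a more descriptive symbol than the one the relocation resolved to. A simplified sketch of that preference rule, with hypothetical types rather than the COFF API: among the non-label symbols at or below the target address, take the closest one, and for a range-end address refuse a zero offset so the end is not attributed to the next function's start.

#include <cstdint>
#include <string>
#include <vector>

struct SimpleSym {
  std::string Name;
  uint64_t Value;
  bool IsLabel; // Internal label symbols are the less descriptive ones.
};

// Hypothetical helper illustrating the selection rule, not the real dumper.
static const SimpleSym *pickPreferredSymbol(const std::vector<SimpleSym> &Syms,
                                            uint64_t Target, bool IsRangeEnd) {
  const SimpleSym *Best = nullptr;
  uint64_t BestOffset = UINT64_MAX;
  for (const SimpleSym &S : Syms) {
    if (S.IsLabel || S.Value > Target)
      continue;
    uint64_t Offset = Target - S.Value;
    if (IsRangeEnd && Offset == 0)
      continue; // A range end should not resolve to the next symbol's start.
    if (Offset < BestOffset) {
      BestOffset = Offset;
      Best = &S;
    }
  }
  return Best;
}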
diff --git a/llvm/tools/llvm-readobj/XCOFFDumper.cpp b/llvm/tools/llvm-readobj/XCOFFDumper.cpp
index 94ef96e447ce..38e459cd5425 100644
--- a/llvm/tools/llvm-readobj/XCOFFDumper.cpp
+++ b/llvm/tools/llvm-readobj/XCOFFDumper.cpp
@@ -13,8 +13,11 @@
#include "ObjDumper.h"
#include "llvm-readobj.h"
#include "llvm/Object/XCOFFObjectFile.h"
+#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/ScopedPrinter.h"
+#include <stddef.h>
+
using namespace llvm;
using namespace object;
@@ -27,6 +30,7 @@ public:
: ObjDumper(Writer, Obj.getFileName()), Obj(Obj) {}
void printFileHeaders() override;
+ void printAuxiliaryHeader() override;
void printSectionHeaders() override;
void printRelocations() override;
void printSymbols() override;
@@ -44,7 +48,11 @@ private:
void printCsectAuxEnt(XCOFFCsectAuxRef AuxEntRef);
void printSectAuxEntForStat(const XCOFFSectAuxEntForStat *AuxEntPtr);
void printSymbol(const SymbolRef &);
- void printRelocations(ArrayRef<XCOFFSectionHeader32> Sections);
+ template <typename RelTy> void printRelocation(RelTy Reloc);
+ template <typename Shdr, typename RelTy>
+ void printRelocations(ArrayRef<Shdr> Sections);
+ void printAuxiliaryHeader(const XCOFFAuxiliaryHeader32 *AuxHeader);
+ void printAuxiliaryHeader(const XCOFFAuxiliaryHeader64 *AuxHeader);
const XCOFFObjectFile &Obj;
};
} // anonymous namespace
@@ -96,6 +104,13 @@ void XCOFFDumper::printFileHeaders() {
// XCOFFObjectFile has the necessary support.
}
+void XCOFFDumper::printAuxiliaryHeader() {
+ if (Obj.is64Bit())
+ printAuxiliaryHeader(Obj.auxiliaryHeader64());
+ else
+ printAuxiliaryHeader(Obj.auxiliaryHeader32());
+}
+
void XCOFFDumper::printSectionHeaders() {
if (Obj.is64Bit())
printSectionHeaders(Obj.sections64());
@@ -105,12 +120,12 @@ void XCOFFDumper::printSectionHeaders() {
void XCOFFDumper::printRelocations() {
if (Obj.is64Bit())
- llvm_unreachable("64-bit relocation output not implemented!");
+ printRelocations<XCOFFSectionHeader64, XCOFFRelocation64>(Obj.sections64());
else
- printRelocations(Obj.sections32());
+ printRelocations<XCOFFSectionHeader32, XCOFFRelocation32>(Obj.sections32());
}
-static const EnumEntry<XCOFF::RelocationType> RelocationTypeNameclass[] = {
+const EnumEntry<XCOFF::RelocationType> RelocationTypeNameclass[] = {
#define ECase(X) \
{ #X, XCOFF::X }
ECase(R_POS), ECase(R_RL), ECase(R_RLA), ECase(R_NEG),
@@ -122,50 +137,71 @@ static const EnumEntry<XCOFF::RelocationType> RelocationTypeNameclass[] = {
#undef ECase
};
-void XCOFFDumper::printRelocations(ArrayRef<XCOFFSectionHeader32> Sections) {
- if (!opts::ExpandRelocs)
- report_fatal_error("Unexpanded relocation output not implemented.");
+template <typename RelTy> void XCOFFDumper::printRelocation(RelTy Reloc) {
+ Expected<StringRef> ErrOrSymbolName =
+ Obj.getSymbolNameByIndex(Reloc.SymbolIndex);
+ if (Error E = ErrOrSymbolName.takeError()) {
+ reportUniqueWarning(std::move(E));
+ return;
+ }
+ StringRef SymbolName = *ErrOrSymbolName;
+ StringRef RelocName = XCOFF::getRelocationTypeString(Reloc.Type);
+ if (opts::ExpandRelocs) {
+ DictScope Group(W, "Relocation");
+ W.printHex("Virtual Address", Reloc.VirtualAddress);
+ W.printNumber("Symbol", SymbolName, Reloc.SymbolIndex);
+ W.printString("IsSigned", Reloc.isRelocationSigned() ? "Yes" : "No");
+ W.printNumber("FixupBitValue", Reloc.isFixupIndicated() ? 1 : 0);
+ W.printNumber("Length", Reloc.getRelocatedLength());
+ W.printEnum("Type", (uint8_t)Reloc.Type,
+ makeArrayRef(RelocationTypeNameclass));
+ } else {
+ raw_ostream &OS = W.startLine();
+ OS << W.hex(Reloc.VirtualAddress) << " " << RelocName << " " << SymbolName
+ << "(" << Reloc.SymbolIndex << ") " << W.hex(Reloc.Info) << "\n";
+ }
+}
+template <typename Shdr, typename RelTy>
+void XCOFFDumper::printRelocations(ArrayRef<Shdr> Sections) {
ListScope LS(W, "Relocations");
uint16_t Index = 0;
- for (const auto &Sec : Sections) {
+ for (const Shdr &Sec : Sections) {
++Index;
// Only the .text, .data, .tdata, and STYP_DWARF sections have relocation.
if (Sec.Flags != XCOFF::STYP_TEXT && Sec.Flags != XCOFF::STYP_DATA &&
Sec.Flags != XCOFF::STYP_TDATA && Sec.Flags != XCOFF::STYP_DWARF)
continue;
- auto Relocations = unwrapOrError(Obj.getFileName(), Obj.relocations(Sec));
+ Expected<ArrayRef<RelTy>> ErrOrRelocations =
+ Obj.relocations<Shdr, RelTy>(Sec);
+ if (Error E = ErrOrRelocations.takeError()) {
+ reportUniqueWarning(std::move(E));
+ continue;
+ }
+
+ const ArrayRef<RelTy> Relocations = *ErrOrRelocations;
if (Relocations.empty())
continue;
W.startLine() << "Section (index: " << Index << ") " << Sec.getName()
<< " {\n";
- for (auto Reloc : Relocations) {
- StringRef SymbolName = unwrapOrError(
- Obj.getFileName(), Obj.getSymbolNameByIndex(Reloc.SymbolIndex));
-
- DictScope RelocScope(W, "Relocation");
- W.printHex("Virtual Address", Reloc.VirtualAddress);
- W.printNumber("Symbol", SymbolName, Reloc.SymbolIndex);
- W.printString("IsSigned", Reloc.isRelocationSigned() ? "Yes" : "No");
- W.printNumber("FixupBitValue", Reloc.isFixupIndicated() ? 1 : 0);
- W.printNumber("Length", Reloc.getRelocatedLength());
- W.printEnum("Type", (uint8_t)Reloc.Type,
- makeArrayRef(RelocationTypeNameclass));
- }
+ W.indent();
+
+ for (const RelTy Reloc : Relocations)
+ printRelocation(Reloc);
+
W.unindent();
W.startLine() << "}\n";
}
}
-static const EnumEntry<XCOFF::CFileStringType> FileStringType[] = {
+const EnumEntry<XCOFF::CFileStringType> FileStringType[] = {
#define ECase(X) \
{ #X, XCOFF::X }
ECase(XFT_FN), ECase(XFT_CT), ECase(XFT_CV), ECase(XFT_CD)
#undef ECase
};
-static const EnumEntry<XCOFF::SymbolAuxType> SymAuxType[] = {
+const EnumEntry<XCOFF::SymbolAuxType> SymAuxType[] = {
#define ECase(X) \
{ #X, XCOFF::X }
ECase(AUX_EXCEPT), ECase(AUX_FCN), ECase(AUX_SYM), ECase(AUX_FILE),
@@ -203,7 +239,7 @@ static const EnumEntry<XCOFF::StorageMappingClass> CsectStorageMappingClass[] =
#undef ECase
};
-static const EnumEntry<XCOFF::SymbolType> CsectSymbolTypeClass[] = {
+const EnumEntry<XCOFF::SymbolType> CsectSymbolTypeClass[] = {
#define ECase(X) \
{ #X, XCOFF::X }
ECase(XTY_ER), ECase(XTY_SD), ECase(XTY_LD), ECase(XTY_CM)
@@ -253,7 +289,7 @@ void XCOFFDumper::printSectAuxEntForStat(
W.printNumber("NumberOfLineNum", AuxEntPtr->NumberOfLineNum);
}
-static const EnumEntry<XCOFF::StorageClass> SymStorageClass[] = {
+const EnumEntry<XCOFF::StorageClass> SymStorageClass[] = {
#define ECase(X) \
{ #X, XCOFF::X }
ECase(C_NULL), ECase(C_AUTO), ECase(C_EXT), ECase(C_STAT),
@@ -302,14 +338,14 @@ static StringRef GetSymbolValueName(XCOFF::StorageClass SC) {
}
}
-static const EnumEntry<XCOFF::CFileLangId> CFileLangIdClass[] = {
+const EnumEntry<XCOFF::CFileLangId> CFileLangIdClass[] = {
#define ECase(X) \
{ #X, XCOFF::X }
ECase(TB_C), ECase(TB_CPLUSPLUS)
#undef ECase
};
-static const EnumEntry<XCOFF::CFileCpuId> CFileCpuIdClass[] = {
+const EnumEntry<XCOFF::CFileCpuId> CFileCpuIdClass[] = {
#define ECase(X) \
{ #X, XCOFF::X }
ECase(TCPU_PPC64), ECase(TCPU_COM), ECase(TCPU_970)
@@ -460,7 +496,12 @@ void XCOFFDumper::printSymbols() {
void XCOFFDumper::printStringTable() {
DictScope DS(W, "StringTable");
StringRef StrTable = Obj.getStringTable();
- printAsStringList(StrTable);
+ uint32_t StrTabSize = StrTable.size();
+ W.printNumber("Length", StrTabSize);
+ // Print strings from the fifth byte, since the first four bytes contain the
+ // length (in bytes) of the string table (including the length field).
+ if (StrTabSize > 4)
+ printAsStringList(StrTable, 4);
}
void XCOFFDumper::printDynamicSymbols() {
@@ -476,10 +517,46 @@ void XCOFFDumper::printStackMap() const {
}
void XCOFFDumper::printNeededLibraries() {
- llvm_unreachable("Unimplemented functionality for XCOFFDumper");
+ ListScope D(W, "NeededLibraries");
+ auto ImportFilesOrError = Obj.getImportFileTable();
+ if (!ImportFilesOrError) {
+ reportUniqueWarning(ImportFilesOrError.takeError());
+ return;
+ }
+
+ StringRef ImportFileTable = ImportFilesOrError.get();
+ const char *CurrentStr = ImportFileTable.data();
+ const char *TableEnd = ImportFileTable.end();
+ // Default column width for names is 13 even if no names are that long.
+ size_t BaseWidth = 13;
+
+ // Get the max width of BASE columns.
+ for (size_t StrIndex = 0; CurrentStr < TableEnd; ++StrIndex) {
+ size_t CurrentLen = strlen(CurrentStr);
+ CurrentStr += strlen(CurrentStr) + 1;
+ if (StrIndex % 3 == 1)
+ BaseWidth = std::max(BaseWidth, CurrentLen);
+ }
+
+ auto &OS = static_cast<formatted_raw_ostream &>(W.startLine());
+ // Each entry consists of 3 strings: the path_name, base_name and
+ // archive_member_name. The first entry is a default LIBPATH value and other
+ // entries have no path_name. We just dump the base_name and
+ // archive_member_name here.
+ OS << left_justify("BASE", BaseWidth) << " MEMBER\n";
+ CurrentStr = ImportFileTable.data();
+ for (size_t StrIndex = 0; CurrentStr < TableEnd;
+ ++StrIndex, CurrentStr += strlen(CurrentStr) + 1) {
+ if (StrIndex >= 3 && StrIndex % 3 != 0) {
+ if (StrIndex % 3 == 1)
+ OS << " " << left_justify(CurrentStr, BaseWidth) << " ";
+ else
+ OS << CurrentStr << "\n";
+ }
+ }
}
-static const EnumEntry<XCOFF::SectionTypeFlags> SectionTypeFlagsNames[] = {
+const EnumEntry<XCOFF::SectionTypeFlags> SectionTypeFlagsNames[] = {
#define ECase(X) \
{ #X, XCOFF::X }
ECase(STYP_PAD), ECase(STYP_DWARF), ECase(STYP_TEXT),
@@ -523,6 +600,176 @@ void XCOFFDumper::printGenericSectionHeader(T &Sec) const {
W.printNumber("NumberOfLineNumbers", Sec.NumberOfLineNumbers);
}
+void XCOFFDumper::printAuxiliaryHeader(
+ const XCOFFAuxiliaryHeader32 *AuxHeader) {
+ if (AuxHeader == nullptr)
+ return;
+ uint16_t AuxSize = Obj.getOptionalHeaderSize();
+ uint16_t PartialFieldOffset = AuxSize;
+ const char *PartialFieldName = nullptr;
+
+ DictScope DS(W, "AuxiliaryHeader");
+
+#define PrintAuxMember32(H, S, T) \
+ if (offsetof(XCOFFAuxiliaryHeader32, T) + \
+ sizeof(XCOFFAuxiliaryHeader32::T) <= \
+ AuxSize) \
+ W.print##H(S, AuxHeader->T); \
+ else if (offsetof(XCOFFAuxiliaryHeader32, T) < AuxSize) { \
+ PartialFieldOffset = offsetof(XCOFFAuxiliaryHeader32, T); \
+ PartialFieldName = S; \
+ }
+
+ PrintAuxMember32(Hex, "Magic", AuxMagic);
+ PrintAuxMember32(Hex, "Version", Version);
+ PrintAuxMember32(Hex, "Size of .text section", TextSize);
+ PrintAuxMember32(Hex, "Size of .data section", InitDataSize);
+ PrintAuxMember32(Hex, "Size of .bss section", BssDataSize);
+ PrintAuxMember32(Hex, "Entry point address", EntryPointAddr);
+ PrintAuxMember32(Hex, ".text section start address", TextStartAddr);
+ PrintAuxMember32(Hex, ".data section start address", DataStartAddr);
+ PrintAuxMember32(Hex, "TOC anchor address", TOCAnchorAddr);
+ PrintAuxMember32(Number, "Section number of entryPoint", SecNumOfEntryPoint);
+ PrintAuxMember32(Number, "Section number of .text", SecNumOfText);
+ PrintAuxMember32(Number, "Section number of .data", SecNumOfData);
+ PrintAuxMember32(Number, "Section number of TOC", SecNumOfTOC);
+ PrintAuxMember32(Number, "Section number of loader data", SecNumOfLoader);
+ PrintAuxMember32(Number, "Section number of .bss", SecNumOfBSS);
+ PrintAuxMember32(Hex, "Maximum alignment of .text", MaxAlignOfText);
+ PrintAuxMember32(Hex, "Maximum alignment of .data", MaxAlignOfData);
+ PrintAuxMember32(Hex, "Module type", ModuleType);
+ PrintAuxMember32(Hex, "CPU type of objects", CpuFlag);
+ PrintAuxMember32(Hex, "(Reserved)", CpuType);
+ PrintAuxMember32(Hex, "Maximum stack size", MaxStackSize);
+ PrintAuxMember32(Hex, "Maximum data size", MaxDataSize);
+ PrintAuxMember32(Hex, "Reserved for debugger", ReservedForDebugger);
+ PrintAuxMember32(Hex, "Text page size", TextPageSize);
+ PrintAuxMember32(Hex, "Data page size", DataPageSize);
+ PrintAuxMember32(Hex, "Stack page size", StackPageSize);
+ if (offsetof(XCOFFAuxiliaryHeader32, FlagAndTDataAlignment) +
+ sizeof(XCOFFAuxiliaryHeader32::FlagAndTDataAlignment) <=
+ AuxSize) {
+ W.printHex("Flag", AuxHeader->getFlag());
+ W.printHex("Alignment of thread-local storage",
+ AuxHeader->getTDataAlignment());
+ }
+
+ PrintAuxMember32(Number, "Section number for .tdata", SecNumOfTData);
+ PrintAuxMember32(Number, "Section number for .tbss", SecNumOfTBSS);
+
+ // Warn and dump the raw bytes if the last field is only partially present.
+ if (PartialFieldOffset < AuxSize) {
+ std::string ErrInfo;
+ llvm::raw_string_ostream StringOS(ErrInfo);
+ StringOS << "Only partial field for " << PartialFieldName << " at offset ("
+ << PartialFieldOffset << ").";
+ StringOS.flush();
+ reportWarning(
+ make_error<GenericBinaryError>(ErrInfo, object_error::parse_failed),
+ "-");
+ W.printBinary(
+ "Raw data", "",
+ ArrayRef<uint8_t>((const uint8_t *)(AuxHeader) + PartialFieldOffset,
+ AuxSize - PartialFieldOffset));
+ } else if (sizeof(XCOFFAuxiliaryHeader32) < AuxSize) {
+ reportWarning(make_error<GenericBinaryError>(
+ "There is extra data beyond the auxiliary header",
+ object_error::parse_failed),
+ "-");
+ W.printBinary("Extra raw data", "",
+ ArrayRef<uint8_t>((const uint8_t *)(AuxHeader) +
+ sizeof(XCOFFAuxiliaryHeader32),
+ AuxSize - sizeof(XCOFFAuxiliaryHeader32)));
+ }
+
+#undef PrintAuxMember32
+}
+
+void XCOFFDumper::printAuxiliaryHeader(
+ const XCOFFAuxiliaryHeader64 *AuxHeader) {
+ if (AuxHeader == nullptr)
+ return;
+ uint16_t AuxSize = Obj.getOptionalHeaderSize();
+ uint16_t PartialFieldOffset = AuxSize;
+ const char *PartialFieldName = nullptr;
+
+ DictScope DS(W, "AuxiliaryHeader");
+
+#define PrintAuxMember64(H, S, T) \
+ if (offsetof(XCOFFAuxiliaryHeader64, T) + \
+ sizeof(XCOFFAuxiliaryHeader64::T) <= \
+ AuxSize) \
+ W.print##H(S, AuxHeader->T); \
+ else if (offsetof(XCOFFAuxiliaryHeader64, T) < AuxSize) { \
+ PartialFieldOffset = offsetof(XCOFFAuxiliaryHeader64, T); \
+ PartialFieldName = S; \
+ }
+
+ PrintAuxMember64(Hex, "Magic", AuxMagic);
+ PrintAuxMember64(Hex, "Version", Version);
+ PrintAuxMember64(Hex, "Reserved for debugger", ReservedForDebugger);
+ PrintAuxMember64(Hex, ".text section start address", TextStartAddr);
+ PrintAuxMember64(Hex, ".data section start address", DataStartAddr);
+ PrintAuxMember64(Hex, "TOC anchor address", TOCAnchorAddr);
+ PrintAuxMember64(Number, "Section number of entryPoint", SecNumOfEntryPoint);
+ PrintAuxMember64(Number, "Section number of .text", SecNumOfText);
+ PrintAuxMember64(Number, "Section number of .data", SecNumOfData);
+ PrintAuxMember64(Number, "Section number of TOC", SecNumOfTOC);
+ PrintAuxMember64(Number, "Section number of loader data", SecNumOfLoader);
+ PrintAuxMember64(Number, "Section number of .bss", SecNumOfBSS);
+ PrintAuxMember64(Hex, "Maximum alignment of .text", MaxAlignOfText);
+ PrintAuxMember64(Hex, "Maximum alignment of .data", MaxAlignOfData);
+ PrintAuxMember64(Hex, "Module type", ModuleType);
+ PrintAuxMember64(Hex, "CPU type of objects", CpuFlag);
+ PrintAuxMember64(Hex, "(Reserved)", CpuType);
+ PrintAuxMember64(Hex, "Text page size", TextPageSize);
+ PrintAuxMember64(Hex, "Data page size", DataPageSize);
+ PrintAuxMember64(Hex, "Stack page size", StackPageSize);
+ if (offsetof(XCOFFAuxiliaryHeader64, FlagAndTDataAlignment) +
+ sizeof(XCOFFAuxiliaryHeader64::FlagAndTDataAlignment) <=
+ AuxSize) {
+ W.printHex("Flag", AuxHeader->getFlag());
+ W.printHex("Alignment of thread-local storage",
+ AuxHeader->getTDataAlignment());
+ }
+ PrintAuxMember64(Hex, "Size of .text section", TextSize);
+ PrintAuxMember64(Hex, "Size of .data section", InitDataSize);
+ PrintAuxMember64(Hex, "Size of .bss section", BssDataSize);
+ PrintAuxMember64(Hex, "Entry point address", EntryPointAddr);
+ PrintAuxMember64(Hex, "Maximum stack size", MaxStackSize);
+ PrintAuxMember64(Hex, "Maximum data size", MaxDataSize);
+ PrintAuxMember64(Number, "Section number for .tdata", SecNumOfTData);
+ PrintAuxMember64(Number, "Section number for .tbss", SecNumOfTBSS);
+ PrintAuxMember64(Hex, "Additional flags (64-bit XCOFF)", XCOFF64Flag);
+
+ if (PartialFieldOffset < AuxSize) {
+ std::string ErrInfo;
+ llvm::raw_string_ostream StringOS(ErrInfo);
+ StringOS << "Only partial field for " << PartialFieldName << " at offset ("
+ << PartialFieldOffset << ").";
+ StringOS.flush();
+ reportWarning(
+ make_error<GenericBinaryError>(ErrInfo, object_error::parse_failed),
+ "-");
+ W.printBinary(
+ "Raw data", "",
+ ArrayRef<uint8_t>((const uint8_t *)(AuxHeader) + PartialFieldOffset,
+ AuxSize - PartialFieldOffset));
+ } else if (sizeof(XCOFFAuxiliaryHeader64) < AuxSize) {
+ reportWarning(make_error<GenericBinaryError>(
+ "There is extra data beyond the auxiliary header",
+ object_error::parse_failed),
+ "-");
+ W.printBinary("Extra raw data", "",
+ ArrayRef<uint8_t>((const uint8_t *)(AuxHeader) +
+ sizeof(XCOFFAuxiliaryHeader64),
+ AuxSize - sizeof(XCOFFAuxiliaryHeader64)));
+ }
+
+#undef PrintAuxMember64
+}
+
template <typename T>
void XCOFFDumper::printSectionHeaders(ArrayRef<T> Sections) {
ListScope Group(W, "Sections");
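The printStringTable change earlier in this file notes that the first four bytes of an XCOFF string table hold the table's total length, including the length field itself, so real string data starts at byte four. A rough standalone sketch of that layout follows; the helper name is hypothetical and a big-endian length field is assumed, this is not the XCOFFObjectFile API.

#include <algorithm>
#include <cstdint>
#include <cstring>
#include <string>
#include <vector>

static std::vector<std::string> splitStringTable(const uint8_t *Data,
                                                 size_t Size) {
  std::vector<std::string> Strings;
  if (Size <= 4)
    return Strings; // Only the length field is present; there are no strings.
  uint32_t DeclaredLen = (uint32_t(Data[0]) << 24) | (uint32_t(Data[1]) << 16) |
                         (uint32_t(Data[2]) << 8) | uint32_t(Data[3]);
  size_t End = std::min<size_t>(Size, DeclaredLen);
  size_t Off = 4; // Skip the length field; it is counted in DeclaredLen.
  while (Off < End) {
    const void *Nul = std::memchr(Data + Off, '\0', End - Off);
    size_t Len =
        Nul ? static_cast<size_t>(static_cast<const uint8_t *>(Nul) - Data) - Off
            : End - Off;
    Strings.emplace_back(reinterpret_cast<const char *>(Data) + Off, Len);
    Off += Len + 1;
  }
  return Strings;
}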
diff --git a/llvm/tools/llvm-readobj/llvm-readobj.cpp b/llvm/tools/llvm-readobj/llvm-readobj.cpp
index 0b49f03f4275..a598e2c28832 100644
--- a/llvm/tools/llvm-readobj/llvm-readobj.cpp
+++ b/llvm/tools/llvm-readobj/llvm-readobj.cpp
@@ -23,6 +23,7 @@
#include "WindowsResourceDumper.h"
#include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h"
#include "llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/COFFImportFile.h"
#include "llvm/Object/ELFObjectFile.h"
@@ -44,7 +45,6 @@
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/ScopedPrinter.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/WithColor.h"
using namespace llvm;
@@ -65,7 +65,7 @@ enum ID {
#include "Opts.inc"
#undef PREFIX
-static const opt::OptTable::Info InfoTable[] = {
+const opt::OptTable::Info InfoTable[] = {
#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
HELPTEXT, METAVAR, VALUES) \
{ \
@@ -149,6 +149,9 @@ static bool COFFLoadConfig;
static bool COFFResources;
static bool COFFTLSDirectory;
+// XCOFF specific options.
+static bool XCOFFAuxiliaryHeader;
+
OutputStyleTy Output = OutputStyleTy::LLVM;
static std::vector<std::string> InputFilenames;
} // namespace opts
@@ -157,7 +160,7 @@ static StringRef ToolName;
namespace llvm {
-LLVM_ATTRIBUTE_NORETURN static void error(Twine Msg) {
+[[noreturn]] static void error(Twine Msg) {
// Flush the standard output to print the error at a
// proper place.
fouts().flush();
@@ -165,7 +168,7 @@ LLVM_ATTRIBUTE_NORETURN static void error(Twine Msg) {
exit(1);
}
-LLVM_ATTRIBUTE_NORETURN void reportError(Error Err, StringRef Input) {
+[[noreturn]] void reportError(Error Err, StringRef Input) {
assert(Err);
if (Input == "-")
Input = "<stdin>";
@@ -268,6 +271,9 @@ static void parseOptions(const opt::InputArgList &Args) {
opts::COFFResources = Args.hasArg(OPT_coff_resources);
opts::COFFTLSDirectory = Args.hasArg(OPT_coff_tls_directory);
+ // XCOFF specific options.
+ opts::XCOFFAuxiliaryHeader = Args.hasArg(OPT_auxiliary_header);
+
opts::InputFilenames = Args.getAllArgValues(OPT_INPUT);
}
@@ -343,6 +349,9 @@ static void dumpObject(ObjectFile &Obj, ScopedPrinter &Writer,
if (opts::FileHeaders)
Dumper->printFileHeaders();
+ if (Obj.isXCOFF() && opts::XCOFFAuxiliaryHeader)
+ Dumper->printAuxiliaryHeader();
+
// This is only used for ELF currently. In some cases, when an object is
// corrupt (e.g. truncated), we can't dump anything except the file header.
if (!ContentErrString.empty())
@@ -577,6 +586,7 @@ int main(int argc, char *argv[]) {
if (opts::All) {
opts::FileHeaders = true;
+ opts::XCOFFAuxiliaryHeader = true;
opts::ProgramHeaders = true;
opts::SectionHeaders = true;
opts::Symbols = true;
@@ -595,6 +605,7 @@ int main(int argc, char *argv[]) {
if (opts::Headers) {
opts::FileHeaders = true;
+ opts::XCOFFAuxiliaryHeader = true;
opts::ProgramHeaders = true;
opts::SectionHeaders = true;
}
diff --git a/llvm/tools/llvm-readobj/llvm-readobj.h b/llvm/tools/llvm-readobj/llvm-readobj.h
index 43d19b4d3f5c..7672da5c0aae 100644
--- a/llvm/tools/llvm-readobj/llvm-readobj.h
+++ b/llvm/tools/llvm-readobj/llvm-readobj.h
@@ -21,7 +21,7 @@ namespace llvm {
}
// Various helper functions.
- LLVM_ATTRIBUTE_NORETURN void reportError(Error Err, StringRef Input);
+ [[noreturn]] void reportError(Error Err, StringRef Input);
void reportWarning(Error Err, StringRef Input);
template <class T> T unwrapOrError(StringRef Input, Expected<T> EO) {
diff --git a/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp b/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp
index f02d8981b30e..21339a3f8f3d 100644
--- a/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp
+++ b/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp
@@ -24,6 +24,7 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/SymbolSize.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/DynamicLibrary.h"
@@ -33,7 +34,6 @@
#include "llvm/Support/Memory.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
@@ -206,6 +206,9 @@ public:
uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
unsigned SectionID, StringRef SectionName,
bool IsReadOnly) override;
+ TrivialMemoryManager::TLSSection
+ allocateTLSSection(uintptr_t Size, unsigned Alignment, unsigned SectionID,
+ StringRef SectionName) override;
/// If non null, records subsequent Name -> SectionID mappings.
void setSectionIDsMap(SectionIDMap *SecIDMap) {
@@ -252,7 +255,8 @@ public:
sys::Memory::MF_WRITE,
EC);
if (!MB.base())
- report_fatal_error("Can't allocate enough memory: " + EC.message());
+ report_fatal_error(Twine("Can't allocate enough memory: ") +
+ EC.message());
PreallocSlab = MB;
UsePreallocation = true;
@@ -282,6 +286,9 @@ private:
uintptr_t SlabSize = 0;
uintptr_t CurrentSlabOffset = 0;
SectionIDMap *SecIDMap = nullptr;
+#if defined(__x86_64__) && defined(__ELF__)
+ unsigned UsedTLSStorage = 0;
+#endif
};
uint8_t *TrivialMemoryManager::allocateCodeSection(uintptr_t Size,
@@ -306,7 +313,8 @@ uint8_t *TrivialMemoryManager::allocateCodeSection(uintptr_t Size,
sys::Memory::MF_WRITE,
EC);
if (!MB.base())
- report_fatal_error("MemoryManager allocation failed: " + EC.message());
+ report_fatal_error(Twine("MemoryManager allocation failed: ") +
+ EC.message());
FunctionMemory.push_back(SectionInfo(SectionName, MB, SectionID));
return (uint8_t*)MB.base();
}
@@ -334,11 +342,52 @@ uint8_t *TrivialMemoryManager::allocateDataSection(uintptr_t Size,
sys::Memory::MF_WRITE,
EC);
if (!MB.base())
- report_fatal_error("MemoryManager allocation failed: " + EC.message());
+ report_fatal_error(Twine("MemoryManager allocation failed: ") +
+ EC.message());
DataMemory.push_back(SectionInfo(SectionName, MB, SectionID));
return (uint8_t*)MB.base();
}
+// In case the execution needs TLS storage, we define a very small TLS memory
+// area here that will be used in allocateTLSSection().
+#if defined(__x86_64__) && defined(__ELF__)
+extern "C" {
+alignas(16) __attribute__((visibility("hidden"), tls_model("initial-exec"),
+ used)) thread_local char LLVMRTDyldTLSSpace[16];
+}
+#endif
+
+TrivialMemoryManager::TLSSection
+TrivialMemoryManager::allocateTLSSection(uintptr_t Size, unsigned Alignment,
+ unsigned SectionID,
+ StringRef SectionName) {
+#if defined(__x86_64__) && defined(__ELF__)
+ if (Size + UsedTLSStorage > sizeof(LLVMRTDyldTLSSpace)) {
+ return {};
+ }
+
+ // Get the offset of the TLSSpace in the TLS block by using a tpoff
+ // relocation here.
+ int64_t TLSOffset;
+ asm("leaq LLVMRTDyldTLSSpace@tpoff, %0" : "=r"(TLSOffset));
+
+ TLSSection Section;
+ // We use the storage directly as the initialization image. This means that
+ // when a new thread is spawned after this allocation, it will not be
+ // initialized correctly. As a result, llvm-rtdyld will only support TLS in a
+ // single thread.
+ Section.InitializationImage =
+ reinterpret_cast<uint8_t *>(LLVMRTDyldTLSSpace + UsedTLSStorage);
+ Section.Offset = TLSOffset + UsedTLSStorage;
+
+ UsedTLSStorage += Size;
+
+ return Section;
+#else
+ return {};
+#endif
+}
+
static const char *ProgramName;
static void ErrorAndExit(const Twine &Msg) {
@@ -349,10 +398,10 @@ static void ErrorAndExit(const Twine &Msg) {
static void loadDylibs() {
for (const std::string &Dylib : Dylibs) {
if (!sys::fs::is_regular_file(Dylib))
- report_fatal_error("Dylib not found: '" + Dylib + "'.");
+ report_fatal_error(Twine("Dylib not found: '") + Dylib + "'.");
std::string ErrMsg;
if (sys::DynamicLibrary::LoadLibraryPermanently(Dylib.c_str(), &ErrMsg))
- report_fatal_error("Error loading '" + Dylib + "': " + ErrMsg);
+ report_fatal_error(Twine("Error loading '") + Dylib + "': " + ErrMsg);
}
}
@@ -413,8 +462,9 @@ static int printLineInfoForInput(bool LoadObjects, bool UseDebugObj) {
}
}
- std::unique_ptr<DIContext> Context =
- DWARFContext::create(*SymbolObj, LoadedObjInfo.get());
+ std::unique_ptr<DIContext> Context = DWARFContext::create(
+ *SymbolObj, DWARFContext::ProcessDebugRelocations::Process,
+ LoadedObjInfo.get());
std::vector<std::pair<SymbolRef, uint64_t>> SymAddr =
object::computeSymbolSizes(*SymbolObj);
@@ -710,15 +760,15 @@ static void remapSectionsAndSymbols(const llvm::Triple &TargetTriple,
size_t EqualsIdx = Mapping.find_first_of('=');
if (EqualsIdx == StringRef::npos)
- report_fatal_error("Invalid dummy symbol specification '" + Mapping +
- "'. Should be '<symbol name>=<addr>'");
+ report_fatal_error(Twine("Invalid dummy symbol specification '") +
+ Mapping + "'. Should be '<symbol name>=<addr>'");
std::string Symbol = Mapping.substr(0, EqualsIdx);
std::string AddrStr = Mapping.substr(EqualsIdx + 1);
uint64_t Addr;
if (StringRef(AddrStr).getAsInteger(0, Addr))
- report_fatal_error("Invalid symbol mapping '" + Mapping + "'.");
+ report_fatal_error(Twine("Invalid symbol mapping '") + Mapping + "'.");
MemMgr.addDummySymbol(Symbol, Addr);
}
@@ -974,7 +1024,7 @@ int main(int argc, char **argv) {
Timers = ShowTimes ? std::make_unique<RTDyldTimers>() : nullptr;
- int Result;
+ int Result = 0;
switch (Action) {
case AC_Execute:
Result = executeInput();
diff --git a/llvm/tools/llvm-stress/llvm-stress.cpp b/llvm/tools/llvm-stress/llvm-stress.cpp
index ece322999107..f2be4e7d0712 100644
--- a/llvm/tools/llvm-stress/llvm-stress.cpp
+++ b/llvm/tools/llvm-stress/llvm-stress.cpp
@@ -452,10 +452,10 @@ struct ConstModifier: public Modifier {
switch (getRandom() % 7) {
case 0:
return PT->push_back(ConstantInt::get(
- Ty, APInt::getAllOnesValue(Ty->getPrimitiveSizeInBits())));
+ Ty, APInt::getAllOnes(Ty->getPrimitiveSizeInBits())));
case 1:
- return PT->push_back(ConstantInt::get(
- Ty, APInt::getNullValue(Ty->getPrimitiveSizeInBits())));
+ return PT->push_back(
+ ConstantInt::get(Ty, APInt::getZero(Ty->getPrimitiveSizeInBits())));
case 2:
case 3:
case 4:
diff --git a/llvm/tools/llvm-strings/llvm-strings.cpp b/llvm/tools/llvm-strings/llvm-strings.cpp
index 0b068749917b..26be3914fb92 100644
--- a/llvm/tools/llvm-strings/llvm-strings.cpp
+++ b/llvm/tools/llvm-strings/llvm-strings.cpp
@@ -43,7 +43,7 @@ enum ID {
#include "Opts.inc"
#undef PREFIX
-static const opt::OptTable::Info InfoTable[] = {
+const opt::OptTable::Info InfoTable[] = {
#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
HELPTEXT, METAVAR, VALUES) \
{ \
@@ -73,7 +73,7 @@ static bool PrintFileName;
enum radix { none, octal, hexadecimal, decimal };
static radix Radix;
-LLVM_ATTRIBUTE_NORETURN static void reportCmdLineError(const Twine &Message) {
+[[noreturn]] static void reportCmdLineError(const Twine &Message) {
WithColor::error(errs(), ToolName) << Message << "\n";
exit(1);
}
diff --git a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
index 227ce12a6d9a..2adbf1f1731d 100644
--- a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
+++ b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
@@ -52,7 +52,7 @@ enum ID {
#include "Opts.inc"
#undef PREFIX
-static const opt::OptTable::Info InfoTable[] = {
+const opt::OptTable::Info InfoTable[] = {
#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
HELPTEXT, METAVAR, VALUES) \
{ \
diff --git a/llvm/tools/llvm-tli-checker/Opts.td b/llvm/tools/llvm-tli-checker/Opts.td
new file mode 100644
index 000000000000..b1acef4093c4
--- /dev/null
+++ b/llvm/tools/llvm-tli-checker/Opts.td
@@ -0,0 +1,16 @@
+include "llvm/Option/OptParser.td"
+
+class F<string name, string help> : Flag<["--"], name>, HelpText<help>;
+multiclass Eq<string name, string metavar, string help> {
+ def NAME #_EQ : Joined<["--"], name #"=">,
+ HelpText<help>, MetaVarName<metavar>;
+ def : Separate<["--"], name>, Alias<!cast<Joined>(NAME #_EQ)>;
+}
+
+def help : F<"help", "Display available options">;
+def : Flag<["-"], "h">, HelpText<"Alias for --help">, Alias<help>;
+def dump_tli : F<"dump-tli", "Dump TLI's list of functions and whether they are available">;
+defm triple : Eq<"triple", "<triple>", "Target triple">;
+defm libdir : Eq<"libdir", "<directory>", "Root directory for finding library files">;
+def separate : F<"separate", "Report on each library file separately">;
+def report_EQ : Joined<["--"], "report=">, HelpText<"Level of detail to report">, Values<"summary,discrepancy,full">;
diff --git a/llvm/tools/llvm-tli-checker/llvm-tli-checker.cpp b/llvm/tools/llvm-tli-checker/llvm-tli-checker.cpp
new file mode 100644
index 000000000000..bf25efc0b0bd
--- /dev/null
+++ b/llvm/tools/llvm-tli-checker/llvm-tli-checker.cpp
@@ -0,0 +1,357 @@
+//===-- llvm-tli-checker.cpp - Compare TargetLibraryInfo to SDK libraries -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/Demangle/Demangle.h"
+#include "llvm/Object/Archive.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Option/ArgList.h"
+#include "llvm/Option/Option.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/InitLLVM.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/WithColor.h"
+
+using namespace llvm;
+using namespace llvm::object;
+
+// Command-line option boilerplate.
+namespace {
+enum ID {
+ OPT_INVALID = 0, // This is not an option ID.
+#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
+ HELPTEXT, METAVAR, VALUES) \
+ OPT_##ID,
+#include "Opts.inc"
+#undef OPTION
+};
+
+#define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE;
+#include "Opts.inc"
+#undef PREFIX
+
+const opt::OptTable::Info InfoTable[] = {
+#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
+ HELPTEXT, METAVAR, VALUES) \
+ { \
+ PREFIX, NAME, HELPTEXT, \
+ METAVAR, OPT_##ID, opt::Option::KIND##Class, \
+ PARAM, FLAGS, OPT_##GROUP, \
+ OPT_##ALIAS, ALIASARGS, VALUES},
+#include "Opts.inc"
+#undef OPTION
+};
+
+class TLICheckerOptTable : public opt::OptTable {
+public:
+ TLICheckerOptTable() : OptTable(InfoTable) {}
+};
+} // namespace
+
+// We have three levels of reporting.
+enum class ReportKind {
+ Error, // For argument parsing errors.
+ Summary, // Report counts but not details.
+ Discrepancy, // Report where TLI and the library differ.
+ Full // Report for every known-to-TLI function.
+};
+
+// Most of the ObjectFile interfaces return an Expected<T>, so make it easy
+// to ignore those.
+template <typename T> T unwrapIgnoreError(Expected<T> E) {
+ if (E)
+ return std::move(*E);
+ // Sink the error and return a nothing value.
+ consumeError(E.takeError());
+ return T();
+}
+
+static void fail(const Twine &Message) {
+ WithColor::error() << Message << '\n';
+ exit(EXIT_FAILURE);
+}
+
+// Some problem occurred with an archive member; complain and continue.
+static void reportArchiveChildIssue(const object::Archive::Child &C, int Index,
+ StringRef ArchiveFilename) {
+ // First get the member name.
+ std::string ChildName;
+ Expected<StringRef> NameOrErr = C.getName();
+ if (NameOrErr)
+ ChildName = std::string(NameOrErr.get());
+ else {
+ // Ignore the name-fetch error, just report the index.
+ consumeError(NameOrErr.takeError());
+ ChildName = "<file index: " + std::to_string(Index) + ">";
+ }
+
+ WithColor::warning() << ArchiveFilename << "(" << ChildName
+ << "): member is not usable\n";
+}
+
+// Return Name, and if Name is mangled, append "aka" and the demangled name.
+static std::string PrintableName(StringRef Name) {
+ std::string OutputName = "'";
+ OutputName += Name;
+ OutputName += "'";
+ if (Name.startswith("_Z") || Name.startswith("??")) {
+ OutputName += " aka ";
+ OutputName += demangle(Name.str());
+ }
+ return OutputName;
+}
+
+// Store all the names that TargetLibraryInfo knows about; the bool indicates
+// whether TLI has it marked as "available" for the target of interest.
+// This is a vector to preserve the sorted order for better reporting.
+struct TLINameList : std::vector<std::pair<StringRef, bool>> {
+ // Record all the TLI info in the vector.
+ void initialize(StringRef TargetTriple);
+ // Print out what we found.
+ void dump();
+};
+TLINameList TLINames;
+
+void TLINameList::initialize(StringRef TargetTriple) {
+ Triple T(TargetTriple);
+ TargetLibraryInfoImpl TLII(T);
+ TargetLibraryInfo TLI(TLII);
+
+ reserve(LibFunc::NumLibFuncs);
+ size_t NumAvailable = 0;
+ for (unsigned FI = 0; FI != LibFunc::NumLibFuncs; ++FI) {
+ LibFunc LF = (LibFunc)FI;
+ bool Available = TLI.has(LF);
+ // getName returns names only for available funcs.
+ TLII.setAvailable(LF);
+ emplace_back(TLI.getName(LF), Available);
+ if (Available)
+ ++NumAvailable;
+ }
+ outs() << "TLI knows " << LibFunc::NumLibFuncs << " symbols, " << NumAvailable
+ << " available for '" << TargetTriple << "'\n";
+}
+
+void TLINameList::dump() {
+ // Assume this gets called after initialize(), so the line printed there
+ // already serves as a header; there is no need to repeat the triple.
+ for (auto &TLIName : TLINames) {
+ outs() << (TLIName.second ? " " : "not ")
+ << "available: " << PrintableName(TLIName.first) << '\n';
+ }
+}
+
+// Store all the exported symbol names we found in the input libraries.
+// We use a map to get hashed lookup speed; the bool is meaningless.
+class SDKNameMap : public StringMap<bool> {
+ void populateFromObject(ObjectFile *O);
+ void populateFromArchive(Archive *A);
+
+public:
+ void populateFromFile(StringRef LibDir, StringRef LibName);
+};
+SDKNameMap SDKNames;
+
+// Given an ObjectFile, extract the global function symbols.
+void SDKNameMap::populateFromObject(ObjectFile *O) {
+ // FIXME: Support COFF.
+ if (!O->isELF()) {
+ WithColor::warning() << "Only ELF-format files are supported\n";
+ return;
+ }
+ auto *ELF = cast<const ELFObjectFileBase>(O);
+
+ for (auto I = ELF->getDynamicSymbolIterators().begin();
+ I != ELF->getDynamicSymbolIterators().end(); ++I) {
+ // We want only global function symbols.
+ SymbolRef::Type Type = unwrapIgnoreError(I->getType());
+ uint32_t Flags = unwrapIgnoreError(I->getFlags());
+ StringRef Name = unwrapIgnoreError(I->getName());
+ if (Type == SymbolRef::ST_Function && (Flags & SymbolRef::SF_Global))
+ insert({Name, true});
+ }
+}
+
+// Unpack an archive and populate from the component object files.
+// This roughly imitates dumpArchive() from llvm-objdump.cpp.
+void SDKNameMap::populateFromArchive(Archive *A) {
+ Error Err = Error::success();
+ int Index = -1;
+ for (auto &C : A->children(Err)) {
+ ++Index;
+ Expected<std::unique_ptr<object::Binary>> ChildOrErr = C.getAsBinary();
+ if (!ChildOrErr) {
+ if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError())) {
+ // Issue a generic warning.
+ consumeError(std::move(E));
+ reportArchiveChildIssue(C, Index, A->getFileName());
+ }
+ continue;
+ }
+ if (ObjectFile *O = dyn_cast<ObjectFile>(&*ChildOrErr.get()))
+ populateFromObject(O);
+ // Ignore non-object archive members.
+ }
+ if (Err)
+ WithColor::defaultErrorHandler(std::move(Err));
+}
+
+// Unpack a library file and extract the global function names.
+void SDKNameMap::populateFromFile(StringRef LibDir, StringRef LibName) {
+ // Pick an arbitrary but reasonable default size.
+ SmallString<255> Filepath(LibDir);
+ sys::path::append(Filepath, LibName);
+ if (!sys::fs::exists(Filepath)) {
+ WithColor::warning() << "Could not find '" << StringRef(Filepath) << "'\n";
+ return;
+ }
+ outs() << "\nLooking for symbols in '" << StringRef(Filepath) << "'\n";
+ auto ExpectedBinary = createBinary(Filepath);
+ if (!ExpectedBinary) {
+ // FIXME: Report this better.
+ WithColor::defaultWarningHandler(ExpectedBinary.takeError());
+ return;
+ }
+ OwningBinary<Binary> OBinary = std::move(*ExpectedBinary);
+ Binary &Binary = *OBinary.getBinary();
+ size_t Precount = size();
+ if (Archive *A = dyn_cast<Archive>(&Binary))
+ populateFromArchive(A);
+ else if (ObjectFile *O = dyn_cast<ObjectFile>(&Binary))
+ populateFromObject(O);
+ else {
+ WithColor::warning() << "Not an Archive or ObjectFile: '"
+ << StringRef(Filepath) << "'\n";
+ return;
+ }
+ if (Precount == size())
+ WithColor::warning() << "No symbols found in '" << StringRef(Filepath)
+ << "'\n";
+ else
+ outs() << "Found " << size() - Precount << " global function symbols in '"
+ << StringRef(Filepath) << "'\n";
+}
+
+int main(int argc, char *argv[]) {
+ InitLLVM X(argc, argv);
+ BumpPtrAllocator A;
+ StringSaver Saver(A);
+ TLICheckerOptTable Tbl;
+ opt::InputArgList Args = Tbl.parseArgs(argc, argv, OPT_UNKNOWN, Saver,
+ [&](StringRef Msg) { fail(Msg); });
+
+ if (Args.hasArg(OPT_help)) {
+ std::string Usage(argv[0]);
+ Usage += " [options] library-file [library-file...]";
+ Tbl.printHelp(outs(), Usage.c_str(),
+ "LLVM TargetLibraryInfo versus SDK checker");
+ outs() << "\nPass @FILE as argument to read options or library names from "
+ "FILE.\n";
+ return 0;
+ }
+
+ TLINames.initialize(Args.getLastArgValue(OPT_triple_EQ));
+
+ // --dump-tli doesn't require any input files.
+ if (Args.hasArg(OPT_dump_tli)) {
+ TLINames.dump();
+ return 0;
+ }
+
+ std::vector<std::string> LibList = Args.getAllArgValues(OPT_INPUT);
+ if (LibList.empty()) {
+ WithColor::error() << "No input files\n";
+ exit(EXIT_FAILURE);
+ }
+ StringRef LibDir = Args.getLastArgValue(OPT_libdir_EQ);
+ bool SeparateMode = Args.hasArg(OPT_separate);
+
+ ReportKind ReportLevel =
+ SeparateMode ? ReportKind::Summary : ReportKind::Discrepancy;
+ if (const opt::Arg *A = Args.getLastArg(OPT_report_EQ)) {
+ ReportLevel = StringSwitch<ReportKind>(A->getValue())
+ .Case("summary", ReportKind::Summary)
+ .Case("discrepancy", ReportKind::Discrepancy)
+ .Case("full", ReportKind::Full)
+ .Default(ReportKind::Error);
+ if (ReportLevel == ReportKind::Error) {
+ WithColor::error() << "invalid option for --report: " << A->getValue();
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ for (size_t I = 0; I < LibList.size(); ++I) {
+ // In SeparateMode we report on input libraries individually; otherwise
+ // we do one big combined search. Reading to the end of LibList here
+ // will cause the outer loop to terminate cleanly.
+ if (SeparateMode) {
+ SDKNames.clear();
+ SDKNames.populateFromFile(LibDir, LibList[I]);
+ if (SDKNames.empty())
+ continue;
+ } else {
+ do
+ SDKNames.populateFromFile(LibDir, LibList[I]);
+ while (++I < LibList.size());
+ if (SDKNames.empty()) {
+ WithColor::error() << "NO symbols found!\n";
+ break;
+ }
+ outs() << "Found a grand total of " << SDKNames.size()
+ << " library symbols\n";
+ }
+ unsigned TLIdoesSDKdoesnt = 0;
+ unsigned TLIdoesntSDKdoes = 0;
+ unsigned TLIandSDKboth = 0;
+ unsigned TLIandSDKneither = 0;
+ for (auto &TLIName : TLINames) {
+ bool TLIHas = TLIName.second;
+ bool SDKHas = SDKNames.count(TLIName.first) == 1;
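+ // Encode the two booleans as a 2-bit value:
+ // 0 = neither, 1 = SDK only, 2 = TLI only, 3 = both.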
+ int Which = int(TLIHas) * 2 + int(SDKHas);
+ switch (Which) {
+ case 0: ++TLIandSDKneither; break;
+ case 1: ++TLIdoesntSDKdoes; break;
+ case 2: ++TLIdoesSDKdoesnt; break;
+ case 3: ++TLIandSDKboth; break;
+ }
+ // If the results match, report only if the user requested a full report.
+ ReportKind Threshold =
+ TLIHas == SDKHas ? ReportKind::Full : ReportKind::Discrepancy;
+ if (Threshold <= ReportLevel) {
+ constexpr char YesNo[2][4] = {"no ", "yes"};
+ constexpr char Indicator[4][3] = {"!!", ">>", "<<", "=="};
+ outs() << Indicator[Which] << " TLI " << YesNo[TLIHas] << " SDK "
+ << YesNo[SDKHas] << ": " << PrintableName(TLIName.first) << '\n';
+ }
+ }
+
+ assert(TLIandSDKboth + TLIandSDKneither + TLIdoesSDKdoesnt +
+ TLIdoesntSDKdoes ==
+ LibFunc::NumLibFuncs);
+ outs() << "<< Total TLI yes SDK no: " << TLIdoesSDKdoesnt
+ << "\n>> Total TLI no SDK yes: " << TLIdoesntSDKdoes
+ << "\n== Total TLI yes SDK yes: " << TLIandSDKboth;
+ if (TLIandSDKboth == 0) {
+ outs() << " *** NO TLI SYMBOLS FOUND";
+ if (SeparateMode)
+ outs() << " in '" << LibList[I] << "'";
+ }
+ outs() << '\n';
+
+ if (!SeparateMode) {
+ if (TLIdoesSDKdoesnt == 0 && TLIdoesntSDKdoes == 0)
+ outs() << "PASS: LLVM TLI matched SDK libraries successfully.\n";
+ else
+ outs() << "FAIL: LLVM TLI doesn't match SDK libraries.\n";
+ }
+ }
+}
diff --git a/llvm/tools/llvm-xray/xray-color-helper.cpp b/llvm/tools/llvm-xray/xray-color-helper.cpp
index e2cae21e162b..b2ed63881bdc 100644
--- a/llvm/tools/llvm-xray/xray-color-helper.cpp
+++ b/llvm/tools/llvm-xray/xray-color-helper.cpp
@@ -21,7 +21,7 @@ using namespace xray;
// Sequential ColorMaps, which are used to represent information
// from some minimum to some maximum.
-static const std::tuple<uint8_t, uint8_t, uint8_t> SequentialMaps[][9] = {
+const std::tuple<uint8_t, uint8_t, uint8_t> SequentialMaps[][9] = {
{// The greys color scheme from http://colorbrewer2.org/
std::make_tuple(255, 255, 255), std::make_tuple(240, 240, 240),
std::make_tuple(217, 217, 217), std::make_tuple(189, 189, 189),
@@ -42,7 +42,7 @@ static const std::tuple<uint8_t, uint8_t, uint8_t> SequentialMaps[][9] = {
std::make_tuple(2, 56, 88)}};
// Sequential Maps extend the last colors given out of range inputs.
-static const std::tuple<uint8_t, uint8_t, uint8_t> SequentialBounds[][2] = {
+const std::tuple<uint8_t, uint8_t, uint8_t> SequentialBounds[][2] = {
{// The Bounds for the greys color scheme
std::make_tuple(255, 255, 255), std::make_tuple(0, 0, 0)},
{// The Bounds for the OrRd color Scheme
@@ -58,7 +58,7 @@ ColorHelper::ColorHelper(ColorHelper::SequentialScheme S)
// representing differences, or a range that goes from negative to positive.
// These take an input in the range [-1,1].
-static const std::tuple<uint8_t, uint8_t, uint8_t> DivergingCoeffs[][11] = {
+const std::tuple<uint8_t, uint8_t, uint8_t> DivergingCoeffs[][11] = {
{// The PiYG color scheme from http://colorbrewer2.org/
std::make_tuple(142, 1, 82), std::make_tuple(197, 27, 125),
std::make_tuple(222, 119, 174), std::make_tuple(241, 182, 218),
@@ -69,7 +69,7 @@ static const std::tuple<uint8_t, uint8_t, uint8_t> DivergingCoeffs[][11] = {
// Diverging maps use out of bounds ranges to show missing data. Missing Right
// Being below min, and missing left being above max.
-static const std::tuple<uint8_t, uint8_t, uint8_t> DivergingBounds[][2] = {
+const std::tuple<uint8_t, uint8_t, uint8_t> DivergingBounds[][2] = {
{// The PiYG color scheme has green and red for missing right and left
// respectively.
std::make_tuple(255, 0, 0), std::make_tuple(0, 255, 0)}};
diff --git a/llvm/tools/llvm-xray/xray-converter.cpp b/llvm/tools/llvm-xray/xray-converter.cpp
index 47cb645a5408..82d0261ec4da 100644
--- a/llvm/tools/llvm-xray/xray-converter.cpp
+++ b/llvm/tools/llvm-xray/xray-converter.cpp
@@ -57,6 +57,15 @@ static cl::opt<bool>
cl::init(false), cl::sub(Convert));
static cl::alias ConvertSymbolize2("y", cl::aliasopt(ConvertSymbolize),
cl::desc("Alias for -symbolize"));
+static cl::opt<bool>
+ NoDemangle("no-demangle",
+ cl::desc("determines whether to demangle function name "
+ "when symbolizing function ids from the input log"),
+ cl::init(false), cl::sub(Convert));
+
+static cl::opt<bool> Demangle("demangle",
+ cl::desc("demangle symbols (default)"),
+ cl::sub(Convert));
static cl::opt<std::string>
ConvertInstrMap("instr_map",
@@ -373,7 +382,10 @@ static CommandRegistration Unused(&Convert, []() -> Error {
}
const auto &FunctionAddresses = Map.getFunctionAddresses();
- symbolize::LLVMSymbolizer Symbolizer;
+ symbolize::LLVMSymbolizer::Options SymbolizerOpts;
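+ // Whichever of -demangle/-no-demangle appears later on the command line
+ // wins; an option that was never specified keeps position 0, so passing
+ // -no-demangle alone disables demangling.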
+ if (Demangle.getPosition() < NoDemangle.getPosition())
+ SymbolizerOpts.Demangle = false;
+ symbolize::LLVMSymbolizer Symbolizer(SymbolizerOpts);
llvm::xray::FuncIdConversionHelper FuncIdHelper(ConvertInstrMap, Symbolizer,
FunctionAddresses);
llvm::xray::TraceConverter TC(FuncIdHelper, ConvertSymbolize);
diff --git a/llvm/tools/llvm-xray/xray-extract.cpp b/llvm/tools/llvm-xray/xray-extract.cpp
index a6ffacc6ab92..52767a00f615 100644
--- a/llvm/tools/llvm-xray/xray-extract.cpp
+++ b/llvm/tools/llvm-xray/xray-extract.cpp
@@ -45,11 +45,12 @@ static cl::opt<bool> ExtractSymbolize("symbolize", cl::value_desc("symbolize"),
cl::sub(Extract));
static cl::alias ExtractSymbolize2("s", cl::aliasopt(ExtractSymbolize),
cl::desc("alias for -symbolize"));
-static cl::opt<bool> ExtractNoDemangle("no-demangle",
- cl::value_desc("no-demangle"),
- cl::init(false),
- cl::desc("don't demangle symbols"),
- cl::sub(Extract));
+static cl::opt<bool> Demangle("demangle",
+ cl::desc("demangle symbols (default)"),
+ cl::sub(Extract));
+static cl::opt<bool> NoDemangle("no-demangle",
+ cl::desc("don't demangle symbols"),
+ cl::sub(Extract));
namespace {
@@ -90,7 +91,7 @@ static CommandRegistration Unused(&Extract, []() -> Error {
const auto &FunctionAddresses =
InstrumentationMapOrError->getFunctionAddresses();
symbolize::LLVMSymbolizer::Options opts;
- if (ExtractNoDemangle)
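+ // The later of -demangle/-no-demangle on the command line wins; options
+ // that never appear keep position 0.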
+ if (Demangle.getPosition() < NoDemangle.getPosition())
opts.Demangle = false;
symbolize::LLVMSymbolizer Symbolizer(opts);
llvm::xray::FuncIdConversionHelper FuncIdHelper(ExtractInput, Symbolizer,
diff --git a/llvm/tools/opt/NewPMDriver.cpp b/llvm/tools/opt/NewPMDriver.cpp
index 8b1fbd09e40b..631d8eed5d7a 100644
--- a/llvm/tools/opt/NewPMDriver.cpp
+++ b/llvm/tools/opt/NewPMDriver.cpp
@@ -137,6 +137,7 @@ extern cl::opt<std::string> ProfileFile;
extern cl::opt<CSPGOKind> CSPGOKindFlag;
extern cl::opt<std::string> CSProfileGenFile;
extern cl::opt<bool> DisableBasicAA;
+extern cl::opt<bool> PrintPipelinePasses;
} // namespace llvm
static cl::opt<std::string>
@@ -173,58 +174,58 @@ bool tryParsePipelineText(PassBuilder &PB,
static void registerEPCallbacks(PassBuilder &PB) {
if (tryParsePipelineText<FunctionPassManager>(PB, PeepholeEPPipeline))
PB.registerPeepholeEPCallback(
- [&PB](FunctionPassManager &PM, PassBuilder::OptimizationLevel Level) {
+ [&PB](FunctionPassManager &PM, OptimizationLevel Level) {
ExitOnError Err("Unable to parse PeepholeEP pipeline: ");
Err(PB.parsePassPipeline(PM, PeepholeEPPipeline));
});
if (tryParsePipelineText<LoopPassManager>(PB,
LateLoopOptimizationsEPPipeline))
PB.registerLateLoopOptimizationsEPCallback(
- [&PB](LoopPassManager &PM, PassBuilder::OptimizationLevel Level) {
+ [&PB](LoopPassManager &PM, OptimizationLevel Level) {
ExitOnError Err("Unable to parse LateLoopOptimizationsEP pipeline: ");
Err(PB.parsePassPipeline(PM, LateLoopOptimizationsEPPipeline));
});
if (tryParsePipelineText<LoopPassManager>(PB, LoopOptimizerEndEPPipeline))
PB.registerLoopOptimizerEndEPCallback(
- [&PB](LoopPassManager &PM, PassBuilder::OptimizationLevel Level) {
+ [&PB](LoopPassManager &PM, OptimizationLevel Level) {
ExitOnError Err("Unable to parse LoopOptimizerEndEP pipeline: ");
Err(PB.parsePassPipeline(PM, LoopOptimizerEndEPPipeline));
});
if (tryParsePipelineText<FunctionPassManager>(PB,
ScalarOptimizerLateEPPipeline))
PB.registerScalarOptimizerLateEPCallback(
- [&PB](FunctionPassManager &PM, PassBuilder::OptimizationLevel Level) {
+ [&PB](FunctionPassManager &PM, OptimizationLevel Level) {
ExitOnError Err("Unable to parse ScalarOptimizerLateEP pipeline: ");
Err(PB.parsePassPipeline(PM, ScalarOptimizerLateEPPipeline));
});
if (tryParsePipelineText<CGSCCPassManager>(PB, CGSCCOptimizerLateEPPipeline))
PB.registerCGSCCOptimizerLateEPCallback(
- [&PB](CGSCCPassManager &PM, PassBuilder::OptimizationLevel Level) {
+ [&PB](CGSCCPassManager &PM, OptimizationLevel Level) {
ExitOnError Err("Unable to parse CGSCCOptimizerLateEP pipeline: ");
Err(PB.parsePassPipeline(PM, CGSCCOptimizerLateEPPipeline));
});
if (tryParsePipelineText<FunctionPassManager>(PB, VectorizerStartEPPipeline))
PB.registerVectorizerStartEPCallback(
- [&PB](FunctionPassManager &PM, PassBuilder::OptimizationLevel Level) {
+ [&PB](FunctionPassManager &PM, OptimizationLevel Level) {
ExitOnError Err("Unable to parse VectorizerStartEP pipeline: ");
Err(PB.parsePassPipeline(PM, VectorizerStartEPPipeline));
});
if (tryParsePipelineText<ModulePassManager>(PB, PipelineStartEPPipeline))
PB.registerPipelineStartEPCallback(
- [&PB](ModulePassManager &PM, PassBuilder::OptimizationLevel) {
+ [&PB](ModulePassManager &PM, OptimizationLevel) {
ExitOnError Err("Unable to parse PipelineStartEP pipeline: ");
Err(PB.parsePassPipeline(PM, PipelineStartEPPipeline));
});
if (tryParsePipelineText<ModulePassManager>(
PB, PipelineEarlySimplificationEPPipeline))
PB.registerPipelineEarlySimplificationEPCallback(
- [&PB](ModulePassManager &PM, PassBuilder::OptimizationLevel) {
+ [&PB](ModulePassManager &PM, OptimizationLevel) {
ExitOnError Err("Unable to parse EarlySimplification pipeline: ");
Err(PB.parsePassPipeline(PM, PipelineEarlySimplificationEPPipeline));
});
if (tryParsePipelineText<FunctionPassManager>(PB, OptimizerLastEPPipeline))
PB.registerOptimizerLastEPCallback(
- [&PB](ModulePassManager &PM, PassBuilder::OptimizationLevel) {
+ [&PB](ModulePassManager &PM, OptimizationLevel) {
ExitOnError Err("Unable to parse OptimizerLastEP pipeline: ");
Err(PB.parsePassPipeline(PM, OptimizerLastEPPipeline));
});
@@ -259,12 +260,9 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM,
PGOOptions::SampleUse);
break;
case NoPGO:
- if (DebugInfoForProfiling)
+ if (DebugInfoForProfiling || PseudoProbeForProfiling)
P = PGOOptions("", "", "", PGOOptions::NoAction, PGOOptions::NoCSAction,
- true);
- else if (PseudoProbeForProfiling)
- P = PGOOptions("", "", "", PGOOptions::NoAction, PGOOptions::NoCSAction,
- false, true);
+ DebugInfoForProfiling, PseudoProbeForProfiling);
else
P = None;
}
@@ -287,6 +285,9 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM,
P->CSAction = PGOOptions::CSIRUse;
}
}
+ if (TM)
+ TM->setPGOOption(P);
+
LoopAnalysisManager LAM;
FunctionAnalysisManager FAM;
CGSCCAnalysisManager CGAM;
@@ -339,18 +340,17 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM,
PB.registerPipelineParsingCallback(
[](StringRef Name, ModulePassManager &MPM,
ArrayRef<PassBuilder::PipelineElement>) {
+ AddressSanitizerOptions Opts;
if (Name == "asan-pipeline") {
MPM.addPass(
RequireAnalysisPass<ASanGlobalsMetadataAnalysis, Module>());
- MPM.addPass(
- createModuleToFunctionPassAdaptor(AddressSanitizerPass()));
- MPM.addPass(ModuleAddressSanitizerPass());
+ MPM.addPass(ModuleAddressSanitizerPass(Opts));
return true;
} else if (Name == "asan-function-pipeline") {
MPM.addPass(
RequireAnalysisPass<ASanGlobalsMetadataAnalysis, Module>());
MPM.addPass(
- createModuleToFunctionPassAdaptor(AddressSanitizerPass()));
+ createModuleToFunctionPassAdaptor(AddressSanitizerPass(Opts)));
return true;
}
return false;
@@ -411,6 +411,7 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM,
if (EnableDebugify)
MPM.addPass(NewPMDebugifyPass());
+ // Add passes according to the -passes options.
if (!PassPipeline.empty()) {
assert(Passes.empty() &&
"PassPipeline and Passes should not both contain passes");
@@ -419,10 +420,26 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM,
return false;
}
}
+ // Add passes specified using the legacy PM syntax (i.e. not using
+ // -passes). This should be removed later when such support has been
+ // deprecated, i.e. when all lit tests running opt (and not using
+ // -enable-new-pm=0) have been updated to use -passes.
for (auto PassName : Passes) {
std::string ModifiedPassName(PassName.begin(), PassName.end());
if (PB.isAnalysisPassName(PassName))
ModifiedPassName = "require<" + ModifiedPassName + ">";
+ // FIXME: These translations are supposed to be removed when lit tests that
+ // use these names have been updated to use the -passes syntax (and when the
+ // support for using the old syntax to specify passes is considered
+ // deprecated for the new PM).
+ if (ModifiedPassName == "early-cse-memssa")
+ ModifiedPassName = "early-cse<memssa>";
+ else if (ModifiedPassName == "post-inline-ee-instrument")
+ ModifiedPassName = "ee-instrument<post-inline>";
+ else if (ModifiedPassName == "loop-extract-single")
+ ModifiedPassName = "loop-extract<single>";
+ else if (ModifiedPassName == "lower-matrix-intrinsics-minimal")
+ ModifiedPassName = "lower-matrix-intrinsics<minimal>";
if (auto Err = PB.parsePassPipeline(MPM, ModifiedPassName)) {
errs() << Arg0 << ": " << toString(std::move(Err)) << "\n";
return false;
@@ -455,6 +472,17 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM,
// Before executing passes, print the final values of the LLVM options.
cl::PrintOptionValues();
+ // Print a textual, '-passes=' compatible, representation of the pipeline if
+ // requested.
+ if (PrintPipelinePasses) {
+ MPM.printPipeline(outs(), [&PIC](StringRef ClassName) {
+ auto PassName = PIC.getPassNameForClassName(ClassName);
+ return PassName.empty() ? ClassName : PassName;
+ });
+ outs() << "\n";
+ return true;
+ }
+
// Now that we have all of the passes ready, run them.
MPM.run(M, MAM);
diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp
index 094f517fb703..7793a5471793 100644
--- a/llvm/tools/opt/opt.cpp
+++ b/llvm/tools/opt/opt.cpp
@@ -38,6 +38,7 @@
#include "llvm/LinkAllIR.h"
#include "llvm/LinkAllPasses.h"
#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Remarks/HotnessThresholdParser.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FileSystem.h"
@@ -46,7 +47,6 @@
#include "llvm/Support/PluginLoader.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/SystemUtils.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/YAMLTraits.h"
@@ -102,9 +102,6 @@ static cl::opt<bool>
Force("f", cl::desc("Enable binary output on terminals"));
static cl::opt<bool>
-PrintEachXForm("p", cl::desc("Print module after each transformation"));
-
-static cl::opt<bool>
NoOutput("disable-output",
cl::desc("Do not write result bitcode file"), cl::Hidden);
@@ -146,17 +143,7 @@ static cl::opt<bool>
StripNamedMetadata("strip-named-metadata",
cl::desc("Strip module-level named metadata"));
-static cl::opt<bool>
- DisableInline("disable-inlining",
- cl::desc("Do not run the inliner pass (legacy PM only)"));
-
-static cl::opt<bool>
-DisableOptimizations("disable-opt",
- cl::desc("Do not run any optimization passes"));
-static cl::opt<bool> StandardLinkOpts(
- "std-link-opts",
- cl::desc("Include the standard link time optimizations (legacy PM only)"));
static cl::opt<bool>
OptLevelO0("O0", cl::desc("Optimization level 0. Similar to clang -O0. "
@@ -368,9 +355,7 @@ static void AddOptimizationPasses(legacy::PassManagerBase &MPM,
Builder.OptLevel = OptLevel;
Builder.SizeLevel = SizeLevel;
- if (DisableInline) {
- // No inlining pass
- } else if (OptLevel > 1) {
+ if (OptLevel > 1) {
Builder.Inliner = createFunctionInliningPass(OptLevel, SizeLevel, false);
} else {
Builder.Inliner = createAlwaysInlinerLegacyPass();
@@ -418,17 +403,6 @@ static void AddOptimizationPasses(legacy::PassManagerBase &MPM,
Builder.populateModulePassManager(MPM);
}
-static void AddStandardLinkPasses(legacy::PassManagerBase &PM) {
- PassManagerBuilder Builder;
- Builder.VerifyInput = true;
- if (DisableOptimizations)
- Builder.OptLevel = 0;
-
- if (!DisableInline)
- Builder.Inliner = createFunctionInliningPass();
- Builder.populateLTOPassManager(PM);
-}
-
//===----------------------------------------------------------------------===//
// CodeGen-related helper functions.
//
@@ -507,9 +481,10 @@ static bool shouldPinPassToLegacyPM(StringRef Pass) {
return false;
std::vector<StringRef> PassNamePrefix = {
- "x86-", "xcore-", "wasm-", "systemz-", "ppc-", "nvvm-", "nvptx-",
- "mips-", "lanai-", "hexagon-", "bpf-", "avr-", "thumb2-", "arm-",
- "si-", "gcn-", "amdgpu-", "aarch64-", "amdgcn-", "polly-"};
+ "x86-", "xcore-", "wasm-", "systemz-", "ppc-", "nvvm-",
+ "nvptx-", "mips-", "lanai-", "hexagon-", "bpf-", "avr-",
+ "thumb2-", "arm-", "si-", "gcn-", "amdgpu-", "aarch64-",
+ "amdgcn-", "polly-", "riscv-"};
std::vector<StringRef> PassNameContain = {"ehprepare"};
std::vector<StringRef> PassNameExact = {
"safe-stack", "cost-model",
@@ -797,19 +772,32 @@ int main(int argc, char **argv) {
<< "Cannot specify passes via both -foo-pass and --passes=foo-pass\n";
return 1;
}
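+ // Each OptLevelO* flag is a cl::opt<bool>, so summing them counts how many
+ // -O# options were given on the command line.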
+ auto NumOLevel = OptLevelO0 + OptLevelO1 + OptLevelO2 + OptLevelO3 +
+ OptLevelOs + OptLevelOz;
+ if (NumOLevel > 1) {
+ errs() << "Cannot specify multiple -O#\n";
+ return 1;
+ }
+ if (NumOLevel > 0 && PassPipeline.getNumOccurrences() > 0) {
+ errs() << "Cannot specify -O# and --passes=, use "
+ "-passes='default<O#>,other-pass'\n";
+ return 1;
+ }
+ std::string Pipeline = PassPipeline;
+
SmallVector<StringRef, 4> Passes;
if (OptLevelO0)
- Passes.push_back("default<O0>");
+ Pipeline = "default<O0>";
if (OptLevelO1)
- Passes.push_back("default<O1>");
+ Pipeline = "default<O1>";
if (OptLevelO2)
- Passes.push_back("default<O2>");
+ Pipeline = "default<O2>";
if (OptLevelO3)
- Passes.push_back("default<O3>");
+ Pipeline = "default<O3>";
if (OptLevelOs)
- Passes.push_back("default<Os>");
+ Pipeline = "default<Os>";
if (OptLevelOz)
- Passes.push_back("default<Oz>");
+ Pipeline = "default<Oz>";
for (const auto &P : PassList)
Passes.push_back(P->getPassArgument());
OutputKind OK = OK_NoOutput;
@@ -828,7 +816,7 @@ int main(int argc, char **argv) {
// string. Hand off the rest of the functionality to the new code for that
// layer.
return runPassPipeline(argv[0], *M, TM.get(), &TLII, Out.get(),
- ThinLinkOut.get(), RemarksFile.get(), PassPipeline,
+ ThinLinkOut.get(), RemarksFile.get(), Pipeline,
Passes, OK, VK, PreserveAssemblyUseListOrder,
PreserveBitcodeUseListOrder, EmitSummaryIndex,
EmitModuleHash, EnableDebugify)
@@ -909,12 +897,6 @@ int main(int argc, char **argv) {
// Create a new optimization pass for each one specified on the command line
for (unsigned i = 0; i < PassList.size(); ++i) {
- if (StandardLinkOpts &&
- StandardLinkOpts.getPosition() < PassList.getPosition(i)) {
- AddStandardLinkPasses(Passes);
- StandardLinkOpts = false;
- }
-
if (OptLevelO0 && OptLevelO0.getPosition() < PassList.getPosition(i)) {
AddOptimizationPasses(Passes, *FPasses, TM.get(), 0, 0);
OptLevelO0 = false;
@@ -976,15 +958,6 @@ int main(int argc, char **argv) {
}
}
}
-
- if (PrintEachXForm)
- Passes.add(
- createPrintModulePass(errs(), "", PreserveAssemblyUseListOrder));
- }
-
- if (StandardLinkOpts) {
- AddStandardLinkPasses(Passes);
- StandardLinkOpts = false;
}
if (OptLevelO0)
diff --git a/llvm/utils/TableGen/AsmMatcherEmitter.cpp b/llvm/utils/TableGen/AsmMatcherEmitter.cpp
index 00bdd127e3c2..be17d5c718c2 100644
--- a/llvm/utils/TableGen/AsmMatcherEmitter.cpp
+++ b/llvm/utils/TableGen/AsmMatcherEmitter.cpp
@@ -636,6 +636,15 @@ struct MatchableInfo {
if (RequiredFeatures.size() != RHS.RequiredFeatures.size())
return RequiredFeatures.size() > RHS.RequiredFeatures.size();
+ // For X86 AVX/AVX512 instructions, we prefer the VEX encoding because it
+ // is smaller. Since X86InstrSSE.td is included ahead of X86InstrAVX512.td,
+ // the AVX instruction ID is less than the AVX512 ID. We use the ID to sort
+ // AVX instructions before AVX512 instructions in the matching table.
+ if (TheDef->isSubClassOf("Instruction") &&
+ TheDef->getValueAsBit("HasPositionOrder"))
+ return TheDef->getID() < RHS.TheDef->getID();
+
return false;
}
@@ -1062,7 +1071,7 @@ bool MatchableInfo::validate(StringRef CommentDelimiter, bool IsAlias) const {
// Remove comments from the asm string. We know that the asmstring only
// has one line.
if (!CommentDelimiter.empty() &&
- StringRef(AsmString).find(CommentDelimiter) != StringRef::npos)
+ StringRef(AsmString).contains(CommentDelimiter))
PrintFatalError(TheDef->getLoc(),
"asmstring for instruction has comment character in it, "
"mark it isCodeGenOnly");
@@ -1077,7 +1086,7 @@ bool MatchableInfo::validate(StringRef CommentDelimiter, bool IsAlias) const {
std::set<std::string> OperandNames;
for (const AsmOperand &Op : AsmOperands) {
StringRef Tok = Op.Token;
- if (Tok[0] == '$' && Tok.find(':') != StringRef::npos)
+ if (Tok[0] == '$' && Tok.contains(':'))
PrintFatalError(TheDef->getLoc(),
"matchable with operand modifier '" + Tok +
"' not supported by asm matcher. Mark isCodeGenOnly!");
@@ -3915,8 +3924,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
if (HasDeprecation) {
OS << " std::string Info;\n";
- OS << " if (!getParser().getTargetParser().\n";
- OS << " getTargetOptions().MCNoDeprecatedWarn &&\n";
+ OS << " if (!getParser().getTargetParser().getTargetOptions().MCNoDeprecatedWarn &&\n";
OS << " MII.getDeprecatedInfo(Inst, getSTI(), Info)) {\n";
OS << " SMLoc Loc = ((" << Target.getName()
<< "Operand &)*Operands[0]).getStartLoc();\n";
diff --git a/llvm/utils/TableGen/AsmWriterEmitter.cpp b/llvm/utils/TableGen/AsmWriterEmitter.cpp
index 94fd8f7e92b4..bb13c4033db7 100644
--- a/llvm/utils/TableGen/AsmWriterEmitter.cpp
+++ b/llvm/utils/TableGen/AsmWriterEmitter.cpp
@@ -457,9 +457,14 @@ void AsmWriterEmitter::EmitPrintInstruction(
StringRef ClassName = AsmWriter->getValueAsString("AsmWriterClassName");
bool PassSubtarget = AsmWriter->getValueAsInt("PassSubtarget");
+ // This function has some huge switch statements that cause excessive
+ // compile time in LLVM profile instrumentation builds. This print function
+ // is usually not called frequently during compilation. Here we disable
+ // profile instrumentation for this function.
O << "/// printInstruction - This method is automatically generated by "
"tablegen\n"
"/// from the instruction set description.\n"
+ "LLVM_NO_PROFILE_INSTRUMENT_FUNCTION\n"
"void "
<< Target.getName() << ClassName
<< "::printInstruction(const MCInst *MI, uint64_t Address, "
diff --git a/llvm/utils/TableGen/CodeEmitterGen.cpp b/llvm/utils/TableGen/CodeEmitterGen.cpp
index ffc2878d3508..ee77ef5eda5f 100644
--- a/llvm/utils/TableGen/CodeEmitterGen.cpp
+++ b/llvm/utils/TableGen/CodeEmitterGen.cpp
@@ -515,7 +515,7 @@ void CodeEmitterGen::run(raw_ostream &o) {
<< " std::string msg;\n"
<< " raw_string_ostream Msg(msg);\n"
<< " Msg << \"Not supported instr: \" << MI;\n"
- << " report_fatal_error(Msg.str());\n"
+ << " report_fatal_error(Msg.str().c_str());\n"
<< " }\n";
if (UseAPInt)
o << " Inst = Value;\n";
@@ -638,7 +638,7 @@ void CodeEmitterGen::run(raw_ostream &o) {
<< " if (MissingFeatures.test(i))\n"
<< " Msg << SubtargetFeatureNames[i] << \" \";\n"
<< " Msg << \"predicate(s) are not met\";\n"
- << " report_fatal_error(Msg.str());\n"
+ << " report_fatal_error(Msg.str().c_str());\n"
<< " }\n"
<< "#else\n"
<< " // Silence unused variable warning on targets that don't use MCII for "
diff --git a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
index c1a3a34d928b..4a247050ceeb 100644
--- a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
+++ b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
@@ -451,13 +451,16 @@ static Iter max_if(Iter B, Iter E, Pred P, Less L) {
}
/// Make sure that for each type in Small, there exists a larger type in Big.
-bool TypeInfer::EnforceSmallerThan(TypeSetByHwMode &Small,
- TypeSetByHwMode &Big) {
+bool TypeInfer::EnforceSmallerThan(TypeSetByHwMode &Small, TypeSetByHwMode &Big,
+ bool SmallIsVT) {
ValidateOnExit _1(Small, *this), _2(Big, *this);
if (TP.hasError())
return false;
bool Changed = false;
+ assert((!SmallIsVT || !Small.empty()) &&
+ "Small should not be empty for SDTCisVTSmallerThanOp");
+
if (Small.empty())
Changed |= EnforceAny(Small);
if (Big.empty())
@@ -476,7 +479,9 @@ bool TypeInfer::EnforceSmallerThan(TypeSetByHwMode &Small,
TypeSetByHwMode::SetType &S = Small.get(M);
TypeSetByHwMode::SetType &B = Big.get(M);
- if (any_of(S, isIntegerOrPtr) && any_of(S, isIntegerOrPtr)) {
+ assert((!SmallIsVT || !S.empty()) && "Expected non-empty type");
+
+ if (any_of(S, isIntegerOrPtr) && any_of(B, isIntegerOrPtr)) {
auto NotInt = [](MVT VT) { return !isIntegerOrPtr(VT); };
Changed |= berase_if(S, NotInt);
Changed |= berase_if(B, NotInt);
@@ -484,6 +489,11 @@ bool TypeInfer::EnforceSmallerThan(TypeSetByHwMode &Small,
auto NotFP = [](MVT VT) { return !isFloatingPoint(VT); };
Changed |= berase_if(S, NotFP);
Changed |= berase_if(B, NotFP);
+ } else if (SmallIsVT && B.empty()) {
+ // B is empty and since S is a specific VT, it will never be empty. Don't
+ // report this as a change, just clear S and continue. This prevents an
+ // infinite loop.
+ S.clear();
} else if (S.empty() || B.empty()) {
Changed = !S.empty() || !B.empty();
S.clear();
@@ -1612,20 +1622,22 @@ bool SDTypeConstraint::ApplyTypeConstraint(TreePatternNode *N,
unsigned OResNo = 0;
TreePatternNode *OtherNode =
getOperandNum(x.SDTCisSameAs_Info.OtherOperandNum, N, NodeInfo, OResNo);
- return NodeToApply->UpdateNodeType(ResNo, OtherNode->getExtType(OResNo),TP)|
- OtherNode->UpdateNodeType(OResNo,NodeToApply->getExtType(ResNo),TP);
+ return (int)NodeToApply->UpdateNodeType(ResNo,
+ OtherNode->getExtType(OResNo), TP) |
+ (int)OtherNode->UpdateNodeType(OResNo,
+ NodeToApply->getExtType(ResNo), TP);
}
case SDTCisVTSmallerThanOp: {
// The NodeToApply must be a leaf node that is a VT. OtherOperandNum must
// have an integer type that is smaller than the VT.
if (!NodeToApply->isLeaf() ||
!isa<DefInit>(NodeToApply->getLeafValue()) ||
- !static_cast<DefInit*>(NodeToApply->getLeafValue())->getDef()
+ !cast<DefInit>(NodeToApply->getLeafValue())->getDef()
->isSubClassOf("ValueType")) {
TP.error(N->getOperator()->getName() + " expects a VT operand!");
return false;
}
- DefInit *DI = static_cast<DefInit*>(NodeToApply->getLeafValue());
+ DefInit *DI = cast<DefInit>(NodeToApply->getLeafValue());
const CodeGenTarget &T = TP.getDAGPatterns().getTargetInfo();
auto VVT = getValueTypeByHwMode(DI->getDef(), T.getHwModes());
TypeSetByHwMode TypeListTmp(VVT);
@@ -1635,7 +1647,8 @@ bool SDTypeConstraint::ApplyTypeConstraint(TreePatternNode *N,
getOperandNum(x.SDTCisVTSmallerThanOp_Info.OtherOperandNum, N, NodeInfo,
OResNo);
- return TI.EnforceSmallerThan(TypeListTmp, OtherNode->getExtType(OResNo));
+ return TI.EnforceSmallerThan(TypeListTmp, OtherNode->getExtType(OResNo),
+ /*SmallIsVT*/ true);
}
case SDTCisOpSmallerThanOp: {
unsigned BResNo = 0;
@@ -3819,7 +3832,7 @@ void CodeGenDAGPatterns::parseInstructionPattern(
InstInputs.erase(OpName); // It occurred, remove from map.
if (InVal->isLeaf() && isa<DefInit>(InVal->getLeafValue())) {
- Record *InRec = static_cast<DefInit*>(InVal->getLeafValue())->getDef();
+ Record *InRec = cast<DefInit>(InVal->getLeafValue())->getDef();
if (!checkOperandClass(Op, InRec))
I.error("Operand $" + OpName + "'s register class disagrees"
" between the operand and pattern");
diff --git a/llvm/utils/TableGen/CodeGenDAGPatterns.h b/llvm/utils/TableGen/CodeGenDAGPatterns.h
index a69f1e2e3030..39d81230a4f2 100644
--- a/llvm/utils/TableGen/CodeGenDAGPatterns.h
+++ b/llvm/utils/TableGen/CodeGenDAGPatterns.h
@@ -298,8 +298,11 @@ struct TypeInfer {
/// unchanged.
bool EnforceAny(TypeSetByHwMode &Out);
/// Make sure that for each type in \p Small, there exists a larger type
- /// in \p Big.
- bool EnforceSmallerThan(TypeSetByHwMode &Small, TypeSetByHwMode &Big);
+ /// in \p Big. \p SmallIsVT indicates that this is being called for
+ /// SDTCisVTSmallerThanOp. In that case the TypeSetByHwMode is re-created for
+ /// each call and needs special consideration in how we detect changes.
+ bool EnforceSmallerThan(TypeSetByHwMode &Small, TypeSetByHwMode &Big,
+ bool SmallIsVT = false);
/// 1. Ensure that for each type T in \p Vec, T is a vector type, and that
/// for each type U in \p Elem, U is a scalar type.
/// 2. Ensure that for each (scalar) type U in \p Elem, there exists a
diff --git a/llvm/utils/TableGen/CodeGenMapTable.cpp b/llvm/utils/TableGen/CodeGenMapTable.cpp
index 6f718acbac3e..38871eb8cf3c 100644
--- a/llvm/utils/TableGen/CodeGenMapTable.cpp
+++ b/llvm/utils/TableGen/CodeGenMapTable.cpp
@@ -5,7 +5,7 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-// CodeGenMapTable provides functionality for the TabelGen to create
+// CodeGenMapTable provides functionality for the TableGen to create
// relation mapping between instructions. Relation models are defined using
// InstrMapping as a base class. This file implements the functionality which
// parses these definitions and generates relation maps using the information
@@ -443,14 +443,16 @@ void MapTableEmitter::emitMapFuncBody(raw_ostream &OS,
if (ValueCols.size() > 1) {
for (unsigned i = 0, e = ValueCols.size(); i < e; i++) {
ListInit *ColumnI = ValueCols[i];
+ OS << " if (";
for (unsigned j = 0, ColSize = ColumnI->size(); j < ColSize; ++j) {
std::string ColName = ColFields->getElement(j)->getAsUnquotedString();
- OS << " if (in" << ColName;
+ OS << "in" << ColName;
OS << " == ";
OS << ColName << "_" << ColumnI->getElement(j)->getAsUnquotedString();
- if (j < ColumnI->size() - 1) OS << " && ";
- else OS << ")\n";
+ if (j < ColumnI->size() - 1)
+ OS << " && ";
}
+ OS << ")\n";
OS << " return " << InstrMapDesc.getName();
OS << "Table[mid]["<<i+1<<"];\n";
}
@@ -480,9 +482,10 @@ void MapTableEmitter::emitTablesWithFunc(raw_ostream &OS) {
if (ValueCols.size() > 1) {
for (Init *CF : ColFields->getValues()) {
std::string ColName = CF->getAsUnquotedString();
- OS << ", enum " << ColName << " in" << ColName << ") {\n";
+ OS << ", enum " << ColName << " in" << ColName;
}
- } else { OS << ") {\n"; }
+ }
+ OS << ") {\n";
// Emit map table.
unsigned TableSize = emitBinSearchTable(OS);
diff --git a/llvm/utils/TableGen/CodeGenRegisters.cpp b/llvm/utils/TableGen/CodeGenRegisters.cpp
index 930b7742103e..afaeb73ffab1 100644
--- a/llvm/utils/TableGen/CodeGenRegisters.cpp
+++ b/llvm/utils/TableGen/CodeGenRegisters.cpp
@@ -734,7 +734,7 @@ static void sortAndUniqueRegisters(CodeGenRegister::Vec &M) {
CodeGenRegisterClass::CodeGenRegisterClass(CodeGenRegBank &RegBank, Record *R)
: TheDef(R), Name(std::string(R->getName())),
- TopoSigs(RegBank.getNumTopoSigs()), EnumValue(-1) {
+ TopoSigs(RegBank.getNumTopoSigs()), EnumValue(-1), TSFlags(0) {
GeneratePressureSet = R->getValueAsBit("GeneratePressureSet");
std::vector<Record*> TypeList = R->getValueAsListOfDefs("RegTypes");
if (TypeList.empty())
@@ -802,6 +802,12 @@ CodeGenRegisterClass::CodeGenRegisterClass(CodeGenRegBank &RegBank, Record *R)
if (AllocationPriority < 0 || AllocationPriority > 63)
PrintFatalError(R->getLoc(), "AllocationPriority out of range [0,63]");
this->AllocationPriority = AllocationPriority;
+
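+ // Collect the TSFlags bits declared on the register class record into a
+ // single integer.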
+ BitsInit *TSF = R->getValueAsBitsInit("TSFlags");
+ for (unsigned I = 0, E = TSF->getNumBits(); I != E; ++I) {
+ BitInit *Bit = cast<BitInit>(TSF->getBit(I));
+ TSFlags |= uint8_t(Bit->getValue()) << I;
+ }
}
// Create an inferred register class that was missing from the .td files.
@@ -811,7 +817,7 @@ CodeGenRegisterClass::CodeGenRegisterClass(CodeGenRegBank &RegBank,
StringRef Name, Key Props)
: Members(*Props.Members), TheDef(nullptr), Name(std::string(Name)),
TopoSigs(RegBank.getNumTopoSigs()), EnumValue(-1), RSI(Props.RSI),
- CopyCost(0), Allocatable(true), AllocationPriority(0) {
+ CopyCost(0), Allocatable(true), AllocationPriority(0), TSFlags(0) {
Artificial = true;
GeneratePressureSet = false;
for (const auto R : Members) {
@@ -839,6 +845,7 @@ void CodeGenRegisterClass::inheritProperties(CodeGenRegBank &RegBank) {
});
AltOrderSelect = Super.AltOrderSelect;
AllocationPriority = Super.AllocationPriority;
+ TSFlags = Super.TSFlags;
GeneratePressureSet |= Super.GeneratePressureSet;
// Copy all allocation orders, filter out foreign registers from the larger
@@ -1617,9 +1624,9 @@ static void computeUberSets(std::vector<UberRegSet> &UberSets,
assert(USetID && "register number 0 is invalid");
AllocatableRegs.insert((*Regs.begin())->EnumValue);
- for (auto I = std::next(Regs.begin()), E = Regs.end(); I != E; ++I) {
- AllocatableRegs.insert((*I)->EnumValue);
- UberSetIDs.join(USetID, (*I)->EnumValue);
+ for (const CodeGenRegister *CGR : llvm::drop_begin(Regs)) {
+ AllocatableRegs.insert(CGR->EnumValue);
+ UberSetIDs.join(USetID, CGR->EnumValue);
}
}
// Combine non-allocatable regs.
@@ -1908,6 +1915,9 @@ void CodeGenRegBank::computeRegUnitSets() {
RegUnitSets.pop_back();
}
+ if (RegUnitSets.empty())
+ PrintFatalError("RegUnitSets cannot be empty!");
+
LLVM_DEBUG(dbgs() << "\nBefore pruning:\n"; for (unsigned USIdx = 0,
USEnd = RegUnitSets.size();
USIdx < USEnd; ++USIdx) {
@@ -2018,7 +2028,8 @@ void CodeGenRegBank::computeRegUnitSets() {
}
}
LLVM_DEBUG(dbgs() << "\n");
- assert(!RegClassUnitSets[RCIdx].empty() && "missing unit set for regclass");
+ assert((!RegClassUnitSets[RCIdx].empty() || !RC.GeneratePressureSet) &&
+ "missing unit set for regclass");
}
// For each register unit, ensure that we have the list of UnitSets that
diff --git a/llvm/utils/TableGen/CodeGenRegisters.h b/llvm/utils/TableGen/CodeGenRegisters.h
index 6a0696011a40..c9fcf83b0a8a 100644
--- a/llvm/utils/TableGen/CodeGenRegisters.h
+++ b/llvm/utils/TableGen/CodeGenRegisters.h
@@ -332,6 +332,7 @@ namespace llvm {
bool Allocatable;
StringRef AltOrderSelect;
uint8_t AllocationPriority;
+ uint8_t TSFlags;
/// Contains the combination of the lane masks of all subregisters.
LaneBitmask LaneMask;
/// True if there are at least 2 subregisters which do not interfere.
diff --git a/llvm/utils/TableGen/CodeGenTarget.cpp b/llvm/utils/TableGen/CodeGenTarget.cpp
index 7311819f77ff..137f99078faf 100644
--- a/llvm/utils/TableGen/CodeGenTarget.cpp
+++ b/llvm/utils/TableGen/CodeGenTarget.cpp
@@ -77,6 +77,7 @@ StringRef llvm::getEnumName(MVT::SimpleValueType T) {
case MVT::ppcf128: return "MVT::ppcf128";
case MVT::x86mmx: return "MVT::x86mmx";
case MVT::x86amx: return "MVT::x86amx";
+ case MVT::i64x8: return "MVT::i64x8";
case MVT::Glue: return "MVT::Glue";
case MVT::isVoid: return "MVT::isVoid";
case MVT::v1i1: return "MVT::v1i1";
diff --git a/llvm/utils/TableGen/RISCVCompressInstEmitter.cpp b/llvm/utils/TableGen/CompressInstEmitter.cpp
index e931801f82a4..94ad6ee285d4 100644
--- a/llvm/utils/TableGen/RISCVCompressInstEmitter.cpp
+++ b/llvm/utils/TableGen/CompressInstEmitter.cpp
@@ -1,17 +1,17 @@
-//===- RISCVCompressInstEmitter.cpp - Generator for RISCV Compression -===//
+//===-------- CompressInstEmitter.cpp - Generator for Compression ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
-// RISCVCompressInstEmitter implements a tablegen-driven CompressPat based
-// RISCV Instruction Compression mechanism.
+// CompressInstEmitter implements a tablegen-driven CompressPat based
+// Instruction Compression mechanism.
//
-//===--------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
//
-// RISCVCompressInstEmitter implements a tablegen-driven CompressPat Instruction
-// Compression mechanism for generating RISCV compressed instructions
-// (C ISA Extension) from the expanded instruction form.
+// CompressInstEmitter implements a tablegen-driven CompressPat Instruction
+// Compression mechanism for generating compressed instructions from the
+// expanded instruction form.
// This tablegen backend processes CompressPat declarations in a
// td file and generates all the required checks to validate the pattern
@@ -21,10 +21,18 @@
// immediate inputs.
//
// Example:
-// class CompressPat<dag input, dag output> {
+// /// Defines a Pat match between compressed and uncompressed instruction.
+// /// The relationship and helper function generation are handled by
+// /// CompressInstEmitter backend.
+// class CompressPat<dag input, dag output, list<Predicate> predicates = []> {
+// /// Uncompressed instruction description.
// dag Input = input;
-// dag Output = output;
-// list<Predicate> Predicates = [];
+// /// Compressed instruction description.
+// dag Output = output;
+// /// Predicates that must be true for this to match.
+// list<Predicate> Predicates = predicates;
+// /// Duplicate match when tied operand is just different.
+// bit isCompressOnly = false;
// }
//
// let Predicates = [HasStdExtC] in {
@@ -32,10 +40,9 @@
// (C_ADD GPRNoX0:$rs1, GPRNoX0:$rs2)>;
// }
//
-// The result is an auto-generated header file
-// 'RISCVGenCompressInstEmitter.inc' which exports two functions for
-// compressing/uncompressing MCInst instructions, plus
-// some helper functions:
+// The <TargetName>GenCompressInstEmitter.inc is an auto-generated header
+// file which exports two functions for compressing/uncompressing MCInst
+// instructions, plus some helper functions:
//
// bool compressInst(MCInst &OutInst, const MCInst &MI,
// const MCSubtargetInfo &STI,
@@ -49,9 +56,9 @@
// an instruction is compressible:
//
// bool isCompressibleInst(const MachineInstr& MI,
-// const RISCVSubtarget *Subtarget,
-// const MCRegisterInfo &MRI,
-// const MCSubtargetInfo &STI);
+// const <TargetName>Subtarget *Subtarget,
+// const MCRegisterInfo &MRI,
+// const MCSubtargetInfo &STI);
//
// The clients that include this auto-generated header file and
// invoke these functions can compress an instruction before emitting
@@ -79,28 +86,35 @@ using namespace llvm;
#define DEBUG_TYPE "compress-inst-emitter"
namespace {
-class RISCVCompressInstEmitter {
+class CompressInstEmitter {
struct OpData {
enum MapKind { Operand, Imm, Reg };
MapKind Kind;
union {
- unsigned Operand; // Operand number mapped to.
- int64_t Imm; // Integer immediate value.
- Record *Reg; // Physical register.
+ // Operand number mapped to.
+ unsigned Operand;
+ // Integer immediate value.
+ int64_t Imm;
+ // Physical register.
+ Record *Reg;
} Data;
- int TiedOpIdx = -1; // Tied operand index within the instruction.
+ // Tied operand index within the instruction.
+ int TiedOpIdx = -1;
};
struct CompressPat {
- CodeGenInstruction Source; // The source instruction definition.
- CodeGenInstruction Dest; // The destination instruction to transform to.
- std::vector<Record *>
- PatReqFeatures; // Required target features to enable pattern.
- IndexedMap<OpData>
- SourceOperandMap; // Maps operands in the Source Instruction to
- // the corresponding Dest instruction operand.
- IndexedMap<OpData>
- DestOperandMap; // Maps operands in the Dest Instruction
- // to the corresponding Source instruction operand.
+ // The source instruction definition.
+ CodeGenInstruction Source;
+ // The destination instruction to transform to.
+ CodeGenInstruction Dest;
+ // Required target features to enable pattern.
+ std::vector<Record *> PatReqFeatures;
+ // Maps operands in the Source Instruction to
+ // the corresponding Dest instruction operand.
+ IndexedMap<OpData> SourceOperandMap;
+ // Maps operands in the Dest Instruction
+ // to the corresponding Source instruction operand.
+ IndexedMap<OpData> DestOperandMap;
+
bool IsCompressOnly;
CompressPat(CodeGenInstruction &S, CodeGenInstruction &D,
std::vector<Record *> RF, IndexedMap<OpData> &SourceMap,
@@ -132,13 +146,13 @@ class RISCVCompressInstEmitter {
CodeGenInstruction &DestInst);
public:
- RISCVCompressInstEmitter(RecordKeeper &R) : Records(R), Target(R) {}
+ CompressInstEmitter(RecordKeeper &R) : Records(R), Target(R) {}
void run(raw_ostream &o);
};
} // End anonymous namespace.
-bool RISCVCompressInstEmitter::validateRegister(Record *Reg, Record *RegClass) {
+bool CompressInstEmitter::validateRegister(Record *Reg, Record *RegClass) {
assert(Reg->isSubClassOf("Register") && "Reg record should be a Register");
assert(RegClass->isSubClassOf("RegisterClass") &&
"RegClass record should be a RegisterClass");
@@ -148,9 +162,8 @@ bool RISCVCompressInstEmitter::validateRegister(Record *Reg, Record *RegClass) {
return RC.contains(R);
}
-bool RISCVCompressInstEmitter::validateTypes(Record *DagOpType,
- Record *InstOpType,
- bool IsSourceInst) {
+bool CompressInstEmitter::validateTypes(Record *DagOpType, Record *InstOpType,
+ bool IsSourceInst) {
if (DagOpType == InstOpType)
return true;
// Only source instruction operands are allowed to not match Input Dag
@@ -187,9 +200,10 @@ bool RISCVCompressInstEmitter::validateTypes(Record *DagOpType,
/// operands and fixed registers it expects the Dag operand type to be contained
/// in the instantiated instruction operand type. For immediate operands and
/// immediates no validation checks are enforced at pattern validation time.
-void RISCVCompressInstEmitter::addDagOperandMapping(
- Record *Rec, DagInit *Dag, CodeGenInstruction &Inst,
- IndexedMap<OpData> &OperandMap, bool IsSourceInst) {
+void CompressInstEmitter::addDagOperandMapping(Record *Rec, DagInit *Dag,
+ CodeGenInstruction &Inst,
+ IndexedMap<OpData> &OperandMap,
+ bool IsSourceInst) {
// TiedCount keeps track of the number of operands skipped in Inst
// operands list to get to the corresponding Dag operand. This is
// necessary because the number of operands in Inst might be greater
@@ -293,7 +307,7 @@ static bool validateArgsTypes(Init *Arg1, Init *Arg2) {
// name have the same types. For example in 'C_ADD $rs1, $rs2' we generate the
// mapping $rs1 --> 0, $rs2 ---> 1. If the operand appears twice in the (tied)
// same Dag we use the last occurrence for indexing.
-void RISCVCompressInstEmitter::createDagOperandMapping(
+void CompressInstEmitter::createDagOperandMapping(
Record *Rec, StringMap<unsigned> &SourceOperands,
StringMap<unsigned> &DestOperands, DagInit *SourceDag, DagInit *DestDag,
IndexedMap<OpData> &SourceOperandMap) {
@@ -340,7 +354,7 @@ void RISCVCompressInstEmitter::createDagOperandMapping(
/// Map operand names in the Dag to their index in both corresponding input and
/// output instructions. Validate that operands defined in the input are
/// used in the output pattern while populating the maps.
-void RISCVCompressInstEmitter::createInstOperandMapping(
+void CompressInstEmitter::createInstOperandMapping(
Record *Rec, DagInit *SourceDag, DagInit *DestDag,
IndexedMap<OpData> &SourceOperandMap, IndexedMap<OpData> &DestOperandMap,
StringMap<unsigned> &SourceOperands, CodeGenInstruction &DestInst) {
@@ -409,7 +423,7 @@ void RISCVCompressInstEmitter::createInstOperandMapping(
/// and generate warning.
/// - Immediate operand type in Dag Input differs from the corresponding Source
/// Instruction type and generate a warning.
-void RISCVCompressInstEmitter::evaluateCompressPat(Record *Rec) {
+void CompressInstEmitter::evaluateCompressPat(Record *Rec) {
// Validate input Dag operands.
DagInit *SourceDag = Rec->getValueAsDag("Input");
assert(SourceDag && "Missing 'Input' in compress pattern!");
@@ -417,9 +431,6 @@ void RISCVCompressInstEmitter::evaluateCompressPat(Record *Rec) {
// Checking we are transforming from compressed to uncompressed instructions.
Record *Operator = SourceDag->getOperatorAsDef(Rec->getLoc());
- if (!Operator->isSubClassOf("RVInst"))
- PrintFatalError(Rec->getLoc(), "Input instruction '" + Operator->getName() +
- "' is not a 32 bit wide instruction!");
CodeGenInstruction SourceInst(Operator);
verifyDagOpCount(SourceInst, SourceDag, true);
@@ -429,13 +440,16 @@ void RISCVCompressInstEmitter::evaluateCompressPat(Record *Rec) {
LLVM_DEBUG(dbgs() << "Output: " << *DestDag << "\n");
Record *DestOperator = DestDag->getOperatorAsDef(Rec->getLoc());
- if (!DestOperator->isSubClassOf("RVInst16"))
- PrintFatalError(Rec->getLoc(), "Output instruction '" +
- DestOperator->getName() +
- "' is not a 16 bit wide instruction!");
CodeGenInstruction DestInst(DestOperator);
verifyDagOpCount(DestInst, DestDag, false);
+ if (Operator->getValueAsInt("Size") <= DestOperator->getValueAsInt("Size"))
+ PrintFatalError(
+ Rec->getLoc(),
+ "Compressed instruction '" + DestOperator->getName() +
+ "'is not strictly smaller than the uncompressed instruction '" +
+ Operator->getName() + "' !");
+
// Fill the mapping from the source to destination instructions.
IndexedMap<OpData> SourceOperandMap;
@@ -548,15 +562,15 @@ static void mergeCondAndCode(raw_ostream &CombinedStream, StringRef CondStr,
CombinedStream.indent(4) << "} // if\n";
}
-void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
- EmitterType EType) {
+void CompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
+ EmitterType EType) {
Record *AsmWriter = Target.getAsmWriter();
if (!AsmWriter->getValueAsInt("PassSubtarget"))
PrintFatalError(AsmWriter->getLoc(),
"'PassSubtarget' is false. SubTargetInfo object is needed "
"for target features.\n");
- StringRef Namespace = Target.getName();
+ StringRef TargetName = Target.getName();
// Sort entries in CompressPatterns to handle instructions that can have more
// than one candidate for compression\uncompression, e.g ADD can be
@@ -599,14 +613,14 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
FuncH.indent(25) << "const MCInst &MI,\n";
FuncH.indent(25) << "const MCSubtargetInfo &STI,\n";
FuncH.indent(25) << "MCContext &Context) {\n";
- } else if (EType == EmitterType::Uncompress){
+ } else if (EType == EmitterType::Uncompress) {
FuncH << "static bool uncompressInst(MCInst &OutInst,\n";
FuncH.indent(27) << "const MCInst &MI,\n";
FuncH.indent(27) << "const MCRegisterInfo &MRI,\n";
FuncH.indent(27) << "const MCSubtargetInfo &STI) {\n";
} else if (EType == EmitterType::CheckCompress) {
FuncH << "static bool isCompressibleInst(const MachineInstr &MI,\n";
- FuncH.indent(27) << "const RISCVSubtarget *Subtarget,\n";
+ FuncH.indent(27) << "const " << TargetName << "Subtarget *Subtarget,\n";
FuncH.indent(27) << "const MCRegisterInfo &MRI,\n";
FuncH.indent(27) << "const MCSubtargetInfo &STI) {\n";
}
@@ -631,9 +645,9 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
CaseStream << " default: return false;\n";
bool CompressOrCheck =
- EType == EmitterType::Compress || EType == EmitterType::CheckCompress;
+ EType == EmitterType::Compress || EType == EmitterType::CheckCompress;
bool CompressOrUncompress =
- EType == EmitterType::Compress || EType == EmitterType::Uncompress;
+ EType == EmitterType::Compress || EType == EmitterType::Uncompress;
for (auto &CompressPat : CompressPatterns) {
if (EType == EmitterType::Uncompress && CompressPat.IsCompressOnly)
@@ -644,20 +658,22 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
raw_string_ostream CondStream(CondString);
raw_string_ostream CodeStream(CodeString);
CodeGenInstruction &Source =
- CompressOrCheck ? CompressPat.Source : CompressPat.Dest;
+ CompressOrCheck ? CompressPat.Source : CompressPat.Dest;
CodeGenInstruction &Dest =
CompressOrCheck ? CompressPat.Dest : CompressPat.Source;
- IndexedMap<OpData> SourceOperandMap = CompressOrCheck ?
- CompressPat.SourceOperandMap : CompressPat.DestOperandMap;
- IndexedMap<OpData> &DestOperandMap = CompressOrCheck ?
- CompressPat.DestOperandMap : CompressPat.SourceOperandMap;
+ IndexedMap<OpData> SourceOperandMap = CompressOrCheck
+ ? CompressPat.SourceOperandMap
+ : CompressPat.DestOperandMap;
+ IndexedMap<OpData> &DestOperandMap = CompressOrCheck
+ ? CompressPat.DestOperandMap
+ : CompressPat.SourceOperandMap;
CurOp = Source.TheDef->getName();
// Check current and previous opcode to decide to continue or end a case.
if (CurOp != PrevOp) {
if (!PrevOp.empty())
CaseStream.indent(6) << "break;\n } // case " + PrevOp + "\n";
- CaseStream.indent(4) << "case " + Namespace + "::" + CurOp + ": {\n";
+ CaseStream.indent(4) << "case " + TargetName + "::" + CurOp + ": {\n";
}
std::set<std::pair<bool, StringRef>> FeaturesSet;
@@ -676,7 +692,7 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
// Emit checks for all required features.
for (auto &Op : FeaturesSet) {
StringRef Not = Op.first ? "!" : "";
- CondStream.indent(6) << Not << "STI.getFeatureBits()[" << Namespace
+ CondStream.indent(6) << Not << "STI.getFeatureBits()[" << TargetName
<< "::" << Op.second << "]"
<< " &&\n";
}
@@ -687,7 +703,7 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
for (auto &Op : Set) {
bool isLast = &Op == &*Set.rbegin();
StringRef Not = Op.first ? "!" : "";
- CondStream << Not << "STI.getFeatureBits()[" << Namespace
+ CondStream << Not << "STI.getFeatureBits()[" << TargetName
<< "::" << Op.second << "]";
if (!isLast)
CondStream << " || ";
@@ -720,7 +736,7 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
case OpData::Reg: {
Record *Reg = SourceOperandMap[OpNo].Data.Reg;
CondStream.indent(6)
- << "(MI.getOperand(" << OpNo << ").getReg() == " << Namespace
+ << "(MI.getOperand(" << OpNo << ").getReg() == " << TargetName
<< "::" << Reg->getName() << ") &&\n";
break;
}
@@ -728,7 +744,7 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
}
CodeStream.indent(6) << "// " << Dest.AsmString << "\n";
if (CompressOrUncompress)
- CodeStream.indent(6) << "OutInst.setOpcode(" << Namespace
+ CodeStream.indent(6) << "OutInst.setOpcode(" << TargetName
<< "::" << Dest.TheDef->getName() << ");\n";
OpNo = 0;
for (const auto &DestOperand : Dest.Operands) {
@@ -744,7 +760,7 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
// Don't check register class if this is a tied operand, it was done
// for the operand its tied to.
if (DestOperand.getTiedRegister() == -1)
- CondStream.indent(6) << "(MRI.getRegClass(" << Namespace
+ CondStream.indent(6) << "(MRI.getRegClass(" << TargetName
<< "::" << DestOperand.Rec->getName()
<< "RegClassID).contains(MI.getOperand("
<< OpIdx << ").getReg())) &&\n";
@@ -759,7 +775,7 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
getPredicates(MCOpPredicateMap, MCOpPredicates, DestOperand.Rec,
"MCOperandPredicate");
CondStream.indent(6)
- << Namespace << "ValidateMCOperand("
+ << TargetName << "ValidateMCOperand("
<< "MI.getOperand(" << OpIdx << "), STI, " << Entry << ") &&\n";
} else {
unsigned Entry =
@@ -767,7 +783,7 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
DestOperand.Rec, "ImmediateCode");
CondStream.indent(6)
<< "MI.getOperand(" << OpIdx << ").isImm() &&\n";
- CondStream.indent(6) << Namespace << "ValidateMachineOperand("
+ CondStream.indent(6) << TargetName << "ValidateMachineOperand("
<< "MI.getOperand(" << OpIdx
<< "), Subtarget, " << Entry << ") &&\n";
}
@@ -782,14 +798,14 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
unsigned Entry = getPredicates(MCOpPredicateMap, MCOpPredicates,
DestOperand.Rec, "MCOperandPredicate");
CondStream.indent(6)
- << Namespace << "ValidateMCOperand("
+ << TargetName << "ValidateMCOperand("
<< "MCOperand::createImm(" << DestOperandMap[OpNo].Data.Imm
<< "), STI, " << Entry << ") &&\n";
} else {
unsigned Entry = getPredicates(ImmLeafPredicateMap, ImmLeafPredicates,
DestOperand.Rec, "ImmediateCode");
CondStream.indent(6)
- << Namespace
+ << TargetName
<< "ValidateMachineOperand(MachineOperand::CreateImm("
<< DestOperandMap[OpNo].Data.Imm << "), SubTarget, " << Entry
<< ") &&\n";
@@ -803,7 +819,7 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
// Fixed register has been validated at pattern validation time.
Record *Reg = DestOperandMap[OpNo].Data.Reg;
CodeStream.indent(6)
- << "OutInst.addOperand(MCOperand::createReg(" << Namespace
+ << "OutInst.addOperand(MCOperand::createReg(" << TargetName
<< "::" << Reg->getName() << "));\n";
}
} break;
@@ -822,7 +838,7 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
Func.indent(2) << "return false;\n}\n";
if (!MCOpPredicates.empty()) {
- o << "static bool " << Namespace
+ o << "static bool " << TargetName
<< "ValidateMCOperand(const MCOperand &MCOp,\n"
<< " const MCSubtargetInfo &STI,\n"
<< " unsigned PredicateIndex) {\n"
@@ -838,9 +854,9 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
}
if (!ImmLeafPredicates.empty()) {
- o << "static bool " << Namespace
+ o << "static bool " << TargetName
<< "ValidateMachineOperand(const MachineOperand &MO,\n"
- << " const RISCVSubtarget *Subtarget,\n"
+ << " const " << TargetName << "Subtarget *Subtarget,\n"
<< " unsigned PredicateIndex) {\n"
<< " int64_t Imm = MO.getImm();\n"
<< " switch (PredicateIndex) {\n"
@@ -867,7 +883,7 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
o << "\n#endif //GEN_CHECK_COMPRESS_INSTR\n\n";
}
-void RISCVCompressInstEmitter::run(raw_ostream &o) {
+void CompressInstEmitter::run(raw_ostream &o) {
std::vector<Record *> Insts = Records.getAllDerivedDefinitions("CompressPat");
// Process the CompressPat definitions, validating them as we do so.
@@ -887,7 +903,7 @@ void RISCVCompressInstEmitter::run(raw_ostream &o) {
namespace llvm {
void EmitCompressInst(RecordKeeper &RK, raw_ostream &OS) {
- RISCVCompressInstEmitter(RK).run(OS);
+ CompressInstEmitter(RK).run(OS);
}
} // namespace llvm
diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp
index 693073672fc1..d08186b7094b 100644
--- a/llvm/utils/TableGen/GlobalISelEmitter.cpp
+++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp
@@ -1212,11 +1212,13 @@ PredicateListMatcher<OperandPredicateMatcher>::getNoPredicateComment() const {
/// one as another.
class SameOperandMatcher : public OperandPredicateMatcher {
std::string MatchingName;
+ unsigned OrigOpIdx;
public:
- SameOperandMatcher(unsigned InsnVarID, unsigned OpIdx, StringRef MatchingName)
+ SameOperandMatcher(unsigned InsnVarID, unsigned OpIdx, StringRef MatchingName,
+ unsigned OrigOpIdx)
: OperandPredicateMatcher(OPM_SameOperand, InsnVarID, OpIdx),
- MatchingName(MatchingName) {}
+ MatchingName(MatchingName), OrigOpIdx(OrigOpIdx) {}
static bool classof(const PredicateMatcher *P) {
return P->getKind() == OPM_SameOperand;
@@ -1227,6 +1229,7 @@ public:
bool isIdentical(const PredicateMatcher &B) const override {
return OperandPredicateMatcher::isIdentical(B) &&
+ OrigOpIdx == cast<SameOperandMatcher>(&B)->OrigOpIdx &&
MatchingName == cast<SameOperandMatcher>(&B)->MatchingName;
}
};
@@ -3291,7 +3294,8 @@ void RuleMatcher::defineOperand(StringRef SymbolicName, OperandMatcher &OM) {
// If the operand is already defined, then we must ensure both references in
// the matcher have the exact same node.
- OM.addPredicate<SameOperandMatcher>(OM.getSymbolicName());
+ OM.addPredicate<SameOperandMatcher>(
+ OM.getSymbolicName(), getOperandMatcher(OM.getSymbolicName()).getOpIdx());
}
void RuleMatcher::definePhysRegOperand(Record *Reg, OperandMatcher &OM) {
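The GlobalISelEmitter hunk above records which original operand a SameOperandMatcher is tied to, and folds that index into isIdentical(). The point, sketched below in a minimal standalone example (this is not GlobalISelEmitter code, just an illustration of the pitfall), is that two "same operand" checks sharing a symbolic name but constraining different operand pairs must not be treated as interchangeable when predicates are deduplicated.

#include <cassert>
#include <string>

struct SameOperandCheck {
  std::string MatchingName; // symbolic name both operands share
  unsigned OpIdx;           // operand being checked
  unsigned OrigOpIdx;       // operand it must match
  // Mirrors the corrected identity test: without OrigOpIdx the two checks
  // constructed in main() would compare equal even though they constrain
  // different operand pairs.
  bool isIdentical(const SameOperandCheck &B) const {
    return MatchingName == B.MatchingName && OpIdx == B.OpIdx &&
           OrigOpIdx == B.OrigOpIdx;
  }
};

int main() {
  SameOperandCheck A{"x", /*OpIdx=*/2, /*OrigOpIdx=*/0};
  SameOperandCheck B{"x", /*OpIdx=*/2, /*OrigOpIdx=*/1};
  assert(!A.isIdentical(B)); // distinct constraints must not be deduplicated
  return 0;
}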
diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp
index 3d1d258e342e..437b5f002027 100644
--- a/llvm/utils/TableGen/IntrinsicEmitter.cpp
+++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp
@@ -249,7 +249,8 @@ enum IIT_Info {
IIT_BF16 = 48,
IIT_STRUCT9 = 49,
IIT_V256 = 50,
- IIT_AMX = 51
+ IIT_AMX = 51,
+ IIT_PPCF128 = 52
};
static void EncodeFixedValueType(MVT::SimpleValueType VT,
@@ -274,6 +275,7 @@ static void EncodeFixedValueType(MVT::SimpleValueType VT,
case MVT::f32: return Sig.push_back(IIT_F32);
case MVT::f64: return Sig.push_back(IIT_F64);
case MVT::f128: return Sig.push_back(IIT_F128);
+ case MVT::ppcf128: return Sig.push_back(IIT_PPCF128);
case MVT::token: return Sig.push_back(IIT_TOKEN);
case MVT::Metadata: return Sig.push_back(IIT_METADATA);
case MVT::x86mmx: return Sig.push_back(IIT_MMX);
diff --git a/llvm/utils/TableGen/PredicateExpander.cpp b/llvm/utils/TableGen/PredicateExpander.cpp
index a76640f6d11f..a7256499d566 100644
--- a/llvm/utils/TableGen/PredicateExpander.cpp
+++ b/llvm/utils/TableGen/PredicateExpander.cpp
@@ -470,7 +470,7 @@ void STIPredicateExpander::expandOpcodeGroup(raw_ostream &OS, const OpcodeGroup
increaseIndentLevel();
OS.indent(getIndentLevel() * 2);
if (ShouldUpdateOpcodeMask) {
- if (PI.OperandMask.isNullValue())
+ if (PI.OperandMask.isZero())
OS << "Mask.clearAllBits();\n";
else
OS << "Mask = " << PI.OperandMask << ";\n";
diff --git a/llvm/utils/TableGen/RegisterInfoEmitter.cpp b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
index 037fad207ac7..1ed7bc103f9c 100644
--- a/llvm/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
@@ -1411,6 +1411,7 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
<< SuperRegIdxSeqs.get(SuperRegIdxLists[RC.EnumValue]) << ",\n ";
printMask(OS, RC.LaneMask);
OS << ",\n " << (unsigned)RC.AllocationPriority << ",\n "
+ << format("0x%02x", RC.TSFlags) << ", /* TSFlags */\n "
<< (RC.HasDisjunctSubRegs?"true":"false")
<< ", /* HasDisjunctSubRegs */\n "
<< (RC.CoveredBySubRegs?"true":"false")
diff --git a/llvm/utils/TableGen/X86DisassemblerTables.cpp b/llvm/utils/TableGen/X86DisassemblerTables.cpp
index 89069ec3e4ff..d1a9ecb06a2b 100644
--- a/llvm/utils/TableGen/X86DisassemblerTables.cpp
+++ b/llvm/utils/TableGen/X86DisassemblerTables.cpp
@@ -994,6 +994,8 @@ void DisassemblerTables::emitContextDecisions(raw_ostream &o1, raw_ostream &o2,
emitContextDecision(o1, o2, i1, i2, ModRMTableNum, *Tables[5], XOP9_MAP_STR);
emitContextDecision(o1, o2, i1, i2, ModRMTableNum, *Tables[6], XOPA_MAP_STR);
emitContextDecision(o1, o2, i1, i2, ModRMTableNum, *Tables[7], THREEDNOW_MAP_STR);
+ emitContextDecision(o1, o2, i1, i2, ModRMTableNum, *Tables[8], MAP5_STR);
+ emitContextDecision(o1, o2, i1, i2, ModRMTableNum, *Tables[9], MAP6_STR);
}
void DisassemblerTables::emit(raw_ostream &o) const {
diff --git a/llvm/utils/TableGen/X86DisassemblerTables.h b/llvm/utils/TableGen/X86DisassemblerTables.h
index 63af68b6fbfa..2e4ff1e2ce08 100644
--- a/llvm/utils/TableGen/X86DisassemblerTables.h
+++ b/llvm/utils/TableGen/X86DisassemblerTables.h
@@ -41,7 +41,9 @@ private:
/// [5] XOP9 map opcode
/// [6] XOPA map opcode
/// [7] 3dnow map opcode
- std::unique_ptr<ContextDecision> Tables[8];
+ /// [8] fixed length MAP5 opcode
+ /// [9] fixed length MAP6 opcode
+ std::unique_ptr<ContextDecision> Tables[10];
// Table of ModRM encodings.
typedef std::map<std::vector<unsigned>, unsigned> ModRMMapTy;
diff --git a/llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp b/llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp
index 009dc036cf97..36c71843d70e 100644
--- a/llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp
+++ b/llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp
@@ -65,7 +65,7 @@ void X86EVEX2VEXTablesEmitter::printTable(const std::vector<Entry> &Table,
<< " // EVEX scalar with corresponding VEX.\n";
// Print all entries added to the table
- for (auto Pair : Table) {
+ for (const auto &Pair : Table) {
OS << " { X86::" << Pair.first->TheDef->getName()
<< ", X86::" << Pair.second->TheDef->getName() << " },\n";
}
@@ -80,7 +80,7 @@ void X86EVEX2VEXTablesEmitter::printCheckPredicate(
<< " unsigned Opc = MI.getOpcode();\n"
<< " switch (Opc) {\n"
<< " default: return true;\n";
- for (auto Pair : Predicates)
+ for (const auto &Pair : Predicates)
OS << " case X86::" << Pair.first << ": return " << Pair.second << ";\n";
OS << " }\n"
<< "}\n\n";
diff --git a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
index 85d926215113..0a8d0750cf13 100644
--- a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
+++ b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
@@ -79,13 +79,13 @@ const ManualMapEntry ManualMapSet[] = {
static bool isExplicitAlign(const CodeGenInstruction *Inst) {
return any_of(ExplicitAlign, [Inst](const char *InstStr) {
- return Inst->TheDef->getName().find(InstStr) != StringRef::npos;
+ return Inst->TheDef->getName().contains(InstStr);
});
}
static bool isExplicitUnalign(const CodeGenInstruction *Inst) {
return any_of(ExplicitUnalign, [Inst](const char *InstStr) {
- return Inst->TheDef->getName().find(InstStr) != StringRef::npos;
+ return Inst->TheDef->getName().contains(InstStr);
});
}
@@ -278,7 +278,7 @@ static inline bool hasMemoryFormat(const Record *Inst) {
}
static inline bool isNOREXRegClass(const Record *Op) {
- return Op->getName().find("_NOREX") != StringRef::npos;
+ return Op->getName().contains("_NOREX");
}
static inline bool isRegisterOperand(const Record *Rec) {
diff --git a/llvm/utils/TableGen/X86RecognizableInstr.cpp b/llvm/utils/TableGen/X86RecognizableInstr.cpp
index c2ca3791ac36..a9b384155965 100644
--- a/llvm/utils/TableGen/X86RecognizableInstr.cpp
+++ b/llvm/utils/TableGen/X86RecognizableInstr.cpp
@@ -109,12 +109,12 @@ RecognizableInstr::RecognizableInstr(DisassemblerTables &tables,
// FIXME: Is there some better way to check for In64BitMode?
std::vector<Record*> Predicates = Rec->getValueAsListOfDefs("Predicates");
for (unsigned i = 0, e = Predicates.size(); i != e; ++i) {
- if (Predicates[i]->getName().find("Not64Bit") != Name.npos ||
- Predicates[i]->getName().find("In32Bit") != Name.npos) {
+ if (Predicates[i]->getName().contains("Not64Bit") ||
+ Predicates[i]->getName().contains("In32Bit")) {
Is32Bit = true;
break;
}
- if (Predicates[i]->getName().find("In64Bit") != Name.npos) {
+ if (Predicates[i]->getName().contains("In64Bit")) {
Is64Bit = true;
break;
}
@@ -752,6 +752,8 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const {
case X86Local::XOP9: opcodeType = XOP9_MAP; break;
case X86Local::XOPA: opcodeType = XOPA_MAP; break;
case X86Local::ThreeDNow: opcodeType = THREEDNOW_MAP; break;
+ case X86Local::T_MAP5: opcodeType = MAP5; break;
+ case X86Local::T_MAP6: opcodeType = MAP6; break;
}
std::unique_ptr<ModRMFilter> filter;
@@ -901,10 +903,13 @@ OperandType RecognizableInstr::typeFromString(const std::string &s,
TYPE("FR64X", TYPE_XMM)
TYPE("f64mem", TYPE_M)
TYPE("sdmem", TYPE_M)
+ TYPE("FR16X", TYPE_XMM)
TYPE("FR32", TYPE_XMM)
TYPE("FR32X", TYPE_XMM)
TYPE("f32mem", TYPE_M)
+ TYPE("f16mem", TYPE_M)
TYPE("ssmem", TYPE_M)
+ TYPE("shmem", TYPE_M)
TYPE("RST", TYPE_ST)
TYPE("RSTi", TYPE_ST)
TYPE("i128mem", TYPE_M)
@@ -1019,6 +1024,7 @@ RecognizableInstr::immediateEncodingFromString(const std::string &s,
ENCODING("FR128", ENCODING_IB)
ENCODING("VR128", ENCODING_IB)
ENCODING("VR256", ENCODING_IB)
+ ENCODING("FR16X", ENCODING_IB)
ENCODING("FR32X", ENCODING_IB)
ENCODING("FR64X", ENCODING_IB)
ENCODING("VR128X", ENCODING_IB)
@@ -1047,6 +1053,7 @@ RecognizableInstr::rmRegisterEncodingFromString(const std::string &s,
ENCODING("FR32", ENCODING_RM)
ENCODING("FR64X", ENCODING_RM)
ENCODING("FR32X", ENCODING_RM)
+ ENCODING("FR16X", ENCODING_RM)
ENCODING("VR64", ENCODING_RM)
ENCODING("VR256", ENCODING_RM)
ENCODING("VR256X", ENCODING_RM)
@@ -1058,11 +1065,6 @@ RecognizableInstr::rmRegisterEncodingFromString(const std::string &s,
ENCODING("VK16", ENCODING_RM)
ENCODING("VK32", ENCODING_RM)
ENCODING("VK64", ENCODING_RM)
- ENCODING("VK1PAIR", ENCODING_RM)
- ENCODING("VK2PAIR", ENCODING_RM)
- ENCODING("VK4PAIR", ENCODING_RM)
- ENCODING("VK8PAIR", ENCODING_RM)
- ENCODING("VK16PAIR", ENCODING_RM)
ENCODING("BNDR", ENCODING_RM)
ENCODING("TILE", ENCODING_RM)
errs() << "Unhandled R/M register encoding " << s << "\n";
@@ -1091,6 +1093,7 @@ RecognizableInstr::roRegisterEncodingFromString(const std::string &s,
ENCODING("VR128X", ENCODING_REG)
ENCODING("FR64X", ENCODING_REG)
ENCODING("FR32X", ENCODING_REG)
+ ENCODING("FR16X", ENCODING_REG)
ENCODING("VR512", ENCODING_REG)
ENCODING("VK1", ENCODING_REG)
ENCODING("VK2", ENCODING_REG)
@@ -1127,6 +1130,7 @@ RecognizableInstr::vvvvRegisterEncodingFromString(const std::string &s,
ENCODING("FR64", ENCODING_VVVV)
ENCODING("VR128", ENCODING_VVVV)
ENCODING("VR256", ENCODING_VVVV)
+ ENCODING("FR16X", ENCODING_VVVV)
ENCODING("FR32X", ENCODING_VVVV)
ENCODING("FR64X", ENCODING_VVVV)
ENCODING("VR128X", ENCODING_VVVV)
@@ -1139,11 +1143,6 @@ RecognizableInstr::vvvvRegisterEncodingFromString(const std::string &s,
ENCODING("VK16", ENCODING_VVVV)
ENCODING("VK32", ENCODING_VVVV)
ENCODING("VK64", ENCODING_VVVV)
- ENCODING("VK1PAIR", ENCODING_VVVV)
- ENCODING("VK2PAIR", ENCODING_VVVV)
- ENCODING("VK4PAIR", ENCODING_VVVV)
- ENCODING("VK8PAIR", ENCODING_VVVV)
- ENCODING("VK16PAIR", ENCODING_VVVV)
ENCODING("TILE", ENCODING_VVVV)
errs() << "Unhandled VEX.vvvv register encoding " << s << "\n";
llvm_unreachable("Unhandled VEX.vvvv register encoding");
@@ -1170,6 +1169,7 @@ RecognizableInstr::memoryEncodingFromString(const std::string &s,
ENCODING("i32mem", ENCODING_RM)
ENCODING("i64mem", ENCODING_RM)
ENCODING("i8mem", ENCODING_RM)
+ ENCODING("shmem", ENCODING_RM)
ENCODING("ssmem", ENCODING_RM)
ENCODING("sdmem", ENCODING_RM)
ENCODING("f128mem", ENCODING_RM)
@@ -1177,6 +1177,7 @@ RecognizableInstr::memoryEncodingFromString(const std::string &s,
ENCODING("f512mem", ENCODING_RM)
ENCODING("f64mem", ENCODING_RM)
ENCODING("f32mem", ENCODING_RM)
+ ENCODING("f16mem", ENCODING_RM)
ENCODING("i128mem", ENCODING_RM)
ENCODING("i256mem", ENCODING_RM)
ENCODING("i512mem", ENCODING_RM)
diff --git a/llvm/utils/TableGen/X86RecognizableInstr.h b/llvm/utils/TableGen/X86RecognizableInstr.h
index a7b88b4d12ed..d4fad2cc3f0f 100644
--- a/llvm/utils/TableGen/X86RecognizableInstr.h
+++ b/llvm/utils/TableGen/X86RecognizableInstr.h
@@ -130,7 +130,8 @@ namespace X86Local {
};
enum {
- OB = 0, TB = 1, T8 = 2, TA = 3, XOP8 = 4, XOP9 = 5, XOPA = 6, ThreeDNow = 7
+ OB = 0, TB = 1, T8 = 2, TA = 3, XOP8 = 4, XOP9 = 5, XOPA = 6, ThreeDNow = 7,
+ T_MAP5 = 8, T_MAP6 = 9
};
enum {
diff --git a/openmp/runtime/src/dllexports b/openmp/runtime/src/dllexports
index 473746887574..6c0e86e3aab2 100644
--- a/openmp/runtime/src/dllexports
+++ b/openmp/runtime/src/dllexports
@@ -393,6 +393,8 @@ kmpc_set_disp_num_buffers 267
__kmpc_error 281
__kmpc_masked 282
__kmpc_end_masked 283
+ __kmpc_scope 286
+ __kmpc_end_scope 287
%endif
# User API entry points that have both lower- and upper- case versions for Fortran.
@@ -523,6 +525,7 @@ kmp_set_disp_num_buffers 890
__kmpc_set_default_allocator
__kmpc_get_default_allocator
__kmpc_alloc
+ __kmpc_aligned_alloc
__kmpc_calloc
__kmpc_realloc
__kmpc_free
@@ -548,6 +551,8 @@ kmp_set_disp_num_buffers 890
omp_display_env 733
omp_calloc 776
omp_realloc 777
+ omp_aligned_alloc 778
+ omp_aligned_calloc 806
omp_null_allocator DATA
omp_default_mem_alloc DATA
@@ -1216,6 +1221,29 @@ kmp_set_disp_num_buffers 890
__kmpc_atomic_fixed8u_mul_fp
%endif
+ # OpenMP 5.1 atomics
+ __kmpc_atomic_float10_max 2139
+ __kmpc_atomic_float10_min 2140
+ __kmpc_atomic_float10_max_cpt 2141
+ __kmpc_atomic_float10_min_cpt 2142
+
+ __kmpc_atomic_bool_1_cas 2143
+ __kmpc_atomic_bool_2_cas 2144
+ __kmpc_atomic_bool_4_cas 2145
+ __kmpc_atomic_bool_8_cas 2146
+ __kmpc_atomic_val_1_cas 2147
+ __kmpc_atomic_val_2_cas 2148
+ __kmpc_atomic_val_4_cas 2149
+ __kmpc_atomic_val_8_cas 2150
+ __kmpc_atomic_bool_1_cas_cpt 2151
+ __kmpc_atomic_bool_2_cas_cpt 2152
+ __kmpc_atomic_bool_4_cas_cpt 2153
+ __kmpc_atomic_bool_8_cas_cpt 2154
+ __kmpc_atomic_val_1_cas_cpt 2155
+ __kmpc_atomic_val_2_cas_cpt 2156
+ __kmpc_atomic_val_4_cas_cpt 2157
+ __kmpc_atomic_val_8_cas_cpt 2158
+
%endif
# end of file #
diff --git a/openmp/runtime/src/exports_so.txt b/openmp/runtime/src/exports_so.txt
index cb79ae72e67b..ac188af31055 100644
--- a/openmp/runtime/src/exports_so.txt
+++ b/openmp/runtime/src/exports_so.txt
@@ -120,5 +120,7 @@ GOMP_4.5 {
} GOMP_4.0;
GOMP_5.0 {
} GOMP_4.5;
+GOMP_5.0.1 {
+} GOMP_5.0;
# end of file #
diff --git a/openmp/runtime/src/i18n/en_US.txt b/openmp/runtime/src/i18n/en_US.txt
index 0b5436fd5801..351da540fadb 100644
--- a/openmp/runtime/src/i18n/en_US.txt
+++ b/openmp/runtime/src/i18n/en_US.txt
@@ -269,6 +269,7 @@ Using_int_Value "%1$s value \"%2$d\" will be used."
Using_uint_Value "%1$s value \"%2$u\" will be used."
Using_uint64_Value "%1$s value \"%2$s\" will be used."
Using_str_Value "%1$s value \"%2$s\" will be used."
+BarrierPatternOverride "Mixing other barrier patterns with dist is prohibited. Using dist for all barrier patterns."
MaxValueUsing "%1$s maximum value \"%2$d\" will be used."
MinValueUsing "%1$s minimum value \"%2$d\" will be used."
MemoryAllocFailed "Memory allocation failed."
@@ -359,6 +360,7 @@ OmptOutdatedWorkshare "OMPT: Cannot determine workshare type; using the d
OmpNoAllocator "Allocator %1$s is not available, will use default allocator."
TopologyGeneric "%1$s: %2$s (%3$d total cores)"
AffGranularityBad "%1$s: granularity setting: %2$s does not exist in topology. Using granularity=%3$s instead."
+TopologyHybrid "%1$s: hybrid core type detected: %2$d %3$s cores."
# --- OpenMP errors detected at runtime ---
#
@@ -467,6 +469,8 @@ AffHWSubsetNotExistGeneric "KMP_HW_SUBSET ignored: %1$s: level not detected in
AffHWSubsetEqvLayers "KMP_HW_SUBSET ignored: %1$s, %2$s: layers are equivalent, please only specify one."
AffHWSubsetOutOfOrder "KMP_HW_SUBSET ignored: %1$s layer should come after %2$s."
AffEqualTopologyTypes "%1$s: topology layer \"%2$s\" is equivalent to \"%3$s\"."
+AffGranTooCoarseProcGroup "%1$s: granularity=%2$s is too coarse, setting granularity=group."
+StgDeprecatedValue "%1$s: \"%2$s\" value is deprecated. Please use \"%3$s\" instead."
# --------------------------------------------------------------------------------------------------
-*- HINTS -*-
diff --git a/openmp/runtime/src/include/omp.h.var b/openmp/runtime/src/include/omp.h.var
index 588c52b02a8f..6c1bd2396048 100644
--- a/openmp/runtime/src/include/omp.h.var
+++ b/openmp/runtime/src/include/omp.h.var
@@ -431,14 +431,23 @@
extern omp_allocator_handle_t __KAI_KMPC_CONVENTION omp_get_default_allocator(void);
# ifdef __cplusplus
extern void *__KAI_KMPC_CONVENTION omp_alloc(size_t size, omp_allocator_handle_t a = omp_null_allocator);
- extern void *__KAI_KMPC_CONVENTION omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t a = omp_null_allocator);
+ extern void *__KAI_KMPC_CONVENTION omp_aligned_alloc(size_t align, size_t size,
+ omp_allocator_handle_t a = omp_null_allocator);
+ extern void *__KAI_KMPC_CONVENTION omp_calloc(size_t nmemb, size_t size,
+ omp_allocator_handle_t a = omp_null_allocator);
+ extern void *__KAI_KMPC_CONVENTION omp_aligned_calloc(size_t align, size_t nmemb, size_t size,
+ omp_allocator_handle_t a = omp_null_allocator);
extern void *__KAI_KMPC_CONVENTION omp_realloc(void *ptr, size_t size,
omp_allocator_handle_t allocator = omp_null_allocator,
omp_allocator_handle_t free_allocator = omp_null_allocator);
extern void __KAI_KMPC_CONVENTION omp_free(void * ptr, omp_allocator_handle_t a = omp_null_allocator);
# else
extern void *__KAI_KMPC_CONVENTION omp_alloc(size_t size, omp_allocator_handle_t a);
+ extern void *__KAI_KMPC_CONVENTION omp_aligned_alloc(size_t align, size_t size,
+ omp_allocator_handle_t a);
extern void *__KAI_KMPC_CONVENTION omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t a);
+ extern void *__KAI_KMPC_CONVENTION omp_aligned_calloc(size_t align, size_t nmemb, size_t size,
+ omp_allocator_handle_t a);
extern void *__KAI_KMPC_CONVENTION omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator,
omp_allocator_handle_t free_allocator);
extern void __KAI_KMPC_CONVENTION omp_free(void *ptr, omp_allocator_handle_t a);
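The omp.h hunk above declares the OpenMP 5.1 routines omp_aligned_alloc and omp_aligned_calloc alongside the existing allocator API. A minimal usage sketch, assuming a runtime built from this import and compilation with OpenMP enabled (e.g. -fopenmp) so that <omp.h> and libomp are available:

#include <omp.h>
#include <cstdio>

int main() {
  // 64-byte aligned allocation of 1024 doubles from the default allocator.
  double *a = static_cast<double *>(
      omp_aligned_alloc(64, 1024 * sizeof(double), omp_default_mem_alloc));
  // Zero-initialized, 64-byte aligned array of 256 ints.
  int *b = static_cast<int *>(
      omp_aligned_calloc(64, 256, sizeof(int), omp_default_mem_alloc));
  if (a && b)
    std::printf("a=%p b=%p\n", static_cast<void *>(a), static_cast<void *>(b));
  omp_free(a, omp_default_mem_alloc);
  omp_free(b, omp_default_mem_alloc);
  return 0;
}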
diff --git a/openmp/runtime/src/include/omp_lib.f90.var b/openmp/runtime/src/include/omp_lib.f90.var
index 48622e2154c0..f475d8dbe688 100644
--- a/openmp/runtime/src/include/omp_lib.f90.var
+++ b/openmp/runtime/src/include/omp_lib.f90.var
@@ -650,6 +650,55 @@
integer(c_int), value :: device_num
end function omp_target_is_accessible
+ function omp_alloc(size, allocator) bind(c)
+ use omp_lib_kinds
+ use, intrinsic :: iso_c_binding, only : c_ptr, c_size_t
+ type(c_ptr) omp_alloc
+ integer(c_size_t), value :: size
+ integer(omp_allocator_handle_kind), value :: allocator
+ end function omp_alloc
+
+ function omp_aligned_alloc(alignment, size, allocator) bind(c)
+ use omp_lib_kinds
+ use, intrinsic :: iso_c_binding, only : c_ptr, c_size_t
+ type(c_ptr) omp_aligned_alloc
+ integer(c_size_t), value :: alignment, size
+ integer(omp_allocator_handle_kind), value :: allocator
+ end function omp_aligned_alloc
+
+ function omp_calloc(nmemb, size, allocator) bind(c)
+ use omp_lib_kinds
+ use, intrinsic :: iso_c_binding, only : c_ptr, c_size_t
+ type(c_ptr) omp_calloc
+ integer(c_size_t), value :: nmemb, size
+ integer(omp_allocator_handle_kind), value :: allocator
+ end function omp_calloc
+
+ function omp_aligned_calloc(alignment, nmemb, size, allocator) bind(c)
+ use omp_lib_kinds
+ use, intrinsic :: iso_c_binding, only : c_ptr, c_size_t
+ type(c_ptr) omp_aligned_calloc
+ integer(c_size_t), value :: alignment, nmemb, size
+ integer(omp_allocator_handle_kind), value :: allocator
+ end function omp_aligned_calloc
+
+ function omp_realloc(ptr, size, allocator, free_allocator) bind(c)
+ use omp_lib_kinds
+ use, intrinsic :: iso_c_binding, only : c_ptr, c_size_t
+ type(c_ptr) omp_realloc
+ type(c_ptr), value :: ptr
+ integer(c_size_t), value :: size
+ integer(omp_allocator_handle_kind), value :: allocator
+ integer(omp_allocator_handle_kind), value :: free_allocator
+ end function omp_realloc
+
+ subroutine omp_free(ptr, allocator) bind(c)
+ use omp_lib_kinds
+ use, intrinsic :: iso_c_binding, only : c_ptr
+ type(c_ptr), value :: ptr
+ integer(omp_allocator_handle_kind), value :: allocator
+ end subroutine omp_free
+
! ***
! *** kmp_* entry points
! ***
diff --git a/openmp/runtime/src/include/omp_lib.h.var b/openmp/runtime/src/include/omp_lib.h.var
index 9122fb26613e..a1d0c1f97770 100644
--- a/openmp/runtime/src/include/omp_lib.h.var
+++ b/openmp/runtime/src/include/omp_lib.h.var
@@ -219,11 +219,11 @@
integer(kind=omp_allocator_handle_kind)omp_thread_mem_alloc
parameter(omp_thread_mem_alloc=8)
! Preview of target memory support
- integer(kind=omp_allocator_handle_kind)llvm_omp_target_host_mem_alloc
+ integer(omp_allocator_handle_kind)llvm_omp_target_host_mem_alloc
parameter(llvm_omp_target_host_mem_alloc=100)
- integer(kind=omp_allocator_handle_kind)llvm_omp_target_shared_mem_alloc
+ integer(omp_allocator_handle_kind)llvm_omp_target_shared_mem_alloc
parameter(llvm_omp_target_shared_mem_alloc=101)
- integer(kind=omp_allocator_handle_kind)llvm_omp_target_device_mem_alloc
+ integer(omp_allocator_handle_kind)llvm_omp_target_device_mem_alloc
parameter(llvm_omp_target_device_mem_alloc=102)
integer(kind=omp_memspace_handle_kind)omp_default_mem_space
@@ -237,11 +237,11 @@
integer(kind=omp_memspace_handle_kind)omp_low_lat_mem_space
parameter(omp_low_lat_mem_space=4)
! Preview of target memory support
- integer(kind=omp_memspace_handle_kind)llvm_omp_target_host_mem_space
+ integer(omp_memspace_handle_kind)llvm_omp_target_host_mem_space
parameter(llvm_omp_target_host_mem_space=100)
- integer(kind=omp_memspace_handle_kind)llvm_omp_target_shared_mem_space
+ integer(omp_memspace_handle_kind)llvm_omp_target_shared_mem_space
parameter(llvm_omp_target_shared_mem_space=101)
- integer(kind=omp_memspace_handle_kind)llvm_omp_target_device_mem_space
+ integer(omp_memspace_handle_kind)llvm_omp_target_device_mem_space
parameter(llvm_omp_target_device_mem_space=102)
integer(kind=omp_pause_resource_kind)omp_pause_resume
@@ -479,7 +479,7 @@
function omp_get_supported_active_levels() bind(c)
import
- integer (kind=omp_integer_kind) omp_get_supported_active_levels
+ integer(kind=omp_integer_kind)omp_get_supported_active_levels
end function omp_get_supported_active_levels
subroutine omp_fulfill_event(event) bind(c)
@@ -745,6 +745,55 @@
integer(c_int), value :: device_num
end function omp_target_is_accessible
+ function omp_alloc(size, allocator) bind(c)
+ use omp_lib_kinds
+ use, intrinsic :: iso_c_binding, only : c_ptr, c_size_t
+ type(c_ptr) omp_alloc
+ integer(c_size_t), value :: size
+ integer(omp_allocator_handle_kind), value :: allocator
+ end function omp_alloc
+
+ function omp_aligned_alloc(alignment, size, allocator) bind(c)
+ use omp_lib_kinds
+ use, intrinsic :: iso_c_binding, only : c_ptr, c_size_t
+ type(c_ptr) omp_aligned_alloc
+ integer(c_size_t), value :: alignment, size
+ integer(omp_allocator_handle_kind), value :: allocator
+ end function omp_aligned_alloc
+
+ function omp_calloc(nmemb, size, allocator) bind(c)
+ use omp_lib_kinds
+ use, intrinsic :: iso_c_binding, only : c_ptr, c_size_t
+ type(c_ptr) omp_calloc
+ integer(c_size_t), value :: nmemb, size
+ integer(omp_allocator_handle_kind), value :: allocator
+ end function omp_calloc
+
+ function omp_aligned_calloc(alignment, nmemb, size, allocator) bind(c)
+ use omp_lib_kinds
+ use, intrinsic :: iso_c_binding, only : c_ptr, c_size_t
+ type(c_ptr) omp_aligned_calloc
+ integer(c_size_t), value :: alignment, nmemb, size
+ integer(omp_allocator_handle_kind), value :: allocator
+ end function omp_aligned_calloc
+
+ function omp_realloc(ptr, size, allocator, free_allocator) bind(c)
+ use omp_lib_kinds
+ use, intrinsic :: iso_c_binding, only : c_ptr, c_size_t
+ type(c_ptr) omp_realloc
+ type(c_ptr), value :: ptr
+ integer(c_size_t), value :: size
+ integer(omp_allocator_handle_kind), value :: allocator
+ integer(omp_allocator_handle_kind), value :: free_allocator
+ end function omp_realloc
+
+ subroutine omp_free(ptr, allocator) bind(c)
+ use omp_lib_kinds
+ use, intrinsic :: iso_c_binding, only : c_ptr
+ type(c_ptr), value :: ptr
+ integer(omp_allocator_handle_kind), value :: allocator
+ end subroutine omp_free
+
! ***
! *** kmp_* entry points
! ***
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 05264f4433d3..ee068ab32f70 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -115,6 +115,7 @@ typedef unsigned int kmp_hwloc_depth_t;
#include "kmp_debug.h"
#include "kmp_lock.h"
#include "kmp_version.h"
+#include "kmp_barrier.h"
#if USE_DEBUGGER
#include "kmp_debugger.h"
#endif
@@ -263,6 +264,7 @@ typedef union kmp_root kmp_root_p;
template <bool C = false, bool S = true> class kmp_flag_32;
template <bool C = false, bool S = true> class kmp_flag_64;
+template <bool C = false, bool S = true> class kmp_atomic_flag_64;
class kmp_flag_oncore;
#ifdef __cplusplus
@@ -847,6 +849,7 @@ typedef struct kmp_nested_proc_bind_t {
} kmp_nested_proc_bind_t;
extern kmp_nested_proc_bind_t __kmp_nested_proc_bind;
+extern kmp_proc_bind_t __kmp_teams_proc_bind;
extern int __kmp_display_affinity;
extern char *__kmp_affinity_format;
@@ -987,7 +990,7 @@ typedef omp_memspace_handle_t kmp_memspace_t; // placeholder
typedef struct kmp_allocator_t {
omp_memspace_handle_t memspace;
void **memkind; // pointer to memkind
- int alignment;
+ size_t alignment;
omp_alloctrait_value_t fb;
kmp_allocator_t *fb_data;
kmp_uint64 pool_size;
@@ -1001,13 +1004,25 @@ extern omp_allocator_handle_t __kmpc_init_allocator(int gtid,
extern void __kmpc_destroy_allocator(int gtid, omp_allocator_handle_t al);
extern void __kmpc_set_default_allocator(int gtid, omp_allocator_handle_t al);
extern omp_allocator_handle_t __kmpc_get_default_allocator(int gtid);
+// external interfaces, may be used by compiler
extern void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
+extern void *__kmpc_aligned_alloc(int gtid, size_t align, size_t sz,
+ omp_allocator_handle_t al);
extern void *__kmpc_calloc(int gtid, size_t nmemb, size_t sz,
omp_allocator_handle_t al);
extern void *__kmpc_realloc(int gtid, void *ptr, size_t sz,
omp_allocator_handle_t al,
omp_allocator_handle_t free_al);
extern void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
+// internal interfaces, contain real implementation
+extern void *__kmp_alloc(int gtid, size_t align, size_t sz,
+ omp_allocator_handle_t al);
+extern void *__kmp_calloc(int gtid, size_t align, size_t nmemb, size_t sz,
+ omp_allocator_handle_t al);
+extern void *__kmp_realloc(int gtid, void *ptr, size_t sz,
+ omp_allocator_handle_t al,
+ omp_allocator_handle_t free_al);
+extern void ___kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
extern void __kmp_init_memkind();
extern void __kmp_fini_memkind();
@@ -1066,7 +1081,9 @@ extern void __kmp_init_target_mem();
#define KMP_MIN_BLOCKTIME (0)
#define KMP_MAX_BLOCKTIME \
(INT_MAX) /* Must be this for "infinite" setting the work */
-#define KMP_DEFAULT_BLOCKTIME (200) /* __kmp_blocktime is in milliseconds */
+
+/* __kmp_blocktime is in milliseconds */
+#define KMP_DEFAULT_BLOCKTIME (__kmp_is_hybrid_cpu() ? (0) : (200))
#if KMP_USE_MONITOR
#define KMP_DEFAULT_MONITOR_STKSIZE ((size_t)(64 * 1024))
@@ -1204,6 +1221,13 @@ typedef struct kmp_cpuid {
kmp_uint32 edx;
} kmp_cpuid_t;
+typedef struct kmp_cpuinfo_flags_t {
+ unsigned sse2 : 1; // 0 if SSE2 instructions are not supported, 1 otherwise.
+ unsigned rtm : 1; // 0 if RTM instructions are not supported, 1 otherwise.
+ unsigned hybrid : 1;
+ unsigned reserved : 29; // Ensure size of 32 bits
+} kmp_cpuinfo_flags_t;
+
typedef struct kmp_cpuinfo {
int initialized; // If 0, other fields are not initialized.
int signature; // CPUID(1).EAX
@@ -1211,8 +1235,7 @@ typedef struct kmp_cpuinfo {
int model; // ( CPUID(1).EAX[19:16] << 4 ) + CPUID(1).EAX[7:4] ( ( Extended
// Model << 4 ) + Model)
int stepping; // CPUID(1).EAX[3:0] ( Stepping )
- int sse2; // 0 if SSE2 instructions are not supported, 1 otherwise.
- int rtm; // 0 if RTM instructions are not supported, 1 otherwise.
+ kmp_cpuinfo_flags_t flags;
int apic_id;
int physical_id;
int logical_id;
@@ -1437,6 +1460,8 @@ __kmp_mm_mwait(unsigned extensions, unsigned hints) {
/* Support datatypes for the orphaned construct nesting checks. */
/* ------------------------------------------------------------------------ */
+/* When adding to this enum, add its corresponding string in cons_text_c[]
+ * array in kmp_error.cpp */
enum cons_type {
ct_none,
ct_parallel,
@@ -1879,6 +1904,15 @@ typedef struct kmp_disp {
0 // Thread th_reap_state: not safe to reap (tasking)
#define KMP_SAFE_TO_REAP 1 // Thread th_reap_state: safe to reap (not tasking)
+// The flag_type describes the storage used for the flag.
+enum flag_type {
+ flag32, /**< atomic 32 bit flags */
+ flag64, /**< 64 bit flags */
+ atomic_flag64, /**< atomic 64 bit flags */
+ flag_oncore, /**< special 64-bit flag for on-core barrier (hierarchical) */
+ flag_unset
+};
+
enum barrier_type {
bs_plain_barrier = 0, /* 0, All non-fork/join barriers (except reduction
barriers if enabled) */
@@ -1902,6 +1936,7 @@ typedef enum kmp_bar_pat { /* Barrier communication patterns */
bp_hyper_bar = 2, /* Hypercube-embedded tree with min
branching factor 2^n */
bp_hierarchical_bar = 3, /* Machine hierarchy tree */
+ bp_dist_bar = 4, /* Distributed barrier */
bp_last_bar /* Placeholder to mark the end */
} kmp_bar_pat_e;
@@ -2241,22 +2276,26 @@ typedef union kmp_depnode kmp_depnode_t;
typedef struct kmp_depnode_list kmp_depnode_list_t;
typedef struct kmp_dephash_entry kmp_dephash_entry_t;
+// macros for checking dep flag as an integer
#define KMP_DEP_IN 0x1
#define KMP_DEP_OUT 0x2
#define KMP_DEP_INOUT 0x3
#define KMP_DEP_MTX 0x4
#define KMP_DEP_SET 0x8
+#define KMP_DEP_ALL 0x80
// Compiler sends us this info:
typedef struct kmp_depend_info {
kmp_intptr_t base_addr;
size_t len;
union {
- kmp_uint8 flag;
- struct {
+ kmp_uint8 flag; // flag as an unsigned char
+ struct { // flag as a set of 8 bits
unsigned in : 1;
unsigned out : 1;
unsigned mtx : 1;
unsigned set : 1;
+ unsigned unused : 3;
+ unsigned all : 1;
} flags;
};
} kmp_depend_info_t;
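The kmp_depend_info hunk above adds an `all` bit in position 7 of the flag byte, matching the new KMP_DEP_ALL macro (0x80) next to the existing in/out/mtx/set bits. A small standalone check of that correspondence, assuming the usual little-endian, LSB-first bit-field allocation (bit-field layout is implementation-defined, so this is illustrative rather than guaranteed):

#include <cassert>
#include <cstdint>
#include <cstring>

struct dep_flags_t {
  unsigned in : 1;  // 0x1  (KMP_DEP_IN)
  unsigned out : 1; // 0x2  (KMP_DEP_OUT)
  unsigned mtx : 1; // 0x4  (KMP_DEP_MTX)
  unsigned set : 1; // 0x8  (KMP_DEP_SET)
  unsigned unused : 3;
  unsigned all : 1; // 0x80 (KMP_DEP_ALL)
};

int main() {
  dep_flags_t f{};
  std::uint8_t byte = 0;
  f.all = 1;
  std::memcpy(&byte, &f, 1);
  assert(byte == 0x80); // matches KMP_DEP_ALL
  f = dep_flags_t{};
  f.in = f.out = 1;
  std::memcpy(&byte, &f, 1);
  assert(byte == 0x3); // KMP_DEP_INOUT == KMP_DEP_IN | KMP_DEP_OUT
  return 0;
}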
@@ -2302,6 +2341,7 @@ struct kmp_dephash_entry {
typedef struct kmp_dephash {
kmp_dephash_entry_t **buckets;
size_t size;
+ kmp_depnode_t *last_all;
size_t generation;
kmp_uint32 nelements;
kmp_uint32 nconflicts;
@@ -2409,13 +2449,6 @@ struct kmp_taskdata { /* aligned during dynamic allocation */
kmp_depnode_t
*td_depnode; // Pointer to graph node if this task has dependencies
kmp_task_team_t *td_task_team;
- // The global thread id of the encountering thread. We need it because when a
- // regular task depends on a hidden helper task, and the hidden helper task
- // is finished on a hidden helper thread, it will call __kmp_release_deps to
- // release all dependences. If now the task is a regular task, we need to pass
- // the encountering gtid such that the task will be picked up and executed by
- // its encountering team instead of hidden helper team.
- kmp_int32 encountering_gtid;
size_t td_size_alloc; // Size of task structure, including shareds etc.
#if defined(KMP_GOMP_COMPAT)
// 4 or 8 byte integers for the loop bounds in GOMP_taskloop
@@ -2626,6 +2659,7 @@ typedef struct KMP_ALIGN_CACHE kmp_base_info {
/* while awaiting queuing lock acquire */
volatile void *th_sleep_loc; // this points at a kmp_flag<T>
+ flag_type th_sleep_loc_type; // enum type of flag stored in th_sleep_loc
ident_t *th_ident;
unsigned th_x; // Random number generator data
@@ -2646,6 +2680,9 @@ typedef struct KMP_ALIGN_CACHE kmp_base_info {
written by the worker thread) */
kmp_uint8 th_active_in_pool; // included in count of #active threads in pool
int th_active; // ! sleeping; 32 bits for TCR/TCW
+ std::atomic<kmp_uint32> th_used_in_team; // Flag indicating use in team
+ // 0 = not used in team; 1 = used in team;
+ // 2 = transitioning to not used in team; 3 = transitioning to used in team
struct cons_header *th_cons; // used for consistency check
#if KMP_USE_HIER_SCHED
// used for hierarchical scheduling
@@ -2825,6 +2862,7 @@ typedef struct KMP_ALIGN_CACHE kmp_base_team {
#if USE_ITT_BUILD
void *t_stack_id; // team specific stack stitching id (for ittnotify)
#endif /* USE_ITT_BUILD */
+ distributedBarrier *b; // Distributed barrier data associated with team
} kmp_base_team_t;
union KMP_ALIGN_CACHE kmp_team {
@@ -2949,6 +2987,9 @@ extern int __kmp_storage_map_verbose_specified;
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
extern kmp_cpuinfo_t __kmp_cpuinfo;
+static inline bool __kmp_is_hybrid_cpu() { return __kmp_cpuinfo.flags.hybrid; }
+#else
+static inline bool __kmp_is_hybrid_cpu() { return false; }
#endif
extern volatile int __kmp_init_serial;
@@ -4118,6 +4159,10 @@ typedef enum kmp_severity_t {
} kmp_severity_t;
extern void __kmpc_error(ident_t *loc, int severity, const char *message);
+// Support for scope directive
+KMP_EXPORT void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved);
+KMP_EXPORT void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved);
+
#ifdef __cplusplus
}
#endif
@@ -4126,18 +4171,26 @@ template <bool C, bool S>
extern void __kmp_suspend_32(int th_gtid, kmp_flag_32<C, S> *flag);
template <bool C, bool S>
extern void __kmp_suspend_64(int th_gtid, kmp_flag_64<C, S> *flag);
+template <bool C, bool S>
+extern void __kmp_atomic_suspend_64(int th_gtid,
+ kmp_atomic_flag_64<C, S> *flag);
extern void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag);
#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
template <bool C, bool S>
extern void __kmp_mwait_32(int th_gtid, kmp_flag_32<C, S> *flag);
template <bool C, bool S>
extern void __kmp_mwait_64(int th_gtid, kmp_flag_64<C, S> *flag);
+template <bool C, bool S>
+extern void __kmp_atomic_mwait_64(int th_gtid, kmp_atomic_flag_64<C, S> *flag);
extern void __kmp_mwait_oncore(int th_gtid, kmp_flag_oncore *flag);
#endif
template <bool C, bool S>
extern void __kmp_resume_32(int target_gtid, kmp_flag_32<C, S> *flag);
template <bool C, bool S>
extern void __kmp_resume_64(int target_gtid, kmp_flag_64<C, S> *flag);
+template <bool C, bool S>
+extern void __kmp_atomic_resume_64(int target_gtid,
+ kmp_atomic_flag_64<C, S> *flag);
extern void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag);
template <bool C, bool S>
@@ -4156,6 +4209,14 @@ int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid,
void *itt_sync_obj,
#endif /* USE_ITT_BUILD */
kmp_int32 is_constrained);
+template <bool C, bool S>
+int __kmp_atomic_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid,
+ kmp_atomic_flag_64<C, S> *flag,
+ int final_spin, int *thread_finished,
+#if USE_ITT_BUILD
+ void *itt_sync_obj,
+#endif /* USE_ITT_BUILD */
+ kmp_int32 is_constrained);
int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid,
kmp_flag_oncore *flag, int final_spin,
int *thread_finished,
@@ -4213,6 +4274,15 @@ public:
}
}
}
+  /// Instead of erroring out, return a non-zero error code when
+  /// fopen() fails for any reason
+ int try_open(const char *filename, const char *mode) {
+ KMP_ASSERT(!f);
+ f = fopen(filename, mode);
+ if (!f)
+ return errno;
+ return 0;
+ }
/// Set the FILE* object to stdout and output there
/// No open call should happen before this call.
void set_stdout() {
diff --git a/openmp/runtime/src/kmp_affinity.cpp b/openmp/runtime/src/kmp_affinity.cpp
index 3a092a803276..84086f30317a 100644
--- a/openmp/runtime/src/kmp_affinity.cpp
+++ b/openmp/runtime/src/kmp_affinity.cpp
@@ -26,6 +26,7 @@
#define HWLOC_GROUP_KIND_INTEL_DIE 104
#define HWLOC_GROUP_KIND_WINDOWS_PROCESSOR_GROUP 220
#endif
+#include <ctype.h>
// The machine topology
kmp_topology_t *__kmp_topology = nullptr;
@@ -123,6 +124,20 @@ const char *__kmp_hw_get_keyword(kmp_hw_t type, bool plural) {
return ((plural) ? "unknowns" : "unknown");
}
+const char *__kmp_hw_get_core_type_string(kmp_hw_core_type_t type) {
+ switch (type) {
+ case KMP_HW_CORE_TYPE_UNKNOWN:
+ return "unknown";
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+ case KMP_HW_CORE_TYPE_ATOM:
+ return "Intel Atom(R) processor";
+ case KMP_HW_CORE_TYPE_CORE:
+ return "Intel(R) Core(TM) processor";
+#endif
+ }
+ return "unknown";
+}
+
////////////////////////////////////////////////////////////////////////////////
// kmp_hw_thread_t methods
int kmp_hw_thread_t::compare_ids(const void *a, const void *b) {
@@ -174,20 +189,91 @@ void kmp_hw_thread_t::print() const {
for (int i = 0; i < depth; ++i) {
printf("%4d ", ids[i]);
}
+ if (core_type != KMP_HW_CORE_TYPE_UNKNOWN) {
+ printf(" (%s)", __kmp_hw_get_core_type_string(core_type));
+ }
printf("\n");
}
////////////////////////////////////////////////////////////////////////////////
// kmp_topology_t methods
+// Add a layer to the topology based on the ids. Assume the topology
+// is perfectly nested (i.e., no object has more than one parent).
+void kmp_topology_t::_insert_layer(kmp_hw_t type, const int *ids) {
+ // Figure out where the layer should go by comparing the ids of the current
+ // layers with the new ids
+ int target_layer;
+ int previous_id = kmp_hw_thread_t::UNKNOWN_ID;
+ int previous_new_id = kmp_hw_thread_t::UNKNOWN_ID;
+
+ // Start from the highest layer and work down to find target layer
+ // If new layer is equal to another layer then put the new layer above
+ for (target_layer = 0; target_layer < depth; ++target_layer) {
+ bool layers_equal = true;
+ bool strictly_above_target_layer = false;
+ for (int i = 0; i < num_hw_threads; ++i) {
+ int id = hw_threads[i].ids[target_layer];
+ int new_id = ids[i];
+ if (id != previous_id && new_id == previous_new_id) {
+ // Found the layer we are strictly above
+ strictly_above_target_layer = true;
+ layers_equal = false;
+ break;
+ } else if (id == previous_id && new_id != previous_new_id) {
+ // Found a layer we are below. Move to next layer and check.
+ layers_equal = false;
+ break;
+ }
+ previous_id = id;
+ previous_new_id = new_id;
+ }
+ if (strictly_above_target_layer || layers_equal)
+ break;
+ }
+
+ // Found the layer we are above. Now move everything to accommodate the new
+ // layer. And put the new ids and type into the topology.
+ for (int i = depth - 1, j = depth; i >= target_layer; --i, --j)
+ types[j] = types[i];
+ types[target_layer] = type;
+ for (int k = 0; k < num_hw_threads; ++k) {
+ for (int i = depth - 1, j = depth; i >= target_layer; --i, --j)
+ hw_threads[k].ids[j] = hw_threads[k].ids[i];
+ hw_threads[k].ids[target_layer] = ids[k];
+ }
+ equivalent[type] = type;
+ depth++;
+}
+
+#if KMP_GROUP_AFFINITY
+// Insert the Windows Processor Group structure into the topology
+void kmp_topology_t::_insert_windows_proc_groups() {
+ // Do not insert the processor group structure for a single group
+ if (__kmp_num_proc_groups == 1)
+ return;
+ kmp_affin_mask_t *mask;
+ int *ids = (int *)__kmp_allocate(sizeof(int) * num_hw_threads);
+ KMP_CPU_ALLOC(mask);
+ for (int i = 0; i < num_hw_threads; ++i) {
+ KMP_CPU_ZERO(mask);
+ KMP_CPU_SET(hw_threads[i].os_id, mask);
+ ids[i] = __kmp_get_proc_group(mask);
+ }
+ KMP_CPU_FREE(mask);
+ _insert_layer(KMP_HW_PROC_GROUP, ids);
+ __kmp_free(ids);
+}
+#endif
+
// Remove layers that don't add information to the topology.
// This is done by having the layer take on the id = UNKNOWN_ID (-1)
void kmp_topology_t::_remove_radix1_layers() {
int preference[KMP_HW_LAST];
int top_index1, top_index2;
// Set up preference associative array
- preference[KMP_HW_PROC_GROUP] = 110;
- preference[KMP_HW_SOCKET] = 100;
+ preference[KMP_HW_SOCKET] = 110;
+ preference[KMP_HW_PROC_GROUP] = 100;
preference[KMP_HW_CORE] = 95;
preference[KMP_HW_THREAD] = 90;
preference[KMP_HW_NUMA] = 85;
@@ -305,6 +391,13 @@ void kmp_topology_t::_gather_enumeration_information() {
count[i] = 0;
ratio[i] = 0;
}
+ if (__kmp_is_hybrid_cpu()) {
+ for (int i = 0; i < KMP_HW_MAX_NUM_CORE_TYPES; ++i) {
+ core_types_count[i] = 0;
+ core_types[i] = KMP_HW_CORE_TYPE_UNKNOWN;
+ }
+ }
+ int core_level = get_level(KMP_HW_CORE);
for (int i = 0; i < num_hw_threads; ++i) {
kmp_hw_thread_t &hw_thread = hw_threads[i];
for (int layer = 0; layer < depth; ++layer) {
@@ -320,6 +413,9 @@ void kmp_topology_t::_gather_enumeration_information() {
ratio[l] = max[l];
max[l] = 1;
}
+ // Figure out the number of each core type for hybrid CPUs
+ if (__kmp_is_hybrid_cpu() && core_level >= 0 && layer <= core_level)
+ _increment_core_type(hw_thread.core_type);
break;
}
}
@@ -406,7 +502,7 @@ kmp_topology_t *kmp_topology_t::allocate(int nproc, int ndepth,
kmp_topology_t *retval;
// Allocate all data in one large allocation
size_t size = sizeof(kmp_topology_t) + sizeof(kmp_hw_thread_t) * nproc +
- sizeof(int) * ndepth * 3;
+ sizeof(int) * (size_t)KMP_HW_LAST * 3;
char *bytes = (char *)__kmp_allocate(size);
retval = (kmp_topology_t *)bytes;
if (nproc > 0) {
@@ -419,8 +515,8 @@ kmp_topology_t *kmp_topology_t::allocate(int nproc, int ndepth,
int *arr =
(int *)(bytes + sizeof(kmp_topology_t) + sizeof(kmp_hw_thread_t) * nproc);
retval->types = (kmp_hw_t *)arr;
- retval->ratio = arr + ndepth;
- retval->count = arr + 2 * ndepth;
+ retval->ratio = arr + (size_t)KMP_HW_LAST;
+ retval->count = arr + 2 * (size_t)KMP_HW_LAST;
KMP_FOREACH_HW_TYPE(type) { retval->equivalent[type] = KMP_HW_UNKNOWN; }
for (int i = 0; i < ndepth; ++i) {
retval->types[i] = types[i];
@@ -478,6 +574,19 @@ void kmp_topology_t::dump() const {
}
printf("\n");
+ printf("* core_types:\n");
+ for (int i = 0; i < KMP_HW_MAX_NUM_CORE_TYPES; ++i) {
+ if (core_types[i] != KMP_HW_CORE_TYPE_UNKNOWN) {
+ printf(" %d %s core%c\n", core_types_count[i],
+ __kmp_hw_get_core_type_string(core_types[i]),
+ ((core_types_count[i] > 1) ? 's' : ' '));
+ } else {
+ if (i == 0)
+ printf("No hybrid information available\n");
+ break;
+ }
+ }
+
printf("* equivalent map:\n");
KMP_FOREACH_HW_TYPE(i) {
const char *key = __kmp_hw_get_keyword(i);
@@ -571,6 +680,15 @@ void kmp_topology_t::print(const char *env_var) const {
}
KMP_INFORM(TopologyGeneric, env_var, buf.str, ncores);
+ if (__kmp_is_hybrid_cpu()) {
+ for (int i = 0; i < KMP_HW_MAX_NUM_CORE_TYPES; ++i) {
+ if (core_types[i] == KMP_HW_CORE_TYPE_UNKNOWN)
+ break;
+ KMP_INFORM(TopologyHybrid, env_var, core_types_count[i],
+ __kmp_hw_get_core_type_string(core_types[i]));
+ }
+ }
+
if (num_hw_threads <= 0) {
__kmp_str_buf_free(&buf);
return;
@@ -585,6 +703,9 @@ void kmp_topology_t::print(const char *env_var) const {
__kmp_str_buf_print(&buf, "%s ", __kmp_hw_get_catalog_string(type));
__kmp_str_buf_print(&buf, "%d ", hw_threads[i].ids[level]);
}
+ if (__kmp_is_hybrid_cpu())
+ __kmp_str_buf_print(
+ &buf, "(%s)", __kmp_hw_get_core_type_string(hw_threads[i].core_type));
KMP_INFORM(OSProcMapToPack, env_var, hw_threads[i].os_id, buf.str);
}
@@ -592,6 +713,9 @@ void kmp_topology_t::print(const char *env_var) const {
}
void kmp_topology_t::canonicalize() {
+#if KMP_GROUP_AFFINITY
+ _insert_windows_proc_groups();
+#endif
_remove_radix1_layers();
_gather_enumeration_information();
_discover_uniformity();
@@ -640,6 +764,25 @@ void kmp_topology_t::canonicalize() {
__kmp_hw_get_catalog_string(gran_type));
__kmp_affinity_gran = gran_type;
}
+#if KMP_GROUP_AFFINITY
+ // If more than one processor group exists, and the level of
+ // granularity specified by the user is too coarse, then the
+ // granularity must be adjusted "down" to processor group affinity
+ // because threads can only exist within one processor group.
+ // For example, if a user sets granularity=socket and there are two
+ // processor groups that cover a socket, then the runtime must
+ // restrict the granularity down to the processor group level.
+ if (__kmp_num_proc_groups > 1) {
+ int gran_depth = __kmp_topology->get_level(gran_type);
+ int proc_group_depth = __kmp_topology->get_level(KMP_HW_PROC_GROUP);
+ if (gran_depth >= 0 && proc_group_depth >= 0 &&
+ gran_depth < proc_group_depth) {
+ KMP_WARNING(AffGranTooCoarseProcGroup, "KMP_AFFINITY",
+ __kmp_hw_get_catalog_string(__kmp_affinity_gran));
+ __kmp_affinity_gran = gran_type = KMP_HW_PROC_GROUP;
+ }
+ }
+#endif
__kmp_affinity_gran_levels = 0;
for (int i = depth - 1; i >= 0 && get_type(i) != gran_type; --i)
__kmp_affinity_gran_levels++;
@@ -681,6 +824,9 @@ bool kmp_topology_t::filter_hw_subset() {
if (!__kmp_hw_subset)
return false;
+ // First, sort the KMP_HW_SUBSET items by the machine topology
+ __kmp_hw_subset->sort();
+
// Check to see if KMP_HW_SUBSET is a valid subset of the detected topology
int hw_subset_depth = __kmp_hw_subset->get_depth();
kmp_hw_t specified[KMP_HW_LAST];
@@ -712,23 +858,6 @@ bool kmp_topology_t::filter_hw_subset() {
}
specified[equivalent_type] = type;
- // Check to see if layers are in order
- if (i + 1 < hw_subset_depth) {
- kmp_hw_t next_type = get_equivalent_type(__kmp_hw_subset->at(i + 1).type);
- if (next_type == KMP_HW_UNKNOWN) {
- KMP_WARNING(
- AffHWSubsetNotExistGeneric,
- __kmp_hw_get_catalog_string(__kmp_hw_subset->at(i + 1).type));
- return false;
- }
- int next_topology_level = get_level(next_type);
- if (level > next_topology_level) {
- KMP_WARNING(AffHWSubsetOutOfOrder, __kmp_hw_get_catalog_string(type),
- __kmp_hw_get_catalog_string(next_type));
- return false;
- }
- }
-
// Check to see if each layer's num & offset parameters are valid
max_count = get_ratio(level);
if (max_count < 0 || num + offset > max_count) {
@@ -986,7 +1115,67 @@ kmp_str_buf_t *__kmp_affinity_str_buf_mask(kmp_str_buf_t *buf,
return buf;
}
-void __kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask) {
+// Return (possibly empty) affinity mask representing the offline CPUs
+// Caller must free the mask
+kmp_affin_mask_t *__kmp_affinity_get_offline_cpus() {
+ kmp_affin_mask_t *offline;
+ KMP_CPU_ALLOC(offline);
+ KMP_CPU_ZERO(offline);
+#if KMP_OS_LINUX
+ int n, begin_cpu, end_cpu;
+ kmp_safe_raii_file_t offline_file;
+ auto skip_ws = [](FILE *f) {
+ int c;
+ do {
+ c = fgetc(f);
+ } while (isspace(c));
+ if (c != EOF)
+ ungetc(c, f);
+ };
+ // File contains CSV of integer ranges representing the offline CPUs
+ // e.g., 1,2,4-7,9,11-15
+ int status = offline_file.try_open("/sys/devices/system/cpu/offline", "r");
+ if (status != 0)
+ return offline;
+ while (!feof(offline_file)) {
+ skip_ws(offline_file);
+ n = fscanf(offline_file, "%d", &begin_cpu);
+ if (n != 1)
+ break;
+ skip_ws(offline_file);
+ int c = fgetc(offline_file);
+ if (c == EOF || c == ',') {
+ // Just single CPU
+ end_cpu = begin_cpu;
+ } else if (c == '-') {
+ // Range of CPUs
+ skip_ws(offline_file);
+ n = fscanf(offline_file, "%d", &end_cpu);
+ if (n != 1)
+ break;
+ skip_ws(offline_file);
+ c = fgetc(offline_file); // skip ','
+ } else {
+ // Syntax problem
+ break;
+ }
+ // Ensure a valid range of CPUs
+ if (begin_cpu < 0 || begin_cpu >= __kmp_xproc || end_cpu < 0 ||
+ end_cpu >= __kmp_xproc || begin_cpu > end_cpu) {
+ continue;
+ }
+ // Insert [begin_cpu, end_cpu] into offline mask
+ for (int cpu = begin_cpu; cpu <= end_cpu; ++cpu) {
+ KMP_CPU_SET(cpu, offline);
+ }
+ }
+#endif
+ return offline;
+}
+
+// Return the number of available procs
+int __kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask) {
+ int avail_proc = 0;
KMP_CPU_ZERO(mask);
#if KMP_GROUP_AFFINITY
@@ -999,6 +1188,7 @@ void __kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask) {
int num = __kmp_GetActiveProcessorCount(group);
for (i = 0; i < num; i++) {
KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
+ avail_proc++;
}
}
} else
@@ -1007,10 +1197,18 @@ void __kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask) {
{
int proc;
+ kmp_affin_mask_t *offline_cpus = __kmp_affinity_get_offline_cpus();
for (proc = 0; proc < __kmp_xproc; proc++) {
+ // Skip offline CPUs
+ if (KMP_CPU_ISSET(proc, offline_cpus))
+ continue;
KMP_CPU_SET(proc, mask);
+ avail_proc++;
}
+ KMP_CPU_FREE(offline_cpus);
}
+
+ return avail_proc;
}
// All of the __kmp_affinity_create_*_map() routines should allocate the
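The new __kmp_affinity_get_offline_cpus above parses /sys/devices/system/cpu/offline, which holds a comma-separated list of single CPUs and CPU ranges. A self-contained sketch of parsing that same format, independent of the runtime's kmp_affin_mask_t machinery and purely for illustration:

#include <iostream>
#include <set>
#include <sstream>
#include <string>

static std::set<int> parse_cpu_list(const std::string &s) {
  std::set<int> cpus;
  std::stringstream ss(s);
  std::string item;
  while (std::getline(ss, item, ',')) {
    if (item.empty())
      continue;
    auto dash = item.find('-');
    int begin = std::stoi(item.substr(0, dash));
    int end = (dash == std::string::npos) ? begin
                                          : std::stoi(item.substr(dash + 1));
    for (int cpu = begin; cpu <= end; ++cpu)
      cpus.insert(cpu); // expand each range into individual CPU ids
  }
  return cpus;
}

int main() {
  for (int cpu : parse_cpu_list("1,2,4-7,9,11-15"))
    std::cout << cpu << ' ';
  std::cout << '\n'; // prints: 1 2 4 5 6 7 9 11 12 13 14 15
  return 0;
}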
@@ -1156,6 +1354,45 @@ static bool __kmp_affinity_create_hwloc_map(kmp_i18n_id_t *const msg_id) {
return true;
}
+ // Handle multiple types of cores if they exist on the system
+ int nr_cpu_kinds = hwloc_cpukinds_get_nr(tp, 0);
+
+ typedef struct kmp_hwloc_cpukinds_info_t {
+ int efficiency;
+ kmp_hw_core_type_t core_type;
+ hwloc_bitmap_t mask;
+ } kmp_hwloc_cpukinds_info_t;
+ kmp_hwloc_cpukinds_info_t *cpukinds = nullptr;
+
+ if (nr_cpu_kinds > 0) {
+ unsigned nr_infos;
+ struct hwloc_info_s *infos;
+ cpukinds = (kmp_hwloc_cpukinds_info_t *)__kmp_allocate(
+ sizeof(kmp_hwloc_cpukinds_info_t) * nr_cpu_kinds);
+ for (unsigned idx = 0; idx < (unsigned)nr_cpu_kinds; ++idx) {
+ cpukinds[idx].efficiency = -1;
+ cpukinds[idx].core_type = KMP_HW_CORE_TYPE_UNKNOWN;
+ cpukinds[idx].mask = hwloc_bitmap_alloc();
+ if (hwloc_cpukinds_get_info(tp, idx, cpukinds[idx].mask,
+ &cpukinds[idx].efficiency, &nr_infos, &infos,
+ 0) == 0) {
+ for (unsigned i = 0; i < nr_infos; ++i) {
+ if (__kmp_str_match("CoreType", 8, infos[i].name)) {
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+ if (__kmp_str_match("IntelAtom", 9, infos[i].value)) {
+ cpukinds[idx].core_type = KMP_HW_CORE_TYPE_ATOM;
+ break;
+ } else if (__kmp_str_match("IntelCore", 9, infos[i].value)) {
+ cpukinds[idx].core_type = KMP_HW_CORE_TYPE_CORE;
+ break;
+ }
+#endif
+ }
+ }
+ }
+ }
+ }
+
root = hwloc_get_root_obj(tp);
// Figure out the depth and types in the topology
@@ -1215,6 +1452,18 @@ static bool __kmp_affinity_create_hwloc_map(kmp_i18n_id_t *const msg_id) {
hw_thread.clear();
hw_thread.ids[index] = pu->logical_index;
hw_thread.os_id = pu->os_index;
+ // If multiple core types, then set that attribute for the hardware thread
+ if (cpukinds) {
+ int cpukind_index = -1;
+ for (int i = 0; i < nr_cpu_kinds; ++i) {
+ if (hwloc_bitmap_isset(cpukinds[i].mask, hw_thread.os_id)) {
+ cpukind_index = i;
+ break;
+ }
+ }
+ if (cpukind_index >= 0)
+ hw_thread.core_type = cpukinds[cpukind_index].core_type;
+ }
index--;
}
obj = pu;
@@ -1258,6 +1507,13 @@ static bool __kmp_affinity_create_hwloc_map(kmp_i18n_id_t *const msg_id) {
if (included)
hw_thread_index++;
}
+
+ // Free the core types information
+ if (cpukinds) {
+ for (int idx = 0; idx < nr_cpu_kinds; ++idx)
+ hwloc_bitmap_free(cpukinds[idx].mask);
+ __kmp_free(cpukinds);
+ }
__kmp_topology->sort_ids();
return true;
}
@@ -1782,6 +2038,16 @@ static bool __kmp_affinity_create_apicid_map(kmp_i18n_id_t *const msg_id) {
return true;
}
+// Hybrid cpu detection using CPUID.1A
+// Thread should be pinned to processor already
+static void __kmp_get_hybrid_info(kmp_hw_core_type_t *type,
+ unsigned *native_model_id) {
+ kmp_cpuid buf;
+ __kmp_x86_cpuid(0x1a, 0, &buf);
+ *type = (kmp_hw_core_type_t)__kmp_extract_bits<24, 31>(buf.eax);
+ *native_model_id = __kmp_extract_bits<0, 23>(buf.eax);
+}
+
// Intel(R) microarchitecture code name Nehalem, Dunnington and later
// architectures support a newer interface for specifying the x2APIC Ids,
// based on CPUID.B or CPUID.1F
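__kmp_get_hybrid_info above reads CPUID leaf 0x1A, where EAX[31:24] is the core-type identifier and EAX[23:0] is the native model id; the kmp_hw_core_type_t enum added later in this diff maps 0x20 to Atom and 0x40 to Core. A sketch of the same field extraction using plain shifts in place of __kmp_extract_bits<>; the register value in main() is made up for illustration, while a real implementation would execute CPUID with EAX = 0x1A on the already-pinned thread:

#include <cstdio>

struct cpuid_regs { unsigned eax, ebx, ecx, edx; };

static void decode_hybrid_leaf(const cpuid_regs &r) {
  unsigned core_type = (r.eax >> 24) & 0xFFu; // EAX[31:24], __kmp_extract_bits<24, 31>
  unsigned native_model = r.eax & 0xFFFFFFu;  // EAX[23:0],  __kmp_extract_bits<0, 23>
  const char *name = core_type == 0x20u   ? "Atom (efficiency core)"
                     : core_type == 0x40u ? "Core (performance core)"
                                          : "unknown";
  std::printf("core type 0x%02x (%s), native model id 0x%x\n", core_type, name,
              native_model);
}

int main() {
  decode_hybrid_leaf({0x40000021u, 0u, 0u, 0u});
  return 0;
}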
@@ -2051,6 +2317,13 @@ static bool __kmp_affinity_create_x2apicid_map(kmp_i18n_id_t *const msg_id) {
hw_thread.ids[idx] >>= my_levels[j - 1].mask_width;
}
}
+ // Hybrid information
+ if (__kmp_is_hybrid_cpu() && highest_leaf >= 0x1a) {
+ kmp_hw_core_type_t type;
+ unsigned native_model_id;
+ __kmp_get_hybrid_info(&type, &native_model_id);
+ hw_thread.core_type = type;
+ }
hw_thread_index++;
}
KMP_ASSERT(hw_thread_index > 0);
@@ -2386,7 +2659,10 @@ static bool __kmp_affinity_create_cpuinfo_map(int *line,
unsigned val;
if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
goto no_val;
- KMP_ASSERT(nodeIdIndex + level <= maxIndex);
+ // validate the input before using level:
+ if (level > (unsigned)__kmp_xproc) { // level is too big
+ level = __kmp_xproc;
+ }
if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX)
goto dup_field;
threadInfo[num_avail][nodeIdIndex + level] = val;
@@ -3497,8 +3773,8 @@ static void __kmp_aux_affinity_initialize(void) {
__kmp_affin_fullMask);
KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
}
- __kmp_affinity_entire_machine_mask(__kmp_affin_fullMask);
- __kmp_avail_proc = __kmp_xproc;
+ __kmp_avail_proc =
+ __kmp_affinity_entire_machine_mask(__kmp_affin_fullMask);
#if KMP_OS_WINDOWS
// Set the process affinity mask since threads' affinity
// masks must be subset of process mask in Windows* OS
@@ -4145,14 +4421,19 @@ int __kmp_aux_set_affinity(void **mask) {
int __kmp_aux_get_affinity(void **mask) {
int gtid;
int retval;
+#if KMP_OS_WINDOWS || KMP_DEBUG
kmp_info_t *th;
-
+#endif
if (!KMP_AFFINITY_CAPABLE()) {
return -1;
}
gtid = __kmp_entry_gtid();
+#if KMP_OS_WINDOWS || KMP_DEBUG
th = __kmp_threads[gtid];
+#else
+ (void)gtid; // unused variable
+#endif
KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
KA_TRACE(
diff --git a/openmp/runtime/src/kmp_affinity.h b/openmp/runtime/src/kmp_affinity.h
index 8e72922d2c6e..76ba38bc8fc2 100644
--- a/openmp/runtime/src/kmp_affinity.h
+++ b/openmp/runtime/src/kmp_affinity.h
@@ -598,6 +598,17 @@ class KMPNativeAffinity : public KMPAffinity {
#endif /* KMP_OS_WINDOWS */
#endif /* KMP_AFFINITY_SUPPORTED */
+typedef enum kmp_hw_core_type_t {
+ KMP_HW_CORE_TYPE_UNKNOWN = 0x0,
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+ KMP_HW_CORE_TYPE_ATOM = 0x20,
+ KMP_HW_CORE_TYPE_CORE = 0x40,
+ KMP_HW_MAX_NUM_CORE_TYPES = 3,
+#else
+ KMP_HW_MAX_NUM_CORE_TYPES = 1,
+#endif
+} kmp_hw_core_type_t;
+
class kmp_hw_thread_t {
public:
static const int UNKNOWN_ID = -1;
@@ -607,11 +618,14 @@ public:
int sub_ids[KMP_HW_LAST];
bool leader;
int os_id;
+ kmp_hw_core_type_t core_type;
+
void print() const;
void clear() {
for (int i = 0; i < (int)KMP_HW_LAST; ++i)
ids[i] = UNKNOWN_ID;
leader = false;
+ core_type = KMP_HW_CORE_TYPE_UNKNOWN;
}
};
@@ -624,7 +638,9 @@ class kmp_topology_t {
int depth;
- // The following arrays are all 'depth' long
+  // The following arrays are all 'depth' long, but are allocated to hold up
+  // to KMP_HW_LAST objects so that layers can be added without reallocating
+  // any array
  // Ordered array of the types in the topology
kmp_hw_t *types;
@@ -637,6 +653,11 @@ class kmp_topology_t {
// Storage containing the absolute number of each topology layer
int *count;
+ // Storage containing the core types and the number of
+ // each core type for hybrid processors
+ kmp_hw_core_type_t core_types[KMP_HW_MAX_NUM_CORE_TYPES];
+ int core_types_count[KMP_HW_MAX_NUM_CORE_TYPES];
+
// The hardware threads array
// hw_threads is num_hw_threads long
// Each hw_thread's ids and sub_ids are depth deep
@@ -652,6 +673,14 @@ class kmp_topology_t {
// Flags describing the topology
flags_t flags;
+ // Insert a new topology layer after allocation
+ void _insert_layer(kmp_hw_t type, const int *ids);
+
+#if KMP_GROUP_AFFINITY
+ // Insert topology information about Windows Processor groups
+ void _insert_windows_proc_groups();
+#endif
+
// Count each item & get the num x's per y
// e.g., get the number of cores and the number of threads per core
// for each (x, y) in (KMP_HW_* , KMP_HW_*)
@@ -675,6 +704,20 @@ class kmp_topology_t {
// Set the last level cache equivalent type
void _set_last_level_cache();
+ // Increments the number of cores of type 'type'
+ void _increment_core_type(kmp_hw_core_type_t type) {
+ for (int i = 0; i < KMP_HW_MAX_NUM_CORE_TYPES; ++i) {
+ if (core_types[i] == KMP_HW_CORE_TYPE_UNKNOWN) {
+ core_types[i] = type;
+ core_types_count[i] = 1;
+ break;
+ } else if (core_types[i] == type) {
+ core_types_count[i]++;
+ break;
+ }
+ }
+ }
+
public:
// Force use of allocate()/deallocate()
kmp_topology_t() = delete;
@@ -773,6 +816,7 @@ public:
void print(const char *env_var = "KMP_AFFINITY") const;
void dump() const;
};
+extern kmp_topology_t *__kmp_topology;
class kmp_hw_subset_t {
public:
@@ -790,6 +834,15 @@ private:
bool absolute;
// The set must be able to handle up to KMP_HW_LAST number of layers
KMP_BUILD_ASSERT(sizeof(set) * 8 >= KMP_HW_LAST);
+ // Sorting the KMP_HW_SUBSET items to follow topology order
+ // All unknown topology types will be at the beginning of the subset
+ static int hw_subset_compare(const void *i1, const void *i2) {
+ kmp_hw_t type1 = ((const item_t *)i1)->type;
+ kmp_hw_t type2 = ((const item_t *)i2)->type;
+ int level1 = __kmp_topology->get_level(type1);
+ int level2 = __kmp_topology->get_level(type2);
+ return level1 - level2;
+ }
public:
// Force use of allocate()/deallocate()
@@ -848,6 +901,10 @@ public:
}
depth--;
}
+ void sort() {
+ KMP_DEBUG_ASSERT(__kmp_topology);
+ qsort(items, depth, sizeof(item_t), hw_subset_compare);
+ }
bool specified(kmp_hw_t type) const { return ((set & (1ull << type)) > 0); }
void dump() const {
printf("**********************\n");
@@ -863,8 +920,6 @@ public:
printf("**********************\n");
}
};
-
-extern kmp_topology_t *__kmp_topology;
extern kmp_hw_subset_t *__kmp_hw_subset;
/* A structure for holding machine-specific hierarchy info to be computed once
diff --git a/openmp/runtime/src/kmp_alloc.cpp b/openmp/runtime/src/kmp_alloc.cpp
index 857855cf12d6..0f76906714b1 100644
--- a/openmp/runtime/src/kmp_alloc.cpp
+++ b/openmp/runtime/src/kmp_alloc.cpp
@@ -883,7 +883,7 @@ static void bpool(kmp_info_t *th, void *buf, bufsize len) {
__kmp_bget_dequeue(th); /* Release any queued buffers */
#ifdef SizeQuant
- len &= ~(SizeQuant - 1);
+ len &= ~((bufsize)(SizeQuant - 1));
#endif
if (thr->pool_len == 0) {
thr->pool_len = len;
@@ -1484,31 +1484,74 @@ typedef struct kmp_mem_desc { // Memory block descriptor
void *ptr_align; // Pointer to aligned memory, returned
kmp_allocator_t *allocator; // allocator
} kmp_mem_desc_t;
-static int alignment = sizeof(void *); // let's align to pointer size
+static int alignment = sizeof(void *); // align to pointer size by default
+// external interfaces are wrappers over internal implementation
void *__kmpc_alloc(int gtid, size_t size, omp_allocator_handle_t allocator) {
+ KE_TRACE(25, ("__kmpc_alloc: T#%d (%d, %p)\n", gtid, (int)size, allocator));
+ void *ptr = __kmp_alloc(gtid, 0, size, allocator);
+ KE_TRACE(25, ("__kmpc_alloc returns %p, T#%d\n", ptr, gtid));
+ return ptr;
+}
+
+void *__kmpc_aligned_alloc(int gtid, size_t algn, size_t size,
+ omp_allocator_handle_t allocator) {
+ KE_TRACE(25, ("__kmpc_aligned_alloc: T#%d (%d, %d, %p)\n", gtid, (int)algn,
+ (int)size, allocator));
+ void *ptr = __kmp_alloc(gtid, algn, size, allocator);
+ KE_TRACE(25, ("__kmpc_aligned_alloc returns %p, T#%d\n", ptr, gtid));
+ return ptr;
+}
+
+void *__kmpc_calloc(int gtid, size_t nmemb, size_t size,
+ omp_allocator_handle_t allocator) {
+ KE_TRACE(25, ("__kmpc_calloc: T#%d (%d, %d, %p)\n", gtid, (int)nmemb,
+ (int)size, allocator));
+ void *ptr = __kmp_calloc(gtid, 0, nmemb, size, allocator);
+ KE_TRACE(25, ("__kmpc_calloc returns %p, T#%d\n", ptr, gtid));
+ return ptr;
+}
+
+void *__kmpc_realloc(int gtid, void *ptr, size_t size,
+ omp_allocator_handle_t allocator,
+ omp_allocator_handle_t free_allocator) {
+ KE_TRACE(25, ("__kmpc_realloc: T#%d (%p, %d, %p, %p)\n", gtid, ptr, (int)size,
+ allocator, free_allocator));
+ void *nptr = __kmp_realloc(gtid, ptr, size, allocator, free_allocator);
+ KE_TRACE(25, ("__kmpc_realloc returns %p, T#%d\n", nptr, gtid));
+ return nptr;
+}
+
+void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t allocator) {
+ KE_TRACE(25, ("__kmpc_free: T#%d free(%p,%p)\n", gtid, ptr, allocator));
+ ___kmpc_free(gtid, ptr, allocator);
+ KE_TRACE(10, ("__kmpc_free: T#%d freed %p (%p)\n", gtid, ptr, allocator));
+ return;
+}
+
+// internal implementation, called from inside the library
+void *__kmp_alloc(int gtid, size_t algn, size_t size,
+ omp_allocator_handle_t allocator) {
void *ptr = NULL;
kmp_allocator_t *al;
KMP_DEBUG_ASSERT(__kmp_init_serial);
-
if (size == 0)
return NULL;
-
if (allocator == omp_null_allocator)
allocator = __kmp_threads[gtid]->th.th_def_allocator;
- KE_TRACE(25, ("__kmpc_alloc: T#%d (%d, %p)\n", gtid, (int)size, allocator));
- al = RCAST(kmp_allocator_t *, CCAST(omp_allocator_handle_t, allocator));
+ al = RCAST(kmp_allocator_t *, allocator);
int sz_desc = sizeof(kmp_mem_desc_t);
kmp_mem_desc_t desc;
kmp_uintptr_t addr; // address returned by allocator
kmp_uintptr_t addr_align; // address to return to caller
kmp_uintptr_t addr_descr; // address of memory block descriptor
- int align = alignment; // default alignment
- if (allocator > kmp_max_mem_alloc && al->alignment > 0) {
- align = al->alignment; // alignment requested by user
- }
+ size_t align = alignment; // default alignment
+ if (allocator > kmp_max_mem_alloc && al->alignment > align)
+ align = al->alignment; // alignment required by allocator trait
+ if (align < algn)
+ align = algn; // max of allocator trait, parameter and sizeof(void*)
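+ // Worked example (hypothetical numbers): with the default alignment of
+ // sizeof(void *) (8 on a typical 64-bit target), an allocator trait asking
+ // for 64-byte alignment and a call passing algn == 128, align ends up as 128;
+ // with no trait and algn == 0 it stays at 8.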
desc.size_orig = size;
desc.size_a = size + sz_desc + align;
@@ -1537,7 +1580,7 @@ void *__kmpc_alloc(int gtid, size_t size, omp_allocator_handle_t allocator) {
} else if (al->fb == omp_atv_allocator_fb) {
KMP_ASSERT(al != al->fb_data);
al = al->fb_data;
- return __kmpc_alloc(gtid, size, (omp_allocator_handle_t)al);
+ return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
} // else ptr == NULL;
} else {
// pool has enough space
@@ -1551,7 +1594,7 @@ void *__kmpc_alloc(int gtid, size_t size, omp_allocator_handle_t allocator) {
} else if (al->fb == omp_atv_allocator_fb) {
KMP_ASSERT(al != al->fb_data);
al = al->fb_data;
- return __kmpc_alloc(gtid, size, (omp_allocator_handle_t)al);
+ return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
}
}
}
@@ -1567,7 +1610,7 @@ void *__kmpc_alloc(int gtid, size_t size, omp_allocator_handle_t allocator) {
} else if (al->fb == omp_atv_allocator_fb) {
KMP_ASSERT(al != al->fb_data);
al = al->fb_data;
- return __kmpc_alloc(gtid, size, (omp_allocator_handle_t)al);
+ return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
}
}
}
@@ -1623,7 +1666,7 @@ void *__kmpc_alloc(int gtid, size_t size, omp_allocator_handle_t allocator) {
} else if (al->fb == omp_atv_allocator_fb) {
KMP_ASSERT(al != al->fb_data);
al = al->fb_data;
- return __kmpc_alloc(gtid, size, (omp_allocator_handle_t)al);
+ return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
} // else ptr == NULL;
} else {
// pool has enough space
@@ -1639,7 +1682,7 @@ void *__kmpc_alloc(int gtid, size_t size, omp_allocator_handle_t allocator) {
KMP_ASSERT(0); // abort fallback requested
} // no sense to look for another fallback because of same internal alloc
}
- KE_TRACE(10, ("__kmpc_alloc: T#%d %p=alloc(%d)\n", gtid, ptr, desc.size_a));
+ KE_TRACE(10, ("__kmp_alloc: T#%d %p=alloc(%d)\n", gtid, ptr, desc.size_a));
if (ptr == NULL)
return NULL;
@@ -1653,12 +1696,11 @@ void *__kmpc_alloc(int gtid, size_t size, omp_allocator_handle_t allocator) {
*((kmp_mem_desc_t *)addr_descr) = desc; // save descriptor contents
KMP_MB();
- KE_TRACE(25, ("__kmpc_alloc returns %p, T#%d\n", desc.ptr_align, gtid));
return desc.ptr_align;
}
-void *__kmpc_calloc(int gtid, size_t nmemb, size_t size,
- omp_allocator_handle_t allocator) {
+void *__kmp_calloc(int gtid, size_t algn, size_t nmemb, size_t size,
+ omp_allocator_handle_t allocator) {
void *ptr = NULL;
kmp_allocator_t *al;
KMP_DEBUG_ASSERT(__kmp_init_serial);
@@ -1666,10 +1708,7 @@ void *__kmpc_calloc(int gtid, size_t nmemb, size_t size,
if (allocator == omp_null_allocator)
allocator = __kmp_threads[gtid]->th.th_def_allocator;
- KE_TRACE(25, ("__kmpc_calloc: T#%d (%d, %d, %p)\n", gtid, (int)nmemb,
- (int)size, allocator));
-
- al = RCAST(kmp_allocator_t *, CCAST(omp_allocator_handle_t, allocator));
+ al = RCAST(kmp_allocator_t *, allocator);
if (nmemb == 0 || size == 0)
return ptr;
@@ -1681,31 +1720,27 @@ void *__kmpc_calloc(int gtid, size_t nmemb, size_t size,
return ptr;
}
- ptr = __kmpc_alloc(gtid, nmemb * size, allocator);
+ ptr = __kmp_alloc(gtid, algn, nmemb * size, allocator);
if (ptr) {
memset(ptr, 0x00, nmemb * size);
}
- KE_TRACE(25, ("__kmpc_calloc returns %p, T#%d\n", ptr, gtid));
return ptr;
}
-void *__kmpc_realloc(int gtid, void *ptr, size_t size,
- omp_allocator_handle_t allocator,
- omp_allocator_handle_t free_allocator) {
+void *__kmp_realloc(int gtid, void *ptr, size_t size,
+ omp_allocator_handle_t allocator,
+ omp_allocator_handle_t free_allocator) {
void *nptr = NULL;
KMP_DEBUG_ASSERT(__kmp_init_serial);
if (size == 0) {
if (ptr != NULL)
- __kmpc_free(gtid, ptr, free_allocator);
+ ___kmpc_free(gtid, ptr, free_allocator);
return nptr;
}
- KE_TRACE(25, ("__kmpc_realloc: T#%d (%p, %d, %p, %p)\n", gtid, ptr, (int)size,
- allocator, free_allocator));
-
- nptr = __kmpc_alloc(gtid, size, allocator);
+ nptr = __kmp_alloc(gtid, 0, size, allocator);
if (nptr != NULL && ptr != NULL) {
kmp_mem_desc_t desc;
@@ -1724,15 +1759,13 @@ void *__kmpc_realloc(int gtid, void *ptr, size_t size,
}
if (nptr != NULL) {
- __kmpc_free(gtid, ptr, free_allocator);
+ ___kmpc_free(gtid, ptr, free_allocator);
}
- KE_TRACE(25, ("__kmpc_realloc returns %p, T#%d\n", nptr, gtid));
return nptr;
}
-void __kmpc_free(int gtid, void *ptr, const omp_allocator_handle_t allocator) {
- KE_TRACE(25, ("__kmpc_free: T#%d free(%p,%p)\n", gtid, ptr, allocator));
+void ___kmpc_free(int gtid, void *ptr, omp_allocator_handle_t allocator) {
if (ptr == NULL)
return;
@@ -1792,8 +1825,6 @@ void __kmpc_free(int gtid, void *ptr, const omp_allocator_handle_t allocator) {
}
__kmp_thread_free(__kmp_thread_from_gtid(gtid), desc.ptr_alloc);
}
- KE_TRACE(10, ("__kmpc_free: T#%d freed %p (%p)\n", gtid, desc.ptr_alloc,
- allocator));
}
/* If LEAK_MEMORY is defined, __kmp_free() will *not* free memory. It causes
@@ -1924,9 +1955,10 @@ void *___kmp_page_allocate(size_t size KMP_SRC_LOC_DECL) {
In debug mode, fill the memory block with 0xEF before call to free(). */
void ___kmp_free(void *ptr KMP_SRC_LOC_DECL) {
kmp_mem_descr_t descr;
+#if KMP_DEBUG
kmp_uintptr_t addr_allocated; // Address returned by malloc().
kmp_uintptr_t addr_aligned; // Aligned address passed by caller.
-
+#endif
KE_TRACE(25,
("-> __kmp_free( %p ) called from %s:%d\n", ptr KMP_SRC_LOC_PARM));
KMP_ASSERT(ptr != NULL);
@@ -1938,18 +1970,15 @@ void ___kmp_free(void *ptr KMP_SRC_LOC_DECL) {
"ptr_aligned=%p, size_aligned=%d\n",
descr.ptr_allocated, (int)descr.size_allocated,
descr.ptr_aligned, (int)descr.size_aligned));
-
+#if KMP_DEBUG
addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
addr_aligned = (kmp_uintptr_t)descr.ptr_aligned;
-
KMP_DEBUG_ASSERT(addr_aligned % CACHE_LINE == 0);
KMP_DEBUG_ASSERT(descr.ptr_aligned == ptr);
KMP_DEBUG_ASSERT(addr_allocated + sizeof(kmp_mem_descr_t) <= addr_aligned);
KMP_DEBUG_ASSERT(descr.size_aligned < descr.size_allocated);
KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
addr_allocated + descr.size_allocated);
-
-#ifdef KMP_DEBUG
memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
// Fill memory block with 0xEF, it helps catch using freed memory.
#endif
diff --git a/openmp/runtime/src/kmp_atomic.cpp b/openmp/runtime/src/kmp_atomic.cpp
index fcc06216a4fa..83d646054f63 100644
--- a/openmp/runtime/src/kmp_atomic.cpp
+++ b/openmp/runtime/src/kmp_atomic.cpp
@@ -732,7 +732,7 @@ static inline kmp_cmplx128_a16_t operator/(kmp_cmplx128_a16_t &lhs,
#define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) \
__kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
- (*lhs) = (TYPE)((*lhs)OP((TYPE)rhs)); \
+ (*lhs) = (TYPE)((*lhs)OP rhs); \
__kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
// ------------------------------------------------------------------------
@@ -791,14 +791,14 @@ static inline kmp_cmplx128_a16_t operator/(kmp_cmplx128_a16_t &lhs,
{ \
TYPE old_value, new_value; \
old_value = *(TYPE volatile *)lhs; \
- new_value = (TYPE)(old_value OP((TYPE)rhs)); \
+ new_value = (TYPE)(old_value OP rhs); \
while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
(kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
*VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
KMP_DO_PAUSE; \
\
old_value = *(TYPE volatile *)lhs; \
- new_value = (TYPE)(old_value OP((TYPE)rhs)); \
+ new_value = (TYPE)(old_value OP rhs); \
} \
}
@@ -1235,6 +1235,10 @@ MIN_MAX_COMPXCHG(float8, max, kmp_real64, 64, <, 8r, 7,
KMP_ARCH_X86) // __kmpc_atomic_float8_max
MIN_MAX_COMPXCHG(float8, min, kmp_real64, 64, >, 8r, 7,
KMP_ARCH_X86) // __kmpc_atomic_float8_min
+MIN_MAX_CRITICAL(float10, max, long double, <, 10r,
+ 1) // __kmpc_atomic_float10_max
+MIN_MAX_CRITICAL(float10, min, long double, >, 10r,
+ 1) // __kmpc_atomic_float10_min
#if KMP_HAVE_QUAD
MIN_MAX_CRITICAL(float16, max, QUAD_LEGACY, <, 16r,
1) // __kmpc_atomic_float16_max
@@ -2717,6 +2721,10 @@ MIN_MAX_COMPXCHG_CPT(float8, max_cpt, kmp_real64, 64, <,
KMP_ARCH_X86) // __kmpc_atomic_float8_max_cpt
MIN_MAX_COMPXCHG_CPT(float8, min_cpt, kmp_real64, 64, >,
KMP_ARCH_X86) // __kmpc_atomic_float8_min_cpt
+MIN_MAX_CRITICAL_CPT(float10, max_cpt, long double, <, 10r,
+ 1) // __kmpc_atomic_float10_max_cpt
+MIN_MAX_CRITICAL_CPT(float10, min_cpt, long double, >, 10r,
+ 1) // __kmpc_atomic_float10_min_cpt
#if KMP_HAVE_QUAD
MIN_MAX_CRITICAL_CPT(float16, max_cpt, QUAD_LEGACY, <, 16r,
1) // __kmpc_atomic_float16_max_cpt
@@ -3686,6 +3694,171 @@ void __kmpc_atomic_end(void) {
__kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
}
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+
+// OpenMP 5.1 compare and swap
+
+/*!
+@param loc Source code location
+@param gtid Global thread id
+@param x Memory location to operate on
+@param e Expected value
+@param d Desired value
+@return Result of comparison
+
+Implements Compare And Swap atomic operation.
+
+Sample code:
+#pragma omp atomic compare update capture
+ { r = x == e; if(r) { x = d; } }
+*/
+bool __kmpc_atomic_bool_1_cas(ident_t *loc, int gtid, char *x, char e, char d) {
+ return KMP_COMPARE_AND_STORE_ACQ8(x, e, d);
+}
+bool __kmpc_atomic_bool_2_cas(ident_t *loc, int gtid, short *x, short e,
+ short d) {
+ return KMP_COMPARE_AND_STORE_ACQ16(x, e, d);
+}
+bool __kmpc_atomic_bool_4_cas(ident_t *loc, int gtid, kmp_int32 *x, kmp_int32 e,
+ kmp_int32 d) {
+ return KMP_COMPARE_AND_STORE_ACQ32(x, e, d);
+}
+bool __kmpc_atomic_bool_8_cas(ident_t *loc, int gtid, kmp_int64 *x, kmp_int64 e,
+ kmp_int64 d) {
+ return KMP_COMPARE_AND_STORE_ACQ64(x, e, d);
+}
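+// Illustrative lowering sketch (hypothetical; actual compilers may differ):
+// for a kmp_int32 x, the construct
+//   #pragma omp atomic compare update capture
+//   { r = x == e; if (r) { x = d; } }
+// could be lowered to a runtime call such as
+//   r = __kmpc_atomic_bool_4_cas(loc, gtid, &x, e, d);
+// with loc and gtid supplied by the surrounding outlined code.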
+
+/*!
+@param loc Source code location
+@param gtid Global thread id
+@param x Memory location to operate on
+@param e Expected value
+@param d Desired value
+@return Old value of x
+
+Implements Compare And Swap atomic operation.
+
+Sample code:
+#pragma omp atomic compare update capture
+ { v = x; if (x == e) { x = d; } }
+*/
+char __kmpc_atomic_val_1_cas(ident_t *loc, int gtid, char *x, char e, char d) {
+ return KMP_COMPARE_AND_STORE_RET8(x, e, d);
+}
+short __kmpc_atomic_val_2_cas(ident_t *loc, int gtid, short *x, short e,
+ short d) {
+ return KMP_COMPARE_AND_STORE_RET16(x, e, d);
+}
+kmp_int32 __kmpc_atomic_val_4_cas(ident_t *loc, int gtid, kmp_int32 *x,
+ kmp_int32 e, kmp_int32 d) {
+ return KMP_COMPARE_AND_STORE_RET32(x, e, d);
+}
+kmp_int64 __kmpc_atomic_val_8_cas(ident_t *loc, int gtid, kmp_int64 *x,
+ kmp_int64 e, kmp_int64 d) {
+ return KMP_COMPARE_AND_STORE_RET64(x, e, d);
+}
+
+/*!
+@param loc Source code location
+@param gtid Global thread id
+@param x Memory location to operate on
+@param e Expected value
+@param d Desired value
+@param pv Captured value location
+@return Result of comparison
+
+Implements Compare And Swap + Capture atomic operation.
+
+v gets old value of x if comparison failed, untouched otherwise.
+Sample code:
+#pragma omp atomic compare update capture
+ { r = x == e; if(r) { x = d; } else { v = x; } }
+*/
+bool __kmpc_atomic_bool_1_cas_cpt(ident_t *loc, int gtid, char *x, char e,
+ char d, char *pv) {
+ char old = KMP_COMPARE_AND_STORE_RET8(x, e, d);
+ if (old == e)
+ return true;
+ KMP_ASSERT(pv != NULL);
+ *pv = old;
+ return false;
+}
+bool __kmpc_atomic_bool_2_cas_cpt(ident_t *loc, int gtid, short *x, short e,
+ short d, short *pv) {
+ short old = KMP_COMPARE_AND_STORE_RET16(x, e, d);
+ if (old == e)
+ return true;
+ KMP_ASSERT(pv != NULL);
+ *pv = old;
+ return false;
+}
+bool __kmpc_atomic_bool_4_cas_cpt(ident_t *loc, int gtid, kmp_int32 *x,
+ kmp_int32 e, kmp_int32 d, kmp_int32 *pv) {
+ kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(x, e, d);
+ if (old == e)
+ return true;
+ KMP_ASSERT(pv != NULL);
+ *pv = old;
+ return false;
+}
+bool __kmpc_atomic_bool_8_cas_cpt(ident_t *loc, int gtid, kmp_int64 *x,
+ kmp_int64 e, kmp_int64 d, kmp_int64 *pv) {
+ kmp_int64 old = KMP_COMPARE_AND_STORE_RET64(x, e, d);
+ if (old == e)
+ return true;
+ KMP_ASSERT(pv != NULL);
+ *pv = old;
+ return false;
+}
+
+/*!
+@param loc Source code location
+@param gtid Global thread id
+@param x Memory location to operate on
+@param e Expected value
+@param d Desired value
+@param pv Captured value location
+@return Old value of x
+
+Implements Compare And Swap + Capture atomic operation.
+
+v gets new value of x.
+Sample code:
+#pragma omp atomic compare update capture
+ { if (x == e) { x = d; }; v = x; }
+*/
+char __kmpc_atomic_val_1_cas_cpt(ident_t *loc, int gtid, char *x, char e,
+ char d, char *pv) {
+ char old = KMP_COMPARE_AND_STORE_RET8(x, e, d);
+ KMP_ASSERT(pv != NULL);
+ *pv = old == e ? d : old;
+ return old;
+}
+short __kmpc_atomic_val_2_cas_cpt(ident_t *loc, int gtid, short *x, short e,
+ short d, short *pv) {
+ short old = KMP_COMPARE_AND_STORE_RET16(x, e, d);
+ KMP_ASSERT(pv != NULL);
+ *pv = old == e ? d : old;
+ return old;
+}
+kmp_int32 __kmpc_atomic_val_4_cas_cpt(ident_t *loc, int gtid, kmp_int32 *x,
+ kmp_int32 e, kmp_int32 d, kmp_int32 *pv) {
+ kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(x, e, d);
+ KMP_ASSERT(pv != NULL);
+ *pv = old == e ? d : old;
+ return old;
+}
+kmp_int64 __kmpc_atomic_val_8_cas_cpt(ident_t *loc, int gtid, kmp_int64 *x,
+ kmp_int64 e, kmp_int64 d, kmp_int64 *pv) {
+ kmp_int64 old = KMP_COMPARE_AND_STORE_RET64(x, e, d);
+ KMP_ASSERT(pv != NULL);
+ *pv = old == e ? d : old;
+ return old;
+}
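+// Illustrative note on the capture semantics above: *pv receives d when the
+// swap succeeded (old == e) and the unchanged old value otherwise, which
+// matches the sample code's 'v = x' executed after the conditional store.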
+
+// End OpenMP 5.1 compare + capture
+#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
+
/*!
@}
*/
diff --git a/openmp/runtime/src/kmp_atomic.h b/openmp/runtime/src/kmp_atomic.h
index 6a0827aaf1ea..079b917285b1 100644
--- a/openmp/runtime/src/kmp_atomic.h
+++ b/openmp/runtime/src/kmp_atomic.h
@@ -578,6 +578,10 @@ void __kmpc_atomic_float8_max(ident_t *id_ref, int gtid, kmp_real64 *lhs,
kmp_real64 rhs);
void __kmpc_atomic_float8_min(ident_t *id_ref, int gtid, kmp_real64 *lhs,
kmp_real64 rhs);
+void __kmpc_atomic_float10_max(ident_t *id_ref, int gtid, long double *lhs,
+ long double rhs);
+void __kmpc_atomic_float10_min(ident_t *id_ref, int gtid, long double *lhs,
+ long double rhs);
#if KMP_HAVE_QUAD
void __kmpc_atomic_float16_max(ident_t *id_ref, int gtid, QUAD_LEGACY *lhs,
QUAD_LEGACY rhs);
@@ -1254,6 +1258,12 @@ kmp_real64 __kmpc_atomic_float8_max_cpt(ident_t *id_ref, int gtid,
kmp_real64 __kmpc_atomic_float8_min_cpt(ident_t *id_ref, int gtid,
kmp_real64 *lhs, kmp_real64 rhs,
int flag);
+long double __kmpc_atomic_float10_max_cpt(ident_t *id_ref, int gtid,
+ long double *lhs, long double rhs,
+ int flag);
+long double __kmpc_atomic_float10_min_cpt(ident_t *id_ref, int gtid,
+ long double *lhs, long double rhs,
+ int flag);
#if KMP_HAVE_QUAD
QUAD_LEGACY __kmpc_atomic_float16_max_cpt(ident_t *id_ref, int gtid,
QUAD_LEGACY *lhs, QUAD_LEGACY rhs,
@@ -1756,6 +1766,78 @@ long double __kmpc_atomic_float10_div_cpt_rev_fp(ident_t *id_ref, int gtid,
// End of OpenMP 4.0 capture
+// OpenMP 5.1 compare and swap
+/*
+ __kmpc_atomic_bool_1_cas
+ __kmpc_atomic_bool_2_cas
+ __kmpc_atomic_bool_4_cas
+ __kmpc_atomic_bool_8_cas
+ __kmpc_atomic_val_1_cas
+ __kmpc_atomic_val_2_cas
+ __kmpc_atomic_val_4_cas
+ __kmpc_atomic_val_8_cas
+ __kmpc_atomic_bool_1_cas_cpt
+ __kmpc_atomic_bool_2_cas_cpt
+ __kmpc_atomic_bool_4_cas_cpt
+ __kmpc_atomic_bool_8_cas_cpt
+ __kmpc_atomic_val_1_cas_cpt
+ __kmpc_atomic_val_2_cas_cpt
+ __kmpc_atomic_val_4_cas_cpt
+ __kmpc_atomic_val_8_cas_cpt
+*/
+// In all interfaces of CAS (Compare And Swap):
+// r is the boolean result of comparison
+// x is memory location to operate on
+// e is expected (old) value
+// d is desired (new) value
+// pv is pointer to captured value v whose location may coincide with e
+
+// { r = x == e; if(r) { x = d; } }
+// functions return result of comparison
+bool __kmpc_atomic_bool_1_cas(ident_t *loc, int gtid, char *x, char e, char d);
+bool __kmpc_atomic_bool_2_cas(ident_t *loc, int gtid, short *x, short e,
+ short d);
+bool __kmpc_atomic_bool_4_cas(ident_t *loc, int gtid, kmp_int32 *x, kmp_int32 e,
+ kmp_int32 d);
+bool __kmpc_atomic_bool_8_cas(ident_t *loc, int gtid, kmp_int64 *x, kmp_int64 e,
+ kmp_int64 d);
+
+// { v = x; if (x == e) { x = d; } }
+// functions return old value
+char __kmpc_atomic_val_1_cas(ident_t *loc, int gtid, char *x, char e, char d);
+short __kmpc_atomic_val_2_cas(ident_t *loc, int gtid, short *x, short e,
+ short d);
+kmp_int32 __kmpc_atomic_val_4_cas(ident_t *loc, int gtid, kmp_int32 *x,
+ kmp_int32 e, kmp_int32 d);
+kmp_int64 __kmpc_atomic_val_8_cas(ident_t *loc, int gtid, kmp_int64 *x,
+ kmp_int64 e, kmp_int64 d);
+
+// { r = x == e; if(r) { x = d; } else { v = x; } }
+// v gets old value if comparison failed, untouched otherwise
+// functions return result of comparison
+bool __kmpc_atomic_bool_1_cas_cpt(ident_t *loc, int gtid, char *x, char e,
+ char d, char *pv);
+bool __kmpc_atomic_bool_2_cas_cpt(ident_t *loc, int gtid, short *x, short e,
+ short d, short *pv);
+bool __kmpc_atomic_bool_4_cas_cpt(ident_t *loc, int gtid, kmp_int32 *x,
+ kmp_int32 e, kmp_int32 d, kmp_int32 *pv);
+bool __kmpc_atomic_bool_8_cas_cpt(ident_t *loc, int gtid, kmp_int64 *x,
+ kmp_int64 e, kmp_int64 d, kmp_int64 *pv);
+
+// { if (x == e) { x = d; }; v = x; }
+// v gets old value if comparison failed, new value otherwise
+// functions return old value
+char __kmpc_atomic_val_1_cas_cpt(ident_t *loc, int gtid, char *x, char e,
+ char d, char *pv);
+short __kmpc_atomic_val_2_cas_cpt(ident_t *loc, int gtid, short *x, short e,
+ short d, short *pv);
+kmp_int32 __kmpc_atomic_val_4_cas_cpt(ident_t *loc, int gtid, kmp_int32 *x,
+ kmp_int32 e, kmp_int32 d, kmp_int32 *pv);
+kmp_int64 __kmpc_atomic_val_8_cas_cpt(ident_t *loc, int gtid, kmp_int64 *x,
+ kmp_int64 e, kmp_int64 d, kmp_int64 *pv);
+
+// End OpenMP 5.1 compare + capture
+
#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
/* ------------------------------------------------------------------------ */
diff --git a/openmp/runtime/src/kmp_barrier.cpp b/openmp/runtime/src/kmp_barrier.cpp
index 93112156a1ef..97bf9811bcd0 100644
--- a/openmp/runtime/src/kmp_barrier.cpp
+++ b/openmp/runtime/src/kmp_barrier.cpp
@@ -10,12 +10,14 @@
//
//===----------------------------------------------------------------------===//
-#include "kmp.h"
#include "kmp_wait_release.h"
+#include "kmp_barrier.h"
#include "kmp_itt.h"
#include "kmp_os.h"
#include "kmp_stats.h"
#include "ompt-specific.h"
+// for distributed barrier
+#include "kmp_affinity.h"
#if KMP_MIC
#include <immintrin.h>
@@ -38,6 +40,516 @@
void __kmp_print_structure(void); // Forward declaration
// ---------------------------- Barrier Algorithms ----------------------------
+// Distributed barrier
+
+// Compute how many threads to have polling each cache-line.
+// We want to limit the number of writes to IDEAL_GO_RESOLUTION.
+void distributedBarrier::computeVarsForN(size_t n) {
+ int nsockets = 1;
+ if (__kmp_topology) {
+ int socket_level = __kmp_topology->get_level(KMP_HW_SOCKET);
+ int core_level = __kmp_topology->get_level(KMP_HW_CORE);
+ int ncores_per_socket =
+ __kmp_topology->calculate_ratio(core_level, socket_level);
+ nsockets = __kmp_topology->get_count(socket_level);
+
+ if (nsockets <= 0)
+ nsockets = 1;
+ if (ncores_per_socket <= 0)
+ ncores_per_socket = 1;
+
+ threads_per_go = ncores_per_socket >> 1;
+ if (!fix_threads_per_go) {
+ // Minimize num_gos
+ if (threads_per_go > 4) {
+ if (KMP_OPTIMIZE_FOR_REDUCTIONS) {
+ threads_per_go = threads_per_go >> 1;
+ }
+ if (threads_per_go > 4 && nsockets == 1)
+ threads_per_go = threads_per_go >> 1;
+ }
+ }
+ if (threads_per_go == 0)
+ threads_per_go = 1;
+ fix_threads_per_go = true;
+ num_gos = n / threads_per_go;
+ if (n % threads_per_go)
+ num_gos++;
+ if (nsockets == 1 || num_gos == 1)
+ num_groups = 1;
+ else {
+ num_groups = num_gos / nsockets;
+ if (num_gos % nsockets)
+ num_groups++;
+ }
+ if (num_groups <= 0)
+ num_groups = 1;
+ gos_per_group = num_gos / num_groups;
+ if (num_gos % num_groups)
+ gos_per_group++;
+ threads_per_group = threads_per_go * gos_per_group;
+ } else {
+ num_gos = n / threads_per_go;
+ if (n % threads_per_go)
+ num_gos++;
+ if (num_gos == 1)
+ num_groups = 1;
+ else {
+ num_groups = num_gos / 2;
+ if (num_gos % 2)
+ num_groups++;
+ }
+ gos_per_group = num_gos / num_groups;
+ if (num_gos % num_groups)
+ gos_per_group++;
+ threads_per_group = threads_per_go * gos_per_group;
+ }
+}
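+// Worked example (hypothetical machine): with 2 sockets, 16 cores per socket
+// and n == 64 threads, threads_per_go becomes 8 (half the cores per socket),
+// num_gos = 64 / 8 = 8, num_groups = 8 / 2 = 4, gos_per_group = 8 / 4 = 2 and
+// threads_per_group = 8 * 2 = 16.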
+
+void distributedBarrier::computeGo(size_t n) {
+ // Minimize num_gos
+ for (num_gos = 1;; num_gos++)
+ if (IDEAL_CONTENTION * num_gos >= n)
+ break;
+ threads_per_go = n / num_gos;
+ if (n % num_gos)
+ threads_per_go++;
+ while (num_gos > MAX_GOS) {
+ threads_per_go++;
+ num_gos = n / threads_per_go;
+ if (n % threads_per_go)
+ num_gos++;
+ }
+ computeVarsForN(n);
+}
+
+// Resizes the barrier arrays when the new number of threads exceeds
+// max_threads, which is the current size of all the arrays.
+void distributedBarrier::resize(size_t nthr) {
+ KMP_DEBUG_ASSERT(nthr > max_threads);
+
+ // expand to requested size * 2
+ max_threads = nthr * 2;
+
+ // allocate arrays to new max threads
+ for (int i = 0; i < MAX_ITERS; ++i) {
+ if (flags[i])
+ flags[i] = (flags_s *)KMP_INTERNAL_REALLOC(flags[i],
+ max_threads * sizeof(flags_s));
+ else
+ flags[i] = (flags_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(flags_s));
+ }
+
+ if (go)
+ go = (go_s *)KMP_INTERNAL_REALLOC(go, max_threads * sizeof(go_s));
+ else
+ go = (go_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(go_s));
+
+ if (iter)
+ iter = (iter_s *)KMP_INTERNAL_REALLOC(iter, max_threads * sizeof(iter_s));
+ else
+ iter = (iter_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(iter_s));
+
+ if (sleep)
+ sleep =
+ (sleep_s *)KMP_INTERNAL_REALLOC(sleep, max_threads * sizeof(sleep_s));
+ else
+ sleep = (sleep_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(sleep_s));
+}
+
+// Sets all the go flags that threads might be waiting on; when blocktime is
+// not infinite, it should be followed by a wake-up call to each thread.
+kmp_uint64 distributedBarrier::go_release() {
+ kmp_uint64 next_go = iter[0].iter + distributedBarrier::MAX_ITERS;
+ for (size_t j = 0; j < num_gos; j++) {
+ go[j].go.store(next_go);
+ }
+ return next_go;
+}
+
+void distributedBarrier::go_reset() {
+ for (size_t j = 0; j < max_threads; ++j) {
+ for (size_t i = 0; i < distributedBarrier::MAX_ITERS; ++i) {
+ flags[i][j].stillNeed = 1;
+ }
+ go[j].go.store(0);
+ iter[j].iter = 0;
+ }
+}
+
+// Initializes or re-initializes the distributed barrier for a particular
+// number of threads, resizing the arrays first if a larger size is needed.
+void distributedBarrier::init(size_t nthr) {
+ size_t old_max = max_threads;
+ if (nthr > max_threads) { // need more space in arrays
+ resize(nthr);
+ }
+
+ for (size_t i = 0; i < max_threads; i++) {
+ for (size_t j = 0; j < distributedBarrier::MAX_ITERS; j++) {
+ flags[j][i].stillNeed = 1;
+ }
+ go[i].go.store(0);
+ iter[i].iter = 0;
+ if (i >= old_max)
+ sleep[i].sleep = false;
+ }
+
+ // Recalculate num_gos, etc. based on new nthr
+ computeVarsForN(nthr);
+
+ num_threads = nthr;
+
+ if (team_icvs == NULL)
+ team_icvs = __kmp_allocate(sizeof(kmp_internal_control_t));
+}
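+// Note on the loop above: sleep flags are reset only for slots at or beyond
+// old_max, presumably so that threads already registered in existing slots
+// keep their current sleep state across a re-init.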
+
+// This function is used only when KMP_BLOCKTIME is not infinite.
+// static
+void __kmp_dist_barrier_wakeup(enum barrier_type bt, kmp_team_t *team,
+ size_t start, size_t stop, size_t inc,
+ size_t tid) {
+ KMP_DEBUG_ASSERT(__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME);
+ if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
+ return;
+
+ kmp_info_t **other_threads = team->t.t_threads;
+ for (size_t thr = start; thr < stop; thr += inc) {
+ KMP_DEBUG_ASSERT(other_threads[thr]);
+ int gtid = other_threads[thr]->th.th_info.ds.ds_gtid;
+ // Wake up worker regardless of if it appears to be sleeping or not
+ __kmp_atomic_resume_64(gtid, (kmp_atomic_flag_64<> *)NULL);
+ }
+}
+
+static void __kmp_dist_barrier_gather(
+ enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
+ void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
+ KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_dist_gather);
+ kmp_team_t *team;
+ distributedBarrier *b;
+ kmp_info_t **other_threads;
+ kmp_uint64 my_current_iter, my_next_iter;
+ kmp_uint32 nproc;
+ bool group_leader;
+
+ team = this_thr->th.th_team;
+ nproc = this_thr->th.th_team_nproc;
+ other_threads = team->t.t_threads;
+ b = team->t.b;
+ my_current_iter = b->iter[tid].iter;
+ my_next_iter = (my_current_iter + 1) % distributedBarrier::MAX_ITERS;
+ group_leader = ((tid % b->threads_per_group) == 0);
+
+ KA_TRACE(20,
+ ("__kmp_dist_barrier_gather: T#%d(%d:%d) enter; barrier type %d\n",
+ gtid, team->t.t_id, tid, bt));
+
+#if USE_ITT_BUILD && USE_ITT_NOTIFY
+ // Barrier imbalance - save arrive time to the thread
+ if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
+ this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time =
+ __itt_get_timestamp();
+ }
+#endif
+
+ if (group_leader) {
+ // Start from the thread after the group leader
+ size_t group_start = tid + 1;
+ size_t group_end = tid + b->threads_per_group;
+ size_t threads_pending = 0;
+
+ if (group_end > nproc)
+ group_end = nproc;
+ do { // wait for threads in my group
+ threads_pending = 0;
+ // Check all the flags every time to avoid branch mispredicts
+ for (size_t thr = group_start; thr < group_end; thr++) {
+ // Each thread uses a different cache line
+ threads_pending += b->flags[my_current_iter][thr].stillNeed;
+ }
+ // Execute tasks here
+ if (__kmp_tasking_mode != tskm_immediate_exec) {
+ kmp_task_team_t *task_team = this_thr->th.th_task_team;
+ if (task_team != NULL) {
+ if (TCR_SYNC_4(task_team->tt.tt_active)) {
+ if (KMP_TASKING_ENABLED(task_team)) {
+ int tasks_completed = FALSE;
+ __kmp_atomic_execute_tasks_64(
+ this_thr, gtid, (kmp_atomic_flag_64<> *)NULL, FALSE,
+ &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
+ } else
+ this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
+ }
+ } else {
+ this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
+ } // if
+ }
+ if (TCR_4(__kmp_global.g.g_done)) {
+ if (__kmp_global.g.g_abort)
+ __kmp_abort_thread();
+ break;
+ } else if (__kmp_tasking_mode != tskm_immediate_exec &&
+ this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
+ this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
+ }
+ } while (threads_pending > 0);
+
+ if (reduce) { // Perform reduction if needed
+ OMPT_REDUCTION_DECL(this_thr, gtid);
+ OMPT_REDUCTION_BEGIN;
+ // Group leader reduces all threads in group
+ for (size_t thr = group_start; thr < group_end; thr++) {
+ (*reduce)(this_thr->th.th_local.reduce_data,
+ other_threads[thr]->th.th_local.reduce_data);
+ }
+ OMPT_REDUCTION_END;
+ }
+
+ // Set flag for next iteration
+ b->flags[my_next_iter][tid].stillNeed = 1;
+ // Each thread uses a different cache line; resets stillNeed to 0 to
+ // indicate it has reached the barrier
+ b->flags[my_current_iter][tid].stillNeed = 0;
+
+ do { // wait for all group leaders
+ threads_pending = 0;
+ for (size_t thr = 0; thr < nproc; thr += b->threads_per_group) {
+ threads_pending += b->flags[my_current_iter][thr].stillNeed;
+ }
+ // Execute tasks here
+ if (__kmp_tasking_mode != tskm_immediate_exec) {
+ kmp_task_team_t *task_team = this_thr->th.th_task_team;
+ if (task_team != NULL) {
+ if (TCR_SYNC_4(task_team->tt.tt_active)) {
+ if (KMP_TASKING_ENABLED(task_team)) {
+ int tasks_completed = FALSE;
+ __kmp_atomic_execute_tasks_64(
+ this_thr, gtid, (kmp_atomic_flag_64<> *)NULL, FALSE,
+ &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
+ } else
+ this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
+ }
+ } else {
+ this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
+ } // if
+ }
+ if (TCR_4(__kmp_global.g.g_done)) {
+ if (__kmp_global.g.g_abort)
+ __kmp_abort_thread();
+ break;
+ } else if (__kmp_tasking_mode != tskm_immediate_exec &&
+ this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
+ this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
+ }
+ } while (threads_pending > 0);
+
+ if (reduce) { // Perform reduction if needed
+ if (KMP_MASTER_TID(tid)) { // Master reduces over group leaders
+ OMPT_REDUCTION_DECL(this_thr, gtid);
+ OMPT_REDUCTION_BEGIN;
+ for (size_t thr = b->threads_per_group; thr < nproc;
+ thr += b->threads_per_group) {
+ (*reduce)(this_thr->th.th_local.reduce_data,
+ other_threads[thr]->th.th_local.reduce_data);
+ }
+ OMPT_REDUCTION_END;
+ }
+ }
+ } else {
+ // Set flag for next iteration
+ b->flags[my_next_iter][tid].stillNeed = 1;
+ // Each thread uses a different cache line; resets stillNeed to 0 to
+ // indicate it has reached the barrier
+ b->flags[my_current_iter][tid].stillNeed = 0;
+ }
+
+ KMP_MFENCE();
+
+ KA_TRACE(20,
+ ("__kmp_dist_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
+ gtid, team->t.t_id, tid, bt));
+}
+
+static void __kmp_dist_barrier_release(
+ enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
+ int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
+ KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_dist_release);
+ kmp_team_t *team;
+ distributedBarrier *b;
+ kmp_bstate_t *thr_bar;
+ kmp_uint64 my_current_iter, next_go;
+ size_t my_go_index;
+ bool group_leader;
+
+ KA_TRACE(20, ("__kmp_dist_barrier_release: T#%d(%d) enter; barrier type %d\n",
+ gtid, tid, bt));
+
+ thr_bar = &this_thr->th.th_bar[bt].bb;
+
+ if (!KMP_MASTER_TID(tid)) {
+ // workers and non-master group leaders need to check their presence in team
+ do {
+ if (this_thr->th.th_used_in_team.load() != 1 &&
+ this_thr->th.th_used_in_team.load() != 3) {
+ // Thread is not in use in a team. Wait on location in tid's thread
+ // struct. The 0 value tells anyone looking that this thread is spinning
+ // or sleeping until this location becomes 3 again; 3 is the transition
+ // state to get to 1 which is waiting on go and being in the team
+ kmp_flag_32<false, false> my_flag(&(this_thr->th.th_used_in_team), 3);
+ if (KMP_COMPARE_AND_STORE_ACQ32(&(this_thr->th.th_used_in_team), 2,
+ 0) ||
+ this_thr->th.th_used_in_team.load() == 0) {
+ my_flag.wait(this_thr, true USE_ITT_BUILD_ARG(itt_sync_obj));
+ }
+#if USE_ITT_BUILD && USE_ITT_NOTIFY
+ if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
+ // In fork barrier where we could not get the object reliably
+ itt_sync_obj =
+ __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
+ // Cancel wait on previous parallel region...
+ __kmp_itt_task_starting(itt_sync_obj);
+
+ if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
+ return;
+
+ itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
+ if (itt_sync_obj != NULL)
+ // Call prepare as early as possible for "new" barrier
+ __kmp_itt_task_finished(itt_sync_obj);
+ } else
+#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
+ if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
+ return;
+ }
+ if (this_thr->th.th_used_in_team.load() != 1 &&
+ this_thr->th.th_used_in_team.load() != 3) // spurious wake-up?
+ continue;
+ if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
+ return;
+
+ // At this point, the thread thinks it is in use in a team, or in
+ // transition to be used in a team, but it might have reached this barrier
+ // before it was marked unused by the team. Unused threads are awoken and
+ // shifted to wait on local thread struct elsewhere. It also might reach
+ // this point by being picked up for use by a different team. Either way,
+ // we need to update the tid.
+ tid = __kmp_tid_from_gtid(gtid);
+ team = this_thr->th.th_team;
+ KMP_DEBUG_ASSERT(tid >= 0);
+ KMP_DEBUG_ASSERT(team);
+ b = team->t.b;
+ my_current_iter = b->iter[tid].iter;
+ next_go = my_current_iter + distributedBarrier::MAX_ITERS;
+ my_go_index = tid / b->threads_per_go;
+ if (this_thr->th.th_used_in_team.load() == 3) {
+ KMP_COMPARE_AND_STORE_ACQ32(&(this_thr->th.th_used_in_team), 3, 1);
+ }
+ // Check if go flag is set
+ if (b->go[my_go_index].go.load() != next_go) {
+ // Wait on go flag on team
+ kmp_atomic_flag_64<false, true> my_flag(
+ &(b->go[my_go_index].go), next_go, &(b->sleep[tid].sleep));
+ my_flag.wait(this_thr, true USE_ITT_BUILD_ARG(itt_sync_obj));
+ KMP_DEBUG_ASSERT(my_current_iter == b->iter[tid].iter ||
+ b->iter[tid].iter == 0);
+ KMP_DEBUG_ASSERT(b->sleep[tid].sleep == false);
+ }
+
+ if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
+ return;
+ // At this point, the thread's go location was set. This means the primary
+ // thread is safely in the barrier, and so this thread's data is
+ // up-to-date, but we should check again that this thread is really in
+ // use in the team, as it could have been woken up for the purpose of
+ // changing team size, or reaping threads at shutdown.
+ if (this_thr->th.th_used_in_team.load() == 1)
+ break;
+ } while (1);
+
+ if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
+ return;
+
+ group_leader = ((tid % b->threads_per_group) == 0);
+ if (group_leader) {
+ // Tell all the threads in my group they can go!
+ for (size_t go_idx = my_go_index + 1;
+ go_idx < my_go_index + b->gos_per_group; go_idx++) {
+ b->go[go_idx].go.store(next_go);
+ }
+ // Fence added so that workers can see changes to go. sfence inadequate.
+ KMP_MFENCE();
+ }
+
+#if KMP_BARRIER_ICV_PUSH
+ if (propagate_icvs) { // copy ICVs to final dest
+ __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team,
+ tid, FALSE);
+ copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
+ (kmp_internal_control_t *)team->t.b->team_icvs);
+ copy_icvs(&thr_bar->th_fixed_icvs,
+ &team->t.t_implicit_task_taskdata[tid].td_icvs);
+ }
+#endif
+ if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME && group_leader) {
+ // This thread is now awake and participating in the barrier;
+ // wake up the other threads in the group
+ size_t nproc = this_thr->th.th_team_nproc;
+ size_t group_end = tid + b->threads_per_group;
+ if (nproc < group_end)
+ group_end = nproc;
+ __kmp_dist_barrier_wakeup(bt, team, tid + 1, group_end, 1, tid);
+ }
+ } else { // Primary thread
+ team = this_thr->th.th_team;
+ b = team->t.b;
+ my_current_iter = b->iter[tid].iter;
+ next_go = my_current_iter + distributedBarrier::MAX_ITERS;
+#if KMP_BARRIER_ICV_PUSH
+ if (propagate_icvs) {
+ // primary thread has ICVs in final destination; copy
+ copy_icvs(&thr_bar->th_fixed_icvs,
+ &team->t.t_implicit_task_taskdata[tid].td_icvs);
+ }
+#endif
+ // Tell all the group leaders they can go!
+ for (size_t go_idx = 0; go_idx < b->num_gos; go_idx += b->gos_per_group) {
+ b->go[go_idx].go.store(next_go);
+ }
+
+ if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
+ // Wake-up the group leaders
+ size_t nproc = this_thr->th.th_team_nproc;
+ __kmp_dist_barrier_wakeup(bt, team, tid + b->threads_per_group, nproc,
+ b->threads_per_group, tid);
+ }
+
+ // Tell all the threads in my group they can go!
+ for (size_t go_idx = 1; go_idx < b->gos_per_group; go_idx++) {
+ b->go[go_idx].go.store(next_go);
+ }
+
+ // Fence added so that workers can see changes to go. sfence inadequate.
+ KMP_MFENCE();
+
+ if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
+ // Wake-up the other threads in my group
+ size_t nproc = this_thr->th.th_team_nproc;
+ size_t group_end = tid + b->threads_per_group;
+ if (nproc < group_end)
+ group_end = nproc;
+ __kmp_dist_barrier_wakeup(bt, team, tid + 1, group_end, 1, tid);
+ }
+ }
+ // Update to next iteration
+ KMP_ASSERT(my_current_iter == b->iter[tid].iter);
+ b->iter[tid].iter = (b->iter[tid].iter + 1) % distributedBarrier::MAX_ITERS;
+
+ KA_TRACE(
+ 20, ("__kmp_dist_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
+ gtid, team->t.t_id, tid, bt));
+}
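+// Summary of the release fan-out above: the primary thread stores the new go
+// value for every group leader, each group leader then stores it for the go
+// flags of its own group, and (when blocktime is finite) sleeping threads are
+// woken explicitly via __kmp_dist_barrier_wakeup().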
// Linear Barrier
template <bool cancellable = false>
@@ -1354,6 +1866,11 @@ static int __kmp_barrier_template(enum barrier_type bt, int gtid, int is_split,
bt, this_thr, gtid, tid, reduce USE_ITT_BUILD_ARG(itt_sync_obj));
} else {
switch (__kmp_barrier_gather_pattern[bt]) {
+ case bp_dist_bar: {
+ __kmp_dist_barrier_gather(bt, this_thr, gtid, tid,
+ reduce USE_ITT_BUILD_ARG(itt_sync_obj));
+ break;
+ }
case bp_hyper_bar: {
// don't set branch bits to 0; use linear
KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]);
@@ -1467,6 +1984,12 @@ static int __kmp_barrier_template(enum barrier_type bt, int gtid, int is_split,
bt, this_thr, gtid, tid, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
} else {
switch (__kmp_barrier_release_pattern[bt]) {
+ case bp_dist_bar: {
+ KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
+ __kmp_dist_barrier_release(bt, this_thr, gtid, tid,
+ FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
+ break;
+ }
case bp_hyper_bar: {
KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
__kmp_hyper_barrier_release(bt, this_thr, gtid, tid,
@@ -1596,6 +2119,11 @@ void __kmp_end_split_barrier(enum barrier_type bt, int gtid) {
if (!team->t.t_serialized) {
if (KMP_MASTER_GTID(gtid)) {
switch (__kmp_barrier_release_pattern[bt]) {
+ case bp_dist_bar: {
+ __kmp_dist_barrier_release(bt, this_thr, gtid, tid,
+ FALSE USE_ITT_BUILD_ARG(NULL));
+ break;
+ }
case bp_hyper_bar: {
KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
__kmp_hyper_barrier_release(bt, this_thr, gtid, tid,
@@ -1634,7 +2162,6 @@ void __kmp_join_barrier(int gtid) {
kmp_info_t *this_thr = __kmp_threads[gtid];
kmp_team_t *team;
kmp_uint nproc;
- kmp_info_t *master_thread;
int tid;
#ifdef KMP_DEBUG
int team_id;
@@ -1656,9 +2183,7 @@ void __kmp_join_barrier(int gtid) {
tid = __kmp_tid_from_gtid(gtid);
#ifdef KMP_DEBUG
team_id = team->t.t_id;
-#endif /* KMP_DEBUG */
- master_thread = this_thr->th.th_team_master;
-#ifdef KMP_DEBUG
+ kmp_info_t *master_thread = this_thr->th.th_team_master;
if (master_thread != team->t.t_threads[0]) {
__kmp_print_structure();
}
@@ -1705,8 +2230,8 @@ void __kmp_join_barrier(int gtid) {
if (__kmp_tasking_mode == tskm_extra_barrier) {
__kmp_tasking_barrier(team, this_thr, gtid);
- KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) past taking barrier\n", gtid,
- team_id, tid));
+ KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) past tasking barrier\n",
+ gtid, team_id, tid));
}
#ifdef KMP_DEBUG
if (__kmp_tasking_mode != tskm_immediate_exec) {
@@ -1715,8 +2240,9 @@ void __kmp_join_barrier(int gtid) {
__kmp_gtid_from_thread(this_thr), team_id,
team->t.t_task_team[this_thr->th.th_task_state],
this_thr->th.th_task_team));
- KMP_DEBUG_ASSERT(this_thr->th.th_task_team ==
- team->t.t_task_team[this_thr->th.th_task_state]);
+ if (this_thr->th.th_task_team)
+ KMP_DEBUG_ASSERT(this_thr->th.th_task_team ==
+ team->t.t_task_team[this_thr->th.th_task_state]);
}
#endif /* KMP_DEBUG */
@@ -1742,6 +2268,11 @@ void __kmp_join_barrier(int gtid) {
#endif /* USE_ITT_BUILD */
switch (__kmp_barrier_gather_pattern[bs_forkjoin_barrier]) {
+ case bp_dist_bar: {
+ __kmp_dist_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
+ NULL USE_ITT_BUILD_ARG(itt_sync_obj));
+ break;
+ }
case bp_hyper_bar: {
KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]);
__kmp_hyper_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
@@ -1787,8 +2318,7 @@ void __kmp_join_barrier(int gtid) {
team_thread->th.th_stats->setIdleFlag();
if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME &&
team_thread->th.th_sleep_loc != NULL)
- __kmp_null_resume_wrapper(__kmp_gtid_from_thread(team_thread),
- team_thread->th.th_sleep_loc);
+ __kmp_null_resume_wrapper(team_thread);
}
#endif
#if USE_ITT_BUILD
@@ -1806,8 +2336,6 @@ void __kmp_join_barrier(int gtid) {
kmp_uint64 cur_time = __itt_get_timestamp();
ident_t *loc = team->t.t_ident;
kmp_info_t **other_threads = team->t.t_threads;
- int nproc = this_thr->th.th_team_nproc;
- int i;
switch (__kmp_forkjoin_frames_mode) {
case 1:
__kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
@@ -1824,7 +2352,7 @@ void __kmp_join_barrier(int gtid) {
// Set arrive time to zero to be able to check it in
// __kmp_invoke_task(); the same is done inside the loop below
this_thr->th.th_bar_arrive_time = 0;
- for (i = 1; i < nproc; ++i) {
+ for (kmp_uint i = 1; i < nproc; ++i) {
delta += (cur_time - other_threads[i]->th.th_bar_arrive_time);
other_threads[i]->th.th_bar_arrive_time = 0;
}
@@ -1933,6 +2461,11 @@ void __kmp_fork_barrier(int gtid, int tid) {
} // primary thread
switch (__kmp_barrier_release_pattern[bs_forkjoin_barrier]) {
+ case bp_dist_bar: {
+ __kmp_dist_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
+ TRUE USE_ITT_BUILD_ARG(NULL));
+ break;
+ }
case bp_hyper_bar: {
KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]);
__kmp_hyper_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
diff --git a/openmp/runtime/src/kmp_barrier.h b/openmp/runtime/src/kmp_barrier.h
new file mode 100644
index 000000000000..ac28a13217e9
--- /dev/null
+++ b/openmp/runtime/src/kmp_barrier.h
@@ -0,0 +1,141 @@
+/*
+ * kmp_barrier.h
+ */
+
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef KMP_BARRIER_H
+#define KMP_BARRIER_H
+
+#include "kmp.h"
+#include "kmp_i18n.h"
+
+#if KMP_HAVE_XMMINTRIN_H && KMP_HAVE__MM_MALLOC
+#include <xmmintrin.h>
+#define KMP_ALIGNED_ALLOCATE(size, alignment) _mm_malloc(size, alignment)
+#define KMP_ALIGNED_FREE(ptr) _mm_free(ptr)
+#elif KMP_HAVE_ALIGNED_ALLOC
+#define KMP_ALIGNED_ALLOCATE(size, alignment) aligned_alloc(alignment, size)
+#define KMP_ALIGNED_FREE(ptr) free(ptr)
+#elif KMP_HAVE_POSIX_MEMALIGN
+static inline void *KMP_ALIGNED_ALLOCATE(size_t size, size_t alignment) {
+ void *ptr;
+ int n = posix_memalign(&ptr, alignment, size);
+ if (n != 0) {
+ if (ptr)
+ free(ptr);
+ return nullptr;
+ }
+ return ptr;
+}
+#define KMP_ALIGNED_FREE(ptr) free(ptr)
+#elif KMP_HAVE__ALIGNED_MALLOC
+#include <malloc.h>
+#define KMP_ALIGNED_ALLOCATE(size, alignment) _aligned_malloc(size, alignment)
+#define KMP_ALIGNED_FREE(ptr) _aligned_free(ptr)
+#else
+#define KMP_ALIGNED_ALLOCATE(size, alignment) KMP_INTERNAL_MALLOC(size)
+#define KMP_ALIGNED_FREE(ptr) KMP_INTERNAL_FREE(ptr)
+#endif
+
+// Use four cache lines: the MLC tends to prefetch the next or previous cache
+// line, creating a possible false conflict between cores, so this is the only
+// way to guarantee that no such prefetch can happen.
+#ifndef KMP_FOURLINE_ALIGN_CACHE
+#define KMP_FOURLINE_ALIGN_CACHE KMP_ALIGN(4 * CACHE_LINE)
+#endif
+
+#define KMP_OPTIMIZE_FOR_REDUCTIONS 0
+
+class distributedBarrier {
+ struct flags_s {
+ kmp_uint32 volatile KMP_FOURLINE_ALIGN_CACHE stillNeed;
+ };
+
+ struct go_s {
+ std::atomic<kmp_uint64> KMP_FOURLINE_ALIGN_CACHE go;
+ };
+
+ struct iter_s {
+ kmp_uint64 volatile KMP_FOURLINE_ALIGN_CACHE iter;
+ };
+
+ struct sleep_s {
+ std::atomic<bool> KMP_FOURLINE_ALIGN_CACHE sleep;
+ };
+
+ void init(size_t nthr);
+ void resize(size_t nthr);
+ void computeGo(size_t n);
+ void computeVarsForN(size_t n);
+
+public:
+ enum {
+ MAX_ITERS = 3,
+ MAX_GOS = 8,
+ IDEAL_GOS = 4,
+ IDEAL_CONTENTION = 16,
+ };
+
+ flags_s *flags[MAX_ITERS];
+ go_s *go;
+ iter_s *iter;
+ sleep_s *sleep;
+
+ size_t KMP_ALIGN_CACHE num_threads; // number of threads in barrier
+ size_t KMP_ALIGN_CACHE max_threads; // size of arrays in data structure
+ // number of go signals each requiring one write per iteration
+ size_t KMP_ALIGN_CACHE num_gos;
+ // number of groups of gos
+ size_t KMP_ALIGN_CACHE num_groups;
+ // threads per go signal
+ size_t KMP_ALIGN_CACHE threads_per_go;
+ bool KMP_ALIGN_CACHE fix_threads_per_go;
+ // threads per group
+ size_t KMP_ALIGN_CACHE threads_per_group;
+ // number of go signals in a group
+ size_t KMP_ALIGN_CACHE gos_per_group;
+ void *team_icvs;
+
+ distributedBarrier() = delete;
+ ~distributedBarrier() = delete;
+
+ // Used instead of constructor to create aligned data
+ static distributedBarrier *allocate(int nThreads) {
+ distributedBarrier *d = (distributedBarrier *)KMP_ALIGNED_ALLOCATE(
+ sizeof(distributedBarrier), 4 * CACHE_LINE);
+ if (!d) {
+ KMP_FATAL(MemoryAllocFailed);
+ }
+ d->num_threads = 0;
+ d->max_threads = 0;
+ for (int i = 0; i < MAX_ITERS; ++i)
+ d->flags[i] = NULL;
+ d->go = NULL;
+ d->iter = NULL;
+ d->sleep = NULL;
+ d->team_icvs = NULL;
+ d->fix_threads_per_go = false;
+ // calculate gos and groups ONCE on base size
+ d->computeGo(nThreads);
+ d->init(nThreads);
+ return d;
+ }
+
+ static void deallocate(distributedBarrier *db) { KMP_ALIGNED_FREE(db); }
+
+ void update_num_threads(size_t nthr) { init(nthr); }
+
+ bool need_resize(size_t new_nthr) { return (new_nthr > max_threads); }
+ size_t get_num_threads() { return num_threads; }
+ kmp_uint64 go_release();
+ void go_reset();
+};
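+// Usage sketch (hypothetical caller, mirroring how a team-sized barrier could
+// be managed; nthr and new_nthr are assumed team sizes):
+//   distributedBarrier *b = distributedBarrier::allocate(nthr);
+//   ...
+//   b->update_num_threads(new_nthr); // re-init (and resize) for a new size
+//   ...
+//   distributedBarrier::deallocate(b);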
+
+#endif // KMP_BARRIER_H
diff --git a/openmp/runtime/src/kmp_config.h.cmake b/openmp/runtime/src/kmp_config.h.cmake
index 0b07d115ff7b..40d20115c9ec 100644
--- a/openmp/runtime/src/kmp_config.h.cmake
+++ b/openmp/runtime/src/kmp_config.h.cmake
@@ -84,6 +84,16 @@
#define KMP_HAVE_ATTRIBUTE_RTM LIBOMP_HAVE_ATTRIBUTE_RTM
#cmakedefine01 LIBOMP_ARCH_AARCH64_A64FX
#define KMP_ARCH_AARCH64_A64FX LIBOMP_ARCH_AARCH64_A64FX
+#cmakedefine01 LIBOMP_HAVE_XMMINTRIN_H
+#define KMP_HAVE_XMMINTRIN_H LIBOMP_HAVE_XMMINTRIN_H
+#cmakedefine01 LIBOMP_HAVE__MM_MALLOC
+#define KMP_HAVE__MM_MALLOC LIBOMP_HAVE__MM_MALLOC
+#cmakedefine01 LIBOMP_HAVE_ALIGNED_ALLOC
+#define KMP_HAVE_ALIGNED_ALLOC LIBOMP_HAVE_ALIGNED_ALLOC
+#cmakedefine01 LIBOMP_HAVE_POSIX_MEMALIGN
+#define KMP_HAVE_POSIX_MEMALIGN LIBOMP_HAVE_POSIX_MEMALIGN
+#cmakedefine01 LIBOMP_HAVE__ALIGNED_MALLOC
+#define KMP_HAVE__ALIGNED_MALLOC LIBOMP_HAVE__ALIGNED_MALLOC
// Configured cache line based on architecture
#if KMP_ARCH_PPC64
@@ -124,4 +134,9 @@
# define KMP_GOMP_COMPAT
#endif
+// use shared memory with dynamic library (except Android, where shm_*
+// functions don't exist).
+#if KMP_OS_UNIX && KMP_DYNAMIC_LIB && !__ANDROID__
+#define KMP_USE_SHM
+#endif
#endif // KMP_CONFIG_H
diff --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp
index 2a7c9a8cb2ec..e95c2f072509 100644
--- a/openmp/runtime/src/kmp_csupport.cpp
+++ b/openmp/runtime/src/kmp_csupport.cpp
@@ -288,15 +288,7 @@ void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
ompt_frame_t *ompt_frame;
if (ompt_enabled.enabled) {
kmp_info_t *master_th = __kmp_threads[gtid];
- kmp_team_t *parent_team = master_th->th.th_team;
- ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
- if (lwt)
- ompt_frame = &(lwt->ompt_task_info.frame);
- else {
- int tid = __kmp_tid_from_gtid(gtid);
- ompt_frame = &(
- parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
- }
+ ompt_frame = &master_th->th.th_current_task->ompt_task_info.frame;
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
}
OMPT_STORE_RETURN_ADDRESS(gtid);
@@ -320,6 +312,12 @@ void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
);
va_end(ap);
+
+#if OMPT_SUPPORT
+ if (ompt_enabled.enabled) {
+ ompt_frame->enter_frame = ompt_data_none;
+ }
+#endif
}
#if KMP_STATS_ENABLED
@@ -578,9 +576,6 @@ void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
__kmp_free(top);
}
- // if( serial_team -> t.t_serialized > 1 )
- serial_team->t.t_level--;
-
/* pop dispatch buffers stack */
KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
{
@@ -605,6 +600,7 @@ void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
}
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
+ __kmp_pop_current_task_from_thread(this_thr);
#if OMPD_SUPPORT
if (ompd_state & OMPD_ENABLE_BP)
ompd_bp_parallel_end();
@@ -623,8 +619,6 @@ void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
this_thr->th.th_dispatch =
&this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
- __kmp_pop_current_task_from_thread(this_thr);
-
KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
this_thr->th.th_current_task->td_flags.executing = 1;
@@ -645,6 +639,7 @@ void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
}
}
+ serial_team->t.t_level--;
if (__kmp_env_consistency_check)
__kmp_pop_parallel(global_tid, NULL);
#if OMPT_SUPPORT
@@ -686,7 +681,7 @@ void __kmpc_flush(ident_t *loc) {
if (!__kmp_cpuinfo.initialized) {
__kmp_query_cpuid(&__kmp_cpuinfo);
}
- if (!__kmp_cpuinfo.sse2) {
+ if (!__kmp_cpuinfo.flags.sse2) {
// CPU cannot execute SSE2 instructions.
} else {
#if KMP_COMPILER_ICC
@@ -1359,7 +1354,7 @@ static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
#endif
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-#define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
+#define KMP_CPUINFO_RTM (__kmp_cpuinfo.flags.rtm)
#else
#define KMP_CPUINFO_RTM 0
#endif
@@ -4330,24 +4325,35 @@ void __kmpc_doacross_fini(ident_t *loc, int gtid) {
KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
}
-/* omp_alloc/omp_calloc/omp_free only defined for C/C++, not for Fortran */
+/* OpenMP 5.1 Memory Management routines */
void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
- return __kmpc_alloc(__kmp_entry_gtid(), size, allocator);
+ return __kmp_alloc(__kmp_entry_gtid(), 0, size, allocator);
+}
+
+void *omp_aligned_alloc(size_t align, size_t size,
+ omp_allocator_handle_t allocator) {
+ return __kmp_alloc(__kmp_entry_gtid(), align, size, allocator);
}
void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) {
- return __kmpc_calloc(__kmp_entry_gtid(), nmemb, size, allocator);
+ return __kmp_calloc(__kmp_entry_gtid(), 0, nmemb, size, allocator);
+}
+
+void *omp_aligned_calloc(size_t align, size_t nmemb, size_t size,
+ omp_allocator_handle_t allocator) {
+ return __kmp_calloc(__kmp_entry_gtid(), align, nmemb, size, allocator);
}
void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator,
omp_allocator_handle_t free_allocator) {
- return __kmpc_realloc(__kmp_entry_gtid(), ptr, size, allocator,
+ return __kmp_realloc(__kmp_entry_gtid(), ptr, size, allocator,
free_allocator);
}
void omp_free(void *ptr, omp_allocator_handle_t allocator) {
- __kmpc_free(__kmp_entry_gtid(), ptr, allocator);
+ ___kmpc_free(__kmp_entry_gtid(), ptr, allocator);
}
+/* end of OpenMP 5.1 Memory Management routines */
int __kmpc_get_target_offload(void) {
if (!__kmp_init_serial) {
@@ -4395,6 +4401,38 @@ void __kmpc_error(ident_t *loc, int severity, const char *message) {
__kmp_str_free(&src_loc);
}
+// Mark begin of scope directive.
+void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
+// reserved is for extension of scope directive and not used.
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+ if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
+ kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
+ int tid = __kmp_tid_from_gtid(gtid);
+ ompt_callbacks.ompt_callback(ompt_callback_work)(
+ ompt_work_scope, ompt_scope_begin,
+ &(team->t.ompt_team_info.parallel_data),
+ &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
+ OMPT_GET_RETURN_ADDRESS(0));
+ }
+#endif // OMPT_SUPPORT && OMPT_OPTIONAL
+}
+
+// Mark end of scope directive
+void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
+// reserved is for extension of scope directive and not used.
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+ if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
+ kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
+ int tid = __kmp_tid_from_gtid(gtid);
+ ompt_callbacks.ompt_callback(ompt_callback_work)(
+ ompt_work_scope, ompt_scope_end,
+ &(team->t.ompt_team_info.parallel_data),
+ &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
+ OMPT_GET_RETURN_ADDRESS(0));
+ }
+#endif // OMPT_SUPPORT && OMPT_OPTIONAL
+}
+
#ifdef KMP_USE_VERSION_SYMBOLS
// For GOMP compatibility there are two versions of each omp_* API.
// One is the plain C symbol and one is the Fortran symbol with an appended
diff --git a/openmp/runtime/src/kmp_dispatch.cpp b/openmp/runtime/src/kmp_dispatch.cpp
index cc2d0012bf38..1aaffc76909a 100644
--- a/openmp/runtime/src/kmp_dispatch.cpp
+++ b/openmp/runtime/src/kmp_dispatch.cpp
@@ -72,8 +72,8 @@ void __kmp_dispatch_dxo_error(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
static inline int __kmp_get_monotonicity(ident_t *loc, enum sched_type schedule,
bool use_hier = false) {
// Pick up the nonmonotonic/monotonic bits from the scheduling type
- // TODO: make nonmonotonic when static_steal is fixed
- int monotonicity = SCHEDULE_MONOTONIC;
+ // Nonmonotonic as default for dynamic schedule when no modifier is specified
+ int monotonicity = SCHEDULE_NONMONOTONIC;
// Let default be monotonic for executables
// compiled with OpenMP* 4.5 or less compilers
@@ -561,6 +561,7 @@ void __kmp_dispatch_init_algorithm(ident_t *loc, int gtid,
_control87(_PC_64, _MCW_PC); // 0,0x30000
#endif
/* value used for comparison in solver for cross-over point */
+ KMP_ASSERT(tc > 0);
long double target = ((long double)chunk * 2 + 1) * nproc / tc;
/* crossover point--chunk indexes equal to or greater than
@@ -668,6 +669,8 @@ void __kmp_dispatch_init_algorithm(ident_t *loc, int gtid,
case kmp_sch_static_chunked:
case kmp_sch_dynamic_chunked:
dynamic_init:
+ if (tc == 0)
+ break;
if (pr->u.p.parm1 <= 0)
pr->u.p.parm1 = KMP_DEFAULT_CHUNK;
else if (pr->u.p.parm1 > tc)
@@ -1713,7 +1716,7 @@ int __kmp_dispatch_next_algorithm(int gtid,
status = 0; // nothing to do, don't try atomic op
break;
}
- KMP_DEBUG_ASSERT(init % chunk == 0);
+ KMP_DEBUG_ASSERT(chunk && init % chunk == 0);
// compare with K*nproc*(chunk+1), K=2 by default
if ((T)remaining < pr->u.p.parm2) {
// use dynamic-style schedule
diff --git a/openmp/runtime/src/kmp_ftn_os.h b/openmp/runtime/src/kmp_ftn_os.h
index 5b9e396e3dd9..fc2bff595d7c 100644
--- a/openmp/runtime/src/kmp_ftn_os.h
+++ b/openmp/runtime/src/kmp_ftn_os.h
@@ -712,5 +712,6 @@
#define KMP_API_NAME_GOMP_SECTIONS2_START GOMP_sections2_start
#define KMP_API_NAME_GOMP_WORKSHARE_TASK_REDUCTION_UNREGISTER \
GOMP_workshare_task_reduction_unregister
-
+#define KMP_API_NAME_GOMP_ALLOC GOMP_alloc
+#define KMP_API_NAME_GOMP_FREE GOMP_free
#endif /* KMP_FTN_OS_H */
diff --git a/openmp/runtime/src/kmp_global.cpp b/openmp/runtime/src/kmp_global.cpp
index 24de14fe8c33..4aea5a2d8663 100644
--- a/openmp/runtime/src/kmp_global.cpp
+++ b/openmp/runtime/src/kmp_global.cpp
@@ -110,8 +110,8 @@ char const *__kmp_barrier_type_name[bs_last_barrier] = {"plain", "forkjoin"
"reduction"
#endif // KMP_FAST_REDUCTION_BARRIER
};
-char const *__kmp_barrier_pattern_name[bp_last_bar] = {"linear", "tree",
- "hyper", "hierarchical"};
+char const *__kmp_barrier_pattern_name[bp_last_bar] = {
+ "linear", "tree", "hyper", "hierarchical", "dist"};
int __kmp_allThreadsSpecified = 0;
size_t __kmp_align_alloc = CACHE_LINE;
@@ -280,6 +280,7 @@ char *__kmp_cpuinfo_file = NULL;
#endif /* KMP_AFFINITY_SUPPORTED */
kmp_nested_proc_bind_t __kmp_nested_proc_bind = {NULL, 0, 0};
+kmp_proc_bind_t __kmp_teams_proc_bind = proc_bind_spread;
int __kmp_affinity_num_places = 0;
int __kmp_display_affinity = FALSE;
char *__kmp_affinity_format = NULL;
diff --git a/openmp/runtime/src/kmp_gsupport.cpp b/openmp/runtime/src/kmp_gsupport.cpp
index 61a3199f1a03..d77d4809a7e9 100644
--- a/openmp/runtime/src/kmp_gsupport.cpp
+++ b/openmp/runtime/src/kmp_gsupport.cpp
@@ -23,18 +23,24 @@ enum {
KMP_GOMP_TASK_DEPENDS_FLAG = 8
};
+enum {
+ KMP_GOMP_DEPOBJ_IN = 1,
+ KMP_GOMP_DEPOBJ_OUT = 2,
+ KMP_GOMP_DEPOBJ_INOUT = 3,
+ KMP_GOMP_DEPOBJ_MTXINOUTSET = 4
+};
+
// This class helps convert gomp dependency info into
// kmp_depend_info_t structures
class kmp_gomp_depends_info_t {
void **depend;
kmp_int32 num_deps;
- size_t num_out, num_mutexinout, num_in;
+ size_t num_out, num_mutexinout, num_in, num_depobj;
size_t offset;
public:
kmp_gomp_depends_info_t(void **depend) : depend(depend) {
size_t ndeps = (kmp_intptr_t)depend[0];
- size_t num_doable;
// GOMP taskdep structure:
// if depend[0] != 0:
// depend = [ ndeps | nout | &out | ... | &out | &in | ... | &in ]
@@ -45,21 +51,17 @@ public:
if (ndeps) {
num_out = (kmp_intptr_t)depend[1];
num_in = ndeps - num_out;
- num_mutexinout = 0;
- num_doable = ndeps;
+ num_mutexinout = num_depobj = 0;
offset = 2;
} else {
ndeps = (kmp_intptr_t)depend[1];
num_out = (kmp_intptr_t)depend[2];
num_mutexinout = (kmp_intptr_t)depend[3];
num_in = (kmp_intptr_t)depend[4];
- num_doable = num_out + num_mutexinout + num_in;
+ num_depobj = ndeps - num_out - num_mutexinout - num_in;
+ KMP_ASSERT(num_depobj <= ndeps);
offset = 5;
}
- // TODO: Support gomp depobj
- if (ndeps != num_doable) {
- KMP_FATAL(GompFeatureNotSupported, "depobj");
- }
num_deps = static_cast<kmp_int32>(ndeps);
}
kmp_int32 get_num_deps() const { return num_deps; }
@@ -67,7 +69,6 @@ public:
kmp_depend_info_t retval;
memset(&retval, '\0', sizeof(retval));
KMP_ASSERT(index < (size_t)num_deps);
- retval.base_addr = (kmp_intptr_t)depend[offset + index];
retval.len = 0;
// Because inout and out are logically equivalent,
// use inout and in dependency flags. GOMP does not provide a
@@ -75,10 +76,37 @@ public:
if (index < num_out) {
retval.flags.in = 1;
retval.flags.out = 1;
+ retval.base_addr = (kmp_intptr_t)depend[offset + index];
} else if (index >= num_out && index < (num_out + num_mutexinout)) {
retval.flags.mtx = 1;
- } else {
+ retval.base_addr = (kmp_intptr_t)depend[offset + index];
+ } else if (index >= (num_out + num_mutexinout) &&
+ index < (num_out + num_mutexinout + num_in)) {
retval.flags.in = 1;
+ retval.base_addr = (kmp_intptr_t)depend[offset + index];
+ } else {
+      // depobj is a two-element array (each element is the size of a pointer)
+ // depobj[0] = base_addr
+ // depobj[1] = type (in, out, inout, mutexinoutset, etc.)
+ kmp_intptr_t *depobj = (kmp_intptr_t *)depend[offset + index];
+ retval.base_addr = depobj[0];
+ switch (depobj[1]) {
+ case KMP_GOMP_DEPOBJ_IN:
+ retval.flags.in = 1;
+ break;
+ case KMP_GOMP_DEPOBJ_OUT:
+ retval.flags.out = 1;
+ break;
+ case KMP_GOMP_DEPOBJ_INOUT:
+ retval.flags.in = 1;
+ retval.flags.out = 1;
+ break;
+ case KMP_GOMP_DEPOBJ_MTXINOUTSET:
+ retval.flags.mtx = 1;
+ break;
+ default:
+ KMP_FATAL(GompFeatureNotSupported, "Unknown depobj type");
+ }
}
return retval;
}
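Note: a minimal sketch of the two GOMP dependence-array layouts decoded above; the classifier below and its names are illustrative only and are not part of the runtime.

// Legacy layout  (depend[0] != 0): [ ndeps | nout | &out... | &in... ]
// Extended layout (depend[0] == 0):
//   [ 0 | ndeps | nout | nmtx | nin | &out... | &mtx... | &in... | &depobj... ]
// Each trailing &depobj points at { base_addr, type }, with type one of the
// KMP_GOMP_DEPOBJ_* values introduced above.
#include <cstddef>

enum class DepKind { Out, MutexInoutSet, In, Depobj };

// Hypothetical classifier mirroring get_kmp_depend(): the index ranges decide
// which dependence flags (or the depobj indirection) apply.
static DepKind classify(std::size_t index, std::size_t num_out,
                        std::size_t num_mtx, std::size_t num_in) {
  if (index < num_out)
    return DepKind::Out;
  if (index < num_out + num_mtx)
    return DepKind::MutexInoutSet;
  if (index < num_out + num_mtx + num_in)
    return DepKind::In;
  return DepKind::Depobj;
}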
@@ -1206,7 +1234,7 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASK)(void (*func)(void *), void *data,
// The low-order bit is the "untied" flag
if (!(gomp_flags & KMP_GOMP_TASK_UNTIED_FLAG)) {
- input_flags->tiedness = 1;
+ input_flags->tiedness = TASK_TIED;
}
// The second low-order bit is the "final" flag
if (gomp_flags & KMP_GOMP_TASK_FINAL_FLAG) {
@@ -1494,6 +1522,13 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_SECTIONS)(void (*task)(void *),
KA_TRACE(20, ("GOMP_parallel_sections: T#%d\n", gtid));
#if OMPT_SUPPORT
+ ompt_frame_t *task_frame;
+ kmp_info_t *thr;
+ if (ompt_enabled.enabled) {
+ thr = __kmp_threads[gtid];
+ task_frame = &(thr->th.th_current_task->ompt_task_info.frame);
+ task_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
+ }
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
@@ -1509,9 +1544,31 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_SECTIONS)(void (*task)(void *),
KMP_DISPATCH_INIT(&loc, gtid, kmp_nm_dynamic_chunked, 1, count, 1, 1, TRUE);
}
+
+#if OMPT_SUPPORT
+ ompt_frame_t *child_frame;
+ if (ompt_enabled.enabled) {
+ child_frame = &(thr->th.th_current_task->ompt_task_info.frame);
+ child_frame->exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
+ }
+#endif
+
task(data);
+
+#if OMPT_SUPPORT
+ if (ompt_enabled.enabled) {
+ child_frame->exit_frame = ompt_data_none;
+ }
+#endif
+
KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)();
KA_TRACE(20, ("GOMP_parallel_sections exit: T#%d\n", gtid));
+
+#if OMPT_SUPPORT
+ if (ompt_enabled.enabled) {
+ task_frame->enter_frame = ompt_data_none;
+ }
+#endif
}
#define PARALLEL_LOOP(func, schedule, ompt_pre, ompt_post) \
@@ -1738,7 +1795,7 @@ void __GOMP_taskloop(void (*func)(void *), void *data,
KMP_ASSERT(arg_align > 0);
// The low-order bit is the "untied" flag
if (!(gomp_flags & 1)) {
- input_flags->tiedness = 1;
+ input_flags->tiedness = TASK_TIED;
}
// The second low-order bit is the "final" flag
if (gomp_flags & 2) {
@@ -2428,6 +2485,26 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_WORKSHARE_TASK_REDUCTION_UNREGISTER)(
}
}
+// allocator construct
+void *KMP_EXPAND_NAME(KMP_API_NAME_GOMP_ALLOC)(size_t alignment, size_t size,
+ uintptr_t allocator) {
+ int gtid = __kmp_entry_gtid();
+ KA_TRACE(20, ("GOMP_alloc: T#%d\n", gtid));
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+ OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
+ return __kmp_alloc(gtid, alignment, size, (omp_allocator_handle_t)allocator);
+}
+
+void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_FREE)(void *ptr, uintptr_t allocator) {
+ int gtid = __kmp_entry_gtid();
+ KA_TRACE(20, ("GOMP_free: T#%d\n", gtid));
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+ OMPT_STORE_RETURN_ADDRESS(gtid);
+#endif
+ return ___kmpc_free(gtid, ptr, (omp_allocator_handle_t)allocator);
+}
+
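Note: a minimal sketch of the C-linkage signatures added above and a direct call, assuming <omp.h> for the allocator handle; in practice these entry points are emitted by the compiler rather than called by hand, and the demo function is illustrative.

#include <omp.h>
#include <cstddef>
#include <cstdint>

extern "C" void *GOMP_alloc(std::size_t alignment, std::size_t size,
                            std::uintptr_t allocator);
extern "C" void GOMP_free(void *ptr, std::uintptr_t allocator);

void demo() {
  // Allocate 1 KiB with 64-byte alignment from the default OpenMP allocator,
  // then return it; the wrappers above forward to __kmp_alloc/___kmpc_free.
  void *p = GOMP_alloc(64, 1024, (std::uintptr_t)omp_default_mem_alloc);
  GOMP_free(p, (std::uintptr_t)omp_default_mem_alloc);
}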
/* The following sections of code create aliases for the GOMP_* functions, then
create versioned symbols using the assembler directive .symver. This is only
pertinent for ELF .so library. The KMP_VERSION_SYMBOL macro is defined in
@@ -2616,6 +2693,10 @@ KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_START, 50, "GOMP_5.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SECTIONS2_START, 50, "GOMP_5.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_WORKSHARE_TASK_REDUCTION_UNREGISTER, 50,
"GOMP_5.0");
+
+// GOMP_5.0.1 versioned symbols
+KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_ALLOC, 501, "GOMP_5.0.1");
+KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_FREE, 501, "GOMP_5.0.1");
#endif // KMP_USE_VERSION_SYMBOLS
#ifdef __cplusplus
diff --git a/openmp/runtime/src/kmp_itt.cpp b/openmp/runtime/src/kmp_itt.cpp
index 0aa8a70fc70f..29c8735ff993 100644
--- a/openmp/runtime/src/kmp_itt.cpp
+++ b/openmp/runtime/src/kmp_itt.cpp
@@ -24,12 +24,9 @@
#include "ittnotify_config.h"
__itt_global __kmp_ittapi_clean_global;
extern __itt_global __kmp_itt__ittapi_global;
-kmp_int32 __kmp_barrier_domain_count;
-kmp_int32 __kmp_region_domain_count;
-__itt_domain *__kmp_itt_barrier_domains[KMP_MAX_FRAME_DOMAINS];
-__itt_domain *__kmp_itt_region_domains[KMP_MAX_FRAME_DOMAINS];
-__itt_domain *__kmp_itt_imbalance_domains[KMP_MAX_FRAME_DOMAINS];
-kmp_int32 __kmp_itt_region_team_size[KMP_MAX_FRAME_DOMAINS];
+
+kmp_itthash_t __kmp_itt_barrier_domains = {{0}, 0};
+kmp_itthash_t __kmp_itt_region_domains = {{0}, 0};
__itt_domain *metadata_domain = NULL;
__itt_string_handle *string_handle_imbl = NULL;
__itt_string_handle *string_handle_loop = NULL;
diff --git a/openmp/runtime/src/kmp_itt.h b/openmp/runtime/src/kmp_itt.h
index 9872764a375c..21eeb37e62f6 100644
--- a/openmp/runtime/src/kmp_itt.h
+++ b/openmp/runtime/src/kmp_itt.h
@@ -278,15 +278,21 @@ __kmp_inline void __kmp_itt_stack_callee_leave(__itt_caller);
} /* if */ \
} while (0)
-const int KMP_MAX_FRAME_DOMAINS =
- 512; // Maximum number of frame domains to use (maps to
+// Maximum number of frame domains to use (maps to
// different OpenMP regions in the user source code).
-extern kmp_int32 __kmp_barrier_domain_count;
-extern kmp_int32 __kmp_region_domain_count;
-extern __itt_domain *__kmp_itt_barrier_domains[KMP_MAX_FRAME_DOMAINS];
-extern __itt_domain *__kmp_itt_region_domains[KMP_MAX_FRAME_DOMAINS];
-extern __itt_domain *__kmp_itt_imbalance_domains[KMP_MAX_FRAME_DOMAINS];
-extern kmp_int32 __kmp_itt_region_team_size[KMP_MAX_FRAME_DOMAINS];
+const int KMP_MAX_FRAME_DOMAINS = 997;
+typedef struct kmp_itthash_entry {
+ ident_t *loc;
+ int team_size;
+ __itt_domain *d;
+ struct kmp_itthash_entry *next_in_bucket;
+} kmp_itthash_entry_t;
+typedef struct kmp_itthash {
+ kmp_itthash_entry_t *buckets[KMP_MAX_FRAME_DOMAINS];
+ int count; // just a heuristic to limit number of entries
+} kmp_itthash_t;
+extern kmp_itthash_t __kmp_itt_region_domains;
+extern kmp_itthash_t __kmp_itt_barrier_domains;
extern __itt_domain *metadata_domain;
extern __itt_string_handle *string_handle_imbl;
extern __itt_string_handle *string_handle_loop;
diff --git a/openmp/runtime/src/kmp_itt.inl b/openmp/runtime/src/kmp_itt.inl
index ecfcb966bb79..5e75f60124af 100644
--- a/openmp/runtime/src/kmp_itt.inl
+++ b/openmp/runtime/src/kmp_itt.inl
@@ -63,6 +63,49 @@ extern kmp_bootstrap_lock_t __kmp_itt_debug_lock;
static kmp_bootstrap_lock_t metadata_lock =
KMP_BOOTSTRAP_LOCK_INITIALIZER(metadata_lock);
+#if USE_ITT_NOTIFY
+LINKAGE size_t __kmp_itthash_hash(kmp_intptr_t addr, size_t hsize) {
+ return ((addr >> 6) ^ (addr >> 2)) % hsize;
+}
+LINKAGE kmp_itthash_entry *__kmp_itthash_find(kmp_info_t *thread,
+ kmp_itthash_t *h, ident_t *loc,
+ int team_size) {
+ kmp_itthash_entry_t *entry;
+ size_t bucket = __kmp_itthash_hash((kmp_intptr_t)loc, KMP_MAX_FRAME_DOMAINS);
+ for (entry = h->buckets[bucket]; entry; entry = entry->next_in_bucket)
+ if (entry->loc == loc && entry->team_size == team_size)
+ break;
+
+ if (entry == NULL) {
+ // two foreign threads could report frames concurrently
+ int cnt = KMP_TEST_THEN_INC32(&h->count);
+ if (cnt >= KMP_MAX_FRAME_DOMAINS) {
+ KMP_TEST_THEN_DEC32(&h->count); // revert the count
+ return entry; // too many entries
+ }
+ // create new entry
+ entry = (kmp_itthash_entry_t *)__kmp_thread_malloc(
+ thread, sizeof(kmp_itthash_entry_t));
+ entry->loc = loc;
+ entry->team_size = team_size;
+ entry->d = NULL;
+ entry->next_in_bucket = h->buckets[bucket];
+ while (!KMP_COMPARE_AND_STORE_PTR(&h->buckets[bucket],
+ entry->next_in_bucket, entry)) {
+ KMP_CPU_PAUSE();
+ entry->next_in_bucket = h->buckets[bucket];
+ }
+ }
+#if KMP_DEBUG
+ else {
+    // check that the contents of the location info are unique
+ KMP_DEBUG_ASSERT(loc->psource == entry->loc->psource);
+ }
+#endif
+ return entry;
+}
+#endif
+
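Note: the insertion above reserves a slot with an atomic increment and then swaps the new entry into the bucket head with a compare-and-store retry loop; below is a standalone analogy of that bucket-push pattern using std::atomic. Types and names are illustrative, not runtime code.

#include <atomic>

struct Node {
  int key;
  Node *next;
};

// Lock-free push to a bucket head, the same pattern as the
// KMP_COMPARE_AND_STORE_PTR retry loop in __kmp_itthash_find.
void push(std::atomic<Node *> &head, Node *n) {
  n->next = head.load(std::memory_order_relaxed);
  while (!head.compare_exchange_weak(n->next, n, std::memory_order_release,
                                     std::memory_order_relaxed)) {
    // On failure compare_exchange_weak reloads the current head into n->next,
    // so the loop simply retries with the fresh value.
  }
}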
/* Parallel region reporting.
* __kmp_itt_region_forking should be called by primary thread of a team.
Exact moment of call does not matter, but it should be completed before any
@@ -87,97 +130,53 @@ LINKAGE void __kmp_itt_region_forking(int gtid, int team_size, int barriers) {
// The frame notifications are only supported for the outermost teams.
return;
}
- ident_t *loc = __kmp_thread_from_gtid(gtid)->th.th_ident;
- if (loc) {
- // Use the reserved_2 field to store the index to the region domain.
- // Assume that reserved_2 contains zero initially. Since zero is special
- // value here, store the index into domain array increased by 1.
- if (loc->reserved_2 == 0) {
- if (__kmp_region_domain_count < KMP_MAX_FRAME_DOMAINS) {
- int frm =
- KMP_TEST_THEN_INC32(&__kmp_region_domain_count); // get "old" value
- if (frm >= KMP_MAX_FRAME_DOMAINS) {
- KMP_TEST_THEN_DEC32(&__kmp_region_domain_count); // revert the count
- return; // loc->reserved_2 is still 0
- }
- // if (!KMP_COMPARE_AND_STORE_ACQ32( &loc->reserved_2, 0, frm + 1 )) {
- // frm = loc->reserved_2 - 1; // get value saved by other thread
- // for same loc
- //} // AC: this block is to replace next unsynchronized line
-
- // We need to save indexes for both region and barrier frames. We'll use
- // loc->reserved_2 field but put region index to the low two bytes and
- // barrier indexes to the high two bytes. It is OK because
- // KMP_MAX_FRAME_DOMAINS = 512.
- loc->reserved_2 |= (frm + 1); // save "new" value
-
- // Transform compiler-generated region location into the format
- // that the tools more or less standardized on:
- // "<func>$omp$parallel@[file:]<line>[:<col>]"
- char *buff = NULL;
- kmp_str_loc_t str_loc =
- __kmp_str_loc_init(loc->psource, /* init_fname */ false);
- buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
- team_size, str_loc.file, str_loc.line,
- str_loc.col);
-
- __itt_suppress_push(__itt_suppress_memory_errors);
- __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
- __itt_suppress_pop();
-
- __kmp_str_free(&buff);
- if (barriers) {
- if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) {
- int frm = KMP_TEST_THEN_INC32(
- &__kmp_barrier_domain_count); // get "old" value
- if (frm >= KMP_MAX_FRAME_DOMAINS) {
- KMP_TEST_THEN_DEC32(
- &__kmp_barrier_domain_count); // revert the count
- return; // loc->reserved_2 is still 0
- }
- char *buff = NULL;
- buff = __kmp_str_format("%s$omp$barrier@%s:%d", str_loc.func,
- str_loc.file, str_loc.col);
- __itt_suppress_push(__itt_suppress_memory_errors);
- __kmp_itt_barrier_domains[frm] = __itt_domain_create(buff);
- __itt_suppress_pop();
- __kmp_str_free(&buff);
- // Save the barrier frame index to the high two bytes.
- loc->reserved_2 |= (frm + 1) << 16;
- }
- }
- __kmp_str_loc_free(&str_loc);
- __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL);
- }
- } else { // Region domain exists for this location
- // Check if team size was changed. Then create new region domain for this
- // location
- unsigned int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
- if ((frm < KMP_MAX_FRAME_DOMAINS) &&
- (__kmp_itt_region_team_size[frm] != team_size)) {
+ kmp_info_t *th = __kmp_thread_from_gtid(gtid);
+ ident_t *loc = th->th.th_ident;
+ if (!loc) {
+    // no point in reporting a region without location info
+ return;
+ }
+ kmp_itthash_entry *e;
+ e = __kmp_itthash_find(th, &__kmp_itt_region_domains, loc, team_size);
+ if (e == NULL)
+ return; // too many entries in the hash
+ if (e->d == NULL) {
+ // Transform compiler-generated region location into the format
+ // that the tools more or less standardized on:
+ // "<func>$omp$parallel@[file:]<line>[:<col>]"
+ char *buff = NULL;
+ kmp_str_loc_t str_loc =
+ __kmp_str_loc_init(loc->psource, /* init_fname */ false);
+ buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
+ team_size, str_loc.file, str_loc.line, str_loc.col);
+
+ __itt_suppress_push(__itt_suppress_memory_errors);
+ e->d = __itt_domain_create(buff);
+ KMP_ASSERT(e->d != NULL);
+ __itt_suppress_pop();
+
+ __kmp_str_free(&buff);
+ if (barriers) {
+ kmp_itthash_entry *e;
+ e = __kmp_itthash_find(th, &__kmp_itt_barrier_domains, loc, 0);
+ if (e != NULL) {
+ KMP_DEBUG_ASSERT(e->d == NULL);
char *buff = NULL;
- kmp_str_loc_t str_loc =
- __kmp_str_loc_init(loc->psource, /* init_fname */ false);
- buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
- team_size, str_loc.file, str_loc.line,
- str_loc.col);
-
+ buff = __kmp_str_format("%s$omp$barrier@%s:%d", str_loc.func,
+ str_loc.file, str_loc.line);
__itt_suppress_push(__itt_suppress_memory_errors);
- __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
+ e->d = __itt_domain_create(buff);
+ KMP_ASSERT(e->d != NULL);
__itt_suppress_pop();
-
__kmp_str_free(&buff);
- __kmp_str_loc_free(&str_loc);
- __kmp_itt_region_team_size[frm] = team_size;
- __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL);
- } else { // Team size was not changed. Use existing domain.
- __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL);
}
}
- KMP_ITT_DEBUG_LOCK();
- KMP_ITT_DEBUG_PRINT("[frm beg] gtid=%d, idx=%x, loc:%p\n", gtid,
- loc->reserved_2, loc);
+ __kmp_str_loc_free(&str_loc);
}
+ __itt_frame_begin_v3(e->d, NULL);
+ KMP_ITT_DEBUG_LOCK();
+ KMP_ITT_DEBUG_PRINT("[frm beg] gtid=%d, domain=%p, loc:%p\n", gtid, e->d,
+ loc);
#endif
} // __kmp_itt_region_forking
@@ -186,6 +185,11 @@ LINKAGE void __kmp_itt_frame_submit(int gtid, __itt_timestamp begin,
__itt_timestamp end, int imbalance,
ident_t *loc, int team_size, int region) {
#if USE_ITT_NOTIFY
+ if (!loc) {
+    // no point in reporting a region without location info
+ return;
+ }
+ kmp_info_t *th = __kmp_thread_from_gtid(gtid);
if (region) {
kmp_team_t *team = __kmp_team_from_gtid(gtid);
int serialized = (region == 2 ? 1 : 0);
@@ -193,132 +197,67 @@ LINKAGE void __kmp_itt_frame_submit(int gtid, __itt_timestamp begin,
// The frame notifications are only supported for the outermost teams.
return;
}
- // Check region domain has not been created before. It's index is saved in
- // the low two bytes.
- if ((loc->reserved_2 & 0x0000FFFF) == 0) {
- if (__kmp_region_domain_count < KMP_MAX_FRAME_DOMAINS) {
- int frm =
- KMP_TEST_THEN_INC32(&__kmp_region_domain_count); // get "old" value
- if (frm >= KMP_MAX_FRAME_DOMAINS) {
- KMP_TEST_THEN_DEC32(&__kmp_region_domain_count); // revert the count
- return; // loc->reserved_2 is still 0
- }
-
- // We need to save indexes for both region and barrier frames. We'll use
- // loc->reserved_2 field but put region index to the low two bytes and
- // barrier indexes to the high two bytes. It is OK because
- // KMP_MAX_FRAME_DOMAINS = 512.
- loc->reserved_2 |= (frm + 1); // save "new" value
-
- // Transform compiler-generated region location into the format
- // that the tools more or less standardized on:
- // "<func>$omp$parallel:team_size@[file:]<line>[:<col>]"
- char *buff = NULL;
- kmp_str_loc_t str_loc =
- __kmp_str_loc_init(loc->psource, /* init_fname */ false);
- buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
- team_size, str_loc.file, str_loc.line,
- str_loc.col);
-
- __itt_suppress_push(__itt_suppress_memory_errors);
- __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
- __itt_suppress_pop();
-
- __kmp_str_free(&buff);
- __kmp_str_loc_free(&str_loc);
- __kmp_itt_region_team_size[frm] = team_size;
- __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end);
- }
- } else { // Region domain exists for this location
- // Check if team size was changed. Then create new region domain for this
- // location
- unsigned int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
- if (frm >= KMP_MAX_FRAME_DOMAINS)
- return; // something's gone wrong, returning
- if (__kmp_itt_region_team_size[frm] != team_size) {
- char *buff = NULL;
- kmp_str_loc_t str_loc =
- __kmp_str_loc_init(loc->psource, /* init_fname */ false);
- buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
- team_size, str_loc.file, str_loc.line,
- str_loc.col);
-
- __itt_suppress_push(__itt_suppress_memory_errors);
- __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
- __itt_suppress_pop();
+ // Check region domain has not been created before.
+ kmp_itthash_entry *e;
+ e = __kmp_itthash_find(th, &__kmp_itt_region_domains, loc, team_size);
+ if (e == NULL)
+ return; // too many entries in the hash
+ if (e->d == NULL) { // new entry, need to calculate domain
+ // Transform compiler-generated region location into the format
+ // that the tools more or less standardized on:
+ // "<func>$omp$parallel:team_size@[file:]<line>[:<col>]"
+ char *buff = NULL;
+ kmp_str_loc_t str_loc =
+ __kmp_str_loc_init(loc->psource, /* init_fname */ false);
+ buff =
+ __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
+ team_size, str_loc.file, str_loc.line, str_loc.col);
+ __itt_suppress_push(__itt_suppress_memory_errors);
+ e->d = __itt_domain_create(buff);
+ KMP_ASSERT(e->d != NULL);
+ __itt_suppress_pop();
- __kmp_str_free(&buff);
- __kmp_str_loc_free(&str_loc);
- __kmp_itt_region_team_size[frm] = team_size;
- __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end);
- } else { // Team size was not changed. Use existing domain.
- __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end);
- }
+ __kmp_str_free(&buff);
+ __kmp_str_loc_free(&str_loc);
}
+ __itt_frame_submit_v3(e->d, NULL, begin, end);
KMP_ITT_DEBUG_LOCK();
KMP_ITT_DEBUG_PRINT(
- "[reg sub] gtid=%d, idx=%x, region:%d, loc:%p, beg:%llu, end:%llu\n",
- gtid, loc->reserved_2, region, loc, begin, end);
+ "[reg sub] gtid=%d, domain=%p, region:%d, loc:%p, beg:%llu, end:%llu\n",
+ gtid, e->d, region, loc, begin, end);
return;
} else { // called for barrier reporting
- if (loc) {
- if ((loc->reserved_2 & 0xFFFF0000) == 0) {
- if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) {
- int frm = KMP_TEST_THEN_INC32(
- &__kmp_barrier_domain_count); // get "old" value
- if (frm >= KMP_MAX_FRAME_DOMAINS) {
- KMP_TEST_THEN_DEC32(
- &__kmp_barrier_domain_count); // revert the count
- return; // loc->reserved_2 is still 0
- }
- // Save the barrier frame index to the high two bytes.
- loc->reserved_2 |= (frm + 1) << 16; // save "new" value
-
- // Transform compiler-generated region location into the format
- // that the tools more or less standardized on:
- // "<func>$omp$frame@[file:]<line>[:<col>]"
- kmp_str_loc_t str_loc =
- __kmp_str_loc_init(loc->psource, /* init_fname */ false);
- if (imbalance) {
- char *buff_imb = NULL;
- buff_imb = __kmp_str_format("%s$omp$barrier-imbalance:%d@%s:%d",
- str_loc.func, team_size, str_loc.file,
- str_loc.col);
- __itt_suppress_push(__itt_suppress_memory_errors);
- __kmp_itt_imbalance_domains[frm] = __itt_domain_create(buff_imb);
- __itt_suppress_pop();
- __itt_frame_submit_v3(__kmp_itt_imbalance_domains[frm], NULL, begin,
- end);
- __kmp_str_free(&buff_imb);
- } else {
- char *buff = NULL;
- buff = __kmp_str_format("%s$omp$barrier@%s:%d", str_loc.func,
- str_loc.file, str_loc.col);
- __itt_suppress_push(__itt_suppress_memory_errors);
- __kmp_itt_barrier_domains[frm] = __itt_domain_create(buff);
- __itt_suppress_pop();
- __itt_frame_submit_v3(__kmp_itt_barrier_domains[frm], NULL, begin,
- end);
- __kmp_str_free(&buff);
- }
- __kmp_str_loc_free(&str_loc);
- }
- } else { // if it is not 0 then it should be <= KMP_MAX_FRAME_DOMAINS
- if (imbalance) {
- __itt_frame_submit_v3(
- __kmp_itt_imbalance_domains[(loc->reserved_2 >> 16) - 1], NULL,
- begin, end);
- } else {
- __itt_frame_submit_v3(
- __kmp_itt_barrier_domains[(loc->reserved_2 >> 16) - 1], NULL,
- begin, end);
- }
+ kmp_itthash_entry *e;
+ e = __kmp_itthash_find(th, &__kmp_itt_barrier_domains, loc, 0);
+ if (e == NULL)
+ return; // too many entries in the hash
+ if (e->d == NULL) { // new entry, need to calculate domain
+ // Transform compiler-generated region location into the format
+ // that the tools more or less standardized on:
+ // "<func>$omp$frame@[file:]<line>[:<col>]"
+ kmp_str_loc_t str_loc =
+ __kmp_str_loc_init(loc->psource, /* init_fname */ false);
+ char *buff = NULL;
+ if (imbalance) {
+ buff =
+ __kmp_str_format("%s$omp$barrier-imbalance:%d@%s:%d", str_loc.func,
+ team_size, str_loc.file, str_loc.line);
+ } else {
+ buff = __kmp_str_format("%s$omp$barrier@%s:%d", str_loc.func,
+ str_loc.file, str_loc.line);
}
- KMP_ITT_DEBUG_LOCK();
- KMP_ITT_DEBUG_PRINT(
- "[frm sub] gtid=%d, idx=%x, loc:%p, beg:%llu, end:%llu\n", gtid,
- loc->reserved_2, loc, begin, end);
+ __itt_suppress_push(__itt_suppress_memory_errors);
+ e->d = __itt_domain_create(buff);
+ KMP_ASSERT(e->d != NULL);
+ __itt_suppress_pop();
+ __kmp_str_free(&buff);
+ __kmp_str_loc_free(&str_loc);
}
+ __itt_frame_submit_v3(e->d, NULL, begin, end);
+ KMP_ITT_DEBUG_LOCK();
+ KMP_ITT_DEBUG_PRINT(
+ "[frm sub] gtid=%d, domain=%p, loc:%p, beg:%llu, end:%llu\n", gtid,
+ e->d, loc, begin, end);
}
#endif
} // __kmp_itt_frame_submit
@@ -432,15 +371,18 @@ LINKAGE void __kmp_itt_region_joined(int gtid) {
// The frame notifications are only supported for the outermost teams.
return;
}
- ident_t *loc = __kmp_thread_from_gtid(gtid)->th.th_ident;
- if (loc && loc->reserved_2) {
- unsigned int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
- if (frm < KMP_MAX_FRAME_DOMAINS) {
- KMP_ITT_DEBUG_LOCK();
- __itt_frame_end_v3(__kmp_itt_region_domains[frm], NULL);
- KMP_ITT_DEBUG_PRINT("[frm end] gtid=%d, idx=%x, loc:%p\n", gtid,
- loc->reserved_2, loc);
- }
+ kmp_info_t *th = __kmp_thread_from_gtid(gtid);
+ ident_t *loc = th->th.th_ident;
+ if (loc) {
+ kmp_itthash_entry *e = __kmp_itthash_find(th, &__kmp_itt_region_domains,
+ loc, th->th.th_team_nproc);
+ if (e == NULL)
+ return; // too many entries in the hash
+ KMP_DEBUG_ASSERT(e->d);
+ KMP_ITT_DEBUG_LOCK();
+ __itt_frame_end_v3(e->d, NULL);
+ KMP_ITT_DEBUG_PRINT("[frm end] gtid=%d, domain=%p, loc:%p\n", gtid, e->d,
+ loc);
}
#endif
} // __kmp_itt_region_joined
diff --git a/openmp/runtime/src/kmp_lock.cpp b/openmp/runtime/src/kmp_lock.cpp
index 59726f2b9f21..f3bdb03663a6 100644
--- a/openmp/runtime/src/kmp_lock.cpp
+++ b/openmp/runtime/src/kmp_lock.cpp
@@ -1344,14 +1344,15 @@ static int __kmp_test_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
}
int __kmp_release_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
- kmp_info_t *this_thr;
volatile kmp_int32 *head_id_p = &lck->lk.head_id;
volatile kmp_int32 *tail_id_p = &lck->lk.tail_id;
KA_TRACE(1000,
("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid));
KMP_DEBUG_ASSERT(gtid >= 0);
- this_thr = __kmp_thread_from_gtid(gtid);
+#if KMP_DEBUG || DEBUG_QUEUING_LOCKS
+ kmp_info_t *this_thr = __kmp_thread_from_gtid(gtid);
+#endif
KMP_DEBUG_ASSERT(this_thr != NULL);
#ifdef DEBUG_QUEUING_LOCKS
TRACE_LOCK(gtid + 1, "rel ent");
@@ -3103,7 +3104,7 @@ kmp_indirect_lock_t *__kmp_allocate_indirect_lock(void **user_lock,
kmp_int32 gtid,
kmp_indirect_locktag_t tag) {
kmp_indirect_lock_t *lck;
- kmp_lock_index_t idx;
+ kmp_lock_index_t idx, table_idx;
__kmp_acquire_lock(&__kmp_global_lock, gtid);
@@ -3116,26 +3117,41 @@ kmp_indirect_lock_t *__kmp_allocate_indirect_lock(void **user_lock,
KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n",
lck));
} else {
- idx = __kmp_i_lock_table.next;
- // Check capacity and double the size if it is full
- if (idx == __kmp_i_lock_table.size) {
- // Double up the space for block pointers
- int row = __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK;
- kmp_indirect_lock_t **new_table = (kmp_indirect_lock_t **)__kmp_allocate(
- 2 * row * sizeof(kmp_indirect_lock_t *));
- KMP_MEMCPY(new_table, __kmp_i_lock_table.table,
- row * sizeof(kmp_indirect_lock_t *));
- kmp_indirect_lock_t **old_table = __kmp_i_lock_table.table;
- __kmp_i_lock_table.table = new_table;
- __kmp_free(old_table);
- // Allocate new objects in the new blocks
- for (int i = row; i < 2 * row; ++i)
- *(__kmp_i_lock_table.table + i) = (kmp_indirect_lock_t *)__kmp_allocate(
- KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t));
- __kmp_i_lock_table.size = 2 * idx;
+ kmp_uint32 row, col;
+ kmp_indirect_lock_table_t *lock_table = &__kmp_i_lock_table;
+ idx = 0;
+ // Find location in list of lock tables to put new lock
+ while (1) {
+ table_idx = lock_table->next; // index within this table
+ idx += lock_table->next; // global index within list of tables
+ if (table_idx < lock_table->nrow_ptrs * KMP_I_LOCK_CHUNK) {
+ row = table_idx / KMP_I_LOCK_CHUNK;
+ col = table_idx % KMP_I_LOCK_CHUNK;
+ // Allocate a new row of locks if necessary
+ if (!lock_table->table[row]) {
+ lock_table->table[row] = (kmp_indirect_lock_t *)__kmp_allocate(
+ sizeof(kmp_indirect_lock_t) * KMP_I_LOCK_CHUNK);
+ }
+ break;
+ }
+ // Allocate a new lock table if necessary with double the capacity
+ if (!lock_table->next_table) {
+ kmp_indirect_lock_table_t *next_table =
+ (kmp_indirect_lock_table_t *)__kmp_allocate(
+ sizeof(kmp_indirect_lock_table_t));
+ next_table->table = (kmp_indirect_lock_t **)__kmp_allocate(
+ sizeof(kmp_indirect_lock_t *) * 2 * lock_table->nrow_ptrs);
+ next_table->nrow_ptrs = 2 * lock_table->nrow_ptrs;
+ next_table->next = 0;
+ next_table->next_table = nullptr;
+ lock_table->next_table = next_table;
+ }
+ lock_table = lock_table->next_table;
+ KMP_ASSERT(lock_table);
}
- __kmp_i_lock_table.next++;
- lck = KMP_GET_I_LOCK(idx);
+ lock_table->next++;
+
+ lck = &lock_table->table[row][col];
// Allocate a new base lock object
lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]);
KA_TRACE(20,
@@ -3166,10 +3182,7 @@ __kmp_lookup_indirect_lock(void **user_lock, const char *func) {
}
if (OMP_LOCK_T_SIZE < sizeof(void *)) {
kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock);
- if (idx >= __kmp_i_lock_table.size) {
- KMP_FATAL(LockIsUninitialized, func);
- }
- lck = KMP_GET_I_LOCK(idx);
+ lck = __kmp_get_i_lock(idx);
} else {
lck = *((kmp_indirect_lock_t **)user_lock);
}
@@ -3179,7 +3192,7 @@ __kmp_lookup_indirect_lock(void **user_lock, const char *func) {
return lck;
} else {
if (OMP_LOCK_T_SIZE < sizeof(void *)) {
- return KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(user_lock));
+ return __kmp_get_i_lock(KMP_EXTRACT_I_INDEX(user_lock));
} else {
return *((kmp_indirect_lock_t **)user_lock);
}
@@ -3189,13 +3202,13 @@ __kmp_lookup_indirect_lock(void **user_lock, const char *func) {
static void __kmp_init_indirect_lock(kmp_dyna_lock_t *lock,
kmp_dyna_lockseq_t seq) {
#if KMP_USE_ADAPTIVE_LOCKS
- if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) {
+ if (seq == lockseq_adaptive && !__kmp_cpuinfo.flags.rtm) {
KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive");
seq = lockseq_queuing;
}
#endif
#if KMP_USE_TSX
- if (seq == lockseq_rtm_queuing && !__kmp_cpuinfo.rtm) {
+ if (seq == lockseq_rtm_queuing && !__kmp_cpuinfo.flags.rtm) {
seq = lockseq_queuing;
}
#endif
@@ -3322,12 +3335,13 @@ void __kmp_init_dynamic_user_locks() {
return;
// Initialize lock index table
- __kmp_i_lock_table.size = KMP_I_LOCK_CHUNK;
- __kmp_i_lock_table.table =
- (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *));
+ __kmp_i_lock_table.nrow_ptrs = KMP_I_LOCK_TABLE_INIT_NROW_PTRS;
+ __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(
+ sizeof(kmp_indirect_lock_t *) * KMP_I_LOCK_TABLE_INIT_NROW_PTRS);
*(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *)__kmp_allocate(
KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t));
__kmp_i_lock_table.next = 0;
+ __kmp_i_lock_table.next_table = nullptr;
// Indirect lock size
__kmp_indirect_lock_size[locktag_ticket] = sizeof(kmp_ticket_lock_t);
@@ -3392,7 +3406,6 @@ void __kmp_init_dynamic_user_locks() {
// Clean up the lock table.
void __kmp_cleanup_indirect_user_locks() {
- kmp_lock_index_t i;
int k;
// Clean up locks in the pools first (they were already destroyed before going
@@ -3410,22 +3423,29 @@ void __kmp_cleanup_indirect_user_locks() {
__kmp_indirect_lock_pool[k] = NULL;
}
// Clean up the remaining undestroyed locks.
- for (i = 0; i < __kmp_i_lock_table.next; i++) {
- kmp_indirect_lock_t *l = KMP_GET_I_LOCK(i);
- if (l->lock != NULL) {
- // Locks not destroyed explicitly need to be destroyed here.
- KMP_I_LOCK_FUNC(l, destroy)(l->lock);
- KA_TRACE(
- 20,
- ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p from table\n",
- l));
- __kmp_free(l->lock);
+ kmp_indirect_lock_table_t *ptr = &__kmp_i_lock_table;
+ while (ptr) {
+ for (kmp_uint32 row = 0; row < ptr->nrow_ptrs; ++row) {
+ if (!ptr->table[row])
+ continue;
+ for (kmp_uint32 col = 0; col < KMP_I_LOCK_CHUNK; ++col) {
+ kmp_indirect_lock_t *l = &ptr->table[row][col];
+ if (l->lock) {
+ // Locks not destroyed explicitly need to be destroyed here.
+ KMP_I_LOCK_FUNC(l, destroy)(l->lock);
+ KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p "
+ "from table\n",
+ l));
+ __kmp_free(l->lock);
+ }
+ }
+ __kmp_free(ptr->table[row]);
}
+ kmp_indirect_lock_table_t *next_table = ptr->next_table;
+ if (ptr != &__kmp_i_lock_table)
+ __kmp_free(ptr);
+ ptr = next_table;
}
- // Free the table
- for (i = 0; i < __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; i++)
- __kmp_free(__kmp_i_lock_table.table[i]);
- __kmp_free(__kmp_i_lock_table.table);
__kmp_init_user_locks = FALSE;
}
diff --git a/openmp/runtime/src/kmp_lock.h b/openmp/runtime/src/kmp_lock.h
index 4f6ad6414e53..90afd8fd7eb3 100644
--- a/openmp/runtime/src/kmp_lock.h
+++ b/openmp/runtime/src/kmp_lock.h
@@ -1217,22 +1217,41 @@ extern kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(
? __kmp_indirect_get_flags[(lck)->type]((lck)->lock) \
: NULL)
-#define KMP_I_LOCK_CHUNK \
- 1024 // number of kmp_indirect_lock_t objects to be allocated together
+// number of kmp_indirect_lock_t objects to be allocated together
+#define KMP_I_LOCK_CHUNK 1024
+// Keep at a power of 2 since it is used in multiplication & division
+KMP_BUILD_ASSERT(KMP_I_LOCK_CHUNK % 2 == 0);
+// number of row entries in the initial lock table
+#define KMP_I_LOCK_TABLE_INIT_NROW_PTRS 8
// Lock table for indirect locks.
typedef struct kmp_indirect_lock_table {
kmp_indirect_lock_t **table; // blocks of indirect locks allocated
- kmp_lock_index_t size; // size of the indirect lock table
+  kmp_uint32 nrow_ptrs; // number of *table pointer entries in the table
kmp_lock_index_t next; // index to the next lock to be allocated
+ struct kmp_indirect_lock_table *next_table;
} kmp_indirect_lock_table_t;
extern kmp_indirect_lock_table_t __kmp_i_lock_table;
// Returns the indirect lock associated with the given index.
-#define KMP_GET_I_LOCK(index) \
- (*(__kmp_i_lock_table.table + (index) / KMP_I_LOCK_CHUNK) + \
- (index) % KMP_I_LOCK_CHUNK)
+// Returns nullptr if no lock at given index
+static inline kmp_indirect_lock_t *__kmp_get_i_lock(kmp_lock_index_t idx) {
+ kmp_indirect_lock_table_t *lock_table = &__kmp_i_lock_table;
+ while (lock_table) {
+ kmp_lock_index_t max_locks = lock_table->nrow_ptrs * KMP_I_LOCK_CHUNK;
+ if (idx < max_locks) {
+ kmp_lock_index_t row = idx / KMP_I_LOCK_CHUNK;
+ kmp_lock_index_t col = idx % KMP_I_LOCK_CHUNK;
+ if (!lock_table->table[row] || idx >= lock_table->next)
+ break;
+ return &lock_table->table[row][col];
+ }
+ idx -= max_locks;
+ lock_table = lock_table->next_table;
+ }
+ return nullptr;
+}
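Note: a small standalone model of the chained-table lookup above, under the assumption that each table contributes nrow_ptrs * KMP_I_LOCK_CHUNK slots; the names and container types are illustrative only.

#include <cstdint>
#include <vector>

constexpr std::uint32_t CHUNK = 1024; // stands in for KMP_I_LOCK_CHUNK

struct Table {
  std::vector<std::vector<int>> rows; // rows.size() models nrow_ptrs
  std::uint32_t next = 0;             // slots handed out from this table
};

// Walk the chain, subtracting each table's capacity from the global index,
// then split the remainder into a row/column pair.
int *lookup(std::vector<Table> &chain, std::uint32_t idx) {
  for (Table &t : chain) {
    std::uint32_t capacity = static_cast<std::uint32_t>(t.rows.size()) * CHUNK;
    if (idx < capacity) {
      std::uint32_t row = idx / CHUNK, col = idx % CHUNK;
      if (t.rows[row].empty() || idx >= t.next)
        return nullptr; // row not allocated yet, or slot never handed out
      return &t.rows[row][col];
    }
    idx -= capacity;
  }
  return nullptr;
}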
// Number of locks in a lock block, which is fixed to "1" now.
// TODO: No lock block implementation now. If we do support, we need to manage
@@ -1241,8 +1260,9 @@ extern int __kmp_num_locks_in_block;
// Fast lock table lookup without consistency checking
#define KMP_LOOKUP_I_LOCK(l) \
- ((OMP_LOCK_T_SIZE < sizeof(void *)) ? KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(l)) \
- : *((kmp_indirect_lock_t **)(l)))
+ ((OMP_LOCK_T_SIZE < sizeof(void *)) \
+ ? __kmp_get_i_lock(KMP_EXTRACT_I_INDEX(l)) \
+ : *((kmp_indirect_lock_t **)(l)))
// Used once in kmp_error.cpp
extern kmp_int32 __kmp_get_user_lock_owner(kmp_user_lock_p, kmp_uint32);
diff --git a/openmp/runtime/src/kmp_os.h b/openmp/runtime/src/kmp_os.h
index 4437cf251892..d71e9aecb3f6 100644
--- a/openmp/runtime/src/kmp_os.h
+++ b/openmp/runtime/src/kmp_os.h
@@ -1025,6 +1025,30 @@ extern kmp_real64 __kmp_xchg_real64(volatile kmp_real64 *p, kmp_real64 v);
#define KMP_MB() /* nothing to do */
#endif
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+#if KMP_COMPILER_ICC
+#define KMP_MFENCE_() _mm_mfence()
+#define KMP_SFENCE_() _mm_sfence()
+#elif KMP_COMPILER_MSVC
+#define KMP_MFENCE_() MemoryBarrier()
+#define KMP_SFENCE_() MemoryBarrier()
+#else
+#define KMP_MFENCE_() __sync_synchronize()
+#define KMP_SFENCE_() __sync_synchronize()
+#endif
+#define KMP_MFENCE() \
+ if (UNLIKELY(!__kmp_cpuinfo.initialized)) { \
+ __kmp_query_cpuid(&__kmp_cpuinfo); \
+ } \
+ if (__kmp_cpuinfo.flags.sse2) { \
+ KMP_MFENCE_(); \
+ }
+#define KMP_SFENCE() KMP_SFENCE_()
+#else
+#define KMP_MFENCE() KMP_MB()
+#define KMP_SFENCE() KMP_MB()
+#endif
+
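Note: a sketch of why a full store->load fence (what KMP_MFENCE provides once cpuinfo confirms SSE2) matters, using the standard C++ fence as an analogy for the runtime macro; this Dekker-style fragment is illustrative, not runtime code.

#include <atomic>

std::atomic<int> x{0}, y{0};

// Without the seq_cst fences, x86 may reorder each store past the following
// load and both functions can observe 0; with them, at least one observes 1.
int thread0() {
  x.store(1, std::memory_order_relaxed);
  std::atomic_thread_fence(std::memory_order_seq_cst);
  return y.load(std::memory_order_relaxed);
}

int thread1() {
  y.store(1, std::memory_order_relaxed);
  std::atomic_thread_fence(std::memory_order_seq_cst);
  return x.load(std::memory_order_relaxed);
}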
#ifndef KMP_IMB
#define KMP_IMB() /* nothing to do */
#endif
diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
index 16d415dc8a74..4505d269c2b6 100644
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -107,6 +107,10 @@ static int __kmp_unregister_root_other_thread(int gtid);
static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
kmp_info_t *__kmp_thread_pool_insert_pt = NULL;
+void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
+ int new_nthreads);
+void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads);
+
/* Calculate the identifier of the current thread */
/* fast (and somewhat portable) way to get unique identifier of executing
thread. Returns KMP_GTID_DNE if we haven't been assigned a gtid. */
@@ -910,7 +914,8 @@ static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
assured that there are enough threads available, because we checked on that
earlier within critical section forkjoin */
static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
- kmp_info_t *master_th, int master_gtid) {
+ kmp_info_t *master_th, int master_gtid,
+ int fork_teams_workers) {
int i;
int use_hot_team;
@@ -999,7 +1004,12 @@ static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
}
#if KMP_AFFINITY_SUPPORTED
- __kmp_partition_places(team);
+ // Do not partition the places list for teams construct workers who
+ // haven't actually been forked to do real work yet. This partitioning
+ // will take place in the parallel region nested within the teams construct.
+ if (!fork_teams_workers) {
+ __kmp_partition_places(team);
+ }
#endif
}
@@ -1204,7 +1214,7 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
this_thr->th.th_team = serial_team;
serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;
- KF_TRACE(10, ("__kmpc_serialized_parallel: T#d curtask=%p\n", global_tid,
+ KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
this_thr->th.th_current_task));
KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
this_thr->th.th_current_task->td_flags.executing = 0;
@@ -1563,15 +1573,24 @@ int __kmp_fork_call(ident_t *loc, int gtid,
/* Change number of threads in the team if requested */
if (master_set_numthreads) { // The parallel has num_threads clause
- if (master_set_numthreads < master_th->th.th_teams_size.nth) {
+ if (master_set_numthreads <= master_th->th.th_teams_size.nth) {
// AC: only can reduce number of threads dynamically, can't increase
kmp_info_t **other_threads = parent_team->t.t_threads;
+ // NOTE: if using distributed barrier, we need to run this code block
+ // even when the team size appears not to have changed from the max.
+ int old_proc = master_th->th.th_teams_size.nth;
+ if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] ==
+ bp_dist_bar) {
+ __kmp_resize_dist_barrier(parent_team, old_proc,
+ master_set_numthreads);
+ __kmp_add_threads_to_team(parent_team, master_set_numthreads);
+ }
parent_team->t.t_nproc = master_set_numthreads;
for (i = 0; i < master_set_numthreads; ++i) {
other_threads[i]->th.th_team_nproc = master_set_numthreads;
}
- // Keep extra threads hot in the team for possible next parallels
}
+ // Keep extra threads hot in the team for possible next parallels
master_th->th.th_set_nproc = 0;
}
@@ -1584,6 +1603,41 @@ int __kmp_fork_call(ident_t *loc, int gtid,
}
#endif
+ // Figure out the proc_bind policy for the nested parallel within teams
+ kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
+ // proc_bind_default means don't update
+ kmp_proc_bind_t proc_bind_icv = proc_bind_default;
+ if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
+ proc_bind = proc_bind_false;
+ } else {
+ // No proc_bind clause specified; use current proc-bind-var
+ if (proc_bind == proc_bind_default) {
+ proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
+ }
+ /* else: The proc_bind policy was specified explicitly on parallel
+ clause.
+ This overrides proc-bind-var for this parallel region, but does not
+ change proc-bind-var. */
+ // Figure the value of proc-bind-var for the child threads.
+ if ((level + 1 < __kmp_nested_proc_bind.used) &&
+ (__kmp_nested_proc_bind.bind_types[level + 1] !=
+ master_th->th.th_current_task->td_icvs.proc_bind)) {
+ proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
+ }
+ }
+ KMP_CHECK_UPDATE(parent_team->t.t_proc_bind, proc_bind);
+ // Need to change the bind-var ICV to correct value for each implicit task
+ if (proc_bind_icv != proc_bind_default &&
+ master_th->th.th_current_task->td_icvs.proc_bind != proc_bind_icv) {
+ kmp_info_t **other_threads = parent_team->t.t_threads;
+ for (i = 0; i < master_th->th.th_team_nproc; ++i) {
+ other_threads[i]->th.th_current_task->td_icvs.proc_bind =
+ proc_bind_icv;
+ }
+ }
+ // Reset for next parallel region
+ master_th->th.th_set_proc_bind = proc_bind_default;
+
#if USE_ITT_BUILD && USE_ITT_NOTIFY
if (((__itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr) ||
KMP_ITT_DEBUG) &&
@@ -1600,6 +1654,9 @@ int __kmp_fork_call(ident_t *loc, int gtid,
parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
}
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
+#if KMP_AFFINITY_SUPPORTED
+ __kmp_partition_places(parent_team);
+#endif
KF_TRACE(10, ("__kmp_fork_call: before internal fork: root=%p, team=%p, "
"master_th=%p, gtid=%d\n",
@@ -1635,6 +1692,9 @@ int __kmp_fork_call(ident_t *loc, int gtid,
}
#endif
+ // Need this to happen before we determine the number of threads, not while
+ // we are allocating the team
+ //__kmp_push_current_task_to_thread(master_th, parent_team, 0);
int enter_teams = 0;
if (parent_team->t.t_active_level >=
master_th->th.th_current_task->td_icvs.max_active_levels) {
@@ -1642,13 +1702,10 @@ int __kmp_fork_call(ident_t *loc, int gtid,
} else {
enter_teams = ((ap == NULL && active_level == 0) ||
(ap && teams_level > 0 && teams_level == level));
- nthreads =
- master_set_numthreads
- ? master_set_numthreads
- : get__nproc_2(
- parent_team,
- master_tid); // TODO: get nproc directly from current task
-
+ nthreads = master_set_numthreads
+ ? master_set_numthreads
+ // TODO: get nproc directly from current task
+ : get__nproc_2(parent_team, master_tid);
// Check if we need to take forkjoin lock? (no need for serialized
// parallel out of teams construct). This code moved here from
// __kmp_reserve_threads() to speedup nested serialized parallels.
@@ -1940,16 +1997,21 @@ int __kmp_fork_call(ident_t *loc, int gtid,
// Figure out the proc_bind_policy for the new team.
kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
- kmp_proc_bind_t proc_bind_icv =
- proc_bind_default; // proc_bind_default means don't update
+ // proc_bind_default means don't update
+ kmp_proc_bind_t proc_bind_icv = proc_bind_default;
if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
proc_bind = proc_bind_false;
} else {
+ // No proc_bind clause specified; use current proc-bind-var for this
+ // parallel region
if (proc_bind == proc_bind_default) {
- // No proc_bind clause specified; use current proc-bind-var for this
- // parallel region
proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
}
+ // Have teams construct take proc_bind value from KMP_TEAMS_PROC_BIND
+ if (master_th->th.th_teams_microtask &&
+ microtask == (microtask_t)__kmp_teams_master) {
+ proc_bind = __kmp_teams_proc_bind;
+ }
/* else: The proc_bind policy was specified explicitly on parallel clause.
This overrides proc-bind-var for this parallel region, but does not
change proc-bind-var. */
@@ -1957,7 +2019,11 @@ int __kmp_fork_call(ident_t *loc, int gtid,
if ((level + 1 < __kmp_nested_proc_bind.used) &&
(__kmp_nested_proc_bind.bind_types[level + 1] !=
master_th->th.th_current_task->td_icvs.proc_bind)) {
- proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
+ // Do not modify the proc bind icv for the two teams construct forks
+ // They just let the proc bind icv pass through
+ if (!master_th->th.th_teams_microtask ||
+ !(microtask == (microtask_t)__kmp_teams_master || ap == NULL))
+ proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
}
}
@@ -1983,6 +2049,8 @@ int __kmp_fork_call(ident_t *loc, int gtid,
#endif
proc_bind, &new_icvs,
argc USE_NESTED_HOT_ARG(master_th));
+ if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
+ copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs, &new_icvs);
} else {
/* allocate a new parallel team */
KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
@@ -1993,6 +2061,9 @@ int __kmp_fork_call(ident_t *loc, int gtid,
proc_bind,
&master_th->th.th_current_task->td_icvs,
argc USE_NESTED_HOT_ARG(master_th));
+ if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
+ copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs,
+ &master_th->th.th_current_task->td_icvs);
}
KF_TRACE(
10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));
@@ -2124,7 +2195,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
root->r.r_active = TRUE;
- __kmp_fork_team_threads(root, team, master_th, gtid);
+ __kmp_fork_team_threads(root, team, master_th, gtid, !ap);
__kmp_setup_icv_copy(team, nthreads,
&master_th->th.th_current_task->td_icvs, loc);
@@ -2359,6 +2430,12 @@ void __kmp_join_call(ident_t *loc, int gtid
parent_team->t.t_stack_id = NULL;
}
#endif
+
+ if (team->t.t_nproc > 1 &&
+ __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
+ team->t.b->update_num_threads(team->t.t_nproc);
+ __kmp_add_threads_to_team(team, team->t.t_nproc);
+ }
}
KMP_MB();
@@ -2387,6 +2464,14 @@ void __kmp_join_call(ident_t *loc, int gtid
} // active_level == 1
#endif /* USE_ITT_BUILD */
+#if KMP_AFFINITY_SUPPORTED
+ if (!exit_teams) {
+ // Restore master thread's partition.
+ master_th->th.th_first_place = team->t.t_first_place;
+ master_th->th.th_last_place = team->t.t_last_place;
+ }
+#endif // KMP_AFFINITY_SUPPORTED
+
if (master_th->th.th_teams_microtask && !exit_teams &&
team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
team->t.t_level == master_th->th.th_teams_level + 1) {
@@ -2494,11 +2579,6 @@ void __kmp_join_call(ident_t *loc, int gtid
master_th, team));
__kmp_pop_current_task_from_thread(master_th);
-#if KMP_AFFINITY_SUPPORTED
- // Restore master thread's partition.
- master_th->th.th_first_place = team->t.t_first_place;
- master_th->th.th_last_place = team->t.t_last_place;
-#endif // KMP_AFFINITY_SUPPORTED
master_th->th.th_def_allocator = team->t.t_def_allocator;
#if OMPD_SUPPORT
@@ -2646,6 +2726,9 @@ void __kmp_set_num_threads(int new_nth, int gtid) {
__kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
+ if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
+ __kmp_resize_dist_barrier(hot_team, hot_team->t.t_nproc, new_nth);
+ }
// Release the extra threads we don't need any more.
for (f = new_nth; f < hot_team->t.t_nproc; f++) {
KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
@@ -2665,6 +2748,11 @@ void __kmp_set_num_threads(int new_nth, int gtid) {
}
#endif
+ if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
+ hot_team->t.b->update_num_threads(new_nth);
+ __kmp_add_threads_to_team(hot_team, new_nth);
+ }
+
__kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
// Update the t_nproc field in the threads that are still active.
@@ -4112,7 +4200,6 @@ static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
this_thr->th.th_team_nproc = team->t.t_nproc;
this_thr->th.th_team_master = master;
this_thr->th.th_team_serialized = team->t.t_serialized;
- TCW_PTR(this_thr->th.th_sleep_loc, NULL);
KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);
@@ -4281,6 +4368,12 @@ kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
new_thr->th.th_task_state_top = 0;
new_thr->th.th_task_state_stack_sz = 4;
+ if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
+ // Make sure pool thread has transitioned to waiting on own thread struct
+ KMP_DEBUG_ASSERT(new_thr->th.th_used_in_team.load() == 0);
+ // Thread activated in __kmp_allocate_team when increasing team size
+ }
+
#ifdef KMP_ADJUST_BLOCKTIME
/* Adjust blocktime back to zero if necessary */
/* Middle initialization might not have occurred yet */
@@ -4448,6 +4541,9 @@ kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
balign[b].bb.use_oncore_barrier = 0;
}
+ TCW_PTR(new_thr->th.th_sleep_loc, NULL);
+ new_thr->th.th_sleep_loc_type = flag_unset;
+
new_thr->th.th_spin_here = FALSE;
new_thr->th.th_next_waiting = 0;
#if KMP_OS_UNIX
@@ -4976,6 +5072,7 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
kmp_team_t *team;
int use_hot_team = !root->r.r_active;
int level = 0;
+ int do_place_partition = 1;
KA_TRACE(20, ("__kmp_allocate_team: called\n"));
KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
@@ -4997,6 +5094,12 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
++level; // not increment if #teams==1, or for outer fork of the teams;
// increment otherwise
}
+    // Do not perform the place partition for the inner fork of the teams
+    // construct; wait until a nested parallel region is encountered inside it.
+ if ((master->th.th_teams_size.nteams == 1 &&
+ master->th.th_teams_level >= team->t.t_level) ||
+ (team->t.t_pkfn == (microtask_t)__kmp_teams_master))
+ do_place_partition = 0;
}
hot_teams = master->th.th_hot_teams;
if (level < __kmp_hot_teams_max_level && hot_teams &&
@@ -5027,6 +5130,17 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
}
#endif
+ if (team->t.t_nproc != new_nproc &&
+ __kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
+ // Distributed barrier may need a resize
+ int old_nthr = team->t.t_nproc;
+ __kmp_resize_dist_barrier(team, old_nthr, new_nproc);
+ }
+
+ // If not doing the place partition, then reset the team's proc bind
+ // to indicate that partitioning of all threads still needs to take place
+ if (do_place_partition == 0)
+ team->t.t_proc_bind = proc_bind_default;
// Has the number of threads changed?
/* Let's assume the most common case is that the number of threads is
unchanged, and put that case first. */
@@ -5056,16 +5170,20 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
if ((team->t.t_size_changed == 0) &&
(team->t.t_proc_bind == new_proc_bind)) {
if (new_proc_bind == proc_bind_spread) {
- __kmp_partition_places(
- team, 1); // add flag to update only master for spread
+ if (do_place_partition) {
+ // add flag to update only master for spread
+ __kmp_partition_places(team, 1);
+ }
}
KA_TRACE(200, ("__kmp_allocate_team: reusing hot team #%d bindings: "
"proc_bind = %d, partition = [%d,%d]\n",
team->t.t_id, new_proc_bind, team->t.t_first_place,
team->t.t_last_place));
} else {
- KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
- __kmp_partition_places(team);
+ if (do_place_partition) {
+ KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
+ __kmp_partition_places(team);
+ }
}
#else
KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
@@ -5076,6 +5194,11 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
new_nproc));
team->t.t_size_changed = 1;
+ if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
+ // Barrier size already reduced earlier in this function
+ // Activate team threads via th_used_in_team
+ __kmp_add_threads_to_team(team, new_nproc);
+ }
#if KMP_NESTED_HOT_TEAMS
if (__kmp_hot_teams_mode == 0) {
// AC: saved number of threads should correspond to team's value in this
@@ -5137,10 +5260,12 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
}
#endif
- KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
+ if (do_place_partition) {
+ KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
#if KMP_AFFINITY_SUPPORTED
- __kmp_partition_places(team);
+ __kmp_partition_places(team);
#endif
+ }
} else { // team->t.t_nproc < new_nproc
#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
kmp_affin_mask_t *old_mask;
@@ -5152,7 +5277,7 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
KA_TRACE(20,
("__kmp_allocate_team: increasing hot team thread count to %d\n",
new_nproc));
-
+ int old_nproc = team->t.t_nproc; // save old value and use to update only
team->t.t_size_changed = 1;
#if KMP_NESTED_HOT_TEAMS
@@ -5179,10 +5304,9 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
team->t.t_nproc = new_nproc; // just get reserved threads involved
} else {
- // we may have some threads in reserve, but not enough
- team->t.t_nproc =
- hot_teams[level]
- .hot_team_nth; // get reserved threads involved if any
+ // We may have some threads in reserve, but not enough;
+ // get reserved threads involved if any.
+ team->t.t_nproc = hot_teams[level].hot_team_nth;
hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size
#endif // KMP_NESTED_HOT_TEAMS
if (team->t.t_max_nproc < new_nproc) {
@@ -5237,8 +5361,12 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
#if KMP_NESTED_HOT_TEAMS
} // end of check of t_nproc vs. new_nproc vs. hot_team_nth
#endif // KMP_NESTED_HOT_TEAMS
+ if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
+ // Barrier size already increased earlier in this function
+ // Activate team threads via th_used_in_team
+ __kmp_add_threads_to_team(team, new_nproc);
+ }
    /* make sure everyone is synchronized */
- int old_nproc = team->t.t_nproc; // save old value and use to update only
// new threads below
__kmp_initialize_team(team, new_nproc, new_icvs,
root->r.r_uber_thread->th.th_ident);
@@ -5273,10 +5401,12 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
}
#endif
- KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
+ if (do_place_partition) {
+ KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
#if KMP_AFFINITY_SUPPORTED
- __kmp_partition_places(team);
+ __kmp_partition_places(team);
#endif
+ }
} // Check changes in number of threads
kmp_info_t *master = team->t.t_threads[0];
@@ -5342,6 +5472,13 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
/* take this team from the team pool */
__kmp_team_pool = team->t.t_next_pool;
+ if (max_nproc > 1 &&
+ __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
+ if (!team->t.b) { // Allocate barrier structure
+ team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
+ }
+ }
+
/* setup the team for fresh use */
__kmp_initialize_team(team, new_nproc, new_icvs, NULL);
@@ -5397,6 +5534,12 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
/* and set it up */
team->t.t_max_nproc = max_nproc;
+ if (max_nproc > 1 &&
+ __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
+ // Allocate barrier structure
+ team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
+ }
+
/* NOTE well, for some reason allocating one big buffer and dividing it up
seems to really hurt performance a lot on the P4, so, let's not use this */
__kmp_allocate_team_arrays(team, max_nproc);
@@ -5469,7 +5612,6 @@ void __kmp_free_team(kmp_root_t *root,
int use_hot_team = team == root->r.r_hot_team;
#if KMP_NESTED_HOT_TEAMS
int level;
- kmp_hot_team_ptr_t *hot_teams;
if (master) {
level = team->t.t_active_level - 1;
if (master->th.th_teams_microtask) { // in teams construct?
@@ -5483,7 +5625,9 @@ void __kmp_free_team(kmp_root_t *root,
// team_of_workers before the parallel
} // team->t.t_level will be increased inside parallel
}
- hot_teams = master->th.th_hot_teams;
+#if KMP_DEBUG
+ kmp_hot_team_ptr_t *hot_teams = master->th.th_hot_teams;
+#endif
if (level < __kmp_hot_teams_max_level) {
KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
use_hot_team = 1;
@@ -5553,10 +5697,43 @@ void __kmp_free_team(kmp_root_t *root,
/* free the worker threads */
for (f = 1; f < team->t.t_nproc; ++f) {
KMP_DEBUG_ASSERT(team->t.t_threads[f]);
+ if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
+ KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team),
+ 1, 2);
+ }
__kmp_free_thread(team->t.t_threads[f]);
+ }
+
+ if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
+ if (team->t.b) {
+ // wake up thread at old location
+ team->t.b->go_release();
+ if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
+ for (f = 1; f < team->t.t_nproc; ++f) {
+ if (team->t.b->sleep[f].sleep) {
+ __kmp_atomic_resume_64(
+ team->t.t_threads[f]->th.th_info.ds.ds_gtid,
+ (kmp_atomic_flag_64<> *)NULL);
+ }
+ }
+ }
+ // Wait for threads to be removed from team
+ for (int f = 1; f < team->t.t_nproc; ++f) {
+ while (team->t.t_threads[f]->th.th_used_in_team.load() != 0)
+ KMP_CPU_PAUSE();
+ }
+ }
+ }
+
+ for (f = 1; f < team->t.t_nproc; ++f) {
team->t.t_threads[f] = NULL;
}
+ if (team->t.t_max_nproc > 1 &&
+ __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
+ distributedBarrier::deallocate(team->t.b);
+ team->t.b = NULL;
+ }
/* put the team back in the team pool */
/* TODO limit size of team pool, call reap_team if pool too large */
team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
@@ -5955,11 +6132,18 @@ static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
KA_TRACE(
20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
gtid));
- /* Need release fence here to prevent seg faults for tree forkjoin barrier
- * (GEH) */
- kmp_flag_64<> flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
- thread);
- __kmp_release_64(&flag);
+ if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
+ while (
+ !KMP_COMPARE_AND_STORE_ACQ32(&(thread->th.th_used_in_team), 0, 3))
+ KMP_CPU_PAUSE();
+ __kmp_resume_32(gtid, (kmp_flag_32<false, false> *)NULL);
+ } else {
+ /* Need release fence here to prevent seg faults for tree forkjoin
+ barrier (GEH) */
+ kmp_flag_64<> flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
+ thread);
+ __kmp_release_64(&flag);
+ }
}
// Terminate OS thread.
@@ -6054,6 +6238,31 @@ static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
} // __kmp_reap_thread
+static void __kmp_itthash_clean(kmp_info_t *th) {
+#if USE_ITT_NOTIFY
+ if (__kmp_itt_region_domains.count > 0) {
+ for (int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
+ kmp_itthash_entry_t *bucket = __kmp_itt_region_domains.buckets[i];
+ while (bucket) {
+ kmp_itthash_entry_t *next = bucket->next_in_bucket;
+ __kmp_thread_free(th, bucket);
+ bucket = next;
+ }
+ }
+ }
+ if (__kmp_itt_barrier_domains.count > 0) {
+ for (int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
+ kmp_itthash_entry_t *bucket = __kmp_itt_barrier_domains.buckets[i];
+ while (bucket) {
+ kmp_itthash_entry_t *next = bucket->next_in_bucket;
+ __kmp_thread_free(th, bucket);
+ bucket = next;
+ }
+ }
+ }
+#endif
+}
+
static void __kmp_internal_end(void) {
int i;
@@ -6240,6 +6449,7 @@ void __kmp_internal_end_library(int gtid_req) {
gtid));
return;
} else {
+ __kmp_itthash_clean(__kmp_threads[gtid]);
KA_TRACE(
10,
("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid));
@@ -6486,7 +6696,7 @@ void __kmp_register_library_startup(void) {
char *value = NULL; // Actual value of the environment variable.
-#if KMP_OS_UNIX && KMP_DYNAMIC_LIB // shared memory is with dynamic library
+#if defined(KMP_USE_SHM)
char *shm_name = __kmp_str_format("/%s", name);
int shm_preexist = 0;
char *data1;
@@ -6591,7 +6801,7 @@ void __kmp_register_library_startup(void) {
} break;
case 2: { // Neighbor is dead.
-#if KMP_OS_UNIX && KMP_DYNAMIC_LIB // shared memory is with dynamic library
+#if defined(KMP_USE_SHM)
// close shared memory.
shm_unlink(shm_name); // this removes file in /dev/shm
#else
@@ -6605,7 +6815,7 @@ void __kmp_register_library_startup(void) {
}
}
KMP_INTERNAL_FREE((void *)value);
-#if KMP_OS_UNIX && KMP_DYNAMIC_LIB // shared memory is with dynamic library
+#if defined(KMP_USE_SHM)
KMP_INTERNAL_FREE((void *)shm_name);
#endif
} // while
@@ -6618,7 +6828,7 @@ void __kmp_unregister_library(void) {
char *name = __kmp_reg_status_name();
char *value = NULL;
-#if KMP_OS_UNIX && KMP_DYNAMIC_LIB // shared memory is with dynamic library
+#if defined(KMP_USE_SHM)
char *shm_name = __kmp_str_format("/%s", name);
int fd1 = shm_open(shm_name, O_RDONLY, 0666);
if (fd1 == -1) {
@@ -6639,14 +6849,14 @@ void __kmp_unregister_library(void) {
KMP_DEBUG_ASSERT(__kmp_registration_str != NULL);
if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
// Ok, this is our variable. Delete it.
-#if KMP_OS_UNIX && KMP_DYNAMIC_LIB // shared memory is with dynamic library
+#if defined(KMP_USE_SHM)
shm_unlink(shm_name); // this removes file in /dev/shm
#else
__kmp_env_unset(name);
#endif
}
-#if KMP_OS_UNIX && KMP_DYNAMIC_LIB // shared memory is with dynamic library
+#if defined(KMP_USE_SHM)
KMP_INTERNAL_FREE(shm_name);
#endif
@@ -6844,8 +7054,8 @@ static void __kmp_do_serial_initialize(void) {
#if KMP_FAST_REDUCTION_BARRIER
#define kmp_reduction_barrier_gather_bb ((int)1)
#define kmp_reduction_barrier_release_bb ((int)1)
-#define kmp_reduction_barrier_gather_pat bp_hyper_bar
-#define kmp_reduction_barrier_release_pat bp_hyper_bar
+#define kmp_reduction_barrier_gather_pat __kmp_barrier_gather_pat_dflt
+#define kmp_reduction_barrier_release_pat __kmp_barrier_release_pat_dflt
#endif // KMP_FAST_REDUCTION_BARRIER
for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
__kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
@@ -8702,6 +8912,96 @@ void __kmp_omp_display_env(int verbose) {
__kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
+// The team size is changing, so distributed barrier must be modified
+void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
+ int new_nthreads) {
+ KMP_DEBUG_ASSERT(__kmp_barrier_release_pattern[bs_forkjoin_barrier] ==
+ bp_dist_bar);
+ kmp_info_t **other_threads = team->t.t_threads;
+
+ // We want all the workers to stop waiting on the barrier while we adjust the
+ // size of the team.
+ for (int f = 1; f < old_nthreads; ++f) {
+ KMP_DEBUG_ASSERT(other_threads[f] != NULL);
+ // Ignore threads that are already inactive or not present in the team
+ if (team->t.t_threads[f]->th.th_used_in_team.load() == 0) {
+ // teams construct causes thread_limit to get passed in, and some of
+ // those could be inactive; just ignore them
+ continue;
+ }
+ // If thread is transitioning still to in_use state, wait for it
+ if (team->t.t_threads[f]->th.th_used_in_team.load() == 3) {
+ while (team->t.t_threads[f]->th.th_used_in_team.load() == 3)
+ KMP_CPU_PAUSE();
+ }
+ // The thread should be in_use now
+ KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 1);
+ // Transition to unused state
+ team->t.t_threads[f]->th.th_used_in_team.store(2);
+ KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 2);
+ }
+ // Release all the workers
+ kmp_uint64 new_value; // new value for go
+ new_value = team->t.b->go_release();
+
+ KMP_MFENCE();
+
+ // Workers should see transition status 2 and move to 0; but may need to be
+ // woken up first
+ size_t my_go_index;
+ int count = old_nthreads - 1;
+ while (count > 0) {
+ count = old_nthreads - 1;
+ for (int f = 1; f < old_nthreads; ++f) {
+ my_go_index = f / team->t.b->threads_per_go;
+ if (other_threads[f]->th.th_used_in_team.load() != 0) {
+ if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // Wake up the workers
+ kmp_atomic_flag_64<> *flag = (kmp_atomic_flag_64<> *)CCAST(
+ void *, other_threads[f]->th.th_sleep_loc);
+ __kmp_atomic_resume_64(other_threads[f]->th.th_info.ds.ds_gtid, flag);
+ }
+ } else {
+ KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 0);
+ count--;
+ }
+ }
+ }
+ // Now update the barrier size
+ team->t.b->update_num_threads(new_nthreads);
+ team->t.b->go_reset();
+}
+
+void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads) {
+ // Add the threads back to the team
+ KMP_DEBUG_ASSERT(team);
+ // Threads were paused and pointed at th_used_in_team temporarily during a
+ // resize of the team. We're going to set th_used_in_team to 3 to indicate to
+ // the thread that it should transition itself back into the team. Then, if
+ // blocktime isn't infinite, the thread could be sleeping, so we send a resume
+ // to wake it up.
+ for (int f = 1; f < new_nthreads; ++f) {
+ KMP_DEBUG_ASSERT(team->t.t_threads[f]);
+ KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team), 0,
+ 3);
+ if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // Wake up sleeping threads
+ __kmp_resume_32(team->t.t_threads[f]->th.th_info.ds.ds_gtid,
+ (kmp_flag_32<false, false> *)NULL);
+ }
+ }
+ // The threads should be transitioning to the team; when they are done, they
+ // should have set th_used_in_team to 1. This loop forces master to wait until
+ // all threads have moved into the team and are waiting in the barrier.
+ int count = new_nthreads - 1;
+ while (count > 0) {
+ count = new_nthreads - 1;
+ for (int f = 1; f < new_nthreads; ++f) {
+ if (team->t.t_threads[f]->th.th_used_in_team.load() == 1) {
+ count--;
+ }
+ }
+ }
+}
+
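
The two helpers above drive a small state machine on th_used_in_team. As a reading aid only (the enumerator names below are invented for this note; the runtime itself uses the raw values 0..3), the states can be summarized as:

    // Illustrative sketch, not part of the patch.
    enum kmp_used_in_team_state {
      KMP_UIT_OUTSIDE = 0, // thread is not part of the team
      KMP_UIT_IN_TEAM = 1, // thread belongs to the team and waits in the dist barrier
      KMP_UIT_LEAVING = 2, // primary thread asked it to leave (resize or free)
      KMP_UIT_JOINING = 3  // primary thread asked it to transition (back) into the team
    };

__kmp_resize_dist_barrier moves workers 1 -> 2 and waits for them to reach 0, while __kmp_add_threads_to_team moves them 0 -> 3 and waits for them to report 1.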
// Globals and functions for hidden helper task
kmp_info_t **__kmp_hidden_helper_threads;
kmp_info_t *__kmp_hidden_helper_main_thread;
diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp
index f287c27f29a5..8f7cee2382b4 100644
--- a/openmp/runtime/src/kmp_settings.cpp
+++ b/openmp/runtime/src/kmp_settings.cpp
@@ -164,7 +164,12 @@ int __kmp_convert_to_milliseconds(char const *data) {
return (INT_MAX);
value = (double)0.0;
mult = '\0';
+#if KMP_OS_WINDOWS && KMP_MSVC_COMPAT
+ // On Windows, each %c conversion needs an additional size argument for sscanf_s
+ nvalues = KMP_SSCANF(data, "%lf%c%c", &value, &mult, 1, &extra, 1);
+#else
nvalues = KMP_SSCANF(data, "%lf%c%c", &value, &mult, &extra);
+#endif
if (nvalues < 1)
return (-1);
if (nvalues == 1)
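
For context on the KMP_MSVC_COMPAT branch above: sscanf_s requires a buffer-size argument after every %c (and %s) destination, which is why the two extra 1 arguments appear only on Windows. A minimal standalone sketch (illustrative, not part of the patch):

    double value;
    char mult, extra;
    // With input "2.5s" this stores value=2.5 and mult='s' and returns 2
    // (there is no third character); the trailing 1s are the sizes of the
    // single-character destinations that sscanf_s demands.
    int n = sscanf_s("2.5s", "%lf%c%c", &value, &mult, 1, &extra, 1);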
@@ -297,8 +302,8 @@ void __kmp_check_stksize(size_t *val) {
// if system stack size is too big then limit the size for worker threads
if (*val > KMP_DEFAULT_STKSIZE * 16) // just a heuristics...
*val = KMP_DEFAULT_STKSIZE * 16;
- if (*val < KMP_MIN_STKSIZE)
- *val = KMP_MIN_STKSIZE;
+ if (*val < __kmp_sys_min_stksize)
+ *val = __kmp_sys_min_stksize;
if (*val > KMP_MAX_STKSIZE)
*val = KMP_MAX_STKSIZE; // dead code currently, but may work in future
#if KMP_OS_DARWIN
@@ -426,6 +431,7 @@ static void __kmp_stg_parse_par_range(char const *name, char const *value,
int *out_range, char *out_routine,
char *out_file, int *out_lb,
int *out_ub) {
+ const char *par_range_value;
size_t len = KMP_STRLEN(value) + 1;
par_range_to_print = (char *)KMP_INTERNAL_MALLOC(len + 1);
KMP_STRNCPY_S(par_range_to_print, len + 1, value, len + 1);
@@ -434,11 +440,14 @@ static void __kmp_stg_parse_par_range(char const *name, char const *value,
__kmp_par_range_ub = INT_MAX;
for (;;) {
unsigned int len;
- if (*value == '\0') {
+ if (!value || *value == '\0') {
break;
}
if (!__kmp_strcasecmp_with_sentinel("routine", value, '=')) {
- value = strchr(value, '=') + 1;
+ par_range_value = strchr(value, '=') + 1;
+ if (!par_range_value)
+ goto par_range_error;
+ value = par_range_value;
len = __kmp_readstr_with_sentinel(out_routine, value,
KMP_PAR_RANGE_ROUTINE_LEN - 1, ',');
if (len == 0) {
@@ -451,7 +460,10 @@ static void __kmp_stg_parse_par_range(char const *name, char const *value,
continue;
}
if (!__kmp_strcasecmp_with_sentinel("filename", value, '=')) {
- value = strchr(value, '=') + 1;
+ par_range_value = strchr(value, '=') + 1;
+ if (!par_range_value)
+ goto par_range_error;
+ value = par_range_value;
len = __kmp_readstr_with_sentinel(out_file, value,
KMP_PAR_RANGE_FILENAME_LEN - 1, ',');
if (len == 0) {
@@ -465,7 +477,10 @@ static void __kmp_stg_parse_par_range(char const *name, char const *value,
}
if ((!__kmp_strcasecmp_with_sentinel("range", value, '=')) ||
(!__kmp_strcasecmp_with_sentinel("incl_range", value, '='))) {
- value = strchr(value, '=') + 1;
+ par_range_value = strchr(value, '=') + 1;
+ if (!par_range_value)
+ goto par_range_error;
+ value = par_range_value;
if (KMP_SSCANF(value, "%d:%d", out_lb, out_ub) != 2) {
goto par_range_error;
}
@@ -477,7 +492,10 @@ static void __kmp_stg_parse_par_range(char const *name, char const *value,
continue;
}
if (!__kmp_strcasecmp_with_sentinel("excl_range", value, '=')) {
- value = strchr(value, '=') + 1;
+ par_range_value = strchr(value, '=') + 1;
+ if (!par_range_value)
+ goto par_range_error;
+ value = par_range_value;
if (KMP_SSCANF(value, "%d:%d", out_lb, out_ub) != 2) {
goto par_range_error;
}
@@ -1684,6 +1702,8 @@ static void __kmp_stg_parse_barrier_pattern(char const *name, char const *value,
const char *var;
/* ---------- Barrier method control ------------ */
+ static int dist_req = 0, non_dist_req = 0;
+ static bool warn = 1;
for (int i = bs_plain_barrier; i < bs_last_barrier; i++) {
var = __kmp_barrier_pattern_env_name[i];
@@ -1695,6 +1715,11 @@ static void __kmp_stg_parse_barrier_pattern(char const *name, char const *value,
for (j = bp_linear_bar; j < bp_last_bar; j++) {
if (__kmp_match_with_sentinel(__kmp_barrier_pattern_name[j], value, 1,
',')) {
+ if (j == bp_dist_bar) {
+ dist_req++;
+ } else {
+ non_dist_req++;
+ }
__kmp_barrier_gather_pattern[i] = (kmp_bar_pat_e)j;
break;
}
@@ -1709,6 +1734,11 @@ static void __kmp_stg_parse_barrier_pattern(char const *name, char const *value,
if (comma != NULL) {
for (j = bp_linear_bar; j < bp_last_bar; j++) {
if (__kmp_str_match(__kmp_barrier_pattern_name[j], 1, comma + 1)) {
+ if (j == bp_dist_bar) {
+ dist_req++;
+ } else {
+ non_dist_req++;
+ }
__kmp_barrier_release_pattern[i] = (kmp_bar_pat_e)j;
break;
}
@@ -1723,6 +1753,20 @@ static void __kmp_stg_parse_barrier_pattern(char const *name, char const *value,
}
}
}
+ if (dist_req != 0) {
+ // set all barriers to dist
+ if ((non_dist_req != 0) && warn) {
+ KMP_INFORM(BarrierPatternOverride, name,
+ __kmp_barrier_pattern_name[bp_dist_bar]);
+ warn = 0;
+ }
+ for (int i = bs_plain_barrier; i < bs_last_barrier; i++) {
+ if (__kmp_barrier_release_pattern[i] != bp_dist_bar)
+ __kmp_barrier_release_pattern[i] = bp_dist_bar;
+ if (__kmp_barrier_gather_pattern[i] != bp_dist_bar)
+ __kmp_barrier_gather_pattern[i] = bp_dist_bar;
+ }
+ }
} // __kmp_stg_parse_barrier_pattern
static void __kmp_stg_print_barrier_pattern(kmp_str_buf_t *buffer,
@@ -1739,7 +1783,7 @@ static void __kmp_stg_print_barrier_pattern(kmp_str_buf_t *buffer,
__kmp_str_buf_print(buffer, " %s='",
__kmp_barrier_pattern_env_name[i]);
}
- KMP_DEBUG_ASSERT(j < bs_last_barrier && k < bs_last_barrier);
+ KMP_DEBUG_ASSERT(j < bp_last_bar && k < bp_last_bar);
__kmp_str_buf_print(buffer, "%s,%s'\n", __kmp_barrier_pattern_name[j],
__kmp_barrier_pattern_name[k]);
}
@@ -3092,6 +3136,7 @@ static void __kmp_stg_parse_topology_method(char const *name, char const *value,
}
#if KMP_GROUP_AFFINITY
else if (__kmp_str_match("group", 1, value)) {
+ KMP_WARNING(StgDeprecatedValue, name, value, "all");
__kmp_affinity_top_method = affinity_top_method_group;
}
#endif /* KMP_GROUP_AFFINITY */
@@ -3155,6 +3200,47 @@ static void __kmp_stg_print_topology_method(kmp_str_buf_t *buffer,
}
} // __kmp_stg_print_topology_method
+// KMP_TEAMS_PROC_BIND
+struct kmp_proc_bind_info_t {
+ const char *name;
+ kmp_proc_bind_t proc_bind;
+};
+static kmp_proc_bind_info_t proc_bind_table[] = {
+ {"spread", proc_bind_spread},
+ {"true", proc_bind_spread},
+ {"close", proc_bind_close},
+ // teams-bind = false means "replicate the primary thread's affinity"
+ {"false", proc_bind_primary},
+ {"primary", proc_bind_primary}};
+static void __kmp_stg_parse_teams_proc_bind(char const *name, char const *value,
+ void *data) {
+ int valid;
+ const char *end;
+ valid = 0;
+ for (size_t i = 0; i < sizeof(proc_bind_table) / sizeof(proc_bind_table[0]);
+ ++i) {
+ if (__kmp_match_str(proc_bind_table[i].name, value, &end)) {
+ __kmp_teams_proc_bind = proc_bind_table[i].proc_bind;
+ valid = 1;
+ break;
+ }
+ }
+ if (!valid) {
+ KMP_WARNING(StgInvalidValue, name, value);
+ }
+}
+static void __kmp_stg_print_teams_proc_bind(kmp_str_buf_t *buffer,
+ char const *name, void *data) {
+ const char *value = KMP_I18N_STR(NotDefined);
+ for (size_t i = 0; i < sizeof(proc_bind_table) / sizeof(proc_bind_table[0]);
+ ++i) {
+ if (__kmp_teams_proc_bind == proc_bind_table[i].proc_bind) {
+ value = proc_bind_table[i].name;
+ break;
+ }
+ }
+ __kmp_stg_print_str(buffer, name, value);
+}
#endif /* KMP_AFFINITY_SUPPORTED */
// OMP_PROC_BIND / bind-var is functional on all 4.0 builds, including OS X*
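
The new KMP_TEAMS_PROC_BIND setting registered later in this file is parsed against proc_bind_table above. A hedged usage sketch (spellings taken from the table; semantics described only as far as the table's comments go):

    // Illustrative, not part of the patch: accepted spellings are
    //   KMP_TEAMS_PROC_BIND=spread | true    -> proc_bind_spread
    //   KMP_TEAMS_PROC_BIND=close            -> proc_bind_close
    //   KMP_TEAMS_PROC_BIND=false | primary  -> proc_bind_primary
    //                              ("false" means replicate the primary thread's affinity)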
@@ -4415,7 +4501,7 @@ static void __kmp_stg_parse_lock_kind(char const *name, char const *value,
}
#if KMP_USE_ADAPTIVE_LOCKS
else if (__kmp_str_match("adaptive", 1, value)) {
- if (__kmp_cpuinfo.rtm) { // ??? Is cpuinfo available here?
+ if (__kmp_cpuinfo.flags.rtm) { // ??? Is cpuinfo available here?
__kmp_user_lock_kind = lk_adaptive;
KMP_STORE_LOCK_SEQ(adaptive);
} else {
@@ -4427,7 +4513,7 @@ static void __kmp_stg_parse_lock_kind(char const *name, char const *value,
#endif // KMP_USE_ADAPTIVE_LOCKS
#if KMP_USE_DYNAMIC_LOCK && KMP_USE_TSX
else if (__kmp_str_match("rtm_queuing", 1, value)) {
- if (__kmp_cpuinfo.rtm) {
+ if (__kmp_cpuinfo.flags.rtm) {
__kmp_user_lock_kind = lk_rtm_queuing;
KMP_STORE_LOCK_SEQ(rtm_queuing);
} else {
@@ -4436,7 +4522,7 @@ static void __kmp_stg_parse_lock_kind(char const *name, char const *value,
KMP_STORE_LOCK_SEQ(queuing);
}
} else if (__kmp_str_match("rtm_spin", 1, value)) {
- if (__kmp_cpuinfo.rtm) {
+ if (__kmp_cpuinfo.flags.rtm) {
__kmp_user_lock_kind = lk_rtm_spin;
KMP_STORE_LOCK_SEQ(rtm_spin);
} else {
@@ -5260,6 +5346,8 @@ static kmp_setting_t __kmp_stg_table[] = {
#endif /* KMP_GOMP_COMPAT */
{"OMP_PROC_BIND", __kmp_stg_parse_proc_bind, __kmp_stg_print_proc_bind,
NULL, 0, 0},
+ {"KMP_TEAMS_PROC_BIND", __kmp_stg_parse_teams_proc_bind,
+ __kmp_stg_print_teams_proc_bind, NULL, 0, 0},
{"OMP_PLACES", __kmp_stg_parse_places, __kmp_stg_print_places, NULL, 0, 0},
{"KMP_TOPOLOGY_METHOD", __kmp_stg_parse_topology_method,
__kmp_stg_print_topology_method, NULL, 0, 0},
@@ -5942,65 +6030,27 @@ void __kmp_env_initialize(char const *string) {
// Handle the Win 64 group affinity stuff if there are multiple
// processor groups, or if the user requested it, and OMP 4.0
// affinity is not in effect.
- if (((__kmp_num_proc_groups > 1) &&
- (__kmp_affinity_type == affinity_default) &&
- (__kmp_nested_proc_bind.bind_types[0] == proc_bind_default)) ||
- (__kmp_affinity_top_method == affinity_top_method_group)) {
+ if (__kmp_num_proc_groups > 1 &&
+ __kmp_affinity_type == affinity_default &&
+ __kmp_nested_proc_bind.bind_types[0] == proc_bind_default) {
+ // Do not respect the initial processor affinity mask if it is assigned
+ // exactly one Windows Processor Group since this is interpreted as the
+ // default OS assignment. Not respecting the mask allows the runtime to
+ // use all the logical processors in all groups.
if (__kmp_affinity_respect_mask == affinity_respect_mask_default &&
exactly_one_group) {
__kmp_affinity_respect_mask = FALSE;
}
+ // Use compact affinity with anticipation of pinning to at least the
+ // group granularity since threads can only be bound to one group.
if (__kmp_affinity_type == affinity_default) {
__kmp_affinity_type = affinity_compact;
__kmp_nested_proc_bind.bind_types[0] = proc_bind_intel;
}
- if (__kmp_affinity_top_method == affinity_top_method_default) {
- if (__kmp_affinity_gran == KMP_HW_UNKNOWN) {
- __kmp_affinity_top_method = affinity_top_method_group;
- __kmp_affinity_gran = KMP_HW_PROC_GROUP;
- } else if (__kmp_affinity_gran == KMP_HW_PROC_GROUP) {
- __kmp_affinity_top_method = affinity_top_method_group;
- } else {
- __kmp_affinity_top_method = affinity_top_method_all;
- }
- } else if (__kmp_affinity_top_method == affinity_top_method_group) {
- if (__kmp_affinity_gran == KMP_HW_UNKNOWN) {
- __kmp_affinity_gran = KMP_HW_PROC_GROUP;
- } else if ((__kmp_affinity_gran != KMP_HW_PROC_GROUP) &&
- (__kmp_affinity_gran != KMP_HW_THREAD)) {
- const char *str = __kmp_hw_get_keyword(__kmp_affinity_gran);
- KMP_WARNING(AffGranTopGroup, var, str);
- __kmp_affinity_gran = KMP_HW_THREAD;
- }
- } else {
- if (__kmp_affinity_gran == KMP_HW_UNKNOWN) {
- __kmp_affinity_gran = KMP_HW_CORE;
- } else if (__kmp_affinity_gran == KMP_HW_PROC_GROUP) {
- const char *str = NULL;
- switch (__kmp_affinity_type) {
- case affinity_physical:
- str = "physical";
- break;
- case affinity_logical:
- str = "logical";
- break;
- case affinity_compact:
- str = "compact";
- break;
- case affinity_scatter:
- str = "scatter";
- break;
- case affinity_explicit:
- str = "explicit";
- break;
- // No MIC on windows, so no affinity_balanced case
- default:
- KMP_DEBUG_ASSERT(0);
- }
- KMP_WARNING(AffGranGroupType, var, str);
- __kmp_affinity_gran = KMP_HW_CORE;
- }
- }
+ if (__kmp_affinity_top_method == affinity_top_method_default)
+ __kmp_affinity_top_method = affinity_top_method_all;
+ if (__kmp_affinity_gran == KMP_HW_UNKNOWN)
+ __kmp_affinity_gran = KMP_HW_PROC_GROUP;
} else
#endif /* KMP_GROUP_AFFINITY */
diff --git a/openmp/runtime/src/kmp_stats.h b/openmp/runtime/src/kmp_stats.h
index 4c5053df3fef..113221c066a3 100644
--- a/openmp/runtime/src/kmp_stats.h
+++ b/openmp/runtime/src/kmp_stats.h
@@ -246,6 +246,8 @@ enum stats_state_e {
// KMP_tree_release -- time in __kmp_tree_barrier_release
// KMP_hyper_gather -- time in __kmp_hyper_barrier_gather
// KMP_hyper_release -- time in __kmp_hyper_barrier_release
+// KMP_dist_gather -- time in __kmp_dist_barrier_gather
+// KMP_dist_release -- time in __kmp_dist_barrier_release
// clang-format off
#define KMP_FOREACH_DEVELOPER_TIMER(macro, arg) \
macro(KMP_fork_call, 0, arg) \
@@ -255,6 +257,8 @@ enum stats_state_e {
macro(KMP_hier_release, 0, arg) \
macro(KMP_hyper_gather, 0, arg) \
macro(KMP_hyper_release, 0, arg) \
+ macro(KMP_dist_gather, 0, arg) \
+ macro(KMP_dist_release, 0, arg) \
macro(KMP_linear_gather, 0, arg) \
macro(KMP_linear_release, 0, arg) \
macro(KMP_tree_gather, 0, arg) \
diff --git a/openmp/runtime/src/kmp_str.cpp b/openmp/runtime/src/kmp_str.cpp
index ffce2b88ab35..e64f989fbc69 100644
--- a/openmp/runtime/src/kmp_str.cpp
+++ b/openmp/runtime/src/kmp_str.cpp
@@ -515,6 +515,31 @@ int __kmp_str_match(char const *target, int len, char const *data) {
return ((len > 0) ? i >= len : (!target[i] && (len || !data[i])));
} // __kmp_str_match
+// Returns true if target occurs in data (case-insensitive substring match),
+// otherwise returns false. len must be the length of target.
+bool __kmp_str_contains(char const *target, int len, char const *data) {
+ int i = 0, j = 0, start = 0;
+ if (target == NULL || data == NULL) {
+ return FALSE;
+ }
+ while (target[i]) {
+ if (!data[j])
+ return FALSE;
+ if (TOLOWER(target[i]) != TOLOWER(data[j])) {
+ j = start + 1;
+ start = j;
+ i = 0;
+ } else {
+ if (i == 0)
+ start = j;
+ j++;
+ i++;
+ }
+ }
+
+ return i == len;
+} // __kmp_str_contains
+
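
As a quick illustration of the helper's contract (the calls are hypothetical, not from the patch), note that the match is case-insensitive and len must equal strlen(target):

    KMP_DEBUG_ASSERT(__kmp_str_contains("dist", 4, "use the DIST barrier")); // found
    KMP_DEBUG_ASSERT(!__kmp_str_contains("dist", 4, "hyper,hyper"));         // not found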
int __kmp_str_match_false(char const *data) {
int result =
__kmp_str_match("false", 1, data) || __kmp_str_match("off", 2, data) ||
diff --git a/openmp/runtime/src/kmp_str.h b/openmp/runtime/src/kmp_str.h
index ff6179908ef1..855b5df55d69 100644
--- a/openmp/runtime/src/kmp_str.h
+++ b/openmp/runtime/src/kmp_str.h
@@ -106,6 +106,7 @@ int __kmp_str_eqf(char const *lhs, char const *rhs);
char *__kmp_str_format(char const *format, ...);
void __kmp_str_free(char **str);
int __kmp_str_match(char const *target, int len, char const *data);
+bool __kmp_str_contains(char const *target, int len, char const *data);
int __kmp_str_match_false(char const *data);
int __kmp_str_match_true(char const *data);
void __kmp_str_replace(char *str, char search_for, char replace_with);
diff --git a/openmp/runtime/src/kmp_stub.cpp b/openmp/runtime/src/kmp_stub.cpp
index 87e5388ca9db..6aaec688013f 100644
--- a/openmp/runtime/src/kmp_stub.cpp
+++ b/openmp/runtime/src/kmp_stub.cpp
@@ -376,25 +376,85 @@ omp_memspace_handle_t const llvm_omp_target_shared_mem_space =
omp_memspace_handle_t const llvm_omp_target_device_mem_space =
(omp_memspace_handle_t const)102;
#endif /* KMP_OS_WINDOWS */
-void *omp_alloc(size_t size, const omp_allocator_handle_t allocator) {
+
+void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
+ i;
+ void *res;
+#if KMP_OS_WINDOWS
+ // Returns a pointer to the memory block, or NULL if failed.
+ // Sets errno to ENOMEM or EINVAL if memory allocation failed or parameter
+ // validation failed.
+ res = _aligned_malloc(size, 1);
+#else
+ res = malloc(size);
+#endif
+ return res;
+}
+
+void *omp_aligned_alloc(size_t a, size_t size, omp_allocator_handle_t al) {
i;
- return malloc(size);
+ int err;
+ void *res;
+#if KMP_OS_WINDOWS
+ res = _aligned_malloc(size, a);
+#else
+ if (err = posix_memalign(&res, a, size)) {
+ errno = err; // can be EINVAL or ENOMEM
+ res = NULL;
+ }
+#endif
+ return res;
+}
+
+void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t al) {
+ i;
+ void *res;
+#if KMP_OS_WINDOWS
+ res = _aligned_recalloc(NULL, nmemb, size, 1);
+#else
+ res = calloc(nmemb, size);
+#endif
+ return res;
}
-void *omp_calloc(size_t nmemb, size_t size,
- const omp_allocator_handle_t allocator) {
+
+void *omp_aligned_calloc(size_t a, size_t nmemb, size_t size,
+ omp_allocator_handle_t al) {
i;
- return calloc(nmemb, size);
+ int err;
+ void *res;
+#if KMP_OS_WINDOWS
+ res = _aligned_recalloc(NULL, nmemb, size, a);
+#else
+ if (err = posix_memalign(&res, a, nmemb * size)) {
+ errno = err; // can be EINVAL or ENOMEM
+ res = NULL;
+ }
+ if (res) // posix_memalign may have failed above; don't memset a NULL pointer
+   memset(res, 0x00, nmemb * size); // zero the full allocation, not just one element
+#endif
+ return res;
}
-void *omp_realloc(void *ptr, size_t size,
- const omp_allocator_handle_t allocator,
- const omp_allocator_handle_t free_allocator) {
+
+void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t al,
+ omp_allocator_handle_t free_al) {
i;
- return realloc(ptr, size);
+ void *res;
+#if KMP_OS_WINDOWS
+ res = _aligned_realloc(ptr, size, 1);
+#else
+ res = realloc(ptr, size);
+#endif
+ return res;
}
-void omp_free(void *ptr, const omp_allocator_handle_t allocator) {
+
+void omp_free(void *ptr, omp_allocator_handle_t allocator) {
i;
+#if KMP_OS_WINDOWS
+ _aligned_free(ptr);
+#else
free(ptr);
+#endif
}
+
/* OpenMP 5.0 Affinity Format */
void omp_set_affinity_format(char const *format) { i; }
size_t omp_get_affinity_format(char *buffer, size_t size) {
diff --git a/openmp/runtime/src/kmp_taskdeps.cpp b/openmp/runtime/src/kmp_taskdeps.cpp
index 162fb38e1eed..7d2774a738fb 100644
--- a/openmp/runtime/src/kmp_taskdeps.cpp
+++ b/openmp/runtime/src/kmp_taskdeps.cpp
@@ -86,6 +86,7 @@ static kmp_dephash_t *__kmp_dephash_extend(kmp_info_t *thread,
h->buckets = (kmp_dephash_entry **)(h + 1);
h->generation = gen;
h->nconflicts = 0;
+ h->last_all = current_dephash->last_all;
// make sure buckets are properly initialized
for (size_t i = 0; i < new_size; i++) {
@@ -142,6 +143,7 @@ static kmp_dephash_t *__kmp_dephash_create(kmp_info_t *thread,
h->nelements = 0;
h->nconflicts = 0;
h->buckets = (kmp_dephash_entry **)(h + 1);
+ h->last_all = NULL;
for (size_t i = 0; i < h_size; i++)
h->buckets[i] = 0;
@@ -174,7 +176,10 @@ static kmp_dephash_entry *__kmp_dephash_find(kmp_info_t *thread,
thread, sizeof(kmp_dephash_entry_t));
#endif
entry->addr = addr;
- entry->last_out = NULL;
+ if (!h->last_all) // no predecessor task with omp_all_memory dependence
+ entry->last_out = NULL;
+ else // else link the omp_all_memory depnode to the new entry
+ entry->last_out = __kmp_node_ref(h->last_all);
entry->last_set = NULL;
entry->prev_set = NULL;
entry->last_flag = 0;
@@ -290,6 +295,63 @@ static inline kmp_int32 __kmp_depnode_link_successor(kmp_int32 gtid,
return npredecessors;
}
+static inline kmp_int32
+__kmp_process_dep_all(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *h,
+ bool dep_barrier, kmp_task_t *task) {
+ KA_TRACE(30, ("__kmp_process_dep_all: T#%d processing dep_all, "
+ "dep_barrier = %d\n",
+ gtid, dep_barrier));
+ kmp_info_t *thread = __kmp_threads[gtid];
+ kmp_int32 npredecessors = 0;
+
+ // process previous omp_all_memory node if any
+ npredecessors +=
+ __kmp_depnode_link_successor(gtid, thread, task, node, h->last_all);
+ __kmp_node_deref(thread, h->last_all);
+ if (!dep_barrier) {
+ h->last_all = __kmp_node_ref(node);
+ } else {
+ // if this is a sync point in the serial sequence, then the previous
+ // outputs are guaranteed to be completed after the execution of this
+ // task so the previous output nodes can be cleared.
+ h->last_all = NULL;
+ }
+
+ // process all regular dependences
+ for (size_t i = 0; i < h->size; i++) {
+ kmp_dephash_entry_t *info = h->buckets[i];
+ if (!info) // skip empty slots in dephash
+ continue;
+ for (; info; info = info->next_in_bucket) {
+ // for each entry the omp_all_memory works as OUT dependence
+ kmp_depnode_t *last_out = info->last_out;
+ kmp_depnode_list_t *last_set = info->last_set;
+ kmp_depnode_list_t *prev_set = info->prev_set;
+ if (last_set) {
+ npredecessors +=
+ __kmp_depnode_link_successor(gtid, thread, task, node, last_set);
+ __kmp_depnode_list_free(thread, last_set);
+ __kmp_depnode_list_free(thread, prev_set);
+ info->last_set = NULL;
+ info->prev_set = NULL;
+ info->last_flag = 0; // no sets in this dephash entry
+ } else {
+ npredecessors +=
+ __kmp_depnode_link_successor(gtid, thread, task, node, last_out);
+ }
+ __kmp_node_deref(thread, last_out);
+ if (!dep_barrier) {
+ info->last_out = __kmp_node_ref(node);
+ } else {
+ info->last_out = NULL;
+ }
+ }
+ }
+ KA_TRACE(30, ("__kmp_process_dep_all: T#%d found %d predecessors\n", gtid,
+ npredecessors));
+ return npredecessors;
+}
+
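
__kmp_process_dep_all services the OpenMP 5.1 omp_all_memory reserved locator; a user-level sketch of the construct it implements (example code, not part of the patch; compute_x, compute_y, and combine are placeholders):

    // Illustrative: an omp_all_memory dependence acts as an OUT dependence on
    // every address the dephash has seen, so T3 is ordered after T1 and T2.
    #pragma omp task depend(out : x) // T1
    { x = compute_x(); }
    #pragma omp task depend(out : y) // T2
    { y = compute_y(); }
    #pragma omp task depend(inout : omp_all_memory) // T3
    { combine(x, y); }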
template <bool filter>
static inline kmp_int32
__kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t **hash,
@@ -344,6 +406,13 @@ __kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t **hash,
// link node as successor of all nodes in the prev_set if any
npredecessors +=
__kmp_depnode_link_successor(gtid, thread, task, node, prev_set);
+ if (dep_barrier) {
+ // clean last_out and prev_set if any; don't touch last_set
+ __kmp_node_deref(thread, last_out);
+ info->last_out = NULL;
+ __kmp_depnode_list_free(thread, prev_set);
+ info->prev_set = NULL;
+ }
} else { // last_set is of different dep kind, make it prev_set
// link node as successor of all nodes in the last_set
npredecessors +=
@@ -353,13 +422,21 @@ __kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t **hash,
info->last_out = NULL;
// clean prev_set if any
__kmp_depnode_list_free(thread, prev_set);
- // move last_set to prev_set, new last_set will be allocated
- info->prev_set = last_set;
+ if (!dep_barrier) {
+ // move last_set to prev_set, new last_set will be allocated
+ info->prev_set = last_set;
+ } else {
+ info->prev_set = NULL;
+ info->last_flag = 0;
+ }
info->last_set = NULL;
}
- info->last_flag = dep->flag; // store dep kind of the last_set
- info->last_set = __kmp_add_node(thread, info->last_set, node);
-
+ // for dep_barrier last_flag value should remain:
+ // 0 if last_set is empty, unchanged otherwise
+ if (!dep_barrier) {
+ info->last_flag = dep->flag; // store dep kind of the last_set
+ info->last_set = __kmp_add_node(thread, info->last_set, node);
+ }
// check if we are processing MTX dependency
if (dep->flag == KMP_DEP_MTX) {
if (info->mtx_lock == NULL) {
@@ -402,7 +479,7 @@ static bool __kmp_check_deps(kmp_int32 gtid, kmp_depnode_t *node,
kmp_depend_info_t *dep_list,
kmp_int32 ndeps_noalias,
kmp_depend_info_t *noalias_dep_list) {
- int i, n_mtxs = 0;
+ int i, n_mtxs = 0, dep_all = 0;
#if KMP_DEBUG
kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
#endif
@@ -414,7 +491,8 @@ static bool __kmp_check_deps(kmp_int32 gtid, kmp_depnode_t *node,
// Filter deps in dep_list
// TODO: Different algorithm for large dep_list ( > 10 ? )
for (i = 0; i < ndeps; i++) {
- if (dep_list[i].base_addr != 0) {
+ if (dep_list[i].base_addr != 0 &&
+ dep_list[i].base_addr != (kmp_intptr_t)KMP_SIZE_T_MAX) {
KMP_DEBUG_ASSERT(
dep_list[i].flag == KMP_DEP_IN || dep_list[i].flag == KMP_DEP_OUT ||
dep_list[i].flag == KMP_DEP_INOUT ||
@@ -436,6 +514,13 @@ static bool __kmp_check_deps(kmp_int32 gtid, kmp_depnode_t *node,
dep_list[i].flag = KMP_DEP_OUT; // downgrade mutexinoutset to inout
}
}
+ } else if (dep_list[i].flag == KMP_DEP_ALL ||
+ dep_list[i].base_addr == (kmp_intptr_t)KMP_SIZE_T_MAX) {
+ // The compiler can mark an omp_all_memory dependence either as
+ // (addr=0 && flag=0x80) (flag KMP_DEP_ALL) or as (addr=-1).
+ // omp_all_memory overrides all other dependences, if any.
+ dep_all = 1;
+ break;
}
}
@@ -449,10 +534,14 @@ static bool __kmp_check_deps(kmp_int32 gtid, kmp_depnode_t *node,
// the end
int npredecessors;
- npredecessors = __kmp_process_deps<true>(gtid, node, hash, dep_barrier, ndeps,
- dep_list, task);
- npredecessors += __kmp_process_deps<false>(
- gtid, node, hash, dep_barrier, ndeps_noalias, noalias_dep_list, task);
+ if (!dep_all) { // regular dependences
+ npredecessors = __kmp_process_deps<true>(gtid, node, hash, dep_barrier,
+ ndeps, dep_list, task);
+ npredecessors += __kmp_process_deps<false>(
+ gtid, node, hash, dep_barrier, ndeps_noalias, noalias_dep_list, task);
+ } else { // omp_all_memory dependence
+ npredecessors = __kmp_process_dep_all(gtid, node, *hash, dep_barrier, task);
+ }
node->dn.task = task;
KMP_MB();
@@ -756,8 +845,6 @@ void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
kmp_depnode_t node = {0};
__kmp_init_node(&node);
- // the stack owns the node
- __kmp_node_ref(&node);
if (!__kmp_check_deps(gtid, &node, NULL, &current_task->td_dephash,
DEP_BARRIER, ndeps, dep_list, ndeps_noalias,
diff --git a/openmp/runtime/src/kmp_taskdeps.h b/openmp/runtime/src/kmp_taskdeps.h
index d1576dd5b791..99f182bbd050 100644
--- a/openmp/runtime/src/kmp_taskdeps.h
+++ b/openmp/runtime/src/kmp_taskdeps.h
@@ -23,8 +23,7 @@ static inline void __kmp_node_deref(kmp_info_t *thread, kmp_depnode_t *node) {
return;
kmp_int32 n = KMP_ATOMIC_DEC(&node->dn.nrefs) - 1;
- // TODO: temporarily disable assertion until the bug with dependences is fixed
- // KMP_DEBUG_ASSERT(n >= 0);
+ KMP_DEBUG_ASSERT(n >= 0);
if (n == 0) {
KMP_ASSERT(node->dn.nrefs == 0);
#if USE_FAST_MEMORY
@@ -74,6 +73,8 @@ static inline void __kmp_dephash_free_entries(kmp_info_t *thread,
h->buckets[i] = 0;
}
}
+ __kmp_node_deref(thread, h->last_all);
+ h->last_all = NULL;
}
static inline void __kmp_dephash_free(kmp_info_t *thread, kmp_dephash_t *h) {
@@ -145,9 +146,10 @@ static inline void __kmp_release_deps(kmp_int32 gtid, kmp_taskdata_t *task) {
// encountering thread's queue; otherwise, it can be pushed to its own
// queue.
if (!next_taskdata->td_flags.hidden_helper) {
- __kmpc_give_task(
- successor->dn.task,
- __kmp_tid_from_gtid(next_taskdata->encountering_gtid));
+ kmp_int32 encountering_gtid =
+ next_taskdata->td_alloc_thread->th.th_info.ds.ds_gtid;
+ kmp_int32 encountering_tid = __kmp_tid_from_gtid(encountering_gtid);
+ __kmpc_give_task(successor->dn.task, encountering_tid);
} else {
__kmp_omp_task(gtid, successor->dn.task, false);
}
diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index 6c3e2c95cb5a..d956df1b2a37 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -324,10 +324,16 @@ static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) {
kmp_info_t *thread = __kmp_threads[gtid];
kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
- // We don't need to map to shadow gtid if it is already hidden helper thread
- if (taskdata->td_flags.hidden_helper && !KMP_HIDDEN_HELPER_THREAD(gtid)) {
- gtid = KMP_GTID_TO_SHADOW_GTID(gtid);
- thread = __kmp_threads[gtid];
+ // If we encounter a hidden helper task, and the current thread is not a
+ // hidden helper thread, we have to give the task to any hidden helper thread
+ // starting from its shadow one.
+ if (UNLIKELY(taskdata->td_flags.hidden_helper &&
+ !KMP_HIDDEN_HELPER_THREAD(gtid))) {
+ kmp_int32 shadow_gtid = KMP_GTID_TO_SHADOW_GTID(gtid);
+ __kmpc_give_task(task, __kmp_tid_from_gtid(shadow_gtid));
+ // Signal the hidden helper threads.
+ __kmp_hidden_helper_worker_thread_signal();
+ return TASK_SUCCESSFULLY_PUSHED;
}
kmp_task_team_t *task_team = thread->th.th_task_team;
@@ -434,16 +440,8 @@ static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) {
gtid, taskdata, thread_data->td.td_deque_ntasks,
thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
- auto hidden_helper = taskdata->td_flags.hidden_helper;
-
__kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
- // Signal one worker thread to execute the task
- if (UNLIKELY(hidden_helper)) {
- // Wake hidden helper threads up if they're sleeping
- __kmp_hidden_helper_worker_thread_signal();
- }
-
return TASK_SUCCESSFULLY_PUSHED;
}
@@ -809,6 +807,24 @@ static void __kmp_free_task_and_ancestors(kmp_int32 gtid,
gtid, taskdata, children));
}
+// Only need to keep track of child task counts if any of the following:
+// 1. the team is parallel and tasking is not serialized;
+// 2. it is a proxy, detachable, or hidden helper task;
+// 3. the children counter of its parent task is greater than 0.
+// The 3rd condition covers a serialized team that has encountered a deferred
+// task T (e.g. a detached or hidden helper task). The execution of T is still
+// deferred, and a regular task may depend on T; if we did not track the
+// children in that case, task synchronization would be broken.
+static bool __kmp_track_children_task(kmp_taskdata_t *taskdata) {
+ kmp_tasking_flags_t flags = taskdata->td_flags;
+ bool ret = !(flags.team_serial || flags.tasking_ser);
+ ret = ret || flags.proxy == TASK_PROXY ||
+ flags.detachable == TASK_DETACHABLE || flags.hidden_helper;
+ ret = ret ||
+ KMP_ATOMIC_LD_ACQ(&taskdata->td_parent->td_incomplete_child_tasks) > 0;
+ return ret;
+}
+
// __kmp_task_finish: bookkeeping to do when a task finishes execution
//
// gtid: global thread ID for calling thread
@@ -825,8 +841,9 @@ static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
kmp_info_t *thread = __kmp_threads[gtid];
kmp_task_team_t *task_team =
thread->th.th_task_team; // might be NULL for serial teams...
+#if KMP_DEBUG
kmp_int32 children = 0;
-
+#endif
KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming "
"task %p\n",
gtid, taskdata, resumed_task));
@@ -934,16 +951,15 @@ static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
if (ompt)
__ompt_task_finish(task, resumed_task, ompt_task_complete);
#endif
-
- // Only need to keep track of count if team parallel and tasking not
- // serialized, or task is detachable and event has already been fulfilled
- if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser) ||
- taskdata->td_flags.detachable == TASK_DETACHABLE ||
- taskdata->td_flags.hidden_helper) {
+ // TODO: What would be the balance between the conditions in the function
+ // and an atomic operation?
+ if (__kmp_track_children_task(taskdata)) {
__kmp_release_deps(gtid, taskdata);
// Predecrement simulated by "- 1" calculation
- children =
- KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks) - 1;
+#if KMP_DEBUG
+ children = -1 +
+#endif
+ KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks);
KMP_DEBUG_ASSERT(children >= 0);
if (taskdata->td_taskgroup)
KMP_ATOMIC_DEC(&taskdata->td_taskgroup->count);
@@ -1189,7 +1205,6 @@ kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
kmp_task_t *task;
kmp_taskdata_t *taskdata;
kmp_info_t *thread = __kmp_threads[gtid];
- kmp_info_t *encountering_thread = thread;
kmp_team_t *team = thread->th.th_team;
kmp_taskdata_t *parent_task = thread->th.th_current_task;
size_t shareds_offset;
@@ -1201,15 +1216,6 @@ kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
if (__kmp_enable_hidden_helper) {
if (!TCR_4(__kmp_init_hidden_helper))
__kmp_hidden_helper_initialize();
-
- // For a hidden helper task encountered by a regular thread, we will push
- // the task to the (gtid%__kmp_hidden_helper_threads_num)-th hidden helper
- // thread.
- if (!KMP_HIDDEN_HELPER_THREAD(gtid)) {
- thread = __kmp_threads[KMP_GTID_TO_SHADOW_GTID(gtid)];
- // We don't change the parent-child relation for hidden helper task as
- // we need that to do per-task-region synchronization.
- }
} else {
// If the hidden helper task is not enabled, reset the flag to FALSE.
flags->hidden_helper = FALSE;
@@ -1232,8 +1238,7 @@ kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
// Untied task encountered causes the TSC algorithm to check entire deque of
// the victim thread. If no untied task encountered, then checking the head
// of the deque should be enough.
- KMP_CHECK_UPDATE(
- encountering_thread->th.th_task_team->tt.tt_untied_task_encountered, 1);
+ KMP_CHECK_UPDATE(thread->th.th_task_team->tt.tt_untied_task_encountered, 1);
}
// Detachable tasks are not proxy tasks yet but could be in the future. Doing
@@ -1247,32 +1252,30 @@ kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
}
/* are we running in a sequential parallel or tskm_immediate_exec... we need
tasking support enabled */
- if ((encountering_thread->th.th_task_team) == NULL) {
+ if ((thread->th.th_task_team) == NULL) {
/* This should only happen if the team is serialized
setup a task team and propagate it to the thread */
KMP_DEBUG_ASSERT(team->t.t_serialized);
KA_TRACE(30,
("T#%d creating task team in __kmp_task_alloc for proxy task\n",
gtid));
- __kmp_task_team_setup(
- encountering_thread, team,
- 1); // 1 indicates setup the current team regardless of nthreads
- encountering_thread->th.th_task_team =
- team->t.t_task_team[encountering_thread->th.th_task_state];
+ // 1 indicates setup the current team regardless of nthreads
+ __kmp_task_team_setup(thread, team, 1);
+ thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
}
- kmp_task_team_t *task_team = encountering_thread->th.th_task_team;
+ kmp_task_team_t *task_team = thread->th.th_task_team;
/* tasking must be enabled now as the task might not be pushed */
if (!KMP_TASKING_ENABLED(task_team)) {
KA_TRACE(
30,
("T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid));
- __kmp_enable_tasking(task_team, encountering_thread);
- kmp_int32 tid = encountering_thread->th.th_info.ds.ds_tid;
+ __kmp_enable_tasking(task_team, thread);
+ kmp_int32 tid = thread->th.th_info.ds.ds_tid;
kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid];
// No lock needed since only owner can allocate
if (thread_data->td.td_deque == NULL) {
- __kmp_alloc_task_deque(encountering_thread, thread_data);
+ __kmp_alloc_task_deque(thread, thread_data);
}
}
@@ -1297,11 +1300,11 @@ kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
// Avoid double allocation here by combining shareds with taskdata
#if USE_FAST_MEMORY
- taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(
- encountering_thread, shareds_offset + sizeof_shareds);
+ taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, shareds_offset +
+ sizeof_shareds);
#else /* ! USE_FAST_MEMORY */
- taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(
- encountering_thread, shareds_offset + sizeof_shareds);
+ taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, shareds_offset +
+ sizeof_shareds);
#endif /* USE_FAST_MEMORY */
task = KMP_TASKDATA_TO_TASK(taskdata);
@@ -1328,7 +1331,7 @@ kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
taskdata->td_task_id = KMP_GEN_TASK_ID();
taskdata->td_team = thread->th.th_team;
- taskdata->td_alloc_thread = encountering_thread;
+ taskdata->td_alloc_thread = thread;
taskdata->td_parent = parent_task;
taskdata->td_level = parent_task->td_level + 1; // increment nesting level
KMP_ATOMIC_ST_RLX(&taskdata->td_untied_count, 0);
@@ -1342,10 +1345,16 @@ kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
copy_icvs(&taskdata->td_icvs, &taskdata->td_parent->td_icvs);
taskdata->td_flags = *flags;
- taskdata->encountering_gtid = gtid;
taskdata->td_task_team = thread->th.th_task_team;
taskdata->td_size_alloc = shareds_offset + sizeof_shareds;
taskdata->td_flags.tasktype = TASK_EXPLICIT;
+ // If it is a hidden helper task, we need to set the team and task team
+ // accordingly.
+ if (flags->hidden_helper) {
+ kmp_info_t *shadow_thread = __kmp_threads[KMP_GTID_TO_SHADOW_GTID(gtid)];
+ taskdata->td_team = shadow_thread->th.th_team;
+ taskdata->td_task_team = shadow_thread->th.th_task_team;
+ }
// GEH - TODO: fix this to copy parent task's value of tasking_ser flag
taskdata->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec);
@@ -1382,11 +1391,9 @@ kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
if (UNLIKELY(ompt_enabled.enabled))
__ompt_task_init(taskdata, gtid);
#endif
- // Only need to keep track of child task counts if team parallel and tasking
- // not serialized or if it is a proxy or detachable or hidden helper task
- if (flags->proxy == TASK_PROXY || flags->detachable == TASK_DETACHABLE ||
- flags->hidden_helper ||
- !(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser)) {
+ // TODO: What would be the balance between the conditions in the function and
+ // an atomic operation?
+ if (__kmp_track_children_task(taskdata)) {
KMP_ATOMIC_INC(&parent_task->td_incomplete_child_tasks);
if (parent_task->td_taskgroup)
KMP_ATOMIC_INC(&parent_task->td_taskgroup->count);
@@ -1438,10 +1445,12 @@ kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
size_t sizeof_shareds,
kmp_routine_entry_t task_entry,
kmp_int64 device_id) {
- if (__kmp_enable_hidden_helper) {
- auto &input_flags = reinterpret_cast<kmp_tasking_flags_t &>(flags);
+ auto &input_flags = reinterpret_cast<kmp_tasking_flags_t &>(flags);
+ // The target task is untied, as defined in the specification.
+ input_flags.tiedness = TASK_UNTIED;
+
+ if (__kmp_enable_hidden_helper)
input_flags.hidden_helper = TRUE;
- }
return __kmpc_omp_task_alloc(loc_ref, gtid, flags, sizeof_kmp_task_t,
sizeof_shareds, task_entry);
@@ -1612,13 +1621,15 @@ static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task,
KMP_FSYNC_ACQUIRED(taskdata); // acquired self (new task)
#endif
+ if (task->routine != NULL) {
#ifdef KMP_GOMP_COMPAT
- if (taskdata->td_flags.native) {
- ((void (*)(void *))(*(task->routine)))(task->shareds);
- } else
+ if (taskdata->td_flags.native) {
+ ((void (*)(void *))(*(task->routine)))(task->shareds);
+ } else
#endif /* KMP_GOMP_COMPAT */
- {
- (*(task->routine))(gtid, task);
+ {
+ (*(task->routine))(gtid, task);
+ }
}
KMP_POP_PARTITIONED_TIMER();
@@ -2832,15 +2843,14 @@ static kmp_task_t *__kmp_steal_task(kmp_info_t *victim_thr, kmp_int32 gtid,
// We need to un-mark this victim as a finished victim. This must be done
// before releasing the lock, or else other threads (starting with the
// primary thread victim) might be prematurely released from the barrier!!!
- kmp_int32 count;
-
- count = KMP_ATOMIC_INC(unfinished_threads);
-
+#if KMP_DEBUG
+ kmp_int32 count =
+#endif
+ KMP_ATOMIC_INC(unfinished_threads);
KA_TRACE(
20,
("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
gtid, count + 1, task_team));
-
*thread_finished = FALSE;
}
TCW_4(victim_td->td.td_deque_ntasks, ntasks - 1);
@@ -2947,8 +2957,7 @@ static inline int __kmp_execute_tasks_template(
(TCR_PTR(CCAST(void *, other_thread->th.th_sleep_loc)) !=
NULL)) {
asleep = 1;
- __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread),
- other_thread->th.th_sleep_loc);
+ __kmp_null_resume_wrapper(other_thread);
// A sleeping thread should not have any tasks on it's queue.
// There is a slight possibility that it resumes, steals a task
// from another thread, which spawns more tasks, all in the time
@@ -3033,9 +3042,10 @@ static inline int __kmp_execute_tasks_template(
// done. This decrement might be to the spin location, and result in the
// termination condition being satisfied.
if (!*thread_finished) {
- kmp_int32 count;
-
- count = KMP_ATOMIC_DEC(unfinished_threads) - 1;
+#if KMP_DEBUG
+ kmp_int32 count = -1 +
+#endif
+ KMP_ATOMIC_DEC(unfinished_threads);
KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d dec "
"unfinished_threads to %d task_team=%p\n",
gtid, count, task_team));
@@ -3097,6 +3107,16 @@ int __kmp_execute_tasks_64(
thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
}
+template <bool C, bool S>
+int __kmp_atomic_execute_tasks_64(
+ kmp_info_t *thread, kmp_int32 gtid, kmp_atomic_flag_64<C, S> *flag,
+ int final_spin, int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
+ kmp_int32 is_constrained) {
+ return __kmp_execute_tasks_template(
+ thread, gtid, flag, final_spin,
+ thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
+}
+
int __kmp_execute_tasks_oncore(
kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
@@ -3123,6 +3143,14 @@ template int __kmp_execute_tasks_64<true, false>(kmp_info_t *, kmp_int32,
int *USE_ITT_BUILD_ARG(void *),
kmp_int32);
+template int __kmp_atomic_execute_tasks_64<false, true>(
+ kmp_info_t *, kmp_int32, kmp_atomic_flag_64<false, true> *, int,
+ int *USE_ITT_BUILD_ARG(void *), kmp_int32);
+
+template int __kmp_atomic_execute_tasks_64<true, false>(
+ kmp_info_t *, kmp_int32, kmp_atomic_flag_64<true, false> *, int,
+ int *USE_ITT_BUILD_ARG(void *), kmp_int32);
+
// __kmp_enable_tasking: Allocate task team and resume threads sleeping at the
// next barrier so they can assist in executing enqueued tasks.
// First thread in allocates the task team atomically.
@@ -3161,7 +3189,7 @@ static void __kmp_enable_tasking(kmp_task_team_t *task_team,
// tasks and execute them. In extra barrier mode, tasks do not sleep
// at the separate tasking barrier, so this isn't a problem.
for (i = 0; i < nthreads; i++) {
- volatile void *sleep_loc;
+ void *sleep_loc;
kmp_info_t *thread = threads_data[i].td.td_thr;
if (i == this_thr->th.th_info.ds.ds_tid) {
@@ -3178,7 +3206,7 @@ static void __kmp_enable_tasking(kmp_task_team_t *task_team,
KF_TRACE(50, ("__kmp_enable_tasking: T#%d waking up thread T#%d\n",
__kmp_gtid_from_thread(this_thr),
__kmp_gtid_from_thread(thread)));
- __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
+ __kmp_null_resume_wrapper(thread);
} else {
KF_TRACE(50, ("__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
__kmp_gtid_from_thread(this_thr),
@@ -3546,7 +3574,7 @@ void __kmp_wait_to_unref_task_teams(void) {
__kmp_gtid_from_thread(thread)));
if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
- volatile void *sleep_loc;
+ void *sleep_loc;
// If the thread is sleeping, awaken it.
if ((sleep_loc = TCR_PTR(CCAST(void *, thread->th.th_sleep_loc))) !=
NULL) {
@@ -3554,7 +3582,7 @@ void __kmp_wait_to_unref_task_teams(void) {
10,
("__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
__kmp_gtid_from_thread(thread), __kmp_gtid_from_thread(thread)));
- __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
+ __kmp_null_resume_wrapper(thread);
}
}
}
@@ -3868,11 +3896,12 @@ static void __kmp_first_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
}
static void __kmp_second_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
+#if KMP_DEBUG
kmp_int32 children = 0;
-
// Predecrement simulated by "- 1" calculation
- children =
- KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks) - 1;
+ children = -1 +
+#endif
+ KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks);
KMP_DEBUG_ASSERT(children >= 0);
// Remove the imaginary children
@@ -3935,7 +3964,7 @@ void __kmpc_give_task(kmp_task_t *ptask, kmp_int32 start = 0) {
// This should be similar to start_k = __kmp_get_random( thread ) % nthreads
// but we cannot use __kmp_get_random here
- kmp_int32 start_k = start;
+ kmp_int32 start_k = start % nthreads;
kmp_int32 pass = 1;
kmp_int32 k = start_k;
diff --git a/openmp/runtime/src/kmp_utility.cpp b/openmp/runtime/src/kmp_utility.cpp
index c4bfead9d0d6..48d31e5ee7b7 100644
--- a/openmp/runtime/src/kmp_utility.cpp
+++ b/openmp/runtime/src/kmp_utility.cpp
@@ -129,7 +129,7 @@ void __kmp_query_cpuid(kmp_cpuinfo_t *p) {
p->initialized = 1;
- p->sse2 = 1; // Assume SSE2 by default.
+ p->flags.sse2 = 1; // Assume SSE2 by default.
__kmp_x86_cpuid(0, 0, &buf);
@@ -169,7 +169,7 @@ void __kmp_query_cpuid(kmp_cpuinfo_t *p) {
data[i] = (t & 0xff);
}
- p->sse2 = (buf.edx >> 26) & 1;
+ p->flags.sse2 = (buf.edx >> 26) & 1;
#ifdef KMP_DEBUG
@@ -247,15 +247,21 @@ void __kmp_query_cpuid(kmp_cpuinfo_t *p) {
i, buf.eax, buf.ebx, buf.ecx, buf.edx));
}
#endif
-#if KMP_USE_ADAPTIVE_LOCKS
- p->rtm = 0;
+ p->flags.rtm = 0;
+ p->flags.hybrid = 0;
if (max_arg > 7) {
/* RTM bit CPUID.07:EBX, bit 11 */
+ /* HYBRID bit CPUID.07:EDX, bit 15 */
__kmp_x86_cpuid(7, 0, &buf);
- p->rtm = (buf.ebx >> 11) & 1;
- KA_TRACE(trace_level, (" RTM"));
+ p->flags.rtm = (buf.ebx >> 11) & 1;
+ p->flags.hybrid = (buf.edx >> 15) & 1;
+ if (p->flags.rtm) {
+ KA_TRACE(trace_level, (" RTM"));
+ }
+ if (p->flags.hybrid) {
+ KA_TRACE(trace_level, (" HYBRID"));
+ }
}
-#endif
}
{ // Parse CPU brand string for frequency, saving the string for later.
diff --git a/openmp/runtime/src/kmp_wait_release.cpp b/openmp/runtime/src/kmp_wait_release.cpp
index cabb5722f4dc..d41ddf231e3f 100644
--- a/openmp/runtime/src/kmp_wait_release.cpp
+++ b/openmp/runtime/src/kmp_wait_release.cpp
@@ -33,6 +33,10 @@ template <bool C, bool S>
void __kmp_mwait_64(int th_gtid, kmp_flag_64<C, S> *flag) {
__kmp_mwait_template(th_gtid, flag);
}
+template <bool C, bool S>
+void __kmp_atomic_mwait_64(int th_gtid, kmp_atomic_flag_64<C, S> *flag) {
+ __kmp_mwait_template(th_gtid, flag);
+}
void __kmp_mwait_oncore(int th_gtid, kmp_flag_oncore *flag) {
__kmp_mwait_template(th_gtid, flag);
}
@@ -40,4 +44,8 @@ void __kmp_mwait_oncore(int th_gtid, kmp_flag_oncore *flag) {
template void __kmp_mwait_32<false, false>(int, kmp_flag_32<false, false> *);
template void __kmp_mwait_64<false, true>(int, kmp_flag_64<false, true> *);
template void __kmp_mwait_64<true, false>(int, kmp_flag_64<true, false> *);
+template void
+__kmp_atomic_mwait_64<false, true>(int, kmp_atomic_flag_64<false, true> *);
+template void
+__kmp_atomic_mwait_64<true, false>(int, kmp_atomic_flag_64<true, false> *);
#endif
diff --git a/openmp/runtime/src/kmp_wait_release.h b/openmp/runtime/src/kmp_wait_release.h
index d528ce9f1801..226150dfb781 100644
--- a/openmp/runtime/src/kmp_wait_release.h
+++ b/openmp/runtime/src/kmp_wait_release.h
@@ -33,96 +33,288 @@ higher level operations such as barriers and fork/join.
@{
*/
-/*!
- * The flag_type describes the storage used for the flag.
- */
-enum flag_type {
- flag32, /**< 32 bit flags */
- flag64, /**< 64 bit flags */
- flag_oncore /**< special 64-bit flag for on-core barrier (hierarchical) */
-};
-
struct flag_properties {
unsigned int type : 16;
unsigned int reserved : 16;
};
-/*!
- * Base class for wait/release volatile flag
- */
-template <typename P> class kmp_flag_native {
- volatile P *loc;
- flag_properties t;
+template <enum flag_type FlagType> struct flag_traits {};
+
+template <> struct flag_traits<flag32> {
+ typedef kmp_uint32 flag_t;
+ static const flag_type t = flag32;
+ static inline flag_t tcr(flag_t f) { return TCR_4(f); }
+ static inline flag_t test_then_add4(volatile flag_t *f) {
+ return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f));
+ }
+ static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
+ return KMP_TEST_THEN_OR32(f, v);
+ }
+ static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
+ return KMP_TEST_THEN_AND32(f, v);
+ }
+};
+
+template <> struct flag_traits<atomic_flag64> {
+ typedef kmp_uint64 flag_t;
+ static const flag_type t = atomic_flag64;
+ static inline flag_t tcr(flag_t f) { return TCR_8(f); }
+ static inline flag_t test_then_add4(volatile flag_t *f) {
+ return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
+ }
+ static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
+ return KMP_TEST_THEN_OR64(f, v);
+ }
+ static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
+ return KMP_TEST_THEN_AND64(f, v);
+ }
+};
+
+template <> struct flag_traits<flag64> {
+ typedef kmp_uint64 flag_t;
+ static const flag_type t = flag64;
+ static inline flag_t tcr(flag_t f) { return TCR_8(f); }
+ static inline flag_t test_then_add4(volatile flag_t *f) {
+ return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
+ }
+ static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
+ return KMP_TEST_THEN_OR64(f, v);
+ }
+ static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
+ return KMP_TEST_THEN_AND64(f, v);
+ }
+};
+
+template <> struct flag_traits<flag_oncore> {
+ typedef kmp_uint64 flag_t;
+ static const flag_type t = flag_oncore;
+ static inline flag_t tcr(flag_t f) { return TCR_8(f); }
+ static inline flag_t test_then_add4(volatile flag_t *f) {
+ return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
+ }
+ static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
+ return KMP_TEST_THEN_OR64(f, v);
+ }
+ static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
+ return KMP_TEST_THEN_AND64(f, v);
+ }
+};
+
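
The flag_traits specializations above let the flag classes below pick correctly sized primitives at compile time; a minimal sketch of the intended pattern (the release_once helper is hypothetical and assumes flag_type and the traits are in scope):

    template <flag_type FT>
    static void release_once(volatile typename flag_traits<FT>::flag_t *f) {
      // Bump the flag by 4 using the width-appropriate primitive.
      (void)flag_traits<FT>::test_then_add4(f);
    }
    // e.g. release_once<flag64>(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go);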
+/*! Base class for all flags */
+template <flag_type FlagType> class kmp_flag {
+protected:
+ flag_properties t; /**< "Type" of the flag in loc */
+ kmp_info_t *waiting_threads[1]; /**< Threads sleeping on this thread. */
+ kmp_uint32 num_waiting_threads; /**< #threads sleeping on this thread. */
+ std::atomic<bool> *sleepLoc;
public:
- typedef P flag_t;
- kmp_flag_native(volatile P *p, flag_type ft)
- : loc(p), t({(short unsigned int)ft, 0U}) {}
- volatile P *get() { return loc; }
- void *get_void_p() { return RCAST(void *, CCAST(P *, loc)); }
- void set(volatile P *new_loc) { loc = new_loc; }
+ typedef flag_traits<FlagType> traits_type;
+ kmp_flag() : t({FlagType, 0U}), num_waiting_threads(0), sleepLoc(nullptr) {}
+ kmp_flag(int nwaiters)
+ : t({FlagType, 0U}), num_waiting_threads(nwaiters), sleepLoc(nullptr) {}
+ kmp_flag(std::atomic<bool> *sloc)
+ : t({FlagType, 0U}), num_waiting_threads(0), sleepLoc(sloc) {}
+ /*! @result the flag_type */
flag_type get_type() { return (flag_type)(t.type); }
- P load() { return *loc; }
- void store(P val) { *loc = val; }
+
+ /*! @param i in index into waiting_threads
+ * @result the thread that is waiting at index i */
+ kmp_info_t *get_waiter(kmp_uint32 i) {
+ KMP_DEBUG_ASSERT(i < num_waiting_threads);
+ return waiting_threads[i];
+ }
+ /*! @result num_waiting_threads */
+ kmp_uint32 get_num_waiters() { return num_waiting_threads; }
+ /*! @param thr in the thread which is now waiting
+ * Insert a waiting thread at index 0. */
+ void set_waiter(kmp_info_t *thr) {
+ waiting_threads[0] = thr;
+ num_waiting_threads = 1;
+ }
+ enum barrier_type get_bt() { return bs_last_barrier; }
};
-/*!
- * Base class for wait/release atomic flag
- */
-template <typename P> class kmp_flag {
- std::atomic<P>
- *loc; /**< Pointer to the flag storage that is modified by another thread
- */
- flag_properties t; /**< "Type" of the flag in loc */
+/*! Base class for wait/release volatile flag */
+template <typename PtrType, flag_type FlagType, bool Sleepable>
+class kmp_flag_native : public kmp_flag<FlagType> {
+protected:
+ volatile PtrType *loc;
+ PtrType checker; /**< When flag==checker, it has been released. */
+ typedef flag_traits<FlagType> traits_type;
+
public:
- typedef P flag_t;
- kmp_flag(std::atomic<P> *p, flag_type ft)
- : loc(p), t({(short unsigned int)ft, 0U}) {}
- /*!
- * @result the pointer to the actual flag
- */
- std::atomic<P> *get() { return loc; }
- /*!
- * @result void* pointer to the actual flag
- */
+ typedef PtrType flag_t;
+ kmp_flag_native(volatile PtrType *p) : kmp_flag<FlagType>(), loc(p) {}
+ kmp_flag_native(volatile PtrType *p, kmp_info_t *thr)
+ : kmp_flag<FlagType>(1), loc(p) {
+ this->waiting_threads[0] = thr;
+ }
+ kmp_flag_native(volatile PtrType *p, PtrType c)
+ : kmp_flag<FlagType>(), loc(p), checker(c) {}
+ kmp_flag_native(volatile PtrType *p, PtrType c, std::atomic<bool> *sloc)
+ : kmp_flag<FlagType>(sloc), loc(p), checker(c) {}
+ virtual ~kmp_flag_native() {}
+ void *operator new(size_t size) { return __kmp_allocate(size); }
+ void operator delete(void *p) { __kmp_free(p); }
+ volatile PtrType *get() { return loc; }
+ void *get_void_p() { return RCAST(void *, CCAST(PtrType *, loc)); }
+ void set(volatile PtrType *new_loc) { loc = new_loc; }
+ PtrType load() { return *loc; }
+ void store(PtrType val) { *loc = val; }
+ /*! @result true if the flag object has been released. */
+ virtual bool done_check() {
+ if (Sleepable && !(this->sleepLoc))
+ return (traits_type::tcr(*(this->get())) & ~KMP_BARRIER_SLEEP_STATE) ==
+ checker;
+ else
+ return traits_type::tcr(*(this->get())) == checker;
+ }
+ /*! @param old_loc in old value of flag
+ * @result true if the flag's old value indicates it was released. */
+ virtual bool done_check_val(PtrType old_loc) { return old_loc == checker; }
+ /*! @result true if the flag object is not yet released.
+ * Used in __kmp_wait_template like:
+ * @code
+ * while (flag.notdone_check()) { pause(); }
+ * @endcode */
+ virtual bool notdone_check() {
+ return traits_type::tcr(*(this->get())) != checker;
+ }
+  /*! Trigger all waiting threads to run by modifying flag to release state. */
+ void internal_release() {
+ (void)traits_type::test_then_add4((volatile PtrType *)this->get());
+ }
+ /*! @result Actual flag value before sleep bit(s) set.
+ * Notes that there is at least one thread sleeping on the flag by setting
+ * sleep bit(s). */
+ PtrType set_sleeping() {
+ if (this->sleepLoc) {
+ this->sleepLoc->store(true);
+ return *(this->get());
+ }
+ return traits_type::test_then_or((volatile PtrType *)this->get(),
+ KMP_BARRIER_SLEEP_STATE);
+ }
+  /*! Notes that there are no longer threads sleeping on the flag by clearing
+   * sleep bit(s). */
+ void unset_sleeping() {
+ if (this->sleepLoc) {
+ this->sleepLoc->store(false);
+ return;
+ }
+ traits_type::test_then_and((volatile PtrType *)this->get(),
+ ~KMP_BARRIER_SLEEP_STATE);
+ }
+ /*! @param old_loc in old value of flag
+ * Test if there are threads sleeping on the flag's old value in old_loc. */
+ bool is_sleeping_val(PtrType old_loc) {
+ if (this->sleepLoc)
+ return this->sleepLoc->load();
+ return old_loc & KMP_BARRIER_SLEEP_STATE;
+ }
+ /*! Test whether there are threads sleeping on the flag. */
+ bool is_sleeping() {
+ if (this->sleepLoc)
+ return this->sleepLoc->load();
+ return is_sleeping_val(*(this->get()));
+ }
+ bool is_any_sleeping() {
+ if (this->sleepLoc)
+ return this->sleepLoc->load();
+ return is_sleeping_val(*(this->get()));
+ }
+ kmp_uint8 *get_stolen() { return NULL; }
+};
+
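To make the checker semantics concrete, here is a minimal sketch (hypothetical values, not part of the patch) of how a native flag is released:

// Sketch: a native 64-bit flag is "done" once the watched word equals the
// checker given at construction; internal_release() adds 4 to get it there.
volatile kmp_uint64 poll = 0;
kmp_flag_native<kmp_uint64, flag64, false> f(&poll, (kmp_uint64)4);
// waiter:   while (f.notdone_check()) { /* spin or go to sleep */ }
// releaser: f.internal_release();   // test_then_add4 moves poll from 0 to 4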
+/*! Base class for wait/release atomic flag */
+template <typename PtrType, flag_type FlagType, bool Sleepable>
+class kmp_flag_atomic : public kmp_flag<FlagType> {
+protected:
+ std::atomic<PtrType> *loc; /**< Pointer to flag location to wait on */
+ PtrType checker; /**< Flag == checker means it has been released. */
+public:
+ typedef flag_traits<FlagType> traits_type;
+ typedef PtrType flag_t;
+ kmp_flag_atomic(std::atomic<PtrType> *p) : kmp_flag<FlagType>(), loc(p) {}
+ kmp_flag_atomic(std::atomic<PtrType> *p, kmp_info_t *thr)
+ : kmp_flag<FlagType>(1), loc(p) {
+ this->waiting_threads[0] = thr;
+ }
+ kmp_flag_atomic(std::atomic<PtrType> *p, PtrType c)
+ : kmp_flag<FlagType>(), loc(p), checker(c) {}
+ kmp_flag_atomic(std::atomic<PtrType> *p, PtrType c, std::atomic<bool> *sloc)
+ : kmp_flag<FlagType>(sloc), loc(p), checker(c) {}
+ /*! @result the pointer to the actual flag */
+ std::atomic<PtrType> *get() { return loc; }
+ /*! @result void* pointer to the actual flag */
void *get_void_p() { return RCAST(void *, loc); }
- /*!
- * @param new_loc in set loc to point at new_loc
- */
- void set(std::atomic<P> *new_loc) { loc = new_loc; }
- /*!
- * @result the flag_type
- */
- flag_type get_type() { return (flag_type)(t.type); }
- /*!
- * @result flag value
- */
- P load() { return loc->load(std::memory_order_acquire); }
- /*!
- * @param val the new flag value to be stored
- */
- void store(P val) { loc->store(val, std::memory_order_release); }
- // Derived classes must provide the following:
- /*
- kmp_info_t * get_waiter(kmp_uint32 i);
- kmp_uint32 get_num_waiters();
- bool done_check();
- bool done_check_val(P old_loc);
- bool notdone_check();
- P internal_release();
- void suspend(int th_gtid);
- void mwait(int th_gtid);
- void resume(int th_gtid);
- P set_sleeping();
- P unset_sleeping();
- bool is_sleeping();
- bool is_any_sleeping();
- bool is_sleeping_val(P old_loc);
- int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
- int *thread_finished
- USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32
- is_constrained);
- */
+ /*! @param new_loc in set loc to point at new_loc */
+ void set(std::atomic<PtrType> *new_loc) { loc = new_loc; }
+ /*! @result flag value */
+ PtrType load() { return loc->load(std::memory_order_acquire); }
+ /*! @param val the new flag value to be stored */
+ void store(PtrType val) { loc->store(val, std::memory_order_release); }
+ /*! @result true if the flag object has been released. */
+ bool done_check() {
+ if (Sleepable && !(this->sleepLoc))
+ return (this->load() & ~KMP_BARRIER_SLEEP_STATE) == checker;
+ else
+ return this->load() == checker;
+ }
+ /*! @param old_loc in old value of flag
+ * @result true if the flag's old value indicates it was released. */
+ bool done_check_val(PtrType old_loc) { return old_loc == checker; }
+ /*! @result true if the flag object is not yet released.
+ * Used in __kmp_wait_template like:
+ * @code
+ * while (flag.notdone_check()) { pause(); }
+ * @endcode */
+ bool notdone_check() { return this->load() != checker; }
+  /*! Trigger all waiting threads to run by modifying flag to release state. */
+ void internal_release() { KMP_ATOMIC_ADD(this->get(), 4); }
+ /*! @result Actual flag value before sleep bit(s) set.
+ * Notes that there is at least one thread sleeping on the flag by setting
+ * sleep bit(s). */
+ PtrType set_sleeping() {
+ if (this->sleepLoc) {
+ this->sleepLoc->store(true);
+ return *(this->get());
+ }
+ return KMP_ATOMIC_OR(this->get(), KMP_BARRIER_SLEEP_STATE);
+ }
+  /*! Notes that there are no longer threads sleeping on the flag by clearing
+   * sleep bit(s). */
+ void unset_sleeping() {
+ if (this->sleepLoc) {
+ this->sleepLoc->store(false);
+ return;
+ }
+ KMP_ATOMIC_AND(this->get(), ~KMP_BARRIER_SLEEP_STATE);
+ }
+ /*! @param old_loc in old value of flag
+ * Test whether there are threads sleeping on flag's old value in old_loc. */
+ bool is_sleeping_val(PtrType old_loc) {
+ if (this->sleepLoc)
+ return this->sleepLoc->load();
+ return old_loc & KMP_BARRIER_SLEEP_STATE;
+ }
+ /*! Test whether there are threads sleeping on the flag. */
+ bool is_sleeping() {
+ if (this->sleepLoc)
+ return this->sleepLoc->load();
+ return is_sleeping_val(this->load());
+ }
+ bool is_any_sleeping() {
+ if (this->sleepLoc)
+ return this->sleepLoc->load();
+ return is_sleeping_val(this->load());
+ }
+ kmp_uint8 *get_stolen() { return NULL; }
};
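The new sleepLoc member changes where sleep state lives: when a separate std::atomic<bool> is supplied, sleeping is tracked in that bool instead of being OR-ed into the flag word. A brief sketch (hypothetical values, not part of the patch):

// Sketch: sleep state routed to a side location rather than the flag word.
std::atomic<kmp_uint64> poll(0);
std::atomic<bool> asleep(false);
kmp_flag_atomic<kmp_uint64, atomic_flag64, true> f(&poll, (kmp_uint64)4, &asleep);
f.set_sleeping();          // stores true into asleep; poll is left untouched
bool s = f.is_sleeping();  // reads asleep rather than a sleep bit in poll
f.unset_sleeping();        // stores false into asleep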
#if OMPT_SUPPORT
@@ -264,8 +456,9 @@ final_spin=FALSE)
ompt_entry_state = this_thr->th.ompt_thread_info.state;
if (!final_spin || ompt_entry_state != ompt_state_wait_barrier_implicit ||
KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) {
- ompt_lw_taskteam_t *team =
- this_thr->th.th_team->t.ompt_serialized_team_info;
+ ompt_lw_taskteam_t *team = NULL;
+ if (this_thr->th.th_team)
+ team = this_thr->th.th_team->t.ompt_serialized_team_info;
if (team) {
tId = &(team->ompt_task_info.task_data);
} else {
@@ -340,11 +533,11 @@ final_spin=FALSE)
disabled (KMP_TASKING=0). */
if (task_team != NULL) {
if (TCR_SYNC_4(task_team->tt.tt_active)) {
- if (KMP_TASKING_ENABLED(task_team))
+ if (KMP_TASKING_ENABLED(task_team)) {
flag->execute_tasks(
this_thr, th_gtid, final_spin,
&tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
- else
+ } else
this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
} else {
KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
@@ -557,6 +750,7 @@ static inline void __kmp_mwait_template(int th_gtid, C *flag) {
else {
// if flag changes here, wake-up happens immediately
TCW_PTR(th->th.th_sleep_loc, (void *)flag);
+ th->th.th_sleep_loc_type = flag->get_type();
__kmp_unlock_suspend_mx(th);
KF_TRACE(50, ("__kmp_mwait_template: T#%d calling mwait\n", th_gtid));
#if KMP_HAVE_UMWAIT
@@ -574,6 +768,7 @@ static inline void __kmp_mwait_template(int th_gtid, C *flag) {
if (flag->is_sleeping())
flag->unset_sleeping();
TCW_PTR(th->th.th_sleep_loc, NULL);
+ th->th.th_sleep_loc_type = flag_unset;
}
// Mark thread as active again
th->th.th_active = TRUE;
@@ -624,251 +819,15 @@ template <class C> static inline void __kmp_release_template(C *flag) {
}
}
-template <typename FlagType> struct flag_traits {};
-
-template <> struct flag_traits<kmp_uint32> {
- typedef kmp_uint32 flag_t;
- static const flag_type t = flag32;
- static inline flag_t tcr(flag_t f) { return TCR_4(f); }
- static inline flag_t test_then_add4(volatile flag_t *f) {
- return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f));
- }
- static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
- return KMP_TEST_THEN_OR32(f, v);
- }
- static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
- return KMP_TEST_THEN_AND32(f, v);
- }
-};
-
-template <> struct flag_traits<kmp_uint64> {
- typedef kmp_uint64 flag_t;
- static const flag_type t = flag64;
- static inline flag_t tcr(flag_t f) { return TCR_8(f); }
- static inline flag_t test_then_add4(volatile flag_t *f) {
- return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
- }
- static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
- return KMP_TEST_THEN_OR64(f, v);
- }
- static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
- return KMP_TEST_THEN_AND64(f, v);
- }
-};
-
-// Basic flag that does not use C11 Atomics
-template <typename FlagType, bool Sleepable>
-class kmp_basic_flag_native : public kmp_flag_native<FlagType> {
- typedef flag_traits<FlagType> traits_type;
- FlagType checker; /**< Value to compare flag to to check if flag has been
- released. */
- kmp_info_t
- *waiting_threads[1]; /**< Array of threads sleeping on this thread. */
- kmp_uint32
- num_waiting_threads; /**< Number of threads sleeping on this thread. */
-public:
- kmp_basic_flag_native(volatile FlagType *p)
- : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
- kmp_basic_flag_native(volatile FlagType *p, kmp_info_t *thr)
- : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(1) {
- waiting_threads[0] = thr;
- }
- kmp_basic_flag_native(volatile FlagType *p, FlagType c)
- : kmp_flag_native<FlagType>(p, traits_type::t), checker(c),
- num_waiting_threads(0) {}
- /*!
- * param i in index into waiting_threads
- * @result the thread that is waiting at index i
- */
- kmp_info_t *get_waiter(kmp_uint32 i) {
- KMP_DEBUG_ASSERT(i < num_waiting_threads);
- return waiting_threads[i];
- }
- /*!
- * @result num_waiting_threads
- */
- kmp_uint32 get_num_waiters() { return num_waiting_threads; }
- /*!
- * @param thr in the thread which is now waiting
- *
- * Insert a waiting thread at index 0.
- */
- void set_waiter(kmp_info_t *thr) {
- waiting_threads[0] = thr;
- num_waiting_threads = 1;
- }
- /*!
- * @result true if the flag object has been released.
- */
- bool done_check() {
- if (Sleepable)
- return (traits_type::tcr(*(this->get())) & ~KMP_BARRIER_SLEEP_STATE) ==
- checker;
- else
- return traits_type::tcr(*(this->get())) == checker;
- }
- /*!
- * @param old_loc in old value of flag
- * @result true if the flag's old value indicates it was released.
- */
- bool done_check_val(FlagType old_loc) { return old_loc == checker; }
- /*!
- * @result true if the flag object is not yet released.
- * Used in __kmp_wait_template like:
- * @code
- * while (flag.notdone_check()) { pause(); }
- * @endcode
- */
- bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; }
- /*!
- * @result Actual flag value before release was applied.
- * Trigger all waiting threads to run by modifying flag to release state.
- */
- void internal_release() {
- (void)traits_type::test_then_add4((volatile FlagType *)this->get());
- }
- /*!
- * @result Actual flag value before sleep bit(s) set.
- * Notes that there is at least one thread sleeping on the flag by setting
- * sleep bit(s).
- */
- FlagType set_sleeping() {
- return traits_type::test_then_or((volatile FlagType *)this->get(),
- KMP_BARRIER_SLEEP_STATE);
- }
- /*!
- * @result Actual flag value before sleep bit(s) cleared.
- * Notes that there are no longer threads sleeping on the flag by clearing
- * sleep bit(s).
- */
- FlagType unset_sleeping() {
- return traits_type::test_then_and((volatile FlagType *)this->get(),
- ~KMP_BARRIER_SLEEP_STATE);
- }
- /*!
- * @param old_loc in old value of flag
- * Test whether there are threads sleeping on the flag's old value in old_loc.
- */
- bool is_sleeping_val(FlagType old_loc) {
- return old_loc & KMP_BARRIER_SLEEP_STATE;
- }
- /*!
- * Test whether there are threads sleeping on the flag.
- */
- bool is_sleeping() { return is_sleeping_val(*(this->get())); }
- bool is_any_sleeping() { return is_sleeping_val(*(this->get())); }
- kmp_uint8 *get_stolen() { return NULL; }
- enum barrier_type get_bt() { return bs_last_barrier; }
-};
-
-template <typename FlagType, bool Sleepable>
-class kmp_basic_flag : public kmp_flag<FlagType> {
- typedef flag_traits<FlagType> traits_type;
- FlagType checker; /**< Value to compare flag to to check if flag has been
- released. */
- kmp_info_t
- *waiting_threads[1]; /**< Array of threads sleeping on this thread. */
- kmp_uint32
- num_waiting_threads; /**< Number of threads sleeping on this thread. */
-public:
- kmp_basic_flag(std::atomic<FlagType> *p)
- : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
- kmp_basic_flag(std::atomic<FlagType> *p, kmp_info_t *thr)
- : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(1) {
- waiting_threads[0] = thr;
- }
- kmp_basic_flag(std::atomic<FlagType> *p, FlagType c)
- : kmp_flag<FlagType>(p, traits_type::t), checker(c),
- num_waiting_threads(0) {}
- /*!
- * param i in index into waiting_threads
- * @result the thread that is waiting at index i
- */
- kmp_info_t *get_waiter(kmp_uint32 i) {
- KMP_DEBUG_ASSERT(i < num_waiting_threads);
- return waiting_threads[i];
- }
- /*!
- * @result num_waiting_threads
- */
- kmp_uint32 get_num_waiters() { return num_waiting_threads; }
- /*!
- * @param thr in the thread which is now waiting
- *
- * Insert a waiting thread at index 0.
- */
- void set_waiter(kmp_info_t *thr) {
- waiting_threads[0] = thr;
- num_waiting_threads = 1;
- }
- /*!
- * @result true if the flag object has been released.
- */
- bool done_check() {
- if (Sleepable)
- return (this->load() & ~KMP_BARRIER_SLEEP_STATE) == checker;
- else
- return this->load() == checker;
- }
- /*!
- * @param old_loc in old value of flag
- * @result true if the flag's old value indicates it was released.
- */
- bool done_check_val(FlagType old_loc) { return old_loc == checker; }
- /*!
- * @result true if the flag object is not yet released.
- * Used in __kmp_wait_template like:
- * @code
- * while (flag.notdone_check()) { pause(); }
- * @endcode
- */
- bool notdone_check() { return this->load() != checker; }
- /*!
- * @result Actual flag value before release was applied.
- * Trigger all waiting threads to run by modifying flag to release state.
- */
- void internal_release() { KMP_ATOMIC_ADD(this->get(), 4); }
- /*!
- * @result Actual flag value before sleep bit(s) set.
- * Notes that there is at least one thread sleeping on the flag by setting
- * sleep bit(s).
- */
- FlagType set_sleeping() {
- return KMP_ATOMIC_OR(this->get(), KMP_BARRIER_SLEEP_STATE);
- }
- /*!
- * @result Actual flag value before sleep bit(s) cleared.
- * Notes that there are no longer threads sleeping on the flag by clearing
- * sleep bit(s).
- */
- FlagType unset_sleeping() {
- return KMP_ATOMIC_AND(this->get(), ~KMP_BARRIER_SLEEP_STATE);
- }
- /*!
- * @param old_loc in old value of flag
- * Test whether there are threads sleeping on the flag's old value in old_loc.
- */
- bool is_sleeping_val(FlagType old_loc) {
- return old_loc & KMP_BARRIER_SLEEP_STATE;
- }
- /*!
- * Test whether there are threads sleeping on the flag.
- */
- bool is_sleeping() { return is_sleeping_val(this->load()); }
- bool is_any_sleeping() { return is_sleeping_val(this->load()); }
- kmp_uint8 *get_stolen() { return NULL; }
- enum barrier_type get_bt() { return bs_last_barrier; }
-};
-
template <bool Cancellable, bool Sleepable>
-class kmp_flag_32 : public kmp_basic_flag<kmp_uint32, Sleepable> {
+class kmp_flag_32 : public kmp_flag_atomic<kmp_uint32, flag32, Sleepable> {
public:
kmp_flag_32(std::atomic<kmp_uint32> *p)
- : kmp_basic_flag<kmp_uint32, Sleepable>(p) {}
+ : kmp_flag_atomic<kmp_uint32, flag32, Sleepable>(p) {}
kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_info_t *thr)
- : kmp_basic_flag<kmp_uint32, Sleepable>(p, thr) {}
+ : kmp_flag_atomic<kmp_uint32, flag32, Sleepable>(p, thr) {}
kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_uint32 c)
- : kmp_basic_flag<kmp_uint32, Sleepable>(p, c) {}
+ : kmp_flag_atomic<kmp_uint32, flag32, Sleepable>(p, c) {}
void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
void mwait(int th_gtid) { __kmp_mwait_32(th_gtid, this); }
@@ -895,14 +854,16 @@ public:
};
template <bool Cancellable, bool Sleepable>
-class kmp_flag_64 : public kmp_basic_flag_native<kmp_uint64, Sleepable> {
+class kmp_flag_64 : public kmp_flag_native<kmp_uint64, flag64, Sleepable> {
public:
kmp_flag_64(volatile kmp_uint64 *p)
- : kmp_basic_flag_native<kmp_uint64, Sleepable>(p) {}
+ : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p) {}
kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr)
- : kmp_basic_flag_native<kmp_uint64, Sleepable>(p, thr) {}
+ : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p, thr) {}
kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c)
- : kmp_basic_flag_native<kmp_uint64, Sleepable>(p, c) {}
+ : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p, c) {}
+ kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c, std::atomic<bool> *loc)
+ : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p, c, loc) {}
void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
void mwait(int th_gtid) { __kmp_mwait_64(th_gtid, this); }
@@ -928,20 +889,52 @@ public:
flag_type get_ptr_type() { return flag64; }
};
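With Sleepable set, done_check() masks the sleep bit out of the comparison, so a flag that carries both the sleep bit and the release increment still reads as done. A small sketch (hypothetical checker value, not part of the patch):

// Sketch: sleep bit and release increment coexist in the same word.
volatile kmp_uint64 poll = 0;
kmp_flag_64<false, true> f(&poll, (kmp_uint64)4); // Cancellable=false, Sleepable=true
f.set_sleeping();            // ORs KMP_BARRIER_SLEEP_STATE into poll
f.internal_release();        // adds 4
bool done = f.done_check();  // true: (poll & ~KMP_BARRIER_SLEEP_STATE) == 4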
+template <bool Cancellable, bool Sleepable>
+class kmp_atomic_flag_64
+ : public kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable> {
+public:
+ kmp_atomic_flag_64(std::atomic<kmp_uint64> *p)
+ : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p) {}
+ kmp_atomic_flag_64(std::atomic<kmp_uint64> *p, kmp_info_t *thr)
+ : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p, thr) {}
+ kmp_atomic_flag_64(std::atomic<kmp_uint64> *p, kmp_uint64 c)
+ : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p, c) {}
+ kmp_atomic_flag_64(std::atomic<kmp_uint64> *p, kmp_uint64 c,
+ std::atomic<bool> *loc)
+ : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p, c, loc) {}
+ void suspend(int th_gtid) { __kmp_atomic_suspend_64(th_gtid, this); }
+ void mwait(int th_gtid) { __kmp_atomic_mwait_64(th_gtid, this); }
+ void resume(int th_gtid) { __kmp_atomic_resume_64(th_gtid, this); }
+ int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
+ int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
+ kmp_int32 is_constrained) {
+ return __kmp_atomic_execute_tasks_64(
+ this_thr, gtid, this, final_spin,
+ thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
+ }
+ bool wait(kmp_info_t *this_thr,
+ int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
+ if (final_spin)
+ return __kmp_wait_template<kmp_atomic_flag_64, TRUE, Cancellable,
+ Sleepable>(
+ this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
+ else
+ return __kmp_wait_template<kmp_atomic_flag_64, FALSE, Cancellable,
+ Sleepable>(
+ this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
+ }
+ void release() { __kmp_release_template(this); }
+ flag_type get_ptr_type() { return atomic_flag64; }
+};
+
// Hierarchical 64-bit on-core barrier instantiation
-class kmp_flag_oncore : public kmp_flag_native<kmp_uint64> {
- kmp_uint64 checker;
- kmp_info_t *waiting_threads[1];
- kmp_uint32 num_waiting_threads;
- kmp_uint32
- offset; /**< Portion of flag that is of interest for an operation. */
+class kmp_flag_oncore : public kmp_flag_native<kmp_uint64, flag_oncore, false> {
+ kmp_uint32 offset; /**< Portion of flag of interest for an operation. */
bool flag_switch; /**< Indicates a switch in flag location. */
enum barrier_type bt; /**< Barrier type. */
- kmp_info_t *this_thr; /**< Thread that may be redirected to different flag
- location. */
+ kmp_info_t *this_thr; /**< Thread to redirect to different flag location. */
#if USE_ITT_BUILD
- void *
- itt_sync_obj; /**< ITT object that must be passed to new flag location. */
+ void *itt_sync_obj; /**< ITT object to pass to new flag location. */
#endif
unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) {
return (RCAST(unsigned char *, CCAST(kmp_uint64 *, loc)))[offset];
@@ -949,31 +942,26 @@ class kmp_flag_oncore : public kmp_flag_native<kmp_uint64> {
public:
kmp_flag_oncore(volatile kmp_uint64 *p)
- : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
- flag_switch(false) {}
+ : kmp_flag_native<kmp_uint64, flag_oncore, false>(p), flag_switch(false) {
+ }
kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
- : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
- offset(idx), flag_switch(false) {}
+ : kmp_flag_native<kmp_uint64, flag_oncore, false>(p), offset(idx),
+ flag_switch(false),
+ bt(bs_last_barrier) USE_ITT_BUILD_ARG(itt_sync_obj(nullptr)) {}
kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx,
enum barrier_type bar_t,
kmp_info_t *thr USE_ITT_BUILD_ARG(void *itt))
- : kmp_flag_native<kmp_uint64>(p, flag_oncore), checker(c),
- num_waiting_threads(0), offset(idx), flag_switch(false), bt(bar_t),
+ : kmp_flag_native<kmp_uint64, flag_oncore, false>(p, c), offset(idx),
+ flag_switch(false), bt(bar_t),
this_thr(thr) USE_ITT_BUILD_ARG(itt_sync_obj(itt)) {}
- kmp_info_t *get_waiter(kmp_uint32 i) {
- KMP_DEBUG_ASSERT(i < num_waiting_threads);
- return waiting_threads[i];
- }
- kmp_uint32 get_num_waiters() { return num_waiting_threads; }
- void set_waiter(kmp_info_t *thr) {
- waiting_threads[0] = thr;
- num_waiting_threads = 1;
- }
- bool done_check_val(kmp_uint64 old_loc) {
+ virtual ~kmp_flag_oncore() override {}
+ void *operator new(size_t size) { return __kmp_allocate(size); }
+ void operator delete(void *p) { __kmp_free(p); }
+ bool done_check_val(kmp_uint64 old_loc) override {
return byteref(&old_loc, offset) == checker;
}
- bool done_check() { return done_check_val(*get()); }
- bool notdone_check() {
+ bool done_check() override { return done_check_val(*get()); }
+ bool notdone_check() override {
// Calculate flag_switch
if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
flag_switch = true;
@@ -997,17 +985,6 @@ public:
KMP_TEST_THEN_OR64(get(), mask);
}
}
- kmp_uint64 set_sleeping() {
- return KMP_TEST_THEN_OR64(get(), KMP_BARRIER_SLEEP_STATE);
- }
- kmp_uint64 unset_sleeping() {
- return KMP_TEST_THEN_AND64(get(), ~KMP_BARRIER_SLEEP_STATE);
- }
- bool is_sleeping_val(kmp_uint64 old_loc) {
- return old_loc & KMP_BARRIER_SLEEP_STATE;
- }
- bool is_sleeping() { return is_sleeping_val(*get()); }
- bool is_any_sleeping() { return is_sleeping_val(*get()); }
void wait(kmp_info_t *this_thr, int final_spin) {
if (final_spin)
__kmp_wait_template<kmp_flag_oncore, TRUE>(
@@ -1038,27 +1015,39 @@ public:
thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
#endif
}
- kmp_uint8 *get_stolen() { return NULL; }
enum barrier_type get_bt() { return bt; }
flag_type get_ptr_type() { return flag_oncore; }
};
-// Used to wake up threads, volatile void* flag is usually the th_sleep_loc
-// associated with int gtid.
-static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
+static inline void __kmp_null_resume_wrapper(kmp_info_t *thr) {
+ int gtid = __kmp_gtid_from_thread(thr);
+ void *flag = CCAST(void *, thr->th.th_sleep_loc);
+ flag_type type = thr->th.th_sleep_loc_type;
if (!flag)
return;
-
- switch (RCAST(kmp_flag_64<> *, CCAST(void *, flag))->get_type()) {
+ // Attempt to wake up a thread: examine its type and call appropriate template
+ switch (type) {
case flag32:
- __kmp_resume_32(gtid, (kmp_flag_32<> *)NULL);
+ __kmp_resume_32(gtid, RCAST(kmp_flag_32<> *, flag));
break;
case flag64:
- __kmp_resume_64(gtid, (kmp_flag_64<> *)NULL);
+ __kmp_resume_64(gtid, RCAST(kmp_flag_64<> *, flag));
+ break;
+ case atomic_flag64:
+ __kmp_atomic_resume_64(gtid, RCAST(kmp_atomic_flag_64<> *, flag));
break;
case flag_oncore:
- __kmp_resume_oncore(gtid, (kmp_flag_oncore *)NULL);
+ __kmp_resume_oncore(gtid, RCAST(kmp_flag_oncore *, flag));
+ break;
+#ifdef KMP_DEBUG
+ case flag_unset:
+ KF_TRACE(100, ("__kmp_null_resume_wrapper: flag type %d is unset\n", type));
break;
+ default:
+ KF_TRACE(100, ("__kmp_null_resume_wrapper: flag type %d does not match any "
+ "known flag type\n",
+ type));
+#endif
}
}
diff --git a/openmp/runtime/src/ompt-general.cpp b/openmp/runtime/src/ompt-general.cpp
index 3d8ef041f724..c1468c0c322e 100644
--- a/openmp/runtime/src/ompt-general.cpp
+++ b/openmp/runtime/src/ompt-general.cpp
@@ -295,9 +295,16 @@ ompt_try_start_tool(unsigned int omp_version, const char *runtime_version) {
OMPT_VERBOSE_INIT_CONTINUED_PRINT("Success. \n");
OMPT_VERBOSE_INIT_PRINT("Searching for ompt_start_tool in %s... ",
fname);
+ dlerror(); // Clear any existing error
start_tool = (ompt_start_tool_t)dlsym(h, "ompt_start_tool");
if (!start_tool) {
- OMPT_VERBOSE_INIT_CONTINUED_PRINT("Failed: %s\n", dlerror());
+ char *error = dlerror();
+ if (error != NULL) {
+ OMPT_VERBOSE_INIT_CONTINUED_PRINT("Failed: %s\n", error);
+ } else {
+ OMPT_VERBOSE_INIT_CONTINUED_PRINT("Failed: %s\n",
+ "ompt_start_tool = NULL");
+ }
} else
#elif KMP_OS_WINDOWS
OMPT_VERBOSE_INIT_PRINT("Opening %s... ", fname);
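The dlerror() handling added above follows the standard dlsym error-checking idiom: dlsym() may legitimately return NULL, so failure is detected by clearing and then re-reading dlerror(). A generic sketch (library name hypothetical, not part of the patch):

void *handle = dlopen("libexampletool.so", RTLD_LAZY); /* hypothetical tool library */
if (handle) {
  dlerror();                       /* clear any stale error first */
  ompt_start_tool_t start =
      (ompt_start_tool_t)dlsym(handle, "ompt_start_tool");
  const char *err = dlerror();
  if (err != NULL) {
    /* symbol lookup failed; err describes the failure */
  }
  /* otherwise the lookup succeeded, even if start happens to be NULL */
}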
diff --git a/openmp/runtime/src/ompt-specific.cpp b/openmp/runtime/src/ompt-specific.cpp
index 1ad0e17ed408..c28b9bd1a660 100644
--- a/openmp/runtime/src/ompt-specific.cpp
+++ b/openmp/runtime/src/ompt-specific.cpp
@@ -283,10 +283,6 @@ void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, kmp_info_t *thr,
link_lwt->ompt_team_info = *OMPT_CUR_TEAM_INFO(thr);
*OMPT_CUR_TEAM_INFO(thr) = tmp_team;
- ompt_task_info_t tmp_task = lwt->ompt_task_info;
- link_lwt->ompt_task_info = *OMPT_CUR_TASK_INFO(thr);
- *OMPT_CUR_TASK_INFO(thr) = tmp_task;
-
// link the taskteam into the list of taskteams:
ompt_lw_taskteam_t *my_parent =
thr->th.th_team->t.ompt_serialized_team_info;
@@ -297,6 +293,10 @@ void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, kmp_info_t *thr,
ompd_bp_parallel_begin();
}
#endif
+
+ ompt_task_info_t tmp_task = lwt->ompt_task_info;
+ link_lwt->ompt_task_info = *OMPT_CUR_TASK_INFO(thr);
+ *OMPT_CUR_TASK_INFO(thr) = tmp_task;
} else {
// this is the first serialized team, so we just store the values in the
// team and drop the taskteam-object
@@ -313,6 +313,9 @@ void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, kmp_info_t *thr,
void __ompt_lw_taskteam_unlink(kmp_info_t *thr) {
ompt_lw_taskteam_t *lwtask = thr->th.th_team->t.ompt_serialized_team_info;
if (lwtask) {
+ ompt_task_info_t tmp_task = lwtask->ompt_task_info;
+ lwtask->ompt_task_info = *OMPT_CUR_TASK_INFO(thr);
+ *OMPT_CUR_TASK_INFO(thr) = tmp_task;
#if OMPD_SUPPORT
if (ompd_state & OMPD_ENABLE_BP) {
ompd_bp_parallel_end();
@@ -324,10 +327,6 @@ void __ompt_lw_taskteam_unlink(kmp_info_t *thr) {
lwtask->ompt_team_info = *OMPT_CUR_TEAM_INFO(thr);
*OMPT_CUR_TEAM_INFO(thr) = tmp_team;
- ompt_task_info_t tmp_task = lwtask->ompt_task_info;
- lwtask->ompt_task_info = *OMPT_CUR_TASK_INFO(thr);
- *OMPT_CUR_TASK_INFO(thr) = tmp_task;
-
if (lwtask->heap) {
__kmp_free(lwtask);
lwtask = NULL;
@@ -365,13 +364,9 @@ int __ompt_get_task_info_internal(int ancestor_level, int *type,
if (team == NULL)
return 0;
ompt_lw_taskteam_t *lwt = NULL,
- *next_lwt = LWT_FROM_TEAM(taskdata->td_team),
- *prev_lwt = NULL;
+ *next_lwt = LWT_FROM_TEAM(taskdata->td_team);
while (ancestor_level > 0) {
- // needed for thread_num
- prev_team = team;
- prev_lwt = lwt;
// next lightweight team (if any)
if (lwt)
lwt = lwt->parent;
@@ -390,6 +385,7 @@ int __ompt_get_task_info_internal(int ancestor_level, int *type,
taskdata = taskdata->td_parent;
if (team == NULL)
return 0;
+ prev_team = team;
team = team->t.t_parent;
if (taskdata) {
next_lwt = LWT_FROM_TEAM(taskdata->td_team);
@@ -431,9 +427,18 @@ int __ompt_get_task_info_internal(int ancestor_level, int *type,
if (thread_num) {
if (level == 0)
*thread_num = __kmp_get_tid();
- else if (prev_lwt)
+ else if (lwt)
*thread_num = 0;
- else
+ else if (!prev_team) {
+ // The innermost parallel region contains at least one explicit task.
+ // The task at level > 0 is either an implicit task that
+ // corresponds to the mentioned region or one of the explicit tasks
+      // nested inside the same region. Note that the task isn't the
+      // innermost explicit task (because of the condition level > 0).
+ // Since the task at this level still belongs to the innermost parallel
+ // region, thread_num is determined the same way as for level==0.
+ *thread_num = __kmp_get_tid();
+ } else
*thread_num = prev_team->t.t_master_tid;
// *thread_num = team->t.t_master_tid;
}
diff --git a/openmp/runtime/src/thirdparty/ittnotify/LICENSE.txt b/openmp/runtime/src/thirdparty/ittnotify/LICENSE.txt
new file mode 100644
index 000000000000..37478a55f35f
--- /dev/null
+++ b/openmp/runtime/src/thirdparty/ittnotify/LICENSE.txt
@@ -0,0 +1,8 @@
+Copyright (c) 2019 Intel Corporation. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/openmp/runtime/src/thirdparty/ittnotify/disable_warnings.h b/openmp/runtime/src/thirdparty/ittnotify/disable_warnings.h
index e331ffe72950..6499247ee358 100644
--- a/openmp/runtime/src/thirdparty/ittnotify/disable_warnings.h
+++ b/openmp/runtime/src/thirdparty/ittnotify/disable_warnings.h
@@ -1,4 +1,4 @@
-// clang-format off
+
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@@ -11,20 +11,26 @@
#if ITT_PLATFORM == ITT_PLATFORM_WIN
-#pragma warning(disable: 593) /* parameter "XXXX" was set but never used */
-#pragma warning(disable: 344) /* typedef name has already been declared (with
+#if defined _MSC_VER
+
+#pragma warning(disable : 593) /* parameter "XXXX" was set but never used */
+#pragma warning(disable : 344) /* typedef name has already been declared (with \
same type) */
-#pragma warning(disable: 174) /* expression has no effect */
-#pragma warning(disable: 4127) /* conditional expression is constant */
-#pragma warning(disable: 4306) /* conversion from '?' to '?' of greater size */
+#pragma warning(disable : 174) /* expression has no effect */
+#pragma warning(disable : 4127) /* conditional expression is constant */
+#pragma warning( \
+ disable : 4306) /* conversion from '?' to '?' of greater size */
+
+#endif /* _MSC_VER */
#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
#if defined __INTEL_COMPILER
-#pragma warning(disable: 869) /* parameter "XXXXX" was never referenced */
-#pragma warning(disable: 1418) /* external function definition with no prior
- declaration */
-#pragma warning(disable: 1419) /* external declaration in primary source file */
+#pragma warning(disable : 869) /* parameter "XXXXX" was never referenced */
+#pragma warning(disable : 1418) /* external function definition with no prior \
+ declaration */
+#pragma warning( \
+ disable : 1419) /* external declaration in primary source file */
#endif /* __INTEL_COMPILER */
diff --git a/openmp/runtime/src/thirdparty/ittnotify/ittnotify.h b/openmp/runtime/src/thirdparty/ittnotify/ittnotify.h
index e1eee8cde613..10616bbde67b 100644
--- a/openmp/runtime/src/thirdparty/ittnotify/ittnotify.h
+++ b/openmp/runtime/src/thirdparty/ittnotify/ittnotify.h
@@ -15,7 +15,8 @@
@brief Public User API functions and types
@mainpage
-The ITT API is used to annotate a user's program with additional information
+The Instrumentation and Tracing Technology API (ITT API) is used to
+annotate a user's program with additional information
that can be used by correctness and performance tools. The user inserts
calls in their program. Those calls generate information that is collected
at runtime, and used by Intel(R) Threading Tools.
@@ -180,7 +181,13 @@ The same ID may not be reused for different instances, unless a previous
#if ITT_PLATFORM == ITT_PLATFORM_WIN
/* use __forceinline (VC++ specific) */
-#define ITT_INLINE __forceinline
+#if defined(__MINGW32__) && !defined(__cplusplus)
+#define ITT_INLINE \
+ static __inline__ __attribute__((__always_inline__, __gnu_inline__))
+#else
+#define ITT_INLINE static __forceinline
+#endif /* __MINGW32__ */
+
#define ITT_INLINE_ATTRIBUTE /* nothing */
#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
/*
@@ -373,6 +380,91 @@ ITT_STUBV(ITTAPI, void, detach, (void))
/** @endcond */
/**
+ * @defgroup Intel Processor Trace control
+ * API from this group provides control over collection and analysis of Intel
+ * Processor Trace (Intel PT) data. Information about Intel Processor Trace
+ * technology can be found here (Volume 3 chapter 35):
+ * https://software.intel.com/sites/default/files/managed/39/c5/325462-sdm-vol-1-2abcd-3abcd.pdf
+ * Use this API to mark particular code regions for loading detailed performance
+ * statistics. This mode makes your analysis faster and more accurate.
+ * @{
+ */
+typedef unsigned char __itt_pt_region;
+
+/**
+ * @brief function saves a region name marked with Intel PT API and returns a
+ * region id. Only 7 names can be registered. Attempts to register more names
+ * will be ignored and a region id with an auto-generated name will be
+ * returned. For automatic naming of regions, pass NULL as the function
+ * parameter.
+ */
+#if ITT_PLATFORM == ITT_PLATFORM_WIN
+__itt_pt_region ITTAPI __itt_pt_region_createA(const char *name);
+__itt_pt_region ITTAPI __itt_pt_region_createW(const wchar_t *name);
+#if defined(UNICODE) || defined(_UNICODE)
+#define __itt_pt_region_create __itt_pt_region_createW
+#else /* UNICODE */
+#define __itt_pt_region_create __itt_pt_region_createA
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_pt_region ITTAPI __itt_pt_region_create(const char *name);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM == ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_pt_region, pt_region_createA, (const char *name))
+ITT_STUB(ITTAPI, __itt_pt_region, pt_region_createW, (const wchar_t *name))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_pt_region, pt_region_create, (const char *name))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM == ITT_PLATFORM_WIN
+#define __itt_pt_region_createA ITTNOTIFY_DATA(pt_region_createA)
+#define __itt_pt_region_createA_ptr ITTNOTIFY_NAME(pt_region_createA)
+#define __itt_pt_region_createW ITTNOTIFY_DATA(pt_region_createW)
+#define __itt_pt_region_createW_ptr ITTNOTIFY_NAME(pt_region_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_pt_region_create ITTNOTIFY_DATA(pt_region_create)
+#define __itt_pt_region_create_ptr ITTNOTIFY_NAME(pt_region_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM == ITT_PLATFORM_WIN
+#define __itt_pt_region_createA(name) (__itt_pt_region)0
+#define __itt_pt_region_createA_ptr 0
+#define __itt_pt_region_createW(name) (__itt_pt_region)0
+#define __itt_pt_region_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_pt_region_create(name) (__itt_pt_region)0
+#define __itt_pt_region_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM == ITT_PLATFORM_WIN
+#define __itt_pt_region_createA_ptr 0
+#define __itt_pt_region_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_pt_region_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief function contains a special code pattern identified at the
+ * post-processing stage and marks the beginning of a code region targeted for
+ * Intel PT analysis
+ * @param[in] region - region id, 0 <= region < 8
+ */
+void __itt_mark_pt_region_begin(__itt_pt_region region);
+/**
+ * @brief function contains a special code pattern identified at the
+ * post-processing stage and marks the end of a code region targeted for Intel
+ * PT analysis
+ * @param[in] region - region id, 0 <= region < 8
+ */
+void __itt_mark_pt_region_end(__itt_pt_region region);
+/** @} Intel PT control group*/
+
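Putting the Intel PT group together, a typical use registers a region once and then brackets the code of interest. A brief sketch (region name hypothetical, not part of the patch):

__itt_pt_region hot = __itt_pt_region_create("hot_loop"); /* up to 7 named regions */
__itt_mark_pt_region_begin(hot);
/* ... code whose Intel PT data should be analyzed in detail ... */
__itt_mark_pt_region_end(hot);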
+/**
* @defgroup threads Threads
* @ingroup public
* Give names to threads
@@ -467,11 +559,9 @@ ITT_STUBV(ITTAPI, void, thread_ignore, (void))
* @{
*/
-// clang-format off
-/*****************************************************************//**
+/*********************************************************************
* @name group of functions used for error suppression in correctness tools
*********************************************************************/
-// clang-format on
/** @{ */
/**
* @hideinitializer
@@ -765,11 +855,9 @@ ITT_STUBV(ITTAPI, void, sync_destroy, (void *addr))
#endif /* INTEL_NO_MACRO_BODY */
/** @endcond */
-// clang-format off
-/*****************************************************************//**
+/*********************************************************************
* @name group of functions is used for performance measurement tools
*********************************************************************/
-// clang-format on
/** @{ */
/**
* @brief Enter spin loop on user-defined sync object
@@ -855,11 +943,9 @@ ITT_STUBV(ITTAPI, void, sync_releasing, (void *addr))
/** @} sync group */
-// clang-format off
-/**************************************************************//**
+/******************************************************************
* @name group of functions is used for correctness checking tools
******************************************************************/
-// clang-format on
/** @{ */
/**
* @ingroup legacy
@@ -2449,7 +2535,7 @@ ITT_STUBV(ITTAPI, void, marker,
* name of the metadata), and a value (the actual data). The encoding of
* the value depends on the type of the metadata.
*
- * The type of metadata is specified by an enumerated type __itt_metadata_type.
+ * The type of metadata is specified by an enumerated type __itt_metadata_type.
* @{
*/
@@ -3530,7 +3616,7 @@ ITT_STUBV(ITTAPI, void, relation_add_ex,
ITTNOTIFY_VOID_D5(relation_add_ex, d, x, y, z, a, b)
#define __itt_relation_add_ex_ptr ITTNOTIFY_NAME(relation_add_ex)
#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_relation_add_to_current_ex(domain, clock_domain, timestamp, \
+#define __itt_relation_add_to_current_ex(domain, clock_domain, timestamp, \
relation, tail)
#define __itt_relation_add_to_current_ex_ptr 0
#define __itt_relation_add_ex(domain, clock_domain, timestamp, head, relation, \
@@ -3903,11 +3989,13 @@ ITT_STUBV(ITTAPI, void, enable_attach, (void))
/** @endcond */
/**
- * @brief Module load info
- * This API is used to report necessary information in case of module relocation
- * @param[in] start_addr - relocated module start address
- * @param[in] end_addr - relocated module end address
- * @param[in] path - file system path to the module
+ * @brief Module load notification
+ * This API is used to report necessary information in case of bypassing the
+ * default system loader. Notification should be done immediately after this
+ * module is loaded into process memory.
+ * @param[in] start_addr - module start address
+ * @param[in] end_addr - module end address
+ * @param[in] path - file system full path to the module
*/
#if ITT_PLATFORM == ITT_PLATFORM_WIN
void ITTAPI __itt_module_loadA(void *start_addr, void *end_addr,
@@ -3968,6 +4056,319 @@ ITT_STUB(ITTAPI, void, module_load,
#endif /* INTEL_NO_MACRO_BODY */
/** @endcond */
+/**
+ * @brief Report module unload
+ * This API is used to report necessary information in case of bypassing the
+ * default system loader. Notification should be done just before the module is
+ * unloaded from process memory.
+ * @param[in] addr - base address of loaded module
+ */
+void ITTAPI __itt_module_unload(void *addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, module_unload, (void *addr))
+#define __itt_module_unload ITTNOTIFY_VOID(module_unload)
+#define __itt_module_unload_ptr ITTNOTIFY_NAME(module_unload)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_module_unload(addr)
+#define __itt_module_unload_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_module_unload_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @cond exclude_from_documentation */
+typedef enum {
+ __itt_module_type_unknown = 0,
+ __itt_module_type_elf,
+ __itt_module_type_coff
+} __itt_module_type;
+/** @endcond */
+
+/** @cond exclude_from_documentation */
+typedef enum {
+ itt_section_type_unknown,
+ itt_section_type_bss, /* notifies that the section contains uninitialized
+ * data. These are the relevant section types and the
+ * modules that contain them: ELF module: SHT_NOBITS
+ * section type COFF module:
+ * IMAGE_SCN_CNT_UNINITIALIZED_DATA section type
+ */
+ itt_section_type_data, /* notifies that section contains initialized data.
+ * These are the relevant section types and the modules
+ * that contain them: ELF module: SHT_PROGBITS section
+ * type COFF module: IMAGE_SCN_CNT_INITIALIZED_DATA
+ * section type
+ */
+ itt_section_type_text /* notifies that the section contains executable code.
+ * These are the relevant section types and the modules
+ * that contain them: ELF module: SHT_PROGBITS section
+ * type COFF module: IMAGE_SCN_CNT_CODE section type
+ */
+} __itt_section_type;
+/** @endcond */
+
+/**
+ * @hideinitializer
+ * @brief bit-mask, detects a section attribute that indicates whether a section
+ * can be executed as code: These are the relevant section attributes and the
+ * modules that contain them: ELF module: PF_X section attribute COFF module:
+ * IMAGE_SCN_MEM_EXECUTE attribute
+ */
+#define __itt_section_exec 0x20000000
+
+/**
+ * @hideinitializer
+ * @brief bit-mask, detects a section attribute that indicates whether a section
+ * can be read. These are the relevant section attributes and the modules that
+ * contain them: ELF module: PF_R attribute COFF module: IMAGE_SCN_MEM_READ
+ * attribute
+ */
+#define __itt_section_read 0x40000000
+
+/**
+ * @hideinitializer
+ * @brief bit-mask, detects a section attribute that indicates whether a section
+ * can be written to. These are the relevant section attributes and the modules
+ * that contain them: ELF module: PF_W attribute COFF module:
+ * IMAGE_SCN_MEM_WRITE attribute
+ */
+#define __itt_section_write 0x80000000
+
+/** @cond exclude_from_documentation */
+#pragma pack(push, 8)
+
+typedef struct ___itt_section_info {
+ const char *name; /*!< Section name in UTF8 */
+ __itt_section_type type; /*!< Section content and semantics description */
+ size_t flags; /*!< Section bit flags that describe attributes using bit mask
+ * Zero if disabled, non-zero if enabled
+ */
+ void *start_addr; /*!< Section load(relocated) start address */
+  size_t size; /*!< Section size */
+  size_t file_offset; /*!< Section file offset */
+} __itt_section_info;
+
+#pragma pack(pop)
+/** @endcond */
+
+/** @cond exclude_from_documentation */
+#pragma pack(push, 8)
+
+typedef struct ___itt_module_object {
+ unsigned int version; /*!< API version*/
+ __itt_id module_id; /*!< Unique identifier. This is unchanged for sections
+ that belong to the same module */
+ __itt_module_type module_type; /*!< Binary module format */
+  const char *module_name; /*!< Unique module name or path to module in UTF8
+                            * Contains the module name when module_buffer and
+                            * module_size exist; contains the module path when
+                            * module_buffer and module_size are absent.
+                            * module_name stays the same for a given module_id
+                            */
+ void *module_buffer; /*!< Module buffer content */
+ size_t module_size; /*!< Module buffer size */
+ /*!< If module_buffer and module_size exist, the binary module is dumped onto
+ * the system. If module_buffer and module_size do not exist, the binary
+ * module exists on the system already. The module_name parameter contains the
+ * path to the module.
+ */
+ __itt_section_info *section_array; /*!< Reference to section information */
+ size_t section_number;
+} __itt_module_object;
+
+#pragma pack(pop)
+/** @endcond */
+
+/**
+ * @brief Load module content and its loaded(relocated) sections.
+ * This API is useful to save a module, or specify its location on the system
+ * and report information about loaded sections. The target module is saved on
+ * the system if module buffer content and size are available. If module buffer
+ * content and size are unavailable, the module name contains the path to the
+ * existing binary module.
+ * @param[in] module_obj - provides module and section information, along with
+ * unique module identifiers (name,module ID) which bind the binary module to
+ * particular sections.
+ */
+void ITTAPI __itt_module_load_with_sections(__itt_module_object *module_obj);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, module_load_with_sections,
+ (__itt_module_object * module_obj))
+#define __itt_module_load_with_sections \
+ ITTNOTIFY_VOID(module_load_with_sections)
+#define __itt_module_load_with_sections_ptr \
+ ITTNOTIFY_NAME(module_load_with_sections)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_module_load_with_sections(module_obj)
+#define __itt_module_load_with_sections_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_module_load_with_sections_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
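As a concrete reading of __itt_module_load_with_sections, the sketch below (all values hypothetical, not part of the patch) reports a module that already exists on disk, so only its path and a single section are described:

__itt_section_info text_sec = {0};
text_sec.name = ".text";
text_sec.type = itt_section_type_text;
text_sec.flags = __itt_section_exec | __itt_section_read;
text_sec.start_addr = (void *)0x400000;     /* relocated load address */
text_sec.size = 0x2000;
text_sec.file_offset = 0x1000;

__itt_module_object mod = {0};
mod.module_type = __itt_module_type_elf;
mod.module_name = "/usr/lib/libexample.so"; /* path, since no buffer is given */
mod.module_buffer = NULL;                   /* module already on the system */
mod.module_size = 0;
mod.section_array = &text_sec;
mod.section_number = 1;
__itt_module_load_with_sections(&mod);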
+/**
+ * @brief Unload a module and its loaded(relocated) sections.
+ * This API notifies that the module and its sections were unloaded.
+ * @param[in] module_obj - provides module and sections information, along with
+ * unique module identifiers (name,module ID) which bind the binary module to
+ * particular sections.
+ */
+void ITTAPI __itt_module_unload_with_sections(__itt_module_object *module_obj);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, module_unload_with_sections,
+ (__itt_module_object * module_obj))
+#define __itt_module_unload_with_sections \
+ ITTNOTIFY_VOID(module_unload_with_sections)
+#define __itt_module_unload_with_sections_ptr \
+ ITTNOTIFY_NAME(module_unload_with_sections)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_module_unload_with_sections(module_obj)
+#define __itt_module_unload_with_sections_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_module_unload_with_sections_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @cond exclude_from_documentation */
+#pragma pack(push, 8)
+
+typedef struct ___itt_histogram {
+  const __itt_domain *domain; /*!< Domain of the histogram */
+ const char *nameA; /*!< Name of the histogram */
+#if defined(UNICODE) || defined(_UNICODE)
+ const wchar_t *nameW;
+#else /* UNICODE || _UNICODE */
+ void *nameW;
+#endif /* UNICODE || _UNICODE */
+ __itt_metadata_type x_type; /*!< Type of the histogram X axis */
+ __itt_metadata_type y_type; /*!< Type of the histogram Y axis */
+ int extra1; /*!< Reserved to the runtime */
+ void *extra2; /*!< Reserved to the runtime */
+ struct ___itt_histogram *next;
+} __itt_histogram;
+
+#pragma pack(pop)
+/** @endcond */
+
+/**
+ * @brief Create a typed histogram instance with given name/domain.
+ * @param[in] domain The domain controlling the call.
+ * @param[in] name The name of the histogram.
+ * @param[in] x_type The type of the X axis in histogram (may be 0 to calculate
+ * batch statistics).
+ * @param[in] y_type The type of the Y axis in histogram.
+ */
+#if ITT_PLATFORM == ITT_PLATFORM_WIN
+__itt_histogram *ITTAPI __itt_histogram_createA(const __itt_domain *domain,
+ const char *name,
+ __itt_metadata_type x_type,
+ __itt_metadata_type y_type);
+__itt_histogram *ITTAPI __itt_histogram_createW(const __itt_domain *domain,
+ const wchar_t *name,
+ __itt_metadata_type x_type,
+ __itt_metadata_type y_type);
+#if defined(UNICODE) || defined(_UNICODE)
+#define __itt_histogram_create __itt_histogram_createW
+#define __itt_histogram_create_ptr __itt_histogram_createW_ptr
+#else /* UNICODE */
+#define __itt_histogram_create __itt_histogram_createA
+#define __itt_histogram_create_ptr __itt_histogram_createA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_histogram *ITTAPI __itt_histogram_create(const __itt_domain *domain,
+ const char *name,
+ __itt_metadata_type x_type,
+ __itt_metadata_type y_type);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM == ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_histogram *, histogram_createA,
+ (const __itt_domain *domain, const char *name,
+ __itt_metadata_type x_type, __itt_metadata_type y_type))
+ITT_STUB(ITTAPI, __itt_histogram *, histogram_createW,
+ (const __itt_domain *domain, const wchar_t *name,
+ __itt_metadata_type x_type, __itt_metadata_type y_type))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_histogram *, histogram_create,
+ (const __itt_domain *domain, const char *name,
+ __itt_metadata_type x_type, __itt_metadata_type y_type))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM == ITT_PLATFORM_WIN
+#define __itt_histogram_createA ITTNOTIFY_DATA(histogram_createA)
+#define __itt_histogram_createA_ptr ITTNOTIFY_NAME(histogram_createA)
+#define __itt_histogram_createW ITTNOTIFY_DATA(histogram_createW)
+#define __itt_histogram_createW_ptr ITTNOTIFY_NAME(histogram_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_histogram_create ITTNOTIFY_DATA(histogram_create)
+#define __itt_histogram_create_ptr ITTNOTIFY_NAME(histogram_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM == ITT_PLATFORM_WIN
+#define __itt_histogram_createA(domain, name, x_type, y_type) \
+ (__itt_histogram *)0
+#define __itt_histogram_createA_ptr 0
+#define __itt_histogram_createW(domain, name, x_type, y_type) \
+ (__itt_histogram *)0
+#define __itt_histogram_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_histogram_create(domain, name, x_type, y_type) \
+ (__itt_histogram *)0
+#define __itt_histogram_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM == ITT_PLATFORM_WIN
+#define __itt_histogram_createA_ptr 0
+#define __itt_histogram_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_histogram_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Submit statistics for a histogram instance.
+ * @param[in] hist Pointer to the histogram instance to which the statistics
+ * are submitted.
+ * @param[in] length The number of elements in the dumped axis data arrays.
+ * @param[in] x_data The X axis dumped data itself (may be NULL to calculate
+ * batch statistics).
+ * @param[in] y_data The Y axis dumped data itself.
+ */
+void ITTAPI __itt_histogram_submit(__itt_histogram *hist, size_t length,
+ void *x_data, void *y_data);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, histogram_submit,
+ (__itt_histogram * hist, size_t length, void *x_data, void *y_data))
+#define __itt_histogram_submit ITTNOTIFY_VOID(histogram_submit)
+#define __itt_histogram_submit_ptr ITTNOTIFY_NAME(histogram_submit)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_histogram_submit(hist, length, x_data, y_data)
+#define __itt_histogram_submit_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_histogram_submit_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
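Taken together, the histogram entry points are used as create-once, submit-many. A short sketch (domain name and sample values hypothetical, not part of the patch):

const __itt_domain *d = __itt_domain_create("example.domain");
__itt_histogram *h = __itt_histogram_create(d, "latency",
                                            __itt_metadata_u64,
                                            __itt_metadata_double);
unsigned long long x[3] = {1, 2, 3};   /* X axis samples */
double y[3] = {10.0, 20.0, 30.0};      /* Y axis samples */
__itt_histogram_submit(h, 3, x, y);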
#ifdef __cplusplus
}
#endif /* __cplusplus */
diff --git a/openmp/runtime/src/thirdparty/ittnotify/ittnotify_config.h b/openmp/runtime/src/thirdparty/ittnotify/ittnotify_config.h
index a49236b14885..a452b7643bdb 100644
--- a/openmp/runtime/src/thirdparty/ittnotify/ittnotify_config.h
+++ b/openmp/runtime/src/thirdparty/ittnotify/ittnotify_config.h
@@ -114,7 +114,13 @@
#if ITT_PLATFORM == ITT_PLATFORM_WIN
/* use __forceinline (VC++ specific) */
-#define ITT_INLINE __forceinline
+#if defined(__MINGW32__) && !defined(__cplusplus)
+#define ITT_INLINE \
+ static __inline__ __attribute__((__always_inline__, __gnu_inline__))
+#else
+#define ITT_INLINE static __forceinline
+#endif /* __MINGW32__ */
+
#define ITT_INLINE_ATTRIBUTE /* nothing */
#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
/*
@@ -140,10 +146,9 @@
#define ITT_ARCH_IA32E 2
#endif /* ITT_ARCH_IA32E */
-/* Was there a magical reason we didn't have 3 here before? */
-#ifndef ITT_ARCH_AARCH64
-#define ITT_ARCH_AARCH64 3
-#endif /* ITT_ARCH_AARCH64 */
+#ifndef ITT_ARCH_IA64
+#define ITT_ARCH_IA64 3
+#endif /* ITT_ARCH_IA64 */
#ifndef ITT_ARCH_ARM
#define ITT_ARCH_ARM 4
@@ -153,17 +158,9 @@
#define ITT_ARCH_PPC64 5
#endif /* ITT_ARCH_PPC64 */
-#ifndef ITT_ARCH_MIPS
-#define ITT_ARCH_MIPS 6
-#endif /* ITT_ARCH_MIPS */
-
-#ifndef ITT_ARCH_MIPS64
-#define ITT_ARCH_MIPS64 6
-#endif /* ITT_ARCH_MIPS64 */
-
-#ifndef ITT_ARCH_RISCV64
-#define ITT_ARCH_RISCV64 7
-#endif /* ITT_ARCH_RISCV64 */
+#ifndef ITT_ARCH_ARM64
+#define ITT_ARCH_ARM64 6
+#endif /* ITT_ARCH_ARM64 */
#ifndef ITT_ARCH
#if defined _M_IX86 || defined __i386__
@@ -174,16 +171,10 @@
#define ITT_ARCH ITT_ARCH_IA64
#elif defined _M_ARM || defined __arm__
#define ITT_ARCH ITT_ARCH_ARM
+#elif defined __aarch64__
+#define ITT_ARCH ITT_ARCH_ARM64
#elif defined __powerpc64__
#define ITT_ARCH ITT_ARCH_PPC64
-#elif defined __aarch64__
-#define ITT_ARCH ITT_ARCH_AARCH64
-#elif defined __mips__ && !defined __mips64
-#define ITT_ARCH ITT_ARCH_MIPS
-#elif defined __mips__ && defined __mips64
-#define ITT_ARCH ITT_ARCH_MIPS64
-#elif defined __riscv && __riscv_xlen == 64
-#define ITT_ARCH ITT_ARCH_RISCV64
#endif
#endif
@@ -212,10 +203,10 @@
{ 0xED, 0xAB, 0xAB, 0xEC, 0x0D, 0xEE, 0xDA, 0x30 }
/* Replace with snapshot date YYYYMMDD for promotion build. */
-#define API_VERSION_BUILD 20151119
+#define API_VERSION_BUILD 20180723
#ifndef API_VERSION_NUM
-#define API_VERSION_NUM 0.0.0
+#define API_VERSION_NUM 3.20.1
#endif /* API_VERSION_NUM */
#define API_VERSION \
@@ -228,8 +219,13 @@
typedef HMODULE lib_t;
typedef DWORD TIDT;
typedef CRITICAL_SECTION mutex_t;
+#ifdef __cplusplus
+#define MUTEX_INITIALIZER \
+ {}
+#else
#define MUTEX_INITIALIZER \
{ 0 }
+#endif
#define strong_alias(name, aliasname) /* empty for Windows */
#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
#include <dlfcn.h>
@@ -318,7 +314,17 @@ ITT_INLINE long __itt_interlocked_increment(volatile long *ptr) {
#ifdef SDL_STRNCPY_S
#define __itt_fstrcpyn(s1, b, s2, l) SDL_STRNCPY_S(s1, b, s2, l)
#else
-#define __itt_fstrcpyn(s1, b, s2, l) strncpy(s1, s2, l)
+#define __itt_fstrcpyn(s1, b, s2, l) \
+ { \
+ if (b > 0) { \
+ /* 'volatile' is used to suppress the warning that a destination */ \
+ /* bound depends on the length of the source. */ \
+ volatile size_t num_to_copy = \
+ (size_t)(b - 1) < (size_t)(l) ? (size_t)(b - 1) : (size_t)(l); \
+ strncpy(s1, s2, num_to_copy); \
+ s1[num_to_copy] = 0; \
+ } \
+ }
#endif /* SDL_STRNCPY_S */
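
A standalone sketch of the bounded-copy behavior this fallback implements (illustrative only, assuming a destination buffer of size b): at most b-1 characters are copied and the result is always NUL-terminated.

/* Illustrative equivalent of the fallback __itt_fstrcpyn(s1, b, s2, l). */
#include <stddef.h>
#include <string.h>

static void bounded_copy(char *dst, size_t dst_size, const char *src,
                         size_t src_len) {
  if (dst_size > 0) {
    size_t n = (dst_size - 1 < src_len) ? dst_size - 1 : src_len;
    strncpy(dst, src, n); /* copy at most dst_size-1 characters */
    dst[n] = '\0';        /* truncation still yields a terminated string */
  }
}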
#define __itt_fstrdup(s) strdup(s)
@@ -342,9 +348,7 @@ ITT_INLINE long __TBB_machine_fetchadd4(volatile void *ptr, long addend) {
: "memory");
return result;
}
-#elif ITT_ARCH == ITT_ARCH_ARM || ITT_ARCH == ITT_ARCH_PPC64 || \
- ITT_ARCH == ITT_ARCH_AARCH64 || ITT_ARCH == ITT_ARCH_MIPS || \
- ITT_ARCH == ITT_ARCH_MIPS64 || ITT_ARCH == ITT_ARCH_RISCV64
+#else
#define __TBB_machine_fetchadd4(addr, val) __sync_fetch_and_add(addr, val)
#endif /* ITT_ARCH==ITT_ARCH_IA64 */
#ifndef ITT_SIMPLE_INIT
@@ -441,6 +445,7 @@ typedef struct __itt_counter_info {
struct ___itt_domain;
struct ___itt_string_handle;
+struct ___itt_histogram;
typedef struct ___itt_global {
unsigned char magic[8];
@@ -462,6 +467,8 @@ typedef struct ___itt_global {
struct ___itt_string_handle *string_list;
__itt_collection_state state;
__itt_counter_info_t *counter_list;
+ unsigned int ipt_collect_events;
+ struct ___itt_histogram *histogram_list;
} __itt_global;
#pragma pack(pop)
@@ -604,4 +611,40 @@ typedef struct ___itt_global {
} \
}
+#define NEW_HISTOGRAM_W(gptr, h, h_tail, domain, name, x_type, y_type) \
+ { \
+ h = (__itt_histogram *)malloc(sizeof(__itt_histogram)); \
+ if (h != NULL) { \
+ h->domain = domain; \
+ h->nameA = NULL; \
+ h->nameW = name ? _wcsdup(name) : NULL; \
+ h->x_type = x_type; \
+ h->y_type = y_type; \
+ h->extra1 = 0; \
+ h->extra2 = NULL; \
+ if (h_tail == NULL) \
+ (gptr)->histogram_list = h; \
+ else \
+ h_tail->next = h; \
+ } \
+ }
+
+#define NEW_HISTOGRAM_A(gptr, h, h_tail, domain, name, x_type, y_type) \
+ { \
+ h = (__itt_histogram *)malloc(sizeof(__itt_histogram)); \
+ if (h != NULL) { \
+ h->domain = domain; \
+ h->nameA = name ? __itt_fstrdup(name) : NULL; \
+ h->nameW = NULL; \
+ h->x_type = x_type; \
+ h->y_type = y_type; \
+ h->extra1 = 0; \
+ h->extra2 = NULL; \
+ if (h_tail == NULL) \
+ (gptr)->histogram_list = h; \
+ else \
+ h_tail->next = h; \
+ } \
+ }
+
#endif /* _ITTNOTIFY_CONFIG_H_ */
diff --git a/openmp/runtime/src/thirdparty/ittnotify/ittnotify_static.cpp b/openmp/runtime/src/thirdparty/ittnotify/ittnotify_static.cpp
index eae3c7615cd7..48ffcb5e669d 100644
--- a/openmp/runtime/src/thirdparty/ittnotify/ittnotify_static.cpp
+++ b/openmp/runtime/src/thirdparty/ittnotify/ittnotify_static.cpp
@@ -7,14 +7,11 @@
//
//===----------------------------------------------------------------------===//
-#include "kmp_config.h"
-#include "kmp_os.h"
+#include "kmp_config.h" // INTEL_ITTNOTIFY_PREFIX definition
#include "ittnotify_config.h"
#if ITT_PLATFORM == ITT_PLATFORM_WIN
-#if defined(__MINGW32__)
-#include <limits.h>
-#else
+#if !defined(PATH_MAX)
#define PATH_MAX 512
#endif
#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
@@ -32,14 +29,51 @@
#include "ittnotify.h"
#include "legacy/ittnotify.h"
-#if KMP_MSVC_COMPAT
#include "disable_warnings.h"
-#endif
-static const char api_version[] = API_VERSION "\0\n@(#) $Revision: 481659 $\n";
+static const char api_version[] = API_VERSION "\0\n@(#) $Revision$\n";
#define _N_(n) ITT_JOIN(INTEL_ITTNOTIFY_PREFIX, n)
+#ifndef HAS_CPP_ATTR
+#if defined(__cplusplus) && defined(__has_cpp_attribute)
+#define HAS_CPP_ATTR(X) __has_cpp_attribute(X)
+#else
+#define HAS_CPP_ATTR(X) 0
+#endif
+#endif
+
+#ifndef HAS_C_ATTR
+#if defined(__STDC__) && defined(__has_c_attribute)
+#define HAS_C_ATTR(X) __has_c_attribute(X)
+#else
+#define HAS_C_ATTR(X) 0
+#endif
+#endif
+
+#ifndef HAS_GNU_ATTR
+#if defined(__has_attribute)
+#define HAS_GNU_ATTR(X) __has_attribute(X)
+#else
+#define HAS_GNU_ATTR(X) 0
+#endif
+#endif
+
+#ifndef ITT_ATTRIBUTE_FALLTHROUGH
+#if (HAS_CPP_ATTR(fallthrough) || HAS_C_ATTR(fallthrough)) && \
+ (__cplusplus >= 201703L || _MSVC_LANG >= 201703L)
+#define ITT_ATTRIBUTE_FALLTHROUGH [[fallthrough]]
+#elif HAS_CPP_ATTR(gnu::fallthrough)
+#define ITT_ATTRIBUTE_FALLTHROUGH [[gnu::fallthrough]]
+#elif HAS_CPP_ATTR(clang::fallthrough)
+#define ITT_ATTRIBUTE_FALLTHROUGH [[clang::fallthrough]]
+#elif HAS_GNU_ATTR(fallthrough) && !__INTEL_COMPILER
+#define ITT_ATTRIBUTE_FALLTHROUGH __attribute__((fallthrough))
+#else
+#define ITT_ATTRIBUTE_FALLTHROUGH
+#endif
+#endif
+
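
ITT_ATTRIBUTE_FALLTHROUGH resolves to whichever fall-through annotation the compiler understands (or to nothing). A hypothetical use between switch cases, mirroring how the macro is applied later in this diff; the wrapper function and its locals are illustrative:

/* Illustrative only. */
static void handle_lib_version(int lib_version) {
  __itt_group_id groups = __itt_group_none;
  switch (lib_version) {
  case 0:
    groups = __itt_group_legacy;
    ITT_ATTRIBUTE_FALLTHROUGH; /* intentional: version 0 also takes the version-1 path */
  case 1:
    /* fill pointers from the dynamic library */
    break;
  default:
    break;
  }
  (void)groups;
}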
#if ITT_OS == ITT_OS_WIN
static const char *ittnotify_lib_name = "libittnotify.dll";
#elif ITT_OS == ITT_OS_LINUX || ITT_OS == ITT_OS_FREEBSD
@@ -92,13 +126,8 @@ static const char *ittnotify_lib_name = "libittnotify.dylib";
#endif
-#ifndef PATH_MAX
-#define PATH_MAX 4096
-#endif
-
#ifndef LIB_VAR_NAME
-#if ITT_ARCH == ITT_ARCH_IA32 || ITT_ARCH == ITT_ARCH_ARM || \
- ITT_ARCH == ITT_ARCH_MIPS
+#if ITT_ARCH == ITT_ARCH_IA32 || ITT_ARCH == ITT_ARCH_ARM
#define LIB_VAR_NAME INTEL_LIBITTNOTIFY32
#else
#define LIB_VAR_NAME INTEL_LIBITTNOTIFY64
@@ -120,6 +149,8 @@ static const char *ittnotify_lib_name = "libittnotify.dylib";
} \
}
+#define ITT_MODULE_OBJECT_VERSION 1
+
typedef int(__itt_init_ittlib_t)(const char *, __itt_group_id);
/* this define used to control initialization function name. */
@@ -138,6 +169,8 @@ static __itt_fini_ittlib_t *__itt_fini_ittlib_ptr = _N_(fini_ittlib);
#define __itt_fini_ittlib_name __itt_fini_ittlib_ptr
#endif /* __itt_fini_ittlib_name */
+extern __itt_global _N_(_ittapi_global);
+
/* building pointers to imported funcs */
#undef ITT_STUBV
#undef ITT_STUB
@@ -148,7 +181,9 @@ static __itt_fini_ittlib_t *__itt_fini_ittlib_ptr = _N_(fini_ittlib);
ITT_VERSIONIZE(ITT_JOIN(_N_(name), _init)); \
ITT_EXTERN_C_END \
static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name), _init)) args { \
- __itt_init_ittlib_name(NULL, __itt_group_all); \
+ if (!_N_(_ittapi_global).api_initialized && \
+ _N_(_ittapi_global).thread_list == NULL) \
+ __itt_init_ittlib_name(NULL, __itt_group_all); \
if (ITTNOTIFY_NAME(name) && \
ITTNOTIFY_NAME(name) != ITT_VERSIONIZE(ITT_JOIN(_N_(name), _init))) \
return ITTNOTIFY_NAME(name) params; \
@@ -163,7 +198,9 @@ static __itt_fini_ittlib_t *__itt_fini_ittlib_ptr = _N_(fini_ittlib);
ITT_VERSIONIZE(ITT_JOIN(_N_(name), _init)); \
ITT_EXTERN_C_END \
static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name), _init)) args { \
- __itt_init_ittlib_name(NULL, __itt_group_all); \
+ if (!_N_(_ittapi_global).api_initialized && \
+ _N_(_ittapi_global).thread_list == NULL) \
+ __itt_init_ittlib_name(NULL, __itt_group_all); \
if (ITTNOTIFY_NAME(name) && \
ITTNOTIFY_NAME(name) != ITT_VERSIONIZE(ITT_JOIN(_N_(name), _init))) \
ITTNOTIFY_NAME(name) params; \
@@ -218,13 +255,13 @@ static __itt_group_alias group_alias[] = {
#pragma pack(pop)
-// clang-format off
-#if ITT_PLATFORM == ITT_PLATFORM_WIN && KMP_MSVC_COMPAT
+#if ITT_PLATFORM == ITT_PLATFORM_WIN
+#if _MSC_VER
#pragma warning(push)
-#pragma warning(disable: 4054) /* warning C4054: 'type cast' : from function
- pointer 'XXX' to data pointer 'void *' */
+#pragma warning(disable : 4054) /* warning C4054: 'type cast' : from function \
+ pointer 'XXX' to data pointer 'void *' */
+#endif
#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-// clang-format on
static __itt_api_info api_list[] = {
/* Define functions with static implementation */
@@ -252,8 +289,10 @@ static __itt_api_info api_list[] = {
#include "ittnotify_static.h"
{NULL, NULL, NULL, NULL, __itt_group_none}};
-#if ITT_PLATFORM == ITT_PLATFORM_WIN && KMP_MSVC_COMPAT
+#if ITT_PLATFORM == ITT_PLATFORM_WIN
+#if _MSC_VER
#pragma warning(pop)
+#endif
#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
/* static part descriptor which handles. all notification api attributes. */
@@ -275,48 +314,48 @@ __itt_global _N_(_ittapi_global) = {
NULL, /* domain_list */
NULL, /* string_list */
__itt_collection_normal, /* collection state */
- NULL /* counter_list */
+ NULL, /* counter_list */
+ 0, /* ipt_collect_events */
+ NULL /* histogram_list */
};
typedef void(__itt_api_init_t)(__itt_global *, __itt_group_id);
typedef void(__itt_api_fini_t)(__itt_global *);
+static __itt_domain dummy_domain;
/* ========================================================================= */
#ifdef ITT_NOTIFY_EXT_REPORT
ITT_EXTERN_C void _N_(error_handler)(__itt_error_code, va_list args);
#endif /* ITT_NOTIFY_EXT_REPORT */
-// clang-format off
-#if ITT_PLATFORM == ITT_PLATFORM_WIN && KMP_MSVC_COMPAT
+#if ITT_PLATFORM == ITT_PLATFORM_WIN
+#if _MSC_VER
#pragma warning(push)
-#pragma warning(disable: 4055) /* warning C4055: 'type cast' : from data pointer
- 'void *' to function pointer 'XXX' */
+#pragma warning( \
+ disable : 4055) /* warning C4055: 'type cast' : from data pointer 'void *' \
+ to function pointer 'XXX' */
+#endif
#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-// clang-format on
-static void __itt_report_error(unsigned code_arg, ...) {
+static void __itt_report_error(int code, ...) {
va_list args;
- va_start(args, code_arg);
-
- // We use unsigned for the code argument and explicitly cast it here to the
- // right enumerator because variadic functions are not compatible with
- // default promotions.
- __itt_error_code code = (__itt_error_code)code_arg;
-
+ va_start(args, code);
if (_N_(_ittapi_global).error_handler != NULL) {
__itt_error_handler_t *handler =
(__itt_error_handler_t *)(size_t)_N_(_ittapi_global).error_handler;
- handler(code, args);
+ handler((__itt_error_code)code, args);
}
#ifdef ITT_NOTIFY_EXT_REPORT
- _N_(error_handler)(code, args);
+ _N_(error_handler)((__itt_error_code)code, args);
#endif /* ITT_NOTIFY_EXT_REPORT */
va_end(args);
}
-#if ITT_PLATFORM == ITT_PLATFORM_WIN && KMP_MSVC_COMPAT
+#if ITT_PLATFORM == ITT_PLATFORM_WIN
+#if _MSC_VER
#pragma warning(pop)
+#endif
#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
#if ITT_PLATFORM == ITT_PLATFORM_WIN
@@ -335,6 +374,9 @@ ITT_VERSIONIZE(ITT_JOIN(_N_(domain_createW), _init))(const wchar_t *name) {
ITT_VERSIONIZE(ITT_JOIN(_N_(domain_createW), _init))) {
__itt_mutex_unlock(&_N_(_ittapi_global).mutex);
return ITTNOTIFY_NAME(domain_createW)(name);
+ } else {
+ __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
+ return &dummy_domain;
}
}
for (h_tail = NULL, h = _N_(_ittapi_global).domain_list; h != NULL;
@@ -381,6 +423,15 @@ static __itt_domain *ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(domain_create),
return ITTNOTIFY_NAME(domain_create)(name);
}
#endif
+ else {
+#if ITT_PLATFORM == ITT_PLATFORM_WIN
+ __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
+#else
+ if (PTHREAD_SYMBOLS)
+ __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
+#endif
+ return &dummy_domain;
+ }
}
for (h_tail = NULL, h = _N_(_ittapi_global).domain_list; h != NULL;
h_tail = h, h = h->next) {
@@ -395,6 +446,38 @@ static __itt_domain *ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(domain_create),
return h;
}
+static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(
+ _N_(module_load_with_sections), _init))(__itt_module_object *module_obj) {
+ if (!_N_(_ittapi_global).api_initialized &&
+ _N_(_ittapi_global).thread_list == NULL) {
+ __itt_init_ittlib_name(NULL, __itt_group_all);
+ }
+ if (ITTNOTIFY_NAME(module_load_with_sections) &&
+ ITTNOTIFY_NAME(module_load_with_sections) !=
+ ITT_VERSIONIZE(ITT_JOIN(_N_(module_load_with_sections), _init))) {
+ if (module_obj != NULL) {
+ module_obj->version = ITT_MODULE_OBJECT_VERSION;
+ ITTNOTIFY_NAME(module_load_with_sections)(module_obj);
+ }
+ }
+}
+
+static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(
+ _N_(module_unload_with_sections), _init))(__itt_module_object *module_obj) {
+ if (!_N_(_ittapi_global).api_initialized &&
+ _N_(_ittapi_global).thread_list == NULL) {
+ __itt_init_ittlib_name(NULL, __itt_group_all);
+ }
+ if (ITTNOTIFY_NAME(module_unload_with_sections) &&
+ ITTNOTIFY_NAME(module_unload_with_sections) !=
+ ITT_VERSIONIZE(ITT_JOIN(_N_(module_unload_with_sections), _init))) {
+ if (module_obj != NULL) {
+ module_obj->version = ITT_MODULE_OBJECT_VERSION;
+ ITTNOTIFY_NAME(module_unload_with_sections)(module_obj);
+ }
+ }
+}
+
#if ITT_PLATFORM == ITT_PLATFORM_WIN
static __itt_string_handle *ITTAPI ITT_VERSIONIZE(
ITT_JOIN(_N_(string_handle_createW), _init))(const wchar_t *name) {
@@ -411,6 +494,9 @@ static __itt_string_handle *ITTAPI ITT_VERSIONIZE(
ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_createW), _init))) {
__itt_mutex_unlock(&_N_(_ittapi_global).mutex);
return ITTNOTIFY_NAME(string_handle_createW)(name);
+ } else {
+ __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
+ return NULL;
}
}
for (h_tail = NULL, h = _N_(_ittapi_global).string_list; h != NULL;
@@ -456,6 +542,15 @@ ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_create), _init))(const char *name)
return ITTNOTIFY_NAME(string_handle_create)(name);
}
#endif
+ else {
+#if ITT_PLATFORM == ITT_PLATFORM_WIN
+ __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
+#else
+ if (PTHREAD_SYMBOLS)
+ __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
+#endif
+ return NULL;
+ }
}
for (h_tail = NULL, h = _N_(_ittapi_global).string_list; h != NULL;
h_tail = h, h = h->next) {
@@ -487,11 +582,14 @@ static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(
ITT_VERSIONIZE(ITT_JOIN(_N_(counter_createW), _init))) {
__itt_mutex_unlock(&_N_(_ittapi_global).mutex);
return ITTNOTIFY_NAME(counter_createW)(name, domain);
+ } else {
+ __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
+ return NULL;
}
}
for (h_tail = NULL, h = _N_(_ittapi_global).counter_list; h != NULL;
h_tail = h, h = h->next) {
- if (h->nameW != NULL && h->type == type && !wcscmp(h->nameW, name) &&
+ if (h->nameW != NULL && h->type == (int)type && !wcscmp(h->nameW, name) &&
((h->domainW == NULL && domain == NULL) ||
(h->domainW != NULL && domain != NULL && !wcscmp(h->domainW, domain))))
break;
@@ -537,10 +635,20 @@ static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create),
return ITTNOTIFY_NAME(counter_create)(name, domain);
}
#endif
+ else {
+#if ITT_PLATFORM == ITT_PLATFORM_WIN
+ __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
+#else
+ if (PTHREAD_SYMBOLS)
+ __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
+#endif
+ return NULL;
+ }
}
for (h_tail = NULL, h = _N_(_ittapi_global).counter_list; h != NULL;
h_tail = h, h = h->next) {
- if (h->nameA != NULL && h->type == type && !__itt_fstrcmp(h->nameA, name) &&
+ if (h->nameA != NULL && h->type == (int)type &&
+ !__itt_fstrcmp(h->nameA, name) &&
((h->domainA == NULL && domain == NULL) ||
(h->domainA != NULL && domain != NULL &&
!__itt_fstrcmp(h->domainA, domain))))
@@ -571,11 +679,14 @@ static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create_typedW),
ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create_typedW), _init))) {
__itt_mutex_unlock(&_N_(_ittapi_global).mutex);
return ITTNOTIFY_NAME(counter_create_typedW)(name, domain, type);
+ } else {
+ __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
+ return NULL;
}
}
for (h_tail = NULL, h = _N_(_ittapi_global).counter_list; h != NULL;
h_tail = h, h = h->next) {
- if (h->nameW != NULL && h->type == type && !wcscmp(h->nameW, name) &&
+ if (h->nameW != NULL && h->type == (int)type && !wcscmp(h->nameW, name) &&
((h->domainW == NULL && domain == NULL) ||
(h->domainW != NULL && domain != NULL && !wcscmp(h->domainW, domain))))
break;
@@ -620,10 +731,20 @@ static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(
return ITTNOTIFY_NAME(counter_create_typed)(name, domain, type);
}
#endif
+ else {
+#if ITT_PLATFORM == ITT_PLATFORM_WIN
+ __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
+#else
+ if (PTHREAD_SYMBOLS)
+ __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
+#endif
+ return NULL;
+ }
}
for (h_tail = NULL, h = _N_(_ittapi_global).counter_list; h != NULL;
h_tail = h, h = h->next) {
- if (h->nameA != NULL && h->type == type && !__itt_fstrcmp(h->nameA, name) &&
+ if (h->nameA != NULL && h->type == (int)type &&
+ !__itt_fstrcmp(h->nameA, name) &&
((h->domainA == NULL && domain == NULL) ||
(h->domainA != NULL && domain != NULL &&
!__itt_fstrcmp(h->domainA, domain))))
@@ -637,6 +758,106 @@ static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(
return (__itt_counter)h;
}
+#if ITT_PLATFORM == ITT_PLATFORM_WIN
+static __itt_histogram *ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(histogram_createW),
+ _init))(
+ const __itt_domain *domain, const wchar_t *name, __itt_metadata_type x_type,
+ __itt_metadata_type y_type) {
+ __itt_histogram *h_tail = NULL, *h = NULL;
+
+ if (domain == NULL || name == NULL) {
+ return NULL;
+ }
+
+ ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global));
+ if (_N_(_ittapi_global).api_initialized) {
+ if (ITTNOTIFY_NAME(histogram_createW) &&
+ ITTNOTIFY_NAME(histogram_createW) !=
+ ITT_VERSIONIZE(ITT_JOIN(_N_(histogram_createW), _init))) {
+ __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
+ return ITTNOTIFY_NAME(histogram_createW)(domain, name, x_type, y_type);
+ } else {
+ __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
+ return NULL;
+ }
+ }
+ for (h_tail = NULL, h = _N_(_ittapi_global).histogram_list; h != NULL;
+ h_tail = h, h = h->next) {
+ if (h->domain == NULL)
+ continue;
+ else if (h->domain != domain && h->nameW != NULL && !wcscmp(h->nameW, name))
+ break;
+ }
+ if (h == NULL) {
+ NEW_HISTOGRAM_W(&_N_(_ittapi_global), h, h_tail, domain, name, x_type,
+ y_type);
+ }
+ __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
+ return (__itt_histogram *)h;
+}
+
+static __itt_histogram *ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(histogram_createA),
+ _init))(
+ const __itt_domain *domain, const char *name, __itt_metadata_type x_type,
+ __itt_metadata_type y_type)
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+static __itt_histogram *ITTAPI ITT_VERSIONIZE(ITT_JOIN(
+ _N_(histogram_create), _init))(const __itt_domain *domain, const char *name,
+ __itt_metadata_type x_type,
+ __itt_metadata_type y_type)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+{
+ __itt_histogram *h_tail = NULL, *h = NULL;
+
+ if (domain == NULL || name == NULL) {
+ return NULL;
+ }
+
+ ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global));
+ if (_N_(_ittapi_global).api_initialized) {
+#if ITT_PLATFORM == ITT_PLATFORM_WIN
+ if (ITTNOTIFY_NAME(histogram_createA) &&
+ ITTNOTIFY_NAME(histogram_createA) !=
+ ITT_VERSIONIZE(ITT_JOIN(_N_(histogram_createA), _init))) {
+ __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
+ return ITTNOTIFY_NAME(histogram_createA)(domain, name, x_type, y_type);
+ }
+#else
+ if (ITTNOTIFY_NAME(histogram_create) &&
+ ITTNOTIFY_NAME(histogram_create) !=
+ ITT_VERSIONIZE(ITT_JOIN(_N_(histogram_create), _init))) {
+ if (PTHREAD_SYMBOLS)
+ __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
+ return ITTNOTIFY_NAME(histogram_create)(domain, name, x_type, y_type);
+ }
+#endif
+ else {
+#if ITT_PLATFORM == ITT_PLATFORM_WIN
+ __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
+#else
+ if (PTHREAD_SYMBOLS)
+ __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
+#endif
+ return NULL;
+ }
+ }
+ for (h_tail = NULL, h = _N_(_ittapi_global).histogram_list; h != NULL;
+ h_tail = h, h = h->next) {
+ if (h->domain == NULL)
+ continue;
+ else if (h->domain != domain && h->nameA != NULL &&
+ !__itt_fstrcmp(h->nameA, name))
+ break;
+ }
+ if (h == NULL) {
+ NEW_HISTOGRAM_A(&_N_(_ittapi_global), h, h_tail, domain, name, x_type,
+ y_type);
+ }
+ if (PTHREAD_SYMBOLS)
+ __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
+ return (__itt_histogram *)h;
+}
+
/* -------------------------------------------------------------------------- */
static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(pause), _init))(void) {
@@ -968,8 +1189,8 @@ static const char *__itt_get_lib_name(void) {
return lib_name;
}
-/* Avoid clashes with std::min, reported by tbb team */
-#define __itt_min(a, b) (a) < (b) ? (a) : (b)
+/* Avoid clashes with std::min */
+#define __itt_min(a, b) ((a) < (b) ? (a) : (b))
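
The new outer parentheses matter when the macro is embedded in a larger expression; without them the conditional operator captures part of the surrounding expression. An illustrative expansion:

/* old: res = __itt_min(x, y) + 1;  ->  res = (x) < (y) ? (x) : ((y) + 1);   (wrong)
   new: res = __itt_min(x, y) + 1;  ->  res = ((x) < (y) ? (x) : (y)) + 1;   (intended) */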
static __itt_group_id __itt_get_groups(void) {
int i;
@@ -1026,7 +1247,7 @@ static int __itt_lib_version(lib_t lib) {
/* It's not used right now! Comment it out to avoid warnings.
static void __itt_reinit_all_pointers(void)
{
- int i;
+ register int i;
// Fill all pointers with initial stubs
for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++)
*_N_(_ittapi_global).api_list_ptr[i].func_ptr =
@@ -1036,22 +1257,23 @@ _N_(_ittapi_global).api_list_ptr[i].init_func;
static void __itt_nullify_all_pointers(void) {
int i;
- /* Nullify all pointers except domain_create, string_handle_create and
+ /* Nulify all pointers except domain_create, string_handle_create and
* counter_create */
for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++)
*_N_(_ittapi_global).api_list_ptr[i].func_ptr =
_N_(_ittapi_global).api_list_ptr[i].null_func;
}
-// clang-format off
-#if ITT_PLATFORM == ITT_PLATFORM_WIN && KMP_MSVC_COMPAT
+#if ITT_PLATFORM == ITT_PLATFORM_WIN
+#if _MSC_VER
#pragma warning(push)
-#pragma warning(disable: 4054) /* warning C4054: 'type cast' : from function
- pointer 'XXX' to data pointer 'void *' */
-#pragma warning(disable: 4055) /* warning C4055: 'type cast' : from data pointer
- 'void *' to function pointer 'XXX' */
+#pragma warning(disable : 4054) /* warning C4054: 'type cast' : from function \
+ pointer 'XXX' to data pointer 'void *' */
+#pragma warning( \
+ disable : 4055) /* warning C4055: 'type cast' : from data pointer 'void *' \
+ to function pointer 'XXX' */
+#endif
#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-// clang-format on
ITT_EXTERN_C void _N_(fini_ittlib)(void) {
__itt_api_fini_t *__itt_api_fini_ptr = NULL;
@@ -1087,6 +1309,59 @@ ITT_EXTERN_C void _N_(fini_ittlib)(void) {
}
}
+/* !!! this function should be called under mutex lock !!! */
+static void __itt_free_allocated_resources(void) {
+ __itt_string_handle *current_string = _N_(_ittapi_global).string_list;
+ while (current_string != NULL) {
+ __itt_string_handle *tmp = current_string->next;
+ free((char *)current_string->strA);
+#if ITT_PLATFORM == ITT_PLATFORM_WIN
+ free((wchar_t *)current_string->strW);
+#endif
+ free(current_string);
+ current_string = tmp;
+ }
+ _N_(_ittapi_global).string_list = NULL;
+
+ __itt_domain *current_domain = _N_(_ittapi_global).domain_list;
+ while (current_domain != NULL) {
+ __itt_domain *tmp = current_domain->next;
+ free((char *)current_domain->nameA);
+#if ITT_PLATFORM == ITT_PLATFORM_WIN
+ free((wchar_t *)current_domain->nameW);
+#endif
+ free(current_domain);
+ current_domain = tmp;
+ }
+ _N_(_ittapi_global).domain_list = NULL;
+
+ __itt_counter_info_t *current_counter = _N_(_ittapi_global).counter_list;
+ while (current_counter != NULL) {
+ __itt_counter_info_t *tmp = current_counter->next;
+ free((char *)current_counter->nameA);
+ free((char *)current_counter->domainA);
+#if ITT_PLATFORM == ITT_PLATFORM_WIN
+ free((wchar_t *)current_counter->nameW);
+ free((wchar_t *)current_counter->domainW);
+#endif
+ free(current_counter);
+ current_counter = tmp;
+ }
+ _N_(_ittapi_global).counter_list = NULL;
+
+ __itt_histogram *current_histogram = _N_(_ittapi_global).histogram_list;
+ while (current_histogram != NULL) {
+ __itt_histogram *tmp = current_histogram->next;
+ free((char *)current_histogram->nameA);
+#if ITT_PLATFORM == ITT_PLATFORM_WIN
+ free((wchar_t *)current_histogram->nameW);
+#endif
+ free(current_histogram);
+ current_histogram = tmp;
+ }
+ _N_(_ittapi_global).histogram_list = NULL;
+}
+
ITT_EXTERN_C int _N_(init_ittlib)(const char *lib_name,
__itt_group_id init_groups) {
int i;
@@ -1120,7 +1395,7 @@ ITT_EXTERN_C int _N_(init_ittlib)(const char *lib_name,
switch (lib_version) {
case 0:
groups = __itt_group_legacy;
- KMP_FALLTHROUGH();
+ ITT_ATTRIBUTE_FALLTHROUGH;
case 1:
/* Fill all pointers from dynamic library */
for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL;
@@ -1140,8 +1415,10 @@ ITT_EXTERN_C int _N_(init_ittlib)(const char *lib_name,
__itt_error_no_symbol, lib_name,
_N_(_ittapi_global).api_list_ptr[i].name);
#ifdef ITT_COMPLETE_GROUP
- zero_group = (__itt_group_id)(
- zero_group | _N_(_ittapi_global).api_list_ptr[i].group);
+ zero_group =
+ (__itt_group_id)(zero_group | _N_(_ittapi_global)
+ .api_list_ptr[i]
+ .group);
#endif /* ITT_COMPLETE_GROUP */
}
} else
@@ -1183,6 +1460,7 @@ ITT_EXTERN_C int _N_(init_ittlib)(const char *lib_name,
break;
}
} else {
+ __itt_free_allocated_resources();
__itt_nullify_all_pointers();
__itt_report_error(__itt_error_no_module, lib_name,
@@ -1194,6 +1472,7 @@ ITT_EXTERN_C int _N_(init_ittlib)(const char *lib_name,
);
}
} else {
+ __itt_free_allocated_resources();
__itt_nullify_all_pointers();
}
_N_(_ittapi_global).api_initialized = 1;
@@ -1229,6 +1508,50 @@ _N_(set_error_handler)(__itt_error_handler_t *handler) {
return prev;
}
-#if ITT_PLATFORM == ITT_PLATFORM_WIN && KMP_MSVC_COMPAT
+#if ITT_PLATFORM == ITT_PLATFORM_WIN
+#if _MSC_VER
#pragma warning(pop)
+#endif
#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** The __itt_mark_pt_region functions mark a region of interest.
+ * The region parameter selects one of several regions,
+ * 0 <= region < 8. */
+
+#if defined(ITT_API_IPT_SUPPORT) && \
+ (ITT_PLATFORM == ITT_PLATFORM_WIN || \
+ ITT_PLATFORM == ITT_PLATFORM_POSIX) && \
+ !defined(__ANDROID__)
+void __itt_pt_mark(__itt_pt_region region);
+void __itt_pt_mark_event(__itt_pt_region region);
+#endif
+
+ITT_EXTERN_C void _N_(mark_pt_region_begin)(__itt_pt_region region) {
+#if defined(ITT_API_IPT_SUPPORT) && \
+ (ITT_PLATFORM == ITT_PLATFORM_WIN || \
+ ITT_PLATFORM == ITT_PLATFORM_POSIX) && \
+ !defined(__ANDROID__)
+ if (_N_(_ittapi_global).ipt_collect_events == 1) {
+ __itt_pt_mark_event(2 * region);
+ } else {
+ __itt_pt_mark(2 * region);
+ }
+#else
+ (void)region;
+#endif
+}
+
+ITT_EXTERN_C void _N_(mark_pt_region_end)(__itt_pt_region region) {
+#if defined(ITT_API_IPT_SUPPORT) && \
+ (ITT_PLATFORM == ITT_PLATFORM_WIN || \
+ ITT_PLATFORM == ITT_PLATFORM_POSIX) && \
+ !defined(__ANDROID__)
+ if (_N_(_ittapi_global).ipt_collect_events == 1) {
+ __itt_pt_mark_event(2 * region + 1);
+ } else {
+ __itt_pt_mark(2 * region + 1);
+ }
+#else
+ (void)region;
+#endif
+}
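
Each region index r is encoded as two event ids: 2*r marks the begin and 2*r+1 marks the end. A hedged usage sketch, assuming the public wrappers __itt_pt_region_create and __itt_mark_pt_region_begin/_end declared in ittnotify.h:

/* Hypothetical usage (not part of the diff). */
#include "ittnotify.h"

static void traced_work(void) {
  __itt_pt_region region = __itt_pt_region_create("hot_loop"); /* 0 <= region < 8 */
  __itt_mark_pt_region_begin(region); /* emits event 2*region     */
  /* ... code of interest ... */
  __itt_mark_pt_region_end(region);   /* emits event 2*region + 1 */
}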
diff --git a/openmp/runtime/src/thirdparty/ittnotify/ittnotify_static.h b/openmp/runtime/src/thirdparty/ittnotify/ittnotify_static.h
index cb884a8b3a1c..d06133d8c3de 100644
--- a/openmp/runtime/src/thirdparty/ittnotify/ittnotify_static.h
+++ b/openmp/runtime/src/thirdparty/ittnotify/ittnotify_static.h
@@ -36,6 +36,13 @@ ITT_STUB(ITTAPI, __itt_domain *, domain_create, (const char *name),
(ITT_FORMAT name), domain_create, __itt_group_structure, "\"%s\"")
#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, module_load_with_sections,
+ (__itt_module_object * module_obj), (ITT_FORMAT module_obj),
+ module_load_with_sections, __itt_group_module, "%p")
+ITT_STUBV(ITTAPI, void, module_unload_with_sections,
+ (__itt_module_object * module_obj), (ITT_FORMAT module_obj),
+ module_unload_with_sections, __itt_group_module, "%p")
+
#if ITT_PLATFORM == ITT_PLATFORM_WIN
ITT_STUB(ITTAPI, __itt_string_handle *, string_handle_createA,
(const char *name), (ITT_FORMAT name), string_handle_createA,
@@ -110,6 +117,26 @@ ITT_STUB(LIBITTAPI, int, thr_name_set, (const char *name, int namelen),
#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
ITT_STUBV(LIBITTAPI, void, thr_ignore, (void), (ITT_NO_PARAMS), thr_ignore,
__itt_group_thread | __itt_group_legacy, "no args")
+
+#if ITT_PLATFORM == ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_histogram *, histogram_createA,
+ (const __itt_domain *domain, const char *name,
+ __itt_metadata_type x_type, __itt_metadata_type y_type),
+ (ITT_FORMAT domain, name, x_type, y_type), histogram_createA,
+ __itt_group_structure, "%p, \"%s\", %d, %d")
+ITT_STUB(ITTAPI, __itt_histogram *, histogram_createW,
+ (const __itt_domain *domain, const wchar_t *name,
+ __itt_metadata_type x_type, __itt_metadata_type y_type),
+ (ITT_FORMAT domain, name, x_type, y_type), histogram_createW,
+ __itt_group_structure, "%p, \"%s\", %d, %d")
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_histogram *, histogram_create,
+ (const __itt_domain *domain, const char *name,
+ __itt_metadata_type x_type, __itt_metadata_type y_type),
+ (ITT_FORMAT domain, name, x_type, y_type), histogram_create,
+ __itt_group_structure, "%p, \"%s\", %d, %d")
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
#endif /* __ITT_INTERNAL_BODY */
ITT_STUBV(ITTAPI, void, enable_attach, (void), (ITT_NO_PARAMS), enable_attach,
@@ -524,6 +551,16 @@ ITT_STUB(ITTAPI, __itt_frame, frame_createW, (const wchar_t *domain),
ITT_STUB(ITTAPI, __itt_frame, frame_create, (const char *domain),
(ITT_FORMAT domain), frame_create, __itt_group_frame, "\"%s\"")
#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+#if ITT_PLATFORM == ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_pt_region, pt_region_createA, (const char *name),
+ (ITT_FORMAT name), pt_region_createA, __itt_group_structure, "\"%s\"")
+ITT_STUB(ITTAPI, __itt_pt_region, pt_region_createW, (const wchar_t *name),
+ (ITT_FORMAT name), pt_region_createW, __itt_group_structure, "\"%S\"")
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_pt_region, pt_region_create, (const char *name),
+ (ITT_FORMAT name), pt_region_create, __itt_group_structure, "\"%s\"")
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
#endif /* __ITT_INTERNAL_BODY */
ITT_STUBV(ITTAPI, void, frame_begin, (__itt_frame frame), (ITT_FORMAT frame),
frame_begin, __itt_group_frame, "%p")
@@ -737,22 +774,27 @@ ITT_STUB(ITTAPI, int, av_save,
#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
#endif /* __ITT_INTERNAL_BODY */
-#ifndef __ITT_INTERNAL_BODY
#if ITT_PLATFORM == ITT_PLATFORM_WIN
ITT_STUBV(ITTAPI, void, module_loadA,
(void *start_addr, void *end_addr, const char *path),
(ITT_FORMAT start_addr, end_addr, path), module_loadA,
- __itt_group_none, "%p, %p, %p")
+ __itt_group_module, "%p, %p, %p")
ITT_STUBV(ITTAPI, void, module_loadW,
(void *start_addr, void *end_addr, const wchar_t *path),
(ITT_FORMAT start_addr, end_addr, path), module_loadW,
- __itt_group_none, "%p, %p, %p")
+ __itt_group_module, "%p, %p, %p")
#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
ITT_STUBV(ITTAPI, void, module_load,
(void *start_addr, void *end_addr, const char *path),
(ITT_FORMAT start_addr, end_addr, path), module_load,
- __itt_group_none, "%p, %p, %p")
+ __itt_group_module, "%p, %p, %p")
#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#endif /* __ITT_INTERNAL_BODY */
+ITT_STUBV(ITTAPI, void, module_unload, (void *start_addr),
+ (ITT_FORMAT start_addr), module_unload, __itt_group_module, "%p")
+
+ITT_STUBV(ITTAPI, void, histogram_submit,
+ (__itt_histogram * hist, size_t length, void *x_data, void *y_data),
+ (ITT_FORMAT hist, length, x_data, y_data), histogram_submit,
+ __itt_group_structure, "%p, %lu, %p, %p")
#endif /* __ITT_INTERNAL_INIT */
diff --git a/openmp/runtime/src/thirdparty/ittnotify/ittnotify_types.h b/openmp/runtime/src/thirdparty/ittnotify/ittnotify_types.h
index 66afd8c7d183..9aff80335927 100644
--- a/openmp/runtime/src/thirdparty/ittnotify/ittnotify_types.h
+++ b/openmp/runtime/src/thirdparty/ittnotify/ittnotify_types.h
@@ -29,6 +29,7 @@ typedef enum ___itt_group_id {
__itt_group_structure = 1 << 12,
__itt_group_suppress = 1 << 13,
__itt_group_arrays = 1 << 14,
+ __itt_group_module = 1 << 15,
__itt_group_all = -1
} __itt_group_id;
@@ -57,6 +58,7 @@ typedef struct ___itt_group_list {
{__itt_group_structure, "structure"}, \
{__itt_group_suppress, "suppress"}, \
{__itt_group_arrays, "arrays"}, \
+ {__itt_group_module, "module"}, \
{__itt_group_none, NULL}}
#endif /* _ITTNOTIFY_TYPES_H_ */
diff --git a/openmp/runtime/src/thirdparty/ittnotify/legacy/ittnotify.h b/openmp/runtime/src/thirdparty/ittnotify/legacy/ittnotify.h
index 384a55881e1f..193256f6a42d 100644
--- a/openmp/runtime/src/thirdparty/ittnotify/legacy/ittnotify.h
+++ b/openmp/runtime/src/thirdparty/ittnotify/legacy/ittnotify.h
@@ -119,7 +119,13 @@
#if ITT_PLATFORM == ITT_PLATFORM_WIN
/* use __forceinline (VC++ specific) */
-#define ITT_INLINE __forceinline
+#if defined(__MINGW32__) && !defined(__cplusplus)
+#define ITT_INLINE \
+ static __inline__ __attribute__((__always_inline__, __gnu_inline__))
+#else
+#define ITT_INLINE static __forceinline
+#endif /* __MINGW32__ */
+
#define ITT_INLINE_ATTRIBUTE /* nothing */
#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
/*
diff --git a/openmp/runtime/src/z_Linux_util.cpp b/openmp/runtime/src/z_Linux_util.cpp
index 42ad1d56f9ec..5cd6ad6a0339 100644
--- a/openmp/runtime/src/z_Linux_util.cpp
+++ b/openmp/runtime/src/z_Linux_util.cpp
@@ -1051,6 +1051,8 @@ void __kmp_reap_worker(kmp_info_t *th) {
"exit_val = %p\n",
th->th.th_info.ds.ds_gtid, exit_val));
}
+#else
+ (void)status; // unused variable
#endif /* KMP_DEBUG */
KA_TRACE(10, ("__kmp_reap_worker: done reaping T#%d\n",
@@ -1232,7 +1234,7 @@ static void __kmp_atfork_child(void) {
// affinity in the parent
kmp_set_thread_affinity_mask_initial();
#endif
- // Set default not to bind threads tightly in the child (we’re expecting
+ // Set default not to bind threads tightly in the child (we're expecting
// over-subscription after the fork and this can improve things for
// scripting languages that use OpenMP inside process-parallel code).
__kmp_affinity_type = affinity_none;
@@ -1407,9 +1409,13 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) {
/* TODO: shouldn't this use release semantics to ensure that
__kmp_suspend_initialize_thread gets called first? */
old_spin = flag->set_sleeping();
+ TCW_PTR(th->th.th_sleep_loc, (void *)flag);
+ th->th.th_sleep_loc_type = flag->get_type();
if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
__kmp_pause_status != kmp_soft_paused) {
flag->unset_sleeping();
+ TCW_PTR(th->th.th_sleep_loc, NULL);
+ th->th.th_sleep_loc_type = flag_unset;
__kmp_unlock_suspend_mx(th);
return;
}
@@ -1417,8 +1423,10 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) {
" was %x\n",
th_gtid, flag->get(), flag->load(), old_spin));
- if (flag->done_check_val(old_spin)) {
- old_spin = flag->unset_sleeping();
+ if (flag->done_check_val(old_spin) || flag->done_check()) {
+ flag->unset_sleeping();
+ TCW_PTR(th->th.th_sleep_loc, NULL);
+ th->th.th_sleep_loc_type = flag_unset;
KF_TRACE(5, ("__kmp_suspend_template: T#%d false alarm, reset sleep bit "
"for spin(%p)\n",
th_gtid, flag->get()));
@@ -1427,7 +1435,6 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) {
"with low probability" return when the condition variable has
not been signaled or broadcast */
int deactivated = FALSE;
- TCW_PTR(th->th.th_sleep_loc, (void *)flag);
while (flag->is_sleeping()) {
#ifdef DEBUG_SUSPEND
@@ -1449,6 +1456,9 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) {
deactivated = TRUE;
}
+ KMP_DEBUG_ASSERT(th->th.th_sleep_loc);
+ KMP_DEBUG_ASSERT(flag->get_type() == th->th.th_sleep_loc_type);
+
#if USE_SUSPEND_TIMEOUT
struct timespec now;
struct timeval tval;
@@ -1478,6 +1488,18 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) {
if ((status != 0) && (status != EINTR) && (status != ETIMEDOUT)) {
KMP_SYSFAIL("pthread_cond_wait", status);
}
+
+ KMP_DEBUG_ASSERT(flag->get_type() == flag->get_ptr_type());
+
+ if (!flag->is_sleeping() &&
+ ((status == EINTR) || (status == ETIMEDOUT))) {
+ // if interrupt or timeout, and thread is no longer sleeping, we need to
+ // make sure sleep_loc gets reset; however, this shouldn't be needed if
+ // we woke up with resume
+ flag->unset_sleeping();
+ TCW_PTR(th->th.th_sleep_loc, NULL);
+ th->th.th_sleep_loc_type = flag_unset;
+ }
#ifdef KMP_DEBUG
if (status == ETIMEDOUT) {
if (flag->is_sleeping()) {
@@ -1487,6 +1509,8 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) {
KF_TRACE(2, ("__kmp_suspend_template: T#%d timeout wakeup, sleep bit "
"not set!\n",
th_gtid));
+ TCW_PTR(th->th.th_sleep_loc, NULL);
+ th->th.th_sleep_loc_type = flag_unset;
}
} else if (flag->is_sleeping()) {
KF_TRACE(100,
@@ -1504,6 +1528,13 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) {
}
}
}
+ // We may have had the loop variable set before entering the loop body;
+ // so we need to reset sleep_loc.
+ TCW_PTR(th->th.th_sleep_loc, NULL);
+ th->th.th_sleep_loc_type = flag_unset;
+
+ KMP_DEBUG_ASSERT(!flag->is_sleeping());
+ KMP_DEBUG_ASSERT(!th->th.th_sleep_loc);
#ifdef DEBUG_SUSPEND
{
char buffer[128];
@@ -1525,6 +1556,10 @@ template <bool C, bool S>
void __kmp_suspend_64(int th_gtid, kmp_flag_64<C, S> *flag) {
__kmp_suspend_template(th_gtid, flag);
}
+template <bool C, bool S>
+void __kmp_atomic_suspend_64(int th_gtid, kmp_atomic_flag_64<C, S> *flag) {
+ __kmp_suspend_template(th_gtid, flag);
+}
void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) {
__kmp_suspend_template(th_gtid, flag);
}
@@ -1532,6 +1567,10 @@ void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) {
template void __kmp_suspend_32<false, false>(int, kmp_flag_32<false, false> *);
template void __kmp_suspend_64<false, true>(int, kmp_flag_64<false, true> *);
template void __kmp_suspend_64<true, false>(int, kmp_flag_64<true, false> *);
+template void
+__kmp_atomic_suspend_64<false, true>(int, kmp_atomic_flag_64<false, true> *);
+template void
+__kmp_atomic_suspend_64<true, false>(int, kmp_atomic_flag_64<true, false> *);
/* This routine signals the thread specified by target_gtid to wake up
after setting the sleep bit indicated by the flag argument to FALSE.
@@ -1554,36 +1593,50 @@ static inline void __kmp_resume_template(int target_gtid, C *flag) {
__kmp_lock_suspend_mx(th);
- if (!flag) { // coming from __kmp_null_resume_wrapper
+ if (!flag || flag != th->th.th_sleep_loc) {
+ // coming from __kmp_null_resume_wrapper, or thread is now sleeping on a
+ // different location; wake up at new location
flag = (C *)CCAST(void *, th->th.th_sleep_loc);
}
// First, check if the flag is null or its type has changed. If so, someone
// else woke it up.
- if (!flag || flag->get_type() != flag->get_ptr_type()) { // get_ptr_type
- // simply shows what flag was cast to
+ if (!flag) { // Thread doesn't appear to be sleeping on anything
KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already "
"awake: flag(%p)\n",
- gtid, target_gtid, NULL));
+ gtid, target_gtid, (void *)NULL));
__kmp_unlock_suspend_mx(th);
return;
+ } else if (flag->get_type() != th->th.th_sleep_loc_type) {
+ // Flag type does not appear to match this function template; possibly the
+ // thread is sleeping on something else. Try null resume again.
+ KF_TRACE(
+ 5,
+ ("__kmp_resume_template: T#%d retrying, thread T#%d Mismatch flag(%p), "
+ "spin(%p) type=%d ptr_type=%d\n",
+ gtid, target_gtid, flag, flag->get(), flag->get_type(),
+ th->th.th_sleep_loc_type));
+ __kmp_unlock_suspend_mx(th);
+ __kmp_null_resume_wrapper(th);
+ return;
} else { // if multiple threads are sleeping, flag should be internally
// referring to a specific thread here
- typename C::flag_t old_spin = flag->unset_sleeping();
- if (!flag->is_sleeping_val(old_spin)) {
+ if (!flag->is_sleeping()) {
KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already "
- "awake: flag(%p): "
- "%u => %u\n",
- gtid, target_gtid, flag->get(), old_spin, flag->load()));
+ "awake: flag(%p): %u\n",
+ gtid, target_gtid, flag->get(), (unsigned int)flag->load()));
__kmp_unlock_suspend_mx(th);
return;
}
- KF_TRACE(5, ("__kmp_resume_template: T#%d about to wakeup T#%d, reset "
- "sleep bit for flag's loc(%p): "
- "%u => %u\n",
- gtid, target_gtid, flag->get(), old_spin, flag->load()));
}
+ KMP_DEBUG_ASSERT(flag);
+ flag->unset_sleeping();
TCW_PTR(th->th.th_sleep_loc, NULL);
+ th->th.th_sleep_loc_type = flag_unset;
+
+ KF_TRACE(5, ("__kmp_resume_template: T#%d about to wakeup T#%d, reset "
+ "sleep bit for flag's loc(%p): %u\n",
+ gtid, target_gtid, flag->get(), (unsigned int)flag->load()));
#ifdef DEBUG_SUSPEND
{
@@ -1609,12 +1662,19 @@ template <bool C, bool S>
void __kmp_resume_64(int target_gtid, kmp_flag_64<C, S> *flag) {
__kmp_resume_template(target_gtid, flag);
}
+template <bool C, bool S>
+void __kmp_atomic_resume_64(int target_gtid, kmp_atomic_flag_64<C, S> *flag) {
+ __kmp_resume_template(target_gtid, flag);
+}
void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag) {
__kmp_resume_template(target_gtid, flag);
}
template void __kmp_resume_32<false, true>(int, kmp_flag_32<false, true> *);
+template void __kmp_resume_32<false, false>(int, kmp_flag_32<false, false> *);
template void __kmp_resume_64<false, true>(int, kmp_flag_64<false, true> *);
+template void
+__kmp_atomic_resume_64<false, true>(int, kmp_atomic_flag_64<false, true> *);
#if KMP_USE_MONITOR
void __kmp_resume_monitor() {
@@ -1741,8 +1801,12 @@ static int __kmp_get_xproc(void) {
int r = 0;
-#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
- KMP_OS_OPENBSD || KMP_OS_HURD
+#if KMP_OS_LINUX
+
+ __kmp_type_convert(sysconf(_SC_NPROCESSORS_CONF), &(r));
+
+#elif KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_OPENBSD || \
+ KMP_OS_HURD
__kmp_type_convert(sysconf(_SC_NPROCESSORS_ONLN), &(r));
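
The switch being made here: on Linux the runtime now sizes itself by the number of configured processors (_SC_NPROCESSORS_CONF) rather than only those currently online (_SC_NPROCESSORS_ONLN). A minimal sketch of the two queries (illustrative, not part of the diff):

#include <stdio.h>
#include <unistd.h>

int main(void) {
  long configured = sysconf(_SC_NPROCESSORS_CONF); /* processors the OS was configured with */
  long online = sysconf(_SC_NPROCESSORS_ONLN);     /* processors currently online           */
  printf("configured=%ld online=%ld\n", configured, online);
  return 0;
}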
diff --git a/openmp/runtime/src/z_Windows_NT_util.cpp b/openmp/runtime/src/z_Windows_NT_util.cpp
index 320920283c9d..0a0801c7ece2 100644
--- a/openmp/runtime/src/z_Windows_NT_util.cpp
+++ b/openmp/runtime/src/z_Windows_NT_util.cpp
@@ -240,13 +240,12 @@ static void __kmp_win32_cond_wait(kmp_win32_cond_t *cv, kmp_win32_mutex_t *mx,
continue;
}
// condition fulfilled, exiting
- old_f = flag->unset_sleeping();
- KMP_DEBUG_ASSERT(old_f & KMP_BARRIER_SLEEP_STATE);
+ flag->unset_sleeping();
TCW_PTR(th->th.th_sleep_loc, NULL);
- KF_TRACE(50,
- ("__kmp_win32_cond_wait: exiting, condition "
- "fulfilled: flag's loc(%p): %u => %u\n",
- flag->get(), (unsigned int)old_f, (unsigned int)flag->load()));
+ th->th.th_sleep_loc_type = flag_unset;
+ KF_TRACE(50, ("__kmp_win32_cond_wait: exiting, condition "
+ "fulfilled: flag's loc(%p): %u\n",
+ flag->get(), (unsigned int)flag->load()));
__kmp_win32_mutex_lock(&cv->waiters_count_lock_);
KMP_DEBUG_ASSERT(cv->waiters_count_ > 0);
@@ -376,9 +375,13 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) {
/* TODO: shouldn't this use release semantics to ensure that
__kmp_suspend_initialize_thread gets called first? */
old_spin = flag->set_sleeping();
+ TCW_PTR(th->th.th_sleep_loc, (void *)flag);
+ th->th.th_sleep_loc_type = flag->get_type();
if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
__kmp_pause_status != kmp_soft_paused) {
flag->unset_sleeping();
+ TCW_PTR(th->th.th_sleep_loc, NULL);
+ th->th.th_sleep_loc_type = flag_unset;
__kmp_unlock_suspend_mx(th);
return;
}
@@ -387,8 +390,10 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) {
" loc(%p)==%u\n",
th_gtid, flag->get(), (unsigned int)flag->load()));
- if (flag->done_check_val(old_spin)) {
- old_spin = flag->unset_sleeping();
+ if (flag->done_check_val(old_spin) || flag->done_check()) {
+ flag->unset_sleeping();
+ TCW_PTR(th->th.th_sleep_loc, NULL);
+ th->th.th_sleep_loc_type = flag_unset;
KF_TRACE(5, ("__kmp_suspend_template: T#%d false alarm, reset sleep bit "
"for flag's loc(%p)\n",
th_gtid, flag->get()));
@@ -400,7 +405,7 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) {
low probability" return when the condition variable has not been signaled
or broadcast */
int deactivated = FALSE;
- TCW_PTR(th->th.th_sleep_loc, (void *)flag);
+
while (flag->is_sleeping()) {
KF_TRACE(15, ("__kmp_suspend_template: T#%d about to perform "
"kmp_win32_cond_wait()\n",
@@ -415,13 +420,14 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) {
KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
}
deactivated = TRUE;
- __kmp_win32_cond_wait(&th->th.th_suspend_cv, &th->th.th_suspend_mx, th,
- flag);
- } else {
- __kmp_win32_cond_wait(&th->th.th_suspend_cv, &th->th.th_suspend_mx, th,
- flag);
}
+ KMP_DEBUG_ASSERT(th->th.th_sleep_loc);
+ KMP_DEBUG_ASSERT(th->th.th_sleep_loc_type == flag->get_type());
+
+ __kmp_win32_cond_wait(&th->th.th_suspend_cv, &th->th.th_suspend_mx, th,
+ flag);
+
#ifdef KMP_DEBUG
if (flag->is_sleeping()) {
KF_TRACE(100,
@@ -431,6 +437,14 @@ static inline void __kmp_suspend_template(int th_gtid, C *flag) {
} // while
+ // We may have had the loop variable set before entering the loop body;
+ // so we need to reset sleep_loc.
+ TCW_PTR(th->th.th_sleep_loc, NULL);
+ th->th.th_sleep_loc_type = flag_unset;
+
+ KMP_DEBUG_ASSERT(!flag->is_sleeping());
+ KMP_DEBUG_ASSERT(!th->th.th_sleep_loc);
+
// Mark the thread as active again (if it was previous marked as inactive)
if (deactivated) {
th->th.th_active = TRUE;
@@ -453,6 +467,10 @@ template <bool C, bool S>
void __kmp_suspend_64(int th_gtid, kmp_flag_64<C, S> *flag) {
__kmp_suspend_template(th_gtid, flag);
}
+template <bool C, bool S>
+void __kmp_atomic_suspend_64(int th_gtid, kmp_atomic_flag_64<C, S> *flag) {
+ __kmp_suspend_template(th_gtid, flag);
+}
void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) {
__kmp_suspend_template(th_gtid, flag);
}
@@ -460,6 +478,10 @@ void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) {
template void __kmp_suspend_32<false, false>(int, kmp_flag_32<false, false> *);
template void __kmp_suspend_64<false, true>(int, kmp_flag_64<false, true> *);
template void __kmp_suspend_64<true, false>(int, kmp_flag_64<true, false> *);
+template void
+__kmp_atomic_suspend_64<false, true>(int, kmp_atomic_flag_64<false, true> *);
+template void
+__kmp_atomic_suspend_64<true, false>(int, kmp_atomic_flag_64<true, false> *);
/* This routine signals the thread specified by target_gtid to wake up
after setting the sleep bit indicated by the flag argument to FALSE */
@@ -477,32 +499,35 @@ static inline void __kmp_resume_template(int target_gtid, C *flag) {
__kmp_suspend_initialize_thread(th);
__kmp_lock_suspend_mx(th);
- if (!flag) { // coming from __kmp_null_resume_wrapper
+ if (!flag || flag != th->th.th_sleep_loc) {
+ // coming from __kmp_null_resume_wrapper, or thread is now sleeping on a
+ // different location; wake up at new location
flag = (C *)th->th.th_sleep_loc;
}
// First, check if the flag is null or its type has changed. If so, someone
// else woke it up.
- if (!flag || flag->get_type() != flag->get_ptr_type()) { // get_ptr_type
- // simply shows what
- // flag was cast to
+ if (!flag || flag->get_type() != th->th.th_sleep_loc_type) {
+ // simply shows what flag was cast to
KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already "
"awake: flag's loc(%p)\n",
gtid, target_gtid, NULL));
__kmp_unlock_suspend_mx(th);
return;
} else {
- typename C::flag_t old_spin = flag->unset_sleeping();
- if (!flag->is_sleeping_val(old_spin)) {
+ if (!flag->is_sleeping()) {
KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already "
- "awake: flag's loc(%p): %u => %u\n",
- gtid, target_gtid, flag->get(), (unsigned int)old_spin,
- (unsigned int)flag->load()));
+ "awake: flag's loc(%p): %u\n",
+ gtid, target_gtid, flag->get(), (unsigned int)flag->load()));
__kmp_unlock_suspend_mx(th);
return;
}
}
+ KMP_DEBUG_ASSERT(flag);
+ flag->unset_sleeping();
TCW_PTR(th->th.th_sleep_loc, NULL);
+ th->th.th_sleep_loc_type = flag_unset;
+
KF_TRACE(5, ("__kmp_resume_template: T#%d about to wakeup T#%d, reset sleep "
"bit for flag's loc(%p)\n",
gtid, target_gtid, flag->get()));
@@ -523,12 +548,19 @@ template <bool C, bool S>
void __kmp_resume_64(int target_gtid, kmp_flag_64<C, S> *flag) {
__kmp_resume_template(target_gtid, flag);
}
+template <bool C, bool S>
+void __kmp_atomic_resume_64(int target_gtid, kmp_atomic_flag_64<C, S> *flag) {
+ __kmp_resume_template(target_gtid, flag);
+}
void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag) {
__kmp_resume_template(target_gtid, flag);
}
template void __kmp_resume_32<false, true>(int, kmp_flag_32<false, true> *);
+template void __kmp_resume_32<false, false>(int, kmp_flag_32<false, false> *);
template void __kmp_resume_64<false, true>(int, kmp_flag_64<false, true> *);
+template void
+__kmp_atomic_resume_64<false, true>(int, kmp_atomic_flag_64<false, true> *);
void __kmp_yield() { Sleep(0); }